From af03f7fb37bf13ed923f893182fd4e2f6c40bfd2 Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Mon, 28 Jun 2021 12:22:27 +0700 Subject: [PATCH 001/619] [Test] Add XFAIL test for PR50918 --- .../Transforms/LoopStrengthReduce/pr50918.ll | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 llvm/test/Transforms/LoopStrengthReduce/pr50918.ll diff --git a/llvm/test/Transforms/LoopStrengthReduce/pr50918.ll b/llvm/test/Transforms/LoopStrengthReduce/pr50918.ll new file mode 100644 index 0000000000000..f8c26d3181ae7 --- /dev/null +++ b/llvm/test/Transforms/LoopStrengthReduce/pr50918.ll @@ -0,0 +1,44 @@ +; RUN: opt -S -loop-reduce < %s | FileCheck %s +; +; REQUIRES: asserts +; XFAIL: * +; +; Make sure we don't fail an assertion here. + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2" +target triple = "x86_64-unknown-linux-gnu" + +define void @test() { +; CHECK-LABEL: test +bb: + br label %bb1 + +bb1: ; preds = %bb12, %bb + %tmp2 = phi i64 [ 94, %bb ], [ %tmp20, %bb12 ] + %tmp3 = phi i32 [ -28407, %bb ], [ %tmp23, %bb12 ] + %tmp4 = trunc i64 %tmp2 to i32 + %tmp5 = add i32 %tmp3, %tmp4 + %tmp6 = mul i32 undef, %tmp5 + %tmp7 = sub i32 %tmp6, %tmp5 + %tmp8 = shl i32 %tmp7, 1 + %tmp9 = add i32 %tmp8, %tmp3 + %tmp10 = add i32 %tmp9, %tmp4 + %tmp11 = shl i32 %tmp10, 1 + br label %bb21 + +bb12: ; preds = %bb21 + %tmp13 = mul i32 %tmp22, -101 + %tmp14 = add i32 %tmp22, 2 + %tmp15 = add i32 %tmp14, %tmp13 + %tmp16 = trunc i32 %tmp15 to i8 + %tmp17 = shl i8 %tmp16, 5 + %tmp18 = add i8 %tmp17, 64 + %tmp19 = sext i8 %tmp18 to i32 + %tmp20 = add nsw i64 %tmp2, -3 + br label %bb1 + +bb21: ; preds = %bb21, %bb1 + %tmp22 = phi i32 [ %tmp11, %bb1 ], [ %tmp23, %bb21 ] + %tmp23 = add i32 %tmp22, 1 + br i1 false, label %bb12, label %bb21 +} From d58514d41c3dedc67b9f043d61f160810a64b208 Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Mon, 28 Jun 2021 12:50:04 +0700 Subject: [PATCH 002/619] [LSR][NFC] Make sure that after the canonicalization the formula is canonical --- llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 5d2d9678ce353..c573b1a3a7702 100644 --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -533,6 +533,7 @@ void Formula::canonicalize(const Loop &L) { if (I != BaseRegs.end()) std::swap(ScaledReg, *I); } + assert(isCanonical(L) && "Failed to canonicalize?"); } /// Get rid of the scale in the formula. From 616b998b53d8510c3d538ce34209575140955880 Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Mon, 28 Jun 2021 12:55:53 +0700 Subject: [PATCH 003/619] Revert "[Test] Add XFAIL test for PR50918" This reverts commit af03f7fb37bf13ed923f893182fd4e2f6c40bfd2. Looks like the test is passing in some architectures. Reverting to green. --- .../Transforms/LoopStrengthReduce/pr50918.ll | 44 ------------------- 1 file changed, 44 deletions(-) delete mode 100644 llvm/test/Transforms/LoopStrengthReduce/pr50918.ll diff --git a/llvm/test/Transforms/LoopStrengthReduce/pr50918.ll b/llvm/test/Transforms/LoopStrengthReduce/pr50918.ll deleted file mode 100644 index f8c26d3181ae7..0000000000000 --- a/llvm/test/Transforms/LoopStrengthReduce/pr50918.ll +++ /dev/null @@ -1,44 +0,0 @@ -; RUN: opt -S -loop-reduce < %s | FileCheck %s -; -; REQUIRES: asserts -; XFAIL: * -; -; Make sure we don't fail an assertion here. - -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2" -target triple = "x86_64-unknown-linux-gnu" - -define void @test() { -; CHECK-LABEL: test -bb: - br label %bb1 - -bb1: ; preds = %bb12, %bb - %tmp2 = phi i64 [ 94, %bb ], [ %tmp20, %bb12 ] - %tmp3 = phi i32 [ -28407, %bb ], [ %tmp23, %bb12 ] - %tmp4 = trunc i64 %tmp2 to i32 - %tmp5 = add i32 %tmp3, %tmp4 - %tmp6 = mul i32 undef, %tmp5 - %tmp7 = sub i32 %tmp6, %tmp5 - %tmp8 = shl i32 %tmp7, 1 - %tmp9 = add i32 %tmp8, %tmp3 - %tmp10 = add i32 %tmp9, %tmp4 - %tmp11 = shl i32 %tmp10, 1 - br label %bb21 - -bb12: ; preds = %bb21 - %tmp13 = mul i32 %tmp22, -101 - %tmp14 = add i32 %tmp22, 2 - %tmp15 = add i32 %tmp14, %tmp13 - %tmp16 = trunc i32 %tmp15 to i8 - %tmp17 = shl i8 %tmp16, 5 - %tmp18 = add i8 %tmp17, 64 - %tmp19 = sext i8 %tmp18 to i32 - %tmp20 = add nsw i64 %tmp2, -3 - br label %bb1 - -bb21: ; preds = %bb21, %bb1 - %tmp22 = phi i32 [ %tmp11, %bb1 ], [ %tmp23, %bb21 ] - %tmp23 = add i32 %tmp22, 1 - br i1 false, label %bb12, label %bb21 -} From 04242bdca991145548ab70be5e1c6c65390699dd Mon Sep 17 00:00:00 2001 From: Min-Yih Hsu Date: Sun, 27 Jun 2021 23:22:40 -0700 Subject: [PATCH 004/619] Revert "[M68k][GloballSel] Formal arguments lowering in IRTranslator" This reverts commit 8f43407a07f015ca9a7543c6a0b5bde3918f9a0e due to failure on its associated test. --- .../Target/M68k/GlSel/M68kCallLowering.cpp | 52 +----- llvm/lib/Target/M68k/GlSel/M68kCallLowering.h | 23 --- llvm/lib/Target/M68k/M68kISelLowering.cpp | 6 - llvm/lib/Target/M68k/M68kISelLowering.h | 3 - .../M68k/GlobalISel/irtranslator-ret.ll | 169 ------------------ 5 files changed, 4 insertions(+), 249 deletions(-) diff --git a/llvm/lib/Target/M68k/GlSel/M68kCallLowering.cpp b/llvm/lib/Target/M68k/GlSel/M68kCallLowering.cpp index ece282ea4faed..9f58834c85239 100644 --- a/llvm/lib/Target/M68k/GlSel/M68kCallLowering.cpp +++ b/llvm/lib/Target/M68k/GlSel/M68kCallLowering.cpp @@ -15,17 +15,12 @@ #include "M68kCallLowering.h" #include "M68kISelLowering.h" #include "M68kInstrInfo.h" -#include "M68kSubtarget.h" -#include "M68kTargetMachine.h" -#include "llvm/CodeGen/CallingConvLower.h" -#include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" -#include "llvm/CodeGen/TargetCallingConv.h" - using namespace llvm; M68kCallLowering::M68kCallLowering(const M68kTargetLowering &TLI) : CallLowering(&TLI) {} + bool M68kCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val, ArrayRef VRegs, FunctionLoweringInfo &FLI, @@ -41,50 +36,11 @@ bool M68kCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, ArrayRef> VRegs, FunctionLoweringInfo &FLI) const { - MachineFunction &MF = MIRBuilder.getMF(); - MachineRegisterInfo &MRI = MF.getRegInfo(); - const auto &DL = F.getParent()->getDataLayout(); - auto &TLI = *getTLI(); - SmallVector SplitArgs; - unsigned I = 0; - for (const auto &Arg : F.args()) { - ArgInfo OrigArg{VRegs[I], Arg.getType()}; - setArgFlags(OrigArg, I + AttributeList::FirstArgIndex, DL, F); - splitToValueTypes(OrigArg, SplitArgs, DL, F.getCallingConv()); - ++I; - } + if (F.arg_empty()) + return true; - CCAssignFn *AssignFn = - TLI.getCCAssignFnForCall(F.getCallingConv(), false, F.isVarArg()); - IncomingValueAssigner ArgAssigner(AssignFn); - FormalArgHandler ArgHandler(MIRBuilder, MRI); - return determineAndHandleAssignments(ArgHandler, ArgAssigner, SplitArgs, - MIRBuilder, F.getCallingConv(), - F.isVarArg()); -} - -void M68kIncomingValueHandler::assignValueToReg(Register ValVReg, - Register PhysReg, - CCValAssign &VA) { - MIRBuilder.getMRI()->addLiveIn(PhysReg); - MIRBuilder.getMBB().addLiveIn(PhysReg); - IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA); -} - -void M68kIncomingValueHandler::assignValueToAddress(Register ValVReg, - Register Addr, - uint64_t Size, - MachinePointerInfo &MPO, - CCValAssign &VA) { - llvm_unreachable("unimeplemented"); -} - -Register M68kIncomingValueHandler::getStackAddress(uint64_t Size, - int64_t Offset, - MachinePointerInfo &MPO, - ISD::ArgFlagsTy Flags) { - llvm_unreachable("unimeplemented"); + return false; } bool M68kCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, diff --git a/llvm/lib/Target/M68k/GlSel/M68kCallLowering.h b/llvm/lib/Target/M68k/GlSel/M68kCallLowering.h index a9ebb2b0f0aa5..b58db2322e27f 100644 --- a/llvm/lib/Target/M68k/GlSel/M68kCallLowering.h +++ b/llvm/lib/Target/M68k/GlSel/M68kCallLowering.h @@ -43,29 +43,6 @@ class M68kCallLowering : public CallLowering { bool enableBigEndian() const override; }; -struct M68kIncomingValueHandler : public CallLowering::IncomingValueHandler { - M68kIncomingValueHandler(MachineIRBuilder &MIRBuilder, - MachineRegisterInfo &MRI) - : CallLowering::IncomingValueHandler(MIRBuilder, MRI) {} - - uint64_t StackUsed; - -private: - void assignValueToReg(Register ValVReg, Register PhysReg, - CCValAssign &VA) override; - - void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size, - MachinePointerInfo &MPO, CCValAssign &VA) override; - - Register getStackAddress(uint64_t Size, int64_t Offset, - MachinePointerInfo &MPO, - ISD::ArgFlagsTy Flags) override; -}; - -struct FormalArgHandler : public M68kIncomingValueHandler { - FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI) - : M68kIncomingValueHandler(MIRBuilder, MRI) {} -}; } // end namespace llvm diff --git a/llvm/lib/Target/M68k/M68kISelLowering.cpp b/llvm/lib/Target/M68k/M68kISelLowering.cpp index ef61942275790..c79e9d124db57 100644 --- a/llvm/lib/Target/M68k/M68kISelLowering.cpp +++ b/llvm/lib/Target/M68k/M68kISelLowering.cpp @@ -3412,9 +3412,3 @@ const char *M68kTargetLowering::getTargetNodeName(unsigned Opcode) const { return NULL; } } - -CCAssignFn *M68kTargetLowering::getCCAssignFnForCall(CallingConv::ID CC, - bool Return, - bool IsVarArg) const { - return CC_M68k_C; -} diff --git a/llvm/lib/Target/M68k/M68kISelLowering.h b/llvm/lib/Target/M68k/M68kISelLowering.h index 064936a4a6e50..8c3c0511ecf80 100644 --- a/llvm/lib/Target/M68k/M68kISelLowering.h +++ b/llvm/lib/Target/M68k/M68kISelLowering.h @@ -171,9 +171,6 @@ class M68kTargetLowering : public TargetLowering { EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override; - CCAssignFn *getCCAssignFnForCall(CallingConv::ID CC, bool Return, - bool IsVarArg) const; - private: unsigned GetAlignedArgumentStackSize(unsigned StackSize, SelectionDAG &DAG) const; diff --git a/llvm/test/CodeGen/M68k/GlobalISel/irtranslator-ret.ll b/llvm/test/CodeGen/M68k/GlobalISel/irtranslator-ret.ll index 99004a4c40045..38849e30bf777 100644 --- a/llvm/test/CodeGen/M68k/GlobalISel/irtranslator-ret.ll +++ b/llvm/test/CodeGen/M68k/GlobalISel/irtranslator-ret.ll @@ -1,176 +1,7 @@ ; RUN: llc -mtriple=m68k -global-isel -stop-after=irtranslator < %s | FileCheck %s -%struct.A = type { i8, float, i32, i32, i32 } - ; CHECK: name: noArgRetVoid ; CHECK: RTS define void @noArgRetVoid() { ret void } - -define void @test_arg_lowering1(i8 %x, i8 %y) { - ; CHECK-LABEL: name: test_arg_lowering1 - ; CHECK: bb.1 (%ir-block.0): - ; CHECK: [[G_F_I1:%[0-9]+]]:_(p0) = G_FRAME_INDEX - ; CHECK: [[G_LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[G_F_I1]](p0) - ; CHECK: [[G_TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[G_LOAD1]](s32) - ; CHECK: [[G_F_I2:%[0-9]+]]:_(p0) = G_FRAME_INDEX - ; CHECK: [[G_LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[G_F_I2]](p0) - ; CHECK: [[G_TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[G_LOAD2]](s32) - ; CHECK: RTS - ret void -} - -define void @test_arg_lowering2(i16 %x, i16 %y) { - ; CHECK-LABEL: name: test_arg_lowering2 - ; CHECK: bb.1 (%ir-block.0): - ; CHECK: [[G_F_I1:%[0-9]+]]:_(p0) = G_FRAME_INDEX - ; CHECK: [[G_LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[G_F_I1]](p0) - ; CHECK: [[G_TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[G_LOAD1]](s32) - ; CHECK: [[G_F_I2:%[0-9]+]]:_(p0) = G_FRAME_INDEX - ; CHECK: [[G_LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[G_F_I2]](p0) - ; CHECK: [[G_TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[G_LOAD2]](s32) - ; CHECK: RTS - ret void -} - -define void @test_arg_lowering3(i32 %x, i32 %y) { - ; CHECK-LABEL: name: test_arg_lowering3 - ; CHECK: bb.1 (%ir-block.0): - ; CHECK: [[G_F_I1:%[0-9]+]]:_(p0) = G_FRAME_INDEX - ; CHECK: {{%.*}} G_LOAD [[G_F_I1]](p0) - ; CHECK: [[G_F_I2:%[0-9]+]]:_(p0) = G_FRAME_INDEX - ; CHECK: {{%.*}} G_LOAD [[G_F_I2]](p0) - ; CHECK: RTS - ret void -} - -define void @test_arg_lowering_vector(<5 x i8> %x) { - ; CHECK-LABEL: name: test_arg_lowering_vector - ; CHECK: bb.1 (%ir-block.0): - ; CHECK: [[G_F_I1:%[0-9]+]]:_(p0) = G_FRAME_INDEX - ; CHECK: [[G_LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[G_F_I1]](p0) - ; CHECK: [[G_F_I2:%[0-9]+]]:_(p0) = G_FRAME_INDEX - ; CHECK: [[G_LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[G_F_I2]](p0) - ; CHECK: [[G_F_I3:%[0-9]+]]:_(p0) = G_FRAME_INDEX - ; CHECK: [[G_LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[G_F_I3]](p0) - ; CHECK: [[G_F_I4:%[0-9]+]]:_(p0) = G_FRAME_INDEX - ; CHECK: [[G_LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[G_F_I4]](p0) - ; CHECK: [[G_F_I5:%[0-9]+]]:_(p0) = G_FRAME_INDEX - ; CHECK: [[G_LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[G_F_I5]](p0) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[G_LOAD1]](s32), [[G_LOAD2]](s32), [[G_LOAD3]](s32), [[G_LOAD4]](s32), [[G_LOAD5]](s32) - ; CHECK: [[G_TRUNC:%[0-9]+]]:_(<5 x s8>) = G_TRUNC [[BUILD_VECTOR]](<5 x s32>) - ; CHECK: RTS - ret void -} - -define void @test_arg_lowering_array([5 x i8] %x) { - ; CHECK-LABEL: name: test_arg_lowering_array - ; CHECK: bb.1 (%ir-block.0): - ; CHECK: [[G_F_I1:%[0-9]+]]:_(p0) = G_FRAME_INDEX - ; CHECK: [[G_LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[G_F_I1]](p0) - ; CHECK: [[G_TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[G_LOAD1]](s32) - ; CHECK: [[G_F_I2:%[0-9]+]]:_(p0) = G_FRAME_INDEX - ; CHECK: [[G_LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[G_F_I2]](p0) - ; CHECK: [[G_TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[G_LOAD2]](s32) - ; CHECK: [[G_F_I3:%[0-9]+]]:_(p0) = G_FRAME_INDEX - ; CHECK: [[G_LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[G_F_I3]](p0) - ; CHECK: [[G_TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[G_LOAD3]](s32) - ; CHECK: [[G_F_I4:%[0-9]+]]:_(p0) = G_FRAME_INDEX - ; CHECK: [[G_LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[G_F_I4]](p0) - ; CHECK: [[G_TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[G_LOAD4]](s32) - ; CHECK: [[G_F_I5:%[0-9]+]]:_(p0) = G_FRAME_INDEX - ; CHECK: [[G_LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[G_F_I5]](p0) - ; CHECK: [[G_TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[G_LOAD5]](s32) - ; CHECK: RTS - ret void -} - -define void @test_arg_lowering_double(double %x) { - ; CHECK-LABEL: name: test_arg_lowering_double - ; CHECK: bb.1 (%ir-block.0): - ; CHECK: [[G_F_I1:%[0-9]+]]:_(p0) = G_FRAME_INDEX - ; CHECK: [[G_LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[G_F_I1]](p0) - ; CHECK: [[G_F_I2:%[0-9]+]]:_(p0) = G_FRAME_INDEX - ; CHECK: [[G_LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[G_F_I2]](p0) - ; CHECK: [[G_MERGE_VAL:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[G_LOAD1]](s32), [[G_LOAD2]](s32) - ; CHECK: RTS - ret void -} - -define void @test_arg_lowering_float(float %x) { - ; CHECK-LABEL: name: test_arg_lowering_float - ; CHECK: bb.1 (%ir-block.0): - ; CHECK: [[G_F_I1:%[0-9]+]]:_(p0) = G_FRAME_INDEX - ; CHECK: [[G_LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[G_F_I1]](p0) - ; CHECK: RTS - ret void -} - -define void @test_arg_lowering_multiple(i1 %a, i8 %b, i16 %c, i32 %d, i64 %e, i128 %f){ - ; CHECK-LABEL: name: test_arg_lowering_multiple - ; CHECK: bb.1 (%ir-block.0): - ; CHECK: [[G_F_I1:%[0-9]+]]:_(p0) = G_FRAME_INDEX - ; CHECK: [[G_LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[G_F_I1]](p0) - ; CHECK: [[G_TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[G_LOAD1]](s32) - ; CHECK: [[G_F_I2:%[0-9]+]]:_(p0) = G_FRAME_INDEX - ; CHECK: [[G_LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[G_F_I2]](p0) - ; CHECK: [[G_TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[G_LOAD2]](s32) - ; CHECK: [[G_F_I3:%[0-9]+]]:_(p0) = G_FRAME_INDEX - ; CHECK: [[G_LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[G_F_I3]](p0) - ; CHECK: [[G_TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[G_LOAD3]](s32) - ; CHECK: [[G_F_I4:%[0-9]+]]:_(p0) = G_FRAME_INDEX - ; CHECK: [[G_LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[G_F_I4]](p0) - ; CHECK: [[G_F_I5:%[0-9]+]]:_(p0) = G_FRAME_INDEX - ; CHECK: [[G_LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[G_F_I5]](p0) - ; CHECK: [[G_F_I6:%[0-9]+]]:_(p0) = G_FRAME_INDEX - ; CHECK: [[G_LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[G_F_I6]](p0) - ; CHECK: [[G_MERGE_VAL:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[G_LOAD5]](s32), [[G_LOAD6]](s32) - ; CHECK: [[G_F_I7:%[0-9]+]]:_(p0) = G_FRAME_INDEX - ; CHECK: [[G_LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[G_F_I7]](p0) - ; CHECK: [[G_F_I8:%[0-9]+]]:_(p0) = G_FRAME_INDEX - ; CHECK: [[G_LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[G_F_I8]](p0) - ; CHECK: [[G_F_I9:%[0-9]+]]:_(p0) = G_FRAME_INDEX - ; CHECK: [[G_LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[G_F_I9]](p0) - ; CHECK: [[G_F_I10:%[0-9]+]]:_(p0) = G_FRAME_INDEX - ; CHECK: [[G_LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[G_F_I10]](p0) - ; CHECK: [[G_MERGE_VAL:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[G_LOAD7]](s32), [[G_LOAD8]](s32), [[G_LOAD9]](s32), [[G_LOAD10]](s32) - ; CHECK: RTS - ret void -} - -define void @test_arg_lowering_ptr(i32* %x) { - ; CHECK-LABEL: name: test_arg_lowering_ptr - ; CHECK: bb.1 (%ir-block.0): - ; CHECK: [[G_F_I1:%[0-9]+]]:_(p0) = G_FRAME_INDEX - ; CHECK: [[G_LOAD1:%[0-9]+]]:_(p0) = G_LOAD [[G_F_I1]](p0) - ; CHECK: RTS - ret void -} - -define void @test_arg_lowering_float_ptr(float* %x) { - ; CHECK-LABEL: name: test_arg_lowering_float_ptr - ; CHECK: bb.1 (%ir-block.0): - ; CHECK: [[G_F_I1:%[0-9]+]]:_(p0) = G_FRAME_INDEX - ; CHECK: [[G_LOAD1:%[0-9]+]]:_(p0) = G_LOAD [[G_F_I1]](p0) - ; CHECK: RTS - ret void -} - -define void @test_arg_lowering_struct(%struct.A %a) #0 { - ; CHECK-LABEL: name: test_arg_lowering_struct - ; CHECK: bb.1 (%ir-block.0): - ; CHECK: [[G_F_I1:%[0-9]+]]:_(p0) = G_FRAME_INDEX - ; CHECK: [[G_LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[G_F_I1]](p0) - ; CHECK: [[G_TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[G_LOAD1]](s32) - ; CHECK: [[G_F_I2:%[0-9]+]]:_(p0) = G_FRAME_INDEX - ; CHECK: [[G_LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[G_F_I2]](p0) - ; CHECK: [[G_F_I3:%[0-9]+]]:_(p0) = G_FRAME_INDEX - ; CHECK: [[G_LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[G_F_I3]](p0) - ; CHECK: [[G_F_I4:%[0-9]+]]:_(p0) = G_FRAME_INDEX - ; CHECK: [[G_LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[G_F_I4]](p0) - ; CHECK: [[G_F_I5:%[0-9]+]]:_(p0) = G_FRAME_INDEX - ; CHECK: [[G_LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[G_F_I5]](p0) - ; CHECK: RTS - ret void -} From abe0fa43523502c549ff9394d28f9f29f5be0a3d Mon Sep 17 00:00:00 2001 From: Igor Kudrin Date: Mon, 28 Jun 2021 14:22:41 +0700 Subject: [PATCH 005/619] [llvm-objdump] Print comments for the disassembled code LLVM disassembler can generate comments for disassembled instructions. The patch enables printing these comments for 'llvm-objdump -d'. Differential Revision: https://reviews.llvm.org/D104699 --- .../ELF/AArch64/disassemble-align.s | 8 +++-- .../ELF/AArch64/disassemble-print-comments.s | 13 +++++++ .../llvm-objdump/X86/disassemble-align.s | 29 +++++++-------- .../X86/disassemble-print-comments.s | 13 +++++++ llvm/tools/llvm-objdump/llvm-objdump.cpp | 35 ++++++++++++++++--- 5 files changed, 77 insertions(+), 21 deletions(-) create mode 100644 llvm/test/tools/llvm-objdump/ELF/AArch64/disassemble-print-comments.s create mode 100644 llvm/test/tools/llvm-objdump/X86/disassemble-print-comments.s diff --git a/llvm/test/tools/llvm-objdump/ELF/AArch64/disassemble-align.s b/llvm/test/tools/llvm-objdump/ELF/AArch64/disassemble-align.s index 8ad204200e544..30528d94cd339 100644 --- a/llvm/test/tools/llvm-objdump/ELF/AArch64/disassemble-align.s +++ b/llvm/test/tools/llvm-objdump/ELF/AArch64/disassemble-align.s @@ -3,10 +3,12 @@ ## Use '|' to show where the tabs line up. # CHECK:0000000000000000 <$x.0>: -# CHECK-NEXT: 0: 62 10 00 91 |add|x2, x3, #4 +# CHECK-NEXT: 0: 62 10 00 91 |add|x2, x3, #4 // =4 +# CHECK-NEXT: 4: 1f 20 03 d5 |nop # CHECK-EMPTY: -# CHECK-NEXT:0000000000000004 <$d.1>: -# CHECK-NEXT: 4:|ff ff 00 00|.word|0x0000ffff +# CHECK-NEXT:0000000000000008 <$d.1>: +# CHECK-NEXT: 8:|ff ff 00 00|.word|0x0000ffff add x2, x3, #4 + nop .word 0xffff diff --git a/llvm/test/tools/llvm-objdump/ELF/AArch64/disassemble-print-comments.s b/llvm/test/tools/llvm-objdump/ELF/AArch64/disassemble-print-comments.s new file mode 100644 index 0000000000000..81bafe58816c8 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/ELF/AArch64/disassemble-print-comments.s @@ -0,0 +1,13 @@ +## Check that 'llvm-objdump -d' prints comments generated by the disassembler. + +# RUN: llvm-mc -filetype=obj -triple=aarch64 -mattr=+sve %s -o %t +# RUN: llvm-objdump -d --mattr=+sve --no-show-raw-insn %t | FileCheck %s + +# CHECK: 0000000000000000 : +# CHECK-NEXT: 0: add x0, x2, #2, lsl #12 // =8192 +# CHECK-NEXT: 4: add z31.d, z31.d, #65280 // =0xff00 + + .text +foo: + add x0, x2, 8192 + add z31.d, z31.d, #65280 diff --git a/llvm/test/tools/llvm-objdump/X86/disassemble-align.s b/llvm/test/tools/llvm-objdump/X86/disassemble-align.s index 09366c9b6b645..17809d88bfbf4 100644 --- a/llvm/test/tools/llvm-objdump/X86/disassemble-align.s +++ b/llvm/test/tools/llvm-objdump/X86/disassemble-align.s @@ -1,26 +1,27 @@ # RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t # Use '|' to show where the tabs line up. -# RUN: llvm-objdump -d --print-imm-hex %t | tr '\t' '|' | FileCheck --strict-whitespace %s +# RUN: llvm-objdump -d --print-imm-hex %t | tr '\t' '|' | \ +# RUN: FileCheck --match-full-lines --strict-whitespace %s # RUN: llvm-objdump -d --print-imm-hex --no-show-raw-insn %t | tr '\t' '|' | \ -# RUN: FileCheck -check-prefix=NORAW -strict-whitespace %s +# RUN: FileCheck -check-prefix=NORAW --match-full-lines -strict-whitespace %s # Instructions are expected to be aligned if the instruction in hex is not too long. -# CHECK: 0: c3 |retq -# CHECK-NEXT: 1: 48 8b 05 56 34 12 00 |movq|0x123456(%rip), %rax -# CHECK-NEXT: 8: 48 b8 54 55 55 55 55 55 55 55|movabsq|$0x5555555555555554, %rax -# CHECK-NEXT: 12: 8f ea 00 12 4c 02 40 00 00 00 00 |lwpval|$0x0, 0x40(%rdx,%rax), %r15d -# CHECK-NEXT: 1d: 8f ea 00 12 04 25 f0 1c f0 1c 00 00 00 00 |lwpins|$0x0, 0x1cf01cf0, %r15d -# CHECK-NEXT: 2b: ff ff | +# CHECK: 0: c3 |retq +# CHECK-NEXT: 1: 48 8b 05 56 34 12 00 |movq|0x123456(%rip), %rax # 12345e <.text+0x12345e> +# CHECK-NEXT: 8: 48 b8 54 55 55 55 55 55 55 55|movabsq|$0x5555555555555554, %rax # imm = 0x5555555555555554 +# CHECK-NEXT: 12: 8f ea 00 12 4c 02 40 00 00 00 00 |lwpval|$0x0, 0x40(%rdx,%rax), %r15d +# CHECK-NEXT: 1d: 8f ea 00 12 04 25 f0 1c f0 1c 00 00 00 00 |lwpins|$0x0, 0x1cf01cf0, %r15d +# CHECK-NEXT: 2b: ff ff | -# NORAW: 0: |retq -# NORAW-NEXT: 1: |movq|0x123456(%rip), %rax -# NORAW-NEXT: 8: |movabsq|$0x5555555555555554, %rax -# NORAW-NEXT: 12: |lwpval|$0x0, 0x40(%rdx,%rax), %r15d -# NORAW-NEXT: 1d: |lwpins|$0x0, 0x1cf01cf0, %r15d -# NORAW-NEXT: 2b: | +# NORAW: 0: |retq +# NORAW-NEXT: 1: |movq|0x123456(%rip), %rax # 12345e <.text+0x12345e> +# NORAW-NEXT: 8: |movabsq|$0x5555555555555554, %rax # imm = 0x5555555555555554 +# NORAW-NEXT: 12: |lwpval|$0x0, 0x40(%rdx,%rax), %r15d +# NORAW-NEXT: 1d: |lwpins|$0x0, 0x1cf01cf0, %r15d +# NORAW-NEXT: 2b: | .text retq diff --git a/llvm/test/tools/llvm-objdump/X86/disassemble-print-comments.s b/llvm/test/tools/llvm-objdump/X86/disassemble-print-comments.s new file mode 100644 index 0000000000000..7e5c29efaf704 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/X86/disassemble-print-comments.s @@ -0,0 +1,13 @@ +## Check that 'llvm-objdump -d' prints comments generated by the disassembler. + +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t +# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s + +# CHECK: 0000000000000000 : +# CHECK-NEXT: 0: nop +# CHECK-NEXT: 1: cmpl $305419896, %eax # imm = 0x12345678 + + .text +foo: + nop + cmpl $0x12345678, %eax diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp index b0ca095db381e..13e5f68c6d833 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -1039,6 +1039,29 @@ static StringRef getSegmentName(const MachOObjectFile *MachO, return ""; } +static void emitPostInstructionInfo(formatted_raw_ostream &FOS, + const MCAsmInfo &MAI, + const MCSubtargetInfo &STI, + StringRef Comments, + LiveVariablePrinter &LVP) { + do { + if (!Comments.empty()) { + // Emit a line of comments. + StringRef Comment; + std::tie(Comment, Comments) = Comments.split('\n'); + // MAI.getCommentColumn() assumes that instructions are printed at the + // position of 8, while getInstStartColumn() returns the actual position. + unsigned CommentColumn = + MAI.getCommentColumn() - 8 + getInstStartColumn(STI); + FOS.PadToColumn(CommentColumn); + FOS << MAI.getCommentString() << ' ' << Comment; + } + LVP.printAfterInst(FOS); + FOS << '\n'; + } while (!Comments.empty()); + FOS.flush(); +} + static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, MCContext &Ctx, MCDisassembler *PrimaryDisAsm, MCDisassembler *SecondaryDisAsm, @@ -1396,12 +1419,14 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, LVP.update({Index, Section.getIndex()}, {Index + Size, Section.getIndex()}, Index + Size != End); + IP->setCommentStream(CommentStream); + PIP.printInst( *IP, Disassembled ? &Inst : nullptr, Bytes.slice(Index, Size), {SectionAddr + Index + VMAAdjustment, Section.getIndex()}, FOS, "", *STI, &SP, Obj->getFileName(), &Rels, LVP); - FOS << CommentStream.str(); - Comments.clear(); + + IP->setCommentStream(llvm::nulls()); // If disassembly has failed, avoid analysing invalid/incomplete // instruction information. Otherwise, try to resolve the target @@ -1498,8 +1523,10 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, } } - LVP.printAfterInst(FOS); - FOS << "\n"; + assert(Ctx.getAsmInfo()); + emitPostInstructionInfo(FOS, *Ctx.getAsmInfo(), *STI, + CommentStream.str(), LVP); + Comments.clear(); // Hexagon does this in pretty printer if (Obj->getArch() != Triple::hexagon) { From c2e6bcb494c12b138283272e2e0932e13627898e Mon Sep 17 00:00:00 2001 From: Igor Kudrin Date: Mon, 28 Jun 2021 14:23:22 +0700 Subject: [PATCH 006/619] [llvm-objdump] Prevent variable locations to overlap short comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For now, the source variable locations are printed at about the same space as the comments for disassembled code, which can make some ranges for variables disappear if a line contains comments, for example: ┠─ bar = W1 0: add x0, x2, #2, lsl #12 // =8192┃ 4: add z31.d, z31.d, #65280 // =0xff00 8: nop ┻ The patch shifts the report a bit to allow printing comments up to approximately 16 characters without interferences. Differential Revision: https://reviews.llvm.org/D104700 --- llvm/docs/CommandGuide/llvm-objdump.rst | 2 +- .../ELF/AArch64/disassemble-print-comments.s | 56 ++++++++ .../llvm-objdump/ELF/ARM/debug-vars-dwarf4.s | 122 +++++++++--------- .../ELF/ARM/debug-vars-wide-chars.s | 6 +- .../X86/disassemble-print-comments.s | 56 ++++++++ llvm/tools/llvm-objdump/llvm-objdump.cpp | 2 +- 6 files changed, 178 insertions(+), 66 deletions(-) diff --git a/llvm/docs/CommandGuide/llvm-objdump.rst b/llvm/docs/CommandGuide/llvm-objdump.rst index 860d84303c9db..28ac7e4abbc42 100644 --- a/llvm/docs/CommandGuide/llvm-objdump.rst +++ b/llvm/docs/CommandGuide/llvm-objdump.rst @@ -132,7 +132,7 @@ OPTIONS .. option:: --debug-vars-indent= Distance to indent the source-level variable display, relative to the start - of the disassembly. Defaults to 40 characters. + of the disassembly. Defaults to 52 characters. .. option:: -j, --section= diff --git a/llvm/test/tools/llvm-objdump/ELF/AArch64/disassemble-print-comments.s b/llvm/test/tools/llvm-objdump/ELF/AArch64/disassemble-print-comments.s index 81bafe58816c8..b417486a3c66a 100644 --- a/llvm/test/tools/llvm-objdump/ELF/AArch64/disassemble-print-comments.s +++ b/llvm/test/tools/llvm-objdump/ELF/AArch64/disassemble-print-comments.s @@ -7,7 +7,63 @@ # CHECK-NEXT: 0: add x0, x2, #2, lsl #12 // =8192 # CHECK-NEXT: 4: add z31.d, z31.d, #65280 // =0xff00 +## Check that comments and locations of variables can be printed together. +# RUN: llvm-objdump -d --mattr=+sve --debug-vars --no-show-raw-insn %t | \ +# RUN: FileCheck %s --check-prefix=DBGVARS + +# DBGVARS: 0000000000000000 : +# DBGVARS-NEXT: ┠─ bar = W1 +# DBGVARS-NEXT: 0: add x0, x2, #2, lsl #12 // =8192 ┃ +# DBGVARS-NEXT: 4: add z31.d, z31.d, #65280 // =0xff00 ┻ + .text foo: add x0, x2, 8192 add z31.d, z31.d, #65280 +.LFooEnd: + + .section .debug_abbrev,"",@progbits + .uleb128 1 // Abbreviation Code + .uleb128 0x11 // DW_TAG_compile_unit + .byte 1 // DW_CHILDREN_yes + .byte 0 // EOM(1) + .byte 0 // EOM(2) + .uleb128 2 // Abbreviation Code + .uleb128 0x2e // DW_TAG_subprogram + .byte 1 // DW_CHILDREN_yes + .uleb128 0x11 // DW_AT_low_pc + .uleb128 0x01 // DW_FORM_addr + .uleb128 0x12 // DW_AT_high_pc + .uleb128 0x06 // DW_FORM_data4 + .byte 0 // EOM(1) + .byte 0 // EOM(2) + .uleb128 3 // Abbreviation Code + .uleb128 0x34 // DW_TAG_variable + .byte 0 // DW_CHILDREN_no + .uleb128 0x02 // DW_AT_location + .uleb128 0x18 // DW_FORM_exprloc + .uleb128 0x03 // DW_AT_name + .uleb128 0x08 // DW_FORM_string + .byte 0 // EOM(1) + .byte 0 // EOM(2) + .byte 0 // EOM(3) + + .section .debug_info,"",@progbits + .long .LCuEnd-.LCuBegin // Length of Unit +.LCuBegin: + .short 4 // DWARF version number + .long .debug_abbrev // Offset Into Abbrev. Section + .byte 8 // Address Size + .uleb128 1 // Abbrev [1] DW_TAG_compile_unit + .uleb128 2 // Abbrev [2] DW_TAG_subprogram + .quad foo // DW_AT_low_pc + .long .LFooEnd-foo // DW_AT_high_pc + .uleb128 3 // Abbrev [3] DW_TAG_variable + .byte .LLocEnd-.LLocBegin // DW_AT_location +.LLocBegin: + .byte 0x51 // DW_OP_reg1 +.LLocEnd: + .asciz "bar" // DW_FORM_string + .byte 0 // End Of Children Mark + .byte 0 // End Of Children Mark +.LCuEnd: diff --git a/llvm/test/tools/llvm-objdump/ELF/ARM/debug-vars-dwarf4.s b/llvm/test/tools/llvm-objdump/ELF/ARM/debug-vars-dwarf4.s index bf0c7bd52feb4..43a30aeaca6fb 100644 --- a/llvm/test/tools/llvm-objdump/ELF/ARM/debug-vars-dwarf4.s +++ b/llvm/test/tools/llvm-objdump/ELF/ARM/debug-vars-dwarf4.s @@ -12,10 +12,10 @@ # RUN: llvm-objdump - -d --debug-vars | \ # RUN: FileCheck %s --check-prefix=RAW --strict-whitespace -## Check that passing the default value for --debug-vars-indent (40) makes no +## Check that passing the default value for --debug-vars-indent (52) makes no ## change to the output. # RUN: llvm-mc -triple armv8a--none-eabi < %s -filetype=obj | \ -# RUN: llvm-objdump - -d --debug-vars --debug-vars-indent=40 | \ +# RUN: llvm-objdump - -d --debug-vars --debug-vars-indent=52 | \ # RUN: FileCheck %s --check-prefix=RAW --strict-whitespace # RUN: llvm-mc -triple armv8a--none-eabi < %s -filetype=obj | \ @@ -50,19 +50,19 @@ ## 8-byte tab stop, so these might not look aligned in a text editor. # RAW: 00000000 : -# RAW-NEXT: ┠─ a = R0 -# RAW-NEXT: ┃ ┠─ b = R1 -# RAW-NEXT: ┃ ┃ ┠─ c = R2 -# RAW-NEXT: ┃ ┃ ┃ ┌─ x = R0 -# RAW-NEXT: 0: 00 00 81 e0 add r0, r1, r0 ┻ ┃ ┃ ╈ -# RAW-NEXT: ┌─ y = R0 -# RAW-NEXT: 4: 02 00 80 e0 add r0, r0, r2 ╈ ┃ ┃ ┻ -# RAW-NEXT: 8: 1e ff 2f e1 bx lr ┻ ┻ ┻ +# RAW-NEXT: ┠─ a = R0 +# RAW-NEXT: ┃ ┠─ b = R1 +# RAW-NEXT: ┃ ┃ ┠─ c = R2 +# RAW-NEXT: ┃ ┃ ┃ ┌─ x = R0 +# RAW-NEXT: 0: 00 00 81 e0 add r0, r1, r0 ┻ ┃ ┃ ╈ +# RAW-NEXT: ┌─ y = R0 +# RAW-NEXT: 4: 02 00 80 e0 add r0, r0, r2 ╈ ┃ ┃ ┻ +# RAW-NEXT: 8: 1e ff 2f e1 bx lr ┻ ┻ ┻ # RAW-EMPTY: # RAW-NEXT: 0000000c : -# RAW-NEXT: ┠─ a = R0 -# RAW-NEXT: c: 01 00 80 e2 add r0, r0, #1 ┃ -# RAW-NEXT: 10: 1e ff 2f e1 bx lr ┻ +# RAW-NEXT: ┠─ a = R0 +# RAW-NEXT: c: 01 00 80 e2 add r0, r0, #1 ┃ +# RAW-NEXT: 10: 1e ff 2f e1 bx lr ┻ # INDENT: 00000000 : @@ -81,70 +81,70 @@ # INDENT-NEXT: 10: 1e ff 2f e1 bx lr ┻ # NO-RAW: 00000000 : -# NO-RAW-NEXT: ┠─ a = R0 -# NO-RAW-NEXT: ┃ ┠─ b = R1 -# NO-RAW-NEXT: ┃ ┃ ┠─ c = R2 -# NO-RAW-NEXT: ┃ ┃ ┃ ┌─ x = R0 -# NO-RAW-NEXT: 0: add r0, r1, r0 ┻ ┃ ┃ ╈ -# NO-RAW-NEXT: ┌─ y = R0 -# NO-RAW-NEXT: 4: add r0, r0, r2 ╈ ┃ ┃ ┻ -# NO-RAW-NEXT: 8: bx lr ┻ ┻ ┻ +# NO-RAW-NEXT: ┠─ a = R0 +# NO-RAW-NEXT: ┃ ┠─ b = R1 +# NO-RAW-NEXT: ┃ ┃ ┠─ c = R2 +# NO-RAW-NEXT: ┃ ┃ ┃ ┌─ x = R0 +# NO-RAW-NEXT: 0: add r0, r1, r0 ┻ ┃ ┃ ╈ +# NO-RAW-NEXT: ┌─ y = R0 +# NO-RAW-NEXT: 4: add r0, r0, r2 ╈ ┃ ┃ ┻ +# NO-RAW-NEXT: 8: bx lr ┻ ┻ ┻ # NO-RAW-EMPTY: # NO-RAW-NEXT: 0000000c : -# NO-RAW-NEXT: ┠─ a = R0 -# NO-RAW-NEXT: c: add r0, r0, #1 ┃ -# NO-RAW-NEXT: 10: bx lr ┻ +# NO-RAW-NEXT: ┠─ a = R0 +# NO-RAW-NEXT: c: add r0, r0, #1 ┃ +# NO-RAW-NEXT: 10: bx lr ┻ # LINE-NUMS: 00000000 : # LINE-NUMS-NEXT: ; foo(): -# LINE-NUMS-NEXT: ; SRC_COMPDIR{{[\\/]}}debug.c:2 ┠─ a = R0 -# LINE-NUMS-NEXT: ┃ ┠─ b = R1 -# LINE-NUMS-NEXT: ┃ ┃ ┠─ c = R2 -# LINE-NUMS-NEXT: ┃ ┃ ┃ ┌─ x = R0 -# LINE-NUMS-NEXT: 0: add r0, r1, r0 ┻ ┃ ┃ ╈ -# LINE-NUMS-NEXT: ; SRC_COMPDIR{{[\\/]}}debug.c:3 ┌─ y = R0 -# LINE-NUMS-NEXT: 4: add r0, r0, r2 ╈ ┃ ┃ ┻ -# LINE-NUMS-NEXT: ; SRC_COMPDIR{{[\\/]}}debug.c:4 ┃ ┃ ┃ -# LINE-NUMS-NEXT: 8: bx lr ┻ ┻ ┻ +# LINE-NUMS-NEXT: ; SRC_COMPDIR{{[\\/]}}debug.c:2 ┠─ a = R0 +# LINE-NUMS-NEXT: ┃ ┠─ b = R1 +# LINE-NUMS-NEXT: ┃ ┃ ┠─ c = R2 +# LINE-NUMS-NEXT: ┃ ┃ ┃ ┌─ x = R0 +# LINE-NUMS-NEXT: 0: add r0, r1, r0 ┻ ┃ ┃ ╈ +# LINE-NUMS-NEXT: ; SRC_COMPDIR{{[\\/]}}debug.c:3 ┌─ y = R0 +# LINE-NUMS-NEXT: 4: add r0, r0, r2 ╈ ┃ ┃ ┻ +# LINE-NUMS-NEXT: ; SRC_COMPDIR{{[\\/]}}debug.c:4 ┃ ┃ ┃ +# LINE-NUMS-NEXT: 8: bx lr ┻ ┻ ┻ # LINE-NUMS-EMPTY: # LINE-NUMS-NEXT: 0000000c : # LINE-NUMS-NEXT: ; bar(): -# LINE-NUMS-NEXT: ; SRC_COMPDIR{{[\\/]}}debug.c:8 ┠─ a = R0 -# LINE-NUMS-NEXT: c: add r0, r0, #1 ┃ -# LINE-NUMS-NEXT: ; SRC_COMPDIR{{[\\/]}}debug.c:9 ┃ -# LINE-NUMS-NEXT: 10: bx lr ┻ +# LINE-NUMS-NEXT: ; SRC_COMPDIR{{[\\/]}}debug.c:8 ┠─ a = R0 +# LINE-NUMS-NEXT: c: add r0, r0, #1 ┃ +# LINE-NUMS-NEXT: ; SRC_COMPDIR{{[\\/]}}debug.c:9 ┃ +# LINE-NUMS-NEXT: 10: bx lr ┻ # SOURCE: 00000000 : -# SOURCE-NEXT: ; int x = a + b; ┠─ a = R0 -# SOURCE-NEXT: ┃ ┠─ b = R1 -# SOURCE-NEXT: ┃ ┃ ┠─ c = R2 -# SOURCE-NEXT: ┃ ┃ ┃ ┌─ x = R0 -# SOURCE-NEXT: 0: add r0, r1, r0 ┻ ┃ ┃ ╈ -# SOURCE-NEXT: ; int y = x + c; ┌─ y = R0 -# SOURCE-NEXT: 4: add r0, r0, r2 ╈ ┃ ┃ ┻ -# SOURCE-NEXT: ; return y; ┃ ┃ ┃ -# SOURCE-NEXT: 8: bx lr ┻ ┻ ┻ +# SOURCE-NEXT: ; int x = a + b; ┠─ a = R0 +# SOURCE-NEXT: ┃ ┠─ b = R1 +# SOURCE-NEXT: ┃ ┃ ┠─ c = R2 +# SOURCE-NEXT: ┃ ┃ ┃ ┌─ x = R0 +# SOURCE-NEXT: 0: add r0, r1, r0 ┻ ┃ ┃ ╈ +# SOURCE-NEXT: ; int y = x + c; ┌─ y = R0 +# SOURCE-NEXT: 4: add r0, r0, r2 ╈ ┃ ┃ ┻ +# SOURCE-NEXT: ; return y; ┃ ┃ ┃ +# SOURCE-NEXT: 8: bx lr ┻ ┻ ┻ # SOURCE-EMPTY: # SOURCE-NEXT: 0000000c : -# SOURCE-NEXT: ; a++; ┠─ a = R0 -# SOURCE-NEXT: c: add r0, r0, #1 ┃ -# SOURCE-NEXT: ; return a; ┃ -# SOURCE-NEXT: 10: bx lr ┻ +# SOURCE-NEXT: ; a++; ┠─ a = R0 +# SOURCE-NEXT: c: add r0, r0, #1 ┃ +# SOURCE-NEXT: ; return a; ┃ +# SOURCE-NEXT: 10: bx lr ┻ # ASCII: 00000000 : -# ASCII-NEXT: |- a = R0 -# ASCII-NEXT: | |- b = R1 -# ASCII-NEXT: | | |- c = R2 -# ASCII-NEXT: | | | /- x = R0 -# ASCII-NEXT: 0: 00 00 81 e0 add r0, r1, r0 v | | ^ -# ASCII-NEXT: /- y = R0 -# ASCII-NEXT: 4: 02 00 80 e0 add r0, r0, r2 ^ | | v -# ASCII-NEXT: 8: 1e ff 2f e1 bx lr v v v +# ASCII-NEXT: |- a = R0 +# ASCII-NEXT: | |- b = R1 +# ASCII-NEXT: | | |- c = R2 +# ASCII-NEXT: | | | /- x = R0 +# ASCII-NEXT: 0: 00 00 81 e0 add r0, r1, r0 v | | ^ +# ASCII-NEXT: /- y = R0 +# ASCII-NEXT: 4: 02 00 80 e0 add r0, r0, r2 ^ | | v +# ASCII-NEXT: 8: 1e ff 2f e1 bx lr v v v # ASCII-EMPTY: # ASCII-NEXT: 0000000c : -# ASCII-NEXT: |- a = R0 -# ASCII-NEXT: c: 01 00 80 e2 add r0, r0, #1 | -# ASCII-NEXT: 10: 1e ff 2f e1 bx lr v +# ASCII-NEXT: |- a = R0 +# ASCII-NEXT: c: 01 00 80 e2 add r0, r0, #1 | +# ASCII-NEXT: 10: 1e ff 2f e1 bx lr v .text .syntax unified diff --git a/llvm/test/tools/llvm-objdump/ELF/ARM/debug-vars-wide-chars.s b/llvm/test/tools/llvm-objdump/ELF/ARM/debug-vars-wide-chars.s index 2573dc63513e2..2cee8593da96d 100644 --- a/llvm/test/tools/llvm-objdump/ELF/ARM/debug-vars-wide-chars.s +++ b/llvm/test/tools/llvm-objdump/ELF/ARM/debug-vars-wide-chars.s @@ -13,9 +13,9 @@ ## characters. # CHECK: 00000000 : -# CHECK-NEXT: ; return *喵; ┠─ 喵 = R0 -# CHECK-NEXT: 0: 00 00 90 e5 ldr r0, [r0] ┻ -# CHECK-NEXT: 4: 1e ff 2f e1 bx lr +# CHECK-NEXT: ; return *喵; ┠─ 喵 = R0 +# CHECK-NEXT: 0: 00 00 90 e5 ldr r0, [r0] ┻ +# CHECK-NEXT: 4: 1e ff 2f e1 bx lr .text .syntax unified diff --git a/llvm/test/tools/llvm-objdump/X86/disassemble-print-comments.s b/llvm/test/tools/llvm-objdump/X86/disassemble-print-comments.s index 7e5c29efaf704..8b4a4ca9ef5b9 100644 --- a/llvm/test/tools/llvm-objdump/X86/disassemble-print-comments.s +++ b/llvm/test/tools/llvm-objdump/X86/disassemble-print-comments.s @@ -7,7 +7,63 @@ # CHECK-NEXT: 0: nop # CHECK-NEXT: 1: cmpl $305419896, %eax # imm = 0x12345678 +## Check that comments and locations of variables can be printed together. +# RUN: llvm-objdump -d --debug-vars --no-show-raw-insn %t | \ +# RUN: FileCheck %s --check-prefix=DBGVARS + +# DBGVARS: 0000000000000000 : +# DBGVARS-NEXT: ┠─ bar = RDX +# DBGVARS-NEXT: 0: nop ┃ +# DBGVARS-NEXT: 1: cmpl $305419896, %eax # imm = 0x12345678 ┻ + .text foo: nop cmpl $0x12345678, %eax +.LFooEnd: + + .section .debug_abbrev,"",@progbits + .uleb128 1 # Abbreviation Code + .uleb128 0x11 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .uleb128 2 # Abbreviation Code + .uleb128 0x2e # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .uleb128 0x11 # DW_AT_low_pc + .uleb128 0x01 # DW_FORM_addr + .uleb128 0x12 # DW_AT_high_pc + .uleb128 0x06 # DW_FORM_data4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .uleb128 3 # Abbreviation Code + .uleb128 0x34 # DW_TAG_variable + .byte 0 # DW_CHILDREN_no + .uleb128 0x02 # DW_AT_location + .uleb128 0x18 # DW_FORM_exprloc + .uleb128 0x03 # DW_AT_name + .uleb128 0x08 # DW_FORM_string + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + + .section .debug_info,"",@progbits + .long .LCuEnd-.LCuBegin # Length of Unit +.LCuBegin: + .short 4 # DWARF version number + .long .debug_abbrev # Offset Into Abbrev. Section + .byte 8 # Address Size + .uleb128 1 # Abbrev [1] DW_TAG_compile_unit + .uleb128 2 # Abbrev [2] DW_TAG_subprogram + .quad foo # DW_AT_low_pc + .long .LFooEnd-foo # DW_AT_high_pc + .uleb128 3 # Abbrev [3] DW_TAG_variable + .byte .LLocEnd-.LLocBegin # DW_AT_location +.LLocBegin: + .byte 0x51 # DW_OP_reg1 +.LLocEnd: + .asciz "bar" # DW_FORM_string + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark +.LCuEnd: diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp index 13e5f68c6d833..0fd388da37713 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -221,7 +221,7 @@ uint32_t objdump::PrefixStrip; DebugVarsFormat objdump::DbgVariables = DVDisabled; -int objdump::DbgIndent = 40; +int objdump::DbgIndent = 52; static StringSet<> DisasmSymbolSet; StringSet<> objdump::FoundSectionSet; From e7fffa6f032b58d2cf04b05c3992c5195c2dfd56 Mon Sep 17 00:00:00 2001 From: Igor Kudrin Date: Mon, 28 Jun 2021 14:24:03 +0700 Subject: [PATCH 007/619] [llvm-objdump] Prefix memory operand addresses with '0x' This helps to avoid ambiguity when the address contains only digits 0..9. Differential Revision: https://reviews.llvm.org/D104909 --- lld/test/ELF/symver.s | 4 +- lld/test/ELF/wrap-no-real.s | 12 ++--- lld/test/ELF/x86-64-gotpc-offset.s | 4 +- lld/test/ELF/x86-64-gotpc-relax-nopic.s | 50 +++++++++---------- lld/test/ELF/x86-64-plt.s | 16 +++--- lld/test/ELF/x86-x32-plt.s | 8 +-- lld/test/MachO/dso-handle.s | 8 +-- lld/test/MachO/dylink-ordinal.s | 4 +- lld/test/MachO/static-link.s | 2 +- lld/test/MachO/tapi-link.s | 2 +- lld/test/MachO/tlv-dylib.s | 6 +-- lld/test/MachO/weak-binding.s | 12 ++--- lld/test/MachO/x86-64-reloc-signed.s | 12 ++--- llvm/test/MC/X86/tlsdesc-64.s | 2 +- llvm/test/MC/X86/tlsdesc-x32.s | 2 +- .../llvm-objdump/X86/disassemble-align.s | 4 +- .../llvm-objdump/X86/disassemble-gdtls.s | 2 +- .../elf-disassemble-symbol-references.yaml | 2 +- llvm/tools/llvm-objdump/llvm-objdump.cpp | 2 +- 19 files changed, 77 insertions(+), 77 deletions(-) diff --git a/lld/test/ELF/symver.s b/lld/test/ELF/symver.s index 7111f4264f3d0..81f10f0017fc9 100644 --- a/lld/test/ELF/symver.s +++ b/lld/test/ELF/symver.s @@ -75,9 +75,9 @@ # DIS3-COUNT-3: int3 # DIS3-NEXT: callq 0x1390 # DIS3-LABEL: : -# DIS3-NEXT: jmpq *{{.*}}(%rip) # 34a8 +# DIS3-NEXT: jmpq *{{.*}}(%rip) # 0x34a8 # DIS3-LABEL: : -# DIS3-NEXT: jmpq *{{.*}}(%rip) # 34b0 +# DIS3-NEXT: jmpq *{{.*}}(%rip) # 0x34b0 ## Then, test the interaction with versioned definitions in shared objects. diff --git a/lld/test/ELF/wrap-no-real.s b/lld/test/ELF/wrap-no-real.s index 4bcde9cf24d5f..75df54be61ba5 100644 --- a/lld/test/ELF/wrap-no-real.s +++ b/lld/test/ELF/wrap-no-real.s @@ -9,9 +9,9 @@ // RUN: llvm-readelf -s -x .got %t | FileCheck --check-prefix=READELF --implicit-check-not=__real_ %s // CHECK: <_start>: -// CHECK-NEXT: movq {{.*}}(%rip), %rax # 2021a8 -// CHECK-NEXT: movq {{.*}}(%rip), %rbx # 2021a8 -// CHECK-NEXT: movq {{.*}}(%rip), %rcx # 2021b0 +// CHECK-NEXT: movq {{.*}}(%rip), %rax # 0x2021a8 +// CHECK-NEXT: movq {{.*}}(%rip), %rbx # 0x2021a8 +// CHECK-NEXT: movq {{.*}}(%rip), %rcx # 0x2021b0 // READELF: 0000000000011000 0 NOTYPE GLOBAL DEFAULT ABS foo // READELF: 0000000000011010 0 NOTYPE GLOBAL DEFAULT ABS __wrap_foo @@ -23,9 +23,9 @@ // RUN: llvm-readelf -s -x .got %t2 | FileCheck --check-prefix=READELF --implicit-check-not=__real_ %s // CHECK2: <_start>: -// CHECK2-NEXT: movq {{.*}}(%rip), %rax # 2022e0 -// CHECK2-NEXT: movq {{.*}}(%rip), %rbx # 2022e0 -// CHECK2-NEXT: movq {{.*}}(%rip), %rcx # 2022e8 +// CHECK2-NEXT: movq {{.*}}(%rip), %rax # 0x2022e0 +// CHECK2-NEXT: movq {{.*}}(%rip), %rbx # 0x2022e0 +// CHECK2-NEXT: movq {{.*}}(%rip), %rcx # 0x2022e8 .global _start _start: diff --git a/lld/test/ELF/x86-64-gotpc-offset.s b/lld/test/ELF/x86-64-gotpc-offset.s index 60b007608b96e..2e6ace5ea87f4 100644 --- a/lld/test/ELF/x86-64-gotpc-offset.s +++ b/lld/test/ELF/x86-64-gotpc-offset.s @@ -7,8 +7,8 @@ # CHECK-NEXT: {{^}} [[#%x,ADDR:]] {{.*}} 00000000 # CHECK: leal {{.*}}(%rip), %eax # {{.*}} -# CHECK-NEXT: movl {{.*}}(%rip), %eax # [[#ADDR+4]] -# CHECK-NEXT: movq {{.*}}(%rip), %rax # [[#ADDR+1]] +# CHECK-NEXT: movl {{.*}}(%rip), %eax # 0x[[#ADDR+4]] +# CHECK-NEXT: movq {{.*}}(%rip), %rax # 0x[[#ADDR+1]] ## movl foo@GOTPCREL(%rip), %eax movl 0(%rip), %eax diff --git a/lld/test/ELF/x86-64-gotpc-relax-nopic.s b/lld/test/ELF/x86-64-gotpc-relax-nopic.s index 81d25f9ecafb6..cd12be86c7406 100644 --- a/lld/test/ELF/x86-64-gotpc-relax-nopic.s +++ b/lld/test/ELF/x86-64-gotpc-relax-nopic.s @@ -16,13 +16,13 @@ # DISASM: Disassembly of section .text: # DISASM-EMPTY: # DISASM-NEXT: <_start>: -# DISASM-NEXT: 2011c8: adcl {{.*}}(%rip), %eax # 202240 -# DISASM-NEXT: addl {{.*}}(%rip), %ebx # 202240 -# DISASM-NEXT: andl {{.*}}(%rip), %ecx # 202240 -# DISASM-NEXT: cmpl {{.*}}(%rip), %edx # 202240 -# DISASM-NEXT: orl {{.*}}(%rip), %edi # 202240 -# DISASM-NEXT: sbbl {{.*}}(%rip), %esi # 202240 -# DISASM-NEXT: subl {{.*}}(%rip), %ebp # 202240 +# DISASM-NEXT: 2011c8: adcl {{.*}}(%rip), %eax # 0x202240 +# DISASM-NEXT: addl {{.*}}(%rip), %ebx # 0x202240 +# DISASM-NEXT: andl {{.*}}(%rip), %ecx # 0x202240 +# DISASM-NEXT: cmpl {{.*}}(%rip), %edx # 0x202240 +# DISASM-NEXT: orl {{.*}}(%rip), %edi # 0x202240 +# DISASM-NEXT: sbbl {{.*}}(%rip), %esi # 0x202240 +# DISASM-NEXT: subl {{.*}}(%rip), %ebp # 0x202240 # DISASM-NEXT: xorl $0x203248, %r8d # DISASM-NEXT: testl $0x203248, %r15d # DISASM-NEXT: 201200: adcq $0x203248, %rax @@ -65,24 +65,24 @@ # DISASM-PIC: Disassembly of section .text: # DISASM-PIC-EMPTY: # DISASM-PIC-NEXT: <_start>: -# DISASM-PIC-NEXT: 1268: adcl {{.*}}(%rip), %eax # 2380 -# DISASM-PIC-NEXT: addl {{.*}}(%rip), %ebx # 2380 -# DISASM-PIC-NEXT: andl {{.*}}(%rip), %ecx # 2380 -# DISASM-PIC-NEXT: cmpl {{.*}}(%rip), %edx # 2380 -# DISASM-PIC-NEXT: orl {{.*}}(%rip), %edi # 2380 -# DISASM-PIC-NEXT: sbbl {{.*}}(%rip), %esi # 2380 -# DISASM-PIC-NEXT: subl {{.*}}(%rip), %ebp # 2380 -# DISASM-PIC-NEXT: xorl {{.*}}(%rip), %r8d # 2380 -# DISASM-PIC-NEXT: testl %r15d, {{.*}}(%rip) # 2380 -# DISASM-PIC-NEXT: 12a0: adcq {{.*}}(%rip), %rax # 2380 -# DISASM-PIC-NEXT: addq {{.*}}(%rip), %rbx # 2380 -# DISASM-PIC-NEXT: andq {{.*}}(%rip), %rcx # 2380 -# DISASM-PIC-NEXT: cmpq {{.*}}(%rip), %rdx # 2380 -# DISASM-PIC-NEXT: orq {{.*}}(%rip), %rdi # 2380 -# DISASM-PIC-NEXT: sbbq {{.*}}(%rip), %rsi # 2380 -# DISASM-PIC-NEXT: subq {{.*}}(%rip), %rbp # 2380 -# DISASM-PIC-NEXT: xorq {{.*}}(%rip), %r8 # 2380 -# DISASM-PIC-NEXT: testq %r15, {{.*}}(%rip) # 2380 +# DISASM-PIC-NEXT: 1268: adcl {{.*}}(%rip), %eax # 0x2380 +# DISASM-PIC-NEXT: addl {{.*}}(%rip), %ebx # 0x2380 +# DISASM-PIC-NEXT: andl {{.*}}(%rip), %ecx # 0x2380 +# DISASM-PIC-NEXT: cmpl {{.*}}(%rip), %edx # 0x2380 +# DISASM-PIC-NEXT: orl {{.*}}(%rip), %edi # 0x2380 +# DISASM-PIC-NEXT: sbbl {{.*}}(%rip), %esi # 0x2380 +# DISASM-PIC-NEXT: subl {{.*}}(%rip), %ebp # 0x2380 +# DISASM-PIC-NEXT: xorl {{.*}}(%rip), %r8d # 0x2380 +# DISASM-PIC-NEXT: testl %r15d, {{.*}}(%rip) # 0x2380 +# DISASM-PIC-NEXT: 12a0: adcq {{.*}}(%rip), %rax # 0x2380 +# DISASM-PIC-NEXT: addq {{.*}}(%rip), %rbx # 0x2380 +# DISASM-PIC-NEXT: andq {{.*}}(%rip), %rcx # 0x2380 +# DISASM-PIC-NEXT: cmpq {{.*}}(%rip), %rdx # 0x2380 +# DISASM-PIC-NEXT: orq {{.*}}(%rip), %rdi # 0x2380 +# DISASM-PIC-NEXT: sbbq {{.*}}(%rip), %rsi # 0x2380 +# DISASM-PIC-NEXT: subq {{.*}}(%rip), %rbp # 0x2380 +# DISASM-PIC-NEXT: xorq {{.*}}(%rip), %r8 # 0x2380 +# DISASM-PIC-NEXT: testq %r15, {{.*}}(%rip) # 0x2380 .data .type bar, @object diff --git a/lld/test/ELF/x86-64-plt.s b/lld/test/ELF/x86-64-plt.s index 53a082ed1b5ec..9309f5e0320b8 100644 --- a/lld/test/ELF/x86-64-plt.s +++ b/lld/test/ELF/x86-64-plt.s @@ -33,17 +33,17 @@ # DISASM1: Disassembly of section .plt: # DISASM1-EMPTY: # DISASM1-NEXT: <.plt>: -# DISASM1-NEXT: 2012e0: pushq 8450(%rip) # 2033e8 -# DISASM1-NEXT: jmpq *8452(%rip) # 2033f0 +# DISASM1-NEXT: 2012e0: pushq 8450(%rip) # 0x2033e8 +# DISASM1-NEXT: jmpq *8452(%rip) # 0x2033f0 # DISASM1-NEXT: nopl (%rax) # DISASM1-EMPTY: # DISASM1-NEXT: : -# DISASM1-NEXT: 2012f0: jmpq *8450(%rip) # 2033f8 +# DISASM1-NEXT: 2012f0: jmpq *8450(%rip) # 0x2033f8 # DISASM1-NEXT: pushq $0 # DISASM1-NEXT: jmp 0x2012e0 <.plt> # DISASM1-EMPTY: # DISASM1-NEXT: : -# DISASM1-NEXT: 201300: jmpq *8442(%rip) # 203400 +# DISASM1-NEXT: 201300: jmpq *8442(%rip) # 0x203400 # DISASM1-NEXT: pushq $1 # DISASM1-NEXT: jmp 0x2012e0 <.plt> # DISASM1-NOT: {{.}} @@ -51,17 +51,17 @@ # DISASM2: Disassembly of section .plt: # DISASM2-EMPTY: # DISASM2-NEXT: <.plt>: -# DISASM2-NEXT: 1310: pushq 8434(%rip) # 3408 -# DISASM2-NEXT: jmpq *8436(%rip) # 3410 +# DISASM2-NEXT: 1310: pushq 8434(%rip) # 0x3408 +# DISASM2-NEXT: jmpq *8436(%rip) # 0x3410 # DISASM2-NEXT: nopl (%rax) # DISASM2-EMPTY: # DISASM2-NEXT: : -# DISASM2-NEXT: 1320: jmpq *8434(%rip) # 3418 +# DISASM2-NEXT: 1320: jmpq *8434(%rip) # 0x3418 # DISASM2-NEXT: pushq $0 # DISASM2-NEXT: jmp 0x1310 <.plt> # DISASM2-EMPTY: # DISASM2-NEXT: : -# DISASM2-NEXT: 1330: jmpq *8426(%rip) # 3420 +# DISASM2-NEXT: 1330: jmpq *8426(%rip) # 0x3420 # DISASM2-NEXT: pushq $1 # DISASM2-NEXT: jmp 0x1310 <.plt> # DISASM2-NOT: {{.}} diff --git a/lld/test/ELF/x86-x32-plt.s b/lld/test/ELF/x86-x32-plt.s index 145c15dfe164e..8c6569fffa41c 100644 --- a/lld/test/ELF/x86-x32-plt.s +++ b/lld/test/ELF/x86-x32-plt.s @@ -23,17 +23,17 @@ # DISASM: Disassembly of section .plt: # DISASM-EMPTY: # DISASM-NEXT: <.plt>: -# DISASM-NEXT: 2011e0: pushq 8346(%rip) # 203280 -# DISASM-NEXT: jmpq *8348(%rip) # 203288 +# DISASM-NEXT: 2011e0: pushq 8346(%rip) # 0x203280 +# DISASM-NEXT: jmpq *8348(%rip) # 0x203288 # DISASM-NEXT: nopl (%rax) # DISASM-EMPTY: # DISASM-NEXT: : -# DISASM-NEXT: 2011f0: jmpq *8346(%rip) # 203290 +# DISASM-NEXT: 2011f0: jmpq *8346(%rip) # 0x203290 # DISASM-NEXT: pushq $0 # DISASM-NEXT: jmp 0x2011e0 <.plt> # DISASM-EMPTY: # DISASM-NEXT: : -# DISASM-NEXT: 201200: jmpq *8338(%rip) # 203298 +# DISASM-NEXT: 201200: jmpq *8338(%rip) # 0x203298 # DISASM-NEXT: pushq $1 # DISASM-NEXT: jmp 0x2011e0 <.plt> # DISASM-NOT: {{.}} diff --git a/lld/test/MachO/dso-handle.s b/lld/test/MachO/dso-handle.s index 16fc535cf8b07..2104d1dcf75ae 100644 --- a/lld/test/MachO/dso-handle.s +++ b/lld/test/MachO/dso-handle.s @@ -3,13 +3,13 @@ # RUN: %lld -lSystem %t.o -o %t # RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s -# CHECK: leaq {{.*}} # 100000000 -# CHECK-NEXT: leaq {{.*}} # 100000000 +# CHECK: leaq {{.*}} # 0x100000000 +# CHECK-NEXT: leaq {{.*}} # 0x100000000 # RUN: %lld -dylib %t.o -o %t.dylib # RUN: llvm-objdump -d --no-show-raw-insn --rebase --section-headers %t.dylib | FileCheck %s --check-prefix=DYLIB-CHECK -# DYLIB-CHECK: leaq {{.*}} # 0 -# DYLIB-CHECK-NEXT: leaq {{.*}} # 0 +# DYLIB-CHECK: leaq {{.*}} # 0x0 +# DYLIB-CHECK-NEXT: leaq {{.*}} # 0x0 # DYLIB-LABEL: Sections: # DYLIB: __data 00000008 [[#%x,DATA:]] DATA diff --git a/lld/test/MachO/dylink-ordinal.s b/lld/test/MachO/dylink-ordinal.s index 26a988f312669..9dcdaccf7cb9f 100644 --- a/lld/test/MachO/dylink-ordinal.s +++ b/lld/test/MachO/dylink-ordinal.s @@ -10,8 +10,8 @@ # CHECK: callq 0x[[#%x,FOO_OFF:]] # CHECK-NEXT: callq 0x[[#%x,BAR_OFF:]] -# CHECK: [[#%x,BAR_OFF]]: jmpq {{.*}} # [[#%x,BAR_BIND:]] -# CHECK: [[#%x,FOO_OFF]]: jmpq {{.*}} # [[#%x,FOO_BIND:]] +# CHECK: [[#%x,BAR_OFF]]: jmpq {{.*}} # 0x[[#%x,BAR_BIND:]] +# CHECK: [[#%x,FOO_OFF]]: jmpq {{.*}} # 0x[[#%x,FOO_BIND:]] # CHECK-LABEL: Lazy bind table: # CHECK-DAG: __DATA __la_symbol_ptr 0x[[#%x,FOO_BIND]] Foo _foo diff --git a/lld/test/MachO/static-link.s b/lld/test/MachO/static-link.s index 1306aa60a8b02..b3e197cda80df 100644 --- a/lld/test/MachO/static-link.s +++ b/lld/test/MachO/static-link.s @@ -15,7 +15,7 @@ # CHECK: Disassembly of section __TEXT,__text # CHECK-LABEL: <_main>: -# CHECK: leaq {{.*}}(%rip), %rsi # [[ADDR]] <_goodbye_world> +# CHECK: leaq {{.*}}(%rip), %rsi # 0x[[ADDR]] <_goodbye_world> .section __TEXT,__text .global _main diff --git a/lld/test/MachO/tapi-link.s b/lld/test/MachO/tapi-link.s index 786aca68c8905..8e073b844f7df 100644 --- a/lld/test/MachO/tapi-link.s +++ b/lld/test/MachO/tapi-link.s @@ -14,7 +14,7 @@ # RUN: llvm-objdump --bind --no-show-raw-insn -d -r %t/with-reexport | FileCheck %s # CHECK: Disassembly of section __TEXT,__text: -# CHECK: movq {{.*}} # [[ADDR:[0-9a-f]+]] +# CHECK: movq {{.*}} # 0x[[ADDR:[0-9a-f]+]] # CHECK: Bind table: # CHECK-DAG: __DATA_CONST __got 0x[[ADDR]] pointer 0 libSystem ___nan diff --git a/lld/test/MachO/tlv-dylib.s b/lld/test/MachO/tlv-dylib.s index c41f152eb16fa..7a083615492f8 100644 --- a/lld/test/MachO/tlv-dylib.s +++ b/lld/test/MachO/tlv-dylib.s @@ -17,9 +17,9 @@ # RUN: %lld -lSystem -L%t -ltlv %t/test.o -o %t/test # RUN: llvm-objdump --bind -d --no-show-raw-insn %t/test | FileCheck %s -# CHECK: movq [[#]](%rip), %rax # [[#%x, FOO:]] -# CHECK-NEXT: movq [[#]](%rip), %rax # [[#%x, BAR:]] -# CHECK-NEXT: movq [[#]](%rip), %rax # [[#%x, BAZ:]] +# CHECK: movq [[#]](%rip), %rax # 0x[[#%x, FOO:]] +# CHECK-NEXT: movq [[#]](%rip), %rax # 0x[[#%x, BAR:]] +# CHECK-NEXT: movq [[#]](%rip), %rax # 0x[[#%x, BAZ:]] # CHECK-LABEL: Bind table: # CHECK-DAG: __DATA __thread_ptrs 0x{{0*}}[[#%x, FOO]] pointer 0 libtlv _foo diff --git a/lld/test/MachO/weak-binding.s b/lld/test/MachO/weak-binding.s index 2b39b0ed19502..11b7562c69e97 100644 --- a/lld/test/MachO/weak-binding.s +++ b/lld/test/MachO/weak-binding.s @@ -19,12 +19,12 @@ # CHECK-NEXT: {{[0-9a-f]+}} {{[0-9a-f ]*[1-9a-f]+[0-9a-f ]*}} # CHECK: <_main>: -# CHECK-NEXT: movq [[#]](%rip), %rax # [[#%X,WEAK_DY_GOT_ADDR:]] -# CHECK-NEXT: movq [[#]](%rip), %rax # [[#%X,WEAK_EXT_GOT_ADDR:]] -# CHECK-NEXT: leaq [[#]](%rip), %rax # [[#%X,WEAK_INT_GOT_ADDR:]] -# CHECK-NEXT: movq [[#]](%rip), %rax # [[#%X,WEAK_TLV_ADDR:]] -# CHECK-NEXT: movq [[#]](%rip), %rax # [[#%X,WEAK_DY_TLV_ADDR:]] -# CHECK-NEXT: leaq [[#]](%rip), %rax # [[#%X,WEAK_INT_TLV_ADDR:]] +# CHECK-NEXT: movq [[#]](%rip), %rax # 0x[[#%X,WEAK_DY_GOT_ADDR:]] +# CHECK-NEXT: movq [[#]](%rip), %rax # 0x[[#%X,WEAK_EXT_GOT_ADDR:]] +# CHECK-NEXT: leaq [[#]](%rip), %rax # 0x[[#%X,WEAK_INT_GOT_ADDR:]] +# CHECK-NEXT: movq [[#]](%rip), %rax # 0x[[#%X,WEAK_TLV_ADDR:]] +# CHECK-NEXT: movq [[#]](%rip), %rax # 0x[[#%X,WEAK_DY_TLV_ADDR:]] +# CHECK-NEXT: leaq [[#]](%rip), %rax # 0x[[#%X,WEAK_INT_TLV_ADDR:]] # CHECK-NEXT: callq 0x{{[0-9a-f]*}} # CHECK-NEXT: callq 0x{{[0-9a-f]*}} # CHECK-NEXT: callq 0x{{[0-9a-f]*}} diff --git a/lld/test/MachO/x86-64-reloc-signed.s b/lld/test/MachO/x86-64-reloc-signed.s index 74c59529b98ad..fd12b9bff7010 100644 --- a/lld/test/MachO/x86-64-reloc-signed.s +++ b/lld/test/MachO/x86-64-reloc-signed.s @@ -12,18 +12,18 @@ # CHECK-LABEL: Disassembly of section # CHECK: <_main>: -# CHECK-NEXT: movl {{.*}} # [[#S]] +# CHECK-NEXT: movl {{.*}} # 0x[[#S]] # CHECK-NEXT: callq {{.*}} -# CHECK-NEXT: movl {{.*}} # [[#S + 2]] +# CHECK-NEXT: movl {{.*}} # 0x[[#S + 2]] # CHECK-NEXT: callq {{.*}} -# CHECK-NEXT: movb {{.*}} # [[#S]] +# CHECK-NEXT: movb {{.*}} # 0x[[#S]] # CHECK-NEXT: callq {{.*}} # CHECK: <__not_text>: -# CHECK-NEXT: movl {{.*}} # [[#FOO + 8]] +# CHECK-NEXT: movl {{.*}} # 0x[[#FOO + 8]] # CHECK-NEXT: callq {{.*}} -# CHECK-NEXT: movl {{.*}} # [[#FOO + 8 + 2]] +# CHECK-NEXT: movl {{.*}} # 0x[[#FOO + 8 + 2]] # CHECK-NEXT: callq {{.*}} -# CHECK-NEXT: movb {{.*}} # [[#FOO + 8]] +# CHECK-NEXT: movb {{.*}} # 0x[[#FOO + 8]] # CHECK-NEXT: callq {{.*}} .section __TEXT,__text diff --git a/llvm/test/MC/X86/tlsdesc-64.s b/llvm/test/MC/X86/tlsdesc-64.s index eaec13785b3a5..ebe1710c3e869 100644 --- a/llvm/test/MC/X86/tlsdesc-64.s +++ b/llvm/test/MC/X86/tlsdesc-64.s @@ -9,7 +9,7 @@ # SYM: TLS GLOBAL DEFAULT UND a -# CHECK: 0: leaq (%rip), %rax # 7 <{{.*}}> +# CHECK: 0: leaq (%rip), %rax # 0x7 <{{.*}}> # CHECK-NEXT: 0000000000000003: R_X86_64_GOTPC32_TLSDESC a-0x4 # CHECK-NEXT: 7: callq *(%rax) # CHECK-NEXT: 0000000000000007: R_X86_64_TLSDESC_CALL a diff --git a/llvm/test/MC/X86/tlsdesc-x32.s b/llvm/test/MC/X86/tlsdesc-x32.s index a9884fb5e2ee2..836a3c5527f85 100644 --- a/llvm/test/MC/X86/tlsdesc-x32.s +++ b/llvm/test/MC/X86/tlsdesc-x32.s @@ -9,7 +9,7 @@ # SYM: TLS GLOBAL DEFAULT UND a -# CHECK: 0: 40 8d 05 00 00 00 00 leal (%rip), %eax # 7 <{{.*}}> +# CHECK: 0: 40 8d 05 00 00 00 00 leal (%rip), %eax # 0x7 <{{.*}}> # CHECK-NEXT: 00000003: R_X86_64_GOTPC32_TLSDESC a-0x4 # CHECK-NEXT: 7: 67 ff 10 callq *(%eax) # CHECK-NEXT: 00000007: R_X86_64_TLSDESC_CALL a diff --git a/llvm/test/tools/llvm-objdump/X86/disassemble-align.s b/llvm/test/tools/llvm-objdump/X86/disassemble-align.s index 17809d88bfbf4..d3ccc59e64ecc 100644 --- a/llvm/test/tools/llvm-objdump/X86/disassemble-align.s +++ b/llvm/test/tools/llvm-objdump/X86/disassemble-align.s @@ -10,14 +10,14 @@ # Instructions are expected to be aligned if the instruction in hex is not too long. # CHECK: 0: c3 |retq -# CHECK-NEXT: 1: 48 8b 05 56 34 12 00 |movq|0x123456(%rip), %rax # 12345e <.text+0x12345e> +# CHECK-NEXT: 1: 48 8b 05 56 34 12 00 |movq|0x123456(%rip), %rax # 0x12345e <.text+0x12345e> # CHECK-NEXT: 8: 48 b8 54 55 55 55 55 55 55 55|movabsq|$0x5555555555555554, %rax # imm = 0x5555555555555554 # CHECK-NEXT: 12: 8f ea 00 12 4c 02 40 00 00 00 00 |lwpval|$0x0, 0x40(%rdx,%rax), %r15d # CHECK-NEXT: 1d: 8f ea 00 12 04 25 f0 1c f0 1c 00 00 00 00 |lwpins|$0x0, 0x1cf01cf0, %r15d # CHECK-NEXT: 2b: ff ff | # NORAW: 0: |retq -# NORAW-NEXT: 1: |movq|0x123456(%rip), %rax # 12345e <.text+0x12345e> +# NORAW-NEXT: 1: |movq|0x123456(%rip), %rax # 0x12345e <.text+0x12345e> # NORAW-NEXT: 8: |movabsq|$0x5555555555555554, %rax # imm = 0x5555555555555554 # NORAW-NEXT: 12: |lwpval|$0x0, 0x40(%rdx,%rax), %r15d # NORAW-NEXT: 1d: |lwpins|$0x0, 0x1cf01cf0, %r15d diff --git a/llvm/test/tools/llvm-objdump/X86/disassemble-gdtls.s b/llvm/test/tools/llvm-objdump/X86/disassemble-gdtls.s index e913f5f6a3454..3012b0de69d24 100644 --- a/llvm/test/tools/llvm-objdump/X86/disassemble-gdtls.s +++ b/llvm/test/tools/llvm-objdump/X86/disassemble-gdtls.s @@ -2,7 +2,7 @@ # CHECK: : # TODO: Should display data16 prefixes. -# CHECK-NEXT: 0: 66 48 8d 3d 00 00 00 00 leaq (%rip), %rdi # 8 +# CHECK-NEXT: 0: 66 48 8d 3d 00 00 00 00 leaq (%rip), %rdi # 0x8 # CHECK-NEXT: 8: 66 66 48 e8 00 00 00 00 callq 0x10 # CHECK-EMPTY: diff --git a/llvm/test/tools/llvm-objdump/X86/elf-disassemble-symbol-references.yaml b/llvm/test/tools/llvm-objdump/X86/elf-disassemble-symbol-references.yaml index a7ec7a2997e5f..b0845fcb6b587 100644 --- a/llvm/test/tools/llvm-objdump/X86/elf-disassemble-symbol-references.yaml +++ b/llvm/test/tools/llvm-objdump/X86/elf-disassemble-symbol-references.yaml @@ -7,7 +7,7 @@ # EXEC-LABEL: : # EXEC-NEXT: 4005: e8 12 34 56 78 callq 0x7856741c # EXEC-LABEL: : -# EXEC-NEXT: 400a: 8b 05 f0 0f 00 00 movl 4080(%rip), %eax # 5000 +# EXEC-NEXT: 400a: 8b 05 f0 0f 00 00 movl 4080(%rip), %eax # 0x5000 --- !ELF FileHeader: diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp index 0fd388da37713..c75d0e482dbc3 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -1444,7 +1444,7 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, PrintTarget = true; // Do not print real address when symbolizing. if (!SymbolizeOperands) - FOS << " # " << Twine::utohexstr(Target); + FOS << " # 0x" << Twine::utohexstr(Target); } if (PrintTarget) { // In a relocatable object, the target's section must reside in From d25e572421a66270c0ee8d51c96256f2958a6f1d Mon Sep 17 00:00:00 2001 From: Igor Kudrin Date: Mon, 28 Jun 2021 14:24:39 +0700 Subject: [PATCH 008/619] [llvm-objdump] Print memory operand addresses as regular comments The patch reuses the common code to print memory operand addresses as instruction comments. This helps to align the comments and enables using target-specific comment markers when `evaluateMemoryOperandAddress()` is implemented for them. Differential Revision: https://reviews.llvm.org/D104861 --- lld/test/MachO/dso-handle.s | 8 +++--- lld/test/MachO/dylink-ordinal.s | 4 +-- lld/test/MachO/mattrs.ll | 3 ++- lld/test/MachO/static-link.s | 2 +- lld/test/MachO/tapi-link.s | 2 +- lld/test/MachO/tlv-dylib.s | 6 ++--- lld/test/MachO/tlv.s | 8 +++--- lld/test/MachO/weak-binding.s | 12 ++++----- lld/test/MachO/x86-64-reloc-got-load.s | 2 +- lld/test/MachO/x86-64-reloc-signed.s | 14 ++++++----- .../llvm-objdump/X86/disassemble-align.s | 4 +-- llvm/tools/llvm-objdump/llvm-objdump.cpp | 25 +++++++++++++------ 12 files changed, 51 insertions(+), 39 deletions(-) diff --git a/lld/test/MachO/dso-handle.s b/lld/test/MachO/dso-handle.s index 2104d1dcf75ae..260a0495989cc 100644 --- a/lld/test/MachO/dso-handle.s +++ b/lld/test/MachO/dso-handle.s @@ -3,13 +3,13 @@ # RUN: %lld -lSystem %t.o -o %t # RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s -# CHECK: leaq {{.*}} # 0x100000000 -# CHECK-NEXT: leaq {{.*}} # 0x100000000 +# CHECK: leaq {{.*}} ## 0x100000000 +# CHECK-NEXT: leaq {{.*}} ## 0x100000000 # RUN: %lld -dylib %t.o -o %t.dylib # RUN: llvm-objdump -d --no-show-raw-insn --rebase --section-headers %t.dylib | FileCheck %s --check-prefix=DYLIB-CHECK -# DYLIB-CHECK: leaq {{.*}} # 0x0 -# DYLIB-CHECK-NEXT: leaq {{.*}} # 0x0 +# DYLIB-CHECK: leaq {{.*}} ## 0x0 +# DYLIB-CHECK-NEXT: leaq {{.*}} ## 0x0 # DYLIB-LABEL: Sections: # DYLIB: __data 00000008 [[#%x,DATA:]] DATA diff --git a/lld/test/MachO/dylink-ordinal.s b/lld/test/MachO/dylink-ordinal.s index 9dcdaccf7cb9f..7edb218f7321c 100644 --- a/lld/test/MachO/dylink-ordinal.s +++ b/lld/test/MachO/dylink-ordinal.s @@ -10,8 +10,8 @@ # CHECK: callq 0x[[#%x,FOO_OFF:]] # CHECK-NEXT: callq 0x[[#%x,BAR_OFF:]] -# CHECK: [[#%x,BAR_OFF]]: jmpq {{.*}} # 0x[[#%x,BAR_BIND:]] -# CHECK: [[#%x,FOO_OFF]]: jmpq {{.*}} # 0x[[#%x,FOO_BIND:]] +# CHECK: [[#%x,BAR_OFF]]: jmpq {{.*}} ## 0x[[#%x,BAR_BIND:]] +# CHECK: [[#%x,FOO_OFF]]: jmpq {{.*}} ## 0x[[#%x,FOO_BIND:]] # CHECK-LABEL: Lazy bind table: # CHECK-DAG: __DATA __la_symbol_ptr 0x[[#%x,FOO_BIND]] Foo _foo diff --git a/lld/test/MachO/mattrs.ll b/lld/test/MachO/mattrs.ll index 25ed6f0cf73a2..fad0b5e3bc61d 100644 --- a/lld/test/MachO/mattrs.ll +++ b/lld/test/MachO/mattrs.ll @@ -18,7 +18,8 @@ ; NO-FMA: <_foo>: ; NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1 ; NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm0 -; NO-FMA-NEXT: vmovss [[#]](%rip), %xmm2 +; NO-FMA-NEXT: vmovss [[#]](%rip), %xmm2 ## xmm2 = +; NO-FMA-NEXT: ## 0x ; NO-FMA-NEXT: vsubss %xmm0, %xmm2, %xmm0 ; NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0 ; NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0 diff --git a/lld/test/MachO/static-link.s b/lld/test/MachO/static-link.s index b3e197cda80df..c3760f13af953 100644 --- a/lld/test/MachO/static-link.s +++ b/lld/test/MachO/static-link.s @@ -15,7 +15,7 @@ # CHECK: Disassembly of section __TEXT,__text # CHECK-LABEL: <_main>: -# CHECK: leaq {{.*}}(%rip), %rsi # 0x[[ADDR]] <_goodbye_world> +# CHECK: leaq {{.*}}(%rip), %rsi ## 0x[[ADDR]] <_goodbye_world> .section __TEXT,__text .global _main diff --git a/lld/test/MachO/tapi-link.s b/lld/test/MachO/tapi-link.s index 8e073b844f7df..e46871a5b271d 100644 --- a/lld/test/MachO/tapi-link.s +++ b/lld/test/MachO/tapi-link.s @@ -14,7 +14,7 @@ # RUN: llvm-objdump --bind --no-show-raw-insn -d -r %t/with-reexport | FileCheck %s # CHECK: Disassembly of section __TEXT,__text: -# CHECK: movq {{.*}} # 0x[[ADDR:[0-9a-f]+]] +# CHECK: movq {{.*}} ## 0x[[ADDR:[0-9a-f]+]] # CHECK: Bind table: # CHECK-DAG: __DATA_CONST __got 0x[[ADDR]] pointer 0 libSystem ___nan diff --git a/lld/test/MachO/tlv-dylib.s b/lld/test/MachO/tlv-dylib.s index 7a083615492f8..dc6bdef1433e1 100644 --- a/lld/test/MachO/tlv-dylib.s +++ b/lld/test/MachO/tlv-dylib.s @@ -17,9 +17,9 @@ # RUN: %lld -lSystem -L%t -ltlv %t/test.o -o %t/test # RUN: llvm-objdump --bind -d --no-show-raw-insn %t/test | FileCheck %s -# CHECK: movq [[#]](%rip), %rax # 0x[[#%x, FOO:]] -# CHECK-NEXT: movq [[#]](%rip), %rax # 0x[[#%x, BAR:]] -# CHECK-NEXT: movq [[#]](%rip), %rax # 0x[[#%x, BAZ:]] +# CHECK: movq [[#]](%rip), %rax ## 0x[[#%x, FOO:]] +# CHECK-NEXT: movq [[#]](%rip), %rax ## 0x[[#%x, BAR:]] +# CHECK-NEXT: movq [[#]](%rip), %rax ## 0x[[#%x, BAZ:]] # CHECK-LABEL: Bind table: # CHECK-DAG: __DATA __thread_ptrs 0x{{0*}}[[#%x, FOO]] pointer 0 libtlv _foo diff --git a/lld/test/MachO/tlv.s b/lld/test/MachO/tlv.s index a7e032b01a4a2..9c74ea082ba7a 100644 --- a/lld/test/MachO/tlv.s +++ b/lld/test/MachO/tlv.s @@ -34,13 +34,13 @@ # HEADER: MH_HAS_TLV_DESCRIPTORS # REG: <_main>: -# REG-NEXT: leaq {{.*}}(%rip), %rax # {{.*}} <_foo> -# REG-NEXT: leaq {{.*}}(%rip), %rax # {{.*}} <_bar> +# REG-NEXT: leaq {{.*}}(%rip), %rax ## {{.*}} <_foo> +# REG-NEXT: leaq {{.*}}(%rip), %rax ## {{.*}} <_bar> # REG-NEXT: retq # TBSS: <_f>: -# TBSS-NEXT: leaq {{.*}}(%rip), %rax # {{.*}} <_baz> -# TBSS-NEXT: leaq {{.*}}(%rip), %rax # {{.*}} <_qux> +# TBSS-NEXT: leaq {{.*}}(%rip), %rax ## {{.*}} <_baz> +# TBSS-NEXT: leaq {{.*}}(%rip), %rax ## {{.*}} <_qux> # TBSS-NEXT: retq # REG-TLVP: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 diff --git a/lld/test/MachO/weak-binding.s b/lld/test/MachO/weak-binding.s index 11b7562c69e97..0c79f981137a2 100644 --- a/lld/test/MachO/weak-binding.s +++ b/lld/test/MachO/weak-binding.s @@ -19,12 +19,12 @@ # CHECK-NEXT: {{[0-9a-f]+}} {{[0-9a-f ]*[1-9a-f]+[0-9a-f ]*}} # CHECK: <_main>: -# CHECK-NEXT: movq [[#]](%rip), %rax # 0x[[#%X,WEAK_DY_GOT_ADDR:]] -# CHECK-NEXT: movq [[#]](%rip), %rax # 0x[[#%X,WEAK_EXT_GOT_ADDR:]] -# CHECK-NEXT: leaq [[#]](%rip), %rax # 0x[[#%X,WEAK_INT_GOT_ADDR:]] -# CHECK-NEXT: movq [[#]](%rip), %rax # 0x[[#%X,WEAK_TLV_ADDR:]] -# CHECK-NEXT: movq [[#]](%rip), %rax # 0x[[#%X,WEAK_DY_TLV_ADDR:]] -# CHECK-NEXT: leaq [[#]](%rip), %rax # 0x[[#%X,WEAK_INT_TLV_ADDR:]] +# CHECK-NEXT: movq [[#]](%rip), %rax ## 0x[[#%X,WEAK_DY_GOT_ADDR:]] +# CHECK-NEXT: movq [[#]](%rip), %rax ## 0x[[#%X,WEAK_EXT_GOT_ADDR:]] +# CHECK-NEXT: leaq [[#]](%rip), %rax ## 0x[[#%X,WEAK_INT_GOT_ADDR:]] +# CHECK-NEXT: movq [[#]](%rip), %rax ## 0x[[#%X,WEAK_TLV_ADDR:]] +# CHECK-NEXT: movq [[#]](%rip), %rax ## 0x[[#%X,WEAK_DY_TLV_ADDR:]] +# CHECK-NEXT: leaq [[#]](%rip), %rax ## 0x[[#%X,WEAK_INT_TLV_ADDR:]] # CHECK-NEXT: callq 0x{{[0-9a-f]*}} # CHECK-NEXT: callq 0x{{[0-9a-f]*}} # CHECK-NEXT: callq 0x{{[0-9a-f]*}} diff --git a/lld/test/MachO/x86-64-reloc-got-load.s b/lld/test/MachO/x86-64-reloc-got-load.s index 9bc65ecefa4df..b5443126866ee 100644 --- a/lld/test/MachO/x86-64-reloc-got-load.s +++ b/lld/test/MachO/x86-64-reloc-got-load.s @@ -6,7 +6,7 @@ # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o # RUN: %lld -o %t %t.o # RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s -# CHECK: leaq [[#]](%rip), %rax # {{.*}} <_foo> +# CHECK: leaq [[#]](%rip), %rax ## {{.*}} <_foo> .globl _main, _foo diff --git a/lld/test/MachO/x86-64-reloc-signed.s b/lld/test/MachO/x86-64-reloc-signed.s index fd12b9bff7010..00643c838cde5 100644 --- a/lld/test/MachO/x86-64-reloc-signed.s +++ b/lld/test/MachO/x86-64-reloc-signed.s @@ -12,18 +12,20 @@ # CHECK-LABEL: Disassembly of section # CHECK: <_main>: -# CHECK-NEXT: movl {{.*}} # 0x[[#S]] +# CHECK-NEXT: movl {{.*}} ## imm = +# CHECK-NEXT: ## 0x[[#S]] # CHECK-NEXT: callq {{.*}} -# CHECK-NEXT: movl {{.*}} # 0x[[#S + 2]] +# CHECK-NEXT: movl {{.*}} ## 0x[[#S + 2]] # CHECK-NEXT: callq {{.*}} -# CHECK-NEXT: movb {{.*}} # 0x[[#S]] +# CHECK-NEXT: movb {{.*}} ## 0x[[#S]] # CHECK-NEXT: callq {{.*}} # CHECK: <__not_text>: -# CHECK-NEXT: movl {{.*}} # 0x[[#FOO + 8]] +# CHECK-NEXT: movl {{.*}} ## imm = +# CHECK-NEXT: ## 0x[[#FOO + 8]] # CHECK-NEXT: callq {{.*}} -# CHECK-NEXT: movl {{.*}} # 0x[[#FOO + 8 + 2]] +# CHECK-NEXT: movl {{.*}} ## 0x[[#FOO + 8 + 2]] # CHECK-NEXT: callq {{.*}} -# CHECK-NEXT: movb {{.*}} # 0x[[#FOO + 8]] +# CHECK-NEXT: movb {{.*}} ## 0x[[#FOO + 8]] # CHECK-NEXT: callq {{.*}} .section __TEXT,__text diff --git a/llvm/test/tools/llvm-objdump/X86/disassemble-align.s b/llvm/test/tools/llvm-objdump/X86/disassemble-align.s index d3ccc59e64ecc..68565fe9732e4 100644 --- a/llvm/test/tools/llvm-objdump/X86/disassemble-align.s +++ b/llvm/test/tools/llvm-objdump/X86/disassemble-align.s @@ -10,14 +10,14 @@ # Instructions are expected to be aligned if the instruction in hex is not too long. # CHECK: 0: c3 |retq -# CHECK-NEXT: 1: 48 8b 05 56 34 12 00 |movq|0x123456(%rip), %rax # 0x12345e <.text+0x12345e> +# CHECK-NEXT: 1: 48 8b 05 56 34 12 00 |movq|0x123456(%rip), %rax # 0x12345e <.text+0x12345e> # CHECK-NEXT: 8: 48 b8 54 55 55 55 55 55 55 55|movabsq|$0x5555555555555554, %rax # imm = 0x5555555555555554 # CHECK-NEXT: 12: 8f ea 00 12 4c 02 40 00 00 00 00 |lwpval|$0x0, 0x40(%rdx,%rax), %r15d # CHECK-NEXT: 1d: 8f ea 00 12 04 25 f0 1c f0 1c 00 00 00 00 |lwpins|$0x0, 0x1cf01cf0, %r15d # CHECK-NEXT: 2b: ff ff | # NORAW: 0: |retq -# NORAW-NEXT: 1: |movq|0x123456(%rip), %rax # 0x12345e <.text+0x12345e> +# NORAW-NEXT: 1: |movq|0x123456(%rip), %rax # 0x12345e <.text+0x12345e> # NORAW-NEXT: 8: |movabsq|$0x5555555555555554, %rax # imm = 0x5555555555555554 # NORAW-NEXT: 12: |lwpval|$0x0, 0x40(%rdx,%rax), %r15d # NORAW-NEXT: 1d: |lwpins|$0x0, 0x1cf01cf0, %r15d diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp index c75d0e482dbc3..39ce32d892aa2 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -1433,6 +1433,8 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, // address (jump target or memory operand address) and print it on the // right of the instruction. if (Disassembled && MIA) { + // Branch targets are printed just after the instructions. + llvm::raw_ostream *TargetOS = &FOS; uint64_t Target; bool PrintTarget = MIA->evaluateBranch(Inst, SectionAddr + Index, Size, Target); @@ -1443,8 +1445,11 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, Target = *MaybeTarget; PrintTarget = true; // Do not print real address when symbolizing. - if (!SymbolizeOperands) - FOS << " # 0x" << Twine::utohexstr(Target); + if (!SymbolizeOperands) { + // Memory operand addresses are printed as comments. + TargetOS = &CommentStream; + *TargetOS << "0x" << Twine::utohexstr(Target); + } } if (PrintTarget) { // In a relocatable object, the target's section must reside in @@ -1503,22 +1508,26 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, if (Demangle) TargetName = demangle(TargetName); - FOS << " <"; + *TargetOS << " <"; if (!Disp) { // Always Print the binary symbol precisely corresponding to // the target address. - FOS << TargetName; + *TargetOS << TargetName; } else if (!LabelAvailable) { // Always Print the binary symbol plus an offset if there's no // local label corresponding to the target address. - FOS << TargetName << "+0x" << Twine::utohexstr(Disp); + *TargetOS << TargetName << "+0x" << Twine::utohexstr(Disp); } else { - FOS << AllLabels[Target]; + *TargetOS << AllLabels[Target]; } - FOS << ">"; + *TargetOS << ">"; } else if (LabelAvailable) { - FOS << " <" << AllLabels[Target] << ">"; + *TargetOS << " <" << AllLabels[Target] << ">"; } + // By convention, each record in the comment stream should be + // terminated. + if (TargetOS == &CommentStream) + *TargetOS << "\n"; } } } From a1c0f09a89690f39683bf22126fe8999e62a6645 Mon Sep 17 00:00:00 2001 From: David Green Date: Mon, 28 Jun 2021 08:54:03 +0100 Subject: [PATCH 009/619] [ARM] Add an extra fold for f32 extract(vdup(i32)) This adds another small fold for extract of a vdup, between a i32 and a f32, converting to a BITCAST. This allows some extra folding to happen, simplifying the resulting code. Differential Revision: https://reviews.llvm.org/D104857 --- llvm/lib/Target/ARM/ARMISelLowering.cpp | 2 + llvm/test/CodeGen/Thumb2/mve-vst4.ll | 98 ++++++++++++------------- 2 files changed, 48 insertions(+), 52 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index cfdff80585c8b..0bd4306309f28 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -14650,6 +14650,8 @@ static SDValue PerformExtractEltCombine(SDNode *N, return DCI.DAG.getNode(ARMISD::VMOVhr, dl, VT, X); if (VT == MVT::i32 && X.getValueType() == MVT::f16) return DCI.DAG.getNode(ARMISD::VMOVrh, dl, VT, X); + if (VT == MVT::f32 && X.getValueType() == MVT::i32) + return DCI.DAG.getNode(ISD::BITCAST, dl, VT, X); while (X.getValueType() != VT && X->getOpcode() == ISD::BITCAST) X = X->getOperand(0); diff --git a/llvm/test/CodeGen/Thumb2/mve-vst4.ll b/llvm/test/CodeGen/Thumb2/mve-vst4.ll index 7a46f7920b04d..cb933dc41f15a 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vst4.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vst4.ll @@ -205,37 +205,34 @@ define void @vst4_v4i32_align1(<4 x i32> *%src, <16 x i32> *%dst) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: .vsave {d8, d9, d10} +; CHECK-NEXT: vpush {d8, d9, d10} ; CHECK-NEXT: vldrw.u32 q0, [r0, #32] -; CHECK-NEXT: vldrw.u32 q3, [r0] -; CHECK-NEXT: vldrw.u32 q1, [r0, #16] +; CHECK-NEXT: vldrw.u32 q4, [r0] ; CHECK-NEXT: vmov r2, r3, d1 ; CHECK-NEXT: vmov r12, lr, d0 -; CHECK-NEXT: vdup.32 q4, r3 -; CHECK-NEXT: vmov.f64 d0, d6 -; CHECK-NEXT: vmov.f32 s1, s4 -; CHECK-NEXT: vmov.f32 s4, s13 -; CHECK-NEXT: vmov.f64 d4, d7 -; CHECK-NEXT: vmov.f32 s12, s15 -; CHECK-NEXT: vmov.f32 s13, s7 -; CHECK-NEXT: vmov.f32 s14, s18 -; CHECK-NEXT: vmov.f32 s15, s19 +; CHECK-NEXT: vldrw.u32 q0, [r0, #16] +; CHECK-NEXT: vmov.f64 d2, d8 +; CHECK-NEXT: vmov.f32 s5, s0 +; CHECK-NEXT: vmov s10, r2 +; CHECK-NEXT: vmov s14, r3 +; CHECK-NEXT: vmov.f32 s8, s18 +; CHECK-NEXT: vmov s20, lr +; CHECK-NEXT: vmov.f32 s9, s2 +; CHECK-NEXT: vmov s6, r12 +; CHECK-NEXT: vmov.f32 s0, s17 +; CHECK-NEXT: vmov.f32 s12, s19 +; CHECK-NEXT: vmov.f32 s13, s3 +; CHECK-NEXT: vmov.f32 s2, s20 +; CHECK-NEXT: vmov.f32 s15, s14 +; CHECK-NEXT: vmov.f32 s11, s10 ; CHECK-NEXT: vstrb.8 q3, [r1, #48] -; CHECK-NEXT: vmov.f32 s9, s6 -; CHECK-NEXT: vdup.32 q3, r2 -; CHECK-NEXT: vmov.f32 s10, s14 -; CHECK-NEXT: vmov.f32 s11, s15 +; CHECK-NEXT: vmov.f32 s3, s20 ; CHECK-NEXT: vstrb.8 q2, [r1, #32] -; CHECK-NEXT: vdup.32 q2, lr -; CHECK-NEXT: vmov.f32 s6, s10 -; CHECK-NEXT: vmov.f32 s7, s11 -; CHECK-NEXT: vstrb.8 q1, [r1, #16] -; CHECK-NEXT: vdup.32 q1, r12 -; CHECK-NEXT: vmov.f32 s2, s6 -; CHECK-NEXT: vmov.f32 s3, s7 -; CHECK-NEXT: vstrb.8 q0, [r1] -; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: vmov.f32 s7, s6 +; CHECK-NEXT: vstrb.8 q0, [r1, #16] +; CHECK-NEXT: vstrb.8 q1, [r1] +; CHECK-NEXT: vpop {d8, d9, d10} ; CHECK-NEXT: pop {r7, pc} entry: %s1 = getelementptr <4 x i32>, <4 x i32>* %src, i32 0 @@ -975,37 +972,34 @@ define void @vst4_v4f32_align1(<4 x float> *%src, <16 x float> *%dst) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: .vsave {d8, d9, d10, d11} -; CHECK-NEXT: vpush {d8, d9, d10, d11} -; CHECK-NEXT: vldrw.u32 q4, [r0] +; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12} ; CHECK-NEXT: vldrw.u32 q0, [r0, #32] -; CHECK-NEXT: vldrw.u32 q2, [r0, #16] -; CHECK-NEXT: vmov.f64 d2, d8 +; CHECK-NEXT: vldrw.u32 q5, [r0] +; CHECK-NEXT: vldrw.u32 q1, [r0, #16] ; CHECK-NEXT: vmov r2, r3, d1 -; CHECK-NEXT: vmov.f32 s5, s8 -; CHECK-NEXT: vdup.32 q5, r3 -; CHECK-NEXT: vmov.f32 s8, s17 -; CHECK-NEXT: vmov.f64 d6, d9 -; CHECK-NEXT: vmov.f32 s16, s19 -; CHECK-NEXT: vmov.f32 s17, s11 -; CHECK-NEXT: vmov.f32 s18, s22 -; CHECK-NEXT: vmov.f32 s19, s23 -; CHECK-NEXT: vstrb.8 q4, [r1, #48] -; CHECK-NEXT: vmov.f32 s13, s10 -; CHECK-NEXT: vdup.32 q4, r2 ; CHECK-NEXT: vmov r12, lr, d0 -; CHECK-NEXT: vmov.f32 s14, s18 +; CHECK-NEXT: vmov.f64 d4, d10 +; CHECK-NEXT: vmov.f32 s9, s4 +; CHECK-NEXT: vmov s14, r2 +; CHECK-NEXT: vmov s18, r3 +; CHECK-NEXT: vmov.f32 s12, s22 +; CHECK-NEXT: vmov s24, lr +; CHECK-NEXT: vmov.f32 s13, s6 +; CHECK-NEXT: vmov.f32 s4, s21 +; CHECK-NEXT: vmov.f32 s16, s23 +; CHECK-NEXT: vmov.f32 s17, s7 +; CHECK-NEXT: vmov s10, r12 +; CHECK-NEXT: vmov.f32 s6, s24 +; CHECK-NEXT: vmov.f32 s19, s18 ; CHECK-NEXT: vmov.f32 s15, s2 +; CHECK-NEXT: vstrb.8 q4, [r1, #48] +; CHECK-NEXT: vmov.f32 s7, s24 ; CHECK-NEXT: vstrb.8 q3, [r1, #32] -; CHECK-NEXT: vdup.32 q3, lr -; CHECK-NEXT: vmov.f32 s10, s14 -; CHECK-NEXT: vmov.f32 s11, s15 -; CHECK-NEXT: vstrb.8 q2, [r1, #16] -; CHECK-NEXT: vdup.32 q2, r12 -; CHECK-NEXT: vmov.f32 s6, s10 -; CHECK-NEXT: vmov.f32 s7, s0 -; CHECK-NEXT: vstrb.8 q1, [r1] -; CHECK-NEXT: vpop {d8, d9, d10, d11} +; CHECK-NEXT: vmov.f32 s11, s0 +; CHECK-NEXT: vstrb.8 q1, [r1, #16] +; CHECK-NEXT: vstrb.8 q2, [r1] +; CHECK-NEXT: vpop {d8, d9, d10, d11, d12} ; CHECK-NEXT: pop {r7, pc} entry: %s1 = getelementptr <4 x float>, <4 x float>* %src, i32 0 From bbf4436a82febeab811af59b20d6928e694b4178 Mon Sep 17 00:00:00 2001 From: Tobias Gysi Date: Mon, 28 Jun 2021 07:30:02 +0000 Subject: [PATCH 010/619] [mlir][linalg] Remove the StructuredOp capture mechanism. After https://reviews.llvm.org/D104109, structured ops support scalar inputs. As a result, the capture mechanism meant to pass non-shaped parameters got redundant. The patch removes the capture semantics after the FillOp migrated to use scalar operands https://reviews.llvm.org/D104121. Differential Revision: https://reviews.llvm.org/D104785 --- mlir/include/mlir-c/Dialect/Linalg.h | 4 +- .../mlir/Dialect/Linalg/IR/LinalgBase.td | 2 +- .../Dialect/Linalg/IR/LinalgInterfaces.td | 2 +- .../Dialect/Linalg/IR/LinalgStructuredOps.td | 18 +++----- mlir/lib/Bindings/Python/DialectLinalg.cpp | 11 ++--- mlir/lib/CAPI/Dialect/Linalg.cpp | 9 +--- mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 44 +++++++------------ .../Linalg/Transforms/Generalization.cpp | 3 +- mlir/python/mlir/dialects/_linalg_ops_ext.py | 2 +- .../test-linalg-ods-gen.tc | 10 ++--- .../test-linalg-ods-yaml-gen.yaml | 5 +-- .../mlir-linalg-ods-gen.cpp | 18 ++++---- .../mlir-linalg-ods-yaml-gen.cpp | 9 ++-- 13 files changed, 51 insertions(+), 86 deletions(-) diff --git a/mlir/include/mlir-c/Dialect/Linalg.h b/mlir/include/mlir-c/Dialect/Linalg.h index 6e20eec16481a..27f2f7bc897f7 100644 --- a/mlir/include/mlir-c/Dialect/Linalg.h +++ b/mlir/include/mlir-c/Dialect/Linalg.h @@ -18,11 +18,9 @@ extern "C" { #endif /// Apply the special region builder for the builtin named Linalg op. -/// The list of `capture` MlirValue is passed as-is to the region builder. /// Assert that `op` is a builtin named Linalg op. MLIR_CAPI_EXPORTED void -mlirLinalgFillBuiltinNamedOpRegion(MlirDialect linalgDialect, MlirOperation op, - intptr_t n, MlirValue const *mlirCaptures); +mlirLinalgFillBuiltinNamedOpRegion(MlirDialect linalgDialect, MlirOperation op); MLIR_DECLARE_CAPI_DIALECT_REGISTRATION(Linalg, linalg); diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgBase.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgBase.td index 9d1e3baad8ee7..092d22983d3f2 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgBase.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgBase.td @@ -49,7 +49,7 @@ def Linalg_Dialect : Dialect { kInplaceableAttrName = "linalg.inplaceable"; using RegionBuilderFunType = - llvm::function_ref; + llvm::function_ref; RegionBuilderFunType getRegionBuilder(StringRef name) { return namedStructuredOpRegionBuilders.lookup(name); } diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td index 8c0d4763376c3..ad91e23607141 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td @@ -901,7 +901,7 @@ def LinalgStructuredInterface : OpInterface<"LinalgOp"> { Returns a null function if this named op does not define a region builder. }], - /*retTy=*/"std::function", + /*retTy=*/"std::function", /*methodName=*/"getRegionBuilder", (ins), [{ return ConcreteOp::getRegionBuilder(); }] diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td index f83f484187c97..18f5beeddf2ea 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td @@ -153,10 +153,8 @@ def CopyOp : LinalgStructured_Op<"copy", [CopyOpInterface]> { Value getSource() { return input();} Value getTarget() { return output(); } - static void regionBuilder( - ImplicitLocOpBuilder &b, Block &block, ValueRange captures); - static std::function< - void(ImplicitLocOpBuilder &b, Block &block, ValueRange captures)> + static void regionBuilder(ImplicitLocOpBuilder &b, Block &block); + static std::function getRegionBuilder() { return ®ionBuilder; } @@ -200,10 +198,8 @@ def FillOp : LinalgStructured_Op<"fill", []> { extractOrIdentityMap(llvm::None, getNumParallelLoops(), context)}); } - static void regionBuilder( - ImplicitLocOpBuilder &b, Block &block, ValueRange captures); - static std::function< - void(ImplicitLocOpBuilder &b, Block &block, ValueRange captures)> + static void regionBuilder(ImplicitLocOpBuilder &b, Block &block); + static std::function getRegionBuilder() { return ®ionBuilder; } @@ -291,8 +287,7 @@ class PoolingBase_Op props> return padding().getValue().getValue({i, 1}); } - static std::function< - void(ImplicitLocOpBuilder &b, Block &block, ValueRange captures)> + static std::function getRegionBuilder() { return nullptr; } @@ -533,8 +528,7 @@ class GenericOpBase : LinalgStructuredBase_Opstr() : "op_has_no_registered_library_name"; } - static std::function< - void(ImplicitLocOpBuilder &b, Block &block, ValueRange captures)> + static std::function getRegionBuilder() { return nullptr; } diff --git a/mlir/lib/Bindings/Python/DialectLinalg.cpp b/mlir/lib/Bindings/Python/DialectLinalg.cpp index dfac96db74b12..a2a54249e6d68 100644 --- a/mlir/lib/Bindings/Python/DialectLinalg.cpp +++ b/mlir/lib/Bindings/Python/DialectLinalg.cpp @@ -21,15 +21,10 @@ using namespace mlir::python; void mlir::python::populateDialectLinalgSubmodule(py::module m) { m.def( "fill_builtin_region", - [](PyDialectDescriptor &dialect, PyOperation &op, py::list captures) { - llvm::SmallVector mlirOperands; - mlirOperands.reserve(captures.size()); - for (auto v : captures) - mlirOperands.push_back(py::cast(v)->get()); - mlirLinalgFillBuiltinNamedOpRegion( - dialect.get(), op.get(), mlirOperands.size(), mlirOperands.data()); + [](PyDialectDescriptor &dialect, PyOperation &op) { + mlirLinalgFillBuiltinNamedOpRegion(dialect.get(), op.get()); }, - py::arg("dialect"), py::arg("op"), py::arg("captures") = py::list(), + py::arg("dialect"), py::arg("op"), "Fill the region for `op`, which is assumed to be a builtin named Linalg " "op."); } diff --git a/mlir/lib/CAPI/Dialect/Linalg.cpp b/mlir/lib/CAPI/Dialect/Linalg.cpp index be0d5448819d9..902599f3b9adf 100644 --- a/mlir/lib/CAPI/Dialect/Linalg.cpp +++ b/mlir/lib/CAPI/Dialect/Linalg.cpp @@ -16,13 +16,8 @@ using namespace mlir::linalg; /// Apply the special region builder for the builtin named Linalg op. /// Assert that `op` is a builtin named Linalg op. void mlirLinalgFillBuiltinNamedOpRegion(MlirDialect linalgDialect, - MlirOperation mlirOp, intptr_t n, - MlirValue const *mlirCaptures) { + MlirOperation mlirOp) { Operation *op = unwrap(mlirOp); - SmallVector captures; - captures.reserve(n); - for (unsigned idx = 0; idx < n; ++idx) - captures.push_back(unwrap(mlirCaptures[idx])); LinalgDialect::RegionBuilderFunType fun = static_cast(unwrap(linalgDialect)) @@ -41,7 +36,7 @@ void mlirLinalgFillBuiltinNamedOpRegion(MlirDialect linalgDialect, Region ®ion = op->getRegion(0); Block *body = b.createBlock(®ion, /*insertPt=*/{}, argTypes); b.setInsertionPointToStart(body); - fun(b, *body, captures); + fun(b, *body); } MLIR_DEFINE_CAPI_DIALECT_REGISTRATION(Linalg, linalg, LinalgDialect) diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp index 109a1c60ddc39..11cb3e15c0e0c 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -43,20 +43,19 @@ using namespace mlir::linalg; /// defined C++ ops. /// This is used by both builders and parsers. /// This function creates the block in the region with arguments corresponding -/// to the elemental types of `inputTypes` and `outputTypes`, which are asserted -/// to be ShapedType. +/// to the elemental types of `inputTypes` and `outputTypes`. The latter are +/// asserted to be of ShapedType. template static void fillStructuredOpRegion( OpBuilder &opBuilder, Region ®ion, TypeRange inputTypes, - TypeRange outputTypes, ValueRange captures = {}, + TypeRange outputTypes, std::function errorHandler = nullptr); /// Generic entry point to create both the region and the block of a LinalgOp. template static void createAndFillStructuredOpRegion(OpBuilder &opBuilder, OperationState &result, - TypeRange inputTypes, TypeRange outputTypes, - ValueRange captures = {}); + TypeRange inputTypes, TypeRange outputTypes); /// Common parsing and printing used for both named structured ops created by /// ods-gen and by manually defined C++ ops. Does not handle regions. @@ -72,17 +71,15 @@ static void printCommonStructuredOpParts(OpAsmPrinter &p, template static ParseResult parseNamedStructuredOpRegion(OpAsmParser &parser, Region ®ion, - TypeRange inputTypes, TypeRange outputTypes, - ArrayRef captures = {}); + TypeRange inputTypes, TypeRange outputTypes); static ParseResult parseNamedStructuredOpResults(OpAsmParser &parser, SmallVectorImpl &resultTypes); template -static ParseResult -parseNamedStructuredOp(OpAsmParser &parser, OperationState &result, - ArrayRef captures = {}); +static ParseResult parseNamedStructuredOp(OpAsmParser &parser, + OperationState &result); static void printNamedStructuredOpResults(OpAsmPrinter &p, TypeRange resultTypes); @@ -323,8 +320,7 @@ class RegionBuilderHelper { //===----------------------------------------------------------------------===// // CopyOp //===----------------------------------------------------------------------===// -void CopyOp::regionBuilder(ImplicitLocOpBuilder &b, Block &block, - ValueRange captures) { +void CopyOp::regionBuilder(ImplicitLocOpBuilder &b, Block &block) { assert(block.getNumArguments() == 2 && "CopyOp regionBuilder expects 2 args"); b.create(block.getArgument(0)); } @@ -403,8 +399,7 @@ void CopyOp::getEffects( //===----------------------------------------------------------------------===// // FillOp //===----------------------------------------------------------------------===// -void FillOp::regionBuilder(ImplicitLocOpBuilder &b, Block &block, - ValueRange captures) { +void FillOp::regionBuilder(ImplicitLocOpBuilder &b, Block &block) { assert(block.getNumArguments() == 2 && "FillOp regionBuilder expects 2 args"); b.create(block.getArgument(0)); } @@ -2799,7 +2794,6 @@ template static void fillStructuredOpRegion(OpBuilder &opBuilder, Region ®ion, TypeRange inputTypes, TypeRange outputTypes, - ValueRange captures, std::function errorHandler) { assert(llvm::all_of(outputTypes, [](Type t) { return t.isa(); })); @@ -2823,7 +2817,7 @@ fillStructuredOpRegion(OpBuilder &opBuilder, Region ®ion, opBuilder.setInsertionPointToStart(body); ImplicitLocOpBuilder b(opBuilder.getUnknownLoc(), opBuilder); - NamedStructuredOpType::regionBuilder(b, *body, captures); + NamedStructuredOpType::regionBuilder(b, *body); // indexing_maps is an auto-generated method. @@ -2835,11 +2829,10 @@ template void createAndFillStructuredOpRegion(OpBuilder &opBuilder, OperationState &result, TypeRange inputTypes, - TypeRange outputTypes, - ValueRange captures) { + TypeRange outputTypes) { Region ®ion = *result.addRegion(); fillStructuredOpRegion( - opBuilder, region, inputTypes, outputTypes, captures, + opBuilder, region, inputTypes, outputTypes, [&](unsigned expected, unsigned actual) { assert(expected != actual && "incorrect number of arguments"); }); @@ -2902,15 +2895,14 @@ static void printCommonStructuredOpParts(OpAsmPrinter &p, template static ParseResult parseNamedStructuredOpRegion(OpAsmParser &parser, Region ®ion, - TypeRange inputTypes, TypeRange outputTypes, - ArrayRef captures) { + TypeRange inputTypes, TypeRange outputTypes) { ParseResult res = success(); OpBuilder opBuilder(parser.getBuilder().getContext()); // Resolve `captures` into `capturedValues` at parse time so we can build the // region with captures. SmallVector capturedValues; fillStructuredOpRegion( - opBuilder, region, inputTypes, outputTypes, capturedValues, + opBuilder, region, inputTypes, outputTypes, [&](unsigned expected, unsigned actual) { res = parser.emitError( parser.getCurrentLocation(), @@ -2931,11 +2923,9 @@ parseNamedStructuredOpResults(OpAsmParser &parser, } template -static ParseResult -parseNamedStructuredOp(OpAsmParser &parser, OperationState &result, - ArrayRef captures) { +static ParseResult parseNamedStructuredOp(OpAsmParser &parser, + OperationState &result) { // TODO: Enable when ods-gen supports captures. - assert(captures.empty() && "unexpected captures for named structured ops"); SmallVector inputTypes, outputTypes; if (parseCommonStructuredOpParts(parser, result, inputTypes, outputTypes)) return failure(); @@ -2949,7 +2939,7 @@ parseNamedStructuredOp(OpAsmParser &parser, OperationState &result, std::unique_ptr region = std::make_unique(); if (parseNamedStructuredOpRegion( - parser, *region, inputTypes, outputTypes, captures)) + parser, *region, inputTypes, outputTypes)) return failure(); result.addRegion(std::move(region)); diff --git a/mlir/lib/Dialect/Linalg/Transforms/Generalization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Generalization.cpp index d5e619719fd7f..d0d14f86c54dd 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Generalization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Generalization.cpp @@ -63,8 +63,7 @@ static GenericOp createGenericOpFromNamedOp(LinalgOp namedOp, iterators, [®ionBuilder](OpBuilder &bodyBuilder, Location loc, ValueRange) { ImplicitLocOpBuilder b(loc, bodyBuilder); - regionBuilder(b, *bodyBuilder.getBlock(), - /*captures=*/{}); + regionBuilder(b, *bodyBuilder.getBlock()); }); } diff --git a/mlir/python/mlir/dialects/_linalg_ops_ext.py b/mlir/python/mlir/dialects/_linalg_ops_ext.py index c7ddfb962375d..bce4e08ae3a06 100644 --- a/mlir/python/mlir/dialects/_linalg_ops_ext.py +++ b/mlir/python/mlir/dialects/_linalg_ops_ext.py @@ -33,7 +33,7 @@ def __init__(self, output: Value, value: Value, *, loc=None, ip=None): ip=ip) OpView.__init__(self, op) linalgDialect = Context.current.get_dialect_descriptor("linalg") - fill_builtin_region(linalgDialect, self.operation, []) + fill_builtin_region(linalgDialect, self.operation) # TODO: self.result is None. When len(results) == 1 we expect it to be # results[0] as per _linalg_ops_gen.py. This seems like an orthogonal bug # in the generator of _linalg_ops_gen.py where we have: diff --git a/mlir/test/mlir-linalg-ods-gen/test-linalg-ods-gen.tc b/mlir/test/mlir-linalg-ods-gen/test-linalg-ods-gen.tc index 84adc8b260c49..471961f837bf3 100644 --- a/mlir/test/mlir-linalg-ods-gen/test-linalg-ods-gen.tc +++ b/mlir/test/mlir-linalg-ods-gen/test-linalg-ods-gen.tc @@ -24,7 +24,7 @@ // IMPL-NEXT: return {{.+}}.getAffineMapArrayAttr({ map0, map1, map2 }); // // IMPL: void Test1Op::regionBuilder(ImplicitLocOpBuilder &b, -// IMPL: Block &block, ValueRange captures) { +// IMPL: Block &block) { // IMPL: Value [[a:.*]](args[0]), [[b:.*]](args[1]), [[c:.*]](args[2]); // IMPL: Value [[d:.*]] = b.create([[a]], [[b]]); // IMPL: Value [[e:.*]] = b.create([[c]], [[d]]); @@ -49,7 +49,7 @@ def test1(A: f32(M, K), B: f32(K)) -> (C: f32(M)) { // IMPL: AffineMap::get(3, 3, {d0, d1}, context) // // IMPL: Test2Op::regionBuilder(ImplicitLocOpBuilder &b, -// IMPL: Block &block, ValueRange captures) { +// IMPL: Block &block) { // IMPL: Value [[a:.*]](args[0]), [[b:.*]](args[1]), [[c:.*]](args[2]); // IMPL: Value [[d:.*]] = b.create([[a]], [[b]]); // IMPL: Value [[e:.*]] = b.create([[c]], [[d]]); @@ -74,7 +74,7 @@ def test2(A: f32(M, K), B: f32(K, N)) -> (C: f32(M, N)) { // IMPL: AffineMap::get(4, 4, {d0, d1, d2}, context) // // IMPL: Test3Op::regionBuilder(ImplicitLocOpBuilder &b, -// IMPL: Block &block, ValueRange captures) { +// IMPL: Block &block) { // IMPL: Value [[a:.*]](args[0]), [[b:.*]](args[1]), [[c:.*]](args[2]); // IMPL: Value [[d:.*]] = b.create([[a]], [[b]]); // IMPL: Value [[e:.*]] = b.create([[c]], [[d]]); @@ -182,7 +182,7 @@ def test7(A: f32(M, K), B: f32(K)) -> (C: f32(M)) // Test output arg order. // IMPL-LABEL: void Test8Op::regionBuilder(ImplicitLocOpBuilder &b, -// IMPL: Block &block, ValueRange captures) { +// IMPL: Block &block) { // IMPL: Value [[a:.*]](args[0]), [[b:.*]](args[1]), [[c:.*]](args[2]); // IMPL: Value [[d:.*]] = b.create([[a]], [[b]]); // IMPL: Value [[e:.*]] = b.create([[d]], [[c]]); @@ -199,7 +199,7 @@ def test8(A: f32(M, K), B: f32(K)) -> (C: f32(M)) // IMPL: auto map1 = AffineMap::get(2, 2, {d1}, context); // IMPL: auto map2 = AffineMap::get(2, 2, {d0}, context); // IMPL-LABEL: void Test9Op::regionBuilder(ImplicitLocOpBuilder &b, -// IMPL: Block &block, ValueRange captures) { +// IMPL: Block &block) { // IMPL: Value [[a:.*]](args[0]), [[c:.*]](args[2]); ods_def: def test9(A: f32(M, K), B: f32(K)) -> (C: f32(M)) diff --git a/mlir/test/mlir-linalg-ods-gen/test-linalg-ods-yaml-gen.yaml b/mlir/test/mlir-linalg-ods-gen/test-linalg-ods-yaml-gen.yaml index 471890e5f4a45..3c8b5271cf5c3 100644 --- a/mlir/test/mlir-linalg-ods-gen/test-linalg-ods-yaml-gen.yaml +++ b/mlir/test/mlir-linalg-ods-gen/test-linalg-ods-yaml-gen.yaml @@ -76,7 +76,7 @@ structured_op: !LinalgStructuredOpConfig # ODS-NEXT: TypeRange(outputs) # IMPL-LABEL: void Test1Op::regionBuilder( -# IMPL: ImplicitLocOpBuilder &b, Block &block, ValueRange captures) +# IMPL: ImplicitLocOpBuilder &b, Block &block) # IMPL: Value [[VAL0:[a-z0-9]+]] = helper.constant("42 : i64"); # IMPL-DAG: Value [[VAL1:[a-z0-9]+]] = helper.cast(block.getArgument(0).getType(), [[VAL0]]); # IMPL-DAG: Value [[VAL2:[a-z0-9]+]] = helper.index(1); @@ -163,8 +163,7 @@ structured_op: !LinalgStructuredOpConfig # IMPL: auto attr = op->getAttrOfType("strides") # IMPL: "missing indexing map required attribute 'strides'" -# IMPL: void Test2Op::regionBuilder( -# IMPL-NEXT: ImplicitLocOpBuilder &b, Block &block, ValueRange captures) +# IMPL: void Test2Op::regionBuilder(ImplicitLocOpBuilder &b, Block &block) # IMPL-NEXT: assert(2 > 0 && block.getNumArguments() == 2 && # IMPL: yields.push_back(block.getArgument(0)); diff --git a/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-gen.cpp b/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-gen.cpp index faa2835d589e7..1bdb5b8806d0d 100644 --- a/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-gen.cpp +++ b/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-gen.cpp @@ -1923,7 +1923,7 @@ void TCParser::printODS(llvm::raw_ostream &os, StringRef cppOpName, $_builder, $_state, TypeRange(inputs), - TypeRange(outputs)/*, TODO: support captures*/); + TypeRange(outputs)); }]>, OpBuilder< (ins "TypeRange":$resultTensorTypes, "ValueRange":$inputs, @@ -1941,7 +1941,7 @@ void TCParser::printODS(llvm::raw_ostream &os, StringRef cppOpName, $_builder, $_state, TypeRange(inputs), - TypeRange(outputs)/*, TODO: support captures*/); + TypeRange(outputs)); }]>, OpBuilder< (ins "TypeRange":$resultTensorTypes, "ValueRange":$operands, @@ -1956,7 +1956,7 @@ void TCParser::printODS(llvm::raw_ostream &os, StringRef cppOpName, ]; let printer = [{{ return ::printNamedStructuredOp(p, *this); }]; let parser = [{{ - return ::parseNamedStructuredOp<{0}>(parser, result/*TODO:, captures*/); + return ::parseNamedStructuredOp<{0}>(parser, result); }]; let hasFolder = 1; @@ -1964,10 +1964,9 @@ void TCParser::printODS(llvm::raw_ostream &os, StringRef cppOpName, // Auto-generated. ArrayAttr iterator_types(); ArrayAttr indexing_maps(); - static void regionBuilder(ImplicitLocOpBuilder &b, - Block &block, ValueRange captures); - static std::function getRegionBuilder() {{ + static void regionBuilder(ImplicitLocOpBuilder &b, Block &block); + static std::function + getRegionBuilder() {{ return regionBuilder; } @@ -2035,7 +2034,7 @@ void TCParser::printODS(llvm::raw_ostream &os, StringRef cppOpName, $_builder, $_state, TypeRange(inputs), - TypeRange(outputs)/*, TODO: support captures*/); + TypeRange(outputs)); {2} }]> )FMT"; @@ -2354,8 +2353,7 @@ void TCParser::printRegionBuilder(llvm::raw_ostream &os, StringRef cppOpName, }; const char *regionBuilderFmt = R"FMT( - void {0}::regionBuilder(ImplicitLocOpBuilder &b, - Block &block, ValueRange captures) { + void {0}::regionBuilder(ImplicitLocOpBuilder &b, Block &block) { auto args = block.getArguments(); Value {1}; {2} diff --git a/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-yaml-gen.cpp b/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-yaml-gen.cpp index 00c4096d095cf..83447f4930170 100644 --- a/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-yaml-gen.cpp +++ b/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-yaml-gen.cpp @@ -511,10 +511,8 @@ def {0} : LinalgStructuredBase_Op<"{1}", !listconcat([ // Auto-generated. ArrayAttr iterator_types(); ArrayAttr indexing_maps(); - static void regionBuilder( - ImplicitLocOpBuilder &b, Block &block, ValueRange captures); - static std::function< - void(ImplicitLocOpBuilder &b, Block &, ValueRange)> + static void regionBuilder(ImplicitLocOpBuilder &b, Block &block); + static std::function getRegionBuilder() {{ return regionBuilder; } @@ -883,8 +881,7 @@ LogicalResult {0}::verifyIndexingMapRequiredAttributes() {{ // {1}: Number of args // {2}: Statements static const char structuredOpRegionBuilderFormat[] = R"FMT( -void {0}::regionBuilder( - ImplicitLocOpBuilder &b, Block &block, ValueRange captures) {{ +void {0}::regionBuilder(ImplicitLocOpBuilder &b, Block &block) {{ assert({1} > 0 && block.getNumArguments() == {1} && "{0} regionBuilder expects {1} (>=0) args"); RegionBuilderHelper helper(block.getArgument(0).getContext(), block); From a49855316251aaa3cfe62b797b5650ae55b09378 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Mon, 28 Jun 2021 08:32:36 +0000 Subject: [PATCH 011/619] [clang][ARM] Mark sanitize-coverage-old-pm.c unsupported on armv7l Our v7 Linux bots report the arch as "armv7l", not "armv7". --- clang/test/CodeGen/sanitize-coverage-old-pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/CodeGen/sanitize-coverage-old-pm.c b/clang/test/CodeGen/sanitize-coverage-old-pm.c index ff37eda464a85..9b4f8991864d6 100644 --- a/clang/test/CodeGen/sanitize-coverage-old-pm.c +++ b/clang/test/CodeGen/sanitize-coverage-old-pm.c @@ -6,7 +6,7 @@ // RUN: %clang %s -target x86_64-unknown-linux-gnu -emit-llvm -S -fsanitize=undefined -fsanitize-coverage=trace-pc,trace-cmp -o - -flegacy-pass-manager | FileCheck %s --check-prefixes=CHECK,UBSAN // // Host armv7 is currently unsupported: https://bugs.llvm.org/show_bug.cgi?id=46117 -// UNSUPPORTED: armv7, thumbv7, armv8l +// UNSUPPORTED: armv7, armv7l, thumbv7, armv8l // The same issue also occurs on a riscv32 host. // XFAIL: riscv32 From 499e39c5983dba35861b5482bd298a8da726f1b6 Mon Sep 17 00:00:00 2001 From: Whisperity Date: Tue, 29 Oct 2019 13:52:15 +0100 Subject: [PATCH 012/619] [clang-tidy] Add 'bugprone-easily-swappable-parameters' check Finds function definitions where parameters of convertible types follow each other directly, making call sites prone to calling the function with swapped (or badly ordered) arguments. Such constructs are usually the result of inefficient design and lack of exploitation of strong type capabilities that are possible in the language. This check finds and flags **function definitions** and **not** call sites! Reviewed By: aaron.ballman, alexfh Differential Revision: http://reviews.llvm.org/D69560 --- .../bugprone/BugproneTidyModule.cpp | 3 + .../clang-tidy/bugprone/CMakeLists.txt | 1 + .../EasilySwappableParametersCheck.cpp | 495 ++++++++++++++++++ .../bugprone/EasilySwappableParametersCheck.h | 47 ++ clang-tools-extra/docs/ReleaseNotes.rst | 7 + .../bugprone-easily-swappable-parameters.rst | 113 ++++ .../docs/clang-tidy/checks/list.rst | 1 + ...one-easily-swappable-parameters-ignore.cpp | 33 ++ ...prone-easily-swappable-parameters-len2.cpp | 188 +++++++ ...prone-easily-swappable-parameters-len3.cpp | 24 + .../bugprone-easily-swappable-parameters.c | 148 ++++++ 11 files changed, 1060 insertions(+) create mode 100644 clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.cpp create mode 100644 clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.h create mode 100644 clang-tools-extra/docs/clang-tidy/checks/bugprone-easily-swappable-parameters.rst create mode 100644 clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-ignore.cpp create mode 100644 clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-len2.cpp create mode 100644 clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-len3.cpp create mode 100644 clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters.c diff --git a/clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp b/clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp index 595a30e8d8ce3..35b5f2c37df68 100644 --- a/clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp @@ -18,6 +18,7 @@ #include "CopyConstructorInitCheck.h" #include "DanglingHandleCheck.h" #include "DynamicStaticInitializersCheck.h" +#include "EasilySwappableParametersCheck.h" #include "ExceptionEscapeCheck.h" #include "FoldInitTypeCheck.h" #include "ForwardDeclarationNamespaceCheck.h" @@ -91,6 +92,8 @@ class BugproneModule : public ClangTidyModule { "bugprone-dangling-handle"); CheckFactories.registerCheck( "bugprone-dynamic-static-initializers"); + CheckFactories.registerCheck( + "bugprone-easily-swappable-parameters"); CheckFactories.registerCheck( "bugprone-exception-escape"); CheckFactories.registerCheck( diff --git a/clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt b/clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt index 022e5c5842ee2..78a70c703dc09 100644 --- a/clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt @@ -13,6 +13,7 @@ add_clang_library(clangTidyBugproneModule CopyConstructorInitCheck.cpp DanglingHandleCheck.cpp DynamicStaticInitializersCheck.cpp + EasilySwappableParametersCheck.cpp ExceptionEscapeCheck.cpp FoldInitTypeCheck.cpp ForwardDeclarationNamespaceCheck.cpp diff --git a/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.cpp new file mode 100644 index 0000000000000..07c8ef486f654 --- /dev/null +++ b/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.cpp @@ -0,0 +1,495 @@ +//===--- EasilySwappableParametersCheck.cpp - clang-tidy ------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "EasilySwappableParametersCheck.h" +#include "../utils/OptionsUtils.h" +#include "clang/AST/ASTContext.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/Lex/Lexer.h" + +#define DEBUG_TYPE "EasilySwappableParametersCheck" +#include "llvm/Support/Debug.h" + +namespace optutils = clang::tidy::utils::options; + +/// The default value for the MinimumLength check option. +static constexpr std::size_t DefaultMinimumLength = 2; + +/// The default value for ignored parameter names. +static const std::string DefaultIgnoredParameterNames = + optutils::serializeStringList({"\"\"", "iterator", "Iterator", "begin", + "Begin", "end", "End", "first", "First", + "last", "Last", "lhs", "LHS", "rhs", "RHS"}); + +/// The default value for ignored parameter type suffixes. +static const std::string DefaultIgnoredParameterTypeSuffixes = + optutils::serializeStringList({"bool", + "Bool", + "_Bool", + "it", + "It", + "iterator", + "Iterator", + "inputit", + "InputIt", + "forwardit", + "FowardIt", + "bidirit", + "BidirIt", + "constiterator", + "const_iterator", + "Const_Iterator", + "Constiterator", + "ConstIterator", + "RandomIt", + "randomit", + "random_iterator", + "ReverseIt", + "reverse_iterator", + "reverse_const_iterator", + "ConstReverseIterator", + "Const_Reverse_Iterator", + "const_reverse_iterator" + "Constreverseiterator", + "constreverseiterator"}); + +using namespace clang::ast_matchers; + +namespace clang { +namespace tidy { +namespace bugprone { + +using TheCheck = EasilySwappableParametersCheck; + +namespace filter { +static bool isIgnoredParameter(const TheCheck &Check, const ParmVarDecl *Node); +} // namespace filter + +namespace model { + +/// The language features involved in allowing the mix between two parameters. +enum class MixFlags : unsigned char { + Invalid = 0, //< Sentinel bit pattern. DO NOT USE! + + None = 1, //< Mix between the two parameters is not possible. + Trivial = 2, //< The two mix trivially, and are the exact same type. + Canonical = 4, //< The two mix because the types refer to the same + // CanonicalType, but we do not elaborate as to how. + + LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue =*/Canonical) +}; +LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); + +/// Returns whether the SearchedFlag is turned on in the Data. +static inline bool hasFlag(MixFlags Data, MixFlags SearchedFlag) { + assert(SearchedFlag != MixFlags::Invalid && + "can't be used to detect lack of all bits!"); + + // "Data & SearchedFlag" would need static_cast() in conditions. + return (Data & SearchedFlag) == SearchedFlag; +} + +#ifndef NDEBUG + +// The modelling logic of this check is more complex than usual, and +// potentially hard to understand without the ability to see into the +// representation during the recursive descent. This debug code is only +// compiled in 'Debug' mode, or if LLVM_ENABLE_ASSERTIONS config is turned on. + +/// Formats the MixFlags enum into a useful, user-readable representation. +static inline std::string formatMixFlags(MixFlags F) { + if (F == MixFlags::Invalid) + return "#Inv!"; + + SmallString<4> Str{"---"}; + + if (hasFlag(F, MixFlags::None)) + // Shows the None bit explicitly, as it can be applied in the recursion + // even if other bits are set. + Str[0] = '!'; + if (hasFlag(F, MixFlags::Trivial)) + Str[1] = 'T'; + if (hasFlag(F, MixFlags::Canonical)) + Str[2] = 'C'; + + return Str.str().str(); +} + +#else + +static inline std::string formatMixFlags(MixFlags F); + +#endif // NDEBUG + +/// Contains the metadata for the mixability result between two types, +/// independently of which parameters they were calculated from. +struct MixData { + MixFlags Flags; + + MixData(MixFlags Flags) : Flags(Flags) {} + + void sanitize() { + assert(Flags != MixFlags::Invalid && "sanitize() called on invalid bitvec"); + // TODO: There will be statements here in further extensions of the check. + } +}; + +/// A named tuple that contains the information for a mix between two concrete +/// parameters. +struct Mix { + const ParmVarDecl *First, *Second; + MixData Data; + + Mix(const ParmVarDecl *F, const ParmVarDecl *S, MixData Data) + : First(F), Second(S), Data(std::move(Data)) {} + + void sanitize() { Data.sanitize(); } + MixFlags flags() const { return Data.Flags; } +}; + +// NOLINTNEXTLINE(misc-redundant-expression): Seems to be a bogus warning. +static_assert(std::is_trivially_copyable::value && + std::is_trivially_move_constructible::value && + std::is_trivially_move_assignable::value, + "Keep frequently used data simple!"); + +struct MixableParameterRange { + /// A container for Mixes. + using MixVector = SmallVector; + + /// The number of parameters iterated to build the instance. + std::size_t NumParamsChecked = 0; + + /// The individual flags and supporting information for the mixes. + MixVector Mixes; + + /// Gets the leftmost parameter of the range. + const ParmVarDecl *getFirstParam() const { + // The first element is the LHS of the very first mix in the range. + assert(!Mixes.empty()); + return Mixes.front().First; + } + + /// Gets the rightmost parameter of the range. + const ParmVarDecl *getLastParam() const { + // The builder function breaks building an instance of this type if it + // finds something that can not be mixed with the rest, by going *forward* + // in the list of parameters. So at any moment of break, the RHS of the last + // element of the mix vector is also the last element of the mixing range. + assert(!Mixes.empty()); + return Mixes.back().Second; + } +}; + +/// Approximate the way how LType and RType might refer to "essentially the +/// same" type, in a sense that at a particular call site, an expression of +/// type LType and RType might be successfully passed to a variable (in our +/// specific case, a parameter) of type RType and LType, respectively. +/// Note the swapped order! +/// +/// The returned data structure is not guaranteed to be properly set, as this +/// function is potentially recursive. It is the caller's responsibility to +/// call sanitize() on the result once the recursion is over. +static MixData calculateMixability(const TheCheck &Check, const QualType LType, + const QualType RType, + const ASTContext &Ctx) { + LLVM_DEBUG(llvm::dbgs() << ">>> calculateMixability for LType:\n"; + LType.dump(llvm::dbgs(), Ctx); llvm::dbgs() << "\nand RType:\n"; + RType.dump(llvm::dbgs(), Ctx); llvm::dbgs() << '\n';); + + if (LType == RType) { + LLVM_DEBUG(llvm::dbgs() << "<<< calculateMixability. Trivial equality.\n"); + return {MixFlags::Trivial}; + } + + // TODO: Implement more elaborate logic, such as typedef, implicit + // conversions, etc. + + // If none of the previous logic found a match, try if Clang otherwise + // believes the types to be the same. + if (LType.getCanonicalType() == RType.getCanonicalType()) { + LLVM_DEBUG(llvm::dbgs() + << "<<< calculateMixability. Same CanonicalType.\n"); + return {MixFlags::Canonical}; + } + + LLVM_DEBUG(llvm::dbgs() << "<<< calculateMixability. No match found.\n"); + return {MixFlags::None}; +} + +static MixableParameterRange modelMixingRange(const TheCheck &Check, + const FunctionDecl *FD, + std::size_t StartIndex) { + std::size_t NumParams = FD->getNumParams(); + assert(StartIndex < NumParams && "out of bounds for start"); + const ASTContext &Ctx = FD->getASTContext(); + + MixableParameterRange Ret; + // A parameter at index 'StartIndex' had been trivially "checked". + Ret.NumParamsChecked = 1; + + for (std::size_t I = StartIndex + 1; I < NumParams; ++I) { + const ParmVarDecl *Ith = FD->getParamDecl(I); + LLVM_DEBUG(llvm::dbgs() << "Check param #" << I << "...\n"); + + if (filter::isIgnoredParameter(Check, Ith)) { + LLVM_DEBUG(llvm::dbgs() << "Param #" << I << " is ignored. Break!\n"); + break; + } + + // Now try to go forward and build the range of [Start, ..., I, I + 1, ...] + // parameters that can be messed up at a call site. + MixableParameterRange::MixVector MixesOfIth; + for (std::size_t J = StartIndex; J < I; ++J) { + const ParmVarDecl *Jth = FD->getParamDecl(J); + LLVM_DEBUG(llvm::dbgs() + << "Check mix of #" << J << " against #" << I << "...\n"); + + Mix M{Jth, Ith, + calculateMixability(Check, Jth->getType(), Ith->getType(), Ctx)}; + LLVM_DEBUG(llvm::dbgs() << "Mix flags (raw) : " + << formatMixFlags(M.flags()) << '\n'); + M.sanitize(); + LLVM_DEBUG(llvm::dbgs() << "Mix flags (after sanitize): " + << formatMixFlags(M.flags()) << '\n'); + + assert(M.flags() != MixFlags::Invalid && "All flags decayed!"); + + if (M.flags() != MixFlags::None) + MixesOfIth.emplace_back(std::move(M)); + } + + if (MixesOfIth.empty()) { + // If there weren't any new mixes stored for Ith, the range is + // [Start, ..., I]. + LLVM_DEBUG(llvm::dbgs() + << "Param #" << I + << " does not mix with any in the current range. Break!\n"); + break; + } + + Ret.Mixes.insert(Ret.Mixes.end(), MixesOfIth.begin(), MixesOfIth.end()); + ++Ret.NumParamsChecked; // Otherwise a new param was iterated. + } + + return Ret; +} + +} // namespace model + +namespace filter { + +/// Returns whether the parameter's name or the parameter's type's name is +/// configured by the user to be ignored from analysis and diagnostic. +static bool isIgnoredParameter(const TheCheck &Check, const ParmVarDecl *Node) { + LLVM_DEBUG(llvm::dbgs() << "Checking if '" << Node->getName() + << "' is ignored.\n"); + + if (!Node->getIdentifier()) + return llvm::find(Check.IgnoredParameterNames, "\"\"") != + Check.IgnoredParameterNames.end(); + + StringRef NodeName = Node->getName(); + if (llvm::find(Check.IgnoredParameterNames, NodeName) != + Check.IgnoredParameterNames.end()) { + LLVM_DEBUG(llvm::dbgs() << "\tName ignored.\n"); + return true; + } + + StringRef NodeTypeName = [Node] { + const ASTContext &Ctx = Node->getASTContext(); + const SourceManager &SM = Ctx.getSourceManager(); + SourceLocation B = Node->getTypeSpecStartLoc(); + SourceLocation E = Node->getTypeSpecEndLoc(); + LangOptions LO; + + LLVM_DEBUG(llvm::dbgs() << "\tType name code is '" + << Lexer::getSourceText( + CharSourceRange::getTokenRange(B, E), SM, LO) + << "'...\n"); + if (B.isMacroID()) { + LLVM_DEBUG(llvm::dbgs() << "\t\tBeginning is macro.\n"); + B = SM.getTopMacroCallerLoc(B); + } + if (E.isMacroID()) { + LLVM_DEBUG(llvm::dbgs() << "\t\tEnding is macro.\n"); + E = Lexer::getLocForEndOfToken(SM.getTopMacroCallerLoc(E), 0, SM, LO); + } + LLVM_DEBUG(llvm::dbgs() << "\tType name code is '" + << Lexer::getSourceText( + CharSourceRange::getTokenRange(B, E), SM, LO) + << "'...\n"); + + return Lexer::getSourceText(CharSourceRange::getTokenRange(B, E), SM, LO); + }(); + + LLVM_DEBUG(llvm::dbgs() << "\tType name is '" << NodeTypeName << "'\n"); + if (!NodeTypeName.empty()) { + if (llvm::any_of(Check.IgnoredParameterTypeSuffixes, + [NodeTypeName](const std::string &E) { + return !E.empty() && NodeTypeName.endswith(E); + })) { + LLVM_DEBUG(llvm::dbgs() << "\tType suffix ignored.\n"); + return true; + } + } + + return false; +} + +} // namespace filter + +/// Matches functions that have at least the specified amount of parameters. +AST_MATCHER_P(FunctionDecl, parameterCountGE, unsigned, N) { + return Node.getNumParams() >= N; +} + +/// Matches *any* overloaded unary and binary operators. +AST_MATCHER(FunctionDecl, isOverloadedUnaryOrBinaryOperator) { + switch (Node.getOverloadedOperator()) { + case OO_None: + case OO_New: + case OO_Delete: + case OO_Array_New: + case OO_Array_Delete: + case OO_Conditional: + case OO_Coawait: + return false; + + default: + return Node.getNumParams() <= 2; + } +} + +/// Returns the DefaultMinimumLength if the Value of requested minimum length +/// is less than 2. Minimum lengths of 0 or 1 are not accepted. +static inline unsigned clampMinimumLength(const unsigned Value) { + return Value < 2 ? DefaultMinimumLength : Value; +} + +// FIXME: Maybe unneeded, getNameForDiagnostic() is expected to change to return +// a crafted location when the node itself is unnamed. (See D84658, D85033.) +/// Returns the diagnostic-friendly name of the node, or empty string. +static SmallString<64> getName(const NamedDecl *ND) { + SmallString<64> Name; + llvm::raw_svector_ostream OS{Name}; + ND->getNameForDiagnostic(OS, ND->getASTContext().getPrintingPolicy(), false); + return Name; +} + +/// Returns the diagnostic-friendly name of the node, or a constant value. +static SmallString<64> getNameOrUnnamed(const NamedDecl *ND) { + auto Name = getName(ND); + if (Name.empty()) + Name = ""; + return Name; +} + +EasilySwappableParametersCheck::EasilySwappableParametersCheck( + StringRef Name, ClangTidyContext *Context) + : ClangTidyCheck(Name, Context), + MinimumLength(clampMinimumLength( + Options.get("MinimumLength", DefaultMinimumLength))), + IgnoredParameterNames(optutils::parseStringList( + Options.get("IgnoredParameterNames", DefaultIgnoredParameterNames))), + IgnoredParameterTypeSuffixes(optutils::parseStringList( + Options.get("IgnoredParameterTypeSuffixes", + DefaultIgnoredParameterTypeSuffixes))) {} + +void EasilySwappableParametersCheck::storeOptions( + ClangTidyOptions::OptionMap &Opts) { + Options.store(Opts, "MinimumLength", MinimumLength); + Options.store(Opts, "IgnoredParameterNames", + optutils::serializeStringList(IgnoredParameterNames)); + Options.store(Opts, "IgnoredParameterTypeSuffixes", + optutils::serializeStringList(IgnoredParameterTypeSuffixes)); +} + +void EasilySwappableParametersCheck::registerMatchers(MatchFinder *Finder) { + const auto BaseConstraints = functionDecl( + // Only report for definition nodes, as fixing the issues reported + // requires the user to be able to change code. + isDefinition(), parameterCountGE(MinimumLength), + unless(isOverloadedUnaryOrBinaryOperator())); + + Finder->addMatcher( + functionDecl(BaseConstraints, + unless(ast_matchers::isTemplateInstantiation())) + .bind("func"), + this); + Finder->addMatcher( + functionDecl(BaseConstraints, isExplicitTemplateSpecialization()) + .bind("func"), + this); +} + +void EasilySwappableParametersCheck::check( + const MatchFinder::MatchResult &Result) { + const auto *FD = Result.Nodes.getNodeAs("func"); + assert(FD); + + const PrintingPolicy &PP = FD->getASTContext().getPrintingPolicy(); + std::size_t NumParams = FD->getNumParams(); + std::size_t MixableRangeStartIndex = 0; + + LLVM_DEBUG(llvm::dbgs() << "Begin analysis of " << getName(FD) << " with " + << NumParams << " parameters...\n"); + while (MixableRangeStartIndex < NumParams) { + if (filter::isIgnoredParameter(*this, + FD->getParamDecl(MixableRangeStartIndex))) { + LLVM_DEBUG(llvm::dbgs() + << "Parameter #" << MixableRangeStartIndex << " ignored.\n"); + ++MixableRangeStartIndex; + continue; + } + + model::MixableParameterRange R = + model::modelMixingRange(*this, FD, MixableRangeStartIndex); + assert(R.NumParamsChecked > 0 && "Ensure forward progress!"); + MixableRangeStartIndex += R.NumParamsChecked; + if (R.NumParamsChecked < MinimumLength) { + LLVM_DEBUG(llvm::dbgs() << "Ignoring range of " << R.NumParamsChecked + << " lower than limit.\n"); + continue; + } + + const ParmVarDecl *First = R.getFirstParam(), *Last = R.getLastParam(); + std::string FirstParamTypeAsWritten = First->getType().getAsString(PP); + { + StringRef DiagText = "%0 adjacent parameters of %1 of similar type " + "('%2') are easily swapped by mistake"; + // TODO: This logic will get extended here with future flags. + + auto Diag = diag(First->getOuterLocStart(), DiagText) + << static_cast(R.NumParamsChecked) << FD + << FirstParamTypeAsWritten; + + CharSourceRange HighlightRange = CharSourceRange::getTokenRange( + First->getBeginLoc(), Last->getEndLoc()); + Diag << HighlightRange; + } + + // There is a chance that the previous highlight did not succeed, e.g. when + // the two parameters are on different lines. For clarity, show the user + // the involved variable explicitly. + diag(First->getLocation(), "the first parameter in the range is '%0'", + DiagnosticIDs::Note) + << getNameOrUnnamed(First) + << CharSourceRange::getTokenRange(First->getLocation(), + First->getLocation()); + diag(Last->getLocation(), "the last parameter in the range is '%0'", + DiagnosticIDs::Note) + << getNameOrUnnamed(Last) + << CharSourceRange::getTokenRange(Last->getLocation(), + Last->getLocation()); + } +} + +} // namespace bugprone +} // namespace tidy +} // namespace clang diff --git a/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.h b/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.h new file mode 100644 index 0000000000000..6d236a205cef5 --- /dev/null +++ b/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.h @@ -0,0 +1,47 @@ +//===--- EasilySwappableParametersCheck.h - clang-tidy ----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_EASILYSWAPPABLEPARAMETERSCHECK_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_EASILYSWAPPABLEPARAMETERSCHECK_H + +#include "../ClangTidyCheck.h" + +namespace clang { +namespace tidy { +namespace bugprone { + +/// Finds function definitions where parameters of convertible types follow +/// each other directly, making call sites prone to calling the function with +/// swapped (or badly ordered) arguments. +/// +/// For the user-facing documentation see: +/// http://clang.llvm.org/extra/clang-tidy/checks/bugprone-easily-swappable-parameters.html +class EasilySwappableParametersCheck : public ClangTidyCheck { +public: + EasilySwappableParametersCheck(StringRef Name, ClangTidyContext *Context); + void registerMatchers(ast_matchers::MatchFinder *Finder) override; + void check(const ast_matchers::MatchFinder::MatchResult &Result) override; + void storeOptions(ClangTidyOptions::OptionMap &Opts) override; + + /// The minimum length of an adjacent swappable parameter range required for + /// a diagnostic. + const std::size_t MinimumLength; + + /// The parameter names (as written in the source text) to be ignored. + const std::vector IgnoredParameterNames; + + /// The parameter typename suffixes (as written in the source code) to be + /// ignored. + const std::vector IgnoredParameterTypeSuffixes; +}; + +} // namespace bugprone +} // namespace tidy +} // namespace clang + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_EASILYSWAPPABLEPARAMETERSCHECK_H diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 28cb3b2c55529..e9b186a81b391 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -108,6 +108,13 @@ New checks Finds inner loops that have not been unrolled, as well as fully unrolled loops with unknown loops bounds or a large number of iterations. +- New :doc:`bugprone-easily-swappable-parameters + ` check. + + Finds function definitions where parameters of convertible types follow each + other directly, making call sites prone to calling the function with + swapped (or badly ordered) arguments. + - New :doc:`cppcoreguidelines-prefer-member-initializer ` check. diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone-easily-swappable-parameters.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone-easily-swappable-parameters.rst new file mode 100644 index 0000000000000..bc4e3220428d3 --- /dev/null +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone-easily-swappable-parameters.rst @@ -0,0 +1,113 @@ +.. title:: clang-tidy - bugprone-easily-swappable-parameters + +bugprone-easily-swappable-parameters +==================================== + +Finds function definitions where parameters of convertible types follow each +other directly, making call sites prone to calling the function with +swapped (or badly ordered) arguments. + +.. code-block:: c++ + + void drawPoint(int X, int Y) { /* ... */ } + FILE *open(const char *Dir, const char *Name, Flags Mode) { /* ... */ } + +A potential call like ``drawPoint(-2, 5)`` or ``openPath("a.txt", "tmp", Read)`` +is perfectly legal from the language's perspective, but might not be what the +developer of the function intended. + +More elaborate and type-safe constructs, such as opaque typedefs or strong +types should be used instead, to prevent a mistaken order of arguments. + +.. code-block:: c++ + + struct Coord2D { int X; int Y; }; + void drawPoint(const Coord2D Pos) { /* ... */ } + + FILE *open(const Path &Dir, const Filename &Name, Flags Mode) { /* ... */ } + +Due to the potentially elaborate refactoring and API-breaking that is necessary +to strengthen the type safety of a project, no automatic fix-its are offered. + +Options +------- + +Filtering options +^^^^^^^^^^^^^^^^^ + +Filtering options can be used to lessen the size of the diagnostics emitted by +the checker, whether the aim is to ignore certain constructs or dampen the +noisiness. + +.. option:: MinimumLength + + The minimum length required from an adjacent parameter sequence to be + diagnosed. + Defaults to `2`. + Might be any positive integer greater or equal to `2`. + If `0` or `1` is given, the default value `2` will be used instead. + + For example, if `3` is specified, the examples above will not be matched. + +.. option:: IgnoredParameterNames + + The list of parameter **names** that should never be considered part of a + swappable adjacent parameter sequence. + The value is a `;`-separated list of names. + To ignore unnamed parameters, add `""` to the list verbatim (not the + empty string, but the two quotes, potentially escaped!). + **This options is case-sensitive!** + + By default, the following parameter names, and their Uppercase-initial + variants are ignored: + `""` (unnamed parameters), `iterator`, `begin`, `end`, `first`, `last`, + `lhs`, `rhs`. + +.. option:: IgnoredParameterTypeSuffixes + + The list of parameter **type name suffixes** that should never be + considered part of a swappable adjacent parameter sequence. + Parameters which type, as written in the source code, end with an element + of this option will be ignored. + The value is a `;`-separated list of names. + **This option is case-sensitive!** + + By default, the following, and their lowercase-initial variants are ignored: + `bool`, `It`, `Iterator`, `InputIt`, `ForwardIt`, `BidirIt`, `RandomIt`, + `random_iterator`, `ReverseIt`, `reverse_iterator`, + `reverse_const_iterator`, `RandomIt`, `random_iterator`, `ReverseIt`, + `reverse_iterator`, `reverse_const_iterator`, `Const_Iterator`, + `ConstIterator`, `const_reverse_iterator`, `ConstReverseIterator`. + In addition, `_Bool` (but not `_bool`) is also part of the default value. + + +Limitations +----------- + +**This check is designed to check function signatures!** + +The check does not investigate functions that are generated by the compiler +in a context that is only determined from a call site. +These cases include variadic functions, functions in C code that do not have +an argument list, and C++ template instantiations. +Most of these cases, which are otherwise swappable from a caller's standpoint, +have no way of getting "fixed" at the definition point. +In the case of C++ templates, only primary template definitions and explicit +specialisations are matched and analysed. + +None of the following cases produce a diagnostic: + +.. code-block:: c++ + + int printf(const char *Format, ...) { /* ... */ } + int someOldCFunction() { /* ... */ } + + template + int add(T X, U Y) { return X + Y }; + + void TheseAreNotWarnedAbout() { + printf("%d %d\n", 1, 2); // Two ints passed, they could be swapped. + someOldCFunction(1, 2, 3); // Similarly, multiple ints passed. + + add(1, 2); // Instantiates 'add', but that's not a user-defined function. + } diff --git a/clang-tools-extra/docs/clang-tidy/checks/list.rst b/clang-tools-extra/docs/clang-tidy/checks/list.rst index 913c7fde26a56..83ec376c401f0 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/list.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/list.rst @@ -59,6 +59,7 @@ Clang-Tidy Checks `bugprone-copy-constructor-init `_, "Yes" `bugprone-dangling-handle `_, `bugprone-dynamic-static-initializers `_, + `bugprone-easily-swappable-parameters `_, `bugprone-exception-escape `_, `bugprone-fold-init-type `_, `bugprone-forward-declaration-namespace `_, diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-ignore.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-ignore.cpp new file mode 100644 index 0000000000000..c04b1bab74e31 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-ignore.cpp @@ -0,0 +1,33 @@ +// RUN: %check_clang_tidy %s bugprone-easily-swappable-parameters %t \ +// RUN: -config='{CheckOptions: [ \ +// RUN: {key: bugprone-easily-swappable-parameters.MinimumLength, value: 2}, \ +// RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterNames, value: "\"\";Foo;Bar"}, \ +// RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterTypeSuffixes, value: "T"} \ +// RUN: ]}' -- + +void ignoredUnnamed(int I, int, int) {} // NO-WARN: No >= 2 length of non-unnamed. + +void nothingIgnored(int I, int J) {} +// CHECK-MESSAGES: :[[@LINE-1]]:21: warning: 2 adjacent parameters of 'nothingIgnored' of similar type ('int') are easily swapped by mistake [bugprone-easily-swappable-parameters] +// CHECK-MESSAGES: :[[@LINE-2]]:25: note: the first parameter in the range is 'I' +// CHECK-MESSAGES: :[[@LINE-3]]:32: note: the last parameter in the range is 'J' + +void ignoredParameter(int Foo, int I, int J) {} +// CHECK-MESSAGES: :[[@LINE-1]]:32: warning: 2 adjacent parameters of 'ignoredParameter' of similar type ('int') +// CHECK-MESSAGES: :[[@LINE-2]]:36: note: the first parameter in the range is 'I' +// CHECK-MESSAGES: :[[@LINE-3]]:43: note: the last parameter in the range is 'J' + +void ignoredParameterBoth(int Foo, int Bar) {} // NO-WARN. + +struct S {}; +struct T {}; +struct MyT {}; + +void notIgnoredType(S S1, S S2) {} +// CHECK-MESSAGES: :[[@LINE-1]]:21: warning: 2 adjacent parameters of 'notIgnoredType' of similar type ('S') +// CHECK-MESSAGES: :[[@LINE-2]]:23: note: the first parameter in the range is 'S1' +// CHECK-MESSAGES: :[[@LINE-3]]:29: note: the last parameter in the range is 'S2' + +void ignoredTypeExact(T T1, T T2) {} // NO-WARN. + +void ignoredTypeSuffix(MyT M1, MyT M2) {} // NO-WARN. diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-len2.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-len2.cpp new file mode 100644 index 0000000000000..f1c8c277d50a0 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-len2.cpp @@ -0,0 +1,188 @@ +// RUN: %check_clang_tidy %s bugprone-easily-swappable-parameters %t \ +// RUN: -config='{CheckOptions: [ \ +// RUN: {key: bugprone-easily-swappable-parameters.MinimumLength, value: 2}, \ +// RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterNames, value: ""}, \ +// RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterTypeSuffixes, value: ""} \ +// RUN: ]}' -- + +namespace std { +using size_t = decltype(sizeof(int)); +} // namespace std + +#define assert(X) ((void)(X)) + +void declaration(int Param, int Other); // NO-WARN: No chance to change this function. + +struct S {}; + +S *allocate() { return nullptr; } // NO-WARN: 0 parameters. +void allocate(S **Out) {} // NO-WARN: 1 parameter. +bool operator<(const S &LHS, const S &RHS) { return true; } // NO-WARN: Binary operator. + +struct MyComparator { + bool operator()(const S &LHS, const S &RHS) { return true; } // NO-WARN: Binary operator. +}; + +struct MyFactory { + S operator()() { return {}; } // NO-WARN: 0 parameters, overloaded operator. + S operator()(int I) { return {}; } // NO-WARN: 1 parameter, overloaded operator. + S operator()(int I, int J) { return {}; } // NO-WARN: Binary operator. + + S operator()(int I, int J, int K) { return {}; } + // CHECK-MESSAGES: :[[@LINE-1]]:16: warning: 3 adjacent parameters of 'operator()' of similar type ('int') are easily swapped by mistake [bugprone-easily-swappable-parameters] + // CHECK-MESSAGES: :[[@LINE-2]]:20: note: the first parameter in the range is 'I' + // CHECK-MESSAGES: :[[@LINE-3]]:34: note: the last parameter in the range is 'K' +}; + +// Variadic functions are not checked because the types are not seen from the +// *definition*. It would require analysing the call sites to do something +// for these. +int printf(const char *Format, ...) { return 0; } // NO-WARN: Variadic function not checked. +int sum(...) { return 0; } // NO-WARN: Variadic function not checked. + +void *operator new(std::size_t Count, S &Manager, S &Janitor) noexcept { return nullptr; } +// CHECK-MESSAGES: :[[@LINE-1]]:39: warning: 2 adjacent parameters of 'operator new' of similar type ('S &') +// CHECK-MESSAGES: :[[@LINE-2]]:42: note: the first parameter in the range is 'Manager' +// CHECK-MESSAGES: :[[@LINE-3]]:54: note: the last parameter in the range is 'Janitor' + +void redeclChain(int, int, int); +void redeclChain(int I, int, int); +void redeclChain(int, int J, int); +void redeclChain(int I, int J, int K) {} +// CHECK-MESSAGES: :[[@LINE-1]]:18: warning: 3 adjacent parameters of 'redeclChain' of similar type ('int') +// CHECK-MESSAGES: :[[@LINE-2]]:22: note: the first parameter in the range is 'I' +// CHECK-MESSAGES: :[[@LINE-3]]:36: note: the last parameter in the range is 'K' + +void copyMany(S *Src, S *Dst, unsigned Num) {} +// CHECK-MESSAGES: :[[@LINE-1]]:15: warning: 2 adjacent parameters of 'copyMany' of similar type ('S *') +// CHECK-MESSAGES: :[[@LINE-2]]:18: note: the first parameter in the range is 'Src' +// CHECK-MESSAGES: :[[@LINE-3]]:26: note: the last parameter in the range is 'Dst' + +template +bool binaryPredicate(T L, U R) { return false; } // NO-WARN: Distinct types in template. + +template <> // Explicit specialisation. +bool binaryPredicate(S *L, S *R) { return true; } +// CHECK-MESSAGES: :[[@LINE-1]]:22: warning: 2 adjacent parameters of 'binaryPredicate' of similar type ('S *') +// CHECK-MESSAGES: :[[@LINE-2]]:25: note: the first parameter in the range is 'L' +// CHECK-MESSAGES: :[[@LINE-3]]:31: note: the last parameter in the range is 'R' + +template +T algebraicOperation(T L, T R) { return L; } +// CHECK-MESSAGES: :[[@LINE-1]]:22: warning: 2 adjacent parameters of 'algebraicOperation' of similar type ('T') +// CHECK-MESSAGES: :[[@LINE-2]]:24: note: the first parameter in the range is 'L' +// CHECK-MESSAGES: :[[@LINE-3]]:29: note: the last parameter in the range is 'R' + +void applyBinaryToS(S SInstance) { // NO-WARN: 1 parameter. + assert(binaryPredicate(SInstance, SInstance) != + binaryPredicate(&SInstance, &SInstance)); + // NO-WARN: binaryPredicate(S, S) is instantiated, but it's not written + // by the user. +} + +void unnamedParameter(int I, int, int K, int) {} +// CHECK-MESSAGES: :[[@LINE-1]]:23: warning: 4 adjacent parameters of 'unnamedParameter' of similar type ('int') +// CHECK-MESSAGES: :[[@LINE-2]]:27: note: the first parameter in the range is 'I' +// CHECK-MESSAGES: :[[@LINE-3]]:45: note: the last parameter in the range is '' + +void fullyUnnamed(int, int) {} +// CHECK-MESSAGES: :[[@LINE-1]]:19: warning: 2 adjacent parameters of 'fullyUnnamed' of similar type ('int') +// CHECK-MESSAGES: :[[@LINE-2]]:22: note: the first parameter in the range is '' +// CHECK-MESSAGES: :[[@LINE-3]]:27: note: the last parameter in the range is '' + +void multipleDistinctTypes(int I, int J, long L, long M) {} +// CHECK-MESSAGES: :[[@LINE-1]]:28: warning: 2 adjacent parameters of 'multipleDistinctTypes' of similar type ('int') +// CHECK-MESSAGES: :[[@LINE-2]]:32: note: the first parameter in the range is 'I' +// CHECK-MESSAGES: :[[@LINE-3]]:39: note: the last parameter in the range is 'J' +// CHECK-MESSAGES: :[[@LINE-4]]:42: warning: 2 adjacent parameters of 'multipleDistinctTypes' of similar type ('long') +// CHECK-MESSAGES: :[[@LINE-5]]:47: note: the first parameter in the range is 'L' +// CHECK-MESSAGES: :[[@LINE-6]]:55: note: the last parameter in the range is 'M' + +void variableAndPtr(int I, int *IP) {} // NO-WARN: Not the same type. + +void differentPtrs(int *IP, long *LP) {} // NO-WARN: Not the same type. + +typedef int MyInt1; +using MyInt2 = int; + +void typedefAndTypedef1(MyInt1 I1, MyInt1 I2) {} +// CHECK-MESSAGES: :[[@LINE-1]]:25: warning: 2 adjacent parameters of 'typedefAndTypedef1' of similar type ('MyInt1') +// CHECK-MESSAGES: :[[@LINE-2]]:32: note: the first parameter in the range is 'I1' +// CHECK-MESSAGES: :[[@LINE-3]]:43: note: the last parameter in the range is 'I2' + +void typedefAndTypedef2(MyInt2 I1, MyInt2 I2) {} +// CHECK-MESSAGES: :[[@LINE-1]]:25: warning: 2 adjacent parameters of 'typedefAndTypedef2' of similar type ('MyInt2') +// CHECK-MESSAGES: :[[@LINE-2]]:32: note: the first parameter in the range is 'I1' +// CHECK-MESSAGES: :[[@LINE-3]]:43: note: the last parameter in the range is 'I2' + +void throughTypedef(int I, MyInt1 J) {} +// CHECK-MESSAGES: :[[@LINE-1]]:21: warning: 2 adjacent parameters of 'throughTypedef' of similar type ('int') +// CHECK-MESSAGES: :[[@LINE-2]]:25: note: the first parameter in the range is 'I' +// CHECK-MESSAGES: :[[@LINE-3]]:35: note: the last parameter in the range is 'J' + +void betweenTypedef(MyInt1 I, MyInt2 J) {} +// CHECK-MESSAGES: :[[@LINE-1]]:21: warning: 2 adjacent parameters of 'betweenTypedef' of similar type ('MyInt1') +// CHECK-MESSAGES: :[[@LINE-2]]:28: note: the first parameter in the range is 'I' +// CHECK-MESSAGES: :[[@LINE-3]]:38: note: the last parameter in the range is 'J' + +typedef long MyLong1; +using MyLong2 = long; + +void throughTypedefToOtherType(MyInt1 I, MyLong1 J) {} // NO-WARN: Not the same type. + +void qualified1(int I, const int CI) {} // NO-WARN: Not the same type. + +void qualified2(int I, volatile int VI) {} // NO-WARN: Not the same type. + +void qualified3(int *IP, const int *CIP) {} // NO-WARN: Not the same type. + +void qualified4(const int CI, const long CL) {} // NO-WARN: Not the same type. + +using CInt = const int; + +void qualifiedThroughTypedef1(int I, CInt CI) {} // NO-WARN: Not the same type. + +void qualifiedThroughTypedef2(CInt CI1, const int CI2) {} // NO-WARN: Not the same type. +// CHECK-MESSAGES: :[[@LINE-1]]:31: warning: 2 adjacent parameters of 'qualifiedThroughTypedef2' of similar type ('CInt') +// CHECK-MESSAGES: :[[@LINE-2]]:36: note: the first parameter in the range is 'CI1' +// CHECK-MESSAGES: :[[@LINE-3]]:51: note: the last parameter in the range is 'CI2' + +void reference1(int I, int &IR) {} // NO-WARN: Not the same type. + +void reference2(int I, const int &CIR) {} // NO-WARN: Not the same type. + +void reference3(int I, int &&IRR) {} // NO-WARN: Not the same type. + +void reference4(int I, const int &&CIRR) {} // NO-WARN: Not the same type. + +template +struct Pair {}; + +void templateParam1(Pair P1, Pair P2) {} +// CHECK-MESSAGES: :[[@LINE-1]]:21: warning: 2 adjacent parameters of 'templateParam1' of similar type ('Pair') +// CHECK-MESSAGES: :[[@LINE-2]]:36: note: the first parameter in the range is 'P1' +// CHECK-MESSAGES: :[[@LINE-3]]:55: note: the last parameter in the range is 'P2' + +void templateParam2(Pair P1, Pair P2) {} +// CHECK-MESSAGES: :[[@LINE-1]]:21: warning: 2 adjacent parameters of 'templateParam2' of similar type ('Pair') +// CHECK-MESSAGES: :[[@LINE-2]]:37: note: the first parameter in the range is 'P1' +// CHECK-MESSAGES: :[[@LINE-3]]:57: note: the last parameter in the range is 'P2' + +void templateParam3(Pair P1, Pair P2) {} // NO-WARN: Not the same type. + +template +struct Coord {}; + +void templateAndOtherTemplate1(Pair P, Coord C) {} // NO-WARN: Not the same type. + +template +void templateVariadic1(Ts TVars...) {} // NO-WARN: Requires instantiation to check. + +template +void templateVariadic2(T TVar, Us... UVars) {} // NO-WARN: Distinct types in primary template. + +template <> +void templateVariadic2(int TVar, int UVars1, int UVars2) {} +// CHECK-MESSAGES: :[[@LINE-1]]:24: warning: 3 adjacent parameters of 'templateVariadic2' of similar type ('int') +// CHECK-MESSAGES: :[[@LINE-2]]:28: note: the first parameter in the range is 'TVar' +// CHECK-MESSAGES: :[[@LINE-3]]:50: note: the last parameter in the range is 'UVars2' diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-len3.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-len3.cpp new file mode 100644 index 0000000000000..10f8841368dfa --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-len3.cpp @@ -0,0 +1,24 @@ +// RUN: %check_clang_tidy %s bugprone-easily-swappable-parameters %t \ +// RUN: -config='{CheckOptions: [ \ +// RUN: {key: bugprone-easily-swappable-parameters.MinimumLength, value: 3}, \ +// RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterNames, value: ""}, \ +// RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterTypeSuffixes, value: ""} \ +// RUN: ]}' -- + +int add(int Left, int Right) { return Left + Right; } // NO-WARN: Only 2 parameters. + +int magic(int Left, int Right, int X, int Y) { return 0; } +// CHECK-MESSAGES: :[[@LINE-1]]:11: warning: 4 adjacent parameters of 'magic' of similar type ('int') are easily swapped by mistake [bugprone-easily-swappable-parameters] +// CHECK-MESSAGES: :[[@LINE-2]]:15: note: the first parameter in the range is 'Left' +// CHECK-MESSAGES: :[[@LINE-3]]:43: note: the last parameter in the range is 'Y' + +void multipleDistinctTypes(int I, int J, int K, + long L, long M, + double D, double E, double F) {} +// CHECK-MESSAGES: :[[@LINE-3]]:28: warning: 3 adjacent parameters of 'multipleDistinctTypes' of similar type ('int') +// CHECK-MESSAGES: :[[@LINE-4]]:32: note: the first parameter in the range is 'I' +// CHECK-MESSAGES: :[[@LINE-5]]:46: note: the last parameter in the range is 'K' +// NO-WARN: The [long, long] range is length of 2. +// CHECK-MESSAGES: :[[@LINE-5]]:28: warning: 3 adjacent parameters of 'multipleDistinctTypes' of similar type ('double') +// CHECK-MESSAGES: :[[@LINE-6]]:35: note: the first parameter in the range is 'D' +// CHECK-MESSAGES: :[[@LINE-7]]:55: note: the last parameter in the range is 'F' diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters.c b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters.c new file mode 100644 index 0000000000000..f52652e38636b --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters.c @@ -0,0 +1,148 @@ +// RUN: %check_clang_tidy %s bugprone-easily-swappable-parameters %t \ +// RUN: -config='{CheckOptions: [ \ +// RUN: {key: bugprone-easily-swappable-parameters.MinimumLength, value: 2}, \ +// RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterNames, value: ""}, \ +// RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterTypeSuffixes, value: "bool;MyBool;struct U;MAKE_LOGICAL_TYPE(int)"} \ +// RUN: ]}' -- -x c + +#define bool _Bool +#define true 1 +#define false 0 + +typedef bool MyBool; + +#define TheLogicalType bool + +void declVoid(void); // NO-WARN: Declaration only. +void decl(); // NO-WARN: Declaration only. +void oneParam(int I) {} // NO-WARN: 1 parameter. +void variadic(int I, ...) {} // NO-WARN: 1 visible parameter. + +void trivial(int I, int J) {} +// CHECK-MESSAGES: :[[@LINE-1]]:14: warning: 2 adjacent parameters of 'trivial' of similar type ('int') are easily swapped by mistake [bugprone-easily-swappable-parameters] +// CHECK-MESSAGES: :[[@LINE-2]]:18: note: the first parameter in the range is 'I' +// CHECK-MESSAGES: :[[@LINE-3]]:25: note: the last parameter in the range is 'J' + +void qualifier(int I, const int CI) {} // NO-WARN: Distinct types. + +void restrictQualifier(char *restrict CPR1, char *restrict CPR2) {} +// CHECK-MESSAGES: :[[@LINE-1]]:24: warning: 2 adjacent parameters of 'restrictQualifier' of similar type ('char *restrict') +// CHECK-MESSAGES: :[[@LINE-2]]:39: note: the first parameter in the range is 'CPR1' +// CHECK-MESSAGES: :[[@LINE-3]]:60: note: the last parameter in the range is 'CPR2' + +void pointer1(int *IP1, int *IP2) {} +// CHECK-MESSAGES: :[[@LINE-1]]:15: warning: 2 adjacent parameters of 'pointer1' of similar type ('int *') +// CHECK-MESSAGES: :[[@LINE-2]]:20: note: the first parameter in the range is 'IP1' +// CHECK-MESSAGES: :[[@LINE-3]]:30: note: the last parameter in the range is 'IP2' + +void pointerConversion(int *IP, long *LP) {} +// NO-WARN: Even though C can convert any T* to U* back and forth, compiler +// warnings already exist for this. + +void testVariadicsCall() { + int IVal = 1; + decl(IVal); // NO-WARN: Particular calls to "variadics" are like template + // instantiations, and we do not model them. + + variadic(IVal); // NO-WARN. + variadic(IVal, 2, 3, 4); // NO-WARN. +} + +struct S {}; +struct T {}; + +void taggedTypes1(struct S SVar, struct T TVar) {} // NO-WARN: Distinct types. + +void taggedTypes2(struct S SVar1, struct S SVar2) {} +// CHECK-MESSAGES: :[[@LINE-1]]:19: warning: 2 adjacent parameters of 'taggedTypes2' of similar type ('struct S') +// CHECK-MESSAGES: :[[@LINE-2]]:28: note: the first parameter in the range is 'SVar1' +// CHECK-MESSAGES: :[[@LINE-3]]:44: note: the last parameter in the range is 'SVar2' + +void wrappers(struct { int I; } I1, struct { int I; } I2) {} // NO-WARN: Distinct anonymous types. + +void knr(I, J) + int I; + int J; +{} +// CHECK-MESSAGES: :[[@LINE-3]]:3: warning: 2 adjacent parameters of 'knr' of similar type ('int') +// CHECK-MESSAGES: :[[@LINE-4]]:7: note: the first parameter in the range is 'I' +// CHECK-MESSAGES: :[[@LINE-4]]:7: note: the last parameter in the range is 'J' + +void boolAsWritten(bool B1, bool B2) {} // NO-WARN: The type name is ignored. +// Note that "bool" is a macro that expands to "_Bool" internally, but it is +// only "bool" that is ignored from the two. + +void underscoreBoolAsWritten(_Bool B1, _Bool B2) {} +// Even though it is "_Bool" that is written in the code, the diagnostic message +// respects the printing policy as defined by the compilation commands. Clang's +// default in C mode seems to say that the type itself is "bool", not "_Bool". +// CHECK-MESSAGES: :[[@LINE-4]]:30: warning: 2 adjacent parameters of 'underscoreBoolAsWritten' of similar type ('bool') +// CHECK-MESSAGES: :[[@LINE-5]]:36: note: the first parameter in the range is 'B1' +// CHECK-MESSAGES: :[[@LINE-6]]:46: note: the last parameter in the range is 'B2' + +void typedefdBoolAsWritten(MyBool MB1, MyBool MB2) {} // NO-WARN: "MyBool" as written type name ignored. + +void otherBoolMacroAsWritten(TheLogicalType TLT1, TheLogicalType TLT2) {} +// CHECK-MESSAGES: :[[@LINE-1]]:30: warning: 2 adjacent parameters of 'otherBoolMacroAsWritten' of similar type ('bool') +// CHECK-MESSAGES: :[[@LINE-2]]:45: note: the first parameter in the range is 'TLT1' +// CHECK-MESSAGES: :[[@LINE-3]]:66: note: the last parameter in the range is 'TLT2' + +struct U {}; +typedef struct U U; + +void typedefStruct(U X, U Y) {} +// CHECK-MESSAGES: :[[@LINE-1]]:20: warning: 2 adjacent parameters of 'typedefStruct' of similar type ('U') +// CHECK-MESSAGES: :[[@LINE-2]]:22: note: the first parameter in the range is 'X' +// CHECK-MESSAGES: :[[@LINE-3]]:27: note: the last parameter in the range is 'Y' + +void ignoredStructU(struct U X, struct U Y) {} // NO-WARN: "struct U" ignored. + +#define TYPE_TAG_TO_USE struct // We are in C! +#define MAKE_TYPE_NAME(T) TYPE_TAG_TO_USE T + +void macroMagic1(TYPE_TAG_TO_USE T X, TYPE_TAG_TO_USE T Y) {} +// CHECK-MESSAGES: :[[@LINE-1]]:18: warning: 2 adjacent parameters of 'macroMagic1' of similar type ('struct T') +// CHECK-MESSAGES: :[[@LINE-5]]:25: note: expanded from macro 'TYPE_TAG_TO_USE' +// CHECK-MESSAGES: :[[@LINE-3]]:36: note: the first parameter in the range is 'X' +// CHECK-MESSAGES: :[[@LINE-4]]:57: note: the last parameter in the range is 'Y' + +void macroMagic2(TYPE_TAG_TO_USE U X, TYPE_TAG_TO_USE U Y) {} +// "struct U" is ignored, but that is not what is written here! +// CHECK-MESSAGES: :[[@LINE-2]]:18: warning: 2 adjacent parameters of 'macroMagic2' of similar type ('struct U') +// CHECK-MESSAGES: :[[@LINE-12]]:25: note: expanded from macro 'TYPE_TAG_TO_USE' +// CHECK-MESSAGES: :[[@LINE-4]]:36: note: the first parameter in the range is 'X' +// CHECK-MESSAGES: :[[@LINE-5]]:57: note: the last parameter in the range is 'Y' + +void evenMoreMacroMagic1(MAKE_TYPE_NAME(T) X, MAKE_TYPE_NAME(T) Y) {} +// CHECK-MESSAGES: :[[@LINE-1]]:26: warning: 2 adjacent parameters of 'evenMoreMacroMagic1' of similar type ('struct T') +// CHECK-MESSAGES: :[[@LINE-17]]:27: note: expanded from macro 'MAKE_TYPE_NAME' +// CHECK-MESSAGES: :[[@LINE-19]]:25: note: expanded from macro 'TYPE_TAG_TO_USE' +// CHECK-MESSAGES: :[[@LINE-4]]:44: note: the first parameter in the range is 'X' +// CHECK-MESSAGES: :[[@LINE-5]]:65: note: the last parameter in the range is 'Y' + +void evenMoreMacroMagic2(MAKE_TYPE_NAME(U) X, MAKE_TYPE_NAME(U) Y) {} +// "struct U" is ignored, but that is not what is written here! +// CHECK-MESSAGES: :[[@LINE-2]]:26: warning: 2 adjacent parameters of 'evenMoreMacroMagic2' of similar type ('struct U') +// CHECK-MESSAGES: :[[@LINE-25]]:27: note: expanded from macro 'MAKE_TYPE_NAME' +// CHECK-MESSAGES: :[[@LINE-27]]:25: note: expanded from macro 'TYPE_TAG_TO_USE' +// CHECK-MESSAGES: :[[@LINE-5]]:44: note: the first parameter in the range is 'X' +// CHECK-MESSAGES: :[[@LINE-6]]:65: note: the last parameter in the range is 'Y' + +#define MAKE_PRIMITIVE_WRAPPER(WRAPPED_TYPE) \ + MAKE_TYPE_NAME() { \ + WRAPPED_TYPE Member; \ + } + +void thisIsGettingRidiculous(MAKE_PRIMITIVE_WRAPPER(int) I1, + MAKE_PRIMITIVE_WRAPPER(int) I2) {} // NO-WARN: Distinct anonymous types. + +#define MAKE_LOGICAL_TYPE(X) bool + +void macroMagic3(MAKE_LOGICAL_TYPE(char) B1, MAKE_LOGICAL_TYPE(long) B2) {} +// CHECK-MESSAGES: :[[@LINE-1]]:18: warning: 2 adjacent parameters of 'macroMagic3' of similar type ('bool') +// CHECK-MESSAGES: :[[@LINE-4]]:30: note: expanded from macro 'MAKE_LOGICAL_TYPE' +// CHECK-MESSAGES: :[[@LINE-136]]:14: note: expanded from macro 'bool' +// CHECK-MESSAGES: :[[@LINE-4]]:42: note: the first parameter in the range is 'B1' +// CHECK-MESSAGES: :[[@LINE-5]]:70: note: the last parameter in the range is 'B2' + +void macroMagic4(MAKE_LOGICAL_TYPE(int) B1, MAKE_LOGICAL_TYPE(int) B2) {} // NO-WARN: "Type name" ignored. From 26d864b44b9d3326984a7041124aa0f9e8ebc5cb Mon Sep 17 00:00:00 2001 From: Whisperity Date: Fri, 8 Nov 2019 19:58:23 +0100 Subject: [PATCH 013/619] [clang-tidy] Extend 'bugprone-easily-swappable-parameters' with `typedef` and `const &` diagnostics The base patch only deals with strict (canonical) type equality, which is merely a subset of all the dangerous function interfaces that we intend to find. In addition, in the base patch, canonical type equivalence is not diagnosed in a way that is immediately apparent to the user. This patch extends the check with two features: * Proper typedef diagnostics and explanations to the user. * "Reference bind power" matching. Case 2 is a necessary addition because in every case someone encounters a function `f(T t, const T& tr)`, any expression that might be passed to either can be passed to both. Thus, such adjacent parameter sequences should be matched. Reviewed By: aaron.ballman Differential Revision: http://reviews.llvm.org/D95736 --- .../EasilySwappableParametersCheck.cpp | 295 ++++++++++++++++-- .../bugprone-easily-swappable-parameters.rst | 25 ++ ...prone-easily-swappable-parameters-len2.cpp | 162 ++++++++-- 3 files changed, 443 insertions(+), 39 deletions(-) diff --git a/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.cpp index 07c8ef486f654..d9124e6c8361b 100644 --- a/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.cpp @@ -11,6 +11,7 @@ #include "clang/AST/ASTContext.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/Lex/Lexer.h" +#include "llvm/ADT/SmallSet.h" #define DEBUG_TYPE "EasilySwappableParametersCheck" #include "llvm/Support/Debug.h" @@ -76,12 +77,15 @@ namespace model { enum class MixFlags : unsigned char { Invalid = 0, //< Sentinel bit pattern. DO NOT USE! - None = 1, //< Mix between the two parameters is not possible. - Trivial = 2, //< The two mix trivially, and are the exact same type. - Canonical = 4, //< The two mix because the types refer to the same - // CanonicalType, but we do not elaborate as to how. + None = 1, //< Mix between the two parameters is not possible. + Trivial = 2, //< The two mix trivially, and are the exact same type. + Canonical = 4, //< The two mix because the types refer to the same + // CanonicalType, but we do not elaborate as to how. + TypeAlias = 8, //< The path from one type to the other involves + // desugaring type aliases. + ReferenceBind = 16, //< The mix involves the binding power of "const &". - LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue =*/Canonical) + LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue =*/ReferenceBind) }; LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); @@ -106,7 +110,7 @@ static inline std::string formatMixFlags(MixFlags F) { if (F == MixFlags::Invalid) return "#Inv!"; - SmallString<4> Str{"---"}; + SmallString<8> Str{"-----"}; if (hasFlag(F, MixFlags::None)) // Shows the None bit explicitly, as it can be applied in the recursion @@ -116,6 +120,10 @@ static inline std::string formatMixFlags(MixFlags F) { Str[1] = 'T'; if (hasFlag(F, MixFlags::Canonical)) Str[2] = 'C'; + if (hasFlag(F, MixFlags::TypeAlias)) + Str[3] = 't'; + if (hasFlag(F, MixFlags::ReferenceBind)) + Str[4] = '&'; return Str.str().str(); } @@ -129,13 +137,44 @@ static inline std::string formatMixFlags(MixFlags F); /// Contains the metadata for the mixability result between two types, /// independently of which parameters they were calculated from. struct MixData { + /// The flag bits of the mix indicating what language features allow for it. MixFlags Flags; + /// A potentially calculated common underlying type after desugaring, that + /// both sides of the mix can originate from. + QualType CommonType; + MixData(MixFlags Flags) : Flags(Flags) {} + MixData(MixFlags Flags, QualType CommonType) + : Flags(Flags), CommonType(CommonType) {} void sanitize() { assert(Flags != MixFlags::Invalid && "sanitize() called on invalid bitvec"); - // TODO: There will be statements here in further extensions of the check. + + if (hasFlag(Flags, MixFlags::None)) { + // If anywhere down the recursion a potential mix "path" is deemed + // impossible, throw away all the other bits because the mix is not + // possible. + Flags = MixFlags::None; + return; + } + + if (Flags == MixFlags::Trivial) + return; + + if (static_cast(Flags ^ MixFlags::Trivial)) + // If the mix involves somewhere trivial equivalence but down the + // recursion other bit(s) were set, remove the trivial bit, as it is not + // trivial. + Flags &= ~MixFlags::Trivial; + } + + MixData operator|(MixFlags EnableFlags) const { + return {Flags | EnableFlags, CommonType}; + } + MixData &operator|=(MixFlags EnableFlags) { + Flags |= EnableFlags; + return *this; } }; @@ -150,6 +189,7 @@ struct Mix { void sanitize() { Data.sanitize(); } MixFlags flags() const { return Data.Flags; } + QualType commonUnderlyingType() const { return Data.CommonType; } }; // NOLINTNEXTLINE(misc-redundant-expression): Seems to be a bogus warning. @@ -186,6 +226,11 @@ struct MixableParameterRange { } }; +static MixData isLRefEquallyBindingToType(const TheCheck &Check, + const LValueReferenceType *LRef, + QualType Ty, const ASTContext &Ctx, + bool IsRefRHS); + /// Approximate the way how LType and RType might refer to "essentially the /// same" type, in a sense that at a particular call site, an expression of /// type LType and RType might be successfully passed to a variable (in our @@ -204,24 +249,94 @@ static MixData calculateMixability(const TheCheck &Check, const QualType LType, if (LType == RType) { LLVM_DEBUG(llvm::dbgs() << "<<< calculateMixability. Trivial equality.\n"); - return {MixFlags::Trivial}; + return {MixFlags::Trivial, LType}; + } + + // Dissolve certain type sugars that do not affect the mixability of one type + // with the other, and also do not require any sort of elaboration for the + // user to understand. + if (isa(LType.getTypePtr())) { + LLVM_DEBUG(llvm::dbgs() << "--- calculateMixability. LHS is ParenType.\n"); + return calculateMixability(Check, LType.getSingleStepDesugaredType(Ctx), + RType, Ctx); + } + if (isa(RType.getTypePtr())) { + LLVM_DEBUG(llvm::dbgs() << "--- calculateMixability. RHS is ParenType.\n"); + return calculateMixability(Check, LType, + RType.getSingleStepDesugaredType(Ctx), Ctx); + } + + // Dissolve typedefs. + if (const auto *LTypedef = LType->getAs()) { + LLVM_DEBUG(llvm::dbgs() << "--- calculateMixability. LHS is typedef.\n"); + return calculateMixability(Check, LTypedef->desugar(), RType, Ctx) | + MixFlags::TypeAlias; + } + if (const auto *RTypedef = RType->getAs()) { + LLVM_DEBUG(llvm::dbgs() << "--- calculateMixability. RHS is typedef.\n"); + return calculateMixability(Check, LType, RTypedef->desugar(), Ctx) | + MixFlags::TypeAlias; } - // TODO: Implement more elaborate logic, such as typedef, implicit - // conversions, etc. + // At a particular call site, what could be passed to a 'T' or 'const T' might + // also be passed to a 'const T &' without the call site putting a direct + // side effect on the passed expressions. + if (const auto *LRef = LType->getAs()) { + LLVM_DEBUG(llvm::dbgs() << "--- calculateMixability. LHS is &.\n"); + return isLRefEquallyBindingToType(Check, LRef, RType, Ctx, false) | + MixFlags::ReferenceBind; + } + if (const auto *RRef = RType->getAs()) { + LLVM_DEBUG(llvm::dbgs() << "--- calculateMixability. RHS is &.\n"); + return isLRefEquallyBindingToType(Check, RRef, LType, Ctx, true) | + MixFlags::ReferenceBind; + } // If none of the previous logic found a match, try if Clang otherwise // believes the types to be the same. if (LType.getCanonicalType() == RType.getCanonicalType()) { LLVM_DEBUG(llvm::dbgs() << "<<< calculateMixability. Same CanonicalType.\n"); - return {MixFlags::Canonical}; + return {MixFlags::Canonical, LType.getCanonicalType()}; } LLVM_DEBUG(llvm::dbgs() << "<<< calculateMixability. No match found.\n"); return {MixFlags::None}; } +/// Calculates if the reference binds an expression of the given type. This is +/// true iff 'LRef' is some 'const T &' type, and the 'Ty' is 'T' or 'const T'. +static MixData isLRefEquallyBindingToType(const TheCheck &Check, + const LValueReferenceType *LRef, + QualType Ty, const ASTContext &Ctx, + bool IsRefRHS) { + LLVM_DEBUG(llvm::dbgs() << ">>> isLRefEquallyBindingToType for LRef:\n"; + LRef->dump(llvm::dbgs(), Ctx); llvm::dbgs() << "\nand Type:\n"; + Ty.dump(llvm::dbgs(), Ctx); llvm::dbgs() << '\n';); + + QualType ReferredType = LRef->getPointeeType(); + if (!ReferredType.isLocalConstQualified()) { + LLVM_DEBUG(llvm::dbgs() + << "<<< isLRefEquallyBindingToType. Not const ref.\n"); + return {MixFlags::None}; + }; + + QualType NonConstReferredType = ReferredType; + NonConstReferredType.removeLocalConst(); + if (ReferredType == Ty || NonConstReferredType == Ty) { + LLVM_DEBUG( + llvm::dbgs() + << "<<< isLRefEquallyBindingToType. Type of referred matches.\n"); + return {MixFlags::Trivial, ReferredType}; + } + + LLVM_DEBUG( + llvm::dbgs() + << "--- isLRefEquallyBindingToType. Checking mix for underlying type.\n"); + return IsRefRHS ? calculateMixability(Check, Ty, NonConstReferredType, Ctx) + : calculateMixability(Check, NonConstReferredType, Ty, Ctx); +} + static MixableParameterRange modelMixingRange(const TheCheck &Check, const FunctionDecl *FD, std::size_t StartIndex) { @@ -390,6 +505,85 @@ static SmallString<64> getNameOrUnnamed(const NamedDecl *ND) { return Name; } +/// Returns whether a particular Mix between two parameters should have the +/// types involved diagnosed to the user. This is only a flag check. +static inline bool needsToPrintTypeInDiagnostic(const model::Mix &M) { + return static_cast(M.flags() & (model::MixFlags::TypeAlias | + model::MixFlags::ReferenceBind)); +} + +namespace { + +/// Retains the elements called with and returns whether the call is done with +/// a new element. +template class InsertOnce { + llvm::SmallSet CalledWith; + +public: + bool operator()(E El) { return CalledWith.insert(std::move(El)).second; } + + bool calledWith(const E &El) const { return CalledWith.contains(El); } +}; + +struct SwappedEqualQualTypePair { + QualType LHSType, RHSType; + + bool operator==(const SwappedEqualQualTypePair &Other) const { + return (LHSType == Other.LHSType && RHSType == Other.RHSType) || + (LHSType == Other.RHSType && RHSType == Other.LHSType); + } + + bool operator<(const SwappedEqualQualTypePair &Other) const { + return LHSType < Other.LHSType && RHSType < Other.RHSType; + } +}; + +struct TypeAliasDiagnosticTuple { + QualType LHSType, RHSType, CommonType; + + bool operator==(const TypeAliasDiagnosticTuple &Other) const { + return CommonType == Other.CommonType && + ((LHSType == Other.LHSType && RHSType == Other.RHSType) || + (LHSType == Other.RHSType && RHSType == Other.LHSType)); + } + + bool operator<(const TypeAliasDiagnosticTuple &Other) const { + return CommonType < Other.CommonType && LHSType < Other.LHSType && + RHSType < Other.RHSType; + } +}; + +/// Helper class to only emit a diagnostic related to MixFlags::TypeAlias once. +class UniqueTypeAliasDiagnosticHelper + : public InsertOnce { + using Base = InsertOnce; + +public: + /// Returns whether the diagnostic for LHSType and RHSType which are both + /// referring to CommonType being the same has not been emitted already. + bool operator()(QualType LHSType, QualType RHSType, QualType CommonType) { + if (CommonType.isNull() || CommonType == LHSType || CommonType == RHSType) + return Base::operator()({LHSType, RHSType, {}}); + + TypeAliasDiagnosticTuple ThreeTuple{LHSType, RHSType, CommonType}; + if (!Base::operator()(ThreeTuple)) + return false; + + bool AlreadySaidLHSAndCommonIsSame = calledWith({LHSType, CommonType, {}}); + bool AlreadySaidRHSAndCommonIsSame = calledWith({RHSType, CommonType, {}}); + if (AlreadySaidLHSAndCommonIsSame && AlreadySaidRHSAndCommonIsSame) { + // "SomeInt == int" && "SomeOtherInt == int" => "Common(SomeInt, + // SomeOtherInt) == int", no need to diagnose it. Save the 3-tuple only + // for shortcut if it ever appears again. + return false; + } + + return true; + } +}; + +} // namespace + EasilySwappableParametersCheck::EasilySwappableParametersCheck( StringRef Name, ClangTidyContext *Context) : ClangTidyCheck(Name, Context), @@ -430,6 +624,9 @@ void EasilySwappableParametersCheck::registerMatchers(MatchFinder *Finder) { void EasilySwappableParametersCheck::check( const MatchFinder::MatchResult &Result) { + using namespace model; + using namespace filter; + const auto *FD = Result.Nodes.getNodeAs("func"); assert(FD); @@ -440,16 +637,15 @@ void EasilySwappableParametersCheck::check( LLVM_DEBUG(llvm::dbgs() << "Begin analysis of " << getName(FD) << " with " << NumParams << " parameters...\n"); while (MixableRangeStartIndex < NumParams) { - if (filter::isIgnoredParameter(*this, - FD->getParamDecl(MixableRangeStartIndex))) { + if (isIgnoredParameter(*this, FD->getParamDecl(MixableRangeStartIndex))) { LLVM_DEBUG(llvm::dbgs() << "Parameter #" << MixableRangeStartIndex << " ignored.\n"); ++MixableRangeStartIndex; continue; } - model::MixableParameterRange R = - model::modelMixingRange(*this, FD, MixableRangeStartIndex); + MixableParameterRange R = + modelMixingRange(*this, FD, MixableRangeStartIndex); assert(R.NumParamsChecked > 0 && "Ensure forward progress!"); MixableRangeStartIndex += R.NumParamsChecked; if (R.NumParamsChecked < MinimumLength) { @@ -458,16 +654,23 @@ void EasilySwappableParametersCheck::check( continue; } + bool NeedsAnyTypeNote = llvm::any_of(R.Mixes, needsToPrintTypeInDiagnostic); const ParmVarDecl *First = R.getFirstParam(), *Last = R.getLastParam(); std::string FirstParamTypeAsWritten = First->getType().getAsString(PP); { - StringRef DiagText = "%0 adjacent parameters of %1 of similar type " - "('%2') are easily swapped by mistake"; - // TODO: This logic will get extended here with future flags. + StringRef DiagText; + + if (NeedsAnyTypeNote) + DiagText = "%0 adjacent parameters of %1 of similar type are easily " + "swapped by mistake"; + else + DiagText = "%0 adjacent parameters of %1 of similar type ('%2') are " + "easily swapped by mistake"; auto Diag = diag(First->getOuterLocStart(), DiagText) - << static_cast(R.NumParamsChecked) << FD - << FirstParamTypeAsWritten; + << static_cast(R.NumParamsChecked) << FD; + if (!NeedsAnyTypeNote) + Diag << FirstParamTypeAsWritten; CharSourceRange HighlightRange = CharSourceRange::getTokenRange( First->getBeginLoc(), Last->getEndLoc()); @@ -487,6 +690,58 @@ void EasilySwappableParametersCheck::check( << getNameOrUnnamed(Last) << CharSourceRange::getTokenRange(Last->getLocation(), Last->getLocation()); + + // Helper classes to silence elaborative diagnostic notes that would be + // too verbose. + UniqueTypeAliasDiagnosticHelper UniqueTypeAlias; + InsertOnce UniqueBindPower; + + for (const Mix &M : R.Mixes) { + assert(M.flags() >= MixFlags::Trivial && + "Sentinel or false mix in result."); + + if (needsToPrintTypeInDiagnostic(M)) { + // Typedefs might result in the type of the variable needing to be + // emitted to a note diagnostic, so prepare it. + const ParmVarDecl *LVar = M.First; + const ParmVarDecl *RVar = M.Second; + QualType LType = LVar->getType(); + QualType RType = RVar->getType(); + QualType CommonType = M.commonUnderlyingType(); + std::string LTypeAsWritten = LType.getAsString(PP); + std::string RTypeAsWritten = RType.getAsString(PP); + std::string CommonTypeStr = CommonType.getAsString(PP); + + if (hasFlag(M.flags(), MixFlags::TypeAlias) && + UniqueTypeAlias(LType, RType, CommonType)) { + StringRef DiagText; + bool ExplicitlyPrintCommonType = false; + if (LTypeAsWritten == CommonTypeStr || + RTypeAsWritten == CommonTypeStr) + DiagText = + "after resolving type aliases, '%0' and '%1' are the same"; + else { + DiagText = "after resolving type aliases, the common type of '%0' " + "and '%1' is '%2'"; + ExplicitlyPrintCommonType = true; + } + + auto Diag = + diag(LVar->getOuterLocStart(), DiagText, DiagnosticIDs::Note) + << LTypeAsWritten << RTypeAsWritten; + if (ExplicitlyPrintCommonType) + Diag << CommonTypeStr; + } + + if (hasFlag(M.flags(), MixFlags::ReferenceBind) && + UniqueBindPower({LType, RType})) { + StringRef DiagText = "'%0' and '%1' parameters accept and bind the " + "same kind of values"; + diag(RVar->getOuterLocStart(), DiagText, DiagnosticIDs::Note) + << LTypeAsWritten << RTypeAsWritten; + } + } + } } } diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone-easily-swappable-parameters.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone-easily-swappable-parameters.rst index bc4e3220428d3..22224f39beab0 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone-easily-swappable-parameters.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone-easily-swappable-parameters.rst @@ -111,3 +111,28 @@ None of the following cases produce a diagnostic: add(1, 2); // Instantiates 'add', but that's not a user-defined function. } + +Due to the limitation above, parameters which type are further dependent upon +template instantiations to *prove* that they mix with another parameter's is +not diagnosed. + +.. code-block:: c++ + + template + struct Vector { + typedef T element_type; + }; + + // Diagnosed: Explicit instantiation was done by the user, we can prove it + // is the same type. + void Explicit(int A, Vector::element_type B) { /* ... */ } + + // Diagnosed: The two parameter types are exactly the same. + template + void Exact(typename Vector::element_type A, + typename Vector::element_type B) { /* ... */ } + + // Skipped: The two parameters are both 'T' but we can not prove this + // without actually instantiating. + template + void FalseNegative(T A, typename Vector::element_type B) { /* ... */ } diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-len2.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-len2.cpp index f1c8c277d50a0..9de0787b971d1 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-len2.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-len2.cpp @@ -115,20 +115,38 @@ void typedefAndTypedef2(MyInt2 I1, MyInt2 I2) {} // CHECK-MESSAGES: :[[@LINE-2]]:32: note: the first parameter in the range is 'I1' // CHECK-MESSAGES: :[[@LINE-3]]:43: note: the last parameter in the range is 'I2' -void throughTypedef(int I, MyInt1 J) {} -// CHECK-MESSAGES: :[[@LINE-1]]:21: warning: 2 adjacent parameters of 'throughTypedef' of similar type ('int') -// CHECK-MESSAGES: :[[@LINE-2]]:25: note: the first parameter in the range is 'I' -// CHECK-MESSAGES: :[[@LINE-3]]:35: note: the last parameter in the range is 'J' +void typedefMultiple(MyInt1 I1, MyInt2 I2x, MyInt2 I2y) {} +// CHECK-MESSAGES: :[[@LINE-1]]:22: warning: 3 adjacent parameters of 'typedefMultiple' of similar type are +// CHECK-MESSAGES: :[[@LINE-2]]:29: note: the first parameter in the range is 'I1' +// CHECK-MESSAGES: :[[@LINE-3]]:52: note: the last parameter in the range is 'I2y' +// CHECK-MESSAGES: :[[@LINE-4]]:22: note: after resolving type aliases, the common type of 'MyInt1' and 'MyInt2' is 'int' + +void throughTypedef1(int I, MyInt1 J) {} +// CHECK-MESSAGES: :[[@LINE-1]]:22: warning: 2 adjacent parameters of 'throughTypedef1' of similar type are +// CHECK-MESSAGES: :[[@LINE-2]]:26: note: the first parameter in the range is 'I' +// CHECK-MESSAGES: :[[@LINE-3]]:36: note: the last parameter in the range is 'J' +// CHECK-MESSAGES: :[[@LINE-4]]:22: note: after resolving type aliases, 'int' and 'MyInt1' are the same + +void betweenTypedef2(MyInt1 I, MyInt2 J) {} +// CHECK-MESSAGES: :[[@LINE-1]]:22: warning: 2 adjacent parameters of 'betweenTypedef2' of similar type are +// CHECK-MESSAGES: :[[@LINE-2]]:29: note: the first parameter in the range is 'I' +// CHECK-MESSAGES: :[[@LINE-3]]:39: note: the last parameter in the range is 'J' +// CHECK-MESSAGES: :[[@LINE-4]]:22: note: after resolving type aliases, the common type of 'MyInt1' and 'MyInt2' is 'int' + +typedef MyInt2 MyInt2b; -void betweenTypedef(MyInt1 I, MyInt2 J) {} -// CHECK-MESSAGES: :[[@LINE-1]]:21: warning: 2 adjacent parameters of 'betweenTypedef' of similar type ('MyInt1') -// CHECK-MESSAGES: :[[@LINE-2]]:28: note: the first parameter in the range is 'I' -// CHECK-MESSAGES: :[[@LINE-3]]:38: note: the last parameter in the range is 'J' +void typedefChain(int I, MyInt1 MI1, MyInt2 MI2, MyInt2b MI2b) {} +// CHECK-MESSAGES: :[[@LINE-1]]:19: warning: 4 adjacent parameters of 'typedefChain' of similar type are +// CHECK-MESSAGES: :[[@LINE-2]]:23: note: the first parameter in the range is 'I' +// CHECK-MESSAGES: :[[@LINE-3]]:58: note: the last parameter in the range is 'MI2b' +// CHECK-MESSAGES: :[[@LINE-4]]:19: note: after resolving type aliases, 'int' and 'MyInt1' are the same +// CHECK-MESSAGES: :[[@LINE-5]]:19: note: after resolving type aliases, 'int' and 'MyInt2' are the same +// CHECK-MESSAGES: :[[@LINE-6]]:19: note: after resolving type aliases, 'int' and 'MyInt2b' are the same typedef long MyLong1; using MyLong2 = long; -void throughTypedefToOtherType(MyInt1 I, MyLong1 J) {} // NO-WARN: Not the same type. +void throughTypedefToOtherType(MyInt1 I, MyLong1 J) {} // NO-WARN: int and long. void qualified1(int I, const int CI) {} // NO-WARN: Not the same type. @@ -142,18 +160,73 @@ using CInt = const int; void qualifiedThroughTypedef1(int I, CInt CI) {} // NO-WARN: Not the same type. -void qualifiedThroughTypedef2(CInt CI1, const int CI2) {} // NO-WARN: Not the same type. -// CHECK-MESSAGES: :[[@LINE-1]]:31: warning: 2 adjacent parameters of 'qualifiedThroughTypedef2' of similar type ('CInt') +void qualifiedThroughTypedef2(CInt CI1, const int CI2) {} +// CHECK-MESSAGES: :[[@LINE-1]]:31: warning: 2 adjacent parameters of 'qualifiedThroughTypedef2' of similar type are // CHECK-MESSAGES: :[[@LINE-2]]:36: note: the first parameter in the range is 'CI1' // CHECK-MESSAGES: :[[@LINE-3]]:51: note: the last parameter in the range is 'CI2' - -void reference1(int I, int &IR) {} // NO-WARN: Not the same type. - -void reference2(int I, const int &CIR) {} // NO-WARN: Not the same type. - -void reference3(int I, int &&IRR) {} // NO-WARN: Not the same type. - -void reference4(int I, const int &&CIRR) {} // NO-WARN: Not the same type. +// CHECK-MESSAGES: :[[@LINE-4]]:31: note: after resolving type aliases, 'CInt' and 'const int' are the same + +void qualifiedThroughTypedef3(CInt CI1, const MyInt1 CI2, const int CI3) {} // NO-WARN: Not the same type. + +void qualifiedThroughTypedef4(CInt CI1, const MyInt1 CI2, const MyInt2 CI3) {} +// CHECK-MESSAGES: :[[@LINE-1]]:41: warning: 2 adjacent parameters of 'qualifiedThroughTypedef4' of similar type are +// CHECK-MESSAGES: :[[@LINE-2]]:54: note: the first parameter in the range is 'CI2' +// CHECK-MESSAGES: :[[@LINE-3]]:72: note: the last parameter in the range is 'CI3' +// CHECK-MESSAGES: :[[@LINE-4]]:41: note: after resolving type aliases, the common type of 'const MyInt1' and 'const MyInt2' is 'int' + +void reference1(int I, int &IR) {} // NO-WARN: Distinct semantics when called. + +void reference2(int I, const int &CIR) {} +// CHECK-MESSAGES: :[[@LINE-1]]:17: warning: 2 adjacent parameters of 'reference2' of similar type are +// CHECK-MESSAGES: :[[@LINE-2]]:21: note: the first parameter in the range is 'I' +// CHECK-MESSAGES: :[[@LINE-3]]:35: note: the last parameter in the range is 'CIR' +// CHECK-MESSAGES: :[[@LINE-4]]:24: note: 'int' and 'const int &' parameters accept and bind the same kind of values + +void reference3(int I, int &&IRR) {} // NO-WARN: Distinct semantics when called. + +void reference4(int I, const int &&CIRR) {} // NO-WARN: Distinct semantics when called. + +void reference5(const int CI, const int &CIR) {} +// CHECK-MESSAGES: :[[@LINE-1]]:17: warning: 2 adjacent parameters of 'reference5' of similar type are +// CHECK-MESSAGES: :[[@LINE-2]]:27: note: the first parameter in the range is 'CI' +// CHECK-MESSAGES: :[[@LINE-3]]:42: note: the last parameter in the range is 'CIR' +// CHECK-MESSAGES: :[[@LINE-4]]:31: note: 'const int' and 'const int &' parameters accept and bind the same kind of values + +void reference6(int I, const int &CIR, int J, const int &CJR) {} +// CHECK-MESSAGES: :[[@LINE-1]]:17: warning: 4 adjacent parameters of 'reference6' of similar type are +// CHECK-MESSAGES: :[[@LINE-2]]:21: note: the first parameter in the range is 'I' +// CHECK-MESSAGES: :[[@LINE-3]]:58: note: the last parameter in the range is 'CJR' +// CHECK-MESSAGES: :[[@LINE-4]]:24: note: 'int' and 'const int &' parameters accept and bind the same kind of values + +using ICRTy = const int &; +using MyIntCRTy = const MyInt1 &; + +void referenceThroughTypedef(int I, ICRTy Builtin, MyIntCRTy MyInt) {} +// CHECK-MESSAGES: :[[@LINE-1]]:30: warning: 3 adjacent parameters of 'referenceThroughTypedef' of similar type are +// CHECK-MESSAGES: :[[@LINE-2]]:34: note: the first parameter in the range is 'I' +// CHECK-MESSAGES: :[[@LINE-3]]:62: note: the last parameter in the range is 'MyInt' +// CHECK-MESSAGES: :[[@LINE-4]]:30: note: after resolving type aliases, the common type of 'int' and 'ICRTy' is 'const int' +// CHECK-MESSAGES: :[[@LINE-5]]:37: note: 'int' and 'ICRTy' parameters accept and bind the same kind of values +// CHECK-MESSAGES: :[[@LINE-6]]:30: note: after resolving type aliases, 'int' and 'MyIntCRTy' are the same +// CHECK-MESSAGES: :[[@LINE-7]]:52: note: 'int' and 'MyIntCRTy' parameters accept and bind the same kind of values +// CHECK-MESSAGES: :[[@LINE-8]]:37: note: after resolving type aliases, the common type of 'ICRTy' and 'MyIntCRTy' is 'int' +// CHECK-MESSAGES: :[[@LINE-9]]:52: note: 'ICRTy' and 'MyIntCRTy' parameters accept and bind the same kind of values + +short const typedef int unsigned Eldritch; +typedef const unsigned short Holy; + +void collapse(Eldritch Cursed, Holy Blessed) {} +// CHECK-MESSAGES: :[[@LINE-1]]:15: warning: 2 adjacent parameters of 'collapse' of similar type are +// CHECK-MESSAGES: :[[@LINE-2]]:24: note: the first parameter in the range is 'Cursed' +// CHECK-MESSAGES: :[[@LINE-3]]:37: note: the last parameter in the range is 'Blessed' +// CHECK-MESSAGES: :[[@LINE-4]]:15: note: after resolving type aliases, the common type of 'Eldritch' and 'Holy' is 'const unsigned short' + +void collapseAndTypedef(Eldritch Cursed, const Holy &Blessed) {} +// CHECK-MESSAGES: :[[@LINE-1]]:25: warning: 2 adjacent parameters of 'collapseAndTypedef' of similar type are +// CHECK-MESSAGES: :[[@LINE-2]]:34: note: the first parameter in the range is 'Cursed' +// CHECK-MESSAGES: :[[@LINE-3]]:54: note: the last parameter in the range is 'Blessed' +// CHECK-MESSAGES: :[[@LINE-4]]:25: note: after resolving type aliases, the common type of 'Eldritch' and 'const Holy &' is 'const unsigned short' +// CHECK-MESSAGES: :[[@LINE-5]]:42: note: 'Eldritch' and 'const Holy &' parameters accept and bind the same kind of values template struct Pair {}; @@ -186,3 +259,54 @@ void templateVariadic2(int TVar, int UVars1, int UVars2) {} // CHECK-MESSAGES: :[[@LINE-1]]:24: warning: 3 adjacent parameters of 'templateVariadic2' of similar type ('int') // CHECK-MESSAGES: :[[@LINE-2]]:28: note: the first parameter in the range is 'TVar' // CHECK-MESSAGES: :[[@LINE-3]]:50: note: the last parameter in the range is 'UVars2' + +template +using TwoOf = Pair; + +void templateAndAliasTemplate(Pair P, TwoOf I) {} +// CHECK-MESSAGES: :[[@LINE-1]]:31: warning: 2 adjacent parameters of 'templateAndAliasTemplate' of similar type ('Pair') +// CHECK-MESSAGES: :[[@LINE-2]]:46: note: the first parameter in the range is 'P' +// CHECK-MESSAGES: :[[@LINE-3]]:60: note: the last parameter in the range is 'I' + +template +struct Vector { + typedef T element_type; + typedef T &reference_type; + typedef const T const_element_type; + typedef const T &const_reference_type; +}; + +void memberTypedef(int I, Vector::element_type E) {} +// CHECK-MESSAGES: :[[@LINE-1]]:20: warning: 2 adjacent parameters of 'memberTypedef' of similar type are +// CHECK-MESSAGES: :[[@LINE-2]]:24: note: the first parameter in the range is 'I' +// CHECK-MESSAGES: :[[@LINE-3]]:53: note: the last parameter in the range is 'E' +// CHECK-MESSAGES: :[[@LINE-4]]:20: note: after resolving type aliases, 'int' and 'Vector::element_type' are the same + +template +void memberTypedefDependent1(T T1, typename Vector::element_type T2) {} // NO-WARN: Dependent name is not instantiated and resolved against other type. + +template +void memberTypedefDependent2(typename Vector::element_type E1, + typename Vector::element_type E2) {} +// CHECK-MESSAGES: :[[@LINE-2]]:30: warning: 2 adjacent parameters of 'memberTypedefDependent2' of similar type ('typename Vector::element_type') +// CHECK-MESSAGES: :[[@LINE-3]]:63: note: the first parameter in the range is 'E1' +// CHECK-MESSAGES: :[[@LINE-3]]:63: note: the last parameter in the range is 'E2' + +template +void memberTypedefDependentReference1( + typename Vector::element_type E, + typename Vector::const_element_type &R) {} // NO-WARN: Not instantiated. + +template +void memberTypedefDependentReference2( + typename Vector::element_type E, + typename Vector::const_reference_type R) {} // NO-WARN: Not instantiated. + +template +void memberTypedefDependentReference3( + typename Vector::element_type E, + const typename Vector::element_type &R) {} +// CHECK-MESSAGES: :[[@LINE-2]]:5: warning: 2 adjacent parameters of 'memberTypedefDependentReference3' of similar type are +// CHECK-MESSAGES: :[[@LINE-3]]:38: note: the first parameter in the range is 'E' +// CHECK-MESSAGES: :[[@LINE-3]]:45: note: the last parameter in the range is 'R' +// CHECK-MESSAGES: :[[@LINE-4]]:5: note: 'typename Vector::element_type' and 'const typename Vector::element_type &' parameters accept and bind the same kind of values From 961e9e6af65ef097678c57fe5f1c18b825eb723f Mon Sep 17 00:00:00 2001 From: Whisperity Date: Wed, 20 Nov 2019 14:12:57 +0100 Subject: [PATCH 014/619] [clang-tidy] Extend 'bugprone-easily-swappable-parameters' with optionally considering differently qualified types mixable Adds a relaxation option QualifiersMix which will make the check report for cases where parameters refer to the same type if they only differ in qualifiers. This makes cases, such as the following, not warned about by default, produce a warning. void* memcpy(void* dst, const void* src, unsigned size) {} However, unless people meticulously const their local variables, unfortunately, even such a function carry a potential swap: T* obj = new T; // Not const!!! void* buf = malloc(sizeof(T)); memcpy(obj, buf, sizeof(T)); // ^~~ ^~~ accidental swap here, even though the interface "specified" a const. Reviewed By: aaron.ballman Differential Revision: http://reviews.llvm.org/D96355 --- .../EasilySwappableParametersCheck.cpp | 151 ++++++++++++++---- .../bugprone/EasilySwappableParametersCheck.h | 3 + .../bugprone-easily-swappable-parameters.rst | 28 ++++ ...one-easily-swappable-parameters-ignore.cpp | 3 +- ...prone-easily-swappable-parameters-len2.cpp | 72 ++++++--- ...prone-easily-swappable-parameters-len3.cpp | 3 +- ...y-swappable-parameters-qualifiermixing.cpp | 112 +++++++++++++ .../bugprone-easily-swappable-parameters.c | 3 +- 8 files changed, 320 insertions(+), 55 deletions(-) create mode 100644 clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-qualifiermixing.cpp diff --git a/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.cpp index d9124e6c8361b..8266ee62ada64 100644 --- a/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.cpp @@ -59,6 +59,9 @@ static const std::string DefaultIgnoredParameterTypeSuffixes = "Constreverseiterator", "constreverseiterator"}); +/// The default value for the QualifiersMix check option. +static constexpr bool DefaultQualifiersMix = false; + using namespace clang::ast_matchers; namespace clang { @@ -84,8 +87,9 @@ enum class MixFlags : unsigned char { TypeAlias = 8, //< The path from one type to the other involves // desugaring type aliases. ReferenceBind = 16, //< The mix involves the binding power of "const &". + Qualifiers = 32, //< The mix involves change in the qualifiers. - LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue =*/ReferenceBind) + LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue =*/Qualifiers) }; LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); @@ -110,7 +114,7 @@ static inline std::string formatMixFlags(MixFlags F) { if (F == MixFlags::Invalid) return "#Inv!"; - SmallString<8> Str{"-----"}; + SmallString<8> Str{"------"}; if (hasFlag(F, MixFlags::None)) // Shows the None bit explicitly, as it can be applied in the recursion @@ -124,6 +128,8 @@ static inline std::string formatMixFlags(MixFlags F) { Str[3] = 't'; if (hasFlag(F, MixFlags::ReferenceBind)) Str[4] = '&'; + if (hasFlag(F, MixFlags::Qualifiers)) + Str[5] = 'Q'; return Str.str().str(); } @@ -169,13 +175,24 @@ struct MixData { Flags &= ~MixFlags::Trivial; } + /// Add the specified flag bits to the flags. MixData operator|(MixFlags EnableFlags) const { return {Flags | EnableFlags, CommonType}; } + + /// Add the specified flag bits to the flags. MixData &operator|=(MixFlags EnableFlags) { Flags |= EnableFlags; return *this; } + + /// Add the specified qualifiers to the common type in the Mix. + MixData qualify(Qualifiers Quals) const { + SplitQualType Split = CommonType.split(); + Split.Quals.addQualifiers(Quals); + + return {Flags, QualType(Split.Ty, Split.Quals.getAsOpaqueValue())}; + } }; /// A named tuple that contains the information for a mix between two concrete @@ -266,18 +283,6 @@ static MixData calculateMixability(const TheCheck &Check, const QualType LType, RType.getSingleStepDesugaredType(Ctx), Ctx); } - // Dissolve typedefs. - if (const auto *LTypedef = LType->getAs()) { - LLVM_DEBUG(llvm::dbgs() << "--- calculateMixability. LHS is typedef.\n"); - return calculateMixability(Check, LTypedef->desugar(), RType, Ctx) | - MixFlags::TypeAlias; - } - if (const auto *RTypedef = RType->getAs()) { - LLVM_DEBUG(llvm::dbgs() << "--- calculateMixability. RHS is typedef.\n"); - return calculateMixability(Check, LType, RTypedef->desugar(), Ctx) | - MixFlags::TypeAlias; - } - // At a particular call site, what could be passed to a 'T' or 'const T' might // also be passed to a 'const T &' without the call site putting a direct // side effect on the passed expressions. @@ -292,6 +297,59 @@ static MixData calculateMixability(const TheCheck &Check, const QualType LType, MixFlags::ReferenceBind; } + // Dissolve typedefs after the qualifiers outside the typedef are dealt with. + if (LType->getAs()) { + LLVM_DEBUG(llvm::dbgs() << "--- calculateMixability. LHS is typedef.\n"); + return calculateMixability(Check, LType.getSingleStepDesugaredType(Ctx), + RType, Ctx) | + MixFlags::TypeAlias; + } + if (RType->getAs()) { + LLVM_DEBUG(llvm::dbgs() << "--- calculateMixability. RHS is typedef.\n"); + return calculateMixability(Check, LType, + RType.getSingleStepDesugaredType(Ctx), Ctx) | + MixFlags::TypeAlias; + } + + // A parameter of type 'cvr1 T' and another of potentially differently + // qualified 'cvr2 T' may bind with the same power, if the user so requested. + if (LType.getLocalCVRQualifiers() != RType.getLocalCVRQualifiers()) { + LLVM_DEBUG(if (LType.getLocalCVRQualifiers()) llvm::dbgs() + << "--- calculateMixability. LHS is CVR.\n"); + LLVM_DEBUG(if (RType.getLocalCVRQualifiers()) llvm::dbgs() + << "--- calculateMixability. RHS is CVR.\n"); + + if (!Check.QualifiersMix) { + LLVM_DEBUG(llvm::dbgs() + << "<<< calculateMixability. QualifiersMix turned off.\n"); + return {MixFlags::None}; + } + + return calculateMixability(Check, LType.getLocalUnqualifiedType(), + RType.getLocalUnqualifiedType(), Ctx) | + MixFlags::Qualifiers; + } + if (LType.getLocalCVRQualifiers() == RType.getLocalCVRQualifiers() && + LType.getLocalCVRQualifiers() != 0) { + LLVM_DEBUG(llvm::dbgs() + << "--- calculateMixability. LHS and RHS same CVR.\n"); + // Apply the same qualifier back into the found common type if we found + // a common type between the unqualified versions. + return calculateMixability(Check, LType.getLocalUnqualifiedType(), + RType.getLocalUnqualifiedType(), Ctx) + .qualify(LType.getLocalQualifiers()); + } + + if (LType->isPointerType() && RType->isPointerType()) { + // If both types are pointers, and pointed to the exact same type, + // LType == RType took care of that. + // Try to see if the pointee type has some other match. + LLVM_DEBUG(llvm::dbgs() + << "--- calculateMixability. LHS and RHS are Ptrs.\n"); + return calculateMixability(Check, LType->getPointeeType(), + RType->getPointeeType(), Ctx); + } + // If none of the previous logic found a match, try if Clang otherwise // believes the types to be the same. if (LType.getCanonicalType() == RType.getCanonicalType()) { @@ -315,21 +373,44 @@ static MixData isLRefEquallyBindingToType(const TheCheck &Check, Ty.dump(llvm::dbgs(), Ctx); llvm::dbgs() << '\n';); QualType ReferredType = LRef->getPointeeType(); - if (!ReferredType.isLocalConstQualified()) { + if (!ReferredType.isLocalConstQualified() && + ReferredType->getAs()) { + LLVM_DEBUG( + llvm::dbgs() + << "--- isLRefEquallyBindingToType. Non-const LRef to Typedef.\n"); + ReferredType = ReferredType.getDesugaredType(Ctx); + if (!ReferredType.isLocalConstQualified()) { + LLVM_DEBUG(llvm::dbgs() + << "<<< isLRefEquallyBindingToType. Typedef is not const.\n"); + return {MixFlags::None}; + } + + LLVM_DEBUG(llvm::dbgs() << "--- isLRefEquallyBindingToType. Typedef is " + "const, considering as const LRef.\n"); + } else if (!ReferredType.isLocalConstQualified()) { LLVM_DEBUG(llvm::dbgs() - << "<<< isLRefEquallyBindingToType. Not const ref.\n"); + << "<<< isLRefEquallyBindingToType. Not const LRef.\n"); return {MixFlags::None}; }; - QualType NonConstReferredType = ReferredType; - NonConstReferredType.removeLocalConst(); - if (ReferredType == Ty || NonConstReferredType == Ty) { + assert(ReferredType.isLocalConstQualified() && + "Reaching this point means we are sure LRef is effectively a const&."); + + if (ReferredType == Ty) { LLVM_DEBUG( llvm::dbgs() << "<<< isLRefEquallyBindingToType. Type of referred matches.\n"); return {MixFlags::Trivial, ReferredType}; } + QualType NonConstReferredType = ReferredType; + NonConstReferredType.removeLocalConst(); + if (NonConstReferredType == Ty) { + LLVM_DEBUG(llvm::dbgs() << "<<< isLRefEquallyBindingToType. Type of " + "referred matches to non-const qualified.\n"); + return {MixFlags::Trivial, NonConstReferredType}; + } + LLVM_DEBUG( llvm::dbgs() << "--- isLRefEquallyBindingToType. Checking mix for underlying type.\n"); @@ -508,8 +589,10 @@ static SmallString<64> getNameOrUnnamed(const NamedDecl *ND) { /// Returns whether a particular Mix between two parameters should have the /// types involved diagnosed to the user. This is only a flag check. static inline bool needsToPrintTypeInDiagnostic(const model::Mix &M) { - return static_cast(M.flags() & (model::MixFlags::TypeAlias | - model::MixFlags::ReferenceBind)); + using namespace model; + return static_cast( + M.flags() & + (MixFlags::TypeAlias | MixFlags::ReferenceBind | MixFlags::Qualifiers)); } namespace { @@ -593,7 +676,8 @@ EasilySwappableParametersCheck::EasilySwappableParametersCheck( Options.get("IgnoredParameterNames", DefaultIgnoredParameterNames))), IgnoredParameterTypeSuffixes(optutils::parseStringList( Options.get("IgnoredParameterTypeSuffixes", - DefaultIgnoredParameterTypeSuffixes))) {} + DefaultIgnoredParameterTypeSuffixes))), + QualifiersMix(Options.get("QualifiersMix", DefaultQualifiersMix)) {} void EasilySwappableParametersCheck::storeOptions( ClangTidyOptions::OptionMap &Opts) { @@ -602,6 +686,7 @@ void EasilySwappableParametersCheck::storeOptions( optutils::serializeStringList(IgnoredParameterNames)); Options.store(Opts, "IgnoredParameterTypeSuffixes", optutils::serializeStringList(IgnoredParameterTypeSuffixes)); + Options.store(Opts, "QualifiersMix", QualifiersMix); } void EasilySwappableParametersCheck::registerMatchers(MatchFinder *Finder) { @@ -708,18 +793,21 @@ void EasilySwappableParametersCheck::check( QualType LType = LVar->getType(); QualType RType = RVar->getType(); QualType CommonType = M.commonUnderlyingType(); - std::string LTypeAsWritten = LType.getAsString(PP); - std::string RTypeAsWritten = RType.getAsString(PP); + std::string LTypeStr = LType.getAsString(PP); + std::string RTypeStr = RType.getAsString(PP); std::string CommonTypeStr = CommonType.getAsString(PP); if (hasFlag(M.flags(), MixFlags::TypeAlias) && UniqueTypeAlias(LType, RType, CommonType)) { StringRef DiagText; bool ExplicitlyPrintCommonType = false; - if (LTypeAsWritten == CommonTypeStr || - RTypeAsWritten == CommonTypeStr) - DiagText = - "after resolving type aliases, '%0' and '%1' are the same"; + if (LTypeStr == CommonTypeStr || RTypeStr == CommonTypeStr) + if (hasFlag(M.flags(), MixFlags::Qualifiers)) + DiagText = "after resolving type aliases, '%0' and '%1' share a " + "common type"; + else + DiagText = + "after resolving type aliases, '%0' and '%1' are the same"; else { DiagText = "after resolving type aliases, the common type of '%0' " "and '%1' is '%2'"; @@ -728,17 +816,18 @@ void EasilySwappableParametersCheck::check( auto Diag = diag(LVar->getOuterLocStart(), DiagText, DiagnosticIDs::Note) - << LTypeAsWritten << RTypeAsWritten; + << LTypeStr << RTypeStr; if (ExplicitlyPrintCommonType) Diag << CommonTypeStr; } - if (hasFlag(M.flags(), MixFlags::ReferenceBind) && + if ((hasFlag(M.flags(), MixFlags::ReferenceBind) || + hasFlag(M.flags(), MixFlags::Qualifiers)) && UniqueBindPower({LType, RType})) { StringRef DiagText = "'%0' and '%1' parameters accept and bind the " "same kind of values"; diag(RVar->getOuterLocStart(), DiagText, DiagnosticIDs::Note) - << LTypeAsWritten << RTypeAsWritten; + << LTypeStr << RTypeStr; } } } diff --git a/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.h b/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.h index 6d236a205cef5..e3c58eb5d7013 100644 --- a/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.h @@ -38,6 +38,9 @@ class EasilySwappableParametersCheck : public ClangTidyCheck { /// The parameter typename suffixes (as written in the source code) to be /// ignored. const std::vector IgnoredParameterTypeSuffixes; + + /// Whether to consider an unqualified and a qualified type mixable. + const bool QualifiersMix; }; } // namespace bugprone diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone-easily-swappable-parameters.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone-easily-swappable-parameters.rst index 22224f39beab0..b9dafd3b32602 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone-easily-swappable-parameters.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone-easily-swappable-parameters.rst @@ -32,6 +32,34 @@ to strengthen the type safety of a project, no automatic fix-its are offered. Options ------- +Extension/relaxation options +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Relaxation (or extension) options can be used to broaden the scope of the +analysis and fine-tune the enabling of more mixes between types. +Some mixes may depend on coding style or preference specific to a project, +however, it should be noted that enabling *all* of these relaxations model the +way of mixing at call sites the most. +These options are expected to make the check report for more functions, and +report longer mixable ranges. + +.. option:: QualifiersMix + + Whether to consider parameters of some *cvr-qualified* ``T`` and a + differently *cvr-qualified* ``T`` (i.e. ``T`` and ``const T``, ``const T`` + and ``volatile T``, etc.) mixable between one another. + If `false`, the check will consider differently qualified types unmixable. + `True` turns the warnings on. + Defaults to `false`. + + The following example produces a diagnostic only if `QualifiersMix` is + enabled: + + .. code-block:: c++ + + void *memcpy(const void *Destination, void *Source, std::size_t N) {} + + Filtering options ^^^^^^^^^^^^^^^^^ diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-ignore.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-ignore.cpp index c04b1bab74e31..a61c666ac9682 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-ignore.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-ignore.cpp @@ -2,7 +2,8 @@ // RUN: -config='{CheckOptions: [ \ // RUN: {key: bugprone-easily-swappable-parameters.MinimumLength, value: 2}, \ // RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterNames, value: "\"\";Foo;Bar"}, \ -// RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterTypeSuffixes, value: "T"} \ +// RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterTypeSuffixes, value: "T"}, \ +// RUN: {key: bugprone-easily-swappable-parameters.QualifiersMix, value: 0} \ // RUN: ]}' -- void ignoredUnnamed(int I, int, int) {} // NO-WARN: No >= 2 length of non-unnamed. diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-len2.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-len2.cpp index 9de0787b971d1..f0c1c57e25848 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-len2.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-len2.cpp @@ -2,7 +2,8 @@ // RUN: -config='{CheckOptions: [ \ // RUN: {key: bugprone-easily-swappable-parameters.MinimumLength, value: 2}, \ // RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterNames, value: ""}, \ -// RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterTypeSuffixes, value: ""} \ +// RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterTypeSuffixes, value: ""}, \ +// RUN: {key: bugprone-easily-swappable-parameters.QualifiersMix, value: 0} \ // RUN: ]}' -- namespace std { @@ -104,6 +105,14 @@ void differentPtrs(int *IP, long *LP) {} // NO-WARN: Not the same type. typedef int MyInt1; using MyInt2 = int; +typedef MyInt2 MyInt2b; + +using CInt = const int; +using CMyInt1 = const MyInt1; +using CMyInt2 = const MyInt2; + +typedef long MyLong1; +using MyLong2 = long; void typedefAndTypedef1(MyInt1 I1, MyInt1 I2) {} // CHECK-MESSAGES: :[[@LINE-1]]:25: warning: 2 adjacent parameters of 'typedefAndTypedef1' of similar type ('MyInt1') @@ -133,8 +142,6 @@ void betweenTypedef2(MyInt1 I, MyInt2 J) {} // CHECK-MESSAGES: :[[@LINE-3]]:39: note: the last parameter in the range is 'J' // CHECK-MESSAGES: :[[@LINE-4]]:22: note: after resolving type aliases, the common type of 'MyInt1' and 'MyInt2' is 'int' -typedef MyInt2 MyInt2b; - void typedefChain(int I, MyInt1 MI1, MyInt2 MI2, MyInt2b MI2b) {} // CHECK-MESSAGES: :[[@LINE-1]]:19: warning: 4 adjacent parameters of 'typedefChain' of similar type are // CHECK-MESSAGES: :[[@LINE-2]]:23: note: the first parameter in the range is 'I' @@ -143,22 +150,21 @@ void typedefChain(int I, MyInt1 MI1, MyInt2 MI2, MyInt2b MI2b) {} // CHECK-MESSAGES: :[[@LINE-5]]:19: note: after resolving type aliases, 'int' and 'MyInt2' are the same // CHECK-MESSAGES: :[[@LINE-6]]:19: note: after resolving type aliases, 'int' and 'MyInt2b' are the same -typedef long MyLong1; -using MyLong2 = long; - void throughTypedefToOtherType(MyInt1 I, MyLong1 J) {} // NO-WARN: int and long. -void qualified1(int I, const int CI) {} // NO-WARN: Not the same type. +void qualified1(int I, const int CI) {} // NO-WARN: Different qualifiers. -void qualified2(int I, volatile int VI) {} // NO-WARN: Not the same type. +void qualified2(int I, volatile int VI) {} // NO-WARN: Different qualifiers. -void qualified3(int *IP, const int *CIP) {} // NO-WARN: Not the same type. +void qualified3(int *IP, const int *CIP) {} // NO-WARN: Different qualifiers. void qualified4(const int CI, const long CL) {} // NO-WARN: Not the same type. -using CInt = const int; +void qualifiedPtr1(int *IP, int *const IPC) {} // NO-WARN: Different qualifiers. + +void qualifiedTypeAndQualifiedPtr1(const int *CIP, int *const volatile IPCV) {} // NO-WARN: Not the same type. -void qualifiedThroughTypedef1(int I, CInt CI) {} // NO-WARN: Not the same type. +void qualifiedThroughTypedef1(int I, CInt CI) {} // NO-WARN: Different qualifiers. void qualifiedThroughTypedef2(CInt CI1, const int CI2) {} // CHECK-MESSAGES: :[[@LINE-1]]:31: warning: 2 adjacent parameters of 'qualifiedThroughTypedef2' of similar type are @@ -166,13 +172,32 @@ void qualifiedThroughTypedef2(CInt CI1, const int CI2) {} // CHECK-MESSAGES: :[[@LINE-3]]:51: note: the last parameter in the range is 'CI2' // CHECK-MESSAGES: :[[@LINE-4]]:31: note: after resolving type aliases, 'CInt' and 'const int' are the same -void qualifiedThroughTypedef3(CInt CI1, const MyInt1 CI2, const int CI3) {} // NO-WARN: Not the same type. +void qualifiedThroughTypedef3(CInt CI1, const MyInt1 CI2, const int CI3) {} +// CHECK-MESSAGES: :[[@LINE-1]]:31: warning: 3 adjacent parameters of 'qualifiedThroughTypedef3' of similar type are +// CHECK-MESSAGES: :[[@LINE-2]]:36: note: the first parameter in the range is 'CI1' +// CHECK-MESSAGES: :[[@LINE-3]]:69: note: the last parameter in the range is 'CI3' +// CHECK-MESSAGES: :[[@LINE-4]]:31: note: after resolving type aliases, the common type of 'CInt' and 'const MyInt1' is 'const int' +// CHECK-MESSAGES: :[[@LINE-5]]:31: note: after resolving type aliases, 'CInt' and 'const int' are the same +// CHECK-MESSAGES: :[[@LINE-6]]:41: note: after resolving type aliases, 'const MyInt1' and 'const int' are the same void qualifiedThroughTypedef4(CInt CI1, const MyInt1 CI2, const MyInt2 CI3) {} -// CHECK-MESSAGES: :[[@LINE-1]]:41: warning: 2 adjacent parameters of 'qualifiedThroughTypedef4' of similar type are -// CHECK-MESSAGES: :[[@LINE-2]]:54: note: the first parameter in the range is 'CI2' +// CHECK-MESSAGES: :[[@LINE-1]]:31: warning: 3 adjacent parameters of 'qualifiedThroughTypedef4' of similar type are +// CHECK-MESSAGES: :[[@LINE-2]]:36: note: the first parameter in the range is 'CI1' // CHECK-MESSAGES: :[[@LINE-3]]:72: note: the last parameter in the range is 'CI3' -// CHECK-MESSAGES: :[[@LINE-4]]:41: note: after resolving type aliases, the common type of 'const MyInt1' and 'const MyInt2' is 'int' +// CHECK-MESSAGES: :[[@LINE-4]]:31: note: after resolving type aliases, the common type of 'CInt' and 'const MyInt1' is 'const int' +// CHECK-MESSAGES: :[[@LINE-5]]:31: note: after resolving type aliases, the common type of 'CInt' and 'const MyInt2' is 'const int' +// CHECK-MESSAGES: :[[@LINE-6]]:41: note: after resolving type aliases, the common type of 'const MyInt1' and 'const MyInt2' is 'const int' + +void qualifiedThroughTypedef5(CMyInt1 CMI1, CMyInt2 CMI2) {} +// CHECK-MESSAGES: :[[@LINE-1]]:31: warning: 2 adjacent parameters of 'qualifiedThroughTypedef5' of similar type are +// CHECK-MESSAGES: :[[@LINE-2]]:39: note: the first parameter in the range is 'CMI1' +// CHECK-MESSAGES: :[[@LINE-3]]:53: note: the last parameter in the range is 'CMI2' +// CHECK-MESSAGES: :[[@LINE-4]]:31: note: after resolving type aliases, the common type of 'CMyInt1' and 'CMyInt2' is 'const int' + +void qualifiedThroughTypedef6(CMyInt1 CMI1, int I) {} // NO-WARN: Different qualifiers. + +template +void copy(const T *Dest, T *Source) {} // NO-WARN: Different qualifiers. void reference1(int I, int &IR) {} // NO-WARN: Distinct semantics when called. @@ -201,16 +226,21 @@ void reference6(int I, const int &CIR, int J, const int &CJR) {} using ICRTy = const int &; using MyIntCRTy = const MyInt1 &; +void referenceToTypedef1(CInt &CIR, int I) {} +// CHECK-MESSAGES: :[[@LINE-1]]:26: warning: 2 adjacent parameters of 'referenceToTypedef1' of similar type are +// CHECK-MESSAGES: :[[@LINE-2]]:32: note: the first parameter in the range is 'CIR' +// CHECK-MESSAGES: :[[@LINE-3]]:41: note: the last parameter in the range is 'I' +// CHECK-MESSAGES: :[[@LINE-4]]:37: note: 'CInt &' and 'int' parameters accept and bind the same kind of values + void referenceThroughTypedef(int I, ICRTy Builtin, MyIntCRTy MyInt) {} // CHECK-MESSAGES: :[[@LINE-1]]:30: warning: 3 adjacent parameters of 'referenceThroughTypedef' of similar type are // CHECK-MESSAGES: :[[@LINE-2]]:34: note: the first parameter in the range is 'I' // CHECK-MESSAGES: :[[@LINE-3]]:62: note: the last parameter in the range is 'MyInt' -// CHECK-MESSAGES: :[[@LINE-4]]:30: note: after resolving type aliases, the common type of 'int' and 'ICRTy' is 'const int' -// CHECK-MESSAGES: :[[@LINE-5]]:37: note: 'int' and 'ICRTy' parameters accept and bind the same kind of values -// CHECK-MESSAGES: :[[@LINE-6]]:30: note: after resolving type aliases, 'int' and 'MyIntCRTy' are the same -// CHECK-MESSAGES: :[[@LINE-7]]:52: note: 'int' and 'MyIntCRTy' parameters accept and bind the same kind of values -// CHECK-MESSAGES: :[[@LINE-8]]:37: note: after resolving type aliases, the common type of 'ICRTy' and 'MyIntCRTy' is 'int' -// CHECK-MESSAGES: :[[@LINE-9]]:52: note: 'ICRTy' and 'MyIntCRTy' parameters accept and bind the same kind of values +// CHECK-MESSAGES: :[[@LINE-4]]:37: note: 'int' and 'ICRTy' parameters accept and bind the same kind of values +// CHECK-MESSAGES: :[[@LINE-5]]:30: note: after resolving type aliases, 'int' and 'MyIntCRTy' are the same +// CHECK-MESSAGES: :[[@LINE-6]]:52: note: 'int' and 'MyIntCRTy' parameters accept and bind the same kind of values +// CHECK-MESSAGES: :[[@LINE-7]]:37: note: after resolving type aliases, the common type of 'ICRTy' and 'MyIntCRTy' is 'int' +// CHECK-MESSAGES: :[[@LINE-8]]:52: note: 'ICRTy' and 'MyIntCRTy' parameters accept and bind the same kind of values short const typedef int unsigned Eldritch; typedef const unsigned short Holy; diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-len3.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-len3.cpp index 10f8841368dfa..c833077285be9 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-len3.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-len3.cpp @@ -2,7 +2,8 @@ // RUN: -config='{CheckOptions: [ \ // RUN: {key: bugprone-easily-swappable-parameters.MinimumLength, value: 3}, \ // RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterNames, value: ""}, \ -// RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterTypeSuffixes, value: ""} \ +// RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterTypeSuffixes, value: ""}, \ +// RUN: {key: bugprone-easily-swappable-parameters.QualifiersMix, value: 0} \ // RUN: ]}' -- int add(int Left, int Right) { return Left + Right; } // NO-WARN: Only 2 parameters. diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-qualifiermixing.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-qualifiermixing.cpp new file mode 100644 index 0000000000000..7b9fdceda7465 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-qualifiermixing.cpp @@ -0,0 +1,112 @@ +// RUN: %check_clang_tidy %s bugprone-easily-swappable-parameters %t \ +// RUN: -config='{CheckOptions: [ \ +// RUN: {key: bugprone-easily-swappable-parameters.MinimumLength, value: 2}, \ +// RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterNames, value: ""}, \ +// RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterTypeSuffixes, value: ""}, \ +// RUN: {key: bugprone-easily-swappable-parameters.QualifiersMix, value: 1} \ +// RUN: ]}' -- + +typedef int MyInt1; +typedef int MyInt2; +using CInt = const int; +using CMyInt1 = const MyInt1; +using CMyInt2 = const MyInt2; + +void qualified1(int I, const int CI) {} +// CHECK-MESSAGES: :[[@LINE-1]]:17: warning: 2 adjacent parameters of 'qualified1' of similar type are easily swapped by mistake [bugprone-easily-swappable-parameters] +// CHECK-MESSAGES: :[[@LINE-2]]:21: note: the first parameter in the range is 'I' +// CHECK-MESSAGES: :[[@LINE-3]]:34: note: the last parameter in the range is 'CI' +// CHECK-MESSAGES: :[[@LINE-4]]:24: note: 'int' and 'const int' parameters accept and bind the same kind of values + +void qualified2(int I, volatile int VI) {} +// CHECK-MESSAGES: :[[@LINE-1]]:17: warning: 2 adjacent parameters of 'qualified2' of similar type are +// CHECK-MESSAGES: :[[@LINE-2]]:21: note: the first parameter in the range is 'I' +// CHECK-MESSAGES: :[[@LINE-3]]:37: note: the last parameter in the range is 'VI' +// CHECK-MESSAGES: :[[@LINE-4]]:24: note: 'int' and 'volatile int' parameters accept and bind the same kind of values + +void qualified3(int I, const volatile int CVI) {} +// CHECK-MESSAGES: :[[@LINE-1]]:17: warning: 2 adjacent parameters of 'qualified3' of similar type are +// CHECK-MESSAGES: :[[@LINE-2]]:21: note: the first parameter in the range is 'I' +// CHECK-MESSAGES: :[[@LINE-3]]:43: note: the last parameter in the range is 'CVI' +// CHECK-MESSAGES: :[[@LINE-4]]:24: note: 'int' and 'const volatile int' parameters accept and bind the same kind of values + +void qualified4(int *IP, const int *CIP) {} +// CHECK-MESSAGES: :[[@LINE-1]]:17: warning: 2 adjacent parameters of 'qualified4' of similar type are +// CHECK-MESSAGES: :[[@LINE-2]]:22: note: the first parameter in the range is 'IP' +// CHECK-MESSAGES: :[[@LINE-3]]:37: note: the last parameter in the range is 'CIP' +// CHECK-MESSAGES: :[[@LINE-4]]:26: note: 'int *' and 'const int *' parameters accept and bind the same kind of values + +void qualified5(const int CI, const long CL) {} // NO-WARN: Not the same type + +void qualifiedPtr1(int *IP, int *const IPC) {} +// CHECK-MESSAGES: :[[@LINE-1]]:20: warning: 2 adjacent parameters of 'qualifiedPtr1' of similar type are +// CHECK-MESSAGES: :[[@LINE-2]]:25: note: the first parameter in the range is 'IP' +// CHECK-MESSAGES: :[[@LINE-3]]:40: note: the last parameter in the range is 'IPC' +// CHECK-MESSAGES: :[[@LINE-4]]:29: note: 'int *' and 'int *const' parameters accept and bind the same kind of values + +void qualifiedPtr2(int *IP, int *volatile IPV) {} +// CHECK-MESSAGES: :[[@LINE-1]]:20: warning: 2 adjacent parameters of 'qualifiedPtr2' of similar type are +// CHECK-MESSAGES: :[[@LINE-2]]:25: note: the first parameter in the range is 'IP' +// CHECK-MESSAGES: :[[@LINE-3]]:43: note: the last parameter in the range is 'IPV' +// CHECK-MESSAGES: :[[@LINE-4]]:29: note: 'int *' and 'int *volatile' parameters accept and bind the same kind of values + +void qualifiedTypeAndQualifiedPtr1(const int *CIP, int *const volatile IPCV) {} +// CHECK-MESSAGES: :[[@LINE-1]]:36: warning: 2 adjacent parameters of 'qualifiedTypeAndQualifiedPtr1' of similar type are +// CHECK-MESSAGES: :[[@LINE-2]]:47: note: the first parameter in the range is 'CIP' +// CHECK-MESSAGES: :[[@LINE-3]]:72: note: the last parameter in the range is 'IPCV' +// CHECK-MESSAGES: :[[@LINE-4]]:52: note: 'const int *' and 'int *const volatile' parameters accept and bind the same kind of values + +void qualifiedThroughTypedef1(int I, CInt CI) {} +// CHECK-MESSAGES: :[[@LINE-1]]:31: warning: 2 adjacent parameters of 'qualifiedThroughTypedef1' of similar type are +// CHECK-MESSAGES: :[[@LINE-2]]:35: note: the first parameter in the range is 'I' +// CHECK-MESSAGES: :[[@LINE-3]]:43: note: the last parameter in the range is 'CI' +// CHECK-MESSAGES: :[[@LINE-4]]:31: note: after resolving type aliases, 'int' and 'CInt' share a common type +// CHECK-MESSAGES: :[[@LINE-5]]:38: note: 'int' and 'CInt' parameters accept and bind the same kind of values + +void qualifiedThroughTypedef2(CInt CI1, const int CI2) {} +// CHECK-MESSAGES: :[[@LINE-1]]:31: warning: 2 adjacent parameters of 'qualifiedThroughTypedef2' of similar type are +// CHECK-MESSAGES: :[[@LINE-2]]:36: note: the first parameter in the range is 'CI1' +// CHECK-MESSAGES: :[[@LINE-3]]:51: note: the last parameter in the range is 'CI2' +// CHECK-MESSAGES: :[[@LINE-4]]:31: note: after resolving type aliases, 'CInt' and 'const int' are the same + +void qualifiedThroughTypedef3(CInt CI1, const MyInt1 CI2, const int CI3) {} +// CHECK-MESSAGES: :[[@LINE-1]]:31: warning: 3 adjacent parameters of 'qualifiedThroughTypedef3' of similar type are +// CHECK-MESSAGES: :[[@LINE-2]]:36: note: the first parameter in the range is 'CI1' +// CHECK-MESSAGES: :[[@LINE-3]]:69: note: the last parameter in the range is 'CI3' +// CHECK-MESSAGES: :[[@LINE-4]]:31: note: after resolving type aliases, the common type of 'CInt' and 'const MyInt1' is 'const int' +// CHECK-MESSAGES: :[[@LINE-5]]:31: note: after resolving type aliases, 'CInt' and 'const int' are the same +// CHECK-MESSAGES: :[[@LINE-6]]:41: note: after resolving type aliases, 'const MyInt1' and 'const int' are the same + +void qualifiedThroughTypedef4(CInt CI1, const MyInt1 CI2, const MyInt2 CI3) {} +// CHECK-MESSAGES: :[[@LINE-1]]:31: warning: 3 adjacent parameters of 'qualifiedThroughTypedef4' of similar type are +// CHECK-MESSAGES: :[[@LINE-2]]:36: note: the first parameter in the range is 'CI1' +// CHECK-MESSAGES: :[[@LINE-3]]:72: note: the last parameter in the range is 'CI3' +// CHECK-MESSAGES: :[[@LINE-4]]:31: note: after resolving type aliases, the common type of 'CInt' and 'const MyInt1' is 'const int' +// CHECK-MESSAGES: :[[@LINE-5]]:31: note: after resolving type aliases, the common type of 'CInt' and 'const MyInt2' is 'const int' +// CHECK-MESSAGES: :[[@LINE-6]]:41: note: after resolving type aliases, the common type of 'const MyInt1' and 'const MyInt2' is 'const int' + +void qualifiedThroughTypedef5(CMyInt1 CMI1, CMyInt2 CMI2) {} +// CHECK-MESSAGES: :[[@LINE-1]]:31: warning: 2 adjacent parameters of 'qualifiedThroughTypedef5' of similar type are +// CHECK-MESSAGES: :[[@LINE-2]]:39: note: the first parameter in the range is 'CMI1' +// CHECK-MESSAGES: :[[@LINE-3]]:53: note: the last parameter in the range is 'CMI2' +// CHECK-MESSAGES: :[[@LINE-4]]:31: note: after resolving type aliases, the common type of 'CMyInt1' and 'CMyInt2' is 'const int' + +void qualifiedThroughTypedef6(CMyInt1 CMI1, int I) {} +// CHECK-MESSAGES: :[[@LINE-1]]:31: warning: 2 adjacent parameters of 'qualifiedThroughTypedef6' of similar type are +// CHECK-MESSAGES: :[[@LINE-2]]:39: note: the first parameter in the range is 'CMI1' +// CHECK-MESSAGES: :[[@LINE-3]]:49: note: the last parameter in the range is 'I' +// CHECK-MESSAGES: :[[@LINE-4]]:31: note: after resolving type aliases, 'CMyInt1' and 'int' share a common type +// CHECK-MESSAGES: :[[@LINE-5]]:45: note: 'CMyInt1' and 'int' parameters accept and bind the same kind of values + +void referenceToTypedef1(CInt &CIR, int I) {} +// CHECK-MESSAGES: :[[@LINE-1]]:26: warning: 2 adjacent parameters of 'referenceToTypedef1' of similar type are +// CHECK-MESSAGES: :[[@LINE-2]]:32: note: the first parameter in the range is 'CIR' +// CHECK-MESSAGES: :[[@LINE-3]]:41: note: the last parameter in the range is 'I' +// CHECK-MESSAGES: :[[@LINE-4]]:37: note: 'CInt &' and 'int' parameters accept and bind the same kind of values + +template +void copy(const T *Dest, T *Source) {} +// CHECK-MESSAGES: :[[@LINE-1]]:11: warning: 2 adjacent parameters of 'copy' of similar type are +// CHECK-MESSAGES: :[[@LINE-2]]:20: note: the first parameter in the range is 'Dest' +// CHECK-MESSAGES: :[[@LINE-3]]:29: note: the last parameter in the range is 'Source' +// CHECK-MESSAGES: :[[@LINE-4]]:26: note: 'const T *' and 'T *' parameters accept and bind the same kind of values diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters.c b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters.c index f52652e38636b..591a5cb353ee8 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters.c +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters.c @@ -2,7 +2,8 @@ // RUN: -config='{CheckOptions: [ \ // RUN: {key: bugprone-easily-swappable-parameters.MinimumLength, value: 2}, \ // RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterNames, value: ""}, \ -// RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterTypeSuffixes, value: "bool;MyBool;struct U;MAKE_LOGICAL_TYPE(int)"} \ +// RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterTypeSuffixes, value: "bool;MyBool;struct U;MAKE_LOGICAL_TYPE(int)"}, \ +// RUN: {key: bugprone-easily-swappable-parameters.QualifiersMix, value: 0} \ // RUN: ]}' -- -x c #define bool _Bool From e33d0478831e4a295cb136ce1f58587155309fa2 Mon Sep 17 00:00:00 2001 From: Whisperity Date: Tue, 17 Dec 2019 18:00:08 +0100 Subject: [PATCH 015/619] [clang-tidy] Extend 'bugprone-easily-swappable-parameters' with mixability because of implicit conversions Adds a relaxation option ModelImplicitConversions which will make the check report for cases where parameters refer to types that are implicitly convertible to one another. Example: struct IntBox { IntBox(int); operator int(); }; void foo(int i, double d, IntBox ib) {} Implicit conversions are the last to model in the set of things that are reasons for the possibility of a function being called the wrong way which is not always immediately apparent when looking at the function (signature or call). Reviewed By: aaron.ballman, martong Differential Revision: http://reviews.llvm.org/D75041 --- .../EasilySwappableParametersCheck.cpp | 1158 +++++++++++++++-- .../bugprone/EasilySwappableParametersCheck.h | 8 +- .../bugprone-easily-swappable-parameters.rst | 71 +- ...one-easily-swappable-parameters-ignore.cpp | 3 +- ...appable-parameters-implicit-qualifiers.cpp | 15 + ...ne-easily-swappable-parameters-implicits.c | 75 ++ ...-easily-swappable-parameters-implicits.cpp | 303 +++++ ...prone-easily-swappable-parameters-len2.cpp | 6 +- ...prone-easily-swappable-parameters-len3.cpp | 3 +- ...y-swappable-parameters-qualifiermixing.cpp | 3 +- .../bugprone-easily-swappable-parameters.c | 3 +- 11 files changed, 1544 insertions(+), 104 deletions(-) create mode 100644 clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-implicit-qualifiers.cpp create mode 100644 clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-implicits.c create mode 100644 clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-implicits.cpp diff --git a/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.cpp index 8266ee62ada64..c4896979d2e99 100644 --- a/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.cpp @@ -62,6 +62,9 @@ static const std::string DefaultIgnoredParameterTypeSuffixes = /// The default value for the QualifiersMix check option. static constexpr bool DefaultQualifiersMix = false; +/// The default value for the ModelImplicitConversions check option. +static constexpr bool DefaultModelImplicitConversions = true; + using namespace clang::ast_matchers; namespace clang { @@ -80,16 +83,24 @@ namespace model { enum class MixFlags : unsigned char { Invalid = 0, //< Sentinel bit pattern. DO NOT USE! - None = 1, //< Mix between the two parameters is not possible. - Trivial = 2, //< The two mix trivially, and are the exact same type. - Canonical = 4, //< The two mix because the types refer to the same + //< Certain constructs (such as pointers to noexcept/non-noexcept functions) + // have the same CanonicalType, which would result in false positives. + // During the recursive modelling call, this flag is set if a later diagnosed + // canonical type equivalence should be thrown away. + WorkaroundDisableCanonicalEquivalence = 1, + + None = 2, //< Mix between the two parameters is not possible. + Trivial = 4, //< The two mix trivially, and are the exact same type. + Canonical = 8, //< The two mix because the types refer to the same // CanonicalType, but we do not elaborate as to how. - TypeAlias = 8, //< The path from one type to the other involves + TypeAlias = 16, //< The path from one type to the other involves // desugaring type aliases. - ReferenceBind = 16, //< The mix involves the binding power of "const &". - Qualifiers = 32, //< The mix involves change in the qualifiers. + ReferenceBind = 32, //< The mix involves the binding power of "const &". + Qualifiers = 64, //< The mix involves change in the qualifiers. + ImplicitConversion = 128, //< The mixing of the parameters is possible + // through implicit conversions between the types. - LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue =*/Qualifiers) + LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue =*/ImplicitConversion) }; LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); @@ -114,7 +125,7 @@ static inline std::string formatMixFlags(MixFlags F) { if (F == MixFlags::Invalid) return "#Inv!"; - SmallString<8> Str{"------"}; + SmallString<8> Str{"-------"}; if (hasFlag(F, MixFlags::None)) // Shows the None bit explicitly, as it can be applied in the recursion @@ -130,6 +141,11 @@ static inline std::string formatMixFlags(MixFlags F) { Str[4] = '&'; if (hasFlag(F, MixFlags::Qualifiers)) Str[5] = 'Q'; + if (hasFlag(F, MixFlags::ImplicitConversion)) + Str[6] = 'i'; + + if (hasFlag(F, MixFlags::WorkaroundDisableCanonicalEquivalence)) + Str.append("(~C)"); return Str.str().str(); } @@ -140,23 +156,254 @@ static inline std::string formatMixFlags(MixFlags F); #endif // NDEBUG +/// The results of the steps of an Implicit Conversion Sequence is saved in +/// an instance of this record. +/// +/// A ConversionSequence maps the steps of the conversion with a member for +/// each type involved in the conversion. Imagine going from a hypothetical +/// Complex class to projecting it to the real part as a const double. +/// +/// I.e., given: +/// +/// struct Complex { +/// operator double() const; +/// }; +/// +/// void functionBeingAnalysed(Complex C, const double R); +/// +/// we will get the following sequence: +/// +/// (Begin=) Complex +/// +/// The first standard conversion is a qualification adjustment. +/// (AfterFirstStandard=) const Complex +/// +/// Then the user-defined conversion is executed. +/// (UDConvOp.ConversionOperatorResultType=) double +/// +/// Then this 'double' is qualifier-adjusted to 'const double'. +/// (AfterSecondStandard=) double +/// +/// The conversion's result has now been calculated, so it ends here. +/// (End=) double. +/// +/// Explicit storing of Begin and End in this record is needed, because +/// getting to what Begin and End here are needs further resolution of types, +/// e.g. in the case of typedefs: +/// +/// using Comp = Complex; +/// using CD = const double; +/// void functionBeingAnalysed2(Comp C, CD R); +/// +/// In this case, the user will be diagnosed with a potential conversion +/// between the two typedefs as written in the code, but to elaborate the +/// reasoning behind this conversion, we also need to show what the typedefs +/// mean. See FormattedConversionSequence towards the bottom of this file! +struct ConversionSequence { + enum UserDefinedConversionKind { UDCK_None, UDCK_Ctor, UDCK_Oper }; + + struct UserDefinedConvertingConstructor { + const CXXConstructorDecl *Fun; + QualType ConstructorParameterType; + QualType UserDefinedType; + }; + + struct UserDefinedConversionOperator { + const CXXConversionDecl *Fun; + QualType UserDefinedType; + QualType ConversionOperatorResultType; + }; + + /// The type the conversion stared from. + QualType Begin; + + /// The intermediate type after the first Standard Conversion Sequence. + QualType AfterFirstStandard; + + /// The details of the user-defined conversion involved, as a tagged union. + union { + char None; + UserDefinedConvertingConstructor UDConvCtor; + UserDefinedConversionOperator UDConvOp; + }; + UserDefinedConversionKind UDConvKind; + + /// The intermediate type after performing the second Standard Conversion + /// Sequence. + QualType AfterSecondStandard; + + /// The result type the conversion targeted. + QualType End; + + ConversionSequence() : None(0), UDConvKind(UDCK_None) {} + ConversionSequence(QualType From, QualType To) + : Begin(From), None(0), UDConvKind(UDCK_None), End(To) {} + + explicit operator bool() const { + return !AfterFirstStandard.isNull() || UDConvKind != UDCK_None || + !AfterSecondStandard.isNull(); + } + + /// Returns all the "steps" (non-unique and non-similar) types involved in + /// the conversion sequence. This method does **NOT** return Begin and End. + SmallVector getInvolvedTypesInSequence() const { + SmallVector Ret; + auto EmplaceIfDifferent = [&Ret](QualType QT) { + if (QT.isNull()) + return; + if (Ret.empty()) + Ret.emplace_back(QT); + else if (Ret.back() != QT) + Ret.emplace_back(QT); + }; + + EmplaceIfDifferent(AfterFirstStandard); + switch (UDConvKind) { + case UDCK_Ctor: + EmplaceIfDifferent(UDConvCtor.ConstructorParameterType); + EmplaceIfDifferent(UDConvCtor.UserDefinedType); + break; + case UDCK_Oper: + EmplaceIfDifferent(UDConvOp.UserDefinedType); + EmplaceIfDifferent(UDConvOp.ConversionOperatorResultType); + break; + case UDCK_None: + break; + } + EmplaceIfDifferent(AfterSecondStandard); + + return Ret; + } + + /// Updates the steps of the conversion sequence with the steps from the + /// other instance. + /// + /// \note This method does not check if the resulting conversion sequence is + /// sensible! + ConversionSequence &update(const ConversionSequence &RHS) { + if (!RHS.AfterFirstStandard.isNull()) + AfterFirstStandard = RHS.AfterFirstStandard; + switch (RHS.UDConvKind) { + case UDCK_Ctor: + UDConvKind = UDCK_Ctor; + UDConvCtor = RHS.UDConvCtor; + break; + case UDCK_Oper: + UDConvKind = UDCK_Oper; + UDConvOp = RHS.UDConvOp; + break; + case UDCK_None: + break; + } + if (!RHS.AfterSecondStandard.isNull()) + AfterSecondStandard = RHS.AfterSecondStandard; + + return *this; + } + + /// Sets the user-defined conversion to the given constructor. + void setConversion(const UserDefinedConvertingConstructor &UDCC) { + UDConvKind = UDCK_Ctor; + UDConvCtor = UDCC; + } + + /// Sets the user-defined conversion to the given operator. + void setConversion(const UserDefinedConversionOperator &UDCO) { + UDConvKind = UDCK_Oper; + UDConvOp = UDCO; + } + + /// Returns the type in the conversion that's formally "in our hands" once + /// the user-defined conversion is executed. + QualType getTypeAfterUserDefinedConversion() const { + switch (UDConvKind) { + case UDCK_Ctor: + return UDConvCtor.UserDefinedType; + case UDCK_Oper: + return UDConvOp.ConversionOperatorResultType; + case UDCK_None: + return {}; + } + llvm_unreachable("Invalid UDConv kind."); + } + + const CXXMethodDecl *getUserDefinedConversionFunction() const { + switch (UDConvKind) { + case UDCK_Ctor: + return UDConvCtor.Fun; + case UDCK_Oper: + return UDConvOp.Fun; + case UDCK_None: + return {}; + } + llvm_unreachable("Invalid UDConv kind."); + } + + /// Returns the SourceRange in the text that corresponds to the interesting + /// part of the user-defined conversion. This is either the parameter type + /// in a converting constructor, or the conversion result type in a conversion + /// operator. + SourceRange getUserDefinedConversionHighlight() const { + switch (UDConvKind) { + case UDCK_Ctor: + return UDConvCtor.Fun->getParamDecl(0)->getSourceRange(); + case UDCK_Oper: + // getReturnTypeSourceRange() does not work for CXXConversionDecls as the + // returned type is physically behind the declaration's name ("operator"). + if (const FunctionTypeLoc FTL = UDConvOp.Fun->getFunctionTypeLoc()) + if (const TypeLoc RetLoc = FTL.getReturnLoc()) + return RetLoc.getSourceRange(); + return {}; + case UDCK_None: + return {}; + } + llvm_unreachable("Invalid UDConv kind."); + } +}; + /// Contains the metadata for the mixability result between two types, /// independently of which parameters they were calculated from. struct MixData { /// The flag bits of the mix indicating what language features allow for it. - MixFlags Flags; + MixFlags Flags = MixFlags::Invalid; /// A potentially calculated common underlying type after desugaring, that /// both sides of the mix can originate from. QualType CommonType; + /// The steps an implicit conversion performs to get from one type to the + /// other. + ConversionSequence Conversion, ConversionRTL; + + /// True if the MixData was specifically created with only a one-way + /// conversion modelled. + bool CreatedFromOneWayConversion = false; + MixData(MixFlags Flags) : Flags(Flags) {} MixData(MixFlags Flags, QualType CommonType) : Flags(Flags), CommonType(CommonType) {} + MixData(MixFlags Flags, ConversionSequence Conv) + : Flags(Flags), Conversion(Conv), CreatedFromOneWayConversion(true) {} + MixData(MixFlags Flags, ConversionSequence LTR, ConversionSequence RTL) + : Flags(Flags), Conversion(LTR), ConversionRTL(RTL) {} + MixData(MixFlags Flags, QualType CommonType, ConversionSequence LTR, + ConversionSequence RTL) + : Flags(Flags), CommonType(CommonType), Conversion(LTR), + ConversionRTL(RTL) {} void sanitize() { assert(Flags != MixFlags::Invalid && "sanitize() called on invalid bitvec"); + MixFlags CanonicalAndWorkaround = + MixFlags::Canonical | MixFlags::WorkaroundDisableCanonicalEquivalence; + if ((Flags & CanonicalAndWorkaround) == CanonicalAndWorkaround) { + // A workaround for too eagerly equivalent canonical types was requested, + // and a canonical equivalence was proven. Fulfill the request and throw + // this result away. + Flags = MixFlags::None; + return; + } + if (hasFlag(Flags, MixFlags::None)) { // If anywhere down the recursion a potential mix "path" is deemed // impossible, throw away all the other bits because the mix is not @@ -173,11 +420,34 @@ struct MixData { // recursion other bit(s) were set, remove the trivial bit, as it is not // trivial. Flags &= ~MixFlags::Trivial; + + bool ShouldHaveImplicitConvFlag = false; + if (CreatedFromOneWayConversion && Conversion) + ShouldHaveImplicitConvFlag = true; + else if (!CreatedFromOneWayConversion && Conversion && ConversionRTL) + // Only say that we have implicit conversion mix possibility if it is + // bidirectional. Otherwise, the compiler would report an *actual* swap + // at a call site... + ShouldHaveImplicitConvFlag = true; + + if (ShouldHaveImplicitConvFlag) + Flags |= MixFlags::ImplicitConversion; + else + Flags &= ~MixFlags::ImplicitConversion; } + bool isValid() const { return Flags >= MixFlags::None; } + + bool indicatesMixability() const { return Flags > MixFlags::None; } + /// Add the specified flag bits to the flags. MixData operator|(MixFlags EnableFlags) const { - return {Flags | EnableFlags, CommonType}; + if (CreatedFromOneWayConversion) { + MixData M{Flags | EnableFlags, Conversion}; + M.CommonType = CommonType; + return M; + } + return {Flags | EnableFlags, CommonType, Conversion, ConversionRTL}; } /// Add the specified flag bits to the flags. @@ -190,8 +460,14 @@ struct MixData { MixData qualify(Qualifiers Quals) const { SplitQualType Split = CommonType.split(); Split.Quals.addQualifiers(Quals); + QualType CommonType{Split.Ty, Split.Quals.getAsOpaqueValue()}; - return {Flags, QualType(Split.Ty, Split.Quals.getAsOpaqueValue())}; + if (CreatedFromOneWayConversion) { + MixData M{Flags, Conversion}; + M.CommonType = CommonType; + return M; + } + return {Flags, CommonType, Conversion, ConversionRTL}; } }; @@ -206,7 +482,15 @@ struct Mix { void sanitize() { Data.sanitize(); } MixFlags flags() const { return Data.Flags; } + bool flagsValid() const { return Data.isValid(); } + bool mixable() const { return Data.indicatesMixability(); } QualType commonUnderlyingType() const { return Data.CommonType; } + const ConversionSequence &leftToRightConversionSequence() const { + return Data.Conversion; + } + const ConversionSequence &rightToLeftConversionSequence() const { + return Data.ConversionRTL; + } }; // NOLINTNEXTLINE(misc-redundant-expression): Seems to be a bogus warning. @@ -243,10 +527,34 @@ struct MixableParameterRange { } }; -static MixData isLRefEquallyBindingToType(const TheCheck &Check, - const LValueReferenceType *LRef, - QualType Ty, const ASTContext &Ctx, - bool IsRefRHS); +/// Helper enum for the recursive calls in the modelling that toggle what kinds +/// of implicit conversions are to be modelled. +enum ImplicitConversionModellingMode : unsigned char { + //< No implicit conversions are modelled. + ICMM_None, + + //< The full implicit conversion sequence is modelled. + ICMM_All, + + //< Only model a unidirectional implicit conversion and within it only one + // standard conversion sequence. + ICMM_OneWaySingleStandardOnly +}; + +static MixData +isLRefEquallyBindingToType(const TheCheck &Check, + const LValueReferenceType *LRef, QualType Ty, + const ASTContext &Ctx, bool IsRefRHS, + ImplicitConversionModellingMode ImplicitMode); + +static MixData +approximateImplicitConversion(const TheCheck &Check, QualType LType, + QualType RType, const ASTContext &Ctx, + ImplicitConversionModellingMode ImplicitMode); + +static inline bool isUselessSugar(const Type *T) { + return isa(T); +} /// Approximate the way how LType and RType might refer to "essentially the /// same" type, in a sense that at a particular call site, an expression of @@ -257,13 +565,19 @@ static MixData isLRefEquallyBindingToType(const TheCheck &Check, /// The returned data structure is not guaranteed to be properly set, as this /// function is potentially recursive. It is the caller's responsibility to /// call sanitize() on the result once the recursion is over. -static MixData calculateMixability(const TheCheck &Check, const QualType LType, - const QualType RType, - const ASTContext &Ctx) { +static MixData +calculateMixability(const TheCheck &Check, QualType LType, QualType RType, + const ASTContext &Ctx, + ImplicitConversionModellingMode ImplicitMode) { LLVM_DEBUG(llvm::dbgs() << ">>> calculateMixability for LType:\n"; LType.dump(llvm::dbgs(), Ctx); llvm::dbgs() << "\nand RType:\n"; RType.dump(llvm::dbgs(), Ctx); llvm::dbgs() << '\n';); + // Certain constructs match on the last catch-all getCanonicalType() equality, + // which is perhaps something not what we want. If this variable is true, + // the canonical type equality will be ignored. + bool RecursiveReturnDiscardingCanonicalType = false; + if (LType == RType) { LLVM_DEBUG(llvm::dbgs() << "<<< calculateMixability. Trivial equality.\n"); return {MixFlags::Trivial, LType}; @@ -272,15 +586,17 @@ static MixData calculateMixability(const TheCheck &Check, const QualType LType, // Dissolve certain type sugars that do not affect the mixability of one type // with the other, and also do not require any sort of elaboration for the // user to understand. - if (isa(LType.getTypePtr())) { - LLVM_DEBUG(llvm::dbgs() << "--- calculateMixability. LHS is ParenType.\n"); + if (isUselessSugar(LType.getTypePtr())) { + LLVM_DEBUG(llvm::dbgs() + << "--- calculateMixability. LHS is useless sugar.\n"); return calculateMixability(Check, LType.getSingleStepDesugaredType(Ctx), - RType, Ctx); + RType, Ctx, ImplicitMode); } - if (isa(RType.getTypePtr())) { - LLVM_DEBUG(llvm::dbgs() << "--- calculateMixability. RHS is ParenType.\n"); - return calculateMixability(Check, LType, - RType.getSingleStepDesugaredType(Ctx), Ctx); + if (isUselessSugar(RType.getTypePtr())) { + LLVM_DEBUG(llvm::dbgs() + << "--- calculateMixability. RHS is useless sugar.\n"); + return calculateMixability( + Check, LType, RType.getSingleStepDesugaredType(Ctx), Ctx, ImplicitMode); } // At a particular call site, what could be passed to a 'T' or 'const T' might @@ -288,12 +604,14 @@ static MixData calculateMixability(const TheCheck &Check, const QualType LType, // side effect on the passed expressions. if (const auto *LRef = LType->getAs()) { LLVM_DEBUG(llvm::dbgs() << "--- calculateMixability. LHS is &.\n"); - return isLRefEquallyBindingToType(Check, LRef, RType, Ctx, false) | + return isLRefEquallyBindingToType(Check, LRef, RType, Ctx, false, + ImplicitMode) | MixFlags::ReferenceBind; } if (const auto *RRef = RType->getAs()) { LLVM_DEBUG(llvm::dbgs() << "--- calculateMixability. RHS is &.\n"); - return isLRefEquallyBindingToType(Check, RRef, LType, Ctx, true) | + return isLRefEquallyBindingToType(Check, RRef, LType, Ctx, true, + ImplicitMode) | MixFlags::ReferenceBind; } @@ -301,13 +619,14 @@ static MixData calculateMixability(const TheCheck &Check, const QualType LType, if (LType->getAs()) { LLVM_DEBUG(llvm::dbgs() << "--- calculateMixability. LHS is typedef.\n"); return calculateMixability(Check, LType.getSingleStepDesugaredType(Ctx), - RType, Ctx) | + RType, Ctx, ImplicitMode) | MixFlags::TypeAlias; } if (RType->getAs()) { LLVM_DEBUG(llvm::dbgs() << "--- calculateMixability. RHS is typedef.\n"); return calculateMixability(Check, LType, - RType.getSingleStepDesugaredType(Ctx), Ctx) | + RType.getSingleStepDesugaredType(Ctx), Ctx, + ImplicitMode) | MixFlags::TypeAlias; } @@ -326,7 +645,8 @@ static MixData calculateMixability(const TheCheck &Check, const QualType LType, } return calculateMixability(Check, LType.getLocalUnqualifiedType(), - RType.getLocalUnqualifiedType(), Ctx) | + RType.getLocalUnqualifiedType(), Ctx, + ImplicitMode) | MixFlags::Qualifiers; } if (LType.getLocalCVRQualifiers() == RType.getLocalCVRQualifiers() && @@ -336,38 +656,113 @@ static MixData calculateMixability(const TheCheck &Check, const QualType LType, // Apply the same qualifier back into the found common type if we found // a common type between the unqualified versions. return calculateMixability(Check, LType.getLocalUnqualifiedType(), - RType.getLocalUnqualifiedType(), Ctx) + RType.getLocalUnqualifiedType(), Ctx, + ImplicitMode) .qualify(LType.getLocalQualifiers()); } if (LType->isPointerType() && RType->isPointerType()) { // If both types are pointers, and pointed to the exact same type, - // LType == RType took care of that. - // Try to see if the pointee type has some other match. + // LType == RType took care of that. Try to see if the pointee type has + // some other match. However, this must not consider implicit conversions. LLVM_DEBUG(llvm::dbgs() << "--- calculateMixability. LHS and RHS are Ptrs.\n"); - return calculateMixability(Check, LType->getPointeeType(), - RType->getPointeeType(), Ctx); + MixData MixOfPointee = + calculateMixability(Check, LType->getPointeeType(), + RType->getPointeeType(), Ctx, ICMM_None); + if (hasFlag(MixOfPointee.Flags, + MixFlags::WorkaroundDisableCanonicalEquivalence)) + RecursiveReturnDiscardingCanonicalType = true; + + MixOfPointee.sanitize(); + if (MixOfPointee.indicatesMixability()) { + LLVM_DEBUG(llvm::dbgs() + << "<<< calculateMixability. Pointees are mixable.\n"); + return MixOfPointee; + } + } + + if (ImplicitMode > ICMM_None) { + LLVM_DEBUG(llvm::dbgs() << "--- calculateMixability. Start implicit...\n"); + MixData MixLTR = + approximateImplicitConversion(Check, LType, RType, Ctx, ImplicitMode); + LLVM_DEBUG( + if (hasFlag(MixLTR.Flags, MixFlags::ImplicitConversion)) llvm::dbgs() + << "--- calculateMixability. Implicit Left -> Right found.\n";); + + if (ImplicitMode == ICMM_OneWaySingleStandardOnly && MixLTR.Conversion && + !MixLTR.Conversion.AfterFirstStandard.isNull() && + MixLTR.Conversion.UDConvKind == ConversionSequence::UDCK_None && + MixLTR.Conversion.AfterSecondStandard.isNull()) { + // The invoker of the method requested only modelling a single standard + // conversion, in only the forward direction, and they got just that. + LLVM_DEBUG(llvm::dbgs() << "<<< calculateMixability. Implicit " + "conversion, one-way, standard-only.\n"); + return {MixFlags::ImplicitConversion, MixLTR.Conversion}; + } + + // Otherwise if the invoker requested a full modelling, do the other + // direction as well. + MixData MixRTL = + approximateImplicitConversion(Check, RType, LType, Ctx, ImplicitMode); + LLVM_DEBUG( + if (hasFlag(MixRTL.Flags, MixFlags::ImplicitConversion)) llvm::dbgs() + << "--- calculateMixability. Implicit Right -> Left found.\n";); + + if (MixLTR.Conversion && MixRTL.Conversion) { + LLVM_DEBUG( + llvm::dbgs() + << "<<< calculateMixability. Implicit conversion, bidirectional.\n"); + return {MixFlags::ImplicitConversion, MixLTR.Conversion, + MixRTL.Conversion}; + } + } + + if (RecursiveReturnDiscardingCanonicalType) + LLVM_DEBUG(llvm::dbgs() << "--- calculateMixability. Before CanonicalType, " + "Discard was enabled.\n"); + + // Certain kinds unfortunately need to be side-stepped for canonical type + // matching. + if (LType->getAs() || RType->getAs()) { + // Unfortunately, the canonical type of a function pointer becomes the + // same even if exactly one is "noexcept" and the other isn't, making us + // give a false positive report irrespective of implicit conversions. + LLVM_DEBUG(llvm::dbgs() + << "--- calculateMixability. Discarding potential canonical " + "equivalence on FunctionProtoTypes.\n"); + RecursiveReturnDiscardingCanonicalType = true; } + MixData MixToReturn{MixFlags::None}; + // If none of the previous logic found a match, try if Clang otherwise // believes the types to be the same. - if (LType.getCanonicalType() == RType.getCanonicalType()) { + QualType LCanonical = LType.getCanonicalType(); + if (LCanonical == RType.getCanonicalType()) { LLVM_DEBUG(llvm::dbgs() << "<<< calculateMixability. Same CanonicalType.\n"); - return {MixFlags::Canonical, LType.getCanonicalType()}; + MixToReturn = {MixFlags::Canonical, LCanonical}; } - LLVM_DEBUG(llvm::dbgs() << "<<< calculateMixability. No match found.\n"); - return {MixFlags::None}; + if (RecursiveReturnDiscardingCanonicalType) + MixToReturn |= MixFlags::WorkaroundDisableCanonicalEquivalence; + + LLVM_DEBUG(if (MixToReturn.Flags == MixFlags::None) llvm::dbgs() + << "<<< calculateMixability. No match found.\n"); + return MixToReturn; } /// Calculates if the reference binds an expression of the given type. This is /// true iff 'LRef' is some 'const T &' type, and the 'Ty' is 'T' or 'const T'. -static MixData isLRefEquallyBindingToType(const TheCheck &Check, - const LValueReferenceType *LRef, - QualType Ty, const ASTContext &Ctx, - bool IsRefRHS) { +/// +/// \param ImplicitMode is forwarded in the possible recursive call to +/// calculateMixability. +static MixData +isLRefEquallyBindingToType(const TheCheck &Check, + const LValueReferenceType *LRef, QualType Ty, + const ASTContext &Ctx, bool IsRefRHS, + ImplicitConversionModellingMode ImplicitMode) { LLVM_DEBUG(llvm::dbgs() << ">>> isLRefEquallyBindingToType for LRef:\n"; LRef->dump(llvm::dbgs(), Ctx); llvm::dbgs() << "\nand Type:\n"; Ty.dump(llvm::dbgs(), Ctx); llvm::dbgs() << '\n';); @@ -414,8 +809,464 @@ static MixData isLRefEquallyBindingToType(const TheCheck &Check, LLVM_DEBUG( llvm::dbgs() << "--- isLRefEquallyBindingToType. Checking mix for underlying type.\n"); - return IsRefRHS ? calculateMixability(Check, Ty, NonConstReferredType, Ctx) - : calculateMixability(Check, NonConstReferredType, Ty, Ctx); + return IsRefRHS ? calculateMixability(Check, Ty, NonConstReferredType, Ctx, + ImplicitMode) + : calculateMixability(Check, NonConstReferredType, Ty, Ctx, + ImplicitMode); +} + +static inline bool isDerivedToBase(const CXXRecordDecl *Derived, + const CXXRecordDecl *Base) { + return Derived && Base && Derived->isCompleteDefinition() && + Base->isCompleteDefinition() && Derived->isDerivedFrom(Base); +} + +static Optional +approximateStandardConversionSequence(const TheCheck &Check, QualType From, + QualType To, const ASTContext &Ctx) { + LLVM_DEBUG(llvm::dbgs() << ">>> approximateStdConv for LType:\n"; + From.dump(llvm::dbgs(), Ctx); llvm::dbgs() << "\nand RType:\n"; + To.dump(llvm::dbgs(), Ctx); llvm::dbgs() << '\n';); + + // A standard conversion sequence consists of the following, in order: + // * Maybe either LValue->RValue conv., Array->Ptr conv., Function->Ptr conv. + // * Maybe Numeric promotion or conversion. + // * Maybe function pointer conversion. + // * Maybe qualifier adjustments. + QualType WorkType = From; + // Get out the qualifiers of the original type. This will always be + // re-applied to the WorkType to ensure it is the same qualification as the + // original From was. + auto QualifiersToApply = From.split().Quals.getAsOpaqueValue(); + + // LValue->RValue is irrelevant for the check, because it is a thing to be + // done at a call site, and will be performed if need be performed. + + // Array->Ptr decay. + if (const auto *ArrayT = dyn_cast(From)) { + LLVM_DEBUG(llvm::dbgs() << "--- approximateStdConv. Array->Ptr decayed.\n"); + WorkType = ArrayT->getPointeeType(); + } + + // Function->Pointer conversions are also irrelevant, because a + // "FunctionType" cannot be the type of a parameter variable, so this + // conversion is only meaningful at call sites. + + // Numeric promotions and conversions. + const auto *FromBuiltin = WorkType->getAs(); + const auto *ToBuiltin = To->getAs(); + bool FromNumeric = FromBuiltin && (FromBuiltin->isIntegerType() || + FromBuiltin->isFloatingType()); + bool ToNumeric = + ToBuiltin && (ToBuiltin->isIntegerType() || ToBuiltin->isFloatingType()); + if (FromNumeric && ToNumeric) { + // If both are integral types, the numeric conversion is performed. + // Reapply the qualifiers of the original type, however, so + // "const int -> double" in this case moves over to + // "const double -> double". + LLVM_DEBUG(llvm::dbgs() + << "--- approximateStdConv. Conversion between numerics.\n"); + WorkType = QualType{ToBuiltin, QualifiersToApply}; + } + + const auto *FromEnum = WorkType->getAs(); + const auto *ToEnum = To->getAs(); + if (FromEnum && ToNumeric && FromEnum->isUnscopedEnumerationType()) { + // Unscoped enumerations (or enumerations in C) convert to numerics. + LLVM_DEBUG(llvm::dbgs() + << "--- approximateStdConv. Unscoped enum to numeric.\n"); + WorkType = QualType{ToBuiltin, QualifiersToApply}; + } else if (FromNumeric && ToEnum && ToEnum->isUnscopedEnumerationType()) { + // Numeric types convert to enumerations only in C. + if (Ctx.getLangOpts().CPlusPlus) { + LLVM_DEBUG(llvm::dbgs() << "<<< approximateStdConv. Numeric to unscoped " + "enum, not possible in C++!\n"); + return {}; + } + + LLVM_DEBUG(llvm::dbgs() + << "--- approximateStdConv. Numeric to unscoped enum.\n"); + WorkType = QualType{ToEnum, QualifiersToApply}; + } + + // Check for pointer conversions. + const auto *FromPtr = WorkType->getAs(); + const auto *ToPtr = To->getAs(); + if (FromPtr && ToPtr) { + if (ToPtr->isVoidPointerType()) { + LLVM_DEBUG(llvm::dbgs() << "--- approximateStdConv. To void pointer.\n"); + WorkType = QualType{ToPtr, QualifiersToApply}; + } + + const auto *FromRecordPtr = FromPtr->getPointeeCXXRecordDecl(); + const auto *ToRecordPtr = ToPtr->getPointeeCXXRecordDecl(); + if (isDerivedToBase(FromRecordPtr, ToRecordPtr)) { + LLVM_DEBUG(llvm::dbgs() << "--- approximateStdConv. Derived* to Base*\n"); + WorkType = QualType{ToPtr, QualifiersToApply}; + } + } + + // Model the slicing Derived-to-Base too, as "BaseT temporary = derived;" + // can also be compiled. + const auto *FromRecord = WorkType->getAsCXXRecordDecl(); + const auto *ToRecord = To->getAsCXXRecordDecl(); + if (isDerivedToBase(FromRecord, ToRecord)) { + LLVM_DEBUG(llvm::dbgs() << "--- approximateStdConv. Derived To Base.\n"); + WorkType = QualType{ToRecord->getTypeForDecl(), QualifiersToApply}; + } + + if (Ctx.getLangOpts().CPlusPlus17 && FromPtr && ToPtr) { + // Function pointer conversion: A noexcept function pointer can be passed + // to a non-noexcept one. + const auto *FromFunctionPtr = + FromPtr->getPointeeType()->getAs(); + const auto *ToFunctionPtr = + ToPtr->getPointeeType()->getAs(); + if (FromFunctionPtr && ToFunctionPtr && + FromFunctionPtr->hasNoexceptExceptionSpec() && + !ToFunctionPtr->hasNoexceptExceptionSpec()) { + LLVM_DEBUG(llvm::dbgs() << "--- approximateStdConv. noexcept function " + "pointer to non-noexcept.\n"); + WorkType = QualType{ToPtr, QualifiersToApply}; + } + } + + // Qualifier adjustments are modelled according to the user's request in + // the QualifiersMix check config. + LLVM_DEBUG(llvm::dbgs() + << "--- approximateStdConv. Trying qualifier adjustment...\n"); + MixData QualConv = calculateMixability(Check, WorkType, To, Ctx, ICMM_None); + QualConv.sanitize(); + if (hasFlag(QualConv.Flags, MixFlags::Qualifiers)) { + LLVM_DEBUG(llvm::dbgs() + << "<<< approximateStdConv. Qualifiers adjusted.\n"); + WorkType = To; + } + + if (WorkType == To) { + LLVM_DEBUG(llvm::dbgs() << "<<< approximateStdConv. Reached 'To' type.\n"); + return {WorkType}; + } + + LLVM_DEBUG(llvm::dbgs() << "<<< approximateStdConv. Did not reach 'To'.\n"); + return {}; +} + +namespace { + +/// Helper class for storing possible user-defined conversion calls that +/// *could* take place in an implicit conversion, and selecting the one that +/// most likely *does*, if any. +class UserDefinedConversionSelector { +public: + /// The conversion associated with a conversion function, together with the + /// mixability flags of the conversion function's parameter or return type + /// to the rest of the sequence the selector is used in, and the sequence + /// that applied through the conversion itself. + struct PreparedConversion { + const CXXMethodDecl *ConversionFun; + MixFlags Flags; + ConversionSequence Seq; + + PreparedConversion(const CXXMethodDecl *CMD, MixFlags F, + ConversionSequence S) + : ConversionFun(CMD), Flags(F), Seq(S) {} + }; + + UserDefinedConversionSelector(const TheCheck &Check) : Check(Check) {} + + /// Adds the conversion between the two types for the given function into + /// the possible implicit conversion set. FromType and ToType is either: + /// * the result of a standard sequence and a converting ctor parameter + /// * the return type of a conversion operator and the expected target of + /// an implicit conversion. + void addConversion(const CXXMethodDecl *ConvFun, QualType FromType, + QualType ToType) { + // Try to go from the FromType to the ToType wiht only a single implicit + // conversion, to see if the conversion function is applicable. + MixData Mix = + calculateMixability(Check, FromType, ToType, ConvFun->getASTContext(), + ICMM_OneWaySingleStandardOnly); + Mix.sanitize(); + if (!Mix.indicatesMixability()) + return; + + LLVM_DEBUG(llvm::dbgs() << "--- tryConversion. Found viable with flags: " + << formatMixFlags(Mix.Flags) << '\n'); + FlaggedConversions.emplace_back(ConvFun, Mix.Flags, Mix.Conversion); + } + + /// Selects the best conversion function that is applicable from the + /// prepared set of potential conversion functions taken. + Optional operator()() const { + if (FlaggedConversions.empty()) { + LLVM_DEBUG(llvm::dbgs() << "--- selectUserDefinedConv. Empty.\n"); + return {}; + } + if (FlaggedConversions.size() == 1) { + LLVM_DEBUG(llvm::dbgs() << "--- selectUserDefinedConv. Single.\n"); + return FlaggedConversions.front(); + } + + Optional BestConversion; + unsigned short HowManyGoodConversions = 0; + for (const auto &Prepared : FlaggedConversions) { + LLVM_DEBUG(llvm::dbgs() << "--- selectUserDefinedConv. Candidate flags: " + << formatMixFlags(Prepared.Flags) << '\n'); + if (!BestConversion) { + BestConversion = Prepared; + ++HowManyGoodConversions; + continue; + } + + bool BestConversionHasImplicit = + hasFlag(BestConversion->Flags, MixFlags::ImplicitConversion); + bool ThisConversionHasImplicit = + hasFlag(Prepared.Flags, MixFlags::ImplicitConversion); + if (!BestConversionHasImplicit && ThisConversionHasImplicit) + // This is a worse conversion, because a better one was found earlier. + continue; + + if (BestConversionHasImplicit && !ThisConversionHasImplicit) { + // If the so far best selected conversion needs a previous implicit + // conversion to match the user-defined converting function, but this + // conversion does not, this is a better conversion, and we can throw + // away the previously selected conversion(s). + BestConversion = Prepared; + HowManyGoodConversions = 1; + continue; + } + + if (BestConversionHasImplicit == ThisConversionHasImplicit) + // The current conversion is the same in term of goodness than the + // already selected one. + ++HowManyGoodConversions; + } + + if (HowManyGoodConversions == 1) { + LLVM_DEBUG(llvm::dbgs() + << "--- selectUserDefinedConv. Unique result. Flags: " + << formatMixFlags(BestConversion->Flags) << '\n'); + return BestConversion; + } + + LLVM_DEBUG(llvm::dbgs() + << "--- selectUserDefinedConv. No, or ambiguous.\n"); + return {}; + } + +private: + llvm::SmallVector FlaggedConversions; + const TheCheck &Check; +}; + +} // namespace + +static Optional +tryConversionOperators(const TheCheck &Check, const CXXRecordDecl *RD, + QualType ToType) { + if (!RD || !RD->isCompleteDefinition()) + return {}; + RD = RD->getDefinition(); + + LLVM_DEBUG(llvm::dbgs() << ">>> tryConversionOperators: " << RD->getName() + << " to:\n"; + ToType.dump(llvm::dbgs(), RD->getASTContext()); + llvm::dbgs() << '\n';); + + UserDefinedConversionSelector ConversionSet{Check}; + + for (const NamedDecl *Method : RD->getVisibleConversionFunctions()) { + const auto *Con = dyn_cast(Method); + if (!Con || Con->isExplicit()) + continue; + LLVM_DEBUG(llvm::dbgs() << "--- tryConversionOperators. Trying:\n"; + Con->dump(llvm::dbgs()); llvm::dbgs() << '\n';); + + // Try to go from the result of conversion operator to the expected type, + // without calculating another user-defined conversion. + ConversionSet.addConversion(Con, Con->getConversionType(), ToType); + } + + if (Optional + SelectedConversion = ConversionSet()) { + QualType RecordType{RD->getTypeForDecl(), 0}; + + ConversionSequence Result{RecordType, ToType}; + // The conversion from the operator call's return type to ToType was + // modelled as a "pre-conversion" in the operator call, but it is the + // "post-conversion" from the point of view of the original conversion + // we are modelling. + Result.AfterSecondStandard = SelectedConversion->Seq.AfterFirstStandard; + + ConversionSequence::UserDefinedConversionOperator ConvOp; + ConvOp.Fun = cast(SelectedConversion->ConversionFun); + ConvOp.UserDefinedType = RecordType; + ConvOp.ConversionOperatorResultType = ConvOp.Fun->getConversionType(); + Result.setConversion(ConvOp); + + LLVM_DEBUG(llvm::dbgs() << "<<< tryConversionOperators. Found result.\n"); + return Result; + } + + LLVM_DEBUG(llvm::dbgs() << "<<< tryConversionOperators. No conversion.\n"); + return {}; +} + +static Optional +tryConvertingConstructors(const TheCheck &Check, QualType FromType, + const CXXRecordDecl *RD) { + if (!RD || !RD->isCompleteDefinition()) + return {}; + RD = RD->getDefinition(); + + LLVM_DEBUG(llvm::dbgs() << ">>> tryConveringConstructors: " << RD->getName() + << " from:\n"; + FromType.dump(llvm::dbgs(), RD->getASTContext()); + llvm::dbgs() << '\n';); + + UserDefinedConversionSelector ConversionSet{Check}; + + for (const CXXConstructorDecl *Con : RD->ctors()) { + if (Con->isCopyOrMoveConstructor() || + !Con->isConvertingConstructor(/* AllowExplicit =*/false)) + continue; + LLVM_DEBUG(llvm::dbgs() << "--- tryConvertingConstructors. Trying:\n"; + Con->dump(llvm::dbgs()); llvm::dbgs() << '\n';); + + // Try to go from the original FromType to the converting constructor's + // parameter type without another user-defined conversion. + ConversionSet.addConversion(Con, FromType, Con->getParamDecl(0)->getType()); + } + + if (Optional + SelectedConversion = ConversionSet()) { + QualType RecordType{RD->getTypeForDecl(), 0}; + + ConversionSequence Result{FromType, RecordType}; + Result.AfterFirstStandard = SelectedConversion->Seq.AfterFirstStandard; + + ConversionSequence::UserDefinedConvertingConstructor Ctor; + Ctor.Fun = cast(SelectedConversion->ConversionFun); + Ctor.ConstructorParameterType = Ctor.Fun->getParamDecl(0)->getType(); + Ctor.UserDefinedType = RecordType; + Result.setConversion(Ctor); + + LLVM_DEBUG(llvm::dbgs() + << "<<< tryConvertingConstructors. Found result.\n"); + return Result; + } + + LLVM_DEBUG(llvm::dbgs() << "<<< tryConvertingConstructors. No conversion.\n"); + return {}; +} + +/// Returns whether an expression of LType can be used in an RType context, as +/// per the implicit conversion rules. +/// +/// Note: the result of this operation, unlike that of calculateMixability, is +/// **NOT** symmetric. +static MixData +approximateImplicitConversion(const TheCheck &Check, QualType LType, + QualType RType, const ASTContext &Ctx, + ImplicitConversionModellingMode ImplicitMode) { + LLVM_DEBUG(llvm::dbgs() << ">>> approximateImplicitConversion for LType:\n"; + LType.dump(llvm::dbgs(), Ctx); llvm::dbgs() << "\nand RType:\n"; + RType.dump(llvm::dbgs(), Ctx); + llvm::dbgs() << "\nimplicit mode: " << ImplicitMode << '\n';); + if (LType == RType) + return {MixFlags::Trivial, LType}; + + // An implicit conversion sequence consists of the following, in order: + // * Maybe standard conversion sequence. + // * Maybe user-defined conversion. + // * Maybe standard conversion sequence. + ConversionSequence ImplicitSeq{LType, RType}; + QualType WorkType = LType; + + Optional AfterFirstStdConv = + approximateStandardConversionSequence(Check, LType, RType, Ctx); + if (AfterFirstStdConv) { + LLVM_DEBUG(llvm::dbgs() << "--- approximateImplicitConversion. Standard " + "Pre-Conversion found!\n"); + ImplicitSeq.AfterFirstStandard = AfterFirstStdConv.getValue(); + WorkType = ImplicitSeq.AfterFirstStandard; + } + + if (ImplicitMode == ICMM_OneWaySingleStandardOnly) + // If the caller only requested modelling of a standard conversion, bail. + return {ImplicitSeq.AfterFirstStandard.isNull() + ? MixFlags::None + : MixFlags::ImplicitConversion, + ImplicitSeq}; + + if (Ctx.getLangOpts().CPlusPlus) { + bool FoundConversionOperator = false, FoundConvertingCtor = false; + + if (const auto *LRD = WorkType->getAsCXXRecordDecl()) { + Optional ConversionOperatorResult = + tryConversionOperators(Check, LRD, RType); + if (ConversionOperatorResult) { + LLVM_DEBUG(llvm::dbgs() << "--- approximateImplicitConversion. Found " + "conversion operator.\n"); + ImplicitSeq.update(ConversionOperatorResult.getValue()); + WorkType = ImplicitSeq.getTypeAfterUserDefinedConversion(); + FoundConversionOperator = true; + } + } + + if (const auto *RRD = RType->getAsCXXRecordDecl()) { + // Use the original "LType" here, and not WorkType, because the + // conversion to the converting constructors' parameters will be + // modelled in the recursive call. + Optional ConvCtorResult = + tryConvertingConstructors(Check, LType, RRD); + if (ConvCtorResult) { + LLVM_DEBUG(llvm::dbgs() << "--- approximateImplicitConversion. Found " + "converting constructor.\n"); + ImplicitSeq.update(ConvCtorResult.getValue()); + WorkType = ImplicitSeq.getTypeAfterUserDefinedConversion(); + FoundConvertingCtor = true; + } + } + + if (FoundConversionOperator && FoundConvertingCtor) { + // If both an operator and a ctor matches, the sequence is ambiguous. + LLVM_DEBUG(llvm::dbgs() + << "<<< approximateImplicitConversion. Found both " + "user-defined conversion kinds in the same sequence!\n"); + return {MixFlags::None}; + } + } + + // After the potential user-defined conversion, another standard conversion + // sequence might exist. + LLVM_DEBUG( + llvm::dbgs() + << "--- approximateImplicitConversion. Try to find post-conversion.\n"); + MixData SecondStdConv = approximateImplicitConversion( + Check, WorkType, RType, Ctx, ICMM_OneWaySingleStandardOnly); + if (SecondStdConv.indicatesMixability()) { + LLVM_DEBUG(llvm::dbgs() << "--- approximateImplicitConversion. Standard " + "Post-Conversion found!\n"); + + // The single-step modelling puts the modelled conversion into the "PreStd" + // variable in the recursive call, but from the PoV of this function, it is + // the post-conversion. + ImplicitSeq.AfterSecondStandard = + SecondStdConv.Conversion.AfterFirstStandard; + WorkType = ImplicitSeq.AfterSecondStandard; + } + + if (ImplicitSeq) { + LLVM_DEBUG(llvm::dbgs() + << "<<< approximateImplicitConversion. Found a conversion.\n"); + return {MixFlags::ImplicitConversion, ImplicitSeq}; + } + + LLVM_DEBUG( + llvm::dbgs() << "<<< approximateImplicitConversion. No match found.\n"); + return {MixFlags::None}; } static MixableParameterRange modelMixingRange(const TheCheck &Check, @@ -447,16 +1298,18 @@ static MixableParameterRange modelMixingRange(const TheCheck &Check, << "Check mix of #" << J << " against #" << I << "...\n"); Mix M{Jth, Ith, - calculateMixability(Check, Jth->getType(), Ith->getType(), Ctx)}; + calculateMixability(Check, Jth->getType(), Ith->getType(), Ctx, + Check.ModelImplicitConversions ? ICMM_All + : ICMM_None)}; LLVM_DEBUG(llvm::dbgs() << "Mix flags (raw) : " << formatMixFlags(M.flags()) << '\n'); M.sanitize(); LLVM_DEBUG(llvm::dbgs() << "Mix flags (after sanitize): " << formatMixFlags(M.flags()) << '\n'); - assert(M.flags() != MixFlags::Invalid && "All flags decayed!"); + assert(M.flagsValid() && "All flags decayed!"); - if (M.flags() != MixFlags::None) + if (M.mixable()) MixesOfIth.emplace_back(std::move(M)); } @@ -595,8 +1448,80 @@ static inline bool needsToPrintTypeInDiagnostic(const model::Mix &M) { (MixFlags::TypeAlias | MixFlags::ReferenceBind | MixFlags::Qualifiers)); } +/// Returns whether a particular Mix between the two parameters should have +/// implicit conversions elaborated. +static inline bool needsToElaborateImplicitConversion(const model::Mix &M) { + return hasFlag(M.flags(), model::MixFlags::ImplicitConversion); +} + namespace { +/// This class formats a conversion sequence into a "Ty1 -> Ty2 -> Ty3" line +/// that can be used in diagnostics. +struct FormattedConversionSequence { + std::string DiagnosticText; + + /// The formatted sequence is trivial if it is "Ty1 -> Ty2", but Ty1 and + /// Ty2 are the types that are shown in the code. A trivial diagnostic + /// does not need to be printed. + bool Trivial; + + FormattedConversionSequence(const PrintingPolicy &PP, + StringRef StartTypeAsDiagnosed, + const model::ConversionSequence &Conv, + StringRef DestinationTypeAsDiagnosed) { + Trivial = true; + llvm::raw_string_ostream OS{DiagnosticText}; + + // Print the type name as it is printed in other places in the diagnostic. + OS << '\'' << StartTypeAsDiagnosed << '\''; + std::string LastAddedType = StartTypeAsDiagnosed.str(); + std::size_t NumElementsAdded = 1; + + // However, the parameter's defined type might not be what the implicit + // conversion started with, e.g. if a typedef is found to convert. + std::string SeqBeginTypeStr = Conv.Begin.getAsString(PP); + std::string SeqEndTypeStr = Conv.End.getAsString(PP); + if (StartTypeAsDiagnosed != SeqBeginTypeStr) { + OS << " (as '" << SeqBeginTypeStr << "')"; + LastAddedType = SeqBeginTypeStr; + Trivial = false; + } + + auto AddType = [&](StringRef ToAdd) { + if (LastAddedType != ToAdd && ToAdd != SeqEndTypeStr) { + OS << " -> '" << ToAdd << "'"; + LastAddedType = ToAdd.str(); + ++NumElementsAdded; + } + }; + for (QualType InvolvedType : Conv.getInvolvedTypesInSequence()) + // Print every type that's unique in the sequence into the diagnosis. + AddType(InvolvedType.getAsString(PP)); + + if (LastAddedType != DestinationTypeAsDiagnosed) { + OS << " -> '" << DestinationTypeAsDiagnosed << "'"; + LastAddedType = DestinationTypeAsDiagnosed.str(); + ++NumElementsAdded; + } + + // Same reasoning as with the Begin, e.g. if the converted-to type is a + // typedef, it will not be the same inside the conversion sequence (where + // the model already tore off typedefs) as in the code. + if (DestinationTypeAsDiagnosed != SeqEndTypeStr) { + OS << " (as '" << SeqEndTypeStr << "')"; + LastAddedType = SeqEndTypeStr; + Trivial = false; + } + + if (Trivial && NumElementsAdded > 2) + // If the thing is still marked trivial but we have more than the + // from and to types added, it should not be trivial, and elaborated + // when printing the diagnostic. + Trivial = false; + } +}; + /// Retains the elements called with and returns whether the call is done with /// a new element. template class InsertOnce { @@ -677,7 +1602,9 @@ EasilySwappableParametersCheck::EasilySwappableParametersCheck( IgnoredParameterTypeSuffixes(optutils::parseStringList( Options.get("IgnoredParameterTypeSuffixes", DefaultIgnoredParameterTypeSuffixes))), - QualifiersMix(Options.get("QualifiersMix", DefaultQualifiersMix)) {} + QualifiersMix(Options.get("QualifiersMix", DefaultQualifiersMix)), + ModelImplicitConversions(Options.get("ModelImplicitConversions", + DefaultModelImplicitConversions)) {} void EasilySwappableParametersCheck::storeOptions( ClangTidyOptions::OptionMap &Opts) { @@ -687,6 +1614,7 @@ void EasilySwappableParametersCheck::storeOptions( Options.store(Opts, "IgnoredParameterTypeSuffixes", optutils::serializeStringList(IgnoredParameterTypeSuffixes)); Options.store(Opts, "QualifiersMix", QualifiersMix); + Options.store(Opts, "ModelImplicitConversions", ModelImplicitConversions); } void EasilySwappableParametersCheck::registerMatchers(MatchFinder *Finder) { @@ -740,12 +1668,17 @@ void EasilySwappableParametersCheck::check( } bool NeedsAnyTypeNote = llvm::any_of(R.Mixes, needsToPrintTypeInDiagnostic); + bool HasAnyImplicits = + llvm::any_of(R.Mixes, needsToElaborateImplicitConversion); const ParmVarDecl *First = R.getFirstParam(), *Last = R.getLastParam(); std::string FirstParamTypeAsWritten = First->getType().getAsString(PP); { StringRef DiagText; - if (NeedsAnyTypeNote) + if (HasAnyImplicits) + DiagText = "%0 adjacent parameters of %1 of convertible types are " + "easily swapped by mistake"; + else if (NeedsAnyTypeNote) DiagText = "%0 adjacent parameters of %1 of similar type are easily " "swapped by mistake"; else @@ -780,55 +1713,94 @@ void EasilySwappableParametersCheck::check( // too verbose. UniqueTypeAliasDiagnosticHelper UniqueTypeAlias; InsertOnce UniqueBindPower; + InsertOnce UniqueImplicitConversion; + + for (const model::Mix &M : R.Mixes) { + assert(M.mixable() && "Sentinel or false mix in result."); + if (!needsToPrintTypeInDiagnostic(M) && + !needsToElaborateImplicitConversion(M)) + continue; + + // Typedefs might result in the type of the variable needing to be + // emitted to a note diagnostic, so prepare it. + const ParmVarDecl *LVar = M.First; + const ParmVarDecl *RVar = M.Second; + QualType LType = LVar->getType(); + QualType RType = RVar->getType(); + QualType CommonType = M.commonUnderlyingType(); + std::string LTypeStr = LType.getAsString(PP); + std::string RTypeStr = RType.getAsString(PP); + std::string CommonTypeStr = CommonType.getAsString(PP); + + if (hasFlag(M.flags(), MixFlags::TypeAlias) && + UniqueTypeAlias(LType, RType, CommonType)) { + StringRef DiagText; + bool ExplicitlyPrintCommonType = false; + if (LTypeStr == CommonTypeStr || RTypeStr == CommonTypeStr) + if (hasFlag(M.flags(), MixFlags::Qualifiers)) + DiagText = "after resolving type aliases, '%0' and '%1' share a " + "common type"; + else + DiagText = + "after resolving type aliases, '%0' and '%1' are the same"; + else if (!CommonType.isNull()) { + DiagText = "after resolving type aliases, the common type of '%0' " + "and '%1' is '%2'"; + ExplicitlyPrintCommonType = true; + } + + auto Diag = + diag(LVar->getOuterLocStart(), DiagText, DiagnosticIDs::Note) + << LTypeStr << RTypeStr; + if (ExplicitlyPrintCommonType) + Diag << CommonTypeStr; + } + + if ((hasFlag(M.flags(), MixFlags::ReferenceBind) || + hasFlag(M.flags(), MixFlags::Qualifiers)) && + UniqueBindPower({LType, RType})) { + StringRef DiagText = "'%0' and '%1' parameters accept and bind the " + "same kind of values"; + diag(RVar->getOuterLocStart(), DiagText, DiagnosticIDs::Note) + << LTypeStr << RTypeStr; + } + + if (needsToElaborateImplicitConversion(M) && + UniqueImplicitConversion({LType, RType})) { + const model::ConversionSequence <R = + M.leftToRightConversionSequence(); + const model::ConversionSequence &RTL = + M.rightToLeftConversionSequence(); + FormattedConversionSequence LTRFmt{PP, LTypeStr, LTR, RTypeStr}; + FormattedConversionSequence RTLFmt{PP, RTypeStr, RTL, LTypeStr}; - for (const Mix &M : R.Mixes) { - assert(M.flags() >= MixFlags::Trivial && - "Sentinel or false mix in result."); - - if (needsToPrintTypeInDiagnostic(M)) { - // Typedefs might result in the type of the variable needing to be - // emitted to a note diagnostic, so prepare it. - const ParmVarDecl *LVar = M.First; - const ParmVarDecl *RVar = M.Second; - QualType LType = LVar->getType(); - QualType RType = RVar->getType(); - QualType CommonType = M.commonUnderlyingType(); - std::string LTypeStr = LType.getAsString(PP); - std::string RTypeStr = RType.getAsString(PP); - std::string CommonTypeStr = CommonType.getAsString(PP); - - if (hasFlag(M.flags(), MixFlags::TypeAlias) && - UniqueTypeAlias(LType, RType, CommonType)) { - StringRef DiagText; - bool ExplicitlyPrintCommonType = false; - if (LTypeStr == CommonTypeStr || RTypeStr == CommonTypeStr) - if (hasFlag(M.flags(), MixFlags::Qualifiers)) - DiagText = "after resolving type aliases, '%0' and '%1' share a " - "common type"; - else - DiagText = - "after resolving type aliases, '%0' and '%1' are the same"; - else { - DiagText = "after resolving type aliases, the common type of '%0' " - "and '%1' is '%2'"; - ExplicitlyPrintCommonType = true; - } + StringRef DiagText = "'%0' and '%1' may be implicitly converted"; + if (!LTRFmt.Trivial || !RTLFmt.Trivial) + DiagText = "'%0' and '%1' may be implicitly converted: %2, %3"; + { auto Diag = - diag(LVar->getOuterLocStart(), DiagText, DiagnosticIDs::Note) + diag(RVar->getOuterLocStart(), DiagText, DiagnosticIDs::Note) << LTypeStr << RTypeStr; - if (ExplicitlyPrintCommonType) - Diag << CommonTypeStr; - } - if ((hasFlag(M.flags(), MixFlags::ReferenceBind) || - hasFlag(M.flags(), MixFlags::Qualifiers)) && - UniqueBindPower({LType, RType})) { - StringRef DiagText = "'%0' and '%1' parameters accept and bind the " - "same kind of values"; - diag(RVar->getOuterLocStart(), DiagText, DiagnosticIDs::Note) - << LTypeStr << RTypeStr; + if (!LTRFmt.Trivial || !RTLFmt.Trivial) + Diag << LTRFmt.DiagnosticText << RTLFmt.DiagnosticText; } + + StringRef ConversionFunctionDiagText = + "the implicit conversion involves the " + "%select{|converting constructor|conversion operator}0 " + "declared here"; + if (const FunctionDecl *LFD = LTR.getUserDefinedConversionFunction()) + diag(LFD->getLocation(), ConversionFunctionDiagText, + DiagnosticIDs::Note) + << static_cast(LTR.UDConvKind) + << LTR.getUserDefinedConversionHighlight(); + if (const FunctionDecl *RFD = RTL.getUserDefinedConversionFunction()) + diag(RFD->getLocation(), ConversionFunctionDiagText, + DiagnosticIDs::Note) + << static_cast(RTL.UDConvKind) + << RTL.getUserDefinedConversionHighlight(); } } } diff --git a/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.h b/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.h index e3c58eb5d7013..b072c46680452 100644 --- a/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.h @@ -39,8 +39,14 @@ class EasilySwappableParametersCheck : public ClangTidyCheck { /// ignored. const std::vector IgnoredParameterTypeSuffixes; - /// Whether to consider an unqualified and a qualified type mixable. + /// Whether to consider differently qualified versions of the same type + /// mixable. const bool QualifiersMix; + + /// Whether to model implicit conversions "in full" (conditions apply) + /// during analysis and consider types that are implicitly convertible to + /// one another mixable. + const bool ModelImplicitConversions; }; } // namespace bugprone diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone-easily-swappable-parameters.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone-easily-swappable-parameters.rst index b9dafd3b32602..5158394e4e1fe 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone-easily-swappable-parameters.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone-easily-swappable-parameters.rst @@ -57,8 +57,40 @@ report longer mixable ranges. .. code-block:: c++ - void *memcpy(const void *Destination, void *Source, std::size_t N) {} + void *memcpy(const void *Destination, void *Source, std::size_t N) { /* ... */ } +.. option:: ModelImplicitConversions + + Whether to consider parameters of type ``T`` and ``U`` mixable if there + exists an implicit conversion from ``T`` to ``U`` and ``U`` to ``T``. + If `false`, the check will not consider implicitly convertible types for + mixability. + `True` turns warnings for implicit conversions on. + Defaults to `true`. + + The following examples produce a diagnostic only if + `ModelImplicitConversions` is enabled: + + .. code-block:: c++ + + void fun(int Int, double Double) { /* ... */ } + void compare(const char *CharBuf, std::string String) { /* ... */ } + + .. note:: + + Changing the qualifiers of an expression's type (e.g. from ``int`` to + ``const int``) is defined as an *implicit conversion* in the C++ + Standard. + However, the check separates this decision-making on the mixability of + differently qualified types based on whether `QualifiersMix` was + enabled. + + For example, the following code snippet will only produce a diagnostic + if **both** `QualifiersMix` and `ModelImplicitConversions` are enabled: + + .. code-block:: c++ + + void fun2(int Int, const double Double) { /* ... */ } Filtering options ^^^^^^^^^^^^^^^^^ @@ -133,7 +165,7 @@ None of the following cases produce a diagnostic: template int add(T X, U Y) { return X + Y }; - void TheseAreNotWarnedAbout() { + void theseAreNotWarnedAbout() { printf("%d %d\n", 1, 2); // Two ints passed, they could be swapped. someOldCFunction(1, 2, 3); // Similarly, multiple ints passed. @@ -153,14 +185,43 @@ not diagnosed. // Diagnosed: Explicit instantiation was done by the user, we can prove it // is the same type. - void Explicit(int A, Vector::element_type B) { /* ... */ } + void instantiated(int A, Vector::element_type B) { /* ... */ } // Diagnosed: The two parameter types are exactly the same. template - void Exact(typename Vector::element_type A, + void exact(typename Vector::element_type A, typename Vector::element_type B) { /* ... */ } // Skipped: The two parameters are both 'T' but we can not prove this // without actually instantiating. template - void FalseNegative(T A, typename Vector::element_type B) { /* ... */ } + void falseNegative(T A, typename Vector::element_type B) { /* ... */ } + +In the context of *implicit conversions* (when `ModelImplicitConversions` is +enabled), the modelling performed by the check +warns if the parameters are swappable and the swapped order matches implicit +conversions. +It does not model whether there exists an unrelated third type from which +*both* parameters can be given in a function call. +This means that in the following example, even while ``strs()`` clearly carries +the possibility to be called with swapped arguments (as long as the arguments +are string literals), will not be warned about. + +.. code-block:: c++ + + struct String { + String(const char *Buf); + }; + + struct StringView { + StringView(const char *Buf); + operator const char *() const; + }; + + // Skipped: Directly swapping expressions of the two type cannot mix. + // (Note: StringView -> const char * -> String would be **two** + // user-defined conversions, which is disallowed by the language.) + void strs(String Str, StringView SV) { /* ... */ } + + // Diagnosed: StringView implicitly converts to and from a buffer. + void cStr(StringView SV, const char *Buf() { /* ... */ } diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-ignore.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-ignore.cpp index a61c666ac9682..528363bef815f 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-ignore.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-ignore.cpp @@ -3,7 +3,8 @@ // RUN: {key: bugprone-easily-swappable-parameters.MinimumLength, value: 2}, \ // RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterNames, value: "\"\";Foo;Bar"}, \ // RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterTypeSuffixes, value: "T"}, \ -// RUN: {key: bugprone-easily-swappable-parameters.QualifiersMix, value: 0} \ +// RUN: {key: bugprone-easily-swappable-parameters.QualifiersMix, value: 0}, \ +// RUN: {key: bugprone-easily-swappable-parameters.ModelImplicitConversions, value: 0} \ // RUN: ]}' -- void ignoredUnnamed(int I, int, int) {} // NO-WARN: No >= 2 length of non-unnamed. diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-implicit-qualifiers.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-implicit-qualifiers.cpp new file mode 100644 index 0000000000000..42dbe9101126f --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-implicit-qualifiers.cpp @@ -0,0 +1,15 @@ +// RUN: %check_clang_tidy %s bugprone-easily-swappable-parameters %t \ +// RUN: -config='{CheckOptions: [ \ +// RUN: {key: bugprone-easily-swappable-parameters.MinimumLength, value: 2}, \ +// RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterNames, value: ""}, \ +// RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterTypeSuffixes, value: ""}, \ +// RUN: {key: bugprone-easily-swappable-parameters.QualifiersMix, value: 1}, \ +// RUN: {key: bugprone-easily-swappable-parameters.ModelImplicitConversions, value: 1} \ +// RUN: ]}' -- + +void numericAndQualifierConversion(int I, const double CD) { numericAndQualifierConversion(CD, I); } +// CHECK-MESSAGES: :[[@LINE-1]]:36: warning: 2 adjacent parameters of 'numericAndQualifierConversion' of convertible types are easily swapped by mistake [bugprone-easily-swappable-parameters] +// CHECK-MESSAGES: :[[@LINE-2]]:40: note: the first parameter in the range is 'I' +// CHECK-MESSAGES: :[[@LINE-3]]:56: note: the last parameter in the range is 'CD' +// CHECK-MESSAGES: :[[@LINE-4]]:43: note: 'int' and 'const double' parameters accept and bind the same kind of values +// CHECK-MESSAGES: :[[@LINE-5]]:43: note: 'int' and 'const double' may be implicitly converted: 'int' -> 'const double' (as 'double'), 'const double' (as 'double') -> 'int' diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-implicits.c b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-implicits.c new file mode 100644 index 0000000000000..48a3e10d3e316 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-implicits.c @@ -0,0 +1,75 @@ +// RUN: %check_clang_tidy %s bugprone-easily-swappable-parameters %t \ +// RUN: -config='{CheckOptions: [ \ +// RUN: {key: bugprone-easily-swappable-parameters.MinimumLength, value: 2}, \ +// RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterNames, value: ""}, \ +// RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterTypeSuffixes, value: ""}, \ +// RUN: {key: bugprone-easily-swappable-parameters.QualifiersMix, value: 0}, \ +// RUN: {key: bugprone-easily-swappable-parameters.ModelImplicitConversions, value: 1} \ +// RUN: ]}' -- + +void implicitDoesntBreakOtherStuff(int A, int B) {} +// CHECK-MESSAGES: :[[@LINE-1]]:36: warning: 2 adjacent parameters of 'implicitDoesntBreakOtherStuff' of similar type ('int') are easily swapped by mistake [bugprone-easily-swappable-parameters] +// CHECK-MESSAGES: :[[@LINE-2]]:40: note: the first parameter in the range is 'A' +// CHECK-MESSAGES: :[[@LINE-3]]:47: note: the last parameter in the range is 'B' + +void arrayAndPtr1(int *IP, int IA[]) { arrayAndPtr1(IA, IP); } +// CHECK-MESSAGES: :[[@LINE-1]]:19: warning: 2 adjacent parameters of 'arrayAndPtr1' of similar type ('int *') +// CHECK-MESSAGES: :[[@LINE-2]]:24: note: the first parameter in the range is 'IP' +// CHECK-MESSAGES: :[[@LINE-3]]:32: note: the last parameter in the range is 'IA' + +void arrayAndPtr2(int *IP, int IA[8]) { arrayAndPtr2(IA, IP); } +// CHECK-MESSAGES: :[[@LINE-1]]:19: warning: 2 adjacent parameters of 'arrayAndPtr2' of similar type ('int *') +// CHECK-MESSAGES: :[[@LINE-2]]:24: note: the first parameter in the range is 'IP' +// CHECK-MESSAGES: :[[@LINE-3]]:32: note: the last parameter in the range is 'IA' + +void arrayAndElement(int I, int IA[]) {} // NO-WARN. + +void numericConversion1(int I, double D) { numericConversion1(D, I); } +// CHECK-MESSAGES: :[[@LINE-1]]:25: warning: 2 adjacent parameters of 'numericConversion1' of convertible types are easily swapped by mistake [bugprone-easily-swappable-parameters] +// CHECK-MESSAGES: :[[@LINE-2]]:29: note: the first parameter in the range is 'I' +// CHECK-MESSAGES: :[[@LINE-3]]:39: note: the last parameter in the range is 'D' +// CHECK-MESSAGES: :[[@LINE-4]]:32: note: 'int' and 'double' may be implicitly converted{{$}} + +void numericConversion2(int I, short S) { numericConversion2(S, I); } +// CHECK-MESSAGES: :[[@LINE-1]]:25: warning: 2 adjacent parameters of 'numericConversion2' of convertible types +// CHECK-MESSAGES: :[[@LINE-2]]:29: note: the first parameter in the range is 'I' +// CHECK-MESSAGES: :[[@LINE-3]]:38: note: the last parameter in the range is 'S' +// CHECK-MESSAGES: :[[@LINE-4]]:32: note: 'int' and 'short' may be implicitly converted{{$}} + +void numericConversion3(float F, unsigned long UL) { numericConversion3(UL, F); } +// CHECK-MESSAGES: :[[@LINE-1]]:25: warning: 2 adjacent parameters of 'numericConversion3' of convertible types +// CHECK-MESSAGES: :[[@LINE-2]]:31: note: the first parameter in the range is 'F' +// CHECK-MESSAGES: :[[@LINE-3]]:48: note: the last parameter in the range is 'UL' +// CHECK-MESSAGES: :[[@LINE-4]]:34: note: 'float' and 'unsigned long' may be implicitly converted{{$}} + +enum Unscoped { U_A, + U_B }; +enum UnscopedFixed : char { UF_A, + UF_B }; + +void numericConversion4(int I, enum Unscoped U) { numericConversion4(U, I); } +// CHECK-MESSAGES: :[[@LINE-1]]:25: warning: 2 adjacent parameters of 'numericConversion4' of convertible types +// CHECK-MESSAGES: :[[@LINE-2]]:29: note: the first parameter in the range is 'I' +// CHECK-MESSAGES: :[[@LINE-3]]:46: note: the last parameter in the range is 'U' +// CHECK-MESSAGES: :[[@LINE-4]]:32: note: 'int' and 'enum Unscoped' may be implicitly converted{{$}} + +void numericConversion5(int I, enum UnscopedFixed UF) { numericConversion5(UF, I); } +// CHECK-MESSAGES: :[[@LINE-1]]:25: warning: 2 adjacent parameters of 'numericConversion5' of convertible types +// CHECK-MESSAGES: :[[@LINE-2]]:29: note: the first parameter in the range is 'I' +// CHECK-MESSAGES: :[[@LINE-3]]:51: note: the last parameter in the range is 'UF' +// CHECK-MESSAGES: :[[@LINE-4]]:32: note: 'int' and 'enum UnscopedFixed' may be implicitly converted{{$}} + +void numericConversion7(double D, enum Unscoped U) { numericConversion7(U, D); } +// CHECK-MESSAGES: :[[@LINE-1]]:25: warning: 2 adjacent parameters of 'numericConversion7' of convertible types +// CHECK-MESSAGES: :[[@LINE-2]]:32: note: the first parameter in the range is 'D' +// CHECK-MESSAGES: :[[@LINE-3]]:49: note: the last parameter in the range is 'U' +// CHECK-MESSAGES: :[[@LINE-4]]:35: note: 'double' and 'enum Unscoped' may be implicitly converted{{$}} + +void numericConversion8(double D, enum UnscopedFixed UF) { numericConversion8(UF, D); } +// CHECK-MESSAGES: :[[@LINE-1]]:25: warning: 2 adjacent parameters of 'numericConversion8' of convertible types +// CHECK-MESSAGES: :[[@LINE-2]]:32: note: the first parameter in the range is 'D' +// CHECK-MESSAGES: :[[@LINE-3]]:54: note: the last parameter in the range is 'UF' +// CHECK-MESSAGES: :[[@LINE-4]]:35: note: 'double' and 'enum UnscopedFixed' may be implicitly converted{{$}} + +void pointeeConverison(int *IP, double *DP) { pointeeConversion(DP, IP); } +// NO-WARN: Even though this is possible in C, a swap is diagnosed by the compiler. diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-implicits.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-implicits.cpp new file mode 100644 index 0000000000000..7205d87a41e78 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-implicits.cpp @@ -0,0 +1,303 @@ +// RUN: %check_clang_tidy -std=c++17 %s bugprone-easily-swappable-parameters %t \ +// RUN: -config='{CheckOptions: [ \ +// RUN: {key: bugprone-easily-swappable-parameters.MinimumLength, value: 2}, \ +// RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterNames, value: ""}, \ +// RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterTypeSuffixes, value: ""}, \ +// RUN: {key: bugprone-easily-swappable-parameters.QualifiersMix, value: 0}, \ +// RUN: {key: bugprone-easily-swappable-parameters.ModelImplicitConversions, value: 1} \ +// RUN: ]}' -- + +void implicitDoesntBreakOtherStuff(int A, int B) {} +// CHECK-MESSAGES: :[[@LINE-1]]:36: warning: 2 adjacent parameters of 'implicitDoesntBreakOtherStuff' of similar type ('int') are easily swapped by mistake [bugprone-easily-swappable-parameters] +// CHECK-MESSAGES: :[[@LINE-2]]:40: note: the first parameter in the range is 'A' +// CHECK-MESSAGES: :[[@LINE-3]]:47: note: the last parameter in the range is 'B' + +void arrayAndPtr1(int *IP, int IA[]) { arrayAndPtr1(IA, IP); } +// CHECK-MESSAGES: :[[@LINE-1]]:19: warning: 2 adjacent parameters of 'arrayAndPtr1' of similar type ('int *') +// CHECK-MESSAGES: :[[@LINE-2]]:24: note: the first parameter in the range is 'IP' +// CHECK-MESSAGES: :[[@LINE-3]]:32: note: the last parameter in the range is 'IA' + +void arrayAndPtr2(int *IP, int IA[8]) { arrayAndPtr2(IA, IP); } +// CHECK-MESSAGES: :[[@LINE-1]]:19: warning: 2 adjacent parameters of 'arrayAndPtr2' of similar type ('int *') +// CHECK-MESSAGES: :[[@LINE-2]]:24: note: the first parameter in the range is 'IP' +// CHECK-MESSAGES: :[[@LINE-3]]:32: note: the last parameter in the range is 'IA' + +void arrayAndElement(int I, int IA[]) {} // NO-WARN. + +void numericConversion1(int I, double D) { numericConversion1(D, I); } +// CHECK-MESSAGES: :[[@LINE-1]]:25: warning: 2 adjacent parameters of 'numericConversion1' of convertible types are easily swapped by mistake [bugprone-easily-swappable-parameters] +// CHECK-MESSAGES: :[[@LINE-2]]:29: note: the first parameter in the range is 'I' +// CHECK-MESSAGES: :[[@LINE-3]]:39: note: the last parameter in the range is 'D' +// CHECK-MESSAGES: :[[@LINE-4]]:32: note: 'int' and 'double' may be implicitly converted{{$}} + +void numericConversion2(int I, short S) { numericConversion2(S, I); } +// CHECK-MESSAGES: :[[@LINE-1]]:25: warning: 2 adjacent parameters of 'numericConversion2' of convertible types +// CHECK-MESSAGES: :[[@LINE-2]]:29: note: the first parameter in the range is 'I' +// CHECK-MESSAGES: :[[@LINE-3]]:38: note: the last parameter in the range is 'S' +// CHECK-MESSAGES: :[[@LINE-4]]:32: note: 'int' and 'short' may be implicitly converted{{$}} + +void numericConversion3(float F, unsigned long long ULL) { numericConversion3(ULL, F); } +// CHECK-MESSAGES: :[[@LINE-1]]:25: warning: 2 adjacent parameters of 'numericConversion3' of convertible types +// CHECK-MESSAGES: :[[@LINE-2]]:31: note: the first parameter in the range is 'F' +// CHECK-MESSAGES: :[[@LINE-3]]:53: note: the last parameter in the range is 'ULL' +// CHECK-MESSAGES: :[[@LINE-4]]:34: note: 'float' and 'unsigned long long' may be implicitly converted{{$}} + +enum Unscoped { U_A, + U_B }; +enum UnscopedFixed : char { UF_A, + UF_B }; +enum struct Scoped { A, + B }; + +void numericConversion4(int I, Unscoped U) {} // NO-WARN. + +void numericConversion5(int I, UnscopedFixed UF) {} // NO-WARN. + +void numericConversion6(int I, Scoped S) {} // NO-WARN. + +void numericConversion7(double D, Unscoped U) {} // NO-WARN. + +void numericConversion8(double D, UnscopedFixed UF) {} // NO-WARN. + +void numericConversion9(double D, Scoped S) {} // NO-WARN. + +void numericConversionMultiUnique(int I, double D1, double D2) {} +// CHECK-MESSAGES: :[[@LINE-1]]:35: warning: 3 adjacent parameters of 'numericConversionMultiUnique' of convertible types +// CHECK-MESSAGES: :[[@LINE-2]]:39: note: the first parameter in the range is 'I' +// CHECK-MESSAGES: :[[@LINE-3]]:60: note: the last parameter in the range is 'D2' +// CHECK-MESSAGES: :[[@LINE-4]]:42: note: 'int' and 'double' may be implicitly converted{{$}} +// (Note: int<->double conversion for I<->D2 not diagnosed again.) + +typedef int MyInt; +using MyDouble = double; + +void numericConversion10(MyInt MI, MyDouble MD) { numericConversion10(MD, MI); } +// CHECK-MESSAGES: :[[@LINE-1]]:26: warning: 2 adjacent parameters of 'numericConversion10' of convertible types +// CHECK-MESSAGES: :[[@LINE-2]]:32: note: the first parameter in the range is 'MI' +// CHECK-MESSAGES: :[[@LINE-3]]:45: note: the last parameter in the range is 'MD' +// CHECK-MESSAGES: :[[@LINE-4]]:36: note: 'MyInt' and 'MyDouble' may be implicitly converted: 'MyInt' (as 'int') -> 'MyDouble' (as 'double'), 'MyDouble' (as 'double') -> 'MyInt' (as 'int') + +void numericAndQualifierConversion(int I, const double CD) { numericAndQualifierConversion(CD, I); } +// NO-WARN: Qualifier mixing is handled by a different check option. + +struct FromInt { + FromInt(int); +}; + +void oneWayConversion1(int I, FromInt FI) {} // NO-WARN: One-way. + +struct AmbiguousConvCtor { + AmbiguousConvCtor(int); + AmbiguousConvCtor(double); +}; + +void ambiguous1(long L, AmbiguousConvCtor ACC) {} // NO-WARN: Ambiguous, one-way. + +struct ToInt { + operator int() const; +}; + +void oneWayConversion2(ToInt TI, int I) {} // NO-WARN: One-way. + +struct AmbiguousConvOp { + operator int() const; + operator double() const; +}; + +void ambiguous2(AmbiguousConvOp ACO, long L) {} // NO-WARN: Ambiguous, one-way. + +struct AmbiguousEverything1; +struct AmbiguousEverything2; +struct AmbiguousEverything1 { + AmbiguousEverything1(); + AmbiguousEverything1(AmbiguousEverything2); + operator AmbiguousEverything2() const; +}; +struct AmbiguousEverything2 { + AmbiguousEverything2(); + AmbiguousEverything2(AmbiguousEverything1); + operator AmbiguousEverything1() const; +}; + +void ambiguous3(AmbiguousEverything1 AE1, AmbiguousEverything2 AE2) {} // NO-WARN: Ambiguous. + +struct Integer { + Integer(int); + operator int() const; +}; + +void userDefinedConversion1(int I1, Integer I2) { userDefinedConversion1(I2, I1); } +// CHECK-MESSAGES: :[[@LINE-1]]:29: warning: 2 adjacent parameters of 'userDefinedConversion1' of convertible types +// CHECK-MESSAGES: :[[@LINE-2]]:33: note: the first parameter in the range is 'I1' +// CHECK-MESSAGES: :[[@LINE-3]]:45: note: the last parameter in the range is 'I2' +// CHECK-MESSAGES: :[[@LINE-4]]:37: note: 'int' and 'Integer' may be implicitly converted{{$}} +// CHECK-MESSAGES: :[[@LINE-9]]:3: note: the implicit conversion involves the converting constructor declared here +// CHECK-MESSAGES: :[[@LINE-9]]:3: note: the implicit conversion involves the conversion operator declared here + +struct Ambiguous { + Ambiguous(int); + Ambiguous(double); + operator long() const; + operator float() const; +}; + +void ambiguous3(char C, Ambiguous A) {} // NO-WARN: Ambiguous. + +struct CDouble { + CDouble(const double &); + operator const double &() const; +}; + +void userDefinedConversion2(double D, CDouble CD) { userDefinedConversion2(CD, D); } +// CHECK-MESSAGES: :[[@LINE-1]]:29: warning: 2 adjacent parameters of 'userDefinedConversion2' of convertible types +// CHECK-MESSAGES: :[[@LINE-2]]:36: note: the first parameter in the range is 'D' +// CHECK-MESSAGES: :[[@LINE-3]]:47: note: the last parameter in the range is 'CD' +// CHECK-MESSAGES: :[[@LINE-4]]:39: note: 'double' and 'CDouble' may be implicitly converted: 'double' -> 'const double &' -> 'CDouble', 'CDouble' -> 'const double &' -> 'double' +// CHECK-MESSAGES: :[[@LINE-9]]:3: note: the implicit conversion involves the converting constructor declared here +// CHECK-MESSAGES: :[[@LINE-9]]:3: note: the implicit conversion involves the conversion operator declared here + +void userDefinedConversion3(int I, CDouble CD) { userDefinedConversion3(CD, I); } +// CHECK-MESSAGES: :[[@LINE-1]]:29: warning: 2 adjacent parameters of 'userDefinedConversion3' of convertible types +// CHECK-MESSAGES: :[[@LINE-2]]:33: note: the first parameter in the range is 'I' +// CHECK-MESSAGES: :[[@LINE-3]]:44: note: the last parameter in the range is 'CD' +// CHECK-MESSAGES: :[[@LINE-4]]:36: note: 'int' and 'CDouble' may be implicitly converted: 'int' -> 'double' -> 'const double &' -> 'CDouble', 'CDouble' -> 'const double &' -> 'int' +// CHECK-MESSAGES: :[[@LINE-17]]:3: note: the implicit conversion involves the converting constructor declared here +// CHECK-MESSAGES: :[[@LINE-17]]:3: note: the implicit conversion involves the conversion operator declared here + +struct TDInt { + TDInt(const MyInt &); + operator MyInt() const; +}; + +void userDefinedConversion4(int I, TDInt TDI) { userDefinedConversion4(TDI, I); } +// CHECK-MESSAGES: :[[@LINE-1]]:29: warning: 2 adjacent parameters of 'userDefinedConversion4' of convertible types +// CHECK-MESSAGES: :[[@LINE-2]]:33: note: the first parameter in the range is 'I' +// CHECK-MESSAGES: :[[@LINE-3]]:42: note: the last parameter in the range is 'TDI' +// CHECK-MESSAGES: :[[@LINE-4]]:36: note: 'int' and 'TDInt' may be implicitly converted: 'int' -> 'const MyInt &' -> 'TDInt', 'TDInt' -> 'MyInt' -> 'int' +// CHECK-MESSAGES: :[[@LINE-9]]:3: note: the implicit conversion involves the converting constructor declared here +// CHECK-MESSAGES: :[[@LINE-9]]:3: note: the implicit conversion involves the conversion operator declared here + +struct TDIntDouble { + TDIntDouble(const MyInt &); + TDIntDouble(const MyDouble &); + operator MyInt() const; + operator MyDouble() const; +}; + +void userDefinedConversion5(int I, TDIntDouble TDID) { userDefinedConversion5(TDID, I); } +// CHECK-MESSAGES: :[[@LINE-1]]:29: warning: 2 adjacent parameters of 'userDefinedConversion5' of convertible types +// CHECK-MESSAGES: :[[@LINE-2]]:33: note: the first parameter in the range is 'I' +// CHECK-MESSAGES: :[[@LINE-3]]:48: note: the last parameter in the range is 'TDID' +// CHECK-MESSAGES: :[[@LINE-4]]:36: note: 'int' and 'TDIntDouble' may be implicitly converted: 'int' -> 'const MyInt &' -> 'TDIntDouble', 'TDIntDouble' -> 'MyInt' -> 'int' +// CHECK-MESSAGES: :[[@LINE-11]]:3: note: the implicit conversion involves the converting constructor declared here +// CHECK-MESSAGES: :[[@LINE-10]]:3: note: the implicit conversion involves the conversion operator declared here + +void userDefinedConversion6(double D, TDIntDouble TDID) { userDefinedConversion6(TDID, D); } +// CHECK-MESSAGES: :[[@LINE-1]]:29: warning: 2 adjacent parameters of 'userDefinedConversion6' of convertible types +// CHECK-MESSAGES: :[[@LINE-2]]:36: note: the first parameter in the range is 'D' +// CHECK-MESSAGES: :[[@LINE-3]]:51: note: the last parameter in the range is 'TDID' +// CHECK-MESSAGES: :[[@LINE-4]]:39: note: 'double' and 'TDIntDouble' may be implicitly converted: 'double' -> 'const MyDouble &' -> 'TDIntDouble', 'TDIntDouble' -> 'MyDouble' -> 'double' +// CHECK-MESSAGES: :[[@LINE-18]]:3: note: the implicit conversion involves the converting constructor declared here +// CHECK-MESSAGES: :[[@LINE-17]]:3: note: the implicit conversion involves the conversion operator declared here + +void userDefinedConversion7(char C, TDIntDouble TDID) {} // NO-WARN: Ambiguous. + +struct Forward1; +struct Forward2; + +void incomplete(Forward1 *F1, Forward2 *F2) {} // NO-WARN: Do not compare incomplete types. + +void pointeeConverison(int *IP, double *DP) {} // NO-WARN. + +void pointerConversion1(void *VP, int *IP) {} // NO-WARN: One-way. + +struct PointerBox { + PointerBox(void *); + operator int *() const; +}; + +void pointerConversion2(PointerBox PB, int *IP) { pointerConversion2(IP, PB); } +// CHECK-MESSAGES: :[[@LINE-1]]:25: warning: 2 adjacent parameters of 'pointerConversion2' of convertible types +// CHECK-MESSAGES: :[[@LINE-2]]:36: note: the first parameter in the range is 'PB' +// CHECK-MESSAGES: :[[@LINE-3]]:45: note: the last parameter in the range is 'IP' +// CHECK-MESSAGES: :[[@LINE-4]]:40: note: 'PointerBox' and 'int *' may be implicitly converted: 'PointerBox' -> 'int *', 'int *' -> 'void *' -> 'PointerBox' +// CHECK-MESSAGES: :[[@LINE-8]]:3: note: the implicit conversion involves the conversion operator declared here +// CHECK-MESSAGES: :[[@LINE-10]]:3: note: the implicit conversion involves the converting constructor declared here + +void pointerConversion3(PointerBox PB, double *DP) {} // NO-WARN: Not convertible. + +struct Base {}; +struct Derived : Base {}; + +void pointerConversion4(Base *BP, Derived *DP) {} // NO-WARN: One-way. + +struct BaseAndDerivedInverter { + BaseAndDerivedInverter(Base); // Takes a Base + operator Derived() const; // and becomes a Derived. +}; + +void pointerConversion5(BaseAndDerivedInverter BADI, Derived D) { pointerConversion5(D, BADI); } +// CHECK-MESSAGES: :[[@LINE-1]]:25: warning: 2 adjacent parameters of 'pointerConversion5' of convertible types +// CHECK-MESSAGES: :[[@LINE-2]]:48: note: the first parameter in the range is 'BADI' +// CHECK-MESSAGES: :[[@LINE-3]]:62: note: the last parameter in the range is 'D' +// CHECK-MESSAGES: :[[@LINE-4]]:54: note: 'BaseAndDerivedInverter' and 'Derived' may be implicitly converted: 'BaseAndDerivedInverter' -> 'Derived', 'Derived' -> 'Base' -> 'BaseAndDerivedInverter' +// CHECK-MESSAGES: :[[@LINE-8]]:3: note: the implicit conversion involves the conversion operator declared here +// CHECK-MESSAGES: :[[@LINE-10]]:3: note: the implicit conversion involves the converting constructor declared here + +void pointerConversion6(void (*NTF)() noexcept, void (*TF)()) {} +// NO-WARN: This call cannot be swapped, even if "getCanonicalType()" believes otherwise. + +using NonThrowingFunction = void (*)() noexcept; + +struct NoexceptMaker { + NoexceptMaker(void (*ThrowingFunction)()); + // Need to use a typedef here because + // "conversion function cannot convert to a function type". + // operator (void (*)() noexcept) () const; + operator NonThrowingFunction() const; +}; + +void pointerConversion7(void (*NTF)() noexcept, NoexceptMaker NM) { pointerConversion7(NM, NTF); } +// CHECK-MESSAGES: :[[@LINE-1]]:25: warning: 2 adjacent parameters of 'pointerConversion7' of convertible types +// CHECK-MESSAGES: :[[@LINE-2]]:32: note: the first parameter in the range is 'NTF' +// CHECK-MESSAGES: :[[@LINE-3]]:63: note: the last parameter in the range is 'NM' +// CHECK-MESSAGES: :[[@LINE-4]]:49: note: 'void (*)() noexcept' and 'NoexceptMaker' may be implicitly converted: 'void (*)() noexcept' -> 'void (*)()' -> 'NoexceptMaker', 'NoexceptMaker' -> 'NonThrowingFunction' -> 'void (*)() noexcept' +// CHECK-MESSAGES: :[[@LINE-12]]:3: note: the implicit conversion involves the converting constructor declared here +// CHECK-MESSAGES: :[[@LINE-9]]:3: note: the implicit conversion involves the conversion operator declared here + +struct ToType; +struct MiddleStep1 { + operator ToType() const; +}; +struct FromType { + operator MiddleStep1() const; +}; +struct MiddleStep2 { + operator FromType() const; +}; +struct ToType { + operator MiddleStep2() const; +}; + +void f(FromType F, ToType T) { // NO-WARN: The path takes two steps. + MiddleStep2 MS2 = T; + FromType F2 = MS2; + + MiddleStep1 MS1 = F; + ToType T2 = MS1; + + f(F2, T2); +} + +// Synthesised example from OpenCV. +template +struct TemplateConversion { + template + operator TemplateConversion() const; +}; +using IntConverter = TemplateConversion; +using FloatConverter = TemplateConversion; + +void templateConversion(IntConverter IC, FloatConverter FC) { templateConversion(FC, IC); } +// Note: even though this swap is possible, we do not model things when it comes to "template magic". +// But at least the check should not crash! diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-len2.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-len2.cpp index f0c1c57e25848..3bcdee2e49796 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-len2.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-len2.cpp @@ -3,7 +3,8 @@ // RUN: {key: bugprone-easily-swappable-parameters.MinimumLength, value: 2}, \ // RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterNames, value: ""}, \ // RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterTypeSuffixes, value: ""}, \ -// RUN: {key: bugprone-easily-swappable-parameters.QualifiersMix, value: 0} \ +// RUN: {key: bugprone-easily-swappable-parameters.QualifiersMix, value: 0}, \ +// RUN: {key: bugprone-easily-swappable-parameters.ModelImplicitConversions, value: 0} \ // RUN: ]}' -- namespace std { @@ -340,3 +341,6 @@ void memberTypedefDependentReference3( // CHECK-MESSAGES: :[[@LINE-3]]:38: note: the first parameter in the range is 'E' // CHECK-MESSAGES: :[[@LINE-3]]:45: note: the last parameter in the range is 'R' // CHECK-MESSAGES: :[[@LINE-4]]:5: note: 'typename Vector::element_type' and 'const typename Vector::element_type &' parameters accept and bind the same kind of values + +void functionPrototypeLosesNoexcept(void (*NonThrowing)() noexcept, void (*Throwing)()) {} +// NO-WARN: This call cannot be swapped, even if "getCanonicalType()" believes otherwise. diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-len3.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-len3.cpp index c833077285be9..4b1f086aba948 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-len3.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-len3.cpp @@ -3,7 +3,8 @@ // RUN: {key: bugprone-easily-swappable-parameters.MinimumLength, value: 3}, \ // RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterNames, value: ""}, \ // RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterTypeSuffixes, value: ""}, \ -// RUN: {key: bugprone-easily-swappable-parameters.QualifiersMix, value: 0} \ +// RUN: {key: bugprone-easily-swappable-parameters.QualifiersMix, value: 0}, \ +// RUN: {key: bugprone-easily-swappable-parameters.ModelImplicitConversions, value: 0} \ // RUN: ]}' -- int add(int Left, int Right) { return Left + Right; } // NO-WARN: Only 2 parameters. diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-qualifiermixing.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-qualifiermixing.cpp index 7b9fdceda7465..02d41661802d2 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-qualifiermixing.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-qualifiermixing.cpp @@ -3,7 +3,8 @@ // RUN: {key: bugprone-easily-swappable-parameters.MinimumLength, value: 2}, \ // RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterNames, value: ""}, \ // RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterTypeSuffixes, value: ""}, \ -// RUN: {key: bugprone-easily-swappable-parameters.QualifiersMix, value: 1} \ +// RUN: {key: bugprone-easily-swappable-parameters.QualifiersMix, value: 1}, \ +// RUN: {key: bugprone-easily-swappable-parameters.ModelImplicitConversions, value: 0} \ // RUN: ]}' -- typedef int MyInt1; diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters.c b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters.c index 591a5cb353ee8..06a3993472ae6 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters.c +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters.c @@ -3,7 +3,8 @@ // RUN: {key: bugprone-easily-swappable-parameters.MinimumLength, value: 2}, \ // RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterNames, value: ""}, \ // RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterTypeSuffixes, value: "bool;MyBool;struct U;MAKE_LOGICAL_TYPE(int)"}, \ -// RUN: {key: bugprone-easily-swappable-parameters.QualifiersMix, value: 0} \ +// RUN: {key: bugprone-easily-swappable-parameters.QualifiersMix, value: 0}, \ +// RUN: {key: bugprone-easily-swappable-parameters.ModelImplicitConversions, value: 0} \ // RUN: ]}' -- -x c #define bool _Bool From b9ece034611239d008ac11d8bb9b3af91313c41f Mon Sep 17 00:00:00 2001 From: Whisperity Date: Wed, 8 Apr 2020 16:33:15 +0200 Subject: [PATCH 016/619] [clang-tidy] Suppress reports to similarly used parameters in 'bugprone-easily-swappable-parameters' There are several types of functions and various reasons why some "swappable parameters" cannot be fixed with changing the parameters' types, etc. The most common example might be int `min(int a, int b)`... no matter what you do, the two parameters must remain the same type. The **filtering heuristic** implemented in this patch deals with trying to find such functions during the modelling and building of the swappable parameter range. If the parameter currently scrutinised matches either of the predicates below, it will be regarded as **not swappable** even if the type of the parameter matches. Reviewed By: aaron.ballman Differential Revision: http://reviews.llvm.org/D78652 --- .../EasilySwappableParametersCheck.cpp | 306 +++++++++++++++++- .../bugprone/EasilySwappableParametersCheck.h | 4 + .../bugprone-easily-swappable-parameters.rst | 28 ++ ...one-easily-swappable-parameters-ignore.cpp | 3 +- ...appable-parameters-implicit-qualifiers.cpp | 3 +- ...ne-easily-swappable-parameters-implicits.c | 3 +- ...-easily-swappable-parameters-implicits.cpp | 3 +- ...prone-easily-swappable-parameters-len2.cpp | 3 +- ...prone-easily-swappable-parameters-len3.cpp | 3 +- ...y-swappable-parameters-qualifiermixing.cpp | 3 +- ...-easily-swappable-parameters-relatedness.c | 30 ++ ...asily-swappable-parameters-relatedness.cpp | 231 +++++++++++++ .../bugprone-easily-swappable-parameters.c | 3 +- 13 files changed, 609 insertions(+), 14 deletions(-) create mode 100644 clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-relatedness.c create mode 100644 clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-relatedness.cpp diff --git a/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.cpp index c4896979d2e99..247953c25a3b8 100644 --- a/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.cpp @@ -9,6 +9,7 @@ #include "EasilySwappableParametersCheck.h" #include "../utils/OptionsUtils.h" #include "clang/AST/ASTContext.h" +#include "clang/AST/RecursiveASTVisitor.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/Lex/Lexer.h" #include "llvm/ADT/SmallSet.h" @@ -65,6 +66,10 @@ static constexpr bool DefaultQualifiersMix = false; /// The default value for the ModelImplicitConversions check option. static constexpr bool DefaultModelImplicitConversions = true; +/// The default value for suppressing diagnostics about parameters that are +/// used together. +static constexpr bool DefaultSuppressParametersUsedTogether = true; + using namespace clang::ast_matchers; namespace clang { @@ -74,7 +79,12 @@ namespace bugprone { using TheCheck = EasilySwappableParametersCheck; namespace filter { +class SimilarlyUsedParameterPairSuppressor; + static bool isIgnoredParameter(const TheCheck &Check, const ParmVarDecl *Node); +static inline bool +isSimilarlyUsedParameter(const SimilarlyUsedParameterPairSuppressor &Suppressor, + const ParmVarDecl *Param1, const ParmVarDecl *Param2); } // namespace filter namespace model { @@ -1269,9 +1279,9 @@ approximateImplicitConversion(const TheCheck &Check, QualType LType, return {MixFlags::None}; } -static MixableParameterRange modelMixingRange(const TheCheck &Check, - const FunctionDecl *FD, - std::size_t StartIndex) { +static MixableParameterRange modelMixingRange( + const TheCheck &Check, const FunctionDecl *FD, std::size_t StartIndex, + const filter::SimilarlyUsedParameterPairSuppressor &UsageBasedSuppressor) { std::size_t NumParams = FD->getNumParams(); assert(StartIndex < NumParams && "out of bounds for start"); const ASTContext &Ctx = FD->getASTContext(); @@ -1297,6 +1307,19 @@ static MixableParameterRange modelMixingRange(const TheCheck &Check, LLVM_DEBUG(llvm::dbgs() << "Check mix of #" << J << " against #" << I << "...\n"); + if (isSimilarlyUsedParameter(UsageBasedSuppressor, Ith, Jth)) { + // Consider the two similarly used parameters to not be possible in a + // mix-up at the user's request, if they enabled this heuristic. + LLVM_DEBUG(llvm::dbgs() << "Parameters #" << I << " and #" << J + << " deemed related, ignoring...\n"); + + // If the parameter #I and #J mixes, then I is mixable with something + // in the current range, so the range has to be broken and I not + // included. + MixesOfIth.clear(); + break; + } + Mix M{Jth, Ith, calculateMixability(Check, Jth->getType(), Ith->getType(), Ctx, Check.ModelImplicitConversions ? ICMM_All @@ -1331,6 +1354,12 @@ static MixableParameterRange modelMixingRange(const TheCheck &Check, } // namespace model +/// Matches DeclRefExprs and their ignorable wrappers to ParmVarDecls. +AST_MATCHER_FUNCTION(ast_matchers::internal::Matcher, paramRefExpr) { + return expr(ignoringParenImpCasts(ignoringElidableConstructorCall( + declRefExpr(to(parmVarDecl().bind("param")))))); +} + namespace filter { /// Returns whether the parameter's name or the parameter's type's name is @@ -1391,6 +1420,261 @@ static bool isIgnoredParameter(const TheCheck &Check, const ParmVarDecl *Node) { return false; } +/// This namespace contains the implementations for the suppression of +/// diagnostics from similaly used ("related") parameters. +namespace relatedness_heuristic { + +static constexpr std::size_t SmallDataStructureSize = 4; + +template +using ParamToSmallSetMap = + llvm::DenseMap>; + +/// Returns whether the sets mapped to the two elements in the map have at +/// least one element in common. +template +bool lazyMapOfSetsIntersectionExists(const MapTy &Map, const ElemTy &E1, + const ElemTy &E2) { + auto E1Iterator = Map.find(E1); + auto E2Iterator = Map.find(E2); + if (E1Iterator == Map.end() || E2Iterator == Map.end()) + return false; + + for (const auto &E1SetElem : E1Iterator->second) + if (llvm::find(E2Iterator->second, E1SetElem) != E2Iterator->second.end()) + return true; + + return false; +} + +/// Implements the heuristic that marks two parameters related if there is +/// a usage for both in the same strict expression subtree. A strict +/// expression subtree is a tree which only includes Expr nodes, i.e. no +/// Stmts and no Decls. +class AppearsInSameExpr : public RecursiveASTVisitor { + using Base = RecursiveASTVisitor; + + const FunctionDecl *FD; + const Expr *CurrentExprOnlyTreeRoot = nullptr; + llvm::DenseMap> + ParentExprsForParamRefs; + +public: + void setup(const FunctionDecl *FD) { + this->FD = FD; + TraverseFunctionDecl(const_cast(FD)); + } + + bool operator()(const ParmVarDecl *Param1, const ParmVarDecl *Param2) const { + return lazyMapOfSetsIntersectionExists(ParentExprsForParamRefs, Param1, + Param2); + } + + bool TraverseDecl(Decl *D) { + CurrentExprOnlyTreeRoot = nullptr; + return Base::TraverseDecl(D); + } + + bool TraverseStmt(Stmt *S, DataRecursionQueue *Queue = nullptr) { + if (auto *E = dyn_cast_or_null(S)) { + bool RootSetInCurrentStackFrame = false; + if (!CurrentExprOnlyTreeRoot) { + CurrentExprOnlyTreeRoot = E; + RootSetInCurrentStackFrame = true; + } + + bool Ret = Base::TraverseStmt(S); + + if (RootSetInCurrentStackFrame) + CurrentExprOnlyTreeRoot = nullptr; + + return Ret; + } + + // A Stmt breaks the strictly Expr subtree. + CurrentExprOnlyTreeRoot = nullptr; + return Base::TraverseStmt(S); + } + + bool VisitDeclRefExpr(DeclRefExpr *DRE) { + if (!CurrentExprOnlyTreeRoot) + return true; + + if (auto *PVD = dyn_cast(DRE->getDecl())) + if (llvm::find(FD->parameters(), PVD)) + ParentExprsForParamRefs[PVD].insert(CurrentExprOnlyTreeRoot); + + return true; + } +}; + +/// Implements the heuristic that marks two parameters related if there are +/// two separate calls to the same function (overload) and the parameters are +/// passed to the same index in both calls, i.e f(a, b) and f(a, c) passes +/// b and c to the same index (2) of f(), marking them related. +class PassedToSameFunction { + ParamToSmallSetMap> TargetParams; + +public: + void setup(const FunctionDecl *FD) { + auto ParamsAsArgsInFnCalls = + match(functionDecl(forEachDescendant( + callExpr(forEachArgumentWithParam( + paramRefExpr(), parmVarDecl().bind("passed-to"))) + .bind("call-expr"))), + *FD, FD->getASTContext()); + for (const auto &Match : ParamsAsArgsInFnCalls) { + const auto *PassedParamOfThisFn = Match.getNodeAs("param"); + const auto *CE = Match.getNodeAs("call-expr"); + const auto *PassedToParam = Match.getNodeAs("passed-to"); + assert(PassedParamOfThisFn && CE && PassedToParam); + + const FunctionDecl *CalledFn = CE->getDirectCallee(); + if (!CalledFn) + continue; + + llvm::Optional TargetIdx; + unsigned NumFnParams = CalledFn->getNumParams(); + for (unsigned Idx = 0; Idx < NumFnParams; ++Idx) + if (CalledFn->getParamDecl(Idx) == PassedToParam) + TargetIdx.emplace(Idx); + + assert(TargetIdx.hasValue() && "Matched, but didn't find index?"); + TargetParams[PassedParamOfThisFn].insert( + {CalledFn->getCanonicalDecl(), *TargetIdx}); + } + } + + bool operator()(const ParmVarDecl *Param1, const ParmVarDecl *Param2) const { + return lazyMapOfSetsIntersectionExists(TargetParams, Param1, Param2); + } +}; + +/// Implements the heuristic that marks two parameters related if the same +/// member is accessed (referred to) inside the current function's body. +class AccessedSameMemberOf { + ParamToSmallSetMap AccessedMembers; + +public: + void setup(const FunctionDecl *FD) { + auto MembersCalledOnParams = match( + functionDecl(forEachDescendant( + memberExpr(hasObjectExpression(paramRefExpr())).bind("mem-expr"))), + *FD, FD->getASTContext()); + + for (const auto &Match : MembersCalledOnParams) { + const auto *AccessedParam = Match.getNodeAs("param"); + const auto *ME = Match.getNodeAs("mem-expr"); + assert(AccessedParam && ME); + AccessedMembers[AccessedParam].insert( + ME->getMemberDecl()->getCanonicalDecl()); + } + } + + bool operator()(const ParmVarDecl *Param1, const ParmVarDecl *Param2) const { + return lazyMapOfSetsIntersectionExists(AccessedMembers, Param1, Param2); + } +}; + +/// Implements the heuristic that marks two parameters related if different +/// ReturnStmts return them from the function. +class Returned { + llvm::SmallVector ReturnedParams; + +public: + void setup(const FunctionDecl *FD) { + // TODO: Handle co_return. + auto ParamReturns = match(functionDecl(forEachDescendant( + returnStmt(hasReturnValue(paramRefExpr())))), + *FD, FD->getASTContext()); + for (const auto &Match : ParamReturns) { + const auto *ReturnedParam = Match.getNodeAs("param"); + assert(ReturnedParam); + + if (find(FD->parameters(), ReturnedParam) == FD->param_end()) + // Inside the subtree of a FunctionDecl there might be ReturnStmts of + // a parameter that isn't the parameter of the function, e.g. in the + // case of lambdas. + continue; + + ReturnedParams.emplace_back(ReturnedParam); + } + } + + bool operator()(const ParmVarDecl *Param1, const ParmVarDecl *Param2) const { + return llvm::find(ReturnedParams, Param1) != ReturnedParams.end() && + llvm::find(ReturnedParams, Param2) != ReturnedParams.end(); + } +}; + +} // namespace relatedness_heuristic + +/// Helper class that is used to detect if two parameters of the same function +/// are used in a similar fashion, to suppress the result. +class SimilarlyUsedParameterPairSuppressor { + const bool Enabled; + relatedness_heuristic::AppearsInSameExpr SameExpr; + relatedness_heuristic::PassedToSameFunction PassToFun; + relatedness_heuristic::AccessedSameMemberOf SameMember; + relatedness_heuristic::Returned Returns; + +public: + SimilarlyUsedParameterPairSuppressor(const FunctionDecl *FD, bool Enable) + : Enabled(Enable) { + if (!Enable) + return; + + SameExpr.setup(FD); + PassToFun.setup(FD); + SameMember.setup(FD); + Returns.setup(FD); + } + + /// Returns whether the specified two parameters are deemed similarly used + /// or related by the heuristics. + bool operator()(const ParmVarDecl *Param1, const ParmVarDecl *Param2) const { + if (!Enabled) + return false; + + LLVM_DEBUG(llvm::dbgs() + << "::: Matching similar usage / relatedness heuristic...\n"); + + if (SameExpr(Param1, Param2)) { + LLVM_DEBUG(llvm::dbgs() << "::: Used in the same expression.\n"); + return true; + } + + if (PassToFun(Param1, Param2)) { + LLVM_DEBUG(llvm::dbgs() + << "::: Passed to same function in different calls.\n"); + return true; + } + + if (SameMember(Param1, Param2)) { + LLVM_DEBUG(llvm::dbgs() + << "::: Same member field access or method called.\n"); + return true; + } + + if (Returns(Param1, Param2)) { + LLVM_DEBUG(llvm::dbgs() << "::: Both parameter returned.\n"); + return true; + } + + LLVM_DEBUG(llvm::dbgs() << "::: None.\n"); + return false; + } +}; + +// (This function hoists the call to operator() of the wrapper, so we do not +// need to define the previous class at the top of the file.) +static inline bool +isSimilarlyUsedParameter(const SimilarlyUsedParameterPairSuppressor &Suppressor, + const ParmVarDecl *Param1, const ParmVarDecl *Param2) { + return Suppressor(Param1, Param2); +} + } // namespace filter /// Matches functions that have at least the specified amount of parameters. @@ -1604,7 +1888,10 @@ EasilySwappableParametersCheck::EasilySwappableParametersCheck( DefaultIgnoredParameterTypeSuffixes))), QualifiersMix(Options.get("QualifiersMix", DefaultQualifiersMix)), ModelImplicitConversions(Options.get("ModelImplicitConversions", - DefaultModelImplicitConversions)) {} + DefaultModelImplicitConversions)), + SuppressParametersUsedTogether( + Options.get("SuppressParametersUsedTogether", + DefaultSuppressParametersUsedTogether)) {} void EasilySwappableParametersCheck::storeOptions( ClangTidyOptions::OptionMap &Opts) { @@ -1615,6 +1902,8 @@ void EasilySwappableParametersCheck::storeOptions( optutils::serializeStringList(IgnoredParameterTypeSuffixes)); Options.store(Opts, "QualifiersMix", QualifiersMix); Options.store(Opts, "ModelImplicitConversions", ModelImplicitConversions); + Options.store(Opts, "SuppressParametersUsedTogether", + SuppressParametersUsedTogether); } void EasilySwappableParametersCheck::registerMatchers(MatchFinder *Finder) { @@ -1647,6 +1936,11 @@ void EasilySwappableParametersCheck::check( std::size_t NumParams = FD->getNumParams(); std::size_t MixableRangeStartIndex = 0; + // Spawn one suppressor and if the user requested, gather information from + // the AST for the parameters' usages. + filter::SimilarlyUsedParameterPairSuppressor UsageBasedSuppressor{ + FD, SuppressParametersUsedTogether}; + LLVM_DEBUG(llvm::dbgs() << "Begin analysis of " << getName(FD) << " with " << NumParams << " parameters...\n"); while (MixableRangeStartIndex < NumParams) { @@ -1657,8 +1951,8 @@ void EasilySwappableParametersCheck::check( continue; } - MixableParameterRange R = - modelMixingRange(*this, FD, MixableRangeStartIndex); + MixableParameterRange R = modelMixingRange( + *this, FD, MixableRangeStartIndex, UsageBasedSuppressor); assert(R.NumParamsChecked > 0 && "Ensure forward progress!"); MixableRangeStartIndex += R.NumParamsChecked; if (R.NumParamsChecked < MinimumLength) { diff --git a/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.h b/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.h index b072c46680452..22e36ffa91cc1 100644 --- a/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.h @@ -47,6 +47,10 @@ class EasilySwappableParametersCheck : public ClangTidyCheck { /// during analysis and consider types that are implicitly convertible to /// one another mixable. const bool ModelImplicitConversions; + + /// If enabled, diagnostics for parameters that are used together in a + /// similar way are not emitted. + const bool SuppressParametersUsedTogether; }; } // namespace bugprone diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone-easily-swappable-parameters.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone-easily-swappable-parameters.rst index 5158394e4e1fe..5a61de8f2d770 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone-easily-swappable-parameters.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone-easily-swappable-parameters.rst @@ -140,6 +140,34 @@ noisiness. `ConstIterator`, `const_reverse_iterator`, `ConstReverseIterator`. In addition, `_Bool` (but not `_bool`) is also part of the default value. +.. option:: SuppressParametersUsedTogether + + Suppresses diagnostics about parameters that are used together or in a + similar fashion inside the function's body. + Defaults to `true`. + Specifying `false` will turn off the heuristics. + + Currently, the following heuristics are implemented which will suppress the + warning about the parameter pair involved: + + * The parameters are used in the same expression, e.g. ``f(a, b)`` or + ``a < b``. + * The parameters are further passed to the same function to the same + parameter of that function, of the same overload. + E.g. ``f(a, 1)`` and ``f(b, 2)`` to some ``f(T, int)``. + + .. note:: + + The check does not perform path-sensitive analysis, and as such, + "same function" in this context means the same function declaration. + If the same member function of a type on two distinct instances are + called with the parameters, it will still be regarded as + "same function". + + * The same member field is accessed, or member method is called of the + two parameters, e.g. ``a.foo()`` and ``b.foo()``. + * Separate ``return`` statements return either of the parameters on + different code paths. Limitations ----------- diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-ignore.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-ignore.cpp index 528363bef815f..14d6c05a20ef7 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-ignore.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-ignore.cpp @@ -4,7 +4,8 @@ // RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterNames, value: "\"\";Foo;Bar"}, \ // RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterTypeSuffixes, value: "T"}, \ // RUN: {key: bugprone-easily-swappable-parameters.QualifiersMix, value: 0}, \ -// RUN: {key: bugprone-easily-swappable-parameters.ModelImplicitConversions, value: 0} \ +// RUN: {key: bugprone-easily-swappable-parameters.ModelImplicitConversions, value: 0}, \ +// RUN: {key: bugprone-easily-swappable-parameters.SuppressParametersUsedTogether, value: 0} \ // RUN: ]}' -- void ignoredUnnamed(int I, int, int) {} // NO-WARN: No >= 2 length of non-unnamed. diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-implicit-qualifiers.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-implicit-qualifiers.cpp index 42dbe9101126f..93290e51ef689 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-implicit-qualifiers.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-implicit-qualifiers.cpp @@ -4,7 +4,8 @@ // RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterNames, value: ""}, \ // RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterTypeSuffixes, value: ""}, \ // RUN: {key: bugprone-easily-swappable-parameters.QualifiersMix, value: 1}, \ -// RUN: {key: bugprone-easily-swappable-parameters.ModelImplicitConversions, value: 1} \ +// RUN: {key: bugprone-easily-swappable-parameters.ModelImplicitConversions, value: 1}, \ +// RUN: {key: bugprone-easily-swappable-parameters.SuppressParametersUsedTogether, value: 0} \ // RUN: ]}' -- void numericAndQualifierConversion(int I, const double CD) { numericAndQualifierConversion(CD, I); } diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-implicits.c b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-implicits.c index 48a3e10d3e316..92a70f44e912c 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-implicits.c +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-implicits.c @@ -4,7 +4,8 @@ // RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterNames, value: ""}, \ // RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterTypeSuffixes, value: ""}, \ // RUN: {key: bugprone-easily-swappable-parameters.QualifiersMix, value: 0}, \ -// RUN: {key: bugprone-easily-swappable-parameters.ModelImplicitConversions, value: 1} \ +// RUN: {key: bugprone-easily-swappable-parameters.ModelImplicitConversions, value: 1}, \ +// RUN: {key: bugprone-easily-swappable-parameters.SuppressParametersUsedTogether, value: 0} \ // RUN: ]}' -- void implicitDoesntBreakOtherStuff(int A, int B) {} diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-implicits.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-implicits.cpp index 7205d87a41e78..4481b516b65b6 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-implicits.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-implicits.cpp @@ -4,7 +4,8 @@ // RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterNames, value: ""}, \ // RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterTypeSuffixes, value: ""}, \ // RUN: {key: bugprone-easily-swappable-parameters.QualifiersMix, value: 0}, \ -// RUN: {key: bugprone-easily-swappable-parameters.ModelImplicitConversions, value: 1} \ +// RUN: {key: bugprone-easily-swappable-parameters.ModelImplicitConversions, value: 1}, \ +// RUN: {key: bugprone-easily-swappable-parameters.SuppressParametersUsedTogether, value: 0} \ // RUN: ]}' -- void implicitDoesntBreakOtherStuff(int A, int B) {} diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-len2.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-len2.cpp index 3bcdee2e49796..ceb870cda3aac 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-len2.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-len2.cpp @@ -4,7 +4,8 @@ // RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterNames, value: ""}, \ // RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterTypeSuffixes, value: ""}, \ // RUN: {key: bugprone-easily-swappable-parameters.QualifiersMix, value: 0}, \ -// RUN: {key: bugprone-easily-swappable-parameters.ModelImplicitConversions, value: 0} \ +// RUN: {key: bugprone-easily-swappable-parameters.ModelImplicitConversions, value: 0}, \ +// RUN: {key: bugprone-easily-swappable-parameters.SuppressParametersUsedTogether, value: 0} \ // RUN: ]}' -- namespace std { diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-len3.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-len3.cpp index 4b1f086aba948..bae1cf883c3c6 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-len3.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-len3.cpp @@ -4,7 +4,8 @@ // RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterNames, value: ""}, \ // RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterTypeSuffixes, value: ""}, \ // RUN: {key: bugprone-easily-swappable-parameters.QualifiersMix, value: 0}, \ -// RUN: {key: bugprone-easily-swappable-parameters.ModelImplicitConversions, value: 0} \ +// RUN: {key: bugprone-easily-swappable-parameters.ModelImplicitConversions, value: 0}, \ +// RUN: {key: bugprone-easily-swappable-parameters.SuppressParametersUsedTogether, value: 0} \ // RUN: ]}' -- int add(int Left, int Right) { return Left + Right; } // NO-WARN: Only 2 parameters. diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-qualifiermixing.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-qualifiermixing.cpp index 02d41661802d2..8b547850ae639 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-qualifiermixing.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-qualifiermixing.cpp @@ -4,7 +4,8 @@ // RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterNames, value: ""}, \ // RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterTypeSuffixes, value: ""}, \ // RUN: {key: bugprone-easily-swappable-parameters.QualifiersMix, value: 1}, \ -// RUN: {key: bugprone-easily-swappable-parameters.ModelImplicitConversions, value: 0} \ +// RUN: {key: bugprone-easily-swappable-parameters.ModelImplicitConversions, value: 0}, \ +// RUN: {key: bugprone-easily-swappable-parameters.SuppressParametersUsedTogether, value: 0} \ // RUN: ]}' -- typedef int MyInt1; diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-relatedness.c b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-relatedness.c new file mode 100644 index 0000000000000..f1d4e15ff6c8c --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-relatedness.c @@ -0,0 +1,30 @@ +// RUN: %check_clang_tidy %s bugprone-easily-swappable-parameters %t \ +// RUN: -config='{CheckOptions: [ \ +// RUN: {key: bugprone-easily-swappable-parameters.MinimumLength, value: 2}, \ +// RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterNames, value: ""}, \ +// RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterTypeSuffixes, value: ""}, \ +// RUN: {key: bugprone-easily-swappable-parameters.QualifiersMix, value: 0}, \ +// RUN: {key: bugprone-easily-swappable-parameters.ModelImplicitConversions, value: 0}, \ +// RUN: {key: bugprone-easily-swappable-parameters.SuppressParametersUsedTogether, value: 1} \ +// RUN: ]}' -- -x c + +int myprint(); +int add(int X, int Y); + +void notRelated(int A, int B) {} +// CHECK-MESSAGES: :[[@LINE-1]]:17: warning: 2 adjacent parameters of 'notRelated' of similar type ('int') +// CHECK-MESSAGES: :[[@LINE-2]]:21: note: the first parameter in the range is 'A' +// CHECK-MESSAGES: :[[@LINE-3]]:28: note: the last parameter in the range is 'B' + +int addedTogether(int A, int B) { return add(A, B); } // NO-WARN: Passed to same function. + +void passedToSameKNRFunction(int A, int B) { + myprint("foo", A); + myprint("bar", B); +} +// CHECK-MESSAGES: :[[@LINE-4]]:30: warning: 2 adjacent parameters of 'passedToSameKNRFunction' of similar type ('int') +// CHECK-MESSAGES: :[[@LINE-5]]:34: note: the first parameter in the range is 'A' +// CHECK-MESSAGES: :[[@LINE-6]]:41: note: the last parameter in the range is 'B' +// This is actually a false positive: the "passed to same function" heuristic +// can't map the parameter index 1 to A and B because myprint() has no +// parameters. diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-relatedness.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-relatedness.cpp new file mode 100644 index 0000000000000..e6fae124aad3f --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-relatedness.cpp @@ -0,0 +1,231 @@ +// RUN: %check_clang_tidy %s bugprone-easily-swappable-parameters %t \ +// RUN: -config='{CheckOptions: [ \ +// RUN: {key: bugprone-easily-swappable-parameters.MinimumLength, value: 2}, \ +// RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterNames, value: ""}, \ +// RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterTypeSuffixes, value: ""}, \ +// RUN: {key: bugprone-easily-swappable-parameters.QualifiersMix, value: 0}, \ +// RUN: {key: bugprone-easily-swappable-parameters.ModelImplicitConversions, value: 0}, \ +// RUN: {key: bugprone-easily-swappable-parameters.SuppressParametersUsedTogether, value: 1} \ +// RUN: ]}' -- + +namespace std { +template +T max(const T &A, const T &B); +} // namespace std + +bool coin(); +void f(int); +void g(int); +void h(int, int); +void i(int, bool); +void i(int, char); + +struct Tmp { + int f(int); + int g(int, int); +}; + +struct Int { + int I; +}; + +void compare(int Left, int Right) {} +// CHECK-MESSAGES: :[[@LINE-1]]:14: warning: 2 adjacent parameters of 'compare' of similar type ('int') +// CHECK-MESSAGES: :[[@LINE-2]]:18: note: the first parameter in the range is 'Left' +// CHECK-MESSAGES: :[[@LINE-3]]:28: note: the last parameter in the range is 'Right' + +int decideSequence(int A, int B) { + if (A) + return 1; + if (B) + return 2; + return 3; +} +// CHECK-MESSAGES: :[[@LINE-7]]:20: warning: 2 adjacent parameters of 'decideSequence' of similar type ('int') +// CHECK-MESSAGES: :[[@LINE-8]]:24: note: the first parameter in the range is 'A' +// CHECK-MESSAGES: :[[@LINE-9]]:31: note: the last parameter in the range is 'B' + +int myMax(int A, int B) { // NO-WARN: Appears in same expression. + return A < B ? A : B; +} + +int myMax2(int A, int B) { // NO-WARN: Appears in same expression. + if (A < B) + return A; + return B; +} + +int myMax3(int A, int B) { // NO-WARN: Appears in same expression. + return std::max(A, B); +} + +int binaryToUnary(int A, int) { + return A; +} +// CHECK-MESSAGES: :[[@LINE-3]]:19: warning: 2 adjacent parameters of 'binaryToUnary' of similar type ('int') +// CHECK-MESSAGES: :[[@LINE-4]]:23: note: the first parameter in the range is 'A' +// CHECK-MESSAGES: :[[@LINE-5]]:29: note: the last parameter in the range is '' + +int randomReturn1(int A, int B) { // NO-WARN: Appears in same expression. + return coin() ? A : B; +} + +int randomReturn2(int A, int B) { // NO-WARN: Both parameters returned. + if (coin()) + return A; + return B; +} + +int randomReturn3(int A, int B) { // NO-WARN: Both parameters returned. + bool Flip = coin(); + if (Flip) + return A; + Flip = coin(); + if (Flip) + return B; + Flip = coin(); + if (!Flip) + return 0; + return -1; +} + +void passthrough1(int A, int B) { // WARN: Different functions, different params. + f(A); + g(B); +} +// CHECK-MESSAGES: :[[@LINE-4]]:19: warning: 2 adjacent parameters of 'passthrough1' of similar type ('int') +// CHECK-MESSAGES: :[[@LINE-5]]:23: note: the first parameter in the range is 'A' +// CHECK-MESSAGES: :[[@LINE-6]]:30: note: the last parameter in the range is 'B' + +void passthrough2(int A, int B) { // NO-WARN: Passed to same index of same function. + f(A); + f(B); +} + +void passthrough3(int A, int B) { // NO-WARN: Passed to same index of same funtion. + h(1, A); + h(1, B); +} + +void passthrough4(int A, int B) { // WARN: Different index used. + h(1, A); + h(B, 2); +} +// CHECK-MESSAGES: :[[@LINE-4]]:19: warning: 2 adjacent parameters of 'passthrough4' of similar type ('int') +// CHECK-MESSAGES: :[[@LINE-5]]:23: note: the first parameter in the range is 'A' +// CHECK-MESSAGES: :[[@LINE-6]]:30: note: the last parameter in the range is 'B' + +void passthrough5(int A, int B) { // WARN: Different function overload. + i(A, false); + i(A, '\0'); +} +// CHECK-MESSAGES: :[[@LINE-4]]:19: warning: 2 adjacent parameters of 'passthrough5' of similar type ('int') +// CHECK-MESSAGES: :[[@LINE-5]]:23: note: the first parameter in the range is 'A' +// CHECK-MESSAGES: :[[@LINE-6]]:30: note: the last parameter in the range is 'B' + +void passthrough6(int A, int B) { // NO-WARN: Passed to same index of same function. + Tmp Temp; + Temp.f(A); + Temp.f(B); +} + +void passthrough7(int A, int B) { // NO-WARN: Passed to same index of same function. + // Clang-Tidy isn't path sensitive, the fact that the two objects we call the + // function on is different is not modelled. + Tmp Temp1, Temp2; + Temp1.f(A); + Temp2.f(B); +} + +void passthrough8(int A, int B) { // WARN: Different functions used. + f(A); + Tmp{}.f(B); +} +// CHECK-MESSAGES: :[[@LINE-4]]:19: warning: 2 adjacent parameters of 'passthrough8' of similar type ('int') +// CHECK-MESSAGES: :[[@LINE-5]]:23: note: the first parameter in the range is 'A' +// CHECK-MESSAGES: :[[@LINE-6]]:30: note: the last parameter in the range is 'B' + +// Test that the matching of "passed-to-function" is done to the proper node. +// Put simply, this test should not crash here. +void forwardDeclared(int X); + +void passthrough9(int A, int B) { // NO-WARN: Passed to same index of same function. + forwardDeclared(A); + forwardDeclared(B); +} + +void forwardDeclared(int X) {} + +void passthrough10(int A, int B) { // NO-WARN: Passed to same index of same function. + forwardDeclared(A); + forwardDeclared(B); +} + +bool compare1(Int I, Int J) { // NO-WARN: Same member accessed. + int Val1 = I.I; + int Val2 = J.I; + return Val1 < Val2; +} + +bool compare2(Tmp T1, Tmp T2) { // NO-WARN: Same member accessed. + int Val1 = T1.g(0, 1); + int Val2 = T2.g(2, 3); + return Val1 < Val2; +} + +bool compare3(Tmp T1, Tmp T2) { // WARN: Different member accessed. + int Val1 = T1.f(0); + int Val2 = T2.g(1, 2); + return Val1 < Val2; +} +// CHECK-MESSAGES: :[[@LINE-5]]:15: warning: 2 adjacent parameters of 'compare3' of similar type ('Tmp') +// CHECK-MESSAGES: :[[@LINE-6]]:19: note: the first parameter in the range is 'T1' +// CHECK-MESSAGES: :[[@LINE-7]]:27: note: the last parameter in the range is 'T2' + +int rangeBreaker(int I, int J, int K, int L, int M, int N) { + // (I, J) swappable. + + if (J == K) // (J, K) related. + return -1; + + if (K + 2 > Tmp{}.f(K)) + return M; + + // (K, L, M) swappable. + + return N; // (M, N) related. +} +// CHECK-MESSAGES: :[[@LINE-13]]:18: warning: 2 adjacent parameters of 'rangeBreaker' of similar type ('int') +// CHECK-MESSAGES: :[[@LINE-14]]:22: note: the first parameter in the range is 'I' +// CHECK-MESSAGES: :[[@LINE-15]]:29: note: the last parameter in the range is 'J' +// CHECK-MESSAGES: :[[@LINE-16]]:32: warning: 3 adjacent parameters of 'rangeBreaker' of similar type ('int') +// CHECK-MESSAGES: :[[@LINE-17]]:36: note: the first parameter in the range is 'K' +// CHECK-MESSAGES: :[[@LINE-18]]:50: note: the last parameter in the range is 'M' + +int returnsNotOwnParameter(int I, int J, int K) { + const auto &Lambda = [&K](int L, int M, int N) { + if (K) + return L; + return M; // (L, M) related. + }; + + if (Lambda(-1, 0, 1)) + return I; + return J; // (I, J) related. +} +// CHECK-MESSAGES: :[[@LINE-11]]:35: warning: 2 adjacent parameters of 'returnsNotOwnParameter' of similar type ('int') +// CHECK-MESSAGES: :[[@LINE-12]]:39: note: the first parameter in the range is 'J' +// CHECK-MESSAGES: :[[@LINE-13]]:46: note: the last parameter in the range is 'K' +// CHECK-MESSAGES: :[[@LINE-13]]:36: warning: 2 adjacent parameters of 'operator()' of similar type ('int') +// CHECK-MESSAGES: :[[@LINE-14]]:40: note: the first parameter in the range is 'M' +// CHECK-MESSAGES: :[[@LINE-15]]:47: note: the last parameter in the range is 'N' + +int usedTogetherInCapture(int I, int J, int K) { // NO-WARN: Used together. + const auto &Lambda = [I, J, K]() { + int A = I + 1; + int B = J - 2; + int C = K * 3; + return A + B + C; + }; + return Lambda(); +} diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters.c b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters.c index 06a3993472ae6..a0b3b52188906 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters.c +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters.c @@ -4,7 +4,8 @@ // RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterNames, value: ""}, \ // RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterTypeSuffixes, value: "bool;MyBool;struct U;MAKE_LOGICAL_TYPE(int)"}, \ // RUN: {key: bugprone-easily-swappable-parameters.QualifiersMix, value: 0}, \ -// RUN: {key: bugprone-easily-swappable-parameters.ModelImplicitConversions, value: 0} \ +// RUN: {key: bugprone-easily-swappable-parameters.ModelImplicitConversions, value: 0}, \ +// RUN: {key: bugprone-easily-swappable-parameters.SuppressParametersUsedTogether, value: 0} \ // RUN: ]}' -- -x c #define bool _Bool From 0fba450b9756a496224efd06e5ba76c9a61d3e15 Mon Sep 17 00:00:00 2001 From: Whisperity Date: Tue, 12 Jan 2021 17:03:42 +0100 Subject: [PATCH 017/619] [clang-tidy] Suppress reports to patternedly named parameters in 'bugprone-easily-swappable-parameters' While the original check's purpose is to identify potentially dangerous functions based on the parameter types (as identifier names do not mean anything when it comes to the language rules), unfortunately, such a plain interface check rule can be incredibly noisy. While the previous "filtering heuristic" is able to find many similar usages, there is an entire class of parameters that should not be warned about very easily mixed by that check: parameters that have a name and their name follows a pattern, e.g. `text1, text2, text3, ...`.` This patch implements a simple, but powerful rule, that allows us to detect such cases and ensure that no warnings are emitted for parameter sequences that follow a pattern, even if their types allow for them to be potentially mixed at a call site. Given a threshold `k`, warnings about two parameters are filtered from the result set if the names of the parameters are either prefixes or suffixes of each other, with at most k letters difference on the non-common end. (Assuming that the names themselves are at least `k` long.) - The above `text1, text2` is an example of this. (Live finding from Xerces.) - `LHS` and `RHS` are also fitting the bill here. (Live finding from... virtually any project.) - So does `Qmat, Tmat, Rmat`. (Live finding from I think OpenCV.) Reviewed By: aaron.ballman Differential Revision: http://reviews.llvm.org/D97297 --- .../EasilySwappableParametersCheck.cpp | 94 ++++++++++++++++++- .../bugprone/EasilySwappableParametersCheck.h | 6 ++ .../bugprone-easily-swappable-parameters.rst | 21 +++++ ...one-easily-swappable-parameters-ignore.cpp | 3 +- ...appable-parameters-implicit-qualifiers.cpp | 3 +- ...ne-easily-swappable-parameters-implicits.c | 3 +- ...-easily-swappable-parameters-implicits.cpp | 3 +- ...prone-easily-swappable-parameters-len2.cpp | 3 +- ...prone-easily-swappable-parameters-len3.cpp | 3 +- ...-swappable-parameters-prefixsuffixname.cpp | 56 +++++++++++ ...y-swappable-parameters-qualifiermixing.cpp | 3 +- ...-easily-swappable-parameters-relatedness.c | 3 +- ...asily-swappable-parameters-relatedness.cpp | 3 +- .../bugprone-easily-swappable-parameters.c | 3 +- 14 files changed, 194 insertions(+), 13 deletions(-) create mode 100644 clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-prefixsuffixname.cpp diff --git a/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.cpp index 247953c25a3b8..6c84cb3e55537 100644 --- a/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.cpp @@ -70,6 +70,11 @@ static constexpr bool DefaultModelImplicitConversions = true; /// used together. static constexpr bool DefaultSuppressParametersUsedTogether = true; +/// The default value for the NamePrefixSuffixSilenceDissimilarityTreshold +/// check option. +static constexpr std::size_t + DefaultNamePrefixSuffixSilenceDissimilarityTreshold = 1; + using namespace clang::ast_matchers; namespace clang { @@ -85,6 +90,8 @@ static bool isIgnoredParameter(const TheCheck &Check, const ParmVarDecl *Node); static inline bool isSimilarlyUsedParameter(const SimilarlyUsedParameterPairSuppressor &Suppressor, const ParmVarDecl *Param1, const ParmVarDecl *Param2); +static bool prefixSuffixCoverUnderThreshold(std::size_t Threshold, + StringRef Str1, StringRef Str2); } // namespace filter namespace model { @@ -1292,13 +1299,25 @@ static MixableParameterRange modelMixingRange( for (std::size_t I = StartIndex + 1; I < NumParams; ++I) { const ParmVarDecl *Ith = FD->getParamDecl(I); - LLVM_DEBUG(llvm::dbgs() << "Check param #" << I << "...\n"); - + StringRef ParamName = Ith->getName(); + LLVM_DEBUG(llvm::dbgs() + << "Check param #" << I << " '" << ParamName << "'...\n"); if (filter::isIgnoredParameter(Check, Ith)) { LLVM_DEBUG(llvm::dbgs() << "Param #" << I << " is ignored. Break!\n"); break; } + StringRef PrevParamName = FD->getParamDecl(I - 1)->getName(); + if (!ParamName.empty() && !PrevParamName.empty() && + filter::prefixSuffixCoverUnderThreshold( + Check.NamePrefixSuffixSilenceDissimilarityTreshold, PrevParamName, + ParamName)) { + LLVM_DEBUG(llvm::dbgs() << "Parameter '" << ParamName + << "' follows a pattern with previous parameter '" + << PrevParamName << "'. Break!\n"); + break; + } + // Now try to go forward and build the range of [Start, ..., I, I + 1, ...] // parameters that can be messed up at a call site. MixableParameterRange::MixVector MixesOfIth; @@ -1675,6 +1694,70 @@ isSimilarlyUsedParameter(const SimilarlyUsedParameterPairSuppressor &Suppressor, return Suppressor(Param1, Param2); } +static void padStringAtEnd(SmallVectorImpl &Str, std::size_t ToLen) { + while (Str.size() < ToLen) + Str.emplace_back('\0'); +} + +static void padStringAtBegin(SmallVectorImpl &Str, std::size_t ToLen) { + while (Str.size() < ToLen) + Str.insert(Str.begin(), '\0'); +} + +static bool isCommonPrefixWithoutSomeCharacters(std::size_t N, StringRef S1, + StringRef S2) { + assert(S1.size() >= N && S2.size() >= N); + StringRef S1Prefix = S1.take_front(S1.size() - N), + S2Prefix = S2.take_front(S2.size() - N); + return S1Prefix == S2Prefix && !S1Prefix.empty(); +} + +static bool isCommonSuffixWithoutSomeCharacters(std::size_t N, StringRef S1, + StringRef S2) { + assert(S1.size() >= N && S2.size() >= N); + StringRef S1Suffix = S1.take_back(S1.size() - N), + S2Suffix = S2.take_back(S2.size() - N); + return S1Suffix == S2Suffix && !S1Suffix.empty(); +} + +/// Returns whether the two strings are prefixes or suffixes of each other with +/// at most Threshold characters differing on the non-common end. +static bool prefixSuffixCoverUnderThreshold(std::size_t Threshold, + StringRef Str1, StringRef Str2) { + if (Threshold == 0) + return false; + + // Pad the two strings to the longer length. + std::size_t BiggerLength = std::max(Str1.size(), Str2.size()); + + if (BiggerLength <= Threshold) + // If the length of the strings is still smaller than the threshold, they + // would be covered by an empty prefix/suffix with the rest differing. + // (E.g. "A" and "X" with Threshold = 1 would mean we think they are + // similar and do not warn about them, which is a too eager assumption.) + return false; + + SmallString<32> S1PadE{Str1}, S2PadE{Str2}; + padStringAtEnd(S1PadE, BiggerLength); + padStringAtEnd(S2PadE, BiggerLength); + + if (isCommonPrefixWithoutSomeCharacters( + Threshold, StringRef{S1PadE.begin(), BiggerLength}, + StringRef{S2PadE.begin(), BiggerLength})) + return true; + + SmallString<32> S1PadB{Str1}, S2PadB{Str2}; + padStringAtBegin(S1PadB, BiggerLength); + padStringAtBegin(S2PadB, BiggerLength); + + if (isCommonSuffixWithoutSomeCharacters( + Threshold, StringRef{S1PadB.begin(), BiggerLength}, + StringRef{S2PadB.begin(), BiggerLength})) + return true; + + return false; +} + } // namespace filter /// Matches functions that have at least the specified amount of parameters. @@ -1891,7 +1974,10 @@ EasilySwappableParametersCheck::EasilySwappableParametersCheck( DefaultModelImplicitConversions)), SuppressParametersUsedTogether( Options.get("SuppressParametersUsedTogether", - DefaultSuppressParametersUsedTogether)) {} + DefaultSuppressParametersUsedTogether)), + NamePrefixSuffixSilenceDissimilarityTreshold( + Options.get("NamePrefixSuffixSilenceDissimilarityTreshold", + DefaultNamePrefixSuffixSilenceDissimilarityTreshold)) {} void EasilySwappableParametersCheck::storeOptions( ClangTidyOptions::OptionMap &Opts) { @@ -1904,6 +1990,8 @@ void EasilySwappableParametersCheck::storeOptions( Options.store(Opts, "ModelImplicitConversions", ModelImplicitConversions); Options.store(Opts, "SuppressParametersUsedTogether", SuppressParametersUsedTogether); + Options.store(Opts, "NamePrefixSuffixSilenceDissimilarityTreshold", + NamePrefixSuffixSilenceDissimilarityTreshold); } void EasilySwappableParametersCheck::registerMatchers(MatchFinder *Finder) { diff --git a/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.h b/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.h index 22e36ffa91cc1..a1fade5277f00 100644 --- a/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.h @@ -51,6 +51,12 @@ class EasilySwappableParametersCheck : public ClangTidyCheck { /// If enabled, diagnostics for parameters that are used together in a /// similar way are not emitted. const bool SuppressParametersUsedTogether; + + /// The number of characters two parameter names might be dissimilar at + /// either end for the report about the parameters to be silenced. + /// E.g. the names "LHS" and "RHS" are 1-dissimilar suffixes of each other, + /// while "Text1" and "Text2" are 1-dissimilar prefixes of each other. + const std::size_t NamePrefixSuffixSilenceDissimilarityTreshold; }; } // namespace bugprone diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone-easily-swappable-parameters.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone-easily-swappable-parameters.rst index 5a61de8f2d770..5ea635766e5bc 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone-easily-swappable-parameters.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone-easily-swappable-parameters.rst @@ -169,6 +169,27 @@ noisiness. * Separate ``return`` statements return either of the parameters on different code paths. +.. option:: NamePrefixSuffixSilenceDissimilarityTreshold + + The number of characters two parameter names might be different on *either* + the head or the tail end with the rest of the name the same so that the + warning about the two parameters are silenced. + Defaults to `1`. + Might be any positive integer. + If `0`, the filtering heuristic based on the parameters' names is turned + off. + + This option can be used to silence warnings about parameters where the + naming scheme indicates that the order of those parameters do not matter. + + For example, the parameters ``LHS`` and ``RHS`` are 1-dissimilar suffixes + of each other: ``L`` and ``R`` is the different character, while ``HS`` + is the common suffix. + Similarly, parameters ``text1, text2, text3`` are 1-dissimilar prefixes + of each other, with the numbers at the end being the dissimilar part. + If the value is at least `1`, such cases will not be reported. + + Limitations ----------- diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-ignore.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-ignore.cpp index 14d6c05a20ef7..d10e367007d27 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-ignore.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-ignore.cpp @@ -5,7 +5,8 @@ // RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterTypeSuffixes, value: "T"}, \ // RUN: {key: bugprone-easily-swappable-parameters.QualifiersMix, value: 0}, \ // RUN: {key: bugprone-easily-swappable-parameters.ModelImplicitConversions, value: 0}, \ -// RUN: {key: bugprone-easily-swappable-parameters.SuppressParametersUsedTogether, value: 0} \ +// RUN: {key: bugprone-easily-swappable-parameters.SuppressParametersUsedTogether, value: 0}, \ +// RUN: {key: bugprone-easily-swappable-parameters.NamePrefixSuffixSilenceDissimilarityTreshold, value: 0} \ // RUN: ]}' -- void ignoredUnnamed(int I, int, int) {} // NO-WARN: No >= 2 length of non-unnamed. diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-implicit-qualifiers.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-implicit-qualifiers.cpp index 93290e51ef689..a3b33822268d9 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-implicit-qualifiers.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-implicit-qualifiers.cpp @@ -5,7 +5,8 @@ // RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterTypeSuffixes, value: ""}, \ // RUN: {key: bugprone-easily-swappable-parameters.QualifiersMix, value: 1}, \ // RUN: {key: bugprone-easily-swappable-parameters.ModelImplicitConversions, value: 1}, \ -// RUN: {key: bugprone-easily-swappable-parameters.SuppressParametersUsedTogether, value: 0} \ +// RUN: {key: bugprone-easily-swappable-parameters.SuppressParametersUsedTogether, value: 0}, \ +// RUN: {key: bugprone-easily-swappable-parameters.NamePrefixSuffixSilenceDissimilarityTreshold, value: 0} \ // RUN: ]}' -- void numericAndQualifierConversion(int I, const double CD) { numericAndQualifierConversion(CD, I); } diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-implicits.c b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-implicits.c index 92a70f44e912c..b7d92ce43f64b 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-implicits.c +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-implicits.c @@ -5,7 +5,8 @@ // RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterTypeSuffixes, value: ""}, \ // RUN: {key: bugprone-easily-swappable-parameters.QualifiersMix, value: 0}, \ // RUN: {key: bugprone-easily-swappable-parameters.ModelImplicitConversions, value: 1}, \ -// RUN: {key: bugprone-easily-swappable-parameters.SuppressParametersUsedTogether, value: 0} \ +// RUN: {key: bugprone-easily-swappable-parameters.SuppressParametersUsedTogether, value: 0}, \ +// RUN: {key: bugprone-easily-swappable-parameters.NamePrefixSuffixSilenceDissimilarityTreshold, value: 0} \ // RUN: ]}' -- void implicitDoesntBreakOtherStuff(int A, int B) {} diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-implicits.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-implicits.cpp index 4481b516b65b6..c1a72d687b135 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-implicits.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-implicits.cpp @@ -5,7 +5,8 @@ // RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterTypeSuffixes, value: ""}, \ // RUN: {key: bugprone-easily-swappable-parameters.QualifiersMix, value: 0}, \ // RUN: {key: bugprone-easily-swappable-parameters.ModelImplicitConversions, value: 1}, \ -// RUN: {key: bugprone-easily-swappable-parameters.SuppressParametersUsedTogether, value: 0} \ +// RUN: {key: bugprone-easily-swappable-parameters.SuppressParametersUsedTogether, value: 0}, \ +// RUN: {key: bugprone-easily-swappable-parameters.NamePrefixSuffixSilenceDissimilarityTreshold, value: 0} \ // RUN: ]}' -- void implicitDoesntBreakOtherStuff(int A, int B) {} diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-len2.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-len2.cpp index ceb870cda3aac..e2e836b67cc33 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-len2.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-len2.cpp @@ -5,7 +5,8 @@ // RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterTypeSuffixes, value: ""}, \ // RUN: {key: bugprone-easily-swappable-parameters.QualifiersMix, value: 0}, \ // RUN: {key: bugprone-easily-swappable-parameters.ModelImplicitConversions, value: 0}, \ -// RUN: {key: bugprone-easily-swappable-parameters.SuppressParametersUsedTogether, value: 0} \ +// RUN: {key: bugprone-easily-swappable-parameters.SuppressParametersUsedTogether, value: 0}, \ +// RUN: {key: bugprone-easily-swappable-parameters.NamePrefixSuffixSilenceDissimilarityTreshold, value: 0} \ // RUN: ]}' -- namespace std { diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-len3.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-len3.cpp index bae1cf883c3c6..ee943e39d0386 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-len3.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-len3.cpp @@ -5,7 +5,8 @@ // RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterTypeSuffixes, value: ""}, \ // RUN: {key: bugprone-easily-swappable-parameters.QualifiersMix, value: 0}, \ // RUN: {key: bugprone-easily-swappable-parameters.ModelImplicitConversions, value: 0}, \ -// RUN: {key: bugprone-easily-swappable-parameters.SuppressParametersUsedTogether, value: 0} \ +// RUN: {key: bugprone-easily-swappable-parameters.SuppressParametersUsedTogether, value: 0}, \ +// RUN: {key: bugprone-easily-swappable-parameters.NamePrefixSuffixSilenceDissimilarityTreshold, value: 0} \ // RUN: ]}' -- int add(int Left, int Right) { return Left + Right; } // NO-WARN: Only 2 parameters. diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-prefixsuffixname.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-prefixsuffixname.cpp new file mode 100644 index 0000000000000..a60b45f4cf5ce --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-prefixsuffixname.cpp @@ -0,0 +1,56 @@ +// RUN: %check_clang_tidy %s bugprone-easily-swappable-parameters %t \ +// RUN: -config='{CheckOptions: [ \ +// RUN: {key: bugprone-easily-swappable-parameters.MinimumLength, value: 2}, \ +// RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterNames, value: ""}, \ +// RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterTypeSuffixes, value: ""}, \ +// RUN: {key: bugprone-easily-swappable-parameters.QualifiersMix, value: 0}, \ +// RUN: {key: bugprone-easily-swappable-parameters.ModelImplicitConversions, value: 0}, \ +// RUN: {key: bugprone-easily-swappable-parameters.SuppressParametersUsedTogether, value: 0}, \ +// RUN: {key: bugprone-easily-swappable-parameters.NamePrefixSuffixSilenceDissimilarityTreshold, value: 1} \ +// RUN: ]}' -- + +namespace std { +struct string {}; +} // namespace std +class Matrix {}; + +void test1(int Foo, int Bar) {} +// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: 2 adjacent parameters of 'test1' of similar type ('int') are easily swapped by mistake [bugprone-easily-swappable-parameters] +// CHECK-MESSAGES: :[[@LINE-2]]:16: note: the first parameter in the range is 'Foo' +// CHECK-MESSAGES: :[[@LINE-3]]:25: note: the last parameter in the range is 'Bar' + +void test2(int A, int B) {} +// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: 2 adjacent parameters of 'test2' of similar type ('int') +// CHECK-MESSAGES: :[[@LINE-2]]:16: note: the first parameter in the range is 'A' +// CHECK-MESSAGES: :[[@LINE-3]]:23: note: the last parameter in the range is 'B' + +void test3(int Val1, int Val2) {} // NO-WARN. + +void test4(int ValA, int Valb) {} // NO-WARN. + +void test5(int Val1, int ValZ) {} // NO-WARN. + +void test6(int PValue, int QValue) {} // NO-WARN. + +void test7(std::string Astr, std::string Bstr) {} // NO-WARN. + +void test8(int Aladdin, int Alabaster) {} +// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: 2 adjacent parameters of 'test8' of similar type ('int') +// CHECK-MESSAGES: :[[@LINE-2]]:16: note: the first parameter in the range is 'Aladdin' +// CHECK-MESSAGES: :[[@LINE-3]]:29: note: the last parameter in the range is 'Alabaster' + +void test9(Matrix Qmat, Matrix Rmat, Matrix Tmat) {} // NO-WARN. + +void test10(int Something, int Other, int Foo, int Bar1, int Bar2, int Baz, int Qux) {} +// CHECK-MESSAGES: :[[@LINE-1]]:13: warning: 4 adjacent parameters of 'test10' of similar type ('int') are +// CHECK-MESSAGES: :[[@LINE-2]]:17: note: the first parameter in the range is 'Something' +// CHECK-MESSAGES: :[[@LINE-3]]:52: note: the last parameter in the range is 'Bar1' +// +// CHECK-MESSAGES: :[[@LINE-5]]:58: warning: 3 adjacent parameters of 'test10' of similar type ('int') are +// CHECK-MESSAGES: :[[@LINE-6]]:62: note: the first parameter in the range is 'Bar2' +// CHECK-MESSAGES: :[[@LINE-7]]:81: note: the last parameter in the range is 'Qux' + +void test11(int Foobar, int Foo) {} +// CHECK-MESSAGES: :[[@LINE-1]]:13: warning: 2 adjacent parameters of 'test11' of similar type ('int') +// CHECK-MESSAGES: :[[@LINE-2]]:17: note: the first parameter in the range is 'Foobar' +// CHECK-MESSAGES: :[[@LINE-3]]:29: note: the last parameter in the range is 'Foo' diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-qualifiermixing.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-qualifiermixing.cpp index 8b547850ae639..9ba81a484163e 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-qualifiermixing.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-qualifiermixing.cpp @@ -5,7 +5,8 @@ // RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterTypeSuffixes, value: ""}, \ // RUN: {key: bugprone-easily-swappable-parameters.QualifiersMix, value: 1}, \ // RUN: {key: bugprone-easily-swappable-parameters.ModelImplicitConversions, value: 0}, \ -// RUN: {key: bugprone-easily-swappable-parameters.SuppressParametersUsedTogether, value: 0} \ +// RUN: {key: bugprone-easily-swappable-parameters.SuppressParametersUsedTogether, value: 0}, \ +// RUN: {key: bugprone-easily-swappable-parameters.NamePrefixSuffixSilenceDissimilarityTreshold, value: 0} \ // RUN: ]}' -- typedef int MyInt1; diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-relatedness.c b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-relatedness.c index f1d4e15ff6c8c..a6f2a736fcfb8 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-relatedness.c +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-relatedness.c @@ -5,7 +5,8 @@ // RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterTypeSuffixes, value: ""}, \ // RUN: {key: bugprone-easily-swappable-parameters.QualifiersMix, value: 0}, \ // RUN: {key: bugprone-easily-swappable-parameters.ModelImplicitConversions, value: 0}, \ -// RUN: {key: bugprone-easily-swappable-parameters.SuppressParametersUsedTogether, value: 1} \ +// RUN: {key: bugprone-easily-swappable-parameters.SuppressParametersUsedTogether, value: 1}, \ +// RUN: {key: bugprone-easily-swappable-parameters.NamePrefixSuffixSilenceDissimilarityTreshold, value: 0} \ // RUN: ]}' -- -x c int myprint(); diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-relatedness.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-relatedness.cpp index e6fae124aad3f..c202d20423004 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-relatedness.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters-relatedness.cpp @@ -5,7 +5,8 @@ // RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterTypeSuffixes, value: ""}, \ // RUN: {key: bugprone-easily-swappable-parameters.QualifiersMix, value: 0}, \ // RUN: {key: bugprone-easily-swappable-parameters.ModelImplicitConversions, value: 0}, \ -// RUN: {key: bugprone-easily-swappable-parameters.SuppressParametersUsedTogether, value: 1} \ +// RUN: {key: bugprone-easily-swappable-parameters.SuppressParametersUsedTogether, value: 1}, \ +// RUN: {key: bugprone-easily-swappable-parameters.NamePrefixSuffixSilenceDissimilarityTreshold, value: 0} \ // RUN: ]}' -- namespace std { diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters.c b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters.c index a0b3b52188906..9a945dab08cda 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters.c +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-easily-swappable-parameters.c @@ -5,7 +5,8 @@ // RUN: {key: bugprone-easily-swappable-parameters.IgnoredParameterTypeSuffixes, value: "bool;MyBool;struct U;MAKE_LOGICAL_TYPE(int)"}, \ // RUN: {key: bugprone-easily-swappable-parameters.QualifiersMix, value: 0}, \ // RUN: {key: bugprone-easily-swappable-parameters.ModelImplicitConversions, value: 0}, \ -// RUN: {key: bugprone-easily-swappable-parameters.SuppressParametersUsedTogether, value: 0} \ +// RUN: {key: bugprone-easily-swappable-parameters.SuppressParametersUsedTogether, value: 0}, \ +// RUN: {key: bugprone-easily-swappable-parameters.NamePrefixSuffixSilenceDissimilarityTreshold, value: 0} \ // RUN: ]}' -- -x c #define bool _Bool From 9061da274898270ed1e06cff6c22510ea74ab01e Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Mon, 28 Jun 2021 08:50:56 +0000 Subject: [PATCH 018/619] [gn build] Port 499e39c5983d --- .../gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn index 190a9bf0bc222..259a030ff8ab9 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn @@ -23,6 +23,7 @@ static_library("bugprone") { "CopyConstructorInitCheck.cpp", "DanglingHandleCheck.cpp", "DynamicStaticInitializersCheck.cpp", + "EasilySwappableParametersCheck.cpp", "ExceptionEscapeCheck.cpp", "FoldInitTypeCheck.cpp", "ForwardDeclarationNamespaceCheck.cpp", From f3b55a8a06529bee95ed9a1b55c98ce3839b40ea Mon Sep 17 00:00:00 2001 From: Whisperity Date: Mon, 28 Jun 2021 11:17:51 +0200 Subject: [PATCH 019/619] [clang-tidy][NFC] Fix buildbot failures in 'bugprone-easily-swappable-parameters' --- .../EasilySwappableParametersCheck.cpp | 90 +++++++++++-------- 1 file changed, 52 insertions(+), 38 deletions(-) diff --git a/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.cpp index 6c84cb3e55537..72f5b25e9f66a 100644 --- a/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.cpp @@ -98,24 +98,24 @@ namespace model { /// The language features involved in allowing the mix between two parameters. enum class MixFlags : unsigned char { - Invalid = 0, //< Sentinel bit pattern. DO NOT USE! + Invalid = 0, ///< Sentinel bit pattern. DO NOT USE! - //< Certain constructs (such as pointers to noexcept/non-noexcept functions) - // have the same CanonicalType, which would result in false positives. - // During the recursive modelling call, this flag is set if a later diagnosed - // canonical type equivalence should be thrown away. + /// Certain constructs (such as pointers to noexcept/non-noexcept functions) + /// have the same CanonicalType, which would result in false positives. + /// During the recursive modelling call, this flag is set if a later diagnosed + /// canonical type equivalence should be thrown away. WorkaroundDisableCanonicalEquivalence = 1, - None = 2, //< Mix between the two parameters is not possible. - Trivial = 4, //< The two mix trivially, and are the exact same type. - Canonical = 8, //< The two mix because the types refer to the same - // CanonicalType, but we do not elaborate as to how. - TypeAlias = 16, //< The path from one type to the other involves - // desugaring type aliases. - ReferenceBind = 32, //< The mix involves the binding power of "const &". - Qualifiers = 64, //< The mix involves change in the qualifiers. - ImplicitConversion = 128, //< The mixing of the parameters is possible - // through implicit conversions between the types. + None = 2, ///< Mix between the two parameters is not possible. + Trivial = 4, ///< The two mix trivially, and are the exact same type. + Canonical = 8, ///< The two mix because the types refer to the same + /// CanonicalType, but we do not elaborate as to how. + TypeAlias = 16, ///< The path from one type to the other involves + /// desugaring type aliases. + ReferenceBind = 32, ///< The mix involves the binding power of "const &". + Qualifiers = 64, ///< The mix involves change in the qualifiers. + ImplicitConversion = 128, ///< The mixing of the parameters is possible + /// through implicit conversions between the types. LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue =*/ImplicitConversion) }; @@ -546,16 +546,16 @@ struct MixableParameterRange { /// Helper enum for the recursive calls in the modelling that toggle what kinds /// of implicit conversions are to be modelled. -enum ImplicitConversionModellingMode : unsigned char { - //< No implicit conversions are modelled. - ICMM_None, +enum class ImplicitConversionModellingMode : unsigned char { + /// No implicit conversions are modelled. + None, - //< The full implicit conversion sequence is modelled. - ICMM_All, + /// The full implicit conversion sequence is modelled. + All, - //< Only model a unidirectional implicit conversion and within it only one - // standard conversion sequence. - ICMM_OneWaySingleStandardOnly + /// Only model a unidirectional implicit conversion and within it only one + /// standard conversion sequence. + OneWaySingleStandardOnly }; static MixData @@ -684,9 +684,9 @@ calculateMixability(const TheCheck &Check, QualType LType, QualType RType, // some other match. However, this must not consider implicit conversions. LLVM_DEBUG(llvm::dbgs() << "--- calculateMixability. LHS and RHS are Ptrs.\n"); - MixData MixOfPointee = - calculateMixability(Check, LType->getPointeeType(), - RType->getPointeeType(), Ctx, ICMM_None); + MixData MixOfPointee = calculateMixability( + Check, LType->getPointeeType(), RType->getPointeeType(), Ctx, + ImplicitConversionModellingMode::None); if (hasFlag(MixOfPointee.Flags, MixFlags::WorkaroundDisableCanonicalEquivalence)) RecursiveReturnDiscardingCanonicalType = true; @@ -699,7 +699,7 @@ calculateMixability(const TheCheck &Check, QualType LType, QualType RType, } } - if (ImplicitMode > ICMM_None) { + if (ImplicitMode > ImplicitConversionModellingMode::None) { LLVM_DEBUG(llvm::dbgs() << "--- calculateMixability. Start implicit...\n"); MixData MixLTR = approximateImplicitConversion(Check, LType, RType, Ctx, ImplicitMode); @@ -707,8 +707,9 @@ calculateMixability(const TheCheck &Check, QualType LType, QualType RType, if (hasFlag(MixLTR.Flags, MixFlags::ImplicitConversion)) llvm::dbgs() << "--- calculateMixability. Implicit Left -> Right found.\n";); - if (ImplicitMode == ICMM_OneWaySingleStandardOnly && MixLTR.Conversion && - !MixLTR.Conversion.AfterFirstStandard.isNull() && + if (ImplicitMode == + ImplicitConversionModellingMode::OneWaySingleStandardOnly && + MixLTR.Conversion && !MixLTR.Conversion.AfterFirstStandard.isNull() && MixLTR.Conversion.UDConvKind == ConversionSequence::UDCK_None && MixLTR.Conversion.AfterSecondStandard.isNull()) { // The invoker of the method requested only modelling a single standard @@ -952,7 +953,8 @@ approximateStandardConversionSequence(const TheCheck &Check, QualType From, // the QualifiersMix check config. LLVM_DEBUG(llvm::dbgs() << "--- approximateStdConv. Trying qualifier adjustment...\n"); - MixData QualConv = calculateMixability(Check, WorkType, To, Ctx, ICMM_None); + MixData QualConv = calculateMixability(Check, WorkType, To, Ctx, + ImplicitConversionModellingMode::None); QualConv.sanitize(); if (hasFlag(QualConv.Flags, MixFlags::Qualifiers)) { LLVM_DEBUG(llvm::dbgs() @@ -1001,9 +1003,9 @@ class UserDefinedConversionSelector { QualType ToType) { // Try to go from the FromType to the ToType wiht only a single implicit // conversion, to see if the conversion function is applicable. - MixData Mix = - calculateMixability(Check, FromType, ToType, ConvFun->getASTContext(), - ICMM_OneWaySingleStandardOnly); + MixData Mix = calculateMixability( + Check, FromType, ToType, ConvFun->getASTContext(), + ImplicitConversionModellingMode::OneWaySingleStandardOnly); Mix.sanitize(); if (!Mix.indicatesMixability()) return; @@ -1190,7 +1192,17 @@ approximateImplicitConversion(const TheCheck &Check, QualType LType, LLVM_DEBUG(llvm::dbgs() << ">>> approximateImplicitConversion for LType:\n"; LType.dump(llvm::dbgs(), Ctx); llvm::dbgs() << "\nand RType:\n"; RType.dump(llvm::dbgs(), Ctx); - llvm::dbgs() << "\nimplicit mode: " << ImplicitMode << '\n';); + llvm::dbgs() << "\nimplicit mode: "; switch (ImplicitMode) { + case ImplicitConversionModellingMode::None: + llvm::dbgs() << "None"; + break; + case ImplicitConversionModellingMode::All: + llvm::dbgs() << "All"; + break; + case ImplicitConversionModellingMode::OneWaySingleStandardOnly: + llvm::dbgs() << "OneWay, Single, STD Only"; + break; + } llvm::dbgs() << '\n';); if (LType == RType) return {MixFlags::Trivial, LType}; @@ -1210,7 +1222,7 @@ approximateImplicitConversion(const TheCheck &Check, QualType LType, WorkType = ImplicitSeq.AfterFirstStandard; } - if (ImplicitMode == ICMM_OneWaySingleStandardOnly) + if (ImplicitMode == ImplicitConversionModellingMode::OneWaySingleStandardOnly) // If the caller only requested modelling of a standard conversion, bail. return {ImplicitSeq.AfterFirstStandard.isNull() ? MixFlags::None @@ -1262,7 +1274,8 @@ approximateImplicitConversion(const TheCheck &Check, QualType LType, llvm::dbgs() << "--- approximateImplicitConversion. Try to find post-conversion.\n"); MixData SecondStdConv = approximateImplicitConversion( - Check, WorkType, RType, Ctx, ICMM_OneWaySingleStandardOnly); + Check, WorkType, RType, Ctx, + ImplicitConversionModellingMode::OneWaySingleStandardOnly); if (SecondStdConv.indicatesMixability()) { LLVM_DEBUG(llvm::dbgs() << "--- approximateImplicitConversion. Standard " "Post-Conversion found!\n"); @@ -1341,8 +1354,9 @@ static MixableParameterRange modelMixingRange( Mix M{Jth, Ith, calculateMixability(Check, Jth->getType(), Ith->getType(), Ctx, - Check.ModelImplicitConversions ? ICMM_All - : ICMM_None)}; + Check.ModelImplicitConversions + ? ImplicitConversionModellingMode::All + : ImplicitConversionModellingMode::None)}; LLVM_DEBUG(llvm::dbgs() << "Mix flags (raw) : " << formatMixFlags(M.flags()) << '\n'); M.sanitize(); From 8f9db0aeebecc48bdcc1ba7e4443c7a4467159b6 Mon Sep 17 00:00:00 2001 From: Florian Mayer Date: Wed, 23 Jun 2021 15:36:14 +0100 Subject: [PATCH 020/619] [hwasan] Show sp in register dump. Reviewed By: hctim, eugenis Differential Revision: https://reviews.llvm.org/D104787 --- compiler-rt/lib/hwasan/hwasan_report.cpp | 6 ++++-- .../hwasan/TestCases/register-dump-read.c | 20 ++++++++++--------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/compiler-rt/lib/hwasan/hwasan_report.cpp b/compiler-rt/lib/hwasan/hwasan_report.cpp index b39dade11347a..715b4e05992a6 100644 --- a/compiler-rt/lib/hwasan/hwasan_report.cpp +++ b/compiler-rt/lib/hwasan/hwasan_report.cpp @@ -672,8 +672,10 @@ void ReportRegisters(uptr *frame, uptr pc) { frame[20], frame[21], frame[22], frame[23]); Printf(" x24 %016llx x25 %016llx x26 %016llx x27 %016llx\n", frame[24], frame[25], frame[26], frame[27]); - Printf(" x28 %016llx x29 %016llx x30 %016llx\n", - frame[28], frame[29], frame[30]); + // hwasan_check* reduces the stack pointer by 256, then __hwasan_tag_mismatch + // passes it to this function. + Printf(" x28 %016llx x29 %016llx x30 %016llx sp %016llx\n", frame[28], + frame[29], frame[30], reinterpret_cast(frame) + 256); } } // namespace __hwasan diff --git a/compiler-rt/test/hwasan/TestCases/register-dump-read.c b/compiler-rt/test/hwasan/TestCases/register-dump-read.c index 002c5dd5e1f9e..17a340a94b425 100644 --- a/compiler-rt/test/hwasan/TestCases/register-dump-read.c +++ b/compiler-rt/test/hwasan/TestCases/register-dump-read.c @@ -1,11 +1,11 @@ -// RUN: %clang_hwasan -ffixed-x10 -ffixed-x23 -ffixed-x27 -O0 %s -o %t && not %run %t 2>&1 | FileCheck %s --check-prefixes=CHECK -// RUN: %clang_hwasan -ffixed-x10 -ffixed-x23 -ffixed-x27 -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s --check-prefixes=CHECK -// RUN: %clang_hwasan -ffixed-x10 -ffixed-x23 -ffixed-x27 -O2 %s -o %t && not %run %t 2>&1 | FileCheck %s --check-prefixes=CHECK -// RUN: %clang_hwasan -ffixed-x10 -ffixed-x23 -ffixed-x27 -O3 %s -o %t && not %run %t 2>&1 | FileCheck %s --check-prefixes=CHECK +// RUN: %clang_hwasan -ffixed-x10 -ffixed-x11 -ffixed-x23 -ffixed-x27 -O0 %s -o %t && not %run %t 2>&1 | FileCheck %s --check-prefixes=CHECK +// RUN: %clang_hwasan -ffixed-x10 -ffixed-x11 -ffixed-x23 -ffixed-x27 -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s --check-prefixes=CHECK +// RUN: %clang_hwasan -ffixed-x10 -ffixed-x11 -ffixed-x23 -ffixed-x27 -O2 %s -o %t && not %run %t 2>&1 | FileCheck %s --check-prefixes=CHECK +// RUN: %clang_hwasan -ffixed-x10 -ffixed-x11 -ffixed-x23 -ffixed-x27 -O3 %s -o %t && not %run %t 2>&1 | FileCheck %s --check-prefixes=CHECK // REQUIRES: aarch64-target-arch -// RUN: %clang_hwasan -ffixed-x10 -ffixed-x23 -ffixed-x27 -O2 %s -o %t && not %env_hwasan_opts=fast_unwind_on_fatal=true %run %t 2>&1 | FileCheck %s --check-prefixes=CHECK -// RUN: %clang_hwasan -ffixed-x10 -ffixed-x23 -ffixed-x27 -O2 %s -o %t && not %env_hwasan_opts=fast_unwind_on_fatal=false %run %t 2>&1 | FileCheck %s --check-prefixes=CHECK +// RUN: %clang_hwasan -ffixed-x10 -ffixed-x11 -ffixed-x23 -ffixed-x27 -O2 %s -o %t && not %env_hwasan_opts=fast_unwind_on_fatal=true %run %t 2>&1 | FileCheck %s --check-prefixes=CHECK +// RUN: %clang_hwasan -ffixed-x10 -ffixed-x11 -ffixed-x23 -ffixed-x27 -O2 %s -o %t && not %env_hwasan_opts=fast_unwind_on_fatal=false %run %t 2>&1 | FileCheck %s --check-prefixes=CHECK #include #include @@ -16,7 +16,8 @@ int main() { char * volatile x = (char*) malloc(10); asm volatile("mov x10, #0x2222\n" "mov x23, #0x3333\n" - "mov x27, #0x4444\n"); + "mov x27, #0x4444\n" + "mov x11, sp\n"); return x[16]; // CHECK: ERROR: HWAddressSanitizer: @@ -32,12 +33,13 @@ int main() { // CHECK-NEXT: x4{{[ ]+[0-9a-f]{16}[ ]}}x5{{[ ]+[0-9a-f]{16}[ ]}}x6{{[ ]+[0-9a-f]{16}[ ]}}x7{{[ ]+[0-9a-f]{16}$}} // CHECK-NEXT: x8{{[ ]+[0-9a-f]{16}[ ]}}x9{{[ ]+[0-9a-f]{16}[ ]}} // CHECK-SAME: x10 0000000000002222 - // CHECK-SAME: x11{{[ ]+[0-9a-f]{16}$}} + // CHECK-SAME: x11[[STACK:[ ]+[0-9a-f]{16}$]] // CHECK-NEXT: x12{{[ ]+[0-9a-f]{16}[ ]}}x13{{[ ]+[0-9a-f]{16}[ ]}}x14{{[ ]+[0-9a-f]{16}[ ]}}x15{{[ ]+[0-9a-f]{16}$}} // CHECK-NEXT: x16{{[ ]+[0-9a-f]{16}[ ]}}x17{{[ ]+[0-9a-f]{16}[ ]}}x18{{[ ]+[0-9a-f]{16}[ ]}}x19{{[ ]+[0-9a-f]{16}$}} // CHECK-NEXT: x20{{[ ]+[0-9a-f]{16}[ ]}}x21{{[ ]+[0-9a-f]{16}[ ]}}x22{{[ ]+[0-9a-f]{16}[ ]}} // CHECK-SAME: x23 0000000000003333{{$}} // CHECK-NEXT: x24{{[ ]+[0-9a-f]{16}[ ]}}x25{{[ ]+[0-9a-f]{16}[ ]}}x26{{[ ]+[0-9a-f]{16}[ ]}} // CHECK-SAME: x27 0000000000004444 - // CHECK-NEXT: x28{{[ ]+[0-9a-f]{16}[ ]}}x29{{[ ]+[0-9a-f]{16}[ ]}}x30{{[ ]+[0-9a-f]{16}$}} + // CHECK-NEXT: x28{{[ ]+[0-9a-f]{16}[ ]}}x29{{[ ]+[0-9a-f]{16}[ ]}}x30{{[ ]+[0-9a-f]{16}[ ]}} + // CHECK-SAME: sp{{.*}}[[STACK]] } From 6942076096e6dcfb0893a351a9a586490beec572 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Mon, 28 Jun 2021 09:36:04 +0000 Subject: [PATCH 021/619] [lld][MachO] Temporarily require 64 bit build for dead-strip.s This test has always failed on 32 bit armv8 bots: https://lab.llvm.org/buildbot/#/builders/178/builds/42 Due to the output order of some symbols changing. I don't think this is an Arm specific issue so disabling on 32 bit while it's investigated. --- lld/test/MachO/dead-strip.s | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lld/test/MachO/dead-strip.s b/lld/test/MachO/dead-strip.s index e64c95e837170..5e18be3ca71a9 100644 --- a/lld/test/MachO/dead-strip.s +++ b/lld/test/MachO/dead-strip.s @@ -1,4 +1,4 @@ -# REQUIRES: x86 +# REQUIRES: x86, llvm-64-bits # RUN: rm -rf %t; split-file %s %t From 80aa7e147e2a3a2a0435663ad9171b9ccdcc0178 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 28 Jun 2021 11:01:27 +0100 Subject: [PATCH 022/619] [VPlan] Merge predicated-triangle regions, after sinking. Sinking scalar operands into predicated-triangle regions may allow merging regions. This patch adds a VPlan-to-VPlan transform that tries to merge predicate-triangle regions after sinking. Reviewed By: Ayal Differential Revision: https://reviews.llvm.org/D100260 --- .../Transforms/Vectorize/LoopVectorize.cpp | 1 + .../Transforms/Vectorize/VPlanTransforms.cpp | 135 ++ .../Transforms/Vectorize/VPlanTransforms.h | 2 + .../X86/consecutive-ptr-uniforms.ll | 26 +- .../LoopVectorize/X86/small-size.ll | 392 +++--- .../LoopVectorize/X86/x86-pr39099.ll | 2 +- .../LoopVectorize/X86/x86-predication.ll | 20 +- ...-order-recurrence-sink-replicate-region.ll | 97 +- .../LoopVectorize/if-pred-non-void.ll | 71 +- .../LoopVectorize/reduction-inloop-pred.ll | 1170 +++++++---------- .../LoopVectorize/reduction-predselect.ll | 14 +- .../vplan-sink-scalars-and-merge.ll | 148 +-- 12 files changed, 899 insertions(+), 1179 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index e1dc71cd01801..cec0cf2ad6c0f 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9298,6 +9298,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( } VPlanTransforms::sinkScalarOperands(*Plan); + VPlanTransforms::mergeReplicateRegions(*Plan); std::string PlanName; raw_string_ostream RSO(PlanName); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 4d63538e741f1..e61caaacf2d30 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -148,3 +148,138 @@ bool VPlanTransforms::sinkScalarOperands(VPlan &Plan) { } return Changed; } + +/// If \p R is a region with a VPBranchOnMaskRecipe in the entry block, return +/// the mask. +VPValue *getPredicatedMask(VPRegionBlock *R) { + auto *EntryBB = dyn_cast(R->getEntry()); + if (!EntryBB || EntryBB->size() != 1 || + !isa(EntryBB->begin())) + return nullptr; + + return cast(&*EntryBB->begin())->getOperand(0); +} + +/// If \p R is a triangle region, return the 'then' block of the triangle. +static VPBasicBlock *getPredicatedThenBlock(VPRegionBlock *R) { + auto *EntryBB = cast(R->getEntry()); + if (EntryBB->getNumSuccessors() != 2) + return nullptr; + + auto *Succ0 = dyn_cast(EntryBB->getSuccessors()[0]); + auto *Succ1 = dyn_cast(EntryBB->getSuccessors()[1]); + if (!Succ0 || !Succ1) + return nullptr; + + if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1) + return nullptr; + if (Succ0->getSingleSuccessor() == Succ1) + return Succ0; + if (Succ1->getSingleSuccessor() == Succ0) + return Succ1; + return nullptr; +} + +bool VPlanTransforms::mergeReplicateRegions(VPlan &Plan) { + SetVector DeletedRegions; + bool Changed = false; + + // Collect region blocks to process up-front, to avoid iterator invalidation + // issues while merging regions. + SmallVector CandidateRegions( + VPBlockUtils::blocksOnly(depth_first( + VPBlockRecursiveTraversalWrapper(Plan.getEntry())))); + + // Check if Base is a predicated triangle, followed by an empty block, + // followed by another predicate triangle. If that's the case, move the + // recipes from the first to the second triangle. + for (VPRegionBlock *Region1 : CandidateRegions) { + if (DeletedRegions.contains(Region1)) + continue; + auto *MiddleBasicBlock = + dyn_cast_or_null(Region1->getSingleSuccessor()); + if (!MiddleBasicBlock || !MiddleBasicBlock->empty()) + continue; + + auto *Region2 = + dyn_cast_or_null(MiddleBasicBlock->getSingleSuccessor()); + if (!Region2) + continue; + + VPValue *Mask1 = getPredicatedMask(Region1); + VPValue *Mask2 = getPredicatedMask(Region2); + if (!Mask1 || Mask1 != Mask2) + continue; + VPBasicBlock *Then1 = getPredicatedThenBlock(Region1); + VPBasicBlock *Then2 = getPredicatedThenBlock(Region2); + if (!Then1 || !Then2) + continue; + + assert(Mask1 && Mask2 && "both region must have conditions"); + + // Note: No fusion-preventing memory dependencies are expected in either + // region. Such dependencies should be rejected during earlier dependence + // checks, which guarantee accesses can be re-ordered for vectorization. + // + // If a recipe is used by a first-order recurrence phi, we cannot move it at + // the moment: a recipe R feeding a first order recurrence phi must allow + // for a *vector* shuffle to be inserted immediately after it, and therefore + // if R is *scalarized and predicated* it must appear last in its basic + // block. In addition, other recipes may need to "sink after" R, so best if + // R not be moved at all. + auto IsImmovableRecipe = [](VPRecipeBase &R) { + assert(R.getNumDefinedValues() <= 1 && + "no multi-defs are expected in predicated blocks"); + for (VPUser *U : R.getVPValue()->users()) { + auto *UI = dyn_cast(U); + if (!UI) + continue; + auto *PhiR = dyn_cast(UI); + if (PhiR && !PhiR->getRecurrenceDescriptor()) + return true; + } + return false; + }; + if (any_of(*Then1, IsImmovableRecipe)) + continue; + + // Move recipes to the successor region. + for (VPRecipeBase &ToMove : make_early_inc_range(reverse(*Then1))) + ToMove.moveBefore(*Then2, Then2->getFirstNonPhi()); + + auto *Merge1 = cast(Then1->getSingleSuccessor()); + auto *Merge2 = cast(Then2->getSingleSuccessor()); + + // Move VPPredInstPHIRecipes from the merge block to the successor region's + // merge block. Update all users inside the successor region to use the + // original values. + for (VPRecipeBase &Phi1ToMove : make_early_inc_range(reverse(*Merge1))) { + VPValue *PredInst1 = + cast(&Phi1ToMove)->getOperand(0); + for (VPUser *U : Phi1ToMove.getVPValue()->users()) { + auto *UI = dyn_cast(U); + if (!UI || UI->getParent() != Then2) + continue; + for (unsigned I = 0, E = U->getNumOperands(); I != E; ++I) { + if (Phi1ToMove.getVPValue() != U->getOperand(I)) + continue; + U->setOperand(I, PredInst1); + } + } + + Phi1ToMove.moveBefore(*Merge2, Merge2->begin()); + } + + // Finally, remove the first region. + for (VPBlockBase *Pred : make_early_inc_range(Region1->getPredecessors())) { + VPBlockUtils::disconnectBlocks(Pred, Region1); + VPBlockUtils::connectBlocks(Pred, MiddleBasicBlock); + } + VPBlockUtils::disconnectBlocks(Region1, MiddleBasicBlock); + DeletedRegions.insert(Region1); + } + + for (VPRegionBlock *ToDelete : DeletedRegions) + delete ToDelete; + return Changed; +} diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index f201805101592..c740f2c022da2 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -30,6 +30,8 @@ struct VPlanTransforms { SmallPtrSetImpl &DeadInstructions, ScalarEvolution &SE); static bool sinkScalarOperands(VPlan &Plan); + + static bool mergeReplicateRegions(VPlan &Plan); }; } // namespace llvm diff --git a/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll b/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll index d46a811322a64..1a030ab6d72ef 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll @@ -89,38 +89,28 @@ attributes #0 = { "target-cpu"="knl" } ; FORCE: vector.body: ; FORCE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE4:%.*]] ] ; FORCE-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE4]] ] -; FORCE-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 -; FORCE-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 ; FORCE-NEXT: [[TMP2:%.*]] = icmp ule <2 x i32> [[VEC_IND]], ; FORCE-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 -; FORCE-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] -; FORCE: pred.store.if: -; FORCE-NEXT: store i32 [[TMP0]], i32* @b, align 1 -; FORCE-NEXT: br label [[PRED_STORE_CONTINUE]] -; FORCE: pred.store.continue: -; FORCE-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1 -; FORCE-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]] -; FORCE: pred.store.if1: -; FORCE-NEXT: store i32 [[TMP1]], i32* @b, align 1 -; FORCE-NEXT: br label [[PRED_STORE_CONTINUE2]] -; FORCE: pred.store.continue2: -; FORCE-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 -; FORCE-NEXT: br i1 [[TMP5]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] +; FORCE-NEXT: br i1 [[TMP3]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; FORCE: pred.load.if: +; FORCE-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; FORCE-NEXT: store i32 [[TMP0]], i32* @b, align 1 ; FORCE-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i32], [3 x i32]* @a, i32 0, i32 [[TMP0]] ; FORCE-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 1 ; FORCE-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0 ; FORCE-NEXT: br label [[PRED_LOAD_CONTINUE]] ; FORCE: pred.load.continue: -; FORCE-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ poison, [[PRED_STORE_CONTINUE2]] ], [ [[TMP8]], [[PRED_LOAD_IF]] ] +; FORCE-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_LOAD_IF]] ] ; FORCE-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1 ; FORCE-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4]] -; FORCE: pred.load.if3: +; FORCE: pred.load.if1: +; FORCE-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 +; FORCE-NEXT: store i32 [[TMP1]], i32* @b, align 1 ; FORCE-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i32], [3 x i32]* @a, i32 0, i32 [[TMP1]] ; FORCE-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 1 ; FORCE-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP12]], i32 1 ; FORCE-NEXT: br label [[PRED_LOAD_CONTINUE4]] -; FORCE: pred.load.continue4: +; FORCE: pred.load.continue2: ; FORCE-NEXT: [[TMP14:%.*]] = phi <2 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP13]], [[PRED_LOAD_IF3]] ] ; FORCE-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 ; FORCE-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], diff --git a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll index 595668a90ff92..7558d849ae6bb 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll @@ -40,13 +40,13 @@ define void @example1() optsize { ; CHECK-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP7]], align 16 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[TMP10:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[TMP9:%.*]] ; CHECK: 9: -; CHECK-NEXT: br i1 undef, label [[TMP10]], label [[TMP9]], [[LOOP2:!llvm.loop !.*]] +; CHECK-NEXT: br i1 undef, label [[TMP10]], label [[TMP9]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: 10: ; CHECK-NEXT: ret void ; @@ -123,7 +123,7 @@ define void @example2(i32 %n, i32 %x) optsize { ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[DOT_PREHEADER_CRIT_EDGE:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -146,122 +146,74 @@ define void @example2(i32 %n, i32 %x) optsize { ; CHECK-NEXT: [[BROADCAST_SPLAT21:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT20]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY9:%.*]] ; CHECK: vector.body9: -; CHECK-NEXT: [[INDEX14:%.*]] = phi i64 [ 0, [[VECTOR_PH10]] ], [ [[INDEX_NEXT15:%.*]], [[PRED_STORE_CONTINUE51:%.*]] ] +; CHECK-NEXT: [[INDEX14:%.*]] = phi i64 [ 0, [[VECTOR_PH10]] ], [ [[INDEX_NEXT15:%.*]], [[PRED_STORE_CONTINUE37:%.*]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[I_0_LCSSA]], [[INDEX14]] -; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[OFFSET_IDX]], 1 -; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[OFFSET_IDX]], 2 -; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[OFFSET_IDX]], 3 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT28:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX14]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT29:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT28]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = or <4 x i64> [[BROADCAST_SPLAT29]], -; CHECK-NEXT: [[TMP23:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT21]] -; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i1> [[TMP23]], i32 0 -; CHECK-NEXT: br i1 [[TMP24]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] -; CHECK: pred.load.if: -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] -; CHECK: pred.load.continue: -; CHECK-NEXT: [[TMP27:%.*]] = phi i32 [ poison, [[VECTOR_BODY9]] ], [ [[TMP26]], [[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP23]], i32 1 -; CHECK-NEXT: br i1 [[TMP28]], label [[PRED_LOAD_IF30:%.*]], label [[PRED_LOAD_CONTINUE31:%.*]] -; CHECK: pred.load.if30: -; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[TMP20]] -; CHECK-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE31]] -; CHECK: pred.load.continue31: -; CHECK-NEXT: [[TMP31:%.*]] = phi i32 [ poison, [[PRED_LOAD_CONTINUE]] ], [ [[TMP30]], [[PRED_LOAD_IF30]] ] -; CHECK-NEXT: [[TMP32:%.*]] = extractelement <4 x i1> [[TMP23]], i32 2 -; CHECK-NEXT: br i1 [[TMP32]], label [[PRED_LOAD_IF32:%.*]], label [[PRED_LOAD_CONTINUE33:%.*]] -; CHECK: pred.load.if32: -; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[TMP21]] -; CHECK-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE33]] -; CHECK: pred.load.continue33: -; CHECK-NEXT: [[TMP35:%.*]] = phi i32 [ poison, [[PRED_LOAD_CONTINUE31]] ], [ [[TMP34]], [[PRED_LOAD_IF32]] ] -; CHECK-NEXT: [[TMP36:%.*]] = extractelement <4 x i1> [[TMP23]], i32 3 -; CHECK-NEXT: br i1 [[TMP36]], label [[PRED_LOAD_IF34:%.*]], label [[PRED_LOAD_CONTINUE35:%.*]] -; CHECK: pred.load.if34: -; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[TMP22]] -; CHECK-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE35]] -; CHECK: pred.load.continue35: -; CHECK-NEXT: [[TMP39:%.*]] = phi i32 [ poison, [[PRED_LOAD_CONTINUE33]] ], [ [[TMP38]], [[PRED_LOAD_IF34]] ] -; CHECK-NEXT: [[TMP40:%.*]] = extractelement <4 x i1> [[TMP23]], i32 0 -; CHECK-NEXT: br i1 [[TMP40]], label [[PRED_LOAD_IF36:%.*]], label [[PRED_LOAD_CONTINUE37:%.*]] -; CHECK: pred.load.if36: -; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP42:%.*]] = load i32, i32* [[TMP41]], align 4 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE37]] -; CHECK: pred.load.continue37: -; CHECK-NEXT: [[TMP43:%.*]] = phi i32 [ poison, [[PRED_LOAD_CONTINUE35]] ], [ [[TMP42]], [[PRED_LOAD_IF36]] ] -; CHECK-NEXT: [[TMP44:%.*]] = extractelement <4 x i1> [[TMP23]], i32 1 -; CHECK-NEXT: br i1 [[TMP44]], label [[PRED_LOAD_IF38:%.*]], label [[PRED_LOAD_CONTINUE39:%.*]] -; CHECK: pred.load.if38: -; CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 [[TMP20]] -; CHECK-NEXT: [[TMP46:%.*]] = load i32, i32* [[TMP45]], align 4 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE39]] -; CHECK: pred.load.continue39: -; CHECK-NEXT: [[TMP47:%.*]] = phi i32 [ poison, [[PRED_LOAD_CONTINUE37]] ], [ [[TMP46]], [[PRED_LOAD_IF38]] ] -; CHECK-NEXT: [[TMP48:%.*]] = extractelement <4 x i1> [[TMP23]], i32 2 -; CHECK-NEXT: br i1 [[TMP48]], label [[PRED_LOAD_IF40:%.*]], label [[PRED_LOAD_CONTINUE41:%.*]] -; CHECK: pred.load.if40: -; CHECK-NEXT: [[TMP49:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 [[TMP21]] -; CHECK-NEXT: [[TMP50:%.*]] = load i32, i32* [[TMP49]], align 4 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE41]] -; CHECK: pred.load.continue41: -; CHECK-NEXT: [[TMP51:%.*]] = phi i32 [ poison, [[PRED_LOAD_CONTINUE39]] ], [ [[TMP50]], [[PRED_LOAD_IF40]] ] -; CHECK-NEXT: [[TMP52:%.*]] = extractelement <4 x i1> [[TMP23]], i32 3 -; CHECK-NEXT: br i1 [[TMP52]], label [[PRED_LOAD_IF42:%.*]], label [[PRED_LOAD_CONTINUE43:%.*]] -; CHECK: pred.load.if42: -; CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 [[TMP22]] -; CHECK-NEXT: [[TMP54:%.*]] = load i32, i32* [[TMP53]], align 4 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE43]] -; CHECK: pred.load.continue43: -; CHECK-NEXT: [[TMP55:%.*]] = phi i32 [ poison, [[PRED_LOAD_CONTINUE41]] ], [ [[TMP54]], [[PRED_LOAD_IF42]] ] -; CHECK-NEXT: [[TMP56:%.*]] = extractelement <4 x i1> [[TMP23]], i32 0 -; CHECK-NEXT: br i1 [[TMP56]], label [[PRED_STORE_IF44:%.*]], label [[PRED_STORE_CONTINUE45:%.*]] -; CHECK: pred.store.if44: -; CHECK-NEXT: [[TMP57:%.*]] = and i32 [[TMP43]], [[TMP27]] -; CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[OFFSET_IDX]] -; CHECK-NEXT: store i32 [[TMP57]], i32* [[TMP58]], align 4 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE45]] -; CHECK: pred.store.continue45: -; CHECK-NEXT: [[TMP59:%.*]] = extractelement <4 x i1> [[TMP23]], i32 1 -; CHECK-NEXT: br i1 [[TMP59]], label [[PRED_STORE_IF46:%.*]], label [[PRED_STORE_CONTINUE47:%.*]] -; CHECK: pred.store.if46: -; CHECK-NEXT: [[TMP60:%.*]] = and i32 [[TMP47]], [[TMP31]] -; CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[TMP20]] -; CHECK-NEXT: store i32 [[TMP60]], i32* [[TMP61]], align 4 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE47]] -; CHECK: pred.store.continue47: -; CHECK-NEXT: [[TMP62:%.*]] = extractelement <4 x i1> [[TMP23]], i32 2 -; CHECK-NEXT: br i1 [[TMP62]], label [[PRED_STORE_IF48:%.*]], label [[PRED_STORE_CONTINUE49:%.*]] -; CHECK: pred.store.if48: -; CHECK-NEXT: [[TMP63:%.*]] = and i32 [[TMP51]], [[TMP35]] -; CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[TMP21]] -; CHECK-NEXT: store i32 [[TMP63]], i32* [[TMP64]], align 4 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE49]] -; CHECK: pred.store.continue49: -; CHECK-NEXT: [[TMP65:%.*]] = extractelement <4 x i1> [[TMP23]], i32 3 -; CHECK-NEXT: br i1 [[TMP65]], label [[PRED_STORE_IF50:%.*]], label [[PRED_STORE_CONTINUE51]] -; CHECK: pred.store.if50: -; CHECK-NEXT: [[TMP66:%.*]] = and i32 [[TMP55]], [[TMP39]] -; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[TMP22]] -; CHECK-NEXT: store i32 [[TMP66]], i32* [[TMP67]], align 4 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE51]] -; CHECK: pred.store.continue51: +; CHECK-NEXT: [[TMP20:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT21]] +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP20]], i32 0 +; CHECK-NEXT: br i1 [[TMP21]], label [[PRED_STORE_IF30:%.*]], label [[PRED_STORE_CONTINUE31:%.*]] +; CHECK: pred.store.if30: +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +; CHECK-NEXT: [[TMP26:%.*]] = and i32 [[TMP25]], [[TMP23]] +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[OFFSET_IDX]] +; CHECK-NEXT: store i32 [[TMP26]], i32* [[TMP27]], align 4 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE31]] +; CHECK: pred.store.continue31: +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP20]], i32 1 +; CHECK-NEXT: br i1 [[TMP28]], label [[PRED_STORE_IF32:%.*]], label [[PRED_STORE_CONTINUE33:%.*]] +; CHECK: pred.store.if32: +; CHECK-NEXT: [[TMP29:%.*]] = add i64 [[OFFSET_IDX]], 1 +; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[TMP29]] +; CHECK-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 +; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 [[TMP29]] +; CHECK-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +; CHECK-NEXT: [[TMP34:%.*]] = and i32 [[TMP33]], [[TMP31]] +; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[TMP29]] +; CHECK-NEXT: store i32 [[TMP34]], i32* [[TMP35]], align 4 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE33]] +; CHECK: pred.store.continue33: +; CHECK-NEXT: [[TMP36:%.*]] = extractelement <4 x i1> [[TMP20]], i32 2 +; CHECK-NEXT: br i1 [[TMP36]], label [[PRED_STORE_IF34:%.*]], label [[PRED_STORE_CONTINUE35:%.*]] +; CHECK: pred.store.if34: +; CHECK-NEXT: [[TMP37:%.*]] = add i64 [[OFFSET_IDX]], 2 +; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[TMP37]] +; CHECK-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP38]], align 4 +; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 [[TMP37]] +; CHECK-NEXT: [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4 +; CHECK-NEXT: [[TMP42:%.*]] = and i32 [[TMP41]], [[TMP39]] +; CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[TMP37]] +; CHECK-NEXT: store i32 [[TMP42]], i32* [[TMP43]], align 4 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE35]] +; CHECK: pred.store.continue35: +; CHECK-NEXT: [[TMP44:%.*]] = extractelement <4 x i1> [[TMP20]], i32 3 +; CHECK-NEXT: br i1 [[TMP44]], label [[PRED_STORE_IF36:%.*]], label [[PRED_STORE_CONTINUE37]] +; CHECK: pred.store.if36: +; CHECK-NEXT: [[TMP45:%.*]] = add i64 [[OFFSET_IDX]], 3 +; CHECK-NEXT: [[TMP46:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 [[TMP45]] +; CHECK-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 +; CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 [[TMP45]] +; CHECK-NEXT: [[TMP49:%.*]] = load i32, i32* [[TMP48]], align 4 +; CHECK-NEXT: [[TMP50:%.*]] = and i32 [[TMP49]], [[TMP47]] +; CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 [[TMP45]] +; CHECK-NEXT: store i32 [[TMP50]], i32* [[TMP51]], align 4 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE37]] +; CHECK: pred.store.continue37: ; CHECK-NEXT: [[INDEX_NEXT15]] = add i64 [[INDEX14]], 4 -; CHECK-NEXT: [[TMP68:%.*]] = icmp eq i64 [[INDEX_NEXT15]], [[N_VEC13]] -; CHECK-NEXT: br i1 [[TMP68]], label [[MIDDLE_BLOCK7:%.*]], label [[VECTOR_BODY9]], [[LOOP5:!llvm.loop !.*]] +; CHECK-NEXT: [[TMP52:%.*]] = icmp eq i64 [[INDEX_NEXT15]], [[N_VEC13]] +; CHECK-NEXT: br i1 [[TMP52]], label [[MIDDLE_BLOCK7:%.*]], label [[VECTOR_BODY9]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block7: ; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[SCALAR_PH8]] ; CHECK: scalar.ph8: ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] ; CHECK: .lr.ph5: -; CHECK-NEXT: br i1 undef, label [[DOT_PREHEADER_CRIT_EDGE]], label [[DOTLR_PH5]], [[LOOP6:!llvm.loop !.*]] +; CHECK-NEXT: br i1 undef, label [[DOT_PREHEADER_CRIT_EDGE]], label [[DOTLR_PH5]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: .lr.ph: -; CHECK-NEXT: br i1 undef, label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[DOTLR_PH]], [[LOOP7:!llvm.loop !.*]] +; CHECK-NEXT: br i1 undef, label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[DOTLR_PH]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: ._crit_edge.loopexit: ; CHECK-NEXT: br label [[DOT_CRIT_EDGE]] ; CHECK: ._crit_edge: @@ -324,86 +276,62 @@ define void @example3(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE27:%.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE21:%.*]] ] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT14:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT15:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT14]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = or <4 x i64> [[BROADCAST_SPLAT15]], ; CHECK-NEXT: [[TMP4:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0 -; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] -; CHECK: pred.load.if: +; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; CHECK: pred.store.if: +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[NEXT_GEP10:%.*]] = getelementptr i32, i32* [[Q:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[NEXT_GEP10]], align 16 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] -; CHECK: pred.load.continue: -; CHECK-NEXT: [[TMP7:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1 -; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_LOAD_IF16:%.*]], label [[PRED_LOAD_CONTINUE17:%.*]] -; CHECK: pred.load.if16: +; CHECK-NEXT: store i32 [[TMP6]], i32* [[NEXT_GEP]], align 16 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] +; CHECK: pred.store.continue: +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1 +; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF16:%.*]], label [[PRED_STORE_CONTINUE17:%.*]] +; CHECK: pred.store.if16: +; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[INDEX]], 1 +; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP8]] ; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[INDEX]], 1 ; CHECK-NEXT: [[NEXT_GEP11:%.*]] = getelementptr i32, i32* [[Q]], i64 [[TMP9]] ; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[NEXT_GEP11]], align 16 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE17]] -; CHECK: pred.load.continue17: -; CHECK-NEXT: [[TMP11:%.*]] = phi i32 [ poison, [[PRED_LOAD_CONTINUE]] ], [ [[TMP10]], [[PRED_LOAD_IF16]] ] -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2 -; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_LOAD_IF18:%.*]], label [[PRED_LOAD_CONTINUE19:%.*]] -; CHECK: pred.load.if18: +; CHECK-NEXT: store i32 [[TMP10]], i32* [[NEXT_GEP7]], align 16 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE17]] +; CHECK: pred.store.continue17: +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2 +; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF18:%.*]], label [[PRED_STORE_CONTINUE19:%.*]] +; CHECK: pred.store.if18: +; CHECK-NEXT: [[TMP12:%.*]] = or i64 [[INDEX]], 2 +; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP12]] ; CHECK-NEXT: [[TMP13:%.*]] = or i64 [[INDEX]], 2 ; CHECK-NEXT: [[NEXT_GEP12:%.*]] = getelementptr i32, i32* [[Q]], i64 [[TMP13]] ; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[NEXT_GEP12]], align 16 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE19]] -; CHECK: pred.load.continue19: -; CHECK-NEXT: [[TMP15:%.*]] = phi i32 [ poison, [[PRED_LOAD_CONTINUE17]] ], [ [[TMP14]], [[PRED_LOAD_IF18]] ] -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3 -; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_LOAD_IF20:%.*]], label [[PRED_LOAD_CONTINUE21:%.*]] -; CHECK: pred.load.if20: +; CHECK-NEXT: store i32 [[TMP14]], i32* [[NEXT_GEP8]], align 16 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE19]] +; CHECK: pred.store.continue19: +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3 +; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF20:%.*]], label [[PRED_STORE_CONTINUE21]] +; CHECK: pred.store.if20: +; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[INDEX]], 3 +; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP16]] ; CHECK-NEXT: [[TMP17:%.*]] = or i64 [[INDEX]], 3 ; CHECK-NEXT: [[NEXT_GEP13:%.*]] = getelementptr i32, i32* [[Q]], i64 [[TMP17]] ; CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* [[NEXT_GEP13]], align 16 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE21]] -; CHECK: pred.load.continue21: -; CHECK-NEXT: [[TMP19:%.*]] = phi i32 [ poison, [[PRED_LOAD_CONTINUE19]] ], [ [[TMP18]], [[PRED_LOAD_IF20]] ] -; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0 -; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] -; CHECK: pred.store.if: -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[INDEX]] -; CHECK-NEXT: store i32 [[TMP7]], i32* [[NEXT_GEP]], align 16 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] -; CHECK: pred.store.continue: -; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1 -; CHECK-NEXT: br i1 [[TMP21]], label [[PRED_STORE_IF22:%.*]], label [[PRED_STORE_CONTINUE23:%.*]] -; CHECK: pred.store.if22: -; CHECK-NEXT: [[TMP22:%.*]] = or i64 [[INDEX]], 1 -; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP22]] -; CHECK-NEXT: store i32 [[TMP11]], i32* [[NEXT_GEP7]], align 16 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE23]] -; CHECK: pred.store.continue23: -; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2 -; CHECK-NEXT: br i1 [[TMP23]], label [[PRED_STORE_IF24:%.*]], label [[PRED_STORE_CONTINUE25:%.*]] -; CHECK: pred.store.if24: -; CHECK-NEXT: [[TMP24:%.*]] = or i64 [[INDEX]], 2 -; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP24]] -; CHECK-NEXT: store i32 [[TMP15]], i32* [[NEXT_GEP8]], align 16 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE25]] -; CHECK: pred.store.continue25: -; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3 -; CHECK-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF26:%.*]], label [[PRED_STORE_CONTINUE27]] -; CHECK: pred.store.if26: -; CHECK-NEXT: [[TMP26:%.*]] = or i64 [[INDEX]], 3 -; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i32, i32* [[P]], i64 [[TMP26]] -; CHECK-NEXT: store i32 [[TMP19]], i32* [[NEXT_GEP9]], align 16 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE27]] -; CHECK: pred.store.continue27: +; CHECK-NEXT: store i32 [[TMP18]], i32* [[NEXT_GEP9]], align 16 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE21]] +; CHECK: pred.store.continue21: ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP8:!llvm.loop !.*]] +; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] ; CHECK: .lr.ph: -; CHECK-NEXT: br i1 undef, label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[DOTLR_PH]], [[LOOP9:!llvm.loop !.*]] +; CHECK-NEXT: br i1 undef, label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[DOTLR_PH]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: ._crit_edge.loopexit: ; CHECK-NEXT: br label [[DOT_CRIT_EDGE]] ; CHECK: ._crit_edge: @@ -487,13 +415,13 @@ define void @example23b(i16* noalias nocapture %src, i32* noalias nocapture %dst ; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP10:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[TMP7:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[TMP6:%.*]] ; CHECK: 6: -; CHECK-NEXT: br i1 undef, label [[TMP7]], label [[TMP6]], [[LOOP11:!llvm.loop !.*]] +; CHECK-NEXT: br i1 undef, label [[TMP7]], label [[TMP6]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: 7: ; CHECK-NEXT: ret void ; @@ -524,95 +452,71 @@ define void @example23c(i16* noalias nocapture %src, i32* noalias nocapture %dst ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE22:%.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE16:%.*]] ] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = or <4 x i64> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i64> [[INDUCTION]], ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 -; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] -; CHECK: pred.load.if: -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i16, i16* [[SRC:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP3:%.*]] = load i16, i16* [[NEXT_GEP]], align 2 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] -; CHECK: pred.load.continue: -; CHECK-NEXT: [[TMP4:%.*]] = phi i16 [ poison, [[VECTOR_BODY]] ], [ [[TMP3]], [[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1 -; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]] -; CHECK: pred.load.if11: -; CHECK-NEXT: [[TMP6:%.*]] = or i64 [[INDEX]], 1 -; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i16, i16* [[SRC]], i64 [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = load i16, i16* [[NEXT_GEP4]], align 2 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE12]] -; CHECK: pred.load.continue12: -; CHECK-NEXT: [[TMP8:%.*]] = phi i16 [ poison, [[PRED_LOAD_CONTINUE]] ], [ [[TMP7]], [[PRED_LOAD_IF11]] ] -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2 -; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14:%.*]] -; CHECK: pred.load.if13: -; CHECK-NEXT: [[TMP10:%.*]] = or i64 [[INDEX]], 2 -; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i16, i16* [[SRC]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP11:%.*]] = load i16, i16* [[NEXT_GEP5]], align 2 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE14]] -; CHECK: pred.load.continue14: -; CHECK-NEXT: [[TMP12:%.*]] = phi i16 [ poison, [[PRED_LOAD_CONTINUE12]] ], [ [[TMP11]], [[PRED_LOAD_IF13]] ] -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3 -; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16:%.*]] -; CHECK: pred.load.if15: -; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[INDEX]], 3 -; CHECK-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i16, i16* [[SRC]], i64 [[TMP14]] -; CHECK-NEXT: [[TMP15:%.*]] = load i16, i16* [[NEXT_GEP6]], align 2 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE16]] -; CHECK: pred.load.continue16: -; CHECK-NEXT: [[TMP16:%.*]] = phi i16 [ poison, [[PRED_LOAD_CONTINUE14]] ], [ [[TMP15]], [[PRED_LOAD_IF15]] ] -; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 -; CHECK-NEXT: br i1 [[TMP17]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: ; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i32, i32* [[DST:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP18:%.*]] = zext i16 [[TMP4]] to i32 -; CHECK-NEXT: [[TMP19:%.*]] = shl nuw nsw i32 [[TMP18]], 7 -; CHECK-NEXT: store i32 [[TMP19]], i32* [[NEXT_GEP7]], align 4 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i16, i16* [[SRC:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = load i16, i16* [[NEXT_GEP]], align 2 +; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i32 [[TMP4]], 7 +; CHECK-NEXT: store i32 [[TMP5]], i32* [[NEXT_GEP7]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: -; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1 -; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_STORE_IF17:%.*]], label [[PRED_STORE_CONTINUE18:%.*]] -; CHECK: pred.store.if17: -; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[INDEX]], 1 -; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i32, i32* [[DST]], i64 [[TMP21]] -; CHECK-NEXT: [[TMP22:%.*]] = zext i16 [[TMP8]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1 +; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]] +; CHECK: pred.store.if11: +; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[INDEX]], 1 +; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i32, i32* [[DST]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[INDEX]], 1 +; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i16, i16* [[SRC]], i64 [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = load i16, i16* [[NEXT_GEP4]], align 2 +; CHECK-NEXT: [[TMP10:%.*]] = zext i16 [[TMP9]] to i32 +; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i32 [[TMP10]], 7 +; CHECK-NEXT: store i32 [[TMP11]], i32* [[NEXT_GEP8]], align 4 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]] +; CHECK: pred.store.continue12: +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2 +; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14:%.*]] +; CHECK: pred.store.if13: +; CHECK-NEXT: [[TMP13:%.*]] = or i64 [[INDEX]], 2 +; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i32, i32* [[DST]], i64 [[TMP13]] +; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[INDEX]], 2 +; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i16, i16* [[SRC]], i64 [[TMP14]] +; CHECK-NEXT: [[TMP15:%.*]] = load i16, i16* [[NEXT_GEP5]], align 2 +; CHECK-NEXT: [[TMP16:%.*]] = zext i16 [[TMP15]] to i32 +; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw i32 [[TMP16]], 7 +; CHECK-NEXT: store i32 [[TMP17]], i32* [[NEXT_GEP9]], align 4 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE14]] +; CHECK: pred.store.continue14: +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3 +; CHECK-NEXT: br i1 [[TMP18]], label [[PRED_STORE_IF15:%.*]], label [[PRED_STORE_CONTINUE16]] +; CHECK: pred.store.if15: +; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[INDEX]], 3 +; CHECK-NEXT: [[NEXT_GEP10:%.*]] = getelementptr i32, i32* [[DST]], i64 [[TMP19]] +; CHECK-NEXT: [[TMP20:%.*]] = or i64 [[INDEX]], 3 +; CHECK-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i16, i16* [[SRC]], i64 [[TMP20]] +; CHECK-NEXT: [[TMP21:%.*]] = load i16, i16* [[NEXT_GEP6]], align 2 +; CHECK-NEXT: [[TMP22:%.*]] = zext i16 [[TMP21]] to i32 ; CHECK-NEXT: [[TMP23:%.*]] = shl nuw nsw i32 [[TMP22]], 7 -; CHECK-NEXT: store i32 [[TMP23]], i32* [[NEXT_GEP8]], align 4 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE18]] -; CHECK: pred.store.continue18: -; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2 -; CHECK-NEXT: br i1 [[TMP24]], label [[PRED_STORE_IF19:%.*]], label [[PRED_STORE_CONTINUE20:%.*]] -; CHECK: pred.store.if19: -; CHECK-NEXT: [[TMP25:%.*]] = or i64 [[INDEX]], 2 -; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i32, i32* [[DST]], i64 [[TMP25]] -; CHECK-NEXT: [[TMP26:%.*]] = zext i16 [[TMP12]] to i32 -; CHECK-NEXT: [[TMP27:%.*]] = shl nuw nsw i32 [[TMP26]], 7 -; CHECK-NEXT: store i32 [[TMP27]], i32* [[NEXT_GEP9]], align 4 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE20]] -; CHECK: pred.store.continue20: -; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3 -; CHECK-NEXT: br i1 [[TMP28]], label [[PRED_STORE_IF21:%.*]], label [[PRED_STORE_CONTINUE22]] -; CHECK: pred.store.if21: -; CHECK-NEXT: [[TMP29:%.*]] = or i64 [[INDEX]], 3 -; CHECK-NEXT: [[NEXT_GEP10:%.*]] = getelementptr i32, i32* [[DST]], i64 [[TMP29]] -; CHECK-NEXT: [[TMP30:%.*]] = zext i16 [[TMP16]] to i32 -; CHECK-NEXT: [[TMP31:%.*]] = shl nuw nsw i32 [[TMP30]], 7 -; CHECK-NEXT: store i32 [[TMP31]], i32* [[NEXT_GEP10]], align 4 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE22]] -; CHECK: pred.store.continue22: +; CHECK-NEXT: store i32 [[TMP23]], i32* [[NEXT_GEP10]], align 4 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE16]] +; CHECK: pred.store.continue16: ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 -; CHECK-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP12:!llvm.loop !.*]] +; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 +; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[TMP34:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br i1 true, label [[TMP26:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: br label [[TMP33:%.*]] -; CHECK: 33: -; CHECK-NEXT: br i1 undef, label [[TMP34]], label [[TMP33]], [[LOOP13:!llvm.loop !.*]] -; CHECK: 34: +; CHECK-NEXT: br label [[TMP25:%.*]] +; CHECK: 25: +; CHECK-NEXT: br i1 undef, label [[TMP26]], label [[TMP25]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK: 26: ; CHECK-NEXT: ret void ; br label %1 diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-pr39099.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-pr39099.ll index 1f1e9aa890d45..4ebf3018629bd 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/x86-pr39099.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/x86-pr39099.ll @@ -28,7 +28,7 @@ target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" ;CHECK-NEXT: %[[VMASK:.+]] = icmp ugt <8 x i32> %[[VECIND]], %{{broadcast.splat*}} ;CHECK-NEXT: %{{.*}} = shl nuw nsw <8 x i32> %[[VECIND]], ;CHECK-NEXT: %[[M:.+]] = extractelement <8 x i1> %[[VMASK]], i32 0 -;CHECK-NEXT: br i1 %[[M]], label %pred.load.if, label %pred.load.continue +;CHECK-NEXT: br i1 %[[M]], label %pred.store.if, label %pred.store.continue ;CHECK-NOT: %{{.+}} = load <16 x i8>, <16 x i8>* %{{.*}}, align 1 define dso_local void @masked_strided(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 zeroext %guard) local_unnamed_addr { diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll index 65459284ecffa..0ba9b207c1f66 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll @@ -64,14 +64,18 @@ for.end: ; instead scalarized if Cost-Model so decided as part of its ; sink-scalar-operands optimization for predicated instructions. ; -; SINK-GATHER: vector.body: -; SINK-GATHER: pred.load.if: -; SINK-GATHER: %[[T0:.+]] = load i32, i32* %{{.*}}, align 4 -; SINK-GATHER: pred.load.continue: -; SINK-GATHER: %[[T1:.+]] = phi i32 [ poison, %vector.body ], [ %[[T0]], %pred.load.if ] -; SINK-GATHER: pred.udiv.if: -; SINK-GATHER: %{{.*}} = udiv i32 %[[T1]], %{{.*}} -; SINK-GATHER: pred.udiv.continue: +; SINK-GATHER-LABEL: @scalarize_and_sink_gather +; SINK-GATHER: vector.body: +; SINK-GATHER-LABEL: pred.udiv.if: ; preds = %vector.body +; SINK-GATHER-NEXT: [[EXT:%.+]] = extractelement <8 x i64> {{.*}}, i32 0 +; SINK-GATHER-NEXT: [[GEP:%.+]] = getelementptr inbounds i32, i32* %a, i64 [[EXT]] +; SINK-GATHER-NEXT: [[LV:%.+]] = load i32, i32* [[GEP]], align 4 +; SINK-GATHER-NEXT: [[UDIV:%.+]] = udiv i32 [[LV]], %x +; SINK-GATHER-NEXT: [[INS:%.+]] = insertelement <8 x i32> poison, i32 [[UDIV]], i32 0 +; SINK-GATHER-NEXT: br label %pred.udiv.continue +; SINK-GATHER: pred.udiv.continue: +; SINK-GATHER-NEXT: phi i32 [ poison, %vector.body ], [ [[LV]], %pred.udiv.if ] +; SINK-GATHER-NEXT: phi <8 x i32> [ poison, %vector.body ], [ [[INS]], %pred.udiv.if ] define i32 @scalarize_and_sink_gather(i32* %a, i1 %c, i32 %x, i64 %n) { entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll index d38fc97226a48..ed0cc712d2c51 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -90,40 +90,26 @@ define void @sink_replicate_region_2(i32 %x, i8 %y, i32* %ptr) optsize { ; CHECK-EMPTY: ; CHECK-NEXT: loop.0: ; CHECK-NEXT: WIDEN ir<%recur.next> = sext ir<%y> -; CHECK-NEXT: Successor(s): pred.srem -; CHECK-EMPTY: -; CHECK-NEXT: pred.srem: { -; CHECK-NEXT: pred.srem.entry: -; CHECK-NEXT: BRANCH-ON-MASK vp<%3> -; CHECK-NEXT: Successor(s): pred.srem.if, pred.srem.continue -; CHECK-NEXT: CondBit: vp<%3> (loop) -; CHECK-EMPTY: -; CHECK-NEXT: pred.srem.if: -; CHECK-NEXT: REPLICATE ir<%rem> = srem ir<%recur>, ir<%x> -; CHECK-NEXT: Successor(s): pred.srem.continue -; CHECK-EMPTY: -; CHECK-NEXT: pred.srem.continue: -; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%6> = ir<%rem> -; CHECK-NEXT: No successors -; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): loop.0.split ; CHECK-EMPTY: ; CHECK-NEXT: loop.0.split: -; CHECK-NEXT: Successor(s): pred.store +; CHECK-NEXT: Successor(s): pred.store ; CHECK-EMPTY: ; CHECK-NEXT: pred.store: { -; CHECK-NEXT: pred.store.entry: -; CHECK-NEXT: BRANCH-ON-MASK vp<%3> -; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue -; CHECK-NEXT: CondBit: vp<%3> (loop) +; CHECK-NEXT: pred.store.entry: +; CHECK-NEXT: BRANCH-ON-MASK vp<%3> +; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue +; CHECK-NEXT: CondBit: vp<%3> (loop) ; CHECK-EMPTY: -; CHECK-NEXT: pred.store.if: -; CHECK-NEXT: REPLICATE ir<%add> = add vp<%6>, ir<%recur.next> +; CHECK-NEXT: pred.store.if: +; CHECK-NEXT: REPLICATE ir<%rem> = srem ir<%recur>, ir<%x> +; CHECK-NEXT: REPLICATE ir<%add> = add ir<%rem>, ir<%recur.next> ; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, ir<%iv> ; CHECK-NEXT: REPLICATE store ir<%add>, ir<%gep> ; CHECK-NEXT: Successor(s): pred.store.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.continue: +; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%9> = ir<%rem> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): loop.1 @@ -241,45 +227,30 @@ define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, i8* ; CHECK-EMPTY: ; CHECK-NEXT: loop.1: ; CHECK-NEXT: WIDEN ir<%conv> = sext vp<%6> -; CHECK-NEXT: Successor(s): pred.srem -; CHECK-EMPTY: -; CHECK-NEXT: pred.srem: { -; CHECK-NEXT: pred.srem.entry: -; CHECK-NEXT: BRANCH-ON-MASK vp<%3> -; CHECK-NEXT: Successor(s): pred.srem.if, pred.srem.continue -; CHECK-NEXT: CondBit: vp<%3> (loop) -; CHECK-EMPTY: -; CHECK-NEXT: pred.srem.if: -; CHECK-NEXT: REPLICATE ir<%rem> = srem ir<%0>, ir<%x> (S->V) -; CHECK-NEXT: Successor(s): pred.srem.continue -; CHECK-EMPTY: -; CHECK-NEXT: pred.srem.continue: -; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%9> = ir<%rem> -; CHECK-NEXT: No successors -; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): loop.1.split -; CHECK-EMPTY: -; CHECK-NEXT: loop.1.split: + +; CHECK: loop.1.split: ; CHECK-NEXT: Successor(s): pred.load -; CHECK-EMPTY: -; CHECK-NEXT: pred.load: { + +; CHECK: pred.load: { ; CHECK-NEXT: pred.load.entry: ; CHECK-NEXT: BRANCH-ON-MASK vp<%3> ; CHECK-NEXT: Successor(s): pred.load.if, pred.load.continue ; CHECK-NEXT: CondBit: vp<%3> (loop) -; CHECK-EMPTY: -; CHECK-NEXT: pred.load.if: + +; CHECK: pred.load.if: +; CHECK-NEXT: REPLICATE ir<%rem> = srem ir<%0>, ir<%x> (S->V) ; CHECK-NEXT: REPLICATE ir<%lv.2> = load ir<%gep> (S->V) ; CHECK-NEXT: Successor(s): pred.load.continue -; CHECK-EMPTY: -; CHECK-NEXT: pred.load.continue: + +; CHECK: pred.load.continue: +; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%10> = ir<%rem> ; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%11> = ir<%lv.2> ; CHECK-NEXT: No successors ; CHECK-NEXT: } -; CHECK-NEXT: Successor(s): loop.2 -; CHECK-EMPTY: -; CHECK-NEXT: loop.2: -; CHECK-NEXT: WIDEN ir<%add.1> = add ir<%conv>, vp<%9> + +; CHECK: loop.2: +; CHECK-NEXT: WIDEN ir<%add.1> = add ir<%conv>, vp<%10> ; CHECK-NEXT: WIDEN ir<%conv.lv.2> = sext vp<%11> ; CHECK-NEXT: WIDEN ir<%add> = add ir<%add.1>, ir<%conv.lv.2> ; CHECK-NEXT: No successors @@ -338,21 +309,6 @@ define void @sink_replicate_region_after_replicate_region(i32* %ptr, i32 %x, i8 ; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%6> = ir<%rem> ; CHECK-NEXT: No successors ; CHECK-NEXT: } - -; CHECK: pred.sdiv: { -; CHECK-NEXT: pred.sdiv.entry: -; CHECK-NEXT: BRANCH-ON-MASK vp<%3> -; CHECK-NEXT: Successor(s): pred.sdiv.if, pred.sdiv.continue -; CHECK-NEXT: CondBit: vp<%3> (loop) -; CHECK-EMPTY: -; CHECK-NEXT: pred.sdiv.if: -; CHECK-NEXT: REPLICATE ir<%rem.div> = sdiv ir<20>, vp<%6> -; CHECK-NEXT: Successor(s): pred.sdiv.continue -; CHECK-EMPTY: -; CHECK-NEXT: pred.sdiv.continue: -; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%8> = ir<%rem.div> -; CHECK-NEXT: No successors -; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): loop.1.split ; CHECK-EMPTY: ; CHECK-NEXT: loop.1.split: @@ -365,15 +321,18 @@ define void @sink_replicate_region_after_replicate_region(i32* %ptr, i32 %x, i8 ; CHECK-NEXT: CondBit: vp<%3> (loop) ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.if: +; CHECK-NEXT: REPLICATE ir<%rem.div> = sdiv ir<20>, vp<%6> ; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, ir<%iv> -; CHECK-NEXT: REPLICATE store vp<%8>, ir<%gep> +; CHECK-NEXT: REPLICATE store ir<%rem.div>, ir<%gep> ; CHECK-NEXT: Successor(s): pred.store.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.continue: +; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%10> = ir<%rem.div> ; CHECK-NEXT: No successors ; CHECK-NEXT: } - -; CHECK: loop.2: +; CHECK-NEXT: Successor(s): loop.2 +; CHECK-EMPTY: +; CHECK-NEXT: loop.2: ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll b/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll index b8d9b458aa4c3..362a0ed690d42 100644 --- a/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll +++ b/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll @@ -5,9 +5,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; Test predication of non-void instructions, specifically (i) that these ; instructions permit vectorization and (ii) the creation of an insertelement -; and a Phi node. We check the full 2-element sequence for the first -; instruction; For the rest we'll just make sure they get predicated based -; on the code generated for the first element. +; and a Phi node. We check the full 2-element sequence for all predicate instructions. define void @test(i32* nocapture %asd, i32* nocapture %aud, i32* nocapture %asr, i32* nocapture %aur) { entry: @@ -25,53 +23,50 @@ for.cond.cleanup: ; preds = %if.end ; CHECK: %[[SDA1:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0 ; CHECK: %[[SD0:[a-zA-Z0-9]+]] = sdiv i32 %[[SDA0]], %[[SDA1]] ; CHECK: %[[SD1:[a-zA-Z0-9]+]] = insertelement <2 x i32> poison, i32 %[[SD0]], i32 0 -; CHECK: br label %[[ESD]] -; CHECK: [[ESD]]: -; CHECK: %[[SDR:[a-zA-Z0-9]+]] = phi <2 x i32> [ poison, %vector.body ], [ %[[SD1]], %[[CSD]] ] -; CHECK: %[[SDEEH:[a-zA-Z0-9]+]] = extractelement <2 x i1> %{{.*}}, i32 1 -; CHECK: br i1 %[[SDEEH]], label %[[CSDH:[a-zA-Z0-9.]+]], label %[[ESDH:[a-zA-Z0-9.]+]] -; CHECK: [[CSDH]]: -; CHECK: %[[SDA0H:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 1 -; CHECK: %[[SDA1H:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 1 -; CHECK: %[[SD0H:[a-zA-Z0-9]+]] = sdiv i32 %[[SDA0H]], %[[SDA1H]] -; CHECK: %[[SD1H:[a-zA-Z0-9]+]] = insertelement <2 x i32> %[[SDR]], i32 %[[SD0H]], i32 1 -; CHECK: br label %[[ESDH]] -; CHECK: [[ESDH]]: -; CHECK: %{{.*}} = phi <2 x i32> [ %[[SDR]], %[[ESD]] ], [ %[[SD1H]], %[[CSDH]] ] - -; CHECK: %[[UDEE:[a-zA-Z0-9]+]] = extractelement <2 x i1> %{{.*}}, i32 0 -; CHECK: br i1 %[[UDEE]], label %[[CUD:[a-zA-Z0-9.]+]], label %[[EUD:[a-zA-Z0-9.]+]] -; CHECK: [[CUD]]: ; CHECK: %[[UDA0:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0 ; CHECK: %[[UDA1:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0 ; CHECK: %[[UD0:[a-zA-Z0-9]+]] = udiv i32 %[[UDA0]], %[[UDA1]] ; CHECK: %[[UD1:[a-zA-Z0-9]+]] = insertelement <2 x i32> poison, i32 %[[UD0]], i32 0 -; CHECK: br label %[[EUD]] -; CHECK: [[EUD]]: -; CHECK: %{{.*}} = phi <2 x i32> [ poison, %{{.*}} ], [ %[[UD1]], %[[CUD]] ] - -; CHECK: %[[SREE:[a-zA-Z0-9]+]] = extractelement <2 x i1> %{{.*}}, i32 0 -; CHECK: br i1 %[[SREE]], label %[[CSR:[a-zA-Z0-9.]+]], label %[[ESR:[a-zA-Z0-9.]+]] -; CHECK: [[CSR]]: ; CHECK: %[[SRA0:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0 ; CHECK: %[[SRA1:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0 ; CHECK: %[[SR0:[a-zA-Z0-9]+]] = srem i32 %[[SRA0]], %[[SRA1]] ; CHECK: %[[SR1:[a-zA-Z0-9]+]] = insertelement <2 x i32> poison, i32 %[[SR0]], i32 0 -; CHECK: br label %[[ESR]] -; CHECK: [[ESR]]: -; CHECK: %{{.*}} = phi <2 x i32> [ poison, %{{.*}} ], [ %[[SR1]], %[[CSR]] ] - -; CHECK: %[[UREE:[a-zA-Z0-9]+]] = extractelement <2 x i1> %{{.*}}, i32 0 -; CHECK: br i1 %[[UREE]], label %[[CUR:[a-zA-Z0-9.]+]], label %[[EUR:[a-zA-Z0-9.]+]] -; CHECK: [[CUR]]: ; CHECK: %[[URA0:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0 ; CHECK: %[[URA1:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0 ; CHECK: %[[UR0:[a-zA-Z0-9]+]] = urem i32 %[[URA0]], %[[URA1]] ; CHECK: %[[UR1:[a-zA-Z0-9]+]] = insertelement <2 x i32> poison, i32 %[[UR0]], i32 0 -; CHECK: br label %[[EUR]] -; CHECK: [[EUR]]: -; CHECK: %{{.*}} = phi <2 x i32> [ poison, %{{.*}} ], [ %[[UR1]], %[[CUR]] ] - +; CHECK: br label %[[ESD]] +; CHECK: [[ESD]]: +; CHECK: [[SDR:%[a-zA-Z0-9]+]] = phi <2 x i32> [ poison, %vector.body ], [ %[[SD1]], %[[CSD]] ] +; CHECK: [[UDR:%.+]] = phi <2 x i32> [ poison, %{{.*}} ], [ %[[UD1]], %[[CSD]] ] +; CHECK: [[SRR:%.+]] = phi <2 x i32> [ poison, %{{.*}} ], [ %[[SR1]], %[[CSD]] ] +; CHECK: [[URR:%.+]] = phi <2 x i32> [ poison, %{{.*}} ], [ %[[UR1]], %[[CSD]] ] +; CHECK: %[[SDEEH:[a-zA-Z0-9]+]] = extractelement <2 x i1> %{{.*}}, i32 1 +; CHECK: br i1 %[[SDEEH]], label %[[CSDH:[a-zA-Z0-9.]+]], label %[[ESDH:[a-zA-Z0-9.]+]] +; CHECK: [[CSDH]]: +; CHECK: %[[SD1_A0H:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 1 +; CHECK: %[[SD1_A1H:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 1 +; CHECK: %[[SD1_0H:[a-zA-Z0-9]+]] = sdiv i32 %[[SD1_A0H]], %[[SD1_A1H]] +; CHECK: %[[SD1_1H:[a-zA-Z0-9]+]] = insertelement <2 x i32> [[SDR]], i32 %[[SD1_0H]], i32 1 +; CHECK: %[[UD1_A0:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 1 +; CHECK: %[[UD1_A1:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 1 +; CHECK: %[[UD1_0:[a-zA-Z0-9]+]] = udiv i32 %[[UD1_A0]], %[[UD1_A1]] +; CHECK: %[[UD1_1:[a-zA-Z0-9]+]] = insertelement <2 x i32> [[UDR]], i32 %[[UD1_0]], i32 1 +; CHECK: %[[SR1_A0:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 1 +; CHECK: %[[SR1_A1:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 1 +; CHECK: %[[SR1_0:[a-zA-Z0-9]+]] = srem i32 %[[SR1_A0]], %[[SR1_A1]] +; CHECK: %[[SR1_1:[a-zA-Z0-9]+]] = insertelement <2 x i32> [[SRR]], i32 %[[SR1_0]], i32 1 +; CHECK: %[[UR1_A0:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 1 +; CHECK: %[[UR1_A1:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 1 +; CHECK: %[[UR1_0:[a-zA-Z0-9]+]] = urem i32 %[[UR1_A0]], %[[UR1_A1]] +; CHECK: %[[UR1_1:[a-zA-Z0-9]+]] = insertelement <2 x i32> [[URR]], i32 %[[UR1_0]], i32 1 +; CHECK: br label %[[ESDH]] +; CHECK: [[ESDH]]: +; CHECK: [[SDR1:%[a-zA-Z0-9]+]] = phi <2 x i32> [ [[SDR]], %[[ESD]] ], [ %[[SD1_1H]], %[[CSDH]] ] +; CHECK: [[UDR1:%.+]] = phi <2 x i32> [ [[UDR]], %{{.*}} ], [ %[[UD1_1]], %[[CSDH]] ] +; CHECK: [[SRR1:%.+]] = phi <2 x i32> [ [[SRR]], %{{.*}} ], [ %[[SR1_1]], %[[CSDH]] ] +; CHECK: [[URR1:%.+]] = phi <2 x i32> [ [[URR]], %{{.*}} ], [ %[[UR1_1]], %[[CSDH]] ] +; for.body: ; preds = %if.end, %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %if.end ] %isd = getelementptr inbounds i32, i32* %asd, i64 %indvars.iv diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll index b1136689285e5..90f3323941c3b 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll @@ -96,100 +96,80 @@ define i32 @reduction_sum(i32* noalias nocapture %A, i32* noalias nocapture %B) ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE14]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP52:%.*]], [[PRED_LOAD_CONTINUE14]] ] -; CHECK-NEXT: [[VEC_IND15:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT16:%.*]], [[PRED_LOAD_CONTINUE14]] ] -; CHECK-NEXT: [[TMP0:%.*]] = or i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = or i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0 -; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP48:%.*]], [[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[VEC_IND7:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT8:%.*]], [[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] ; CHECK: pred.load.continue: -; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1 -; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] +; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1 +; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] ; CHECK: pred.load.if1: -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP11]], i32 1 +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i32 1 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i32 1 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] ; CHECK: pred.load.continue2: -; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2 -; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] +; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2 +; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] ; CHECK: pred.load.if3: -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP16]], i32 2 +; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i32 2 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP21]] +; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 +; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i32 2 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] ; CHECK: pred.load.continue4: -; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP13]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP17]], [[PRED_LOAD_IF3]] ] -; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3 -; CHECK-NEXT: br i1 [[TMP19]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]] +; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP24]], [[PRED_LOAD_IF3]] ] +; CHECK-NEXT: [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP27]], [[PRED_LOAD_IF3]] ] +; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3 +; CHECK-NEXT: br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.if5: -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP21]], i32 3 +; CHECK-NEXT: [[TMP31:%.*]] = or i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP31]] +; CHECK-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +; CHECK-NEXT: [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i32 3 +; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP31]] +; CHECK-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i32 3 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.continue6: -; CHECK-NEXT: [[TMP23:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ] -; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0 -; CHECK-NEXT: br i1 [[TMP24]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]] -; CHECK: pred.load.if7: -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> poison, i32 [[TMP26]], i32 0 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE8]] -; CHECK: pred.load.continue8: -; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE6]] ], [ [[TMP27]], [[PRED_LOAD_IF7]] ] -; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1 -; CHECK-NEXT: br i1 [[TMP29]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]] -; CHECK: pred.load.if9: -; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 -; CHECK-NEXT: [[TMP32:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP31]], i32 1 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE10]] -; CHECK: pred.load.continue10: -; CHECK-NEXT: [[TMP33:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP32]], [[PRED_LOAD_IF9]] ] -; CHECK-NEXT: [[TMP34:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2 -; CHECK-NEXT: br i1 [[TMP34]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]] -; CHECK: pred.load.if11: -; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 -; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP33]], i32 [[TMP36]], i32 2 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE12]] -; CHECK: pred.load.continue12: -; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP33]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP37]], [[PRED_LOAD_IF11]] ] -; CHECK-NEXT: [[TMP39:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3 -; CHECK-NEXT: br i1 [[TMP39]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14]] -; CHECK: pred.load.if13: -; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4 -; CHECK-NEXT: [[TMP42:%.*]] = insertelement <4 x i32> [[TMP38]], i32 [[TMP41]], i32 3 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE14]] -; CHECK: pred.load.continue14: -; CHECK-NEXT: [[TMP43:%.*]] = phi <4 x i32> [ [[TMP38]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP42]], [[PRED_LOAD_IF13]] ] -; CHECK-NEXT: [[TMP44:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[VEC_IND15]], <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP45:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP44]]) -; CHECK-NEXT: [[TMP46:%.*]] = add i32 [[TMP45]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP47:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP23]], <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP48:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP47]]) -; CHECK-NEXT: [[TMP49:%.*]] = add i32 [[TMP48]], [[TMP46]] -; CHECK-NEXT: [[TMP50:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP43]], <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP51:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP50]]) -; CHECK-NEXT: [[TMP52]] = add i32 [[TMP51]], [[TMP49]] +; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ] +; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ] +; CHECK-NEXT: [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[VEC_IND7]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP41:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP40]]) +; CHECK-NEXT: [[TMP42:%.*]] = add i32 [[TMP41]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP43:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP38]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP44:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP43]]) +; CHECK-NEXT: [[TMP45:%.*]] = add i32 [[TMP44]], [[TMP42]] +; CHECK-NEXT: [[TMP46:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP39]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP47:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP46]]) +; CHECK-NEXT: [[TMP48]] = add i32 [[TMP47]], [[TMP45]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[VEC_IND_NEXT16]] = add <4 x i32> [[VEC_IND15]], -; CHECK-NEXT: [[TMP53:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 -; CHECK-NEXT: br i1 [[TMP53]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: [[VEC_IND_NEXT8]] = add <4 x i32> [[VEC_IND7]], +; CHECK-NEXT: [[TMP49:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 +; CHECK-NEXT: br i1 [[TMP49]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -197,7 +177,7 @@ define i32 @reduction_sum(i32* noalias nocapture %A, i32* noalias nocapture %B) ; CHECK: .lr.ph: ; CHECK-NEXT: br i1 undef, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: ._crit_edge: -; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ undef, [[DOTLR_PH]] ], [ [[TMP52]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ undef, [[DOTLR_PH]] ], [ [[TMP48]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] ; entry: @@ -321,100 +301,80 @@ define i32 @reduction_prod(i32* noalias nocapture %A, i32* noalias nocapture %B) ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE14]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 1, [[VECTOR_PH]] ], [ [[TMP52:%.*]], [[PRED_LOAD_CONTINUE14]] ] -; CHECK-NEXT: [[VEC_IND15:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT16:%.*]], [[PRED_LOAD_CONTINUE14]] ] -; CHECK-NEXT: [[TMP0:%.*]] = or i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = or i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0 -; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 1, [[VECTOR_PH]] ], [ [[TMP48:%.*]], [[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[VEC_IND7:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT8:%.*]], [[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] ; CHECK: pred.load.continue: -; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1 -; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] +; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1 +; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] ; CHECK: pred.load.if1: -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP11]], i32 1 +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i32 1 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i32 1 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] ; CHECK: pred.load.continue2: -; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2 -; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] +; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2 +; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] ; CHECK: pred.load.if3: -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP16]], i32 2 +; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i32 2 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP21]] +; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 +; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i32 2 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] ; CHECK: pred.load.continue4: -; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP13]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP17]], [[PRED_LOAD_IF3]] ] -; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3 -; CHECK-NEXT: br i1 [[TMP19]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]] +; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP24]], [[PRED_LOAD_IF3]] ] +; CHECK-NEXT: [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP27]], [[PRED_LOAD_IF3]] ] +; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3 +; CHECK-NEXT: br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.if5: -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP21]], i32 3 +; CHECK-NEXT: [[TMP31:%.*]] = or i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP31]] +; CHECK-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +; CHECK-NEXT: [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i32 3 +; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP31]] +; CHECK-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i32 3 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.continue6: -; CHECK-NEXT: [[TMP23:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ] -; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0 -; CHECK-NEXT: br i1 [[TMP24]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]] -; CHECK: pred.load.if7: -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> poison, i32 [[TMP26]], i32 0 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE8]] -; CHECK: pred.load.continue8: -; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE6]] ], [ [[TMP27]], [[PRED_LOAD_IF7]] ] -; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1 -; CHECK-NEXT: br i1 [[TMP29]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]] -; CHECK: pred.load.if9: -; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 -; CHECK-NEXT: [[TMP32:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP31]], i32 1 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE10]] -; CHECK: pred.load.continue10: -; CHECK-NEXT: [[TMP33:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP32]], [[PRED_LOAD_IF9]] ] -; CHECK-NEXT: [[TMP34:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2 -; CHECK-NEXT: br i1 [[TMP34]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]] -; CHECK: pred.load.if11: -; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 -; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP33]], i32 [[TMP36]], i32 2 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE12]] -; CHECK: pred.load.continue12: -; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP33]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP37]], [[PRED_LOAD_IF11]] ] -; CHECK-NEXT: [[TMP39:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3 -; CHECK-NEXT: br i1 [[TMP39]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14]] -; CHECK: pred.load.if13: -; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4 -; CHECK-NEXT: [[TMP42:%.*]] = insertelement <4 x i32> [[TMP38]], i32 [[TMP41]], i32 3 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE14]] -; CHECK: pred.load.continue14: -; CHECK-NEXT: [[TMP43:%.*]] = phi <4 x i32> [ [[TMP38]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP42]], [[PRED_LOAD_IF13]] ] -; CHECK-NEXT: [[TMP44:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[VEC_IND15]], <4 x i32> -; CHECK-NEXT: [[TMP45:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP44]]) -; CHECK-NEXT: [[TMP46:%.*]] = mul i32 [[TMP45]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP47:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP23]], <4 x i32> -; CHECK-NEXT: [[TMP48:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP47]]) -; CHECK-NEXT: [[TMP49:%.*]] = mul i32 [[TMP48]], [[TMP46]] -; CHECK-NEXT: [[TMP50:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP43]], <4 x i32> -; CHECK-NEXT: [[TMP51:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP50]]) -; CHECK-NEXT: [[TMP52]] = mul i32 [[TMP51]], [[TMP49]] +; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ] +; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ] +; CHECK-NEXT: [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[VEC_IND7]], <4 x i32> +; CHECK-NEXT: [[TMP41:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP40]]) +; CHECK-NEXT: [[TMP42:%.*]] = mul i32 [[TMP41]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP43:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP38]], <4 x i32> +; CHECK-NEXT: [[TMP44:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP43]]) +; CHECK-NEXT: [[TMP45:%.*]] = mul i32 [[TMP44]], [[TMP42]] +; CHECK-NEXT: [[TMP46:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP39]], <4 x i32> +; CHECK-NEXT: [[TMP47:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP46]]) +; CHECK-NEXT: [[TMP48]] = mul i32 [[TMP47]], [[TMP45]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[VEC_IND_NEXT16]] = add <4 x i32> [[VEC_IND15]], -; CHECK-NEXT: [[TMP53:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 -; CHECK-NEXT: br i1 [[TMP53]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: [[VEC_IND_NEXT8]] = add <4 x i32> [[VEC_IND7]], +; CHECK-NEXT: [[TMP49:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 +; CHECK-NEXT: br i1 [[TMP49]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -422,7 +382,7 @@ define i32 @reduction_prod(i32* noalias nocapture %A, i32* noalias nocapture %B) ; CHECK: .lr.ph: ; CHECK-NEXT: br i1 undef, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: ._crit_edge: -; CHECK-NEXT: [[PROD_0_LCSSA:%.*]] = phi i32 [ undef, [[DOTLR_PH]] ], [ [[TMP52]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[PROD_0_LCSSA:%.*]] = phi i32 [ undef, [[DOTLR_PH]] ], [ [[TMP48]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[PROD_0_LCSSA]] ; entry: @@ -456,98 +416,78 @@ define i32 @reduction_mix(i32* noalias nocapture %A, i32* noalias nocapture %B) ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE14]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP50:%.*]], [[PRED_LOAD_CONTINUE14]] ] -; CHECK-NEXT: [[VEC_IND15:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT16:%.*]], [[PRED_LOAD_CONTINUE14]] ] -; CHECK-NEXT: [[TMP0:%.*]] = or i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = or i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0 -; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP46:%.*]], [[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[VEC_IND7:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT8:%.*]], [[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] ; CHECK: pred.load.continue: -; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1 -; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] +; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1 +; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] ; CHECK: pred.load.if1: -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP11]], i32 1 +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i32 1 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i32 1 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] ; CHECK: pred.load.continue2: -; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2 -; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] +; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2 +; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] ; CHECK: pred.load.if3: -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP16]], i32 2 +; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i32 2 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP21]] +; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 +; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i32 2 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] ; CHECK: pred.load.continue4: -; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP13]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP17]], [[PRED_LOAD_IF3]] ] -; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3 -; CHECK-NEXT: br i1 [[TMP19]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]] +; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP24]], [[PRED_LOAD_IF3]] ] +; CHECK-NEXT: [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP27]], [[PRED_LOAD_IF3]] ] +; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3 +; CHECK-NEXT: br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.if5: -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP21]], i32 3 +; CHECK-NEXT: [[TMP31:%.*]] = or i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP31]] +; CHECK-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +; CHECK-NEXT: [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i32 3 +; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP31]] +; CHECK-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i32 3 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.continue6: -; CHECK-NEXT: [[TMP23:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ] -; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0 -; CHECK-NEXT: br i1 [[TMP24]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]] -; CHECK: pred.load.if7: -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> poison, i32 [[TMP26]], i32 0 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE8]] -; CHECK: pred.load.continue8: -; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE6]] ], [ [[TMP27]], [[PRED_LOAD_IF7]] ] -; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1 -; CHECK-NEXT: br i1 [[TMP29]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]] -; CHECK: pred.load.if9: -; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 -; CHECK-NEXT: [[TMP32:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP31]], i32 1 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE10]] -; CHECK: pred.load.continue10: -; CHECK-NEXT: [[TMP33:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP32]], [[PRED_LOAD_IF9]] ] -; CHECK-NEXT: [[TMP34:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2 -; CHECK-NEXT: br i1 [[TMP34]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]] -; CHECK: pred.load.if11: -; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 -; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP33]], i32 [[TMP36]], i32 2 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE12]] -; CHECK: pred.load.continue12: -; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP33]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP37]], [[PRED_LOAD_IF11]] ] -; CHECK-NEXT: [[TMP39:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3 -; CHECK-NEXT: br i1 [[TMP39]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14]] -; CHECK: pred.load.if13: -; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4 -; CHECK-NEXT: [[TMP42:%.*]] = insertelement <4 x i32> [[TMP38]], i32 [[TMP41]], i32 3 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE14]] -; CHECK: pred.load.continue14: -; CHECK-NEXT: [[TMP43:%.*]] = phi <4 x i32> [ [[TMP38]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP42]], [[PRED_LOAD_IF13]] ] -; CHECK-NEXT: [[TMP44:%.*]] = mul nsw <4 x i32> [[TMP43]], [[TMP23]] -; CHECK-NEXT: [[TMP45:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[VEC_IND15]], <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP46:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP45]]) -; CHECK-NEXT: [[TMP47:%.*]] = add i32 [[TMP46]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP48:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP44]], <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP49:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP48]]) -; CHECK-NEXT: [[TMP50]] = add i32 [[TMP49]], [[TMP47]] +; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ] +; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ] +; CHECK-NEXT: [[TMP40:%.*]] = mul nsw <4 x i32> [[TMP39]], [[TMP38]] +; CHECK-NEXT: [[TMP41:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[VEC_IND7]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP42:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP41]]) +; CHECK-NEXT: [[TMP43:%.*]] = add i32 [[TMP42]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP44:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP40]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP45:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP44]]) +; CHECK-NEXT: [[TMP46]] = add i32 [[TMP45]], [[TMP43]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[VEC_IND_NEXT16]] = add <4 x i32> [[VEC_IND15]], -; CHECK-NEXT: [[TMP51:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 -; CHECK-NEXT: br i1 [[TMP51]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: [[VEC_IND_NEXT8]] = add <4 x i32> [[VEC_IND7]], +; CHECK-NEXT: [[TMP47:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 +; CHECK-NEXT: br i1 [[TMP47]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -555,7 +495,7 @@ define i32 @reduction_mix(i32* noalias nocapture %A, i32* noalias nocapture %B) ; CHECK: .lr.ph: ; CHECK-NEXT: br i1 undef, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: ._crit_edge: -; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ undef, [[DOTLR_PH]] ], [ [[TMP50]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ undef, [[DOTLR_PH]] ], [ [[TMP46]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] ; entry: @@ -589,95 +529,75 @@ define i32 @reduction_mul(i32* noalias nocapture %A, i32* noalias nocapture %B) ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE14]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 19, [[VECTOR_PH]] ], [ [[TMP49:%.*]], [[PRED_LOAD_CONTINUE14]] ] -; CHECK-NEXT: [[TMP0:%.*]] = or i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = or i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0 -; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 19, [[VECTOR_PH]] ], [ [[TMP45:%.*]], [[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] ; CHECK: pred.load.continue: -; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1 -; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] +; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1 +; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] ; CHECK: pred.load.if1: -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP11]], i32 1 +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i32 1 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i32 1 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] ; CHECK: pred.load.continue2: -; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2 -; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] +; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2 +; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] ; CHECK: pred.load.if3: -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP16]], i32 2 +; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i32 2 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP21]] +; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 +; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i32 2 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] ; CHECK: pred.load.continue4: -; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP13]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP17]], [[PRED_LOAD_IF3]] ] -; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3 -; CHECK-NEXT: br i1 [[TMP19]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]] +; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP24]], [[PRED_LOAD_IF3]] ] +; CHECK-NEXT: [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP27]], [[PRED_LOAD_IF3]] ] +; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3 +; CHECK-NEXT: br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.if5: -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP21]], i32 3 +; CHECK-NEXT: [[TMP31:%.*]] = or i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP31]] +; CHECK-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +; CHECK-NEXT: [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i32 3 +; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP31]] +; CHECK-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i32 3 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.continue6: -; CHECK-NEXT: [[TMP23:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ] -; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0 -; CHECK-NEXT: br i1 [[TMP24]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]] -; CHECK: pred.load.if7: -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> poison, i32 [[TMP26]], i32 0 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE8]] -; CHECK: pred.load.continue8: -; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE6]] ], [ [[TMP27]], [[PRED_LOAD_IF7]] ] -; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1 -; CHECK-NEXT: br i1 [[TMP29]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]] -; CHECK: pred.load.if9: -; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 -; CHECK-NEXT: [[TMP32:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP31]], i32 1 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE10]] -; CHECK: pred.load.continue10: -; CHECK-NEXT: [[TMP33:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP32]], [[PRED_LOAD_IF9]] ] -; CHECK-NEXT: [[TMP34:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2 -; CHECK-NEXT: br i1 [[TMP34]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]] -; CHECK: pred.load.if11: -; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 -; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP33]], i32 [[TMP36]], i32 2 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE12]] -; CHECK: pred.load.continue12: -; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP33]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP37]], [[PRED_LOAD_IF11]] ] -; CHECK-NEXT: [[TMP39:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3 -; CHECK-NEXT: br i1 [[TMP39]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14]] -; CHECK: pred.load.if13: -; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4 -; CHECK-NEXT: [[TMP42:%.*]] = insertelement <4 x i32> [[TMP38]], i32 [[TMP41]], i32 3 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE14]] -; CHECK: pred.load.continue14: -; CHECK-NEXT: [[TMP43:%.*]] = phi <4 x i32> [ [[TMP38]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP42]], [[PRED_LOAD_IF13]] ] -; CHECK-NEXT: [[TMP44:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP23]], <4 x i32> -; CHECK-NEXT: [[TMP45:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP44]]) -; CHECK-NEXT: [[TMP46:%.*]] = mul i32 [[TMP45]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP47:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP43]], <4 x i32> -; CHECK-NEXT: [[TMP48:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP47]]) -; CHECK-NEXT: [[TMP49]] = mul i32 [[TMP48]], [[TMP46]] +; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ] +; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ] +; CHECK-NEXT: [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP38]], <4 x i32> +; CHECK-NEXT: [[TMP41:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP40]]) +; CHECK-NEXT: [[TMP42:%.*]] = mul i32 [[TMP41]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP43:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP39]], <4 x i32> +; CHECK-NEXT: [[TMP44:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP43]]) +; CHECK-NEXT: [[TMP45]] = mul i32 [[TMP44]], [[TMP42]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP50:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 -; CHECK-NEXT: br i1 [[TMP50]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 +; CHECK-NEXT: br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -685,7 +605,7 @@ define i32 @reduction_mul(i32* noalias nocapture %A, i32* noalias nocapture %B) ; CHECK: .lr.ph: ; CHECK-NEXT: br i1 undef, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: ._crit_edge: -; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ undef, [[DOTLR_PH]] ], [ [[TMP49]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ undef, [[DOTLR_PH]] ], [ [[TMP45]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] ; entry: @@ -717,95 +637,75 @@ define i32 @reduction_and(i32* nocapture %A, i32* nocapture %B) { ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE14]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ -1, [[VECTOR_PH]] ], [ [[TMP49:%.*]], [[PRED_LOAD_CONTINUE14]] ] -; CHECK-NEXT: [[TMP0:%.*]] = or i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = or i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0 -; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ -1, [[VECTOR_PH]] ], [ [[TMP45:%.*]], [[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] ; CHECK: pred.load.continue: -; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1 -; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] +; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1 +; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] ; CHECK: pred.load.if1: -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP11]], i32 1 +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i32 1 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i32 1 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] ; CHECK: pred.load.continue2: -; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2 -; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] +; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2 +; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] ; CHECK: pred.load.if3: -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP16]], i32 2 +; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i32 2 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP21]] +; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 +; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i32 2 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] ; CHECK: pred.load.continue4: -; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP13]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP17]], [[PRED_LOAD_IF3]] ] -; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3 -; CHECK-NEXT: br i1 [[TMP19]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]] +; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP24]], [[PRED_LOAD_IF3]] ] +; CHECK-NEXT: [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP27]], [[PRED_LOAD_IF3]] ] +; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3 +; CHECK-NEXT: br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.if5: -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP21]], i32 3 +; CHECK-NEXT: [[TMP31:%.*]] = or i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP31]] +; CHECK-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +; CHECK-NEXT: [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i32 3 +; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP31]] +; CHECK-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i32 3 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.continue6: -; CHECK-NEXT: [[TMP23:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ] -; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0 -; CHECK-NEXT: br i1 [[TMP24]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]] -; CHECK: pred.load.if7: -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> poison, i32 [[TMP26]], i32 0 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE8]] -; CHECK: pred.load.continue8: -; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE6]] ], [ [[TMP27]], [[PRED_LOAD_IF7]] ] -; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1 -; CHECK-NEXT: br i1 [[TMP29]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]] -; CHECK: pred.load.if9: -; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 -; CHECK-NEXT: [[TMP32:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP31]], i32 1 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE10]] -; CHECK: pred.load.continue10: -; CHECK-NEXT: [[TMP33:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP32]], [[PRED_LOAD_IF9]] ] -; CHECK-NEXT: [[TMP34:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2 -; CHECK-NEXT: br i1 [[TMP34]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]] -; CHECK: pred.load.if11: -; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 -; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP33]], i32 [[TMP36]], i32 2 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE12]] -; CHECK: pred.load.continue12: -; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP33]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP37]], [[PRED_LOAD_IF11]] ] -; CHECK-NEXT: [[TMP39:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3 -; CHECK-NEXT: br i1 [[TMP39]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14]] -; CHECK: pred.load.if13: -; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4 -; CHECK-NEXT: [[TMP42:%.*]] = insertelement <4 x i32> [[TMP38]], i32 [[TMP41]], i32 3 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE14]] -; CHECK: pred.load.continue14: -; CHECK-NEXT: [[TMP43:%.*]] = phi <4 x i32> [ [[TMP38]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP42]], [[PRED_LOAD_IF13]] ] -; CHECK-NEXT: [[TMP44:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP23]], <4 x i32> -; CHECK-NEXT: [[TMP45:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP44]]) -; CHECK-NEXT: [[TMP46:%.*]] = and i32 [[TMP45]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP47:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP43]], <4 x i32> -; CHECK-NEXT: [[TMP48:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP47]]) -; CHECK-NEXT: [[TMP49]] = and i32 [[TMP48]], [[TMP46]] +; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ] +; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ] +; CHECK-NEXT: [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP38]], <4 x i32> +; CHECK-NEXT: [[TMP41:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP40]]) +; CHECK-NEXT: [[TMP42:%.*]] = and i32 [[TMP41]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP43:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP39]], <4 x i32> +; CHECK-NEXT: [[TMP44:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP43]]) +; CHECK-NEXT: [[TMP45]] = and i32 [[TMP44]], [[TMP42]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP50:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 -; CHECK-NEXT: br i1 [[TMP50]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-NEXT: [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 +; CHECK-NEXT: br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -813,7 +713,7 @@ define i32 @reduction_and(i32* nocapture %A, i32* nocapture %B) { ; CHECK: for.body: ; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[TMP49]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[TMP45]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] ; entry: @@ -845,93 +745,73 @@ define i32 @reduction_or(i32* nocapture %A, i32* nocapture %B) { ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE14]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP47:%.*]], [[PRED_LOAD_CONTINUE14]] ] -; CHECK-NEXT: [[TMP0:%.*]] = or i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = or i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0 -; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] ; CHECK: pred.load.continue: -; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1 -; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] +; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1 +; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] ; CHECK: pred.load.if1: -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP11]], i32 1 +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i32 1 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i32 1 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] ; CHECK: pred.load.continue2: -; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2 -; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] +; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2 +; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] ; CHECK: pred.load.if3: -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP16]], i32 2 +; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i32 2 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP21]] +; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 +; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i32 2 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] ; CHECK: pred.load.continue4: -; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP13]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP17]], [[PRED_LOAD_IF3]] ] -; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3 -; CHECK-NEXT: br i1 [[TMP19]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]] +; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP24]], [[PRED_LOAD_IF3]] ] +; CHECK-NEXT: [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP27]], [[PRED_LOAD_IF3]] ] +; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3 +; CHECK-NEXT: br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.if5: -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP21]], i32 3 +; CHECK-NEXT: [[TMP31:%.*]] = or i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP31]] +; CHECK-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +; CHECK-NEXT: [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i32 3 +; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP31]] +; CHECK-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i32 3 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.continue6: -; CHECK-NEXT: [[TMP23:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ] -; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0 -; CHECK-NEXT: br i1 [[TMP24]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]] -; CHECK: pred.load.if7: -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> poison, i32 [[TMP26]], i32 0 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE8]] -; CHECK: pred.load.continue8: -; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE6]] ], [ [[TMP27]], [[PRED_LOAD_IF7]] ] -; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1 -; CHECK-NEXT: br i1 [[TMP29]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]] -; CHECK: pred.load.if9: -; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 -; CHECK-NEXT: [[TMP32:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP31]], i32 1 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE10]] -; CHECK: pred.load.continue10: -; CHECK-NEXT: [[TMP33:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP32]], [[PRED_LOAD_IF9]] ] -; CHECK-NEXT: [[TMP34:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2 -; CHECK-NEXT: br i1 [[TMP34]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]] -; CHECK: pred.load.if11: -; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 -; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP33]], i32 [[TMP36]], i32 2 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE12]] -; CHECK: pred.load.continue12: -; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP33]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP37]], [[PRED_LOAD_IF11]] ] -; CHECK-NEXT: [[TMP39:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3 -; CHECK-NEXT: br i1 [[TMP39]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14]] -; CHECK: pred.load.if13: -; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4 -; CHECK-NEXT: [[TMP42:%.*]] = insertelement <4 x i32> [[TMP38]], i32 [[TMP41]], i32 3 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE14]] -; CHECK: pred.load.continue14: -; CHECK-NEXT: [[TMP43:%.*]] = phi <4 x i32> [ [[TMP38]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP42]], [[PRED_LOAD_IF13]] ] -; CHECK-NEXT: [[TMP44:%.*]] = add nsw <4 x i32> [[TMP43]], [[TMP23]] -; CHECK-NEXT: [[TMP45:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP44]], <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP46:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP45]]) -; CHECK-NEXT: [[TMP47]] = or i32 [[TMP46]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ] +; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ] +; CHECK-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[TMP39]], [[TMP38]] +; CHECK-NEXT: [[TMP41:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP40]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP42:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP41]]) +; CHECK-NEXT: [[TMP43]] = or i32 [[TMP42]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP48:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 -; CHECK-NEXT: br i1 [[TMP48]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 +; CHECK-NEXT: br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -939,7 +819,7 @@ define i32 @reduction_or(i32* nocapture %A, i32* nocapture %B) { ; CHECK: for.body: ; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[TMP47]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[TMP43]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] ; entry: @@ -971,93 +851,73 @@ define i32 @reduction_xor(i32* nocapture %A, i32* nocapture %B) { ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE14]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP47:%.*]], [[PRED_LOAD_CONTINUE14]] ] -; CHECK-NEXT: [[TMP0:%.*]] = or i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = or i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0 -; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] ; CHECK: pred.load.continue: -; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1 -; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] +; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1 +; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] ; CHECK: pred.load.if1: -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP11]], i32 1 +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i32 1 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i32 1 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] ; CHECK: pred.load.continue2: -; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2 -; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] +; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2 +; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] ; CHECK: pred.load.if3: -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP16]], i32 2 +; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i32 2 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP21]] +; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 +; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i32 2 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] ; CHECK: pred.load.continue4: -; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP13]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP17]], [[PRED_LOAD_IF3]] ] -; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3 -; CHECK-NEXT: br i1 [[TMP19]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]] +; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP24]], [[PRED_LOAD_IF3]] ] +; CHECK-NEXT: [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP27]], [[PRED_LOAD_IF3]] ] +; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3 +; CHECK-NEXT: br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.if5: -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 -; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP21]], i32 3 +; CHECK-NEXT: [[TMP31:%.*]] = or i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP31]] +; CHECK-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +; CHECK-NEXT: [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i32 3 +; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP31]] +; CHECK-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i32 3 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.continue6: -; CHECK-NEXT: [[TMP23:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ] -; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0 -; CHECK-NEXT: br i1 [[TMP24]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]] -; CHECK: pred.load.if7: -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> poison, i32 [[TMP26]], i32 0 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE8]] -; CHECK: pred.load.continue8: -; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE6]] ], [ [[TMP27]], [[PRED_LOAD_IF7]] ] -; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1 -; CHECK-NEXT: br i1 [[TMP29]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]] -; CHECK: pred.load.if9: -; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 -; CHECK-NEXT: [[TMP32:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP31]], i32 1 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE10]] -; CHECK: pred.load.continue10: -; CHECK-NEXT: [[TMP33:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP32]], [[PRED_LOAD_IF9]] ] -; CHECK-NEXT: [[TMP34:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2 -; CHECK-NEXT: br i1 [[TMP34]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]] -; CHECK: pred.load.if11: -; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 -; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP33]], i32 [[TMP36]], i32 2 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE12]] -; CHECK: pred.load.continue12: -; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP33]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP37]], [[PRED_LOAD_IF11]] ] -; CHECK-NEXT: [[TMP39:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3 -; CHECK-NEXT: br i1 [[TMP39]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14]] -; CHECK: pred.load.if13: -; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4 -; CHECK-NEXT: [[TMP42:%.*]] = insertelement <4 x i32> [[TMP38]], i32 [[TMP41]], i32 3 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE14]] -; CHECK: pred.load.continue14: -; CHECK-NEXT: [[TMP43:%.*]] = phi <4 x i32> [ [[TMP38]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP42]], [[PRED_LOAD_IF13]] ] -; CHECK-NEXT: [[TMP44:%.*]] = add nsw <4 x i32> [[TMP43]], [[TMP23]] -; CHECK-NEXT: [[TMP45:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP44]], <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP46:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP45]]) -; CHECK-NEXT: [[TMP47]] = xor i32 [[TMP46]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ] +; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ] +; CHECK-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[TMP39]], [[TMP38]] +; CHECK-NEXT: [[TMP41:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP40]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP42:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP41]]) +; CHECK-NEXT: [[TMP43]] = xor i32 [[TMP42]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP48:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 -; CHECK-NEXT: br i1 [[TMP48]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 +; CHECK-NEXT: br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -1065,7 +925,7 @@ define i32 @reduction_xor(i32* nocapture %A, i32* nocapture %B) { ; CHECK: for.body: ; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[TMP47]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[TMP43]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] ; entry: @@ -1097,95 +957,75 @@ define float @reduction_fadd(float* nocapture %A, float* nocapture %B) { ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE14]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP49:%.*]], [[PRED_LOAD_CONTINUE14]] ] -; CHECK-NEXT: [[TMP0:%.*]] = or i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = or i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0 -; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP45:%.*]], [[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[TMP2]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP6:%.*]] = load float, float* [[TMP5]], align 4 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> poison, float [[TMP6]], i32 0 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] ; CHECK: pred.load.continue: -; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x float> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1 -; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] +; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x float> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x float> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1 +; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] ; CHECK: pred.load.if1: -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP11:%.*]] = load float, float* [[TMP10]], align 4 -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x float> [[TMP8]], float [[TMP11]], i32 1 +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = load float, float* [[TMP12]], align 4 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x float> [[TMP8]], float [[TMP13]], i32 1 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = load float, float* [[TMP15]], align 4 +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP16]], i32 1 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] ; CHECK: pred.load.continue2: -; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x float> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2 -; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] +; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x float> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x float> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2 +; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] ; CHECK: pred.load.if3: -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP16:%.*]] = load float, float* [[TMP15]], align 4 -; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP16]], i32 2 +; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = load float, float* [[TMP22]], align 4 +; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x float> [[TMP18]], float [[TMP23]], i32 2 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP21]] +; CHECK-NEXT: [[TMP26:%.*]] = load float, float* [[TMP25]], align 4 +; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x float> [[TMP19]], float [[TMP26]], i32 2 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] ; CHECK: pred.load.continue4: -; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x float> [ [[TMP13]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP17]], [[PRED_LOAD_IF3]] ] -; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3 -; CHECK-NEXT: br i1 [[TMP19]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]] +; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x float> [ [[TMP18]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP24]], [[PRED_LOAD_IF3]] ] +; CHECK-NEXT: [[TMP29:%.*]] = phi <4 x float> [ [[TMP19]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP27]], [[PRED_LOAD_IF3]] ] +; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3 +; CHECK-NEXT: br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.if5: -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP21:%.*]] = load float, float* [[TMP20]], align 4 -; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x float> [[TMP18]], float [[TMP21]], i32 3 +; CHECK-NEXT: [[TMP31:%.*]] = or i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP31]] +; CHECK-NEXT: [[TMP33:%.*]] = load float, float* [[TMP32]], align 4 +; CHECK-NEXT: [[TMP34:%.*]] = insertelement <4 x float> [[TMP28]], float [[TMP33]], i32 3 +; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP31]] +; CHECK-NEXT: [[TMP36:%.*]] = load float, float* [[TMP35]], align 4 +; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x float> [[TMP29]], float [[TMP36]], i32 3 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.continue6: -; CHECK-NEXT: [[TMP23:%.*]] = phi <4 x float> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ] -; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0 -; CHECK-NEXT: br i1 [[TMP24]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]] -; CHECK: pred.load.if7: -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP26:%.*]] = load float, float* [[TMP25]], align 4 -; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x float> poison, float [[TMP26]], i32 0 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE8]] -; CHECK: pred.load.continue8: -; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x float> [ poison, [[PRED_LOAD_CONTINUE6]] ], [ [[TMP27]], [[PRED_LOAD_IF7]] ] -; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1 -; CHECK-NEXT: br i1 [[TMP29]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]] -; CHECK: pred.load.if9: -; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP31:%.*]] = load float, float* [[TMP30]], align 4 -; CHECK-NEXT: [[TMP32:%.*]] = insertelement <4 x float> [[TMP28]], float [[TMP31]], i32 1 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE10]] -; CHECK: pred.load.continue10: -; CHECK-NEXT: [[TMP33:%.*]] = phi <4 x float> [ [[TMP28]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP32]], [[PRED_LOAD_IF9]] ] -; CHECK-NEXT: [[TMP34:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2 -; CHECK-NEXT: br i1 [[TMP34]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]] -; CHECK: pred.load.if11: -; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP36:%.*]] = load float, float* [[TMP35]], align 4 -; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x float> [[TMP33]], float [[TMP36]], i32 2 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE12]] -; CHECK: pred.load.continue12: -; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x float> [ [[TMP33]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP37]], [[PRED_LOAD_IF11]] ] -; CHECK-NEXT: [[TMP39:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3 -; CHECK-NEXT: br i1 [[TMP39]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14]] -; CHECK: pred.load.if13: -; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP41:%.*]] = load float, float* [[TMP40]], align 4 -; CHECK-NEXT: [[TMP42:%.*]] = insertelement <4 x float> [[TMP38]], float [[TMP41]], i32 3 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE14]] -; CHECK: pred.load.continue14: -; CHECK-NEXT: [[TMP43:%.*]] = phi <4 x float> [ [[TMP38]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP42]], [[PRED_LOAD_IF13]] ] -; CHECK-NEXT: [[TMP44:%.*]] = select <4 x i1> [[TMP3]], <4 x float> [[TMP23]], <4 x float> zeroinitializer -; CHECK-NEXT: [[TMP45:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP44]]) -; CHECK-NEXT: [[TMP46:%.*]] = fadd float [[TMP45]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP47:%.*]] = select <4 x i1> [[TMP3]], <4 x float> [[TMP43]], <4 x float> zeroinitializer -; CHECK-NEXT: [[TMP48:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP47]]) -; CHECK-NEXT: [[TMP49]] = fadd float [[TMP48]], [[TMP46]] +; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x float> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ] +; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x float> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ] +; CHECK-NEXT: [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x float> [[TMP38]], <4 x float> zeroinitializer +; CHECK-NEXT: [[TMP41:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP40]]) +; CHECK-NEXT: [[TMP42:%.*]] = fadd float [[TMP41]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP43:%.*]] = select <4 x i1> [[TMP0]], <4 x float> [[TMP39]], <4 x float> zeroinitializer +; CHECK-NEXT: [[TMP44:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP43]]) +; CHECK-NEXT: [[TMP45]] = fadd float [[TMP44]], [[TMP42]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP50:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 -; CHECK-NEXT: br i1 [[TMP50]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; CHECK-NEXT: [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 +; CHECK-NEXT: br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -1193,7 +1033,7 @@ define float @reduction_fadd(float* nocapture %A, float* nocapture %B) { ; CHECK: for.body: ; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi float [ undef, [[FOR_BODY]] ], [ [[TMP49]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi float [ undef, [[FOR_BODY]] ], [ [[TMP45]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret float [[RESULT_0_LCSSA]] ; entry: @@ -1225,95 +1065,75 @@ define float @reduction_fmul(float* nocapture %A, float* nocapture %B) { ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE14]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP49:%.*]], [[PRED_LOAD_CONTINUE14]] ] -; CHECK-NEXT: [[TMP0:%.*]] = or i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = or i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0 -; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP45:%.*]], [[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[TMP2]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP6:%.*]] = load float, float* [[TMP5]], align 4 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> poison, float [[TMP6]], i32 0 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] ; CHECK: pred.load.continue: -; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x float> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1 -; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] +; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x float> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x float> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1 +; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] ; CHECK: pred.load.if1: -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP11:%.*]] = load float, float* [[TMP10]], align 4 -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x float> [[TMP8]], float [[TMP11]], i32 1 +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = load float, float* [[TMP12]], align 4 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x float> [[TMP8]], float [[TMP13]], i32 1 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = load float, float* [[TMP15]], align 4 +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP16]], i32 1 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] ; CHECK: pred.load.continue2: -; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x float> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2 -; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] +; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x float> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x float> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2 +; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] ; CHECK: pred.load.if3: -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP16:%.*]] = load float, float* [[TMP15]], align 4 -; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP16]], i32 2 +; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = load float, float* [[TMP22]], align 4 +; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x float> [[TMP18]], float [[TMP23]], i32 2 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP21]] +; CHECK-NEXT: [[TMP26:%.*]] = load float, float* [[TMP25]], align 4 +; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x float> [[TMP19]], float [[TMP26]], i32 2 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] ; CHECK: pred.load.continue4: -; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x float> [ [[TMP13]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP17]], [[PRED_LOAD_IF3]] ] -; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3 -; CHECK-NEXT: br i1 [[TMP19]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]] +; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x float> [ [[TMP18]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP24]], [[PRED_LOAD_IF3]] ] +; CHECK-NEXT: [[TMP29:%.*]] = phi <4 x float> [ [[TMP19]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP27]], [[PRED_LOAD_IF3]] ] +; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3 +; CHECK-NEXT: br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.if5: -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP21:%.*]] = load float, float* [[TMP20]], align 4 -; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x float> [[TMP18]], float [[TMP21]], i32 3 +; CHECK-NEXT: [[TMP31:%.*]] = or i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP31]] +; CHECK-NEXT: [[TMP33:%.*]] = load float, float* [[TMP32]], align 4 +; CHECK-NEXT: [[TMP34:%.*]] = insertelement <4 x float> [[TMP28]], float [[TMP33]], i32 3 +; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP31]] +; CHECK-NEXT: [[TMP36:%.*]] = load float, float* [[TMP35]], align 4 +; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x float> [[TMP29]], float [[TMP36]], i32 3 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.continue6: -; CHECK-NEXT: [[TMP23:%.*]] = phi <4 x float> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ] -; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0 -; CHECK-NEXT: br i1 [[TMP24]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]] -; CHECK: pred.load.if7: -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP26:%.*]] = load float, float* [[TMP25]], align 4 -; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x float> poison, float [[TMP26]], i32 0 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE8]] -; CHECK: pred.load.continue8: -; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x float> [ poison, [[PRED_LOAD_CONTINUE6]] ], [ [[TMP27]], [[PRED_LOAD_IF7]] ] -; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1 -; CHECK-NEXT: br i1 [[TMP29]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]] -; CHECK: pred.load.if9: -; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP31:%.*]] = load float, float* [[TMP30]], align 4 -; CHECK-NEXT: [[TMP32:%.*]] = insertelement <4 x float> [[TMP28]], float [[TMP31]], i32 1 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE10]] -; CHECK: pred.load.continue10: -; CHECK-NEXT: [[TMP33:%.*]] = phi <4 x float> [ [[TMP28]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP32]], [[PRED_LOAD_IF9]] ] -; CHECK-NEXT: [[TMP34:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2 -; CHECK-NEXT: br i1 [[TMP34]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]] -; CHECK: pred.load.if11: -; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP36:%.*]] = load float, float* [[TMP35]], align 4 -; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x float> [[TMP33]], float [[TMP36]], i32 2 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE12]] -; CHECK: pred.load.continue12: -; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x float> [ [[TMP33]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP37]], [[PRED_LOAD_IF11]] ] -; CHECK-NEXT: [[TMP39:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3 -; CHECK-NEXT: br i1 [[TMP39]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14]] -; CHECK: pred.load.if13: -; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP41:%.*]] = load float, float* [[TMP40]], align 4 -; CHECK-NEXT: [[TMP42:%.*]] = insertelement <4 x float> [[TMP38]], float [[TMP41]], i32 3 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE14]] -; CHECK: pred.load.continue14: -; CHECK-NEXT: [[TMP43:%.*]] = phi <4 x float> [ [[TMP38]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP42]], [[PRED_LOAD_IF13]] ] -; CHECK-NEXT: [[TMP44:%.*]] = select <4 x i1> [[TMP3]], <4 x float> [[TMP23]], <4 x float> -; CHECK-NEXT: [[TMP45:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP44]]) -; CHECK-NEXT: [[TMP46:%.*]] = fmul float [[TMP45]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP47:%.*]] = select <4 x i1> [[TMP3]], <4 x float> [[TMP43]], <4 x float> -; CHECK-NEXT: [[TMP48:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP47]]) -; CHECK-NEXT: [[TMP49]] = fmul float [[TMP48]], [[TMP46]] +; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x float> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ] +; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x float> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ] +; CHECK-NEXT: [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x float> [[TMP38]], <4 x float> +; CHECK-NEXT: [[TMP41:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP40]]) +; CHECK-NEXT: [[TMP42:%.*]] = fmul float [[TMP41]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP43:%.*]] = select <4 x i1> [[TMP0]], <4 x float> [[TMP39]], <4 x float> +; CHECK-NEXT: [[TMP44:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP43]]) +; CHECK-NEXT: [[TMP45]] = fmul float [[TMP44]], [[TMP42]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP50:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 -; CHECK-NEXT: br i1 [[TMP50]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; CHECK-NEXT: [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 +; CHECK-NEXT: br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -1321,7 +1141,7 @@ define float @reduction_fmul(float* nocapture %A, float* nocapture %B) { ; CHECK: for.body: ; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi float [ undef, [[FOR_BODY]] ], [ [[TMP49]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi float [ undef, [[FOR_BODY]] ], [ [[TMP45]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret float [[RESULT_0_LCSSA]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll b/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll index d8b323406d533..452c4f7ae0489 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll @@ -32,7 +32,7 @@ entry: define i32 @reduction_sum(i32* noalias nocapture %A, i32* noalias nocapture %B) { ; CHECK-LABEL: @reduction_sum( -; CHECK: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ [[TMP47:%.*]], %pred.load.continue14 ] +; CHECK: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ [[TMP47:%.*]], %pred.load.continue6 ] ; CHECK: [[TMP44:%.*]] = add <4 x i32> [[VEC_PHI]], [[VEC_IND:%.*]] ; CHECK: [[TMP45:%.*]] = add <4 x i32> [[TMP44]], [[TMP23:%.*]] ; CHECK: [[TMP46:%.*]] = add <4 x i32> [[TMP45]], [[TMP43:%.*]] @@ -65,7 +65,7 @@ entry: define i32 @reduction_prod(i32* noalias nocapture %A, i32* noalias nocapture %B) { ; CHECK-LABEL: @reduction_prod( ; CHECK: vector.body: -; CHECK: [[VEC_PHI:%.*]] = phi <4 x i32> [ , %vector.ph ], [ [[TMP46:%.*]], %pred.load.continue14 ] +; CHECK: [[VEC_PHI:%.*]] = phi <4 x i32> [ , %vector.ph ], [ [[TMP46:%.*]], %pred.load.continue6 ] ; CHECK: [[TMP44:%.*]] = mul <4 x i32> [[VEC_PHI]], [[TMP23:%.*]] ; CHECK: [[TMP45:%.*]] = mul <4 x i32> [[TMP44]], [[TMP43:%.*]] ; CHECK: [[TMP46]] = select <4 x i1> [[TMP3:%.*]], <4 x i32> [[TMP45]], <4 x i32> [[VEC_PHI]] @@ -96,7 +96,7 @@ entry: define i32 @reduction_and(i32* nocapture %A, i32* nocapture %B) { ; CHECK-LABEL: @reduction_and( ; CHECK: vector.body: -; CHECK: [[VEC_PHI:%.*]] = phi <4 x i32> [ , %vector.ph ], [ [[TMP46:%.*]], %pred.load.continue14 ] +; CHECK: [[VEC_PHI:%.*]] = phi <4 x i32> [ , %vector.ph ], [ [[TMP46:%.*]], %pred.load.continue6 ] ; CHECK: [[TMP44:%.*]] = and <4 x i32> [[VEC_PHI]], [[TMP23:%.*]] ; CHECK: [[TMP45:%.*]] = and <4 x i32> [[TMP44]], [[TMP43:%.*]] ; CHECK: [[TMP46]] = select <4 x i1> [[TMP3:%.*]], <4 x i32> [[TMP45]], <4 x i32> [[VEC_PHI]] @@ -127,7 +127,7 @@ for.end: ; preds = %for.body, %entry define i32 @reduction_or(i32* nocapture %A, i32* nocapture %B) { ; CHECK-LABEL: @reduction_or( ; CHECK: vector.body: -; CHECK: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ [[TMP46:%.*]], %pred.load.continue14 ] +; CHECK: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ [[TMP46:%.*]], %pred.load.continue6 ] ; CHECK: [[TMP45:%.*]] = select <4 x i1> [[TMP3:%.*]], <4 x i32> [[TMP44:%.*]], <4 x i32> zeroinitializer ; CHECK: [[TMP46]] = or <4 x i32> [[VEC_PHI]], [[TMP45]] ; CHECK: middle.block: @@ -157,7 +157,7 @@ for.end: ; preds = %for.body, %entry define i32 @reduction_xor(i32* nocapture %A, i32* nocapture %B) { ; CHECK-LABEL: @reduction_xor( ; CHECK: vector.body: -; CHECK: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ [[TMP46:%.*]], %pred.load.continue14 ] +; CHECK: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ [[TMP46:%.*]], %pred.load.continue6 ] ; CHECK: [[TMP45:%.*]] = select <4 x i1> [[TMP3:%.*]], <4 x i32> [[TMP44:%.*]], <4 x i32> zeroinitializer ; CHECK: [[TMP46]] = xor <4 x i32> [[VEC_PHI]], [[TMP45]] ; CHECK: middle.block: @@ -187,7 +187,7 @@ for.end: ; preds = %for.body, %entry define float @reduction_fadd(float* nocapture %A, float* nocapture %B) { ; CHECK-LABEL: @reduction_fadd( ; CHECK: vector.body: -; CHECK: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, %vector.ph ], [ [[TMP46:%.*]], %pred.load.continue14 ] +; CHECK: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, %vector.ph ], [ [[TMP46:%.*]], %pred.load.continue6 ] ; CHECK: [[TMP44:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[TMP23:%.*]] ; CHECK: [[TMP45:%.*]] = fadd fast <4 x float> [[TMP44]], [[TMP43:%.*]] ; CHECK: [[TMP46]] = select <4 x i1> [[TMP3:%.*]], <4 x float> [[TMP45]], <4 x float> [[VEC_PHI]] @@ -218,7 +218,7 @@ for.end: ; preds = %for.body, %entry define float @reduction_fmul(float* nocapture %A, float* nocapture %B) { ; CHECK-LABEL: @reduction_fmul( ; CHECK: vector.body: -; CHECK: [[VEC_PHI:%.*]] = phi <4 x float> [ , %vector.ph ], [ [[TMP46:%.*]], %pred.load.continue14 ] +; CHECK: [[VEC_PHI:%.*]] = phi <4 x float> [ , %vector.ph ], [ [[TMP46:%.*]], %pred.load.continue6 ] ; CHECK: [[TMP44:%.*]] = fmul fast <4 x float> [[VEC_PHI]], [[TMP23:%.*]] ; CHECK: [[TMP45:%.*]] = fmul fast <4 x float> [[TMP44]], [[TMP43:%.*]] ; CHECK: [[TMP46]] = select <4 x i1> [[TMP3:%.*]], <4 x float> [[TMP45]], <4 x float> [[VEC_PHI]] diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll index c7156fd3f70dc..934d55d92ae5c 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll @@ -14,23 +14,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ; CHECK-NEXT: loop: ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next ; CHECK-NEXT: EMIT vp<%2> = icmp ule ir<%iv> vp<%0> -; CHECK-NEXT: Successor(s): pred.load - -; CHECK: pred.load: { -; CHECK-NEXT: pred.load.entry: -; CHECK-NEXT: BRANCH-ON-MASK vp<%2> -; CHECK-NEXT: Successor(s): pred.load.if, pred.load.continue -; CHECK-NEXT: CondBit: vp<%2> (loop) - -; CHECK: pred.load.if: -; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, ir<%iv> -; CHECK-NEXT: REPLICATE ir<%lv.b> = load ir<%gep.b> -; CHECK-NEXT: Successor(s): pred.load.continue - -; CHECK: pred.load.continue: -; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%5> = ir<%lv.b> -; CHECK-NEXT: No successors -; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): loop.0 ; CHECK: loop.0: ; CHECK-NEXT: Successor(s): pred.store @@ -42,13 +26,16 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ; CHECK-NEXT: CondBit: vp<%2> (loop) ; CHECK: pred.store.if: -; CHECK-NEXT: REPLICATE ir<%add> = add vp<%5>, ir<10> +; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, ir<%iv> +; CHECK-NEXT: REPLICATE ir<%lv.b> = load ir<%gep.b> +; CHECK-NEXT: REPLICATE ir<%add> = add ir<%lv.b>, ir<10> ; CHECK-NEXT: REPLICATE ir<%mul> = mul ir<2>, ir<%add> ; CHECK-NEXT: REPLICATE ir<%gep.a> = getelementptr ir<@a>, ir<0>, ir<%iv> ; CHECK-NEXT: REPLICATE store ir<%mul>, ir<%gep.a> ; CHECK-NEXT: Successor(s): pred.store.continue ; CHECK: pred.store.continue: +; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%9> = ir<%lv.b> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -615,61 +602,12 @@ define void @merge_3_replicate_region(i32 %k, i32 %j) { ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next ; CHECK-NEXT: EMIT vp<%2> = icmp ule ir<%iv> vp<%0> ; CHECK-NEXT: REPLICATE ir<%gep.a> = getelementptr ir<@a>, ir<0>, ir<%iv> -; CHECK-NEXT: Successor(s): pred.load -; CHECK-EMPTY: -; CHECK-NEXT: pred.load: { -; CHECK-NEXT: pred.load.entry: -; CHECK-NEXT: BRANCH-ON-MASK vp<%2> -; CHECK-NEXT: Successor(s): pred.load.if, pred.load.continue -; CHECK-NEXT: CondBit: vp<%2> (loop) -; CHECK-EMPTY: -; CHECK-NEXT: pred.load.if: -; CHECK-NEXT: REPLICATE ir<%lv.a> = load ir<%gep.a> -; CHECK-NEXT: Successor(s): pred.load.continue -; CHECK-EMPTY: -; CHECK-NEXT: pred.load.continue: -; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%5> = ir<%lv.a> -; CHECK-NEXT: No successors -; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): loop.0 ; CHECK-EMPTY: ; CHECK-NEXT: loop.0: -; CHECK-NEXT: Successor(s): pred.load -; CHECK-EMPTY: -; CHECK-NEXT: pred.load: { -; CHECK-NEXT: pred.load.entry: -; CHECK-NEXT: BRANCH-ON-MASK vp<%2> -; CHECK-NEXT: Successor(s): pred.load.if, pred.load.continue -; CHECK-NEXT: CondBit: vp<%2> (loop) -; CHECK-EMPTY: -; CHECK-NEXT: pred.load.if: -; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, ir<%iv> -; CHECK-NEXT: REPLICATE ir<%lv.b> = load ir<%gep.b> -; CHECK-NEXT: Successor(s): pred.load.continue -; CHECK-EMPTY: -; CHECK-NEXT: pred.load.continue: -; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%8> = ir<%lv.b> -; CHECK-NEXT: No successors -; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): loop.1 ; CHECK-EMPTY: ; CHECK-NEXT: loop.1: -; CHECK-NEXT: Successor(s): pred.store -; CHECK-EMPTY: -; CHECK-NEXT: pred.store: { -; CHECK-NEXT: pred.store.entry: -; CHECK-NEXT: BRANCH-ON-MASK vp<%2> -; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue -; CHECK-NEXT: CondBit: vp<%2> (loop) -; CHECK-EMPTY: -; CHECK-NEXT: pred.store.if: -; CHECK-NEXT: REPLICATE ir<%gep.c> = getelementptr ir<@c>, ir<0>, ir<%iv> -; CHECK-NEXT: REPLICATE store vp<%5>, ir<%gep.c> -; CHECK-NEXT: Successor(s): pred.store.continue -; CHECK-EMPTY: -; CHECK-NEXT: pred.store.continue: -; CHECK-NEXT: No successors -; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): loop.2 ; CHECK-EMPTY: ; CHECK-NEXT: loop.2: @@ -682,10 +620,17 @@ define void @merge_3_replicate_region(i32 %k, i32 %j) { ; CHECK-NEXT: CondBit: vp<%2> (loop) ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.if: -; CHECK-NEXT: REPLICATE store vp<%8>, ir<%gep.a> +; CHECK-NEXT: REPLICATE ir<%lv.a> = load ir<%gep.a> +; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, ir<%iv> +; CHECK-NEXT: REPLICATE ir<%lv.b> = load ir<%gep.b> +; CHECK-NEXT: REPLICATE ir<%gep.c> = getelementptr ir<@c>, ir<0>, ir<%iv> +; CHECK-NEXT: REPLICATE store ir<%lv.a>, ir<%gep.c> +; CHECK-NEXT: REPLICATE store ir<%lv.b>, ir<%gep.a> ; CHECK-NEXT: Successor(s): pred.store.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.continue: +; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%10> = ir<%lv.a> +; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%11> = ir<%lv.b> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): loop.3 @@ -695,7 +640,7 @@ define void @merge_3_replicate_region(i32 %k, i32 %j) { ; CHECK-NEXT: Successor(s): then.0 ; CHECK-EMPTY: ; CHECK-NEXT: then.0: -; CHECK-NEXT: WIDEN ir<%mul> = mul vp<%5>, vp<%8> +; CHECK-NEXT: WIDEN ir<%mul> = mul vp<%10>, vp<%11> ; CHECK-NEXT: EMIT vp<%14> = select vp<%2> ir<%c.0> ir ; CHECK-NEXT: Successor(s): pred.store ; CHECK-EMPTY: @@ -764,41 +709,9 @@ define void @update_2_uses_in_same_recipe_in_merged_block(i32 %k) { ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next ; CHECK-NEXT: EMIT vp<%2> = icmp ule ir<%iv> vp<%0> ; CHECK-NEXT: REPLICATE ir<%gep.a> = getelementptr ir<@a>, ir<0>, ir<%iv> -; CHECK-NEXT: Successor(s): pred.load -; CHECK-EMPTY: -; CHECK-NEXT: pred.load: { -; CHECK-NEXT: pred.load.entry: -; CHECK-NEXT: BRANCH-ON-MASK vp<%2> -; CHECK-NEXT: Successor(s): pred.load.if, pred.load.continue -; CHECK-NEXT: CondBit: vp<%2> (loop) -; CHECK-EMPTY: -; CHECK-NEXT: pred.load.if: -; CHECK-NEXT: REPLICATE ir<%lv.a> = load ir<%gep.a> -; CHECK-NEXT: Successor(s): pred.load.continue -; CHECK-EMPTY: -; CHECK-NEXT: pred.load.continue: -; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%5> = ir<%lv.a> -; CHECK-NEXT: No successors -; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): loop.0 ; CHECK-EMPTY: ; CHECK-NEXT: loop.0: -; CHECK-NEXT: Successor(s): pred.sdiv -; CHECK-EMPTY: -; CHECK-NEXT: pred.sdiv: { -; CHECK-NEXT: pred.sdiv.entry: -; CHECK-NEXT: BRANCH-ON-MASK vp<%2> -; CHECK-NEXT: Successor(s): pred.sdiv.if, pred.sdiv.continue -; CHECK-NEXT: CondBit: vp<%2> (loop) -; CHECK-EMPTY: -; CHECK-NEXT: pred.sdiv.if: -; CHECK-NEXT: REPLICATE ir<%div> = sdiv vp<%5>, vp<%5> -; CHECK-NEXT: Successor(s): pred.sdiv.continue -; CHECK-EMPTY: -; CHECK-NEXT: pred.sdiv.continue: -; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%7> = ir<%div> -; CHECK-NEXT: No successors -; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): loop.1 ; CHECK-EMPTY: ; CHECK-NEXT: loop.1: @@ -811,10 +724,14 @@ define void @update_2_uses_in_same_recipe_in_merged_block(i32 %k) { ; CHECK-NEXT: CondBit: vp<%2> (loop) ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.if: -; CHECK-NEXT: REPLICATE store vp<%7>, ir<%gep.a> +; CHECK-NEXT: REPLICATE ir<%lv.a> = load ir<%gep.a> +; CHECK-NEXT: REPLICATE ir<%div> = sdiv ir<%lv.a>, ir<%lv.a> +; CHECK-NEXT: REPLICATE store ir<%div>, ir<%gep.a> ; CHECK-NEXT: Successor(s): pred.store.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.continue: +; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%7> = ir<%lv.a> +; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%8> = ir<%div> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): loop.2 @@ -871,22 +788,6 @@ define void @recipe_in_merge_candidate_used_by_first_order_recurrence(i32 %k) { ; CHECK-NEXT: Successor(s): loop.0 ; CHECK-EMPTY: ; CHECK-NEXT: loop.0: -; CHECK-NEXT: Successor(s): pred.sdiv -; CHECK-EMPTY: -; CHECK-NEXT: pred.sdiv: { -; CHECK-NEXT: pred.sdiv.entry: -; CHECK-NEXT: BRANCH-ON-MASK vp<%3> -; CHECK-NEXT: Successor(s): pred.sdiv.if, pred.sdiv.continue -; CHECK-NEXT: CondBit: vp<%3> (loop) -; CHECK-EMPTY: -; CHECK-NEXT: pred.sdiv.if: -; CHECK-NEXT: REPLICATE ir<%div> = sdiv ir<%for>, vp<%6> -; CHECK-NEXT: Successor(s): pred.sdiv.continue -; CHECK-EMPTY: -; CHECK-NEXT: pred.sdiv.continue: -; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%8> = ir<%div> -; CHECK-NEXT: No successors -; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): loop.1 ; CHECK-EMPTY: ; CHECK-NEXT: loop.1: @@ -899,12 +800,21 @@ define void @recipe_in_merge_candidate_used_by_first_order_recurrence(i32 %k) { ; CHECK-NEXT: CondBit: vp<%3> (loop) ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.if: -; CHECK-NEXT: REPLICATE store vp<%8>, ir<%gep.a> +; CHECK-NEXT: REPLICATE ir<%div> = sdiv ir<%for>, vp<%6> +; CHECK-NEXT: REPLICATE store ir<%div>, ir<%gep.a> ; CHECK-NEXT: Successor(s): pred.store.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.continue: +; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%9> = ir<%div> ; CHECK-NEXT: No successors ; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): loop.2 +; CHECK-EMPTY: +; CHECK-NEXT: loop.2: +; CHECK-NEXT: CLONE ir<%large> = icmp ir<%iv>, ir<8> +; CHECK-NEXT: CLONE ir<%exitcond> = icmp ir<%iv>, ir<%k> +; CHECK-NEXT: No successors +; CHECK-NEXT: } ; entry: br label %loop From f99672568fda6a9bc1ee4f09d5d84066f4979889 Mon Sep 17 00:00:00 2001 From: Kerry McLaughlin Date: Mon, 28 Jun 2021 11:26:10 +0100 Subject: [PATCH 023/619] [LoopVectorize] Fix strict reductions where VF = 1 Currently we will allow loops with a fixed width VF of 1 to vectorize if the -enable-strict-reductions flag is set. However, the loop vectorizer will not use ordered reductions if `VF.isScalar()` and the resulting vectorized loop will be out of order. This patch removes `VF.isVector()` when checking if ordered reductions should be used. Also, instead of converting the FAdds to reductions if the VF = 1, operands of the FAdds are changed such that the order is preserved. Reviewed By: david-arm Differential Revision: https://reviews.llvm.org/D104533 --- .../Vectorize/LoopVectorizationPlanner.h | 3 +- .../Transforms/Vectorize/LoopVectorize.cpp | 25 +++-- .../LoopVectorize/AArch64/strict-fadd.ll | 97 ++++++++++++++++--- 3 files changed, 103 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index 2213e73b21e4f..5c4c4fdfa3f76 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -356,7 +356,8 @@ class LoopVectorizationPlanner { /// reductions, with one operand being vector and the other being the scalar /// reduction chain. void adjustRecipesForInLoopReductions(VPlanPtr &Plan, - VPRecipeBuilder &RecipeBuilder); + VPRecipeBuilder &RecipeBuilder, + ElementCount MinVF); }; } // namespace llvm diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index cec0cf2ad6c0f..e609fddbe1220 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4344,8 +4344,7 @@ void InnerLoopVectorizer::fixReduction(VPWidenPHIRecipe *PhiR, // any loop invariant values. BasicBlock *VectorLoopLatch = LI->getLoopFor(LoopVectorBody)->getLoopLatch(); - bool IsOrdered = State.VF.isVector() && IsInLoopReductionPhi && - Cost->useOrderedReductions(RdxDesc); + bool IsOrdered = IsInLoopReductionPhi && Cost->useOrderedReductions(RdxDesc); for (unsigned Part = 0; Part < UF; ++Part) { if (IsOrdered && Part > 0) @@ -4759,8 +4758,7 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, Type *VecTy = ScalarPHI ? PN->getType() : VectorType::get(PN->getType(), State.VF); - bool IsOrdered = State.VF.isVector() && - Cost->isInLoopReduction(cast(PN)) && + bool IsOrdered = Cost->isInLoopReduction(cast(PN)) && Cost->useOrderedReductions(*RdxDesc); unsigned LastPartForNewPhi = IsOrdered ? 1 : State.UF; for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) { @@ -9280,8 +9278,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( } // Adjust the recipes for any inloop reductions. - if (Range.Start.isVector()) - adjustRecipesForInLoopReductions(Plan, RecipeBuilder); + adjustRecipesForInLoopReductions(Plan, RecipeBuilder, Range.Start); // Finally, if tail is folded by masking, introduce selects between the phi // and the live-out instruction of each reduction, at the end of the latch. @@ -9356,12 +9353,15 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) { // reductions, with one operand being vector and the other being the scalar // reduction chain. void LoopVectorizationPlanner::adjustRecipesForInLoopReductions( - VPlanPtr &Plan, VPRecipeBuilder &RecipeBuilder) { + VPlanPtr &Plan, VPRecipeBuilder &RecipeBuilder, ElementCount MinVF) { for (auto &Reduction : CM.getInLoopReductionChains()) { PHINode *Phi = Reduction.first; RecurrenceDescriptor &RdxDesc = Legal->getReductionVars()[Phi]; const SmallVector &ReductionOperations = Reduction.second; + if (MinVF.isScalar() && !CM.useOrderedReductions(RdxDesc)) + continue; + // ReductionOperations are orders top-down from the phi's use to the // LoopExitValue. We keep a track of the previous item (the Chain) to tell // which of the two operands will remain scalar and which will be reduced. @@ -9378,7 +9378,7 @@ void LoopVectorizationPlanner::adjustRecipesForInLoopReductions( "Expected to replace a VPWidenSelectSC"); FirstOpId = 1; } else { - assert(isa(WidenRecipe) && + assert((MinVF.isScalar() || isa(WidenRecipe)) && "Expected to replace a VPWidenSC"); FirstOpId = 0; } @@ -9527,8 +9527,13 @@ void VPReductionRecipe::execute(VPTransformState &State) { Value *NewRed; Value *NextInChain; if (IsOrdered) { - NewRed = createOrderedReduction(State.Builder, *RdxDesc, NewVecOp, - PrevInChain); + if (State.VF.isVector()) + NewRed = createOrderedReduction(State.Builder, *RdxDesc, NewVecOp, + PrevInChain); + else + NewRed = State.Builder.CreateBinOp( + (Instruction::BinaryOps)getUnderlyingInstr()->getOpcode(), + PrevInChain, NewVecOp); PrevInChain = NewRed; } else { PrevInChain = State.get(getChainOp(), Part); diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll index 9e402f964c781..f2d5f426103ee 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll @@ -693,14 +693,89 @@ for.end: ret float %add6 } -!0 = distinct !{!0, !4, !7, !9} -!1 = distinct !{!1, !4, !8, !9} -!2 = distinct !{!2, !5, !7, !9} -!3 = distinct !{!3, !6, !7, !9, !10} -!4 = !{!"llvm.loop.vectorize.width", i32 8} -!5 = !{!"llvm.loop.vectorize.width", i32 4} -!6 = !{!"llvm.loop.vectorize.width", i32 2} -!7 = !{!"llvm.loop.interleave.count", i32 1} -!8 = !{!"llvm.loop.interleave.count", i32 4} -!9 = !{!"llvm.loop.vectorize.enable", i1 true} -!10 = !{!"llvm.loop.vectorize.predicate.enable", i1 true} +; Test reductions for a VF of 1 and a UF > 1. +define float @fadd_scalar_vf(float* noalias nocapture readonly %a, i64 %n) { +; CHECK-ORDERED-LABEL: @fadd_scalar_vf +; CHECK-ORDERED: vector.body +; CHECK-ORDERED: %[[VEC_PHI:.*]] = phi float [ 0.000000e+00, {{.*}} ], [ %[[FADD4:.*]], %vector.body ] +; CHECK-ORDERED: %[[LOAD1:.*]] = load float, float* +; CHECK-ORDERED: %[[LOAD2:.*]] = load float, float* +; CHECK-ORDERED: %[[LOAD3:.*]] = load float, float* +; CHECK-ORDERED: %[[LOAD4:.*]] = load float, float* +; CHECK-ORDERED: %[[FADD1:.*]] = fadd float %[[VEC_PHI]], %[[LOAD1]] +; CHECK-ORDERED: %[[FADD2:.*]] = fadd float %[[FADD1]], %[[LOAD2]] +; CHECK-ORDERED: %[[FADD3:.*]] = fadd float %[[FADD2]], %[[LOAD3]] +; CHECK-ORDERED: %[[FADD4]] = fadd float %[[FADD3]], %[[LOAD4]] +; CHECK-ORDERED-NOT: call float @llvm.vector.reduce.fadd +; CHECK-ORDERED: scalar.ph +; CHECK-ORDERED: %[[MERGE_RDX:.*]] = phi float [ 0.000000e+00, %entry ], [ %[[FADD4]], %middle.block ] +; CHECK-ORDERED: for.body +; CHECK-ORDERED: %[[SUM_PHI:.*]] = phi float [ %[[MERGE_RDX]], %scalar.ph ], [ %[[FADD5:.*]], %for.body ] +; CHECK-ORDERED: %[[LOAD5:.*]] = load float, float* +; CHECK-ORDERED: %[[FADD5]] = fadd float %[[LOAD5]], %[[SUM_PHI]] +; CHECK-ORDERED: for.end +; CHECK-ORDERED: %[[RES_PHI:.*]] = phi float [ %[[FADD5]], %for.body ], [ %[[FADD4]], %middle.block ] +; CHECK-ORDERED: ret float %[[RES_PHI]] + +; CHECK-UNORDERED-LABEL: @fadd_scalar_vf +; CHECK-UNORDERED: vector.body +; CHECK-UNORDERED: %[[VEC_PHI1:.*]] = phi float [ 0.000000e+00, %vector.ph ], [ %[[FADD1:.*]], %vector.body ] +; CHECK-UNORDERED: %[[VEC_PHI2:.*]] = phi float [ -0.000000e+00, %vector.ph ], [ %[[FADD2:.*]], %vector.body ] +; CHECK-UNORDERED: %[[VEC_PHI3:.*]] = phi float [ -0.000000e+00, %vector.ph ], [ %[[FADD3:.*]], %vector.body ] +; CHECK-UNORDERED: %[[VEC_PHI4:.*]] = phi float [ -0.000000e+00, %vector.ph ], [ %[[FADD4:.*]], %vector.body ] +; CHECK-UNORDERED: %[[LOAD1:.*]] = load float, float* +; CHECK-UNORDERED: %[[LOAD2:.*]] = load float, float* +; CHECK-UNORDERED: %[[LOAD3:.*]] = load float, float* +; CHECK-UNORDERED: %[[LOAD4:.*]] = load float, float* +; CHECK-UNORDERED: %[[FADD1]] = fadd float %[[LOAD1]], %[[VEC_PHI1]] +; CHECK-UNORDERED: %[[FADD2]] = fadd float %[[LOAD2]], %[[VEC_PHI2]] +; CHECK-UNORDERED: %[[FADD3]] = fadd float %[[LOAD3]], %[[VEC_PHI3]] +; CHECK-UNORDERED: %[[FADD4]] = fadd float %[[LOAD4]], %[[VEC_PHI4]] +; CHECK-UNORDERED-NOT: call float @llvm.vector.reduce.fadd +; CHECK-UNORDERED: middle.block +; CHECK-UNORDERED: %[[BIN_RDX1:.*]] = fadd float %[[FADD2]], %[[FADD1]] +; CHECK-UNORDERED: %[[BIN_RDX2:.*]] = fadd float %[[FADD3]], %[[BIN_RDX1]] +; CHECK-UNORDERED: %[[BIN_RDX3:.*]] = fadd float %[[FADD4]], %[[BIN_RDX2]] +; CHECK-UNORDERED: scalar.ph +; CHECK-UNORDERED: %[[MERGE_RDX:.*]] = phi float [ 0.000000e+00, %entry ], [ %[[BIN_RDX3]], %middle.block ] +; CHECK-UNORDERED: for.body +; CHECK-UNORDERED: %[[SUM_PHI:.*]] = phi float [ %[[MERGE_RDX]], %scalar.ph ], [ %[[FADD5:.*]], %for.body ] +; CHECK-UNORDERED: %[[LOAD5:.*]] = load float, float* +; CHECK-UNORDERED: %[[FADD5]] = fadd float %[[LOAD5]], %[[SUM_PHI]] +; CHECK-UNORDERED: for.end +; CHECK-UNORDERED: %[[RES_PHI:.*]] = phi float [ %[[FADD5]], %for.body ], [ %[[BIN_RDX3]], %middle.block ] +; CHECK-UNORDERED: ret float %[[RES_PHI]] + +; CHECK-NOT-VECTORIZED-LABEL: @fadd_scalar_vf +; CHECK-NOT-VECTORIZED-NOT: @vector.body + +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %sum.07 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds float, float* %a, i64 %iv + %0 = load float, float* %arrayidx, align 4 + %add = fadd float %0, %sum.07 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %n + br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !4 + +for.end: + ret float %add +} + +!0 = distinct !{!0, !5, !9, !11} +!1 = distinct !{!1, !5, !10, !11} +!2 = distinct !{!2, !6, !9, !11} +!3 = distinct !{!3, !7, !9, !11, !12} +!4 = distinct !{!4, !8, !10, !11} +!5 = !{!"llvm.loop.vectorize.width", i32 8} +!6 = !{!"llvm.loop.vectorize.width", i32 4} +!7 = !{!"llvm.loop.vectorize.width", i32 2} +!8 = !{!"llvm.loop.vectorize.width", i32 1} +!9 = !{!"llvm.loop.interleave.count", i32 1} +!10 = !{!"llvm.loop.interleave.count", i32 4} +!11 = !{!"llvm.loop.vectorize.enable", i1 true} +!12 = !{!"llvm.loop.vectorize.predicate.enable", i1 true} From 24af0990de255d3862e4c4ed73c00872ba67647e Mon Sep 17 00:00:00 2001 From: James Henderson Date: Thu, 11 Feb 2021 15:41:32 +0000 Subject: [PATCH 024/619] [cross-project-tests] Rename vars to make sense for new directory name Differential Revision: https://reviews.llvm.org/D96509 Reviewed by: aprantl --- cross-project-tests/CMakeLists.txt | 26 ++++++++++++++------------ cross-project-tests/lit.cfg.py | 16 +++++----------- cross-project-tests/lit.site.cfg.py.in | 6 +++--- 3 files changed, 22 insertions(+), 26 deletions(-) diff --git a/cross-project-tests/CMakeLists.txt b/cross-project-tests/CMakeLists.txt index dfa34fe15c795..22c6ef0785344 100644 --- a/cross-project-tests/CMakeLists.txt +++ b/cross-project-tests/CMakeLists.txt @@ -1,4 +1,6 @@ -# Debug Info tests. These tests invoke clang to generate programs with +# Cross project tests, for tests that require access to multiple projects across +# LLVM (e.g. clang, lld and lldb). +# The subset inside debuginfo-tests invoke clang to generate programs with # various types of debug info, and then run those programs under a debugger # such as GDB or LLDB to verify the results. @@ -9,10 +11,10 @@ add_llvm_executable(check-gdb-llvm-support ) target_link_libraries(check-gdb-llvm-support PRIVATE LLVMSupport) -set(DEBUGINFO_TESTS_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) -set(DEBUGINFO_TESTS_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) +set(CROSS_PROJECT_TESTS_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) +set(CROSS_PROJECT_TESTS_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) -set(DEBUGINFO_TEST_DEPS +set(CROSS_PROJECT_TEST_DEPS clang FileCheck count @@ -30,28 +32,28 @@ if ("mlir" IN_LIST LLVM_ENABLE_PROJECTS) ${LLVM_EXTERNAL_MLIR_SOURCE_DIR}/include ${LLVM_BINARY_DIR}/tools/mlir/include) target_link_libraries(check-gdb-mlir-support PRIVATE MLIRIR) - list(APPEND DEBUGINFO_TEST_DEPS check-gdb-mlir-support) + list(APPEND CROSS_PROJECT_TEST_DEPS check-gdb-mlir-support) set(MLIR_SOURCE_DIR ${LLVM_EXTERNAL_MLIR_SOURCE_DIR}) endif() if("compiler-rt" IN_LIST LLVM_ENABLE_PROJECTS) # llgdb-tests/asan.c and other asan* files. if(TARGET asan) - list(APPEND DEBUGINFO_TEST_DEPS asan) + list(APPEND CROSS_PROJECT_TEST_DEPS asan) endif() # llgdb-tests/safestack.c if(TARGET safestack) - list(APPEND DEBUGINFO_TEST_DEPS safestack) + list(APPEND CROSS_PROJECT_TEST_DEPS safestack) endif() endif() # Many dexter tests depend on lldb. if("lldb" IN_LIST LLVM_ENABLE_PROJECTS) - list(APPEND DEBUGINFO_TEST_DEPS lldb lldb-server) + list(APPEND CROSS_PROJECT_TEST_DEPS lldb lldb-server) endif() # The Windows builder scripts pass -fuse-ld=lld. if (WIN32) - set(DEBUGINFO_TEST_DEPS ${DEBUGINFO_TEST_DEPS} lld) + list(APPEND CROSS_PROJECT_TEST_DEPS lld) endif() configure_lit_site_cfg( @@ -63,12 +65,12 @@ configure_lit_site_cfg( add_lit_testsuite(check-debuginfo "Running cross-project tests" ${CMAKE_CURRENT_BINARY_DIR} - DEPENDS ${DEBUGINFO_TEST_DEPS} + DEPENDS ${CROSS_PROJECT_TEST_DEPS} ) # Add check-debuginfo-* targets. -add_lit_testsuites(DEBUGINFO ${CMAKE_CURRENT_SOURCE_DIR} - DEPENDS ${DEBUGINFO_TEST_DEPS} +add_lit_testsuites(CROSS_PROJECT ${CMAKE_CURRENT_SOURCE_DIR} + DEPENDS ${CROSS_PROJECT_TEST_DEPS} ) set_target_properties(check-debuginfo PROPERTIES FOLDER "Tests") diff --git a/cross-project-tests/lit.cfg.py b/cross-project-tests/lit.cfg.py index aefe0303b764a..9cab3fc6a14c4 100644 --- a/cross-project-tests/lit.cfg.py +++ b/cross-project-tests/lit.cfg.py @@ -1,5 +1,3 @@ -# -*- Python -*- - import os import platform import re @@ -15,12 +13,9 @@ # Configuration file for the 'lit' test runner. # name: The name of this test suite. -config.name = 'debuginfo-tests' +config.name = 'cross-project-tests' # testFormat: The test format to use to interpret tests. -# -# For now we require '&&' between commands, until they get globally killed and -# the test runner updated. config.test_format = lit.formats.ShTest(not llvm_config.use_lit_shell) # suffixes: A list of file extensions to treat as test files. @@ -32,16 +27,16 @@ config.excludes = ['Inputs'] # test_source_root: The root path where tests are located. -config.test_source_root = config.debuginfo_tests_src_root +config.test_source_root = config.cross_project_tests_src_root # test_exec_root: The root path where tests should be run. -config.test_exec_root = config.debuginfo_tests_obj_root +config.test_exec_root = config.cross_project_tests_obj_root llvm_config.use_default_substitutions() tools = [ ToolSubst('%test_debuginfo', command=os.path.join( - config.debuginfo_tests_src_root, 'debuginfo-tests', + config.cross_project_tests_src_root, 'debuginfo-tests', 'llgdb-tests', 'test_debuginfo.pl')), ToolSubst("%llvm_src_root", config.llvm_src_root), ToolSubst("%llvm_tools_dir", config.llvm_tools_dir), @@ -126,7 +121,7 @@ def can_target_host(): # Produce dexter path, lldb path, and combine into the %dexter substitution # for running a test. -dexter_path = os.path.join(config.debuginfo_tests_src_root, +dexter_path = os.path.join(config.cross_project_tests_src_root, 'debuginfo-tests', 'dexter', 'dexter.py') dexter_test_cmd = '"{}" "{}" test'.format(sys.executable, dexter_path) if lldb_path is not None: @@ -173,7 +168,6 @@ def can_target_host(): lit.util.usePlatformSdkOnDarwin(config, lit_config) -# available_features: REQUIRES/UNSUPPORTED lit commands look at this list. if platform.system() == 'Darwin': xcode_lldb_vers = subprocess.check_output(['xcrun', 'lldb', '--version']).decode("utf-8") match = re.search('lldb-(\d+)', xcode_lldb_vers) diff --git a/cross-project-tests/lit.site.cfg.py.in b/cross-project-tests/lit.site.cfg.py.in index bd2cadc71d063..eb918ca96d5f0 100644 --- a/cross-project-tests/lit.site.cfg.py.in +++ b/cross-project-tests/lit.site.cfg.py.in @@ -10,8 +10,8 @@ config.llvm_libs_dir = "@LLVM_LIBS_DIR@" config.llvm_shlib_dir = "@SHLIBDIR@" config.lit_tools_dir = "@LLVM_LIT_TOOLS_DIR@" config.llvm_plugin_ext = "@LLVM_PLUGIN_EXT@" -config.debuginfo_tests_obj_root = "@DEBUGINFO_TESTS_BINARY_DIR@" -config.debuginfo_tests_src_root = "@DEBUGINFO_TESTS_SOURCE_DIR@" +config.cross_project_tests_obj_root = "@CROSS_PROJECT_TESTS_BINARY_DIR@" +config.cross_project_tests_src_root = "@CROSS_PROJECT_TESTS_SOURCE_DIR@" config.host_triple = "@LLVM_HOST_TRIPLE@" config.target_triple = "@TARGET_TRIPLE@" config.is_msvc = lit.util.pythonize_bool("@MSVC@") @@ -34,4 +34,4 @@ import lit.llvm lit.llvm.initialize(lit_config, config) # Let the main config do the real work. -lit_config.load_config(config, "@DEBUGINFO_TESTS_SOURCE_DIR@/lit.cfg.py") +lit_config.load_config(config, "@CROSS_PROJECT_TESTS_SOURCE_DIR@/lit.cfg.py") From 4446a72ad2dc2c268015dea4c649fcf4bca7f619 Mon Sep 17 00:00:00 2001 From: James Henderson Date: Tue, 9 Feb 2021 14:57:03 +0000 Subject: [PATCH 025/619] [cross-project-tests] Add lld as a dependency if in LLVM_ENABLE_PROJECTS Differential Revision: https://reviews.llvm.org/D96510 Reviewed by: aprantl --- cross-project-tests/CMakeLists.txt | 2 +- cross-project-tests/lit.cfg.py | 7 +++++-- cross-project-tests/lit.site.cfg.py.in | 1 + 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/cross-project-tests/CMakeLists.txt b/cross-project-tests/CMakeLists.txt index 22c6ef0785344..57bcc769cd6ca 100644 --- a/cross-project-tests/CMakeLists.txt +++ b/cross-project-tests/CMakeLists.txt @@ -52,7 +52,7 @@ if("lldb" IN_LIST LLVM_ENABLE_PROJECTS) endif() # The Windows builder scripts pass -fuse-ld=lld. -if (WIN32) +if (WIN32 OR "lld" IN_LIST LLVM_ENABLE_PROJECTS) list(APPEND CROSS_PROJECT_TEST_DEPS lld) endif() diff --git a/cross-project-tests/lit.cfg.py b/cross-project-tests/lit.cfg.py index 9cab3fc6a14c4..2df478d29bf8c 100644 --- a/cross-project-tests/lit.cfg.py +++ b/cross-project-tests/lit.cfg.py @@ -67,11 +67,14 @@ def get_required_attr(config, attr_name): tools.append(ToolSubst('%cdb', '"%s"' % os.path.join(win_sdk, 'Debuggers', arch, 'cdb.exe'))) -# clang_src_dir is not used by these tests, but is required by -# use_clang(), so set it to "". +# clang_src_dir and lld_src_dir are not used by these tests, but are required by +# use_clang() and use_lld() respectively, so set them to "", if needed. if not hasattr(config, 'clang_src_dir'): config.clang_src_dir = "" llvm_config.use_clang() +if not hasattr(config, 'lld_src_dir'): + config.lld_src_dir = "" +llvm_config.use_lld(required=('lld' in config.llvm_enabled_projects)) if config.llvm_use_sanitizer: # Propagate path to symbolizer for ASan/MSan. diff --git a/cross-project-tests/lit.site.cfg.py.in b/cross-project-tests/lit.site.cfg.py.in index eb918ca96d5f0..ae009ea04b228 100644 --- a/cross-project-tests/lit.site.cfg.py.in +++ b/cross-project-tests/lit.site.cfg.py.in @@ -15,6 +15,7 @@ config.cross_project_tests_src_root = "@CROSS_PROJECT_TESTS_SOURCE_DIR@" config.host_triple = "@LLVM_HOST_TRIPLE@" config.target_triple = "@TARGET_TRIPLE@" config.is_msvc = lit.util.pythonize_bool("@MSVC@") +config.llvm_enabled_projects = "@LLVM_ENABLE_PROJECTS@".split(";") config.mlir_src_root = "@MLIR_SOURCE_DIR@" From 3827600ff3f78b788b910ea1ebc6f86b132b6048 Mon Sep 17 00:00:00 2001 From: James Henderson Date: Tue, 9 Feb 2021 15:19:27 +0000 Subject: [PATCH 026/619] [cross-project-tests] Make clang optional if not in LLVM_ENABLE_PROJECTS Also mark debuginfo_tests as UNSUPPORTED if clang can't be found and remove it from the list of test dependencies if not in LLVM_ENABLE_PROJECTS. Differential Revision: https://reviews.llvm.org/D96511 Reviewed by: aprantl --- cross-project-tests/CMakeLists.txt | 5 ++++- cross-project-tests/debuginfo-tests/lit.local.cfg | 2 ++ cross-project-tests/lit.cfg.py | 3 ++- 3 files changed, 8 insertions(+), 2 deletions(-) create mode 100644 cross-project-tests/debuginfo-tests/lit.local.cfg diff --git a/cross-project-tests/CMakeLists.txt b/cross-project-tests/CMakeLists.txt index 57bcc769cd6ca..a4ae34d70227e 100644 --- a/cross-project-tests/CMakeLists.txt +++ b/cross-project-tests/CMakeLists.txt @@ -15,7 +15,6 @@ set(CROSS_PROJECT_TESTS_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) set(CROSS_PROJECT_TESTS_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) set(CROSS_PROJECT_TEST_DEPS - clang FileCheck count llvm-config @@ -24,6 +23,10 @@ set(CROSS_PROJECT_TEST_DEPS not ) +if ("clang" IN_LIST LLVM_ENABLE_PROJECTS) + list(APPEND CROSS_PROJECT_TEST_DEPS clang) +endif() + if ("mlir" IN_LIST LLVM_ENABLE_PROJECTS) add_llvm_executable(check-gdb-mlir-support debuginfo-tests/llvm-prettyprinters/gdb/mlir-support.cpp diff --git a/cross-project-tests/debuginfo-tests/lit.local.cfg b/cross-project-tests/debuginfo-tests/lit.local.cfg new file mode 100644 index 0000000000000..62f90a181d630 --- /dev/null +++ b/cross-project-tests/debuginfo-tests/lit.local.cfg @@ -0,0 +1,2 @@ +if 'clang' not in config.available_features: + config.unsupported = True diff --git a/cross-project-tests/lit.cfg.py b/cross-project-tests/lit.cfg.py index 2df478d29bf8c..9af7361a6633b 100644 --- a/cross-project-tests/lit.cfg.py +++ b/cross-project-tests/lit.cfg.py @@ -71,7 +71,8 @@ def get_required_attr(config, attr_name): # use_clang() and use_lld() respectively, so set them to "", if needed. if not hasattr(config, 'clang_src_dir'): config.clang_src_dir = "" -llvm_config.use_clang() +llvm_config.use_clang(required=('clang' in config.llvm_enabled_projects)) + if not hasattr(config, 'lld_src_dir'): config.lld_src_dir = "" llvm_config.use_lld(required=('lld' in config.llvm_enabled_projects)) From 1364750dadbb56032ef73b4d0d8cbc88a51392da Mon Sep 17 00:00:00 2001 From: James Henderson Date: Mon, 8 Feb 2021 15:40:55 +0000 Subject: [PATCH 027/619] [RFC][debuginfo-test] Rename debug-info lit tests for general purposes Discussion thread: https://lists.llvm.org/pipermail/llvm-dev/2021-January/148048.html Move debuginfo-test into a subdirectory of a new top-level directory, called cross-project-tests. The new name replaces "debuginfo-test" as an LLVM project enabled via LLVM_ENABLE_PROJECTS. Differential Revision: https://reviews.llvm.org/D95339 Reviewed by: aprantl --- README.md | 2 +- {debuginfo-tests => cross-project-tests}/CMakeLists.txt | 8 ++++---- .../debuginfo-tests}/README.txt | 0 .../dexter-tests/aggregate-indirect-arg.cpp | 0 .../debuginfo-tests}/dexter-tests/asan-deque.cpp | 0 .../debuginfo-tests}/dexter-tests/asan.c | 0 .../debuginfo-tests}/dexter-tests/ctor.cpp | 0 .../debuginfo-tests}/dexter-tests/dbg-arg.c | 0 .../debuginfo-tests}/dexter-tests/deferred_globals.cpp | 0 .../debuginfo-tests}/dexter-tests/global-constant.cpp | 0 .../debuginfo-tests}/dexter-tests/hello.c | 0 .../debuginfo-tests}/dexter-tests/inline-line-gap.cpp | 0 .../debuginfo-tests}/dexter-tests/lit.local.cfg | 0 .../debuginfo-tests}/dexter-tests/memvars/bitcast.c | 0 .../debuginfo-tests}/dexter-tests/memvars/const-branch.c | 0 .../debuginfo-tests}/dexter-tests/memvars/ctrl-flow.c | 0 .../debuginfo-tests}/dexter-tests/memvars/implicit-ptr.c | 0 .../dexter-tests/memvars/inline-escaping-function.c | 0 .../debuginfo-tests}/dexter-tests/memvars/inlining-dse.c | 0 .../debuginfo-tests}/dexter-tests/memvars/inlining.c | 0 .../debuginfo-tests}/dexter-tests/memvars/loop.c | 0 .../debuginfo-tests}/dexter-tests/memvars/merged-store.c | 0 .../debuginfo-tests}/dexter-tests/memvars/ptr-to.c | 0 .../debuginfo-tests}/dexter-tests/memvars/struct-dse.c | 0 .../dexter-tests/memvars/unused-merged-value.c | 0 .../debuginfo-tests}/dexter-tests/namespace.cpp | 0 .../debuginfo-tests}/dexter-tests/nrvo-string.cpp | 0 .../debuginfo-tests}/dexter-tests/nrvo.cpp | 0 .../debuginfo-tests}/dexter-tests/optnone-fastmath.cpp | 0 .../debuginfo-tests}/dexter-tests/optnone-loops.cpp | 0 .../dexter-tests/optnone-simple-functions.cpp | 0 .../dexter-tests/optnone-struct-and-methods.cpp | 0 .../dexter-tests/optnone-vectors-and-functions.cpp | 0 .../debuginfo-tests}/dexter-tests/realigned-frame.cpp | 0 .../debuginfo-tests}/dexter-tests/stack-var.c | 0 .../debuginfo-tests}/dexter-tests/vla.c | 0 .../debuginfo-tests}/dexter/.gitignore | 0 .../debuginfo-tests}/dexter/Commands.md | 0 .../debuginfo-tests}/dexter/LICENSE.txt | 0 .../debuginfo-tests}/dexter/README.md | 0 .../debuginfo-tests}/dexter/dex/__init__.py | 0 .../debuginfo-tests}/dexter/dex/builder/Builder.py | 0 .../debuginfo-tests}/dexter/dex/builder/ParserOptions.py | 0 .../debuginfo-tests}/dexter/dex/builder/__init__.py | 0 .../dexter/dex/builder/scripts/posix/clang-c.sh | 0 .../dexter/dex/builder/scripts/posix/clang.sh | 0 .../dexter/dex/builder/scripts/posix/gcc.sh | 0 .../dex/builder/scripts/windows/clang-cl_vs2015.bat | 0 .../dexter/dex/builder/scripts/windows/clang.bat | 0 .../debuginfo-tests}/dexter/dex/command/CommandBase.py | 0 .../debuginfo-tests}/dexter/dex/command/ParseCommand.py | 0 .../debuginfo-tests}/dexter/dex/command/StepValueInfo.py | 0 .../debuginfo-tests}/dexter/dex/command/__init__.py | 0 .../dexter/dex/command/commands/DexDeclareFile.py | 0 .../dexter/dex/command/commands/DexExpectProgramState.py | 0 .../dexter/dex/command/commands/DexExpectStepKind.py | 0 .../dexter/dex/command/commands/DexExpectStepOrder.py | 0 .../dexter/dex/command/commands/DexExpectWatchBase.py | 0 .../dexter/dex/command/commands/DexExpectWatchType.py | 0 .../dexter/dex/command/commands/DexExpectWatchValue.py | 0 .../dexter/dex/command/commands/DexLabel.py | 0 .../dexter/dex/command/commands/DexLimitSteps.py | 0 .../dexter/dex/command/commands/DexUnreachable.py | 0 .../dexter/dex/command/commands/DexWatch.py | 0 .../debuginfo-tests}/dexter/dex/debugger/DebuggerBase.py | 0 .../debugger/DebuggerControllers/ConditionalController.py | 0 .../dex/debugger/DebuggerControllers/ControllerHelpers.py | 0 .../DebuggerControllers/DebuggerControllerBase.py | 0 .../dex/debugger/DebuggerControllers/DefaultController.py | 0 .../debuginfo-tests}/dexter/dex/debugger/Debuggers.py | 0 .../debuginfo-tests}/dexter/dex/debugger/__init__.py | 0 .../debuginfo-tests}/dexter/dex/debugger/dbgeng/README.md | 0 .../dexter/dex/debugger/dbgeng/__init__.py | 0 .../dexter/dex/debugger/dbgeng/breakpoint.py | 0 .../debuginfo-tests}/dexter/dex/debugger/dbgeng/client.py | 0 .../dexter/dex/debugger/dbgeng/control.py | 0 .../debuginfo-tests}/dexter/dex/debugger/dbgeng/dbgeng.py | 0 .../dexter/dex/debugger/dbgeng/probe_process.py | 0 .../debuginfo-tests}/dexter/dex/debugger/dbgeng/setup.py | 0 .../dexter/dex/debugger/dbgeng/symbols.py | 0 .../dexter/dex/debugger/dbgeng/symgroup.py | 0 .../dexter/dex/debugger/dbgeng/sysobjs.py | 0 .../debuginfo-tests}/dexter/dex/debugger/dbgeng/utils.py | 0 .../debuginfo-tests}/dexter/dex/debugger/lldb/LLDB.py | 0 .../debuginfo-tests}/dexter/dex/debugger/lldb/__init__.py | 0 .../dexter/dex/debugger/visualstudio/VisualStudio.py | 0 .../dexter/dex/debugger/visualstudio/VisualStudio2015.py | 0 .../dexter/dex/debugger/visualstudio/VisualStudio2017.py | 0 .../dexter/dex/debugger/visualstudio/VisualStudio2019.py | 0 .../dexter/dex/debugger/visualstudio/__init__.py | 0 .../dex/debugger/visualstudio/windows/ComInterface.py | 0 .../dexter/dex/debugger/visualstudio/windows/__init__.py | 0 .../debuginfo-tests}/dexter/dex/dextIR/BuilderIR.py | 0 .../debuginfo-tests}/dexter/dex/dextIR/DebuggerIR.py | 0 .../debuginfo-tests}/dexter/dex/dextIR/DextIR.py | 0 .../debuginfo-tests}/dexter/dex/dextIR/FrameIR.py | 0 .../debuginfo-tests}/dexter/dex/dextIR/LocIR.py | 0 .../debuginfo-tests}/dexter/dex/dextIR/ProgramState.py | 0 .../debuginfo-tests}/dexter/dex/dextIR/StepIR.py | 0 .../debuginfo-tests}/dexter/dex/dextIR/ValueIR.py | 0 .../debuginfo-tests}/dexter/dex/dextIR/__init__.py | 0 .../debuginfo-tests}/dexter/dex/heuristic/Heuristic.py | 0 .../debuginfo-tests}/dexter/dex/heuristic/__init__.py | 0 .../debuginfo-tests}/dexter/dex/tools/Main.py | 0 .../debuginfo-tests}/dexter/dex/tools/TestToolBase.py | 0 .../debuginfo-tests}/dexter/dex/tools/ToolBase.py | 0 .../debuginfo-tests}/dexter/dex/tools/__init__.py | 0 .../dexter/dex/tools/clang_opt_bisect/Tool.py | 0 .../dexter/dex/tools/clang_opt_bisect/__init__.py | 0 .../debuginfo-tests}/dexter/dex/tools/help/Tool.py | 0 .../debuginfo-tests}/dexter/dex/tools/help/__init__.py | 0 .../dexter/dex/tools/list_debuggers/Tool.py | 0 .../dexter/dex/tools/list_debuggers/__init__.py | 0 .../debuginfo-tests}/dexter/dex/tools/no_tool_/Tool.py | 0 .../dexter/dex/tools/no_tool_/__init__.py | 0 .../dexter/dex/tools/run_debugger_internal_/Tool.py | 0 .../dexter/dex/tools/run_debugger_internal_/__init__.py | 0 .../debuginfo-tests}/dexter/dex/tools/test/Tool.py | 0 .../debuginfo-tests}/dexter/dex/tools/test/__init__.py | 0 .../debuginfo-tests}/dexter/dex/tools/view/Tool.py | 0 .../debuginfo-tests}/dexter/dex/tools/view/__init__.py | 0 .../debuginfo-tests}/dexter/dex/utils/Environment.py | 0 .../debuginfo-tests}/dexter/dex/utils/Exceptions.py | 0 .../debuginfo-tests}/dexter/dex/utils/ExtArgParse.py | 0 .../debuginfo-tests}/dexter/dex/utils/PrettyOutputBase.py | 0 .../debuginfo-tests}/dexter/dex/utils/ReturnCode.py | 0 .../debuginfo-tests}/dexter/dex/utils/RootDirectory.py | 0 .../debuginfo-tests}/dexter/dex/utils/Timer.py | 0 .../debuginfo-tests}/dexter/dex/utils/UnitTests.py | 0 .../debuginfo-tests}/dexter/dex/utils/Version.py | 0 .../debuginfo-tests}/dexter/dex/utils/Warning.py | 0 .../debuginfo-tests}/dexter/dex/utils/WorkingDirectory.py | 0 .../debuginfo-tests}/dexter/dex/utils/__init__.py | 0 .../dexter/dex/utils/posix/PrettyOutput.py | 0 .../debuginfo-tests}/dexter/dex/utils/posix/__init__.py | 0 .../dexter/dex/utils/windows/PrettyOutput.py | 0 .../debuginfo-tests}/dexter/dex/utils/windows/__init__.py | 0 .../debuginfo-tests}/dexter/dexter.py | 0 .../debuginfo-tests}/dexter/feature_tests/Readme.md | 0 .../feature_tests/commands/penalty/dex_declare_file.cpp | 0 .../commands/penalty/expect_program_state.cpp | 0 .../feature_tests/commands/penalty/expect_step_kinds.cpp | 0 .../feature_tests/commands/penalty/expect_step_order.cpp | 0 .../feature_tests/commands/penalty/expect_watch_type.cpp | 0 .../feature_tests/commands/penalty/expect_watch_value.cpp | 0 .../dexter/feature_tests/commands/penalty/unreachable.cpp | 0 .../perfect/dex_declare_file/dex_and_source/commands.dex | 0 .../dex_declare_file/dex_and_source/lit.local.cfg.py | 0 .../perfect/dex_declare_file/dex_and_source/test.cfg | 0 .../perfect/dex_declare_file/dex_and_source/test.cpp | 0 .../dex_declare_file/precompiled_binary/commands.dex | 0 .../dex_declare_file/precompiled_binary/lit.local.cfg.py | 0 .../perfect/dex_declare_file/precompiled_binary/test.cpp | 0 .../dex_commands/commands.dex | 0 .../dex_commands/source_root_dir.dex | 0 .../precompiled_binary_different_dir/lit.local.cfg.py | 0 .../precompiled_binary_different_dir/source/test.cpp | 0 .../windows_noncanonical_path/lit.local.cfg.py | 0 .../windows_noncanonical_path/source/test file.cpp | 0 .../dex_declare_file/windows_noncanonical_path/test.cfg | 0 .../dex_declare_file/windows_noncanonical_path/test.dex | 0 .../commands/perfect/expect_program_state.cpp | 0 .../commands/perfect/expect_step_kind/direction.cpp | 0 .../commands/perfect/expect_step_kind/func.cpp | 0 .../commands/perfect/expect_step_kind/func_external.cpp | 0 .../commands/perfect/expect_step_kind/recursive.cpp | 0 .../commands/perfect/expect_step_kind/small_loop.cpp | 0 .../feature_tests/commands/perfect/expect_step_order.cpp | 0 .../feature_tests/commands/perfect/expect_watch_type.cpp | 0 .../feature_tests/commands/perfect/expect_watch_value.cpp | 0 .../commands/perfect/limit_steps/hit_count.cpp | 0 .../limit_steps/limit_steps_check_json_step_count.cpp | 0 .../perfect/limit_steps/limit_steps_expect_loop.cpp | 0 .../perfect/limit_steps/limit_steps_expect_value.cpp | 0 .../perfect/limit_steps/limit_steps_line_mismatch.cpp | 0 .../limit_steps/limit_steps_overlapping_ranges.cpp | 0 .../limit_steps/limit_steps_same_line_conditional.cpp | 0 .../commands/perfect/limit_steps/unconditional.cpp | 0 .../dexter/feature_tests/commands/perfect/lit.local.cfg | 0 .../dexter/feature_tests/commands/perfect/unreachable.cpp | 0 .../debuginfo-tests}/dexter/feature_tests/lit.local.cfg | 0 .../subtools/clang-opt-bisect/clang-opt-bisect.cpp | 0 .../dexter/feature_tests/subtools/help/help.test | 0 .../subtools/list-debuggers/list-debuggers.test | 0 .../feature_tests/subtools/test/err_bad_label_ref.cpp | 0 .../feature_tests/subtools/test/err_duplicate_label.cpp | 0 .../feature_tests/subtools/test/err_label_kwarg.cpp | 0 .../subtools/test/err_limit_steps_no_values.cpp | 0 .../dexter/feature_tests/subtools/test/err_paren.cpp | 0 .../feature_tests/subtools/test/err_paren_mline.cpp | 0 .../dexter/feature_tests/subtools/test/err_syntax.cpp | 0 .../feature_tests/subtools/test/err_syntax_mline.cpp | 0 .../dexter/feature_tests/subtools/test/err_type.cpp | 0 .../dexter/feature_tests/subtools/test/err_type_mline.cpp | 0 .../feature_tests/subtools/test/label_another_line.cpp | 0 .../dexter/feature_tests/subtools/test/label_offset.cpp | 0 .../feature_tests/subtools/test/source-root-dir.cpp | 0 .../dexter/feature_tests/subtools/view.cpp | 0 .../dexter/feature_tests/unittests/run.test | 0 .../debuginfo-tests}/llgdb-tests/apple-accel.cpp | 0 .../debuginfo-tests}/llgdb-tests/asan-blocks.c | 0 .../debuginfo-tests}/llgdb-tests/asan-deque.cpp | 0 .../debuginfo-tests}/llgdb-tests/asan.c | 0 .../debuginfo-tests}/llgdb-tests/block_var.m | 0 .../debuginfo-tests}/llgdb-tests/blocks.m | 0 .../debuginfo-tests}/llgdb-tests/foreach.m | 0 .../llgdb-tests/forward-declare-class.cpp | 0 .../debuginfo-tests}/llgdb-tests/lit.local.cfg | 0 .../debuginfo-tests}/llgdb-tests/llgdb.py | 0 .../debuginfo-tests}/llgdb-tests/nested-struct.cpp | 0 .../debuginfo-tests}/llgdb-tests/nrvo-string.cpp | 0 .../debuginfo-tests}/llgdb-tests/safestack.c | 0 .../debuginfo-tests}/llgdb-tests/sret.cpp | 0 .../debuginfo-tests}/llgdb-tests/static-member-2.cpp | 0 .../debuginfo-tests}/llgdb-tests/static-member.cpp | 0 .../debuginfo-tests}/llgdb-tests/test_debuginfo.pl | 0 .../llvm-prettyprinters/gdb/lit.local.cfg | 0 .../llvm-prettyprinters/gdb/llvm-support.cpp | 0 .../llvm-prettyprinters/gdb/llvm-support.gdb | 0 .../llvm-prettyprinters/gdb/mlir-support.cpp | 0 .../llvm-prettyprinters/gdb/mlir-support.gdb | 0 .../debuginfo-tests}/win_cdb-tests/README.txt | 0 .../debuginfo-tests}/win_cdb-tests/lit.local.cfg.py | 0 {debuginfo-tests => cross-project-tests}/lit.cfg.py | 7 ++++--- .../lit.site.cfg.py.in | 0 llvm/CMakeLists.txt | 2 +- llvm/docs/CMake.rst | 2 +- llvm/docs/GettingStarted.rst | 2 +- llvm/docs/TestingGuide.rst | 2 +- llvm/projects/CMakeLists.txt | 4 ++-- 230 files changed, 15 insertions(+), 14 deletions(-) rename {debuginfo-tests => cross-project-tests}/CMakeLists.txt (88%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/README.txt (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter-tests/aggregate-indirect-arg.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter-tests/asan-deque.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter-tests/asan.c (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter-tests/ctor.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter-tests/dbg-arg.c (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter-tests/deferred_globals.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter-tests/global-constant.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter-tests/hello.c (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter-tests/inline-line-gap.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter-tests/lit.local.cfg (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter-tests/memvars/bitcast.c (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter-tests/memvars/const-branch.c (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter-tests/memvars/ctrl-flow.c (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter-tests/memvars/implicit-ptr.c (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter-tests/memvars/inline-escaping-function.c (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter-tests/memvars/inlining-dse.c (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter-tests/memvars/inlining.c (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter-tests/memvars/loop.c (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter-tests/memvars/merged-store.c (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter-tests/memvars/ptr-to.c (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter-tests/memvars/struct-dse.c (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter-tests/memvars/unused-merged-value.c (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter-tests/namespace.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter-tests/nrvo-string.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter-tests/nrvo.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter-tests/optnone-fastmath.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter-tests/optnone-loops.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter-tests/optnone-simple-functions.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter-tests/optnone-struct-and-methods.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter-tests/optnone-vectors-and-functions.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter-tests/realigned-frame.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter-tests/stack-var.c (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter-tests/vla.c (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/.gitignore (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/Commands.md (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/LICENSE.txt (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/README.md (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/__init__.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/builder/Builder.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/builder/ParserOptions.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/builder/__init__.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/builder/scripts/posix/clang-c.sh (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/builder/scripts/posix/clang.sh (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/builder/scripts/posix/gcc.sh (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/builder/scripts/windows/clang-cl_vs2015.bat (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/builder/scripts/windows/clang.bat (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/command/CommandBase.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/command/ParseCommand.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/command/StepValueInfo.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/command/__init__.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/command/commands/DexDeclareFile.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/command/commands/DexExpectProgramState.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/command/commands/DexExpectStepKind.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/command/commands/DexExpectStepOrder.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/command/commands/DexExpectWatchBase.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/command/commands/DexExpectWatchType.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/command/commands/DexExpectWatchValue.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/command/commands/DexLabel.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/command/commands/DexLimitSteps.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/command/commands/DexUnreachable.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/command/commands/DexWatch.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/debugger/DebuggerBase.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/debugger/DebuggerControllers/ConditionalController.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/debugger/DebuggerControllers/ControllerHelpers.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/debugger/DebuggerControllers/DebuggerControllerBase.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/debugger/DebuggerControllers/DefaultController.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/debugger/Debuggers.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/debugger/__init__.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/debugger/dbgeng/README.md (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/debugger/dbgeng/__init__.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/debugger/dbgeng/breakpoint.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/debugger/dbgeng/client.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/debugger/dbgeng/control.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/debugger/dbgeng/dbgeng.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/debugger/dbgeng/probe_process.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/debugger/dbgeng/setup.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/debugger/dbgeng/symbols.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/debugger/dbgeng/symgroup.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/debugger/dbgeng/sysobjs.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/debugger/dbgeng/utils.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/debugger/lldb/LLDB.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/debugger/lldb/__init__.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/debugger/visualstudio/VisualStudio.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/debugger/visualstudio/VisualStudio2015.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/debugger/visualstudio/VisualStudio2017.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/debugger/visualstudio/VisualStudio2019.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/debugger/visualstudio/__init__.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/debugger/visualstudio/windows/ComInterface.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/debugger/visualstudio/windows/__init__.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/dextIR/BuilderIR.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/dextIR/DebuggerIR.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/dextIR/DextIR.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/dextIR/FrameIR.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/dextIR/LocIR.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/dextIR/ProgramState.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/dextIR/StepIR.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/dextIR/ValueIR.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/dextIR/__init__.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/heuristic/Heuristic.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/heuristic/__init__.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/tools/Main.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/tools/TestToolBase.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/tools/ToolBase.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/tools/__init__.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/tools/clang_opt_bisect/Tool.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/tools/clang_opt_bisect/__init__.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/tools/help/Tool.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/tools/help/__init__.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/tools/list_debuggers/Tool.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/tools/list_debuggers/__init__.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/tools/no_tool_/Tool.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/tools/no_tool_/__init__.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/tools/run_debugger_internal_/Tool.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/tools/run_debugger_internal_/__init__.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/tools/test/Tool.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/tools/test/__init__.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/tools/view/Tool.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/tools/view/__init__.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/utils/Environment.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/utils/Exceptions.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/utils/ExtArgParse.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/utils/PrettyOutputBase.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/utils/ReturnCode.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/utils/RootDirectory.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/utils/Timer.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/utils/UnitTests.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/utils/Version.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/utils/Warning.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/utils/WorkingDirectory.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/utils/__init__.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/utils/posix/PrettyOutput.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/utils/posix/__init__.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/utils/windows/PrettyOutput.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dex/utils/windows/__init__.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/dexter.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/Readme.md (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/commands/penalty/dex_declare_file.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/commands/penalty/expect_program_state.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/commands/penalty/expect_step_kinds.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/commands/penalty/expect_step_order.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/commands/penalty/expect_watch_type.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/commands/penalty/expect_watch_value.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/commands/penalty/unreachable.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/commands/perfect/dex_declare_file/dex_and_source/commands.dex (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/commands/perfect/dex_declare_file/dex_and_source/lit.local.cfg.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/commands/perfect/dex_declare_file/dex_and_source/test.cfg (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/commands/perfect/dex_declare_file/dex_and_source/test.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/commands/perfect/dex_declare_file/precompiled_binary/commands.dex (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/commands/perfect/dex_declare_file/precompiled_binary/lit.local.cfg.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/commands/perfect/dex_declare_file/precompiled_binary/test.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/commands/perfect/dex_declare_file/precompiled_binary_different_dir/dex_commands/commands.dex (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/commands/perfect/dex_declare_file/precompiled_binary_different_dir/dex_commands/source_root_dir.dex (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/commands/perfect/dex_declare_file/precompiled_binary_different_dir/lit.local.cfg.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/commands/perfect/dex_declare_file/precompiled_binary_different_dir/source/test.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/commands/perfect/dex_declare_file/windows_noncanonical_path/lit.local.cfg.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/commands/perfect/dex_declare_file/windows_noncanonical_path/source/test file.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/commands/perfect/dex_declare_file/windows_noncanonical_path/test.cfg (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/commands/perfect/dex_declare_file/windows_noncanonical_path/test.dex (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/commands/perfect/expect_program_state.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/commands/perfect/expect_step_kind/direction.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/commands/perfect/expect_step_kind/func.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/commands/perfect/expect_step_kind/func_external.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/commands/perfect/expect_step_kind/recursive.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/commands/perfect/expect_step_kind/small_loop.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/commands/perfect/expect_step_order.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/commands/perfect/expect_watch_type.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/commands/perfect/expect_watch_value.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/commands/perfect/limit_steps/hit_count.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_check_json_step_count.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_expect_loop.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_expect_value.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_line_mismatch.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_overlapping_ranges.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_same_line_conditional.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/commands/perfect/limit_steps/unconditional.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/commands/perfect/lit.local.cfg (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/commands/perfect/unreachable.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/lit.local.cfg (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/subtools/clang-opt-bisect/clang-opt-bisect.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/subtools/help/help.test (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/subtools/list-debuggers/list-debuggers.test (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/subtools/test/err_bad_label_ref.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/subtools/test/err_duplicate_label.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/subtools/test/err_label_kwarg.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/subtools/test/err_limit_steps_no_values.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/subtools/test/err_paren.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/subtools/test/err_paren_mline.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/subtools/test/err_syntax.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/subtools/test/err_syntax_mline.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/subtools/test/err_type.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/subtools/test/err_type_mline.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/subtools/test/label_another_line.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/subtools/test/label_offset.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/subtools/test/source-root-dir.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/subtools/view.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/dexter/feature_tests/unittests/run.test (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/llgdb-tests/apple-accel.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/llgdb-tests/asan-blocks.c (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/llgdb-tests/asan-deque.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/llgdb-tests/asan.c (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/llgdb-tests/block_var.m (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/llgdb-tests/blocks.m (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/llgdb-tests/foreach.m (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/llgdb-tests/forward-declare-class.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/llgdb-tests/lit.local.cfg (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/llgdb-tests/llgdb.py (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/llgdb-tests/nested-struct.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/llgdb-tests/nrvo-string.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/llgdb-tests/safestack.c (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/llgdb-tests/sret.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/llgdb-tests/static-member-2.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/llgdb-tests/static-member.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/llgdb-tests/test_debuginfo.pl (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/llvm-prettyprinters/gdb/lit.local.cfg (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/llvm-prettyprinters/gdb/llvm-support.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/llvm-prettyprinters/gdb/llvm-support.gdb (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/llvm-prettyprinters/gdb/mlir-support.cpp (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/llvm-prettyprinters/gdb/mlir-support.gdb (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/win_cdb-tests/README.txt (100%) rename {debuginfo-tests => cross-project-tests/debuginfo-tests}/win_cdb-tests/lit.local.cfg.py (100%) rename {debuginfo-tests => cross-project-tests}/lit.cfg.py (96%) rename {debuginfo-tests => cross-project-tests}/lit.site.cfg.py.in (100%) diff --git a/README.md b/README.md index c48159404445b..edbc4b80d5fbf 100644 --- a/README.md +++ b/README.md @@ -66,7 +66,7 @@ This is an example work-flow and configuration to get and build the LLVM source: * ``-DLLVM_ENABLE_PROJECTS='...'`` --- semicolon-separated list of the LLVM sub-projects you'd like to additionally build. Can include any of: clang, clang-tools-extra, libcxx, libcxxabi, libunwind, lldb, compiler-rt, lld, - polly, or debuginfo-tests. + polly, or cross-project-tests. For example, to build LLVM, Clang, libcxx, and libcxxabi, use ``-DLLVM_ENABLE_PROJECTS="clang;libcxx;libcxxabi"``. diff --git a/debuginfo-tests/CMakeLists.txt b/cross-project-tests/CMakeLists.txt similarity index 88% rename from debuginfo-tests/CMakeLists.txt rename to cross-project-tests/CMakeLists.txt index 0b01202a0bd0e..dfa34fe15c795 100644 --- a/debuginfo-tests/CMakeLists.txt +++ b/cross-project-tests/CMakeLists.txt @@ -5,7 +5,7 @@ find_package(Python3 COMPONENTS Interpreter) add_llvm_executable(check-gdb-llvm-support - llvm-prettyprinters/gdb/llvm-support.cpp + debuginfo-tests/llvm-prettyprinters/gdb/llvm-support.cpp ) target_link_libraries(check-gdb-llvm-support PRIVATE LLVMSupport) @@ -24,7 +24,7 @@ set(DEBUGINFO_TEST_DEPS if ("mlir" IN_LIST LLVM_ENABLE_PROJECTS) add_llvm_executable(check-gdb-mlir-support - llvm-prettyprinters/gdb/mlir-support.cpp + debuginfo-tests/llvm-prettyprinters/gdb/mlir-support.cpp ) target_include_directories(check-gdb-mlir-support PRIVATE ${LLVM_EXTERNAL_MLIR_SOURCE_DIR}/include @@ -61,7 +61,7 @@ configure_lit_site_cfg( ${CMAKE_CURRENT_SOURCE_DIR}/lit.cfg.py ) -add_lit_testsuite(check-debuginfo "Running debug info integration tests" +add_lit_testsuite(check-debuginfo "Running cross-project tests" ${CMAKE_CURRENT_BINARY_DIR} DEPENDS ${DEBUGINFO_TEST_DEPS} ) @@ -71,4 +71,4 @@ add_lit_testsuites(DEBUGINFO ${CMAKE_CURRENT_SOURCE_DIR} DEPENDS ${DEBUGINFO_TEST_DEPS} ) -set_target_properties(check-debuginfo PROPERTIES FOLDER "Debug info tests") +set_target_properties(check-debuginfo PROPERTIES FOLDER "Tests") diff --git a/debuginfo-tests/README.txt b/cross-project-tests/debuginfo-tests/README.txt similarity index 100% rename from debuginfo-tests/README.txt rename to cross-project-tests/debuginfo-tests/README.txt diff --git a/debuginfo-tests/dexter-tests/aggregate-indirect-arg.cpp b/cross-project-tests/debuginfo-tests/dexter-tests/aggregate-indirect-arg.cpp similarity index 100% rename from debuginfo-tests/dexter-tests/aggregate-indirect-arg.cpp rename to cross-project-tests/debuginfo-tests/dexter-tests/aggregate-indirect-arg.cpp diff --git a/debuginfo-tests/dexter-tests/asan-deque.cpp b/cross-project-tests/debuginfo-tests/dexter-tests/asan-deque.cpp similarity index 100% rename from debuginfo-tests/dexter-tests/asan-deque.cpp rename to cross-project-tests/debuginfo-tests/dexter-tests/asan-deque.cpp diff --git a/debuginfo-tests/dexter-tests/asan.c b/cross-project-tests/debuginfo-tests/dexter-tests/asan.c similarity index 100% rename from debuginfo-tests/dexter-tests/asan.c rename to cross-project-tests/debuginfo-tests/dexter-tests/asan.c diff --git a/debuginfo-tests/dexter-tests/ctor.cpp b/cross-project-tests/debuginfo-tests/dexter-tests/ctor.cpp similarity index 100% rename from debuginfo-tests/dexter-tests/ctor.cpp rename to cross-project-tests/debuginfo-tests/dexter-tests/ctor.cpp diff --git a/debuginfo-tests/dexter-tests/dbg-arg.c b/cross-project-tests/debuginfo-tests/dexter-tests/dbg-arg.c similarity index 100% rename from debuginfo-tests/dexter-tests/dbg-arg.c rename to cross-project-tests/debuginfo-tests/dexter-tests/dbg-arg.c diff --git a/debuginfo-tests/dexter-tests/deferred_globals.cpp b/cross-project-tests/debuginfo-tests/dexter-tests/deferred_globals.cpp similarity index 100% rename from debuginfo-tests/dexter-tests/deferred_globals.cpp rename to cross-project-tests/debuginfo-tests/dexter-tests/deferred_globals.cpp diff --git a/debuginfo-tests/dexter-tests/global-constant.cpp b/cross-project-tests/debuginfo-tests/dexter-tests/global-constant.cpp similarity index 100% rename from debuginfo-tests/dexter-tests/global-constant.cpp rename to cross-project-tests/debuginfo-tests/dexter-tests/global-constant.cpp diff --git a/debuginfo-tests/dexter-tests/hello.c b/cross-project-tests/debuginfo-tests/dexter-tests/hello.c similarity index 100% rename from debuginfo-tests/dexter-tests/hello.c rename to cross-project-tests/debuginfo-tests/dexter-tests/hello.c diff --git a/debuginfo-tests/dexter-tests/inline-line-gap.cpp b/cross-project-tests/debuginfo-tests/dexter-tests/inline-line-gap.cpp similarity index 100% rename from debuginfo-tests/dexter-tests/inline-line-gap.cpp rename to cross-project-tests/debuginfo-tests/dexter-tests/inline-line-gap.cpp diff --git a/debuginfo-tests/dexter-tests/lit.local.cfg b/cross-project-tests/debuginfo-tests/dexter-tests/lit.local.cfg similarity index 100% rename from debuginfo-tests/dexter-tests/lit.local.cfg rename to cross-project-tests/debuginfo-tests/dexter-tests/lit.local.cfg diff --git a/debuginfo-tests/dexter-tests/memvars/bitcast.c b/cross-project-tests/debuginfo-tests/dexter-tests/memvars/bitcast.c similarity index 100% rename from debuginfo-tests/dexter-tests/memvars/bitcast.c rename to cross-project-tests/debuginfo-tests/dexter-tests/memvars/bitcast.c diff --git a/debuginfo-tests/dexter-tests/memvars/const-branch.c b/cross-project-tests/debuginfo-tests/dexter-tests/memvars/const-branch.c similarity index 100% rename from debuginfo-tests/dexter-tests/memvars/const-branch.c rename to cross-project-tests/debuginfo-tests/dexter-tests/memvars/const-branch.c diff --git a/debuginfo-tests/dexter-tests/memvars/ctrl-flow.c b/cross-project-tests/debuginfo-tests/dexter-tests/memvars/ctrl-flow.c similarity index 100% rename from debuginfo-tests/dexter-tests/memvars/ctrl-flow.c rename to cross-project-tests/debuginfo-tests/dexter-tests/memvars/ctrl-flow.c diff --git a/debuginfo-tests/dexter-tests/memvars/implicit-ptr.c b/cross-project-tests/debuginfo-tests/dexter-tests/memvars/implicit-ptr.c similarity index 100% rename from debuginfo-tests/dexter-tests/memvars/implicit-ptr.c rename to cross-project-tests/debuginfo-tests/dexter-tests/memvars/implicit-ptr.c diff --git a/debuginfo-tests/dexter-tests/memvars/inline-escaping-function.c b/cross-project-tests/debuginfo-tests/dexter-tests/memvars/inline-escaping-function.c similarity index 100% rename from debuginfo-tests/dexter-tests/memvars/inline-escaping-function.c rename to cross-project-tests/debuginfo-tests/dexter-tests/memvars/inline-escaping-function.c diff --git a/debuginfo-tests/dexter-tests/memvars/inlining-dse.c b/cross-project-tests/debuginfo-tests/dexter-tests/memvars/inlining-dse.c similarity index 100% rename from debuginfo-tests/dexter-tests/memvars/inlining-dse.c rename to cross-project-tests/debuginfo-tests/dexter-tests/memvars/inlining-dse.c diff --git a/debuginfo-tests/dexter-tests/memvars/inlining.c b/cross-project-tests/debuginfo-tests/dexter-tests/memvars/inlining.c similarity index 100% rename from debuginfo-tests/dexter-tests/memvars/inlining.c rename to cross-project-tests/debuginfo-tests/dexter-tests/memvars/inlining.c diff --git a/debuginfo-tests/dexter-tests/memvars/loop.c b/cross-project-tests/debuginfo-tests/dexter-tests/memvars/loop.c similarity index 100% rename from debuginfo-tests/dexter-tests/memvars/loop.c rename to cross-project-tests/debuginfo-tests/dexter-tests/memvars/loop.c diff --git a/debuginfo-tests/dexter-tests/memvars/merged-store.c b/cross-project-tests/debuginfo-tests/dexter-tests/memvars/merged-store.c similarity index 100% rename from debuginfo-tests/dexter-tests/memvars/merged-store.c rename to cross-project-tests/debuginfo-tests/dexter-tests/memvars/merged-store.c diff --git a/debuginfo-tests/dexter-tests/memvars/ptr-to.c b/cross-project-tests/debuginfo-tests/dexter-tests/memvars/ptr-to.c similarity index 100% rename from debuginfo-tests/dexter-tests/memvars/ptr-to.c rename to cross-project-tests/debuginfo-tests/dexter-tests/memvars/ptr-to.c diff --git a/debuginfo-tests/dexter-tests/memvars/struct-dse.c b/cross-project-tests/debuginfo-tests/dexter-tests/memvars/struct-dse.c similarity index 100% rename from debuginfo-tests/dexter-tests/memvars/struct-dse.c rename to cross-project-tests/debuginfo-tests/dexter-tests/memvars/struct-dse.c diff --git a/debuginfo-tests/dexter-tests/memvars/unused-merged-value.c b/cross-project-tests/debuginfo-tests/dexter-tests/memvars/unused-merged-value.c similarity index 100% rename from debuginfo-tests/dexter-tests/memvars/unused-merged-value.c rename to cross-project-tests/debuginfo-tests/dexter-tests/memvars/unused-merged-value.c diff --git a/debuginfo-tests/dexter-tests/namespace.cpp b/cross-project-tests/debuginfo-tests/dexter-tests/namespace.cpp similarity index 100% rename from debuginfo-tests/dexter-tests/namespace.cpp rename to cross-project-tests/debuginfo-tests/dexter-tests/namespace.cpp diff --git a/debuginfo-tests/dexter-tests/nrvo-string.cpp b/cross-project-tests/debuginfo-tests/dexter-tests/nrvo-string.cpp similarity index 100% rename from debuginfo-tests/dexter-tests/nrvo-string.cpp rename to cross-project-tests/debuginfo-tests/dexter-tests/nrvo-string.cpp diff --git a/debuginfo-tests/dexter-tests/nrvo.cpp b/cross-project-tests/debuginfo-tests/dexter-tests/nrvo.cpp similarity index 100% rename from debuginfo-tests/dexter-tests/nrvo.cpp rename to cross-project-tests/debuginfo-tests/dexter-tests/nrvo.cpp diff --git a/debuginfo-tests/dexter-tests/optnone-fastmath.cpp b/cross-project-tests/debuginfo-tests/dexter-tests/optnone-fastmath.cpp similarity index 100% rename from debuginfo-tests/dexter-tests/optnone-fastmath.cpp rename to cross-project-tests/debuginfo-tests/dexter-tests/optnone-fastmath.cpp diff --git a/debuginfo-tests/dexter-tests/optnone-loops.cpp b/cross-project-tests/debuginfo-tests/dexter-tests/optnone-loops.cpp similarity index 100% rename from debuginfo-tests/dexter-tests/optnone-loops.cpp rename to cross-project-tests/debuginfo-tests/dexter-tests/optnone-loops.cpp diff --git a/debuginfo-tests/dexter-tests/optnone-simple-functions.cpp b/cross-project-tests/debuginfo-tests/dexter-tests/optnone-simple-functions.cpp similarity index 100% rename from debuginfo-tests/dexter-tests/optnone-simple-functions.cpp rename to cross-project-tests/debuginfo-tests/dexter-tests/optnone-simple-functions.cpp diff --git a/debuginfo-tests/dexter-tests/optnone-struct-and-methods.cpp b/cross-project-tests/debuginfo-tests/dexter-tests/optnone-struct-and-methods.cpp similarity index 100% rename from debuginfo-tests/dexter-tests/optnone-struct-and-methods.cpp rename to cross-project-tests/debuginfo-tests/dexter-tests/optnone-struct-and-methods.cpp diff --git a/debuginfo-tests/dexter-tests/optnone-vectors-and-functions.cpp b/cross-project-tests/debuginfo-tests/dexter-tests/optnone-vectors-and-functions.cpp similarity index 100% rename from debuginfo-tests/dexter-tests/optnone-vectors-and-functions.cpp rename to cross-project-tests/debuginfo-tests/dexter-tests/optnone-vectors-and-functions.cpp diff --git a/debuginfo-tests/dexter-tests/realigned-frame.cpp b/cross-project-tests/debuginfo-tests/dexter-tests/realigned-frame.cpp similarity index 100% rename from debuginfo-tests/dexter-tests/realigned-frame.cpp rename to cross-project-tests/debuginfo-tests/dexter-tests/realigned-frame.cpp diff --git a/debuginfo-tests/dexter-tests/stack-var.c b/cross-project-tests/debuginfo-tests/dexter-tests/stack-var.c similarity index 100% rename from debuginfo-tests/dexter-tests/stack-var.c rename to cross-project-tests/debuginfo-tests/dexter-tests/stack-var.c diff --git a/debuginfo-tests/dexter-tests/vla.c b/cross-project-tests/debuginfo-tests/dexter-tests/vla.c similarity index 100% rename from debuginfo-tests/dexter-tests/vla.c rename to cross-project-tests/debuginfo-tests/dexter-tests/vla.c diff --git a/debuginfo-tests/dexter/.gitignore b/cross-project-tests/debuginfo-tests/dexter/.gitignore similarity index 100% rename from debuginfo-tests/dexter/.gitignore rename to cross-project-tests/debuginfo-tests/dexter/.gitignore diff --git a/debuginfo-tests/dexter/Commands.md b/cross-project-tests/debuginfo-tests/dexter/Commands.md similarity index 100% rename from debuginfo-tests/dexter/Commands.md rename to cross-project-tests/debuginfo-tests/dexter/Commands.md diff --git a/debuginfo-tests/dexter/LICENSE.txt b/cross-project-tests/debuginfo-tests/dexter/LICENSE.txt similarity index 100% rename from debuginfo-tests/dexter/LICENSE.txt rename to cross-project-tests/debuginfo-tests/dexter/LICENSE.txt diff --git a/debuginfo-tests/dexter/README.md b/cross-project-tests/debuginfo-tests/dexter/README.md similarity index 100% rename from debuginfo-tests/dexter/README.md rename to cross-project-tests/debuginfo-tests/dexter/README.md diff --git a/debuginfo-tests/dexter/dex/__init__.py b/cross-project-tests/debuginfo-tests/dexter/dex/__init__.py similarity index 100% rename from debuginfo-tests/dexter/dex/__init__.py rename to cross-project-tests/debuginfo-tests/dexter/dex/__init__.py diff --git a/debuginfo-tests/dexter/dex/builder/Builder.py b/cross-project-tests/debuginfo-tests/dexter/dex/builder/Builder.py similarity index 100% rename from debuginfo-tests/dexter/dex/builder/Builder.py rename to cross-project-tests/debuginfo-tests/dexter/dex/builder/Builder.py diff --git a/debuginfo-tests/dexter/dex/builder/ParserOptions.py b/cross-project-tests/debuginfo-tests/dexter/dex/builder/ParserOptions.py similarity index 100% rename from debuginfo-tests/dexter/dex/builder/ParserOptions.py rename to cross-project-tests/debuginfo-tests/dexter/dex/builder/ParserOptions.py diff --git a/debuginfo-tests/dexter/dex/builder/__init__.py b/cross-project-tests/debuginfo-tests/dexter/dex/builder/__init__.py similarity index 100% rename from debuginfo-tests/dexter/dex/builder/__init__.py rename to cross-project-tests/debuginfo-tests/dexter/dex/builder/__init__.py diff --git a/debuginfo-tests/dexter/dex/builder/scripts/posix/clang-c.sh b/cross-project-tests/debuginfo-tests/dexter/dex/builder/scripts/posix/clang-c.sh similarity index 100% rename from debuginfo-tests/dexter/dex/builder/scripts/posix/clang-c.sh rename to cross-project-tests/debuginfo-tests/dexter/dex/builder/scripts/posix/clang-c.sh diff --git a/debuginfo-tests/dexter/dex/builder/scripts/posix/clang.sh b/cross-project-tests/debuginfo-tests/dexter/dex/builder/scripts/posix/clang.sh similarity index 100% rename from debuginfo-tests/dexter/dex/builder/scripts/posix/clang.sh rename to cross-project-tests/debuginfo-tests/dexter/dex/builder/scripts/posix/clang.sh diff --git a/debuginfo-tests/dexter/dex/builder/scripts/posix/gcc.sh b/cross-project-tests/debuginfo-tests/dexter/dex/builder/scripts/posix/gcc.sh similarity index 100% rename from debuginfo-tests/dexter/dex/builder/scripts/posix/gcc.sh rename to cross-project-tests/debuginfo-tests/dexter/dex/builder/scripts/posix/gcc.sh diff --git a/debuginfo-tests/dexter/dex/builder/scripts/windows/clang-cl_vs2015.bat b/cross-project-tests/debuginfo-tests/dexter/dex/builder/scripts/windows/clang-cl_vs2015.bat similarity index 100% rename from debuginfo-tests/dexter/dex/builder/scripts/windows/clang-cl_vs2015.bat rename to cross-project-tests/debuginfo-tests/dexter/dex/builder/scripts/windows/clang-cl_vs2015.bat diff --git a/debuginfo-tests/dexter/dex/builder/scripts/windows/clang.bat b/cross-project-tests/debuginfo-tests/dexter/dex/builder/scripts/windows/clang.bat similarity index 100% rename from debuginfo-tests/dexter/dex/builder/scripts/windows/clang.bat rename to cross-project-tests/debuginfo-tests/dexter/dex/builder/scripts/windows/clang.bat diff --git a/debuginfo-tests/dexter/dex/command/CommandBase.py b/cross-project-tests/debuginfo-tests/dexter/dex/command/CommandBase.py similarity index 100% rename from debuginfo-tests/dexter/dex/command/CommandBase.py rename to cross-project-tests/debuginfo-tests/dexter/dex/command/CommandBase.py diff --git a/debuginfo-tests/dexter/dex/command/ParseCommand.py b/cross-project-tests/debuginfo-tests/dexter/dex/command/ParseCommand.py similarity index 100% rename from debuginfo-tests/dexter/dex/command/ParseCommand.py rename to cross-project-tests/debuginfo-tests/dexter/dex/command/ParseCommand.py diff --git a/debuginfo-tests/dexter/dex/command/StepValueInfo.py b/cross-project-tests/debuginfo-tests/dexter/dex/command/StepValueInfo.py similarity index 100% rename from debuginfo-tests/dexter/dex/command/StepValueInfo.py rename to cross-project-tests/debuginfo-tests/dexter/dex/command/StepValueInfo.py diff --git a/debuginfo-tests/dexter/dex/command/__init__.py b/cross-project-tests/debuginfo-tests/dexter/dex/command/__init__.py similarity index 100% rename from debuginfo-tests/dexter/dex/command/__init__.py rename to cross-project-tests/debuginfo-tests/dexter/dex/command/__init__.py diff --git a/debuginfo-tests/dexter/dex/command/commands/DexDeclareFile.py b/cross-project-tests/debuginfo-tests/dexter/dex/command/commands/DexDeclareFile.py similarity index 100% rename from debuginfo-tests/dexter/dex/command/commands/DexDeclareFile.py rename to cross-project-tests/debuginfo-tests/dexter/dex/command/commands/DexDeclareFile.py diff --git a/debuginfo-tests/dexter/dex/command/commands/DexExpectProgramState.py b/cross-project-tests/debuginfo-tests/dexter/dex/command/commands/DexExpectProgramState.py similarity index 100% rename from debuginfo-tests/dexter/dex/command/commands/DexExpectProgramState.py rename to cross-project-tests/debuginfo-tests/dexter/dex/command/commands/DexExpectProgramState.py diff --git a/debuginfo-tests/dexter/dex/command/commands/DexExpectStepKind.py b/cross-project-tests/debuginfo-tests/dexter/dex/command/commands/DexExpectStepKind.py similarity index 100% rename from debuginfo-tests/dexter/dex/command/commands/DexExpectStepKind.py rename to cross-project-tests/debuginfo-tests/dexter/dex/command/commands/DexExpectStepKind.py diff --git a/debuginfo-tests/dexter/dex/command/commands/DexExpectStepOrder.py b/cross-project-tests/debuginfo-tests/dexter/dex/command/commands/DexExpectStepOrder.py similarity index 100% rename from debuginfo-tests/dexter/dex/command/commands/DexExpectStepOrder.py rename to cross-project-tests/debuginfo-tests/dexter/dex/command/commands/DexExpectStepOrder.py diff --git a/debuginfo-tests/dexter/dex/command/commands/DexExpectWatchBase.py b/cross-project-tests/debuginfo-tests/dexter/dex/command/commands/DexExpectWatchBase.py similarity index 100% rename from debuginfo-tests/dexter/dex/command/commands/DexExpectWatchBase.py rename to cross-project-tests/debuginfo-tests/dexter/dex/command/commands/DexExpectWatchBase.py diff --git a/debuginfo-tests/dexter/dex/command/commands/DexExpectWatchType.py b/cross-project-tests/debuginfo-tests/dexter/dex/command/commands/DexExpectWatchType.py similarity index 100% rename from debuginfo-tests/dexter/dex/command/commands/DexExpectWatchType.py rename to cross-project-tests/debuginfo-tests/dexter/dex/command/commands/DexExpectWatchType.py diff --git a/debuginfo-tests/dexter/dex/command/commands/DexExpectWatchValue.py b/cross-project-tests/debuginfo-tests/dexter/dex/command/commands/DexExpectWatchValue.py similarity index 100% rename from debuginfo-tests/dexter/dex/command/commands/DexExpectWatchValue.py rename to cross-project-tests/debuginfo-tests/dexter/dex/command/commands/DexExpectWatchValue.py diff --git a/debuginfo-tests/dexter/dex/command/commands/DexLabel.py b/cross-project-tests/debuginfo-tests/dexter/dex/command/commands/DexLabel.py similarity index 100% rename from debuginfo-tests/dexter/dex/command/commands/DexLabel.py rename to cross-project-tests/debuginfo-tests/dexter/dex/command/commands/DexLabel.py diff --git a/debuginfo-tests/dexter/dex/command/commands/DexLimitSteps.py b/cross-project-tests/debuginfo-tests/dexter/dex/command/commands/DexLimitSteps.py similarity index 100% rename from debuginfo-tests/dexter/dex/command/commands/DexLimitSteps.py rename to cross-project-tests/debuginfo-tests/dexter/dex/command/commands/DexLimitSteps.py diff --git a/debuginfo-tests/dexter/dex/command/commands/DexUnreachable.py b/cross-project-tests/debuginfo-tests/dexter/dex/command/commands/DexUnreachable.py similarity index 100% rename from debuginfo-tests/dexter/dex/command/commands/DexUnreachable.py rename to cross-project-tests/debuginfo-tests/dexter/dex/command/commands/DexUnreachable.py diff --git a/debuginfo-tests/dexter/dex/command/commands/DexWatch.py b/cross-project-tests/debuginfo-tests/dexter/dex/command/commands/DexWatch.py similarity index 100% rename from debuginfo-tests/dexter/dex/command/commands/DexWatch.py rename to cross-project-tests/debuginfo-tests/dexter/dex/command/commands/DexWatch.py diff --git a/debuginfo-tests/dexter/dex/debugger/DebuggerBase.py b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/DebuggerBase.py similarity index 100% rename from debuginfo-tests/dexter/dex/debugger/DebuggerBase.py rename to cross-project-tests/debuginfo-tests/dexter/dex/debugger/DebuggerBase.py diff --git a/debuginfo-tests/dexter/dex/debugger/DebuggerControllers/ConditionalController.py b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/DebuggerControllers/ConditionalController.py similarity index 100% rename from debuginfo-tests/dexter/dex/debugger/DebuggerControllers/ConditionalController.py rename to cross-project-tests/debuginfo-tests/dexter/dex/debugger/DebuggerControllers/ConditionalController.py diff --git a/debuginfo-tests/dexter/dex/debugger/DebuggerControllers/ControllerHelpers.py b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/DebuggerControllers/ControllerHelpers.py similarity index 100% rename from debuginfo-tests/dexter/dex/debugger/DebuggerControllers/ControllerHelpers.py rename to cross-project-tests/debuginfo-tests/dexter/dex/debugger/DebuggerControllers/ControllerHelpers.py diff --git a/debuginfo-tests/dexter/dex/debugger/DebuggerControllers/DebuggerControllerBase.py b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/DebuggerControllers/DebuggerControllerBase.py similarity index 100% rename from debuginfo-tests/dexter/dex/debugger/DebuggerControllers/DebuggerControllerBase.py rename to cross-project-tests/debuginfo-tests/dexter/dex/debugger/DebuggerControllers/DebuggerControllerBase.py diff --git a/debuginfo-tests/dexter/dex/debugger/DebuggerControllers/DefaultController.py b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/DebuggerControllers/DefaultController.py similarity index 100% rename from debuginfo-tests/dexter/dex/debugger/DebuggerControllers/DefaultController.py rename to cross-project-tests/debuginfo-tests/dexter/dex/debugger/DebuggerControllers/DefaultController.py diff --git a/debuginfo-tests/dexter/dex/debugger/Debuggers.py b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/Debuggers.py similarity index 100% rename from debuginfo-tests/dexter/dex/debugger/Debuggers.py rename to cross-project-tests/debuginfo-tests/dexter/dex/debugger/Debuggers.py diff --git a/debuginfo-tests/dexter/dex/debugger/__init__.py b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/__init__.py similarity index 100% rename from debuginfo-tests/dexter/dex/debugger/__init__.py rename to cross-project-tests/debuginfo-tests/dexter/dex/debugger/__init__.py diff --git a/debuginfo-tests/dexter/dex/debugger/dbgeng/README.md b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/dbgeng/README.md similarity index 100% rename from debuginfo-tests/dexter/dex/debugger/dbgeng/README.md rename to cross-project-tests/debuginfo-tests/dexter/dex/debugger/dbgeng/README.md diff --git a/debuginfo-tests/dexter/dex/debugger/dbgeng/__init__.py b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/dbgeng/__init__.py similarity index 100% rename from debuginfo-tests/dexter/dex/debugger/dbgeng/__init__.py rename to cross-project-tests/debuginfo-tests/dexter/dex/debugger/dbgeng/__init__.py diff --git a/debuginfo-tests/dexter/dex/debugger/dbgeng/breakpoint.py b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/dbgeng/breakpoint.py similarity index 100% rename from debuginfo-tests/dexter/dex/debugger/dbgeng/breakpoint.py rename to cross-project-tests/debuginfo-tests/dexter/dex/debugger/dbgeng/breakpoint.py diff --git a/debuginfo-tests/dexter/dex/debugger/dbgeng/client.py b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/dbgeng/client.py similarity index 100% rename from debuginfo-tests/dexter/dex/debugger/dbgeng/client.py rename to cross-project-tests/debuginfo-tests/dexter/dex/debugger/dbgeng/client.py diff --git a/debuginfo-tests/dexter/dex/debugger/dbgeng/control.py b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/dbgeng/control.py similarity index 100% rename from debuginfo-tests/dexter/dex/debugger/dbgeng/control.py rename to cross-project-tests/debuginfo-tests/dexter/dex/debugger/dbgeng/control.py diff --git a/debuginfo-tests/dexter/dex/debugger/dbgeng/dbgeng.py b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/dbgeng/dbgeng.py similarity index 100% rename from debuginfo-tests/dexter/dex/debugger/dbgeng/dbgeng.py rename to cross-project-tests/debuginfo-tests/dexter/dex/debugger/dbgeng/dbgeng.py diff --git a/debuginfo-tests/dexter/dex/debugger/dbgeng/probe_process.py b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/dbgeng/probe_process.py similarity index 100% rename from debuginfo-tests/dexter/dex/debugger/dbgeng/probe_process.py rename to cross-project-tests/debuginfo-tests/dexter/dex/debugger/dbgeng/probe_process.py diff --git a/debuginfo-tests/dexter/dex/debugger/dbgeng/setup.py b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/dbgeng/setup.py similarity index 100% rename from debuginfo-tests/dexter/dex/debugger/dbgeng/setup.py rename to cross-project-tests/debuginfo-tests/dexter/dex/debugger/dbgeng/setup.py diff --git a/debuginfo-tests/dexter/dex/debugger/dbgeng/symbols.py b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/dbgeng/symbols.py similarity index 100% rename from debuginfo-tests/dexter/dex/debugger/dbgeng/symbols.py rename to cross-project-tests/debuginfo-tests/dexter/dex/debugger/dbgeng/symbols.py diff --git a/debuginfo-tests/dexter/dex/debugger/dbgeng/symgroup.py b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/dbgeng/symgroup.py similarity index 100% rename from debuginfo-tests/dexter/dex/debugger/dbgeng/symgroup.py rename to cross-project-tests/debuginfo-tests/dexter/dex/debugger/dbgeng/symgroup.py diff --git a/debuginfo-tests/dexter/dex/debugger/dbgeng/sysobjs.py b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/dbgeng/sysobjs.py similarity index 100% rename from debuginfo-tests/dexter/dex/debugger/dbgeng/sysobjs.py rename to cross-project-tests/debuginfo-tests/dexter/dex/debugger/dbgeng/sysobjs.py diff --git a/debuginfo-tests/dexter/dex/debugger/dbgeng/utils.py b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/dbgeng/utils.py similarity index 100% rename from debuginfo-tests/dexter/dex/debugger/dbgeng/utils.py rename to cross-project-tests/debuginfo-tests/dexter/dex/debugger/dbgeng/utils.py diff --git a/debuginfo-tests/dexter/dex/debugger/lldb/LLDB.py b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/lldb/LLDB.py similarity index 100% rename from debuginfo-tests/dexter/dex/debugger/lldb/LLDB.py rename to cross-project-tests/debuginfo-tests/dexter/dex/debugger/lldb/LLDB.py diff --git a/debuginfo-tests/dexter/dex/debugger/lldb/__init__.py b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/lldb/__init__.py similarity index 100% rename from debuginfo-tests/dexter/dex/debugger/lldb/__init__.py rename to cross-project-tests/debuginfo-tests/dexter/dex/debugger/lldb/__init__.py diff --git a/debuginfo-tests/dexter/dex/debugger/visualstudio/VisualStudio.py b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/visualstudio/VisualStudio.py similarity index 100% rename from debuginfo-tests/dexter/dex/debugger/visualstudio/VisualStudio.py rename to cross-project-tests/debuginfo-tests/dexter/dex/debugger/visualstudio/VisualStudio.py diff --git a/debuginfo-tests/dexter/dex/debugger/visualstudio/VisualStudio2015.py b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/visualstudio/VisualStudio2015.py similarity index 100% rename from debuginfo-tests/dexter/dex/debugger/visualstudio/VisualStudio2015.py rename to cross-project-tests/debuginfo-tests/dexter/dex/debugger/visualstudio/VisualStudio2015.py diff --git a/debuginfo-tests/dexter/dex/debugger/visualstudio/VisualStudio2017.py b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/visualstudio/VisualStudio2017.py similarity index 100% rename from debuginfo-tests/dexter/dex/debugger/visualstudio/VisualStudio2017.py rename to cross-project-tests/debuginfo-tests/dexter/dex/debugger/visualstudio/VisualStudio2017.py diff --git a/debuginfo-tests/dexter/dex/debugger/visualstudio/VisualStudio2019.py b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/visualstudio/VisualStudio2019.py similarity index 100% rename from debuginfo-tests/dexter/dex/debugger/visualstudio/VisualStudio2019.py rename to cross-project-tests/debuginfo-tests/dexter/dex/debugger/visualstudio/VisualStudio2019.py diff --git a/debuginfo-tests/dexter/dex/debugger/visualstudio/__init__.py b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/visualstudio/__init__.py similarity index 100% rename from debuginfo-tests/dexter/dex/debugger/visualstudio/__init__.py rename to cross-project-tests/debuginfo-tests/dexter/dex/debugger/visualstudio/__init__.py diff --git a/debuginfo-tests/dexter/dex/debugger/visualstudio/windows/ComInterface.py b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/visualstudio/windows/ComInterface.py similarity index 100% rename from debuginfo-tests/dexter/dex/debugger/visualstudio/windows/ComInterface.py rename to cross-project-tests/debuginfo-tests/dexter/dex/debugger/visualstudio/windows/ComInterface.py diff --git a/debuginfo-tests/dexter/dex/debugger/visualstudio/windows/__init__.py b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/visualstudio/windows/__init__.py similarity index 100% rename from debuginfo-tests/dexter/dex/debugger/visualstudio/windows/__init__.py rename to cross-project-tests/debuginfo-tests/dexter/dex/debugger/visualstudio/windows/__init__.py diff --git a/debuginfo-tests/dexter/dex/dextIR/BuilderIR.py b/cross-project-tests/debuginfo-tests/dexter/dex/dextIR/BuilderIR.py similarity index 100% rename from debuginfo-tests/dexter/dex/dextIR/BuilderIR.py rename to cross-project-tests/debuginfo-tests/dexter/dex/dextIR/BuilderIR.py diff --git a/debuginfo-tests/dexter/dex/dextIR/DebuggerIR.py b/cross-project-tests/debuginfo-tests/dexter/dex/dextIR/DebuggerIR.py similarity index 100% rename from debuginfo-tests/dexter/dex/dextIR/DebuggerIR.py rename to cross-project-tests/debuginfo-tests/dexter/dex/dextIR/DebuggerIR.py diff --git a/debuginfo-tests/dexter/dex/dextIR/DextIR.py b/cross-project-tests/debuginfo-tests/dexter/dex/dextIR/DextIR.py similarity index 100% rename from debuginfo-tests/dexter/dex/dextIR/DextIR.py rename to cross-project-tests/debuginfo-tests/dexter/dex/dextIR/DextIR.py diff --git a/debuginfo-tests/dexter/dex/dextIR/FrameIR.py b/cross-project-tests/debuginfo-tests/dexter/dex/dextIR/FrameIR.py similarity index 100% rename from debuginfo-tests/dexter/dex/dextIR/FrameIR.py rename to cross-project-tests/debuginfo-tests/dexter/dex/dextIR/FrameIR.py diff --git a/debuginfo-tests/dexter/dex/dextIR/LocIR.py b/cross-project-tests/debuginfo-tests/dexter/dex/dextIR/LocIR.py similarity index 100% rename from debuginfo-tests/dexter/dex/dextIR/LocIR.py rename to cross-project-tests/debuginfo-tests/dexter/dex/dextIR/LocIR.py diff --git a/debuginfo-tests/dexter/dex/dextIR/ProgramState.py b/cross-project-tests/debuginfo-tests/dexter/dex/dextIR/ProgramState.py similarity index 100% rename from debuginfo-tests/dexter/dex/dextIR/ProgramState.py rename to cross-project-tests/debuginfo-tests/dexter/dex/dextIR/ProgramState.py diff --git a/debuginfo-tests/dexter/dex/dextIR/StepIR.py b/cross-project-tests/debuginfo-tests/dexter/dex/dextIR/StepIR.py similarity index 100% rename from debuginfo-tests/dexter/dex/dextIR/StepIR.py rename to cross-project-tests/debuginfo-tests/dexter/dex/dextIR/StepIR.py diff --git a/debuginfo-tests/dexter/dex/dextIR/ValueIR.py b/cross-project-tests/debuginfo-tests/dexter/dex/dextIR/ValueIR.py similarity index 100% rename from debuginfo-tests/dexter/dex/dextIR/ValueIR.py rename to cross-project-tests/debuginfo-tests/dexter/dex/dextIR/ValueIR.py diff --git a/debuginfo-tests/dexter/dex/dextIR/__init__.py b/cross-project-tests/debuginfo-tests/dexter/dex/dextIR/__init__.py similarity index 100% rename from debuginfo-tests/dexter/dex/dextIR/__init__.py rename to cross-project-tests/debuginfo-tests/dexter/dex/dextIR/__init__.py diff --git a/debuginfo-tests/dexter/dex/heuristic/Heuristic.py b/cross-project-tests/debuginfo-tests/dexter/dex/heuristic/Heuristic.py similarity index 100% rename from debuginfo-tests/dexter/dex/heuristic/Heuristic.py rename to cross-project-tests/debuginfo-tests/dexter/dex/heuristic/Heuristic.py diff --git a/debuginfo-tests/dexter/dex/heuristic/__init__.py b/cross-project-tests/debuginfo-tests/dexter/dex/heuristic/__init__.py similarity index 100% rename from debuginfo-tests/dexter/dex/heuristic/__init__.py rename to cross-project-tests/debuginfo-tests/dexter/dex/heuristic/__init__.py diff --git a/debuginfo-tests/dexter/dex/tools/Main.py b/cross-project-tests/debuginfo-tests/dexter/dex/tools/Main.py similarity index 100% rename from debuginfo-tests/dexter/dex/tools/Main.py rename to cross-project-tests/debuginfo-tests/dexter/dex/tools/Main.py diff --git a/debuginfo-tests/dexter/dex/tools/TestToolBase.py b/cross-project-tests/debuginfo-tests/dexter/dex/tools/TestToolBase.py similarity index 100% rename from debuginfo-tests/dexter/dex/tools/TestToolBase.py rename to cross-project-tests/debuginfo-tests/dexter/dex/tools/TestToolBase.py diff --git a/debuginfo-tests/dexter/dex/tools/ToolBase.py b/cross-project-tests/debuginfo-tests/dexter/dex/tools/ToolBase.py similarity index 100% rename from debuginfo-tests/dexter/dex/tools/ToolBase.py rename to cross-project-tests/debuginfo-tests/dexter/dex/tools/ToolBase.py diff --git a/debuginfo-tests/dexter/dex/tools/__init__.py b/cross-project-tests/debuginfo-tests/dexter/dex/tools/__init__.py similarity index 100% rename from debuginfo-tests/dexter/dex/tools/__init__.py rename to cross-project-tests/debuginfo-tests/dexter/dex/tools/__init__.py diff --git a/debuginfo-tests/dexter/dex/tools/clang_opt_bisect/Tool.py b/cross-project-tests/debuginfo-tests/dexter/dex/tools/clang_opt_bisect/Tool.py similarity index 100% rename from debuginfo-tests/dexter/dex/tools/clang_opt_bisect/Tool.py rename to cross-project-tests/debuginfo-tests/dexter/dex/tools/clang_opt_bisect/Tool.py diff --git a/debuginfo-tests/dexter/dex/tools/clang_opt_bisect/__init__.py b/cross-project-tests/debuginfo-tests/dexter/dex/tools/clang_opt_bisect/__init__.py similarity index 100% rename from debuginfo-tests/dexter/dex/tools/clang_opt_bisect/__init__.py rename to cross-project-tests/debuginfo-tests/dexter/dex/tools/clang_opt_bisect/__init__.py diff --git a/debuginfo-tests/dexter/dex/tools/help/Tool.py b/cross-project-tests/debuginfo-tests/dexter/dex/tools/help/Tool.py similarity index 100% rename from debuginfo-tests/dexter/dex/tools/help/Tool.py rename to cross-project-tests/debuginfo-tests/dexter/dex/tools/help/Tool.py diff --git a/debuginfo-tests/dexter/dex/tools/help/__init__.py b/cross-project-tests/debuginfo-tests/dexter/dex/tools/help/__init__.py similarity index 100% rename from debuginfo-tests/dexter/dex/tools/help/__init__.py rename to cross-project-tests/debuginfo-tests/dexter/dex/tools/help/__init__.py diff --git a/debuginfo-tests/dexter/dex/tools/list_debuggers/Tool.py b/cross-project-tests/debuginfo-tests/dexter/dex/tools/list_debuggers/Tool.py similarity index 100% rename from debuginfo-tests/dexter/dex/tools/list_debuggers/Tool.py rename to cross-project-tests/debuginfo-tests/dexter/dex/tools/list_debuggers/Tool.py diff --git a/debuginfo-tests/dexter/dex/tools/list_debuggers/__init__.py b/cross-project-tests/debuginfo-tests/dexter/dex/tools/list_debuggers/__init__.py similarity index 100% rename from debuginfo-tests/dexter/dex/tools/list_debuggers/__init__.py rename to cross-project-tests/debuginfo-tests/dexter/dex/tools/list_debuggers/__init__.py diff --git a/debuginfo-tests/dexter/dex/tools/no_tool_/Tool.py b/cross-project-tests/debuginfo-tests/dexter/dex/tools/no_tool_/Tool.py similarity index 100% rename from debuginfo-tests/dexter/dex/tools/no_tool_/Tool.py rename to cross-project-tests/debuginfo-tests/dexter/dex/tools/no_tool_/Tool.py diff --git a/debuginfo-tests/dexter/dex/tools/no_tool_/__init__.py b/cross-project-tests/debuginfo-tests/dexter/dex/tools/no_tool_/__init__.py similarity index 100% rename from debuginfo-tests/dexter/dex/tools/no_tool_/__init__.py rename to cross-project-tests/debuginfo-tests/dexter/dex/tools/no_tool_/__init__.py diff --git a/debuginfo-tests/dexter/dex/tools/run_debugger_internal_/Tool.py b/cross-project-tests/debuginfo-tests/dexter/dex/tools/run_debugger_internal_/Tool.py similarity index 100% rename from debuginfo-tests/dexter/dex/tools/run_debugger_internal_/Tool.py rename to cross-project-tests/debuginfo-tests/dexter/dex/tools/run_debugger_internal_/Tool.py diff --git a/debuginfo-tests/dexter/dex/tools/run_debugger_internal_/__init__.py b/cross-project-tests/debuginfo-tests/dexter/dex/tools/run_debugger_internal_/__init__.py similarity index 100% rename from debuginfo-tests/dexter/dex/tools/run_debugger_internal_/__init__.py rename to cross-project-tests/debuginfo-tests/dexter/dex/tools/run_debugger_internal_/__init__.py diff --git a/debuginfo-tests/dexter/dex/tools/test/Tool.py b/cross-project-tests/debuginfo-tests/dexter/dex/tools/test/Tool.py similarity index 100% rename from debuginfo-tests/dexter/dex/tools/test/Tool.py rename to cross-project-tests/debuginfo-tests/dexter/dex/tools/test/Tool.py diff --git a/debuginfo-tests/dexter/dex/tools/test/__init__.py b/cross-project-tests/debuginfo-tests/dexter/dex/tools/test/__init__.py similarity index 100% rename from debuginfo-tests/dexter/dex/tools/test/__init__.py rename to cross-project-tests/debuginfo-tests/dexter/dex/tools/test/__init__.py diff --git a/debuginfo-tests/dexter/dex/tools/view/Tool.py b/cross-project-tests/debuginfo-tests/dexter/dex/tools/view/Tool.py similarity index 100% rename from debuginfo-tests/dexter/dex/tools/view/Tool.py rename to cross-project-tests/debuginfo-tests/dexter/dex/tools/view/Tool.py diff --git a/debuginfo-tests/dexter/dex/tools/view/__init__.py b/cross-project-tests/debuginfo-tests/dexter/dex/tools/view/__init__.py similarity index 100% rename from debuginfo-tests/dexter/dex/tools/view/__init__.py rename to cross-project-tests/debuginfo-tests/dexter/dex/tools/view/__init__.py diff --git a/debuginfo-tests/dexter/dex/utils/Environment.py b/cross-project-tests/debuginfo-tests/dexter/dex/utils/Environment.py similarity index 100% rename from debuginfo-tests/dexter/dex/utils/Environment.py rename to cross-project-tests/debuginfo-tests/dexter/dex/utils/Environment.py diff --git a/debuginfo-tests/dexter/dex/utils/Exceptions.py b/cross-project-tests/debuginfo-tests/dexter/dex/utils/Exceptions.py similarity index 100% rename from debuginfo-tests/dexter/dex/utils/Exceptions.py rename to cross-project-tests/debuginfo-tests/dexter/dex/utils/Exceptions.py diff --git a/debuginfo-tests/dexter/dex/utils/ExtArgParse.py b/cross-project-tests/debuginfo-tests/dexter/dex/utils/ExtArgParse.py similarity index 100% rename from debuginfo-tests/dexter/dex/utils/ExtArgParse.py rename to cross-project-tests/debuginfo-tests/dexter/dex/utils/ExtArgParse.py diff --git a/debuginfo-tests/dexter/dex/utils/PrettyOutputBase.py b/cross-project-tests/debuginfo-tests/dexter/dex/utils/PrettyOutputBase.py similarity index 100% rename from debuginfo-tests/dexter/dex/utils/PrettyOutputBase.py rename to cross-project-tests/debuginfo-tests/dexter/dex/utils/PrettyOutputBase.py diff --git a/debuginfo-tests/dexter/dex/utils/ReturnCode.py b/cross-project-tests/debuginfo-tests/dexter/dex/utils/ReturnCode.py similarity index 100% rename from debuginfo-tests/dexter/dex/utils/ReturnCode.py rename to cross-project-tests/debuginfo-tests/dexter/dex/utils/ReturnCode.py diff --git a/debuginfo-tests/dexter/dex/utils/RootDirectory.py b/cross-project-tests/debuginfo-tests/dexter/dex/utils/RootDirectory.py similarity index 100% rename from debuginfo-tests/dexter/dex/utils/RootDirectory.py rename to cross-project-tests/debuginfo-tests/dexter/dex/utils/RootDirectory.py diff --git a/debuginfo-tests/dexter/dex/utils/Timer.py b/cross-project-tests/debuginfo-tests/dexter/dex/utils/Timer.py similarity index 100% rename from debuginfo-tests/dexter/dex/utils/Timer.py rename to cross-project-tests/debuginfo-tests/dexter/dex/utils/Timer.py diff --git a/debuginfo-tests/dexter/dex/utils/UnitTests.py b/cross-project-tests/debuginfo-tests/dexter/dex/utils/UnitTests.py similarity index 100% rename from debuginfo-tests/dexter/dex/utils/UnitTests.py rename to cross-project-tests/debuginfo-tests/dexter/dex/utils/UnitTests.py diff --git a/debuginfo-tests/dexter/dex/utils/Version.py b/cross-project-tests/debuginfo-tests/dexter/dex/utils/Version.py similarity index 100% rename from debuginfo-tests/dexter/dex/utils/Version.py rename to cross-project-tests/debuginfo-tests/dexter/dex/utils/Version.py diff --git a/debuginfo-tests/dexter/dex/utils/Warning.py b/cross-project-tests/debuginfo-tests/dexter/dex/utils/Warning.py similarity index 100% rename from debuginfo-tests/dexter/dex/utils/Warning.py rename to cross-project-tests/debuginfo-tests/dexter/dex/utils/Warning.py diff --git a/debuginfo-tests/dexter/dex/utils/WorkingDirectory.py b/cross-project-tests/debuginfo-tests/dexter/dex/utils/WorkingDirectory.py similarity index 100% rename from debuginfo-tests/dexter/dex/utils/WorkingDirectory.py rename to cross-project-tests/debuginfo-tests/dexter/dex/utils/WorkingDirectory.py diff --git a/debuginfo-tests/dexter/dex/utils/__init__.py b/cross-project-tests/debuginfo-tests/dexter/dex/utils/__init__.py similarity index 100% rename from debuginfo-tests/dexter/dex/utils/__init__.py rename to cross-project-tests/debuginfo-tests/dexter/dex/utils/__init__.py diff --git a/debuginfo-tests/dexter/dex/utils/posix/PrettyOutput.py b/cross-project-tests/debuginfo-tests/dexter/dex/utils/posix/PrettyOutput.py similarity index 100% rename from debuginfo-tests/dexter/dex/utils/posix/PrettyOutput.py rename to cross-project-tests/debuginfo-tests/dexter/dex/utils/posix/PrettyOutput.py diff --git a/debuginfo-tests/dexter/dex/utils/posix/__init__.py b/cross-project-tests/debuginfo-tests/dexter/dex/utils/posix/__init__.py similarity index 100% rename from debuginfo-tests/dexter/dex/utils/posix/__init__.py rename to cross-project-tests/debuginfo-tests/dexter/dex/utils/posix/__init__.py diff --git a/debuginfo-tests/dexter/dex/utils/windows/PrettyOutput.py b/cross-project-tests/debuginfo-tests/dexter/dex/utils/windows/PrettyOutput.py similarity index 100% rename from debuginfo-tests/dexter/dex/utils/windows/PrettyOutput.py rename to cross-project-tests/debuginfo-tests/dexter/dex/utils/windows/PrettyOutput.py diff --git a/debuginfo-tests/dexter/dex/utils/windows/__init__.py b/cross-project-tests/debuginfo-tests/dexter/dex/utils/windows/__init__.py similarity index 100% rename from debuginfo-tests/dexter/dex/utils/windows/__init__.py rename to cross-project-tests/debuginfo-tests/dexter/dex/utils/windows/__init__.py diff --git a/debuginfo-tests/dexter/dexter.py b/cross-project-tests/debuginfo-tests/dexter/dexter.py similarity index 100% rename from debuginfo-tests/dexter/dexter.py rename to cross-project-tests/debuginfo-tests/dexter/dexter.py diff --git a/debuginfo-tests/dexter/feature_tests/Readme.md b/cross-project-tests/debuginfo-tests/dexter/feature_tests/Readme.md similarity index 100% rename from debuginfo-tests/dexter/feature_tests/Readme.md rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/Readme.md diff --git a/debuginfo-tests/dexter/feature_tests/commands/penalty/dex_declare_file.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/penalty/dex_declare_file.cpp similarity index 100% rename from debuginfo-tests/dexter/feature_tests/commands/penalty/dex_declare_file.cpp rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/penalty/dex_declare_file.cpp diff --git a/debuginfo-tests/dexter/feature_tests/commands/penalty/expect_program_state.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/penalty/expect_program_state.cpp similarity index 100% rename from debuginfo-tests/dexter/feature_tests/commands/penalty/expect_program_state.cpp rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/penalty/expect_program_state.cpp diff --git a/debuginfo-tests/dexter/feature_tests/commands/penalty/expect_step_kinds.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/penalty/expect_step_kinds.cpp similarity index 100% rename from debuginfo-tests/dexter/feature_tests/commands/penalty/expect_step_kinds.cpp rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/penalty/expect_step_kinds.cpp diff --git a/debuginfo-tests/dexter/feature_tests/commands/penalty/expect_step_order.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/penalty/expect_step_order.cpp similarity index 100% rename from debuginfo-tests/dexter/feature_tests/commands/penalty/expect_step_order.cpp rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/penalty/expect_step_order.cpp diff --git a/debuginfo-tests/dexter/feature_tests/commands/penalty/expect_watch_type.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/penalty/expect_watch_type.cpp similarity index 100% rename from debuginfo-tests/dexter/feature_tests/commands/penalty/expect_watch_type.cpp rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/penalty/expect_watch_type.cpp diff --git a/debuginfo-tests/dexter/feature_tests/commands/penalty/expect_watch_value.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/penalty/expect_watch_value.cpp similarity index 100% rename from debuginfo-tests/dexter/feature_tests/commands/penalty/expect_watch_value.cpp rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/penalty/expect_watch_value.cpp diff --git a/debuginfo-tests/dexter/feature_tests/commands/penalty/unreachable.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/penalty/unreachable.cpp similarity index 100% rename from debuginfo-tests/dexter/feature_tests/commands/penalty/unreachable.cpp rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/penalty/unreachable.cpp diff --git a/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/dex_and_source/commands.dex b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/dex_and_source/commands.dex similarity index 100% rename from debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/dex_and_source/commands.dex rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/dex_and_source/commands.dex diff --git a/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/dex_and_source/lit.local.cfg.py b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/dex_and_source/lit.local.cfg.py similarity index 100% rename from debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/dex_and_source/lit.local.cfg.py rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/dex_and_source/lit.local.cfg.py diff --git a/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/dex_and_source/test.cfg b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/dex_and_source/test.cfg similarity index 100% rename from debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/dex_and_source/test.cfg rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/dex_and_source/test.cfg diff --git a/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/dex_and_source/test.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/dex_and_source/test.cpp similarity index 100% rename from debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/dex_and_source/test.cpp rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/dex_and_source/test.cpp diff --git a/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/precompiled_binary/commands.dex b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/precompiled_binary/commands.dex similarity index 100% rename from debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/precompiled_binary/commands.dex rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/precompiled_binary/commands.dex diff --git a/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/precompiled_binary/lit.local.cfg.py b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/precompiled_binary/lit.local.cfg.py similarity index 100% rename from debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/precompiled_binary/lit.local.cfg.py rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/precompiled_binary/lit.local.cfg.py diff --git a/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/precompiled_binary/test.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/precompiled_binary/test.cpp similarity index 100% rename from debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/precompiled_binary/test.cpp rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/precompiled_binary/test.cpp diff --git a/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/precompiled_binary_different_dir/dex_commands/commands.dex b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/precompiled_binary_different_dir/dex_commands/commands.dex similarity index 100% rename from debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/precompiled_binary_different_dir/dex_commands/commands.dex rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/precompiled_binary_different_dir/dex_commands/commands.dex diff --git a/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/precompiled_binary_different_dir/dex_commands/source_root_dir.dex b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/precompiled_binary_different_dir/dex_commands/source_root_dir.dex similarity index 100% rename from debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/precompiled_binary_different_dir/dex_commands/source_root_dir.dex rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/precompiled_binary_different_dir/dex_commands/source_root_dir.dex diff --git a/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/precompiled_binary_different_dir/lit.local.cfg.py b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/precompiled_binary_different_dir/lit.local.cfg.py similarity index 100% rename from debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/precompiled_binary_different_dir/lit.local.cfg.py rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/precompiled_binary_different_dir/lit.local.cfg.py diff --git a/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/precompiled_binary_different_dir/source/test.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/precompiled_binary_different_dir/source/test.cpp similarity index 100% rename from debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/precompiled_binary_different_dir/source/test.cpp rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/precompiled_binary_different_dir/source/test.cpp diff --git a/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/windows_noncanonical_path/lit.local.cfg.py b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/windows_noncanonical_path/lit.local.cfg.py similarity index 100% rename from debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/windows_noncanonical_path/lit.local.cfg.py rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/windows_noncanonical_path/lit.local.cfg.py diff --git a/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/windows_noncanonical_path/source/test file.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/windows_noncanonical_path/source/test file.cpp similarity index 100% rename from debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/windows_noncanonical_path/source/test file.cpp rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/windows_noncanonical_path/source/test file.cpp diff --git a/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/windows_noncanonical_path/test.cfg b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/windows_noncanonical_path/test.cfg similarity index 100% rename from debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/windows_noncanonical_path/test.cfg rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/windows_noncanonical_path/test.cfg diff --git a/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/windows_noncanonical_path/test.dex b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/windows_noncanonical_path/test.dex similarity index 100% rename from debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/windows_noncanonical_path/test.dex rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/windows_noncanonical_path/test.dex diff --git a/debuginfo-tests/dexter/feature_tests/commands/perfect/expect_program_state.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/expect_program_state.cpp similarity index 100% rename from debuginfo-tests/dexter/feature_tests/commands/perfect/expect_program_state.cpp rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/expect_program_state.cpp diff --git a/debuginfo-tests/dexter/feature_tests/commands/perfect/expect_step_kind/direction.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/expect_step_kind/direction.cpp similarity index 100% rename from debuginfo-tests/dexter/feature_tests/commands/perfect/expect_step_kind/direction.cpp rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/expect_step_kind/direction.cpp diff --git a/debuginfo-tests/dexter/feature_tests/commands/perfect/expect_step_kind/func.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/expect_step_kind/func.cpp similarity index 100% rename from debuginfo-tests/dexter/feature_tests/commands/perfect/expect_step_kind/func.cpp rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/expect_step_kind/func.cpp diff --git a/debuginfo-tests/dexter/feature_tests/commands/perfect/expect_step_kind/func_external.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/expect_step_kind/func_external.cpp similarity index 100% rename from debuginfo-tests/dexter/feature_tests/commands/perfect/expect_step_kind/func_external.cpp rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/expect_step_kind/func_external.cpp diff --git a/debuginfo-tests/dexter/feature_tests/commands/perfect/expect_step_kind/recursive.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/expect_step_kind/recursive.cpp similarity index 100% rename from debuginfo-tests/dexter/feature_tests/commands/perfect/expect_step_kind/recursive.cpp rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/expect_step_kind/recursive.cpp diff --git a/debuginfo-tests/dexter/feature_tests/commands/perfect/expect_step_kind/small_loop.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/expect_step_kind/small_loop.cpp similarity index 100% rename from debuginfo-tests/dexter/feature_tests/commands/perfect/expect_step_kind/small_loop.cpp rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/expect_step_kind/small_loop.cpp diff --git a/debuginfo-tests/dexter/feature_tests/commands/perfect/expect_step_order.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/expect_step_order.cpp similarity index 100% rename from debuginfo-tests/dexter/feature_tests/commands/perfect/expect_step_order.cpp rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/expect_step_order.cpp diff --git a/debuginfo-tests/dexter/feature_tests/commands/perfect/expect_watch_type.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/expect_watch_type.cpp similarity index 100% rename from debuginfo-tests/dexter/feature_tests/commands/perfect/expect_watch_type.cpp rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/expect_watch_type.cpp diff --git a/debuginfo-tests/dexter/feature_tests/commands/perfect/expect_watch_value.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/expect_watch_value.cpp similarity index 100% rename from debuginfo-tests/dexter/feature_tests/commands/perfect/expect_watch_value.cpp rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/expect_watch_value.cpp diff --git a/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/hit_count.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/hit_count.cpp similarity index 100% rename from debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/hit_count.cpp rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/hit_count.cpp diff --git a/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_check_json_step_count.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_check_json_step_count.cpp similarity index 100% rename from debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_check_json_step_count.cpp rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_check_json_step_count.cpp diff --git a/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_expect_loop.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_expect_loop.cpp similarity index 100% rename from debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_expect_loop.cpp rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_expect_loop.cpp diff --git a/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_expect_value.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_expect_value.cpp similarity index 100% rename from debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_expect_value.cpp rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_expect_value.cpp diff --git a/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_line_mismatch.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_line_mismatch.cpp similarity index 100% rename from debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_line_mismatch.cpp rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_line_mismatch.cpp diff --git a/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_overlapping_ranges.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_overlapping_ranges.cpp similarity index 100% rename from debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_overlapping_ranges.cpp rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_overlapping_ranges.cpp diff --git a/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_same_line_conditional.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_same_line_conditional.cpp similarity index 100% rename from debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_same_line_conditional.cpp rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_same_line_conditional.cpp diff --git a/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/unconditional.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/unconditional.cpp similarity index 100% rename from debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/unconditional.cpp rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/unconditional.cpp diff --git a/debuginfo-tests/dexter/feature_tests/commands/perfect/lit.local.cfg b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/lit.local.cfg similarity index 100% rename from debuginfo-tests/dexter/feature_tests/commands/perfect/lit.local.cfg rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/lit.local.cfg diff --git a/debuginfo-tests/dexter/feature_tests/commands/perfect/unreachable.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/unreachable.cpp similarity index 100% rename from debuginfo-tests/dexter/feature_tests/commands/perfect/unreachable.cpp rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/unreachable.cpp diff --git a/debuginfo-tests/dexter/feature_tests/lit.local.cfg b/cross-project-tests/debuginfo-tests/dexter/feature_tests/lit.local.cfg similarity index 100% rename from debuginfo-tests/dexter/feature_tests/lit.local.cfg rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/lit.local.cfg diff --git a/debuginfo-tests/dexter/feature_tests/subtools/clang-opt-bisect/clang-opt-bisect.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/clang-opt-bisect/clang-opt-bisect.cpp similarity index 100% rename from debuginfo-tests/dexter/feature_tests/subtools/clang-opt-bisect/clang-opt-bisect.cpp rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/clang-opt-bisect/clang-opt-bisect.cpp diff --git a/debuginfo-tests/dexter/feature_tests/subtools/help/help.test b/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/help/help.test similarity index 100% rename from debuginfo-tests/dexter/feature_tests/subtools/help/help.test rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/help/help.test diff --git a/debuginfo-tests/dexter/feature_tests/subtools/list-debuggers/list-debuggers.test b/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/list-debuggers/list-debuggers.test similarity index 100% rename from debuginfo-tests/dexter/feature_tests/subtools/list-debuggers/list-debuggers.test rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/list-debuggers/list-debuggers.test diff --git a/debuginfo-tests/dexter/feature_tests/subtools/test/err_bad_label_ref.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/test/err_bad_label_ref.cpp similarity index 100% rename from debuginfo-tests/dexter/feature_tests/subtools/test/err_bad_label_ref.cpp rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/test/err_bad_label_ref.cpp diff --git a/debuginfo-tests/dexter/feature_tests/subtools/test/err_duplicate_label.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/test/err_duplicate_label.cpp similarity index 100% rename from debuginfo-tests/dexter/feature_tests/subtools/test/err_duplicate_label.cpp rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/test/err_duplicate_label.cpp diff --git a/debuginfo-tests/dexter/feature_tests/subtools/test/err_label_kwarg.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/test/err_label_kwarg.cpp similarity index 100% rename from debuginfo-tests/dexter/feature_tests/subtools/test/err_label_kwarg.cpp rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/test/err_label_kwarg.cpp diff --git a/debuginfo-tests/dexter/feature_tests/subtools/test/err_limit_steps_no_values.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/test/err_limit_steps_no_values.cpp similarity index 100% rename from debuginfo-tests/dexter/feature_tests/subtools/test/err_limit_steps_no_values.cpp rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/test/err_limit_steps_no_values.cpp diff --git a/debuginfo-tests/dexter/feature_tests/subtools/test/err_paren.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/test/err_paren.cpp similarity index 100% rename from debuginfo-tests/dexter/feature_tests/subtools/test/err_paren.cpp rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/test/err_paren.cpp diff --git a/debuginfo-tests/dexter/feature_tests/subtools/test/err_paren_mline.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/test/err_paren_mline.cpp similarity index 100% rename from debuginfo-tests/dexter/feature_tests/subtools/test/err_paren_mline.cpp rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/test/err_paren_mline.cpp diff --git a/debuginfo-tests/dexter/feature_tests/subtools/test/err_syntax.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/test/err_syntax.cpp similarity index 100% rename from debuginfo-tests/dexter/feature_tests/subtools/test/err_syntax.cpp rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/test/err_syntax.cpp diff --git a/debuginfo-tests/dexter/feature_tests/subtools/test/err_syntax_mline.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/test/err_syntax_mline.cpp similarity index 100% rename from debuginfo-tests/dexter/feature_tests/subtools/test/err_syntax_mline.cpp rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/test/err_syntax_mline.cpp diff --git a/debuginfo-tests/dexter/feature_tests/subtools/test/err_type.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/test/err_type.cpp similarity index 100% rename from debuginfo-tests/dexter/feature_tests/subtools/test/err_type.cpp rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/test/err_type.cpp diff --git a/debuginfo-tests/dexter/feature_tests/subtools/test/err_type_mline.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/test/err_type_mline.cpp similarity index 100% rename from debuginfo-tests/dexter/feature_tests/subtools/test/err_type_mline.cpp rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/test/err_type_mline.cpp diff --git a/debuginfo-tests/dexter/feature_tests/subtools/test/label_another_line.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/test/label_another_line.cpp similarity index 100% rename from debuginfo-tests/dexter/feature_tests/subtools/test/label_another_line.cpp rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/test/label_another_line.cpp diff --git a/debuginfo-tests/dexter/feature_tests/subtools/test/label_offset.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/test/label_offset.cpp similarity index 100% rename from debuginfo-tests/dexter/feature_tests/subtools/test/label_offset.cpp rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/test/label_offset.cpp diff --git a/debuginfo-tests/dexter/feature_tests/subtools/test/source-root-dir.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/test/source-root-dir.cpp similarity index 100% rename from debuginfo-tests/dexter/feature_tests/subtools/test/source-root-dir.cpp rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/test/source-root-dir.cpp diff --git a/debuginfo-tests/dexter/feature_tests/subtools/view.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/view.cpp similarity index 100% rename from debuginfo-tests/dexter/feature_tests/subtools/view.cpp rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/view.cpp diff --git a/debuginfo-tests/dexter/feature_tests/unittests/run.test b/cross-project-tests/debuginfo-tests/dexter/feature_tests/unittests/run.test similarity index 100% rename from debuginfo-tests/dexter/feature_tests/unittests/run.test rename to cross-project-tests/debuginfo-tests/dexter/feature_tests/unittests/run.test diff --git a/debuginfo-tests/llgdb-tests/apple-accel.cpp b/cross-project-tests/debuginfo-tests/llgdb-tests/apple-accel.cpp similarity index 100% rename from debuginfo-tests/llgdb-tests/apple-accel.cpp rename to cross-project-tests/debuginfo-tests/llgdb-tests/apple-accel.cpp diff --git a/debuginfo-tests/llgdb-tests/asan-blocks.c b/cross-project-tests/debuginfo-tests/llgdb-tests/asan-blocks.c similarity index 100% rename from debuginfo-tests/llgdb-tests/asan-blocks.c rename to cross-project-tests/debuginfo-tests/llgdb-tests/asan-blocks.c diff --git a/debuginfo-tests/llgdb-tests/asan-deque.cpp b/cross-project-tests/debuginfo-tests/llgdb-tests/asan-deque.cpp similarity index 100% rename from debuginfo-tests/llgdb-tests/asan-deque.cpp rename to cross-project-tests/debuginfo-tests/llgdb-tests/asan-deque.cpp diff --git a/debuginfo-tests/llgdb-tests/asan.c b/cross-project-tests/debuginfo-tests/llgdb-tests/asan.c similarity index 100% rename from debuginfo-tests/llgdb-tests/asan.c rename to cross-project-tests/debuginfo-tests/llgdb-tests/asan.c diff --git a/debuginfo-tests/llgdb-tests/block_var.m b/cross-project-tests/debuginfo-tests/llgdb-tests/block_var.m similarity index 100% rename from debuginfo-tests/llgdb-tests/block_var.m rename to cross-project-tests/debuginfo-tests/llgdb-tests/block_var.m diff --git a/debuginfo-tests/llgdb-tests/blocks.m b/cross-project-tests/debuginfo-tests/llgdb-tests/blocks.m similarity index 100% rename from debuginfo-tests/llgdb-tests/blocks.m rename to cross-project-tests/debuginfo-tests/llgdb-tests/blocks.m diff --git a/debuginfo-tests/llgdb-tests/foreach.m b/cross-project-tests/debuginfo-tests/llgdb-tests/foreach.m similarity index 100% rename from debuginfo-tests/llgdb-tests/foreach.m rename to cross-project-tests/debuginfo-tests/llgdb-tests/foreach.m diff --git a/debuginfo-tests/llgdb-tests/forward-declare-class.cpp b/cross-project-tests/debuginfo-tests/llgdb-tests/forward-declare-class.cpp similarity index 100% rename from debuginfo-tests/llgdb-tests/forward-declare-class.cpp rename to cross-project-tests/debuginfo-tests/llgdb-tests/forward-declare-class.cpp diff --git a/debuginfo-tests/llgdb-tests/lit.local.cfg b/cross-project-tests/debuginfo-tests/llgdb-tests/lit.local.cfg similarity index 100% rename from debuginfo-tests/llgdb-tests/lit.local.cfg rename to cross-project-tests/debuginfo-tests/llgdb-tests/lit.local.cfg diff --git a/debuginfo-tests/llgdb-tests/llgdb.py b/cross-project-tests/debuginfo-tests/llgdb-tests/llgdb.py similarity index 100% rename from debuginfo-tests/llgdb-tests/llgdb.py rename to cross-project-tests/debuginfo-tests/llgdb-tests/llgdb.py diff --git a/debuginfo-tests/llgdb-tests/nested-struct.cpp b/cross-project-tests/debuginfo-tests/llgdb-tests/nested-struct.cpp similarity index 100% rename from debuginfo-tests/llgdb-tests/nested-struct.cpp rename to cross-project-tests/debuginfo-tests/llgdb-tests/nested-struct.cpp diff --git a/debuginfo-tests/llgdb-tests/nrvo-string.cpp b/cross-project-tests/debuginfo-tests/llgdb-tests/nrvo-string.cpp similarity index 100% rename from debuginfo-tests/llgdb-tests/nrvo-string.cpp rename to cross-project-tests/debuginfo-tests/llgdb-tests/nrvo-string.cpp diff --git a/debuginfo-tests/llgdb-tests/safestack.c b/cross-project-tests/debuginfo-tests/llgdb-tests/safestack.c similarity index 100% rename from debuginfo-tests/llgdb-tests/safestack.c rename to cross-project-tests/debuginfo-tests/llgdb-tests/safestack.c diff --git a/debuginfo-tests/llgdb-tests/sret.cpp b/cross-project-tests/debuginfo-tests/llgdb-tests/sret.cpp similarity index 100% rename from debuginfo-tests/llgdb-tests/sret.cpp rename to cross-project-tests/debuginfo-tests/llgdb-tests/sret.cpp diff --git a/debuginfo-tests/llgdb-tests/static-member-2.cpp b/cross-project-tests/debuginfo-tests/llgdb-tests/static-member-2.cpp similarity index 100% rename from debuginfo-tests/llgdb-tests/static-member-2.cpp rename to cross-project-tests/debuginfo-tests/llgdb-tests/static-member-2.cpp diff --git a/debuginfo-tests/llgdb-tests/static-member.cpp b/cross-project-tests/debuginfo-tests/llgdb-tests/static-member.cpp similarity index 100% rename from debuginfo-tests/llgdb-tests/static-member.cpp rename to cross-project-tests/debuginfo-tests/llgdb-tests/static-member.cpp diff --git a/debuginfo-tests/llgdb-tests/test_debuginfo.pl b/cross-project-tests/debuginfo-tests/llgdb-tests/test_debuginfo.pl similarity index 100% rename from debuginfo-tests/llgdb-tests/test_debuginfo.pl rename to cross-project-tests/debuginfo-tests/llgdb-tests/test_debuginfo.pl diff --git a/debuginfo-tests/llvm-prettyprinters/gdb/lit.local.cfg b/cross-project-tests/debuginfo-tests/llvm-prettyprinters/gdb/lit.local.cfg similarity index 100% rename from debuginfo-tests/llvm-prettyprinters/gdb/lit.local.cfg rename to cross-project-tests/debuginfo-tests/llvm-prettyprinters/gdb/lit.local.cfg diff --git a/debuginfo-tests/llvm-prettyprinters/gdb/llvm-support.cpp b/cross-project-tests/debuginfo-tests/llvm-prettyprinters/gdb/llvm-support.cpp similarity index 100% rename from debuginfo-tests/llvm-prettyprinters/gdb/llvm-support.cpp rename to cross-project-tests/debuginfo-tests/llvm-prettyprinters/gdb/llvm-support.cpp diff --git a/debuginfo-tests/llvm-prettyprinters/gdb/llvm-support.gdb b/cross-project-tests/debuginfo-tests/llvm-prettyprinters/gdb/llvm-support.gdb similarity index 100% rename from debuginfo-tests/llvm-prettyprinters/gdb/llvm-support.gdb rename to cross-project-tests/debuginfo-tests/llvm-prettyprinters/gdb/llvm-support.gdb diff --git a/debuginfo-tests/llvm-prettyprinters/gdb/mlir-support.cpp b/cross-project-tests/debuginfo-tests/llvm-prettyprinters/gdb/mlir-support.cpp similarity index 100% rename from debuginfo-tests/llvm-prettyprinters/gdb/mlir-support.cpp rename to cross-project-tests/debuginfo-tests/llvm-prettyprinters/gdb/mlir-support.cpp diff --git a/debuginfo-tests/llvm-prettyprinters/gdb/mlir-support.gdb b/cross-project-tests/debuginfo-tests/llvm-prettyprinters/gdb/mlir-support.gdb similarity index 100% rename from debuginfo-tests/llvm-prettyprinters/gdb/mlir-support.gdb rename to cross-project-tests/debuginfo-tests/llvm-prettyprinters/gdb/mlir-support.gdb diff --git a/debuginfo-tests/win_cdb-tests/README.txt b/cross-project-tests/debuginfo-tests/win_cdb-tests/README.txt similarity index 100% rename from debuginfo-tests/win_cdb-tests/README.txt rename to cross-project-tests/debuginfo-tests/win_cdb-tests/README.txt diff --git a/debuginfo-tests/win_cdb-tests/lit.local.cfg.py b/cross-project-tests/debuginfo-tests/win_cdb-tests/lit.local.cfg.py similarity index 100% rename from debuginfo-tests/win_cdb-tests/lit.local.cfg.py rename to cross-project-tests/debuginfo-tests/win_cdb-tests/lit.local.cfg.py diff --git a/debuginfo-tests/lit.cfg.py b/cross-project-tests/lit.cfg.py similarity index 96% rename from debuginfo-tests/lit.cfg.py rename to cross-project-tests/lit.cfg.py index ac46e27d41cbf..aefe0303b764a 100644 --- a/debuginfo-tests/lit.cfg.py +++ b/cross-project-tests/lit.cfg.py @@ -32,7 +32,7 @@ config.excludes = ['Inputs'] # test_source_root: The root path where tests are located. -config.test_source_root = os.path.join(config.debuginfo_tests_src_root) +config.test_source_root = config.debuginfo_tests_src_root # test_exec_root: The root path where tests should be run. config.test_exec_root = config.debuginfo_tests_obj_root @@ -41,7 +41,8 @@ tools = [ ToolSubst('%test_debuginfo', command=os.path.join( - config.debuginfo_tests_src_root, 'llgdb-tests', 'test_debuginfo.pl')), + config.debuginfo_tests_src_root, 'debuginfo-tests', + 'llgdb-tests', 'test_debuginfo.pl')), ToolSubst("%llvm_src_root", config.llvm_src_root), ToolSubst("%llvm_tools_dir", config.llvm_tools_dir), ] @@ -126,7 +127,7 @@ def can_target_host(): # Produce dexter path, lldb path, and combine into the %dexter substitution # for running a test. dexter_path = os.path.join(config.debuginfo_tests_src_root, - 'dexter', 'dexter.py') + 'debuginfo-tests', 'dexter', 'dexter.py') dexter_test_cmd = '"{}" "{}" test'.format(sys.executable, dexter_path) if lldb_path is not None: dexter_test_cmd += ' --lldb-executable "{}"'.format(lldb_path) diff --git a/debuginfo-tests/lit.site.cfg.py.in b/cross-project-tests/lit.site.cfg.py.in similarity index 100% rename from debuginfo-tests/lit.site.cfg.py.in rename to cross-project-tests/lit.site.cfg.py.in diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index 135036f509d20..5d3ad7a4fd582 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -64,7 +64,7 @@ endif() # LLVM_EXTERNAL_${project}_SOURCE_DIR using LLVM_ALL_PROJECTS # This allows an easy way of setting up a build directory for llvm and another # one for llvm+clang+... using the same sources. -set(LLVM_ALL_PROJECTS "clang;clang-tools-extra;compiler-rt;debuginfo-tests;libc;libclc;libcxx;libcxxabi;libunwind;lld;lldb;mlir;openmp;parallel-libs;polly;pstl") +set(LLVM_ALL_PROJECTS "clang;clang-tools-extra;compiler-rt;cross-project-tests;libc;libclc;libcxx;libcxxabi;libunwind;lld;lldb;mlir;openmp;parallel-libs;polly;pstl") # The flang project is not yet part of "all" projects (see C++ requirements) set(LLVM_EXTRA_PROJECTS "flang") # List of all known projects in the mono repo diff --git a/llvm/docs/CMake.rst b/llvm/docs/CMake.rst index f1ac2c7d49347..bab0508fdeb3d 100644 --- a/llvm/docs/CMake.rst +++ b/llvm/docs/CMake.rst @@ -434,7 +434,7 @@ LLVM-specific variables This feature allows to have one build for only LLVM and another for clang+llvm using the same source checkout. The full list is: - ``clang;clang-tools-extra;compiler-rt;debuginfo-tests;libc;libclc;libcxx;libcxxabi;libunwind;lld;lldb;openmp;parallel-libs;polly;pstl`` + ``clang;clang-tools-extra;compiler-rt;cross-project-tests;libc;libclc;libcxx;libcxxabi;libunwind;lld;lldb;openmp;parallel-libs;polly;pstl`` **LLVM_ENABLE_RTTI**:BOOL Build LLVM with run-time type information. Defaults to OFF. diff --git a/llvm/docs/GettingStarted.rst b/llvm/docs/GettingStarted.rst index e44059b3031c7..b776ae742b4e5 100644 --- a/llvm/docs/GettingStarted.rst +++ b/llvm/docs/GettingStarted.rst @@ -64,7 +64,7 @@ This is an example workflow and configuration to get and build the LLVM source: * ``-DLLVM_ENABLE_PROJECTS='...'`` --- semicolon-separated list of the LLVM subprojects you'd like to additionally build. Can include any of: clang, clang-tools-extra, libcxx, libcxxabi, libunwind, lldb, compiler-rt, lld, - polly, or debuginfo-tests. + polly, or cross-project-tests. For example, to build LLVM, Clang, libcxx, and libcxxabi, use ``-DLLVM_ENABLE_PROJECTS="clang;libcxx;libcxxabi"``. diff --git a/llvm/docs/TestingGuide.rst b/llvm/docs/TestingGuide.rst index 64c1c0ea78ebe..4ec6a3d52b072 100644 --- a/llvm/docs/TestingGuide.rst +++ b/llvm/docs/TestingGuide.rst @@ -107,7 +107,7 @@ The test are written in C based languages or in LLVM assembly language. These tests are compiled and run under a debugger. The debugger output is checked to validate of debugging information. See README.txt in the test suite for more information. This test suite is located in the -``debuginfo-tests`` Subversion module. +``cross-project-tests/debuginfo-tests`` directory. Quick start =========== diff --git a/llvm/projects/CMakeLists.txt b/llvm/projects/CMakeLists.txt index 7a948bdc8506b..0ffc4fa3c0496 100644 --- a/llvm/projects/CMakeLists.txt +++ b/llvm/projects/CMakeLists.txt @@ -12,7 +12,7 @@ foreach(entry ${entries}) (NOT ${entry} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}/test-suite) AND (NOT ${entry} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}/parallel-libs) AND (NOT ${entry} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}/openmp) AND - (NOT ${entry} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}/debuginfo-tests)) + (NOT ${entry} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}/cross-project-tests)) get_filename_component(entry_name "${entry}" NAME) add_llvm_external_project(${entry_name}) endif() @@ -45,5 +45,5 @@ add_llvm_external_project(parallel-libs) add_llvm_external_project(openmp) if(LLVM_INCLUDE_TESTS) - add_llvm_external_project(debuginfo-tests) + add_llvm_external_project(cross-project-tests) endif() From 2b9ac789ecdb2f8fd23c5af9be72e05191dbbae2 Mon Sep 17 00:00:00 2001 From: James Henderson Date: Wed, 10 Feb 2021 14:16:45 +0000 Subject: [PATCH 028/619] [cross-project-tests] Add/update check-* targets for cross-project-tests This change modifies the existing check-debuginfo target to only run the debuginfo tests within the cross-project-tests, and adds a new target (check-cross-project) which runs all the tests. The former has also been modified to not be included in check-all (since the check-cross-project target covers them). Differential Revision: https://reviews.llvm.org/D96513 Reviewed by: aprantl --- cross-project-tests/CMakeLists.txt | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/cross-project-tests/CMakeLists.txt b/cross-project-tests/CMakeLists.txt index a4ae34d70227e..f3345099190b2 100644 --- a/cross-project-tests/CMakeLists.txt +++ b/cross-project-tests/CMakeLists.txt @@ -66,14 +66,22 @@ configure_lit_site_cfg( ${CMAKE_CURRENT_SOURCE_DIR}/lit.cfg.py ) -add_lit_testsuite(check-debuginfo "Running cross-project tests" +add_lit_testsuite(check-cross-project "Running cross-project tests" ${CMAKE_CURRENT_BINARY_DIR} DEPENDS ${CROSS_PROJECT_TEST_DEPS} ) -# Add check-debuginfo-* targets. +# Add alias for debuginfo test subset. +add_lit_testsuite(check-debuginfo "Running debuginfo tests" + ${CMAKE_CURRENT_BINARY_DIR}/debuginfo-tests + EXCLUDE_FROM_CHECK_ALL + DEPENDS ${CROSS_PROJECT_TEST_DEPS} + ) + +# Add check-cross-project-* targets. add_lit_testsuites(CROSS_PROJECT ${CMAKE_CURRENT_SOURCE_DIR} DEPENDS ${CROSS_PROJECT_TEST_DEPS} ) +set_target_properties(check-cross-project PROPERTIES FOLDER "Tests") set_target_properties(check-debuginfo PROPERTIES FOLDER "Tests") From ad81dea9f66db61eed1229cd7e62babb6d381257 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Mon, 28 Jun 2021 10:32:24 +0000 Subject: [PATCH 029/619] [compiler-rt][asan] Disable two tests on Arm Thumb I can't be sure of the cause but I believe these fail due to to fast unwinding not working on Thumb. Whatever the case, they have been failing on our bots for a long time: https://lab.llvm.org/buildbot/#/builders/170/builds/46 Require fast-unwinder-works for both. --- compiler-rt/test/asan/TestCases/Linux/odr-violation.cpp | 2 ++ compiler-rt/test/asan/TestCases/Linux/recvfrom.cpp | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/compiler-rt/test/asan/TestCases/Linux/odr-violation.cpp b/compiler-rt/test/asan/TestCases/Linux/odr-violation.cpp index 1a9b39f89b522..478568cd122c2 100644 --- a/compiler-rt/test/asan/TestCases/Linux/odr-violation.cpp +++ b/compiler-rt/test/asan/TestCases/Linux/odr-violation.cpp @@ -43,6 +43,8 @@ // GNU driver doesn't handle .so files properly. // REQUIRES: Clang +// REQUIRES: fast-unwinder-works + #ifndef SZ # define SZ 4 #endif diff --git a/compiler-rt/test/asan/TestCases/Linux/recvfrom.cpp b/compiler-rt/test/asan/TestCases/Linux/recvfrom.cpp index 46aa202c2ad86..11ac93cda11d1 100644 --- a/compiler-rt/test/asan/TestCases/Linux/recvfrom.cpp +++ b/compiler-rt/test/asan/TestCases/Linux/recvfrom.cpp @@ -4,7 +4,8 @@ // RUN: %clangxx_asan %s -DSENDTO -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-SENDTO // RUN: %clangxx_asan %s -DSENDTO -o %t && %env_asan_opts=intercept_send=0 %run %t 2>&1 // -// UNSUPPORTED: android +// This will try to fast unwind on Arm Thumb, where fast unwinding does not work. +// UNSUPPORTED: android, !fast-unwinder-works #include #include From 6f3b775c3e9c685f74ecbe2ce1a94af52cc17c2f Mon Sep 17 00:00:00 2001 From: Gabor Marton Date: Wed, 9 Jun 2021 17:03:47 +0200 Subject: [PATCH 030/619] [Analyzer][solver] Add dump methods for (dis)equality classes. This proved to be very useful during debugging. Differential Revision: https://reviews.llvm.org/D103967 --- .../Core/RangeConstraintManager.cpp | 138 ++++++++++++++++++ .../expr-inspection-printState-diseq-info.c | 34 +++++ .../expr-inspection-printState-eq-classes.c | 21 +++ clang/test/Analysis/expr-inspection.c | 2 + 4 files changed, 195 insertions(+) create mode 100644 clang/test/Analysis/expr-inspection-printState-diseq-info.c create mode 100644 clang/test/Analysis/expr-inspection-printState-eq-classes.c diff --git a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp index 6d17bcb8b87f0..27367ff5ae80c 100644 --- a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp @@ -594,6 +594,11 @@ class EquivalenceClass : public llvm::FoldingSetNode { RangeSet::Factory &F, ProgramStateRef State); + void dumpToStream(ProgramStateRef State, raw_ostream &os) const; + LLVM_DUMP_METHOD void dump(ProgramStateRef State) const { + dumpToStream(State, llvm::errs()); + } + /// Check equivalence data for consistency. LLVM_NODISCARD LLVM_ATTRIBUTE_UNUSED static bool isClassDataConsistent(ProgramStateRef State); @@ -1414,6 +1419,17 @@ class RangeConstraintManager : public RangedConstraintManager { void printJson(raw_ostream &Out, ProgramStateRef State, const char *NL = "\n", unsigned int Space = 0, bool IsDot = false) const override; + void printConstraints(raw_ostream &Out, ProgramStateRef State, + const char *NL = "\n", unsigned int Space = 0, + bool IsDot = false) const; + void printEquivalenceClasses(raw_ostream &Out, ProgramStateRef State, + const char *NL = "\n", unsigned int Space = 0, + bool IsDot = false) const; + void printEquivalenceClass(raw_ostream &Out, ProgramStateRef State, + EquivalenceClass Class) const; + void printDisequalities(raw_ostream &Out, ProgramStateRef State, + const char *NL = "\n", unsigned int Space = 0, + bool IsDot = false) const; //===------------------------------------------------------------------===// // Implementation for interface from RangedConstraintManager. @@ -1637,6 +1653,15 @@ ConstraintMap ento::getConstraintMap(ProgramStateRef State) { // EqualityClass implementation details //===----------------------------------------------------------------------===// +LLVM_DUMP_METHOD void EquivalenceClass::dumpToStream(ProgramStateRef State, + raw_ostream &os) const { + SymbolSet ClassMembers = getClassMembers(State); + for (const SymbolRef &MemberSym : ClassMembers) { + MemberSym->dump(); + os << "\n"; + } +} + inline EquivalenceClass EquivalenceClass::find(ProgramStateRef State, SymbolRef Sym) { assert(State && "State should not be null"); @@ -2483,6 +2508,16 @@ ProgramStateRef RangeConstraintManager::assumeSymOutsideInclusiveRange( void RangeConstraintManager::printJson(raw_ostream &Out, ProgramStateRef State, const char *NL, unsigned int Space, bool IsDot) const { + printConstraints(Out, State, NL, Space, IsDot); + printEquivalenceClasses(Out, State, NL, Space, IsDot); + printDisequalities(Out, State, NL, Space, IsDot); +} + +void RangeConstraintManager::printConstraints(raw_ostream &Out, + ProgramStateRef State, + const char *NL, + unsigned int Space, + bool IsDot) const { ConstraintRangeTy Constraints = State->get(); Indent(Out, Space, IsDot) << "\"constraints\": "; @@ -2516,3 +2551,106 @@ void RangeConstraintManager::printJson(raw_ostream &Out, ProgramStateRef State, --Space; Indent(Out, Space, IsDot) << "]," << NL; } + +void RangeConstraintManager::printEquivalenceClass( + raw_ostream &Out, ProgramStateRef State, EquivalenceClass Class) const { + bool FirstMember = true; + SymbolSet ClassMembers = Class.getClassMembers(State); + Out << "[ "; + for (SymbolRef ClassMember : ClassMembers) { + if (FirstMember) + FirstMember = false; + else + Out << ", "; + Out << "\"" << ClassMember << "\""; + } + Out << " ]"; +} + +void RangeConstraintManager::printEquivalenceClasses(raw_ostream &Out, + ProgramStateRef State, + const char *NL, + unsigned int Space, + bool IsDot) const { + ClassMembersTy Members = State->get(); + + Indent(Out, Space, IsDot) << "\"equivalence_classes\": "; + if (Members.isEmpty()) { + Out << "null," << NL; + return; + } + + ++Space; + Out << '[' << NL; + bool FirstClass = true; + for (std::pair ClassToSymbolSet : Members) { + EquivalenceClass Class = ClassToSymbolSet.first; + + if (FirstClass) { + FirstClass = false; + } else { + Out << ','; + Out << NL; + } + Indent(Out, Space, IsDot); + printEquivalenceClass(Out, State, Class); + } + Out << NL; + + --Space; + Indent(Out, Space, IsDot) << "]," << NL; +} + +void RangeConstraintManager::printDisequalities(raw_ostream &Out, + ProgramStateRef State, + const char *NL, + unsigned int Space, + bool IsDot) const { + DisequalityMapTy Disequalities = State->get(); + + Indent(Out, Space, IsDot) << "\"disequality_info\": "; + if (Disequalities.isEmpty()) { + Out << "null," << NL; + return; + } + + ++Space; + Out << '[' << NL; + bool FirstClass = true; + for (std::pair ClassToDisEqSet : Disequalities) { + EquivalenceClass Class = ClassToDisEqSet.first; + if (FirstClass) { + FirstClass = false; + } else { + Out << ','; + Out << NL; + } + Indent(Out, Space, IsDot) << "{" << NL; + unsigned int DisEqSpace = Space + 1; + Indent(Out, DisEqSpace, IsDot) << "\"class\": "; + printEquivalenceClass(Out, State, Class); + ClassSet DisequalClasses = ClassToDisEqSet.second; + if (!DisequalClasses.isEmpty()) { + Out << "," << NL; + Indent(Out, DisEqSpace, IsDot) << "\"disequal_to\": [" << NL; + unsigned int DisEqClassSpace = DisEqSpace + 1; + Indent(Out, DisEqClassSpace, IsDot); + bool FirstDisEqClass = true; + for (EquivalenceClass DisEqClass : DisequalClasses) { + if (FirstDisEqClass) { + FirstDisEqClass = false; + } else { + Out << ',' << NL; + Indent(Out, DisEqClassSpace, IsDot); + } + printEquivalenceClass(Out, State, DisEqClass); + } + Out << "]" << NL; + } + Indent(Out, Space, IsDot) << "}"; + } + Out << NL; + + --Space; + Indent(Out, Space, IsDot) << "]," << NL; +} diff --git a/clang/test/Analysis/expr-inspection-printState-diseq-info.c b/clang/test/Analysis/expr-inspection-printState-diseq-info.c new file mode 100644 index 0000000000000..fe2ee324105fa --- /dev/null +++ b/clang/test/Analysis/expr-inspection-printState-diseq-info.c @@ -0,0 +1,34 @@ +// RUN: %clang_analyze_cc1 \ +// RUN: -analyzer-checker=debug.ExprInspection %s 2>&1 | FileCheck %s + +void clang_analyzer_printState(); + +void test_disequality_info(int e0, int b0, int b1, int c0) { + int e1 = e0 - b0; + if (b0 == 2) { + int e2 = e1 - b1; + if (e2 > 0) { + if (b1 != c0) + clang_analyzer_printState(); + } + } +} + +// CHECK: "disequality_info": [ +// CHECK-NEXT: { +// CHECK-NEXT: "class": [ "reg_$2" ], +// CHECK-NEXT: "disequal_to": [ +// CHECK-NEXT: [ "(reg_$0) - 2" ], +// CHECK-NEXT: [ "reg_$3" ]] +// CHECK-NEXT: }, +// CHECK-NEXT: { +// CHECK-NEXT: "class": [ "(reg_$0) - 2" ], +// CHECK-NEXT: "disequal_to": [ +// CHECK-NEXT: [ "reg_$2" ]] +// CHECK-NEXT: }, +// CHECK-NEXT: { +// CHECK-NEXT: "class": [ "reg_$3" ], +// CHECK-NEXT: "disequal_to": [ +// CHECK-NEXT: [ "reg_$2" ]] +// CHECK-NEXT: } +// CHECK-NEXT: ], diff --git a/clang/test/Analysis/expr-inspection-printState-eq-classes.c b/clang/test/Analysis/expr-inspection-printState-eq-classes.c new file mode 100644 index 0000000000000..5b40ac5cd47e9 --- /dev/null +++ b/clang/test/Analysis/expr-inspection-printState-eq-classes.c @@ -0,0 +1,21 @@ +// RUN: %clang_analyze_cc1 \ +// RUN: -analyzer-checker=debug.ExprInspection %s 2>&1 | FileCheck %s + +void clang_analyzer_printState(); + +void test_equivalence_classes(int a, int b, int c, int d) { + if (a + b != c) + return; + if (a != d) + return; + if (b != 0) + return; + clang_analyzer_printState(); + (void)(a * b * c * d); + return; +} + +// CHECK: "equivalence_classes": [ +// CHECK-NEXT: [ "reg_$0", "(reg_$0) + (reg_$1)", "reg_$2", "reg_$3" ], +// CHECK-NEXT: [ "((reg_$0) + (reg_$1)) != (reg_$2)", "(reg_$0) != (reg_$2)" ] +// CHECK-NEXT: ], diff --git a/clang/test/Analysis/expr-inspection.c b/clang/test/Analysis/expr-inspection.c index 283fa9bdb724a..76118a76e71ca 100644 --- a/clang/test/Analysis/expr-inspection.c +++ b/clang/test/Analysis/expr-inspection.c @@ -38,6 +38,8 @@ void foo(int x) { // CHECK-NEXT: "constraints": [ // CHECK-NEXT: { "symbol": "reg_$0", "range": "{ [-2147483648, 13] }" } // CHECK-NEXT: ], +// CHECK-NEXT: "equivalence_classes": null, +// CHECK-NEXT: "disequality_info": null, // CHECK-NEXT: "dynamic_types": null, // CHECK-NEXT: "dynamic_casts": null, // CHECK-NEXT: "constructing_objects": null, From 8474bb13c3270d4195a663013b95e6065075ce56 Mon Sep 17 00:00:00 2001 From: Valeriy Savchenko Date: Mon, 28 Jun 2021 11:56:05 +0300 Subject: [PATCH 031/619] [analyzer][solver][NFC] Simplify function signatures Since RangeSet::Factory actually contains BasicValueFactory, we can remove value factory from many function signatures inside the solver. Differential Revision: https://reviews.llvm.org/D105005 --- .../PathSensitive/RangedConstraintManager.h | 3 + .../Core/RangeConstraintManager.cpp | 127 ++++++++---------- 2 files changed, 59 insertions(+), 71 deletions(-) diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h index bf00fd98a4616..c67df1e51b4ff 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h @@ -213,6 +213,9 @@ class RangeSet { /// where N = size(What) RangeSet negate(RangeSet What); + /// Return associated value factory. + BasicValueFactory &getValueFactory() const { return ValueFactory; } + private: /// Return a persistent version of the given container. RangeSet makePersistent(ContainerType &&From); diff --git a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp index 27367ff5ae80c..c3d8a0a87635d 100644 --- a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp @@ -549,14 +549,13 @@ class EquivalenceClass : public llvm::FoldingSetNode { SymbolRef Sym); /// Merge classes for the given symbols and return a new state. - LLVM_NODISCARD static inline ProgramStateRef - merge(BasicValueFactory &BV, RangeSet::Factory &F, ProgramStateRef State, - SymbolRef First, SymbolRef Second); + LLVM_NODISCARD static inline ProgramStateRef merge(RangeSet::Factory &F, + ProgramStateRef State, + SymbolRef First, + SymbolRef Second); // Merge this class with the given class and return a new state. - LLVM_NODISCARD inline ProgramStateRef merge(BasicValueFactory &BV, - RangeSet::Factory &F, - ProgramStateRef State, - EquivalenceClass Other); + LLVM_NODISCARD inline ProgramStateRef + merge(RangeSet::Factory &F, ProgramStateRef State, EquivalenceClass Other); /// Return a set of class members for the given state. LLVM_NODISCARD inline SymbolSet getClassMembers(ProgramStateRef State) const; @@ -567,15 +566,14 @@ class EquivalenceClass : public llvm::FoldingSetNode { SymbolReaper &Reaper) const; LLVM_NODISCARD static inline ProgramStateRef - markDisequal(BasicValueFactory &BV, RangeSet::Factory &F, - ProgramStateRef State, SymbolRef First, SymbolRef Second); + markDisequal(RangeSet::Factory &F, ProgramStateRef State, SymbolRef First, + SymbolRef Second); LLVM_NODISCARD static inline ProgramStateRef - markDisequal(BasicValueFactory &BV, RangeSet::Factory &F, - ProgramStateRef State, EquivalenceClass First, - EquivalenceClass Second); + markDisequal(RangeSet::Factory &F, ProgramStateRef State, + EquivalenceClass First, EquivalenceClass Second); LLVM_NODISCARD inline ProgramStateRef - markDisequal(BasicValueFactory &BV, RangeSet::Factory &F, - ProgramStateRef State, EquivalenceClass Other) const; + markDisequal(RangeSet::Factory &F, ProgramStateRef State, + EquivalenceClass Other) const; LLVM_NODISCARD static inline ClassSet getDisequalClasses(ProgramStateRef State, SymbolRef Sym); LLVM_NODISCARD inline ClassSet @@ -641,15 +639,13 @@ class EquivalenceClass : public llvm::FoldingSetNode { } static inline SymbolSet::Factory &getMembersFactory(ProgramStateRef State); - inline ProgramStateRef mergeImpl(BasicValueFactory &BV, RangeSet::Factory &F, - ProgramStateRef State, SymbolSet Members, - EquivalenceClass Other, + inline ProgramStateRef mergeImpl(RangeSet::Factory &F, ProgramStateRef State, + SymbolSet Members, EquivalenceClass Other, SymbolSet OtherMembers); static inline bool addToDisequalityInfo(DisequalityMapTy &Info, ConstraintRangeTy &Constraints, - BasicValueFactory &BV, RangeSet::Factory &F, - ProgramStateRef State, EquivalenceClass First, - EquivalenceClass Second); + RangeSet::Factory &F, ProgramStateRef State, + EquivalenceClass First, EquivalenceClass Second); /// This is a unique identifier of the class. uintptr_t ID; @@ -740,8 +736,7 @@ struct EqualityInfo { //===----------------------------------------------------------------------===// template -LLVM_NODISCARD inline RangeSet intersect(BasicValueFactory &BV, - RangeSet::Factory &F, RangeSet Head, +LLVM_NODISCARD inline RangeSet intersect(RangeSet::Factory &F, RangeSet Head, SecondTy Second, RestTy... Tail); template struct IntersectionTraits; @@ -764,15 +759,14 @@ struct IntersectionTraits { }; template -LLVM_NODISCARD inline EndTy intersect(BasicValueFactory &BV, - RangeSet::Factory &F, EndTy End) { +LLVM_NODISCARD inline EndTy intersect(RangeSet::Factory &F, EndTy End) { // If the list contains only RangeSet or Optional, simply return // that range set. return End; } LLVM_NODISCARD LLVM_ATTRIBUTE_UNUSED inline Optional -intersect(BasicValueFactory &BV, RangeSet::Factory &F, const RangeSet *End) { +intersect(RangeSet::Factory &F, const RangeSet *End) { // This is an extraneous conversion from a raw pointer into Optional if (End) { return *End; @@ -781,25 +775,23 @@ intersect(BasicValueFactory &BV, RangeSet::Factory &F, const RangeSet *End) { } template -LLVM_NODISCARD inline RangeSet intersect(BasicValueFactory &BV, - RangeSet::Factory &F, RangeSet Head, +LLVM_NODISCARD inline RangeSet intersect(RangeSet::Factory &F, RangeSet Head, RangeSet Second, RestTy... Tail) { // Here we call either the or version // of the function and can be sure that the result is RangeSet. - return intersect(BV, F, F.intersect(Head, Second), Tail...); + return intersect(F, F.intersect(Head, Second), Tail...); } template -LLVM_NODISCARD inline RangeSet intersect(BasicValueFactory &BV, - RangeSet::Factory &F, RangeSet Head, +LLVM_NODISCARD inline RangeSet intersect(RangeSet::Factory &F, RangeSet Head, SecondTy Second, RestTy... Tail) { if (Second) { // Here we call the version of the function... - return intersect(BV, F, Head, *Second, Tail...); + return intersect(F, Head, *Second, Tail...); } // ...and here it is either or , which // means that the result is definitely RangeSet. - return intersect(BV, F, Head, Tail...); + return intersect(F, Head, Tail...); } /// Main generic intersect function. @@ -824,12 +816,12 @@ LLVM_NODISCARD inline RangeSet intersect(BasicValueFactory &BV, template LLVM_NODISCARD inline typename IntersectionTraits::Type - intersect(BasicValueFactory &BV, RangeSet::Factory &F, HeadTy Head, - SecondTy Second, RestTy... Tail) { + intersect(RangeSet::Factory &F, HeadTy Head, SecondTy Second, + RestTy... Tail) { if (Head) { - return intersect(BV, F, *Head, Second, Tail...); + return intersect(F, *Head, Second, Tail...); } - return intersect(BV, F, Second, Tail...); + return intersect(F, Second, Tail...); } //===----------------------------------------------------------------------===// @@ -845,9 +837,9 @@ class SymbolicRangeInferrer : public SymExprVisitor { public: template - static RangeSet inferRange(BasicValueFactory &BV, RangeSet::Factory &F, - ProgramStateRef State, SourceType Origin) { - SymbolicRangeInferrer Inferrer(BV, F, State); + static RangeSet inferRange(RangeSet::Factory &F, ProgramStateRef State, + SourceType Origin) { + SymbolicRangeInferrer Inferrer(F, State); return Inferrer.infer(Origin); } @@ -872,9 +864,8 @@ class SymbolicRangeInferrer } private: - SymbolicRangeInferrer(BasicValueFactory &BV, RangeSet::Factory &F, - ProgramStateRef S) - : ValueFactory(BV), RangeFactory(F), State(S) {} + SymbolicRangeInferrer(RangeSet::Factory &F, ProgramStateRef S) + : ValueFactory(F.getValueFactory()), RangeFactory(F), State(S) {} /// Infer range information from the given integer constant. /// @@ -899,7 +890,7 @@ class SymbolicRangeInferrer RangeSet infer(SymbolRef Sym) { if (Optional ConstraintBasedRange = intersect( - ValueFactory, RangeFactory, getConstraint(State, Sym), + RangeFactory, getConstraint(State, Sym), // If Sym is a difference of symbols A - B, then maybe we have range // set stored for B - A. // @@ -1536,12 +1527,12 @@ class RangeConstraintManager : public RangedConstraintManager { ProgramStateRef trackDisequality(ProgramStateRef State, SymbolRef LHS, SymbolRef RHS) { - return EquivalenceClass::markDisequal(getBasicVals(), F, State, LHS, RHS); + return EquivalenceClass::markDisequal(F, State, LHS, RHS); } ProgramStateRef trackEquality(ProgramStateRef State, SymbolRef LHS, SymbolRef RHS) { - return EquivalenceClass::merge(getBasicVals(), F, State, LHS, RHS); + return EquivalenceClass::merge(F, State, LHS, RHS); } LLVM_NODISCARD ProgramStateRef setConstraint(ProgramStateRef State, @@ -1674,19 +1665,17 @@ inline EquivalenceClass EquivalenceClass::find(ProgramStateRef State, return Sym; } -inline ProgramStateRef EquivalenceClass::merge(BasicValueFactory &BV, - RangeSet::Factory &F, +inline ProgramStateRef EquivalenceClass::merge(RangeSet::Factory &F, ProgramStateRef State, SymbolRef First, SymbolRef Second) { EquivalenceClass FirstClass = find(State, First); EquivalenceClass SecondClass = find(State, Second); - return FirstClass.merge(BV, F, State, SecondClass); + return FirstClass.merge(F, State, SecondClass); } -inline ProgramStateRef EquivalenceClass::merge(BasicValueFactory &BV, - RangeSet::Factory &F, +inline ProgramStateRef EquivalenceClass::merge(RangeSet::Factory &F, ProgramStateRef State, EquivalenceClass Other) { // It is already the same class. @@ -1714,15 +1703,14 @@ inline ProgramStateRef EquivalenceClass::merge(BasicValueFactory &BV, // its members. Merging is not a trivial operation, so it's easier to // merge the smaller class into the bigger one. if (Members.getHeight() >= OtherMembers.getHeight()) { - return mergeImpl(BV, F, State, Members, Other, OtherMembers); + return mergeImpl(F, State, Members, Other, OtherMembers); } else { - return Other.mergeImpl(BV, F, State, OtherMembers, *this, Members); + return Other.mergeImpl(F, State, OtherMembers, *this, Members); } } inline ProgramStateRef -EquivalenceClass::mergeImpl(BasicValueFactory &ValueFactory, - RangeSet::Factory &RangeFactory, +EquivalenceClass::mergeImpl(RangeSet::Factory &RangeFactory, ProgramStateRef State, SymbolSet MyMembers, EquivalenceClass Other, SymbolSet OtherMembers) { // Essentially what we try to recreate here is some kind of union-find @@ -1745,7 +1733,7 @@ EquivalenceClass::mergeImpl(BasicValueFactory &ValueFactory, // Intersection here makes perfect sense because both of these constraints // must hold for the whole new class. if (Optional NewClassConstraint = - intersect(ValueFactory, RangeFactory, getConstraint(State, *this), + intersect(RangeFactory, getConstraint(State, *this), getConstraint(State, Other))) { // NOTE: Essentially, NewClassConstraint should NEVER be infeasible because // range inferrer shouldn't generate ranges incompatible with @@ -1858,25 +1846,22 @@ bool EquivalenceClass::isTriviallyDead(ProgramStateRef State, return isTrivial(State) && Reaper.isDead(getRepresentativeSymbol()); } -inline ProgramStateRef EquivalenceClass::markDisequal(BasicValueFactory &VF, - RangeSet::Factory &RF, +inline ProgramStateRef EquivalenceClass::markDisequal(RangeSet::Factory &RF, ProgramStateRef State, SymbolRef First, SymbolRef Second) { - return markDisequal(VF, RF, State, find(State, First), find(State, Second)); + return markDisequal(RF, State, find(State, First), find(State, Second)); } -inline ProgramStateRef EquivalenceClass::markDisequal(BasicValueFactory &VF, - RangeSet::Factory &RF, +inline ProgramStateRef EquivalenceClass::markDisequal(RangeSet::Factory &RF, ProgramStateRef State, EquivalenceClass First, EquivalenceClass Second) { - return First.markDisequal(VF, RF, State, Second); + return First.markDisequal(RF, State, Second); } inline ProgramStateRef -EquivalenceClass::markDisequal(BasicValueFactory &VF, RangeSet::Factory &RF, - ProgramStateRef State, +EquivalenceClass::markDisequal(RangeSet::Factory &RF, ProgramStateRef State, EquivalenceClass Other) const { // If we know that two classes are equal, we can only produce an infeasible // state. @@ -1889,9 +1874,9 @@ EquivalenceClass::markDisequal(BasicValueFactory &VF, RangeSet::Factory &RF, // Disequality is a symmetric relation, so if we mark A as disequal to B, // we should also mark B as disequalt to A. - if (!addToDisequalityInfo(DisequalityInfo, Constraints, VF, RF, State, *this, + if (!addToDisequalityInfo(DisequalityInfo, Constraints, RF, State, *this, Other) || - !addToDisequalityInfo(DisequalityInfo, Constraints, VF, RF, State, Other, + !addToDisequalityInfo(DisequalityInfo, Constraints, RF, State, Other, *this)) return nullptr; @@ -1906,8 +1891,8 @@ EquivalenceClass::markDisequal(BasicValueFactory &VF, RangeSet::Factory &RF, inline bool EquivalenceClass::addToDisequalityInfo( DisequalityMapTy &Info, ConstraintRangeTy &Constraints, - BasicValueFactory &VF, RangeSet::Factory &RF, ProgramStateRef State, - EquivalenceClass First, EquivalenceClass Second) { + RangeSet::Factory &RF, ProgramStateRef State, EquivalenceClass First, + EquivalenceClass Second) { // 1. Get all of the required factories. DisequalityMapTy::Factory &F = State->get_context(); @@ -1930,7 +1915,7 @@ inline bool EquivalenceClass::addToDisequalityInfo( if (const llvm::APSInt *Point = SecondConstraint->getConcreteValue()) { RangeSet FirstConstraint = SymbolicRangeInferrer::inferRange( - VF, RF, State, First.getRepresentativeSymbol()); + RF, State, First.getRepresentativeSymbol()); FirstConstraint = RF.deletePoint(FirstConstraint, *Point); @@ -1985,7 +1970,7 @@ LLVM_NODISCARD ProgramStateRef EquivalenceClass::simplify( // The simplified symbol should be the member of the original Class, // however, it might be in another existing class at the moment. We // have to merge these classes. - State = merge(SVB.getBasicValueFactory(), F, State, ClassOfSimplifiedSym); + State = merge(F, State, ClassOfSimplifiedSym); if (!State) return nullptr; } @@ -2274,12 +2259,12 @@ RangeConstraintManager::removeDeadBindings(ProgramStateRef State, RangeSet RangeConstraintManager::getRange(ProgramStateRef State, SymbolRef Sym) { - return SymbolicRangeInferrer::inferRange(getBasicVals(), F, State, Sym); + return SymbolicRangeInferrer::inferRange(F, State, Sym); } RangeSet RangeConstraintManager::getRange(ProgramStateRef State, EquivalenceClass Class) { - return SymbolicRangeInferrer::inferRange(getBasicVals(), F, State, Class); + return SymbolicRangeInferrer::inferRange(F, State, Class); } //===------------------------------------------------------------------------=== From 20df2c7052c09934ce87ccc409da9d3dc24b7ca0 Mon Sep 17 00:00:00 2001 From: Pushpinder Singh Date: Mon, 28 Jun 2021 11:27:05 +0000 Subject: [PATCH 032/619] [AMDGPU][Libomptarget] Collect allocatable memory pools using HSA The logic is almost similar to that of system.cpp with one change that instead of adding all the memory pools to a device struct it only keeps a single pool. The existing approach also always allocated memory on the first HSA pool found for a GPU. This depends on D104691. The goal of this series of patches is to remove _atl_machine global. The next patch will drop g_atl_machine entirely. Reviewed By: JonChesterfield Differential Revision: https://reviews.llvm.org/D104695 --- .../libomptarget/plugins/amdgpu/src/rtl.cpp | 164 ++++++++++++++++-- 1 file changed, 154 insertions(+), 10 deletions(-) diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp index a8252f2cb4b00..9a07d26546bbc 100644 --- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -104,6 +105,16 @@ template hsa_status_t iterate_agents(C cb) { return hsa_iterate_agents(L, static_cast(&cb)); } +template +hsa_status_t amd_agent_iterate_memory_pools(hsa_agent_t Agent, C cb) { + auto L = [](hsa_amd_memory_pool_t MemoryPool, void *data) -> hsa_status_t { + C *unwrapped = static_cast(data); + return (*unwrapped)(MemoryPool); + }; + + return hsa_amd_agent_iterate_memory_pools(Agent, L, static_cast(&cb)); +} + } // namespace hsa /// Keep entries table per device @@ -329,18 +340,60 @@ hsa_status_t addKernArgPool(hsa_amd_memory_pool_t MemoryPool, void *Data) { return err; } + size_t size = 0; + err = hsa_amd_memory_pool_get_info(MemoryPool, HSA_AMD_MEMORY_POOL_INFO_SIZE, + &size); + if (err != HSA_STATUS_SUCCESS) { + fprintf(stderr, "Get memory pool size failed: %s\n", get_error_string(err)); + return err; + } + if ((GlobalFlags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED) && - (GlobalFlags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT)) { - size_t size = 0; - err = hsa_amd_memory_pool_get_info(MemoryPool, - HSA_AMD_MEMORY_POOL_INFO_SIZE, &size); - if (err != HSA_STATUS_SUCCESS) { - fprintf(stderr, "Get memory pool size failed: %s\n", - get_error_string(err)); - return err; + (GlobalFlags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT) && + size > 0) { + Result->push_back(MemoryPool); + } + + return HSA_STATUS_SUCCESS; +} + +std::pair +isValidMemoryPool(hsa_amd_memory_pool_t MemoryPool) { + bool AllocAllowed = false; + hsa_status_t Err = hsa_amd_memory_pool_get_info( + MemoryPool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED, + &AllocAllowed); + if (Err != HSA_STATUS_SUCCESS) { + fprintf(stderr, "Alloc allowed in memory pool check failed: %s\n", + get_error_string(Err)); + return {Err, false}; + } + + return {HSA_STATUS_SUCCESS, AllocAllowed}; +} + +template +hsa_status_t collectMemoryPools(const std::vector &Agents, + AccumulatorFunc Func) { + for (int DeviceId = 0; DeviceId < Agents.size(); DeviceId++) { + hsa_status_t Err = hsa::amd_agent_iterate_memory_pools( + Agents[DeviceId], [&](hsa_amd_memory_pool_t MemoryPool) { + hsa_status_t Err; + bool Valid = false; + std::tie(Err, Valid) = isValidMemoryPool(MemoryPool); + if (Err != HSA_STATUS_SUCCESS) { + return Err; + } + if (Valid) + Func(MemoryPool, DeviceId); + return HSA_STATUS_SUCCESS; + }); + + if (Err != HSA_STATUS_SUCCESS) { + printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__, + "Iterate all memory pools", get_error_string(Err)); + return Err; } - if (size > 0) - Result->push_back(MemoryPool); } return HSA_STATUS_SUCCESS; @@ -421,6 +474,13 @@ class RTLDeviceInfoTy { hsa_amd_memory_pool_t KernArgPool; + // fine grained memory pool for host allocations + hsa_amd_memory_pool_t HostFineGrainedMemoryPool; + + // fine and coarse-grained memory pools per offloading device + std::vector DeviceFineGrainedMemoryPools; + std::vector DeviceCoarseGrainedMemoryPools; + struct atmiFreePtrDeletor { void operator()(void *p) { core::Runtime::Memfree(p); // ignore failure to free @@ -523,6 +583,82 @@ class RTLDeviceInfoTy { E.Table.EntriesBegin = E.Table.EntriesEnd = 0; } + hsa_status_t addDeviceMemoryPool(hsa_amd_memory_pool_t MemoryPool, + int DeviceId) { + assert(DeviceId < DeviceFineGrainedMemoryPools.size() && "Error here."); + uint32_t GlobalFlags = 0; + hsa_status_t Err = hsa_amd_memory_pool_get_info( + MemoryPool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &GlobalFlags); + + if (Err != HSA_STATUS_SUCCESS) { + return Err; + } + + if (GlobalFlags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED) { + DeviceFineGrainedMemoryPools[DeviceId] = MemoryPool; + } else if (GlobalFlags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED) { + DeviceCoarseGrainedMemoryPools[DeviceId] = MemoryPool; + } + + return HSA_STATUS_SUCCESS; + } + + hsa_status_t addHostMemoryPool(hsa_amd_memory_pool_t MemoryPool, + int DeviceId) { + uint32_t GlobalFlags = 0; + hsa_status_t Err = hsa_amd_memory_pool_get_info( + MemoryPool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &GlobalFlags); + + if (Err != HSA_STATUS_SUCCESS) { + return Err; + } + + uint32_t Size; + Err = hsa_amd_memory_pool_get_info(MemoryPool, + HSA_AMD_MEMORY_POOL_INFO_SIZE, &Size); + if (Err != HSA_STATUS_SUCCESS) { + return Err; + } + + if (GlobalFlags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED && + Size > 0) { + HostFineGrainedMemoryPool = MemoryPool; + } + + return HSA_STATUS_SUCCESS; + } + + hsa_status_t setupMemoryPools() { + using namespace std::placeholders; + hsa_status_t Err; + Err = core::collectMemoryPools( + CPUAgents, std::bind(&RTLDeviceInfoTy::addHostMemoryPool, this, _1, _2)); + if (Err != HSA_STATUS_SUCCESS) { + fprintf(stderr, "HSA error in collecting memory pools for CPU: %s\n", + get_error_string(Err)); + return Err; + } + Err = core::collectMemoryPools( + HSAAgents, std::bind(&RTLDeviceInfoTy::addDeviceMemoryPool, this, _1, _2)); + if (Err != HSA_STATUS_SUCCESS) { + fprintf(stderr, + "HSA error in collecting memory pools for offload devices: %s\n", + get_error_string(Err)); + return Err; + } + return HSA_STATUS_SUCCESS; + } + + hsa_amd_memory_pool_t getDeviceMemoryPool(int DeviceId) { + assert(DeviceId >= 0 && DeviceId < DeviceCoarseGrainedMemoryPools.size() && + "Invalid device Id"); + return DeviceCoarseGrainedMemoryPools[DeviceId]; + } + + hsa_amd_memory_pool_t getHostMemoryPool() { + return HostFineGrainedMemoryPool; + } + RTLDeviceInfoTy() { // LIBOMPTARGET_KERNEL_TRACE provides a kernel launch trace to stderr // anytime. You do not need a debug library build. @@ -581,6 +717,14 @@ class RTLDeviceInfoTy { deviceStateStore.resize(NumberOfDevices); KernelInfoTable.resize(NumberOfDevices); SymbolInfoTable.resize(NumberOfDevices); + DeviceCoarseGrainedMemoryPools.resize(NumberOfDevices); + DeviceFineGrainedMemoryPools.resize(NumberOfDevices); + + err = setupMemoryPools(); + if (err != HSA_STATUS_SUCCESS) { + DP("Error when setting up memory pools"); + return; + } for (int i = 0; i < NumberOfDevices; i++) { HSAQueues[i] = nullptr; From 7749b19e9cb19c0cf5f4e026c719f612e9292fb4 Mon Sep 17 00:00:00 2001 From: Lucas Prates Date: Tue, 9 Feb 2021 13:39:08 +0000 Subject: [PATCH 033/619] [NFC] Adding test for clobbering of high registers in Thumb Prior to the changes from D52010, clobbering Thumb's high registers in inline asm would cause incorrect code to be generated - or an assertion failure for debug builds. Now that the issue is no longer reproducible, this patch adds a MIR test to cover that scenario. Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D96335 --- llvm/test/CodeGen/Thumb/high-reg-clobber.mir | 50 ++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 llvm/test/CodeGen/Thumb/high-reg-clobber.mir diff --git a/llvm/test/CodeGen/Thumb/high-reg-clobber.mir b/llvm/test/CodeGen/Thumb/high-reg-clobber.mir new file mode 100644 index 0000000000000..7b292aee7f7ea --- /dev/null +++ b/llvm/test/CodeGen/Thumb/high-reg-clobber.mir @@ -0,0 +1,50 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple thumbv6m-arm-none-eabi -run-pass regallocbasic %s -o - | FileCheck %s +# RUN: llc -mtriple thumbv6m-arm-none-eabi -run-pass greedy %s -o - | FileCheck %s +# RUN: llc -mtriple thumbv6m-arm-none-eabi -run-pass regallocfast %s -o - | FileCheck %s --check-prefix=FAST + +... +--- +name: constraint_h +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: tgpr } + - { id: 1, class: hgpr } + - { id: 2, class: tgpr } +liveins: + - { reg: '$r0', virtual-reg: '%0' } +frameInfo: + maxAlignment: 4 + maxCallFrameSize: 0 + localFrameSize: 4 +stack: + - { id: 0, size: 4, alignment: 4, local-offset: -4 } +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $r0 + + ; CHECK-LABEL: name: constraint_h + ; CHECK: liveins: $r0 + ; CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0 + ; CHECK: tSTRspi [[COPY]], %stack.0, 0, 14 /* CC::al */, $noreg + ; CHECK: [[tLDRspi:%[0-9]+]]:tgpr = tLDRspi %stack.0, 0, 14 /* CC::al */, $noreg + ; CHECK: [[COPY1:%[0-9]+]]:hgpr = COPY [[tLDRspi]] + ; CHECK: INLINEASM &"mov r12, $0", 1 /* sideeffect attdialect */, 1048585 /* reguse:GPRnoip_and_GPRwithAPSR_NZCVnosp */, [[COPY1]], 12 /* clobber */, implicit-def early-clobber $r12 + ; CHECK: tBX_RET 14 /* CC::al */, $noreg + ; FAST-LABEL: name: constraint_h + ; FAST: liveins: $r0 + ; FAST: tSTRspi killed renamable $r0, %stack.0, 0, 14 /* CC::al */, $noreg + ; FAST: renamable $r0 = tLDRspi %stack.0, 0, 14 /* CC::al */, $noreg + ; FAST: renamable $r8 = COPY killed renamable $r0 + ; FAST: INLINEASM &"mov r12, $0", 1 /* sideeffect attdialect */, 1048585 /* reguse:GPRnoip_and_GPRwithAPSR_NZCVnosp */, killed renamable $r8, 12 /* clobber */, implicit-def dead early-clobber $r12 + ; FAST: tBX_RET 14 /* CC::al */, $noreg + %0:tgpr = COPY $r0 + tSTRspi %0, %stack.0, 0, 14 /* CC::al */, $noreg + %2:tgpr = tLDRspi %stack.0, 0, 14 /* CC::al */, $noreg + %1:hgpr = COPY %2 + INLINEASM &"mov r12, $0", 1 /* sideeffect attdialect */, 1048585 /* reguse:hGPR */, %1, 12 /* clobber */, implicit-def early-clobber $r12 + tBX_RET 14 /* CC::al */, $noreg + +... From 88b1135e72ea15196c87cfcbecac29596d1d3002 Mon Sep 17 00:00:00 2001 From: Lucas Prates Date: Tue, 22 Jun 2021 16:43:36 +0100 Subject: [PATCH 034/619] [Aarch64] Adding support for Armv9-A Realm Management Extension This adds support for Armv9-A's Realm Management Extension, including three new system registers - MFAR_EL3, GPCCR_EL3 and GPTBR_EL3 - and four new TLBI instructions. The reference for the Realm Management Extension can be found at: https://developer.arm.com/documentation/ddi0615/aa. Based on patches by Victor Campos. Reviewed By: dmgreen Differential Revision: https://reviews.llvm.org/D104773 --- llvm/docs/ReleaseNotes.rst | 5 ++ llvm/lib/Target/AArch64/AArch64.td | 3 + llvm/lib/Target/AArch64/AArch64Subtarget.h | 3 + .../Target/AArch64/AArch64SystemOperands.td | 15 ++++ .../AArch64/AsmParser/AArch64AsmParser.cpp | 1 + llvm/test/MC/AArch64/armv9a-rme.s | 70 +++++++++++++++++++ .../MC/Disassembler/AArch64/armv9a-rme.txt | 25 +++++++ 7 files changed, 122 insertions(+) create mode 100644 llvm/test/MC/AArch64/armv9a-rme.s create mode 100644 llvm/test/MC/Disassembler/AArch64/armv9a-rme.txt diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index bb88699fc76de..e0961e60763d8 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -74,6 +74,11 @@ Changes to building LLVM Changes to TableGen ------------------- +Changes to the AArch64 Backend +-------------------------- + +* Introduced support for Armv9-A's Realm Management Extension. + Changes to the ARM Backend -------------------------- diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td index dab6dbe2352c3..4e1dc81a8aa41 100644 --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -426,6 +426,9 @@ def FeatureEnhancedCounterVirtualization : SubtargetFeature<"ecv", "HasEnhancedCounterVirtualization", "true", "Enable enhanced counter virtualization extension">; +def FeatureRME : SubtargetFeature<"rme", "HasRME", + "true", "Enable Realm Management Extension">; + //===----------------------------------------------------------------------===// // Architectures. // diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h index ea1fbc18d3bcd..491e7bdaa8dae 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -183,6 +183,9 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { bool HasSVE2SHA3 = false; bool HasSVE2BitPerm = false; + // Armv9-A Extensions + bool HasRME = false; + // Future architecture extensions. bool HasETE = false; bool HasTRBE = false; diff --git a/llvm/lib/Target/AArch64/AArch64SystemOperands.td b/llvm/lib/Target/AArch64/AArch64SystemOperands.td index 1909e79fa3a96..8594ec9dac729 100644 --- a/llvm/lib/Target/AArch64/AArch64SystemOperands.td +++ b/llvm/lib/Target/AArch64/AArch64SystemOperands.td @@ -531,6 +531,14 @@ defm : TLBI<"RVAE3OS", 0b110, 0b1000, 0b0101, 0b001>; defm : TLBI<"RVALE3OS", 0b110, 0b1000, 0b0101, 0b101>; } //FeatureTLB_RMI +// Armv9-A Realm Management Extention TLBI Instructions +let Requires = ["AArch64::FeatureRME"] in { +defm : TLBI<"RPAOS", 0b110, 0b1000, 0b0100, 0b011>; +defm : TLBI<"RPALOS", 0b110, 0b1000, 0b0100, 0b111>; +defm : TLBI<"PAALLOS", 0b110, 0b1000, 0b0001, 0b100, 0>; +defm : TLBI<"PAALL", 0b110, 0b1000, 0b0111, 0b100, 0>; +} + // Armv8.5-A Prediction Restriction by Context instruction options: class PRCTX crm> : SearchableTable { let SearchableFields = ["Name", "Encoding"]; @@ -743,6 +751,13 @@ def : RWSysReg<"SCXTNUM_EL3", 0b11, 0b110, 0b1101, 0b0000, 0b111>; def : RWSysReg<"SCXTNUM_EL12", 0b11, 0b101, 0b1101, 0b0000, 0b111>; } +// v9a Realm Management Extension registers +let Requires = [{ {AArch64::FeatureRME} }] in { +def : RWSysReg<"MFAR_EL3", 0b11, 0b110, 0b0110, 0b0000, 0b101>; +def : RWSysReg<"GPCCR_EL3", 0b11, 0b110, 0b0010, 0b0001, 0b110>; +def : RWSysReg<"GPTBR_EL3", 0b11, 0b110, 0b0010, 0b0001, 0b100>; +} + //===---------------------- // Write-only regs //===---------------------- diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index d2a0b1dcecb36..cd7001f1d4c8d 100644 --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -2917,6 +2917,7 @@ static const struct Extension { {"xs", {AArch64::FeatureXS}}, {"pauth", {AArch64::FeaturePAuth}}, {"flagm", {AArch64::FeatureFlagM}}, + {"rme", {AArch64::FeatureRME}}, // FIXME: Unsupported extensions {"lor", {}}, {"rdma", {}}, diff --git a/llvm/test/MC/AArch64/armv9a-rme.s b/llvm/test/MC/AArch64/armv9a-rme.s new file mode 100644 index 0000000000000..0a575fa8ae579 --- /dev/null +++ b/llvm/test/MC/AArch64/armv9a-rme.s @@ -0,0 +1,70 @@ +// RUN: not llvm-mc -triple aarch64-arm-none-eabi -mattr +rme -show-encoding %s 2> %t | FileCheck %s +// RUN: FileCheck --check-prefix=CHECK-ERROR %s < %t +// RUN: not llvm-mc -triple aarch64-arm-none-eabi -show-encoding %s 2> %t | FileCheck --check-prefix=CHECK-NO-RME %s +// RUN: FileCheck --check-prefix=CHECK-NO-RME-ERROR %s < %t + +msr MFAR_EL3, x0 +msr GPCCR_EL3, x0 +msr GPTBR_EL3, x0 +mrs x0, MFAR_EL3 +mrs x0, GPCCR_EL3 +mrs x0, GPTBR_EL3 +// CHECK: msr MFAR_EL3, x0 // encoding: [0xa0,0x60,0x1e,0xd5] +// CHECK: msr GPCCR_EL3, x0 // encoding: [0xc0,0x21,0x1e,0xd5] +// CHECK: msr GPTBR_EL3, x0 // encoding: [0x80,0x21,0x1e,0xd5] +// CHECK: mrs x0, MFAR_EL3 // encoding: [0xa0,0x60,0x3e,0xd5] +// CHECK: mrs x0, GPCCR_EL3 // encoding: [0xc0,0x21,0x3e,0xd5] +// CHECK: mrs x0, GPTBR_EL3 // encoding: [0x80,0x21,0x3e,0xd5] +// CHECK-NO-RME-ERROR: [[@LINE-12]]:5: error: expected writable system register +// CHECK-NO-RME-ERROR: [[@LINE-12]]:5: error: expected writable system register +// CHECK-NO-RME-ERROR: [[@LINE-12]]:5: error: expected writable system register +// CHECK-NO-RME-ERROR: [[@LINE-12]]:9: error: expected readable system register +// CHECK-NO-RME-ERROR: [[@LINE-12]]:9: error: expected readable system register +// CHECK-NO-RME-ERROR: [[@LINE-12]]:9: error: expected readable system register + +tlbi rpaos, x0 +tlbi rpalos, x0 +tlbi paallos +tlbi paall +// CHECK: tlbi rpaos, x0 // encoding: [0x60,0x84,0x0e,0xd5] +// CHECK: tlbi rpalos, x0 // encoding: [0xe0,0x84,0x0e,0xd5] +// CHECK: tlbi paallos // encoding: [0x9f,0x81,0x0e,0xd5] +// CHECK: tlbi paall // encoding: [0x9f,0x87,0x0e,0xd5] +// CHECK-NO-RME-ERROR: [[@LINE-8]]:6: error: TLBI RPAOS requires: rme +// CHECK-NO-RME-ERROR: [[@LINE-8]]:6: error: TLBI RPALOS requires: rme +// CHECK-NO-RME-ERROR: [[@LINE-8]]:6: error: TLBI PAALLOS requires: rme +// CHECK-NO-RME-ERROR: [[@LINE-8]]:6: error: TLBI PAALL requires: rme + +tlbi RPAOS +tlbi RPALOS +tlbi PAALLOS, x25 +tlbi PAALL, x25 +// CHECK-ERROR: error: specified {{TLBI|tlbi}} op requires a register +// CHECK-ERROR-NEXT: tlbi RPAOS +// CHECK-ERROR-NEXT: ^ +// CHECK-ERROR-NEXT: error: specified {{TLBI|tlbi}} op requires a register +// CHECK-ERROR-NEXT: tlbi RPALOS +// CHECK-ERROR-NEXT: ^ +// CHECK-ERROR-NEXT: error: specified {{TLBI|tlbi}} op does not use a register +// CHECK-ERROR-NEXT: tlbi PAALLOS, x25 +// CHECK-ERROR-NEXT: ^ +// CHECK-ERROR-NEXT: error: specified {{TLBI|tlbi}} op does not use a register +// CHECK-ERROR-NEXT: tlbi PAALL, x25 +// CHECK-ERROR-NEXT: ^ +// CHECK-NO-RME-ERROR: [[@LINE-16]]:6: error: TLBI RPAOS requires: rme +// CHECK-NO-RME-ERROR: [[@LINE-16]]:6: error: TLBI RPALOS requires: rme +// CHECK-NO-RME-ERROR: [[@LINE-16]]:6: error: TLBI PAALLOS requires: rme +// CHECK-NO-RME-ERROR: [[@LINE-16]]:6: error: TLBI PAALL requires: rme + +sys #6, c8, c4, #3 +sys #6, c8, c4, #7 +sys #6, c8, c1, #4 +sys #6, c8, c7, #4 +// CHECK: tlbi rpaos +// CHECK: tlbi rpalos +// CHECK: tlbi paallos +// CHECK: tlbi paall +// CHECK-NO-RME: sys #6, c8, c4, #3 +// CHECK-NO-RME: sys #6, c8, c4, #7 +// CHECK-NO-RME: sys #6, c8, c1, #4 +// CHECK-NO-RME: sys #6, c8, c7, #4 diff --git a/llvm/test/MC/Disassembler/AArch64/armv9a-rme.txt b/llvm/test/MC/Disassembler/AArch64/armv9a-rme.txt new file mode 100644 index 0000000000000..f10a07ce1e8ab --- /dev/null +++ b/llvm/test/MC/Disassembler/AArch64/armv9a-rme.txt @@ -0,0 +1,25 @@ +# RUN: llvm-mc -triple aarch64-arm-none-eabi -mattr +rme -disassemble %s 2>&1 | FileCheck %s +# RUN: llvm-mc -triple aarch64-arm-none-eabi -disassemble %s 2>&1 | FileCheck --check-prefix=CHECK-NO-RME %s + +[0xa0,0x60,0x3e,0xd5] +[0xc0,0x21,0x3e,0xd5] +[0x80,0x21,0x3e,0xd5] +# CHECK: mrs x0, MFAR_EL3 +# CHECK: mrs x0, GPCCR_EL3 +# CHECK: mrs x0, GPTBR_EL3 +# CHECK-NO-RME: mrs x0, S3_6_C6_C0_5 +# CHECK-NO-RME: mrs x0, S3_6_C2_C1_6 +# CHECK-NO-RME: mrs x0, S3_6_C2_C1_4 + +[0x60,0x84,0x0e,0xd5] +[0xe0,0x84,0x0e,0xd5] +[0x9f,0x81,0x0e,0xd5] +[0x9f,0x87,0x0e,0xd5] +# CHECK: tlbi rpaos, x0 +# CHECK: tlbi rpalos, x0 +# CHECK: tlbi paallos +# CHECK: tlbi paall +# CHECK-NO-RME: sys #6, c8, c4, #3 +# CHECK-NO-RME: sys #6, c8, c4, #7 +# CHECK-NO-RME: sys #6, c8, c1, #4 +# CHECK-NO-RME: sys #6, c8, c7, #4 \ No newline at end of file From 4a6bd8e3e76d60c44fef70ed03f661b0c73fb5c7 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Fri, 25 Jun 2021 10:42:00 -0400 Subject: [PATCH 035/619] [OpenMP] Increase attributor iterations on the GPU Increase the number of attributor iterations on a GPU target. I forgot to change this in D104416. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D104920 --- llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index 32db92ee86463..2410e52848cb0 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -2668,9 +2668,9 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) { SetVector Functions(SCC.begin(), SCC.end()); OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ Functions, Kernels); - unsigned MaxFixponitIterations = (Kernels.empty()) ? 64 : 32; - Attributor A(Functions, InfoCache, CGUpdater, nullptr, true, false, MaxFixponitIterations, OREGetter, - DEBUG_TYPE); + unsigned MaxFixponitIterations = (isOpenMPDevice) ? 128 : 32; + Attributor A(Functions, InfoCache, CGUpdater, nullptr, true, false, + MaxFixponitIterations, OREGetter, DEBUG_TYPE); OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); bool Changed = OMPOpt.run(true); @@ -2720,9 +2720,9 @@ PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C, OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator, /*CGSCC*/ Functions, Kernels); - unsigned MaxFixponitIterations = (isOpenMPDevice(M)) ? 64 : 32; - Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true, MaxFixponitIterations, OREGetter, - DEBUG_TYPE); + unsigned MaxFixponitIterations = (isOpenMPDevice(M)) ? 128 : 32; + Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true, + MaxFixponitIterations, OREGetter, DEBUG_TYPE); OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); bool Changed = OMPOpt.run(false); @@ -2786,7 +2786,7 @@ struct OpenMPOptCGSCCLegacyPass : public CallGraphSCCPass { Allocator, /*CGSCC*/ Functions, Kernels); - unsigned MaxFixponitIterations = (isOpenMPDevice(M)) ? 64 : 32; + unsigned MaxFixponitIterations = (isOpenMPDevice(M)) ? 128 : 32; Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true, MaxFixponitIterations, OREGetter, DEBUG_TYPE); From e9396449777f02d573deb25d603ee1b1d6e568c1 Mon Sep 17 00:00:00 2001 From: Stephan Herhut Date: Mon, 28 Jun 2021 13:25:29 +0200 Subject: [PATCH 036/619] [mlir][memref] Implement lowering of memref.copy to llvm This lowering uses a library call to implement copying in the general case, i.e., supporting arbitrary rank and strided layouts. --- .../mlir/Dialect/LLVMIR/FunctionCallUtils.h | 2 + .../mlir/ExecutionEngine/CRunnerUtils.h | 7 +++ .../StandardToLLVM/StandardToLLVM.cpp | 63 +++++++++++++++++++ .../Dialect/LLVMIR/IR/FunctionCallUtils.cpp | 10 +++ mlir/lib/ExecutionEngine/CRunnerUtils.cpp | 48 ++++++++++++++ 5 files changed, 130 insertions(+) diff --git a/mlir/include/mlir/Dialect/LLVMIR/FunctionCallUtils.h b/mlir/include/mlir/Dialect/LLVMIR/FunctionCallUtils.h index 7efff9774cd50..6380ff2d8e132 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/FunctionCallUtils.h +++ b/mlir/include/mlir/Dialect/LLVMIR/FunctionCallUtils.h @@ -45,6 +45,8 @@ LLVM::LLVMFuncOp lookupOrCreateMallocFn(ModuleOp moduleOp, Type indexType); LLVM::LLVMFuncOp lookupOrCreateAlignedAllocFn(ModuleOp moduleOp, Type indexType); LLVM::LLVMFuncOp lookupOrCreateFreeFn(ModuleOp moduleOp); +LLVM::LLVMFuncOp lookupOrCreateMemRefCopyFn(ModuleOp moduleOp, Type indexType, + Type unrankedDescriptorType); /// Create a FuncOp with signature `resultType`(`paramTypes`)` and name `name`. LLVM::LLVMFuncOp lookupOrCreateFn(ModuleOp moduleOp, StringRef name, diff --git a/mlir/include/mlir/ExecutionEngine/CRunnerUtils.h b/mlir/include/mlir/ExecutionEngine/CRunnerUtils.h index fb0b2a65a67eb..bd855fcc03a96 100644 --- a/mlir/include/mlir/ExecutionEngine/CRunnerUtils.h +++ b/mlir/include/mlir/ExecutionEngine/CRunnerUtils.h @@ -330,6 +330,13 @@ class DynamicMemRefType { const int64_t *strides; }; +//===----------------------------------------------------------------------===// +// Small runtime support library for memref.copy lowering during codegen. +//===----------------------------------------------------------------------===// +extern "C" MLIR_CRUNNERUTILS_EXPORT void +memrefCopy(int64_t elemSize, UnrankedMemRefType *src, + UnrankedMemRefType *dst); + //===----------------------------------------------------------------------===// // Small runtime support library for vector.print lowering during codegen. //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp index db5918e95f182..eb390bf8844fa 100644 --- a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp +++ b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp @@ -2618,6 +2618,68 @@ struct MemRefCastOpLowering : public ConvertOpToLLVMPattern { } }; +struct MemRefCopyOpLowering : public ConvertOpToLLVMPattern { + using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; + + LogicalResult + matchAndRewrite(memref::CopyOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override { + auto loc = op.getLoc(); + memref::CopyOp::Adaptor adaptor(operands); + auto srcType = op.source().getType().cast(); + auto targetType = op.target().getType().cast(); + + // First make sure we have an unranked memref descriptor representation. + auto makeUnranked = [&, this](Value ranked, BaseMemRefType type) { + auto rank = rewriter.create( + loc, getIndexType(), rewriter.getIndexAttr(type.getRank())); + auto *typeConverter = getTypeConverter(); + auto ptr = + typeConverter->promoteOneMemRefDescriptor(loc, ranked, rewriter); + auto voidPtr = + rewriter.create(loc, getVoidPtrType(), ptr) + .getResult(); + auto unrankedType = + UnrankedMemRefType::get(type.getElementType(), type.getMemorySpace()); + return UnrankedMemRefDescriptor::pack(rewriter, loc, *typeConverter, + unrankedType, + ValueRange{rank, voidPtr}); + }; + + Value unrankedSource = srcType.hasRank() + ? makeUnranked(adaptor.source(), srcType) + : adaptor.source(); + Value unrankedTarget = targetType.hasRank() + ? makeUnranked(adaptor.target(), targetType) + : adaptor.target(); + + // Now promote the unranked descriptors to the stack. + auto one = rewriter.create(loc, getIndexType(), + rewriter.getIndexAttr(1)); + auto promote = [&](Value desc) { + auto ptrType = LLVM::LLVMPointerType::get(desc.getType()); + auto allocated = + rewriter.create(loc, ptrType, ValueRange{one}); + rewriter.create(loc, desc, allocated); + return allocated; + }; + + auto sourcePtr = promote(unrankedSource); + auto targetPtr = promote(unrankedTarget); + + auto elemSize = rewriter.create( + loc, getIndexType(), + rewriter.getIndexAttr(srcType.getElementTypeBitWidth() / 8)); + auto copyFn = LLVM::lookupOrCreateMemRefCopyFn( + op->getParentOfType(), getIndexType(), sourcePtr.getType()); + rewriter.create(loc, copyFn, + ValueRange{elemSize, sourcePtr, targetPtr}); + rewriter.eraseOp(op); + + return success(); + } +}; + /// Extracts allocated, aligned pointers and offset from a ranked or unranked /// memref type. In unranked case, the fields are extracted from the underlying /// ranked descriptor. @@ -4009,6 +4071,7 @@ void mlir::populateStdToLLVMMemoryConversionPatterns( GetGlobalMemrefOpLowering, LoadOpLowering, MemRefCastOpLowering, + MemRefCopyOpLowering, MemRefReinterpretCastOpLowering, MemRefReshapeOpLowering, RankOpLowering, diff --git a/mlir/lib/Dialect/LLVMIR/IR/FunctionCallUtils.cpp b/mlir/lib/Dialect/LLVMIR/IR/FunctionCallUtils.cpp index a43c2251c2d99..47a5851b51f2e 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/FunctionCallUtils.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/FunctionCallUtils.cpp @@ -35,6 +35,7 @@ static constexpr llvm::StringRef kPrintNewline = "printNewline"; static constexpr llvm::StringRef kMalloc = "malloc"; static constexpr llvm::StringRef kAlignedAlloc = "aligned_alloc"; static constexpr llvm::StringRef kFree = "free"; +static constexpr llvm::StringRef kMemRefCopy = "memref_copy"; /// Generic print function lookupOrCreate helper. LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreateFn(ModuleOp moduleOp, StringRef name, @@ -114,6 +115,15 @@ LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreateFreeFn(ModuleOp moduleOp) { LLVM::LLVMVoidType::get(moduleOp->getContext())); } +LLVM::LLVMFuncOp +mlir::LLVM::lookupOrCreateMemRefCopyFn(ModuleOp moduleOp, Type indexType, + Type unrankedDescriptorType) { + return LLVM::lookupOrCreateFn( + moduleOp, kMemRefCopy, + ArrayRef{indexType, unrankedDescriptorType, unrankedDescriptorType}, + LLVM::LLVMVoidType::get(moduleOp->getContext())); +} + Operation::result_range mlir::LLVM::createLLVMCall(OpBuilder &b, Location loc, LLVM::LLVMFuncOp fn, ValueRange paramTypes, diff --git a/mlir/lib/ExecutionEngine/CRunnerUtils.cpp b/mlir/lib/ExecutionEngine/CRunnerUtils.cpp index e5b682a7b6de5..bf96afb73725b 100644 --- a/mlir/lib/ExecutionEngine/CRunnerUtils.cpp +++ b/mlir/lib/ExecutionEngine/CRunnerUtils.cpp @@ -18,8 +18,10 @@ #include #endif // _WIN32 +#include #include #include +#include #ifdef MLIR_CRUNNERUTILS_DEFINE_FUNCTIONS @@ -36,6 +38,52 @@ extern "C" void printClose() { fputs(" )", stdout); } extern "C" void printComma() { fputs(", ", stdout); } extern "C" void printNewline() { fputc('\n', stdout); } +extern "C" MLIR_CRUNNERUTILS_EXPORT void +memrefCopy(int64_t elemSize, UnrankedMemRefType *srcArg, + UnrankedMemRefType *dstArg) { + DynamicMemRefType src(*srcArg); + DynamicMemRefType dst(*dstArg); + + int64_t rank = src.rank; + int64_t *indices = static_cast(alloca(sizeof(int64_t) * rank)); + int64_t *srcStrides = static_cast(alloca(sizeof(int64_t) * rank)); + int64_t *dstStrides = static_cast(alloca(sizeof(int64_t) * rank)); + + char *srcPtr = src.data + src.offset * elemSize; + char *dstPtr = dst.data + dst.offset * elemSize; + + // Initialize index and scale strides. + for (int rankp = 0; rankp < rank; ++rankp) { + indices[rankp] = 0; + srcStrides[rankp] = src.strides[rankp] * elemSize; + dstStrides[rankp] = dst.strides[rankp] * elemSize; + } + + int64_t readIndex = 0, writeIndex = 0; + for (;;) { + // Copy over the element, byte by byte. + memcpy(dstPtr + writeIndex, srcPtr + readIndex, elemSize); + // Advance index and read position. + for (int64_t axis = rank - 1; axis >= 0; --axis) { + // Advance at current axis. + auto newIndex = ++indices[axis]; + readIndex += srcStrides[axis]; + writeIndex += dstStrides[axis]; + // If this is a valid index, we have our next index, so continue copying. + if (src.sizes[axis] != newIndex) + break; + // We reached the end of this axis. If this is axis 0, we are done. + if (axis == 0) + return; + // Else, reset to 0 and undo the advancement of the linear index that + // this axis had. The continue with the axis one outer. + indices[axis] = 0; + readIndex -= src.sizes[axis] * srcStrides[axis]; + writeIndex -= dst.sizes[axis] * dstStrides[axis]; + } + } +} + /// Prints GFLOPS rating. extern "C" void print_flops(double flops) { fprintf(stderr, "%lf GFLOPS\n", flops / 1.0E9); From 5cf27532fa6f8b01acd0bd5401ce933b2eba0688 Mon Sep 17 00:00:00 2001 From: Lucas Prates Date: Mon, 28 Jun 2021 13:51:27 +0100 Subject: [PATCH 037/619] [NFC] Fixing short title underline in release notes file --- llvm/docs/ReleaseNotes.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index e0961e60763d8..ad3905080338b 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -75,7 +75,7 @@ Changes to TableGen ------------------- Changes to the AArch64 Backend --------------------------- +------------------------------ * Introduced support for Armv9-A's Realm Management Extension. From 22aa3680eaccb9b77ca224711c4da3a354aa2d45 Mon Sep 17 00:00:00 2001 From: Corentin Jabot Date: Mon, 28 Jun 2021 09:00:45 -0400 Subject: [PATCH 038/619] [C++20] Support for lambdas in unevaluated context Partially implement P0315R4. This patch allow lambda in unevaluated context. It does not implement temp.deduct/9. --- clang/lib/Sema/SemaConcept.cpp | 9 ++++--- clang/lib/Sema/SemaExpr.cpp | 6 +++-- clang/test/SemaCXX/anonymous-struct.cpp | 2 +- clang/test/SemaCXX/lambda-unevaluated.cpp | 31 +++++++++++++++++++++++ clang/www/cxx_status.html | 2 +- 5 files changed, 43 insertions(+), 7 deletions(-) create mode 100644 clang/test/SemaCXX/lambda-unevaluated.cpp diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp index 552534824588e..f2c70d0a56efb 100644 --- a/clang/lib/Sema/SemaConcept.cpp +++ b/clang/lib/Sema/SemaConcept.cpp @@ -43,9 +43,12 @@ class LogicalBinOp { LHS = BO->getLHS(); RHS = BO->getRHS(); } else if (auto *OO = dyn_cast(E)) { - Op = OO->getOperator(); - LHS = OO->getArg(0); - RHS = OO->getArg(1); + // If OO is not || or && it might not have exactly 2 arguments. + if (OO->getNumArgs() == 2) { + Op = OO->getOperator(); + LHS = OO->getArg(0); + RHS = OO->getArg(1); + } } } diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 2d0f314f380fe..728d7b61d4a86 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -16722,8 +16722,10 @@ void Sema::PopExpressionEvaluationContext() { if (!Rec.Lambdas.empty()) { using ExpressionKind = ExpressionEvaluationContextRecord::ExpressionKind; - if (Rec.ExprContext == ExpressionKind::EK_TemplateArgument || Rec.isUnevaluated() || - (Rec.isConstantEvaluated() && !getLangOpts().CPlusPlus17)) { + if (!getLangOpts().CPlusPlus20 && + (Rec.ExprContext == ExpressionKind::EK_TemplateArgument || + Rec.isUnevaluated() || + (Rec.isConstantEvaluated() && !getLangOpts().CPlusPlus17))) { unsigned D; if (Rec.isUnevaluated()) { // C++11 [expr.prim.lambda]p2: diff --git a/clang/test/SemaCXX/anonymous-struct.cpp b/clang/test/SemaCXX/anonymous-struct.cpp index 1b6207d19e44d..0a5395e15780b 100644 --- a/clang/test/SemaCXX/anonymous-struct.cpp +++ b/clang/test/SemaCXX/anonymous-struct.cpp @@ -49,7 +49,7 @@ typedef struct // expected-warning {{anonymous non-C-compatible type given name : B { // expected-note {{type is not C-compatible due to this base class}} } C; // expected-note {{type is given name 'C' for linkage purposes by this typedef declaration}} -#if __cplusplus > 201703L +#if __cplusplus > 201703L && __cplusplus < 202002L typedef struct { // expected-warning {{anonymous non-C-compatible type given name for linkage purposes by typedef declaration; add a tag name here}} static_assert([]{ return true; }()); // expected-note {{type is not C-compatible due to this lambda expression}} } Lambda1; // expected-note {{type is given name 'Lambda1' for linkage purposes by this typedef declaration}} diff --git a/clang/test/SemaCXX/lambda-unevaluated.cpp b/clang/test/SemaCXX/lambda-unevaluated.cpp new file mode 100644 index 0000000000000..07fa0d94bc8e4 --- /dev/null +++ b/clang/test/SemaCXX/lambda-unevaluated.cpp @@ -0,0 +1,31 @@ +// RUN: %clang_cc1 -std=c++20 %s -verify + + +template struct Nothing {}; +Nothing<[]() { return 0; }()> nothing; + +template struct NothingT {}; +Nothing<[]() { return 0; }> nothingT; + +template +concept True = [] { return true; }(); +static_assert(True); + +static_assert(sizeof([] { return 0; })); +static_assert(sizeof([] { return 0; }())); + +void f() noexcept(noexcept([] { return 0; }())); + +using a = decltype([] { return 0; }); +using b = decltype([] { return 0; }()); +using c = decltype([]() noexcept(noexcept([] { return 0; }())) { return 0; }); +using d = decltype(sizeof([] { return 0; })); + +template +int unique_test1(); +static_assert(&unique_test1<[](){}> != &unique_test1<[](){}>); + +template +auto g(T) -> decltype([]() { T::invalid; } ()); +auto e = g(0); // expected-error{{no matching function for call}} +// expected-note@-2 {{substitution failure}} diff --git a/clang/www/cxx_status.html b/clang/www/cxx_status.html index a478a4487d150..b7f2501cbc534 100755 --- a/clang/www/cxx_status.html +++ b/clang/www/cxx_status.html @@ -1011,7 +1011,7 @@

C++20 implementation status

Lambdas in unevaluated contexts P0315R4 - No + Clang 13 From f9f5d415453b3fee98817d4f0bd8e5b5415e34cc Mon Sep 17 00:00:00 2001 From: Brendon Cahoon Date: Fri, 30 Apr 2021 09:57:44 -0400 Subject: [PATCH 039/619] [AMDGPU][GlobalISel] Legalize and select G_SBFX and G_UBFX Adds legalizer, register bank select, and instruction select support for G_SBFX and G_UBFX. These opcodes generate scalar or vector ALU bitfield extract instructions for AMDGPU. The instructions allow both constant or register values for the offset and width operands. The 32-bit scalar version is expanded to a sequence that combines the offset and width into a single register. There are no 64-bit vgpr bitfield extract instructions, so the operations are expanded to a sequence of instructions that implement the operation. If the width is a constant, then the 32-bit bitfield extract instructions are used. Moved the AArch64 specific code for creating G_SBFX to CombinerHelper.cpp so that it can be used by other targets. Only bitfield extracts with constant offset and width values are handled currently. Differential Revision: https://reviews.llvm.org/D100149 --- .../llvm/CodeGen/GlobalISel/CombinerHelper.h | 2 + .../include/llvm/Target/GlobalISel/Combine.td | 11 +- .../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 35 +- .../CodeGen/GlobalISel/LegalizerHelper.cpp | 15 + llvm/lib/Target/AArch64/AArch64Combine.td | 9 - .../GISel/AArch64PostLegalizerCombiner.cpp | 28 - llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 5 + llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 3 + .../AMDGPU/AMDGPUInstructionSelector.cpp | 27 + .../Target/AMDGPU/AMDGPUInstructionSelector.h | 1 + .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 7 + .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 78 +- .../Target/AMDGPU/AMDGPURegisterBankInfo.h | 3 +- llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll | 135 +- .../AMDGPU/GlobalISel/cvt_f32_ubyte.ll | 158 +- .../GlobalISel/extractelement-stack-lower.ll | 3 +- .../AMDGPU/GlobalISel/extractelement.i8.ll | 3860 ++++---- llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll | 445 +- llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll | 664 +- .../AMDGPU/GlobalISel/insertelement.i8.ll | 8046 ++++++++--------- .../AMDGPU/GlobalISel/inst-select-sbfx.mir | 50 + .../AMDGPU/GlobalISel/inst-select-ubfx.mir | 78 + .../AMDGPU/GlobalISel/legalize-sbfx.mir | 104 + .../AMDGPU/GlobalISel/legalize-ubfx.mir | 105 + .../AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll | 3 +- .../GlobalISel/llvm.amdgcn.workitem.id.ll | 14 +- .../GlobalISel/postlegalizercombiner-sbfx.mir | 151 + .../GlobalISel/postlegalizercombiner-ubfx.mir | 103 + .../AMDGPU/GlobalISel/regbankselect-sbfx.mir | 342 + .../AMDGPU/GlobalISel/regbankselect-ubfx.mir | 342 + .../test/CodeGen/AMDGPU/GlobalISel/saddsat.ll | 160 +- llvm/test/CodeGen/AMDGPU/GlobalISel/sbfx.ll | 202 + .../test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll | 410 +- .../test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll | 160 +- .../test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll | 160 +- llvm/test/CodeGen/AMDGPU/GlobalISel/ubfx.ll | 112 + .../test/CodeGen/AMDGPU/GlobalISel/udivrem.ll | 260 +- .../test/CodeGen/AMDGPU/GlobalISel/usubsat.ll | 160 +- 38 files changed, 8591 insertions(+), 7860 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sbfx.mir create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ubfx.mir create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sbfx.mir create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ubfx.mir create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-sbfx.mir create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-ubfx.mir create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sbfx.mir create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ubfx.mir create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/sbfx.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/ubfx.ll diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index 90697306b5281..6acf6ccb616e3 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -522,6 +522,8 @@ class CombinerHelper { /// or false constant based off of KnownBits information. bool matchICmpToTrueFalseKnownBits(MachineInstr &MI, int64_t &MatchInfo); + bool matchBitfieldExtractFromSExtInReg( + MachineInstr &MI, std::function &MatchInfo); /// Match: and (lshr x, cst), mask -> ubfx x, cst, width bool matchBitfieldExtractFromAnd( MachineInstr &MI, std::function &MatchInfo); diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index 3ced5dc793da9..a01d9b26f351c 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -637,6 +637,15 @@ def bitfield_extract_from_and : GICombineRule< def funnel_shift_combines : GICombineGroup<[funnel_shift_to_rotate]>; +def bitfield_extract_from_sext_inreg : GICombineRule< + (defs root:$root, build_fn_matchinfo:$info), + (match (wip_match_opcode G_SEXT_INREG):$root, + [{ return Helper.matchBitfieldExtractFromSExtInReg(*${root}, ${info}); }]), + (apply [{ return Helper.applyBuildFn(*${root}, ${info}); }])>; + +def form_bitfield_extract : GICombineGroup<[bitfield_extract_from_sext_inreg, + bitfield_extract_from_and]>; + // FIXME: These should use the custom predicate feature once it lands. def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero, undef_to_negative_one, @@ -680,7 +689,7 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines, unmerge_zext_to_zext, trunc_ext_fold, trunc_shl, const_combines, xor_of_and_with_same_reg, ptr_add_with_zero, shift_immed_chain, shift_of_shifted_logic_chain, load_or_combine, - div_rem_to_divrem, funnel_shift_combines, bitfield_extract_from_and]>; + div_rem_to_divrem, funnel_shift_combines, form_bitfield_extract]>; // A combine group used to for prelegalizer combiners at -O0. The combines in // this group have been selected based on experiments to balance code size and diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 4966f6d8df8d7..0520fbee2692c 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -4013,6 +4013,36 @@ bool CombinerHelper::matchICmpToTrueFalseKnownBits(MachineInstr &MI, return true; } +/// Form a G_SBFX from a G_SEXT_INREG fed by a right shift. +bool CombinerHelper::matchBitfieldExtractFromSExtInReg( + MachineInstr &MI, std::function &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + LLT Ty = MRI.getType(Src); + LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty); + if (!LI || !LI->isLegalOrCustom({TargetOpcode::G_SBFX, {Ty, ExtractTy}})) + return false; + int64_t Width = MI.getOperand(2).getImm(); + Register ShiftSrc; + int64_t ShiftImm; + if (!mi_match( + Src, MRI, + m_OneNonDBGUse(m_any_of(m_GAShr(m_Reg(ShiftSrc), m_ICst(ShiftImm)), + m_GLShr(m_Reg(ShiftSrc), m_ICst(ShiftImm)))))) + return false; + if (ShiftImm < 0 || ShiftImm + Width > Ty.getScalarSizeInBits()) + return false; + + MatchInfo = [=](MachineIRBuilder &B) { + auto Cst1 = B.buildConstant(ExtractTy, ShiftImm); + auto Cst2 = B.buildConstant(ExtractTy, Width); + B.buildSbfx(Dst, ShiftSrc, Cst1, Cst2); + }; + return true; +} + +/// Form a G_UBFX from "(a srl b) & mask", where b and mask are constants. bool CombinerHelper::matchBitfieldExtractFromAnd( MachineInstr &MI, std::function &MatchInfo) { assert(MI.getOpcode() == TargetOpcode::G_AND); @@ -4039,10 +4069,11 @@ bool CombinerHelper::matchBitfieldExtractFromAnd( if (static_cast(LSBImm) >= Size) return false; + LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty); uint64_t Width = APInt(Size, AndImm).countTrailingOnes(); MatchInfo = [=](MachineIRBuilder &B) { - auto WidthCst = B.buildConstant(Ty, Width); - auto LSBCst = B.buildConstant(Ty, LSBImm); + auto WidthCst = B.buildConstant(ExtractTy, Width); + auto LSBCst = B.buildConstant(ExtractTy, LSBImm); B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {ShiftSrc, LSBCst, WidthCst}); }; return true; diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 963b893d1629b..d4c4663146e54 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -2027,6 +2027,21 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { Observer.changedInstr(MI); return Legalized; + case TargetOpcode::G_SBFX: + case TargetOpcode::G_UBFX: + Observer.changingInstr(MI); + + if (TypeIdx == 0) { + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); + widenScalarDst(MI, WideTy); + } else { + widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT); + widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT); + } + + Observer.changedInstr(MI); + return Legalized; + case TargetOpcode::G_SHL: Observer.changingInstr(MI); diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td index b395a4ef81ff4..62493ae4c0568 100644 --- a/llvm/lib/Target/AArch64/AArch64Combine.td +++ b/llvm/lib/Target/AArch64/AArch64Combine.td @@ -168,15 +168,6 @@ def build_vector_to_dup : GICombineRule< def build_vector_lowering : GICombineGroup<[build_vector_to_dup]>; -def bitfield_extract_from_sext_inreg : GICombineRule< - (defs root:$root, build_fn_matchinfo:$info), - (match (wip_match_opcode G_SEXT_INREG):$root, - [{ return matchBitfieldExtractFromSExtInReg(*${root}, MRI, ${info}); }]), - (apply [{ return Helper.applyBuildFn(*${root}, ${info}); }])>; - -def form_bitfield_extract : GICombineGroup<[bitfield_extract_from_sext_inreg, - bitfield_extract_from_and]>; - def lower_vector_fcmp : GICombineRule< (defs root:$root), (match (wip_match_opcode G_FCMP):$root, diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp index 753b380faf44d..3001a8f9b18e0 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp @@ -240,34 +240,6 @@ bool applyAArch64MulConstCombine( return true; } -/// Form a G_SBFX from a G_SEXT_INREG fed by a right shift. -static bool matchBitfieldExtractFromSExtInReg( - MachineInstr &MI, MachineRegisterInfo &MRI, - std::function &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); - Register Dst = MI.getOperand(0).getReg(); - Register Src = MI.getOperand(1).getReg(); - int64_t Width = MI.getOperand(2).getImm(); - LLT Ty = MRI.getType(Src); - assert((Ty == LLT::scalar(32) || Ty == LLT::scalar(64)) && - "Unexpected type for G_SEXT_INREG?"); - Register ShiftSrc; - int64_t ShiftImm; - if (!mi_match( - Src, MRI, - m_OneNonDBGUse(m_any_of(m_GAShr(m_Reg(ShiftSrc), m_ICst(ShiftImm)), - m_GLShr(m_Reg(ShiftSrc), m_ICst(ShiftImm)))))) - return false; - if (ShiftImm < 0 || ShiftImm + Width > Ty.getSizeInBits()) - return false; - MatchInfo = [=](MachineIRBuilder &B) { - auto Cst1 = B.buildConstant(Ty, ShiftImm); - auto Cst2 = B.buildConstant(Ty, Width); - B.buildInstr(TargetOpcode::G_SBFX, {Dst}, {ShiftSrc, Cst1, Cst2}); - }; - return true; -} - #define AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS #include "AArch64GenPostLegalizeGICombiner.inc" #undef AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 6f9ff03dcb87b..fecbf5d80e8e1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -4827,3 +4827,8 @@ AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { return AtomicExpansionKind::None; } } + +bool AMDGPUTargetLowering::isConstantUnsignedBitfieldExtactLegal( + unsigned Opc, LLT Ty1, LLT Ty2) const { + return Ty1 == Ty2 && (Ty1 == LLT::scalar(32) || Ty1 == LLT::scalar(64)); +} diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index dba01af02d064..e61021d451f87 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -327,6 +327,9 @@ class AMDGPUTargetLowering : public TargetLowering { } AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override; + + bool isConstantUnsignedBitfieldExtactLegal(unsigned Opc, LLT Ty1, + LLT Ty2) const override; }; namespace AMDGPUISD { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index e5d154d1ff327..323aaaf70cd43 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -755,6 +755,30 @@ bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const { return true; } +bool AMDGPUInstructionSelector::selectG_SBFX_UBFX(MachineInstr &MI) const { + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + Register OffsetReg = MI.getOperand(2).getReg(); + Register WidthReg = MI.getOperand(3).getReg(); + + assert(RBI.getRegBank(DstReg, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID && + "scalar BFX instructions are expanded in regbankselect"); + assert(MRI->getType(MI.getOperand(0).getReg()).getSizeInBits() == 32 && + "64-bit vector BFX instructions are expanded in regbankselect"); + + const DebugLoc &DL = MI.getDebugLoc(); + MachineBasicBlock *MBB = MI.getParent(); + + bool IsSigned = MI.getOpcode() == TargetOpcode::G_SBFX; + unsigned Opc = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64; + auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opc), DstReg) + .addReg(SrcReg) + .addReg(OffsetReg) + .addReg(WidthReg); + MI.eraseFromParent(); + return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); +} + bool AMDGPUInstructionSelector::selectInterpP1F16(MachineInstr &MI) const { if (STI.getLDSBankCount() != 16) return selectImpl(MI, *CoverageInfo); @@ -3189,6 +3213,9 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) { return selectBVHIntrinsic(I); case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD: return selectAMDGPU_BUFFER_ATOMIC_FADD(I); + case AMDGPU::G_SBFX: + case AMDGPU::G_UBFX: + return selectG_SBFX_UBFX(I); default: return selectImpl(I, *CoverageInfo); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index 51878c99dcc18..cb05a1cb63691 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -109,6 +109,7 @@ class AMDGPUInstructionSelector final : public InstructionSelector { bool selectG_PTR_ADD(MachineInstr &I) const; bool selectG_IMPLICIT_DEF(MachineInstr &I) const; bool selectG_INSERT(MachineInstr &I) const; + bool selectG_SBFX_UBFX(MachineInstr &I) const; bool selectInterpP1F16(MachineInstr &MI) const; bool selectWritelane(MachineInstr &MI) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index f029639f78d5a..5bbb5f3f77d54 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -1654,6 +1654,13 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .minScalar(0, S32) .lower(); + getActionDefinitionsBuilder({G_SBFX, G_UBFX}) + .legalFor({{S32, S32}, {S64, S32}}) + .clampScalar(1, S32, S32) + .clampScalar(0, S32, S64) + .widenScalarToNextPow2(0) + .scalarize(0); + getActionDefinitionsBuilder({ // TODO: Verify V_BFI_B32 is generated from expanded bit ops G_FCOPYSIGN, diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 4fbd4618e4317..0e4005627e025 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -1532,8 +1532,8 @@ bool AMDGPURegisterBankInfo::applyMappingSBufferLoad( return true; } -bool AMDGPURegisterBankInfo::applyMappingBFEIntrinsic( - const OperandsMapper &OpdMapper, bool Signed) const { +bool AMDGPURegisterBankInfo::applyMappingBFE(const OperandsMapper &OpdMapper, + bool Signed) const { MachineInstr &MI = OpdMapper.getMI(); MachineRegisterInfo &MRI = OpdMapper.getMRI(); @@ -1545,19 +1545,69 @@ bool AMDGPURegisterBankInfo::applyMappingBFEIntrinsic( const LLT S32 = LLT::scalar(32); + unsigned FirstOpnd = MI.getOpcode() == AMDGPU::G_INTRINSIC ? 2 : 1; + Register SrcReg = MI.getOperand(FirstOpnd).getReg(); + Register OffsetReg = MI.getOperand(FirstOpnd + 1).getReg(); + Register WidthReg = MI.getOperand(FirstOpnd + 2).getReg(); + const RegisterBank *DstBank = OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank; if (DstBank == &AMDGPU::VGPRRegBank) { if (Ty == S32) return true; - // TODO: 64-bit version is scalar only, so we need to expand this. - return false; - } + // There is no 64-bit vgpr bitfield extract instructions so the operation + // is expanded to a sequence of instructions that implement the operation. + ApplyRegBankMapping ApplyBank(*this, MRI, &AMDGPU::VGPRRegBank); + MachineIRBuilder B(MI, ApplyBank); + + const LLT S64 = LLT::scalar(64); + // Shift the source operand so that extracted bits start at bit 0. + auto ShiftOffset = Signed ? B.buildAShr(S64, SrcReg, OffsetReg) + : B.buildLShr(S64, SrcReg, OffsetReg); + auto UnmergeSOffset = B.buildUnmerge({S32, S32}, ShiftOffset); + + // A 64-bit bitfield extract uses the 32-bit bitfield extract instructions + // if the width is a constant. + if (auto ConstWidth = getConstantVRegValWithLookThrough(WidthReg, MRI)) { + // Use the 32-bit bitfield extract instruction if the width is a constant. + // Depending on the width size, use either the low or high 32-bits. + auto Zero = B.buildConstant(S32, 0); + auto WidthImm = ConstWidth->Value.getZExtValue(); + if (WidthImm <= 32) { + // Use bitfield extract on the lower 32-bit source, and then sign-extend + // or clear the upper 32-bits. + auto Extract = + Signed ? B.buildSbfx(S32, UnmergeSOffset.getReg(0), Zero, WidthReg) + : B.buildUbfx(S32, UnmergeSOffset.getReg(0), Zero, WidthReg); + auto Extend = + Signed ? B.buildAShr(S32, Extract, B.buildConstant(S32, 31)) : Zero; + B.buildMerge(DstReg, {Extract, Extend}); + } else { + // Use bitfield extract on upper 32-bit source, and combine with lower + // 32-bit source. + auto UpperWidth = B.buildConstant(S32, WidthImm - 32); + auto Extract = + Signed + ? B.buildSbfx(S32, UnmergeSOffset.getReg(1), Zero, UpperWidth) + : B.buildUbfx(S32, UnmergeSOffset.getReg(1), Zero, UpperWidth); + B.buildMerge(DstReg, {UnmergeSOffset.getReg(0), Extract}); + } + MI.eraseFromParent(); + return true; + } - Register SrcReg = MI.getOperand(2).getReg(); - Register OffsetReg = MI.getOperand(3).getReg(); - Register WidthReg = MI.getOperand(4).getReg(); + // Expand to Src >> Offset << (64 - Width) >> (64 - Width) using 64-bit + // operations. + auto ExtShift = B.buildSub(S32, B.buildConstant(S32, 64), WidthReg); + auto SignBit = B.buildShl(S64, ShiftOffset, ExtShift); + if (Signed) + B.buildAShr(S64, SignBit, ExtShift); + else + B.buildLShr(S64, SignBit, ExtShift); + MI.eraseFromParent(); + return true; + } // The scalar form packs the offset and width in a single operand. @@ -2960,10 +3010,10 @@ void AMDGPURegisterBankInfo::applyMappingImpl( return; } case Intrinsic::amdgcn_sbfe: - applyMappingBFEIntrinsic(OpdMapper, true); + applyMappingBFE(OpdMapper, true); return; case Intrinsic::amdgcn_ubfe: - applyMappingBFEIntrinsic(OpdMapper, false); + applyMappingBFE(OpdMapper, false); return; case Intrinsic::amdgcn_ballot: // Use default handling and insert copy to vcc source. @@ -3055,6 +3105,12 @@ void AMDGPURegisterBankInfo::applyMappingImpl( case AMDGPU::G_DYN_STACKALLOC: applyMappingDynStackAlloc(MI, OpdMapper, MRI); return; + case AMDGPU::G_SBFX: + applyMappingBFE(OpdMapper, /*Signed*/ true); + return; + case AMDGPU::G_UBFX: + applyMappingBFE(OpdMapper, /*Signed*/ false); + return; default: break; } @@ -3529,6 +3585,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case AMDGPU::G_UMAX: case AMDGPU::G_ABS: case AMDGPU::G_SHUFFLE_VECTOR: + case AMDGPU::G_SBFX: + case AMDGPU::G_UBFX: if (isSALUMapping(MI)) return getDefaultMappingSOP(MI); LLVM_FALLTHROUGH; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h index c481aadeb226f..7e051e4a5424c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h @@ -81,8 +81,7 @@ class AMDGPURegisterBankInfo final : public AMDGPUGenRegisterBankInfo { MachineRegisterInfo &MRI, int RSrcIdx) const; bool applyMappingSBufferLoad(const OperandsMapper &OpdMapper) const; - bool applyMappingBFEIntrinsic(const OperandsMapper &OpdMapper, - bool Signed) const; + bool applyMappingBFE(const OperandsMapper &OpdMapper, bool Signed) const; Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI, Register Reg) const; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll index 1c17f1523de00..ece08668dcd65 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll @@ -808,17 +808,16 @@ define amdgpu_ps i32 @s_ashr_v2i16(<2 x i16> inreg %value, <2 x i16> inreg %amou ; ; GFX8-LABEL: s_ashr_v2i16: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_lshr_b32 s2, s0, 16 -; GFX8-NEXT: s_lshr_b32 s3, s1, 16 -; GFX8-NEXT: s_sext_i32_i16 s0, s0 -; GFX8-NEXT: s_sext_i32_i16 s1, s1 -; GFX8-NEXT: s_sext_i32_i16 s2, s2 -; GFX8-NEXT: s_sext_i32_i16 s3, s3 +; GFX8-NEXT: s_mov_b32 s3, 0x100010 +; GFX8-NEXT: s_sext_i32_i16 s2, s0 +; GFX8-NEXT: s_sext_i32_i16 s4, s1 +; GFX8-NEXT: s_bfe_i32 s0, s0, s3 +; GFX8-NEXT: s_bfe_i32 s1, s1, s3 ; GFX8-NEXT: s_ashr_i32 s0, s0, s1 -; GFX8-NEXT: s_ashr_i32 s1, s2, s3 -; GFX8-NEXT: s_lshl_b32 s1, s1, 16 -; GFX8-NEXT: s_and_b32 s0, s0, 0xffff -; GFX8-NEXT: s_or_b32 s0, s1, s0 +; GFX8-NEXT: s_ashr_i32 s2, s2, s4 +; GFX8-NEXT: s_lshl_b32 s0, s0, 16 +; GFX8-NEXT: s_and_b32 s1, s2, 0xffff +; GFX8-NEXT: s_or_b32 s0, s0, s1 ; GFX8-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_ashr_v2i16: @@ -1021,29 +1020,26 @@ define amdgpu_ps <2 x i32> @s_ashr_v4i16(<4 x i16> inreg %value, <4 x i16> inreg ; ; GFX8-LABEL: s_ashr_v4i16: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_lshr_b32 s4, s0, 16 -; GFX8-NEXT: s_lshr_b32 s6, s2, 16 -; GFX8-NEXT: s_lshr_b32 s5, s1, 16 -; GFX8-NEXT: s_lshr_b32 s7, s3, 16 -; GFX8-NEXT: s_sext_i32_i16 s0, s0 -; GFX8-NEXT: s_sext_i32_i16 s2, s2 -; GFX8-NEXT: s_sext_i32_i16 s4, s4 -; GFX8-NEXT: s_sext_i32_i16 s6, s6 +; GFX8-NEXT: s_mov_b32 s5, 0x100010 +; GFX8-NEXT: s_sext_i32_i16 s4, s0 +; GFX8-NEXT: s_sext_i32_i16 s7, s2 +; GFX8-NEXT: s_sext_i32_i16 s6, s1 +; GFX8-NEXT: s_sext_i32_i16 s8, s3 +; GFX8-NEXT: s_bfe_i32 s0, s0, s5 +; GFX8-NEXT: s_bfe_i32 s2, s2, s5 +; GFX8-NEXT: s_bfe_i32 s1, s1, s5 +; GFX8-NEXT: s_bfe_i32 s3, s3, s5 ; GFX8-NEXT: s_ashr_i32 s0, s0, s2 -; GFX8-NEXT: s_ashr_i32 s2, s4, s6 -; GFX8-NEXT: s_mov_b32 s4, 0xffff -; GFX8-NEXT: s_sext_i32_i16 s1, s1 -; GFX8-NEXT: s_sext_i32_i16 s3, s3 -; GFX8-NEXT: s_sext_i32_i16 s5, s5 -; GFX8-NEXT: s_sext_i32_i16 s7, s7 ; GFX8-NEXT: s_ashr_i32 s1, s1, s3 -; GFX8-NEXT: s_ashr_i32 s3, s5, s7 -; GFX8-NEXT: s_lshl_b32 s2, s2, 16 -; GFX8-NEXT: s_and_b32 s0, s0, s4 -; GFX8-NEXT: s_or_b32 s0, s2, s0 -; GFX8-NEXT: s_lshl_b32 s2, s3, 16 -; GFX8-NEXT: s_and_b32 s1, s1, s4 -; GFX8-NEXT: s_or_b32 s1, s2, s1 +; GFX8-NEXT: s_ashr_i32 s4, s4, s7 +; GFX8-NEXT: s_mov_b32 s3, 0xffff +; GFX8-NEXT: s_ashr_i32 s2, s6, s8 +; GFX8-NEXT: s_lshl_b32 s0, s0, 16 +; GFX8-NEXT: s_and_b32 s4, s4, s3 +; GFX8-NEXT: s_lshl_b32 s1, s1, 16 +; GFX8-NEXT: s_and_b32 s2, s2, s3 +; GFX8-NEXT: s_or_b32 s0, s0, s4 +; GFX8-NEXT: s_or_b32 s1, s1, s2 ; GFX8-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_ashr_v4i16: @@ -1245,51 +1241,44 @@ define amdgpu_ps <4 x i32> @s_ashr_v8i16(<8 x i16> inreg %value, <8 x i16> inreg ; ; GFX8-LABEL: s_ashr_v8i16: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_lshr_b32 s8, s0, 16 -; GFX8-NEXT: s_lshr_b32 s12, s4, 16 -; GFX8-NEXT: s_lshr_b32 s9, s1, 16 -; GFX8-NEXT: s_lshr_b32 s13, s5, 16 -; GFX8-NEXT: s_sext_i32_i16 s0, s0 -; GFX8-NEXT: s_sext_i32_i16 s4, s4 -; GFX8-NEXT: s_sext_i32_i16 s8, s8 -; GFX8-NEXT: s_sext_i32_i16 s12, s12 -; GFX8-NEXT: s_lshr_b32 s10, s2, 16 -; GFX8-NEXT: s_lshr_b32 s14, s6, 16 +; GFX8-NEXT: s_mov_b32 s9, 0x100010 +; GFX8-NEXT: s_sext_i32_i16 s8, s0 +; GFX8-NEXT: s_sext_i32_i16 s13, s4 +; GFX8-NEXT: s_sext_i32_i16 s10, s1 +; GFX8-NEXT: s_sext_i32_i16 s12, s3 +; GFX8-NEXT: s_sext_i32_i16 s14, s5 +; GFX8-NEXT: s_sext_i32_i16 s16, s7 +; GFX8-NEXT: s_bfe_i32 s0, s0, s9 +; GFX8-NEXT: s_bfe_i32 s4, s4, s9 +; GFX8-NEXT: s_bfe_i32 s1, s1, s9 +; GFX8-NEXT: s_bfe_i32 s5, s5, s9 +; GFX8-NEXT: s_bfe_i32 s3, s3, s9 +; GFX8-NEXT: s_bfe_i32 s7, s7, s9 ; GFX8-NEXT: s_ashr_i32 s0, s0, s4 -; GFX8-NEXT: s_ashr_i32 s4, s8, s12 -; GFX8-NEXT: s_mov_b32 s8, 0xffff -; GFX8-NEXT: s_sext_i32_i16 s1, s1 -; GFX8-NEXT: s_sext_i32_i16 s5, s5 -; GFX8-NEXT: s_sext_i32_i16 s9, s9 -; GFX8-NEXT: s_sext_i32_i16 s13, s13 -; GFX8-NEXT: s_lshr_b32 s11, s3, 16 -; GFX8-NEXT: s_lshr_b32 s15, s7, 16 +; GFX8-NEXT: s_ashr_i32 s3, s3, s7 ; GFX8-NEXT: s_ashr_i32 s1, s1, s5 -; GFX8-NEXT: s_sext_i32_i16 s2, s2 -; GFX8-NEXT: s_sext_i32_i16 s6, s6 -; GFX8-NEXT: s_sext_i32_i16 s10, s10 -; GFX8-NEXT: s_sext_i32_i16 s14, s14 -; GFX8-NEXT: s_ashr_i32 s5, s9, s13 -; GFX8-NEXT: s_lshl_b32 s4, s4, 16 -; GFX8-NEXT: s_and_b32 s0, s0, s8 +; GFX8-NEXT: s_sext_i32_i16 s11, s2 +; GFX8-NEXT: s_sext_i32_i16 s15, s6 +; GFX8-NEXT: s_bfe_i32 s2, s2, s9 +; GFX8-NEXT: s_bfe_i32 s6, s6, s9 +; GFX8-NEXT: s_ashr_i32 s4, s10, s14 +; GFX8-NEXT: s_mov_b32 s7, 0xffff ; GFX8-NEXT: s_ashr_i32 s2, s2, s6 -; GFX8-NEXT: s_or_b32 s0, s4, s0 -; GFX8-NEXT: s_sext_i32_i16 s3, s3 -; GFX8-NEXT: s_sext_i32_i16 s7, s7 -; GFX8-NEXT: s_sext_i32_i16 s11, s11 -; GFX8-NEXT: s_sext_i32_i16 s15, s15 -; GFX8-NEXT: s_ashr_i32 s6, s10, s14 -; GFX8-NEXT: s_lshl_b32 s4, s5, 16 -; GFX8-NEXT: s_and_b32 s1, s1, s8 -; GFX8-NEXT: s_ashr_i32 s3, s3, s7 -; GFX8-NEXT: s_or_b32 s1, s4, s1 -; GFX8-NEXT: s_ashr_i32 s7, s11, s15 -; GFX8-NEXT: s_lshl_b32 s4, s6, 16 -; GFX8-NEXT: s_and_b32 s2, s2, s8 -; GFX8-NEXT: s_or_b32 s2, s4, s2 -; GFX8-NEXT: s_lshl_b32 s4, s7, 16 -; GFX8-NEXT: s_and_b32 s3, s3, s8 -; GFX8-NEXT: s_or_b32 s3, s4, s3 +; GFX8-NEXT: s_ashr_i32 s5, s11, s15 +; GFX8-NEXT: s_lshl_b32 s1, s1, 16 +; GFX8-NEXT: s_and_b32 s4, s4, s7 +; GFX8-NEXT: s_ashr_i32 s8, s8, s13 +; GFX8-NEXT: s_or_b32 s1, s1, s4 +; GFX8-NEXT: s_ashr_i32 s6, s12, s16 +; GFX8-NEXT: s_lshl_b32 s2, s2, 16 +; GFX8-NEXT: s_and_b32 s4, s5, s7 +; GFX8-NEXT: s_or_b32 s2, s2, s4 +; GFX8-NEXT: s_lshl_b32 s0, s0, 16 +; GFX8-NEXT: s_and_b32 s8, s8, s7 +; GFX8-NEXT: s_lshl_b32 s3, s3, 16 +; GFX8-NEXT: s_and_b32 s4, s6, s7 +; GFX8-NEXT: s_or_b32 s0, s0, s8 +; GFX8-NEXT: s_or_b32 s3, s3, s4 ; GFX8-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_ashr_v8i16: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll index 890a9b3e36447..cbca2a96f1d40 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll @@ -42,20 +42,12 @@ define float @v_sitofp_i32_to_f32_mask255(i32 %arg0) nounwind { } define float @v_uitofp_to_f32_lshr7_mask255(i32 %arg0) nounwind { -; SI-LABEL: v_uitofp_to_f32_lshr7_mask255: -; SI: ; %bb.0: -; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_lshrrev_b32_e32 v0, 7, v0 -; SI-NEXT: v_and_b32_e32 v0, 0xff, v0 -; SI-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 -; SI-NEXT: s_setpc_b64 s[30:31] -; -; VI-LABEL: v_uitofp_to_f32_lshr7_mask255: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_lshrrev_b32_e32 v0, 7, v0 -; VI-NEXT: v_cvt_f32_ubyte0_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 -; VI-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: v_uitofp_to_f32_lshr7_mask255: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_bfe_u32 v0, v0, 7, 8 +; GCN-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 +; GCN-NEXT: s_setpc_b64 s[30:31] %lshr.7 = lshr i32 %arg0, 7 %masked = and i32 %lshr.7, 255 %cvt = uitofp i32 %masked to float @@ -66,16 +58,14 @@ define float @v_uitofp_to_f32_lshr8_mask255(i32 %arg0) nounwind { ; SI-LABEL: v_uitofp_to_f32_lshr8_mask255: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_lshrrev_b32_e32 v0, 8, v0 -; SI-NEXT: v_and_b32_e32 v0, 0xff, v0 +; SI-NEXT: v_bfe_u32 v0, v0, 8, 8 ; SI-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 ; SI-NEXT: s_setpc_b64 s[30:31] ; ; VI-LABEL: v_uitofp_to_f32_lshr8_mask255: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_lshrrev_b32_e32 v0, 8, v0 -; VI-NEXT: v_cvt_f32_ubyte0_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 +; VI-NEXT: v_cvt_f32_ubyte0_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 ; VI-NEXT: s_setpc_b64 s[30:31] %lshr.8 = lshr i32 %arg0, 8 %masked = and i32 %lshr.8, 255 @@ -116,17 +106,14 @@ define float @v_uitofp_to_f32_lshr16_mask255(i32 %arg0) nounwind { ; SI-LABEL: v_uitofp_to_f32_lshr16_mask255: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; SI-NEXT: v_and_b32_e32 v0, 0xff, v0 +; SI-NEXT: v_bfe_u32 v0, v0, 16, 8 ; SI-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 ; SI-NEXT: s_setpc_b64 s[30:31] ; ; VI-LABEL: v_uitofp_to_f32_lshr16_mask255: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_mov_b32_e32 v1, 0xff -; VI-NEXT: v_and_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 +; VI-NEXT: v_cvt_f32_ubyte0_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 ; VI-NEXT: s_setpc_b64 s[30:31] %lshr.16 = lshr i32 %arg0, 16 %masked = and i32 %lshr.16, 255 @@ -167,20 +154,19 @@ define <2 x float> @v_uitofp_v2i8_to_v2f32(i16 %arg0) nounwind { ; SI-LABEL: v_uitofp_v2i8_to_v2f32: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; SI-NEXT: s_movk_i32 s4, 0xff -; SI-NEXT: v_and_b32_e32 v0, s4, v0 -; SI-NEXT: v_and_b32_e32 v1, s4, v1 -; SI-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 -; SI-NEXT: v_cvt_f32_ubyte0_e32 v1, v1 +; SI-NEXT: v_and_b32_e32 v1, 0xff, v0 +; SI-NEXT: v_cvt_f32_ubyte0_e32 v2, v1 +; SI-NEXT: v_bfe_u32 v0, v0, 8, 8 +; SI-NEXT: v_cvt_f32_ubyte0_e32 v1, v0 +; SI-NEXT: v_mov_b32_e32 v0, v2 ; SI-NEXT: s_setpc_b64 s[30:31] ; ; VI-LABEL: v_uitofp_v2i8_to_v2f32: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; VI-NEXT: v_cvt_f32_ubyte0_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 -; VI-NEXT: v_cvt_f32_ubyte0_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 +; VI-NEXT: v_cvt_f32_ubyte0_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 +; VI-NEXT: v_cvt_f32_ubyte0_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 +; VI-NEXT: v_mov_b32_e32 v0, v2 ; VI-NEXT: s_setpc_b64 s[30:31] %val = bitcast i16 %arg0 to <2 x i8> %cvt = uitofp <2 x i8> %val to <2 x float> @@ -191,27 +177,21 @@ define <3 x float> @v_uitofp_v3i8_to_v3f32(i32 %arg0) nounwind { ; SI-LABEL: v_uitofp_v3i8_to_v3f32: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; SI-NEXT: s_movk_i32 s4, 0xff -; SI-NEXT: v_lshrrev_b32_e32 v2, 16, v0 -; SI-NEXT: v_and_b32_e32 v0, s4, v0 -; SI-NEXT: v_and_b32_e32 v1, s4, v1 -; SI-NEXT: v_and_b32_e32 v2, s4, v2 -; SI-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 +; SI-NEXT: v_and_b32_e32 v1, 0xff, v0 +; SI-NEXT: v_cvt_f32_ubyte0_e32 v3, v1 +; SI-NEXT: v_bfe_u32 v1, v0, 8, 8 +; SI-NEXT: v_bfe_u32 v0, v0, 16, 8 +; SI-NEXT: v_cvt_f32_ubyte0_e32 v2, v0 ; SI-NEXT: v_cvt_f32_ubyte0_e32 v1, v1 -; SI-NEXT: v_cvt_f32_ubyte0_e32 v2, v2 +; SI-NEXT: v_mov_b32_e32 v0, v3 ; SI-NEXT: s_setpc_b64 s[30:31] ; ; VI-LABEL: v_uitofp_v3i8_to_v3f32: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: s_movk_i32 s4, 0xff -; VI-NEXT: v_mov_b32_e32 v2, s4 -; VI-NEXT: v_lshrrev_b32_e32 v1, 8, v0 ; VI-NEXT: v_cvt_f32_ubyte0_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 -; VI-NEXT: v_and_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: v_cvt_f32_ubyte0_e32 v2, v0 -; VI-NEXT: v_cvt_f32_ubyte0_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 +; VI-NEXT: v_cvt_f32_ubyte0_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 +; VI-NEXT: v_cvt_f32_ubyte0_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 ; VI-NEXT: v_mov_b32_e32 v0, v3 ; VI-NEXT: s_setpc_b64 s[30:31] %trunc = trunc i32 %arg0 to i24 @@ -224,13 +204,10 @@ define <4 x float> @v_uitofp_v4i8_to_v4f32(i32 %arg0) nounwind { ; SI-LABEL: v_uitofp_v4i8_to_v4f32: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: s_movk_i32 s4, 0xff -; SI-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; SI-NEXT: v_lshrrev_b32_e32 v2, 16, v0 -; SI-NEXT: v_and_b32_e32 v3, s4, v0 -; SI-NEXT: v_and_b32_e32 v1, s4, v1 -; SI-NEXT: v_and_b32_e32 v2, s4, v2 -; SI-NEXT: v_cvt_f32_ubyte0_e32 v4, v3 +; SI-NEXT: v_and_b32_e32 v1, 0xff, v0 +; SI-NEXT: v_bfe_u32 v2, v0, 16, 8 +; SI-NEXT: v_cvt_f32_ubyte0_e32 v4, v1 +; SI-NEXT: v_bfe_u32 v1, v0, 8, 8 ; SI-NEXT: v_cvt_f32_ubyte3_e32 v3, v0 ; SI-NEXT: v_cvt_f32_ubyte0_e32 v1, v1 ; SI-NEXT: v_cvt_f32_ubyte0_e32 v2, v2 @@ -240,14 +217,10 @@ define <4 x float> @v_uitofp_v4i8_to_v4f32(i32 %arg0) nounwind { ; VI-LABEL: v_uitofp_v4i8_to_v4f32: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: s_movk_i32 s4, 0xff -; VI-NEXT: v_mov_b32_e32 v2, s4 -; VI-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; VI-NEXT: v_and_b32_sdwa v2, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; VI-NEXT: v_cvt_f32_ubyte0_sdwa v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 +; VI-NEXT: v_cvt_f32_ubyte0_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 +; VI-NEXT: v_cvt_f32_ubyte0_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 ; VI-NEXT: v_cvt_f32_ubyte3_e32 v3, v0 -; VI-NEXT: v_cvt_f32_ubyte0_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 -; VI-NEXT: v_cvt_f32_ubyte0_e32 v2, v2 ; VI-NEXT: v_mov_b32_e32 v0, v4 ; VI-NEXT: s_setpc_b64 s[30:31] %val = bitcast i32 %arg0 to <4 x i8> @@ -259,13 +232,10 @@ define <4 x float> @v_uitofp_unpack_i32_to_v4f32(i32 %arg0) nounwind { ; SI-LABEL: v_uitofp_unpack_i32_to_v4f32: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: s_movk_i32 s4, 0xff -; SI-NEXT: v_and_b32_e32 v1, s4, v0 -; SI-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; SI-NEXT: v_and_b32_e32 v1, 0xff, v0 +; SI-NEXT: v_bfe_u32 v2, v0, 16, 8 ; SI-NEXT: v_cvt_f32_ubyte0_e32 v4, v1 -; SI-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; SI-NEXT: v_and_b32_e32 v1, s4, v1 -; SI-NEXT: v_and_b32_e32 v2, s4, v2 +; SI-NEXT: v_bfe_u32 v1, v0, 8, 8 ; SI-NEXT: v_cvt_f32_ubyte3_e32 v3, v0 ; SI-NEXT: v_cvt_f32_ubyte0_e32 v1, v1 ; SI-NEXT: v_cvt_f32_ubyte0_e32 v2, v2 @@ -275,14 +245,10 @@ define <4 x float> @v_uitofp_unpack_i32_to_v4f32(i32 %arg0) nounwind { ; VI-LABEL: v_uitofp_unpack_i32_to_v4f32: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: s_movk_i32 s4, 0xff -; VI-NEXT: v_mov_b32_e32 v2, s4 -; VI-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; VI-NEXT: v_and_b32_sdwa v2, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; VI-NEXT: v_cvt_f32_ubyte0_sdwa v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 +; VI-NEXT: v_cvt_f32_ubyte0_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 +; VI-NEXT: v_cvt_f32_ubyte0_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 ; VI-NEXT: v_cvt_f32_ubyte3_e32 v3, v0 -; VI-NEXT: v_cvt_f32_ubyte0_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 -; VI-NEXT: v_cvt_f32_ubyte0_e32 v2, v2 ; VI-NEXT: v_mov_b32_e32 v0, v4 ; VI-NEXT: s_setpc_b64 s[30:31] %mask.arg0 = and i32 %arg0, 255 @@ -351,8 +317,7 @@ define half @v_uitofp_to_f16_lshr8_mask255(i32 %arg0) nounwind { ; SI-LABEL: v_uitofp_to_f16_lshr8_mask255: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_lshrrev_b32_e32 v0, 8, v0 -; SI-NEXT: v_and_b32_e32 v0, 0xff, v0 +; SI-NEXT: v_bfe_u32 v0, v0, 8, 8 ; SI-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-NEXT: s_setpc_b64 s[30:31] @@ -360,8 +325,7 @@ define half @v_uitofp_to_f16_lshr8_mask255(i32 %arg0) nounwind { ; VI-LABEL: v_uitofp_to_f16_lshr8_mask255: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_lshrrev_b32_e32 v0, 8, v0 -; VI-NEXT: v_cvt_f32_ubyte0_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 +; VI-NEXT: v_cvt_f32_ubyte0_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 ; VI-NEXT: v_cvt_f16_f32_e32 v0, v0 ; VI-NEXT: s_setpc_b64 s[30:31] %lshr.8 = lshr i32 %arg0, 8 @@ -374,8 +338,7 @@ define half @v_uitofp_to_f16_lshr16_mask255(i32 %arg0) nounwind { ; SI-LABEL: v_uitofp_to_f16_lshr16_mask255: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; SI-NEXT: v_and_b32_e32 v0, 0xff, v0 +; SI-NEXT: v_bfe_u32 v0, v0, 16, 8 ; SI-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-NEXT: s_setpc_b64 s[30:31] @@ -383,9 +346,7 @@ define half @v_uitofp_to_f16_lshr16_mask255(i32 %arg0) nounwind { ; VI-LABEL: v_uitofp_to_f16_lshr16_mask255: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_mov_b32_e32 v1, 0xff -; VI-NEXT: v_and_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 +; VI-NEXT: v_cvt_f32_ubyte0_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 ; VI-NEXT: v_cvt_f16_f32_e32 v0, v0 ; VI-NEXT: s_setpc_b64 s[30:31] %lshr.16 = lshr i32 %arg0, 16 @@ -442,8 +403,7 @@ define double @v_uitofp_to_f64_lshr8_mask255(i32 %arg0) nounwind { ; GCN-LABEL: v_uitofp_to_f64_lshr8_mask255: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v0, 8, v0 -; GCN-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GCN-NEXT: v_bfe_u32 v0, v0, 8, 8 ; GCN-NEXT: v_cvt_f64_u32_e32 v[0:1], v0 ; GCN-NEXT: s_setpc_b64 s[30:31] %lshr.8 = lshr i32 %arg0, 8 @@ -453,21 +413,12 @@ define double @v_uitofp_to_f64_lshr8_mask255(i32 %arg0) nounwind { } define double @v_uitofp_to_f64_lshr16_mask255(i32 %arg0) nounwind { -; SI-LABEL: v_uitofp_to_f64_lshr16_mask255: -; SI: ; %bb.0: -; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; SI-NEXT: v_and_b32_e32 v0, 0xff, v0 -; SI-NEXT: v_cvt_f64_u32_e32 v[0:1], v0 -; SI-NEXT: s_setpc_b64 s[30:31] -; -; VI-LABEL: v_uitofp_to_f64_lshr16_mask255: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_mov_b32_e32 v1, 0xff -; VI-NEXT: v_and_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: v_cvt_f64_u32_e32 v[0:1], v0 -; VI-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: v_uitofp_to_f64_lshr16_mask255: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GCN-NEXT: v_cvt_f64_u32_e32 v[0:1], v0 +; GCN-NEXT: s_setpc_b64 s[30:31] %lshr.16 = lshr i32 %arg0, 16 %masked = and i32 %lshr.16, 255 %cvt = uitofp i32 %masked to double @@ -952,8 +903,7 @@ define amdgpu_kernel void @extract_byte1_to_f32(float addrspace(1)* noalias %out ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_mov_b64 s[6:7], s[2:3] ; SI-NEXT: s_waitcnt vmcnt(0) -; SI-NEXT: v_lshrrev_b32_e32 v0, 8, v0 -; SI-NEXT: v_and_b32_e32 v0, 0xff, v0 +; SI-NEXT: v_bfe_u32 v0, v0, 8, 8 ; SI-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; SI-NEXT: s_endpgm @@ -970,8 +920,7 @@ define amdgpu_kernel void @extract_byte1_to_f32(float addrspace(1)* noalias %out ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: flat_load_dword v0, v[0:1] ; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_lshrrev_b32_e32 v0, 8, v0 -; VI-NEXT: v_cvt_f32_ubyte0_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 +; VI-NEXT: v_cvt_f32_ubyte0_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 ; VI-NEXT: v_mov_b32_e32 v0, s2 ; VI-NEXT: v_mov_b32_e32 v1, s3 ; VI-NEXT: flat_store_dword v[0:1], v2 @@ -1000,8 +949,7 @@ define amdgpu_kernel void @extract_byte2_to_f32(float addrspace(1)* noalias %out ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_mov_b64 s[6:7], s[2:3] ; SI-NEXT: s_waitcnt vmcnt(0) -; SI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; SI-NEXT: v_and_b32_e32 v0, 0xff, v0 +; SI-NEXT: v_bfe_u32 v0, v0, 16, 8 ; SI-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; SI-NEXT: s_endpgm @@ -1017,10 +965,8 @@ define amdgpu_kernel void @extract_byte2_to_f32(float addrspace(1)* noalias %out ; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v2 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: flat_load_dword v0, v[0:1] -; VI-NEXT: v_mov_b32_e32 v1, 0xff ; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_and_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: v_cvt_f32_ubyte0_e32 v2, v0 +; VI-NEXT: v_cvt_f32_ubyte0_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 ; VI-NEXT: v_mov_b32_e32 v0, s2 ; VI-NEXT: v_mov_b32_e32 v1, s3 ; VI-NEXT: flat_store_dword v[0:1], v2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll index 2356045459a83..c0fcbd3b5d550 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll @@ -312,8 +312,7 @@ define i16 @v_extract_v128i16_varidx(<128 x i16> addrspace(1)* %ptr, i32 %idx) { ; GCN-NEXT: v_mov_b32_e32 v5, s4 ; GCN-NEXT: v_add_co_u32_e32 v60, vcc, v0, v5 ; GCN-NEXT: v_addc_co_u32_e32 v61, vcc, v1, v6, vcc -; GCN-NEXT: v_lshrrev_b32_e32 v0, 1, v2 -; GCN-NEXT: v_and_b32_e32 v0, 63, v0 +; GCN-NEXT: v_bfe_u32 v0, v2, 1, 6 ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GCN-NEXT: v_and_b32_e32 v1, 1, v2 ; GCN-NEXT: v_lshlrev_b32_e32 v1, 4, v1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i8.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i8.ll index c820562bf9f8c..2e108ee65a252 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i8.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i8.ll @@ -8,19 +8,16 @@ define amdgpu_ps i8 @extractelement_sgpr_v4i8_sgpr_idx(<4 x i8> addrspace(4)* in ; GCN-LABEL: extractelement_sgpr_v4i8_sgpr_idx: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dword s0, s[2:3], 0x0 -; GCN-NEXT: s_movk_i32 s5, 0xff ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_lshr_b32 s1, s0, 8 -; GCN-NEXT: s_and_b32 s1, s1, s5 -; GCN-NEXT: s_lshr_b32 s2, s0, 16 -; GCN-NEXT: s_lshr_b32 s3, s0, 24 -; GCN-NEXT: s_and_b32 s0, s0, s5 -; GCN-NEXT: s_lshl_b32 s1, s1, 8 -; GCN-NEXT: s_or_b32 s0, s0, s1 -; GCN-NEXT: s_and_b32 s1, s2, s5 -; GCN-NEXT: s_lshl_b32 s1, s1, 16 -; GCN-NEXT: s_or_b32 s0, s0, s1 -; GCN-NEXT: s_lshl_b32 s1, s3, 24 +; GCN-NEXT: s_bfe_u32 s3, s0, 0x80008 +; GCN-NEXT: s_lshr_b32 s1, s0, 24 +; GCN-NEXT: s_and_b32 s2, s0, 0xff +; GCN-NEXT: s_bfe_u32 s0, s0, 0x80010 +; GCN-NEXT: s_lshl_b32 s3, s3, 8 +; GCN-NEXT: s_or_b32 s2, s2, s3 +; GCN-NEXT: s_lshl_b32 s0, s0, 16 +; GCN-NEXT: s_or_b32 s0, s2, s0 +; GCN-NEXT: s_lshl_b32 s1, s1, 24 ; GCN-NEXT: s_or_b32 s0, s0, s1 ; GCN-NEXT: s_and_b32 s1, s4, 3 ; GCN-NEXT: s_lshl_b32 s1, s1, 3 @@ -30,22 +27,19 @@ define amdgpu_ps i8 @extractelement_sgpr_v4i8_sgpr_idx(<4 x i8> addrspace(4)* in ; GFX10-LABEL: extractelement_sgpr_v4i8_sgpr_idx: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dword s0, s[2:3], 0x0 -; GFX10-NEXT: s_movk_i32 s1, 0xff ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_lshr_b32 s2, s0, 8 -; GFX10-NEXT: s_lshr_b32 s3, s0, 16 -; GFX10-NEXT: s_and_b32 s2, s2, s1 -; GFX10-NEXT: s_lshr_b32 s5, s0, 24 -; GFX10-NEXT: s_and_b32 s0, s0, s1 -; GFX10-NEXT: s_and_b32 s1, s3, s1 -; GFX10-NEXT: s_lshl_b32 s2, s2, 8 -; GFX10-NEXT: s_lshl_b32 s1, s1, 16 -; GFX10-NEXT: s_or_b32 s0, s0, s2 -; GFX10-NEXT: s_lshl_b32 s2, s5, 24 +; GFX10-NEXT: s_bfe_u32 s3, s0, 0x80008 +; GFX10-NEXT: s_lshr_b32 s1, s0, 24 +; GFX10-NEXT: s_and_b32 s2, s0, 0xff +; GFX10-NEXT: s_bfe_u32 s0, s0, 0x80010 +; GFX10-NEXT: s_lshl_b32 s3, s3, 8 +; GFX10-NEXT: s_lshl_b32 s0, s0, 16 +; GFX10-NEXT: s_or_b32 s2, s2, s3 +; GFX10-NEXT: s_lshl_b32 s1, s1, 24 +; GFX10-NEXT: s_or_b32 s0, s2, s0 +; GFX10-NEXT: s_and_b32 s2, s4, 3 ; GFX10-NEXT: s_or_b32 s0, s0, s1 -; GFX10-NEXT: s_and_b32 s1, s4, 3 -; GFX10-NEXT: s_or_b32 s0, s0, s2 -; GFX10-NEXT: s_lshl_b32 s1, s1, 3 +; GFX10-NEXT: s_lshl_b32 s1, s2, 3 ; GFX10-NEXT: s_lshr_b32 s0, s0, s1 ; GFX10-NEXT: ; return to shader part epilog %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr @@ -57,18 +51,18 @@ define amdgpu_ps i8 @extractelement_vgpr_v4i8_sgpr_idx(<4 x i8> addrspace(1)* %p ; GFX9-LABEL: extractelement_vgpr_v4i8_sgpr_idx: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dword v0, v[0:1], off -; GFX9-NEXT: s_mov_b32 s0, 8 -; GFX9-NEXT: s_movk_i32 s1, 0xff -; GFX9-NEXT: s_and_b32 s2, s2, 3 +; GFX9-NEXT: v_mov_b32_e32 v2, 8 +; GFX9-NEXT: v_mov_b32_e32 v3, 16 +; GFX9-NEXT: v_mov_b32_e32 v1, 0xff +; GFX9-NEXT: s_and_b32 s0, s2, 3 +; GFX9-NEXT: s_lshl_b32 s0, s0, 3 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_b32_sdwa v3, v0, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GFX9-NEXT: v_and_or_b32 v0, v0, s1, v1 -; GFX9-NEXT: v_or3_b32 v0, v0, v3, v2 -; GFX9-NEXT: s_lshl_b32 s0, s2, 3 +; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v0 +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_and_or_b32 v0, v0, v1, v2 +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v4 +; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, s0, v0 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0 ; GFX9-NEXT: ; return to shader part epilog @@ -76,20 +70,18 @@ define amdgpu_ps i8 @extractelement_vgpr_v4i8_sgpr_idx(<4 x i8> addrspace(1)* %p ; GFX8-LABEL: extractelement_vgpr_v4i8_sgpr_idx: ; GFX8: ; %bb.0: ; GFX8-NEXT: flat_load_dword v0, v[0:1] -; GFX8-NEXT: s_movk_i32 s0, 0xff ; GFX8-NEXT: v_mov_b32_e32 v1, 8 -; GFX8-NEXT: v_mov_b32_e32 v2, s0 +; GFX8-NEXT: v_mov_b32_e32 v2, 16 ; GFX8-NEXT: s_and_b32 s0, s2, 3 ; GFX8-NEXT: s_lshl_b32 s0, s0, 3 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0 -; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v0 -; GFX8-NEXT: v_and_b32_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_lshlrev_b32_e32 v3, 24, v4 +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v3 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, s0, v0 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0 ; GFX8-NEXT: ; return to shader part epilog @@ -100,22 +92,19 @@ define amdgpu_ps i8 @extractelement_vgpr_v4i8_sgpr_idx(<4 x i8> addrspace(1)* %p ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 -; GFX7-NEXT: s_movk_i32 s0, 0xff -; GFX7-NEXT: s_and_b32 s1, s2, 3 +; GFX7-NEXT: s_and_b32 s0, s2, 3 +; GFX7-NEXT: s_lshl_b32 s0, s0, 3 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0 -; GFX7-NEXT: v_and_b32_e32 v1, s0, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GFX7-NEXT: v_and_b32_e32 v2, s0, v2 -; GFX7-NEXT: v_and_b32_e32 v0, s0, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX7-NEXT: v_bfe_u32 v3, v0, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v1, 24, v0 +; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v0 +; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v1 +; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v3, 24, v3 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v3 -; GFX7-NEXT: s_lshl_b32 s0, s1, 3 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, s0, v0 ; GFX7-NEXT: v_readfirstlane_b32 s0, v0 ; GFX7-NEXT: ; return to shader part epilog @@ -123,18 +112,17 @@ define amdgpu_ps i8 @extractelement_vgpr_v4i8_sgpr_idx(<4 x i8> addrspace(1)* %p ; GFX10-LABEL: extractelement_vgpr_v4i8_sgpr_idx: ; GFX10: ; %bb.0: ; GFX10-NEXT: global_load_dword v0, v[0:1], off -; GFX10-NEXT: s_mov_b32 s0, 8 -; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: s_movk_i32 s0, 0xff -; GFX10-NEXT: v_and_b32_sdwa v3, v0, s0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_or_b32 v0, v0, s0, v1 -; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v2 +; GFX10-NEXT: v_mov_b32_e32 v1, 8 +; GFX10-NEXT: v_mov_b32_e32 v2, 16 ; GFX10-NEXT: s_and_b32 s0, s2, 3 ; GFX10-NEXT: s_lshl_b32 s0, s0, 3 -; GFX10-NEXT: v_or3_b32 v0, v0, v3, v1 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v0, 0xff, v0, v1 +; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX10-NEXT: v_or3_b32 v0, v0, v2, v1 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, s0, v0 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0 ; GFX10-NEXT: ; return to shader part epilog @@ -148,18 +136,18 @@ define i8 @extractelement_vgpr_v4i8_vgpr_idx(<4 x i8> addrspace(1)* %ptr, i32 %i ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_load_dword v0, v[0:1], off -; GFX9-NEXT: v_and_b32_e32 v1, 3, v2 -; GFX9-NEXT: s_mov_b32 s4, 8 -; GFX9-NEXT: s_movk_i32 s5, 0xff -; GFX9-NEXT: v_lshlrev_b32_e32 v1, 3, v1 +; GFX9-NEXT: v_mov_b32_e32 v3, 8 +; GFX9-NEXT: v_mov_b32_e32 v4, 16 +; GFX9-NEXT: v_mov_b32_e32 v1, 0xff +; GFX9-NEXT: v_and_b32_e32 v2, 3, v2 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v2, 8, v0 -; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v2, s4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_b32_sdwa v4, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b32_e32 v3, 24, v3 -; GFX9-NEXT: v_and_or_b32 v0, v0, s5, v2 -; GFX9-NEXT: v_or3_b32 v0, v0, v4, v3 +; GFX9-NEXT: v_lshrrev_b32_e32 v5, 24, v0 +; GFX9-NEXT: v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v4, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_and_or_b32 v0, v0, v1, v3 +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v5 +; GFX9-NEXT: v_or3_b32 v0, v0, v4, v1 +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 3, v2 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, v1, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -167,19 +155,17 @@ define i8 @extractelement_vgpr_v4i8_vgpr_idx(<4 x i8> addrspace(1)* %ptr, i32 %i ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: flat_load_dword v0, v[0:1] -; GFX8-NEXT: s_movk_i32 s4, 0xff ; GFX8-NEXT: v_mov_b32_e32 v1, 8 -; GFX8-NEXT: v_mov_b32_e32 v3, s4 +; GFX8-NEXT: v_mov_b32_e32 v3, 16 ; GFX8-NEXT: v_and_b32_e32 v2, 3, v2 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 8, v0 -; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v5, 24, v0 -; GFX8-NEXT: v_and_b32_sdwa v3, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_lshlrev_b32_e32 v4, 24, v5 +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v4 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v3 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 3, v2 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, v1, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -192,21 +178,18 @@ define i8 @extractelement_vgpr_v4i8_vgpr_idx(<4 x i8> addrspace(1)* %ptr, i32 %i ; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 ; GFX7-NEXT: v_and_b32_e32 v1, 3, v2 -; GFX7-NEXT: s_movk_i32 s4, 0xff ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 3, v1 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v2, 8, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v0 -; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 -; GFX7-NEXT: v_lshrrev_b32_e32 v4, 24, v0 -; GFX7-NEXT: v_and_b32_e32 v3, s4, v3 -; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 8, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX7-NEXT: v_bfe_u32 v4, v0, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v2, 24, v0 +; GFX7-NEXT: v_and_b32_e32 v3, 0xff, v0 +; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v4, 8, v4 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_or_b32_e32 v3, v3, v4 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 24, v2 +; GFX7-NEXT: v_or_b32_e32 v0, v3, v0 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v3 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v4 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, v1, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; @@ -215,18 +198,17 @@ define i8 @extractelement_vgpr_v4i8_vgpr_idx(<4 x i8> addrspace(1)* %ptr, i32 %i ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v0, v[0:1], off -; GFX10-NEXT: s_mov_b32 s4, 8 +; GFX10-NEXT: v_mov_b32_e32 v1, 8 +; GFX10-NEXT: v_mov_b32_e32 v3, 16 +; GFX10-NEXT: v_and_b32_e32 v2, 3, v2 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: s_movk_i32 s4, 0xff -; GFX10-NEXT: v_lshlrev_b32_e32 v3, 24, v3 -; GFX10-NEXT: v_and_b32_sdwa v4, v0, s4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_or_b32 v0, v0, s4, v1 -; GFX10-NEXT: v_and_b32_e32 v1, 3, v2 -; GFX10-NEXT: v_or3_b32 v0, v0, v4, v3 -; GFX10-NEXT: v_lshlrev_b32_e32 v1, 3, v1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v0, 0xff, v0, v1 +; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v4 +; GFX10-NEXT: v_or3_b32 v0, v0, v3, v1 +; GFX10-NEXT: v_lshlrev_b32_e32 v1, 3, v2 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr @@ -238,21 +220,18 @@ define amdgpu_ps i8 @extractelement_sgpr_v4i8_vgpr_idx(<4 x i8> addrspace(4)* in ; GFX9-LABEL: extractelement_sgpr_v4i8_vgpr_idx: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dword s0, s[2:3], 0x0 -; GFX9-NEXT: s_movk_i32 s4, 0xff ; GFX9-NEXT: v_and_b32_e32 v0, 3, v0 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 3, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_lshr_b32 s1, s0, 8 -; GFX9-NEXT: s_and_b32 s1, s1, s4 -; GFX9-NEXT: s_lshr_b32 s2, s0, 16 -; GFX9-NEXT: s_lshr_b32 s3, s0, 24 -; GFX9-NEXT: s_and_b32 s0, s0, s4 -; GFX9-NEXT: s_lshl_b32 s1, s1, 8 -; GFX9-NEXT: s_or_b32 s0, s0, s1 -; GFX9-NEXT: s_and_b32 s1, s2, s4 -; GFX9-NEXT: s_lshl_b32 s1, s1, 16 -; GFX9-NEXT: s_or_b32 s0, s0, s1 -; GFX9-NEXT: s_lshl_b32 s1, s3, 24 +; GFX9-NEXT: s_bfe_u32 s3, s0, 0x80008 +; GFX9-NEXT: s_lshr_b32 s1, s0, 24 +; GFX9-NEXT: s_and_b32 s2, s0, 0xff +; GFX9-NEXT: s_bfe_u32 s0, s0, 0x80010 +; GFX9-NEXT: s_lshl_b32 s3, s3, 8 +; GFX9-NEXT: s_or_b32 s2, s2, s3 +; GFX9-NEXT: s_lshl_b32 s0, s0, 16 +; GFX9-NEXT: s_or_b32 s0, s2, s0 +; GFX9-NEXT: s_lshl_b32 s1, s1, 24 ; GFX9-NEXT: s_or_b32 s0, s0, s1 ; GFX9-NEXT: v_lshrrev_b32_e64 v0, v0, s0 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0 @@ -261,21 +240,18 @@ define amdgpu_ps i8 @extractelement_sgpr_v4i8_vgpr_idx(<4 x i8> addrspace(4)* in ; GFX8-LABEL: extractelement_sgpr_v4i8_vgpr_idx: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dword s0, s[2:3], 0x0 -; GFX8-NEXT: s_movk_i32 s4, 0xff ; GFX8-NEXT: v_and_b32_e32 v0, 3, v0 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 3, v0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_lshr_b32 s1, s0, 8 -; GFX8-NEXT: s_and_b32 s1, s1, s4 -; GFX8-NEXT: s_lshr_b32 s2, s0, 16 -; GFX8-NEXT: s_lshr_b32 s3, s0, 24 -; GFX8-NEXT: s_and_b32 s0, s0, s4 -; GFX8-NEXT: s_lshl_b32 s1, s1, 8 -; GFX8-NEXT: s_or_b32 s0, s0, s1 -; GFX8-NEXT: s_and_b32 s1, s2, s4 -; GFX8-NEXT: s_lshl_b32 s1, s1, 16 -; GFX8-NEXT: s_or_b32 s0, s0, s1 -; GFX8-NEXT: s_lshl_b32 s1, s3, 24 +; GFX8-NEXT: s_bfe_u32 s3, s0, 0x80008 +; GFX8-NEXT: s_lshr_b32 s1, s0, 24 +; GFX8-NEXT: s_and_b32 s2, s0, 0xff +; GFX8-NEXT: s_bfe_u32 s0, s0, 0x80010 +; GFX8-NEXT: s_lshl_b32 s3, s3, 8 +; GFX8-NEXT: s_or_b32 s2, s2, s3 +; GFX8-NEXT: s_lshl_b32 s0, s0, 16 +; GFX8-NEXT: s_or_b32 s0, s2, s0 +; GFX8-NEXT: s_lshl_b32 s1, s1, 24 ; GFX8-NEXT: s_or_b32 s0, s0, s1 ; GFX8-NEXT: v_lshrrev_b32_e64 v0, v0, s0 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0 @@ -284,21 +260,18 @@ define amdgpu_ps i8 @extractelement_sgpr_v4i8_vgpr_idx(<4 x i8> addrspace(4)* in ; GFX7-LABEL: extractelement_sgpr_v4i8_vgpr_idx: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dword s0, s[2:3], 0x0 -; GFX7-NEXT: s_movk_i32 s4, 0xff ; GFX7-NEXT: v_and_b32_e32 v0, 3, v0 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 3, v0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_lshr_b32 s1, s0, 8 -; GFX7-NEXT: s_and_b32 s1, s1, s4 -; GFX7-NEXT: s_lshr_b32 s2, s0, 16 -; GFX7-NEXT: s_lshr_b32 s3, s0, 24 -; GFX7-NEXT: s_and_b32 s0, s0, s4 -; GFX7-NEXT: s_lshl_b32 s1, s1, 8 -; GFX7-NEXT: s_or_b32 s0, s0, s1 -; GFX7-NEXT: s_and_b32 s1, s2, s4 -; GFX7-NEXT: s_lshl_b32 s1, s1, 16 -; GFX7-NEXT: s_or_b32 s0, s0, s1 -; GFX7-NEXT: s_lshl_b32 s1, s3, 24 +; GFX7-NEXT: s_bfe_u32 s3, s0, 0x80008 +; GFX7-NEXT: s_lshr_b32 s1, s0, 24 +; GFX7-NEXT: s_and_b32 s2, s0, 0xff +; GFX7-NEXT: s_bfe_u32 s0, s0, 0x80010 +; GFX7-NEXT: s_lshl_b32 s3, s3, 8 +; GFX7-NEXT: s_or_b32 s2, s2, s3 +; GFX7-NEXT: s_lshl_b32 s0, s0, 16 +; GFX7-NEXT: s_or_b32 s0, s2, s0 +; GFX7-NEXT: s_lshl_b32 s1, s1, 24 ; GFX7-NEXT: s_or_b32 s0, s0, s1 ; GFX7-NEXT: v_lshr_b32_e32 v0, s0, v0 ; GFX7-NEXT: v_readfirstlane_b32 s0, v0 @@ -307,21 +280,18 @@ define amdgpu_ps i8 @extractelement_sgpr_v4i8_vgpr_idx(<4 x i8> addrspace(4)* in ; GFX10-LABEL: extractelement_sgpr_v4i8_vgpr_idx: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dword s0, s[2:3], 0x0 -; GFX10-NEXT: s_movk_i32 s1, 0xff ; GFX10-NEXT: v_and_b32_e32 v0, 3, v0 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 3, v0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_lshr_b32 s2, s0, 8 -; GFX10-NEXT: s_lshr_b32 s3, s0, 16 -; GFX10-NEXT: s_and_b32 s2, s2, s1 -; GFX10-NEXT: s_and_b32 s4, s0, s1 -; GFX10-NEXT: s_and_b32 s1, s3, s1 +; GFX10-NEXT: s_bfe_u32 s2, s0, 0x80008 +; GFX10-NEXT: s_bfe_u32 s3, s0, 0x80010 +; GFX10-NEXT: s_and_b32 s1, s0, 0xff ; GFX10-NEXT: s_lshl_b32 s2, s2, 8 ; GFX10-NEXT: s_lshr_b32 s0, s0, 24 -; GFX10-NEXT: s_lshl_b32 s1, s1, 16 -; GFX10-NEXT: s_or_b32 s2, s4, s2 +; GFX10-NEXT: s_lshl_b32 s3, s3, 16 +; GFX10-NEXT: s_or_b32 s1, s1, s2 ; GFX10-NEXT: s_lshl_b32 s0, s0, 24 -; GFX10-NEXT: s_or_b32 s1, s2, s1 +; GFX10-NEXT: s_or_b32 s1, s1, s3 ; GFX10-NEXT: s_or_b32 s0, s1, s0 ; GFX10-NEXT: v_lshrrev_b32_e64 v0, v0, s0 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0 @@ -334,39 +304,33 @@ define amdgpu_ps i8 @extractelement_sgpr_v4i8_vgpr_idx(<4 x i8> addrspace(4)* in define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx0(<4 x i8> addrspace(4)* inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v4i8_idx0: ; GCN: ; %bb.0: -; GCN-NEXT: s_load_dword s1, s[2:3], 0x0 -; GCN-NEXT: s_movk_i32 s0, 0xff +; GCN-NEXT: s_load_dword s0, s[2:3], 0x0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_lshr_b32 s2, s1, 8 -; GCN-NEXT: s_and_b32 s2, s2, s0 -; GCN-NEXT: s_lshr_b32 s3, s1, 16 -; GCN-NEXT: s_lshr_b32 s4, s1, 24 -; GCN-NEXT: s_and_b32 s1, s1, s0 -; GCN-NEXT: s_and_b32 s0, s3, s0 -; GCN-NEXT: s_lshl_b32 s2, s2, 8 -; GCN-NEXT: s_or_b32 s1, s1, s2 +; GCN-NEXT: s_bfe_u32 s3, s0, 0x80008 +; GCN-NEXT: s_lshr_b32 s1, s0, 24 +; GCN-NEXT: s_and_b32 s2, s0, 0xff +; GCN-NEXT: s_bfe_u32 s0, s0, 0x80010 +; GCN-NEXT: s_lshl_b32 s3, s3, 8 +; GCN-NEXT: s_or_b32 s2, s2, s3 ; GCN-NEXT: s_lshl_b32 s0, s0, 16 -; GCN-NEXT: s_or_b32 s0, s1, s0 -; GCN-NEXT: s_lshl_b32 s1, s4, 24 +; GCN-NEXT: s_or_b32 s0, s2, s0 +; GCN-NEXT: s_lshl_b32 s1, s1, 24 ; GCN-NEXT: s_or_b32 s0, s0, s1 ; GCN-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: extractelement_sgpr_v4i8_idx0: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dword s0, s[2:3], 0x0 -; GFX10-NEXT: s_movk_i32 s1, 0xff ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_lshr_b32 s2, s0, 8 -; GFX10-NEXT: s_lshr_b32 s3, s0, 16 -; GFX10-NEXT: s_and_b32 s2, s2, s1 -; GFX10-NEXT: s_and_b32 s4, s0, s1 -; GFX10-NEXT: s_and_b32 s1, s3, s1 +; GFX10-NEXT: s_bfe_u32 s2, s0, 0x80008 +; GFX10-NEXT: s_bfe_u32 s3, s0, 0x80010 +; GFX10-NEXT: s_and_b32 s1, s0, 0xff ; GFX10-NEXT: s_lshl_b32 s2, s2, 8 ; GFX10-NEXT: s_lshr_b32 s0, s0, 24 -; GFX10-NEXT: s_lshl_b32 s1, s1, 16 -; GFX10-NEXT: s_or_b32 s2, s4, s2 +; GFX10-NEXT: s_lshl_b32 s3, s3, 16 +; GFX10-NEXT: s_or_b32 s1, s1, s2 ; GFX10-NEXT: s_lshl_b32 s0, s0, 24 -; GFX10-NEXT: s_or_b32 s1, s2, s1 +; GFX10-NEXT: s_or_b32 s1, s1, s3 ; GFX10-NEXT: s_or_b32 s0, s1, s0 ; GFX10-NEXT: ; return to shader part epilog %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr @@ -377,20 +341,17 @@ define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx0(<4 x i8> addrspace(4)* inreg define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx1(<4 x i8> addrspace(4)* inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v4i8_idx1: ; GCN: ; %bb.0: -; GCN-NEXT: s_load_dword s1, s[2:3], 0x0 -; GCN-NEXT: s_movk_i32 s0, 0xff +; GCN-NEXT: s_load_dword s0, s[2:3], 0x0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_lshr_b32 s2, s1, 8 -; GCN-NEXT: s_and_b32 s2, s2, s0 -; GCN-NEXT: s_lshr_b32 s3, s1, 16 -; GCN-NEXT: s_lshr_b32 s4, s1, 24 -; GCN-NEXT: s_and_b32 s1, s1, s0 -; GCN-NEXT: s_and_b32 s0, s3, s0 -; GCN-NEXT: s_lshl_b32 s2, s2, 8 -; GCN-NEXT: s_or_b32 s1, s1, s2 +; GCN-NEXT: s_bfe_u32 s3, s0, 0x80008 +; GCN-NEXT: s_lshr_b32 s1, s0, 24 +; GCN-NEXT: s_and_b32 s2, s0, 0xff +; GCN-NEXT: s_bfe_u32 s0, s0, 0x80010 +; GCN-NEXT: s_lshl_b32 s3, s3, 8 +; GCN-NEXT: s_or_b32 s2, s2, s3 ; GCN-NEXT: s_lshl_b32 s0, s0, 16 -; GCN-NEXT: s_or_b32 s0, s1, s0 -; GCN-NEXT: s_lshl_b32 s1, s4, 24 +; GCN-NEXT: s_or_b32 s0, s2, s0 +; GCN-NEXT: s_lshl_b32 s1, s1, 24 ; GCN-NEXT: s_or_b32 s0, s0, s1 ; GCN-NEXT: s_lshr_b32 s0, s0, 8 ; GCN-NEXT: ; return to shader part epilog @@ -398,19 +359,16 @@ define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx1(<4 x i8> addrspace(4)* inreg ; GFX10-LABEL: extractelement_sgpr_v4i8_idx1: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dword s0, s[2:3], 0x0 -; GFX10-NEXT: s_movk_i32 s1, 0xff ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_lshr_b32 s2, s0, 8 -; GFX10-NEXT: s_lshr_b32 s3, s0, 16 -; GFX10-NEXT: s_and_b32 s2, s2, s1 -; GFX10-NEXT: s_and_b32 s4, s0, s1 -; GFX10-NEXT: s_and_b32 s1, s3, s1 +; GFX10-NEXT: s_bfe_u32 s2, s0, 0x80008 +; GFX10-NEXT: s_bfe_u32 s3, s0, 0x80010 +; GFX10-NEXT: s_and_b32 s1, s0, 0xff ; GFX10-NEXT: s_lshl_b32 s2, s2, 8 ; GFX10-NEXT: s_lshr_b32 s0, s0, 24 -; GFX10-NEXT: s_lshl_b32 s1, s1, 16 -; GFX10-NEXT: s_or_b32 s2, s4, s2 +; GFX10-NEXT: s_lshl_b32 s3, s3, 16 +; GFX10-NEXT: s_or_b32 s1, s1, s2 ; GFX10-NEXT: s_lshl_b32 s0, s0, 24 -; GFX10-NEXT: s_or_b32 s1, s2, s1 +; GFX10-NEXT: s_or_b32 s1, s1, s3 ; GFX10-NEXT: s_or_b32 s0, s1, s0 ; GFX10-NEXT: s_lshr_b32 s0, s0, 8 ; GFX10-NEXT: ; return to shader part epilog @@ -422,20 +380,17 @@ define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx1(<4 x i8> addrspace(4)* inreg define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx2(<4 x i8> addrspace(4)* inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v4i8_idx2: ; GCN: ; %bb.0: -; GCN-NEXT: s_load_dword s1, s[2:3], 0x0 -; GCN-NEXT: s_movk_i32 s0, 0xff +; GCN-NEXT: s_load_dword s0, s[2:3], 0x0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_lshr_b32 s2, s1, 8 -; GCN-NEXT: s_and_b32 s2, s2, s0 -; GCN-NEXT: s_lshr_b32 s3, s1, 16 -; GCN-NEXT: s_lshr_b32 s4, s1, 24 -; GCN-NEXT: s_and_b32 s1, s1, s0 -; GCN-NEXT: s_and_b32 s0, s3, s0 -; GCN-NEXT: s_lshl_b32 s2, s2, 8 -; GCN-NEXT: s_or_b32 s1, s1, s2 +; GCN-NEXT: s_bfe_u32 s3, s0, 0x80008 +; GCN-NEXT: s_lshr_b32 s1, s0, 24 +; GCN-NEXT: s_and_b32 s2, s0, 0xff +; GCN-NEXT: s_bfe_u32 s0, s0, 0x80010 +; GCN-NEXT: s_lshl_b32 s3, s3, 8 +; GCN-NEXT: s_or_b32 s2, s2, s3 ; GCN-NEXT: s_lshl_b32 s0, s0, 16 -; GCN-NEXT: s_or_b32 s0, s1, s0 -; GCN-NEXT: s_lshl_b32 s1, s4, 24 +; GCN-NEXT: s_or_b32 s0, s2, s0 +; GCN-NEXT: s_lshl_b32 s1, s1, 24 ; GCN-NEXT: s_or_b32 s0, s0, s1 ; GCN-NEXT: s_lshr_b32 s0, s0, 16 ; GCN-NEXT: ; return to shader part epilog @@ -443,19 +398,16 @@ define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx2(<4 x i8> addrspace(4)* inreg ; GFX10-LABEL: extractelement_sgpr_v4i8_idx2: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dword s0, s[2:3], 0x0 -; GFX10-NEXT: s_movk_i32 s1, 0xff ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_lshr_b32 s2, s0, 8 -; GFX10-NEXT: s_lshr_b32 s3, s0, 16 -; GFX10-NEXT: s_and_b32 s2, s2, s1 -; GFX10-NEXT: s_and_b32 s4, s0, s1 -; GFX10-NEXT: s_and_b32 s1, s3, s1 +; GFX10-NEXT: s_bfe_u32 s2, s0, 0x80008 +; GFX10-NEXT: s_bfe_u32 s3, s0, 0x80010 +; GFX10-NEXT: s_and_b32 s1, s0, 0xff ; GFX10-NEXT: s_lshl_b32 s2, s2, 8 ; GFX10-NEXT: s_lshr_b32 s0, s0, 24 -; GFX10-NEXT: s_lshl_b32 s1, s1, 16 -; GFX10-NEXT: s_or_b32 s2, s4, s2 +; GFX10-NEXT: s_lshl_b32 s3, s3, 16 +; GFX10-NEXT: s_or_b32 s1, s1, s2 ; GFX10-NEXT: s_lshl_b32 s0, s0, 24 -; GFX10-NEXT: s_or_b32 s1, s2, s1 +; GFX10-NEXT: s_or_b32 s1, s1, s3 ; GFX10-NEXT: s_or_b32 s0, s1, s0 ; GFX10-NEXT: s_lshr_b32 s0, s0, 16 ; GFX10-NEXT: ; return to shader part epilog @@ -467,20 +419,17 @@ define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx2(<4 x i8> addrspace(4)* inreg define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx3(<4 x i8> addrspace(4)* inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v4i8_idx3: ; GCN: ; %bb.0: -; GCN-NEXT: s_load_dword s1, s[2:3], 0x0 -; GCN-NEXT: s_movk_i32 s0, 0xff +; GCN-NEXT: s_load_dword s0, s[2:3], 0x0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_lshr_b32 s2, s1, 8 -; GCN-NEXT: s_and_b32 s2, s2, s0 -; GCN-NEXT: s_lshr_b32 s3, s1, 16 -; GCN-NEXT: s_lshr_b32 s4, s1, 24 -; GCN-NEXT: s_and_b32 s1, s1, s0 -; GCN-NEXT: s_and_b32 s0, s3, s0 -; GCN-NEXT: s_lshl_b32 s2, s2, 8 -; GCN-NEXT: s_or_b32 s1, s1, s2 +; GCN-NEXT: s_bfe_u32 s3, s0, 0x80008 +; GCN-NEXT: s_lshr_b32 s1, s0, 24 +; GCN-NEXT: s_and_b32 s2, s0, 0xff +; GCN-NEXT: s_bfe_u32 s0, s0, 0x80010 +; GCN-NEXT: s_lshl_b32 s3, s3, 8 +; GCN-NEXT: s_or_b32 s2, s2, s3 ; GCN-NEXT: s_lshl_b32 s0, s0, 16 -; GCN-NEXT: s_or_b32 s0, s1, s0 -; GCN-NEXT: s_lshl_b32 s1, s4, 24 +; GCN-NEXT: s_or_b32 s0, s2, s0 +; GCN-NEXT: s_lshl_b32 s1, s1, 24 ; GCN-NEXT: s_or_b32 s0, s0, s1 ; GCN-NEXT: s_lshr_b32 s0, s0, 24 ; GCN-NEXT: ; return to shader part epilog @@ -488,19 +437,16 @@ define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx3(<4 x i8> addrspace(4)* inreg ; GFX10-LABEL: extractelement_sgpr_v4i8_idx3: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dword s0, s[2:3], 0x0 -; GFX10-NEXT: s_movk_i32 s1, 0xff ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_lshr_b32 s2, s0, 8 -; GFX10-NEXT: s_lshr_b32 s3, s0, 16 -; GFX10-NEXT: s_and_b32 s2, s2, s1 -; GFX10-NEXT: s_and_b32 s4, s0, s1 -; GFX10-NEXT: s_and_b32 s1, s3, s1 +; GFX10-NEXT: s_bfe_u32 s2, s0, 0x80008 +; GFX10-NEXT: s_bfe_u32 s3, s0, 0x80010 +; GFX10-NEXT: s_and_b32 s1, s0, 0xff ; GFX10-NEXT: s_lshl_b32 s2, s2, 8 ; GFX10-NEXT: s_lshr_b32 s0, s0, 24 -; GFX10-NEXT: s_lshl_b32 s1, s1, 16 -; GFX10-NEXT: s_or_b32 s2, s4, s2 +; GFX10-NEXT: s_lshl_b32 s3, s3, 16 +; GFX10-NEXT: s_or_b32 s1, s1, s2 ; GFX10-NEXT: s_lshl_b32 s0, s0, 24 -; GFX10-NEXT: s_or_b32 s1, s2, s1 +; GFX10-NEXT: s_or_b32 s1, s1, s3 ; GFX10-NEXT: s_or_b32 s0, s1, s0 ; GFX10-NEXT: s_lshr_b32 s0, s0, 24 ; GFX10-NEXT: ; return to shader part epilog @@ -514,15 +460,15 @@ define i8 @extractelement_vgpr_v4i8_idx0(<4 x i8> addrspace(1)* %ptr) { ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_load_dword v0, v[0:1], off -; GFX9-NEXT: s_mov_b32 s4, 8 -; GFX9-NEXT: s_movk_i32 s5, 0xff +; GFX9-NEXT: v_mov_b32_e32 v2, 8 +; GFX9-NEXT: v_mov_b32_e32 v3, 16 +; GFX9-NEXT: v_mov_b32_e32 v1, 0xff ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_b32_sdwa v3, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_or_b32 v0, v0, s5, v1 -; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v2 +; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v0 +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_and_or_b32 v0, v0, v1, v2 +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v4 ; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -530,17 +476,15 @@ define i8 @extractelement_vgpr_v4i8_idx0(<4 x i8> addrspace(1)* %ptr) { ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: flat_load_dword v0, v[0:1] -; GFX8-NEXT: s_movk_i32 s4, 0xff ; GFX8-NEXT: v_mov_b32_e32 v1, 8 -; GFX8-NEXT: v_mov_b32_e32 v2, s4 +; GFX8-NEXT: v_mov_b32_e32 v2, 16 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0 -; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v0 -; GFX8-NEXT: v_and_b32_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v4 +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; @@ -551,19 +495,16 @@ define i8 @extractelement_vgpr_v4i8_idx0(<4 x i8> addrspace(1)* %ptr) { ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 -; GFX7-NEXT: s_movk_i32 s4, 0xff ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0 -; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 -; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 -; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX7-NEXT: v_bfe_u32 v3, v0, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v1, 24, v0 +; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v0 +; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v1 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; @@ -572,16 +513,15 @@ define i8 @extractelement_vgpr_v4i8_idx0(<4 x i8> addrspace(1)* %ptr) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v0, v[0:1], off -; GFX10-NEXT: s_mov_b32 s4, 8 +; GFX10-NEXT: v_mov_b32_e32 v1, 8 +; GFX10-NEXT: v_mov_b32_e32 v2, 16 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: s_movk_i32 s4, 0xff -; GFX10-NEXT: v_and_b32_sdwa v3, v0, s4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_or_b32 v0, v0, s4, v1 -; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v2 -; GFX10-NEXT: v_or3_b32 v0, v0, v3, v1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v0, 0xff, v0, v1 +; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX10-NEXT: v_or3_b32 v0, v0, v2, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr %element = extractelement <4 x i8> %vector, i32 0 @@ -594,15 +534,15 @@ define i8 @extractelement_vgpr_v4i8_idx1(<4 x i8> addrspace(1)* %ptr) { ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_load_dword v0, v[0:1], off ; GFX9-NEXT: s_mov_b32 s4, 8 -; GFX9-NEXT: s_movk_i32 s5, 0xff +; GFX9-NEXT: v_mov_b32_e32 v2, 16 +; GFX9-NEXT: v_mov_b32_e32 v1, 0xff ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_b32_sdwa v3, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_or_b32 v0, v0, s5, v1 -; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v2 -; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_and_or_b32 v0, v0, v1, v4 +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX9-NEXT: v_or3_b32 v0, v0, v2, v1 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -610,17 +550,15 @@ define i8 @extractelement_vgpr_v4i8_idx1(<4 x i8> addrspace(1)* %ptr) { ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: flat_load_dword v0, v[0:1] -; GFX8-NEXT: s_movk_i32 s4, 0xff ; GFX8-NEXT: v_mov_b32_e32 v1, 8 -; GFX8-NEXT: v_mov_b32_e32 v2, s4 +; GFX8-NEXT: v_mov_b32_e32 v2, 16 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0 -; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v0 -; GFX8-NEXT: v_and_b32_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v4 +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 8, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -632,19 +570,16 @@ define i8 @extractelement_vgpr_v4i8_idx1(<4 x i8> addrspace(1)* %ptr) { ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 -; GFX7-NEXT: s_movk_i32 s4, 0xff ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0 -; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 -; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 -; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX7-NEXT: v_bfe_u32 v3, v0, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v1, 24, v0 +; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v0 +; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v1 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] @@ -655,15 +590,14 @@ define i8 @extractelement_vgpr_v4i8_idx1(<4 x i8> addrspace(1)* %ptr) { ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: s_mov_b32 s4, 8 +; GFX10-NEXT: v_mov_b32_e32 v1, 16 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: s_movk_i32 s4, 0xff -; GFX10-NEXT: v_and_b32_sdwa v3, v0, s4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_or_b32 v0, v0, s4, v1 -; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v2 -; GFX10-NEXT: v_or3_b32 v0, v0, v3, v1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v0, 0xff, v0, v2 +; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v3 +; GFX10-NEXT: v_or3_b32 v0, v0, v1, v2 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr @@ -676,16 +610,16 @@ define i8 @extractelement_vgpr_v4i8_idx2(<4 x i8> addrspace(1)* %ptr) { ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_load_dword v0, v[0:1], off -; GFX9-NEXT: s_mov_b32 s4, 8 -; GFX9-NEXT: s_movk_i32 s5, 0xff +; GFX9-NEXT: v_mov_b32_e32 v2, 8 +; GFX9-NEXT: s_mov_b32 s4, 16 +; GFX9-NEXT: v_mov_b32_e32 v1, 0xff ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_b32_sdwa v3, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_or_b32 v0, v0, s5, v1 -; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v2 -; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_and_or_b32 v0, v0, v1, v2 +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX9-NEXT: v_or3_b32 v0, v0, v4, v1 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -693,17 +627,15 @@ define i8 @extractelement_vgpr_v4i8_idx2(<4 x i8> addrspace(1)* %ptr) { ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: flat_load_dword v0, v[0:1] -; GFX8-NEXT: s_movk_i32 s4, 0xff ; GFX8-NEXT: v_mov_b32_e32 v1, 8 -; GFX8-NEXT: v_mov_b32_e32 v2, s4 +; GFX8-NEXT: v_mov_b32_e32 v2, 16 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0 -; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v0 -; GFX8-NEXT: v_and_b32_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v4 +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -715,19 +647,16 @@ define i8 @extractelement_vgpr_v4i8_idx2(<4 x i8> addrspace(1)* %ptr) { ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 -; GFX7-NEXT: s_movk_i32 s4, 0xff ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0 -; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 -; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 -; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX7-NEXT: v_bfe_u32 v3, v0, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v1, 24, v0 +; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v0 +; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v1 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] @@ -737,14 +666,13 @@ define i8 @extractelement_vgpr_v4i8_idx2(<4 x i8> addrspace(1)* %ptr) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v0, v[0:1], off -; GFX10-NEXT: s_mov_b32 s4, 8 +; GFX10-NEXT: v_mov_b32_e32 v1, 8 +; GFX10-NEXT: s_mov_b32 s4, 16 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: s_movk_i32 s4, 0xff -; GFX10-NEXT: v_and_b32_sdwa v3, v0, s4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_or_b32 v0, v0, s4, v1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v3, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v0, 0xff, v0, v1 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v2 ; GFX10-NEXT: v_or3_b32 v0, v0, v3, v1 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v0 @@ -759,15 +687,15 @@ define i8 @extractelement_vgpr_v4i8_idx3(<4 x i8> addrspace(1)* %ptr) { ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_load_dword v0, v[0:1], off -; GFX9-NEXT: s_mov_b32 s4, 8 -; GFX9-NEXT: s_movk_i32 s5, 0xff +; GFX9-NEXT: v_mov_b32_e32 v2, 8 +; GFX9-NEXT: v_mov_b32_e32 v3, 16 +; GFX9-NEXT: v_mov_b32_e32 v1, 0xff ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_b32_sdwa v3, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_or_b32 v0, v0, s5, v1 -; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v2 +; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v0 +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_and_or_b32 v0, v0, v1, v2 +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v4 ; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 24, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -776,17 +704,15 @@ define i8 @extractelement_vgpr_v4i8_idx3(<4 x i8> addrspace(1)* %ptr) { ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: flat_load_dword v0, v[0:1] -; GFX8-NEXT: s_movk_i32 s4, 0xff ; GFX8-NEXT: v_mov_b32_e32 v1, 8 -; GFX8-NEXT: v_mov_b32_e32 v2, s4 +; GFX8-NEXT: v_mov_b32_e32 v2, 16 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0 -; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v0 -; GFX8-NEXT: v_and_b32_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v4 +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -798,19 +724,16 @@ define i8 @extractelement_vgpr_v4i8_idx3(<4 x i8> addrspace(1)* %ptr) { ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 -; GFX7-NEXT: s_movk_i32 s4, 0xff ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0 -; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 -; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 -; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX7-NEXT: v_bfe_u32 v3, v0, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v1, 24, v0 +; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v0 +; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v1 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] @@ -820,16 +743,15 @@ define i8 @extractelement_vgpr_v4i8_idx3(<4 x i8> addrspace(1)* %ptr) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dword v0, v[0:1], off -; GFX10-NEXT: s_mov_b32 s4, 8 +; GFX10-NEXT: v_mov_b32_e32 v1, 8 +; GFX10-NEXT: v_mov_b32_e32 v2, 16 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: s_movk_i32 s4, 0xff -; GFX10-NEXT: v_and_b32_sdwa v3, v0, s4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_or_b32 v0, v0, s4, v1 -; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v2 -; GFX10-NEXT: v_or3_b32 v0, v0, v3, v1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v0, 0xff, v0, v1 +; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX10-NEXT: v_or3_b32 v0, v0, v2, v1 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr @@ -841,31 +763,29 @@ define amdgpu_ps i8 @extractelement_sgpr_v8i8_sgpr_idx(<8 x i8> addrspace(4)* in ; GCN-LABEL: extractelement_sgpr_v8i8_sgpr_idx: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 -; GCN-NEXT: s_movk_i32 s9, 0xff +; GCN-NEXT: s_mov_b32 s7, 0x80008 +; GCN-NEXT: s_movk_i32 s5, 0xff ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_lshr_b32 s2, s0, 8 -; GCN-NEXT: s_and_b32 s2, s2, s9 -; GCN-NEXT: s_lshr_b32 s3, s0, 16 -; GCN-NEXT: s_lshr_b32 s5, s0, 24 -; GCN-NEXT: s_and_b32 s0, s0, s9 -; GCN-NEXT: s_lshl_b32 s2, s2, 8 -; GCN-NEXT: s_or_b32 s0, s0, s2 -; GCN-NEXT: s_and_b32 s2, s3, s9 -; GCN-NEXT: s_lshl_b32 s2, s2, 16 -; GCN-NEXT: s_or_b32 s0, s0, s2 -; GCN-NEXT: s_lshl_b32 s2, s5, 24 -; GCN-NEXT: s_lshr_b32 s6, s1, 8 +; GCN-NEXT: s_bfe_u32 s8, s0, s7 +; GCN-NEXT: s_and_b32 s6, s0, s5 +; GCN-NEXT: s_lshl_b32 s8, s8, 8 +; GCN-NEXT: s_or_b32 s6, s6, s8 +; GCN-NEXT: s_mov_b32 s8, 0x80010 +; GCN-NEXT: s_lshr_b32 s2, s0, 24 +; GCN-NEXT: s_bfe_u32 s0, s0, s8 +; GCN-NEXT: s_lshl_b32 s0, s0, 16 +; GCN-NEXT: s_or_b32 s0, s6, s0 +; GCN-NEXT: s_lshl_b32 s2, s2, 24 ; GCN-NEXT: s_or_b32 s0, s0, s2 -; GCN-NEXT: s_and_b32 s2, s6, s9 -; GCN-NEXT: s_lshr_b32 s7, s1, 16 -; GCN-NEXT: s_lshr_b32 s8, s1, 24 -; GCN-NEXT: s_and_b32 s1, s1, s9 -; GCN-NEXT: s_lshl_b32 s2, s2, 8 -; GCN-NEXT: s_or_b32 s1, s1, s2 -; GCN-NEXT: s_and_b32 s2, s7, s9 -; GCN-NEXT: s_lshl_b32 s2, s2, 16 -; GCN-NEXT: s_or_b32 s1, s1, s2 -; GCN-NEXT: s_lshl_b32 s2, s8, 24 +; GCN-NEXT: s_and_b32 s2, s1, s5 +; GCN-NEXT: s_bfe_u32 s5, s1, s7 +; GCN-NEXT: s_lshr_b32 s3, s1, 24 +; GCN-NEXT: s_bfe_u32 s1, s1, s8 +; GCN-NEXT: s_lshl_b32 s5, s5, 8 +; GCN-NEXT: s_or_b32 s2, s2, s5 +; GCN-NEXT: s_lshl_b32 s1, s1, 16 +; GCN-NEXT: s_or_b32 s1, s2, s1 +; GCN-NEXT: s_lshl_b32 s2, s3, 24 ; GCN-NEXT: s_or_b32 s1, s1, s2 ; GCN-NEXT: s_lshr_b32 s2, s4, 2 ; GCN-NEXT: s_cmp_eq_u32 s2, 1 @@ -878,34 +798,32 @@ define amdgpu_ps i8 @extractelement_sgpr_v8i8_sgpr_idx(<8 x i8> addrspace(4)* in ; GFX10-LABEL: extractelement_sgpr_v8i8_sgpr_idx: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX10-NEXT: s_mov_b32 s3, 0x80008 ; GFX10-NEXT: s_movk_i32 s2, 0xff -; GFX10-NEXT: s_lshr_b32 s3, s4, 2 +; GFX10-NEXT: s_mov_b32 s5, 0x80010 +; GFX10-NEXT: s_lshr_b32 s6, s4, 2 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_lshr_b32 s5, s0, 8 -; GFX10-NEXT: s_lshr_b32 s8, s1, 8 -; GFX10-NEXT: s_lshr_b32 s6, s0, 16 -; GFX10-NEXT: s_and_b32 s5, s5, s2 -; GFX10-NEXT: s_and_b32 s8, s8, s2 -; GFX10-NEXT: s_lshr_b32 s9, s1, 16 +; GFX10-NEXT: s_bfe_u32 s10, s0, s3 +; GFX10-NEXT: s_bfe_u32 s3, s1, s3 ; GFX10-NEXT: s_lshr_b32 s7, s0, 24 -; GFX10-NEXT: s_and_b32 s6, s6, s2 -; GFX10-NEXT: s_lshr_b32 s10, s1, 24 -; GFX10-NEXT: s_and_b32 s0, s0, s2 -; GFX10-NEXT: s_and_b32 s1, s1, s2 -; GFX10-NEXT: s_and_b32 s2, s9, s2 -; GFX10-NEXT: s_lshl_b32 s5, s5, 8 -; GFX10-NEXT: s_lshl_b32 s8, s8, 8 -; GFX10-NEXT: s_lshl_b32 s6, s6, 16 -; GFX10-NEXT: s_or_b32 s0, s0, s5 -; GFX10-NEXT: s_lshl_b32 s2, s2, 16 -; GFX10-NEXT: s_or_b32 s1, s1, s8 +; GFX10-NEXT: s_lshr_b32 s8, s1, 24 +; GFX10-NEXT: s_and_b32 s9, s0, s2 +; GFX10-NEXT: s_bfe_u32 s0, s0, s5 +; GFX10-NEXT: s_and_b32 s2, s1, s2 +; GFX10-NEXT: s_bfe_u32 s1, s1, s5 +; GFX10-NEXT: s_lshl_b32 s5, s10, 8 +; GFX10-NEXT: s_lshl_b32 s3, s3, 8 +; GFX10-NEXT: s_lshl_b32 s0, s0, 16 +; GFX10-NEXT: s_or_b32 s5, s9, s5 +; GFX10-NEXT: s_lshl_b32 s1, s1, 16 +; GFX10-NEXT: s_or_b32 s2, s2, s3 ; GFX10-NEXT: s_lshl_b32 s7, s7, 24 -; GFX10-NEXT: s_or_b32 s0, s0, s6 -; GFX10-NEXT: s_lshl_b32 s9, s10, 24 -; GFX10-NEXT: s_or_b32 s1, s1, s2 +; GFX10-NEXT: s_or_b32 s0, s5, s0 +; GFX10-NEXT: s_lshl_b32 s8, s8, 24 +; GFX10-NEXT: s_or_b32 s1, s2, s1 ; GFX10-NEXT: s_or_b32 s0, s0, s7 -; GFX10-NEXT: s_or_b32 s1, s1, s9 -; GFX10-NEXT: s_cmp_eq_u32 s3, 1 +; GFX10-NEXT: s_or_b32 s1, s1, s8 +; GFX10-NEXT: s_cmp_eq_u32 s6, 1 ; GFX10-NEXT: s_cselect_b32 s0, s1, s0 ; GFX10-NEXT: s_and_b32 s1, s4, 3 ; GFX10-NEXT: s_lshl_b32 s1, s1, 3 @@ -921,25 +839,24 @@ define amdgpu_ps i8 @extractelement_vgpr_v8i8_sgpr_idx(<8 x i8> addrspace(1)* %p ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX9-NEXT: s_mov_b32 s0, 8 -; GFX9-NEXT: s_movk_i32 s1, 0xff -; GFX9-NEXT: s_lshr_b32 s3, s2, 2 +; GFX9-NEXT: s_mov_b32 s1, 16 +; GFX9-NEXT: s_movk_i32 s3, 0xff +; GFX9-NEXT: s_lshr_b32 s4, s2, 2 ; GFX9-NEXT: s_and_b32 s2, s2, 3 -; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s3, 1 +; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s4, 1 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v2, 8, v0 -; GFX9-NEXT: v_lshrrev_b32_e32 v4, 8, v1 -; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GFX9-NEXT: v_lshrrev_b32_e32 v5, 24, v1 -; GFX9-NEXT: v_lshlrev_b32_sdwa v2, s0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_b32_sdwa v6, v0, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_b32_sdwa v7, v1, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v0 +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v6, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v5, s1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v7, s1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_and_or_b32 v0, v0, s3, v4 +; GFX9-NEXT: v_lshlrev_b32_e32 v2, 24, v2 +; GFX9-NEXT: v_and_or_b32 v1, v1, s3, v6 ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 24, v3 -; GFX9-NEXT: v_and_or_b32 v0, v0, s1, v2 -; GFX9-NEXT: v_lshlrev_b32_e32 v5, 24, v5 -; GFX9-NEXT: v_and_or_b32 v1, v1, s1, v4 -; GFX9-NEXT: v_or3_b32 v0, v0, v6, v3 -; GFX9-NEXT: v_or3_b32 v1, v1, v7, v5 +; GFX9-NEXT: v_or3_b32 v0, v0, v5, v2 +; GFX9-NEXT: v_or3_b32 v1, v1, v7, v3 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GFX9-NEXT: s_lshl_b32 s0, s2, 3 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, s0, v0 @@ -949,31 +866,27 @@ define amdgpu_ps i8 @extractelement_vgpr_v8i8_sgpr_idx(<8 x i8> addrspace(1)* %p ; GFX8-LABEL: extractelement_vgpr_v8i8_sgpr_idx: ; GFX8: ; %bb.0: ; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] -; GFX8-NEXT: s_movk_i32 s0, 0xff ; GFX8-NEXT: v_mov_b32_e32 v2, 8 -; GFX8-NEXT: v_mov_b32_e32 v3, 8 -; GFX8-NEXT: v_mov_b32_e32 v4, s0 +; GFX8-NEXT: v_mov_b32_e32 v3, 16 ; GFX8-NEXT: s_lshr_b32 s0, s2, 2 ; GFX8-NEXT: s_and_b32 s1, s2, 3 ; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, s0, 1 ; GFX8-NEXT: s_lshl_b32 s0, s1, 3 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_lshrrev_b32_e32 v5, 8, v0 -; GFX8-NEXT: v_lshrrev_b32_e32 v7, 8, v1 -; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v3, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v6, 24, v0 -; GFX8-NEXT: v_and_b32_sdwa v9, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_lshrrev_b32_e32 v8, 24, v1 -; GFX8-NEXT: v_and_b32_sdwa v4, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_lshlrev_b32_e32 v5, 24, v6 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v9 -; GFX8-NEXT: v_lshlrev_b32_e32 v6, 24, v8 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v5 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v6 +; GFX8-NEXT: v_lshlrev_b32_sdwa v6, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v7, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshrrev_b32_e32 v5, 24, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_e32 v4, 24, v4 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v7 +; GFX8-NEXT: v_lshlrev_b32_e32 v2, 24, v5 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v3 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v2 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GFX8-NEXT: v_lshrrev_b32_e32 v0, s0, v0 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0 @@ -990,30 +903,26 @@ define amdgpu_ps i8 @extractelement_vgpr_v8i8_sgpr_idx(<8 x i8> addrspace(1)* %p ; GFX7-NEXT: s_and_b32 s2, s2, 3 ; GFX7-NEXT: v_cmp_eq_u32_e64 vcc, s1, 1 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v2, 8, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v5, 8, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v6, 16, v1 -; GFX7-NEXT: v_and_b32_e32 v2, s0, v2 -; GFX7-NEXT: v_and_b32_e32 v5, s0, v5 -; GFX7-NEXT: v_lshrrev_b32_e32 v4, 24, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v7, 24, v1 -; GFX7-NEXT: v_and_b32_e32 v3, s0, v3 -; GFX7-NEXT: v_and_b32_e32 v6, s0, v6 -; GFX7-NEXT: v_and_b32_e32 v0, s0, v0 -; GFX7-NEXT: v_and_b32_e32 v1, s0, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 8, v2 +; GFX7-NEXT: v_bfe_u32 v5, v0, 8, 8 +; GFX7-NEXT: v_bfe_u32 v7, v1, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v2, 24, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v1 +; GFX7-NEXT: v_and_b32_e32 v4, s0, v0 +; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX7-NEXT: v_and_b32_e32 v6, s0, v1 +; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 8 ; GFX7-NEXT: v_lshlrev_b32_e32 v5, 8, v5 -; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v7, 8, v7 +; GFX7-NEXT: v_or_b32_e32 v4, v4, v5 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-NEXT: v_or_b32_e32 v5, v6, v7 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 24, v2 +; GFX7-NEXT: v_or_b32_e32 v0, v4, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 24, v3 +; GFX7-NEXT: v_or_b32_e32 v1, v5, v1 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v6, 16, v6 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v5 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v3 -; GFX7-NEXT: v_lshlrev_b32_e32 v7, 24, v7 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v6 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v4 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v7 +; GFX7-NEXT: v_or_b32_e32 v1, v1, v3 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GFX7-NEXT: s_lshl_b32 s0, s2, 3 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, s0, v0 @@ -1024,24 +933,23 @@ define amdgpu_ps i8 @extractelement_vgpr_v8i8_sgpr_idx(<8 x i8> addrspace(1)* %p ; GFX10: ; %bb.0: ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: s_mov_b32 s0, 8 -; GFX10-NEXT: s_movk_i32 s1, 0xff +; GFX10-NEXT: s_mov_b32 s1, 16 +; GFX10-NEXT: s_movk_i32 s3, 0xff ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v2, 8, v0 -; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v1 -; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v0 -; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v1 -; GFX10-NEXT: v_and_b32_sdwa v6, v0, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v3, s0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_and_b32_sdwa v7, v1, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; GFX10-NEXT: v_lshlrev_b32_e32 v5, 24, v5 -; GFX10-NEXT: v_and_or_b32 v0, v0, s1, v2 -; GFX10-NEXT: v_and_or_b32 v1, v1, s1, v3 +; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v3, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v5, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v6, s1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v7, s1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v0, v0, s3, v3 +; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v2 +; GFX10-NEXT: v_and_or_b32 v1, v1, s3, v5 +; GFX10-NEXT: v_lshlrev_b32_e32 v3, 24, v4 ; GFX10-NEXT: s_lshr_b32 s0, s2, 2 ; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s0, 1 -; GFX10-NEXT: v_or3_b32 v0, v0, v6, v4 -; GFX10-NEXT: v_or3_b32 v1, v1, v7, v5 +; GFX10-NEXT: v_or3_b32 v0, v0, v6, v2 +; GFX10-NEXT: v_or3_b32 v1, v1, v7, v3 ; GFX10-NEXT: s_and_b32 s0, s2, 3 ; GFX10-NEXT: s_lshl_b32 s0, s0, 3 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo @@ -1059,25 +967,24 @@ define i8 @extractelement_vgpr_v8i8_vgpr_idx(<8 x i8> addrspace(1)* %ptr, i32 %i ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX9-NEXT: s_mov_b32 s4, 8 -; GFX9-NEXT: s_movk_i32 s5, 0xff +; GFX9-NEXT: s_mov_b32 s5, 16 +; GFX9-NEXT: s_movk_i32 s6, 0xff ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 2, v2 ; GFX9-NEXT: v_and_b32_e32 v2, 3, v2 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v3 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v4, 8, v0 -; GFX9-NEXT: v_lshrrev_b32_e32 v6, 8, v1 -; GFX9-NEXT: v_lshrrev_b32_e32 v5, 24, v0 -; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v1 -; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v6, s4, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_b32_sdwa v8, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_b32_sdwa v9, v1, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v0 +; GFX9-NEXT: v_lshrrev_b32_e32 v5, 24, v1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v6, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v8, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v7, s5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v9, s5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_and_or_b32 v0, v0, s6, v6 +; GFX9-NEXT: v_lshlrev_b32_e32 v4, 24, v4 +; GFX9-NEXT: v_and_or_b32 v1, v1, s6, v8 ; GFX9-NEXT: v_lshlrev_b32_e32 v5, 24, v5 -; GFX9-NEXT: v_and_or_b32 v0, v0, s5, v4 -; GFX9-NEXT: v_lshlrev_b32_e32 v7, 24, v7 -; GFX9-NEXT: v_and_or_b32 v1, v1, s5, v6 -; GFX9-NEXT: v_or3_b32 v0, v0, v8, v5 -; GFX9-NEXT: v_or3_b32 v1, v1, v9, v7 +; GFX9-NEXT: v_or3_b32 v0, v0, v7, v4 +; GFX9-NEXT: v_or3_b32 v1, v1, v9, v5 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 3, v2 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, v1, v0 @@ -1087,30 +994,26 @@ define i8 @extractelement_vgpr_v8i8_vgpr_idx(<8 x i8> addrspace(1)* %ptr, i32 %i ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] -; GFX8-NEXT: s_movk_i32 s4, 0xff ; GFX8-NEXT: v_mov_b32_e32 v3, 8 -; GFX8-NEXT: v_mov_b32_e32 v4, 8 -; GFX8-NEXT: v_mov_b32_e32 v5, s4 -; GFX8-NEXT: v_lshrrev_b32_e32 v6, 2, v2 +; GFX8-NEXT: v_mov_b32_e32 v4, 16 +; GFX8-NEXT: v_lshrrev_b32_e32 v5, 2, v2 ; GFX8-NEXT: v_and_b32_e32 v2, 3, v2 -; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v6 +; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v5 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_lshrrev_b32_e32 v7, 8, v0 -; GFX8-NEXT: v_lshrrev_b32_e32 v9, 8, v1 -; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v3, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshlrev_b32_sdwa v4, v4, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v8, 24, v0 -; GFX8-NEXT: v_and_b32_sdwa v11, v0, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_lshrrev_b32_e32 v10, 24, v1 -; GFX8-NEXT: v_and_b32_sdwa v5, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_lshlrev_b32_e32 v7, 24, v8 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v11 -; GFX8-NEXT: v_lshlrev_b32_e32 v8, 24, v10 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v5 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v7 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v8 +; GFX8-NEXT: v_lshlrev_b32_sdwa v8, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v6, 24, v0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v9, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_sdwa v0, v0, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshrrev_b32_e32 v7, 24, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v4, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_e32 v6, 24, v6 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v9 +; GFX8-NEXT: v_lshlrev_b32_e32 v3, 24, v7 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v6 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v3 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 3, v2 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, v1, v0 @@ -1128,30 +1031,26 @@ define i8 @extractelement_vgpr_v8i8_vgpr_idx(<8 x i8> addrspace(1)* %ptr, i32 %i ; GFX7-NEXT: v_and_b32_e32 v2, 3, v2 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v3 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v4, 8, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v7, 8, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v5, 16, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v8, 16, v1 -; GFX7-NEXT: v_and_b32_e32 v4, s4, v4 -; GFX7-NEXT: v_and_b32_e32 v7, s4, v7 -; GFX7-NEXT: v_lshrrev_b32_e32 v6, 24, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v9, 24, v1 -; GFX7-NEXT: v_and_b32_e32 v5, s4, v5 -; GFX7-NEXT: v_and_b32_e32 v8, s4, v8 -; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 8, v4 -; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX7-NEXT: v_bfe_u32 v7, v0, 8, 8 +; GFX7-NEXT: v_bfe_u32 v9, v1, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v4, 24, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v5, 24, v1 +; GFX7-NEXT: v_and_b32_e32 v6, s4, v0 +; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX7-NEXT: v_and_b32_e32 v8, s4, v1 +; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 8 ; GFX7-NEXT: v_lshlrev_b32_e32 v7, 8, v7 -; GFX7-NEXT: v_lshlrev_b32_e32 v5, 16, v5 +; GFX7-NEXT: v_lshlrev_b32_e32 v9, 8, v9 +; GFX7-NEXT: v_or_b32_e32 v6, v6, v7 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-NEXT: v_or_b32_e32 v7, v8, v9 +; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v4 +; GFX7-NEXT: v_or_b32_e32 v0, v6, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v5, 24, v5 +; GFX7-NEXT: v_or_b32_e32 v1, v7, v1 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v4 -; GFX7-NEXT: v_lshlrev_b32_e32 v8, 16, v8 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v7 -; GFX7-NEXT: v_lshlrev_b32_e32 v6, 24, v6 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v5 -; GFX7-NEXT: v_lshlrev_b32_e32 v9, 24, v9 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v8 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v6 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v9 +; GFX7-NEXT: v_or_b32_e32 v1, v1, v5 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 3, v2 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, v1, v0 @@ -1163,25 +1062,24 @@ define i8 @extractelement_vgpr_v8i8_vgpr_idx(<8 x i8> addrspace(1)* %ptr, i32 %i ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: s_mov_b32 s4, 8 -; GFX10-NEXT: s_movk_i32 s5, 0xff +; GFX10-NEXT: s_mov_b32 s5, 16 +; GFX10-NEXT: s_movk_i32 s6, 0xff ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v0 -; GFX10-NEXT: v_lshrrev_b32_e32 v4, 8, v1 -; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v0 -; GFX10-NEXT: v_lshrrev_b32_e32 v6, 24, v1 -; GFX10-NEXT: v_and_b32_sdwa v7, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b32_sdwa v3, s4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v4, s4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_and_b32_sdwa v8, v1, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b32_e32 v5, 24, v5 -; GFX10-NEXT: v_lshlrev_b32_e32 v6, 24, v6 -; GFX10-NEXT: v_and_or_b32 v0, v0, s5, v3 -; GFX10-NEXT: v_and_or_b32 v1, v1, s5, v4 -; GFX10-NEXT: v_lshrrev_b32_e32 v3, 2, v2 +; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v4, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v6, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v7, s5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v8, s5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v0, v0, s6, v4 +; GFX10-NEXT: v_lshlrev_b32_e32 v3, 24, v3 +; GFX10-NEXT: v_lshlrev_b32_e32 v4, 24, v5 +; GFX10-NEXT: v_and_or_b32 v1, v1, s6, v6 +; GFX10-NEXT: v_lshrrev_b32_e32 v5, 2, v2 ; GFX10-NEXT: v_and_b32_e32 v2, 3, v2 -; GFX10-NEXT: v_or3_b32 v0, v0, v7, v5 -; GFX10-NEXT: v_or3_b32 v1, v1, v8, v6 -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3 +; GFX10-NEXT: v_or3_b32 v0, v0, v7, v3 +; GFX10-NEXT: v_or3_b32 v1, v1, v8, v4 +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v5 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 3, v2 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, v1, v0 @@ -1195,37 +1093,35 @@ define amdgpu_ps i8 @extractelement_sgpr_v8i8_vgpr_idx(<8 x i8> addrspace(4)* in ; GCN-LABEL: extractelement_sgpr_v8i8_vgpr_idx: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 -; GCN-NEXT: s_movk_i32 s8, 0xff +; GCN-NEXT: s_mov_b32 s6, 0x80008 +; GCN-NEXT: s_movk_i32 s4, 0xff ; GCN-NEXT: v_lshrrev_b32_e32 v1, 2, v0 ; GCN-NEXT: v_and_b32_e32 v0, 3, v0 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_lshr_b32 s2, s0, 8 -; GCN-NEXT: s_and_b32 s2, s2, s8 -; GCN-NEXT: s_lshr_b32 s3, s0, 16 -; GCN-NEXT: s_lshr_b32 s4, s0, 24 -; GCN-NEXT: s_and_b32 s0, s0, s8 -; GCN-NEXT: s_lshl_b32 s2, s2, 8 -; GCN-NEXT: s_or_b32 s0, s0, s2 -; GCN-NEXT: s_and_b32 s2, s3, s8 -; GCN-NEXT: s_lshl_b32 s2, s2, 16 -; GCN-NEXT: s_or_b32 s0, s0, s2 -; GCN-NEXT: s_lshl_b32 s2, s4, 24 -; GCN-NEXT: s_lshr_b32 s5, s1, 8 +; GCN-NEXT: s_bfe_u32 s7, s0, s6 +; GCN-NEXT: s_and_b32 s5, s0, s4 +; GCN-NEXT: s_lshl_b32 s7, s7, 8 +; GCN-NEXT: s_or_b32 s5, s5, s7 +; GCN-NEXT: s_mov_b32 s7, 0x80010 +; GCN-NEXT: s_lshr_b32 s2, s0, 24 +; GCN-NEXT: s_bfe_u32 s0, s0, s7 +; GCN-NEXT: s_lshl_b32 s0, s0, 16 +; GCN-NEXT: s_or_b32 s0, s5, s0 +; GCN-NEXT: s_lshl_b32 s2, s2, 24 ; GCN-NEXT: s_or_b32 s0, s0, s2 -; GCN-NEXT: s_and_b32 s2, s5, s8 -; GCN-NEXT: s_lshr_b32 s6, s1, 16 -; GCN-NEXT: s_lshr_b32 s7, s1, 24 -; GCN-NEXT: s_and_b32 s1, s1, s8 -; GCN-NEXT: s_lshl_b32 s2, s2, 8 -; GCN-NEXT: s_or_b32 s1, s1, s2 -; GCN-NEXT: s_and_b32 s2, s6, s8 -; GCN-NEXT: s_lshl_b32 s2, s2, 16 -; GCN-NEXT: s_or_b32 s1, s1, s2 -; GCN-NEXT: s_lshl_b32 s2, s7, 24 +; GCN-NEXT: s_and_b32 s2, s1, s4 +; GCN-NEXT: s_bfe_u32 s4, s1, s6 +; GCN-NEXT: s_lshr_b32 s3, s1, 24 +; GCN-NEXT: s_bfe_u32 s1, s1, s7 +; GCN-NEXT: s_lshl_b32 s4, s4, 8 +; GCN-NEXT: s_or_b32 s2, s2, s4 +; GCN-NEXT: s_lshl_b32 s1, s1, 16 +; GCN-NEXT: s_or_b32 s1, s2, s1 +; GCN-NEXT: s_lshl_b32 s2, s3, 24 ; GCN-NEXT: s_or_b32 s1, s1, s2 ; GCN-NEXT: v_mov_b32_e32 v2, s0 ; GCN-NEXT: v_mov_b32_e32 v3, s1 +; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 ; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc ; GCN-NEXT: v_lshlrev_b32_e32 v0, 3, v0 ; GCN-NEXT: v_lshrrev_b32_e32 v0, v0, v1 @@ -1235,37 +1131,35 @@ define amdgpu_ps i8 @extractelement_sgpr_v8i8_vgpr_idx(<8 x i8> addrspace(4)* in ; GFX10-LABEL: extractelement_sgpr_v8i8_vgpr_idx: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX10-NEXT: s_mov_b32 s3, 0x80008 ; GFX10-NEXT: s_movk_i32 s2, 0xff +; GFX10-NEXT: s_mov_b32 s4, 0x80010 ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 2, v0 ; GFX10-NEXT: v_and_b32_e32 v0, 3, v0 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 3, v0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_lshr_b32 s6, s1, 8 -; GFX10-NEXT: s_lshr_b32 s3, s0, 8 -; GFX10-NEXT: s_lshr_b32 s4, s0, 16 -; GFX10-NEXT: s_and_b32 s6, s6, s2 -; GFX10-NEXT: s_lshr_b32 s7, s1, 16 -; GFX10-NEXT: s_lshr_b32 s5, s0, 24 -; GFX10-NEXT: s_and_b32 s3, s3, s2 -; GFX10-NEXT: s_lshr_b32 s8, s1, 24 -; GFX10-NEXT: s_and_b32 s0, s0, s2 -; GFX10-NEXT: s_and_b32 s1, s1, s2 -; GFX10-NEXT: s_and_b32 s4, s4, s2 -; GFX10-NEXT: s_and_b32 s2, s7, s2 -; GFX10-NEXT: s_lshl_b32 s6, s6, 8 -; GFX10-NEXT: s_lshl_b32 s2, s2, 16 -; GFX10-NEXT: s_or_b32 s1, s1, s6 -; GFX10-NEXT: s_lshl_b32 s7, s8, 24 -; GFX10-NEXT: s_or_b32 s1, s1, s2 +; GFX10-NEXT: s_bfe_u32 s8, s0, s3 +; GFX10-NEXT: s_bfe_u32 s3, s1, s3 +; GFX10-NEXT: s_and_b32 s7, s0, s2 +; GFX10-NEXT: s_lshr_b32 s6, s1, 24 +; GFX10-NEXT: s_and_b32 s2, s1, s2 +; GFX10-NEXT: s_bfe_u32 s1, s1, s4 ; GFX10-NEXT: s_lshl_b32 s3, s3, 8 -; GFX10-NEXT: s_or_b32 s1, s1, s7 -; GFX10-NEXT: s_lshl_b32 s2, s4, 16 -; GFX10-NEXT: s_or_b32 s0, s0, s3 +; GFX10-NEXT: s_lshl_b32 s1, s1, 16 +; GFX10-NEXT: s_or_b32 s2, s2, s3 +; GFX10-NEXT: s_lshl_b32 s3, s6, 24 +; GFX10-NEXT: s_or_b32 s1, s2, s1 +; GFX10-NEXT: s_lshr_b32 s5, s0, 24 +; GFX10-NEXT: s_bfe_u32 s0, s0, s4 +; GFX10-NEXT: s_lshl_b32 s4, s8, 8 +; GFX10-NEXT: s_or_b32 s1, s1, s3 +; GFX10-NEXT: s_lshl_b32 s0, s0, 16 +; GFX10-NEXT: s_or_b32 s3, s7, s4 ; GFX10-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-NEXT: s_lshl_b32 s5, s5, 24 +; GFX10-NEXT: s_lshl_b32 s2, s5, 24 +; GFX10-NEXT: s_or_b32 s0, s3, s0 ; GFX10-NEXT: s_or_b32 s0, s0, s2 -; GFX10-NEXT: s_or_b32 s0, s0, s5 ; GFX10-NEXT: v_cndmask_b32_e32 v1, s0, v2, vcc_lo ; GFX10-NEXT: v_lshrrev_b32_e32 v0, v0, v1 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0 @@ -1279,19 +1173,16 @@ define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx0(<8 x i8> addrspace(4)* inreg ; GCN-LABEL: extractelement_sgpr_v8i8_idx0: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 -; GCN-NEXT: s_movk_i32 s4, 0xff ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_lshr_b32 s1, s0, 8 -; GCN-NEXT: s_and_b32 s1, s1, s4 -; GCN-NEXT: s_lshr_b32 s2, s0, 16 -; GCN-NEXT: s_lshr_b32 s3, s0, 24 -; GCN-NEXT: s_and_b32 s0, s0, s4 -; GCN-NEXT: s_lshl_b32 s1, s1, 8 -; GCN-NEXT: s_or_b32 s0, s0, s1 -; GCN-NEXT: s_and_b32 s1, s2, s4 -; GCN-NEXT: s_lshl_b32 s1, s1, 16 -; GCN-NEXT: s_or_b32 s0, s0, s1 -; GCN-NEXT: s_lshl_b32 s1, s3, 24 +; GCN-NEXT: s_bfe_u32 s3, s0, 0x80008 +; GCN-NEXT: s_lshr_b32 s1, s0, 24 +; GCN-NEXT: s_and_b32 s2, s0, 0xff +; GCN-NEXT: s_bfe_u32 s0, s0, 0x80010 +; GCN-NEXT: s_lshl_b32 s3, s3, 8 +; GCN-NEXT: s_or_b32 s2, s2, s3 +; GCN-NEXT: s_lshl_b32 s0, s0, 16 +; GCN-NEXT: s_or_b32 s0, s2, s0 +; GCN-NEXT: s_lshl_b32 s1, s1, 24 ; GCN-NEXT: s_or_b32 s0, s0, s1 ; GCN-NEXT: ; return to shader part epilog ; @@ -1299,18 +1190,15 @@ define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx0(<8 x i8> addrspace(4)* inreg ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_movk_i32 s1, 0xff -; GFX10-NEXT: s_lshr_b32 s2, s0, 8 -; GFX10-NEXT: s_lshr_b32 s3, s0, 16 -; GFX10-NEXT: s_and_b32 s2, s2, s1 -; GFX10-NEXT: s_and_b32 s4, s0, s1 -; GFX10-NEXT: s_and_b32 s1, s3, s1 +; GFX10-NEXT: s_bfe_u32 s2, s0, 0x80008 +; GFX10-NEXT: s_bfe_u32 s3, s0, 0x80010 +; GFX10-NEXT: s_and_b32 s1, s0, 0xff ; GFX10-NEXT: s_lshl_b32 s2, s2, 8 ; GFX10-NEXT: s_lshr_b32 s0, s0, 24 -; GFX10-NEXT: s_lshl_b32 s1, s1, 16 -; GFX10-NEXT: s_or_b32 s2, s4, s2 +; GFX10-NEXT: s_lshl_b32 s3, s3, 16 +; GFX10-NEXT: s_or_b32 s1, s1, s2 ; GFX10-NEXT: s_lshl_b32 s0, s0, 24 -; GFX10-NEXT: s_or_b32 s1, s2, s1 +; GFX10-NEXT: s_or_b32 s1, s1, s3 ; GFX10-NEXT: s_or_b32 s0, s1, s0 ; GFX10-NEXT: ; return to shader part epilog %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr @@ -1322,19 +1210,16 @@ define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx1(<8 x i8> addrspace(4)* inreg ; GCN-LABEL: extractelement_sgpr_v8i8_idx1: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 -; GCN-NEXT: s_movk_i32 s4, 0xff ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_lshr_b32 s1, s0, 8 -; GCN-NEXT: s_and_b32 s1, s1, s4 -; GCN-NEXT: s_lshr_b32 s2, s0, 16 -; GCN-NEXT: s_lshr_b32 s3, s0, 24 -; GCN-NEXT: s_and_b32 s0, s0, s4 -; GCN-NEXT: s_lshl_b32 s1, s1, 8 -; GCN-NEXT: s_or_b32 s0, s0, s1 -; GCN-NEXT: s_and_b32 s1, s2, s4 -; GCN-NEXT: s_lshl_b32 s1, s1, 16 -; GCN-NEXT: s_or_b32 s0, s0, s1 -; GCN-NEXT: s_lshl_b32 s1, s3, 24 +; GCN-NEXT: s_bfe_u32 s3, s0, 0x80008 +; GCN-NEXT: s_lshr_b32 s1, s0, 24 +; GCN-NEXT: s_and_b32 s2, s0, 0xff +; GCN-NEXT: s_bfe_u32 s0, s0, 0x80010 +; GCN-NEXT: s_lshl_b32 s3, s3, 8 +; GCN-NEXT: s_or_b32 s2, s2, s3 +; GCN-NEXT: s_lshl_b32 s0, s0, 16 +; GCN-NEXT: s_or_b32 s0, s2, s0 +; GCN-NEXT: s_lshl_b32 s1, s1, 24 ; GCN-NEXT: s_or_b32 s0, s0, s1 ; GCN-NEXT: s_lshr_b32 s0, s0, 8 ; GCN-NEXT: ; return to shader part epilog @@ -1343,18 +1228,15 @@ define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx1(<8 x i8> addrspace(4)* inreg ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_movk_i32 s1, 0xff -; GFX10-NEXT: s_lshr_b32 s2, s0, 8 -; GFX10-NEXT: s_lshr_b32 s3, s0, 16 -; GFX10-NEXT: s_and_b32 s2, s2, s1 -; GFX10-NEXT: s_and_b32 s4, s0, s1 -; GFX10-NEXT: s_and_b32 s1, s3, s1 +; GFX10-NEXT: s_bfe_u32 s2, s0, 0x80008 +; GFX10-NEXT: s_bfe_u32 s3, s0, 0x80010 +; GFX10-NEXT: s_and_b32 s1, s0, 0xff ; GFX10-NEXT: s_lshl_b32 s2, s2, 8 ; GFX10-NEXT: s_lshr_b32 s0, s0, 24 -; GFX10-NEXT: s_lshl_b32 s1, s1, 16 -; GFX10-NEXT: s_or_b32 s2, s4, s2 +; GFX10-NEXT: s_lshl_b32 s3, s3, 16 +; GFX10-NEXT: s_or_b32 s1, s1, s2 ; GFX10-NEXT: s_lshl_b32 s0, s0, 24 -; GFX10-NEXT: s_or_b32 s1, s2, s1 +; GFX10-NEXT: s_or_b32 s1, s1, s3 ; GFX10-NEXT: s_or_b32 s0, s1, s0 ; GFX10-NEXT: s_lshr_b32 s0, s0, 8 ; GFX10-NEXT: ; return to shader part epilog @@ -1367,19 +1249,16 @@ define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx2(<8 x i8> addrspace(4)* inreg ; GCN-LABEL: extractelement_sgpr_v8i8_idx2: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 -; GCN-NEXT: s_movk_i32 s4, 0xff ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_lshr_b32 s1, s0, 8 -; GCN-NEXT: s_and_b32 s1, s1, s4 -; GCN-NEXT: s_lshr_b32 s2, s0, 16 -; GCN-NEXT: s_lshr_b32 s3, s0, 24 -; GCN-NEXT: s_and_b32 s0, s0, s4 -; GCN-NEXT: s_lshl_b32 s1, s1, 8 -; GCN-NEXT: s_or_b32 s0, s0, s1 -; GCN-NEXT: s_and_b32 s1, s2, s4 -; GCN-NEXT: s_lshl_b32 s1, s1, 16 -; GCN-NEXT: s_or_b32 s0, s0, s1 -; GCN-NEXT: s_lshl_b32 s1, s3, 24 +; GCN-NEXT: s_bfe_u32 s3, s0, 0x80008 +; GCN-NEXT: s_lshr_b32 s1, s0, 24 +; GCN-NEXT: s_and_b32 s2, s0, 0xff +; GCN-NEXT: s_bfe_u32 s0, s0, 0x80010 +; GCN-NEXT: s_lshl_b32 s3, s3, 8 +; GCN-NEXT: s_or_b32 s2, s2, s3 +; GCN-NEXT: s_lshl_b32 s0, s0, 16 +; GCN-NEXT: s_or_b32 s0, s2, s0 +; GCN-NEXT: s_lshl_b32 s1, s1, 24 ; GCN-NEXT: s_or_b32 s0, s0, s1 ; GCN-NEXT: s_lshr_b32 s0, s0, 16 ; GCN-NEXT: ; return to shader part epilog @@ -1388,18 +1267,15 @@ define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx2(<8 x i8> addrspace(4)* inreg ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_movk_i32 s1, 0xff -; GFX10-NEXT: s_lshr_b32 s2, s0, 8 -; GFX10-NEXT: s_lshr_b32 s3, s0, 16 -; GFX10-NEXT: s_and_b32 s2, s2, s1 -; GFX10-NEXT: s_and_b32 s4, s0, s1 -; GFX10-NEXT: s_and_b32 s1, s3, s1 +; GFX10-NEXT: s_bfe_u32 s2, s0, 0x80008 +; GFX10-NEXT: s_bfe_u32 s3, s0, 0x80010 +; GFX10-NEXT: s_and_b32 s1, s0, 0xff ; GFX10-NEXT: s_lshl_b32 s2, s2, 8 ; GFX10-NEXT: s_lshr_b32 s0, s0, 24 -; GFX10-NEXT: s_lshl_b32 s1, s1, 16 -; GFX10-NEXT: s_or_b32 s2, s4, s2 +; GFX10-NEXT: s_lshl_b32 s3, s3, 16 +; GFX10-NEXT: s_or_b32 s1, s1, s2 ; GFX10-NEXT: s_lshl_b32 s0, s0, 24 -; GFX10-NEXT: s_or_b32 s1, s2, s1 +; GFX10-NEXT: s_or_b32 s1, s1, s3 ; GFX10-NEXT: s_or_b32 s0, s1, s0 ; GFX10-NEXT: s_lshr_b32 s0, s0, 16 ; GFX10-NEXT: ; return to shader part epilog @@ -1412,19 +1288,16 @@ define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx3(<8 x i8> addrspace(4)* inreg ; GCN-LABEL: extractelement_sgpr_v8i8_idx3: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 -; GCN-NEXT: s_movk_i32 s4, 0xff ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_lshr_b32 s1, s0, 8 -; GCN-NEXT: s_and_b32 s1, s1, s4 -; GCN-NEXT: s_lshr_b32 s2, s0, 16 -; GCN-NEXT: s_lshr_b32 s3, s0, 24 -; GCN-NEXT: s_and_b32 s0, s0, s4 -; GCN-NEXT: s_lshl_b32 s1, s1, 8 -; GCN-NEXT: s_or_b32 s0, s0, s1 -; GCN-NEXT: s_and_b32 s1, s2, s4 -; GCN-NEXT: s_lshl_b32 s1, s1, 16 -; GCN-NEXT: s_or_b32 s0, s0, s1 -; GCN-NEXT: s_lshl_b32 s1, s3, 24 +; GCN-NEXT: s_bfe_u32 s3, s0, 0x80008 +; GCN-NEXT: s_lshr_b32 s1, s0, 24 +; GCN-NEXT: s_and_b32 s2, s0, 0xff +; GCN-NEXT: s_bfe_u32 s0, s0, 0x80010 +; GCN-NEXT: s_lshl_b32 s3, s3, 8 +; GCN-NEXT: s_or_b32 s2, s2, s3 +; GCN-NEXT: s_lshl_b32 s0, s0, 16 +; GCN-NEXT: s_or_b32 s0, s2, s0 +; GCN-NEXT: s_lshl_b32 s1, s1, 24 ; GCN-NEXT: s_or_b32 s0, s0, s1 ; GCN-NEXT: s_lshr_b32 s0, s0, 24 ; GCN-NEXT: ; return to shader part epilog @@ -1433,18 +1306,15 @@ define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx3(<8 x i8> addrspace(4)* inreg ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_movk_i32 s1, 0xff -; GFX10-NEXT: s_lshr_b32 s2, s0, 8 -; GFX10-NEXT: s_lshr_b32 s3, s0, 16 -; GFX10-NEXT: s_and_b32 s2, s2, s1 -; GFX10-NEXT: s_and_b32 s4, s0, s1 -; GFX10-NEXT: s_and_b32 s1, s3, s1 +; GFX10-NEXT: s_bfe_u32 s2, s0, 0x80008 +; GFX10-NEXT: s_bfe_u32 s3, s0, 0x80010 +; GFX10-NEXT: s_and_b32 s1, s0, 0xff ; GFX10-NEXT: s_lshl_b32 s2, s2, 8 ; GFX10-NEXT: s_lshr_b32 s0, s0, 24 -; GFX10-NEXT: s_lshl_b32 s1, s1, 16 -; GFX10-NEXT: s_or_b32 s2, s4, s2 +; GFX10-NEXT: s_lshl_b32 s3, s3, 16 +; GFX10-NEXT: s_or_b32 s1, s1, s2 ; GFX10-NEXT: s_lshl_b32 s0, s0, 24 -; GFX10-NEXT: s_or_b32 s1, s2, s1 +; GFX10-NEXT: s_or_b32 s1, s1, s3 ; GFX10-NEXT: s_or_b32 s0, s1, s0 ; GFX10-NEXT: s_lshr_b32 s0, s0, 24 ; GFX10-NEXT: ; return to shader part epilog @@ -1457,38 +1327,32 @@ define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx4(<8 x i8> addrspace(4)* inreg ; GCN-LABEL: extractelement_sgpr_v8i8_idx4: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 -; GCN-NEXT: s_movk_i32 s4, 0xff ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_lshr_b32 s0, s1, 8 -; GCN-NEXT: s_and_b32 s0, s0, s4 -; GCN-NEXT: s_lshr_b32 s2, s1, 16 -; GCN-NEXT: s_lshr_b32 s3, s1, 24 -; GCN-NEXT: s_and_b32 s1, s1, s4 -; GCN-NEXT: s_lshl_b32 s0, s0, 8 -; GCN-NEXT: s_or_b32 s0, s1, s0 -; GCN-NEXT: s_and_b32 s1, s2, s4 +; GCN-NEXT: s_bfe_u32 s3, s1, 0x80008 +; GCN-NEXT: s_lshr_b32 s0, s1, 24 +; GCN-NEXT: s_and_b32 s2, s1, 0xff +; GCN-NEXT: s_bfe_u32 s1, s1, 0x80010 +; GCN-NEXT: s_lshl_b32 s3, s3, 8 +; GCN-NEXT: s_or_b32 s2, s2, s3 ; GCN-NEXT: s_lshl_b32 s1, s1, 16 -; GCN-NEXT: s_or_b32 s0, s0, s1 -; GCN-NEXT: s_lshl_b32 s1, s3, 24 -; GCN-NEXT: s_or_b32 s0, s0, s1 +; GCN-NEXT: s_or_b32 s1, s2, s1 +; GCN-NEXT: s_lshl_b32 s0, s0, 24 +; GCN-NEXT: s_or_b32 s0, s1, s0 ; GCN-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: extractelement_sgpr_v8i8_idx4: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_movk_i32 s0, 0xff -; GFX10-NEXT: s_lshr_b32 s2, s1, 8 -; GFX10-NEXT: s_lshr_b32 s3, s1, 16 -; GFX10-NEXT: s_and_b32 s2, s2, s0 -; GFX10-NEXT: s_and_b32 s4, s1, s0 -; GFX10-NEXT: s_and_b32 s0, s3, s0 +; GFX10-NEXT: s_bfe_u32 s2, s1, 0x80008 +; GFX10-NEXT: s_bfe_u32 s3, s1, 0x80010 +; GFX10-NEXT: s_and_b32 s0, s1, 0xff ; GFX10-NEXT: s_lshl_b32 s2, s2, 8 ; GFX10-NEXT: s_lshr_b32 s1, s1, 24 -; GFX10-NEXT: s_lshl_b32 s0, s0, 16 -; GFX10-NEXT: s_or_b32 s2, s4, s2 +; GFX10-NEXT: s_lshl_b32 s3, s3, 16 +; GFX10-NEXT: s_or_b32 s0, s0, s2 ; GFX10-NEXT: s_lshl_b32 s1, s1, 24 -; GFX10-NEXT: s_or_b32 s0, s2, s0 +; GFX10-NEXT: s_or_b32 s0, s0, s3 ; GFX10-NEXT: s_or_b32 s0, s0, s1 ; GFX10-NEXT: ; return to shader part epilog %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr @@ -1500,20 +1364,17 @@ define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx5(<8 x i8> addrspace(4)* inreg ; GCN-LABEL: extractelement_sgpr_v8i8_idx5: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 -; GCN-NEXT: s_movk_i32 s4, 0xff ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_lshr_b32 s0, s1, 8 -; GCN-NEXT: s_and_b32 s0, s0, s4 -; GCN-NEXT: s_lshr_b32 s2, s1, 16 -; GCN-NEXT: s_lshr_b32 s3, s1, 24 -; GCN-NEXT: s_and_b32 s1, s1, s4 -; GCN-NEXT: s_lshl_b32 s0, s0, 8 -; GCN-NEXT: s_or_b32 s0, s1, s0 -; GCN-NEXT: s_and_b32 s1, s2, s4 +; GCN-NEXT: s_bfe_u32 s3, s1, 0x80008 +; GCN-NEXT: s_lshr_b32 s0, s1, 24 +; GCN-NEXT: s_and_b32 s2, s1, 0xff +; GCN-NEXT: s_bfe_u32 s1, s1, 0x80010 +; GCN-NEXT: s_lshl_b32 s3, s3, 8 +; GCN-NEXT: s_or_b32 s2, s2, s3 ; GCN-NEXT: s_lshl_b32 s1, s1, 16 -; GCN-NEXT: s_or_b32 s0, s0, s1 -; GCN-NEXT: s_lshl_b32 s1, s3, 24 -; GCN-NEXT: s_or_b32 s0, s0, s1 +; GCN-NEXT: s_or_b32 s1, s2, s1 +; GCN-NEXT: s_lshl_b32 s0, s0, 24 +; GCN-NEXT: s_or_b32 s0, s1, s0 ; GCN-NEXT: s_lshr_b32 s0, s0, 8 ; GCN-NEXT: ; return to shader part epilog ; @@ -1521,18 +1382,15 @@ define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx5(<8 x i8> addrspace(4)* inreg ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_movk_i32 s0, 0xff -; GFX10-NEXT: s_lshr_b32 s2, s1, 8 -; GFX10-NEXT: s_lshr_b32 s3, s1, 16 -; GFX10-NEXT: s_and_b32 s2, s2, s0 -; GFX10-NEXT: s_and_b32 s4, s1, s0 -; GFX10-NEXT: s_and_b32 s0, s3, s0 +; GFX10-NEXT: s_bfe_u32 s2, s1, 0x80008 +; GFX10-NEXT: s_bfe_u32 s3, s1, 0x80010 +; GFX10-NEXT: s_and_b32 s0, s1, 0xff ; GFX10-NEXT: s_lshl_b32 s2, s2, 8 ; GFX10-NEXT: s_lshr_b32 s1, s1, 24 -; GFX10-NEXT: s_lshl_b32 s0, s0, 16 -; GFX10-NEXT: s_or_b32 s2, s4, s2 +; GFX10-NEXT: s_lshl_b32 s3, s3, 16 +; GFX10-NEXT: s_or_b32 s0, s0, s2 ; GFX10-NEXT: s_lshl_b32 s1, s1, 24 -; GFX10-NEXT: s_or_b32 s0, s2, s0 +; GFX10-NEXT: s_or_b32 s0, s0, s3 ; GFX10-NEXT: s_or_b32 s0, s0, s1 ; GFX10-NEXT: s_lshr_b32 s0, s0, 8 ; GFX10-NEXT: ; return to shader part epilog @@ -1545,20 +1403,17 @@ define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx6(<8 x i8> addrspace(4)* inreg ; GCN-LABEL: extractelement_sgpr_v8i8_idx6: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 -; GCN-NEXT: s_movk_i32 s4, 0xff ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_lshr_b32 s0, s1, 8 -; GCN-NEXT: s_and_b32 s0, s0, s4 -; GCN-NEXT: s_lshr_b32 s2, s1, 16 -; GCN-NEXT: s_lshr_b32 s3, s1, 24 -; GCN-NEXT: s_and_b32 s1, s1, s4 -; GCN-NEXT: s_lshl_b32 s0, s0, 8 -; GCN-NEXT: s_or_b32 s0, s1, s0 -; GCN-NEXT: s_and_b32 s1, s2, s4 +; GCN-NEXT: s_bfe_u32 s3, s1, 0x80008 +; GCN-NEXT: s_lshr_b32 s0, s1, 24 +; GCN-NEXT: s_and_b32 s2, s1, 0xff +; GCN-NEXT: s_bfe_u32 s1, s1, 0x80010 +; GCN-NEXT: s_lshl_b32 s3, s3, 8 +; GCN-NEXT: s_or_b32 s2, s2, s3 ; GCN-NEXT: s_lshl_b32 s1, s1, 16 -; GCN-NEXT: s_or_b32 s0, s0, s1 -; GCN-NEXT: s_lshl_b32 s1, s3, 24 -; GCN-NEXT: s_or_b32 s0, s0, s1 +; GCN-NEXT: s_or_b32 s1, s2, s1 +; GCN-NEXT: s_lshl_b32 s0, s0, 24 +; GCN-NEXT: s_or_b32 s0, s1, s0 ; GCN-NEXT: s_lshr_b32 s0, s0, 16 ; GCN-NEXT: ; return to shader part epilog ; @@ -1566,18 +1421,15 @@ define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx6(<8 x i8> addrspace(4)* inreg ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_movk_i32 s0, 0xff -; GFX10-NEXT: s_lshr_b32 s2, s1, 8 -; GFX10-NEXT: s_lshr_b32 s3, s1, 16 -; GFX10-NEXT: s_and_b32 s2, s2, s0 -; GFX10-NEXT: s_and_b32 s4, s1, s0 -; GFX10-NEXT: s_and_b32 s0, s3, s0 +; GFX10-NEXT: s_bfe_u32 s2, s1, 0x80008 +; GFX10-NEXT: s_bfe_u32 s3, s1, 0x80010 +; GFX10-NEXT: s_and_b32 s0, s1, 0xff ; GFX10-NEXT: s_lshl_b32 s2, s2, 8 ; GFX10-NEXT: s_lshr_b32 s1, s1, 24 -; GFX10-NEXT: s_lshl_b32 s0, s0, 16 -; GFX10-NEXT: s_or_b32 s2, s4, s2 +; GFX10-NEXT: s_lshl_b32 s3, s3, 16 +; GFX10-NEXT: s_or_b32 s0, s0, s2 ; GFX10-NEXT: s_lshl_b32 s1, s1, 24 -; GFX10-NEXT: s_or_b32 s0, s2, s0 +; GFX10-NEXT: s_or_b32 s0, s0, s3 ; GFX10-NEXT: s_or_b32 s0, s0, s1 ; GFX10-NEXT: s_lshr_b32 s0, s0, 16 ; GFX10-NEXT: ; return to shader part epilog @@ -1590,20 +1442,17 @@ define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx7(<8 x i8> addrspace(4)* inreg ; GCN-LABEL: extractelement_sgpr_v8i8_idx7: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 -; GCN-NEXT: s_movk_i32 s4, 0xff ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_lshr_b32 s0, s1, 8 -; GCN-NEXT: s_and_b32 s0, s0, s4 -; GCN-NEXT: s_lshr_b32 s2, s1, 16 -; GCN-NEXT: s_lshr_b32 s3, s1, 24 -; GCN-NEXT: s_and_b32 s1, s1, s4 -; GCN-NEXT: s_lshl_b32 s0, s0, 8 -; GCN-NEXT: s_or_b32 s0, s1, s0 -; GCN-NEXT: s_and_b32 s1, s2, s4 +; GCN-NEXT: s_bfe_u32 s3, s1, 0x80008 +; GCN-NEXT: s_lshr_b32 s0, s1, 24 +; GCN-NEXT: s_and_b32 s2, s1, 0xff +; GCN-NEXT: s_bfe_u32 s1, s1, 0x80010 +; GCN-NEXT: s_lshl_b32 s3, s3, 8 +; GCN-NEXT: s_or_b32 s2, s2, s3 ; GCN-NEXT: s_lshl_b32 s1, s1, 16 -; GCN-NEXT: s_or_b32 s0, s0, s1 -; GCN-NEXT: s_lshl_b32 s1, s3, 24 -; GCN-NEXT: s_or_b32 s0, s0, s1 +; GCN-NEXT: s_or_b32 s1, s2, s1 +; GCN-NEXT: s_lshl_b32 s0, s0, 24 +; GCN-NEXT: s_or_b32 s0, s1, s0 ; GCN-NEXT: s_lshr_b32 s0, s0, 24 ; GCN-NEXT: ; return to shader part epilog ; @@ -1611,18 +1460,15 @@ define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx7(<8 x i8> addrspace(4)* inreg ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_movk_i32 s0, 0xff -; GFX10-NEXT: s_lshr_b32 s2, s1, 8 -; GFX10-NEXT: s_lshr_b32 s3, s1, 16 -; GFX10-NEXT: s_and_b32 s2, s2, s0 -; GFX10-NEXT: s_and_b32 s4, s1, s0 -; GFX10-NEXT: s_and_b32 s0, s3, s0 +; GFX10-NEXT: s_bfe_u32 s2, s1, 0x80008 +; GFX10-NEXT: s_bfe_u32 s3, s1, 0x80010 +; GFX10-NEXT: s_and_b32 s0, s1, 0xff ; GFX10-NEXT: s_lshl_b32 s2, s2, 8 ; GFX10-NEXT: s_lshr_b32 s1, s1, 24 -; GFX10-NEXT: s_lshl_b32 s0, s0, 16 -; GFX10-NEXT: s_or_b32 s2, s4, s2 +; GFX10-NEXT: s_lshl_b32 s3, s3, 16 +; GFX10-NEXT: s_or_b32 s0, s0, s2 ; GFX10-NEXT: s_lshl_b32 s1, s1, 24 -; GFX10-NEXT: s_or_b32 s0, s2, s0 +; GFX10-NEXT: s_or_b32 s0, s0, s3 ; GFX10-NEXT: s_or_b32 s0, s0, s1 ; GFX10-NEXT: s_lshr_b32 s0, s0, 24 ; GFX10-NEXT: ; return to shader part epilog @@ -1636,15 +1482,15 @@ define i8 @extractelement_vgpr_v8i8_idx0(<8 x i8> addrspace(1)* %ptr) { ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off -; GFX9-NEXT: s_mov_b32 s4, 8 -; GFX9-NEXT: s_movk_i32 s5, 0xff +; GFX9-NEXT: v_mov_b32_e32 v2, 8 +; GFX9-NEXT: v_mov_b32_e32 v3, 16 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_b32_sdwa v3, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_or_b32 v0, v0, s5, v1 -; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v2 +; GFX9-NEXT: v_mov_b32_e32 v1, 0xff +; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v0 +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_and_or_b32 v0, v0, v1, v2 +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v4 ; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -1652,17 +1498,15 @@ define i8 @extractelement_vgpr_v8i8_idx0(<8 x i8> addrspace(1)* %ptr) { ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] -; GFX8-NEXT: s_movk_i32 s4, 0xff ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v1, 8 -; GFX8-NEXT: v_mov_b32_e32 v2, s4 -; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0 -; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v0 -; GFX8-NEXT: v_and_b32_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_mov_b32_e32 v2, 16 +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v4 +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; @@ -1673,19 +1517,16 @@ define i8 @extractelement_vgpr_v8i8_idx0(<8 x i8> addrspace(1)* %ptr) { ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 -; GFX7-NEXT: s_movk_i32 s4, 0xff ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0 -; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 -; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 -; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX7-NEXT: v_bfe_u32 v3, v0, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v1, 24, v0 +; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v0 +; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v1 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; @@ -1694,16 +1535,15 @@ define i8 @extractelement_vgpr_v8i8_idx0(<8 x i8> addrspace(1)* %ptr) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off -; GFX10-NEXT: s_mov_b32 s4, 8 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: s_movk_i32 s4, 0xff -; GFX10-NEXT: v_and_b32_sdwa v3, v0, s4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_or_b32 v0, v0, s4, v1 -; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v2 -; GFX10-NEXT: v_or3_b32 v0, v0, v3, v1 +; GFX10-NEXT: v_mov_b32_e32 v1, 8 +; GFX10-NEXT: v_mov_b32_e32 v2, 16 +; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v0, 0xff, v0, v1 +; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX10-NEXT: v_or3_b32 v0, v0, v2, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr %element = extractelement <8 x i8> %vector, i32 0 @@ -1716,15 +1556,15 @@ define i8 @extractelement_vgpr_v8i8_idx1(<8 x i8> addrspace(1)* %ptr) { ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX9-NEXT: s_mov_b32 s4, 8 -; GFX9-NEXT: s_movk_i32 s5, 0xff +; GFX9-NEXT: v_mov_b32_e32 v2, 16 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_b32_sdwa v3, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_or_b32 v0, v0, s5, v1 -; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v2 -; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 +; GFX9-NEXT: v_mov_b32_e32 v1, 0xff +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_and_or_b32 v0, v0, v1, v4 +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX9-NEXT: v_or3_b32 v0, v0, v2, v1 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -1732,17 +1572,15 @@ define i8 @extractelement_vgpr_v8i8_idx1(<8 x i8> addrspace(1)* %ptr) { ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] -; GFX8-NEXT: s_movk_i32 s4, 0xff ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v1, 8 -; GFX8-NEXT: v_mov_b32_e32 v2, s4 -; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0 -; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v0 -; GFX8-NEXT: v_and_b32_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_mov_b32_e32 v2, 16 +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v4 +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 8, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -1754,19 +1592,16 @@ define i8 @extractelement_vgpr_v8i8_idx1(<8 x i8> addrspace(1)* %ptr) { ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 -; GFX7-NEXT: s_movk_i32 s4, 0xff ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0 -; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 -; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 -; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX7-NEXT: v_bfe_u32 v3, v0, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v1, 24, v0 +; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v0 +; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v1 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] @@ -1778,14 +1613,13 @@ define i8 @extractelement_vgpr_v8i8_idx1(<8 x i8> addrspace(1)* %ptr) { ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: s_mov_b32 s4, 8 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: s_movk_i32 s4, 0xff -; GFX10-NEXT: v_and_b32_sdwa v3, v0, s4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_or_b32 v0, v0, s4, v1 -; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v2 -; GFX10-NEXT: v_or3_b32 v0, v0, v3, v1 +; GFX10-NEXT: v_mov_b32_e32 v1, 16 +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v0, 0xff, v0, v2 +; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v3 +; GFX10-NEXT: v_or3_b32 v0, v0, v1, v2 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr @@ -1798,16 +1632,16 @@ define i8 @extractelement_vgpr_v8i8_idx2(<8 x i8> addrspace(1)* %ptr) { ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off -; GFX9-NEXT: s_mov_b32 s4, 8 -; GFX9-NEXT: s_movk_i32 s5, 0xff +; GFX9-NEXT: v_mov_b32_e32 v2, 8 +; GFX9-NEXT: s_mov_b32 s4, 16 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_b32_sdwa v3, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_or_b32 v0, v0, s5, v1 -; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v2 -; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 +; GFX9-NEXT: v_mov_b32_e32 v1, 0xff +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_and_or_b32 v0, v0, v1, v2 +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX9-NEXT: v_or3_b32 v0, v0, v4, v1 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -1815,17 +1649,15 @@ define i8 @extractelement_vgpr_v8i8_idx2(<8 x i8> addrspace(1)* %ptr) { ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] -; GFX8-NEXT: s_movk_i32 s4, 0xff ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v1, 8 -; GFX8-NEXT: v_mov_b32_e32 v2, s4 -; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0 -; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v0 -; GFX8-NEXT: v_and_b32_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_mov_b32_e32 v2, 16 +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v4 +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -1837,19 +1669,16 @@ define i8 @extractelement_vgpr_v8i8_idx2(<8 x i8> addrspace(1)* %ptr) { ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 -; GFX7-NEXT: s_movk_i32 s4, 0xff ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0 -; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 -; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 -; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX7-NEXT: v_bfe_u32 v3, v0, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v1, 24, v0 +; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v0 +; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v1 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] @@ -1859,14 +1688,13 @@ define i8 @extractelement_vgpr_v8i8_idx2(<8 x i8> addrspace(1)* %ptr) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off -; GFX10-NEXT: s_mov_b32 s4, 8 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v0 +; GFX10-NEXT: v_mov_b32_e32 v1, 8 +; GFX10-NEXT: s_mov_b32 s4, 16 +; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: s_movk_i32 s4, 0xff -; GFX10-NEXT: v_and_b32_sdwa v3, v0, s4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_or_b32 v0, v0, s4, v1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v3, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v0, 0xff, v0, v1 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v2 ; GFX10-NEXT: v_or3_b32 v0, v0, v3, v1 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v0 @@ -1881,15 +1709,15 @@ define i8 @extractelement_vgpr_v8i8_idx3(<8 x i8> addrspace(1)* %ptr) { ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off -; GFX9-NEXT: s_mov_b32 s4, 8 -; GFX9-NEXT: s_movk_i32 s5, 0xff +; GFX9-NEXT: v_mov_b32_e32 v2, 8 +; GFX9-NEXT: v_mov_b32_e32 v3, 16 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_b32_sdwa v3, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_or_b32 v0, v0, s5, v1 -; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v2 +; GFX9-NEXT: v_mov_b32_e32 v1, 0xff +; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v0 +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_and_or_b32 v0, v0, v1, v2 +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v4 ; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 24, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -1898,17 +1726,15 @@ define i8 @extractelement_vgpr_v8i8_idx3(<8 x i8> addrspace(1)* %ptr) { ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] -; GFX8-NEXT: s_movk_i32 s4, 0xff ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v1, 8 -; GFX8-NEXT: v_mov_b32_e32 v2, s4 -; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0 -; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v0 -; GFX8-NEXT: v_and_b32_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_mov_b32_e32 v2, 16 +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v4 +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -1920,19 +1746,16 @@ define i8 @extractelement_vgpr_v8i8_idx3(<8 x i8> addrspace(1)* %ptr) { ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 -; GFX7-NEXT: s_movk_i32 s4, 0xff ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0 -; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 -; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 -; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX7-NEXT: v_bfe_u32 v3, v0, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v1, 24, v0 +; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v0 +; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v1 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] @@ -1942,16 +1765,15 @@ define i8 @extractelement_vgpr_v8i8_idx3(<8 x i8> addrspace(1)* %ptr) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off -; GFX10-NEXT: s_mov_b32 s4, 8 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: s_movk_i32 s4, 0xff -; GFX10-NEXT: v_and_b32_sdwa v3, v0, s4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_or_b32 v0, v0, s4, v1 -; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v2 -; GFX10-NEXT: v_or3_b32 v0, v0, v3, v1 +; GFX10-NEXT: v_mov_b32_e32 v1, 8 +; GFX10-NEXT: v_mov_b32_e32 v2, 16 +; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v0, 0xff, v0, v1 +; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX10-NEXT: v_or3_b32 v0, v0, v2, v1 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr @@ -1964,15 +1786,15 @@ define i8 @extractelement_vgpr_v8i8_idx4(<8 x i8> addrspace(1)* %ptr) { ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off -; GFX9-NEXT: s_mov_b32 s4, 8 -; GFX9-NEXT: s_movk_i32 s5, 0xff +; GFX9-NEXT: v_mov_b32_e32 v2, 8 +; GFX9-NEXT: v_mov_b32_e32 v3, 16 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v1 -; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v1 -; GFX9-NEXT: v_and_b32_sdwa v3, v1, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_or_b32 v0, v1, s5, v0 -; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v2 +; GFX9-NEXT: v_mov_b32_e32 v0, 0xff +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_and_or_b32 v0, v1, v0, v2 +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v4 ; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -1980,17 +1802,15 @@ define i8 @extractelement_vgpr_v8i8_idx4(<8 x i8> addrspace(1)* %ptr) { ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] -; GFX8-NEXT: s_movk_i32 s4, 0xff ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, 8 -; GFX8-NEXT: v_mov_b32_e32 v2, s4 -; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v1 -; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v1 -; GFX8-NEXT: v_and_b32_sdwa v2, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_mov_b32_e32 v2, 16 +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v4 +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; @@ -2001,20 +1821,17 @@ define i8 @extractelement_vgpr_v8i8_idx4(<8 x i8> addrspace(1)* %ptr) { ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 -; GFX7-NEXT: s_movk_i32 s4, 0xff ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v1 -; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 -; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 -; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v1 -; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0 +; GFX7-NEXT: v_bfe_u32 v3, v1, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v1 +; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 +; GFX7-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 ; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: extractelement_vgpr_v8i8_idx4: @@ -2022,16 +1839,15 @@ define i8 @extractelement_vgpr_v8i8_idx4(<8 x i8> addrspace(1)* %ptr) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off -; GFX10-NEXT: s_mov_b32 s4, 8 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v1 -; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v1 -; GFX10-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: s_movk_i32 s4, 0xff -; GFX10-NEXT: v_and_b32_sdwa v3, v1, s4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_or_b32 v0, v1, s4, v0 -; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v2 -; GFX10-NEXT: v_or3_b32 v0, v0, v3, v1 +; GFX10-NEXT: v_mov_b32_e32 v0, 8 +; GFX10-NEXT: v_mov_b32_e32 v2, 16 +; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v0, 0xff, v1, v0 +; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX10-NEXT: v_or3_b32 v0, v0, v2, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr %element = extractelement <8 x i8> %vector, i32 4 @@ -2044,15 +1860,15 @@ define i8 @extractelement_vgpr_v8i8_idx5(<8 x i8> addrspace(1)* %ptr) { ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX9-NEXT: s_mov_b32 s4, 8 -; GFX9-NEXT: s_movk_i32 s5, 0xff +; GFX9-NEXT: v_mov_b32_e32 v2, 16 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v1 -; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v1 -; GFX9-NEXT: v_and_b32_sdwa v3, v1, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_or_b32 v0, v1, s5, v0 -; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v2 -; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 +; GFX9-NEXT: v_mov_b32_e32 v0, 0xff +; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_and_or_b32 v0, v1, v0, v4 +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX9-NEXT: v_or3_b32 v0, v0, v2, v1 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -2060,17 +1876,15 @@ define i8 @extractelement_vgpr_v8i8_idx5(<8 x i8> addrspace(1)* %ptr) { ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] -; GFX8-NEXT: s_movk_i32 s4, 0xff ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, 8 -; GFX8-NEXT: v_mov_b32_e32 v2, s4 -; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v1 -; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v1 -; GFX8-NEXT: v_and_b32_sdwa v2, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_mov_b32_e32 v2, 16 +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v4 +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 8, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -2082,20 +1896,17 @@ define i8 @extractelement_vgpr_v8i8_idx5(<8 x i8> addrspace(1)* %ptr) { ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 -; GFX7-NEXT: s_movk_i32 s4, 0xff ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v1 -; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 -; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 -; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v1 -; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0 +; GFX7-NEXT: v_bfe_u32 v3, v1, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v1 +; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 +; GFX7-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 ; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; @@ -2106,14 +1917,13 @@ define i8 @extractelement_vgpr_v8i8_idx5(<8 x i8> addrspace(1)* %ptr) { ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: s_mov_b32 s4, 8 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v1 -; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v1 -; GFX10-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: s_movk_i32 s4, 0xff -; GFX10-NEXT: v_and_b32_sdwa v3, v1, s4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_or_b32 v0, v1, s4, v0 -; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v2 -; GFX10-NEXT: v_or3_b32 v0, v0, v3, v1 +; GFX10-NEXT: v_mov_b32_e32 v0, 16 +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v1, 0xff, v1, v2 +; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v3 +; GFX10-NEXT: v_or3_b32 v0, v1, v0, v2 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr @@ -2126,16 +1936,16 @@ define i8 @extractelement_vgpr_v8i8_idx6(<8 x i8> addrspace(1)* %ptr) { ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off -; GFX9-NEXT: s_mov_b32 s4, 8 -; GFX9-NEXT: s_movk_i32 s5, 0xff +; GFX9-NEXT: v_mov_b32_e32 v2, 8 +; GFX9-NEXT: s_mov_b32 s4, 16 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v1 -; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v1 -; GFX9-NEXT: v_and_b32_sdwa v3, v1, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_or_b32 v0, v1, s5, v0 -; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v2 -; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 +; GFX9-NEXT: v_mov_b32_e32 v0, 0xff +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_and_or_b32 v0, v1, v0, v2 +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX9-NEXT: v_or3_b32 v0, v0, v4, v1 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -2143,17 +1953,15 @@ define i8 @extractelement_vgpr_v8i8_idx6(<8 x i8> addrspace(1)* %ptr) { ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] -; GFX8-NEXT: s_movk_i32 s4, 0xff ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, 8 -; GFX8-NEXT: v_mov_b32_e32 v2, s4 -; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v1 -; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v1 -; GFX8-NEXT: v_and_b32_sdwa v2, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_mov_b32_e32 v2, 16 +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v4 +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -2165,20 +1973,17 @@ define i8 @extractelement_vgpr_v8i8_idx6(<8 x i8> addrspace(1)* %ptr) { ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 -; GFX7-NEXT: s_movk_i32 s4, 0xff ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v1 -; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 -; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 -; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v1 -; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0 +; GFX7-NEXT: v_bfe_u32 v3, v1, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v1 +; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 +; GFX7-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 ; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; @@ -2187,14 +1992,13 @@ define i8 @extractelement_vgpr_v8i8_idx6(<8 x i8> addrspace(1)* %ptr) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off -; GFX10-NEXT: s_mov_b32 s4, 8 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v1 +; GFX10-NEXT: v_mov_b32_e32 v0, 8 +; GFX10-NEXT: s_mov_b32 s4, 16 +; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v1 -; GFX10-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: s_movk_i32 s4, 0xff -; GFX10-NEXT: v_and_b32_sdwa v3, v1, s4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_or_b32 v0, v1, s4, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v3, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v0, 0xff, v1, v0 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v2 ; GFX10-NEXT: v_or3_b32 v0, v0, v3, v1 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v0 @@ -2209,15 +2013,15 @@ define i8 @extractelement_vgpr_v8i8_idx7(<8 x i8> addrspace(1)* %ptr) { ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off -; GFX9-NEXT: s_mov_b32 s4, 8 -; GFX9-NEXT: s_movk_i32 s5, 0xff +; GFX9-NEXT: v_mov_b32_e32 v2, 8 +; GFX9-NEXT: v_mov_b32_e32 v3, 16 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v1 -; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v1 -; GFX9-NEXT: v_and_b32_sdwa v3, v1, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_or_b32 v0, v1, s5, v0 -; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v2 +; GFX9-NEXT: v_mov_b32_e32 v0, 0xff +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_and_or_b32 v0, v1, v0, v2 +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v4 ; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 24, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -2226,17 +2030,15 @@ define i8 @extractelement_vgpr_v8i8_idx7(<8 x i8> addrspace(1)* %ptr) { ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] -; GFX8-NEXT: s_movk_i32 s4, 0xff ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, 8 -; GFX8-NEXT: v_mov_b32_e32 v2, s4 -; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v1 -; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v1 -; GFX8-NEXT: v_and_b32_sdwa v2, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_mov_b32_e32 v2, 16 +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v4 +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -2248,20 +2050,17 @@ define i8 @extractelement_vgpr_v8i8_idx7(<8 x i8> addrspace(1)* %ptr) { ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 -; GFX7-NEXT: s_movk_i32 s4, 0xff ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v1 -; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 -; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 -; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v1 -; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0 +; GFX7-NEXT: v_bfe_u32 v3, v1, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v1 +; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 +; GFX7-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 ; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; @@ -2270,16 +2069,15 @@ define i8 @extractelement_vgpr_v8i8_idx7(<8 x i8> addrspace(1)* %ptr) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off -; GFX10-NEXT: s_mov_b32 s4, 8 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v1 -; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v1 -; GFX10-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: s_movk_i32 s4, 0xff -; GFX10-NEXT: v_and_b32_sdwa v3, v1, s4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_or_b32 v0, v1, s4, v0 -; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v2 -; GFX10-NEXT: v_or3_b32 v0, v0, v3, v1 +; GFX10-NEXT: v_mov_b32_e32 v0, 8 +; GFX10-NEXT: v_mov_b32_e32 v2, 16 +; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v0, 0xff, v1, v0 +; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX10-NEXT: v_or3_b32 v0, v0, v2, v1 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr @@ -2291,55 +2089,49 @@ define amdgpu_ps i8 @extractelement_sgpr_v16i8_sgpr_idx(<16 x i8> addrspace(4)* ; GCN-LABEL: extractelement_sgpr_v16i8_sgpr_idx: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 -; GCN-NEXT: s_movk_i32 s17, 0xff +; GCN-NEXT: s_mov_b32 s11, 0x80008 +; GCN-NEXT: s_movk_i32 s9, 0xff ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_lshr_b32 s5, s0, 8 -; GCN-NEXT: s_and_b32 s5, s5, s17 -; GCN-NEXT: s_lshr_b32 s6, s0, 16 -; GCN-NEXT: s_lshr_b32 s7, s0, 24 -; GCN-NEXT: s_and_b32 s0, s0, s17 -; GCN-NEXT: s_lshl_b32 s5, s5, 8 -; GCN-NEXT: s_or_b32 s0, s0, s5 -; GCN-NEXT: s_and_b32 s5, s6, s17 -; GCN-NEXT: s_lshl_b32 s5, s5, 16 -; GCN-NEXT: s_or_b32 s0, s0, s5 -; GCN-NEXT: s_lshl_b32 s5, s7, 24 -; GCN-NEXT: s_lshr_b32 s8, s1, 8 +; GCN-NEXT: s_bfe_u32 s12, s0, s11 +; GCN-NEXT: s_and_b32 s10, s0, s9 +; GCN-NEXT: s_lshl_b32 s12, s12, 8 +; GCN-NEXT: s_or_b32 s10, s10, s12 +; GCN-NEXT: s_mov_b32 s12, 0x80010 +; GCN-NEXT: s_lshr_b32 s5, s0, 24 +; GCN-NEXT: s_bfe_u32 s0, s0, s12 +; GCN-NEXT: s_lshl_b32 s0, s0, 16 +; GCN-NEXT: s_or_b32 s0, s10, s0 +; GCN-NEXT: s_bfe_u32 s10, s1, s11 +; GCN-NEXT: s_lshl_b32 s5, s5, 24 ; GCN-NEXT: s_or_b32 s0, s0, s5 -; GCN-NEXT: s_and_b32 s5, s8, s17 -; GCN-NEXT: s_lshr_b32 s9, s1, 16 -; GCN-NEXT: s_lshr_b32 s10, s1, 24 -; GCN-NEXT: s_and_b32 s1, s1, s17 -; GCN-NEXT: s_lshl_b32 s5, s5, 8 -; GCN-NEXT: s_or_b32 s1, s1, s5 -; GCN-NEXT: s_and_b32 s5, s9, s17 -; GCN-NEXT: s_lshl_b32 s5, s5, 16 -; GCN-NEXT: s_or_b32 s1, s1, s5 -; GCN-NEXT: s_lshl_b32 s5, s10, 24 -; GCN-NEXT: s_lshr_b32 s11, s2, 8 +; GCN-NEXT: s_lshr_b32 s6, s1, 24 +; GCN-NEXT: s_and_b32 s5, s1, s9 +; GCN-NEXT: s_bfe_u32 s1, s1, s12 +; GCN-NEXT: s_lshl_b32 s10, s10, 8 +; GCN-NEXT: s_or_b32 s5, s5, s10 +; GCN-NEXT: s_lshl_b32 s1, s1, 16 +; GCN-NEXT: s_or_b32 s1, s5, s1 +; GCN-NEXT: s_lshl_b32 s5, s6, 24 +; GCN-NEXT: s_bfe_u32 s6, s2, s11 ; GCN-NEXT: s_or_b32 s1, s1, s5 -; GCN-NEXT: s_and_b32 s5, s11, s17 -; GCN-NEXT: s_lshr_b32 s12, s2, 16 -; GCN-NEXT: s_lshr_b32 s13, s2, 24 -; GCN-NEXT: s_and_b32 s2, s2, s17 -; GCN-NEXT: s_lshl_b32 s5, s5, 8 -; GCN-NEXT: s_or_b32 s2, s2, s5 -; GCN-NEXT: s_and_b32 s5, s12, s17 -; GCN-NEXT: s_lshl_b32 s5, s5, 16 -; GCN-NEXT: s_or_b32 s2, s2, s5 -; GCN-NEXT: s_lshl_b32 s5, s13, 24 -; GCN-NEXT: s_lshr_b32 s14, s3, 8 +; GCN-NEXT: s_lshr_b32 s7, s2, 24 +; GCN-NEXT: s_and_b32 s5, s2, s9 +; GCN-NEXT: s_bfe_u32 s2, s2, s12 +; GCN-NEXT: s_lshl_b32 s6, s6, 8 +; GCN-NEXT: s_or_b32 s5, s5, s6 +; GCN-NEXT: s_lshl_b32 s2, s2, 16 +; GCN-NEXT: s_bfe_u32 s6, s3, s11 +; GCN-NEXT: s_or_b32 s2, s5, s2 +; GCN-NEXT: s_lshl_b32 s5, s7, 24 ; GCN-NEXT: s_or_b32 s2, s2, s5 -; GCN-NEXT: s_and_b32 s5, s14, s17 -; GCN-NEXT: s_lshr_b32 s15, s3, 16 -; GCN-NEXT: s_lshr_b32 s16, s3, 24 -; GCN-NEXT: s_and_b32 s3, s3, s17 -; GCN-NEXT: s_lshl_b32 s5, s5, 8 -; GCN-NEXT: s_or_b32 s3, s3, s5 -; GCN-NEXT: s_and_b32 s5, s15, s17 -; GCN-NEXT: s_lshl_b32 s5, s5, 16 -; GCN-NEXT: s_or_b32 s3, s3, s5 -; GCN-NEXT: s_lshl_b32 s5, s16, 24 +; GCN-NEXT: s_lshr_b32 s8, s3, 24 +; GCN-NEXT: s_and_b32 s5, s3, s9 +; GCN-NEXT: s_bfe_u32 s3, s3, s12 +; GCN-NEXT: s_lshl_b32 s6, s6, 8 +; GCN-NEXT: s_or_b32 s5, s5, s6 +; GCN-NEXT: s_lshl_b32 s3, s3, 16 +; GCN-NEXT: s_or_b32 s3, s5, s3 +; GCN-NEXT: s_lshl_b32 s5, s8, 24 ; GCN-NEXT: s_or_b32 s3, s3, s5 ; GCN-NEXT: s_lshr_b32 s5, s4, 2 ; GCN-NEXT: s_cmp_eq_u32 s5, 1 @@ -2356,56 +2148,50 @@ define amdgpu_ps i8 @extractelement_sgpr_v16i8_sgpr_idx(<16 x i8> addrspace(4)* ; GFX10-LABEL: extractelement_sgpr_v16i8_sgpr_idx: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 +; GFX10-NEXT: s_mov_b32 s6, 0x80008 ; GFX10-NEXT: s_movk_i32 s5, 0xff +; GFX10-NEXT: s_mov_b32 s7, 0x80010 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_lshr_b32 s6, s0, 8 -; GFX10-NEXT: s_lshr_b32 s7, s0, 16 -; GFX10-NEXT: s_and_b32 s6, s6, s5 +; GFX10-NEXT: s_bfe_u32 s13, s0, s6 ; GFX10-NEXT: s_lshr_b32 s8, s0, 24 -; GFX10-NEXT: s_and_b32 s7, s7, s5 -; GFX10-NEXT: s_and_b32 s0, s0, s5 +; GFX10-NEXT: s_and_b32 s12, s0, s5 +; GFX10-NEXT: s_bfe_u32 s0, s0, s7 +; GFX10-NEXT: s_lshl_b32 s13, s13, 8 +; GFX10-NEXT: s_bfe_u32 s15, s1, s6 +; GFX10-NEXT: s_bfe_u32 s17, s2, s6 +; GFX10-NEXT: s_bfe_u32 s6, s3, s6 +; GFX10-NEXT: s_lshl_b32 s0, s0, 16 +; GFX10-NEXT: s_or_b32 s12, s12, s13 +; GFX10-NEXT: s_lshr_b32 s9, s1, 24 +; GFX10-NEXT: s_and_b32 s14, s1, s5 +; GFX10-NEXT: s_bfe_u32 s1, s1, s7 +; GFX10-NEXT: s_and_b32 s16, s2, s5 +; GFX10-NEXT: s_lshr_b32 s10, s2, 24 +; GFX10-NEXT: s_bfe_u32 s2, s2, s7 +; GFX10-NEXT: s_lshl_b32 s15, s15, 8 +; GFX10-NEXT: s_lshr_b32 s11, s3, 24 +; GFX10-NEXT: s_and_b32 s5, s3, s5 +; GFX10-NEXT: s_bfe_u32 s3, s3, s7 ; GFX10-NEXT: s_lshl_b32 s6, s6, 8 -; GFX10-NEXT: s_lshr_b32 s12, s2, 8 -; GFX10-NEXT: s_lshl_b32 s7, s7, 16 -; GFX10-NEXT: s_or_b32 s0, s0, s6 -; GFX10-NEXT: s_lshr_b32 s13, s2, 16 -; GFX10-NEXT: s_or_b32 s0, s0, s7 -; GFX10-NEXT: s_and_b32 s7, s12, s5 ; GFX10-NEXT: s_lshl_b32 s8, s8, 24 -; GFX10-NEXT: s_lshr_b32 s9, s1, 8 -; GFX10-NEXT: s_lshr_b32 s14, s2, 24 +; GFX10-NEXT: s_or_b32 s0, s12, s0 +; GFX10-NEXT: s_lshl_b32 s17, s17, 8 ; GFX10-NEXT: s_or_b32 s0, s0, s8 -; GFX10-NEXT: s_and_b32 s2, s2, s5 -; GFX10-NEXT: s_lshl_b32 s7, s7, 8 -; GFX10-NEXT: s_and_b32 s8, s13, s5 -; GFX10-NEXT: s_lshr_b32 s10, s1, 16 -; GFX10-NEXT: s_and_b32 s9, s9, s5 -; GFX10-NEXT: s_or_b32 s2, s2, s7 -; GFX10-NEXT: s_lshl_b32 s7, s8, 16 -; GFX10-NEXT: s_lshr_b32 s15, s3, 8 -; GFX10-NEXT: s_lshr_b32 s11, s1, 24 -; GFX10-NEXT: s_and_b32 s10, s10, s5 -; GFX10-NEXT: s_and_b32 s1, s1, s5 -; GFX10-NEXT: s_lshl_b32 s9, s9, 8 -; GFX10-NEXT: s_or_b32 s2, s2, s7 -; GFX10-NEXT: s_and_b32 s7, s15, s5 -; GFX10-NEXT: s_lshr_b32 s16, s3, 16 -; GFX10-NEXT: s_lshl_b32 s10, s10, 16 -; GFX10-NEXT: s_or_b32 s1, s1, s9 -; GFX10-NEXT: s_lshr_b32 s17, s3, 24 -; GFX10-NEXT: s_and_b32 s3, s3, s5 -; GFX10-NEXT: s_and_b32 s5, s16, s5 -; GFX10-NEXT: s_lshl_b32 s7, s7, 8 -; GFX10-NEXT: s_or_b32 s1, s1, s10 -; GFX10-NEXT: s_lshl_b32 s6, s11, 24 -; GFX10-NEXT: s_or_b32 s3, s3, s7 -; GFX10-NEXT: s_lshl_b32 s5, s5, 16 -; GFX10-NEXT: s_or_b32 s1, s1, s6 -; GFX10-NEXT: s_lshl_b32 s6, s14, 24 -; GFX10-NEXT: s_or_b32 s3, s3, s5 -; GFX10-NEXT: s_lshl_b32 s5, s17, 24 -; GFX10-NEXT: s_or_b32 s2, s2, s6 +; GFX10-NEXT: s_or_b32 s5, s5, s6 +; GFX10-NEXT: s_lshl_b32 s3, s3, 16 +; GFX10-NEXT: s_lshl_b32 s1, s1, 16 +; GFX10-NEXT: s_or_b32 s13, s14, s15 +; GFX10-NEXT: s_or_b32 s8, s16, s17 +; GFX10-NEXT: s_lshl_b32 s2, s2, 16 +; GFX10-NEXT: s_or_b32 s3, s5, s3 +; GFX10-NEXT: s_or_b32 s2, s8, s2 +; GFX10-NEXT: s_lshl_b32 s8, s10, 24 +; GFX10-NEXT: s_lshl_b32 s5, s11, 24 +; GFX10-NEXT: s_lshl_b32 s9, s9, 24 +; GFX10-NEXT: s_or_b32 s1, s13, s1 ; GFX10-NEXT: s_lshr_b32 s6, s4, 2 +; GFX10-NEXT: s_or_b32 s1, s1, s9 +; GFX10-NEXT: s_or_b32 s2, s2, s8 ; GFX10-NEXT: s_or_b32 s3, s3, s5 ; GFX10-NEXT: s_cmp_eq_u32 s6, 1 ; GFX10-NEXT: s_cselect_b32 s0, s1, s0 @@ -2427,45 +2213,43 @@ define amdgpu_ps i8 @extractelement_vgpr_v16i8_sgpr_idx(<16 x i8> addrspace(1)* ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX9-NEXT: s_mov_b32 s0, 8 +; GFX9-NEXT: s_mov_b32 s1, 16 ; GFX9-NEXT: v_mov_b32_e32 v5, 8 -; GFX9-NEXT: s_movk_i32 s1, 0xff -; GFX9-NEXT: s_lshr_b32 s3, s2, 2 +; GFX9-NEXT: s_movk_i32 s3, 0xff +; GFX9-NEXT: s_lshr_b32 s4, s2, 2 +; GFX9-NEXT: v_mov_b32_e32 v6, 16 ; GFX9-NEXT: v_mov_b32_e32 v4, 0xff -; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s3, 1 +; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s4, 1 ; GFX9-NEXT: s_and_b32 s2, s2, 3 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v6, 8, v0 -; GFX9-NEXT: v_lshrrev_b32_e32 v8, 8, v1 ; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v0 -; GFX9-NEXT: v_lshrrev_b32_e32 v9, 24, v1 -; GFX9-NEXT: v_lshrrev_b32_e32 v10, 8, v2 -; GFX9-NEXT: v_lshlrev_b32_sdwa v6, s0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v8, v5, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshrrev_b32_e32 v11, 24, v2 -; GFX9-NEXT: v_and_b32_sdwa v14, v0, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_b32_sdwa v15, v1, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshrrev_b32_e32 v12, 8, v3 -; GFX9-NEXT: v_lshlrev_b32_sdwa v10, v5, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_lshrrev_b32_e32 v8, 24, v1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v11, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v13, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshrrev_b32_e32 v9, 24, v2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v12, s1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v14, s1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v15, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_and_or_b32 v0, v0, s3, v11 ; GFX9-NEXT: v_lshlrev_b32_e32 v7, 24, v7 -; GFX9-NEXT: v_and_or_b32 v0, v0, s1, v6 +; GFX9-NEXT: v_and_or_b32 v1, v1, s3, v13 +; GFX9-NEXT: v_lshlrev_b32_e32 v8, 24, v8 +; GFX9-NEXT: v_lshlrev_b32_sdwa v16, v6, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshrrev_b32_e32 v10, 24, v3 +; GFX9-NEXT: v_lshlrev_b32_sdwa v5, v5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_and_or_b32 v2, v2, v4, v15 ; GFX9-NEXT: v_lshlrev_b32_e32 v9, 24, v9 -; GFX9-NEXT: v_and_or_b32 v1, v1, s1, v8 -; GFX9-NEXT: v_and_b32_sdwa v16, v2, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshrrev_b32_e32 v13, 24, v3 -; GFX9-NEXT: v_lshlrev_b32_sdwa v5, v5, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshlrev_b32_e32 v11, 24, v11 -; GFX9-NEXT: v_and_or_b32 v2, v2, s1, v10 -; GFX9-NEXT: v_or3_b32 v0, v0, v14, v7 -; GFX9-NEXT: v_or3_b32 v1, v1, v15, v9 -; GFX9-NEXT: v_and_b32_sdwa v17, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; GFX9-NEXT: v_lshlrev_b32_e32 v12, 24, v13 +; GFX9-NEXT: v_or3_b32 v0, v0, v12, v7 +; GFX9-NEXT: v_or3_b32 v1, v1, v14, v8 +; GFX9-NEXT: v_lshlrev_b32_sdwa v6, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX9-NEXT: v_and_or_b32 v3, v3, v4, v5 -; GFX9-NEXT: v_or3_b32 v2, v2, v16, v11 -; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s3, 2 +; GFX9-NEXT: v_lshlrev_b32_e32 v4, 24, v10 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX9-NEXT: v_or3_b32 v2, v2, v16, v9 +; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s4, 2 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GFX9-NEXT: v_or3_b32 v3, v3, v17, v12 -; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s3, 3 +; GFX9-NEXT: v_or3_b32 v3, v3, v6, v4 +; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s4, 3 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; GFX9-NEXT: s_lshl_b32 s0, s2, 3 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, s0, v0 @@ -2475,51 +2259,46 @@ define amdgpu_ps i8 @extractelement_vgpr_v16i8_sgpr_idx(<16 x i8> addrspace(1)* ; GFX8-LABEL: extractelement_vgpr_v16i8_sgpr_idx: ; GFX8: ; %bb.0: ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] -; GFX8-NEXT: s_movk_i32 s0, 0xff -; GFX8-NEXT: v_mov_b32_e32 v5, 8 +; GFX8-NEXT: v_mov_b32_e32 v4, 8 +; GFX8-NEXT: v_mov_b32_e32 v5, 16 ; GFX8-NEXT: v_mov_b32_e32 v6, 8 -; GFX8-NEXT: v_mov_b32_e32 v7, s0 -; GFX8-NEXT: v_mov_b32_e32 v4, 0xff +; GFX8-NEXT: v_mov_b32_e32 v7, 16 ; GFX8-NEXT: s_lshr_b32 s0, s2, 2 ; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, s0, 1 ; GFX8-NEXT: s_and_b32 s1, s2, 3 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_lshrrev_b32_e32 v8, 8, v0 -; GFX8-NEXT: v_lshrrev_b32_e32 v9, 24, v0 -; GFX8-NEXT: v_lshrrev_b32_e32 v10, 8, v1 -; GFX8-NEXT: v_lshlrev_b32_sdwa v5, v5, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshlrev_b32_e32 v8, 24, v9 -; GFX8-NEXT: v_lshrrev_b32_e32 v11, 24, v1 -; GFX8-NEXT: v_lshlrev_b32_sdwa v9, v6, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v12, 8, v2 -; GFX8-NEXT: v_and_b32_sdwa v16, v0, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v0, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_sdwa v7, v1, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v1, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_lshlrev_b32_e32 v10, 24, v11 -; GFX8-NEXT: v_lshrrev_b32_e32 v14, 8, v3 -; GFX8-NEXT: v_lshlrev_b32_sdwa v11, v6, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v16 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v7 -; GFX8-NEXT: v_lshrrev_b32_e32 v13, 24, v2 -; GFX8-NEXT: v_and_b32_sdwa v17, v2, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v2, v2, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_lshlrev_b32_sdwa v6, v6, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v15, 24, v3 -; GFX8-NEXT: v_and_b32_sdwa v4, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v12, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v4, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v8, 24, v0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v13, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_sdwa v0, v0, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v14, v6, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v9, 24, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v5, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v15, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_lshrrev_b32_e32 v10, 24, v2 +; GFX8-NEXT: v_or_b32_sdwa v2, v2, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_e32 v4, 24, v9 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v5 +; GFX8-NEXT: v_lshlrev_b32_sdwa v6, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshlrev_b32_e32 v8, 24, v8 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v13 +; GFX8-NEXT: v_lshrrev_b32_e32 v11, 24, v3 +; GFX8-NEXT: v_lshlrev_b32_sdwa v7, v7, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX8-NEXT: v_or_b32_sdwa v3, v3, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_lshlrev_b32_e32 v12, 24, v13 -; GFX8-NEXT: v_or_b32_e32 v2, v2, v17 +; GFX8-NEXT: v_lshlrev_b32_e32 v9, 24, v10 +; GFX8-NEXT: v_or_b32_e32 v2, v2, v15 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v8 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v10 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; GFX8-NEXT: v_lshlrev_b32_e32 v13, 24, v15 -; GFX8-NEXT: v_or_b32_e32 v3, v3, v4 -; GFX8-NEXT: v_or_b32_e32 v2, v2, v12 +; GFX8-NEXT: v_lshlrev_b32_e32 v6, 24, v11 +; GFX8-NEXT: v_or_b32_e32 v3, v3, v7 +; GFX8-NEXT: v_or_b32_e32 v2, v2, v9 ; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, s0, 2 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, s0, 3 -; GFX8-NEXT: v_or_b32_e32 v3, v3, v13 +; GFX8-NEXT: v_or_b32_e32 v3, v3, v6 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; GFX8-NEXT: s_lshl_b32 s0, s1, 3 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, s0, v0 @@ -2538,57 +2317,49 @@ define amdgpu_ps i8 @extractelement_vgpr_v16i8_sgpr_idx(<16 x i8> addrspace(1)* ; GFX7-NEXT: v_cmp_eq_u32_e64 vcc, s1, 1 ; GFX7-NEXT: s_and_b32 s2, s2, 3 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v5, 8, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v8, 8, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v6, 16, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v9, 16, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v11, 8, v2 -; GFX7-NEXT: v_and_b32_e32 v5, s0, v5 -; GFX7-NEXT: v_and_b32_e32 v8, s0, v8 -; GFX7-NEXT: v_lshrrev_b32_e32 v7, 24, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v10, 24, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v12, 16, v2 -; GFX7-NEXT: v_lshrrev_b32_e32 v14, 8, v3 -; GFX7-NEXT: v_and_b32_e32 v6, s0, v6 -; GFX7-NEXT: v_and_b32_e32 v9, s0, v9 -; GFX7-NEXT: v_and_b32_e32 v11, s0, v11 -; GFX7-NEXT: v_and_b32_e32 v0, s0, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v5, 8, v5 -; GFX7-NEXT: v_and_b32_e32 v1, s0, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v8, 8, v8 -; GFX7-NEXT: v_lshrrev_b32_e32 v13, 24, v2 -; GFX7-NEXT: v_and_b32_e32 v12, v12, v4 -; GFX7-NEXT: v_and_b32_e32 v14, v14, v4 -; GFX7-NEXT: v_and_b32_e32 v2, s0, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v11, 8, v11 -; GFX7-NEXT: v_lshrrev_b32_e32 v15, 16, v3 -; GFX7-NEXT: v_lshlrev_b32_e32 v6, 16, v6 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v5 -; GFX7-NEXT: v_lshlrev_b32_e32 v9, 16, v9 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v8 -; GFX7-NEXT: v_lshrrev_b32_e32 v16, 24, v3 -; GFX7-NEXT: v_and_b32_e32 v3, v3, v4 -; GFX7-NEXT: v_and_b32_e32 v4, v15, v4 +; GFX7-NEXT: v_bfe_u32 v10, v0, 8, 8 +; GFX7-NEXT: v_bfe_u32 v12, v1, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v5, 24, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v6, 24, v1 +; GFX7-NEXT: v_bfe_u32 v14, v2, 8, 8 +; GFX7-NEXT: v_and_b32_e32 v9, s0, v0 +; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX7-NEXT: v_and_b32_e32 v11, s0, v1 +; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v10, 8, v10 +; GFX7-NEXT: v_lshlrev_b32_e32 v12, 8, v12 +; GFX7-NEXT: v_lshrrev_b32_e32 v7, 24, v2 +; GFX7-NEXT: v_and_b32_e32 v13, v2, v4 +; GFX7-NEXT: v_bfe_u32 v2, v2, 16, 8 +; GFX7-NEXT: v_bfe_u32 v15, v3, 8, 8 +; GFX7-NEXT: v_or_b32_e32 v9, v9, v10 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-NEXT: v_or_b32_e32 v10, v11, v12 ; GFX7-NEXT: v_lshlrev_b32_e32 v14, 8, v14 +; GFX7-NEXT: v_lshrrev_b32_e32 v8, 24, v3 +; GFX7-NEXT: v_and_b32_e32 v4, v3, v4 +; GFX7-NEXT: v_bfe_u32 v3, v3, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v15, 8, v15 +; GFX7-NEXT: v_lshlrev_b32_e32 v5, 24, v5 +; GFX7-NEXT: v_or_b32_e32 v0, v9, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v6, 24, v6 +; GFX7-NEXT: v_or_b32_e32 v1, v10, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX7-NEXT: v_or_b32_e32 v11, v13, v14 ; GFX7-NEXT: v_lshlrev_b32_e32 v7, 24, v7 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v6 -; GFX7-NEXT: v_lshlrev_b32_e32 v10, 24, v10 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v9 -; GFX7-NEXT: v_lshlrev_b32_e32 v12, 16, v12 -; GFX7-NEXT: v_or_b32_e32 v2, v2, v11 -; GFX7-NEXT: v_lshlrev_b32_e32 v13, 24, v13 -; GFX7-NEXT: v_or_b32_e32 v2, v2, v12 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX7-NEXT: v_or_b32_e32 v3, v3, v14 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v7 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v10 +; GFX7-NEXT: v_or_b32_e32 v2, v11, v2 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX7-NEXT: v_or_b32_e32 v4, v4, v15 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v5 +; GFX7-NEXT: v_or_b32_e32 v1, v1, v6 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; GFX7-NEXT: v_lshlrev_b32_e32 v15, 24, v16 -; GFX7-NEXT: v_or_b32_e32 v3, v3, v4 -; GFX7-NEXT: v_or_b32_e32 v2, v2, v13 +; GFX7-NEXT: v_lshlrev_b32_e32 v8, 24, v8 +; GFX7-NEXT: v_or_b32_e32 v3, v4, v3 +; GFX7-NEXT: v_or_b32_e32 v2, v2, v7 ; GFX7-NEXT: v_cmp_eq_u32_e64 vcc, s1, 2 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GFX7-NEXT: v_or_b32_e32 v3, v3, v15 +; GFX7-NEXT: v_or_b32_e32 v3, v3, v8 ; GFX7-NEXT: v_cmp_eq_u32_e64 vcc, s1, 3 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; GFX7-NEXT: s_lshl_b32 s0, s2, 3 @@ -2599,43 +2370,41 @@ define amdgpu_ps i8 @extractelement_vgpr_v16i8_sgpr_idx(<16 x i8> addrspace(1)* ; GFX10-LABEL: extractelement_vgpr_v16i8_sgpr_idx: ; GFX10: ; %bb.0: ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX10-NEXT: v_mov_b32_e32 v5, 8 ; GFX10-NEXT: s_mov_b32 s0, 8 -; GFX10-NEXT: s_movk_i32 s1, 0xff +; GFX10-NEXT: v_mov_b32_e32 v5, 8 +; GFX10-NEXT: s_mov_b32 s1, 16 +; GFX10-NEXT: s_movk_i32 s3, 0xff +; GFX10-NEXT: v_mov_b32_e32 v6, 16 ; GFX10-NEXT: v_mov_b32_e32 v4, 0xff ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v6, 8, v0 -; GFX10-NEXT: v_lshrrev_b32_e32 v8, 8, v1 ; GFX10-NEXT: v_lshrrev_b32_e32 v7, 24, v0 -; GFX10-NEXT: v_lshrrev_b32_e32 v9, 24, v1 -; GFX10-NEXT: v_lshrrev_b32_e32 v10, 8, v2 -; GFX10-NEXT: v_lshlrev_b32_sdwa v6, s0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v8, v5, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_lshrrev_b32_e32 v11, 24, v2 -; GFX10-NEXT: v_and_b32_sdwa v13, v0, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_b32_sdwa v14, v1, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_or_b32 v0, v0, s1, v6 +; GFX10-NEXT: v_lshrrev_b32_e32 v8, 24, v1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v10, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v12, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v9, 24, v2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v11, s1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v13, s1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v14, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_and_or_b32 v0, v0, s3, v10 ; GFX10-NEXT: v_lshlrev_b32_e32 v7, 24, v7 -; GFX10-NEXT: v_lshrrev_b32_e32 v12, 8, v3 -; GFX10-NEXT: v_lshlrev_b32_sdwa v10, v5, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_lshlrev_b32_e32 v9, 24, v9 -; GFX10-NEXT: v_and_or_b32 v1, v1, s1, v8 +; GFX10-NEXT: v_and_or_b32 v1, v1, s3, v12 +; GFX10-NEXT: v_lshlrev_b32_e32 v8, 24, v8 ; GFX10-NEXT: s_lshr_b32 s0, s2, 2 -; GFX10-NEXT: v_lshlrev_b32_sdwa v5, v5, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_and_b32_sdwa v15, v2, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b32_e32 v11, 24, v11 -; GFX10-NEXT: v_and_or_b32 v2, v2, s1, v10 -; GFX10-NEXT: v_lshrrev_b32_e32 v6, 24, v3 -; GFX10-NEXT: v_or3_b32 v0, v0, v13, v7 -; GFX10-NEXT: v_or3_b32 v1, v1, v14, v9 +; GFX10-NEXT: v_lshlrev_b32_sdwa v15, v6, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v2, v2, v4, v14 +; GFX10-NEXT: v_lshlrev_b32_e32 v9, 24, v9 +; GFX10-NEXT: v_lshlrev_b32_sdwa v5, v5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v10, 24, v3 +; GFX10-NEXT: v_or3_b32 v0, v0, v11, v7 +; GFX10-NEXT: v_or3_b32 v1, v1, v13, v8 ; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s0, 1 -; GFX10-NEXT: v_and_or_b32 v5, v3, v4, v5 -; GFX10-NEXT: v_and_b32_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_or3_b32 v2, v2, v15, v11 -; GFX10-NEXT: v_lshlrev_b32_e32 v4, 24, v6 +; GFX10-NEXT: v_and_or_b32 v4, v3, v4, v5 +; GFX10-NEXT: v_or3_b32 v2, v2, v15, v9 +; GFX10-NEXT: v_lshlrev_b32_sdwa v3, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_lshlrev_b32_e32 v5, 24, v10 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s0, 2 -; GFX10-NEXT: v_or3_b32 v1, v5, v3, v4 +; GFX10-NEXT: v_or3_b32 v1, v4, v3, v5 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s0, 3 ; GFX10-NEXT: s_and_b32 s0, s2, 3 @@ -2655,45 +2424,43 @@ define i8 @extractelement_vgpr_v16i8_vgpr_idx(<16 x i8> addrspace(1)* %ptr, i32 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx4 v[3:6], v[0:1], off ; GFX9-NEXT: s_mov_b32 s4, 8 +; GFX9-NEXT: s_mov_b32 s5, 16 ; GFX9-NEXT: v_mov_b32_e32 v1, 8 -; GFX9-NEXT: s_movk_i32 s5, 0xff -; GFX9-NEXT: v_lshrrev_b32_e32 v7, 2, v2 +; GFX9-NEXT: s_movk_i32 s6, 0xff +; GFX9-NEXT: v_lshrrev_b32_e32 v8, 2, v2 +; GFX9-NEXT: v_mov_b32_e32 v7, 16 ; GFX9-NEXT: v_mov_b32_e32 v0, 0xff -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v7 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8 ; GFX9-NEXT: v_and_b32_e32 v2, 3, v2 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v8, 8, v3 -; GFX9-NEXT: v_lshrrev_b32_e32 v10, 8, v4 ; GFX9-NEXT: v_lshrrev_b32_e32 v9, 24, v3 -; GFX9-NEXT: v_lshrrev_b32_e32 v11, 24, v4 -; GFX9-NEXT: v_lshrrev_b32_e32 v12, 8, v5 -; GFX9-NEXT: v_lshrrev_b32_e32 v14, 8, v6 -; GFX9-NEXT: v_lshlrev_b32_sdwa v8, s4, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v10, v1, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshrrev_b32_e32 v13, 24, v5 -; GFX9-NEXT: v_and_b32_sdwa v16, v3, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_b32_sdwa v17, v4, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b32_sdwa v12, v1, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_lshrrev_b32_e32 v10, 24, v4 +; GFX9-NEXT: v_lshlrev_b32_sdwa v13, s4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v15, s4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshrrev_b32_e32 v11, 24, v5 +; GFX9-NEXT: v_lshlrev_b32_sdwa v14, s5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v16, s5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v17, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshrrev_b32_e32 v12, 24, v6 +; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_and_or_b32 v3, v3, s6, v13 ; GFX9-NEXT: v_lshlrev_b32_e32 v9, 24, v9 +; GFX9-NEXT: v_and_or_b32 v4, v4, s6, v15 +; GFX9-NEXT: v_lshlrev_b32_e32 v10, 24, v10 +; GFX9-NEXT: v_lshlrev_b32_sdwa v18, v7, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_and_or_b32 v5, v5, v0, v17 ; GFX9-NEXT: v_lshlrev_b32_e32 v11, 24, v11 -; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_or_b32 v3, v3, s5, v8 -; GFX9-NEXT: v_and_or_b32 v4, v4, s5, v10 -; GFX9-NEXT: v_lshrrev_b32_e32 v15, 24, v6 -; GFX9-NEXT: v_and_b32_sdwa v18, v5, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_b32_sdwa v19, v6, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX9-NEXT: v_and_or_b32 v0, v6, v0, v1 -; GFX9-NEXT: v_or3_b32 v1, v3, v16, v9 -; GFX9-NEXT: v_or3_b32 v3, v4, v17, v11 -; GFX9-NEXT: v_lshlrev_b32_e32 v13, 24, v13 -; GFX9-NEXT: v_and_or_b32 v5, v5, s5, v12 -; GFX9-NEXT: v_lshlrev_b32_e32 v14, 24, v15 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; GFX9-NEXT: v_or3_b32 v4, v5, v18, v13 -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 2, v7 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc -; GFX9-NEXT: v_or3_b32 v0, v0, v19, v14 -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 3, v7 +; GFX9-NEXT: v_lshlrev_b32_sdwa v7, v7, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v12 +; GFX9-NEXT: v_or3_b32 v3, v3, v14, v9 +; GFX9-NEXT: v_or3_b32 v4, v4, v16, v10 +; GFX9-NEXT: v_or3_b32 v0, v0, v7, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-NEXT: v_or3_b32 v5, v5, v18, v11 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 2, v8 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 3, v8 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 3, v2 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, v1, v0 @@ -2703,52 +2470,47 @@ define i8 @extractelement_vgpr_v16i8_vgpr_idx(<16 x i8> addrspace(1)* %ptr, i32 ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: flat_load_dwordx4 v[3:6], v[0:1] -; GFX8-NEXT: s_movk_i32 s4, 0xff -; GFX8-NEXT: v_mov_b32_e32 v1, 8 +; GFX8-NEXT: v_mov_b32_e32 v0, 8 +; GFX8-NEXT: v_mov_b32_e32 v1, 16 ; GFX8-NEXT: v_mov_b32_e32 v7, 8 -; GFX8-NEXT: v_mov_b32_e32 v8, s4 -; GFX8-NEXT: v_mov_b32_e32 v0, 0xff +; GFX8-NEXT: v_mov_b32_e32 v8, 16 ; GFX8-NEXT: v_lshrrev_b32_e32 v9, 2, v2 ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v9 ; GFX8-NEXT: v_and_b32_e32 v2, 3, v2 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_lshrrev_b32_e32 v10, 8, v3 -; GFX8-NEXT: v_lshrrev_b32_e32 v11, 24, v3 -; GFX8-NEXT: v_lshrrev_b32_e32 v12, 8, v4 -; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshlrev_b32_e32 v10, 24, v11 -; GFX8-NEXT: v_lshrrev_b32_e32 v13, 24, v4 -; GFX8-NEXT: v_lshlrev_b32_sdwa v11, v7, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v14, 8, v5 -; GFX8-NEXT: v_and_b32_sdwa v18, v3, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_lshlrev_b32_e32 v12, 24, v13 -; GFX8-NEXT: v_lshrrev_b32_e32 v16, 8, v6 -; GFX8-NEXT: v_lshlrev_b32_sdwa v13, v7, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_and_b32_sdwa v8, v4, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v3, v4, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_e32 v1, v1, v18 -; GFX8-NEXT: v_or_b32_e32 v3, v3, v8 -; GFX8-NEXT: v_lshrrev_b32_e32 v15, 24, v5 -; GFX8-NEXT: v_and_b32_sdwa v19, v5, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v4, v5, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_lshlrev_b32_sdwa v7, v7, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v17, 24, v6 -; GFX8-NEXT: v_lshlrev_b32_e32 v14, 24, v15 -; GFX8-NEXT: v_or_b32_e32 v4, v4, v19 -; GFX8-NEXT: v_and_b32_sdwa v0, v6, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v5, v6, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_e32 v1, v1, v10 -; GFX8-NEXT: v_or_b32_e32 v3, v3, v12 -; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; GFX8-NEXT: v_lshlrev_b32_e32 v15, 24, v17 -; GFX8-NEXT: v_or_b32_e32 v0, v5, v0 -; GFX8-NEXT: v_or_b32_e32 v4, v4, v14 +; GFX8-NEXT: v_lshlrev_b32_sdwa v14, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v10, 24, v3 +; GFX8-NEXT: v_lshlrev_b32_sdwa v15, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_sdwa v3, v3, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v16, v7, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v11, 24, v4 +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v17, v8, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_lshrrev_b32_e32 v12, 24, v5 +; GFX8-NEXT: v_lshlrev_b32_e32 v4, 24, v11 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX8-NEXT: v_or_b32_sdwa v5, v5, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v7, v7, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshlrev_b32_e32 v10, 24, v10 +; GFX8-NEXT: v_or_b32_e32 v3, v3, v15 +; GFX8-NEXT: v_lshrrev_b32_e32 v13, 24, v6 +; GFX8-NEXT: v_lshlrev_b32_sdwa v8, v8, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_sdwa v6, v6, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v1, v5, v17 +; GFX8-NEXT: v_lshlrev_b32_e32 v11, 24, v12 +; GFX8-NEXT: v_or_b32_e32 v3, v3, v10 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc +; GFX8-NEXT: v_lshlrev_b32_e32 v7, 24, v13 +; GFX8-NEXT: v_or_b32_e32 v5, v6, v8 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v11 ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 2, v9 -; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc -; GFX8-NEXT: v_or_b32_e32 v0, v0, v15 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX8-NEXT: v_or_b32_e32 v4, v5, v7 ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 3, v9 -; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 3, v2 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, v1, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -2762,62 +2524,54 @@ define i8 @extractelement_vgpr_v16i8_vgpr_idx(<16 x i8> addrspace(1)* %ptr, i32 ; GFX7-NEXT: buffer_load_dwordx4 v[3:6], v[0:1], s[4:7], 0 addr64 ; GFX7-NEXT: s_movk_i32 s4, 0xff ; GFX7-NEXT: v_mov_b32_e32 v0, 0xff -; GFX7-NEXT: v_lshrrev_b32_e32 v18, 2, v2 -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v18 +; GFX7-NEXT: v_lshrrev_b32_e32 v17, 2, v2 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v17 ; GFX7-NEXT: v_and_b32_e32 v2, 3, v2 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v3 -; GFX7-NEXT: v_lshrrev_b32_e32 v9, 8, v4 -; GFX7-NEXT: v_lshrrev_b32_e32 v7, 16, v3 -; GFX7-NEXT: v_lshrrev_b32_e32 v10, 16, v4 -; GFX7-NEXT: v_lshrrev_b32_e32 v12, 8, v5 -; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 -; GFX7-NEXT: v_and_b32_e32 v9, s4, v9 -; GFX7-NEXT: v_lshrrev_b32_e32 v8, 24, v3 -; GFX7-NEXT: v_lshrrev_b32_e32 v11, 24, v4 -; GFX7-NEXT: v_lshrrev_b32_e32 v13, 16, v5 -; GFX7-NEXT: v_lshrrev_b32_e32 v15, 8, v6 -; GFX7-NEXT: v_and_b32_e32 v7, s4, v7 -; GFX7-NEXT: v_and_b32_e32 v10, s4, v10 -; GFX7-NEXT: v_and_b32_e32 v12, s4, v12 -; GFX7-NEXT: v_and_b32_e32 v3, s4, v3 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1 -; GFX7-NEXT: v_and_b32_e32 v4, s4, v4 -; GFX7-NEXT: v_lshlrev_b32_e32 v9, 8, v9 -; GFX7-NEXT: v_lshrrev_b32_e32 v14, 24, v5 -; GFX7-NEXT: v_and_b32_e32 v13, v13, v0 -; GFX7-NEXT: v_and_b32_e32 v15, v15, v0 +; GFX7-NEXT: v_bfe_u32 v11, v3, 8, 8 +; GFX7-NEXT: v_bfe_u32 v13, v4, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v1, 24, v3 +; GFX7-NEXT: v_lshrrev_b32_e32 v7, 24, v4 +; GFX7-NEXT: v_bfe_u32 v15, v5, 8, 8 +; GFX7-NEXT: v_and_b32_e32 v10, s4, v3 +; GFX7-NEXT: v_bfe_u32 v3, v3, 16, 8 +; GFX7-NEXT: v_and_b32_e32 v12, s4, v4 +; GFX7-NEXT: v_bfe_u32 v4, v4, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v11, 8, v11 +; GFX7-NEXT: v_lshlrev_b32_e32 v13, 8, v13 +; GFX7-NEXT: v_lshrrev_b32_e32 v8, 24, v5 +; GFX7-NEXT: v_and_b32_e32 v14, v5, v0 +; GFX7-NEXT: v_bfe_u32 v5, v5, 16, 8 +; GFX7-NEXT: v_bfe_u32 v16, v6, 8, 8 +; GFX7-NEXT: v_or_b32_e32 v10, v10, v11 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v4 +; GFX7-NEXT: v_or_b32_e32 v11, v12, v13 +; GFX7-NEXT: v_lshlrev_b32_e32 v15, 8, v15 +; GFX7-NEXT: v_lshrrev_b32_e32 v9, 24, v6 +; GFX7-NEXT: v_and_b32_e32 v0, v6, v0 +; GFX7-NEXT: v_bfe_u32 v6, v6, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v16, 8, v16 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v1 +; GFX7-NEXT: v_or_b32_e32 v3, v10, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v7, 24, v7 +; GFX7-NEXT: v_lshlrev_b32_e32 v5, 16, v5 +; GFX7-NEXT: v_or_b32_e32 v12, v14, v15 +; GFX7-NEXT: v_or_b32_e32 v4, v11, v4 ; GFX7-NEXT: v_or_b32_e32 v1, v3, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v7, 16, v7 -; GFX7-NEXT: v_lshrrev_b32_e32 v16, 16, v6 -; GFX7-NEXT: v_and_b32_e32 v5, s4, v5 -; GFX7-NEXT: v_lshlrev_b32_e32 v10, 16, v10 -; GFX7-NEXT: v_or_b32_e32 v3, v4, v9 -; GFX7-NEXT: v_lshlrev_b32_e32 v12, 8, v12 -; GFX7-NEXT: v_lshrrev_b32_e32 v17, 24, v6 -; GFX7-NEXT: v_and_b32_e32 v6, v6, v0 -; GFX7-NEXT: v_and_b32_e32 v0, v16, v0 +; GFX7-NEXT: v_or_b32_e32 v3, v4, v7 ; GFX7-NEXT: v_lshlrev_b32_e32 v8, 24, v8 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v7 -; GFX7-NEXT: v_lshlrev_b32_e32 v11, 24, v11 -; GFX7-NEXT: v_or_b32_e32 v3, v3, v10 -; GFX7-NEXT: v_lshlrev_b32_e32 v13, 16, v13 -; GFX7-NEXT: v_or_b32_e32 v4, v5, v12 -; GFX7-NEXT: v_lshlrev_b32_e32 v15, 8, v15 -; GFX7-NEXT: v_lshlrev_b32_e32 v14, 24, v14 -; GFX7-NEXT: v_or_b32_e32 v4, v4, v13 -; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX7-NEXT: v_or_b32_e32 v5, v6, v15 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v8 -; GFX7-NEXT: v_or_b32_e32 v3, v3, v11 +; GFX7-NEXT: v_or_b32_e32 v5, v12, v5 +; GFX7-NEXT: v_lshlrev_b32_e32 v6, 16, v6 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v16 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; GFX7-NEXT: v_lshlrev_b32_e32 v16, 24, v17 -; GFX7-NEXT: v_or_b32_e32 v0, v5, v0 -; GFX7-NEXT: v_or_b32_e32 v4, v4, v14 -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 2, v18 +; GFX7-NEXT: v_lshlrev_b32_e32 v9, 24, v9 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v6 +; GFX7-NEXT: v_or_b32_e32 v4, v5, v8 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 2, v17 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc -; GFX7-NEXT: v_or_b32_e32 v0, v0, v16 -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 3, v18 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v9 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 3, v17 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 3, v2 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, v1, v0 @@ -2828,46 +2582,44 @@ define i8 @extractelement_vgpr_v16i8_vgpr_idx(<16 x i8> addrspace(1)* %ptr, i32 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[3:6], v[0:1], off -; GFX10-NEXT: v_mov_b32_e32 v1, 8 ; GFX10-NEXT: s_mov_b32 s4, 8 -; GFX10-NEXT: s_movk_i32 s5, 0xff -; GFX10-NEXT: v_lshrrev_b32_e32 v7, 2, v2 +; GFX10-NEXT: v_mov_b32_e32 v1, 8 +; GFX10-NEXT: s_mov_b32 s5, 16 +; GFX10-NEXT: s_movk_i32 s6, 0xff +; GFX10-NEXT: v_lshrrev_b32_e32 v8, 2, v2 +; GFX10-NEXT: v_mov_b32_e32 v7, 16 ; GFX10-NEXT: v_mov_b32_e32 v0, 0xff ; GFX10-NEXT: v_and_b32_e32 v2, 3, v2 -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v7 +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v8 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v8, 8, v3 -; GFX10-NEXT: v_lshrrev_b32_e32 v10, 8, v4 ; GFX10-NEXT: v_lshrrev_b32_e32 v9, 24, v3 -; GFX10-NEXT: v_lshrrev_b32_e32 v11, 24, v4 -; GFX10-NEXT: v_lshrrev_b32_e32 v12, 8, v5 -; GFX10-NEXT: v_lshlrev_b32_sdwa v8, s4, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v10, v1, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_and_b32_sdwa v16, v3, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_b32_sdwa v17, v4, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshrrev_b32_e32 v13, 24, v5 -; GFX10-NEXT: v_and_or_b32 v3, v3, s5, v8 +; GFX10-NEXT: v_lshrrev_b32_e32 v10, 24, v4 +; GFX10-NEXT: v_lshlrev_b32_sdwa v13, s4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v15, s4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v11, 24, v5 +; GFX10-NEXT: v_lshlrev_b32_sdwa v14, s5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v16, s5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v17, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_and_or_b32 v3, v3, s6, v13 ; GFX10-NEXT: v_lshlrev_b32_e32 v9, 24, v9 -; GFX10-NEXT: v_lshrrev_b32_e32 v14, 8, v6 -; GFX10-NEXT: v_lshlrev_b32_sdwa v12, v1, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX10-NEXT: v_and_or_b32 v4, v4, s6, v15 +; GFX10-NEXT: v_lshlrev_b32_e32 v10, 24, v10 +; GFX10-NEXT: v_lshlrev_b32_sdwa v18, v7, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_lshrrev_b32_e32 v12, 24, v6 +; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_and_or_b32 v5, v5, v0, v17 ; GFX10-NEXT: v_lshlrev_b32_e32 v11, 24, v11 -; GFX10-NEXT: v_and_or_b32 v4, v4, s5, v10 -; GFX10-NEXT: v_and_b32_sdwa v18, v5, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshrrev_b32_e32 v15, 24, v6 -; GFX10-NEXT: v_or3_b32 v3, v3, v16, v9 -; GFX10-NEXT: v_lshlrev_b32_e32 v8, 24, v13 -; GFX10-NEXT: v_or3_b32 v4, v4, v17, v11 -; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_and_or_b32 v5, v5, s5, v12 -; GFX10-NEXT: v_and_b32_sdwa v19, v6, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b32_e32 v9, 24, v15 -; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc_lo +; GFX10-NEXT: v_or3_b32 v3, v3, v14, v9 +; GFX10-NEXT: v_or3_b32 v4, v4, v16, v10 ; GFX10-NEXT: v_and_or_b32 v0, v6, v0, v1 -; GFX10-NEXT: v_or3_b32 v1, v5, v18, v8 -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v7 -; GFX10-NEXT: v_or3_b32 v0, v0, v19, v9 -; GFX10-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v7 +; GFX10-NEXT: v_lshlrev_b32_sdwa v7, v7, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v12 +; GFX10-NEXT: v_or3_b32 v5, v5, v18, v11 +; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc_lo +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v8 +; GFX10-NEXT: v_or3_b32 v0, v0, v7, v1 +; GFX10-NEXT: v_cndmask_b32_e32 v1, v3, v5, vcc_lo +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v8 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 3, v2 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, v1, v0 @@ -2881,64 +2633,58 @@ define amdgpu_ps i8 @extractelement_sgpr_v16i8_vgpr_idx(<16 x i8> addrspace(4)* ; GCN-LABEL: extractelement_sgpr_v16i8_vgpr_idx: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 -; GCN-NEXT: s_movk_i32 s16, 0xff +; GCN-NEXT: s_mov_b32 s10, 0x80008 +; GCN-NEXT: s_movk_i32 s8, 0xff ; GCN-NEXT: v_lshrrev_b32_e32 v1, 2, v0 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 -; GCN-NEXT: v_and_b32_e32 v0, 3, v0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_lshr_b32 s4, s0, 8 -; GCN-NEXT: s_and_b32 s4, s4, s16 -; GCN-NEXT: s_lshr_b32 s5, s0, 16 -; GCN-NEXT: s_lshr_b32 s6, s0, 24 -; GCN-NEXT: s_and_b32 s0, s0, s16 -; GCN-NEXT: s_lshl_b32 s4, s4, 8 -; GCN-NEXT: s_or_b32 s0, s0, s4 -; GCN-NEXT: s_and_b32 s4, s5, s16 -; GCN-NEXT: s_lshl_b32 s4, s4, 16 -; GCN-NEXT: s_or_b32 s0, s0, s4 -; GCN-NEXT: s_lshl_b32 s4, s6, 24 -; GCN-NEXT: s_lshr_b32 s7, s1, 8 +; GCN-NEXT: s_bfe_u32 s11, s0, s10 +; GCN-NEXT: s_and_b32 s9, s0, s8 +; GCN-NEXT: s_lshl_b32 s11, s11, 8 +; GCN-NEXT: s_or_b32 s9, s9, s11 +; GCN-NEXT: s_mov_b32 s11, 0x80010 +; GCN-NEXT: s_lshr_b32 s4, s0, 24 +; GCN-NEXT: s_bfe_u32 s0, s0, s11 +; GCN-NEXT: s_lshl_b32 s0, s0, 16 +; GCN-NEXT: s_or_b32 s0, s9, s0 +; GCN-NEXT: s_bfe_u32 s9, s1, s10 +; GCN-NEXT: s_lshl_b32 s4, s4, 24 ; GCN-NEXT: s_or_b32 s0, s0, s4 -; GCN-NEXT: s_and_b32 s4, s7, s16 -; GCN-NEXT: s_lshr_b32 s8, s1, 16 -; GCN-NEXT: s_lshr_b32 s9, s1, 24 -; GCN-NEXT: s_and_b32 s1, s1, s16 -; GCN-NEXT: s_lshl_b32 s4, s4, 8 -; GCN-NEXT: s_or_b32 s1, s1, s4 -; GCN-NEXT: s_and_b32 s4, s8, s16 -; GCN-NEXT: s_lshl_b32 s4, s4, 16 -; GCN-NEXT: s_or_b32 s1, s1, s4 -; GCN-NEXT: s_lshl_b32 s4, s9, 24 -; GCN-NEXT: s_lshr_b32 s10, s2, 8 +; GCN-NEXT: s_lshr_b32 s5, s1, 24 +; GCN-NEXT: s_and_b32 s4, s1, s8 +; GCN-NEXT: s_bfe_u32 s1, s1, s11 +; GCN-NEXT: s_lshl_b32 s9, s9, 8 +; GCN-NEXT: s_or_b32 s4, s4, s9 +; GCN-NEXT: s_lshl_b32 s1, s1, 16 +; GCN-NEXT: s_or_b32 s1, s4, s1 +; GCN-NEXT: s_lshl_b32 s4, s5, 24 +; GCN-NEXT: s_bfe_u32 s5, s2, s10 ; GCN-NEXT: s_or_b32 s1, s1, s4 -; GCN-NEXT: s_and_b32 s4, s10, s16 -; GCN-NEXT: s_lshr_b32 s11, s2, 16 -; GCN-NEXT: s_lshr_b32 s12, s2, 24 -; GCN-NEXT: s_and_b32 s2, s2, s16 -; GCN-NEXT: s_lshl_b32 s4, s4, 8 -; GCN-NEXT: s_or_b32 s2, s2, s4 -; GCN-NEXT: s_and_b32 s4, s11, s16 -; GCN-NEXT: s_lshl_b32 s4, s4, 16 -; GCN-NEXT: s_or_b32 s2, s2, s4 -; GCN-NEXT: s_lshl_b32 s4, s12, 24 -; GCN-NEXT: s_lshr_b32 s13, s3, 8 +; GCN-NEXT: s_lshr_b32 s6, s2, 24 +; GCN-NEXT: s_and_b32 s4, s2, s8 +; GCN-NEXT: s_bfe_u32 s2, s2, s11 +; GCN-NEXT: s_lshl_b32 s5, s5, 8 +; GCN-NEXT: s_or_b32 s4, s4, s5 +; GCN-NEXT: s_lshl_b32 s2, s2, 16 +; GCN-NEXT: s_bfe_u32 s5, s3, s10 +; GCN-NEXT: s_or_b32 s2, s4, s2 +; GCN-NEXT: s_lshl_b32 s4, s6, 24 ; GCN-NEXT: s_or_b32 s2, s2, s4 -; GCN-NEXT: s_and_b32 s4, s13, s16 -; GCN-NEXT: s_lshr_b32 s14, s3, 16 -; GCN-NEXT: s_lshr_b32 s15, s3, 24 -; GCN-NEXT: s_and_b32 s3, s3, s16 -; GCN-NEXT: s_lshl_b32 s4, s4, 8 -; GCN-NEXT: s_or_b32 s3, s3, s4 -; GCN-NEXT: s_and_b32 s4, s14, s16 -; GCN-NEXT: s_lshl_b32 s4, s4, 16 -; GCN-NEXT: s_or_b32 s3, s3, s4 -; GCN-NEXT: s_lshl_b32 s4, s15, 24 +; GCN-NEXT: s_lshr_b32 s7, s3, 24 +; GCN-NEXT: s_and_b32 s4, s3, s8 +; GCN-NEXT: s_bfe_u32 s3, s3, s11 +; GCN-NEXT: s_lshl_b32 s5, s5, 8 +; GCN-NEXT: s_or_b32 s4, s4, s5 +; GCN-NEXT: s_lshl_b32 s3, s3, 16 +; GCN-NEXT: s_or_b32 s3, s4, s3 +; GCN-NEXT: s_lshl_b32 s4, s7, 24 ; GCN-NEXT: v_mov_b32_e32 v2, s0 ; GCN-NEXT: v_mov_b32_e32 v3, s1 ; GCN-NEXT: s_or_b32 s3, s3, s4 ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc ; GCN-NEXT: v_mov_b32_e32 v4, s2 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v1 +; GCN-NEXT: v_and_b32_e32 v0, 3, v0 ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; GCN-NEXT: v_mov_b32_e32 v5, s3 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v1 @@ -2951,64 +2697,58 @@ define amdgpu_ps i8 @extractelement_sgpr_v16i8_vgpr_idx(<16 x i8> addrspace(4)* ; GFX10-LABEL: extractelement_sgpr_v16i8_vgpr_idx: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 +; GFX10-NEXT: s_mov_b32 s5, 0x80008 ; GFX10-NEXT: s_movk_i32 s4, 0xff +; GFX10-NEXT: s_mov_b32 s6, 0x80010 ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 2, v0 ; GFX10-NEXT: v_and_b32_e32 v0, 3, v0 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 3, v0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_lshr_b32 s8, s1, 8 -; GFX10-NEXT: s_lshr_b32 s5, s0, 8 -; GFX10-NEXT: s_lshr_b32 s9, s1, 16 -; GFX10-NEXT: s_and_b32 s8, s8, s4 -; GFX10-NEXT: s_and_b32 s5, s5, s4 -; GFX10-NEXT: s_and_b32 s9, s9, s4 -; GFX10-NEXT: s_lshr_b32 s10, s1, 24 -; GFX10-NEXT: s_and_b32 s1, s1, s4 -; GFX10-NEXT: s_lshl_b32 s8, s8, 8 -; GFX10-NEXT: s_lshr_b32 s6, s0, 16 +; GFX10-NEXT: s_bfe_u32 s12, s0, s5 +; GFX10-NEXT: s_bfe_u32 s14, s1, s5 +; GFX10-NEXT: s_lshr_b32 s8, s1, 24 +; GFX10-NEXT: s_and_b32 s13, s1, s4 +; GFX10-NEXT: s_bfe_u32 s1, s1, s6 +; GFX10-NEXT: s_and_b32 s11, s0, s4 +; GFX10-NEXT: s_lshl_b32 s12, s12, 8 +; GFX10-NEXT: s_lshl_b32 s14, s14, 8 +; GFX10-NEXT: s_or_b32 s11, s11, s12 +; GFX10-NEXT: s_lshl_b32 s1, s1, 16 +; GFX10-NEXT: s_or_b32 s12, s13, s14 +; GFX10-NEXT: s_lshl_b32 s8, s8, 24 +; GFX10-NEXT: s_or_b32 s1, s12, s1 ; GFX10-NEXT: s_lshr_b32 s7, s0, 24 -; GFX10-NEXT: s_and_b32 s0, s0, s4 -; GFX10-NEXT: s_lshl_b32 s5, s5, 8 -; GFX10-NEXT: s_lshl_b32 s9, s9, 16 +; GFX10-NEXT: s_bfe_u32 s0, s0, s6 ; GFX10-NEXT: s_or_b32 s1, s1, s8 -; GFX10-NEXT: s_and_b32 s6, s6, s4 -; GFX10-NEXT: s_or_b32 s0, s0, s5 -; GFX10-NEXT: s_or_b32 s1, s1, s9 -; GFX10-NEXT: s_lshl_b32 s5, s10, 24 -; GFX10-NEXT: s_lshl_b32 s6, s6, 16 -; GFX10-NEXT: s_lshr_b32 s11, s2, 8 -; GFX10-NEXT: s_or_b32 s1, s1, s5 -; GFX10-NEXT: s_or_b32 s0, s0, s6 -; GFX10-NEXT: s_and_b32 s6, s11, s4 -; GFX10-NEXT: s_lshl_b32 s7, s7, 24 -; GFX10-NEXT: s_lshr_b32 s12, s2, 16 +; GFX10-NEXT: s_lshl_b32 s0, s0, 16 +; GFX10-NEXT: s_bfe_u32 s16, s2, s5 ; GFX10-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-NEXT: s_lshr_b32 s13, s2, 24 +; GFX10-NEXT: s_lshl_b32 s7, s7, 24 +; GFX10-NEXT: s_or_b32 s0, s11, s0 +; GFX10-NEXT: s_and_b32 s15, s2, s4 +; GFX10-NEXT: s_lshr_b32 s9, s2, 24 +; GFX10-NEXT: s_bfe_u32 s2, s2, s6 +; GFX10-NEXT: s_lshl_b32 s16, s16, 8 ; GFX10-NEXT: s_or_b32 s0, s0, s7 -; GFX10-NEXT: s_and_b32 s2, s2, s4 -; GFX10-NEXT: s_lshl_b32 s6, s6, 8 -; GFX10-NEXT: s_and_b32 s7, s12, s4 -; GFX10-NEXT: s_or_b32 s2, s2, s6 -; GFX10-NEXT: s_lshl_b32 s6, s7, 16 -; GFX10-NEXT: s_lshr_b32 s14, s3, 8 -; GFX10-NEXT: s_lshr_b32 s15, s3, 16 -; GFX10-NEXT: s_or_b32 s2, s2, s6 -; GFX10-NEXT: s_and_b32 s6, s14, s4 +; GFX10-NEXT: s_or_b32 s7, s15, s16 +; GFX10-NEXT: s_lshl_b32 s2, s2, 16 +; GFX10-NEXT: s_bfe_u32 s5, s3, s5 ; GFX10-NEXT: v_cndmask_b32_e32 v2, s0, v2, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1 -; GFX10-NEXT: s_lshl_b32 s5, s13, 24 -; GFX10-NEXT: s_and_b32 s1, s15, s4 -; GFX10-NEXT: s_lshr_b32 s16, s3, 24 -; GFX10-NEXT: s_and_b32 s3, s3, s4 -; GFX10-NEXT: s_lshl_b32 s6, s6, 8 -; GFX10-NEXT: s_or_b32 s2, s2, s5 -; GFX10-NEXT: s_or_b32 s3, s3, s6 +; GFX10-NEXT: s_or_b32 s2, s7, s2 +; GFX10-NEXT: s_lshl_b32 s7, s9, 24 +; GFX10-NEXT: s_bfe_u32 s1, s3, s6 +; GFX10-NEXT: s_and_b32 s4, s3, s4 +; GFX10-NEXT: s_lshl_b32 s5, s5, 8 +; GFX10-NEXT: s_or_b32 s2, s2, s7 +; GFX10-NEXT: s_lshr_b32 s10, s3, 24 +; GFX10-NEXT: s_or_b32 s3, s4, s5 ; GFX10-NEXT: s_lshl_b32 s1, s1, 16 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s2, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1 ; GFX10-NEXT: s_or_b32 s0, s3, s1 -; GFX10-NEXT: s_lshl_b32 s1, s16, 24 +; GFX10-NEXT: s_lshl_b32 s1, s10, 24 ; GFX10-NEXT: s_or_b32 s3, s0, s1 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, s3, vcc_lo ; GFX10-NEXT: v_lshrrev_b32_e32 v0, v0, v1 @@ -3024,15 +2764,15 @@ define i8 @extractelement_vgpr_v16i8_idx0(<16 x i8> addrspace(1)* %ptr) { ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX9-NEXT: s_mov_b32 s4, 8 -; GFX9-NEXT: s_movk_i32 s5, 0xff -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_b32_sdwa v3, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_or_b32 v0, v0, s5, v1 -; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v2 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v2, 8 +; GFX9-NEXT: v_mov_b32_e32 v3, 16 +; GFX9-NEXT: v_mov_b32_e32 v1, 0xff +; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v0 +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_and_or_b32 v0, v0, v1, v2 +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v4 ; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -3040,17 +2780,15 @@ define i8 @extractelement_vgpr_v16i8_idx0(<16 x i8> addrspace(1)* %ptr) { ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] -; GFX8-NEXT: s_movk_i32 s4, 0xff ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v1, 8 -; GFX8-NEXT: v_mov_b32_e32 v2, s4 -; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0 -; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v0 -; GFX8-NEXT: v_and_b32_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_mov_b32_e32 v2, 16 +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v4 +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; @@ -3061,19 +2799,16 @@ define i8 @extractelement_vgpr_v16i8_idx0(<16 x i8> addrspace(1)* %ptr) { ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 -; GFX7-NEXT: s_movk_i32 s4, 0xff ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0 -; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 -; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 -; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX7-NEXT: v_bfe_u32 v3, v0, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v1, 24, v0 +; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v0 +; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v1 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; @@ -3082,16 +2817,15 @@ define i8 @extractelement_vgpr_v16i8_idx0(<16 x i8> addrspace(1)* %ptr) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX10-NEXT: s_mov_b32 s4, 8 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: s_movk_i32 s4, 0xff -; GFX10-NEXT: v_and_b32_sdwa v3, v0, s4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_or_b32 v0, v0, s4, v1 -; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v2 -; GFX10-NEXT: v_or3_b32 v0, v0, v3, v1 +; GFX10-NEXT: v_mov_b32_e32 v1, 8 +; GFX10-NEXT: v_mov_b32_e32 v2, 16 +; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v0, 0xff, v0, v1 +; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX10-NEXT: v_or3_b32 v0, v0, v2, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr %element = extractelement <16 x i8> %vector, i32 0 @@ -3104,15 +2838,15 @@ define i8 @extractelement_vgpr_v16i8_idx1(<16 x i8> addrspace(1)* %ptr) { ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX9-NEXT: s_mov_b32 s4, 8 -; GFX9-NEXT: s_movk_i32 s5, 0xff ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_b32_sdwa v3, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_or_b32 v0, v0, s5, v1 -; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v2 -; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 +; GFX9-NEXT: v_mov_b32_e32 v2, 16 +; GFX9-NEXT: v_mov_b32_e32 v1, 0xff +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_and_or_b32 v0, v0, v1, v4 +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX9-NEXT: v_or3_b32 v0, v0, v2, v1 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -3120,17 +2854,15 @@ define i8 @extractelement_vgpr_v16i8_idx1(<16 x i8> addrspace(1)* %ptr) { ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] -; GFX8-NEXT: s_movk_i32 s4, 0xff ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v1, 8 -; GFX8-NEXT: v_mov_b32_e32 v2, s4 -; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0 -; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v0 -; GFX8-NEXT: v_and_b32_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_mov_b32_e32 v2, 16 +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v4 +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 8, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -3142,19 +2874,16 @@ define i8 @extractelement_vgpr_v16i8_idx1(<16 x i8> addrspace(1)* %ptr) { ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 -; GFX7-NEXT: s_movk_i32 s4, 0xff ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0 -; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 -; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 -; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX7-NEXT: v_bfe_u32 v3, v0, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v1, 24, v0 +; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v0 +; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v1 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] @@ -3166,14 +2895,13 @@ define i8 @extractelement_vgpr_v16i8_idx1(<16 x i8> addrspace(1)* %ptr) { ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: s_mov_b32 s4, 8 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: s_movk_i32 s4, 0xff -; GFX10-NEXT: v_and_b32_sdwa v3, v0, s4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_or_b32 v0, v0, s4, v1 -; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v2 -; GFX10-NEXT: v_or3_b32 v0, v0, v3, v1 +; GFX10-NEXT: v_mov_b32_e32 v1, 16 +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v0, 0xff, v0, v2 +; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v3 +; GFX10-NEXT: v_or3_b32 v0, v0, v1, v2 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr @@ -3186,16 +2914,16 @@ define i8 @extractelement_vgpr_v16i8_idx2(<16 x i8> addrspace(1)* %ptr) { ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX9-NEXT: s_mov_b32 s4, 8 -; GFX9-NEXT: s_movk_i32 s5, 0xff ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_b32_sdwa v3, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_or_b32 v0, v0, s5, v1 -; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v2 -; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 +; GFX9-NEXT: v_mov_b32_e32 v2, 8 +; GFX9-NEXT: s_mov_b32 s4, 16 +; GFX9-NEXT: v_mov_b32_e32 v1, 0xff +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_and_or_b32 v0, v0, v1, v2 +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX9-NEXT: v_or3_b32 v0, v0, v4, v1 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -3203,17 +2931,15 @@ define i8 @extractelement_vgpr_v16i8_idx2(<16 x i8> addrspace(1)* %ptr) { ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] -; GFX8-NEXT: s_movk_i32 s4, 0xff ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v1, 8 -; GFX8-NEXT: v_mov_b32_e32 v2, s4 -; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0 -; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v0 -; GFX8-NEXT: v_and_b32_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_mov_b32_e32 v2, 16 +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v4 +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -3225,19 +2951,16 @@ define i8 @extractelement_vgpr_v16i8_idx2(<16 x i8> addrspace(1)* %ptr) { ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 -; GFX7-NEXT: s_movk_i32 s4, 0xff ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0 -; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 -; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 -; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX7-NEXT: v_bfe_u32 v3, v0, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v1, 24, v0 +; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v0 +; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v1 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] @@ -3247,14 +2970,13 @@ define i8 @extractelement_vgpr_v16i8_idx2(<16 x i8> addrspace(1)* %ptr) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX10-NEXT: s_mov_b32 s4, 8 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v0 +; GFX10-NEXT: v_mov_b32_e32 v1, 8 +; GFX10-NEXT: s_mov_b32 s4, 16 +; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: s_movk_i32 s4, 0xff -; GFX10-NEXT: v_and_b32_sdwa v3, v0, s4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_or_b32 v0, v0, s4, v1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v3, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v0, 0xff, v0, v1 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v2 ; GFX10-NEXT: v_or3_b32 v0, v0, v3, v1 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v0 @@ -3269,15 +2991,15 @@ define i8 @extractelement_vgpr_v16i8_idx3(<16 x i8> addrspace(1)* %ptr) { ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX9-NEXT: s_mov_b32 s4, 8 -; GFX9-NEXT: s_movk_i32 s5, 0xff ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_b32_sdwa v3, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_or_b32 v0, v0, s5, v1 -; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v2 +; GFX9-NEXT: v_mov_b32_e32 v2, 8 +; GFX9-NEXT: v_mov_b32_e32 v3, 16 +; GFX9-NEXT: v_mov_b32_e32 v1, 0xff +; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v0 +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_and_or_b32 v0, v0, v1, v2 +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v4 ; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 24, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -3286,17 +3008,15 @@ define i8 @extractelement_vgpr_v16i8_idx3(<16 x i8> addrspace(1)* %ptr) { ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] -; GFX8-NEXT: s_movk_i32 s4, 0xff ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v1, 8 -; GFX8-NEXT: v_mov_b32_e32 v2, s4 -; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0 -; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v0 -; GFX8-NEXT: v_and_b32_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_mov_b32_e32 v2, 16 +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v4 +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -3308,19 +3028,16 @@ define i8 @extractelement_vgpr_v16i8_idx3(<16 x i8> addrspace(1)* %ptr) { ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 -; GFX7-NEXT: s_movk_i32 s4, 0xff ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0 -; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 -; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 -; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX7-NEXT: v_bfe_u32 v3, v0, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v1, 24, v0 +; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v0 +; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v1 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] @@ -3330,16 +3047,15 @@ define i8 @extractelement_vgpr_v16i8_idx3(<16 x i8> addrspace(1)* %ptr) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX10-NEXT: s_mov_b32 s4, 8 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: s_movk_i32 s4, 0xff -; GFX10-NEXT: v_and_b32_sdwa v3, v0, s4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_or_b32 v0, v0, s4, v1 -; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v2 -; GFX10-NEXT: v_or3_b32 v0, v0, v3, v1 +; GFX10-NEXT: v_mov_b32_e32 v1, 8 +; GFX10-NEXT: v_mov_b32_e32 v2, 16 +; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v0, 0xff, v0, v1 +; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX10-NEXT: v_or3_b32 v0, v0, v2, v1 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr @@ -3352,15 +3068,15 @@ define i8 @extractelement_vgpr_v16i8_idx4(<16 x i8> addrspace(1)* %ptr) { ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX9-NEXT: s_mov_b32 s4, 8 -; GFX9-NEXT: s_movk_i32 s5, 0xff ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v1 -; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v1 -; GFX9-NEXT: v_and_b32_sdwa v3, v1, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_or_b32 v0, v1, s5, v0 -; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v2 +; GFX9-NEXT: v_mov_b32_e32 v2, 8 +; GFX9-NEXT: v_mov_b32_e32 v3, 16 +; GFX9-NEXT: v_mov_b32_e32 v0, 0xff +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_and_or_b32 v0, v1, v0, v2 +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v4 ; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -3368,17 +3084,15 @@ define i8 @extractelement_vgpr_v16i8_idx4(<16 x i8> addrspace(1)* %ptr) { ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] -; GFX8-NEXT: s_movk_i32 s4, 0xff ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, 8 -; GFX8-NEXT: v_mov_b32_e32 v2, s4 -; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v1 -; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v1 -; GFX8-NEXT: v_and_b32_sdwa v2, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_mov_b32_e32 v2, 16 +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v4 +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; @@ -3389,20 +3103,17 @@ define i8 @extractelement_vgpr_v16i8_idx4(<16 x i8> addrspace(1)* %ptr) { ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 -; GFX7-NEXT: s_movk_i32 s4, 0xff ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v1 -; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 -; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 -; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v1 -; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0 +; GFX7-NEXT: v_bfe_u32 v3, v1, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v1 +; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 +; GFX7-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 ; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: extractelement_vgpr_v16i8_idx4: @@ -3410,16 +3121,15 @@ define i8 @extractelement_vgpr_v16i8_idx4(<16 x i8> addrspace(1)* %ptr) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX10-NEXT: s_mov_b32 s4, 8 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v1 -; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v1 -; GFX10-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: s_movk_i32 s4, 0xff -; GFX10-NEXT: v_and_b32_sdwa v3, v1, s4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_or_b32 v0, v1, s4, v0 -; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v2 -; GFX10-NEXT: v_or3_b32 v0, v0, v3, v1 +; GFX10-NEXT: v_mov_b32_e32 v0, 8 +; GFX10-NEXT: v_mov_b32_e32 v2, 16 +; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v0, 0xff, v1, v0 +; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX10-NEXT: v_or3_b32 v0, v0, v2, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr %element = extractelement <16 x i8> %vector, i32 4 @@ -3432,15 +3142,15 @@ define i8 @extractelement_vgpr_v16i8_idx5(<16 x i8> addrspace(1)* %ptr) { ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX9-NEXT: s_mov_b32 s4, 8 -; GFX9-NEXT: s_movk_i32 s5, 0xff ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v1 -; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v1 -; GFX9-NEXT: v_and_b32_sdwa v3, v1, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_or_b32 v0, v1, s5, v0 -; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v2 -; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 +; GFX9-NEXT: v_mov_b32_e32 v2, 16 +; GFX9-NEXT: v_mov_b32_e32 v0, 0xff +; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_and_or_b32 v0, v1, v0, v4 +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX9-NEXT: v_or3_b32 v0, v0, v2, v1 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -3448,17 +3158,15 @@ define i8 @extractelement_vgpr_v16i8_idx5(<16 x i8> addrspace(1)* %ptr) { ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] -; GFX8-NEXT: s_movk_i32 s4, 0xff ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, 8 -; GFX8-NEXT: v_mov_b32_e32 v2, s4 -; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v1 -; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v1 -; GFX8-NEXT: v_and_b32_sdwa v2, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_mov_b32_e32 v2, 16 +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v4 +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 8, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -3470,20 +3178,17 @@ define i8 @extractelement_vgpr_v16i8_idx5(<16 x i8> addrspace(1)* %ptr) { ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 -; GFX7-NEXT: s_movk_i32 s4, 0xff ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v1 -; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 -; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 -; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v1 -; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0 +; GFX7-NEXT: v_bfe_u32 v3, v1, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v1 +; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 +; GFX7-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 ; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; @@ -3494,14 +3199,13 @@ define i8 @extractelement_vgpr_v16i8_idx5(<16 x i8> addrspace(1)* %ptr) { ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: s_mov_b32 s4, 8 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v1 -; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v1 -; GFX10-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: s_movk_i32 s4, 0xff -; GFX10-NEXT: v_and_b32_sdwa v3, v1, s4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_or_b32 v0, v1, s4, v0 -; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v2 -; GFX10-NEXT: v_or3_b32 v0, v0, v3, v1 +; GFX10-NEXT: v_mov_b32_e32 v0, 16 +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v1, 0xff, v1, v2 +; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v3 +; GFX10-NEXT: v_or3_b32 v0, v1, v0, v2 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr @@ -3514,16 +3218,16 @@ define i8 @extractelement_vgpr_v16i8_idx6(<16 x i8> addrspace(1)* %ptr) { ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX9-NEXT: s_mov_b32 s4, 8 -; GFX9-NEXT: s_movk_i32 s5, 0xff ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v1 -; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v1 -; GFX9-NEXT: v_and_b32_sdwa v3, v1, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_or_b32 v0, v1, s5, v0 -; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v2 -; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 +; GFX9-NEXT: v_mov_b32_e32 v2, 8 +; GFX9-NEXT: s_mov_b32 s4, 16 +; GFX9-NEXT: v_mov_b32_e32 v0, 0xff +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_and_or_b32 v0, v1, v0, v2 +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX9-NEXT: v_or3_b32 v0, v0, v4, v1 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -3531,17 +3235,15 @@ define i8 @extractelement_vgpr_v16i8_idx6(<16 x i8> addrspace(1)* %ptr) { ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] -; GFX8-NEXT: s_movk_i32 s4, 0xff ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, 8 -; GFX8-NEXT: v_mov_b32_e32 v2, s4 -; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v1 -; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v1 -; GFX8-NEXT: v_and_b32_sdwa v2, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_mov_b32_e32 v2, 16 +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v4 +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -3553,20 +3255,17 @@ define i8 @extractelement_vgpr_v16i8_idx6(<16 x i8> addrspace(1)* %ptr) { ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 -; GFX7-NEXT: s_movk_i32 s4, 0xff ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v1 -; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 -; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 -; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v1 -; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0 +; GFX7-NEXT: v_bfe_u32 v3, v1, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v1 +; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 +; GFX7-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 ; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; @@ -3575,14 +3274,13 @@ define i8 @extractelement_vgpr_v16i8_idx6(<16 x i8> addrspace(1)* %ptr) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX10-NEXT: s_mov_b32 s4, 8 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v1 +; GFX10-NEXT: v_mov_b32_e32 v0, 8 +; GFX10-NEXT: s_mov_b32 s4, 16 +; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v1 -; GFX10-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: s_movk_i32 s4, 0xff -; GFX10-NEXT: v_and_b32_sdwa v3, v1, s4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_or_b32 v0, v1, s4, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v3, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v0, 0xff, v1, v0 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v2 ; GFX10-NEXT: v_or3_b32 v0, v0, v3, v1 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v0 @@ -3597,15 +3295,15 @@ define i8 @extractelement_vgpr_v16i8_idx7(<16 x i8> addrspace(1)* %ptr) { ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX9-NEXT: s_mov_b32 s4, 8 -; GFX9-NEXT: s_movk_i32 s5, 0xff ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v1 -; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v1 -; GFX9-NEXT: v_and_b32_sdwa v3, v1, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_or_b32 v0, v1, s5, v0 -; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v2 +; GFX9-NEXT: v_mov_b32_e32 v2, 8 +; GFX9-NEXT: v_mov_b32_e32 v3, 16 +; GFX9-NEXT: v_mov_b32_e32 v0, 0xff +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_and_or_b32 v0, v1, v0, v2 +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v4 ; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 24, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -3614,17 +3312,15 @@ define i8 @extractelement_vgpr_v16i8_idx7(<16 x i8> addrspace(1)* %ptr) { ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] -; GFX8-NEXT: s_movk_i32 s4, 0xff ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, 8 -; GFX8-NEXT: v_mov_b32_e32 v2, s4 -; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v1 -; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v1 -; GFX8-NEXT: v_and_b32_sdwa v2, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_mov_b32_e32 v2, 16 +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v4 +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -3636,20 +3332,17 @@ define i8 @extractelement_vgpr_v16i8_idx7(<16 x i8> addrspace(1)* %ptr) { ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 -; GFX7-NEXT: s_movk_i32 s4, 0xff ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v1 -; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 -; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 -; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v1 -; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0 +; GFX7-NEXT: v_bfe_u32 v3, v1, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v1 +; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 +; GFX7-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 ; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; @@ -3658,16 +3351,15 @@ define i8 @extractelement_vgpr_v16i8_idx7(<16 x i8> addrspace(1)* %ptr) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX10-NEXT: s_mov_b32 s4, 8 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v1 -; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v1 -; GFX10-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: s_movk_i32 s4, 0xff -; GFX10-NEXT: v_and_b32_sdwa v3, v1, s4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_or_b32 v0, v1, s4, v0 -; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v2 -; GFX10-NEXT: v_or3_b32 v0, v0, v3, v1 +; GFX10-NEXT: v_mov_b32_e32 v0, 8 +; GFX10-NEXT: v_mov_b32_e32 v2, 16 +; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v0, 0xff, v1, v0 +; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX10-NEXT: v_or3_b32 v0, v0, v2, v1 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr @@ -3680,15 +3372,15 @@ define i8 @extractelement_vgpr_v16i8_idx8(<16 x i8> addrspace(1)* %ptr) { ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX9-NEXT: s_mov_b32 s4, 8 -; GFX9-NEXT: s_movk_i32 s5, 0xff ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v2 -; GFX9-NEXT: v_lshrrev_b32_e32 v1, 24, v2 -; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_b32_sdwa v3, v2, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_or_b32 v0, v2, s5, v0 -; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v1 +; GFX9-NEXT: v_mov_b32_e32 v1, 8 +; GFX9-NEXT: v_mov_b32_e32 v3, 16 +; GFX9-NEXT: v_mov_b32_e32 v0, 0xff +; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v2 +; GFX9-NEXT: v_and_or_b32 v0, v2, v0, v1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v3, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v4 ; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -3696,17 +3388,15 @@ define i8 @extractelement_vgpr_v16i8_idx8(<16 x i8> addrspace(1)* %ptr) { ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] -; GFX8-NEXT: s_movk_i32 s4, 0xff ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, 8 -; GFX8-NEXT: v_mov_b32_e32 v1, s4 -; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v2 -; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v2 -; GFX8-NEXT: v_and_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_mov_b32_e32 v1, 16 +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v2 +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v4 +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; @@ -3717,20 +3407,17 @@ define i8 @extractelement_vgpr_v16i8_idx8(<16 x i8> addrspace(1)* %ptr) { ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 -; GFX7-NEXT: s_movk_i32 s4, 0xff ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v2 -; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v2 -; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 -; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v2 -; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_bfe_u32 v3, v2, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v2 +; GFX7-NEXT: v_and_b32_e32 v1, 0xff, v2 +; GFX7-NEXT: v_bfe_u32 v2, v2, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX7-NEXT: v_or_b32_e32 v1, v1, v3 +; GFX7-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: extractelement_vgpr_v16i8_idx8: @@ -3738,16 +3425,15 @@ define i8 @extractelement_vgpr_v16i8_idx8(<16 x i8> addrspace(1)* %ptr) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX10-NEXT: s_mov_b32 s4, 8 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v2 -; GFX10-NEXT: v_lshrrev_b32_e32 v1, 24, v2 -; GFX10-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: s_movk_i32 s4, 0xff -; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v1 -; GFX10-NEXT: v_and_b32_sdwa v3, v2, s4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_or_b32 v0, v2, s4, v0 -; GFX10-NEXT: v_or3_b32 v0, v0, v3, v1 +; GFX10-NEXT: v_mov_b32_e32 v0, 8 +; GFX10-NEXT: v_mov_b32_e32 v1, 16 +; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v0, 0xff, v2, v0 +; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v3 +; GFX10-NEXT: v_or3_b32 v0, v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr %element = extractelement <16 x i8> %vector, i32 8 @@ -3760,15 +3446,15 @@ define i8 @extractelement_vgpr_v16i8_idx9(<16 x i8> addrspace(1)* %ptr) { ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX9-NEXT: s_mov_b32 s4, 8 -; GFX9-NEXT: s_movk_i32 s5, 0xff ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v2 -; GFX9-NEXT: v_lshrrev_b32_e32 v1, 24, v2 -; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_b32_sdwa v3, v2, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_or_b32 v0, v2, s5, v0 -; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v1 -; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 +; GFX9-NEXT: v_mov_b32_e32 v1, 16 +; GFX9-NEXT: v_mov_b32_e32 v0, 0xff +; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_and_or_b32 v0, v2, v0, v4 +; GFX9-NEXT: v_lshlrev_b32_e32 v2, 24, v3 +; GFX9-NEXT: v_or3_b32 v0, v0, v1, v2 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -3776,17 +3462,15 @@ define i8 @extractelement_vgpr_v16i8_idx9(<16 x i8> addrspace(1)* %ptr) { ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] -; GFX8-NEXT: s_movk_i32 s4, 0xff ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, 8 -; GFX8-NEXT: v_mov_b32_e32 v1, s4 -; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v2 -; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v2 -; GFX8-NEXT: v_and_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_mov_b32_e32 v1, 16 +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v2 +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v4 +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 8, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -3798,20 +3482,17 @@ define i8 @extractelement_vgpr_v16i8_idx9(<16 x i8> addrspace(1)* %ptr) { ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 -; GFX7-NEXT: s_movk_i32 s4, 0xff ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v2 -; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v2 -; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 -; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v2 -; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_bfe_u32 v3, v2, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v2 +; GFX7-NEXT: v_and_b32_e32 v1, 0xff, v2 +; GFX7-NEXT: v_bfe_u32 v2, v2, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX7-NEXT: v_or_b32_e32 v1, v1, v3 +; GFX7-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; @@ -3822,14 +3503,13 @@ define i8 @extractelement_vgpr_v16i8_idx9(<16 x i8> addrspace(1)* %ptr) { ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: s_mov_b32 s4, 8 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v2 -; GFX10-NEXT: v_lshrrev_b32_e32 v1, 24, v2 -; GFX10-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: s_movk_i32 s4, 0xff -; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v1 -; GFX10-NEXT: v_and_b32_sdwa v3, v2, s4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_or_b32 v0, v2, s4, v0 -; GFX10-NEXT: v_or3_b32 v0, v0, v3, v1 +; GFX10-NEXT: v_mov_b32_e32 v0, 16 +; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v1, 0xff, v2, v1 +; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v3 +; GFX10-NEXT: v_or3_b32 v0, v1, v0, v2 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr @@ -3842,16 +3522,16 @@ define i8 @extractelement_vgpr_v16i8_idx10(<16 x i8> addrspace(1)* %ptr) { ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX9-NEXT: s_mov_b32 s4, 8 -; GFX9-NEXT: s_movk_i32 s5, 0xff ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v2 -; GFX9-NEXT: v_lshrrev_b32_e32 v1, 24, v2 -; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_b32_sdwa v3, v2, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_or_b32 v0, v2, s5, v0 -; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v1 -; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 +; GFX9-NEXT: v_mov_b32_e32 v1, 8 +; GFX9-NEXT: s_mov_b32 s4, 16 +; GFX9-NEXT: v_mov_b32_e32 v0, 0xff +; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v2 +; GFX9-NEXT: v_and_or_b32 v0, v2, v0, v1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX9-NEXT: v_or3_b32 v0, v0, v4, v1 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -3859,17 +3539,15 @@ define i8 @extractelement_vgpr_v16i8_idx10(<16 x i8> addrspace(1)* %ptr) { ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] -; GFX8-NEXT: s_movk_i32 s4, 0xff ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, 8 -; GFX8-NEXT: v_mov_b32_e32 v1, s4 -; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v2 -; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v2 -; GFX8-NEXT: v_and_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_mov_b32_e32 v1, 16 +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v2 +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v4 +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -3881,20 +3559,17 @@ define i8 @extractelement_vgpr_v16i8_idx10(<16 x i8> addrspace(1)* %ptr) { ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 -; GFX7-NEXT: s_movk_i32 s4, 0xff ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v2 -; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v2 -; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 -; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v2 -; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_bfe_u32 v3, v2, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v2 +; GFX7-NEXT: v_and_b32_e32 v1, 0xff, v2 +; GFX7-NEXT: v_bfe_u32 v2, v2, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX7-NEXT: v_or_b32_e32 v1, v1, v3 +; GFX7-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; @@ -3903,15 +3578,14 @@ define i8 @extractelement_vgpr_v16i8_idx10(<16 x i8> addrspace(1)* %ptr) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX10-NEXT: s_mov_b32 s4, 8 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v2 +; GFX10-NEXT: v_mov_b32_e32 v0, 8 +; GFX10-NEXT: s_mov_b32 s4, 16 +; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 24, v2 -; GFX10-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: s_movk_i32 s4, 0xff +; GFX10-NEXT: v_lshlrev_b32_sdwa v3, s4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v0, 0xff, v2, v0 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v1 -; GFX10-NEXT: v_and_b32_sdwa v3, v2, s4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_or_b32 v0, v2, s4, v0 ; GFX10-NEXT: v_or3_b32 v0, v0, v3, v1 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -3925,15 +3599,15 @@ define i8 @extractelement_vgpr_v16i8_idx11(<16 x i8> addrspace(1)* %ptr) { ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX9-NEXT: s_mov_b32 s4, 8 -; GFX9-NEXT: s_movk_i32 s5, 0xff ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v2 -; GFX9-NEXT: v_lshrrev_b32_e32 v1, 24, v2 -; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_b32_sdwa v3, v2, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_or_b32 v0, v2, s5, v0 -; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v1 +; GFX9-NEXT: v_mov_b32_e32 v1, 8 +; GFX9-NEXT: v_mov_b32_e32 v3, 16 +; GFX9-NEXT: v_mov_b32_e32 v0, 0xff +; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v2 +; GFX9-NEXT: v_and_or_b32 v0, v2, v0, v1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v3, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v4 ; GFX9-NEXT: v_or3_b32 v0, v0, v3, v1 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 24, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -3942,17 +3616,15 @@ define i8 @extractelement_vgpr_v16i8_idx11(<16 x i8> addrspace(1)* %ptr) { ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] -; GFX8-NEXT: s_movk_i32 s4, 0xff ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, 8 -; GFX8-NEXT: v_mov_b32_e32 v1, s4 -; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v2 -; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v2 -; GFX8-NEXT: v_and_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_mov_b32_e32 v1, 16 +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v2 +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v4 +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v3 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -3964,20 +3636,17 @@ define i8 @extractelement_vgpr_v16i8_idx11(<16 x i8> addrspace(1)* %ptr) { ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 -; GFX7-NEXT: s_movk_i32 s4, 0xff ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v2 -; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v2 -; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 -; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v2 -; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_bfe_u32 v3, v2, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v2 +; GFX7-NEXT: v_and_b32_e32 v1, 0xff, v2 +; GFX7-NEXT: v_bfe_u32 v2, v2, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX7-NEXT: v_or_b32_e32 v1, v1, v3 +; GFX7-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; @@ -3986,16 +3655,15 @@ define i8 @extractelement_vgpr_v16i8_idx11(<16 x i8> addrspace(1)* %ptr) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX10-NEXT: s_mov_b32 s4, 8 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v2 -; GFX10-NEXT: v_lshrrev_b32_e32 v1, 24, v2 -; GFX10-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: s_movk_i32 s4, 0xff -; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v1 -; GFX10-NEXT: v_and_b32_sdwa v3, v2, s4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_or_b32 v0, v2, s4, v0 -; GFX10-NEXT: v_or3_b32 v0, v0, v3, v1 +; GFX10-NEXT: v_mov_b32_e32 v0, 8 +; GFX10-NEXT: v_mov_b32_e32 v1, 16 +; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v0, 0xff, v2, v0 +; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v3 +; GFX10-NEXT: v_or3_b32 v0, v0, v1, v2 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr @@ -4008,15 +3676,15 @@ define i8 @extractelement_vgpr_v16i8_idx12(<16 x i8> addrspace(1)* %ptr) { ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX9-NEXT: s_mov_b32 s4, 8 -; GFX9-NEXT: s_movk_i32 s5, 0xff ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v3 -; GFX9-NEXT: v_lshrrev_b32_e32 v1, 24, v3 -; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_b32_sdwa v2, v3, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_or_b32 v0, v3, s5, v0 -; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v1 +; GFX9-NEXT: v_mov_b32_e32 v1, 8 +; GFX9-NEXT: v_mov_b32_e32 v2, 16 +; GFX9-NEXT: v_mov_b32_e32 v0, 0xff +; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v3 +; GFX9-NEXT: v_and_or_b32 v0, v3, v0, v1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v4 ; GFX9-NEXT: v_or3_b32 v0, v0, v2, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -4024,17 +3692,15 @@ define i8 @extractelement_vgpr_v16i8_idx12(<16 x i8> addrspace(1)* %ptr) { ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] -; GFX8-NEXT: s_movk_i32 s4, 0xff ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, 8 -; GFX8-NEXT: v_mov_b32_e32 v1, s4 -; GFX8-NEXT: v_lshrrev_b32_e32 v2, 8, v3 -; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v3 -; GFX8-NEXT: v_and_b32_sdwa v1, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_mov_b32_e32 v1, 16 +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v2, 24, v3 +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX8-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v4 +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v2 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; @@ -4045,20 +3711,17 @@ define i8 @extractelement_vgpr_v16i8_idx12(<16 x i8> addrspace(1)* %ptr) { ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 -; GFX7-NEXT: s_movk_i32 s4, 0xff ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v3 -; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v3 -; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 -; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v2, 24, v3 -; GFX7-NEXT: v_and_b32_e32 v3, s4, v3 -; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX7-NEXT: v_or_b32_e32 v0, v3, v0 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v2 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_bfe_u32 v2, v3, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v3 +; GFX7-NEXT: v_and_b32_e32 v1, 0xff, v3 +; GFX7-NEXT: v_bfe_u32 v3, v3, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 8, v2 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX7-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX7-NEXT: v_or_b32_e32 v1, v1, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: extractelement_vgpr_v16i8_idx12: @@ -4066,16 +3729,15 @@ define i8 @extractelement_vgpr_v16i8_idx12(<16 x i8> addrspace(1)* %ptr) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX10-NEXT: s_mov_b32 s4, 8 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v3 -; GFX10-NEXT: v_lshrrev_b32_e32 v1, 24, v3 -; GFX10-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: s_movk_i32 s4, 0xff -; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v1 -; GFX10-NEXT: v_and_b32_sdwa v2, v3, s4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_or_b32 v0, v3, s4, v0 -; GFX10-NEXT: v_or3_b32 v0, v0, v2, v1 +; GFX10-NEXT: v_mov_b32_e32 v0, 8 +; GFX10-NEXT: v_mov_b32_e32 v1, 16 +; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v3 +; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v0, 0xff, v3, v0 +; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v2 +; GFX10-NEXT: v_or3_b32 v0, v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr %element = extractelement <16 x i8> %vector, i32 12 @@ -4088,15 +3750,15 @@ define i8 @extractelement_vgpr_v16i8_idx13(<16 x i8> addrspace(1)* %ptr) { ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX9-NEXT: s_mov_b32 s4, 8 -; GFX9-NEXT: s_movk_i32 s5, 0xff ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v3 -; GFX9-NEXT: v_lshrrev_b32_e32 v1, 24, v3 -; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_b32_sdwa v2, v3, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_or_b32 v0, v3, s5, v0 -; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v1 -; GFX9-NEXT: v_or3_b32 v0, v0, v2, v1 +; GFX9-NEXT: v_mov_b32_e32 v1, 16 +; GFX9-NEXT: v_mov_b32_e32 v0, 0xff +; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v3 +; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_and_or_b32 v0, v3, v0, v4 +; GFX9-NEXT: v_lshlrev_b32_e32 v2, 24, v2 +; GFX9-NEXT: v_or3_b32 v0, v0, v1, v2 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -4104,17 +3766,15 @@ define i8 @extractelement_vgpr_v16i8_idx13(<16 x i8> addrspace(1)* %ptr) { ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] -; GFX8-NEXT: s_movk_i32 s4, 0xff ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, 8 -; GFX8-NEXT: v_mov_b32_e32 v1, s4 -; GFX8-NEXT: v_lshrrev_b32_e32 v2, 8, v3 -; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v3 -; GFX8-NEXT: v_and_b32_sdwa v1, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_mov_b32_e32 v1, 16 +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v2, 24, v3 +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX8-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v4 +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v2 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 8, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -4126,20 +3786,17 @@ define i8 @extractelement_vgpr_v16i8_idx13(<16 x i8> addrspace(1)* %ptr) { ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 -; GFX7-NEXT: s_movk_i32 s4, 0xff ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v3 -; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v3 -; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 -; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v2, 24, v3 -; GFX7-NEXT: v_and_b32_e32 v3, s4, v3 -; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX7-NEXT: v_or_b32_e32 v0, v3, v0 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v2 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_bfe_u32 v2, v3, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v3 +; GFX7-NEXT: v_and_b32_e32 v1, 0xff, v3 +; GFX7-NEXT: v_bfe_u32 v3, v3, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 8, v2 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX7-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX7-NEXT: v_or_b32_e32 v1, v1, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; @@ -4150,14 +3807,13 @@ define i8 @extractelement_vgpr_v16i8_idx13(<16 x i8> addrspace(1)* %ptr) { ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: s_mov_b32 s4, 8 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v3 -; GFX10-NEXT: v_lshrrev_b32_e32 v1, 24, v3 -; GFX10-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: s_movk_i32 s4, 0xff -; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v1 -; GFX10-NEXT: v_and_b32_sdwa v2, v3, s4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_or_b32 v0, v3, s4, v0 -; GFX10-NEXT: v_or3_b32 v0, v0, v2, v1 +; GFX10-NEXT: v_mov_b32_e32 v0, 16 +; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v3 +; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v1, 0xff, v3, v1 +; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v2 +; GFX10-NEXT: v_or3_b32 v0, v1, v0, v2 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr @@ -4170,16 +3826,16 @@ define i8 @extractelement_vgpr_v16i8_idx14(<16 x i8> addrspace(1)* %ptr) { ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX9-NEXT: s_mov_b32 s4, 8 -; GFX9-NEXT: s_movk_i32 s5, 0xff ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v3 -; GFX9-NEXT: v_lshrrev_b32_e32 v1, 24, v3 -; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_b32_sdwa v2, v3, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_or_b32 v0, v3, s5, v0 -; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v1 -; GFX9-NEXT: v_or3_b32 v0, v0, v2, v1 +; GFX9-NEXT: v_mov_b32_e32 v1, 8 +; GFX9-NEXT: s_mov_b32 s4, 16 +; GFX9-NEXT: v_mov_b32_e32 v0, 0xff +; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v3 +; GFX9-NEXT: v_and_or_b32 v0, v3, v0, v1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v2 +; GFX9-NEXT: v_or3_b32 v0, v0, v4, v1 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -4187,17 +3843,15 @@ define i8 @extractelement_vgpr_v16i8_idx14(<16 x i8> addrspace(1)* %ptr) { ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] -; GFX8-NEXT: s_movk_i32 s4, 0xff ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, 8 -; GFX8-NEXT: v_mov_b32_e32 v1, s4 -; GFX8-NEXT: v_lshrrev_b32_e32 v2, 8, v3 -; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v3 -; GFX8-NEXT: v_and_b32_sdwa v1, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_mov_b32_e32 v1, 16 +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v2, 24, v3 +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX8-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v4 +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v2 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -4209,20 +3863,17 @@ define i8 @extractelement_vgpr_v16i8_idx14(<16 x i8> addrspace(1)* %ptr) { ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 -; GFX7-NEXT: s_movk_i32 s4, 0xff ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v3 -; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v3 -; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 -; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v2, 24, v3 -; GFX7-NEXT: v_and_b32_e32 v3, s4, v3 -; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX7-NEXT: v_or_b32_e32 v0, v3, v0 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v2 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_bfe_u32 v2, v3, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v3 +; GFX7-NEXT: v_and_b32_e32 v1, 0xff, v3 +; GFX7-NEXT: v_bfe_u32 v3, v3, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 8, v2 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX7-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX7-NEXT: v_or_b32_e32 v1, v1, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; @@ -4231,15 +3882,14 @@ define i8 @extractelement_vgpr_v16i8_idx14(<16 x i8> addrspace(1)* %ptr) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX10-NEXT: s_mov_b32 s4, 8 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v3 +; GFX10-NEXT: v_mov_b32_e32 v0, 8 +; GFX10-NEXT: s_mov_b32 s4, 16 +; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 24, v3 -; GFX10-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: s_movk_i32 s4, 0xff +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v0, 0xff, v3, v0 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v1 -; GFX10-NEXT: v_and_b32_sdwa v2, v3, s4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_or_b32 v0, v3, s4, v0 ; GFX10-NEXT: v_or3_b32 v0, v0, v2, v1 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -4253,15 +3903,15 @@ define i8 @extractelement_vgpr_v16i8_idx15(<16 x i8> addrspace(1)* %ptr) { ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX9-NEXT: s_mov_b32 s4, 8 -; GFX9-NEXT: s_movk_i32 s5, 0xff ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v3 -; GFX9-NEXT: v_lshrrev_b32_e32 v1, 24, v3 -; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_b32_sdwa v2, v3, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_or_b32 v0, v3, s5, v0 -; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v1 +; GFX9-NEXT: v_mov_b32_e32 v1, 8 +; GFX9-NEXT: v_mov_b32_e32 v2, 16 +; GFX9-NEXT: v_mov_b32_e32 v0, 0xff +; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v3 +; GFX9-NEXT: v_and_or_b32 v0, v3, v0, v1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v4 ; GFX9-NEXT: v_or3_b32 v0, v0, v2, v1 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 24, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -4270,17 +3920,15 @@ define i8 @extractelement_vgpr_v16i8_idx15(<16 x i8> addrspace(1)* %ptr) { ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] -; GFX8-NEXT: s_movk_i32 s4, 0xff ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, 8 -; GFX8-NEXT: v_mov_b32_e32 v1, s4 -; GFX8-NEXT: v_lshrrev_b32_e32 v2, 8, v3 -; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v3 -; GFX8-NEXT: v_and_b32_sdwa v1, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_mov_b32_e32 v1, 16 +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v2, 24, v3 +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX8-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v4 +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v2 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -4292,20 +3940,17 @@ define i8 @extractelement_vgpr_v16i8_idx15(<16 x i8> addrspace(1)* %ptr) { ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 -; GFX7-NEXT: s_movk_i32 s4, 0xff ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v3 -; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v3 -; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 -; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v2, 24, v3 -; GFX7-NEXT: v_and_b32_e32 v3, s4, v3 -; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX7-NEXT: v_or_b32_e32 v0, v3, v0 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v2 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_bfe_u32 v2, v3, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v3 +; GFX7-NEXT: v_and_b32_e32 v1, 0xff, v3 +; GFX7-NEXT: v_bfe_u32 v3, v3, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 8, v2 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX7-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX7-NEXT: v_or_b32_e32 v1, v1, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; @@ -4314,16 +3959,15 @@ define i8 @extractelement_vgpr_v16i8_idx15(<16 x i8> addrspace(1)* %ptr) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX10-NEXT: s_mov_b32 s4, 8 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v0, 8, v3 -; GFX10-NEXT: v_lshrrev_b32_e32 v1, 24, v3 -; GFX10-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: s_movk_i32 s4, 0xff -; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v1 -; GFX10-NEXT: v_and_b32_sdwa v2, v3, s4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_or_b32 v0, v3, s4, v0 -; GFX10-NEXT: v_or3_b32 v0, v0, v2, v1 +; GFX10-NEXT: v_mov_b32_e32 v0, 8 +; GFX10-NEXT: v_mov_b32_e32 v1, 16 +; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v3 +; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v0, 0xff, v3, v0 +; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v2 +; GFX10-NEXT: v_or3_b32 v0, v0, v1, v2 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll index 552906092fd44..cbeb0140efd00 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll @@ -551,12 +551,12 @@ define amdgpu_ps i16 @s_fshl_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg, i16 in ; GFX6-NEXT: s_lshr_b32 s4, s2, 8 ; GFX6-NEXT: s_andn2_b32 s2, 7, s2 ; GFX6-NEXT: s_lshr_b32 s5, s5, 1 -; GFX6-NEXT: s_lshr_b32 s1, s1, 9 +; GFX6-NEXT: s_bfe_u32 s1, s1, 0x80008 ; GFX6-NEXT: s_lshr_b32 s2, s5, s2 ; GFX6-NEXT: s_or_b32 s0, s0, s2 ; GFX6-NEXT: s_and_b32 s2, s4, 7 ; GFX6-NEXT: s_andn2_b32 s4, 7, s4 -; GFX6-NEXT: s_and_b32 s1, s1, 0x7f +; GFX6-NEXT: s_lshr_b32 s1, s1, 1 ; GFX6-NEXT: s_lshl_b32 s2, s3, s2 ; GFX6-NEXT: s_lshr_b32 s1, s1, s4 ; GFX6-NEXT: s_or_b32 s1, s2, s1 @@ -673,12 +673,12 @@ define i16 @v_fshl_v2i8(i16 %lhs.arg, i16 %rhs.arg, i16 %amt.arg) { ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 8, v0 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v5, v0 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, v2, v6 -; GFX6-NEXT: v_lshrrev_b32_e32 v1, 9, v1 +; GFX6-NEXT: v_bfe_u32 v1, v1, 8, 8 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 ; GFX6-NEXT: v_and_b32_e32 v2, 7, v4 ; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4 ; GFX6-NEXT: v_and_b32_e32 v4, 7, v4 -; GFX6-NEXT: v_and_b32_e32 v1, 0x7f, v1 +; GFX6-NEXT: v_lshrrev_b32_e32 v1, 1, v1 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, v2, v3 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, v4, v1 ; GFX6-NEXT: v_mov_b32_e32 v5, 0xff @@ -795,17 +795,16 @@ define amdgpu_ps i32 @s_fshl_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg, i32 in ; GFX6-NEXT: s_or_b32 s0, s0, s2 ; GFX6-NEXT: s_and_b32 s2, s6, 7 ; GFX6-NEXT: s_lshl_b32 s2, s3, s2 -; GFX6-NEXT: s_lshr_b32 s3, s1, 9 -; GFX6-NEXT: s_movk_i32 s9, 0x7f +; GFX6-NEXT: s_bfe_u32 s3, s1, 0x80008 ; GFX6-NEXT: s_andn2_b32 s6, 7, s6 -; GFX6-NEXT: s_and_b32 s3, s3, s9 +; GFX6-NEXT: s_lshr_b32 s3, s3, 1 ; GFX6-NEXT: s_lshr_b32 s3, s3, s6 ; GFX6-NEXT: s_or_b32 s2, s2, s3 ; GFX6-NEXT: s_and_b32 s3, s7, 7 ; GFX6-NEXT: s_lshl_b32 s3, s4, s3 -; GFX6-NEXT: s_lshr_b32 s4, s1, 17 +; GFX6-NEXT: s_bfe_u32 s4, s1, 0x80010 ; GFX6-NEXT: s_andn2_b32 s6, 7, s7 -; GFX6-NEXT: s_and_b32 s4, s4, s9 +; GFX6-NEXT: s_lshr_b32 s4, s4, 1 ; GFX6-NEXT: s_lshr_b32 s4, s4, s6 ; GFX6-NEXT: s_and_b32 s2, s2, s10 ; GFX6-NEXT: s_or_b32 s3, s3, s4 @@ -1016,18 +1015,17 @@ define i32 @v_fshl_v4i8(i32 %lhs.arg, i32 %rhs.arg, i32 %amt.arg) { ; GFX6-NEXT: v_and_b32_e32 v2, 7, v6 ; GFX6-NEXT: v_xor_b32_e32 v6, -1, v6 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, v2, v3 -; GFX6-NEXT: v_lshrrev_b32_e32 v3, 9, v1 -; GFX6-NEXT: s_movk_i32 s4, 0x7f +; GFX6-NEXT: v_bfe_u32 v3, v1, 8, 8 ; GFX6-NEXT: v_and_b32_e32 v6, 7, v6 -; GFX6-NEXT: v_and_b32_e32 v3, s4, v3 +; GFX6-NEXT: v_lshrrev_b32_e32 v3, 1, v3 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, v6, v3 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v3 ; GFX6-NEXT: v_and_b32_e32 v3, 7, v7 ; GFX6-NEXT: v_xor_b32_e32 v6, -1, v7 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, v3, v4 -; GFX6-NEXT: v_lshrrev_b32_e32 v4, 17, v1 +; GFX6-NEXT: v_bfe_u32 v4, v1, 16, 8 ; GFX6-NEXT: v_and_b32_e32 v6, 7, v6 -; GFX6-NEXT: v_and_b32_e32 v4, s4, v4 +; GFX6-NEXT: v_lshrrev_b32_e32 v4, 1, v4 ; GFX6-NEXT: v_mov_b32_e32 v9, 0xff ; GFX6-NEXT: v_lshrrev_b32_e32 v4, v6, v4 ; GFX6-NEXT: v_xor_b32_e32 v6, -1, v8 @@ -1477,73 +1475,71 @@ define i24 @v_fshl_i24(i24 %lhs, i24 %rhs, i24 %amt) { define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 inreg %amt.arg) { ; GFX6-LABEL: s_fshl_v2i24: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_lshr_b32 s6, s0, 8 -; GFX6-NEXT: s_movk_i32 s10, 0xff -; GFX6-NEXT: s_and_b32 s6, s6, s10 -; GFX6-NEXT: s_lshr_b32 s7, s0, 16 -; GFX6-NEXT: s_lshr_b32 s8, s0, 24 -; GFX6-NEXT: s_and_b32 s0, s0, s10 -; GFX6-NEXT: s_lshl_b32 s6, s6, 8 -; GFX6-NEXT: s_or_b32 s0, s0, s6 -; GFX6-NEXT: s_and_b32 s6, s7, s10 +; GFX6-NEXT: s_movk_i32 s9, 0xff +; GFX6-NEXT: s_mov_b32 s11, 0x80008 +; GFX6-NEXT: s_lshr_b32 s6, s0, 16 +; GFX6-NEXT: s_lshr_b32 s7, s0, 24 +; GFX6-NEXT: s_and_b32 s10, s0, s9 +; GFX6-NEXT: s_bfe_u32 s0, s0, s11 +; GFX6-NEXT: s_lshl_b32 s0, s0, 8 +; GFX6-NEXT: s_and_b32 s6, s6, s9 +; GFX6-NEXT: s_or_b32 s0, s10, s0 ; GFX6-NEXT: s_bfe_u32 s6, s6, 0x100000 -; GFX6-NEXT: s_lshr_b32 s9, s1, 8 -; GFX6-NEXT: s_and_b32 s1, s1, s10 +; GFX6-NEXT: s_lshr_b32 s8, s1, 8 +; GFX6-NEXT: s_and_b32 s1, s1, s9 ; GFX6-NEXT: s_bfe_u32 s0, s0, 0x100000 ; GFX6-NEXT: s_lshl_b32 s6, s6, 16 ; GFX6-NEXT: s_lshl_b32 s1, s1, 8 ; GFX6-NEXT: s_or_b32 s0, s0, s6 -; GFX6-NEXT: s_and_b32 s6, s9, s10 -; GFX6-NEXT: s_or_b32 s1, s8, s1 +; GFX6-NEXT: s_and_b32 s6, s8, s9 +; GFX6-NEXT: s_or_b32 s1, s7, s1 ; GFX6-NEXT: s_bfe_u32 s6, s6, 0x100000 ; GFX6-NEXT: s_bfe_u32 s1, s1, 0x100000 ; GFX6-NEXT: s_lshl_b32 s6, s6, 16 ; GFX6-NEXT: s_or_b32 s1, s1, s6 -; GFX6-NEXT: s_lshr_b32 s6, s2, 8 -; GFX6-NEXT: s_and_b32 s6, s6, s10 -; GFX6-NEXT: s_lshr_b32 s7, s2, 16 -; GFX6-NEXT: s_lshr_b32 s8, s2, 24 -; GFX6-NEXT: s_and_b32 s2, s2, s10 -; GFX6-NEXT: s_lshl_b32 s6, s6, 8 -; GFX6-NEXT: s_or_b32 s2, s2, s6 -; GFX6-NEXT: s_and_b32 s6, s7, s10 +; GFX6-NEXT: s_lshr_b32 s6, s2, 16 +; GFX6-NEXT: s_lshr_b32 s7, s2, 24 +; GFX6-NEXT: s_and_b32 s10, s2, s9 +; GFX6-NEXT: s_bfe_u32 s2, s2, s11 +; GFX6-NEXT: s_lshl_b32 s2, s2, 8 +; GFX6-NEXT: s_and_b32 s6, s6, s9 +; GFX6-NEXT: s_or_b32 s2, s10, s2 ; GFX6-NEXT: s_bfe_u32 s6, s6, 0x100000 -; GFX6-NEXT: s_lshr_b32 s9, s3, 8 -; GFX6-NEXT: s_and_b32 s3, s3, s10 +; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 +; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX6-NEXT: s_lshr_b32 s8, s3, 8 +; GFX6-NEXT: s_and_b32 s3, s3, s9 ; GFX6-NEXT: s_bfe_u32 s2, s2, 0x100000 ; GFX6-NEXT: s_lshl_b32 s6, s6, 16 ; GFX6-NEXT: s_lshl_b32 s3, s3, 8 -; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 ; GFX6-NEXT: s_or_b32 s2, s2, s6 -; GFX6-NEXT: s_and_b32 s6, s9, s10 -; GFX6-NEXT: s_or_b32 s3, s8, s3 +; GFX6-NEXT: s_and_b32 s6, s8, s9 +; GFX6-NEXT: s_or_b32 s3, s7, s3 ; GFX6-NEXT: s_bfe_u32 s6, s6, 0x100000 -; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX6-NEXT: s_bfe_u32 s3, s3, 0x100000 ; GFX6-NEXT: s_lshl_b32 s6, s6, 16 -; GFX6-NEXT: s_or_b32 s3, s3, s6 -; GFX6-NEXT: s_lshr_b32 s6, s4, 8 -; GFX6-NEXT: s_and_b32 s6, s6, s10 ; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 -; GFX6-NEXT: s_lshr_b32 s7, s4, 16 -; GFX6-NEXT: s_lshr_b32 s8, s4, 24 -; GFX6-NEXT: s_and_b32 s4, s4, s10 -; GFX6-NEXT: s_lshl_b32 s6, s6, 8 -; GFX6-NEXT: s_or_b32 s4, s4, s6 -; GFX6-NEXT: s_and_b32 s6, s7, s10 +; GFX6-NEXT: s_or_b32 s3, s3, s6 +; GFX6-NEXT: s_lshr_b32 s6, s4, 16 +; GFX6-NEXT: s_lshr_b32 s7, s4, 24 +; GFX6-NEXT: s_and_b32 s10, s4, s9 +; GFX6-NEXT: s_bfe_u32 s4, s4, s11 +; GFX6-NEXT: s_lshl_b32 s4, s4, 8 +; GFX6-NEXT: s_and_b32 s6, s6, s9 ; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX6-NEXT: s_or_b32 s4, s10, s4 ; GFX6-NEXT: s_bfe_u32 s6, s6, 0x100000 ; GFX6-NEXT: s_bfe_u32 s4, s4, 0x100000 ; GFX6-NEXT: s_lshl_b32 s6, s6, 16 ; GFX6-NEXT: s_or_b32 s4, s4, s6 ; GFX6-NEXT: s_sub_i32 s6, 0, 24 ; GFX6-NEXT: v_mul_lo_u32 v1, s6, v0 -; GFX6-NEXT: s_lshr_b32 s9, s5, 8 -; GFX6-NEXT: s_and_b32 s5, s5, s10 +; GFX6-NEXT: s_lshr_b32 s8, s5, 8 +; GFX6-NEXT: s_and_b32 s5, s5, s9 ; GFX6-NEXT: s_lshl_b32 s5, s5, 8 ; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1 -; GFX6-NEXT: s_and_b32 s7, s9, s10 -; GFX6-NEXT: s_or_b32 s5, s8, s5 +; GFX6-NEXT: s_or_b32 s5, s7, s5 +; GFX6-NEXT: s_and_b32 s7, s8, s9 ; GFX6-NEXT: s_bfe_u32 s7, s7, 0x100000 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v1, 24 @@ -1587,27 +1583,23 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i ; GFX6-NEXT: v_and_b32_e32 v1, v1, v4 ; GFX6-NEXT: s_lshr_b32 s0, s3, 1 ; GFX6-NEXT: v_and_b32_e32 v2, v2, v4 +; GFX6-NEXT: v_bfe_u32 v3, v0, 8, 8 ; GFX6-NEXT: v_lshl_b32_e32 v1, s1, v1 ; GFX6-NEXT: v_lshr_b32_e32 v2, s0, v2 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 -; GFX6-NEXT: v_lshrrev_b32_e32 v2, 8, v0 -; GFX6-NEXT: v_and_b32_e32 v2, s10, v2 -; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v0 -; GFX6-NEXT: v_and_b32_e32 v0, s10, v0 -; GFX6-NEXT: v_lshlrev_b32_e32 v2, 8, v2 -; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX6-NEXT: v_and_b32_e32 v2, s10, v3 -; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX6-NEXT: v_lshrrev_b32_e32 v4, 8, v1 -; GFX6-NEXT: v_lshrrev_b32_e32 v5, 16, v1 -; GFX6-NEXT: v_and_b32_e32 v1, s10, v1 +; GFX6-NEXT: v_and_b32_e32 v2, s9, v0 +; GFX6-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX6-NEXT: v_lshlrev_b32_e32 v3, 8, v3 +; GFX6-NEXT: v_or_b32_e32 v2, v2, v3 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX6-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX6-NEXT: v_and_b32_e32 v2, s9, v1 +; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v2 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX6-NEXT: v_and_b32_e32 v2, s10, v5 -; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v1 -; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX6-NEXT: v_and_b32_e32 v1, s10, v4 -; GFX6-NEXT: v_lshlrev_b32_e32 v2, 8, v2 -; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX6-NEXT: v_bfe_u32 v2, v1, 8, 8 +; GFX6-NEXT: v_bfe_u32 v1, v1, 16, 8 +; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1 +; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 ; GFX6-NEXT: v_readfirstlane_b32 s0, v0 ; GFX6-NEXT: v_readfirstlane_b32 s1, v1 ; GFX6-NEXT: ; return to shader part epilog @@ -1728,20 +1720,17 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i ; GFX8-NEXT: v_lshlrev_b32_e64 v1, v1, s1 ; GFX8-NEXT: v_lshrrev_b32_e64 v2, v2, s0 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v2 -; GFX8-NEXT: v_lshrrev_b32_e32 v2, 8, v0 -; GFX8-NEXT: v_mov_b32_e32 v4, 8 -; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_mov_b32_e32 v4, s10 -; GFX8-NEXT: v_or_b32_sdwa v2, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 -; GFX8-NEXT: v_and_b32_e32 v2, s10, v1 -; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v1 -; GFX8-NEXT: v_and_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_lshlrev_b32_e32 v1, 8, v1 -; GFX8-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX8-NEXT: v_or_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_mov_b32_e32 v2, 8 +; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_mov_b32_e32 v4, 16 +; GFX8-NEXT: v_or_b32_sdwa v3, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_e32 v0, v3, v0 +; GFX8-NEXT: v_and_b32_e32 v3, s10, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_lshlrev_b32_e32 v3, 24, v3 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v3 +; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD ; GFX8-NEXT: v_readfirstlane_b32 s0, v0 ; GFX8-NEXT: v_readfirstlane_b32 s1, v1 ; GFX8-NEXT: ; return to shader part epilog @@ -1749,60 +1738,60 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i ; GFX9-LABEL: s_fshl_v2i24: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshr_b32 s7, s0, 8 -; GFX9-NEXT: s_movk_i32 s11, 0xff -; GFX9-NEXT: s_and_b32 s7, s7, s11 -; GFX9-NEXT: s_bfe_u32 s12, 8, 0x100000 -; GFX9-NEXT: s_lshr_b32 s8, s0, 16 -; GFX9-NEXT: s_lshr_b32 s9, s0, 24 -; GFX9-NEXT: s_and_b32 s0, s0, s11 -; GFX9-NEXT: s_lshl_b32 s7, s7, s12 +; GFX9-NEXT: s_movk_i32 s12, 0xff +; GFX9-NEXT: s_and_b32 s7, s7, s12 +; GFX9-NEXT: s_bfe_u32 s13, 8, 0x100000 +; GFX9-NEXT: s_lshr_b32 s9, s0, 16 +; GFX9-NEXT: s_lshr_b32 s10, s0, 24 +; GFX9-NEXT: s_and_b32 s0, s0, s12 +; GFX9-NEXT: s_lshl_b32 s7, s7, s13 ; GFX9-NEXT: s_or_b32 s0, s0, s7 -; GFX9-NEXT: s_and_b32 s7, s8, s11 +; GFX9-NEXT: s_and_b32 s7, s9, s12 ; GFX9-NEXT: s_bfe_u32 s7, s7, 0x100000 -; GFX9-NEXT: s_lshr_b32 s10, s1, 8 -; GFX9-NEXT: s_and_b32 s1, s1, s11 +; GFX9-NEXT: s_lshr_b32 s11, s1, 8 +; GFX9-NEXT: s_and_b32 s1, s1, s12 ; GFX9-NEXT: s_bfe_u32 s0, s0, 0x100000 ; GFX9-NEXT: s_lshl_b32 s7, s7, 16 -; GFX9-NEXT: s_lshl_b32 s1, s1, s12 +; GFX9-NEXT: s_lshl_b32 s1, s1, s13 ; GFX9-NEXT: s_or_b32 s0, s0, s7 -; GFX9-NEXT: s_and_b32 s7, s10, s11 -; GFX9-NEXT: s_or_b32 s1, s9, s1 +; GFX9-NEXT: s_and_b32 s7, s11, s12 +; GFX9-NEXT: s_or_b32 s1, s10, s1 ; GFX9-NEXT: s_bfe_u32 s7, s7, 0x100000 ; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000 ; GFX9-NEXT: s_lshl_b32 s7, s7, 16 ; GFX9-NEXT: s_or_b32 s1, s1, s7 ; GFX9-NEXT: s_lshr_b32 s7, s2, 8 -; GFX9-NEXT: s_and_b32 s7, s7, s11 -; GFX9-NEXT: s_lshr_b32 s8, s2, 16 -; GFX9-NEXT: s_lshr_b32 s9, s2, 24 -; GFX9-NEXT: s_and_b32 s2, s2, s11 -; GFX9-NEXT: s_lshl_b32 s7, s7, s12 +; GFX9-NEXT: s_and_b32 s7, s7, s12 +; GFX9-NEXT: s_lshr_b32 s9, s2, 16 +; GFX9-NEXT: s_lshr_b32 s10, s2, 24 +; GFX9-NEXT: s_and_b32 s2, s2, s12 +; GFX9-NEXT: s_lshl_b32 s7, s7, s13 ; GFX9-NEXT: s_or_b32 s2, s2, s7 -; GFX9-NEXT: s_and_b32 s7, s8, s11 +; GFX9-NEXT: s_and_b32 s7, s9, s12 ; GFX9-NEXT: s_bfe_u32 s7, s7, 0x100000 -; GFX9-NEXT: s_lshr_b32 s10, s3, 8 -; GFX9-NEXT: s_and_b32 s3, s3, s11 +; GFX9-NEXT: s_lshr_b32 s11, s3, 8 +; GFX9-NEXT: s_and_b32 s3, s3, s12 ; GFX9-NEXT: s_bfe_u32 s2, s2, 0x100000 ; GFX9-NEXT: s_lshl_b32 s7, s7, 16 ; GFX9-NEXT: s_or_b32 s2, s2, s7 -; GFX9-NEXT: s_and_b32 s7, s10, s11 -; GFX9-NEXT: s_lshl_b32 s3, s3, s12 +; GFX9-NEXT: s_and_b32 s7, s11, s12 +; GFX9-NEXT: s_lshl_b32 s3, s3, s13 ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 -; GFX9-NEXT: s_or_b32 s3, s9, s3 +; GFX9-NEXT: s_or_b32 s3, s10, s3 ; GFX9-NEXT: s_bfe_u32 s7, s7, 0x100000 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX9-NEXT: s_bfe_u32 s3, s3, 0x100000 ; GFX9-NEXT: s_lshl_b32 s7, s7, 16 ; GFX9-NEXT: s_or_b32 s3, s3, s7 ; GFX9-NEXT: s_lshr_b32 s7, s4, 8 -; GFX9-NEXT: s_and_b32 s7, s7, s11 +; GFX9-NEXT: s_and_b32 s7, s7, s12 ; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 -; GFX9-NEXT: s_lshr_b32 s8, s4, 16 -; GFX9-NEXT: s_lshr_b32 s9, s4, 24 -; GFX9-NEXT: s_and_b32 s4, s4, s11 -; GFX9-NEXT: s_lshl_b32 s7, s7, s12 +; GFX9-NEXT: s_lshr_b32 s9, s4, 16 +; GFX9-NEXT: s_lshr_b32 s10, s4, 24 +; GFX9-NEXT: s_and_b32 s4, s4, s12 +; GFX9-NEXT: s_lshl_b32 s7, s7, s13 ; GFX9-NEXT: s_or_b32 s4, s4, s7 -; GFX9-NEXT: s_and_b32 s7, s8, s11 +; GFX9-NEXT: s_and_b32 s7, s9, s12 ; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX9-NEXT: s_bfe_u32 s7, s7, 0x100000 ; GFX9-NEXT: s_bfe_u32 s4, s4, 0x100000 @@ -1810,24 +1799,24 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i ; GFX9-NEXT: s_or_b32 s4, s4, s7 ; GFX9-NEXT: s_sub_i32 s7, 0, 24 ; GFX9-NEXT: v_mul_lo_u32 v1, s7, v0 -; GFX9-NEXT: s_lshr_b32 s10, s5, 8 -; GFX9-NEXT: s_and_b32 s5, s5, s11 -; GFX9-NEXT: s_lshl_b32 s5, s5, s12 +; GFX9-NEXT: s_lshr_b32 s11, s5, 8 +; GFX9-NEXT: s_and_b32 s5, s5, s12 +; GFX9-NEXT: s_lshl_b32 s5, s5, s13 ; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1 -; GFX9-NEXT: s_and_b32 s8, s10, s11 -; GFX9-NEXT: s_or_b32 s5, s9, s5 -; GFX9-NEXT: s_bfe_u32 s8, s8, 0x100000 +; GFX9-NEXT: s_and_b32 s9, s11, s12 +; GFX9-NEXT: s_or_b32 s5, s10, s5 +; GFX9-NEXT: s_bfe_u32 s9, s9, 0x100000 ; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v1, 24 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v1, v1 ; GFX9-NEXT: v_mul_hi_u32 v0, s4, v0 ; GFX9-NEXT: s_bfe_u32 s5, s5, 0x100000 -; GFX9-NEXT: s_lshl_b32 s8, s8, 16 +; GFX9-NEXT: s_lshl_b32 s9, s9, 16 ; GFX9-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 ; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, 24 -; GFX9-NEXT: s_or_b32 s5, s5, s8 -; GFX9-NEXT: s_mov_b32 s8, 0xffffff +; GFX9-NEXT: s_or_b32 s5, s5, s9 +; GFX9-NEXT: s_mov_b32 s9, 0xffffff ; GFX9-NEXT: v_mul_lo_u32 v3, s7, v1 ; GFX9-NEXT: v_sub_u32_e32 v0, s4, v0 ; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v0 @@ -1842,8 +1831,8 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i ; GFX9-NEXT: v_sub_u32_e32 v2, 23, v0 ; GFX9-NEXT: s_lshr_b32 s2, s2, 1 ; GFX9-NEXT: v_mul_lo_u32 v1, v1, 24 -; GFX9-NEXT: v_and_b32_e32 v2, s8, v2 -; GFX9-NEXT: v_and_b32_e32 v0, s8, v0 +; GFX9-NEXT: v_and_b32_e32 v2, s9, v2 +; GFX9-NEXT: v_and_b32_e32 v0, s9, v0 ; GFX9-NEXT: v_lshrrev_b32_e64 v2, v2, s2 ; GFX9-NEXT: v_sub_u32_e32 v1, s5, v1 ; GFX9-NEXT: v_lshl_or_b32 v0, s0, v0, v2 @@ -1861,17 +1850,16 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i ; GFX9-NEXT: v_lshrrev_b32_e64 v2, v2, s0 ; GFX9-NEXT: v_lshl_or_b32 v1, s1, v1, v2 ; GFX9-NEXT: s_mov_b32 s6, 8 -; GFX9-NEXT: v_lshrrev_b32_e32 v2, 8, v0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v2, s6, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_b32_e32 v4, s11, v1 -; GFX9-NEXT: v_lshrrev_b32_e32 v3, 8, v1 -; GFX9-NEXT: v_and_b32_sdwa v1, v1, s11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b32_e32 v1, 8, v1 -; GFX9-NEXT: v_and_or_b32 v2, v0, s11, v2 -; GFX9-NEXT: v_and_b32_sdwa v0, v0, s11 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; GFX9-NEXT: v_or3_b32 v0, v2, v0, v4 -; GFX9-NEXT: v_and_or_b32 v1, v3, s11, v1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, s6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_and_b32_e32 v3, s12, v1 +; GFX9-NEXT: s_mov_b32 s8, 16 +; GFX9-NEXT: v_and_or_b32 v2, v0, s12, v2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_e32 v3, 24, v3 +; GFX9-NEXT: v_or3_b32 v0, v2, v0, v3 +; GFX9-NEXT: v_bfe_u32 v2, v1, 8, 8 +; GFX9-NEXT: v_bfe_u32 v1, v1, 16, 8 +; GFX9-NEXT: v_lshl_or_b32 v1, v1, 8, v2 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0 ; GFX9-NEXT: v_readfirstlane_b32 s1, v1 ; GFX9-NEXT: ; return to shader part epilog @@ -1880,130 +1868,129 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i ; GFX10: ; %bb.0: ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v1, 24 -; GFX10-NEXT: s_movk_i32 s8, 0xff -; GFX10-NEXT: s_lshr_b32 s11, s1, 8 -; GFX10-NEXT: s_bfe_u32 s10, 8, 0x100000 +; GFX10-NEXT: s_sub_i32 s14, 0, 24 +; GFX10-NEXT: s_movk_i32 s9, 0xff +; GFX10-NEXT: s_lshr_b32 s10, s1, 8 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v1, v1 -; GFX10-NEXT: s_and_b32 s1, s1, s8 -; GFX10-NEXT: s_lshr_b32 s9, s0, 24 -; GFX10-NEXT: s_lshl_b32 s1, s1, s10 +; GFX10-NEXT: s_bfe_u32 s11, 8, 0x100000 +; GFX10-NEXT: s_and_b32 s1, s1, s9 ; GFX10-NEXT: s_lshr_b32 s6, s0, 8 -; GFX10-NEXT: s_or_b32 s1, s9, s1 -; GFX10-NEXT: s_sub_i32 s9, 0, 24 -; GFX10-NEXT: s_and_b32 s6, s6, s8 -; GFX10-NEXT: s_lshr_b32 s7, s0, 16 +; GFX10-NEXT: s_lshr_b32 s8, s0, 24 +; GFX10-NEXT: s_lshl_b32 s1, s1, s11 +; GFX10-NEXT: s_and_b32 s6, s6, s9 +; GFX10-NEXT: s_or_b32 s1, s8, s1 +; GFX10-NEXT: s_lshr_b32 s8, s4, 8 ; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX10-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 -; GFX10-NEXT: s_and_b32 s0, s0, s8 -; GFX10-NEXT: s_lshl_b32 s6, s6, s10 -; GFX10-NEXT: s_lshr_b32 s12, s4, 24 +; GFX10-NEXT: s_and_b32 s8, s8, s9 +; GFX10-NEXT: s_lshr_b32 s7, s0, 16 +; GFX10-NEXT: s_and_b32 s0, s0, s9 ; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v1 +; GFX10-NEXT: s_lshl_b32 s6, s6, s11 +; GFX10-NEXT: s_lshr_b32 s12, s4, 24 ; GFX10-NEXT: s_or_b32 s0, s0, s6 -; GFX10-NEXT: s_and_b32 s6, s7, s8 -; GFX10-NEXT: s_lshr_b32 s7, s4, 8 -; GFX10-NEXT: v_mul_lo_u32 v2, s9, v0 -; GFX10-NEXT: v_mul_lo_u32 v3, s9, v1 -; GFX10-NEXT: s_bfe_u32 s6, s6, 0x100000 -; GFX10-NEXT: s_bfe_u32 s0, s0, 0x100000 -; GFX10-NEXT: s_lshl_b32 s6, s6, 16 -; GFX10-NEXT: s_and_b32 s7, s7, s8 -; GFX10-NEXT: s_or_b32 s0, s0, s6 -; GFX10-NEXT: s_and_b32 s6, s11, s8 -; GFX10-NEXT: v_mul_hi_u32 v2, v0, v2 -; GFX10-NEXT: s_lshr_b32 s11, s4, 16 -; GFX10-NEXT: s_and_b32 s4, s4, s8 -; GFX10-NEXT: s_lshl_b32 s7, s7, s10 -; GFX10-NEXT: s_and_b32 s9, s11, s8 -; GFX10-NEXT: s_or_b32 s4, s4, s7 -; GFX10-NEXT: s_bfe_u32 s7, s9, 0x100000 +; GFX10-NEXT: v_mul_lo_u32 v2, s14, v0 +; GFX10-NEXT: v_mul_lo_u32 v3, s14, v1 +; GFX10-NEXT: s_and_b32 s6, s7, s9 +; GFX10-NEXT: s_and_b32 s7, s10, s9 +; GFX10-NEXT: s_lshr_b32 s10, s4, 16 +; GFX10-NEXT: s_and_b32 s4, s4, s9 +; GFX10-NEXT: s_lshl_b32 s8, s8, s11 ; GFX10-NEXT: s_lshr_b32 s13, s5, 8 -; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v2 -; GFX10-NEXT: v_mul_hi_u32 v2, v1, v3 -; GFX10-NEXT: s_and_b32 s5, s5, s8 +; GFX10-NEXT: v_mul_hi_u32 v2, v0, v2 +; GFX10-NEXT: s_or_b32 s4, s4, s8 +; GFX10-NEXT: s_and_b32 s8, s10, s9 +; GFX10-NEXT: v_mul_hi_u32 v3, v1, v3 +; GFX10-NEXT: s_bfe_u32 s8, s8, 0x100000 +; GFX10-NEXT: s_and_b32 s5, s5, s9 ; GFX10-NEXT: s_bfe_u32 s4, s4, 0x100000 -; GFX10-NEXT: s_lshl_b32 s7, s7, 16 -; GFX10-NEXT: s_lshl_b32 s5, s5, s10 -; GFX10-NEXT: s_or_b32 s4, s4, s7 -; GFX10-NEXT: s_and_b32 s7, s13, s8 +; GFX10-NEXT: s_lshl_b32 s8, s8, 16 +; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v2 +; GFX10-NEXT: s_lshl_b32 s5, s5, s11 +; GFX10-NEXT: s_or_b32 s4, s4, s8 +; GFX10-NEXT: s_and_b32 s8, s13, s9 ; GFX10-NEXT: s_or_b32 s5, s12, s5 -; GFX10-NEXT: s_bfe_u32 s7, s7, 0x100000 +; GFX10-NEXT: s_bfe_u32 s8, s8, 0x100000 ; GFX10-NEXT: v_mul_hi_u32 v0, s4, v0 -; GFX10-NEXT: v_add_nc_u32_e32 v1, v1, v2 +; GFX10-NEXT: v_add_nc_u32_e32 v1, v1, v3 ; GFX10-NEXT: s_bfe_u32 s5, s5, 0x100000 -; GFX10-NEXT: s_lshl_b32 s7, s7, 16 -; GFX10-NEXT: s_lshr_b32 s9, s2, 16 -; GFX10-NEXT: s_or_b32 s5, s5, s7 -; GFX10-NEXT: s_lshr_b32 s7, s2, 8 +; GFX10-NEXT: s_lshl_b32 s8, s8, 16 +; GFX10-NEXT: s_lshr_b32 s10, s2, 16 +; GFX10-NEXT: s_or_b32 s5, s5, s8 +; GFX10-NEXT: s_lshr_b32 s8, s2, 8 ; GFX10-NEXT: v_mul_hi_u32 v1, s5, v1 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, 24 -; GFX10-NEXT: s_lshr_b32 s11, s2, 24 -; GFX10-NEXT: s_and_b32 s2, s2, s8 -; GFX10-NEXT: s_lshr_b32 s12, s3, 8 -; GFX10-NEXT: s_and_b32 s3, s3, s8 -; GFX10-NEXT: s_bfe_u32 s6, s6, 0x100000 -; GFX10-NEXT: s_lshl_b32 s3, s3, s10 +; GFX10-NEXT: s_and_b32 s8, s8, s9 +; GFX10-NEXT: s_and_b32 s12, s2, s9 +; GFX10-NEXT: s_lshl_b32 s8, s8, s11 +; GFX10-NEXT: s_and_b32 s10, s10, s9 +; GFX10-NEXT: s_or_b32 s8, s12, s8 +; GFX10-NEXT: s_lshr_b32 s2, s2, 24 ; GFX10-NEXT: v_mul_lo_u32 v1, v1, 24 ; GFX10-NEXT: v_sub_nc_u32_e32 v0, s4, v0 -; GFX10-NEXT: s_and_b32 s4, s7, s8 -; GFX10-NEXT: s_and_b32 s7, s9, s8 -; GFX10-NEXT: s_lshl_b32 s4, s4, s10 -; GFX10-NEXT: s_or_b32 s3, s11, s3 +; GFX10-NEXT: s_bfe_u32 s4, s8, 0x100000 +; GFX10-NEXT: s_bfe_u32 s8, s10, 0x100000 +; GFX10-NEXT: s_bfe_u32 s6, s6, 0x100000 +; GFX10-NEXT: s_bfe_u32 s7, s7, 0x100000 ; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v0 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 ; GFX10-NEXT: v_sub_nc_u32_e32 v1, s5, v1 -; GFX10-NEXT: s_or_b32 s2, s2, s4 -; GFX10-NEXT: s_bfe_u32 s4, s7, 0x100000 -; GFX10-NEXT: s_mov_b32 s5, 0xffffff +; GFX10-NEXT: s_lshl_b32 s5, s8, 16 +; GFX10-NEXT: s_lshr_b32 s8, s3, 8 +; GFX10-NEXT: s_and_b32 s3, s3, s9 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo -; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v1 +; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v1 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 -; GFX10-NEXT: s_bfe_u32 s2, s2, 0x100000 -; GFX10-NEXT: s_lshl_b32 s4, s4, 16 -; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v0 -; GFX10-NEXT: s_or_b32 s2, s2, s4 -; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo +; GFX10-NEXT: s_lshl_b32 s3, s3, s11 +; GFX10-NEXT: s_or_b32 s4, s4, s5 +; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v0 +; GFX10-NEXT: s_or_b32 s2, s2, s3 +; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 -; GFX10-NEXT: s_and_b32 s4, s12, s8 -; GFX10-NEXT: s_lshr_b32 s2, s2, 1 -; GFX10-NEXT: s_bfe_u32 s4, s4, 0x100000 -; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v1 -; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo +; GFX10-NEXT: s_and_b32 s3, s8, s9 +; GFX10-NEXT: s_mov_b32 s5, 0xffffff +; GFX10-NEXT: s_bfe_u32 s3, s3, 0x100000 +; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v1 +; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 -; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000 -; GFX10-NEXT: s_lshl_b32 s6, s6, 16 -; GFX10-NEXT: v_sub_nc_u32_e32 v3, 23, v0 -; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo -; GFX10-NEXT: v_and_b32_e32 v0, s5, v0 -; GFX10-NEXT: v_and_b32_e32 v2, s5, v3 +; GFX10-NEXT: s_bfe_u32 s2, s2, 0x100000 +; GFX10-NEXT: s_lshl_b32 s3, s3, 16 +; GFX10-NEXT: s_bfe_u32 s0, s0, 0x100000 +; GFX10-NEXT: v_sub_nc_u32_e32 v2, 23, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo ; GFX10-NEXT: v_mov_b32_e32 v3, 0xffffff +; GFX10-NEXT: s_or_b32 s2, s2, s3 +; GFX10-NEXT: s_lshr_b32 s3, s4, 1 +; GFX10-NEXT: v_and_b32_e32 v2, s5, v2 ; GFX10-NEXT: v_sub_nc_u32_e32 v4, 23, v1 -; GFX10-NEXT: v_lshrrev_b32_e64 v2, v2, s2 -; GFX10-NEXT: s_bfe_u32 s2, s3, 0x100000 -; GFX10-NEXT: s_lshl_b32 s3, s4, 16 +; GFX10-NEXT: s_lshr_b32 s2, s2, 1 +; GFX10-NEXT: v_and_b32_e32 v0, s5, v0 +; GFX10-NEXT: v_and_b32_e32 v1, v1, v3 +; GFX10-NEXT: v_lshrrev_b32_e64 v2, v2, s3 ; GFX10-NEXT: v_and_b32_e32 v4, v4, v3 -; GFX10-NEXT: s_or_b32 s2, s2, s3 +; GFX10-NEXT: s_lshl_b32 s6, s6, 16 +; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000 +; GFX10-NEXT: s_lshl_b32 s7, s7, 16 +; GFX10-NEXT: s_or_b32 s0, s0, s6 +; GFX10-NEXT: v_lshrrev_b32_e64 v3, v4, s2 +; GFX10-NEXT: s_or_b32 s1, s1, s7 ; GFX10-NEXT: v_lshl_or_b32 v0, s0, v0, v2 -; GFX10-NEXT: s_lshr_b32 s0, s2, 1 -; GFX10-NEXT: v_and_b32_e32 v1, v1, v3 -; GFX10-NEXT: v_lshrrev_b32_e64 v2, v4, s0 -; GFX10-NEXT: s_or_b32 s0, s1, s6 -; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v0 -; GFX10-NEXT: v_lshl_or_b32 v1, s0, v1, v2 ; GFX10-NEXT: s_mov_b32 s0, 8 -; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_and_b32_e32 v3, s8, v1 -; GFX10-NEXT: v_and_b32_sdwa v4, v1, s8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v1 -; GFX10-NEXT: v_and_or_b32 v2, v0, s8, v2 -; GFX10-NEXT: v_and_b32_sdwa v0, v0, s8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX10-NEXT: v_lshl_or_b32 v1, s1, v1, v3 +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: s_mov_b32 s0, 16 +; GFX10-NEXT: v_and_b32_e32 v3, s9, v1 +; GFX10-NEXT: v_bfe_u32 v4, v1, 8, 8 +; GFX10-NEXT: v_bfe_u32 v1, v1, 16, 8 +; GFX10-NEXT: v_and_or_b32 v2, v0, s9, v2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v0, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX10-NEXT: v_lshlrev_b32_e32 v3, 24, v3 -; GFX10-NEXT: v_lshlrev_b32_e32 v4, 8, v4 +; GFX10-NEXT: v_lshl_or_b32 v1, v1, 8, v4 ; GFX10-NEXT: v_or3_b32 v0, v2, v0, v3 -; GFX10-NEXT: v_and_or_b32 v1, v1, s8, v4 -; GFX10-NEXT: v_readfirstlane_b32 s0, v0 ; GFX10-NEXT: v_readfirstlane_b32 s1, v1 +; GFX10-NEXT: v_readfirstlane_b32 s0, v0 ; GFX10-NEXT: ; return to shader part epilog %lhs = bitcast i48 %lhs.arg to <2 x i24> %rhs = bitcast i48 %rhs.arg to <2 x i24> diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll index 8315b9aa51dec..b8a83aac36044 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll @@ -538,26 +538,25 @@ define i8 @v_fshr_i8_5(i8 %lhs, i8 %rhs) { define amdgpu_ps i16 @s_fshr_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg, i16 inreg %amt.arg) { ; GFX6-LABEL: s_fshr_v2i8: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_movk_i32 s7, 0xff ; GFX6-NEXT: s_lshr_b32 s3, s0, 8 -; GFX6-NEXT: s_lshr_b32 s4, s1, 8 -; GFX6-NEXT: s_lshr_b32 s5, s2, 8 -; GFX6-NEXT: s_and_b32 s6, s2, 7 -; GFX6-NEXT: s_and_b32 s1, s1, s7 +; GFX6-NEXT: s_lshr_b32 s4, s2, 8 +; GFX6-NEXT: s_and_b32 s5, s2, 7 ; GFX6-NEXT: s_andn2_b32 s2, 7, s2 ; GFX6-NEXT: s_lshl_b32 s0, s0, 1 +; GFX6-NEXT: s_movk_i32 s6, 0xff ; GFX6-NEXT: s_lshl_b32 s0, s0, s2 -; GFX6-NEXT: s_lshr_b32 s1, s1, s6 -; GFX6-NEXT: s_andn2_b32 s2, 7, s5 +; GFX6-NEXT: s_and_b32 s2, s1, s6 +; GFX6-NEXT: s_lshr_b32 s2, s2, s5 +; GFX6-NEXT: s_or_b32 s0, s0, s2 +; GFX6-NEXT: s_and_b32 s2, s4, 7 +; GFX6-NEXT: s_bfe_u32 s1, s1, 0x80008 +; GFX6-NEXT: s_andn2_b32 s4, 7, s4 ; GFX6-NEXT: s_lshl_b32 s3, s3, 1 -; GFX6-NEXT: s_or_b32 s0, s0, s1 -; GFX6-NEXT: s_lshl_b32 s2, s3, s2 -; GFX6-NEXT: s_and_b32 s1, s5, 7 -; GFX6-NEXT: s_and_b32 s3, s4, s7 -; GFX6-NEXT: s_lshr_b32 s1, s3, s1 -; GFX6-NEXT: s_or_b32 s1, s2, s1 -; GFX6-NEXT: s_and_b32 s1, s1, s7 -; GFX6-NEXT: s_and_b32 s0, s0, s7 +; GFX6-NEXT: s_lshl_b32 s3, s3, s4 +; GFX6-NEXT: s_lshr_b32 s1, s1, s2 +; GFX6-NEXT: s_or_b32 s1, s3, s1 +; GFX6-NEXT: s_and_b32 s1, s1, s6 +; GFX6-NEXT: s_and_b32 s0, s0, s6 ; GFX6-NEXT: s_lshl_b32 s1, s1, 8 ; GFX6-NEXT: s_or_b32 s0, s0, s1 ; GFX6-NEXT: ; return to shader part epilog @@ -660,26 +659,25 @@ define i16 @v_fshr_v2i8(i16 %lhs.arg, i16 %rhs.arg, i16 %amt.arg) { ; GFX6-LABEL: v_fshr_v2i8: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: v_lshrrev_b32_e32 v5, 8, v2 -; GFX6-NEXT: v_and_b32_e32 v6, 7, v2 +; GFX6-NEXT: v_lshrrev_b32_e32 v4, 8, v2 +; GFX6-NEXT: v_and_b32_e32 v5, 7, v2 ; GFX6-NEXT: v_xor_b32_e32 v2, -1, v2 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 8, v0 ; GFX6-NEXT: v_and_b32_e32 v2, 7, v2 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 ; GFX6-NEXT: s_movk_i32 s4, 0xff -; GFX6-NEXT: v_lshrrev_b32_e32 v4, 8, v1 -; GFX6-NEXT: v_and_b32_e32 v1, s4, v1 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0 -; GFX6-NEXT: v_xor_b32_e32 v2, -1, v5 -; GFX6-NEXT: v_lshrrev_b32_e32 v1, v6, v1 -; GFX6-NEXT: v_and_b32_e32 v2, 7, v2 +; GFX6-NEXT: v_and_b32_e32 v2, s4, v1 +; GFX6-NEXT: v_lshrrev_b32_e32 v2, v5, v2 +; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX6-NEXT: v_and_b32_e32 v2, 7, v4 +; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4 +; GFX6-NEXT: v_bfe_u32 v1, v1, 8, 8 +; GFX6-NEXT: v_and_b32_e32 v4, 7, v4 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v3 -; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX6-NEXT: v_lshlrev_b32_e32 v2, v2, v3 -; GFX6-NEXT: v_and_b32_e32 v1, 7, v5 -; GFX6-NEXT: v_and_b32_e32 v3, s4, v4 -; GFX6-NEXT: v_lshrrev_b32_e32 v1, v1, v3 -; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX6-NEXT: v_lshlrev_b32_e32 v3, v4, v3 +; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1 +; GFX6-NEXT: v_or_b32_e32 v1, v3, v1 ; GFX6-NEXT: v_and_b32_e32 v1, s4, v1 ; GFX6-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1 @@ -774,51 +772,49 @@ define i16 @v_fshr_v2i8(i16 %lhs.arg, i16 %rhs.arg, i16 %amt.arg) { define amdgpu_ps i32 @s_fshr_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg, i32 inreg %amt.arg) { ; GFX6-LABEL: s_fshr_v4i8: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_movk_i32 s13, 0xff ; GFX6-NEXT: s_lshr_b32 s3, s0, 8 ; GFX6-NEXT: s_lshr_b32 s4, s0, 16 ; GFX6-NEXT: s_lshr_b32 s5, s0, 24 -; GFX6-NEXT: s_lshr_b32 s6, s1, 8 -; GFX6-NEXT: s_lshr_b32 s7, s1, 16 -; GFX6-NEXT: s_lshr_b32 s8, s1, 24 -; GFX6-NEXT: s_lshr_b32 s9, s2, 8 -; GFX6-NEXT: s_lshr_b32 s10, s2, 16 -; GFX6-NEXT: s_lshr_b32 s11, s2, 24 -; GFX6-NEXT: s_and_b32 s12, s2, 7 -; GFX6-NEXT: s_and_b32 s1, s1, s13 +; GFX6-NEXT: s_lshr_b32 s7, s2, 8 +; GFX6-NEXT: s_lshr_b32 s8, s2, 16 +; GFX6-NEXT: s_lshr_b32 s9, s2, 24 +; GFX6-NEXT: s_and_b32 s10, s2, 7 ; GFX6-NEXT: s_andn2_b32 s2, 7, s2 ; GFX6-NEXT: s_lshl_b32 s0, s0, 1 +; GFX6-NEXT: s_movk_i32 s11, 0xff ; GFX6-NEXT: s_lshl_b32 s0, s0, s2 -; GFX6-NEXT: s_lshr_b32 s1, s1, s12 -; GFX6-NEXT: s_andn2_b32 s2, 7, s9 +; GFX6-NEXT: s_and_b32 s2, s1, s11 +; GFX6-NEXT: s_lshr_b32 s2, s2, s10 +; GFX6-NEXT: s_or_b32 s0, s0, s2 +; GFX6-NEXT: s_and_b32 s2, s7, 7 +; GFX6-NEXT: s_andn2_b32 s7, 7, s7 ; GFX6-NEXT: s_lshl_b32 s3, s3, 1 -; GFX6-NEXT: s_or_b32 s0, s0, s1 -; GFX6-NEXT: s_lshl_b32 s2, s3, s2 -; GFX6-NEXT: s_and_b32 s1, s9, 7 -; GFX6-NEXT: s_and_b32 s3, s6, s13 -; GFX6-NEXT: s_lshr_b32 s1, s3, s1 -; GFX6-NEXT: s_andn2_b32 s3, 7, s10 -; GFX6-NEXT: s_lshl_b32 s4, s4, 1 -; GFX6-NEXT: s_or_b32 s1, s2, s1 -; GFX6-NEXT: s_lshl_b32 s3, s4, s3 -; GFX6-NEXT: s_and_b32 s2, s10, 7 -; GFX6-NEXT: s_and_b32 s4, s7, s13 -; GFX6-NEXT: s_lshr_b32 s2, s4, s2 -; GFX6-NEXT: s_and_b32 s1, s1, s13 +; GFX6-NEXT: s_lshl_b32 s3, s3, s7 +; GFX6-NEXT: s_bfe_u32 s7, s1, 0x80008 +; GFX6-NEXT: s_lshr_b32 s2, s7, s2 ; GFX6-NEXT: s_or_b32 s2, s3, s2 -; GFX6-NEXT: s_and_b32 s3, s11, 7 -; GFX6-NEXT: s_andn2_b32 s4, 7, s11 +; GFX6-NEXT: s_lshr_b32 s6, s1, 24 +; GFX6-NEXT: s_and_b32 s3, s8, 7 +; GFX6-NEXT: s_bfe_u32 s1, s1, 0x80010 +; GFX6-NEXT: s_andn2_b32 s7, 7, s8 +; GFX6-NEXT: s_lshl_b32 s4, s4, 1 +; GFX6-NEXT: s_lshr_b32 s1, s1, s3 +; GFX6-NEXT: s_lshl_b32 s4, s4, s7 +; GFX6-NEXT: s_or_b32 s1, s4, s1 +; GFX6-NEXT: s_and_b32 s3, s9, 7 +; GFX6-NEXT: s_and_b32 s2, s2, s11 +; GFX6-NEXT: s_andn2_b32 s4, 7, s9 ; GFX6-NEXT: s_lshl_b32 s5, s5, 1 -; GFX6-NEXT: s_and_b32 s0, s0, s13 -; GFX6-NEXT: s_lshl_b32 s1, s1, 8 -; GFX6-NEXT: s_or_b32 s0, s0, s1 -; GFX6-NEXT: s_and_b32 s1, s2, s13 +; GFX6-NEXT: s_and_b32 s1, s1, s11 ; GFX6-NEXT: s_lshl_b32 s4, s5, s4 -; GFX6-NEXT: s_lshr_b32 s3, s8, s3 -; GFX6-NEXT: s_lshl_b32 s1, s1, 16 +; GFX6-NEXT: s_lshr_b32 s3, s6, s3 +; GFX6-NEXT: s_and_b32 s0, s0, s11 +; GFX6-NEXT: s_lshl_b32 s2, s2, 8 ; GFX6-NEXT: s_or_b32 s3, s4, s3 +; GFX6-NEXT: s_or_b32 s0, s0, s2 +; GFX6-NEXT: s_lshl_b32 s1, s1, 16 ; GFX6-NEXT: s_or_b32 s0, s0, s1 -; GFX6-NEXT: s_and_b32 s1, s3, s13 +; GFX6-NEXT: s_and_b32 s1, s3, s11 ; GFX6-NEXT: s_lshl_b32 s1, s1, 24 ; GFX6-NEXT: s_or_b32 s0, s0, s1 ; GFX6-NEXT: ; return to shader part epilog @@ -996,54 +992,51 @@ define i32 @v_fshr_v4i8(i32 %lhs.arg, i32 %rhs.arg, i32 %amt.arg) { ; GFX6-LABEL: v_fshr_v4i8: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: v_lshrrev_b32_e32 v9, 8, v2 -; GFX6-NEXT: v_lshrrev_b32_e32 v10, 16, v2 -; GFX6-NEXT: v_lshrrev_b32_e32 v11, 24, v2 -; GFX6-NEXT: v_and_b32_e32 v12, 7, v2 +; GFX6-NEXT: v_lshrrev_b32_e32 v7, 8, v2 +; GFX6-NEXT: v_lshrrev_b32_e32 v8, 16, v2 +; GFX6-NEXT: v_lshrrev_b32_e32 v9, 24, v2 +; GFX6-NEXT: v_and_b32_e32 v10, 7, v2 ; GFX6-NEXT: v_xor_b32_e32 v2, -1, v2 -; GFX6-NEXT: s_movk_i32 s4, 0xff ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 8, v0 ; GFX6-NEXT: v_lshrrev_b32_e32 v4, 16, v0 ; GFX6-NEXT: v_lshrrev_b32_e32 v5, 24, v0 -; GFX6-NEXT: v_lshrrev_b32_e32 v6, 8, v1 -; GFX6-NEXT: v_lshrrev_b32_e32 v7, 16, v1 -; GFX6-NEXT: v_lshrrev_b32_e32 v8, 24, v1 -; GFX6-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX6-NEXT: v_and_b32_e32 v11, 0xff, v1 ; GFX6-NEXT: v_and_b32_e32 v2, 7, v2 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0 -; GFX6-NEXT: v_lshrrev_b32_e32 v1, v12, v1 -; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX6-NEXT: v_and_b32_e32 v1, 7, v9 -; GFX6-NEXT: v_xor_b32_e32 v9, -1, v9 -; GFX6-NEXT: v_and_b32_e32 v6, s4, v6 -; GFX6-NEXT: v_lshrrev_b32_e32 v1, v1, v6 -; GFX6-NEXT: v_xor_b32_e32 v6, -1, v10 -; GFX6-NEXT: v_and_b32_e32 v9, 7, v9 +; GFX6-NEXT: v_lshrrev_b32_e32 v10, v10, v11 +; GFX6-NEXT: v_or_b32_e32 v0, v0, v10 +; GFX6-NEXT: v_and_b32_e32 v10, 7, v7 +; GFX6-NEXT: v_xor_b32_e32 v7, -1, v7 +; GFX6-NEXT: v_and_b32_e32 v7, 7, v7 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v3 -; GFX6-NEXT: v_mov_b32_e32 v2, 0xff -; GFX6-NEXT: v_lshlrev_b32_e32 v3, v9, v3 -; GFX6-NEXT: v_and_b32_e32 v6, 7, v6 +; GFX6-NEXT: v_lshlrev_b32_e32 v3, v7, v3 +; GFX6-NEXT: v_bfe_u32 v7, v1, 8, 8 +; GFX6-NEXT: v_lshrrev_b32_e32 v7, v10, v7 +; GFX6-NEXT: v_or_b32_e32 v3, v3, v7 +; GFX6-NEXT: v_and_b32_e32 v7, 7, v8 +; GFX6-NEXT: v_xor_b32_e32 v8, -1, v8 +; GFX6-NEXT: v_lshrrev_b32_e32 v6, 24, v1 +; GFX6-NEXT: v_bfe_u32 v1, v1, 16, 8 +; GFX6-NEXT: v_and_b32_e32 v8, 7, v8 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 1, v4 -; GFX6-NEXT: v_or_b32_e32 v1, v3, v1 -; GFX6-NEXT: v_lshlrev_b32_e32 v4, v6, v4 -; GFX6-NEXT: v_and_b32_e32 v3, 7, v10 -; GFX6-NEXT: v_and_b32_e32 v6, v7, v2 -; GFX6-NEXT: v_lshrrev_b32_e32 v3, v3, v6 -; GFX6-NEXT: v_xor_b32_e32 v6, -1, v11 -; GFX6-NEXT: v_and_b32_e32 v1, v1, v2 -; GFX6-NEXT: v_or_b32_e32 v3, v4, v3 -; GFX6-NEXT: v_and_b32_e32 v4, 7, v11 -; GFX6-NEXT: v_and_b32_e32 v6, 7, v6 +; GFX6-NEXT: v_mov_b32_e32 v2, 0xff +; GFX6-NEXT: v_lshrrev_b32_e32 v1, v7, v1 +; GFX6-NEXT: v_xor_b32_e32 v7, -1, v9 +; GFX6-NEXT: v_lshlrev_b32_e32 v4, v8, v4 +; GFX6-NEXT: v_or_b32_e32 v1, v4, v1 +; GFX6-NEXT: v_and_b32_e32 v4, 7, v9 +; GFX6-NEXT: v_and_b32_e32 v3, v3, v2 +; GFX6-NEXT: v_and_b32_e32 v7, 7, v7 ; GFX6-NEXT: v_lshlrev_b32_e32 v5, 1, v5 +; GFX6-NEXT: v_and_b32_e32 v1, v1, v2 +; GFX6-NEXT: v_lshlrev_b32_e32 v5, v7, v5 +; GFX6-NEXT: v_lshrrev_b32_e32 v4, v4, v6 ; GFX6-NEXT: v_and_b32_e32 v0, v0, v2 -; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1 -; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX6-NEXT: v_and_b32_e32 v1, v3, v2 -; GFX6-NEXT: v_lshlrev_b32_e32 v5, v6, v5 -; GFX6-NEXT: v_lshrrev_b32_e32 v4, v4, v8 -; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX6-NEXT: v_lshlrev_b32_e32 v3, 8, v3 ; GFX6-NEXT: v_or_b32_e32 v4, v5, v4 +; GFX6-NEXT: v_or_b32_e32 v0, v0, v3 +; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX6-NEXT: v_and_b32_e32 v1, v4, v2 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v1 @@ -1477,69 +1470,64 @@ define i24 @v_fshr_i24(i24 %lhs, i24 %rhs, i24 %amt) { define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 inreg %amt.arg) { ; GFX6-LABEL: s_fshr_v2i24: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_movk_i32 s10, 0xff -; GFX6-NEXT: s_lshr_b32 s9, s1, 8 -; GFX6-NEXT: s_and_b32 s1, s1, s10 -; GFX6-NEXT: s_lshr_b32 s6, s0, 8 -; GFX6-NEXT: s_lshr_b32 s8, s0, 24 -; GFX6-NEXT: s_lshl_b32 s1, s1, 8 -; GFX6-NEXT: s_or_b32 s1, s8, s1 -; GFX6-NEXT: s_and_b32 s6, s6, s10 -; GFX6-NEXT: s_lshr_b32 s8, s2, 8 -; GFX6-NEXT: s_and_b32 s8, s8, s10 -; GFX6-NEXT: s_lshr_b32 s7, s0, 16 -; GFX6-NEXT: s_and_b32 s0, s0, s10 -; GFX6-NEXT: s_lshl_b32 s6, s6, 8 -; GFX6-NEXT: s_or_b32 s0, s0, s6 -; GFX6-NEXT: s_and_b32 s6, s7, s10 -; GFX6-NEXT: s_and_b32 s7, s9, s10 -; GFX6-NEXT: s_lshr_b32 s9, s2, 16 -; GFX6-NEXT: s_lshr_b32 s11, s2, 24 -; GFX6-NEXT: s_and_b32 s2, s2, s10 -; GFX6-NEXT: s_lshl_b32 s8, s8, 8 -; GFX6-NEXT: s_or_b32 s2, s2, s8 -; GFX6-NEXT: s_and_b32 s8, s9, s10 +; GFX6-NEXT: s_movk_i32 s9, 0xff +; GFX6-NEXT: s_mov_b32 s11, 0x80008 +; GFX6-NEXT: s_lshr_b32 s6, s0, 16 ; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 -; GFX6-NEXT: s_bfe_u32 s8, s8, 0x100000 +; GFX6-NEXT: s_lshr_b32 s8, s1, 8 +; GFX6-NEXT: s_and_b32 s1, s1, s9 +; GFX6-NEXT: s_lshr_b32 s7, s0, 24 +; GFX6-NEXT: s_and_b32 s10, s0, s9 +; GFX6-NEXT: s_bfe_u32 s0, s0, s11 +; GFX6-NEXT: s_lshl_b32 s0, s0, 8 +; GFX6-NEXT: s_lshl_b32 s1, s1, 8 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX6-NEXT: s_or_b32 s0, s10, s0 +; GFX6-NEXT: s_or_b32 s1, s7, s1 +; GFX6-NEXT: s_and_b32 s7, s8, s9 +; GFX6-NEXT: s_lshr_b32 s8, s2, 16 +; GFX6-NEXT: s_lshr_b32 s10, s2, 24 +; GFX6-NEXT: s_and_b32 s13, s2, s9 +; GFX6-NEXT: s_bfe_u32 s2, s2, s11 +; GFX6-NEXT: s_lshl_b32 s2, s2, 8 +; GFX6-NEXT: s_and_b32 s8, s8, s9 +; GFX6-NEXT: s_or_b32 s2, s13, s2 +; GFX6-NEXT: s_bfe_u32 s8, s8, 0x100000 +; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX6-NEXT: s_lshr_b32 s12, s3, 8 -; GFX6-NEXT: s_and_b32 s3, s3, s10 +; GFX6-NEXT: s_and_b32 s3, s3, s9 ; GFX6-NEXT: s_bfe_u32 s2, s2, 0x100000 ; GFX6-NEXT: s_lshl_b32 s8, s8, 16 ; GFX6-NEXT: s_lshl_b32 s3, s3, 8 ; GFX6-NEXT: s_or_b32 s2, s2, s8 -; GFX6-NEXT: s_and_b32 s8, s12, s10 -; GFX6-NEXT: s_or_b32 s3, s11, s3 +; GFX6-NEXT: s_and_b32 s8, s12, s9 +; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX6-NEXT: s_or_b32 s3, s10, s3 ; GFX6-NEXT: s_bfe_u32 s8, s8, 0x100000 -; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX6-NEXT: s_bfe_u32 s3, s3, 0x100000 ; GFX6-NEXT: s_lshl_b32 s8, s8, 16 ; GFX6-NEXT: s_or_b32 s3, s3, s8 -; GFX6-NEXT: s_lshr_b32 s8, s4, 8 -; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX6-NEXT: s_and_b32 s8, s8, s10 -; GFX6-NEXT: s_lshr_b32 s9, s4, 16 -; GFX6-NEXT: s_lshr_b32 s11, s4, 24 -; GFX6-NEXT: s_and_b32 s4, s4, s10 -; GFX6-NEXT: s_lshl_b32 s8, s8, 8 -; GFX6-NEXT: s_or_b32 s4, s4, s8 -; GFX6-NEXT: s_and_b32 s8, s9, s10 -; GFX6-NEXT: s_sub_i32 s9, 0, 24 -; GFX6-NEXT: v_mul_lo_u32 v1, s9, v0 +; GFX6-NEXT: s_lshr_b32 s8, s4, 16 +; GFX6-NEXT: s_lshr_b32 s10, s4, 24 +; GFX6-NEXT: s_and_b32 s13, s4, s9 +; GFX6-NEXT: s_bfe_u32 s4, s4, s11 +; GFX6-NEXT: s_sub_i32 s11, 0, 24 +; GFX6-NEXT: v_mul_lo_u32 v1, s11, v0 +; GFX6-NEXT: s_lshl_b32 s4, s4, 8 +; GFX6-NEXT: s_and_b32 s8, s8, s9 +; GFX6-NEXT: s_or_b32 s4, s13, s4 +; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1 ; GFX6-NEXT: s_bfe_u32 s8, s8, 0x100000 ; GFX6-NEXT: s_bfe_u32 s4, s4, 0x100000 ; GFX6-NEXT: s_lshl_b32 s8, s8, 16 -; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1 -; GFX6-NEXT: s_or_b32 s4, s4, s8 -; GFX6-NEXT: s_lshr_b32 s12, s5, 8 -; GFX6-NEXT: s_and_b32 s5, s5, s10 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; GFX6-NEXT: s_or_b32 s4, s4, s8 ; GFX6-NEXT: v_mul_hi_u32 v0, s4, v0 ; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v1, 24 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v1, v1 -; GFX6-NEXT: s_lshl_b32 s5, s5, 8 +; GFX6-NEXT: s_lshr_b32 s12, s5, 8 ; GFX6-NEXT: v_mul_lo_u32 v0, v0, 24 -; GFX6-NEXT: s_and_b32 s8, s12, s10 +; GFX6-NEXT: s_and_b32 s5, s5, s9 ; GFX6-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 ; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s4, v0 @@ -1549,20 +1537,23 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i ; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v0 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GFX6-NEXT: v_mul_lo_u32 v2, s9, v1 -; GFX6-NEXT: s_or_b32 s5, s11, s5 +; GFX6-NEXT: v_mul_lo_u32 v2, s11, v1 +; GFX6-NEXT: s_lshl_b32 s5, s5, 8 +; GFX6-NEXT: s_and_b32 s8, s12, s9 +; GFX6-NEXT: s_or_b32 s5, s10, s5 +; GFX6-NEXT: v_mul_hi_u32 v2, v1, v2 ; GFX6-NEXT: s_bfe_u32 s8, s8, 0x100000 ; GFX6-NEXT: s_bfe_u32 s5, s5, 0x100000 -; GFX6-NEXT: v_mul_hi_u32 v2, v1, v2 ; GFX6-NEXT: s_lshl_b32 s8, s8, 16 ; GFX6-NEXT: s_or_b32 s5, s5, s8 -; GFX6-NEXT: s_bfe_u32 s0, s0, 0x100000 ; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v2 ; GFX6-NEXT: v_mul_hi_u32 v1, s5, v1 +; GFX6-NEXT: s_and_b32 s6, s6, s9 +; GFX6-NEXT: s_bfe_u32 s0, s0, 0x100000 ; GFX6-NEXT: s_bfe_u32 s6, s6, 0x100000 +; GFX6-NEXT: v_mul_lo_u32 v1, v1, 24 ; GFX6-NEXT: s_mov_b32 s8, 0xffffff ; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 23, v0 -; GFX6-NEXT: v_mul_lo_u32 v1, v1, 24 ; GFX6-NEXT: s_lshl_b32 s4, s6, 17 ; GFX6-NEXT: s_lshl_b32 s0, s0, 1 ; GFX6-NEXT: v_and_b32_e32 v0, s8, v0 @@ -1587,27 +1578,23 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i ; GFX6-NEXT: v_and_b32_e32 v1, v1, v4 ; GFX6-NEXT: s_or_b32 s0, s0, s1 ; GFX6-NEXT: v_and_b32_e32 v2, v2, v4 +; GFX6-NEXT: v_bfe_u32 v3, v0, 8, 8 ; GFX6-NEXT: v_lshl_b32_e32 v2, s0, v2 ; GFX6-NEXT: v_lshr_b32_e32 v1, s3, v1 ; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 -; GFX6-NEXT: v_lshrrev_b32_e32 v2, 8, v0 -; GFX6-NEXT: v_and_b32_e32 v2, s10, v2 -; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v0 -; GFX6-NEXT: v_and_b32_e32 v0, s10, v0 -; GFX6-NEXT: v_lshlrev_b32_e32 v2, 8, v2 -; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX6-NEXT: v_and_b32_e32 v2, s10, v3 -; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX6-NEXT: v_lshrrev_b32_e32 v4, 8, v1 -; GFX6-NEXT: v_lshrrev_b32_e32 v5, 16, v1 -; GFX6-NEXT: v_and_b32_e32 v1, s10, v1 +; GFX6-NEXT: v_and_b32_e32 v2, s9, v0 +; GFX6-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX6-NEXT: v_lshlrev_b32_e32 v3, 8, v3 +; GFX6-NEXT: v_or_b32_e32 v2, v2, v3 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX6-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX6-NEXT: v_and_b32_e32 v2, s9, v1 +; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v2 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX6-NEXT: v_and_b32_e32 v2, s10, v5 -; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v1 -; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX6-NEXT: v_and_b32_e32 v1, s10, v4 -; GFX6-NEXT: v_lshlrev_b32_e32 v2, 8, v2 -; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX6-NEXT: v_bfe_u32 v2, v1, 8, 8 +; GFX6-NEXT: v_bfe_u32 v1, v1, 16, 8 +; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1 +; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 ; GFX6-NEXT: v_readfirstlane_b32 s0, v0 ; GFX6-NEXT: v_readfirstlane_b32 s1, v1 ; GFX6-NEXT: ; return to shader part epilog @@ -1728,103 +1715,100 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i ; GFX8-NEXT: v_lshlrev_b32_e64 v2, v2, s0 ; GFX8-NEXT: v_lshrrev_b32_e64 v1, v1, s3 ; GFX8-NEXT: v_or_b32_e32 v1, v2, v1 -; GFX8-NEXT: v_lshrrev_b32_e32 v2, 8, v0 -; GFX8-NEXT: v_mov_b32_e32 v4, 8 -; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_mov_b32_e32 v4, s10 -; GFX8-NEXT: v_or_b32_sdwa v2, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 -; GFX8-NEXT: v_and_b32_e32 v2, s10, v1 -; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v1 -; GFX8-NEXT: v_and_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_lshlrev_b32_e32 v1, 8, v1 -; GFX8-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX8-NEXT: v_or_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_mov_b32_e32 v2, 8 +; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_mov_b32_e32 v4, 16 +; GFX8-NEXT: v_or_b32_sdwa v3, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_e32 v0, v3, v0 +; GFX8-NEXT: v_and_b32_e32 v3, s10, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_lshlrev_b32_e32 v3, 24, v3 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v3 +; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD ; GFX8-NEXT: v_readfirstlane_b32 s0, v0 ; GFX8-NEXT: v_readfirstlane_b32 s1, v1 ; GFX8-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_fshr_v2i24: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_movk_i32 s11, 0xff -; GFX9-NEXT: s_lshr_b32 s10, s1, 8 -; GFX9-NEXT: s_bfe_u32 s12, 8, 0x100000 -; GFX9-NEXT: s_and_b32 s1, s1, s11 +; GFX9-NEXT: s_movk_i32 s12, 0xff +; GFX9-NEXT: s_lshr_b32 s11, s1, 8 +; GFX9-NEXT: s_bfe_u32 s13, 8, 0x100000 +; GFX9-NEXT: s_and_b32 s1, s1, s12 ; GFX9-NEXT: s_lshr_b32 s7, s0, 8 -; GFX9-NEXT: s_lshr_b32 s9, s0, 24 -; GFX9-NEXT: s_lshl_b32 s1, s1, s12 -; GFX9-NEXT: s_or_b32 s1, s9, s1 -; GFX9-NEXT: s_and_b32 s7, s7, s11 -; GFX9-NEXT: s_lshr_b32 s9, s2, 8 -; GFX9-NEXT: s_and_b32 s9, s9, s11 -; GFX9-NEXT: s_lshr_b32 s8, s0, 16 -; GFX9-NEXT: s_and_b32 s0, s0, s11 -; GFX9-NEXT: s_lshl_b32 s7, s7, s12 +; GFX9-NEXT: s_lshr_b32 s10, s0, 24 +; GFX9-NEXT: s_lshl_b32 s1, s1, s13 +; GFX9-NEXT: s_or_b32 s1, s10, s1 +; GFX9-NEXT: s_and_b32 s7, s7, s12 +; GFX9-NEXT: s_lshr_b32 s10, s2, 8 +; GFX9-NEXT: s_and_b32 s10, s10, s12 +; GFX9-NEXT: s_lshr_b32 s9, s0, 16 +; GFX9-NEXT: s_and_b32 s0, s0, s12 +; GFX9-NEXT: s_lshl_b32 s7, s7, s13 ; GFX9-NEXT: s_or_b32 s0, s0, s7 -; GFX9-NEXT: s_and_b32 s7, s8, s11 -; GFX9-NEXT: s_and_b32 s8, s10, s11 -; GFX9-NEXT: s_lshr_b32 s10, s2, 16 -; GFX9-NEXT: s_lshr_b32 s13, s2, 24 -; GFX9-NEXT: s_and_b32 s2, s2, s11 -; GFX9-NEXT: s_lshl_b32 s9, s9, s12 -; GFX9-NEXT: s_or_b32 s2, s2, s9 -; GFX9-NEXT: s_and_b32 s9, s10, s11 +; GFX9-NEXT: s_and_b32 s7, s9, s12 +; GFX9-NEXT: s_and_b32 s9, s11, s12 +; GFX9-NEXT: s_lshr_b32 s11, s2, 16 +; GFX9-NEXT: s_lshr_b32 s14, s2, 24 +; GFX9-NEXT: s_and_b32 s2, s2, s12 +; GFX9-NEXT: s_lshl_b32 s10, s10, s13 +; GFX9-NEXT: s_or_b32 s2, s2, s10 +; GFX9-NEXT: s_and_b32 s10, s11, s12 ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 -; GFX9-NEXT: s_bfe_u32 s9, s9, 0x100000 +; GFX9-NEXT: s_bfe_u32 s10, s10, 0x100000 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX9-NEXT: s_lshr_b32 s14, s3, 8 -; GFX9-NEXT: s_and_b32 s3, s3, s11 +; GFX9-NEXT: s_lshr_b32 s15, s3, 8 +; GFX9-NEXT: s_and_b32 s3, s3, s12 ; GFX9-NEXT: s_bfe_u32 s2, s2, 0x100000 -; GFX9-NEXT: s_lshl_b32 s9, s9, 16 -; GFX9-NEXT: s_lshl_b32 s3, s3, s12 -; GFX9-NEXT: s_or_b32 s2, s2, s9 -; GFX9-NEXT: s_and_b32 s9, s14, s11 -; GFX9-NEXT: s_or_b32 s3, s13, s3 -; GFX9-NEXT: s_bfe_u32 s9, s9, 0x100000 +; GFX9-NEXT: s_lshl_b32 s10, s10, 16 +; GFX9-NEXT: s_lshl_b32 s3, s3, s13 +; GFX9-NEXT: s_or_b32 s2, s2, s10 +; GFX9-NEXT: s_and_b32 s10, s15, s12 +; GFX9-NEXT: s_or_b32 s3, s14, s3 +; GFX9-NEXT: s_bfe_u32 s10, s10, 0x100000 ; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX9-NEXT: s_bfe_u32 s3, s3, 0x100000 -; GFX9-NEXT: s_lshl_b32 s9, s9, 16 -; GFX9-NEXT: s_or_b32 s3, s3, s9 -; GFX9-NEXT: s_lshr_b32 s9, s4, 8 +; GFX9-NEXT: s_lshl_b32 s10, s10, 16 +; GFX9-NEXT: s_or_b32 s3, s3, s10 +; GFX9-NEXT: s_lshr_b32 s10, s4, 8 ; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX9-NEXT: s_and_b32 s9, s9, s11 -; GFX9-NEXT: s_lshr_b32 s10, s4, 16 -; GFX9-NEXT: s_lshr_b32 s13, s4, 24 -; GFX9-NEXT: s_and_b32 s4, s4, s11 -; GFX9-NEXT: s_lshl_b32 s9, s9, s12 -; GFX9-NEXT: s_or_b32 s4, s4, s9 -; GFX9-NEXT: s_and_b32 s9, s10, s11 -; GFX9-NEXT: s_sub_i32 s10, 0, 24 -; GFX9-NEXT: v_mul_lo_u32 v1, s10, v0 -; GFX9-NEXT: s_bfe_u32 s9, s9, 0x100000 +; GFX9-NEXT: s_and_b32 s10, s10, s12 +; GFX9-NEXT: s_lshr_b32 s11, s4, 16 +; GFX9-NEXT: s_lshr_b32 s14, s4, 24 +; GFX9-NEXT: s_and_b32 s4, s4, s12 +; GFX9-NEXT: s_lshl_b32 s10, s10, s13 +; GFX9-NEXT: s_or_b32 s4, s4, s10 +; GFX9-NEXT: s_and_b32 s10, s11, s12 +; GFX9-NEXT: s_sub_i32 s11, 0, 24 +; GFX9-NEXT: v_mul_lo_u32 v1, s11, v0 +; GFX9-NEXT: s_bfe_u32 s10, s10, 0x100000 ; GFX9-NEXT: s_bfe_u32 s4, s4, 0x100000 -; GFX9-NEXT: s_lshl_b32 s9, s9, 16 +; GFX9-NEXT: s_lshl_b32 s10, s10, 16 ; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1 -; GFX9-NEXT: s_or_b32 s4, s4, s9 -; GFX9-NEXT: s_lshr_b32 s14, s5, 8 -; GFX9-NEXT: s_and_b32 s5, s5, s11 +; GFX9-NEXT: s_or_b32 s4, s4, s10 +; GFX9-NEXT: s_lshr_b32 s15, s5, 8 +; GFX9-NEXT: s_and_b32 s5, s5, s12 ; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 ; GFX9-NEXT: v_mul_hi_u32 v0, s4, v0 ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v1, 24 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v1, v1 -; GFX9-NEXT: s_lshl_b32 s5, s5, s12 +; GFX9-NEXT: s_lshl_b32 s5, s5, s13 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, 24 -; GFX9-NEXT: s_and_b32 s9, s14, s11 +; GFX9-NEXT: s_and_b32 s10, s15, s12 ; GFX9-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 ; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX9-NEXT: v_sub_u32_e32 v0, s4, v0 ; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v0 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GFX9-NEXT: v_mul_lo_u32 v2, s10, v1 -; GFX9-NEXT: s_or_b32 s5, s13, s5 -; GFX9-NEXT: s_bfe_u32 s9, s9, 0x100000 +; GFX9-NEXT: v_mul_lo_u32 v2, s11, v1 +; GFX9-NEXT: s_or_b32 s5, s14, s5 +; GFX9-NEXT: s_bfe_u32 s10, s10, 0x100000 ; GFX9-NEXT: s_bfe_u32 s5, s5, 0x100000 ; GFX9-NEXT: v_mul_hi_u32 v2, v1, v2 -; GFX9-NEXT: s_lshl_b32 s9, s9, 16 -; GFX9-NEXT: s_or_b32 s5, s5, s9 +; GFX9-NEXT: s_lshl_b32 s10, s10, 16 +; GFX9-NEXT: s_or_b32 s5, s5, s10 ; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v0 ; GFX9-NEXT: v_add_u32_e32 v1, v1, v2 ; GFX9-NEXT: v_mul_hi_u32 v1, s5, v1 @@ -1833,14 +1817,14 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; GFX9-NEXT: v_mul_lo_u32 v1, v1, 24 ; GFX9-NEXT: s_bfe_u32 s7, s7, 0x100000 -; GFX9-NEXT: s_mov_b32 s9, 0xffffff +; GFX9-NEXT: s_mov_b32 s10, 0xffffff ; GFX9-NEXT: v_sub_u32_e32 v3, 23, v0 -; GFX9-NEXT: v_and_b32_e32 v0, s9, v0 +; GFX9-NEXT: v_and_b32_e32 v0, s10, v0 ; GFX9-NEXT: s_lshl_b32 s4, s7, 17 ; GFX9-NEXT: s_lshl_b32 s0, s0, 1 ; GFX9-NEXT: v_sub_u32_e32 v1, s5, v1 ; GFX9-NEXT: s_or_b32 s0, s4, s0 -; GFX9-NEXT: v_and_b32_e32 v3, s9, v3 +; GFX9-NEXT: v_and_b32_e32 v3, s10, v3 ; GFX9-NEXT: v_lshrrev_b32_e64 v0, v0, s2 ; GFX9-NEXT: v_lshl_or_b32 v0, s0, v3, v0 ; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v1 @@ -1850,28 +1834,27 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 ; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; GFX9-NEXT: s_bfe_u32 s8, s8, 0x100000 +; GFX9-NEXT: s_bfe_u32 s9, s9, 0x100000 ; GFX9-NEXT: v_mov_b32_e32 v2, 0xffffff ; GFX9-NEXT: v_sub_u32_e32 v3, 23, v1 ; GFX9-NEXT: v_and_b32_e32 v1, v1, v2 -; GFX9-NEXT: s_lshl_b32 s0, s8, 17 +; GFX9-NEXT: s_lshl_b32 s0, s9, 17 ; GFX9-NEXT: s_lshl_b32 s1, s1, 1 -; GFX9-NEXT: v_and_b32_e32 v3, v3, v2 ; GFX9-NEXT: s_or_b32 s0, s0, s1 +; GFX9-NEXT: v_and_b32_e32 v3, v3, v2 ; GFX9-NEXT: v_lshrrev_b32_e64 v1, v1, s3 ; GFX9-NEXT: v_lshl_or_b32 v1, s0, v3, v1 ; GFX9-NEXT: s_mov_b32 s6, 8 -; GFX9-NEXT: v_lshrrev_b32_e32 v2, 8, v0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v2, s6, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_b32_e32 v4, s11, v1 -; GFX9-NEXT: v_lshrrev_b32_e32 v3, 8, v1 -; GFX9-NEXT: v_and_b32_sdwa v1, v1, s11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b32_e32 v1, 8, v1 -; GFX9-NEXT: v_and_or_b32 v2, v0, s11, v2 -; GFX9-NEXT: v_and_b32_sdwa v0, v0, s11 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; GFX9-NEXT: v_or3_b32 v0, v2, v0, v4 -; GFX9-NEXT: v_and_or_b32 v1, v3, s11, v1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, s6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_and_b32_e32 v3, s12, v1 +; GFX9-NEXT: s_mov_b32 s8, 16 +; GFX9-NEXT: v_and_or_b32 v2, v0, s12, v2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_e32 v3, 24, v3 +; GFX9-NEXT: v_or3_b32 v0, v2, v0, v3 +; GFX9-NEXT: v_bfe_u32 v2, v1, 8, 8 +; GFX9-NEXT: v_bfe_u32 v1, v1, 16, 8 +; GFX9-NEXT: v_lshl_or_b32 v1, v1, 8, v2 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0 ; GFX9-NEXT: v_readfirstlane_b32 s1, v1 ; GFX9-NEXT: ; return to shader part epilog @@ -1880,130 +1863,129 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i ; GFX10: ; %bb.0: ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v1, 24 -; GFX10-NEXT: s_sub_i32 s12, 0, 24 -; GFX10-NEXT: s_movk_i32 s9, 0xff -; GFX10-NEXT: s_lshr_b32 s14, s4, 8 +; GFX10-NEXT: s_sub_i32 s13, 0, 24 +; GFX10-NEXT: s_movk_i32 s10, 0xff +; GFX10-NEXT: s_lshr_b32 s12, s4, 8 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v1, v1 -; GFX10-NEXT: s_lshr_b32 s15, s4, 16 -; GFX10-NEXT: s_bfe_u32 s10, 8, 0x100000 -; GFX10-NEXT: s_and_b32 s14, s14, s9 -; GFX10-NEXT: s_and_b32 s16, s4, s9 -; GFX10-NEXT: s_lshl_b32 s14, s14, s10 -; GFX10-NEXT: s_and_b32 s15, s15, s9 -; GFX10-NEXT: s_or_b32 s14, s16, s14 -; GFX10-NEXT: s_lshr_b32 s4, s4, 24 +; GFX10-NEXT: s_bfe_u32 s11, 8, 0x100000 +; GFX10-NEXT: s_and_b32 s12, s12, s10 +; GFX10-NEXT: s_lshr_b32 s14, s4, 16 +; GFX10-NEXT: s_lshr_b32 s15, s4, 24 +; GFX10-NEXT: s_and_b32 s4, s4, s10 +; GFX10-NEXT: s_lshl_b32 s12, s12, s11 +; GFX10-NEXT: s_lshr_b32 s16, s5, 8 +; GFX10-NEXT: s_or_b32 s4, s4, s12 ; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX10-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 -; GFX10-NEXT: s_bfe_u32 s14, s14, 0x100000 -; GFX10-NEXT: s_lshr_b32 s6, s0, 8 -; GFX10-NEXT: s_lshr_b32 s11, s1, 8 +; GFX10-NEXT: s_and_b32 s5, s5, s10 +; GFX10-NEXT: s_bfe_u32 s4, s4, 0x100000 +; GFX10-NEXT: s_lshl_b32 s5, s5, s11 ; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v1 -; GFX10-NEXT: s_and_b32 s1, s1, s9 -; GFX10-NEXT: s_and_b32 s6, s6, s9 -; GFX10-NEXT: s_lshr_b32 s8, s0, 24 -; GFX10-NEXT: v_mul_lo_u32 v2, s12, v0 -; GFX10-NEXT: v_mul_lo_u32 v3, s12, v1 -; GFX10-NEXT: s_bfe_u32 s12, s15, 0x100000 -; GFX10-NEXT: s_lshr_b32 s15, s5, 8 +; GFX10-NEXT: s_or_b32 s5, s15, s5 +; GFX10-NEXT: s_lshr_b32 s9, s1, 8 +; GFX10-NEXT: s_bfe_u32 s5, s5, 0x100000 +; GFX10-NEXT: v_mul_lo_u32 v2, s13, v0 +; GFX10-NEXT: v_mul_lo_u32 v3, s13, v1 +; GFX10-NEXT: s_and_b32 s13, s14, s10 +; GFX10-NEXT: s_and_b32 s1, s1, s10 +; GFX10-NEXT: s_bfe_u32 s12, s13, 0x100000 +; GFX10-NEXT: s_lshr_b32 s6, s0, 8 ; GFX10-NEXT: s_lshl_b32 s12, s12, 16 -; GFX10-NEXT: s_and_b32 s5, s5, s9 -; GFX10-NEXT: s_or_b32 s12, s14, s12 -; GFX10-NEXT: s_lshl_b32 s5, s5, s10 +; GFX10-NEXT: s_lshr_b32 s8, s0, 24 ; GFX10-NEXT: v_mul_hi_u32 v2, v0, v2 -; GFX10-NEXT: s_and_b32 s14, s15, s9 -; GFX10-NEXT: s_or_b32 s4, s4, s5 -; GFX10-NEXT: s_bfe_u32 s5, s14, 0x100000 -; GFX10-NEXT: s_bfe_u32 s4, s4, 0x100000 -; GFX10-NEXT: s_lshl_b32 s5, s5, 16 -; GFX10-NEXT: s_lshl_b32 s1, s1, s10 -; GFX10-NEXT: s_or_b32 s4, s4, s5 +; GFX10-NEXT: s_or_b32 s4, s4, s12 +; GFX10-NEXT: s_and_b32 s12, s16, s10 +; GFX10-NEXT: s_lshl_b32 s1, s1, s11 +; GFX10-NEXT: s_bfe_u32 s12, s12, 0x100000 +; GFX10-NEXT: s_or_b32 s1, s8, s1 +; GFX10-NEXT: s_lshl_b32 s12, s12, 16 +; GFX10-NEXT: s_and_b32 s6, s6, s10 ; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v2 ; GFX10-NEXT: v_mul_hi_u32 v2, v1, v3 -; GFX10-NEXT: s_or_b32 s1, s8, s1 +; GFX10-NEXT: s_or_b32 s5, s5, s12 ; GFX10-NEXT: s_lshr_b32 s8, s2, 8 ; GFX10-NEXT: s_lshr_b32 s7, s0, 16 -; GFX10-NEXT: v_mul_hi_u32 v0, s12, v0 -; GFX10-NEXT: s_and_b32 s0, s0, s9 -; GFX10-NEXT: s_lshl_b32 s6, s6, s10 -; GFX10-NEXT: s_and_b32 s8, s8, s9 +; GFX10-NEXT: v_mul_hi_u32 v0, s4, v0 +; GFX10-NEXT: s_and_b32 s8, s8, s10 +; GFX10-NEXT: s_and_b32 s0, s0, s10 +; GFX10-NEXT: s_lshl_b32 s6, s6, s11 ; GFX10-NEXT: v_add_nc_u32_e32 v1, v1, v2 ; GFX10-NEXT: s_or_b32 s0, s0, s6 -; GFX10-NEXT: s_and_b32 s6, s7, s9 -; GFX10-NEXT: s_and_b32 s7, s11, s9 +; GFX10-NEXT: s_and_b32 s6, s7, s10 +; GFX10-NEXT: s_and_b32 s7, s9, s10 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, 24 -; GFX10-NEXT: v_mul_hi_u32 v1, s4, v1 -; GFX10-NEXT: s_lshr_b32 s11, s2, 16 -; GFX10-NEXT: s_and_b32 s13, s2, s9 -; GFX10-NEXT: s_lshl_b32 s5, s8, s10 -; GFX10-NEXT: s_and_b32 s8, s11, s9 -; GFX10-NEXT: s_lshr_b32 s11, s3, 8 -; GFX10-NEXT: s_and_b32 s3, s3, s9 -; GFX10-NEXT: v_sub_nc_u32_e32 v0, s12, v0 +; GFX10-NEXT: v_mul_hi_u32 v1, s5, v1 +; GFX10-NEXT: s_lshr_b32 s9, s2, 16 +; GFX10-NEXT: s_lshr_b32 s13, s2, 24 +; GFX10-NEXT: s_and_b32 s2, s2, s10 +; GFX10-NEXT: s_lshl_b32 s8, s8, s11 +; GFX10-NEXT: s_lshr_b32 s12, s3, 8 +; GFX10-NEXT: s_or_b32 s2, s2, s8 +; GFX10-NEXT: v_sub_nc_u32_e32 v0, s4, v0 ; GFX10-NEXT: v_mul_lo_u32 v1, v1, 24 -; GFX10-NEXT: s_or_b32 s5, s13, s5 -; GFX10-NEXT: s_bfe_u32 s8, s8, 0x100000 -; GFX10-NEXT: s_lshr_b32 s2, s2, 24 +; GFX10-NEXT: s_and_b32 s8, s9, s10 +; GFX10-NEXT: s_bfe_u32 s2, s2, 0x100000 +; GFX10-NEXT: s_bfe_u32 s4, s8, 0x100000 ; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v0 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 -; GFX10-NEXT: s_lshl_b32 s3, s3, s10 -; GFX10-NEXT: v_mov_b32_e32 v4, 0xffffff -; GFX10-NEXT: v_sub_nc_u32_e32 v1, s4, v1 -; GFX10-NEXT: s_mov_b32 s4, 0xffffff +; GFX10-NEXT: s_lshl_b32 s4, s4, 16 +; GFX10-NEXT: s_and_b32 s3, s3, s10 +; GFX10-NEXT: v_sub_nc_u32_e32 v1, s5, v1 +; GFX10-NEXT: s_or_b32 s2, s2, s4 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo +; GFX10-NEXT: s_mov_b32 s4, 0xffffff +; GFX10-NEXT: s_lshl_b32 s3, s3, s11 +; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v1 +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 +; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v0 +; GFX10-NEXT: s_and_b32 s5, s12, s10 +; GFX10-NEXT: s_or_b32 s3, s13, s3 ; GFX10-NEXT: s_bfe_u32 s5, s5, 0x100000 -; GFX10-NEXT: s_lshl_b32 s8, s8, 16 -; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v1 -; GFX10-NEXT: s_or_b32 s2, s2, s3 -; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 -; GFX10-NEXT: s_and_b32 s3, s11, s9 -; GFX10-NEXT: s_or_b32 s5, s5, s8 -; GFX10-NEXT: s_bfe_u32 s0, s0, 0x100000 -; GFX10-NEXT: s_bfe_u32 s6, s6, 0x100000 -; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo -; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 ; GFX10-NEXT: s_bfe_u32 s3, s3, 0x100000 -; GFX10-NEXT: s_bfe_u32 s2, s2, 0x100000 -; GFX10-NEXT: s_lshl_b32 s3, s3, 16 -; GFX10-NEXT: v_sub_nc_u32_e32 v2, 23, v0 -; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo -; GFX10-NEXT: v_and_b32_e32 v0, s4, v0 -; GFX10-NEXT: s_lshl_b32 s6, s6, 17 -; GFX10-NEXT: s_lshl_b32 s0, s0, 1 -; GFX10-NEXT: v_and_b32_e32 v2, s4, v2 -; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v1 +; GFX10-NEXT: s_lshl_b32 s5, s5, 16 +; GFX10-NEXT: s_bfe_u32 s0, s0, 0x100000 +; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v1 +; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 -; GFX10-NEXT: v_lshrrev_b32_e64 v0, v0, s5 -; GFX10-NEXT: s_or_b32 s0, s6, s0 -; GFX10-NEXT: s_or_b32 s2, s2, s3 +; GFX10-NEXT: s_bfe_u32 s6, s6, 0x100000 +; GFX10-NEXT: s_or_b32 s3, s3, s5 ; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000 -; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo +; GFX10-NEXT: v_sub_nc_u32_e32 v3, 23, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo +; GFX10-NEXT: v_mov_b32_e32 v2, 0xffffff +; GFX10-NEXT: v_and_b32_e32 v0, s4, v0 ; GFX10-NEXT: s_bfe_u32 s7, s7, 0x100000 -; GFX10-NEXT: v_lshl_or_b32 v0, s0, v2, v0 -; GFX10-NEXT: s_lshl_b32 s0, s7, 17 +; GFX10-NEXT: v_and_b32_e32 v3, s4, v3 +; GFX10-NEXT: v_sub_nc_u32_e32 v4, 23, v1 +; GFX10-NEXT: v_and_b32_e32 v1, v1, v2 +; GFX10-NEXT: v_lshrrev_b32_e64 v0, v0, s2 +; GFX10-NEXT: s_lshl_b32 s5, s6, 17 +; GFX10-NEXT: s_lshl_b32 s0, s0, 1 +; GFX10-NEXT: v_and_b32_e32 v2, v4, v2 +; GFX10-NEXT: v_lshrrev_b32_e64 v1, v1, s3 +; GFX10-NEXT: s_or_b32 s0, s5, s0 +; GFX10-NEXT: s_lshl_b32 s2, s7, 17 ; GFX10-NEXT: s_lshl_b32 s1, s1, 1 -; GFX10-NEXT: v_sub_nc_u32_e32 v3, 23, v1 -; GFX10-NEXT: v_and_b32_e32 v1, v1, v4 -; GFX10-NEXT: s_or_b32 s0, s0, s1 -; GFX10-NEXT: v_and_b32_e32 v2, v3, v4 -; GFX10-NEXT: v_lshrrev_b32_e64 v1, v1, s2 -; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v0 +; GFX10-NEXT: v_lshl_or_b32 v0, s0, v3, v0 +; GFX10-NEXT: s_or_b32 s0, s2, s1 ; GFX10-NEXT: v_lshl_or_b32 v1, s0, v2, v1 ; GFX10-NEXT: s_mov_b32 s0, 8 -; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_and_b32_e32 v3, s9, v1 -; GFX10-NEXT: v_and_b32_sdwa v4, v1, s9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v1 -; GFX10-NEXT: v_and_or_b32 v2, v0, s9, v2 -; GFX10-NEXT: v_and_b32_sdwa v0, v0, s9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: s_mov_b32 s0, 16 +; GFX10-NEXT: v_and_b32_e32 v3, s10, v1 +; GFX10-NEXT: v_bfe_u32 v4, v1, 8, 8 +; GFX10-NEXT: v_bfe_u32 v1, v1, 16, 8 +; GFX10-NEXT: v_and_or_b32 v2, v0, s10, v2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v0, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX10-NEXT: v_lshlrev_b32_e32 v3, 24, v3 -; GFX10-NEXT: v_lshlrev_b32_e32 v4, 8, v4 +; GFX10-NEXT: v_lshl_or_b32 v1, v1, 8, v4 ; GFX10-NEXT: v_or3_b32 v0, v2, v0, v3 -; GFX10-NEXT: v_and_or_b32 v1, v1, s9, v4 -; GFX10-NEXT: v_readfirstlane_b32 s0, v0 ; GFX10-NEXT: v_readfirstlane_b32 s1, v1 +; GFX10-NEXT: v_readfirstlane_b32 s0, v0 ; GFX10-NEXT: ; return to shader part epilog %lhs = bitcast i48 %lhs.arg to <2 x i24> %rhs = bitcast i48 %rhs.arg to <2 x i24> diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i8.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i8.ll index 76b83e3c08ee5..b2d8b512cf3c2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i8.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i8.ll @@ -721,32 +721,32 @@ define amdgpu_ps void @insertelement_v_v4i8_s_s(<4 x i8> addrspace(1)* %ptr, i8 ; GFX9-LABEL: insertelement_v_v4i8_s_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dword v0, v[0:1], off -; GFX9-NEXT: s_movk_i32 s1, 0xff ; GFX9-NEXT: s_and_b32 s3, s3, 3 ; GFX9-NEXT: s_mov_b32 s0, 8 -; GFX9-NEXT: s_and_b32 s2, s2, s1 +; GFX9-NEXT: s_movk_i32 s4, 0xff +; GFX9-NEXT: s_mov_b32 s1, 16 +; GFX9-NEXT: s_and_b32 s2, s2, s4 ; GFX9-NEXT: s_lshl_b32 s3, s3, 3 ; GFX9-NEXT: s_lshl_b32 s2, s2, s3 -; GFX9-NEXT: s_lshl_b32 s3, s1, s3 +; GFX9-NEXT: s_lshl_b32 s3, s4, s3 ; GFX9-NEXT: s_not_b32 s3, s3 -; GFX9-NEXT: v_mov_b32_e32 v2, s2 +; GFX9-NEXT: v_mov_b32_e32 v3, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, 8 +; GFX9-NEXT: v_mov_b32_e32 v2, 16 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v3, 8, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v3, s0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_b32_sdwa v5, v0, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_lshlrev_b32_sdwa v5, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v6, s1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_and_or_b32 v0, v0, s4, v5 ; GFX9-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; GFX9-NEXT: v_and_or_b32 v0, v0, s1, v3 -; GFX9-NEXT: v_or3_b32 v0, v0, v5, v4 -; GFX9-NEXT: v_and_or_b32 v0, v0, s3, v2 -; GFX9-NEXT: v_lshrrev_b32_e32 v2, 8, v0 +; GFX9-NEXT: v_or3_b32 v0, v0, v6, v4 +; GFX9-NEXT: v_and_or_b32 v0, v0, s3, v3 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_b32_sdwa v4, v0, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b32_e32 v2, 24, v3 -; GFX9-NEXT: v_and_or_b32 v0, v0, s1, v1 -; GFX9-NEXT: v_or3_b32 v2, v0, v4, v2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_and_or_b32 v0, v0, s4, v1 +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX9-NEXT: v_or3_b32 v2, v0, v2, v1 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: global_store_dword v[0:1], v2, off @@ -755,35 +755,34 @@ define amdgpu_ps void @insertelement_v_v4i8_s_s(<4 x i8> addrspace(1)* %ptr, i8 ; GFX8-LABEL: insertelement_v_v4i8_s_s: ; GFX8: ; %bb.0: ; GFX8-NEXT: flat_load_dword v0, v[0:1] -; GFX8-NEXT: s_movk_i32 s0, 0xff ; GFX8-NEXT: v_mov_b32_e32 v1, 8 +; GFX8-NEXT: v_mov_b32_e32 v2, 16 ; GFX8-NEXT: s_and_b32 s1, s3, 3 -; GFX8-NEXT: v_mov_b32_e32 v3, s0 +; GFX8-NEXT: s_movk_i32 s0, 0xff ; GFX8-NEXT: s_lshl_b32 s1, s1, 3 ; GFX8-NEXT: s_and_b32 s2, s2, s0 ; GFX8-NEXT: s_lshl_b32 s0, s0, s1 ; GFX8-NEXT: s_not_b32 s0, s0 ; GFX8-NEXT: s_lshl_b32 s2, s2, s1 -; GFX8-NEXT: v_mov_b32_e32 v2, 8 +; GFX8-NEXT: v_mov_b32_e32 v3, 8 +; GFX8-NEXT: v_mov_b32_e32 v4, 16 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 8, v0 -; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX8-NEXT: v_lshrrev_b32_e32 v5, 24, v0 -; GFX8-NEXT: v_and_b32_sdwa v6, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_lshlrev_b32_e32 v4, 24, v5 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v6 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v5 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: v_and_b32_e32 v0, s0, v0 ; GFX8-NEXT: v_or_b32_e32 v0, s2, v0 -; GFX8-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v0 -; GFX8-NEXT: v_and_b32_sdwa v3, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_lshlrev_b32_e32 v2, 24, v4 +; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v1, 24, v0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v1 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v3 -; GFX8-NEXT: v_or_b32_e32 v2, v0, v2 +; GFX8-NEXT: v_or_b32_e32 v2, v0, v1 ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: v_mov_b32_e32 v1, 0 ; GFX8-NEXT: flat_store_dword v[0:1], v2 @@ -804,32 +803,28 @@ define amdgpu_ps void @insertelement_v_v4i8_s_s(<4 x i8> addrspace(1)* %ptr, i8 ; GFX7-NEXT: s_not_b32 s1, s1 ; GFX7-NEXT: s_mov_b32 s6, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0 -; GFX7-NEXT: v_and_b32_e32 v1, s0, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GFX7-NEXT: v_and_b32_e32 v2, s0, v2 -; GFX7-NEXT: v_and_b32_e32 v0, s0, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX7-NEXT: v_bfe_u32 v3, v0, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v1, 24, v0 +; GFX7-NEXT: v_and_b32_e32 v2, s0, v0 +; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v1 +; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v3, 24, v3 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v3 ; GFX7-NEXT: v_and_b32_e32 v0, s1, v0 ; GFX7-NEXT: v_or_b32_e32 v0, s2, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0 -; GFX7-NEXT: v_and_b32_e32 v1, s0, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GFX7-NEXT: v_and_b32_e32 v2, s0, v2 -; GFX7-NEXT: v_and_b32_e32 v0, s0, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX7-NEXT: v_bfe_u32 v3, v0, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v1, 24, v0 +; GFX7-NEXT: v_and_b32_e32 v2, s0, v0 +; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v1 +; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v3, 24, v3 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v3 ; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX7-NEXT: s_endpgm ; @@ -837,30 +832,30 @@ define amdgpu_ps void @insertelement_v_v4i8_s_s(<4 x i8> addrspace(1)* %ptr, i8 ; GFX10: ; %bb.0: ; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: s_mov_b32 s0, 8 -; GFX10-NEXT: s_and_b32 s1, s3, 3 -; GFX10-NEXT: s_lshl_b32 s1, s1, 3 +; GFX10-NEXT: s_movk_i32 s1, 0xff +; GFX10-NEXT: s_and_b32 s2, s2, s1 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: s_movk_i32 s0, 0xff -; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GFX10-NEXT: v_and_b32_sdwa v3, v0, s0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: s_and_b32 s2, s2, s0 -; GFX10-NEXT: v_and_or_b32 v0, v0, s0, v1 -; GFX10-NEXT: s_lshl_b32 s3, s0, s1 -; GFX10-NEXT: s_lshl_b32 s1, s2, s1 +; GFX10-NEXT: s_mov_b32 s0, 16 +; GFX10-NEXT: v_lshlrev_b32_sdwa v3, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v0, v0, s1, v1 +; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v2 +; GFX10-NEXT: s_and_b32 s0, s3, 3 +; GFX10-NEXT: v_mov_b32_e32 v2, 16 +; GFX10-NEXT: s_lshl_b32 s0, s0, 3 +; GFX10-NEXT: v_or3_b32 v0, v0, v3, v1 +; GFX10-NEXT: s_lshl_b32 s3, s1, s0 +; GFX10-NEXT: s_lshl_b32 s0, s2, s0 ; GFX10-NEXT: s_not_b32 s2, s3 ; GFX10-NEXT: v_mov_b32_e32 v1, 8 -; GFX10-NEXT: v_or3_b32 v0, v0, v3, v2 -; GFX10-NEXT: v_and_or_b32 v0, v0, s2, s1 -; GFX10-NEXT: v_lshrrev_b32_e32 v2, 8, v0 +; GFX10-NEXT: v_and_or_b32 v0, v0, s2, s0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_and_b32_sdwa v2, v0, s0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b32_e32 v3, 24, v3 -; GFX10-NEXT: v_and_or_b32 v0, v0, s0, v1 -; GFX10-NEXT: v_or3_b32 v2, v0, v2, v3 +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v0, v0, s1, v1 +; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX10-NEXT: v_or3_b32 v2, v0, v2, v1 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: global_store_dword v[0:1], v2, off @@ -874,36 +869,34 @@ define amdgpu_ps void @insertelement_v_v4i8_s_s(<4 x i8> addrspace(1)* %ptr, i8 define amdgpu_ps void @insertelement_s_v4i8_v_s(<4 x i8> addrspace(4)* inreg %ptr, i8 %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_s_v4i8_v_s: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dword s1, s[2:3], 0x0 -; GFX9-NEXT: s_movk_i32 s6, 0xff -; GFX9-NEXT: v_and_b32_e32 v0, s6, v0 -; GFX9-NEXT: s_mov_b32 s0, 8 +; GFX9-NEXT: s_load_dword s0, s[2:3], 0x0 +; GFX9-NEXT: s_movk_i32 s5, 0xff +; GFX9-NEXT: v_and_b32_e32 v0, s5, v0 +; GFX9-NEXT: s_mov_b32 s1, 8 +; GFX9-NEXT: s_mov_b32 s2, 16 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_lshr_b32 s2, s1, 8 -; GFX9-NEXT: s_and_b32 s2, s2, s6 -; GFX9-NEXT: s_lshr_b32 s3, s1, 16 -; GFX9-NEXT: s_lshr_b32 s5, s1, 24 -; GFX9-NEXT: s_and_b32 s1, s1, s6 -; GFX9-NEXT: s_lshl_b32 s2, s2, 8 -; GFX9-NEXT: s_or_b32 s1, s1, s2 -; GFX9-NEXT: s_and_b32 s2, s3, s6 -; GFX9-NEXT: s_lshl_b32 s2, s2, 16 -; GFX9-NEXT: s_or_b32 s1, s1, s2 -; GFX9-NEXT: s_lshl_b32 s2, s5, 24 -; GFX9-NEXT: s_or_b32 s1, s1, s2 -; GFX9-NEXT: s_and_b32 s2, s4, 3 -; GFX9-NEXT: s_lshl_b32 s2, s2, 3 -; GFX9-NEXT: s_lshl_b32 s3, s6, s2 -; GFX9-NEXT: s_andn2_b32 s1, s1, s3 -; GFX9-NEXT: v_mov_b32_e32 v1, s1 -; GFX9-NEXT: v_lshl_or_b32 v0, v0, s2, v1 -; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_or_b32 v1, v0, s6, v1 -; GFX9-NEXT: v_and_b32_sdwa v0, v0, s6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GFX9-NEXT: v_or3_b32 v2, v1, v0, v2 +; GFX9-NEXT: s_bfe_u32 s7, s0, 0x80008 +; GFX9-NEXT: s_lshr_b32 s3, s0, 24 +; GFX9-NEXT: s_and_b32 s6, s0, s5 +; GFX9-NEXT: s_bfe_u32 s0, s0, 0x80010 +; GFX9-NEXT: s_lshl_b32 s7, s7, 8 +; GFX9-NEXT: s_or_b32 s6, s6, s7 +; GFX9-NEXT: s_lshl_b32 s0, s0, 16 +; GFX9-NEXT: s_or_b32 s0, s6, s0 +; GFX9-NEXT: s_lshl_b32 s3, s3, 24 +; GFX9-NEXT: s_or_b32 s0, s0, s3 +; GFX9-NEXT: s_and_b32 s3, s4, 3 +; GFX9-NEXT: s_lshl_b32 s3, s3, 3 +; GFX9-NEXT: s_lshl_b32 s4, s5, s3 +; GFX9-NEXT: s_andn2_b32 s0, s0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s0 +; GFX9-NEXT: v_lshl_or_b32 v0, v0, s3, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v1, 24, v0 +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, s1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_and_or_b32 v2, v0, s5, v2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v1 +; GFX9-NEXT: v_or3_b32 v2, v2, v0, v1 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: global_store_dword v[0:1], v2, off @@ -911,37 +904,34 @@ define amdgpu_ps void @insertelement_s_v4i8_v_s(<4 x i8> addrspace(4)* inreg %pt ; ; GFX8-LABEL: insertelement_s_v4i8_v_s: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dword s0, s[2:3], 0x0 -; GFX8-NEXT: s_movk_i32 s5, 0xff -; GFX8-NEXT: v_mov_b32_e32 v3, 8 +; GFX8-NEXT: s_load_dword s1, s[2:3], 0x0 +; GFX8-NEXT: s_movk_i32 s0, 0xff +; GFX8-NEXT: v_mov_b32_e32 v2, 8 +; GFX8-NEXT: v_mov_b32_e32 v3, 16 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_lshr_b32 s1, s0, 8 -; GFX8-NEXT: s_and_b32 s1, s1, s5 -; GFX8-NEXT: s_lshr_b32 s2, s0, 16 -; GFX8-NEXT: s_lshr_b32 s3, s0, 24 -; GFX8-NEXT: s_and_b32 s0, s0, s5 -; GFX8-NEXT: s_lshl_b32 s1, s1, 8 -; GFX8-NEXT: s_or_b32 s0, s0, s1 -; GFX8-NEXT: s_and_b32 s1, s2, s5 +; GFX8-NEXT: s_bfe_u32 s5, s1, 0x80008 +; GFX8-NEXT: s_lshr_b32 s2, s1, 24 +; GFX8-NEXT: s_and_b32 s3, s1, s0 +; GFX8-NEXT: s_bfe_u32 s1, s1, 0x80010 +; GFX8-NEXT: s_lshl_b32 s5, s5, 8 +; GFX8-NEXT: s_or_b32 s3, s3, s5 ; GFX8-NEXT: s_lshl_b32 s1, s1, 16 -; GFX8-NEXT: s_or_b32 s0, s0, s1 -; GFX8-NEXT: s_lshl_b32 s1, s3, 24 -; GFX8-NEXT: s_or_b32 s0, s0, s1 -; GFX8-NEXT: s_and_b32 s1, s4, 3 -; GFX8-NEXT: s_lshl_b32 s1, s1, 3 -; GFX8-NEXT: v_mov_b32_e32 v1, s1 -; GFX8-NEXT: s_lshl_b32 s1, s5, s1 +; GFX8-NEXT: s_or_b32 s1, s3, s1 +; GFX8-NEXT: s_lshl_b32 s2, s2, 24 +; GFX8-NEXT: s_or_b32 s1, s1, s2 +; GFX8-NEXT: s_and_b32 s2, s4, 3 +; GFX8-NEXT: s_lshl_b32 s2, s2, 3 +; GFX8-NEXT: s_lshl_b32 s0, s0, s2 +; GFX8-NEXT: v_mov_b32_e32 v1, s2 ; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: s_andn2_b32 s0, s0, s1 +; GFX8-NEXT: s_andn2_b32 s0, s1, s0 ; GFX8-NEXT: v_or_b32_e32 v0, s0, v0 -; GFX8-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_mov_b32_e32 v3, s5 -; GFX8-NEXT: v_lshrrev_b32_e32 v2, 24, v0 -; GFX8-NEXT: v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 -; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v2 +; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v1, 24, v0 +; GFX8-NEXT: v_or_b32_sdwa v2, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v1 ; GFX8-NEXT: v_or_b32_e32 v2, v0, v1 ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: v_mov_b32_e32 v1, 0 @@ -954,17 +944,15 @@ define amdgpu_ps void @insertelement_s_v4i8_v_s(<4 x i8> addrspace(4)* inreg %pt ; GFX7-NEXT: s_movk_i32 s5, 0xff ; GFX7-NEXT: v_and_b32_e32 v0, s5, v0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_lshr_b32 s1, s0, 8 -; GFX7-NEXT: s_and_b32 s1, s1, s5 -; GFX7-NEXT: s_lshr_b32 s2, s0, 16 -; GFX7-NEXT: s_lshr_b32 s3, s0, 24 -; GFX7-NEXT: s_and_b32 s0, s0, s5 -; GFX7-NEXT: s_lshl_b32 s1, s1, 8 -; GFX7-NEXT: s_or_b32 s0, s0, s1 -; GFX7-NEXT: s_and_b32 s1, s2, s5 -; GFX7-NEXT: s_lshl_b32 s1, s1, 16 -; GFX7-NEXT: s_or_b32 s0, s0, s1 -; GFX7-NEXT: s_lshl_b32 s1, s3, 24 +; GFX7-NEXT: s_bfe_u32 s3, s0, 0x80008 +; GFX7-NEXT: s_lshr_b32 s1, s0, 24 +; GFX7-NEXT: s_and_b32 s2, s0, s5 +; GFX7-NEXT: s_bfe_u32 s0, s0, 0x80010 +; GFX7-NEXT: s_lshl_b32 s3, s3, 8 +; GFX7-NEXT: s_or_b32 s2, s2, s3 +; GFX7-NEXT: s_lshl_b32 s0, s0, 16 +; GFX7-NEXT: s_or_b32 s0, s2, s0 +; GFX7-NEXT: s_lshl_b32 s1, s1, 24 ; GFX7-NEXT: s_or_b32 s0, s0, s1 ; GFX7-NEXT: s_and_b32 s1, s4, 3 ; GFX7-NEXT: s_lshl_b32 s1, s1, 3 @@ -972,17 +960,15 @@ define amdgpu_ps void @insertelement_s_v4i8_v_s(<4 x i8> addrspace(4)* inreg %pt ; GFX7-NEXT: s_lshl_b32 s1, s5, s1 ; GFX7-NEXT: s_andn2_b32 s0, s0, s1 ; GFX7-NEXT: v_or_b32_e32 v0, s0, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX7-NEXT: v_and_b32_e32 v1, s5, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GFX7-NEXT: v_and_b32_e32 v0, s5, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX7-NEXT: v_and_b32_e32 v1, s5, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX7-NEXT: v_bfe_u32 v3, v0, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v1, 24, v0 +; GFX7-NEXT: v_and_b32_e32 v2, s5, v0 +; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 +; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v1 ; GFX7-NEXT: s_mov_b64 s[0:1], 0 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7-NEXT: s_mov_b32 s2, -1 @@ -998,29 +984,27 @@ define amdgpu_ps void @insertelement_s_v4i8_v_s(<4 x i8> addrspace(4)* inreg %pt ; GFX10-NEXT: v_and_b32_e32 v0, s2, v0 ; GFX10-NEXT: s_lshl_b32 s1, s1, 3 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_lshr_b32 s3, s0, 8 -; GFX10-NEXT: s_lshr_b32 s4, s0, 16 -; GFX10-NEXT: s_and_b32 s3, s3, s2 -; GFX10-NEXT: s_and_b32 s4, s4, s2 -; GFX10-NEXT: s_lshr_b32 s5, s0, 24 -; GFX10-NEXT: s_and_b32 s0, s0, s2 -; GFX10-NEXT: s_lshl_b32 s3, s3, 8 -; GFX10-NEXT: s_lshl_b32 s4, s4, 16 -; GFX10-NEXT: s_or_b32 s0, s0, s3 -; GFX10-NEXT: s_lshl_b32 s3, s5, 24 -; GFX10-NEXT: s_or_b32 s0, s0, s4 +; GFX10-NEXT: s_bfe_u32 s5, s0, 0x80008 +; GFX10-NEXT: s_lshr_b32 s3, s0, 24 +; GFX10-NEXT: s_and_b32 s4, s0, s2 +; GFX10-NEXT: s_bfe_u32 s0, s0, 0x80010 +; GFX10-NEXT: s_lshl_b32 s5, s5, 8 +; GFX10-NEXT: s_lshl_b32 s0, s0, 16 +; GFX10-NEXT: s_or_b32 s4, s4, s5 +; GFX10-NEXT: s_lshl_b32 s3, s3, 24 +; GFX10-NEXT: s_or_b32 s0, s4, s0 ; GFX10-NEXT: s_lshl_b32 s4, s2, s1 ; GFX10-NEXT: s_or_b32 s0, s0, s3 ; GFX10-NEXT: s_andn2_b32 s0, s0, s4 ; GFX10-NEXT: v_lshl_or_b32 v0, v0, s1, s0 ; GFX10-NEXT: s_mov_b32 s0, 8 -; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v0 -; GFX10-NEXT: v_and_b32_sdwa v3, v0, s2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v2 +; GFX10-NEXT: s_mov_b32 s0, 16 +; GFX10-NEXT: v_lshlrev_b32_sdwa v3, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX10-NEXT: v_and_or_b32 v0, v0, s2, v1 -; GFX10-NEXT: v_or3_b32 v2, v0, v3, v2 +; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v2 +; GFX10-NEXT: v_or3_b32 v2, v0, v3, v1 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: global_store_dword v[0:1], v2, off @@ -1034,36 +1018,34 @@ define amdgpu_ps void @insertelement_s_v4i8_v_s(<4 x i8> addrspace(4)* inreg %pt define amdgpu_ps void @insertelement_s_v4i8_s_v(<4 x i8> addrspace(4)* inreg %ptr, i8 inreg %val, i32 %idx) { ; GFX9-LABEL: insertelement_s_v4i8_s_v: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dword s1, s[2:3], 0x0 -; GFX9-NEXT: s_movk_i32 s6, 0xff +; GFX9-NEXT: s_load_dword s0, s[2:3], 0x0 +; GFX9-NEXT: s_movk_i32 s5, 0xff ; GFX9-NEXT: v_and_b32_e32 v0, 3, v0 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 3, v0 -; GFX9-NEXT: s_mov_b32 s0, 8 +; GFX9-NEXT: s_mov_b32 s1, 8 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_lshr_b32 s2, s1, 8 -; GFX9-NEXT: s_and_b32 s2, s2, s6 -; GFX9-NEXT: s_lshr_b32 s3, s1, 16 -; GFX9-NEXT: s_lshr_b32 s5, s1, 24 -; GFX9-NEXT: s_and_b32 s1, s1, s6 -; GFX9-NEXT: s_lshl_b32 s2, s2, 8 -; GFX9-NEXT: s_or_b32 s1, s1, s2 -; GFX9-NEXT: s_and_b32 s2, s3, s6 -; GFX9-NEXT: s_lshl_b32 s2, s2, 16 -; GFX9-NEXT: s_or_b32 s1, s1, s2 -; GFX9-NEXT: s_lshl_b32 s2, s5, 24 -; GFX9-NEXT: s_or_b32 s1, s1, s2 -; GFX9-NEXT: s_and_b32 s2, s4, s6 -; GFX9-NEXT: v_lshlrev_b32_e64 v1, v0, s2 -; GFX9-NEXT: v_lshlrev_b32_e64 v0, v0, s6 +; GFX9-NEXT: s_bfe_u32 s7, s0, 0x80008 +; GFX9-NEXT: s_lshr_b32 s3, s0, 24 +; GFX9-NEXT: s_and_b32 s6, s0, s5 +; GFX9-NEXT: s_bfe_u32 s0, s0, 0x80010 +; GFX9-NEXT: s_lshl_b32 s7, s7, 8 +; GFX9-NEXT: s_or_b32 s6, s6, s7 +; GFX9-NEXT: s_lshl_b32 s0, s0, 16 +; GFX9-NEXT: s_or_b32 s0, s6, s0 +; GFX9-NEXT: s_lshl_b32 s3, s3, 24 +; GFX9-NEXT: s_or_b32 s0, s0, s3 +; GFX9-NEXT: s_and_b32 s3, s4, s5 +; GFX9-NEXT: v_lshlrev_b32_e64 v1, v0, s3 +; GFX9-NEXT: v_lshlrev_b32_e64 v0, v0, s5 ; GFX9-NEXT: v_xor_b32_e32 v0, -1, v0 -; GFX9-NEXT: v_and_or_b32 v0, s1, v0, v1 -; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_or_b32 v1, v0, s6, v1 -; GFX9-NEXT: v_and_b32_sdwa v0, v0, s6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GFX9-NEXT: v_or3_b32 v2, v1, v0, v2 +; GFX9-NEXT: v_and_or_b32 v0, s0, v0, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v1, 24, v0 +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, s1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: s_mov_b32 s2, 16 +; GFX9-NEXT: v_and_or_b32 v2, v0, s5, v2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v1 +; GFX9-NEXT: v_or3_b32 v2, v2, v0, v1 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: global_store_dword v[0:1], v2, off @@ -1071,38 +1053,35 @@ define amdgpu_ps void @insertelement_s_v4i8_s_v(<4 x i8> addrspace(4)* inreg %pt ; ; GFX8-LABEL: insertelement_s_v4i8_s_v: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dword s0, s[2:3], 0x0 -; GFX8-NEXT: s_movk_i32 s5, 0xff +; GFX8-NEXT: s_load_dword s1, s[2:3], 0x0 +; GFX8-NEXT: s_movk_i32 s0, 0xff ; GFX8-NEXT: v_and_b32_e32 v0, 3, v0 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 3, v0 -; GFX8-NEXT: v_mov_b32_e32 v3, 8 +; GFX8-NEXT: v_mov_b32_e32 v2, 8 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_lshr_b32 s1, s0, 8 -; GFX8-NEXT: s_and_b32 s1, s1, s5 -; GFX8-NEXT: s_lshr_b32 s2, s0, 16 -; GFX8-NEXT: s_lshr_b32 s3, s0, 24 -; GFX8-NEXT: s_and_b32 s0, s0, s5 -; GFX8-NEXT: s_lshl_b32 s1, s1, 8 -; GFX8-NEXT: s_or_b32 s0, s0, s1 -; GFX8-NEXT: s_and_b32 s1, s2, s5 +; GFX8-NEXT: s_bfe_u32 s5, s1, 0x80008 +; GFX8-NEXT: s_lshr_b32 s2, s1, 24 +; GFX8-NEXT: s_and_b32 s3, s1, s0 +; GFX8-NEXT: s_bfe_u32 s1, s1, 0x80010 +; GFX8-NEXT: s_lshl_b32 s5, s5, 8 +; GFX8-NEXT: s_or_b32 s3, s3, s5 ; GFX8-NEXT: s_lshl_b32 s1, s1, 16 -; GFX8-NEXT: s_or_b32 s0, s0, s1 -; GFX8-NEXT: s_lshl_b32 s1, s3, 24 -; GFX8-NEXT: s_or_b32 s0, s0, s1 -; GFX8-NEXT: s_and_b32 s1, s4, s5 -; GFX8-NEXT: v_lshlrev_b32_e64 v1, v0, s1 -; GFX8-NEXT: v_lshlrev_b32_e64 v0, v0, s5 +; GFX8-NEXT: s_or_b32 s1, s3, s1 +; GFX8-NEXT: s_lshl_b32 s2, s2, 24 +; GFX8-NEXT: s_or_b32 s1, s1, s2 +; GFX8-NEXT: s_and_b32 s2, s4, s0 +; GFX8-NEXT: v_lshlrev_b32_e64 v1, v0, s2 +; GFX8-NEXT: v_lshlrev_b32_e64 v0, v0, s0 ; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0 -; GFX8-NEXT: v_and_b32_e32 v0, s0, v0 +; GFX8-NEXT: v_and_b32_e32 v0, s1, v0 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX8-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_mov_b32_e32 v3, s5 -; GFX8-NEXT: v_lshrrev_b32_e32 v2, 24, v0 -; GFX8-NEXT: v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 -; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v2 +; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_mov_b32_e32 v3, 16 +; GFX8-NEXT: v_lshrrev_b32_e32 v1, 24, v0 +; GFX8-NEXT: v_or_b32_sdwa v2, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v1 ; GFX8-NEXT: v_or_b32_e32 v2, v0, v1 ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: v_mov_b32_e32 v1, 0 @@ -1116,17 +1095,15 @@ define amdgpu_ps void @insertelement_s_v4i8_s_v(<4 x i8> addrspace(4)* inreg %pt ; GFX7-NEXT: v_and_b32_e32 v0, 3, v0 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 3, v0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_lshr_b32 s1, s0, 8 -; GFX7-NEXT: s_and_b32 s1, s1, s5 -; GFX7-NEXT: s_lshr_b32 s2, s0, 16 -; GFX7-NEXT: s_lshr_b32 s3, s0, 24 -; GFX7-NEXT: s_and_b32 s0, s0, s5 -; GFX7-NEXT: s_lshl_b32 s1, s1, 8 -; GFX7-NEXT: s_or_b32 s0, s0, s1 -; GFX7-NEXT: s_and_b32 s1, s2, s5 -; GFX7-NEXT: s_lshl_b32 s1, s1, 16 -; GFX7-NEXT: s_or_b32 s0, s0, s1 -; GFX7-NEXT: s_lshl_b32 s1, s3, 24 +; GFX7-NEXT: s_bfe_u32 s3, s0, 0x80008 +; GFX7-NEXT: s_lshr_b32 s1, s0, 24 +; GFX7-NEXT: s_and_b32 s2, s0, s5 +; GFX7-NEXT: s_bfe_u32 s0, s0, 0x80010 +; GFX7-NEXT: s_lshl_b32 s3, s3, 8 +; GFX7-NEXT: s_or_b32 s2, s2, s3 +; GFX7-NEXT: s_lshl_b32 s0, s0, 16 +; GFX7-NEXT: s_or_b32 s0, s2, s0 +; GFX7-NEXT: s_lshl_b32 s1, s1, 24 ; GFX7-NEXT: s_or_b32 s0, s0, s1 ; GFX7-NEXT: s_and_b32 s1, s4, s5 ; GFX7-NEXT: v_lshl_b32_e32 v1, s1, v0 @@ -1134,17 +1111,15 @@ define amdgpu_ps void @insertelement_s_v4i8_s_v(<4 x i8> addrspace(4)* inreg %pt ; GFX7-NEXT: v_xor_b32_e32 v0, -1, v0 ; GFX7-NEXT: v_and_b32_e32 v0, s0, v0 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX7-NEXT: v_and_b32_e32 v1, s5, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GFX7-NEXT: v_and_b32_e32 v0, s5, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX7-NEXT: v_and_b32_e32 v1, s5, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX7-NEXT: v_bfe_u32 v3, v0, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v1, 24, v0 +; GFX7-NEXT: v_and_b32_e32 v2, s5, v0 +; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 +; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v1 ; GFX7-NEXT: s_mov_b64 s[0:1], 0 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7-NEXT: s_mov_b32 s2, -1 @@ -1163,27 +1138,25 @@ define amdgpu_ps void @insertelement_s_v4i8_s_v(<4 x i8> addrspace(4)* inreg %pt ; GFX10-NEXT: v_lshlrev_b32_e64 v0, v0, s2 ; GFX10-NEXT: v_xor_b32_e32 v1, -1, v1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_lshr_b32 s2, s0, 8 -; GFX10-NEXT: s_lshr_b32 s3, s0, 16 -; GFX10-NEXT: s_and_b32 s2, s2, s1 -; GFX10-NEXT: s_and_b32 s3, s3, s1 -; GFX10-NEXT: s_lshr_b32 s4, s0, 24 -; GFX10-NEXT: s_and_b32 s0, s0, s1 -; GFX10-NEXT: s_lshl_b32 s2, s2, 8 -; GFX10-NEXT: s_lshl_b32 s3, s3, 16 -; GFX10-NEXT: s_or_b32 s0, s0, s2 -; GFX10-NEXT: s_lshl_b32 s2, s4, 24 -; GFX10-NEXT: s_or_b32 s0, s0, s3 +; GFX10-NEXT: s_bfe_u32 s4, s0, 0x80008 +; GFX10-NEXT: s_lshr_b32 s2, s0, 24 +; GFX10-NEXT: s_and_b32 s3, s0, s1 +; GFX10-NEXT: s_bfe_u32 s0, s0, 0x80010 +; GFX10-NEXT: s_lshl_b32 s4, s4, 8 +; GFX10-NEXT: s_lshl_b32 s0, s0, 16 +; GFX10-NEXT: s_or_b32 s3, s3, s4 +; GFX10-NEXT: s_lshl_b32 s2, s2, 24 +; GFX10-NEXT: s_or_b32 s0, s3, s0 ; GFX10-NEXT: s_or_b32 s0, s0, s2 ; GFX10-NEXT: v_and_or_b32 v0, s0, v1, v0 ; GFX10-NEXT: s_mov_b32 s0, 8 -; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v0 -; GFX10-NEXT: v_and_b32_sdwa v3, v0, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v2 +; GFX10-NEXT: s_mov_b32 s0, 16 +; GFX10-NEXT: v_lshlrev_b32_sdwa v3, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX10-NEXT: v_and_or_b32 v0, v0, s1, v1 -; GFX10-NEXT: v_or3_b32 v2, v0, v3, v2 +; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v2 +; GFX10-NEXT: v_or3_b32 v2, v0, v3, v1 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: global_store_dword v[0:1], v2, off @@ -1197,35 +1170,33 @@ define amdgpu_ps void @insertelement_s_v4i8_s_v(<4 x i8> addrspace(4)* inreg %pt define amdgpu_ps void @insertelement_s_v4i8_v_v(<4 x i8> addrspace(4)* inreg %ptr, i8 %val, i32 %idx) { ; GFX9-LABEL: insertelement_s_v4i8_v_v: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dword s1, s[2:3], 0x0 -; GFX9-NEXT: s_movk_i32 s5, 0xff +; GFX9-NEXT: s_load_dword s0, s[2:3], 0x0 +; GFX9-NEXT: s_movk_i32 s4, 0xff ; GFX9-NEXT: v_and_b32_e32 v1, 3, v1 ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 3, v1 ; GFX9-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_lshr_b32 s2, s1, 8 -; GFX9-NEXT: s_and_b32 s2, s2, s5 -; GFX9-NEXT: s_lshr_b32 s3, s1, 16 -; GFX9-NEXT: s_lshr_b32 s4, s1, 24 -; GFX9-NEXT: s_and_b32 s1, s1, s5 -; GFX9-NEXT: s_lshl_b32 s2, s2, 8 -; GFX9-NEXT: s_or_b32 s1, s1, s2 -; GFX9-NEXT: s_and_b32 s2, s3, s5 -; GFX9-NEXT: s_lshl_b32 s2, s2, 16 -; GFX9-NEXT: v_lshlrev_b32_e64 v1, v1, s5 -; GFX9-NEXT: s_or_b32 s1, s1, s2 -; GFX9-NEXT: s_lshl_b32 s2, s4, 24 -; GFX9-NEXT: s_or_b32 s1, s1, s2 +; GFX9-NEXT: s_bfe_u32 s6, s0, 0x80008 +; GFX9-NEXT: s_lshr_b32 s3, s0, 24 +; GFX9-NEXT: s_and_b32 s5, s0, s4 +; GFX9-NEXT: s_bfe_u32 s0, s0, 0x80010 +; GFX9-NEXT: s_lshl_b32 s6, s6, 8 +; GFX9-NEXT: s_or_b32 s5, s5, s6 +; GFX9-NEXT: s_lshl_b32 s0, s0, 16 +; GFX9-NEXT: v_lshlrev_b32_e64 v1, v1, s4 +; GFX9-NEXT: s_or_b32 s0, s5, s0 +; GFX9-NEXT: s_lshl_b32 s3, s3, 24 +; GFX9-NEXT: s_or_b32 s0, s0, s3 ; GFX9-NEXT: v_xor_b32_e32 v1, -1, v1 -; GFX9-NEXT: v_and_or_b32 v0, s1, v1, v0 -; GFX9-NEXT: s_mov_b32 s0, 8 -; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_or_b32 v1, v0, s5, v1 -; GFX9-NEXT: v_and_b32_sdwa v0, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GFX9-NEXT: v_or3_b32 v2, v1, v0, v2 +; GFX9-NEXT: v_and_or_b32 v0, s0, v1, v0 +; GFX9-NEXT: s_mov_b32 s1, 8 +; GFX9-NEXT: v_lshrrev_b32_e32 v1, 24, v0 +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, s1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: s_mov_b32 s2, 16 +; GFX9-NEXT: v_and_or_b32 v2, v0, s4, v2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v1 +; GFX9-NEXT: v_or3_b32 v2, v2, v0, v1 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: global_store_dword v[0:1], v2, off @@ -1233,37 +1204,34 @@ define amdgpu_ps void @insertelement_s_v4i8_v_v(<4 x i8> addrspace(4)* inreg %pt ; ; GFX8-LABEL: insertelement_s_v4i8_v_v: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dword s0, s[2:3], 0x0 -; GFX8-NEXT: s_movk_i32 s4, 0xff +; GFX8-NEXT: s_load_dword s1, s[2:3], 0x0 +; GFX8-NEXT: s_movk_i32 s0, 0xff ; GFX8-NEXT: v_and_b32_e32 v1, 3, v1 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 3, v1 ; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_lshr_b32 s1, s0, 8 -; GFX8-NEXT: s_and_b32 s1, s1, s4 -; GFX8-NEXT: s_lshr_b32 s2, s0, 16 -; GFX8-NEXT: s_lshr_b32 s3, s0, 24 -; GFX8-NEXT: s_and_b32 s0, s0, s4 -; GFX8-NEXT: s_lshl_b32 s1, s1, 8 -; GFX8-NEXT: s_or_b32 s0, s0, s1 -; GFX8-NEXT: s_and_b32 s1, s2, s4 +; GFX8-NEXT: s_bfe_u32 s4, s1, 0x80008 +; GFX8-NEXT: s_lshr_b32 s2, s1, 24 +; GFX8-NEXT: s_and_b32 s3, s1, s0 +; GFX8-NEXT: s_bfe_u32 s1, s1, 0x80010 +; GFX8-NEXT: s_lshl_b32 s4, s4, 8 +; GFX8-NEXT: s_or_b32 s3, s3, s4 ; GFX8-NEXT: s_lshl_b32 s1, s1, 16 -; GFX8-NEXT: v_lshlrev_b32_e64 v1, v1, s4 -; GFX8-NEXT: s_or_b32 s0, s0, s1 -; GFX8-NEXT: s_lshl_b32 s1, s3, 24 -; GFX8-NEXT: s_or_b32 s0, s0, s1 +; GFX8-NEXT: v_lshlrev_b32_e64 v1, v1, s0 +; GFX8-NEXT: s_or_b32 s1, s3, s1 +; GFX8-NEXT: s_lshl_b32 s2, s2, 24 +; GFX8-NEXT: s_or_b32 s1, s1, s2 ; GFX8-NEXT: v_xor_b32_e32 v1, -1, v1 -; GFX8-NEXT: v_and_b32_e32 v1, s0, v1 -; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 -; GFX8-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX8-NEXT: v_mov_b32_e32 v3, 8 -; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_mov_b32_e32 v3, s4 -; GFX8-NEXT: v_lshrrev_b32_e32 v2, 24, v0 -; GFX8-NEXT: v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_and_b32_e32 v1, s1, v1 ; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 -; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v2 +; GFX8-NEXT: v_mov_b32_e32 v2, 8 +; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_mov_b32_e32 v3, 16 +; GFX8-NEXT: v_lshrrev_b32_e32 v1, 24, v0 +; GFX8-NEXT: v_or_b32_sdwa v2, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v1 ; GFX8-NEXT: v_or_b32_e32 v2, v0, v1 ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: v_mov_b32_e32 v1, 0 @@ -1278,34 +1246,30 @@ define amdgpu_ps void @insertelement_s_v4i8_v_v(<4 x i8> addrspace(4)* inreg %pt ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 3, v1 ; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_lshr_b32 s1, s0, 8 -; GFX7-NEXT: s_and_b32 s1, s1, s4 -; GFX7-NEXT: s_lshr_b32 s2, s0, 16 -; GFX7-NEXT: s_lshr_b32 s3, s0, 24 -; GFX7-NEXT: s_and_b32 s0, s0, s4 -; GFX7-NEXT: s_lshl_b32 s1, s1, 8 -; GFX7-NEXT: s_or_b32 s0, s0, s1 -; GFX7-NEXT: s_and_b32 s1, s2, s4 -; GFX7-NEXT: s_lshl_b32 s1, s1, 16 -; GFX7-NEXT: s_or_b32 s0, s0, s1 -; GFX7-NEXT: s_lshl_b32 s1, s3, 24 +; GFX7-NEXT: s_bfe_u32 s3, s0, 0x80008 +; GFX7-NEXT: s_lshr_b32 s1, s0, 24 +; GFX7-NEXT: s_and_b32 s2, s0, s4 +; GFX7-NEXT: s_bfe_u32 s0, s0, 0x80010 +; GFX7-NEXT: s_lshl_b32 s3, s3, 8 +; GFX7-NEXT: s_or_b32 s2, s2, s3 +; GFX7-NEXT: s_lshl_b32 s0, s0, 16 +; GFX7-NEXT: s_or_b32 s0, s2, s0 +; GFX7-NEXT: s_lshl_b32 s1, s1, 24 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, v1, v0 ; GFX7-NEXT: v_lshl_b32_e32 v1, s4, v1 ; GFX7-NEXT: s_or_b32 s0, s0, s1 ; GFX7-NEXT: v_xor_b32_e32 v1, -1, v1 ; GFX7-NEXT: v_and_b32_e32 v1, s0, v1 ; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX7-NEXT: v_and_b32_e32 v1, s4, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX7-NEXT: v_bfe_u32 v3, v0, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v1, 24, v0 +; GFX7-NEXT: v_and_b32_e32 v2, s4, v0 +; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 +; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v1 ; GFX7-NEXT: s_mov_b64 s[0:1], 0 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7-NEXT: s_mov_b32 s2, -1 @@ -1323,27 +1287,25 @@ define amdgpu_ps void @insertelement_s_v4i8_v_v(<4 x i8> addrspace(4)* inreg %pt ; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX10-NEXT: v_xor_b32_e32 v1, -1, v2 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_lshr_b32 s2, s0, 8 -; GFX10-NEXT: s_lshr_b32 s3, s0, 16 -; GFX10-NEXT: s_and_b32 s2, s2, s1 -; GFX10-NEXT: s_and_b32 s3, s3, s1 -; GFX10-NEXT: s_lshr_b32 s4, s0, 24 -; GFX10-NEXT: s_and_b32 s0, s0, s1 -; GFX10-NEXT: s_lshl_b32 s2, s2, 8 -; GFX10-NEXT: s_lshl_b32 s3, s3, 16 -; GFX10-NEXT: s_or_b32 s0, s0, s2 -; GFX10-NEXT: s_lshl_b32 s2, s4, 24 -; GFX10-NEXT: s_or_b32 s0, s0, s3 +; GFX10-NEXT: s_bfe_u32 s4, s0, 0x80008 +; GFX10-NEXT: s_lshr_b32 s2, s0, 24 +; GFX10-NEXT: s_and_b32 s3, s0, s1 +; GFX10-NEXT: s_bfe_u32 s0, s0, 0x80010 +; GFX10-NEXT: s_lshl_b32 s4, s4, 8 +; GFX10-NEXT: s_lshl_b32 s0, s0, 16 +; GFX10-NEXT: s_or_b32 s3, s3, s4 +; GFX10-NEXT: s_lshl_b32 s2, s2, 24 +; GFX10-NEXT: s_or_b32 s0, s3, s0 ; GFX10-NEXT: s_or_b32 s0, s0, s2 ; GFX10-NEXT: v_and_or_b32 v0, s0, v1, v0 ; GFX10-NEXT: s_mov_b32 s0, 8 -; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v0 -; GFX10-NEXT: v_and_b32_sdwa v3, v0, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v2 +; GFX10-NEXT: s_mov_b32 s0, 16 +; GFX10-NEXT: v_lshlrev_b32_sdwa v3, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX10-NEXT: v_and_or_b32 v0, v0, s1, v1 -; GFX10-NEXT: v_or3_b32 v2, v0, v3, v2 +; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v2 +; GFX10-NEXT: v_or3_b32 v2, v0, v3, v1 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: global_store_dword v[0:1], v2, off @@ -1358,31 +1320,31 @@ define amdgpu_ps void @insertelement_v_v4i8_s_v(<4 x i8> addrspace(1)* %ptr, i8 ; GFX9-LABEL: insertelement_v_v4i8_s_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dword v0, v[0:1], off -; GFX9-NEXT: s_movk_i32 s1, 0xff ; GFX9-NEXT: v_and_b32_e32 v2, 3, v2 ; GFX9-NEXT: s_mov_b32 s0, 8 -; GFX9-NEXT: s_and_b32 s2, s2, s1 +; GFX9-NEXT: s_movk_i32 s3, 0xff +; GFX9-NEXT: s_mov_b32 s1, 16 +; GFX9-NEXT: s_and_b32 s2, s2, s3 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 3, v2 -; GFX9-NEXT: v_lshlrev_b32_e64 v3, v2, s2 -; GFX9-NEXT: v_lshlrev_b32_e64 v2, v2, s1 +; GFX9-NEXT: v_lshlrev_b32_e64 v4, v2, s2 +; GFX9-NEXT: v_lshlrev_b32_e64 v2, v2, s3 ; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2 ; GFX9-NEXT: v_mov_b32_e32 v1, 8 +; GFX9-NEXT: v_mov_b32_e32 v3, 16 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v4, 8, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v5, 24, v0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_b32_sdwa v6, v0, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_lshlrev_b32_sdwa v6, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v7, s1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_and_or_b32 v0, v0, s3, v6 ; GFX9-NEXT: v_lshlrev_b32_e32 v5, 24, v5 -; GFX9-NEXT: v_and_or_b32 v0, v0, s1, v4 -; GFX9-NEXT: v_or3_b32 v0, v0, v6, v5 -; GFX9-NEXT: v_and_or_b32 v0, v0, v2, v3 -; GFX9-NEXT: v_lshrrev_b32_e32 v2, 8, v0 -; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_b32_sdwa v4, v0, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b32_e32 v2, 24, v3 -; GFX9-NEXT: v_and_or_b32 v0, v0, s1, v1 -; GFX9-NEXT: v_or3_b32 v2, v0, v4, v2 +; GFX9-NEXT: v_or3_b32 v0, v0, v7, v5 +; GFX9-NEXT: v_and_or_b32 v0, v0, v2, v4 +; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v0 +; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_and_or_b32 v0, v0, s3, v1 +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v2 +; GFX9-NEXT: v_or3_b32 v2, v0, v3, v1 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: global_store_dword v[0:1], v2, off @@ -1391,35 +1353,34 @@ define amdgpu_ps void @insertelement_v_v4i8_s_v(<4 x i8> addrspace(1)* %ptr, i8 ; GFX8-LABEL: insertelement_v_v4i8_s_v: ; GFX8: ; %bb.0: ; GFX8-NEXT: flat_load_dword v0, v[0:1] -; GFX8-NEXT: s_movk_i32 s0, 0xff ; GFX8-NEXT: v_mov_b32_e32 v1, 8 +; GFX8-NEXT: v_mov_b32_e32 v3, 16 ; GFX8-NEXT: v_and_b32_e32 v2, 3, v2 -; GFX8-NEXT: v_mov_b32_e32 v4, s0 +; GFX8-NEXT: s_movk_i32 s0, 0xff ; GFX8-NEXT: s_and_b32 s1, s2, s0 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 3, v2 -; GFX8-NEXT: v_lshlrev_b32_e64 v5, v2, s1 +; GFX8-NEXT: v_lshlrev_b32_e64 v6, v2, s1 ; GFX8-NEXT: v_lshlrev_b32_e64 v2, v2, s0 ; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2 -; GFX8-NEXT: v_mov_b32_e32 v3, 8 +; GFX8-NEXT: v_mov_b32_e32 v4, 8 +; GFX8-NEXT: v_mov_b32_e32 v5, 16 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_lshrrev_b32_e32 v6, 8, v0 -; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX8-NEXT: v_lshrrev_b32_e32 v7, 24, v0 -; GFX8-NEXT: v_and_b32_sdwa v8, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_lshlrev_b32_e32 v6, 24, v7 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v8 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v6 +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v7 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v3 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: v_and_b32_e32 v0, v0, v2 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v5 -; GFX8-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v2, 24, v0 -; GFX8-NEXT: v_and_b32_sdwa v4, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 -; GFX8-NEXT: v_or_b32_e32 v2, v0, v2 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v6 +; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v1, 24, v0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v1 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v3 +; GFX8-NEXT: v_or_b32_e32 v2, v0, v1 ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: v_mov_b32_e32 v1, 0 ; GFX8-NEXT: flat_store_dword v[0:1], v2 @@ -1440,32 +1401,28 @@ define amdgpu_ps void @insertelement_v_v4i8_s_v(<4 x i8> addrspace(1)* %ptr, i8 ; GFX7-NEXT: v_xor_b32_e32 v1, -1, v1 ; GFX7-NEXT: s_mov_b32 s6, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v3, 8, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v4, 16, v0 -; GFX7-NEXT: v_and_b32_e32 v3, s0, v3 -; GFX7-NEXT: v_lshrrev_b32_e32 v5, 24, v0 -; GFX7-NEXT: v_and_b32_e32 v4, s0, v4 -; GFX7-NEXT: v_and_b32_e32 v0, s0, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v3 -; GFX7-NEXT: v_lshlrev_b32_e32 v5, 24, v5 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v4 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v5 -; GFX7-NEXT: v_and_b32_e32 v0, v0, v1 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0 -; GFX7-NEXT: v_and_b32_e32 v1, s0, v1 +; GFX7-NEXT: v_bfe_u32 v5, v0, 8, 8 ; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GFX7-NEXT: v_and_b32_e32 v2, s0, v2 -; GFX7-NEXT: v_and_b32_e32 v0, s0, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_and_b32_e32 v4, s0, v0 +; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v5, 8, v5 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_or_b32_e32 v4, v4, v5 ; GFX7-NEXT: v_lshlrev_b32_e32 v3, 24, v3 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX7-NEXT: v_or_b32_e32 v0, v4, v0 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v3 +; GFX7-NEXT: v_and_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX7-NEXT: v_bfe_u32 v3, v0, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v1, 24, v0 +; GFX7-NEXT: v_and_b32_e32 v2, s0, v0 +; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v1 +; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX7-NEXT: s_endpgm ; @@ -1478,25 +1435,25 @@ define amdgpu_ps void @insertelement_v_v4i8_s_v(<4 x i8> addrspace(1)* %ptr, i8 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 3, v1 ; GFX10-NEXT: v_lshlrev_b32_e64 v4, v1, s1 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v2, 8, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GFX10-NEXT: v_and_b32_sdwa v5, v0, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_lshlrev_b32_e32 v3, 24, v3 +; GFX10-NEXT: s_mov_b32 s0, 16 +; GFX10-NEXT: v_lshlrev_b32_sdwa v5, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v0, v0, s1, v2 +; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v3 ; GFX10-NEXT: s_and_b32 s0, s2, s1 +; GFX10-NEXT: v_xor_b32_e32 v3, -1, v4 ; GFX10-NEXT: v_lshlrev_b32_e64 v1, v1, s0 -; GFX10-NEXT: v_and_or_b32 v0, v0, s1, v2 -; GFX10-NEXT: v_xor_b32_e32 v2, -1, v4 -; GFX10-NEXT: v_or3_b32 v0, v0, v5, v3 -; GFX10-NEXT: v_and_or_b32 v0, v0, v2, v1 -; GFX10-NEXT: v_mov_b32_e32 v1, 8 -; GFX10-NEXT: v_lshrrev_b32_e32 v2, 8, v0 +; GFX10-NEXT: v_or3_b32 v0, v0, v5, v2 +; GFX10-NEXT: v_mov_b32_e32 v2, 8 +; GFX10-NEXT: v_and_or_b32 v0, v0, v3, v1 +; GFX10-NEXT: v_mov_b32_e32 v1, 16 +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_and_b32_sdwa v2, v0, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b32_e32 v3, 24, v3 -; GFX10-NEXT: v_and_or_b32 v0, v0, s1, v1 -; GFX10-NEXT: v_or3_b32 v2, v0, v2, v3 +; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v0, v0, s1, v2 +; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v3 +; GFX10-NEXT: v_or3_b32 v2, v0, v1, v2 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: global_store_dword v[0:1], v2, off @@ -1513,28 +1470,28 @@ define amdgpu_ps void @insertelement_v_v4i8_v_s(<4 x i8> addrspace(1)* %ptr, i8 ; GFX9-NEXT: global_load_dword v0, v[0:1], off ; GFX9-NEXT: s_and_b32 s2, s2, 3 ; GFX9-NEXT: s_mov_b32 s0, 8 -; GFX9-NEXT: s_movk_i32 s1, 0xff +; GFX9-NEXT: s_mov_b32 s1, 16 ; GFX9-NEXT: s_lshl_b32 s2, s2, 3 +; GFX9-NEXT: s_movk_i32 s3, 0xff ; GFX9-NEXT: v_lshlrev_b32_sdwa v2, s2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: s_lshl_b32 s2, s1, s2 +; GFX9-NEXT: s_lshl_b32 s2, s3, s2 ; GFX9-NEXT: s_not_b32 s2, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, 8 +; GFX9-NEXT: v_mov_b32_e32 v3, 16 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v3, 8, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v3, s0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_b32_sdwa v5, v0, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_lshlrev_b32_sdwa v5, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v6, s1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_and_or_b32 v0, v0, s3, v5 ; GFX9-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; GFX9-NEXT: v_and_or_b32 v0, v0, s1, v3 -; GFX9-NEXT: v_or3_b32 v0, v0, v5, v4 +; GFX9-NEXT: v_or3_b32 v0, v0, v6, v4 ; GFX9-NEXT: v_and_or_b32 v0, v0, s2, v2 -; GFX9-NEXT: v_lshrrev_b32_e32 v2, 8, v0 -; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_b32_sdwa v4, v0, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b32_e32 v2, 24, v3 -; GFX9-NEXT: v_and_or_b32 v0, v0, s1, v1 -; GFX9-NEXT: v_or3_b32 v2, v0, v4, v2 +; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v0 +; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_and_or_b32 v0, v0, s3, v1 +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v2 +; GFX9-NEXT: v_or3_b32 v2, v0, v3, v1 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: global_store_dword v[0:1], v2, off @@ -1544,34 +1501,33 @@ define amdgpu_ps void @insertelement_v_v4i8_v_s(<4 x i8> addrspace(1)* %ptr, i8 ; GFX8: ; %bb.0: ; GFX8-NEXT: flat_load_dword v0, v[0:1] ; GFX8-NEXT: s_and_b32 s1, s2, 3 +; GFX8-NEXT: v_mov_b32_e32 v1, 8 ; GFX8-NEXT: s_lshl_b32 s1, s1, 3 -; GFX8-NEXT: v_mov_b32_e32 v5, s1 +; GFX8-NEXT: v_mov_b32_e32 v3, 16 +; GFX8-NEXT: v_mov_b32_e32 v6, s1 ; GFX8-NEXT: s_movk_i32 s0, 0xff -; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_mov_b32_e32 v1, 8 -; GFX8-NEXT: v_mov_b32_e32 v4, s0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v6, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX8-NEXT: s_lshl_b32 s0, s0, s1 ; GFX8-NEXT: s_not_b32 s0, s0 -; GFX8-NEXT: v_mov_b32_e32 v3, 8 +; GFX8-NEXT: v_mov_b32_e32 v4, 8 +; GFX8-NEXT: v_mov_b32_e32 v5, 16 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_lshrrev_b32_e32 v5, 8, v0 -; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX8-NEXT: v_lshrrev_b32_e32 v6, 24, v0 -; GFX8-NEXT: v_and_b32_sdwa v7, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_lshlrev_b32_e32 v5, 24, v6 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v7 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v5 +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v6 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v3 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: v_and_b32_e32 v0, s0, v0 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX8-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v2, 24, v0 -; GFX8-NEXT: v_and_b32_sdwa v4, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 -; GFX8-NEXT: v_or_b32_e32 v2, v0, v2 +; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v1, 24, v0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v1 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v3 +; GFX8-NEXT: v_or_b32_e32 v2, v0, v1 ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: v_mov_b32_e32 v1, 0 ; GFX8-NEXT: flat_store_dword v[0:1], v2 @@ -1584,40 +1540,36 @@ define amdgpu_ps void @insertelement_v_v4i8_v_s(<4 x i8> addrspace(1)* %ptr, i8 ; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 ; GFX7-NEXT: s_movk_i32 s0, 0xff -; GFX7-NEXT: v_and_b32_e32 v1, s0, v2 ; GFX7-NEXT: s_and_b32 s1, s2, 3 +; GFX7-NEXT: v_and_b32_e32 v1, s0, v2 ; GFX7-NEXT: s_lshl_b32 s1, s1, 3 ; GFX7-NEXT: v_lshlrev_b32_e32 v1, s1, v1 ; GFX7-NEXT: s_lshl_b32 s1, s0, s1 ; GFX7-NEXT: s_not_b32 s1, s1 ; GFX7-NEXT: s_mov_b32 s6, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v2, 8, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v0 -; GFX7-NEXT: v_and_b32_e32 v2, s0, v2 -; GFX7-NEXT: v_lshrrev_b32_e32 v4, 24, v0 -; GFX7-NEXT: v_and_b32_e32 v3, s0, v3 -; GFX7-NEXT: v_and_b32_e32 v0, s0, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 8, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX7-NEXT: v_bfe_u32 v4, v0, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v2, 24, v0 +; GFX7-NEXT: v_and_b32_e32 v3, s0, v0 +; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v4, 8, v4 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_or_b32_e32 v3, v3, v4 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 24, v2 +; GFX7-NEXT: v_or_b32_e32 v0, v3, v0 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v3 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v4 ; GFX7-NEXT: v_and_b32_e32 v0, s1, v0 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0 -; GFX7-NEXT: v_and_b32_e32 v1, s0, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GFX7-NEXT: v_and_b32_e32 v2, s0, v2 -; GFX7-NEXT: v_and_b32_e32 v0, s0, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX7-NEXT: v_bfe_u32 v3, v0, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v1, 24, v0 +; GFX7-NEXT: v_and_b32_e32 v2, s0, v0 +; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v1 +; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v3, 24, v3 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v3 ; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX7-NEXT: s_endpgm ; @@ -1625,29 +1577,29 @@ define amdgpu_ps void @insertelement_v_v4i8_v_s(<4 x i8> addrspace(1)* %ptr, i8 ; GFX10: ; %bb.0: ; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: s_mov_b32 s0, 8 -; GFX10-NEXT: s_and_b32 s1, s2, 3 -; GFX10-NEXT: s_lshl_b32 s1, s1, 3 +; GFX10-NEXT: s_mov_b32 s1, 16 +; GFX10-NEXT: s_and_b32 s2, s2, 3 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX10-NEXT: s_movk_i32 s0, 0xff -; GFX10-NEXT: v_lshlrev_b32_e32 v3, 24, v3 -; GFX10-NEXT: v_and_b32_sdwa v4, v0, s0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX10-NEXT: v_lshlrev_b32_sdwa v4, s1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: s_lshl_b32 s1, s2, 3 ; GFX10-NEXT: v_and_or_b32 v0, v0, s0, v1 -; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX10-NEXT: s_lshl_b32 s1, s0, s1 ; GFX10-NEXT: s_not_b32 s1, s1 -; GFX10-NEXT: v_or3_b32 v0, v0, v4, v3 -; GFX10-NEXT: v_and_or_b32 v0, v0, s1, v1 +; GFX10-NEXT: v_or3_b32 v0, v0, v4, v1 ; GFX10-NEXT: v_mov_b32_e32 v1, 8 -; GFX10-NEXT: v_lshrrev_b32_e32 v2, 8, v0 +; GFX10-NEXT: v_and_or_b32 v0, v0, s1, v2 +; GFX10-NEXT: v_mov_b32_e32 v2, 16 +; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_and_b32_sdwa v2, v0, s0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b32_e32 v3, 24, v3 +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX10-NEXT: v_and_or_b32 v0, v0, s0, v1 -; GFX10-NEXT: v_or3_b32 v2, v0, v2, v3 +; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX10-NEXT: v_or3_b32 v2, v0, v2, v1 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: global_store_dword v[0:1], v2, off @@ -1664,29 +1616,29 @@ define amdgpu_ps void @insertelement_v_v4i8_v_v(<4 x i8> addrspace(1)* %ptr, i8 ; GFX9-NEXT: global_load_dword v0, v[0:1], off ; GFX9-NEXT: v_and_b32_e32 v3, 3, v3 ; GFX9-NEXT: s_mov_b32 s0, 8 -; GFX9-NEXT: s_movk_i32 s1, 0xff +; GFX9-NEXT: s_mov_b32 s1, 16 ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX9-NEXT: s_movk_i32 s2, 0xff ; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshlrev_b32_e64 v3, v3, s1 +; GFX9-NEXT: v_lshlrev_b32_e64 v3, v3, s2 ; GFX9-NEXT: v_xor_b32_e32 v3, -1, v3 ; GFX9-NEXT: v_mov_b32_e32 v4, 8 ; GFX9-NEXT: v_mov_b32_e32 v1, 0xff +; GFX9-NEXT: v_mov_b32_e32 v5, 16 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v6, 24, v0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v5, s0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_b32_sdwa v7, v0, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_lshlrev_b32_sdwa v7, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v8, s1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_and_or_b32 v0, v0, s2, v7 ; GFX9-NEXT: v_lshlrev_b32_e32 v6, 24, v6 -; GFX9-NEXT: v_and_or_b32 v0, v0, s1, v5 -; GFX9-NEXT: v_or3_b32 v0, v0, v7, v6 +; GFX9-NEXT: v_or3_b32 v0, v0, v8, v6 ; GFX9-NEXT: v_and_or_b32 v0, v0, v3, v2 -; GFX9-NEXT: v_lshrrev_b32_e32 v2, 8, v0 -; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_b32_sdwa v5, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b32_e32 v3, 24, v3 -; GFX9-NEXT: v_and_or_b32 v0, v0, v1, v2 -; GFX9-NEXT: v_or3_b32 v2, v0, v5, v3 +; GFX9-NEXT: v_lshlrev_b32_sdwa v3, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v0 +; GFX9-NEXT: v_lshlrev_b32_sdwa v4, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_and_or_b32 v0, v0, v1, v3 +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 24, v2 +; GFX9-NEXT: v_or3_b32 v2, v0, v4, v1 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: global_store_dword v[0:1], v2, off @@ -1695,35 +1647,33 @@ define amdgpu_ps void @insertelement_v_v4i8_v_v(<4 x i8> addrspace(1)* %ptr, i8 ; GFX8-LABEL: insertelement_v_v4i8_v_v: ; GFX8: ; %bb.0: ; GFX8-NEXT: flat_load_dword v0, v[0:1] -; GFX8-NEXT: s_movk_i32 s0, 0xff ; GFX8-NEXT: v_mov_b32_e32 v4, 8 -; GFX8-NEXT: v_mov_b32_e32 v6, s0 ; GFX8-NEXT: v_and_b32_e32 v3, 3, v3 +; GFX8-NEXT: v_mov_b32_e32 v5, 16 ; GFX8-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshlrev_b32_e64 v3, v3, s0 -; GFX8-NEXT: v_xor_b32_e32 v3, -1, v3 -; GFX8-NEXT: v_mov_b32_e32 v5, 8 ; GFX8-NEXT: v_mov_b32_e32 v1, 0xff +; GFX8-NEXT: v_lshlrev_b32_e32 v1, v3, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_xor_b32_e32 v1, -1, v1 +; GFX8-NEXT: v_mov_b32_e32 v6, 8 +; GFX8-NEXT: v_mov_b32_e32 v7, 16 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_lshrrev_b32_e32 v7, 8, v0 -; GFX8-NEXT: v_lshlrev_b32_sdwa v4, v4, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v8, 24, v0 -; GFX8-NEXT: v_and_b32_sdwa v6, v0, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v4, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v5, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_lshlrev_b32_e32 v7, 24, v8 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v6 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v7 -; GFX8-NEXT: v_and_b32_e32 v0, v0, v3 +; GFX8-NEXT: v_lshlrev_b32_e32 v3, 24, v3 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v5 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v3 +; GFX8-NEXT: v_and_b32_e32 v0, v0, v1 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX8-NEXT: v_lshrrev_b32_e32 v2, 8, v0 -; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GFX8-NEXT: v_and_b32_sdwa v1, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v1, 24, v0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_lshlrev_b32_e32 v3, 24, v3 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX8-NEXT: v_or_b32_e32 v2, v0, v3 +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 24, v1 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v3 +; GFX8-NEXT: v_or_b32_e32 v2, v0, v1 ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: v_mov_b32_e32 v1, 0 ; GFX8-NEXT: flat_store_dword v[0:1], v2 @@ -1737,39 +1687,35 @@ define amdgpu_ps void @insertelement_v_v4i8_v_v(<4 x i8> addrspace(1)* %ptr, i8 ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 ; GFX7-NEXT: s_movk_i32 s2, 0xff ; GFX7-NEXT: v_and_b32_e32 v3, 3, v3 +; GFX7-NEXT: v_mov_b32_e32 v1, 0xff ; GFX7-NEXT: v_and_b32_e32 v2, s2, v2 ; GFX7-NEXT: v_lshlrev_b32_e32 v3, 3, v3 ; GFX7-NEXT: v_lshlrev_b32_e32 v2, v3, v2 -; GFX7-NEXT: v_lshl_b32_e32 v3, s2, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, v3, v1 ; GFX7-NEXT: v_xor_b32_e32 v3, -1, v3 -; GFX7-NEXT: v_mov_b32_e32 v1, 0xff ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v4, 8, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v5, 16, v0 -; GFX7-NEXT: v_and_b32_e32 v4, s2, v4 -; GFX7-NEXT: v_lshrrev_b32_e32 v6, 24, v0 -; GFX7-NEXT: v_and_b32_e32 v5, s2, v5 -; GFX7-NEXT: v_and_b32_e32 v0, s2, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 8, v4 -; GFX7-NEXT: v_lshlrev_b32_e32 v5, 16, v5 +; GFX7-NEXT: v_bfe_u32 v6, v0, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v4, 24, v0 +; GFX7-NEXT: v_and_b32_e32 v5, s2, v0 +; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v6, 8, v6 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_or_b32_e32 v5, v5, v6 +; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v4 +; GFX7-NEXT: v_or_b32_e32 v0, v5, v0 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v4 -; GFX7-NEXT: v_lshlrev_b32_e32 v6, 24, v6 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v5 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v6 ; GFX7-NEXT: v_and_b32_e32 v0, v0, v3 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX7-NEXT: v_lshrrev_b32_e32 v2, 8, v0 -; GFX7-NEXT: v_and_b32_e32 v2, v2, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v4, 24, v0 -; GFX7-NEXT: v_and_b32_e32 v0, v0, v1 -; GFX7-NEXT: v_and_b32_e32 v1, v3, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 8, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-NEXT: v_bfe_u32 v3, v0, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v2, 24, v0 +; GFX7-NEXT: v_and_b32_e32 v1, v0, v1 +; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_or_b32_e32 v1, v1, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 24, v2 +; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v3, 24, v4 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v3 ; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm @@ -1785,23 +1731,22 @@ define amdgpu_ps void @insertelement_v_v4i8_v_v(<4 x i8> addrspace(1)* %ptr, i8 ; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX10-NEXT: v_xor_b32_e32 v2, -1, v5 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v3, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v0 -; GFX10-NEXT: v_and_b32_sdwa v6, v0, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b32_sdwa v3, s0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_lshlrev_b32_e32 v4, 24, v4 +; GFX10-NEXT: s_mov_b32 s0, 16 +; GFX10-NEXT: v_lshlrev_b32_sdwa v6, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX10-NEXT: v_and_or_b32 v0, v0, s1, v3 -; GFX10-NEXT: v_mov_b32_e32 v3, 0xff -; GFX10-NEXT: v_or3_b32 v0, v0, v6, v4 +; GFX10-NEXT: v_lshlrev_b32_e32 v3, 24, v4 +; GFX10-NEXT: v_or3_b32 v0, v0, v6, v3 +; GFX10-NEXT: v_mov_b32_e32 v3, 8 ; GFX10-NEXT: v_and_or_b32 v0, v0, v2, v1 -; GFX10-NEXT: v_mov_b32_e32 v1, 8 -; GFX10-NEXT: v_lshrrev_b32_e32 v2, 8, v0 -; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_and_b32_sdwa v2, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; GFX10-NEXT: v_and_or_b32 v0, v0, v3, v1 -; GFX10-NEXT: v_or3_b32 v2, v0, v2, v4 +; GFX10-NEXT: v_mov_b32_e32 v1, 16 +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v0, 0xff, v0, v2 +; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v3 +; GFX10-NEXT: v_or3_b32 v2, v0, v1, v2 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: global_store_dword v[0:1], v2, off @@ -1816,71 +1761,65 @@ define amdgpu_ps void @insertelement_s_v8i8_s_s(<8 x i8> addrspace(4)* inreg %pt ; GFX9-LABEL: insertelement_s_v8i8_s_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 -; GFX9-NEXT: s_movk_i32 s10, 0xff +; GFX9-NEXT: s_mov_b32 s8, 0x80008 +; GFX9-NEXT: s_movk_i32 s6, 0xff ; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: v_mov_b32_e32 v3, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_lshr_b32 s2, s0, 8 -; GFX9-NEXT: s_and_b32 s2, s2, s10 -; GFX9-NEXT: s_lshr_b32 s3, s0, 16 -; GFX9-NEXT: s_lshr_b32 s6, s0, 24 -; GFX9-NEXT: s_and_b32 s0, s0, s10 -; GFX9-NEXT: s_lshl_b32 s2, s2, 8 -; GFX9-NEXT: s_or_b32 s0, s0, s2 -; GFX9-NEXT: s_and_b32 s2, s3, s10 -; GFX9-NEXT: s_lshl_b32 s2, s2, 16 -; GFX9-NEXT: s_or_b32 s0, s0, s2 -; GFX9-NEXT: s_lshl_b32 s2, s6, 24 -; GFX9-NEXT: s_lshr_b32 s7, s1, 8 +; GFX9-NEXT: s_bfe_u32 s9, s0, s8 +; GFX9-NEXT: s_and_b32 s7, s0, s6 +; GFX9-NEXT: s_lshl_b32 s9, s9, 8 +; GFX9-NEXT: s_or_b32 s7, s7, s9 +; GFX9-NEXT: s_mov_b32 s9, 0x80010 +; GFX9-NEXT: s_lshr_b32 s2, s0, 24 +; GFX9-NEXT: s_bfe_u32 s0, s0, s9 +; GFX9-NEXT: s_lshl_b32 s0, s0, 16 +; GFX9-NEXT: s_or_b32 s0, s7, s0 +; GFX9-NEXT: s_bfe_u32 s7, s1, s8 +; GFX9-NEXT: s_lshl_b32 s2, s2, 24 ; GFX9-NEXT: s_or_b32 s0, s0, s2 -; GFX9-NEXT: s_and_b32 s2, s7, s10 -; GFX9-NEXT: s_lshr_b32 s8, s1, 16 -; GFX9-NEXT: s_lshr_b32 s9, s1, 24 -; GFX9-NEXT: s_and_b32 s1, s1, s10 -; GFX9-NEXT: s_lshl_b32 s2, s2, 8 -; GFX9-NEXT: s_or_b32 s1, s1, s2 -; GFX9-NEXT: s_and_b32 s2, s8, s10 -; GFX9-NEXT: s_lshl_b32 s2, s2, 16 -; GFX9-NEXT: s_or_b32 s1, s1, s2 -; GFX9-NEXT: s_lshl_b32 s2, s9, 24 +; GFX9-NEXT: s_lshr_b32 s3, s1, 24 +; GFX9-NEXT: s_and_b32 s2, s1, s6 +; GFX9-NEXT: s_bfe_u32 s1, s1, s9 +; GFX9-NEXT: s_lshl_b32 s7, s7, 8 +; GFX9-NEXT: s_or_b32 s2, s2, s7 +; GFX9-NEXT: s_lshl_b32 s1, s1, 16 +; GFX9-NEXT: s_or_b32 s1, s2, s1 +; GFX9-NEXT: s_lshl_b32 s2, s3, 24 ; GFX9-NEXT: s_or_b32 s1, s1, s2 ; GFX9-NEXT: s_lshr_b32 s2, s5, 2 ; GFX9-NEXT: s_cmp_eq_u32 s2, 1 ; GFX9-NEXT: s_cselect_b32 s3, s1, s0 ; GFX9-NEXT: s_and_b32 s5, s5, 3 ; GFX9-NEXT: s_lshl_b32 s5, s5, 3 -; GFX9-NEXT: s_and_b32 s4, s4, s10 +; GFX9-NEXT: s_and_b32 s4, s4, s6 ; GFX9-NEXT: s_lshl_b32 s4, s4, s5 -; GFX9-NEXT: s_lshl_b32 s5, s10, s5 +; GFX9-NEXT: s_lshl_b32 s5, s6, s5 ; GFX9-NEXT: s_andn2_b32 s3, s3, s5 ; GFX9-NEXT: s_or_b32 s3, s3, s4 ; GFX9-NEXT: s_cmp_eq_u32 s2, 0 ; GFX9-NEXT: s_cselect_b32 s0, s3, s0 ; GFX9-NEXT: s_cmp_eq_u32 s2, 1 ; GFX9-NEXT: s_cselect_b32 s1, s3, s1 -; GFX9-NEXT: s_lshr_b32 s2, s0, 8 -; GFX9-NEXT: s_and_b32 s2, s2, s10 -; GFX9-NEXT: s_lshr_b32 s3, s0, 16 -; GFX9-NEXT: s_lshr_b32 s4, s0, 24 -; GFX9-NEXT: s_and_b32 s0, s0, s10 -; GFX9-NEXT: s_lshl_b32 s2, s2, 8 -; GFX9-NEXT: s_or_b32 s0, s0, s2 -; GFX9-NEXT: s_and_b32 s2, s3, s10 -; GFX9-NEXT: s_lshl_b32 s2, s2, 16 -; GFX9-NEXT: s_or_b32 s0, s0, s2 -; GFX9-NEXT: s_lshl_b32 s2, s4, 24 -; GFX9-NEXT: s_lshr_b32 s5, s1, 8 +; GFX9-NEXT: s_bfe_u32 s5, s0, s8 +; GFX9-NEXT: s_lshr_b32 s2, s0, 24 +; GFX9-NEXT: s_and_b32 s4, s0, s6 +; GFX9-NEXT: s_bfe_u32 s0, s0, s9 +; GFX9-NEXT: s_lshl_b32 s5, s5, 8 +; GFX9-NEXT: s_or_b32 s4, s4, s5 +; GFX9-NEXT: s_lshl_b32 s0, s0, 16 +; GFX9-NEXT: s_or_b32 s0, s4, s0 +; GFX9-NEXT: s_bfe_u32 s4, s1, s8 +; GFX9-NEXT: s_lshl_b32 s2, s2, 24 ; GFX9-NEXT: s_or_b32 s0, s0, s2 -; GFX9-NEXT: s_and_b32 s2, s5, s10 -; GFX9-NEXT: s_lshr_b32 s6, s1, 16 -; GFX9-NEXT: s_lshr_b32 s7, s1, 24 -; GFX9-NEXT: s_and_b32 s1, s1, s10 -; GFX9-NEXT: s_lshl_b32 s2, s2, 8 -; GFX9-NEXT: s_or_b32 s1, s1, s2 -; GFX9-NEXT: s_and_b32 s2, s6, s10 -; GFX9-NEXT: s_lshl_b32 s2, s2, 16 -; GFX9-NEXT: s_or_b32 s1, s1, s2 -; GFX9-NEXT: s_lshl_b32 s2, s7, 24 +; GFX9-NEXT: s_lshr_b32 s3, s1, 24 +; GFX9-NEXT: s_and_b32 s2, s1, s6 +; GFX9-NEXT: s_bfe_u32 s1, s1, s9 +; GFX9-NEXT: s_lshl_b32 s4, s4, 8 +; GFX9-NEXT: s_or_b32 s2, s2, s4 +; GFX9-NEXT: s_lshl_b32 s1, s1, 16 +; GFX9-NEXT: s_or_b32 s1, s2, s1 +; GFX9-NEXT: s_lshl_b32 s2, s3, 24 ; GFX9-NEXT: s_or_b32 s1, s1, s2 ; GFX9-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s1 @@ -1890,71 +1829,65 @@ define amdgpu_ps void @insertelement_s_v8i8_s_s(<8 x i8> addrspace(4)* inreg %pt ; GFX8-LABEL: insertelement_s_v8i8_s_s: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 -; GFX8-NEXT: s_movk_i32 s10, 0xff +; GFX8-NEXT: s_mov_b32 s8, 0x80008 +; GFX8-NEXT: s_movk_i32 s6, 0xff ; GFX8-NEXT: v_mov_b32_e32 v2, 0 ; GFX8-NEXT: v_mov_b32_e32 v3, 0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_lshr_b32 s2, s0, 8 -; GFX8-NEXT: s_and_b32 s2, s2, s10 -; GFX8-NEXT: s_lshr_b32 s3, s0, 16 -; GFX8-NEXT: s_lshr_b32 s6, s0, 24 -; GFX8-NEXT: s_and_b32 s0, s0, s10 -; GFX8-NEXT: s_lshl_b32 s2, s2, 8 -; GFX8-NEXT: s_or_b32 s0, s0, s2 -; GFX8-NEXT: s_and_b32 s2, s3, s10 -; GFX8-NEXT: s_lshl_b32 s2, s2, 16 -; GFX8-NEXT: s_or_b32 s0, s0, s2 -; GFX8-NEXT: s_lshl_b32 s2, s6, 24 -; GFX8-NEXT: s_lshr_b32 s7, s1, 8 +; GFX8-NEXT: s_bfe_u32 s9, s0, s8 +; GFX8-NEXT: s_and_b32 s7, s0, s6 +; GFX8-NEXT: s_lshl_b32 s9, s9, 8 +; GFX8-NEXT: s_or_b32 s7, s7, s9 +; GFX8-NEXT: s_mov_b32 s9, 0x80010 +; GFX8-NEXT: s_lshr_b32 s2, s0, 24 +; GFX8-NEXT: s_bfe_u32 s0, s0, s9 +; GFX8-NEXT: s_lshl_b32 s0, s0, 16 +; GFX8-NEXT: s_or_b32 s0, s7, s0 +; GFX8-NEXT: s_bfe_u32 s7, s1, s8 +; GFX8-NEXT: s_lshl_b32 s2, s2, 24 ; GFX8-NEXT: s_or_b32 s0, s0, s2 -; GFX8-NEXT: s_and_b32 s2, s7, s10 -; GFX8-NEXT: s_lshr_b32 s8, s1, 16 -; GFX8-NEXT: s_lshr_b32 s9, s1, 24 -; GFX8-NEXT: s_and_b32 s1, s1, s10 -; GFX8-NEXT: s_lshl_b32 s2, s2, 8 -; GFX8-NEXT: s_or_b32 s1, s1, s2 -; GFX8-NEXT: s_and_b32 s2, s8, s10 -; GFX8-NEXT: s_lshl_b32 s2, s2, 16 -; GFX8-NEXT: s_or_b32 s1, s1, s2 -; GFX8-NEXT: s_lshl_b32 s2, s9, 24 +; GFX8-NEXT: s_lshr_b32 s3, s1, 24 +; GFX8-NEXT: s_and_b32 s2, s1, s6 +; GFX8-NEXT: s_bfe_u32 s1, s1, s9 +; GFX8-NEXT: s_lshl_b32 s7, s7, 8 +; GFX8-NEXT: s_or_b32 s2, s2, s7 +; GFX8-NEXT: s_lshl_b32 s1, s1, 16 +; GFX8-NEXT: s_or_b32 s1, s2, s1 +; GFX8-NEXT: s_lshl_b32 s2, s3, 24 ; GFX8-NEXT: s_or_b32 s1, s1, s2 ; GFX8-NEXT: s_lshr_b32 s2, s5, 2 ; GFX8-NEXT: s_cmp_eq_u32 s2, 1 ; GFX8-NEXT: s_cselect_b32 s3, s1, s0 ; GFX8-NEXT: s_and_b32 s5, s5, 3 ; GFX8-NEXT: s_lshl_b32 s5, s5, 3 -; GFX8-NEXT: s_and_b32 s4, s4, s10 +; GFX8-NEXT: s_and_b32 s4, s4, s6 ; GFX8-NEXT: s_lshl_b32 s4, s4, s5 -; GFX8-NEXT: s_lshl_b32 s5, s10, s5 +; GFX8-NEXT: s_lshl_b32 s5, s6, s5 ; GFX8-NEXT: s_andn2_b32 s3, s3, s5 ; GFX8-NEXT: s_or_b32 s3, s3, s4 ; GFX8-NEXT: s_cmp_eq_u32 s2, 0 ; GFX8-NEXT: s_cselect_b32 s0, s3, s0 ; GFX8-NEXT: s_cmp_eq_u32 s2, 1 ; GFX8-NEXT: s_cselect_b32 s1, s3, s1 -; GFX8-NEXT: s_lshr_b32 s2, s0, 8 -; GFX8-NEXT: s_and_b32 s2, s2, s10 -; GFX8-NEXT: s_lshr_b32 s3, s0, 16 -; GFX8-NEXT: s_lshr_b32 s4, s0, 24 -; GFX8-NEXT: s_and_b32 s0, s0, s10 -; GFX8-NEXT: s_lshl_b32 s2, s2, 8 -; GFX8-NEXT: s_or_b32 s0, s0, s2 -; GFX8-NEXT: s_and_b32 s2, s3, s10 -; GFX8-NEXT: s_lshl_b32 s2, s2, 16 -; GFX8-NEXT: s_or_b32 s0, s0, s2 -; GFX8-NEXT: s_lshl_b32 s2, s4, 24 -; GFX8-NEXT: s_lshr_b32 s5, s1, 8 +; GFX8-NEXT: s_bfe_u32 s5, s0, s8 +; GFX8-NEXT: s_lshr_b32 s2, s0, 24 +; GFX8-NEXT: s_and_b32 s4, s0, s6 +; GFX8-NEXT: s_bfe_u32 s0, s0, s9 +; GFX8-NEXT: s_lshl_b32 s5, s5, 8 +; GFX8-NEXT: s_or_b32 s4, s4, s5 +; GFX8-NEXT: s_lshl_b32 s0, s0, 16 +; GFX8-NEXT: s_or_b32 s0, s4, s0 +; GFX8-NEXT: s_bfe_u32 s4, s1, s8 +; GFX8-NEXT: s_lshl_b32 s2, s2, 24 ; GFX8-NEXT: s_or_b32 s0, s0, s2 -; GFX8-NEXT: s_and_b32 s2, s5, s10 -; GFX8-NEXT: s_lshr_b32 s6, s1, 16 -; GFX8-NEXT: s_lshr_b32 s7, s1, 24 -; GFX8-NEXT: s_and_b32 s1, s1, s10 -; GFX8-NEXT: s_lshl_b32 s2, s2, 8 -; GFX8-NEXT: s_or_b32 s1, s1, s2 -; GFX8-NEXT: s_and_b32 s2, s6, s10 -; GFX8-NEXT: s_lshl_b32 s2, s2, 16 -; GFX8-NEXT: s_or_b32 s1, s1, s2 -; GFX8-NEXT: s_lshl_b32 s2, s7, 24 +; GFX8-NEXT: s_lshr_b32 s3, s1, 24 +; GFX8-NEXT: s_and_b32 s2, s1, s6 +; GFX8-NEXT: s_bfe_u32 s1, s1, s9 +; GFX8-NEXT: s_lshl_b32 s4, s4, 8 +; GFX8-NEXT: s_or_b32 s2, s2, s4 +; GFX8-NEXT: s_lshl_b32 s1, s1, 16 +; GFX8-NEXT: s_or_b32 s1, s2, s1 +; GFX8-NEXT: s_lshl_b32 s2, s3, 24 ; GFX8-NEXT: s_or_b32 s1, s1, s2 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 @@ -1964,69 +1897,63 @@ define amdgpu_ps void @insertelement_s_v8i8_s_s(<8 x i8> addrspace(4)* inreg %pt ; GFX7-LABEL: insertelement_s_v8i8_s_s: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 -; GFX7-NEXT: s_movk_i32 s10, 0xff +; GFX7-NEXT: s_mov_b32 s8, 0x80008 +; GFX7-NEXT: s_movk_i32 s6, 0xff ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_lshr_b32 s2, s0, 8 -; GFX7-NEXT: s_and_b32 s2, s2, s10 -; GFX7-NEXT: s_lshr_b32 s3, s0, 16 -; GFX7-NEXT: s_lshr_b32 s6, s0, 24 -; GFX7-NEXT: s_and_b32 s0, s0, s10 -; GFX7-NEXT: s_lshl_b32 s2, s2, 8 -; GFX7-NEXT: s_or_b32 s0, s0, s2 -; GFX7-NEXT: s_and_b32 s2, s3, s10 -; GFX7-NEXT: s_lshl_b32 s2, s2, 16 +; GFX7-NEXT: s_bfe_u32 s9, s0, s8 +; GFX7-NEXT: s_and_b32 s7, s0, s6 +; GFX7-NEXT: s_lshl_b32 s9, s9, 8 +; GFX7-NEXT: s_or_b32 s7, s7, s9 +; GFX7-NEXT: s_mov_b32 s9, 0x80010 +; GFX7-NEXT: s_lshr_b32 s2, s0, 24 +; GFX7-NEXT: s_bfe_u32 s0, s0, s9 +; GFX7-NEXT: s_lshl_b32 s0, s0, 16 +; GFX7-NEXT: s_or_b32 s0, s7, s0 +; GFX7-NEXT: s_bfe_u32 s7, s1, s8 +; GFX7-NEXT: s_lshl_b32 s2, s2, 24 ; GFX7-NEXT: s_or_b32 s0, s0, s2 -; GFX7-NEXT: s_lshl_b32 s2, s6, 24 -; GFX7-NEXT: s_lshr_b32 s7, s1, 8 -; GFX7-NEXT: s_or_b32 s0, s0, s2 -; GFX7-NEXT: s_and_b32 s2, s7, s10 -; GFX7-NEXT: s_lshr_b32 s8, s1, 16 -; GFX7-NEXT: s_lshr_b32 s9, s1, 24 -; GFX7-NEXT: s_and_b32 s1, s1, s10 -; GFX7-NEXT: s_lshl_b32 s2, s2, 8 -; GFX7-NEXT: s_or_b32 s1, s1, s2 -; GFX7-NEXT: s_and_b32 s2, s8, s10 -; GFX7-NEXT: s_lshl_b32 s2, s2, 16 -; GFX7-NEXT: s_or_b32 s1, s1, s2 -; GFX7-NEXT: s_lshl_b32 s2, s9, 24 +; GFX7-NEXT: s_lshr_b32 s3, s1, 24 +; GFX7-NEXT: s_and_b32 s2, s1, s6 +; GFX7-NEXT: s_bfe_u32 s1, s1, s9 +; GFX7-NEXT: s_lshl_b32 s7, s7, 8 +; GFX7-NEXT: s_or_b32 s2, s2, s7 +; GFX7-NEXT: s_lshl_b32 s1, s1, 16 +; GFX7-NEXT: s_or_b32 s1, s2, s1 +; GFX7-NEXT: s_lshl_b32 s2, s3, 24 ; GFX7-NEXT: s_or_b32 s1, s1, s2 ; GFX7-NEXT: s_lshr_b32 s2, s5, 2 ; GFX7-NEXT: s_cmp_eq_u32 s2, 1 ; GFX7-NEXT: s_cselect_b32 s3, s1, s0 ; GFX7-NEXT: s_and_b32 s5, s5, 3 ; GFX7-NEXT: s_lshl_b32 s5, s5, 3 -; GFX7-NEXT: s_and_b32 s4, s4, s10 +; GFX7-NEXT: s_and_b32 s4, s4, s6 ; GFX7-NEXT: s_lshl_b32 s4, s4, s5 -; GFX7-NEXT: s_lshl_b32 s5, s10, s5 +; GFX7-NEXT: s_lshl_b32 s5, s6, s5 ; GFX7-NEXT: s_andn2_b32 s3, s3, s5 ; GFX7-NEXT: s_or_b32 s3, s3, s4 ; GFX7-NEXT: s_cmp_eq_u32 s2, 0 ; GFX7-NEXT: s_cselect_b32 s4, s3, s0 ; GFX7-NEXT: s_cmp_eq_u32 s2, 1 ; GFX7-NEXT: s_cselect_b32 s3, s3, s1 -; GFX7-NEXT: s_lshr_b32 s2, s4, 8 -; GFX7-NEXT: s_and_b32 s2, s2, s10 -; GFX7-NEXT: s_lshr_b32 s5, s4, 16 -; GFX7-NEXT: s_lshr_b32 s6, s4, 24 -; GFX7-NEXT: s_and_b32 s4, s4, s10 -; GFX7-NEXT: s_lshl_b32 s2, s2, 8 -; GFX7-NEXT: s_or_b32 s2, s4, s2 -; GFX7-NEXT: s_and_b32 s4, s5, s10 -; GFX7-NEXT: s_lshl_b32 s4, s4, 16 -; GFX7-NEXT: s_or_b32 s2, s2, s4 -; GFX7-NEXT: s_lshl_b32 s4, s6, 24 -; GFX7-NEXT: s_lshr_b32 s7, s3, 8 -; GFX7-NEXT: s_or_b32 s2, s2, s4 -; GFX7-NEXT: s_and_b32 s4, s7, s10 -; GFX7-NEXT: s_lshr_b32 s8, s3, 16 -; GFX7-NEXT: s_lshr_b32 s9, s3, 24 -; GFX7-NEXT: s_and_b32 s3, s3, s10 -; GFX7-NEXT: s_lshl_b32 s4, s4, 8 -; GFX7-NEXT: s_or_b32 s3, s3, s4 -; GFX7-NEXT: s_and_b32 s4, s8, s10 +; GFX7-NEXT: s_bfe_u32 s10, s4, s8 +; GFX7-NEXT: s_lshr_b32 s2, s4, 24 +; GFX7-NEXT: s_and_b32 s7, s4, s6 +; GFX7-NEXT: s_bfe_u32 s4, s4, s9 +; GFX7-NEXT: s_lshl_b32 s10, s10, 8 +; GFX7-NEXT: s_or_b32 s7, s7, s10 ; GFX7-NEXT: s_lshl_b32 s4, s4, 16 -; GFX7-NEXT: s_or_b32 s3, s3, s4 -; GFX7-NEXT: s_lshl_b32 s4, s9, 24 +; GFX7-NEXT: s_or_b32 s4, s7, s4 +; GFX7-NEXT: s_lshl_b32 s2, s2, 24 +; GFX7-NEXT: s_or_b32 s2, s4, s2 +; GFX7-NEXT: s_and_b32 s4, s3, s6 +; GFX7-NEXT: s_bfe_u32 s6, s3, s8 +; GFX7-NEXT: s_lshr_b32 s5, s3, 24 +; GFX7-NEXT: s_bfe_u32 s3, s3, s9 +; GFX7-NEXT: s_lshl_b32 s6, s6, 8 +; GFX7-NEXT: s_or_b32 s4, s4, s6 +; GFX7-NEXT: s_lshl_b32 s3, s3, 16 +; GFX7-NEXT: s_or_b32 s3, s4, s3 +; GFX7-NEXT: s_lshl_b32 s4, s5, 24 ; GFX7-NEXT: s_or_b32 s3, s3, s4 ; GFX7-NEXT: v_mov_b32_e32 v0, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s3 @@ -2039,71 +1966,65 @@ define amdgpu_ps void @insertelement_s_v8i8_s_s(<8 x i8> addrspace(4)* inreg %pt ; GFX10-LABEL: insertelement_s_v8i8_s_s: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX10-NEXT: s_mov_b32 s3, 0x80008 ; GFX10-NEXT: s_movk_i32 s2, 0xff -; GFX10-NEXT: s_lshr_b32 s3, s5, 2 +; GFX10-NEXT: s_mov_b32 s6, 0x80010 +; GFX10-NEXT: s_lshr_b32 s7, s5, 2 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: v_mov_b32_e32 v3, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_lshr_b32 s6, s0, 8 -; GFX10-NEXT: s_lshr_b32 s9, s1, 8 -; GFX10-NEXT: s_lshr_b32 s7, s0, 16 -; GFX10-NEXT: s_lshr_b32 s10, s1, 16 -; GFX10-NEXT: s_and_b32 s6, s6, s2 -; GFX10-NEXT: s_and_b32 s9, s9, s2 +; GFX10-NEXT: s_bfe_u32 s11, s0, s3 +; GFX10-NEXT: s_bfe_u32 s13, s1, s3 ; GFX10-NEXT: s_lshr_b32 s8, s0, 24 -; GFX10-NEXT: s_lshr_b32 s11, s1, 24 -; GFX10-NEXT: s_and_b32 s7, s7, s2 -; GFX10-NEXT: s_and_b32 s10, s10, s2 -; GFX10-NEXT: s_and_b32 s0, s0, s2 -; GFX10-NEXT: s_lshl_b32 s6, s6, 8 -; GFX10-NEXT: s_and_b32 s1, s1, s2 -; GFX10-NEXT: s_lshl_b32 s9, s9, 8 -; GFX10-NEXT: s_lshl_b32 s7, s7, 16 -; GFX10-NEXT: s_or_b32 s0, s0, s6 -; GFX10-NEXT: s_lshl_b32 s10, s10, 16 -; GFX10-NEXT: s_or_b32 s1, s1, s9 +; GFX10-NEXT: s_lshr_b32 s9, s1, 24 +; GFX10-NEXT: s_and_b32 s10, s0, s2 +; GFX10-NEXT: s_bfe_u32 s0, s0, s6 +; GFX10-NEXT: s_and_b32 s12, s1, s2 +; GFX10-NEXT: s_bfe_u32 s1, s1, s6 +; GFX10-NEXT: s_lshl_b32 s11, s11, 8 +; GFX10-NEXT: s_lshl_b32 s13, s13, 8 +; GFX10-NEXT: s_or_b32 s10, s10, s11 +; GFX10-NEXT: s_lshl_b32 s0, s0, 16 +; GFX10-NEXT: s_lshl_b32 s1, s1, 16 +; GFX10-NEXT: s_or_b32 s11, s12, s13 ; GFX10-NEXT: s_lshl_b32 s8, s8, 24 -; GFX10-NEXT: s_or_b32 s0, s0, s7 -; GFX10-NEXT: s_lshl_b32 s11, s11, 24 -; GFX10-NEXT: s_or_b32 s1, s1, s10 +; GFX10-NEXT: s_or_b32 s0, s10, s0 +; GFX10-NEXT: s_lshl_b32 s9, s9, 24 +; GFX10-NEXT: s_or_b32 s1, s11, s1 ; GFX10-NEXT: s_or_b32 s0, s0, s8 -; GFX10-NEXT: s_or_b32 s1, s1, s11 -; GFX10-NEXT: s_cmp_eq_u32 s3, 1 -; GFX10-NEXT: s_cselect_b32 s6, s1, s0 +; GFX10-NEXT: s_or_b32 s1, s1, s9 +; GFX10-NEXT: s_cmp_eq_u32 s7, 1 +; GFX10-NEXT: s_cselect_b32 s8, s1, s0 ; GFX10-NEXT: s_and_b32 s5, s5, 3 ; GFX10-NEXT: s_and_b32 s4, s4, s2 ; GFX10-NEXT: s_lshl_b32 s5, s5, 3 -; GFX10-NEXT: s_lshl_b32 s7, s2, s5 +; GFX10-NEXT: s_lshl_b32 s9, s2, s5 ; GFX10-NEXT: s_lshl_b32 s4, s4, s5 -; GFX10-NEXT: s_andn2_b32 s5, s6, s7 +; GFX10-NEXT: s_andn2_b32 s5, s8, s9 ; GFX10-NEXT: s_or_b32 s4, s5, s4 -; GFX10-NEXT: s_cmp_eq_u32 s3, 0 +; GFX10-NEXT: s_cmp_eq_u32 s7, 0 ; GFX10-NEXT: s_cselect_b32 s0, s4, s0 -; GFX10-NEXT: s_cmp_eq_u32 s3, 1 +; GFX10-NEXT: s_cmp_eq_u32 s7, 1 ; GFX10-NEXT: s_cselect_b32 s1, s4, s1 -; GFX10-NEXT: s_lshr_b32 s3, s0, 8 -; GFX10-NEXT: s_lshr_b32 s4, s0, 16 -; GFX10-NEXT: s_and_b32 s3, s3, s2 -; GFX10-NEXT: s_and_b32 s4, s4, s2 -; GFX10-NEXT: s_lshr_b32 s5, s0, 24 -; GFX10-NEXT: s_and_b32 s0, s0, s2 +; GFX10-NEXT: s_bfe_u32 s7, s0, s3 +; GFX10-NEXT: s_bfe_u32 s3, s1, s3 +; GFX10-NEXT: s_and_b32 s5, s0, s2 +; GFX10-NEXT: s_lshr_b32 s4, s0, 24 +; GFX10-NEXT: s_bfe_u32 s0, s0, s6 +; GFX10-NEXT: s_lshl_b32 s7, s7, 8 +; GFX10-NEXT: s_lshr_b32 s8, s1, 24 +; GFX10-NEXT: s_and_b32 s2, s1, s2 +; GFX10-NEXT: s_bfe_u32 s1, s1, s6 ; GFX10-NEXT: s_lshl_b32 s3, s3, 8 -; GFX10-NEXT: s_lshr_b32 s6, s1, 8 +; GFX10-NEXT: s_lshl_b32 s0, s0, 16 +; GFX10-NEXT: s_or_b32 s5, s5, s7 +; GFX10-NEXT: s_or_b32 s2, s2, s3 +; GFX10-NEXT: s_lshl_b32 s1, s1, 16 +; GFX10-NEXT: s_or_b32 s0, s5, s0 +; GFX10-NEXT: s_lshl_b32 s3, s4, 24 +; GFX10-NEXT: s_or_b32 s1, s2, s1 +; GFX10-NEXT: s_lshl_b32 s2, s8, 24 ; GFX10-NEXT: s_or_b32 s0, s0, s3 -; GFX10-NEXT: s_lshl_b32 s4, s4, 16 -; GFX10-NEXT: s_lshr_b32 s7, s1, 16 -; GFX10-NEXT: s_or_b32 s0, s0, s4 -; GFX10-NEXT: s_and_b32 s4, s6, s2 -; GFX10-NEXT: s_lshr_b32 s3, s1, 24 -; GFX10-NEXT: s_and_b32 s1, s1, s2 -; GFX10-NEXT: s_and_b32 s2, s7, s2 -; GFX10-NEXT: s_lshl_b32 s4, s4, 8 -; GFX10-NEXT: s_lshl_b32 s2, s2, 16 -; GFX10-NEXT: s_or_b32 s1, s1, s4 -; GFX10-NEXT: s_lshl_b32 s4, s5, 24 -; GFX10-NEXT: s_or_b32 s1, s1, s2 -; GFX10-NEXT: s_lshl_b32 s2, s3, 24 -; GFX10-NEXT: s_or_b32 s0, s0, s4 ; GFX10-NEXT: s_or_b32 s1, s1, s2 ; GFX10-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-NEXT: v_mov_b32_e32 v1, s1 @@ -2120,52 +2041,50 @@ define amdgpu_ps void @insertelement_v_v8i8_s_s(<8 x i8> addrspace(1)* %ptr, i8 ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX9-NEXT: s_mov_b32 s0, 8 -; GFX9-NEXT: s_movk_i32 s4, 0xff -; GFX9-NEXT: s_lshr_b32 s1, s3, 2 +; GFX9-NEXT: s_mov_b32 s1, 16 +; GFX9-NEXT: s_lshr_b32 s5, s3, 2 ; GFX9-NEXT: s_and_b32 s3, s3, 3 +; GFX9-NEXT: s_movk_i32 s4, 0xff ; GFX9-NEXT: s_and_b32 s2, s2, s4 ; GFX9-NEXT: s_lshl_b32 s3, s3, 3 ; GFX9-NEXT: s_lshl_b32 s2, s2, s3 ; GFX9-NEXT: s_lshl_b32 s3, s4, s3 -; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s1, 1 +; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s5, 1 ; GFX9-NEXT: s_not_b32 s3, s3 -; GFX9-NEXT: v_mov_b32_e32 v3, s2 +; GFX9-NEXT: v_mov_b32_e32 v4, s2 ; GFX9-NEXT: v_mov_b32_e32 v2, 8 +; GFX9-NEXT: v_mov_b32_e32 v3, 16 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v4, 8, v0 -; GFX9-NEXT: v_lshrrev_b32_e32 v6, 8, v1 ; GFX9-NEXT: v_lshrrev_b32_e32 v5, 24, v0 -; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v1 -; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v6, s0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_b32_sdwa v8, v0, s4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_b32_sdwa v9, v1, s4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_lshrrev_b32_e32 v6, 24, v1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v7, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v9, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v8, s1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v10, s1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_and_or_b32 v0, v0, s4, v7 ; GFX9-NEXT: v_lshlrev_b32_e32 v5, 24, v5 -; GFX9-NEXT: v_and_or_b32 v0, v0, s4, v4 -; GFX9-NEXT: v_lshlrev_b32_e32 v7, 24, v7 -; GFX9-NEXT: v_and_or_b32 v1, v1, s4, v6 +; GFX9-NEXT: v_and_or_b32 v1, v1, s4, v9 +; GFX9-NEXT: v_lshlrev_b32_e32 v6, 24, v6 ; GFX9-NEXT: v_or3_b32 v0, v0, v8, v5 -; GFX9-NEXT: v_or3_b32 v1, v1, v9, v7 -; GFX9-NEXT: v_cndmask_b32_e32 v4, v0, v1, vcc -; GFX9-NEXT: v_and_or_b32 v3, v4, s3, v3 -; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s1, 0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[0:1] -; GFX9-NEXT: v_lshrrev_b32_e32 v3, 8, v0 -; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v1 +; GFX9-NEXT: v_or3_b32 v1, v1, v10, v6 +; GFX9-NEXT: v_cndmask_b32_e32 v5, v0, v1, vcc +; GFX9-NEXT: v_and_or_b32 v4, v5, s3, v4 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s5, 0 +; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc +; GFX9-NEXT: v_lshlrev_b32_sdwa v6, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshrrev_b32_e32 v5, 24, v1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v3, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshrrev_b32_e32 v6, 24, v1 -; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_b32_sdwa v7, v0, s4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_b32_sdwa v8, v1, s4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; GFX9-NEXT: v_and_or_b32 v0, v0, s4, v3 +; GFX9-NEXT: v_lshlrev_b32_sdwa v7, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX9-NEXT: v_and_or_b32 v1, v1, s4, v2 -; GFX9-NEXT: v_lshlrev_b32_e32 v5, 24, v6 +; GFX9-NEXT: v_lshlrev_b32_e32 v2, 24, v5 +; GFX9-NEXT: v_or3_b32 v1, v1, v3, v2 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: v_and_or_b32 v0, v0, s4, v6 +; GFX9-NEXT: v_lshlrev_b32_e32 v4, 24, v4 ; GFX9-NEXT: v_or3_b32 v0, v0, v7, v4 -; GFX9-NEXT: v_or3_b32 v1, v1, v8, v5 ; GFX9-NEXT: v_mov_b32_e32 v3, 0 ; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off ; GFX9-NEXT: s_endpgm @@ -2173,58 +2092,55 @@ define amdgpu_ps void @insertelement_v_v8i8_s_s(<8 x i8> addrspace(1)* %ptr, i8 ; GFX8-LABEL: insertelement_v_v8i8_s_s: ; GFX8: ; %bb.0: ; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] -; GFX8-NEXT: s_movk_i32 s0, 0xff ; GFX8-NEXT: v_mov_b32_e32 v2, 8 -; GFX8-NEXT: v_mov_b32_e32 v3, 8 -; GFX8-NEXT: v_mov_b32_e32 v4, s0 +; GFX8-NEXT: v_mov_b32_e32 v3, 16 ; GFX8-NEXT: s_lshr_b32 s1, s3, 2 ; GFX8-NEXT: s_and_b32 s3, s3, 3 +; GFX8-NEXT: s_movk_i32 s0, 0xff ; GFX8-NEXT: s_lshl_b32 s3, s3, 3 ; GFX8-NEXT: s_and_b32 s2, s2, s0 ; GFX8-NEXT: s_lshl_b32 s0, s0, s3 ; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, s1, 1 ; GFX8-NEXT: s_not_b32 s0, s0 ; GFX8-NEXT: s_lshl_b32 s2, s2, s3 +; GFX8-NEXT: v_mov_b32_e32 v4, 8 +; GFX8-NEXT: v_mov_b32_e32 v5, 16 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_lshrrev_b32_e32 v5, 8, v0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v8, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX8-NEXT: v_lshrrev_b32_e32 v6, 24, v0 -; GFX8-NEXT: v_lshrrev_b32_e32 v7, 8, v1 -; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshlrev_b32_e32 v5, 24, v6 -; GFX8-NEXT: v_lshlrev_b32_sdwa v6, v3, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v8, 24, v1 -; GFX8-NEXT: v_and_b32_sdwa v9, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_sdwa v10, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v1, v1, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v9, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_sdwa v0, v0, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshrrev_b32_e32 v7, 24, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_e32 v6, 24, v6 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v9 -; GFX8-NEXT: v_lshlrev_b32_e32 v7, 24, v8 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v10 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v5 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v7 +; GFX8-NEXT: v_lshlrev_b32_e32 v2, 24, v7 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v3 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v6 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v2 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc ; GFX8-NEXT: v_and_b32_e32 v2, s0, v2 ; GFX8-NEXT: v_or_b32_e32 v2, s2, v2 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s1, 0 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX8-NEXT: v_lshrrev_b32_e32 v2, 8, v0 -; GFX8-NEXT: v_lshrrev_b32_e32 v6, 8, v1 -; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v3, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v5, 24, v0 -; GFX8-NEXT: v_and_b32_sdwa v8, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_lshrrev_b32_e32 v7, 24, v1 -; GFX8-NEXT: v_and_b32_sdwa v4, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v6, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v4, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v2, 24, v0 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v7, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v5, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_e32 v2, 24, v2 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v7 +; GFX8-NEXT: v_lshlrev_b32_e32 v3, 24, v3 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v5 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v3 ; GFX8-NEXT: v_mov_b32_e32 v2, 0 -; GFX8-NEXT: v_lshlrev_b32_e32 v5, 24, v5 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v8 -; GFX8-NEXT: v_lshlrev_b32_e32 v6, 24, v7 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v5 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v6 ; GFX8-NEXT: v_mov_b32_e32 v3, 0 ; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GFX8-NEXT: s_endpgm @@ -2245,60 +2161,52 @@ define amdgpu_ps void @insertelement_v_v8i8_s_s(<8 x i8> addrspace(1)* %ptr, i8 ; GFX7-NEXT: v_cmp_eq_u32_e64 vcc, s0, 1 ; GFX7-NEXT: s_not_b32 s1, s1 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v2, 8, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v5, 8, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v6, 16, v1 -; GFX7-NEXT: v_and_b32_e32 v2, s6, v2 -; GFX7-NEXT: v_and_b32_e32 v5, s6, v5 -; GFX7-NEXT: v_lshrrev_b32_e32 v4, 24, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v7, 24, v1 -; GFX7-NEXT: v_and_b32_e32 v3, s6, v3 -; GFX7-NEXT: v_and_b32_e32 v6, s6, v6 -; GFX7-NEXT: v_and_b32_e32 v0, s6, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 8, v2 -; GFX7-NEXT: v_and_b32_e32 v1, s6, v1 +; GFX7-NEXT: v_bfe_u32 v5, v0, 8, 8 +; GFX7-NEXT: v_bfe_u32 v7, v1, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v2, 24, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v1 +; GFX7-NEXT: v_and_b32_e32 v4, s6, v0 +; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX7-NEXT: v_and_b32_e32 v6, s6, v1 +; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 8 ; GFX7-NEXT: v_lshlrev_b32_e32 v5, 8, v5 -; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v7, 8, v7 +; GFX7-NEXT: v_or_b32_e32 v4, v4, v5 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-NEXT: v_or_b32_e32 v5, v6, v7 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 24, v2 +; GFX7-NEXT: v_or_b32_e32 v0, v4, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 24, v3 +; GFX7-NEXT: v_or_b32_e32 v1, v5, v1 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v6, 16, v6 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v5 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v3 -; GFX7-NEXT: v_lshlrev_b32_e32 v7, 24, v7 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v6 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v4 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v7 +; GFX7-NEXT: v_or_b32_e32 v1, v1, v3 ; GFX7-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc ; GFX7-NEXT: v_and_b32_e32 v2, s1, v2 ; GFX7-NEXT: v_or_b32_e32 v2, s2, v2 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[0:1], s0, 0 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc ; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] -; GFX7-NEXT: v_lshrrev_b32_e32 v2, 8, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v5, 8, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v6, 16, v1 -; GFX7-NEXT: v_and_b32_e32 v2, s6, v2 -; GFX7-NEXT: v_and_b32_e32 v5, s6, v5 -; GFX7-NEXT: v_lshrrev_b32_e32 v4, 24, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v7, 24, v1 -; GFX7-NEXT: v_and_b32_e32 v3, s6, v3 -; GFX7-NEXT: v_and_b32_e32 v6, s6, v6 -; GFX7-NEXT: v_and_b32_e32 v0, s6, v0 -; GFX7-NEXT: v_and_b32_e32 v1, s6, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 8, v2 +; GFX7-NEXT: v_bfe_u32 v5, v0, 8, 8 +; GFX7-NEXT: v_bfe_u32 v7, v1, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v2, 24, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v1 +; GFX7-NEXT: v_and_b32_e32 v4, s6, v0 +; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX7-NEXT: v_and_b32_e32 v6, s6, v1 +; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 8 ; GFX7-NEXT: v_lshlrev_b32_e32 v5, 8, v5 -; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v7, 8, v7 +; GFX7-NEXT: v_or_b32_e32 v4, v4, v5 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-NEXT: v_or_b32_e32 v5, v6, v7 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 24, v2 +; GFX7-NEXT: v_or_b32_e32 v0, v4, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 24, v3 +; GFX7-NEXT: v_or_b32_e32 v1, v5, v1 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v6, 16, v6 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v5 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v3 -; GFX7-NEXT: v_lshlrev_b32_e32 v7, 24, v7 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v6 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v4 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v7 +; GFX7-NEXT: v_or_b32_e32 v1, v1, v3 ; GFX7-NEXT: s_mov_b32 s6, -1 ; GFX7-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX7-NEXT: s_endpgm @@ -2307,52 +2215,50 @@ define amdgpu_ps void @insertelement_v_v8i8_s_s(<8 x i8> addrspace(1)* %ptr, i8 ; GFX10: ; %bb.0: ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: s_mov_b32 s0, 8 -; GFX10-NEXT: s_movk_i32 s1, 0xff -; GFX10-NEXT: s_and_b32 s2, s2, s1 +; GFX10-NEXT: s_mov_b32 s1, 16 +; GFX10-NEXT: s_movk_i32 s4, 0xff +; GFX10-NEXT: s_and_b32 s2, s2, s4 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v2, 8, v0 -; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v1 -; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v0 -; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v1 -; GFX10-NEXT: v_and_b32_sdwa v6, v0, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v3, s0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_and_b32_sdwa v7, v1, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; GFX10-NEXT: v_lshlrev_b32_e32 v5, 24, v5 -; GFX10-NEXT: v_and_or_b32 v0, v0, s1, v2 -; GFX10-NEXT: v_and_or_b32 v1, v1, s1, v3 +; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v3, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v5, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v6, s1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v7, s1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v0, v0, s4, v3 +; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v2 +; GFX10-NEXT: v_and_or_b32 v1, v1, s4, v5 +; GFX10-NEXT: v_lshlrev_b32_e32 v3, 24, v4 ; GFX10-NEXT: s_lshr_b32 s0, s3, 2 -; GFX10-NEXT: s_and_b32 s3, s3, 3 +; GFX10-NEXT: s_and_b32 s1, s3, 3 ; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s0, 1 -; GFX10-NEXT: v_or3_b32 v0, v0, v6, v4 -; GFX10-NEXT: v_or3_b32 v1, v1, v7, v5 -; GFX10-NEXT: s_lshl_b32 s3, s3, 3 +; GFX10-NEXT: v_or3_b32 v0, v0, v6, v2 +; GFX10-NEXT: v_or3_b32 v1, v1, v7, v3 +; GFX10-NEXT: s_lshl_b32 s1, s1, 3 ; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s0, 0 -; GFX10-NEXT: s_lshl_b32 s4, s1, s3 -; GFX10-NEXT: s_lshl_b32 s2, s2, s3 +; GFX10-NEXT: s_lshl_b32 s3, s4, s1 +; GFX10-NEXT: s_lshl_b32 s1, s2, s1 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc_lo -; GFX10-NEXT: s_not_b32 s3, s4 -; GFX10-NEXT: v_and_or_b32 v2, v2, s3, s2 +; GFX10-NEXT: s_not_b32 s2, s3 +; GFX10-NEXT: v_mov_b32_e32 v3, 8 +; GFX10-NEXT: v_and_or_b32 v2, v2, s2, s1 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v2, s0 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo -; GFX10-NEXT: v_mov_b32_e32 v2, 8 -; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v0 -; GFX10-NEXT: v_lshrrev_b32_e32 v4, 8, v1 -; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v0 +; GFX10-NEXT: v_mov_b32_e32 v2, 16 +; GFX10-NEXT: v_lshlrev_b32_sdwa v5, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX10-NEXT: v_lshrrev_b32_e32 v6, 24, v1 -; GFX10-NEXT: v_and_b32_sdwa v7, v0, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b32_sdwa v3, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_and_b32_sdwa v4, v1, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b32_e32 v5, 24, v5 -; GFX10-NEXT: v_lshlrev_b32_e32 v6, 24, v6 -; GFX10-NEXT: v_and_or_b32 v0, v0, s1, v3 -; GFX10-NEXT: v_and_or_b32 v1, v1, s1, v2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v7, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v0, v0, s4, v5 +; GFX10-NEXT: v_and_or_b32 v1, v1, s4, v3 +; GFX10-NEXT: v_lshlrev_b32_e32 v3, 24, v6 +; GFX10-NEXT: v_lshlrev_b32_e32 v4, 24, v4 +; GFX10-NEXT: v_or3_b32 v1, v1, v2, v3 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 +; GFX10-NEXT: v_or3_b32 v0, v0, v7, v4 ; GFX10-NEXT: v_mov_b32_e32 v3, 0 -; GFX10-NEXT: v_or3_b32 v0, v0, v7, v5 -; GFX10-NEXT: v_or3_b32 v1, v1, v4, v6 ; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off ; GFX10-NEXT: s_endpgm %vec = load <8 x i8>, <8 x i8> addrspace(1 )* %ptr @@ -2365,62 +2271,59 @@ define amdgpu_ps void @insertelement_s_v8i8_v_s(<8 x i8> addrspace(4)* inreg %pt ; GFX9-LABEL: insertelement_s_v8i8_v_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 -; GFX9-NEXT: s_movk_i32 s10, 0xff -; GFX9-NEXT: v_and_b32_e32 v0, s10, v0 -; GFX9-NEXT: s_mov_b32 s5, 8 +; GFX9-NEXT: s_mov_b32 s9, 0x80008 +; GFX9-NEXT: s_movk_i32 s7, 0xff +; GFX9-NEXT: v_and_b32_e32 v0, s7, v0 +; GFX9-NEXT: s_mov_b32 s2, 8 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_lshr_b32 s2, s0, 8 -; GFX9-NEXT: s_and_b32 s2, s2, s10 -; GFX9-NEXT: s_lshr_b32 s3, s0, 16 -; GFX9-NEXT: s_lshr_b32 s6, s0, 24 -; GFX9-NEXT: s_and_b32 s0, s0, s10 -; GFX9-NEXT: s_lshl_b32 s2, s2, 8 -; GFX9-NEXT: s_or_b32 s0, s0, s2 -; GFX9-NEXT: s_and_b32 s2, s3, s10 -; GFX9-NEXT: s_lshl_b32 s2, s2, 16 -; GFX9-NEXT: s_or_b32 s0, s0, s2 -; GFX9-NEXT: s_lshl_b32 s2, s6, 24 -; GFX9-NEXT: s_lshr_b32 s7, s1, 8 -; GFX9-NEXT: s_or_b32 s0, s0, s2 -; GFX9-NEXT: s_and_b32 s2, s7, s10 -; GFX9-NEXT: s_lshr_b32 s8, s1, 16 -; GFX9-NEXT: s_lshr_b32 s9, s1, 24 -; GFX9-NEXT: s_and_b32 s1, s1, s10 -; GFX9-NEXT: s_lshl_b32 s2, s2, 8 -; GFX9-NEXT: s_or_b32 s1, s1, s2 -; GFX9-NEXT: s_and_b32 s2, s8, s10 -; GFX9-NEXT: s_lshl_b32 s2, s2, 16 -; GFX9-NEXT: s_or_b32 s1, s1, s2 -; GFX9-NEXT: s_lshl_b32 s2, s9, 24 -; GFX9-NEXT: s_or_b32 s1, s1, s2 -; GFX9-NEXT: s_lshr_b32 s2, s4, 2 -; GFX9-NEXT: s_cmp_eq_u32 s2, 1 -; GFX9-NEXT: s_cselect_b32 s3, s1, s0 +; GFX9-NEXT: s_bfe_u32 s10, s0, s9 +; GFX9-NEXT: s_and_b32 s8, s0, s7 +; GFX9-NEXT: s_lshl_b32 s10, s10, 8 +; GFX9-NEXT: s_or_b32 s8, s8, s10 +; GFX9-NEXT: s_mov_b32 s10, 0x80010 +; GFX9-NEXT: s_lshr_b32 s5, s0, 24 +; GFX9-NEXT: s_bfe_u32 s0, s0, s10 +; GFX9-NEXT: s_lshl_b32 s0, s0, 16 +; GFX9-NEXT: s_or_b32 s0, s8, s0 +; GFX9-NEXT: s_bfe_u32 s8, s1, s9 +; GFX9-NEXT: s_lshl_b32 s5, s5, 24 +; GFX9-NEXT: s_or_b32 s0, s0, s5 +; GFX9-NEXT: s_lshr_b32 s6, s1, 24 +; GFX9-NEXT: s_and_b32 s5, s1, s7 +; GFX9-NEXT: s_bfe_u32 s1, s1, s10 +; GFX9-NEXT: s_lshl_b32 s8, s8, 8 +; GFX9-NEXT: s_or_b32 s5, s5, s8 +; GFX9-NEXT: s_lshl_b32 s1, s1, 16 +; GFX9-NEXT: s_or_b32 s1, s5, s1 +; GFX9-NEXT: s_lshl_b32 s5, s6, 24 +; GFX9-NEXT: s_or_b32 s1, s1, s5 +; GFX9-NEXT: s_lshr_b32 s5, s4, 2 +; GFX9-NEXT: s_cmp_eq_u32 s5, 1 +; GFX9-NEXT: s_cselect_b32 s6, s1, s0 ; GFX9-NEXT: s_and_b32 s4, s4, 3 ; GFX9-NEXT: s_lshl_b32 s4, s4, 3 -; GFX9-NEXT: s_lshl_b32 s6, s10, s4 -; GFX9-NEXT: s_andn2_b32 s3, s3, s6 -; GFX9-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-NEXT: s_lshl_b32 s8, s7, s4 +; GFX9-NEXT: s_andn2_b32 s6, s6, s8 +; GFX9-NEXT: v_mov_b32_e32 v1, s6 ; GFX9-NEXT: v_lshl_or_b32 v2, v0, s4, v1 ; GFX9-NEXT: v_mov_b32_e32 v0, s0 -; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s2, 0 +; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s5, 0 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GFX9-NEXT: v_mov_b32_e32 v1, s1 -; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s2, 1 +; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s5, 1 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX9-NEXT: v_lshrrev_b32_e32 v2, 8, v0 -; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v2, s5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_or_b32 v2, v0, s10, v2 -; GFX9-NEXT: v_lshrrev_b32_e32 v4, 8, v1 -; GFX9-NEXT: v_and_b32_sdwa v0, v0, s10 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v0 +; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: s_mov_b32 s3, 16 +; GFX9-NEXT: v_and_or_b32 v4, v0, s7, v4 +; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_e32 v2, 24, v2 +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v1 +; GFX9-NEXT: v_or3_b32 v0, v4, v0, v2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, s2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_and_or_b32 v2, v1, s7, v2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 24, v3 -; GFX9-NEXT: v_or3_b32 v0, v2, v0, v3 -; GFX9-NEXT: v_lshlrev_b32_sdwa v2, s5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshrrev_b32_e32 v5, 24, v1 -; GFX9-NEXT: v_and_or_b32 v2, v1, s10, v2 -; GFX9-NEXT: v_and_b32_sdwa v1, v1, s10 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b32_e32 v3, 24, v5 ; GFX9-NEXT: v_or3_b32 v1, v2, v1, v3 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: v_mov_b32_e32 v3, 0 @@ -2430,33 +2333,31 @@ define amdgpu_ps void @insertelement_s_v8i8_v_s(<8 x i8> addrspace(4)* inreg %pt ; GFX8-LABEL: insertelement_s_v8i8_v_s: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 -; GFX8-NEXT: s_movk_i32 s9, 0xff -; GFX8-NEXT: v_mov_b32_e32 v6, 8 -; GFX8-NEXT: v_mov_b32_e32 v7, s9 +; GFX8-NEXT: s_mov_b32 s7, 0x80008 +; GFX8-NEXT: s_movk_i32 s5, 0xff +; GFX8-NEXT: v_mov_b32_e32 v4, 8 +; GFX8-NEXT: v_mov_b32_e32 v6, 16 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_lshr_b32 s2, s0, 8 -; GFX8-NEXT: s_and_b32 s2, s2, s9 -; GFX8-NEXT: s_lshr_b32 s3, s0, 16 -; GFX8-NEXT: s_lshr_b32 s5, s0, 24 -; GFX8-NEXT: s_and_b32 s0, s0, s9 -; GFX8-NEXT: s_lshl_b32 s2, s2, 8 -; GFX8-NEXT: s_or_b32 s0, s0, s2 -; GFX8-NEXT: s_and_b32 s2, s3, s9 -; GFX8-NEXT: s_lshl_b32 s2, s2, 16 -; GFX8-NEXT: s_or_b32 s0, s0, s2 -; GFX8-NEXT: s_lshl_b32 s2, s5, 24 -; GFX8-NEXT: s_lshr_b32 s6, s1, 8 +; GFX8-NEXT: s_bfe_u32 s8, s0, s7 +; GFX8-NEXT: s_and_b32 s6, s0, s5 +; GFX8-NEXT: s_lshl_b32 s8, s8, 8 +; GFX8-NEXT: s_or_b32 s6, s6, s8 +; GFX8-NEXT: s_mov_b32 s8, 0x80010 +; GFX8-NEXT: s_lshr_b32 s2, s0, 24 +; GFX8-NEXT: s_bfe_u32 s0, s0, s8 +; GFX8-NEXT: s_lshl_b32 s0, s0, 16 +; GFX8-NEXT: s_or_b32 s0, s6, s0 +; GFX8-NEXT: s_bfe_u32 s6, s1, s7 +; GFX8-NEXT: s_lshl_b32 s2, s2, 24 ; GFX8-NEXT: s_or_b32 s0, s0, s2 -; GFX8-NEXT: s_and_b32 s2, s6, s9 -; GFX8-NEXT: s_lshr_b32 s7, s1, 16 -; GFX8-NEXT: s_lshr_b32 s8, s1, 24 -; GFX8-NEXT: s_and_b32 s1, s1, s9 -; GFX8-NEXT: s_lshl_b32 s2, s2, 8 -; GFX8-NEXT: s_or_b32 s1, s1, s2 -; GFX8-NEXT: s_and_b32 s2, s7, s9 -; GFX8-NEXT: s_lshl_b32 s2, s2, 16 -; GFX8-NEXT: s_or_b32 s1, s1, s2 -; GFX8-NEXT: s_lshl_b32 s2, s8, 24 +; GFX8-NEXT: s_lshr_b32 s3, s1, 24 +; GFX8-NEXT: s_and_b32 s2, s1, s5 +; GFX8-NEXT: s_bfe_u32 s1, s1, s8 +; GFX8-NEXT: s_lshl_b32 s6, s6, 8 +; GFX8-NEXT: s_or_b32 s2, s2, s6 +; GFX8-NEXT: s_lshl_b32 s1, s1, 16 +; GFX8-NEXT: s_or_b32 s1, s2, s1 +; GFX8-NEXT: s_lshl_b32 s2, s3, 24 ; GFX8-NEXT: s_or_b32 s1, s1, s2 ; GFX8-NEXT: s_lshr_b32 s2, s4, 2 ; GFX8-NEXT: s_cmp_eq_u32 s2, 1 @@ -2464,31 +2365,29 @@ define amdgpu_ps void @insertelement_s_v8i8_v_s(<8 x i8> addrspace(4)* inreg %pt ; GFX8-NEXT: s_and_b32 s4, s4, 3 ; GFX8-NEXT: s_lshl_b32 s4, s4, 3 ; GFX8-NEXT: v_mov_b32_e32 v1, s4 -; GFX8-NEXT: s_lshl_b32 s4, s9, s4 +; GFX8-NEXT: s_lshl_b32 s4, s5, s4 ; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX8-NEXT: s_andn2_b32 s3, s3, s4 ; GFX8-NEXT: v_or_b32_e32 v2, s3, v0 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, s2, 0 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX8-NEXT: v_lshlrev_b32_sdwa v5, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 ; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, s2, 1 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX8-NEXT: v_lshrrev_b32_e32 v2, 8, v0 -; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v6, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GFX8-NEXT: v_or_b32_sdwa v2, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_sdwa v0, v0, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 -; GFX8-NEXT: v_lshlrev_b32_e32 v2, 24, v3 -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 8, v1 +; GFX8-NEXT: v_lshrrev_b32_e32 v2, 24, v0 +; GFX8-NEXT: v_or_b32_sdwa v5, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_e32 v0, v5, v0 +; GFX8-NEXT: v_lshlrev_b32_e32 v2, 24, v2 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v6, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v5, 24, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v1 ; GFX8-NEXT: v_or_b32_sdwa v2, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_sdwa v1, v1, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX8-NEXT: v_or_b32_e32 v1, v2, v1 -; GFX8-NEXT: v_lshlrev_b32_e32 v2, 24, v5 +; GFX8-NEXT: v_lshlrev_b32_e32 v2, 24, v3 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v2 ; GFX8-NEXT: v_mov_b32_e32 v2, 0 ; GFX8-NEXT: v_mov_b32_e32 v3, 0 @@ -2498,32 +2397,30 @@ define amdgpu_ps void @insertelement_s_v8i8_v_s(<8 x i8> addrspace(4)* inreg %pt ; GFX7-LABEL: insertelement_s_v8i8_v_s: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 -; GFX7-NEXT: s_movk_i32 s9, 0xff -; GFX7-NEXT: v_and_b32_e32 v0, s9, v0 +; GFX7-NEXT: s_mov_b32 s7, 0x80008 +; GFX7-NEXT: s_movk_i32 s5, 0xff +; GFX7-NEXT: v_and_b32_e32 v0, s5, v0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_lshr_b32 s2, s0, 8 -; GFX7-NEXT: s_and_b32 s2, s2, s9 -; GFX7-NEXT: s_lshr_b32 s3, s0, 16 -; GFX7-NEXT: s_lshr_b32 s5, s0, 24 -; GFX7-NEXT: s_and_b32 s0, s0, s9 -; GFX7-NEXT: s_lshl_b32 s2, s2, 8 +; GFX7-NEXT: s_bfe_u32 s8, s0, s7 +; GFX7-NEXT: s_and_b32 s6, s0, s5 +; GFX7-NEXT: s_lshl_b32 s8, s8, 8 +; GFX7-NEXT: s_or_b32 s6, s6, s8 +; GFX7-NEXT: s_mov_b32 s8, 0x80010 +; GFX7-NEXT: s_lshr_b32 s2, s0, 24 +; GFX7-NEXT: s_bfe_u32 s0, s0, s8 +; GFX7-NEXT: s_lshl_b32 s0, s0, 16 +; GFX7-NEXT: s_or_b32 s0, s6, s0 +; GFX7-NEXT: s_bfe_u32 s6, s1, s7 +; GFX7-NEXT: s_lshl_b32 s2, s2, 24 ; GFX7-NEXT: s_or_b32 s0, s0, s2 -; GFX7-NEXT: s_and_b32 s2, s3, s9 -; GFX7-NEXT: s_lshl_b32 s2, s2, 16 -; GFX7-NEXT: s_or_b32 s0, s0, s2 -; GFX7-NEXT: s_lshl_b32 s2, s5, 24 -; GFX7-NEXT: s_lshr_b32 s6, s1, 8 -; GFX7-NEXT: s_or_b32 s0, s0, s2 -; GFX7-NEXT: s_and_b32 s2, s6, s9 -; GFX7-NEXT: s_lshr_b32 s7, s1, 16 -; GFX7-NEXT: s_lshr_b32 s8, s1, 24 -; GFX7-NEXT: s_and_b32 s1, s1, s9 -; GFX7-NEXT: s_lshl_b32 s2, s2, 8 -; GFX7-NEXT: s_or_b32 s1, s1, s2 -; GFX7-NEXT: s_and_b32 s2, s7, s9 -; GFX7-NEXT: s_lshl_b32 s2, s2, 16 -; GFX7-NEXT: s_or_b32 s1, s1, s2 -; GFX7-NEXT: s_lshl_b32 s2, s8, 24 +; GFX7-NEXT: s_lshr_b32 s3, s1, 24 +; GFX7-NEXT: s_and_b32 s2, s1, s5 +; GFX7-NEXT: s_bfe_u32 s1, s1, s8 +; GFX7-NEXT: s_lshl_b32 s6, s6, 8 +; GFX7-NEXT: s_or_b32 s2, s2, s6 +; GFX7-NEXT: s_lshl_b32 s1, s1, 16 +; GFX7-NEXT: s_or_b32 s1, s2, s1 +; GFX7-NEXT: s_lshl_b32 s2, s3, 24 ; GFX7-NEXT: s_or_b32 s1, s1, s2 ; GFX7-NEXT: s_lshr_b32 s2, s4, 2 ; GFX7-NEXT: s_cmp_eq_u32 s2, 1 @@ -2531,38 +2428,34 @@ define amdgpu_ps void @insertelement_s_v8i8_v_s(<8 x i8> addrspace(4)* inreg %pt ; GFX7-NEXT: s_and_b32 s4, s4, 3 ; GFX7-NEXT: s_lshl_b32 s4, s4, 3 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, s4, v0 -; GFX7-NEXT: s_lshl_b32 s4, s9, s4 +; GFX7-NEXT: s_lshl_b32 s4, s5, s4 ; GFX7-NEXT: s_andn2_b32 s3, s3, s4 ; GFX7-NEXT: v_or_b32_e32 v2, s3, v0 ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_cmp_eq_u32_e64 vcc, s2, 0 ; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX7-NEXT: v_bfe_u32 v5, v0, 8, 8 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_cmp_eq_u32_e64 vcc, s2, 1 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX7-NEXT: v_lshrrev_b32_e32 v2, 8, v0 -; GFX7-NEXT: v_and_b32_e32 v2, s9, v2 -; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v4, 24, v0 -; GFX7-NEXT: v_and_b32_e32 v0, s9, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 8, v2 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX7-NEXT: v_and_b32_e32 v2, s9, v3 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 24, v4 -; GFX7-NEXT: v_lshrrev_b32_e32 v5, 8, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v2, 24, v0 +; GFX7-NEXT: v_and_b32_e32 v4, s5, v0 +; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v5, 8, v5 +; GFX7-NEXT: v_or_b32_e32 v4, v4, v5 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_or_b32_e32 v0, v4, v0 +; GFX7-NEXT: v_bfe_u32 v4, v1, 8, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 24, v2 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX7-NEXT: v_and_b32_e32 v2, s9, v5 -; GFX7-NEXT: v_lshrrev_b32_e32 v6, 16, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v7, 24, v1 -; GFX7-NEXT: v_and_b32_e32 v1, s9, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 8, v2 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v2 -; GFX7-NEXT: v_and_b32_e32 v2, s9, v6 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 24, v7 +; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v1 +; GFX7-NEXT: v_and_b32_e32 v2, s5, v1 +; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v4, 8, v4 +; GFX7-NEXT: v_or_b32_e32 v2, v2, v4 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 24, v3 ; GFX7-NEXT: s_mov_b64 s[0:1], 0 ; GFX7-NEXT: v_or_b32_e32 v1, v1, v2 ; GFX7-NEXT: s_mov_b32 s2, -1 @@ -2573,64 +2466,61 @@ define amdgpu_ps void @insertelement_s_v8i8_v_s(<8 x i8> addrspace(4)* inreg %pt ; GFX10-LABEL: insertelement_s_v8i8_v_s: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GFX10-NEXT: s_mov_b32 s3, 0x80008 ; GFX10-NEXT: s_movk_i32 s2, 0xff -; GFX10-NEXT: s_lshr_b32 s3, s4, 2 +; GFX10-NEXT: s_mov_b32 s5, 0x80010 +; GFX10-NEXT: s_lshr_b32 s6, s4, 2 ; GFX10-NEXT: v_and_b32_e32 v2, s2, v0 -; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s3, 0 +; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s6, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_lshr_b32 s5, s0, 8 -; GFX10-NEXT: s_lshr_b32 s8, s1, 8 -; GFX10-NEXT: s_lshr_b32 s6, s0, 16 -; GFX10-NEXT: s_lshr_b32 s9, s1, 16 -; GFX10-NEXT: s_and_b32 s5, s5, s2 -; GFX10-NEXT: s_and_b32 s8, s8, s2 +; GFX10-NEXT: s_bfe_u32 s10, s0, s3 +; GFX10-NEXT: s_bfe_u32 s3, s1, s3 ; GFX10-NEXT: s_lshr_b32 s7, s0, 24 -; GFX10-NEXT: s_lshr_b32 s10, s1, 24 -; GFX10-NEXT: s_and_b32 s6, s6, s2 -; GFX10-NEXT: s_and_b32 s9, s9, s2 -; GFX10-NEXT: s_and_b32 s0, s0, s2 -; GFX10-NEXT: s_lshl_b32 s5, s5, 8 -; GFX10-NEXT: s_and_b32 s1, s1, s2 -; GFX10-NEXT: s_lshl_b32 s8, s8, 8 -; GFX10-NEXT: s_lshl_b32 s6, s6, 16 -; GFX10-NEXT: s_or_b32 s0, s0, s5 -; GFX10-NEXT: s_lshl_b32 s9, s9, 16 -; GFX10-NEXT: s_or_b32 s1, s1, s8 +; GFX10-NEXT: s_lshr_b32 s8, s1, 24 +; GFX10-NEXT: s_and_b32 s9, s0, s2 +; GFX10-NEXT: s_bfe_u32 s0, s0, s5 +; GFX10-NEXT: s_and_b32 s11, s1, s2 +; GFX10-NEXT: s_bfe_u32 s1, s1, s5 +; GFX10-NEXT: s_lshl_b32 s5, s10, 8 +; GFX10-NEXT: s_lshl_b32 s3, s3, 8 +; GFX10-NEXT: s_lshl_b32 s0, s0, 16 +; GFX10-NEXT: s_or_b32 s5, s9, s5 +; GFX10-NEXT: s_lshl_b32 s1, s1, 16 +; GFX10-NEXT: s_or_b32 s3, s11, s3 ; GFX10-NEXT: s_lshl_b32 s7, s7, 24 -; GFX10-NEXT: s_or_b32 s0, s0, s6 -; GFX10-NEXT: s_lshl_b32 s10, s10, 24 -; GFX10-NEXT: s_or_b32 s1, s1, s9 +; GFX10-NEXT: s_or_b32 s0, s5, s0 +; GFX10-NEXT: s_lshl_b32 s8, s8, 24 +; GFX10-NEXT: s_or_b32 s1, s3, s1 ; GFX10-NEXT: s_or_b32 s0, s0, s7 -; GFX10-NEXT: s_or_b32 s1, s1, s10 -; GFX10-NEXT: s_cmp_eq_u32 s3, 1 +; GFX10-NEXT: s_or_b32 s1, s1, s8 +; GFX10-NEXT: s_cmp_eq_u32 s6, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, s0 -; GFX10-NEXT: s_cselect_b32 s5, s1, s0 +; GFX10-NEXT: s_cselect_b32 s3, s1, s0 ; GFX10-NEXT: s_and_b32 s4, s4, 3 ; GFX10-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-NEXT: s_lshl_b32 s4, s4, 3 ; GFX10-NEXT: s_mov_b32 s0, 8 -; GFX10-NEXT: s_lshl_b32 s6, s2, s4 -; GFX10-NEXT: s_andn2_b32 s5, s5, s6 -; GFX10-NEXT: v_lshl_or_b32 v2, v2, s4, s5 +; GFX10-NEXT: s_lshl_b32 s5, s2, s4 +; GFX10-NEXT: s_mov_b32 s1, 16 +; GFX10-NEXT: s_andn2_b32 s3, s3, s5 +; GFX10-NEXT: v_lshl_or_b32 v2, v2, s4, s3 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s3, 1 -; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v0 +; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s6, 1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v3, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo -; GFX10-NEXT: v_lshrrev_b32_e32 v2, 8, v0 -; GFX10-NEXT: v_and_b32_sdwa v6, v0, s2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v1 -; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v1 -; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_and_b32_sdwa v7, v1, s2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b32_sdwa v3, s0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_lshlrev_b32_e32 v5, 24, v5 -; GFX10-NEXT: v_and_or_b32 v0, v0, s2, v2 -; GFX10-NEXT: v_and_or_b32 v1, v1, s2, v3 +; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v6, s1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v0, v0, s2, v3 +; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v5, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v7, s1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v2 +; GFX10-NEXT: v_lshlrev_b32_e32 v3, 24, v4 +; GFX10-NEXT: v_and_or_b32 v1, v1, s2, v5 +; GFX10-NEXT: v_or3_b32 v0, v0, v6, v2 +; GFX10-NEXT: v_or3_b32 v1, v1, v7, v3 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 -; GFX10-NEXT: v_or3_b32 v0, v0, v6, v4 ; GFX10-NEXT: v_mov_b32_e32 v3, 0 -; GFX10-NEXT: v_or3_b32 v1, v1, v7, v5 ; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off ; GFX10-NEXT: s_endpgm %vec = load <8 x i8>, <8 x i8> addrspace(4)* %ptr @@ -2643,63 +2533,60 @@ define amdgpu_ps void @insertelement_s_v8i8_s_v(<8 x i8> addrspace(4)* inreg %pt ; GFX9-LABEL: insertelement_s_v8i8_s_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 -; GFX9-NEXT: s_movk_i32 s10, 0xff +; GFX9-NEXT: s_mov_b32 s9, 0x80008 +; GFX9-NEXT: s_movk_i32 s7, 0xff ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 2, v0 ; GFX9-NEXT: v_and_b32_e32 v0, 3, v0 -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_lshr_b32 s2, s0, 8 -; GFX9-NEXT: s_and_b32 s2, s2, s10 -; GFX9-NEXT: s_lshr_b32 s3, s0, 16 -; GFX9-NEXT: s_lshr_b32 s6, s0, 24 -; GFX9-NEXT: s_and_b32 s0, s0, s10 -; GFX9-NEXT: s_lshl_b32 s2, s2, 8 -; GFX9-NEXT: s_or_b32 s0, s0, s2 -; GFX9-NEXT: s_and_b32 s2, s3, s10 -; GFX9-NEXT: s_lshl_b32 s2, s2, 16 -; GFX9-NEXT: s_or_b32 s0, s0, s2 -; GFX9-NEXT: s_lshl_b32 s2, s6, 24 -; GFX9-NEXT: s_lshr_b32 s7, s1, 8 -; GFX9-NEXT: s_or_b32 s0, s0, s2 -; GFX9-NEXT: s_and_b32 s2, s7, s10 -; GFX9-NEXT: s_lshr_b32 s8, s1, 16 -; GFX9-NEXT: s_lshr_b32 s9, s1, 24 -; GFX9-NEXT: s_and_b32 s1, s1, s10 -; GFX9-NEXT: s_lshl_b32 s2, s2, 8 -; GFX9-NEXT: s_or_b32 s1, s1, s2 -; GFX9-NEXT: s_and_b32 s2, s8, s10 -; GFX9-NEXT: s_lshl_b32 s2, s2, 16 -; GFX9-NEXT: s_or_b32 s1, s1, s2 -; GFX9-NEXT: s_lshl_b32 s2, s9, 24 -; GFX9-NEXT: s_or_b32 s1, s1, s2 +; GFX9-NEXT: s_bfe_u32 s10, s0, s9 +; GFX9-NEXT: s_and_b32 s8, s0, s7 +; GFX9-NEXT: s_lshl_b32 s10, s10, 8 +; GFX9-NEXT: s_or_b32 s8, s8, s10 +; GFX9-NEXT: s_mov_b32 s10, 0x80010 +; GFX9-NEXT: s_lshr_b32 s5, s0, 24 +; GFX9-NEXT: s_bfe_u32 s0, s0, s10 +; GFX9-NEXT: s_lshl_b32 s0, s0, 16 +; GFX9-NEXT: s_or_b32 s0, s8, s0 +; GFX9-NEXT: s_bfe_u32 s8, s1, s9 +; GFX9-NEXT: s_lshl_b32 s5, s5, 24 +; GFX9-NEXT: s_or_b32 s0, s0, s5 +; GFX9-NEXT: s_lshr_b32 s6, s1, 24 +; GFX9-NEXT: s_and_b32 s5, s1, s7 +; GFX9-NEXT: s_bfe_u32 s1, s1, s10 +; GFX9-NEXT: s_lshl_b32 s8, s8, 8 +; GFX9-NEXT: s_or_b32 s5, s5, s8 +; GFX9-NEXT: s_lshl_b32 s1, s1, 16 +; GFX9-NEXT: s_or_b32 s1, s5, s1 +; GFX9-NEXT: s_lshl_b32 s5, s6, 24 +; GFX9-NEXT: s_or_b32 s1, s1, s5 ; GFX9-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-NEXT: v_mov_b32_e32 v3, s1 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 3, v0 -; GFX9-NEXT: s_and_b32 s2, s4, s10 +; GFX9-NEXT: s_and_b32 s4, s4, s7 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; GFX9-NEXT: v_lshlrev_b32_e64 v3, v0, s2 -; GFX9-NEXT: v_lshlrev_b32_e64 v0, v0, s10 +; GFX9-NEXT: v_lshlrev_b32_e64 v3, v0, s4 +; GFX9-NEXT: v_lshlrev_b32_e64 v0, v0, s7 ; GFX9-NEXT: v_xor_b32_e32 v0, -1, v0 ; GFX9-NEXT: v_and_or_b32 v3, v1, v0, v3 ; GFX9-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v2 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[0:1] -; GFX9-NEXT: s_mov_b32 s5, 8 -; GFX9-NEXT: v_lshrrev_b32_e32 v2, 8, v0 +; GFX9-NEXT: s_mov_b32 s2, 8 +; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v0 +; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: s_mov_b32 s3, 16 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v2, s5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_or_b32 v2, v0, s10, v2 -; GFX9-NEXT: v_lshrrev_b32_e32 v4, 8, v1 -; GFX9-NEXT: v_and_b32_sdwa v0, v0, s10 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_and_or_b32 v4, v0, s7, v4 +; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_e32 v2, 24, v2 +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v1 +; GFX9-NEXT: v_or3_b32 v0, v4, v0, v2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, s2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_and_or_b32 v2, v1, s7, v2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 24, v3 -; GFX9-NEXT: v_or3_b32 v0, v2, v0, v3 -; GFX9-NEXT: v_lshlrev_b32_sdwa v2, s5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshrrev_b32_e32 v5, 24, v1 -; GFX9-NEXT: v_and_or_b32 v2, v1, s10, v2 -; GFX9-NEXT: v_and_b32_sdwa v1, v1, s10 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b32_e32 v3, 24, v5 ; GFX9-NEXT: v_or3_b32 v1, v2, v1, v3 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: v_mov_b32_e32 v3, 0 @@ -2709,42 +2596,40 @@ define amdgpu_ps void @insertelement_s_v8i8_s_v(<8 x i8> addrspace(4)* inreg %pt ; GFX8-LABEL: insertelement_s_v8i8_s_v: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 -; GFX8-NEXT: s_movk_i32 s9, 0xff +; GFX8-NEXT: s_mov_b32 s7, 0x80008 +; GFX8-NEXT: s_movk_i32 s5, 0xff ; GFX8-NEXT: v_lshrrev_b32_e32 v2, 2, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 3, v0 -; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_lshr_b32 s2, s0, 8 -; GFX8-NEXT: s_and_b32 s2, s2, s9 -; GFX8-NEXT: s_lshr_b32 s3, s0, 16 -; GFX8-NEXT: s_lshr_b32 s5, s0, 24 -; GFX8-NEXT: s_and_b32 s0, s0, s9 -; GFX8-NEXT: s_lshl_b32 s2, s2, 8 -; GFX8-NEXT: s_or_b32 s0, s0, s2 -; GFX8-NEXT: s_and_b32 s2, s3, s9 -; GFX8-NEXT: s_lshl_b32 s2, s2, 16 +; GFX8-NEXT: s_bfe_u32 s8, s0, s7 +; GFX8-NEXT: s_and_b32 s6, s0, s5 +; GFX8-NEXT: s_lshl_b32 s8, s8, 8 +; GFX8-NEXT: s_or_b32 s6, s6, s8 +; GFX8-NEXT: s_mov_b32 s8, 0x80010 +; GFX8-NEXT: s_lshr_b32 s2, s0, 24 +; GFX8-NEXT: s_bfe_u32 s0, s0, s8 +; GFX8-NEXT: s_lshl_b32 s0, s0, 16 +; GFX8-NEXT: s_or_b32 s0, s6, s0 +; GFX8-NEXT: s_bfe_u32 s6, s1, s7 +; GFX8-NEXT: s_lshl_b32 s2, s2, 24 ; GFX8-NEXT: s_or_b32 s0, s0, s2 -; GFX8-NEXT: s_lshl_b32 s2, s5, 24 -; GFX8-NEXT: s_lshr_b32 s6, s1, 8 -; GFX8-NEXT: s_or_b32 s0, s0, s2 -; GFX8-NEXT: s_and_b32 s2, s6, s9 -; GFX8-NEXT: s_lshr_b32 s7, s1, 16 -; GFX8-NEXT: s_lshr_b32 s8, s1, 24 -; GFX8-NEXT: s_and_b32 s1, s1, s9 -; GFX8-NEXT: s_lshl_b32 s2, s2, 8 -; GFX8-NEXT: s_or_b32 s1, s1, s2 -; GFX8-NEXT: s_and_b32 s2, s7, s9 -; GFX8-NEXT: s_lshl_b32 s2, s2, 16 -; GFX8-NEXT: s_or_b32 s1, s1, s2 -; GFX8-NEXT: s_lshl_b32 s2, s8, 24 +; GFX8-NEXT: s_lshr_b32 s3, s1, 24 +; GFX8-NEXT: s_and_b32 s2, s1, s5 +; GFX8-NEXT: s_bfe_u32 s1, s1, s8 +; GFX8-NEXT: s_lshl_b32 s6, s6, 8 +; GFX8-NEXT: s_or_b32 s2, s2, s6 +; GFX8-NEXT: s_lshl_b32 s1, s1, 16 +; GFX8-NEXT: s_or_b32 s1, s2, s1 +; GFX8-NEXT: s_lshl_b32 s2, s3, 24 ; GFX8-NEXT: s_or_b32 s1, s1, s2 ; GFX8-NEXT: v_mov_b32_e32 v1, s0 ; GFX8-NEXT: v_mov_b32_e32 v3, s1 +; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 3, v0 -; GFX8-NEXT: s_and_b32 s2, s4, s9 +; GFX8-NEXT: s_and_b32 s2, s4, s5 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX8-NEXT: v_lshlrev_b32_e64 v3, v0, s2 -; GFX8-NEXT: v_lshlrev_b32_e64 v0, v0, s9 +; GFX8-NEXT: v_lshlrev_b32_e64 v0, v0, s5 ; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, v1, v0 ; GFX8-NEXT: v_or_b32_e32 v3, v0, v3 @@ -2752,24 +2637,22 @@ define amdgpu_ps void @insertelement_s_v8i8_s_v(<8 x i8> addrspace(4)* inreg %pt ; GFX8-NEXT: v_mov_b32_e32 v1, s1 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v2 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[0:1] -; GFX8-NEXT: v_lshrrev_b32_e32 v2, 8, v0 -; GFX8-NEXT: v_mov_b32_e32 v6, 8 -; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v6, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_mov_b32_e32 v7, s9 +; GFX8-NEXT: v_mov_b32_e32 v4, 8 +; GFX8-NEXT: v_lshlrev_b32_sdwa v5, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_mov_b32_e32 v6, 16 +; GFX8-NEXT: v_lshrrev_b32_e32 v2, 24, v0 +; GFX8-NEXT: v_or_b32_sdwa v5, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GFX8-NEXT: v_or_b32_sdwa v2, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_sdwa v0, v0, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 -; GFX8-NEXT: v_lshlrev_b32_e32 v2, 24, v3 -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 8, v1 +; GFX8-NEXT: v_or_b32_e32 v0, v5, v0 +; GFX8-NEXT: v_lshlrev_b32_e32 v2, 24, v2 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v6, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v5, 24, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v1 ; GFX8-NEXT: v_or_b32_sdwa v2, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_sdwa v1, v1, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX8-NEXT: v_or_b32_e32 v1, v2, v1 -; GFX8-NEXT: v_lshlrev_b32_e32 v2, 24, v5 +; GFX8-NEXT: v_lshlrev_b32_e32 v2, 24, v3 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v2 ; GFX8-NEXT: v_mov_b32_e32 v2, 0 ; GFX8-NEXT: v_mov_b32_e32 v3, 0 @@ -2779,42 +2662,40 @@ define amdgpu_ps void @insertelement_s_v8i8_s_v(<8 x i8> addrspace(4)* inreg %pt ; GFX7-LABEL: insertelement_s_v8i8_s_v: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 -; GFX7-NEXT: s_movk_i32 s9, 0xff +; GFX7-NEXT: s_mov_b32 s7, 0x80008 +; GFX7-NEXT: s_movk_i32 s5, 0xff ; GFX7-NEXT: v_lshrrev_b32_e32 v2, 2, v0 ; GFX7-NEXT: v_and_b32_e32 v0, 3, v0 -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_lshr_b32 s2, s0, 8 -; GFX7-NEXT: s_and_b32 s2, s2, s9 -; GFX7-NEXT: s_lshr_b32 s3, s0, 16 -; GFX7-NEXT: s_lshr_b32 s5, s0, 24 -; GFX7-NEXT: s_and_b32 s0, s0, s9 -; GFX7-NEXT: s_lshl_b32 s2, s2, 8 -; GFX7-NEXT: s_or_b32 s0, s0, s2 -; GFX7-NEXT: s_and_b32 s2, s3, s9 -; GFX7-NEXT: s_lshl_b32 s2, s2, 16 -; GFX7-NEXT: s_or_b32 s0, s0, s2 -; GFX7-NEXT: s_lshl_b32 s2, s5, 24 -; GFX7-NEXT: s_lshr_b32 s6, s1, 8 +; GFX7-NEXT: s_bfe_u32 s8, s0, s7 +; GFX7-NEXT: s_and_b32 s6, s0, s5 +; GFX7-NEXT: s_lshl_b32 s8, s8, 8 +; GFX7-NEXT: s_or_b32 s6, s6, s8 +; GFX7-NEXT: s_mov_b32 s8, 0x80010 +; GFX7-NEXT: s_lshr_b32 s2, s0, 24 +; GFX7-NEXT: s_bfe_u32 s0, s0, s8 +; GFX7-NEXT: s_lshl_b32 s0, s0, 16 +; GFX7-NEXT: s_or_b32 s0, s6, s0 +; GFX7-NEXT: s_bfe_u32 s6, s1, s7 +; GFX7-NEXT: s_lshl_b32 s2, s2, 24 ; GFX7-NEXT: s_or_b32 s0, s0, s2 -; GFX7-NEXT: s_and_b32 s2, s6, s9 -; GFX7-NEXT: s_lshr_b32 s7, s1, 16 -; GFX7-NEXT: s_lshr_b32 s8, s1, 24 -; GFX7-NEXT: s_and_b32 s1, s1, s9 -; GFX7-NEXT: s_lshl_b32 s2, s2, 8 -; GFX7-NEXT: s_or_b32 s1, s1, s2 -; GFX7-NEXT: s_and_b32 s2, s7, s9 -; GFX7-NEXT: s_lshl_b32 s2, s2, 16 -; GFX7-NEXT: s_or_b32 s1, s1, s2 -; GFX7-NEXT: s_lshl_b32 s2, s8, 24 +; GFX7-NEXT: s_lshr_b32 s3, s1, 24 +; GFX7-NEXT: s_and_b32 s2, s1, s5 +; GFX7-NEXT: s_bfe_u32 s1, s1, s8 +; GFX7-NEXT: s_lshl_b32 s6, s6, 8 +; GFX7-NEXT: s_or_b32 s2, s2, s6 +; GFX7-NEXT: s_lshl_b32 s1, s1, 16 +; GFX7-NEXT: s_or_b32 s1, s2, s1 +; GFX7-NEXT: s_lshl_b32 s2, s3, 24 ; GFX7-NEXT: s_or_b32 s1, s1, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s0 ; GFX7-NEXT: v_mov_b32_e32 v3, s1 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 3, v0 -; GFX7-NEXT: s_and_b32 s2, s4, s9 +; GFX7-NEXT: s_and_b32 s2, s4, s5 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX7-NEXT: v_lshl_b32_e32 v3, s2, v0 -; GFX7-NEXT: v_lshl_b32_e32 v0, s9, v0 +; GFX7-NEXT: v_lshl_b32_e32 v0, s5, v0 ; GFX7-NEXT: v_xor_b32_e32 v0, -1, v0 ; GFX7-NEXT: v_and_b32_e32 v0, v1, v0 ; GFX7-NEXT: v_or_b32_e32 v3, v0, v3 @@ -2822,30 +2703,26 @@ define amdgpu_ps void @insertelement_s_v8i8_s_v(<8 x i8> addrspace(4)* inreg %pt ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v2 ; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[0:1] -; GFX7-NEXT: v_lshrrev_b32_e32 v2, 8, v0 -; GFX7-NEXT: v_and_b32_e32 v2, s9, v2 +; GFX7-NEXT: v_bfe_u32 v5, v0, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v2, 24, v0 +; GFX7-NEXT: v_and_b32_e32 v4, s5, v0 +; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v5, 8, v5 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v4, 24, v0 -; GFX7-NEXT: v_and_b32_e32 v0, s9, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 8, v2 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX7-NEXT: v_and_b32_e32 v2, s9, v3 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 24, v4 -; GFX7-NEXT: v_lshrrev_b32_e32 v5, 8, v1 +; GFX7-NEXT: v_or_b32_e32 v4, v4, v5 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_or_b32_e32 v0, v4, v0 +; GFX7-NEXT: v_bfe_u32 v4, v1, 8, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 24, v2 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX7-NEXT: v_and_b32_e32 v2, s9, v5 -; GFX7-NEXT: v_lshrrev_b32_e32 v6, 16, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v7, 24, v1 -; GFX7-NEXT: v_and_b32_e32 v1, s9, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 8, v2 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v2 -; GFX7-NEXT: v_and_b32_e32 v2, s9, v6 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 24, v7 +; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v1 +; GFX7-NEXT: v_and_b32_e32 v2, s5, v1 +; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v4, 8, v4 +; GFX7-NEXT: v_or_b32_e32 v2, v2, v4 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 24, v3 ; GFX7-NEXT: s_mov_b64 s[0:1], 0 ; GFX7-NEXT: v_or_b32_e32 v1, v1, v2 ; GFX7-NEXT: s_mov_b32 s2, -1 @@ -2857,64 +2734,61 @@ define amdgpu_ps void @insertelement_s_v8i8_s_v(<8 x i8> addrspace(4)* inreg %pt ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 ; GFX10-NEXT: v_and_b32_e32 v1, 3, v0 +; GFX10-NEXT: s_mov_b32 s3, 0x80008 ; GFX10-NEXT: s_movk_i32 s2, 0xff +; GFX10-NEXT: s_mov_b32 s5, 0x80010 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 2, v0 -; GFX10-NEXT: s_and_b32 s3, s4, s2 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 3, v1 +; GFX10-NEXT: s_and_b32 s4, s4, s2 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v2 -; GFX10-NEXT: v_lshlrev_b32_e64 v3, v1, s3 +; GFX10-NEXT: v_lshlrev_b32_e64 v3, v1, s4 ; GFX10-NEXT: v_lshlrev_b32_e64 v0, v1, s2 ; GFX10-NEXT: v_xor_b32_e32 v4, -1, v0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_lshr_b32 s6, s1, 8 -; GFX10-NEXT: s_lshr_b32 s7, s1, 16 -; GFX10-NEXT: s_and_b32 s6, s6, s2 -; GFX10-NEXT: s_lshr_b32 s8, s1, 24 -; GFX10-NEXT: s_and_b32 s7, s7, s2 -; GFX10-NEXT: s_and_b32 s1, s1, s2 -; GFX10-NEXT: s_lshl_b32 s6, s6, 8 -; GFX10-NEXT: s_lshr_b32 s3, s0, 8 -; GFX10-NEXT: s_lshl_b32 s7, s7, 16 -; GFX10-NEXT: s_or_b32 s1, s1, s6 -; GFX10-NEXT: s_lshr_b32 s4, s0, 16 -; GFX10-NEXT: s_and_b32 s3, s3, s2 -; GFX10-NEXT: s_lshl_b32 s8, s8, 24 -; GFX10-NEXT: s_or_b32 s1, s1, s7 -; GFX10-NEXT: s_lshr_b32 s5, s0, 24 -; GFX10-NEXT: s_and_b32 s4, s4, s2 -; GFX10-NEXT: s_and_b32 s0, s0, s2 +; GFX10-NEXT: s_bfe_u32 s8, s0, s3 +; GFX10-NEXT: s_bfe_u32 s3, s1, s3 +; GFX10-NEXT: s_lshr_b32 s6, s1, 24 +; GFX10-NEXT: s_and_b32 s9, s1, s2 +; GFX10-NEXT: s_bfe_u32 s1, s1, s5 ; GFX10-NEXT: s_lshl_b32 s3, s3, 8 -; GFX10-NEXT: s_or_b32 s1, s1, s8 -; GFX10-NEXT: s_lshl_b32 s4, s4, 16 -; GFX10-NEXT: s_or_b32 s0, s0, s3 +; GFX10-NEXT: s_lshl_b32 s1, s1, 16 +; GFX10-NEXT: s_or_b32 s3, s9, s3 +; GFX10-NEXT: s_lshl_b32 s6, s6, 24 +; GFX10-NEXT: s_or_b32 s1, s3, s1 +; GFX10-NEXT: s_lshr_b32 s4, s0, 24 +; GFX10-NEXT: s_and_b32 s7, s0, s2 +; GFX10-NEXT: s_bfe_u32 s0, s0, s5 +; GFX10-NEXT: s_lshl_b32 s5, s8, 8 +; GFX10-NEXT: s_or_b32 s1, s1, s6 +; GFX10-NEXT: s_lshl_b32 s3, s4, 24 +; GFX10-NEXT: s_lshl_b32 s0, s0, 16 +; GFX10-NEXT: s_or_b32 s4, s7, s5 ; GFX10-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-NEXT: s_lshl_b32 s5, s5, 24 -; GFX10-NEXT: s_or_b32 s0, s0, s4 -; GFX10-NEXT: s_or_b32 s0, s0, s5 +; GFX10-NEXT: s_or_b32 s0, s4, s0 +; GFX10-NEXT: s_or_b32 s0, s0, s3 ; GFX10-NEXT: v_cndmask_b32_e32 v5, s0, v1, vcc_lo ; GFX10-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 0, v2 +; GFX10-NEXT: s_mov_b32 s1, 16 ; GFX10-NEXT: v_and_or_b32 v3, v5, v4, v3 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v3, s0 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo ; GFX10-NEXT: s_mov_b32 s0, 8 -; GFX10-NEXT: v_lshrrev_b32_e32 v2, 8, v0 -; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v1 -; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v0 -; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v1 -; GFX10-NEXT: v_and_b32_sdwa v6, v0, s2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v3, s0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_and_b32_sdwa v7, v1, s2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; GFX10-NEXT: v_lshlrev_b32_e32 v5, 24, v5 -; GFX10-NEXT: v_and_or_b32 v0, v0, s2, v2 -; GFX10-NEXT: v_and_or_b32 v1, v1, s2, v3 +; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v3, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v5, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v6, s1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v7, s1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v0, v0, s2, v3 +; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v2 +; GFX10-NEXT: v_and_or_b32 v1, v1, s2, v5 +; GFX10-NEXT: v_lshlrev_b32_e32 v3, 24, v4 +; GFX10-NEXT: v_or3_b32 v0, v0, v6, v2 +; GFX10-NEXT: v_or3_b32 v1, v1, v7, v3 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: v_mov_b32_e32 v3, 0 -; GFX10-NEXT: v_or3_b32 v0, v0, v6, v4 -; GFX10-NEXT: v_or3_b32 v1, v1, v7, v5 ; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off ; GFX10-NEXT: s_endpgm %vec = load <8 x i8>, <8 x i8> addrspace(4)* %ptr @@ -2927,37 +2801,35 @@ define amdgpu_ps void @insertelement_s_v8i8_v_v(<8 x i8> addrspace(4)* inreg %pt ; GFX9-LABEL: insertelement_s_v8i8_v_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 -; GFX9-NEXT: s_movk_i32 s9, 0xff +; GFX9-NEXT: s_mov_b32 s8, 0x80008 +; GFX9-NEXT: s_movk_i32 s6, 0xff ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 2, v1 ; GFX9-NEXT: v_and_b32_e32 v1, 3, v1 -; GFX9-NEXT: v_lshlrev_b32_e32 v1, 3, v1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_lshr_b32 s2, s0, 8 -; GFX9-NEXT: s_and_b32 s2, s2, s9 -; GFX9-NEXT: s_lshr_b32 s3, s0, 16 -; GFX9-NEXT: s_lshr_b32 s5, s0, 24 -; GFX9-NEXT: s_and_b32 s0, s0, s9 -; GFX9-NEXT: s_lshl_b32 s2, s2, 8 -; GFX9-NEXT: s_or_b32 s0, s0, s2 -; GFX9-NEXT: s_and_b32 s2, s3, s9 -; GFX9-NEXT: s_lshl_b32 s2, s2, 16 -; GFX9-NEXT: s_or_b32 s0, s0, s2 -; GFX9-NEXT: s_lshl_b32 s2, s5, 24 -; GFX9-NEXT: s_lshr_b32 s6, s1, 8 -; GFX9-NEXT: s_or_b32 s0, s0, s2 -; GFX9-NEXT: s_and_b32 s2, s6, s9 -; GFX9-NEXT: s_lshr_b32 s7, s1, 16 -; GFX9-NEXT: s_lshr_b32 s8, s1, 24 -; GFX9-NEXT: s_and_b32 s1, s1, s9 -; GFX9-NEXT: s_lshl_b32 s2, s2, 8 -; GFX9-NEXT: s_or_b32 s1, s1, s2 -; GFX9-NEXT: s_and_b32 s2, s7, s9 -; GFX9-NEXT: s_lshl_b32 s2, s2, 16 -; GFX9-NEXT: s_or_b32 s1, s1, s2 -; GFX9-NEXT: s_lshl_b32 s2, s8, 24 -; GFX9-NEXT: s_or_b32 s1, s1, s2 +; GFX9-NEXT: s_bfe_u32 s9, s0, s8 +; GFX9-NEXT: s_and_b32 s7, s0, s6 +; GFX9-NEXT: s_lshl_b32 s9, s9, 8 +; GFX9-NEXT: s_or_b32 s7, s7, s9 +; GFX9-NEXT: s_mov_b32 s9, 0x80010 +; GFX9-NEXT: s_lshr_b32 s4, s0, 24 +; GFX9-NEXT: s_bfe_u32 s0, s0, s9 +; GFX9-NEXT: s_lshl_b32 s0, s0, 16 +; GFX9-NEXT: s_or_b32 s0, s7, s0 +; GFX9-NEXT: s_bfe_u32 s7, s1, s8 +; GFX9-NEXT: s_lshl_b32 s4, s4, 24 +; GFX9-NEXT: s_or_b32 s0, s0, s4 +; GFX9-NEXT: s_lshr_b32 s5, s1, 24 +; GFX9-NEXT: s_and_b32 s4, s1, s6 +; GFX9-NEXT: s_bfe_u32 s1, s1, s9 +; GFX9-NEXT: s_lshl_b32 s7, s7, 8 +; GFX9-NEXT: s_or_b32 s4, s4, s7 +; GFX9-NEXT: s_lshl_b32 s1, s1, 16 +; GFX9-NEXT: s_or_b32 s1, s4, s1 +; GFX9-NEXT: s_lshl_b32 s4, s5, 24 +; GFX9-NEXT: s_or_b32 s1, s1, s4 +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 3, v1 ; GFX9-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshlrev_b32_e64 v1, v1, s9 +; GFX9-NEXT: v_lshlrev_b32_e64 v1, v1, s6 ; GFX9-NEXT: v_mov_b32_e32 v3, s0 ; GFX9-NEXT: v_mov_b32_e32 v4, s1 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 @@ -2968,21 +2840,20 @@ define amdgpu_ps void @insertelement_s_v8i8_v_v(<8 x i8> addrspace(4)* inreg %pt ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v2 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[0:1] -; GFX9-NEXT: s_mov_b32 s4, 8 -; GFX9-NEXT: v_lshrrev_b32_e32 v2, 8, v0 +; GFX9-NEXT: s_mov_b32 s2, 8 +; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v0 +; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: s_mov_b32 s3, 16 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v2, s4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_or_b32 v2, v0, s9, v2 -; GFX9-NEXT: v_lshrrev_b32_e32 v4, 8, v1 -; GFX9-NEXT: v_and_b32_sdwa v0, v0, s9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_and_or_b32 v4, v0, s6, v4 +; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_e32 v2, 24, v2 +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v1 +; GFX9-NEXT: v_or3_b32 v0, v4, v0, v2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, s2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_and_or_b32 v2, v1, s6, v2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 24, v3 -; GFX9-NEXT: v_or3_b32 v0, v2, v0, v3 -; GFX9-NEXT: v_lshlrev_b32_sdwa v2, s4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshrrev_b32_e32 v5, 24, v1 -; GFX9-NEXT: v_and_or_b32 v2, v1, s9, v2 -; GFX9-NEXT: v_and_b32_sdwa v1, v1, s9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b32_e32 v3, 24, v5 ; GFX9-NEXT: v_or3_b32 v1, v2, v1, v3 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: v_mov_b32_e32 v3, 0 @@ -2992,37 +2863,35 @@ define amdgpu_ps void @insertelement_s_v8i8_v_v(<8 x i8> addrspace(4)* inreg %pt ; GFX8-LABEL: insertelement_s_v8i8_v_v: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 -; GFX8-NEXT: s_movk_i32 s8, 0xff +; GFX8-NEXT: s_mov_b32 s6, 0x80008 +; GFX8-NEXT: s_movk_i32 s4, 0xff ; GFX8-NEXT: v_lshrrev_b32_e32 v2, 2, v1 ; GFX8-NEXT: v_and_b32_e32 v1, 3, v1 -; GFX8-NEXT: v_lshlrev_b32_e32 v1, 3, v1 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_lshr_b32 s2, s0, 8 -; GFX8-NEXT: s_and_b32 s2, s2, s8 -; GFX8-NEXT: s_lshr_b32 s3, s0, 16 -; GFX8-NEXT: s_lshr_b32 s4, s0, 24 -; GFX8-NEXT: s_and_b32 s0, s0, s8 -; GFX8-NEXT: s_lshl_b32 s2, s2, 8 -; GFX8-NEXT: s_or_b32 s0, s0, s2 -; GFX8-NEXT: s_and_b32 s2, s3, s8 -; GFX8-NEXT: s_lshl_b32 s2, s2, 16 +; GFX8-NEXT: s_bfe_u32 s7, s0, s6 +; GFX8-NEXT: s_and_b32 s5, s0, s4 +; GFX8-NEXT: s_lshl_b32 s7, s7, 8 +; GFX8-NEXT: s_or_b32 s5, s5, s7 +; GFX8-NEXT: s_mov_b32 s7, 0x80010 +; GFX8-NEXT: s_lshr_b32 s2, s0, 24 +; GFX8-NEXT: s_bfe_u32 s0, s0, s7 +; GFX8-NEXT: s_lshl_b32 s0, s0, 16 +; GFX8-NEXT: s_or_b32 s0, s5, s0 +; GFX8-NEXT: s_bfe_u32 s5, s1, s6 +; GFX8-NEXT: s_lshl_b32 s2, s2, 24 ; GFX8-NEXT: s_or_b32 s0, s0, s2 -; GFX8-NEXT: s_lshl_b32 s2, s4, 24 -; GFX8-NEXT: s_lshr_b32 s5, s1, 8 -; GFX8-NEXT: s_or_b32 s0, s0, s2 -; GFX8-NEXT: s_and_b32 s2, s5, s8 -; GFX8-NEXT: s_lshr_b32 s6, s1, 16 -; GFX8-NEXT: s_lshr_b32 s7, s1, 24 -; GFX8-NEXT: s_and_b32 s1, s1, s8 -; GFX8-NEXT: s_lshl_b32 s2, s2, 8 -; GFX8-NEXT: s_or_b32 s1, s1, s2 -; GFX8-NEXT: s_and_b32 s2, s6, s8 -; GFX8-NEXT: s_lshl_b32 s2, s2, 16 -; GFX8-NEXT: s_or_b32 s1, s1, s2 -; GFX8-NEXT: s_lshl_b32 s2, s7, 24 +; GFX8-NEXT: s_lshr_b32 s3, s1, 24 +; GFX8-NEXT: s_and_b32 s2, s1, s4 +; GFX8-NEXT: s_bfe_u32 s1, s1, s7 +; GFX8-NEXT: s_lshl_b32 s5, s5, 8 +; GFX8-NEXT: s_or_b32 s2, s2, s5 +; GFX8-NEXT: s_lshl_b32 s1, s1, 16 +; GFX8-NEXT: s_or_b32 s1, s2, s1 +; GFX8-NEXT: s_lshl_b32 s2, s3, 24 ; GFX8-NEXT: s_or_b32 s1, s1, s2 +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 3, v1 ; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshlrev_b32_e64 v1, v1, s8 +; GFX8-NEXT: v_lshlrev_b32_e64 v1, v1, s4 ; GFX8-NEXT: v_mov_b32_e32 v3, s0 ; GFX8-NEXT: v_mov_b32_e32 v4, s1 ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 @@ -3034,24 +2903,22 @@ define amdgpu_ps void @insertelement_s_v8i8_v_v(<8 x i8> addrspace(4)* inreg %pt ; GFX8-NEXT: v_mov_b32_e32 v1, s1 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v2 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[0:1] -; GFX8-NEXT: v_lshrrev_b32_e32 v2, 8, v0 -; GFX8-NEXT: v_mov_b32_e32 v6, 8 -; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v6, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_mov_b32_e32 v7, s8 +; GFX8-NEXT: v_mov_b32_e32 v4, 8 +; GFX8-NEXT: v_lshlrev_b32_sdwa v5, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_mov_b32_e32 v6, 16 +; GFX8-NEXT: v_lshrrev_b32_e32 v2, 24, v0 +; GFX8-NEXT: v_or_b32_sdwa v5, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GFX8-NEXT: v_or_b32_sdwa v2, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_sdwa v0, v0, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 -; GFX8-NEXT: v_lshlrev_b32_e32 v2, 24, v3 -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 8, v1 +; GFX8-NEXT: v_or_b32_e32 v0, v5, v0 +; GFX8-NEXT: v_lshlrev_b32_e32 v2, 24, v2 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v6, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v5, 24, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v1 ; GFX8-NEXT: v_or_b32_sdwa v2, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_sdwa v1, v1, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX8-NEXT: v_or_b32_e32 v1, v2, v1 -; GFX8-NEXT: v_lshlrev_b32_e32 v2, 24, v5 +; GFX8-NEXT: v_lshlrev_b32_e32 v2, 24, v3 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v2 ; GFX8-NEXT: v_mov_b32_e32 v2, 0 ; GFX8-NEXT: v_mov_b32_e32 v3, 0 @@ -3061,38 +2928,36 @@ define amdgpu_ps void @insertelement_s_v8i8_v_v(<8 x i8> addrspace(4)* inreg %pt ; GFX7-LABEL: insertelement_s_v8i8_v_v: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 -; GFX7-NEXT: s_movk_i32 s8, 0xff +; GFX7-NEXT: s_mov_b32 s6, 0x80008 +; GFX7-NEXT: s_movk_i32 s4, 0xff ; GFX7-NEXT: v_lshrrev_b32_e32 v2, 2, v1 ; GFX7-NEXT: v_and_b32_e32 v1, 3, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 3, v1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_lshr_b32 s2, s0, 8 -; GFX7-NEXT: s_and_b32 s2, s2, s8 -; GFX7-NEXT: s_lshr_b32 s3, s0, 16 -; GFX7-NEXT: s_lshr_b32 s4, s0, 24 -; GFX7-NEXT: s_and_b32 s0, s0, s8 -; GFX7-NEXT: s_lshl_b32 s2, s2, 8 -; GFX7-NEXT: s_or_b32 s0, s0, s2 -; GFX7-NEXT: s_and_b32 s2, s3, s8 -; GFX7-NEXT: s_lshl_b32 s2, s2, 16 -; GFX7-NEXT: s_or_b32 s0, s0, s2 -; GFX7-NEXT: s_lshl_b32 s2, s4, 24 -; GFX7-NEXT: s_lshr_b32 s5, s1, 8 +; GFX7-NEXT: s_bfe_u32 s7, s0, s6 +; GFX7-NEXT: s_and_b32 s5, s0, s4 +; GFX7-NEXT: s_lshl_b32 s7, s7, 8 +; GFX7-NEXT: s_or_b32 s5, s5, s7 +; GFX7-NEXT: s_mov_b32 s7, 0x80010 +; GFX7-NEXT: s_lshr_b32 s2, s0, 24 +; GFX7-NEXT: s_bfe_u32 s0, s0, s7 +; GFX7-NEXT: s_lshl_b32 s0, s0, 16 +; GFX7-NEXT: s_or_b32 s0, s5, s0 +; GFX7-NEXT: s_bfe_u32 s5, s1, s6 +; GFX7-NEXT: s_lshl_b32 s2, s2, 24 ; GFX7-NEXT: s_or_b32 s0, s0, s2 -; GFX7-NEXT: s_and_b32 s2, s5, s8 -; GFX7-NEXT: s_lshr_b32 s6, s1, 16 -; GFX7-NEXT: s_lshr_b32 s7, s1, 24 -; GFX7-NEXT: s_and_b32 s1, s1, s8 -; GFX7-NEXT: s_lshl_b32 s2, s2, 8 -; GFX7-NEXT: s_or_b32 s1, s1, s2 -; GFX7-NEXT: s_and_b32 s2, s6, s8 -; GFX7-NEXT: s_lshl_b32 s2, s2, 16 -; GFX7-NEXT: s_or_b32 s1, s1, s2 -; GFX7-NEXT: s_lshl_b32 s2, s7, 24 +; GFX7-NEXT: s_lshr_b32 s3, s1, 24 +; GFX7-NEXT: s_and_b32 s2, s1, s4 +; GFX7-NEXT: s_bfe_u32 s1, s1, s7 +; GFX7-NEXT: s_lshl_b32 s5, s5, 8 +; GFX7-NEXT: s_or_b32 s2, s2, s5 +; GFX7-NEXT: s_lshl_b32 s1, s1, 16 +; GFX7-NEXT: s_or_b32 s1, s2, s1 +; GFX7-NEXT: s_lshl_b32 s2, s3, 24 ; GFX7-NEXT: s_or_b32 s1, s1, s2 -; GFX7-NEXT: v_and_b32_e32 v0, s8, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 3, v1 +; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, v1, v0 -; GFX7-NEXT: v_lshl_b32_e32 v1, s8, v1 +; GFX7-NEXT: v_lshl_b32_e32 v1, s4, v1 ; GFX7-NEXT: v_mov_b32_e32 v3, s0 ; GFX7-NEXT: v_mov_b32_e32 v4, s1 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 @@ -3104,30 +2969,26 @@ define amdgpu_ps void @insertelement_s_v8i8_v_v(<8 x i8> addrspace(4)* inreg %pt ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v2 ; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[0:1] -; GFX7-NEXT: v_lshrrev_b32_e32 v2, 8, v0 -; GFX7-NEXT: v_and_b32_e32 v2, s8, v2 +; GFX7-NEXT: v_bfe_u32 v5, v0, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v2, 24, v0 +; GFX7-NEXT: v_and_b32_e32 v4, s4, v0 +; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v5, 8, v5 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v4, 24, v0 -; GFX7-NEXT: v_and_b32_e32 v0, s8, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 8, v2 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX7-NEXT: v_and_b32_e32 v2, s8, v3 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 24, v4 -; GFX7-NEXT: v_lshrrev_b32_e32 v5, 8, v1 +; GFX7-NEXT: v_or_b32_e32 v4, v4, v5 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_or_b32_e32 v0, v4, v0 +; GFX7-NEXT: v_bfe_u32 v4, v1, 8, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 24, v2 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX7-NEXT: v_and_b32_e32 v2, s8, v5 -; GFX7-NEXT: v_lshrrev_b32_e32 v6, 16, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v7, 24, v1 -; GFX7-NEXT: v_and_b32_e32 v1, s8, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 8, v2 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v2 -; GFX7-NEXT: v_and_b32_e32 v2, s8, v6 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 24, v7 +; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v1 +; GFX7-NEXT: v_and_b32_e32 v2, s4, v1 +; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v4, 8, v4 +; GFX7-NEXT: v_or_b32_e32 v2, v2, v4 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 24, v3 ; GFX7-NEXT: s_mov_b64 s[0:1], 0 ; GFX7-NEXT: v_or_b32_e32 v1, v1, v2 ; GFX7-NEXT: s_mov_b32 s2, -1 @@ -3138,8 +2999,10 @@ define amdgpu_ps void @insertelement_s_v8i8_v_v(<8 x i8> addrspace(4)* inreg %pt ; GFX10-LABEL: insertelement_s_v8i8_v_v: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 -; GFX10-NEXT: s_movk_i32 s2, 0xff +; GFX10-NEXT: s_mov_b32 s3, 0x80008 ; GFX10-NEXT: v_and_b32_e32 v2, 3, v1 +; GFX10-NEXT: s_movk_i32 s2, 0xff +; GFX10-NEXT: s_mov_b32 s4, 0x80010 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 2, v1 ; GFX10-NEXT: v_lshlrev_b32_e32 v2, 3, v2 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3 @@ -3147,55 +3010,50 @@ define amdgpu_ps void @insertelement_s_v8i8_v_v(<8 x i8> addrspace(4)* inreg %pt ; GFX10-NEXT: v_lshlrev_b32_e64 v0, v2, s2 ; GFX10-NEXT: v_xor_b32_e32 v2, -1, v0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_lshr_b32 s6, s1, 8 -; GFX10-NEXT: s_lshr_b32 s7, s1, 16 -; GFX10-NEXT: s_and_b32 s6, s6, s2 -; GFX10-NEXT: s_lshr_b32 s8, s1, 24 -; GFX10-NEXT: s_and_b32 s7, s7, s2 -; GFX10-NEXT: s_and_b32 s1, s1, s2 -; GFX10-NEXT: s_lshl_b32 s6, s6, 8 -; GFX10-NEXT: s_lshr_b32 s3, s0, 8 -; GFX10-NEXT: s_lshl_b32 s7, s7, 16 -; GFX10-NEXT: s_or_b32 s1, s1, s6 -; GFX10-NEXT: s_lshr_b32 s4, s0, 16 -; GFX10-NEXT: s_and_b32 s3, s3, s2 -; GFX10-NEXT: s_lshl_b32 s8, s8, 24 -; GFX10-NEXT: s_or_b32 s1, s1, s7 -; GFX10-NEXT: s_lshr_b32 s5, s0, 24 -; GFX10-NEXT: s_and_b32 s4, s4, s2 -; GFX10-NEXT: s_and_b32 s0, s0, s2 +; GFX10-NEXT: s_bfe_u32 s8, s0, s3 +; GFX10-NEXT: s_bfe_u32 s3, s1, s3 +; GFX10-NEXT: s_lshr_b32 s6, s1, 24 +; GFX10-NEXT: s_and_b32 s9, s1, s2 +; GFX10-NEXT: s_bfe_u32 s1, s1, s4 ; GFX10-NEXT: s_lshl_b32 s3, s3, 8 -; GFX10-NEXT: s_or_b32 s1, s1, s8 -; GFX10-NEXT: s_lshl_b32 s4, s4, 16 -; GFX10-NEXT: s_or_b32 s0, s0, s3 +; GFX10-NEXT: s_lshl_b32 s1, s1, 16 +; GFX10-NEXT: s_or_b32 s3, s9, s3 +; GFX10-NEXT: s_lshl_b32 s6, s6, 24 +; GFX10-NEXT: s_or_b32 s1, s3, s1 +; GFX10-NEXT: s_lshr_b32 s5, s0, 24 +; GFX10-NEXT: s_and_b32 s7, s0, s2 +; GFX10-NEXT: s_bfe_u32 s0, s0, s4 +; GFX10-NEXT: s_lshl_b32 s4, s8, 8 +; GFX10-NEXT: s_or_b32 s1, s1, s6 +; GFX10-NEXT: s_lshl_b32 s0, s0, 16 +; GFX10-NEXT: s_or_b32 s4, s7, s4 ; GFX10-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-NEXT: s_lshl_b32 s5, s5, 24 -; GFX10-NEXT: s_or_b32 s0, s0, s4 -; GFX10-NEXT: s_or_b32 s0, s0, s5 +; GFX10-NEXT: s_lshl_b32 s3, s5, 24 +; GFX10-NEXT: s_or_b32 s0, s4, s0 +; GFX10-NEXT: s_or_b32 s0, s0, s3 ; GFX10-NEXT: v_cndmask_b32_e32 v5, s0, v1, vcc_lo ; GFX10-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 0, v3 +; GFX10-NEXT: s_mov_b32 s1, 16 ; GFX10-NEXT: v_and_or_b32 v2, v5, v2, v4 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v2, s0 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo ; GFX10-NEXT: s_mov_b32 s0, 8 -; GFX10-NEXT: v_lshrrev_b32_e32 v2, 8, v0 -; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v1 -; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v0 -; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v1 -; GFX10-NEXT: v_and_b32_sdwa v6, v0, s2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v3, s0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_and_b32_sdwa v7, v1, s2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; GFX10-NEXT: v_lshlrev_b32_e32 v5, 24, v5 -; GFX10-NEXT: v_and_or_b32 v0, v0, s2, v2 -; GFX10-NEXT: v_and_or_b32 v1, v1, s2, v3 +; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v3, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v5, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v6, s1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v7, s1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v0, v0, s2, v3 +; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v2 +; GFX10-NEXT: v_and_or_b32 v1, v1, s2, v5 +; GFX10-NEXT: v_lshlrev_b32_e32 v3, 24, v4 +; GFX10-NEXT: v_or3_b32 v0, v0, v6, v2 +; GFX10-NEXT: v_or3_b32 v1, v1, v7, v3 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: v_mov_b32_e32 v3, 0 -; GFX10-NEXT: v_or3_b32 v0, v0, v6, v4 -; GFX10-NEXT: v_or3_b32 v1, v1, v7, v5 ; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off ; GFX10-NEXT: s_endpgm %vec = load <8 x i8>, <8 x i8> addrspace(4)* %ptr @@ -3209,51 +3067,49 @@ define amdgpu_ps void @insertelement_v_v8i8_s_v(<8 x i8> addrspace(1)* %ptr, i8 ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX9-NEXT: s_mov_b32 s0, 8 -; GFX9-NEXT: s_movk_i32 s3, 0xff -; GFX9-NEXT: v_lshrrev_b32_e32 v4, 2, v2 +; GFX9-NEXT: s_mov_b32 s1, 16 +; GFX9-NEXT: v_lshrrev_b32_e32 v5, 2, v2 ; GFX9-NEXT: v_and_b32_e32 v2, 3, v2 -; GFX9-NEXT: s_and_b32 s1, s2, s3 +; GFX9-NEXT: s_movk_i32 s3, 0xff +; GFX9-NEXT: s_and_b32 s2, s2, s3 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 3, v2 -; GFX9-NEXT: v_lshlrev_b32_e64 v5, v2, s1 +; GFX9-NEXT: v_lshlrev_b32_e64 v6, v2, s2 ; GFX9-NEXT: v_lshlrev_b32_e64 v2, v2, s3 -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v4 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v5 ; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2 ; GFX9-NEXT: v_mov_b32_e32 v3, 8 +; GFX9-NEXT: v_mov_b32_e32 v4, 16 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v6, 8, v0 -; GFX9-NEXT: v_lshrrev_b32_e32 v8, 8, v1 ; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v0 -; GFX9-NEXT: v_lshrrev_b32_e32 v9, 24, v1 -; GFX9-NEXT: v_lshlrev_b32_sdwa v6, s0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v8, s0, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_b32_sdwa v10, v0, s3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_b32_sdwa v11, v1, s3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_lshrrev_b32_e32 v8, 24, v1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v9, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v11, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v10, s1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v12, s1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_and_or_b32 v0, v0, s3, v9 ; GFX9-NEXT: v_lshlrev_b32_e32 v7, 24, v7 -; GFX9-NEXT: v_and_or_b32 v0, v0, s3, v6 -; GFX9-NEXT: v_lshlrev_b32_e32 v9, 24, v9 -; GFX9-NEXT: v_and_or_b32 v1, v1, s3, v8 +; GFX9-NEXT: v_and_or_b32 v1, v1, s3, v11 +; GFX9-NEXT: v_lshlrev_b32_e32 v8, 24, v8 ; GFX9-NEXT: v_or3_b32 v0, v0, v10, v7 -; GFX9-NEXT: v_or3_b32 v1, v1, v11, v9 -; GFX9-NEXT: v_cndmask_b32_e32 v6, v0, v1, vcc -; GFX9-NEXT: v_and_or_b32 v2, v6, v2, v5 -; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v4 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; GFX9-NEXT: v_or3_b32 v1, v1, v12, v8 +; GFX9-NEXT: v_cndmask_b32_e32 v7, v0, v1, vcc +; GFX9-NEXT: v_and_or_b32 v2, v7, v2, v6 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v5 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] -; GFX9-NEXT: v_lshrrev_b32_e32 v2, 8, v0 -; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v1 -; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshrrev_b32_e32 v6, 24, v1 -; GFX9-NEXT: v_lshlrev_b32_sdwa v3, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_b32_sdwa v7, v0, s3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_b32_sdwa v8, v1, s3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; GFX9-NEXT: v_and_or_b32 v0, v0, s3, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v0 +; GFX9-NEXT: v_lshlrev_b32_sdwa v6, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshrrev_b32_e32 v5, 24, v1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v7, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v4, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX9-NEXT: v_and_or_b32 v1, v1, s3, v3 -; GFX9-NEXT: v_lshlrev_b32_e32 v5, 24, v6 +; GFX9-NEXT: v_lshlrev_b32_e32 v3, 24, v5 +; GFX9-NEXT: v_and_or_b32 v0, v0, s3, v6 +; GFX9-NEXT: v_lshlrev_b32_e32 v2, 24, v2 +; GFX9-NEXT: v_or3_b32 v0, v0, v7, v2 +; GFX9-NEXT: v_or3_b32 v1, v1, v4, v3 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: v_or3_b32 v0, v0, v7, v4 -; GFX9-NEXT: v_or3_b32 v1, v1, v8, v5 ; GFX9-NEXT: v_mov_b32_e32 v3, 0 ; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off ; GFX9-NEXT: s_endpgm @@ -3261,58 +3117,55 @@ define amdgpu_ps void @insertelement_v_v8i8_s_v(<8 x i8> addrspace(1)* %ptr, i8 ; GFX8-LABEL: insertelement_v_v8i8_s_v: ; GFX8: ; %bb.0: ; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] -; GFX8-NEXT: s_movk_i32 s0, 0xff ; GFX8-NEXT: v_mov_b32_e32 v3, 8 -; GFX8-NEXT: v_mov_b32_e32 v4, 8 -; GFX8-NEXT: v_mov_b32_e32 v5, s0 -; GFX8-NEXT: v_lshrrev_b32_e32 v6, 2, v2 +; GFX8-NEXT: v_mov_b32_e32 v4, 16 +; GFX8-NEXT: v_lshrrev_b32_e32 v7, 2, v2 ; GFX8-NEXT: v_and_b32_e32 v2, 3, v2 +; GFX8-NEXT: s_movk_i32 s0, 0xff ; GFX8-NEXT: s_and_b32 s1, s2, s0 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 3, v2 -; GFX8-NEXT: v_lshlrev_b32_e64 v7, v2, s1 +; GFX8-NEXT: v_lshlrev_b32_e64 v8, v2, s1 ; GFX8-NEXT: v_lshlrev_b32_e64 v2, v2, s0 -; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v6 +; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v7 ; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2 -; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v6 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v7 +; GFX8-NEXT: v_mov_b32_e32 v5, 8 +; GFX8-NEXT: v_mov_b32_e32 v6, 16 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_lshrrev_b32_e32 v8, 8, v0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v11, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX8-NEXT: v_lshrrev_b32_e32 v9, 24, v0 -; GFX8-NEXT: v_lshrrev_b32_e32 v10, 8, v1 -; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v3, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshlrev_b32_e32 v8, 24, v9 -; GFX8-NEXT: v_lshlrev_b32_sdwa v9, v4, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v11, 24, v1 -; GFX8-NEXT: v_and_b32_sdwa v12, v0, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_sdwa v13, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v1, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v12, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_sdwa v0, v0, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshrrev_b32_e32 v10, 24, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v4, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_e32 v9, 24, v9 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v12 -; GFX8-NEXT: v_lshlrev_b32_e32 v10, 24, v11 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v13 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v8 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v10 +; GFX8-NEXT: v_lshlrev_b32_e32 v3, 24, v10 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v9 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v3 ; GFX8-NEXT: v_cndmask_b32_e32 v3, v0, v1, vcc ; GFX8-NEXT: v_and_b32_e32 v2, v3, v2 -; GFX8-NEXT: v_or_b32_e32 v2, v2, v7 +; GFX8-NEXT: v_or_b32_e32 v2, v2, v8 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX8-NEXT: v_lshrrev_b32_e32 v2, 8, v0 -; GFX8-NEXT: v_lshrrev_b32_e32 v6, 8, v1 -; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GFX8-NEXT: v_and_b32_sdwa v8, v0, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_lshlrev_b32_sdwa v4, v4, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v7, 24, v1 -; GFX8-NEXT: v_and_b32_sdwa v5, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v4, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v5, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v2, 24, v0 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v7, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v6, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_e32 v2, 24, v2 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v7 ; GFX8-NEXT: v_lshlrev_b32_e32 v3, 24, v3 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v8 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v3 -; GFX8-NEXT: v_mov_b32_e32 v2, 0 -; GFX8-NEXT: v_lshlrev_b32_e32 v6, 24, v7 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v5 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v6 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v3 +; GFX8-NEXT: v_mov_b32_e32 v2, 0 ; GFX8-NEXT: v_mov_b32_e32 v3, 0 ; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GFX8-NEXT: s_endpgm @@ -3335,59 +3188,51 @@ define amdgpu_ps void @insertelement_v_v8i8_s_v(<8 x i8> addrspace(1)* %ptr, i8 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v3 ; GFX7-NEXT: s_mov_b32 s6, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v5, 8, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v8, 8, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v6, 16, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v9, 16, v1 -; GFX7-NEXT: v_and_b32_e32 v5, s3, v5 -; GFX7-NEXT: v_and_b32_e32 v8, s3, v8 -; GFX7-NEXT: v_lshrrev_b32_e32 v7, 24, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v10, 24, v1 -; GFX7-NEXT: v_and_b32_e32 v6, s3, v6 -; GFX7-NEXT: v_and_b32_e32 v9, s3, v9 -; GFX7-NEXT: v_and_b32_e32 v0, s3, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v5, 8, v5 -; GFX7-NEXT: v_and_b32_e32 v1, s3, v1 +; GFX7-NEXT: v_bfe_u32 v8, v0, 8, 8 +; GFX7-NEXT: v_bfe_u32 v10, v1, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v5, 24, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v6, 24, v1 +; GFX7-NEXT: v_and_b32_e32 v7, s3, v0 +; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX7-NEXT: v_and_b32_e32 v9, s3, v1 +; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 8 ; GFX7-NEXT: v_lshlrev_b32_e32 v8, 8, v8 -; GFX7-NEXT: v_lshlrev_b32_e32 v6, 16, v6 +; GFX7-NEXT: v_lshlrev_b32_e32 v10, 8, v10 +; GFX7-NEXT: v_or_b32_e32 v7, v7, v8 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-NEXT: v_or_b32_e32 v8, v9, v10 +; GFX7-NEXT: v_lshlrev_b32_e32 v5, 24, v5 +; GFX7-NEXT: v_or_b32_e32 v0, v7, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v6, 24, v6 +; GFX7-NEXT: v_or_b32_e32 v1, v8, v1 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v5 -; GFX7-NEXT: v_lshlrev_b32_e32 v9, 16, v9 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v8 -; GFX7-NEXT: v_lshlrev_b32_e32 v7, 24, v7 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v6 -; GFX7-NEXT: v_lshlrev_b32_e32 v10, 24, v10 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v9 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v7 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v10 +; GFX7-NEXT: v_or_b32_e32 v1, v1, v6 ; GFX7-NEXT: v_cndmask_b32_e32 v5, v0, v1, vcc ; GFX7-NEXT: v_and_b32_e32 v2, v5, v2 ; GFX7-NEXT: v_or_b32_e32 v2, v2, v4 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc ; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] -; GFX7-NEXT: v_lshrrev_b32_e32 v2, 8, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v5, 8, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v6, 16, v1 -; GFX7-NEXT: v_and_b32_e32 v2, s3, v2 -; GFX7-NEXT: v_and_b32_e32 v5, s3, v5 -; GFX7-NEXT: v_lshrrev_b32_e32 v4, 24, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v7, 24, v1 -; GFX7-NEXT: v_and_b32_e32 v3, s3, v3 -; GFX7-NEXT: v_and_b32_e32 v6, s3, v6 -; GFX7-NEXT: v_and_b32_e32 v0, s3, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 8, v2 -; GFX7-NEXT: v_and_b32_e32 v1, s3, v1 +; GFX7-NEXT: v_bfe_u32 v5, v0, 8, 8 +; GFX7-NEXT: v_bfe_u32 v7, v1, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v2, 24, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v1 +; GFX7-NEXT: v_and_b32_e32 v4, s3, v0 +; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX7-NEXT: v_and_b32_e32 v6, s3, v1 +; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 8 ; GFX7-NEXT: v_lshlrev_b32_e32 v5, 8, v5 -; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v7, 8, v7 +; GFX7-NEXT: v_or_b32_e32 v4, v4, v5 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-NEXT: v_or_b32_e32 v5, v6, v7 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 24, v2 +; GFX7-NEXT: v_or_b32_e32 v0, v4, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 24, v3 +; GFX7-NEXT: v_or_b32_e32 v1, v5, v1 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v6, 16, v6 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v5 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v3 -; GFX7-NEXT: v_lshlrev_b32_e32 v7, 24, v7 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v6 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v4 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v7 +; GFX7-NEXT: v_or_b32_e32 v1, v1, v3 ; GFX7-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX7-NEXT: s_endpgm ; @@ -3396,51 +3241,49 @@ define amdgpu_ps void @insertelement_v_v8i8_s_v(<8 x i8> addrspace(1)* %ptr, i8 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: s_mov_b32 s0, 8 ; GFX10-NEXT: v_and_b32_e32 v3, 3, v2 -; GFX10-NEXT: s_movk_i32 s1, 0xff +; GFX10-NEXT: s_mov_b32 s1, 16 +; GFX10-NEXT: s_movk_i32 s3, 0xff ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 2, v2 ; GFX10-NEXT: v_lshlrev_b32_e32 v3, 3, v3 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v2 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v4, 8, v0 -; GFX10-NEXT: v_lshrrev_b32_e32 v5, 8, v1 -; GFX10-NEXT: v_lshrrev_b32_e32 v6, 24, v0 -; GFX10-NEXT: v_lshrrev_b32_e32 v7, 24, v1 -; GFX10-NEXT: v_and_b32_sdwa v8, v0, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b32_sdwa v4, s0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v5, s0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_and_b32_sdwa v9, v1, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b32_e32 v6, 24, v6 -; GFX10-NEXT: v_lshlrev_b32_e32 v7, 24, v7 -; GFX10-NEXT: v_and_or_b32 v0, v0, s1, v4 -; GFX10-NEXT: v_and_or_b32 v1, v1, s1, v5 -; GFX10-NEXT: v_lshlrev_b32_e64 v4, v3, s1 -; GFX10-NEXT: s_and_b32 s0, s2, s1 -; GFX10-NEXT: v_or3_b32 v0, v0, v8, v6 -; GFX10-NEXT: v_or3_b32 v1, v1, v9, v7 +; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v5, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v7, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v6, 24, v1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v8, s1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v9, s1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v0, v0, s3, v5 +; GFX10-NEXT: v_lshlrev_b32_e32 v4, 24, v4 +; GFX10-NEXT: v_lshlrev_b32_e32 v5, 24, v6 +; GFX10-NEXT: v_and_or_b32 v1, v1, s3, v7 +; GFX10-NEXT: v_lshlrev_b32_e64 v6, v3, s3 +; GFX10-NEXT: s_and_b32 s0, s2, s3 +; GFX10-NEXT: v_or3_b32 v0, v0, v8, v4 ; GFX10-NEXT: v_lshlrev_b32_e64 v3, v3, s0 -; GFX10-NEXT: v_xor_b32_e32 v4, -1, v4 +; GFX10-NEXT: v_or3_b32 v1, v1, v9, v5 +; GFX10-NEXT: v_xor_b32_e32 v4, -1, v6 ; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 0, v2 ; GFX10-NEXT: v_mov_b32_e32 v2, 8 ; GFX10-NEXT: v_cndmask_b32_e32 v5, v0, v1, vcc_lo ; GFX10-NEXT: v_and_or_b32 v3, v5, v4, v3 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v3, s0 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo -; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v0 -; GFX10-NEXT: v_lshrrev_b32_e32 v4, 8, v1 -; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v0 +; GFX10-NEXT: v_mov_b32_e32 v3, 16 +; GFX10-NEXT: v_lshlrev_b32_sdwa v5, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX10-NEXT: v_lshrrev_b32_e32 v6, 24, v1 -; GFX10-NEXT: v_and_b32_sdwa v7, v0, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b32_sdwa v3, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_and_b32_sdwa v4, v1, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b32_e32 v5, 24, v5 -; GFX10-NEXT: v_lshlrev_b32_e32 v6, 24, v6 -; GFX10-NEXT: v_and_or_b32 v0, v0, s1, v3 -; GFX10-NEXT: v_and_or_b32 v1, v1, s1, v2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v7, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v0, v0, s3, v5 +; GFX10-NEXT: v_and_or_b32 v1, v1, s3, v2 +; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v6 +; GFX10-NEXT: v_lshlrev_b32_e32 v4, 24, v4 +; GFX10-NEXT: v_or3_b32 v1, v1, v3, v2 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 +; GFX10-NEXT: v_or3_b32 v0, v0, v7, v4 ; GFX10-NEXT: v_mov_b32_e32 v3, 0 -; GFX10-NEXT: v_or3_b32 v0, v0, v7, v5 -; GFX10-NEXT: v_or3_b32 v1, v1, v4, v6 ; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off ; GFX10-NEXT: s_endpgm %vec = load <8 x i8>, <8 x i8> addrspace(1)* %ptr @@ -3454,50 +3297,48 @@ define amdgpu_ps void @insertelement_v_v8i8_v_s(<8 x i8> addrspace(1)* %ptr, i8 ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX9-NEXT: s_mov_b32 s0, 8 -; GFX9-NEXT: s_movk_i32 s3, 0xff -; GFX9-NEXT: s_lshr_b32 s1, s2, 2 +; GFX9-NEXT: s_mov_b32 s1, 16 +; GFX9-NEXT: s_lshr_b32 s4, s2, 2 ; GFX9-NEXT: s_and_b32 s2, s2, 3 +; GFX9-NEXT: s_movk_i32 s3, 0xff ; GFX9-NEXT: s_lshl_b32 s2, s2, 3 ; GFX9-NEXT: v_lshlrev_b32_sdwa v2, s2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX9-NEXT: s_lshl_b32 s2, s3, s2 -; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s1, 1 +; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s4, 1 ; GFX9-NEXT: s_not_b32 s2, s2 ; GFX9-NEXT: v_mov_b32_e32 v3, 8 +; GFX9-NEXT: v_mov_b32_e32 v4, 16 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v4, 8, v0 -; GFX9-NEXT: v_lshrrev_b32_e32 v6, 8, v1 ; GFX9-NEXT: v_lshrrev_b32_e32 v5, 24, v0 -; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v1 -; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v6, s0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_b32_sdwa v8, v0, s3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_b32_sdwa v9, v1, s3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_lshrrev_b32_e32 v6, 24, v1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v7, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v9, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v8, s1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v10, s1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_and_or_b32 v0, v0, s3, v7 ; GFX9-NEXT: v_lshlrev_b32_e32 v5, 24, v5 -; GFX9-NEXT: v_and_or_b32 v0, v0, s3, v4 -; GFX9-NEXT: v_lshlrev_b32_e32 v7, 24, v7 -; GFX9-NEXT: v_and_or_b32 v1, v1, s3, v6 +; GFX9-NEXT: v_and_or_b32 v1, v1, s3, v9 +; GFX9-NEXT: v_lshlrev_b32_e32 v6, 24, v6 ; GFX9-NEXT: v_or3_b32 v0, v0, v8, v5 -; GFX9-NEXT: v_or3_b32 v1, v1, v9, v7 -; GFX9-NEXT: v_cndmask_b32_e32 v4, v0, v1, vcc -; GFX9-NEXT: v_and_or_b32 v2, v4, s2, v2 -; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s1, 0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; GFX9-NEXT: v_or3_b32 v1, v1, v10, v6 +; GFX9-NEXT: v_cndmask_b32_e32 v5, v0, v1, vcc +; GFX9-NEXT: v_and_or_b32 v2, v5, s2, v2 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s4, 0 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] -; GFX9-NEXT: v_lshrrev_b32_e32 v2, 8, v0 -; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v1 -; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshrrev_b32_e32 v6, 24, v1 -; GFX9-NEXT: v_lshlrev_b32_sdwa v3, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_b32_sdwa v7, v0, s3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_b32_sdwa v8, v1, s3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; GFX9-NEXT: v_and_or_b32 v0, v0, s3, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v0 +; GFX9-NEXT: v_lshlrev_b32_sdwa v6, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshrrev_b32_e32 v5, 24, v1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v7, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v4, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX9-NEXT: v_and_or_b32 v1, v1, s3, v3 -; GFX9-NEXT: v_lshlrev_b32_e32 v5, 24, v6 +; GFX9-NEXT: v_lshlrev_b32_e32 v3, 24, v5 +; GFX9-NEXT: v_and_or_b32 v0, v0, s3, v6 +; GFX9-NEXT: v_lshlrev_b32_e32 v2, 24, v2 +; GFX9-NEXT: v_or3_b32 v0, v0, v7, v2 +; GFX9-NEXT: v_or3_b32 v1, v1, v4, v3 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: v_or3_b32 v0, v0, v7, v4 -; GFX9-NEXT: v_or3_b32 v1, v1, v8, v5 ; GFX9-NEXT: v_mov_b32_e32 v3, 0 ; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off ; GFX9-NEXT: s_endpgm @@ -3507,56 +3348,53 @@ define amdgpu_ps void @insertelement_v_v8i8_v_s(<8 x i8> addrspace(1)* %ptr, i8 ; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] ; GFX8-NEXT: s_lshr_b32 s1, s2, 2 ; GFX8-NEXT: s_and_b32 s2, s2, 3 +; GFX8-NEXT: v_mov_b32_e32 v3, 8 ; GFX8-NEXT: s_lshl_b32 s2, s2, 3 -; GFX8-NEXT: v_mov_b32_e32 v6, s2 +; GFX8-NEXT: v_mov_b32_e32 v4, 16 +; GFX8-NEXT: v_mov_b32_e32 v7, s2 +; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX8-NEXT: s_movk_i32 s0, 0xff -; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v6, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_mov_b32_e32 v3, 8 -; GFX8-NEXT: v_mov_b32_e32 v4, 8 -; GFX8-NEXT: v_mov_b32_e32 v5, s0 ; GFX8-NEXT: s_lshl_b32 s0, s0, s2 ; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, s1, 1 ; GFX8-NEXT: s_not_b32 s0, s0 +; GFX8-NEXT: v_mov_b32_e32 v5, 8 +; GFX8-NEXT: v_mov_b32_e32 v6, 16 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_lshrrev_b32_e32 v6, 8, v0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v9, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX8-NEXT: v_lshrrev_b32_e32 v7, 24, v0 -; GFX8-NEXT: v_lshrrev_b32_e32 v8, 8, v1 -; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v3, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshlrev_b32_e32 v6, 24, v7 -; GFX8-NEXT: v_lshlrev_b32_sdwa v7, v4, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v9, 24, v1 -; GFX8-NEXT: v_and_b32_sdwa v10, v0, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_sdwa v11, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v1, v1, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v10, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_sdwa v0, v0, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshrrev_b32_e32 v8, 24, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v4, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_e32 v7, 24, v7 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v10 -; GFX8-NEXT: v_lshlrev_b32_e32 v8, 24, v9 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v11 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v6 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v8 +; GFX8-NEXT: v_lshlrev_b32_e32 v3, 24, v8 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v7 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v3 ; GFX8-NEXT: v_cndmask_b32_e32 v3, v0, v1, vcc ; GFX8-NEXT: v_and_b32_e32 v3, s0, v3 ; GFX8-NEXT: v_or_b32_e32 v2, v3, v2 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s1, 0 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX8-NEXT: v_lshrrev_b32_e32 v2, 8, v0 -; GFX8-NEXT: v_lshrrev_b32_e32 v6, 8, v1 -; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GFX8-NEXT: v_and_b32_sdwa v8, v0, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_lshlrev_b32_sdwa v4, v4, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v7, 24, v1 -; GFX8-NEXT: v_and_b32_sdwa v5, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v4, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v5, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v2, 24, v0 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v7, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v6, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_e32 v2, 24, v2 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v7 ; GFX8-NEXT: v_lshlrev_b32_e32 v3, 24, v3 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v8 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v3 -; GFX8-NEXT: v_mov_b32_e32 v2, 0 -; GFX8-NEXT: v_lshlrev_b32_e32 v6, 24, v7 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v5 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v6 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v3 +; GFX8-NEXT: v_mov_b32_e32 v2, 0 ; GFX8-NEXT: v_mov_b32_e32 v3, 0 ; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GFX8-NEXT: s_endpgm @@ -3578,60 +3416,52 @@ define amdgpu_ps void @insertelement_v_v8i8_v_s(<8 x i8> addrspace(1)* %ptr, i8 ; GFX7-NEXT: s_not_b32 s1, s1 ; GFX7-NEXT: s_mov_b32 s6, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v3, 8, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v6, 8, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v4, 16, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v7, 16, v1 -; GFX7-NEXT: v_and_b32_e32 v3, s3, v3 -; GFX7-NEXT: v_and_b32_e32 v6, s3, v6 -; GFX7-NEXT: v_lshrrev_b32_e32 v5, 24, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v8, 24, v1 -; GFX7-NEXT: v_and_b32_e32 v4, s3, v4 -; GFX7-NEXT: v_and_b32_e32 v7, s3, v7 -; GFX7-NEXT: v_and_b32_e32 v0, s3, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 -; GFX7-NEXT: v_and_b32_e32 v1, s3, v1 +; GFX7-NEXT: v_bfe_u32 v6, v0, 8, 8 +; GFX7-NEXT: v_bfe_u32 v8, v1, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v4, 24, v1 +; GFX7-NEXT: v_and_b32_e32 v5, s3, v0 +; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX7-NEXT: v_and_b32_e32 v7, s3, v1 +; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 8 ; GFX7-NEXT: v_lshlrev_b32_e32 v6, 8, v6 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v4 +; GFX7-NEXT: v_lshlrev_b32_e32 v8, 8, v8 +; GFX7-NEXT: v_or_b32_e32 v5, v5, v6 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-NEXT: v_or_b32_e32 v6, v7, v8 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 24, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v5, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v4 +; GFX7-NEXT: v_or_b32_e32 v1, v6, v1 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v3 -; GFX7-NEXT: v_lshlrev_b32_e32 v7, 16, v7 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v6 -; GFX7-NEXT: v_lshlrev_b32_e32 v5, 24, v5 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v4 -; GFX7-NEXT: v_lshlrev_b32_e32 v8, 24, v8 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v7 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v5 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v8 +; GFX7-NEXT: v_or_b32_e32 v1, v1, v4 ; GFX7-NEXT: v_cndmask_b32_e32 v3, v0, v1, vcc ; GFX7-NEXT: v_and_b32_e32 v3, s1, v3 ; GFX7-NEXT: v_or_b32_e32 v2, v3, v2 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[0:1], s0, 0 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc ; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] -; GFX7-NEXT: v_lshrrev_b32_e32 v2, 8, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v5, 8, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v6, 16, v1 -; GFX7-NEXT: v_and_b32_e32 v2, s3, v2 -; GFX7-NEXT: v_and_b32_e32 v5, s3, v5 -; GFX7-NEXT: v_lshrrev_b32_e32 v4, 24, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v7, 24, v1 -; GFX7-NEXT: v_and_b32_e32 v3, s3, v3 -; GFX7-NEXT: v_and_b32_e32 v6, s3, v6 -; GFX7-NEXT: v_and_b32_e32 v0, s3, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 8, v2 -; GFX7-NEXT: v_and_b32_e32 v1, s3, v1 +; GFX7-NEXT: v_bfe_u32 v5, v0, 8, 8 +; GFX7-NEXT: v_bfe_u32 v7, v1, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v2, 24, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v1 +; GFX7-NEXT: v_and_b32_e32 v4, s3, v0 +; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX7-NEXT: v_and_b32_e32 v6, s3, v1 +; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 8 ; GFX7-NEXT: v_lshlrev_b32_e32 v5, 8, v5 -; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v7, 8, v7 +; GFX7-NEXT: v_or_b32_e32 v4, v4, v5 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-NEXT: v_or_b32_e32 v5, v6, v7 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 24, v2 +; GFX7-NEXT: v_or_b32_e32 v0, v4, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 24, v3 +; GFX7-NEXT: v_or_b32_e32 v1, v5, v1 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v6, 16, v6 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v5 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v3 -; GFX7-NEXT: v_lshlrev_b32_e32 v7, 24, v7 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v6 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v4 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v7 +; GFX7-NEXT: v_or_b32_e32 v1, v1, v3 ; GFX7-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX7-NEXT: s_endpgm ; @@ -3639,51 +3469,49 @@ define amdgpu_ps void @insertelement_v_v8i8_v_s(<8 x i8> addrspace(1)* %ptr, i8 ; GFX10: ; %bb.0: ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: s_mov_b32 s0, 8 -; GFX10-NEXT: s_movk_i32 s1, 0xff +; GFX10-NEXT: s_mov_b32 s1, 16 +; GFX10-NEXT: s_movk_i32 s3, 0xff ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v0 -; GFX10-NEXT: v_lshrrev_b32_e32 v4, 8, v1 -; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v0 -; GFX10-NEXT: v_lshrrev_b32_e32 v6, 24, v1 -; GFX10-NEXT: v_and_b32_sdwa v7, v0, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b32_sdwa v3, s0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v4, s0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_and_b32_sdwa v8, v1, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b32_e32 v5, 24, v5 -; GFX10-NEXT: v_lshlrev_b32_e32 v6, 24, v6 -; GFX10-NEXT: v_and_or_b32 v0, v0, s1, v3 -; GFX10-NEXT: v_and_or_b32 v1, v1, s1, v4 +; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v4, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v6, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v7, s1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v8, s1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v0, v0, s3, v4 +; GFX10-NEXT: v_lshlrev_b32_e32 v3, 24, v3 +; GFX10-NEXT: v_and_or_b32 v1, v1, s3, v6 +; GFX10-NEXT: v_lshlrev_b32_e32 v4, 24, v5 +; GFX10-NEXT: s_lshr_b32 s1, s2, 2 ; GFX10-NEXT: s_and_b32 s0, s2, 3 -; GFX10-NEXT: s_lshr_b32 s2, s2, 2 +; GFX10-NEXT: v_or3_b32 v0, v0, v7, v3 +; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s1, 1 +; GFX10-NEXT: v_or3_b32 v1, v1, v8, v4 ; GFX10-NEXT: s_lshl_b32 s0, s0, 3 -; GFX10-NEXT: v_or3_b32 v0, v0, v7, v5 -; GFX10-NEXT: v_or3_b32 v1, v1, v8, v6 -; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 1 ; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: s_lshl_b32 s0, s1, s0 -; GFX10-NEXT: s_not_b32 s0, s0 +; GFX10-NEXT: s_lshl_b32 s0, s3, s0 ; GFX10-NEXT: v_cndmask_b32_e32 v3, v0, v1, vcc_lo +; GFX10-NEXT: s_not_b32 s0, s0 ; GFX10-NEXT: v_and_or_b32 v2, v3, s0, v2 -; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s2, 0 +; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s1, 0 +; GFX10-NEXT: v_mov_b32_e32 v3, 8 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v2, s0 -; GFX10-NEXT: v_mov_b32_e32 v2, 8 -; GFX10-NEXT: v_lshrrev_b32_e32 v4, 8, v1 -; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v0 -; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v0 +; GFX10-NEXT: v_mov_b32_e32 v2, 16 ; GFX10-NEXT: v_lshrrev_b32_e32 v6, 24, v1 -; GFX10-NEXT: v_and_b32_sdwa v7, v0, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b32_sdwa v3, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_and_b32_sdwa v4, v1, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b32_e32 v5, 24, v5 -; GFX10-NEXT: v_lshlrev_b32_e32 v6, 24, v6 -; GFX10-NEXT: v_and_or_b32 v0, v0, s1, v3 -; GFX10-NEXT: v_and_or_b32 v1, v1, s1, v2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v5, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v7, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v0, v0, s3, v5 +; GFX10-NEXT: v_and_or_b32 v1, v1, s3, v3 +; GFX10-NEXT: v_lshlrev_b32_e32 v3, 24, v6 +; GFX10-NEXT: v_lshlrev_b32_e32 v4, 24, v4 +; GFX10-NEXT: v_or3_b32 v1, v1, v2, v3 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 +; GFX10-NEXT: v_or3_b32 v0, v0, v7, v4 ; GFX10-NEXT: v_mov_b32_e32 v3, 0 -; GFX10-NEXT: v_or3_b32 v0, v0, v7, v5 -; GFX10-NEXT: v_or3_b32 v1, v1, v4, v6 ; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off ; GFX10-NEXT: s_endpgm %vec = load <8 x i8>, <8 x i8> addrspace(1)* %ptr @@ -3697,51 +3525,49 @@ define amdgpu_ps void @insertelement_v_v8i8_v_v(<8 x i8> addrspace(1)* %ptr, i8 ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX9-NEXT: s_mov_b32 s0, 8 -; GFX9-NEXT: s_movk_i32 s1, 0xff -; GFX9-NEXT: v_lshrrev_b32_e32 v6, 2, v3 +; GFX9-NEXT: s_mov_b32 s1, 16 +; GFX9-NEXT: v_lshrrev_b32_e32 v7, 2, v3 ; GFX9-NEXT: v_and_b32_e32 v3, 3, v3 +; GFX9-NEXT: s_movk_i32 s2, 0xff ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 3, v3 ; GFX9-NEXT: v_mov_b32_e32 v4, 0xff ; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX9-NEXT: v_lshlrev_b32_e32 v3, v3, v4 -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v6 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v7 ; GFX9-NEXT: v_xor_b32_e32 v3, -1, v3 ; GFX9-NEXT: v_mov_b32_e32 v5, 8 +; GFX9-NEXT: v_mov_b32_e32 v6, 16 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v7, 8, v0 -; GFX9-NEXT: v_lshrrev_b32_e32 v9, 8, v1 ; GFX9-NEXT: v_lshrrev_b32_e32 v8, 24, v0 -; GFX9-NEXT: v_lshrrev_b32_e32 v10, 24, v1 -; GFX9-NEXT: v_lshlrev_b32_sdwa v7, s0, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v9, s0, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_b32_sdwa v11, v0, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_b32_sdwa v12, v1, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_lshrrev_b32_e32 v9, 24, v1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v10, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v12, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v11, s1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v13, s1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_and_or_b32 v0, v0, s2, v10 ; GFX9-NEXT: v_lshlrev_b32_e32 v8, 24, v8 -; GFX9-NEXT: v_and_or_b32 v0, v0, s1, v7 -; GFX9-NEXT: v_and_or_b32 v1, v1, s1, v9 -; GFX9-NEXT: v_lshlrev_b32_e32 v10, 24, v10 +; GFX9-NEXT: v_and_or_b32 v1, v1, s2, v12 +; GFX9-NEXT: v_lshlrev_b32_e32 v9, 24, v9 ; GFX9-NEXT: v_or3_b32 v0, v0, v11, v8 -; GFX9-NEXT: v_or3_b32 v1, v1, v12, v10 -; GFX9-NEXT: v_cndmask_b32_e32 v7, v0, v1, vcc -; GFX9-NEXT: v_and_or_b32 v2, v7, v3, v2 -; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v6 +; GFX9-NEXT: v_or3_b32 v1, v1, v13, v9 +; GFX9-NEXT: v_cndmask_b32_e32 v8, v0, v1, vcc +; GFX9-NEXT: v_and_or_b32 v2, v8, v3, v2 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v7 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX9-NEXT: v_lshrrev_b32_e32 v2, 8, v0 -; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GFX9-NEXT: v_lshrrev_b32_e32 v6, 8, v1 -; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_b32_sdwa v8, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v1 -; GFX9-NEXT: v_lshlrev_b32_sdwa v5, v5, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_lshrrev_b32_e32 v2, 24, v0 +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v7, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v5, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v8, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v6, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_and_or_b32 v0, v0, v4, v7 +; GFX9-NEXT: v_lshlrev_b32_e32 v2, 24, v2 +; GFX9-NEXT: v_and_or_b32 v1, v1, v4, v5 ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 24, v3 -; GFX9-NEXT: v_and_or_b32 v0, v0, v4, v2 -; GFX9-NEXT: v_and_b32_sdwa v9, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_or3_b32 v0, v0, v8, v3 +; GFX9-NEXT: v_or3_b32 v0, v0, v8, v2 +; GFX9-NEXT: v_or3_b32 v1, v1, v6, v3 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: v_lshlrev_b32_e32 v6, 24, v7 -; GFX9-NEXT: v_and_or_b32 v1, v1, v4, v5 -; GFX9-NEXT: v_or3_b32 v1, v1, v9, v6 ; GFX9-NEXT: v_mov_b32_e32 v3, 0 ; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off ; GFX9-NEXT: s_endpgm @@ -3749,58 +3575,54 @@ define amdgpu_ps void @insertelement_v_v8i8_v_v(<8 x i8> addrspace(1)* %ptr, i8 ; GFX8-LABEL: insertelement_v_v8i8_v_v: ; GFX8: ; %bb.0: ; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] -; GFX8-NEXT: s_movk_i32 s0, 0xff -; GFX8-NEXT: v_mov_b32_e32 v5, 8 -; GFX8-NEXT: v_mov_b32_e32 v6, 8 -; GFX8-NEXT: v_mov_b32_e32 v7, s0 -; GFX8-NEXT: v_lshrrev_b32_e32 v8, 2, v3 +; GFX8-NEXT: v_lshrrev_b32_e32 v9, 2, v3 ; GFX8-NEXT: v_and_b32_e32 v3, 3, v3 +; GFX8-NEXT: v_mov_b32_e32 v5, 8 +; GFX8-NEXT: v_mov_b32_e32 v6, 16 ; GFX8-NEXT: v_lshlrev_b32_e32 v3, 3, v3 ; GFX8-NEXT: v_mov_b32_e32 v4, 0xff ; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX8-NEXT: v_lshlrev_b32_e32 v3, v3, v4 -; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8 +; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v9 ; GFX8-NEXT: v_xor_b32_e32 v3, -1, v3 -; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v8 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v9 +; GFX8-NEXT: v_mov_b32_e32 v7, 8 +; GFX8-NEXT: v_mov_b32_e32 v8, 16 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_lshrrev_b32_e32 v9, 8, v0 -; GFX8-NEXT: v_lshrrev_b32_e32 v10, 24, v0 -; GFX8-NEXT: v_lshrrev_b32_e32 v11, 8, v1 -; GFX8-NEXT: v_lshlrev_b32_sdwa v5, v5, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshlrev_b32_e32 v9, 24, v10 -; GFX8-NEXT: v_lshlrev_b32_sdwa v10, v6, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_and_b32_sdwa v13, v0, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v0, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_lshrrev_b32_e32 v12, 24, v1 -; GFX8-NEXT: v_and_b32_sdwa v7, v1, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v1, v1, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_e32 v0, v0, v13 -; GFX8-NEXT: v_lshlrev_b32_e32 v11, 24, v12 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v7 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v9 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v11 -; GFX8-NEXT: v_cndmask_b32_e32 v5, v0, v1, vcc -; GFX8-NEXT: v_and_b32_e32 v3, v5, v3 +; GFX8-NEXT: v_lshlrev_b32_sdwa v11, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v5, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v12, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_sdwa v0, v0, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshrrev_b32_e32 v10, 24, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v6, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_e32 v4, 24, v4 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v12 +; GFX8-NEXT: v_lshlrev_b32_e32 v5, 24, v10 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v6 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v5 +; GFX8-NEXT: v_cndmask_b32_e32 v4, v0, v1, vcc +; GFX8-NEXT: v_and_b32_e32 v3, v4, v3 ; GFX8-NEXT: v_or_b32_e32 v2, v3, v2 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX8-NEXT: v_lshrrev_b32_e32 v2, 8, v0 -; GFX8-NEXT: v_lshrrev_b32_e32 v5, 8, v1 -; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v6, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GFX8-NEXT: v_and_b32_sdwa v8, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_lshlrev_b32_sdwa v5, v6, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v7, 24, v1 -; GFX8-NEXT: v_and_b32_sdwa v4, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v4, v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v6, v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v2, 24, v0 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v5, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v7, v8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_sdwa v1, v1, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_e32 v2, 24, v2 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v5 ; GFX8-NEXT: v_lshlrev_b32_e32 v3, 24, v3 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v8 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v3 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v7 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v3 ; GFX8-NEXT: v_mov_b32_e32 v2, 0 -; GFX8-NEXT: v_lshlrev_b32_e32 v6, 24, v7 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v6 ; GFX8-NEXT: v_mov_b32_e32 v3, 0 ; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GFX8-NEXT: s_endpgm @@ -3823,60 +3645,52 @@ define amdgpu_ps void @insertelement_v_v8i8_v_v(<8 x i8> addrspace(1)* %ptr, i8 ; GFX7-NEXT: v_xor_b32_e32 v3, -1, v3 ; GFX7-NEXT: s_mov_b32 s6, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v6, 8, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v9, 8, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v7, 16, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v10, 16, v1 -; GFX7-NEXT: v_and_b32_e32 v6, s0, v6 -; GFX7-NEXT: v_and_b32_e32 v9, s0, v9 -; GFX7-NEXT: v_lshrrev_b32_e32 v8, 24, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v11, 24, v1 -; GFX7-NEXT: v_and_b32_e32 v7, s0, v7 -; GFX7-NEXT: v_and_b32_e32 v10, s0, v10 -; GFX7-NEXT: v_and_b32_e32 v0, s0, v0 -; GFX7-NEXT: v_and_b32_e32 v1, s0, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v6, 8, v6 +; GFX7-NEXT: v_bfe_u32 v9, v0, 8, 8 +; GFX7-NEXT: v_bfe_u32 v11, v1, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v6, 24, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v7, 24, v1 +; GFX7-NEXT: v_and_b32_e32 v8, s0, v0 +; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX7-NEXT: v_and_b32_e32 v10, s0, v1 +; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 8 ; GFX7-NEXT: v_lshlrev_b32_e32 v9, 8, v9 -; GFX7-NEXT: v_lshlrev_b32_e32 v7, 16, v7 +; GFX7-NEXT: v_lshlrev_b32_e32 v11, 8, v11 +; GFX7-NEXT: v_or_b32_e32 v8, v8, v9 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-NEXT: v_or_b32_e32 v9, v10, v11 +; GFX7-NEXT: v_lshlrev_b32_e32 v6, 24, v6 +; GFX7-NEXT: v_or_b32_e32 v0, v8, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v7, 24, v7 +; GFX7-NEXT: v_or_b32_e32 v1, v9, v1 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v6 -; GFX7-NEXT: v_lshlrev_b32_e32 v10, 16, v10 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v9 -; GFX7-NEXT: v_lshlrev_b32_e32 v8, 24, v8 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v7 -; GFX7-NEXT: v_lshlrev_b32_e32 v11, 24, v11 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v10 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v8 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v11 +; GFX7-NEXT: v_or_b32_e32 v1, v1, v7 ; GFX7-NEXT: v_cndmask_b32_e32 v6, v0, v1, vcc ; GFX7-NEXT: v_and_b32_e32 v3, v6, v3 ; GFX7-NEXT: v_or_b32_e32 v2, v3, v2 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v5 -; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc ; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] -; GFX7-NEXT: v_lshrrev_b32_e32 v2, 8, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v6, 8, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v0 -; GFX7-NEXT: v_and_b32_e32 v2, v2, v4 -; GFX7-NEXT: v_and_b32_e32 v6, v6, v4 -; GFX7-NEXT: v_lshrrev_b32_e32 v7, 16, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v5, 24, v0 -; GFX7-NEXT: v_and_b32_e32 v3, v3, v4 -; GFX7-NEXT: v_lshrrev_b32_e32 v8, 24, v1 -; GFX7-NEXT: v_and_b32_e32 v0, v0, v4 -; GFX7-NEXT: v_and_b32_e32 v1, v1, v4 -; GFX7-NEXT: v_and_b32_e32 v4, v7, v4 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 8, v2 +; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; GFX7-NEXT: v_bfe_u32 v6, v0, 8, 8 +; GFX7-NEXT: v_bfe_u32 v7, v1, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v2, 24, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v1 +; GFX7-NEXT: v_and_b32_e32 v5, v0, v4 +; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX7-NEXT: v_and_b32_e32 v4, v1, v4 +; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 8 ; GFX7-NEXT: v_lshlrev_b32_e32 v6, 8, v6 -; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v7, 8, v7 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_or_b32_e32 v5, v5, v6 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-NEXT: v_or_b32_e32 v4, v4, v7 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 24, v2 +; GFX7-NEXT: v_or_b32_e32 v0, v5, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 24, v3 +; GFX7-NEXT: v_or_b32_e32 v1, v4, v1 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v6 -; GFX7-NEXT: v_lshlrev_b32_e32 v5, 24, v5 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v3 -; GFX7-NEXT: v_lshlrev_b32_e32 v7, 24, v8 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v4 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v5 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v7 +; GFX7-NEXT: v_or_b32_e32 v1, v1, v3 ; GFX7-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX7-NEXT: s_endpgm ; @@ -3885,51 +3699,49 @@ define amdgpu_ps void @insertelement_v_v8i8_v_v(<8 x i8> addrspace(1)* %ptr, i8 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: s_mov_b32 s0, 8 ; GFX10-NEXT: v_and_b32_e32 v4, 3, v3 -; GFX10-NEXT: s_movk_i32 s1, 0xff +; GFX10-NEXT: s_mov_b32 s1, 16 +; GFX10-NEXT: s_movk_i32 s2, 0xff ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 2, v3 ; GFX10-NEXT: v_mov_b32_e32 v5, 0xff ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 3, v4 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3 ; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v6, 8, v0 -; GFX10-NEXT: v_lshrrev_b32_e32 v7, 8, v1 -; GFX10-NEXT: v_lshrrev_b32_e32 v8, 24, v0 -; GFX10-NEXT: v_lshrrev_b32_e32 v9, 24, v1 -; GFX10-NEXT: v_and_b32_sdwa v10, v0, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b32_sdwa v6, s0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v7, s0, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_and_b32_sdwa v11, v1, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b32_e32 v8, 24, v8 -; GFX10-NEXT: v_lshlrev_b32_e32 v9, 24, v9 -; GFX10-NEXT: v_and_or_b32 v0, v0, s1, v6 -; GFX10-NEXT: v_and_or_b32 v1, v1, s1, v7 -; GFX10-NEXT: v_lshlrev_b32_e32 v6, v4, v5 +; GFX10-NEXT: v_lshrrev_b32_e32 v6, 24, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v7, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v9, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v8, 24, v1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v10, s1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v11, s1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v0, v0, s2, v7 +; GFX10-NEXT: v_lshlrev_b32_e32 v6, 24, v6 +; GFX10-NEXT: v_lshlrev_b32_e32 v7, 24, v8 +; GFX10-NEXT: v_and_or_b32 v1, v1, s2, v9 +; GFX10-NEXT: v_lshlrev_b32_e32 v8, v4, v5 ; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 0, v3 -; GFX10-NEXT: v_or3_b32 v0, v0, v10, v8 -; GFX10-NEXT: v_or3_b32 v1, v1, v11, v9 -; GFX10-NEXT: v_xor_b32_e32 v4, -1, v6 +; GFX10-NEXT: v_or3_b32 v0, v0, v10, v6 +; GFX10-NEXT: v_mov_b32_e32 v3, 8 +; GFX10-NEXT: v_or3_b32 v1, v1, v11, v7 +; GFX10-NEXT: v_xor_b32_e32 v4, -1, v8 ; GFX10-NEXT: v_cndmask_b32_e32 v6, v0, v1, vcc_lo ; GFX10-NEXT: v_and_or_b32 v2, v6, v4, v2 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v2, s0 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo -; GFX10-NEXT: v_mov_b32_e32 v2, 8 -; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v0 -; GFX10-NEXT: v_lshrrev_b32_e32 v4, 8, v1 -; GFX10-NEXT: v_lshrrev_b32_e32 v6, 24, v0 +; GFX10-NEXT: v_mov_b32_e32 v2, 16 +; GFX10-NEXT: v_lshlrev_b32_sdwa v6, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX10-NEXT: v_lshrrev_b32_e32 v7, 24, v1 -; GFX10-NEXT: v_and_b32_sdwa v8, v0, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b32_sdwa v3, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_and_b32_sdwa v4, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b32_e32 v6, 24, v6 -; GFX10-NEXT: v_lshlrev_b32_e32 v7, 24, v7 -; GFX10-NEXT: v_and_or_b32 v0, v0, v5, v3 -; GFX10-NEXT: v_and_or_b32 v1, v1, v5, v2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v8, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v0, v0, v5, v6 +; GFX10-NEXT: v_and_or_b32 v1, v1, v5, v3 +; GFX10-NEXT: v_lshlrev_b32_e32 v3, 24, v7 +; GFX10-NEXT: v_lshlrev_b32_e32 v4, 24, v4 +; GFX10-NEXT: v_or3_b32 v1, v1, v2, v3 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 +; GFX10-NEXT: v_or3_b32 v0, v0, v8, v4 ; GFX10-NEXT: v_mov_b32_e32 v3, 0 -; GFX10-NEXT: v_or3_b32 v0, v0, v8, v6 -; GFX10-NEXT: v_or3_b32 v1, v1, v4, v7 ; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off ; GFX10-NEXT: s_endpgm %vec = load <8 x i8>, <8 x i8> addrspace(1)* %ptr @@ -3942,57 +3754,51 @@ define amdgpu_ps void @insertelement_s_v16i8_s_s(<16 x i8> addrspace(4)* inreg % ; GFX9-LABEL: insertelement_s_v16i8_s_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 -; GFX9-NEXT: s_movk_i32 s18, 0xff +; GFX9-NEXT: s_mov_b32 s12, 0x80008 +; GFX9-NEXT: s_movk_i32 s10, 0xff ; GFX9-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-NEXT: v_mov_b32_e32 v5, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_lshr_b32 s6, s0, 8 -; GFX9-NEXT: s_and_b32 s6, s6, s18 -; GFX9-NEXT: s_lshr_b32 s7, s0, 16 -; GFX9-NEXT: s_lshr_b32 s8, s0, 24 -; GFX9-NEXT: s_and_b32 s0, s0, s18 -; GFX9-NEXT: s_lshl_b32 s6, s6, 8 -; GFX9-NEXT: s_or_b32 s0, s0, s6 -; GFX9-NEXT: s_and_b32 s6, s7, s18 -; GFX9-NEXT: s_lshl_b32 s6, s6, 16 -; GFX9-NEXT: s_or_b32 s0, s0, s6 -; GFX9-NEXT: s_lshl_b32 s6, s8, 24 -; GFX9-NEXT: s_lshr_b32 s9, s1, 8 +; GFX9-NEXT: s_bfe_u32 s13, s0, s12 +; GFX9-NEXT: s_and_b32 s11, s0, s10 +; GFX9-NEXT: s_lshl_b32 s13, s13, 8 +; GFX9-NEXT: s_or_b32 s11, s11, s13 +; GFX9-NEXT: s_mov_b32 s13, 0x80010 +; GFX9-NEXT: s_lshr_b32 s6, s0, 24 +; GFX9-NEXT: s_bfe_u32 s0, s0, s13 +; GFX9-NEXT: s_lshl_b32 s0, s0, 16 +; GFX9-NEXT: s_or_b32 s0, s11, s0 +; GFX9-NEXT: s_bfe_u32 s11, s1, s12 +; GFX9-NEXT: s_lshl_b32 s6, s6, 24 ; GFX9-NEXT: s_or_b32 s0, s0, s6 -; GFX9-NEXT: s_and_b32 s6, s9, s18 -; GFX9-NEXT: s_lshr_b32 s10, s1, 16 -; GFX9-NEXT: s_lshr_b32 s11, s1, 24 -; GFX9-NEXT: s_and_b32 s1, s1, s18 -; GFX9-NEXT: s_lshl_b32 s6, s6, 8 -; GFX9-NEXT: s_or_b32 s1, s1, s6 -; GFX9-NEXT: s_and_b32 s6, s10, s18 -; GFX9-NEXT: s_lshl_b32 s6, s6, 16 -; GFX9-NEXT: s_or_b32 s1, s1, s6 -; GFX9-NEXT: s_lshl_b32 s6, s11, 24 -; GFX9-NEXT: s_lshr_b32 s12, s2, 8 +; GFX9-NEXT: s_lshr_b32 s7, s1, 24 +; GFX9-NEXT: s_and_b32 s6, s1, s10 +; GFX9-NEXT: s_bfe_u32 s1, s1, s13 +; GFX9-NEXT: s_lshl_b32 s11, s11, 8 +; GFX9-NEXT: s_or_b32 s6, s6, s11 +; GFX9-NEXT: s_lshl_b32 s1, s1, 16 +; GFX9-NEXT: s_or_b32 s1, s6, s1 +; GFX9-NEXT: s_lshl_b32 s6, s7, 24 +; GFX9-NEXT: s_bfe_u32 s7, s2, s12 ; GFX9-NEXT: s_or_b32 s1, s1, s6 -; GFX9-NEXT: s_and_b32 s6, s12, s18 -; GFX9-NEXT: s_lshr_b32 s13, s2, 16 -; GFX9-NEXT: s_lshr_b32 s14, s2, 24 -; GFX9-NEXT: s_and_b32 s2, s2, s18 -; GFX9-NEXT: s_lshl_b32 s6, s6, 8 -; GFX9-NEXT: s_or_b32 s2, s2, s6 -; GFX9-NEXT: s_and_b32 s6, s13, s18 -; GFX9-NEXT: s_lshl_b32 s6, s6, 16 -; GFX9-NEXT: s_or_b32 s2, s2, s6 -; GFX9-NEXT: s_lshl_b32 s6, s14, 24 -; GFX9-NEXT: s_lshr_b32 s15, s3, 8 +; GFX9-NEXT: s_lshr_b32 s8, s2, 24 +; GFX9-NEXT: s_and_b32 s6, s2, s10 +; GFX9-NEXT: s_bfe_u32 s2, s2, s13 +; GFX9-NEXT: s_lshl_b32 s7, s7, 8 +; GFX9-NEXT: s_or_b32 s6, s6, s7 +; GFX9-NEXT: s_lshl_b32 s2, s2, 16 +; GFX9-NEXT: s_bfe_u32 s7, s3, s12 +; GFX9-NEXT: s_or_b32 s2, s6, s2 +; GFX9-NEXT: s_lshl_b32 s6, s8, 24 ; GFX9-NEXT: s_or_b32 s2, s2, s6 -; GFX9-NEXT: s_and_b32 s6, s15, s18 -; GFX9-NEXT: s_lshr_b32 s16, s3, 16 -; GFX9-NEXT: s_lshr_b32 s17, s3, 24 -; GFX9-NEXT: s_and_b32 s3, s3, s18 -; GFX9-NEXT: s_lshl_b32 s6, s6, 8 -; GFX9-NEXT: s_or_b32 s3, s3, s6 -; GFX9-NEXT: s_and_b32 s6, s16, s18 -; GFX9-NEXT: s_lshl_b32 s6, s6, 16 -; GFX9-NEXT: s_or_b32 s3, s3, s6 -; GFX9-NEXT: s_lshl_b32 s6, s17, 24 +; GFX9-NEXT: s_lshr_b32 s9, s3, 24 +; GFX9-NEXT: s_and_b32 s6, s3, s10 +; GFX9-NEXT: s_bfe_u32 s3, s3, s13 +; GFX9-NEXT: s_lshl_b32 s7, s7, 8 +; GFX9-NEXT: s_or_b32 s6, s6, s7 +; GFX9-NEXT: s_lshl_b32 s3, s3, 16 +; GFX9-NEXT: s_or_b32 s3, s6, s3 +; GFX9-NEXT: s_lshl_b32 s6, s9, 24 ; GFX9-NEXT: s_or_b32 s3, s3, s6 ; GFX9-NEXT: s_lshr_b32 s6, s5, 2 ; GFX9-NEXT: s_cmp_eq_u32 s6, 1 @@ -4003,9 +3809,9 @@ define amdgpu_ps void @insertelement_s_v16i8_s_s(<16 x i8> addrspace(4)* inreg % ; GFX9-NEXT: s_cselect_b32 s7, s3, s7 ; GFX9-NEXT: s_and_b32 s5, s5, 3 ; GFX9-NEXT: s_lshl_b32 s5, s5, 3 -; GFX9-NEXT: s_and_b32 s4, s4, s18 +; GFX9-NEXT: s_and_b32 s4, s4, s10 ; GFX9-NEXT: s_lshl_b32 s4, s4, s5 -; GFX9-NEXT: s_lshl_b32 s5, s18, s5 +; GFX9-NEXT: s_lshl_b32 s5, s10, s5 ; GFX9-NEXT: s_andn2_b32 s5, s7, s5 ; GFX9-NEXT: s_or_b32 s4, s5, s4 ; GFX9-NEXT: s_cmp_eq_u32 s6, 0 @@ -4016,53 +3822,45 @@ define amdgpu_ps void @insertelement_s_v16i8_s_s(<16 x i8> addrspace(4)* inreg % ; GFX9-NEXT: s_cselect_b32 s2, s4, s2 ; GFX9-NEXT: s_cmp_eq_u32 s6, 3 ; GFX9-NEXT: s_cselect_b32 s3, s4, s3 -; GFX9-NEXT: s_lshr_b32 s4, s0, 8 -; GFX9-NEXT: s_and_b32 s4, s4, s18 -; GFX9-NEXT: s_lshr_b32 s5, s0, 16 -; GFX9-NEXT: s_lshr_b32 s6, s0, 24 -; GFX9-NEXT: s_and_b32 s0, s0, s18 -; GFX9-NEXT: s_lshl_b32 s4, s4, 8 -; GFX9-NEXT: s_or_b32 s0, s0, s4 -; GFX9-NEXT: s_and_b32 s4, s5, s18 -; GFX9-NEXT: s_lshl_b32 s4, s4, 16 -; GFX9-NEXT: s_or_b32 s0, s0, s4 -; GFX9-NEXT: s_lshl_b32 s4, s6, 24 -; GFX9-NEXT: s_lshr_b32 s7, s1, 8 +; GFX9-NEXT: s_bfe_u32 s9, s0, s12 +; GFX9-NEXT: s_lshr_b32 s4, s0, 24 +; GFX9-NEXT: s_and_b32 s8, s0, s10 +; GFX9-NEXT: s_bfe_u32 s0, s0, s13 +; GFX9-NEXT: s_lshl_b32 s9, s9, 8 +; GFX9-NEXT: s_or_b32 s8, s8, s9 +; GFX9-NEXT: s_lshl_b32 s0, s0, 16 +; GFX9-NEXT: s_or_b32 s0, s8, s0 +; GFX9-NEXT: s_bfe_u32 s8, s1, s12 +; GFX9-NEXT: s_lshl_b32 s4, s4, 24 ; GFX9-NEXT: s_or_b32 s0, s0, s4 -; GFX9-NEXT: s_and_b32 s4, s7, s18 -; GFX9-NEXT: s_lshr_b32 s8, s1, 16 -; GFX9-NEXT: s_lshr_b32 s9, s1, 24 -; GFX9-NEXT: s_and_b32 s1, s1, s18 -; GFX9-NEXT: s_lshl_b32 s4, s4, 8 -; GFX9-NEXT: s_or_b32 s1, s1, s4 -; GFX9-NEXT: s_and_b32 s4, s8, s18 -; GFX9-NEXT: s_lshl_b32 s4, s4, 16 -; GFX9-NEXT: s_or_b32 s1, s1, s4 -; GFX9-NEXT: s_lshl_b32 s4, s9, 24 -; GFX9-NEXT: s_lshr_b32 s10, s2, 8 +; GFX9-NEXT: s_lshr_b32 s5, s1, 24 +; GFX9-NEXT: s_and_b32 s4, s1, s10 +; GFX9-NEXT: s_bfe_u32 s1, s1, s13 +; GFX9-NEXT: s_lshl_b32 s8, s8, 8 +; GFX9-NEXT: s_or_b32 s4, s4, s8 +; GFX9-NEXT: s_lshl_b32 s1, s1, 16 +; GFX9-NEXT: s_or_b32 s1, s4, s1 +; GFX9-NEXT: s_lshl_b32 s4, s5, 24 +; GFX9-NEXT: s_bfe_u32 s5, s2, s12 ; GFX9-NEXT: s_or_b32 s1, s1, s4 -; GFX9-NEXT: s_and_b32 s4, s10, s18 -; GFX9-NEXT: s_lshr_b32 s11, s2, 16 -; GFX9-NEXT: s_lshr_b32 s12, s2, 24 -; GFX9-NEXT: s_and_b32 s2, s2, s18 -; GFX9-NEXT: s_lshl_b32 s4, s4, 8 -; GFX9-NEXT: s_or_b32 s2, s2, s4 -; GFX9-NEXT: s_and_b32 s4, s11, s18 -; GFX9-NEXT: s_lshl_b32 s4, s4, 16 -; GFX9-NEXT: s_or_b32 s2, s2, s4 -; GFX9-NEXT: s_lshl_b32 s4, s12, 24 -; GFX9-NEXT: s_lshr_b32 s13, s3, 8 +; GFX9-NEXT: s_lshr_b32 s6, s2, 24 +; GFX9-NEXT: s_and_b32 s4, s2, s10 +; GFX9-NEXT: s_bfe_u32 s2, s2, s13 +; GFX9-NEXT: s_lshl_b32 s5, s5, 8 +; GFX9-NEXT: s_or_b32 s4, s4, s5 +; GFX9-NEXT: s_lshl_b32 s2, s2, 16 +; GFX9-NEXT: s_bfe_u32 s5, s3, s12 +; GFX9-NEXT: s_or_b32 s2, s4, s2 +; GFX9-NEXT: s_lshl_b32 s4, s6, 24 ; GFX9-NEXT: s_or_b32 s2, s2, s4 -; GFX9-NEXT: s_and_b32 s4, s13, s18 -; GFX9-NEXT: s_lshr_b32 s14, s3, 16 -; GFX9-NEXT: s_lshr_b32 s15, s3, 24 -; GFX9-NEXT: s_and_b32 s3, s3, s18 -; GFX9-NEXT: s_lshl_b32 s4, s4, 8 -; GFX9-NEXT: s_or_b32 s3, s3, s4 -; GFX9-NEXT: s_and_b32 s4, s14, s18 -; GFX9-NEXT: s_lshl_b32 s4, s4, 16 -; GFX9-NEXT: s_or_b32 s3, s3, s4 -; GFX9-NEXT: s_lshl_b32 s4, s15, 24 +; GFX9-NEXT: s_lshr_b32 s7, s3, 24 +; GFX9-NEXT: s_and_b32 s4, s3, s10 +; GFX9-NEXT: s_bfe_u32 s3, s3, s13 +; GFX9-NEXT: s_lshl_b32 s5, s5, 8 +; GFX9-NEXT: s_or_b32 s4, s4, s5 +; GFX9-NEXT: s_lshl_b32 s3, s3, 16 +; GFX9-NEXT: s_or_b32 s3, s4, s3 +; GFX9-NEXT: s_lshl_b32 s4, s7, 24 ; GFX9-NEXT: s_or_b32 s3, s3, s4 ; GFX9-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s1 @@ -4074,57 +3872,51 @@ define amdgpu_ps void @insertelement_s_v16i8_s_s(<16 x i8> addrspace(4)* inreg % ; GFX8-LABEL: insertelement_s_v16i8_s_s: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 -; GFX8-NEXT: s_movk_i32 s18, 0xff +; GFX8-NEXT: s_mov_b32 s12, 0x80008 +; GFX8-NEXT: s_movk_i32 s10, 0xff ; GFX8-NEXT: v_mov_b32_e32 v4, 0 ; GFX8-NEXT: v_mov_b32_e32 v5, 0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_lshr_b32 s6, s0, 8 -; GFX8-NEXT: s_and_b32 s6, s6, s18 -; GFX8-NEXT: s_lshr_b32 s7, s0, 16 -; GFX8-NEXT: s_lshr_b32 s8, s0, 24 -; GFX8-NEXT: s_and_b32 s0, s0, s18 -; GFX8-NEXT: s_lshl_b32 s6, s6, 8 -; GFX8-NEXT: s_or_b32 s0, s0, s6 -; GFX8-NEXT: s_and_b32 s6, s7, s18 -; GFX8-NEXT: s_lshl_b32 s6, s6, 16 -; GFX8-NEXT: s_or_b32 s0, s0, s6 -; GFX8-NEXT: s_lshl_b32 s6, s8, 24 -; GFX8-NEXT: s_lshr_b32 s9, s1, 8 +; GFX8-NEXT: s_bfe_u32 s13, s0, s12 +; GFX8-NEXT: s_and_b32 s11, s0, s10 +; GFX8-NEXT: s_lshl_b32 s13, s13, 8 +; GFX8-NEXT: s_or_b32 s11, s11, s13 +; GFX8-NEXT: s_mov_b32 s13, 0x80010 +; GFX8-NEXT: s_lshr_b32 s6, s0, 24 +; GFX8-NEXT: s_bfe_u32 s0, s0, s13 +; GFX8-NEXT: s_lshl_b32 s0, s0, 16 +; GFX8-NEXT: s_or_b32 s0, s11, s0 +; GFX8-NEXT: s_bfe_u32 s11, s1, s12 +; GFX8-NEXT: s_lshl_b32 s6, s6, 24 ; GFX8-NEXT: s_or_b32 s0, s0, s6 -; GFX8-NEXT: s_and_b32 s6, s9, s18 -; GFX8-NEXT: s_lshr_b32 s10, s1, 16 -; GFX8-NEXT: s_lshr_b32 s11, s1, 24 -; GFX8-NEXT: s_and_b32 s1, s1, s18 -; GFX8-NEXT: s_lshl_b32 s6, s6, 8 -; GFX8-NEXT: s_or_b32 s1, s1, s6 -; GFX8-NEXT: s_and_b32 s6, s10, s18 -; GFX8-NEXT: s_lshl_b32 s6, s6, 16 -; GFX8-NEXT: s_or_b32 s1, s1, s6 -; GFX8-NEXT: s_lshl_b32 s6, s11, 24 -; GFX8-NEXT: s_lshr_b32 s12, s2, 8 +; GFX8-NEXT: s_lshr_b32 s7, s1, 24 +; GFX8-NEXT: s_and_b32 s6, s1, s10 +; GFX8-NEXT: s_bfe_u32 s1, s1, s13 +; GFX8-NEXT: s_lshl_b32 s11, s11, 8 +; GFX8-NEXT: s_or_b32 s6, s6, s11 +; GFX8-NEXT: s_lshl_b32 s1, s1, 16 +; GFX8-NEXT: s_or_b32 s1, s6, s1 +; GFX8-NEXT: s_lshl_b32 s6, s7, 24 +; GFX8-NEXT: s_bfe_u32 s7, s2, s12 ; GFX8-NEXT: s_or_b32 s1, s1, s6 -; GFX8-NEXT: s_and_b32 s6, s12, s18 -; GFX8-NEXT: s_lshr_b32 s13, s2, 16 -; GFX8-NEXT: s_lshr_b32 s14, s2, 24 -; GFX8-NEXT: s_and_b32 s2, s2, s18 -; GFX8-NEXT: s_lshl_b32 s6, s6, 8 -; GFX8-NEXT: s_or_b32 s2, s2, s6 -; GFX8-NEXT: s_and_b32 s6, s13, s18 -; GFX8-NEXT: s_lshl_b32 s6, s6, 16 -; GFX8-NEXT: s_or_b32 s2, s2, s6 -; GFX8-NEXT: s_lshl_b32 s6, s14, 24 -; GFX8-NEXT: s_lshr_b32 s15, s3, 8 +; GFX8-NEXT: s_lshr_b32 s8, s2, 24 +; GFX8-NEXT: s_and_b32 s6, s2, s10 +; GFX8-NEXT: s_bfe_u32 s2, s2, s13 +; GFX8-NEXT: s_lshl_b32 s7, s7, 8 +; GFX8-NEXT: s_or_b32 s6, s6, s7 +; GFX8-NEXT: s_lshl_b32 s2, s2, 16 +; GFX8-NEXT: s_bfe_u32 s7, s3, s12 +; GFX8-NEXT: s_or_b32 s2, s6, s2 +; GFX8-NEXT: s_lshl_b32 s6, s8, 24 ; GFX8-NEXT: s_or_b32 s2, s2, s6 -; GFX8-NEXT: s_and_b32 s6, s15, s18 -; GFX8-NEXT: s_lshr_b32 s16, s3, 16 -; GFX8-NEXT: s_lshr_b32 s17, s3, 24 -; GFX8-NEXT: s_and_b32 s3, s3, s18 -; GFX8-NEXT: s_lshl_b32 s6, s6, 8 -; GFX8-NEXT: s_or_b32 s3, s3, s6 -; GFX8-NEXT: s_and_b32 s6, s16, s18 -; GFX8-NEXT: s_lshl_b32 s6, s6, 16 -; GFX8-NEXT: s_or_b32 s3, s3, s6 -; GFX8-NEXT: s_lshl_b32 s6, s17, 24 +; GFX8-NEXT: s_lshr_b32 s9, s3, 24 +; GFX8-NEXT: s_and_b32 s6, s3, s10 +; GFX8-NEXT: s_bfe_u32 s3, s3, s13 +; GFX8-NEXT: s_lshl_b32 s7, s7, 8 +; GFX8-NEXT: s_or_b32 s6, s6, s7 +; GFX8-NEXT: s_lshl_b32 s3, s3, 16 +; GFX8-NEXT: s_or_b32 s3, s6, s3 +; GFX8-NEXT: s_lshl_b32 s6, s9, 24 ; GFX8-NEXT: s_or_b32 s3, s3, s6 ; GFX8-NEXT: s_lshr_b32 s6, s5, 2 ; GFX8-NEXT: s_cmp_eq_u32 s6, 1 @@ -4135,9 +3927,9 @@ define amdgpu_ps void @insertelement_s_v16i8_s_s(<16 x i8> addrspace(4)* inreg % ; GFX8-NEXT: s_cselect_b32 s7, s3, s7 ; GFX8-NEXT: s_and_b32 s5, s5, 3 ; GFX8-NEXT: s_lshl_b32 s5, s5, 3 -; GFX8-NEXT: s_and_b32 s4, s4, s18 +; GFX8-NEXT: s_and_b32 s4, s4, s10 ; GFX8-NEXT: s_lshl_b32 s4, s4, s5 -; GFX8-NEXT: s_lshl_b32 s5, s18, s5 +; GFX8-NEXT: s_lshl_b32 s5, s10, s5 ; GFX8-NEXT: s_andn2_b32 s5, s7, s5 ; GFX8-NEXT: s_or_b32 s4, s5, s4 ; GFX8-NEXT: s_cmp_eq_u32 s6, 0 @@ -4148,53 +3940,45 @@ define amdgpu_ps void @insertelement_s_v16i8_s_s(<16 x i8> addrspace(4)* inreg % ; GFX8-NEXT: s_cselect_b32 s2, s4, s2 ; GFX8-NEXT: s_cmp_eq_u32 s6, 3 ; GFX8-NEXT: s_cselect_b32 s3, s4, s3 -; GFX8-NEXT: s_lshr_b32 s4, s0, 8 -; GFX8-NEXT: s_and_b32 s4, s4, s18 -; GFX8-NEXT: s_lshr_b32 s5, s0, 16 -; GFX8-NEXT: s_lshr_b32 s6, s0, 24 -; GFX8-NEXT: s_and_b32 s0, s0, s18 -; GFX8-NEXT: s_lshl_b32 s4, s4, 8 -; GFX8-NEXT: s_or_b32 s0, s0, s4 -; GFX8-NEXT: s_and_b32 s4, s5, s18 -; GFX8-NEXT: s_lshl_b32 s4, s4, 16 -; GFX8-NEXT: s_or_b32 s0, s0, s4 -; GFX8-NEXT: s_lshl_b32 s4, s6, 24 -; GFX8-NEXT: s_lshr_b32 s7, s1, 8 +; GFX8-NEXT: s_bfe_u32 s9, s0, s12 +; GFX8-NEXT: s_lshr_b32 s4, s0, 24 +; GFX8-NEXT: s_and_b32 s8, s0, s10 +; GFX8-NEXT: s_bfe_u32 s0, s0, s13 +; GFX8-NEXT: s_lshl_b32 s9, s9, 8 +; GFX8-NEXT: s_or_b32 s8, s8, s9 +; GFX8-NEXT: s_lshl_b32 s0, s0, 16 +; GFX8-NEXT: s_or_b32 s0, s8, s0 +; GFX8-NEXT: s_bfe_u32 s8, s1, s12 +; GFX8-NEXT: s_lshl_b32 s4, s4, 24 ; GFX8-NEXT: s_or_b32 s0, s0, s4 -; GFX8-NEXT: s_and_b32 s4, s7, s18 -; GFX8-NEXT: s_lshr_b32 s8, s1, 16 -; GFX8-NEXT: s_lshr_b32 s9, s1, 24 -; GFX8-NEXT: s_and_b32 s1, s1, s18 -; GFX8-NEXT: s_lshl_b32 s4, s4, 8 -; GFX8-NEXT: s_or_b32 s1, s1, s4 -; GFX8-NEXT: s_and_b32 s4, s8, s18 -; GFX8-NEXT: s_lshl_b32 s4, s4, 16 -; GFX8-NEXT: s_or_b32 s1, s1, s4 -; GFX8-NEXT: s_lshl_b32 s4, s9, 24 -; GFX8-NEXT: s_lshr_b32 s10, s2, 8 +; GFX8-NEXT: s_lshr_b32 s5, s1, 24 +; GFX8-NEXT: s_and_b32 s4, s1, s10 +; GFX8-NEXT: s_bfe_u32 s1, s1, s13 +; GFX8-NEXT: s_lshl_b32 s8, s8, 8 +; GFX8-NEXT: s_or_b32 s4, s4, s8 +; GFX8-NEXT: s_lshl_b32 s1, s1, 16 +; GFX8-NEXT: s_or_b32 s1, s4, s1 +; GFX8-NEXT: s_lshl_b32 s4, s5, 24 +; GFX8-NEXT: s_bfe_u32 s5, s2, s12 ; GFX8-NEXT: s_or_b32 s1, s1, s4 -; GFX8-NEXT: s_and_b32 s4, s10, s18 -; GFX8-NEXT: s_lshr_b32 s11, s2, 16 -; GFX8-NEXT: s_lshr_b32 s12, s2, 24 -; GFX8-NEXT: s_and_b32 s2, s2, s18 -; GFX8-NEXT: s_lshl_b32 s4, s4, 8 -; GFX8-NEXT: s_or_b32 s2, s2, s4 -; GFX8-NEXT: s_and_b32 s4, s11, s18 -; GFX8-NEXT: s_lshl_b32 s4, s4, 16 -; GFX8-NEXT: s_or_b32 s2, s2, s4 -; GFX8-NEXT: s_lshl_b32 s4, s12, 24 -; GFX8-NEXT: s_lshr_b32 s13, s3, 8 +; GFX8-NEXT: s_lshr_b32 s6, s2, 24 +; GFX8-NEXT: s_and_b32 s4, s2, s10 +; GFX8-NEXT: s_bfe_u32 s2, s2, s13 +; GFX8-NEXT: s_lshl_b32 s5, s5, 8 +; GFX8-NEXT: s_or_b32 s4, s4, s5 +; GFX8-NEXT: s_lshl_b32 s2, s2, 16 +; GFX8-NEXT: s_bfe_u32 s5, s3, s12 +; GFX8-NEXT: s_or_b32 s2, s4, s2 +; GFX8-NEXT: s_lshl_b32 s4, s6, 24 ; GFX8-NEXT: s_or_b32 s2, s2, s4 -; GFX8-NEXT: s_and_b32 s4, s13, s18 -; GFX8-NEXT: s_lshr_b32 s14, s3, 16 -; GFX8-NEXT: s_lshr_b32 s15, s3, 24 -; GFX8-NEXT: s_and_b32 s3, s3, s18 -; GFX8-NEXT: s_lshl_b32 s4, s4, 8 -; GFX8-NEXT: s_or_b32 s3, s3, s4 -; GFX8-NEXT: s_and_b32 s4, s14, s18 -; GFX8-NEXT: s_lshl_b32 s4, s4, 16 -; GFX8-NEXT: s_or_b32 s3, s3, s4 -; GFX8-NEXT: s_lshl_b32 s4, s15, 24 +; GFX8-NEXT: s_lshr_b32 s7, s3, 24 +; GFX8-NEXT: s_and_b32 s4, s3, s10 +; GFX8-NEXT: s_bfe_u32 s3, s3, s13 +; GFX8-NEXT: s_lshl_b32 s5, s5, 8 +; GFX8-NEXT: s_or_b32 s4, s4, s5 +; GFX8-NEXT: s_lshl_b32 s3, s3, 16 +; GFX8-NEXT: s_or_b32 s3, s4, s3 +; GFX8-NEXT: s_lshl_b32 s4, s7, 24 ; GFX8-NEXT: s_or_b32 s3, s3, s4 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 @@ -4206,55 +3990,49 @@ define amdgpu_ps void @insertelement_s_v16i8_s_s(<16 x i8> addrspace(4)* inreg % ; GFX7-LABEL: insertelement_s_v16i8_s_s: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 -; GFX7-NEXT: s_movk_i32 s18, 0xff +; GFX7-NEXT: s_mov_b32 s12, 0x80008 +; GFX7-NEXT: s_movk_i32 s10, 0xff ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_lshr_b32 s6, s0, 8 -; GFX7-NEXT: s_and_b32 s6, s6, s18 -; GFX7-NEXT: s_lshr_b32 s7, s0, 16 -; GFX7-NEXT: s_lshr_b32 s8, s0, 24 -; GFX7-NEXT: s_and_b32 s0, s0, s18 -; GFX7-NEXT: s_lshl_b32 s6, s6, 8 -; GFX7-NEXT: s_or_b32 s0, s0, s6 -; GFX7-NEXT: s_and_b32 s6, s7, s18 -; GFX7-NEXT: s_lshl_b32 s6, s6, 16 -; GFX7-NEXT: s_or_b32 s0, s0, s6 -; GFX7-NEXT: s_lshl_b32 s6, s8, 24 -; GFX7-NEXT: s_lshr_b32 s9, s1, 8 +; GFX7-NEXT: s_bfe_u32 s13, s0, s12 +; GFX7-NEXT: s_and_b32 s11, s0, s10 +; GFX7-NEXT: s_lshl_b32 s13, s13, 8 +; GFX7-NEXT: s_or_b32 s11, s11, s13 +; GFX7-NEXT: s_mov_b32 s13, 0x80010 +; GFX7-NEXT: s_lshr_b32 s6, s0, 24 +; GFX7-NEXT: s_bfe_u32 s0, s0, s13 +; GFX7-NEXT: s_lshl_b32 s0, s0, 16 +; GFX7-NEXT: s_or_b32 s0, s11, s0 +; GFX7-NEXT: s_bfe_u32 s11, s1, s12 +; GFX7-NEXT: s_lshl_b32 s6, s6, 24 ; GFX7-NEXT: s_or_b32 s0, s0, s6 -; GFX7-NEXT: s_and_b32 s6, s9, s18 -; GFX7-NEXT: s_lshr_b32 s10, s1, 16 -; GFX7-NEXT: s_lshr_b32 s11, s1, 24 -; GFX7-NEXT: s_and_b32 s1, s1, s18 -; GFX7-NEXT: s_lshl_b32 s6, s6, 8 +; GFX7-NEXT: s_lshr_b32 s7, s1, 24 +; GFX7-NEXT: s_and_b32 s6, s1, s10 +; GFX7-NEXT: s_bfe_u32 s1, s1, s13 +; GFX7-NEXT: s_lshl_b32 s11, s11, 8 +; GFX7-NEXT: s_or_b32 s6, s6, s11 +; GFX7-NEXT: s_lshl_b32 s1, s1, 16 +; GFX7-NEXT: s_or_b32 s1, s6, s1 +; GFX7-NEXT: s_lshl_b32 s6, s7, 24 +; GFX7-NEXT: s_bfe_u32 s7, s2, s12 ; GFX7-NEXT: s_or_b32 s1, s1, s6 -; GFX7-NEXT: s_and_b32 s6, s10, s18 -; GFX7-NEXT: s_lshl_b32 s6, s6, 16 -; GFX7-NEXT: s_or_b32 s1, s1, s6 -; GFX7-NEXT: s_lshl_b32 s6, s11, 24 -; GFX7-NEXT: s_lshr_b32 s12, s2, 8 -; GFX7-NEXT: s_or_b32 s1, s1, s6 -; GFX7-NEXT: s_and_b32 s6, s12, s18 -; GFX7-NEXT: s_lshr_b32 s13, s2, 16 -; GFX7-NEXT: s_lshr_b32 s14, s2, 24 -; GFX7-NEXT: s_and_b32 s2, s2, s18 -; GFX7-NEXT: s_lshl_b32 s6, s6, 8 -; GFX7-NEXT: s_or_b32 s2, s2, s6 -; GFX7-NEXT: s_and_b32 s6, s13, s18 -; GFX7-NEXT: s_lshl_b32 s6, s6, 16 -; GFX7-NEXT: s_or_b32 s2, s2, s6 -; GFX7-NEXT: s_lshl_b32 s6, s14, 24 -; GFX7-NEXT: s_lshr_b32 s15, s3, 8 +; GFX7-NEXT: s_lshr_b32 s8, s2, 24 +; GFX7-NEXT: s_and_b32 s6, s2, s10 +; GFX7-NEXT: s_bfe_u32 s2, s2, s13 +; GFX7-NEXT: s_lshl_b32 s7, s7, 8 +; GFX7-NEXT: s_or_b32 s6, s6, s7 +; GFX7-NEXT: s_lshl_b32 s2, s2, 16 +; GFX7-NEXT: s_bfe_u32 s7, s3, s12 +; GFX7-NEXT: s_or_b32 s2, s6, s2 +; GFX7-NEXT: s_lshl_b32 s6, s8, 24 ; GFX7-NEXT: s_or_b32 s2, s2, s6 -; GFX7-NEXT: s_and_b32 s6, s15, s18 -; GFX7-NEXT: s_lshr_b32 s16, s3, 16 -; GFX7-NEXT: s_lshr_b32 s17, s3, 24 -; GFX7-NEXT: s_and_b32 s3, s3, s18 -; GFX7-NEXT: s_lshl_b32 s6, s6, 8 -; GFX7-NEXT: s_or_b32 s3, s3, s6 -; GFX7-NEXT: s_and_b32 s6, s16, s18 -; GFX7-NEXT: s_lshl_b32 s6, s6, 16 -; GFX7-NEXT: s_or_b32 s3, s3, s6 -; GFX7-NEXT: s_lshl_b32 s6, s17, 24 +; GFX7-NEXT: s_lshr_b32 s9, s3, 24 +; GFX7-NEXT: s_and_b32 s6, s3, s10 +; GFX7-NEXT: s_bfe_u32 s3, s3, s13 +; GFX7-NEXT: s_lshl_b32 s7, s7, 8 +; GFX7-NEXT: s_or_b32 s6, s6, s7 +; GFX7-NEXT: s_lshl_b32 s3, s3, 16 +; GFX7-NEXT: s_or_b32 s3, s6, s3 +; GFX7-NEXT: s_lshl_b32 s6, s9, 24 ; GFX7-NEXT: s_or_b32 s3, s3, s6 ; GFX7-NEXT: s_lshr_b32 s6, s5, 2 ; GFX7-NEXT: s_cmp_eq_u32 s6, 1 @@ -4265,9 +4043,9 @@ define amdgpu_ps void @insertelement_s_v16i8_s_s(<16 x i8> addrspace(4)* inreg % ; GFX7-NEXT: s_cselect_b32 s7, s3, s7 ; GFX7-NEXT: s_and_b32 s5, s5, 3 ; GFX7-NEXT: s_lshl_b32 s5, s5, 3 -; GFX7-NEXT: s_and_b32 s4, s4, s18 +; GFX7-NEXT: s_and_b32 s4, s4, s10 ; GFX7-NEXT: s_lshl_b32 s4, s4, s5 -; GFX7-NEXT: s_lshl_b32 s5, s18, s5 +; GFX7-NEXT: s_lshl_b32 s5, s10, s5 ; GFX7-NEXT: s_andn2_b32 s5, s7, s5 ; GFX7-NEXT: s_or_b32 s4, s5, s4 ; GFX7-NEXT: s_cmp_eq_u32 s6, 0 @@ -4278,53 +4056,45 @@ define amdgpu_ps void @insertelement_s_v16i8_s_s(<16 x i8> addrspace(4)* inreg % ; GFX7-NEXT: s_cselect_b32 s2, s4, s2 ; GFX7-NEXT: s_cmp_eq_u32 s6, 3 ; GFX7-NEXT: s_cselect_b32 s3, s4, s3 -; GFX7-NEXT: s_lshr_b32 s4, s5, 8 -; GFX7-NEXT: s_and_b32 s4, s4, s18 -; GFX7-NEXT: s_lshr_b32 s6, s5, 16 -; GFX7-NEXT: s_lshr_b32 s8, s5, 24 -; GFX7-NEXT: s_and_b32 s5, s5, s18 -; GFX7-NEXT: s_lshl_b32 s4, s4, 8 -; GFX7-NEXT: s_or_b32 s4, s5, s4 -; GFX7-NEXT: s_and_b32 s5, s6, s18 -; GFX7-NEXT: s_lshr_b32 s9, s7, 8 +; GFX7-NEXT: s_bfe_u32 s14, s5, s12 +; GFX7-NEXT: s_lshr_b32 s4, s5, 24 +; GFX7-NEXT: s_and_b32 s11, s5, s10 +; GFX7-NEXT: s_bfe_u32 s5, s5, s13 +; GFX7-NEXT: s_lshl_b32 s14, s14, 8 +; GFX7-NEXT: s_or_b32 s11, s11, s14 ; GFX7-NEXT: s_lshl_b32 s5, s5, 16 -; GFX7-NEXT: s_and_b32 s6, s9, s18 -; GFX7-NEXT: s_or_b32 s4, s4, s5 -; GFX7-NEXT: s_lshl_b32 s5, s8, 24 -; GFX7-NEXT: s_or_b32 s4, s4, s5 -; GFX7-NEXT: s_lshr_b32 s10, s7, 16 -; GFX7-NEXT: s_and_b32 s5, s7, s18 -; GFX7-NEXT: s_lshl_b32 s6, s6, 8 -; GFX7-NEXT: s_or_b32 s5, s5, s6 -; GFX7-NEXT: s_and_b32 s6, s10, s18 -; GFX7-NEXT: s_lshl_b32 s6, s6, 16 -; GFX7-NEXT: s_lshr_b32 s11, s7, 24 -; GFX7-NEXT: s_or_b32 s5, s5, s6 -; GFX7-NEXT: s_lshl_b32 s6, s11, 24 -; GFX7-NEXT: s_lshr_b32 s12, s2, 8 +; GFX7-NEXT: s_or_b32 s5, s11, s5 +; GFX7-NEXT: s_lshl_b32 s4, s4, 24 +; GFX7-NEXT: s_bfe_u32 s11, s7, s12 +; GFX7-NEXT: s_lshr_b32 s6, s7, 24 +; GFX7-NEXT: s_or_b32 s4, s5, s4 +; GFX7-NEXT: s_and_b32 s5, s7, s10 +; GFX7-NEXT: s_bfe_u32 s7, s7, s13 +; GFX7-NEXT: s_lshl_b32 s11, s11, 8 +; GFX7-NEXT: s_or_b32 s5, s5, s11 +; GFX7-NEXT: s_lshl_b32 s7, s7, 16 +; GFX7-NEXT: s_or_b32 s5, s5, s7 +; GFX7-NEXT: s_bfe_u32 s7, s2, s12 +; GFX7-NEXT: s_lshl_b32 s6, s6, 24 ; GFX7-NEXT: s_or_b32 s5, s5, s6 -; GFX7-NEXT: s_and_b32 s6, s12, s18 -; GFX7-NEXT: s_lshr_b32 s13, s2, 16 -; GFX7-NEXT: s_lshr_b32 s14, s2, 24 -; GFX7-NEXT: s_and_b32 s2, s2, s18 -; GFX7-NEXT: s_lshl_b32 s6, s6, 8 -; GFX7-NEXT: s_or_b32 s2, s2, s6 -; GFX7-NEXT: s_and_b32 s6, s13, s18 -; GFX7-NEXT: s_lshl_b32 s6, s6, 16 -; GFX7-NEXT: s_or_b32 s2, s2, s6 -; GFX7-NEXT: s_lshl_b32 s6, s14, 24 -; GFX7-NEXT: s_lshr_b32 s15, s3, 8 +; GFX7-NEXT: s_lshr_b32 s8, s2, 24 +; GFX7-NEXT: s_and_b32 s6, s2, s10 +; GFX7-NEXT: s_bfe_u32 s2, s2, s13 +; GFX7-NEXT: s_lshl_b32 s7, s7, 8 +; GFX7-NEXT: s_or_b32 s6, s6, s7 +; GFX7-NEXT: s_lshl_b32 s2, s2, 16 +; GFX7-NEXT: s_bfe_u32 s7, s3, s12 +; GFX7-NEXT: s_or_b32 s2, s6, s2 +; GFX7-NEXT: s_lshl_b32 s6, s8, 24 ; GFX7-NEXT: s_or_b32 s6, s2, s6 -; GFX7-NEXT: s_lshr_b32 s16, s3, 16 -; GFX7-NEXT: s_lshr_b32 s17, s3, 24 -; GFX7-NEXT: s_and_b32 s2, s3, s18 -; GFX7-NEXT: s_and_b32 s3, s15, s18 -; GFX7-NEXT: s_lshl_b32 s3, s3, 8 -; GFX7-NEXT: s_or_b32 s2, s2, s3 -; GFX7-NEXT: s_and_b32 s3, s16, s18 +; GFX7-NEXT: s_lshr_b32 s9, s3, 24 +; GFX7-NEXT: s_and_b32 s2, s3, s10 +; GFX7-NEXT: s_bfe_u32 s3, s3, s13 +; GFX7-NEXT: s_lshl_b32 s7, s7, 8 +; GFX7-NEXT: s_or_b32 s2, s2, s7 ; GFX7-NEXT: s_lshl_b32 s3, s3, 16 ; GFX7-NEXT: s_or_b32 s2, s2, s3 -; GFX7-NEXT: s_lshl_b32 s3, s17, 24 +; GFX7-NEXT: s_lshl_b32 s3, s9, 24 ; GFX7-NEXT: s_or_b32 s7, s2, s3 ; GFX7-NEXT: v_mov_b32_e32 v0, s4 ; GFX7-NEXT: s_mov_b64 s[0:1], 0 @@ -4339,127 +4109,113 @@ define amdgpu_ps void @insertelement_s_v16i8_s_s(<16 x i8> addrspace(4)* inreg % ; GFX10-LABEL: insertelement_s_v16i8_s_s: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 +; GFX10-NEXT: s_mov_b32 s7, 0x80008 ; GFX10-NEXT: s_movk_i32 s6, 0xff +; GFX10-NEXT: s_mov_b32 s8, 0x80010 ; GFX10-NEXT: v_mov_b32_e32 v4, 0 ; GFX10-NEXT: v_mov_b32_e32 v5, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_lshr_b32 s7, s0, 8 -; GFX10-NEXT: s_lshr_b32 s8, s0, 16 -; GFX10-NEXT: s_and_b32 s7, s7, s6 +; GFX10-NEXT: s_bfe_u32 s14, s0, s7 ; GFX10-NEXT: s_lshr_b32 s9, s0, 24 -; GFX10-NEXT: s_and_b32 s8, s8, s6 -; GFX10-NEXT: s_and_b32 s0, s0, s6 -; GFX10-NEXT: s_lshl_b32 s7, s7, 8 -; GFX10-NEXT: s_lshr_b32 s13, s2, 8 -; GFX10-NEXT: s_lshl_b32 s8, s8, 16 -; GFX10-NEXT: s_or_b32 s0, s0, s7 -; GFX10-NEXT: s_lshr_b32 s10, s1, 8 -; GFX10-NEXT: s_or_b32 s0, s0, s8 -; GFX10-NEXT: s_and_b32 s8, s13, s6 +; GFX10-NEXT: s_bfe_u32 s16, s1, s7 +; GFX10-NEXT: s_and_b32 s13, s0, s6 +; GFX10-NEXT: s_bfe_u32 s0, s0, s8 +; GFX10-NEXT: s_lshl_b32 s14, s14, 8 +; GFX10-NEXT: s_lshr_b32 s10, s1, 24 +; GFX10-NEXT: s_and_b32 s15, s1, s6 +; GFX10-NEXT: s_bfe_u32 s1, s1, s8 +; GFX10-NEXT: s_lshl_b32 s0, s0, 16 +; GFX10-NEXT: s_or_b32 s13, s13, s14 +; GFX10-NEXT: s_lshl_b32 s16, s16, 8 +; GFX10-NEXT: s_bfe_u32 s18, s2, s7 ; GFX10-NEXT: s_lshl_b32 s9, s9, 24 -; GFX10-NEXT: s_lshr_b32 s14, s2, 16 -; GFX10-NEXT: s_lshr_b32 s11, s1, 16 -; GFX10-NEXT: s_and_b32 s10, s10, s6 -; GFX10-NEXT: s_lshr_b32 s15, s2, 24 +; GFX10-NEXT: s_or_b32 s0, s13, s0 +; GFX10-NEXT: s_lshl_b32 s1, s1, 16 +; GFX10-NEXT: s_or_b32 s14, s15, s16 ; GFX10-NEXT: s_or_b32 s0, s0, s9 -; GFX10-NEXT: s_and_b32 s2, s2, s6 -; GFX10-NEXT: s_lshl_b32 s8, s8, 8 -; GFX10-NEXT: s_and_b32 s9, s14, s6 -; GFX10-NEXT: s_lshr_b32 s12, s1, 24 -; GFX10-NEXT: s_and_b32 s11, s11, s6 -; GFX10-NEXT: s_or_b32 s2, s2, s8 -; GFX10-NEXT: s_lshl_b32 s8, s9, 16 -; GFX10-NEXT: s_lshr_b32 s16, s3, 8 -; GFX10-NEXT: s_and_b32 s1, s1, s6 -; GFX10-NEXT: s_lshl_b32 s10, s10, 8 -; GFX10-NEXT: s_lshr_b32 s17, s3, 16 -; GFX10-NEXT: s_or_b32 s2, s2, s8 -; GFX10-NEXT: s_and_b32 s8, s16, s6 -; GFX10-NEXT: s_lshl_b32 s7, s11, 16 +; GFX10-NEXT: s_lshr_b32 s11, s2, 24 +; GFX10-NEXT: s_and_b32 s17, s2, s6 +; GFX10-NEXT: s_lshl_b32 s9, s18, 8 +; GFX10-NEXT: s_bfe_u32 s2, s2, s8 +; GFX10-NEXT: s_lshl_b32 s10, s10, 24 +; GFX10-NEXT: s_or_b32 s1, s14, s1 +; GFX10-NEXT: s_or_b32 s9, s17, s9 +; GFX10-NEXT: s_lshl_b32 s2, s2, 16 ; GFX10-NEXT: s_or_b32 s1, s1, s10 -; GFX10-NEXT: s_lshr_b32 s18, s3, 24 -; GFX10-NEXT: s_or_b32 s1, s1, s7 -; GFX10-NEXT: s_lshl_b32 s7, s12, 24 -; GFX10-NEXT: s_and_b32 s3, s3, s6 -; GFX10-NEXT: s_lshl_b32 s8, s8, 8 -; GFX10-NEXT: s_and_b32 s9, s17, s6 -; GFX10-NEXT: s_or_b32 s1, s1, s7 -; GFX10-NEXT: s_lshl_b32 s7, s15, 24 -; GFX10-NEXT: s_or_b32 s3, s3, s8 -; GFX10-NEXT: s_lshl_b32 s8, s9, 16 -; GFX10-NEXT: s_or_b32 s2, s2, s7 -; GFX10-NEXT: s_or_b32 s3, s3, s8 -; GFX10-NEXT: s_lshl_b32 s7, s18, 24 -; GFX10-NEXT: s_lshr_b32 s8, s5, 2 -; GFX10-NEXT: s_or_b32 s3, s3, s7 -; GFX10-NEXT: s_cmp_eq_u32 s8, 1 -; GFX10-NEXT: s_cselect_b32 s7, s1, s0 -; GFX10-NEXT: s_cmp_eq_u32 s8, 2 -; GFX10-NEXT: s_cselect_b32 s7, s2, s7 -; GFX10-NEXT: s_cmp_eq_u32 s8, 3 -; GFX10-NEXT: s_cselect_b32 s7, s3, s7 +; GFX10-NEXT: s_bfe_u32 s10, s3, s7 +; GFX10-NEXT: s_or_b32 s2, s9, s2 +; GFX10-NEXT: s_lshl_b32 s9, s11, 24 +; GFX10-NEXT: s_lshr_b32 s12, s3, 24 +; GFX10-NEXT: s_and_b32 s11, s3, s6 +; GFX10-NEXT: s_lshl_b32 s10, s10, 8 +; GFX10-NEXT: s_bfe_u32 s3, s3, s8 +; GFX10-NEXT: s_or_b32 s10, s11, s10 +; GFX10-NEXT: s_lshl_b32 s3, s3, 16 +; GFX10-NEXT: s_or_b32 s2, s2, s9 +; GFX10-NEXT: s_or_b32 s3, s10, s3 +; GFX10-NEXT: s_lshl_b32 s9, s12, 24 +; GFX10-NEXT: s_lshr_b32 s10, s5, 2 +; GFX10-NEXT: s_or_b32 s3, s3, s9 +; GFX10-NEXT: s_cmp_eq_u32 s10, 1 +; GFX10-NEXT: s_cselect_b32 s9, s1, s0 +; GFX10-NEXT: s_cmp_eq_u32 s10, 2 +; GFX10-NEXT: s_cselect_b32 s9, s2, s9 +; GFX10-NEXT: s_cmp_eq_u32 s10, 3 +; GFX10-NEXT: s_cselect_b32 s9, s3, s9 ; GFX10-NEXT: s_and_b32 s5, s5, 3 ; GFX10-NEXT: s_and_b32 s4, s4, s6 ; GFX10-NEXT: s_lshl_b32 s5, s5, 3 -; GFX10-NEXT: s_lshl_b32 s9, s6, s5 +; GFX10-NEXT: s_lshl_b32 s11, s6, s5 ; GFX10-NEXT: s_lshl_b32 s4, s4, s5 -; GFX10-NEXT: s_andn2_b32 s5, s7, s9 +; GFX10-NEXT: s_andn2_b32 s5, s9, s11 ; GFX10-NEXT: s_or_b32 s4, s5, s4 -; GFX10-NEXT: s_cmp_eq_u32 s8, 0 +; GFX10-NEXT: s_cmp_eq_u32 s10, 0 ; GFX10-NEXT: s_cselect_b32 s0, s4, s0 -; GFX10-NEXT: s_cmp_eq_u32 s8, 1 +; GFX10-NEXT: s_cmp_eq_u32 s10, 1 ; GFX10-NEXT: s_cselect_b32 s1, s4, s1 -; GFX10-NEXT: s_cmp_eq_u32 s8, 2 +; GFX10-NEXT: s_cmp_eq_u32 s10, 2 ; GFX10-NEXT: s_cselect_b32 s2, s4, s2 -; GFX10-NEXT: s_cmp_eq_u32 s8, 3 +; GFX10-NEXT: s_cmp_eq_u32 s10, 3 ; GFX10-NEXT: s_cselect_b32 s3, s4, s3 -; GFX10-NEXT: s_lshr_b32 s4, s0, 8 -; GFX10-NEXT: s_lshr_b32 s5, s0, 16 -; GFX10-NEXT: s_and_b32 s4, s4, s6 -; GFX10-NEXT: s_lshr_b32 s7, s0, 24 -; GFX10-NEXT: s_and_b32 s0, s0, s6 -; GFX10-NEXT: s_lshl_b32 s4, s4, 8 -; GFX10-NEXT: s_and_b32 s5, s5, s6 -; GFX10-NEXT: s_or_b32 s0, s0, s4 -; GFX10-NEXT: s_lshl_b32 s4, s5, 16 -; GFX10-NEXT: s_lshr_b32 s8, s1, 8 -; GFX10-NEXT: s_or_b32 s0, s0, s4 -; GFX10-NEXT: s_lshl_b32 s4, s7, 24 -; GFX10-NEXT: s_and_b32 s7, s8, s6 -; GFX10-NEXT: s_lshr_b32 s9, s1, 16 -; GFX10-NEXT: s_lshr_b32 s10, s1, 24 -; GFX10-NEXT: s_and_b32 s1, s1, s6 -; GFX10-NEXT: s_lshl_b32 s7, s7, 8 -; GFX10-NEXT: s_and_b32 s8, s9, s6 -; GFX10-NEXT: s_or_b32 s1, s1, s7 -; GFX10-NEXT: s_lshl_b32 s7, s8, 16 +; GFX10-NEXT: s_bfe_u32 s10, s0, s7 +; GFX10-NEXT: s_lshr_b32 s4, s0, 24 +; GFX10-NEXT: s_and_b32 s11, s0, s6 +; GFX10-NEXT: s_lshl_b32 s10, s10, 8 +; GFX10-NEXT: s_bfe_u32 s0, s0, s8 +; GFX10-NEXT: s_or_b32 s10, s11, s10 +; GFX10-NEXT: s_lshl_b32 s0, s0, 16 +; GFX10-NEXT: s_lshr_b32 s5, s1, 24 +; GFX10-NEXT: s_or_b32 s0, s10, s0 +; GFX10-NEXT: s_bfe_u32 s10, s1, s7 +; GFX10-NEXT: s_lshl_b32 s4, s4, 24 +; GFX10-NEXT: s_and_b32 s12, s1, s6 +; GFX10-NEXT: s_lshl_b32 s10, s10, 8 +; GFX10-NEXT: s_bfe_u32 s1, s1, s8 ; GFX10-NEXT: s_or_b32 s0, s0, s4 -; GFX10-NEXT: s_lshr_b32 s11, s2, 8 -; GFX10-NEXT: s_or_b32 s1, s1, s7 -; GFX10-NEXT: s_lshl_b32 s4, s10, 24 -; GFX10-NEXT: s_lshr_b32 s14, s3, 8 -; GFX10-NEXT: s_and_b32 s7, s11, s6 -; GFX10-NEXT: s_lshr_b32 s12, s2, 16 +; GFX10-NEXT: s_lshl_b32 s4, s5, 24 +; GFX10-NEXT: s_bfe_u32 s5, s2, s7 +; GFX10-NEXT: s_or_b32 s10, s12, s10 +; GFX10-NEXT: s_lshl_b32 s1, s1, 16 +; GFX10-NEXT: s_lshr_b32 s9, s2, 24 +; GFX10-NEXT: s_or_b32 s1, s10, s1 +; GFX10-NEXT: s_and_b32 s10, s2, s6 +; GFX10-NEXT: s_lshl_b32 s5, s5, 8 +; GFX10-NEXT: s_bfe_u32 s2, s2, s8 ; GFX10-NEXT: s_or_b32 s1, s1, s4 -; GFX10-NEXT: s_and_b32 s4, s14, s6 -; GFX10-NEXT: s_lshr_b32 s15, s3, 16 -; GFX10-NEXT: s_lshr_b32 s13, s2, 24 -; GFX10-NEXT: s_lshr_b32 s5, s3, 24 -; GFX10-NEXT: s_and_b32 s2, s2, s6 -; GFX10-NEXT: s_lshl_b32 s7, s7, 8 -; GFX10-NEXT: s_and_b32 s8, s12, s6 -; GFX10-NEXT: s_and_b32 s3, s3, s6 +; GFX10-NEXT: s_bfe_u32 s4, s3, s7 +; GFX10-NEXT: s_or_b32 s5, s10, s5 +; GFX10-NEXT: s_lshl_b32 s2, s2, 16 +; GFX10-NEXT: s_lshr_b32 s11, s3, 24 +; GFX10-NEXT: s_or_b32 s2, s5, s2 +; GFX10-NEXT: s_and_b32 s5, s3, s6 ; GFX10-NEXT: s_lshl_b32 s4, s4, 8 -; GFX10-NEXT: s_and_b32 s6, s15, s6 -; GFX10-NEXT: s_or_b32 s2, s2, s7 -; GFX10-NEXT: s_lshl_b32 s7, s8, 16 -; GFX10-NEXT: s_or_b32 s3, s3, s4 -; GFX10-NEXT: s_lshl_b32 s4, s6, 16 -; GFX10-NEXT: s_or_b32 s2, s2, s7 -; GFX10-NEXT: s_lshl_b32 s6, s13, 24 -; GFX10-NEXT: s_or_b32 s3, s3, s4 -; GFX10-NEXT: s_lshl_b32 s4, s5, 24 -; GFX10-NEXT: s_or_b32 s2, s2, s6 +; GFX10-NEXT: s_bfe_u32 s3, s3, s8 +; GFX10-NEXT: s_or_b32 s4, s5, s4 +; GFX10-NEXT: s_lshl_b32 s3, s3, 16 +; GFX10-NEXT: s_lshl_b32 s5, s9, 24 +; GFX10-NEXT: s_or_b32 s3, s4, s3 +; GFX10-NEXT: s_lshl_b32 s4, s11, 24 +; GFX10-NEXT: s_or_b32 s2, s2, s5 ; GFX10-NEXT: s_or_b32 s3, s3, s4 ; GFX10-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-NEXT: v_mov_b32_e32 v1, s1 @@ -4478,86 +4234,80 @@ define amdgpu_ps void @insertelement_v_v16i8_s_s(<16 x i8> addrspace(1)* %ptr, i ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX9-NEXT: s_mov_b32 s0, 8 +; GFX9-NEXT: s_mov_b32 s1, 16 ; GFX9-NEXT: v_mov_b32_e32 v4, 8 ; GFX9-NEXT: s_movk_i32 s6, 0xff -; GFX9-NEXT: s_and_b32 s1, s3, 3 +; GFX9-NEXT: v_mov_b32_e32 v5, 16 ; GFX9-NEXT: s_lshr_b32 s4, s3, 2 +; GFX9-NEXT: s_and_b32 s3, s3, 3 ; GFX9-NEXT: s_and_b32 s2, s2, s6 -; GFX9-NEXT: s_lshl_b32 s1, s1, 3 -; GFX9-NEXT: s_lshl_b32 s2, s2, s1 -; GFX9-NEXT: s_lshl_b32 s1, s6, s1 +; GFX9-NEXT: s_lshl_b32 s3, s3, 3 ; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s4, 1 -; GFX9-NEXT: s_not_b32 s5, s1 -; GFX9-NEXT: v_mov_b32_e32 v5, s2 +; GFX9-NEXT: s_lshl_b32 s2, s2, s3 +; GFX9-NEXT: s_lshl_b32 s3, s6, s3 +; GFX9-NEXT: s_not_b32 s5, s3 +; GFX9-NEXT: v_mov_b32_e32 v6, s2 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[2:3], s4, 3 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v6, 8, v0 -; GFX9-NEXT: v_lshrrev_b32_e32 v8, 8, v1 ; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v0 -; GFX9-NEXT: v_lshrrev_b32_e32 v9, 24, v1 -; GFX9-NEXT: v_lshrrev_b32_e32 v10, 8, v2 -; GFX9-NEXT: v_lshlrev_b32_sdwa v6, s0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v8, v4, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshrrev_b32_e32 v11, 24, v2 -; GFX9-NEXT: v_lshrrev_b32_e32 v12, 8, v3 -; GFX9-NEXT: v_and_b32_sdwa v14, v0, s6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_b32_sdwa v15, v1, s6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_lshrrev_b32_e32 v8, 24, v1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v11, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v13, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshrrev_b32_e32 v9, 24, v2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v12, s1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v14, s1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v15, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_and_or_b32 v0, v0, s6, v11 ; GFX9-NEXT: v_lshlrev_b32_e32 v7, 24, v7 -; GFX9-NEXT: v_and_or_b32 v0, v0, s6, v6 +; GFX9-NEXT: v_and_or_b32 v1, v1, s6, v13 +; GFX9-NEXT: v_lshlrev_b32_e32 v8, 24, v8 +; GFX9-NEXT: v_lshrrev_b32_e32 v10, 24, v3 +; GFX9-NEXT: v_lshlrev_b32_sdwa v16, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v17, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_and_or_b32 v2, v2, s6, v15 ; GFX9-NEXT: v_lshlrev_b32_e32 v9, 24, v9 -; GFX9-NEXT: v_and_or_b32 v1, v1, s6, v8 -; GFX9-NEXT: v_lshlrev_b32_sdwa v10, v4, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshrrev_b32_e32 v13, 24, v3 -; GFX9-NEXT: v_and_b32_sdwa v16, v2, s6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b32_e32 v11, 24, v11 -; GFX9-NEXT: v_and_or_b32 v2, v2, s6, v10 -; GFX9-NEXT: v_lshlrev_b32_sdwa v12, v4, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_or3_b32 v0, v0, v14, v7 -; GFX9-NEXT: v_or3_b32 v1, v1, v15, v9 -; GFX9-NEXT: v_and_b32_sdwa v17, v3, s6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b32_e32 v13, 24, v13 -; GFX9-NEXT: v_and_or_b32 v3, v3, s6, v12 -; GFX9-NEXT: v_or3_b32 v2, v2, v16, v11 -; GFX9-NEXT: v_cndmask_b32_e32 v6, v0, v1, vcc +; GFX9-NEXT: v_or3_b32 v0, v0, v12, v7 +; GFX9-NEXT: v_or3_b32 v1, v1, v14, v8 +; GFX9-NEXT: v_lshlrev_b32_sdwa v18, v5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_and_or_b32 v3, v3, s6, v17 +; GFX9-NEXT: v_lshlrev_b32_e32 v10, 24, v10 +; GFX9-NEXT: v_or3_b32 v2, v2, v16, v9 +; GFX9-NEXT: v_cndmask_b32_e32 v7, v0, v1, vcc ; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s4, 2 -; GFX9-NEXT: v_or3_b32 v3, v3, v17, v13 -; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, v2, s[0:1] -; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, v3, s[2:3] -; GFX9-NEXT: v_and_or_b32 v5, v6, s5, v5 +; GFX9-NEXT: v_or3_b32 v3, v3, v18, v10 +; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v2, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v3, s[2:3] +; GFX9-NEXT: v_and_or_b32 v6, v7, s5, v6 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], s4, 0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v5, s[0:1] -; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v5, s[2:3] -; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v5, s[4:5] -; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v0 -; GFX9-NEXT: v_lshrrev_b32_e32 v7, 8, v1 -; GFX9-NEXT: v_lshrrev_b32_e32 v9, 8, v2 -; GFX9-NEXT: v_lshrrev_b32_e32 v11, 8, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[4:5] +; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v6, s[2:3] +; GFX9-NEXT: v_lshrrev_b32_e32 v9, 24, v3 +; GFX9-NEXT: v_lshlrev_b32_sdwa v10, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v12, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v14, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v4, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX9-NEXT: v_lshrrev_b32_e32 v6, 24, v0 -; GFX9-NEXT: v_lshrrev_b32_e32 v8, 24, v1 -; GFX9-NEXT: v_lshrrev_b32_e32 v10, 24, v2 -; GFX9-NEXT: v_lshrrev_b32_e32 v12, 24, v3 -; GFX9-NEXT: v_lshlrev_b32_sdwa v5, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v7, v4, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v9, v4, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v4, v4, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_b32_sdwa v13, v0, s6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_b32_sdwa v14, v1, s6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_b32_sdwa v15, v2, s6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_b32_sdwa v16, v3, s6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b32_e32 v6, 24, v6 -; GFX9-NEXT: v_and_or_b32 v0, v0, s6, v5 +; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v8, 24, v2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v11, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v13, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v15, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v5, v5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX9-NEXT: v_and_or_b32 v3, v3, s6, v4 -; GFX9-NEXT: v_lshlrev_b32_e32 v11, 24, v12 +; GFX9-NEXT: v_lshlrev_b32_e32 v4, 24, v9 +; GFX9-NEXT: v_or3_b32 v3, v3, v5, v4 ; GFX9-NEXT: v_mov_b32_e32 v4, 0 +; GFX9-NEXT: v_and_or_b32 v0, v0, s6, v10 +; GFX9-NEXT: v_lshlrev_b32_e32 v6, 24, v6 +; GFX9-NEXT: v_and_or_b32 v1, v1, s6, v12 +; GFX9-NEXT: v_lshlrev_b32_e32 v7, 24, v7 +; GFX9-NEXT: v_and_or_b32 v2, v2, s6, v14 ; GFX9-NEXT: v_lshlrev_b32_e32 v8, 24, v8 -; GFX9-NEXT: v_and_or_b32 v1, v1, s6, v7 -; GFX9-NEXT: v_lshlrev_b32_e32 v10, 24, v10 -; GFX9-NEXT: v_and_or_b32 v2, v2, s6, v9 -; GFX9-NEXT: v_or3_b32 v0, v0, v13, v6 -; GFX9-NEXT: v_or3_b32 v1, v1, v14, v8 -; GFX9-NEXT: v_or3_b32 v2, v2, v15, v10 -; GFX9-NEXT: v_or3_b32 v3, v3, v16, v11 +; GFX9-NEXT: v_or3_b32 v0, v0, v11, v6 +; GFX9-NEXT: v_or3_b32 v1, v1, v13, v7 +; GFX9-NEXT: v_or3_b32 v2, v2, v15, v8 ; GFX9-NEXT: v_mov_b32_e32 v5, 0 ; GFX9-NEXT: global_store_dwordx4 v[4:5], v[0:3], off ; GFX9-NEXT: s_endpgm @@ -4565,11 +4315,12 @@ define amdgpu_ps void @insertelement_v_v16i8_s_s(<16 x i8> addrspace(1)* %ptr, i ; GFX8-LABEL: insertelement_v_v16i8_s_s: ; GFX8: ; %bb.0: ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] -; GFX8-NEXT: s_movk_i32 s0, 0xff ; GFX8-NEXT: v_mov_b32_e32 v4, 8 -; GFX8-NEXT: v_mov_b32_e32 v5, 8 -; GFX8-NEXT: v_mov_b32_e32 v6, s0 +; GFX8-NEXT: v_mov_b32_e32 v6, 8 +; GFX8-NEXT: v_mov_b32_e32 v5, 16 +; GFX8-NEXT: v_mov_b32_e32 v7, 16 ; GFX8-NEXT: s_and_b32 s1, s3, 3 +; GFX8-NEXT: s_movk_i32 s0, 0xff ; GFX8-NEXT: s_lshr_b32 s4, s3, 2 ; GFX8-NEXT: s_lshl_b32 s1, s1, 3 ; GFX8-NEXT: s_and_b32 s2, s2, s0 @@ -4580,81 +4331,73 @@ define amdgpu_ps void @insertelement_v_v16i8_s_s(<16 x i8> addrspace(1)* %ptr, i ; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s4, 2 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[2:3], s4, 3 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_lshrrev_b32_e32 v7, 8, v0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v12, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v4, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX8-NEXT: v_lshrrev_b32_e32 v8, 24, v0 -; GFX8-NEXT: v_lshrrev_b32_e32 v9, 8, v1 -; GFX8-NEXT: v_lshlrev_b32_sdwa v4, v4, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshlrev_b32_e32 v7, 24, v8 -; GFX8-NEXT: v_lshrrev_b32_e32 v10, 24, v1 -; GFX8-NEXT: v_lshlrev_b32_sdwa v8, v5, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v11, 8, v2 -; GFX8-NEXT: v_and_b32_sdwa v15, v0, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_sdwa v16, v1, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v1, v1, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v13, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_sdwa v0, v0, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshrrev_b32_e32 v9, 24, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v5, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v14, v6, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v10, 24, v2 +; GFX8-NEXT: v_lshlrev_b32_sdwa v15, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_sdwa v2, v2, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_e32 v4, 24, v9 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v5 +; GFX8-NEXT: v_lshlrev_b32_sdwa v16, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshlrev_b32_e32 v8, 24, v8 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v13 +; GFX8-NEXT: v_lshrrev_b32_e32 v11, 24, v3 +; GFX8-NEXT: v_lshlrev_b32_sdwa v17, v7, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_sdwa v3, v3, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX8-NEXT: v_lshlrev_b32_e32 v9, 24, v10 -; GFX8-NEXT: v_lshrrev_b32_e32 v12, 24, v2 -; GFX8-NEXT: v_lshlrev_b32_sdwa v10, v5, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v13, 8, v3 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v15 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v16 -; GFX8-NEXT: v_and_b32_sdwa v17, v2, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v2, v2, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_lshlrev_b32_e32 v11, 24, v12 -; GFX8-NEXT: v_lshlrev_b32_sdwa v12, v5, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_or_b32_e32 v2, v2, v17 -; GFX8-NEXT: v_lshrrev_b32_e32 v14, 24, v3 -; GFX8-NEXT: v_and_b32_sdwa v18, v3, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v3, v3, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_e32 v0, v0, v7 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v9 -; GFX8-NEXT: v_lshlrev_b32_e32 v13, 24, v14 -; GFX8-NEXT: v_or_b32_e32 v3, v3, v18 -; GFX8-NEXT: v_or_b32_e32 v2, v2, v11 +; GFX8-NEXT: v_or_b32_e32 v2, v2, v15 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v8 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 +; GFX8-NEXT: v_lshlrev_b32_e32 v10, 24, v11 +; GFX8-NEXT: v_or_b32_e32 v3, v3, v17 +; GFX8-NEXT: v_or_b32_e32 v2, v2, v9 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v0, v1, vcc -; GFX8-NEXT: v_or_b32_e32 v3, v3, v13 +; GFX8-NEXT: v_or_b32_e32 v3, v3, v10 ; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v2, s[0:1] ; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v3, s[2:3] ; GFX8-NEXT: v_and_b32_e32 v4, s6, v4 ; GFX8-NEXT: v_or_b32_e32 v4, s5, v4 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], s4, 0 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[0:1] ; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[0:1] ; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[2:3] -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 8, v0 -; GFX8-NEXT: v_lshrrev_b32_e32 v8, 8, v1 -; GFX8-NEXT: v_lshrrev_b32_e32 v10, 8, v2 -; GFX8-NEXT: v_lshrrev_b32_e32 v12, 8, v3 -; GFX8-NEXT: v_lshlrev_b32_sdwa v4, v5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshlrev_b32_sdwa v8, v5, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshlrev_b32_sdwa v10, v5, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshlrev_b32_sdwa v5, v5, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v7, 24, v0 -; GFX8-NEXT: v_lshrrev_b32_e32 v9, 24, v1 -; GFX8-NEXT: v_lshrrev_b32_e32 v11, 24, v2 -; GFX8-NEXT: v_lshrrev_b32_e32 v13, 24, v3 -; GFX8-NEXT: v_and_b32_sdwa v14, v0, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_sdwa v15, v1, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_sdwa v16, v2, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v1, v1, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v2, v2, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_sdwa v6, v3, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v3, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v10, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v12, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v0 +; GFX8-NEXT: v_lshrrev_b32_e32 v5, 24, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v11, v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_sdwa v0, v0, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v13, v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_sdwa v1, v1, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v14, v6, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v6, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v8, 24, v2 +; GFX8-NEXT: v_lshlrev_b32_sdwa v15, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_sdwa v2, v2, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshrrev_b32_e32 v9, 24, v3 +; GFX8-NEXT: v_lshlrev_b32_sdwa v7, v7, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_sdwa v3, v3, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_e32 v4, 24, v4 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v11 +; GFX8-NEXT: v_lshlrev_b32_e32 v5, 24, v5 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v13 +; GFX8-NEXT: v_lshlrev_b32_e32 v8, 24, v8 +; GFX8-NEXT: v_or_b32_e32 v2, v2, v15 +; GFX8-NEXT: v_lshlrev_b32_e32 v6, 24, v9 +; GFX8-NEXT: v_or_b32_e32 v3, v3, v7 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v5 ; GFX8-NEXT: v_mov_b32_e32 v4, 0 -; GFX8-NEXT: v_lshlrev_b32_e32 v7, 24, v7 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v14 -; GFX8-NEXT: v_lshlrev_b32_e32 v9, 24, v9 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v15 -; GFX8-NEXT: v_lshlrev_b32_e32 v11, 24, v11 -; GFX8-NEXT: v_or_b32_e32 v2, v2, v16 -; GFX8-NEXT: v_lshlrev_b32_e32 v12, 24, v13 +; GFX8-NEXT: v_or_b32_e32 v2, v2, v8 ; GFX8-NEXT: v_or_b32_e32 v3, v3, v6 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v7 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v9 -; GFX8-NEXT: v_or_b32_e32 v2, v2, v11 -; GFX8-NEXT: v_or_b32_e32 v3, v3, v12 ; GFX8-NEXT: v_mov_b32_e32 v5, 0 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; GFX8-NEXT: s_endpgm @@ -4678,111 +4421,95 @@ define amdgpu_ps void @insertelement_v_v16i8_s_s(<16 x i8> addrspace(1)* %ptr, i ; GFX7-NEXT: v_cmp_eq_u32_e64 s[2:3], s4, 3 ; GFX7-NEXT: s_mov_b32 s10, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v4, 8, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v7, 8, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v5, 16, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v8, 16, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v10, 8, v2 -; GFX7-NEXT: v_and_b32_e32 v4, s6, v4 -; GFX7-NEXT: v_and_b32_e32 v7, s6, v7 -; GFX7-NEXT: v_lshrrev_b32_e32 v6, 24, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v9, 24, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v11, 16, v2 -; GFX7-NEXT: v_lshrrev_b32_e32 v13, 8, v3 -; GFX7-NEXT: v_and_b32_e32 v5, s6, v5 -; GFX7-NEXT: v_and_b32_e32 v8, s6, v8 -; GFX7-NEXT: v_and_b32_e32 v10, s6, v10 -; GFX7-NEXT: v_and_b32_e32 v0, s6, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 8, v4 -; GFX7-NEXT: v_and_b32_e32 v1, s6, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v7, 8, v7 -; GFX7-NEXT: v_lshrrev_b32_e32 v12, 24, v2 -; GFX7-NEXT: v_lshrrev_b32_e32 v14, 16, v3 -; GFX7-NEXT: v_and_b32_e32 v11, s6, v11 -; GFX7-NEXT: v_and_b32_e32 v13, s6, v13 -; GFX7-NEXT: v_and_b32_e32 v2, s6, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v10, 8, v10 -; GFX7-NEXT: v_lshlrev_b32_e32 v5, 16, v5 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v4 -; GFX7-NEXT: v_lshlrev_b32_e32 v8, 16, v8 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v7 -; GFX7-NEXT: v_lshrrev_b32_e32 v15, 24, v3 -; GFX7-NEXT: v_and_b32_e32 v14, s6, v14 -; GFX7-NEXT: v_and_b32_e32 v3, s6, v3 +; GFX7-NEXT: v_bfe_u32 v9, v0, 8, 8 +; GFX7-NEXT: v_bfe_u32 v11, v1, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v4, 24, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v5, 24, v1 +; GFX7-NEXT: v_bfe_u32 v13, v2, 8, 8 +; GFX7-NEXT: v_and_b32_e32 v8, s6, v0 +; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX7-NEXT: v_and_b32_e32 v10, s6, v1 +; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v9, 8, v9 +; GFX7-NEXT: v_lshlrev_b32_e32 v11, 8, v11 +; GFX7-NEXT: v_lshrrev_b32_e32 v6, 24, v2 +; GFX7-NEXT: v_bfe_u32 v15, v3, 8, 8 +; GFX7-NEXT: v_and_b32_e32 v12, s6, v2 +; GFX7-NEXT: v_bfe_u32 v2, v2, 16, 8 +; GFX7-NEXT: v_or_b32_e32 v8, v8, v9 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-NEXT: v_or_b32_e32 v9, v10, v11 ; GFX7-NEXT: v_lshlrev_b32_e32 v13, 8, v13 +; GFX7-NEXT: v_lshrrev_b32_e32 v7, 24, v3 +; GFX7-NEXT: v_and_b32_e32 v14, s6, v3 +; GFX7-NEXT: v_bfe_u32 v3, v3, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v15, 8, v15 +; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v4 +; GFX7-NEXT: v_or_b32_e32 v0, v8, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v5, 24, v5 +; GFX7-NEXT: v_or_b32_e32 v1, v9, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX7-NEXT: v_or_b32_e32 v10, v12, v13 ; GFX7-NEXT: v_lshlrev_b32_e32 v6, 24, v6 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v5 -; GFX7-NEXT: v_lshlrev_b32_e32 v9, 24, v9 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v8 -; GFX7-NEXT: v_lshlrev_b32_e32 v11, 16, v11 -; GFX7-NEXT: v_or_b32_e32 v2, v2, v10 -; GFX7-NEXT: v_lshlrev_b32_e32 v12, 24, v12 -; GFX7-NEXT: v_or_b32_e32 v2, v2, v11 -; GFX7-NEXT: v_lshlrev_b32_e32 v14, 16, v14 -; GFX7-NEXT: v_or_b32_e32 v3, v3, v13 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v6 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v9 -; GFX7-NEXT: v_lshlrev_b32_e32 v15, 24, v15 -; GFX7-NEXT: v_or_b32_e32 v3, v3, v14 -; GFX7-NEXT: v_or_b32_e32 v2, v2, v12 +; GFX7-NEXT: v_or_b32_e32 v2, v10, v2 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX7-NEXT: v_or_b32_e32 v11, v14, v15 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX7-NEXT: v_or_b32_e32 v1, v1, v5 +; GFX7-NEXT: v_lshlrev_b32_e32 v7, 24, v7 +; GFX7-NEXT: v_or_b32_e32 v3, v11, v3 +; GFX7-NEXT: v_or_b32_e32 v2, v2, v6 ; GFX7-NEXT: v_cndmask_b32_e32 v4, v0, v1, vcc -; GFX7-NEXT: v_or_b32_e32 v3, v3, v15 +; GFX7-NEXT: v_or_b32_e32 v3, v3, v7 ; GFX7-NEXT: v_cndmask_b32_e64 v4, v4, v2, s[0:1] ; GFX7-NEXT: v_cndmask_b32_e64 v4, v4, v3, s[2:3] ; GFX7-NEXT: v_and_b32_e32 v4, s7, v4 ; GFX7-NEXT: v_or_b32_e32 v4, s5, v4 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[4:5], s4, 0 -; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[4:5] ; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc +; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[4:5] ; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[0:1] +; GFX7-NEXT: v_bfe_u32 v9, v0, 8, 8 +; GFX7-NEXT: v_bfe_u32 v11, v1, 8, 8 ; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[2:3] -; GFX7-NEXT: v_lshrrev_b32_e32 v4, 8, v0 -; GFX7-NEXT: v_and_b32_e32 v4, s6, v4 -; GFX7-NEXT: v_lshrrev_b32_e32 v5, 16, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v6, 24, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v7, 8, v1 -; GFX7-NEXT: v_and_b32_e32 v0, s6, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 8, v4 +; GFX7-NEXT: v_lshrrev_b32_e32 v4, 24, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v5, 24, v1 +; GFX7-NEXT: v_bfe_u32 v13, v2, 8, 8 +; GFX7-NEXT: v_and_b32_e32 v8, s6, v0 +; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX7-NEXT: v_and_b32_e32 v10, s6, v1 +; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v9, 8, v9 +; GFX7-NEXT: v_lshlrev_b32_e32 v11, 8, v11 +; GFX7-NEXT: v_or_b32_e32 v8, v8, v9 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v6, 24, v2 +; GFX7-NEXT: v_and_b32_e32 v12, s6, v2 +; GFX7-NEXT: v_bfe_u32 v2, v2, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-NEXT: v_or_b32_e32 v9, v10, v11 +; GFX7-NEXT: v_lshlrev_b32_e32 v13, 8, v13 +; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v4 +; GFX7-NEXT: v_or_b32_e32 v0, v8, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v5, 24, v5 +; GFX7-NEXT: v_or_b32_e32 v1, v9, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX7-NEXT: v_or_b32_e32 v10, v12, v13 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v4 -; GFX7-NEXT: v_and_b32_e32 v4, s6, v7 -; GFX7-NEXT: v_lshrrev_b32_e32 v8, 16, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v9, 24, v1 -; GFX7-NEXT: v_and_b32_e32 v1, s6, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 8, v4 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v4 -; GFX7-NEXT: v_and_b32_e32 v4, s6, v8 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v4 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v9 -; GFX7-NEXT: v_lshrrev_b32_e32 v10, 8, v2 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v4 -; GFX7-NEXT: v_and_b32_e32 v4, s6, v10 -; GFX7-NEXT: v_lshrrev_b32_e32 v11, 16, v2 -; GFX7-NEXT: v_lshrrev_b32_e32 v12, 24, v2 -; GFX7-NEXT: v_and_b32_e32 v2, s6, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 8, v4 -; GFX7-NEXT: v_or_b32_e32 v2, v2, v4 -; GFX7-NEXT: v_and_b32_e32 v4, s6, v11 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX7-NEXT: v_or_b32_e32 v2, v2, v4 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v12 -; GFX7-NEXT: v_lshrrev_b32_e32 v13, 8, v3 +; GFX7-NEXT: v_or_b32_e32 v1, v1, v5 +; GFX7-NEXT: v_bfe_u32 v5, v3, 8, 8 +; GFX7-NEXT: v_or_b32_e32 v2, v10, v2 +; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v6 ; GFX7-NEXT: v_or_b32_e32 v2, v2, v4 -; GFX7-NEXT: v_and_b32_e32 v4, s6, v13 -; GFX7-NEXT: v_lshrrev_b32_e32 v14, 16, v3 -; GFX7-NEXT: v_lshrrev_b32_e32 v15, 24, v3 -; GFX7-NEXT: v_and_b32_e32 v3, s6, v3 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 8, v4 -; GFX7-NEXT: v_and_b32_e32 v5, s6, v5 -; GFX7-NEXT: v_or_b32_e32 v3, v3, v4 -; GFX7-NEXT: v_and_b32_e32 v4, s6, v14 -; GFX7-NEXT: v_lshlrev_b32_e32 v5, 16, v5 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX7-NEXT: v_or_b32_e32 v3, v3, v4 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v15 -; GFX7-NEXT: v_lshlrev_b32_e32 v6, 24, v6 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v5 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v6 +; GFX7-NEXT: v_lshrrev_b32_e32 v7, 24, v3 +; GFX7-NEXT: v_and_b32_e32 v4, s6, v3 +; GFX7-NEXT: v_bfe_u32 v3, v3, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v5, 8, v5 +; GFX7-NEXT: v_or_b32_e32 v4, v4, v5 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX7-NEXT: v_or_b32_e32 v3, v4, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v7 ; GFX7-NEXT: v_or_b32_e32 v3, v3, v4 ; GFX7-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 ; GFX7-NEXT: s_endpgm @@ -4790,86 +4517,80 @@ define amdgpu_ps void @insertelement_v_v16i8_s_s(<16 x i8> addrspace(1)* %ptr, i ; GFX10-LABEL: insertelement_v_v16i8_s_s: ; GFX10: ; %bb.0: ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX10-NEXT: v_mov_b32_e32 v4, 8 ; GFX10-NEXT: s_mov_b32 s0, 8 +; GFX10-NEXT: v_mov_b32_e32 v4, 8 +; GFX10-NEXT: s_mov_b32 s1, 16 ; GFX10-NEXT: s_movk_i32 s4, 0xff +; GFX10-NEXT: v_mov_b32_e32 v5, 16 ; GFX10-NEXT: s_lshr_b32 s5, s3, 2 -; GFX10-NEXT: s_and_b32 s1, s3, 3 -; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s5, 1 -; GFX10-NEXT: s_lshl_b32 s3, s1, 3 -; GFX10-NEXT: v_cmp_eq_u32_e64 s1, s5, 3 ; GFX10-NEXT: s_and_b32 s2, s2, s4 -; GFX10-NEXT: s_lshl_b32 s6, s4, s3 -; GFX10-NEXT: s_lshl_b32 s2, s2, s3 -; GFX10-NEXT: s_not_b32 s3, s6 +; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s5, 1 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v5, 8, v0 -; GFX10-NEXT: v_lshrrev_b32_e32 v7, 8, v1 ; GFX10-NEXT: v_lshrrev_b32_e32 v6, 24, v0 -; GFX10-NEXT: v_lshrrev_b32_e32 v8, 24, v1 -; GFX10-NEXT: v_lshrrev_b32_e32 v9, 8, v2 -; GFX10-NEXT: v_lshlrev_b32_sdwa v5, s0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v7, v4, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_and_b32_sdwa v13, v0, s4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_b32_sdwa v14, v1, s4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshrrev_b32_e32 v10, 24, v2 -; GFX10-NEXT: v_and_or_b32 v0, v0, s4, v5 +; GFX10-NEXT: v_lshrrev_b32_e32 v7, 24, v1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v10, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v12, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v8, 24, v2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v11, s1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v13, s1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v0, v0, s4, v10 ; GFX10-NEXT: v_lshlrev_b32_e32 v6, 24, v6 -; GFX10-NEXT: v_lshrrev_b32_e32 v11, 8, v3 -; GFX10-NEXT: v_and_or_b32 v1, v1, s4, v7 +; GFX10-NEXT: v_lshlrev_b32_sdwa v14, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_and_or_b32 v1, v1, s4, v12 +; GFX10-NEXT: v_lshlrev_b32_e32 v7, 24, v7 +; GFX10-NEXT: v_lshlrev_b32_sdwa v15, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_lshrrev_b32_e32 v9, 24, v3 +; GFX10-NEXT: v_or3_b32 v0, v0, v11, v6 +; GFX10-NEXT: v_lshlrev_b32_sdwa v16, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_or3_b32 v1, v1, v13, v7 +; GFX10-NEXT: v_and_or_b32 v2, v2, s4, v14 ; GFX10-NEXT: v_lshlrev_b32_e32 v8, 24, v8 -; GFX10-NEXT: v_lshlrev_b32_sdwa v9, v4, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_and_b32_sdwa v15, v2, s4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshrrev_b32_e32 v12, 24, v3 -; GFX10-NEXT: v_or3_b32 v0, v0, v13, v6 -; GFX10-NEXT: v_lshlrev_b32_e32 v5, 24, v10 -; GFX10-NEXT: v_and_or_b32 v2, v2, s4, v9 -; GFX10-NEXT: v_lshlrev_b32_sdwa v7, v4, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_or3_b32 v1, v1, v14, v8 -; GFX10-NEXT: v_and_b32_sdwa v16, v3, s4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b32_e32 v6, 24, v12 -; GFX10-NEXT: v_or3_b32 v2, v2, v15, v5 -; GFX10-NEXT: v_and_or_b32 v3, v3, s4, v7 -; GFX10-NEXT: v_cndmask_b32_e32 v5, v0, v1, vcc_lo +; GFX10-NEXT: v_lshlrev_b32_sdwa v10, v5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v3, v3, s4, v16 +; GFX10-NEXT: v_lshlrev_b32_e32 v6, 24, v9 +; GFX10-NEXT: v_cndmask_b32_e32 v7, v0, v1, vcc_lo +; GFX10-NEXT: v_or3_b32 v2, v2, v15, v8 ; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s5, 2 -; GFX10-NEXT: v_or3_b32 v3, v3, v16, v6 -; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v2, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v3, s1 -; GFX10-NEXT: v_and_or_b32 v5, v5, s3, s2 +; GFX10-NEXT: s_and_b32 s1, s3, 3 +; GFX10-NEXT: v_or3_b32 v3, v3, v10, v6 +; GFX10-NEXT: s_lshl_b32 s3, s1, 3 +; GFX10-NEXT: v_cmp_eq_u32_e64 s1, s5, 3 +; GFX10-NEXT: v_cndmask_b32_e64 v6, v7, v2, s0 +; GFX10-NEXT: s_lshl_b32 s6, s4, s3 +; GFX10-NEXT: s_lshl_b32 s2, s2, s3 +; GFX10-NEXT: s_not_b32 s3, s6 +; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v3, s1 +; GFX10-NEXT: v_and_or_b32 v6, v6, s3, s2 ; GFX10-NEXT: v_cmp_eq_u32_e64 s2, s5, 0 -; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v5, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v5, s1 -; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v5, s2 -; GFX10-NEXT: v_lshrrev_b32_e32 v7, 8, v1 -; GFX10-NEXT: v_lshrrev_b32_e32 v9, 8, v2 -; GFX10-NEXT: v_lshrrev_b32_e32 v11, 8, v3 -; GFX10-NEXT: v_lshrrev_b32_e32 v5, 8, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v6, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v6, s2 +; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v6, s1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v12, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v14, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v10, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v9, 24, v3 +; GFX10-NEXT: v_lshlrev_b32_sdwa v4, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX10-NEXT: v_lshrrev_b32_e32 v6, 24, v0 -; GFX10-NEXT: v_lshrrev_b32_e32 v8, 24, v1 -; GFX10-NEXT: v_lshrrev_b32_e32 v10, 24, v2 -; GFX10-NEXT: v_lshrrev_b32_e32 v12, 24, v3 -; GFX10-NEXT: v_lshlrev_b32_sdwa v5, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v7, v4, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v9, v4, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v4, v4, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_and_b32_sdwa v13, v0, s4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_b32_sdwa v14, v1, s4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_b32_sdwa v15, v2, s4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_b32_sdwa v16, v3, s4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b32_e32 v6, 24, v6 -; GFX10-NEXT: v_and_or_b32 v0, v0, s4, v5 +; GFX10-NEXT: v_lshrrev_b32_e32 v7, 24, v1 +; GFX10-NEXT: v_lshrrev_b32_e32 v8, 24, v2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v11, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v13, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v15, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v5, v5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX10-NEXT: v_and_or_b32 v3, v3, s4, v4 -; GFX10-NEXT: v_lshlrev_b32_e32 v11, 24, v12 +; GFX10-NEXT: v_lshlrev_b32_e32 v4, 24, v9 +; GFX10-NEXT: v_and_or_b32 v0, v0, s4, v10 +; GFX10-NEXT: v_lshlrev_b32_e32 v6, 24, v6 +; GFX10-NEXT: v_and_or_b32 v1, v1, s4, v12 +; GFX10-NEXT: v_lshlrev_b32_e32 v7, 24, v7 +; GFX10-NEXT: v_and_or_b32 v2, v2, s4, v14 ; GFX10-NEXT: v_lshlrev_b32_e32 v8, 24, v8 -; GFX10-NEXT: v_and_or_b32 v1, v1, s4, v7 -; GFX10-NEXT: v_lshlrev_b32_e32 v10, 24, v10 -; GFX10-NEXT: v_and_or_b32 v2, v2, s4, v9 +; GFX10-NEXT: v_or3_b32 v3, v3, v5, v4 ; GFX10-NEXT: v_mov_b32_e32 v4, 0 -; GFX10-NEXT: v_or3_b32 v0, v0, v13, v6 -; GFX10-NEXT: v_or3_b32 v1, v1, v14, v8 -; GFX10-NEXT: v_or3_b32 v3, v3, v16, v11 -; GFX10-NEXT: v_or3_b32 v2, v2, v15, v10 +; GFX10-NEXT: v_or3_b32 v0, v0, v11, v6 +; GFX10-NEXT: v_or3_b32 v1, v1, v13, v7 +; GFX10-NEXT: v_or3_b32 v2, v2, v15, v8 ; GFX10-NEXT: v_mov_b32_e32 v5, 0 ; GFX10-NEXT: global_store_dwordx4 v[4:5], v[0:3], off ; GFX10-NEXT: s_endpgm @@ -4883,111 +4604,103 @@ define amdgpu_ps void @insertelement_s_v16i8_v_s(<16 x i8> addrspace(4)* inreg % ; GFX9-LABEL: insertelement_s_v16i8_v_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 -; GFX9-NEXT: s_movk_i32 s18, 0xff -; GFX9-NEXT: v_and_b32_e32 v0, s18, v0 +; GFX9-NEXT: s_mov_b32 s13, 0x80008 +; GFX9-NEXT: s_movk_i32 s11, 0xff +; GFX9-NEXT: v_and_b32_e32 v0, s11, v0 ; GFX9-NEXT: s_mov_b32 s5, 8 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_lshr_b32 s6, s0, 8 -; GFX9-NEXT: s_and_b32 s6, s6, s18 -; GFX9-NEXT: s_lshr_b32 s7, s0, 16 -; GFX9-NEXT: s_lshr_b32 s8, s0, 24 -; GFX9-NEXT: s_and_b32 s0, s0, s18 -; GFX9-NEXT: s_lshl_b32 s6, s6, 8 -; GFX9-NEXT: s_or_b32 s0, s0, s6 -; GFX9-NEXT: s_and_b32 s6, s7, s18 -; GFX9-NEXT: s_lshl_b32 s6, s6, 16 -; GFX9-NEXT: s_or_b32 s0, s0, s6 -; GFX9-NEXT: s_lshl_b32 s6, s8, 24 -; GFX9-NEXT: s_lshr_b32 s9, s1, 8 -; GFX9-NEXT: s_or_b32 s0, s0, s6 -; GFX9-NEXT: s_and_b32 s6, s9, s18 -; GFX9-NEXT: s_lshr_b32 s10, s1, 16 -; GFX9-NEXT: s_lshr_b32 s11, s1, 24 -; GFX9-NEXT: s_and_b32 s1, s1, s18 -; GFX9-NEXT: s_lshl_b32 s6, s6, 8 -; GFX9-NEXT: s_or_b32 s1, s1, s6 -; GFX9-NEXT: s_and_b32 s6, s10, s18 -; GFX9-NEXT: s_lshl_b32 s6, s6, 16 -; GFX9-NEXT: s_or_b32 s1, s1, s6 -; GFX9-NEXT: s_lshl_b32 s6, s11, 24 -; GFX9-NEXT: s_lshr_b32 s12, s2, 8 -; GFX9-NEXT: s_or_b32 s1, s1, s6 -; GFX9-NEXT: s_and_b32 s6, s12, s18 -; GFX9-NEXT: s_lshr_b32 s13, s2, 16 -; GFX9-NEXT: s_lshr_b32 s14, s2, 24 -; GFX9-NEXT: s_and_b32 s2, s2, s18 -; GFX9-NEXT: s_lshl_b32 s6, s6, 8 -; GFX9-NEXT: s_or_b32 s2, s2, s6 -; GFX9-NEXT: s_and_b32 s6, s13, s18 -; GFX9-NEXT: s_lshl_b32 s6, s6, 16 -; GFX9-NEXT: s_or_b32 s2, s2, s6 -; GFX9-NEXT: s_lshl_b32 s6, s14, 24 -; GFX9-NEXT: s_lshr_b32 s15, s3, 8 -; GFX9-NEXT: s_or_b32 s2, s2, s6 -; GFX9-NEXT: s_and_b32 s6, s15, s18 -; GFX9-NEXT: s_lshr_b32 s16, s3, 16 -; GFX9-NEXT: s_lshr_b32 s17, s3, 24 -; GFX9-NEXT: s_and_b32 s3, s3, s18 -; GFX9-NEXT: s_lshl_b32 s6, s6, 8 -; GFX9-NEXT: s_or_b32 s3, s3, s6 -; GFX9-NEXT: s_and_b32 s6, s16, s18 -; GFX9-NEXT: s_lshl_b32 s6, s6, 16 -; GFX9-NEXT: s_or_b32 s3, s3, s6 -; GFX9-NEXT: s_lshl_b32 s6, s17, 24 -; GFX9-NEXT: s_or_b32 s3, s3, s6 -; GFX9-NEXT: s_lshr_b32 s6, s4, 2 -; GFX9-NEXT: s_cmp_eq_u32 s6, 1 -; GFX9-NEXT: s_cselect_b32 s7, s1, s0 -; GFX9-NEXT: s_cmp_eq_u32 s6, 2 -; GFX9-NEXT: s_cselect_b32 s7, s2, s7 -; GFX9-NEXT: s_cmp_eq_u32 s6, 3 -; GFX9-NEXT: s_cselect_b32 s7, s3, s7 +; GFX9-NEXT: s_bfe_u32 s14, s0, s13 +; GFX9-NEXT: s_and_b32 s12, s0, s11 +; GFX9-NEXT: s_lshl_b32 s14, s14, 8 +; GFX9-NEXT: s_or_b32 s12, s12, s14 +; GFX9-NEXT: s_mov_b32 s14, 0x80010 +; GFX9-NEXT: s_lshr_b32 s7, s0, 24 +; GFX9-NEXT: s_bfe_u32 s0, s0, s14 +; GFX9-NEXT: s_lshl_b32 s0, s0, 16 +; GFX9-NEXT: s_or_b32 s0, s12, s0 +; GFX9-NEXT: s_bfe_u32 s12, s1, s13 +; GFX9-NEXT: s_lshl_b32 s7, s7, 24 +; GFX9-NEXT: s_or_b32 s0, s0, s7 +; GFX9-NEXT: s_lshr_b32 s8, s1, 24 +; GFX9-NEXT: s_and_b32 s7, s1, s11 +; GFX9-NEXT: s_bfe_u32 s1, s1, s14 +; GFX9-NEXT: s_lshl_b32 s12, s12, 8 +; GFX9-NEXT: s_or_b32 s7, s7, s12 +; GFX9-NEXT: s_lshl_b32 s1, s1, 16 +; GFX9-NEXT: s_or_b32 s1, s7, s1 +; GFX9-NEXT: s_lshl_b32 s7, s8, 24 +; GFX9-NEXT: s_bfe_u32 s8, s2, s13 +; GFX9-NEXT: s_or_b32 s1, s1, s7 +; GFX9-NEXT: s_lshr_b32 s9, s2, 24 +; GFX9-NEXT: s_and_b32 s7, s2, s11 +; GFX9-NEXT: s_bfe_u32 s2, s2, s14 +; GFX9-NEXT: s_lshl_b32 s8, s8, 8 +; GFX9-NEXT: s_or_b32 s7, s7, s8 +; GFX9-NEXT: s_lshl_b32 s2, s2, 16 +; GFX9-NEXT: s_bfe_u32 s8, s3, s13 +; GFX9-NEXT: s_or_b32 s2, s7, s2 +; GFX9-NEXT: s_lshl_b32 s7, s9, 24 +; GFX9-NEXT: s_or_b32 s2, s2, s7 +; GFX9-NEXT: s_lshr_b32 s10, s3, 24 +; GFX9-NEXT: s_and_b32 s7, s3, s11 +; GFX9-NEXT: s_bfe_u32 s3, s3, s14 +; GFX9-NEXT: s_lshl_b32 s8, s8, 8 +; GFX9-NEXT: s_or_b32 s7, s7, s8 +; GFX9-NEXT: s_lshl_b32 s3, s3, 16 +; GFX9-NEXT: s_or_b32 s3, s7, s3 +; GFX9-NEXT: s_lshl_b32 s7, s10, 24 +; GFX9-NEXT: s_or_b32 s3, s3, s7 +; GFX9-NEXT: s_lshr_b32 s7, s4, 2 +; GFX9-NEXT: s_cmp_eq_u32 s7, 1 +; GFX9-NEXT: s_cselect_b32 s8, s1, s0 +; GFX9-NEXT: s_cmp_eq_u32 s7, 2 +; GFX9-NEXT: s_cselect_b32 s8, s2, s8 +; GFX9-NEXT: s_cmp_eq_u32 s7, 3 +; GFX9-NEXT: s_cselect_b32 s8, s3, s8 ; GFX9-NEXT: s_and_b32 s4, s4, 3 ; GFX9-NEXT: s_lshl_b32 s4, s4, 3 -; GFX9-NEXT: s_lshl_b32 s8, s18, s4 -; GFX9-NEXT: s_andn2_b32 s7, s7, s8 -; GFX9-NEXT: v_mov_b32_e32 v1, s7 +; GFX9-NEXT: s_lshl_b32 s9, s11, s4 +; GFX9-NEXT: s_andn2_b32 s8, s8, s9 +; GFX9-NEXT: v_mov_b32_e32 v1, s8 ; GFX9-NEXT: v_lshl_or_b32 v4, v0, s4, v1 ; GFX9-NEXT: v_mov_b32_e32 v0, s0 -; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s6, 0 +; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s7, 0 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc ; GFX9-NEXT: v_mov_b32_e32 v1, s1 -; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s6, 1 +; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s7, 1 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc ; GFX9-NEXT: v_mov_b32_e32 v2, s2 -; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s6, 2 +; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s7, 2 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; GFX9-NEXT: v_mov_b32_e32 v3, s3 -; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s6, 3 +; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s7, 3 ; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc -; GFX9-NEXT: v_lshrrev_b32_e32 v4, 8, v0 -; GFX9-NEXT: v_lshrrev_b32_e32 v5, 24, v0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_or_b32 v4, v0, s18, v4 -; GFX9-NEXT: v_and_b32_sdwa v0, v0, s18 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v0 +; GFX9-NEXT: v_lshlrev_b32_sdwa v8, s5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: s_mov_b32 s6, 16 +; GFX9-NEXT: v_and_or_b32 v8, v0, s11, v8 +; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_e32 v4, 24, v4 +; GFX9-NEXT: v_lshrrev_b32_e32 v5, 24, v1 +; GFX9-NEXT: v_or3_b32 v0, v8, v0, v4 +; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_and_or_b32 v4, v1, s11, v4 +; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX9-NEXT: v_lshlrev_b32_e32 v5, 24, v5 -; GFX9-NEXT: v_or3_b32 v0, v4, v0, v5 -; GFX9-NEXT: v_lshrrev_b32_e32 v6, 8, v1 +; GFX9-NEXT: v_or3_b32 v1, v4, v1, v5 ; GFX9-NEXT: v_mov_b32_e32 v4, 8 -; GFX9-NEXT: v_lshlrev_b32_sdwa v5, v4, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v1 -; GFX9-NEXT: v_and_or_b32 v5, v1, s18, v5 -; GFX9-NEXT: v_lshrrev_b32_e32 v8, 8, v2 -; GFX9-NEXT: v_and_b32_sdwa v1, v1, s18 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b32_e32 v6, 24, v7 -; GFX9-NEXT: v_or3_b32 v1, v5, v1, v6 -; GFX9-NEXT: v_lshlrev_b32_sdwa v5, v4, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshrrev_b32_e32 v9, 24, v2 -; GFX9-NEXT: v_lshrrev_b32_e32 v10, 8, v3 -; GFX9-NEXT: v_lshlrev_b32_sdwa v4, v4, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_or_b32 v5, v2, s18, v5 -; GFX9-NEXT: v_lshrrev_b32_e32 v11, 24, v3 -; GFX9-NEXT: v_and_b32_sdwa v2, v2, s18 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b32_e32 v6, 24, v9 +; GFX9-NEXT: v_lshlrev_b32_sdwa v5, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshrrev_b32_e32 v6, 24, v2 +; GFX9-NEXT: v_mov_b32_e32 v8, 16 +; GFX9-NEXT: v_lshlrev_b32_sdwa v4, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_and_or_b32 v5, v2, s11, v5 +; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v3 +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_e32 v6, 24, v6 ; GFX9-NEXT: v_or3_b32 v2, v5, v2, v6 -; GFX9-NEXT: v_and_or_b32 v4, v3, s18, v4 -; GFX9-NEXT: v_and_b32_sdwa v3, v3, s18 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b32_e32 v5, 24, v11 +; GFX9-NEXT: v_and_or_b32 v4, v3, s11, v4 +; GFX9-NEXT: v_lshlrev_b32_sdwa v3, v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_e32 v5, 24, v7 ; GFX9-NEXT: v_or3_b32 v3, v4, v3, v5 ; GFX9-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-NEXT: v_mov_b32_e32 v5, 0 @@ -4997,56 +4710,51 @@ define amdgpu_ps void @insertelement_s_v16i8_v_s(<16 x i8> addrspace(4)* inreg % ; GFX8-LABEL: insertelement_s_v16i8_v_s: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 -; GFX8-NEXT: s_movk_i32 s17, 0xff -; GFX8-NEXT: v_mov_b32_e32 v12, 8 +; GFX8-NEXT: s_mov_b32 s11, 0x80008 +; GFX8-NEXT: s_movk_i32 s9, 0xff +; GFX8-NEXT: v_mov_b32_e32 v8, 8 +; GFX8-NEXT: v_mov_b32_e32 v10, 16 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_lshr_b32 s5, s0, 8 -; GFX8-NEXT: s_and_b32 s5, s5, s17 -; GFX8-NEXT: s_lshr_b32 s6, s0, 16 -; GFX8-NEXT: s_lshr_b32 s7, s0, 24 -; GFX8-NEXT: s_and_b32 s0, s0, s17 -; GFX8-NEXT: s_lshl_b32 s5, s5, 8 -; GFX8-NEXT: s_or_b32 s0, s0, s5 -; GFX8-NEXT: s_and_b32 s5, s6, s17 -; GFX8-NEXT: s_lshl_b32 s5, s5, 16 -; GFX8-NEXT: s_or_b32 s0, s0, s5 -; GFX8-NEXT: s_lshl_b32 s5, s7, 24 -; GFX8-NEXT: s_lshr_b32 s8, s1, 8 +; GFX8-NEXT: s_bfe_u32 s12, s0, s11 +; GFX8-NEXT: s_and_b32 s10, s0, s9 +; GFX8-NEXT: s_lshl_b32 s12, s12, 8 +; GFX8-NEXT: s_or_b32 s10, s10, s12 +; GFX8-NEXT: s_mov_b32 s12, 0x80010 +; GFX8-NEXT: s_lshr_b32 s5, s0, 24 +; GFX8-NEXT: s_bfe_u32 s0, s0, s12 +; GFX8-NEXT: s_lshl_b32 s0, s0, 16 +; GFX8-NEXT: s_or_b32 s0, s10, s0 +; GFX8-NEXT: s_bfe_u32 s10, s1, s11 +; GFX8-NEXT: s_lshl_b32 s5, s5, 24 ; GFX8-NEXT: s_or_b32 s0, s0, s5 -; GFX8-NEXT: s_and_b32 s5, s8, s17 -; GFX8-NEXT: s_lshr_b32 s9, s1, 16 -; GFX8-NEXT: s_lshr_b32 s10, s1, 24 -; GFX8-NEXT: s_and_b32 s1, s1, s17 -; GFX8-NEXT: s_lshl_b32 s5, s5, 8 -; GFX8-NEXT: s_or_b32 s1, s1, s5 -; GFX8-NEXT: s_and_b32 s5, s9, s17 -; GFX8-NEXT: s_lshl_b32 s5, s5, 16 -; GFX8-NEXT: s_or_b32 s1, s1, s5 -; GFX8-NEXT: s_lshl_b32 s5, s10, 24 -; GFX8-NEXT: s_lshr_b32 s11, s2, 8 +; GFX8-NEXT: s_lshr_b32 s6, s1, 24 +; GFX8-NEXT: s_and_b32 s5, s1, s9 +; GFX8-NEXT: s_bfe_u32 s1, s1, s12 +; GFX8-NEXT: s_lshl_b32 s10, s10, 8 +; GFX8-NEXT: s_or_b32 s5, s5, s10 +; GFX8-NEXT: s_lshl_b32 s1, s1, 16 +; GFX8-NEXT: s_or_b32 s1, s5, s1 +; GFX8-NEXT: s_lshl_b32 s5, s6, 24 +; GFX8-NEXT: s_bfe_u32 s6, s2, s11 ; GFX8-NEXT: s_or_b32 s1, s1, s5 -; GFX8-NEXT: s_and_b32 s5, s11, s17 -; GFX8-NEXT: s_lshr_b32 s12, s2, 16 -; GFX8-NEXT: s_lshr_b32 s13, s2, 24 -; GFX8-NEXT: s_and_b32 s2, s2, s17 -; GFX8-NEXT: s_lshl_b32 s5, s5, 8 -; GFX8-NEXT: s_or_b32 s2, s2, s5 -; GFX8-NEXT: s_and_b32 s5, s12, s17 -; GFX8-NEXT: s_lshl_b32 s5, s5, 16 -; GFX8-NEXT: s_or_b32 s2, s2, s5 -; GFX8-NEXT: s_lshl_b32 s5, s13, 24 -; GFX8-NEXT: s_lshr_b32 s14, s3, 8 +; GFX8-NEXT: s_lshr_b32 s7, s2, 24 +; GFX8-NEXT: s_and_b32 s5, s2, s9 +; GFX8-NEXT: s_bfe_u32 s2, s2, s12 +; GFX8-NEXT: s_lshl_b32 s6, s6, 8 +; GFX8-NEXT: s_or_b32 s5, s5, s6 +; GFX8-NEXT: s_lshl_b32 s2, s2, 16 +; GFX8-NEXT: s_bfe_u32 s6, s3, s11 +; GFX8-NEXT: s_or_b32 s2, s5, s2 +; GFX8-NEXT: s_lshl_b32 s5, s7, 24 ; GFX8-NEXT: s_or_b32 s2, s2, s5 -; GFX8-NEXT: s_and_b32 s5, s14, s17 -; GFX8-NEXT: s_lshr_b32 s15, s3, 16 -; GFX8-NEXT: s_lshr_b32 s16, s3, 24 -; GFX8-NEXT: s_and_b32 s3, s3, s17 -; GFX8-NEXT: s_lshl_b32 s5, s5, 8 -; GFX8-NEXT: s_or_b32 s3, s3, s5 -; GFX8-NEXT: s_and_b32 s5, s15, s17 -; GFX8-NEXT: s_lshl_b32 s5, s5, 16 -; GFX8-NEXT: s_or_b32 s3, s3, s5 -; GFX8-NEXT: s_lshl_b32 s5, s16, 24 +; GFX8-NEXT: s_lshr_b32 s8, s3, 24 +; GFX8-NEXT: s_and_b32 s5, s3, s9 +; GFX8-NEXT: s_bfe_u32 s3, s3, s12 +; GFX8-NEXT: s_lshl_b32 s6, s6, 8 +; GFX8-NEXT: s_or_b32 s5, s5, s6 +; GFX8-NEXT: s_lshl_b32 s3, s3, 16 +; GFX8-NEXT: s_or_b32 s3, s5, s3 +; GFX8-NEXT: s_lshl_b32 s5, s8, 24 ; GFX8-NEXT: s_or_b32 s3, s3, s5 ; GFX8-NEXT: s_lshr_b32 s5, s4, 2 ; GFX8-NEXT: s_cmp_eq_u32 s5, 1 @@ -5058,7 +4766,7 @@ define amdgpu_ps void @insertelement_s_v16i8_v_s(<16 x i8> addrspace(4)* inreg % ; GFX8-NEXT: s_and_b32 s4, s4, 3 ; GFX8-NEXT: s_lshl_b32 s4, s4, 3 ; GFX8-NEXT: v_mov_b32_e32 v1, s4 -; GFX8-NEXT: s_lshl_b32 s4, s17, s4 +; GFX8-NEXT: s_lshl_b32 s4, s9, s4 ; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX8-NEXT: s_andn2_b32 s4, s6, s4 ; GFX8-NEXT: v_or_b32_e32 v4, s4, v0 @@ -5071,41 +4779,37 @@ define amdgpu_ps void @insertelement_s_v16i8_v_s(<16 x i8> addrspace(4)* inreg % ; GFX8-NEXT: v_mov_b32_e32 v2, s2 ; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, s5, 2 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GFX8-NEXT: v_lshlrev_b32_sdwa v9, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX8-NEXT: v_mov_b32_e32 v3, s3 ; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, s5, 3 ; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 8, v0 -; GFX8-NEXT: v_lshlrev_b32_sdwa v4, v12, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_mov_b32_e32 v12, s17 -; GFX8-NEXT: v_lshrrev_b32_e32 v5, 24, v0 -; GFX8-NEXT: v_or_b32_sdwa v4, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_sdwa v0, v0, v12 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_e32 v0, v4, v0 -; GFX8-NEXT: v_lshlrev_b32_e32 v4, 24, v5 +; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v0 +; GFX8-NEXT: v_or_b32_sdwa v9, v0, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v10, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_e32 v0, v9, v0 +; GFX8-NEXT: v_lshlrev_b32_e32 v4, 24, v4 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 -; GFX8-NEXT: v_lshrrev_b32_e32 v6, 8, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v4, v8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v5, 24, v1 +; GFX8-NEXT: v_or_b32_sdwa v4, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v10, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_e32 v1, v4, v1 +; GFX8-NEXT: v_lshlrev_b32_e32 v4, 24, v5 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 ; GFX8-NEXT: v_mov_b32_e32 v4, 8 -; GFX8-NEXT: v_lshlrev_b32_sdwa v5, v4, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v7, 24, v1 -; GFX8-NEXT: v_or_b32_sdwa v5, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_sdwa v1, v1, v12 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_e32 v1, v5, v1 -; GFX8-NEXT: v_lshlrev_b32_e32 v5, 24, v7 -; GFX8-NEXT: v_lshrrev_b32_e32 v8, 8, v2 -; GFX8-NEXT: v_lshrrev_b32_e32 v10, 8, v3 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v5 -; GFX8-NEXT: v_lshlrev_b32_sdwa v5, v4, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshlrev_b32_sdwa v4, v4, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v9, 24, v2 +; GFX8-NEXT: v_lshlrev_b32_sdwa v5, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v4, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_mov_b32_e32 v8, 16 +; GFX8-NEXT: v_lshrrev_b32_e32 v6, 24, v2 ; GFX8-NEXT: v_or_b32_sdwa v5, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_sdwa v2, v2, v12 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_lshrrev_b32_e32 v11, 24, v3 +; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_lshrrev_b32_e32 v7, 24, v3 ; GFX8-NEXT: v_or_b32_sdwa v4, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_sdwa v3, v3, v12 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX8-NEXT: v_or_b32_e32 v2, v5, v2 -; GFX8-NEXT: v_lshlrev_b32_e32 v5, 24, v9 +; GFX8-NEXT: v_lshlrev_b32_e32 v5, 24, v6 ; GFX8-NEXT: v_or_b32_e32 v3, v4, v3 -; GFX8-NEXT: v_lshlrev_b32_e32 v4, 24, v11 +; GFX8-NEXT: v_lshlrev_b32_e32 v4, 24, v7 ; GFX8-NEXT: v_or_b32_e32 v2, v2, v5 ; GFX8-NEXT: v_or_b32_e32 v3, v3, v4 ; GFX8-NEXT: v_mov_b32_e32 v4, 0 @@ -5116,56 +4820,50 @@ define amdgpu_ps void @insertelement_s_v16i8_v_s(<16 x i8> addrspace(4)* inreg % ; GFX7-LABEL: insertelement_s_v16i8_v_s: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 -; GFX7-NEXT: s_movk_i32 s17, 0xff -; GFX7-NEXT: v_and_b32_e32 v0, s17, v0 +; GFX7-NEXT: s_mov_b32 s11, 0x80008 +; GFX7-NEXT: s_movk_i32 s9, 0xff +; GFX7-NEXT: v_and_b32_e32 v0, s9, v0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_lshr_b32 s5, s0, 8 -; GFX7-NEXT: s_and_b32 s5, s5, s17 -; GFX7-NEXT: s_lshr_b32 s6, s0, 16 -; GFX7-NEXT: s_lshr_b32 s7, s0, 24 -; GFX7-NEXT: s_and_b32 s0, s0, s17 -; GFX7-NEXT: s_lshl_b32 s5, s5, 8 -; GFX7-NEXT: s_or_b32 s0, s0, s5 -; GFX7-NEXT: s_and_b32 s5, s6, s17 -; GFX7-NEXT: s_lshl_b32 s5, s5, 16 -; GFX7-NEXT: s_or_b32 s0, s0, s5 -; GFX7-NEXT: s_lshl_b32 s5, s7, 24 -; GFX7-NEXT: s_lshr_b32 s8, s1, 8 +; GFX7-NEXT: s_bfe_u32 s12, s0, s11 +; GFX7-NEXT: s_and_b32 s10, s0, s9 +; GFX7-NEXT: s_lshl_b32 s12, s12, 8 +; GFX7-NEXT: s_or_b32 s10, s10, s12 +; GFX7-NEXT: s_mov_b32 s12, 0x80010 +; GFX7-NEXT: s_lshr_b32 s5, s0, 24 +; GFX7-NEXT: s_bfe_u32 s0, s0, s12 +; GFX7-NEXT: s_lshl_b32 s0, s0, 16 +; GFX7-NEXT: s_or_b32 s0, s10, s0 +; GFX7-NEXT: s_bfe_u32 s10, s1, s11 +; GFX7-NEXT: s_lshl_b32 s5, s5, 24 ; GFX7-NEXT: s_or_b32 s0, s0, s5 -; GFX7-NEXT: s_and_b32 s5, s8, s17 -; GFX7-NEXT: s_lshr_b32 s9, s1, 16 -; GFX7-NEXT: s_lshr_b32 s10, s1, 24 -; GFX7-NEXT: s_and_b32 s1, s1, s17 -; GFX7-NEXT: s_lshl_b32 s5, s5, 8 -; GFX7-NEXT: s_or_b32 s1, s1, s5 -; GFX7-NEXT: s_and_b32 s5, s9, s17 -; GFX7-NEXT: s_lshl_b32 s5, s5, 16 -; GFX7-NEXT: s_or_b32 s1, s1, s5 -; GFX7-NEXT: s_lshl_b32 s5, s10, 24 -; GFX7-NEXT: s_lshr_b32 s11, s2, 8 +; GFX7-NEXT: s_lshr_b32 s6, s1, 24 +; GFX7-NEXT: s_and_b32 s5, s1, s9 +; GFX7-NEXT: s_bfe_u32 s1, s1, s12 +; GFX7-NEXT: s_lshl_b32 s10, s10, 8 +; GFX7-NEXT: s_or_b32 s5, s5, s10 +; GFX7-NEXT: s_lshl_b32 s1, s1, 16 +; GFX7-NEXT: s_or_b32 s1, s5, s1 +; GFX7-NEXT: s_lshl_b32 s5, s6, 24 +; GFX7-NEXT: s_bfe_u32 s6, s2, s11 ; GFX7-NEXT: s_or_b32 s1, s1, s5 -; GFX7-NEXT: s_and_b32 s5, s11, s17 -; GFX7-NEXT: s_lshr_b32 s12, s2, 16 -; GFX7-NEXT: s_lshr_b32 s13, s2, 24 -; GFX7-NEXT: s_and_b32 s2, s2, s17 -; GFX7-NEXT: s_lshl_b32 s5, s5, 8 -; GFX7-NEXT: s_or_b32 s2, s2, s5 -; GFX7-NEXT: s_and_b32 s5, s12, s17 -; GFX7-NEXT: s_lshl_b32 s5, s5, 16 -; GFX7-NEXT: s_or_b32 s2, s2, s5 -; GFX7-NEXT: s_lshl_b32 s5, s13, 24 -; GFX7-NEXT: s_lshr_b32 s14, s3, 8 +; GFX7-NEXT: s_lshr_b32 s7, s2, 24 +; GFX7-NEXT: s_and_b32 s5, s2, s9 +; GFX7-NEXT: s_bfe_u32 s2, s2, s12 +; GFX7-NEXT: s_lshl_b32 s6, s6, 8 +; GFX7-NEXT: s_or_b32 s5, s5, s6 +; GFX7-NEXT: s_lshl_b32 s2, s2, 16 +; GFX7-NEXT: s_bfe_u32 s6, s3, s11 +; GFX7-NEXT: s_or_b32 s2, s5, s2 +; GFX7-NEXT: s_lshl_b32 s5, s7, 24 ; GFX7-NEXT: s_or_b32 s2, s2, s5 -; GFX7-NEXT: s_and_b32 s5, s14, s17 -; GFX7-NEXT: s_lshr_b32 s15, s3, 16 -; GFX7-NEXT: s_lshr_b32 s16, s3, 24 -; GFX7-NEXT: s_and_b32 s3, s3, s17 -; GFX7-NEXT: s_lshl_b32 s5, s5, 8 -; GFX7-NEXT: s_or_b32 s3, s3, s5 -; GFX7-NEXT: s_and_b32 s5, s15, s17 -; GFX7-NEXT: s_lshl_b32 s5, s5, 16 -; GFX7-NEXT: s_or_b32 s3, s3, s5 -; GFX7-NEXT: s_lshl_b32 s5, s16, 24 +; GFX7-NEXT: s_lshr_b32 s8, s3, 24 +; GFX7-NEXT: s_and_b32 s5, s3, s9 +; GFX7-NEXT: s_bfe_u32 s3, s3, s12 +; GFX7-NEXT: s_lshl_b32 s6, s6, 8 +; GFX7-NEXT: s_or_b32 s5, s5, s6 +; GFX7-NEXT: s_lshl_b32 s3, s3, 16 +; GFX7-NEXT: s_or_b32 s3, s5, s3 +; GFX7-NEXT: s_lshl_b32 s5, s8, 24 ; GFX7-NEXT: s_or_b32 s3, s3, s5 ; GFX7-NEXT: s_lshr_b32 s5, s4, 2 ; GFX7-NEXT: s_cmp_eq_u32 s5, 1 @@ -5177,7 +4875,7 @@ define amdgpu_ps void @insertelement_s_v16i8_v_s(<16 x i8> addrspace(4)* inreg % ; GFX7-NEXT: s_and_b32 s4, s4, 3 ; GFX7-NEXT: s_lshl_b32 s4, s4, 3 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, s4, v0 -; GFX7-NEXT: s_lshl_b32 s4, s17, s4 +; GFX7-NEXT: s_lshl_b32 s4, s9, s4 ; GFX7-NEXT: s_andn2_b32 s4, s6, s4 ; GFX7-NEXT: v_or_b32_e32 v4, s4, v0 ; GFX7-NEXT: v_mov_b32_e32 v0, s0 @@ -5188,57 +4886,49 @@ define amdgpu_ps void @insertelement_s_v16i8_v_s(<16 x i8> addrspace(4)* inreg % ; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc ; GFX7-NEXT: v_mov_b32_e32 v2, s2 ; GFX7-NEXT: v_cmp_eq_u32_e64 vcc, s5, 2 -; GFX7-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; GFX7-NEXT: v_mov_b32_e32 v3, s3 +; GFX7-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GFX7-NEXT: v_bfe_u32 v9, v0, 8, 8 ; GFX7-NEXT: v_cmp_eq_u32_e64 vcc, s5, 3 ; GFX7-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc -; GFX7-NEXT: v_lshrrev_b32_e32 v4, 8, v0 -; GFX7-NEXT: v_and_b32_e32 v4, s17, v4 -; GFX7-NEXT: v_lshrrev_b32_e32 v5, 16, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v6, 24, v0 -; GFX7-NEXT: v_and_b32_e32 v0, s17, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 8, v4 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v4 -; GFX7-NEXT: v_and_b32_e32 v4, s17, v5 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v4 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v6 -; GFX7-NEXT: v_lshrrev_b32_e32 v7, 8, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v4, 24, v0 +; GFX7-NEXT: v_and_b32_e32 v8, s9, v0 +; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v9, 8, v9 +; GFX7-NEXT: v_or_b32_e32 v8, v8, v9 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_or_b32_e32 v0, v8, v0 +; GFX7-NEXT: v_bfe_u32 v8, v1, 8, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v4 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v4 -; GFX7-NEXT: v_and_b32_e32 v4, s17, v7 -; GFX7-NEXT: v_lshrrev_b32_e32 v8, 16, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v9, 24, v1 -; GFX7-NEXT: v_and_b32_e32 v1, s17, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 8, v4 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v4 -; GFX7-NEXT: v_and_b32_e32 v4, s17, v8 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v4 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v9 -; GFX7-NEXT: v_lshrrev_b32_e32 v10, 8, v2 +; GFX7-NEXT: v_lshrrev_b32_e32 v5, 24, v1 +; GFX7-NEXT: v_and_b32_e32 v4, s9, v1 +; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v8, 8, v8 +; GFX7-NEXT: v_or_b32_e32 v4, v4, v8 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-NEXT: v_or_b32_e32 v1, v4, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v5 +; GFX7-NEXT: v_bfe_u32 v5, v2, 8, 8 ; GFX7-NEXT: v_or_b32_e32 v1, v1, v4 -; GFX7-NEXT: v_and_b32_e32 v4, s17, v10 -; GFX7-NEXT: v_lshrrev_b32_e32 v11, 16, v2 -; GFX7-NEXT: v_lshrrev_b32_e32 v12, 24, v2 -; GFX7-NEXT: v_and_b32_e32 v2, s17, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 8, v4 -; GFX7-NEXT: v_or_b32_e32 v2, v2, v4 -; GFX7-NEXT: v_and_b32_e32 v4, s17, v11 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX7-NEXT: v_or_b32_e32 v2, v2, v4 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v12 -; GFX7-NEXT: v_lshrrev_b32_e32 v13, 8, v3 +; GFX7-NEXT: v_lshrrev_b32_e32 v6, 24, v2 +; GFX7-NEXT: v_and_b32_e32 v4, s9, v2 +; GFX7-NEXT: v_bfe_u32 v2, v2, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v5, 8, v5 +; GFX7-NEXT: v_or_b32_e32 v4, v4, v5 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX7-NEXT: v_bfe_u32 v5, v3, 8, 8 +; GFX7-NEXT: v_or_b32_e32 v2, v4, v2 +; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v6 ; GFX7-NEXT: v_or_b32_e32 v2, v2, v4 -; GFX7-NEXT: v_and_b32_e32 v4, s17, v13 -; GFX7-NEXT: v_lshrrev_b32_e32 v14, 16, v3 -; GFX7-NEXT: v_lshrrev_b32_e32 v15, 24, v3 -; GFX7-NEXT: v_and_b32_e32 v3, s17, v3 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 8, v4 -; GFX7-NEXT: v_or_b32_e32 v3, v3, v4 -; GFX7-NEXT: v_and_b32_e32 v4, s17, v14 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX7-NEXT: v_or_b32_e32 v3, v3, v4 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v15 +; GFX7-NEXT: v_lshrrev_b32_e32 v7, 24, v3 +; GFX7-NEXT: v_and_b32_e32 v4, s9, v3 +; GFX7-NEXT: v_bfe_u32 v3, v3, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v5, 8, v5 +; GFX7-NEXT: v_or_b32_e32 v4, v4, v5 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX7-NEXT: v_or_b32_e32 v3, v4, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v7 ; GFX7-NEXT: s_mov_b64 s[0:1], 0 ; GFX7-NEXT: v_or_b32_e32 v3, v3, v4 ; GFX7-NEXT: s_mov_b32 s2, -1 @@ -5249,58 +4939,53 @@ define amdgpu_ps void @insertelement_s_v16i8_v_s(<16 x i8> addrspace(4)* inreg % ; GFX10-LABEL: insertelement_s_v16i8_v_s: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 +; GFX10-NEXT: s_mov_b32 s6, 0x80008 ; GFX10-NEXT: s_movk_i32 s5, 0xff -; GFX10-NEXT: v_mov_b32_e32 v9, 8 +; GFX10-NEXT: s_mov_b32 s7, 0x80010 ; GFX10-NEXT: v_and_b32_e32 v0, s5, v0 +; GFX10-NEXT: v_mov_b32_e32 v10, 8 +; GFX10-NEXT: v_mov_b32_e32 v12, 16 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_lshr_b32 s6, s0, 8 -; GFX10-NEXT: s_lshr_b32 s7, s0, 16 -; GFX10-NEXT: s_and_b32 s6, s6, s5 +; GFX10-NEXT: s_bfe_u32 s13, s0, s6 +; GFX10-NEXT: s_bfe_u32 s15, s1, s6 ; GFX10-NEXT: s_lshr_b32 s8, s0, 24 -; GFX10-NEXT: s_and_b32 s7, s7, s5 -; GFX10-NEXT: s_and_b32 s0, s0, s5 -; GFX10-NEXT: s_lshl_b32 s6, s6, 8 -; GFX10-NEXT: s_lshr_b32 s12, s2, 8 -; GFX10-NEXT: s_lshl_b32 s7, s7, 16 -; GFX10-NEXT: s_or_b32 s0, s0, s6 -; GFX10-NEXT: s_lshr_b32 s9, s1, 8 -; GFX10-NEXT: s_or_b32 s0, s0, s7 -; GFX10-NEXT: s_and_b32 s7, s12, s5 +; GFX10-NEXT: s_lshr_b32 s9, s1, 24 +; GFX10-NEXT: s_and_b32 s12, s0, s5 +; GFX10-NEXT: s_bfe_u32 s0, s0, s7 +; GFX10-NEXT: s_and_b32 s14, s1, s5 +; GFX10-NEXT: s_bfe_u32 s1, s1, s7 +; GFX10-NEXT: s_lshl_b32 s13, s13, 8 +; GFX10-NEXT: s_lshl_b32 s15, s15, 8 +; GFX10-NEXT: s_or_b32 s12, s12, s13 +; GFX10-NEXT: s_lshl_b32 s0, s0, 16 +; GFX10-NEXT: s_lshl_b32 s1, s1, 16 +; GFX10-NEXT: s_or_b32 s13, s14, s15 +; GFX10-NEXT: s_bfe_u32 s17, s2, s6 +; GFX10-NEXT: s_bfe_u32 s6, s3, s6 ; GFX10-NEXT: s_lshl_b32 s8, s8, 24 -; GFX10-NEXT: s_lshr_b32 s13, s2, 16 -; GFX10-NEXT: s_lshr_b32 s10, s1, 16 -; GFX10-NEXT: s_and_b32 s9, s9, s5 -; GFX10-NEXT: s_lshr_b32 s14, s2, 24 +; GFX10-NEXT: s_or_b32 s0, s12, s0 +; GFX10-NEXT: s_lshl_b32 s9, s9, 24 +; GFX10-NEXT: s_or_b32 s1, s13, s1 ; GFX10-NEXT: s_or_b32 s0, s0, s8 -; GFX10-NEXT: s_and_b32 s2, s2, s5 -; GFX10-NEXT: s_lshl_b32 s7, s7, 8 -; GFX10-NEXT: s_and_b32 s8, s13, s5 -; GFX10-NEXT: s_lshr_b32 s11, s1, 24 -; GFX10-NEXT: s_and_b32 s10, s10, s5 -; GFX10-NEXT: s_or_b32 s2, s2, s7 -; GFX10-NEXT: s_lshl_b32 s7, s8, 16 -; GFX10-NEXT: s_lshr_b32 s15, s3, 8 -; GFX10-NEXT: s_and_b32 s1, s1, s5 -; GFX10-NEXT: s_lshl_b32 s9, s9, 8 -; GFX10-NEXT: s_lshr_b32 s16, s3, 16 -; GFX10-NEXT: s_or_b32 s2, s2, s7 -; GFX10-NEXT: s_and_b32 s7, s15, s5 -; GFX10-NEXT: s_lshl_b32 s6, s10, 16 ; GFX10-NEXT: s_or_b32 s1, s1, s9 -; GFX10-NEXT: s_lshr_b32 s17, s3, 24 -; GFX10-NEXT: s_or_b32 s1, s1, s6 +; GFX10-NEXT: s_lshr_b32 s10, s2, 24 +; GFX10-NEXT: s_and_b32 s16, s2, s5 +; GFX10-NEXT: s_lshl_b32 s8, s17, 8 +; GFX10-NEXT: s_bfe_u32 s2, s2, s7 +; GFX10-NEXT: s_lshr_b32 s11, s3, 24 +; GFX10-NEXT: s_and_b32 s9, s3, s5 +; GFX10-NEXT: s_bfe_u32 s3, s3, s7 +; GFX10-NEXT: s_lshl_b32 s6, s6, 8 +; GFX10-NEXT: s_or_b32 s8, s16, s8 +; GFX10-NEXT: s_lshl_b32 s2, s2, 16 +; GFX10-NEXT: s_or_b32 s6, s9, s6 +; GFX10-NEXT: s_lshl_b32 s3, s3, 16 +; GFX10-NEXT: s_or_b32 s2, s8, s2 +; GFX10-NEXT: s_lshl_b32 s8, s10, 24 +; GFX10-NEXT: s_or_b32 s3, s6, s3 ; GFX10-NEXT: s_lshl_b32 s6, s11, 24 -; GFX10-NEXT: s_and_b32 s3, s3, s5 -; GFX10-NEXT: s_lshl_b32 s7, s7, 8 -; GFX10-NEXT: s_and_b32 s8, s16, s5 -; GFX10-NEXT: s_or_b32 s1, s1, s6 -; GFX10-NEXT: s_lshl_b32 s6, s14, 24 -; GFX10-NEXT: s_or_b32 s3, s3, s7 -; GFX10-NEXT: s_lshl_b32 s7, s8, 16 -; GFX10-NEXT: s_or_b32 s2, s2, s6 -; GFX10-NEXT: s_or_b32 s3, s3, s7 -; GFX10-NEXT: s_lshl_b32 s6, s17, 24 ; GFX10-NEXT: s_lshr_b32 s7, s4, 2 +; GFX10-NEXT: s_or_b32 s2, s2, s8 ; GFX10-NEXT: s_or_b32 s3, s3, s6 ; GFX10-NEXT: s_cmp_eq_u32 s7, 1 ; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s7, 0 @@ -5321,40 +5006,37 @@ define amdgpu_ps void @insertelement_s_v16i8_v_s(<16 x i8> addrspace(4)* inreg % ; GFX10-NEXT: s_mov_b32 s0, 8 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s7, 1 -; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v0 +; GFX10-NEXT: s_mov_b32 s1, 16 +; GFX10-NEXT: v_lshlrev_b32_sdwa v6, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s7, 2 -; GFX10-NEXT: v_lshlrev_b32_e32 v5, 24, v5 -; GFX10-NEXT: v_lshrrev_b32_e32 v6, 8, v1 +; GFX10-NEXT: v_and_or_b32 v6, v0, s5, v6 +; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v1 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s7, 3 -; GFX10-NEXT: v_lshrrev_b32_e32 v7, 24, v1 -; GFX10-NEXT: v_lshlrev_b32_sdwa v6, v9, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_lshrrev_b32_e32 v8, 8, v2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v9, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshlrev_b32_e32 v5, 24, v5 +; GFX10-NEXT: v_lshrrev_b32_e32 v7, 24, v2 ; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc_lo -; GFX10-NEXT: v_lshrrev_b32_e32 v4, 8, v0 -; GFX10-NEXT: v_lshrrev_b32_e32 v10, 24, v2 -; GFX10-NEXT: v_and_or_b32 v6, v1, s5, v6 -; GFX10-NEXT: v_lshlrev_b32_sdwa v8, v9, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_lshrrev_b32_e32 v11, 8, v3 -; GFX10-NEXT: v_lshlrev_b32_sdwa v4, s0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_lshrrev_b32_e32 v12, 24, v3 -; GFX10-NEXT: v_and_b32_sdwa v1, v1, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v11, v10, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_and_or_b32 v9, v1, s5, v9 +; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_lshrrev_b32_e32 v8, 24, v3 +; GFX10-NEXT: v_lshlrev_b32_sdwa v10, v10, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v0, s1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_lshlrev_b32_e32 v4, 24, v4 +; GFX10-NEXT: v_and_or_b32 v11, v2, s5, v11 +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v12, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX10-NEXT: v_lshlrev_b32_e32 v7, 24, v7 -; GFX10-NEXT: v_lshlrev_b32_sdwa v9, v9, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_and_or_b32 v4, v0, s5, v4 -; GFX10-NEXT: v_and_b32_sdwa v0, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_or_b32 v8, v2, s5, v8 -; GFX10-NEXT: v_and_b32_sdwa v2, v2, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b32_e32 v10, 24, v10 -; GFX10-NEXT: v_and_or_b32 v9, v3, s5, v9 -; GFX10-NEXT: v_and_b32_sdwa v3, v3, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b32_e32 v11, 24, v12 -; GFX10-NEXT: v_or3_b32 v0, v4, v0, v5 +; GFX10-NEXT: v_and_or_b32 v10, v3, s5, v10 +; GFX10-NEXT: v_lshlrev_b32_sdwa v3, v12, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_lshlrev_b32_e32 v8, 24, v8 +; GFX10-NEXT: v_or3_b32 v0, v6, v0, v4 +; GFX10-NEXT: v_or3_b32 v1, v9, v1, v5 ; GFX10-NEXT: v_mov_b32_e32 v4, 0 -; GFX10-NEXT: v_or3_b32 v1, v6, v1, v7 -; GFX10-NEXT: v_or3_b32 v2, v8, v2, v10 -; GFX10-NEXT: v_or3_b32 v3, v9, v3, v11 +; GFX10-NEXT: v_or3_b32 v2, v11, v2, v7 +; GFX10-NEXT: v_or3_b32 v3, v10, v3, v8 ; GFX10-NEXT: v_mov_b32_e32 v5, 0 ; GFX10-NEXT: global_store_dwordx4 v[4:5], v[0:3], off ; GFX10-NEXT: s_endpgm @@ -5368,68 +5050,62 @@ define amdgpu_ps void @insertelement_s_v16i8_s_v(<16 x i8> addrspace(4)* inreg % ; GFX9-LABEL: insertelement_s_v16i8_s_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 -; GFX9-NEXT: s_movk_i32 s18, 0xff +; GFX9-NEXT: s_mov_b32 s13, 0x80008 +; GFX9-NEXT: s_movk_i32 s12, 0xff ; GFX9-NEXT: v_lshrrev_b32_e32 v4, 2, v0 ; GFX9-NEXT: v_and_b32_e32 v0, 3, v0 -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v4 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_lshr_b32 s5, s0, 8 -; GFX9-NEXT: s_and_b32 s5, s5, s18 -; GFX9-NEXT: s_lshr_b32 s7, s0, 16 -; GFX9-NEXT: s_lshr_b32 s8, s0, 24 -; GFX9-NEXT: s_and_b32 s0, s0, s18 +; GFX9-NEXT: s_bfe_u32 s14, s0, s13 +; GFX9-NEXT: s_and_b32 s8, s0, s12 +; GFX9-NEXT: s_lshl_b32 s14, s14, 8 +; GFX9-NEXT: s_or_b32 s8, s8, s14 +; GFX9-NEXT: s_mov_b32 s14, 0x80010 +; GFX9-NEXT: s_lshr_b32 s5, s0, 24 +; GFX9-NEXT: s_bfe_u32 s0, s0, s14 +; GFX9-NEXT: s_lshl_b32 s0, s0, 16 +; GFX9-NEXT: s_or_b32 s0, s8, s0 +; GFX9-NEXT: s_lshl_b32 s5, s5, 24 +; GFX9-NEXT: s_or_b32 s8, s0, s5 +; GFX9-NEXT: s_bfe_u32 s5, s1, s13 +; GFX9-NEXT: s_lshr_b32 s9, s1, 24 +; GFX9-NEXT: s_and_b32 s0, s1, s12 +; GFX9-NEXT: s_bfe_u32 s1, s1, s14 ; GFX9-NEXT: s_lshl_b32 s5, s5, 8 ; GFX9-NEXT: s_or_b32 s0, s0, s5 -; GFX9-NEXT: s_and_b32 s5, s7, s18 -; GFX9-NEXT: s_lshl_b32 s5, s5, 16 -; GFX9-NEXT: s_or_b32 s0, s0, s5 -; GFX9-NEXT: s_lshl_b32 s5, s8, 24 -; GFX9-NEXT: s_lshr_b32 s9, s1, 8 -; GFX9-NEXT: s_or_b32 s8, s0, s5 -; GFX9-NEXT: s_lshr_b32 s10, s1, 16 -; GFX9-NEXT: s_lshr_b32 s11, s1, 24 -; GFX9-NEXT: s_and_b32 s0, s1, s18 -; GFX9-NEXT: s_and_b32 s1, s9, s18 -; GFX9-NEXT: s_lshl_b32 s1, s1, 8 -; GFX9-NEXT: s_or_b32 s0, s0, s1 -; GFX9-NEXT: s_and_b32 s1, s10, s18 ; GFX9-NEXT: s_lshl_b32 s1, s1, 16 ; GFX9-NEXT: s_or_b32 s0, s0, s1 -; GFX9-NEXT: s_lshl_b32 s1, s11, 24 -; GFX9-NEXT: s_lshr_b32 s12, s2, 8 +; GFX9-NEXT: s_lshl_b32 s1, s9, 24 ; GFX9-NEXT: s_or_b32 s9, s0, s1 -; GFX9-NEXT: s_and_b32 s1, s12, s18 -; GFX9-NEXT: s_lshr_b32 s13, s2, 16 -; GFX9-NEXT: s_and_b32 s0, s2, s18 +; GFX9-NEXT: s_bfe_u32 s1, s2, s13 +; GFX9-NEXT: s_and_b32 s0, s2, s12 ; GFX9-NEXT: s_lshl_b32 s1, s1, 8 ; GFX9-NEXT: s_or_b32 s0, s0, s1 -; GFX9-NEXT: s_and_b32 s1, s13, s18 +; GFX9-NEXT: s_bfe_u32 s1, s2, s14 ; GFX9-NEXT: s_lshl_b32 s1, s1, 16 -; GFX9-NEXT: s_lshr_b32 s14, s2, 24 +; GFX9-NEXT: s_lshr_b32 s10, s2, 24 ; GFX9-NEXT: s_or_b32 s0, s0, s1 -; GFX9-NEXT: s_lshl_b32 s1, s14, 24 -; GFX9-NEXT: s_lshr_b32 s15, s3, 8 +; GFX9-NEXT: s_lshl_b32 s1, s10, 24 ; GFX9-NEXT: s_or_b32 s10, s0, s1 -; GFX9-NEXT: s_and_b32 s1, s15, s18 -; GFX9-NEXT: s_lshr_b32 s16, s3, 16 -; GFX9-NEXT: s_and_b32 s0, s3, s18 +; GFX9-NEXT: s_bfe_u32 s1, s3, s13 +; GFX9-NEXT: s_and_b32 s0, s3, s12 ; GFX9-NEXT: s_lshl_b32 s1, s1, 8 ; GFX9-NEXT: s_or_b32 s0, s0, s1 -; GFX9-NEXT: s_and_b32 s1, s16, s18 -; GFX9-NEXT: s_lshr_b32 s17, s3, 24 +; GFX9-NEXT: s_bfe_u32 s1, s3, s14 +; GFX9-NEXT: s_lshr_b32 s11, s3, 24 ; GFX9-NEXT: s_lshl_b32 s1, s1, 16 ; GFX9-NEXT: s_or_b32 s0, s0, s1 -; GFX9-NEXT: s_lshl_b32 s1, s17, 24 +; GFX9-NEXT: s_lshl_b32 s1, s11, 24 ; GFX9-NEXT: v_mov_b32_e32 v1, s8 ; GFX9-NEXT: v_mov_b32_e32 v2, s9 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v4 ; GFX9-NEXT: s_or_b32 s11, s0, s1 ; GFX9-NEXT: v_mov_b32_e32 v3, s10 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc ; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], 2, v4 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 3, v0 -; GFX9-NEXT: s_and_b32 s4, s4, s18 +; GFX9-NEXT: s_and_b32 s4, s4, s12 ; GFX9-NEXT: v_lshlrev_b32_e64 v2, v0, s4 -; GFX9-NEXT: v_lshlrev_b32_e64 v0, v0, s18 +; GFX9-NEXT: v_lshlrev_b32_e64 v0, v0, s12 ; GFX9-NEXT: v_mov_b32_e32 v5, s11 ; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] ; GFX9-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v4 @@ -5439,41 +5115,39 @@ define amdgpu_ps void @insertelement_s_v16i8_s_v(<16 x i8> addrspace(4)* inreg % ; GFX9-NEXT: v_mov_b32_e32 v0, s8 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v5, s[4:5] +; GFX9-NEXT: s_mov_b32 s6, 8 ; GFX9-NEXT: v_mov_b32_e32 v1, s9 +; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v0 +; GFX9-NEXT: v_lshlrev_b32_sdwa v8, s6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: s_mov_b32 s7, 16 ; GFX9-NEXT: v_mov_b32_e32 v2, s10 ; GFX9-NEXT: v_mov_b32_e32 v3, s11 -; GFX9-NEXT: s_mov_b32 s6, 8 -; GFX9-NEXT: v_lshrrev_b32_e32 v4, 8, v0 +; GFX9-NEXT: v_and_or_b32 v8, v0, s12, v8 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc +; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_e32 v4, 24, v4 +; GFX9-NEXT: v_or3_b32 v0, v8, v0, v4 +; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v5, s[0:1] ; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v5, s[2:3] -; GFX9-NEXT: v_lshrrev_b32_e32 v5, 24, v0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s6, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_or_b32 v4, v0, s18, v4 -; GFX9-NEXT: v_and_b32_sdwa v0, v0, s18 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_lshrrev_b32_e32 v5, 24, v1 +; GFX9-NEXT: v_and_or_b32 v4, v1, s12, v4 +; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX9-NEXT: v_lshlrev_b32_e32 v5, 24, v5 -; GFX9-NEXT: v_or3_b32 v0, v4, v0, v5 -; GFX9-NEXT: v_lshrrev_b32_e32 v6, 8, v1 +; GFX9-NEXT: v_or3_b32 v1, v4, v1, v5 ; GFX9-NEXT: v_mov_b32_e32 v4, 8 -; GFX9-NEXT: v_lshlrev_b32_sdwa v5, v4, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v1 -; GFX9-NEXT: v_and_or_b32 v5, v1, s18, v5 -; GFX9-NEXT: v_lshrrev_b32_e32 v8, 8, v2 -; GFX9-NEXT: v_and_b32_sdwa v1, v1, s18 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b32_e32 v6, 24, v7 -; GFX9-NEXT: v_or3_b32 v1, v5, v1, v6 -; GFX9-NEXT: v_lshlrev_b32_sdwa v5, v4, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshrrev_b32_e32 v9, 24, v2 -; GFX9-NEXT: v_lshrrev_b32_e32 v10, 8, v3 -; GFX9-NEXT: v_lshlrev_b32_sdwa v4, v4, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_or_b32 v5, v2, s18, v5 -; GFX9-NEXT: v_lshrrev_b32_e32 v11, 24, v3 -; GFX9-NEXT: v_and_b32_sdwa v2, v2, s18 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b32_e32 v6, 24, v9 +; GFX9-NEXT: v_lshlrev_b32_sdwa v5, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshrrev_b32_e32 v6, 24, v2 +; GFX9-NEXT: v_mov_b32_e32 v8, 16 +; GFX9-NEXT: v_lshlrev_b32_sdwa v4, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_and_or_b32 v5, v2, s12, v5 +; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v3 +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_e32 v6, 24, v6 ; GFX9-NEXT: v_or3_b32 v2, v5, v2, v6 -; GFX9-NEXT: v_and_or_b32 v4, v3, s18, v4 -; GFX9-NEXT: v_and_b32_sdwa v3, v3, s18 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b32_e32 v5, 24, v11 +; GFX9-NEXT: v_and_or_b32 v4, v3, s12, v4 +; GFX9-NEXT: v_lshlrev_b32_sdwa v3, v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_e32 v5, 24, v7 ; GFX9-NEXT: v_or3_b32 v3, v4, v3, v5 ; GFX9-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-NEXT: v_mov_b32_e32 v5, 0 @@ -5483,68 +5157,62 @@ define amdgpu_ps void @insertelement_s_v16i8_s_v(<16 x i8> addrspace(4)* inreg % ; GFX8-LABEL: insertelement_s_v16i8_s_v: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 -; GFX8-NEXT: s_movk_i32 s18, 0xff +; GFX8-NEXT: s_mov_b32 s13, 0x80008 +; GFX8-NEXT: s_movk_i32 s12, 0xff +; GFX8-NEXT: s_mov_b32 s14, 0x80010 ; GFX8-NEXT: v_lshrrev_b32_e32 v4, 2, v0 -; GFX8-NEXT: v_and_b32_e32 v0, 3, v0 -; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v4 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_lshr_b32 s5, s0, 8 -; GFX8-NEXT: s_and_b32 s5, s5, s18 -; GFX8-NEXT: s_lshr_b32 s6, s0, 16 -; GFX8-NEXT: s_lshr_b32 s7, s0, 24 -; GFX8-NEXT: s_and_b32 s0, s0, s18 +; GFX8-NEXT: s_bfe_u32 s9, s0, s13 +; GFX8-NEXT: s_lshr_b32 s5, s0, 24 +; GFX8-NEXT: s_and_b32 s8, s0, s12 +; GFX8-NEXT: s_bfe_u32 s0, s0, s14 +; GFX8-NEXT: s_lshl_b32 s9, s9, 8 +; GFX8-NEXT: s_or_b32 s8, s8, s9 +; GFX8-NEXT: s_lshl_b32 s0, s0, 16 +; GFX8-NEXT: s_or_b32 s0, s8, s0 +; GFX8-NEXT: s_lshl_b32 s5, s5, 24 +; GFX8-NEXT: s_or_b32 s8, s0, s5 +; GFX8-NEXT: s_bfe_u32 s5, s1, s13 +; GFX8-NEXT: s_lshr_b32 s6, s1, 24 +; GFX8-NEXT: s_and_b32 s0, s1, s12 +; GFX8-NEXT: s_bfe_u32 s1, s1, s14 ; GFX8-NEXT: s_lshl_b32 s5, s5, 8 ; GFX8-NEXT: s_or_b32 s0, s0, s5 -; GFX8-NEXT: s_and_b32 s5, s6, s18 -; GFX8-NEXT: s_lshl_b32 s5, s5, 16 -; GFX8-NEXT: s_or_b32 s0, s0, s5 -; GFX8-NEXT: s_lshl_b32 s5, s7, 24 -; GFX8-NEXT: s_lshr_b32 s9, s1, 8 -; GFX8-NEXT: s_or_b32 s8, s0, s5 -; GFX8-NEXT: s_lshr_b32 s10, s1, 16 -; GFX8-NEXT: s_lshr_b32 s11, s1, 24 -; GFX8-NEXT: s_and_b32 s0, s1, s18 -; GFX8-NEXT: s_and_b32 s1, s9, s18 -; GFX8-NEXT: s_lshl_b32 s1, s1, 8 -; GFX8-NEXT: s_or_b32 s0, s0, s1 -; GFX8-NEXT: s_and_b32 s1, s10, s18 ; GFX8-NEXT: s_lshl_b32 s1, s1, 16 ; GFX8-NEXT: s_or_b32 s0, s0, s1 -; GFX8-NEXT: s_lshl_b32 s1, s11, 24 -; GFX8-NEXT: s_lshr_b32 s12, s2, 8 +; GFX8-NEXT: s_lshl_b32 s1, s6, 24 ; GFX8-NEXT: s_or_b32 s9, s0, s1 -; GFX8-NEXT: s_and_b32 s1, s12, s18 -; GFX8-NEXT: s_lshr_b32 s13, s2, 16 -; GFX8-NEXT: s_and_b32 s0, s2, s18 +; GFX8-NEXT: s_bfe_u32 s1, s2, s13 +; GFX8-NEXT: s_and_b32 s0, s2, s12 ; GFX8-NEXT: s_lshl_b32 s1, s1, 8 ; GFX8-NEXT: s_or_b32 s0, s0, s1 -; GFX8-NEXT: s_and_b32 s1, s13, s18 +; GFX8-NEXT: s_bfe_u32 s1, s2, s14 ; GFX8-NEXT: s_lshl_b32 s1, s1, 16 -; GFX8-NEXT: s_lshr_b32 s14, s2, 24 +; GFX8-NEXT: s_lshr_b32 s7, s2, 24 ; GFX8-NEXT: s_or_b32 s0, s0, s1 -; GFX8-NEXT: s_lshl_b32 s1, s14, 24 -; GFX8-NEXT: s_lshr_b32 s15, s3, 8 +; GFX8-NEXT: s_lshl_b32 s1, s7, 24 ; GFX8-NEXT: s_or_b32 s10, s0, s1 -; GFX8-NEXT: s_and_b32 s1, s15, s18 -; GFX8-NEXT: s_lshr_b32 s16, s3, 16 -; GFX8-NEXT: s_and_b32 s0, s3, s18 +; GFX8-NEXT: s_bfe_u32 s1, s3, s13 +; GFX8-NEXT: s_and_b32 s0, s3, s12 ; GFX8-NEXT: s_lshl_b32 s1, s1, 8 ; GFX8-NEXT: s_or_b32 s0, s0, s1 -; GFX8-NEXT: s_and_b32 s1, s16, s18 -; GFX8-NEXT: s_lshr_b32 s17, s3, 24 +; GFX8-NEXT: s_bfe_u32 s1, s3, s14 +; GFX8-NEXT: s_lshr_b32 s11, s3, 24 ; GFX8-NEXT: s_lshl_b32 s1, s1, 16 +; GFX8-NEXT: v_and_b32_e32 v0, 3, v0 ; GFX8-NEXT: s_or_b32 s0, s0, s1 -; GFX8-NEXT: s_lshl_b32 s1, s17, 24 +; GFX8-NEXT: s_lshl_b32 s1, s11, 24 ; GFX8-NEXT: v_mov_b32_e32 v1, s8 ; GFX8-NEXT: v_mov_b32_e32 v2, s9 +; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v4 ; GFX8-NEXT: s_or_b32 s11, s0, s1 ; GFX8-NEXT: v_mov_b32_e32 v3, s10 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc ; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], 2, v4 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 3, v0 -; GFX8-NEXT: s_and_b32 s4, s4, s18 +; GFX8-NEXT: s_and_b32 s4, s4, s12 ; GFX8-NEXT: v_lshlrev_b32_e64 v2, v0, s4 -; GFX8-NEXT: v_lshlrev_b32_e64 v0, v0, s18 +; GFX8-NEXT: v_lshlrev_b32_e64 v0, v0, s12 ; GFX8-NEXT: v_mov_b32_e32 v5, s11 ; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] ; GFX8-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v4 @@ -5555,45 +5223,42 @@ define amdgpu_ps void @insertelement_s_v16i8_s_v(<16 x i8> addrspace(4)* inreg % ; GFX8-NEXT: v_mov_b32_e32 v0, s8 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v5, s[4:5] -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 8, v0 -; GFX8-NEXT: v_mov_b32_e32 v12, 8 -; GFX8-NEXT: v_lshlrev_b32_sdwa v4, v12, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_mov_b32_e32 v8, 8 +; GFX8-NEXT: v_lshlrev_b32_sdwa v9, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_mov_b32_e32 v10, 16 ; GFX8-NEXT: v_mov_b32_e32 v1, s9 +; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v0 +; GFX8-NEXT: v_or_b32_sdwa v9, v0, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v10, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc +; GFX8-NEXT: v_or_b32_e32 v0, v9, v0 +; GFX8-NEXT: v_lshlrev_b32_e32 v4, 24, v4 ; GFX8-NEXT: v_mov_b32_e32 v2, s10 ; GFX8-NEXT: v_mov_b32_e32 v3, s11 -; GFX8-NEXT: v_mov_b32_e32 v12, s18 -; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc +; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX8-NEXT: v_lshlrev_b32_sdwa v4, v8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v5, s[0:1] ; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v5, s[2:3] -; GFX8-NEXT: v_lshrrev_b32_e32 v5, 24, v0 -; GFX8-NEXT: v_or_b32_sdwa v4, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_sdwa v0, v0, v12 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_e32 v0, v4, v0 +; GFX8-NEXT: v_lshrrev_b32_e32 v5, 24, v1 +; GFX8-NEXT: v_or_b32_sdwa v4, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v10, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_e32 v1, v4, v1 ; GFX8-NEXT: v_lshlrev_b32_e32 v4, 24, v5 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 -; GFX8-NEXT: v_lshrrev_b32_e32 v6, 8, v1 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 ; GFX8-NEXT: v_mov_b32_e32 v4, 8 -; GFX8-NEXT: v_lshlrev_b32_sdwa v5, v4, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v7, 24, v1 -; GFX8-NEXT: v_or_b32_sdwa v5, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_sdwa v1, v1, v12 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_e32 v1, v5, v1 -; GFX8-NEXT: v_lshlrev_b32_e32 v5, 24, v7 -; GFX8-NEXT: v_lshrrev_b32_e32 v8, 8, v2 -; GFX8-NEXT: v_lshrrev_b32_e32 v10, 8, v3 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v5 -; GFX8-NEXT: v_lshlrev_b32_sdwa v5, v4, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshlrev_b32_sdwa v4, v4, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v9, 24, v2 +; GFX8-NEXT: v_lshlrev_b32_sdwa v5, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v4, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_mov_b32_e32 v8, 16 +; GFX8-NEXT: v_lshrrev_b32_e32 v6, 24, v2 ; GFX8-NEXT: v_or_b32_sdwa v5, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_sdwa v2, v2, v12 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_lshrrev_b32_e32 v11, 24, v3 +; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_lshrrev_b32_e32 v7, 24, v3 ; GFX8-NEXT: v_or_b32_sdwa v4, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_sdwa v3, v3, v12 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX8-NEXT: v_or_b32_e32 v2, v5, v2 -; GFX8-NEXT: v_lshlrev_b32_e32 v5, 24, v9 +; GFX8-NEXT: v_lshlrev_b32_e32 v5, 24, v6 ; GFX8-NEXT: v_or_b32_e32 v3, v4, v3 -; GFX8-NEXT: v_lshlrev_b32_e32 v4, 24, v11 +; GFX8-NEXT: v_lshlrev_b32_e32 v4, 24, v7 ; GFX8-NEXT: v_or_b32_e32 v2, v2, v5 ; GFX8-NEXT: v_or_b32_e32 v3, v3, v4 ; GFX8-NEXT: v_mov_b32_e32 v4, 0 @@ -5604,68 +5269,62 @@ define amdgpu_ps void @insertelement_s_v16i8_s_v(<16 x i8> addrspace(4)* inreg % ; GFX7-LABEL: insertelement_s_v16i8_s_v: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 -; GFX7-NEXT: s_movk_i32 s18, 0xff +; GFX7-NEXT: s_mov_b32 s13, 0x80008 +; GFX7-NEXT: s_movk_i32 s12, 0xff +; GFX7-NEXT: s_mov_b32 s14, 0x80010 ; GFX7-NEXT: v_lshrrev_b32_e32 v4, 2, v0 -; GFX7-NEXT: v_and_b32_e32 v0, 3, v0 -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v4 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_lshr_b32 s5, s0, 8 -; GFX7-NEXT: s_and_b32 s5, s5, s18 -; GFX7-NEXT: s_lshr_b32 s6, s0, 16 -; GFX7-NEXT: s_lshr_b32 s7, s0, 24 -; GFX7-NEXT: s_and_b32 s0, s0, s18 +; GFX7-NEXT: s_bfe_u32 s9, s0, s13 +; GFX7-NEXT: s_lshr_b32 s5, s0, 24 +; GFX7-NEXT: s_and_b32 s8, s0, s12 +; GFX7-NEXT: s_bfe_u32 s0, s0, s14 +; GFX7-NEXT: s_lshl_b32 s9, s9, 8 +; GFX7-NEXT: s_or_b32 s8, s8, s9 +; GFX7-NEXT: s_lshl_b32 s0, s0, 16 +; GFX7-NEXT: s_or_b32 s0, s8, s0 +; GFX7-NEXT: s_lshl_b32 s5, s5, 24 +; GFX7-NEXT: s_or_b32 s8, s0, s5 +; GFX7-NEXT: s_bfe_u32 s5, s1, s13 +; GFX7-NEXT: s_lshr_b32 s6, s1, 24 +; GFX7-NEXT: s_and_b32 s0, s1, s12 +; GFX7-NEXT: s_bfe_u32 s1, s1, s14 ; GFX7-NEXT: s_lshl_b32 s5, s5, 8 ; GFX7-NEXT: s_or_b32 s0, s0, s5 -; GFX7-NEXT: s_and_b32 s5, s6, s18 -; GFX7-NEXT: s_lshl_b32 s5, s5, 16 -; GFX7-NEXT: s_or_b32 s0, s0, s5 -; GFX7-NEXT: s_lshl_b32 s5, s7, 24 -; GFX7-NEXT: s_lshr_b32 s9, s1, 8 -; GFX7-NEXT: s_or_b32 s8, s0, s5 -; GFX7-NEXT: s_lshr_b32 s10, s1, 16 -; GFX7-NEXT: s_lshr_b32 s11, s1, 24 -; GFX7-NEXT: s_and_b32 s0, s1, s18 -; GFX7-NEXT: s_and_b32 s1, s9, s18 -; GFX7-NEXT: s_lshl_b32 s1, s1, 8 -; GFX7-NEXT: s_or_b32 s0, s0, s1 -; GFX7-NEXT: s_and_b32 s1, s10, s18 ; GFX7-NEXT: s_lshl_b32 s1, s1, 16 ; GFX7-NEXT: s_or_b32 s0, s0, s1 -; GFX7-NEXT: s_lshl_b32 s1, s11, 24 -; GFX7-NEXT: s_lshr_b32 s12, s2, 8 +; GFX7-NEXT: s_lshl_b32 s1, s6, 24 ; GFX7-NEXT: s_or_b32 s9, s0, s1 -; GFX7-NEXT: s_and_b32 s1, s12, s18 -; GFX7-NEXT: s_lshr_b32 s13, s2, 16 -; GFX7-NEXT: s_and_b32 s0, s2, s18 +; GFX7-NEXT: s_bfe_u32 s1, s2, s13 +; GFX7-NEXT: s_and_b32 s0, s2, s12 ; GFX7-NEXT: s_lshl_b32 s1, s1, 8 ; GFX7-NEXT: s_or_b32 s0, s0, s1 -; GFX7-NEXT: s_and_b32 s1, s13, s18 +; GFX7-NEXT: s_bfe_u32 s1, s2, s14 ; GFX7-NEXT: s_lshl_b32 s1, s1, 16 -; GFX7-NEXT: s_lshr_b32 s14, s2, 24 +; GFX7-NEXT: s_lshr_b32 s7, s2, 24 ; GFX7-NEXT: s_or_b32 s0, s0, s1 -; GFX7-NEXT: s_lshl_b32 s1, s14, 24 -; GFX7-NEXT: s_lshr_b32 s15, s3, 8 +; GFX7-NEXT: s_lshl_b32 s1, s7, 24 ; GFX7-NEXT: s_or_b32 s10, s0, s1 -; GFX7-NEXT: s_and_b32 s1, s15, s18 -; GFX7-NEXT: s_lshr_b32 s16, s3, 16 -; GFX7-NEXT: s_and_b32 s0, s3, s18 +; GFX7-NEXT: s_bfe_u32 s1, s3, s13 +; GFX7-NEXT: s_and_b32 s0, s3, s12 ; GFX7-NEXT: s_lshl_b32 s1, s1, 8 ; GFX7-NEXT: s_or_b32 s0, s0, s1 -; GFX7-NEXT: s_and_b32 s1, s16, s18 -; GFX7-NEXT: s_lshr_b32 s17, s3, 24 +; GFX7-NEXT: s_bfe_u32 s1, s3, s14 +; GFX7-NEXT: s_lshr_b32 s11, s3, 24 ; GFX7-NEXT: s_lshl_b32 s1, s1, 16 +; GFX7-NEXT: v_and_b32_e32 v0, 3, v0 ; GFX7-NEXT: s_or_b32 s0, s0, s1 -; GFX7-NEXT: s_lshl_b32 s1, s17, 24 +; GFX7-NEXT: s_lshl_b32 s1, s11, 24 ; GFX7-NEXT: v_mov_b32_e32 v1, s8 ; GFX7-NEXT: v_mov_b32_e32 v2, s9 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v4 ; GFX7-NEXT: s_or_b32 s11, s0, s1 ; GFX7-NEXT: v_mov_b32_e32 v3, s10 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc ; GFX7-NEXT: v_cmp_eq_u32_e64 s[0:1], 2, v4 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 3, v0 -; GFX7-NEXT: s_and_b32 s4, s4, s18 +; GFX7-NEXT: s_and_b32 s4, s4, s12 ; GFX7-NEXT: v_lshl_b32_e32 v2, s4, v0 -; GFX7-NEXT: v_lshl_b32_e32 v0, s18, v0 +; GFX7-NEXT: v_lshl_b32_e32 v0, s12, v0 ; GFX7-NEXT: v_mov_b32_e32 v5, s11 ; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] ; GFX7-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v4 @@ -5676,59 +5335,51 @@ define amdgpu_ps void @insertelement_s_v16i8_s_v(<16 x i8> addrspace(4)* inreg % ; GFX7-NEXT: v_mov_b32_e32 v0, s8 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4 ; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, v5, s[4:5] -; GFX7-NEXT: v_lshrrev_b32_e32 v4, 8, v0 +; GFX7-NEXT: v_bfe_u32 v9, v0, 8, 8 ; GFX7-NEXT: v_mov_b32_e32 v1, s9 +; GFX7-NEXT: v_lshrrev_b32_e32 v4, 24, v0 +; GFX7-NEXT: v_and_b32_e32 v8, s12, v0 +; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v9, 8, v9 +; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc +; GFX7-NEXT: v_or_b32_e32 v8, v8, v9 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX7-NEXT: v_mov_b32_e32 v2, s10 ; GFX7-NEXT: v_mov_b32_e32 v3, s11 -; GFX7-NEXT: v_and_b32_e32 v4, s18, v4 +; GFX7-NEXT: v_or_b32_e32 v0, v8, v0 +; GFX7-NEXT: v_bfe_u32 v8, v1, 8, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v4 ; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v5, s[2:3] ; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, v5, s[0:1] -; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc -; GFX7-NEXT: v_lshrrev_b32_e32 v5, 16, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v6, 24, v0 -; GFX7-NEXT: v_and_b32_e32 v0, s18, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 8, v4 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v4 -; GFX7-NEXT: v_and_b32_e32 v4, s18, v5 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v4 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v4 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v6 -; GFX7-NEXT: v_lshrrev_b32_e32 v7, 8, v1 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v4 -; GFX7-NEXT: v_and_b32_e32 v4, s18, v7 -; GFX7-NEXT: v_lshrrev_b32_e32 v8, 16, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v9, 24, v1 -; GFX7-NEXT: v_and_b32_e32 v1, s18, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 8, v4 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v4 -; GFX7-NEXT: v_and_b32_e32 v4, s18, v8 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v4 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v9 -; GFX7-NEXT: v_lshrrev_b32_e32 v10, 8, v2 +; GFX7-NEXT: v_lshrrev_b32_e32 v5, 24, v1 +; GFX7-NEXT: v_and_b32_e32 v4, s12, v1 +; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v8, 8, v8 +; GFX7-NEXT: v_or_b32_e32 v4, v4, v8 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-NEXT: v_or_b32_e32 v1, v4, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v5 +; GFX7-NEXT: v_bfe_u32 v5, v2, 8, 8 ; GFX7-NEXT: v_or_b32_e32 v1, v1, v4 -; GFX7-NEXT: v_and_b32_e32 v4, s18, v10 -; GFX7-NEXT: v_lshrrev_b32_e32 v11, 16, v2 -; GFX7-NEXT: v_lshrrev_b32_e32 v12, 24, v2 -; GFX7-NEXT: v_and_b32_e32 v2, s18, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 8, v4 -; GFX7-NEXT: v_or_b32_e32 v2, v2, v4 -; GFX7-NEXT: v_and_b32_e32 v4, s18, v11 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX7-NEXT: v_or_b32_e32 v2, v2, v4 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v12 -; GFX7-NEXT: v_lshrrev_b32_e32 v13, 8, v3 +; GFX7-NEXT: v_lshrrev_b32_e32 v6, 24, v2 +; GFX7-NEXT: v_and_b32_e32 v4, s12, v2 +; GFX7-NEXT: v_bfe_u32 v2, v2, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v5, 8, v5 +; GFX7-NEXT: v_or_b32_e32 v4, v4, v5 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX7-NEXT: v_bfe_u32 v5, v3, 8, 8 +; GFX7-NEXT: v_or_b32_e32 v2, v4, v2 +; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v6 ; GFX7-NEXT: v_or_b32_e32 v2, v2, v4 -; GFX7-NEXT: v_and_b32_e32 v4, s18, v13 -; GFX7-NEXT: v_lshrrev_b32_e32 v14, 16, v3 -; GFX7-NEXT: v_lshrrev_b32_e32 v15, 24, v3 -; GFX7-NEXT: v_and_b32_e32 v3, s18, v3 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 8, v4 -; GFX7-NEXT: v_or_b32_e32 v3, v3, v4 -; GFX7-NEXT: v_and_b32_e32 v4, s18, v14 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX7-NEXT: v_or_b32_e32 v3, v3, v4 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v15 +; GFX7-NEXT: v_lshrrev_b32_e32 v7, 24, v3 +; GFX7-NEXT: v_and_b32_e32 v4, s12, v3 +; GFX7-NEXT: v_bfe_u32 v3, v3, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v5, 8, v5 +; GFX7-NEXT: v_or_b32_e32 v4, v4, v5 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX7-NEXT: v_or_b32_e32 v3, v4, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v7 ; GFX7-NEXT: s_mov_b64 s[0:1], 0 ; GFX7-NEXT: v_or_b32_e32 v3, v3, v4 ; GFX7-NEXT: s_mov_b32 s2, -1 @@ -5739,68 +5390,64 @@ define amdgpu_ps void @insertelement_s_v16i8_s_v(<16 x i8> addrspace(4)* inreg % ; GFX10-LABEL: insertelement_s_v16i8_s_v: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 +; GFX10-NEXT: s_mov_b32 s6, 0x80008 ; GFX10-NEXT: s_movk_i32 s5, 0xff +; GFX10-NEXT: s_mov_b32 s7, 0x80010 ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 2, v0 ; GFX10-NEXT: v_and_b32_e32 v0, 3, v0 -; GFX10-NEXT: v_mov_b32_e32 v9, 8 +; GFX10-NEXT: v_mov_b32_e32 v10, 8 +; GFX10-NEXT: v_mov_b32_e32 v12, 16 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v4 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 3, v0 ; GFX10-NEXT: v_lshlrev_b32_e64 v2, v0, s5 -; GFX10-NEXT: v_xor_b32_e32 v2, -1, v2 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_lshr_b32 s6, s0, 8 -; GFX10-NEXT: s_lshr_b32 s7, s0, 16 -; GFX10-NEXT: s_lshr_b32 s9, s1, 8 -; GFX10-NEXT: s_and_b32 s6, s6, s5 +; GFX10-NEXT: s_bfe_u32 s13, s0, s6 ; GFX10-NEXT: s_lshr_b32 s8, s0, 24 -; GFX10-NEXT: s_lshr_b32 s10, s1, 16 -; GFX10-NEXT: s_and_b32 s7, s7, s5 -; GFX10-NEXT: s_and_b32 s9, s9, s5 -; GFX10-NEXT: s_and_b32 s0, s0, s5 -; GFX10-NEXT: s_lshl_b32 s6, s6, 8 -; GFX10-NEXT: s_lshr_b32 s11, s1, 24 -; GFX10-NEXT: s_and_b32 s1, s1, s5 -; GFX10-NEXT: s_lshl_b32 s9, s9, 8 -; GFX10-NEXT: s_and_b32 s10, s10, s5 -; GFX10-NEXT: s_or_b32 s0, s0, s6 -; GFX10-NEXT: s_lshl_b32 s7, s7, 16 -; GFX10-NEXT: s_lshr_b32 s12, s2, 8 +; GFX10-NEXT: s_and_b32 s12, s0, s5 +; GFX10-NEXT: s_bfe_u32 s0, s0, s7 +; GFX10-NEXT: s_lshl_b32 s13, s13, 8 +; GFX10-NEXT: s_bfe_u32 s15, s1, s6 +; GFX10-NEXT: s_lshl_b32 s0, s0, 16 +; GFX10-NEXT: s_or_b32 s12, s12, s13 +; GFX10-NEXT: s_lshr_b32 s9, s1, 24 +; GFX10-NEXT: s_and_b32 s14, s1, s5 +; GFX10-NEXT: s_bfe_u32 s1, s1, s7 +; GFX10-NEXT: s_lshl_b32 s15, s15, 8 +; GFX10-NEXT: s_bfe_u32 s17, s2, s6 ; GFX10-NEXT: s_lshl_b32 s8, s8, 24 -; GFX10-NEXT: s_or_b32 s0, s0, s7 -; GFX10-NEXT: s_lshl_b32 s6, s10, 16 -; GFX10-NEXT: s_or_b32 s1, s1, s9 -; GFX10-NEXT: s_lshr_b32 s13, s2, 16 +; GFX10-NEXT: s_or_b32 s0, s12, s0 +; GFX10-NEXT: s_lshl_b32 s1, s1, 16 +; GFX10-NEXT: s_or_b32 s13, s14, s15 ; GFX10-NEXT: s_or_b32 s8, s0, s8 -; GFX10-NEXT: s_or_b32 s0, s1, s6 -; GFX10-NEXT: s_and_b32 s6, s12, s5 -; GFX10-NEXT: s_lshr_b32 s14, s2, 24 -; GFX10-NEXT: s_and_b32 s2, s2, s5 -; GFX10-NEXT: s_lshl_b32 s6, s6, 8 -; GFX10-NEXT: s_and_b32 s7, s13, s5 -; GFX10-NEXT: s_lshl_b32 s1, s11, 24 -; GFX10-NEXT: s_or_b32 s2, s2, s6 -; GFX10-NEXT: s_lshr_b32 s15, s3, 8 -; GFX10-NEXT: s_lshl_b32 s6, s7, 16 -; GFX10-NEXT: s_or_b32 s9, s0, s1 -; GFX10-NEXT: s_or_b32 s0, s2, s6 -; GFX10-NEXT: s_and_b32 s2, s15, s5 -; GFX10-NEXT: s_lshl_b32 s1, s14, 24 -; GFX10-NEXT: s_lshr_b32 s16, s3, 16 +; GFX10-NEXT: s_lshr_b32 s10, s2, 24 +; GFX10-NEXT: s_and_b32 s16, s2, s5 +; GFX10-NEXT: s_lshl_b32 s0, s17, 8 +; GFX10-NEXT: s_bfe_u32 s2, s2, s7 +; GFX10-NEXT: s_lshl_b32 s9, s9, 24 +; GFX10-NEXT: s_or_b32 s1, s13, s1 +; GFX10-NEXT: s_or_b32 s0, s16, s0 +; GFX10-NEXT: s_lshl_b32 s2, s2, 16 +; GFX10-NEXT: s_or_b32 s9, s1, s9 +; GFX10-NEXT: s_or_b32 s0, s0, s2 +; GFX10-NEXT: s_bfe_u32 s2, s3, s6 +; GFX10-NEXT: s_lshl_b32 s1, s10, 24 ; GFX10-NEXT: v_mov_b32_e32 v1, s9 -; GFX10-NEXT: s_lshr_b32 s17, s3, 24 ; GFX10-NEXT: s_or_b32 s10, s0, s1 -; GFX10-NEXT: s_and_b32 s1, s16, s5 -; GFX10-NEXT: s_and_b32 s3, s3, s5 +; GFX10-NEXT: s_bfe_u32 s1, s3, s7 +; GFX10-NEXT: s_and_b32 s6, s3, s5 ; GFX10-NEXT: s_lshl_b32 s2, s2, 8 ; GFX10-NEXT: s_lshl_b32 s1, s1, 16 -; GFX10-NEXT: s_or_b32 s0, s3, s2 +; GFX10-NEXT: s_or_b32 s0, s6, s2 ; GFX10-NEXT: v_cndmask_b32_e32 v1, s8, v1, vcc_lo ; GFX10-NEXT: s_or_b32 s1, s0, s1 ; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 2, v4 -; GFX10-NEXT: s_lshl_b32 s2, s17, 24 +; GFX10-NEXT: s_lshr_b32 s11, s3, 24 +; GFX10-NEXT: v_xor_b32_e32 v2, -1, v2 +; GFX10-NEXT: s_lshl_b32 s2, s11, 24 +; GFX10-NEXT: s_mov_b32 s3, 8 ; GFX10-NEXT: s_or_b32 s11, s1, s2 -; GFX10-NEXT: v_cmp_eq_u32_e64 s1, 3, v4 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s10, s0 +; GFX10-NEXT: v_cmp_eq_u32_e64 s1, 3, v4 ; GFX10-NEXT: s_and_b32 s2, s4, s5 ; GFX10-NEXT: v_lshlrev_b32_e64 v0, v0, s2 ; GFX10-NEXT: v_cmp_eq_u32_e64 s2, 0, v4 @@ -5814,36 +5461,32 @@ define amdgpu_ps void @insertelement_s_v16i8_s_v(<16 x i8> addrspace(4)* inreg % ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v5, s0 ; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v5, s1 -; GFX10-NEXT: s_mov_b32 s2, 8 -; GFX10-NEXT: v_lshrrev_b32_e32 v4, 8, v0 -; GFX10-NEXT: v_lshrrev_b32_e32 v6, 8, v1 -; GFX10-NEXT: v_lshrrev_b32_e32 v8, 8, v2 -; GFX10-NEXT: v_lshrrev_b32_e32 v11, 8, v3 -; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v4, s2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_lshrrev_b32_e32 v7, 24, v1 -; GFX10-NEXT: v_lshrrev_b32_e32 v10, 24, v2 -; GFX10-NEXT: v_lshlrev_b32_sdwa v6, v9, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v8, v9, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v9, v9, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_lshrrev_b32_e32 v12, 24, v3 -; GFX10-NEXT: v_and_or_b32 v4, v0, s5, v4 -; GFX10-NEXT: v_and_b32_sdwa v0, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX10-NEXT: s_mov_b32 s2, 16 +; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v0 +; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v6, s3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v9, s3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v11, v10, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v7, 24, v2 +; GFX10-NEXT: v_lshrrev_b32_e32 v8, 24, v3 +; GFX10-NEXT: v_lshlrev_b32_sdwa v10, v10, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_and_or_b32 v6, v0, s5, v6 +; GFX10-NEXT: v_and_or_b32 v9, v1, s5, v9 +; GFX10-NEXT: v_lshlrev_b32_sdwa v0, s2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_lshlrev_b32_e32 v4, 24, v4 +; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX10-NEXT: v_lshlrev_b32_e32 v5, 24, v5 -; GFX10-NEXT: v_and_or_b32 v6, v1, s5, v6 -; GFX10-NEXT: v_and_or_b32 v8, v2, s5, v8 -; GFX10-NEXT: v_and_or_b32 v9, v3, s5, v9 -; GFX10-NEXT: v_and_b32_sdwa v1, v1, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX10-NEXT: v_and_or_b32 v11, v2, s5, v11 +; GFX10-NEXT: v_and_or_b32 v10, v3, s5, v10 +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v12, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX10-NEXT: v_lshlrev_b32_e32 v7, 24, v7 -; GFX10-NEXT: v_and_b32_sdwa v2, v2, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b32_e32 v10, 24, v10 -; GFX10-NEXT: v_and_b32_sdwa v3, v3, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b32_e32 v11, 24, v12 -; GFX10-NEXT: v_or3_b32 v0, v4, v0, v5 +; GFX10-NEXT: v_lshlrev_b32_sdwa v3, v12, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_lshlrev_b32_e32 v8, 24, v8 +; GFX10-NEXT: v_or3_b32 v0, v6, v0, v4 +; GFX10-NEXT: v_or3_b32 v1, v9, v1, v5 ; GFX10-NEXT: v_mov_b32_e32 v4, 0 -; GFX10-NEXT: v_or3_b32 v1, v6, v1, v7 -; GFX10-NEXT: v_or3_b32 v2, v8, v2, v10 -; GFX10-NEXT: v_or3_b32 v3, v9, v3, v11 +; GFX10-NEXT: v_or3_b32 v2, v11, v2, v7 +; GFX10-NEXT: v_or3_b32 v3, v10, v3, v8 ; GFX10-NEXT: v_mov_b32_e32 v5, 0 ; GFX10-NEXT: global_store_dwordx4 v[4:5], v[0:3], off ; GFX10-NEXT: s_endpgm @@ -5857,67 +5500,61 @@ define amdgpu_ps void @insertelement_s_v16i8_v_v(<16 x i8> addrspace(4)* inreg % ; GFX9-LABEL: insertelement_s_v16i8_v_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 -; GFX9-NEXT: s_movk_i32 s17, 0xff +; GFX9-NEXT: s_mov_b32 s12, 0x80008 +; GFX9-NEXT: s_movk_i32 s10, 0xff ; GFX9-NEXT: v_lshrrev_b32_e32 v4, 2, v1 ; GFX9-NEXT: v_and_b32_e32 v1, 3, v1 -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v4 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_lshr_b32 s4, s0, 8 -; GFX9-NEXT: s_and_b32 s4, s4, s17 -; GFX9-NEXT: s_lshr_b32 s5, s0, 16 -; GFX9-NEXT: s_lshr_b32 s6, s0, 24 -; GFX9-NEXT: s_and_b32 s0, s0, s17 -; GFX9-NEXT: s_lshl_b32 s4, s4, 8 -; GFX9-NEXT: s_or_b32 s0, s0, s4 -; GFX9-NEXT: s_and_b32 s4, s5, s17 -; GFX9-NEXT: s_lshl_b32 s4, s4, 16 -; GFX9-NEXT: s_or_b32 s0, s0, s4 -; GFX9-NEXT: s_lshl_b32 s4, s6, 24 -; GFX9-NEXT: s_lshr_b32 s7, s1, 8 +; GFX9-NEXT: s_bfe_u32 s13, s0, s12 +; GFX9-NEXT: s_and_b32 s11, s0, s10 +; GFX9-NEXT: s_lshl_b32 s13, s13, 8 +; GFX9-NEXT: s_or_b32 s11, s11, s13 +; GFX9-NEXT: s_mov_b32 s13, 0x80010 +; GFX9-NEXT: s_lshr_b32 s4, s0, 24 +; GFX9-NEXT: s_bfe_u32 s0, s0, s13 +; GFX9-NEXT: s_lshl_b32 s0, s0, 16 +; GFX9-NEXT: s_or_b32 s0, s11, s0 +; GFX9-NEXT: s_lshl_b32 s4, s4, 24 +; GFX9-NEXT: s_bfe_u32 s11, s1, s12 ; GFX9-NEXT: s_or_b32 s4, s0, s4 -; GFX9-NEXT: s_lshr_b32 s9, s1, 16 -; GFX9-NEXT: s_lshr_b32 s10, s1, 24 -; GFX9-NEXT: s_and_b32 s0, s1, s17 -; GFX9-NEXT: s_and_b32 s1, s7, s17 -; GFX9-NEXT: s_lshl_b32 s1, s1, 8 -; GFX9-NEXT: s_or_b32 s0, s0, s1 -; GFX9-NEXT: s_and_b32 s1, s9, s17 +; GFX9-NEXT: s_lshr_b32 s5, s1, 24 +; GFX9-NEXT: s_and_b32 s0, s1, s10 +; GFX9-NEXT: s_bfe_u32 s1, s1, s13 +; GFX9-NEXT: s_lshl_b32 s11, s11, 8 +; GFX9-NEXT: s_or_b32 s0, s0, s11 ; GFX9-NEXT: s_lshl_b32 s1, s1, 16 ; GFX9-NEXT: s_or_b32 s0, s0, s1 -; GFX9-NEXT: s_lshl_b32 s1, s10, 24 -; GFX9-NEXT: s_lshr_b32 s11, s2, 8 +; GFX9-NEXT: s_lshl_b32 s1, s5, 24 ; GFX9-NEXT: s_or_b32 s5, s0, s1 -; GFX9-NEXT: s_and_b32 s1, s11, s17 -; GFX9-NEXT: s_lshr_b32 s12, s2, 16 -; GFX9-NEXT: s_and_b32 s0, s2, s17 +; GFX9-NEXT: s_bfe_u32 s1, s2, s12 +; GFX9-NEXT: s_and_b32 s0, s2, s10 ; GFX9-NEXT: s_lshl_b32 s1, s1, 8 ; GFX9-NEXT: s_or_b32 s0, s0, s1 -; GFX9-NEXT: s_and_b32 s1, s12, s17 +; GFX9-NEXT: s_bfe_u32 s1, s2, s13 ; GFX9-NEXT: s_lshl_b32 s1, s1, 16 -; GFX9-NEXT: s_lshr_b32 s13, s2, 24 +; GFX9-NEXT: s_lshr_b32 s6, s2, 24 ; GFX9-NEXT: s_or_b32 s0, s0, s1 -; GFX9-NEXT: s_lshl_b32 s1, s13, 24 -; GFX9-NEXT: s_lshr_b32 s14, s3, 8 +; GFX9-NEXT: s_lshl_b32 s1, s6, 24 ; GFX9-NEXT: s_or_b32 s6, s0, s1 -; GFX9-NEXT: s_and_b32 s1, s14, s17 -; GFX9-NEXT: s_lshr_b32 s15, s3, 16 -; GFX9-NEXT: s_and_b32 s0, s3, s17 +; GFX9-NEXT: s_bfe_u32 s1, s3, s12 +; GFX9-NEXT: s_and_b32 s0, s3, s10 ; GFX9-NEXT: s_lshl_b32 s1, s1, 8 ; GFX9-NEXT: s_or_b32 s0, s0, s1 -; GFX9-NEXT: s_and_b32 s1, s15, s17 -; GFX9-NEXT: s_lshr_b32 s16, s3, 24 +; GFX9-NEXT: s_bfe_u32 s1, s3, s13 +; GFX9-NEXT: s_lshr_b32 s7, s3, 24 ; GFX9-NEXT: s_lshl_b32 s1, s1, 16 ; GFX9-NEXT: s_or_b32 s0, s0, s1 -; GFX9-NEXT: s_lshl_b32 s1, s16, 24 +; GFX9-NEXT: s_lshl_b32 s1, s7, 24 ; GFX9-NEXT: v_mov_b32_e32 v2, s4 ; GFX9-NEXT: v_mov_b32_e32 v3, s5 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v4 ; GFX9-NEXT: s_or_b32 s7, s0, s1 ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 3, v1 ; GFX9-NEXT: v_mov_b32_e32 v5, s6 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc ; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], 2, v4 ; GFX9-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshlrev_b32_e64 v1, v1, s17 +; GFX9-NEXT: v_lshlrev_b32_e64 v1, v1, s10 ; GFX9-NEXT: v_mov_b32_e32 v6, s7 ; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v5, s[0:1] ; GFX9-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v4 @@ -5931,37 +5568,35 @@ define amdgpu_ps void @insertelement_s_v16i8_v_v(<16 x i8> addrspace(4)* inreg % ; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v5, s[4:5] ; GFX9-NEXT: s_mov_b32 s8, 8 -; GFX9-NEXT: v_lshrrev_b32_e32 v4, 8, v0 +; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v0 +; GFX9-NEXT: v_lshlrev_b32_sdwa v8, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: s_mov_b32 s9, 16 +; GFX9-NEXT: v_and_or_b32 v8, v0, s10, v8 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc +; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s9, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_e32 v4, 24, v4 +; GFX9-NEXT: v_or3_b32 v0, v8, v0, v4 +; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v5, s[0:1] ; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v5, s[2:3] -; GFX9-NEXT: v_lshrrev_b32_e32 v5, 24, v0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v4, s8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_or_b32 v4, v0, s17, v4 -; GFX9-NEXT: v_and_b32_sdwa v0, v0, s17 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_lshrrev_b32_e32 v5, 24, v1 +; GFX9-NEXT: v_and_or_b32 v4, v1, s10, v4 +; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s9, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX9-NEXT: v_lshlrev_b32_e32 v5, 24, v5 -; GFX9-NEXT: v_or3_b32 v0, v4, v0, v5 -; GFX9-NEXT: v_lshrrev_b32_e32 v6, 8, v1 +; GFX9-NEXT: v_or3_b32 v1, v4, v1, v5 ; GFX9-NEXT: v_mov_b32_e32 v4, 8 -; GFX9-NEXT: v_lshlrev_b32_sdwa v5, v4, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v1 -; GFX9-NEXT: v_and_or_b32 v5, v1, s17, v5 -; GFX9-NEXT: v_lshrrev_b32_e32 v8, 8, v2 -; GFX9-NEXT: v_and_b32_sdwa v1, v1, s17 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b32_e32 v6, 24, v7 -; GFX9-NEXT: v_or3_b32 v1, v5, v1, v6 -; GFX9-NEXT: v_lshlrev_b32_sdwa v5, v4, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshrrev_b32_e32 v9, 24, v2 -; GFX9-NEXT: v_lshrrev_b32_e32 v10, 8, v3 -; GFX9-NEXT: v_lshlrev_b32_sdwa v4, v4, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_or_b32 v5, v2, s17, v5 -; GFX9-NEXT: v_lshrrev_b32_e32 v11, 24, v3 -; GFX9-NEXT: v_and_b32_sdwa v2, v2, s17 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b32_e32 v6, 24, v9 +; GFX9-NEXT: v_lshlrev_b32_sdwa v5, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshrrev_b32_e32 v6, 24, v2 +; GFX9-NEXT: v_mov_b32_e32 v8, 16 +; GFX9-NEXT: v_lshlrev_b32_sdwa v4, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_and_or_b32 v5, v2, s10, v5 +; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v3 +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_e32 v6, 24, v6 ; GFX9-NEXT: v_or3_b32 v2, v5, v2, v6 -; GFX9-NEXT: v_and_or_b32 v4, v3, s17, v4 -; GFX9-NEXT: v_and_b32_sdwa v3, v3, s17 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b32_e32 v5, 24, v11 +; GFX9-NEXT: v_and_or_b32 v4, v3, s10, v4 +; GFX9-NEXT: v_lshlrev_b32_sdwa v3, v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_e32 v5, 24, v7 ; GFX9-NEXT: v_or3_b32 v3, v4, v3, v5 ; GFX9-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-NEXT: v_mov_b32_e32 v5, 0 @@ -5971,67 +5606,61 @@ define amdgpu_ps void @insertelement_s_v16i8_v_v(<16 x i8> addrspace(4)* inreg % ; GFX8-LABEL: insertelement_s_v16i8_v_v: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 -; GFX8-NEXT: s_movk_i32 s16, 0xff +; GFX8-NEXT: s_mov_b32 s10, 0x80008 +; GFX8-NEXT: s_movk_i32 s8, 0xff ; GFX8-NEXT: v_lshrrev_b32_e32 v4, 2, v1 ; GFX8-NEXT: v_and_b32_e32 v1, 3, v1 -; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v4 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_lshr_b32 s4, s0, 8 -; GFX8-NEXT: s_and_b32 s4, s4, s16 -; GFX8-NEXT: s_lshr_b32 s5, s0, 16 -; GFX8-NEXT: s_lshr_b32 s6, s0, 24 -; GFX8-NEXT: s_and_b32 s0, s0, s16 -; GFX8-NEXT: s_lshl_b32 s4, s4, 8 -; GFX8-NEXT: s_or_b32 s0, s0, s4 -; GFX8-NEXT: s_and_b32 s4, s5, s16 -; GFX8-NEXT: s_lshl_b32 s4, s4, 16 -; GFX8-NEXT: s_or_b32 s0, s0, s4 -; GFX8-NEXT: s_lshl_b32 s4, s6, 24 -; GFX8-NEXT: s_lshr_b32 s7, s1, 8 +; GFX8-NEXT: s_bfe_u32 s11, s0, s10 +; GFX8-NEXT: s_and_b32 s9, s0, s8 +; GFX8-NEXT: s_lshl_b32 s11, s11, 8 +; GFX8-NEXT: s_or_b32 s9, s9, s11 +; GFX8-NEXT: s_mov_b32 s11, 0x80010 +; GFX8-NEXT: s_lshr_b32 s4, s0, 24 +; GFX8-NEXT: s_bfe_u32 s0, s0, s11 +; GFX8-NEXT: s_lshl_b32 s0, s0, 16 +; GFX8-NEXT: s_or_b32 s0, s9, s0 +; GFX8-NEXT: s_lshl_b32 s4, s4, 24 +; GFX8-NEXT: s_bfe_u32 s9, s1, s10 ; GFX8-NEXT: s_or_b32 s4, s0, s4 -; GFX8-NEXT: s_lshr_b32 s8, s1, 16 -; GFX8-NEXT: s_lshr_b32 s9, s1, 24 -; GFX8-NEXT: s_and_b32 s0, s1, s16 -; GFX8-NEXT: s_and_b32 s1, s7, s16 -; GFX8-NEXT: s_lshl_b32 s1, s1, 8 -; GFX8-NEXT: s_or_b32 s0, s0, s1 -; GFX8-NEXT: s_and_b32 s1, s8, s16 +; GFX8-NEXT: s_lshr_b32 s5, s1, 24 +; GFX8-NEXT: s_and_b32 s0, s1, s8 +; GFX8-NEXT: s_bfe_u32 s1, s1, s11 +; GFX8-NEXT: s_lshl_b32 s9, s9, 8 +; GFX8-NEXT: s_or_b32 s0, s0, s9 ; GFX8-NEXT: s_lshl_b32 s1, s1, 16 ; GFX8-NEXT: s_or_b32 s0, s0, s1 -; GFX8-NEXT: s_lshl_b32 s1, s9, 24 -; GFX8-NEXT: s_lshr_b32 s10, s2, 8 +; GFX8-NEXT: s_lshl_b32 s1, s5, 24 ; GFX8-NEXT: s_or_b32 s5, s0, s1 -; GFX8-NEXT: s_and_b32 s1, s10, s16 -; GFX8-NEXT: s_lshr_b32 s11, s2, 16 -; GFX8-NEXT: s_and_b32 s0, s2, s16 +; GFX8-NEXT: s_bfe_u32 s1, s2, s10 +; GFX8-NEXT: s_and_b32 s0, s2, s8 ; GFX8-NEXT: s_lshl_b32 s1, s1, 8 ; GFX8-NEXT: s_or_b32 s0, s0, s1 -; GFX8-NEXT: s_and_b32 s1, s11, s16 +; GFX8-NEXT: s_bfe_u32 s1, s2, s11 ; GFX8-NEXT: s_lshl_b32 s1, s1, 16 -; GFX8-NEXT: s_lshr_b32 s12, s2, 24 +; GFX8-NEXT: s_lshr_b32 s6, s2, 24 ; GFX8-NEXT: s_or_b32 s0, s0, s1 -; GFX8-NEXT: s_lshl_b32 s1, s12, 24 -; GFX8-NEXT: s_lshr_b32 s13, s3, 8 +; GFX8-NEXT: s_lshl_b32 s1, s6, 24 ; GFX8-NEXT: s_or_b32 s6, s0, s1 -; GFX8-NEXT: s_and_b32 s1, s13, s16 -; GFX8-NEXT: s_lshr_b32 s14, s3, 16 -; GFX8-NEXT: s_and_b32 s0, s3, s16 +; GFX8-NEXT: s_bfe_u32 s1, s3, s10 +; GFX8-NEXT: s_and_b32 s0, s3, s8 ; GFX8-NEXT: s_lshl_b32 s1, s1, 8 ; GFX8-NEXT: s_or_b32 s0, s0, s1 -; GFX8-NEXT: s_and_b32 s1, s14, s16 -; GFX8-NEXT: s_lshr_b32 s15, s3, 24 +; GFX8-NEXT: s_bfe_u32 s1, s3, s11 +; GFX8-NEXT: s_lshr_b32 s7, s3, 24 ; GFX8-NEXT: s_lshl_b32 s1, s1, 16 ; GFX8-NEXT: s_or_b32 s0, s0, s1 -; GFX8-NEXT: s_lshl_b32 s1, s15, 24 +; GFX8-NEXT: s_lshl_b32 s1, s7, 24 ; GFX8-NEXT: v_mov_b32_e32 v2, s4 ; GFX8-NEXT: v_mov_b32_e32 v3, s5 +; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v4 ; GFX8-NEXT: s_or_b32 s7, s0, s1 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 3, v1 ; GFX8-NEXT: v_mov_b32_e32 v5, s6 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc ; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], 2, v4 ; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshlrev_b32_e64 v1, v1, s16 +; GFX8-NEXT: v_lshlrev_b32_e64 v1, v1, s8 ; GFX8-NEXT: v_mov_b32_e32 v6, s7 ; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v5, s[0:1] ; GFX8-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v4 @@ -6045,42 +5674,39 @@ define amdgpu_ps void @insertelement_s_v16i8_v_v(<16 x i8> addrspace(4)* inreg % ; GFX8-NEXT: v_mov_b32_e32 v3, s7 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v5, s[4:5] -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 8, v0 -; GFX8-NEXT: v_mov_b32_e32 v12, 8 -; GFX8-NEXT: v_lshlrev_b32_sdwa v4, v12, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_mov_b32_e32 v12, s16 +; GFX8-NEXT: v_mov_b32_e32 v8, 8 +; GFX8-NEXT: v_lshlrev_b32_sdwa v9, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_mov_b32_e32 v10, 16 +; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v0 +; GFX8-NEXT: v_or_b32_sdwa v9, v0, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v10, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc +; GFX8-NEXT: v_or_b32_e32 v0, v9, v0 +; GFX8-NEXT: v_lshlrev_b32_e32 v4, 24, v4 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX8-NEXT: v_lshlrev_b32_sdwa v4, v8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v5, s[0:1] ; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v5, s[2:3] -; GFX8-NEXT: v_lshrrev_b32_e32 v5, 24, v0 -; GFX8-NEXT: v_or_b32_sdwa v4, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_sdwa v0, v0, v12 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_e32 v0, v4, v0 +; GFX8-NEXT: v_lshrrev_b32_e32 v5, 24, v1 +; GFX8-NEXT: v_or_b32_sdwa v4, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v10, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_e32 v1, v4, v1 ; GFX8-NEXT: v_lshlrev_b32_e32 v4, 24, v5 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 -; GFX8-NEXT: v_lshrrev_b32_e32 v6, 8, v1 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 ; GFX8-NEXT: v_mov_b32_e32 v4, 8 -; GFX8-NEXT: v_lshlrev_b32_sdwa v5, v4, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v7, 24, v1 -; GFX8-NEXT: v_or_b32_sdwa v5, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_sdwa v1, v1, v12 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_e32 v1, v5, v1 -; GFX8-NEXT: v_lshlrev_b32_e32 v5, 24, v7 -; GFX8-NEXT: v_lshrrev_b32_e32 v8, 8, v2 -; GFX8-NEXT: v_lshrrev_b32_e32 v10, 8, v3 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v5 -; GFX8-NEXT: v_lshlrev_b32_sdwa v5, v4, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshlrev_b32_sdwa v4, v4, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v9, 24, v2 +; GFX8-NEXT: v_lshlrev_b32_sdwa v5, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v4, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_mov_b32_e32 v8, 16 +; GFX8-NEXT: v_lshrrev_b32_e32 v6, 24, v2 ; GFX8-NEXT: v_or_b32_sdwa v5, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_sdwa v2, v2, v12 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_lshrrev_b32_e32 v11, 24, v3 +; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_lshrrev_b32_e32 v7, 24, v3 ; GFX8-NEXT: v_or_b32_sdwa v4, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_sdwa v3, v3, v12 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX8-NEXT: v_or_b32_e32 v2, v5, v2 -; GFX8-NEXT: v_lshlrev_b32_e32 v5, 24, v9 +; GFX8-NEXT: v_lshlrev_b32_e32 v5, 24, v6 ; GFX8-NEXT: v_or_b32_e32 v3, v4, v3 -; GFX8-NEXT: v_lshlrev_b32_e32 v4, 24, v11 +; GFX8-NEXT: v_lshlrev_b32_e32 v4, 24, v7 ; GFX8-NEXT: v_or_b32_e32 v2, v2, v5 ; GFX8-NEXT: v_or_b32_e32 v3, v3, v4 ; GFX8-NEXT: v_mov_b32_e32 v4, 0 @@ -6091,68 +5717,62 @@ define amdgpu_ps void @insertelement_s_v16i8_v_v(<16 x i8> addrspace(4)* inreg % ; GFX7-LABEL: insertelement_s_v16i8_v_v: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 -; GFX7-NEXT: s_movk_i32 s16, 0xff +; GFX7-NEXT: s_mov_b32 s10, 0x80008 +; GFX7-NEXT: s_movk_i32 s8, 0xff ; GFX7-NEXT: v_lshrrev_b32_e32 v4, 2, v1 ; GFX7-NEXT: v_and_b32_e32 v1, 3, v1 -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v4 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_lshr_b32 s4, s0, 8 -; GFX7-NEXT: s_and_b32 s4, s4, s16 -; GFX7-NEXT: s_lshr_b32 s5, s0, 16 -; GFX7-NEXT: s_lshr_b32 s6, s0, 24 -; GFX7-NEXT: s_and_b32 s0, s0, s16 -; GFX7-NEXT: s_lshl_b32 s4, s4, 8 -; GFX7-NEXT: s_or_b32 s0, s0, s4 -; GFX7-NEXT: s_and_b32 s4, s5, s16 -; GFX7-NEXT: s_lshl_b32 s4, s4, 16 -; GFX7-NEXT: s_or_b32 s0, s0, s4 -; GFX7-NEXT: s_lshl_b32 s4, s6, 24 -; GFX7-NEXT: s_lshr_b32 s7, s1, 8 +; GFX7-NEXT: s_bfe_u32 s11, s0, s10 +; GFX7-NEXT: s_and_b32 s9, s0, s8 +; GFX7-NEXT: s_lshl_b32 s11, s11, 8 +; GFX7-NEXT: s_or_b32 s9, s9, s11 +; GFX7-NEXT: s_mov_b32 s11, 0x80010 +; GFX7-NEXT: s_lshr_b32 s4, s0, 24 +; GFX7-NEXT: s_bfe_u32 s0, s0, s11 +; GFX7-NEXT: s_lshl_b32 s0, s0, 16 +; GFX7-NEXT: s_or_b32 s0, s9, s0 +; GFX7-NEXT: s_lshl_b32 s4, s4, 24 +; GFX7-NEXT: s_bfe_u32 s9, s1, s10 ; GFX7-NEXT: s_or_b32 s4, s0, s4 -; GFX7-NEXT: s_lshr_b32 s8, s1, 16 -; GFX7-NEXT: s_lshr_b32 s9, s1, 24 -; GFX7-NEXT: s_and_b32 s0, s1, s16 -; GFX7-NEXT: s_and_b32 s1, s7, s16 -; GFX7-NEXT: s_lshl_b32 s1, s1, 8 -; GFX7-NEXT: s_or_b32 s0, s0, s1 -; GFX7-NEXT: s_and_b32 s1, s8, s16 +; GFX7-NEXT: s_lshr_b32 s5, s1, 24 +; GFX7-NEXT: s_and_b32 s0, s1, s8 +; GFX7-NEXT: s_bfe_u32 s1, s1, s11 +; GFX7-NEXT: s_lshl_b32 s9, s9, 8 +; GFX7-NEXT: s_or_b32 s0, s0, s9 ; GFX7-NEXT: s_lshl_b32 s1, s1, 16 ; GFX7-NEXT: s_or_b32 s0, s0, s1 -; GFX7-NEXT: s_lshl_b32 s1, s9, 24 -; GFX7-NEXT: s_lshr_b32 s10, s2, 8 +; GFX7-NEXT: s_lshl_b32 s1, s5, 24 ; GFX7-NEXT: s_or_b32 s5, s0, s1 -; GFX7-NEXT: s_and_b32 s1, s10, s16 -; GFX7-NEXT: s_lshr_b32 s11, s2, 16 -; GFX7-NEXT: s_and_b32 s0, s2, s16 +; GFX7-NEXT: s_bfe_u32 s1, s2, s10 +; GFX7-NEXT: s_and_b32 s0, s2, s8 ; GFX7-NEXT: s_lshl_b32 s1, s1, 8 ; GFX7-NEXT: s_or_b32 s0, s0, s1 -; GFX7-NEXT: s_and_b32 s1, s11, s16 +; GFX7-NEXT: s_bfe_u32 s1, s2, s11 ; GFX7-NEXT: s_lshl_b32 s1, s1, 16 -; GFX7-NEXT: s_lshr_b32 s12, s2, 24 +; GFX7-NEXT: s_lshr_b32 s6, s2, 24 ; GFX7-NEXT: s_or_b32 s0, s0, s1 -; GFX7-NEXT: s_lshl_b32 s1, s12, 24 -; GFX7-NEXT: s_lshr_b32 s13, s3, 8 +; GFX7-NEXT: s_lshl_b32 s1, s6, 24 ; GFX7-NEXT: s_or_b32 s6, s0, s1 -; GFX7-NEXT: s_and_b32 s1, s13, s16 -; GFX7-NEXT: s_lshr_b32 s14, s3, 16 -; GFX7-NEXT: s_and_b32 s0, s3, s16 +; GFX7-NEXT: s_bfe_u32 s1, s3, s10 +; GFX7-NEXT: s_and_b32 s0, s3, s8 ; GFX7-NEXT: s_lshl_b32 s1, s1, 8 ; GFX7-NEXT: s_or_b32 s0, s0, s1 -; GFX7-NEXT: s_and_b32 s1, s14, s16 -; GFX7-NEXT: s_lshr_b32 s15, s3, 24 +; GFX7-NEXT: s_bfe_u32 s1, s3, s11 +; GFX7-NEXT: s_lshr_b32 s7, s3, 24 ; GFX7-NEXT: s_lshl_b32 s1, s1, 16 ; GFX7-NEXT: s_or_b32 s0, s0, s1 -; GFX7-NEXT: s_lshl_b32 s1, s15, 24 +; GFX7-NEXT: s_lshl_b32 s1, s7, 24 ; GFX7-NEXT: v_mov_b32_e32 v2, s4 ; GFX7-NEXT: v_mov_b32_e32 v3, s5 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v4 ; GFX7-NEXT: s_or_b32 s7, s0, s1 ; GFX7-NEXT: v_mov_b32_e32 v5, s6 ; GFX7-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc ; GFX7-NEXT: v_cmp_eq_u32_e64 s[0:1], 2, v4 ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 3, v1 -; GFX7-NEXT: v_and_b32_e32 v0, s16, v0 +; GFX7-NEXT: v_and_b32_e32 v0, s8, v0 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, v1, v0 -; GFX7-NEXT: v_lshl_b32_e32 v1, s16, v1 +; GFX7-NEXT: v_lshl_b32_e32 v1, s8, v1 ; GFX7-NEXT: v_mov_b32_e32 v6, s7 ; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, v5, s[0:1] ; GFX7-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v4 @@ -6166,56 +5786,48 @@ define amdgpu_ps void @insertelement_s_v16i8_v_v(<16 x i8> addrspace(4)* inreg % ; GFX7-NEXT: v_mov_b32_e32 v3, s7 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4 ; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, v5, s[4:5] -; GFX7-NEXT: v_lshrrev_b32_e32 v4, 8, v0 -; GFX7-NEXT: v_and_b32_e32 v4, s16, v4 +; GFX7-NEXT: v_bfe_u32 v9, v0, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v4, 24, v0 +; GFX7-NEXT: v_and_b32_e32 v8, s8, v0 +; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v9, 8, v9 +; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc +; GFX7-NEXT: v_or_b32_e32 v8, v8, v9 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_or_b32_e32 v0, v8, v0 +; GFX7-NEXT: v_bfe_u32 v8, v1, 8, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v4 ; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v5, s[2:3] ; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, v5, s[0:1] -; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc -; GFX7-NEXT: v_lshrrev_b32_e32 v5, 16, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v6, 24, v0 -; GFX7-NEXT: v_and_b32_e32 v0, s16, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 8, v4 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v4 -; GFX7-NEXT: v_and_b32_e32 v4, s16, v5 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v4 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v4 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v6 -; GFX7-NEXT: v_lshrrev_b32_e32 v7, 8, v1 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v4 -; GFX7-NEXT: v_and_b32_e32 v4, s16, v7 -; GFX7-NEXT: v_lshrrev_b32_e32 v8, 16, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v9, 24, v1 -; GFX7-NEXT: v_and_b32_e32 v1, s16, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 8, v4 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v4 -; GFX7-NEXT: v_and_b32_e32 v4, s16, v8 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v4 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v9 -; GFX7-NEXT: v_lshrrev_b32_e32 v10, 8, v2 +; GFX7-NEXT: v_lshrrev_b32_e32 v5, 24, v1 +; GFX7-NEXT: v_and_b32_e32 v4, s8, v1 +; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v8, 8, v8 +; GFX7-NEXT: v_or_b32_e32 v4, v4, v8 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-NEXT: v_or_b32_e32 v1, v4, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v5 +; GFX7-NEXT: v_bfe_u32 v5, v2, 8, 8 ; GFX7-NEXT: v_or_b32_e32 v1, v1, v4 -; GFX7-NEXT: v_and_b32_e32 v4, s16, v10 -; GFX7-NEXT: v_lshrrev_b32_e32 v11, 16, v2 -; GFX7-NEXT: v_lshrrev_b32_e32 v12, 24, v2 -; GFX7-NEXT: v_and_b32_e32 v2, s16, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 8, v4 -; GFX7-NEXT: v_or_b32_e32 v2, v2, v4 -; GFX7-NEXT: v_and_b32_e32 v4, s16, v11 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX7-NEXT: v_or_b32_e32 v2, v2, v4 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v12 -; GFX7-NEXT: v_lshrrev_b32_e32 v13, 8, v3 +; GFX7-NEXT: v_lshrrev_b32_e32 v6, 24, v2 +; GFX7-NEXT: v_and_b32_e32 v4, s8, v2 +; GFX7-NEXT: v_bfe_u32 v2, v2, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v5, 8, v5 +; GFX7-NEXT: v_or_b32_e32 v4, v4, v5 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX7-NEXT: v_bfe_u32 v5, v3, 8, 8 +; GFX7-NEXT: v_or_b32_e32 v2, v4, v2 +; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v6 ; GFX7-NEXT: v_or_b32_e32 v2, v2, v4 -; GFX7-NEXT: v_and_b32_e32 v4, s16, v13 -; GFX7-NEXT: v_lshrrev_b32_e32 v14, 16, v3 -; GFX7-NEXT: v_lshrrev_b32_e32 v15, 24, v3 -; GFX7-NEXT: v_and_b32_e32 v3, s16, v3 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 8, v4 -; GFX7-NEXT: v_or_b32_e32 v3, v3, v4 -; GFX7-NEXT: v_and_b32_e32 v4, s16, v14 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX7-NEXT: v_or_b32_e32 v3, v3, v4 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v15 +; GFX7-NEXT: v_lshrrev_b32_e32 v7, 24, v3 +; GFX7-NEXT: v_and_b32_e32 v4, s8, v3 +; GFX7-NEXT: v_bfe_u32 v3, v3, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v5, 8, v5 +; GFX7-NEXT: v_or_b32_e32 v4, v4, v5 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX7-NEXT: v_or_b32_e32 v3, v4, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v7 ; GFX7-NEXT: s_mov_b64 s[0:1], 0 ; GFX7-NEXT: v_or_b32_e32 v3, v3, v4 ; GFX7-NEXT: s_mov_b32 s2, -1 @@ -6226,70 +5838,66 @@ define amdgpu_ps void @insertelement_s_v16i8_v_v(<16 x i8> addrspace(4)* inreg % ; GFX10-LABEL: insertelement_s_v16i8_v_v: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 +; GFX10-NEXT: s_mov_b32 s7, 0x80008 ; GFX10-NEXT: s_movk_i32 s8, 0xff +; GFX10-NEXT: s_mov_b32 s9, 0x80010 ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 2, v1 ; GFX10-NEXT: v_and_b32_e32 v1, 3, v1 -; GFX10-NEXT: v_mov_b32_e32 v9, 8 +; GFX10-NEXT: v_mov_b32_e32 v10, 8 +; GFX10-NEXT: v_mov_b32_e32 v12, 16 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v4 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 3, v1 ; GFX10-NEXT: v_lshlrev_b32_e64 v3, v1, s8 ; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_xor_b32_e32 v1, -1, v3 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_lshr_b32 s4, s0, 8 -; GFX10-NEXT: s_lshr_b32 s5, s0, 16 -; GFX10-NEXT: s_lshr_b32 s7, s1, 8 -; GFX10-NEXT: s_and_b32 s4, s4, s8 -; GFX10-NEXT: s_lshr_b32 s6, s0, 24 -; GFX10-NEXT: s_lshr_b32 s9, s1, 16 -; GFX10-NEXT: s_and_b32 s5, s5, s8 -; GFX10-NEXT: s_and_b32 s7, s7, s8 -; GFX10-NEXT: s_and_b32 s0, s0, s8 -; GFX10-NEXT: s_lshl_b32 s4, s4, 8 -; GFX10-NEXT: s_and_b32 s9, s9, s8 -; GFX10-NEXT: s_lshr_b32 s10, s1, 24 -; GFX10-NEXT: s_and_b32 s1, s1, s8 -; GFX10-NEXT: s_lshl_b32 s7, s7, 8 -; GFX10-NEXT: s_lshl_b32 s5, s5, 16 -; GFX10-NEXT: s_or_b32 s0, s0, s4 -; GFX10-NEXT: s_lshr_b32 s11, s2, 8 -; GFX10-NEXT: s_lshl_b32 s6, s6, 24 -; GFX10-NEXT: s_or_b32 s0, s0, s5 -; GFX10-NEXT: s_lshl_b32 s9, s9, 16 -; GFX10-NEXT: s_or_b32 s1, s1, s7 -; GFX10-NEXT: s_lshr_b32 s12, s2, 16 -; GFX10-NEXT: s_or_b32 s4, s0, s6 -; GFX10-NEXT: s_or_b32 s0, s1, s9 -; GFX10-NEXT: s_and_b32 s1, s11, s8 -; GFX10-NEXT: s_lshr_b32 s13, s2, 24 -; GFX10-NEXT: s_and_b32 s2, s2, s8 -; GFX10-NEXT: s_lshl_b32 s1, s1, 8 -; GFX10-NEXT: s_and_b32 s5, s12, s8 -; GFX10-NEXT: s_or_b32 s1, s2, s1 -; GFX10-NEXT: s_lshl_b32 s2, s5, 16 -; GFX10-NEXT: s_lshl_b32 s5, s10, 24 -; GFX10-NEXT: s_lshr_b32 s14, s3, 8 -; GFX10-NEXT: s_or_b32 s5, s0, s5 -; GFX10-NEXT: s_and_b32 s0, s14, s8 -; GFX10-NEXT: s_or_b32 s1, s1, s2 -; GFX10-NEXT: s_lshl_b32 s2, s13, 24 -; GFX10-NEXT: s_lshr_b32 s15, s3, 16 -; GFX10-NEXT: s_or_b32 s6, s1, s2 +; GFX10-NEXT: s_bfe_u32 s12, s0, s7 +; GFX10-NEXT: s_lshr_b32 s4, s0, 24 +; GFX10-NEXT: s_and_b32 s11, s0, s8 +; GFX10-NEXT: s_bfe_u32 s0, s0, s9 +; GFX10-NEXT: s_lshl_b32 s12, s12, 8 +; GFX10-NEXT: s_lshl_b32 s0, s0, 16 +; GFX10-NEXT: s_or_b32 s11, s11, s12 +; GFX10-NEXT: s_bfe_u32 s16, s2, s7 +; GFX10-NEXT: s_lshl_b32 s4, s4, 24 +; GFX10-NEXT: s_or_b32 s0, s11, s0 +; GFX10-NEXT: s_bfe_u32 s14, s1, s7 +; GFX10-NEXT: s_or_b32 s4, s0, s4 +; GFX10-NEXT: s_bfe_u32 s0, s2, s9 +; GFX10-NEXT: s_and_b32 s15, s2, s8 +; GFX10-NEXT: s_lshl_b32 s16, s16, 8 +; GFX10-NEXT: s_lshr_b32 s5, s1, 24 +; GFX10-NEXT: s_and_b32 s13, s1, s8 +; GFX10-NEXT: s_bfe_u32 s1, s1, s9 +; GFX10-NEXT: s_lshl_b32 s14, s14, 8 +; GFX10-NEXT: s_lshr_b32 s6, s2, 24 +; GFX10-NEXT: s_or_b32 s2, s15, s16 +; GFX10-NEXT: s_lshl_b32 s0, s0, 16 +; GFX10-NEXT: s_lshl_b32 s1, s1, 16 +; GFX10-NEXT: s_or_b32 s12, s13, s14 +; GFX10-NEXT: s_or_b32 s0, s2, s0 +; GFX10-NEXT: s_lshl_b32 s2, s6, 24 +; GFX10-NEXT: s_or_b32 s1, s12, s1 +; GFX10-NEXT: s_lshl_b32 s5, s5, 24 +; GFX10-NEXT: s_or_b32 s6, s0, s2 +; GFX10-NEXT: s_bfe_u32 s0, s3, s7 +; GFX10-NEXT: s_or_b32 s5, s1, s5 ; GFX10-NEXT: s_and_b32 s1, s3, s8 ; GFX10-NEXT: s_lshl_b32 s0, s0, 8 ; GFX10-NEXT: v_mov_b32_e32 v2, s5 ; GFX10-NEXT: s_or_b32 s0, s1, s0 -; GFX10-NEXT: s_and_b32 s1, s15, s8 -; GFX10-NEXT: s_lshr_b32 s16, s3, 24 +; GFX10-NEXT: s_bfe_u32 s1, s3, s9 +; GFX10-NEXT: s_lshr_b32 s10, s3, 24 ; GFX10-NEXT: s_lshl_b32 s1, s1, 16 ; GFX10-NEXT: v_cndmask_b32_e32 v2, s4, v2, vcc_lo ; GFX10-NEXT: s_or_b32 s1, s0, s1 ; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 2, v4 -; GFX10-NEXT: s_lshl_b32 s2, s16, 24 +; GFX10-NEXT: s_lshl_b32 s2, s10, 24 +; GFX10-NEXT: v_xor_b32_e32 v1, -1, v3 ; GFX10-NEXT: s_or_b32 s7, s1, s2 ; GFX10-NEXT: v_cmp_eq_u32_e64 s1, 3, v4 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s6, s0 ; GFX10-NEXT: v_cmp_eq_u32_e64 s2, 0, v4 +; GFX10-NEXT: s_mov_b32 s3, 8 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s7, s1 ; GFX10-NEXT: v_and_or_b32 v5, v2, v1, v0 ; GFX10-NEXT: v_mov_b32_e32 v0, s4 @@ -6300,36 +5908,32 @@ define amdgpu_ps void @insertelement_s_v16i8_v_v(<16 x i8> addrspace(4)* inreg % ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v5, s0 ; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v5, s1 -; GFX10-NEXT: s_mov_b32 s2, 8 -; GFX10-NEXT: v_lshrrev_b32_e32 v4, 8, v0 -; GFX10-NEXT: v_lshrrev_b32_e32 v6, 8, v1 -; GFX10-NEXT: v_lshrrev_b32_e32 v8, 8, v2 -; GFX10-NEXT: v_lshrrev_b32_e32 v11, 8, v3 -; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v4, s2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_lshrrev_b32_e32 v7, 24, v1 -; GFX10-NEXT: v_lshrrev_b32_e32 v10, 24, v2 -; GFX10-NEXT: v_lshlrev_b32_sdwa v6, v9, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v8, v9, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v9, v9, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_lshrrev_b32_e32 v12, 24, v3 -; GFX10-NEXT: v_and_or_b32 v4, v0, s8, v4 -; GFX10-NEXT: v_and_b32_sdwa v0, v0, s8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX10-NEXT: s_mov_b32 s2, 16 +; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v0 +; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v6, s3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v9, s3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v11, v10, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v7, 24, v2 +; GFX10-NEXT: v_lshrrev_b32_e32 v8, 24, v3 +; GFX10-NEXT: v_lshlrev_b32_sdwa v10, v10, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_and_or_b32 v6, v0, s8, v6 +; GFX10-NEXT: v_and_or_b32 v9, v1, s8, v9 +; GFX10-NEXT: v_lshlrev_b32_sdwa v0, s2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_lshlrev_b32_e32 v4, 24, v4 +; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX10-NEXT: v_lshlrev_b32_e32 v5, 24, v5 -; GFX10-NEXT: v_and_or_b32 v6, v1, s8, v6 -; GFX10-NEXT: v_and_or_b32 v8, v2, s8, v8 -; GFX10-NEXT: v_and_or_b32 v9, v3, s8, v9 -; GFX10-NEXT: v_and_b32_sdwa v1, v1, s8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX10-NEXT: v_and_or_b32 v11, v2, s8, v11 +; GFX10-NEXT: v_and_or_b32 v10, v3, s8, v10 +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v12, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX10-NEXT: v_lshlrev_b32_e32 v7, 24, v7 -; GFX10-NEXT: v_and_b32_sdwa v2, v2, s8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b32_e32 v10, 24, v10 -; GFX10-NEXT: v_and_b32_sdwa v3, v3, s8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b32_e32 v11, 24, v12 -; GFX10-NEXT: v_or3_b32 v0, v4, v0, v5 +; GFX10-NEXT: v_lshlrev_b32_sdwa v3, v12, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_lshlrev_b32_e32 v8, 24, v8 +; GFX10-NEXT: v_or3_b32 v0, v6, v0, v4 +; GFX10-NEXT: v_or3_b32 v1, v9, v1, v5 ; GFX10-NEXT: v_mov_b32_e32 v4, 0 -; GFX10-NEXT: v_or3_b32 v1, v6, v1, v7 -; GFX10-NEXT: v_or3_b32 v2, v8, v2, v10 -; GFX10-NEXT: v_or3_b32 v3, v9, v3, v11 +; GFX10-NEXT: v_or3_b32 v2, v11, v2, v7 +; GFX10-NEXT: v_or3_b32 v3, v10, v3, v8 ; GFX10-NEXT: v_mov_b32_e32 v5, 0 ; GFX10-NEXT: global_store_dwordx4 v[4:5], v[0:3], off ; GFX10-NEXT: s_endpgm @@ -6344,85 +5948,79 @@ define amdgpu_ps void @insertelement_v_v16i8_s_v(<16 x i8> addrspace(1)* %ptr, i ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx4 v[3:6], v[0:1], off ; GFX9-NEXT: s_mov_b32 s0, 8 +; GFX9-NEXT: s_mov_b32 s1, 16 ; GFX9-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-NEXT: s_movk_i32 s6, 0xff -; GFX9-NEXT: v_lshrrev_b32_e32 v1, 2, v2 +; GFX9-NEXT: v_lshrrev_b32_e32 v7, 2, v2 +; GFX9-NEXT: v_mov_b32_e32 v1, 16 ; GFX9-NEXT: v_and_b32_e32 v2, 3, v2 -; GFX9-NEXT: s_and_b32 s1, s2, s6 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v7 +; GFX9-NEXT: s_and_b32 s2, s2, s6 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 3, v2 -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 -; GFX9-NEXT: v_lshlrev_b32_e64 v7, v2, s1 +; GFX9-NEXT: v_lshlrev_b32_e64 v8, v2, s2 ; GFX9-NEXT: v_lshlrev_b32_e64 v2, v2, s6 -; GFX9-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v1 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v7 ; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2 -; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v1 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v7 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v8, 8, v3 -; GFX9-NEXT: v_lshrrev_b32_e32 v10, 8, v4 ; GFX9-NEXT: v_lshrrev_b32_e32 v9, 24, v3 -; GFX9-NEXT: v_lshrrev_b32_e32 v11, 24, v4 -; GFX9-NEXT: v_lshrrev_b32_e32 v12, 8, v5 -; GFX9-NEXT: v_lshlrev_b32_sdwa v8, s0, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v10, v0, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshrrev_b32_e32 v13, 24, v5 -; GFX9-NEXT: v_lshrrev_b32_e32 v14, 8, v6 -; GFX9-NEXT: v_and_b32_sdwa v16, v3, s6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_b32_sdwa v17, v4, s6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_lshrrev_b32_e32 v10, 24, v4 +; GFX9-NEXT: v_lshlrev_b32_sdwa v13, s0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v15, s0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshrrev_b32_e32 v11, 24, v5 +; GFX9-NEXT: v_lshlrev_b32_sdwa v14, s1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v16, s1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v17, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_and_or_b32 v3, v3, s6, v13 ; GFX9-NEXT: v_lshlrev_b32_e32 v9, 24, v9 -; GFX9-NEXT: v_and_or_b32 v3, v3, s6, v8 +; GFX9-NEXT: v_and_or_b32 v4, v4, s6, v15 +; GFX9-NEXT: v_lshlrev_b32_e32 v10, 24, v10 +; GFX9-NEXT: v_lshrrev_b32_e32 v12, 24, v6 +; GFX9-NEXT: v_lshlrev_b32_sdwa v18, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v19, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_and_or_b32 v5, v5, s6, v17 ; GFX9-NEXT: v_lshlrev_b32_e32 v11, 24, v11 -; GFX9-NEXT: v_and_or_b32 v4, v4, s6, v10 -; GFX9-NEXT: v_lshlrev_b32_sdwa v12, v0, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshrrev_b32_e32 v15, 24, v6 -; GFX9-NEXT: v_and_b32_sdwa v18, v5, s6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b32_e32 v13, 24, v13 -; GFX9-NEXT: v_and_or_b32 v5, v5, s6, v12 -; GFX9-NEXT: v_lshlrev_b32_sdwa v14, v0, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_or3_b32 v3, v3, v16, v9 -; GFX9-NEXT: v_or3_b32 v4, v4, v17, v11 -; GFX9-NEXT: v_and_b32_sdwa v19, v6, s6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b32_e32 v15, 24, v15 -; GFX9-NEXT: v_and_or_b32 v6, v6, s6, v14 -; GFX9-NEXT: v_or3_b32 v5, v5, v18, v13 -; GFX9-NEXT: v_cndmask_b32_e32 v8, v3, v4, vcc -; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], 2, v1 -; GFX9-NEXT: v_or3_b32 v6, v6, v19, v15 -; GFX9-NEXT: v_cndmask_b32_e64 v8, v8, v5, s[0:1] -; GFX9-NEXT: v_cndmask_b32_e64 v8, v8, v6, s[2:3] -; GFX9-NEXT: v_and_or_b32 v2, v8, v2, v7 -; GFX9-NEXT: v_cndmask_b32_e64 v1, v3, v2, s[4:5] -; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v2, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v4, v5, v2, s[0:1] +; GFX9-NEXT: v_or3_b32 v3, v3, v14, v9 +; GFX9-NEXT: v_or3_b32 v4, v4, v16, v10 +; GFX9-NEXT: v_and_or_b32 v13, v6, s6, v19 +; GFX9-NEXT: v_lshlrev_b32_sdwa v6, v1, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_e32 v12, 24, v12 +; GFX9-NEXT: v_or3_b32 v5, v5, v18, v11 +; GFX9-NEXT: v_cndmask_b32_e32 v9, v3, v4, vcc +; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], 2, v7 +; GFX9-NEXT: v_or3_b32 v6, v13, v6, v12 +; GFX9-NEXT: v_cndmask_b32_e64 v9, v9, v5, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e64 v9, v9, v6, s[2:3] +; GFX9-NEXT: v_and_or_b32 v2, v9, v2, v8 +; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v2, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v2, s[4:5] ; GFX9-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[2:3] -; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v1 -; GFX9-NEXT: v_lshrrev_b32_e32 v7, 8, v3 -; GFX9-NEXT: v_lshrrev_b32_e32 v9, 8, v4 -; GFX9-NEXT: v_lshrrev_b32_e32 v11, 8, v2 -; GFX9-NEXT: v_lshrrev_b32_e32 v6, 24, v1 -; GFX9-NEXT: v_lshrrev_b32_e32 v8, 24, v3 -; GFX9-NEXT: v_lshrrev_b32_e32 v10, 24, v4 -; GFX9-NEXT: v_lshrrev_b32_e32 v12, 24, v2 -; GFX9-NEXT: v_lshlrev_b32_sdwa v5, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v7, v0, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v9, v0, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v0, v0, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_b32_sdwa v13, v1, s6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_b32_sdwa v14, v3, s6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_b32_sdwa v15, v4, s6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_or_b32 v1, v1, s6, v5 -; GFX9-NEXT: v_and_b32_sdwa v16, v2, s6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b32_e32 v6, 24, v6 -; GFX9-NEXT: v_and_or_b32 v5, v2, s6, v0 -; GFX9-NEXT: v_lshlrev_b32_e32 v8, 24, v8 -; GFX9-NEXT: v_lshlrev_b32_e32 v10, 24, v10 -; GFX9-NEXT: v_and_or_b32 v4, v4, s6, v9 -; GFX9-NEXT: v_lshlrev_b32_e32 v11, 24, v12 -; GFX9-NEXT: v_and_or_b32 v3, v3, s6, v7 -; GFX9-NEXT: v_or3_b32 v0, v1, v13, v6 -; GFX9-NEXT: v_or3_b32 v1, v3, v14, v8 -; GFX9-NEXT: v_or3_b32 v2, v4, v15, v10 -; GFX9-NEXT: v_or3_b32 v3, v5, v16, v11 +; GFX9-NEXT: v_lshrrev_b32_e32 v6, 24, v3 +; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v4 +; GFX9-NEXT: v_lshrrev_b32_e32 v8, 24, v5 +; GFX9-NEXT: v_lshlrev_b32_sdwa v10, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v12, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v14, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshrrev_b32_e32 v9, 24, v2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v13, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v15, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v11, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v16, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_and_or_b32 v1, v3, s6, v10 +; GFX9-NEXT: v_lshlrev_b32_e32 v3, 24, v6 +; GFX9-NEXT: v_lshlrev_b32_e32 v6, 24, v7 +; GFX9-NEXT: v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_e32 v7, 24, v8 +; GFX9-NEXT: v_and_or_b32 v4, v4, s6, v12 +; GFX9-NEXT: v_and_or_b32 v5, v5, s6, v14 +; GFX9-NEXT: v_and_or_b32 v8, v2, s6, v0 +; GFX9-NEXT: v_or3_b32 v0, v1, v11, v3 +; GFX9-NEXT: v_lshlrev_b32_e32 v9, 24, v9 +; GFX9-NEXT: v_or3_b32 v1, v4, v13, v6 +; GFX9-NEXT: v_or3_b32 v2, v5, v15, v7 ; GFX9-NEXT: v_mov_b32_e32 v4, 0 +; GFX9-NEXT: v_or3_b32 v3, v8, v16, v9 ; GFX9-NEXT: v_mov_b32_e32 v5, 0 ; GFX9-NEXT: global_store_dwordx4 v[4:5], v[0:3], off ; GFX9-NEXT: s_endpgm @@ -6430,96 +6028,89 @@ define amdgpu_ps void @insertelement_v_v16i8_s_v(<16 x i8> addrspace(1)* %ptr, i ; GFX8-LABEL: insertelement_v_v16i8_s_v: ; GFX8: ; %bb.0: ; GFX8-NEXT: flat_load_dwordx4 v[3:6], v[0:1] -; GFX8-NEXT: s_movk_i32 s0, 0xff ; GFX8-NEXT: v_mov_b32_e32 v0, 8 -; GFX8-NEXT: v_mov_b32_e32 v1, 8 -; GFX8-NEXT: v_mov_b32_e32 v7, s0 -; GFX8-NEXT: v_lshrrev_b32_e32 v8, 2, v2 +; GFX8-NEXT: v_mov_b32_e32 v7, 8 +; GFX8-NEXT: v_mov_b32_e32 v1, 16 +; GFX8-NEXT: v_mov_b32_e32 v8, 16 +; GFX8-NEXT: v_lshrrev_b32_e32 v9, 2, v2 ; GFX8-NEXT: v_and_b32_e32 v2, 3, v2 +; GFX8-NEXT: s_movk_i32 s0, 0xff ; GFX8-NEXT: s_and_b32 s1, s2, s0 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 3, v2 -; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8 -; GFX8-NEXT: v_lshlrev_b32_e64 v9, v2, s1 +; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v9 +; GFX8-NEXT: v_lshlrev_b32_e64 v10, v2, s1 ; GFX8-NEXT: v_lshlrev_b32_e64 v2, v2, s0 -; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], 2, v8 -; GFX8-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v8 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], 2, v9 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v9 ; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2 -; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_lshrrev_b32_e32 v12, 8, v4 -; GFX8-NEXT: v_lshrrev_b32_e32 v10, 8, v3 -; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v14, 8, v5 -; GFX8-NEXT: v_lshlrev_b32_sdwa v12, v1, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v15, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX8-NEXT: v_lshrrev_b32_e32 v11, 24, v3 -; GFX8-NEXT: v_lshrrev_b32_e32 v16, 8, v6 -; GFX8-NEXT: v_lshrrev_b32_e32 v13, 24, v4 -; GFX8-NEXT: v_and_b32_sdwa v18, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v3, v4, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_sdwa v19, v4, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_lshlrev_b32_sdwa v14, v1, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v15, 24, v5 -; GFX8-NEXT: v_lshlrev_b32_e32 v12, 24, v13 -; GFX8-NEXT: v_or_b32_e32 v3, v3, v19 -; GFX8-NEXT: v_and_b32_sdwa v10, v5, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v4, v5, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_lshlrev_b32_sdwa v16, v1, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshlrev_b32_e32 v11, 24, v11 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v18 -; GFX8-NEXT: v_lshrrev_b32_e32 v17, 24, v6 -; GFX8-NEXT: v_or_b32_sdwa v5, v6, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_sdwa v6, v6, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_lshlrev_b32_e32 v13, 24, v15 -; GFX8-NEXT: v_or_b32_e32 v4, v4, v10 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v11 -; GFX8-NEXT: v_or_b32_e32 v3, v3, v12 -; GFX8-NEXT: v_or_b32_e32 v5, v5, v6 -; GFX8-NEXT: v_lshlrev_b32_e32 v14, 24, v17 -; GFX8-NEXT: v_or_b32_e32 v4, v4, v13 -; GFX8-NEXT: v_cndmask_b32_e32 v6, v0, v3, vcc -; GFX8-NEXT: v_or_b32_e32 v5, v5, v14 -; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, v4, s[0:1] -; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, v5, s[2:3] -; GFX8-NEXT: v_and_b32_e32 v2, v6, v2 -; GFX8-NEXT: v_or_b32_e32 v2, v2, v9 -; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v2, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v2, s[0:1] -; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[4:5] -; GFX8-NEXT: v_cndmask_b32_e64 v2, v5, v2, s[2:3] -; GFX8-NEXT: v_lshrrev_b32_e32 v5, 8, v0 -; GFX8-NEXT: v_lshrrev_b32_e32 v8, 8, v3 -; GFX8-NEXT: v_lshrrev_b32_e32 v10, 8, v4 -; GFX8-NEXT: v_lshrrev_b32_e32 v12, 8, v2 -; GFX8-NEXT: v_lshlrev_b32_sdwa v5, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshlrev_b32_sdwa v8, v1, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshlrev_b32_sdwa v10, v1, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v9, 24, v3 -; GFX8-NEXT: v_lshrrev_b32_e32 v11, 24, v4 -; GFX8-NEXT: v_lshrrev_b32_e32 v13, 24, v2 -; GFX8-NEXT: v_and_b32_sdwa v15, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_sdwa v16, v4, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_sdwa v14, v0, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_sdwa v7, v2, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v3, v3, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v4, v4, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_lshrrev_b32_e32 v6, 24, v0 -; GFX8-NEXT: v_or_b32_sdwa v0, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_e32 v2, v3, v15 -; GFX8-NEXT: v_or_b32_e32 v3, v4, v16 -; GFX8-NEXT: v_lshlrev_b32_e32 v9, 24, v9 -; GFX8-NEXT: v_or_b32_e32 v4, v1, v7 +; GFX8-NEXT: v_lshrrev_b32_e32 v12, 24, v4 +; GFX8-NEXT: v_lshlrev_b32_sdwa v16, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_sdwa v3, v3, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v17, v7, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v13, 24, v5 +; GFX8-NEXT: v_lshlrev_b32_sdwa v18, v8, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_lshlrev_b32_sdwa v19, v7, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_or_b32_sdwa v4, v5, v17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX8-NEXT: v_lshlrev_b32_e32 v12, 24, v12 ; GFX8-NEXT: v_lshlrev_b32_e32 v11, 24, v11 -; GFX8-NEXT: v_lshlrev_b32_e32 v12, 24, v13 -; GFX8-NEXT: v_or_b32_e32 v1, v2, v9 -; GFX8-NEXT: v_or_b32_e32 v2, v3, v11 -; GFX8-NEXT: v_or_b32_e32 v3, v4, v12 -; GFX8-NEXT: v_mov_b32_e32 v4, 0 +; GFX8-NEXT: v_or_b32_e32 v3, v3, v16 +; GFX8-NEXT: v_lshrrev_b32_e32 v14, 24, v6 +; GFX8-NEXT: v_or_b32_sdwa v5, v6, v19 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v6, v8, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_e32 v1, v4, v18 +; GFX8-NEXT: v_lshlrev_b32_e32 v13, 24, v13 +; GFX8-NEXT: v_or_b32_e32 v3, v3, v11 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v12 +; GFX8-NEXT: v_or_b32_e32 v4, v5, v6 +; GFX8-NEXT: v_lshlrev_b32_e32 v14, 24, v14 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v13 +; GFX8-NEXT: v_cndmask_b32_e32 v5, v3, v0, vcc +; GFX8-NEXT: v_or_b32_e32 v4, v4, v14 +; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, v1, s[0:1] +; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, v4, s[2:3] +; GFX8-NEXT: v_and_b32_e32 v2, v5, v2 +; GFX8-NEXT: v_or_b32_e32 v2, v2, v10 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v2, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1] +; GFX8-NEXT: v_cndmask_b32_e64 v2, v4, v2, s[2:3] +; GFX8-NEXT: v_lshlrev_b32_sdwa v10, v7, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v12, v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v3 +; GFX8-NEXT: v_lshrrev_b32_e32 v5, 24, v0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v11, v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_sdwa v3, v3, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v13, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_lshlrev_b32_sdwa v14, v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v9, 24, v2 +; GFX8-NEXT: v_lshlrev_b32_sdwa v7, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_or_b32_sdwa v0, v0, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshrrev_b32_e32 v6, 24, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v15, v8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_lshlrev_b32_sdwa v8, v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_sdwa v2, v2, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v1, v1, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_e32 v7, 24, v9 +; GFX8-NEXT: v_lshlrev_b32_e32 v4, 24, v4 +; GFX8-NEXT: v_lshlrev_b32_e32 v5, 24, v5 +; GFX8-NEXT: v_or_b32_e32 v3, v3, v11 +; GFX8-NEXT: v_or_b32_e32 v9, v0, v13 +; GFX8-NEXT: v_or_b32_e32 v10, v1, v15 ; GFX8-NEXT: v_lshlrev_b32_e32 v6, 24, v6 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v14 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v6 +; GFX8-NEXT: v_or_b32_e32 v8, v2, v8 +; GFX8-NEXT: v_or_b32_e32 v0, v3, v4 +; GFX8-NEXT: v_or_b32_e32 v1, v9, v5 +; GFX8-NEXT: v_mov_b32_e32 v4, 0 +; GFX8-NEXT: v_or_b32_e32 v2, v10, v6 +; GFX8-NEXT: v_or_b32_e32 v3, v8, v7 ; GFX8-NEXT: v_mov_b32_e32 v5, 0 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; GFX8-NEXT: s_endpgm @@ -6544,110 +6135,94 @@ define amdgpu_ps void @insertelement_v_v16i8_s_v(<16 x i8> addrspace(1)* %ptr, i ; GFX7-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v17 ; GFX7-NEXT: s_mov_b32 s10, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v3 -; GFX7-NEXT: v_lshrrev_b32_e32 v8, 8, v4 -; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v3 -; GFX7-NEXT: v_lshrrev_b32_e32 v9, 16, v4 -; GFX7-NEXT: v_lshrrev_b32_e32 v11, 8, v5 -; GFX7-NEXT: v_and_b32_e32 v0, s6, v0 -; GFX7-NEXT: v_and_b32_e32 v8, s6, v8 -; GFX7-NEXT: v_lshrrev_b32_e32 v7, 24, v3 -; GFX7-NEXT: v_lshrrev_b32_e32 v10, 24, v4 -; GFX7-NEXT: v_lshrrev_b32_e32 v12, 16, v5 -; GFX7-NEXT: v_lshrrev_b32_e32 v14, 8, v6 -; GFX7-NEXT: v_and_b32_e32 v1, s6, v1 -; GFX7-NEXT: v_and_b32_e32 v9, s6, v9 -; GFX7-NEXT: v_and_b32_e32 v11, s6, v11 -; GFX7-NEXT: v_and_b32_e32 v3, s6, v3 -; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0 -; GFX7-NEXT: v_and_b32_e32 v4, s6, v4 -; GFX7-NEXT: v_lshlrev_b32_e32 v8, 8, v8 -; GFX7-NEXT: v_lshrrev_b32_e32 v13, 24, v5 -; GFX7-NEXT: v_lshrrev_b32_e32 v15, 16, v6 -; GFX7-NEXT: v_and_b32_e32 v12, s6, v12 -; GFX7-NEXT: v_and_b32_e32 v14, s6, v14 -; GFX7-NEXT: v_or_b32_e32 v0, v3, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX7-NEXT: v_and_b32_e32 v5, s6, v5 -; GFX7-NEXT: v_lshlrev_b32_e32 v9, 16, v9 -; GFX7-NEXT: v_lshlrev_b32_e32 v11, 8, v11 -; GFX7-NEXT: v_or_b32_e32 v3, v4, v8 -; GFX7-NEXT: v_lshrrev_b32_e32 v16, 24, v6 -; GFX7-NEXT: v_and_b32_e32 v15, s6, v15 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v7, 24, v7 -; GFX7-NEXT: v_and_b32_e32 v6, s6, v6 -; GFX7-NEXT: v_lshlrev_b32_e32 v10, 24, v10 -; GFX7-NEXT: v_or_b32_e32 v1, v3, v9 -; GFX7-NEXT: v_lshlrev_b32_e32 v12, 16, v12 -; GFX7-NEXT: v_or_b32_e32 v4, v5, v11 +; GFX7-NEXT: v_bfe_u32 v10, v3, 8, 8 +; GFX7-NEXT: v_bfe_u32 v12, v4, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v3 +; GFX7-NEXT: v_lshrrev_b32_e32 v1, 24, v4 +; GFX7-NEXT: v_bfe_u32 v14, v5, 8, 8 +; GFX7-NEXT: v_and_b32_e32 v9, s6, v3 +; GFX7-NEXT: v_bfe_u32 v3, v3, 16, 8 +; GFX7-NEXT: v_and_b32_e32 v11, s6, v4 +; GFX7-NEXT: v_bfe_u32 v4, v4, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v10, 8, v10 +; GFX7-NEXT: v_lshlrev_b32_e32 v12, 8, v12 +; GFX7-NEXT: v_lshrrev_b32_e32 v7, 24, v5 +; GFX7-NEXT: v_bfe_u32 v16, v6, 8, 8 +; GFX7-NEXT: v_and_b32_e32 v13, s6, v5 +; GFX7-NEXT: v_bfe_u32 v5, v5, 16, 8 +; GFX7-NEXT: v_or_b32_e32 v9, v9, v10 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v4 +; GFX7-NEXT: v_or_b32_e32 v10, v11, v12 ; GFX7-NEXT: v_lshlrev_b32_e32 v14, 8, v14 -; GFX7-NEXT: v_lshlrev_b32_e32 v13, 24, v13 -; GFX7-NEXT: v_or_b32_e32 v3, v4, v12 -; GFX7-NEXT: v_lshlrev_b32_e32 v15, 16, v15 -; GFX7-NEXT: v_or_b32_e32 v5, v6, v14 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v7 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v10 -; GFX7-NEXT: v_or_b32_e32 v4, v5, v15 -; GFX7-NEXT: v_lshlrev_b32_e32 v16, 24, v16 -; GFX7-NEXT: v_or_b32_e32 v3, v3, v13 +; GFX7-NEXT: v_lshrrev_b32_e32 v8, 24, v6 +; GFX7-NEXT: v_and_b32_e32 v15, s6, v6 +; GFX7-NEXT: v_bfe_u32 v6, v6, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v16, 8, v16 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX7-NEXT: v_or_b32_e32 v3, v9, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v1 +; GFX7-NEXT: v_or_b32_e32 v4, v10, v4 +; GFX7-NEXT: v_lshlrev_b32_e32 v5, 16, v5 +; GFX7-NEXT: v_or_b32_e32 v11, v13, v14 +; GFX7-NEXT: v_lshlrev_b32_e32 v7, 24, v7 +; GFX7-NEXT: v_lshlrev_b32_e32 v6, 16, v6 +; GFX7-NEXT: v_or_b32_e32 v12, v15, v16 +; GFX7-NEXT: v_or_b32_e32 v5, v11, v5 +; GFX7-NEXT: v_or_b32_e32 v0, v3, v0 +; GFX7-NEXT: v_or_b32_e32 v1, v4, v1 +; GFX7-NEXT: v_or_b32_e32 v3, v5, v7 ; GFX7-NEXT: v_cndmask_b32_e32 v5, v0, v1, vcc -; GFX7-NEXT: v_or_b32_e32 v4, v4, v16 +; GFX7-NEXT: v_lshlrev_b32_e32 v8, 24, v8 +; GFX7-NEXT: v_or_b32_e32 v6, v12, v6 +; GFX7-NEXT: v_or_b32_e32 v4, v6, v8 ; GFX7-NEXT: v_cndmask_b32_e64 v5, v5, v3, s[0:1] ; GFX7-NEXT: v_cndmask_b32_e64 v5, v5, v4, s[2:3] ; GFX7-NEXT: v_and_b32_e32 v2, v5, v2 ; GFX7-NEXT: v_or_b32_e32 v2, v2, v18 -; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[4:5] ; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[4:5] ; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v2, s[0:1] +; GFX7-NEXT: v_bfe_u32 v9, v0, 8, 8 +; GFX7-NEXT: v_bfe_u32 v11, v1, 8, 8 ; GFX7-NEXT: v_cndmask_b32_e64 v4, v4, v2, s[2:3] -; GFX7-NEXT: v_lshrrev_b32_e32 v2, 8, v0 -; GFX7-NEXT: v_and_b32_e32 v2, s6, v2 -; GFX7-NEXT: v_lshrrev_b32_e32 v5, 16, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v6, 24, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v7, 8, v1 -; GFX7-NEXT: v_and_b32_e32 v0, s6, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 8, v2 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX7-NEXT: v_and_b32_e32 v2, s6, v7 -; GFX7-NEXT: v_lshrrev_b32_e32 v8, 16, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v9, 24, v1 -; GFX7-NEXT: v_and_b32_e32 v1, s6, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 8, v2 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v2 -; GFX7-NEXT: v_and_b32_e32 v2, s6, v8 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 24, v9 -; GFX7-NEXT: v_lshrrev_b32_e32 v10, 8, v3 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v2 -; GFX7-NEXT: v_lshrrev_b32_e32 v11, 16, v3 -; GFX7-NEXT: v_lshrrev_b32_e32 v12, 24, v3 -; GFX7-NEXT: v_and_b32_e32 v2, s6, v3 -; GFX7-NEXT: v_and_b32_e32 v3, s6, v10 -; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 -; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 -; GFX7-NEXT: v_and_b32_e32 v3, s6, v11 +; GFX7-NEXT: v_lshrrev_b32_e32 v2, 24, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v5, 24, v1 +; GFX7-NEXT: v_bfe_u32 v13, v3, 8, 8 +; GFX7-NEXT: v_and_b32_e32 v8, s6, v0 +; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX7-NEXT: v_and_b32_e32 v10, s6, v1 +; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v9, 8, v9 +; GFX7-NEXT: v_lshlrev_b32_e32 v11, 8, v11 +; GFX7-NEXT: v_or_b32_e32 v8, v8, v9 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v6, 24, v3 +; GFX7-NEXT: v_and_b32_e32 v12, s6, v3 +; GFX7-NEXT: v_bfe_u32 v3, v3, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-NEXT: v_or_b32_e32 v9, v10, v11 +; GFX7-NEXT: v_lshlrev_b32_e32 v13, 8, v13 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 24, v2 +; GFX7-NEXT: v_or_b32_e32 v0, v8, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v5, 24, v5 +; GFX7-NEXT: v_or_b32_e32 v1, v9, v1 ; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX7-NEXT: v_or_b32_e32 v10, v12, v13 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX7-NEXT: v_or_b32_e32 v1, v1, v5 +; GFX7-NEXT: v_bfe_u32 v5, v4, 8, 8 +; GFX7-NEXT: v_or_b32_e32 v2, v10, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 24, v6 ; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 -; GFX7-NEXT: v_lshlrev_b32_e32 v3, 24, v12 -; GFX7-NEXT: v_lshrrev_b32_e32 v13, 8, v4 -; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 -; GFX7-NEXT: v_lshrrev_b32_e32 v14, 16, v4 -; GFX7-NEXT: v_lshrrev_b32_e32 v15, 24, v4 +; GFX7-NEXT: v_lshrrev_b32_e32 v7, 24, v4 ; GFX7-NEXT: v_and_b32_e32 v3, s6, v4 -; GFX7-NEXT: v_and_b32_e32 v4, s6, v13 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 8, v4 -; GFX7-NEXT: v_and_b32_e32 v5, s6, v5 -; GFX7-NEXT: v_or_b32_e32 v3, v3, v4 -; GFX7-NEXT: v_and_b32_e32 v4, s6, v14 -; GFX7-NEXT: v_lshlrev_b32_e32 v5, 16, v5 +; GFX7-NEXT: v_bfe_u32 v4, v4, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v5, 8, v5 +; GFX7-NEXT: v_or_b32_e32 v3, v3, v5 ; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v4 ; GFX7-NEXT: v_or_b32_e32 v3, v3, v4 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v15 -; GFX7-NEXT: v_lshlrev_b32_e32 v6, 24, v6 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v5 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v6 +; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v7 ; GFX7-NEXT: v_or_b32_e32 v3, v3, v4 ; GFX7-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 ; GFX7-NEXT: s_endpgm @@ -6655,86 +6230,80 @@ define amdgpu_ps void @insertelement_v_v16i8_s_v(<16 x i8> addrspace(1)* %ptr, i ; GFX10-LABEL: insertelement_v_v16i8_s_v: ; GFX10: ; %bb.0: ; GFX10-NEXT: global_load_dwordx4 v[3:6], v[0:1], off -; GFX10-NEXT: v_mov_b32_e32 v1, 8 ; GFX10-NEXT: s_mov_b32 s0, 8 +; GFX10-NEXT: v_mov_b32_e32 v1, 8 +; GFX10-NEXT: s_mov_b32 s1, 16 ; GFX10-NEXT: s_movk_i32 s3, 0xff ; GFX10-NEXT: v_and_b32_e32 v0, 3, v2 +; GFX10-NEXT: v_mov_b32_e32 v7, 16 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 2, v2 -; GFX10-NEXT: s_and_b32 s1, s2, s3 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 3, v0 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v2 -; GFX10-NEXT: v_cmp_eq_u32_e64 s2, 0, v2 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v7, 8, v3 -; GFX10-NEXT: v_lshrrev_b32_e32 v9, 8, v4 ; GFX10-NEXT: v_lshrrev_b32_e32 v8, 24, v3 -; GFX10-NEXT: v_lshrrev_b32_e32 v10, 24, v4 -; GFX10-NEXT: v_lshrrev_b32_e32 v11, 8, v5 -; GFX10-NEXT: v_lshlrev_b32_sdwa v7, s0, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v9, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_and_b32_sdwa v15, v3, s3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_b32_sdwa v16, v4, s3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshrrev_b32_e32 v12, 24, v5 -; GFX10-NEXT: v_and_or_b32 v3, v3, s3, v7 +; GFX10-NEXT: v_lshrrev_b32_e32 v9, 24, v4 +; GFX10-NEXT: v_lshlrev_b32_sdwa v12, s0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v14, s0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v10, 24, v5 +; GFX10-NEXT: v_lshlrev_b32_sdwa v13, s1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v15, s1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v3, v3, s3, v12 ; GFX10-NEXT: v_lshlrev_b32_e32 v8, 24, v8 -; GFX10-NEXT: v_lshrrev_b32_e32 v13, 8, v6 -; GFX10-NEXT: v_and_or_b32 v4, v4, s3, v9 +; GFX10-NEXT: v_lshlrev_b32_sdwa v16, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_and_or_b32 v4, v4, s3, v14 +; GFX10-NEXT: v_lshlrev_b32_e32 v9, 24, v9 +; GFX10-NEXT: v_lshlrev_b32_sdwa v17, v7, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_lshrrev_b32_e32 v11, 24, v6 +; GFX10-NEXT: v_or3_b32 v3, v3, v13, v8 +; GFX10-NEXT: v_lshlrev_b32_sdwa v18, v1, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_or3_b32 v4, v4, v15, v9 +; GFX10-NEXT: v_and_or_b32 v5, v5, s3, v16 ; GFX10-NEXT: v_lshlrev_b32_e32 v10, 24, v10 -; GFX10-NEXT: v_lshlrev_b32_sdwa v11, v1, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_and_b32_sdwa v17, v5, s3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshrrev_b32_e32 v14, 24, v6 -; GFX10-NEXT: v_or3_b32 v3, v3, v15, v8 -; GFX10-NEXT: v_lshlrev_b32_e32 v7, 24, v12 -; GFX10-NEXT: v_and_or_b32 v5, v5, s3, v11 -; GFX10-NEXT: v_lshlrev_b32_sdwa v9, v1, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_or3_b32 v4, v4, v16, v10 -; GFX10-NEXT: v_and_b32_sdwa v18, v6, s3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b32_e32 v8, 24, v14 -; GFX10-NEXT: v_or3_b32 v5, v5, v17, v7 -; GFX10-NEXT: v_and_or_b32 v6, v6, s3, v9 -; GFX10-NEXT: v_cndmask_b32_e32 v7, v3, v4, vcc_lo +; GFX10-NEXT: v_lshlrev_b32_sdwa v12, v7, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v6, v6, s3, v18 +; GFX10-NEXT: v_lshlrev_b32_e32 v8, 24, v11 +; GFX10-NEXT: v_cndmask_b32_e32 v9, v3, v4, vcc_lo +; GFX10-NEXT: v_or3_b32 v5, v5, v17, v10 ; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 2, v2 -; GFX10-NEXT: v_lshlrev_b32_e64 v9, v0, s3 +; GFX10-NEXT: s_and_b32 s1, s2, s3 +; GFX10-NEXT: v_lshlrev_b32_e64 v10, v0, s3 +; GFX10-NEXT: v_or3_b32 v6, v6, v12, v8 ; GFX10-NEXT: v_lshlrev_b32_e64 v0, v0, s1 -; GFX10-NEXT: v_or3_b32 v6, v6, v18, v8 +; GFX10-NEXT: v_cndmask_b32_e64 v8, v9, v5, s0 ; GFX10-NEXT: v_cmp_eq_u32_e64 s1, 3, v2 -; GFX10-NEXT: v_cndmask_b32_e64 v7, v7, v5, s0 -; GFX10-NEXT: v_xor_b32_e32 v8, -1, v9 -; GFX10-NEXT: v_cndmask_b32_e64 v7, v7, v6, s1 -; GFX10-NEXT: v_and_or_b32 v0, v7, v8, v0 +; GFX10-NEXT: v_xor_b32_e32 v9, -1, v10 +; GFX10-NEXT: v_cmp_eq_u32_e64 s2, 0, v2 +; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, v6, s1 +; GFX10-NEXT: v_and_or_b32 v0, v8, v9, v0 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v3, v0, s2 ; GFX10-NEXT: v_cndmask_b32_e32 v3, v4, v0, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e64 v4, v5, v0, s0 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v6, v0, s1 -; GFX10-NEXT: v_lshrrev_b32_e32 v5, 8, v2 -; GFX10-NEXT: v_lshrrev_b32_e32 v7, 8, v3 -; GFX10-NEXT: v_lshrrev_b32_e32 v9, 8, v4 -; GFX10-NEXT: v_lshrrev_b32_e32 v11, 8, v0 -; GFX10-NEXT: v_lshrrev_b32_e32 v6, 24, v2 -; GFX10-NEXT: v_lshrrev_b32_e32 v8, 24, v3 -; GFX10-NEXT: v_lshrrev_b32_e32 v10, 24, v4 -; GFX10-NEXT: v_lshrrev_b32_e32 v12, 24, v0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v5, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v7, v1, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v9, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_and_b32_sdwa v13, v2, s3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_b32_sdwa v14, v3, s3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_b32_sdwa v15, v4, s3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_or_b32 v2, v2, s3, v5 -; GFX10-NEXT: v_and_b32_sdwa v16, v0, s3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b32_e32 v6, 24, v6 -; GFX10-NEXT: v_and_or_b32 v5, v0, s3, v1 +; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v10, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v8, 24, v4 +; GFX10-NEXT: v_lshlrev_b32_sdwa v14, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v6, 24, v3 +; GFX10-NEXT: v_lshrrev_b32_e32 v9, 24, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v12, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v11, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v15, v7, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v2, v2, s3, v10 +; GFX10-NEXT: v_lshlrev_b32_e32 v5, 24, v5 +; GFX10-NEXT: v_and_or_b32 v4, v4, s3, v14 ; GFX10-NEXT: v_lshlrev_b32_e32 v8, 24, v8 -; GFX10-NEXT: v_and_or_b32 v3, v3, s3, v7 -; GFX10-NEXT: v_lshlrev_b32_e32 v10, 24, v10 -; GFX10-NEXT: v_lshlrev_b32_e32 v11, 24, v12 -; GFX10-NEXT: v_and_or_b32 v4, v4, s3, v9 -; GFX10-NEXT: v_or3_b32 v0, v2, v13, v6 -; GFX10-NEXT: v_or3_b32 v1, v3, v14, v8 -; GFX10-NEXT: v_or3_b32 v3, v5, v16, v11 -; GFX10-NEXT: v_or3_b32 v2, v4, v15, v10 +; GFX10-NEXT: v_lshlrev_b32_sdwa v13, v7, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v7, v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v10, v0, s3, v1 +; GFX10-NEXT: v_and_or_b32 v3, v3, s3, v12 +; GFX10-NEXT: v_lshlrev_b32_e32 v6, 24, v6 +; GFX10-NEXT: v_lshlrev_b32_e32 v9, 24, v9 +; GFX10-NEXT: v_or3_b32 v0, v2, v11, v5 +; GFX10-NEXT: v_or3_b32 v2, v4, v15, v8 ; GFX10-NEXT: v_mov_b32_e32 v4, 0 +; GFX10-NEXT: v_or3_b32 v1, v3, v13, v6 +; GFX10-NEXT: v_or3_b32 v3, v10, v7, v9 ; GFX10-NEXT: v_mov_b32_e32 v5, 0 ; GFX10-NEXT: global_store_dwordx4 v[4:5], v[0:3], off ; GFX10-NEXT: s_endpgm @@ -6748,85 +6317,79 @@ define amdgpu_ps void @insertelement_v_v16i8_v_s(<16 x i8> addrspace(1)* %ptr, i ; GFX9-LABEL: insertelement_v_v16i8_v_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx4 v[3:6], v[0:1], off -; GFX9-NEXT: s_and_b32 s1, s2, 3 -; GFX9-NEXT: s_lshl_b32 s1, s1, 3 -; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX9-NEXT: s_mov_b32 s0, 8 +; GFX9-NEXT: s_mov_b32 s1, 16 ; GFX9-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-NEXT: s_movk_i32 s6, 0xff +; GFX9-NEXT: v_mov_b32_e32 v1, 16 ; GFX9-NEXT: s_lshr_b32 s4, s2, 2 -; GFX9-NEXT: s_lshl_b32 s1, s6, s1 +; GFX9-NEXT: s_and_b32 s2, s2, 3 +; GFX9-NEXT: s_lshl_b32 s2, s2, 3 ; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s4, 1 -; GFX9-NEXT: s_not_b32 s5, s1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, s2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: s_lshl_b32 s2, s6, s2 +; GFX9-NEXT: s_not_b32 s5, s2 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[2:3], s4, 3 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v2, 8, v3 -; GFX9-NEXT: v_lshrrev_b32_e32 v8, 8, v4 -; GFX9-NEXT: v_lshlrev_b32_sdwa v2, s0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v3 -; GFX9-NEXT: v_lshrrev_b32_e32 v9, 24, v4 -; GFX9-NEXT: v_lshrrev_b32_e32 v10, 8, v5 -; GFX9-NEXT: v_lshlrev_b32_sdwa v8, v0, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshrrev_b32_e32 v11, 24, v5 -; GFX9-NEXT: v_lshrrev_b32_e32 v12, 8, v6 -; GFX9-NEXT: v_and_b32_sdwa v14, v3, s6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_or_b32 v2, v3, s6, v2 +; GFX9-NEXT: v_lshrrev_b32_e32 v8, 24, v4 +; GFX9-NEXT: v_lshlrev_b32_sdwa v11, s0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v13, s0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshrrev_b32_e32 v9, 24, v5 +; GFX9-NEXT: v_lshlrev_b32_sdwa v12, s1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v14, s1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v15, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_and_or_b32 v3, v3, s6, v11 ; GFX9-NEXT: v_lshlrev_b32_e32 v7, 24, v7 -; GFX9-NEXT: v_and_b32_sdwa v15, v4, s6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_and_or_b32 v4, v4, s6, v13 +; GFX9-NEXT: v_lshlrev_b32_e32 v8, 24, v8 +; GFX9-NEXT: v_lshrrev_b32_e32 v10, 24, v6 +; GFX9-NEXT: v_lshlrev_b32_sdwa v16, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v17, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_and_or_b32 v5, v5, s6, v15 ; GFX9-NEXT: v_lshlrev_b32_e32 v9, 24, v9 -; GFX9-NEXT: v_and_or_b32 v3, v4, s6, v8 -; GFX9-NEXT: v_lshlrev_b32_sdwa v10, v0, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshrrev_b32_e32 v13, 24, v6 -; GFX9-NEXT: v_and_b32_sdwa v16, v5, s6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b32_e32 v11, 24, v11 -; GFX9-NEXT: v_and_or_b32 v4, v5, s6, v10 -; GFX9-NEXT: v_lshlrev_b32_sdwa v12, v0, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_or3_b32 v2, v2, v14, v7 -; GFX9-NEXT: v_or3_b32 v3, v3, v15, v9 -; GFX9-NEXT: v_and_b32_sdwa v17, v6, s6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_or_b32 v5, v6, s6, v12 -; GFX9-NEXT: v_lshlrev_b32_e32 v13, 24, v13 -; GFX9-NEXT: v_or3_b32 v4, v4, v16, v11 -; GFX9-NEXT: v_cndmask_b32_e32 v6, v2, v3, vcc +; GFX9-NEXT: v_or3_b32 v3, v3, v12, v7 +; GFX9-NEXT: v_or3_b32 v4, v4, v14, v8 +; GFX9-NEXT: v_lshlrev_b32_sdwa v18, v1, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_and_or_b32 v6, v6, s6, v17 +; GFX9-NEXT: v_lshlrev_b32_e32 v10, 24, v10 +; GFX9-NEXT: v_or3_b32 v5, v5, v16, v9 +; GFX9-NEXT: v_cndmask_b32_e32 v7, v3, v4, vcc ; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s4, 2 -; GFX9-NEXT: v_or3_b32 v5, v5, v17, v13 -; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, v4, s[0:1] -; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, v5, s[2:3] -; GFX9-NEXT: v_and_or_b32 v1, v6, s5, v1 +; GFX9-NEXT: v_or3_b32 v6, v6, v18, v10 +; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v5, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v6, s[2:3] +; GFX9-NEXT: v_and_or_b32 v2, v7, s5, v2 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], s4, 0 -; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v4, v4, v1, s[0:1] -; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v1, s[4:5] -; GFX9-NEXT: v_cndmask_b32_e64 v1, v5, v1, s[2:3] -; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v2 -; GFX9-NEXT: v_lshrrev_b32_e32 v7, 8, v3 -; GFX9-NEXT: v_lshrrev_b32_e32 v9, 8, v4 -; GFX9-NEXT: v_lshrrev_b32_e32 v11, 8, v1 -; GFX9-NEXT: v_lshrrev_b32_e32 v6, 24, v2 -; GFX9-NEXT: v_lshrrev_b32_e32 v8, 24, v3 -; GFX9-NEXT: v_lshrrev_b32_e32 v10, 24, v4 -; GFX9-NEXT: v_lshrrev_b32_e32 v12, 24, v1 -; GFX9-NEXT: v_lshlrev_b32_sdwa v5, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v7, v0, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v9, v0, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v0, v0, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_b32_sdwa v13, v2, s6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_b32_sdwa v14, v3, s6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_b32_sdwa v15, v4, s6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_or_b32 v2, v2, s6, v5 -; GFX9-NEXT: v_and_b32_sdwa v16, v1, s6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b32_e32 v6, 24, v6 -; GFX9-NEXT: v_and_or_b32 v5, v1, s6, v0 -; GFX9-NEXT: v_lshlrev_b32_e32 v8, 24, v8 -; GFX9-NEXT: v_and_or_b32 v3, v3, s6, v7 -; GFX9-NEXT: v_lshlrev_b32_e32 v10, 24, v10 -; GFX9-NEXT: v_lshlrev_b32_e32 v11, 24, v12 -; GFX9-NEXT: v_and_or_b32 v4, v4, s6, v9 -; GFX9-NEXT: v_or3_b32 v0, v2, v13, v6 -; GFX9-NEXT: v_or3_b32 v1, v3, v14, v8 -; GFX9-NEXT: v_or3_b32 v2, v4, v15, v10 -; GFX9-NEXT: v_or3_b32 v3, v5, v16, v11 +; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v2, s[0:1] +; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v2, s[4:5] +; GFX9-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[2:3] +; GFX9-NEXT: v_lshrrev_b32_e32 v6, 24, v3 +; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v4 +; GFX9-NEXT: v_lshrrev_b32_e32 v8, 24, v5 +; GFX9-NEXT: v_lshlrev_b32_sdwa v10, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v12, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v14, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshrrev_b32_e32 v9, 24, v2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v13, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v15, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v11, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v16, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_and_or_b32 v1, v3, s6, v10 +; GFX9-NEXT: v_lshlrev_b32_e32 v3, 24, v6 +; GFX9-NEXT: v_lshlrev_b32_e32 v6, 24, v7 +; GFX9-NEXT: v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_e32 v7, 24, v8 +; GFX9-NEXT: v_and_or_b32 v4, v4, s6, v12 +; GFX9-NEXT: v_and_or_b32 v5, v5, s6, v14 +; GFX9-NEXT: v_and_or_b32 v8, v2, s6, v0 +; GFX9-NEXT: v_or3_b32 v0, v1, v11, v3 +; GFX9-NEXT: v_lshlrev_b32_e32 v9, 24, v9 +; GFX9-NEXT: v_or3_b32 v1, v4, v13, v6 +; GFX9-NEXT: v_or3_b32 v2, v5, v15, v7 ; GFX9-NEXT: v_mov_b32_e32 v4, 0 +; GFX9-NEXT: v_or3_b32 v3, v8, v16, v9 ; GFX9-NEXT: v_mov_b32_e32 v5, 0 ; GFX9-NEXT: global_store_dwordx4 v[4:5], v[0:3], off ; GFX9-NEXT: s_endpgm @@ -6835,95 +6398,88 @@ define amdgpu_ps void @insertelement_v_v16i8_v_s(<16 x i8> addrspace(1)* %ptr, i ; GFX8: ; %bb.0: ; GFX8-NEXT: flat_load_dwordx4 v[3:6], v[0:1] ; GFX8-NEXT: s_and_b32 s1, s2, 3 -; GFX8-NEXT: s_lshl_b32 s1, s1, 3 -; GFX8-NEXT: v_mov_b32_e32 v8, s1 -; GFX8-NEXT: s_movk_i32 s0, 0xff -; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX8-NEXT: v_mov_b32_e32 v0, 8 -; GFX8-NEXT: v_mov_b32_e32 v1, 8 -; GFX8-NEXT: v_mov_b32_e32 v7, s0 +; GFX8-NEXT: s_lshl_b32 s1, s1, 3 +; GFX8-NEXT: v_mov_b32_e32 v7, 8 +; GFX8-NEXT: v_mov_b32_e32 v1, 16 +; GFX8-NEXT: v_mov_b32_e32 v9, s1 +; GFX8-NEXT: v_mov_b32_e32 v8, 16 +; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v9, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX8-NEXT: s_lshr_b32 s4, s2, 2 +; GFX8-NEXT: s_movk_i32 s0, 0xff ; GFX8-NEXT: s_lshl_b32 s0, s0, s1 ; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, s4, 1 ; GFX8-NEXT: s_not_b32 s5, s0 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s4, 2 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[2:3], s4, 3 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_lshrrev_b32_e32 v8, 8, v3 +; GFX8-NEXT: v_lshlrev_b32_sdwa v13, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX8-NEXT: v_lshrrev_b32_e32 v9, 24, v3 -; GFX8-NEXT: v_lshrrev_b32_e32 v10, 8, v4 -; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshlrev_b32_e32 v8, 24, v9 -; GFX8-NEXT: v_lshrrev_b32_e32 v11, 24, v4 -; GFX8-NEXT: v_lshlrev_b32_sdwa v9, v1, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v12, 8, v5 +; GFX8-NEXT: v_lshlrev_b32_sdwa v14, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_sdwa v3, v3, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshrrev_b32_e32 v10, 24, v4 +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v15, v7, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v17, v7, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v11, 24, v5 +; GFX8-NEXT: v_lshlrev_b32_sdwa v16, v8, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_lshlrev_b32_e32 v4, 24, v10 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX8-NEXT: v_or_b32_sdwa v5, v5, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshrrev_b32_e32 v12, 24, v6 +; GFX8-NEXT: v_lshlrev_b32_sdwa v18, v8, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_lshlrev_b32_e32 v9, 24, v9 +; GFX8-NEXT: v_or_b32_e32 v3, v3, v14 +; GFX8-NEXT: v_or_b32_sdwa v6, v6, v17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX8-NEXT: v_lshlrev_b32_e32 v10, 24, v11 -; GFX8-NEXT: v_lshrrev_b32_e32 v13, 24, v5 -; GFX8-NEXT: v_lshlrev_b32_sdwa v11, v1, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v14, 8, v6 -; GFX8-NEXT: v_and_b32_sdwa v16, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_sdwa v17, v4, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v3, v4, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_e32 v0, v0, v16 -; GFX8-NEXT: v_or_b32_e32 v3, v3, v17 -; GFX8-NEXT: v_lshlrev_b32_e32 v12, 24, v13 -; GFX8-NEXT: v_and_b32_sdwa v18, v5, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v4, v5, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_lshlrev_b32_sdwa v13, v1, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_or_b32_e32 v4, v4, v18 -; GFX8-NEXT: v_lshrrev_b32_e32 v15, 24, v6 -; GFX8-NEXT: v_and_b32_sdwa v19, v6, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v5, v6, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_e32 v0, v0, v8 -; GFX8-NEXT: v_or_b32_e32 v3, v3, v10 -; GFX8-NEXT: v_lshlrev_b32_e32 v14, 24, v15 -; GFX8-NEXT: v_or_b32_e32 v5, v5, v19 -; GFX8-NEXT: v_or_b32_e32 v4, v4, v12 -; GFX8-NEXT: v_cndmask_b32_e32 v6, v0, v3, vcc -; GFX8-NEXT: v_or_b32_e32 v5, v5, v14 -; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, v4, s[0:1] -; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, v5, s[2:3] -; GFX8-NEXT: v_and_b32_e32 v6, s5, v6 -; GFX8-NEXT: v_or_b32_e32 v2, v6, v2 +; GFX8-NEXT: v_or_b32_e32 v1, v5, v16 +; GFX8-NEXT: v_lshlrev_b32_e32 v11, 24, v12 +; GFX8-NEXT: v_or_b32_e32 v5, v6, v18 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX8-NEXT: v_or_b32_e32 v3, v3, v9 +; GFX8-NEXT: v_or_b32_e32 v4, v5, v11 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v10 +; GFX8-NEXT: v_cndmask_b32_e32 v5, v3, v0, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, v1, s[0:1] +; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, v4, s[2:3] +; GFX8-NEXT: v_and_b32_e32 v5, s5, v5 +; GFX8-NEXT: v_or_b32_e32 v2, v5, v2 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], s4, 0 -; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v2, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v2, s[0:1] -; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[4:5] -; GFX8-NEXT: v_cndmask_b32_e64 v2, v5, v2, s[2:3] -; GFX8-NEXT: v_lshrrev_b32_e32 v5, 8, v0 -; GFX8-NEXT: v_lshrrev_b32_e32 v8, 8, v3 -; GFX8-NEXT: v_lshrrev_b32_e32 v10, 8, v4 -; GFX8-NEXT: v_lshrrev_b32_e32 v12, 8, v2 -; GFX8-NEXT: v_lshlrev_b32_sdwa v5, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshlrev_b32_sdwa v8, v1, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshlrev_b32_sdwa v10, v1, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v9, 24, v3 -; GFX8-NEXT: v_lshrrev_b32_e32 v11, 24, v4 -; GFX8-NEXT: v_lshrrev_b32_e32 v13, 24, v2 -; GFX8-NEXT: v_and_b32_sdwa v15, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_sdwa v16, v4, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_sdwa v14, v0, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_sdwa v7, v2, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v3, v3, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v4, v4, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_lshrrev_b32_e32 v6, 24, v0 -; GFX8-NEXT: v_or_b32_sdwa v0, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_e32 v2, v3, v15 -; GFX8-NEXT: v_or_b32_e32 v3, v4, v16 -; GFX8-NEXT: v_lshlrev_b32_e32 v9, 24, v9 -; GFX8-NEXT: v_or_b32_e32 v4, v1, v7 -; GFX8-NEXT: v_lshlrev_b32_e32 v11, 24, v11 -; GFX8-NEXT: v_lshlrev_b32_e32 v12, 24, v13 -; GFX8-NEXT: v_or_b32_e32 v1, v2, v9 -; GFX8-NEXT: v_or_b32_e32 v2, v3, v11 -; GFX8-NEXT: v_or_b32_e32 v3, v4, v12 -; GFX8-NEXT: v_mov_b32_e32 v4, 0 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v2, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1] +; GFX8-NEXT: v_cndmask_b32_e64 v2, v4, v2, s[2:3] +; GFX8-NEXT: v_lshlrev_b32_sdwa v10, v7, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v12, v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v3 +; GFX8-NEXT: v_lshrrev_b32_e32 v5, 24, v0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v11, v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_sdwa v3, v3, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v13, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_lshlrev_b32_sdwa v14, v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v9, 24, v2 +; GFX8-NEXT: v_lshlrev_b32_sdwa v7, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_or_b32_sdwa v0, v0, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshrrev_b32_e32 v6, 24, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v15, v8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_lshlrev_b32_sdwa v8, v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_sdwa v2, v2, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v1, v1, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_e32 v7, 24, v9 +; GFX8-NEXT: v_lshlrev_b32_e32 v4, 24, v4 +; GFX8-NEXT: v_lshlrev_b32_e32 v5, 24, v5 +; GFX8-NEXT: v_or_b32_e32 v3, v3, v11 +; GFX8-NEXT: v_or_b32_e32 v9, v0, v13 +; GFX8-NEXT: v_or_b32_e32 v10, v1, v15 ; GFX8-NEXT: v_lshlrev_b32_e32 v6, 24, v6 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v14 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v6 +; GFX8-NEXT: v_or_b32_e32 v8, v2, v8 +; GFX8-NEXT: v_or_b32_e32 v0, v3, v4 +; GFX8-NEXT: v_or_b32_e32 v1, v9, v5 +; GFX8-NEXT: v_mov_b32_e32 v4, 0 +; GFX8-NEXT: v_or_b32_e32 v2, v10, v6 +; GFX8-NEXT: v_or_b32_e32 v3, v8, v7 ; GFX8-NEXT: v_mov_b32_e32 v5, 0 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; GFX8-NEXT: s_endpgm @@ -6947,111 +6503,95 @@ define amdgpu_ps void @insertelement_v_v16i8_v_s(<16 x i8> addrspace(1)* %ptr, i ; GFX7-NEXT: v_cmp_eq_u32_e64 s[2:3], s4, 3 ; GFX7-NEXT: s_mov_b32 s10, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v3 -; GFX7-NEXT: v_lshrrev_b32_e32 v8, 8, v4 -; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v3 -; GFX7-NEXT: v_lshrrev_b32_e32 v9, 16, v4 -; GFX7-NEXT: v_lshrrev_b32_e32 v11, 8, v5 -; GFX7-NEXT: v_and_b32_e32 v1, s6, v1 -; GFX7-NEXT: v_and_b32_e32 v8, s6, v8 -; GFX7-NEXT: v_lshrrev_b32_e32 v7, 24, v3 -; GFX7-NEXT: v_lshrrev_b32_e32 v10, 24, v4 -; GFX7-NEXT: v_lshrrev_b32_e32 v12, 16, v5 -; GFX7-NEXT: v_lshrrev_b32_e32 v14, 8, v6 -; GFX7-NEXT: v_and_b32_e32 v2, s6, v2 -; GFX7-NEXT: v_and_b32_e32 v9, s6, v9 -; GFX7-NEXT: v_and_b32_e32 v11, s6, v11 -; GFX7-NEXT: v_and_b32_e32 v3, s6, v3 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1 -; GFX7-NEXT: v_and_b32_e32 v4, s6, v4 -; GFX7-NEXT: v_lshlrev_b32_e32 v8, 8, v8 -; GFX7-NEXT: v_lshrrev_b32_e32 v13, 24, v5 -; GFX7-NEXT: v_lshrrev_b32_e32 v15, 16, v6 -; GFX7-NEXT: v_and_b32_e32 v12, s6, v12 -; GFX7-NEXT: v_and_b32_e32 v14, s6, v14 -; GFX7-NEXT: v_or_b32_e32 v1, v3, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX7-NEXT: v_and_b32_e32 v5, s6, v5 -; GFX7-NEXT: v_lshlrev_b32_e32 v9, 16, v9 -; GFX7-NEXT: v_lshlrev_b32_e32 v11, 8, v11 -; GFX7-NEXT: v_or_b32_e32 v3, v4, v8 -; GFX7-NEXT: v_lshrrev_b32_e32 v16, 24, v6 -; GFX7-NEXT: v_and_b32_e32 v15, s6, v15 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v7, 24, v7 -; GFX7-NEXT: v_and_b32_e32 v6, s6, v6 -; GFX7-NEXT: v_lshlrev_b32_e32 v10, 24, v10 -; GFX7-NEXT: v_or_b32_e32 v2, v3, v9 -; GFX7-NEXT: v_lshlrev_b32_e32 v12, 16, v12 -; GFX7-NEXT: v_or_b32_e32 v4, v5, v11 +; GFX7-NEXT: v_bfe_u32 v10, v3, 8, 8 +; GFX7-NEXT: v_bfe_u32 v12, v4, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v1, 24, v3 +; GFX7-NEXT: v_lshrrev_b32_e32 v2, 24, v4 +; GFX7-NEXT: v_bfe_u32 v14, v5, 8, 8 +; GFX7-NEXT: v_and_b32_e32 v9, s6, v3 +; GFX7-NEXT: v_bfe_u32 v3, v3, 16, 8 +; GFX7-NEXT: v_and_b32_e32 v11, s6, v4 +; GFX7-NEXT: v_bfe_u32 v4, v4, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v10, 8, v10 +; GFX7-NEXT: v_lshlrev_b32_e32 v12, 8, v12 +; GFX7-NEXT: v_lshrrev_b32_e32 v7, 24, v5 +; GFX7-NEXT: v_bfe_u32 v16, v6, 8, 8 +; GFX7-NEXT: v_and_b32_e32 v13, s6, v5 +; GFX7-NEXT: v_bfe_u32 v5, v5, 16, 8 +; GFX7-NEXT: v_or_b32_e32 v9, v9, v10 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v4 +; GFX7-NEXT: v_or_b32_e32 v10, v11, v12 ; GFX7-NEXT: v_lshlrev_b32_e32 v14, 8, v14 -; GFX7-NEXT: v_lshlrev_b32_e32 v13, 24, v13 -; GFX7-NEXT: v_or_b32_e32 v3, v4, v12 -; GFX7-NEXT: v_lshlrev_b32_e32 v15, 16, v15 -; GFX7-NEXT: v_or_b32_e32 v5, v6, v14 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v7 -; GFX7-NEXT: v_or_b32_e32 v2, v2, v10 -; GFX7-NEXT: v_or_b32_e32 v4, v5, v15 -; GFX7-NEXT: v_lshlrev_b32_e32 v16, 24, v16 -; GFX7-NEXT: v_or_b32_e32 v3, v3, v13 +; GFX7-NEXT: v_lshrrev_b32_e32 v8, 24, v6 +; GFX7-NEXT: v_and_b32_e32 v15, s6, v6 +; GFX7-NEXT: v_bfe_u32 v6, v6, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v16, 8, v16 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v1 +; GFX7-NEXT: v_or_b32_e32 v3, v9, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 24, v2 +; GFX7-NEXT: v_or_b32_e32 v4, v10, v4 +; GFX7-NEXT: v_lshlrev_b32_e32 v5, 16, v5 +; GFX7-NEXT: v_or_b32_e32 v11, v13, v14 +; GFX7-NEXT: v_lshlrev_b32_e32 v7, 24, v7 +; GFX7-NEXT: v_lshlrev_b32_e32 v6, 16, v6 +; GFX7-NEXT: v_or_b32_e32 v12, v15, v16 +; GFX7-NEXT: v_or_b32_e32 v5, v11, v5 +; GFX7-NEXT: v_or_b32_e32 v1, v3, v1 +; GFX7-NEXT: v_or_b32_e32 v2, v4, v2 +; GFX7-NEXT: v_or_b32_e32 v3, v5, v7 ; GFX7-NEXT: v_cndmask_b32_e32 v5, v1, v2, vcc -; GFX7-NEXT: v_or_b32_e32 v4, v4, v16 +; GFX7-NEXT: v_lshlrev_b32_e32 v8, 24, v8 +; GFX7-NEXT: v_or_b32_e32 v6, v12, v6 +; GFX7-NEXT: v_or_b32_e32 v4, v6, v8 ; GFX7-NEXT: v_cndmask_b32_e64 v5, v5, v3, s[0:1] ; GFX7-NEXT: v_cndmask_b32_e64 v5, v5, v4, s[2:3] ; GFX7-NEXT: v_and_b32_e32 v5, s5, v5 ; GFX7-NEXT: v_or_b32_e32 v0, v5, v0 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[4:5], s4, 0 -; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v0, s[4:5] ; GFX7-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc +; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v0, s[4:5] ; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v0, s[0:1] +; GFX7-NEXT: v_bfe_u32 v9, v1, 8, 8 +; GFX7-NEXT: v_bfe_u32 v11, v2, 8, 8 ; GFX7-NEXT: v_cndmask_b32_e64 v4, v4, v0, s[2:3] -; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v1 -; GFX7-NEXT: v_and_b32_e32 v0, s6, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v5, 16, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v6, 24, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v7, 8, v2 -; GFX7-NEXT: v_and_b32_e32 v1, s6, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0 -; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 -; GFX7-NEXT: v_and_b32_e32 v1, s6, v7 -; GFX7-NEXT: v_lshrrev_b32_e32 v8, 16, v2 -; GFX7-NEXT: v_lshrrev_b32_e32 v9, 24, v2 -; GFX7-NEXT: v_and_b32_e32 v2, s6, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1 -; GFX7-NEXT: v_or_b32_e32 v1, v2, v1 -; GFX7-NEXT: v_and_b32_e32 v2, s6, v8 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v5, 24, v2 +; GFX7-NEXT: v_bfe_u32 v13, v3, 8, 8 +; GFX7-NEXT: v_and_b32_e32 v8, s6, v1 +; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 8 +; GFX7-NEXT: v_and_b32_e32 v10, s6, v2 +; GFX7-NEXT: v_bfe_u32 v2, v2, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v9, 8, v9 +; GFX7-NEXT: v_lshlrev_b32_e32 v11, 8, v11 +; GFX7-NEXT: v_or_b32_e32 v8, v8, v9 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v6, 24, v3 +; GFX7-NEXT: v_and_b32_e32 v12, s6, v3 +; GFX7-NEXT: v_bfe_u32 v3, v3, 16, 8 ; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 24, v9 -; GFX7-NEXT: v_lshrrev_b32_e32 v10, 8, v3 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v2 -; GFX7-NEXT: v_lshrrev_b32_e32 v11, 16, v3 -; GFX7-NEXT: v_lshrrev_b32_e32 v12, 24, v3 -; GFX7-NEXT: v_and_b32_e32 v2, s6, v3 -; GFX7-NEXT: v_and_b32_e32 v3, s6, v10 -; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 -; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 -; GFX7-NEXT: v_and_b32_e32 v3, s6, v11 +; GFX7-NEXT: v_or_b32_e32 v9, v10, v11 +; GFX7-NEXT: v_lshlrev_b32_e32 v13, 8, v13 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX7-NEXT: v_or_b32_e32 v1, v8, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v5, 24, v5 ; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX7-NEXT: v_or_b32_e32 v10, v12, v13 +; GFX7-NEXT: v_or_b32_e32 v2, v9, v2 +; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX7-NEXT: v_or_b32_e32 v1, v2, v5 +; GFX7-NEXT: v_bfe_u32 v5, v4, 8, 8 +; GFX7-NEXT: v_or_b32_e32 v2, v10, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 24, v6 ; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 -; GFX7-NEXT: v_lshlrev_b32_e32 v3, 24, v12 -; GFX7-NEXT: v_lshrrev_b32_e32 v13, 8, v4 -; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 -; GFX7-NEXT: v_lshrrev_b32_e32 v14, 16, v4 -; GFX7-NEXT: v_lshrrev_b32_e32 v15, 24, v4 +; GFX7-NEXT: v_lshrrev_b32_e32 v7, 24, v4 ; GFX7-NEXT: v_and_b32_e32 v3, s6, v4 -; GFX7-NEXT: v_and_b32_e32 v4, s6, v13 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 8, v4 -; GFX7-NEXT: v_and_b32_e32 v5, s6, v5 -; GFX7-NEXT: v_or_b32_e32 v3, v3, v4 -; GFX7-NEXT: v_and_b32_e32 v4, s6, v14 -; GFX7-NEXT: v_lshlrev_b32_e32 v5, 16, v5 +; GFX7-NEXT: v_bfe_u32 v4, v4, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v5, 8, v5 +; GFX7-NEXT: v_or_b32_e32 v3, v3, v5 ; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v4 ; GFX7-NEXT: v_or_b32_e32 v3, v3, v4 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v15 -; GFX7-NEXT: v_lshlrev_b32_e32 v6, 24, v6 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v5 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v6 +; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v7 ; GFX7-NEXT: v_or_b32_e32 v3, v3, v4 ; GFX7-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 ; GFX7-NEXT: s_endpgm @@ -7059,85 +6599,79 @@ define amdgpu_ps void @insertelement_v_v16i8_v_s(<16 x i8> addrspace(1)* %ptr, i ; GFX10-LABEL: insertelement_v_v16i8_v_s: ; GFX10: ; %bb.0: ; GFX10-NEXT: global_load_dwordx4 v[3:6], v[0:1], off -; GFX10-NEXT: v_mov_b32_e32 v0, 8 ; GFX10-NEXT: s_mov_b32 s0, 8 +; GFX10-NEXT: v_mov_b32_e32 v0, 8 +; GFX10-NEXT: s_mov_b32 s1, 16 ; GFX10-NEXT: s_movk_i32 s3, 0xff +; GFX10-NEXT: v_mov_b32_e32 v1, 16 ; GFX10-NEXT: s_lshr_b32 s4, s2, 2 -; GFX10-NEXT: s_and_b32 s1, s2, 3 ; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, s4, 1 -; GFX10-NEXT: s_lshl_b32 s2, s1, 3 -; GFX10-NEXT: v_cmp_eq_u32_e64 s1, s4, 3 -; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: s_lshl_b32 s2, s3, s2 -; GFX10-NEXT: s_not_b32 s2, s2 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v3 -; GFX10-NEXT: v_lshrrev_b32_e32 v8, 8, v4 ; GFX10-NEXT: v_lshrrev_b32_e32 v7, 24, v3 -; GFX10-NEXT: v_lshrrev_b32_e32 v9, 24, v4 -; GFX10-NEXT: v_lshrrev_b32_e32 v10, 8, v5 -; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v8, v0, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_lshrrev_b32_e32 v11, 24, v5 -; GFX10-NEXT: v_lshrrev_b32_e32 v12, 8, v6 -; GFX10-NEXT: v_and_b32_sdwa v14, v3, s3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_or_b32 v1, v3, s3, v1 +; GFX10-NEXT: v_lshrrev_b32_e32 v8, 24, v4 +; GFX10-NEXT: v_lshlrev_b32_sdwa v11, s0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v13, s0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v9, 24, v5 +; GFX10-NEXT: v_lshlrev_b32_sdwa v12, s1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v14, s1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v3, v3, s3, v11 ; GFX10-NEXT: v_lshlrev_b32_e32 v7, 24, v7 -; GFX10-NEXT: v_and_or_b32 v3, v4, s3, v8 -; GFX10-NEXT: v_and_b32_sdwa v15, v4, s3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX10-NEXT: v_lshlrev_b32_sdwa v15, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_and_or_b32 v4, v4, s3, v13 +; GFX10-NEXT: v_lshlrev_b32_e32 v8, 24, v8 +; GFX10-NEXT: v_lshlrev_b32_sdwa v16, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_lshrrev_b32_e32 v10, 24, v6 +; GFX10-NEXT: v_or3_b32 v3, v3, v12, v7 +; GFX10-NEXT: v_lshlrev_b32_sdwa v17, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_or3_b32 v4, v4, v14, v8 +; GFX10-NEXT: v_and_or_b32 v5, v5, s3, v15 ; GFX10-NEXT: v_lshlrev_b32_e32 v9, 24, v9 -; GFX10-NEXT: v_lshlrev_b32_sdwa v10, v0, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_and_b32_sdwa v16, v5, s3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshrrev_b32_e32 v13, 24, v6 -; GFX10-NEXT: v_or3_b32 v1, v1, v14, v7 -; GFX10-NEXT: v_lshlrev_b32_e32 v4, 24, v11 -; GFX10-NEXT: v_and_or_b32 v5, v5, s3, v10 -; GFX10-NEXT: v_lshlrev_b32_sdwa v8, v0, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_or3_b32 v3, v3, v15, v9 -; GFX10-NEXT: v_and_b32_sdwa v17, v6, s3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b32_e32 v7, 24, v13 -; GFX10-NEXT: v_or3_b32 v4, v5, v16, v4 -; GFX10-NEXT: v_and_or_b32 v6, v6, s3, v8 -; GFX10-NEXT: v_cndmask_b32_e32 v5, v1, v3, vcc_lo +; GFX10-NEXT: v_lshlrev_b32_sdwa v11, v1, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v6, v6, s3, v17 +; GFX10-NEXT: v_lshlrev_b32_e32 v7, 24, v10 +; GFX10-NEXT: v_cndmask_b32_e32 v8, v3, v4, vcc_lo +; GFX10-NEXT: v_or3_b32 v5, v5, v16, v9 ; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s4, 2 -; GFX10-NEXT: v_or3_b32 v6, v6, v17, v7 -; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v4, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v6, s1 -; GFX10-NEXT: v_and_or_b32 v2, v5, s2, v2 +; GFX10-NEXT: s_and_b32 s1, s2, 3 +; GFX10-NEXT: v_or3_b32 v6, v6, v11, v7 +; GFX10-NEXT: s_lshl_b32 s2, s1, 3 +; GFX10-NEXT: v_cmp_eq_u32_e64 s1, s4, 3 +; GFX10-NEXT: v_cndmask_b32_e64 v7, v8, v5, s0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX10-NEXT: s_lshl_b32 s2, s3, s2 +; GFX10-NEXT: s_not_b32 s2, s2 +; GFX10-NEXT: v_cndmask_b32_e64 v7, v7, v6, s1 +; GFX10-NEXT: v_and_or_b32 v2, v7, s2, v2 ; GFX10-NEXT: v_cmp_eq_u32_e64 s2, s4, 0 -; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v2, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, v2, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v2, s2 +; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v2, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v2, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v2, s2 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v6, v2, s1 -; GFX10-NEXT: v_lshrrev_b32_e32 v7, 8, v3 -; GFX10-NEXT: v_lshrrev_b32_e32 v9, 8, v4 -; GFX10-NEXT: v_lshrrev_b32_e32 v5, 8, v1 -; GFX10-NEXT: v_lshrrev_b32_e32 v11, 8, v2 -; GFX10-NEXT: v_lshrrev_b32_e32 v6, 24, v1 -; GFX10-NEXT: v_lshrrev_b32_e32 v8, 24, v3 -; GFX10-NEXT: v_lshrrev_b32_e32 v10, 24, v4 -; GFX10-NEXT: v_lshrrev_b32_e32 v12, 24, v2 -; GFX10-NEXT: v_lshlrev_b32_sdwa v5, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v7, v0, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v9, v0, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v0, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_and_b32_sdwa v13, v1, s3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_b32_sdwa v14, v3, s3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_b32_sdwa v15, v4, s3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_or_b32 v1, v1, s3, v5 -; GFX10-NEXT: v_and_b32_sdwa v16, v2, s3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b32_e32 v6, 24, v6 -; GFX10-NEXT: v_and_or_b32 v5, v2, s3, v0 -; GFX10-NEXT: v_lshlrev_b32_e32 v8, 24, v8 -; GFX10-NEXT: v_lshlrev_b32_e32 v10, 24, v10 -; GFX10-NEXT: v_and_or_b32 v4, v4, s3, v9 -; GFX10-NEXT: v_lshlrev_b32_e32 v11, 24, v12 -; GFX10-NEXT: v_and_or_b32 v3, v3, s3, v7 -; GFX10-NEXT: v_or3_b32 v0, v1, v13, v6 -; GFX10-NEXT: v_or3_b32 v2, v4, v15, v10 -; GFX10-NEXT: v_or3_b32 v1, v3, v14, v8 -; GFX10-NEXT: v_or3_b32 v3, v5, v16, v11 +; GFX10-NEXT: v_lshrrev_b32_e32 v7, 24, v4 +; GFX10-NEXT: v_lshrrev_b32_e32 v8, 24, v5 +; GFX10-NEXT: v_lshrrev_b32_e32 v6, 24, v3 +; GFX10-NEXT: v_lshlrev_b32_sdwa v10, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v12, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v14, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v9, 24, v2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v13, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v15, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v11, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v16, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v1, v3, s3, v10 +; GFX10-NEXT: v_lshlrev_b32_e32 v3, 24, v6 +; GFX10-NEXT: v_lshlrev_b32_e32 v6, 24, v7 +; GFX10-NEXT: v_lshlrev_b32_e32 v7, 24, v8 +; GFX10-NEXT: v_and_or_b32 v4, v4, s3, v12 +; GFX10-NEXT: v_and_or_b32 v5, v5, s3, v14 +; GFX10-NEXT: v_and_or_b32 v8, v2, s3, v0 +; GFX10-NEXT: v_lshlrev_b32_e32 v9, 24, v9 +; GFX10-NEXT: v_or3_b32 v0, v1, v11, v3 +; GFX10-NEXT: v_or3_b32 v1, v4, v13, v6 +; GFX10-NEXT: v_or3_b32 v2, v5, v15, v7 ; GFX10-NEXT: v_mov_b32_e32 v4, 0 +; GFX10-NEXT: v_or3_b32 v3, v8, v16, v9 ; GFX10-NEXT: v_mov_b32_e32 v5, 0 ; GFX10-NEXT: global_store_dwordx4 v[4:5], v[0:3], off ; GFX10-NEXT: s_endpgm @@ -7152,85 +6686,79 @@ define amdgpu_ps void @insertelement_v_v16i8_v_v(<16 x i8> addrspace(1)* %ptr, i ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off ; GFX9-NEXT: s_mov_b32 s0, 8 +; GFX9-NEXT: s_mov_b32 s1, 16 ; GFX9-NEXT: v_mov_b32_e32 v1, 8 -; GFX9-NEXT: s_movk_i32 s1, 0xff -; GFX9-NEXT: v_lshrrev_b32_e32 v8, 2, v3 +; GFX9-NEXT: s_movk_i32 s2, 0xff +; GFX9-NEXT: v_mov_b32_e32 v8, 16 ; GFX9-NEXT: v_mov_b32_e32 v0, 0xff +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v9, 24, v4 +; GFX9-NEXT: v_lshrrev_b32_e32 v10, 24, v5 +; GFX9-NEXT: v_lshlrev_b32_sdwa v13, s0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v15, s0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshrrev_b32_e32 v11, 24, v6 +; GFX9-NEXT: v_lshlrev_b32_sdwa v14, s1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v16, s1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_and_or_b32 v5, v5, s2, v15 +; GFX9-NEXT: v_lshrrev_b32_e32 v15, 2, v3 +; GFX9-NEXT: v_and_or_b32 v4, v4, s2, v13 +; GFX9-NEXT: v_lshlrev_b32_e32 v9, 24, v9 +; GFX9-NEXT: v_lshlrev_b32_e32 v10, 24, v10 +; GFX9-NEXT: v_lshlrev_b32_sdwa v17, v1, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshrrev_b32_e32 v12, 24, v7 ; GFX9-NEXT: v_and_b32_e32 v3, 3, v3 -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8 +; GFX9-NEXT: v_lshlrev_b32_sdwa v18, v8, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v19, v1, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_and_or_b32 v6, v6, v0, v17 +; GFX9-NEXT: v_lshlrev_b32_e32 v11, 24, v11 +; GFX9-NEXT: v_or3_b32 v4, v4, v14, v9 +; GFX9-NEXT: v_or3_b32 v5, v5, v16, v10 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v15 +; GFX9-NEXT: v_and_or_b32 v13, v7, v0, v19 ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX9-NEXT: v_lshlrev_b32_sdwa v7, v8, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_e32 v12, 24, v12 +; GFX9-NEXT: v_or3_b32 v6, v6, v18, v11 +; GFX9-NEXT: v_cndmask_b32_e32 v9, v4, v5, vcc +; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], 2, v15 ; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX9-NEXT: v_lshlrev_b32_e32 v3, v3, v0 -; GFX9-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v8 -; GFX9-NEXT: v_xor_b32_e32 v3, -1, v3 -; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v9, 8, v4 -; GFX9-NEXT: v_lshrrev_b32_e32 v11, 8, v5 -; GFX9-NEXT: v_lshrrev_b32_e32 v10, 24, v4 -; GFX9-NEXT: v_lshrrev_b32_e32 v13, 8, v6 -; GFX9-NEXT: v_lshrrev_b32_e32 v15, 8, v7 -; GFX9-NEXT: v_lshrrev_b32_e32 v12, 24, v5 -; GFX9-NEXT: v_lshlrev_b32_sdwa v9, s0, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v11, v1, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_b32_sdwa v17, v4, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_b32_sdwa v18, v5, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_or_b32 v5, v5, s1, v11 -; GFX9-NEXT: v_lshlrev_b32_e32 v11, 24, v12 -; GFX9-NEXT: v_lshrrev_b32_e32 v14, 24, v6 -; GFX9-NEXT: v_lshrrev_b32_e32 v16, 24, v7 -; GFX9-NEXT: v_lshlrev_b32_sdwa v13, v1, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v15, v1, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_or_b32 v4, v4, s1, v9 -; GFX9-NEXT: v_lshlrev_b32_e32 v10, 24, v10 -; GFX9-NEXT: v_and_b32_sdwa v19, v6, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_or_b32 v6, v6, s1, v13 -; GFX9-NEXT: v_and_or_b32 v9, v7, v0, v15 -; GFX9-NEXT: v_lshlrev_b32_e32 v12, 24, v14 -; GFX9-NEXT: v_and_b32_sdwa v7, v7, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b32_e32 v13, 24, v16 -; GFX9-NEXT: v_or3_b32 v4, v4, v17, v10 -; GFX9-NEXT: v_or3_b32 v5, v5, v18, v11 -; GFX9-NEXT: v_or3_b32 v7, v9, v7, v13 -; GFX9-NEXT: v_or3_b32 v6, v6, v19, v12 -; GFX9-NEXT: v_cndmask_b32_e32 v9, v4, v5, vcc -; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], 2, v8 +; GFX9-NEXT: v_or3_b32 v7, v13, v7, v12 ; GFX9-NEXT: v_cndmask_b32_e64 v9, v9, v6, s[0:1] +; GFX9-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v15 +; GFX9-NEXT: v_xor_b32_e32 v3, -1, v3 ; GFX9-NEXT: v_cndmask_b32_e64 v9, v9, v7, s[2:3] ; GFX9-NEXT: v_and_or_b32 v2, v9, v3, v2 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v15 ; GFX9-NEXT: v_cndmask_b32_e64 v3, v4, v2, s[4:5] ; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v2, vcc ; GFX9-NEXT: v_cndmask_b32_e64 v5, v6, v2, s[0:1] ; GFX9-NEXT: v_cndmask_b32_e64 v2, v7, v2, s[2:3] -; GFX9-NEXT: v_lshrrev_b32_e32 v8, 8, v4 -; GFX9-NEXT: v_lshrrev_b32_e32 v10, 8, v5 -; GFX9-NEXT: v_lshrrev_b32_e32 v6, 8, v3 -; GFX9-NEXT: v_lshrrev_b32_e32 v9, 24, v4 -; GFX9-NEXT: v_lshrrev_b32_e32 v11, 24, v5 -; GFX9-NEXT: v_lshrrev_b32_e32 v12, 8, v2 -; GFX9-NEXT: v_lshlrev_b32_sdwa v8, v1, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v10, v1, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v3 -; GFX9-NEXT: v_and_b32_sdwa v15, v4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_b32_sdwa v16, v5, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b32_sdwa v6, v1, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshrrev_b32_e32 v13, 24, v2 -; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshlrev_b32_e32 v9, 24, v9 -; GFX9-NEXT: v_lshlrev_b32_e32 v11, 24, v11 -; GFX9-NEXT: v_and_or_b32 v4, v4, v0, v8 -; GFX9-NEXT: v_and_or_b32 v5, v5, v0, v10 -; GFX9-NEXT: v_and_b32_sdwa v14, v3, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_or_b32 v3, v3, v0, v6 -; GFX9-NEXT: v_and_or_b32 v6, v2, v0, v1 -; GFX9-NEXT: v_and_b32_sdwa v17, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v4 +; GFX9-NEXT: v_lshrrev_b32_e32 v9, 24, v5 +; GFX9-NEXT: v_lshlrev_b32_sdwa v13, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v15, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshrrev_b32_e32 v6, 24, v3 +; GFX9-NEXT: v_lshrrev_b32_e32 v10, 24, v2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v11, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v14, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v16, v8, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_and_or_b32 v4, v4, v0, v13 ; GFX9-NEXT: v_lshlrev_b32_e32 v7, 24, v7 -; GFX9-NEXT: v_lshlrev_b32_e32 v12, 24, v13 -; GFX9-NEXT: v_or3_b32 v1, v4, v15, v9 -; GFX9-NEXT: v_or3_b32 v2, v5, v16, v11 +; GFX9-NEXT: v_and_or_b32 v5, v5, v0, v15 +; GFX9-NEXT: v_lshlrev_b32_e32 v9, 24, v9 +; GFX9-NEXT: v_lshlrev_b32_sdwa v12, v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_and_or_b32 v3, v3, v0, v11 +; GFX9-NEXT: v_and_or_b32 v11, v2, v0, v1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v8, v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX9-NEXT: v_lshlrev_b32_e32 v6, 24, v6 +; GFX9-NEXT: v_lshlrev_b32_e32 v10, 24, v10 +; GFX9-NEXT: v_or3_b32 v1, v4, v14, v7 +; GFX9-NEXT: v_or3_b32 v2, v5, v16, v9 ; GFX9-NEXT: v_mov_b32_e32 v4, 0 -; GFX9-NEXT: v_or3_b32 v0, v3, v14, v7 -; GFX9-NEXT: v_or3_b32 v3, v6, v17, v12 +; GFX9-NEXT: v_or3_b32 v0, v3, v12, v6 +; GFX9-NEXT: v_or3_b32 v3, v11, v8, v10 ; GFX9-NEXT: v_mov_b32_e32 v5, 0 ; GFX9-NEXT: global_store_dwordx4 v[4:5], v[0:3], off ; GFX9-NEXT: s_endpgm @@ -7238,96 +6766,88 @@ define amdgpu_ps void @insertelement_v_v16i8_v_v(<16 x i8> addrspace(1)* %ptr, i ; GFX8-LABEL: insertelement_v_v16i8_v_v: ; GFX8: ; %bb.0: ; GFX8-NEXT: flat_load_dwordx4 v[4:7], v[0:1] -; GFX8-NEXT: s_movk_i32 s0, 0xff -; GFX8-NEXT: v_mov_b32_e32 v1, 8 -; GFX8-NEXT: v_mov_b32_e32 v8, 8 -; GFX8-NEXT: v_mov_b32_e32 v9, s0 -; GFX8-NEXT: v_mov_b32_e32 v0, 0xff -; GFX8-NEXT: v_lshrrev_b32_e32 v10, 2, v3 +; GFX8-NEXT: v_lshrrev_b32_e32 v11, 2, v3 ; GFX8-NEXT: v_and_b32_e32 v3, 3, v3 -; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v10 +; GFX8-NEXT: v_mov_b32_e32 v1, 8 +; GFX8-NEXT: v_mov_b32_e32 v9, 8 +; GFX8-NEXT: v_mov_b32_e32 v8, 16 ; GFX8-NEXT: v_lshlrev_b32_e32 v3, 3, v3 -; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], 2, v10 +; GFX8-NEXT: v_mov_b32_e32 v0, 0xff +; GFX8-NEXT: v_lshlrev_b32_e32 v0, v3, v0 +; GFX8-NEXT: v_mov_b32_e32 v10, 16 ; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshlrev_b32_e32 v3, v3, v0 -; GFX8-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v10 -; GFX8-NEXT: v_xor_b32_e32 v3, -1, v3 -; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v10 +; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v11 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], 2, v11 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v11 +; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v11 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_lshrrev_b32_e32 v13, 8, v5 -; GFX8-NEXT: v_lshrrev_b32_e32 v11, 8, v4 -; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v15, 8, v6 -; GFX8-NEXT: v_lshlrev_b32_sdwa v13, v8, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v12, 24, v4 -; GFX8-NEXT: v_lshrrev_b32_e32 v17, 8, v7 -; GFX8-NEXT: v_and_b32_sdwa v19, v4, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_lshrrev_b32_e32 v14, 24, v5 -; GFX8-NEXT: v_or_b32_sdwa v4, v5, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_sdwa v9, v5, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_lshlrev_b32_sdwa v15, v8, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v16, 24, v6 -; GFX8-NEXT: v_lshlrev_b32_e32 v13, 24, v14 -; GFX8-NEXT: v_or_b32_e32 v4, v4, v9 -; GFX8-NEXT: v_and_b32_sdwa v11, v6, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v5, v6, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_lshlrev_b32_sdwa v17, v8, v17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v15, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 24, v4 +; GFX8-NEXT: v_lshrrev_b32_e32 v12, 24, v5 +; GFX8-NEXT: v_lshlrev_b32_sdwa v16, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_sdwa v4, v4, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v8, v8, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v17, v9, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v19, v9, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v13, 24, v6 +; GFX8-NEXT: v_lshrrev_b32_e32 v14, 24, v7 +; GFX8-NEXT: v_lshlrev_b32_sdwa v18, v10, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_sdwa v5, v6, v17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v6, v7, v19 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v7, v10, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_lshlrev_b32_e32 v3, 24, v3 +; GFX8-NEXT: v_or_b32_e32 v4, v4, v16 ; GFX8-NEXT: v_lshlrev_b32_e32 v12, 24, v12 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v19 -; GFX8-NEXT: v_lshrrev_b32_e32 v18, 24, v7 -; GFX8-NEXT: v_or_b32_sdwa v6, v7, v17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_sdwa v7, v7, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_lshlrev_b32_e32 v14, 24, v16 -; GFX8-NEXT: v_or_b32_e32 v5, v5, v11 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v12 -; GFX8-NEXT: v_or_b32_e32 v4, v4, v13 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v8 +; GFX8-NEXT: v_lshlrev_b32_e32 v13, 24, v13 +; GFX8-NEXT: v_lshlrev_b32_e32 v14, 24, v14 +; GFX8-NEXT: v_or_b32_e32 v5, v5, v18 +; GFX8-NEXT: v_or_b32_e32 v3, v4, v3 ; GFX8-NEXT: v_or_b32_e32 v6, v6, v7 -; GFX8-NEXT: v_lshlrev_b32_e32 v15, 24, v18 -; GFX8-NEXT: v_or_b32_e32 v5, v5, v14 -; GFX8-NEXT: v_cndmask_b32_e32 v7, v1, v4, vcc -; GFX8-NEXT: v_or_b32_e32 v6, v6, v15 -; GFX8-NEXT: v_cndmask_b32_e64 v7, v7, v5, s[0:1] -; GFX8-NEXT: v_cndmask_b32_e64 v7, v7, v6, s[2:3] -; GFX8-NEXT: v_and_b32_e32 v3, v7, v3 -; GFX8-NEXT: v_or_b32_e32 v2, v3, v2 -; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v2, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v4, v5, v2, s[0:1] -; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5] -; GFX8-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[2:3] -; GFX8-NEXT: v_lshrrev_b32_e32 v5, 8, v1 -; GFX8-NEXT: v_lshrrev_b32_e32 v7, 8, v3 -; GFX8-NEXT: v_lshrrev_b32_e32 v10, 8, v4 -; GFX8-NEXT: v_lshrrev_b32_e32 v12, 8, v2 -; GFX8-NEXT: v_lshlrev_b32_sdwa v5, v8, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshlrev_b32_sdwa v7, v8, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshlrev_b32_sdwa v10, v8, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshlrev_b32_sdwa v8, v8, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshrrev_b32_e32 v6, 24, v1 -; GFX8-NEXT: v_lshrrev_b32_e32 v9, 24, v3 -; GFX8-NEXT: v_lshrrev_b32_e32 v11, 24, v4 -; GFX8-NEXT: v_lshrrev_b32_e32 v13, 24, v2 -; GFX8-NEXT: v_and_b32_sdwa v14, v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_sdwa v15, v3, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_sdwa v16, v4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v3, v3, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v4, v4, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v1, v1, v12 +; GFX8-NEXT: v_or_b32_e32 v4, v5, v13 +; GFX8-NEXT: v_or_b32_e32 v5, v6, v14 +; GFX8-NEXT: v_cndmask_b32_e32 v6, v3, v1, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, v4, s[0:1] +; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, v5, s[2:3] +; GFX8-NEXT: v_and_b32_e32 v0, v6, v0 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX8-NEXT: v_cndmask_b32_e64 v2, v3, v0, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v3, v4, v0, s[0:1] +; GFX8-NEXT: v_cndmask_b32_e64 v0, v5, v0, s[2:3] +; GFX8-NEXT: v_lshlrev_b32_sdwa v8, v9, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v12, v9, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v2 +; GFX8-NEXT: v_lshrrev_b32_e32 v5, 24, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v11, v10, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX8-NEXT: v_or_b32_sdwa v2, v2, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v13, v10, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_sdwa v1, v1, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v14, v9, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v9, v9, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_lshrrev_b32_e32 v6, 24, v3 +; GFX8-NEXT: v_lshrrev_b32_e32 v7, 24, v0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v15, v10, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_sdwa v3, v3, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v10, v10, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX8-NEXT: v_or_b32_sdwa v0, v0, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_e32 v4, 24, v4 +; GFX8-NEXT: v_lshlrev_b32_e32 v5, 24, v5 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v13 +; GFX8-NEXT: v_or_b32_e32 v2, v2, v11 +; GFX8-NEXT: v_or_b32_e32 v8, v0, v10 ; GFX8-NEXT: v_lshlrev_b32_e32 v6, 24, v6 -; GFX8-NEXT: v_lshlrev_b32_e32 v9, 24, v9 -; GFX8-NEXT: v_lshlrev_b32_e32 v11, 24, v11 -; GFX8-NEXT: v_lshlrev_b32_e32 v12, 24, v13 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v14 -; GFX8-NEXT: v_or_b32_e32 v5, v2, v0 -; GFX8-NEXT: v_or_b32_e32 v4, v4, v16 +; GFX8-NEXT: v_lshlrev_b32_e32 v7, 24, v7 ; GFX8-NEXT: v_or_b32_e32 v3, v3, v15 -; GFX8-NEXT: v_or_b32_e32 v0, v1, v6 -; GFX8-NEXT: v_or_b32_e32 v1, v3, v9 -; GFX8-NEXT: v_or_b32_e32 v2, v4, v11 -; GFX8-NEXT: v_or_b32_e32 v3, v5, v12 +; GFX8-NEXT: v_or_b32_e32 v0, v2, v4 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v5 ; GFX8-NEXT: v_mov_b32_e32 v4, 0 +; GFX8-NEXT: v_or_b32_e32 v2, v3, v6 +; GFX8-NEXT: v_or_b32_e32 v3, v8, v7 ; GFX8-NEXT: v_mov_b32_e32 v5, 0 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; GFX8-NEXT: s_endpgm @@ -7352,111 +6872,95 @@ define amdgpu_ps void @insertelement_v_v16i8_v_v(<16 x i8> addrspace(1)* %ptr, i ; GFX7-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v19 ; GFX7-NEXT: s_mov_b32 s10, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v4 -; GFX7-NEXT: v_lshrrev_b32_e32 v10, 8, v5 -; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v4 -; GFX7-NEXT: v_lshrrev_b32_e32 v11, 16, v5 -; GFX7-NEXT: v_lshrrev_b32_e32 v13, 8, v6 -; GFX7-NEXT: v_and_b32_e32 v0, s0, v0 -; GFX7-NEXT: v_and_b32_e32 v10, s0, v10 -; GFX7-NEXT: v_lshrrev_b32_e32 v9, 24, v4 -; GFX7-NEXT: v_lshrrev_b32_e32 v12, 24, v5 -; GFX7-NEXT: v_lshrrev_b32_e32 v14, 16, v6 -; GFX7-NEXT: v_lshrrev_b32_e32 v16, 8, v7 -; GFX7-NEXT: v_and_b32_e32 v1, s0, v1 -; GFX7-NEXT: v_and_b32_e32 v11, s0, v11 -; GFX7-NEXT: v_and_b32_e32 v13, s0, v13 -; GFX7-NEXT: v_and_b32_e32 v4, s0, v4 -; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0 -; GFX7-NEXT: v_and_b32_e32 v5, s0, v5 -; GFX7-NEXT: v_lshlrev_b32_e32 v10, 8, v10 -; GFX7-NEXT: v_lshrrev_b32_e32 v15, 24, v6 -; GFX7-NEXT: v_lshrrev_b32_e32 v17, 16, v7 -; GFX7-NEXT: v_and_b32_e32 v14, v14, v8 -; GFX7-NEXT: v_and_b32_e32 v16, v16, v8 -; GFX7-NEXT: v_and_b32_e32 v6, s0, v6 -; GFX7-NEXT: v_or_b32_e32 v0, v4, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v11, 16, v11 -; GFX7-NEXT: v_lshlrev_b32_e32 v13, 8, v13 -; GFX7-NEXT: v_or_b32_e32 v4, v5, v10 -; GFX7-NEXT: v_lshrrev_b32_e32 v18, 24, v7 -; GFX7-NEXT: v_and_b32_e32 v17, v17, v8 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v9, 24, v9 -; GFX7-NEXT: v_and_b32_e32 v7, v7, v8 -; GFX7-NEXT: v_lshlrev_b32_e32 v12, 24, v12 -; GFX7-NEXT: v_or_b32_e32 v1, v4, v11 -; GFX7-NEXT: v_lshlrev_b32_e32 v14, 16, v14 -; GFX7-NEXT: v_or_b32_e32 v5, v6, v13 +; GFX7-NEXT: v_bfe_u32 v12, v4, 8, 8 +; GFX7-NEXT: v_bfe_u32 v14, v5, 8, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v4 +; GFX7-NEXT: v_lshrrev_b32_e32 v1, 24, v5 +; GFX7-NEXT: v_bfe_u32 v16, v6, 8, 8 +; GFX7-NEXT: v_and_b32_e32 v11, s0, v4 +; GFX7-NEXT: v_bfe_u32 v4, v4, 16, 8 +; GFX7-NEXT: v_and_b32_e32 v13, s0, v5 +; GFX7-NEXT: v_bfe_u32 v5, v5, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v12, 8, v12 +; GFX7-NEXT: v_lshlrev_b32_e32 v14, 8, v14 +; GFX7-NEXT: v_lshrrev_b32_e32 v9, 24, v6 +; GFX7-NEXT: v_bfe_u32 v18, v7, 8, 8 +; GFX7-NEXT: v_and_b32_e32 v15, v6, v8 +; GFX7-NEXT: v_bfe_u32 v6, v6, 16, 8 +; GFX7-NEXT: v_or_b32_e32 v11, v11, v12 +; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v4 +; GFX7-NEXT: v_lshlrev_b32_e32 v5, 16, v5 +; GFX7-NEXT: v_or_b32_e32 v12, v13, v14 ; GFX7-NEXT: v_lshlrev_b32_e32 v16, 8, v16 -; GFX7-NEXT: v_lshlrev_b32_e32 v15, 24, v15 -; GFX7-NEXT: v_or_b32_e32 v4, v5, v14 -; GFX7-NEXT: v_lshlrev_b32_e32 v17, 16, v17 -; GFX7-NEXT: v_or_b32_e32 v6, v7, v16 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v9 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v12 -; GFX7-NEXT: v_or_b32_e32 v5, v6, v17 -; GFX7-NEXT: v_lshlrev_b32_e32 v18, 24, v18 -; GFX7-NEXT: v_or_b32_e32 v4, v4, v15 +; GFX7-NEXT: v_lshrrev_b32_e32 v10, 24, v7 +; GFX7-NEXT: v_and_b32_e32 v17, v7, v8 +; GFX7-NEXT: v_bfe_u32 v7, v7, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v18, 8, v18 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX7-NEXT: v_or_b32_e32 v4, v11, v4 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v1 +; GFX7-NEXT: v_or_b32_e32 v5, v12, v5 +; GFX7-NEXT: v_lshlrev_b32_e32 v6, 16, v6 +; GFX7-NEXT: v_or_b32_e32 v13, v15, v16 +; GFX7-NEXT: v_lshlrev_b32_e32 v9, 24, v9 +; GFX7-NEXT: v_lshlrev_b32_e32 v7, 16, v7 +; GFX7-NEXT: v_or_b32_e32 v14, v17, v18 +; GFX7-NEXT: v_or_b32_e32 v6, v13, v6 +; GFX7-NEXT: v_or_b32_e32 v0, v4, v0 +; GFX7-NEXT: v_or_b32_e32 v1, v5, v1 +; GFX7-NEXT: v_or_b32_e32 v4, v6, v9 +; GFX7-NEXT: v_lshlrev_b32_e32 v10, 24, v10 +; GFX7-NEXT: v_or_b32_e32 v7, v14, v7 ; GFX7-NEXT: v_cndmask_b32_e32 v6, v0, v1, vcc ; GFX7-NEXT: v_cmp_eq_u32_e64 s[0:1], 2, v19 -; GFX7-NEXT: v_or_b32_e32 v5, v5, v18 +; GFX7-NEXT: v_or_b32_e32 v5, v7, v10 ; GFX7-NEXT: v_cndmask_b32_e64 v6, v6, v4, s[0:1] ; GFX7-NEXT: v_cndmask_b32_e64 v6, v6, v5, s[2:3] ; GFX7-NEXT: v_and_b32_e32 v3, v6, v3 ; GFX7-NEXT: v_or_b32_e32 v2, v3, v2 ; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[4:5] +; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; GFX7-NEXT: v_bfe_u32 v10, v0, 8, 8 ; GFX7-NEXT: v_cndmask_b32_e64 v3, v4, v2, s[0:1] ; GFX7-NEXT: v_cndmask_b32_e64 v4, v5, v2, s[2:3] -; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX7-NEXT: v_lshrrev_b32_e32 v2, 8, v0 -; GFX7-NEXT: v_and_b32_e32 v2, v2, v8 -; GFX7-NEXT: v_lshrrev_b32_e32 v5, 16, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v6, 24, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v7, 8, v1 -; GFX7-NEXT: v_and_b32_e32 v0, v0, v8 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 8, v2 +; GFX7-NEXT: v_lshrrev_b32_e32 v2, 24, v0 +; GFX7-NEXT: v_bfe_u32 v12, v1, 8, 8 +; GFX7-NEXT: v_and_b32_e32 v9, v0, v8 +; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v10, 8, v10 +; GFX7-NEXT: v_lshrrev_b32_e32 v5, 24, v1 +; GFX7-NEXT: v_bfe_u32 v14, v3, 8, 8 +; GFX7-NEXT: v_and_b32_e32 v11, v1, v8 +; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_or_b32_e32 v9, v9, v10 +; GFX7-NEXT: v_lshlrev_b32_e32 v12, 8, v12 +; GFX7-NEXT: v_lshrrev_b32_e32 v6, 24, v3 +; GFX7-NEXT: v_and_b32_e32 v13, v3, v8 +; GFX7-NEXT: v_bfe_u32 v3, v3, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 24, v2 +; GFX7-NEXT: v_or_b32_e32 v0, v9, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-NEXT: v_or_b32_e32 v10, v11, v12 +; GFX7-NEXT: v_lshlrev_b32_e32 v14, 8, v14 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX7-NEXT: v_and_b32_e32 v2, v7, v8 -; GFX7-NEXT: v_lshrrev_b32_e32 v9, 16, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v10, 24, v1 -; GFX7-NEXT: v_and_b32_e32 v1, v1, v8 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 8, v2 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v2 -; GFX7-NEXT: v_and_b32_e32 v2, v9, v8 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 24, v10 -; GFX7-NEXT: v_lshrrev_b32_e32 v11, 8, v3 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v2 -; GFX7-NEXT: v_lshrrev_b32_e32 v12, 16, v3 -; GFX7-NEXT: v_lshrrev_b32_e32 v13, 24, v3 -; GFX7-NEXT: v_and_b32_e32 v2, v3, v8 -; GFX7-NEXT: v_and_b32_e32 v3, v11, v8 -; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 -; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 -; GFX7-NEXT: v_and_b32_e32 v3, v12, v8 -; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 -; GFX7-NEXT: v_lshlrev_b32_e32 v3, 24, v13 -; GFX7-NEXT: v_lshrrev_b32_e32 v14, 8, v4 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v3 +; GFX7-NEXT: v_or_b32_e32 v11, v13, v14 +; GFX7-NEXT: v_lshlrev_b32_e32 v5, 24, v5 +; GFX7-NEXT: v_or_b32_e32 v1, v10, v1 +; GFX7-NEXT: v_or_b32_e32 v1, v1, v5 +; GFX7-NEXT: v_bfe_u32 v5, v4, 8, 8 +; GFX7-NEXT: v_or_b32_e32 v2, v11, v2 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 24, v6 ; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 -; GFX7-NEXT: v_lshrrev_b32_e32 v15, 16, v4 -; GFX7-NEXT: v_lshrrev_b32_e32 v16, 24, v4 +; GFX7-NEXT: v_lshrrev_b32_e32 v7, 24, v4 ; GFX7-NEXT: v_and_b32_e32 v3, v4, v8 -; GFX7-NEXT: v_and_b32_e32 v4, v14, v8 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 8, v4 -; GFX7-NEXT: v_and_b32_e32 v5, v5, v8 -; GFX7-NEXT: v_or_b32_e32 v3, v3, v4 -; GFX7-NEXT: v_and_b32_e32 v4, v15, v8 -; GFX7-NEXT: v_lshlrev_b32_e32 v5, 16, v5 +; GFX7-NEXT: v_bfe_u32 v4, v4, 16, 8 +; GFX7-NEXT: v_lshlrev_b32_e32 v5, 8, v5 +; GFX7-NEXT: v_or_b32_e32 v3, v3, v5 ; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v4 ; GFX7-NEXT: v_or_b32_e32 v3, v3, v4 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v16 -; GFX7-NEXT: v_lshlrev_b32_e32 v6, 24, v6 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v5 -; GFX7-NEXT: v_or_b32_e32 v0, v0, v6 +; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v7 ; GFX7-NEXT: v_or_b32_e32 v3, v3, v4 ; GFX7-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 ; GFX7-NEXT: s_endpgm @@ -7464,86 +6968,80 @@ define amdgpu_ps void @insertelement_v_v16i8_v_v(<16 x i8> addrspace(1)* %ptr, i ; GFX10-LABEL: insertelement_v_v16i8_v_v: ; GFX10: ; %bb.0: ; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off -; GFX10-NEXT: v_mov_b32_e32 v8, 8 ; GFX10-NEXT: s_mov_b32 s0, 8 -; GFX10-NEXT: s_movk_i32 s1, 0xff +; GFX10-NEXT: v_mov_b32_e32 v8, 8 +; GFX10-NEXT: s_mov_b32 s1, 16 +; GFX10-NEXT: s_movk_i32 s2, 0xff ; GFX10-NEXT: v_and_b32_e32 v0, 3, v3 +; GFX10-NEXT: v_mov_b32_e32 v9, 16 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 2, v3 ; GFX10-NEXT: v_mov_b32_e32 v1, 0xff ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 3, v0 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3 -; GFX10-NEXT: v_cmp_eq_u32_e64 s2, 0, v3 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v9, 8, v4 -; GFX10-NEXT: v_lshrrev_b32_e32 v11, 8, v5 ; GFX10-NEXT: v_lshrrev_b32_e32 v10, 24, v4 -; GFX10-NEXT: v_lshrrev_b32_e32 v12, 24, v5 -; GFX10-NEXT: v_lshrrev_b32_e32 v13, 8, v6 -; GFX10-NEXT: v_lshlrev_b32_sdwa v9, s0, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v11, v8, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_and_b32_sdwa v17, v4, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_b32_sdwa v18, v5, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshrrev_b32_e32 v14, 24, v6 -; GFX10-NEXT: v_and_or_b32 v4, v4, s1, v9 +; GFX10-NEXT: v_lshrrev_b32_e32 v11, 24, v5 +; GFX10-NEXT: v_lshlrev_b32_sdwa v14, s0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v16, s0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v12, 24, v6 +; GFX10-NEXT: v_lshlrev_b32_sdwa v15, s1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v17, s1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v4, v4, s2, v14 ; GFX10-NEXT: v_lshlrev_b32_e32 v10, 24, v10 -; GFX10-NEXT: v_lshrrev_b32_e32 v15, 8, v7 -; GFX10-NEXT: v_and_or_b32 v5, v5, s1, v11 +; GFX10-NEXT: v_and_or_b32 v5, v5, s2, v16 +; GFX10-NEXT: v_lshlrev_b32_e32 v11, 24, v11 +; GFX10-NEXT: v_lshlrev_b32_sdwa v18, v8, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v19, v9, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_lshrrev_b32_e32 v13, 24, v7 +; GFX10-NEXT: v_or3_b32 v4, v4, v15, v10 +; GFX10-NEXT: v_or3_b32 v5, v5, v17, v11 +; GFX10-NEXT: v_lshlrev_b32_sdwa v20, v8, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_and_or_b32 v6, v6, v1, v18 ; GFX10-NEXT: v_lshlrev_b32_e32 v12, 24, v12 -; GFX10-NEXT: v_lshlrev_b32_sdwa v13, v8, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_and_b32_sdwa v19, v6, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshrrev_b32_e32 v16, 24, v7 -; GFX10-NEXT: v_or3_b32 v4, v4, v17, v10 -; GFX10-NEXT: v_lshlrev_b32_e32 v9, 24, v14 -; GFX10-NEXT: v_and_or_b32 v6, v6, s1, v13 -; GFX10-NEXT: v_lshlrev_b32_sdwa v11, v8, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_or3_b32 v5, v5, v18, v12 -; GFX10-NEXT: v_and_b32_sdwa v20, v7, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b32_e32 v10, 24, v16 -; GFX10-NEXT: v_or3_b32 v6, v6, v19, v9 -; GFX10-NEXT: v_and_or_b32 v7, v7, v1, v11 -; GFX10-NEXT: v_cndmask_b32_e32 v9, v4, v5, vcc_lo +; GFX10-NEXT: v_lshlrev_b32_sdwa v14, v9, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_lshlrev_b32_e32 v10, 24, v13 +; GFX10-NEXT: v_and_or_b32 v7, v7, v1, v20 +; GFX10-NEXT: v_cndmask_b32_e32 v11, v4, v5, vcc_lo +; GFX10-NEXT: v_or3_b32 v6, v6, v19, v12 ; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 2, v3 -; GFX10-NEXT: v_lshlrev_b32_e32 v11, v0, v1 +; GFX10-NEXT: v_lshlrev_b32_e32 v12, v0, v1 +; GFX10-NEXT: v_or3_b32 v7, v7, v14, v10 ; GFX10-NEXT: v_cmp_eq_u32_e64 s1, 3, v3 -; GFX10-NEXT: v_or3_b32 v7, v7, v20, v10 ; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, v6, s0 -; GFX10-NEXT: v_xor_b32_e32 v2, -1, v11 -; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, v7, s1 -; GFX10-NEXT: v_and_or_b32 v0, v9, v2, v0 +; GFX10-NEXT: v_cndmask_b32_e64 v10, v11, v6, s0 +; GFX10-NEXT: v_xor_b32_e32 v2, -1, v12 +; GFX10-NEXT: v_cmp_eq_u32_e64 s2, 0, v3 +; GFX10-NEXT: v_cndmask_b32_e64 v10, v10, v7, s1 +; GFX10-NEXT: v_and_or_b32 v0, v10, v2, v0 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v4, v0, s2 ; GFX10-NEXT: v_cndmask_b32_e64 v4, v6, v0, s0 ; GFX10-NEXT: v_cndmask_b32_e32 v3, v5, v0, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e64 v0, v7, v0, s1 -; GFX10-NEXT: v_lshrrev_b32_e32 v5, 8, v2 -; GFX10-NEXT: v_lshrrev_b32_e32 v10, 8, v4 -; GFX10-NEXT: v_lshrrev_b32_e32 v7, 8, v3 -; GFX10-NEXT: v_lshrrev_b32_e32 v12, 8, v0 -; GFX10-NEXT: v_lshrrev_b32_e32 v6, 24, v2 -; GFX10-NEXT: v_lshrrev_b32_e32 v9, 24, v3 -; GFX10-NEXT: v_lshrrev_b32_e32 v11, 24, v4 -; GFX10-NEXT: v_lshrrev_b32_e32 v13, 24, v0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v5, v8, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v7, v8, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v10, v8, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v8, v8, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-NEXT: v_and_b32_sdwa v14, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_b32_sdwa v15, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_b32_sdwa v16, v4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_or_b32 v2, v2, v1, v5 -; GFX10-NEXT: v_and_b32_sdwa v17, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v2 +; GFX10-NEXT: v_lshrrev_b32_e32 v7, 24, v4 +; GFX10-NEXT: v_lshlrev_b32_sdwa v11, v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v15, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v6, 24, v3 +; GFX10-NEXT: v_lshrrev_b32_e32 v10, 24, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v13, v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v8, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX10-NEXT: v_lshlrev_b32_sdwa v12, v9, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v16, v9, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v2, v2, v1, v11 +; GFX10-NEXT: v_lshlrev_b32_e32 v5, 24, v5 +; GFX10-NEXT: v_and_or_b32 v4, v4, v1, v15 +; GFX10-NEXT: v_lshlrev_b32_e32 v7, 24, v7 +; GFX10-NEXT: v_lshlrev_b32_sdwa v14, v9, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v9, v9, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v8, v0, v1, v8 +; GFX10-NEXT: v_and_or_b32 v3, v3, v1, v13 ; GFX10-NEXT: v_lshlrev_b32_e32 v6, 24, v6 -; GFX10-NEXT: v_and_or_b32 v5, v0, v1, v8 -; GFX10-NEXT: v_lshlrev_b32_e32 v9, 24, v9 -; GFX10-NEXT: v_lshlrev_b32_e32 v11, 24, v11 -; GFX10-NEXT: v_lshlrev_b32_e32 v12, 24, v13 -; GFX10-NEXT: v_and_or_b32 v3, v3, v1, v7 -; GFX10-NEXT: v_and_or_b32 v4, v4, v1, v10 -; GFX10-NEXT: v_or3_b32 v0, v2, v14, v6 -; GFX10-NEXT: v_or3_b32 v1, v3, v15, v9 -; GFX10-NEXT: v_or3_b32 v2, v4, v16, v11 -; GFX10-NEXT: v_or3_b32 v3, v5, v17, v12 +; GFX10-NEXT: v_lshlrev_b32_e32 v10, 24, v10 +; GFX10-NEXT: v_or3_b32 v0, v2, v12, v5 +; GFX10-NEXT: v_or3_b32 v2, v4, v16, v7 ; GFX10-NEXT: v_mov_b32_e32 v4, 0 +; GFX10-NEXT: v_or3_b32 v1, v3, v14, v6 +; GFX10-NEXT: v_or3_b32 v3, v8, v9, v10 ; GFX10-NEXT: v_mov_b32_e32 v5, 0 ; GFX10-NEXT: global_store_dwordx4 v[4:5], v[0:3], off ; GFX10-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sbfx.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sbfx.mir new file mode 100644 index 0000000000000..e030a3ab931b5 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sbfx.mir @@ -0,0 +1,50 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s + +# The only instruction selection cases for G_SBFX/G_UBFX are the 64-bit +# vector versions. All other versions, scalar and 32-bit vector, are +# expanded during register bank selection. + +--- +name: sbfx_s32_vii +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: sbfx_s32_vii + ; CHECK: liveins: $vgpr0 + ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec + ; CHECK: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 10, implicit $exec + ; CHECK: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], implicit $exec + ; CHECK: S_ENDPGM 0, implicit [[V_BFE_I32_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = G_CONSTANT i32 2 + %2:vgpr(s32) = G_CONSTANT i32 10 + %3:vgpr(s32) = G_SBFX %0, %1(s32), %2 + S_ENDPGM 0, implicit %3 +... + +--- +name: sbfx_s32_vvv +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-LABEL: name: sbfx_s32_vvv + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec + ; CHECK: S_ENDPGM 0, implicit [[V_BFE_I32_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = COPY $vgpr2 + %3:vgpr(s32) = G_SBFX %0, %1(s32), %2 + S_ENDPGM 0, implicit %3 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ubfx.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ubfx.mir new file mode 100644 index 0000000000000..8fd99d75c5cd8 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ubfx.mir @@ -0,0 +1,78 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s + +# The only simple instruction selection for G_SBFX/G_UBFX are the 64-bit +# vector versions. All other versions are expanded during register bank +# selection. + +--- +name: ubfx_s32_vii +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + ; WAVE64-LABEL: name: ubfx_s32_vii + ; WAVE64: liveins: $vgpr0 + ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; WAVE64: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec + ; WAVE64: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 10, implicit $exec + ; WAVE64: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], implicit $exec + ; WAVE64: S_ENDPGM 0, implicit [[V_BFE_U32_e64_]] + ; WAVE32-LABEL: name: ubfx_s32_vii + ; WAVE32: liveins: $vgpr0 + ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec + ; WAVE32: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 10, implicit $exec + ; WAVE32: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], implicit $exec + ; WAVE32: S_ENDPGM 0, implicit [[V_BFE_U32_e64_]] + ; CHECK-LABEL: name: ubfx_s32_vii + ; CHECK: liveins: $vgpr0 + ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec + ; CHECK: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 10, implicit $exec + ; CHECK: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], implicit $exec + ; CHECK: S_ENDPGM 0, implicit [[V_BFE_U32_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = G_CONSTANT i32 2 + %2:vgpr(s32) = G_CONSTANT i32 10 + %3:vgpr(s32) = G_UBFX %0, %1(s32), %2 + S_ENDPGM 0, implicit %3 +... + +--- +name: ubfx_s32_vvv +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + ; WAVE64-LABEL: name: ubfx_s32_vvv + ; WAVE64: liveins: $vgpr0, $vgpr1, $vgpr2 + ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; WAVE64: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; WAVE64: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec + ; WAVE64: S_ENDPGM 0, implicit [[V_BFE_U32_e64_]] + ; WAVE32-LABEL: name: ubfx_s32_vvv + ; WAVE32: liveins: $vgpr0, $vgpr1, $vgpr2 + ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; WAVE32: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; WAVE32: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec + ; WAVE32: S_ENDPGM 0, implicit [[V_BFE_U32_e64_]] + ; CHECK-LABEL: name: ubfx_s32_vvv + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec + ; CHECK: S_ENDPGM 0, implicit [[V_BFE_U32_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = COPY $vgpr2 + %3:vgpr(s32) = G_UBFX %0, %1(s32), %2 + S_ENDPGM 0, implicit %3 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sbfx.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sbfx.mir new file mode 100644 index 0000000000000..88d6a71d13832 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sbfx.mir @@ -0,0 +1,104 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck --check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck --check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck --check-prefix=GCN %s +... +--- +name: test_sbfx_s32 +body: | + bb.0.entry: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GCN-LABEL: name: test_sbfx_s32 + ; GCN: %copy:_(s32) = COPY $vgpr0 + ; GCN: %offset:_(s32) = COPY $vgpr1 + ; GCN: %width:_(s32) = COPY $vgpr2 + ; GCN: %sbfx:_(s32) = G_SBFX %copy, %offset(s32), %width + ; GCN: $vgpr0 = COPY %sbfx(s32) + %copy:_(s32) = COPY $vgpr0 + %offset:_(s32) = COPY $vgpr1 + %width:_(s32) = COPY $vgpr2 + %sbfx:_(s32) = G_SBFX %copy, %offset(s32), %width + $vgpr0 = COPY %sbfx(s32) +... + +--- +name: test_sbfx_s64 +body: | + bb.0.entry: + liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 + + ; GVN-LABEL: name: test_sbfx_s64 + ; GVN: %copy:_(s64) = COPY $vgpr0_vgpr1 + ; GVN: %offset:_(s32) = COPY $vgpr2 + ; GVN: %width:_(s32) = COPY $vgpr3 + ; GVN: %sbfx:_(s64) = G_SBFX %copy, %offset(s32), %width + ; GVN: $vgpr0_vgpr1 = COPY %sbfx(s64) + %copy:_(s64) = COPY $vgpr0_vgpr1 + %offset:_(s32) = COPY $vgpr2 + %width:_(s32) = COPY $vgpr3 + %sbfx:_(s64) = G_SBFX %copy, %offset(s32), %width + $vgpr0_vgpr1 = COPY %sbfx(s64) +... + +--- +name: test_sbfx_s8 +body: | + bb.0.entry: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GVN-LABEL: name: test_sbfx_s8 + ; GVN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GVN: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GVN: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GVN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GVN: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GVN: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] + ; GVN: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GVN: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] + ; GVN: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GVN: [[SBFX:%[0-9]+]]:_(s32) = G_SBFX [[COPY5]], [[AND]](s32), [[AND1]] + ; GVN: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SBFX]](s32) + ; GVN: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY6]], 8 + ; GVN: $vgpr0 = COPY [[SEXT_INREG]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %copy:_(s8) = G_TRUNC %0 + %offset:_(s8) = G_TRUNC %1 + %width:_(s8) = G_TRUNC %2 + %sbfx:_(s8) = G_SBFX %copy, %offset, %width + %4:_(s32) = G_SEXT %sbfx + $vgpr0 = COPY %4 +... + +--- +name: test_sbfx_s16 +body: | + bb.0.entry: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GVN-LABEL: name: test_sbfx_s16 + ; GVN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GVN: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GVN: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GVN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GVN: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GVN: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] + ; GVN: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GVN: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] + ; GVN: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GVN: [[SBFX:%[0-9]+]]:_(s32) = G_SBFX [[COPY5]], [[AND]](s32), [[AND1]] + ; GVN: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SBFX]](s32) + ; GVN: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY6]], 16 + ; GVN: $vgpr0 = COPY [[SEXT_INREG]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %copy:_(s16) = G_TRUNC %0 + %offset:_(s16) = G_TRUNC %1 + %width:_(s16) = G_TRUNC %2 + %sbfx:_(s16) = G_SBFX %copy, %offset, %width + %4:_(s32) = G_SEXT %sbfx + $vgpr0 = COPY %4 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ubfx.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ubfx.mir new file mode 100644 index 0000000000000..774cb0d9fd894 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ubfx.mir @@ -0,0 +1,105 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck --check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck --check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck --check-prefix=GCN %s +... +--- +name: test_ubfx_s32 +body: | + bb.0.entry: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GCN-LABEL: name: test_ubfx_s32 + ; GCN: %copy:_(s32) = COPY $vgpr0 + ; GCN: %offset:_(s32) = COPY $vgpr1 + ; GCN: %width:_(s32) = COPY $vgpr2 + ; GCN: %ubfx:_(s32) = G_UBFX %copy, %offset(s32), %width + ; GCN: $vgpr0 = COPY %ubfx(s32) + %copy:_(s32) = COPY $vgpr0 + %offset:_(s32) = COPY $vgpr1 + %width:_(s32) = COPY $vgpr2 + %ubfx:_(s32) = G_UBFX %copy, %offset(s32), %width + $vgpr0 = COPY %ubfx(s32) +... + +--- +name: test_ubfx_s64 +body: | + bb.0.entry: + liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 + + ; GCN-LABEL: name: test_ubfx_s64 + ; GCN: %copy:_(s64) = COPY $vgpr0_vgpr1 + ; GCN: %offset:_(s32) = COPY $vgpr2 + ; GCN: %width:_(s32) = COPY $vgpr3 + ; GCN: %ubfx:_(s64) = G_UBFX %copy, %offset(s32), %width + ; GCN: $vgpr0_vgpr1 = COPY %ubfx(s64) + %copy:_(s64) = COPY $vgpr0_vgpr1 + %offset:_(s32) = COPY $vgpr2 + %width:_(s32) = COPY $vgpr3 + %ubfx:_(s64) = G_UBFX %copy, %offset(s32), %width + $vgpr0_vgpr1 = COPY %ubfx(s64) +... + +--- +name: test_ubfx_s8 +body: | + bb.0.entry: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GCN-LABEL: name: test_ubfx_s8 + ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] + ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] + ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN: [[UBFX:%[0-9]+]]:_(s32) = G_UBFX [[COPY5]], [[AND]](s32), [[AND1]] + ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UBFX]](s32) + ; GCN: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C]] + ; GCN: $vgpr0 = COPY [[AND2]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %copy:_(s8) = G_TRUNC %0 + %offset:_(s8) = G_TRUNC %1 + %width:_(s8) = G_TRUNC %2 + %ubfx:_(s8) = G_UBFX %copy, %offset, %width + %4:_(s32) = G_ZEXT %ubfx + $vgpr0 = COPY %4 +... + +--- +name: test_ubfx_s16 +body: | + bb.0.entry: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GCN-LABEL: name: test_ubfx_s16 + ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] + ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] + ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN: [[UBFX:%[0-9]+]]:_(s32) = G_UBFX [[COPY5]], [[AND]](s32), [[AND1]] + ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UBFX]](s32) + ; GCN: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C]] + ; GCN: $vgpr0 = COPY [[AND2]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %copy:_(s16) = G_TRUNC %0 + %offset:_(s16) = G_TRUNC %1 + %width:_(s16) = G_TRUNC %2 + %sbfx:_(s16) = G_UBFX %copy, %offset, %width + %4:_(s32) = G_ZEXT %sbfx + $vgpr0 = COPY %4 +... + diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll index a8098b7dd9d15..85146308beb8c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll @@ -983,8 +983,7 @@ define amdgpu_kernel void @lshr_and(i32 addrspace(1)* %out, i32 %a) #0 { ; GFX6-NEXT: s_mov_b32 s6, -1 ; GFX6-NEXT: s_mov_b32 s7, 0xf000 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: s_lshr_b32 s0, s0, 6 -; GFX6-NEXT: s_and_b32 s0, s0, 7 +; GFX6-NEXT: s_bfe_u32 s0, s0, 0x30006 ; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX6-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll index c3ab2b2a15a81..d67cbe717f195 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll @@ -36,8 +36,7 @@ define amdgpu_kernel void @test_workitem_id_x(i32 addrspace(1)* %out) #1 { ; CO-V2-NOT: v1 ; CO-V2: {{buffer|flat}}_store_dword {{.*}}v1 -; PACKED-TID: v_lshrrev_b32_e32 [[ID:v[0-9]+]], 10, v0 -; PACKED-TID: v_and_b32_e32 [[ID]], 0x3ff, [[ID]] +; PACKED-TID: v_bfe_u32 [[ID:v[0-9]+]], v0, 10, 10 ; PACKED-TID: {{buffer|flat|global}}_store_dword {{.*}}[[ID]] ; PACKED-TID: .amdhsa_system_vgpr_workitem_id 1 define amdgpu_kernel void @test_workitem_id_y(i32 addrspace(1)* %out) #1 { @@ -55,8 +54,7 @@ define amdgpu_kernel void @test_workitem_id_y(i32 addrspace(1)* %out) #1 { ; CO-V2-NOT: v2 ; CO-V2: {{buffer|flat}}_store_dword {{.*}}v2 -; PACKED-TID: v_lshrrev_b32_e32 [[ID:v[0-9]+]], 20, v0 -; PACKED-TID: v_and_b32_e32 [[ID]], 0x3ff, [[ID]] +; PACKED-TID: v_bfe_u32 [[ID:v[0-9]+]], v0, 20, 10 ; PACKED-TID: {{buffer|flat|global}}_store_dword {{.*}}[[ID]] ; PACKED-TID: .amdhsa_system_vgpr_workitem_id 2 define amdgpu_kernel void @test_workitem_id_z(i32 addrspace(1)* %out) #1 { @@ -110,8 +108,8 @@ define void @test_workitem_id_x_func(i32 addrspace(1)* %out) #1 { } ; ALL-LABEL: {{^}}test_workitem_id_y_func: -; HSA: v_lshrrev_b32_e32 v2, 10, v2 -; MESA: v_lshrrev_b32_e32 v2, 10, v2 +; HSA: v_bfe_u32 v2, v2, 10, 10 +; MESA: v_bfe_u32 v2, v2, 10, 10 define void @test_workitem_id_y_func(i32 addrspace(1)* %out) #1 { %id = call i32 @llvm.amdgcn.workitem.id.y() store i32 %id, i32 addrspace(1)* %out @@ -119,8 +117,8 @@ define void @test_workitem_id_y_func(i32 addrspace(1)* %out) #1 { } ; ALL-LABEL: {{^}}test_workitem_id_z_func: -; HSA: v_lshrrev_b32_e32 v2, 20, v2 -; MESA: v_lshrrev_b32_e32 v2, 20, v2 +; HSA: v_bfe_u32 v2, v2, 20, 10 +; MESA: v_bfe_u32 v2, v2, 20, 10 define void @test_workitem_id_z_func(i32 addrspace(1)* %out) #1 { %id = call i32 @llvm.amdgcn.workitem.id.z() store i32 %id, i32 addrspace(1)* %out diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-sbfx.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-sbfx.mir new file mode 100644 index 0000000000000..fc959d1f24cbd --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-sbfx.mir @@ -0,0 +1,151 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=amdgpu-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck --check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -run-pass=amdgpu-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck --check-prefix=GCN %s + +--- +name: bfe_sext_inreg_ashr_s32 +legalized: true +tracksRegLiveness: true + +body: | + bb.0.entry: + liveins: $vgpr0 + + ; GCN-LABEL: name: bfe_sext_inreg_ashr_s32 + ; GCN: liveins: $vgpr0 + ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GCN: [[SBFX:%[0-9]+]]:_(s32) = G_SBFX [[COPY]], [[C]](s32), [[C1]] + ; GCN: $vgpr0 = COPY [[SBFX]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = G_CONSTANT i32 4 + %2:_(s32) = G_ASHR %0, %1(s32) + %3:_(s32) = COPY %2(s32) + %4:_(s32) = G_SEXT_INREG %3, 16 + $vgpr0 = COPY %4(s32) +... + +--- +name: bfe_sext_inreg_lshr_s32 +legalized: true +tracksRegLiveness: true + +body: | + bb.0.entry: + liveins: $vgpr0 + + ; GCN-LABEL: name: bfe_sext_inreg_lshr_s32 + ; GCN: liveins: $vgpr0 + ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GCN: [[SBFX:%[0-9]+]]:_(s32) = G_SBFX [[COPY]], [[C]](s32), [[C1]] + ; GCN: $vgpr0 = COPY [[SBFX]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = G_CONSTANT i32 4 + %2:_(s32) = G_LSHR %0, %1(s32) + %3:_(s32) = COPY %2(s32) + %4:_(s32) = G_SEXT_INREG %3, 16 + $vgpr0 = COPY %4(s32) +... + +--- +name: bfe_sext_inreg_ashr_s64 +legalized: true +tracksRegLiveness: true + +body: | + bb.0.entry: + liveins: $vgpr0_vgpr1 + + ; GCN-LABEL: name: bfe_sext_inreg_ashr_s64 + ; GCN: liveins: $vgpr0_vgpr1 + ; GCN: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GCN: [[SBFX:%[0-9]+]]:_(s64) = G_SBFX [[COPY]], [[C]](s32), [[C1]] + ; GCN: $vgpr0_vgpr1 = COPY [[SBFX]](s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s32) = G_CONSTANT i32 4 + %2:_(s64) = G_ASHR %0, %1(s32) + %3:_(s64) = COPY %2(s64) + %4:_(s64) = G_SEXT_INREG %3, 16 + $vgpr0_vgpr1 = COPY %4(s64) +... + +--- +name: toobig_sext_inreg_ashr_s32 +legalized: true +tracksRegLiveness: true + +body: | + bb.0.entry: + liveins: $vgpr0 + + ; GCN-LABEL: name: toobig_sext_inreg_ashr_s32 + ; GCN: liveins: $vgpr0 + ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GCN: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) + ; GCN: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ASHR]], 20 + ; GCN: $vgpr0 = COPY [[SEXT_INREG]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = G_CONSTANT i32 16 + %2:_(s32) = G_ASHR %0, %1(s32) + %3:_(s32) = COPY %2(s32) + %4:_(s32) = G_SEXT_INREG %3, 20 + $vgpr0 = COPY %4(s32) +... + +--- +name: toobig_sext_inreg_ashr_s64 +legalized: true +tracksRegLiveness: true + +body: | + bb.0.entry: + liveins: $vgpr0_vgpr1 + + ; GCN-LABEL: name: toobig_sext_inreg_ashr_s64 + ; GCN: liveins: $vgpr0_vgpr1 + ; GCN: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GCN: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32) + ; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GCN: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C1]](s32) + ; GCN: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR1]](s32), [[ASHR]](s32) + ; GCN: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[MV]], 32 + ; GCN: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s32) = G_CONSTANT i32 40 + %2:_(s64) = G_ASHR %0, %1(s32) + %3:_(s64) = COPY %2(s64) + %4:_(s64) = G_SEXT_INREG %3, 32 + $vgpr0_vgpr1 = COPY %4(s64) +... + +--- +name: var_sext_inreg_ashr_s32 +legalized: true +tracksRegLiveness: true + +body: | + bb.0.entry: + liveins: $vgpr0, $vgpr1 + + ; GCN-LABEL: name: var_sext_inreg_ashr_s32 + ; GCN: liveins: $vgpr0, $vgpr1 + ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[COPY1]](s32) + ; GCN: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ASHR]], 10 + ; GCN: $vgpr0 = COPY [[SEXT_INREG]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = G_ASHR %0, %1(s32) + %3:_(s32) = COPY %2(s32) + %4:_(s32) = G_SEXT_INREG %3, 10 + $vgpr0 = COPY %4(s32) +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-ubfx.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-ubfx.mir new file mode 100644 index 0000000000000..027e3e3535ec9 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-ubfx.mir @@ -0,0 +1,103 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=amdgpu-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck --check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -run-pass=amdgpu-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck --check-prefix=GCN %s + +--- +name: bfe_and_lshr_s32 +legalized: true +tracksRegLiveness: true + +body: | + bb.0.entry: + liveins: $vgpr0 + + ; GCN-LABEL: name: bfe_and_lshr_s32 + ; GCN: liveins: $vgpr0 + ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GCN: [[UBFX:%[0-9]+]]:_(s32) = G_UBFX [[COPY]], [[C1]](s32), [[C]] + ; GCN: $vgpr0 = COPY [[UBFX]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = G_CONSTANT i32 8 + %2:_(s32) = G_LSHR %0, %1(s32) + %3:_(s32) = G_CONSTANT i32 31 + %4:_(s32) = G_AND %2, %3 + $vgpr0 = COPY %4(s32) + +... + +--- +name: bfe_and_lshr_s64 +legalized: true +tracksRegLiveness: true + +body: | + bb.0.entry: + liveins: $vgpr0_vgpr1 + + ; GCN-LABEL: name: bfe_and_lshr_s64 + ; GCN: liveins: $vgpr0_vgpr1 + ; GCN: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GCN: [[UBFX:%[0-9]+]]:_(s64) = G_UBFX [[COPY]], [[C1]](s32), [[C]] + ; GCN: $vgpr0_vgpr1 = COPY [[UBFX]](s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s32) = G_CONSTANT i32 8 + %2:_(s64) = G_LSHR %0, %1(s32) + %3:_(s64) = G_CONSTANT i64 1023 + %4:_(s64) = G_AND %2, %3 + $vgpr0_vgpr1 = COPY %4(s64) + +... + +--- +name: toobig_and_lshr_s32 +legalized: true +tracksRegLiveness: true + +body: | + bb.0.entry: + liveins: $vgpr0 + + ; GCN-LABEL: name: toobig_and_lshr_s32 + ; GCN: liveins: $vgpr0 + ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; GCN: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GCN: $vgpr0 = COPY [[LSHR]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = G_CONSTANT i32 28 + %2:_(s32) = G_LSHR %0, %1(s32) + %3:_(s32) = G_CONSTANT i32 511 + %4:_(s32) = G_AND %2, %3 + $vgpr0 = COPY %4(s32) + +... + +--- +name: bfe_and_ashr_s32 +legalized: true +tracksRegLiveness: true + +body: | + bb.0.entry: + liveins: $vgpr0 + + ; GCN-LABEL: name: bfe_and_ashr_s32 + ; GCN: liveins: $vgpr0 + ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GCN: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) + ; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; GCN: [[AND:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C1]] + ; GCN: $vgpr0 = COPY [[AND]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = G_CONSTANT i32 8 + %2:_(s32) = G_ASHR %0, %1(s32) + %3:_(s32) = G_CONSTANT i32 31 + %4:_(s32) = G_AND %2, %3 + $vgpr0 = COPY %4(s32) + +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sbfx.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sbfx.mir new file mode 100644 index 0000000000000..282849347a3bb --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sbfx.mir @@ -0,0 +1,342 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck %s + +... + +# Generate the 3 operand vector bitfield extract instructions for 32-bit +# operations only. +--- +name: test_sbfx_s32_vvv +legalized: true + +body: | + bb.0.entry: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; CHECK-LABEL: name: test_sbfx_s32_vvv + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK: [[SBFX:%[0-9]+]]:vgpr(s32) = G_SBFX [[COPY]], [[COPY1]](s32), [[COPY2]] + ; CHECK: $vgpr0 = COPY [[SBFX]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s32) = G_SBFX %0, %1(s32), %2 + $vgpr0 = COPY %3(s32) +... + +--- +name: test_sbfx_s32_vii +legalized: true + +body: | + bb.0.entry: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_sbfx_s32_vii + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 + ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) + ; CHECK: [[SBFX:%[0-9]+]]:vgpr(s32) = G_SBFX [[COPY]], [[COPY1]](s32), [[COPY2]] + ; CHECK: $vgpr0 = COPY [[SBFX]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = G_CONSTANT i32 10 + %2:_(s32) = G_CONSTANT i32 4 + %3:_(s32) = G_SBFX %0, %1(s32), %2 + $vgpr0 = COPY %3(s32) +... + +--- +name: test_sbfx_s32_vss +legalized: true + +body: | + bb.0.entry: + liveins: $vgpr0, $sgpr0, $sgpr1 + + ; CHECK-LABEL: name: test_sbfx_s32_vss + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) + ; CHECK: [[SBFX:%[0-9]+]]:vgpr(s32) = G_SBFX [[COPY]], [[COPY3]](s32), [[COPY4]] + ; CHECK: $vgpr0 = COPY [[SBFX]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $sgpr0 + %2:_(s32) = COPY $sgpr1 + %3:_(s32) = G_SBFX %0, %1(s32), %2 + $vgpr0 = COPY %3(s32) +... + +# Expand to a sequence that implements the 64-bit bitfield extract using +# shifts and masks. +--- +name: test_sbfx_s64_vvv +legalized: true + +body: | + bb.0.entry: + liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 + + ; CHECK-LABEL: name: test_sbfx_s64_vvv + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; CHECK: [[ASHR:%[0-9]+]]:vgpr(s64) = G_ASHR [[COPY]], [[COPY1]](s32) + ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 64 + ; CHECK: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[C]], [[COPY2]] + ; CHECK: [[SHL:%[0-9]+]]:vgpr(s64) = G_SHL [[ASHR]], [[SUB]](s32) + ; CHECK: [[ASHR1:%[0-9]+]]:vgpr(s64) = G_ASHR [[SHL]], [[SUB]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY %3:vgpr(s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s32) = COPY $vgpr2 + %2:_(s32) = COPY $vgpr3 + %3:_(s64) = G_SBFX %0, %1(s32), %2 + $vgpr0_vgpr1 = COPY %3(s64) +... + +--- +name: test_sbfx_s64_vss +legalized: true + +body: | + bb.0.entry: + liveins: $vgpr0_vgpr1, $sgpr0, $sgpr1 + + ; CHECK-LABEL: name: test_sbfx_s64_vss + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK: [[ASHR:%[0-9]+]]:vgpr(s64) = G_ASHR [[COPY]], [[COPY1]](s32) + ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 64 + ; CHECK: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[C]], [[COPY2]] + ; CHECK: [[SHL:%[0-9]+]]:vgpr(s64) = G_SHL [[ASHR]], [[SUB]](s32) + ; CHECK: [[ASHR1:%[0-9]+]]:vgpr(s64) = G_ASHR [[SHL]], [[SUB]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY %3:vgpr(s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s32) = COPY $vgpr0 + %2:_(s32) = COPY $vgpr1 + %3:_(s64) = G_SBFX %0, %1(s32), %2 + $vgpr0_vgpr1 = COPY %3(s64) +... + +# If the offset and width are constants, use the 32-bit bitfield extract, +# and merge to create a 64-bit result. +--- +name: test_sbfx_s64_vii_small +legalized: true + +body: | + bb.0.entry: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_sbfx_s64_vii_small + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 31 + ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) + ; CHECK: [[ASHR:%[0-9]+]]:vgpr(s64) = G_ASHR [[COPY]], [[COPY1]](s32) + ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK: [[SBFX:%[0-9]+]]:vgpr(s32) = G_SBFX [[UV]], [[C2]](s32), [[COPY2]] + ; CHECK: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31 + ; CHECK: [[ASHR1:%[0-9]+]]:vgpr(s32) = G_ASHR [[SBFX]], [[C3]](s32) + ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SBFX]](s32), [[ASHR1]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY [[MV]](s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s32) = G_CONSTANT i32 31 + %2:_(s32) = G_CONSTANT i32 4 + %3:_(s64) = G_SBFX %0, %1(s32), %2 + $vgpr0_vgpr1 = COPY %3(s64) +... + +--- +name: test_sbfx_s64_vii_big +legalized: true + +body: | + bb.0.entry: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_sbfx_s64_vii_big + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 8 + ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 40 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) + ; CHECK: [[ASHR:%[0-9]+]]:vgpr(s64) = G_ASHR [[COPY]], [[COPY1]](s32) + ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 8 + ; CHECK: [[SBFX:%[0-9]+]]:vgpr(s32) = G_SBFX [[UV1]], [[C2]](s32), [[C3]] + ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[UV]](s32), [[SBFX]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY [[MV]](s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s32) = G_CONSTANT i32 8 + %2:_(s32) = G_CONSTANT i32 40 + %3:_(s64) = G_SBFX %0, %1(s32), %2 + $vgpr0_vgpr1 = COPY %3(s64) +... + +--- +name: test_sbfx_s64_svv +legalized: true + +body: | + bb.0.entry: + liveins: $sgpr0_sgpr1, $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: test_sbfx_s64_svv + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) + ; CHECK: [[ASHR:%[0-9]+]]:vgpr(s64) = G_ASHR [[COPY3]], [[COPY1]](s32) + ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 64 + ; CHECK: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[C]], [[COPY2]] + ; CHECK: [[SHL:%[0-9]+]]:vgpr(s64) = G_SHL [[ASHR]], [[SUB]](s32) + ; CHECK: [[ASHR1:%[0-9]+]]:vgpr(s64) = G_ASHR [[SHL]], [[SUB]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY %3:vgpr(s64) + %0:_(s64) = COPY $sgpr0_sgpr1 + %1:_(s32) = COPY $vgpr0 + %2:_(s32) = COPY $vgpr1 + %3:_(s64) = G_SBFX %0, %1(s32), %2 + $vgpr0_vgpr1 = COPY %3(s64) +... + +# Expand to a sequence that combines the offset and width for the two operand +# version of the 32-bit instruction. +--- +name: test_sbfx_s32_svv +legalized: true + +body: | + bb.0.entry: + liveins: $sgpr0, $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: test_sbfx_s32_svv + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK: [[SBFX:%[0-9]+]]:vgpr(s32) = G_SBFX [[COPY3]], [[COPY1]](s32), [[COPY2]] + ; CHECK: $vgpr0 = COPY [[SBFX]](s32) + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $vgpr0 + %2:_(s32) = COPY $vgpr1 + %3:_(s32) = G_SBFX %0, %1(s32), %2 + $vgpr0 = COPY %3(s32) +... + +--- +name: test_sbfx_s32_sss +legalized: true + +body: | + bb.0.entry: + liveins: $sgpr0, $sgpr1, $sgpr3 + + ; CHECK-LABEL: name: test_sbfx_s32_sss + ; CHECK: [[COPY:%[0-9]+]]:sreg_32(s32) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 63 + ; CHECK: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[COPY1]], [[C]] + ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 + ; CHECK: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY2]], [[C1]](s32) + ; CHECK: [[OR:%[0-9]+]]:sreg_32(s32) = G_OR [[AND]], [[SHL]] + ; CHECK: [[S_BFE_I32_:%[0-9]+]]:sreg_32(s32) = S_BFE_I32 [[COPY]](s32), [[OR]](s32), implicit-def $scc + ; CHECK: $sgpr0 = COPY [[S_BFE_I32_]](s32) + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $sgpr2 + %3:_(s32) = G_SBFX %0, %1(s32), %2 + $sgpr0 = COPY %3(s32) +... + +--- +name: test_sbfx_s32_sii +legalized: true + +body: | + bb.0.entry: + liveins: $sgpr0 + + ; CHECK-LABEL: name: test_sbfx_s32_sii + ; CHECK: [[COPY:%[0-9]+]]:sreg_32(s32) = COPY $sgpr0 + ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 + ; CHECK: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 63 + ; CHECK: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[C]], [[C2]] + ; CHECK: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 + ; CHECK: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C1]], [[C3]](s32) + ; CHECK: [[OR:%[0-9]+]]:sreg_32(s32) = G_OR [[AND]], [[SHL]] + ; CHECK: [[S_BFE_I32_:%[0-9]+]]:sreg_32(s32) = S_BFE_I32 [[COPY]](s32), [[OR]](s32), implicit-def $scc + ; CHECK: $sgpr0 = COPY [[S_BFE_I32_]](s32) + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = G_CONSTANT i32 1 + %2:_(s32) = G_CONSTANT i32 10 + %3:_(s32) = G_SBFX %0, %1(s32), %2 + $sgpr0 = COPY %3(s32) +... + +# Expand to a sequence that combines the offset and width for the two operand +# version of the 64-bit scalar instruction. +--- +name: test_sbfx_s64_sss +legalized: true + +body: | + bb.0.entry: + liveins: $sgpr0_sgpr1, $sgpr0, $sgpr1 + + ; CHECK-LABEL: name: test_sbfx_s64_sss + ; CHECK: [[COPY:%[0-9]+]]:sreg_64(s64) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 63 + ; CHECK: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[COPY1]], [[C]] + ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 + ; CHECK: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY2]], [[C1]](s32) + ; CHECK: [[OR:%[0-9]+]]:sreg_32(s32) = G_OR [[AND]], [[SHL]] + ; CHECK: [[S_BFE_I64_:%[0-9]+]]:sreg_64(s64) = S_BFE_I64 [[COPY]](s64), [[OR]](s32), implicit-def $scc + ; CHECK: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]](s64) + %0:_(s64) = COPY $sgpr0_sgpr1 + %1:_(s32) = COPY $sgpr0 + %2:_(s32) = COPY $sgpr1 + %3:_(s64) = G_SBFX %0, %1(s32), %2 + $sgpr0_sgpr1 = COPY %3(s64) +... + +--- +name: test_sbfx_s64_sii +legalized: true + +body: | + bb.0.entry: + liveins: $sgpr0_sgpr1 + + ; CHECK-LABEL: name: test_sbfx_s64_sii + ; CHECK: [[COPY:%[0-9]+]]:sreg_64(s64) = COPY $sgpr0_sgpr1 + ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 + ; CHECK: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 63 + ; CHECK: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[C]], [[C2]] + ; CHECK: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 + ; CHECK: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C1]], [[C3]](s32) + ; CHECK: [[OR:%[0-9]+]]:sreg_32(s32) = G_OR [[AND]], [[SHL]] + ; CHECK: [[S_BFE_I64_:%[0-9]+]]:sreg_64(s64) = S_BFE_I64 [[COPY]](s64), [[OR]](s32), implicit-def $scc + %0:_(s64) = COPY $sgpr0_sgpr1 + %1:_(s32) = G_CONSTANT i32 1 + %2:_(s32) = G_CONSTANT i32 10 + %3:_(s64) = G_SBFX %0, %1(s32), %2 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ubfx.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ubfx.mir new file mode 100644 index 0000000000000..a08e234618029 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ubfx.mir @@ -0,0 +1,342 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck %s + +... + +# Generate the 3 operand vector bitfield extract instructions for 32-bit +# operations only. +--- +name: test_ubfx_s32_vvv +legalized: true + +body: | + bb.0.entry: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; CHECK-LABEL: name: test_ubfx_s32_vvv + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK: [[UBFX:%[0-9]+]]:vgpr(s32) = G_UBFX [[COPY]], [[COPY1]](s32), [[COPY2]] + ; CHECK: $vgpr0 = COPY [[UBFX]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s32) = G_UBFX %0, %1(s32), %2 + $vgpr0 = COPY %3(s32) +... + +--- +name: test_ubfx_s32_vii +legalized: true + +body: | + bb.0.entry: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_ubfx_s32_vii + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 + ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) + ; CHECK: [[UBFX:%[0-9]+]]:vgpr(s32) = G_UBFX [[COPY]], [[COPY1]](s32), [[COPY2]] + ; CHECK: $vgpr0 = COPY [[UBFX]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = G_CONSTANT i32 10 + %2:_(s32) = G_CONSTANT i32 4 + %3:_(s32) = G_UBFX %0, %1(s32), %2 + $vgpr0 = COPY %3(s32) +... + +--- +name: test_ubfx_s32_vss +legalized: true + +body: | + bb.0.entry: + liveins: $vgpr0, $sgpr0, $sgpr1 + + ; CHECK-LABEL: name: test_ubfx_s32_vss + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) + ; CHECK: [[UBFX:%[0-9]+]]:vgpr(s32) = G_UBFX [[COPY]], [[COPY3]](s32), [[COPY4]] + ; CHECK: $vgpr0 = COPY [[UBFX]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $sgpr0 + %2:_(s32) = COPY $sgpr1 + %3:_(s32) = G_UBFX %0, %1(s32), %2 + $vgpr0 = COPY %3(s32) +... + +# Expand to a sequence that implements the 64-bit bitfield extract using +# shifts and masks. +--- +name: test_ubfx_s64_vvv +legalized: true + +body: | + bb.0.entry: + liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 + + ; CHECK-LABEL: name: test_ubfx_s64_vvv + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; CHECK: [[LSHR:%[0-9]+]]:vgpr(s64) = G_LSHR [[COPY]], [[COPY1]](s32) + ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[LSHR]](s64) + ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 64 + ; CHECK: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[C]], [[COPY2]] + ; CHECK: [[SHL:%[0-9]+]]:vgpr(s64) = G_SHL [[LSHR]], [[SUB]](s32) + ; CHECK: [[LSHR1:%[0-9]+]]:vgpr(s64) = G_LSHR [[SHL]], [[SUB]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY %3:vgpr(s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s32) = COPY $vgpr2 + %2:_(s32) = COPY $vgpr3 + %3:_(s64) = G_UBFX %0, %1(s32), %2 + $vgpr0_vgpr1 = COPY %3(s64) +... + +--- +name: test_ubfx_s64_vss +legalized: true + +body: | + bb.0.entry: + liveins: $vgpr0_vgpr1, $sgpr0, $sgpr1 + + ; CHECK-LABEL: name: test_ubfx_s64_vss + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK: [[LSHR:%[0-9]+]]:vgpr(s64) = G_LSHR [[COPY]], [[COPY1]](s32) + ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[LSHR]](s64) + ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 64 + ; CHECK: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[C]], [[COPY2]] + ; CHECK: [[SHL:%[0-9]+]]:vgpr(s64) = G_SHL [[LSHR]], [[SUB]](s32) + ; CHECK: [[LSHR1:%[0-9]+]]:vgpr(s64) = G_LSHR [[SHL]], [[SUB]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY %3:vgpr(s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s32) = COPY $vgpr0 + %2:_(s32) = COPY $vgpr1 + %3:_(s64) = G_UBFX %0, %1(s32), %2 + $vgpr0_vgpr1 = COPY %3(s64) +... + +# If the offset and width are constants, use the 32-bit bitfield extract, +# and merge to create a 64-bit result. +--- +name: test_ubfx_s64_vii_small +legalized: true + +body: | + bb.0.entry: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_ubfx_s64_vii_small + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 31 + ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) + ; CHECK: [[LSHR:%[0-9]+]]:vgpr(s64) = G_LSHR [[COPY]], [[COPY1]](s32) + ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[LSHR]](s64) + ; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK: [[UBFX:%[0-9]+]]:vgpr(s32) = G_UBFX [[UV]], [[C2]](s32), [[COPY2]] + ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[UBFX]](s32), [[C2]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY [[MV]](s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s32) = G_CONSTANT i32 31 + %2:_(s32) = G_CONSTANT i32 4 + %3:_(s64) = G_UBFX %0, %1(s32), %2 + $vgpr0_vgpr1 = COPY %3(s64) +... + +--- +name: test_ubfx_s64_vii_big +legalized: true + +body: | + bb.0.entry: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_ubfx_s64_vii_big + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 8 + ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 40 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) + ; CHECK: [[LSHR:%[0-9]+]]:vgpr(s64) = G_LSHR [[COPY]], [[COPY1]](s32) + ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[LSHR]](s64) + ; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 8 + ; CHECK: [[UBFX:%[0-9]+]]:vgpr(s32) = G_UBFX [[UV1]], [[C2]](s32), [[C3]] + ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[UV]](s32), [[UBFX]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY [[MV]](s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s32) = G_CONSTANT i32 8 + %2:_(s32) = G_CONSTANT i32 40 + %3:_(s64) = G_UBFX %0, %1(s32), %2 + $vgpr0_vgpr1 = COPY %3(s64) +... + +--- +name: test_ubfx_s64_svv +legalized: true + +body: | + bb.0.entry: + liveins: $sgpr0_sgpr1, $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: test_ubfx_s64_svv + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) + ; CHECK: [[LSHR:%[0-9]+]]:vgpr(s64) = G_LSHR [[COPY3]], [[COPY1]](s32) + ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[LSHR]](s64) + ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 64 + ; CHECK: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[C]], [[COPY2]] + ; CHECK: [[SHL:%[0-9]+]]:vgpr(s64) = G_SHL [[LSHR]], [[SUB]](s32) + ; CHECK: [[LSHR1:%[0-9]+]]:vgpr(s64) = G_LSHR [[SHL]], [[SUB]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY %3:vgpr(s64) + %0:_(s64) = COPY $sgpr0_sgpr1 + %1:_(s32) = COPY $vgpr0 + %2:_(s32) = COPY $vgpr1 + %3:_(s64) = G_UBFX %0, %1(s32), %2 + $vgpr0_vgpr1 = COPY %3(s64) +... + +# Expand to a sequence that combines the offset and width for the two operand +# version of the 32-bit instruction. +--- +name: test_ubfx_s32_svv +legalized: true + +body: | + bb.0.entry: + liveins: $sgpr0, $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: test_ubfx_s32_svv + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK: [[UBFX:%[0-9]+]]:vgpr(s32) = G_UBFX [[COPY3]], [[COPY1]](s32), [[COPY2]] + ; CHECK: $vgpr0 = COPY [[UBFX]](s32) + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $vgpr0 + %2:_(s32) = COPY $vgpr1 + %3:_(s32) = G_UBFX %0, %1(s32), %2 + $vgpr0 = COPY %3(s32) +... + +--- +name: test_ubfx_s32_sss +legalized: true + +body: | + bb.0.entry: + liveins: $sgpr0, $sgpr1, $sgpr2 + + ; CHECK-LABEL: name: test_ubfx_s32_sss + ; CHECK: [[COPY:%[0-9]+]]:sreg_32(s32) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 63 + ; CHECK: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[COPY1]], [[C]] + ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 + ; CHECK: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY2]], [[C1]](s32) + ; CHECK: [[OR:%[0-9]+]]:sreg_32(s32) = G_OR [[AND]], [[SHL]] + ; CHECK: [[S_BFE_U32_:%[0-9]+]]:sreg_32(s32) = S_BFE_U32 [[COPY]](s32), [[OR]](s32), implicit-def $scc + ; CHECK: $sgpr0 = COPY [[S_BFE_U32_]](s32) + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $sgpr2 + %3:_(s32) = G_UBFX %0, %1(s32), %2 + $sgpr0 = COPY %3(s32) +... + +--- +name: test_ubfx_s32_sii +legalized: true + +body: | + bb.0.entry: + liveins: $sgpr0, $sgpr1, $sgpr2 + + ; CHECK-LABEL: name: test_ubfx_s32_sii + ; CHECK: [[COPY:%[0-9]+]]:sreg_32(s32) = COPY $sgpr0 + ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 + ; CHECK: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 63 + ; CHECK: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[C]], [[C2]] + ; CHECK: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 + ; CHECK: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C1]], [[C3]](s32) + ; CHECK: [[OR:%[0-9]+]]:sreg_32(s32) = G_OR [[AND]], [[SHL]] + ; CHECK: [[S_BFE_U32_:%[0-9]+]]:sreg_32(s32) = S_BFE_U32 [[COPY]](s32), [[OR]](s32), implicit-def $scc + ; CHECK: $sgpr0 = COPY [[S_BFE_U32_]](s32) + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = G_CONSTANT i32 1 + %2:_(s32) = G_CONSTANT i32 10 + %3:_(s32) = G_UBFX %0, %1(s32), %2 + $sgpr0 = COPY %3(s32) +... + +# Expand to a sequence that combines the offset and width for the two operand +# version of the 64-bit scalar instruction. +--- +name: test_ubfx_s64_sss +legalized: true + +body: | + bb.0.entry: + liveins: $sgpr0_sgpr1, $sgpr2, $sgpr3 + + ; CHECK-LABEL: name: test_ubfx_s64_sss + ; CHECK: [[COPY:%[0-9]+]]:sreg_64(s64) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 63 + ; CHECK: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[COPY1]], [[C]] + ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 + ; CHECK: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY2]], [[C1]](s32) + ; CHECK: [[OR:%[0-9]+]]:sreg_32(s32) = G_OR [[AND]], [[SHL]] + ; CHECK: [[S_BFE_U64_:%[0-9]+]]:sreg_64(s64) = S_BFE_U64 [[COPY]](s64), [[OR]](s32), implicit-def $scc + ; CHECK: $sgpr0_sgpr1 = COPY [[S_BFE_U64_]](s64) + %0:_(s64) = COPY $sgpr0_sgpr1 + %1:_(s32) = COPY $sgpr2 + %2:_(s32) = COPY $sgpr3 + %3:_(s64) = G_UBFX %0, %1(s32), %2 + $sgpr0_sgpr1 = COPY %3(s64) +... + +--- +name: test_ubfx_s64_sii +legalized: true + +body: | + bb.0.entry: + liveins: $sgpr0_sgpr1 + + ; CHECK-LABEL: name: test_ubfx_s64_sii + ; CHECK: [[COPY:%[0-9]+]]:sreg_64(s64) = COPY $sgpr0_sgpr1 + ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 + ; CHECK: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 63 + ; CHECK: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[C]], [[C2]] + ; CHECK: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 + ; CHECK: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[C1]], [[C3]](s32) + ; CHECK: [[OR:%[0-9]+]]:sreg_32(s32) = G_OR [[AND]], [[SHL]] + ; CHECK: [[S_BFE_U64_:%[0-9]+]]:sreg_64(s64) = S_BFE_U64 [[COPY]](s64), [[OR]](s32), implicit-def $scc + ; CHECK: $sgpr0_sgpr1 = COPY [[S_BFE_U64_]](s64) + %0:_(s64) = COPY $sgpr0_sgpr1 + %1:_(s32) = G_CONSTANT i32 1 + %2:_(s32) = G_CONSTANT i32 10 + %3:_(s64) = G_UBFX %0, %1(s32), %2 + $sgpr0_sgpr1 = COPY %3(s64) +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll index 2c68143dc1579..9ccc2ee540f26 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll @@ -604,33 +604,34 @@ define i32 @v_saddsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) { ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_mov_b32 s4, 8 ; GFX9-NEXT: v_lshrrev_b32_sdwa v2, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX9-NEXT: v_lshrrev_b32_sdwa v5, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v0 ; GFX9-NEXT: v_mov_b32_e32 v8, 0xffff ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v0 ; GFX9-NEXT: v_and_or_b32 v0, v0, v8, v2 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v4 -; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v1 +; GFX9-NEXT: v_lshrrev_b32_sdwa v5, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v1 -; GFX9-NEXT: v_and_or_b32 v1, v1, v8, v5 ; GFX9-NEXT: v_and_or_b32 v2, v3, v8, v2 +; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v1 +; GFX9-NEXT: v_and_or_b32 v1, v1, v8, v5 ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v7 +; GFX9-NEXT: v_and_or_b32 v3, v6, v8, v3 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1] ; GFX9-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1] -; GFX9-NEXT: v_and_or_b32 v3, v6, v8, v3 ; GFX9-NEXT: v_pk_add_i16 v0, v0, v1 clamp ; GFX9-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1] ; GFX9-NEXT: v_pk_lshlrev_b16 v3, 8, v3 op_sel_hi:[0,1] -; GFX9-NEXT: v_pk_ashrrev_i16 v0, 8, v0 op_sel_hi:[0,1] -; GFX9-NEXT: s_movk_i32 s4, 0xff ; GFX9-NEXT: v_pk_add_i16 v1, v2, v3 clamp -; GFX9-NEXT: v_and_b32_sdwa v2, v0, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b32_e32 v2, 8, v2 +; GFX9-NEXT: v_pk_ashrrev_i16 v0, 8, v0 op_sel_hi:[0,1] +; GFX9-NEXT: v_mov_b32_e32 v2, 8 ; GFX9-NEXT: v_pk_ashrrev_i16 v1, 8, v1 op_sel_hi:[0,1] +; GFX9-NEXT: s_movk_i32 s4, 0xff +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX9-NEXT: v_and_or_b32 v0, v0, s4, v2 ; GFX9-NEXT: v_and_b32_e32 v2, s4, v1 +; GFX9-NEXT: v_mov_b32_e32 v3, 24 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX9-NEXT: v_and_b32_sdwa v1, v1, s4 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX9-NEXT: v_or3_b32 v0, v0, v2, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -638,33 +639,34 @@ define i32 @v_saddsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_mov_b32 s4, 8 ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v0 +; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v1 +; GFX10-NEXT: s_mov_b32 s4, 8 +; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v0 ; GFX10-NEXT: v_lshrrev_b32_sdwa v2, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX10-NEXT: v_lshrrev_b32_sdwa v6, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX10-NEXT: v_mov_b32_e32 v5, 0xffff -; GFX10-NEXT: v_lshrrev_b32_e32 v7, 24, v1 -; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff ; GFX10-NEXT: v_lshrrev_b32_e32 v8, 16, v1 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX10-NEXT: v_and_or_b32 v0, v0, v5, v2 -; GFX10-NEXT: v_and_or_b32 v1, v1, v5, v6 -; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v7 +; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v5 ; GFX10-NEXT: s_movk_i32 s4, 0xff -; GFX10-NEXT: v_and_or_b32 v3, v3, v5, v4 +; GFX10-NEXT: v_and_or_b32 v0, v0, v7, v2 +; GFX10-NEXT: v_and_or_b32 v1, v1, v7, v6 +; GFX10-NEXT: v_and_or_b32 v2, v3, v7, v4 +; GFX10-NEXT: v_and_or_b32 v3, v8, v7, v5 +; GFX10-NEXT: v_mov_b32_e32 v4, 24 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1] -; GFX10-NEXT: v_and_or_b32 v2, v8, v5, v2 +; GFX10-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_lshlrev_b16 v3, 8, v3 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_add_i16 v0, v0, v1 clamp -; GFX10-NEXT: v_pk_lshlrev_b16 v1, 8, v2 op_sel_hi:[0,1] +; GFX10-NEXT: v_pk_add_i16 v1, v2, v3 clamp +; GFX10-NEXT: v_mov_b32_e32 v2, 8 ; GFX10-NEXT: v_pk_ashrrev_i16 v0, 8, v0 op_sel_hi:[0,1] -; GFX10-NEXT: v_pk_add_i16 v1, v3, v1 clamp -; GFX10-NEXT: v_and_b32_sdwa v2, v0, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX10-NEXT: v_pk_ashrrev_i16 v1, 8, v1 op_sel_hi:[0,1] -; GFX10-NEXT: v_lshlrev_b32_e32 v2, 8, v2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX10-NEXT: v_and_b32_e32 v3, s4, v1 -; GFX10-NEXT: v_and_b32_sdwa v1, v1, s4 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX10-NEXT: v_and_or_b32 v0, v0, s4, v2 ; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v3 ; GFX10-NEXT: v_or3_b32 v0, v0, v2, v1 @@ -831,46 +833,47 @@ define amdgpu_ps i32 @s_saddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) { ; ; GFX9-LABEL: s_saddsat_v4i8: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_lshr_b32 s2, s0, 8 -; GFX9-NEXT: s_lshr_b32 s3, s0, 16 -; GFX9-NEXT: s_lshr_b32 s4, s0, 24 -; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s2 -; GFX9-NEXT: s_pack_ll_b32_b16 s2, s3, s4 +; GFX9-NEXT: s_lshr_b32 s3, s0, 8 ; GFX9-NEXT: s_lshr_b32 s4, s0, 16 -; GFX9-NEXT: s_mov_b32 s3, 0x80008 -; GFX9-NEXT: s_lshr_b32 s5, s1, 8 -; GFX9-NEXT: s_lshl_b32 s0, s0, s3 -; GFX9-NEXT: s_lshl_b32 s4, s4, 8 -; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s4 -; GFX9-NEXT: s_lshr_b32 s4, s2, 16 -; GFX9-NEXT: s_lshr_b32 s6, s1, 16 -; GFX9-NEXT: s_lshr_b32 s7, s1, 24 -; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s5 -; GFX9-NEXT: s_lshr_b32 s5, s1, 16 -; GFX9-NEXT: s_lshl_b32 s2, s2, s3 -; GFX9-NEXT: s_lshl_b32 s4, s4, 8 -; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s4 -; GFX9-NEXT: s_pack_ll_b32_b16 s4, s6, s7 -; GFX9-NEXT: s_lshl_b32 s1, s1, s3 -; GFX9-NEXT: s_lshl_b32 s5, s5, 8 -; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s5 -; GFX9-NEXT: s_lshr_b32 s5, s4, 16 +; GFX9-NEXT: s_lshr_b32 s6, s0, 24 +; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s3 +; GFX9-NEXT: s_pack_ll_b32_b16 s3, s4, s6 +; GFX9-NEXT: s_lshr_b32 s6, s0, 16 +; GFX9-NEXT: s_mov_b32 s4, 0x80008 +; GFX9-NEXT: s_lshr_b32 s7, s1, 8 +; GFX9-NEXT: s_lshl_b32 s0, s0, s4 +; GFX9-NEXT: s_lshl_b32 s6, s6, 8 +; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s6 +; GFX9-NEXT: s_lshr_b32 s6, s3, 16 +; GFX9-NEXT: s_lshr_b32 s8, s1, 16 +; GFX9-NEXT: s_lshr_b32 s9, s1, 24 +; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s7 +; GFX9-NEXT: s_lshr_b32 s7, s1, 16 +; GFX9-NEXT: s_lshl_b32 s3, s3, s4 +; GFX9-NEXT: s_lshl_b32 s6, s6, 8 +; GFX9-NEXT: s_pack_ll_b32_b16 s3, s3, s6 +; GFX9-NEXT: s_pack_ll_b32_b16 s6, s8, s9 +; GFX9-NEXT: s_lshl_b32 s1, s1, s4 +; GFX9-NEXT: s_lshl_b32 s7, s7, 8 +; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s7 +; GFX9-NEXT: s_lshr_b32 s7, s6, 16 +; GFX9-NEXT: s_lshl_b32 s4, s6, s4 +; GFX9-NEXT: s_lshl_b32 s6, s7, 8 ; GFX9-NEXT: v_mov_b32_e32 v0, s1 -; GFX9-NEXT: s_lshl_b32 s3, s4, s3 -; GFX9-NEXT: s_lshl_b32 s4, s5, 8 +; GFX9-NEXT: s_pack_ll_b32_b16 s4, s4, s6 ; GFX9-NEXT: v_pk_add_i16 v0, s0, v0 clamp -; GFX9-NEXT: s_pack_ll_b32_b16 s3, s3, s4 -; GFX9-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-NEXT: v_mov_b32_e32 v1, s4 +; GFX9-NEXT: v_pk_add_i16 v1, s3, v1 clamp +; GFX9-NEXT: s_mov_b32 s2, 8 ; GFX9-NEXT: v_pk_ashrrev_i16 v0, 8, v0 op_sel_hi:[0,1] -; GFX9-NEXT: s_movk_i32 s0, 0xff -; GFX9-NEXT: v_pk_add_i16 v1, s2, v1 clamp -; GFX9-NEXT: v_and_b32_sdwa v2, v0, s0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b32_e32 v2, 8, v2 ; GFX9-NEXT: v_pk_ashrrev_i16 v1, 8, v1 op_sel_hi:[0,1] +; GFX9-NEXT: s_movk_i32 s0, 0xff +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, s2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX9-NEXT: v_and_or_b32 v0, v0, s0, v2 ; GFX9-NEXT: v_and_b32_e32 v2, s0, v1 +; GFX9-NEXT: s_mov_b32 s5, 24 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX9-NEXT: v_and_b32_sdwa v1, v1, s0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX9-NEXT: v_or3_b32 v0, v0, v2, v1 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0 ; GFX9-NEXT: ; return to shader part epilog @@ -878,42 +881,43 @@ define amdgpu_ps i32 @s_saddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) { ; GFX10-LABEL: s_saddsat_v4i8: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_lshr_b32 s2, s0, 8 -; GFX10-NEXT: s_lshr_b32 s5, s1, 8 ; GFX10-NEXT: s_lshr_b32 s3, s0, 16 ; GFX10-NEXT: s_lshr_b32 s4, s0, 24 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s2 +; GFX10-NEXT: s_pack_ll_b32_b16 s2, s3, s4 +; GFX10-NEXT: s_lshr_b32 s4, s0, 16 +; GFX10-NEXT: s_mov_b32 s3, 0x80008 +; GFX10-NEXT: s_lshr_b32 s5, s1, 8 ; GFX10-NEXT: s_lshr_b32 s6, s1, 16 ; GFX10-NEXT: s_lshr_b32 s7, s1, 24 +; GFX10-NEXT: s_lshl_b32 s0, s0, s3 +; GFX10-NEXT: s_lshl_b32 s4, s4, 8 ; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s5 -; GFX10-NEXT: s_lshr_b32 s8, s0, 16 +; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s4 +; GFX10-NEXT: s_pack_ll_b32_b16 s4, s6, s7 +; GFX10-NEXT: s_lshr_b32 s8, s2, 16 ; GFX10-NEXT: s_lshr_b32 s5, s1, 16 -; GFX10-NEXT: s_mov_b32 s2, 0x80008 +; GFX10-NEXT: s_lshr_b32 s6, s4, 16 +; GFX10-NEXT: s_lshl_b32 s2, s2, s3 ; GFX10-NEXT: s_lshl_b32 s8, s8, 8 -; GFX10-NEXT: s_lshl_b32 s0, s0, s2 -; GFX10-NEXT: s_lshl_b32 s1, s1, s2 +; GFX10-NEXT: s_lshl_b32 s1, s1, s3 ; GFX10-NEXT: s_lshl_b32 s5, s5, 8 -; GFX10-NEXT: s_pack_ll_b32_b16 s3, s3, s4 -; GFX10-NEXT: s_pack_ll_b32_b16 s6, s6, s7 +; GFX10-NEXT: s_lshl_b32 s3, s4, s3 +; GFX10-NEXT: s_lshl_b32 s4, s6, 8 ; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s5 -; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s8 -; GFX10-NEXT: s_lshr_b32 s4, s3, 16 -; GFX10-NEXT: s_lshr_b32 s5, s6, 16 +; GFX10-NEXT: s_pack_ll_b32_b16 s2, s2, s8 +; GFX10-NEXT: s_pack_ll_b32_b16 s3, s3, s4 ; GFX10-NEXT: v_pk_add_i16 v0, s0, s1 clamp -; GFX10-NEXT: s_lshl_b32 s3, s3, s2 -; GFX10-NEXT: s_lshl_b32 s4, s4, 8 -; GFX10-NEXT: s_lshl_b32 s0, s6, s2 -; GFX10-NEXT: s_lshl_b32 s1, s5, 8 -; GFX10-NEXT: s_pack_ll_b32_b16 s2, s3, s4 -; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s1 +; GFX10-NEXT: v_pk_add_i16 v1, s2, s3 clamp +; GFX10-NEXT: s_mov_b32 s0, 8 +; GFX10-NEXT: s_movk_i32 s1, 0xff ; GFX10-NEXT: v_pk_ashrrev_i16 v0, 8, v0 op_sel_hi:[0,1] -; GFX10-NEXT: v_pk_add_i16 v1, s2, s0 clamp -; GFX10-NEXT: s_movk_i32 s0, 0xff -; GFX10-NEXT: v_and_b32_sdwa v2, v0, s0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX10-NEXT: v_pk_ashrrev_i16 v1, 8, v1 op_sel_hi:[0,1] -; GFX10-NEXT: v_lshlrev_b32_e32 v2, 8, v2 -; GFX10-NEXT: v_and_b32_e32 v3, s0, v1 -; GFX10-NEXT: v_and_b32_sdwa v1, v1, s0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_or_b32 v0, v0, s0, v2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_b32_e32 v3, s1, v1 +; GFX10-NEXT: s_mov_b32 s0, 24 +; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v0, v0, s1, v2 ; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v3 ; GFX10-NEXT: v_or3_b32 v0, v0, v2, v1 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sbfx.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sbfx.ll new file mode 100644 index 0000000000000..afb2ebd5eb0db --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sbfx.ll @@ -0,0 +1,202 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji -o - < %s | FileCheck --check-prefix=GCN %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -o - < %s | FileCheck --check-prefix=GCN %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -o - < %s | FileCheck --check-prefix=GFX10 %s + +; Test vector signed bitfield extract. +define signext i8 @v_ashr_i8_i32(i32 %value) { +; GCN-LABEL: v_ashr_i8_i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_bfe_i32 v0, v0, 4, 8 +; GCN-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_ashr_i8_i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_bfe_i32 v0, v0, 4, 8 +; GFX10-NEXT: s_setpc_b64 s[30:31] + %1 = ashr i32 %value, 4 + %2 = trunc i32 %1 to i8 + ret i8 %2 +} + +define signext i16 @v_ashr_i16_i32(i32 %value) { +; GCN-LABEL: v_ashr_i16_i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_bfe_i32 v0, v0, 9, 16 +; GCN-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_ashr_i16_i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_bfe_i32 v0, v0, 9, 16 +; GFX10-NEXT: s_setpc_b64 s[30:31] + %1 = ashr i32 %value, 9 + %2 = trunc i32 %1 to i16 + ret i16 %2 +} + +define signext i8 @v_lshr_i8_i32(i32 %value) { +; GCN-LABEL: v_lshr_i8_i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_bfe_i32 v0, v0, 4, 8 +; GCN-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_lshr_i8_i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_bfe_i32 v0, v0, 4, 8 +; GFX10-NEXT: s_setpc_b64 s[30:31] + %1 = lshr i32 %value, 4 + %2 = trunc i32 %1 to i8 + ret i8 %2 +} + +define signext i16 @v_lshr_i16_i32(i32 %value) { +; GCN-LABEL: v_lshr_i16_i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_bfe_i32 v0, v0, 9, 16 +; GCN-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_lshr_i16_i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_bfe_i32 v0, v0, 9, 16 +; GFX10-NEXT: s_setpc_b64 s[30:31] + %1 = lshr i32 %value, 9 + %2 = trunc i32 %1 to i16 + ret i16 %2 +} + +; Test vector bitfield extract for 64-bits. +define i64 @v_ashr_i64(i64 %value) { +; GCN-LABEL: v_ashr_i64: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_ashrrev_i64 v[0:1], 10, v[0:1] +; GCN-NEXT: v_bfe_i32 v0, v0, 0, 4 +; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GCN-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_ashr_i64: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_ashrrev_i64 v[0:1], 10, v[0:1] +; GFX10-NEXT: v_bfe_i32 v0, v0, 0, 4 +; GFX10-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GFX10-NEXT: s_setpc_b64 s[30:31] + %1 = ashr i64 %value, 10 + %2 = shl i64 %1, 60 + %3 = ashr i64 %2, 60 + ret i64 %3 +} + +define i64 @v_lshr_i64(i64 %value) { +; GCN-LABEL: v_lshr_i64: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_ashrrev_i64 v[0:1], 10, v[0:1] +; GCN-NEXT: v_bfe_i32 v0, v0, 0, 4 +; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GCN-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_lshr_i64: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_ashrrev_i64 v[0:1], 10, v[0:1] +; GFX10-NEXT: v_bfe_i32 v0, v0, 0, 4 +; GFX10-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GFX10-NEXT: s_setpc_b64 s[30:31] + %1 = lshr i64 %value, 10 + %2 = shl i64 %1, 60 + %3 = ashr i64 %2, 60 + ret i64 %3 +} + +; Test scalar signed bitfield extract. +define amdgpu_ps signext i8 @s_ashr_i8_i32(i32 inreg %value) { +; GCN-LABEL: s_ashr_i8_i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_bfe_i32 s0, s0, 0x80004 +; GCN-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: s_ashr_i8_i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_bfe_i32 s0, s0, 0x80004 +; GFX10-NEXT: ; return to shader part epilog + %1 = ashr i32 %value, 4 + %2 = trunc i32 %1 to i8 + ret i8 %2 +} + +define amdgpu_ps signext i16 @s_ashr_i16_i32(i32 inreg %value) { +; GCN-LABEL: s_ashr_i16_i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_bfe_i32 s0, s0, 0x100009 +; GCN-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: s_ashr_i16_i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_bfe_i32 s0, s0, 0x100009 +; GFX10-NEXT: ; return to shader part epilog + %1 = ashr i32 %value, 9 + %2 = trunc i32 %1 to i16 + ret i16 %2 +} + +define amdgpu_ps signext i8 @s_lshr_i8_i32(i32 inreg %value) { +; GCN-LABEL: s_lshr_i8_i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_bfe_i32 s0, s0, 0x80004 +; GCN-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: s_lshr_i8_i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_bfe_i32 s0, s0, 0x80004 +; GFX10-NEXT: ; return to shader part epilog + %1 = lshr i32 %value, 4 + %2 = trunc i32 %1 to i8 + ret i8 %2 +} + +define amdgpu_ps signext i16 @s_lshr_i16_i32(i32 inreg %value) { +; GCN-LABEL: s_lshr_i16_i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_bfe_i32 s0, s0, 0x100009 +; GCN-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: s_lshr_i16_i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_bfe_i32 s0, s0, 0x100009 +; GFX10-NEXT: ; return to shader part epilog + %1 = lshr i32 %value, 9 + %2 = trunc i32 %1 to i16 + ret i16 %2 +} + +; Test scalar bitfield extract for 64-bits. +define amdgpu_ps i64 @s_ashr_i64(i64 inreg %value) { +; GCN-LABEL: s_ashr_i64: +; GCN: ; %bb.0: +; GCN-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x40001 +; GCN-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: s_ashr_i64: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x40001 +; GFX10-NEXT: ; return to shader part epilog + %1 = ashr i64 %value, 1 + %2 = shl i64 %1, 60 + %3 = ashr i64 %2, 60 + ret i64 %3 +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll index fd8590ca7337c..984b6a21e3adc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll @@ -2367,8 +2367,7 @@ define amdgpu_kernel void @sdiv_i8(i8 addrspace(1)* %out0, i8 addrspace(1)* %out ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dword s0, s[4:5], 0x10 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_lshr_b32 s1, s0, 8 -; GFX8-NEXT: s_sext_i32_i8 s1, s1 +; GFX8-NEXT: s_bfe_i32 s1, s0, 0x80008 ; GFX8-NEXT: s_ashr_i32 s6, s1, 31 ; GFX8-NEXT: s_add_i32 s1, s1, s6 ; GFX8-NEXT: s_xor_b32 s7, s1, s6 @@ -2417,8 +2416,7 @@ define amdgpu_kernel void @sdiv_i8(i8 addrspace(1)* %out0, i8 addrspace(1)* %out ; GFX9-NEXT: s_load_dword s0, s[4:5], 0x10 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_lshr_b32 s1, s0, 8 -; GFX9-NEXT: s_sext_i32_i8 s1, s1 +; GFX9-NEXT: s_bfe_i32 s1, s0, 0x80008 ; GFX9-NEXT: s_ashr_i32 s6, s1, 31 ; GFX9-NEXT: s_add_i32 s1, s1, s6 ; GFX9-NEXT: s_xor_b32 s7, s1, s6 @@ -2462,15 +2460,14 @@ define amdgpu_kernel void @sdiv_i8(i8 addrspace(1)* %out0, i8 addrspace(1)* %out ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dword s0, s[4:5], 0x10 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_lshr_b32 s1, s0, 8 +; GFX10-NEXT: s_bfe_i32 s1, s0, 0x80008 ; GFX10-NEXT: s_sext_i32_i8 s0, s0 -; GFX10-NEXT: s_sext_i32_i8 s1, s1 -; GFX10-NEXT: s_ashr_i32 s8, s0, 31 ; GFX10-NEXT: s_ashr_i32 s6, s1, 31 -; GFX10-NEXT: s_add_i32 s0, s0, s8 +; GFX10-NEXT: s_ashr_i32 s8, s0, 31 ; GFX10-NEXT: s_add_i32 s1, s1, s6 -; GFX10-NEXT: s_xor_b32 s0, s0, s8 +; GFX10-NEXT: s_add_i32 s0, s0, s8 ; GFX10-NEXT: s_xor_b32 s7, s1, s6 +; GFX10-NEXT: s_xor_b32 s0, s0, s8 ; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s7 ; GFX10-NEXT: s_sub_i32 s1, 0, s7 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 @@ -2513,63 +2510,60 @@ define amdgpu_kernel void @sdiv_i8(i8 addrspace(1)* %out0, i8 addrspace(1)* %out define amdgpu_kernel void @sdivrem_v2i8(<2 x i8> addrspace(1)* %out0, <2 x i8> addrspace(1)* %out1, <2 x i8> %x, <2 x i8> %y) { ; GFX8-LABEL: sdivrem_v2i8: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dword s0, s[4:5], 0x10 +; GFX8-NEXT: s_load_dword s2, s[4:5], 0x10 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_lshr_b32 s1, s0, 16 -; GFX8-NEXT: s_sext_i32_i8 s1, s1 -; GFX8-NEXT: s_ashr_i32 s2, s1, 31 -; GFX8-NEXT: s_add_i32 s1, s1, s2 -; GFX8-NEXT: s_xor_b32 s3, s1, s2 -; GFX8-NEXT: v_cvt_f32_u32_e32 v0, s3 -; GFX8-NEXT: s_sub_i32 s7, 0, s3 -; GFX8-NEXT: s_lshr_b32 s1, s0, 24 -; GFX8-NEXT: s_sext_i32_i8 s6, s0 +; GFX8-NEXT: s_bfe_i32 s0, s2, 0x80010 +; GFX8-NEXT: s_ashr_i32 s3, s0, 31 +; GFX8-NEXT: s_add_i32 s0, s0, s3 +; GFX8-NEXT: s_xor_b32 s8, s0, s3 +; GFX8-NEXT: v_cvt_f32_u32_e32 v0, s8 +; GFX8-NEXT: s_sub_i32 s6, 0, s8 +; GFX8-NEXT: s_bfe_i32 s1, s2, 0x80018 +; GFX8-NEXT: s_ashr_i32 s10, s1, 31 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX8-NEXT: s_sext_i32_i8 s1, s1 -; GFX8-NEXT: s_ashr_i32 s8, s6, 31 -; GFX8-NEXT: s_ashr_i32 s9, s1, 31 +; GFX8-NEXT: s_add_i32 s1, s1, s10 +; GFX8-NEXT: s_xor_b32 s11, s1, s10 +; GFX8-NEXT: v_cvt_f32_u32_e32 v2, s11 ; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX8-NEXT: s_add_i32 s6, s6, s8 -; GFX8-NEXT: s_add_i32 s1, s1, s9 -; GFX8-NEXT: s_xor_b32 s11, s1, s9 -; GFX8-NEXT: v_mul_lo_u32 v1, s7, v0 -; GFX8-NEXT: s_xor_b32 s10, s6, s8 -; GFX8-NEXT: v_cvt_f32_u32_e32 v2, s11 -; GFX8-NEXT: s_lshr_b32 s12, s0, 8 -; GFX8-NEXT: v_mul_hi_u32 v1, v0, v1 +; GFX8-NEXT: s_sext_i32_i8 s0, s2 +; GFX8-NEXT: s_ashr_i32 s9, s0, 31 +; GFX8-NEXT: s_add_i32 s0, s0, s9 +; GFX8-NEXT: v_mul_lo_u32 v1, s6, v0 +; GFX8-NEXT: s_xor_b32 s0, s0, s9 +; GFX8-NEXT: v_rcp_iflag_f32_e32 v2, v2 ; GFX8-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 +; GFX8-NEXT: v_mul_hi_u32 v1, v0, v1 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1 -; GFX8-NEXT: v_mul_hi_u32 v0, s10, v0 -; GFX8-NEXT: v_rcp_iflag_f32_e32 v1, v2 -; GFX8-NEXT: v_mul_lo_u32 v2, v0, s3 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, 1, v0 -; GFX8-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 +; GFX8-NEXT: v_mul_hi_u32 v0, s0, v0 +; GFX8-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v2 ; GFX8-NEXT: v_cvt_u32_f32_e32 v1, v1 -; GFX8-NEXT: v_sub_u32_e32 v2, vcc, s10, v2 -; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s3, v2 +; GFX8-NEXT: v_mul_lo_u32 v2, v0, s8 +; GFX8-NEXT: v_add_u32_e32 v3, vcc, 1, v0 +; GFX8-NEXT: v_sub_u32_e32 v2, vcc, s0, v2 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s8, v2 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc -; GFX8-NEXT: v_subrev_u32_e64 v3, s[0:1], s3, v2 +; GFX8-NEXT: v_subrev_u32_e64 v3, s[0:1], s8, v2 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc ; GFX8-NEXT: v_add_u32_e32 v3, vcc, 1, v0 -; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s3, v2 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s8, v2 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc -; GFX8-NEXT: v_subrev_u32_e64 v3, s[0:1], s3, v2 +; GFX8-NEXT: v_subrev_u32_e64 v3, s[0:1], s8, v2 ; GFX8-NEXT: s_sub_i32 s1, 0, s11 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc ; GFX8-NEXT: v_mul_lo_u32 v3, s1, v1 -; GFX8-NEXT: s_sext_i32_i8 s1, s12 -; GFX8-NEXT: s_xor_b32 s0, s8, s2 +; GFX8-NEXT: s_bfe_i32 s1, s2, 0x80008 ; GFX8-NEXT: s_ashr_i32 s2, s1, 31 -; GFX8-NEXT: v_mul_hi_u32 v3, v1, v3 ; GFX8-NEXT: s_add_i32 s1, s1, s2 +; GFX8-NEXT: v_mul_hi_u32 v3, v1, v3 ; GFX8-NEXT: s_xor_b32 s1, s1, s2 +; GFX8-NEXT: s_xor_b32 s0, s9, s3 ; GFX8-NEXT: v_xor_b32_e32 v0, s0, v0 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v3 ; GFX8-NEXT: v_mul_hi_u32 v1, s1, v1 -; GFX8-NEXT: v_xor_b32_e32 v2, s8, v2 +; GFX8-NEXT: v_xor_b32_e32 v2, s9, v2 ; GFX8-NEXT: v_subrev_u32_e32 v0, vcc, s0, v0 -; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, s8, v2 +; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, s9, v2 ; GFX8-NEXT: v_mul_lo_u32 v3, v1, s11 ; GFX8-NEXT: v_add_u32_e32 v4, vcc, 1, v1 ; GFX8-NEXT: v_sub_u32_e32 v3, vcc, s1, v3 @@ -2581,7 +2575,7 @@ define amdgpu_kernel void @sdivrem_v2i8(<2 x i8> addrspace(1)* %out0, <2 x i8> a ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s11, v3 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc ; GFX8-NEXT: v_subrev_u32_e64 v4, s[0:1], s11, v3 -; GFX8-NEXT: s_xor_b32 s0, s2, s9 +; GFX8-NEXT: s_xor_b32 s0, s2, s10 ; GFX8-NEXT: v_xor_b32_e32 v1, s0, v1 ; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc ; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, s0, v1 @@ -2607,60 +2601,58 @@ define amdgpu_kernel void @sdivrem_v2i8(<2 x i8> addrspace(1)* %out0, <2 x i8> a ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dword s6, s[4:5], 0x10 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_lshr_b32 s0, s6, 16 -; GFX9-NEXT: s_sext_i32_i8 s0, s0 +; GFX9-NEXT: s_bfe_i32 s0, s6, 0x80010 ; GFX9-NEXT: s_ashr_i32 s7, s0, 31 ; GFX9-NEXT: s_add_i32 s0, s0, s7 ; GFX9-NEXT: s_xor_b32 s8, s0, s7 ; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s8 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GFX9-NEXT: s_lshr_b32 s5, s6, 24 -; GFX9-NEXT: s_sext_i32_i8 s5, s5 -; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX9-NEXT: s_bfe_i32 s5, s6, 0x80018 ; GFX9-NEXT: s_ashr_i32 s9, s5, 31 +; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX9-NEXT: s_add_i32 s5, s5, s9 ; GFX9-NEXT: s_xor_b32 s5, s5, s9 +; GFX9-NEXT: v_cvt_f32_u32_e32 v1, s5 ; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX9-NEXT: s_sub_i32 s10, 0, s8 -; GFX9-NEXT: v_cvt_f32_u32_e32 v2, s5 -; GFX9-NEXT: s_lshr_b32 s4, s6, 8 -; GFX9-NEXT: v_mul_lo_u32 v1, s10, v0 -; GFX9-NEXT: s_sext_i32_i8 s6, s6 -; GFX9-NEXT: v_rcp_iflag_f32_e32 v2, v2 -; GFX9-NEXT: s_ashr_i32 s10, s6, 31 -; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1 -; GFX9-NEXT: s_add_i32 s6, s6, s10 -; GFX9-NEXT: s_xor_b32 s6, s6, s10 -; GFX9-NEXT: s_sub_i32 s11, 0, s5 -; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 -; GFX9-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v2 +; GFX9-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; GFX9-NEXT: s_sext_i32_i8 s4, s6 +; GFX9-NEXT: v_mul_lo_u32 v2, s10, v0 +; GFX9-NEXT: s_ashr_i32 s10, s4, 31 +; GFX9-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 ; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 -; GFX9-NEXT: v_mul_hi_u32 v0, s6, v0 -; GFX9-NEXT: s_sext_i32_i8 s4, s4 -; GFX9-NEXT: s_xor_b32 s7, s10, s7 -; GFX9-NEXT: v_mul_lo_u32 v3, s11, v1 -; GFX9-NEXT: v_mul_lo_u32 v2, v0, s8 +; GFX9-NEXT: v_mul_hi_u32 v2, v0, v2 +; GFX9-NEXT: s_add_i32 s4, s4, s10 +; GFX9-NEXT: s_xor_b32 s4, s4, s10 +; GFX9-NEXT: s_sub_i32 s11, 0, s5 +; GFX9-NEXT: v_add_u32_e32 v0, v0, v2 +; GFX9-NEXT: v_mul_hi_u32 v0, s4, v0 +; GFX9-NEXT: v_mul_lo_u32 v2, s11, v1 +; GFX9-NEXT: s_bfe_i32 s6, s6, 0x80008 +; GFX9-NEXT: s_ashr_i32 s11, s6, 31 +; GFX9-NEXT: v_mul_lo_u32 v3, v0, s8 +; GFX9-NEXT: v_mul_hi_u32 v2, v1, v2 +; GFX9-NEXT: s_add_i32 s6, s6, s11 ; GFX9-NEXT: v_add_u32_e32 v4, 1, v0 -; GFX9-NEXT: v_mul_hi_u32 v3, v1, v3 -; GFX9-NEXT: v_sub_u32_e32 v2, s6, v2 -; GFX9-NEXT: s_ashr_i32 s6, s4, 31 -; GFX9-NEXT: s_add_i32 s4, s4, s6 -; GFX9-NEXT: s_xor_b32 s4, s4, s6 -; GFX9-NEXT: v_add_u32_e32 v1, v1, v3 -; GFX9-NEXT: v_mul_hi_u32 v1, s4, v1 -; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s8, v2 +; GFX9-NEXT: v_sub_u32_e32 v3, s4, v3 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s8, v3 +; GFX9-NEXT: s_xor_b32 s4, s6, s11 +; GFX9-NEXT: v_add_u32_e32 v1, v1, v2 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc -; GFX9-NEXT: v_subrev_u32_e32 v4, s8, v2 -; GFX9-NEXT: v_mul_lo_u32 v3, v1, s5 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GFX9-NEXT: v_subrev_u32_e32 v4, s8, v3 +; GFX9-NEXT: v_mul_hi_u32 v1, s4, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc ; GFX9-NEXT: v_add_u32_e32 v4, 1, v0 -; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s8, v2 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s8, v3 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc -; GFX9-NEXT: v_subrev_u32_e32 v4, s8, v2 -; GFX9-NEXT: v_sub_u32_e32 v3, s4, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GFX9-NEXT: v_subrev_u32_e32 v4, s8, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v4, vcc +; GFX9-NEXT: v_mul_lo_u32 v3, v1, s5 ; GFX9-NEXT: v_add_u32_e32 v4, 1, v1 +; GFX9-NEXT: s_xor_b32 s6, s10, s7 +; GFX9-NEXT: v_xor_b32_e32 v0, s6, v0 +; GFX9-NEXT: v_sub_u32_e32 v3, s4, v3 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s5, v3 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc ; GFX9-NEXT: v_subrev_u32_e32 v4, s5, v3 @@ -2668,20 +2660,19 @@ define amdgpu_kernel void @sdivrem_v2i8(<2 x i8> addrspace(1)* %out0, <2 x i8> a ; GFX9-NEXT: v_add_u32_e32 v4, 1, v1 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s5, v3 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc -; GFX9-NEXT: s_xor_b32 s4, s6, s9 +; GFX9-NEXT: s_xor_b32 s4, s11, s9 ; GFX9-NEXT: v_xor_b32_e32 v1, s4, v1 ; GFX9-NEXT: v_subrev_u32_e32 v4, s5, v3 ; GFX9-NEXT: v_subrev_u32_e32 v1, s4, v1 ; GFX9-NEXT: s_movk_i32 s4, 0xff -; GFX9-NEXT: v_xor_b32_e32 v0, s7, v0 ; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc ; GFX9-NEXT: v_and_b32_e32 v1, s4, v1 -; GFX9-NEXT: v_xor_b32_e32 v3, s6, v3 -; GFX9-NEXT: v_subrev_u32_e32 v0, s7, v0 +; GFX9-NEXT: v_xor_b32_e32 v3, s11, v3 +; GFX9-NEXT: v_subrev_u32_e32 v0, s6, v0 ; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1 ; GFX9-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX9-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-NEXT: v_subrev_u32_e32 v3, s6, v3 +; GFX9-NEXT: v_subrev_u32_e32 v3, s11, v3 ; GFX9-NEXT: v_xor_b32_e32 v2, s10, v2 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_store_short v1, v0, s[0:1] @@ -2696,10 +2687,8 @@ define amdgpu_kernel void @sdivrem_v2i8(<2 x i8> addrspace(1)* %out0, <2 x i8> a ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dword s0, s[4:5], 0x10 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_lshr_b32 s1, s0, 24 -; GFX10-NEXT: s_lshr_b32 s2, s0, 16 -; GFX10-NEXT: s_sext_i32_i8 s1, s1 -; GFX10-NEXT: s_sext_i32_i8 s2, s2 +; GFX10-NEXT: s_bfe_i32 s1, s0, 0x80018 +; GFX10-NEXT: s_bfe_i32 s2, s0, 0x80010 ; GFX10-NEXT: s_ashr_i32 s3, s1, 31 ; GFX10-NEXT: s_ashr_i32 s8, s2, 31 ; GFX10-NEXT: s_add_i32 s1, s1, s3 @@ -2719,16 +2708,15 @@ define amdgpu_kernel void @sdivrem_v2i8(<2 x i8> addrspace(1)* %out0, <2 x i8> a ; GFX10-NEXT: v_mul_lo_u32 v2, s6, v0 ; GFX10-NEXT: v_mul_lo_u32 v3, s7, v1 ; GFX10-NEXT: s_sext_i32_i8 s6, s0 -; GFX10-NEXT: s_lshr_b32 s0, s0, 8 +; GFX10-NEXT: s_bfe_i32 s0, s0, 0x80008 ; GFX10-NEXT: s_ashr_i32 s9, s6, 31 -; GFX10-NEXT: s_sext_i32_i8 s0, s0 -; GFX10-NEXT: s_add_i32 s6, s6, s9 ; GFX10-NEXT: s_ashr_i32 s10, s0, 31 +; GFX10-NEXT: s_add_i32 s6, s6, s9 +; GFX10-NEXT: s_add_i32 s0, s0, s10 ; GFX10-NEXT: v_mul_hi_u32 v2, v0, v2 ; GFX10-NEXT: v_mul_hi_u32 v3, v1, v3 -; GFX10-NEXT: s_add_i32 s0, s0, s10 -; GFX10-NEXT: s_xor_b32 s6, s6, s9 ; GFX10-NEXT: s_xor_b32 s0, s0, s10 +; GFX10-NEXT: s_xor_b32 s6, s6, s9 ; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v2 ; GFX10-NEXT: v_add_nc_u32_e32 v1, v1, v3 ; GFX10-NEXT: v_mul_hi_u32 v0, s0, v0 @@ -2790,8 +2778,7 @@ define amdgpu_kernel void @sdiv_i16(i16 addrspace(1)* %out0, i16 addrspace(1)* % ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dword s0, s[4:5], 0x10 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_lshr_b32 s1, s0, 16 -; GFX8-NEXT: s_sext_i32_i16 s1, s1 +; GFX8-NEXT: s_bfe_i32 s1, s0, 0x100010 ; GFX8-NEXT: s_ashr_i32 s6, s1, 31 ; GFX8-NEXT: s_add_i32 s1, s1, s6 ; GFX8-NEXT: s_xor_b32 s7, s1, s6 @@ -2840,8 +2827,7 @@ define amdgpu_kernel void @sdiv_i16(i16 addrspace(1)* %out0, i16 addrspace(1)* % ; GFX9-NEXT: s_load_dword s0, s[4:5], 0x10 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_lshr_b32 s1, s0, 16 -; GFX9-NEXT: s_sext_i32_i16 s1, s1 +; GFX9-NEXT: s_bfe_i32 s1, s0, 0x100010 ; GFX9-NEXT: s_ashr_i32 s6, s1, 31 ; GFX9-NEXT: s_add_i32 s1, s1, s6 ; GFX9-NEXT: s_xor_b32 s7, s1, s6 @@ -2885,15 +2871,14 @@ define amdgpu_kernel void @sdiv_i16(i16 addrspace(1)* %out0, i16 addrspace(1)* % ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dword s0, s[4:5], 0x10 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_lshr_b32 s1, s0, 16 +; GFX10-NEXT: s_bfe_i32 s1, s0, 0x100010 ; GFX10-NEXT: s_sext_i32_i16 s0, s0 -; GFX10-NEXT: s_sext_i32_i16 s1, s1 -; GFX10-NEXT: s_ashr_i32 s8, s0, 31 ; GFX10-NEXT: s_ashr_i32 s6, s1, 31 -; GFX10-NEXT: s_add_i32 s0, s0, s8 +; GFX10-NEXT: s_ashr_i32 s8, s0, 31 ; GFX10-NEXT: s_add_i32 s1, s1, s6 -; GFX10-NEXT: s_xor_b32 s0, s0, s8 +; GFX10-NEXT: s_add_i32 s0, s0, s8 ; GFX10-NEXT: s_xor_b32 s7, s1, s6 +; GFX10-NEXT: s_xor_b32 s0, s0, s8 ; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s7 ; GFX10-NEXT: s_sub_i32 s1, 0, s7 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 @@ -2937,40 +2922,38 @@ define amdgpu_kernel void @sdivrem_v2i16(<2 x i16> addrspace(1)* %out0, <2 x i16 ; GFX8-LABEL: sdivrem_v2i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dword s0, s[4:5], 0x14 +; GFX8-NEXT: s_load_dword s8, s[4:5], 0x10 +; GFX8-NEXT: s_mov_b32 s9, 0x100010 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: s_sext_i32_i16 s1, s0 ; GFX8-NEXT: s_ashr_i32 s2, s1, 31 ; GFX8-NEXT: s_add_i32 s1, s1, s2 ; GFX8-NEXT: s_xor_b32 s3, s1, s2 ; GFX8-NEXT: v_cvt_f32_u32_e32 v0, s3 -; GFX8-NEXT: s_sub_i32 s7, 0, s3 -; GFX8-NEXT: s_load_dword s1, s[4:5], 0x10 -; GFX8-NEXT: s_lshr_b32 s0, s0, 16 +; GFX8-NEXT: s_sub_i32 s6, 0, s3 +; GFX8-NEXT: s_sext_i32_i16 s1, s8 +; GFX8-NEXT: s_bfe_i32 s0, s0, s9 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX8-NEXT: s_sext_i32_i16 s0, s0 -; GFX8-NEXT: s_ashr_i32 s9, s0, 31 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_sext_i32_i16 s6, s1 +; GFX8-NEXT: s_ashr_i32 s10, s1, 31 +; GFX8-NEXT: s_ashr_i32 s11, s0, 31 +; GFX8-NEXT: s_add_i32 s1, s1, s10 ; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX8-NEXT: s_ashr_i32 s8, s6, 31 -; GFX8-NEXT: s_add_i32 s6, s6, s8 -; GFX8-NEXT: s_add_i32 s0, s0, s9 -; GFX8-NEXT: v_mul_lo_u32 v1, s7, v0 -; GFX8-NEXT: s_xor_b32 s10, s6, s8 -; GFX8-NEXT: s_xor_b32 s11, s0, s9 -; GFX8-NEXT: v_cvt_f32_u32_e32 v2, s11 -; GFX8-NEXT: v_mul_hi_u32 v1, v0, v1 -; GFX8-NEXT: s_lshr_b32 s12, s1, 16 +; GFX8-NEXT: s_add_i32 s0, s0, s11 +; GFX8-NEXT: s_xor_b32 s12, s0, s11 +; GFX8-NEXT: s_xor_b32 s1, s1, s10 +; GFX8-NEXT: v_mul_lo_u32 v1, s6, v0 +; GFX8-NEXT: v_cvt_f32_u32_e32 v2, s12 ; GFX8-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 +; GFX8-NEXT: v_mul_hi_u32 v1, v0, v1 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1 -; GFX8-NEXT: v_mul_hi_u32 v0, s10, v0 +; GFX8-NEXT: v_mul_hi_u32 v0, s1, v0 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v1, v2 ; GFX8-NEXT: v_mul_lo_u32 v2, v0, s3 ; GFX8-NEXT: v_add_u32_e32 v3, vcc, 1, v0 ; GFX8-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 ; GFX8-NEXT: v_cvt_u32_f32_e32 v1, v1 -; GFX8-NEXT: v_sub_u32_e32 v2, vcc, s10, v2 +; GFX8-NEXT: v_sub_u32_e32 v2, vcc, s1, v2 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s3, v2 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; GFX8-NEXT: v_subrev_u32_e64 v3, s[0:1], s3, v2 @@ -2979,11 +2962,11 @@ define amdgpu_kernel void @sdivrem_v2i16(<2 x i16> addrspace(1)* %out0, <2 x i16 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s3, v2 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; GFX8-NEXT: v_subrev_u32_e64 v3, s[0:1], s3, v2 -; GFX8-NEXT: s_sub_i32 s1, 0, s11 +; GFX8-NEXT: s_sub_i32 s1, 0, s12 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc ; GFX8-NEXT: v_mul_lo_u32 v3, s1, v1 -; GFX8-NEXT: s_sext_i32_i16 s1, s12 -; GFX8-NEXT: s_xor_b32 s0, s8, s2 +; GFX8-NEXT: s_bfe_i32 s1, s8, s9 +; GFX8-NEXT: s_xor_b32 s0, s10, s2 ; GFX8-NEXT: s_ashr_i32 s2, s1, 31 ; GFX8-NEXT: v_mul_hi_u32 v3, v1, v3 ; GFX8-NEXT: s_add_i32 s1, s1, s2 @@ -2991,21 +2974,21 @@ define amdgpu_kernel void @sdivrem_v2i16(<2 x i16> addrspace(1)* %out0, <2 x i16 ; GFX8-NEXT: v_xor_b32_e32 v0, s0, v0 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v3 ; GFX8-NEXT: v_mul_hi_u32 v1, s1, v1 -; GFX8-NEXT: v_xor_b32_e32 v2, s8, v2 +; GFX8-NEXT: v_xor_b32_e32 v2, s10, v2 ; GFX8-NEXT: v_subrev_u32_e32 v0, vcc, s0, v0 -; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, s8, v2 -; GFX8-NEXT: v_mul_lo_u32 v3, v1, s11 +; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, s10, v2 +; GFX8-NEXT: v_mul_lo_u32 v3, v1, s12 ; GFX8-NEXT: v_add_u32_e32 v4, vcc, 1, v1 ; GFX8-NEXT: v_sub_u32_e32 v3, vcc, s1, v3 -; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s11, v3 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s12, v3 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc -; GFX8-NEXT: v_subrev_u32_e64 v4, s[0:1], s11, v3 +; GFX8-NEXT: v_subrev_u32_e64 v4, s[0:1], s12, v3 ; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc ; GFX8-NEXT: v_add_u32_e32 v4, vcc, 1, v1 -; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s11, v3 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s12, v3 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc -; GFX8-NEXT: v_subrev_u32_e64 v4, s[0:1], s11, v3 -; GFX8-NEXT: s_xor_b32 s0, s2, s9 +; GFX8-NEXT: v_subrev_u32_e64 v4, s[0:1], s12, v3 +; GFX8-NEXT: s_xor_b32 s0, s2, s11 ; GFX8-NEXT: v_xor_b32_e32 v1, s0, v1 ; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc ; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, s0, v1 @@ -3038,73 +3021,72 @@ define amdgpu_kernel void @sdivrem_v2i16(<2 x i16> addrspace(1)* %out0, <2 x i16 ; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s8 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX9-NEXT: s_load_dword s9, s[4:5], 0x10 -; GFX9-NEXT: s_lshr_b32 s4, s6, 16 -; GFX9-NEXT: s_sext_i32_i16 s4, s4 +; GFX9-NEXT: s_mov_b32 s4, 0x100010 +; GFX9-NEXT: s_bfe_i32 s6, s6, s4 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX9-NEXT: s_sub_i32 s10, 0, s8 -; GFX9-NEXT: s_ashr_i32 s6, s4, 31 -; GFX9-NEXT: s_add_i32 s4, s4, s6 +; GFX9-NEXT: s_ashr_i32 s10, s6, 31 +; GFX9-NEXT: s_add_i32 s6, s6, s10 +; GFX9-NEXT: s_xor_b32 s6, s6, s10 ; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX9-NEXT: s_xor_b32 s4, s4, s6 -; GFX9-NEXT: v_cvt_f32_u32_e32 v2, s4 +; GFX9-NEXT: s_sub_i32 s11, 0, s8 +; GFX9-NEXT: v_cvt_f32_u32_e32 v2, s6 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_sext_i32_i16 s5, s9 -; GFX9-NEXT: v_mul_lo_u32 v1, s10, v0 -; GFX9-NEXT: s_ashr_i32 s10, s5, 31 -; GFX9-NEXT: s_add_i32 s5, s5, s10 +; GFX9-NEXT: v_mul_lo_u32 v1, s11, v0 +; GFX9-NEXT: s_ashr_i32 s11, s5, 31 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v2, v2 +; GFX9-NEXT: s_add_i32 s5, s5, s11 ; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1 -; GFX9-NEXT: s_xor_b32 s5, s5, s10 -; GFX9-NEXT: s_sub_i32 s11, 0, s4 -; GFX9-NEXT: s_lshr_b32 s9, s9, 16 +; GFX9-NEXT: s_xor_b32 s5, s5, s11 +; GFX9-NEXT: s_bfe_i32 s4, s9, s4 +; GFX9-NEXT: s_sub_i32 s9, 0, s6 ; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 -; GFX9-NEXT: v_mul_hi_u32 v0, s5, v0 ; GFX9-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v2 ; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 -; GFX9-NEXT: s_xor_b32 s7, s10, s7 +; GFX9-NEXT: v_mul_hi_u32 v0, s5, v0 +; GFX9-NEXT: s_xor_b32 s7, s11, s7 +; GFX9-NEXT: v_mul_lo_u32 v3, s9, v1 ; GFX9-NEXT: v_mul_lo_u32 v2, v0, s8 ; GFX9-NEXT: v_add_u32_e32 v4, 1, v0 -; GFX9-NEXT: v_mul_lo_u32 v3, s11, v1 +; GFX9-NEXT: v_mul_hi_u32 v3, v1, v3 ; GFX9-NEXT: v_sub_u32_e32 v2, s5, v2 +; GFX9-NEXT: s_ashr_i32 s5, s4, 31 +; GFX9-NEXT: s_add_i32 s4, s4, s5 +; GFX9-NEXT: s_xor_b32 s4, s4, s5 +; GFX9-NEXT: v_add_u32_e32 v1, v1, v3 +; GFX9-NEXT: v_mul_hi_u32 v1, s4, v1 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s8, v2 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc ; GFX9-NEXT: v_subrev_u32_e32 v4, s8, v2 +; GFX9-NEXT: v_mul_lo_u32 v3, v1, s6 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc -; GFX9-NEXT: v_mul_hi_u32 v3, v1, v3 ; GFX9-NEXT: v_add_u32_e32 v4, 1, v0 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s8, v2 -; GFX9-NEXT: s_sext_i32_i16 s5, s9 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc ; GFX9-NEXT: v_subrev_u32_e32 v4, s8, v2 -; GFX9-NEXT: s_ashr_i32 s8, s5, 31 -; GFX9-NEXT: s_add_i32 s5, s5, s8 -; GFX9-NEXT: s_xor_b32 s5, s5, s8 -; GFX9-NEXT: v_add_u32_e32 v1, v1, v3 -; GFX9-NEXT: v_mul_hi_u32 v1, s5, v1 +; GFX9-NEXT: v_sub_u32_e32 v3, s4, v3 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc -; GFX9-NEXT: v_xor_b32_e32 v0, s7, v0 -; GFX9-NEXT: v_xor_b32_e32 v2, s10, v2 -; GFX9-NEXT: v_mul_lo_u32 v3, v1, s4 ; GFX9-NEXT: v_add_u32_e32 v4, 1, v1 -; GFX9-NEXT: v_subrev_u32_e32 v0, s7, v0 -; GFX9-NEXT: v_subrev_u32_e32 v2, s10, v2 -; GFX9-NEXT: v_sub_u32_e32 v3, s5, v3 -; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s4, v3 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s6, v3 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc -; GFX9-NEXT: v_subrev_u32_e32 v4, s4, v3 +; GFX9-NEXT: v_subrev_u32_e32 v4, s6, v3 ; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc ; GFX9-NEXT: v_add_u32_e32 v4, 1, v1 -; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s4, v3 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s6, v3 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc -; GFX9-NEXT: v_subrev_u32_e32 v4, s4, v3 +; GFX9-NEXT: v_subrev_u32_e32 v4, s6, v3 ; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc -; GFX9-NEXT: s_xor_b32 s4, s8, s6 +; GFX9-NEXT: s_xor_b32 s4, s5, s10 +; GFX9-NEXT: v_xor_b32_e32 v0, s7, v0 +; GFX9-NEXT: v_xor_b32_e32 v2, s11, v2 ; GFX9-NEXT: v_xor_b32_e32 v1, s4, v1 -; GFX9-NEXT: v_xor_b32_e32 v3, s8, v3 +; GFX9-NEXT: v_xor_b32_e32 v3, s5, v3 +; GFX9-NEXT: v_subrev_u32_e32 v0, s7, v0 +; GFX9-NEXT: v_subrev_u32_e32 v2, s11, v2 ; GFX9-NEXT: v_sub_u32_sdwa v1, v1, s4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX9-NEXT: v_mov_b32_e32 v4, 0xffff -; GFX9-NEXT: v_sub_u32_sdwa v3, v3, s8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX9-NEXT: v_sub_u32_sdwa v3, v3, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX9-NEXT: v_and_or_b32 v0, v0, v4, v1 ; GFX9-NEXT: v_and_or_b32 v1, v2, v4, v3 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 @@ -3115,21 +3097,21 @@ define amdgpu_kernel void @sdivrem_v2i16(<2 x i16> addrspace(1)* %out0, <2 x i16 ; GFX10-LABEL: sdivrem_v2i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dword s0, s[4:5], 0x14 +; GFX10-NEXT: s_mov_b32 s1, 0x100010 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_lshr_b32 s1, s0, 16 -; GFX10-NEXT: s_sext_i32_i16 s0, s0 -; GFX10-NEXT: s_sext_i32_i16 s1, s1 -; GFX10-NEXT: s_ashr_i32 s2, s0, 31 -; GFX10-NEXT: s_ashr_i32 s3, s1, 31 -; GFX10-NEXT: s_add_i32 s0, s0, s2 -; GFX10-NEXT: s_add_i32 s1, s1, s3 -; GFX10-NEXT: s_xor_b32 s8, s0, s2 -; GFX10-NEXT: s_xor_b32 s1, s1, s3 -; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s8 -; GFX10-NEXT: v_cvt_f32_u32_e32 v1, s1 +; GFX10-NEXT: s_sext_i32_i16 s2, s0 +; GFX10-NEXT: s_bfe_i32 s0, s0, s1 +; GFX10-NEXT: s_ashr_i32 s3, s2, 31 +; GFX10-NEXT: s_ashr_i32 s8, s0, 31 +; GFX10-NEXT: s_add_i32 s2, s2, s3 +; GFX10-NEXT: s_add_i32 s0, s0, s8 +; GFX10-NEXT: s_xor_b32 s2, s2, s3 +; GFX10-NEXT: s_xor_b32 s9, s0, s8 +; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s2 +; GFX10-NEXT: v_cvt_f32_u32_e32 v1, s9 ; GFX10-NEXT: s_load_dword s0, s[4:5], 0x10 -; GFX10-NEXT: s_sub_i32 s6, 0, s8 -; GFX10-NEXT: s_sub_i32 s7, 0, s1 +; GFX10-NEXT: s_sub_i32 s6, 0, s2 +; GFX10-NEXT: s_sub_i32 s7, 0, s9 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v1, v1 ; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 @@ -3140,56 +3122,55 @@ define amdgpu_kernel void @sdivrem_v2i16(<2 x i16> addrspace(1)* %out0, <2 x i16 ; GFX10-NEXT: v_mul_lo_u32 v3, s7, v1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_sext_i32_i16 s6, s0 -; GFX10-NEXT: s_lshr_b32 s0, s0, 16 -; GFX10-NEXT: s_ashr_i32 s9, s6, 31 -; GFX10-NEXT: s_sext_i32_i16 s0, s0 -; GFX10-NEXT: s_add_i32 s6, s6, s9 +; GFX10-NEXT: s_bfe_i32 s0, s0, s1 +; GFX10-NEXT: s_ashr_i32 s1, s6, 31 ; GFX10-NEXT: s_ashr_i32 s10, s0, 31 +; GFX10-NEXT: s_add_i32 s6, s6, s1 +; GFX10-NEXT: s_add_i32 s0, s0, s10 ; GFX10-NEXT: v_mul_hi_u32 v2, v0, v2 ; GFX10-NEXT: v_mul_hi_u32 v3, v1, v3 -; GFX10-NEXT: s_add_i32 s0, s0, s10 -; GFX10-NEXT: s_xor_b32 s6, s6, s9 +; GFX10-NEXT: s_xor_b32 s6, s6, s1 ; GFX10-NEXT: s_xor_b32 s0, s0, s10 ; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v2 ; GFX10-NEXT: v_add_nc_u32_e32 v1, v1, v3 ; GFX10-NEXT: v_mul_hi_u32 v0, s6, v0 ; GFX10-NEXT: v_mul_hi_u32 v1, s0, v1 -; GFX10-NEXT: v_mul_lo_u32 v2, v0, s8 -; GFX10-NEXT: v_mul_lo_u32 v3, v1, s1 +; GFX10-NEXT: v_mul_lo_u32 v2, v0, s2 +; GFX10-NEXT: v_mul_lo_u32 v3, v1, s9 ; GFX10-NEXT: v_add_nc_u32_e32 v4, 1, v0 ; GFX10-NEXT: v_add_nc_u32_e32 v5, 1, v1 ; GFX10-NEXT: v_sub_nc_u32_e32 v2, s6, v2 ; GFX10-NEXT: v_sub_nc_u32_e32 v3, s0, v3 ; GFX10-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 -; GFX10-NEXT: v_cmp_le_u32_e64 s0, s8, v2 -; GFX10-NEXT: v_subrev_nc_u32_e32 v6, s8, v2 -; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s1, v3 -; GFX10-NEXT: v_subrev_nc_u32_e32 v7, s1, v3 +; GFX10-NEXT: v_cmp_le_u32_e64 s0, s2, v2 +; GFX10-NEXT: v_subrev_nc_u32_e32 v6, s2, v2 +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s9, v3 +; GFX10-NEXT: v_subrev_nc_u32_e32 v7, s9, v3 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v4, s0 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v6, s0 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc_lo ; GFX10-NEXT: v_add_nc_u32_e32 v4, 1, v0 -; GFX10-NEXT: v_cmp_le_u32_e64 s0, s8, v2 +; GFX10-NEXT: v_cmp_le_u32_e64 s0, s2, v2 ; GFX10-NEXT: v_add_nc_u32_e32 v5, 1, v1 -; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s1, v3 -; GFX10-NEXT: v_subrev_nc_u32_e32 v7, s1, v3 -; GFX10-NEXT: v_subrev_nc_u32_e32 v6, s8, v2 +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s9, v3 +; GFX10-NEXT: v_subrev_nc_u32_e32 v6, s2, v2 +; GFX10-NEXT: v_subrev_nc_u32_e32 v7, s9, v3 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v4, s0 -; GFX10-NEXT: s_xor_b32 s1, s9, s2 +; GFX10-NEXT: s_xor_b32 s2, s1, s3 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v6, s0 -; GFX10-NEXT: s_xor_b32 s0, s10, s3 -; GFX10-NEXT: v_xor_b32_e32 v0, s1, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc_lo +; GFX10-NEXT: s_xor_b32 s0, s10, s8 +; GFX10-NEXT: v_xor_b32_e32 v0, s2, v0 ; GFX10-NEXT: v_xor_b32_e32 v1, s0, v1 +; GFX10-NEXT: v_xor_b32_e32 v2, s1, v2 ; GFX10-NEXT: v_xor_b32_e32 v3, s10, v3 -; GFX10-NEXT: v_xor_b32_e32 v2, s9, v2 ; GFX10-NEXT: v_mov_b32_e32 v4, 0xffff -; GFX10-NEXT: v_subrev_nc_u32_e32 v0, s1, v0 +; GFX10-NEXT: v_subrev_nc_u32_e32 v0, s2, v0 ; GFX10-NEXT: v_sub_nc_u32_sdwa v1, v1, s0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX10-NEXT: v_subrev_nc_u32_e32 v2, s1, v2 ; GFX10-NEXT: v_sub_nc_u32_sdwa v3, v3, s10 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX10-NEXT: v_subrev_nc_u32_e32 v2, s9, v2 ; GFX10-NEXT: v_and_or_b32 v0, v0, v4, v1 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: v_and_or_b32 v2, v2, v4, v3 @@ -3209,8 +3190,7 @@ define amdgpu_kernel void @sdivrem_i3(i3 addrspace(1)* %out0, i3 addrspace(1)* % ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dword s0, s[4:5], 0x10 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_lshr_b32 s1, s0, 8 -; GFX8-NEXT: s_bfe_i32 s1, s1, 0x30000 +; GFX8-NEXT: s_bfe_i32 s1, s0, 0x30008 ; GFX8-NEXT: s_ashr_i32 s6, s1, 31 ; GFX8-NEXT: s_add_i32 s1, s1, s6 ; GFX8-NEXT: s_xor_b32 s7, s1, s6 @@ -3259,8 +3239,7 @@ define amdgpu_kernel void @sdivrem_i3(i3 addrspace(1)* %out0, i3 addrspace(1)* % ; GFX9-NEXT: s_load_dword s0, s[4:5], 0x10 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_lshr_b32 s1, s0, 8 -; GFX9-NEXT: s_bfe_i32 s1, s1, 0x30000 +; GFX9-NEXT: s_bfe_i32 s1, s0, 0x30008 ; GFX9-NEXT: s_ashr_i32 s6, s1, 31 ; GFX9-NEXT: s_add_i32 s1, s1, s6 ; GFX9-NEXT: s_xor_b32 s7, s1, s6 @@ -3304,15 +3283,14 @@ define amdgpu_kernel void @sdivrem_i3(i3 addrspace(1)* %out0, i3 addrspace(1)* % ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dword s0, s[4:5], 0x10 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_lshr_b32 s1, s0, 8 +; GFX10-NEXT: s_bfe_i32 s1, s0, 0x30008 ; GFX10-NEXT: s_bfe_i32 s0, s0, 0x30000 -; GFX10-NEXT: s_bfe_i32 s1, s1, 0x30000 -; GFX10-NEXT: s_ashr_i32 s8, s0, 31 ; GFX10-NEXT: s_ashr_i32 s6, s1, 31 -; GFX10-NEXT: s_add_i32 s0, s0, s8 +; GFX10-NEXT: s_ashr_i32 s8, s0, 31 ; GFX10-NEXT: s_add_i32 s1, s1, s6 -; GFX10-NEXT: s_xor_b32 s0, s0, s8 +; GFX10-NEXT: s_add_i32 s0, s0, s8 ; GFX10-NEXT: s_xor_b32 s7, s1, s6 +; GFX10-NEXT: s_xor_b32 s0, s0, s8 ; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s7 ; GFX10-NEXT: s_sub_i32 s1, 0, s7 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll index 26ed8af407773..bce68f90bc1d9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll @@ -604,33 +604,34 @@ define i32 @v_ssubsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) { ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_mov_b32 s4, 8 ; GFX9-NEXT: v_lshrrev_b32_sdwa v2, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX9-NEXT: v_lshrrev_b32_sdwa v5, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v0 ; GFX9-NEXT: v_mov_b32_e32 v8, 0xffff ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v0 ; GFX9-NEXT: v_and_or_b32 v0, v0, v8, v2 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v4 -; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v1 +; GFX9-NEXT: v_lshrrev_b32_sdwa v5, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v1 -; GFX9-NEXT: v_and_or_b32 v1, v1, v8, v5 ; GFX9-NEXT: v_and_or_b32 v2, v3, v8, v2 +; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v1 +; GFX9-NEXT: v_and_or_b32 v1, v1, v8, v5 ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v7 +; GFX9-NEXT: v_and_or_b32 v3, v6, v8, v3 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1] ; GFX9-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1] -; GFX9-NEXT: v_and_or_b32 v3, v6, v8, v3 ; GFX9-NEXT: v_pk_sub_i16 v0, v0, v1 clamp ; GFX9-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1] ; GFX9-NEXT: v_pk_lshlrev_b16 v3, 8, v3 op_sel_hi:[0,1] -; GFX9-NEXT: v_pk_ashrrev_i16 v0, 8, v0 op_sel_hi:[0,1] -; GFX9-NEXT: s_movk_i32 s4, 0xff ; GFX9-NEXT: v_pk_sub_i16 v1, v2, v3 clamp -; GFX9-NEXT: v_and_b32_sdwa v2, v0, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b32_e32 v2, 8, v2 +; GFX9-NEXT: v_pk_ashrrev_i16 v0, 8, v0 op_sel_hi:[0,1] +; GFX9-NEXT: v_mov_b32_e32 v2, 8 ; GFX9-NEXT: v_pk_ashrrev_i16 v1, 8, v1 op_sel_hi:[0,1] +; GFX9-NEXT: s_movk_i32 s4, 0xff +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX9-NEXT: v_and_or_b32 v0, v0, s4, v2 ; GFX9-NEXT: v_and_b32_e32 v2, s4, v1 +; GFX9-NEXT: v_mov_b32_e32 v3, 24 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX9-NEXT: v_and_b32_sdwa v1, v1, s4 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX9-NEXT: v_or3_b32 v0, v0, v2, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -638,33 +639,34 @@ define i32 @v_ssubsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_mov_b32 s4, 8 ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v0 +; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v1 +; GFX10-NEXT: s_mov_b32 s4, 8 +; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v0 ; GFX10-NEXT: v_lshrrev_b32_sdwa v2, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX10-NEXT: v_lshrrev_b32_sdwa v6, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX10-NEXT: v_mov_b32_e32 v5, 0xffff -; GFX10-NEXT: v_lshrrev_b32_e32 v7, 24, v1 -; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff ; GFX10-NEXT: v_lshrrev_b32_e32 v8, 16, v1 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX10-NEXT: v_and_or_b32 v0, v0, v5, v2 -; GFX10-NEXT: v_and_or_b32 v1, v1, v5, v6 -; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v7 +; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v5 ; GFX10-NEXT: s_movk_i32 s4, 0xff -; GFX10-NEXT: v_and_or_b32 v3, v3, v5, v4 +; GFX10-NEXT: v_and_or_b32 v0, v0, v7, v2 +; GFX10-NEXT: v_and_or_b32 v1, v1, v7, v6 +; GFX10-NEXT: v_and_or_b32 v2, v3, v7, v4 +; GFX10-NEXT: v_and_or_b32 v3, v8, v7, v5 +; GFX10-NEXT: v_mov_b32_e32 v4, 24 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1] -; GFX10-NEXT: v_and_or_b32 v2, v8, v5, v2 +; GFX10-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_lshlrev_b16 v3, 8, v3 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_sub_i16 v0, v0, v1 clamp -; GFX10-NEXT: v_pk_lshlrev_b16 v1, 8, v2 op_sel_hi:[0,1] +; GFX10-NEXT: v_pk_sub_i16 v1, v2, v3 clamp +; GFX10-NEXT: v_mov_b32_e32 v2, 8 ; GFX10-NEXT: v_pk_ashrrev_i16 v0, 8, v0 op_sel_hi:[0,1] -; GFX10-NEXT: v_pk_sub_i16 v1, v3, v1 clamp -; GFX10-NEXT: v_and_b32_sdwa v2, v0, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX10-NEXT: v_pk_ashrrev_i16 v1, 8, v1 op_sel_hi:[0,1] -; GFX10-NEXT: v_lshlrev_b32_e32 v2, 8, v2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX10-NEXT: v_and_b32_e32 v3, s4, v1 -; GFX10-NEXT: v_and_b32_sdwa v1, v1, s4 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX10-NEXT: v_and_or_b32 v0, v0, s4, v2 ; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v3 ; GFX10-NEXT: v_or3_b32 v0, v0, v2, v1 @@ -831,46 +833,47 @@ define amdgpu_ps i32 @s_ssubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) { ; ; GFX9-LABEL: s_ssubsat_v4i8: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_lshr_b32 s2, s0, 8 -; GFX9-NEXT: s_lshr_b32 s3, s0, 16 -; GFX9-NEXT: s_lshr_b32 s4, s0, 24 -; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s2 -; GFX9-NEXT: s_pack_ll_b32_b16 s2, s3, s4 +; GFX9-NEXT: s_lshr_b32 s3, s0, 8 ; GFX9-NEXT: s_lshr_b32 s4, s0, 16 -; GFX9-NEXT: s_mov_b32 s3, 0x80008 -; GFX9-NEXT: s_lshr_b32 s5, s1, 8 -; GFX9-NEXT: s_lshl_b32 s0, s0, s3 -; GFX9-NEXT: s_lshl_b32 s4, s4, 8 -; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s4 -; GFX9-NEXT: s_lshr_b32 s4, s2, 16 -; GFX9-NEXT: s_lshr_b32 s6, s1, 16 -; GFX9-NEXT: s_lshr_b32 s7, s1, 24 -; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s5 -; GFX9-NEXT: s_lshr_b32 s5, s1, 16 -; GFX9-NEXT: s_lshl_b32 s2, s2, s3 -; GFX9-NEXT: s_lshl_b32 s4, s4, 8 -; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s4 -; GFX9-NEXT: s_pack_ll_b32_b16 s4, s6, s7 -; GFX9-NEXT: s_lshl_b32 s1, s1, s3 -; GFX9-NEXT: s_lshl_b32 s5, s5, 8 -; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s5 -; GFX9-NEXT: s_lshr_b32 s5, s4, 16 +; GFX9-NEXT: s_lshr_b32 s6, s0, 24 +; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s3 +; GFX9-NEXT: s_pack_ll_b32_b16 s3, s4, s6 +; GFX9-NEXT: s_lshr_b32 s6, s0, 16 +; GFX9-NEXT: s_mov_b32 s4, 0x80008 +; GFX9-NEXT: s_lshr_b32 s7, s1, 8 +; GFX9-NEXT: s_lshl_b32 s0, s0, s4 +; GFX9-NEXT: s_lshl_b32 s6, s6, 8 +; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s6 +; GFX9-NEXT: s_lshr_b32 s6, s3, 16 +; GFX9-NEXT: s_lshr_b32 s8, s1, 16 +; GFX9-NEXT: s_lshr_b32 s9, s1, 24 +; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s7 +; GFX9-NEXT: s_lshr_b32 s7, s1, 16 +; GFX9-NEXT: s_lshl_b32 s3, s3, s4 +; GFX9-NEXT: s_lshl_b32 s6, s6, 8 +; GFX9-NEXT: s_pack_ll_b32_b16 s3, s3, s6 +; GFX9-NEXT: s_pack_ll_b32_b16 s6, s8, s9 +; GFX9-NEXT: s_lshl_b32 s1, s1, s4 +; GFX9-NEXT: s_lshl_b32 s7, s7, 8 +; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s7 +; GFX9-NEXT: s_lshr_b32 s7, s6, 16 +; GFX9-NEXT: s_lshl_b32 s4, s6, s4 +; GFX9-NEXT: s_lshl_b32 s6, s7, 8 ; GFX9-NEXT: v_mov_b32_e32 v0, s1 -; GFX9-NEXT: s_lshl_b32 s3, s4, s3 -; GFX9-NEXT: s_lshl_b32 s4, s5, 8 +; GFX9-NEXT: s_pack_ll_b32_b16 s4, s4, s6 ; GFX9-NEXT: v_pk_sub_i16 v0, s0, v0 clamp -; GFX9-NEXT: s_pack_ll_b32_b16 s3, s3, s4 -; GFX9-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-NEXT: v_mov_b32_e32 v1, s4 +; GFX9-NEXT: v_pk_sub_i16 v1, s3, v1 clamp +; GFX9-NEXT: s_mov_b32 s2, 8 ; GFX9-NEXT: v_pk_ashrrev_i16 v0, 8, v0 op_sel_hi:[0,1] -; GFX9-NEXT: s_movk_i32 s0, 0xff -; GFX9-NEXT: v_pk_sub_i16 v1, s2, v1 clamp -; GFX9-NEXT: v_and_b32_sdwa v2, v0, s0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b32_e32 v2, 8, v2 ; GFX9-NEXT: v_pk_ashrrev_i16 v1, 8, v1 op_sel_hi:[0,1] +; GFX9-NEXT: s_movk_i32 s0, 0xff +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, s2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX9-NEXT: v_and_or_b32 v0, v0, s0, v2 ; GFX9-NEXT: v_and_b32_e32 v2, s0, v1 +; GFX9-NEXT: s_mov_b32 s5, 24 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX9-NEXT: v_and_b32_sdwa v1, v1, s0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX9-NEXT: v_or3_b32 v0, v0, v2, v1 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0 ; GFX9-NEXT: ; return to shader part epilog @@ -878,42 +881,43 @@ define amdgpu_ps i32 @s_ssubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) { ; GFX10-LABEL: s_ssubsat_v4i8: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_lshr_b32 s2, s0, 8 -; GFX10-NEXT: s_lshr_b32 s5, s1, 8 ; GFX10-NEXT: s_lshr_b32 s3, s0, 16 ; GFX10-NEXT: s_lshr_b32 s4, s0, 24 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s2 +; GFX10-NEXT: s_pack_ll_b32_b16 s2, s3, s4 +; GFX10-NEXT: s_lshr_b32 s4, s0, 16 +; GFX10-NEXT: s_mov_b32 s3, 0x80008 +; GFX10-NEXT: s_lshr_b32 s5, s1, 8 ; GFX10-NEXT: s_lshr_b32 s6, s1, 16 ; GFX10-NEXT: s_lshr_b32 s7, s1, 24 +; GFX10-NEXT: s_lshl_b32 s0, s0, s3 +; GFX10-NEXT: s_lshl_b32 s4, s4, 8 ; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s5 -; GFX10-NEXT: s_lshr_b32 s8, s0, 16 +; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s4 +; GFX10-NEXT: s_pack_ll_b32_b16 s4, s6, s7 +; GFX10-NEXT: s_lshr_b32 s8, s2, 16 ; GFX10-NEXT: s_lshr_b32 s5, s1, 16 -; GFX10-NEXT: s_mov_b32 s2, 0x80008 +; GFX10-NEXT: s_lshr_b32 s6, s4, 16 +; GFX10-NEXT: s_lshl_b32 s2, s2, s3 ; GFX10-NEXT: s_lshl_b32 s8, s8, 8 -; GFX10-NEXT: s_lshl_b32 s0, s0, s2 -; GFX10-NEXT: s_lshl_b32 s1, s1, s2 +; GFX10-NEXT: s_lshl_b32 s1, s1, s3 ; GFX10-NEXT: s_lshl_b32 s5, s5, 8 -; GFX10-NEXT: s_pack_ll_b32_b16 s3, s3, s4 -; GFX10-NEXT: s_pack_ll_b32_b16 s6, s6, s7 +; GFX10-NEXT: s_lshl_b32 s3, s4, s3 +; GFX10-NEXT: s_lshl_b32 s4, s6, 8 ; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s5 -; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s8 -; GFX10-NEXT: s_lshr_b32 s4, s3, 16 -; GFX10-NEXT: s_lshr_b32 s5, s6, 16 +; GFX10-NEXT: s_pack_ll_b32_b16 s2, s2, s8 +; GFX10-NEXT: s_pack_ll_b32_b16 s3, s3, s4 ; GFX10-NEXT: v_pk_sub_i16 v0, s0, s1 clamp -; GFX10-NEXT: s_lshl_b32 s3, s3, s2 -; GFX10-NEXT: s_lshl_b32 s4, s4, 8 -; GFX10-NEXT: s_lshl_b32 s0, s6, s2 -; GFX10-NEXT: s_lshl_b32 s1, s5, 8 -; GFX10-NEXT: s_pack_ll_b32_b16 s2, s3, s4 -; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s1 +; GFX10-NEXT: v_pk_sub_i16 v1, s2, s3 clamp +; GFX10-NEXT: s_mov_b32 s0, 8 +; GFX10-NEXT: s_movk_i32 s1, 0xff ; GFX10-NEXT: v_pk_ashrrev_i16 v0, 8, v0 op_sel_hi:[0,1] -; GFX10-NEXT: v_pk_sub_i16 v1, s2, s0 clamp -; GFX10-NEXT: s_movk_i32 s0, 0xff -; GFX10-NEXT: v_and_b32_sdwa v2, v0, s0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX10-NEXT: v_pk_ashrrev_i16 v1, 8, v1 op_sel_hi:[0,1] -; GFX10-NEXT: v_lshlrev_b32_e32 v2, 8, v2 -; GFX10-NEXT: v_and_b32_e32 v3, s0, v1 -; GFX10-NEXT: v_and_b32_sdwa v1, v1, s0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_or_b32 v0, v0, s0, v2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_b32_e32 v3, s1, v1 +; GFX10-NEXT: s_mov_b32 s0, 24 +; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v0, v0, s1, v2 ; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v3 ; GFX10-NEXT: v_or3_b32 v0, v0, v2, v1 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll index edde6f01f8c16..53a6250892432 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll @@ -433,33 +433,34 @@ define i32 @v_uaddsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) { ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_mov_b32 s4, 8 ; GFX9-NEXT: v_lshrrev_b32_sdwa v2, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX9-NEXT: v_lshrrev_b32_sdwa v5, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v0 ; GFX9-NEXT: v_mov_b32_e32 v8, 0xffff ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v0 ; GFX9-NEXT: v_and_or_b32 v0, v0, v8, v2 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v4 -; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v1 +; GFX9-NEXT: v_lshrrev_b32_sdwa v5, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v1 -; GFX9-NEXT: v_and_or_b32 v1, v1, v8, v5 ; GFX9-NEXT: v_and_or_b32 v2, v3, v8, v2 +; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v1 +; GFX9-NEXT: v_and_or_b32 v1, v1, v8, v5 ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v7 +; GFX9-NEXT: v_and_or_b32 v3, v6, v8, v3 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1] ; GFX9-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1] -; GFX9-NEXT: v_and_or_b32 v3, v6, v8, v3 ; GFX9-NEXT: v_pk_add_u16 v0, v0, v1 clamp ; GFX9-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1] ; GFX9-NEXT: v_pk_lshlrev_b16 v3, 8, v3 op_sel_hi:[0,1] -; GFX9-NEXT: v_pk_lshrrev_b16 v0, 8, v0 op_sel_hi:[0,1] -; GFX9-NEXT: s_movk_i32 s4, 0xff ; GFX9-NEXT: v_pk_add_u16 v1, v2, v3 clamp -; GFX9-NEXT: v_and_b32_sdwa v2, v0, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b32_e32 v2, 8, v2 +; GFX9-NEXT: v_pk_lshrrev_b16 v0, 8, v0 op_sel_hi:[0,1] +; GFX9-NEXT: v_mov_b32_e32 v2, 8 ; GFX9-NEXT: v_pk_lshrrev_b16 v1, 8, v1 op_sel_hi:[0,1] +; GFX9-NEXT: s_movk_i32 s4, 0xff +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX9-NEXT: v_and_or_b32 v0, v0, s4, v2 ; GFX9-NEXT: v_and_b32_e32 v2, s4, v1 +; GFX9-NEXT: v_mov_b32_e32 v3, 24 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX9-NEXT: v_and_b32_sdwa v1, v1, s4 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX9-NEXT: v_or3_b32 v0, v0, v2, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -467,33 +468,34 @@ define i32 @v_uaddsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_mov_b32 s4, 8 ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v0 +; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v1 +; GFX10-NEXT: s_mov_b32 s4, 8 +; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v0 ; GFX10-NEXT: v_lshrrev_b32_sdwa v2, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX10-NEXT: v_lshrrev_b32_sdwa v6, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX10-NEXT: v_mov_b32_e32 v5, 0xffff -; GFX10-NEXT: v_lshrrev_b32_e32 v7, 24, v1 -; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff ; GFX10-NEXT: v_lshrrev_b32_e32 v8, 16, v1 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX10-NEXT: v_and_or_b32 v0, v0, v5, v2 -; GFX10-NEXT: v_and_or_b32 v1, v1, v5, v6 -; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v7 +; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v5 ; GFX10-NEXT: s_movk_i32 s4, 0xff -; GFX10-NEXT: v_and_or_b32 v3, v3, v5, v4 +; GFX10-NEXT: v_and_or_b32 v0, v0, v7, v2 +; GFX10-NEXT: v_and_or_b32 v1, v1, v7, v6 +; GFX10-NEXT: v_and_or_b32 v2, v3, v7, v4 +; GFX10-NEXT: v_and_or_b32 v3, v8, v7, v5 +; GFX10-NEXT: v_mov_b32_e32 v4, 24 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1] -; GFX10-NEXT: v_and_or_b32 v2, v8, v5, v2 +; GFX10-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_lshlrev_b16 v3, 8, v3 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_add_u16 v0, v0, v1 clamp -; GFX10-NEXT: v_pk_lshlrev_b16 v1, 8, v2 op_sel_hi:[0,1] +; GFX10-NEXT: v_pk_add_u16 v1, v2, v3 clamp +; GFX10-NEXT: v_mov_b32_e32 v2, 8 ; GFX10-NEXT: v_pk_lshrrev_b16 v0, 8, v0 op_sel_hi:[0,1] -; GFX10-NEXT: v_pk_add_u16 v1, v3, v1 clamp -; GFX10-NEXT: v_and_b32_sdwa v2, v0, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX10-NEXT: v_pk_lshrrev_b16 v1, 8, v1 op_sel_hi:[0,1] -; GFX10-NEXT: v_lshlrev_b32_e32 v2, 8, v2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX10-NEXT: v_and_b32_e32 v3, s4, v1 -; GFX10-NEXT: v_and_b32_sdwa v1, v1, s4 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX10-NEXT: v_and_or_b32 v0, v0, s4, v2 ; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v3 ; GFX10-NEXT: v_or3_b32 v0, v0, v2, v1 @@ -585,46 +587,47 @@ define amdgpu_ps i32 @s_uaddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) { ; ; GFX9-LABEL: s_uaddsat_v4i8: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_lshr_b32 s2, s0, 8 -; GFX9-NEXT: s_lshr_b32 s3, s0, 16 -; GFX9-NEXT: s_lshr_b32 s4, s0, 24 -; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s2 -; GFX9-NEXT: s_pack_ll_b32_b16 s2, s3, s4 +; GFX9-NEXT: s_lshr_b32 s3, s0, 8 ; GFX9-NEXT: s_lshr_b32 s4, s0, 16 -; GFX9-NEXT: s_mov_b32 s3, 0x80008 -; GFX9-NEXT: s_lshr_b32 s5, s1, 8 -; GFX9-NEXT: s_lshl_b32 s0, s0, s3 -; GFX9-NEXT: s_lshl_b32 s4, s4, 8 -; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s4 -; GFX9-NEXT: s_lshr_b32 s4, s2, 16 -; GFX9-NEXT: s_lshr_b32 s6, s1, 16 -; GFX9-NEXT: s_lshr_b32 s7, s1, 24 -; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s5 -; GFX9-NEXT: s_lshr_b32 s5, s1, 16 -; GFX9-NEXT: s_lshl_b32 s2, s2, s3 -; GFX9-NEXT: s_lshl_b32 s4, s4, 8 -; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s4 -; GFX9-NEXT: s_pack_ll_b32_b16 s4, s6, s7 -; GFX9-NEXT: s_lshl_b32 s1, s1, s3 -; GFX9-NEXT: s_lshl_b32 s5, s5, 8 -; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s5 -; GFX9-NEXT: s_lshr_b32 s5, s4, 16 +; GFX9-NEXT: s_lshr_b32 s6, s0, 24 +; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s3 +; GFX9-NEXT: s_pack_ll_b32_b16 s3, s4, s6 +; GFX9-NEXT: s_lshr_b32 s6, s0, 16 +; GFX9-NEXT: s_mov_b32 s4, 0x80008 +; GFX9-NEXT: s_lshr_b32 s7, s1, 8 +; GFX9-NEXT: s_lshl_b32 s0, s0, s4 +; GFX9-NEXT: s_lshl_b32 s6, s6, 8 +; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s6 +; GFX9-NEXT: s_lshr_b32 s6, s3, 16 +; GFX9-NEXT: s_lshr_b32 s8, s1, 16 +; GFX9-NEXT: s_lshr_b32 s9, s1, 24 +; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s7 +; GFX9-NEXT: s_lshr_b32 s7, s1, 16 +; GFX9-NEXT: s_lshl_b32 s3, s3, s4 +; GFX9-NEXT: s_lshl_b32 s6, s6, 8 +; GFX9-NEXT: s_pack_ll_b32_b16 s3, s3, s6 +; GFX9-NEXT: s_pack_ll_b32_b16 s6, s8, s9 +; GFX9-NEXT: s_lshl_b32 s1, s1, s4 +; GFX9-NEXT: s_lshl_b32 s7, s7, 8 +; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s7 +; GFX9-NEXT: s_lshr_b32 s7, s6, 16 +; GFX9-NEXT: s_lshl_b32 s4, s6, s4 +; GFX9-NEXT: s_lshl_b32 s6, s7, 8 ; GFX9-NEXT: v_mov_b32_e32 v0, s1 -; GFX9-NEXT: s_lshl_b32 s3, s4, s3 -; GFX9-NEXT: s_lshl_b32 s4, s5, 8 +; GFX9-NEXT: s_pack_ll_b32_b16 s4, s4, s6 ; GFX9-NEXT: v_pk_add_u16 v0, s0, v0 clamp -; GFX9-NEXT: s_pack_ll_b32_b16 s3, s3, s4 -; GFX9-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-NEXT: v_mov_b32_e32 v1, s4 +; GFX9-NEXT: v_pk_add_u16 v1, s3, v1 clamp +; GFX9-NEXT: s_mov_b32 s2, 8 ; GFX9-NEXT: v_pk_lshrrev_b16 v0, 8, v0 op_sel_hi:[0,1] -; GFX9-NEXT: s_movk_i32 s0, 0xff -; GFX9-NEXT: v_pk_add_u16 v1, s2, v1 clamp -; GFX9-NEXT: v_and_b32_sdwa v2, v0, s0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b32_e32 v2, 8, v2 ; GFX9-NEXT: v_pk_lshrrev_b16 v1, 8, v1 op_sel_hi:[0,1] +; GFX9-NEXT: s_movk_i32 s0, 0xff +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, s2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX9-NEXT: v_and_or_b32 v0, v0, s0, v2 ; GFX9-NEXT: v_and_b32_e32 v2, s0, v1 +; GFX9-NEXT: s_mov_b32 s5, 24 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX9-NEXT: v_and_b32_sdwa v1, v1, s0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX9-NEXT: v_or3_b32 v0, v0, v2, v1 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0 ; GFX9-NEXT: ; return to shader part epilog @@ -632,42 +635,43 @@ define amdgpu_ps i32 @s_uaddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) { ; GFX10-LABEL: s_uaddsat_v4i8: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_lshr_b32 s2, s0, 8 -; GFX10-NEXT: s_lshr_b32 s5, s1, 8 ; GFX10-NEXT: s_lshr_b32 s3, s0, 16 ; GFX10-NEXT: s_lshr_b32 s4, s0, 24 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s2 +; GFX10-NEXT: s_pack_ll_b32_b16 s2, s3, s4 +; GFX10-NEXT: s_lshr_b32 s4, s0, 16 +; GFX10-NEXT: s_mov_b32 s3, 0x80008 +; GFX10-NEXT: s_lshr_b32 s5, s1, 8 ; GFX10-NEXT: s_lshr_b32 s6, s1, 16 ; GFX10-NEXT: s_lshr_b32 s7, s1, 24 +; GFX10-NEXT: s_lshl_b32 s0, s0, s3 +; GFX10-NEXT: s_lshl_b32 s4, s4, 8 ; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s5 -; GFX10-NEXT: s_lshr_b32 s8, s0, 16 +; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s4 +; GFX10-NEXT: s_pack_ll_b32_b16 s4, s6, s7 +; GFX10-NEXT: s_lshr_b32 s8, s2, 16 ; GFX10-NEXT: s_lshr_b32 s5, s1, 16 -; GFX10-NEXT: s_mov_b32 s2, 0x80008 +; GFX10-NEXT: s_lshr_b32 s6, s4, 16 +; GFX10-NEXT: s_lshl_b32 s2, s2, s3 ; GFX10-NEXT: s_lshl_b32 s8, s8, 8 -; GFX10-NEXT: s_lshl_b32 s0, s0, s2 -; GFX10-NEXT: s_lshl_b32 s1, s1, s2 +; GFX10-NEXT: s_lshl_b32 s1, s1, s3 ; GFX10-NEXT: s_lshl_b32 s5, s5, 8 -; GFX10-NEXT: s_pack_ll_b32_b16 s3, s3, s4 -; GFX10-NEXT: s_pack_ll_b32_b16 s6, s6, s7 +; GFX10-NEXT: s_lshl_b32 s3, s4, s3 +; GFX10-NEXT: s_lshl_b32 s4, s6, 8 ; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s5 -; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s8 -; GFX10-NEXT: s_lshr_b32 s4, s3, 16 -; GFX10-NEXT: s_lshr_b32 s5, s6, 16 +; GFX10-NEXT: s_pack_ll_b32_b16 s2, s2, s8 +; GFX10-NEXT: s_pack_ll_b32_b16 s3, s3, s4 ; GFX10-NEXT: v_pk_add_u16 v0, s0, s1 clamp -; GFX10-NEXT: s_lshl_b32 s3, s3, s2 -; GFX10-NEXT: s_lshl_b32 s4, s4, 8 -; GFX10-NEXT: s_lshl_b32 s0, s6, s2 -; GFX10-NEXT: s_lshl_b32 s1, s5, 8 -; GFX10-NEXT: s_pack_ll_b32_b16 s2, s3, s4 -; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s1 +; GFX10-NEXT: v_pk_add_u16 v1, s2, s3 clamp +; GFX10-NEXT: s_mov_b32 s0, 8 +; GFX10-NEXT: s_movk_i32 s1, 0xff ; GFX10-NEXT: v_pk_lshrrev_b16 v0, 8, v0 op_sel_hi:[0,1] -; GFX10-NEXT: v_pk_add_u16 v1, s2, s0 clamp -; GFX10-NEXT: s_movk_i32 s0, 0xff -; GFX10-NEXT: v_and_b32_sdwa v2, v0, s0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX10-NEXT: v_pk_lshrrev_b16 v1, 8, v1 op_sel_hi:[0,1] -; GFX10-NEXT: v_lshlrev_b32_e32 v2, 8, v2 -; GFX10-NEXT: v_and_b32_e32 v3, s0, v1 -; GFX10-NEXT: v_and_b32_sdwa v1, v1, s0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_or_b32 v0, v0, s0, v2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_b32_e32 v3, s1, v1 +; GFX10-NEXT: s_mov_b32 s0, 24 +; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v0, v0, s1, v2 ; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v3 ; GFX10-NEXT: v_or3_b32 v0, v0, v2, v1 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ubfx.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ubfx.ll new file mode 100644 index 0000000000000..d4657dfc2b506 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ubfx.ll @@ -0,0 +1,112 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji -o - < %s | FileCheck --check-prefix=GCN %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -o - < %s | FileCheck --check-prefix=GCN %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -o - < %s | FileCheck --check-prefix=GFX10 %s + +; Test vector bitfield extract. +define i32 @v_srl_mask_i32(i32 %value) { +; GCN-LABEL: v_srl_mask_i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_bfe_u32 v0, v0, 8, 5 +; GCN-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_srl_mask_i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_bfe_u32 v0, v0, 8, 5 +; GFX10-NEXT: s_setpc_b64 s[30:31] + %1 = lshr i32 %value, 8 + %2 = and i32 %1, 31 + ret i32 %2 +} + +; Test scalar bitfield extract. +define amdgpu_ps i32 @s_srl_mask_i32(i32 inreg %value) { +; GCN-LABEL: s_srl_mask_i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_bfe_u32 s0, s0, 0x50008 +; GCN-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: s_srl_mask_i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_bfe_u32 s0, s0, 0x50008 +; GFX10-NEXT: ; return to shader part epilog + %1 = lshr i32 %value, 8 + %2 = and i32 %1, 31 + ret i32 %2 +} + +; Don't generate G_UBFX if the offset + width is too big. +define amdgpu_ps i32 @s_srl_big_mask_i32(i32 inreg %value) { +; GCN-LABEL: s_srl_big_mask_i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_lshr_b32 s0, s0, 30 +; GCN-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: s_srl_big_mask_i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_lshr_b32 s0, s0, 30 +; GFX10-NEXT: ; return to shader part epilog + %1 = lshr i32 %value, 30 + %2 = and i32 %1, 31 + ret i32 %2 +} + +; Test vector bitfield extract for 64-bits. +define i64 @v_srl_mask_i64(i64 %value) { +; GCN-LABEL: v_srl_mask_i64: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_lshrrev_b64 v[0:1], 25, v[0:1] +; GCN-NEXT: v_mov_b32_e32 v1, 0 +; GCN-NEXT: v_bfe_u32 v0, v0, 0, 10 +; GCN-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_srl_mask_i64: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_lshrrev_b64 v[0:1], 25, v[0:1] +; GFX10-NEXT: v_mov_b32_e32 v1, 0 +; GFX10-NEXT: v_bfe_u32 v0, v0, 0, 10 +; GFX10-NEXT: s_setpc_b64 s[30:31] + %1 = lshr i64 %value, 25 + %2 = and i64 %1, 1023 + ret i64 %2 +} + +; Test scalar bitfield extract for 64-bits. +define amdgpu_ps i64 @s_srl_mask_i64(i64 inreg %value) { +; GCN-LABEL: s_srl_mask_i64: +; GCN: ; %bb.0: +; GCN-NEXT: s_bfe_u64 s[0:1], s[0:1], 0xa0019 +; GCN-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: s_srl_mask_i64: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_bfe_u64 s[0:1], s[0:1], 0xa0019 +; GFX10-NEXT: ; return to shader part epilog + %1 = lshr i64 %value, 25 + %2 = and i64 %1, 1023 + ret i64 %2 +} + +; Don't generate G_UBFX if the offset + width is too big. +define amdgpu_ps i64 @s_srl_big_mask_i64(i64 inreg %value) { +; GCN-LABEL: s_srl_big_mask_i64: +; GCN: ; %bb.0: +; GCN-NEXT: s_lshr_b32 s0, s1, 28 +; GCN-NEXT: s_mov_b32 s1, 0 +; GCN-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: s_srl_big_mask_i64: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_lshr_b32 s0, s1, 28 +; GFX10-NEXT: s_mov_b32 s1, 0 +; GFX10-NEXT: ; return to shader part epilog + %1 = lshr i64 %value, 60 + %2 = and i64 %1, 63 + ret i64 %2 +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll index 208cb92876110..89c4569aaa291 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll @@ -1896,18 +1896,16 @@ define amdgpu_kernel void @udivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64 define amdgpu_kernel void @udiv_i8(i8 addrspace(1)* %out0, i8 addrspace(1)* %out1, i8 %x, i8 %y) { ; GFX8-LABEL: udiv_i8: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dword s1, s[4:5], 0x10 -; GFX8-NEXT: s_movk_i32 s0, 0xff +; GFX8-NEXT: s_load_dword s0, s[4:5], 0x10 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_lshr_b32 s2, s1, 8 -; GFX8-NEXT: s_and_b32 s6, s2, s0 +; GFX8-NEXT: s_bfe_u32 s6, s0, 0x80008 ; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v0, s6 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX8-NEXT: s_sub_i32 s2, 0, s6 -; GFX8-NEXT: s_and_b32 s7, s1, s0 +; GFX8-NEXT: s_sub_i32 s1, 0, s6 +; GFX8-NEXT: s_and_b32 s7, s0, 0xff ; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX8-NEXT: v_mul_lo_u32 v1, s2, v0 +; GFX8-NEXT: v_mul_lo_u32 v1, s1, v0 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX8-NEXT: v_mul_hi_u32 v1, v0, v1 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1 @@ -1935,19 +1933,17 @@ define amdgpu_kernel void @udiv_i8(i8 addrspace(1)* %out0, i8 addrspace(1)* %out ; ; GFX9-LABEL: udiv_i8: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dword s1, s[4:5], 0x10 -; GFX9-NEXT: s_movk_i32 s0, 0xff +; GFX9-NEXT: s_load_dword s0, s[4:5], 0x10 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_lshr_b32 s2, s1, 8 -; GFX9-NEXT: s_and_b32 s6, s2, s0 +; GFX9-NEXT: s_bfe_u32 s6, s0, 0x80008 ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v0, s6 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX9-NEXT: s_sub_i32 s2, 0, s6 -; GFX9-NEXT: s_and_b32 s7, s1, s0 +; GFX9-NEXT: s_sub_i32 s1, 0, s6 +; GFX9-NEXT: s_and_b32 s7, s0, 0xff ; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX9-NEXT: v_mul_lo_u32 v1, s2, v0 +; GFX9-NEXT: v_mul_lo_u32 v1, s1, v0 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1 ; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 @@ -1972,17 +1968,15 @@ define amdgpu_kernel void @udiv_i8(i8 addrspace(1)* %out0, i8 addrspace(1)* %out ; GFX10-LABEL: udiv_i8: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dword s0, s[4:5], 0x10 -; GFX10-NEXT: s_movk_i32 s1, 0xff ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_lshr_b32 s2, s0, 8 -; GFX10-NEXT: s_and_b32 s0, s0, s1 -; GFX10-NEXT: s_and_b32 s6, s2, s1 +; GFX10-NEXT: s_bfe_u32 s6, s0, 0x80008 +; GFX10-NEXT: s_and_b32 s0, s0, 0xff ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, s6 -; GFX10-NEXT: s_sub_i32 s2, 0, s6 +; GFX10-NEXT: s_sub_i32 s1, 0, s6 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX10-NEXT: v_mul_lo_u32 v1, s2, v0 +; GFX10-NEXT: v_mul_lo_u32 v1, s1, v0 ; GFX10-NEXT: v_mul_hi_u32 v1, v0, v1 ; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1 ; GFX10-NEXT: v_mul_hi_u32 v0, s0, v0 @@ -2014,106 +2008,113 @@ define amdgpu_kernel void @udiv_i8(i8 addrspace(1)* %out0, i8 addrspace(1)* %out define amdgpu_kernel void @udivrem_v2i8(<2 x i8> addrspace(1)* %out0, <2 x i8> addrspace(1)* %out1, <2 x i8> %x, <2 x i8> %y) { ; GFX8-LABEL: udivrem_v2i8: ; GFX8: ; %bb.0: +; GFX8-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x0 ; GFX8-NEXT: s_load_dword s0, s[4:5], 0x10 -; GFX8-NEXT: s_movk_i32 s2, 0xff -; GFX8-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_lshr_b32 s1, s0, 16 -; GFX8-NEXT: s_and_b32 s3, s1, s2 -; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v0, s3 +; GFX8-NEXT: s_bfe_u32 s2, s0, 0x80010 +; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v0, s2 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX8-NEXT: v_cvt_f32_ubyte3_e32 v1, s0 -; GFX8-NEXT: s_sub_i32 s1, 0, s3 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; GFX8-NEXT: s_sub_i32 s1, 0, s2 ; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX8-NEXT: s_lshr_b32 s8, s0, 24 ; GFX8-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 ; GFX8-NEXT: v_cvt_u32_f32_e32 v1, v1 +; GFX8-NEXT: s_lshr_b32 s3, s0, 24 ; GFX8-NEXT: v_mul_lo_u32 v2, s1, v0 -; GFX8-NEXT: s_and_b32 s1, s0, s2 -; GFX8-NEXT: s_sub_i32 s9, 0, s8 -; GFX8-NEXT: v_mul_lo_u32 v3, s9, v1 +; GFX8-NEXT: s_sub_i32 s1, 0, s3 +; GFX8-NEXT: v_mul_lo_u32 v3, s1, v1 +; GFX8-NEXT: s_and_b32 s1, s0, 0xff ; GFX8-NEXT: v_mul_hi_u32 v2, v0, v2 -; GFX8-NEXT: s_lshr_b32 s0, s0, 8 -; GFX8-NEXT: s_and_b32 s9, s0, s2 +; GFX8-NEXT: s_bfe_u32 s4, s0, 0x80008 +; GFX8-NEXT: v_mul_hi_u32 v3, v1, v3 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 ; GFX8-NEXT: v_mul_hi_u32 v0, s1, v0 -; GFX8-NEXT: v_mul_hi_u32 v2, v1, v3 -; GFX8-NEXT: v_mul_lo_u32 v3, v0, s3 -; GFX8-NEXT: v_add_u32_e32 v4, vcc, 1, v0 -; GFX8-NEXT: v_sub_u32_e32 v3, vcc, s1, v3 -; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s3, v3 -; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc -; GFX8-NEXT: v_subrev_u32_e64 v4, s[0:1], s3, v3 -; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc -; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2 -; GFX8-NEXT: v_mul_hi_u32 v1, s9, v1 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, 1, v0 +; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v3 +; GFX8-NEXT: v_mul_hi_u32 v1, s4, v1 +; GFX8-NEXT: v_mul_lo_u32 v2, v0, s2 +; GFX8-NEXT: v_add_u32_e32 v3, vcc, 1, v0 +; GFX8-NEXT: v_sub_u32_e32 v2, vcc, s1, v2 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s2, v2 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX8-NEXT: v_subrev_u32_e64 v3, s[0:1], s2, v2 +; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc +; GFX8-NEXT: v_add_u32_e32 v3, vcc, 1, v0 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s2, v2 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX8-NEXT: v_mul_lo_u32 v3, v1, s3 +; GFX8-NEXT: v_subrev_u32_e64 v4, s[0:1], s2, v2 +; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GFX8-NEXT: v_add_u32_e32 v4, vcc, 1, v1 +; GFX8-NEXT: v_sub_u32_e32 v3, vcc, s4, v3 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s3, v3 -; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GFX8-NEXT: v_mul_lo_u32 v2, v1, s8 +; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc ; GFX8-NEXT: v_subrev_u32_e64 v4, s[0:1], s3, v3 ; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc ; GFX8-NEXT: v_add_u32_e32 v4, vcc, 1, v1 -; GFX8-NEXT: v_sub_u32_e32 v2, vcc, s9, v2 -; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s8, v2 -; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc -; GFX8-NEXT: v_subrev_u32_e64 v4, s[0:1], s8, v2 -; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc -; GFX8-NEXT: v_add_u32_e32 v4, vcc, 1, v1 -; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s8, v2 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s3, v3 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc -; GFX8-NEXT: v_and_b32_e32 v1, s2, v1 -; GFX8-NEXT: v_subrev_u32_e64 v4, s[0:1], s8, v2 +; GFX8-NEXT: v_subrev_u32_e64 v4, s[0:1], s3, v3 +; GFX8-NEXT: s_movk_i32 s0, 0xff +; GFX8-NEXT: v_and_b32_e32 v1, s0, v1 ; GFX8-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc ; GFX8-NEXT: v_or_b32_sdwa v4, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_mov_b32_e32 v0, s4 -; GFX8-NEXT: v_mov_b32_e32 v1, s5 +; GFX8-NEXT: v_mov_b32_e32 v0, s8 +; GFX8-NEXT: v_mov_b32_e32 v1, s9 ; GFX8-NEXT: flat_store_short v[0:1], v4 -; GFX8-NEXT: v_and_b32_e32 v0, s2, v2 +; GFX8-NEXT: v_and_b32_e32 v0, s0, v3 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, 8, v0 -; GFX8-NEXT: v_or_b32_sdwa v2, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_mov_b32_e32 v0, s6 -; GFX8-NEXT: v_mov_b32_e32 v1, s7 +; GFX8-NEXT: v_or_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_mov_b32_e32 v0, s10 +; GFX8-NEXT: v_mov_b32_e32 v1, s11 ; GFX8-NEXT: flat_store_short v[0:1], v2 ; GFX8-NEXT: s_endpgm ; ; GFX9-LABEL: udivrem_v2i8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dword s0, s[4:5], 0x10 -; GFX9-NEXT: s_movk_i32 s6, 0xff ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_lshr_b32 s1, s0, 16 -; GFX9-NEXT: s_and_b32 s7, s1, s6 -; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v0, s7 -; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX9-NEXT: v_cvt_f32_ubyte3_e32 v1, s0 +; GFX9-NEXT: s_bfe_u32 s6, s0, 0x80010 +; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v1, s6 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v1, v1 -; GFX9-NEXT: s_sub_i32 s1, 0, s7 -; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 -; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX9-NEXT: v_cvt_f32_ubyte3_e32 v0, s0 +; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX9-NEXT: s_sub_i32 s1, 0, s6 ; GFX9-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 ; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 -; GFX9-NEXT: s_lshr_b32 s8, s0, 24 -; GFX9-NEXT: v_mul_lo_u32 v2, s1, v0 -; GFX9-NEXT: s_sub_i32 s1, 0, s8 +; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX9-NEXT: s_lshr_b32 s7, s0, 24 ; GFX9-NEXT: v_mul_lo_u32 v3, s1, v1 -; GFX9-NEXT: s_and_b32 s9, s0, s6 -; GFX9-NEXT: v_mul_hi_u32 v2, v0, v2 -; GFX9-NEXT: s_lshr_b32 s1, s0, 8 +; GFX9-NEXT: s_sub_i32 s2, 0, s7 +; GFX9-NEXT: v_mul_lo_u32 v2, s2, v0 +; GFX9-NEXT: s_and_b32 s8, s0, 0xff ; GFX9-NEXT: v_mul_hi_u32 v3, v1, v3 -; GFX9-NEXT: s_and_b32 s10, s1, s6 +; GFX9-NEXT: s_bfe_u32 s9, s0, 0x80008 +; GFX9-NEXT: v_mul_hi_u32 v2, v0, v2 +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX9-NEXT: v_add_u32_e32 v1, v1, v3 +; GFX9-NEXT: v_mul_hi_u32 v1, s8, v1 ; GFX9-NEXT: v_add_u32_e32 v0, v0, v2 ; GFX9-NEXT: v_mul_hi_u32 v0, s9, v0 -; GFX9-NEXT: v_add_u32_e32 v1, v1, v3 -; GFX9-NEXT: v_mul_hi_u32 v1, s10, v1 -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX9-NEXT: s_movk_i32 s4, 0xff +; GFX9-NEXT: v_mul_lo_u32 v3, v1, s6 +; GFX9-NEXT: v_add_u32_e32 v4, 1, v1 ; GFX9-NEXT: v_mul_lo_u32 v2, v0, s7 -; GFX9-NEXT: v_add_u32_e32 v4, 1, v0 -; GFX9-NEXT: v_mul_lo_u32 v3, v1, s8 +; GFX9-NEXT: v_sub_u32_e32 v3, s8, v3 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s6, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc +; GFX9-NEXT: v_subrev_u32_e32 v4, s6, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc +; GFX9-NEXT: v_add_u32_e32 v4, 1, v1 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s6, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc +; GFX9-NEXT: v_subrev_u32_e32 v4, s6, v3 ; GFX9-NEXT: v_sub_u32_e32 v2, s9, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc +; GFX9-NEXT: v_add_u32_e32 v4, 1, v0 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s7, v2 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc ; GFX9-NEXT: v_subrev_u32_e32 v4, s7, v2 @@ -2121,91 +2122,79 @@ define amdgpu_kernel void @udivrem_v2i8(<2 x i8> addrspace(1)* %out0, <2 x i8> a ; GFX9-NEXT: v_add_u32_e32 v4, 1, v0 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s7, v2 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; GFX9-NEXT: v_and_b32_e32 v0, s4, v0 ; GFX9-NEXT: v_subrev_u32_e32 v4, s7, v2 -; GFX9-NEXT: v_sub_u32_e32 v3, s10, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc -; GFX9-NEXT: v_add_u32_e32 v4, 1, v1 -; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s8, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc -; GFX9-NEXT: v_subrev_u32_e32 v4, s8, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc -; GFX9-NEXT: v_add_u32_e32 v4, 1, v1 -; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s8, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc -; GFX9-NEXT: v_and_b32_e32 v1, s6, v1 -; GFX9-NEXT: v_subrev_u32_e32 v4, s8, v3 -; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; GFX9-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX9-NEXT: v_lshlrev_b16_e32 v0, 8, v0 +; GFX9-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX9-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_store_short v1, v0, s[0:1] -; GFX9-NEXT: v_and_b32_e32 v0, s6, v3 +; GFX9-NEXT: v_and_b32_e32 v0, s4, v2 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, 8, v0 -; GFX9-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX9-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX9-NEXT: global_store_short v1, v0, s[2:3] ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: udivrem_v2i8: ; GFX10: ; %bb.0: +; GFX10-NEXT: s_clause 0x1 ; GFX10-NEXT: s_load_dword s0, s[4:5], 0x10 -; GFX10-NEXT: s_movk_i32 s1, 0xff +; GFX10-NEXT: s_nop 0 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_cvt_f32_ubyte3_e32 v0, s0 -; GFX10-NEXT: s_lshr_b32 s2, s0, 16 -; GFX10-NEXT: s_lshr_b32 s3, s0, 24 -; GFX10-NEXT: s_and_b32 s2, s2, s1 -; GFX10-NEXT: s_sub_i32 s6, 0, s3 -; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v1, s2 +; GFX10-NEXT: s_bfe_u32 s1, s0, 0x80010 +; GFX10-NEXT: s_lshr_b32 s2, s0, 24 +; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v1, s1 +; GFX10-NEXT: s_sub_i32 s3, 0, s2 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v1, v1 ; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX10-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 ; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v1 -; GFX10-NEXT: v_mul_lo_u32 v2, s6, v0 -; GFX10-NEXT: s_sub_i32 s6, 0, s2 -; GFX10-NEXT: v_mul_lo_u32 v3, s6, v1 -; GFX10-NEXT: s_lshr_b32 s6, s0, 8 -; GFX10-NEXT: s_and_b32 s0, s0, s1 -; GFX10-NEXT: s_and_b32 s6, s6, s1 +; GFX10-NEXT: v_mul_lo_u32 v2, s3, v0 +; GFX10-NEXT: s_sub_i32 s3, 0, s1 +; GFX10-NEXT: v_mul_lo_u32 v3, s3, v1 +; GFX10-NEXT: s_bfe_u32 s3, s0, 0x80008 +; GFX10-NEXT: s_and_b32 s0, s0, 0xff ; GFX10-NEXT: v_mul_hi_u32 v2, v0, v2 ; GFX10-NEXT: v_mul_hi_u32 v3, v1, v3 ; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v2 ; GFX10-NEXT: v_add_nc_u32_e32 v1, v1, v3 -; GFX10-NEXT: v_mul_hi_u32 v0, s6, v0 +; GFX10-NEXT: v_mul_hi_u32 v0, s3, v0 ; GFX10-NEXT: v_mul_hi_u32 v1, s0, v1 -; GFX10-NEXT: v_mul_lo_u32 v2, v0, s3 +; GFX10-NEXT: v_mul_lo_u32 v2, v0, s2 ; GFX10-NEXT: v_add_nc_u32_e32 v4, 1, v0 -; GFX10-NEXT: v_mul_lo_u32 v3, v1, s2 +; GFX10-NEXT: v_mul_lo_u32 v3, v1, s1 ; GFX10-NEXT: v_add_nc_u32_e32 v6, 1, v1 -; GFX10-NEXT: v_sub_nc_u32_e32 v2, s6, v2 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 +; GFX10-NEXT: v_sub_nc_u32_e32 v2, s3, v2 ; GFX10-NEXT: v_sub_nc_u32_e32 v3, s0, v3 -; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s3, v2 -; GFX10-NEXT: v_subrev_nc_u32_e32 v5, s3, v2 -; GFX10-NEXT: v_cmp_le_u32_e64 s0, s2, v3 +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s2, v2 +; GFX10-NEXT: v_subrev_nc_u32_e32 v5, s2, v2 +; GFX10-NEXT: v_cmp_le_u32_e64 s0, s1, v3 +; GFX10-NEXT: v_subrev_nc_u32_e32 v7, s1, v3 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo -; GFX10-NEXT: v_subrev_nc_u32_e32 v4, s2, v3 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v6, s0 -; GFX10-NEXT: v_add_nc_u32_e32 v5, 1, v0 -; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s3, v2 -; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v4, s0 -; GFX10-NEXT: v_subrev_nc_u32_e32 v4, s3, v2 +; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v7, s0 +; GFX10-NEXT: v_add_nc_u32_e32 v4, 1, v0 +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s2, v2 +; GFX10-NEXT: v_subrev_nc_u32_e32 v5, s2, v2 ; GFX10-NEXT: v_add_nc_u32_e32 v6, 1, v1 -; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo -; GFX10-NEXT: v_cmp_le_u32_e64 s0, s2, v3 -; GFX10-NEXT: v_subrev_nc_u32_e32 v5, s2, v3 -; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc_lo -; GFX10-NEXT: v_and_b32_sdwa v0, v0, s1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX10-NEXT: v_cmp_le_u32_e64 s0, s1, v3 +; GFX10-NEXT: v_subrev_nc_u32_e32 v7, s1, v3 +; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo +; GFX10-NEXT: s_movk_i32 s1, 0xff ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v6, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v5, s0 +; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v7, s0 +; GFX10-NEXT: v_and_b32_sdwa v0, v0, s1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX10-NEXT: v_and_b32_sdwa v2, v2, s1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX10-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_store_short v1, v0, s[4:5] ; GFX10-NEXT: global_store_short v1, v2, s[6:7] ; GFX10-NEXT: s_endpgm @@ -2538,8 +2527,7 @@ define amdgpu_kernel void @udivrem_i3(i3 addrspace(1)* %out0, i3 addrspace(1)* % ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dword s0, s[4:5], 0x10 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_lshr_b32 s1, s0, 8 -; GFX8-NEXT: s_and_b32 s6, s1, 7 +; GFX8-NEXT: s_bfe_u32 s6, s0, 0x30008 ; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v0, s6 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX8-NEXT: s_sub_i32 s1, 0, s6 @@ -2577,8 +2565,7 @@ define amdgpu_kernel void @udivrem_i3(i3 addrspace(1)* %out0, i3 addrspace(1)* % ; GFX9-NEXT: s_load_dword s0, s[4:5], 0x10 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_lshr_b32 s1, s0, 8 -; GFX9-NEXT: s_and_b32 s6, s1, 7 +; GFX9-NEXT: s_bfe_u32 s6, s0, 0x30008 ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v0, s6 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX9-NEXT: s_sub_i32 s1, 0, s6 @@ -2611,9 +2598,8 @@ define amdgpu_kernel void @udivrem_i3(i3 addrspace(1)* %out0, i3 addrspace(1)* % ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dword s0, s[4:5], 0x10 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_lshr_b32 s1, s0, 8 +; GFX10-NEXT: s_bfe_u32 s6, s0, 0x30008 ; GFX10-NEXT: s_and_b32 s0, s0, 7 -; GFX10-NEXT: s_and_b32 s6, s1, 7 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, s6 ; GFX10-NEXT: s_sub_i32 s1, 0, s6 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll index 3c73951eb0236..1775b535ec0b0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll @@ -421,33 +421,34 @@ define i32 @v_usubsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) { ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_mov_b32 s4, 8 ; GFX9-NEXT: v_lshrrev_b32_sdwa v2, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX9-NEXT: v_lshrrev_b32_sdwa v5, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v0 ; GFX9-NEXT: v_mov_b32_e32 v8, 0xffff ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v0 ; GFX9-NEXT: v_and_or_b32 v0, v0, v8, v2 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v4 -; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v1 +; GFX9-NEXT: v_lshrrev_b32_sdwa v5, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v1 -; GFX9-NEXT: v_and_or_b32 v1, v1, v8, v5 ; GFX9-NEXT: v_and_or_b32 v2, v3, v8, v2 +; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v1 +; GFX9-NEXT: v_and_or_b32 v1, v1, v8, v5 ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v7 +; GFX9-NEXT: v_and_or_b32 v3, v6, v8, v3 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1] ; GFX9-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1] -; GFX9-NEXT: v_and_or_b32 v3, v6, v8, v3 ; GFX9-NEXT: v_pk_sub_u16 v0, v0, v1 clamp ; GFX9-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1] ; GFX9-NEXT: v_pk_lshlrev_b16 v3, 8, v3 op_sel_hi:[0,1] -; GFX9-NEXT: v_pk_lshrrev_b16 v0, 8, v0 op_sel_hi:[0,1] -; GFX9-NEXT: s_movk_i32 s4, 0xff ; GFX9-NEXT: v_pk_sub_u16 v1, v2, v3 clamp -; GFX9-NEXT: v_and_b32_sdwa v2, v0, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b32_e32 v2, 8, v2 +; GFX9-NEXT: v_pk_lshrrev_b16 v0, 8, v0 op_sel_hi:[0,1] +; GFX9-NEXT: v_mov_b32_e32 v2, 8 ; GFX9-NEXT: v_pk_lshrrev_b16 v1, 8, v1 op_sel_hi:[0,1] +; GFX9-NEXT: s_movk_i32 s4, 0xff +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX9-NEXT: v_and_or_b32 v0, v0, s4, v2 ; GFX9-NEXT: v_and_b32_e32 v2, s4, v1 +; GFX9-NEXT: v_mov_b32_e32 v3, 24 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX9-NEXT: v_and_b32_sdwa v1, v1, s4 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX9-NEXT: v_or3_b32 v0, v0, v2, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -455,33 +456,34 @@ define i32 @v_usubsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_mov_b32 s4, 8 ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v0 +; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v1 +; GFX10-NEXT: s_mov_b32 s4, 8 +; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v0 ; GFX10-NEXT: v_lshrrev_b32_sdwa v2, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX10-NEXT: v_lshrrev_b32_sdwa v6, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX10-NEXT: v_mov_b32_e32 v5, 0xffff -; GFX10-NEXT: v_lshrrev_b32_e32 v7, 24, v1 -; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff ; GFX10-NEXT: v_lshrrev_b32_e32 v8, 16, v1 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX10-NEXT: v_and_or_b32 v0, v0, v5, v2 -; GFX10-NEXT: v_and_or_b32 v1, v1, v5, v6 -; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v7 +; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v5 ; GFX10-NEXT: s_movk_i32 s4, 0xff -; GFX10-NEXT: v_and_or_b32 v3, v3, v5, v4 +; GFX10-NEXT: v_and_or_b32 v0, v0, v7, v2 +; GFX10-NEXT: v_and_or_b32 v1, v1, v7, v6 +; GFX10-NEXT: v_and_or_b32 v2, v3, v7, v4 +; GFX10-NEXT: v_and_or_b32 v3, v8, v7, v5 +; GFX10-NEXT: v_mov_b32_e32 v4, 24 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1] -; GFX10-NEXT: v_and_or_b32 v2, v8, v5, v2 +; GFX10-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_lshlrev_b16 v3, 8, v3 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_sub_u16 v0, v0, v1 clamp -; GFX10-NEXT: v_pk_lshlrev_b16 v1, 8, v2 op_sel_hi:[0,1] +; GFX10-NEXT: v_pk_sub_u16 v1, v2, v3 clamp +; GFX10-NEXT: v_mov_b32_e32 v2, 8 ; GFX10-NEXT: v_pk_lshrrev_b16 v0, 8, v0 op_sel_hi:[0,1] -; GFX10-NEXT: v_pk_sub_u16 v1, v3, v1 clamp -; GFX10-NEXT: v_and_b32_sdwa v2, v0, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX10-NEXT: v_pk_lshrrev_b16 v1, 8, v1 op_sel_hi:[0,1] -; GFX10-NEXT: v_lshlrev_b32_e32 v2, 8, v2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX10-NEXT: v_and_b32_e32 v3, s4, v1 -; GFX10-NEXT: v_and_b32_sdwa v1, v1, s4 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX10-NEXT: v_and_or_b32 v0, v0, s4, v2 ; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v3 ; GFX10-NEXT: v_or3_b32 v0, v0, v2, v1 @@ -569,46 +571,47 @@ define amdgpu_ps i32 @s_usubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) { ; ; GFX9-LABEL: s_usubsat_v4i8: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_lshr_b32 s2, s0, 8 -; GFX9-NEXT: s_lshr_b32 s3, s0, 16 -; GFX9-NEXT: s_lshr_b32 s4, s0, 24 -; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s2 -; GFX9-NEXT: s_pack_ll_b32_b16 s2, s3, s4 +; GFX9-NEXT: s_lshr_b32 s3, s0, 8 ; GFX9-NEXT: s_lshr_b32 s4, s0, 16 -; GFX9-NEXT: s_mov_b32 s3, 0x80008 -; GFX9-NEXT: s_lshr_b32 s5, s1, 8 -; GFX9-NEXT: s_lshl_b32 s0, s0, s3 -; GFX9-NEXT: s_lshl_b32 s4, s4, 8 -; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s4 -; GFX9-NEXT: s_lshr_b32 s4, s2, 16 -; GFX9-NEXT: s_lshr_b32 s6, s1, 16 -; GFX9-NEXT: s_lshr_b32 s7, s1, 24 -; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s5 -; GFX9-NEXT: s_lshr_b32 s5, s1, 16 -; GFX9-NEXT: s_lshl_b32 s2, s2, s3 -; GFX9-NEXT: s_lshl_b32 s4, s4, 8 -; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s4 -; GFX9-NEXT: s_pack_ll_b32_b16 s4, s6, s7 -; GFX9-NEXT: s_lshl_b32 s1, s1, s3 -; GFX9-NEXT: s_lshl_b32 s5, s5, 8 -; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s5 -; GFX9-NEXT: s_lshr_b32 s5, s4, 16 +; GFX9-NEXT: s_lshr_b32 s6, s0, 24 +; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s3 +; GFX9-NEXT: s_pack_ll_b32_b16 s3, s4, s6 +; GFX9-NEXT: s_lshr_b32 s6, s0, 16 +; GFX9-NEXT: s_mov_b32 s4, 0x80008 +; GFX9-NEXT: s_lshr_b32 s7, s1, 8 +; GFX9-NEXT: s_lshl_b32 s0, s0, s4 +; GFX9-NEXT: s_lshl_b32 s6, s6, 8 +; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s6 +; GFX9-NEXT: s_lshr_b32 s6, s3, 16 +; GFX9-NEXT: s_lshr_b32 s8, s1, 16 +; GFX9-NEXT: s_lshr_b32 s9, s1, 24 +; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s7 +; GFX9-NEXT: s_lshr_b32 s7, s1, 16 +; GFX9-NEXT: s_lshl_b32 s3, s3, s4 +; GFX9-NEXT: s_lshl_b32 s6, s6, 8 +; GFX9-NEXT: s_pack_ll_b32_b16 s3, s3, s6 +; GFX9-NEXT: s_pack_ll_b32_b16 s6, s8, s9 +; GFX9-NEXT: s_lshl_b32 s1, s1, s4 +; GFX9-NEXT: s_lshl_b32 s7, s7, 8 +; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s7 +; GFX9-NEXT: s_lshr_b32 s7, s6, 16 +; GFX9-NEXT: s_lshl_b32 s4, s6, s4 +; GFX9-NEXT: s_lshl_b32 s6, s7, 8 ; GFX9-NEXT: v_mov_b32_e32 v0, s1 -; GFX9-NEXT: s_lshl_b32 s3, s4, s3 -; GFX9-NEXT: s_lshl_b32 s4, s5, 8 +; GFX9-NEXT: s_pack_ll_b32_b16 s4, s4, s6 ; GFX9-NEXT: v_pk_sub_u16 v0, s0, v0 clamp -; GFX9-NEXT: s_pack_ll_b32_b16 s3, s3, s4 -; GFX9-NEXT: v_mov_b32_e32 v1, s3 +; GFX9-NEXT: v_mov_b32_e32 v1, s4 +; GFX9-NEXT: v_pk_sub_u16 v1, s3, v1 clamp +; GFX9-NEXT: s_mov_b32 s2, 8 ; GFX9-NEXT: v_pk_lshrrev_b16 v0, 8, v0 op_sel_hi:[0,1] -; GFX9-NEXT: s_movk_i32 s0, 0xff -; GFX9-NEXT: v_pk_sub_u16 v1, s2, v1 clamp -; GFX9-NEXT: v_and_b32_sdwa v2, v0, s0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b32_e32 v2, 8, v2 ; GFX9-NEXT: v_pk_lshrrev_b16 v1, 8, v1 op_sel_hi:[0,1] +; GFX9-NEXT: s_movk_i32 s0, 0xff +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, s2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX9-NEXT: v_and_or_b32 v0, v0, s0, v2 ; GFX9-NEXT: v_and_b32_e32 v2, s0, v1 +; GFX9-NEXT: s_mov_b32 s5, 24 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX9-NEXT: v_and_b32_sdwa v1, v1, s0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; GFX9-NEXT: v_or3_b32 v0, v0, v2, v1 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0 ; GFX9-NEXT: ; return to shader part epilog @@ -616,42 +619,43 @@ define amdgpu_ps i32 @s_usubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) { ; GFX10-LABEL: s_usubsat_v4i8: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_lshr_b32 s2, s0, 8 -; GFX10-NEXT: s_lshr_b32 s5, s1, 8 ; GFX10-NEXT: s_lshr_b32 s3, s0, 16 ; GFX10-NEXT: s_lshr_b32 s4, s0, 24 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s2 +; GFX10-NEXT: s_pack_ll_b32_b16 s2, s3, s4 +; GFX10-NEXT: s_lshr_b32 s4, s0, 16 +; GFX10-NEXT: s_mov_b32 s3, 0x80008 +; GFX10-NEXT: s_lshr_b32 s5, s1, 8 ; GFX10-NEXT: s_lshr_b32 s6, s1, 16 ; GFX10-NEXT: s_lshr_b32 s7, s1, 24 +; GFX10-NEXT: s_lshl_b32 s0, s0, s3 +; GFX10-NEXT: s_lshl_b32 s4, s4, 8 ; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s5 -; GFX10-NEXT: s_lshr_b32 s8, s0, 16 +; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s4 +; GFX10-NEXT: s_pack_ll_b32_b16 s4, s6, s7 +; GFX10-NEXT: s_lshr_b32 s8, s2, 16 ; GFX10-NEXT: s_lshr_b32 s5, s1, 16 -; GFX10-NEXT: s_mov_b32 s2, 0x80008 +; GFX10-NEXT: s_lshr_b32 s6, s4, 16 +; GFX10-NEXT: s_lshl_b32 s2, s2, s3 ; GFX10-NEXT: s_lshl_b32 s8, s8, 8 -; GFX10-NEXT: s_lshl_b32 s0, s0, s2 -; GFX10-NEXT: s_lshl_b32 s1, s1, s2 +; GFX10-NEXT: s_lshl_b32 s1, s1, s3 ; GFX10-NEXT: s_lshl_b32 s5, s5, 8 -; GFX10-NEXT: s_pack_ll_b32_b16 s3, s3, s4 -; GFX10-NEXT: s_pack_ll_b32_b16 s6, s6, s7 +; GFX10-NEXT: s_lshl_b32 s3, s4, s3 +; GFX10-NEXT: s_lshl_b32 s4, s6, 8 ; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s5 -; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s8 -; GFX10-NEXT: s_lshr_b32 s4, s3, 16 -; GFX10-NEXT: s_lshr_b32 s5, s6, 16 +; GFX10-NEXT: s_pack_ll_b32_b16 s2, s2, s8 +; GFX10-NEXT: s_pack_ll_b32_b16 s3, s3, s4 ; GFX10-NEXT: v_pk_sub_u16 v0, s0, s1 clamp -; GFX10-NEXT: s_lshl_b32 s3, s3, s2 -; GFX10-NEXT: s_lshl_b32 s4, s4, 8 -; GFX10-NEXT: s_lshl_b32 s0, s6, s2 -; GFX10-NEXT: s_lshl_b32 s1, s5, 8 -; GFX10-NEXT: s_pack_ll_b32_b16 s2, s3, s4 -; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s1 +; GFX10-NEXT: v_pk_sub_u16 v1, s2, s3 clamp +; GFX10-NEXT: s_mov_b32 s0, 8 +; GFX10-NEXT: s_movk_i32 s1, 0xff ; GFX10-NEXT: v_pk_lshrrev_b16 v0, 8, v0 op_sel_hi:[0,1] -; GFX10-NEXT: v_pk_sub_u16 v1, s2, s0 clamp -; GFX10-NEXT: s_movk_i32 s0, 0xff -; GFX10-NEXT: v_and_b32_sdwa v2, v0, s0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX10-NEXT: v_pk_lshrrev_b16 v1, 8, v1 op_sel_hi:[0,1] -; GFX10-NEXT: v_lshlrev_b32_e32 v2, 8, v2 -; GFX10-NEXT: v_and_b32_e32 v3, s0, v1 -; GFX10-NEXT: v_and_b32_sdwa v1, v1, s0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_or_b32 v0, v0, s0, v2 +; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_b32_e32 v3, s1, v1 +; GFX10-NEXT: s_mov_b32 s0, 24 +; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX10-NEXT: v_and_or_b32 v0, v0, s1, v2 ; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v3 ; GFX10-NEXT: v_or3_b32 v0, v0, v2, v1 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0 From 40240877310897ad0643e71dfa0b86a149ca3a61 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Mon, 28 Jun 2021 09:14:46 -0400 Subject: [PATCH 040/619] [OpenMP][NFC] Fix missing argument --- llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index 2410e52848cb0..d2831b2fa7ef2 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -2668,7 +2668,7 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) { SetVector Functions(SCC.begin(), SCC.end()); OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ Functions, Kernels); - unsigned MaxFixponitIterations = (isOpenMPDevice) ? 128 : 32; + unsigned MaxFixponitIterations = (isOpenMPDevice(M)) ? 128 : 32; Attributor A(Functions, InfoCache, CGUpdater, nullptr, true, false, MaxFixponitIterations, OREGetter, DEBUG_TYPE); From 1dd2d15b50fd471a20ad8b06b349c855f333fa48 Mon Sep 17 00:00:00 2001 From: Ben Shi Date: Mon, 28 Jun 2021 21:19:26 +0800 Subject: [PATCH 041/619] [AVR][test] Add a new test: functions with struct return type Reviewed By: dylanmckay Differential Revision: https://reviews.llvm.org/D99239 --- llvm/test/CodeGen/AVR/struct.ll | 87 +++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100644 llvm/test/CodeGen/AVR/struct.ll diff --git a/llvm/test/CodeGen/AVR/struct.ll b/llvm/test/CodeGen/AVR/struct.ll new file mode 100644 index 0000000000000..3d1eb83253c6f --- /dev/null +++ b/llvm/test/CodeGen/AVR/struct.ll @@ -0,0 +1,87 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=avr < %s | FileCheck %s --check-prefix=CHECKA +; RUN: llc -mtriple=avr -mattr=+movw < %s | FileCheck %s --check-prefix=CHECKB + +%struct.s10 = type { i16, i16, i16, i16, i16 } +%struct.s06 = type { i16, i16, i16 } +%struct.s04 = type { i16, i16 } + +define void @foo10(%struct.s10* sret(%struct.s10) %0, i16 %1, i16 %2, i16 %3) addrspace(1) { +; CHECKA-LABEL: foo10: +; CHECKA: ; %bb.0: +; CHECKA-NEXT: mov r30, r24 +; CHECKA-NEXT: mov r31, r25 +; CHECKA-NEXT: std Z+4, r22 +; CHECKA-NEXT: std Z+5, r23 +; CHECKA-NEXT: std Z+2, r20 +; CHECKA-NEXT: std Z+3, r21 +; CHECKA-NEXT: st Z, r18 +; CHECKA-NEXT: std Z+1, r19 +; CHECKA-NEXT: ret +; +; CHECKB-LABEL: foo10: +; CHECKB: ; %bb.0: +; CHECKB-NEXT: movw r30, r24 +; CHECKB-NEXT: std Z+4, r22 +; CHECKB-NEXT: std Z+5, r23 +; CHECKB-NEXT: std Z+2, r20 +; CHECKB-NEXT: std Z+3, r21 +; CHECKB-NEXT: st Z, r18 +; CHECKB-NEXT: std Z+1, r19 +; CHECKB-NEXT: ret + %5 = getelementptr inbounds %struct.s10, %struct.s10* %0, i16 0, i32 0 + store i16 %3, i16* %5 + %6 = getelementptr inbounds %struct.s10, %struct.s10* %0, i16 0, i32 1 + store i16 %2, i16* %6 + %7 = getelementptr inbounds %struct.s10, %struct.s10* %0, i16 0, i32 2 + store i16 %1, i16* %7 + ret void +} + +define %struct.s06 @foo06(i16 %0, i16 %1, i16 %2) addrspace(1) { +; CHECKA-LABEL: foo06: +; CHECKA: ; %bb.0: +; CHECKA-NEXT: mov r30, r20 +; CHECKA-NEXT: mov r31, r21 +; CHECKA-NEXT: mov r20, r22 +; CHECKA-NEXT: mov r21, r23 +; CHECKA-NEXT: mov r18, r24 +; CHECKA-NEXT: mov r19, r25 +; CHECKA-NEXT: mov r22, r30 +; CHECKA-NEXT: mov r23, r31 +; CHECKA-NEXT: ret +; +; CHECKB-LABEL: foo06: +; CHECKB: ; %bb.0: +; CHECKB-NEXT: movw r30, r20 +; CHECKB-NEXT: movw r20, r22 +; CHECKB-NEXT: movw r18, r24 +; CHECKB-NEXT: movw r22, r30 +; CHECKB-NEXT: ret + %4 = insertvalue %struct.s06 undef, i16 %0, 0 + %5 = insertvalue %struct.s06 %4, i16 %1, 1 + %6 = insertvalue %struct.s06 %5, i16 %2, 2 + ret %struct.s06 %6 +} + +define %struct.s04 @foo04(i16 %0, i16 %1) addrspace(1) { +; CHECKA-LABEL: foo04: +; CHECKA: ; %bb.0: +; CHECKA-NEXT: mov r18, r22 +; CHECKA-NEXT: mov r19, r23 +; CHECKA-NEXT: mov r22, r24 +; CHECKA-NEXT: mov r23, r25 +; CHECKA-NEXT: mov r24, r18 +; CHECKA-NEXT: mov r25, r19 +; CHECKA-NEXT: ret +; +; CHECKB-LABEL: foo04: +; CHECKB: ; %bb.0: +; CHECKB-NEXT: movw r18, r22 +; CHECKB-NEXT: movw r22, r24 +; CHECKB-NEXT: movw r24, r18 +; CHECKB-NEXT: ret + %3 = insertvalue %struct.s04 undef, i16 %0, 0 + %4 = insertvalue %struct.s04 %3, i16 %1, 1 + ret %struct.s04 %4 +} From fe0e861a4d9946a3e7de1bc95a3ec12fa602b492 Mon Sep 17 00:00:00 2001 From: Gabor Marton Date: Mon, 28 Jun 2021 15:41:30 +0200 Subject: [PATCH 042/619] [Analyzer] Attempt to fix windows bots test failure b/c of new-line --- clang/test/Analysis/expr-inspection-printState-diseq-info.c | 1 + clang/test/Analysis/expr-inspection-printState-eq-classes.c | 1 + 2 files changed, 2 insertions(+) diff --git a/clang/test/Analysis/expr-inspection-printState-diseq-info.c b/clang/test/Analysis/expr-inspection-printState-diseq-info.c index fe2ee324105fa..3f11ed61af5e7 100644 --- a/clang/test/Analysis/expr-inspection-printState-diseq-info.c +++ b/clang/test/Analysis/expr-inspection-printState-diseq-info.c @@ -1,5 +1,6 @@ // RUN: %clang_analyze_cc1 \ // RUN: -analyzer-checker=debug.ExprInspection %s 2>&1 | FileCheck %s +// UNSUPPORTED: windows void clang_analyzer_printState(); diff --git a/clang/test/Analysis/expr-inspection-printState-eq-classes.c b/clang/test/Analysis/expr-inspection-printState-eq-classes.c index 5b40ac5cd47e9..7daa8648c2494 100644 --- a/clang/test/Analysis/expr-inspection-printState-eq-classes.c +++ b/clang/test/Analysis/expr-inspection-printState-eq-classes.c @@ -1,5 +1,6 @@ // RUN: %clang_analyze_cc1 \ // RUN: -analyzer-checker=debug.ExprInspection %s 2>&1 | FileCheck %s +// UNSUPPORTED: windows void clang_analyzer_printState(); From 13b2fba2398d433a5f5f9534c737c434dfb0c7b3 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Mon, 28 Jun 2021 09:48:54 -0400 Subject: [PATCH 043/619] [OpenMP][NFC] Fix typo in OpenMPOpt --- llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index d2831b2fa7ef2..3765378634611 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -2668,9 +2668,9 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) { SetVector Functions(SCC.begin(), SCC.end()); OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ Functions, Kernels); - unsigned MaxFixponitIterations = (isOpenMPDevice(M)) ? 128 : 32; + unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? 128 : 32; Attributor A(Functions, InfoCache, CGUpdater, nullptr, true, false, - MaxFixponitIterations, OREGetter, DEBUG_TYPE); + MaxFixpointIterations, OREGetter, DEBUG_TYPE); OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); bool Changed = OMPOpt.run(true); @@ -2720,9 +2720,9 @@ PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C, OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator, /*CGSCC*/ Functions, Kernels); - unsigned MaxFixponitIterations = (isOpenMPDevice(M)) ? 128 : 32; + unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? 128 : 32; Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true, - MaxFixponitIterations, OREGetter, DEBUG_TYPE); + MaxFixpointIterations, OREGetter, DEBUG_TYPE); OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); bool Changed = OMPOpt.run(false); @@ -2786,9 +2786,9 @@ struct OpenMPOptCGSCCLegacyPass : public CallGraphSCCPass { Allocator, /*CGSCC*/ Functions, Kernels); - unsigned MaxFixponitIterations = (isOpenMPDevice(M)) ? 128 : 32; + unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? 128 : 32; Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true, - MaxFixponitIterations, OREGetter, DEBUG_TYPE); + MaxFixpointIterations, OREGetter, DEBUG_TYPE); OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); return OMPOpt.run(false); From ae983de6cce23dfeeb629395f86528e911218ba4 Mon Sep 17 00:00:00 2001 From: Reshabh Sharma Date: Mon, 28 Jun 2021 19:18:39 +0530 Subject: [PATCH 044/619] [InferAddressSpaces] NFC: For noop IntToPtr/PtrToInt pair cast to operator instead of PtrToInt Compiler crashes at an assertion while casting operands to PtrToIntInst at some cases when ptrtoint is present as an explicit operand to inttoptr. Explicit instruction operator as operand can not be casted to an Instruction. This patch replaces cast from PtrToInst to Operator which are later checked for constant expressions. Differential Revision: https://reviews.llvm.org/D105002 --- llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp | 2 +- .../InferAddressSpaces/AMDGPU/noop-ptrint-pair.ll | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp index 332eb10ac16b8..aa26bf11c2990 100644 --- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp +++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp @@ -471,7 +471,7 @@ InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) const { } else if (auto *I2P = dyn_cast(&I)) { if (isNoopPtrIntCastPair(cast(I2P), *DL, TTI)) PushPtrOperand( - cast(I2P->getOperand(0))->getPointerOperand()); + cast(I2P->getOperand(0))->getOperand(0)); } } diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/noop-ptrint-pair.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/noop-ptrint-pair.ll index 24cab4f7bf6ea..d45b0ab8803f7 100644 --- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/noop-ptrint-pair.ll +++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/noop-ptrint-pair.ll @@ -69,6 +69,16 @@ define i32* @noop_ptrint_pair_ce2() { ret i32* inttoptr (i64 ptrtoint (i32 addrspace(1)* @g to i64) to i32*) } +; COMMON-LABEL: @noop_ptrint_pair_ce3( +; AMDGCN-NEXT: %i = inttoptr i64 ptrtoint (i32 addrspace(1)* @g to i64) to i32* +; AMDGCN-NEXT: ret void +; NOTTI-NEXT: %i = inttoptr i64 ptrtoint (i32 addrspace(1)* @g to i64) to i32* +; NOTTI-NEXT: ret void +define void @noop_ptrint_pair_ce3() { + %i = inttoptr i64 ptrtoint (i32 addrspace(1)* @g to i64) to i32* + ret void +} + ; COMMON-LABEL: @non_noop_ptrint_pair_ce( ; AMDGCN-NEXT: store i32 0, i32* inttoptr (i64 ptrtoint (i32 addrspace(3)* @l to i64) to i32*) ; AMDGCN-NEXT: ret void From 8d5c0b8768f729d48e25251755ec12cfd785c934 Mon Sep 17 00:00:00 2001 From: Jonathan Crowther Date: Mon, 28 Jun 2021 09:53:28 -0400 Subject: [PATCH 045/619] [libc++] Remove unnecessary reinterpret_cast from typeinfo In typeinfo there is a reinterpret_cast between a uintptr_t and size_t. These are two integer types and therefore a reinterpret_cast is not right for this situation. It looks like it may have been copied and pasted from above in the file. An implicit cast works in it's place. Reviewed By: ldionne, #libc Differential Revision: https://reviews.llvm.org/D104814 --- libcxx/include/typeinfo | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libcxx/include/typeinfo b/libcxx/include/typeinfo index 65ce53a0e9d7e..6026038ba5f94 100644 --- a/libcxx/include/typeinfo +++ b/libcxx/include/typeinfo @@ -249,7 +249,7 @@ struct __type_info_implementations { _LIBCPP_INLINE_VISIBILITY _LIBCPP_ALWAYS_INLINE static size_t __hash(__type_name_t __v) _NOEXCEPT { if (__is_type_name_unique(__v)) - return reinterpret_cast(__v); + return __v; return __non_unique_impl::__hash(__type_name_to_string(__v)); } _LIBCPP_INLINE_VISIBILITY _LIBCPP_ALWAYS_INLINE From c089e29aa47f8833d4370ac1a87a17f7b3a585cf Mon Sep 17 00:00:00 2001 From: Bradley Smith Date: Fri, 25 Jun 2021 12:11:33 +0100 Subject: [PATCH 046/619] [AArch64][SVE] DAG combine SETCC_MERGE_ZERO of a SETCC_MERGE_ZERO This helps remove extra comparisons when generating masks for fixed length masked operations. Differential Revision: https://reviews.llvm.org/D104910 --- .../Target/AArch64/AArch64ISelLowering.cpp | 23 ++++++++ .../AArch64/sve-fixed-length-masked-loads.ll | 56 ++++--------------- .../AArch64/sve-fixed-length-masked-stores.ll | 16 ++---- 3 files changed, 39 insertions(+), 56 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 9886d6374665b..16bb7eb222723 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -15508,6 +15508,27 @@ static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) { return SDValue(); } +static SDValue performSetccMergeZeroCombine(SDNode *N, SelectionDAG &DAG) { + assert(N->getOpcode() == AArch64ISD::SETCC_MERGE_ZERO && + "Unexpected opcode!"); + + SDValue Pred = N->getOperand(0); + SDValue LHS = N->getOperand(1); + SDValue RHS = N->getOperand(2); + ISD::CondCode Cond = cast(N->getOperand(3))->get(); + + // setcc_merge_zero pred (sign_extend (setcc_merge_zero ... pred ...)), 0, ne + // => inner setcc_merge_zero + if (Cond == ISD::SETNE && isZerosVector(RHS.getNode()) && + LHS->getOpcode() == ISD::SIGN_EXTEND && + LHS->getOperand(0)->getValueType(0) == N->getValueType(0) && + LHS->getOperand(0)->getOpcode() == AArch64ISD::SETCC_MERGE_ZERO && + LHS->getOperand(0)->getOperand(0) == Pred) + return LHS->getOperand(0); + + return SDValue(); +} + // Optimize some simple tbz/tbnz cases. Returns the new operand and bit to test // as well as whether the test should be inverted. This code is required to // catch these cases (as opposed to standard dag combines) because @@ -16366,6 +16387,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, return performSpliceCombine(N, DAG); case AArch64ISD::UZP1: return performUzpCombine(N, DAG); + case AArch64ISD::SETCC_MERGE_ZERO: + return performSetccMergeZeroCombine(N, DAG); case AArch64ISD::GLD1_MERGE_ZERO: case AArch64ISD::GLD1_SCALED_MERGE_ZERO: case AArch64ISD::GLD1_UXTW_MERGE_ZERO: diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll index ecc2ca518df1b..d8c040e3fba29 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll @@ -90,8 +90,6 @@ define <8 x float> @masked_load_v8f32(<8 x float>* %ap, <8 x float>* %bp) #0 { ; CHECK-NEXT: ld1w { [[Z0:z[0-9]+]].s }, p0/z, [x0] ; CHECK-NEXT: ld1w { [[Z1:z[0-9]+]].s }, p0/z, [x1] ; CHECK-NEXT: fcmeq [[PG1:p[0-9]+]].s, [[PG0]]/z, [[Z0]].s, [[Z1]].s -; CHECK-NEXT: mov [[Z0]].s, [[PG1]]/z, #-1 -; CHECK-NEXT: cmpne [[PG1]].s, [[PG0]]/z, [[Z0]].s, #0 ; CHECK-NEXT: ld1w { [[Z0]].s }, [[PG1]]/z, [x0] ; CHECK-NEXT: st1w { [[Z0]].s }, [[PG0]], [x8] ; CHECK-NEXT: ret @@ -108,8 +106,6 @@ define <16 x float> @masked_load_v16f32(<16 x float>* %ap, <16 x float>* %bp) #0 ; VBITS_GE_512-NEXT: ld1w { [[Z0:z[0-9]+]].s }, p0/z, [x0] ; VBITS_GE_512-NEXT: ld1w { [[Z1:z[0-9]+]].s }, p0/z, [x1] ; VBITS_GE_512-NEXT: fcmeq [[PG1:p[0-9]+]].s, [[PG0]]/z, [[Z0]].s, [[Z1]].s -; VBITS_GE_512-NEXT: mov [[Z0]].s, [[PG1]]/z, #-1 -; VBITS_GE_512-NEXT: cmpne [[PG1]].s, [[PG0]]/z, [[Z0]].s, #0 ; VBITS_GE_512-NEXT: ld1w { [[Z0]].s }, [[PG1]]/z, [x{{[0-9]+}}] ; VBITS_GE_512-NEXT: st1w { [[Z0]].s }, [[PG0]], [x8] ; VBITS_GE_512-NEXT: ret @@ -126,8 +122,6 @@ define <32 x float> @masked_load_v32f32(<32 x float>* %ap, <32 x float>* %bp) #0 ; VBITS_GE_1024-NEXT: ld1w { [[Z0:z[0-9]+]].s }, p0/z, [x0] ; VBITS_GE_1024-NEXT: ld1w { [[Z1:z[0-9]+]].s }, p0/z, [x1] ; VBITS_GE_1024-NEXT: fcmeq [[PG1:p[0-9]+]].s, [[PG0]]/z, [[Z0]].s, [[Z1]].s -; VBITS_GE_1024-NEXT: mov [[Z0]].s, [[PG1]]/z, #-1 -; VBITS_GE_1024-NEXT: cmpne [[PG1]].s, [[PG0]]/z, [[Z0]].s, #0 ; VBITS_GE_1024-NEXT: ld1w { [[Z0]].s }, [[PG1]]/z, [x{{[0-9]+}}] ; VBITS_GE_1024-NEXT: st1w { [[Z0]].s }, [[PG0]], [x8] ; VBITS_GE_1024-NEXT: ret @@ -144,8 +138,6 @@ define <64 x float> @masked_load_v64f32(<64 x float>* %ap, <64 x float>* %bp) #0 ; VBITS_GE_2048-NEXT: ld1w { [[Z0:z[0-9]+]].s }, p0/z, [x0] ; VBITS_GE_2048-NEXT: ld1w { [[Z1:z[0-9]+]].s }, p0/z, [x1] ; VBITS_GE_2048-NEXT: fcmeq [[PG1:p[0-9]+]].s, [[PG0]]/z, [[Z0]].s, [[Z1]].s -; VBITS_GE_2048-NEXT: mov [[Z0]].s, [[PG1]]/z, #-1 -; VBITS_GE_2048-NEXT: cmpne [[PG1]].s, [[PG0]]/z, [[Z0]].s, #0 ; VBITS_GE_2048-NEXT: ld1w { [[Z0]].s }, [[PG1]]/z, [x{{[0-9]+}}] ; VBITS_GE_2048-NEXT: st1w { [[Z0]].s }, [[PG0]], [x8] ; VBITS_GE_2048-NEXT: ret @@ -163,8 +155,6 @@ define <64 x i8> @masked_load_v64i8(<64 x i8>* %ap, <64 x i8>* %bp) #0 { ; VBITS_GE_512-NEXT: ld1b { [[Z0:z[0-9]+]].b }, p0/z, [x0] ; VBITS_GE_512-NEXT: ld1b { [[Z1:z[0-9]+]].b }, p0/z, [x1] ; VBITS_GE_512-NEXT: cmpeq [[PG1:p[0-9]+]].b, [[PG0]]/z, [[Z0]].b, [[Z1]].b -; VBITS_GE_512-NEXT: mov [[Z0]].b, [[PG1]]/z, #-1 -; VBITS_GE_512-NEXT: cmpne [[PG1]].b, [[PG0]]/z, [[Z0]].b, #0 ; VBITS_GE_512-NEXT: ld1b { [[Z0]].b }, [[PG1]]/z, [x{{[0-9]+}}] ; VBITS_GE_512-NEXT: st1b { [[Z0]].b }, [[PG0]], [x8] ; VBITS_GE_512-NEXT: ret @@ -181,8 +171,6 @@ define <32 x i16> @masked_load_v32i16(<32 x i16>* %ap, <32 x i16>* %bp) #0 { ; VBITS_GE_512-NEXT: ld1h { [[Z0:z[0-9]+]].h }, p0/z, [x0] ; VBITS_GE_512-NEXT: ld1h { [[Z1:z[0-9]+]].h }, p0/z, [x1] ; VBITS_GE_512-NEXT: cmpeq [[PG1:p[0-9]+]].h, [[PG0]]/z, [[Z0]].h, [[Z1]].h -; VBITS_GE_512-NEXT: mov [[Z0]].h, [[PG1]]/z, #-1 -; VBITS_GE_512-NEXT: cmpne [[PG1]].h, [[PG0]]/z, [[Z0]].h, #0 ; VBITS_GE_512-NEXT: ld1h { [[Z0]].h }, [[PG1]]/z, [x{{[0-9]+}}] ; VBITS_GE_512-NEXT: st1h { [[Z0]].h }, [[PG0]], [x8] ; VBITS_GE_512: ret @@ -199,8 +187,6 @@ define <16 x i32> @masked_load_v16i32(<16 x i32>* %ap, <16 x i32>* %bp) #0 { ; VBITS_GE_512-NEXT: ld1w { [[Z0:z[0-9]+]].s }, p0/z, [x0] ; VBITS_GE_512-NEXT: ld1w { [[Z1:z[0-9]+]].s }, p0/z, [x1] ; VBITS_GE_512-NEXT: cmpeq [[PG1:p[0-9]+]].s, [[PG0]]/z, [[Z0]].s, [[Z1]].s -; VBITS_GE_512-NEXT: mov [[Z0]].s, [[PG1]]/z, #-1 -; VBITS_GE_512-NEXT: cmpne [[PG1]].s, [[PG0]]/z, [[Z0]].s, #0 ; VBITS_GE_512-NEXT: ld1w { [[Z0]].s }, [[PG1]]/z, [x{{[0-9]+}}] ; VBITS_GE_512-NEXT: st1w { [[Z0]].s }, [[PG0]], [x8] ; VBITS_GE_512-NEXT: ret @@ -217,8 +203,6 @@ define <8 x i64> @masked_load_v8i64(<8 x i64>* %ap, <8 x i64>* %bp) #0 { ; VBITS_GE_512-NEXT: ld1d { [[Z0:z[0-9]+]].d }, p0/z, [x0] ; VBITS_GE_512-NEXT: ld1d { [[Z1:z[0-9]+]].d }, p0/z, [x1] ; VBITS_GE_512-NEXT: cmpeq [[PG1:p[0-9]+]].d, [[PG0]]/z, [[Z0]].d, [[Z1]].d -; VBITS_GE_512-NEXT: mov [[Z0]].d, [[PG1]]/z, #-1 -; VBITS_GE_512-NEXT: cmpne [[PG1]].d, [[PG0]]/z, [[Z0]].d, #0 ; VBITS_GE_512-NEXT: ld1d { [[Z0]].d }, [[PG1]]/z, [x{{[0-9]+}}] ; VBITS_GE_512-NEXT: st1d { [[Z0]].d }, [[PG0]], [x8] ; VBITS_GE_512-NEXT: ret @@ -235,8 +219,6 @@ define <8 x i64> @masked_load_passthru_v8i64(<8 x i64>* %ap, <8 x i64>* %bp) #0 ; VBITS_GE_512-NEXT: ld1d { [[Z0:z[0-9]+]].d }, p0/z, [x0] ; VBITS_GE_512-NEXT: ld1d { [[Z1:z[0-9]+]].d }, p0/z, [x1] ; VBITS_GE_512-NEXT: cmpeq [[PG1:p[0-9]+]].d, [[PG0]]/z, [[Z0]].d, [[Z1]].d -; VBITS_GE_512-NEXT: mov [[Z0]].d, [[PG1]]/z, #-1 -; VBITS_GE_512-NEXT: cmpne [[PG1]].d, [[PG0]]/z, [[Z0]].d, #0 ; VBITS_GE_512-NEXT: ld1d { [[Z0]].d }, [[PG1]]/z, [x{{[0-9]+}}] ; VBITS_GE_512-NEXT: sel [[Z2:z[0-9]+]].d, [[PG1]], [[Z0]].d, [[Z1]].d ; VBITS_GE_512-NEXT: st1d { [[Z2]].d }, [[PG0]], [x8] @@ -254,8 +236,6 @@ define <8 x double> @masked_load_passthru_v8f64(<8 x double>* %ap, <8 x double>* ; VBITS_GE_512-NEXT: ld1d { [[Z0:z[0-9]+]].d }, p0/z, [x0] ; VBITS_GE_512-NEXT: ld1d { [[Z1:z[0-9]+]].d }, p0/z, [x1] ; VBITS_GE_512-NEXT: fcmeq [[PG1:p[0-9]+]].d, [[PG0]]/z, [[Z0]].d, [[Z1]].d -; VBITS_GE_512-NEXT: mov [[Z0]].d, [[PG1]]/z, #-1 -; VBITS_GE_512-NEXT: cmpne [[PG1]].d, [[PG0]]/z, [[Z0]].d, #0 ; VBITS_GE_512-NEXT: ld1d { [[Z0]].d }, [[PG1]]/z, [x{{[0-9]+}}] ; VBITS_GE_512-NEXT: sel [[Z2:z[0-9]+]].d, [[PG1]], [[Z0]].d, [[Z1]].d ; VBITS_GE_512-NEXT: st1d { [[Z2]].d }, [[PG0]], [x8] @@ -273,12 +253,10 @@ define <32 x i16> @masked_load_sext_v32i8i16(<32 x i8>* %ap, <32 x i8>* %bp) #0 ; VBITS_GE_512-NEXT: ld1b { [[Z0:z[0-9]+]].b }, p0/z, [x0] ; VBITS_GE_512-NEXT: ld1b { [[Z1:z[0-9]+]].b }, p0/z, [x1] ; VBITS_GE_512-NEXT: cmpeq [[PG1:p[0-9]+]].b, [[PG0]]/z, [[Z0]].b, [[Z1]].b -; VBITS_GE_512-NEXT: mov [[Z0]].b, [[PG1]]/z, #-1 -; VBITS_GE_512-NEXT: cmpne [[PG2:p[0-9]+]].b, [[PG0]]/z, [[Z0]].b, #0 -; VBITS_GE_512-NEXT: ld1b { [[Z0]].b }, [[PG2]]/z, [x{{[0-9]+}}] +; VBITS_GE_512-NEXT: ld1b { [[Z0]].b }, [[PG1]]/z, [x{{[0-9]+}}] ; VBITS_GE_512-NEXT: ptrue [[PG2:p[0-9]+]].h, vl32 ; VBITS_GE_512-NEXT: sunpklo [[Z0]].h, [[Z0]].b -; VBITS_GE_512-NEXT: st1h { [[Z0]].h }, [[PG2]], [x8] +; VBITS_GE_512-NEXT: st1h { [[Z0]].h }, [[PG1]], [x8] ; VBITS_GE_512-NEXT: ret %a = load <32 x i8>, <32 x i8>* %ap %b = load <32 x i8>, <32 x i8>* %bp @@ -337,12 +315,10 @@ define <16 x i32> @masked_load_sext_v16i16i32(<16 x i16>* %ap, <16 x i16>* %bp) ; VBITS_GE_512-NEXT: ld1h { [[Z0:z[0-9]+]].h }, p0/z, [x0] ; VBITS_GE_512-NEXT: ld1h { [[Z1:z[0-9]+]].h }, p0/z, [x1] ; VBITS_GE_512-NEXT: cmpeq [[PG1:p[0-9]+]].h, [[PG0]]/z, [[Z0]].h, [[Z1]].h -; VBITS_GE_512-NEXT: mov [[Z0]].h, [[PG1]]/z, #-1 -; VBITS_GE_512-NEXT: cmpne [[PG2:p[0-9]+]].h, [[PG0]]/z, [[Z0]].h, #0 -; VBITS_GE_512-NEXT: ld1h { [[Z0]].h }, [[PG2]]/z, [x{{[0-9]+}}] +; VBITS_GE_512-NEXT: ld1h { [[Z0]].h }, [[PG1]]/z, [x{{[0-9]+}}] ; VBITS_GE_512-NEXT: ptrue [[PG2:p[0-9]+]].s, vl16 ; VBITS_GE_512-NEXT: sunpklo [[Z0]].s, [[Z0]].h -; VBITS_GE_512-NEXT: st1w { [[Z0]].s }, [[PG2]], [x8] +; VBITS_GE_512-NEXT: st1w { [[Z0]].s }, [[PG1]], [x8] ; VBITS_GE_512-NEXT: ret %a = load <16 x i16>, <16 x i16>* %ap %b = load <16 x i16>, <16 x i16>* %bp @@ -379,12 +355,10 @@ define <8 x i64> @masked_load_sext_v8i32i64(<8 x i32>* %ap, <8 x i32>* %bp) #0 { ; VBITS_GE_512-NEXT: ld1w { [[Z0:z[0-9]+]].s }, p0/z, [x0] ; VBITS_GE_512-NEXT: ld1w { [[Z1:z[0-9]+]].s }, p0/z, [x1] ; VBITS_GE_512-NEXT: cmpeq [[PG1:p[0-9]+]].s, [[PG0]]/z, [[Z0]].s, [[Z1]].s -; VBITS_GE_512-NEXT: mov [[Z0]].s, [[PG1]]/z, #-1 -; VBITS_GE_512-NEXT: cmpne [[PG2:p[0-9]+]].s, [[PG0]]/z, [[Z0]].s, #0 -; VBITS_GE_512-NEXT: ld1w { [[Z0]].s }, [[PG2]]/z, [x{{[0-9]+}}] +; VBITS_GE_512-NEXT: ld1w { [[Z0]].s }, [[PG1]]/z, [x{{[0-9]+}}] ; VBITS_GE_512-NEXT: ptrue [[PG2:p[0-9]+]].d, vl8 ; VBITS_GE_512-NEXT: sunpklo [[Z0]].d, [[Z0]].s -; VBITS_GE_512-NEXT: st1d { [[Z0]].d }, [[PG2]], [x8] +; VBITS_GE_512-NEXT: st1d { [[Z0]].d }, [[PG1]], [x8] ; VBITS_GE_512-NEXT: ret %a = load <8 x i32>, <8 x i32>* %ap %b = load <8 x i32>, <8 x i32>* %bp @@ -400,12 +374,10 @@ define <32 x i16> @masked_load_zext_v32i8i16(<32 x i8>* %ap, <32 x i8>* %bp) #0 ; VBITS_GE_512-NEXT: ld1b { [[Z0:z[0-9]+]].b }, p0/z, [x0] ; VBITS_GE_512-NEXT: ld1b { [[Z1:z[0-9]+]].b }, p0/z, [x1] ; VBITS_GE_512-NEXT: cmpeq [[PG1:p[0-9]+]].b, [[PG0]]/z, [[Z0]].b, [[Z1]].b -; VBITS_GE_512-NEXT: mov [[Z0]].b, [[PG1]]/z, #-1 -; VBITS_GE_512-NEXT: cmpne [[PG2:p[0-9]+]].b, [[PG0]]/z, [[Z0]].b, #0 -; VBITS_GE_512-NEXT: ld1b { [[Z0]].b }, [[PG2]]/z, [x{{[0-9]+}}] +; VBITS_GE_512-NEXT: ld1b { [[Z0]].b }, [[PG1]]/z, [x{{[0-9]+}}] ; VBITS_GE_512-NEXT: ptrue [[PG2:p[0-9]+]].h, vl32 ; VBITS_GE_512-NEXT: uunpklo [[Z0]].h, [[Z0]].b -; VBITS_GE_512-NEXT: st1h { [[Z0]].h }, [[PG2]], [x8] +; VBITS_GE_512-NEXT: st1h { [[Z0]].h }, [[PG1]], [x8] ; VBITS_GE_512-NEXT: ret %a = load <32 x i8>, <32 x i8>* %ap %b = load <32 x i8>, <32 x i8>* %bp @@ -464,12 +436,10 @@ define <16 x i32> @masked_load_zext_v16i16i32(<16 x i16>* %ap, <16 x i16>* %bp) ; VBITS_GE_512-NEXT: ld1h { [[Z0:z[0-9]+]].h }, p0/z, [x0] ; VBITS_GE_512-NEXT: ld1h { [[Z1:z[0-9]+]].h }, p0/z, [x1] ; VBITS_GE_512-NEXT: cmpeq [[PG1:p[0-9]+]].h, [[PG0]]/z, [[Z0]].h, [[Z1]].h -; VBITS_GE_512-NEXT: mov [[Z0]].h, [[PG1]]/z, #-1 -; VBITS_GE_512-NEXT: cmpne [[PG2:p[0-9]+]].h, [[PG0]]/z, [[Z0]].h, #0 -; VBITS_GE_512-NEXT: ld1h { [[Z0]].h }, [[PG2]]/z, [x{{[0-9]+}}] +; VBITS_GE_512-NEXT: ld1h { [[Z0]].h }, [[PG1]]/z, [x{{[0-9]+}}] ; VBITS_GE_512-NEXT: ptrue [[PG2:p[0-9]+]].s, vl16 ; VBITS_GE_512-NEXT: uunpklo [[Z0]].s, [[Z0]].h -; VBITS_GE_512-NEXT: st1w { [[Z0]].s }, [[PG2]], [x8] +; VBITS_GE_512-NEXT: st1w { [[Z0]].s }, [[PG1]], [x8] ; VBITS_GE_512-NEXT: ret %a = load <16 x i16>, <16 x i16>* %ap %b = load <16 x i16>, <16 x i16>* %bp @@ -506,12 +476,10 @@ define <8 x i64> @masked_load_zext_v8i32i64(<8 x i32>* %ap, <8 x i32>* %bp) #0 { ; VBITS_GE_512-NEXT: ld1w { [[Z0:z[0-9]+]].s }, p0/z, [x0] ; VBITS_GE_512-NEXT: ld1w { [[Z1:z[0-9]+]].s }, p0/z, [x1] ; VBITS_GE_512-NEXT: cmpeq [[PG1:p[0-9]+]].s, [[PG0]]/z, [[Z0]].s, [[Z1]].s -; VBITS_GE_512-NEXT: mov [[Z0]].s, [[PG1]]/z, #-1 -; VBITS_GE_512-NEXT: cmpne [[PG2:p[0-9]+]].s, [[PG0]]/z, [[Z0]].s, #0 -; VBITS_GE_512-NEXT: ld1w { [[Z0]].s }, [[PG2]]/z, [x{{[0-9]+}}] +; VBITS_GE_512-NEXT: ld1w { [[Z0]].s }, [[PG1]]/z, [x{{[0-9]+}}] ; VBITS_GE_512-NEXT: ptrue [[PG2:p[0-9]+]].d, vl8 ; VBITS_GE_512-NEXT: uunpklo [[Z0]].d, [[Z0]].s -; VBITS_GE_512-NEXT: st1d { [[Z0]].d }, [[PG2]], [x8] +; VBITS_GE_512-NEXT: st1d { [[Z0]].d }, [[PG1]], [x8] ; VBITS_GE_512-NEXT: ret %a = load <8 x i32>, <8 x i32>* %ap %b = load <8 x i32>, <8 x i32>* %bp diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll index bbba4336e0e66..6f5c5cee303c6 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll @@ -91,9 +91,7 @@ define void @masked_store_v8f32(<8 x float>* %ap, <8 x float>* %bp) #0 { ; CHECK-NEXT: ld1w { [[Z0:z[0-9]+]].s }, [[PG0]]/z, [x0] ; CHECK-NEXT: ld1w { [[Z1:z[0-9]+]].s }, [[PG0]]/z, [x1] ; CHECK-NEXT: fcmeq [[PG1:p[0-9]+]].s, [[PG0]]/z, [[Z0]].s, [[Z1]].s -; CHECK-NEXT: mov [[Z2:z[0-9]+]].s, [[PG1]]/z, #-1 -; CHECK-NEXT: cmpne [[PG2:p[0-9]+]].s, [[PG0]]/z, [[Z2]].s, #0 -; CHECK-NEXT: st1w { z0.s }, [[PG2]], [x{{[0-9]+}}] +; CHECK-NEXT: st1w { z0.s }, [[PG1]], [x{{[0-9]+}}] ; CHECK-NEXT: ret %a = load <8 x float>, <8 x float>* %ap %b = load <8 x float>, <8 x float>* %bp @@ -108,9 +106,7 @@ define void @masked_store_v16f32(<16 x float>* %ap, <16 x float>* %bp) #0 { ; VBITS_GE_512-NEXT: ld1w { [[Z0:z[0-9]+]].s }, [[PG0]]/z, [x0] ; VBITS_GE_512-NEXT: ld1w { [[Z1:z[0-9]+]].s }, [[PG0]]/z, [x1] ; VBITS_GE_512-NEXT: fcmeq [[PG1:p[0-9]+]].s, [[PG0]]/z, [[Z0]].s, [[Z1]].s -; VBITS_GE_512-NEXT: mov [[Z2:z[0-9]+]].s, [[PG1]]/z, #-1 -; VBITS_GE_512-NEXT: cmpne [[PG2:p[0-9]+]].s, [[PG0]]/z, [[Z1]].s, #0 -; VBITS_GE_512-NEXT: st1w { z0.s }, [[PG2]], [x{{[0-9]+}}] +; VBITS_GE_512-NEXT: st1w { z0.s }, [[PG1]], [x{{[0-9]+}}] ; VBITS_GE_512-NEXT: ret %a = load <16 x float>, <16 x float>* %ap %b = load <16 x float>, <16 x float>* %bp @@ -125,9 +121,7 @@ define void @masked_store_v32f32(<32 x float>* %ap, <32 x float>* %bp) #0 { ; VBITS_GE_1024-NEXT: ld1w { [[Z0:z[0-9]+]].s }, [[PG0]]/z, [x0] ; VBITS_GE_1024-NEXT: ld1w { [[Z1:z[0-9]+]].s }, [[PG0]]/z, [x1] ; VBITS_GE_1024-NEXT: fcmeq [[PG1:p[0-9]+]].s, [[PG0]]/z, [[Z0]].s, [[Z1]].s -; VBITS_GE_1024-NEXT: mov [[Z1:z[0-9]+]].s, [[PG1]]/z, #-1 -; VBITS_GE_1024-NEXT: cmpne [[PG2:p[0-9]+]].s, [[PG0]]/z, [[Z1]].s, #0 -; VBITS_GE_1024-NEXT: st1w { z0.s }, [[PG2]], [x{{[0-9]+}}] +; VBITS_GE_1024-NEXT: st1w { z0.s }, [[PG1]], [x{{[0-9]+}}] ; VBITS_GE_1024-NEXT: ret %a = load <32 x float>, <32 x float>* %ap %b = load <32 x float>, <32 x float>* %bp @@ -142,9 +136,7 @@ define void @masked_store_v64f32(<64 x float>* %ap, <64 x float>* %bp) #0 { ; VBITS_GE_2048-NEXT: ld1w { [[Z0:z[0-9]+]].s }, [[PG0]]/z, [x0] ; VBITS_GE_2048-NEXT: ld1w { [[Z1:z[0-9]+]].s }, [[PG0]]/z, [x1] ; VBITS_GE_2048-NEXT: fcmeq [[PG1:p[0-9]+]].s, [[PG0]]/z, [[Z0]].s, [[Z1]].s -; VBITS_GE_2048-NEXT: mov [[Z1:z[0-9]+]].s, [[PG1]]/z, #-1 -; VBITS_GE_2048-NEXT: cmpne [[PG2:p[0-9]+]].s, [[PG0]]/z, [[Z1]].s, #0 -; VBITS_GE_2048-NEXT: st1w { z0.s }, [[PG2]], [x{{[0-9]+}}] +; VBITS_GE_2048-NEXT: st1w { z0.s }, [[PG1]], [x{{[0-9]+}}] ; VBITS_GE_2048-NEXT: ret %a = load <64 x float>, <64 x float>* %ap %b = load <64 x float>, <64 x float>* %bp From 31ef15e0442ac13135717179d32f438af5bd6ab1 Mon Sep 17 00:00:00 2001 From: Ahsan Saghir Date: Mon, 31 May 2021 08:52:56 -0500 Subject: [PATCH 047/619] Teach peephole optimizer to not emit sub-register defs Peephole optimizer should not be introducing sub-reg definitions as they are illegal in machine SSA phase. This patch modifies the optimizer to not emit sub-register definitions. Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D103408 --- llvm/lib/CodeGen/PeepholeOptimizer.cpp | 29 +++++++++---- .../CodeGen/PowerPC/peephole-subreg-def.mir | 41 +++++++++++++++++++ 2 files changed, 63 insertions(+), 7 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/peephole-subreg-def.mir diff --git a/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/llvm/lib/CodeGen/PeepholeOptimizer.cpp index 34ac396c04719..49bdba518322d 100644 --- a/llvm/lib/CodeGen/PeepholeOptimizer.cpp +++ b/llvm/lib/CodeGen/PeepholeOptimizer.cpp @@ -585,15 +585,30 @@ optimizeExtInstr(MachineInstr &MI, MachineBasicBlock &MBB, MRI->constrainRegClass(DstReg, DstRC); } + // SubReg defs are illegal in machine SSA phase, + // we should not generate SubReg defs. + // + // For example, for the instructions: + // + // %1:g8rc_and_g8rc_nox0 = EXTSW %0:g8rc + // %3:gprc_and_gprc_nor0 = COPY %0.sub_32:g8rc + // + // We should generate: + // + // %1:g8rc_and_g8rc_nox0 = EXTSW %0:g8rc + // %6:gprc_and_gprc_nor0 = COPY %1.sub_32:g8rc_and_g8rc_nox0 + // %3:gprc_and_gprc_nor0 = COPY %6:gprc_and_gprc_nor0 + // + if (UseSrcSubIdx) + RC = MRI->getRegClass(UseMI->getOperand(0).getReg()); + Register NewVR = MRI->createVirtualRegister(RC); - MachineInstr *Copy = BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(), - TII->get(TargetOpcode::COPY), NewVR) + BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(), + TII->get(TargetOpcode::COPY), NewVR) .addReg(DstReg, 0, SubIdx); - // SubIdx applies to both SrcReg and DstReg when UseSrcSubIdx is set. - if (UseSrcSubIdx) { - Copy->getOperand(0).setSubReg(SubIdx); - Copy->getOperand(0).setIsUndef(); - } + if (UseSrcSubIdx) + UseMO->setSubReg(0); + UseMO->setReg(NewVR); ++NumReuse; Changed = true; diff --git a/llvm/test/CodeGen/PowerPC/peephole-subreg-def.mir b/llvm/test/CodeGen/PowerPC/peephole-subreg-def.mir new file mode 100644 index 0000000000000..e89e2068d9278 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/peephole-subreg-def.mir @@ -0,0 +1,41 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=powerpc64le -simplify-mir -verify-machineinstrs \ +# RUN: -run-pass=peephole-opt %s -o - | FileCheck %s + +# This tests to make sure that we do not generate subreg def +# as it is illegal to generate subreg defs in machine SSA phase. + +--- +name: test_peephole_subreg_def +alignment: 16 +tracksRegLiveness: true +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $x3 + + ; CHECK-LABEL: name: test_peephole_subreg_def + ; CHECK: liveins: $x3 + ; CHECK: [[COPY:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY $x3 + ; CHECK: [[ADDI8_:%[0-9]+]]:g8rc = ADDI8 [[COPY]], 1 + ; CHECK: [[EXTSW:%[0-9]+]]:g8rc_and_g8rc_nox0 = EXTSW [[ADDI8_]] + ; CHECK: [[LI8_:%[0-9]+]]:g8rc = LI8 0 + ; CHECK: STB8 [[LI8_]], 0, [[EXTSW]] + ; CHECK: [[COPY1:%[0-9]+]]:gprc_and_gprc_nor0 = COPY [[EXTSW]].sub_32 + ; CHECK: [[COPY2:%[0-9]+]]:gprc_and_gprc_nor0 = COPY [[COPY1]] + ; CHECK: [[ADDI:%[0-9]+]]:gprc = ADDI killed [[COPY2]], 1 + ; CHECK: [[EXTSW_32_64_:%[0-9]+]]:g8rc_and_g8rc_nox0 = EXTSW_32_64 killed [[ADDI]] + ; CHECK: STB8 [[LI8_]], 0, killed [[EXTSW_32_64_]] + %0:g8rc_and_g8rc_nox0 = COPY $x3 + %1:g8rc = ADDI8 %0, 1 + %2:g8rc_and_g8rc_nox0 = EXTSW %1 + %3:g8rc = LI8 0 + STB8 %3, 0, killed %2 + %4:gprc_and_gprc_nor0 = COPY %1.sub_32 + %5:gprc = ADDI killed %4, 1 + %6:g8rc_and_g8rc_nox0 = EXTSW_32_64 killed %5 + STB8 %3, 0, killed %6 + +... From 0e09d18c6a0a73dc9ed9e45584407a7a44fbbc4a Mon Sep 17 00:00:00 2001 From: Sander de Smalen Date: Sun, 27 Jun 2021 16:07:19 +0100 Subject: [PATCH 048/619] Reland [GlobalISel] NFC: Have LLT::getSizeInBits/Bytes return a TypeSize. This patch relands https://reviews.llvm.org/D104454, but fixes some failing builds on Mac OS which apparently has a different definition for size_t, that caused 'ambiguous operator overload' for the implicit conversion of TypeSize to a scalar value. This reverts commit b732e6c9a8438e5204ac96c8ca76f9b11abf98ff. --- .../GlobalISel/InstructionSelectorImpl.h | 2 +- llvm/include/llvm/Support/LowLevelTypeImpl.h | 35 +++++++++++-------- llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 2 +- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 2 +- .../CodeGen/GlobalISel/MachineIRBuilder.cpp | 12 ++++--- llvm/unittests/CodeGen/LowLevelTypeTest.cpp | 3 ++ llvm/utils/TableGen/GlobalISelEmitter.cpp | 11 ++++-- 7 files changed, 43 insertions(+), 24 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h index d9814962d11d8..bc9f952146c2d 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h @@ -595,7 +595,7 @@ bool InstructionSelector::executeMatchTable( case GIM_CheckPointerToAny: { int64_t InsnID = MatchTable[CurrentIdx++]; int64_t OpIdx = MatchTable[CurrentIdx++]; - int64_t SizeInBits = MatchTable[CurrentIdx++]; + uint64_t SizeInBits = MatchTable[CurrentIdx++]; DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), dbgs() << CurrentIdx << ": GIM_CheckPointerToAny(MIs[" diff --git a/llvm/include/llvm/Support/LowLevelTypeImpl.h b/llvm/include/llvm/Support/LowLevelTypeImpl.h index 0660bcd8256c8..379e37101e9a4 100644 --- a/llvm/include/llvm/Support/LowLevelTypeImpl.h +++ b/llvm/include/llvm/Support/LowLevelTypeImpl.h @@ -67,7 +67,7 @@ class LLT { assert(!EC.isScalar() && "invalid number of vector elements"); assert(!ScalarTy.isVector() && "invalid vector element type"); return LLT{ScalarTy.isPointer(), /*isVector=*/true, EC, - ScalarTy.getSizeInBits(), + ScalarTy.getSizeInBits().getFixedSize(), ScalarTy.isPointer() ? ScalarTy.getAddressSpace() : 0}; } @@ -100,12 +100,14 @@ class LLT { return EC.isScalar() ? ScalarTy : LLT::vector(EC, ScalarTy); } - static LLT scalarOrVector(ElementCount EC, unsigned ScalarSize) { - return scalarOrVector(EC, LLT::scalar(ScalarSize)); + static LLT scalarOrVector(ElementCount EC, uint64_t ScalarSize) { + assert(ScalarSize <= std::numeric_limits::max() && + "Not enough bits in LLT to represent size"); + return scalarOrVector(EC, LLT::scalar(static_cast(ScalarSize))); } explicit LLT(bool isPointer, bool isVector, ElementCount EC, - unsigned SizeInBits, unsigned AddressSpace) { + uint64_t SizeInBits, unsigned AddressSpace) { init(isPointer, isVector, EC, SizeInBits, AddressSpace); } explicit LLT() : IsPointer(false), IsVector(false), RawData(0) {} @@ -148,18 +150,19 @@ class LLT { } /// Returns the total size of the type. Must only be called on sized types. - unsigned getSizeInBits() const { + TypeSize getSizeInBits() const { if (isPointer() || isScalar()) - return getScalarSizeInBits(); - // FIXME: This should return a TypeSize in order to work for scalable - // vectors. - return getScalarSizeInBits() * getElementCount().getKnownMinValue(); + return TypeSize::Fixed(getScalarSizeInBits()); + auto EC = getElementCount(); + return TypeSize(getScalarSizeInBits() * EC.getKnownMinValue(), + EC.isScalable()); } /// Returns the total size of the type in bytes, i.e. number of whole bytes /// needed to represent the size in bits. Must only be called on sized types. - unsigned getSizeInBytes() const { - return (getSizeInBits() + 7) / 8; + TypeSize getSizeInBytes() const { + TypeSize BaseSize = getSizeInBits(); + return {(BaseSize.getKnownMinSize() + 7) / 8, BaseSize.isScalable()}; } LLT getScalarType() const { @@ -199,11 +202,11 @@ class LLT { getElementType()); } - assert(getSizeInBits() % Factor == 0); - return scalar(getSizeInBits() / Factor); + assert(getScalarSizeInBits() % Factor == 0); + return scalar(getScalarSizeInBits() / Factor); } - bool isByteSized() const { return (getSizeInBits() & 7) == 0; } + bool isByteSized() const { return getSizeInBits().isKnownMultipleOf(8); } unsigned getScalarSizeInBits() const { assert(RawData != 0 && "Invalid Type"); @@ -333,8 +336,10 @@ class LLT { return getMask(FieldInfo) & (RawData >> FieldInfo[1]); } - void init(bool IsPointer, bool IsVector, ElementCount EC, unsigned SizeInBits, + void init(bool IsPointer, bool IsVector, ElementCount EC, uint64_t SizeInBits, unsigned AddressSpace) { + assert(SizeInBits <= std::numeric_limits::max() && + "Not enough bits in LLT to represent size"); this->IsPointer = IsPointer; this->IsVector = IsVector; if (!IsVector) { diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp index 1f603408b887b..2815dae41b7d5 100644 --- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -355,7 +355,7 @@ static void buildCopyFromRegs(MachineIRBuilder &B, ArrayRef OrigRegs, assert(OrigRegs.size() == 1); LLT OrigTy = MRI.getType(OrigRegs[0]); - unsigned SrcSize = PartLLT.getSizeInBits() * Regs.size(); + unsigned SrcSize = PartLLT.getSizeInBits().getFixedSize() * Regs.size(); if (SrcSize == OrigTy.getSizeInBits()) B.buildMerge(OrigRegs[0], Regs); else { diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index cbe4568245872..42c63849910e4 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -1565,7 +1565,7 @@ bool IRTranslator::translateMemFunc(const CallInst &CI, Register SrcReg = getOrCreateVReg(**AI); LLT SrcTy = MRI->getType(SrcReg); if (SrcTy.isPointer()) - MinPtrSize = std::min(SrcTy.getSizeInBits(), MinPtrSize); + MinPtrSize = std::min(SrcTy.getSizeInBits(), MinPtrSize); SrcRegs.push_back(SrcReg); } diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index e73a5f8c62fb0..3352d1989d157 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -1117,7 +1117,8 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc, DstOps[0].getLLTTy(*getMRI()); }) && "type mismatch in output list"); - assert(DstOps.size() * DstOps[0].getLLTTy(*getMRI()).getSizeInBits() == + assert((TypeSize::ScalarTy)DstOps.size() * + DstOps[0].getLLTTy(*getMRI()).getSizeInBits() == SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() && "input operands do not cover output register"); break; @@ -1131,7 +1132,8 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc, SrcOps[0].getLLTTy(*getMRI()); }) && "type mismatch in input list"); - assert(SrcOps.size() * SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() == + assert((TypeSize::ScalarTy)SrcOps.size() * + SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() == DstOps[0].getLLTTy(*getMRI()).getSizeInBits() && "input operands do not cover output register"); if (SrcOps.size() == 1) @@ -1182,7 +1184,8 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc, SrcOps[0].getLLTTy(*getMRI()); }) && "type mismatch in input list"); - assert(SrcOps.size() * SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() == + assert((TypeSize::ScalarTy)SrcOps.size() * + SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() == DstOps[0].getLLTTy(*getMRI()).getSizeInBits() && "input scalars do not exactly cover the output vector register"); break; @@ -1215,7 +1218,8 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc, SrcOps[0].getLLTTy(*getMRI())); }) && "type mismatch in input list"); - assert(SrcOps.size() * SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() == + assert((TypeSize::ScalarTy)SrcOps.size() * + SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() == DstOps[0].getLLTTy(*getMRI()).getSizeInBits() && "input vectors do not exactly cover the output vector register"); break; diff --git a/llvm/unittests/CodeGen/LowLevelTypeTest.cpp b/llvm/unittests/CodeGen/LowLevelTypeTest.cpp index e27c6e4e955a4..9ff75b8f183bb 100644 --- a/llvm/unittests/CodeGen/LowLevelTypeTest.cpp +++ b/llvm/unittests/CodeGen/LowLevelTypeTest.cpp @@ -81,6 +81,9 @@ TEST(LowLevelTypeTest, Vector) { EXPECT_EQ(EC, VTy.getElementCount()); if (!EC.isScalable()) EXPECT_EQ(S * EC.getFixedValue(), VTy.getSizeInBits()); + else + EXPECT_EQ(TypeSize::Scalable(S * EC.getKnownMinValue()), + VTy.getSizeInBits()); // Test equality operators. EXPECT_TRUE(VTy == VTy); diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp index 497854ababfe3..693073672fc11 100644 --- a/llvm/utils/TableGen/GlobalISelEmitter.cpp +++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp @@ -182,7 +182,13 @@ class LLTCodeGen { assert((!Ty.isVector() || Ty.isScalable() == Other.Ty.isScalable()) && "Unexpected mismatch of scalable property"); - return Ty.getSizeInBits() < Other.Ty.getSizeInBits(); + return Ty.isVector() + ? std::make_tuple(Ty.isScalable(), + Ty.getSizeInBits().getKnownMinSize()) < + std::make_tuple(Other.Ty.isScalable(), + Other.Ty.getSizeInBits().getKnownMinSize()) + : Ty.getSizeInBits().getFixedSize() < + Other.Ty.getSizeInBits().getFixedSize(); } bool operator==(const LLTCodeGen &B) const { return Ty == B.Ty; } @@ -3788,7 +3794,8 @@ Optional GlobalISelEmitter::getMemSizeBitsFromPredicate(const TreePred return None; // Align so unusual types like i1 don't get rounded down. - return llvm::alignTo(MemTyOrNone->get().getSizeInBits(), 8); + return llvm::alignTo( + static_cast(MemTyOrNone->get().getSizeInBits()), 8); } Expected GlobalISelEmitter::addBuiltinPredicates( From 4cf072e7fbb02e77332fa2a6dd26a7160ee4cc2e Mon Sep 17 00:00:00 2001 From: Zarko Todorovski Date: Mon, 28 Jun 2021 10:31:55 -0400 Subject: [PATCH 049/619] [AIX] Use less than or equal to for some alignment tests on AIX On AIX the alignment implementation has the storage aligned to the preferred alignment instead of the alignment of a type. Macro guard these tests for AIX and have them pass when the "reference alignment" is less than or equal to the alignment observed. In other words, the alignment applied is at least as strict as the required alignment. Reviewed By: hubert.reinterpretcast Differential Revision: https://reviews.llvm.org/D104786 --- llvm/unittests/Support/AlignOfTest.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/llvm/unittests/Support/AlignOfTest.cpp b/llvm/unittests/Support/AlignOfTest.cpp index d8cabde6c727a..f84895c18602d 100644 --- a/llvm/unittests/Support/AlignOfTest.cpp +++ b/llvm/unittests/Support/AlignOfTest.cpp @@ -131,9 +131,17 @@ TEST(AlignOfTest, BasicAlignedArray) { EXPECT_EQ(alignof(T), alignof(AlignedCharArrayUnion)); EXPECT_EQ(alignof(T), alignof(AlignedCharArrayUnion)); EXPECT_EQ(alignof(T), alignof(AlignedCharArrayUnion)); +#ifdef _AIX + EXPECT_LE(alignof(T), alignof(AlignedCharArrayUnion)); + EXPECT_LE(alignof(T), + alignof(AlignedCharArrayUnion)); + EXPECT_LE(alignof(S4), alignof(AlignedCharArrayUnion)); +#else EXPECT_EQ(alignof(T), alignof(AlignedCharArrayUnion)); EXPECT_EQ(alignof(T), alignof(AlignedCharArrayUnion)); + EXPECT_EQ(alignof(S4), alignof(AlignedCharArrayUnion)); +#endif EXPECT_EQ(alignof(T), alignof(AlignedCharArrayUnion)); EXPECT_EQ(alignof(T), alignof(AlignedCharArrayUnion)); EXPECT_EQ(alignof(T), @@ -143,7 +151,6 @@ TEST(AlignOfTest, BasicAlignedArray) { EXPECT_EQ(alignof(S1), alignof(AlignedCharArrayUnion)); EXPECT_EQ(alignof(S2), alignof(AlignedCharArrayUnion)); EXPECT_EQ(alignof(S3), alignof(AlignedCharArrayUnion)); - EXPECT_EQ(alignof(S4), alignof(AlignedCharArrayUnion)); EXPECT_EQ(alignof(S5), alignof(AlignedCharArrayUnion)); EXPECT_EQ(alignof(S6), alignof(AlignedCharArrayUnion)); EXPECT_EQ(alignof(D1), alignof(AlignedCharArrayUnion)); From 7d6e589fc86d7865fc4bf92c583209700dd32aac Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Mon, 28 Jun 2021 07:50:11 -0700 Subject: [PATCH 050/619] Revert "[mlir][memref] Implement lowering of memref.copy to llvm" This reverts commit e9396449777f02d573deb25d603ee1b1d6e568c1. Breaks Windows build. --- .../mlir/Dialect/LLVMIR/FunctionCallUtils.h | 2 - .../mlir/ExecutionEngine/CRunnerUtils.h | 7 --- .../StandardToLLVM/StandardToLLVM.cpp | 63 ------------------- .../Dialect/LLVMIR/IR/FunctionCallUtils.cpp | 10 --- mlir/lib/ExecutionEngine/CRunnerUtils.cpp | 48 -------------- 5 files changed, 130 deletions(-) diff --git a/mlir/include/mlir/Dialect/LLVMIR/FunctionCallUtils.h b/mlir/include/mlir/Dialect/LLVMIR/FunctionCallUtils.h index 6380ff2d8e132..7efff9774cd50 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/FunctionCallUtils.h +++ b/mlir/include/mlir/Dialect/LLVMIR/FunctionCallUtils.h @@ -45,8 +45,6 @@ LLVM::LLVMFuncOp lookupOrCreateMallocFn(ModuleOp moduleOp, Type indexType); LLVM::LLVMFuncOp lookupOrCreateAlignedAllocFn(ModuleOp moduleOp, Type indexType); LLVM::LLVMFuncOp lookupOrCreateFreeFn(ModuleOp moduleOp); -LLVM::LLVMFuncOp lookupOrCreateMemRefCopyFn(ModuleOp moduleOp, Type indexType, - Type unrankedDescriptorType); /// Create a FuncOp with signature `resultType`(`paramTypes`)` and name `name`. LLVM::LLVMFuncOp lookupOrCreateFn(ModuleOp moduleOp, StringRef name, diff --git a/mlir/include/mlir/ExecutionEngine/CRunnerUtils.h b/mlir/include/mlir/ExecutionEngine/CRunnerUtils.h index bd855fcc03a96..fb0b2a65a67eb 100644 --- a/mlir/include/mlir/ExecutionEngine/CRunnerUtils.h +++ b/mlir/include/mlir/ExecutionEngine/CRunnerUtils.h @@ -330,13 +330,6 @@ class DynamicMemRefType { const int64_t *strides; }; -//===----------------------------------------------------------------------===// -// Small runtime support library for memref.copy lowering during codegen. -//===----------------------------------------------------------------------===// -extern "C" MLIR_CRUNNERUTILS_EXPORT void -memrefCopy(int64_t elemSize, UnrankedMemRefType *src, - UnrankedMemRefType *dst); - //===----------------------------------------------------------------------===// // Small runtime support library for vector.print lowering during codegen. //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp index eb390bf8844fa..db5918e95f182 100644 --- a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp +++ b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp @@ -2618,68 +2618,6 @@ struct MemRefCastOpLowering : public ConvertOpToLLVMPattern { } }; -struct MemRefCopyOpLowering : public ConvertOpToLLVMPattern { - using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; - - LogicalResult - matchAndRewrite(memref::CopyOp op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const override { - auto loc = op.getLoc(); - memref::CopyOp::Adaptor adaptor(operands); - auto srcType = op.source().getType().cast(); - auto targetType = op.target().getType().cast(); - - // First make sure we have an unranked memref descriptor representation. - auto makeUnranked = [&, this](Value ranked, BaseMemRefType type) { - auto rank = rewriter.create( - loc, getIndexType(), rewriter.getIndexAttr(type.getRank())); - auto *typeConverter = getTypeConverter(); - auto ptr = - typeConverter->promoteOneMemRefDescriptor(loc, ranked, rewriter); - auto voidPtr = - rewriter.create(loc, getVoidPtrType(), ptr) - .getResult(); - auto unrankedType = - UnrankedMemRefType::get(type.getElementType(), type.getMemorySpace()); - return UnrankedMemRefDescriptor::pack(rewriter, loc, *typeConverter, - unrankedType, - ValueRange{rank, voidPtr}); - }; - - Value unrankedSource = srcType.hasRank() - ? makeUnranked(adaptor.source(), srcType) - : adaptor.source(); - Value unrankedTarget = targetType.hasRank() - ? makeUnranked(adaptor.target(), targetType) - : adaptor.target(); - - // Now promote the unranked descriptors to the stack. - auto one = rewriter.create(loc, getIndexType(), - rewriter.getIndexAttr(1)); - auto promote = [&](Value desc) { - auto ptrType = LLVM::LLVMPointerType::get(desc.getType()); - auto allocated = - rewriter.create(loc, ptrType, ValueRange{one}); - rewriter.create(loc, desc, allocated); - return allocated; - }; - - auto sourcePtr = promote(unrankedSource); - auto targetPtr = promote(unrankedTarget); - - auto elemSize = rewriter.create( - loc, getIndexType(), - rewriter.getIndexAttr(srcType.getElementTypeBitWidth() / 8)); - auto copyFn = LLVM::lookupOrCreateMemRefCopyFn( - op->getParentOfType(), getIndexType(), sourcePtr.getType()); - rewriter.create(loc, copyFn, - ValueRange{elemSize, sourcePtr, targetPtr}); - rewriter.eraseOp(op); - - return success(); - } -}; - /// Extracts allocated, aligned pointers and offset from a ranked or unranked /// memref type. In unranked case, the fields are extracted from the underlying /// ranked descriptor. @@ -4071,7 +4009,6 @@ void mlir::populateStdToLLVMMemoryConversionPatterns( GetGlobalMemrefOpLowering, LoadOpLowering, MemRefCastOpLowering, - MemRefCopyOpLowering, MemRefReinterpretCastOpLowering, MemRefReshapeOpLowering, RankOpLowering, diff --git a/mlir/lib/Dialect/LLVMIR/IR/FunctionCallUtils.cpp b/mlir/lib/Dialect/LLVMIR/IR/FunctionCallUtils.cpp index 47a5851b51f2e..a43c2251c2d99 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/FunctionCallUtils.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/FunctionCallUtils.cpp @@ -35,7 +35,6 @@ static constexpr llvm::StringRef kPrintNewline = "printNewline"; static constexpr llvm::StringRef kMalloc = "malloc"; static constexpr llvm::StringRef kAlignedAlloc = "aligned_alloc"; static constexpr llvm::StringRef kFree = "free"; -static constexpr llvm::StringRef kMemRefCopy = "memref_copy"; /// Generic print function lookupOrCreate helper. LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreateFn(ModuleOp moduleOp, StringRef name, @@ -115,15 +114,6 @@ LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreateFreeFn(ModuleOp moduleOp) { LLVM::LLVMVoidType::get(moduleOp->getContext())); } -LLVM::LLVMFuncOp -mlir::LLVM::lookupOrCreateMemRefCopyFn(ModuleOp moduleOp, Type indexType, - Type unrankedDescriptorType) { - return LLVM::lookupOrCreateFn( - moduleOp, kMemRefCopy, - ArrayRef{indexType, unrankedDescriptorType, unrankedDescriptorType}, - LLVM::LLVMVoidType::get(moduleOp->getContext())); -} - Operation::result_range mlir::LLVM::createLLVMCall(OpBuilder &b, Location loc, LLVM::LLVMFuncOp fn, ValueRange paramTypes, diff --git a/mlir/lib/ExecutionEngine/CRunnerUtils.cpp b/mlir/lib/ExecutionEngine/CRunnerUtils.cpp index bf96afb73725b..e5b682a7b6de5 100644 --- a/mlir/lib/ExecutionEngine/CRunnerUtils.cpp +++ b/mlir/lib/ExecutionEngine/CRunnerUtils.cpp @@ -18,10 +18,8 @@ #include #endif // _WIN32 -#include #include #include -#include #ifdef MLIR_CRUNNERUTILS_DEFINE_FUNCTIONS @@ -38,52 +36,6 @@ extern "C" void printClose() { fputs(" )", stdout); } extern "C" void printComma() { fputs(", ", stdout); } extern "C" void printNewline() { fputc('\n', stdout); } -extern "C" MLIR_CRUNNERUTILS_EXPORT void -memrefCopy(int64_t elemSize, UnrankedMemRefType *srcArg, - UnrankedMemRefType *dstArg) { - DynamicMemRefType src(*srcArg); - DynamicMemRefType dst(*dstArg); - - int64_t rank = src.rank; - int64_t *indices = static_cast(alloca(sizeof(int64_t) * rank)); - int64_t *srcStrides = static_cast(alloca(sizeof(int64_t) * rank)); - int64_t *dstStrides = static_cast(alloca(sizeof(int64_t) * rank)); - - char *srcPtr = src.data + src.offset * elemSize; - char *dstPtr = dst.data + dst.offset * elemSize; - - // Initialize index and scale strides. - for (int rankp = 0; rankp < rank; ++rankp) { - indices[rankp] = 0; - srcStrides[rankp] = src.strides[rankp] * elemSize; - dstStrides[rankp] = dst.strides[rankp] * elemSize; - } - - int64_t readIndex = 0, writeIndex = 0; - for (;;) { - // Copy over the element, byte by byte. - memcpy(dstPtr + writeIndex, srcPtr + readIndex, elemSize); - // Advance index and read position. - for (int64_t axis = rank - 1; axis >= 0; --axis) { - // Advance at current axis. - auto newIndex = ++indices[axis]; - readIndex += srcStrides[axis]; - writeIndex += dstStrides[axis]; - // If this is a valid index, we have our next index, so continue copying. - if (src.sizes[axis] != newIndex) - break; - // We reached the end of this axis. If this is axis 0, we are done. - if (axis == 0) - return; - // Else, reset to 0 and undo the advancement of the linear index that - // this axis had. The continue with the axis one outer. - indices[axis] = 0; - readIndex -= src.sizes[axis] * srcStrides[axis]; - writeIndex -= dst.sizes[axis] * dstStrides[axis]; - } - } -} - /// Prints GFLOPS rating. extern "C" void print_flops(double flops) { fprintf(stderr, "%lf GFLOPS\n", flops / 1.0E9); From e5d8cfb2f134fcf0235ec1a35eec875a9cd36b21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20Gr=C3=A4nitz?= Date: Mon, 28 Jun 2021 16:10:02 +0200 Subject: [PATCH 051/619] [Orc][examples] Temporarily disable LLJITWithRemoteDebugging build and test The underlying TargetProcessControl API changes with D104694. Once it landed I can patch and re-enable the example. --- .../OrcV2Examples/LLJITWithRemoteDebugging/CMakeLists.txt | 5 ++++- llvm/test/Examples/lit.local.cfg | 6 ++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/llvm/examples/OrcV2Examples/LLJITWithRemoteDebugging/CMakeLists.txt b/llvm/examples/OrcV2Examples/LLJITWithRemoteDebugging/CMakeLists.txt index 558297b9e0b00..06dfe88a898d5 100644 --- a/llvm/examples/OrcV2Examples/LLJITWithRemoteDebugging/CMakeLists.txt +++ b/llvm/examples/OrcV2Examples/LLJITWithRemoteDebugging/CMakeLists.txt @@ -10,7 +10,7 @@ set(LLVM_LINK_COMPONENTS nativecodegen ) -if ( LLVM_INCLUDE_UTILS ) +if (LLVM_INCLUDE_UTILS AND NOT LLVM_INCLUDE_UTILS) add_llvm_example(LLJITWithRemoteDebugging LLJITWithRemoteDebugging.cpp RemoteJITUtils.cpp @@ -18,4 +18,7 @@ if ( LLVM_INCLUDE_UTILS ) DEPENDS llvm-jitlink-executor ) +else() + # Use a temporary no-op target until D104694 lands. + add_custom_target(LLJITWithRemoteDebugging) endif() diff --git a/llvm/test/Examples/lit.local.cfg b/llvm/test/Examples/lit.local.cfg index a9f3860333603..f23a918956ba7 100644 --- a/llvm/test/Examples/lit.local.cfg +++ b/llvm/test/Examples/lit.local.cfg @@ -1,5 +1,7 @@ -if not config.build_examples or sys.platform in ['win32']: - config.unsupported = True +#if not config.build_examples or sys.platform in ['win32']: + +# Mark both lljit-with-* tests unsupported until D104694 lands. +config.unsupported = True # Test discovery should ignore subdirectories that contain test inputs. config.excludes = ['Inputs'] From 35c0ab72fc20fcd47adda07f738338733d2c49e3 Mon Sep 17 00:00:00 2001 From: "William S. Moses" Date: Fri, 25 Jun 2021 23:23:14 -0400 Subject: [PATCH 052/619] [MLIR] Simplify select to a not Given a select that returns the logical negation of the condition, replace it with a not of the condition. Differential Revision: https://reviews.llvm.org/D104966 --- .../mlir/Dialect/StandardOps/IR/Ops.td | 1 + mlir/lib/Dialect/StandardOps/IR/Ops.cpp | 31 +++++++++++++++++++ mlir/test/Dialect/Standard/canonicalize.mlir | 12 +++++++ 3 files changed, 44 insertions(+) diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td index 48a539a387bd7..ebb7c37703c1e 100644 --- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td +++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td @@ -1454,6 +1454,7 @@ def SelectOp : Std_Op<"select", [NoSideEffect, Value getFalseValue() { return false_value(); } }]; + let hasCanonicalizer = 1; let hasFolder = 1; } diff --git a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp index 837986fc03535..f6abfc4060d0a 100644 --- a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp +++ b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp @@ -1496,6 +1496,37 @@ static LogicalResult verify(ReturnOp op) { // SelectOp //===----------------------------------------------------------------------===// +// Transforms a select to a not, where relevant. +// +// select %arg, %false, %true +// +// becomes +// +// xor %arg, %true +struct SelectToNot : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(SelectOp op, + PatternRewriter &rewriter) const override { + if (!matchPattern(op.getTrueValue(), m_Zero())) + return failure(); + + if (!matchPattern(op.getFalseValue(), m_One())) + return failure(); + + if (!op.getType().isInteger(1)) + return failure(); + + rewriter.replaceOpWithNewOp(op, op.condition(), op.getFalseValue()); + return success(); + } +}; + +void SelectOp::getCanonicalizationPatterns(OwningRewritePatternList &results, + MLIRContext *context) { + results.insert(context); +} + OpFoldResult SelectOp::fold(ArrayRef operands) { auto trueVal = getTrueValue(); auto falseVal = getFalseValue(); diff --git a/mlir/test/Dialect/Standard/canonicalize.mlir b/mlir/test/Dialect/Standard/canonicalize.mlir index f3b9bdf9c54f2..d2ef830537f9f 100644 --- a/mlir/test/Dialect/Standard/canonicalize.mlir +++ b/mlir/test/Dialect/Standard/canonicalize.mlir @@ -319,3 +319,15 @@ func @branchCondProp(%arg0: i1) { ^exit: return } + +// ----- + +// CHECK-LABEL: @selToNot +// CHECK: %[[trueval:.+]] = constant true +// CHECK: %{{.+}} = xor %arg0, %[[trueval]] : i1 +func @selToNot(%arg0: i1) -> i1 { + %true = constant true + %false = constant false + %res = select %arg0, %false, %true : i1 + return %res : i1 +} From d6cb0143cccedaaddcfa274b25e9696dd1de5ab1 Mon Sep 17 00:00:00 2001 From: naromero77 Date: Fri, 25 Jun 2021 18:54:13 -0500 Subject: [PATCH 053/619] [flang][docs] Minor update to Fortran LLVM Tests-suite docs. Updated Fortran LLVM Test-suite docs to reflect latest changes. Reviewed By: Meinersbur, xgupta Differential Revision: https://reviews.llvm.org/D104961 --- flang/docs/FortranLLVMTestSuite.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/flang/docs/FortranLLVMTestSuite.md b/flang/docs/FortranLLVMTestSuite.md index ee7ce42928103..46a8fe4f63424 100644 --- a/flang/docs/FortranLLVMTestSuite.md +++ b/flang/docs/FortranLLVMTestSuite.md @@ -21,18 +21,18 @@ code-generation capabilities. Fortran support can be enabled by setting the following CMake variables: ``` -% cmake -DCMAKE_Fortran_COMPILER= \ - -DTEST_SUITE_FORTRAN:STRING=ON \ - -C../test-suite/cmake/caches/O3.cmake \ - ../test-suite +cmake -G "Ninja" -DCMAKE_C_COMPILER= \ + -DCMAKE_CXX_COMPILER= \ + -DCMAKE_Fortran_COMPILER= \ + -DTEST_SUITE_COLLECT_CODE_SIZE:STRING=OFF \ + -DTEST_SUITE_SUBDIRS:STRING="Fortran" \ + -DTEST_SUITE_FORTRAN:STRING=ON .. ``` -At the moment, there is only a "hello world" Fortran test. A current -shortcoming in the design of the test suite is that building the C/C++ -tests is conflated with building and running the Fortran tests, -i.e. it is not possible to only build and run the Fortran tests with -the exception of the [External -tests](https://llvm.org/docs/TestSuiteGuide.html#external-suites). +This will configure the test-suite to run only the Fortran tests which +are found in the Fortran subdirectory. To run the C/C++ tests +alongside the Fortran tests omit the `-DTEST_SUITE_SUBDIRS` CMake +variable. ## Running the SPEC CPU 2017 From ef78325c1033ae315bdeeb5dab31d906c8e58d97 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 28 Jun 2021 15:46:56 +0100 Subject: [PATCH 054/619] [BasicAA] Add test to cover GetIndexDifference change in D99424. Precommit test case for a change to GetIndexDifference in D99424. --- llvm/test/Analysis/BasicAA/gep-modulo.ll | 27 ++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/llvm/test/Analysis/BasicAA/gep-modulo.ll b/llvm/test/Analysis/BasicAA/gep-modulo.ll index acdbaefefd71c..79782fad44872 100644 --- a/llvm/test/Analysis/BasicAA/gep-modulo.ll +++ b/llvm/test/Analysis/BasicAA/gep-modulo.ll @@ -192,7 +192,7 @@ define void @may_overflow_i32_sext([16 x i8]* %ptr, i32 %idx) { ; CHECK-LABEL: Function: may_overflow_i32_sext: 3 pointers, 0 call sites ; CHECK-NEXT: MayAlias: [16 x i8]* %ptr, i8* %gep.idx ; CHECK-NEXT: PartialAlias (off 3): [16 x i8]* %ptr, i8* %gep.3 -; CHECK-NEXT: MayAlias: i8* %gep.3, i8* %gep.idx +; CHECK-NEXT: MayAlias: i8* %gep.3, i8* %gep.idx ; %mul = mul i32 %idx, 678152731 %sub = sub i32 %mul, 1582356375 @@ -208,7 +208,7 @@ define void @nuw_nsw_i32_sext([16 x i8]* %ptr, i32 %idx) { ; CHECK-LABEL: Function: nuw_nsw_i32_sext: 3 pointers, 0 call sites ; CHECK-NEXT: NoAlias: [16 x i8]* %ptr, i8* %gep.idx ; CHECK-NEXT: PartialAlias (off 3): [16 x i8]* %ptr, i8* %gep.3 -; CHECK-NEXT: NoAlias: i8* %gep.3, i8* %gep.idx +; CHECK-NEXT: NoAlias: i8* %gep.3, i8* %gep.idx ; %mul = mul nuw nsw i32 %idx, 678152731 %sub = sub nuw nsw i32 %mul, 1582356375 @@ -225,7 +225,7 @@ define void @may_overflow_i32_zext([16 x i8]* %ptr, i32 %idx) { ; CHECK-LABEL: Function: may_overflow_i32_zext: 3 pointers, 0 call sites ; CHECK-NEXT: MayAlias: [16 x i8]* %ptr, i8* %gep.idx ; CHECK-NEXT: PartialAlias (off 3): [16 x i8]* %ptr, i8* %gep.3 -; CHECK-NEXT: MayAlias: i8* %gep.3, i8* %gep.idx +; CHECK-NEXT: MayAlias: i8* %gep.3, i8* %gep.idx ; %mul = mul i32 %idx, 678152731 %sub = sub i32 %mul, 1582356375 @@ -241,7 +241,7 @@ define void @nuw_nsw_i32_zext([16 x i8]* %ptr, i32 %idx) { ; CHECK-LABEL: Function: nuw_nsw_i32_zext: 3 pointers, 0 call sites ; CHECK-NEXT: NoAlias: [16 x i8]* %ptr, i8* %gep.idx ; CHECK-NEXT: PartialAlias (off 3): [16 x i8]* %ptr, i8* %gep.3 -; CHECK-NEXT: NoAlias: i8* %gep.3, i8* %gep.idx +; CHECK-NEXT: NoAlias: i8* %gep.3, i8* %gep.idx ; %mul = mul nuw nsw i32 %idx, 678152731 %sub = sub nuw nsw i32 %mul, 1582356375 @@ -252,3 +252,22 @@ define void @nuw_nsw_i32_zext([16 x i8]* %ptr, i32 %idx) { store i8 1, i8* %gep.3, align 1 ret void } + +; %mul.1 and %sub.2 are equal, if %idx = 9, because %mul.1 overflows. Hence +; %gep.mul.1 and %gep.sub.2 may alias. +define void @may_overflow_pointer_diff([16 x i8]* %ptr, i64 %idx) { +; CHECK-LABEL: Function: may_overflow_pointer_diff: 3 pointers, 0 call sites +; CHECK-NEXT: MayAlias: [16 x i8]* %ptr, i8* %gep.mul.1 +; CHECK-NEXT: MayAlias: [16 x i8]* %ptr, i8* %gep.sub.2 +; CHECK-NEXT: NoAlias: i8* %gep.mul.1, i8* %gep.sub.2 +; + %mul.1 = mul i64 %idx, 6148914691236517207 + %gep.mul.1 = getelementptr [16 x i8], [16 x i8]* %ptr, i32 0, i64 %mul.1 + store i8 1, i8* %gep.mul.1, align 1 + %mul.2 = mul nsw i64 %idx, 3 + %sub.2 = sub nsw i64 %mul.2, 12 + %gep.sub.2= getelementptr [16 x i8], [16 x i8]* %ptr, i32 0, i64 %sub.2 + store i8 0, i8* %gep.sub.2, align 1 + + ret void +} From 558d9e82283e6b053b18091de6e56a5e208b7036 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Wed, 23 Jun 2021 11:15:24 +0000 Subject: [PATCH 055/619] [llvm][ARM] Treat xscale arch as an alias of armv5te Previously xscale was known to everything apart from the ELF streamer so we would crash as soon as you tried to output an object file. Reviewed By: nickdesaulniers Differential Revision: https://reviews.llvm.org/D104776 --- .../ARM/MCTargetDesc/ARMELFStreamer.cpp | 1 + llvm/test/MC/ARM/directive-arch-xscale.s | 34 +++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 llvm/test/MC/ARM/directive-arch-xscale.s diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 5d3342a887d63..e294748e5ce7b 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -834,6 +834,7 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() { case ARM::ArchKind::ARMV4T: case ARM::ArchKind::ARMV5T: + case ARM::ArchKind::XSCALE: case ARM::ArchKind::ARMV5TE: case ARM::ArchKind::ARMV6: setAttributeItem(ARM_ISA_use, Allowed, false); diff --git a/llvm/test/MC/ARM/directive-arch-xscale.s b/llvm/test/MC/ARM/directive-arch-xscale.s new file mode 100644 index 0000000000000..391b47801857c --- /dev/null +++ b/llvm/test/MC/ARM/directive-arch-xscale.s @@ -0,0 +1,34 @@ +@@ Test the .arch directive for xscale + +@@ This test case will check the default .ARM.attributes value for the +@@ xscale architecture. + +@ RUN: llvm-mc -triple arm-eabi -filetype asm %s \ +@ RUN: | FileCheck %s -check-prefix CHECK-ASM +@ RUN: llvm-mc -triple arm-eabi -filetype obj %s \ +@ RUN: | llvm-readobj --arch-specific - | FileCheck %s -check-prefix CHECK-ATTR + + .syntax unified + .arch xscale + +@ CHECK-ASM: .arch xscale + +@ CHECK-ATTR: FileAttributes { +@ CHECK-ATTR: Attribute { +@ CHECK-ATTR: TagName: CPU_name +@ CHECK-ATTR: Value: xscale +@ CHECK-ATTR: } +@ CHECK-ATTR: Attribute { +@ CHECK-ATTR: TagName: CPU_arch +@ CHECK-ATTR: Description: ARM v5TE +@ CHECK-ATTR: } +@ CHECK-ATTR: Attribute { +@ CHECK-ATTR: TagName: ARM_ISA_use +@ CHECK-ATTR: Description: Permitted +@ CHECK-ATTR: } +@ CHECK-ATTR: Attribute { +@ CHECK-ATTR: TagName: THUMB_ISA_use +@ CHECK-ATTR: Description: Thumb-1 +@ CHECK-ATTR: } +@ CHECK-ATTR: } + From 540b4a5fb31086b6d40735e96e6ec497022107e7 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Mon, 28 Jun 2021 11:16:11 -0400 Subject: [PATCH 056/619] Revert "[DebugInfo] Enable variadic debug value salvaging" This reverts commit adace79652174d126be290cab42b3122569fe15d. Still breaks things, see comment on https://reviews.llvm.org/D91722 --- llvm/lib/Transforms/Utils/Local.cpp | 6 +++++- llvm/test/DebugInfo/NVPTX/debug-info.ll | 10 ++++------ llvm/test/DebugInfo/salvage-gep.ll | 1 + llvm/test/DebugInfo/salvage-nonconst-binop.ll | 1 + 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index 8af22af83d45a..cfbc24c0001fa 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -1748,7 +1748,11 @@ void llvm::salvageDebugInfoForDbgValues( } else if (isa(DII) && DII->getNumVariableLocationOps() + AdditionalValues.size() <= MaxDebugArgs) { - DII->addVariableLocationOps(AdditionalValues, SalvagedExpr); + // TODO: Uncomment the line below and delete the two beneath it to enable + // salvaging of dbg.values with multiple location operands. + // DII->addVariableLocationOps(AdditionalValues, SalvagedExpr); + Value *Undef = UndefValue::get(I.getOperand(0)->getType()); + DII->replaceVariableLocationOp(I.getOperand(0), Undef); } else { // Do not salvage using DIArgList for dbg.addr/dbg.declare, as it is // currently only valid for stack value expressions. diff --git a/llvm/test/DebugInfo/NVPTX/debug-info.ll b/llvm/test/DebugInfo/NVPTX/debug-info.ll index 15ea41e8ebdd4..08a7e037ec490 100644 --- a/llvm/test/DebugInfo/NVPTX/debug-info.ll +++ b/llvm/test/DebugInfo/NVPTX/debug-info.ll @@ -702,12 +702,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT: } ; CHECK-NEXT: .section .debug_info ; CHECK-NEXT: { -; CHECK-NEXT:.b32 10034 // Length of Unit +; CHECK-NEXT:.b32 10029 // Length of Unit ; CHECK-NEXT:.b8 2 // DWARF version number ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b32 .debug_abbrev // Offset Into Abbrev. Section ; CHECK-NEXT:.b8 8 // Address Size (in bytes) -; CHECK-NEXT:.b8 1 // Abbrev [1] 0xb:0x272b DW_TAG_compile_unit +; CHECK-NEXT:.b8 1 // Abbrev [1] 0xb:0x2726 DW_TAG_compile_unit ; CHECK-NEXT:.b8 0 // DW_AT_producer ; CHECK-NEXT:.b8 4 // DW_AT_language ; CHECK-NEXT:.b8 0 @@ -8306,7 +8306,7 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 3 // DW_AT_decl_line ; CHECK-NEXT:.b32 3345 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 40 // Abbrev [40] 0x2671:0xc4 DW_TAG_subprogram +; CHECK-NEXT:.b8 40 // Abbrev [40] 0x2671:0xbf DW_TAG_subprogram ; CHECK-NEXT:.b64 Lfunc_begin0 // DW_AT_low_pc ; CHECK-NEXT:.b64 Lfunc_end0 // DW_AT_high_pc ; CHECK-NEXT:.b8 1 // DW_AT_frame_base @@ -8386,7 +8386,7 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_AT_call_file ; CHECK-NEXT:.b8 6 // DW_AT_call_line ; CHECK-NEXT:.b8 37 // DW_AT_call_column -; CHECK-NEXT:.b8 43 // Abbrev [43] 0x2711:0x23 DW_TAG_inlined_subroutine +; CHECK-NEXT:.b8 43 // Abbrev [43] 0x2711:0x1e DW_TAG_inlined_subroutine ; CHECK-NEXT:.b32 9791 // DW_AT_abstract_origin ; CHECK-NEXT:.b64 Ltmp9 // DW_AT_low_pc ; CHECK-NEXT:.b64 Ltmp10 // DW_AT_high_pc @@ -8395,8 +8395,6 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 5 // DW_AT_call_column ; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2729:0x5 DW_TAG_formal_parameter ; CHECK-NEXT:.b32 9820 // DW_AT_abstract_origin -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x272e:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 9829 // DW_AT_abstract_origin ; CHECK-NEXT:.b8 0 // End Of Children Mark ; CHECK-NEXT:.b8 0 // End Of Children Mark ; CHECK-NEXT:.b8 0 // End Of Children Mark diff --git a/llvm/test/DebugInfo/salvage-gep.ll b/llvm/test/DebugInfo/salvage-gep.ll index 6c31b0ff61de6..dfb3b5311083b 100644 --- a/llvm/test/DebugInfo/salvage-gep.ll +++ b/llvm/test/DebugInfo/salvage-gep.ll @@ -1,3 +1,4 @@ +; XFAIL: * ; RUN: opt %s -dce -S | FileCheck %s ; Tests the salvaging of GEP instructions, specifically struct indexing and diff --git a/llvm/test/DebugInfo/salvage-nonconst-binop.ll b/llvm/test/DebugInfo/salvage-nonconst-binop.ll index b470bc1ad2a91..f9ee1a6e253c9 100644 --- a/llvm/test/DebugInfo/salvage-nonconst-binop.ll +++ b/llvm/test/DebugInfo/salvage-nonconst-binop.ll @@ -1,3 +1,4 @@ +; XFAIL: * ; RUN: opt %s -dce -S | FileCheck %s ; Tests the salvaging of binary operators that use more than one non-constant From d5402a2fee5d860e20378f819e200865af3a6113 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Mon, 28 Jun 2021 11:32:39 -0400 Subject: [PATCH 057/619] Revert "[Analyzer][solver] Add dump methods for (dis)equality classes." This reverts commit 6f3b775c3e9c685f74ecbe2ce1a94af52cc17c2f. Test fails flakily, see comments on https://reviews.llvm.org/D103967 Also revert follow-up "[Analyzer] Attempt to fix windows bots test failure b/c of new-line" This reverts commit fe0e861a4d9946a3e7de1bc95a3ec12fa602b492. --- .../Core/RangeConstraintManager.cpp | 138 ------------------ .../expr-inspection-printState-diseq-info.c | 35 ----- .../expr-inspection-printState-eq-classes.c | 22 --- clang/test/Analysis/expr-inspection.c | 2 - 4 files changed, 197 deletions(-) delete mode 100644 clang/test/Analysis/expr-inspection-printState-diseq-info.c delete mode 100644 clang/test/Analysis/expr-inspection-printState-eq-classes.c diff --git a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp index c3d8a0a87635d..0e57a1a5040fc 100644 --- a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp @@ -592,11 +592,6 @@ class EquivalenceClass : public llvm::FoldingSetNode { RangeSet::Factory &F, ProgramStateRef State); - void dumpToStream(ProgramStateRef State, raw_ostream &os) const; - LLVM_DUMP_METHOD void dump(ProgramStateRef State) const { - dumpToStream(State, llvm::errs()); - } - /// Check equivalence data for consistency. LLVM_NODISCARD LLVM_ATTRIBUTE_UNUSED static bool isClassDataConsistent(ProgramStateRef State); @@ -1410,17 +1405,6 @@ class RangeConstraintManager : public RangedConstraintManager { void printJson(raw_ostream &Out, ProgramStateRef State, const char *NL = "\n", unsigned int Space = 0, bool IsDot = false) const override; - void printConstraints(raw_ostream &Out, ProgramStateRef State, - const char *NL = "\n", unsigned int Space = 0, - bool IsDot = false) const; - void printEquivalenceClasses(raw_ostream &Out, ProgramStateRef State, - const char *NL = "\n", unsigned int Space = 0, - bool IsDot = false) const; - void printEquivalenceClass(raw_ostream &Out, ProgramStateRef State, - EquivalenceClass Class) const; - void printDisequalities(raw_ostream &Out, ProgramStateRef State, - const char *NL = "\n", unsigned int Space = 0, - bool IsDot = false) const; //===------------------------------------------------------------------===// // Implementation for interface from RangedConstraintManager. @@ -1644,15 +1628,6 @@ ConstraintMap ento::getConstraintMap(ProgramStateRef State) { // EqualityClass implementation details //===----------------------------------------------------------------------===// -LLVM_DUMP_METHOD void EquivalenceClass::dumpToStream(ProgramStateRef State, - raw_ostream &os) const { - SymbolSet ClassMembers = getClassMembers(State); - for (const SymbolRef &MemberSym : ClassMembers) { - MemberSym->dump(); - os << "\n"; - } -} - inline EquivalenceClass EquivalenceClass::find(ProgramStateRef State, SymbolRef Sym) { assert(State && "State should not be null"); @@ -2493,16 +2468,6 @@ ProgramStateRef RangeConstraintManager::assumeSymOutsideInclusiveRange( void RangeConstraintManager::printJson(raw_ostream &Out, ProgramStateRef State, const char *NL, unsigned int Space, bool IsDot) const { - printConstraints(Out, State, NL, Space, IsDot); - printEquivalenceClasses(Out, State, NL, Space, IsDot); - printDisequalities(Out, State, NL, Space, IsDot); -} - -void RangeConstraintManager::printConstraints(raw_ostream &Out, - ProgramStateRef State, - const char *NL, - unsigned int Space, - bool IsDot) const { ConstraintRangeTy Constraints = State->get(); Indent(Out, Space, IsDot) << "\"constraints\": "; @@ -2536,106 +2501,3 @@ void RangeConstraintManager::printConstraints(raw_ostream &Out, --Space; Indent(Out, Space, IsDot) << "]," << NL; } - -void RangeConstraintManager::printEquivalenceClass( - raw_ostream &Out, ProgramStateRef State, EquivalenceClass Class) const { - bool FirstMember = true; - SymbolSet ClassMembers = Class.getClassMembers(State); - Out << "[ "; - for (SymbolRef ClassMember : ClassMembers) { - if (FirstMember) - FirstMember = false; - else - Out << ", "; - Out << "\"" << ClassMember << "\""; - } - Out << " ]"; -} - -void RangeConstraintManager::printEquivalenceClasses(raw_ostream &Out, - ProgramStateRef State, - const char *NL, - unsigned int Space, - bool IsDot) const { - ClassMembersTy Members = State->get(); - - Indent(Out, Space, IsDot) << "\"equivalence_classes\": "; - if (Members.isEmpty()) { - Out << "null," << NL; - return; - } - - ++Space; - Out << '[' << NL; - bool FirstClass = true; - for (std::pair ClassToSymbolSet : Members) { - EquivalenceClass Class = ClassToSymbolSet.first; - - if (FirstClass) { - FirstClass = false; - } else { - Out << ','; - Out << NL; - } - Indent(Out, Space, IsDot); - printEquivalenceClass(Out, State, Class); - } - Out << NL; - - --Space; - Indent(Out, Space, IsDot) << "]," << NL; -} - -void RangeConstraintManager::printDisequalities(raw_ostream &Out, - ProgramStateRef State, - const char *NL, - unsigned int Space, - bool IsDot) const { - DisequalityMapTy Disequalities = State->get(); - - Indent(Out, Space, IsDot) << "\"disequality_info\": "; - if (Disequalities.isEmpty()) { - Out << "null," << NL; - return; - } - - ++Space; - Out << '[' << NL; - bool FirstClass = true; - for (std::pair ClassToDisEqSet : Disequalities) { - EquivalenceClass Class = ClassToDisEqSet.first; - if (FirstClass) { - FirstClass = false; - } else { - Out << ','; - Out << NL; - } - Indent(Out, Space, IsDot) << "{" << NL; - unsigned int DisEqSpace = Space + 1; - Indent(Out, DisEqSpace, IsDot) << "\"class\": "; - printEquivalenceClass(Out, State, Class); - ClassSet DisequalClasses = ClassToDisEqSet.second; - if (!DisequalClasses.isEmpty()) { - Out << "," << NL; - Indent(Out, DisEqSpace, IsDot) << "\"disequal_to\": [" << NL; - unsigned int DisEqClassSpace = DisEqSpace + 1; - Indent(Out, DisEqClassSpace, IsDot); - bool FirstDisEqClass = true; - for (EquivalenceClass DisEqClass : DisequalClasses) { - if (FirstDisEqClass) { - FirstDisEqClass = false; - } else { - Out << ',' << NL; - Indent(Out, DisEqClassSpace, IsDot); - } - printEquivalenceClass(Out, State, DisEqClass); - } - Out << "]" << NL; - } - Indent(Out, Space, IsDot) << "}"; - } - Out << NL; - - --Space; - Indent(Out, Space, IsDot) << "]," << NL; -} diff --git a/clang/test/Analysis/expr-inspection-printState-diseq-info.c b/clang/test/Analysis/expr-inspection-printState-diseq-info.c deleted file mode 100644 index 3f11ed61af5e7..0000000000000 --- a/clang/test/Analysis/expr-inspection-printState-diseq-info.c +++ /dev/null @@ -1,35 +0,0 @@ -// RUN: %clang_analyze_cc1 \ -// RUN: -analyzer-checker=debug.ExprInspection %s 2>&1 | FileCheck %s -// UNSUPPORTED: windows - -void clang_analyzer_printState(); - -void test_disequality_info(int e0, int b0, int b1, int c0) { - int e1 = e0 - b0; - if (b0 == 2) { - int e2 = e1 - b1; - if (e2 > 0) { - if (b1 != c0) - clang_analyzer_printState(); - } - } -} - -// CHECK: "disequality_info": [ -// CHECK-NEXT: { -// CHECK-NEXT: "class": [ "reg_$2" ], -// CHECK-NEXT: "disequal_to": [ -// CHECK-NEXT: [ "(reg_$0) - 2" ], -// CHECK-NEXT: [ "reg_$3" ]] -// CHECK-NEXT: }, -// CHECK-NEXT: { -// CHECK-NEXT: "class": [ "(reg_$0) - 2" ], -// CHECK-NEXT: "disequal_to": [ -// CHECK-NEXT: [ "reg_$2" ]] -// CHECK-NEXT: }, -// CHECK-NEXT: { -// CHECK-NEXT: "class": [ "reg_$3" ], -// CHECK-NEXT: "disequal_to": [ -// CHECK-NEXT: [ "reg_$2" ]] -// CHECK-NEXT: } -// CHECK-NEXT: ], diff --git a/clang/test/Analysis/expr-inspection-printState-eq-classes.c b/clang/test/Analysis/expr-inspection-printState-eq-classes.c deleted file mode 100644 index 7daa8648c2494..0000000000000 --- a/clang/test/Analysis/expr-inspection-printState-eq-classes.c +++ /dev/null @@ -1,22 +0,0 @@ -// RUN: %clang_analyze_cc1 \ -// RUN: -analyzer-checker=debug.ExprInspection %s 2>&1 | FileCheck %s -// UNSUPPORTED: windows - -void clang_analyzer_printState(); - -void test_equivalence_classes(int a, int b, int c, int d) { - if (a + b != c) - return; - if (a != d) - return; - if (b != 0) - return; - clang_analyzer_printState(); - (void)(a * b * c * d); - return; -} - -// CHECK: "equivalence_classes": [ -// CHECK-NEXT: [ "reg_$0", "(reg_$0) + (reg_$1)", "reg_$2", "reg_$3" ], -// CHECK-NEXT: [ "((reg_$0) + (reg_$1)) != (reg_$2)", "(reg_$0) != (reg_$2)" ] -// CHECK-NEXT: ], diff --git a/clang/test/Analysis/expr-inspection.c b/clang/test/Analysis/expr-inspection.c index 76118a76e71ca..283fa9bdb724a 100644 --- a/clang/test/Analysis/expr-inspection.c +++ b/clang/test/Analysis/expr-inspection.c @@ -38,8 +38,6 @@ void foo(int x) { // CHECK-NEXT: "constraints": [ // CHECK-NEXT: { "symbol": "reg_$0", "range": "{ [-2147483648, 13] }" } // CHECK-NEXT: ], -// CHECK-NEXT: "equivalence_classes": null, -// CHECK-NEXT: "disequality_info": null, // CHECK-NEXT: "dynamic_types": null, // CHECK-NEXT: "dynamic_casts": null, // CHECK-NEXT: "constructing_objects": null, From 75cacc6775ad8fc3d89c89ff77fc4a3b7de32111 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Mon, 28 Jun 2021 16:44:39 +0100 Subject: [PATCH 058/619] [AMDGPU] Use opName instead of PseudoName in VOP2 multiclasses. NFC. This is just for consistency with all other instruction multiclasses that pass around pseudo names as arguments. --- llvm/lib/Target/AMDGPU/VOP2Instructions.td | 28 +++++++++++----------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index 6ad23bc7497a2..6c58769316cbf 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -1278,20 +1278,20 @@ let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in { VOP2_Real(NAME), SIEncodingFamily.SI>, VOP2_MADKe(NAME).Pfl>; } - multiclass VOP2_Real_e32_gfx6_gfx7 op, string PseudoName = NAME> { + multiclass VOP2_Real_e32_gfx6_gfx7 op, string opName = NAME> { def _e32_gfx6_gfx7 : - VOP2_Real(PseudoName#"_e32"), SIEncodingFamily.SI>, - VOP2e(PseudoName#"_e32").Pfl>; + VOP2_Real(opName#"_e32"), SIEncodingFamily.SI>, + VOP2e(opName#"_e32").Pfl>; } - multiclass VOP2_Real_e64_gfx6_gfx7 op, string PseudoName = NAME> { + multiclass VOP2_Real_e64_gfx6_gfx7 op, string opName = NAME> { def _e64_gfx6_gfx7 : - VOP3_Real(PseudoName#"_e64"), SIEncodingFamily.SI>, - VOP3e_gfx6_gfx7<{1, 0, 0, op{5-0}}, !cast(PseudoName#"_e64").Pfl>; + VOP3_Real(opName#"_e64"), SIEncodingFamily.SI>, + VOP3e_gfx6_gfx7<{1, 0, 0, op{5-0}}, !cast(opName#"_e64").Pfl>; } - multiclass VOP2be_Real_e64_gfx6_gfx7 op, string PseudoName = NAME> { + multiclass VOP2be_Real_e64_gfx6_gfx7 op, string opName = NAME> { def _e64_gfx6_gfx7 : - VOP3_Real(PseudoName#"_e64"), SIEncodingFamily.SI>, - VOP3be_gfx6_gfx7<{1, 0, 0, op{5-0}}, !cast(PseudoName#"_e64").Pfl>; + VOP3_Real(opName#"_e64"), SIEncodingFamily.SI>, + VOP3be_gfx6_gfx7<{1, 0, 0, op{5-0}}, !cast(opName#"_e64").Pfl>; } } // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" @@ -1308,16 +1308,16 @@ multiclass VOP2be_Real_gfx6_gfx7 op> : VOP2_Real_e32_gfx6_gfx7, VOP2be_Real_e64_gfx6_gfx7; multiclass VOP2be_Real_gfx6_gfx7_with_name op, - string PseudoName, string asmName> { - defvar ps32 = !cast(PseudoName#"_e32"); - defvar ps64 = !cast(PseudoName#"_e64"); + string opName, string asmName> { + defvar ps32 = !cast(opName#"_e32"); + defvar ps64 = !cast(opName#"_e64"); let AsmString = asmName # ps32.AsmOperands in { - defm "" : VOP2_Real_e32_gfx6_gfx7; + defm "" : VOP2_Real_e32_gfx6_gfx7; } let AsmString = asmName # ps64.AsmOperands in { - defm "" : VOP2be_Real_e64_gfx6_gfx7; + defm "" : VOP2be_Real_e64_gfx6_gfx7; } } From cccc7e5aa8088b3b721e1f430c47d199575fae9b Mon Sep 17 00:00:00 2001 From: "William S. Moses" Date: Fri, 25 Jun 2021 16:16:23 -0400 Subject: [PATCH 059/619] [MLIR] Don't remove memref allocation if stored into another allocation A canonicalization accidentally will remove a memref allocation if it is only stored into. However, this is incorrect if the allocation is the value being stored, not the allocation being stored into. Differential Revision: https://reviews.llvm.org/D104947 --- mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp | 6 ++++-- mlir/test/Dialect/MemRef/canonicalize.mlir | 10 ++++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp index cc4e7a49363a5..6f358d834beed 100644 --- a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp +++ b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp @@ -174,8 +174,10 @@ struct SimplifyDeadAlloc : public OpRewritePattern { LogicalResult matchAndRewrite(T alloc, PatternRewriter &rewriter) const override { - if (llvm::any_of(alloc->getUsers(), [](Operation *op) { - return !isa(op); + if (llvm::any_of(alloc->getUsers(), [&](Operation *op) { + if (auto storeOp = dyn_cast(op)) + return storeOp.value() == alloc; + return !isa(op); })) return failure(); diff --git a/mlir/test/Dialect/MemRef/canonicalize.mlir b/mlir/test/Dialect/MemRef/canonicalize.mlir index cbf2126a9ea2f..c59d1d30f7ec1 100644 --- a/mlir/test/Dialect/MemRef/canonicalize.mlir +++ b/mlir/test/Dialect/MemRef/canonicalize.mlir @@ -420,3 +420,13 @@ func @alloc_const_fold_with_symbols2() -> memref { %0 = memref.alloc(%c1)[%c1, %c1] : memref return %0 : memref } + +// ----- +// CHECK-LABEL: func @allocator +// CHECK: %[[alloc:.+]] = memref.alloc +// CHECK: memref.store %[[alloc:.+]], %arg0 +func @allocator(%arg0 : memref>, %arg1 : index) { + %0 = memref.alloc(%arg1) : memref + memref.store %0, %arg0[] : memref> + return +} From c3fe847f9d90de5a6a76fd1d5f5823ab4719accc Mon Sep 17 00:00:00 2001 From: Stefan Pintilie Date: Fri, 11 Jun 2021 05:39:28 -0500 Subject: [PATCH 060/619] [Clang] Add option to handle behaviour of vector bool/vector pixel. Added the option `-altivec-src-compat=[mixed,gcc,xl]`. The default at this time is `mixed`. The default behavior for clang is for all vector compares to return a scalar unless the vectors being compared are vector bool or vector pixel. In that case the compare returns a vector. With the gcc case all vector compares return vectors and in the xl case all vector compares return scalars. This patch does not change the default behavior of clang. This option will be used in future patches to implement behaviour compatibility for the vector bool/pixel types. Reviewed By: bmahjour Differential Revision: https://reviews.llvm.org/D103615 --- .../clang/Basic/DiagnosticSemaKinds.td | 6 + clang/include/clang/Basic/LangOptions.def | 2 + clang/include/clang/Basic/LangOptions.h | 12 ++ clang/include/clang/Driver/Options.td | 12 ++ clang/lib/Driver/ToolChains/Clang.cpp | 1 + clang/lib/Sema/SemaExpr.cpp | 29 ++- .../vector-compat-pixel-bool-ternary.c | 104 ++++++++++ clang/test/CodeGen/vector-compat-pixel-bool.c | 94 +++++++++ clang/test/CodeGen/vector-compat-ternary.c | 180 ++++++++++++++++++ clang/test/CodeGen/vector-compat.c | 162 ++++++++++++++++ 10 files changed, 597 insertions(+), 5 deletions(-) create mode 100644 clang/test/CodeGen/vector-compat-pixel-bool-ternary.c create mode 100644 clang/test/CodeGen/vector-compat-pixel-bool.c create mode 100644 clang/test/CodeGen/vector-compat-ternary.c create mode 100644 clang/test/CodeGen/vector-compat.c diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index b5b8bc6aa3c57..70a22fd2506a3 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -7441,6 +7441,12 @@ def warn_deprecated_volatile_structured_binding : Warning< "volatile qualifier in structured binding declaration is deprecated">, InGroup; +def warn_deprecated_altivec_src_compat : Warning< + "Current handling of vector bool and vector pixel types in this context are " + "deprecated. The default behaviour will soon change to that implied by the " + "'-altivec-compat=xl' option">, + InGroup>; + def err_catch_incomplete_ptr : Error< "cannot catch pointer to incomplete type %0">; def err_catch_incomplete_ref : Error< diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index b6d9160f89a00..465bad8d7d112 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -126,6 +126,8 @@ LANGOPT(WritableStrings , 1, 0, "writable string support") LANGOPT(ConstStrings , 1, 0, "const-qualified string support") ENUM_LANGOPT(LaxVectorConversions, LaxVectorConversionKind, 2, LaxVectorConversionKind::All, "lax vector conversions") +ENUM_LANGOPT(AltivecSrcCompat, AltivecSrcCompatKind, 2, + AltivecSrcCompatKind::Default, "Altivec source compatibility") LANGOPT(ConvergentFunctions, 1, 1, "Assume convergent functions") LANGOPT(AltiVec , 1, 0, "AltiVec-style vector initializers") LANGOPT(ZVector , 1, 0, "System z vector extensions") diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h index d618daf3d23c2..d04ce52a550ef 100644 --- a/clang/include/clang/Basic/LangOptions.h +++ b/clang/include/clang/Basic/LangOptions.h @@ -244,6 +244,18 @@ class LangOptions : public LangOptionsBase { All, }; + enum class AltivecSrcCompatKind { + // All vector compares produce scalars except vector pixel and vector bool. + // The types vector pixel and vector bool return vector results. + Mixed, + // All vector compares produce vector results as in GCC. + GCC, + // All vector compares produce scalars as in XL. + XL, + // Default clang behaviour. + Default = Mixed, + }; + enum class SignReturnAddressScopeKind { /// No signing for any function. None, diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 0122afd2eeada..f1455f5461990 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3823,6 +3823,18 @@ def u : JoinedOrSeparate<["-"], "u">, Group; def v : Flag<["-"], "v">, Flags<[CC1Option, CoreOption]>, HelpText<"Show commands to run and use verbose output">, MarshallingInfoFlag>; +def altivec_src_compat : Joined<["-"], "faltivec-src-compat=">, + Flags<[CC1Option]>, Group, + HelpText<"Source-level compatibility for Altivec vectors (for PowerPC " + "targets). This includes results of vector comparison (scalar for " + "'xl', vector for 'gcc') as well as behavior when initializing with " + "a scalar (splatting for 'xl', element zero only for 'gcc'). For " + "'mixed', the compatibility is as 'gcc' for 'vector bool/vector " + "pixel' and as 'xl' for other types. Current default is 'mixed'.">, + Values<"mixed,gcc,xl">, + NormalizedValuesScope<"LangOptions::AltivecSrcCompatKind">, + NormalizedValues<["Mixed", "GCC", "XL"]>, + MarshallingInfoEnum, "Mixed">; def verify_debug_info : Flag<["--"], "verify-debug-info">, Flags<[NoXarchOption]>, HelpText<"Verify the binary representation of debug output">; def weak_l : Joined<["-"], "weak-l">, Flags<[LinkerInput]>; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index a3f0ec577379e..c265e1c4e53cb 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5816,6 +5816,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, (Args.hasArg(options::OPT_mkernel) && types::isCXX(InputType))) CmdArgs.push_back("-fapple-kext"); + Args.AddLastArg(CmdArgs, options::OPT_altivec_src_compat); Args.AddLastArg(CmdArgs, options::OPT_flax_vector_conversions_EQ); Args.AddLastArg(CmdArgs, options::OPT_fobjc_sender_dependent_dispatch); Args.AddLastArg(CmdArgs, options::OPT_fdiagnostics_print_source_range_info); diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 728d7b61d4a86..6031dff673351 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -12224,11 +12224,30 @@ QualType Sema::CheckVectorCompareOperands(ExprResult &LHS, ExprResult &RHS, QualType LHSType = LHS.get()->getType(); - // If AltiVec, the comparison results in a numeric type, i.e. - // bool for C++, int for C - if (getLangOpts().AltiVec && - vType->castAs()->getVectorKind() == VectorType::AltiVecVector) - return Context.getLogicalOperationType(); + // Determine the return type of a vector compare. By default clang will return + // a scalar for all vector compares except vector bool and vector pixel. + // With the gcc compiler we will always return a vector type and with the xl + // compiler we will always return a scalar type. This switch allows choosing + // which behavior is prefered. + if (getLangOpts().AltiVec) { + switch (getLangOpts().getAltivecSrcCompat()) { + case LangOptions::AltivecSrcCompatKind::Mixed: + // If AltiVec, the comparison results in a numeric type, i.e. + // bool for C++, int for C + if (vType->castAs()->getVectorKind() == + VectorType::AltiVecVector) + return Context.getLogicalOperationType(); + else + Diag(Loc, diag::warn_deprecated_altivec_src_compat); + break; + case LangOptions::AltivecSrcCompatKind::GCC: + // For GCC we always return the vector type. + break; + case LangOptions::AltivecSrcCompatKind::XL: + return Context.getLogicalOperationType(); + break; + } + } // For non-floating point types, check for self-comparisons of the form // x == x, x != x, x < x, etc. These always evaluate to a constant, and diff --git a/clang/test/CodeGen/vector-compat-pixel-bool-ternary.c b/clang/test/CodeGen/vector-compat-pixel-bool-ternary.c new file mode 100644 index 0000000000000..20da809602126 --- /dev/null +++ b/clang/test/CodeGen/vector-compat-pixel-bool-ternary.c @@ -0,0 +1,104 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: not %clang_cc1 -target-feature +altivec -target-feature +vsx \ +// RUN: -faltivec-src-compat=mixed -triple powerpc-unknown-unknown -S -emit-llvm %s -o - 2>&1 | FileCheck %s --check-prefix=ERROR +// RUN: not %clang_cc1 -target-feature +altivec -target-feature +vsx \ +// RUN: -faltivec-src-compat=gcc -triple powerpc-unknown-unknown -S -emit-llvm %s -o - 2>&1| FileCheck %s --check-prefix=ERROR +// RUN: %clang_cc1 -target-feature +altivec -target-feature +vsx \ +// RUN: -faltivec-src-compat=xl -triple powerpc-unknown-unknown -S -emit-llvm %s -o - | FileCheck %s +// RUN: %clang -mcpu=pwr8 -faltivec-src-compat=xl --target=powerpc-unknown-unknown -S -emit-llvm %s -o - | FileCheck %s +// RUN: %clang -mcpu=pwr9 -faltivec-src-compat=xl --target=powerpc-unknown-unknown -S -emit-llvm %s -o - | FileCheck %s + +// CHECK-LABEL: @bi8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: store <16 x i8> [[A:%.*]], <16 x i8>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <16 x i8> [[B:%.*]], <16 x i8>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ppc.altivec.vcmpequb.p(i32 2, <16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i64 +// CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 3, i32 7 +// CHECK-NEXT: ret i32 [[COND]] +// +// ERROR: error: used type '__attribute__((__vector_size__(16 * sizeof(char)))) char' (vector of 16 'char' values) where arithmetic or pointer type is required +int bi8(vector bool char a, vector bool char b) { + return a == b ? 3 : 7; +} + +// CHECK-LABEL: @bi16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x i16>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x i16>, align 16 +// CHECK-NEXT: store <8 x i16> [[A:%.*]], <8 x i16>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <8 x i16> [[B:%.*]], <8 x i16>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, <8 x i16>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ppc.altivec.vcmpequh.p(i32 2, <8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i64 +// CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 3, i32 7 +// CHECK-NEXT: ret i32 [[COND]] +// +// ERROR: error: used type '__attribute__((__vector_size__(8 * sizeof(short)))) short' (vector of 8 'short' values) where arithmetic or pointer type is required +int bi16(vector bool short a, vector bool short b) { + return a == b ? 3 : 7; +} + +// CHECK-LABEL: @bi32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x i32>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x i32>, align 16 +// CHECK-NEXT: store <4 x i32> [[A:%.*]], <4 x i32>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <4 x i32> [[B:%.*]], <4 x i32>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ppc.altivec.vcmpequw.p(i32 2, <4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i64 +// CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 3, i32 7 +// CHECK-NEXT: ret i32 [[COND]] +// +// ERROR: error: used type '__attribute__((__vector_size__(4 * sizeof(long)))) long' (vector of 4 'long' values) where arithmetic or pointer type is required +int bi32(vector bool int a, vector bool int b) { + return a == b ? 3 : 7; +} + +// CHECK-LABEL: @bi64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x i64>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x i64>, align 16 +// CHECK-NEXT: store <2 x i64> [[A:%.*]], <2 x i64>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <2 x i64> [[B:%.*]], <2 x i64>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ppc.altivec.vcmpequd.p(i32 2, <2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i64 +// CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 3, i32 7 +// CHECK-NEXT: ret i32 [[COND]] +// +// ERROR: error: used type '__attribute__((__vector_size__(2 * sizeof(long long)))) long long' (vector of 2 'long long' values) where arithmetic or pointer type is required +int bi64(vector bool long long a, vector bool long long b) { + return a == b ? 3 : 7; +} + +// CHECK-LABEL: @VecPixel( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x i16>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x i16>, align 16 +// CHECK-NEXT: store <8 x i16> [[A:%.*]], <8 x i16>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <8 x i16> [[B:%.*]], <8 x i16>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, <8 x i16>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ppc.altivec.vcmpequh.p(i32 2, <8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i64 +// CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 3, i32 7 +// CHECK-NEXT: ret i32 [[COND]] +// +// ERROR: error: used type '__attribute__((__vector_size__(8 * sizeof(short)))) short' (vector of 8 'short' values) where arithmetic or pointer type is required +int VecPixel(vector pixel a, vector pixel b) { + return a == b ? 3 : 7; +} diff --git a/clang/test/CodeGen/vector-compat-pixel-bool.c b/clang/test/CodeGen/vector-compat-pixel-bool.c new file mode 100644 index 0000000000000..292431f5b07f6 --- /dev/null +++ b/clang/test/CodeGen/vector-compat-pixel-bool.c @@ -0,0 +1,94 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -target-feature +altivec -target-feature +vsx \ +// RUN: -faltivec-src-compat=mixed -triple powerpc-unknown-unknown -S -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -target-feature +altivec -target-feature +vsx \ +// RUN: -faltivec-src-compat=gcc -triple powerpc-unknown-unknown -S -emit-llvm %s -o - | FileCheck %s +// RUN: not %clang_cc1 -target-feature +altivec -target-feature +vsx \ +// RUN: -faltivec-src-compat=xl -triple powerpc-unknown-unknown -S -emit-llvm %s -o - 2>&1 | FileCheck %s --check-prefix=ERROR +// RUN: %clang -mcpu=pwr8 -faltivec-src-compat=gcc --target=powerpc-unknown-unknown -S -emit-llvm %s -o - | FileCheck %s +// RUN: %clang -mcpu=pwr9 -faltivec-src-compat=gcc --target=powerpc-unknown-unknown -S -emit-llvm %s -o - | FileCheck %s + +// CHECK-LABEL: @bi8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: store <16 x i8> [[A:%.*]], <16 x i8>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <16 x i8> [[B:%.*]], <16 x i8>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[CMP:%.*]] = icmp eq <16 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i8> +// CHECK-NEXT: ret <16 x i8> [[SEXT]] +// +// ERROR: returning 'int' from a function with incompatible result type +vector unsigned char bi8(vector bool char a, vector bool char b) { + return a == b; +} + +// CHECK-LABEL: @bi16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x i16>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x i16>, align 16 +// CHECK-NEXT: store <8 x i16> [[A:%.*]], <8 x i16>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <8 x i16> [[B:%.*]], <8 x i16>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, <8 x i16>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[CMP:%.*]] = icmp eq <8 x i16> [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i16> +// CHECK-NEXT: ret <8 x i16> [[SEXT]] +// +// ERROR: returning 'int' from a function with incompatible result type +vector bool short bi16(vector bool short a, vector bool short b) { + return a == b; +} + +// CHECK-LABEL: @bi32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x i32>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x i32>, align 16 +// CHECK-NEXT: store <4 x i32> [[A:%.*]], <4 x i32>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <4 x i32> [[B:%.*]], <4 x i32>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i32> [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[SEXT]] +// +// ERROR: returning 'int' from a function with incompatible result type +vector bool int bi32(vector bool int a, vector bool int b) { + return a == b; +} + +// CHECK-LABEL: @bi64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x i64>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x i64>, align 16 +// CHECK-NEXT: store <2 x i64> [[A:%.*]], <2 x i64>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <2 x i64> [[B:%.*]], <2 x i64>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i64> [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64> +// CHECK-NEXT: ret <2 x i64> [[SEXT]] +// +// ERROR: returning 'int' from a function with incompatible result type +vector long long bi64(vector bool long long a, vector bool long long b) { + return a == b; +} + +// CHECK-LABEL: @VecPixel( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x i16>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x i16>, align 16 +// CHECK-NEXT: store <8 x i16> [[A:%.*]], <8 x i16>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <8 x i16> [[B:%.*]], <8 x i16>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, <8 x i16>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[CMP:%.*]] = icmp eq <8 x i16> [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i16> +// CHECK-NEXT: ret <8 x i16> [[SEXT]] +// +// ERROR: returning 'int' from a function with incompatible result type +vector pixel VecPixel(vector pixel a, vector pixel b) { + return a == b; +} diff --git a/clang/test/CodeGen/vector-compat-ternary.c b/clang/test/CodeGen/vector-compat-ternary.c new file mode 100644 index 0000000000000..9a7d9d9585131 --- /dev/null +++ b/clang/test/CodeGen/vector-compat-ternary.c @@ -0,0 +1,180 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -target-feature +altivec -target-feature +vsx \ +// RUN: -faltivec-src-compat=mixed -triple powerpc-unknown-unknown -S -emit-llvm %s -o - | FileCheck %s +// RUN: not %clang_cc1 -target-feature +altivec -target-feature +vsx \ +// RUN: -faltivec-src-compat=gcc -triple powerpc-unknown-unknown -S -emit-llvm %s -o - 2>&1 | FileCheck %s --check-prefix=ERROR +// RUN: %clang_cc1 -target-feature +altivec -target-feature +vsx \ +// RUN: -faltivec-src-compat=xl -triple powerpc-unknown-unknown -S -emit-llvm %s -o - | FileCheck %s +// RUN: %clang -mcpu=pwr8 -faltivec-src-compat=xl --target=powerpc-unknown-unknown -S -emit-llvm %s -o - | FileCheck %s +// RUN: %clang -mcpu=pwr9 -faltivec-src-compat=xl --target=powerpc-unknown-unknown -S -emit-llvm %s -o - | FileCheck %s + +// CHECK-LABEL: @ui8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: store <16 x i8> [[A:%.*]], <16 x i8>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <16 x i8> [[B:%.*]], <16 x i8>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ppc.altivec.vcmpequb.p(i32 2, <16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i64 +// CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 3, i32 7 +// CHECK-NEXT: ret i32 [[COND]] +// +// ERROR: error: used type '__attribute__((__vector_size__(16 * sizeof(char)))) char' (vector of 16 'char' values) where arithmetic or pointer type is required +int ui8(vector unsigned char a, vector unsigned char b) { + return a == b ? 3 : 7; +} + +// CHECK-LABEL: @si8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: store <16 x i8> [[A:%.*]], <16 x i8>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <16 x i8> [[B:%.*]], <16 x i8>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ppc.altivec.vcmpequb.p(i32 2, <16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i64 +// CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 3, i32 7 +// CHECK-NEXT: ret i32 [[COND]] +// +// ERROR: error: used type '__attribute__((__vector_size__(16 * sizeof(char)))) char' (vector of 16 'char' values) where arithmetic or pointer type is required +int si8(vector signed char a, vector signed char b) { + return a == b ? 3 : 7; +} + +// CHECK-LABEL: @ui16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x i16>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x i16>, align 16 +// CHECK-NEXT: store <8 x i16> [[A:%.*]], <8 x i16>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <8 x i16> [[B:%.*]], <8 x i16>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, <8 x i16>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ppc.altivec.vcmpequh.p(i32 2, <8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i64 +// CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 3, i32 7 +// CHECK-NEXT: ret i32 [[COND]] +// +// ERROR: error: used type '__attribute__((__vector_size__(8 * sizeof(short)))) short' (vector of 8 'short' values) where arithmetic or pointer type is required +int ui16(vector unsigned short a, vector unsigned short b) { + return a == b ? 3 : 7; +} + +// CHECK-LABEL: @si16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x i16>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x i16>, align 16 +// CHECK-NEXT: store <8 x i16> [[A:%.*]], <8 x i16>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <8 x i16> [[B:%.*]], <8 x i16>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, <8 x i16>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ppc.altivec.vcmpequh.p(i32 2, <8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i64 +// CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 3, i32 7 +// CHECK-NEXT: ret i32 [[COND]] +// +// ERROR: error: used type '__attribute__((__vector_size__(8 * sizeof(short)))) short' (vector of 8 'short' values) where arithmetic or pointer type is required +int si16(vector signed short a, vector signed short b) { + return a == b ? 3 : 7; +} + +// CHECK-LABEL: @ui32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x i32>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x i32>, align 16 +// CHECK-NEXT: store <4 x i32> [[A:%.*]], <4 x i32>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <4 x i32> [[B:%.*]], <4 x i32>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ppc.altivec.vcmpequw.p(i32 2, <4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i64 +// CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 3, i32 7 +// CHECK-NEXT: ret i32 [[COND]] +// +// ERROR: error: used type '__attribute__((__vector_size__(4 * sizeof(long)))) long' (vector of 4 'long' values) where arithmetic or pointer type is required +int ui32(vector unsigned int a, vector unsigned int b) { + return a == b ? 3 : 7; +} + +// CHECK-LABEL: @si32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x i32>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x i32>, align 16 +// CHECK-NEXT: store <4 x i32> [[A:%.*]], <4 x i32>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <4 x i32> [[B:%.*]], <4 x i32>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ppc.altivec.vcmpequw.p(i32 2, <4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i64 +// CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 3, i32 7 +// CHECK-NEXT: ret i32 [[COND]] +// +// ERROR: error: used type '__attribute__((__vector_size__(4 * sizeof(long)))) long' (vector of 4 'long' values) where arithmetic or pointer type is required +int si32(vector signed int a, vector signed int b) { + return a == b ? 3 : 7; +} + +// CHECK-LABEL: @si64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x i64>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x i64>, align 16 +// CHECK-NEXT: store <2 x i64> [[A:%.*]], <2 x i64>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <2 x i64> [[B:%.*]], <2 x i64>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ppc.altivec.vcmpequd.p(i32 2, <2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i64 +// CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 3, i32 7 +// CHECK-NEXT: ret i32 [[COND]] +// +// ERROR: error: used type '__attribute__((__vector_size__(2 * sizeof(long long)))) long long' (vector of 2 'long long' values) where arithmetic or pointer type is required +int si64(vector long long a, vector long long b) { + return a == b ? 3 : 7; +} + +// CHECK-LABEL: @f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x float>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x float>, align 16 +// CHECK-NEXT: store <4 x float> [[A:%.*]], <4 x float>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <4 x float> [[B:%.*]], <4 x float>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ppc.altivec.vcmpeqfp.p(i32 2, <4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i64 +// CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 3, i32 7 +// CHECK-NEXT: ret i32 [[COND]] +// +// ERROR: error: used type '__attribute__((__vector_size__(4 * sizeof(long)))) long' (vector of 4 'long' values) where arithmetic or pointer type is required +int f32(vector float a, vector float b) { + return a == b ? 3 : 7; +} + +// CHECK-LABEL: @f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x double>, align 16 +// CHECK-NEXT: store <2 x double> [[A:%.*]], <2 x double>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <2 x double> [[B:%.*]], <2 x double>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, <2 x double>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ppc.vsx.xvcmpeqdp.p(i32 2, <2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i64 +// CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 3, i32 7 +// CHECK-NEXT: ret i32 [[COND]] +// +// ERROR: error: used type '__attribute__((__vector_size__(2 * sizeof(long long)))) long long' (vector of 2 'long long' values) where arithmetic or pointer type is required +int f64(vector double a, vector double b) { + return a == b ? 3 : 7; +} diff --git a/clang/test/CodeGen/vector-compat.c b/clang/test/CodeGen/vector-compat.c new file mode 100644 index 0000000000000..5f9f8d1db3570 --- /dev/null +++ b/clang/test/CodeGen/vector-compat.c @@ -0,0 +1,162 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: not %clang_cc1 -target-feature +altivec -target-feature +vsx \ +// RUN: -faltivec-src-compat=mixed -triple powerpc-unknown-unknown -S -emit-llvm %s -o - 2>&1 | FileCheck %s --check-prefix=ERROR +// RUN: %clang_cc1 -target-feature +altivec -target-feature +vsx \ +// RUN: -faltivec-src-compat=gcc -triple powerpc-unknown-unknown -S -emit-llvm %s -o - | FileCheck %s +// RUN: not %clang_cc1 -target-feature +altivec -target-feature +vsx \ +// RUN: -faltivec-src-compat=xl -triple powerpc-unknown-unknown -S -emit-llvm %s -o - 2>&1 | FileCheck %s --check-prefix=ERROR +// RUN: %clang -mcpu=pwr8 -faltivec-src-compat=gcc --target=powerpc-unknown-unknown -S -emit-llvm %s -o - | FileCheck %s +// RUN: %clang -mcpu=pwr9 -faltivec-src-compat=gcc --target=powerpc-unknown-unknown -S -emit-llvm %s -o - | FileCheck %s + +// CHECK-LABEL: @ui8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: store <16 x i8> [[A:%.*]], <16 x i8>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <16 x i8> [[B:%.*]], <16 x i8>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[CMP:%.*]] = icmp eq <16 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i8> +// CHECK-NEXT: ret <16 x i8> [[SEXT]] +// +// ERROR: returning 'int' from a function with incompatible result type +vector unsigned char ui8(vector unsigned char a, vector unsigned char b) { + return a == b; +} + +// CHECK-LABEL: @si8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: store <16 x i8> [[A:%.*]], <16 x i8>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <16 x i8> [[B:%.*]], <16 x i8>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[CMP:%.*]] = icmp eq <16 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i8> +// CHECK-NEXT: ret <16 x i8> [[SEXT]] +// +// ERROR: returning 'int' from a function with incompatible result type +vector signed char si8(vector signed char a, vector signed char b) { + return a == b; +} + +// CHECK-LABEL: @ui16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x i16>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x i16>, align 16 +// CHECK-NEXT: store <8 x i16> [[A:%.*]], <8 x i16>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <8 x i16> [[B:%.*]], <8 x i16>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, <8 x i16>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[CMP:%.*]] = icmp eq <8 x i16> [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i16> +// CHECK-NEXT: ret <8 x i16> [[SEXT]] +// +// ERROR: returning 'int' from a function with incompatible result type +vector unsigned short ui16(vector unsigned short a, vector unsigned short b) { + return a == b; +} + +// CHECK-LABEL: @si16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x i16>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x i16>, align 16 +// CHECK-NEXT: store <8 x i16> [[A:%.*]], <8 x i16>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <8 x i16> [[B:%.*]], <8 x i16>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, <8 x i16>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[CMP:%.*]] = icmp eq <8 x i16> [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i16> +// CHECK-NEXT: ret <8 x i16> [[SEXT]] +// +// ERROR: returning 'int' from a function with incompatible result type +vector signed short si16(vector signed short a, vector signed short b) { + return a == b; +} + +// CHECK-LABEL: @ui32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x i32>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x i32>, align 16 +// CHECK-NEXT: store <4 x i32> [[A:%.*]], <4 x i32>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <4 x i32> [[B:%.*]], <4 x i32>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i32> [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[SEXT]] +// +// ERROR: returning 'int' from a function with incompatible result type +vector unsigned int ui32(vector unsigned int a, vector unsigned int b) { + return a == b; +} + +// CHECK-LABEL: @si32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x i32>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x i32>, align 16 +// CHECK-NEXT: store <4 x i32> [[A:%.*]], <4 x i32>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <4 x i32> [[B:%.*]], <4 x i32>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i32> [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[SEXT]] +// +// ERROR: returning 'int' from a function with incompatible result type +vector signed int si32(vector signed int a, vector signed int b) { + return a == b; +} + +// CHECK-LABEL: @si64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x i64>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x i64>, align 16 +// CHECK-NEXT: store <2 x i64> [[A:%.*]], <2 x i64>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <2 x i64> [[B:%.*]], <2 x i64>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i64> [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64> +// CHECK-NEXT: ret <2 x i64> [[SEXT]] +// +// ERROR: returning 'int' from a function with incompatible result type +vector long long si64(vector long long a, vector long long b) { + return a == b; +} + +// CHECK-LABEL: @f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x float>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x float>, align 16 +// CHECK-NEXT: store <4 x float> [[A:%.*]], <4 x float>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <4 x float> [[B:%.*]], <4 x float>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[CMP:%.*]] = fcmp oeq <4 x float> [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[SEXT]] +// +// ERROR: returning 'int' from a function with incompatible result type +vector int f32(vector float a, vector float b) { + return a == b; +} + +// CHECK-LABEL: @f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x double>, align 16 +// CHECK-NEXT: store <2 x double> [[A:%.*]], <2 x double>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <2 x double> [[B:%.*]], <2 x double>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, <2 x double>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[CMP:%.*]] = fcmp oeq <2 x double> [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64> +// CHECK-NEXT: ret <2 x i64> [[SEXT]] +// +// ERROR: returning 'int' from a function with incompatible result type +vector long long f64(vector double a, vector double b) { + return a == b; +} From e6450d88e2801373d198bd63af547dbc0a096334 Mon Sep 17 00:00:00 2001 From: Stephan Herhut Date: Mon, 28 Jun 2021 17:31:01 +0200 Subject: [PATCH 061/619] [mlir][llvm] Fix windows build Gate the include of alloca.h behind _WIN32 guard. Differential Revision: https://reviews.llvm.org/D105036 --- mlir/lib/ExecutionEngine/CRunnerUtils.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mlir/lib/ExecutionEngine/CRunnerUtils.cpp b/mlir/lib/ExecutionEngine/CRunnerUtils.cpp index e5b682a7b6de5..4677098d64f3b 100644 --- a/mlir/lib/ExecutionEngine/CRunnerUtils.cpp +++ b/mlir/lib/ExecutionEngine/CRunnerUtils.cpp @@ -15,7 +15,10 @@ #include "mlir/ExecutionEngine/CRunnerUtils.h" #ifndef _WIN32 +#include #include +#else +#include "malloc.h" #endif // _WIN32 #include From 44826ecd929bdd33b3c86650198a5f8a57965cc7 Mon Sep 17 00:00:00 2001 From: "William S. Moses" Date: Thu, 10 Jun 2021 15:12:04 -0400 Subject: [PATCH 062/619] [MLIR] Correct memrefdataflow behavior in the presence of cast and other operations MemRefDataFlow performs mem2reg style operations for affine load/stores. Unfortunately, it is not presently correct in the presence of external operations such as memref.cast, or function calls. This diff extends the functionality of the pass to remain correct in the presence of such ops. Differential Revision: https://reviews.llvm.org/D104053 --- .../Transforms/AffineScalarReplacement.cpp | 452 +++++++++++------- mlir/test/Dialect/Affine/scalrep.mlir | 88 ++++ 2 files changed, 359 insertions(+), 181 deletions(-) diff --git a/mlir/lib/Dialect/Affine/Transforms/AffineScalarReplacement.cpp b/mlir/lib/Dialect/Affine/Transforms/AffineScalarReplacement.cpp index 2ab4d8fada6b0..5be0dcdaea157 100644 --- a/mlir/lib/Dialect/Affine/Transforms/AffineScalarReplacement.cpp +++ b/mlir/lib/Dialect/Affine/Transforms/AffineScalarReplacement.cpp @@ -33,27 +33,17 @@ using namespace mlir; namespace { // The store to load forwarding and load CSE rely on three conditions: // -// 1) store/load and load need to have mathematically equivalent affine access -// functions (checked after full composition of load/store operands); this -// implies that they access the same single memref element for all iterations of -// the common surrounding loop, +// 1) store/load providing a replacement value and load being replaced need to +// have mathematically equivalent affine access functions (checked after full +// composition of load/store operands); this implies that they access the same +// single memref element for all iterations of the common surrounding loop, // // 2) the store/load op should dominate the load op, // -// 3) among all op's that satisfy both (1) and (2), for store to load -// forwarding, the one that does not dominate any store op that has a -// dependence into the load, is provably the last writer to the particular -// memref location being loaded at the load op, and its store value can be -// forwarded to the load; for load CSE, any op that does not dominate any store -// op that have a dependence into the load can be forwarded and the first one -// found is chosen. Note that the only dependences that are to be considered are -// those that are satisfied at the block* of the innermost common surrounding -// loop of the being considered. -// -// (* A dependence being satisfied at a block: a dependence that is satisfied by -// virtue of the destination operation appearing textually / lexically after -// the source operation within the body of a 'affine.for' operation; thus, a -// dependence is always either satisfied by a loop or by a block). +// 3) no operation that may write to memory read by the load being replaced can +// occur after executing the instruction (load or store) providing the +// replacement value and before the load being replaced (thus potentially +// allowing overwriting the memory read by the load). // // The above conditions are simple to check, sufficient, and powerful for most // cases in practice - they are sufficient, but not necessary --- since they @@ -70,16 +60,14 @@ struct AffineScalarReplacement : public AffineScalarReplacementBase { void runOnFunction() override; - LogicalResult forwardStoreToLoad(AffineReadOpInterface loadOp); - void loadCSE(AffineReadOpInterface loadOp); - - // A list of memref's that are potentially dead / could be eliminated. - SmallPtrSet memrefsToErase; - // Load ops whose results were replaced by those forwarded from stores - // dominating stores or loads.. - SmallVector loadOpsToErase; + LogicalResult forwardStoreToLoad(AffineReadOpInterface loadOp, + SmallVectorImpl &loadOpsToErase, + SmallPtrSetImpl &memrefsToErase, + DominanceInfo &domInfo); - DominanceInfo *domInfo = nullptr; + void loadCSE(AffineReadOpInterface loadOp, + SmallVectorImpl &loadOpsToErase, + DominanceInfo &domInfo); }; } // end anonymous namespace @@ -91,61 +79,204 @@ mlir::createAffineScalarReplacementPass() { return std::make_unique(); } -// Check if the store may be reaching the load. -static bool storeMayReachLoad(Operation *storeOp, Operation *loadOp, - unsigned minSurroundingLoops) { - MemRefAccess srcAccess(storeOp); - MemRefAccess destAccess(loadOp); - FlatAffineConstraints dependenceConstraints; - unsigned nsLoops = getNumCommonSurroundingLoops(*loadOp, *storeOp); - unsigned d; - // Dependences at loop depth <= minSurroundingLoops do NOT matter. - for (d = nsLoops + 1; d > minSurroundingLoops; d--) { - DependenceResult result = checkMemrefAccessDependence( - srcAccess, destAccess, d, &dependenceConstraints, - /*dependenceComponents=*/nullptr); - if (hasDependence(result)) - break; - } - if (d <= minSurroundingLoops) - return false; +/// Ensure that all operations that could be executed after `start` +/// (noninclusive) and prior to `memOp` (e.g. on a control flow/op path +/// between the operations) do not have the potential memory effect +/// `EffectType` on `memOp`. `memOp` is an operation that reads or writes to +/// a memref. For example, if `EffectType` is MemoryEffects::Write, this method +/// will check if there is no write to the memory between `start` and `memOp` +/// that would change the read within `memOp`. +template +bool hasNoInterveningEffect(Operation *start, T memOp) { + + Value memref = memOp.getMemRef(); + bool isOriginalAllocation = memref.getDefiningOp() || + memref.getDefiningOp(); + + // A boolean representing whether an intervening operation could have impacted + // memOp. + bool hasSideEffect = false; + + // Check whether the effect on memOp can be caused by a given operation op. + std::function checkOperation = [&](Operation *op) { + // If the effect has alreay been found, early exit, + if (hasSideEffect) + return; + + if (auto memEffect = dyn_cast(op)) { + SmallVector effects; + memEffect.getEffects(effects); + + bool opMayHaveEffect = false; + for (auto effect : effects) { + // If op causes EffectType on a potentially aliasing location for + // memOp, mark as having the effect. + if (isa(effect.getEffect())) { + if (isOriginalAllocation && effect.getValue() && + (effect.getValue().getDefiningOp() || + effect.getValue().getDefiningOp())) { + if (effect.getValue() != memref) + continue; + } + opMayHaveEffect = true; + break; + } + } - return true; + if (!opMayHaveEffect) + return; + + // If the side effect comes from an affine read or write, try to + // prove the side effecting `op` cannot reach `memOp`. + if (isa(op)) { + MemRefAccess srcAccess(op); + MemRefAccess destAccess(memOp); + // Dependence analysis is only correct if both ops operate on the same + // memref. + if (srcAccess.memref == destAccess.memref) { + FlatAffineConstraints dependenceConstraints; + + // Number of loops containing the start op and the ending operation. + unsigned minSurroundingLoops = + getNumCommonSurroundingLoops(*start, *memOp); + + // Number of loops containing the operation `op` which has the + // potential memory side effect and can occur on a path between + // `start` and `memOp`. + unsigned nsLoops = getNumCommonSurroundingLoops(*op, *memOp); + + // For ease, let's consider the case that `op` is a store and we're + // looking for other potential stores (e.g `op`) that overwrite memory + // after `start`, and before being read in `memOp`. In this case, we + // only need to consider other potential stores with depth > + // minSurrounding loops since `start` would overwrite any store with a + // smaller number of surrounding loops before. + unsigned d; + for (d = nsLoops + 1; d > minSurroundingLoops; d--) { + DependenceResult result = checkMemrefAccessDependence( + srcAccess, destAccess, d, &dependenceConstraints, + /*dependenceComponents=*/nullptr); + if (hasDependence(result)) { + hasSideEffect = true; + return; + } + } + + // No side effect was seen, simply return. + return; + } + } + hasSideEffect = true; + return; + } + + if (op->hasTrait()) { + // Recurse into the regions for this op and check whether the internal + // operations may have the side effect `EffectType` on memOp. + for (Region ®ion : op->getRegions()) + for (Block &block : region) + for (Operation &op : block) + checkOperation(&op); + return; + } + + // Otherwise, conservatively assume generic operations have the effect + // on the operation + hasSideEffect = true; + return; + }; + + // Check all paths from ancestor op `parent` to the operation `to` for the + // effect. It is known that `to` must be contained within `parent`. + auto until = [&](Operation *parent, Operation *to) { + // TODO check only the paths from `parent` to `to`. + // Currently we fallback and check the entire parent op, rather than + // just the paths from the parent path, stopping after reaching `to`. + // This is conservatively correct, but could be made more aggressive. + assert(parent->isAncestor(to)); + checkOperation(parent); + }; + + // Check for all paths from operation `from` to operation `untilOp` for the + // given memory effect. + std::function recur = + [&](Operation *from, Operation *untilOp) { + assert( + from->getParentRegion()->isAncestor(untilOp->getParentRegion()) && + "Checking for side effect between two operations without a common " + "ancestor"); + + // If the operations are in different regions, recursively consider all + // path from `from` to the parent of `to` and all paths from the parent + // of `to` to `to`. + if (from->getParentRegion() != untilOp->getParentRegion()) { + recur(from, untilOp->getParentOp()); + until(untilOp->getParentOp(), untilOp); + return; + } + + // Now, assuming that `from` and `to` exist in the same region, perform + // a CFG traversal to check all the relevant operations. + + // Additional blocks to consider. + SmallVector todoBlocks; + { + // First consider the parent block of `from` an check all operations + // after `from`. + for (auto iter = ++from->getIterator(), end = from->getBlock()->end(); + iter != end && &*iter != untilOp; ++iter) { + checkOperation(&*iter); + } + + // If the parent of `from` doesn't contain `to`, add the successors + // to the list of blocks to check. + if (untilOp->getBlock() != from->getBlock()) + for (Block *succ : from->getBlock()->getSuccessors()) + todoBlocks.push_back(succ); + } + + SmallPtrSet done; + // Traverse the CFG until hitting `to`. + while (todoBlocks.size()) { + Block *blk = todoBlocks.pop_back_val(); + if (done.count(blk)) + continue; + done.insert(blk); + for (auto &op : *blk) { + if (&op == untilOp) + break; + checkOperation(&op); + if (&op == blk->getTerminator()) + for (Block *succ : blk->getSuccessors()) + todoBlocks.push_back(succ); + } + } + }; + recur(start, memOp); + return !hasSideEffect; } -// This is a straightforward implementation not optimized for speed. Optimize -// if needed. -LogicalResult -AffineScalarReplacement::forwardStoreToLoad(AffineReadOpInterface loadOp) { - // First pass over the use list to get the minimum number of surrounding - // loops common between the load op and the store op, with min taken across - // all store ops. - SmallVector storeOps; - unsigned minSurroundingLoops = getNestingDepth(loadOp); +/// Attempt to eliminate loadOp by replacing it with a value stored into memory +/// which the load is guaranteed to retrieve. This check involves three +/// components: 1) The store and load must be on the same location 2) The store +/// must dominate (and therefore must always occur prior to) the load 3) No +/// other operations will overwrite the memory loaded between the given load +/// and store. If such a value exists, the replaced `loadOp` will be added to +/// `loadOpsToErase` and its memref will be added to `memrefsToErase`. +LogicalResult AffineScalarReplacement::forwardStoreToLoad( + AffineReadOpInterface loadOp, SmallVectorImpl &loadOpsToErase, + SmallPtrSetImpl &memrefsToErase, DominanceInfo &domInfo) { + + // The store op candidate for forwarding that satisfies all conditions + // to replace the load, if any. + Operation *lastWriteStoreOp = nullptr; + for (auto *user : loadOp.getMemRef().getUsers()) { auto storeOp = dyn_cast(user); if (!storeOp) continue; - unsigned nsLoops = getNumCommonSurroundingLoops(*loadOp, *storeOp); - minSurroundingLoops = std::min(nsLoops, minSurroundingLoops); - storeOps.push_back(storeOp); - } - - // The list of store op candidates for forwarding that satisfy conditions - // (1) and (2) above - they will be filtered later when checking (3). - SmallVector fwdingCandidates; - - // Store ops that have a dependence into the load (even if they aren't - // forwarding candidates). Each forwarding candidate will be checked for a - // dominance on these. 'fwdingCandidates' are a subset of depSrcStores. - SmallVector depSrcStores; - - for (auto *storeOp : storeOps) { - if (!storeMayReachLoad(storeOp, loadOp, minSurroundingLoops)) - continue; - - // Stores that *may* be reaching the load. - depSrcStores.push_back(storeOp); + MemRefAccess srcAccess(storeOp); + MemRefAccess destAccess(loadOp); // 1. Check if the store and the load have mathematically equivalent // affine access functions; this implies that they statically refer to the @@ -155,41 +286,24 @@ AffineScalarReplacement::forwardStoreToLoad(AffineReadOpInterface loadOp) { // store %A[%M] // load %A[%N] // Use the AffineValueMap difference based memref access equality checking. - MemRefAccess srcAccess(storeOp); - MemRefAccess destAccess(loadOp); if (srcAccess != destAccess) continue; // 2. The store has to dominate the load op to be candidate. - if (!domInfo->dominates(storeOp, loadOp)) + if (!domInfo.dominates(storeOp, loadOp)) + continue; + + // 3. Ensure there is no intermediate operation which could replace the + // value in memory. + if (!hasNoInterveningEffect(storeOp, loadOp)) continue; // We now have a candidate for forwarding. - fwdingCandidates.push_back(storeOp); + assert(lastWriteStoreOp == nullptr && + "multiple simulataneous replacement stores"); + lastWriteStoreOp = storeOp; } - // 3. Of all the store ops that meet the above criteria, the store op - // that does not dominate any of the ops in 'depSrcStores' (if such exists) - // will not have any of those latter ops on its paths to `loadOp`. It would - // thus be the unique store providing the value to the load. This condition is - // however conservative for eg: - // - // for ... { - // store - // load - // store - // load - // } - // - Operation *lastWriteStoreOp = nullptr; - for (auto *storeOp : fwdingCandidates) { - if (llvm::all_of(depSrcStores, [&](Operation *depStore) { - return !domInfo->properlyDominates(storeOp, depStore); - })) { - lastWriteStoreOp = storeOp; - break; - } - } if (!lastWriteStoreOp) return failure(); @@ -213,109 +327,85 @@ AffineScalarReplacement::forwardStoreToLoad(AffineReadOpInterface loadOp) { // loadA will be be replaced with loadB if: // 1) loadA and loadB have mathematically equivalent affine access functions. // 2) loadB dominates loadA. -// 3) loadB does not dominate any of the store ops that have a dependence into -// loadA. -void AffineScalarReplacement::loadCSE(AffineReadOpInterface loadOp) { - // The list of load op candidates for forwarding that satisfy conditions - // (1) and (2) above - they will be filtered later when checking (3). - SmallVector fwdingCandidates; - SmallVector storeOps; - unsigned minSurroundingLoops = getNestingDepth(loadOp); - MemRefAccess memRefAccess(loadOp); - // First pass over the use list to get 1) the minimum number of surrounding - // loops common between the load op and an load op candidate, with min taken - // across all load op candidates; 2) load op candidates; 3) store ops. - // We take min across all load op candidates instead of all load ops to make - // sure later dependence check is performed at loop depths that do matter. - for (auto *user : loadOp.getMemRef().getUsers()) { - if (auto storeOp = dyn_cast(user)) { - storeOps.push_back(storeOp); - } else if (auto aLoadOp = dyn_cast(user)) { - MemRefAccess otherMemRefAccess(aLoadOp); - // No need to consider Load ops that have been replaced in previous store - // to load forwarding or loadCSE. If loadA or storeA can be forwarded to - // loadB, then loadA or storeA can be forwarded to loadC iff loadB can be - // forwarded to loadC. - // If loadB is visited before loadC and replace with loadA, we do not put - // loadB in candidates list, only loadA. If loadC is visited before loadB, - // loadC may be replaced with loadB, which will be replaced with loadA - // later. - if (aLoadOp != loadOp && !llvm::is_contained(loadOpsToErase, aLoadOp) && - memRefAccess == otherMemRefAccess && - domInfo->dominates(aLoadOp, loadOp)) { - fwdingCandidates.push_back(aLoadOp); - unsigned nsLoops = getNumCommonSurroundingLoops(*loadOp, *aLoadOp); - minSurroundingLoops = std::min(nsLoops, minSurroundingLoops); - } +// 3) There is no write between loadA and loadB. +void AffineScalarReplacement::loadCSE( + AffineReadOpInterface loadA, SmallVectorImpl &loadOpsToErase, + DominanceInfo &domInfo) { + SmallVector loadCandidates; + for (auto *user : loadA.getMemRef().getUsers()) { + auto loadB = dyn_cast(user); + if (!loadB || loadB == loadA) + continue; + + MemRefAccess srcAccess(loadB); + MemRefAccess destAccess(loadA); + + // 1. The accesses have to be to the same location. + if (srcAccess != destAccess) { + continue; } - } - // No forwarding candidate. - if (fwdingCandidates.empty()) - return; + // 2. The store has to dominate the load op to be candidate. + if (!domInfo.dominates(loadB, loadA)) + continue; - // Store ops that have a dependence into the load. - SmallVector depSrcStores; + // 3. There is no write between loadA and loadB. + if (!hasNoInterveningEffect(loadB.getOperation(), + loadA)) + continue; - for (auto *storeOp : storeOps) { - if (!storeMayReachLoad(storeOp, loadOp, minSurroundingLoops)) + // Check if two values have the same shape. This is needed for affine vector + // loads. + if (loadB.getValue().getType() != loadA.getValue().getType()) continue; - // Stores that *may* be reaching the load. - depSrcStores.push_back(storeOp); + loadCandidates.push_back(loadB); } - // 3. Of all the load op's that meet the above criteria, return the first load - // found that does not dominate any op in 'depSrcStores' and has the same - // shape as the load to be replaced (if one exists). The shape check is needed - // for affine vector loads. - Operation *firstLoadOp = nullptr; - Value oldVal = loadOp.getValue(); - for (auto *loadOp : fwdingCandidates) { - if (llvm::all_of(depSrcStores, - [&](Operation *depStore) { - return !domInfo->properlyDominates(loadOp, depStore); - }) && - cast(loadOp).getValue().getType() == - oldVal.getType()) { - firstLoadOp = loadOp; + // Of the legal load candidates, use the one that dominates all others + // to minimize the subsequent need to loadCSE + Value loadB; + for (AffineReadOpInterface option : loadCandidates) { + if (llvm::all_of(loadCandidates, [&](AffineReadOpInterface depStore) { + return depStore == option || + domInfo.dominates(option.getOperation(), + depStore.getOperation()); + })) { + loadB = option.getValue(); break; } } - if (!firstLoadOp) - return; - // Perform the actual load to load forwarding. - Value loadVal = cast(firstLoadOp).getValue(); - loadOp.getValue().replaceAllUsesWith(loadVal); - // Record this to erase later. - loadOpsToErase.push_back(loadOp); + if (loadB) { + loadA.getValue().replaceAllUsesWith(loadB); + // Record this to erase later. + loadOpsToErase.push_back(loadA); + } } void AffineScalarReplacement::runOnFunction() { // Only supports single block functions at the moment. FuncOp f = getFunction(); - if (!llvm::hasSingleElement(f)) { - markAllAnalysesPreserved(); - return; - } - domInfo = &getAnalysis(); + // Load op's whose results were replaced by those forwarded from stores. + SmallVector opsToErase; + + // A list of memref's that are potentially dead / could be eliminated. + SmallPtrSet memrefsToErase; - loadOpsToErase.clear(); - memrefsToErase.clear(); + auto &domInfo = getAnalysis(); - // Walk all load's and perform store to load forwarding and loadCSE. + // Walk all load's and perform store to load forwarding. f.walk([&](AffineReadOpInterface loadOp) { - // Do store to load forwarding first, if no success, try loadCSE. - if (failed(forwardStoreToLoad(loadOp))) - loadCSE(loadOp); + if (failed( + forwardStoreToLoad(loadOp, opsToErase, memrefsToErase, domInfo))) { + loadCSE(loadOp, opsToErase, domInfo); + } }); - // Erase all load op's whose results were replaced with store or load fwd'ed - // ones. - for (auto *loadOp : loadOpsToErase) - loadOp->erase(); + // Erase all load op's whose results were replaced with store fwd'ed ones. + for (auto *op : opsToErase) + op->erase(); // Check if the store fwd'ed memrefs are now left with only stores and can // thus be completely deleted. Note: the canonicalize pass should be able diff --git a/mlir/test/Dialect/Affine/scalrep.mlir b/mlir/test/Dialect/Affine/scalrep.mlir index 8d39fe300345c..452ff0939a185 100644 --- a/mlir/test/Dialect/Affine/scalrep.mlir +++ b/mlir/test/Dialect/Affine/scalrep.mlir @@ -554,3 +554,91 @@ func @vector_load_affine_apply_store_load(%in : memref<512xf32>, %out : memref<5 } return } + +// CHECK-LABEL: func @external_no_forward_load + +func @external_no_forward_load(%in : memref<512xf32>, %out : memref<512xf32>) { + affine.for %i = 0 to 16 { + %ld0 = affine.load %in[32*%i] : memref<512xf32> + affine.store %ld0, %out[32*%i] : memref<512xf32> + "memop"(%in, %out) : (memref<512xf32>, memref<512xf32>) -> () + %ld1 = affine.load %in[32*%i] : memref<512xf32> + affine.store %ld1, %out[32*%i] : memref<512xf32> + } + return +} +// CHECK: affine.load +// CHECK: affine.store +// CHECK: affine.load +// CHECK: affine.store + +// CHECK-LABEL: func @external_no_forward_store + +func @external_no_forward_store(%in : memref<512xf32>, %out : memref<512xf32>) { + %cf1 = constant 1.0 : f32 + affine.for %i = 0 to 16 { + affine.store %cf1, %in[32*%i] : memref<512xf32> + "memop"(%in, %out) : (memref<512xf32>, memref<512xf32>) -> () + %ld1 = affine.load %in[32*%i] : memref<512xf32> + affine.store %ld1, %out[32*%i] : memref<512xf32> + } + return +} +// CHECK: affine.store +// CHECK: affine.load +// CHECK: affine.store + +// CHECK-LABEL: func @no_forward_cast + +func @no_forward_cast(%in : memref<512xf32>, %out : memref<512xf32>) { + %cf1 = constant 1.0 : f32 + %cf2 = constant 2.0 : f32 + %m2 = memref.cast %in : memref<512xf32> to memref + affine.for %i = 0 to 16 { + affine.store %cf1, %in[32*%i] : memref<512xf32> + affine.store %cf2, %m2[32*%i] : memref + %ld1 = affine.load %in[32*%i] : memref<512xf32> + affine.store %ld1, %out[32*%i] : memref<512xf32> + } + return +} +// CHECK: affine.store +// CHECK-NEXT: affine.store +// CHECK-NEXT: affine.load +// CHECK-NEXT: affine.store + +// Although there is a dependence from the second store to the load, it is +// satisfied by the outer surrounding loop, and does not prevent the first +// store to be forwarded to the load. + +// CHECK-LABEL: func @overlap_no_fwd +func @overlap_no_fwd(%N : index) -> f32 { + %cf7 = constant 7.0 : f32 + %cf9 = constant 9.0 : f32 + %c0 = constant 0 : index + %c1 = constant 1 : index + %m = memref.alloc() : memref<10xf32> + affine.for %i0 = 0 to 5 { + affine.store %cf7, %m[2 * %i0] : memref<10xf32> + affine.for %i1 = 0 to %N { + %v0 = affine.load %m[2 * %i0] : memref<10xf32> + %v1 = addf %v0, %v0 : f32 + affine.store %cf9, %m[%i0 + 1] : memref<10xf32> + } + } + // Due to this load, the memref isn't optimized away. + %v3 = affine.load %m[%c1] : memref<10xf32> + return %v3 : f32 + +// CHECK: affine.for %{{.*}} = 0 to 5 { +// CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32> +// CHECK-NEXT: affine.for %{{.*}} = 0 to %{{.*}} { +// CHECK-NEXT: %{{.*}} = affine.load +// CHECK-NEXT: %{{.*}} = addf %{{.*}}, %{{.*}} : f32 +// CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32> +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: %{{.*}} = affine.load %{{.*}}[%{{.*}}] : memref<10xf32> +// CHECK-NEXT: return %{{.*}} : f32 +} + From f32f3db9fcbf43b1a603c5da882557ebed62f1c5 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 11 Jun 2021 09:55:11 -0400 Subject: [PATCH 063/619] [libc++] Split the various iterator types out of Differential Revision: https://reviews.llvm.org/D104669 --- libcxx/include/CMakeLists.txt | 13 +- .../include/__iterator/back_insert_iterator.h | 75 ++ .../__iterator/front_insert_iterator.h | 75 ++ libcxx/include/__iterator/insert_iterator.h | 77 ++ libcxx/include/__iterator/istream_iterator.h | 104 ++ .../include/__iterator/istreambuf_iterator.h | 110 ++ libcxx/include/__iterator/iterator.h | 40 + libcxx/include/__iterator/move_iterator.h | 189 +++ libcxx/include/__iterator/ostream_iterator.h | 76 ++ .../include/__iterator/ostreambuf_iterator.h | 81 ++ libcxx/include/__iterator/reverse_iterator.h | 239 ++++ libcxx/include/__iterator/wrap_iter.h | 407 ++++++ libcxx/include/iterator | 1137 +---------------- libcxx/include/module.modulemap | 33 +- libcxx/include/regex | 1 + libcxx/include/span | 1 + libcxx/include/string | 1 + libcxx/include/vector | 1 + ...e_iterator_produces_diagnostic.verify.cpp} | 2 +- 19 files changed, 1523 insertions(+), 1139 deletions(-) create mode 100644 libcxx/include/__iterator/back_insert_iterator.h create mode 100644 libcxx/include/__iterator/front_insert_iterator.h create mode 100644 libcxx/include/__iterator/insert_iterator.h create mode 100644 libcxx/include/__iterator/istream_iterator.h create mode 100644 libcxx/include/__iterator/istreambuf_iterator.h create mode 100644 libcxx/include/__iterator/iterator.h create mode 100644 libcxx/include/__iterator/move_iterator.h create mode 100644 libcxx/include/__iterator/ostream_iterator.h create mode 100644 libcxx/include/__iterator/ostreambuf_iterator.h create mode 100644 libcxx/include/__iterator/reverse_iterator.h create mode 100644 libcxx/include/__iterator/wrap_iter.h rename libcxx/test/libcxx/input.output/filesystems/class.path/path.itr/{reverse_iterator_produces_diagnostic.fail.cpp => reverse_iterator_produces_diagnostic.verify.cpp} (75%) diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index a594b57866ab9..3b03ec6685787 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -103,25 +103,36 @@ set(files __format/format_error.h __format/format_parse_context.h __function_like.h - __functional/search.h __functional_03 __functional_base __functional_base_03 __functional/hash.h + __functional/search.h __functional/unary_function.h __functional/unwrap_ref.h __hash_table __iterator/advance.h + __iterator/back_insert_iterator.h __iterator/concepts.h __iterator/default_sentinel.h + __iterator/front_insert_iterator.h __iterator/incrementable_traits.h + __iterator/insert_iterator.h + __iterator/istream_iterator.h + __iterator/istreambuf_iterator.h __iterator/iter_move.h __iterator/iter_swap.h __iterator/iterator_traits.h + __iterator/iterator.h + __iterator/move_iterator.h __iterator/next.h + __iterator/ostream_iterator.h + __iterator/ostreambuf_iterator.h __iterator/prev.h __iterator/projected.h __iterator/readable_traits.h + __iterator/reverse_iterator.h + __iterator/wrap_iter.h __libcpp_version __locale __memory/addressof.h diff --git a/libcxx/include/__iterator/back_insert_iterator.h b/libcxx/include/__iterator/back_insert_iterator.h new file mode 100644 index 0000000000000..61ac90dfffcac --- /dev/null +++ b/libcxx/include/__iterator/back_insert_iterator.h @@ -0,0 +1,75 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ITERATOR_BACK_INSERT_ITERATOR_H +#define _LIBCPP___ITERATOR_BACK_INSERT_ITERATOR_H + +#include <__config> +#include <__iterator/iterator_traits.h> +#include <__iterator/iterator.h> +#include <__memory/addressof.h> +#include +#include // std::move + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +_LIBCPP_SUPPRESS_DEPRECATED_PUSH +template +class _LIBCPP_TEMPLATE_VIS back_insert_iterator +#if _LIBCPP_STD_VER <= 14 || !defined(_LIBCPP_ABI_NO_ITERATOR_BASES) + : public iterator +#endif +{ +_LIBCPP_SUPPRESS_DEPRECATED_POP +protected: + _Container* container; +public: + typedef output_iterator_tag iterator_category; + typedef void value_type; +#if _LIBCPP_STD_VER > 17 + typedef ptrdiff_t difference_type; +#else + typedef void difference_type; +#endif + typedef void pointer; + typedef void reference; + typedef _Container container_type; + + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 explicit back_insert_iterator(_Container& __x) : container(_VSTD::addressof(__x)) {} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 back_insert_iterator& operator=(const typename _Container::value_type& __value_) + {container->push_back(__value_); return *this;} +#ifndef _LIBCPP_CXX03_LANG + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 back_insert_iterator& operator=(typename _Container::value_type&& __value_) + {container->push_back(_VSTD::move(__value_)); return *this;} +#endif // _LIBCPP_CXX03_LANG + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 back_insert_iterator& operator*() {return *this;} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 back_insert_iterator& operator++() {return *this;} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 back_insert_iterator operator++(int) {return *this;} +}; + +template +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 +back_insert_iterator<_Container> +back_inserter(_Container& __x) +{ + return back_insert_iterator<_Container>(__x); +} + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___ITERATOR_BACK_INSERT_ITERATOR_H diff --git a/libcxx/include/__iterator/front_insert_iterator.h b/libcxx/include/__iterator/front_insert_iterator.h new file mode 100644 index 0000000000000..d5d86f51849cb --- /dev/null +++ b/libcxx/include/__iterator/front_insert_iterator.h @@ -0,0 +1,75 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ITERATOR_FRONT_INSERT_ITERATOR_H +#define _LIBCPP___ITERATOR_FRONT_INSERT_ITERATOR_H + +#include <__config> +#include <__iterator/iterator_traits.h> +#include <__iterator/iterator.h> +#include <__memory/addressof.h> +#include +#include // std::move + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +_LIBCPP_SUPPRESS_DEPRECATED_PUSH +template +class _LIBCPP_TEMPLATE_VIS front_insert_iterator +#if _LIBCPP_STD_VER <= 14 || !defined(_LIBCPP_ABI_NO_ITERATOR_BASES) + : public iterator +#endif +{ +_LIBCPP_SUPPRESS_DEPRECATED_POP +protected: + _Container* container; +public: + typedef output_iterator_tag iterator_category; + typedef void value_type; +#if _LIBCPP_STD_VER > 17 + typedef ptrdiff_t difference_type; +#else + typedef void difference_type; +#endif + typedef void pointer; + typedef void reference; + typedef _Container container_type; + + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 explicit front_insert_iterator(_Container& __x) : container(_VSTD::addressof(__x)) {} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 front_insert_iterator& operator=(const typename _Container::value_type& __value_) + {container->push_front(__value_); return *this;} +#ifndef _LIBCPP_CXX03_LANG + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 front_insert_iterator& operator=(typename _Container::value_type&& __value_) + {container->push_front(_VSTD::move(__value_)); return *this;} +#endif // _LIBCPP_CXX03_LANG + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 front_insert_iterator& operator*() {return *this;} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 front_insert_iterator& operator++() {return *this;} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 front_insert_iterator operator++(int) {return *this;} +}; + +template +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 +front_insert_iterator<_Container> +front_inserter(_Container& __x) +{ + return front_insert_iterator<_Container>(__x); +} + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___ITERATOR_FRONT_INSERT_ITERATOR_H diff --git a/libcxx/include/__iterator/insert_iterator.h b/libcxx/include/__iterator/insert_iterator.h new file mode 100644 index 0000000000000..40555a4c9d349 --- /dev/null +++ b/libcxx/include/__iterator/insert_iterator.h @@ -0,0 +1,77 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ITERATOR_INSERT_ITERATOR_H +#define _LIBCPP___ITERATOR_INSERT_ITERATOR_H + +#include <__config> +#include <__iterator/iterator_traits.h> +#include <__iterator/iterator.h> +#include <__memory/addressof.h> +#include +#include // std::move + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +_LIBCPP_SUPPRESS_DEPRECATED_PUSH +template +class _LIBCPP_TEMPLATE_VIS insert_iterator +#if _LIBCPP_STD_VER <= 14 || !defined(_LIBCPP_ABI_NO_ITERATOR_BASES) + : public iterator +#endif +{ +_LIBCPP_SUPPRESS_DEPRECATED_POP +protected: + _Container* container; + typename _Container::iterator iter; // FIXME: `ranges::iterator_t` in C++20 mode +public: + typedef output_iterator_tag iterator_category; + typedef void value_type; +#if _LIBCPP_STD_VER > 17 + typedef ptrdiff_t difference_type; +#else + typedef void difference_type; +#endif + typedef void pointer; + typedef void reference; + typedef _Container container_type; + + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 insert_iterator(_Container& __x, typename _Container::iterator __i) + : container(_VSTD::addressof(__x)), iter(__i) {} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 insert_iterator& operator=(const typename _Container::value_type& __value_) + {iter = container->insert(iter, __value_); ++iter; return *this;} +#ifndef _LIBCPP_CXX03_LANG + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 insert_iterator& operator=(typename _Container::value_type&& __value_) + {iter = container->insert(iter, _VSTD::move(__value_)); ++iter; return *this;} +#endif // _LIBCPP_CXX03_LANG + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 insert_iterator& operator*() {return *this;} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 insert_iterator& operator++() {return *this;} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 insert_iterator& operator++(int) {return *this;} +}; + +template +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 +insert_iterator<_Container> +inserter(_Container& __x, typename _Container::iterator __i) +{ + return insert_iterator<_Container>(__x, __i); +} + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___ITERATOR_INSERT_ITERATOR_H diff --git a/libcxx/include/__iterator/istream_iterator.h b/libcxx/include/__iterator/istream_iterator.h new file mode 100644 index 0000000000000..1dd57f0d49cfd --- /dev/null +++ b/libcxx/include/__iterator/istream_iterator.h @@ -0,0 +1,104 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ITERATOR_ISTREAM_ITERATOR_H +#define _LIBCPP___ITERATOR_ISTREAM_ITERATOR_H + +#include <__config> +#include <__iterator/iterator_traits.h> +#include <__iterator/iterator.h> +#include <__memory/addressof.h> +#include +#include // for forward declarations of char_traits and basic_istream + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +_LIBCPP_SUPPRESS_DEPRECATED_PUSH +template , class _Distance = ptrdiff_t> +class _LIBCPP_TEMPLATE_VIS istream_iterator +#if _LIBCPP_STD_VER <= 14 || !defined(_LIBCPP_ABI_NO_ITERATOR_BASES) + : public iterator +#endif +{ +_LIBCPP_SUPPRESS_DEPRECATED_POP +public: + typedef input_iterator_tag iterator_category; + typedef _Tp value_type; + typedef _Distance difference_type; + typedef const _Tp* pointer; + typedef const _Tp& reference; + typedef _CharT char_type; + typedef _Traits traits_type; + typedef basic_istream<_CharT,_Traits> istream_type; +private: + istream_type* __in_stream_; + _Tp __value_; +public: + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR istream_iterator() : __in_stream_(nullptr), __value_() {} + _LIBCPP_INLINE_VISIBILITY istream_iterator(istream_type& __s) : __in_stream_(_VSTD::addressof(__s)) + { + if (!(*__in_stream_ >> __value_)) + __in_stream_ = nullptr; + } + + _LIBCPP_INLINE_VISIBILITY const _Tp& operator*() const {return __value_;} + _LIBCPP_INLINE_VISIBILITY const _Tp* operator->() const {return _VSTD::addressof((operator*()));} + _LIBCPP_INLINE_VISIBILITY istream_iterator& operator++() + { + if (!(*__in_stream_ >> __value_)) + __in_stream_ = nullptr; + return *this; + } + _LIBCPP_INLINE_VISIBILITY istream_iterator operator++(int) + {istream_iterator __t(*this); ++(*this); return __t;} + + template + friend _LIBCPP_INLINE_VISIBILITY + bool + operator==(const istream_iterator<_Up, _CharU, _TraitsU, _DistanceU>& __x, + const istream_iterator<_Up, _CharU, _TraitsU, _DistanceU>& __y); + + template + friend _LIBCPP_INLINE_VISIBILITY + bool + operator==(const istream_iterator<_Up, _CharU, _TraitsU, _DistanceU>& __x, + const istream_iterator<_Up, _CharU, _TraitsU, _DistanceU>& __y); +}; + +template +inline _LIBCPP_INLINE_VISIBILITY +bool +operator==(const istream_iterator<_Tp, _CharT, _Traits, _Distance>& __x, + const istream_iterator<_Tp, _CharT, _Traits, _Distance>& __y) +{ + return __x.__in_stream_ == __y.__in_stream_; +} + +template +inline _LIBCPP_INLINE_VISIBILITY +bool +operator!=(const istream_iterator<_Tp, _CharT, _Traits, _Distance>& __x, + const istream_iterator<_Tp, _CharT, _Traits, _Distance>& __y) +{ + return !(__x == __y); +} + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___ITERATOR_ISTREAM_ITERATOR_H diff --git a/libcxx/include/__iterator/istreambuf_iterator.h b/libcxx/include/__iterator/istreambuf_iterator.h new file mode 100644 index 0000000000000..910d57efc3ba9 --- /dev/null +++ b/libcxx/include/__iterator/istreambuf_iterator.h @@ -0,0 +1,110 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ITERATOR_ISTREAMBUF_ITERATOR_H +#define _LIBCPP___ITERATOR_ISTREAMBUF_ITERATOR_H + +#include <__config> +#include <__iterator/iterator_traits.h> +#include <__iterator/iterator.h> +#include // for forward declaration of basic_streambuf + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +_LIBCPP_SUPPRESS_DEPRECATED_PUSH +template +class _LIBCPP_TEMPLATE_VIS istreambuf_iterator +#if _LIBCPP_STD_VER <= 14 || !defined(_LIBCPP_ABI_NO_ITERATOR_BASES) + : public iterator +#endif +{ +_LIBCPP_SUPPRESS_DEPRECATED_POP +public: + typedef input_iterator_tag iterator_category; + typedef _CharT value_type; + typedef typename _Traits::off_type difference_type; + typedef _CharT* pointer; + typedef _CharT reference; + typedef _CharT char_type; + typedef _Traits traits_type; + typedef typename _Traits::int_type int_type; + typedef basic_streambuf<_CharT,_Traits> streambuf_type; + typedef basic_istream<_CharT,_Traits> istream_type; +private: + mutable streambuf_type* __sbuf_; + + class __proxy + { + char_type __keep_; + streambuf_type* __sbuf_; + _LIBCPP_INLINE_VISIBILITY __proxy(char_type __c, streambuf_type* __s) + : __keep_(__c), __sbuf_(__s) {} + friend class istreambuf_iterator; + public: + _LIBCPP_INLINE_VISIBILITY char_type operator*() const {return __keep_;} + }; + + _LIBCPP_INLINE_VISIBILITY + bool __test_for_eof() const + { + if (__sbuf_ && traits_type::eq_int_type(__sbuf_->sgetc(), traits_type::eof())) + __sbuf_ = nullptr; + return __sbuf_ == nullptr; + } +public: + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR istreambuf_iterator() _NOEXCEPT : __sbuf_(nullptr) {} + _LIBCPP_INLINE_VISIBILITY istreambuf_iterator(istream_type& __s) _NOEXCEPT + : __sbuf_(__s.rdbuf()) {} + _LIBCPP_INLINE_VISIBILITY istreambuf_iterator(streambuf_type* __s) _NOEXCEPT + : __sbuf_(__s) {} + _LIBCPP_INLINE_VISIBILITY istreambuf_iterator(const __proxy& __p) _NOEXCEPT + : __sbuf_(__p.__sbuf_) {} + + _LIBCPP_INLINE_VISIBILITY char_type operator*() const + {return static_cast(__sbuf_->sgetc());} + _LIBCPP_INLINE_VISIBILITY istreambuf_iterator& operator++() + { + __sbuf_->sbumpc(); + return *this; + } + _LIBCPP_INLINE_VISIBILITY __proxy operator++(int) + { + return __proxy(__sbuf_->sbumpc(), __sbuf_); + } + + _LIBCPP_INLINE_VISIBILITY bool equal(const istreambuf_iterator& __b) const + {return __test_for_eof() == __b.__test_for_eof();} +}; + +template +inline _LIBCPP_INLINE_VISIBILITY +bool operator==(const istreambuf_iterator<_CharT,_Traits>& __a, + const istreambuf_iterator<_CharT,_Traits>& __b) + {return __a.equal(__b);} + +template +inline _LIBCPP_INLINE_VISIBILITY +bool operator!=(const istreambuf_iterator<_CharT,_Traits>& __a, + const istreambuf_iterator<_CharT,_Traits>& __b) + {return !__a.equal(__b);} + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___ITERATOR_ISTREAMBUF_ITERATOR_H diff --git a/libcxx/include/__iterator/iterator.h b/libcxx/include/__iterator/iterator.h new file mode 100644 index 0000000000000..dfd481e357120 --- /dev/null +++ b/libcxx/include/__iterator/iterator.h @@ -0,0 +1,40 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ITERATOR_ITERATOR_H +#define _LIBCPP___ITERATOR_ITERATOR_H + +#include <__config> +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +template +struct _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX17 iterator +{ + typedef _Tp value_type; + typedef _Distance difference_type; + typedef _Pointer pointer; + typedef _Reference reference; + typedef _Category iterator_category; +}; + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___ITERATOR_ITERATOR_H diff --git a/libcxx/include/__iterator/move_iterator.h b/libcxx/include/__iterator/move_iterator.h new file mode 100644 index 0000000000000..7819743bdb396 --- /dev/null +++ b/libcxx/include/__iterator/move_iterator.h @@ -0,0 +1,189 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ITERATOR_MOVE_ITERATOR_H +#define _LIBCPP___ITERATOR_MOVE_ITERATOR_H + +#include <__config> +#include <__iterator/iterator_traits.h> +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +template +class _LIBCPP_TEMPLATE_VIS move_iterator +{ +private: + _Iter __i; +public: + typedef _Iter iterator_type; + typedef typename iterator_traits::value_type value_type; + typedef typename iterator_traits::difference_type difference_type; + typedef iterator_type pointer; + typedef _If<__is_cpp17_random_access_iterator<_Iter>::value, + random_access_iterator_tag, + typename iterator_traits<_Iter>::iterator_category> iterator_category; +#if _LIBCPP_STD_VER > 17 + typedef input_iterator_tag iterator_concept; +#endif + +#ifndef _LIBCPP_CXX03_LANG + typedef typename iterator_traits::reference __reference; + typedef typename conditional< + is_reference<__reference>::value, + typename remove_reference<__reference>::type&&, + __reference + >::type reference; +#else + typedef typename iterator_traits::reference reference; +#endif + + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 + move_iterator() : __i() {} + + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 + explicit move_iterator(_Iter __x) : __i(__x) {} + + template ::value && is_convertible<_Up const&, _Iter>::value + > > + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 + move_iterator(const move_iterator<_Up>& __u) : __i(__u.base()) {} + + template ::value && + is_convertible<_Up const&, _Iter>::value && + is_assignable<_Iter&, _Up const&>::value + > > + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 + move_iterator& operator=(const move_iterator<_Up>& __u) { + __i = __u.base(); + return *this; + } + + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 _Iter base() const {return __i;} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 + reference operator*() const { return static_cast(*__i); } + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 + pointer operator->() const { return __i;} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 + move_iterator& operator++() {++__i; return *this;} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 + move_iterator operator++(int) {move_iterator __tmp(*this); ++__i; return __tmp;} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 + move_iterator& operator--() {--__i; return *this;} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 + move_iterator operator--(int) {move_iterator __tmp(*this); --__i; return __tmp;} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 + move_iterator operator+ (difference_type __n) const {return move_iterator(__i + __n);} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 + move_iterator& operator+=(difference_type __n) {__i += __n; return *this;} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 + move_iterator operator- (difference_type __n) const {return move_iterator(__i - __n);} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 + move_iterator& operator-=(difference_type __n) {__i -= __n; return *this;} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 + reference operator[](difference_type __n) const { return static_cast(__i[__n]); } +}; + +template +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 +bool +operator==(const move_iterator<_Iter1>& __x, const move_iterator<_Iter2>& __y) +{ + return __x.base() == __y.base(); +} + +template +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 +bool +operator<(const move_iterator<_Iter1>& __x, const move_iterator<_Iter2>& __y) +{ + return __x.base() < __y.base(); +} + +template +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 +bool +operator!=(const move_iterator<_Iter1>& __x, const move_iterator<_Iter2>& __y) +{ + return __x.base() != __y.base(); +} + +template +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 +bool +operator>(const move_iterator<_Iter1>& __x, const move_iterator<_Iter2>& __y) +{ + return __x.base() > __y.base(); +} + +template +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 +bool +operator>=(const move_iterator<_Iter1>& __x, const move_iterator<_Iter2>& __y) +{ + return __x.base() >= __y.base(); +} + +template +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 +bool +operator<=(const move_iterator<_Iter1>& __x, const move_iterator<_Iter2>& __y) +{ + return __x.base() <= __y.base(); +} + +#ifndef _LIBCPP_CXX03_LANG +template +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 +auto +operator-(const move_iterator<_Iter1>& __x, const move_iterator<_Iter2>& __y) +-> decltype(__x.base() - __y.base()) +{ + return __x.base() - __y.base(); +} +#else +template +inline _LIBCPP_INLINE_VISIBILITY +typename move_iterator<_Iter1>::difference_type +operator-(const move_iterator<_Iter1>& __x, const move_iterator<_Iter2>& __y) +{ + return __x.base() - __y.base(); +} +#endif + +template +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 +move_iterator<_Iter> +operator+(typename move_iterator<_Iter>::difference_type __n, const move_iterator<_Iter>& __x) +{ + return move_iterator<_Iter>(__x.base() + __n); +} + +template +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 +move_iterator<_Iter> +make_move_iterator(_Iter __i) +{ + return move_iterator<_Iter>(__i); +} + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___ITERATOR_MOVE_ITERATOR_H diff --git a/libcxx/include/__iterator/ostream_iterator.h b/libcxx/include/__iterator/ostream_iterator.h new file mode 100644 index 0000000000000..2615b21b059fd --- /dev/null +++ b/libcxx/include/__iterator/ostream_iterator.h @@ -0,0 +1,76 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ITERATOR_OSTREAM_ITERATOR_H +#define _LIBCPP___ITERATOR_OSTREAM_ITERATOR_H + +#include <__config> +#include <__iterator/iterator_traits.h> +#include <__iterator/iterator.h> +#include <__memory/addressof.h> +#include +#include // for forward declarations of char_traits and basic_ostream + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +_LIBCPP_SUPPRESS_DEPRECATED_PUSH +template > +class _LIBCPP_TEMPLATE_VIS ostream_iterator +#if _LIBCPP_STD_VER <= 14 || !defined(_LIBCPP_ABI_NO_ITERATOR_BASES) + : public iterator +#endif +{ +_LIBCPP_SUPPRESS_DEPRECATED_POP +public: + typedef output_iterator_tag iterator_category; + typedef void value_type; +#if _LIBCPP_STD_VER > 17 + typedef ptrdiff_t difference_type; +#else + typedef void difference_type; +#endif + typedef void pointer; + typedef void reference; + typedef _CharT char_type; + typedef _Traits traits_type; + typedef basic_ostream<_CharT, _Traits> ostream_type; + +private: + ostream_type* __out_stream_; + const char_type* __delim_; +public: + _LIBCPP_INLINE_VISIBILITY ostream_iterator(ostream_type& __s) _NOEXCEPT + : __out_stream_(_VSTD::addressof(__s)), __delim_(nullptr) {} + _LIBCPP_INLINE_VISIBILITY ostream_iterator(ostream_type& __s, const _CharT* __delimiter) _NOEXCEPT + : __out_stream_(_VSTD::addressof(__s)), __delim_(__delimiter) {} + _LIBCPP_INLINE_VISIBILITY ostream_iterator& operator=(const _Tp& __value_) + { + *__out_stream_ << __value_; + if (__delim_) + *__out_stream_ << __delim_; + return *this; + } + + _LIBCPP_INLINE_VISIBILITY ostream_iterator& operator*() {return *this;} + _LIBCPP_INLINE_VISIBILITY ostream_iterator& operator++() {return *this;} + _LIBCPP_INLINE_VISIBILITY ostream_iterator& operator++(int) {return *this;} +}; + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___ITERATOR_OSTREAM_ITERATOR_H diff --git a/libcxx/include/__iterator/ostreambuf_iterator.h b/libcxx/include/__iterator/ostreambuf_iterator.h new file mode 100644 index 0000000000000..4676fc70ffbeb --- /dev/null +++ b/libcxx/include/__iterator/ostreambuf_iterator.h @@ -0,0 +1,81 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ITERATOR_OSTREAMBUF_ITERATOR_H +#define _LIBCPP___ITERATOR_OSTREAMBUF_ITERATOR_H + +#include <__config> +#include <__iterator/iterator_traits.h> +#include <__iterator/iterator.h> +#include // for forward declaration of basic_streambuf + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +_LIBCPP_SUPPRESS_DEPRECATED_PUSH +template +class _LIBCPP_TEMPLATE_VIS ostreambuf_iterator +#if _LIBCPP_STD_VER <= 14 || !defined(_LIBCPP_ABI_NO_ITERATOR_BASES) + : public iterator +#endif +{ +_LIBCPP_SUPPRESS_DEPRECATED_POP +public: + typedef output_iterator_tag iterator_category; + typedef void value_type; +#if _LIBCPP_STD_VER > 17 + typedef ptrdiff_t difference_type; +#else + typedef void difference_type; +#endif + typedef void pointer; + typedef void reference; + typedef _CharT char_type; + typedef _Traits traits_type; + typedef basic_streambuf<_CharT, _Traits> streambuf_type; + typedef basic_ostream<_CharT, _Traits> ostream_type; + +private: + streambuf_type* __sbuf_; +public: + _LIBCPP_INLINE_VISIBILITY ostreambuf_iterator(ostream_type& __s) _NOEXCEPT + : __sbuf_(__s.rdbuf()) {} + _LIBCPP_INLINE_VISIBILITY ostreambuf_iterator(streambuf_type* __s) _NOEXCEPT + : __sbuf_(__s) {} + _LIBCPP_INLINE_VISIBILITY ostreambuf_iterator& operator=(_CharT __c) + { + if (__sbuf_ && traits_type::eq_int_type(__sbuf_->sputc(__c), traits_type::eof())) + __sbuf_ = nullptr; + return *this; + } + _LIBCPP_INLINE_VISIBILITY ostreambuf_iterator& operator*() {return *this;} + _LIBCPP_INLINE_VISIBILITY ostreambuf_iterator& operator++() {return *this;} + _LIBCPP_INLINE_VISIBILITY ostreambuf_iterator& operator++(int) {return *this;} + _LIBCPP_INLINE_VISIBILITY bool failed() const _NOEXCEPT {return __sbuf_ == nullptr;} + + template + friend + _LIBCPP_HIDDEN + ostreambuf_iterator<_Ch, _Tr> + __pad_and_output(ostreambuf_iterator<_Ch, _Tr> __s, + const _Ch* __ob, const _Ch* __op, const _Ch* __oe, + ios_base& __iob, _Ch __fl); +}; + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___ITERATOR_OSTREAMBUF_ITERATOR_H diff --git a/libcxx/include/__iterator/reverse_iterator.h b/libcxx/include/__iterator/reverse_iterator.h new file mode 100644 index 0000000000000..77f7143b43057 --- /dev/null +++ b/libcxx/include/__iterator/reverse_iterator.h @@ -0,0 +1,239 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ITERATOR_REVERSE_ITERATOR_H +#define _LIBCPP___ITERATOR_REVERSE_ITERATOR_H + +#include <__config> +#include <__iterator/iterator_traits.h> +#include <__iterator/iterator.h> +#include <__memory/addressof.h> +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +template +struct __is_stashing_iterator : false_type {}; + +template +struct __is_stashing_iterator<_Tp, typename __void_t::type> + : true_type {}; + +_LIBCPP_SUPPRESS_DEPRECATED_PUSH +template +class _LIBCPP_TEMPLATE_VIS reverse_iterator +#if _LIBCPP_STD_VER <= 14 || !defined(_LIBCPP_ABI_NO_ITERATOR_BASES) + : public iterator::iterator_category, + typename iterator_traits<_Iter>::value_type, + typename iterator_traits<_Iter>::difference_type, + typename iterator_traits<_Iter>::pointer, + typename iterator_traits<_Iter>::reference> +#endif +{ +_LIBCPP_SUPPRESS_DEPRECATED_POP +private: +#ifndef _LIBCPP_ABI_NO_ITERATOR_BASES + _Iter __t; // no longer used as of LWG #2360, not removed due to ABI break +#endif + + static_assert(!__is_stashing_iterator<_Iter>::value, + "The specified iterator type cannot be used with reverse_iterator; " + "Using stashing iterators with reverse_iterator causes undefined behavior"); + +protected: + _Iter current; +public: + typedef _Iter iterator_type; + typedef typename iterator_traits<_Iter>::difference_type difference_type; + typedef typename iterator_traits<_Iter>::reference reference; + typedef typename iterator_traits<_Iter>::pointer pointer; + typedef _If<__is_cpp17_random_access_iterator<_Iter>::value, + random_access_iterator_tag, + typename iterator_traits<_Iter>::iterator_category> iterator_category; + typedef typename iterator_traits<_Iter>::value_type value_type; + +#if _LIBCPP_STD_VER > 17 + typedef _If<__is_cpp17_random_access_iterator<_Iter>::value, + random_access_iterator_tag, + bidirectional_iterator_tag> iterator_concept; +#endif + +#ifndef _LIBCPP_ABI_NO_ITERATOR_BASES + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 + reverse_iterator() : __t(), current() {} + + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 + explicit reverse_iterator(_Iter __x) : __t(__x), current(__x) {} + + template ::value && is_convertible<_Up const&, _Iter>::value + > > + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 + reverse_iterator(const reverse_iterator<_Up>& __u) + : __t(__u.base()), current(__u.base()) + { } + + template ::value && + is_convertible<_Up const&, _Iter>::value && + is_assignable<_Up const&, _Iter>::value + > > + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 + reverse_iterator& operator=(const reverse_iterator<_Up>& __u) { + __t = current = __u.base(); + return *this; + } +#else + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 + reverse_iterator() : current() {} + + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 + explicit reverse_iterator(_Iter __x) : current(__x) {} + + template ::value && is_convertible<_Up const&, _Iter>::value + > > + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 + reverse_iterator(const reverse_iterator<_Up>& __u) + : current(__u.base()) + { } + + template ::value && + is_convertible<_Up const&, _Iter>::value && + is_assignable<_Up const&, _Iter>::value + > > + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 + reverse_iterator& operator=(const reverse_iterator<_Up>& __u) { + current = __u.base(); + return *this; + } +#endif + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 + _Iter base() const {return current;} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 + reference operator*() const {_Iter __tmp = current; return *--__tmp;} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 + pointer operator->() const {return _VSTD::addressof(operator*());} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 + reverse_iterator& operator++() {--current; return *this;} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 + reverse_iterator operator++(int) {reverse_iterator __tmp(*this); --current; return __tmp;} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 + reverse_iterator& operator--() {++current; return *this;} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 + reverse_iterator operator--(int) {reverse_iterator __tmp(*this); ++current; return __tmp;} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 + reverse_iterator operator+ (difference_type __n) const {return reverse_iterator(current - __n);} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 + reverse_iterator& operator+=(difference_type __n) {current -= __n; return *this;} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 + reverse_iterator operator- (difference_type __n) const {return reverse_iterator(current + __n);} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 + reverse_iterator& operator-=(difference_type __n) {current += __n; return *this;} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 + reference operator[](difference_type __n) const {return *(*this + __n);} +}; + +template +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 +bool +operator==(const reverse_iterator<_Iter1>& __x, const reverse_iterator<_Iter2>& __y) +{ + return __x.base() == __y.base(); +} + +template +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 +bool +operator<(const reverse_iterator<_Iter1>& __x, const reverse_iterator<_Iter2>& __y) +{ + return __x.base() > __y.base(); +} + +template +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 +bool +operator!=(const reverse_iterator<_Iter1>& __x, const reverse_iterator<_Iter2>& __y) +{ + return __x.base() != __y.base(); +} + +template +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 +bool +operator>(const reverse_iterator<_Iter1>& __x, const reverse_iterator<_Iter2>& __y) +{ + return __x.base() < __y.base(); +} + +template +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 +bool +operator>=(const reverse_iterator<_Iter1>& __x, const reverse_iterator<_Iter2>& __y) +{ + return __x.base() <= __y.base(); +} + +template +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 +bool +operator<=(const reverse_iterator<_Iter1>& __x, const reverse_iterator<_Iter2>& __y) +{ + return __x.base() >= __y.base(); +} + +#ifndef _LIBCPP_CXX03_LANG +template +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 +auto +operator-(const reverse_iterator<_Iter1>& __x, const reverse_iterator<_Iter2>& __y) +-> decltype(__y.base() - __x.base()) +{ + return __y.base() - __x.base(); +} +#else +template +inline _LIBCPP_INLINE_VISIBILITY +typename reverse_iterator<_Iter1>::difference_type +operator-(const reverse_iterator<_Iter1>& __x, const reverse_iterator<_Iter2>& __y) +{ + return __y.base() - __x.base(); +} +#endif + +template +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 +reverse_iterator<_Iter> +operator+(typename reverse_iterator<_Iter>::difference_type __n, const reverse_iterator<_Iter>& __x) +{ + return reverse_iterator<_Iter>(__x.base() - __n); +} + +#if _LIBCPP_STD_VER > 11 +template +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 +reverse_iterator<_Iter> make_reverse_iterator(_Iter __i) +{ + return reverse_iterator<_Iter>(__i); +} +#endif + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___ITERATOR_REVERSE_ITERATOR_H diff --git a/libcxx/include/__iterator/wrap_iter.h b/libcxx/include/__iterator/wrap_iter.h new file mode 100644 index 0000000000000..d3e6f1e9ef518 --- /dev/null +++ b/libcxx/include/__iterator/wrap_iter.h @@ -0,0 +1,407 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ITERATOR_WRAP_ITER_H +#define _LIBCPP___ITERATOR_WRAP_ITER_H + +#include <__config> +#include <__debug> +#include <__iterator/iterator_traits.h> +#include <__memory/pointer_traits.h> // __to_address +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +template class __wrap_iter; + +template +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +bool +operator==(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT; + +template +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +bool +operator<(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT; + +template +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +bool +operator!=(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT; + +template +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +bool +operator>(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT; + +template +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +bool +operator>=(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT; + +template +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +bool +operator<=(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT; + +#ifndef _LIBCPP_CXX03_LANG +template +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +auto +operator-(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT +-> decltype(__x.base() - __y.base()); +#else +template +_LIBCPP_INLINE_VISIBILITY +typename __wrap_iter<_Iter1>::difference_type +operator-(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT; +#endif + +template +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +__wrap_iter<_Iter> +operator+(typename __wrap_iter<_Iter>::difference_type, __wrap_iter<_Iter>) _NOEXCEPT; + +template _Op _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 copy(_Ip, _Ip, _Op); +template _B2 _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 copy_backward(_B1, _B1, _B2); +template _Op _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 move(_Ip, _Ip, _Op); +template _B2 _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 move_backward(_B1, _B1, _B2); + +template +class __wrap_iter +{ +public: + typedef _Iter iterator_type; + typedef typename iterator_traits::value_type value_type; + typedef typename iterator_traits::difference_type difference_type; + typedef typename iterator_traits::pointer pointer; + typedef typename iterator_traits::reference reference; + typedef typename iterator_traits::iterator_category iterator_category; +#if _LIBCPP_STD_VER > 17 + typedef contiguous_iterator_tag iterator_concept; +#endif + +private: + iterator_type __i; +public: + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter() _NOEXCEPT +#if _LIBCPP_STD_VER > 11 + : __i{} +#endif + { +#if _LIBCPP_DEBUG_LEVEL == 2 + __get_db()->__insert_i(this); +#endif + } + template _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG + __wrap_iter(const __wrap_iter<_Up>& __u, + typename enable_if::value>::type* = nullptr) _NOEXCEPT + : __i(__u.base()) + { +#if _LIBCPP_DEBUG_LEVEL == 2 + __get_db()->__iterator_copy(this, &__u); +#endif + } +#if _LIBCPP_DEBUG_LEVEL == 2 + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG + __wrap_iter(const __wrap_iter& __x) + : __i(__x.base()) + { + __get_db()->__iterator_copy(this, &__x); + } + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG + __wrap_iter& operator=(const __wrap_iter& __x) + { + if (this != &__x) + { + __get_db()->__iterator_copy(this, &__x); + __i = __x.__i; + } + return *this; + } + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG + ~__wrap_iter() + { + __get_db()->__erase_i(this); + } +#endif + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG reference operator*() const _NOEXCEPT + { +#if _LIBCPP_DEBUG_LEVEL == 2 + _LIBCPP_ASSERT(__get_const_db()->__dereferenceable(this), + "Attempted to dereference a non-dereferenceable iterator"); +#endif + return *__i; + } + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG pointer operator->() const _NOEXCEPT + { +#if _LIBCPP_DEBUG_LEVEL == 2 + _LIBCPP_ASSERT(__get_const_db()->__dereferenceable(this), + "Attempted to dereference a non-dereferenceable iterator"); +#endif + return _VSTD::__to_address(__i); + } + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter& operator++() _NOEXCEPT + { +#if _LIBCPP_DEBUG_LEVEL == 2 + _LIBCPP_ASSERT(__get_const_db()->__dereferenceable(this), + "Attempted to increment a non-incrementable iterator"); +#endif + ++__i; + return *this; + } + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter operator++(int) _NOEXCEPT + {__wrap_iter __tmp(*this); ++(*this); return __tmp;} + + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter& operator--() _NOEXCEPT + { +#if _LIBCPP_DEBUG_LEVEL == 2 + _LIBCPP_ASSERT(__get_const_db()->__decrementable(this), + "Attempted to decrement a non-decrementable iterator"); +#endif + --__i; + return *this; + } + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter operator--(int) _NOEXCEPT + {__wrap_iter __tmp(*this); --(*this); return __tmp;} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter operator+ (difference_type __n) const _NOEXCEPT + {__wrap_iter __w(*this); __w += __n; return __w;} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter& operator+=(difference_type __n) _NOEXCEPT + { +#if _LIBCPP_DEBUG_LEVEL == 2 + _LIBCPP_ASSERT(__get_const_db()->__addable(this, __n), + "Attempted to add/subtract an iterator outside its valid range"); +#endif + __i += __n; + return *this; + } + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter operator- (difference_type __n) const _NOEXCEPT + {return *this + (-__n);} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter& operator-=(difference_type __n) _NOEXCEPT + {*this += -__n; return *this;} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG reference operator[](difference_type __n) const _NOEXCEPT + { +#if _LIBCPP_DEBUG_LEVEL == 2 + _LIBCPP_ASSERT(__get_const_db()->__subscriptable(this, __n), + "Attempted to subscript an iterator outside its valid range"); +#endif + return __i[__n]; + } + + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG iterator_type base() const _NOEXCEPT {return __i;} + +private: +#if _LIBCPP_DEBUG_LEVEL == 2 + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter(const void* __p, iterator_type __x) : __i(__x) + { + __get_db()->__insert_ic(this, __p); + } +#else + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter(iterator_type __x) _NOEXCEPT : __i(__x) {} +#endif + + template friend class __wrap_iter; + template friend class basic_string; + template friend class _LIBCPP_TEMPLATE_VIS vector; + template friend class _LIBCPP_TEMPLATE_VIS span; + + template + _LIBCPP_CONSTEXPR_IF_NODEBUG friend + bool + operator==(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT; + + template + _LIBCPP_CONSTEXPR_IF_NODEBUG friend + bool + operator<(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT; + + template + _LIBCPP_CONSTEXPR_IF_NODEBUG friend + bool + operator!=(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT; + + template + _LIBCPP_CONSTEXPR_IF_NODEBUG friend + bool + operator>(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT; + + template + _LIBCPP_CONSTEXPR_IF_NODEBUG friend + bool + operator>=(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT; + + template + _LIBCPP_CONSTEXPR_IF_NODEBUG friend + bool + operator<=(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT; + +#ifndef _LIBCPP_CXX03_LANG + template + _LIBCPP_CONSTEXPR_IF_NODEBUG friend + auto + operator-(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT + -> decltype(__x.base() - __y.base()); +#else + template + _LIBCPP_CONSTEXPR_IF_NODEBUG friend + typename __wrap_iter<_Iter1>::difference_type + operator-(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT; +#endif + + template + _LIBCPP_CONSTEXPR_IF_NODEBUG friend + __wrap_iter<_Iter1> + operator+(typename __wrap_iter<_Iter1>::difference_type, __wrap_iter<_Iter1>) _NOEXCEPT; +}; + +#if _LIBCPP_STD_VER <= 17 +template +struct __is_cpp17_contiguous_iterator<__wrap_iter<_It> > : true_type {}; +#endif + +template +_LIBCPP_CONSTEXPR +decltype(_VSTD::__to_address(declval<_Iter>())) +__to_address(__wrap_iter<_Iter> __w) _NOEXCEPT { + return _VSTD::__to_address(__w.base()); +} + +template +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +bool +operator==(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT +{ + return __x.base() == __y.base(); +} + +template +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +bool +operator<(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT +{ +#if _LIBCPP_DEBUG_LEVEL == 2 + _LIBCPP_ASSERT(__get_const_db()->__less_than_comparable(&__x, &__y), + "Attempted to compare incomparable iterators"); +#endif + return __x.base() < __y.base(); +} + +template +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +bool +operator!=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT +{ + return !(__x == __y); +} + +template +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +bool +operator>(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT +{ + return __y < __x; +} + +template +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +bool +operator>=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT +{ + return !(__x < __y); +} + +template +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +bool +operator<=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT +{ + return !(__y < __x); +} + +template +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +bool +operator!=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter1>& __y) _NOEXCEPT +{ + return !(__x == __y); +} + +template +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +bool +operator>(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter1>& __y) _NOEXCEPT +{ + return __y < __x; +} + +template +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +bool +operator>=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter1>& __y) _NOEXCEPT +{ + return !(__x < __y); +} + +template +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +bool +operator<=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter1>& __y) _NOEXCEPT +{ + return !(__y < __x); +} + +#ifndef _LIBCPP_CXX03_LANG +template +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +auto +operator-(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT +-> decltype(__x.base() - __y.base()) +{ +#if _LIBCPP_DEBUG_LEVEL == 2 + _LIBCPP_ASSERT(__get_const_db()->__less_than_comparable(&__x, &__y), + "Attempted to subtract incompatible iterators"); +#endif + return __x.base() - __y.base(); +} +#else +template +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +typename __wrap_iter<_Iter1>::difference_type +operator-(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT +{ +#if _LIBCPP_DEBUG_LEVEL == 2 + _LIBCPP_ASSERT(__get_const_db()->__less_than_comparable(&__x, &__y), + "Attempted to subtract incompatible iterators"); +#endif + return __x.base() - __y.base(); +} +#endif + +template +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +__wrap_iter<_Iter> +operator+(typename __wrap_iter<_Iter>::difference_type __n, + __wrap_iter<_Iter> __x) _NOEXCEPT +{ + __x += __n; + return __x; +} + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___ITERATOR_WRAP_ITER_H diff --git a/libcxx/include/iterator b/libcxx/include/iterator index 092e5898fc26a..b4e15c283d935 100644 --- a/libcxx/include/iterator +++ b/libcxx/include/iterator @@ -558,16 +558,27 @@ template constexpr const E* data(initializer_list il) noexcept; #include <__debug> #include <__functional_base> #include <__iterator/advance.h> +#include <__iterator/back_insert_iterator.h> #include <__iterator/concepts.h> #include <__iterator/default_sentinel.h> +#include <__iterator/front_insert_iterator.h> #include <__iterator/incrementable_traits.h> +#include <__iterator/insert_iterator.h> +#include <__iterator/istream_iterator.h> +#include <__iterator/istreambuf_iterator.h> #include <__iterator/iter_move.h> #include <__iterator/iter_swap.h> #include <__iterator/iterator_traits.h> +#include <__iterator/iterator.h> +#include <__iterator/move_iterator.h> #include <__iterator/next.h> +#include <__iterator/ostream_iterator.h> +#include <__iterator/ostreambuf_iterator.h> #include <__iterator/prev.h> #include <__iterator/projected.h> #include <__iterator/readable_traits.h> +#include <__iterator/reverse_iterator.h> +#include <__iterator/wrap_iter.h> #include <__memory/addressof.h> #include <__memory/pointer_traits.h> #include <__utility/forward.h> @@ -575,7 +586,6 @@ template constexpr const E* data(initializer_list il) noexcept; #include // Mandated by the Standard. #include #include -#include // for forward declarations of vector and string #include #include @@ -585,17 +595,6 @@ template constexpr const E* data(initializer_list il) noexcept; _LIBCPP_BEGIN_NAMESPACE_STD -template -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX17 iterator -{ - typedef _Tp value_type; - typedef _Distance difference_type; - typedef _Pointer pointer; - typedef _Reference reference; - typedef _Category iterator_category; -}; - template inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 typename iterator_traits<_InputIter>::difference_type @@ -623,1120 +622,6 @@ distance(_InputIter __first, _InputIter __last) return _VSTD::__distance(__first, __last, typename iterator_traits<_InputIter>::iterator_category()); } -template -struct __is_stashing_iterator : false_type {}; - -template -struct __is_stashing_iterator<_Tp, typename __void_t::type> - : true_type {}; - -_LIBCPP_SUPPRESS_DEPRECATED_PUSH -template -class _LIBCPP_TEMPLATE_VIS reverse_iterator -#if _LIBCPP_STD_VER <= 14 || !defined(_LIBCPP_ABI_NO_ITERATOR_BASES) - : public iterator::iterator_category, - typename iterator_traits<_Iter>::value_type, - typename iterator_traits<_Iter>::difference_type, - typename iterator_traits<_Iter>::pointer, - typename iterator_traits<_Iter>::reference> -#endif -{ -_LIBCPP_SUPPRESS_DEPRECATED_POP -private: -#ifndef _LIBCPP_ABI_NO_ITERATOR_BASES - _Iter __t; // no longer used as of LWG #2360, not removed due to ABI break -#endif - - static_assert(!__is_stashing_iterator<_Iter>::value, - "The specified iterator type cannot be used with reverse_iterator; " - "Using stashing iterators with reverse_iterator causes undefined behavior"); - -protected: - _Iter current; -public: - typedef _Iter iterator_type; - typedef typename iterator_traits<_Iter>::difference_type difference_type; - typedef typename iterator_traits<_Iter>::reference reference; - typedef typename iterator_traits<_Iter>::pointer pointer; - typedef _If<__is_cpp17_random_access_iterator<_Iter>::value, - random_access_iterator_tag, - typename iterator_traits<_Iter>::iterator_category> iterator_category; - typedef typename iterator_traits<_Iter>::value_type value_type; - -#if _LIBCPP_STD_VER > 17 - typedef _If<__is_cpp17_random_access_iterator<_Iter>::value, - random_access_iterator_tag, - bidirectional_iterator_tag> iterator_concept; -#endif - -#ifndef _LIBCPP_ABI_NO_ITERATOR_BASES - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 - reverse_iterator() : __t(), current() {} - - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 - explicit reverse_iterator(_Iter __x) : __t(__x), current(__x) {} - - template ::value && is_convertible<_Up const&, _Iter>::value - > > - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 - reverse_iterator(const reverse_iterator<_Up>& __u) - : __t(__u.base()), current(__u.base()) - { } - - template ::value && - is_convertible<_Up const&, _Iter>::value && - is_assignable<_Up const&, _Iter>::value - > > - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 - reverse_iterator& operator=(const reverse_iterator<_Up>& __u) { - __t = current = __u.base(); - return *this; - } -#else - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 - reverse_iterator() : current() {} - - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 - explicit reverse_iterator(_Iter __x) : current(__x) {} - - template ::value && is_convertible<_Up const&, _Iter>::value - > > - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 - reverse_iterator(const reverse_iterator<_Up>& __u) - : current(__u.base()) - { } - - template ::value && - is_convertible<_Up const&, _Iter>::value && - is_assignable<_Up const&, _Iter>::value - > > - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 - reverse_iterator& operator=(const reverse_iterator<_Up>& __u) { - current = __u.base(); - return *this; - } -#endif - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 - _Iter base() const {return current;} - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 - reference operator*() const {_Iter __tmp = current; return *--__tmp;} - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 - pointer operator->() const {return _VSTD::addressof(operator*());} - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 - reverse_iterator& operator++() {--current; return *this;} - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 - reverse_iterator operator++(int) {reverse_iterator __tmp(*this); --current; return __tmp;} - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 - reverse_iterator& operator--() {++current; return *this;} - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 - reverse_iterator operator--(int) {reverse_iterator __tmp(*this); ++current; return __tmp;} - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 - reverse_iterator operator+ (difference_type __n) const {return reverse_iterator(current - __n);} - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 - reverse_iterator& operator+=(difference_type __n) {current -= __n; return *this;} - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 - reverse_iterator operator- (difference_type __n) const {return reverse_iterator(current + __n);} - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 - reverse_iterator& operator-=(difference_type __n) {current += __n; return *this;} - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 - reference operator[](difference_type __n) const {return *(*this + __n);} -}; - -template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 -bool -operator==(const reverse_iterator<_Iter1>& __x, const reverse_iterator<_Iter2>& __y) -{ - return __x.base() == __y.base(); -} - -template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 -bool -operator<(const reverse_iterator<_Iter1>& __x, const reverse_iterator<_Iter2>& __y) -{ - return __x.base() > __y.base(); -} - -template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 -bool -operator!=(const reverse_iterator<_Iter1>& __x, const reverse_iterator<_Iter2>& __y) -{ - return __x.base() != __y.base(); -} - -template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 -bool -operator>(const reverse_iterator<_Iter1>& __x, const reverse_iterator<_Iter2>& __y) -{ - return __x.base() < __y.base(); -} - -template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 -bool -operator>=(const reverse_iterator<_Iter1>& __x, const reverse_iterator<_Iter2>& __y) -{ - return __x.base() <= __y.base(); -} - -template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 -bool -operator<=(const reverse_iterator<_Iter1>& __x, const reverse_iterator<_Iter2>& __y) -{ - return __x.base() >= __y.base(); -} - -#ifndef _LIBCPP_CXX03_LANG -template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 -auto -operator-(const reverse_iterator<_Iter1>& __x, const reverse_iterator<_Iter2>& __y) --> decltype(__y.base() - __x.base()) -{ - return __y.base() - __x.base(); -} -#else -template -inline _LIBCPP_INLINE_VISIBILITY -typename reverse_iterator<_Iter1>::difference_type -operator-(const reverse_iterator<_Iter1>& __x, const reverse_iterator<_Iter2>& __y) -{ - return __y.base() - __x.base(); -} -#endif - -template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 -reverse_iterator<_Iter> -operator+(typename reverse_iterator<_Iter>::difference_type __n, const reverse_iterator<_Iter>& __x) -{ - return reverse_iterator<_Iter>(__x.base() - __n); -} - -#if _LIBCPP_STD_VER > 11 -template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 -reverse_iterator<_Iter> make_reverse_iterator(_Iter __i) -{ - return reverse_iterator<_Iter>(__i); -} -#endif - -_LIBCPP_SUPPRESS_DEPRECATED_PUSH -template -class _LIBCPP_TEMPLATE_VIS back_insert_iterator -#if _LIBCPP_STD_VER <= 14 || !defined(_LIBCPP_ABI_NO_ITERATOR_BASES) - : public iterator -#endif -{ -_LIBCPP_SUPPRESS_DEPRECATED_POP -protected: - _Container* container; -public: - typedef output_iterator_tag iterator_category; - typedef void value_type; -#if _LIBCPP_STD_VER > 17 - typedef ptrdiff_t difference_type; -#else - typedef void difference_type; -#endif - typedef void pointer; - typedef void reference; - typedef _Container container_type; - - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 explicit back_insert_iterator(_Container& __x) : container(_VSTD::addressof(__x)) {} - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 back_insert_iterator& operator=(const typename _Container::value_type& __value_) - {container->push_back(__value_); return *this;} -#ifndef _LIBCPP_CXX03_LANG - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 back_insert_iterator& operator=(typename _Container::value_type&& __value_) - {container->push_back(_VSTD::move(__value_)); return *this;} -#endif // _LIBCPP_CXX03_LANG - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 back_insert_iterator& operator*() {return *this;} - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 back_insert_iterator& operator++() {return *this;} - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 back_insert_iterator operator++(int) {return *this;} -}; - -template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -back_insert_iterator<_Container> -back_inserter(_Container& __x) -{ - return back_insert_iterator<_Container>(__x); -} - -_LIBCPP_SUPPRESS_DEPRECATED_PUSH -template -class _LIBCPP_TEMPLATE_VIS front_insert_iterator -#if _LIBCPP_STD_VER <= 14 || !defined(_LIBCPP_ABI_NO_ITERATOR_BASES) - : public iterator -#endif -{ -_LIBCPP_SUPPRESS_DEPRECATED_POP -protected: - _Container* container; -public: - typedef output_iterator_tag iterator_category; - typedef void value_type; -#if _LIBCPP_STD_VER > 17 - typedef ptrdiff_t difference_type; -#else - typedef void difference_type; -#endif - typedef void pointer; - typedef void reference; - typedef _Container container_type; - - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 explicit front_insert_iterator(_Container& __x) : container(_VSTD::addressof(__x)) {} - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 front_insert_iterator& operator=(const typename _Container::value_type& __value_) - {container->push_front(__value_); return *this;} -#ifndef _LIBCPP_CXX03_LANG - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 front_insert_iterator& operator=(typename _Container::value_type&& __value_) - {container->push_front(_VSTD::move(__value_)); return *this;} -#endif // _LIBCPP_CXX03_LANG - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 front_insert_iterator& operator*() {return *this;} - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 front_insert_iterator& operator++() {return *this;} - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 front_insert_iterator operator++(int) {return *this;} -}; - -template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -front_insert_iterator<_Container> -front_inserter(_Container& __x) -{ - return front_insert_iterator<_Container>(__x); -} - -_LIBCPP_SUPPRESS_DEPRECATED_PUSH -template -class _LIBCPP_TEMPLATE_VIS insert_iterator -#if _LIBCPP_STD_VER <= 14 || !defined(_LIBCPP_ABI_NO_ITERATOR_BASES) - : public iterator -#endif -{ -_LIBCPP_SUPPRESS_DEPRECATED_POP -protected: - _Container* container; - typename _Container::iterator iter; // FIXME: `ranges::iterator_t` in C++20 mode -public: - typedef output_iterator_tag iterator_category; - typedef void value_type; -#if _LIBCPP_STD_VER > 17 - typedef ptrdiff_t difference_type; -#else - typedef void difference_type; -#endif - typedef void pointer; - typedef void reference; - typedef _Container container_type; - - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 insert_iterator(_Container& __x, typename _Container::iterator __i) - : container(_VSTD::addressof(__x)), iter(__i) {} - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 insert_iterator& operator=(const typename _Container::value_type& __value_) - {iter = container->insert(iter, __value_); ++iter; return *this;} -#ifndef _LIBCPP_CXX03_LANG - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 insert_iterator& operator=(typename _Container::value_type&& __value_) - {iter = container->insert(iter, _VSTD::move(__value_)); ++iter; return *this;} -#endif // _LIBCPP_CXX03_LANG - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 insert_iterator& operator*() {return *this;} - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 insert_iterator& operator++() {return *this;} - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 insert_iterator& operator++(int) {return *this;} -}; - -template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -insert_iterator<_Container> -inserter(_Container& __x, typename _Container::iterator __i) -{ - return insert_iterator<_Container>(__x, __i); -} - -_LIBCPP_SUPPRESS_DEPRECATED_PUSH -template , class _Distance = ptrdiff_t> -class _LIBCPP_TEMPLATE_VIS istream_iterator -#if _LIBCPP_STD_VER <= 14 || !defined(_LIBCPP_ABI_NO_ITERATOR_BASES) - : public iterator -#endif -{ -_LIBCPP_SUPPRESS_DEPRECATED_POP -public: - typedef input_iterator_tag iterator_category; - typedef _Tp value_type; - typedef _Distance difference_type; - typedef const _Tp* pointer; - typedef const _Tp& reference; - typedef _CharT char_type; - typedef _Traits traits_type; - typedef basic_istream<_CharT,_Traits> istream_type; -private: - istream_type* __in_stream_; - _Tp __value_; -public: - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR istream_iterator() : __in_stream_(nullptr), __value_() {} - _LIBCPP_INLINE_VISIBILITY istream_iterator(istream_type& __s) : __in_stream_(_VSTD::addressof(__s)) - { - if (!(*__in_stream_ >> __value_)) - __in_stream_ = nullptr; - } - - _LIBCPP_INLINE_VISIBILITY const _Tp& operator*() const {return __value_;} - _LIBCPP_INLINE_VISIBILITY const _Tp* operator->() const {return _VSTD::addressof((operator*()));} - _LIBCPP_INLINE_VISIBILITY istream_iterator& operator++() - { - if (!(*__in_stream_ >> __value_)) - __in_stream_ = nullptr; - return *this; - } - _LIBCPP_INLINE_VISIBILITY istream_iterator operator++(int) - {istream_iterator __t(*this); ++(*this); return __t;} - - template - friend _LIBCPP_INLINE_VISIBILITY - bool - operator==(const istream_iterator<_Up, _CharU, _TraitsU, _DistanceU>& __x, - const istream_iterator<_Up, _CharU, _TraitsU, _DistanceU>& __y); - - template - friend _LIBCPP_INLINE_VISIBILITY - bool - operator==(const istream_iterator<_Up, _CharU, _TraitsU, _DistanceU>& __x, - const istream_iterator<_Up, _CharU, _TraitsU, _DistanceU>& __y); -}; - -template -inline _LIBCPP_INLINE_VISIBILITY -bool -operator==(const istream_iterator<_Tp, _CharT, _Traits, _Distance>& __x, - const istream_iterator<_Tp, _CharT, _Traits, _Distance>& __y) -{ - return __x.__in_stream_ == __y.__in_stream_; -} - -template -inline _LIBCPP_INLINE_VISIBILITY -bool -operator!=(const istream_iterator<_Tp, _CharT, _Traits, _Distance>& __x, - const istream_iterator<_Tp, _CharT, _Traits, _Distance>& __y) -{ - return !(__x == __y); -} - -_LIBCPP_SUPPRESS_DEPRECATED_PUSH -template > -class _LIBCPP_TEMPLATE_VIS ostream_iterator -#if _LIBCPP_STD_VER <= 14 || !defined(_LIBCPP_ABI_NO_ITERATOR_BASES) - : public iterator -#endif -{ -_LIBCPP_SUPPRESS_DEPRECATED_POP -public: - typedef output_iterator_tag iterator_category; - typedef void value_type; -#if _LIBCPP_STD_VER > 17 - typedef ptrdiff_t difference_type; -#else - typedef void difference_type; -#endif - typedef void pointer; - typedef void reference; - typedef _CharT char_type; - typedef _Traits traits_type; - typedef basic_ostream<_CharT, _Traits> ostream_type; - -private: - ostream_type* __out_stream_; - const char_type* __delim_; -public: - _LIBCPP_INLINE_VISIBILITY ostream_iterator(ostream_type& __s) _NOEXCEPT - : __out_stream_(_VSTD::addressof(__s)), __delim_(nullptr) {} - _LIBCPP_INLINE_VISIBILITY ostream_iterator(ostream_type& __s, const _CharT* __delimiter) _NOEXCEPT - : __out_stream_(_VSTD::addressof(__s)), __delim_(__delimiter) {} - _LIBCPP_INLINE_VISIBILITY ostream_iterator& operator=(const _Tp& __value_) - { - *__out_stream_ << __value_; - if (__delim_) - *__out_stream_ << __delim_; - return *this; - } - - _LIBCPP_INLINE_VISIBILITY ostream_iterator& operator*() {return *this;} - _LIBCPP_INLINE_VISIBILITY ostream_iterator& operator++() {return *this;} - _LIBCPP_INLINE_VISIBILITY ostream_iterator& operator++(int) {return *this;} -}; - -_LIBCPP_SUPPRESS_DEPRECATED_PUSH -template -class _LIBCPP_TEMPLATE_VIS istreambuf_iterator -#if _LIBCPP_STD_VER <= 14 || !defined(_LIBCPP_ABI_NO_ITERATOR_BASES) - : public iterator -#endif -{ -_LIBCPP_SUPPRESS_DEPRECATED_POP -public: - typedef input_iterator_tag iterator_category; - typedef _CharT value_type; - typedef typename _Traits::off_type difference_type; - typedef _CharT* pointer; - typedef _CharT reference; - typedef _CharT char_type; - typedef _Traits traits_type; - typedef typename _Traits::int_type int_type; - typedef basic_streambuf<_CharT,_Traits> streambuf_type; - typedef basic_istream<_CharT,_Traits> istream_type; -private: - mutable streambuf_type* __sbuf_; - - class __proxy - { - char_type __keep_; - streambuf_type* __sbuf_; - _LIBCPP_INLINE_VISIBILITY __proxy(char_type __c, streambuf_type* __s) - : __keep_(__c), __sbuf_(__s) {} - friend class istreambuf_iterator; - public: - _LIBCPP_INLINE_VISIBILITY char_type operator*() const {return __keep_;} - }; - - _LIBCPP_INLINE_VISIBILITY - bool __test_for_eof() const - { - if (__sbuf_ && traits_type::eq_int_type(__sbuf_->sgetc(), traits_type::eof())) - __sbuf_ = nullptr; - return __sbuf_ == nullptr; - } -public: - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR istreambuf_iterator() _NOEXCEPT : __sbuf_(nullptr) {} - _LIBCPP_INLINE_VISIBILITY istreambuf_iterator(istream_type& __s) _NOEXCEPT - : __sbuf_(__s.rdbuf()) {} - _LIBCPP_INLINE_VISIBILITY istreambuf_iterator(streambuf_type* __s) _NOEXCEPT - : __sbuf_(__s) {} - _LIBCPP_INLINE_VISIBILITY istreambuf_iterator(const __proxy& __p) _NOEXCEPT - : __sbuf_(__p.__sbuf_) {} - - _LIBCPP_INLINE_VISIBILITY char_type operator*() const - {return static_cast(__sbuf_->sgetc());} - _LIBCPP_INLINE_VISIBILITY istreambuf_iterator& operator++() - { - __sbuf_->sbumpc(); - return *this; - } - _LIBCPP_INLINE_VISIBILITY __proxy operator++(int) - { - return __proxy(__sbuf_->sbumpc(), __sbuf_); - } - - _LIBCPP_INLINE_VISIBILITY bool equal(const istreambuf_iterator& __b) const - {return __test_for_eof() == __b.__test_for_eof();} -}; - -template -inline _LIBCPP_INLINE_VISIBILITY -bool operator==(const istreambuf_iterator<_CharT,_Traits>& __a, - const istreambuf_iterator<_CharT,_Traits>& __b) - {return __a.equal(__b);} - -template -inline _LIBCPP_INLINE_VISIBILITY -bool operator!=(const istreambuf_iterator<_CharT,_Traits>& __a, - const istreambuf_iterator<_CharT,_Traits>& __b) - {return !__a.equal(__b);} - -_LIBCPP_SUPPRESS_DEPRECATED_PUSH -template -class _LIBCPP_TEMPLATE_VIS ostreambuf_iterator -#if _LIBCPP_STD_VER <= 14 || !defined(_LIBCPP_ABI_NO_ITERATOR_BASES) - : public iterator -#endif -{ -_LIBCPP_SUPPRESS_DEPRECATED_POP -public: - typedef output_iterator_tag iterator_category; - typedef void value_type; -#if _LIBCPP_STD_VER > 17 - typedef ptrdiff_t difference_type; -#else - typedef void difference_type; -#endif - typedef void pointer; - typedef void reference; - typedef _CharT char_type; - typedef _Traits traits_type; - typedef basic_streambuf<_CharT, _Traits> streambuf_type; - typedef basic_ostream<_CharT, _Traits> ostream_type; - -private: - streambuf_type* __sbuf_; -public: - _LIBCPP_INLINE_VISIBILITY ostreambuf_iterator(ostream_type& __s) _NOEXCEPT - : __sbuf_(__s.rdbuf()) {} - _LIBCPP_INLINE_VISIBILITY ostreambuf_iterator(streambuf_type* __s) _NOEXCEPT - : __sbuf_(__s) {} - _LIBCPP_INLINE_VISIBILITY ostreambuf_iterator& operator=(_CharT __c) - { - if (__sbuf_ && traits_type::eq_int_type(__sbuf_->sputc(__c), traits_type::eof())) - __sbuf_ = nullptr; - return *this; - } - _LIBCPP_INLINE_VISIBILITY ostreambuf_iterator& operator*() {return *this;} - _LIBCPP_INLINE_VISIBILITY ostreambuf_iterator& operator++() {return *this;} - _LIBCPP_INLINE_VISIBILITY ostreambuf_iterator& operator++(int) {return *this;} - _LIBCPP_INLINE_VISIBILITY bool failed() const _NOEXCEPT {return __sbuf_ == nullptr;} - - template - friend - _LIBCPP_HIDDEN - ostreambuf_iterator<_Ch, _Tr> - __pad_and_output(ostreambuf_iterator<_Ch, _Tr> __s, - const _Ch* __ob, const _Ch* __op, const _Ch* __oe, - ios_base& __iob, _Ch __fl); -}; - -template -class _LIBCPP_TEMPLATE_VIS move_iterator -{ -private: - _Iter __i; -public: - typedef _Iter iterator_type; - typedef typename iterator_traits::value_type value_type; - typedef typename iterator_traits::difference_type difference_type; - typedef iterator_type pointer; - typedef _If<__is_cpp17_random_access_iterator<_Iter>::value, - random_access_iterator_tag, - typename iterator_traits<_Iter>::iterator_category> iterator_category; -#if _LIBCPP_STD_VER > 17 - typedef input_iterator_tag iterator_concept; -#endif - -#ifndef _LIBCPP_CXX03_LANG - typedef typename iterator_traits::reference __reference; - typedef typename conditional< - is_reference<__reference>::value, - typename remove_reference<__reference>::type&&, - __reference - >::type reference; -#else - typedef typename iterator_traits::reference reference; -#endif - - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 - move_iterator() : __i() {} - - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 - explicit move_iterator(_Iter __x) : __i(__x) {} - - template ::value && is_convertible<_Up const&, _Iter>::value - > > - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 - move_iterator(const move_iterator<_Up>& __u) : __i(__u.base()) {} - - template ::value && - is_convertible<_Up const&, _Iter>::value && - is_assignable<_Iter&, _Up const&>::value - > > - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 - move_iterator& operator=(const move_iterator<_Up>& __u) { - __i = __u.base(); - return *this; - } - - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 _Iter base() const {return __i;} - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 - reference operator*() const { return static_cast(*__i); } - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 - pointer operator->() const { return __i;} - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 - move_iterator& operator++() {++__i; return *this;} - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 - move_iterator operator++(int) {move_iterator __tmp(*this); ++__i; return __tmp;} - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 - move_iterator& operator--() {--__i; return *this;} - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 - move_iterator operator--(int) {move_iterator __tmp(*this); --__i; return __tmp;} - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 - move_iterator operator+ (difference_type __n) const {return move_iterator(__i + __n);} - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 - move_iterator& operator+=(difference_type __n) {__i += __n; return *this;} - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 - move_iterator operator- (difference_type __n) const {return move_iterator(__i - __n);} - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 - move_iterator& operator-=(difference_type __n) {__i -= __n; return *this;} - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 - reference operator[](difference_type __n) const { return static_cast(__i[__n]); } -}; - -template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 -bool -operator==(const move_iterator<_Iter1>& __x, const move_iterator<_Iter2>& __y) -{ - return __x.base() == __y.base(); -} - -template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 -bool -operator<(const move_iterator<_Iter1>& __x, const move_iterator<_Iter2>& __y) -{ - return __x.base() < __y.base(); -} - -template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 -bool -operator!=(const move_iterator<_Iter1>& __x, const move_iterator<_Iter2>& __y) -{ - return __x.base() != __y.base(); -} - -template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 -bool -operator>(const move_iterator<_Iter1>& __x, const move_iterator<_Iter2>& __y) -{ - return __x.base() > __y.base(); -} - -template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 -bool -operator>=(const move_iterator<_Iter1>& __x, const move_iterator<_Iter2>& __y) -{ - return __x.base() >= __y.base(); -} - -template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 -bool -operator<=(const move_iterator<_Iter1>& __x, const move_iterator<_Iter2>& __y) -{ - return __x.base() <= __y.base(); -} - -#ifndef _LIBCPP_CXX03_LANG -template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 -auto -operator-(const move_iterator<_Iter1>& __x, const move_iterator<_Iter2>& __y) --> decltype(__x.base() - __y.base()) -{ - return __x.base() - __y.base(); -} -#else -template -inline _LIBCPP_INLINE_VISIBILITY -typename move_iterator<_Iter1>::difference_type -operator-(const move_iterator<_Iter1>& __x, const move_iterator<_Iter2>& __y) -{ - return __x.base() - __y.base(); -} -#endif - -template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 -move_iterator<_Iter> -operator+(typename move_iterator<_Iter>::difference_type __n, const move_iterator<_Iter>& __x) -{ - return move_iterator<_Iter>(__x.base() + __n); -} - -template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 -move_iterator<_Iter> -make_move_iterator(_Iter __i) -{ - return move_iterator<_Iter>(__i); -} - -// __wrap_iter - -template class __wrap_iter; - -template -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG -bool -operator==(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT; - -template -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG -bool -operator<(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT; - -template -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG -bool -operator!=(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT; - -template -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG -bool -operator>(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT; - -template -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG -bool -operator>=(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT; - -template -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG -bool -operator<=(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT; - -#ifndef _LIBCPP_CXX03_LANG -template -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG -auto -operator-(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT --> decltype(__x.base() - __y.base()); -#else -template -_LIBCPP_INLINE_VISIBILITY -typename __wrap_iter<_Iter1>::difference_type -operator-(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT; -#endif - -template -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG -__wrap_iter<_Iter> -operator+(typename __wrap_iter<_Iter>::difference_type, __wrap_iter<_Iter>) _NOEXCEPT; - -template _Op _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 copy(_Ip, _Ip, _Op); -template _B2 _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 copy_backward(_B1, _B1, _B2); -template _Op _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 move(_Ip, _Ip, _Op); -template _B2 _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 move_backward(_B1, _B1, _B2); - -template -class __wrap_iter -{ -public: - typedef _Iter iterator_type; - typedef typename iterator_traits::value_type value_type; - typedef typename iterator_traits::difference_type difference_type; - typedef typename iterator_traits::pointer pointer; - typedef typename iterator_traits::reference reference; - typedef typename iterator_traits::iterator_category iterator_category; -#if _LIBCPP_STD_VER > 17 - typedef contiguous_iterator_tag iterator_concept; -#endif - -private: - iterator_type __i; -public: - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter() _NOEXCEPT -#if _LIBCPP_STD_VER > 11 - : __i{} -#endif - { -#if _LIBCPP_DEBUG_LEVEL == 2 - __get_db()->__insert_i(this); -#endif - } - template _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG - __wrap_iter(const __wrap_iter<_Up>& __u, - typename enable_if::value>::type* = nullptr) _NOEXCEPT - : __i(__u.base()) - { -#if _LIBCPP_DEBUG_LEVEL == 2 - __get_db()->__iterator_copy(this, &__u); -#endif - } -#if _LIBCPP_DEBUG_LEVEL == 2 - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG - __wrap_iter(const __wrap_iter& __x) - : __i(__x.base()) - { - __get_db()->__iterator_copy(this, &__x); - } - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG - __wrap_iter& operator=(const __wrap_iter& __x) - { - if (this != &__x) - { - __get_db()->__iterator_copy(this, &__x); - __i = __x.__i; - } - return *this; - } - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG - ~__wrap_iter() - { - __get_db()->__erase_i(this); - } -#endif - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG reference operator*() const _NOEXCEPT - { -#if _LIBCPP_DEBUG_LEVEL == 2 - _LIBCPP_ASSERT(__get_const_db()->__dereferenceable(this), - "Attempted to dereference a non-dereferenceable iterator"); -#endif - return *__i; - } - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG pointer operator->() const _NOEXCEPT - { -#if _LIBCPP_DEBUG_LEVEL == 2 - _LIBCPP_ASSERT(__get_const_db()->__dereferenceable(this), - "Attempted to dereference a non-dereferenceable iterator"); -#endif - return _VSTD::__to_address(__i); - } - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter& operator++() _NOEXCEPT - { -#if _LIBCPP_DEBUG_LEVEL == 2 - _LIBCPP_ASSERT(__get_const_db()->__dereferenceable(this), - "Attempted to increment a non-incrementable iterator"); -#endif - ++__i; - return *this; - } - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter operator++(int) _NOEXCEPT - {__wrap_iter __tmp(*this); ++(*this); return __tmp;} - - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter& operator--() _NOEXCEPT - { -#if _LIBCPP_DEBUG_LEVEL == 2 - _LIBCPP_ASSERT(__get_const_db()->__decrementable(this), - "Attempted to decrement a non-decrementable iterator"); -#endif - --__i; - return *this; - } - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter operator--(int) _NOEXCEPT - {__wrap_iter __tmp(*this); --(*this); return __tmp;} - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter operator+ (difference_type __n) const _NOEXCEPT - {__wrap_iter __w(*this); __w += __n; return __w;} - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter& operator+=(difference_type __n) _NOEXCEPT - { -#if _LIBCPP_DEBUG_LEVEL == 2 - _LIBCPP_ASSERT(__get_const_db()->__addable(this, __n), - "Attempted to add/subtract an iterator outside its valid range"); -#endif - __i += __n; - return *this; - } - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter operator- (difference_type __n) const _NOEXCEPT - {return *this + (-__n);} - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter& operator-=(difference_type __n) _NOEXCEPT - {*this += -__n; return *this;} - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG reference operator[](difference_type __n) const _NOEXCEPT - { -#if _LIBCPP_DEBUG_LEVEL == 2 - _LIBCPP_ASSERT(__get_const_db()->__subscriptable(this, __n), - "Attempted to subscript an iterator outside its valid range"); -#endif - return __i[__n]; - } - - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG iterator_type base() const _NOEXCEPT {return __i;} - -private: -#if _LIBCPP_DEBUG_LEVEL == 2 - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter(const void* __p, iterator_type __x) : __i(__x) - { - __get_db()->__insert_ic(this, __p); - } -#else - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter(iterator_type __x) _NOEXCEPT : __i(__x) {} -#endif - - template friend class __wrap_iter; - template friend class basic_string; - template friend class _LIBCPP_TEMPLATE_VIS vector; - template friend class _LIBCPP_TEMPLATE_VIS span; - - template - _LIBCPP_CONSTEXPR_IF_NODEBUG friend - bool - operator==(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT; - - template - _LIBCPP_CONSTEXPR_IF_NODEBUG friend - bool - operator<(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT; - - template - _LIBCPP_CONSTEXPR_IF_NODEBUG friend - bool - operator!=(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT; - - template - _LIBCPP_CONSTEXPR_IF_NODEBUG friend - bool - operator>(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT; - - template - _LIBCPP_CONSTEXPR_IF_NODEBUG friend - bool - operator>=(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT; - - template - _LIBCPP_CONSTEXPR_IF_NODEBUG friend - bool - operator<=(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT; - -#ifndef _LIBCPP_CXX03_LANG - template - _LIBCPP_CONSTEXPR_IF_NODEBUG friend - auto - operator-(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT - -> decltype(__x.base() - __y.base()); -#else - template - _LIBCPP_CONSTEXPR_IF_NODEBUG friend - typename __wrap_iter<_Iter1>::difference_type - operator-(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT; -#endif - - template - _LIBCPP_CONSTEXPR_IF_NODEBUG friend - __wrap_iter<_Iter1> - operator+(typename __wrap_iter<_Iter1>::difference_type, __wrap_iter<_Iter1>) _NOEXCEPT; -}; - -#if _LIBCPP_STD_VER <= 17 -template -struct __is_cpp17_contiguous_iterator<__wrap_iter<_It> > : true_type {}; -#endif - -template -_LIBCPP_CONSTEXPR -decltype(_VSTD::__to_address(declval<_Iter>())) -__to_address(__wrap_iter<_Iter> __w) _NOEXCEPT { - return _VSTD::__to_address(__w.base()); -} - -template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG -bool -operator==(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT -{ - return __x.base() == __y.base(); -} - -template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG -bool -operator<(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT -{ -#if _LIBCPP_DEBUG_LEVEL == 2 - _LIBCPP_ASSERT(__get_const_db()->__less_than_comparable(&__x, &__y), - "Attempted to compare incomparable iterators"); -#endif - return __x.base() < __y.base(); -} - -template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG -bool -operator!=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT -{ - return !(__x == __y); -} - -template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG -bool -operator>(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT -{ - return __y < __x; -} - -template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG -bool -operator>=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT -{ - return !(__x < __y); -} - -template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG -bool -operator<=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT -{ - return !(__y < __x); -} - -template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG -bool -operator!=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter1>& __y) _NOEXCEPT -{ - return !(__x == __y); -} - -template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG -bool -operator>(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter1>& __y) _NOEXCEPT -{ - return __y < __x; -} - -template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG -bool -operator>=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter1>& __y) _NOEXCEPT -{ - return !(__x < __y); -} - -template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG -bool -operator<=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter1>& __y) _NOEXCEPT -{ - return !(__y < __x); -} - -#ifndef _LIBCPP_CXX03_LANG -template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG -auto -operator-(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT --> decltype(__x.base() - __y.base()) -{ -#if _LIBCPP_DEBUG_LEVEL == 2 - _LIBCPP_ASSERT(__get_const_db()->__less_than_comparable(&__x, &__y), - "Attempted to subtract incompatible iterators"); -#endif - return __x.base() - __y.base(); -} -#else -template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG -typename __wrap_iter<_Iter1>::difference_type -operator-(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT -{ -#if _LIBCPP_DEBUG_LEVEL == 2 - _LIBCPP_ASSERT(__get_const_db()->__less_than_comparable(&__x, &__y), - "Attempted to subtract incompatible iterators"); -#endif - return __x.base() - __y.base(); -} -#endif - -template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG -__wrap_iter<_Iter> -operator+(typename __wrap_iter<_Iter>::difference_type __n, - __wrap_iter<_Iter> __x) _NOEXCEPT -{ - __x += __n; - return __x; -} - template _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11 _Tp* diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap index 48573377bb7b0..da0a988c00c87 100644 --- a/libcxx/include/module.modulemap +++ b/libcxx/include/module.modulemap @@ -453,17 +453,28 @@ module std [system] { export * module __iterator { - module advance { header "__iterator/advance.h" } - module concepts { header "__iterator/concepts.h" } - module default_sentinel { header "__iterator/default_sentinel.h" } - module incrementable_traits { header "__iterator/incrementable_traits.h" } - module iter_move { header "__iterator/iter_move.h" } - module iter_swap { header "__iterator/iter_swap.h" } - module iterator_traits { header "__iterator/iterator_traits.h" } - module next { header "__iterator/next.h" } - module prev { header "__iterator/prev.h" } - module projected { header "__iterator/projected.h" } - module readable_traits { header "__iterator/readable_traits.h" } + module advance { header "__iterator/advance.h" } + module back_insert_iterator { header "__iterator/back_insert_iterator.h" } + module concepts { header "__iterator/concepts.h" } + module default_sentinel { header "__iterator/default_sentinel.h" } + module front_insert_iterator { header "__iterator/front_insert_iterator.h" } + module incrementable_traits { header "__iterator/incrementable_traits.h" } + module insert_iterator { header "__iterator/insert_iterator.h" } + module istream_iterator { header "__iterator/istream_iterator.h" } + module istreambuf_iterator { header "__iterator/istreambuf_iterator.h" } + module iter_move { header "__iterator/iter_move.h" } + module iter_swap { header "__iterator/iter_swap.h" } + module iterator { header "__iterator/iterator.h" } + module iterator_traits { header "__iterator/iterator_traits.h" } + module move_iterator { header "__iterator/move_iterator.h" } + module next { header "__iterator/next.h" } + module ostream_iterator { header "__iterator/ostream_iterator.h" } + module ostreambuf_iterator { header "__iterator/ostreambuf_iterator.h" } + module prev { header "__iterator/prev.h" } + module projected { header "__iterator/projected.h" } + module readable_traits { header "__iterator/readable_traits.h" } + module reverse_iterator { header "__iterator/reverse_iterator.h" } + module wrap_iter { header "__iterator/wrap_iter.h" } } } module latch { diff --git a/libcxx/include/regex b/libcxx/include/regex index 5d4c52c40a18f..9e5c6ed39998b 100644 --- a/libcxx/include/regex +++ b/libcxx/include/regex @@ -765,6 +765,7 @@ typedef regex_token_iterator wsregex_token_iterator; #include <__config> #include <__debug> #include <__functional/search.h> +#include <__iterator/wrap_iter.h> #include <__locale> #include #include diff --git a/libcxx/include/span b/libcxx/include/span index 80c550daa3a5d..0892e25a59bc2 100644 --- a/libcxx/include/span +++ b/libcxx/include/span @@ -129,6 +129,7 @@ template #include <__config> #include <__debug> +#include <__iterator/wrap_iter.h> #include <__ranges/enable_borrowed_range.h> #include <__ranges/enable_view.h> #include // for array diff --git a/libcxx/include/string b/libcxx/include/string index 9b76be30ad7d4..c5e0745250ee4 100644 --- a/libcxx/include/string +++ b/libcxx/include/string @@ -516,6 +516,7 @@ basic_string operator "" s( const char32_t *str, size_t len ); // C++1 #include <__config> #include <__debug> #include <__functional_base> +#include <__iterator/wrap_iter.h> #include #include #include // EOF diff --git a/libcxx/include/vector b/libcxx/include/vector index bf193e5ba4c90..69babd04f7e6c 100644 --- a/libcxx/include/vector +++ b/libcxx/include/vector @@ -275,6 +275,7 @@ erase_if(vector& c, Predicate pred); // C++20 #include <__bit_reference> #include <__debug> #include <__functional_base> +#include <__iterator/wrap_iter.h> #include <__split_buffer> #include <__utility/forward.h> #include diff --git a/libcxx/test/libcxx/input.output/filesystems/class.path/path.itr/reverse_iterator_produces_diagnostic.fail.cpp b/libcxx/test/libcxx/input.output/filesystems/class.path/path.itr/reverse_iterator_produces_diagnostic.verify.cpp similarity index 75% rename from libcxx/test/libcxx/input.output/filesystems/class.path/path.itr/reverse_iterator_produces_diagnostic.fail.cpp rename to libcxx/test/libcxx/input.output/filesystems/class.path/path.itr/reverse_iterator_produces_diagnostic.verify.cpp index 84dcbfa49c8d5..c4abf6d89f9da 100644 --- a/libcxx/test/libcxx/input.output/filesystems/class.path/path.itr/reverse_iterator_produces_diagnostic.fail.cpp +++ b/libcxx/test/libcxx/input.output/filesystems/class.path/path.itr/reverse_iterator_produces_diagnostic.verify.cpp @@ -20,7 +20,7 @@ int main(int, char**) { using namespace fs; using RIt = std::reverse_iterator; - // expected-error-re@iterator:* {{static_assert failed{{.*}} "The specified iterator type cannot be used with reverse_iterator; Using stashing iterators with reverse_iterator causes undefined behavior"}} + // expected-error-re@*:* {{static_assert failed{{.*}} "The specified iterator type cannot be used with reverse_iterator; Using stashing iterators with reverse_iterator causes undefined behavior"}} { RIt r; ((void)r); From 931e95687d6df71aa8a33376fd2d566c8153be24 Mon Sep 17 00:00:00 2001 From: Melanie Blower Date: Mon, 7 Jun 2021 09:34:09 -0400 Subject: [PATCH 064/619] [llvm][clang][fpenv] Create new intrinsic llvm.arith.fence to control FP optimization at expression level This intrinsic blocks floating point transformations by the optimizer. Author: Pengfei Reviewed By: LuoYuanke, Andy Kaylor, Craig Topper, kpn Differential Revision: https://reviews.llvm.org/D99675 --- llvm/docs/LangRef.rst | 36 ++++ .../llvm/Analysis/TargetTransformInfoImpl.h | 1 + llvm/include/llvm/CodeGen/BasicTTIImpl.h | 1 + llvm/include/llvm/CodeGen/ISDOpcodes.h | 4 + llvm/include/llvm/CodeGen/SelectionDAGISel.h | 1 + llvm/include/llvm/IR/IRBuilder.h | 7 + llvm/include/llvm/IR/Intrinsics.td | 3 + llvm/include/llvm/Support/TargetOpcodes.def | 3 + llvm/include/llvm/Target/Target.td | 7 + llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 4 + .../SelectionDAG/LegalizeVectorTypes.cpp | 3 + .../SelectionDAG/SelectionDAGBuilder.cpp | 6 + .../CodeGen/SelectionDAG/SelectionDAGISel.cpp | 8 + llvm/test/CodeGen/X86/arithmetic_fence.ll | 161 +++++++++++++++++ llvm/test/CodeGen/X86/arithmetic_fence2.ll | 170 ++++++++++++++++++ 15 files changed, 415 insertions(+) create mode 100644 llvm/test/CodeGen/X86/arithmetic_fence.ll create mode 100644 llvm/test/CodeGen/X86/arithmetic_fence2.ll diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index fc9bf536a9fd1..1986f232cc3e3 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -21453,6 +21453,42 @@ If the function's return value's second element is false, the value of the first element is undefined. +'``llvm.arithmetic.fence``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare + @llvm.arithmetic.fence( ) + +Overview: +""""""""" + +The purpose of the ``llvm.arithmetic.fence`` intrinsic +is to prevent the optimizer from performaing fast-math optimizations, +particularly reassociation, +between the argument and the expression that contains the argument. +It can be used to preserve the parentheses in the source language. + +Arguments: +"""""""""" + +The ``llvm.arithmetic.fence`` intrinsic takes only one argument. +The argument and the return value are floating-point numbers, +or vector floating-point numbers, of the same type. + +Semantics: +"""""""""" + +This intrinsic returns the value of its operand. The optimizer can optimize +the argument, but the optimizer cannot hoist any component of the operand +to the containing context, and the optimizer cannot move the calculation of +any expression in the containing context into the operand. + + '``llvm.donothing``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 2e9e27fcb86ee..657e8d81aa73f 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -576,6 +576,7 @@ class TargetTransformInfoImplBase { case Intrinsic::assume: case Intrinsic::sideeffect: case Intrinsic::pseudoprobe: + case Intrinsic::arithmetic_fence: case Intrinsic::dbg_declare: case Intrinsic::dbg_value: case Intrinsic::dbg_label: diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index c6d92ad7f99d6..85486474846d0 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1609,6 +1609,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { case Intrinsic::lifetime_end: case Intrinsic::sideeffect: case Intrinsic::pseudoprobe: + case Intrinsic::arithmetic_fence: return 0; case Intrinsic::masked_store: { Type *Ty = Tys[0]; diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h index 6eb70ab477089..8ff83043e7056 100644 --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -1097,6 +1097,10 @@ enum NodeType { /// specifier. PREFETCH, + /// ARITH_FENCE - This corresponds to a arithmetic fence intrinsic. Both its + /// operand and output are the same floating type. + ARITH_FENCE, + /// OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) /// This corresponds to the fence instruction. It takes an input chain, and /// two integer constants: an AtomicOrdering and a SynchronizationScope. diff --git a/llvm/include/llvm/CodeGen/SelectionDAGISel.h b/llvm/include/llvm/CodeGen/SelectionDAGISel.h index f6afa5eedc8d8..94ba6ad91517a 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGISel.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGISel.h @@ -318,6 +318,7 @@ class SelectionDAGISel : public MachineFunctionPass { void CannotYetSelect(SDNode *N); void Select_FREEZE(SDNode *N); + void Select_ARITH_FENCE(SDNode *N); private: void DoInstructionSelection(); diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h index 7fb504ad0d66d..aa7c90f932f82 100644 --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ -905,6 +905,13 @@ class IRBuilderBase { return CreateBinaryIntrinsic(Intrinsic::maximum, LHS, RHS, nullptr, Name); } + /// Create a call to the arithmetic_fence intrinsic. + CallInst *CreateArithmeticFence(Value *Val, Type *DstType, + const Twine &Name = "") { + return CreateIntrinsic(Intrinsic::arithmetic_fence, DstType, Val, nullptr, + Name); + } + /// Create a call to the experimental.vector.extract intrinsic. CallInst *CreateExtractVector(Type *DstType, Value *SrcVec, Value *Idx, const Twine &Name = "") { diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 58483ff47ba10..c7bdd86d82f83 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1335,6 +1335,9 @@ def int_sideeffect : DefaultAttrsIntrinsic<[], [], [IntrInaccessibleMemOnly, Int def int_pseudoprobe : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], [IntrInaccessibleMemOnly, IntrWillReturn]>; +// Arithmetic fence intrinsic. +def int_arithmetic_fence : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; + // Intrinsics to support half precision floating point format let IntrProperties = [IntrNoMem, IntrWillReturn] in { def int_convert_to_fp16 : DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_anyfloat_ty]>; diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def index a153eae965195..154329f8a9796 100644 --- a/llvm/include/llvm/Support/TargetOpcodes.def +++ b/llvm/include/llvm/Support/TargetOpcodes.def @@ -121,6 +121,9 @@ HANDLE_TARGET_OPCODE(LIFETIME_END) /// Pseudo probe HANDLE_TARGET_OPCODE(PSEUDO_PROBE) +/// Arithmetic fence. +HANDLE_TARGET_OPCODE(ARITH_FENCE) + /// A Stackmap instruction captures the location of live variables at its /// position in the instruction stream. It is followed by a shadow of bytes /// that must lie within the function and not contain another stackmap. diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td index 71c74f3d5cdec..e9720d7651677 100644 --- a/llvm/include/llvm/Target/Target.td +++ b/llvm/include/llvm/Target/Target.td @@ -1176,6 +1176,13 @@ def PSEUDO_PROBE : StandardPseudoInstruction { let AsmString = "PSEUDO_PROBE"; let hasSideEffects = 1; } +def ARITH_FENCE : StandardPseudoInstruction { + let OutOperandList = (outs unknown:$dst); + let InOperandList = (ins unknown:$src); + let AsmString = ""; + let hasSideEffects = false; + let Constraints = "$src = $dst"; +} def STACKMAP : StandardPseudoInstruction { let OutOperandList = (outs); diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 6b06bb88edbd8..f04cbb07403bb 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1332,6 +1332,10 @@ void AsmPrinter::emitFunctionBody() { case TargetOpcode::PSEUDO_PROBE: emitPseudoProbe(MI); break; + case TargetOpcode::ARITH_FENCE: + if (isVerbose()) + OutStreamer->emitRawComment("ARITH_FENCE"); + break; default: emitInstruction(&MI); if (CanDoExtraAnalysis) { diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 22adb9ae52f5c..7bc8d1d2333bd 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -90,6 +90,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::FNEARBYINT: case ISD::FNEG: case ISD::FREEZE: + case ISD::ARITH_FENCE: case ISD::FP_EXTEND: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: @@ -983,6 +984,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FNEARBYINT: case ISD::FNEG: case ISD::FREEZE: + case ISD::ARITH_FENCE: case ISD::FP_EXTEND: case ISD::FP_ROUND: case ISD::FP_TO_SINT: @@ -3146,6 +3148,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::CTTZ_ZERO_UNDEF: case ISD::FNEG: case ISD::FREEZE: + case ISD::ARITH_FENCE: case ISD::FCANONICALIZE: Res = WidenVecRes_Unary(N); break; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index b63246ac671b7..0774c7fcf0117 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6292,6 +6292,12 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), Flags)); return; + case Intrinsic::arithmetic_fence: { + setValue(&I, DAG.getNode(ISD::ARITH_FENCE, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), Flags)); + return; + } case Intrinsic::fma: setValue(&I, DAG.getNode( ISD::FMA, sdl, getValue(I.getArgOperand(0)).getValueType(), diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index e049be94fcf17..e3ff00131dbed 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -2325,6 +2325,11 @@ void SelectionDAGISel::Select_FREEZE(SDNode *N) { N->getOperand(0)); } +void SelectionDAGISel::Select_ARITH_FENCE(SDNode *N) { + CurDAG->SelectNodeTo(N, TargetOpcode::ARITH_FENCE, N->getValueType(0), + N->getOperand(0)); +} + /// GetVBR - decode a vbr encoding whose top bit is set. LLVM_ATTRIBUTE_ALWAYS_INLINE static uint64_t GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) { @@ -2876,6 +2881,9 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, case ISD::FREEZE: Select_FREEZE(NodeToMatch); return; + case ISD::ARITH_FENCE: + Select_ARITH_FENCE(NodeToMatch); + return; } assert(!NodeToMatch->isMachineOpcode() && "Node already selected!"); diff --git a/llvm/test/CodeGen/X86/arithmetic_fence.ll b/llvm/test/CodeGen/X86/arithmetic_fence.ll new file mode 100644 index 0000000000000..eddc0cc33de92 --- /dev/null +++ b/llvm/test/CodeGen/X86/arithmetic_fence.ll @@ -0,0 +1,161 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+fma | FileCheck %s --check-prefix=X86 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+fma | FileCheck %s --check-prefix=X64 + +define float @f1(float %a, float %b, float %c) { +; X86-LABEL: f1: +; X86: # %bb.0: +; X86-NEXT: pushl %eax +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X86-NEXT: vfmadd213ss {{.*#+}} xmm1 = (xmm0 * xmm1) + mem +; X86-NEXT: vmovss %xmm1, (%esp) +; X86-NEXT: flds (%esp) +; X86-NEXT: popl %eax +; X86-NEXT: .cfi_def_cfa_offset 4 +; X86-NEXT: retl +; +; X64-LABEL: f1: +; X64: # %bb.0: +; X64-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 +; X64-NEXT: retq + %mul = fmul fast float %b, %a + %add = fadd fast float %mul, %c + ret float %add +} + +define float @f2(float %a, float %b, float %c) { +; X86-LABEL: f2: +; X86: # %bb.0: +; X86-NEXT: pushl %eax +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-NEXT: vmulss {{[0-9]+}}(%esp), %xmm0, %xmm0 +; X86-NEXT: #ARITH_FENCE +; X86-NEXT: vaddss {{[0-9]+}}(%esp), %xmm0, %xmm0 +; X86-NEXT: vmovss %xmm0, (%esp) +; X86-NEXT: flds (%esp) +; X86-NEXT: popl %eax +; X86-NEXT: .cfi_def_cfa_offset 4 +; X86-NEXT: retl +; +; X64-LABEL: f2: +; X64: # %bb.0: +; X64-NEXT: vmulss %xmm0, %xmm1, %xmm0 +; X64-NEXT: #ARITH_FENCE +; X64-NEXT: vaddss %xmm2, %xmm0, %xmm0 +; X64-NEXT: retq + %mul = fmul fast float %b, %a + %tmp = call float @llvm.arithmetic.fence.f32(float %mul) + %add = fadd fast float %tmp, %c + ret float %add +} + +define double @f3(double %a) { +; X86-LABEL: f3: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %ebp, -8 +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: .cfi_def_cfa_register %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $8, %esp +; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-NEXT: vmovsd %xmm0, (%esp) +; X86-NEXT: fldl (%esp) +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: .cfi_def_cfa %esp, 4 +; X86-NEXT: retl +; +; X64-LABEL: f3: +; X64: # %bb.0: +; X64-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-NEXT: retq + %1 = fadd fast double %a, %a + %2 = fadd fast double %a, %a + %3 = fadd fast double %1, %2 + ret double %3 +} + +define double @f4(double %a) { +; X86-LABEL: f4: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %ebp, -8 +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: .cfi_def_cfa_register %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $8, %esp +; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-NEXT: vaddsd %xmm0, %xmm0, %xmm0 +; X86-NEXT: vmovapd %xmm0, %xmm1 +; X86-NEXT: #ARITH_FENCE +; X86-NEXT: vaddsd %xmm0, %xmm1, %xmm0 +; X86-NEXT: vmovsd %xmm0, (%esp) +; X86-NEXT: fldl (%esp) +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: .cfi_def_cfa %esp, 4 +; X86-NEXT: retl +; +; X64-LABEL: f4: +; X64: # %bb.0: +; X64-NEXT: vaddsd %xmm0, %xmm0, %xmm0 +; X64-NEXT: vmovapd %xmm0, %xmm1 +; X64-NEXT: #ARITH_FENCE +; X64-NEXT: vaddsd %xmm0, %xmm1, %xmm0 +; X64-NEXT: retq + %1 = fadd fast double %a, %a + %t = call double @llvm.arithmetic.fence.f64(double %1) + %2 = fadd fast double %a, %a + %3 = fadd fast double %t, %2 + ret double %3 +} + +define <2 x float> @f5(<2 x float> %a) { +; X86-LABEL: f5: +; X86: # %bb.0: +; X86-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-NEXT: retl +; +; X64-LABEL: f5: +; X64: # %bb.0: +; X64-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-NEXT: retq + %1 = fadd fast <2 x float> %a, %a + %2 = fadd fast <2 x float> %a, %a + %3 = fadd fast <2 x float> %1, %2 + ret <2 x float> %3 +} + +define <2 x float> @f6(<2 x float> %a) { +; X86-LABEL: f6: +; X86: # %bb.0: +; X86-NEXT: vaddps %xmm0, %xmm0, %xmm0 +; X86-NEXT: vmovaps %xmm0, %xmm1 +; X86-NEXT: #ARITH_FENCE +; X86-NEXT: vaddps %xmm0, %xmm1, %xmm0 +; X86-NEXT: retl +; +; X64-LABEL: f6: +; X64: # %bb.0: +; X64-NEXT: vaddps %xmm0, %xmm0, %xmm0 +; X64-NEXT: vmovaps %xmm0, %xmm1 +; X64-NEXT: #ARITH_FENCE +; X64-NEXT: vaddps %xmm0, %xmm1, %xmm0 +; X64-NEXT: retq + %1 = fadd fast <2 x float> %a, %a + %t = call <2 x float> @llvm.arithmetic.fence.v2f32(<2 x float> %1) + %2 = fadd fast <2 x float> %a, %a + %3 = fadd fast <2 x float> %t, %2 + ret <2 x float> %3 +} + +declare float @llvm.arithmetic.fence.f32(float) +declare double @llvm.arithmetic.fence.f64(double) +declare <2 x float> @llvm.arithmetic.fence.v2f32(<2 x float>) diff --git a/llvm/test/CodeGen/X86/arithmetic_fence2.ll b/llvm/test/CodeGen/X86/arithmetic_fence2.ll new file mode 100644 index 0000000000000..22dab9ffa8227 --- /dev/null +++ b/llvm/test/CodeGen/X86/arithmetic_fence2.ll @@ -0,0 +1,170 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64 + +define double @f1(double %a) { +; X86-LABEL: f1: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %ebp, -8 +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: .cfi_def_cfa_register %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $8, %esp +; X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-NEXT: mulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-NEXT: movsd %xmm0, (%esp) +; X86-NEXT: fldl (%esp) +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: .cfi_def_cfa %esp, 4 +; X86-NEXT: retl +; +; X64-LABEL: f1: +; X64: # %bb.0: +; X64-NEXT: mulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-NEXT: retq + %1 = fadd fast double %a, %a + %2 = fadd fast double %a, %a + %3 = fadd fast double %1, %2 + ret double %3 +} + +define double @f2(double %a) { +; X86-LABEL: f2: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %ebp, -8 +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: .cfi_def_cfa_register %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $8, %esp +; X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-NEXT: addsd %xmm0, %xmm0 +; X86-NEXT: movapd %xmm0, %xmm1 +; X86-NEXT: #ARITH_FENCE +; X86-NEXT: addsd %xmm0, %xmm1 +; X86-NEXT: movsd %xmm1, (%esp) +; X86-NEXT: fldl (%esp) +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: .cfi_def_cfa %esp, 4 +; X86-NEXT: retl +; +; X64-LABEL: f2: +; X64: # %bb.0: +; X64-NEXT: addsd %xmm0, %xmm0 +; X64-NEXT: movapd %xmm0, %xmm1 +; X64-NEXT: #ARITH_FENCE +; X64-NEXT: addsd %xmm0, %xmm1 +; X64-NEXT: movapd %xmm1, %xmm0 +; X64-NEXT: retq + %1 = fadd fast double %a, %a + %t = call double @llvm.arithmetic.fence.f64(double %1) + %2 = fadd fast double %a, %a + %3 = fadd fast double %t, %2 + ret double %3 +} + +define <2 x float> @f3(<2 x float> %a) { +; X86-LABEL: f3: +; X86: # %bb.0: +; X86-NEXT: mulps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-NEXT: retl +; +; X64-LABEL: f3: +; X64: # %bb.0: +; X64-NEXT: mulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-NEXT: retq + %1 = fadd fast <2 x float> %a, %a + %2 = fadd fast <2 x float> %a, %a + %3 = fadd fast <2 x float> %1, %2 + ret <2 x float> %3 +} + +define <2 x float> @f4(<2 x float> %a) { +; X86-LABEL: f4: +; X86: # %bb.0: +; X86-NEXT: addps %xmm0, %xmm0 +; X86-NEXT: movaps %xmm0, %xmm1 +; X86-NEXT: #ARITH_FENCE +; X86-NEXT: addps %xmm0, %xmm1 +; X86-NEXT: movaps %xmm1, %xmm0 +; X86-NEXT: retl +; +; X64-LABEL: f4: +; X64: # %bb.0: +; X64-NEXT: addps %xmm0, %xmm0 +; X64-NEXT: movaps %xmm0, %xmm1 +; X64-NEXT: #ARITH_FENCE +; X64-NEXT: addps %xmm0, %xmm1 +; X64-NEXT: movaps %xmm1, %xmm0 +; X64-NEXT: retq + %1 = fadd fast <2 x float> %a, %a + %t = call <2 x float> @llvm.arithmetic.fence.v2f32(<2 x float> %1) + %2 = fadd fast <2 x float> %a, %a + %3 = fadd fast <2 x float> %t, %2 + ret <2 x float> %3 +} + +define <8 x float> @f5(<8 x float> %a) { +; X86-LABEL: f5: +; X86: # %bb.0: +; X86-NEXT: movaps {{.*#+}} xmm2 = [4.0E+0,4.0E+0,4.0E+0,4.0E+0] +; X86-NEXT: mulps %xmm2, %xmm0 +; X86-NEXT: mulps %xmm2, %xmm1 +; X86-NEXT: retl +; +; X64-LABEL: f5: +; X64: # %bb.0: +; X64-NEXT: movaps {{.*#+}} xmm2 = [4.0E+0,4.0E+0,4.0E+0,4.0E+0] +; X64-NEXT: mulps %xmm2, %xmm0 +; X64-NEXT: mulps %xmm2, %xmm1 +; X64-NEXT: retq + %1 = fadd fast <8 x float> %a, %a + %2 = fadd fast <8 x float> %a, %a + %3 = fadd fast <8 x float> %1, %2 + ret <8 x float> %3 +} + +define <8 x float> @f6(<8 x float> %a) { +; X86-LABEL: f6: +; X86: # %bb.0: +; X86-NEXT: addps %xmm0, %xmm0 +; X86-NEXT: addps %xmm1, %xmm1 +; X86-NEXT: movaps %xmm1, %xmm2 +; X86-NEXT: #ARITH_FENCE +; X86-NEXT: movaps %xmm0, %xmm3 +; X86-NEXT: #ARITH_FENCE +; X86-NEXT: addps %xmm0, %xmm3 +; X86-NEXT: addps %xmm1, %xmm2 +; X86-NEXT: movaps %xmm3, %xmm0 +; X86-NEXT: movaps %xmm2, %xmm1 +; X86-NEXT: retl +; +; X64-LABEL: f6: +; X64: # %bb.0: +; X64-NEXT: addps %xmm0, %xmm0 +; X64-NEXT: addps %xmm1, %xmm1 +; X64-NEXT: movaps %xmm1, %xmm2 +; X64-NEXT: #ARITH_FENCE +; X64-NEXT: movaps %xmm0, %xmm3 +; X64-NEXT: #ARITH_FENCE +; X64-NEXT: addps %xmm0, %xmm3 +; X64-NEXT: addps %xmm1, %xmm2 +; X64-NEXT: movaps %xmm3, %xmm0 +; X64-NEXT: movaps %xmm2, %xmm1 +; X64-NEXT: retq + %1 = fadd fast <8 x float> %a, %a + %t = call <8 x float> @llvm.arithmetic.fence.v8f32(<8 x float> %1) + %2 = fadd fast <8 x float> %a, %a + %3 = fadd fast <8 x float> %t, %2 + ret <8 x float> %3 +} + +declare float @llvm.arithmetic.fence.f32(float) +declare double @llvm.arithmetic.fence.f64(double) +declare <2 x float> @llvm.arithmetic.fence.v2f32(<2 x float>) +declare <8 x float> @llvm.arithmetic.fence.v8f32(<8 x float>) From 2c02b0c3f45414ac6c64583e006a26113c028304 Mon Sep 17 00:00:00 2001 From: Melanie Blower Date: Tue, 8 Jun 2021 16:55:54 -0400 Subject: [PATCH 065/619] [clang][PATCH][nfc] Refactor TargetInfo::adjust to pass DiagnosticsEngine to allow diagnostics on target-unsupported options Reviewed By: aaron.ballman Differential Revision: https://reviews.llvm.org/D104729 --- clang/include/clang/Basic/TargetInfo.h | 2 +- clang/lib/Basic/TargetInfo.cpp | 2 +- clang/lib/Basic/Targets/AMDGPU.cpp | 4 ++-- clang/lib/Basic/Targets/AMDGPU.h | 2 +- clang/lib/Basic/Targets/PPC.cpp | 4 ++-- clang/lib/Basic/Targets/PPC.h | 2 +- clang/lib/Basic/Targets/SPIR.h | 4 ++-- clang/lib/Basic/Targets/WebAssembly.cpp | 3 ++- clang/lib/Basic/Targets/WebAssembly.h | 2 +- clang/lib/Frontend/ASTUnit.cpp | 2 +- clang/lib/Frontend/CompilerInstance.cpp | 4 ++-- clang/lib/Interpreter/Interpreter.cpp | 2 +- clang/tools/clang-import-test/clang-import-test.cpp | 2 +- 13 files changed, 18 insertions(+), 17 deletions(-) diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index d59bad30e7428..20f6afa76cbb3 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -1162,7 +1162,7 @@ class TargetInfo : public virtual TransferrableTargetInfo, /// Apply changes to the target information with respect to certain /// language options which change the target configuration and adjust /// the language based on the target options where applicable. - virtual void adjust(LangOptions &Opts); + virtual void adjust(DiagnosticsEngine &Diags, LangOptions &Opts); /// Adjust target options based on codegen options. virtual void adjustTargetOptions(const CodeGenOptions &CGOpts, diff --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp index e73b4a3a40c74..4c2859e5eda7f 100644 --- a/clang/lib/Basic/TargetInfo.cpp +++ b/clang/lib/Basic/TargetInfo.cpp @@ -346,7 +346,7 @@ bool TargetInfo::isTypeSigned(IntType T) { /// Apply changes to the target information with respect to certain /// language options which change the target configuration and adjust /// the language based on the target options where applicable. -void TargetInfo::adjust(LangOptions &Opts) { +void TargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) { if (Opts.NoBitFieldTypeAlign) UseBitFieldTypeAlignment = false; diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index 595132e2e70ba..fac786dbcf9e2 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -358,8 +358,8 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; } -void AMDGPUTargetInfo::adjust(LangOptions &Opts) { - TargetInfo::adjust(Opts); +void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) { + TargetInfo::adjust(Diags, Opts); // ToDo: There are still a few places using default address space as private // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL // can be removed from the following line. diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h index fe5c61c6ba2bb..244a6e0446905 100644 --- a/clang/lib/Basic/Targets/AMDGPU.h +++ b/clang/lib/Basic/Targets/AMDGPU.h @@ -93,7 +93,7 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo { void setAddressSpaceMap(bool DefaultIsPrivate); - void adjust(LangOptions &Opts) override; + void adjust(DiagnosticsEngine &Diags, LangOptions &Opts) override; uint64_t getPointerWidthV(unsigned AddrSpace) const override { if (isR600(getTriple())) diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp index 6860b5e5d02fa..d431dda970222 100644 --- a/clang/lib/Basic/Targets/PPC.cpp +++ b/clang/lib/Basic/Targets/PPC.cpp @@ -614,10 +614,10 @@ void PPCTargetInfo::fillValidCPUList(SmallVectorImpl &Values) const { Values.append(std::begin(ValidCPUNames), std::end(ValidCPUNames)); } -void PPCTargetInfo::adjust(LangOptions &Opts) { +void PPCTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) { if (HasAltivec) Opts.AltiVec = 1; - TargetInfo::adjust(Opts); + TargetInfo::adjust(Diags, Opts); if (LongDoubleFormat != &llvm::APFloat::IEEEdouble()) LongDoubleFormat = Opts.PPCIEEELongDouble ? &llvm::APFloat::IEEEquad() diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h index 554f2174fee00..18ee1194c759d 100644 --- a/clang/lib/Basic/Targets/PPC.h +++ b/clang/lib/Basic/Targets/PPC.h @@ -89,7 +89,7 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo { } // Set the language option for altivec based on our value. - void adjust(LangOptions &Opts) override; + void adjust(DiagnosticsEngine &Diags, LangOptions &Opts) override; // Note: GCC recognizes the following additional cpus: // 401, 403, 405, 405fp, 440fp, 464, 464fp, 476, 476fp, 505, 740, 801, diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h index c429b27709ecb..50f34abd66309 100644 --- a/clang/lib/Basic/Targets/SPIR.h +++ b/clang/lib/Basic/Targets/SPIR.h @@ -135,8 +135,8 @@ class LLVM_LIBRARY_VISIBILITY SPIRTargetInfo : public TargetInfo { AddrSpaceMap = DefaultIsGeneric ? &SPIRDefIsGenMap : &SPIRDefIsPrivMap; } - void adjust(LangOptions &Opts) override { - TargetInfo::adjust(Opts); + void adjust(DiagnosticsEngine &Diags, LangOptions &Opts) override { + TargetInfo::adjust(Diags, Opts); // FIXME: SYCL specification considers unannotated pointers and references // to be pointing to the generic address space. See section 5.9.3 of // SYCL 2020 specification. diff --git a/clang/lib/Basic/Targets/WebAssembly.cpp b/clang/lib/Basic/Targets/WebAssembly.cpp index 2a5055c3d534b..7ef79849cb75d 100644 --- a/clang/lib/Basic/Targets/WebAssembly.cpp +++ b/clang/lib/Basic/Targets/WebAssembly.cpp @@ -234,7 +234,8 @@ ArrayRef WebAssemblyTargetInfo::getTargetBuiltins() const { Builtin::FirstTSBuiltin); } -void WebAssemblyTargetInfo::adjust(LangOptions &Opts) { +void WebAssemblyTargetInfo::adjust(DiagnosticsEngine &Diags, + LangOptions &Opts) { // If the Atomics feature isn't available, turn off POSIXThreads and // ThreadModel, so that we don't predefine _REENTRANT or __STDCPP_THREADS__. if (!HasAtomics) { diff --git a/clang/lib/Basic/Targets/WebAssembly.h b/clang/lib/Basic/Targets/WebAssembly.h index 70115183e46b9..b29730c5d706b 100644 --- a/clang/lib/Basic/Targets/WebAssembly.h +++ b/clang/lib/Basic/Targets/WebAssembly.h @@ -138,7 +138,7 @@ class LLVM_LIBRARY_VISIBILITY WebAssemblyTargetInfo : public TargetInfo { bool hasProtectedVisibility() const override { return false; } - void adjust(LangOptions &Opts) override; + void adjust(DiagnosticsEngine &Diags, LangOptions &Opts) override; }; class LLVM_LIBRARY_VISIBILITY WebAssembly32TargetInfo diff --git a/clang/lib/Frontend/ASTUnit.cpp b/clang/lib/Frontend/ASTUnit.cpp index 988090a8b1b13..4f92833e4229c 100644 --- a/clang/lib/Frontend/ASTUnit.cpp +++ b/clang/lib/Frontend/ASTUnit.cpp @@ -588,7 +588,7 @@ class ASTInfoCollector : public ASTReaderListener { // // FIXME: We shouldn't need to do this, the target should be immutable once // created. This complexity should be lifted elsewhere. - Target->adjust(LangOpt); + Target->adjust(PP.getDiagnostics(), LangOpt); // Initialize the preprocessor. PP.Initialize(*Target); diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp index 063384130f730..2ae3be6814dec 100644 --- a/clang/lib/Frontend/CompilerInstance.cpp +++ b/clang/lib/Frontend/CompilerInstance.cpp @@ -142,7 +142,7 @@ bool CompilerInstance::createTarget() { // Inform the target of the language options. // FIXME: We shouldn't need to do this, the target should be immutable once // created. This complexity should be lifted elsewhere. - getTarget().adjust(getLangOpts()); + getTarget().adjust(getDiagnostics(), getLangOpts()); // Adjust target options based on codegen options. getTarget().adjustTargetOptions(getCodeGenOpts(), getTargetOpts()); @@ -457,7 +457,7 @@ void CompilerInstance::createPreprocessor(TranslationUnitKind TUKind) { getSourceManager(), *HeaderInfo, *this, /*IdentifierInfoLookup=*/nullptr, /*OwnsHeaderSearch=*/true, TUKind); - getTarget().adjust(getLangOpts()); + getTarget().adjust(getDiagnostics(), getLangOpts()); PP->Initialize(getTarget(), getAuxTarget()); if (PPOpts.DetailedRecord) diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp index 711a5e9ff0168..768847f9f0352 100644 --- a/clang/lib/Interpreter/Interpreter.cpp +++ b/clang/lib/Interpreter/Interpreter.cpp @@ -110,7 +110,7 @@ CreateCI(const llvm::opt::ArgStringList &Argv) { "Initialization failed. " "Target is missing"); - Clang->getTarget().adjust(Clang->getLangOpts()); + Clang->getTarget().adjust(Clang->getDiagnostics(), Clang->getLangOpts()); return std::move(Clang); } diff --git a/clang/tools/clang-import-test/clang-import-test.cpp b/clang/tools/clang-import-test/clang-import-test.cpp index df173cf49f35e..fa5d7a54f53b4 100644 --- a/clang/tools/clang-import-test/clang-import-test.cpp +++ b/clang/tools/clang-import-test/clang-import-test.cpp @@ -208,7 +208,7 @@ std::unique_ptr BuildCompilerInstance() { TargetInfo *TI = TargetInfo::CreateTargetInfo( Ins->getDiagnostics(), Ins->getInvocation().TargetOpts); Ins->setTarget(TI); - Ins->getTarget().adjust(Ins->getLangOpts()); + Ins->getTarget().adjust(Ins->getDiagnostics(), Ins->getLangOpts()); Ins->createFileManager(); Ins->createSourceManager(Ins->getFileManager()); Ins->createPreprocessor(TU_Complete); From 4f1238e44d803b145997fa984677a6c5cdf1f417 Mon Sep 17 00:00:00 2001 From: Melanie Blower Date: Wed, 9 Jun 2021 11:38:34 -0400 Subject: [PATCH 066/619] [clang][patch][fpenv] Add builtin __arithmetic_fence and option fprotect-parens This patch adds a new clang builtin, __arithmetic_fence. The purpose of the builtin is to provide the user fine control, at the expression level, over floating point optimization when -ffast-math (-ffp-model=fast) is enabled. The builtin prevents the optimizer from rearranging floating point expression evaluation. The new option fprotect-parens has the same effect on parenthesized expressions, forcing the optimizer to respect the parentheses. Reviewed By: aaron.ballman, kpn Differential Revision: https://reviews.llvm.org/D100118 --- clang/docs/UsersManual.rst | 20 +++++ clang/include/clang/Basic/Builtins.def | 3 + .../clang/Basic/DiagnosticSemaKinds.td | 3 + clang/include/clang/Basic/LangOptions.def | 2 + clang/include/clang/Basic/TargetInfo.h | 3 + clang/include/clang/Driver/Options.td | 9 ++- clang/include/clang/Sema/Sema.h | 4 + clang/lib/AST/ExprConstant.cpp | 3 + clang/lib/Basic/TargetInfo.cpp | 5 ++ clang/lib/Basic/Targets/X86.h | 2 + clang/lib/CodeGen/CGBuiltin.cpp | 30 ++++++++ clang/lib/Driver/ToolChains/Clang.cpp | 5 ++ clang/lib/Sema/SemaChecking.cpp | 27 +++++++ clang/lib/Sema/SemaCoroutine.cpp | 32 ++------ clang/lib/Sema/SemaExpr.cpp | 27 +++++++ clang/test/AST/arithmetic-fence-builtin.c | 46 ++++++++++++ clang/test/CodeGen/arithmetic-fence-builtin.c | 74 +++++++++++++++++++ clang/test/Driver/clang_f_opts.c | 3 +- clang/test/Sema/arithmetic-fence-builtin.c | 48 ++++++++++++ 19 files changed, 318 insertions(+), 28 deletions(-) create mode 100644 clang/test/AST/arithmetic-fence-builtin.c create mode 100644 clang/test/CodeGen/arithmetic-fence-builtin.c create mode 100644 clang/test/Sema/arithmetic-fence-builtin.c diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index 244212a1336db..9e8bac635337e 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -1478,6 +1478,26 @@ Note that floating-point operations performed as part of constant initialization * ``maytrap`` The compiler avoids transformations that may raise exceptions that would not have been raised by the original code. Constant folding performed by the compiler is exempt from this option. * ``strict`` The compiler ensures that all transformations strictly preserve the floating point exception semantics of the original code. +.. option:: -f[no-]protect-parens: + + This option pertains to floating-point types, complex types with + floating-point components, and vectors of these types. Some arithmetic + expression transformations that are mathematically correct and permissible + according to the C and C++ language standards may be incorrect when dealing + with floating-point types, such as reassociation and distribution. Further, + the optimizer may ignore parentheses when computing arithmetic expressions + in circumstances where the parenthesized and unparenthesized expression + express the same mathematical value. For example (a+b)+c is the same + mathematical value as a+(b+c), but the optimizer is free to evaluate the + additions in any order regardless of the parentheses. When enabled, this + option forces the optimizer to honor the order of operations with respect + to parentheses in all circumstances. + + Note that floating-point contraction (option `-ffp-contract=`) is disabled + when `-fprotect-parens` is enabled. Also note that in safe floating-point + modes, such as `-ffp-model=precise` or `-ffp-model=strict`, this option + has no effect because the optimizer is prohibited from making unsafe + transformations. .. _fp-constant-eval: diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def index 5a9d0a0018292..33d3e6dc4e7db 100644 --- a/clang/include/clang/Basic/Builtins.def +++ b/clang/include/clang/Basic/Builtins.def @@ -1657,6 +1657,9 @@ BUILTIN(__builtin_ms_va_start, "vc*&.", "nt") BUILTIN(__builtin_ms_va_end, "vc*&", "n") BUILTIN(__builtin_ms_va_copy, "vc*&c*&", "n") +// Arithmetic Fence: to prevent FP reordering and reassociation optimizations +LANGBUILTIN(__arithmetic_fence, "v.", "t", ALL_LANGUAGES) + #undef BUILTIN #undef LIBBUILTIN #undef LANGBUILTIN diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 70a22fd2506a3..22c2a1a39ea13 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -8530,6 +8530,9 @@ def err_typecheck_expect_scalar_operand : Error< "operand of type %0 where arithmetic or pointer type is required">; def err_typecheck_cond_incompatible_operands : Error< "incompatible operand types%diff{ ($ and $)|}0,1">; +def err_typecheck_expect_flt_or_vector : Error< + "invalid operand of type %0 where floating, complex or " + "a vector of such types is required">; def err_cast_selector_expr : Error< "cannot type cast @selector expression">; def ext_typecheck_cond_incompatible_pointers : ExtWarn< diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index 465bad8d7d112..b18e957a58f4c 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -199,6 +199,8 @@ COMPATIBLE_LANGOPT(Deprecated , 1, 0, "__DEPRECATED predefined macro") COMPATIBLE_LANGOPT(FastMath , 1, 0, "fast FP math optimizations, and __FAST_MATH__ predefined macro") COMPATIBLE_LANGOPT(FiniteMathOnly , 1, 0, "__FINITE_MATH_ONLY__ predefined macro") COMPATIBLE_LANGOPT(UnsafeFPMath , 1, 0, "Unsafe Floating Point Math") +COMPATIBLE_LANGOPT(ProtectParens , 1, 0, "optimizer honors parentheses " + "when floating-point expressions are evaluated") BENIGN_LANGOPT(AllowFPReassoc , 1, 0, "Permit Floating Point reassociation") BENIGN_LANGOPT(NoHonorNaNs , 1, 0, "Permit Floating Point optimization without regard to NaN") BENIGN_LANGOPT(NoHonorInfs , 1, 0, "Permit Floating Point optimization without regard to infinities") diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index 20f6afa76cbb3..4f0cbf986b31b 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -1424,6 +1424,9 @@ class TargetInfo : public virtual TransferrableTargetInfo, /// Whether the option -fextend-arguments={32,64} is supported on the target. virtual bool supportsExtendIntArgs() const { return false; } + /// Controls if __arithmetic_fence is supported in the targeted backend. + virtual bool checkArithmeticFenceSupported() const { return false; } + /// Gets the default calling convention for the given target and /// declaration context. virtual CallingConv getDefaultCallingConv() const { diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index f1455f5461990..0b9596c68f5fb 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1762,6 +1762,13 @@ defm strict_float_cast_overflow : BoolFOption<"strict-float-cast-overflow", " of the target's native float-to-int conversion instructions">, PosFlag>; +defm protect_parens : BoolFOption<"protect-parens", + LangOpts<"ProtectParens">, DefaultFalse, + PosFlag, + NegFlag>; + def ffor_scope : Flag<["-"], "ffor-scope">, Group; def fno_for_scope : Flag<["-"], "fno-for-scope">, Group; @@ -4408,7 +4415,7 @@ defm integer_4_integer_8 : BooleanFFlag<"integer-4-integer-8">, Group, Group; defm module_private : BooleanFFlag<"module-private">, Group; defm pack_derived : BooleanFFlag<"pack-derived">, Group; -defm protect_parens : BooleanFFlag<"protect-parens">, Group; +//defm protect_parens : BooleanFFlag<"protect-parens">, Group; defm range_check : BooleanFFlag<"range-check">, Group; defm real_4_real_10 : BooleanFFlag<"real-4-real-10">, Group; defm real_4_real_16 : BooleanFFlag<"real-4-real-16">, Group; diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 4c3a7035bcc94..3f7db9bc5be8b 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -36,6 +36,7 @@ #include "clang/AST/TypeLoc.h" #include "clang/AST/TypeOrdering.h" #include "clang/Basic/BitmaskEnum.h" +#include "clang/Basic/Builtins.h" #include "clang/Basic/ExpressionTraits.h" #include "clang/Basic/Module.h" #include "clang/Basic/OpenCLOptions.h" @@ -5424,6 +5425,8 @@ class Sema final { Expr *ExecConfig = nullptr, bool IsExecConfig = false, bool AllowRecovery = false); + Expr *BuildBuiltinCallExpr(SourceLocation Loc, Builtin::ID Id, + MultiExprArg CallArgs); enum class AtomicArgumentOrder { API, AST }; ExprResult BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange, @@ -12583,6 +12586,7 @@ class Sema final { private: bool SemaBuiltinPrefetch(CallExpr *TheCall); bool SemaBuiltinAllocaWithAlign(CallExpr *TheCall); + bool SemaBuiltinArithmeticFence(CallExpr *TheCall); bool SemaBuiltinAssume(CallExpr *TheCall); bool SemaBuiltinAssumeAligned(CallExpr *TheCall); bool SemaBuiltinLongjmp(CallExpr *TheCall); diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index a4d8fec09748d..01c0168d61a40 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -13692,6 +13692,9 @@ bool FloatExprEvaluator::VisitCallExpr(const CallExpr *E) { Result.changeSign(); return true; + case Builtin::BI__arithmetic_fence: + return EvaluateFloat(E->getArg(0), Result, Info); + // FIXME: Builtin::BI__builtin_powi // FIXME: Builtin::BI__builtin_powif // FIXME: Builtin::BI__builtin_powil diff --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp index 4c2859e5eda7f..88086fa2fed74 100644 --- a/clang/lib/Basic/TargetInfo.cpp +++ b/clang/lib/Basic/TargetInfo.cpp @@ -430,6 +430,11 @@ void TargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) { // its corresponding signed type. PaddingOnUnsignedFixedPoint |= Opts.PaddingOnUnsignedFixedPoint; CheckFixedPointBits(); + + if (Opts.ProtectParens && !checkArithmeticFenceSupported()) { + Diags.Report(diag::err_opt_not_valid_on_target) << "-fprotect-parens"; + Opts.ProtectParens = false; + } } bool TargetInfo::initFeatureMap( diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h index 7639ea835ebc7..e798962617a30 100644 --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -362,6 +362,8 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { } } + bool checkArithmeticFenceSupported() const override { return true; } + CallingConv getDefaultCallingConv() const override { return CC_C; } diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 2e9454921ffa8..6702181e7b766 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -2833,6 +2833,36 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Function *FnAssume = CGM.getIntrinsic(Intrinsic::assume); return RValue::get(Builder.CreateCall(FnAssume, ArgValue)); } + case Builtin::BI__arithmetic_fence: { + // Create the builtin call if FastMath is selected, and the target + // supports the builtin, otherwise just return the argument. + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); + llvm::FastMathFlags FMF = Builder.getFastMathFlags(); + bool isArithmeticFenceEnabled = + FMF.allowReassoc() && + getContext().getTargetInfo().checkArithmeticFenceSupported(); + QualType ArgType = E->getArg(0)->getType(); + if (ArgType->isComplexType()) { + if (isArithmeticFenceEnabled) { + QualType ElementType = ArgType->castAs()->getElementType(); + ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); + Value *Real = Builder.CreateArithmeticFence(ComplexVal.first, + ConvertType(ElementType)); + Value *Imag = Builder.CreateArithmeticFence(ComplexVal.second, + ConvertType(ElementType)); + return RValue::getComplex(std::make_pair(Real, Imag)); + } + ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); + Value *Real = ComplexVal.first; + Value *Imag = ComplexVal.second; + return RValue::getComplex(std::make_pair(Real, Imag)); + } + Value *ArgValue = EmitScalarExpr(E->getArg(0)); + if (isArithmeticFenceEnabled) + return RValue::get( + Builder.CreateArithmeticFence(ArgValue, ConvertType(ArgType))); + return RValue::get(ArgValue); + } case Builtin::BI__builtin_bswap16: case Builtin::BI__builtin_bswap32: case Builtin::BI__builtin_bswap64: { diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index c265e1c4e53cb..fd26d04e39bcc 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -4975,6 +4975,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, false)) CmdArgs.push_back("-fsplit-stack"); + // -fprotect-parens=0 is default. + if (Args.hasFlag(options::OPT_fprotect_parens, + options::OPT_fno_protect_parens, false)) + CmdArgs.push_back("-fprotect-parens"); + RenderFloatingPointOptions(TC, D, OFastEnabled, Args, CmdArgs, JA); if (Arg *A = Args.getLastArg(options::OPT_fextend_args_EQ)) { diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 17eeebb0c6799..4d04f3017a2ef 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -1554,6 +1554,10 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, Diag(TheCall->getBeginLoc(), diag::warn_alloca) << TheCall->getDirectCallee(); break; + case Builtin::BI__arithmetic_fence: + if (SemaBuiltinArithmeticFence(TheCall)) + return ExprError(); + break; case Builtin::BI__assume: case Builtin::BI__builtin_assume: if (SemaBuiltinAssume(TheCall)) @@ -6549,6 +6553,29 @@ bool Sema::SemaBuiltinPrefetch(CallExpr *TheCall) { return false; } +/// SemaBuiltinArithmeticFence - Handle __arithmetic_fence. +bool Sema::SemaBuiltinArithmeticFence(CallExpr *TheCall) { + if (!Context.getTargetInfo().checkArithmeticFenceSupported()) + return Diag(TheCall->getBeginLoc(), diag::err_builtin_target_unsupported) + << SourceRange(TheCall->getBeginLoc(), TheCall->getEndLoc()); + if (checkArgCount(*this, TheCall, 1)) + return true; + Expr *Arg = TheCall->getArg(0); + if (Arg->isInstantiationDependent()) + return false; + + QualType ArgTy = Arg->getType(); + if (!ArgTy->hasFloatingRepresentation()) + return Diag(TheCall->getEndLoc(), diag::err_typecheck_expect_flt_or_vector) + << ArgTy; + if (Arg->isLValue()) { + ExprResult FirstArg = DefaultLvalueConversion(Arg); + TheCall->setArg(0, FirstArg.get()); + } + TheCall->setType(TheCall->getArg(0)->getType()); + return false; +} + /// SemaBuiltinAssume - Handle __assume (MS Extension). // __assume does not evaluate its arguments, and should warn if its argument // has side effects. diff --git a/clang/lib/Sema/SemaCoroutine.cpp b/clang/lib/Sema/SemaCoroutine.cpp index cec80436d575e..31a4092b5b604 100644 --- a/clang/lib/Sema/SemaCoroutine.cpp +++ b/clang/lib/Sema/SemaCoroutine.cpp @@ -291,26 +291,6 @@ static ExprResult buildOperatorCoawaitCall(Sema &SemaRef, Scope *S, cast(R.get())); } -static Expr *buildBuiltinCall(Sema &S, SourceLocation Loc, Builtin::ID Id, - MultiExprArg CallArgs) { - StringRef Name = S.Context.BuiltinInfo.getName(Id); - LookupResult R(S, &S.Context.Idents.get(Name), Loc, Sema::LookupOrdinaryName); - S.LookupName(R, S.TUScope, /*AllowBuiltinCreation=*/true); - - auto *BuiltInDecl = R.getAsSingle(); - assert(BuiltInDecl && "failed to find builtin declaration"); - - ExprResult DeclRef = - S.BuildDeclRefExpr(BuiltInDecl, BuiltInDecl->getType(), VK_LValue, Loc); - assert(DeclRef.isUsable() && "Builtin reference cannot fail"); - - ExprResult Call = - S.BuildCallExpr(/*Scope=*/nullptr, DeclRef.get(), Loc, CallArgs, Loc); - - assert(!Call.isInvalid() && "Call to builtin cannot fail!"); - return Call.get(); -} - static ExprResult buildCoroutineHandle(Sema &S, QualType PromiseType, SourceLocation Loc) { QualType CoroHandleType = lookupCoroutineHandleType(S, PromiseType, Loc); @@ -327,7 +307,7 @@ static ExprResult buildCoroutineHandle(Sema &S, QualType PromiseType, } Expr *FramePtr = - buildBuiltinCall(S, Loc, Builtin::BI__builtin_coro_frame, {}); + S.BuildBuiltinCallExpr(Loc, Builtin::BI__builtin_coro_frame, {}); CXXScopeSpec SS; ExprResult FromAddr = @@ -404,8 +384,8 @@ static Expr *maybeTailCall(Sema &S, QualType RetType, Expr *E, // the resume call and return instruction, which would interfere with the // musttail call contract. JustAddress = S.MaybeCreateExprWithCleanups(JustAddress); - return buildBuiltinCall(S, Loc, Builtin::BI__builtin_coro_resume, - JustAddress); + return S.BuildBuiltinCallExpr(Loc, Builtin::BI__builtin_coro_resume, + JustAddress); } /// Build calls to await_ready, await_suspend, and await_resume for a co_await @@ -1357,10 +1337,10 @@ bool CoroutineStmtBuilder::makeNewAndDeleteExpr() { return false; Expr *FramePtr = - buildBuiltinCall(S, Loc, Builtin::BI__builtin_coro_frame, {}); + S.BuildBuiltinCallExpr(Loc, Builtin::BI__builtin_coro_frame, {}); Expr *FrameSize = - buildBuiltinCall(S, Loc, Builtin::BI__builtin_coro_size, {}); + S.BuildBuiltinCallExpr(Loc, Builtin::BI__builtin_coro_size, {}); // Make new call. @@ -1389,7 +1369,7 @@ bool CoroutineStmtBuilder::makeNewAndDeleteExpr() { return false; Expr *CoroFree = - buildBuiltinCall(S, Loc, Builtin::BI__builtin_coro_free, {FramePtr}); + S.BuildBuiltinCallExpr(Loc, Builtin::BI__builtin_coro_free, {FramePtr}); SmallVector DeleteArgs{CoroFree}; diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 6031dff673351..3df74b5ea9dbc 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -4054,6 +4054,10 @@ ExprResult Sema::ActOnNumericConstant(const Token &Tok, Scope *UDLScope) { ExprResult Sema::ActOnParenExpr(SourceLocation L, SourceLocation R, Expr *E) { assert(E && "ActOnParenExpr() missing expr"); + QualType ExprTy = E->getType(); + if (getLangOpts().ProtectParens && CurFPFeatures.getAllowFPReassociate() && + !E->isLValue() && ExprTy->hasFloatingRepresentation()) + return BuildBuiltinCallExpr(R, Builtin::BI__arithmetic_fence, E); return new (Context) ParenExpr(L, R, E); } @@ -6560,6 +6564,29 @@ ExprResult Sema::BuildCallExpr(Scope *Scope, Expr *Fn, SourceLocation LParenLoc, ExecConfig, IsExecConfig); } +/// BuildBuiltinCallExpr - Create a call to a builtin function specified by Id +// with the specified CallArgs +Expr *Sema::BuildBuiltinCallExpr(SourceLocation Loc, Builtin::ID Id, + MultiExprArg CallArgs) { + StringRef Name = Context.BuiltinInfo.getName(Id); + LookupResult R(*this, &Context.Idents.get(Name), Loc, + Sema::LookupOrdinaryName); + LookupName(R, TUScope, /*AllowBuiltinCreation=*/true); + + auto *BuiltInDecl = R.getAsSingle(); + assert(BuiltInDecl && "failed to find builtin declaration"); + + ExprResult DeclRef = + BuildDeclRefExpr(BuiltInDecl, BuiltInDecl->getType(), VK_LValue, Loc); + assert(DeclRef.isUsable() && "Builtin reference cannot fail"); + + ExprResult Call = + BuildCallExpr(/*Scope=*/nullptr, DeclRef.get(), Loc, CallArgs, Loc); + + assert(!Call.isInvalid() && "Call to builtin cannot fail!"); + return Call.get(); +} + /// Parse a __builtin_astype expression. /// /// __builtin_astype( value, dst type ) diff --git a/clang/test/AST/arithmetic-fence-builtin.c b/clang/test/AST/arithmetic-fence-builtin.c new file mode 100644 index 0000000000000..46666b3c8bed5 --- /dev/null +++ b/clang/test/AST/arithmetic-fence-builtin.c @@ -0,0 +1,46 @@ +// Tests without serialization: +// RUN: %clang_cc1 -ast-dump -triple i386-pc-linux-gnu %s \ +// RUN: | FileCheck %s --strict-whitespace --check-prefixes=CHECK,CHECK1 +// +// RUN: %clang_cc1 -ast-dump -triple i386-pc-linux-gnu -DFAST -mreassociate %s \ +// RUN: | FileCheck %s --strict-whitespace --check-prefixes=CHECK,CHECK1 +// +// RUN: %clang_cc1 -ast-dump -triple i386-pc-linux-gnu -DFAST -mreassociate %s \ +// RUN: -fprotect-parens \ +// RUN: | FileCheck %s --strict-whitespace --check-prefixes=CHECK,CHECK2 +// +// Tests with serialization: +// RUN: %clang_cc1 -ast-dump -triple i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -triple i386-pc-linux-gnu -include-pch %t -ast-dump-all /dev/null \ +// RUN: | FileCheck %s --strict-whitespace +// +// RUN: %clang_cc1 -ast-dump -triple i386-pc-linux-gnu -DFAST -mreassociate %s \ +// RUN: -emit-pch -o %t +// RUN: %clang_cc1 -triple i386-pc-linux-gnu -include-pch %t -ast-dump-all /dev/null \ +// RUN: | FileCheck %s --strict-whitespace --check-prefixes=CHECK,CHECK1 +// +// RUN: %clang_cc1 -ast-dump -triple i386-pc-linux-gnu -DFAST -mreassociate %s \ +// RUN: -fprotect-parens \ +// RUN: -emit-pch -o %t +// RUN: %clang_cc1 -triple i386-pc-linux-gnu -include-pch %t -ast-dump-all /dev/null -fprotect-parens\ +// RUN: | FileCheck %s --strict-whitespace --check-prefixes=CHECK,CHECK2 + +// +int v; +int addit(float a, float b) { + + v = __arithmetic_fence(a + b); + + v = (a + b); + + return 0; +} +//CHECK:| `-CompoundStmt {{.*}} +//CHECK-NEXT:| |-BinaryOperator {{.*}} 'int' '=' +//CHECK-NEXT:| | |-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'v' 'int' +//CHECK-NEXT:| | `-ImplicitCastExpr {{.*}} +//CHECK-NEXT:| | `-CallExpr {{.*}} 'float' +//CHECK-NEXT:| | |-ImplicitCastExpr {{.*}} +//CHECK-NEXT:| | | `-DeclRefExpr {{.*}}' Function {{.*}} '__arithmetic_fence'{{.*}} +//CHECK1-NOT:| | | `-DeclRefExpr {{.*}}' Function{{.*}} '__arithmetic_fence' 'void ()' +//CHECK2:| | | `-DeclRefExpr {{.*}} Function{{.*}} '__arithmetic_fence' 'void ()' diff --git a/clang/test/CodeGen/arithmetic-fence-builtin.c b/clang/test/CodeGen/arithmetic-fence-builtin.c new file mode 100644 index 0000000000000..6b5b5b4b9cefa --- /dev/null +++ b/clang/test/CodeGen/arithmetic-fence-builtin.c @@ -0,0 +1,74 @@ +// Test with fast math +// RUN: %clang_cc1 -triple i386-pc-linux-gnu -emit-llvm -DFAST \ +// RUN: -mreassociate \ +// RUN: -o - %s | FileCheck --check-prefixes CHECK,CHECKFAST,CHECKNP %s +// +// Test with fast math and fprotect-parens +// RUN: %clang_cc1 -triple i386-pc-linux-gnu -emit-llvm -DFAST \ +// RUN: -mreassociate -fprotect-parens -ffp-contract=on\ +// RUN: -o - %s | FileCheck --check-prefixes CHECK,CHECKFAST,CHECKPP %s +// +// Test without fast math: llvm intrinsic not created +// RUN: %clang_cc1 -triple i386-pc-linux-gnu -emit-llvm -fprotect-parens\ +// RUN: -o - %s | FileCheck --implicit-check-not="llvm.arithmetic.fence" %s +// +int v; +int addit(float a, float b) { + // CHECK: define {{.*}}@addit(float %a, float %b) #0 { + _Complex double cd, cd1; + cd = __arithmetic_fence(cd1); + // CHECKFAST: call{{.*}} double @llvm.arithmetic.fence.f64({{.*}}real) + // CHECKFAST: call{{.*}} double @llvm.arithmetic.fence.f64({{.*}}imag) + // Vector should be supported. + typedef float __v2f32 __attribute__((__vector_size__(8))); + __v2f32 vec1, vec2; + vec1 = __arithmetic_fence(vec2); + // CHECKFAST: call{{.*}} <2 x float> @llvm.arithmetic.fence.v2f32 + vec2 = (vec2 + vec1); + // CHECKPP: call{{.*}} <2 x float> @llvm.arithmetic.fence.v2f32 + + v = __arithmetic_fence(a + b); + // CHECKFAST: call{{.*}} float @llvm.arithmetic.fence.f32(float %add{{.*}}) + + v = (a + b); + // CHECKPP: call{{.*}} float @llvm.arithmetic.fence.f32(float %add{{.*}}) + v = a + (b*b); + // CHECKPP: fmul reassoc + // CHECKPP-NEXT: call{{.*}} float @llvm.arithmetic.fence.f32(float %mul) + // CHECKNP: fmul + // CHECKNP: fadd + v = b + a*a; + // CHECKPP: call{{.*}} float @llvm.fmuladd.f32 + // CHECKNP: fmul + // CHECKNP: fadd + v = b + __arithmetic_fence(a*a); // Fence blocks recognition of FMA + // CHECKPP: fmul + // CHECKNP: fmul + + b = (a); + (a) = b; + // CHECK-NEXT fptosi + // CHECK-NEXT store i32 + // CHECK-NEXT load float + // CHECK-NEXT store float + // CHECK-NEXT load float + // CHECK-NEXT store float + return 0; + // CHECK-NEXT ret i32 0 +} +int addit1(int a, int b) { + // CHECK: define {{.*}}@addit1(i32 %a, i32 %b{{.*}} + v = (a + b); + // CHECK-NOT: call{{.*}} float @llvm.arithmetic.fence.int(float %add) + return 0; +} +#ifdef FAST +#pragma float_control(precise, on) +int subit(float a, float b, float *fp) { + // CHECKFAST: define {{.*}}@subit(float %a, float %b{{.*}} + *fp = __arithmetic_fence(a - b); + *fp = (a + b); + // CHECK-NOT: call{{.*}} float @llvm.arithmetic.fence.f32(float %add) + return 0; +} +#endif diff --git a/clang/test/Driver/clang_f_opts.c b/clang/test/Driver/clang_f_opts.c index a255f68713aec..d729378403f3f 100644 --- a/clang/test/Driver/clang_f_opts.c +++ b/clang/test/Driver/clang_f_opts.c @@ -1,13 +1,14 @@ // REQUIRES: clang-driver // RUN: %clang -### -S -fasm -fblocks -fbuiltin -fno-math-errno -fcommon -fpascal-strings -fno-blocks -fno-builtin -fmath-errno -fno-common -fno-pascal-strings -fblocks -fbuiltin -fmath-errno -fcommon -fpascal-strings -fsplit-stack %s 2>&1 | FileCheck -check-prefix=CHECK-OPTIONS1 %s -// RUN: %clang -### -S -fasm -fblocks -fbuiltin -fno-math-errno -fcommon -fpascal-strings -fno-asm -fno-blocks -fno-builtin -fmath-errno -fno-common -fno-pascal-strings -fno-show-source-location -fshort-enums %s 2>&1 | FileCheck -check-prefix=CHECK-OPTIONS2 %s +// RUN: %clang -### -S -fasm -fblocks -fbuiltin -fno-math-errno -fcommon -fpascal-strings -fno-asm -fno-blocks -fno-builtin -fmath-errno -fno-common -fno-pascal-strings -fno-show-source-location -fshort-enums -fprotect-parens %s 2>&1 | FileCheck -check-prefix=CHECK-OPTIONS2 %s // CHECK-OPTIONS1: -fsplit-stack // CHECK-OPTIONS1: -fgnu-keywords // CHECK-OPTIONS1: -fblocks // CHECK-OPTIONS1: -fpascal-strings +// CHECK-OPTIONS2: -fprotect-parens // CHECK-OPTIONS2: -fmath-errno // CHECK-OPTIONS2: -fno-gnu-keywords // CHECK-OPTIONS2: -fno-builtin diff --git a/clang/test/Sema/arithmetic-fence-builtin.c b/clang/test/Sema/arithmetic-fence-builtin.c new file mode 100644 index 0000000000000..4f4f0a02cde9e --- /dev/null +++ b/clang/test/Sema/arithmetic-fence-builtin.c @@ -0,0 +1,48 @@ +// RUN: %clang_cc1 -triple i386-pc-linux-gnu -emit-llvm -o - -verify -x c++ %s +// RUN: %clang_cc1 -triple ppc64le -DPPC -emit-llvm -o - -verify -x c++ %s +// RUN: not %clang_cc1 -triple ppc64le -DPPC -emit-llvm -o - -x c++ %s \ +// RUN: -fprotect-parens 2>&1 | FileCheck -check-prefix=PPC %s +#ifndef PPC +int v; +template T addT(T a, T b) { + T *q = __arithmetic_fence(&a); + // expected-error@-1 {{invalid operand of type 'float *' where floating, complex or a vector of such types is required}} + // expected-error@-2 {{invalid operand of type 'int *' where floating, complex or a vector of such types is required}} + return __arithmetic_fence(a + b); + // expected-error@-1 {{invalid operand of type 'int' where floating, complex or a vector of such types is required}} +} +int addit(int a, int b) { + float x, y; + typedef struct { + int a, b; + } stype; + stype s; + s = __arithmetic_fence(s); // expected-error {{invalid operand of type 'stype' where floating, complex or a vector of such types is required}} + x = __arithmetic_fence(); // expected-error {{too few arguments to function call, expected 1, have 0}} + x = __arithmetic_fence(x, y); // expected-error {{too many arguments to function call, expected 1, have 2}} + // Complex is supported. + _Complex double cd, cd1; + cd = __arithmetic_fence(cd1); + // Vector is supported. + typedef float __v4hi __attribute__((__vector_size__(8))); + __v4hi vec1, vec2; + vec1 = __arithmetic_fence(vec2); + + v = __arithmetic_fence(a + b); // expected-error {{invalid operand of type 'int' where floating, complex or a vector of such types is required}} + float f = addT(a, b); // expected-note {{in instantiation of function template specialization 'addT' requested here}} + int i = addT(1, 2); // expected-note {{in instantiation of function template specialization 'addT' requested here}} + constexpr float d = 1.0 + 2.0; + constexpr float c = __arithmetic_fence(1.0 + 2.0); + constexpr float e = __arithmetic_fence(d); + return 0; +} +bool func(float f1, float f2, float f3) { + return (f1 == f2 && f1 == f3) || f2 == f3; // Should not warn here +} +static_assert( __arithmetic_fence(1.0 + 2.0), "message" ); +#else +float addit(float a, float b) { + return __arithmetic_fence(a+b); // expected-error {{builtin is not supported on this target}} +} +#endif +//PPC: error: option '-fprotect-parens' cannot be specified on this target From 918bb2a9782dc4906784b4f1ecd0f3011dfc38b4 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Mon, 28 Jun 2021 16:30:29 +0000 Subject: [PATCH 067/619] [gn build] Port f32f3db9fcbf --- llvm/utils/gn/secondary/libcxx/include/BUILD.gn | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index 7a46530b80c85..0a93519706be2 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -172,16 +172,27 @@ if (current_toolchain == default_toolchain) { "__functional_base_03", "__hash_table", "__iterator/advance.h", + "__iterator/back_insert_iterator.h", "__iterator/concepts.h", "__iterator/default_sentinel.h", + "__iterator/front_insert_iterator.h", "__iterator/incrementable_traits.h", + "__iterator/insert_iterator.h", + "__iterator/istream_iterator.h", + "__iterator/istreambuf_iterator.h", "__iterator/iter_move.h", "__iterator/iter_swap.h", + "__iterator/iterator.h", "__iterator/iterator_traits.h", + "__iterator/move_iterator.h", "__iterator/next.h", + "__iterator/ostream_iterator.h", + "__iterator/ostreambuf_iterator.h", "__iterator/prev.h", "__iterator/projected.h", "__iterator/readable_traits.h", + "__iterator/reverse_iterator.h", + "__iterator/wrap_iter.h", "__libcpp_version", "__locale", "__memory/addressof.h", From 3dee1e8a848d56178fc6013c343c1b144efb1425 Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Wed, 23 Jun 2021 11:30:55 -0700 Subject: [PATCH 068/619] [coro] Fix rematerializable instruction sinking to coro.suspend blocks There is a constraint that coro.suspend instructions need to be in their own blocks. The coro split pass initially creates IR that obeys this constraint (which is later checked). Sinking rematerializable instructions into these blocks breaks that constraint. Instead rematerialize in the predecessor block to the suspend's single predecessor block. Differential Revision: https://reviews.llvm.org/D104051 --- llvm/lib/Transforms/Coroutines/CoroFrame.cpp | 9 +++++++-- llvm/test/Transforms/Coroutines/coro-async.ll | 8 +++++--- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp index b3da2ae6d9be7..10e3698f95609 100644 --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -1946,11 +1946,16 @@ static void rewriteMaterializableInstructions(IRBuilder<> &IRB, for (Instruction *U : E.second) { // If we have not seen this block, materialize the value. if (CurrentBlock != U->getParent()) { - CurrentBlock = U->getParent(); + + bool IsInCoroSuspendBlock = isa(U); + CurrentBlock = IsInCoroSuspendBlock + ? U->getParent()->getSinglePredecessor() + : U->getParent(); CurrentMaterialization = cast(Def)->clone(); CurrentMaterialization->setName(Def->getName()); CurrentMaterialization->insertBefore( - &*CurrentBlock->getFirstInsertionPt()); + IsInCoroSuspendBlock ? CurrentBlock->getTerminator() + : &*CurrentBlock->getFirstInsertionPt()); } if (auto *PN = dyn_cast(U)) { assert(PN->getNumIncomingValues() == 1 && diff --git a/llvm/test/Transforms/Coroutines/coro-async.ll b/llvm/test/Transforms/Coroutines/coro-async.ll index b5fff9b9e50c4..27d07e3ec7f7b 100644 --- a/llvm/test/Transforms/Coroutines/coro-async.ll +++ b/llvm/test/Transforms/Coroutines/coro-async.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -enable-coroutines -passes='default' -S | FileCheck --check-prefixes=CHECK %s - +; RUN: opt < %s -enable-coroutines -O0 -S target datalayout = "p:64:64:64" %async.task = type { i64 } @@ -205,11 +205,12 @@ entry: store i8* %async.ctxt, i8** %callee_context.caller_context.addr %resume_proj_fun = bitcast i8*(i8*)* @resume_context_projection to i8* %callee = bitcast void(i8*, %async.task*, %async.actor*)* @asyncSuspend to i8* + %task.casted = bitcast i8* %arg0 to %async.task* %res = call {i8*, i8*, i8*} (i32, i8*, i8*, ...) @llvm.coro.suspend.async(i32 2, i8* %resume.func_ptr, i8* %resume_proj_fun, void (i8*, i8*, %async.task*, %async.actor*)* @my_async_function.my_other_async_function_fp.apply, - i8* %callee, i8* %callee_context, %async.task* %task, %async.actor *%actor), !dbg !9 + i8* %callee, i8* %callee_context, %async.task* %task.casted, %async.actor *%actor), !dbg !9 %continuation_task_arg = extractvalue {i8*, i8*, i8*} %res, 0 %task.2 = bitcast i8* %continuation_task_arg to %async.task* @@ -227,7 +228,7 @@ entry: i8* %resume.func_ptr.1, i8* %resume_proj_fun.2, void (i8*, i8*, %async.task*, %async.actor*)* @my_async_function.my_other_async_function_fp.apply, - i8* %callee.2, i8* %callee_context, %async.task* %task, %async.actor *%actor) + i8* %callee.2, i8* %callee_context, %async.task* %task.casted, %async.actor *%actor) call void @llvm.coro.async.context.dealloc(i8* %callee_context) %continuation_actor_arg = extractvalue {i8*, i8*, i8*} %res.2, 1 @@ -535,6 +536,7 @@ declare swiftcc void @asyncReturn(i8*, %async.task*, %async.actor*) declare swiftcc void @asyncSuspend(i8*, %async.task*, %async.actor*) declare i8* @llvm.coro.async.resume() declare void @llvm.coro.async.size.replace(i8*, i8*) +declare i8* @hide(i8*) !llvm.dbg.cu = !{!2} !llvm.module.flags = !{!0} From 8815ef823c803e98f328068d1abde255296de9c3 Mon Sep 17 00:00:00 2001 From: Melanie Blower Date: Mon, 28 Jun 2021 12:41:24 -0400 Subject: [PATCH 069/619] Revert "[clang][PATCH][nfc] Refactor TargetInfo::adjust to pass DiagnosticsEngine to allow diagnostics on target-unsupported options" This reverts commit 2c02b0c3f45414ac6c64583e006a26113c028304. buildbot fails --- clang/include/clang/Basic/TargetInfo.h | 2 +- clang/lib/Basic/TargetInfo.cpp | 2 +- clang/lib/Basic/Targets/AMDGPU.cpp | 4 ++-- clang/lib/Basic/Targets/AMDGPU.h | 2 +- clang/lib/Basic/Targets/PPC.cpp | 4 ++-- clang/lib/Basic/Targets/PPC.h | 2 +- clang/lib/Basic/Targets/SPIR.h | 4 ++-- clang/lib/Basic/Targets/WebAssembly.cpp | 3 +-- clang/lib/Basic/Targets/WebAssembly.h | 2 +- clang/lib/Frontend/ASTUnit.cpp | 2 +- clang/lib/Frontend/CompilerInstance.cpp | 4 ++-- clang/lib/Interpreter/Interpreter.cpp | 2 +- clang/tools/clang-import-test/clang-import-test.cpp | 2 +- 13 files changed, 17 insertions(+), 18 deletions(-) diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index 4f0cbf986b31b..0e984eb29b815 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -1162,7 +1162,7 @@ class TargetInfo : public virtual TransferrableTargetInfo, /// Apply changes to the target information with respect to certain /// language options which change the target configuration and adjust /// the language based on the target options where applicable. - virtual void adjust(DiagnosticsEngine &Diags, LangOptions &Opts); + virtual void adjust(LangOptions &Opts); /// Adjust target options based on codegen options. virtual void adjustTargetOptions(const CodeGenOptions &CGOpts, diff --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp index 88086fa2fed74..ffd88dc75dba1 100644 --- a/clang/lib/Basic/TargetInfo.cpp +++ b/clang/lib/Basic/TargetInfo.cpp @@ -346,7 +346,7 @@ bool TargetInfo::isTypeSigned(IntType T) { /// Apply changes to the target information with respect to certain /// language options which change the target configuration and adjust /// the language based on the target options where applicable. -void TargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) { +void TargetInfo::adjust(LangOptions &Opts) { if (Opts.NoBitFieldTypeAlign) UseBitFieldTypeAlignment = false; diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index fac786dbcf9e2..595132e2e70ba 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -358,8 +358,8 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; } -void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) { - TargetInfo::adjust(Diags, Opts); +void AMDGPUTargetInfo::adjust(LangOptions &Opts) { + TargetInfo::adjust(Opts); // ToDo: There are still a few places using default address space as private // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL // can be removed from the following line. diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h index 244a6e0446905..fe5c61c6ba2bb 100644 --- a/clang/lib/Basic/Targets/AMDGPU.h +++ b/clang/lib/Basic/Targets/AMDGPU.h @@ -93,7 +93,7 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo { void setAddressSpaceMap(bool DefaultIsPrivate); - void adjust(DiagnosticsEngine &Diags, LangOptions &Opts) override; + void adjust(LangOptions &Opts) override; uint64_t getPointerWidthV(unsigned AddrSpace) const override { if (isR600(getTriple())) diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp index d431dda970222..6860b5e5d02fa 100644 --- a/clang/lib/Basic/Targets/PPC.cpp +++ b/clang/lib/Basic/Targets/PPC.cpp @@ -614,10 +614,10 @@ void PPCTargetInfo::fillValidCPUList(SmallVectorImpl &Values) const { Values.append(std::begin(ValidCPUNames), std::end(ValidCPUNames)); } -void PPCTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) { +void PPCTargetInfo::adjust(LangOptions &Opts) { if (HasAltivec) Opts.AltiVec = 1; - TargetInfo::adjust(Diags, Opts); + TargetInfo::adjust(Opts); if (LongDoubleFormat != &llvm::APFloat::IEEEdouble()) LongDoubleFormat = Opts.PPCIEEELongDouble ? &llvm::APFloat::IEEEquad() diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h index 18ee1194c759d..554f2174fee00 100644 --- a/clang/lib/Basic/Targets/PPC.h +++ b/clang/lib/Basic/Targets/PPC.h @@ -89,7 +89,7 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo { } // Set the language option for altivec based on our value. - void adjust(DiagnosticsEngine &Diags, LangOptions &Opts) override; + void adjust(LangOptions &Opts) override; // Note: GCC recognizes the following additional cpus: // 401, 403, 405, 405fp, 440fp, 464, 464fp, 476, 476fp, 505, 740, 801, diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h index 50f34abd66309..c429b27709ecb 100644 --- a/clang/lib/Basic/Targets/SPIR.h +++ b/clang/lib/Basic/Targets/SPIR.h @@ -135,8 +135,8 @@ class LLVM_LIBRARY_VISIBILITY SPIRTargetInfo : public TargetInfo { AddrSpaceMap = DefaultIsGeneric ? &SPIRDefIsGenMap : &SPIRDefIsPrivMap; } - void adjust(DiagnosticsEngine &Diags, LangOptions &Opts) override { - TargetInfo::adjust(Diags, Opts); + void adjust(LangOptions &Opts) override { + TargetInfo::adjust(Opts); // FIXME: SYCL specification considers unannotated pointers and references // to be pointing to the generic address space. See section 5.9.3 of // SYCL 2020 specification. diff --git a/clang/lib/Basic/Targets/WebAssembly.cpp b/clang/lib/Basic/Targets/WebAssembly.cpp index 7ef79849cb75d..2a5055c3d534b 100644 --- a/clang/lib/Basic/Targets/WebAssembly.cpp +++ b/clang/lib/Basic/Targets/WebAssembly.cpp @@ -234,8 +234,7 @@ ArrayRef WebAssemblyTargetInfo::getTargetBuiltins() const { Builtin::FirstTSBuiltin); } -void WebAssemblyTargetInfo::adjust(DiagnosticsEngine &Diags, - LangOptions &Opts) { +void WebAssemblyTargetInfo::adjust(LangOptions &Opts) { // If the Atomics feature isn't available, turn off POSIXThreads and // ThreadModel, so that we don't predefine _REENTRANT or __STDCPP_THREADS__. if (!HasAtomics) { diff --git a/clang/lib/Basic/Targets/WebAssembly.h b/clang/lib/Basic/Targets/WebAssembly.h index b29730c5d706b..70115183e46b9 100644 --- a/clang/lib/Basic/Targets/WebAssembly.h +++ b/clang/lib/Basic/Targets/WebAssembly.h @@ -138,7 +138,7 @@ class LLVM_LIBRARY_VISIBILITY WebAssemblyTargetInfo : public TargetInfo { bool hasProtectedVisibility() const override { return false; } - void adjust(DiagnosticsEngine &Diags, LangOptions &Opts) override; + void adjust(LangOptions &Opts) override; }; class LLVM_LIBRARY_VISIBILITY WebAssembly32TargetInfo diff --git a/clang/lib/Frontend/ASTUnit.cpp b/clang/lib/Frontend/ASTUnit.cpp index 4f92833e4229c..988090a8b1b13 100644 --- a/clang/lib/Frontend/ASTUnit.cpp +++ b/clang/lib/Frontend/ASTUnit.cpp @@ -588,7 +588,7 @@ class ASTInfoCollector : public ASTReaderListener { // // FIXME: We shouldn't need to do this, the target should be immutable once // created. This complexity should be lifted elsewhere. - Target->adjust(PP.getDiagnostics(), LangOpt); + Target->adjust(LangOpt); // Initialize the preprocessor. PP.Initialize(*Target); diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp index 2ae3be6814dec..063384130f730 100644 --- a/clang/lib/Frontend/CompilerInstance.cpp +++ b/clang/lib/Frontend/CompilerInstance.cpp @@ -142,7 +142,7 @@ bool CompilerInstance::createTarget() { // Inform the target of the language options. // FIXME: We shouldn't need to do this, the target should be immutable once // created. This complexity should be lifted elsewhere. - getTarget().adjust(getDiagnostics(), getLangOpts()); + getTarget().adjust(getLangOpts()); // Adjust target options based on codegen options. getTarget().adjustTargetOptions(getCodeGenOpts(), getTargetOpts()); @@ -457,7 +457,7 @@ void CompilerInstance::createPreprocessor(TranslationUnitKind TUKind) { getSourceManager(), *HeaderInfo, *this, /*IdentifierInfoLookup=*/nullptr, /*OwnsHeaderSearch=*/true, TUKind); - getTarget().adjust(getDiagnostics(), getLangOpts()); + getTarget().adjust(getLangOpts()); PP->Initialize(getTarget(), getAuxTarget()); if (PPOpts.DetailedRecord) diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp index 768847f9f0352..711a5e9ff0168 100644 --- a/clang/lib/Interpreter/Interpreter.cpp +++ b/clang/lib/Interpreter/Interpreter.cpp @@ -110,7 +110,7 @@ CreateCI(const llvm::opt::ArgStringList &Argv) { "Initialization failed. " "Target is missing"); - Clang->getTarget().adjust(Clang->getDiagnostics(), Clang->getLangOpts()); + Clang->getTarget().adjust(Clang->getLangOpts()); return std::move(Clang); } diff --git a/clang/tools/clang-import-test/clang-import-test.cpp b/clang/tools/clang-import-test/clang-import-test.cpp index fa5d7a54f53b4..df173cf49f35e 100644 --- a/clang/tools/clang-import-test/clang-import-test.cpp +++ b/clang/tools/clang-import-test/clang-import-test.cpp @@ -208,7 +208,7 @@ std::unique_ptr BuildCompilerInstance() { TargetInfo *TI = TargetInfo::CreateTargetInfo( Ins->getDiagnostics(), Ins->getInvocation().TargetOpts); Ins->setTarget(TI); - Ins->getTarget().adjust(Ins->getDiagnostics(), Ins->getLangOpts()); + Ins->getTarget().adjust(Ins->getLangOpts()); Ins->createFileManager(); Ins->createSourceManager(Ins->getFileManager()); Ins->createPreprocessor(TU_Complete); From c27e5a2a8e34aaa4c6037498cd8d21df0a8e8e70 Mon Sep 17 00:00:00 2001 From: Melanie Blower Date: Mon, 28 Jun 2021 12:41:57 -0400 Subject: [PATCH 070/619] Revert "[clang][patch][fpenv] Add builtin __arithmetic_fence and option fprotect-parens" This reverts commit 4f1238e44d803b145997fa984677a6c5cdf1f417. Buildbot fails on predecessor patch --- clang/docs/UsersManual.rst | 20 ----- clang/include/clang/Basic/Builtins.def | 3 - .../clang/Basic/DiagnosticSemaKinds.td | 3 - clang/include/clang/Basic/LangOptions.def | 2 - clang/include/clang/Basic/TargetInfo.h | 3 - clang/include/clang/Driver/Options.td | 9 +-- clang/include/clang/Sema/Sema.h | 4 - clang/lib/AST/ExprConstant.cpp | 3 - clang/lib/Basic/TargetInfo.cpp | 5 -- clang/lib/Basic/Targets/X86.h | 2 - clang/lib/CodeGen/CGBuiltin.cpp | 30 -------- clang/lib/Driver/ToolChains/Clang.cpp | 5 -- clang/lib/Sema/SemaChecking.cpp | 27 ------- clang/lib/Sema/SemaCoroutine.cpp | 32 ++++++-- clang/lib/Sema/SemaExpr.cpp | 27 ------- clang/test/AST/arithmetic-fence-builtin.c | 46 ------------ clang/test/CodeGen/arithmetic-fence-builtin.c | 74 ------------------- clang/test/Driver/clang_f_opts.c | 3 +- clang/test/Sema/arithmetic-fence-builtin.c | 48 ------------ 19 files changed, 28 insertions(+), 318 deletions(-) delete mode 100644 clang/test/AST/arithmetic-fence-builtin.c delete mode 100644 clang/test/CodeGen/arithmetic-fence-builtin.c delete mode 100644 clang/test/Sema/arithmetic-fence-builtin.c diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index 9e8bac635337e..244212a1336db 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -1478,26 +1478,6 @@ Note that floating-point operations performed as part of constant initialization * ``maytrap`` The compiler avoids transformations that may raise exceptions that would not have been raised by the original code. Constant folding performed by the compiler is exempt from this option. * ``strict`` The compiler ensures that all transformations strictly preserve the floating point exception semantics of the original code. -.. option:: -f[no-]protect-parens: - - This option pertains to floating-point types, complex types with - floating-point components, and vectors of these types. Some arithmetic - expression transformations that are mathematically correct and permissible - according to the C and C++ language standards may be incorrect when dealing - with floating-point types, such as reassociation and distribution. Further, - the optimizer may ignore parentheses when computing arithmetic expressions - in circumstances where the parenthesized and unparenthesized expression - express the same mathematical value. For example (a+b)+c is the same - mathematical value as a+(b+c), but the optimizer is free to evaluate the - additions in any order regardless of the parentheses. When enabled, this - option forces the optimizer to honor the order of operations with respect - to parentheses in all circumstances. - - Note that floating-point contraction (option `-ffp-contract=`) is disabled - when `-fprotect-parens` is enabled. Also note that in safe floating-point - modes, such as `-ffp-model=precise` or `-ffp-model=strict`, this option - has no effect because the optimizer is prohibited from making unsafe - transformations. .. _fp-constant-eval: diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def index 33d3e6dc4e7db..5a9d0a0018292 100644 --- a/clang/include/clang/Basic/Builtins.def +++ b/clang/include/clang/Basic/Builtins.def @@ -1657,9 +1657,6 @@ BUILTIN(__builtin_ms_va_start, "vc*&.", "nt") BUILTIN(__builtin_ms_va_end, "vc*&", "n") BUILTIN(__builtin_ms_va_copy, "vc*&c*&", "n") -// Arithmetic Fence: to prevent FP reordering and reassociation optimizations -LANGBUILTIN(__arithmetic_fence, "v.", "t", ALL_LANGUAGES) - #undef BUILTIN #undef LIBBUILTIN #undef LANGBUILTIN diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 22c2a1a39ea13..70a22fd2506a3 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -8530,9 +8530,6 @@ def err_typecheck_expect_scalar_operand : Error< "operand of type %0 where arithmetic or pointer type is required">; def err_typecheck_cond_incompatible_operands : Error< "incompatible operand types%diff{ ($ and $)|}0,1">; -def err_typecheck_expect_flt_or_vector : Error< - "invalid operand of type %0 where floating, complex or " - "a vector of such types is required">; def err_cast_selector_expr : Error< "cannot type cast @selector expression">; def ext_typecheck_cond_incompatible_pointers : ExtWarn< diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index b18e957a58f4c..465bad8d7d112 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -199,8 +199,6 @@ COMPATIBLE_LANGOPT(Deprecated , 1, 0, "__DEPRECATED predefined macro") COMPATIBLE_LANGOPT(FastMath , 1, 0, "fast FP math optimizations, and __FAST_MATH__ predefined macro") COMPATIBLE_LANGOPT(FiniteMathOnly , 1, 0, "__FINITE_MATH_ONLY__ predefined macro") COMPATIBLE_LANGOPT(UnsafeFPMath , 1, 0, "Unsafe Floating Point Math") -COMPATIBLE_LANGOPT(ProtectParens , 1, 0, "optimizer honors parentheses " - "when floating-point expressions are evaluated") BENIGN_LANGOPT(AllowFPReassoc , 1, 0, "Permit Floating Point reassociation") BENIGN_LANGOPT(NoHonorNaNs , 1, 0, "Permit Floating Point optimization without regard to NaN") BENIGN_LANGOPT(NoHonorInfs , 1, 0, "Permit Floating Point optimization without regard to infinities") diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index 0e984eb29b815..d59bad30e7428 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -1424,9 +1424,6 @@ class TargetInfo : public virtual TransferrableTargetInfo, /// Whether the option -fextend-arguments={32,64} is supported on the target. virtual bool supportsExtendIntArgs() const { return false; } - /// Controls if __arithmetic_fence is supported in the targeted backend. - virtual bool checkArithmeticFenceSupported() const { return false; } - /// Gets the default calling convention for the given target and /// declaration context. virtual CallingConv getDefaultCallingConv() const { diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 0b9596c68f5fb..f1455f5461990 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1762,13 +1762,6 @@ defm strict_float_cast_overflow : BoolFOption<"strict-float-cast-overflow", " of the target's native float-to-int conversion instructions">, PosFlag>; -defm protect_parens : BoolFOption<"protect-parens", - LangOpts<"ProtectParens">, DefaultFalse, - PosFlag, - NegFlag>; - def ffor_scope : Flag<["-"], "ffor-scope">, Group; def fno_for_scope : Flag<["-"], "fno-for-scope">, Group; @@ -4415,7 +4408,7 @@ defm integer_4_integer_8 : BooleanFFlag<"integer-4-integer-8">, Group, Group; defm module_private : BooleanFFlag<"module-private">, Group; defm pack_derived : BooleanFFlag<"pack-derived">, Group; -//defm protect_parens : BooleanFFlag<"protect-parens">, Group; +defm protect_parens : BooleanFFlag<"protect-parens">, Group; defm range_check : BooleanFFlag<"range-check">, Group; defm real_4_real_10 : BooleanFFlag<"real-4-real-10">, Group; defm real_4_real_16 : BooleanFFlag<"real-4-real-16">, Group; diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 3f7db9bc5be8b..4c3a7035bcc94 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -36,7 +36,6 @@ #include "clang/AST/TypeLoc.h" #include "clang/AST/TypeOrdering.h" #include "clang/Basic/BitmaskEnum.h" -#include "clang/Basic/Builtins.h" #include "clang/Basic/ExpressionTraits.h" #include "clang/Basic/Module.h" #include "clang/Basic/OpenCLOptions.h" @@ -5425,8 +5424,6 @@ class Sema final { Expr *ExecConfig = nullptr, bool IsExecConfig = false, bool AllowRecovery = false); - Expr *BuildBuiltinCallExpr(SourceLocation Loc, Builtin::ID Id, - MultiExprArg CallArgs); enum class AtomicArgumentOrder { API, AST }; ExprResult BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange, @@ -12586,7 +12583,6 @@ class Sema final { private: bool SemaBuiltinPrefetch(CallExpr *TheCall); bool SemaBuiltinAllocaWithAlign(CallExpr *TheCall); - bool SemaBuiltinArithmeticFence(CallExpr *TheCall); bool SemaBuiltinAssume(CallExpr *TheCall); bool SemaBuiltinAssumeAligned(CallExpr *TheCall); bool SemaBuiltinLongjmp(CallExpr *TheCall); diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 01c0168d61a40..a4d8fec09748d 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -13692,9 +13692,6 @@ bool FloatExprEvaluator::VisitCallExpr(const CallExpr *E) { Result.changeSign(); return true; - case Builtin::BI__arithmetic_fence: - return EvaluateFloat(E->getArg(0), Result, Info); - // FIXME: Builtin::BI__builtin_powi // FIXME: Builtin::BI__builtin_powif // FIXME: Builtin::BI__builtin_powil diff --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp index ffd88dc75dba1..e73b4a3a40c74 100644 --- a/clang/lib/Basic/TargetInfo.cpp +++ b/clang/lib/Basic/TargetInfo.cpp @@ -430,11 +430,6 @@ void TargetInfo::adjust(LangOptions &Opts) { // its corresponding signed type. PaddingOnUnsignedFixedPoint |= Opts.PaddingOnUnsignedFixedPoint; CheckFixedPointBits(); - - if (Opts.ProtectParens && !checkArithmeticFenceSupported()) { - Diags.Report(diag::err_opt_not_valid_on_target) << "-fprotect-parens"; - Opts.ProtectParens = false; - } } bool TargetInfo::initFeatureMap( diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h index e798962617a30..7639ea835ebc7 100644 --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -362,8 +362,6 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { } } - bool checkArithmeticFenceSupported() const override { return true; } - CallingConv getDefaultCallingConv() const override { return CC_C; } diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 6702181e7b766..2e9454921ffa8 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -2833,36 +2833,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Function *FnAssume = CGM.getIntrinsic(Intrinsic::assume); return RValue::get(Builder.CreateCall(FnAssume, ArgValue)); } - case Builtin::BI__arithmetic_fence: { - // Create the builtin call if FastMath is selected, and the target - // supports the builtin, otherwise just return the argument. - CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); - llvm::FastMathFlags FMF = Builder.getFastMathFlags(); - bool isArithmeticFenceEnabled = - FMF.allowReassoc() && - getContext().getTargetInfo().checkArithmeticFenceSupported(); - QualType ArgType = E->getArg(0)->getType(); - if (ArgType->isComplexType()) { - if (isArithmeticFenceEnabled) { - QualType ElementType = ArgType->castAs()->getElementType(); - ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); - Value *Real = Builder.CreateArithmeticFence(ComplexVal.first, - ConvertType(ElementType)); - Value *Imag = Builder.CreateArithmeticFence(ComplexVal.second, - ConvertType(ElementType)); - return RValue::getComplex(std::make_pair(Real, Imag)); - } - ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); - Value *Real = ComplexVal.first; - Value *Imag = ComplexVal.second; - return RValue::getComplex(std::make_pair(Real, Imag)); - } - Value *ArgValue = EmitScalarExpr(E->getArg(0)); - if (isArithmeticFenceEnabled) - return RValue::get( - Builder.CreateArithmeticFence(ArgValue, ConvertType(ArgType))); - return RValue::get(ArgValue); - } case Builtin::BI__builtin_bswap16: case Builtin::BI__builtin_bswap32: case Builtin::BI__builtin_bswap64: { diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index fd26d04e39bcc..c265e1c4e53cb 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -4975,11 +4975,6 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, false)) CmdArgs.push_back("-fsplit-stack"); - // -fprotect-parens=0 is default. - if (Args.hasFlag(options::OPT_fprotect_parens, - options::OPT_fno_protect_parens, false)) - CmdArgs.push_back("-fprotect-parens"); - RenderFloatingPointOptions(TC, D, OFastEnabled, Args, CmdArgs, JA); if (Arg *A = Args.getLastArg(options::OPT_fextend_args_EQ)) { diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 4d04f3017a2ef..17eeebb0c6799 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -1554,10 +1554,6 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, Diag(TheCall->getBeginLoc(), diag::warn_alloca) << TheCall->getDirectCallee(); break; - case Builtin::BI__arithmetic_fence: - if (SemaBuiltinArithmeticFence(TheCall)) - return ExprError(); - break; case Builtin::BI__assume: case Builtin::BI__builtin_assume: if (SemaBuiltinAssume(TheCall)) @@ -6553,29 +6549,6 @@ bool Sema::SemaBuiltinPrefetch(CallExpr *TheCall) { return false; } -/// SemaBuiltinArithmeticFence - Handle __arithmetic_fence. -bool Sema::SemaBuiltinArithmeticFence(CallExpr *TheCall) { - if (!Context.getTargetInfo().checkArithmeticFenceSupported()) - return Diag(TheCall->getBeginLoc(), diag::err_builtin_target_unsupported) - << SourceRange(TheCall->getBeginLoc(), TheCall->getEndLoc()); - if (checkArgCount(*this, TheCall, 1)) - return true; - Expr *Arg = TheCall->getArg(0); - if (Arg->isInstantiationDependent()) - return false; - - QualType ArgTy = Arg->getType(); - if (!ArgTy->hasFloatingRepresentation()) - return Diag(TheCall->getEndLoc(), diag::err_typecheck_expect_flt_or_vector) - << ArgTy; - if (Arg->isLValue()) { - ExprResult FirstArg = DefaultLvalueConversion(Arg); - TheCall->setArg(0, FirstArg.get()); - } - TheCall->setType(TheCall->getArg(0)->getType()); - return false; -} - /// SemaBuiltinAssume - Handle __assume (MS Extension). // __assume does not evaluate its arguments, and should warn if its argument // has side effects. diff --git a/clang/lib/Sema/SemaCoroutine.cpp b/clang/lib/Sema/SemaCoroutine.cpp index 31a4092b5b604..cec80436d575e 100644 --- a/clang/lib/Sema/SemaCoroutine.cpp +++ b/clang/lib/Sema/SemaCoroutine.cpp @@ -291,6 +291,26 @@ static ExprResult buildOperatorCoawaitCall(Sema &SemaRef, Scope *S, cast(R.get())); } +static Expr *buildBuiltinCall(Sema &S, SourceLocation Loc, Builtin::ID Id, + MultiExprArg CallArgs) { + StringRef Name = S.Context.BuiltinInfo.getName(Id); + LookupResult R(S, &S.Context.Idents.get(Name), Loc, Sema::LookupOrdinaryName); + S.LookupName(R, S.TUScope, /*AllowBuiltinCreation=*/true); + + auto *BuiltInDecl = R.getAsSingle(); + assert(BuiltInDecl && "failed to find builtin declaration"); + + ExprResult DeclRef = + S.BuildDeclRefExpr(BuiltInDecl, BuiltInDecl->getType(), VK_LValue, Loc); + assert(DeclRef.isUsable() && "Builtin reference cannot fail"); + + ExprResult Call = + S.BuildCallExpr(/*Scope=*/nullptr, DeclRef.get(), Loc, CallArgs, Loc); + + assert(!Call.isInvalid() && "Call to builtin cannot fail!"); + return Call.get(); +} + static ExprResult buildCoroutineHandle(Sema &S, QualType PromiseType, SourceLocation Loc) { QualType CoroHandleType = lookupCoroutineHandleType(S, PromiseType, Loc); @@ -307,7 +327,7 @@ static ExprResult buildCoroutineHandle(Sema &S, QualType PromiseType, } Expr *FramePtr = - S.BuildBuiltinCallExpr(Loc, Builtin::BI__builtin_coro_frame, {}); + buildBuiltinCall(S, Loc, Builtin::BI__builtin_coro_frame, {}); CXXScopeSpec SS; ExprResult FromAddr = @@ -384,8 +404,8 @@ static Expr *maybeTailCall(Sema &S, QualType RetType, Expr *E, // the resume call and return instruction, which would interfere with the // musttail call contract. JustAddress = S.MaybeCreateExprWithCleanups(JustAddress); - return S.BuildBuiltinCallExpr(Loc, Builtin::BI__builtin_coro_resume, - JustAddress); + return buildBuiltinCall(S, Loc, Builtin::BI__builtin_coro_resume, + JustAddress); } /// Build calls to await_ready, await_suspend, and await_resume for a co_await @@ -1337,10 +1357,10 @@ bool CoroutineStmtBuilder::makeNewAndDeleteExpr() { return false; Expr *FramePtr = - S.BuildBuiltinCallExpr(Loc, Builtin::BI__builtin_coro_frame, {}); + buildBuiltinCall(S, Loc, Builtin::BI__builtin_coro_frame, {}); Expr *FrameSize = - S.BuildBuiltinCallExpr(Loc, Builtin::BI__builtin_coro_size, {}); + buildBuiltinCall(S, Loc, Builtin::BI__builtin_coro_size, {}); // Make new call. @@ -1369,7 +1389,7 @@ bool CoroutineStmtBuilder::makeNewAndDeleteExpr() { return false; Expr *CoroFree = - S.BuildBuiltinCallExpr(Loc, Builtin::BI__builtin_coro_free, {FramePtr}); + buildBuiltinCall(S, Loc, Builtin::BI__builtin_coro_free, {FramePtr}); SmallVector DeleteArgs{CoroFree}; diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 3df74b5ea9dbc..6031dff673351 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -4054,10 +4054,6 @@ ExprResult Sema::ActOnNumericConstant(const Token &Tok, Scope *UDLScope) { ExprResult Sema::ActOnParenExpr(SourceLocation L, SourceLocation R, Expr *E) { assert(E && "ActOnParenExpr() missing expr"); - QualType ExprTy = E->getType(); - if (getLangOpts().ProtectParens && CurFPFeatures.getAllowFPReassociate() && - !E->isLValue() && ExprTy->hasFloatingRepresentation()) - return BuildBuiltinCallExpr(R, Builtin::BI__arithmetic_fence, E); return new (Context) ParenExpr(L, R, E); } @@ -6564,29 +6560,6 @@ ExprResult Sema::BuildCallExpr(Scope *Scope, Expr *Fn, SourceLocation LParenLoc, ExecConfig, IsExecConfig); } -/// BuildBuiltinCallExpr - Create a call to a builtin function specified by Id -// with the specified CallArgs -Expr *Sema::BuildBuiltinCallExpr(SourceLocation Loc, Builtin::ID Id, - MultiExprArg CallArgs) { - StringRef Name = Context.BuiltinInfo.getName(Id); - LookupResult R(*this, &Context.Idents.get(Name), Loc, - Sema::LookupOrdinaryName); - LookupName(R, TUScope, /*AllowBuiltinCreation=*/true); - - auto *BuiltInDecl = R.getAsSingle(); - assert(BuiltInDecl && "failed to find builtin declaration"); - - ExprResult DeclRef = - BuildDeclRefExpr(BuiltInDecl, BuiltInDecl->getType(), VK_LValue, Loc); - assert(DeclRef.isUsable() && "Builtin reference cannot fail"); - - ExprResult Call = - BuildCallExpr(/*Scope=*/nullptr, DeclRef.get(), Loc, CallArgs, Loc); - - assert(!Call.isInvalid() && "Call to builtin cannot fail!"); - return Call.get(); -} - /// Parse a __builtin_astype expression. /// /// __builtin_astype( value, dst type ) diff --git a/clang/test/AST/arithmetic-fence-builtin.c b/clang/test/AST/arithmetic-fence-builtin.c deleted file mode 100644 index 46666b3c8bed5..0000000000000 --- a/clang/test/AST/arithmetic-fence-builtin.c +++ /dev/null @@ -1,46 +0,0 @@ -// Tests without serialization: -// RUN: %clang_cc1 -ast-dump -triple i386-pc-linux-gnu %s \ -// RUN: | FileCheck %s --strict-whitespace --check-prefixes=CHECK,CHECK1 -// -// RUN: %clang_cc1 -ast-dump -triple i386-pc-linux-gnu -DFAST -mreassociate %s \ -// RUN: | FileCheck %s --strict-whitespace --check-prefixes=CHECK,CHECK1 -// -// RUN: %clang_cc1 -ast-dump -triple i386-pc-linux-gnu -DFAST -mreassociate %s \ -// RUN: -fprotect-parens \ -// RUN: | FileCheck %s --strict-whitespace --check-prefixes=CHECK,CHECK2 -// -// Tests with serialization: -// RUN: %clang_cc1 -ast-dump -triple i386-pc-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -triple i386-pc-linux-gnu -include-pch %t -ast-dump-all /dev/null \ -// RUN: | FileCheck %s --strict-whitespace -// -// RUN: %clang_cc1 -ast-dump -triple i386-pc-linux-gnu -DFAST -mreassociate %s \ -// RUN: -emit-pch -o %t -// RUN: %clang_cc1 -triple i386-pc-linux-gnu -include-pch %t -ast-dump-all /dev/null \ -// RUN: | FileCheck %s --strict-whitespace --check-prefixes=CHECK,CHECK1 -// -// RUN: %clang_cc1 -ast-dump -triple i386-pc-linux-gnu -DFAST -mreassociate %s \ -// RUN: -fprotect-parens \ -// RUN: -emit-pch -o %t -// RUN: %clang_cc1 -triple i386-pc-linux-gnu -include-pch %t -ast-dump-all /dev/null -fprotect-parens\ -// RUN: | FileCheck %s --strict-whitespace --check-prefixes=CHECK,CHECK2 - -// -int v; -int addit(float a, float b) { - - v = __arithmetic_fence(a + b); - - v = (a + b); - - return 0; -} -//CHECK:| `-CompoundStmt {{.*}} -//CHECK-NEXT:| |-BinaryOperator {{.*}} 'int' '=' -//CHECK-NEXT:| | |-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'v' 'int' -//CHECK-NEXT:| | `-ImplicitCastExpr {{.*}} -//CHECK-NEXT:| | `-CallExpr {{.*}} 'float' -//CHECK-NEXT:| | |-ImplicitCastExpr {{.*}} -//CHECK-NEXT:| | | `-DeclRefExpr {{.*}}' Function {{.*}} '__arithmetic_fence'{{.*}} -//CHECK1-NOT:| | | `-DeclRefExpr {{.*}}' Function{{.*}} '__arithmetic_fence' 'void ()' -//CHECK2:| | | `-DeclRefExpr {{.*}} Function{{.*}} '__arithmetic_fence' 'void ()' diff --git a/clang/test/CodeGen/arithmetic-fence-builtin.c b/clang/test/CodeGen/arithmetic-fence-builtin.c deleted file mode 100644 index 6b5b5b4b9cefa..0000000000000 --- a/clang/test/CodeGen/arithmetic-fence-builtin.c +++ /dev/null @@ -1,74 +0,0 @@ -// Test with fast math -// RUN: %clang_cc1 -triple i386-pc-linux-gnu -emit-llvm -DFAST \ -// RUN: -mreassociate \ -// RUN: -o - %s | FileCheck --check-prefixes CHECK,CHECKFAST,CHECKNP %s -// -// Test with fast math and fprotect-parens -// RUN: %clang_cc1 -triple i386-pc-linux-gnu -emit-llvm -DFAST \ -// RUN: -mreassociate -fprotect-parens -ffp-contract=on\ -// RUN: -o - %s | FileCheck --check-prefixes CHECK,CHECKFAST,CHECKPP %s -// -// Test without fast math: llvm intrinsic not created -// RUN: %clang_cc1 -triple i386-pc-linux-gnu -emit-llvm -fprotect-parens\ -// RUN: -o - %s | FileCheck --implicit-check-not="llvm.arithmetic.fence" %s -// -int v; -int addit(float a, float b) { - // CHECK: define {{.*}}@addit(float %a, float %b) #0 { - _Complex double cd, cd1; - cd = __arithmetic_fence(cd1); - // CHECKFAST: call{{.*}} double @llvm.arithmetic.fence.f64({{.*}}real) - // CHECKFAST: call{{.*}} double @llvm.arithmetic.fence.f64({{.*}}imag) - // Vector should be supported. - typedef float __v2f32 __attribute__((__vector_size__(8))); - __v2f32 vec1, vec2; - vec1 = __arithmetic_fence(vec2); - // CHECKFAST: call{{.*}} <2 x float> @llvm.arithmetic.fence.v2f32 - vec2 = (vec2 + vec1); - // CHECKPP: call{{.*}} <2 x float> @llvm.arithmetic.fence.v2f32 - - v = __arithmetic_fence(a + b); - // CHECKFAST: call{{.*}} float @llvm.arithmetic.fence.f32(float %add{{.*}}) - - v = (a + b); - // CHECKPP: call{{.*}} float @llvm.arithmetic.fence.f32(float %add{{.*}}) - v = a + (b*b); - // CHECKPP: fmul reassoc - // CHECKPP-NEXT: call{{.*}} float @llvm.arithmetic.fence.f32(float %mul) - // CHECKNP: fmul - // CHECKNP: fadd - v = b + a*a; - // CHECKPP: call{{.*}} float @llvm.fmuladd.f32 - // CHECKNP: fmul - // CHECKNP: fadd - v = b + __arithmetic_fence(a*a); // Fence blocks recognition of FMA - // CHECKPP: fmul - // CHECKNP: fmul - - b = (a); - (a) = b; - // CHECK-NEXT fptosi - // CHECK-NEXT store i32 - // CHECK-NEXT load float - // CHECK-NEXT store float - // CHECK-NEXT load float - // CHECK-NEXT store float - return 0; - // CHECK-NEXT ret i32 0 -} -int addit1(int a, int b) { - // CHECK: define {{.*}}@addit1(i32 %a, i32 %b{{.*}} - v = (a + b); - // CHECK-NOT: call{{.*}} float @llvm.arithmetic.fence.int(float %add) - return 0; -} -#ifdef FAST -#pragma float_control(precise, on) -int subit(float a, float b, float *fp) { - // CHECKFAST: define {{.*}}@subit(float %a, float %b{{.*}} - *fp = __arithmetic_fence(a - b); - *fp = (a + b); - // CHECK-NOT: call{{.*}} float @llvm.arithmetic.fence.f32(float %add) - return 0; -} -#endif diff --git a/clang/test/Driver/clang_f_opts.c b/clang/test/Driver/clang_f_opts.c index d729378403f3f..a255f68713aec 100644 --- a/clang/test/Driver/clang_f_opts.c +++ b/clang/test/Driver/clang_f_opts.c @@ -1,14 +1,13 @@ // REQUIRES: clang-driver // RUN: %clang -### -S -fasm -fblocks -fbuiltin -fno-math-errno -fcommon -fpascal-strings -fno-blocks -fno-builtin -fmath-errno -fno-common -fno-pascal-strings -fblocks -fbuiltin -fmath-errno -fcommon -fpascal-strings -fsplit-stack %s 2>&1 | FileCheck -check-prefix=CHECK-OPTIONS1 %s -// RUN: %clang -### -S -fasm -fblocks -fbuiltin -fno-math-errno -fcommon -fpascal-strings -fno-asm -fno-blocks -fno-builtin -fmath-errno -fno-common -fno-pascal-strings -fno-show-source-location -fshort-enums -fprotect-parens %s 2>&1 | FileCheck -check-prefix=CHECK-OPTIONS2 %s +// RUN: %clang -### -S -fasm -fblocks -fbuiltin -fno-math-errno -fcommon -fpascal-strings -fno-asm -fno-blocks -fno-builtin -fmath-errno -fno-common -fno-pascal-strings -fno-show-source-location -fshort-enums %s 2>&1 | FileCheck -check-prefix=CHECK-OPTIONS2 %s // CHECK-OPTIONS1: -fsplit-stack // CHECK-OPTIONS1: -fgnu-keywords // CHECK-OPTIONS1: -fblocks // CHECK-OPTIONS1: -fpascal-strings -// CHECK-OPTIONS2: -fprotect-parens // CHECK-OPTIONS2: -fmath-errno // CHECK-OPTIONS2: -fno-gnu-keywords // CHECK-OPTIONS2: -fno-builtin diff --git a/clang/test/Sema/arithmetic-fence-builtin.c b/clang/test/Sema/arithmetic-fence-builtin.c deleted file mode 100644 index 4f4f0a02cde9e..0000000000000 --- a/clang/test/Sema/arithmetic-fence-builtin.c +++ /dev/null @@ -1,48 +0,0 @@ -// RUN: %clang_cc1 -triple i386-pc-linux-gnu -emit-llvm -o - -verify -x c++ %s -// RUN: %clang_cc1 -triple ppc64le -DPPC -emit-llvm -o - -verify -x c++ %s -// RUN: not %clang_cc1 -triple ppc64le -DPPC -emit-llvm -o - -x c++ %s \ -// RUN: -fprotect-parens 2>&1 | FileCheck -check-prefix=PPC %s -#ifndef PPC -int v; -template T addT(T a, T b) { - T *q = __arithmetic_fence(&a); - // expected-error@-1 {{invalid operand of type 'float *' where floating, complex or a vector of such types is required}} - // expected-error@-2 {{invalid operand of type 'int *' where floating, complex or a vector of such types is required}} - return __arithmetic_fence(a + b); - // expected-error@-1 {{invalid operand of type 'int' where floating, complex or a vector of such types is required}} -} -int addit(int a, int b) { - float x, y; - typedef struct { - int a, b; - } stype; - stype s; - s = __arithmetic_fence(s); // expected-error {{invalid operand of type 'stype' where floating, complex or a vector of such types is required}} - x = __arithmetic_fence(); // expected-error {{too few arguments to function call, expected 1, have 0}} - x = __arithmetic_fence(x, y); // expected-error {{too many arguments to function call, expected 1, have 2}} - // Complex is supported. - _Complex double cd, cd1; - cd = __arithmetic_fence(cd1); - // Vector is supported. - typedef float __v4hi __attribute__((__vector_size__(8))); - __v4hi vec1, vec2; - vec1 = __arithmetic_fence(vec2); - - v = __arithmetic_fence(a + b); // expected-error {{invalid operand of type 'int' where floating, complex or a vector of such types is required}} - float f = addT(a, b); // expected-note {{in instantiation of function template specialization 'addT' requested here}} - int i = addT(1, 2); // expected-note {{in instantiation of function template specialization 'addT' requested here}} - constexpr float d = 1.0 + 2.0; - constexpr float c = __arithmetic_fence(1.0 + 2.0); - constexpr float e = __arithmetic_fence(d); - return 0; -} -bool func(float f1, float f2, float f3) { - return (f1 == f2 && f1 == f3) || f2 == f3; // Should not warn here -} -static_assert( __arithmetic_fence(1.0 + 2.0), "message" ); -#else -float addit(float a, float b) { - return __arithmetic_fence(a+b); // expected-error {{builtin is not supported on this target}} -} -#endif -//PPC: error: option '-fprotect-parens' cannot be specified on this target From 7bcb3bd169e7f3540601f05932336aac2ea291f6 Mon Sep 17 00:00:00 2001 From: Sjoerd Meijer Date: Fri, 25 Jun 2021 15:52:19 +0100 Subject: [PATCH 071/619] [AArch64] Added tests to neon-truncstore.ll. NFC. --- llvm/test/CodeGen/AArch64/neon-truncstore.ll | 227 +++++++++++++++++-- 1 file changed, 210 insertions(+), 17 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/neon-truncstore.ll b/llvm/test/CodeGen/AArch64/neon-truncstore.ll index 2bbab0bfa3e56..7292841410a05 100644 --- a/llvm/test/CodeGen/AArch64/neon-truncstore.ll +++ b/llvm/test/CodeGen/AArch64/neon-truncstore.ll @@ -1,40 +1,233 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s ; A vector TruncStore can not be selected. ; Test a trunc IR and a vector store IR can be selected correctly. -define void @truncStore.v2i64(<2 x i64> %a, <2 x i32>* %result) { -; CHECK-LABEL: truncStore.v2i64: -; CHECK: xtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d -; CHECK: {{st1 { v[0-9]+.2s }|str d[0-9]+}}, [x{{[0-9]+|sp}}] + +define void @v2i64_v2i32(<2 x i64> %a, <2 x i32>* %result) { +; CHECK-LABEL: v2i64_v2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: str d0, [x0] +; CHECK-NEXT: ret %b = trunc <2 x i64> %a to <2 x i32> store <2 x i32> %b, <2 x i32>* %result ret void } -define void @truncStore.v4i32(<4 x i32> %a, <4 x i16>* %result) { -; CHECK-LABEL: truncStore.v4i32: -; CHECK: xtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s -; CHECK: {{st1 { v[0-9]+.4h }|str d[0-9]+}}, [x{{[0-9]+|sp}}] +define void @v4i64_v4i32(<4 x i64> %a, <4 x i32>* %result) { +; CHECK-LABEL: v4i64_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: xtn2 v0.4s, v1.2d +; CHECK-NEXT: str q0, [x0] +; CHECK-NEXT: ret + %b = trunc <4 x i64> %a to <4 x i32> + store <4 x i32> %b, <4 x i32>* %result + ret void +} + +define void @v8i64_v8i32(<8 x i64> %a, <8 x i32>* %result) { +; CHECK-LABEL: v8i64_v8i32: +; CHECK: // %bb.0: +; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: xtn v2.2s, v2.2d +; CHECK-NEXT: xtn2 v0.4s, v1.2d +; CHECK-NEXT: xtn2 v2.4s, v3.2d +; CHECK-NEXT: stp q0, q2, [x0] +; CHECK-NEXT: ret + %b = trunc <8 x i64> %a to <8 x i32> + store <8 x i32> %b, <8 x i32>* %result + ret void +} + +define void @v2i32_v2i16(<2 x i32> %a, <2 x i16>* %result) { +; CHECK-LABEL: v2i32_v2i16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: mov w8, v0.s[1] +; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: strh w8, [x0, #2] +; CHECK-NEXT: strh w9, [x0] +; CHECK-NEXT: ret + %b = trunc <2 x i32> %a to <2 x i16> + store <2 x i16> %b, <2 x i16>* %result + ret void +} + +define void @v4i32_v4i16(<4 x i32> %a, <4 x i16>* %result) { +; CHECK-LABEL: v4i32_v4i16: +; CHECK: // %bb.0: +; CHECK-NEXT: xtn v0.4h, v0.4s +; CHECK-NEXT: str d0, [x0] +; CHECK-NEXT: ret %b = trunc <4 x i32> %a to <4 x i16> store <4 x i16> %b, <4 x i16>* %result ret void } -define void @truncStore.v4i8(<4 x i32> %a, <4 x i8>* %result) { -; CHECK-LABEL: truncStore.v4i8: -; CHECK: xtn [[TMP:(v[0-9]+)]].4h, v{{[0-9]+}}.4s -; CHECK-NEXT: xtn [[TMP2:(v[0-9]+)]].8b, [[TMP]].8h -; CHECK-NEXT: str s{{[0-9]+}}, [x{{[0-9]+}}] +define void @v8i32_v8i16(<8 x i32> %a, <8 x i16>* %result) { +; CHECK-LABEL: v8i32_v8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: xtn v0.4h, v0.4s +; CHECK-NEXT: xtn2 v0.8h, v1.4s +; CHECK-NEXT: str q0, [x0] +; CHECK-NEXT: ret + %b = trunc <8 x i32> %a to <8 x i16> + store <8 x i16> %b, <8 x i16>* %result + ret void +} + +define void @v16i32_v16i16(<16 x i32> %a, <16 x i16>* %result) { +; CHECK-LABEL: v16i32_v16i16: +; CHECK: // %bb.0: +; CHECK-NEXT: xtn v0.4h, v0.4s +; CHECK-NEXT: xtn v2.4h, v2.4s +; CHECK-NEXT: xtn2 v0.8h, v1.4s +; CHECK-NEXT: xtn2 v2.8h, v3.4s +; CHECK-NEXT: stp q0, q2, [x0] +; CHECK-NEXT: ret + %b = trunc <16 x i32> %a to <16 x i16> + store <16 x i16> %b, <16 x i16>* %result + ret void +} + +define void @v2i32_v2i8(<2 x i32> %a, <2 x i8>* %result) { +; CHECK-LABEL: v2i32_v2i8: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: mov w8, v0.s[1] +; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: strb w8, [x0, #1] +; CHECK-NEXT: strb w9, [x0] +; CHECK-NEXT: ret + %b = trunc <2 x i32> %a to <2 x i8> + store <2 x i8> %b, <2 x i8>* %result + ret void +} + +define void @v4i32_v4i8(<4 x i32> %a, <4 x i8>* %result) { +; CHECK-LABEL: v4i32_v4i8: +; CHECK: // %bb.0: +; CHECK-NEXT: xtn v0.4h, v0.4s +; CHECK-NEXT: xtn v0.8b, v0.8h +; CHECK-NEXT: str s0, [x0] +; CHECK-NEXT: ret %b = trunc <4 x i32> %a to <4 x i8> store <4 x i8> %b, <4 x i8>* %result ret void } -define void @truncStore.v8i16(<8 x i16> %a, <8 x i8>* %result) { -; CHECK-LABEL: truncStore.v8i16: -; CHECK: xtn v{{[0-9]+}}.8b, v{{[0-9]+}}.8h -; CHECK: {{st1 { v[0-9]+.8b }|str d[0-9]+}}, [x{{[0-9]+|sp}}] +define void @v8i32_v8i8(<8 x i32> %a, <8 x i8>* %result) { +; CHECK-LABEL: v8i32_v8i8: +; CHECK: // %bb.0: +; CHECK-NEXT: xtn v0.4h, v0.4s +; CHECK-NEXT: xtn2 v0.8h, v1.4s +; CHECK-NEXT: xtn v0.8b, v0.8h +; CHECK-NEXT: str d0, [x0] +; CHECK-NEXT: ret + %b = trunc <8 x i32> %a to <8 x i8> + store <8 x i8> %b, <8 x i8>* %result + ret void +} + +define void @v16i32_v16i8(<16 x i32> %a, <16 x i8>* %result) { +; CHECK-LABEL: v16i32_v16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: xtn v2.4h, v2.4s +; CHECK-NEXT: xtn v0.4h, v0.4s +; CHECK-NEXT: xtn2 v2.8h, v3.4s +; CHECK-NEXT: xtn2 v0.8h, v1.4s +; CHECK-NEXT: xtn v0.8b, v0.8h +; CHECK-NEXT: xtn2 v0.16b, v2.8h +; CHECK-NEXT: str q0, [x0] +; CHECK-NEXT: ret + %b = trunc <16 x i32> %a to <16 x i8> + store <16 x i8> %b, <16 x i8>* %result + ret void +} + +define void @v32i32_v32i8(<32 x i32> %a, <32 x i8>* %result) { +; CHECK-LABEL: v32i32_v32i8: +; CHECK: // %bb.0: +; CHECK-NEXT: xtn v2.4h, v2.4s +; CHECK-NEXT: xtn v0.4h, v0.4s +; CHECK-NEXT: xtn2 v2.8h, v3.4s +; CHECK-NEXT: xtn2 v0.8h, v1.4s +; CHECK-NEXT: xtn v6.4h, v6.4s +; CHECK-NEXT: xtn v4.4h, v4.4s +; CHECK-NEXT: xtn v0.8b, v0.8h +; CHECK-NEXT: xtn2 v0.16b, v2.8h +; CHECK-NEXT: xtn2 v6.8h, v7.4s +; CHECK-NEXT: xtn2 v4.8h, v5.4s +; CHECK-NEXT: xtn v1.8b, v4.8h +; CHECK-NEXT: xtn2 v1.16b, v6.8h +; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: ret + %b = trunc <32 x i32> %a to <32 x i8> + store <32 x i8> %b, <32 x i8>* %result + ret void +} + +define void @v2i16_v2i8(<2 x i16> %a, <2 x i8>* %result) { +; CHECK-LABEL: v2i16_v2i8: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: mov w8, v0.s[1] +; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: strb w8, [x0, #1] +; CHECK-NEXT: strb w9, [x0] +; CHECK-NEXT: ret + %b = trunc <2 x i16> %a to <2 x i8> + store <2 x i8> %b, <2 x i8>* %result + ret void +} + +define void @v4i16_v4i8(<4 x i16> %a, <4 x i8>* %result) { +; CHECK-LABEL: v4i16_v4i8: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: xtn v0.8b, v0.8h +; CHECK-NEXT: str s0, [x0] +; CHECK-NEXT: ret + %b = trunc <4 x i16> %a to <4 x i8> + store <4 x i8> %b, <4 x i8>* %result + ret void +} + +define void @v8i16_v8i8(<8 x i16> %a, <8 x i8>* %result) { +; CHECK-LABEL: v8i16_v8i8: +; CHECK: // %bb.0: +; CHECK-NEXT: xtn v0.8b, v0.8h +; CHECK-NEXT: str d0, [x0] +; CHECK-NEXT: ret %b = trunc <8 x i16> %a to <8 x i8> store <8 x i8> %b, <8 x i8>* %result ret void } + +define void @v16i16_v16i8(<16 x i16> %a, <16 x i8>* %result) { +; CHECK-LABEL: v16i16_v16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: xtn v0.8b, v0.8h +; CHECK-NEXT: xtn2 v0.16b, v1.8h +; CHECK-NEXT: str q0, [x0] +; CHECK-NEXT: ret + %b = trunc <16 x i16> %a to <16 x i8> + store <16 x i8> %b, <16 x i8>* %result + ret void +} + +define void @v32i16_v32i8(<32 x i16> %a, <32 x i8>* %result) { +; CHECK-LABEL: v32i16_v32i8: +; CHECK: // %bb.0: +; CHECK-NEXT: xtn v0.8b, v0.8h +; CHECK-NEXT: xtn v2.8b, v2.8h +; CHECK-NEXT: xtn2 v0.16b, v1.8h +; CHECK-NEXT: xtn2 v2.16b, v3.8h +; CHECK-NEXT: stp q0, q2, [x0] +; CHECK-NEXT: ret + %b = trunc <32 x i16> %a to <32 x i8> + store <32 x i8> %b, <32 x i8>* %result + ret void +} From 3a7cea2858ff2665c5430ead186a45a7f7a2d112 Mon Sep 17 00:00:00 2001 From: Sjoerd Meijer Date: Mon, 28 Jun 2021 17:25:53 +0100 Subject: [PATCH 072/619] Revert "[AArch64] Custom lower <4 x i8> loads" This reverts commit 51e434fc2590d1d3ffa6545cd07290a238db2b88 because of a build bot failure in test-suite::GCC-C-execute-pr60960.test that I need to investigate. --- .../Target/AArch64/AArch64ISelLowering.cpp | 43 +--- llvm/lib/Target/AArch64/AArch64ISelLowering.h | 1 - llvm/test/CodeGen/AArch64/aarch64-load-ext.ll | 196 ++---------------- llvm/test/CodeGen/AArch64/arm64-vshift.ll | 33 +-- llvm/test/CodeGen/AArch64/neon-extload.ll | 145 +++++++++++++ llvm/test/CodeGen/AArch64/sadd_sat_vec.ll | 20 +- llvm/test/CodeGen/AArch64/ssub_sat_vec.ll | 20 +- llvm/test/CodeGen/AArch64/uadd_sat_vec.ll | 20 +- llvm/test/CodeGen/AArch64/usub_sat_vec.ll | 20 +- 9 files changed, 245 insertions(+), 253 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/neon-extload.ll diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 16bb7eb222723..9ceb91ea8017a 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1131,13 +1131,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::VSCALE, MVT::i32, Custom); setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom); - - setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Custom); - setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Custom); - setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Custom); - setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i8, Custom); - setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8, Custom); - setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i8, Custom); } if (Subtarget->hasSVE()) { @@ -4483,40 +4476,6 @@ SDValue AArch64TargetLowering::LowerSTORE(SDValue Op, return SDValue(); } -// Custom lowering for extending v4i8 vector loads. -SDValue AArch64TargetLowering::LowerLOAD(SDValue Op, - SelectionDAG &DAG) const { - SDLoc DL(Op); - LoadSDNode *LoadNode = cast(Op); - assert(LoadNode && "Expected custom lowering of a load node"); - EVT VT = Op->getValueType(0); - assert((VT == MVT::v4i16 || VT == MVT::v4i32) && "Expected v4i16 or v4i32"); - - if (LoadNode->getMemoryVT() != MVT::v4i8) - return SDValue(); - - unsigned ExtType; - if (LoadNode->getExtensionType() == ISD::SEXTLOAD) - ExtType = ISD::SIGN_EXTEND; - else if (LoadNode->getExtensionType() == ISD::ZEXTLOAD || - LoadNode->getExtensionType() == ISD::EXTLOAD) - ExtType = ISD::ZERO_EXTEND; - else - return SDValue(); - - SDValue Load = DAG.getLoad(MVT::f32, DL, DAG.getEntryNode(), - LoadNode->getBasePtr(), MachinePointerInfo()); - SDValue Chain = Load.getValue(1); - SDValue Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f32, Load); - SDValue BC = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Vec); - SDValue Ext = DAG.getNode(ExtType, DL, MVT::v8i16, BC); - Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Ext, - DAG.getConstant(0, DL, MVT::i64)); - if (VT == MVT::v4i32) - Ext = DAG.getNode(ExtType, DL, MVT::v4i32, Ext); - return DAG.getMergeValues({Ext, Chain}, DL); -} - // Generate SUBS and CSEL for integer abs. SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const { MVT VT = Op.getSimpleValueType(); @@ -4760,7 +4719,7 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, case ISD::LOAD: if (useSVEForFixedLengthVectorVT(Op.getValueType())) return LowerFixedLengthVectorLoadToSVE(Op, DAG); - return LowerLOAD(Op, DAG); + llvm_unreachable("Unexpected request to lower ISD::LOAD"); case ISD::ADD: return LowerToPredicatedOp(Op, DAG, AArch64ISD::ADD_PRED); case ISD::AND: diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 7daa61996739f..f3b2da8304303 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -851,7 +851,6 @@ class AArch64TargetLowering : public TargetLowering { SmallVectorImpl &InVals, bool isThisReturn, SDValue ThisVal) const; - SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll b/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll index d303ab8b80f3c..308352e3e2277 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll @@ -86,195 +86,27 @@ define <2 x i8> @test3(<2 x i8>* %v2i8_ptr) { define <4 x i8> @test4(<4 x i8>* %v4i8_ptr) { ; CHECK-LE-LABEL: test4: ; CHECK-LE: // %bb.0: -; CHECK-LE-NEXT: ldr s0, [x0] -; CHECK-LE-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-LE-NEXT: ld1 { v0.b }[0], [x0] +; CHECK-LE-NEXT: add x8, x0, #1 // =1 +; CHECK-LE-NEXT: ld1 { v0.b }[2], [x8] +; CHECK-LE-NEXT: add x8, x0, #2 // =2 +; CHECK-LE-NEXT: ld1 { v0.b }[4], [x8] +; CHECK-LE-NEXT: add x8, x0, #3 // =3 +; CHECK-LE-NEXT: ld1 { v0.b }[6], [x8] ; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-LE-NEXT: ret ; ; CHECK-BE-LABEL: test4: ; CHECK-BE: // %bb.0: -; CHECK-BE-NEXT: ldr s0, [x0] -; CHECK-BE-NEXT: rev32 v0.8b, v0.8b -; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-BE-NEXT: ld1 { v0.b }[0], [x0] +; CHECK-BE-NEXT: add x8, x0, #1 // =1 +; CHECK-BE-NEXT: ld1 { v0.b }[2], [x8] +; CHECK-BE-NEXT: add x8, x0, #2 // =2 +; CHECK-BE-NEXT: ld1 { v0.b }[4], [x8] +; CHECK-BE-NEXT: add x8, x0, #3 // =3 +; CHECK-BE-NEXT: ld1 { v0.b }[6], [x8] ; CHECK-BE-NEXT: rev64 v0.4h, v0.4h ; CHECK-BE-NEXT: ret %v4i8 = load <4 x i8>, <4 x i8>* %v4i8_ptr ret <4 x i8> %v4i8 } - -define <4 x i32> @fsext_v4i32(<4 x i8>* %a) { -; CHECK-LE-LABEL: fsext_v4i32: -; CHECK-LE: // %bb.0: -; CHECK-LE-NEXT: ldr s0, [x0] -; CHECK-LE-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-LE-NEXT: sshll v0.4s, v0.4h, #0 -; CHECK-LE-NEXT: ret -; -; CHECK-BE-LABEL: fsext_v4i32: -; CHECK-BE: // %bb.0: -; CHECK-BE-NEXT: ldr s0, [x0] -; CHECK-BE-NEXT: rev32 v0.8b, v0.8b -; CHECK-BE-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-BE-NEXT: sshll v0.4s, v0.4h, #0 -; CHECK-BE-NEXT: rev64 v0.4s, v0.4s -; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; CHECK-BE-NEXT: ret - %x = load <4 x i8>, <4 x i8>* %a - %y = sext <4 x i8> %x to <4 x i32> - ret <4 x i32> %y -} - -define <4 x i32> @fzext_v4i32(<4 x i8>* %a) { -; CHECK-LE-LABEL: fzext_v4i32: -; CHECK-LE: // %bb.0: -; CHECK-LE-NEXT: ldr s0, [x0] -; CHECK-LE-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-LE-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-LE-NEXT: ret -; -; CHECK-BE-LABEL: fzext_v4i32: -; CHECK-BE: // %bb.0: -; CHECK-BE-NEXT: ldr s0, [x0] -; CHECK-BE-NEXT: rev32 v0.8b, v0.8b -; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-BE-NEXT: rev64 v0.4s, v0.4s -; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; CHECK-BE-NEXT: ret - %x = load <4 x i8>, <4 x i8>* %a - %y = zext <4 x i8> %x to <4 x i32> - ret <4 x i32> %y -} - -; TODO: This codegen could just be: -; ldrb w0, [x0] -; -define i32 @loadExti32(<4 x i8>* %ref) { -; CHECK-LE-LABEL: loadExti32: -; CHECK-LE: // %bb.0: -; CHECK-LE-NEXT: ldr s0, [x0] -; CHECK-LE-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-LE-NEXT: umov w8, v0.h[0] -; CHECK-LE-NEXT: and w0, w8, #0xff -; CHECK-LE-NEXT: ret -; -; CHECK-BE-LABEL: loadExti32: -; CHECK-BE: // %bb.0: -; CHECK-BE-NEXT: ldr s0, [x0] -; CHECK-BE-NEXT: rev32 v0.8b, v0.8b -; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-BE-NEXT: umov w8, v0.h[0] -; CHECK-BE-NEXT: and w0, w8, #0xff -; CHECK-BE-NEXT: ret - %a = load <4 x i8>, <4 x i8>* %ref - %vecext = extractelement <4 x i8> %a, i32 0 - %conv = zext i8 %vecext to i32 - ret i32 %conv -} - -define <4 x i16> @fsext_v4i16(<4 x i8>* %a) { -; CHECK-LE-LABEL: fsext_v4i16: -; CHECK-LE: // %bb.0: -; CHECK-LE-NEXT: ldr s0, [x0] -; CHECK-LE-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-LE-NEXT: ret -; -; CHECK-BE-LABEL: fsext_v4i16: -; CHECK-BE: // %bb.0: -; CHECK-BE-NEXT: ldr s0, [x0] -; CHECK-BE-NEXT: rev32 v0.8b, v0.8b -; CHECK-BE-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-BE-NEXT: rev64 v0.4h, v0.4h -; CHECK-BE-NEXT: ret - %x = load <4 x i8>, <4 x i8>* %a - %y = sext <4 x i8> %x to <4 x i16> - ret <4 x i16> %y -} - -define <4 x i16> @fzext_v4i16(<4 x i8>* %a) { -; CHECK-LE-LABEL: fzext_v4i16: -; CHECK-LE: // %bb.0: -; CHECK-LE-NEXT: ldr s0, [x0] -; CHECK-LE-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-LE-NEXT: ret -; -; CHECK-BE-LABEL: fzext_v4i16: -; CHECK-BE: // %bb.0: -; CHECK-BE-NEXT: ldr s0, [x0] -; CHECK-BE-NEXT: rev32 v0.8b, v0.8b -; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-BE-NEXT: rev64 v0.4h, v0.4h -; CHECK-BE-NEXT: ret - %x = load <4 x i8>, <4 x i8>* %a - %y = zext <4 x i8> %x to <4 x i16> - ret <4 x i16> %y -} - -define <4 x i16> @anyext_v4i16(<4 x i8> *%a, <4 x i8> *%b) { -; CHECK-LE-LABEL: anyext_v4i16: -; CHECK-LE: // %bb.0: -; CHECK-LE-NEXT: ldr s0, [x0] -; CHECK-LE-NEXT: ldr s1, [x1] -; CHECK-LE-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-LE-NEXT: ushll v1.8h, v1.8b, #0 -; CHECK-LE-NEXT: add v0.4h, v0.4h, v1.4h -; CHECK-LE-NEXT: shl v0.4h, v0.4h, #8 -; CHECK-LE-NEXT: sshr v0.4h, v0.4h, #8 -; CHECK-LE-NEXT: ret -; -; CHECK-BE-LABEL: anyext_v4i16: -; CHECK-BE: // %bb.0: -; CHECK-BE-NEXT: ldr s0, [x0] -; CHECK-BE-NEXT: ldr s1, [x1] -; CHECK-BE-NEXT: rev32 v0.8b, v0.8b -; CHECK-BE-NEXT: rev32 v1.8b, v1.8b -; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-BE-NEXT: ushll v1.8h, v1.8b, #0 -; CHECK-BE-NEXT: add v0.4h, v0.4h, v1.4h -; CHECK-BE-NEXT: shl v0.4h, v0.4h, #8 -; CHECK-BE-NEXT: sshr v0.4h, v0.4h, #8 -; CHECK-BE-NEXT: rev64 v0.4h, v0.4h -; CHECK-BE-NEXT: ret - %x = load <4 x i8>, <4 x i8>* %a, align 4 - %y = load <4 x i8>, <4 x i8>* %b, align 4 - %z = add <4 x i8> %x, %y - %s = sext <4 x i8> %z to <4 x i16> - ret <4 x i16> %s -} - -define <4 x i32> @anyext_v4i32(<4 x i8> *%a, <4 x i8> *%b) { -; CHECK-LE-LABEL: anyext_v4i32: -; CHECK-LE: // %bb.0: -; CHECK-LE-NEXT: ldr s0, [x0] -; CHECK-LE-NEXT: ldr s1, [x1] -; CHECK-LE-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-LE-NEXT: ushll v1.8h, v1.8b, #0 -; CHECK-LE-NEXT: add v0.4h, v0.4h, v1.4h -; CHECK-LE-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-LE-NEXT: shl v0.4s, v0.4s, #24 -; CHECK-LE-NEXT: sshr v0.4s, v0.4s, #24 -; CHECK-LE-NEXT: ret -; -; CHECK-BE-LABEL: anyext_v4i32: -; CHECK-BE: // %bb.0: -; CHECK-BE-NEXT: ldr s0, [x0] -; CHECK-BE-NEXT: ldr s1, [x1] -; CHECK-BE-NEXT: rev32 v0.8b, v0.8b -; CHECK-BE-NEXT: rev32 v1.8b, v1.8b -; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-BE-NEXT: ushll v1.8h, v1.8b, #0 -; CHECK-BE-NEXT: add v0.4h, v0.4h, v1.4h -; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-BE-NEXT: shl v0.4s, v0.4s, #24 -; CHECK-BE-NEXT: sshr v0.4s, v0.4s, #24 -; CHECK-BE-NEXT: rev64 v0.4s, v0.4s -; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; CHECK-BE-NEXT: ret - %x = load <4 x i8>, <4 x i8>* %a, align 4 - %y = load <4 x i8>, <4 x i8>* %b, align 4 - %z = add <4 x i8> %x, %y - %s = sext <4 x i8> %z to <4 x i32> - ret <4 x i32> %s -} diff --git a/llvm/test/CodeGen/AArch64/arm64-vshift.ll b/llvm/test/CodeGen/AArch64/arm64-vshift.ll index 07b257043426d..c63f3399e636f 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vshift.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vshift.ll @@ -1494,12 +1494,17 @@ define <8 x i16> @neon.ushl8h_no_constant_shift(<8 x i8>* %A) nounwind { } define <4 x i32> @neon.ushl8h_constant_shift_extend_not_2x(<4 x i8>* %A) nounwind { -; CHECK-LABEL: neon.ushl8h_constant_shift_extend_not_2x: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr s0, [x0] -; CHECK-NEXT: ushll.8h v0, v0, #0 -; CHECK-NEXT: ushll.4s v0, v0, #1 -; CHECK-NEXT: ret +;CHECK-LABEL: @neon.ushl8h_constant_shift_extend_not_2x +;CHECK-NOT: ushll.8h v0, +;CHECK: ldrb w8, [x0] +;CHECK: fmov s0, w8 +;CHECK: ldrb w8, [x0, #1] +;CHECK: mov.s v0[1], w8 +;CHECK: ldrb w8, [x0, #2] +;CHECK: mov.s v0[2], w8 +;CHECK: ldrb w8, [x0, #3] +;CHECK: mov.s v0[3], w8 +;CHECK: shl.4s v0, v0, #1 %tmp1 = load <4 x i8>, <4 x i8>* %A %tmp2 = zext <4 x i8> %tmp1 to <4 x i32> %tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %tmp2, <4 x i32> ) @@ -1632,12 +1637,16 @@ define <8 x i16> @neon.sshll8h_constant_shift(<8 x i8>* %A) nounwind { } define <4 x i32> @neon.sshl4s_wrong_ext_constant_shift(<4 x i8>* %A) nounwind { -; CHECK-LABEL: neon.sshl4s_wrong_ext_constant_shift: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr s0, [x0] -; CHECK-NEXT: sshll.8h v0, v0, #0 -; CHECK-NEXT: sshll.4s v0, v0, #1 -; CHECK-NEXT: ret +;CHECK-LABEL: neon.sshl4s_wrong_ext_constant_shift +;CHECK: ldrsb w8, [x0] +;CHECK-NEXT: fmov s0, w8 +;CHECK-NEXT: ldrsb w8, [x0, #1] +;CHECK-NEXT: mov.s v0[1], w8 +;CHECK-NEXT: ldrsb w8, [x0, #2] +;CHECK-NEXT: mov.s v0[2], w8 +;CHECK-NEXT: ldrsb w8, [x0, #3] +;CHECK-NEXT: mov.s v0[3], w8 +;CHECK-NEXT: shl.4s v0, v0, #1 %tmp1 = load <4 x i8>, <4 x i8>* %A %tmp2 = sext <4 x i8> %tmp1 to <4 x i32> %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp2, <4 x i32> ) diff --git a/llvm/test/CodeGen/AArch64/neon-extload.ll b/llvm/test/CodeGen/AArch64/neon-extload.ll new file mode 100644 index 0000000000000..321a1babb411d --- /dev/null +++ b/llvm/test/CodeGen/AArch64/neon-extload.ll @@ -0,0 +1,145 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=LE +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=BE + +define <4 x i32> @fsext_v4i32(<4 x i8>* %a) { +; LE-LABEL: fsext_v4i32: +; LE: // %bb.0: +; LE-NEXT: ldrsb w8, [x0] +; LE-NEXT: ldrsb w9, [x0, #1] +; LE-NEXT: ldrsb w10, [x0, #2] +; LE-NEXT: ldrsb w11, [x0, #3] +; LE-NEXT: fmov s0, w8 +; LE-NEXT: mov v0.s[1], w9 +; LE-NEXT: mov v0.s[2], w10 +; LE-NEXT: mov v0.s[3], w11 +; LE-NEXT: ret +; +; BE-LABEL: fsext_v4i32: +; BE: // %bb.0: +; BE-NEXT: ldrsb w8, [x0] +; BE-NEXT: ldrsb w9, [x0, #1] +; BE-NEXT: ldrsb w10, [x0, #2] +; BE-NEXT: ldrsb w11, [x0, #3] +; BE-NEXT: fmov s0, w8 +; BE-NEXT: mov v0.s[1], w9 +; BE-NEXT: mov v0.s[2], w10 +; BE-NEXT: mov v0.s[3], w11 +; BE-NEXT: rev64 v0.4s, v0.4s +; BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; BE-NEXT: ret + %x = load <4 x i8>, <4 x i8>* %a + %y = sext <4 x i8> %x to <4 x i32> + ret <4 x i32> %y +} + +define <4 x i32> @fzext_v4i32(<4 x i8>* %a) { +; LE-LABEL: fzext_v4i32: +; LE: // %bb.0: +; LE-NEXT: ldrb w8, [x0] +; LE-NEXT: ldrb w9, [x0, #1] +; LE-NEXT: ldrb w10, [x0, #2] +; LE-NEXT: ldrb w11, [x0, #3] +; LE-NEXT: fmov s0, w8 +; LE-NEXT: mov v0.s[1], w9 +; LE-NEXT: mov v0.s[2], w10 +; LE-NEXT: mov v0.s[3], w11 +; LE-NEXT: ret +; +; BE-LABEL: fzext_v4i32: +; BE: // %bb.0: +; BE-NEXT: ldrb w8, [x0] +; BE-NEXT: ldrb w9, [x0, #1] +; BE-NEXT: ldrb w10, [x0, #2] +; BE-NEXT: ldrb w11, [x0, #3] +; BE-NEXT: fmov s0, w8 +; BE-NEXT: mov v0.s[1], w9 +; BE-NEXT: mov v0.s[2], w10 +; BE-NEXT: mov v0.s[3], w11 +; BE-NEXT: rev64 v0.4s, v0.4s +; BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; BE-NEXT: ret + %x = load <4 x i8>, <4 x i8>* %a + %y = zext <4 x i8> %x to <4 x i32> + ret <4 x i32> %y +} + +define i32 @loadExt.i32(<4 x i8>* %ref) { +; CHECK-LABEL: loadExt.i32: +; CHECK: ldrb +; LE-LABEL: loadExt.i32: +; LE: // %bb.0: +; LE-NEXT: ldrb w0, [x0] +; LE-NEXT: ret +; +; BE-LABEL: loadExt.i32: +; BE: // %bb.0: +; BE-NEXT: ldrb w0, [x0] +; BE-NEXT: ret + %a = load <4 x i8>, <4 x i8>* %ref + %vecext = extractelement <4 x i8> %a, i32 0 + %conv = zext i8 %vecext to i32 + ret i32 %conv +} + +define <4 x i16> @fsext_v4i16(<4 x i8>* %a) { +; LE-LABEL: fsext_v4i16: +; LE: // %bb.0: +; LE-NEXT: ldrsb w8, [x0] +; LE-NEXT: ldrsb w9, [x0, #1] +; LE-NEXT: ldrsb w10, [x0, #2] +; LE-NEXT: ldrsb w11, [x0, #3] +; LE-NEXT: fmov s0, w8 +; LE-NEXT: mov v0.h[1], w9 +; LE-NEXT: mov v0.h[2], w10 +; LE-NEXT: mov v0.h[3], w11 +; LE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; LE-NEXT: ret +; +; BE-LABEL: fsext_v4i16: +; BE: // %bb.0: +; BE-NEXT: ldrsb w8, [x0] +; BE-NEXT: ldrsb w9, [x0, #1] +; BE-NEXT: ldrsb w10, [x0, #2] +; BE-NEXT: ldrsb w11, [x0, #3] +; BE-NEXT: fmov s0, w8 +; BE-NEXT: mov v0.h[1], w9 +; BE-NEXT: mov v0.h[2], w10 +; BE-NEXT: mov v0.h[3], w11 +; BE-NEXT: rev64 v0.4h, v0.4h +; BE-NEXT: ret + %x = load <4 x i8>, <4 x i8>* %a + %y = sext <4 x i8> %x to <4 x i16> + ret <4 x i16> %y +} + +define <4 x i16> @fzext_v4i16(<4 x i8>* %a) { +; LE-LABEL: fzext_v4i16: +; LE: // %bb.0: +; LE-NEXT: ldrb w8, [x0] +; LE-NEXT: ldrb w9, [x0, #1] +; LE-NEXT: ldrb w10, [x0, #2] +; LE-NEXT: ldrb w11, [x0, #3] +; LE-NEXT: fmov s0, w8 +; LE-NEXT: mov v0.h[1], w9 +; LE-NEXT: mov v0.h[2], w10 +; LE-NEXT: mov v0.h[3], w11 +; LE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; LE-NEXT: ret +; +; BE-LABEL: fzext_v4i16: +; BE: // %bb.0: +; BE-NEXT: ldrb w8, [x0] +; BE-NEXT: ldrb w9, [x0, #1] +; BE-NEXT: ldrb w10, [x0, #2] +; BE-NEXT: ldrb w11, [x0, #3] +; BE-NEXT: fmov s0, w8 +; BE-NEXT: mov v0.h[1], w9 +; BE-NEXT: mov v0.h[2], w10 +; BE-NEXT: mov v0.h[3], w11 +; BE-NEXT: rev64 v0.4h, v0.4h +; BE-NEXT: ret + %x = load <4 x i8>, <4 x i8>* %a + %y = zext <4 x i8> %x to <4 x i16> + ret <4 x i16> %y +} diff --git a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll index 9c654f6719b18..cefd4758b3747 100644 --- a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll @@ -112,10 +112,22 @@ define void @v8i8(<8 x i8>* %px, <8 x i8>* %py, <8 x i8>* %pz) nounwind { define void @v4i8(<4 x i8>* %px, <4 x i8>* %py, <4 x i8>* %pz) nounwind { ; CHECK-LABEL: v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr s0, [x0] -; CHECK-NEXT: ldr s1, [x1] -; CHECK-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-NEXT: sshll v1.8h, v1.8b, #0 +; CHECK-NEXT: ldrsb w8, [x0] +; CHECK-NEXT: ldrsb w9, [x1] +; CHECK-NEXT: ldrsb w10, [x0, #1] +; CHECK-NEXT: ldrsb w11, [x1, #1] +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: fmov s1, w9 +; CHECK-NEXT: ldrsb w8, [x0, #2] +; CHECK-NEXT: ldrsb w9, [x1, #2] +; CHECK-NEXT: mov v0.h[1], w10 +; CHECK-NEXT: mov v1.h[1], w11 +; CHECK-NEXT: ldrsb w10, [x0, #3] +; CHECK-NEXT: ldrsb w11, [x1, #3] +; CHECK-NEXT: mov v0.h[2], w8 +; CHECK-NEXT: mov v1.h[2], w9 +; CHECK-NEXT: mov v0.h[3], w10 +; CHECK-NEXT: mov v1.h[3], w11 ; CHECK-NEXT: shl v1.4h, v1.4h, #8 ; CHECK-NEXT: shl v0.4h, v0.4h, #8 ; CHECK-NEXT: sqadd v0.4h, v0.4h, v1.4h diff --git a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll index 7c2e2330608e8..17af8a11aeee5 100644 --- a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll @@ -113,10 +113,22 @@ define void @v8i8(<8 x i8>* %px, <8 x i8>* %py, <8 x i8>* %pz) nounwind { define void @v4i8(<4 x i8>* %px, <4 x i8>* %py, <4 x i8>* %pz) nounwind { ; CHECK-LABEL: v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr s0, [x0] -; CHECK-NEXT: ldr s1, [x1] -; CHECK-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-NEXT: sshll v1.8h, v1.8b, #0 +; CHECK-NEXT: ldrsb w8, [x0] +; CHECK-NEXT: ldrsb w9, [x1] +; CHECK-NEXT: ldrsb w10, [x0, #1] +; CHECK-NEXT: ldrsb w11, [x1, #1] +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: fmov s1, w9 +; CHECK-NEXT: ldrsb w8, [x0, #2] +; CHECK-NEXT: ldrsb w9, [x1, #2] +; CHECK-NEXT: mov v0.h[1], w10 +; CHECK-NEXT: mov v1.h[1], w11 +; CHECK-NEXT: ldrsb w10, [x0, #3] +; CHECK-NEXT: ldrsb w11, [x1, #3] +; CHECK-NEXT: mov v0.h[2], w8 +; CHECK-NEXT: mov v1.h[2], w9 +; CHECK-NEXT: mov v0.h[3], w10 +; CHECK-NEXT: mov v1.h[3], w11 ; CHECK-NEXT: shl v1.4h, v1.4h, #8 ; CHECK-NEXT: shl v0.4h, v0.4h, #8 ; CHECK-NEXT: sqsub v0.4h, v0.4h, v1.4h diff --git a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll index 2b52e4c934c9d..21427a6a92d7e 100644 --- a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll @@ -112,11 +112,23 @@ define void @v8i8(<8 x i8>* %px, <8 x i8>* %py, <8 x i8>* %pz) nounwind { define void @v4i8(<4 x i8>* %px, <4 x i8>* %py, <4 x i8>* %pz) nounwind { ; CHECK-LABEL: v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr s0, [x0] -; CHECK-NEXT: ldr s1, [x1] +; CHECK-NEXT: ldrb w8, [x0] +; CHECK-NEXT: ldrb w9, [x1] +; CHECK-NEXT: ldrb w10, [x0, #1] +; CHECK-NEXT: ldrb w11, [x1, #1] +; CHECK-NEXT: ldrb w12, [x0, #2] +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: ldrb w8, [x1, #2] +; CHECK-NEXT: fmov s1, w9 +; CHECK-NEXT: mov v0.h[1], w10 +; CHECK-NEXT: ldrb w9, [x0, #3] +; CHECK-NEXT: ldrb w10, [x1, #3] +; CHECK-NEXT: mov v1.h[1], w11 +; CHECK-NEXT: mov v0.h[2], w12 +; CHECK-NEXT: mov v1.h[2], w8 +; CHECK-NEXT: mov v0.h[3], w9 +; CHECK-NEXT: mov v1.h[3], w10 ; CHECK-NEXT: movi d2, #0xff00ff00ff00ff -; CHECK-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-NEXT: ushll v1.8h, v1.8b, #0 ; CHECK-NEXT: add v0.4h, v0.4h, v1.4h ; CHECK-NEXT: umin v0.4h, v0.4h, v2.4h ; CHECK-NEXT: xtn v0.8b, v0.8h diff --git a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll index 63bbac3be3fb8..a0ab8040e8fc0 100644 --- a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll @@ -113,10 +113,22 @@ define void @v8i8(<8 x i8>* %px, <8 x i8>* %py, <8 x i8>* %pz) nounwind { define void @v4i8(<4 x i8>* %px, <4 x i8>* %py, <4 x i8>* %pz) nounwind { ; CHECK-LABEL: v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr s0, [x0] -; CHECK-NEXT: ldr s1, [x1] -; CHECK-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-NEXT: ushll v1.8h, v1.8b, #0 +; CHECK-NEXT: ldrb w8, [x0] +; CHECK-NEXT: ldrb w9, [x1] +; CHECK-NEXT: ldrb w10, [x0, #1] +; CHECK-NEXT: ldrb w11, [x1, #1] +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: fmov s1, w9 +; CHECK-NEXT: ldrb w8, [x0, #2] +; CHECK-NEXT: ldrb w9, [x1, #2] +; CHECK-NEXT: mov v0.h[1], w10 +; CHECK-NEXT: mov v1.h[1], w11 +; CHECK-NEXT: ldrb w10, [x0, #3] +; CHECK-NEXT: ldrb w11, [x1, #3] +; CHECK-NEXT: mov v0.h[2], w8 +; CHECK-NEXT: mov v1.h[2], w9 +; CHECK-NEXT: mov v0.h[3], w10 +; CHECK-NEXT: mov v1.h[3], w11 ; CHECK-NEXT: uqsub v0.4h, v0.4h, v1.4h ; CHECK-NEXT: xtn v0.8b, v0.8h ; CHECK-NEXT: str s0, [x2] From 2dca0b5a1ce431692136b293fd5f9ecadea31750 Mon Sep 17 00:00:00 2001 From: Anirudh Prasad Date: Mon, 28 Jun 2021 12:46:31 -0400 Subject: [PATCH 073/619] [AsmParser][SystemZ][z/OS] Fix hanging scenario in HLASMAsmParser class - In the caller of the overridden `parseStatement` function (i.e. the `AsmParser::Run()`) in the case of an error **and** if we're not at the start of the statement, we "eat" up until the end of the current statement, so we don't have to process it again. - However, in the HLASMAsmParser class what's happening is that, if an error occurs at the very start of the statement (for example, you invoke the HLASMAsmParser to parse a gnu directive), we will error out, but we never really progress in terms of the next token in the statement to parse. We simply keep looping processing the same error over and over again (partly because we're at the start of the statement) - To remedy this, when the `parseAsHLASMLabel` function fails, before returning, we "eat" until the end of the statement function, so we don't process it anymore. Reviewed By: uweigand Differential Revision: https://reviews.llvm.org/D104869 --- llvm/lib/MC/MCParser/AsmParser.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp index 5305dde360d4f..1adde169c0a16 100644 --- a/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/llvm/lib/MC/MCParser/AsmParser.cpp @@ -6302,8 +6302,12 @@ bool HLASMAsmParser::parseStatement(ParseStatementInfo &Info, if (ShouldParseAsHLASMLabel) { // If there were any errors while handling and emitting the label, // early return. - if (parseAsHLASMLabel(Info, SI)) + if (parseAsHLASMLabel(Info, SI)) { + // If we know we've failed in parsing, simply eat until end of the + // statement. This ensures that we don't process any other statements. + eatToEndOfStatement(); return true; + } } return parseAsMachineInstruction(Info, SI); From 88d5eba139598b51718b5a10b336e0bc9f51aff4 Mon Sep 17 00:00:00 2001 From: Stephan Herhut Date: Mon, 28 Jun 2021 18:45:29 +0200 Subject: [PATCH 074/619] Revert "Revert "[mlir][memref] Implement lowering of memref.copy to llvm"" This reverts commit 7d6e589fc86d7865fc4bf92c583209700dd32aac. Windows build was unbroken. --- .../mlir/Dialect/LLVMIR/FunctionCallUtils.h | 2 + .../mlir/ExecutionEngine/CRunnerUtils.h | 7 +++ .../StandardToLLVM/StandardToLLVM.cpp | 63 +++++++++++++++++++ .../Dialect/LLVMIR/IR/FunctionCallUtils.cpp | 10 +++ mlir/lib/ExecutionEngine/CRunnerUtils.cpp | 47 ++++++++++++++ 5 files changed, 129 insertions(+) diff --git a/mlir/include/mlir/Dialect/LLVMIR/FunctionCallUtils.h b/mlir/include/mlir/Dialect/LLVMIR/FunctionCallUtils.h index 7efff9774cd50..6380ff2d8e132 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/FunctionCallUtils.h +++ b/mlir/include/mlir/Dialect/LLVMIR/FunctionCallUtils.h @@ -45,6 +45,8 @@ LLVM::LLVMFuncOp lookupOrCreateMallocFn(ModuleOp moduleOp, Type indexType); LLVM::LLVMFuncOp lookupOrCreateAlignedAllocFn(ModuleOp moduleOp, Type indexType); LLVM::LLVMFuncOp lookupOrCreateFreeFn(ModuleOp moduleOp); +LLVM::LLVMFuncOp lookupOrCreateMemRefCopyFn(ModuleOp moduleOp, Type indexType, + Type unrankedDescriptorType); /// Create a FuncOp with signature `resultType`(`paramTypes`)` and name `name`. LLVM::LLVMFuncOp lookupOrCreateFn(ModuleOp moduleOp, StringRef name, diff --git a/mlir/include/mlir/ExecutionEngine/CRunnerUtils.h b/mlir/include/mlir/ExecutionEngine/CRunnerUtils.h index fb0b2a65a67eb..bd855fcc03a96 100644 --- a/mlir/include/mlir/ExecutionEngine/CRunnerUtils.h +++ b/mlir/include/mlir/ExecutionEngine/CRunnerUtils.h @@ -330,6 +330,13 @@ class DynamicMemRefType { const int64_t *strides; }; +//===----------------------------------------------------------------------===// +// Small runtime support library for memref.copy lowering during codegen. +//===----------------------------------------------------------------------===// +extern "C" MLIR_CRUNNERUTILS_EXPORT void +memrefCopy(int64_t elemSize, UnrankedMemRefType *src, + UnrankedMemRefType *dst); + //===----------------------------------------------------------------------===// // Small runtime support library for vector.print lowering during codegen. //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp index db5918e95f182..eb390bf8844fa 100644 --- a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp +++ b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp @@ -2618,6 +2618,68 @@ struct MemRefCastOpLowering : public ConvertOpToLLVMPattern { } }; +struct MemRefCopyOpLowering : public ConvertOpToLLVMPattern { + using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; + + LogicalResult + matchAndRewrite(memref::CopyOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override { + auto loc = op.getLoc(); + memref::CopyOp::Adaptor adaptor(operands); + auto srcType = op.source().getType().cast(); + auto targetType = op.target().getType().cast(); + + // First make sure we have an unranked memref descriptor representation. + auto makeUnranked = [&, this](Value ranked, BaseMemRefType type) { + auto rank = rewriter.create( + loc, getIndexType(), rewriter.getIndexAttr(type.getRank())); + auto *typeConverter = getTypeConverter(); + auto ptr = + typeConverter->promoteOneMemRefDescriptor(loc, ranked, rewriter); + auto voidPtr = + rewriter.create(loc, getVoidPtrType(), ptr) + .getResult(); + auto unrankedType = + UnrankedMemRefType::get(type.getElementType(), type.getMemorySpace()); + return UnrankedMemRefDescriptor::pack(rewriter, loc, *typeConverter, + unrankedType, + ValueRange{rank, voidPtr}); + }; + + Value unrankedSource = srcType.hasRank() + ? makeUnranked(adaptor.source(), srcType) + : adaptor.source(); + Value unrankedTarget = targetType.hasRank() + ? makeUnranked(adaptor.target(), targetType) + : adaptor.target(); + + // Now promote the unranked descriptors to the stack. + auto one = rewriter.create(loc, getIndexType(), + rewriter.getIndexAttr(1)); + auto promote = [&](Value desc) { + auto ptrType = LLVM::LLVMPointerType::get(desc.getType()); + auto allocated = + rewriter.create(loc, ptrType, ValueRange{one}); + rewriter.create(loc, desc, allocated); + return allocated; + }; + + auto sourcePtr = promote(unrankedSource); + auto targetPtr = promote(unrankedTarget); + + auto elemSize = rewriter.create( + loc, getIndexType(), + rewriter.getIndexAttr(srcType.getElementTypeBitWidth() / 8)); + auto copyFn = LLVM::lookupOrCreateMemRefCopyFn( + op->getParentOfType(), getIndexType(), sourcePtr.getType()); + rewriter.create(loc, copyFn, + ValueRange{elemSize, sourcePtr, targetPtr}); + rewriter.eraseOp(op); + + return success(); + } +}; + /// Extracts allocated, aligned pointers and offset from a ranked or unranked /// memref type. In unranked case, the fields are extracted from the underlying /// ranked descriptor. @@ -4009,6 +4071,7 @@ void mlir::populateStdToLLVMMemoryConversionPatterns( GetGlobalMemrefOpLowering, LoadOpLowering, MemRefCastOpLowering, + MemRefCopyOpLowering, MemRefReinterpretCastOpLowering, MemRefReshapeOpLowering, RankOpLowering, diff --git a/mlir/lib/Dialect/LLVMIR/IR/FunctionCallUtils.cpp b/mlir/lib/Dialect/LLVMIR/IR/FunctionCallUtils.cpp index a43c2251c2d99..47a5851b51f2e 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/FunctionCallUtils.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/FunctionCallUtils.cpp @@ -35,6 +35,7 @@ static constexpr llvm::StringRef kPrintNewline = "printNewline"; static constexpr llvm::StringRef kMalloc = "malloc"; static constexpr llvm::StringRef kAlignedAlloc = "aligned_alloc"; static constexpr llvm::StringRef kFree = "free"; +static constexpr llvm::StringRef kMemRefCopy = "memref_copy"; /// Generic print function lookupOrCreate helper. LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreateFn(ModuleOp moduleOp, StringRef name, @@ -114,6 +115,15 @@ LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreateFreeFn(ModuleOp moduleOp) { LLVM::LLVMVoidType::get(moduleOp->getContext())); } +LLVM::LLVMFuncOp +mlir::LLVM::lookupOrCreateMemRefCopyFn(ModuleOp moduleOp, Type indexType, + Type unrankedDescriptorType) { + return LLVM::lookupOrCreateFn( + moduleOp, kMemRefCopy, + ArrayRef{indexType, unrankedDescriptorType, unrankedDescriptorType}, + LLVM::LLVMVoidType::get(moduleOp->getContext())); +} + Operation::result_range mlir::LLVM::createLLVMCall(OpBuilder &b, Location loc, LLVM::LLVMFuncOp fn, ValueRange paramTypes, diff --git a/mlir/lib/ExecutionEngine/CRunnerUtils.cpp b/mlir/lib/ExecutionEngine/CRunnerUtils.cpp index 4677098d64f3b..d4ebc46aa47da 100644 --- a/mlir/lib/ExecutionEngine/CRunnerUtils.cpp +++ b/mlir/lib/ExecutionEngine/CRunnerUtils.cpp @@ -23,6 +23,7 @@ #include #include +#include #ifdef MLIR_CRUNNERUTILS_DEFINE_FUNCTIONS @@ -39,6 +40,52 @@ extern "C" void printClose() { fputs(" )", stdout); } extern "C" void printComma() { fputs(", ", stdout); } extern "C" void printNewline() { fputc('\n', stdout); } +extern "C" MLIR_CRUNNERUTILS_EXPORT void +memrefCopy(int64_t elemSize, UnrankedMemRefType *srcArg, + UnrankedMemRefType *dstArg) { + DynamicMemRefType src(*srcArg); + DynamicMemRefType dst(*dstArg); + + int64_t rank = src.rank; + int64_t *indices = static_cast(alloca(sizeof(int64_t) * rank)); + int64_t *srcStrides = static_cast(alloca(sizeof(int64_t) * rank)); + int64_t *dstStrides = static_cast(alloca(sizeof(int64_t) * rank)); + + char *srcPtr = src.data + src.offset * elemSize; + char *dstPtr = dst.data + dst.offset * elemSize; + + // Initialize index and scale strides. + for (int rankp = 0; rankp < rank; ++rankp) { + indices[rankp] = 0; + srcStrides[rankp] = src.strides[rankp] * elemSize; + dstStrides[rankp] = dst.strides[rankp] * elemSize; + } + + int64_t readIndex = 0, writeIndex = 0; + for (;;) { + // Copy over the element, byte by byte. + memcpy(dstPtr + writeIndex, srcPtr + readIndex, elemSize); + // Advance index and read position. + for (int64_t axis = rank - 1; axis >= 0; --axis) { + // Advance at current axis. + auto newIndex = ++indices[axis]; + readIndex += srcStrides[axis]; + writeIndex += dstStrides[axis]; + // If this is a valid index, we have our next index, so continue copying. + if (src.sizes[axis] != newIndex) + break; + // We reached the end of this axis. If this is axis 0, we are done. + if (axis == 0) + return; + // Else, reset to 0 and undo the advancement of the linear index that + // this axis had. The continue with the axis one outer. + indices[axis] = 0; + readIndex -= src.sizes[axis] * srcStrides[axis]; + writeIndex -= dst.sizes[axis] * dstStrides[axis]; + } + } +} + /// Prints GFLOPS rating. extern "C" void print_flops(double flops) { fprintf(stderr, "%lf GFLOPS\n", flops / 1.0E9); From a8d1182f661ccecd99efd4e543fddf3172c67a95 Mon Sep 17 00:00:00 2001 From: Arthur O'Dwyer Date: Tue, 15 Jun 2021 12:47:05 -0400 Subject: [PATCH 075/619] [libc++] Remove some _LIBCPP_CXX03_LANG from iostreams headers. With the STL containers, I didn't enable move operations in C++03 mode because that would change the overload resolution for things that today are copy operations. With iostreams, though, the copy operations aren't present at all, and so I see no problem with enabling move operations even in (Clang's greatly extended) C++03 mode. Clang's C++03 mode does not support delegating constructors. Differential Revision: https://reviews.llvm.org/D104310 --- libcxx/include/fstream | 29 +------------ libcxx/include/ios | 2 - libcxx/include/istream | 19 +-------- libcxx/include/ostream | 14 +------ libcxx/include/sstream | 42 ++++--------------- libcxx/include/string | 4 -- .../filebuf.assign/move_assign.pass.cpp | 2 - .../fstreams/filebuf.cons/move.pass.cpp | 6 +-- .../fstream.assign/move_assign.pass.cpp | 2 - .../fstreams/fstream.cons/move.pass.cpp | 6 +-- .../ifstream.assign/move_assign.pass.cpp | 1 - .../fstreams/ifstream.cons/move.pass.cpp | 5 +-- .../ofstream.assign/move_assign.pass.cpp | 2 - .../fstreams/ofstream.cons/move.pass.cpp | 6 +-- .../iostream.assign/move_assign.pass.cpp | 2 - .../iostreamclass/iostream.cons/move.pass.cpp | 2 - .../istream.assign/move_assign.pass.cpp | 2 - .../istream/istream.cons/copy.fail.cpp | 5 --- .../istream/istream.cons/move.pass.cpp | 2 - .../ostream.assign/move_assign.pass.cpp | 2 - .../output.streams/ostream.cons/move.pass.cpp | 2 - .../is_error_code_enum_io_errc.pass.cpp | 2 - .../istringstream.assign/move.pass.cpp | 2 - .../istringstream.cons/move.pass.cpp | 2 - .../ostringstream.assign/move.pass.cpp | 2 - .../ostringstream.cons/move.pass.cpp | 2 - .../stringbuf/stringbuf.cons/move.pass.cpp | 12 +++--- .../stringstream.cons/move.pass.cpp | 2 - .../stringstream.assign/move.pass.cpp | 2 - .../string.io/get_line_delim_rv.pass.cpp | 6 +-- .../string.io/get_line_rv.pass.cpp | 6 +-- 31 files changed, 31 insertions(+), 164 deletions(-) diff --git a/libcxx/include/fstream b/libcxx/include/fstream index 09af0d91e79b6..c522b8ab110d7 100644 --- a/libcxx/include/fstream +++ b/libcxx/include/fstream @@ -219,16 +219,12 @@ public: // 27.9.1.2 Constructors/destructor: basic_filebuf(); -#ifndef _LIBCPP_CXX03_LANG basic_filebuf(basic_filebuf&& __rhs); -#endif virtual ~basic_filebuf(); // 27.9.1.3 Assign/swap: -#ifndef _LIBCPP_CXX03_LANG _LIBCPP_INLINE_VISIBILITY basic_filebuf& operator=(basic_filebuf&& __rhs); -#endif void swap(basic_filebuf& __rhs); // 27.9.1.4 Members: @@ -318,8 +314,6 @@ basic_filebuf<_CharT, _Traits>::basic_filebuf() setbuf(nullptr, 4096); } -#ifndef _LIBCPP_CXX03_LANG - template basic_filebuf<_CharT, _Traits>::basic_filebuf(basic_filebuf&& __rhs) : basic_streambuf<_CharT, _Traits>(__rhs) @@ -394,8 +388,6 @@ basic_filebuf<_CharT, _Traits>::operator=(basic_filebuf&& __rhs) return *this; } -#endif // _LIBCPP_CXX03_LANG - template basic_filebuf<_CharT, _Traits>::~basic_filebuf() { @@ -1164,13 +1156,10 @@ public: : basic_ifstream(__p.c_str(), __mode) {} #endif // _LIBCPP_STD_VER >= 17 #endif -#ifndef _LIBCPP_CXX03_LANG _LIBCPP_INLINE_VISIBILITY basic_ifstream(basic_ifstream&& __rhs); - _LIBCPP_INLINE_VISIBILITY basic_ifstream& operator=(basic_ifstream&& __rhs); -#endif _LIBCPP_INLINE_VISIBILITY void swap(basic_ifstream& __rhs); @@ -1240,8 +1229,6 @@ basic_ifstream<_CharT, _Traits>::basic_ifstream(const string& __s, ios_base::ope } #endif -#ifndef _LIBCPP_CXX03_LANG - template inline basic_ifstream<_CharT, _Traits>::basic_ifstream(basic_ifstream&& __rhs) @@ -1261,8 +1248,6 @@ basic_ifstream<_CharT, _Traits>::operator=(basic_ifstream&& __rhs) return *this; } -#endif // _LIBCPP_CXX03_LANG - template inline void @@ -1379,13 +1364,10 @@ public: : basic_ofstream(__p.c_str(), __mode) {} #endif // _LIBCPP_STD_VER >= 17 -#ifndef _LIBCPP_CXX03_LANG _LIBCPP_INLINE_VISIBILITY basic_ofstream(basic_ofstream&& __rhs); - _LIBCPP_INLINE_VISIBILITY basic_ofstream& operator=(basic_ofstream&& __rhs); -#endif _LIBCPP_INLINE_VISIBILITY void swap(basic_ofstream& __rhs); @@ -1454,8 +1436,6 @@ basic_ofstream<_CharT, _Traits>::basic_ofstream(const string& __s, ios_base::ope } #endif -#ifndef _LIBCPP_CXX03_LANG - template inline basic_ofstream<_CharT, _Traits>::basic_ofstream(basic_ofstream&& __rhs) @@ -1475,8 +1455,6 @@ basic_ofstream<_CharT, _Traits>::operator=(basic_ofstream&& __rhs) return *this; } -#endif // _LIBCPP_CXX03_LANG - template inline void @@ -1595,13 +1573,12 @@ public: #endif // _LIBCPP_STD_VER >= 17 #endif -#ifndef _LIBCPP_CXX03_LANG _LIBCPP_INLINE_VISIBILITY basic_fstream(basic_fstream&& __rhs); _LIBCPP_INLINE_VISIBILITY basic_fstream& operator=(basic_fstream&& __rhs); -#endif + _LIBCPP_INLINE_VISIBILITY void swap(basic_fstream& __rhs); @@ -1668,8 +1645,6 @@ basic_fstream<_CharT, _Traits>::basic_fstream(const string& __s, ios_base::openm } #endif -#ifndef _LIBCPP_CXX03_LANG - template inline basic_fstream<_CharT, _Traits>::basic_fstream(basic_fstream&& __rhs) @@ -1689,8 +1664,6 @@ basic_fstream<_CharT, _Traits>::operator=(basic_fstream&& __rhs) return *this; } -#endif // _LIBCPP_CXX03_LANG - template inline void diff --git a/libcxx/include/ios b/libcxx/include/ios index eefb58f55be13..3128bca899990 100644 --- a/libcxx/include/ios +++ b/libcxx/include/ios @@ -662,10 +662,8 @@ protected: _LIBCPP_INLINE_VISIBILITY void move(basic_ios& __rhs); -#ifndef _LIBCPP_CXX03_LANG _LIBCPP_INLINE_VISIBILITY void move(basic_ios&& __rhs) {move(__rhs);} -#endif _LIBCPP_INLINE_VISIBILITY void swap(basic_ios& __rhs) _NOEXCEPT; _LIBCPP_INLINE_VISIBILITY diff --git a/libcxx/include/istream b/libcxx/include/istream index 531280719b30e..17ca68388f523 100644 --- a/libcxx/include/istream +++ b/libcxx/include/istream @@ -192,14 +192,12 @@ public: { this->init(__sb); } virtual ~basic_istream(); protected: -#ifndef _LIBCPP_CXX03_LANG inline _LIBCPP_INLINE_VISIBILITY basic_istream(basic_istream&& __rhs); // 27.7.1.1.2 Assign/swap: inline _LIBCPP_INLINE_VISIBILITY basic_istream& operator=(basic_istream&& __rhs); -#endif inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 void swap(basic_istream& __rhs) { @@ -207,10 +205,8 @@ protected: basic_ios::swap(__rhs); } -#ifndef _LIBCPP_CXX03_LANG basic_istream (const basic_istream& __rhs) = delete; basic_istream& operator=(const basic_istream& __rhs) = delete; -#endif public: // 27.7.1.1.3 Prefix/suffix: @@ -333,8 +329,6 @@ basic_istream<_CharT, _Traits>::sentry::sentry(basic_istream<_CharT, _Traits>& _ __is.setstate(ios_base::failbit); } -#ifndef _LIBCPP_CXX03_LANG - template basic_istream<_CharT, _Traits>::basic_istream(basic_istream&& __rhs) : __gc_(__rhs.__gc_) @@ -351,8 +345,6 @@ basic_istream<_CharT, _Traits>::operator=(basic_istream&& __rhs) return *this; } -#endif // _LIBCPP_CXX03_LANG - template basic_istream<_CharT, _Traits>::~basic_istream() { @@ -1416,21 +1408,18 @@ public: virtual ~basic_iostream(); protected: -#ifndef _LIBCPP_CXX03_LANG inline _LIBCPP_INLINE_VISIBILITY basic_iostream(basic_iostream&& __rhs); // assign/swap inline _LIBCPP_INLINE_VISIBILITY basic_iostream& operator=(basic_iostream&& __rhs); -#endif + inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 void swap(basic_iostream& __rhs) { basic_istream::swap(__rhs); } }; -#ifndef _LIBCPP_CXX03_LANG - template basic_iostream<_CharT, _Traits>::basic_iostream(basic_iostream&& __rhs) : basic_istream<_CharT, _Traits>(_VSTD::move(__rhs)) @@ -1445,8 +1434,6 @@ basic_iostream<_CharT, _Traits>::operator=(basic_iostream&& __rhs) return *this; } -#endif // _LIBCPP_CXX03_LANG - template basic_iostream<_CharT, _Traits>::~basic_iostream() { @@ -1570,8 +1557,6 @@ getline(basic_istream<_CharT, _Traits>& __is, return getline(__is, __str, __is.widen('\n')); } -#ifndef _LIBCPP_CXX03_LANG - template inline _LIBCPP_INLINE_VISIBILITY basic_istream<_CharT, _Traits>& @@ -1590,8 +1575,6 @@ getline(basic_istream<_CharT, _Traits>&& __is, return getline(__is, __str, __is.widen('\n')); } -#endif // _LIBCPP_CXX03_LANG - template basic_istream<_CharT, _Traits>& operator>>(basic_istream<_CharT, _Traits>& __is, bitset<_Size>& __x) diff --git a/libcxx/include/ostream b/libcxx/include/ostream index 81ba565e67f53..efeaee253eb97 100644 --- a/libcxx/include/ostream +++ b/libcxx/include/ostream @@ -165,27 +165,21 @@ public: { this->init(__sb); } virtual ~basic_ostream(); protected: -#ifndef _LIBCPP_CXX03_LANG inline _LIBCPP_INLINE_VISIBILITY basic_ostream(basic_ostream&& __rhs); // 27.7.2.3 Assign/swap inline _LIBCPP_INLINE_VISIBILITY basic_ostream& operator=(basic_ostream&& __rhs); -#endif + inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 void swap(basic_ostream& __rhs) { basic_ios::swap(__rhs); } -#ifndef _LIBCPP_CXX03_LANG basic_ostream (const basic_ostream& __rhs) = delete; basic_ostream& operator=(const basic_ostream& __rhs) = delete; -#else - basic_ostream (const basic_ostream& __rhs); // not defined - basic_ostream& operator=(const basic_ostream& __rhs); // not defined -#endif -public: +public: // 27.7.2.4 Prefix/suffix: class _LIBCPP_TEMPLATE_VIS sentry; @@ -291,8 +285,6 @@ basic_ostream<_CharT, _Traits>::sentry::~sentry() } } -#ifndef _LIBCPP_CXX03_LANG - template basic_ostream<_CharT, _Traits>::basic_ostream(basic_ostream&& __rhs) { @@ -307,8 +299,6 @@ basic_ostream<_CharT, _Traits>::operator=(basic_ostream&& __rhs) return *this; } -#endif // _LIBCPP_CXX03_LANG - template basic_ostream<_CharT, _Traits>::~basic_ostream() { diff --git a/libcxx/include/sstream b/libcxx/include/sstream index 0b614a0b09562..fbe5ffcab4c6e 100644 --- a/libcxx/include/sstream +++ b/libcxx/include/sstream @@ -219,19 +219,13 @@ private: public: // 30.8.2.1 [stringbuf.cons], constructors -#ifndef _LIBCPP_CXX03_LANG _LIBCPP_INLINE_VISIBILITY - basic_stringbuf() : basic_stringbuf(ios_base::in | ios_base::out) {} + basic_stringbuf() + : __hm_(nullptr), __mode_(ios_base::in | ios_base::out) {} _LIBCPP_INLINE_VISIBILITY explicit basic_stringbuf(ios_base::openmode __wch) : __hm_(nullptr), __mode_(__wch) {} -#else - _LIBCPP_INLINE_VISIBILITY - explicit basic_stringbuf(ios_base::openmode __wch = ios_base::in | - ios_base::out) - : __hm_(nullptr), __mode_(__wch) {} -#endif _LIBCPP_INLINE_VISIBILITY explicit basic_stringbuf(const string_type& __s, @@ -643,18 +637,13 @@ private: public: // 30.8.3.1 [istringstream.cons], constructors -#ifndef _LIBCPP_CXX03_LANG _LIBCPP_INLINE_VISIBILITY - basic_istringstream() : basic_istringstream(ios_base::in) {} + basic_istringstream() + : basic_istream<_CharT, _Traits>(&__sb_), __sb_(ios_base::in) {} _LIBCPP_INLINE_VISIBILITY explicit basic_istringstream(ios_base::openmode __wch) : basic_istream<_CharT, _Traits>(&__sb_), __sb_(__wch | ios_base::in) {} -#else - _LIBCPP_INLINE_VISIBILITY - explicit basic_istringstream(ios_base::openmode __wch = ios_base::in) - : basic_istream<_CharT, _Traits>(&__sb_), __sb_(__wch | ios_base::in) {} -#endif _LIBCPP_INLINE_VISIBILITY explicit basic_istringstream(const string_type& __s, @@ -728,20 +717,13 @@ private: public: // 30.8.4.1 [ostringstream.cons], constructors -#ifndef _LIBCPP_CXX03_LANG _LIBCPP_INLINE_VISIBILITY - basic_ostringstream() : basic_ostringstream(ios_base::out) {} + basic_ostringstream() + : basic_ostream<_CharT, _Traits>(&__sb_), __sb_(ios_base::out) {} _LIBCPP_INLINE_VISIBILITY explicit basic_ostringstream(ios_base::openmode __wch) - : basic_ostream<_CharT, _Traits>(&__sb_), - __sb_(__wch | ios_base::out) {} -#else - _LIBCPP_INLINE_VISIBILITY - explicit basic_ostringstream(ios_base::openmode __wch = ios_base::out) - : basic_ostream<_CharT, _Traits>(&__sb_), - __sb_(__wch | ios_base::out) {} -#endif + : basic_ostream<_CharT, _Traits>(&__sb_), __sb_(__wch | ios_base::out) {} _LIBCPP_INLINE_VISIBILITY explicit basic_ostringstream(const string_type& __s, @@ -816,19 +798,13 @@ private: public: // 30.8.5.1 [stringstream.cons], constructors -#ifndef _LIBCPP_CXX03_LANG _LIBCPP_INLINE_VISIBILITY - basic_stringstream() : basic_stringstream(ios_base::in | ios_base::out) {} + basic_stringstream() + : basic_iostream<_CharT, _Traits>(&__sb_), __sb_(ios_base::in | ios_base::out) {} _LIBCPP_INLINE_VISIBILITY explicit basic_stringstream(ios_base::openmode __wch) : basic_iostream<_CharT, _Traits>(&__sb_), __sb_(__wch) {} -#else - _LIBCPP_INLINE_VISIBILITY - explicit basic_stringstream(ios_base::openmode __wch = ios_base::in | - ios_base::out) - : basic_iostream<_CharT, _Traits>(&__sb_), __sb_(__wch) {} -#endif _LIBCPP_INLINE_VISIBILITY explicit basic_stringstream(const string_type& __s, diff --git a/libcxx/include/string b/libcxx/include/string index c5e0745250ee4..3917c07a1f744 100644 --- a/libcxx/include/string +++ b/libcxx/include/string @@ -4439,8 +4439,6 @@ basic_istream<_CharT, _Traits>& getline(basic_istream<_CharT, _Traits>& __is, basic_string<_CharT, _Traits, _Allocator>& __str); -#ifndef _LIBCPP_CXX03_LANG - template inline _LIBCPP_INLINE_VISIBILITY basic_istream<_CharT, _Traits>& @@ -4453,8 +4451,6 @@ basic_istream<_CharT, _Traits>& getline(basic_istream<_CharT, _Traits>&& __is, basic_string<_CharT, _Traits, _Allocator>& __str); -#endif // _LIBCPP_CXX03_LANG - #if _LIBCPP_STD_VER > 17 template inline _LIBCPP_INLINE_VISIBILITY diff --git a/libcxx/test/std/input.output/file.streams/fstreams/filebuf.assign/move_assign.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/filebuf.assign/move_assign.pass.cpp index 69ccd202564db..173c9d110ffc4 100644 --- a/libcxx/test/std/input.output/file.streams/fstreams/filebuf.assign/move_assign.pass.cpp +++ b/libcxx/test/std/input.output/file.streams/fstreams/filebuf.assign/move_assign.pass.cpp @@ -6,8 +6,6 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: c++03 - // // template > diff --git a/libcxx/test/std/input.output/file.streams/fstreams/filebuf.cons/move.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/filebuf.cons/move.pass.cpp index 0a541670858e0..38aba56540f0d 100644 --- a/libcxx/test/std/input.output/file.streams/fstreams/filebuf.cons/move.pass.cpp +++ b/libcxx/test/std/input.output/file.streams/fstreams/filebuf.cons/move.pass.cpp @@ -6,8 +6,6 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: c++03 - // // template > @@ -31,7 +29,7 @@ int main(int, char**) assert(f.sputn("123", 3) == 3); f.pubseekoff(1, std::ios_base::beg); assert(f.sgetc() == '2'); - std::filebuf f2(move(f)); + std::filebuf f2(std::move(f)); assert(!f.is_open()); assert(f2.is_open()); assert(f2.sgetc() == '2'); @@ -45,7 +43,7 @@ int main(int, char**) assert(f.sputn(L"123", 3) == 3); f.pubseekoff(1, std::ios_base::beg); assert(f.sgetc() == L'2'); - std::wfilebuf f2(move(f)); + std::wfilebuf f2(std::move(f)); assert(!f.is_open()); assert(f2.is_open()); assert(f2.sgetc() == L'2'); diff --git a/libcxx/test/std/input.output/file.streams/fstreams/fstream.assign/move_assign.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/fstream.assign/move_assign.pass.cpp index 6032b21c579c3..9bc7887f839fd 100644 --- a/libcxx/test/std/input.output/file.streams/fstreams/fstream.assign/move_assign.pass.cpp +++ b/libcxx/test/std/input.output/file.streams/fstreams/fstream.assign/move_assign.pass.cpp @@ -6,8 +6,6 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: c++03 - // // template > diff --git a/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/move.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/move.pass.cpp index 9166737543a0e..e210b562ac949 100644 --- a/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/move.pass.cpp +++ b/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/move.pass.cpp @@ -6,8 +6,6 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: c++03 - // // template > @@ -26,7 +24,7 @@ int main(int, char**) { std::fstream fso(temp, std::ios_base::in | std::ios_base::out | std::ios_base::trunc); - std::fstream fs = move(fso); + std::fstream fs = std::move(fso); double x = 0; fs << 3.25; fs.seekg(0); @@ -37,7 +35,7 @@ int main(int, char**) { std::wfstream fso(temp, std::ios_base::in | std::ios_base::out | std::ios_base::trunc); - std::wfstream fs = move(fso); + std::wfstream fs = std::move(fso); double x = 0; fs << 3.25; fs.seekg(0); diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.assign/move_assign.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.assign/move_assign.pass.cpp index b9df31149efdc..439db87a0eba3 100644 --- a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.assign/move_assign.pass.cpp +++ b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.assign/move_assign.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: c++03 // FILE_DEPENDENCIES: test.dat // diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/move.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/move.pass.cpp index 03475e17beef0..28f55f6a857db 100644 --- a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/move.pass.cpp +++ b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/move.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: c++03 // FILE_DEPENDENCIES: test.dat // @@ -25,14 +24,14 @@ int main(int, char**) { { std::ifstream fso("test.dat"); - std::ifstream fs = move(fso); + std::ifstream fs = std::move(fso); double x = 0; fs >> x; assert(x == 3.25); } { std::wifstream fso("test.dat"); - std::wifstream fs = move(fso); + std::wifstream fs = std::move(fso); double x = 0; fs >> x; assert(x == 3.25); diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.assign/move_assign.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.assign/move_assign.pass.cpp index 720bcf0e77486..4db1e6c3b35fb 100644 --- a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.assign/move_assign.pass.cpp +++ b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.assign/move_assign.pass.cpp @@ -6,8 +6,6 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: c++03 - // // template > diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/move.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/move.pass.cpp index 60fbf2b9e360e..01676f0e75a0d 100644 --- a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/move.pass.cpp +++ b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/move.pass.cpp @@ -6,8 +6,6 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: c++03 - // // template > @@ -25,7 +23,7 @@ int main(int, char**) std::string temp = get_temp_file_name(); { std::ofstream fso(temp.c_str()); - std::ofstream fs = move(fso); + std::ofstream fs = std::move(fso); fs << 3.25; } { @@ -37,7 +35,7 @@ int main(int, char**) std::remove(temp.c_str()); { std::wofstream fso(temp.c_str()); - std::wofstream fs = move(fso); + std::wofstream fs = std::move(fso); fs << 3.25; } { diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/iostreamclass/iostream.assign/move_assign.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/iostreamclass/iostream.assign/move_assign.pass.cpp index 7b02842610d5b..fb1f524c7123e 100644 --- a/libcxx/test/std/input.output/iostream.format/input.streams/iostreamclass/iostream.assign/move_assign.pass.cpp +++ b/libcxx/test/std/input.output/iostream.format/input.streams/iostreamclass/iostream.assign/move_assign.pass.cpp @@ -6,8 +6,6 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: c++03 - // // template > diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/iostreamclass/iostream.cons/move.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/iostreamclass/iostream.cons/move.pass.cpp index 514cde055875a..2a4e1a6b1123b 100644 --- a/libcxx/test/std/input.output/iostream.format/input.streams/iostreamclass/iostream.cons/move.pass.cpp +++ b/libcxx/test/std/input.output/iostream.format/input.streams/iostreamclass/iostream.cons/move.pass.cpp @@ -6,8 +6,6 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: c++03 - // // template > diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/istream/istream.assign/move_assign.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/istream/istream.assign/move_assign.pass.cpp index bd901171cc8ee..7899c63e8293e 100644 --- a/libcxx/test/std/input.output/iostream.format/input.streams/istream/istream.assign/move_assign.pass.cpp +++ b/libcxx/test/std/input.output/iostream.format/input.streams/istream/istream.assign/move_assign.pass.cpp @@ -6,8 +6,6 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: c++03 - // // template > diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/istream/istream.cons/copy.fail.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/istream/istream.cons/copy.fail.cpp index c5f10fa0145c2..90e5315a662b3 100644 --- a/libcxx/test/std/input.output/iostream.format/input.streams/istream/istream.cons/copy.fail.cpp +++ b/libcxx/test/std/input.output/iostream.format/input.streams/istream/istream.cons/copy.fail.cpp @@ -6,8 +6,6 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: c++03 - // // template > @@ -47,10 +45,7 @@ struct test_istream }; - int main(int, char**) { - - return 0; } diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/istream/istream.cons/move.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/istream/istream.cons/move.pass.cpp index 22163156c2c82..fb071ba970163 100644 --- a/libcxx/test/std/input.output/iostream.format/input.streams/istream/istream.cons/move.pass.cpp +++ b/libcxx/test/std/input.output/iostream.format/input.streams/istream/istream.cons/move.pass.cpp @@ -6,8 +6,6 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: c++03 - // // template > diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.assign/move_assign.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.assign/move_assign.pass.cpp index b529970a2883c..8fcb0c2b1ee97 100644 --- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.assign/move_assign.pass.cpp +++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.assign/move_assign.pass.cpp @@ -6,8 +6,6 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: c++03 - // // template > diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.cons/move.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.cons/move.pass.cpp index 345388baf6e94..73991b36f6c9b 100644 --- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.cons/move.pass.cpp +++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.cons/move.pass.cpp @@ -6,8 +6,6 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: c++03 - // // template > diff --git a/libcxx/test/std/input.output/iostreams.base/is_error_code_enum_io_errc.pass.cpp b/libcxx/test/std/input.output/iostreams.base/is_error_code_enum_io_errc.pass.cpp index 76eb83148b398..13f90d92899ed 100644 --- a/libcxx/test/std/input.output/iostreams.base/is_error_code_enum_io_errc.pass.cpp +++ b/libcxx/test/std/input.output/iostreams.base/is_error_code_enum_io_errc.pass.cpp @@ -5,8 +5,6 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// UNSUPPORTED: c++03 // diff --git a/libcxx/test/std/input.output/string.streams/istringstream/istringstream.assign/move.pass.cpp b/libcxx/test/std/input.output/string.streams/istringstream/istringstream.assign/move.pass.cpp index c9b701397d761..93c8313d85909 100644 --- a/libcxx/test/std/input.output/string.streams/istringstream/istringstream.assign/move.pass.cpp +++ b/libcxx/test/std/input.output/string.streams/istringstream/istringstream.assign/move.pass.cpp @@ -6,8 +6,6 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: c++03 - // // template , class Allocator = allocator > diff --git a/libcxx/test/std/input.output/string.streams/istringstream/istringstream.cons/move.pass.cpp b/libcxx/test/std/input.output/string.streams/istringstream/istringstream.cons/move.pass.cpp index e4f1f2b4ddbf3..650284d87b706 100644 --- a/libcxx/test/std/input.output/string.streams/istringstream/istringstream.cons/move.pass.cpp +++ b/libcxx/test/std/input.output/string.streams/istringstream/istringstream.cons/move.pass.cpp @@ -6,8 +6,6 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: c++03 - // // template , class Allocator = allocator > diff --git a/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.assign/move.pass.cpp b/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.assign/move.pass.cpp index 52e4ace45efff..8be3a43fe38db 100644 --- a/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.assign/move.pass.cpp +++ b/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.assign/move.pass.cpp @@ -6,8 +6,6 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: c++03 - // // template , class Allocator = allocator > diff --git a/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.cons/move.pass.cpp b/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.cons/move.pass.cpp index 3a3f42f009c20..6823bb69bc627 100644 --- a/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.cons/move.pass.cpp +++ b/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.cons/move.pass.cpp @@ -6,8 +6,6 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: c++03 - // // template , class Allocator = allocator > diff --git a/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.cons/move.pass.cpp b/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.cons/move.pass.cpp index 9fb588d1a4304..af1eb38c349b4 100644 --- a/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.cons/move.pass.cpp +++ b/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.cons/move.pass.cpp @@ -22,32 +22,32 @@ int main(int, char**) { { std::stringbuf buf1("testing"); - std::stringbuf buf(move(buf1)); + std::stringbuf buf(std::move(buf1)); assert(buf.str() == "testing"); } { std::stringbuf buf1("testing", std::ios_base::in); - std::stringbuf buf(move(buf1)); + std::stringbuf buf(std::move(buf1)); assert(buf.str() == "testing"); } { std::stringbuf buf1("testing", std::ios_base::out); - std::stringbuf buf(move(buf1)); + std::stringbuf buf(std::move(buf1)); assert(buf.str() == "testing"); } { std::wstringbuf buf1(L"testing"); - std::wstringbuf buf(move(buf1)); + std::wstringbuf buf(std::move(buf1)); assert(buf.str() == L"testing"); } { std::wstringbuf buf1(L"testing", std::ios_base::in); - std::wstringbuf buf(move(buf1)); + std::wstringbuf buf(std::move(buf1)); assert(buf.str() == L"testing"); } { std::wstringbuf buf1(L"testing", std::ios_base::out); - std::wstringbuf buf(move(buf1)); + std::wstringbuf buf(std::move(buf1)); assert(buf.str() == L"testing"); } diff --git a/libcxx/test/std/input.output/string.streams/stringstream.cons/move.pass.cpp b/libcxx/test/std/input.output/string.streams/stringstream.cons/move.pass.cpp index 71c385a068060..642fd752a8f46 100644 --- a/libcxx/test/std/input.output/string.streams/stringstream.cons/move.pass.cpp +++ b/libcxx/test/std/input.output/string.streams/stringstream.cons/move.pass.cpp @@ -6,8 +6,6 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: c++03 - // // template , class Allocator = allocator > diff --git a/libcxx/test/std/input.output/string.streams/stringstream.cons/stringstream.assign/move.pass.cpp b/libcxx/test/std/input.output/string.streams/stringstream.cons/stringstream.assign/move.pass.cpp index e2a750651c6eb..91a685d2db9f7 100644 --- a/libcxx/test/std/input.output/string.streams/stringstream.cons/stringstream.assign/move.pass.cpp +++ b/libcxx/test/std/input.output/string.streams/stringstream.cons/stringstream.assign/move.pass.cpp @@ -6,8 +6,6 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: c++03 - // // template , class Allocator = allocator > diff --git a/libcxx/test/std/strings/basic.string/string.nonmembers/string.io/get_line_delim_rv.pass.cpp b/libcxx/test/std/strings/basic.string/string.nonmembers/string.io/get_line_delim_rv.pass.cpp index af2e62a5c808b..d2e5ab5488bdd 100644 --- a/libcxx/test/std/strings/basic.string/string.nonmembers/string.io/get_line_delim_rv.pass.cpp +++ b/libcxx/test/std/strings/basic.string/string.nonmembers/string.io/get_line_delim_rv.pass.cpp @@ -6,8 +6,6 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: c++03 - // // template @@ -35,13 +33,13 @@ int main(int, char**) assert(s == L" abc"); } { - typedef std::basic_string, min_allocator> S; + typedef std::basic_string, min_allocator > S; S s("initial text"); getline(std::istringstream(" abc* def* ghij"), s, '*'); assert(s == " abc"); } { - typedef std::basic_string, min_allocator> S; + typedef std::basic_string, min_allocator > S; S s(L"initial text"); getline(std::wistringstream(L" abc* def* ghij"), s, L'*'); assert(s == L" abc"); diff --git a/libcxx/test/std/strings/basic.string/string.nonmembers/string.io/get_line_rv.pass.cpp b/libcxx/test/std/strings/basic.string/string.nonmembers/string.io/get_line_rv.pass.cpp index 322b49b9c6e7e..3fb35cc24d66b 100644 --- a/libcxx/test/std/strings/basic.string/string.nonmembers/string.io/get_line_rv.pass.cpp +++ b/libcxx/test/std/strings/basic.string/string.nonmembers/string.io/get_line_rv.pass.cpp @@ -6,8 +6,6 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: c++03 - // // template @@ -35,13 +33,13 @@ int main(int, char**) assert(s == L" abc"); } { - typedef std::basic_string, min_allocator> S; + typedef std::basic_string, min_allocator > S; S s("initial text"); getline(std::istringstream(" abc\n def\n ghij"), s); assert(s == " abc"); } { - typedef std::basic_string, min_allocator> S; + typedef std::basic_string, min_allocator > S; S s(L"initial text"); getline(std::wistringstream(L" abc\n def\n ghij"), s); assert(s == L" abc"); From 5d6240b77e7e7199fcf0e89f6dd2f7eea3596a3c Mon Sep 17 00:00:00 2001 From: "William S. Moses" Date: Fri, 25 Jun 2021 19:40:35 -0400 Subject: [PATCH 076/619] [MLIR][SCF] Inline ExecuteRegion if parent can contain multiple blocks The executeregionop is used to allow multiple blocks within SCF constructs. If the container allows multiple blocks, inline the region Differential Revision: https://reviews.llvm.org/D104960 --- mlir/include/mlir/Dialect/SCF/SCFOps.td | 6 -- mlir/lib/Dialect/SCF/SCF.cpp | 77 ++++++++++++++++++++++++- mlir/test/Dialect/SCF/canonicalize.mlir | 67 +++++++++++++++++++++ 3 files changed, 141 insertions(+), 9 deletions(-) diff --git a/mlir/include/mlir/Dialect/SCF/SCFOps.td b/mlir/include/mlir/Dialect/SCF/SCFOps.td index c10441f59bd55..9f039b6fcda68 100644 --- a/mlir/include/mlir/Dialect/SCF/SCFOps.td +++ b/mlir/include/mlir/Dialect/SCF/SCFOps.td @@ -108,14 +108,8 @@ def ExecuteRegionOp : SCF_Op<"execute_region"> { let regions = (region AnyRegion:$region); - // TODO: If the parent is a func like op (which would be the case if all other - // ops are from the std dialect), the inliner logic could be readily used to - // inline. let hasCanonicalizer = 1; - // TODO: can fold if it returns a constant. - // TODO: Single block execute_region ops can be readily inlined irrespective - // of which op is a parent. Add a fold for this. let hasFolder = 0; } diff --git a/mlir/lib/Dialect/SCF/SCF.cpp b/mlir/lib/Dialect/SCF/SCF.cpp index 99d2386ced1b1..38760ca4050d3 100644 --- a/mlir/lib/Dialect/SCF/SCF.cpp +++ b/mlir/lib/Dialect/SCF/SCF.cpp @@ -143,23 +143,94 @@ static LogicalResult verify(ExecuteRegionOp op) { // // "test.foo"() : () -> () // %x = "test.val"() : () -> i64 -// "test.bar"(%v) : (i64) -> () +// "test.bar"(%x) : (i64) -> () // struct SingleBlockExecuteInliner : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(ExecuteRegionOp op, PatternRewriter &rewriter) const override { - if (op.region().getBlocks().size() != 1) + if (!llvm::hasSingleElement(op.region())) return failure(); replaceOpWithRegion(rewriter, op, op.region()); return success(); } }; +// Inline an ExecuteRegionOp if its parent can contain multiple blocks. +// TODO generalize the conditions for operations which can be inlined into. +// func @func_execute_region_elim() { +// "test.foo"() : () -> () +// %v = scf.execute_region -> i64 { +// %c = "test.cmp"() : () -> i1 +// cond_br %c, ^bb2, ^bb3 +// ^bb2: +// %x = "test.val1"() : () -> i64 +// br ^bb4(%x : i64) +// ^bb3: +// %y = "test.val2"() : () -> i64 +// br ^bb4(%y : i64) +// ^bb4(%z : i64): +// scf.yield %z : i64 +// } +// "test.bar"(%v) : (i64) -> () +// return +// } +// +// becomes +// +// func @func_execute_region_elim() { +// "test.foo"() : () -> () +// %c = "test.cmp"() : () -> i1 +// cond_br %c, ^bb1, ^bb2 +// ^bb1: // pred: ^bb0 +// %x = "test.val1"() : () -> i64 +// br ^bb3(%x : i64) +// ^bb2: // pred: ^bb0 +// %y = "test.val2"() : () -> i64 +// br ^bb3(%y : i64) +// ^bb3(%z: i64): // 2 preds: ^bb1, ^bb2 +// "test.bar"(%z) : (i64) -> () +// return +// } +// +struct MultiBlockExecuteInliner : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(ExecuteRegionOp op, + PatternRewriter &rewriter) const override { + if (!isa(op->getParentOp())) + return failure(); + + Block *prevBlock = op->getBlock(); + Block *postBlock = rewriter.splitBlock(prevBlock, op->getIterator()); + rewriter.setInsertionPointToEnd(prevBlock); + + rewriter.create(op.getLoc(), &op.region().front()); + + for (Block &blk : op.region()) { + if (YieldOp yieldOp = dyn_cast(blk.getTerminator())) { + rewriter.setInsertionPoint(yieldOp); + rewriter.create(yieldOp.getLoc(), postBlock, + yieldOp.results()); + rewriter.eraseOp(yieldOp); + } + } + + rewriter.inlineRegionBefore(op.region(), postBlock); + SmallVector blockArgs; + + for (auto res : op.getResults()) + blockArgs.push_back(postBlock->addArgument(res.getType())); + + rewriter.replaceOp(op, blockArgs); + return success(); + } +}; + void ExecuteRegionOp::getCanonicalizationPatterns(RewritePatternSet &results, MLIRContext *context) { - results.add(context); + results.add(context); } //===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/SCF/canonicalize.mlir b/mlir/test/Dialect/SCF/canonicalize.mlir index 8692f2d9705e0..d1789c6dfde52 100644 --- a/mlir/test/Dialect/SCF/canonicalize.mlir +++ b/mlir/test/Dialect/SCF/canonicalize.mlir @@ -948,3 +948,70 @@ func @execute_region_elim() { // CHECK-NEXT: "test.bar"(%[[VAL]]) : (i64) -> () // CHECK-NEXT: } + +// ----- + +// CHECK-LABEL: func @func_execute_region_elim +func @func_execute_region_elim() { + "test.foo"() : () -> () + %v = scf.execute_region -> i64 { + %c = "test.cmp"() : () -> i1 + cond_br %c, ^bb2, ^bb3 + ^bb2: + %x = "test.val1"() : () -> i64 + br ^bb4(%x : i64) + ^bb3: + %y = "test.val2"() : () -> i64 + br ^bb4(%y : i64) + ^bb4(%z : i64): + scf.yield %z : i64 + } + "test.bar"(%v) : (i64) -> () + return +} + +// CHECK: "test.foo" +// CHECK: %[[cmp:.+]] = "test.cmp" +// CHECK: cond_br %[[cmp]], ^[[bb1:.+]], ^[[bb2:.+]] +// CHECK: ^[[bb1]]: // pred: ^bb0 +// CHECK: %[[x:.+]] = "test.val1" +// CHECK: br ^[[bb3:.+]](%[[x]] : i64) +// CHECK: ^[[bb2]]: // pred: ^bb0 +// CHECK: %[[y:.+]] = "test.val2" +// CHECK: br ^[[bb3]](%[[y:.+]] : i64) +// CHECK: ^[[bb3]](%[[z:.+]]: i64): +// CHECK: "test.bar"(%[[z]]) +// CHECK: return + + +// ----- + +// CHECK-LABEL: func @func_execute_region_elim2 +func @func_execute_region_elim2() { + "test.foo"() : () -> () + %v = scf.execute_region -> i64 { + %c = "test.cmp"() : () -> i1 + cond_br %c, ^bb2, ^bb3 + ^bb2: + %x = "test.val1"() : () -> i64 + scf.yield %x : i64 + ^bb3: + %y = "test.val2"() : () -> i64 + scf.yield %y : i64 + } + "test.bar"(%v) : (i64) -> () + return +} + +// CHECK: "test.foo" +// CHECK: %[[cmp:.+]] = "test.cmp" +// CHECK: cond_br %[[cmp]], ^[[bb1:.+]], ^[[bb2:.+]] +// CHECK: ^[[bb1]]: // pred: ^bb0 +// CHECK: %[[x:.+]] = "test.val1" +// CHECK: br ^[[bb3:.+]](%[[x]] : i64) +// CHECK: ^[[bb2]]: // pred: ^bb0 +// CHECK: %[[y:.+]] = "test.val2" +// CHECK: br ^[[bb3]](%[[y:.+]] : i64) +// CHECK: ^[[bb3]](%[[z:.+]]: i64): +// CHECK: "test.bar"(%[[z]]) +// CHECK: return From 9c5ed8d567924e807a6466b6ad681c8bf395cf58 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Fri, 25 Jun 2021 17:45:50 -0500 Subject: [PATCH 077/619] [Hexagon] Add patterns to load i1 This fixes https://llvm.org/PR50853 --- llvm/lib/Target/Hexagon/HexagonPatterns.td | 62 ++- llvm/test/CodeGen/Hexagon/isel-extload-i1.ll | 25 ++ llvm/test/CodeGen/Hexagon/isel/extload-i1.ll | 380 +++++++++++++++++++ 3 files changed, 451 insertions(+), 16 deletions(-) create mode 100644 llvm/test/CodeGen/Hexagon/isel-extload-i1.ll create mode 100644 llvm/test/CodeGen/Hexagon/isel/extload-i1.ll diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td index f4223b74c9008..cad5ca8ab92ec 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatterns.td +++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td @@ -1948,6 +1948,9 @@ def: Pat<(HexagonAtPcrel I32:$addr), // --(12) Load ----------------------------------------------------------- // +def L1toI32: OutPatFrag<(ops node:$Rs), (A2_subri 0, (i32 $Rs))>; +def L1toI64: OutPatFrag<(ops node:$Rs), (ToSext64 (L1toI32 $Rs))>; + def extloadv2i8: PatFrag<(ops node:$ptr), (extload node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::v2i8; }]>; @@ -2104,11 +2107,17 @@ let AddedComplexity = 20 in { } let AddedComplexity = 30 in { + // Loads of i1 are loading a byte, and the byte should be either 0 or 1. + // It doesn't matter if it's sign- or zero-extended, so use zero-extension + // everywhere. + defm: Loadxim_pat; defm: Loadxim_pat; + defm: Loadxim_pat; + defm: Loadxim_pat; + defm: Loadxim_pat; defm: Loadxim_pat; defm: Loadxim_pat; - defm: Loadxim_pat; defm: Loadxim_pat; defm: Loadxim_pat; defm: Loadxim_pat; @@ -2118,6 +2127,7 @@ let AddedComplexity = 30 in { } let AddedComplexity = 60 in { + def: Loadxu_pat; def: Loadxu_pat; def: Loadxu_pat; def: Loadxu_pat; @@ -2126,6 +2136,7 @@ let AddedComplexity = 60 in { def: Loadxu_pat; def: Loadxu_pat; def: Loadxu_pat; + def: Loadxu_pat; def: Loadxu_pat; def: Loadxu_pat; def: Loadxu_pat; @@ -2140,6 +2151,11 @@ let AddedComplexity = 60 in { def: Loadxu_pat; def: Loadxu_pat; + def: Loadxum_pat; + def: Loadxum_pat; + def: Loadxum_pat; + def: Loadxum_pat; + def: Loadxum_pat; def: Loadxum_pat; def: Loadxum_pat; @@ -2152,7 +2168,9 @@ let AddedComplexity = 60 in { } let AddedComplexity = 40 in { + def: Loadxr_shl_pat; def: Loadxr_shl_pat; + def: Loadxr_shl_pat; def: Loadxr_shl_pat; def: Loadxr_shl_pat; def: Loadxr_shl_pat; @@ -2170,8 +2188,10 @@ let AddedComplexity = 40 in { } let AddedComplexity = 20 in { + def: Loadxr_add_pat; def: Loadxr_add_pat; def: Loadxr_add_pat; + def: Loadxr_add_pat; def: Loadxr_add_pat; def: Loadxr_add_pat; def: Loadxr_add_pat; @@ -2188,6 +2208,11 @@ let AddedComplexity = 20 in { } let AddedComplexity = 40 in { + def: Loadxrm_shl_pat; + def: Loadxrm_shl_pat; + def: Loadxrm_shl_pat; + def: Loadxrm_shl_pat; + def: Loadxrm_shl_pat; def: Loadxrm_shl_pat; def: Loadxrm_shl_pat; @@ -2199,7 +2224,12 @@ let AddedComplexity = 40 in { def: Loadxrm_shl_pat; } -let AddedComplexity = 20 in { +let AddedComplexity = 30 in { + def: Loadxrm_add_pat; + def: Loadxrm_add_pat; + def: Loadxrm_add_pat; + def: Loadxrm_add_pat; + def: Loadxrm_add_pat; def: Loadxrm_add_pat; def: Loadxrm_add_pat; @@ -2214,12 +2244,13 @@ let AddedComplexity = 20 in { // Absolute address let AddedComplexity = 60 in { + def: Loada_pat; def: Loada_pat; - def: Loada_pat; def: Loada_pat; + def: Loada_pat; def: Loada_pat; - def: Loada_pat; def: Loada_pat; + def: Loada_pat; def: Loada_pat; def: Loada_pat; def: Loada_pat; @@ -2238,6 +2269,12 @@ let AddedComplexity = 60 in { } let AddedComplexity = 30 in { + def: Loadam_pat; + def: Loadam_pat; + def: Loadam_pat; + def: Loadam_pat; + def: Loadam_pat; + def: Loadam_pat; def: Loadam_pat; def: Loadam_pat; @@ -2247,9 +2284,6 @@ let AddedComplexity = 30 in { def: Loadam_pat; def: Loadam_pat; def: Loadam_pat; - - def: Loadam_pat; - def: Loadam_pat; } // GP-relative address @@ -2280,6 +2314,11 @@ let AddedComplexity = 100 in { } let AddedComplexity = 70 in { + def: Loadam_pat; + def: Loadam_pat; + def: Loadam_pat; + def: Loadam_pat; + def: Loadam_pat; def: Loadam_pat; def: Loadam_pat; @@ -2291,17 +2330,8 @@ let AddedComplexity = 70 in { def: Loadam_pat; def: Loadam_pat; - def: Loadam_pat; } - -// Sign-extending loads of i1 need to replicate the lowest bit throughout -// the 32-bit value. Since the loaded value can only be 0 or 1, 0-v should -// do the trick. -let AddedComplexity = 20 in -def: Pat<(i32 (sextloadi1 I32:$Rs)), - (A2_subri 0, (L2_loadrub_io IntRegs:$Rs, 0))>; - // Patterns for loads of i1: def: Pat<(i1 (load AddrFI:$fi)), (C2_tfrrp (L2_loadrub_io AddrFI:$fi, 0))>; diff --git a/llvm/test/CodeGen/Hexagon/isel-extload-i1.ll b/llvm/test/CodeGen/Hexagon/isel-extload-i1.ll new file mode 100644 index 0000000000000..7c3f73d098476 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/isel-extload-i1.ll @@ -0,0 +1,25 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=hexagon < %s | FileCheck %s + +target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" +target triple = "hexagon" + +define i64 @f0(i32 %a0, i64 %a1, i32 %a2, i32 %a3, i1 zeroext %a4) #0 { +; CHECK-LABEL: f0: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: r0 = memub(r29+#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = sub(#0,r0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r1 = asr(r0,#31) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = sext i1 %a4 to i64 + ret i64 %v0 +} + +attributes #0 = { nounwind "target-cpu"="hexagonv66" "target-features"="+v66,-long-calls" } diff --git a/llvm/test/CodeGen/Hexagon/isel/extload-i1.ll b/llvm/test/CodeGen/Hexagon/isel/extload-i1.ll new file mode 100644 index 0000000000000..def04ee4026c1 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/isel/extload-i1.ll @@ -0,0 +1,380 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=hexagon < %s | FileCheck %s + +@array8 = global [128 x i8] zeroinitializer +@array32 = global [128 x i32] zeroinitializer +@global_gp = global i1 false + +; Sign extensions + +define i32 @f0(i1* %a0) #0 { +; CHECK-LABEL: f0: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r0 = memub(r0+#1) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = sub(#0,r0) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %v0 = getelementptr i1, i1* %a0, i32 1 + %v1 = load i1, i1* %v0 + %v2 = sext i1 %v1 to i32 + ret i32 %v2 +} + +define i32 @f1(i1* %a0, i32 %a1) #0 { +; CHECK-LABEL: f1: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r0 = memub(r0+r1<<#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = sub(#0,r0) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %v0 = getelementptr i1, i1* %a0, i32 %a1 + %v1 = load i1, i1* %v0 + %v2 = sext i1 %v1 to i32 + ret i32 %v2 +} + +define i32 @f2(i32 %a0) #0 { +; CHECK-LABEL: f2: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r0 = memub(r0+##array8) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = sub(#0,r0) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %v0 = getelementptr [128 x i8], [128 x i8]* @array8, i32 0, i32 %a0 + %v1 = bitcast i8* %v0 to i1* + %v2 = load i1, i1* %v1 + %v3 = sext i1 %v2 to i32 + ret i32 %v3 +} + +define i32 @f3(i32 %a0) #0 { +; CHECK-LABEL: f3: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r0 = memub(r0<<#2+##array32) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = sub(#0,r0) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %v0 = getelementptr [128 x i32], [128 x i32]* @array32, i32 0, i32 %a0 + %v1 = bitcast i32* %v0 to i1* + %v2 = load i1, i1* %v1 + %v3 = sext i1 %v2 to i32 + ret i32 %v3 +} + +define i32 @f4() #0 { +; CHECK-LABEL: f4: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r0 = memub(gp+#global_gp) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = sub(#0,r0) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %v0 = load i1, i1* @global_gp + %v1 = sext i1 %v0 to i32 + ret i32 %v1 +} + +define i32 @f5(i64 %a0, i64 %a1, i64 %a2, i1 signext %a3) #0 { +; CHECK-LABEL: f5: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r0 = memub(r29+#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = sub(#0,r0) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %v0 = sext i1 %a3 to i32 + ret i32 %v0 +} + +define i64 @f6(i1* %a0) #0 { +; CHECK-LABEL: f6: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r0 = memub(r0+#1) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = sub(#0,r0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r1 = asr(r0,#31) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %v0 = getelementptr i1, i1* %a0, i32 1 + %v1 = load i1, i1* %v0 + %v2 = sext i1 %v1 to i64 + ret i64 %v2 +} + +define i64 @f7(i1* %a0, i32 %a1) #0 { +; CHECK-LABEL: f7: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r0 = memub(r0+r1<<#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = sub(#0,r0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r1 = asr(r0,#31) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %v0 = getelementptr i1, i1* %a0, i32 %a1 + %v1 = load i1, i1* %v0 + %v2 = sext i1 %v1 to i64 + ret i64 %v2 +} + +define i64 @f8(i32 %a0) #0 { +; CHECK-LABEL: f8: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r0 = memub(r0+##array8) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = sub(#0,r0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r1 = asr(r0,#31) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %v0 = getelementptr [128 x i8], [128 x i8]* @array8, i32 0, i32 %a0 + %v1 = bitcast i8* %v0 to i1* + %v2 = load i1, i1* %v1 + %v3 = sext i1 %v2 to i64 + ret i64 %v3 +} + +define i64 @f9(i32 %a0) #0 { +; CHECK-LABEL: f9: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r0 = memub(r0<<#2+##array32) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = sub(#0,r0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r1 = asr(r0,#31) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %v0 = getelementptr [128 x i32], [128 x i32]* @array32, i32 0, i32 %a0 + %v1 = bitcast i32* %v0 to i1* + %v2 = load i1, i1* %v1 + %v3 = sext i1 %v2 to i64 + ret i64 %v3 +} + +define i64 @f10() #0 { +; CHECK-LABEL: f10: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r0 = memub(gp+#global_gp) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = sub(#0,r0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r1 = asr(r0,#31) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %v0 = load i1, i1* @global_gp + %v1 = sext i1 %v0 to i64 + ret i64 %v1 +} + +define i64 @f11(i64 %a0, i64 %a1, i64 %a2, i1 signext %a3) #0 { +; CHECK-LABEL: f11: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r0 = memub(r29+#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = sub(#0,r0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r1 = asr(r0,#31) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %v0 = sext i1 %a3 to i64 + ret i64 %v0 +} + +; Zero-extensions + +define i32 @f12(i1* %a0) #0 { +; CHECK-LABEL: f12: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r0 = memub(r0+#1) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %v0 = getelementptr i1, i1* %a0, i32 1 + %v1 = load i1, i1* %v0 + %v2 = zext i1 %v1 to i32 + ret i32 %v2 +} + +define i32 @f13(i1* %a0, i32 %a1) #0 { +; CHECK-LABEL: f13: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: r0 = memub(r0+r1<<#0) +; CHECK-NEXT: } + %v0 = getelementptr i1, i1* %a0, i32 %a1 + %v1 = load i1, i1* %v0 + %v2 = zext i1 %v1 to i32 + ret i32 %v2 +} + +define i32 @f14(i32 %a0) #0 { +; CHECK-LABEL: f14: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: r0 = memub(r0+##array8) +; CHECK-NEXT: } + %v0 = getelementptr [128 x i8], [128 x i8]* @array8, i32 0, i32 %a0 + %v1 = bitcast i8* %v0 to i1* + %v2 = load i1, i1* %v1 + %v3 = zext i1 %v2 to i32 + ret i32 %v3 +} + +define i32 @f15(i32 %a0) #0 { +; CHECK-LABEL: f15: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: r0 = memub(r0<<#2+##array32) +; CHECK-NEXT: } + %v0 = getelementptr [128 x i32], [128 x i32]* @array32, i32 0, i32 %a0 + %v1 = bitcast i32* %v0 to i1* + %v2 = load i1, i1* %v1 + %v3 = zext i1 %v2 to i32 + ret i32 %v3 +} + +define i32 @f16() #0 { +; CHECK-LABEL: f16: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: r0 = memub(gp+#global_gp) +; CHECK-NEXT: } + %v0 = load i1, i1* @global_gp + %v1 = zext i1 %v0 to i32 + ret i32 %v1 +} + +define i32 @f17(i64 %a0, i64 %a1, i64 %a2, i1 zeroext %a3) #0 { +; CHECK-LABEL: f17: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: r0 = memub(r29+#0) +; CHECK-NEXT: } + %v0 = zext i1 %a3 to i32 + ret i32 %v0 +} + +define i64 @f18(i1* %a0) #0 { +; CHECK-LABEL: f18: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: r1 = #0 +; CHECK-NEXT: r0 = memub(r0+#1) +; CHECK-NEXT: } + %v0 = getelementptr i1, i1* %a0, i32 1 + %v1 = load i1, i1* %v0 + %v2 = zext i1 %v1 to i64 + ret i64 %v2 +} + +define i64 @f19(i1* %a0, i32 %a1) #0 { +; CHECK-LABEL: f19: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r1 = #0 +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: r0 = memub(r0+r1<<#0) +; CHECK-NEXT: } + %v0 = getelementptr i1, i1* %a0, i32 %a1 + %v1 = load i1, i1* %v0 + %v2 = zext i1 %v1 to i64 + ret i64 %v2 +} + +define i64 @f20(i32 %a0) #0 { +; CHECK-LABEL: f20: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r1 = #0 +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: r0 = memub(r0+##array8) +; CHECK-NEXT: } + %v0 = getelementptr [128 x i8], [128 x i8]* @array8, i32 0, i32 %a0 + %v1 = bitcast i8* %v0 to i1* + %v2 = load i1, i1* %v1 + %v3 = zext i1 %v2 to i64 + ret i64 %v3 +} + +define i64 @f21(i32 %a0) #0 { +; CHECK-LABEL: f21: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r1 = #0 +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: r0 = memub(r0<<#2+##array32) +; CHECK-NEXT: } + %v0 = getelementptr [128 x i32], [128 x i32]* @array32, i32 0, i32 %a0 + %v1 = bitcast i32* %v0 to i1* + %v2 = load i1, i1* %v1 + %v3 = zext i1 %v2 to i64 + ret i64 %v3 +} + +define i64 @f22() #0 { +; CHECK-LABEL: f22: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r1 = #0 +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: r0 = memub(gp+#global_gp) +; CHECK-NEXT: } + %v0 = load i1, i1* @global_gp + %v1 = zext i1 %v0 to i64 + ret i64 %v1 +} + +define i64 @f23(i64 %a0, i64 %a1, i64 %a2, i1 signext %a3) #0 { +; CHECK-LABEL: f23: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r1 = #0 +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: r0 = memub(r29+#0) +; CHECK-NEXT: } + %v0 = zext i1 %a3 to i64 + ret i64 %v0 +} + +attributes #0 = { nounwind "target-cpu"="hexagonv66" } From c7676d9993183f7041b1d79cc672ff14961c8777 Mon Sep 17 00:00:00 2001 From: Rob Suderman Date: Mon, 28 Jun 2021 10:17:32 -0700 Subject: [PATCH 078/619] [mlir][tosa] Update Tosa conv verifier to handle IntegerType input Input/output types can be integers, which represent a quantized convolution. Update verifier to expect this behavior. Reviewed By: sjarus Differential Revision: https://reviews.llvm.org/D104949 --- mlir/lib/Dialect/Tosa/IR/TosaOps.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp index e50dab12aaf14..83a89f3af80d6 100644 --- a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp +++ b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp @@ -121,19 +121,20 @@ static LogicalResult verifyConvOp(T op) { if (!inputType || !weightType) return failure(); - auto inputQType = - inputType.getElementType().template isa(); - auto weightQType = - weightType.getElementType().template isa(); + auto inputEType = inputType.getElementType(); + auto weightEType = weightType.getElementType(); + + bool inputIsQuant = !inputEType.template isa(); + bool weightIsQuant = !weightEType.template isa(); // Either both must be quantized or both unquantized. - if (inputQType != weightQType) + if (inputIsQuant != weightIsQuant) return failure(); // Quantized type must have constructed the quantizationattr, and unquantized // types should not have a quantizationattr. - if ((inputQType && !op.quantization_info()) || - (!inputQType && op.quantization_info())) + if ((inputIsQuant && !op.quantization_info()) || + (!inputIsQuant && op.quantization_info())) return failure(); return success(); From a4aa705d52e818cf526f5e41cce8e719befd97a6 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Mon, 28 Jun 2021 10:38:18 -0700 Subject: [PATCH 079/619] [lldb] Remove spurious lldb/lldb subdirectory Remove the lldb/lldb subdirectory which I must have accidentally created when applying a patch with the wrong prefix number. Thank you Nico Weber for pointing this out! --- lldb/lldb/test/Shell/Breakpoint/breakpoint-command.test | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 lldb/lldb/test/Shell/Breakpoint/breakpoint-command.test diff --git a/lldb/lldb/test/Shell/Breakpoint/breakpoint-command.test b/lldb/lldb/test/Shell/Breakpoint/breakpoint-command.test deleted file mode 100644 index 6104713cde5ae..0000000000000 --- a/lldb/lldb/test/Shell/Breakpoint/breakpoint-command.test +++ /dev/null @@ -1,5 +0,0 @@ -# RUN: %build %p/Inputs/dummy-target.c -o %t.out -# RUN: %lldb %t.out -o 'b main' -o 'break command add 1 -o "script print(95000 + 126)"' -o 'r' - -# CHECK: 95125 -# CHECK-NOT: 95126 From 18c3c7784975700ae463bb461487d46e74324a66 Mon Sep 17 00:00:00 2001 From: Corentin Jabot Date: Mon, 28 Jun 2021 13:45:12 -0400 Subject: [PATCH 080/619] Add papers adopted by the C++ committee in the June 2021 plenary --- clang/www/cxx_status.html | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/clang/www/cxx_status.html b/clang/www/cxx_status.html index b7f2501cbc534..8de688189e297 100755 --- a/clang/www/cxx_status.html +++ b/clang/www/cxx_status.html @@ -1283,6 +1283,41 @@

C++2b implementation status

P2266R1 Clang 13 + + if consteval + P1938R3 + No + + + Allow duplicate attributes + P2156R1 + Clang 13 + + + Narrowing contextual conversions to bool + P1401R5 + No + + + Trimming whitespaces before line splicing + P2223R2 + Yes + + + Make declaration order layout mandated + P1874R4 + Yes + + + C++ identifier syntax using UAX 31 + P1949R7 + No + + + Mixed string literal concatenation + P2201R1 + Yes + From 2ab27758d5c5e7985cee1a2651bc0a9ee4c2d8c9 Mon Sep 17 00:00:00 2001 From: "William S. Moses" Date: Mon, 28 Jun 2021 13:52:30 -0400 Subject: [PATCH 081/619] Revert "[MLIR][SCF] Inline ExecuteRegion if parent can contain multiple blocks" This reverts commit 5d6240b77e7e7199fcf0e89f6dd2f7eea3596a3c. The commit was mistakenly landed without a PR approval, this will be reverted now and resubmitted. --- mlir/include/mlir/Dialect/SCF/SCFOps.td | 6 ++ mlir/lib/Dialect/SCF/SCF.cpp | 77 +------------------------ mlir/test/Dialect/SCF/canonicalize.mlir | 67 --------------------- 3 files changed, 9 insertions(+), 141 deletions(-) diff --git a/mlir/include/mlir/Dialect/SCF/SCFOps.td b/mlir/include/mlir/Dialect/SCF/SCFOps.td index 9f039b6fcda68..c10441f59bd55 100644 --- a/mlir/include/mlir/Dialect/SCF/SCFOps.td +++ b/mlir/include/mlir/Dialect/SCF/SCFOps.td @@ -108,8 +108,14 @@ def ExecuteRegionOp : SCF_Op<"execute_region"> { let regions = (region AnyRegion:$region); + // TODO: If the parent is a func like op (which would be the case if all other + // ops are from the std dialect), the inliner logic could be readily used to + // inline. let hasCanonicalizer = 1; + // TODO: can fold if it returns a constant. + // TODO: Single block execute_region ops can be readily inlined irrespective + // of which op is a parent. Add a fold for this. let hasFolder = 0; } diff --git a/mlir/lib/Dialect/SCF/SCF.cpp b/mlir/lib/Dialect/SCF/SCF.cpp index 38760ca4050d3..99d2386ced1b1 100644 --- a/mlir/lib/Dialect/SCF/SCF.cpp +++ b/mlir/lib/Dialect/SCF/SCF.cpp @@ -143,94 +143,23 @@ static LogicalResult verify(ExecuteRegionOp op) { // // "test.foo"() : () -> () // %x = "test.val"() : () -> i64 -// "test.bar"(%x) : (i64) -> () +// "test.bar"(%v) : (i64) -> () // struct SingleBlockExecuteInliner : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(ExecuteRegionOp op, PatternRewriter &rewriter) const override { - if (!llvm::hasSingleElement(op.region())) + if (op.region().getBlocks().size() != 1) return failure(); replaceOpWithRegion(rewriter, op, op.region()); return success(); } }; -// Inline an ExecuteRegionOp if its parent can contain multiple blocks. -// TODO generalize the conditions for operations which can be inlined into. -// func @func_execute_region_elim() { -// "test.foo"() : () -> () -// %v = scf.execute_region -> i64 { -// %c = "test.cmp"() : () -> i1 -// cond_br %c, ^bb2, ^bb3 -// ^bb2: -// %x = "test.val1"() : () -> i64 -// br ^bb4(%x : i64) -// ^bb3: -// %y = "test.val2"() : () -> i64 -// br ^bb4(%y : i64) -// ^bb4(%z : i64): -// scf.yield %z : i64 -// } -// "test.bar"(%v) : (i64) -> () -// return -// } -// -// becomes -// -// func @func_execute_region_elim() { -// "test.foo"() : () -> () -// %c = "test.cmp"() : () -> i1 -// cond_br %c, ^bb1, ^bb2 -// ^bb1: // pred: ^bb0 -// %x = "test.val1"() : () -> i64 -// br ^bb3(%x : i64) -// ^bb2: // pred: ^bb0 -// %y = "test.val2"() : () -> i64 -// br ^bb3(%y : i64) -// ^bb3(%z: i64): // 2 preds: ^bb1, ^bb2 -// "test.bar"(%z) : (i64) -> () -// return -// } -// -struct MultiBlockExecuteInliner : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(ExecuteRegionOp op, - PatternRewriter &rewriter) const override { - if (!isa(op->getParentOp())) - return failure(); - - Block *prevBlock = op->getBlock(); - Block *postBlock = rewriter.splitBlock(prevBlock, op->getIterator()); - rewriter.setInsertionPointToEnd(prevBlock); - - rewriter.create(op.getLoc(), &op.region().front()); - - for (Block &blk : op.region()) { - if (YieldOp yieldOp = dyn_cast(blk.getTerminator())) { - rewriter.setInsertionPoint(yieldOp); - rewriter.create(yieldOp.getLoc(), postBlock, - yieldOp.results()); - rewriter.eraseOp(yieldOp); - } - } - - rewriter.inlineRegionBefore(op.region(), postBlock); - SmallVector blockArgs; - - for (auto res : op.getResults()) - blockArgs.push_back(postBlock->addArgument(res.getType())); - - rewriter.replaceOp(op, blockArgs); - return success(); - } -}; - void ExecuteRegionOp::getCanonicalizationPatterns(RewritePatternSet &results, MLIRContext *context) { - results.add(context); + results.add(context); } //===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/SCF/canonicalize.mlir b/mlir/test/Dialect/SCF/canonicalize.mlir index d1789c6dfde52..8692f2d9705e0 100644 --- a/mlir/test/Dialect/SCF/canonicalize.mlir +++ b/mlir/test/Dialect/SCF/canonicalize.mlir @@ -948,70 +948,3 @@ func @execute_region_elim() { // CHECK-NEXT: "test.bar"(%[[VAL]]) : (i64) -> () // CHECK-NEXT: } - -// ----- - -// CHECK-LABEL: func @func_execute_region_elim -func @func_execute_region_elim() { - "test.foo"() : () -> () - %v = scf.execute_region -> i64 { - %c = "test.cmp"() : () -> i1 - cond_br %c, ^bb2, ^bb3 - ^bb2: - %x = "test.val1"() : () -> i64 - br ^bb4(%x : i64) - ^bb3: - %y = "test.val2"() : () -> i64 - br ^bb4(%y : i64) - ^bb4(%z : i64): - scf.yield %z : i64 - } - "test.bar"(%v) : (i64) -> () - return -} - -// CHECK: "test.foo" -// CHECK: %[[cmp:.+]] = "test.cmp" -// CHECK: cond_br %[[cmp]], ^[[bb1:.+]], ^[[bb2:.+]] -// CHECK: ^[[bb1]]: // pred: ^bb0 -// CHECK: %[[x:.+]] = "test.val1" -// CHECK: br ^[[bb3:.+]](%[[x]] : i64) -// CHECK: ^[[bb2]]: // pred: ^bb0 -// CHECK: %[[y:.+]] = "test.val2" -// CHECK: br ^[[bb3]](%[[y:.+]] : i64) -// CHECK: ^[[bb3]](%[[z:.+]]: i64): -// CHECK: "test.bar"(%[[z]]) -// CHECK: return - - -// ----- - -// CHECK-LABEL: func @func_execute_region_elim2 -func @func_execute_region_elim2() { - "test.foo"() : () -> () - %v = scf.execute_region -> i64 { - %c = "test.cmp"() : () -> i1 - cond_br %c, ^bb2, ^bb3 - ^bb2: - %x = "test.val1"() : () -> i64 - scf.yield %x : i64 - ^bb3: - %y = "test.val2"() : () -> i64 - scf.yield %y : i64 - } - "test.bar"(%v) : (i64) -> () - return -} - -// CHECK: "test.foo" -// CHECK: %[[cmp:.+]] = "test.cmp" -// CHECK: cond_br %[[cmp]], ^[[bb1:.+]], ^[[bb2:.+]] -// CHECK: ^[[bb1]]: // pred: ^bb0 -// CHECK: %[[x:.+]] = "test.val1" -// CHECK: br ^[[bb3:.+]](%[[x]] : i64) -// CHECK: ^[[bb2]]: // pred: ^bb0 -// CHECK: %[[y:.+]] = "test.val2" -// CHECK: br ^[[bb3]](%[[y:.+]] : i64) -// CHECK: ^[[bb3]](%[[z:.+]]: i64): -// CHECK: "test.bar"(%[[z]]) -// CHECK: return From 355541a1b7a5011f8f4ebadc3e23b25c734f9d27 Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Mon, 28 Jun 2021 18:58:42 +0200 Subject: [PATCH 082/619] [lldb] Avoid using any shell when calling xcrun. When we run `xcrun` we don't have any user input in our command so relying on the user's default shell doesn't make a lot of sense. If the user has set the system shell to a something that isn't supported yet (dash, ash) then we would run into the problem that we don't know how to escape our command string. This patch just avoids using any shell at all as xcrun is always at the same path. Reviewed By: aprantl, JDevlieghere, kastiglione Differential Revision: https://reviews.llvm.org/D104653 --- .../source/Host/macosx/objcxx/HostInfoMacOSX.mm | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/lldb/source/Host/macosx/objcxx/HostInfoMacOSX.mm b/lldb/source/Host/macosx/objcxx/HostInfoMacOSX.mm index f822533f1b41a..a0706ec9ff6ae 100644 --- a/lldb/source/Host/macosx/objcxx/HostInfoMacOSX.mm +++ b/lldb/source/Host/macosx/objcxx/HostInfoMacOSX.mm @@ -383,17 +383,22 @@ static void ParseOSVersion(llvm::VersionTuple &version, NSString *Key) { auto xcrun = [](const std::string &sdk, llvm::StringRef developer_dir = "") -> std::string { - std::string xcrun_cmd = "xcrun --show-sdk-path --sdk " + sdk; - if (!developer_dir.empty()) - xcrun_cmd = "/usr/bin/env DEVELOPER_DIR=\"" + developer_dir.str() + - "\" " + xcrun_cmd; + Args args; + if (!developer_dir.empty()) { + args.AppendArgument("/usr/bin/env"); + args.AppendArgument("DEVELOPER_DIR=" + developer_dir.str()); + } + args.AppendArgument("/usr/bin/xcrun"); + args.AppendArgument("--show-sdk-path"); + args.AppendArgument("--sdk"); + args.AppendArgument(sdk); int status = 0; int signo = 0; std::string output_str; lldb_private::Status error = - Host::RunShellCommand(xcrun_cmd, FileSpec(), &status, &signo, - &output_str, std::chrono::seconds(15)); + Host::RunShellCommand(args, FileSpec(), &status, &signo, &output_str, + std::chrono::seconds(15)); // Check that xcrun return something useful. if (status != 0 || output_str.empty()) From 2a60ab76a796637d49bf1c7191f5b5a0c92f81bc Mon Sep 17 00:00:00 2001 From: Florian Mayer Date: Mon, 28 Jun 2021 11:34:30 +0100 Subject: [PATCH 083/619] [hwasan] print exact mismatch offset for short granules. Reviewed By: eugenis Differential Revision: https://reviews.llvm.org/D104463 --- compiler-rt/lib/hwasan/hwasan_report.cpp | 19 +++++++++++++-- .../TestCases/heap-buffer-overflow-into.c | 23 +++++++++++++++---- .../hwasan/TestCases/heap-buffer-overflow.c | 2 ++ .../test/hwasan/TestCases/mem-intrinsics.c | 2 +- 4 files changed, 39 insertions(+), 7 deletions(-) diff --git a/compiler-rt/lib/hwasan/hwasan_report.cpp b/compiler-rt/lib/hwasan/hwasan_report.cpp index 715b4e05992a6..b6f968ea10457 100644 --- a/compiler-rt/lib/hwasan/hwasan_report.cpp +++ b/compiler-rt/lib/hwasan/hwasan_report.cpp @@ -630,9 +630,24 @@ void ReportTagMismatch(StackTrace *stack, uptr tagged_addr, uptr access_size, Printf("%s of size %zu at %p tags: %02x/%02x (ptr/mem) in thread T%zd\n", is_store ? "WRITE" : "READ", access_size, untagged_addr, ptr_tag, mem_tag, t->unique_id()); + if (mem_tag < kShadowAlignment) { + tag_t *granule_ptr = reinterpret_cast((untagged_addr + offset) & + ~(kShadowAlignment - 1)); + // If offset is 0, (untagged_addr + offset) is not aligned to granules. + // This is the offset of the leftmost accessed byte within the bad granule. + u8 in_granule_offset = (untagged_addr + offset) & (kShadowAlignment - 1); + // The first mismatch was a short granule that matched the ptr_tag. + if (granule_ptr[kShadowAlignment - 1] == ptr_tag) { + // If the access starts after the end of the short granule, then the first + // bad byte is the first byte of the access; otherwise it is the first + // byte past the end of the short granule + if (mem_tag > in_granule_offset) { + offset += mem_tag - in_granule_offset; + } + } + } if (offset != 0) - Printf("Invalid access starting at offset [%zu, %zu)\n", offset, - Min(access_size, static_cast(offset) + (1 << kShadowScale))); + Printf("Invalid access starting at offset %zu\n", offset); Printf("%s", d.Default()); stack->Print(); diff --git a/compiler-rt/test/hwasan/TestCases/heap-buffer-overflow-into.c b/compiler-rt/test/hwasan/TestCases/heap-buffer-overflow-into.c index af4256b84db03..8526c81f4cd7d 100644 --- a/compiler-rt/test/hwasan/TestCases/heap-buffer-overflow-into.c +++ b/compiler-rt/test/hwasan/TestCases/heap-buffer-overflow-into.c @@ -1,5 +1,8 @@ // RUN: %clang_hwasan %s -o %t -// RUN: not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK +// RUN: not %run %t 5 10 2>&1 | FileCheck %s --check-prefix=CHECK5 +// RUN: not %run %t 7 10 2>&1 | FileCheck %s --check-prefix=CHECK7 +// RUN: not %run %t 8 20 2>&1 | FileCheck %s --check-prefix=CHECK8 +// RUN: not %run %t 32 20 2>&1 | FileCheck %s --check-prefix=CHECK32 // REQUIRES: stable-runtime @@ -10,8 +13,20 @@ int main(int argc, char **argv) { __hwasan_enable_allocator_tagging(); - char *volatile x = (char *)malloc(10); - memset(x + 5, 0, 26); - // CHECK: is located 5 bytes inside 10-byte region + if (argc < 2) { + fprintf(stderr, "Invalid number of arguments."); + abort(); + } + int read_offset = argc < 2 ? 5 : atoi(argv[1]); + int size = argc < 3 ? 10 : atoi(argv[2]); + char *volatile x = (char *)malloc(size); + memset(x + read_offset, 0, 26); + // CHECK5: Invalid access starting at offset 5 + // CHECK5: is located 5 bytes inside 10-byte region + // CHECK7: Invalid access starting at offset 3 + // CHECK7: is located 7 bytes inside 10-byte region + // CHECK8: Invalid access starting at offset 12 + // CHECK8: is located 8 bytes inside 20-byte region + // CHECK32: is located 12 bytes to the right of 20-byte region free(x); } diff --git a/compiler-rt/test/hwasan/TestCases/heap-buffer-overflow.c b/compiler-rt/test/hwasan/TestCases/heap-buffer-overflow.c index 67398141209af..8e8719a7f65c4 100644 --- a/compiler-rt/test/hwasan/TestCases/heap-buffer-overflow.c +++ b/compiler-rt/test/hwasan/TestCases/heap-buffer-overflow.c @@ -52,12 +52,14 @@ int main(int argc, char **argv) { // CHECKM: is located 0 bytes to the right of 1000000-byte region // // CHECK31: tags: [[TAG:..]]/0e (ptr/mem) +// CHECK31-NOT: Invalid access starting at offset // CHECK31: is located 1 bytes to the right of 30-byte region // CHECK31: Memory tags around the buggy address // CHECK31: [0e] // CHECK31: Tags for short granules around the buggy address // CHECK31: {{\[}}[[TAG]]] // +// CHECK20-NOT: Invalid access starting at offset // CHECK20: is located 10 bytes to the right of 20-byte region [0x{{.*}}0,0x{{.*}}4) free(x); } diff --git a/compiler-rt/test/hwasan/TestCases/mem-intrinsics.c b/compiler-rt/test/hwasan/TestCases/mem-intrinsics.c index 28568c828cea1..44b9fd67cbcc6 100644 --- a/compiler-rt/test/hwasan/TestCases/mem-intrinsics.c +++ b/compiler-rt/test/hwasan/TestCases/mem-intrinsics.c @@ -23,7 +23,7 @@ int main() { write(STDOUT_FILENO, "recovered\n", 10); // WRITE: ERROR: HWAddressSanitizer: tag-mismatch on address // WRITE: WRITE of size 32 at {{.*}} tags: [[PTR_TAG:..]]/[[MEM_TAG:..]] (ptr/mem) - // WRITE: Invalid access starting at offset [16, 32) + // WRITE: Invalid access starting at offset 16 // WRITE: Memory tags around the buggy address (one tag corresponds to 16 bytes): // WRITE: =>{{.*}}[[PTR_TAG]]{{[[:space:]]\[}}[[MEM_TAG]] // WRITE-NOT: recovered From f85b9d644398767f6b5cb046f952ed7dbd7dfc7a Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Mon, 28 Jun 2021 11:02:30 -0700 Subject: [PATCH 084/619] [ObjC][ARC] Ignore operand bundle "clang.arc.attachedcall" on a call if the call's return type is void Instead of trying hard to prevent global optimization passes such as deadargelim from changing the return type to void, just ignore the bundle if the return type is void. clang currently emits calls to @llvm.objc.clang.arc.noop.use, which consumes the function call result, immediately after the function call to prevent changes to the return type, but optimization passes can delete the call to @llvm.objc.clang.arc.noop.use if the function call doesn't return, which enables deadargelim to change the return type. rdar://76671438 Differential Revision: https://reviews.llvm.org/D103062 --- llvm/docs/LangRef.rst | 6 ++++-- llvm/include/llvm/Analysis/ObjCARCUtil.h | 19 ++++++++++++++----- llvm/lib/IR/Verifier.cpp | 6 ++++-- .../Transforms/ObjCARC/contract-rv-attr.ll | 13 +++++++++++++ llvm/test/Verifier/operand-bundles.ll | 7 +++++++ 5 files changed, 42 insertions(+), 9 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 1986f232cc3e3..083ece600448f 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -2406,8 +2406,10 @@ A ``"clang.arc.attachedcall`` operand bundle on a call indicates the call is implicitly followed by a marker instruction and a call to an ObjC runtime function that uses the result of the call. If the argument passed to the operand bundle is 0, ``@objc_retainAutoreleasedReturnValue`` is called. If 1 is passed, -``@objc_unsafeClaimAutoreleasedReturnValue`` is called. A call with this bundle -implicitly uses its return value. +``@objc_unsafeClaimAutoreleasedReturnValue`` is called. The return value of a +call with this bundle is used by a call to ``@llvm.objc.clang.arc.noop.use`` +unless the called function's return type is void, in which case the operand +bundle is ignored. The operand bundle is needed to ensure the call is immediately followed by the marker instruction or the ObjC runtime call in the final output. diff --git a/llvm/include/llvm/Analysis/ObjCARCUtil.h b/llvm/include/llvm/Analysis/ObjCARCUtil.h index 5d04ebadf0851..2566bfbcf61cc 100644 --- a/llvm/include/llvm/Analysis/ObjCARCUtil.h +++ b/llvm/include/llvm/Analysis/ObjCARCUtil.h @@ -31,7 +31,21 @@ getAttachedCallOperandBundleEnum(bool IsRetain) { return IsRetain ? RVOB_Retain : RVOB_Claim; } +inline bool hasAttachedCallOpBundle(const CallBase *CB) { + // Ignore the bundle if the return type is void. Global optimization passes + // can turn the called function's return type to void. That should happen only + // if the call doesn't return and the call to @llvm.objc.clang.arc.noop.use + // no longer consumes the function return or is deleted. In that case, it's + // not necessary to emit the marker instruction or calls to the ARC runtime + // functions. + return !CB->getFunctionType()->getReturnType()->isVoidTy() && + CB->getOperandBundle(LLVMContext::OB_clang_arc_attachedcall) + .hasValue(); +} + inline bool hasAttachedCallOpBundle(const CallBase *CB, bool IsRetain) { + assert(hasAttachedCallOpBundle(CB) && + "call doesn't have operand bundle clang_arc_attachedcall"); auto B = CB->getOperandBundle(LLVMContext::OB_clang_arc_attachedcall); if (!B.hasValue()) return false; @@ -39,11 +53,6 @@ inline bool hasAttachedCallOpBundle(const CallBase *CB, bool IsRetain) { getAttachedCallOperandBundleEnum(IsRetain); } -inline bool hasAttachedCallOpBundle(const CallBase *CB) { - return CB->getOperandBundle(LLVMContext::OB_clang_arc_attachedcall) - .hasValue(); -} - } // end namespace objcarc } // end namespace llvm diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 468c935e3bbf2..24f5d51381803 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -3353,9 +3353,11 @@ void Verifier::visitCallBase(CallBase &Call) { } if (FoundAttachedCallBundle) - Assert(FTy->getReturnType()->isPointerTy(), + Assert((FTy->getReturnType()->isPointerTy() || + (Call.doesNotReturn() && FTy->getReturnType()->isVoidTy())), "a call with operand bundle \"clang.arc.attachedcall\" must call a " - "function returning a pointer", + "function returning a pointer or a non-returning function that has " + "a void return type", Call); // Verify that each inlinable callsite of a debug-info-bearing function in a diff --git a/llvm/test/Transforms/ObjCARC/contract-rv-attr.ll b/llvm/test/Transforms/ObjCARC/contract-rv-attr.ll index 3a817327c3638..18bc00b62db8d 100644 --- a/llvm/test/Transforms/ObjCARC/contract-rv-attr.ll +++ b/llvm/test/Transforms/ObjCARC/contract-rv-attr.ll @@ -55,9 +55,22 @@ cleanup: ret i8* %retval.0 } +; CHECK-LABEL: define void @test3( +; CHECK: call void @foo2() #[[ATTR1:.*]] [ "clang.arc.attachedcall"(i64 0) ] +; CHECK-NEXT: ret void + +define void @test3() { + call void @foo2() #0 [ "clang.arc.attachedcall"(i64 0) ] + ret void +} + declare i8* @foo() +declare void @foo2() declare i32 @__gxx_personality_v0(...) !llvm.module.flags = !{!0} +; CHECK: attributes #[[ATTR1]] = { noreturn } +attributes #0 = { noreturn } + !0 = !{i32 1, !"clang.arc.retainAutoreleasedReturnValueMarker", !"mov\09fp, fp\09\09// marker for objc_retainAutoreleaseReturnValue"} diff --git a/llvm/test/Verifier/operand-bundles.ll b/llvm/test/Verifier/operand-bundles.ll index 4ef0e647988af..d7d7b4f0f7820 100644 --- a/llvm/test/Verifier/operand-bundles.ll +++ b/llvm/test/Verifier/operand-bundles.ll @@ -4,6 +4,7 @@ declare void @g() declare %0* @foo0() declare i8 @foo1() +declare void @noreturn_func() ; Operand bundles uses are like regular uses, and need to be dominated ; by their defs. @@ -69,9 +70,15 @@ define void @f_clang_arc_attachedcall() { ; CHECK-NEXT: call %0* @foo0() [ "clang.arc.attachedcall"(i64 0), "clang.arc.attachedcall"(i64 0) ] ; CHECK-NEXT: must call a function returning a pointer ; CHECK-NEXT: call i8 @foo1() [ "clang.arc.attachedcall"(i64 0) ] +; CHECK-NEXT: or a non-returning function +; CHECK-NEXT: call void @g() [ "clang.arc.attachedcall"(i64 0) ] call %0* @foo0() [ "clang.arc.attachedcall"(i64 0) ] call %0* @foo0() [ "clang.arc.attachedcall"(i64 0), "clang.arc.attachedcall"(i64 0) ] call i8 @foo1() [ "clang.arc.attachedcall"(i64 0) ] + call void @noreturn_func() #0 [ "clang.arc.attachedcall"(i64 0) ] + call void @g() [ "clang.arc.attachedcall"(i64 0) ] ret void } + +attributes #0 = { noreturn } From 4f5ebfdcd6c9d459e262d1815f49a45bad3cbcfc Mon Sep 17 00:00:00 2001 From: Nancy Wang Date: Mon, 28 Jun 2021 14:04:02 -0400 Subject: [PATCH 085/619] [SystemZ][z/OS][libcxx]: fix libcxx test cases failed on ebcdic mode on z/OS This patch is to fix 2 libcxx test cases, test cases assumed 'a' > 'A' which is not case in z/OS platform on ebcdic mode, modified test cases to compare between upper letters or lower letters, or digits so ordering will be true for all platform. Differential Revision: https://reviews.llvm.org/D104748 --- .../lt.pass.cpp | 18 +++++++++--------- .../lt.pass.cpp | 14 ++++++++++++-- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char/lt.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char/lt.pass.cpp index fa14e666e9e90..81a8574eac8bd 100644 --- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char/lt.pass.cpp +++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char/lt.pass.cpp @@ -19,18 +19,18 @@ int main(int, char**) { - assert( std::char_traits::lt('\0', 'A')); - assert(!std::char_traits::lt('A', '\0')); + assert(std::char_traits::lt('\0', 'A') == ('\0' < 'A')); + assert(std::char_traits::lt('A', '\0') == ('A' < '\0')); - assert(!std::char_traits::lt('a', 'a')); - assert( std::char_traits::lt('A', 'a')); - assert(!std::char_traits::lt('a', 'A')); + assert(std::char_traits::lt('a', 'a') == ('a' < 'a')); + assert(std::char_traits::lt('A', 'a') == ('A' < 'a')); + assert(std::char_traits::lt('a', 'A') == ('a' < 'A')); - assert( std::char_traits::lt('a', 'z')); - assert( std::char_traits::lt('A', 'Z')); + assert(std::char_traits::lt('a', 'z') == ('a' < 'z')); + assert(std::char_traits::lt('A', 'Z') == ('A' < 'Z')); - assert( std::char_traits::lt(' ', 'A')); - assert( std::char_traits::lt('A', '~')); + assert(std::char_traits::lt(' ', 'A') == (' ' < 'A')); + assert(std::char_traits::lt('A', '~') == ('A' < '~')); return 0; } diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/lt.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/lt.pass.cpp index 15a16b0cd92da..5664692addb41 100644 --- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/lt.pass.cpp +++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/lt.pass.cpp @@ -19,8 +19,18 @@ int main(int, char**) { - assert(!std::char_traits::lt(L'a', L'a')); - assert( std::char_traits::lt(L'A', L'a')); + assert(std::char_traits::lt(L'\0', L'A') == (L'\0' < L'A')); + assert(std::char_traits::lt(L'A', L'\0') == (L'A' < L'\0')); + + assert(std::char_traits::lt(L'a', L'a') == (L'a' < L'a')); + assert(std::char_traits::lt(L'A', L'a') == (L'A' < L'a')); + assert(std::char_traits::lt(L'a', L'A') == (L'a' < L'A')); + + assert(std::char_traits::lt(L'a', L'z') == (L'a' < L'z')); + assert(std::char_traits::lt(L'A', L'Z') == (L'A' < L'Z')); + + assert(std::char_traits::lt(L' ', L'A') == (L' ' < L'A')); + assert(std::char_traits::lt(L'A', L'~') == (L'A' < L'~')); return 0; } From 57e53f013087d68305fe278aca0a92efc9b0e899 Mon Sep 17 00:00:00 2001 From: Peter Steinfeld Date: Fri, 25 Jun 2021 11:28:30 -0700 Subject: [PATCH 086/619] [flang] Fix conformability for intrinsic procedures There are situations where the arguments of intrinsics must be conformable, which is defined in section 3.36. This means they must have "the same shape, or one being an array and the other being scalar". But the check we were actually making was that their ranks were the same. This change fixes that and adds a test for the UNPACK intrinsic, where the FIELD argument "shall be conformable with MASK". Differential Revision: https://reviews.llvm.org/D104936 --- flang/lib/Evaluate/intrinsics.cpp | 19 +++++++++++++++++-- flang/test/Semantics/unpack.f90 | 15 +++++++++++++++ 2 files changed, 32 insertions(+), 2 deletions(-) create mode 100644 flang/test/Semantics/unpack.f90 diff --git a/flang/lib/Evaluate/intrinsics.cpp b/flang/lib/Evaluate/intrinsics.cpp index c8d8b02d58abc..5e305055b6913 100644 --- a/flang/lib/Evaluate/intrinsics.cpp +++ b/flang/lib/Evaluate/intrinsics.cpp @@ -1355,6 +1355,7 @@ std::optional IntrinsicInterface::Match( // Check the ranks of the arguments against the intrinsic's interface. const ActualArgument *arrayArg{nullptr}; + const char *arrayArgName{nullptr}; const ActualArgument *knownArg{nullptr}; std::optional shapeArgSize; int elementalRank{0}; @@ -1411,6 +1412,7 @@ std::optional IntrinsicInterface::Match( argOk = rank > 0; if (!arrayArg) { arrayArg = arg; + arrayArgName = d.keyword; } else { argOk &= rank == arrayArg->Rank(); } @@ -1424,9 +1426,22 @@ std::optional IntrinsicInterface::Match( case Rank::anyOrAssumedRank: argOk = true; break; - case Rank::conformable: + case Rank::conformable: // arg must be conformable with previous arrayArg CHECK(arrayArg); - argOk = rank == 0 || rank == arrayArg->Rank(); + CHECK(arrayArgName); + if (const std::optional &arrayArgShape{ + GetShape(context, *arrayArg)}) { + if (const std::optional &argShape{GetShape(context, *arg)}) { + std::string arrayArgMsg{"'"}; + arrayArgMsg = arrayArgMsg + arrayArgName + "='" + " argument"; + std::string argMsg{"'"}; + argMsg = argMsg + d.keyword + "='" + " argument"; + CheckConformance(context.messages(), *arrayArgShape, *argShape, + CheckConformanceFlags::RightScalarExpandable, + arrayArgMsg.c_str(), argMsg.c_str()); + } + } + argOk = true; // Avoid an additional error message break; case Rank::dimReduced: case Rank::dimRemovedOrScalar: diff --git a/flang/test/Semantics/unpack.f90 b/flang/test/Semantics/unpack.f90 new file mode 100644 index 0000000000000..d624f9c2e38a1 --- /dev/null +++ b/flang/test/Semantics/unpack.f90 @@ -0,0 +1,15 @@ +! RUN: %S/test_errors.sh %s %t %flang_fc1 +! UNPACK() intrinsic function error tests +program test_unpack + integer, dimension(2) :: vector = [343, 512] + logical, dimension(2, 2) :: mask = & + reshape([.true., .false., .true., .false.], [2, 2]) + integer, dimension(2, 2) :: field = reshape([1, 2, 3, 4, 5, 6], [2, 2]) + integer, dimension(2, 1) :: bad_field = reshape([1, 2], [2, 1]) + integer :: scalar_field + integer, dimension(2, 2) :: result + result = unpack(vector, mask, field) + !ERROR: Dimension 2 of 'mask=' argument has extent 2, but 'field=' argument has extent 1 + result = unpack(vector, mask, bad_field) + result = unpack(vector, mask, scalar_field) +end program From ad6bee87e6b78881223ebd71e52e5a336ef2a65c Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Mon, 28 Jun 2021 14:20:45 -0400 Subject: [PATCH 087/619] [libc++] NFCI: Remove unused Lit parameter sanitizer_library --- libcxx/test/configs/legacy.cfg.in | 1 - libcxxabi/test/lit.site.cfg.in | 1 - 2 files changed, 2 deletions(-) diff --git a/libcxx/test/configs/legacy.cfg.in b/libcxx/test/configs/legacy.cfg.in index 9e501a68c3531..f9737e85e0dc6 100644 --- a/libcxx/test/configs/legacy.cfg.in +++ b/libcxx/test/configs/legacy.cfg.in @@ -18,7 +18,6 @@ config.enable_shared = @LIBCXX_LINK_TESTS_WITH_SHARED_LIBCXX@ config.enable_32bit = @LIBCXX_BUILD_32_BITS@ config.cxx_abi = "@LIBCXX_CXX_ABI_LIBNAME@" config.use_sanitizer = "@LLVM_USE_SANITIZER@" -config.sanitizer_library = "@LIBCXX_SANITIZER_LIBRARY@" config.configuration_variant = "@LIBCXX_LIT_VARIANT@" config.host_triple = "@LLVM_HOST_TRIPLE@" if "@TARGET_TRIPLE@": diff --git a/libcxxabi/test/lit.site.cfg.in b/libcxxabi/test/lit.site.cfg.in index 425b9f2c47618..6c4e944de556d 100644 --- a/libcxxabi/test/lit.site.cfg.in +++ b/libcxxabi/test/lit.site.cfg.in @@ -17,7 +17,6 @@ config.llvm_unwinder = @LIBCXXABI_USE_LLVM_UNWINDER@ config.builtins_library = "@LIBCXXABI_BUILTINS_LIBRARY@" config.enable_threads = @LIBCXXABI_ENABLE_THREADS@ config.use_sanitizer = "@LLVM_USE_SANITIZER@" -config.sanitizer_library = "@LIBCXXABI_SANITIZER_LIBRARY@" config.enable_32bit = @LIBCXXABI_BUILD_32_BITS@ config.target_info = "@LIBCXXABI_TARGET_INFO@" config.executor = "@LIBCXXABI_EXECUTOR@" From 43fadefb0e77c56de7637c391cf98cf709b27095 Mon Sep 17 00:00:00 2001 From: peter klausler Date: Fri, 25 Jun 2021 10:40:08 -0700 Subject: [PATCH 088/619] [flang] Implement user-defined derived type runtime I/O With derived type description tables now available to the runtime library, it is possible to implement the concept of "child" I/O statements in the runtime and use them to convert instances of derived type I/O data transfers into calls to user-defined subroutines when they have been specified for a type. (See Fortran 2018, subclauses 12.6.4.8 & 13.7.6). - Support formatted, list-directed, and NAMELIST transfers to internal parent units; support these, and unformatted transfers, for external parent units. - Support nested child defined derived type I/O. - Parse DT'foo'(v-list) FORMAT data edit descriptors and passes their strings &/or v-list values as arguments to the defined formatted I/O routines. - Fix problems with this feature encountered in semantics and FORMAT valiation during development and end-to-end testing. - Convert typeInfo::SpecialBinding from a struct to a class after adding a member function. Differential Revision: https://reviews.llvm.org/D104930 --- flang/include/flang/Common/format.h | 10 +- flang/lib/Semantics/check-declarations.cpp | 10 +- flang/runtime/CMakeLists.txt | 1 + flang/runtime/derived.cpp | 14 +- flang/runtime/descriptor-io.cpp | 106 +++++++++++ flang/runtime/descriptor-io.h | 168 ++++++++++------- flang/runtime/format-implementation.h | 87 +++++++-- flang/runtime/format.cpp | 46 +---- flang/runtime/format.h | 29 ++- flang/runtime/io-api.cpp | 126 ++++++++----- flang/runtime/io-error.cpp | 8 + flang/runtime/io-error.h | 5 + flang/runtime/io-stmt.cpp | 205 ++++++++++++++++++--- flang/runtime/io-stmt.h | 156 +++++++++++++--- flang/runtime/tools.cpp | 4 +- flang/runtime/type-info.cpp | 68 ++++++- flang/runtime/type-info.h | 178 ++++++++++-------- flang/runtime/unit-map.cpp | 1 + flang/runtime/unit.cpp | 46 ++++- flang/runtime/unit.h | 57 +++++- flang/test/Semantics/typeinfo01.f90 | 2 +- 21 files changed, 984 insertions(+), 343 deletions(-) create mode 100644 flang/runtime/descriptor-io.cpp diff --git a/flang/include/flang/Common/format.h b/flang/include/flang/Common/format.h index 99b8cbe41d7cf..e38ea6b0dfedf 100644 --- a/flang/include/flang/Common/format.h +++ b/flang/include/flang/Common/format.h @@ -136,11 +136,11 @@ template class FormatValidator { const CHAR *cursor_{}; // current location in format_ const CHAR *laCursor_{}; // lookahead cursor Token token_{}; // current token + TokenKind previousTokenKind_{TokenKind::None}; int64_t integerValue_{-1}; // value of UnsignedInteger token Token knrToken_{}; // k, n, or r UnsignedInteger token int64_t knrValue_{-1}; // -1 ==> not present int64_t wValue_{-1}; - bool previousTokenWasInt_{false}; char argString_[3]{}; // 1-2 character msg arg; usually edit descriptor name bool formatHasErrors_{false}; bool unterminatedFormatError_{false}; @@ -179,7 +179,7 @@ template void FormatValidator::NextToken() { // At entry, cursor_ points before the start of the next token. // At exit, cursor_ points to last CHAR of token_. - previousTokenWasInt_ = token_.kind() == TokenKind::UnsignedInteger; + previousTokenKind_ = token_.kind(); CHAR c{NextChar()}; token_.set_kind(TokenKind::None); token_.set_offset(cursor_ - format_); @@ -416,7 +416,8 @@ template void FormatValidator::NextToken() { } } SetLength(); - if (stmt_ == IoStmtKind::Read) { // 13.3.2p6 + if (stmt_ == IoStmtKind::Read && + previousTokenKind_ != TokenKind::DT) { // 13.3.2p6 ReportError("String edit descriptor in READ format expression"); } else if (token_.kind() != TokenKind::String) { ReportError("Unterminated string"); @@ -829,7 +830,8 @@ template bool FormatValidator::Check() { // Possible first token of the next format item; token not yet processed. if (commaRequired) { const char *s{"Expected ',' or ')' in format expression"}; // C1302 - if (previousTokenWasInt_ && itemsWithLeadingInts_.test(token_.kind())) { + if (previousTokenKind_ == TokenKind::UnsignedInteger && + itemsWithLeadingInts_.test(token_.kind())) { ReportError(s); } else { ReportWarning(s); diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp index 5d063f14499a3..b57d19b8a62e5 100644 --- a/flang/lib/Semantics/check-declarations.cpp +++ b/flang/lib/Semantics/check-declarations.cpp @@ -1797,9 +1797,15 @@ void CheckHelper::CheckAlreadySeenDefinedIo(const DerivedTypeSpec *derivedType, void CheckHelper::CheckDioDummyIsDerived( const Symbol &subp, const Symbol &arg, GenericKind::DefinedIo ioKind) { if (const DeclTypeSpec * type{arg.GetType()}) { - const DerivedTypeSpec *derivedType{type->AsDerived()}; - if (derivedType) { + if (const DerivedTypeSpec * derivedType{type->AsDerived()}) { CheckAlreadySeenDefinedIo(derivedType, ioKind, subp); + bool isPolymorphic{type->IsPolymorphic()}; + if (isPolymorphic != IsExtensibleType(derivedType)) { + messages_.Say(arg.name(), + "Dummy argument '%s' of a defined input/output procedure must be %s when the derived type is %s"_err_en_US, + arg.name(), isPolymorphic ? "TYPE()" : "CLASS()", + isPolymorphic ? "not extensible" : "extensible"); + } } else { messages_.Say(arg.name(), "Dummy argument '%s' of a defined input/output procedure must have a" diff --git a/flang/runtime/CMakeLists.txt b/flang/runtime/CMakeLists.txt index 5f4bbc73c23d2..1f7e3d14728a4 100644 --- a/flang/runtime/CMakeLists.txt +++ b/flang/runtime/CMakeLists.txt @@ -40,6 +40,7 @@ add_flang_library(FortranRuntime connection.cpp derived.cpp descriptor.cpp + descriptor-io.cpp dot-product.cpp edit-input.cpp edit-output.cpp diff --git a/flang/runtime/derived.cpp b/flang/runtime/derived.cpp index ef4bddc8a4669..4875ef2a4bc57 100644 --- a/flang/runtime/derived.cpp +++ b/flang/runtime/derived.cpp @@ -20,9 +20,9 @@ static const typeInfo::SpecialBinding *FindFinal( for (std::size_t j{0}; j < totalSpecialBindings; ++j) { const auto &special{ *specialDesc.ZeroBasedIndexedElement(j)}; - switch (special.which) { + switch (special.which()) { case typeInfo::SpecialBinding::Which::Final: - if (special.rank == rank) { + if (special.rank() == rank) { return &special; } break; @@ -40,20 +40,20 @@ static const typeInfo::SpecialBinding *FindFinal( static void CallFinalSubroutine( const Descriptor &descriptor, const typeInfo::DerivedType &derived) { if (const auto *special{FindFinal(derived, descriptor.rank())}) { - if (special->which == typeInfo::SpecialBinding::Which::ElementalFinal) { + if (special->which() == typeInfo::SpecialBinding::Which::ElementalFinal) { std::size_t byteStride{descriptor.ElementBytes()}; - auto p{reinterpret_cast(special->proc)}; + auto *p{special->GetProc()}; // Finalizable objects must be contiguous. std::size_t elements{descriptor.Elements()}; for (std::size_t j{0}; j < elements; ++j) { p(descriptor.OffsetElement(j * byteStride)); } - } else if (special->isArgDescriptorSet & 1) { - auto p{reinterpret_cast(special->proc)}; + } else if (special->IsArgDescriptor(0)) { + auto *p{special->GetProc()}; p(descriptor); } else { // Finalizable objects must be contiguous. - auto p{reinterpret_cast(special->proc)}; + auto *p{special->GetProc()}; p(descriptor.OffsetElement()); } } diff --git a/flang/runtime/descriptor-io.cpp b/flang/runtime/descriptor-io.cpp new file mode 100644 index 0000000000000..2e552b7c5228e --- /dev/null +++ b/flang/runtime/descriptor-io.cpp @@ -0,0 +1,106 @@ +//===-- runtime/descriptor-io.cpp -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "descriptor-io.h" + +namespace Fortran::runtime::io::descr { + +// User-defined derived type formatted I/O (maybe) +std::optional DefinedFormattedIo(IoStatementState &io, + const Descriptor &descriptor, const typeInfo::SpecialBinding &special) { + std::optional peek{io.GetNextDataEdit(0 /*to peek at it*/)}; + if (peek && + (peek->descriptor == DataEdit::DefinedDerivedType || + peek->descriptor == DataEdit::ListDirected)) { + // User-defined derived type formatting + IoErrorHandler &handler{io.GetIoErrorHandler()}; + DataEdit edit{*io.GetNextDataEdit()}; // consume it this time + RUNTIME_CHECK(handler, edit.descriptor == peek->descriptor); + char ioType[2 + edit.maxIoTypeChars]; + auto ioTypeLen{std::size_t{2} /*"DT"*/ + edit.ioTypeChars}; + if (edit.descriptor == DataEdit::DefinedDerivedType) { + ioType[0] = 'D'; + ioType[1] = 'T'; + std::memcpy(ioType + 2, edit.ioType, edit.ioTypeChars); + } else { + std::strcpy( + ioType, io.mutableModes().inNamelist ? "NAMELIST" : "LISTDIRECTED"); + ioTypeLen = std::strlen(ioType); + } + StaticDescriptor<0, true> statDesc; + Descriptor &vListDesc{statDesc.descriptor()}; + vListDesc.Establish(TypeCategory::Integer, sizeof(int), nullptr, 1); + vListDesc.set_base_addr(edit.vList); + vListDesc.GetDimension(0).SetBounds(1, edit.vListEntries); + vListDesc.GetDimension(0).SetByteStride( + static_cast(sizeof(int))); + ExternalFileUnit *actualExternal{io.GetExternalFileUnit()}; + ExternalFileUnit *external{actualExternal}; + if (!external) { + // Create a new unit to service defined I/O for an + // internal I/O parent. + external = &ExternalFileUnit::NewUnit(handler, true); + } + ChildIo &child{external->PushChildIo(io)}; + int unit{external->unitNumber()}; + int ioStat{IostatOk}; + char ioMsg[100]; + if (special.IsArgDescriptor(0)) { + auto *p{special.GetProc()}; + p(descriptor, unit, ioType, vListDesc, ioStat, ioMsg, ioTypeLen, + sizeof ioMsg); + } else { + auto *p{special.GetProc()}; + p(descriptor.raw().base_addr, unit, ioType, vListDesc, ioStat, ioMsg, + ioTypeLen, sizeof ioMsg); + } + handler.Forward(ioStat, ioMsg, sizeof ioMsg); + external->PopChildIo(child); + if (!actualExternal) { + // Close unit created for internal I/O above. + auto *closing{external->LookUpForClose(external->unitNumber())}; + RUNTIME_CHECK(handler, external == closing); + external->DestroyClosed(); + } + return handler.GetIoStat() == IostatOk; + } else { + // There's a user-defined I/O subroutine, but there's a FORMAT present and + // it does not have a DT data edit descriptor, so apply default formatting + // to the components of the derived type as usual. + return std::nullopt; + } +} + +// User-defined derived type unformatted I/O +bool DefinedUnformattedIo(IoStatementState &io, const Descriptor &descriptor, + const typeInfo::SpecialBinding &special) { + // Unformatted I/O must have an external unit (or child thereof). + IoErrorHandler &handler{io.GetIoErrorHandler()}; + ExternalFileUnit *external{io.GetExternalFileUnit()}; + RUNTIME_CHECK(handler, external != nullptr); + ChildIo &child{external->PushChildIo(io)}; + int unit{external->unitNumber()}; + int ioStat{IostatOk}; + char ioMsg[100]; + if (special.IsArgDescriptor(0)) { + auto *p{special.GetProc()}; + p(descriptor, unit, ioStat, ioMsg, sizeof ioMsg); + } else { + auto *p{special.GetProc()}; + p(descriptor.raw().base_addr, unit, ioStat, ioMsg, sizeof ioMsg); + } + handler.Forward(ioStat, ioMsg, sizeof ioMsg); + external->PopChildIo(child); + return handler.GetIoStat() == IostatOk; +} + +} // namespace Fortran::runtime::io::descr diff --git a/flang/runtime/descriptor-io.h b/flang/runtime/descriptor-io.h index 09d068612325b..2ebb449e46d11 100644 --- a/flang/runtime/descriptor-io.h +++ b/flang/runtime/descriptor-io.h @@ -10,6 +10,9 @@ #define FORTRAN_RUNTIME_DESCRIPTOR_IO_H_ // Implementation of I/O data list item transfers based on descriptors. +// (All I/O items come through here so that the code is exercised for test; +// some scalar I/O data transfer APIs could be changed to bypass their use +// of descriptors in the future for better efficiency.) #include "cpp-type.h" #include "descriptor.h" @@ -18,6 +21,7 @@ #include "io-stmt.h" #include "terminator.h" #include "type-info.h" +#include "unit.h" #include "flang/Common/uint128.h" namespace Fortran::runtime::io::descr { @@ -243,92 +247,130 @@ static bool DefaultFormattedComponentIO(IoStatementState &io, } } +std::optional DefinedFormattedIo( + IoStatementState &, const Descriptor &, const typeInfo::SpecialBinding &); + template static bool FormattedDerivedTypeIO( IoStatementState &io, const Descriptor &descriptor) { - Terminator &terminator{io.GetIoErrorHandler()}; + IoErrorHandler &handler{io.GetIoErrorHandler()}; + // Derived type information must be present for formatted I/O. const DescriptorAddendum *addendum{descriptor.Addendum()}; - RUNTIME_CHECK(terminator, addendum != nullptr); + RUNTIME_CHECK(handler, addendum != nullptr); const typeInfo::DerivedType *type{addendum->derivedType()}; - RUNTIME_CHECK(terminator, type != nullptr); - if (false) { - // TODO: user-defined derived type formatted I/O - } else { - // Default derived type formatting - const Descriptor &compArray{type->component()}; - RUNTIME_CHECK(terminator, compArray.rank() == 1); - std::size_t numComponents{compArray.Elements()}; - std::size_t numElements{descriptor.Elements()}; - SubscriptValue subscripts[maxRank]; - descriptor.GetLowerBounds(subscripts); - for (std::size_t j{0}; j < numElements; - ++j, descriptor.IncrementSubscripts(subscripts)) { - SubscriptValue at[maxRank]; - compArray.GetLowerBounds(at); - for (std::size_t k{0}; k < numComponents; - ++k, compArray.IncrementSubscripts(at)) { - const typeInfo::Component &component{ - *compArray.Element(at)}; - if (!DefaultFormattedComponentIO( - io, component, descriptor, subscripts, terminator)) { - return false; - } + RUNTIME_CHECK(handler, type != nullptr); + if (const typeInfo::SpecialBinding * + special{type->FindSpecialBinding(DIR == Direction::Input + ? typeInfo::SpecialBinding::Which::ReadFormatted + : typeInfo::SpecialBinding::Which::WriteFormatted)}) { + if (std::optional wasDefined{ + DefinedFormattedIo(io, descriptor, *special)}) { + return *wasDefined; // user-defined I/O was applied + } + } + // Default componentwise derived type formatting + const Descriptor &compArray{type->component()}; + RUNTIME_CHECK(handler, compArray.rank() == 1); + std::size_t numComponents{compArray.Elements()}; + std::size_t numElements{descriptor.Elements()}; + SubscriptValue subscripts[maxRank]; + descriptor.GetLowerBounds(subscripts); + for (std::size_t j{0}; j < numElements; + ++j, descriptor.IncrementSubscripts(subscripts)) { + SubscriptValue at[maxRank]; + compArray.GetLowerBounds(at); + for (std::size_t k{0}; k < numComponents; + ++k, compArray.IncrementSubscripts(at)) { + const typeInfo::Component &component{ + *compArray.Element(at)}; + if (!DefaultFormattedComponentIO( + io, component, descriptor, subscripts, handler)) { + return false; } } } return true; } +bool DefinedUnformattedIo( + IoStatementState &, const Descriptor &, const typeInfo::SpecialBinding &); + +// Unformatted I/O template -static bool DescriptorIO(IoStatementState &io, const Descriptor &descriptor) { - if (!io.get_if>()) { - io.GetIoErrorHandler().Crash( - "DescriptorIO() called for wrong I/O direction"); - return false; - } - if constexpr (DIR == Direction::Input) { - if (!io.BeginReadingRecord()) { - return false; - } - } - if (auto *unf{io.get_if>()}) { +static bool UnformattedDescriptorIO( + IoStatementState &io, const Descriptor &descriptor) { + IoErrorHandler &handler{io.GetIoErrorHandler()}; + const DescriptorAddendum *addendum{descriptor.Addendum()}; + const typeInfo::DerivedType *type{ + addendum ? addendum->derivedType() : nullptr}; + if (const typeInfo::SpecialBinding * + special{type + ? type->FindSpecialBinding(DIR == Direction::Input + ? typeInfo::SpecialBinding::Which::ReadUnformatted + : typeInfo::SpecialBinding::Which::WriteUnformatted) + : nullptr}) { + // User-defined derived type unformatted I/O + return DefinedUnformattedIo(io, descriptor, *special); + } else { + // Regular derived type unformatted I/O, not user-defined + auto *externalUnf{io.get_if>()}; + auto *childUnf{io.get_if>()}; + RUNTIME_CHECK(handler, externalUnf != nullptr || childUnf != nullptr); std::size_t elementBytes{descriptor.ElementBytes()}; + std::size_t numElements{descriptor.Elements()}; SubscriptValue subscripts[maxRank]; descriptor.GetLowerBounds(subscripts); - std::size_t numElements{descriptor.Elements()}; - if (false) { - // TODO: user-defined derived type unformatted I/O - } else if (descriptor.IsContiguous()) { // contiguous unformatted I/O - char &x{ExtractElement(io, descriptor, subscripts)}; - auto totalBytes{numElements * elementBytes}; + using CharType = + std::conditional_t; + auto Transfer{[=](CharType &x, std::size_t totalBytes, + std::size_t elementBytes) -> bool { if constexpr (DIR == Direction::Output) { - return unf->Emit(&x, totalBytes, elementBytes); + return externalUnf ? externalUnf->Emit(&x, totalBytes, elementBytes) + : childUnf->Emit(&x, totalBytes, elementBytes); } else { - return unf->Receive(&x, totalBytes, elementBytes); + return externalUnf ? externalUnf->Receive(&x, totalBytes, elementBytes) + : childUnf->Receive(&x, totalBytes, elementBytes); } + }}; + if (descriptor.IsContiguous()) { // contiguous unformatted I/O + char &x{ExtractElement(io, descriptor, subscripts)}; + return Transfer(x, numElements * elementBytes, elementBytes); } else { // non-contiguous unformatted I/O for (std::size_t j{0}; j < numElements; ++j) { char &x{ExtractElement(io, descriptor, subscripts)}; - if constexpr (DIR == Direction::Output) { - if (!unf->Emit(&x, elementBytes, elementBytes)) { - return false; - } - } else { - if (!unf->Receive(&x, elementBytes, elementBytes)) { - return false; - } + if (!Transfer(x, elementBytes, elementBytes)) { + return false; } if (!descriptor.IncrementSubscripts(subscripts) && j + 1 < numElements) { - io.GetIoErrorHandler().Crash( - "DescriptorIO: subscripts out of bounds"); + handler.Crash("DescriptorIO: subscripts out of bounds"); } } return true; } - } else if (auto catAndKind{descriptor.type().GetCategoryAndKind()}) { + } +} + +template +static bool DescriptorIO(IoStatementState &io, const Descriptor &descriptor) { + if (!io.get_if>()) { + io.GetIoErrorHandler().Crash( + "DescriptorIO() called for wrong I/O direction"); + return false; + } + if constexpr (DIR == Direction::Input) { + if (!io.BeginReadingRecord()) { + return false; + } + } + if (!io.get_if()) { + return UnformattedDescriptorIO(io, descriptor); + } + IoErrorHandler &handler{io.GetIoErrorHandler()}; + if (auto catAndKind{descriptor.type().GetCategoryAndKind()}) { + TypeCategory cat{catAndKind->first}; int kind{catAndKind->second}; - switch (catAndKind->first) { + switch (cat) { case TypeCategory::Integer: switch (kind) { case 1: @@ -347,7 +389,7 @@ static bool DescriptorIO(IoStatementState &io, const Descriptor &descriptor) { return FormattedIntegerIO, DIR>( io, descriptor); default: - io.GetIoErrorHandler().Crash( + handler.Crash( "DescriptorIO: Unimplemented INTEGER kind (%d) in descriptor", kind); return false; @@ -368,7 +410,7 @@ static bool DescriptorIO(IoStatementState &io, const Descriptor &descriptor) { case 16: return FormattedRealIO<16, DIR>(io, descriptor); default: - io.GetIoErrorHandler().Crash( + handler.Crash( "DescriptorIO: Unimplemented REAL kind (%d) in descriptor", kind); return false; } @@ -388,7 +430,7 @@ static bool DescriptorIO(IoStatementState &io, const Descriptor &descriptor) { case 16: return FormattedComplexIO<16, DIR>(io, descriptor); default: - io.GetIoErrorHandler().Crash( + handler.Crash( "DescriptorIO: Unimplemented COMPLEX kind (%d) in descriptor", kind); return false; @@ -399,7 +441,7 @@ static bool DescriptorIO(IoStatementState &io, const Descriptor &descriptor) { return FormattedCharacterIO(io, descriptor); // TODO cases 2, 4 default: - io.GetIoErrorHandler().Crash( + handler.Crash( "DescriptorIO: Unimplemented CHARACTER kind (%d) in descriptor", kind); return false; @@ -419,7 +461,7 @@ static bool DescriptorIO(IoStatementState &io, const Descriptor &descriptor) { return FormattedLogicalIO, DIR>( io, descriptor); default: - io.GetIoErrorHandler().Crash( + handler.Crash( "DescriptorIO: Unimplemented LOGICAL kind (%d) in descriptor", kind); return false; @@ -428,7 +470,7 @@ static bool DescriptorIO(IoStatementState &io, const Descriptor &descriptor) { return FormattedDerivedTypeIO(io, descriptor); } } - io.GetIoErrorHandler().Crash("DescriptorIO: Bad type code (%d) in descriptor", + handler.Crash("DescriptorIO: Bad type code (%d) in descriptor", static_cast(descriptor.type().raw())); return false; } diff --git a/flang/runtime/format-implementation.h b/flang/runtime/format-implementation.h index 91d80a7336019..63ca682eb3e7a 100644 --- a/flang/runtime/format-implementation.h +++ b/flang/runtime/format-implementation.h @@ -338,10 +338,12 @@ int FormatControl::CueUpNextDataEdit(Context &context, bool stop) { ++offset_; } } - if (ch == 'E' || - (!next && - (ch == 'A' || ch == 'I' || ch == 'B' || ch == 'O' || ch == 'Z' || - ch == 'F' || ch == 'D' || ch == 'G' || ch == 'L'))) { + if ((!next && + (ch == 'A' || ch == 'I' || ch == 'B' || ch == 'E' || ch == 'D' || + ch == 'O' || ch == 'Z' || ch == 'F' || ch == 'G' || + ch == 'L')) || + (ch == 'E' && (next == 'N' || next == 'S' || next == 'X')) || + (ch == 'D' && next == 'T')) { // Data edit descriptor found offset_ = start; return repeat && *repeat > 0 ? *repeat : 1; @@ -363,34 +365,86 @@ int FormatControl::CueUpNextDataEdit(Context &context, bool stop) { } } +// Returns the next data edit descriptor template DataEdit FormatControl::GetNextDataEdit( Context &context, int maxRepeat) { - - // TODO: DT editing - - // Return the next data edit descriptor int repeat{CueUpNextDataEdit(context)}; auto start{offset_}; DataEdit edit; edit.descriptor = static_cast(Capitalize(GetNextChar(context))); if (edit.descriptor == 'E') { - edit.variation = static_cast(Capitalize(PeekNext())); - if (edit.variation >= 'A' && edit.variation <= 'Z') { + if (auto next{static_cast(Capitalize(PeekNext()))}; + next == 'N' || next == 'S' || next == 'X') { + edit.variation = next; ++offset_; } + } else if (edit.descriptor == 'D' && Capitalize(PeekNext()) == 'T') { + // DT'iotype'(v_list) user-defined derived type I/O + edit.descriptor = DataEdit::DefinedDerivedType; + ++offset_; + if (auto quote{static_cast(PeekNext())}; + quote == '\'' || quote == '"') { + // Capture the quoted 'iotype' + bool ok{false}, tooLong{false}; + for (++offset_; offset_ < formatLength_;) { + auto ch{static_cast(format_[offset_++])}; + if (ch == quote && + (offset_ == formatLength_ || + static_cast(format_[offset_]) != quote)) { + ok = true; + break; // that was terminating quote + } else if (edit.ioTypeChars >= edit.maxIoTypeChars) { + tooLong = true; + } else { + edit.ioType[edit.ioTypeChars++] = ch; + if (ch == quote) { + ++offset_; + } + } + } + if (!ok) { + context.SignalError( + IostatErrorInFormat, "Unclosed DT'iotype' in FORMAT"); + } else if (tooLong) { + context.SignalError( + IostatErrorInFormat, "Excessive DT'iotype' in FORMAT"); + } + } + if (PeekNext() == '(') { + // Capture the v_list arguments + bool ok{false}, tooLong{false}; + for (++offset_; offset_ < formatLength_;) { + int n{GetIntField(context)}; + if (edit.vListEntries >= edit.maxVListEntries) { + tooLong = true; + } else { + edit.vList[edit.vListEntries++] = n; + } + auto ch{static_cast(GetNextChar(context))}; + if (ch != ',') { + ok = ch == ')'; + break; + } + } + if (!ok) { + context.SignalError( + IostatErrorInFormat, "Unclosed DT(v_list) in FORMAT"); + } else if (tooLong) { + context.SignalError( + IostatErrorInFormat, "Excessive DT(v_list) in FORMAT"); + } + } } - if (edit.descriptor == 'A') { // width is optional for A[w] auto ch{PeekNext()}; if (ch >= '0' && ch <= '9') { edit.width = GetIntField(context); } - } else { + } else if (edit.descriptor != DataEdit::DefinedDerivedType) { edit.width = GetIntField(context); } - edit.modes = context.mutableModes(); - if (PeekNext() == '.') { + if (edit.descriptor != DataEdit::DefinedDerivedType && PeekNext() == '.') { ++offset_; edit.digits = GetIntField(context); CharType ch{PeekNext()}; @@ -399,14 +453,15 @@ DataEdit FormatControl::GetNextDataEdit( edit.expoDigits = GetIntField(context); } } + edit.modes = context.mutableModes(); // Handle repeated nonparenthesized edit descriptors - if (repeat > 1) { + if (repeat > maxRepeat) { stack_[height_].start = start; // after repeat count stack_[height_].remaining = repeat; // full count ++height_; } - edit.repeat = 1; + edit.repeat = std::min(1, maxRepeat); // 0 if maxRepeat==0 if (height_ > 1) { // Subtle: stack_[0].start doesn't necessarily point to '(' int start{stack_[height_ - 1].start}; if (format_[start] != '(') { diff --git a/flang/runtime/format.cpp b/flang/runtime/format.cpp index 65ed12447bb58..e46cada81aa6c 100644 --- a/flang/runtime/format.cpp +++ b/flang/runtime/format.cpp @@ -9,50 +9,6 @@ #include "format-implementation.h" namespace Fortran::runtime::io { - -DataEdit DefaultFormatControlCallbacks::GetNextDataEdit(int) { - Crash("DefaultFormatControlCallbacks::GetNextDataEdit() called for " - "non-formatted I/O statement"); - return {}; -} -bool DefaultFormatControlCallbacks::Emit( - const char *, std::size_t, std::size_t) { - Crash("DefaultFormatControlCallbacks::Emit(char) called for non-output I/O " - "statement"); - return {}; -} -bool DefaultFormatControlCallbacks::Emit(const char16_t *, std::size_t) { - Crash("DefaultFormatControlCallbacks::Emit(char16_t) called for non-output " - "I/O statement"); - return {}; -} -bool DefaultFormatControlCallbacks::Emit(const char32_t *, std::size_t) { - Crash("DefaultFormatControlCallbacks::Emit(char32_t) called for non-output " - "I/O statement"); - return {}; -} -std::optional DefaultFormatControlCallbacks::GetCurrentChar() { - Crash("DefaultFormatControlCallbacks::GetCurrentChar() called for non-input " - "I/O " - "statement"); - return {}; -} -bool DefaultFormatControlCallbacks::AdvanceRecord(int) { - Crash("DefaultFormatControlCallbacks::AdvanceRecord() called unexpectedly"); - return {}; -} -void DefaultFormatControlCallbacks::BackspaceRecord() { - Crash("DefaultFormatControlCallbacks::BackspaceRecord() called unexpectedly"); -} -void DefaultFormatControlCallbacks::HandleAbsolutePosition(std::int64_t) { - Crash("DefaultFormatControlCallbacks::HandleAbsolutePosition() called for " - "non-formatted I/O statement"); -} -void DefaultFormatControlCallbacks::HandleRelativePosition(std::int64_t) { - Crash("DefaultFormatControlCallbacks::HandleRelativePosition() called for " - "non-formatted I/O statement"); -} - template class FormatControl< InternalFormattedIoStatementState>; template class FormatControl< @@ -61,4 +17,6 @@ template class FormatControl< ExternalFormattedIoStatementState>; template class FormatControl< ExternalFormattedIoStatementState>; +template class FormatControl>; +template class FormatControl>; } // namespace Fortran::runtime::io diff --git a/flang/runtime/format.h b/flang/runtime/format.h index 9dcd59a54a8bc..77daa38f3262e 100644 --- a/flang/runtime/format.h +++ b/flang/runtime/format.h @@ -51,32 +51,28 @@ struct DataEdit { descriptor == ListDirectedImaginaryPart; } + static constexpr char DefinedDerivedType{'d'}; // DT user-defined derived type + char variation{'\0'}; // N, S, or X for EN, ES, EX std::optional width; // the 'w' field; optional for A std::optional digits; // the 'm' or 'd' field std::optional expoDigits; // 'Ee' field MutableModes modes; int repeat{1}; -}; -// FormatControl requires that A have these member functions; -// these default implementations just crash if called. -struct DefaultFormatControlCallbacks : public IoErrorHandler { - using IoErrorHandler::IoErrorHandler; - DataEdit GetNextDataEdit(int = 1); - bool Emit(const char *, std::size_t, std::size_t elementBytes = 0); - bool Emit(const char16_t *, std::size_t); - bool Emit(const char32_t *, std::size_t); - std::optional GetCurrentChar(); - bool AdvanceRecord(int = 1); - void BackspaceRecord(); - void HandleAbsolutePosition(std::int64_t); - void HandleRelativePosition(std::int64_t); + // "iotype" &/or "v_list" values for a DT'iotype'(v_list) + // user-defined derived type data edit descriptor + static constexpr std::size_t maxIoTypeChars{32}; + static constexpr std::size_t maxVListEntries{4}; + std::uint8_t ioTypeChars{0}; + std::uint8_t vListEntries{0}; + char ioType[maxIoTypeChars]; + int vList[maxVListEntries]; }; // Generates a sequence of DataEdits from a FORMAT statement or // default-CHARACTER string. Driven by I/O item list processing. -// Errors are fatal. See clause 13.4 in Fortran 2018 for background. +// Errors are fatal. See subclause 13.4 in Fortran 2018 for background. template class FormatControl { public: using Context = CONTEXT; @@ -98,7 +94,8 @@ template class FormatControl { } // Extracts the next data edit descriptor, handling control edit descriptors - // along the way. + // along the way. If maxRepeat==0, this is a peek at the next data edit + // descriptor. DataEdit GetNextDataEdit(Context &, int maxRepeat = 1); // Emit any remaining character literals after the last data item (on output) diff --git a/flang/runtime/io-api.cpp b/flang/runtime/io-api.cpp index 8754cd666ae7a..d1b13cb330eba 100644 --- a/flang/runtime/io-api.cpp +++ b/flang/runtime/io-api.cpp @@ -156,22 +156,29 @@ Cookie BeginExternalListIO(const char *what, int unitNumber, } ExternalFileUnit &unit{ExternalFileUnit::LookUpOrCreateAnonymous( unitNumber, DIR, false /*!unformatted*/, terminator)}; - if (unit.access == Access::Direct) { - terminator.Crash("%s attempted on direct access file", what); - return nullptr; - } - if (!unit.isUnformatted.has_value()) { - unit.isUnformatted = false; - } - if (*unit.isUnformatted) { - terminator.Crash("%s attempted on unformatted file", what); - return nullptr; + if (ChildIo * child{unit.GetChildIo()}) { + return child->CheckFormattingAndDirection(terminator, what, false, DIR) + ? &child->BeginIoStatement>( + *child, sourceFile, sourceLine) + : nullptr; + } else { + if (unit.access == Access::Direct) { + terminator.Crash("%s attempted on direct access file", what); + return nullptr; + } + if (!unit.isUnformatted.has_value()) { + unit.isUnformatted = false; + } + if (*unit.isUnformatted) { + terminator.Crash("%s attempted on unformatted file", what); + return nullptr; + } + IoErrorHandler handler{terminator}; + unit.SetDirection(DIR, handler); + IoStatementState &io{unit.BeginIoStatement>( + std::forward(xs)..., unit, sourceFile, sourceLine)}; + return &io; } - IoErrorHandler handler{terminator}; - unit.SetDirection(DIR, handler); - IoStatementState &io{unit.BeginIoStatement>( - std::forward(xs)..., unit, sourceFile, sourceLine)}; - return &io; } Cookie IONAME(BeginExternalListOutput)( @@ -195,19 +202,29 @@ Cookie BeginExternalFormattedIO(const char *format, std::size_t formatLength, } ExternalFileUnit &unit{ExternalFileUnit::LookUpOrCreateAnonymous( unitNumber, DIR, false /*!unformatted*/, terminator)}; - if (!unit.isUnformatted.has_value()) { - unit.isUnformatted = false; - } - if (*unit.isUnformatted) { - terminator.Crash("Formatted I/O attempted on unformatted file"); - return nullptr; + if (ChildIo * child{unit.GetChildIo()}) { + return child->CheckFormattingAndDirection(terminator, + DIR == Direction::Output ? "formatted output" + : "formatted input", + false, DIR) + ? &child->BeginIoStatement>( + *child, sourceFile, sourceLine) + : nullptr; + } else { + if (!unit.isUnformatted.has_value()) { + unit.isUnformatted = false; + } + if (*unit.isUnformatted) { + terminator.Crash("Formatted I/O attempted on unformatted file"); + return nullptr; + } + IoErrorHandler handler{terminator}; + unit.SetDirection(DIR, handler); + IoStatementState &io{ + unit.BeginIoStatement>( + unit, format, formatLength, sourceFile, sourceLine)}; + return &io; } - IoErrorHandler handler{terminator}; - unit.SetDirection(DIR, handler); - IoStatementState &io{ - unit.BeginIoStatement>( - unit, format, formatLength, sourceFile, sourceLine)}; - return &io; } Cookie IONAME(BeginExternalFormattedOutput)(const char *format, @@ -230,25 +247,36 @@ Cookie BeginUnformattedIO( Terminator terminator{sourceFile, sourceLine}; ExternalFileUnit &unit{ExternalFileUnit::LookUpOrCreateAnonymous( unitNumber, DIR, true /*unformatted*/, terminator)}; - if (!unit.isUnformatted.has_value()) { - unit.isUnformatted = true; - } - if (!*unit.isUnformatted) { - terminator.Crash("Unformatted I/O attempted on formatted file"); - } - IoStatementState &io{unit.BeginIoStatement>( - unit, sourceFile, sourceLine)}; - IoErrorHandler handler{terminator}; - unit.SetDirection(DIR, handler); - if constexpr (DIR == Direction::Output) { - if (unit.access == Access::Sequential && !unit.isFixedRecordLength) { - // Create space for (sub)record header to be completed by - // UnformattedIoStatementState::EndIoStatement() - unit.recordLength.reset(); // in case of prior BACKSPACE - io.Emit("\0\0\0\0", 4); // placeholder for record length header + if (ChildIo * child{unit.GetChildIo()}) { + return child->CheckFormattingAndDirection(terminator, + DIR == Direction::Output ? "unformatted output" + : "unformatted input", + true, DIR) + ? &child->BeginIoStatement>( + *child, sourceFile, sourceLine) + : nullptr; + } else { + if (!unit.isUnformatted.has_value()) { + unit.isUnformatted = true; + } + if (!*unit.isUnformatted) { + terminator.Crash("Unformatted I/O attempted on formatted file"); + } + IoStatementState &io{ + unit.BeginIoStatement>( + unit, sourceFile, sourceLine)}; + IoErrorHandler handler{terminator}; + unit.SetDirection(DIR, handler); + if constexpr (DIR == Direction::Output) { + if (unit.access == Access::Sequential && !unit.isFixedRecordLength) { + // Create space for (sub)record header to be completed by + // ExternalUnformattedIoStatementState::EndIoStatement() + unit.recordLength.reset(); // in case of prior BACKSPACE + io.Emit("\0\0\0\0", 4); // placeholder for record length header + } } + return &io; } - return &io; } Cookie IONAME(BeginUnformattedOutput)( @@ -276,9 +304,7 @@ Cookie IONAME(BeginOpenUnit)( // OPEN(without NEWUNIT=) Cookie IONAME(BeginOpenNewUnit)( // OPEN(NEWUNIT=j) const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; - bool ignored{false}; - ExternalFileUnit &unit{ExternalFileUnit::LookUpOrCreate( - ExternalFileUnit::NewUnit(terminator), terminator, ignored)}; + ExternalFileUnit &unit{ExternalFileUnit::NewUnit(terminator)}; return &unit.BeginIoStatement( unit, false /*was an existing file*/, sourceFile, sourceLine); } @@ -895,7 +921,8 @@ bool IONAME(InputDescriptor)(Cookie cookie, const Descriptor &descriptor) { bool IONAME(OutputUnformattedBlock)(Cookie cookie, const char *x, std::size_t length, std::size_t elementBytes) { IoStatementState &io{*cookie}; - if (auto *unf{io.get_if>()}) { + if (auto *unf{io.get_if< + ExternalUnformattedIoStatementState>()}) { return unf->Emit(x, length, elementBytes); } io.GetIoErrorHandler().Crash("OutputUnformattedBlock() called for an I/O " @@ -910,7 +937,8 @@ bool IONAME(InputUnformattedBlock)( if (io.GetIoErrorHandler().InError()) { return false; } - if (auto *unf{io.get_if>()}) { + if (auto *unf{ + io.get_if>()}) { return unf->Receive(x, length, elementBytes); } io.GetIoErrorHandler().Crash("InputUnformattedBlock() called for an I/O " diff --git a/flang/runtime/io-error.cpp b/flang/runtime/io-error.cpp index bc835bad1dc13..19342c5aa427b 100644 --- a/flang/runtime/io-error.cpp +++ b/flang/runtime/io-error.cpp @@ -57,6 +57,14 @@ void IoErrorHandler::SignalError(int iostatOrErrno) { SignalError(iostatOrErrno, nullptr); } +void IoErrorHandler::Forward( + int ioStatOrErrno, const char *msg, std::size_t length) { + SignalError(ioStatOrErrno); + if (ioStat_ != IostatOk && (flags_ & hasIoMsg)) { + ioMsg_ = SaveDefaultCharacter(msg, length, *this); + } +} + void IoErrorHandler::SignalErrno() { SignalError(errno); } void IoErrorHandler::SignalEnd() { SignalError(IostatEnd); } diff --git a/flang/runtime/io-error.h b/flang/runtime/io-error.h index e51df9b5be866..dd2a269fef89a 100644 --- a/flang/runtime/io-error.h +++ b/flang/runtime/io-error.h @@ -32,6 +32,9 @@ class IoErrorHandler : public Terminator { void HasEndLabel() { flags_ |= hasEnd; } void HasEorLabel() { flags_ |= hasEor; } void HasIoMsg() { flags_ |= hasIoMsg; } + void HandleAnything() { + flags_ = hasIoStat | hasErr | hasEnd | hasEor | hasIoMsg; + } bool InError() const { return ioStat_ != IostatOk; } @@ -41,6 +44,8 @@ class IoErrorHandler : public Terminator { SignalError(IostatGenericError, msg, std::forward(xs)...); } + void Forward(int iostatOrErrno, const char *, std::size_t); + void SignalErrno(); // SignalError(errno) void SignalEnd(); // input only; EOF on internal write is an error void SignalEor(); // non-advancing input only; EOR on write is an error diff --git a/flang/runtime/io-stmt.cpp b/flang/runtime/io-stmt.cpp index 099d9038a8acd..3432f847cce51 100644 --- a/flang/runtime/io-stmt.cpp +++ b/flang/runtime/io-stmt.cpp @@ -21,32 +21,64 @@ namespace Fortran::runtime::io { int IoStatementBase::EndIoStatement() { return GetIoStat(); } +bool IoStatementBase::Emit(const char *, std::size_t, std::size_t) { + return false; +} + +bool IoStatementBase::Emit(const char *, std::size_t) { + return false; +} + +bool IoStatementBase::Emit(const char16_t *, std::size_t) { + return false; +} + +bool IoStatementBase::Emit(const char32_t *, std::size_t) { + return false; +} + +std::optional IoStatementBase::GetCurrentChar() { + return std::nullopt; +} + +bool IoStatementBase::AdvanceRecord(int) { return false; } + +void IoStatementBase::BackspaceRecord() {} + +bool IoStatementBase::Receive(char *, std::size_t, std::size_t) { + return false; +} + std::optional IoStatementBase::GetNextDataEdit( IoStatementState &, int) { return std::nullopt; } +ExternalFileUnit *IoStatementBase::GetExternalFileUnit() const { + return nullptr; +} + +bool IoStatementBase::BeginReadingRecord() { return true; } + +void IoStatementBase::FinishReadingRecord() {} + +void IoStatementBase::HandleAbsolutePosition(std::int64_t) {} + +void IoStatementBase::HandleRelativePosition(std::int64_t) {} + bool IoStatementBase::Inquire(InquiryKeywordHash, char *, std::size_t) { - Crash( - "IoStatementBase::Inquire() called for I/O statement other than INQUIRE"); return false; } bool IoStatementBase::Inquire(InquiryKeywordHash, bool &) { - Crash( - "IoStatementBase::Inquire() called for I/O statement other than INQUIRE"); return false; } bool IoStatementBase::Inquire(InquiryKeywordHash, std::int64_t, bool &) { - Crash( - "IoStatementBase::Inquire() called for I/O statement other than INQUIRE"); return false; } bool IoStatementBase::Inquire(InquiryKeywordHash, std::int64_t &) { - Crash( - "IoStatementBase::Inquire() called for I/O statement other than INQUIRE"); return false; } @@ -69,12 +101,12 @@ InternalIoStatementState::InternalIoStatementState( template bool InternalIoStatementState::Emit( - const CharType *data, std::size_t chars, std::size_t /*elementBytes*/) { + const CharType *data, std::size_t chars) { if constexpr (DIR == Direction::Input) { Crash("InternalIoStatementState::Emit() called"); return false; } - return unit_.Emit(data, chars, *this); + return unit_.Emit(data, chars * sizeof(CharType), *this); } template @@ -252,6 +284,14 @@ bool ExternalIoStatementState::Emit( return unit().Emit(data, bytes, elementBytes, *this); } +template +bool ExternalIoStatementState::Emit(const char *data, std::size_t bytes) { + if constexpr (DIR == Direction::Input) { + Crash("ExternalIoStatementState::Emit(char) called for input statement"); + } + return unit().Emit(data, bytes, 0, *this); +} + template bool ExternalIoStatementState::Emit( const char16_t *data, std::size_t chars) { @@ -261,7 +301,7 @@ bool ExternalIoStatementState::Emit( } // TODO: UTF-8 encoding return unit().Emit(reinterpret_cast(data), chars * sizeof *data, - static_cast(sizeof *data), *this); + sizeof *data, *this); } template @@ -273,7 +313,7 @@ bool ExternalIoStatementState::Emit( } // TODO: UTF-8 encoding return unit().Emit(reinterpret_cast(data), chars * sizeof *data, - static_cast(sizeof *data), *this); + sizeof *data, *this); } template @@ -354,6 +394,24 @@ bool IoStatementState::Emit( [=](auto &x) { return x.get().Emit(data, n, elementBytes); }, u_); } +bool IoStatementState::Emit(const char *data, std::size_t n) { + return std::visit([=](auto &x) { return x.get().Emit(data, n); }, u_); +} + +bool IoStatementState::Emit(const char16_t *data, std::size_t chars) { + return std::visit([=](auto &x) { return x.get().Emit(data, chars); }, u_); +} + +bool IoStatementState::Emit(const char32_t *data, std::size_t chars) { + return std::visit([=](auto &x) { return x.get().Emit(data, chars); }, u_); +} + +bool IoStatementState::Receive( + char *data, std::size_t n, std::size_t elementBytes) { + return std::visit( + [=](auto &x) { return x.get().Receive(data, n, elementBytes); }, u_); +} + std::optional IoStatementState::GetCurrentChar() { return std::visit([&](auto &x) { return x.get().GetCurrentChar(); }, u_); } @@ -370,6 +428,10 @@ void IoStatementState::HandleRelativePosition(std::int64_t n) { std::visit([=](auto &x) { x.get().HandleRelativePosition(n); }, u_); } +void IoStatementState::HandleAbsolutePosition(std::int64_t n) { + std::visit([=](auto &x) { x.get().HandleAbsolutePosition(n); }, u_); +} + int IoStatementState::EndIoStatement() { return std::visit([](auto &x) { return x.get().EndIoStatement(); }, u_); } @@ -682,23 +744,100 @@ ListDirectedStatementState::GetNextDataEdit( } template -bool UnformattedIoStatementState::Receive( +bool ExternalUnformattedIoStatementState::Receive( char *data, std::size_t bytes, std::size_t elementBytes) { if constexpr (DIR == Direction::Output) { - this->Crash( - "UnformattedIoStatementState::Receive() called for output statement"); + this->Crash("ExternalUnformattedIoStatementState::Receive() called for " + "output statement"); } return this->unit().Receive(data, bytes, elementBytes, *this); } template -bool UnformattedIoStatementState::Emit( +ChildIoStatementState::ChildIoStatementState( + ChildIo &child, const char *sourceFile, int sourceLine) + : IoStatementBase{sourceFile, sourceLine}, child_{child} {} + +template +MutableModes &ChildIoStatementState::mutableModes() { + return child_.parent().mutableModes(); +} + +template +ConnectionState &ChildIoStatementState::GetConnectionState() { + return child_.parent().GetConnectionState(); +} + +template +ExternalFileUnit *ChildIoStatementState::GetExternalFileUnit() const { + return child_.parent().GetExternalFileUnit(); +} + +template int ChildIoStatementState::EndIoStatement() { + auto result{IoStatementBase::EndIoStatement()}; + child_.EndIoStatement(); // annihilates *this in child_.u_ + return result; +} + +template +bool ChildIoStatementState::Emit( const char *data, std::size_t bytes, std::size_t elementBytes) { - if constexpr (DIR == Direction::Input) { - this->Crash( - "UnformattedIoStatementState::Emit() called for input statement"); - } - return ExternalIoStatementState::Emit(data, bytes, elementBytes); + return child_.parent().Emit(data, bytes, elementBytes); +} + +template +bool ChildIoStatementState::Emit(const char *data, std::size_t bytes) { + return child_.parent().Emit(data, bytes); +} + +template +bool ChildIoStatementState::Emit(const char16_t *data, std::size_t chars) { + return child_.parent().Emit(data, chars); +} + +template +bool ChildIoStatementState::Emit(const char32_t *data, std::size_t chars) { + return child_.parent().Emit(data, chars); +} + +template +std::optional ChildIoStatementState::GetCurrentChar() { + return child_.parent().GetCurrentChar(); +} + +template +void ChildIoStatementState::HandleAbsolutePosition(std::int64_t n) { + return child_.parent().HandleAbsolutePosition(n); +} + +template +void ChildIoStatementState::HandleRelativePosition(std::int64_t n) { + return child_.parent().HandleRelativePosition(n); +} + +template +ChildFormattedIoStatementState::ChildFormattedIoStatementState( + ChildIo &child, const CHAR *format, std::size_t formatLength, + const char *sourceFile, int sourceLine) + : ChildIoStatementState{child, sourceFile, sourceLine}, + mutableModes_{child.parent().mutableModes()}, format_{*this, format, + formatLength} {} + +template +int ChildFormattedIoStatementState::EndIoStatement() { + format_.Finish(*this); + return ChildIoStatementState::EndIoStatement(); +} + +template +bool ChildFormattedIoStatementState::AdvanceRecord(int) { + return false; // no can do in a child I/O +} + +template +bool ChildUnformattedIoStatementState::Receive( + char *data, std::size_t bytes, std::size_t elementBytes) { + return this->child().parent().Receive(data, bytes, elementBytes); } template class InternalIoStatementState; @@ -713,8 +852,16 @@ template class ExternalFormattedIoStatementState; template class ExternalFormattedIoStatementState; template class ExternalListIoStatementState; template class ExternalListIoStatementState; -template class UnformattedIoStatementState; -template class UnformattedIoStatementState; +template class ExternalUnformattedIoStatementState; +template class ExternalUnformattedIoStatementState; +template class ChildIoStatementState; +template class ChildIoStatementState; +template class ChildFormattedIoStatementState; +template class ChildFormattedIoStatementState; +template class ChildListIoStatementState; +template class ChildListIoStatementState; +template class ChildUnformattedIoStatementState; +template class ChildUnformattedIoStatementState; int ExternalMiscIoStatementState::EndIoStatement() { ExternalFileUnit &ext{unit()}; @@ -742,6 +889,12 @@ InquireUnitState::InquireUnitState( bool InquireUnitState::Inquire( InquiryKeywordHash inquiry, char *result, std::size_t length) { + if (unit().createdForInternalChildIo()) { + SignalError(IostatInquireInternalUnit, + "INQUIRE of unit created for defined derived type I/O of an internal " + "unit"); + return false; + } const char *str{nullptr}; switch (inquiry) { case HashInquiryKeyword("ACCESS"): @@ -1161,10 +1314,4 @@ InquireIOLengthState::InquireIOLengthState( const char *sourceFile, int sourceLine) : NoUnitIoStatementState{sourceFile, sourceLine, *this} {} -bool InquireIOLengthState::Emit( - const char *, std::size_t n, std::size_t /*elementBytes*/) { - bytes_ += n; - return true; -} - } // namespace Fortran::runtime::io diff --git a/flang/runtime/io-stmt.h b/flang/runtime/io-stmt.h index b76c5202619b7..34c4a47363c0d 100644 --- a/flang/runtime/io-stmt.h +++ b/flang/runtime/io-stmt.h @@ -25,6 +25,7 @@ namespace Fortran::runtime::io { class ExternalFileUnit; +class ChildIo; class OpenStatementState; class InquireUnitState; @@ -41,7 +42,10 @@ template class InternalListIoStatementState; template class ExternalFormattedIoStatementState; template class ExternalListIoStatementState; -template class UnformattedIoStatementState; +template class ExternalUnformattedIoStatementState; +template class ChildFormattedIoStatementState; +template class ChildListIoStatementState; +template class ChildUnformattedIoStatementState; struct InputStatementState {}; struct OutputStatementState {}; @@ -60,17 +64,19 @@ class IoStatementState { // to interact with the state of the I/O statement in progress. // This design avoids virtual member functions and function pointers, // which may not have good support in some runtime environments. - std::optional GetNextDataEdit(int = 1); - bool Emit(const char *, std::size_t, std::size_t elementBytes = 0); + int EndIoStatement(); + bool Emit(const char *, std::size_t, std::size_t elementBytes); + bool Emit(const char *, std::size_t); + bool Emit(const char16_t *, std::size_t chars); + bool Emit(const char32_t *, std::size_t chars); + bool Receive(char *, std::size_t, std::size_t elementBytes = 0); std::optional GetCurrentChar(); // vacant after end of record bool AdvanceRecord(int = 1); void BackspaceRecord(); void HandleRelativePosition(std::int64_t); - int EndIoStatement(); - ConnectionState &GetConnectionState(); - IoErrorHandler &GetIoErrorHandler() const; + void HandleAbsolutePosition(std::int64_t); // for r* in list I/O + std::optional GetNextDataEdit(int = 1); ExternalFileUnit *GetExternalFileUnit() const; // null if internal unit - MutableModes &mutableModes(); bool BeginReadingRecord(); void FinishReadingRecord(); bool Inquire(InquiryKeywordHash, char *, std::size_t); @@ -78,6 +84,10 @@ class IoStatementState { bool Inquire(InquiryKeywordHash, std::int64_t, bool &); // PENDING= bool Inquire(InquiryKeywordHash, std::int64_t &); + MutableModes &mutableModes(); + ConnectionState &GetConnectionState(); + IoErrorHandler &GetIoErrorHandler() const; + // N.B.: this also works with base classes template A *get_if() const { return std::visit( @@ -129,8 +139,18 @@ class IoStatementState { ExternalFormattedIoStatementState>, std::reference_wrapper>, std::reference_wrapper>, - std::reference_wrapper>, - std::reference_wrapper>, + std::reference_wrapper< + ExternalUnformattedIoStatementState>, + std::reference_wrapper< + ExternalUnformattedIoStatementState>, + std::reference_wrapper>, + std::reference_wrapper>, + std::reference_wrapper>, + std::reference_wrapper>, + std::reference_wrapper< + ChildUnformattedIoStatementState>, + std::reference_wrapper< + ChildUnformattedIoStatementState>, std::reference_wrapper, std::reference_wrapper, std::reference_wrapper, @@ -140,18 +160,30 @@ class IoStatementState { }; // Base class for all per-I/O statement state classes. -// Inherits IoErrorHandler from its base. -struct IoStatementBase : public DefaultFormatControlCallbacks { - using DefaultFormatControlCallbacks::DefaultFormatControlCallbacks; +struct IoStatementBase : public IoErrorHandler { + using IoErrorHandler::IoErrorHandler; + + // These are default no-op backstops that can be overridden by descendants. int EndIoStatement(); + bool Emit(const char *, std::size_t, std::size_t elementBytes); + bool Emit(const char *, std::size_t); + bool Emit(const char16_t *, std::size_t chars); + bool Emit(const char32_t *, std::size_t chars); + bool Receive(char *, std::size_t, std::size_t elementBytes = 0); + std::optional GetCurrentChar(); + bool AdvanceRecord(int); + void BackspaceRecord(); + void HandleRelativePosition(std::int64_t); + void HandleAbsolutePosition(std::int64_t); std::optional GetNextDataEdit(IoStatementState &, int = 1); - ExternalFileUnit *GetExternalFileUnit() const { return nullptr; } - bool BeginReadingRecord() { return true; } - void FinishReadingRecord() {} + ExternalFileUnit *GetExternalFileUnit() const; + bool BeginReadingRecord(); + void FinishReadingRecord(); bool Inquire(InquiryKeywordHash, char *, std::size_t); bool Inquire(InquiryKeywordHash, bool &); bool Inquire(InquiryKeywordHash, std::int64_t, bool &); bool Inquire(InquiryKeywordHash, std::int64_t &); + void BadInquiryKeywordHashCrash(InquiryKeywordHash); }; @@ -207,8 +239,11 @@ class InternalIoStatementState : public IoStatementBase, InternalIoStatementState( const Descriptor &, const char *sourceFile = nullptr, int sourceLine = 0); int EndIoStatement(); - bool Emit(const CharType *, std::size_t chars /* not necessarily bytes */, - std::size_t elementBytes = 0); + + using IoStatementBase::Emit; + bool Emit( + const CharType *data, std::size_t chars /* not necessarily bytes */); + std::optional GetCurrentChar(); bool AdvanceRecord(int = 1); void BackspaceRecord(); @@ -275,7 +310,7 @@ class ExternalIoStatementBase : public IoStatementBase { MutableModes &mutableModes(); ConnectionState &GetConnectionState(); int EndIoStatement(); - ExternalFileUnit *GetExternalFileUnit() { return &unit_; } + ExternalFileUnit *GetExternalFileUnit() const { return &unit_; } private: ExternalFileUnit &unit_; @@ -287,7 +322,8 @@ class ExternalIoStatementState : public ExternalIoStatementBase, public: using ExternalIoStatementBase::ExternalIoStatementBase; int EndIoStatement(); - bool Emit(const char *, std::size_t, std::size_t elementBytes = 0); + bool Emit(const char *, std::size_t, std::size_t elementBytes); + bool Emit(const char *, std::size_t); bool Emit(const char16_t *, std::size_t chars /* not bytes */); bool Emit(const char32_t *, std::size_t chars /* not bytes */); std::optional GetCurrentChar(); @@ -331,13 +367,73 @@ class ExternalListIoStatementState : public ExternalIoStatementState, }; template -class UnformattedIoStatementState : public ExternalIoStatementState { +class ExternalUnformattedIoStatementState + : public ExternalIoStatementState { public: using ExternalIoStatementState::ExternalIoStatementState; bool Receive(char *, std::size_t, std::size_t elementBytes = 0); - bool Emit(const char *, std::size_t, std::size_t elementBytes = 0); }; +template +class ChildIoStatementState : public IoStatementBase, + public IoDirectionState { +public: + ChildIoStatementState( + ChildIo &, const char *sourceFile = nullptr, int sourceLine = 0); + ChildIo &child() { return child_; } + MutableModes &mutableModes(); + ConnectionState &GetConnectionState(); + ExternalFileUnit *GetExternalFileUnit() const; + int EndIoStatement(); + bool Emit(const char *, std::size_t, std::size_t elementBytes); + bool Emit(const char *, std::size_t); + bool Emit(const char16_t *, std::size_t chars /* not bytes */); + bool Emit(const char32_t *, std::size_t chars /* not bytes */); + std::optional GetCurrentChar(); + void HandleRelativePosition(std::int64_t); + void HandleAbsolutePosition(std::int64_t); + +private: + ChildIo &child_; +}; + +template +class ChildFormattedIoStatementState : public ChildIoStatementState, + public FormattedIoStatementState { +public: + using CharType = CHAR; + ChildFormattedIoStatementState(ChildIo &, const CharType *format, + std::size_t formatLength, const char *sourceFile = nullptr, + int sourceLine = 0); + MutableModes &mutableModes() { return mutableModes_; } + int EndIoStatement(); + bool AdvanceRecord(int = 1); + std::optional GetNextDataEdit( + IoStatementState &, int maxRepeat = 1) { + return format_.GetNextDataEdit(*this, maxRepeat); + } + +private: + MutableModes mutableModes_; + FormatControl format_; +}; + +template +class ChildListIoStatementState : public ChildIoStatementState, + public ListDirectedStatementState { +public: + using ChildIoStatementState::ChildIoStatementState; + using ListDirectedStatementState::GetNextDataEdit; +}; + +template +class ChildUnformattedIoStatementState : public ChildIoStatementState { +public: + using ChildIoStatementState::ChildIoStatementState; + bool Receive(char *, std::size_t, std::size_t elementBytes = 0); +}; + +// OPEN class OpenStatementState : public ExternalIoStatementBase { public: OpenStatementState(ExternalFileUnit &unit, bool wasExtant, @@ -415,8 +511,17 @@ extern template class ExternalFormattedIoStatementState; extern template class ExternalFormattedIoStatementState; extern template class ExternalListIoStatementState; extern template class ExternalListIoStatementState; -extern template class UnformattedIoStatementState; -extern template class UnformattedIoStatementState; +extern template class ExternalUnformattedIoStatementState; +extern template class ExternalUnformattedIoStatementState; +extern template class ChildIoStatementState; +extern template class ChildIoStatementState; +extern template class ChildFormattedIoStatementState; +extern template class ChildFormattedIoStatementState; +extern template class ChildListIoStatementState; +extern template class ChildListIoStatementState; +extern template class ChildUnformattedIoStatementState; +extern template class ChildUnformattedIoStatementState; + extern template class FormatControl< InternalFormattedIoStatementState>; extern template class FormatControl< @@ -425,6 +530,10 @@ extern template class FormatControl< ExternalFormattedIoStatementState>; extern template class FormatControl< ExternalFormattedIoStatementState>; +extern template class FormatControl< + ChildFormattedIoStatementState>; +extern template class FormatControl< + ChildFormattedIoStatementState>; class InquireUnitState : public ExternalIoStatementBase { public: @@ -463,7 +572,6 @@ class InquireIOLengthState : public NoUnitIoStatementState, public: InquireIOLengthState(const char *sourceFile = nullptr, int sourceLine = 0); std::size_t bytes() const { return bytes_; } - bool Emit(const char *, std::size_t, std::size_t elementBytes = 0); private: std::size_t bytes_{0}; diff --git a/flang/runtime/tools.cpp b/flang/runtime/tools.cpp index c67da77e0c118..07f38cdf3efa5 100644 --- a/flang/runtime/tools.cpp +++ b/flang/runtime/tools.cpp @@ -71,9 +71,11 @@ int IdentifyValue( void ToFortranDefaultCharacter( char *to, std::size_t toLength, const char *from) { std::size_t len{std::strlen(from)}; - std::memcpy(to, from, std::max(toLength, len)); if (len < toLength) { + std::memcpy(to, from, len); std::memset(to + len, ' ', toLength - len); + } else { + std::memcpy(to, from, toLength); } } diff --git a/flang/runtime/type-info.cpp b/flang/runtime/type-info.cpp index df72fc466a29b..9385eabf2dc84 100644 --- a/flang/runtime/type-info.cpp +++ b/flang/runtime/type-info.cpp @@ -82,6 +82,21 @@ const Component *DerivedType::FindDataComponent( : nullptr; } +const SpecialBinding *DerivedType::FindSpecialBinding( + SpecialBinding::Which which) const { + const Descriptor &specialDesc{special()}; + std::size_t n{specialDesc.Elements()}; + SubscriptValue at[maxRank]; + specialDesc.GetLowerBounds(at); + for (std::size_t j{0}; j < n; ++j, specialDesc.IncrementSubscripts(at)) { + const SpecialBinding &special{*specialDesc.Element(at)}; + if (special.which() == which) { + return &special; + } + } + return nullptr; +} + static void DumpScalarCharacter( FILE *f, const Descriptor &desc, const char *what) { if (desc.raw().version == CFI_VERSION && @@ -103,7 +118,7 @@ FILE *DerivedType::Dump(FILE *f) const { int offset{j * static_cast(sizeof *uints)}; std::fprintf(f, " [+%3d](0x%p) %#016jx", offset, reinterpret_cast(&uints[j]), - static_cast(uints[j])); + static_cast(uints[j])); if (offset == offsetof(DerivedType, binding_)) { std::fputs(" <-- binding_\n", f); } else if (offset == offsetof(DerivedType, name_)) { @@ -151,6 +166,15 @@ FILE *DerivedType::Dump(FILE *f) const { std::fputs(" bad descriptor: ", f); compDesc.Dump(f); } + const Descriptor &specialDesc{special()}; + std::fprintf( + f, "\n special descriptor (byteSize 0x%zx): ", special_.byteSize); + specialDesc.Dump(f); + std::size_t specials{specialDesc.Elements()}; + for (std::size_t j{0}; j < specials; ++j) { + std::fprintf(f, " [%3zd] ", j); + specialDesc.ZeroBasedIndexedElement(j)->Dump(f); + } return f; } @@ -174,4 +198,46 @@ FILE *Component::Dump(FILE *f) const { return f; } +FILE *SpecialBinding::Dump(FILE *f) const { + std::fprintf( + f, "SpecialBinding @ 0x%p:\n", reinterpret_cast(this)); + switch (which_) { + case Which::Assignment: + std::fputs(" Assignment", f); + break; + case Which::ElementalAssignment: + std::fputs(" ElementalAssignment", f); + break; + case Which::Final: + std::fputs(" Final", f); + break; + case Which::ElementalFinal: + std::fputs(" ElementalFinal", f); + break; + case Which::AssumedRankFinal: + std::fputs(" AssumedRankFinal", f); + break; + case Which::ReadFormatted: + std::fputs(" ReadFormatted", f); + break; + case Which::ReadUnformatted: + std::fputs(" ReadUnformatted", f); + break; + case Which::WriteFormatted: + std::fputs(" WriteFormatted", f); + break; + case Which::WriteUnformatted: + std::fputs(" WriteUnformatted", f); + break; + default: + std::fprintf( + f, " Unknown which: 0x%x", static_cast(which_)); + break; + } + std::fprintf(f, "\n rank: %d\n", rank_); + std::fprintf(f, " isArgDescriptoSetr: 0x%x\n", isArgDescriptorSet_); + std::fprintf(f, " proc: 0x%p\n", reinterpret_cast(proc_)); + return f; +} + } // namespace Fortran::runtime::typeInfo diff --git a/flang/runtime/type-info.h b/flang/runtime/type-info.h index 05a4c41a34997..0dfb4b64ffd35 100644 --- a/flang/runtime/type-info.h +++ b/flang/runtime/type-info.h @@ -20,81 +20,7 @@ namespace Fortran::runtime::typeInfo { -class Component; - -class DerivedType { -public: - ~DerivedType(); // never defined - - const Descriptor &binding() const { return binding_.descriptor(); } - const Descriptor &name() const { return name_.descriptor(); } - std::uint64_t sizeInBytes() const { return sizeInBytes_; } - const Descriptor &parent() const { return parent_.descriptor(); } - std::uint64_t typeHash() const { return typeHash_; } - const Descriptor &uninstatiated() const { - return uninstantiated_.descriptor(); - } - const Descriptor &kindParameter() const { - return kindParameter_.descriptor(); - } - const Descriptor &lenParameterKind() const { - return lenParameterKind_.descriptor(); - } - const Descriptor &component() const { return component_.descriptor(); } - const Descriptor &procPtr() const { return procPtr_.descriptor(); } - const Descriptor &special() const { return special_.descriptor(); } - - std::size_t LenParameters() const { return lenParameterKind().Elements(); } - - // Finds a data component by name in this derived type or tis ancestors. - const Component *FindDataComponent( - const char *name, std::size_t nameLen) const; - - FILE *Dump(FILE * = stdout) const; - -private: - // This member comes first because it's used like a vtable by generated code. - // It includes all of the ancestor types' bindings, if any, first, - // with any overrides from descendants already applied to them. Local - // bindings then follow in alphabetic order of binding name. - StaticDescriptor<1, true> - binding_; // TYPE(BINDING), DIMENSION(:), POINTER, CONTIGUOUS - - StaticDescriptor<0> name_; // CHARACTER(:), POINTER - - std::uint64_t sizeInBytes_{0}; - StaticDescriptor<0, true> parent_; // TYPE(DERIVEDTYPE), POINTER - - // Instantiations of a parameterized derived type with KIND type - // parameters will point this data member to the description of - // the original uninstantiated type, which may be shared from a - // module via use association. The original uninstantiated derived - // type description will point to itself. Derived types that have - // no KIND type parameters will have a null pointer here. - StaticDescriptor<0, true> uninstantiated_; // TYPE(DERIVEDTYPE), POINTER - - // TODO: flags for SEQUENCE, BIND(C), any PRIVATE component(? see 7.5.2) - std::uint64_t typeHash_{0}; - - // These pointer targets include all of the items from the parent, if any. - StaticDescriptor<1> kindParameter_; // pointer to rank-1 array of INTEGER(8) - StaticDescriptor<1> - lenParameterKind_; // pointer to rank-1 array of INTEGER(1) - - // This array of local data components includes the parent component. - // Components are in component order, not collation order of their names. - // It does not include procedure pointer components. - StaticDescriptor<1, true> - component_; // TYPE(COMPONENT), POINTER, DIMENSION(:), CONTIGUOUS - - // Procedure pointer components - StaticDescriptor<1, true> - procPtr_; // TYPE(PROCPTR), POINTER, DIMENSION(:), CONTIGUOUS - - // Does not include special bindings from ancestral types. - StaticDescriptor<1, true> - special_; // TYPE(SPECIALBINDING), POINTER, DIMENSION(:), CONTIGUOUS -}; +class DerivedType; using ProcedurePointer = void (*)(); // TYPE(C_FUNPTR) @@ -177,7 +103,8 @@ struct ProcPtrComponent { ProcedurePointer procInitialization; // for Genre::Procedure }; -struct SpecialBinding { +class SpecialBinding { +public: enum class Which : std::uint8_t { None = 0, Assignment = 4, @@ -189,13 +116,27 @@ struct SpecialBinding { ReadUnformatted = 17, WriteFormatted = 18, WriteUnformatted = 19 - } which{Which::None}; + }; + + Which which() const { return which_; } + int rank() const { return rank_; } + bool IsArgDescriptor(int zeroBasedArg) const { + return (isArgDescriptorSet_ >> zeroBasedArg) & 1; + } + template PROC GetProc() const { + return reinterpret_cast(proc_); + } + + FILE *Dump(FILE *) const; + +private: + Which which_{Which::None}; // Used for Which::Final only. Which::Assignment always has rank 0, as // type-bound defined assignment for rank > 0 must be elemental // due to the required passed object dummy argument, which are scalar. // User defined derived type I/O is always scalar. - std::uint8_t rank{0}; + std::uint8_t rank_{0}; // The following little bit-set identifies which dummy arguments are // passed via descriptors for their derived type arguments. @@ -222,9 +163,86 @@ struct SpecialBinding { // the case when and only when the derived type is extensible. // When false, the user derived type I/O subroutine must have been // called via a generic interface, not a generic TBP. - std::uint8_t isArgDescriptorSet{0}; + std::uint8_t isArgDescriptorSet_{0}; + + ProcedurePointer proc_{nullptr}; +}; + +class DerivedType { +public: + ~DerivedType(); // never defined + + const Descriptor &binding() const { return binding_.descriptor(); } + const Descriptor &name() const { return name_.descriptor(); } + std::uint64_t sizeInBytes() const { return sizeInBytes_; } + const Descriptor &parent() const { return parent_.descriptor(); } + std::uint64_t typeHash() const { return typeHash_; } + const Descriptor &uninstatiated() const { + return uninstantiated_.descriptor(); + } + const Descriptor &kindParameter() const { + return kindParameter_.descriptor(); + } + const Descriptor &lenParameterKind() const { + return lenParameterKind_.descriptor(); + } + const Descriptor &component() const { return component_.descriptor(); } + const Descriptor &procPtr() const { return procPtr_.descriptor(); } + const Descriptor &special() const { return special_.descriptor(); } + + std::size_t LenParameters() const { return lenParameterKind().Elements(); } + + // Finds a data component by name in this derived type or tis ancestors. + const Component *FindDataComponent( + const char *name, std::size_t nameLen) const; + + const SpecialBinding *FindSpecialBinding(SpecialBinding::Which) const; + + FILE *Dump(FILE * = stdout) const; + +private: + // This member comes first because it's used like a vtable by generated code. + // It includes all of the ancestor types' bindings, if any, first, + // with any overrides from descendants already applied to them. Local + // bindings then follow in alphabetic order of binding name. + StaticDescriptor<1, true> + binding_; // TYPE(BINDING), DIMENSION(:), POINTER, CONTIGUOUS + + StaticDescriptor<0> name_; // CHARACTER(:), POINTER + + std::uint64_t sizeInBytes_{0}; + StaticDescriptor<0, true> parent_; // TYPE(DERIVEDTYPE), POINTER + + // Instantiations of a parameterized derived type with KIND type + // parameters will point this data member to the description of + // the original uninstantiated type, which may be shared from a + // module via use association. The original uninstantiated derived + // type description will point to itself. Derived types that have + // no KIND type parameters will have a null pointer here. + StaticDescriptor<0, true> uninstantiated_; // TYPE(DERIVEDTYPE), POINTER + + // TODO: flags for SEQUENCE, BIND(C), any PRIVATE component(? see 7.5.2) + std::uint64_t typeHash_{0}; + + // These pointer targets include all of the items from the parent, if any. + StaticDescriptor<1> kindParameter_; // pointer to rank-1 array of INTEGER(8) + StaticDescriptor<1> + lenParameterKind_; // pointer to rank-1 array of INTEGER(1) + + // This array of local data components includes the parent component. + // Components are in component order, not collation order of their names. + // It does not include procedure pointer components. + StaticDescriptor<1, true> + component_; // TYPE(COMPONENT), POINTER, DIMENSION(:), CONTIGUOUS + + // Procedure pointer components + StaticDescriptor<1, true> + procPtr_; // TYPE(PROCPTR), POINTER, DIMENSION(:), CONTIGUOUS - ProcedurePointer proc{nullptr}; + // Does not include special bindings from ancestral types. + StaticDescriptor<1, true> + special_; // TYPE(SPECIALBINDING), POINTER, DIMENSION(:), CONTIGUOUS }; + } // namespace Fortran::runtime::typeInfo #endif // FORTRAN_RUNTIME_TYPE_INFO_H_ diff --git a/flang/runtime/unit-map.cpp b/flang/runtime/unit-map.cpp index 1cd2115f4aa1b..915c747371850 100644 --- a/flang/runtime/unit-map.cpp +++ b/flang/runtime/unit-map.cpp @@ -92,4 +92,5 @@ ExternalFileUnit &UnitMap::Create(int n, const Terminator &terminator) { bucket_[Hash(n)].swap(chain.next); // pushes new node as list head return chain.unit; } + } // namespace Fortran::runtime::io diff --git a/flang/runtime/unit.cpp b/flang/runtime/unit.cpp index 79f3722fb7aba..aafb71fb6d73c 100644 --- a/flang/runtime/unit.cpp +++ b/flang/runtime/unit.cpp @@ -87,8 +87,11 @@ ExternalFileUnit *ExternalFileUnit::LookUpForClose(int unit) { return GetUnitMap().LookUpForClose(unit); } -int ExternalFileUnit::NewUnit(const Terminator &terminator) { - return GetUnitMap().NewUnit(terminator).unitNumber(); +ExternalFileUnit &ExternalFileUnit::NewUnit( + const Terminator &terminator, bool forChildIo) { + ExternalFileUnit &unit{GetUnitMap().NewUnit(terminator)}; + unit.createdForInternalChildIo_ = forChildIo; + return unit; } void ExternalFileUnit::OpenUnit(std::optional status, @@ -697,4 +700,43 @@ void ExternalFileUnit::DoEndfile(IoErrorHandler &handler) { BeginRecord(); impliedEndfile_ = false; } + +ChildIo &ExternalFileUnit::PushChildIo(IoStatementState &parent) { + OwningPtr current{std::move(child_)}; + Terminator &terminator{parent.GetIoErrorHandler()}; + OwningPtr next{New{terminator}(parent, std::move(current))}; + child_.reset(next.release()); + return *child_; +} + +void ExternalFileUnit::PopChildIo(ChildIo &child) { + if (child_.get() != &child) { + child.parent().GetIoErrorHandler().Crash( + "ChildIo being popped is not top of stack"); + } + child_.reset(child.AcquirePrevious().release()); // deletes top child +} + +void ChildIo::EndIoStatement() { + io_.reset(); + u_.emplace(); +} + +bool ChildIo::CheckFormattingAndDirection(Terminator &terminator, + const char *what, bool unformatted, Direction direction) { + bool parentIsUnformatted{!parent_.get_if()}; + bool parentIsInput{!parent_.get_if>()}; + if (unformatted != parentIsUnformatted) { + terminator.Crash("Child %s attempted on %s parent I/O unit", what, + parentIsUnformatted ? "unformatted" : "formatted"); + return false; + } else if (parentIsInput != (direction == Direction::Input)) { + terminator.Crash("Child %s attempted on %s parent I/O unit", what, + parentIsInput ? "input" : "output"); + return false; + } else { + return true; + } +} + } // namespace Fortran::runtime::io diff --git a/flang/runtime/unit.h b/flang/runtime/unit.h index 9634f1a95804e..68876ff536399 100644 --- a/flang/runtime/unit.h +++ b/flang/runtime/unit.h @@ -28,6 +28,7 @@ namespace Fortran::runtime::io { class UnitMap; +class ChildIo; class ExternalFileUnit : public ConnectionState, public OpenFile, @@ -36,6 +37,7 @@ class ExternalFileUnit : public ConnectionState, explicit ExternalFileUnit(int unitNumber) : unitNumber_{unitNumber} {} int unitNumber() const { return unitNumber_; } bool swapEndianness() const { return swapEndianness_; } + bool createdForInternalChildIo() const { return createdForInternalChildIo_; } static ExternalFileUnit *LookUp(int unit); static ExternalFileUnit &LookUpOrCrash(int unit, const Terminator &); @@ -46,7 +48,7 @@ class ExternalFileUnit : public ConnectionState, static ExternalFileUnit *LookUp(const char *path); static ExternalFileUnit &CreateNew(int unit, const Terminator &); static ExternalFileUnit *LookUpForClose(int unit); - static int NewUnit(const Terminator &); + static ExternalFileUnit &NewUnit(const Terminator &, bool forChildIo = false); static void CloseAll(IoErrorHandler &); static void FlushAll(IoErrorHandler &); @@ -62,7 +64,6 @@ class ExternalFileUnit : public ConnectionState, template IoStatementState &BeginIoStatement(X &&...xs) { - // TODO: Child data transfer statements vs. locking lock_.Take(); // dropped in EndIoStatement() A &state{u_.emplace(std::forward(xs)...)}; if constexpr (!std::is_same_v) { @@ -91,6 +92,10 @@ class ExternalFileUnit : public ConnectionState, BeginRecord(); } + ChildIo *GetChildIo() { return child_.get(); } + ChildIo &PushChildIo(IoStatementState &); + void PopChildIo(ChildIo &); + private: static UnitMap &GetUnitMap(); const char *FrameNextInput(IoErrorHandler &, std::size_t); @@ -116,8 +121,8 @@ class ExternalFileUnit : public ConnectionState, ExternalFormattedIoStatementState, ExternalListIoStatementState, ExternalListIoStatementState, - UnformattedIoStatementState, - UnformattedIoStatementState, InquireUnitState, + ExternalUnformattedIoStatementState, + ExternalUnformattedIoStatementState, InquireUnitState, ExternalMiscIoStatementState> u_; @@ -132,6 +137,50 @@ class ExternalFileUnit : public ConnectionState, std::size_t recordOffsetInFrame_{0}; // of currentRecordNumber bool swapEndianness_{false}; + + bool createdForInternalChildIo_{false}; + + // A stack of child I/O pseudo-units for user-defined derived type + // I/O that have this unit number. + OwningPtr child_; +}; + +// A pseudo-unit for child I/O statements in user-defined derived type +// I/O subroutines; it forwards operations to the parent I/O statement, +// which can also be a child I/O statement. +class ChildIo { +public: + ChildIo(IoStatementState &parent, OwningPtr &&previous) + : parent_{parent}, previous_{std::move(previous)} {} + + IoStatementState &parent() const { return parent_; } + + void EndIoStatement(); + + template + IoStatementState &BeginIoStatement(X &&...xs) { + A &state{u_.emplace(std::forward(xs)...)}; + io_.emplace(state); + return *io_; + } + + OwningPtr AcquirePrevious() { return std::move(previous_); } + + bool CheckFormattingAndDirection( + Terminator &, const char *what, bool unformatted, Direction); + +private: + IoStatementState &parent_; + OwningPtr previous_; + std::variant, + ChildFormattedIoStatementState, + ChildListIoStatementState, + ChildListIoStatementState, + ChildUnformattedIoStatementState, + ChildUnformattedIoStatementState> + u_; + std::optional io_; }; } // namespace Fortran::runtime::io diff --git a/flang/test/Semantics/typeinfo01.f90 b/flang/test/Semantics/typeinfo01.f90 index a68c392ad7513..088c6e56b6b76 100644 --- a/flang/test/Semantics/typeinfo01.f90 +++ b/flang/test/Semantics/typeinfo01.f90 @@ -171,7 +171,7 @@ subroutine wu(x,u,iostat,iomsg) end module module m10 - type :: t + type, bind(c) :: t ! non-extensible end type interface read(formatted) procedure :: rf From 557e1fa02f470bd4f14b7aa4060430007332895a Mon Sep 17 00:00:00 2001 From: Jez Ng Date: Mon, 28 Jun 2021 14:43:34 -0400 Subject: [PATCH 089/619] [lld-macho] Extend ICF to literal sections Literal sections can be deduplicated before running ICF. That makes it easy to compare them during ICF: we can tell if two literals are constant-equal by comparing their offsets in their OutputSection. LLD-ELF takes a similar approach. Reviewed By: #lld-macho, gkm Differential Revision: https://reviews.llvm.org/D104671 --- lld/MachO/Driver.cpp | 6 +-- lld/MachO/ICF.cpp | 44 +++++++++-------- lld/MachO/InputSection.cpp | 4 +- lld/MachO/Options.td | 2 +- lld/MachO/SyntheticSections.cpp | 2 +- lld/MachO/SyntheticSections.h | 2 +- lld/MachO/Writer.cpp | 12 ++++- lld/test/MachO/icf-literals.s | 86 +++++++++++++++++++++++++++++++++ 8 files changed, 127 insertions(+), 31 deletions(-) create mode 100644 lld/test/MachO/icf-literals.s diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp index f91fca0999042..1d06f19311c1b 100644 --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -1080,7 +1080,9 @@ bool macho::link(ArrayRef argsArr, bool canExitEarly, config->emitBitcodeBundle = args.hasArg(OPT_bitcode_bundle); config->emitDataInCodeInfo = args.hasFlag(OPT_data_in_code_info, OPT_no_data_in_code_info, true); - config->dedupLiterals = args.hasArg(OPT_deduplicate_literals); + config->icfLevel = getICFLevel(args); + config->dedupLiterals = args.hasArg(OPT_deduplicate_literals) || + config->icfLevel != ICFLevel::none; // FIXME: Add a commandline flag for this too. config->zeroModTime = getenv("ZERO_AR_DATE"); @@ -1123,8 +1125,6 @@ bool macho::link(ArrayRef argsArr, bool canExitEarly, config->undefinedSymbolTreatment = getUndefinedSymbolTreatment(args); - config->icfLevel = getICFLevel(args); - if (config->outputType == MH_EXECUTE) config->entry = symtab->addUndefined(args.getLastArgValue(OPT_e, "_main"), /*file=*/nullptr, diff --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp index ce49dc903d4bc..4ff8c578d56c2 100644 --- a/lld/MachO/ICF.cpp +++ b/lld/MachO/ICF.cpp @@ -104,23 +104,22 @@ static bool equalsVariable(const ConcatInputSection *ia, if (isa(sa)) { const auto *da = dyn_cast(sa); const auto *db = dyn_cast(sb); - if (da->value != db->value) - return false; - if (da->isAbsolute() != db->isAbsolute()) - return false; - if (da->isec) { + if (da->isec && db->isec) { if (da->isec->kind() != db->isec->kind()) return false; if (const auto *isecA = dyn_cast(da->isec)) { const auto *isecB = cast(db->isec); - if (isecA->icfEqClass[icfPass % 2] != - isecB->icfEqClass[icfPass % 2]) - return false; - } else { - // FIXME: implement ICF for other InputSection kinds - return false; + return da->value == db->value && isecA->icfEqClass[icfPass % 2] == + isecB->icfEqClass[icfPass % 2]; } + // Else we have two literal sections. References to them are + // constant-equal if their offsets in the output section are equal. + return da->isec->parent == db->isec->parent && + da->isec->getOffset(da->value) == + db->isec->getOffset(db->value); } + assert(da->isAbsolute() && db->isAbsolute()); + return da->value == db->value; } else if (isa(sa)) { // There is one DylibSymbol per gotIndex and we already checked for // symbol equality, thus we know that these must be different. @@ -135,14 +134,13 @@ static bool equalsVariable(const ConcatInputSection *ia, return false; if (const auto *isecA = dyn_cast(sa)) { const auto *isecB = cast(sb); - if (isecA->icfEqClass[icfPass % 2] != isecB->icfEqClass[icfPass % 2]) - return false; + return isecA->icfEqClass[icfPass % 2] == isecB->icfEqClass[icfPass % 2]; } else { - // FIXME: implement ICF for other InputSection kinds - return false; + assert(isa(sa) || + isa(sa)); + return sa->getOffset(ra.addend) == sb->getOffset(rb.addend); } } - return true; }; return std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(), f); @@ -207,11 +205,15 @@ void ICF::run() { if (auto *dylibSym = dyn_cast(sym)) hash += dylibSym->stubsHelperIndex; else if (auto *defined = dyn_cast(sym)) { - hash += defined->value; - if (defined->isec) - if (auto *isec = cast(defined->isec)) - hash += isec->icfEqClass[icfPass % 2]; - // FIXME: implement ICF for other InputSection kinds + if (defined->isec) { + if (auto isec = dyn_cast(defined->isec)) + hash += defined->value + isec->icfEqClass[icfPass % 2]; + else + hash += defined->isec->kind() + + defined->isec->getOffset(defined->value); + } else { + hash += defined->value; + } } else llvm_unreachable("foldIdenticalSections symbol kind"); } diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp index 78a7f00a18c5c..a961807abd230 100644 --- a/lld/MachO/InputSection.cpp +++ b/lld/MachO/InputSection.cpp @@ -62,9 +62,7 @@ bool ConcatInputSection::isHashableForICF(bool isText) const { case S_8BYTE_LITERALS: case S_16BYTE_LITERALS: case S_LITERAL_POINTERS: - // FIXME(jezng): We should not have any ConcatInputSections of these types - // when running ICF. - return false; + llvm_unreachable("found unexpected literal type in ConcatInputSection"); case S_ZEROFILL: case S_GB_ZEROFILL: case S_NON_LAZY_SYMBOL_POINTERS: diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td index b115f43594f31..ebff0d5813a02 100644 --- a/lld/MachO/Options.td +++ b/lld/MachO/Options.td @@ -55,7 +55,7 @@ def time_trace_file_eq: Joined<["--"], "time-trace-file=">, HelpText<"Specify time trace output file">, Group; def deduplicate_literals: Flag<["--"], "deduplicate-literals">, - HelpText<"Enable literal deduplication">, + HelpText<"Enable literal deduplication. This is implied by --icf={safe,all}">, Group; def print_dylib_search: Flag<["--"], "print-dylib-search">, HelpText<"Print which paths lld searched when trying to find dylibs">, diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp index 563b6e2ab605a..81fec04275295 100644 --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -1188,7 +1188,7 @@ void CStringSection::addInput(CStringInputSection *isec) { inputs.push_back(isec); } -void CStringSection::finalize() { +void CStringSection::finalizeContents() { // Add all string pieces to the string table builder to create section // contents. for (const CStringInputSection *isec : inputs) diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h index 3b2605871009a..a5f6ea9a6e1f4 100644 --- a/lld/MachO/SyntheticSections.h +++ b/lld/MachO/SyntheticSections.h @@ -523,7 +523,7 @@ class CStringSection final : public SyntheticSection { CStringSection(); void addInput(CStringInputSection *); uint64_t getSize() const override { return builder.getSize(); } - void finalize() override; + void finalizeContents(); bool isNeeded() const override { return !inputs.empty(); } void writeTo(uint8_t *buf) const override { builder.write(buf); } diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp index 5dab4d1aa3145..9dca3416875b6 100644 --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -52,6 +52,7 @@ class Writer { void scanSymbols(); template void createOutputSections(); template void createLoadCommands(); + void foldIdenticalLiterals(); void foldIdenticalSections(); void finalizeAddresses(); void finalizeLinkEditSegment(); @@ -942,6 +943,12 @@ template void Writer::createOutputSections() { linkEditSegment = getOrCreateOutputSegment(segment_names::linkEdit); } +void Writer::foldIdenticalLiterals() { + if (in.cStringSection) + in.cStringSection->finalizeContents(); + // TODO: WordLiteralSection & CFStringSection should be finalized here too +} + void Writer::foldIdenticalSections() { if (config->icfLevel == ICFLevel::none) return; @@ -973,8 +980,8 @@ void Writer::foldIdenticalSections() { else concatIsec->icfEqClass[0] = ++icfUniqueID; } - // FIXME: hash literal sections here? } + // FIXME: hash literal sections here too? parallelForEach(hashable, [](ConcatInputSection *isec) { isec->hashForICF(); }); // Now that every input section is either hashed or marked as unique, @@ -1118,6 +1125,9 @@ template void Writer::run() { in.stubHelper->setup(); scanSymbols(); createOutputSections(); + // ICF assumes that all literals have been folded already, so we must run + // foldIdenticalLiterals before foldIdenticalSections. + foldIdenticalLiterals(); foldIdenticalSections(); // After this point, we create no new segments; HOWEVER, we might // yet create branch-range extension thunks for architectures whose diff --git a/lld/test/MachO/icf-literals.s b/lld/test/MachO/icf-literals.s new file mode 100644 index 0000000000000..dbe0490dd6848 --- /dev/null +++ b/lld/test/MachO/icf-literals.s @@ -0,0 +1,86 @@ +# REQUIRES: x86 +# RUN: rm -rf %t; mkdir %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/test.o +# RUN: %lld -lSystem --icf=all -o %t/test %t/test.o +# RUN: llvm-objdump --macho --syms -d %t/test | FileCheck %s + +# CHECK: _main: +# CHECK-NEXT: callq _foo2_ref +# CHECK-NEXT: callq _foo2_ref +# CHECK-NEXT: callq _bar2_ref +# CHECK-NEXT: callq _bar2_ref +# CHECK-NEXT: callq _baz2_ref +# CHECK-NEXT: callq _baz2_ref +# CHECK-NEXT: callq _qux2_ref +# CHECK-NEXT: callq _qux2_ref + +# CHECK: [[#%.16x,FOO:]] l O __TEXT,__cstring _foo1 +# CHECK-NEXT: [[#%.16x,FOO:]] l O __TEXT,__cstring _foo2 +# CHECK-NEXT: [[#%.16x,BAR:]] l O __TEXT,__cstring _bar1 +# CHECK-NEXT: [[#%.16x,BAR:]] l O __TEXT,__cstring _bar2 +# CHECK-NEXT: [[#%.16x,BAZ:]] l O __TEXT,__literals _baz1 +# CHECK-NEXT: [[#%.16x,BAZ:]] l O __TEXT,__literals _baz2 +# CHECK-NEXT: [[#%.16x,QUX:]] l O __TEXT,__literals _qux1 +# CHECK-NEXT: [[#%.16x,QUX:]] l O __TEXT,__literals _qux2 +# CHECK-NEXT: [[#%.16x,FOO_REF:]] l F __TEXT,__text _foo1_ref +# CHECK-NEXT: [[#%.16x,FOO_REF:]] l F __TEXT,__text _foo2_ref +# CHECK-NEXT: [[#%.16x,BAR_REF:]] l F __TEXT,__text _bar1_ref +# CHECK-NEXT: [[#%.16x,BAR_REF:]] l F __TEXT,__text _bar2_ref +# CHECK-NEXT: [[#%.16x,BAZ_REF:]] l F __TEXT,__text _baz1_ref +# CHECK-NEXT: [[#%.16x,BAZ_REF:]] l F __TEXT,__text _baz2_ref +# CHECK-NEXT: [[#%.16x,QUX_REF:]] l F __TEXT,__text _qux1_ref +# CHECK-NEXT: [[#%.16x,QUX_REF:]] l F __TEXT,__text _qux2_ref + +## _foo1 vs _bar1: same section, different offsets +## _foo1 vs _baz1: same offset, different sections + +.cstring +_foo1: + .asciz "foo" +_foo2: + .asciz "foo" +_bar1: + .asciz "bar" +_bar2: + .asciz "bar" + +.literal8 +_baz1: + .quad 0xdead +_baz2: + .quad 0xdead +_qux1: + .quad 0xbeef +_qux2: + .quad 0xbeef + +.text +_foo1_ref: + mov _foo1@GOTPCREL(%rip), %rax +_foo2_ref: + mov _foo2@GOTPCREL(%rip), %rax +_bar1_ref: + mov _bar1@GOTPCREL(%rip), %rax +_bar2_ref: + mov _bar2@GOTPCREL(%rip), %rax +_baz1_ref: + mov _baz1@GOTPCREL(%rip), %rax +_baz2_ref: + mov _baz2@GOTPCREL(%rip), %rax +_qux1_ref: + mov _qux1@GOTPCREL(%rip), %rax +_qux2_ref: + mov _qux2@GOTPCREL(%rip), %rax + +.globl _main +_main: + callq _foo1_ref + callq _foo2_ref + callq _bar1_ref + callq _bar2_ref + callq _baz1_ref + callq _baz2_ref + callq _qux1_ref + callq _qux2_ref + +.subsections_via_symbols From 74d5f30d83f44ff3835a983bcce038f334cdbd6d Mon Sep 17 00:00:00 2001 From: Jez Ng Date: Mon, 28 Jun 2021 14:43:36 -0400 Subject: [PATCH 090/619] [lld-macho][nfc] Add absolute-vs-non-absolute symbol test for ICF Make sure we don't wrongly fold two sections that refer to symbols with the same value if they are not both absolute / non-absolute. Reviewed By: #lld-macho, gkm Differential Revision: https://reviews.llvm.org/D104876 --- lld/test/MachO/icf.s | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/lld/test/MachO/icf.s b/lld/test/MachO/icf.s index 0fedb5df77110..4367646d520a8 100644 --- a/lld/test/MachO/icf.s +++ b/lld/test/MachO/icf.s @@ -13,6 +13,7 @@ # CHECK: [[#%x,A]] g F __TEXT,__text _a2 # CHECK: [[#%x,A]] g F __TEXT,__text _a3 # CHECK: [[#%x,B:]] g F __TEXT,__text _b +# CHECK: [[#%x,B2:]] g F __TEXT,__text _b2 # CHECK: [[#%x,C:]] g F __TEXT,__text _c # CHECK: [[#%x,D:]] g F __TEXT,__text _d # CHECK: [[#%x,E:]] g F __TEXT,__text _e @@ -34,6 +35,7 @@ # CHECK-NEXT: callq 0x[[#%x,A]] <_a3> # CHECK-NEXT: callq 0x[[#%x,A]] <_a3> # CHECK-NEXT: callq 0x[[#%x,B]] <_b> +# CHECK-NEXT: callq 0x[[#%x,B2]] <_b2> # CHECK-NEXT: callq 0x[[#%x,C]] <_c> # CHECK-NEXT: callq 0x[[#%x,D]] <_d> # CHECK-NEXT: callq 0x[[#%x,E]] <_e> @@ -53,14 +55,20 @@ ### TODO: ### * Fold: funcs only differ in alignment ### * No fold: func is weak? preemptable? +### * Test that we hash things appropriately w/ minimal collisions #--- abs.s .subsections_via_symbols -.globl _abs1a, _abs1b, _abs2 -_abs1a = 0xfeedfac3 -_abs1b = 0xfeedfac3 -_abs2 = 0xfeedf00d +.globl _abs1a, _abs1b, _abs2, _not_abs +_abs1a = 0xfac3 +_abs1b = 0xfac3 +_abs2 = 0xf00d + +.data +.space 0xfac3 +## _not_abs has the same Defined::value as _abs1{a,b} +_not_abs: #--- main.s .subsections_via_symbols @@ -116,6 +124,18 @@ _b: movl $0, %eax ret +### No fold: _not_abs has the same value as _abs1{a,b}, but is not absolute. + +.globl _b2 +.p2align 2 +_b2: + callq _d + mov ___nan@GOTPCREL(%rip), %rax + callq ___isnan + movabs $_not_abs, %rdx + movl $0, %eax + ret + ### No fold: _c has slightly different body from _a1 & _a2 .globl _c @@ -282,6 +302,7 @@ _main: callq _a2 callq _a3 callq _b + callq _b2 callq _c callq _d callq _e From bf457919f2db496b8fbca0a3f5f25b33c4e9b8f1 Mon Sep 17 00:00:00 2001 From: Jez Ng Date: Sat, 26 Jun 2021 17:38:25 -0400 Subject: [PATCH 091/619] [lld-macho][nfc] Remove unnecessary dyn_cast and simplify code --- lld/MachO/UnwindInfoSection.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp index 6e9f4ff92aa27..f4bd08d2f3cbc 100644 --- a/lld/MachO/UnwindInfoSection.cpp +++ b/lld/MachO/UnwindInfoSection.cpp @@ -231,7 +231,7 @@ relocateCompactUnwind(ConcatOutputSection *compactUnwindSection, memcpy(buf, isec->data.data(), isec->data.size()); for (const Reloc &r : isec->relocs) { - uint64_t referentVA = 0; + uint64_t referentVA = UINT64_MAX; // Tombstone value if (auto *referentSym = r.referent.dyn_cast()) { if (!isa(referentSym)) { assert(referentSym->isInGot()); @@ -242,14 +242,12 @@ relocateCompactUnwind(ConcatOutputSection *compactUnwindSection, // that we can distinguish the null pointer case. referentVA = referentSym->gotIndex + 1; } - } else if (auto *referentIsec = r.referent.dyn_cast()) { + } else { + auto *referentIsec = r.referent.get(); ConcatInputSection *concatIsec = checkTextSegment(referentIsec); - if (concatIsec->shouldOmitFromOutput()) - referentVA = UINT64_MAX; // Tombstone value - else + if (!concatIsec->shouldOmitFromOutput()) referentVA = referentIsec->getVA(r.addend); } - writeAddress(buf + r.offset, referentVA, r.length); } } From 280593bd3ff1db6d19ccb8182698dd9c816734e2 Mon Sep 17 00:00:00 2001 From: Sameer Sahasrabuddhe Date: Mon, 28 Jun 2021 13:43:02 +0530 Subject: [PATCH 092/619] [Clang] [NFC] fix CHECK lines for convergent attribute tests --- clang/test/CodeGen/convergent-functions.cpp | 8 +++++--- clang/test/CodeGenCUDA/convergent.cu | 2 +- clang/test/CodeGenCUDA/dft-func-attr-skip-intrinsic.hip | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/clang/test/CodeGen/convergent-functions.cpp b/clang/test/CodeGen/convergent-functions.cpp index 7ddb8d3f94501..cb8682474f931 100644 --- a/clang/test/CodeGen/convergent-functions.cpp +++ b/clang/test/CodeGen/convergent-functions.cpp @@ -1,8 +1,10 @@ -// RUN: %clang_cc1 -triple i386-pc-win32 -emit-llvm -fconvergent-functions -o - < %s | FileCheck -check-prefix=CONVFUNC %s -// RUN: %clang_cc1 -triple i386-pc-win32 -emit-llvm -o - < %s | FileCheck -check-prefix=NOCONVFUNC %s +// RUN: %clang_cc1 -triple i386-pc-win32 -emit-llvm -fconvergent-functions -o - < %s | FileCheck -check-prefixes=CHECK,CONVFUNC %s +// RUN: %clang_cc1 -triple i386-pc-win32 -emit-llvm -o - < %s | FileCheck -check-prefixes=CHECK,NOCONVFUNC %s // Test that the -fconvergent-functions flag works -// CONVFUNC: attributes #0 = { convergent {{.*}} } +// CHECK: attributes #0 = { // NOCONVFUNC-NOT: convergent +// CONVFUNC-SAME: convergent +// CHECK-SAME: } void func() { } diff --git a/clang/test/CodeGenCUDA/convergent.cu b/clang/test/CodeGenCUDA/convergent.cu index ff18f92ef1eae..5d98d4ba69262 100644 --- a/clang/test/CodeGenCUDA/convergent.cu +++ b/clang/test/CodeGenCUDA/convergent.cu @@ -42,4 +42,4 @@ __host__ __device__ void bar() { // HOST: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]] // HOST: attributes [[BAZ_ATTR]] = { // HOST-NOT: convergent -// NOST-SAME: } +// HOST-SAME: } diff --git a/clang/test/CodeGenCUDA/dft-func-attr-skip-intrinsic.hip b/clang/test/CodeGenCUDA/dft-func-attr-skip-intrinsic.hip index 9e3e436200fc3..ee4c585cb5d7c 100644 --- a/clang/test/CodeGenCUDA/dft-func-attr-skip-intrinsic.hip +++ b/clang/test/CodeGenCUDA/dft-func-attr-skip-intrinsic.hip @@ -15,4 +15,4 @@ __device__ float foo(float x) { // CHECK: attributes [[ATTR1]] = { convergent // CHECK: attributes [[ATTR2]] = { // CHECK-NOT: convergent -// CHECK: } +// CHECK-SAME: } From 614b46e4dcab0d095e05f8b4da45ef935b7b86b4 Mon Sep 17 00:00:00 2001 From: Kadir Cetinkaya Date: Mon, 28 Jun 2021 18:11:22 +0200 Subject: [PATCH 093/619] [clangd] Add a flag to disable formatting of tweak edits Some tweaks might edit file types not supported by clang-format. This patch gives them a way to signal that they do not require formatting. Differential Revision: https://reviews.llvm.org/D105039 --- clang-tools-extra/clangd/ClangdServer.cpp | 4 +- clang-tools-extra/clangd/refactor/Tweak.h | 3 + .../clangd/unittests/ClangdTests.cpp | 57 +++++++++++++++++++ 3 files changed, 62 insertions(+), 2 deletions(-) diff --git a/clang-tools-extra/clangd/ClangdServer.cpp b/clang-tools-extra/clangd/ClangdServer.cpp index 0f525f3b9a0a4..1e722086e2e04 100644 --- a/clang-tools-extra/clangd/ClangdServer.cpp +++ b/clang-tools-extra/clangd/ClangdServer.cpp @@ -637,8 +637,8 @@ void ClangdServer::applyTweak(PathRef File, Range Sel, StringRef TweakID, Effect = T.takeError(); } assert(Effect.hasValue() && "Expected at least one selection"); - if (*Effect) { - // Tweaks don't apply clang-format, do that centrally here. + if (*Effect && (*Effect)->FormatEdits) { + // Format tweaks that require it centrally here. for (auto &It : (*Effect)->ApplyEdits) { Edit &E = It.second; format::FormatStyle Style = diff --git a/clang-tools-extra/clangd/refactor/Tweak.h b/clang-tools-extra/clangd/refactor/Tweak.h index 60ee34d138d6b..5b2d9cc80d9fd 100644 --- a/clang-tools-extra/clangd/refactor/Tweak.h +++ b/clang-tools-extra/clangd/refactor/Tweak.h @@ -78,6 +78,9 @@ class Tweak { /// A message to be displayed to the user. llvm::Optional ShowMessage; FileEdits ApplyEdits; + /// Whether the edits should be formatted before presenting to the client. + /// Note that it applies to all files. + bool FormatEdits = true; static Effect showMessage(StringRef S) { Effect E; diff --git a/clang-tools-extra/clangd/unittests/ClangdTests.cpp b/clang-tools-extra/clangd/unittests/ClangdTests.cpp index 49e1f7aa93b67..07f5da1fbc52f 100644 --- a/clang-tools-extra/clangd/unittests/ClangdTests.cpp +++ b/clang-tools-extra/clangd/unittests/ClangdTests.cpp @@ -18,12 +18,14 @@ #include "TestTU.h" #include "TidyProvider.h" #include "URI.h" +#include "refactor/Tweak.h" #include "support/MemoryTree.h" #include "support/Path.h" #include "support/Threading.h" #include "clang/Config/config.h" #include "clang/Sema/CodeCompleteConsumer.h" #include "clang/Tooling/ArgumentsAdjusters.h" +#include "clang/Tooling/Core/Replacement.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" @@ -31,6 +33,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Errc.h" +#include "llvm/Support/Error.h" #include "llvm/Support/Path.h" #include "llvm/Support/Regex.h" #include "llvm/Support/VirtualFileSystem.h" @@ -1259,6 +1262,60 @@ TEST(ClangdServer, MemoryUsageTest) { ASSERT_TRUE(MT.children().count("tuscheduler")); EXPECT_TRUE(MT.child("tuscheduler").children().count(FooCpp)); } + +TEST(ClangdServer, RespectsTweakFormatting) { + static constexpr const char *TweakID = "ModuleTweak"; + static constexpr const char *NewContents = "{not;\nformatted;}"; + + // Contributes a tweak that generates a non-formatted insertion and disables + // formatting. + struct TweakContributingModule final : public FeatureModule { + struct ModuleTweak final : public Tweak { + const char *id() const override { return TweakID; } + bool prepare(const Selection &Sel) override { return true; } + Expected apply(const Selection &Sel) override { + auto &SM = Sel.AST->getSourceManager(); + llvm::StringRef FilePath = SM.getFilename(Sel.Cursor); + tooling::Replacements Reps; + llvm::cantFail( + Reps.add(tooling::Replacement(FilePath, 0, 0, NewContents))); + auto E = llvm::cantFail(Effect::mainFileEdit(SM, std::move(Reps))); + E.FormatEdits = false; + return E; + } + std::string title() const override { return id(); } + llvm::StringLiteral kind() const override { + return llvm::StringLiteral(""); + }; + }; + + void contributeTweaks(std::vector> &Out) override { + Out.emplace_back(new ModuleTweak); + } + }; + + MockFS FS; + MockCompilationDatabase CDB; + auto Opts = ClangdServer::optsForTest(); + FeatureModuleSet Set; + Set.add(std::make_unique()); + Opts.FeatureModules = &Set; + ClangdServer Server(CDB, FS, Opts); + + auto FooCpp = testPath("foo.cpp"); + Server.addDocument(FooCpp, ""); + ASSERT_TRUE(Server.blockUntilIdleForTest()); + + // Ensure that disabled formatting is respected. + Notification N; + Server.applyTweak(FooCpp, {}, TweakID, [&](llvm::Expected E) { + ASSERT_TRUE(static_cast(E)); + EXPECT_THAT(llvm::cantFail(E->ApplyEdits.lookup(FooCpp).apply()), + NewContents); + N.notify(); + }); + N.wait(); +} } // namespace } // namespace clangd } // namespace clang From 2dbe1c675fe94eeb7973dcc25b049d25f4ca4fa0 Mon Sep 17 00:00:00 2001 From: Melanie Blower Date: Mon, 28 Jun 2021 15:09:27 -0400 Subject: [PATCH 094/619] [clang][PATCH][nfc] Refactor TargetInfo::adjust to pass DiagnosticsEngine to allow diagnostics on target-unsupported options Reviewed By: aaron.ballman Differential Revision: https://reviews.llvm.org/D104729 --- clang/include/clang/Basic/TargetInfo.h | 2 +- clang/lib/Basic/TargetInfo.cpp | 2 +- clang/lib/Basic/Targets/AMDGPU.cpp | 4 ++-- clang/lib/Basic/Targets/AMDGPU.h | 2 +- clang/lib/Basic/Targets/PPC.cpp | 4 ++-- clang/lib/Basic/Targets/PPC.h | 2 +- clang/lib/Basic/Targets/SPIR.h | 4 ++-- clang/lib/Basic/Targets/WebAssembly.cpp | 3 ++- clang/lib/Basic/Targets/WebAssembly.h | 2 +- clang/lib/Frontend/ASTUnit.cpp | 2 +- clang/lib/Frontend/CompilerInstance.cpp | 4 ++-- clang/lib/Interpreter/Interpreter.cpp | 2 +- clang/tools/clang-import-test/clang-import-test.cpp | 2 +- .../Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp | 3 ++- .../Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.cpp | 2 +- 15 files changed, 21 insertions(+), 19 deletions(-) diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index d59bad30e7428..20f6afa76cbb3 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -1162,7 +1162,7 @@ class TargetInfo : public virtual TransferrableTargetInfo, /// Apply changes to the target information with respect to certain /// language options which change the target configuration and adjust /// the language based on the target options where applicable. - virtual void adjust(LangOptions &Opts); + virtual void adjust(DiagnosticsEngine &Diags, LangOptions &Opts); /// Adjust target options based on codegen options. virtual void adjustTargetOptions(const CodeGenOptions &CGOpts, diff --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp index e73b4a3a40c74..4c2859e5eda7f 100644 --- a/clang/lib/Basic/TargetInfo.cpp +++ b/clang/lib/Basic/TargetInfo.cpp @@ -346,7 +346,7 @@ bool TargetInfo::isTypeSigned(IntType T) { /// Apply changes to the target information with respect to certain /// language options which change the target configuration and adjust /// the language based on the target options where applicable. -void TargetInfo::adjust(LangOptions &Opts) { +void TargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) { if (Opts.NoBitFieldTypeAlign) UseBitFieldTypeAlignment = false; diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index 595132e2e70ba..fac786dbcf9e2 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -358,8 +358,8 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; } -void AMDGPUTargetInfo::adjust(LangOptions &Opts) { - TargetInfo::adjust(Opts); +void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) { + TargetInfo::adjust(Diags, Opts); // ToDo: There are still a few places using default address space as private // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL // can be removed from the following line. diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h index fe5c61c6ba2bb..244a6e0446905 100644 --- a/clang/lib/Basic/Targets/AMDGPU.h +++ b/clang/lib/Basic/Targets/AMDGPU.h @@ -93,7 +93,7 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo { void setAddressSpaceMap(bool DefaultIsPrivate); - void adjust(LangOptions &Opts) override; + void adjust(DiagnosticsEngine &Diags, LangOptions &Opts) override; uint64_t getPointerWidthV(unsigned AddrSpace) const override { if (isR600(getTriple())) diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp index 6860b5e5d02fa..d431dda970222 100644 --- a/clang/lib/Basic/Targets/PPC.cpp +++ b/clang/lib/Basic/Targets/PPC.cpp @@ -614,10 +614,10 @@ void PPCTargetInfo::fillValidCPUList(SmallVectorImpl &Values) const { Values.append(std::begin(ValidCPUNames), std::end(ValidCPUNames)); } -void PPCTargetInfo::adjust(LangOptions &Opts) { +void PPCTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) { if (HasAltivec) Opts.AltiVec = 1; - TargetInfo::adjust(Opts); + TargetInfo::adjust(Diags, Opts); if (LongDoubleFormat != &llvm::APFloat::IEEEdouble()) LongDoubleFormat = Opts.PPCIEEELongDouble ? &llvm::APFloat::IEEEquad() diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h index 554f2174fee00..18ee1194c759d 100644 --- a/clang/lib/Basic/Targets/PPC.h +++ b/clang/lib/Basic/Targets/PPC.h @@ -89,7 +89,7 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo { } // Set the language option for altivec based on our value. - void adjust(LangOptions &Opts) override; + void adjust(DiagnosticsEngine &Diags, LangOptions &Opts) override; // Note: GCC recognizes the following additional cpus: // 401, 403, 405, 405fp, 440fp, 464, 464fp, 476, 476fp, 505, 740, 801, diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h index c429b27709ecb..50f34abd66309 100644 --- a/clang/lib/Basic/Targets/SPIR.h +++ b/clang/lib/Basic/Targets/SPIR.h @@ -135,8 +135,8 @@ class LLVM_LIBRARY_VISIBILITY SPIRTargetInfo : public TargetInfo { AddrSpaceMap = DefaultIsGeneric ? &SPIRDefIsGenMap : &SPIRDefIsPrivMap; } - void adjust(LangOptions &Opts) override { - TargetInfo::adjust(Opts); + void adjust(DiagnosticsEngine &Diags, LangOptions &Opts) override { + TargetInfo::adjust(Diags, Opts); // FIXME: SYCL specification considers unannotated pointers and references // to be pointing to the generic address space. See section 5.9.3 of // SYCL 2020 specification. diff --git a/clang/lib/Basic/Targets/WebAssembly.cpp b/clang/lib/Basic/Targets/WebAssembly.cpp index 2a5055c3d534b..7ef79849cb75d 100644 --- a/clang/lib/Basic/Targets/WebAssembly.cpp +++ b/clang/lib/Basic/Targets/WebAssembly.cpp @@ -234,7 +234,8 @@ ArrayRef WebAssemblyTargetInfo::getTargetBuiltins() const { Builtin::FirstTSBuiltin); } -void WebAssemblyTargetInfo::adjust(LangOptions &Opts) { +void WebAssemblyTargetInfo::adjust(DiagnosticsEngine &Diags, + LangOptions &Opts) { // If the Atomics feature isn't available, turn off POSIXThreads and // ThreadModel, so that we don't predefine _REENTRANT or __STDCPP_THREADS__. if (!HasAtomics) { diff --git a/clang/lib/Basic/Targets/WebAssembly.h b/clang/lib/Basic/Targets/WebAssembly.h index 70115183e46b9..b29730c5d706b 100644 --- a/clang/lib/Basic/Targets/WebAssembly.h +++ b/clang/lib/Basic/Targets/WebAssembly.h @@ -138,7 +138,7 @@ class LLVM_LIBRARY_VISIBILITY WebAssemblyTargetInfo : public TargetInfo { bool hasProtectedVisibility() const override { return false; } - void adjust(LangOptions &Opts) override; + void adjust(DiagnosticsEngine &Diags, LangOptions &Opts) override; }; class LLVM_LIBRARY_VISIBILITY WebAssembly32TargetInfo diff --git a/clang/lib/Frontend/ASTUnit.cpp b/clang/lib/Frontend/ASTUnit.cpp index 988090a8b1b13..4f92833e4229c 100644 --- a/clang/lib/Frontend/ASTUnit.cpp +++ b/clang/lib/Frontend/ASTUnit.cpp @@ -588,7 +588,7 @@ class ASTInfoCollector : public ASTReaderListener { // // FIXME: We shouldn't need to do this, the target should be immutable once // created. This complexity should be lifted elsewhere. - Target->adjust(LangOpt); + Target->adjust(PP.getDiagnostics(), LangOpt); // Initialize the preprocessor. PP.Initialize(*Target); diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp index 063384130f730..2ae3be6814dec 100644 --- a/clang/lib/Frontend/CompilerInstance.cpp +++ b/clang/lib/Frontend/CompilerInstance.cpp @@ -142,7 +142,7 @@ bool CompilerInstance::createTarget() { // Inform the target of the language options. // FIXME: We shouldn't need to do this, the target should be immutable once // created. This complexity should be lifted elsewhere. - getTarget().adjust(getLangOpts()); + getTarget().adjust(getDiagnostics(), getLangOpts()); // Adjust target options based on codegen options. getTarget().adjustTargetOptions(getCodeGenOpts(), getTargetOpts()); @@ -457,7 +457,7 @@ void CompilerInstance::createPreprocessor(TranslationUnitKind TUKind) { getSourceManager(), *HeaderInfo, *this, /*IdentifierInfoLookup=*/nullptr, /*OwnsHeaderSearch=*/true, TUKind); - getTarget().adjust(getLangOpts()); + getTarget().adjust(getDiagnostics(), getLangOpts()); PP->Initialize(getTarget(), getAuxTarget()); if (PPOpts.DetailedRecord) diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp index 711a5e9ff0168..768847f9f0352 100644 --- a/clang/lib/Interpreter/Interpreter.cpp +++ b/clang/lib/Interpreter/Interpreter.cpp @@ -110,7 +110,7 @@ CreateCI(const llvm::opt::ArgStringList &Argv) { "Initialization failed. " "Target is missing"); - Clang->getTarget().adjust(Clang->getLangOpts()); + Clang->getTarget().adjust(Clang->getDiagnostics(), Clang->getLangOpts()); return std::move(Clang); } diff --git a/clang/tools/clang-import-test/clang-import-test.cpp b/clang/tools/clang-import-test/clang-import-test.cpp index df173cf49f35e..fa5d7a54f53b4 100644 --- a/clang/tools/clang-import-test/clang-import-test.cpp +++ b/clang/tools/clang-import-test/clang-import-test.cpp @@ -208,7 +208,7 @@ std::unique_ptr BuildCompilerInstance() { TargetInfo *TI = TargetInfo::CreateTargetInfo( Ins->getDiagnostics(), Ins->getInvocation().TargetOpts); Ins->setTarget(TI); - Ins->getTarget().adjust(Ins->getLangOpts()); + Ins->getTarget().adjust(Ins->getDiagnostics(), Ins->getLangOpts()); Ins->createFileManager(); Ins->createSourceManager(Ins->getFileManager()); Ins->createPreprocessor(TU_Complete); diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp index d38e64f9c5542..af44face09ed1 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp @@ -658,7 +658,8 @@ ClangExpressionParser::ClangExpressionParser( // // FIXME: We shouldn't need to do this, the target should be immutable once // created. This complexity should be lifted elsewhere. - m_compiler->getTarget().adjust(m_compiler->getLangOpts()); + m_compiler->getTarget().adjust(m_compiler->getDiagnostics(), + m_compiler->getLangOpts()); // 6. Set up the diagnostic buffer for reporting errors diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.cpp index c337ee9f79f6b..65f8a9dcdb004 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.cpp @@ -704,7 +704,7 @@ ClangModulesDeclVendor::Create(Target &target) { if (!instance->hasTarget()) return nullptr; - instance->getTarget().adjust(instance->getLangOpts()); + instance->getTarget().adjust(*diagnostics_engine, instance->getLangOpts()); if (!action->BeginSourceFile(*instance, instance->getFrontendOpts().Inputs[0])) From e837ce2a32369b2e9e8e5d60270c072c7dd63827 Mon Sep 17 00:00:00 2001 From: Kirill Bobyrev Date: Mon, 28 Jun 2021 21:15:00 +0200 Subject: [PATCH 095/619] [clang-tidy] Add -line-filter to run-clang-tidy.py This patch allows the use of --line-filter in clang-tidy.py from run-clang-tidy.py Author: [bansan (Vincent LE GARREC)](https://reviews.llvm.org/p/bansan/) Reviewed By: kbobyrev Differential Revision: https://reviews.llvm.org/D104981 --- clang-tools-extra/clang-tidy/tool/run-clang-tidy.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/clang-tools-extra/clang-tidy/tool/run-clang-tidy.py b/clang-tools-extra/clang-tidy/tool/run-clang-tidy.py index 313ecd2f95716..de810230b2852 100755 --- a/clang-tools-extra/clang-tidy/tool/run-clang-tidy.py +++ b/clang-tools-extra/clang-tidy/tool/run-clang-tidy.py @@ -81,13 +81,16 @@ def make_absolute(f, directory): def get_tidy_invocation(f, clang_tidy_binary, checks, tmpdir, build_path, header_filter, allow_enabling_alpha_checkers, - extra_arg, extra_arg_before, quiet, config): + extra_arg, extra_arg_before, quiet, config, + line_filter): """Gets a command line for clang-tidy.""" start = [clang_tidy_binary, '--use-color'] if allow_enabling_alpha_checkers: start.append('-allow-enabling-analyzer-alpha-checkers') if header_filter is not None: start.append('-header-filter=' + header_filter) + if line_filter is not None: + start.append('-line-filter=' + line_filter) if checks: start.append('-checks=' + checks) if tmpdir is not None: @@ -165,7 +168,7 @@ def run_tidy(args, tmpdir, build_path, queue, lock, failed_files): tmpdir, build_path, args.header_filter, args.allow_enabling_alpha_checkers, args.extra_arg, args.extra_arg_before, - args.quiet, args.config) + args.quiet, args.config, args.line_filter) proc = subprocess.Popen(invocation, stdout=subprocess.PIPE, stderr=subprocess.PIPE) output, err = proc.communicate() @@ -209,6 +212,9 @@ def main(): 'headers to output diagnostics from. Diagnostics from ' 'the main file of each translation unit are always ' 'displayed.') + parser.add_argument('-line-filter', default=None, + help='List of files with line ranges to filter the' + 'warnings.') if yaml: parser.add_argument('-export-fixes', metavar='filename', dest='export_fixes', help='Create a yaml file to store suggested fixes in, ' From cd0a1226b50081e86eb75a89d01e8782423971a0 Mon Sep 17 00:00:00 2001 From: peter klausler Date: Mon, 28 Jun 2021 11:41:04 -0700 Subject: [PATCH 096/619] [flang] Fix "non-advancing" I/O, support $ in FORMAT Non-advancing I/O was failing; ExternalFileUnit was losing track of what writes had been committed to the file. Fixed. Also, support the common extension of $ and \ in a FORMAT as being equivalent to ADVANCE=NO. Differential Revision: https://reviews.llvm.org/D105046 --- flang/runtime/connection.h | 1 - flang/runtime/format-implementation.h | 2 ++ flang/runtime/format.h | 1 + flang/runtime/io-api.cpp | 9 ++++--- flang/runtime/io-stmt.cpp | 19 ++++++++----- flang/runtime/io-stmt.h | 15 ++++++----- flang/runtime/unit-map.cpp | 2 +- flang/runtime/unit.cpp | 39 +++++++++++++++++++-------- flang/runtime/unit.h | 2 ++ 9 files changed, 60 insertions(+), 30 deletions(-) diff --git a/flang/runtime/connection.h b/flang/runtime/connection.h index 6eb6b62ccab7e..6d0678f18abfa 100644 --- a/flang/runtime/connection.h +++ b/flang/runtime/connection.h @@ -49,7 +49,6 @@ struct ConnectionState : public ConnectionAttributes { std::int64_t currentRecordNumber{1}; // 1 is first std::int64_t positionInRecord{0}; // offset in current record std::int64_t furthestPositionInRecord{0}; // max(position+bytes) - bool nonAdvancing{false}; // ADVANCE='NO' // Set at end of non-advancing I/O data transfer std::optional leftTabLimit; // offset in current record diff --git a/flang/runtime/format-implementation.h b/flang/runtime/format-implementation.h index 63ca682eb3e7a..8c41a984693fa 100644 --- a/flang/runtime/format-implementation.h +++ b/flang/runtime/format-implementation.h @@ -357,6 +357,8 @@ int FormatControl::CueUpNextDataEdit(Context &context, bool stop) { } } else if (ch == '/') { context.AdvanceRecord(repeat && *repeat > 0 ? *repeat : 1); + } else if (ch == '$' || ch == '\\') { + context.mutableModes().nonAdvancing = true; } else { context.SignalError(IostatErrorInFormat, "Invalid character '%c' in FORMAT", static_cast(ch)); diff --git a/flang/runtime/format.h b/flang/runtime/format.h index 77daa38f3262e..1989aa79a98ee 100644 --- a/flang/runtime/format.h +++ b/flang/runtime/format.h @@ -35,6 +35,7 @@ struct MutableModes { char delim{'\0'}; // DELIM= short scale{0}; // kP bool inNamelist{false}; // skip ! comments + bool nonAdvancing{false}; // ADVANCE='NO', or $ or \ in FORMAT }; // A single edit descriptor extracted from a FORMAT diff --git a/flang/runtime/io-api.cpp b/flang/runtime/io-api.cpp index d1b13cb330eba..8996b44669dda 100644 --- a/flang/runtime/io-api.cpp +++ b/flang/runtime/io-api.cpp @@ -437,12 +437,13 @@ static bool YesOrNo(const char *keyword, std::size_t length, const char *what, bool IONAME(SetAdvance)( Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; - ConnectionState &connection{io.GetConnectionState()}; - connection.nonAdvancing = - !YesOrNo(keyword, length, "ADVANCE", io.GetIoErrorHandler()); - if (connection.nonAdvancing && connection.access == Access::Direct) { + bool nonAdvancing{ + !YesOrNo(keyword, length, "ADVANCE", io.GetIoErrorHandler())}; + if (nonAdvancing && io.GetConnectionState().access == Access::Direct) { io.GetIoErrorHandler().SignalError( "Non-advancing I/O attempted on direct access file"); + } else { + io.mutableModes().nonAdvancing = nonAdvancing; } return true; } diff --git a/flang/runtime/io-stmt.cpp b/flang/runtime/io-stmt.cpp index 3432f847cce51..56ea6129d5501 100644 --- a/flang/runtime/io-stmt.cpp +++ b/flang/runtime/io-stmt.cpp @@ -203,9 +203,8 @@ MutableModes &ExternalIoStatementBase::mutableModes() { return unit_.modes; } ConnectionState &ExternalIoStatementBase::GetConnectionState() { return unit_; } int ExternalIoStatementBase::EndIoStatement() { - if (unit_.nonAdvancing) { + if (mutableModes().nonAdvancing) { unit_.leftTabLimit = unit_.furthestPositionInRecord; - unit_.nonAdvancing = false; } else { unit_.leftTabLimit.reset(); } @@ -260,14 +259,20 @@ int NoUnitIoStatementState::EndIoStatement() { return result; } +template +ExternalIoStatementState::ExternalIoStatementState( + ExternalFileUnit &unit, const char *sourceFile, int sourceLine) + : ExternalIoStatementBase{unit, sourceFile, sourceLine}, mutableModes_{ + unit.modes} {} + template int ExternalIoStatementState::EndIoStatement() { if constexpr (DIR == Direction::Input) { BeginReadingRecord(); // in case there were no I/O items - if (!unit().nonAdvancing) { + if (!mutableModes().nonAdvancing) { FinishReadingRecord(); } } else { - if (!unit().nonAdvancing) { + if (!mutableModes().nonAdvancing) { unit().AdvanceRecord(*this); } unit().FlushIfTerminal(*this); @@ -375,7 +380,7 @@ ExternalFormattedIoStatementState::ExternalFormattedIoStatementState( ExternalFileUnit &unit, const CHAR *format, std::size_t formatLength, const char *sourceFile, int sourceLine) : ExternalIoStatementState{unit, sourceFile, sourceLine}, - mutableModes_{unit.modes}, format_{*this, format, formatLength} {} + format_{*this, format, formatLength} {} template int ExternalFormattedIoStatementState::EndIoStatement() { @@ -558,7 +563,7 @@ std::optional IoStatementState::NextInField( return std::optional{' '}; } IoErrorHandler &handler{GetIoErrorHandler()}; - if (connection.nonAdvancing) { + if (mutableModes().nonAdvancing) { handler.SignalEor(); } else { handler.SignalError(IostatRecordReadOverrun); @@ -867,7 +872,7 @@ int ExternalMiscIoStatementState::EndIoStatement() { ExternalFileUnit &ext{unit()}; switch (which_) { case Flush: - ext.Flush(*this); + ext.FlushOutput(*this); std::fflush(nullptr); // flushes C stdio output streams (12.9(2)) break; case Backspace: diff --git a/flang/runtime/io-stmt.h b/flang/runtime/io-stmt.h index 34c4a47363c0d..49964359a48ba 100644 --- a/flang/runtime/io-stmt.h +++ b/flang/runtime/io-stmt.h @@ -320,7 +320,9 @@ template class ExternalIoStatementState : public ExternalIoStatementBase, public IoDirectionState { public: - using ExternalIoStatementBase::ExternalIoStatementBase; + ExternalIoStatementState( + ExternalFileUnit &, const char *sourceFile = nullptr, int sourceLine = 0); + MutableModes &mutableModes() { return mutableModes_; } int EndIoStatement(); bool Emit(const char *, std::size_t, std::size_t elementBytes); bool Emit(const char *, std::size_t); @@ -333,6 +335,12 @@ class ExternalIoStatementState : public ExternalIoStatementBase, void HandleAbsolutePosition(std::int64_t); bool BeginReadingRecord(); void FinishReadingRecord(); + +private: + // These are forked from ConnectionState's modes at the beginning + // of each formatted I/O statement so they may be overridden by control + // edit descriptors during the statement. + MutableModes mutableModes_; }; template @@ -343,7 +351,6 @@ class ExternalFormattedIoStatementState : public ExternalIoStatementState, ExternalFormattedIoStatementState(ExternalFileUnit &, const CharType *format, std::size_t formatLength, const char *sourceFile = nullptr, int sourceLine = 0); - MutableModes &mutableModes() { return mutableModes_; } int EndIoStatement(); std::optional GetNextDataEdit( IoStatementState &, int maxRepeat = 1) { @@ -351,10 +358,6 @@ class ExternalFormattedIoStatementState : public ExternalIoStatementState, } private: - // These are forked from ConnectionState's modes at the beginning - // of each formatted I/O statement so they may be overridden by control - // edit descriptors during the statement. - MutableModes mutableModes_; FormatControl format_; }; diff --git a/flang/runtime/unit-map.cpp b/flang/runtime/unit-map.cpp index 915c747371850..2a7e414b3facc 100644 --- a/flang/runtime/unit-map.cpp +++ b/flang/runtime/unit-map.cpp @@ -67,7 +67,7 @@ void UnitMap::FlushAll(IoErrorHandler &handler) { CriticalSection critical{lock_}; for (int j{0}; j < buckets_; ++j) { for (Chain *p{bucket_[j].get()}; p; p = p->next.get()) { - p->unit.Flush(handler); + p->unit.FlushOutput(handler); } } } diff --git a/flang/runtime/unit.cpp b/flang/runtime/unit.cpp index aafb71fb6d73c..e1ff6e7fd2930 100644 --- a/flang/runtime/unit.cpp +++ b/flang/runtime/unit.cpp @@ -32,7 +32,7 @@ void FlushOutputOnCrash(const Terminator &terminator) { if (defaultOutput) { IoErrorHandler handler{terminator}; handler.HasIoStat(); // prevent nested crash if flush has error - defaultOutput->Flush(handler); + defaultOutput->FlushOutput(handler); } } @@ -118,7 +118,7 @@ void ExternalFileUnit::OpenUnit(std::optional status, } // Otherwise, OPEN on open unit with new FILE= implies CLOSE DoImpliedEndfile(handler); - Flush(handler); + FlushOutput(handler); Close(CloseStatus::Keep, handler); } set_path(std::move(newPath), newPathLength); @@ -168,7 +168,7 @@ void ExternalFileUnit::OpenAnonymousUnit(std::optional status, void ExternalFileUnit::CloseUnit(CloseStatus status, IoErrorHandler &handler) { DoImpliedEndfile(handler); - Flush(handler); + FlushOutput(handler); Close(status, handler); } @@ -462,12 +462,9 @@ bool ExternalFileUnit::AdvanceRecord(IoErrorHandler &handler) { ok = ok && Emit("\n", 1, 1, handler); // TODO: Windows CR+LF } } - frameOffsetInFile_ += - recordOffsetInFrame_ + recordLength.value_or(furthestPositionInRecord); - recordOffsetInFrame_ = 0; + CommitWrites(); impliedEndfile_ = true; ++currentRecordNumber; - BeginRecord(); return ok; } } @@ -499,9 +496,24 @@ void ExternalFileUnit::BackspaceRecord(IoErrorHandler &handler) { } } +void ExternalFileUnit::FlushOutput(IoErrorHandler &handler) { + if (!mayPosition()) { + auto frameAt{FrameAt()}; + if (frameOffsetInFile_ >= frameAt && + frameOffsetInFile_ < + static_cast(frameAt + FrameLength())) { + // A Flush() that's about to happen to a non-positionable file + // needs to advance frameOffsetInFile_ to prevent attempts at + // impossible seeks + CommitWrites(); + } + } + Flush(handler); +} + void ExternalFileUnit::FlushIfTerminal(IoErrorHandler &handler) { if (isTerminal()) { - Flush(handler); + FlushOutput(handler); } } @@ -533,8 +545,6 @@ void ExternalFileUnit::Rewind(IoErrorHandler &handler) { } void ExternalFileUnit::EndIoStatement() { - frameOffsetInFile_ += recordOffsetInFrame_; - recordOffsetInFrame_ = 0; io_.reset(); u_.emplace(); lock_.Drop(); @@ -585,7 +595,7 @@ void ExternalFileUnit::BeginSequentialVariableUnformattedInputRecord( void ExternalFileUnit::BeginSequentialVariableFormattedInputRecord( IoErrorHandler &handler) { if (this == defaultInput && defaultOutput) { - defaultOutput->Flush(handler); + defaultOutput->FlushOutput(handler); } std::size_t length{0}; do { @@ -701,6 +711,13 @@ void ExternalFileUnit::DoEndfile(IoErrorHandler &handler) { impliedEndfile_ = false; } +void ExternalFileUnit::CommitWrites() { + frameOffsetInFile_ += + recordOffsetInFrame_ + recordLength.value_or(furthestPositionInRecord); + recordOffsetInFrame_ = 0; + BeginRecord(); +} + ChildIo &ExternalFileUnit::PushChildIo(IoStatementState &parent) { OwningPtr current{std::move(child_)}; Terminator &terminator{parent.GetIoErrorHandler()}; diff --git a/flang/runtime/unit.h b/flang/runtime/unit.h index 68876ff536399..99ba05d78bee6 100644 --- a/flang/runtime/unit.h +++ b/flang/runtime/unit.h @@ -82,6 +82,7 @@ class ExternalFileUnit : public ConnectionState, void FinishReadingRecord(IoErrorHandler &); bool AdvanceRecord(IoErrorHandler &); void BackspaceRecord(IoErrorHandler &); + void FlushOutput(IoErrorHandler &); void FlushIfTerminal(IoErrorHandler &); void Endfile(IoErrorHandler &); void Rewind(IoErrorHandler &); @@ -107,6 +108,7 @@ class ExternalFileUnit : public ConnectionState, bool SetSequentialVariableFormattedRecordLength(); void DoImpliedEndfile(IoErrorHandler &); void DoEndfile(IoErrorHandler &); + void CommitWrites(); int unitNumber_{-1}; Direction direction_{Direction::Output}; From 14a8aa615597ef0aa424ac9545906bf8b9865063 Mon Sep 17 00:00:00 2001 From: Scott Linder Date: Mon, 28 Jun 2021 18:54:41 +0000 Subject: [PATCH 097/619] [ADT] Add makeVisitor to STLExtras.h Adds a utility to combine multiple Callables into a single Callable. This is useful to make constructing a visitor for `std::visit`-like functions more natural; functions like this will be added in future patches. Intended to supercede https://reviews.llvm.org/D99560 by perfectly-forwarding the combined Callables. Reviewed By: dblaikie Differential Revision: https://reviews.llvm.org/D100670 --- llvm/include/llvm/ADT/STLExtras.h | 52 ++++++++++ llvm/unittests/ADT/STLExtrasTest.cpp | 147 +++++++++++++++++++++++++++ 2 files changed, 199 insertions(+) diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h index 430da0f42348b..372907be8a11d 100644 --- a/llvm/include/llvm/ADT/STLExtras.h +++ b/llvm/include/llvm/ADT/STLExtras.h @@ -1299,6 +1299,58 @@ using is_one_of = disjunction...>; template using are_base_of = conjunction...>; +namespace detail { +template struct Visitor; + +template +struct Visitor : remove_cvref_t, Visitor { + explicit constexpr Visitor(HeadT &&Head, TailTs &&...Tail) + : remove_cvref_t(std::forward(Head)), + Visitor(std::forward(Tail)...) {} + using remove_cvref_t::operator(); + using Visitor::operator(); +}; + +template struct Visitor : remove_cvref_t { + explicit constexpr Visitor(HeadT &&Head) + : remove_cvref_t(std::forward(Head)) {} + using remove_cvref_t::operator(); +}; +} // namespace detail + +/// Returns an opaquely-typed Callable object whose operator() overload set is +/// the sum of the operator() overload sets of each CallableT in CallableTs. +/// +/// The type of the returned object derives from each CallableT in CallableTs. +/// The returned object is constructed by invoking the appropriate copy or move +/// constructor of each CallableT, as selected by overload resolution on the +/// corresponding argument to makeVisitor. +/// +/// Example: +/// +/// \code +/// auto visitor = makeVisitor([](auto) { return "unhandled type"; }, +/// [](int i) { return "int"; }, +/// [](std::string s) { return "str"; }); +/// auto a = visitor(42); // `a` is now "int". +/// auto b = visitor("foo"); // `b` is now "str". +/// auto c = visitor(3.14f); // `c` is now "unhandled type". +/// \endcode +/// +/// Example of making a visitor with a lambda which captures a move-only type: +/// +/// \code +/// std::unique_ptr FH = /* ... */; +/// auto visitor = makeVisitor( +/// [FH{std::move(FH)}](Foo F) { return FH->handle(F); }, +/// [](int i) { return i; }, +/// [](std::string s) { return atoi(s); }); +/// \endcode +template +constexpr decltype(auto) makeVisitor(CallableTs &&...Callables) { + return detail::Visitor(std::forward(Callables)...); +} + //===----------------------------------------------------------------------===// // Extra additions for arrays //===----------------------------------------------------------------------===// diff --git a/llvm/unittests/ADT/STLExtrasTest.cpp b/llvm/unittests/ADT/STLExtrasTest.cpp index 512c594d86322..eb87670700f7d 100644 --- a/llvm/unittests/ADT/STLExtrasTest.cpp +++ b/llvm/unittests/ADT/STLExtrasTest.cpp @@ -764,4 +764,151 @@ TEST(STLExtras, Unique) { EXPECT_EQ(3, V[3]); } +TEST(STLExtrasTest, TypesAreDistinct) { + EXPECT_TRUE((llvm::TypesAreDistinct<>::value)); + EXPECT_TRUE((llvm::TypesAreDistinct::value)); + EXPECT_FALSE((llvm::TypesAreDistinct::value)); + EXPECT_TRUE((llvm::TypesAreDistinct::value)); + EXPECT_FALSE((llvm::TypesAreDistinct::value)); + EXPECT_TRUE((llvm::TypesAreDistinct::value)); + EXPECT_FALSE((llvm::TypesAreDistinct::value)); + EXPECT_TRUE((llvm::TypesAreDistinct::value)); + EXPECT_TRUE((llvm::TypesAreDistinct::value)); + EXPECT_TRUE((llvm::TypesAreDistinct::value)); + EXPECT_TRUE((llvm::TypesAreDistinct::value)); +} + +TEST(STLExtrasTest, FirstIndexOfType) { + EXPECT_EQ((llvm::FirstIndexOfType::value), 0u); + EXPECT_EQ((llvm::FirstIndexOfType::value), 0u); + EXPECT_EQ((llvm::FirstIndexOfType::value), 1u); + EXPECT_EQ((llvm::FirstIndexOfType::value), + 2u); +} + +TEST(STLExtrasTest, TypeAtIndex) { + EXPECT_TRUE((std::is_same>::value)); + EXPECT_TRUE((std::is_same>::value)); + EXPECT_TRUE((std::is_same>::value)); + EXPECT_TRUE( + (std::is_same>::value)); + EXPECT_TRUE( + (std::is_same>::value)); + EXPECT_TRUE( + (std::is_same>::value)); +} + +TEST(STLExtrasTest, MakeVisitorOneCallable) { + auto IdentityLambda = [](auto X) { return X; }; + auto IdentityVisitor = makeVisitor(IdentityLambda); + EXPECT_EQ(IdentityLambda(1), IdentityVisitor(1)); + EXPECT_EQ(IdentityLambda(2.0f), IdentityVisitor(2.0f)); + EXPECT_TRUE((std::is_same::value)); + EXPECT_TRUE((std::is_same::value)); +} + +TEST(STLExtrasTest, MakeVisitorTwoCallables) { + auto Visitor = + makeVisitor([](int) { return "int"; }, [](std::string) { return "str"; }); + EXPECT_EQ(Visitor(42), "int"); + EXPECT_EQ(Visitor("foo"), "str"); +} + +TEST(STLExtrasTest, MakeVisitorCallableMultipleOperands) { + auto Second = makeVisitor([](int I, float F) { return F; }, + [](float F, int I) { return I; }); + EXPECT_EQ(Second(1.f, 1), 1); + EXPECT_EQ(Second(1, 1.f), 1.f); +} + +TEST(STLExtrasTest, MakeVisitorDefaultCase) { + { + auto Visitor = makeVisitor([](int I) { return I + 100; }, + [](float F) { return F * 2; }, + [](auto) { return "unhandled type"; }); + EXPECT_EQ(Visitor(24), 124); + EXPECT_EQ(Visitor(2.f), 4.f); + EXPECT_EQ(Visitor(2.), "unhandled type"); + EXPECT_EQ(Visitor(Visitor), "unhandled type"); + } + { + auto Visitor = makeVisitor([](auto) { return "unhandled type"; }, + [](int I) { return I + 100; }, + [](float F) { return F * 2; }); + EXPECT_EQ(Visitor(24), 124); + EXPECT_EQ(Visitor(2.f), 4.f); + EXPECT_EQ(Visitor(2.), "unhandled type"); + EXPECT_EQ(Visitor(Visitor), "unhandled type"); + } +} + +template +struct Functor : Counted { + using Counted::Counted; + void operator()() {} +}; + +TEST(STLExtrasTest, MakeVisitorLifetimeSemanticsPRValue) { + int Copies = 0; + int Moves = 0; + int Destructors = 0; + { + auto V = makeVisitor(Functor(Copies, Moves, Destructors)); + (void)V; + EXPECT_EQ(0, Copies); + EXPECT_EQ(1, Moves); + EXPECT_EQ(1, Destructors); + } + EXPECT_EQ(0, Copies); + EXPECT_EQ(1, Moves); + EXPECT_EQ(2, Destructors); +} + +TEST(STLExtrasTest, MakeVisitorLifetimeSemanticsRValue) { + int Copies = 0; + int Moves = 0; + int Destructors = 0; + { + Functor F(Copies, Moves, Destructors); + { + auto V = makeVisitor(std::move(F)); + (void)V; + EXPECT_EQ(0, Copies); + EXPECT_EQ(1, Moves); + EXPECT_EQ(0, Destructors); + } + EXPECT_EQ(0, Copies); + EXPECT_EQ(1, Moves); + EXPECT_EQ(1, Destructors); + } + EXPECT_EQ(0, Copies); + EXPECT_EQ(1, Moves); + EXPECT_EQ(2, Destructors); +} + +TEST(STLExtrasTest, MakeVisitorLifetimeSemanticsLValue) { + int Copies = 0; + int Moves = 0; + int Destructors = 0; + { + Functor F(Copies, Moves, Destructors); + { + auto V = makeVisitor(F); + (void)V; + EXPECT_EQ(1, Copies); + EXPECT_EQ(0, Moves); + EXPECT_EQ(0, Destructors); + } + EXPECT_EQ(1, Copies); + EXPECT_EQ(0, Moves); + EXPECT_EQ(1, Destructors); + } + EXPECT_EQ(1, Copies); + EXPECT_EQ(0, Moves); + EXPECT_EQ(2, Destructors); +} + } // namespace From 333c0acb9bb36bd89ab75abba3d368da67f7370c Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 28 Jun 2021 21:24:16 +0200 Subject: [PATCH 098/619] [Verifier] Support opaque pointers for global_ctors Adjust the assertion to allow opaque pointers. --- llvm/lib/IR/Verifier.cpp | 3 ++- llvm/test/Other/force-opaque-ptrs.ll | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 24f5d51381803..6039d1cf2dc02 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -699,8 +699,9 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) { "the third field of the element type is mandatory, " "specify i8* null to migrate from the obsoleted 2-field form"); Type *ETy = STy->getTypeAtIndex(2); + Type *Int8Ty = Type::getInt8Ty(ETy->getContext()); Assert(ETy->isPointerTy() && - cast(ETy)->getElementType()->isIntegerTy(8), + cast(ETy)->isOpaqueOrPointeeTypeMatches(Int8Ty), "wrong type for intrinsic global variable", &GV); } } diff --git a/llvm/test/Other/force-opaque-ptrs.ll b/llvm/test/Other/force-opaque-ptrs.ll index e80fdce0e6e27..fa83bb56080e5 100644 --- a/llvm/test/Other/force-opaque-ptrs.ll +++ b/llvm/test/Other/force-opaque-ptrs.ll @@ -6,6 +6,9 @@ ; CHECK: @g = external global i16 @g = external global i16 +; CHECK: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr null, ptr null }] +@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 1, void ()* null, i8* null }] + ; CHECK: @ga = alias i18, ptr @g2 @g2 = global i18 0 @ga = alias i18, i18* @g2 From 1d85d0879a75b9556b10f55739437af8233c0b64 Mon Sep 17 00:00:00 2001 From: Melanie Blower Date: Mon, 28 Jun 2021 15:47:03 -0400 Subject: [PATCH 099/619] Revert "[clang][PATCH][nfc] Refactor TargetInfo::adjust to pass DiagnosticsEngine to allow diagnostics on target-unsupported options" This reverts commit 2dbe1c675fe94eeb7973dcc25b049d25f4ca4fa0. More buildbot failures --- clang/include/clang/Basic/TargetInfo.h | 2 +- clang/lib/Basic/TargetInfo.cpp | 2 +- clang/lib/Basic/Targets/AMDGPU.cpp | 4 ++-- clang/lib/Basic/Targets/AMDGPU.h | 2 +- clang/lib/Basic/Targets/PPC.cpp | 4 ++-- clang/lib/Basic/Targets/PPC.h | 2 +- clang/lib/Basic/Targets/SPIR.h | 4 ++-- clang/lib/Basic/Targets/WebAssembly.cpp | 3 +-- clang/lib/Basic/Targets/WebAssembly.h | 2 +- clang/lib/Frontend/ASTUnit.cpp | 2 +- clang/lib/Frontend/CompilerInstance.cpp | 4 ++-- clang/lib/Interpreter/Interpreter.cpp | 2 +- clang/tools/clang-import-test/clang-import-test.cpp | 2 +- .../Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp | 3 +-- .../Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.cpp | 2 +- 15 files changed, 19 insertions(+), 21 deletions(-) diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index 20f6afa76cbb3..d59bad30e7428 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -1162,7 +1162,7 @@ class TargetInfo : public virtual TransferrableTargetInfo, /// Apply changes to the target information with respect to certain /// language options which change the target configuration and adjust /// the language based on the target options where applicable. - virtual void adjust(DiagnosticsEngine &Diags, LangOptions &Opts); + virtual void adjust(LangOptions &Opts); /// Adjust target options based on codegen options. virtual void adjustTargetOptions(const CodeGenOptions &CGOpts, diff --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp index 4c2859e5eda7f..e73b4a3a40c74 100644 --- a/clang/lib/Basic/TargetInfo.cpp +++ b/clang/lib/Basic/TargetInfo.cpp @@ -346,7 +346,7 @@ bool TargetInfo::isTypeSigned(IntType T) { /// Apply changes to the target information with respect to certain /// language options which change the target configuration and adjust /// the language based on the target options where applicable. -void TargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) { +void TargetInfo::adjust(LangOptions &Opts) { if (Opts.NoBitFieldTypeAlign) UseBitFieldTypeAlignment = false; diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index fac786dbcf9e2..595132e2e70ba 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -358,8 +358,8 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; } -void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) { - TargetInfo::adjust(Diags, Opts); +void AMDGPUTargetInfo::adjust(LangOptions &Opts) { + TargetInfo::adjust(Opts); // ToDo: There are still a few places using default address space as private // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL // can be removed from the following line. diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h index 244a6e0446905..fe5c61c6ba2bb 100644 --- a/clang/lib/Basic/Targets/AMDGPU.h +++ b/clang/lib/Basic/Targets/AMDGPU.h @@ -93,7 +93,7 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo { void setAddressSpaceMap(bool DefaultIsPrivate); - void adjust(DiagnosticsEngine &Diags, LangOptions &Opts) override; + void adjust(LangOptions &Opts) override; uint64_t getPointerWidthV(unsigned AddrSpace) const override { if (isR600(getTriple())) diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp index d431dda970222..6860b5e5d02fa 100644 --- a/clang/lib/Basic/Targets/PPC.cpp +++ b/clang/lib/Basic/Targets/PPC.cpp @@ -614,10 +614,10 @@ void PPCTargetInfo::fillValidCPUList(SmallVectorImpl &Values) const { Values.append(std::begin(ValidCPUNames), std::end(ValidCPUNames)); } -void PPCTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) { +void PPCTargetInfo::adjust(LangOptions &Opts) { if (HasAltivec) Opts.AltiVec = 1; - TargetInfo::adjust(Diags, Opts); + TargetInfo::adjust(Opts); if (LongDoubleFormat != &llvm::APFloat::IEEEdouble()) LongDoubleFormat = Opts.PPCIEEELongDouble ? &llvm::APFloat::IEEEquad() diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h index 18ee1194c759d..554f2174fee00 100644 --- a/clang/lib/Basic/Targets/PPC.h +++ b/clang/lib/Basic/Targets/PPC.h @@ -89,7 +89,7 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo { } // Set the language option for altivec based on our value. - void adjust(DiagnosticsEngine &Diags, LangOptions &Opts) override; + void adjust(LangOptions &Opts) override; // Note: GCC recognizes the following additional cpus: // 401, 403, 405, 405fp, 440fp, 464, 464fp, 476, 476fp, 505, 740, 801, diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h index 50f34abd66309..c429b27709ecb 100644 --- a/clang/lib/Basic/Targets/SPIR.h +++ b/clang/lib/Basic/Targets/SPIR.h @@ -135,8 +135,8 @@ class LLVM_LIBRARY_VISIBILITY SPIRTargetInfo : public TargetInfo { AddrSpaceMap = DefaultIsGeneric ? &SPIRDefIsGenMap : &SPIRDefIsPrivMap; } - void adjust(DiagnosticsEngine &Diags, LangOptions &Opts) override { - TargetInfo::adjust(Diags, Opts); + void adjust(LangOptions &Opts) override { + TargetInfo::adjust(Opts); // FIXME: SYCL specification considers unannotated pointers and references // to be pointing to the generic address space. See section 5.9.3 of // SYCL 2020 specification. diff --git a/clang/lib/Basic/Targets/WebAssembly.cpp b/clang/lib/Basic/Targets/WebAssembly.cpp index 7ef79849cb75d..2a5055c3d534b 100644 --- a/clang/lib/Basic/Targets/WebAssembly.cpp +++ b/clang/lib/Basic/Targets/WebAssembly.cpp @@ -234,8 +234,7 @@ ArrayRef WebAssemblyTargetInfo::getTargetBuiltins() const { Builtin::FirstTSBuiltin); } -void WebAssemblyTargetInfo::adjust(DiagnosticsEngine &Diags, - LangOptions &Opts) { +void WebAssemblyTargetInfo::adjust(LangOptions &Opts) { // If the Atomics feature isn't available, turn off POSIXThreads and // ThreadModel, so that we don't predefine _REENTRANT or __STDCPP_THREADS__. if (!HasAtomics) { diff --git a/clang/lib/Basic/Targets/WebAssembly.h b/clang/lib/Basic/Targets/WebAssembly.h index b29730c5d706b..70115183e46b9 100644 --- a/clang/lib/Basic/Targets/WebAssembly.h +++ b/clang/lib/Basic/Targets/WebAssembly.h @@ -138,7 +138,7 @@ class LLVM_LIBRARY_VISIBILITY WebAssemblyTargetInfo : public TargetInfo { bool hasProtectedVisibility() const override { return false; } - void adjust(DiagnosticsEngine &Diags, LangOptions &Opts) override; + void adjust(LangOptions &Opts) override; }; class LLVM_LIBRARY_VISIBILITY WebAssembly32TargetInfo diff --git a/clang/lib/Frontend/ASTUnit.cpp b/clang/lib/Frontend/ASTUnit.cpp index 4f92833e4229c..988090a8b1b13 100644 --- a/clang/lib/Frontend/ASTUnit.cpp +++ b/clang/lib/Frontend/ASTUnit.cpp @@ -588,7 +588,7 @@ class ASTInfoCollector : public ASTReaderListener { // // FIXME: We shouldn't need to do this, the target should be immutable once // created. This complexity should be lifted elsewhere. - Target->adjust(PP.getDiagnostics(), LangOpt); + Target->adjust(LangOpt); // Initialize the preprocessor. PP.Initialize(*Target); diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp index 2ae3be6814dec..063384130f730 100644 --- a/clang/lib/Frontend/CompilerInstance.cpp +++ b/clang/lib/Frontend/CompilerInstance.cpp @@ -142,7 +142,7 @@ bool CompilerInstance::createTarget() { // Inform the target of the language options. // FIXME: We shouldn't need to do this, the target should be immutable once // created. This complexity should be lifted elsewhere. - getTarget().adjust(getDiagnostics(), getLangOpts()); + getTarget().adjust(getLangOpts()); // Adjust target options based on codegen options. getTarget().adjustTargetOptions(getCodeGenOpts(), getTargetOpts()); @@ -457,7 +457,7 @@ void CompilerInstance::createPreprocessor(TranslationUnitKind TUKind) { getSourceManager(), *HeaderInfo, *this, /*IdentifierInfoLookup=*/nullptr, /*OwnsHeaderSearch=*/true, TUKind); - getTarget().adjust(getDiagnostics(), getLangOpts()); + getTarget().adjust(getLangOpts()); PP->Initialize(getTarget(), getAuxTarget()); if (PPOpts.DetailedRecord) diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp index 768847f9f0352..711a5e9ff0168 100644 --- a/clang/lib/Interpreter/Interpreter.cpp +++ b/clang/lib/Interpreter/Interpreter.cpp @@ -110,7 +110,7 @@ CreateCI(const llvm::opt::ArgStringList &Argv) { "Initialization failed. " "Target is missing"); - Clang->getTarget().adjust(Clang->getDiagnostics(), Clang->getLangOpts()); + Clang->getTarget().adjust(Clang->getLangOpts()); return std::move(Clang); } diff --git a/clang/tools/clang-import-test/clang-import-test.cpp b/clang/tools/clang-import-test/clang-import-test.cpp index fa5d7a54f53b4..df173cf49f35e 100644 --- a/clang/tools/clang-import-test/clang-import-test.cpp +++ b/clang/tools/clang-import-test/clang-import-test.cpp @@ -208,7 +208,7 @@ std::unique_ptr BuildCompilerInstance() { TargetInfo *TI = TargetInfo::CreateTargetInfo( Ins->getDiagnostics(), Ins->getInvocation().TargetOpts); Ins->setTarget(TI); - Ins->getTarget().adjust(Ins->getDiagnostics(), Ins->getLangOpts()); + Ins->getTarget().adjust(Ins->getLangOpts()); Ins->createFileManager(); Ins->createSourceManager(Ins->getFileManager()); Ins->createPreprocessor(TU_Complete); diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp index af44face09ed1..d38e64f9c5542 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp @@ -658,8 +658,7 @@ ClangExpressionParser::ClangExpressionParser( // // FIXME: We shouldn't need to do this, the target should be immutable once // created. This complexity should be lifted elsewhere. - m_compiler->getTarget().adjust(m_compiler->getDiagnostics(), - m_compiler->getLangOpts()); + m_compiler->getTarget().adjust(m_compiler->getLangOpts()); // 6. Set up the diagnostic buffer for reporting errors diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.cpp index 65f8a9dcdb004..c337ee9f79f6b 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.cpp @@ -704,7 +704,7 @@ ClangModulesDeclVendor::Create(Target &target) { if (!instance->hasTarget()) return nullptr; - instance->getTarget().adjust(*diagnostics_engine, instance->getLangOpts()); + instance->getTarget().adjust(instance->getLangOpts()); if (!action->BeginSourceFile(*instance, instance->getFrontendOpts().Inputs[0])) From 61242c0addb120294211d24a97ed89837418cb36 Mon Sep 17 00:00:00 2001 From: Scott Linder Date: Mon, 28 Jun 2021 19:51:25 +0000 Subject: [PATCH 100/619] Revert "[ADT] Add makeVisitor to STLExtras.h" This reverts commit 14a8aa615597ef0aa424ac9545906bf8b9865063. Mistakenly landed this before a patch it should depend on was accepted. --- llvm/include/llvm/ADT/STLExtras.h | 52 ---------- llvm/unittests/ADT/STLExtrasTest.cpp | 147 --------------------------- 2 files changed, 199 deletions(-) diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h index 372907be8a11d..430da0f42348b 100644 --- a/llvm/include/llvm/ADT/STLExtras.h +++ b/llvm/include/llvm/ADT/STLExtras.h @@ -1299,58 +1299,6 @@ using is_one_of = disjunction...>; template using are_base_of = conjunction...>; -namespace detail { -template struct Visitor; - -template -struct Visitor : remove_cvref_t, Visitor { - explicit constexpr Visitor(HeadT &&Head, TailTs &&...Tail) - : remove_cvref_t(std::forward(Head)), - Visitor(std::forward(Tail)...) {} - using remove_cvref_t::operator(); - using Visitor::operator(); -}; - -template struct Visitor : remove_cvref_t { - explicit constexpr Visitor(HeadT &&Head) - : remove_cvref_t(std::forward(Head)) {} - using remove_cvref_t::operator(); -}; -} // namespace detail - -/// Returns an opaquely-typed Callable object whose operator() overload set is -/// the sum of the operator() overload sets of each CallableT in CallableTs. -/// -/// The type of the returned object derives from each CallableT in CallableTs. -/// The returned object is constructed by invoking the appropriate copy or move -/// constructor of each CallableT, as selected by overload resolution on the -/// corresponding argument to makeVisitor. -/// -/// Example: -/// -/// \code -/// auto visitor = makeVisitor([](auto) { return "unhandled type"; }, -/// [](int i) { return "int"; }, -/// [](std::string s) { return "str"; }); -/// auto a = visitor(42); // `a` is now "int". -/// auto b = visitor("foo"); // `b` is now "str". -/// auto c = visitor(3.14f); // `c` is now "unhandled type". -/// \endcode -/// -/// Example of making a visitor with a lambda which captures a move-only type: -/// -/// \code -/// std::unique_ptr FH = /* ... */; -/// auto visitor = makeVisitor( -/// [FH{std::move(FH)}](Foo F) { return FH->handle(F); }, -/// [](int i) { return i; }, -/// [](std::string s) { return atoi(s); }); -/// \endcode -template -constexpr decltype(auto) makeVisitor(CallableTs &&...Callables) { - return detail::Visitor(std::forward(Callables)...); -} - //===----------------------------------------------------------------------===// // Extra additions for arrays //===----------------------------------------------------------------------===// diff --git a/llvm/unittests/ADT/STLExtrasTest.cpp b/llvm/unittests/ADT/STLExtrasTest.cpp index eb87670700f7d..512c594d86322 100644 --- a/llvm/unittests/ADT/STLExtrasTest.cpp +++ b/llvm/unittests/ADT/STLExtrasTest.cpp @@ -764,151 +764,4 @@ TEST(STLExtras, Unique) { EXPECT_EQ(3, V[3]); } -TEST(STLExtrasTest, TypesAreDistinct) { - EXPECT_TRUE((llvm::TypesAreDistinct<>::value)); - EXPECT_TRUE((llvm::TypesAreDistinct::value)); - EXPECT_FALSE((llvm::TypesAreDistinct::value)); - EXPECT_TRUE((llvm::TypesAreDistinct::value)); - EXPECT_FALSE((llvm::TypesAreDistinct::value)); - EXPECT_TRUE((llvm::TypesAreDistinct::value)); - EXPECT_FALSE((llvm::TypesAreDistinct::value)); - EXPECT_TRUE((llvm::TypesAreDistinct::value)); - EXPECT_TRUE((llvm::TypesAreDistinct::value)); - EXPECT_TRUE((llvm::TypesAreDistinct::value)); - EXPECT_TRUE((llvm::TypesAreDistinct::value)); -} - -TEST(STLExtrasTest, FirstIndexOfType) { - EXPECT_EQ((llvm::FirstIndexOfType::value), 0u); - EXPECT_EQ((llvm::FirstIndexOfType::value), 0u); - EXPECT_EQ((llvm::FirstIndexOfType::value), 1u); - EXPECT_EQ((llvm::FirstIndexOfType::value), - 2u); -} - -TEST(STLExtrasTest, TypeAtIndex) { - EXPECT_TRUE((std::is_same>::value)); - EXPECT_TRUE((std::is_same>::value)); - EXPECT_TRUE((std::is_same>::value)); - EXPECT_TRUE( - (std::is_same>::value)); - EXPECT_TRUE( - (std::is_same>::value)); - EXPECT_TRUE( - (std::is_same>::value)); -} - -TEST(STLExtrasTest, MakeVisitorOneCallable) { - auto IdentityLambda = [](auto X) { return X; }; - auto IdentityVisitor = makeVisitor(IdentityLambda); - EXPECT_EQ(IdentityLambda(1), IdentityVisitor(1)); - EXPECT_EQ(IdentityLambda(2.0f), IdentityVisitor(2.0f)); - EXPECT_TRUE((std::is_same::value)); - EXPECT_TRUE((std::is_same::value)); -} - -TEST(STLExtrasTest, MakeVisitorTwoCallables) { - auto Visitor = - makeVisitor([](int) { return "int"; }, [](std::string) { return "str"; }); - EXPECT_EQ(Visitor(42), "int"); - EXPECT_EQ(Visitor("foo"), "str"); -} - -TEST(STLExtrasTest, MakeVisitorCallableMultipleOperands) { - auto Second = makeVisitor([](int I, float F) { return F; }, - [](float F, int I) { return I; }); - EXPECT_EQ(Second(1.f, 1), 1); - EXPECT_EQ(Second(1, 1.f), 1.f); -} - -TEST(STLExtrasTest, MakeVisitorDefaultCase) { - { - auto Visitor = makeVisitor([](int I) { return I + 100; }, - [](float F) { return F * 2; }, - [](auto) { return "unhandled type"; }); - EXPECT_EQ(Visitor(24), 124); - EXPECT_EQ(Visitor(2.f), 4.f); - EXPECT_EQ(Visitor(2.), "unhandled type"); - EXPECT_EQ(Visitor(Visitor), "unhandled type"); - } - { - auto Visitor = makeVisitor([](auto) { return "unhandled type"; }, - [](int I) { return I + 100; }, - [](float F) { return F * 2; }); - EXPECT_EQ(Visitor(24), 124); - EXPECT_EQ(Visitor(2.f), 4.f); - EXPECT_EQ(Visitor(2.), "unhandled type"); - EXPECT_EQ(Visitor(Visitor), "unhandled type"); - } -} - -template -struct Functor : Counted { - using Counted::Counted; - void operator()() {} -}; - -TEST(STLExtrasTest, MakeVisitorLifetimeSemanticsPRValue) { - int Copies = 0; - int Moves = 0; - int Destructors = 0; - { - auto V = makeVisitor(Functor(Copies, Moves, Destructors)); - (void)V; - EXPECT_EQ(0, Copies); - EXPECT_EQ(1, Moves); - EXPECT_EQ(1, Destructors); - } - EXPECT_EQ(0, Copies); - EXPECT_EQ(1, Moves); - EXPECT_EQ(2, Destructors); -} - -TEST(STLExtrasTest, MakeVisitorLifetimeSemanticsRValue) { - int Copies = 0; - int Moves = 0; - int Destructors = 0; - { - Functor F(Copies, Moves, Destructors); - { - auto V = makeVisitor(std::move(F)); - (void)V; - EXPECT_EQ(0, Copies); - EXPECT_EQ(1, Moves); - EXPECT_EQ(0, Destructors); - } - EXPECT_EQ(0, Copies); - EXPECT_EQ(1, Moves); - EXPECT_EQ(1, Destructors); - } - EXPECT_EQ(0, Copies); - EXPECT_EQ(1, Moves); - EXPECT_EQ(2, Destructors); -} - -TEST(STLExtrasTest, MakeVisitorLifetimeSemanticsLValue) { - int Copies = 0; - int Moves = 0; - int Destructors = 0; - { - Functor F(Copies, Moves, Destructors); - { - auto V = makeVisitor(F); - (void)V; - EXPECT_EQ(1, Copies); - EXPECT_EQ(0, Moves); - EXPECT_EQ(0, Destructors); - } - EXPECT_EQ(1, Copies); - EXPECT_EQ(0, Moves); - EXPECT_EQ(1, Destructors); - } - EXPECT_EQ(1, Copies); - EXPECT_EQ(0, Moves); - EXPECT_EQ(2, Destructors); -} - } // namespace From b0d27eb069159e21c3b62cdf011937739950eafc Mon Sep 17 00:00:00 2001 From: "Duncan P. N. Exon Smith" Date: Fri, 25 Jun 2021 16:38:41 -0700 Subject: [PATCH 101/619] IR: Fix use-list-order round-tripping for br Fix the use-list-order for br instructions by setting the operands in order of their index to match the use-list-order prediction. The case where this matters is when there is a condition but the if-true and if-false branches are identical. Bug was found when reviewing failures pointed at by https://reviews.llvm.org/D104950. Fix is similar to 3cf415c6c367ced43175ebd1dc4bd9582c7f5376. Differential Revision: https://reviews.llvm.org/D104959 --- llvm/lib/IR/Instructions.cpp | 13 ++++++++----- llvm/test/Assembler/br-single-destination.ll | 11 +++++++++++ 2 files changed, 19 insertions(+), 5 deletions(-) create mode 100644 llvm/test/Assembler/br-single-destination.ll diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp index f02de8eed21b0..2a41fde8666ed 100644 --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -1247,9 +1247,10 @@ BranchInst::BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse, Value *Cond, : Instruction(Type::getVoidTy(IfTrue->getContext()), Instruction::Br, OperandTraits::op_end(this) - 3, 3, InsertBefore) { - Op<-1>() = IfTrue; - Op<-2>() = IfFalse; + // Assign in order of operand index to make use-list order predictable. Op<-3>() = Cond; + Op<-2>() = IfFalse; + Op<-1>() = IfTrue; #ifndef NDEBUG AssertOK(); #endif @@ -1266,9 +1267,10 @@ BranchInst::BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse, Value *Cond, BasicBlock *InsertAtEnd) : Instruction(Type::getVoidTy(IfTrue->getContext()), Instruction::Br, OperandTraits::op_end(this) - 3, 3, InsertAtEnd) { - Op<-1>() = IfTrue; - Op<-2>() = IfFalse; + // Assign in order of operand index to make use-list order predictable. Op<-3>() = Cond; + Op<-2>() = IfFalse; + Op<-1>() = IfTrue; #ifndef NDEBUG AssertOK(); #endif @@ -1278,12 +1280,13 @@ BranchInst::BranchInst(const BranchInst &BI) : Instruction(Type::getVoidTy(BI.getContext()), Instruction::Br, OperandTraits::op_end(this) - BI.getNumOperands(), BI.getNumOperands()) { - Op<-1>() = BI.Op<-1>(); + // Assign in order of operand index to make use-list order predictable. if (BI.getNumOperands() != 1) { assert(BI.getNumOperands() == 3 && "BR can have 1 or 3 operands!"); Op<-3>() = BI.Op<-3>(); Op<-2>() = BI.Op<-2>(); } + Op<-1>() = BI.Op<-1>(); SubclassOptionalData = BI.SubclassOptionalData; } diff --git a/llvm/test/Assembler/br-single-destination.ll b/llvm/test/Assembler/br-single-destination.ll new file mode 100644 index 0000000000000..cf2083ee85879 --- /dev/null +++ b/llvm/test/Assembler/br-single-destination.ll @@ -0,0 +1,11 @@ +; RUN: llvm-as < %s -disable-output 2>&1 | FileCheck %s -allow-empty +; CHECK-NOT: error +; CHECK-NOT: warning +; RUN: verify-uselistorder < %s + +define void @f1(i1 %cmp) { +entry: + br i1 %cmp, label %branch, label %branch +branch: + unreachable +} From 6d6f35eb7b92c6dd4478834497752f4e963db16d Mon Sep 17 00:00:00 2001 From: Scott Linder Date: Mon, 28 Jun 2021 19:54:14 +0000 Subject: [PATCH 102/619] [ADT] Add makeVisitor to STLExtras.h Relands patch reverted by 61242c0addb120294211d24a97ed89837418cb36 The original patch mistakenly included unrelated tests. Adds a utility to combine multiple Callables into a single Callable. This is useful to make constructing a visitor for `std::visit`-like functions more natural; functions like this will be added in future patches. Intended to supercede https://reviews.llvm.org/D99560 by perfectly-forwarding the combined Callables. Reviewed By: dblaikie Differential Revision: https://reviews.llvm.org/D100670 --- llvm/include/llvm/ADT/STLExtras.h | 52 +++++++++++++ llvm/unittests/ADT/STLExtrasTest.cpp | 112 +++++++++++++++++++++++++++ 2 files changed, 164 insertions(+) diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h index 430da0f42348b..372907be8a11d 100644 --- a/llvm/include/llvm/ADT/STLExtras.h +++ b/llvm/include/llvm/ADT/STLExtras.h @@ -1299,6 +1299,58 @@ using is_one_of = disjunction...>; template using are_base_of = conjunction...>; +namespace detail { +template struct Visitor; + +template +struct Visitor : remove_cvref_t, Visitor { + explicit constexpr Visitor(HeadT &&Head, TailTs &&...Tail) + : remove_cvref_t(std::forward(Head)), + Visitor(std::forward(Tail)...) {} + using remove_cvref_t::operator(); + using Visitor::operator(); +}; + +template struct Visitor : remove_cvref_t { + explicit constexpr Visitor(HeadT &&Head) + : remove_cvref_t(std::forward(Head)) {} + using remove_cvref_t::operator(); +}; +} // namespace detail + +/// Returns an opaquely-typed Callable object whose operator() overload set is +/// the sum of the operator() overload sets of each CallableT in CallableTs. +/// +/// The type of the returned object derives from each CallableT in CallableTs. +/// The returned object is constructed by invoking the appropriate copy or move +/// constructor of each CallableT, as selected by overload resolution on the +/// corresponding argument to makeVisitor. +/// +/// Example: +/// +/// \code +/// auto visitor = makeVisitor([](auto) { return "unhandled type"; }, +/// [](int i) { return "int"; }, +/// [](std::string s) { return "str"; }); +/// auto a = visitor(42); // `a` is now "int". +/// auto b = visitor("foo"); // `b` is now "str". +/// auto c = visitor(3.14f); // `c` is now "unhandled type". +/// \endcode +/// +/// Example of making a visitor with a lambda which captures a move-only type: +/// +/// \code +/// std::unique_ptr FH = /* ... */; +/// auto visitor = makeVisitor( +/// [FH{std::move(FH)}](Foo F) { return FH->handle(F); }, +/// [](int i) { return i; }, +/// [](std::string s) { return atoi(s); }); +/// \endcode +template +constexpr decltype(auto) makeVisitor(CallableTs &&...Callables) { + return detail::Visitor(std::forward(Callables)...); +} + //===----------------------------------------------------------------------===// // Extra additions for arrays //===----------------------------------------------------------------------===// diff --git a/llvm/unittests/ADT/STLExtrasTest.cpp b/llvm/unittests/ADT/STLExtrasTest.cpp index 512c594d86322..2c2b649030880 100644 --- a/llvm/unittests/ADT/STLExtrasTest.cpp +++ b/llvm/unittests/ADT/STLExtrasTest.cpp @@ -764,4 +764,116 @@ TEST(STLExtras, Unique) { EXPECT_EQ(3, V[3]); } +TEST(STLExtrasTest, MakeVisitorOneCallable) { + auto IdentityLambda = [](auto X) { return X; }; + auto IdentityVisitor = makeVisitor(IdentityLambda); + EXPECT_EQ(IdentityLambda(1), IdentityVisitor(1)); + EXPECT_EQ(IdentityLambda(2.0f), IdentityVisitor(2.0f)); + EXPECT_TRUE((std::is_same::value)); + EXPECT_TRUE((std::is_same::value)); +} + +TEST(STLExtrasTest, MakeVisitorTwoCallables) { + auto Visitor = + makeVisitor([](int) { return "int"; }, [](std::string) { return "str"; }); + EXPECT_EQ(Visitor(42), "int"); + EXPECT_EQ(Visitor("foo"), "str"); +} + +TEST(STLExtrasTest, MakeVisitorCallableMultipleOperands) { + auto Second = makeVisitor([](int I, float F) { return F; }, + [](float F, int I) { return I; }); + EXPECT_EQ(Second(1.f, 1), 1); + EXPECT_EQ(Second(1, 1.f), 1.f); +} + +TEST(STLExtrasTest, MakeVisitorDefaultCase) { + { + auto Visitor = makeVisitor([](int I) { return I + 100; }, + [](float F) { return F * 2; }, + [](auto) { return "unhandled type"; }); + EXPECT_EQ(Visitor(24), 124); + EXPECT_EQ(Visitor(2.f), 4.f); + EXPECT_EQ(Visitor(2.), "unhandled type"); + EXPECT_EQ(Visitor(Visitor), "unhandled type"); + } + { + auto Visitor = makeVisitor([](auto) { return "unhandled type"; }, + [](int I) { return I + 100; }, + [](float F) { return F * 2; }); + EXPECT_EQ(Visitor(24), 124); + EXPECT_EQ(Visitor(2.f), 4.f); + EXPECT_EQ(Visitor(2.), "unhandled type"); + EXPECT_EQ(Visitor(Visitor), "unhandled type"); + } +} + +template +struct Functor : Counted { + using Counted::Counted; + void operator()() {} +}; + +TEST(STLExtrasTest, MakeVisitorLifetimeSemanticsPRValue) { + int Copies = 0; + int Moves = 0; + int Destructors = 0; + { + auto V = makeVisitor(Functor(Copies, Moves, Destructors)); + (void)V; + EXPECT_EQ(0, Copies); + EXPECT_EQ(1, Moves); + EXPECT_EQ(1, Destructors); + } + EXPECT_EQ(0, Copies); + EXPECT_EQ(1, Moves); + EXPECT_EQ(2, Destructors); +} + +TEST(STLExtrasTest, MakeVisitorLifetimeSemanticsRValue) { + int Copies = 0; + int Moves = 0; + int Destructors = 0; + { + Functor F(Copies, Moves, Destructors); + { + auto V = makeVisitor(std::move(F)); + (void)V; + EXPECT_EQ(0, Copies); + EXPECT_EQ(1, Moves); + EXPECT_EQ(0, Destructors); + } + EXPECT_EQ(0, Copies); + EXPECT_EQ(1, Moves); + EXPECT_EQ(1, Destructors); + } + EXPECT_EQ(0, Copies); + EXPECT_EQ(1, Moves); + EXPECT_EQ(2, Destructors); +} + +TEST(STLExtrasTest, MakeVisitorLifetimeSemanticsLValue) { + int Copies = 0; + int Moves = 0; + int Destructors = 0; + { + Functor F(Copies, Moves, Destructors); + { + auto V = makeVisitor(F); + (void)V; + EXPECT_EQ(1, Copies); + EXPECT_EQ(0, Moves); + EXPECT_EQ(0, Destructors); + } + EXPECT_EQ(1, Copies); + EXPECT_EQ(0, Moves); + EXPECT_EQ(1, Destructors); + } + EXPECT_EQ(1, Copies); + EXPECT_EQ(0, Moves); + EXPECT_EQ(2, Destructors); +} + } // namespace From 7ac0442fe59dbe0f9127e79e8786a7dd6345c537 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 28 Jun 2021 21:12:12 +0200 Subject: [PATCH 103/619] [SanitizerCoverage] Support opaque pointers Pass element type rather than pointer type to some functions, so we know which type to use for the global variables. --- .../Instrumentation/SanitizerCoverage.cpp | 22 ++++++++++--------- .../SanitizerCoverage/opaque-ptr.ll | 22 +++++++++++++++++++ 2 files changed, 34 insertions(+), 10 deletions(-) create mode 100644 llvm/test/Instrumentation/SanitizerCoverage/opaque-ptr.ll diff --git a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp index 8bce6b82482b5..52670dad777e0 100644 --- a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp +++ b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp @@ -336,12 +336,12 @@ ModuleSanitizerCoverage::CreateSecStartEnd(Module &M, const char *Section, ? GlobalVariable::ExternalLinkage : GlobalVariable::ExternalWeakLinkage; GlobalVariable *SecStart = - new GlobalVariable(M, Ty->getPointerElementType(), false, Linkage, - nullptr, getSectionStart(Section)); + new GlobalVariable(M, Ty, false, Linkage, nullptr, + getSectionStart(Section)); SecStart->setVisibility(GlobalValue::HiddenVisibility); GlobalVariable *SecEnd = - new GlobalVariable(M, Ty->getPointerElementType(), false, Linkage, - nullptr, getSectionEnd(Section)); + new GlobalVariable(M, Ty, false, Linkage, nullptr, + getSectionEnd(Section)); SecEnd->setVisibility(GlobalValue::HiddenVisibility); IRBuilder<> IRB(M.getContext()); if (!TargetTriple.isOSBinFormatCOFF()) @@ -352,7 +352,8 @@ ModuleSanitizerCoverage::CreateSecStartEnd(Module &M, const char *Section, auto SecStartI8Ptr = IRB.CreatePointerCast(SecStart, Int8PtrTy); auto GEP = IRB.CreateGEP(Int8Ty, SecStartI8Ptr, ConstantInt::get(IntptrTy, sizeof(uint64_t))); - return std::make_pair(IRB.CreatePointerCast(GEP, Ty), SecEnd); + return std::make_pair(IRB.CreatePointerCast(GEP, PointerType::getUnqual(Ty)), + SecEnd); } Function *ModuleSanitizerCoverage::CreateInitCallsForSections( @@ -362,8 +363,9 @@ Function *ModuleSanitizerCoverage::CreateInitCallsForSections( auto SecStart = SecStartEnd.first; auto SecEnd = SecStartEnd.second; Function *CtorFunc; + Type *PtrTy = PointerType::getUnqual(Ty); std::tie(CtorFunc, std::ignore) = createSanitizerCtorAndInitFunctions( - M, CtorName, InitFunctionName, {Ty, Ty}, {SecStart, SecEnd}); + M, CtorName, InitFunctionName, {PtrTy, PtrTy}, {SecStart, SecEnd}); assert(CtorFunc->getName() == CtorName); if (TargetTriple.supportsCOMDAT()) { @@ -488,19 +490,19 @@ bool ModuleSanitizerCoverage::instrumentModule( if (FunctionGuardArray) Ctor = CreateInitCallsForSections(M, SanCovModuleCtorTracePcGuardName, - SanCovTracePCGuardInitName, Int32PtrTy, + SanCovTracePCGuardInitName, Int32Ty, SanCovGuardsSectionName); if (Function8bitCounterArray) Ctor = CreateInitCallsForSections(M, SanCovModuleCtor8bitCountersName, - SanCov8bitCountersInitName, Int8PtrTy, + SanCov8bitCountersInitName, Int8Ty, SanCovCountersSectionName); if (FunctionBoolArray) { Ctor = CreateInitCallsForSections(M, SanCovModuleCtorBoolFlagName, - SanCovBoolFlagInitName, Int1PtrTy, + SanCovBoolFlagInitName, Int1Ty, SanCovBoolFlagSectionName); } if (Ctor && Options.PCTable) { - auto SecStartEnd = CreateSecStartEnd(M, SanCovPCsSectionName, IntptrPtrTy); + auto SecStartEnd = CreateSecStartEnd(M, SanCovPCsSectionName, IntptrTy); FunctionCallee InitFunction = declareSanitizerInitFunction( M, SanCovPCsInitName, {IntptrPtrTy, IntptrPtrTy}); IRBuilder<> IRBCtor(Ctor->getEntryBlock().getTerminator()); diff --git a/llvm/test/Instrumentation/SanitizerCoverage/opaque-ptr.ll b/llvm/test/Instrumentation/SanitizerCoverage/opaque-ptr.ll new file mode 100644 index 0000000000000..f70a73b3aa8a5 --- /dev/null +++ b/llvm/test/Instrumentation/SanitizerCoverage/opaque-ptr.ll @@ -0,0 +1,22 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals +; RUN: opt < %s -passes='module(sancov-module)' -sanitizer-coverage-level=1 -force-opaque-pointers -S | FileCheck %s + +;. +; CHECK: @[[__SANCOV_LOWEST_STACK:[a-zA-Z0-9_$"\\.-]+]] = external thread_local(initialexec) global i64 +; CHECK: @[[__SANCOV_GEN_:[a-zA-Z0-9_$"\\.-]+]] = private global [1 x i32] zeroinitializer, section "__sancov_guards", comdat($foo), align 4 +; CHECK: @[[__START___SANCOV_GUARDS:[a-zA-Z0-9_$"\\.-]+]] = extern_weak hidden global i32 +; CHECK: @[[__STOP___SANCOV_GUARDS:[a-zA-Z0-9_$"\\.-]+]] = extern_weak hidden global i32 +; CHECK: @[[LLVM_GLOBAL_CTORS:[a-zA-Z0-9_$"\\.-]+]] = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 2, ptr @sancov.module_ctor_trace_pc_guard, ptr @sancov.module_ctor_trace_pc_guard }] +; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [1 x ptr] [ptr @__sancov_gen_], section "llvm.metadata" +;. +define void @foo(i32* %a) { +; CHECK-LABEL: @foo( +; CHECK-NEXT: call void @__sanitizer_cov_trace_pc_guard(ptr @__sancov_gen_) #[[ATTR1:[0-9]+]] +; CHECK-NEXT: ret void +; + ret void +} +;. +; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind } +; CHECK: attributes #[[ATTR1]] = { nomerge } +;. From fe3c425ae01389eb865d7d979e1ae0c53cc92740 Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Mon, 28 Jun 2021 13:15:24 -0700 Subject: [PATCH 104/619] [mlir] Destroy MLIRContext thread pool when disable multi threading Reviewed By: rriddle Differential Revision: https://reviews.llvm.org/D105057 --- mlir/lib/IR/MLIRContext.cpp | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/mlir/lib/IR/MLIRContext.cpp b/mlir/lib/IR/MLIRContext.cpp index 1ae3e6c21cc51..ddb909949cdfc 100644 --- a/mlir/lib/IR/MLIRContext.cpp +++ b/mlir/lib/IR/MLIRContext.cpp @@ -262,7 +262,7 @@ class MLIRContextImpl { //===--------------------------------------------------------------------===// /// The thread pool to use when processing MLIR tasks in parallel. - llvm::ThreadPool threadPool; + llvm::Optional threadPool; /// This is a list of dialects that are created referring to this context. /// The MLIRContext owns the objects. @@ -334,7 +334,10 @@ class MLIRContextImpl { StringAttr emptyStringAttr; public: - MLIRContextImpl() : identifiers(identifierAllocator) {} + MLIRContextImpl() : identifiers(identifierAllocator) { + if (threadingIsEnabled) + threadPool.emplace(); + } ~MLIRContextImpl() { for (auto typeMapping : registeredTypes) typeMapping.second->~AbstractType(); @@ -573,12 +576,19 @@ void MLIRContext::disableMultithreading(bool disable) { impl->affineUniquer.disableMultithreading(disable); impl->attributeUniquer.disableMultithreading(disable); impl->typeUniquer.disableMultithreading(disable); + + // Destroy thread pool (stop all threads) if it is no longer needed, or create + // a new one if multithreading was re-enabled. + if (!impl->threadingIsEnabled) + impl->threadPool.reset(); + else if (!impl->threadPool.hasValue()) + impl->threadPool.emplace(); } llvm::ThreadPool &MLIRContext::getThreadPool() { assert(isMultithreadingEnabled() && "expected multi-threading to be enabled within the context"); - return impl->threadPool; + return *impl->threadPool; } void MLIRContext::enterMultiThreadedExecution() { From 5bc9cc1332aa042b68fb5efa9fb50eaaf2d54f79 Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Mon, 28 Jun 2021 13:42:16 -0700 Subject: [PATCH 105/619] [drr] Enable specifying range in NativeCodeCall replacement. This enables creating a replacement rule where range of positional replacements need not be spelled out, or are not known (e.g., enable having a rewrite that forward all operands to a call generically). Differential Revision: https://reviews.llvm.org/D104955 --- mlir/docs/DeclarativeRewrites.md | 94 +++++++++++----------- mlir/include/mlir/TableGen/Format.h | 25 ++++-- mlir/lib/TableGen/Format.cpp | 25 +++++- mlir/test/mlir-tblgen/rewriter-indexing.td | 5 ++ 4 files changed, 98 insertions(+), 51 deletions(-) diff --git a/mlir/docs/DeclarativeRewrites.md b/mlir/docs/DeclarativeRewrites.md index 27ae161978c8a..5815035ca77e8 100644 --- a/mlir/docs/DeclarativeRewrites.md +++ b/mlir/docs/DeclarativeRewrites.md @@ -11,8 +11,8 @@ compiler build time. This manual explains in detail all of the available mechanisms for defining rewrite rules in such a declarative manner. It aims to be a specification instead of a tutorial. Please refer to -[Quickstart tutorial to adding MLIR graph -rewrite](Tutorials/QuickstartRewrites.md) for the latter. +[Quickstart tutorial to adding MLIR graph rewrite](Tutorials/QuickstartRewrites.md) +for the latter. Given that declarative rewrite rules depend on op definition specification, this manual assumes knowledge of the [ODS](OpDefinitions.md) doc. @@ -51,8 +51,8 @@ features: * Matching multi-result ops in nested patterns. * Matching and generating variadic operand/result ops in nested patterns. * Packing and unpacking variadic operands/results during generation. -* [`NativeCodeCall`](#nativecodecall-transforming-the-generated-op) - returning more than one results. +* [`NativeCodeCall`](#nativecodecall-transforming-the-generated-op) returning + more than one results. ## Rule Definition @@ -93,9 +93,9 @@ Each pattern is specified as a TableGen `dag` object with the syntax of [directives](#rewrite-directives). `argN` is for matching (if used in source pattern) or generating (if used in result pattern) the `N`-th argument for `operator`. If the `operator` is some MLIR operation, it means the `N`-th -argument as specified in the `arguments` list of the op's definition. -Therefore, we say op argument specification in pattern is **position-based**: -the position where they appear matters. +argument as specified in the `arguments` list of the op's definition. Therefore, +we say op argument specification in pattern is **position-based**: the position +where they appear matters. `argN` can be a `dag` object itself, thus we can have nested `dag` tree to model the def-use relationship between ops. @@ -245,15 +245,15 @@ the pattern by following the exact same order as the ODS `arguments` definition. Otherwise, a custom `build()` method that matches the argument list is required. Right now all ODS-generated `build()` methods require specifying the result -type(s), unless the op has known traits like `SameOperandsAndResultType` that -we can use to auto-generate a `build()` method with result type deduction. -When generating an op to replace the result of the matched root op, we can use -the matched root op's result type when calling the ODS-generated builder. -Otherwise (e.g., generating an [auxiliary op](#supporting-auxiliary-ops) or -generating an op with a nested result pattern), DRR will not be able to deduce -the result type(s). The pattern author will need to define a custom builder -that has result type deduction ability via `OpBuilder` in ODS. For example, -in the following pattern +type(s), unless the op has known traits like `SameOperandsAndResultType` that we +can use to auto-generate a `build()` method with result type deduction. When +generating an op to replace the result of the matched root op, we can use the +matched root op's result type when calling the ODS-generated builder. Otherwise +(e.g., generating an [auxiliary op](#supporting-auxiliary-ops) or generating an +op with a nested result pattern), DRR will not be able to deduce the result +type(s). The pattern author will need to define a custom builder that has result +type deduction ability via `OpBuilder` in ODS. For example, in the following +pattern ```tablegen def : Pat<(AOp $input, $attr), (COp (AOp $input, $attr) $attr)>; @@ -295,8 +295,8 @@ to replace the matched `AOp`. In the result pattern, we can bind to the result(s) of a newly built op by attaching symbols to the op. (But we **cannot** bind to op arguments given that -they are referencing previously bound symbols.) This is useful for reusing -newly created results where suitable. For example, +they are referencing previously bound symbols.) This is useful for reusing newly +created results where suitable. For example, ```tablegen def DOp : Op<"d_op"> { @@ -373,18 +373,18 @@ And make sure the generated C++ code from the above pattern has access to the definition of the C++ helper function. In the above example, we are using a string to specialize the `NativeCodeCall` -template. The string can be an arbitrary C++ expression that evaluates into -some C++ object expected at the `NativeCodeCall` site (here it would be -expecting an array attribute). Typically the string should be a function call. +template. The string can be an arbitrary C++ expression that evaluates into some +C++ object expected at the `NativeCodeCall` site (here it would be expecting an +array attribute). Typically the string should be a function call. Note that currently `NativeCodeCall` must return no more than one value or attribute. This might change in the future. ##### `NativeCodeCall` placeholders -In `NativeCodeCall`, we can use placeholders like `$_builder`, `$N`. The former -is called _special placeholder_, while the latter is called _positional -placeholder_. +In `NativeCodeCall`, we can use placeholders like `$_builder`, `$N` and `$N...`. +The former is called _special placeholder_, while the latter is called +_positional placeholder_ and _positional range placeholder_. `NativeCodeCall` right now only supports three special placeholders: `$_builder`, `$_loc`, and `$_self`: @@ -423,6 +423,11 @@ the `NativeCodeCall` use site. For example, if we define `SomeCall : NativeCodeCall<"someFn($1, $2, $0)">` and use it like `(SomeCall $in0, $in1, $in2)`, then this will be translated into C++ call `someFn($in1, $in2, $in0)`. +Positional range placeholders will be substituted by multiple `dag` object +parameters at the `NativeCodeCall` use site. For example, if we define +`SomeCall : NativeCodeCall<"someFn($1...)">` and use it like `(SomeCall $in0, +$in1, $in2)`, then this will be translated into C++ call `someFn($in1, $in2)`. + ##### Customizing entire op building `NativeCodeCall` is not only limited to transforming arguments for building an @@ -490,8 +495,8 @@ matched op. Multi-result ops bring extra complexity to declarative rewrite rules. We use TableGen `dag` objects to represent ops in patterns; there is no native way to -indicate that an op generates multiple results. The approach adopted is based -on **naming convention**: a `__N` suffix is added to a symbol to indicate the +indicate that an op generates multiple results. The approach adopted is based on +**naming convention**: a `__N` suffix is added to a symbol to indicate the `N`-th result. #### `__N` suffix @@ -541,12 +546,12 @@ The above example also shows how to replace a matched multi-result op. To replace an `N`-result op, the result patterns must generate at least `N` declared values (see [Declared vs. actual value](#declared-vs-actual-value) for -definition). If there are more than `N` declared values generated, only the -last `N` declared values will be used to replace the matched op. Note that -because of the existence of multi-result op, one result pattern **may** generate -multiple declared values. So it means we do not necessarily need `N` result -patterns to replace an `N`-result op. For example, to replace an op with three -results, you can have +definition). If there are more than `N` declared values generated, only the last +`N` declared values will be used to replace the matched op. Note that because of +the existence of multi-result op, one result pattern **may** generate multiple +declared values. So it means we do not necessarily need `N` result patterns to +replace an `N`-result op. For example, to replace an op with three results, you +can have ```tablegen // ThreeResultOp/TwoResultOp/OneResultOp generates three/two/one result(s), @@ -590,8 +595,8 @@ regarding an op's values. * _Actual operand/result/value_: an operand/result/value of an op instance at runtime -The above terms are needed because ops can have multiple results, and some of the -results can also be variadic. For example, +The above terms are needed because ops can have multiple results, and some of +the results can also be variadic. For example, ```tablegen def MultiVariadicOp : Op<"multi_variadic_op"> { @@ -611,8 +616,8 @@ def MultiVariadicOp : Op<"multi_variadic_op"> { We say the above op has 3 declared operands and 3 declared results. But at runtime, an instance can have 3 values corresponding to `$input2` and 2 values -correspond to `$output2`; we say it has 5 actual operands and 4 actual -results. A variadic operand/result is a considered as a declared value that can +correspond to `$output2`; we say it has 5 actual operands and 4 actual results. +A variadic operand/result is a considered as a declared value that can correspond to multiple actual values. [TODO] @@ -651,10 +656,10 @@ You can ### Adjusting benefits -The benefit of a `Pattern` is an integer value indicating the benefit of matching -the pattern. It determines the priorities of patterns inside the pattern rewrite -driver. A pattern with a higher benefit is applied before one with a lower -benefit. +The benefit of a `Pattern` is an integer value indicating the benefit of +matching the pattern. It determines the priorities of patterns inside the +pattern rewrite driver. A pattern with a higher benefit is applied before one +with a lower benefit. In DRR, a rule is set to have a benefit of the number of ops in the source pattern. This is based on the heuristics and assumptions that: @@ -662,7 +667,6 @@ pattern. This is based on the heuristics and assumptions that: * Larger matches are more beneficial than smaller ones. * If a smaller one is applied first the larger one may not apply anymore. - The fourth parameter to `Pattern` (and `Pat`) allows to manually tweak a pattern's benefit. Just supply `(addBenefit N)` to add `N` to the benefit value. @@ -696,8 +700,8 @@ def : Pat<(LocSrc1Op:$src1 (LocSrc2Op:$src2 ...), (LocDst1Op (LocDst2Op ..., (location $src2)), (location "outer"))>; ``` -In the above pattern, the generated `LocDst2Op` will use the matched location -of `LocSrc2Op` while the root `LocDst1Op` node will used the named location +In the above pattern, the generated `LocDst2Op` will use the matched location of +`LocSrc2Op` while the root `LocDst1Op` node will used the named location `outer`. ### `replaceWithValue` @@ -724,8 +728,8 @@ The above pattern removes the `Foo` and replaces all uses of `Foo` with ### Run `mlir-tblgen` to see the generated content -TableGen syntax sometimes can be obscure; reading the generated content can be -a very helpful way to understand and debug issues. To build `mlir-tblgen`, run +TableGen syntax sometimes can be obscure; reading the generated content can be a +very helpful way to understand and debug issues. To build `mlir-tblgen`, run `cmake --build . --target mlir-tblgen` in your build directory and find the `mlir-tblgen` binary in the `bin/` subdirectory. All the supported generators can be found via `mlir-tblgen --help`. diff --git a/mlir/include/mlir/TableGen/Format.h b/mlir/include/mlir/TableGen/Format.h index 441e05c29f264..3120f6ef5766c 100644 --- a/mlir/include/mlir/TableGen/Format.h +++ b/mlir/include/mlir/TableGen/Format.h @@ -88,22 +88,33 @@ class FmtContext { /// Struct representing a replacement segment for the formatted string. It can /// be a segment of the formatting template (for `Literal`) or a replacement -/// parameter (for `PositionalPH` and `SpecialPH`). +/// parameter (for `PositionalPH`, `PositionalRangePH` and `SpecialPH`). struct FmtReplacement { - enum class Type { Empty, Literal, PositionalPH, SpecialPH }; + enum class Type { + Empty, + Literal, + PositionalPH, + PositionalRangePH, + SpecialPH + }; FmtReplacement() = default; explicit FmtReplacement(StringRef literal) : type(Type::Literal), spec(literal) {} FmtReplacement(StringRef spec, size_t index) : type(Type::PositionalPH), spec(spec), index(index) {} + FmtReplacement(StringRef spec, size_t index, size_t end) + : type(Type::PositionalRangePH), spec(spec), index(index), end(end) {} FmtReplacement(StringRef spec, FmtContext::PHKind placeholder) : type(Type::SpecialPH), spec(spec), placeholder(placeholder) {} Type type = Type::Empty; StringRef spec; size_t index = 0; + size_t end = kUnset; FmtContext::PHKind placeholder = FmtContext::PHKind::None; + + static constexpr size_t kUnset = -1; }; class FmtObjectBase { @@ -121,7 +132,7 @@ class FmtObjectBase { // std::vector. struct CreateAdapters { template - std::vector operator()(Ts &... items) { + std::vector operator()(Ts &...items) { return std::vector{&items...}; } }; @@ -205,7 +216,8 @@ class FmtStrVecObject : public FmtObjectBase { /// /// There are two categories of placeholders accepted, both led by a '$' sign: /// -/// 1. Positional placeholder: $[0-9]+ +/// 1.a Positional placeholder: $[0-9]+ +/// 1.b Positional range placeholder: $[0-9]+... /// 2. Special placeholder: $[a-zA-Z_][a-zA-Z0-9_]* /// /// Replacement parameters for positional placeholders are supplied as the @@ -214,6 +226,9 @@ class FmtStrVecObject : public FmtObjectBase { /// can use the positional placeholders in any order and repeat any times, for /// example, "$2 $1 $1 $0" is accepted. /// +/// Replace parameters for positional range placeholders are supplied as if +/// positional placeholders were specified with commas separating them. +/// /// Replacement parameters for special placeholders are supplied using the `ctx` /// format context. /// @@ -237,7 +252,7 @@ class FmtStrVecObject : public FmtObjectBase { /// 2. This utility does not support format layout because it is rarely needed /// in C++ code generation. template -inline auto tgfmt(StringRef fmt, const FmtContext *ctx, Ts &&... vals) +inline auto tgfmt(StringRef fmt, const FmtContext *ctx, Ts &&...vals) -> FmtObject(vals))...))> { using ParamTuple = decltype(std::make_tuple( diff --git a/mlir/lib/TableGen/Format.cpp b/mlir/lib/TableGen/Format.cpp index 10834510b7674..4a0bbdf7f346c 100644 --- a/mlir/lib/TableGen/Format.cpp +++ b/mlir/lib/TableGen/Format.cpp @@ -97,7 +97,8 @@ FmtObjectBase::splitFmtSegment(StringRef fmt) { // First try to see if it's a positional placeholder, and then handle special // placeholders. - size_t end = fmt.find_if_not([](char c) { return std::isdigit(c); }, 1); + size_t end = + fmt.find_if_not([](char c) { return std::isdigit(c); }, /*From=*/1); if (end != 1) { // We have a positional placeholder. Parse the index. size_t index = 0; @@ -105,6 +106,14 @@ FmtObjectBase::splitFmtSegment(StringRef fmt) { llvm_unreachable("invalid replacement sequence index"); } + // Check if this is the part of a range specification. + if (fmt.substr(end, 3) == "...") { + // Currently only ranges without upper bound are supported. + return { + FmtReplacement{fmt.substr(0, end + 3), index, FmtReplacement::kUnset}, + fmt.substr(end + 3)}; + } + if (end == StringRef::npos) { // All the remaining characters are part of the positional placeholder. return {FmtReplacement{fmt, index}, StringRef()}; @@ -164,6 +173,20 @@ void FmtObjectBase::format(raw_ostream &s) const { continue; } + if (repl.type == FmtReplacement::Type::PositionalRangePH) { + if (repl.index >= adapters.size()) { + s << repl.spec << kMarkerForNoSubst; + continue; + } + auto range = llvm::makeArrayRef(adapters); + range = range.drop_front(repl.index); + if (repl.end != FmtReplacement::kUnset) + range = range.drop_back(adapters.size() - repl.end); + llvm::interleaveComma(range, s, + [&](auto &x) { x->format(s, /*Options=*/""); }); + continue; + } + assert(repl.type == FmtReplacement::Type::PositionalPH); if (repl.index >= adapters.size()) { diff --git a/mlir/test/mlir-tblgen/rewriter-indexing.td b/mlir/test/mlir-tblgen/rewriter-indexing.td index cbdeff9c743da..f4f055e1c0c40 100644 --- a/mlir/test/mlir-tblgen/rewriter-indexing.td +++ b/mlir/test/mlir-tblgen/rewriter-indexing.td @@ -85,3 +85,8 @@ def NativeBuilder : // CHECK: nativeCall(rewriter, odsLoc, (*v1.begin()), (*v2.begin()), (*v3.begin()), (*v4.begin()), (*v5.begin()), (*v6.begin()), (*v7.begin()), (*v8.begin()), (*v9.begin()), (*v10.begin())) def test4 : Pat<(DOp $v1, $v2, $v3, $v4, $v5, $v6, $v7, $v8, $v9, $v10), (NativeBuilder $v1, $v2, $v3, $v4, $v5, $v6, $v7, $v8, $v9, $v10)>; + +// CHECK: struct test5 : public ::mlir::RewritePattern { +// CHECK: foo(rewriter, (*v4.begin()), (*v5.begin()), (*v6.begin()), (*v7.begin()), (*v8.begin()), (*v9.begin()), (*v10.begin())) +def test5 : Pat<(DOp $v1, $v2, $v3, $v4, $v5, $v6, $v7, $v8, $v9, $v10), + (NativeCodeCall<[{ foo($_builder, $3...) }]> $v1, $v2, $v3, $v4, $v5, $v6, $v7, $v8, $v9, $v10)>; From 57ad2e106767f298a0248a5894fcb3581f533f93 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Thu, 24 Jun 2021 20:42:31 -0400 Subject: [PATCH 106/619] [OpenMP] Prevent OpenMPOpt from internalizing uncalled functions Currently OpenMPOpt will only check if a function is a kernel before deciding not to internalize it. Any uncalled function that gets internalized will be trivially dead in the module so this is unnnecessary. Depends on D102423 Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D104890 --- llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 16 +++++++++--- .../Transforms/OpenMP/remove_globalization.ll | 25 +++++++++++++------ 2 files changed, 31 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index 3765378634611..ef86fe04708e3 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -2636,15 +2636,25 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) { KernelSet Kernels = getDeviceKernels(M); - // Create internal copies of each function if this is a kernel Module. + auto IsCalled = [&](Function &F) { + if (Kernels.contains(&F)) + return true; + for (const User *U : F.users()) + if (!isa(U)) + return true; + return false; + }; + + // Create internal copies of each function if this is a kernel Module. This + // allows iterprocedural passes to see every call edge. DenseSet InternalizedFuncs; if (isOpenMPDevice(M)) for (Function &F : M) - if (!F.isDeclaration() && !Kernels.contains(&F)) + if (!F.isDeclaration() && !Kernels.contains(&F) && IsCalled(F)) if (Attributor::internalizeFunction(F, /* Force */ true)) InternalizedFuncs.insert(&F); - // Look at every function definition in the Module that wasn't internalized. + // Look at every function in the Module unless it was internalized. SmallVector SCC; for (Function &F : M) if (!F.isDeclaration() && !InternalizedFuncs.contains(&F)) diff --git a/llvm/test/Transforms/OpenMP/remove_globalization.ll b/llvm/test/Transforms/OpenMP/remove_globalization.ll index 2edf5d3c219d2..62dc54f52b5fe 100644 --- a/llvm/test/Transforms/OpenMP/remove_globalization.ll +++ b/llvm/test/Transforms/OpenMP/remove_globalization.ll @@ -6,6 +6,7 @@ target triple = "nvptx64" ; CHECK-REMARKS: remark: remove_globalization.c:4:2: Could not move globalized variable to the stack. Variable is potentially captured. ; CHECK-REMARKS: remark: remove_globalization.c:2:2: Moving globalized variable to the stack. +; CHECK-REMARKS: remark: remove_globalization.c:6:2: Moving globalized variable to the stack. @S = external local_unnamed_addr global i8* @@ -30,7 +31,7 @@ define internal void @foo() { ; CHECK-NEXT: ret void ; entry: - %0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !11 + %0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !12 call void @use(i8* %0) call void @__kmpc_free_shared(i8* %0) ret void @@ -46,7 +47,7 @@ define internal void @bar() { ; CHECK-NEXT: ret void ; entry: - %0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !12 + %0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !13 call void @share(i8* %0) call void @__kmpc_free_shared(i8* %0) ret void @@ -69,6 +70,14 @@ entry: ret void } +define void @unused() { +entry: + %0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !14 + call void @use(i8* %0) + call void @__kmpc_free_shared(i8* %0) + ret void +} + ; CHECK: declare i8* @__kmpc_alloc_shared(i64) declare i8* @__kmpc_alloc_shared(i64) @@ -87,8 +96,10 @@ declare void @__kmpc_free_shared(i8*) !5 = !{void ()* @kernel, !"kernel", i32 1} !6 = !{i32 7, !"openmp", i32 50} !7 = !{i32 7, !"openmp-device", i32 50} -!8 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !10, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) -!9 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 1, type: !10, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) -!10 = !DISubroutineType(types: !2) -!11 = !DILocation(line: 2, column: 2, scope: !8) -!12 = !DILocation(line: 4, column: 2, scope: !9) +!8 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !11, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!9 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 1, type: !11, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!10 = distinct !DISubprogram(name: "unused", scope: !1, file: !1, line: 1, type: !11, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!11 = !DISubroutineType(types: !2) +!12 = !DILocation(line: 2, column: 2, scope: !8) +!13 = !DILocation(line: 4, column: 2, scope: !9) +!14 = !DILocation(line: 6, column: 2, scope: !9) From a00ad8599045ce7d0b5d2a64a8d22267df37bd14 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Mon, 28 Jun 2021 13:52:41 -0700 Subject: [PATCH 107/619] [Test] rewrite inline_nossp.ll While adding remark based tests in D104944, I noticed that the tests that we were passing were passing for the wrong reason. They were passing because the dynamic allocas were preventing inlining, not the code I added in D91816. Rewrite and simplify the test. Add remark based checks to validate we're preventing inline substitutions for the right reasons. Reviewed By: MaskRay Differential Revision: https://reviews.llvm.org/D104958 --- llvm/test/Transforms/Inline/inline_nossp.ll | 104 +++++--------------- 1 file changed, 22 insertions(+), 82 deletions(-) diff --git a/llvm/test/Transforms/Inline/inline_nossp.ll b/llvm/test/Transforms/Inline/inline_nossp.ll index dde75cc524344..586735ec81992 100644 --- a/llvm/test/Transforms/Inline/inline_nossp.ll +++ b/llvm/test/Transforms/Inline/inline_nossp.ll @@ -1,97 +1,37 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -inline -o - -S %s | FileCheck %s -; RUN: opt -passes='cgscc(inline)' %s -S | FileCheck %s +; RUN: opt -inline -o - -S %s -pass-remarks-missed=inline 2>&1 | FileCheck --check-prefixes=CHECK,CHECK-INLINE %s +; RUN: opt -passes='cgscc(inline)' %s -S -pass-remarks-missed=inline 2>&1 | FileCheck --check-prefixes=CHECK,CHECK-INLINE %s ; RUN: opt -always-inline -o - -S %s | FileCheck %s ; RUN: opt -passes=always-inline -o - -S %s | FileCheck %s -declare dso_local void @foo(i8*) +; CHECK-INLINE: ssp not inlined into nossp_caller because it should never be inlined (cost=never): stack protected callee but caller requested no stack protector +; CHECK-INLINE: nossp not inlined into ssp_caller because it should never be inlined (cost=never): stack protected caller but callee requested no stack protector ; Not interesting to test. -define dso_local void @ssp(i64 %0) #0 { - %2 = alloca i64, align 8 - store i64 %0, i64* %2, align 8 - %3 = load i64, i64* %2, align 8 - %4 = alloca i8, i64 %3, align 16 - call void @foo(i8* %4) - ret void -} - -; Not interesting to test. -define dso_local void @ssp_alwaysinline(i64 %0) #1 { - %2 = alloca i64, align 8 - store i64 %0, i64* %2, align 8 - %3 = load i64, i64* %2, align 8 - %4 = alloca i8, i64 %3, align 16 - call void @foo(i8* %4) - ret void -} +define i32 @nossp() { ret i32 41 } +define i32 @ssp() sspstrong { ret i32 42 } +define i32 @nossp_alwaysinline() alwaysinline { ret i32 43 } +define i32 @ssp_alwaysinline() sspstrong alwaysinline { ret i32 44 } ; @ssp should not be inlined due to mismatch stack protector. ; @ssp_alwaysinline should be inlined due to alwaysinline. -define dso_local void @nossp() { -; CHECK-LABEL: @nossp( -; CHECK-NEXT: [[TMP1:%.*]] = alloca i64, align 8 -; CHECK-NEXT: call void @ssp(i64 1024) -; CHECK-NEXT: [[SAVEDSTACK:%.*]] = call i8* @llvm.stacksave() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64* [[TMP1]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP2]]) -; CHECK-NEXT: store i64 1024, i64* [[TMP1]], align 8 -; CHECK-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 8 -; CHECK-NEXT: [[TMP4:%.*]] = alloca i8, i64 [[TMP3]], align 16 -; CHECK-NEXT: call void @foo(i8* [[TMP4]]) -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64* [[TMP1]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP5]]) -; CHECK-NEXT: call void @llvm.stackrestore(i8* [[SAVEDSTACK]]) -; CHECK-NEXT: ret void -; - call void @ssp(i64 1024) - call void @ssp_alwaysinline(i64 1024) - ret void -} - -; This is the same case as @nossp above. That the caller has alwaysinline is -; irrelevant. Not interesting to test. -define dso_local void @nossp_alwaysinline() #2 { - call void @ssp(i64 1024) - call void @ssp_alwaysinline(i64 1024) - ret void -} - -; @nossp_alwaysinline should be inlined due to alwaysinline. -; @ssp should not be inlined due to mismatch stack protector. -; @ssp_alwaysinline should be inlined due to alwaysinline. -define dso_local void @nossp_caller() { +define i32 @nossp_caller() { ; CHECK-LABEL: @nossp_caller( -; CHECK-NEXT: [[TMP1:%.*]] = alloca i64, align 8 -; CHECK-NEXT: [[SAVEDSTACK:%.*]] = call i8* @llvm.stacksave() -; CHECK-NEXT: call void @ssp(i64 1024) -; CHECK-NEXT: [[SAVEDSTACK_I:%.*]] = call i8* @llvm.stacksave() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64* [[TMP1]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP2]]) -; CHECK-NEXT: store i64 1024, i64* [[TMP1]], align 8 -; CHECK-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 8 -; CHECK-NEXT: [[TMP4:%.*]] = alloca i8, i64 [[TMP3]], align 16 -; CHECK-NEXT: call void @foo(i8* [[TMP4]]) -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64* [[TMP1]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP5]]) -; CHECK-NEXT: call void @llvm.stackrestore(i8* [[SAVEDSTACK_I]]) -; CHECK-NEXT: call void @llvm.stackrestore(i8* [[SAVEDSTACK]]) -; CHECK-NEXT: ret void +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @ssp() +; CHECK-NEXT: ret i32 44 ; - call void @nossp_alwaysinline() - ret void + call i32 @ssp() + %2 = call i32 @ssp_alwaysinline() + ret i32 %2 } - ; @nossp should not be inlined due to mismatch stack protector. -define dso_local void @ssp2() #0 { -; CHECK-LABEL: @ssp2( -; CHECK-NEXT: call void @nossp() -; CHECK-NEXT: ret void +; @nossp_alwaysinline should be inlined due to alwaysinline. +define i32 @ssp_caller() sspstrong { +; CHECK-LABEL: @ssp_caller( +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @nossp() +; CHECK-NEXT: ret i32 43 ; - call void @nossp() - ret void + call i32 @nossp() + %2 = call i32 @nossp_alwaysinline() + ret i32 %2 } - -attributes #0 = { sspstrong } -attributes #1 = { sspstrong alwaysinline } -attributes #2 = { alwaysinline} From 8aee282f57f42773570fc5d29f03a03361ff7fb4 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Mon, 28 Jun 2021 13:53:55 -0700 Subject: [PATCH 108/619] [IR] remove assert since always_inline can appear on CallBase I added an assertion in D91816 (documenting behavior added in D93422) that callers and callees with mismatched fn attr's related to stack protectors should not occur unless the callee was attributed always_inline. This falls apart when a call, invoke, or callbr (any instruction inheriting from CallBase) itself has an always_inline attribute. Clang will emit such attributes on Instructions when __attribute__((flatten)) is used to recursively force inlining from a caller. Since these assertions only had the caller and callee Functions, and not the call site (CallBase derived classes), we would have to search the caller for such instructions to reconstruct the call site information. But at that point, inlining has already occurred; the call site has already been removed from the caller. Remove the assertions, add a unit test for always_inline call sites, and update the LangRef. Another curiosity is that the always_inline Attribute on Instructions is only expanded by the inline pass, not the always_inline pass. Thanks to @pcc on this report when building Android's RunTime (ART) interpreter. Reviewed By: pcc, MaskRay Differential Revision: https://reviews.llvm.org/D104944 --- llvm/docs/LangRef.rst | 10 +++++++--- llvm/lib/IR/Attributes.cpp | 10 ---------- llvm/test/Transforms/Inline/inline_nossp.ll | 15 +++++++++++++++ 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 083ece600448f..243e9ea8bf7e8 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -1922,7 +1922,8 @@ example: A function with the ``ssp`` attribute but without the ``alwaysinline`` attribute cannot be inlined into a function without a ``ssp/sspreq/sspstrong`` attribute. If inlined, the caller will get the - ``ssp`` attribute. + ``ssp`` attribute. ``call``, ``invoke``, and ``callbr`` instructions with + the ``alwaysinline`` attribute force inlining. ``sspstrong`` This attribute indicates that the function should emit a stack smashing protector. This attribute causes a strong heuristic to be used when @@ -1950,7 +1951,9 @@ example: A function with the ``sspstrong`` attribute but without the ``alwaysinline`` attribute cannot be inlined into a function without a ``ssp/sspstrong/sspreq`` attribute. If inlined, the caller will get the - ``sspstrong`` attribute unless the ``sspreq`` attribute exists. + ``sspstrong`` attribute unless the ``sspreq`` attribute exists. ``call``, + ``invoke``, and ``callbr`` instructions with the ``alwaysinline`` attribute + force inlining. ``sspreq`` This attribute indicates that the function should *always* emit a stack smashing protector. This overrides the ``ssp`` and ``sspstrong`` function @@ -1970,7 +1973,8 @@ example: A function with the ``sspreq`` attribute but without the ``alwaysinline`` attribute cannot be inlined into a function without a ``ssp/sspstrong/sspreq`` attribute. If inlined, the caller will get the - ``sspreq`` attribute. + ``sspreq`` attribute. ``call``, ``invoke``, and ``callbr`` instructions + with the ``alwaysinline`` attribute force inlining. ``strictfp`` This attribute indicates that the function was called from a scope that diff --git a/llvm/lib/IR/Attributes.cpp b/llvm/lib/IR/Attributes.cpp index daf0f178e4c47..27b5835c85583 100644 --- a/llvm/lib/IR/Attributes.cpp +++ b/llvm/lib/IR/Attributes.cpp @@ -2159,16 +2159,6 @@ static void setOR(Function &Caller, const Function &Callee) { /// If the inlined function had a higher stack protection level than the /// calling function, then bump up the caller's stack protection level. static void adjustCallerSSPLevel(Function &Caller, const Function &Callee) { -#ifndef NDEBUG - if (!Callee.hasFnAttribute(Attribute::AlwaysInline)) { - assert(!(!Callee.hasStackProtectorFnAttr() && - Caller.hasStackProtectorFnAttr()) && - "stack protected caller but callee requested no stack protector"); - assert(!(!Caller.hasStackProtectorFnAttr() && - Callee.hasStackProtectorFnAttr()) && - "stack protected callee but caller requested no stack protector"); - } -#endif // If upgrading the SSP attribute, clear out the old SSP Attributes first. // Having multiple SSP attributes doesn't actually hurt, but it adds useless // clutter to the IR. diff --git a/llvm/test/Transforms/Inline/inline_nossp.ll b/llvm/test/Transforms/Inline/inline_nossp.ll index 586735ec81992..2a4c8c65f8929 100644 --- a/llvm/test/Transforms/Inline/inline_nossp.ll +++ b/llvm/test/Transforms/Inline/inline_nossp.ll @@ -35,3 +35,18 @@ define i32 @ssp_caller() sspstrong { %2 = call i32 @nossp_alwaysinline() ret i32 %2 } + +; The alwaysinline attribute can also appear on the CallBase (ie. the call +; site), ie. when __attribute__((flatten)) is used on the caller. Treat this +; the same as if the caller had the fn attr alwaysinline and permit inline +; substitution, despite the mismatch between caller and callee on ssp attrs. +; +; Curiously, the always_inline attribute on a CallInst is only expanded by the +; inline pass, but not always_inline pass! +define i32 @nossp_alwaysinline_caller() { +; CHECK-INLINE-LABEL: @nossp_alwaysinline_caller( +; CHECK-INLINE-NEXT: ret i32 42 +; + %1 = call i32 @ssp() alwaysinline + ret i32 %1 +} From 357c339ec83c94ec83314174553ef8be2bda3c8b Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 28 Jun 2021 15:57:38 -0400 Subject: [PATCH 109/619] [InstCombine][test] add test for potential miscompile with constant expression; NFC (PR50906) --- .../InstCombine/indexed-gep-compares.ll | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/indexed-gep-compares.ll b/llvm/test/Transforms/InstCombine/indexed-gep-compares.ll index 2ca2a45d5c3ff..24410a125d261 100644 --- a/llvm/test/Transforms/InstCombine/indexed-gep-compares.ll +++ b/llvm/test/Transforms/InstCombine/indexed-gep-compares.ll @@ -249,6 +249,35 @@ bb10: ret i1 %cmp } +; FIXME: +; It is not generally safe to hoist an expression (sdiv) that may trap. + +define i1 @PR50906() { +; CHECK-LABEL: @PR50906( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[CMP:%.*]] = phi i1 [ icmp sgt (i32 sdiv (i32 7, i32 ptrtoint (i1 ()* @PR50906 to i32)), i32 1), [[NEXT:%.*]] ], [ icmp sgt (i32 sdiv (i32 7, i32 ptrtoint (i1 ()* @PR50906 to i32)), i32 0), [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[NEXT]] +; CHECK: next: +; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: ret i1 [[CMP]] +; +entry: + br label %loop + +loop: + %phi = phi i32 [ 0, %entry ], [ 1, %next ] + br label %next + +next: + %cmp = icmp sgt i32 sdiv (i32 7, i32 ptrtoint (i1 ()* @PR50906 to i32)), %phi + br i1 %cmp, label %exit, label %loop + +exit: + ret i1 %cmp +} declare i32 @__gxx_personality_v0(...) From 9d0bf7699c0292041b65a0c0bac371003e067ef3 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 28 Jun 2021 16:37:10 -0400 Subject: [PATCH 110/619] [InstCombine] don't try to fold a constant expression that can trap (PR50906) We could use a bigger hammer and bail out on any constant expression, but there's a regression test that appears to validly do the transform (although it may not have been intending to check that optimization). --- llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp | 2 +- llvm/test/Transforms/InstCombine/indexed-gep-compares.ll | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index e141d614851ba..e5a1b8eaf24f1 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -1440,7 +1440,7 @@ Instruction *InstCombinerImpl::foldICmpWithConstant(ICmpInst &Cmp) { // icmp(phi(C1, C2, ...), C) -> phi(icmp(C1, C), icmp(C2, C), ...). Constant *C = dyn_cast(Op1); - if (!C) + if (!C || C->canTrap()) return nullptr; if (auto *Phi = dyn_cast(Op0)) diff --git a/llvm/test/Transforms/InstCombine/indexed-gep-compares.ll b/llvm/test/Transforms/InstCombine/indexed-gep-compares.ll index 24410a125d261..d2a6c438527cd 100644 --- a/llvm/test/Transforms/InstCombine/indexed-gep-compares.ll +++ b/llvm/test/Transforms/InstCombine/indexed-gep-compares.ll @@ -249,7 +249,6 @@ bb10: ret i1 %cmp } -; FIXME: ; It is not generally safe to hoist an expression (sdiv) that may trap. define i1 @PR50906() { @@ -257,9 +256,10 @@ define i1 @PR50906() { ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[CMP:%.*]] = phi i1 [ icmp sgt (i32 sdiv (i32 7, i32 ptrtoint (i1 ()* @PR50906 to i32)), i32 1), [[NEXT:%.*]] ], [ icmp sgt (i32 sdiv (i32 7, i32 ptrtoint (i1 ()* @PR50906 to i32)), i32 0), [[ENTRY:%.*]] ] +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 1, [[NEXT:%.*]] ] ; CHECK-NEXT: br label [[NEXT]] ; CHECK: next: +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[PHI]], sdiv (i32 7, i32 ptrtoint (i1 ()* @PR50906 to i32)) ; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: ret i1 [[CMP]] From 69046b4a79e2670053362112aa467f89faf9e53e Mon Sep 17 00:00:00 2001 From: Alexander Belyaev Date: Mon, 28 Jun 2021 22:20:30 +0200 Subject: [PATCH 111/619] [mlir] Skip scalar operands when tiling to linalg.tiled_loop. We are interested only in tensors/memrefs when creating a TiledLoopOp. Differential Revision: https://reviews.llvm.org/D105059 --- mlir/lib/Dialect/Linalg/Utils/Utils.cpp | 16 +++++++++++++--- mlir/test/Dialect/Linalg/tile-tensors.mlir | 14 ++++++++++++++ 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp index 9d7286c08dedf..c82e7eb10df21 100644 --- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp @@ -284,6 +284,19 @@ void GenerateLoopNest::doit( SmallVector lbs, ubs, steps; unpackRanges(loopRanges, lbs, ubs, steps); + auto dropNonShapedValues = + [](ArrayRef operands) -> SmallVector { + SmallVector filteredOperands; + for (OpOperand *operand : operands) { + Type type = operand->get().getType(); + if (type.isa()) + filteredOperands.push_back(operand->get()); + } + return filteredOperands; + }; + auto inputOperands = dropNonShapedValues(linalgOp.getInputOperands()); + auto outputOperands = dropNonShapedValues(linalgOp.getOutputOperands()); + auto wrappedBuilderFn = [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange ivs, ValueRange inputs, ValueRange outputs) { @@ -292,9 +305,6 @@ void GenerateLoopNest::doit( bodyBuilderFn(nestedBuilder, nestedLoc, ivs, outputTensors); nestedBuilder.create(nestedLoc, results); }; - - SmallVector inputOperands = linalgOp.getInputOperands(); - SmallVector outputOperands = linalgOp.getOutputOperands(); auto tiledLoop = b.create(loc, lbs, ubs, steps, inputOperands, outputOperands, b.getArrayAttr(iteratorTypes), wrappedBuilderFn); diff --git a/mlir/test/Dialect/Linalg/tile-tensors.mlir b/mlir/test/Dialect/Linalg/tile-tensors.mlir index f446d9da9179d..63bddb5a16055 100644 --- a/mlir/test/Dialect/Linalg/tile-tensors.mlir +++ b/mlir/test/Dialect/Linalg/tile-tensors.mlir @@ -130,3 +130,17 @@ func @generic_op_tensors( // TLOOP-SAME: ins (%{{.*}} = %[[ARG_0]]: [[TY]], %{{.*}} = %[[ARG_1]]: [[TY]]) // TLOOP-SAME: outs (%{{.*}} = %[[INIT]]: [[TY]]) // TLOOP-SAME: distribution["block_x", "block_y", "none"] { + + +func @fill(%arg0 : tensor) -> tensor { + %c0 = constant 0.0 : f32 + %0 = linalg.fill(%c0, %arg0) : f32, tensor -> tensor + return %0 : tensor +} +// CHECK-LABEL: func @fill + +// TLOOP-LABEL: func @fill +// TLOOP-NOT: ins +// TLOOP: tensor.extract_slice +// TLOOP-NEXT: linalg.fill +// TLOOP-NEXT: tensor.insert_slice From c9f3240c9d94f8128371358887e8b9e5c10c122a Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Mon, 28 Jun 2021 15:21:18 -0400 Subject: [PATCH 112/619] [OpenMP][Documentation] Add OpenMPOpt optimization section Add some information about the optimizations currently provided by OpenMPOpt. Every optimization performed should eventually be listed here. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D105050 --- openmp/docs/optimizations/OpenMPOpt.rst | 100 +++++++++++++++++++++++- 1 file changed, 98 insertions(+), 2 deletions(-) diff --git a/openmp/docs/optimizations/OpenMPOpt.rst b/openmp/docs/optimizations/OpenMPOpt.rst index 6606cb9ccd230..6fc942f2c79c1 100644 --- a/openmp/docs/optimizations/OpenMPOpt.rst +++ b/openmp/docs/optimizations/OpenMPOpt.rst @@ -1,13 +1,109 @@ +========================== OpenMP-Aware Optimizations ========================== +LLVM, since `version 11 `_ (12 +Oct 2020), supports an :ref:`OpenMP-Aware optimization pass `. This +optimization pass will attempt to optimize the module with OpenMP-specific +domain-knowledge. This pass is enabled by default at high optimization levels +(O2 / O3) if compiling with OpenMP support enabled. + +.. _OpenMPOpt: + OpenMPOpt ---------- +========= + +.. contents:: + :local: + :depth: 1 + +OpenMPOpt contains several OpenMP-Aware optimizations. This pass is run early on +the entire Module, and later on the entire call graph. Most optimizations done +by OpenMPOpt support remarks. Optimization remarks can be enabled by compiling +with the following flags. + +.. code-block:: console + + $ clang -Rpass=openmp-opt -Rpass-missed=openmp-opt -Rpass-analysis=openmp-opt + +OpenMP Runtime Call Deduplication +--------------------------------- + +The OpenMP runtime library contains several functions used to implement features +of the OpenMP standard. Several of the runtime calls are constant within a +parallel region. A common optimization is to replace invariant code with a +single reference, but in this case the compiler will only see an opaque call +into the runtime library. To get around this, OpenMPOpt maintains a list of +OpenMP runtime functions that are constant and will manually deduplicate them. + +Globalization +------------- + +The OpenMP standard requires that data can be shared between different threads. +This requirement poses a unique challenge when offloading to GPU accelerators. +Data cannot be shared between the threads in a GPU by default, in order to do +this it must either be placed in global or shared memory. This needs to be done +every time a variable may potentially be shared in order to create correct +OpenMP programs. Unfortunately, this has significant performance implications +and is not needed in the majority of cases. For example, when Clang is +generating code for this offloading region, it will see that the variable `x` +escapes and is potentially shared. This will require globalizing the variable, +which means it cannot reside in the registers on the device. + +.. code-block:: c++ + + void use(void *) { } + + void foo() { + int x; + use(&x); + } + + int main() { + #pragma omp target parallel + foo(); + } + +In many cases, this transformation is not actually necessary but still carries a +significant performance penalty. Because of this, OpenMPOpt can perform and +inter-procedural optimization and scan each known usage of the globalized +variable and determine if it is potentially captured and shared by another +thread. If it is not actually captured, it can safely be moved back to fast +register memory. + +Another case is memory that is intentionally shared between the threads, but is +shared from one thread to all the others. Such variables can be moved to shared +memory when compiled without needing to go through the runtime library. This +allows for users to confidently declare shared memory on the device without +needing to use custom OpenMP allocators or rely on the runtime. + + +.. code-block:: c++ + + static void share(void *); + + static void foo() { + int x[64]; + #pragma omp parallel + share(x); + } + + int main() { + #pragma omp target + foo(); + } +These optimizations can have very large performance implications. Both of these +optimizations rely heavily on inter-procedural analysis. Because of this, +offloading applications should ideally be contained in a single translation unit +and functions should not be externally visible unless needed. OpenMPOpt will +inform the user if any globalization calls remain if remarks are enabled. This +should be treated as a defect in the program. Resources ---------- +========= +- 2021 OpenMP Webinar: "A Compiler's View of OpenMP" https://youtu.be/eIMpgez61r4 - 2020 LLVM Developers’ Meeting: "(OpenMP) Parallelism-Aware Optimizations" https://youtu.be/gtxWkeLCxmU - 2019 EuroLLVM Developers’ Meeting: "Compiler Optimizations for (OpenMP) Target Offloading to GPUs" https://youtu.be/3AbS82C3X30 From 2190c48fdece1712735c507e3f343ae5b872189e Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Mon, 28 Jun 2021 15:34:15 -0400 Subject: [PATCH 113/619] [OpenMP][Documentation] Add FAQ entry for CMake module This patch adds documentation for using the CMake find module for OpenMP target offloading provided by LLVM. It also removes the requirement for AMD's architecture to be set as this isn't necessary for upstream LLVM. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D105051 --- openmp/docs/SupportAndFAQ.rst | 110 ++++++++++++++------ openmp/tools/Modules/FindOpenMPTarget.cmake | 4 - openmp/tools/Modules/README.rst | 2 +- 3 files changed, 78 insertions(+), 38 deletions(-) diff --git a/openmp/docs/SupportAndFAQ.rst b/openmp/docs/SupportAndFAQ.rst index 2eb15d24aa1af..e693f1e31ec13 100644 --- a/openmp/docs/SupportAndFAQ.rst +++ b/openmp/docs/SupportAndFAQ.rst @@ -53,14 +53,15 @@ Q: How to build an OpenMP GPU offload capable compiler? ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ To build an *effective* OpenMP offload capable compiler, only one extra CMake option, `LLVM_ENABLE_RUNTIMES="openmp"`, is needed when building LLVM (Generic -information about building LLVM is available `here `__.). -Make sure all backends that are targeted by OpenMP to be enabled. By default, -Clang will be built with all backends enabled. -When building with `LLVM_ENABLE_RUNTIMES="openmp"` OpenMP should not be enabled -in `LLVM_ENABLE_PROJECTS` because it is enabled by default. +information about building LLVM is available `here +`__.). Make sure all backends that +are targeted by OpenMP to be enabled. By default, Clang will be built with all +backends enabled. When building with `LLVM_ENABLE_RUNTIMES="openmp"` OpenMP +should not be enabled in `LLVM_ENABLE_PROJECTS` because it is enabled by +default. -For Nvidia offload, please see :ref:`_build_nvidia_offload_capable_compiler`. -For AMDGPU offload, please see :ref:`_build_amdgpu_offload_capable_compiler`. +For Nvidia offload, please see :ref:`build_nvidia_offload_capable_compiler`. +For AMDGPU offload, please see :ref:`build_amdgpu_offload_capable_compiler`. .. note:: The compiler that generates the offload code should be the same (version) as @@ -86,41 +87,51 @@ available GPUs failed, you should also set: Q: How to build an OpenMP AMDGPU offload capable compiler? ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -A subset of the `ROCm ` toolchain is +A subset of the `ROCm `_ toolchain is required to build the LLVM toolchain and to execute the openmp application. Either install ROCm somewhere that cmake's find_package can locate it, or build the required subcomponents ROCt and ROCr from source. -The two components used are ROCT-Thunk-Interface, roct, and ROCR-Runtime, -rocr. Roct is the userspace part of the linux driver. It calls into the -driver which ships with the linux kernel. It is an implementation detail of -Rocr from OpenMP's perspective. Rocr is an implementation of `HSA `. +The two components used are ROCT-Thunk-Interface, roct, and ROCR-Runtime, rocr. +Roct is the userspace part of the linux driver. It calls into the driver which +ships with the linux kernel. It is an implementation detail of Rocr from +OpenMP's perspective. Rocr is an implementation of `HSA +`_. - SOURCE_DIR=same-as-llvm-source # e.g. the checkout of llvm-project, next to openmp - BUILD_DIR=somewhere - INSTALL_PREFIX=same-as-llvm-install - - cd $SOURCE_DIR - git clone git@github.com:RadeonOpenCompute/ROCT-Thunk-Interface.git -b roc-4.1.x --single-branch - git clone git@github.com:RadeonOpenCompute/ROCR-Runtime.git -b rocm-4.1.x --single-branch - - cd $BUILD_DIR && mkdir roct && cd roct - cmake $SOURCE_DIR/ROCT-Thunk-Interface/ -DCMAKE_INSTALL_PREFIX=$INSTALL_PREFIX -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF - make && make install - - cd $BUILD_DIR && mkdir rocr && cd rocr - cmake $SOURCE_DIR/ROCR-Runtime/src -DIMAGE_SUPPORT=OFF -DCMAKE_INSTALL_PREFIX=$INSTALL_PREFIX -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON - make && make install +.. code-block:: text -IMAGE_SUPPORT requires building rocr with clang and is not used by openmp. + SOURCE_DIR=same-as-llvm-source # e.g. the checkout of llvm-project, next to openmp + BUILD_DIR=somewhere + INSTALL_PREFIX=same-as-llvm-install + + cd $SOURCE_DIR + git clone git@github.com:RadeonOpenCompute/ROCT-Thunk-Interface.git -b roc-4.1.x \ + --single-branch + git clone git@github.com:RadeonOpenCompute/ROCR-Runtime.git -b rocm-4.1.x \ + --single-branch + + cd $BUILD_DIR && mkdir roct && cd roct + cmake $SOURCE_DIR/ROCT-Thunk-Interface/ -DCMAKE_INSTALL_PREFIX=$INSTALL_PREFIX \ + -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF + make && make install + + cd $BUILD_DIR && mkdir rocr && cd rocr + cmake $SOURCE_DIR/ROCR-Runtime/src -DIMAGE_SUPPORT=OFF \ + -DCMAKE_INSTALL_PREFIX=$INSTALL_PREFIX -DCMAKE_BUILD_TYPE=Release \ + -DBUILD_SHARED_LIBS=ON + make && make install + +``IMAGE_SUPPORT`` requires building rocr with clang and is not used by openmp. Provided cmake's find_package can find the ROCR-Runtime package, LLVM will -build a tool `bin/amdgpu-arch` which will print a string like 'gfx906' when +build a tool ``bin/amdgpu-arch`` which will print a string like ``gfx906`` when run if it recognises a GPU on the local system. LLVM will also build a shared library, libomptarget.rtl.amdgpu.so, which is linked against rocr. With those libraries installed, then LLVM build and installed, try: +.. code-block:: shell + clang -O2 -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa example.c -o example && ./example Q: What are the known limitations of OpenMP AMDGPU offload? @@ -153,8 +164,8 @@ For now, the answer is most likely *no*. Please see :ref:`build_offload_capable_ Q: Does Clang support `` and `` operations in OpenMP target on GPUs? ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Yes, LLVM/Clang allows math functions and complex arithmetic inside of OpenMP target regions -that are compiled for GPUs. +Yes, LLVM/Clang allows math functions and complex arithmetic inside of OpenMP +target regions that are compiled for GPUs. Clang provides a set of wrapper headers that are found first when `math.h` and `complex.h`, for C, `cmath` and `complex`, for C++, or similar headers are @@ -202,8 +213,8 @@ an error like this. Currently, the only solution is to change how the application is built and avoid the use of static libraries. -Q: Can I use dynamically linked libraries with OpenMP offloading -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Q: Can I use dynamically linked libraries with OpenMP offloading? +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Dynamically linked libraries can be only used if there is no device code split between the library and application. Anything declared on the device inside the @@ -220,3 +231,36 @@ correct GCC toolchain in the second stage of the build. For example, if your system-wide GCC installation is too old to build LLVM and you would like to use a newer GCC, set the CMake variable `GCC_INSTALL_PREFIX` to inform clang of the GCC installation you would like to use in the second stage. + +Q: How can I include OpenMP offloading support in my CMake project? +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Currently, there is an experimental CMake find module for OpenMP target +offloading provided by LLVM. It will attempt to find OpenMP target offloading +support for your compiler. The flags necessary for OpenMP target offloading will +be loaded into the ``OpenMPTarget::OpenMPTarget_`` target or the +``OpenMPTarget__FLAGS`` variable if successful. Currently supported +devices are ``AMDGCN`` and ``NVPTX``. + +To use this module, simply add the path to CMake's current module path and call +``find_package``. The module will be installed with your OpenMP installation by +default. Including OpenMP offloading support in an application should now only +require a few additions. + +.. code-block:: cmake + + cmake_minimum_required(VERSION 3.13.4) + project(offloadTest VERSION 1.0 LANGUAGES CXX) + + list(APPEND CMAKE_MODULE_PATH "${PATH_TO_OPENMP_INSTALL}/lib/cmake/openmp") + + find_package(OpenMPTarget REQUIRED NVPTX) + + add_executable(offload) + target_link_libraries(offload PRIVATE OpenMPTarget::OpenMPTarget_NVPTX) + target_sources(offload PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src/Main.cpp) + +Using this module requires at least CMake version 3.13.4. Supported languages +are C and C++ with Fortran support planned in the future. Compiler support is +best for Clang but this module should work for other compiler vendors such as +IBM, GNU. diff --git a/openmp/tools/Modules/FindOpenMPTarget.cmake b/openmp/tools/Modules/FindOpenMPTarget.cmake index d3917afdf2808..a8a666512fab6 100644 --- a/openmp/tools/Modules/FindOpenMPTarget.cmake +++ b/openmp/tools/Modules/FindOpenMPTarget.cmake @@ -140,10 +140,6 @@ endfunction() # Get flags for setting the device's architecture for each compiler. function(_OPENMP_TARGET_DEVICE_ARCH_CANDIDATES LANG DEVICE DEVICE_FLAG) - # AMD requires the architecture, default to gfx908 if not provided. - if((NOT OpenMPTarget_${DEVICE}_ARCH) AND ("${DEVICE}" STREQUAL "AMDGCN")) - set(OpenMPTarget_${DEVICE}_ARCH "gfx908") - endif() if(OpenMPTarget_${DEVICE}_ARCH) # Only Clang supports selecting the architecture for now. set(OMPTarget_ARCH_Clang "-Xopenmp-target=${DEVICE_FLAG} -march=${OpenMPTarget_${DEVICE}_ARCH}") diff --git a/openmp/tools/Modules/README.rst b/openmp/tools/Modules/README.rst index d1ec32f52190a..166118fb363f0 100644 --- a/openmp/tools/Modules/README.rst +++ b/openmp/tools/Modules/README.rst @@ -15,7 +15,7 @@ This module will attempt to find OpenMP target offloading support for a given device. The module will attempt to compile a test program using known compiler flags for each requested architecture. If successful, the flags required for offloading will be loaded into the ``OpenMPTarget::OpenMPTarget_`` -target or the ``OpenMPTarget_NVPTX_FLAGS`` variable. Currently supported target +target or the ``OpenMPTarget__FLAGS`` variable. Currently supported target devices are ``NVPTX`` and ``AMDGCN``. This module is still under development so some features may be missing. From aad87328fabff9382bac0b452c83934515e6d0c8 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sat, 26 Jun 2021 15:42:49 +0200 Subject: [PATCH 114/619] [AsmWriter] Properly handle uselistorder for global symbols Currently, AsmWriter will stick uselistorder directives for global values inside individual functions. This doesn't make a lot of sense, and interacts badly with D104950, as use list order adjustments will be performed while still working on a forward reference. This patch instead always prints uselistorder directives for globals at the module level. This isn't really compatible with the previously used implementation approach. Rather than walking through all values again, use the OrderMap (after stabilizing its order) to go through all values and compute the use list shuffles for them. Classify them per-function, or nullptr for globals. Even independently of D104950, this seems to fix a few verify-uselistorder failures. Conveniently, there is even a pre-existing failing test that this fixes. Differential Revision: https://reviews.llvm.org/D104976 --- llvm/lib/IR/AsmWriter.cpp | 179 ++++++--------------- llvm/test/Assembler/uselistorder_global.ll | 27 ++++ llvm/test/Bitcode/use-list-order2.ll | 1 - 3 files changed, 80 insertions(+), 127 deletions(-) create mode 100644 llvm/test/Assembler/uselistorder_global.ll diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index 25034d64556e7..cbbcbe02638bb 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -63,7 +63,6 @@ #include "llvm/IR/Type.h" #include "llvm/IR/TypeFinder.h" #include "llvm/IR/Use.h" -#include "llvm/IR/UseListOrder.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/Support/AtomicOrdering.h" @@ -95,26 +94,10 @@ AssemblyAnnotationWriter::~AssemblyAnnotationWriter() = default; // Helper Functions //===----------------------------------------------------------------------===// -namespace { - -struct OrderMap { - DenseMap> IDs; - - unsigned size() const { return IDs.size(); } - std::pair &operator[](const Value *V) { return IDs[V]; } - - std::pair lookup(const Value *V) const { - return IDs.lookup(V); - } +using OrderMap = MapVector; - void index(const Value *V) { - // Explicitly sequence get-size and insert-value operations to avoid UB. - unsigned ID = IDs.size() + 1; - IDs[V].first = ID; - } -}; - -} // end anonymous namespace +using UseListOrderMap = + DenseMap>>; /// Look for a value that might be wrapped as metadata, e.g. a value in a /// metadata operand. Returns the input value as-is if it is not wrapped. @@ -126,7 +109,7 @@ static const Value *skipMetadataWrapper(const Value *V) { } static void orderValue(const Value *V, OrderMap &OM) { - if (OM.lookup(V).first) + if (OM.lookup(V)) return; if (const Constant *C = dyn_cast(V)) @@ -137,7 +120,8 @@ static void orderValue(const Value *V, OrderMap &OM) { // Note: we cannot cache this lookup above, since inserting into the map // changes the map's size, and thus affects the other IDs. - OM.index(V); + unsigned ID = OM.size() + 1; + OM[V] = ID; } static OrderMap orderModule(const Module *M) { @@ -187,33 +171,32 @@ static OrderMap orderModule(const Module *M) { return OM; } -static void predictValueUseListOrderImpl(const Value *V, const Function *F, - unsigned ID, const OrderMap &OM, - UseListOrderStack &Stack) { +static std::vector +predictValueUseListOrder(const Value *V, unsigned ID, const OrderMap &OM) { // Predict use-list order for this one. using Entry = std::pair; SmallVector List; for (const Use &U : V->uses()) // Check if this user will be serialized. - if (OM.lookup(U.getUser()).first) + if (OM.lookup(U.getUser())) List.push_back(std::make_pair(&U, List.size())); if (List.size() < 2) // We may have lost some users. - return; + return {}; bool GetsReversed = !isa(V) && !isa(V) && !isa(V); if (auto *BA = dyn_cast(V)) - ID = OM.lookup(BA->getBasicBlock()).first; + ID = OM.lookup(BA->getBasicBlock()); llvm::sort(List, [&](const Entry &L, const Entry &R) { const Use *LU = L.first; const Use *RU = R.first; if (LU == RU) return false; - auto LID = OM.lookup(LU->getUser()).first; - auto RID = OM.lookup(RU->getUser()).first; + auto LID = OM.lookup(LU->getUser()); + auto RID = OM.lookup(RU->getUser()); // If ID is 4, then expect: 7 6 5 1 2 3. if (LID < RID) { @@ -241,89 +224,38 @@ static void predictValueUseListOrderImpl(const Value *V, const Function *F, return L.second < R.second; })) // Order is already correct. - return; + return {}; // Store the shuffle. - Stack.emplace_back(V, F, List.size()); - assert(List.size() == Stack.back().Shuffle.size() && "Wrong size"); + std::vector Shuffle(List.size()); for (size_t I = 0, E = List.size(); I != E; ++I) - Stack.back().Shuffle[I] = List[I].second; -} - -static void predictValueUseListOrder(const Value *V, const Function *F, - OrderMap &OM, UseListOrderStack &Stack) { - auto &IDPair = OM[V]; - assert(IDPair.first && "Unmapped value"); - if (IDPair.second) - // Already predicted. - return; - - // Do the actual prediction. - IDPair.second = true; - if (!V->use_empty() && std::next(V->use_begin()) != V->use_end()) - predictValueUseListOrderImpl(V, F, IDPair.first, OM, Stack); - - // Recursive descent into constants. - if (const Constant *C = dyn_cast(V)) - if (C->getNumOperands()) // Visit GlobalValues. - for (const Value *Op : C->operands()) - if (isa(Op)) // Visit GlobalValues. - predictValueUseListOrder(Op, F, OM, Stack); + Shuffle[I] = List[I].second; + return Shuffle; } -static UseListOrderStack predictUseListOrder(const Module *M) { +static UseListOrderMap predictUseListOrder(const Module *M) { OrderMap OM = orderModule(M); + UseListOrderMap ULOM; + for (const auto &Pair : OM) { + const Value *V = Pair.first; + if (V->use_empty() || std::next(V->use_begin()) == V->use_end()) + continue; - // Use-list orders need to be serialized after all the users have been added - // to a value, or else the shuffles will be incomplete. Store them per - // function in a stack. - // - // Aside from function order, the order of values doesn't matter much here. - UseListOrderStack Stack; - - // We want to visit the functions backward now so we can list function-local - // constants in the last Function they're used in. Module-level constants - // have already been visited above. - for (const Function &F : make_range(M->rbegin(), M->rend())) { - if (F.isDeclaration()) + std::vector Shuffle = + predictValueUseListOrder(V, Pair.second, OM); + if (Shuffle.empty()) continue; - for (const BasicBlock &BB : F) - predictValueUseListOrder(&BB, &F, OM, Stack); - for (const Argument &A : F.args()) - predictValueUseListOrder(&A, &F, OM, Stack); - for (const BasicBlock &BB : F) - for (const Instruction &I : BB) - for (const Value *Op : I.operands()) { - Op = skipMetadataWrapper(Op); - if (isa(*Op) || isa(*Op)) // Visit GlobalValues. - predictValueUseListOrder(Op, &F, OM, Stack); - } - for (const BasicBlock &BB : F) - for (const Instruction &I : BB) - predictValueUseListOrder(&I, &F, OM, Stack); - } - - // Visit globals last. - for (const GlobalVariable &G : M->globals()) - predictValueUseListOrder(&G, nullptr, OM, Stack); - for (const Function &F : *M) - predictValueUseListOrder(&F, nullptr, OM, Stack); - for (const GlobalAlias &A : M->aliases()) - predictValueUseListOrder(&A, nullptr, OM, Stack); - for (const GlobalIFunc &I : M->ifuncs()) - predictValueUseListOrder(&I, nullptr, OM, Stack); - for (const GlobalVariable &G : M->globals()) - if (G.hasInitializer()) - predictValueUseListOrder(G.getInitializer(), nullptr, OM, Stack); - for (const GlobalAlias &A : M->aliases()) - predictValueUseListOrder(A.getAliasee(), nullptr, OM, Stack); - for (const GlobalIFunc &I : M->ifuncs()) - predictValueUseListOrder(I.getResolver(), nullptr, OM, Stack); - for (const Function &F : *M) - for (const Use &U : F.operands()) - predictValueUseListOrder(U.get(), nullptr, OM, Stack); - return Stack; + const Function *F = nullptr; + if (auto *I = dyn_cast(V)) + F = I->getFunction(); + if (auto *A = dyn_cast(V)) + F = A->getParent(); + if (auto *BB = dyn_cast(V)) + F = BB->getParent(); + ULOM[F][V] = std::move(Shuffle); + } + return ULOM; } static const Module *getModuleFromVal(const Value *V) { @@ -2643,7 +2575,7 @@ class AssemblyWriter { SetVector Comdats; bool IsForDebug; bool ShouldPreserveUseListOrder; - UseListOrderStack UseListOrders; + UseListOrderMap UseListOrders; SmallVector MDNames; /// Synchronization scope names registered with LLVMContext. SmallVector SSNs; @@ -2692,7 +2624,7 @@ class AssemblyWriter { void printInstructionLine(const Instruction &I); void printInstruction(const Instruction &I); - void printUseListOrder(const UseListOrder &Order); + void printUseListOrder(const Value *V, const std::vector &Shuffle); void printUseLists(const Function *F); void printModuleSummaryIndex(); @@ -2926,15 +2858,14 @@ void AssemblyWriter::printModule(const Module *M) { for (const GlobalIFunc &GI : M->ifuncs()) printIndirectSymbol(&GI); - // Output global use-lists. - printUseLists(nullptr); - // Output all of the functions. for (const Function &F : *M) { Out << '\n'; printFunction(&F); } - assert(UseListOrders.empty() && "All use-lists should have been consumed"); + + // Output global use-lists. + printUseLists(nullptr); // Output all attribute groups. if (!Machine.as_empty()) { @@ -4527,43 +4458,39 @@ void AssemblyWriter::writeAllAttributeGroups() { << I.first.getAsString(true) << " }\n"; } -void AssemblyWriter::printUseListOrder(const UseListOrder &Order) { +void AssemblyWriter::printUseListOrder(const Value *V, + const std::vector &Shuffle) { bool IsInFunction = Machine.getFunction(); if (IsInFunction) Out << " "; Out << "uselistorder"; - if (const BasicBlock *BB = - IsInFunction ? nullptr : dyn_cast(Order.V)) { + if (const BasicBlock *BB = IsInFunction ? nullptr : dyn_cast(V)) { Out << "_bb "; writeOperand(BB->getParent(), false); Out << ", "; writeOperand(BB, false); } else { Out << " "; - writeOperand(Order.V, true); + writeOperand(V, true); } Out << ", { "; - assert(Order.Shuffle.size() >= 2 && "Shuffle too small"); - Out << Order.Shuffle[0]; - for (unsigned I = 1, E = Order.Shuffle.size(); I != E; ++I) - Out << ", " << Order.Shuffle[I]; + assert(Shuffle.size() >= 2 && "Shuffle too small"); + Out << Shuffle[0]; + for (unsigned I = 1, E = Shuffle.size(); I != E; ++I) + Out << ", " << Shuffle[I]; Out << " }\n"; } void AssemblyWriter::printUseLists(const Function *F) { - auto hasMore = - [&]() { return !UseListOrders.empty() && UseListOrders.back().F == F; }; - if (!hasMore()) - // Nothing to do. + auto It = UseListOrders.find(F); + if (It == UseListOrders.end()) return; Out << "\n; uselistorder directives\n"; - while (hasMore()) { - printUseListOrder(UseListOrders.back()); - UseListOrders.pop_back(); - } + for (const auto &Pair : It->second) + printUseListOrder(Pair.first, Pair.second); } //===----------------------------------------------------------------------===// diff --git a/llvm/test/Assembler/uselistorder_global.ll b/llvm/test/Assembler/uselistorder_global.ll new file mode 100644 index 0000000000000..3bbab5593226b --- /dev/null +++ b/llvm/test/Assembler/uselistorder_global.ll @@ -0,0 +1,27 @@ +; RUN: opt -S -preserve-ll-uselistorder < %s | FileCheck %s +; RUN: verify-uselistorder %s + +; CHECK: @g = external global i32 +; CHECK: define void @func1() { +; CHECK-NOT: uselistorder +; CHECK: } +; CHECK: define void @func2() { +; CHECK-NOT: uselistorder +; CHECK: } +; CHECK: uselistorder i32* @g, { 3, 2, 1, 0 } + +@g = external global i32 + +define void @func1() { + load i32, i32* @g + load i32, i32* @g + ret void +} + +define void @func2() { + load i32, i32* @g + load i32, i32* @g + ret void +} + +uselistorder i32* @g, { 3, 2, 1, 0 } diff --git a/llvm/test/Bitcode/use-list-order2.ll b/llvm/test/Bitcode/use-list-order2.ll index 7de79a5202104..aafa3d552bbee 100644 --- a/llvm/test/Bitcode/use-list-order2.ll +++ b/llvm/test/Bitcode/use-list-order2.ll @@ -1,5 +1,4 @@ ; RUN: verify-uselistorder %s -; XFAIL: * ; Test 1 @g1 = global i8 0 From 8cd35ad854ab4458fd509447359066ea3578b494 Mon Sep 17 00:00:00 2001 From: Scott Linder Date: Mon, 28 Jun 2021 19:40:45 +0000 Subject: [PATCH 115/619] [DebugInfo] Enforce implicit constraints on `distinct` MDNodes Add UNIQUED and DISTINCT properties in Metadata.def and use them to implement restrictions on the `distinct` property of MDNodes: * DIExpression can currently be parsed from IR or read from bitcode as `distinct`, but this property is silently dropped when printing to IR. This causes accepted IR to fail to round-trip. As DIExpression appears inline at each use in the canonical form of IR, it cannot actually be `distinct` anyway, as there is no syntax to describe it. * Similarly, DIArgList is conceptually always uniqued. It is currently restricted to only appearing in contexts where there is no syntax for `distinct`, but for consistency it is treated equivalently to DIExpression in this patch. * DICompileUnit is already restricted to always being `distinct`, but along with adding general support for the inverse restriction I went ahead and described this in Metadata.def and updated the parser to be general. Future nodes which have this restriction can share this support. The new UNIQUED property applies to DIExpression and DIArgList, and forbids them to be `distinct`. It also implies they are canonically printed inline at each use, rather than via MDNode ID. The new DISTINCT property applies to DICompileUnit, and requires it to be `distinct`. A potential alternative change is to forbid the non-inline syntax for DIExpression entirely, as is done with DIArgList implicitly by requiring it appear in the context of a function. For example, we would forbid: !named = !{!0} !0 = !DIExpression() Instead we would only accept the equivalent inlined version: !named = !{!DIExpression()} This essentially removes the ability to create a `distinct` DIExpression by construction, as there is no syntax for `distinct` inline. If this patch is accepted as-is, the result would be that the non-canonical version is accepted, but the following would be an error and produce a diagnostic: !named = !{!0} ; error: 'distinct' not allowed for !DIExpression() !0 = distinct !DIExpression() Also update some documentation to consistently use the inline syntax for DIExpression, and to describe the restrictions on `distinct` for nodes where applicable. Reviewed By: StephenTozer, t-tye Differential Revision: https://reviews.llvm.org/D104827 --- llvm/docs/LangRef.rst | 60 +++++++------ llvm/docs/SourceLevelDebugging.rst | 81 +++++++++--------- llvm/include/llvm/AsmParser/LLParser.h | 3 +- llvm/include/llvm/IR/Metadata.def | 79 +++++++++++++++-- llvm/lib/AsmParser/LLParser.cpp | 69 +++++++++------ llvm/lib/Bitcode/Reader/MetadataLoader.cpp | 10 ++- llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 17 +++- llvm/lib/CodeGen/MIRParser/MIParser.cpp | 2 + llvm/lib/IR/AsmWriter.cpp | 56 ++++++------ llvm/lib/IR/DebugInfoMetadata.cpp | 2 + llvm/lib/IR/LLVMContextImpl.cpp | 2 + llvm/lib/IR/LLVMContextImpl.h | 1 + llvm/lib/IR/Metadata.cpp | 3 + .../invalid-diarglist-outside-function.ll | 4 + .../invalid-diexpression-distinct.ll | 4 + .../DIExpression-is-distinct-upgrade.ll | 16 ++++ .../DIExpression-is-distinct-upgrade.ll.bc | Bin 0 -> 1424 bytes 17 files changed, 275 insertions(+), 134 deletions(-) create mode 100644 llvm/test/Assembler/invalid-diarglist-outside-function.ll create mode 100644 llvm/test/Assembler/invalid-diexpression-distinct.ll create mode 100644 llvm/test/Bitcode/DIExpression-is-distinct-upgrade.ll create mode 100644 llvm/test/Bitcode/DIExpression-is-distinct-upgrade.ll.bc diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 243e9ea8bf7e8..5bbcc1477c5d6 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -5094,21 +5094,22 @@ metadata nodes are related to debug info. DICompileUnit """"""""""""" -``DICompileUnit`` nodes represent a compile unit. The ``enums:``, -``retainedTypes:``, ``globals:``, ``imports:`` and ``macros:`` fields are tuples -containing the debug info to be emitted along with the compile unit, regardless -of code optimizations (some nodes are only emitted if there are references to -them from instructions). The ``debugInfoForProfiling:`` field is a boolean -indicating whether or not line-table discriminators are updated to provide -more-accurate debug info for profiling results. +``DICompileUnit`` nodes represent a compile unit. ``DICompileUnit`` nodes must +be ``distinct``. The ``enums:``, ``retainedTypes:``, ``globals:``, ``imports:`` +and ``macros:`` fields are tuples containing the debug info to be emitted along +with the compile unit, regardless of code optimizations (some nodes are only +emitted if there are references to them from instructions). The +``debugInfoForProfiling:`` field is a boolean indicating whether or not +line-table discriminators are updated to provide more-accurate debug info for +profiling results. .. code-block:: text - !0 = !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang", - isOptimized: true, flags: "-O2", runtimeVersion: 2, - splitDebugFilename: "abc.debug", emissionKind: FullDebug, - enums: !2, retainedTypes: !3, globals: !4, imports: !5, - macros: !6, dwoId: 0x0abcd) + !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang", + isOptimized: true, flags: "-O2", runtimeVersion: 2, + splitDebugFilename: "abc.debug", emissionKind: FullDebug, + enums: !2, retainedTypes: !3, globals: !4, imports: !5, + macros: !6, dwoId: 0x0abcd) Compile unit descriptors provide the root scope for objects declared in a specific compilation unit. File descriptors are defined using this scope. These @@ -5519,12 +5520,14 @@ DIExpression """""""""""" ``DIExpression`` nodes represent expressions that are inspired by the DWARF -expression language. They are used in :ref:`debug intrinsics` -(such as ``llvm.dbg.declare`` and ``llvm.dbg.value``) to describe how the -referenced LLVM variable relates to the source language variable. Debug -intrinsics are interpreted left-to-right: start by pushing the value/address -operand of the intrinsic onto a stack, then repeatedly push and evaluate -opcodes from the DIExpression until the final variable description is produced. +expression language. ``DIExpression`` nodes must not be ``distinct``, and are +canonically printed inline at each use. They are used in :ref:`debug +intrinsics` (such as ``llvm.dbg.declare`` and +``llvm.dbg.value``) to describe how the referenced LLVM variable relates to the +source language variable. Debug intrinsics are interpreted left-to-right: start +by pushing the value/address operand of the intrinsic onto a stack, then +repeatedly push and evaluate opcodes from the DIExpression until the final +variable description is produced. The current supported opcode vocabulary is limited: @@ -5602,23 +5605,23 @@ The current supported opcode vocabulary is limited: IR for "*ptr = 4;" -------------- - call void @llvm.dbg.value(metadata i32 4, metadata !17, metadata !20) + call void @llvm.dbg.value(metadata i32 4, metadata !17, + metadata !DIExpression(DW_OP_LLVM_implicit_pointer))) !17 = !DILocalVariable(name: "ptr1", scope: !12, file: !3, line: 5, type: !18) !18 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !19, size: 64) !19 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) - !20 = !DIExpression(DW_OP_LLVM_implicit_pointer)) IR for "**ptr = 4;" -------------- - call void @llvm.dbg.value(metadata i32 4, metadata !17, metadata !21) + call void @llvm.dbg.value(metadata i32 4, metadata !17, + metadata !DIExpression(DW_OP_LLVM_implicit_pointer, + DW_OP_LLVM_implicit_pointer))) !17 = !DILocalVariable(name: "ptr1", scope: !12, file: !3, line: 5, type: !18) !18 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !19, size: 64) !19 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !20, size: 64) !20 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) - !21 = !DIExpression(DW_OP_LLVM_implicit_pointer, - DW_OP_LLVM_implicit_pointer)) DWARF specifies three kinds of simple location descriptions: Register, memory, and implicit location descriptions. Note that a location description is @@ -5659,12 +5662,13 @@ valid debug intrinsic. DIArgList """""""""""" -``DIArgList`` nodes hold a list of constant or SSA value references. These are -used in :ref:`debug intrinsics` (currently only in +``DIArgList`` nodes hold a list of constant or SSA value references. +``DIArgList`` must not be ``distinct``, must only be used as an argument to a +function call, and must appear inline at each use. ``DIArgList`` may refer to +function-local values of the containing function. ``DIArgList`` nodes are used +in :ref:`debug intrinsics` (currently only in ``llvm.dbg.value``) in combination with a ``DIExpression`` that uses the -``DW_OP_LLVM_arg`` operator. Because a DIArgList may refer to local values -within a function, it must only be used as a function argument, must always be -inlined, and cannot appear in named metadata. +``DW_OP_LLVM_arg`` operator. .. code-block:: text diff --git a/llvm/docs/SourceLevelDebugging.rst b/llvm/docs/SourceLevelDebugging.rst index 652eccb33f077..69b7f35fc7b87 100644 --- a/llvm/docs/SourceLevelDebugging.rst +++ b/llvm/docs/SourceLevelDebugging.rst @@ -291,17 +291,17 @@ Compiled to LLVM, this function would be represented like this: %X = alloca i32, align 4 %Y = alloca i32, align 4 %Z = alloca i32, align 4 - call void @llvm.dbg.declare(metadata i32* %X, metadata !11, metadata !13), !dbg !14 - store i32 21, i32* %X, align 4, !dbg !14 - call void @llvm.dbg.declare(metadata i32* %Y, metadata !15, metadata !13), !dbg !16 - store i32 22, i32* %Y, align 4, !dbg !16 - call void @llvm.dbg.declare(metadata i32* %Z, metadata !17, metadata !13), !dbg !19 - store i32 23, i32* %Z, align 4, !dbg !19 - %0 = load i32, i32* %X, align 4, !dbg !20 - store i32 %0, i32* %Z, align 4, !dbg !21 - %1 = load i32, i32* %Y, align 4, !dbg !22 - store i32 %1, i32* %X, align 4, !dbg !23 - ret void, !dbg !24 + call void @llvm.dbg.declare(metadata i32* %X, metadata !11, metadata !DIExpression()), !dbg !13 + store i32 21, i32* %X, align 4, !dbg !13 + call void @llvm.dbg.declare(metadata i32* %Y, metadata !14, metadata !DIExpression()), !dbg !15 + store i32 22, i32* %Y, align 4, !dbg !15 + call void @llvm.dbg.declare(metadata i32* %Z, metadata !16, metadata !DIExpression()), !dbg !18 + store i32 23, i32* %Z, align 4, !dbg !18 + %0 = load i32, i32* %X, align 4, !dbg !19 + store i32 %0, i32* %Z, align 4, !dbg !20 + %1 = load i32, i32* %Y, align 4, !dbg !21 + store i32 %1, i32* %X, align 4, !dbg !22 + ret void, !dbg !23 } ; Function Attrs: nounwind readnone @@ -327,18 +327,17 @@ Compiled to LLVM, this function would be represented like this: !10 = !{!"clang version 3.7.0 (trunk 231150) (llvm/trunk 231154)"} !11 = !DILocalVariable(name: "X", scope: !4, file: !1, line: 2, type: !12) !12 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) - !13 = !DIExpression() - !14 = !DILocation(line: 2, column: 9, scope: !4) - !15 = !DILocalVariable(name: "Y", scope: !4, file: !1, line: 3, type: !12) - !16 = !DILocation(line: 3, column: 9, scope: !4) - !17 = !DILocalVariable(name: "Z", scope: !18, file: !1, line: 5, type: !12) - !18 = distinct !DILexicalBlock(scope: !4, file: !1, line: 4, column: 5) - !19 = !DILocation(line: 5, column: 11, scope: !18) - !20 = !DILocation(line: 6, column: 11, scope: !18) - !21 = !DILocation(line: 6, column: 9, scope: !18) - !22 = !DILocation(line: 8, column: 9, scope: !4) - !23 = !DILocation(line: 8, column: 7, scope: !4) - !24 = !DILocation(line: 9, column: 3, scope: !4) + !13 = !DILocation(line: 2, column: 9, scope: !4) + !14 = !DILocalVariable(name: "Y", scope: !4, file: !1, line: 3, type: !12) + !15 = !DILocation(line: 3, column: 9, scope: !4) + !16 = !DILocalVariable(name: "Z", scope: !17, file: !1, line: 5, type: !12) + !17 = distinct !DILexicalBlock(scope: !4, file: !1, line: 4, column: 5) + !18 = !DILocation(line: 5, column: 11, scope: !17) + !19 = !DILocation(line: 6, column: 11, scope: !17) + !20 = !DILocation(line: 6, column: 9, scope: !17) + !21 = !DILocation(line: 8, column: 9, scope: !4) + !22 = !DILocation(line: 8, column: 7, scope: !4) + !23 = !DILocation(line: 9, column: 3, scope: !4) This example illustrates a few important details about LLVM debugging @@ -349,21 +348,21 @@ variable definitions, and the code used to implement the function. .. code-block:: llvm - call void @llvm.dbg.declare(metadata i32* %X, metadata !11, metadata !13), !dbg !14 + call void @llvm.dbg.declare(metadata i32* %X, metadata !11, metadata !DIExpression()), !dbg !13 ; [debug line = 2:7] [debug variable = X] The first intrinsic ``%llvm.dbg.declare`` encodes debugging information for the -variable ``X``. The metadata ``!dbg !14`` attached to the intrinsic provides +variable ``X``. The metadata ``!dbg !13`` attached to the intrinsic provides scope information for the variable ``X``. .. code-block:: text - !14 = !DILocation(line: 2, column: 9, scope: !4) + !13 = !DILocation(line: 2, column: 9, scope: !4) !4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: false, variables: !2) -Here ``!14`` is metadata providing `location information +Here ``!13`` is metadata providing `location information `_. In this example, scope is encoded by ``!4``, a `subprogram descriptor `_. This way the location information attached to the intrinsics indicates that the variable ``X`` is @@ -373,20 +372,20 @@ Now lets take another example. .. code-block:: llvm - call void @llvm.dbg.declare(metadata i32* %Z, metadata !17, metadata !13), !dbg !19 + call void @llvm.dbg.declare(metadata i32* %Z, metadata !16, metadata !DIExpression()), !dbg !18 ; [debug line = 5:9] [debug variable = Z] The third intrinsic ``%llvm.dbg.declare`` encodes debugging information for -variable ``Z``. The metadata ``!dbg !19`` attached to the intrinsic provides +variable ``Z``. The metadata ``!dbg !18`` attached to the intrinsic provides scope information for the variable ``Z``. .. code-block:: text - !18 = distinct !DILexicalBlock(scope: !4, file: !1, line: 4, column: 5) - !19 = !DILocation(line: 5, column: 11, scope: !18) + !17 = distinct !DILexicalBlock(scope: !4, file: !1, line: 4, column: 5) + !18 = !DILocation(line: 5, column: 11, scope: !17) -Here ``!19`` indicates that ``Z`` is declared at line number 5 and column -number 11 inside of lexical scope ``!18``. The lexical scope itself resides +Here ``!18`` indicates that ``Z`` is declared at line number 5 and column +number 11 inside of lexical scope ``!17``. The lexical scope itself resides inside of subprogram ``!4`` described above. The scope information attached with each instruction provides a straightforward @@ -800,14 +799,14 @@ presents several difficulties: br label %exit, !dbg !26 truebr: - call void @llvm.dbg.value(metadata i32 %input, metadata !30, metadata !DIExpression()), !dbg !24 - call void @llvm.dbg.value(metadata i32 1, metadata !23, metadata !DIExpression()), !dbg !24 + call void @llvm.dbg.value(metadata i32 %input, metadata !30, metadata !DIExpression()), !dbg !23 + call void @llvm.dbg.value(metadata i32 1, metadata !22, metadata !DIExpression()), !dbg !23 %value1 = add i32 %input, 1 br label %bb1 falsebr: - call void @llvm.dbg.value(metadata i32 %input, metadata !30, metadata !DIExpression()), !dbg !24 - call void @llvm.dbg.value(metadata i32 2, metadata !23, metadata !DIExpression()), !dbg !24 + call void @llvm.dbg.value(metadata i32 %input, metadata !30, metadata !DIExpression()), !dbg !23 + call void @llvm.dbg.value(metadata i32 2, metadata !22, metadata !DIExpression()), !dbg !23 %value = add i32 %input, 2 br label %bb1 @@ -818,7 +817,7 @@ presents several difficulties: Here the difficulties are: * The control flow is roughly the opposite of basic block order -* The value of the ``!23`` variable merges into ``%bb1``, but there is no PHI +* The value of the ``!22`` variable merges into ``%bb1``, but there is no PHI node As mentioned above, the ``llvm.dbg.value`` intrinsics essentially form an @@ -831,9 +830,9 @@ location, which would lead to a large number of debugging intrinsics being generated. Examining the example above, variable ``!30`` is assigned ``%input`` on both -conditional paths through the function, while ``!23`` is assigned differing +conditional paths through the function, while ``!22`` is assigned differing constant values on either path. Where control flow merges in ``%bb1`` we would -want ``!30`` to keep its location (``%input``), but ``!23`` to become undefined +want ``!30`` to keep its location (``%input``), but ``!22`` to become undefined as we cannot determine at runtime what value it should have in %bb1 without inserting a PHI node. mem2reg does not insert the PHI node to avoid changing codegen when debugging is enabled, and does not insert the other dbg.values @@ -852,7 +851,7 @@ DbgEntityHistoryCalculator) to build a map of each instruction to every valid variable location, without the need to consider control flow. From the example above, it is otherwise difficult to determine that the location of variable ``!30`` should flow "up" into block ``%bb1``, but that the location -of variable ``!23`` should not flow "down" into the ``%exit`` block. +of variable ``!22`` should not flow "down" into the ``%exit`` block. .. _ccxx_frontend: diff --git a/llvm/include/llvm/AsmParser/LLParser.h b/llvm/include/llvm/AsmParser/LLParser.h index 108be7b5628ca..3981241bb2eee 100644 --- a/llvm/include/llvm/AsmParser/LLParser.h +++ b/llvm/include/llvm/AsmParser/LLParser.h @@ -532,7 +532,8 @@ namespace llvm { template bool parseMDFieldsImplBody(ParserTy ParseField); template bool parseMDFieldsImpl(ParserTy ParseField, LocTy &ClosingLoc); - bool parseSpecializedMDNode(MDNode *&N, bool IsDistinct = false); + bool parseSpecializedMDNode(MDNode *&N, bool IsDistinct = false, + LocTy DistinctLoc = LocTy()); #define HANDLE_SPECIALIZED_MDNODE_LEAF(CLASS) \ bool parse##CLASS(MDNode *&Result, bool IsDistinct); diff --git a/llvm/include/llvm/IR/Metadata.def b/llvm/include/llvm/IR/Metadata.def index bbf349e6b508c..fdef80724de52 100644 --- a/llvm/include/llvm/IR/Metadata.def +++ b/llvm/include/llvm/IR/Metadata.def @@ -8,12 +8,39 @@ // // Macros for running through all types of metadata. // +// Definitions for terms used to describe metadata include: +// +// * BRANCH: refers to an "abstract" metadata kind, which exists only in the +// C++ class hierarchy. These cannot appear directly in IR/bitcode. +// * LEAF: refers to a "concrete" metadata kind. These can appear directly in +// IR/bitcode. +// * SPECIALIZED: refers to non-MDTuple MDNodes, i.e. those that use the +// syntax "!CLASS(...)" in IR. +// * UNIQUABLE: refers to nodes which can use uniqued, distinct, or temporary +// storage without any restrictions. +// * UNIQUED: refers to nodes which must use uniqued or temporary storage. +// * DISTINCT: refers to nodes which must use distinct or temporary storage. +// +// In LLVM IR, UNIQUABLE and DISTINCT nodes must be referred to by MDNode ID, +// as in `!0`, whereas UNIQUED nodes canonically appear inline at each use, as +// in `DIExpression(...)`. This is because `distinct` nodes maintain their +// identity irrespective of contents, making the inline syntax ambiguous in +// some cases. +// +// Note: UNIQUABLE, UNIQUED, and DISTINCT are mutually exclusive. For example, +// code which intends to consider all nodes which can use uniqued storage must +// consider both UNIQUABLE and UNIQUED nodes. +// //===----------------------------------------------------------------------===// #if !(defined HANDLE_METADATA || defined HANDLE_METADATA_LEAF || \ defined HANDLE_METADATA_BRANCH || defined HANDLE_MDNODE_LEAF || \ - defined HANDLE_MDNODE_LEAF_UNIQUABLE || defined HANDLE_MDNODE_BRANCH || \ + defined HANDLE_MDNODE_LEAF_UNIQUABLE || \ + defined HANDLE_MDNODE_LEAF_UNIQUED || \ + defined HANDLE_MDNODE_LEAF_DISTINCT || defined HANDLE_MDNODE_BRANCH || \ defined HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE || \ + defined HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUED || \ + defined HANDLE_SPECIALIZED_MDNODE_LEAF_DISTINCT || \ defined HANDLE_SPECIALIZED_MDNODE_LEAF || \ defined HANDLE_SPECIALIZED_MDNODE_BRANCH) #error "Missing macro definition of HANDLE_METADATA*" @@ -34,7 +61,7 @@ #define HANDLE_METADATA_BRANCH(CLASS) HANDLE_METADATA(CLASS) #endif -// Handler for specialized and uniquable leaf nodes under MDNode. Defers to +// Handler for specialized and uniquable leaf nodes under MDNode. Defers to // HANDLE_MDNODE_LEAF_UNIQUABLE if it's defined, otherwise to // HANDLE_SPECIALIZED_MDNODE_LEAF. #ifndef HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE @@ -47,11 +74,47 @@ #endif #endif -// Handler for leaf nodes under MDNode. +// Handler for specialized and always-uniqued leaf nodes under MDNode. Defers to +// HANDLE_MDNODE_LEAF_UNIQUED if it's defined, otherwise to +// HANDLE_SPECIALIZED_MDNODE_LEAF. +#ifndef HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUED +#ifdef HANDLE_MDNODE_LEAF_UNIQUED +#define HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUED(CLASS) \ + HANDLE_MDNODE_LEAF_UNIQUED(CLASS) +#else +#define HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUED(CLASS) \ + HANDLE_SPECIALIZED_MDNODE_LEAF(CLASS) +#endif +#endif + +// Handler for specialized and always-distinct leaf nodes under MDNode. Defers +// to HANDLE_MDNODE_LEAF_DISTINCT if it's defined, otherwise to +// HANDLE_SPECIALIZED_MDNODE_LEAF. +#ifndef HANDLE_SPECIALIZED_MDNODE_LEAF_DISTINCT +#ifdef HANDLE_MDNODE_LEAF_DISTINCT +#define HANDLE_SPECIALIZED_MDNODE_LEAF_DISTINCT(CLASS) \ + HANDLE_MDNODE_LEAF_DISTINCT(CLASS) +#else +#define HANDLE_SPECIALIZED_MDNODE_LEAF_DISTINCT(CLASS) \ + HANDLE_SPECIALIZED_MDNODE_LEAF(CLASS) +#endif +#endif + +// Handler for uniquable leaf nodes under MDNode. #ifndef HANDLE_MDNODE_LEAF_UNIQUABLE #define HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) HANDLE_MDNODE_LEAF(CLASS) #endif +// Handler for uniqued leaf nodes under MDNode. +#ifndef HANDLE_MDNODE_LEAF_UNIQUED +#define HANDLE_MDNODE_LEAF_UNIQUED(CLASS) HANDLE_MDNODE_LEAF(CLASS) +#endif + +// Handler for distinct leaf nodes under MDNode. +#ifndef HANDLE_MDNODE_LEAF_DISTINCT +#define HANDLE_MDNODE_LEAF_DISTINCT(CLASS) HANDLE_MDNODE_LEAF(CLASS) +#endif + // Handler for leaf nodes under MDNode. #ifndef HANDLE_MDNODE_LEAF #define HANDLE_MDNODE_LEAF(CLASS) HANDLE_METADATA_LEAF(CLASS) @@ -80,7 +143,7 @@ HANDLE_METADATA_LEAF(DistinctMDOperandPlaceholder) HANDLE_MDNODE_BRANCH(MDNode) HANDLE_MDNODE_LEAF_UNIQUABLE(MDTuple) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DILocation) -HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIExpression) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUED(DIExpression) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIGlobalVariableExpression) HANDLE_SPECIALIZED_MDNODE_BRANCH(DINode) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(GenericDINode) @@ -93,7 +156,7 @@ HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIDerivedType) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DICompositeType) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DISubroutineType) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIFile) -HANDLE_SPECIALIZED_MDNODE_LEAF(DICompileUnit) +HANDLE_SPECIALIZED_MDNODE_LEAF_DISTINCT(DICompileUnit) HANDLE_SPECIALIZED_MDNODE_BRANCH(DILocalScope) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DISubprogram) HANDLE_SPECIALIZED_MDNODE_BRANCH(DILexicalBlockBase) @@ -114,7 +177,7 @@ HANDLE_SPECIALIZED_MDNODE_BRANCH(DIMacroNode) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIMacro) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIMacroFile) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DICommonBlock) -HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIArgList) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUED(DIArgList) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIStringType) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIGenericSubrange) @@ -123,7 +186,11 @@ HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIGenericSubrange) #undef HANDLE_METADATA_BRANCH #undef HANDLE_MDNODE_LEAF #undef HANDLE_MDNODE_LEAF_UNIQUABLE +#undef HANDLE_MDNODE_LEAF_UNIQUED +#undef HANDLE_MDNODE_LEAF_DISTINCT #undef HANDLE_MDNODE_BRANCH #undef HANDLE_SPECIALIZED_MDNODE_LEAF #undef HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE +#undef HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUED +#undef HANDLE_SPECIALIZED_MDNODE_LEAF_DISTINCT #undef HANDLE_SPECIALIZED_MDNODE_BRANCH diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index c4fa35ff15ece..ccbc031736c3a 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -740,27 +740,29 @@ bool LLParser::parseNamedMetadata() { return true; NamedMDNode *NMD = M->getOrInsertNamedMetadata(Name); - if (Lex.getKind() != lltok::rbrace) - do { - MDNode *N = nullptr; - // parse DIExpressions inline as a special case. They are still MDNodes, - // so they can still appear in named metadata. Remove this logic if they - // become plain Metadata. - if (Lex.getKind() == lltok::MetadataVar && - Lex.getStrVal() == "DIExpression") { - if (parseDIExpression(N, /*IsDistinct=*/false)) - return true; - // DIArgLists should only appear inline in a function, as they may - // contain LocalAsMetadata arguments which require a function context. - } else if (Lex.getKind() == lltok::MetadataVar && - Lex.getStrVal() == "DIArgList") { - return tokError("found DIArgList outside of function"); - } else if (parseToken(lltok::exclaim, "Expected '!' here") || - parseMDNodeID(N)) { - return true; - } - NMD->addOperand(N); - } while (EatIfPresent(lltok::comma)); + + if (Lex.getKind() == lltok::rbrace) { + Lex.Lex(); + return false; + } + + do { + MDNode *N = nullptr; + // Parse uniqued MDNodes inline as a special case. +#define HANDLE_MDNODE_LEAF_UNIQUED(CLASS) \ + if (Lex.getKind() == lltok::MetadataVar && Lex.getStrVal() == #CLASS) { \ + if (parse##CLASS(N, /*IsDistinct=*/false)) \ + return true; \ + NMD->addOperand(N); \ + continue; \ + } +#include "llvm/IR/Metadata.def" + // Parse all other MDNodes as an MDNodeID. + if (parseToken(lltok::exclaim, "Expected '!' here") || parseMDNodeID(N)) { + return true; + } + NMD->addOperand(N); + } while (EatIfPresent(lltok::comma)); return parseToken(lltok::rbrace, "expected end of metadata node"); } @@ -780,9 +782,10 @@ bool LLParser::parseStandaloneMetadata() { if (Lex.getKind() == lltok::Type) return tokError("unexpected type in metadata definition"); + auto DistinctLoc = Lex.getLoc(); bool IsDistinct = EatIfPresent(lltok::kw_distinct); if (Lex.getKind() == lltok::MetadataVar) { - if (parseSpecializedMDNode(Init, IsDistinct)) + if (parseSpecializedMDNode(Init, IsDistinct, DistinctLoc)) return true; } else if (parseToken(lltok::exclaim, "Expected '!' here") || parseMDTuple(Init, IsDistinct)) @@ -4641,12 +4644,25 @@ bool LLParser::parseMDField(StringRef Name, FieldTy &Result) { return parseMDField(Loc, Name, Result); } -bool LLParser::parseSpecializedMDNode(MDNode *&N, bool IsDistinct) { +bool LLParser::parseSpecializedMDNode(MDNode *&N, bool IsDistinct, + LocTy DistinctLoc) { assert(Lex.getKind() == lltok::MetadataVar && "Expected metadata type name"); -#define HANDLE_SPECIALIZED_MDNODE_LEAF(CLASS) \ +#define HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(CLASS) \ if (Lex.getStrVal() == #CLASS) \ return parse##CLASS(N, IsDistinct); +#define HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUED(CLASS) \ + if (Lex.getStrVal() == #CLASS) { \ + if (IsDistinct) \ + return error(DistinctLoc, "'distinct' not allowed for !" #CLASS); \ + return parse##CLASS(N, IsDistinct); \ + } +#define HANDLE_SPECIALIZED_MDNODE_LEAF_DISTINCT(CLASS) \ + if (Lex.getStrVal() == #CLASS) { \ + if (!IsDistinct) \ + return error(DistinctLoc, "missing 'distinct', required for !" #CLASS); \ + return parse##CLASS(N, IsDistinct); \ + } #include "llvm/IR/Metadata.def" return tokError("expected metadata type"); @@ -4990,9 +5006,6 @@ bool LLParser::parseDIFile(MDNode *&Result, bool IsDistinct) { /// globals: !4, imports: !5, macros: !6, dwoId: 0x0abcd, /// sysroot: "/", sdk: "MacOSX.sdk") bool LLParser::parseDICompileUnit(MDNode *&Result, bool IsDistinct) { - if (!IsDistinct) - return Lex.Error("missing 'distinct', required for !DICompileUnit"); - #define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \ REQUIRED(language, DwarfLangField, ); \ REQUIRED(file, MDField, (/* AllowNull */ false)); \ @@ -5356,7 +5369,7 @@ bool LLParser::parseDIExpression(MDNode *&Result, bool IsDistinct) { } bool LLParser::parseDIArgList(MDNode *&Result, bool IsDistinct) { - return parseDIArgList(Result, IsDistinct, nullptr); + return tokError("!DIArgList cannot appear outside of a function"); } /// ParseDIArgList: /// ::= !DIArgList(i32 7, i64 %0) diff --git a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp index 8493eb7a28b23..a2ad4a4207971 100644 --- a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp +++ b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp @@ -555,7 +555,7 @@ class MetadataLoader::MetadataLoaderImpl { } /// Upgrade the expression from previous versions. - Error upgradeDIExpression(uint64_t FromVersion, + Error upgradeDIExpression(uint64_t FromVersion, bool &IsDistinct, MutableArrayRef &Expr, SmallVectorImpl &Buffer) { auto N = Expr.size(); @@ -629,6 +629,9 @@ class MetadataLoader::MetadataLoaderImpl { LLVM_FALLTHROUGH; } case 3: + IsDistinct = false; + LLVM_FALLTHROUGH; + case 4: // Up-to-date! break; } @@ -1981,9 +1984,12 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( auto Elts = MutableArrayRef(Record).slice(1); SmallVector Buffer; - if (Error Err = upgradeDIExpression(Version, Elts, Buffer)) + if (Error Err = upgradeDIExpression(Version, IsDistinct, Elts, Buffer)) return Err; + if (IsDistinct) + return error("Invalid record"); + MetadataList.assignValue( GET_OR_DISTINCT(DIExpression, (Context, Elts)), NextMetadataNo); NextMetadataNo++; diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index bdb973e8e421b..1ad55e264acad 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -1758,7 +1758,6 @@ void ModuleBitcodeWriter::writeDIFile(const DIFile *N, void ModuleBitcodeWriter::writeDICompileUnit(const DICompileUnit *N, SmallVectorImpl &Record, unsigned Abbrev) { - assert(N->isDistinct() && "Expected distinct compile units"); Record.push_back(/* IsDistinct */ true); Record.push_back(N->getSourceLanguage()); Record.push_back(VE.getMetadataOrNullID(N->getFile())); @@ -2009,7 +2008,7 @@ void ModuleBitcodeWriter::writeDIExpression(const DIExpression *N, SmallVectorImpl &Record, unsigned Abbrev) { Record.reserve(N->getElements().size() + 1); - const uint64_t Version = 3 << 1; + const uint64_t Version = 4 << 1; Record.push_back((uint64_t)N->isDistinct() | Version); Record.append(N->elements_begin(), N->elements_end()); @@ -2154,6 +2153,20 @@ void ModuleBitcodeWriter::writeMetadataRecords( if (const MDNode *N = dyn_cast(MD)) { assert(N->isResolved() && "Expected forward references to be resolved"); +#ifndef NDEBUG + switch (N->getMetadataID()) { +#define HANDLE_MDNODE_LEAF_UNIQUED(CLASS) \ + case Metadata::CLASS##Kind: \ + assert(!N->isDistinct() && "Expected non-distinct " #CLASS); \ + break; +#define HANDLE_MDNODE_LEAF_DISTINCT(CLASS) \ + case Metadata::CLASS##Kind: \ + assert(N->isDistinct() && "Expected distinct " #CLASS); \ + break; +#include "llvm/IR/Metadata.def" + } +#endif + switch (N->getMetadataID()) { default: llvm_unreachable("Invalid MDNode subclass"); diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp index 5a179105bd8a4..745ba4dc7b91d 100644 --- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -1159,6 +1159,7 @@ bool MIParser::parseStandaloneMDNode(MDNode *&Node) { if (parseMDNode(Node)) return true; } else if (Token.is(MIToken::md_diexpr)) { + // FIXME: This should be driven off of the UNIQUED property in Metadata.def if (parseDIExpression(Node)) return true; } else if (Token.is(MIToken::md_dilocation)) { @@ -2175,6 +2176,7 @@ bool MIParser::parseMetadataOperand(MachineOperand &Dest) { if (parseMDNode(Node)) return true; } else if (Token.is(MIToken::md_diexpr)) { + // FIXME: This should be driven off of the UNIQUED property in Metadata.def if (parseDIExpression(Node)) return true; } diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index cbbcbe02638bb..b0ee6aa276b53 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -1233,10 +1233,11 @@ void SlotTracker::CreateFunctionSlot(const Value *V) { void SlotTracker::CreateMetadataSlot(const MDNode *N) { assert(N && "Can't insert a null Value into SlotTracker!"); - // Don't make slots for DIExpressions or DIArgLists. We just print them inline - // everywhere. - if (isa(N) || isa(N)) + // Don't make slots for uniqued nodes. We just print them inline everywhere. +#define HANDLE_MDNODE_LEAF_UNIQUED(CLASS) \ + if (isa(N)) \ return; +#include "llvm/IR/Metadata.def" unsigned DestSlot = mdnNext; if (!mdnMap.insert(std::make_pair(N, DestSlot)).second) @@ -2354,9 +2355,7 @@ static void writeDIExpression(raw_ostream &Out, const DIExpression *N, static void writeDIArgList(raw_ostream &Out, const DIArgList *N, TypePrinting *TypePrinter, SlotTracker *Machine, - const Module *Context, bool FromValue = false) { - assert(FromValue && - "Unexpected DIArgList metadata outside of value argument"); + const Module *Context) { Out << "!DIArgList("; FieldSeparator FS; MDFieldPrinter Printer(Out, TypePrinter, Machine, Context); @@ -2514,16 +2513,16 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Metadata *MD, TypePrinting *TypePrinter, SlotTracker *Machine, const Module *Context, bool FromValue) { - // Write DIExpressions and DIArgLists inline when used as a value. Improves - // readability of debug info intrinsics. - if (const DIExpression *Expr = dyn_cast(MD)) { - writeDIExpression(Out, Expr, TypePrinter, Machine, Context); - return; - } - if (const DIArgList *ArgList = dyn_cast(MD)) { - writeDIArgList(Out, ArgList, TypePrinter, Machine, Context, FromValue); - return; + assert((FromValue || !(isa(MD) || isa(MD))) && + "Unexpected function-local metadata outside of value argument"); + + // Write uniqued MDNodes inline when used as a value. +#define HANDLE_MDNODE_LEAF_UNIQUED(CLASS) \ + if (const CLASS *N = dyn_cast(MD)) { \ + write##CLASS(Out, N, TypePrinter, Machine, Context); \ + return; \ } +#include "llvm/IR/Metadata.def" if (const MDNode *N = dyn_cast(MD)) { std::unique_ptr MachineStorage; @@ -2554,9 +2553,6 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Metadata *MD, auto *V = cast(MD); assert(TypePrinter && "TypePrinter required for metadata values"); - assert((FromValue || !isa(V)) && - "Unexpected function-local metadata outside of value argument"); - TypePrinter->print(V->getValue()->getType(), Out); Out << ' '; WriteAsOperandInternal(Out, V->getValue(), TypePrinter, Machine, Context); @@ -3445,15 +3441,17 @@ void AssemblyWriter::printNamedMDNode(const NamedMDNode *NMD) { if (i) Out << ", "; - // Write DIExpressions inline. + // Write UNIQUED nodes inline. // FIXME: Ban DIExpressions in NamedMDNodes, they will serve no purpose. MDNode *Op = NMD->getOperand(i); assert(!isa(Op) && "DIArgLists should not appear in NamedMDNodes"); - if (auto *Expr = dyn_cast(Op)) { - writeDIExpression(Out, Expr, nullptr, nullptr, nullptr); - continue; - } +#define HANDLE_MDNODE_LEAF_UNIQUED(CLASS) \ + if (auto *N = dyn_cast(Op)) { \ + write##CLASS(Out, N, nullptr, nullptr, nullptr); \ + continue; \ + } +#include "llvm/IR/Metadata.def" int Slot = Machine.getMetadataSlot(Op); if (Slot == -1) @@ -4710,12 +4708,18 @@ static void printMetadataImpl(raw_ostream &ROS, const Metadata &MD, TypePrinting TypePrinter(M); - WriteAsOperandInternal(OS, &MD, &TypePrinter, MST.getMachine(), M, - /* FromValue */ true); + WriteAsOperandInternal(OS, &MD, &TypePrinter, MST.getMachine(), M); auto *N = dyn_cast(&MD); - if (OnlyAsOperand || !N || isa(MD) || isa(MD)) + if (OnlyAsOperand || !N) { + return; + } + // Uniqued MDNodes are always treated as if OnlyAsOperand, as they are + // printed inline. +#define HANDLE_MDNODE_LEAF_UNIQUED(CLASS) \ + if (isa(MD)) \ return; +#include "llvm/IR/Metadata.def" OS << " = "; WriteMDNodeBodyInternal(OS, N, &TypePrinter, MST.getMachine(), M); diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp index 7b0dab799e1a9..eff01bffe760f 100644 --- a/llvm/lib/IR/DebugInfoMetadata.cpp +++ b/llvm/lib/IR/DebugInfoMetadata.cpp @@ -1053,6 +1053,7 @@ DILabel *DILabel::getImpl(LLVMContext &Context, Metadata *Scope, DIExpression *DIExpression::getImpl(LLVMContext &Context, ArrayRef Elements, StorageType Storage, bool ShouldCreate) { + assert(Storage != Distinct && "DIExpression cannot be distinct"); DEFINE_GETIMPL_LOOKUP(DIExpression, (Elements)); DEFINE_GETIMPL_STORE_NO_OPS(DIExpression, (Elements)); } @@ -1583,6 +1584,7 @@ DIMacroFile *DIMacroFile::getImpl(LLVMContext &Context, unsigned MIType, DIArgList *DIArgList::getImpl(LLVMContext &Context, ArrayRef Args, StorageType Storage, bool ShouldCreate) { + assert(Storage != Distinct && "DIArgList cannot be distinct"); DEFINE_GETIMPL_LOOKUP(DIArgList, (Args)); DEFINE_GETIMPL_STORE_NO_OPS(DIArgList, (Args)); } diff --git a/llvm/lib/IR/LLVMContextImpl.cpp b/llvm/lib/IR/LLVMContextImpl.cpp index 99819602c5452..2533ce83600b5 100644 --- a/llvm/lib/IR/LLVMContextImpl.cpp +++ b/llvm/lib/IR/LLVMContextImpl.cpp @@ -60,6 +60,7 @@ LLVMContextImpl::~LLVMContextImpl() { #define HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) \ for (auto *I : CLASS##s) \ I->dropAllReferences(); +#define HANDLE_MDNODE_LEAF_UNIQUED(CLASS) HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) #include "llvm/IR/Metadata.def" // Also drop references that come from the Value bridges. @@ -74,6 +75,7 @@ LLVMContextImpl::~LLVMContextImpl() { #define HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) \ for (CLASS * I : CLASS##s) \ delete I; +#define HANDLE_MDNODE_LEAF_UNIQUED(CLASS) HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) #include "llvm/IR/Metadata.def" // Free the constants. diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h index 2ae23fdc95a8a..5caa8f60b694f 100644 --- a/llvm/lib/IR/LLVMContextImpl.h +++ b/llvm/lib/IR/LLVMContextImpl.h @@ -1381,6 +1381,7 @@ class LLVMContextImpl { #define HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) \ DenseSet CLASS##s; +#define HANDLE_MDNODE_LEAF_UNIQUED(CLASS) HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) #include "llvm/IR/Metadata.def" // Optional map for looking up composite types by identifier. diff --git a/llvm/lib/IR/Metadata.cpp b/llvm/lib/IR/Metadata.cpp index 4f87ef5377653..9ac388835be2e 100644 --- a/llvm/lib/IR/Metadata.cpp +++ b/llvm/lib/IR/Metadata.cpp @@ -672,6 +672,7 @@ MDNode *MDNode::replaceWithPermanentImpl() { #define HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) \ case CLASS##Kind: \ break; +#define HANDLE_MDNODE_LEAF_UNIQUED(CLASS) HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) #include "llvm/IR/Metadata.def" } @@ -812,6 +813,7 @@ MDNode *MDNode::uniquify() { dispatchRecalculateHash(SubclassThis, ShouldRecalculateHash); \ return uniquifyImpl(SubclassThis, getContext().pImpl->CLASS##s); \ } +#define HANDLE_MDNODE_LEAF_UNIQUED(CLASS) HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) #include "llvm/IR/Metadata.def" } } @@ -824,6 +826,7 @@ void MDNode::eraseFromStore() { case CLASS##Kind: \ getContext().pImpl->CLASS##s.erase(cast(this)); \ break; +#define HANDLE_MDNODE_LEAF_UNIQUED(CLASS) HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) #include "llvm/IR/Metadata.def" } } diff --git a/llvm/test/Assembler/invalid-diarglist-outside-function.ll b/llvm/test/Assembler/invalid-diarglist-outside-function.ll new file mode 100644 index 0000000000000..351cd0bc7b40f --- /dev/null +++ b/llvm/test/Assembler/invalid-diarglist-outside-function.ll @@ -0,0 +1,4 @@ +; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s + +; CHECK: :[[@LINE+1]]:6: error: !DIArgList cannot appear outside of a function +!0 = !DIArgList() diff --git a/llvm/test/Assembler/invalid-diexpression-distinct.ll b/llvm/test/Assembler/invalid-diexpression-distinct.ll new file mode 100644 index 0000000000000..96628e37479b2 --- /dev/null +++ b/llvm/test/Assembler/invalid-diexpression-distinct.ll @@ -0,0 +1,4 @@ +; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s + +; CHECK: :[[@LINE+1]]:6: error: 'distinct' not allowed for !DIExpression +!0 = distinct !DIExpression() diff --git a/llvm/test/Bitcode/DIExpression-is-distinct-upgrade.ll b/llvm/test/Bitcode/DIExpression-is-distinct-upgrade.ll new file mode 100644 index 0000000000000..d888e9a9eb827 --- /dev/null +++ b/llvm/test/Bitcode/DIExpression-is-distinct-upgrade.ll @@ -0,0 +1,16 @@ +; RUN: llvm-dis -o - %s.bc | FileCheck %s + +!llvm.dbg.cu = !{!1} +!llvm.module.flags = !{!8, !9} + +!0 = distinct !DIGlobalVariable(name: "g", scope: !1, file: !2, line: 1, type: !5, isLocal: false, isDefinition: true) +!1 = distinct !DICompileUnit(language: DW_LANG_C99, file: !2, producer: "clang", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !3, globals: !4) +!2 = !DIFile(filename: "a.c", directory: "/") +!3 = !{} +!4 = !{!7} +!5 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +; CHECK: expr: !DIExpression() +!6 = distinct !DIExpression() +!7 = !DIGlobalVariableExpression(var: !0, expr: !6) +!8 = !{i32 2, !"Dwarf Version", i32 4} +!9 = !{i32 2, !"Debug Info Version", i32 3} diff --git a/llvm/test/Bitcode/DIExpression-is-distinct-upgrade.ll.bc b/llvm/test/Bitcode/DIExpression-is-distinct-upgrade.ll.bc new file mode 100644 index 0000000000000000000000000000000000000000..088e1a4b448857eeb5a8bf26cf17b88146e57788 GIT binary patch literal 1424 zcmXw34@?_n6#q&KcVpePBh3bKciy#ZHdEke8T7n18C*BJh_Q=i!GvXPY0Iiuj?zJ4 zYFY}p&UH&6BsK=)h|@(g4NHh(LJUxb9XXb5Q8Ob!D&1h(5wf9K;$r;D^pbn;-sQXZ z=lA=)YviYw4{87?1OT{{OvNj2eCK`l?}=B-(-l^kp%BnY05}-{I}2z~0MG1!Hxkhe zHKY|si_`{oST>;CSBRF8igT_odbpr*cxR(kZP1v|(wB=3+L%q4qjlC2F|KYHE^w#O zs~U%qhN#&z%zKsspcs1i8A`d10Q&igBuejtrtCT8;H88^ z>u#YcT>Xbi(WDN@!83Fd_D6W}>u*kOJska~su9gDu z0rW;RjL&6ouSRBx@uq=PyfKXQhLEmgr}7?9Jy2s?Va%v(Bxh%c8)LYaDcn95Etlk$ zG;fK^Em9~R6p%A0(rXh10qGOu-62t+#4>3&mP=q6M&)NztDD%S9W$!uh-DhTLld)3 z(#xFXrlQLZk)G3K%UH+vpg4rVJd#D7)~noz%;N0dH6EI=F|Zsq_@&6kgDr5T93LP3@P6*PxW|zzqQ78aVdJ5x4SXZF}R%qeV z^r1pE3K%)#VSr@d!D1J1+ru(DFe+n15<`hvU1Z0?;Q7U`76+ShlIMzlBz!%V4?lfy z{>p_frEBrZT)SV>@F7eFK8L$QL%*{)pof}7`kI9##32RQXK$wZ@8R=fyVe`>D+A$r z%JrVwRR^|ahZ0g{^FmUs*|Dwha0KbqZ!0_`LU!`}HVdOE(}aw>6t#?`3f_Xtf-hz8 zB?`|v^L9N#&N}g%V|e;0DY9GYr+bb_=1Wm-6bf5zxfnG=ktb3RPS8n#?f;qqQ)}}` zx#|xZ>*{I_dOW5&w${zovaUL&zNXG(G@4jr zYrVIgsb$;SyWg%3db)zujlN)Ohuc$aJ$g7K1Uy}NrYe11m2O{G(C2RrR-P8x0#3K5 Ks=Ym53j7Ogd-B=< literal 0 HcmV?d00001 From 1fa70235856962fae723d97cb39b47da6eb9666e Mon Sep 17 00:00:00 2001 From: Muhammad Omair Javaid Date: Mon, 28 Jun 2021 21:14:17 +0000 Subject: [PATCH 116/619] [LLDB] dotest.py set selected_platform on remote connection This patch fixes a bug in dotest.py where lldb.selected_platform was being set to host platform even after a successful connection to a remote platform via platform url. This patch fixes this behavior and sets selected_platform to remote_platform after a successful connection. This patch also removes target_platform variable from run_suite. Reviewed By: JDevlieghere Differential Revision: https://reviews.llvm.org/D105060 --- lldb/packages/Python/lldbsuite/test/dotest.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lldb/packages/Python/lldbsuite/test/dotest.py b/lldb/packages/Python/lldbsuite/test/dotest.py index 3e832d91e288d..5c5583fb5706f 100644 --- a/lldb/packages/Python/lldbsuite/test/dotest.py +++ b/lldb/packages/Python/lldbsuite/test/dotest.py @@ -929,6 +929,7 @@ def run_suite(): err = lldb.remote_platform.ConnectRemote(platform_connect_options) if err.Success(): print("Connected.") + lldb.selected_platform = lldb.remote_platform else: print("error: failed to connect to remote platform using URL '%s': %s" % ( configuration.lldb_platform_url, err)) @@ -958,9 +959,6 @@ def run_suite(): # Note that it's not dotest's job to clean this directory. lldbutil.mkdir_p(configuration.test_build_dir) - from . import lldbplatformutil - target_platform = lldbplatformutil.getPlatform() - checkLibcxxSupport() checkLibstdcxxSupport() checkWatchpointSupport() From 633ca3ff2f8fc2e2b69001d17abc43f302578fc1 Mon Sep 17 00:00:00 2001 From: Hongtao Yu Date: Mon, 28 Jun 2021 12:32:43 -0700 Subject: [PATCH 117/619] [UniqueLinkageName] Use exsiting GlobalDecl object instead of reconstructing one. C++ constructors/destructors need to go through a different constructor to construct a GlobalDecl object in order to retrieve their linkage type. This causes an assert failure in the default constructor of GlobalDecl. I'm chaning it to using the exsiting GlobalDecl object. Reviewed By: dblaikie Differential Revision: https://reviews.llvm.org/D102356 --- clang/lib/CodeGen/CGCall.cpp | 3 ++- .../CodeGen/unique-internal-linkage-names.cpp | 16 ++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 1cd972f32f3ff..35b34179cc231 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -2174,7 +2174,8 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, // functions with -funique-internal-linkage-names. if (TargetDecl && CodeGenOpts.UniqueInternalLinkageNames) { if (auto *Fn = dyn_cast(TargetDecl)) { - if (this->getFunctionLinkage(Fn) == llvm::GlobalValue::InternalLinkage) + if (this->getFunctionLinkage(CalleeInfo.getCalleeDecl()) == + llvm::GlobalValue::InternalLinkage) FuncAttrs.addAttribute("sample-profile-suffix-elision-policy", "selected"); } diff --git a/clang/test/CodeGen/unique-internal-linkage-names.cpp b/clang/test/CodeGen/unique-internal-linkage-names.cpp index c567bcde45a84..95591de308d37 100644 --- a/clang/test/CodeGen/unique-internal-linkage-names.cpp +++ b/clang/test/CodeGen/unique-internal-linkage-names.cpp @@ -42,12 +42,26 @@ int mver_call() { return mver(); } +namespace { +class A { +public: + A() {} + ~A() {} +}; +} + +void test() { + A a; +} + // PLAIN: @_ZL4glob = internal global // PLAIN: @_ZZ8retAnonMvE5fGlob = internal global // PLAIN: @_ZN12_GLOBAL__N_16anon_mE = internal global // PLAIN: define internal i32 @_ZL3foov() // PLAIN: define internal i32 @_ZN12_GLOBAL__N_14getMEv // PLAIN: define weak_odr i32 ()* @_ZL4mverv.resolver() +// PLAIN: define internal void @_ZN12_GLOBAL__N_11AC1Ev +// PLAIN: define internal void @_ZN12_GLOBAL__N_11AD1Ev // PLAIN: define internal i32 @_ZL4mverv() // PLAIN: define internal i32 @_ZL4mverv.sse4.2() // PLAIN-NOT: "sample-profile-suffix-elision-policy" @@ -57,6 +71,8 @@ int mver_call() { // UNIQUE: define internal i32 @_ZL3foov.[[MODHASH:__uniq.[0-9]+]]() #[[#ATTR:]] { // UNIQUE: define internal i32 @_ZN12_GLOBAL__N_14getMEv.[[MODHASH]] // UNIQUE: define weak_odr i32 ()* @_ZL4mverv.[[MODHASH]].resolver() +// UNIQUE: define internal void @_ZN12_GLOBAL__N_11AC1Ev.__uniq.68358509610070717889884130747296293671 +// UNIQUE: define internal void @_ZN12_GLOBAL__N_11AD1Ev.__uniq.68358509610070717889884130747296293671 // UNIQUE: define internal i32 @_ZL4mverv.[[MODHASH]]() // UNIQUE: define internal i32 @_ZL4mverv.[[MODHASH]].sse4.2 // UNIQUE: attributes #[[#ATTR]] = { {{.*}}"sample-profile-suffix-elision-policy"{{.*}} } From f1969b74a7e70623129872d69caba4759df47fb0 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Mon, 28 Jun 2021 15:29:16 -0400 Subject: [PATCH 118/619] [lld/mac] Fix nondeterminism in output section ordering The two different thread_local_regular sections (__thread_data and more_thread_data) had nondeterminstic ordering for two reasons: 1. https://reviews.llvm.org/D102972 changed concatOutputSections from MapVector to DenseMap, so when we iterate it to make output segments, we would add the two sections to the __DATA output segment in nondeterministic order. 2. The same change also moved the two stable_sort()s for segments and sections to sort(). Since sections with assigned priority (such as TLV data) have the same priority for all sections, this is incorrect -- we must use stable_sort() so that the initial (input-order-based) order remains. As a side effect, we now (deterministically) put the __common section in front of __bss (while previously we happened to put it after it). (__common and __bss are both zerofill so both have order INT_MAX, but common symbols are added to inputSections before normal sections are collected.) Makes lld/test/MachO/tlv.s and lld/test/MachO/tlv-dylib.s pass with LLVM_ENABLE_EXPENSIVE_CHECKS=ON. Differential Revision: https://reviews.llvm.org/D105054 --- lld/MachO/OutputSegment.cpp | 7 ++++++- lld/MachO/Writer.cpp | 5 ++++- lld/test/MachO/tlv-dylib.s | 12 ++++++------ 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/lld/MachO/OutputSegment.cpp b/lld/MachO/OutputSegment.cpp index 77aa50cd03043..8a050c4a7627b 100644 --- a/lld/MachO/OutputSegment.cpp +++ b/lld/MachO/OutputSegment.cpp @@ -140,10 +140,15 @@ static int sectionOrder(OutputSection *osec) { } void OutputSegment::sortOutputSections() { - llvm::sort(sections, compareByOrder(sectionOrder)); + // Must be stable_sort() to keep special sections such as + // S_THREAD_LOCAL_REGULAR in input order. + llvm::stable_sort(sections, compareByOrder(sectionOrder)); } void macho::sortOutputSegments() { + // sort() instead of stable_sort() is fine because segmentOrder() is + // name-based and getOrCreateOutputSegment() makes there's only a single + // segment for every name. llvm::sort(outputSegments, compareByOrder(segmentOrder)); } diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp index 9dca3416875b6..ffe5668a877bd 100644 --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -79,7 +79,10 @@ class Writer { LCUuid *uuidCommand = nullptr; OutputSegment *linkEditSegment = nullptr; - DenseMap concatOutputSections; + + // Output sections are added to output segments in iteration order + // of ConcatOutputSection, so must have deterministic iteration order. + MapVector concatOutputSections; }; // LC_DYLD_INFO_ONLY stores the offsets of symbol import/export information. diff --git a/lld/test/MachO/tlv-dylib.s b/lld/test/MachO/tlv-dylib.s index dc6bdef1433e1..8a179ffc34902 100644 --- a/lld/test/MachO/tlv-dylib.s +++ b/lld/test/MachO/tlv-dylib.s @@ -74,21 +74,21 @@ # FLAGS-NEXT: reloff 0 # FLAGS-NEXT: nreloc 0 # FLAGS-NEXT: type S_THREAD_LOCAL_ZEROFILL -# FLAGS: sectname __bss +# FLAGS: sectname __common # FLAGS-NEXT: segname __DATA # FLAGS-NEXT: addr -# FLAGS-NEXT: size 0x0000000000002000 +# FLAGS-NEXT: size 0x0000000000004000 # FLAGS-NEXT: offset 0 -# FLAGS-NEXT: align 2^0 (1) +# FLAGS-NEXT: align 2^14 (16384) # FLAGS-NEXT: reloff 0 # FLAGS-NEXT: nreloc 0 # FLAGS-NEXT: type S_ZEROFILL -# FLAGS: sectname __common +# FLAGS: sectname __bss # FLAGS-NEXT: segname __DATA # FLAGS-NEXT: addr -# FLAGS-NEXT: size 0x0000000000004000 +# FLAGS-NEXT: size 0x0000000000002000 # FLAGS-NEXT: offset 0 -# FLAGS-NEXT: align 2^14 (16384) +# FLAGS-NEXT: align 2^0 (1) # FLAGS-NEXT: reloff 0 # FLAGS-NEXT: nreloc 0 # FLAGS-NEXT: type S_ZEROFILL From aaad46e6f5f7a16bc6be278d92671f8334156d3a Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Mon, 28 Jun 2021 15:56:10 -0400 Subject: [PATCH 119/619] [OpenMP] Run the OpenMPOpt module pass at O1 Now that the OpenMPOpt module pass include important optimizations for removing globalization from offloading regions it should be run at a lower optimization level. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D105056 --- llvm/lib/Passes/PassBuilder.cpp | 2 +- llvm/test/Other/new-pm-defaults.ll | 3 +-- llvm/test/Other/new-pm-thinlto-defaults.ll | 3 +-- llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll | 3 +-- llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll | 3 +-- llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll | 3 +-- llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll | 3 +-- 7 files changed, 7 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 7f975eb7a2b55..49f6c1049625f 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -1116,7 +1116,7 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, // Try to perform OpenMP specific optimizations on the module. This is a // (quick!) no-op if there are no OpenMP runtime calls present in the module. - if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3) + if (Level != OptimizationLevel::O0) MPM.addPass(OpenMPOptPass()); if (AttributorRun & AttributorRunOption::MODULE) diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll index 6e6c866408180..a152036fdad10 100644 --- a/llvm/test/Other/new-pm-defaults.ll +++ b/llvm/test/Other/new-pm-defaults.ll @@ -90,8 +90,7 @@ ; CHECK-O-NEXT: Running pass: EarlyCSEPass ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis ; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass -; CHECK-O2-NEXT: Running pass: OpenMPOptPass -; CHECK-O3-NEXT: Running pass: OpenMPOptPass +; CHECK-O-NEXT: Running pass: OpenMPOptPass ; CHECK-EP-PIPELINE-EARLY-SIMPLIFICATION-NEXT: Running pass: NoOpModulePass ; CHECK-O-NEXT: Running pass: IPSCCPPass ; CHECK-O-NEXT: Running pass: CalledValuePropagationPass diff --git a/llvm/test/Other/new-pm-thinlto-defaults.ll b/llvm/test/Other/new-pm-thinlto-defaults.ll index 50ca75e843510..37d343d4e1471 100644 --- a/llvm/test/Other/new-pm-thinlto-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-defaults.ll @@ -71,8 +71,7 @@ ; CHECK-O-NEXT: Running pass: EarlyCSEPass ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis ; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass -; CHECK-O2-NEXT: Running pass: OpenMPOptPass -; CHECK-O3-NEXT: Running pass: OpenMPOptPass +; CHECK-O-NEXT: Running pass: OpenMPOptPass ; CHECK-POSTLINK-O-NEXT: Running pass: LowerTypeTestsPass ; CHECK-O-NEXT: Running pass: IPSCCPPass ; CHECK-O-NEXT: Running pass: CalledValuePropagationPass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll index 3dcc5dd634d34..aeed818a106bf 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll @@ -41,8 +41,7 @@ ; CHECK-O-NEXT: Running pass: EarlyCSEPass ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis ; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass -; CHECK-O2-NEXT: Running pass: OpenMPOptPass -; CHECK-O3-NEXT: Running pass: OpenMPOptPass +; CHECK-O-NEXT: Running pass: OpenMPOptPass ; CHECK-O-NEXT: Running pass: LowerTypeTestsPass ; CHECK-O-NEXT: Running pass: IPSCCPPass ; CHECK-O-NEXT: Running pass: CalledValuePropagationPass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll index 1e255d5911592..4f41e34eb83c2 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll @@ -55,8 +55,7 @@ ; CHECK-O-NEXT: Running analysis: CallGraphAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis ; CHECK-O-NEXT: Running pass: PGOIndirectCallPromotion -; CHECK-O2-NEXT: Running pass: OpenMPOptPass -; CHECK-O3-NEXT: Running pass: OpenMPOptPass +; CHECK-O-NEXT: Running pass: OpenMPOptPass ; CHECK-O-NEXT: Running pass: LowerTypeTestsPass ; CHECK-O-NEXT: Running pass: IPSCCPPass ; CHECK-O-NEXT: Running pass: CalledValuePropagationPass diff --git a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll index 20925d01c63b3..5bd83bc0575ff 100644 --- a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll @@ -42,8 +42,7 @@ ; CHECK-O-NEXT: Running pass: EarlyCSEPass ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis ; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass -; CHECK-O2-NEXT: Running pass: OpenMPOptPass -; CHECK-O3-NEXT: Running pass: OpenMPOptPass +; CHECK-O-NEXT: Running pass: OpenMPOptPass ; CHECK-O-NEXT: Running pass: IPSCCPPass ; CHECK-O-NEXT: Running pass: CalledValuePropagationPass ; CHECK-O-NEXT: Running pass: GlobalOptPass diff --git a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll index 76a1d73a0f3c6..7ecbb2231ea4b 100644 --- a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll @@ -52,8 +52,7 @@ ; CHECK-O-NEXT: Running analysis: ProfileSummaryAnalysis ; CHECK-O-NEXT: Running analysis: CallGraphAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis -; CHECK-O2-NEXT: Running pass: OpenMPOptPass -; CHECK-O3-NEXT: Running pass: OpenMPOptPass +; CHECK-O-NEXT: Running pass: OpenMPOptPass ; CHECK-O-NEXT: Running pass: IPSCCPPass ; CHECK-O-NEXT: Running pass: CalledValuePropagationPass ; CHECK-O-NEXT: Running pass: GlobalOptPass From b608053efb88378900d0f08148662e433aa609db Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Mon, 28 Jun 2021 13:37:45 -0700 Subject: [PATCH 120/619] [IR] Fix replaceUsesWithIf ponetial issue with constants There can be a use after free in the Value::replaceUsesWithIf() if two uses point to the same constant. Patch defers handling of the constants past the iterator scan. Another potential issue is that handleOperandChange updates all the uses in a given Constant, not just the one passed to ShouldReplace. Added a FIXME comment. Both issues are not currently exploitable as the only use of this call with constants avoids it. Differential Revision: https://reviews.llvm.org/D105061 --- llvm/lib/IR/Value.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/llvm/lib/IR/Value.cpp b/llvm/lib/IR/Value.cpp index c3796f134922a..9cb00569598aa 100644 --- a/llvm/lib/IR/Value.cpp +++ b/llvm/lib/IR/Value.cpp @@ -531,6 +531,9 @@ void Value::replaceUsesWithIf(Value *New, assert(New->getType() == getType() && "replaceUses of value with new value of different type!"); + SmallVector, 8> Consts; + SmallPtrSet Visited; + for (use_iterator UI = use_begin(), E = use_end(); UI != E;) { Use &U = *UI; ++UI; @@ -540,12 +543,19 @@ void Value::replaceUsesWithIf(Value *New, // constant because they are uniqued. if (auto *C = dyn_cast(U.getUser())) { if (!isa(C)) { - C->handleOperandChange(this, New); + if (Visited.insert(C).second) + Consts.push_back(TrackingVH(C)); continue; } } U.set(New); } + + while (!Consts.empty()) { + // FIXME: handleOperandChange() updates all the uses in a given Constant, + // not just the one passed to ShouldReplace + Consts.pop_back_val()->handleOperandChange(this, New); + } } /// Replace llvm.dbg.* uses of MetadataAsValue(ValueAsMetadata(V)) outside BB From 716d2fedbfc8e67a478f2bee7024729f8f94ca10 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Mon, 28 Jun 2021 16:00:32 -0700 Subject: [PATCH 121/619] Precommit miscompile test from D103700 --- .../LoopVectorize/unroll_nonlatch.ll | 78 +++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 llvm/test/Transforms/LoopVectorize/unroll_nonlatch.ll diff --git a/llvm/test/Transforms/LoopVectorize/unroll_nonlatch.ll b/llvm/test/Transforms/LoopVectorize/unroll_nonlatch.ll new file mode 100644 index 0000000000000..90bc2be334d86 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/unroll_nonlatch.ll @@ -0,0 +1,78 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt %s -S -loop-vectorize -force-vector-interleave=2 | FileCheck %s + +; Demonstrate a case where we unroll a loop, but don't vectorize it. +; This currently reveals a miscompile. The original loop runs stores in +; the latch block on iterations 0 to 1022, and exits when %indvars.iv = 1023. +; Currently, the unrolled loop produced by the vectorizer runs the iteration +; where %indvar.iv = 1023 in the vector.body loop before exiting. This results +; in an out of bounds access.. + +define void @test(double* %data) { +; CHECK-LABEL: @test( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[INDUCTION:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[INDUCTION1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP0:%.*]] = shl nuw nsw i64 [[INDUCTION]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[INDUCTION1]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = or i64 [[TMP0]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[TMP1]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, double* [[DATA:%.*]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, double* [[DATA]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP6:%.*]] = load double, double* [[TMP4]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = load double, double* [[TMP5]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = fneg double [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = fneg double [[TMP7]] +; CHECK-NEXT: store double [[TMP8]], double* [[TMP4]], align 8 +; CHECK-NEXT: store double [[TMP9]], double* [[TMP5]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024 +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_LATCH:%.*]] ] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024 +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_LATCH]] +; CHECK: for.latch: +; CHECK-NEXT: [[T15:%.*]] = shl nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[T16:%.*]] = or i64 [[T15]], 1 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[DATA]], i64 [[T16]] +; CHECK-NEXT: [[T17:%.*]] = load double, double* [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[FNEG:%.*]] = fneg double [[T17]] +; CHECK-NEXT: store double [[FNEG]], double* [[ARRAYIDX]], align 8 +; CHECK-NEXT: br label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.latch ] + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, 1024 + br i1 %exitcond.not, label %for.end, label %for.latch + +for.latch: + %t15 = shl nuw nsw i64 %indvars.iv, 1 + %t16 = or i64 %t15, 1 + %arrayidx = getelementptr inbounds double, double* %data, i64 %t16 + %t17 = load double, double* %arrayidx, align 8 + %fneg = fneg double %t17 + store double %fneg, double* %arrayidx, align 8 + br label %for.body + +for.end: + ret void +} From 8e66fc438463e8cf6fa05e88b51ce29b604ce307 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Tue, 29 Jun 2021 08:59:02 +1000 Subject: [PATCH 122/619] [JITLink][ELF] Move ELF section and symbol parsing into ELFLinkGraphBuilder. Move architecture independent ELF parsing/graph-building code from ELFLinkGraphBuilder_x86_64 to the ELFLinkGraphBuilder base class template. --- .../JITLink/ELFLinkGraphBuilder.cpp | 10 + .../JITLink/ELFLinkGraphBuilder.h | 389 +++++++++++++++++- .../ExecutionEngine/JITLink/ELF_x86_64.cpp | 367 +---------------- .../JITLink/X86/ELF_skip_debug_sections.s | 2 +- 4 files changed, 400 insertions(+), 368 deletions(-) diff --git a/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.cpp b/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.cpp index d1e221b2145b2..2194a4fbf1f41 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.cpp @@ -14,9 +14,19 @@ #define DEBUG_TYPE "jitlink" +static const char *DWSecNames[] = { +#define HANDLE_DWARF_SECTION(ENUM_NAME, ELF_NAME, CMDLINE_NAME, OPTION) \ + ELF_NAME, +#include "llvm/BinaryFormat/Dwarf.def" +#undef HANDLE_DWARF_SECTION +}; + namespace llvm { namespace jitlink { +StringRef ELFLinkGraphBuilderBase::CommonSectionName(".common"); +ArrayRef ELFLinkGraphBuilderBase::DwarfSectionNames = DWSecNames; + ELFLinkGraphBuilderBase::~ELFLinkGraphBuilderBase() {} } // end namespace jitlink diff --git a/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h b/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h index 0841bdb621657..2b2a1a8db4c1d 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h +++ b/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h @@ -15,7 +15,11 @@ #include "llvm/ExecutionEngine/JITLink/JITLink.h" #include "llvm/Object/ELF.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/Error.h" +#include "llvm/Support/FormatVariadic.h" + +#define DEBUG_TYPE "jitlink" namespace llvm { namespace jitlink { @@ -23,34 +27,403 @@ namespace jitlink { /// Common link-graph building code shared between all ELFFiles. class ELFLinkGraphBuilderBase { public: + ELFLinkGraphBuilderBase(std::unique_ptr G) : G(std::move(G)) {} virtual ~ELFLinkGraphBuilderBase(); + +protected: + static bool isDwarfSection(StringRef SectionName) { + return llvm::is_contained(DwarfSectionNames, SectionName); + } + + Section &getCommonSection() { + if (!CommonSection) { + auto Prot = static_cast( + sys::Memory::MF_READ | sys::Memory::MF_WRITE); + CommonSection = &G->createSection(CommonSectionName, Prot); + } + return *CommonSection; + } + + std::unique_ptr G; + +private: + static StringRef CommonSectionName; + static ArrayRef DwarfSectionNames; + + Section *CommonSection = nullptr; }; /// Ling-graph building code that's specific to the given ELFT, but common /// across all architectures. template class ELFLinkGraphBuilder : public ELFLinkGraphBuilderBase { + using ELFFile = object::ELFFile; + public: ELFLinkGraphBuilder(const object::ELFFile &Obj, Triple TT, StringRef FileName, LinkGraph::GetEdgeKindNameFunction GetEdgeKindName); + /// Attempt to construct and return the LinkGraph. + Expected> buildGraph(); + + /// Call to derived class to handle relocations. These require + /// architecture specific knowledge to map to JITLink edge kinds. + virtual Error addRelocations() = 0; + protected: - std::unique_ptr G; - const object::ELFFile &Obj; + using ELFSectionIndex = unsigned; + using ELFSymbolIndex = unsigned; + + bool isRelocatable() const { + return Obj.getHeader().e_type == llvm::ELF::ET_REL; + } + + void setGraphSection(ELFSectionIndex SecIndex, Section &Sec) { + assert(!GraphSections.count(SecIndex) && "Duplicate section at index"); + GraphSections[SecIndex] = &Sec; + } + + Section *getGraphSection(ELFSectionIndex SecIndex) { + auto I = GraphSections.find(SecIndex); + if (I == GraphSections.end()) + return nullptr; + return I->second; + } + + void setGraphSymbol(ELFSymbolIndex SymIndex, Symbol &Sym) { + assert(!GraphSymbols.count(SymIndex) && "Duplicate symbol at index"); + GraphSymbols[SymIndex] = &Sym; + } + + Symbol *getGraphSymbol(ELFSymbolIndex SymIndex) { + auto I = GraphSymbols.find(SymIndex); + if (I == GraphSymbols.end()) + return nullptr; + return I->second; + } + + Expected> + getSymbolLinkageAndScope(const typename ELFT::Sym &Sym, StringRef Name); + + Error prepare(); + Error graphifySections(); + Error graphifySymbols(); + + const ELFFile &Obj; + + typename ELFFile::Elf_Shdr_Range Sections; + const typename ELFFile::Elf_Shdr *SymTabSec = nullptr; + StringRef SectionStringTab; + + // Maps ELF section indexes to LinkGraph Sections. + // Only SHF_ALLOC sections will have graph sections. + DenseMap GraphSections; + DenseMap GraphSymbols; }; template ELFLinkGraphBuilder::ELFLinkGraphBuilder( - const object::ELFFile &Obj, Triple TT, StringRef FileName, + const ELFFile &Obj, Triple TT, StringRef FileName, LinkGraph::GetEdgeKindNameFunction GetEdgeKindName) - : G(std::make_unique(FileName.str(), Triple(std::move(TT)), - ELFT::Is64Bits ? 8 : 4, - support::endianness(ELFT::TargetEndianness), - std::move(GetEdgeKindName))), - Obj(Obj) {} + : ELFLinkGraphBuilderBase(std::make_unique( + FileName.str(), Triple(std::move(TT)), ELFT::Is64Bits ? 8 : 4, + support::endianness(ELFT::TargetEndianness), + std::move(GetEdgeKindName))), + Obj(Obj) { + LLVM_DEBUG( + { dbgs() << "Created ELFLinkGraphBuilder for \"" << FileName << "\""; }); +} + +template +Expected> ELFLinkGraphBuilder::buildGraph() { + if (!isRelocatable()) + return make_error("Object is not a relocatable ELF file"); + + if (auto Err = prepare()) + return std::move(Err); + + if (auto Err = graphifySections()) + return std::move(Err); + + if (auto Err = graphifySymbols()) + return std::move(Err); + + if (auto Err = addRelocations()) + return std::move(Err); + + return std::move(G); +} + +template +Expected> +ELFLinkGraphBuilder::getSymbolLinkageAndScope( + const typename ELFT::Sym &Sym, StringRef Name) { + Linkage L = Linkage::Strong; + Scope S = Scope::Default; + + switch (Sym.getBinding()) { + case ELF::STB_LOCAL: + S = Scope::Local; + break; + case ELF::STB_GLOBAL: + // Nothing to do here. + break; + case ELF::STB_WEAK: + L = Linkage::Weak; + break; + default: + return make_error("Unrecognized symbol binding for " + Name, + inconvertibleErrorCode()); + } + + switch (Sym.getVisibility()) { + case ELF::STV_DEFAULT: + case ELF::STV_PROTECTED: + // FIXME: Make STV_DEFAULT symbols pre-emptible? This probably needs + // Orc support. + // Otherwise nothing to do here. + break; + case ELF::STV_HIDDEN: + // Default scope -> Hidden scope. No effect on local scope. + if (S == Scope::Default) + S = Scope::Hidden; + break; + case ELF::STV_INTERNAL: + return make_error("Unrecognized symbol visibility for " + Name, + inconvertibleErrorCode()); + } + + return std::make_pair(L, S); +} + +template Error ELFLinkGraphBuilder::prepare() { + LLVM_DEBUG(dbgs() << " Preparing to build...\n"); + + // Get the sections array. + if (auto SectionsOrErr = Obj.sections()) + Sections = *SectionsOrErr; + else + return SectionsOrErr.takeError(); + + // Get the section string table. + if (auto SectionStringTabOrErr = Obj.getSectionStringTable(Sections)) + SectionStringTab = *SectionStringTabOrErr; + else + return SectionStringTabOrErr.takeError(); + + // Get the SHT_SYMTAB section. + for (auto &Sec : Sections) + if (Sec.sh_type == ELF::SHT_SYMTAB) { + if (!SymTabSec) + SymTabSec = &Sec; + else + return make_error("Multiple SHT_SYMTAB sections in " + + G->getName()); + } + + return Error::success(); +} + +template Error ELFLinkGraphBuilder::graphifySections() { + LLVM_DEBUG(dbgs() << " Creating graph sections...\n"); + + // For each section... + for (ELFSectionIndex SecIndex = 0; SecIndex != Sections.size(); ++SecIndex) { + + auto &Sec = Sections[SecIndex]; + + // Start by getting the section name. + auto Name = Obj.getSectionName(Sec, SectionStringTab); + if (!Name) + return Name.takeError(); + + // If the name indicates that it's a debug section then skip it: We don't + // support those yet. + if (isDwarfSection(*Name)) { + LLVM_DEBUG({ + dbgs() << " " << SecIndex << ": \"" << *Name + << "\" is a debug section: " + "No graph section will be created.\n"; + }); + continue; + } + + // Skip non-SHF_ALLOC sections + if (!(Sec.sh_flags & ELF::SHF_ALLOC)) { + LLVM_DEBUG({ + dbgs() << " " << SecIndex << ": \"" << *Name + << "\" is not an SHF_ALLOC section: " + "No graph section will be created.\n"; + }); + continue; + } + + LLVM_DEBUG({ + dbgs() << " " << SecIndex << ": Creating section for \"" << *Name + << "\"\n"; + }); + + // Get the section's memory protection flags. + sys::Memory::ProtectionFlags Prot; + if (Sec.sh_flags & ELF::SHF_EXECINSTR) + Prot = static_cast(sys::Memory::MF_READ | + sys::Memory::MF_EXEC); + else + Prot = static_cast(sys::Memory::MF_READ | + sys::Memory::MF_WRITE); + + // For now we just use this to skip the "undefined" section, probably need + // to revist. + if (Sec.sh_size == 0) + continue; + + auto &GraphSec = G->createSection(*Name, Prot); + if (Sec.sh_type != ELF::SHT_NOBITS) { + auto Data = Obj.template getSectionContentsAsArray(Sec); + if (!Data) + return Data.takeError(); + + G->createContentBlock(GraphSec, *Data, Sec.sh_addr, Sec.sh_addralign, 0); + } else + G->createZeroFillBlock(GraphSec, Sec.sh_size, Sec.sh_addr, + Sec.sh_addralign, 0); + + setGraphSection(SecIndex, GraphSec); + } + + return Error::success(); +} + +template Error ELFLinkGraphBuilder::graphifySymbols() { + LLVM_DEBUG(dbgs() << " Creating graph symbols...\n"); + + // No SYMTAB -- Bail out early. + if (!SymTabSec) + return Error::success(); + + // Get the section content as a Symbols array. + auto Symbols = Obj.symbols(SymTabSec); + if (!Symbols) + return Symbols.takeError(); + + // Get the string table for this section. + auto StringTab = Obj.getStringTableForSymtab(*SymTabSec, Sections); + if (!StringTab) + return StringTab.takeError(); + + LLVM_DEBUG({ + StringRef SymTabName; + + if (auto SymTabNameOrErr = Obj.getSectionName(*SymTabSec, SectionStringTab)) + SymTabName = *SymTabNameOrErr; + else { + dbgs() << "Could not get ELF SHT_SYMTAB section name for logging: " + << toString(SymTabNameOrErr.takeError()) << "\n"; + SymTabName = ""; + } + + dbgs() << " Adding symbols from symtab section \"" << SymTabName + << "\"\n"; + }); + + for (ELFSymbolIndex SymIndex = 0; SymIndex != Symbols->size(); ++SymIndex) { + auto &Sym = (*Symbols)[SymIndex]; + + // Check symbol type. + switch (Sym.getType()) { + case ELF::STT_FILE: + LLVM_DEBUG({ + if (auto Name = Sym.getName(*StringTab)) + dbgs() << " " << SymIndex << ": Skipping STT_FILE symbol \"" + << *Name << "\"\n"; + else { + dbgs() << "Could not get STT_FILE symbol name: " + << toString(Name.takeError()) << "\n"; + dbgs() << " " << SymIndex + << ": Skipping STT_FILE symbol with invalid name\n"; + } + }); + continue; + break; + } + + // Get the symbol name. + auto Name = Sym.getName(*StringTab); + if (!Name) + return Name.takeError(); + + // Handle common symbols specially. + if (Sym.isCommon()) { + Symbol &GSym = + G->addCommonSymbol(*Name, Scope::Default, getCommonSection(), 0, + Sym.st_size, Sym.getValue(), false); + setGraphSymbol(SymIndex, GSym); + continue; + } + + // Map Visibility and Binding to Scope and Linkage: + Linkage L; + Scope S; + + if (auto LSOrErr = getSymbolLinkageAndScope(Sym, *Name)) + std::tie(L, S) = *LSOrErr; + else + return LSOrErr.takeError(); + + if (Sym.isDefined() && + (Sym.getType() == ELF::STT_NOTYPE || Sym.getType() == ELF::STT_FUNC || + Sym.getType() == ELF::STT_OBJECT || + Sym.getType() == ELF::STT_SECTION)) { + + // FIXME: Handle extended tables. + if (auto *GraphSec = getGraphSection(Sym.st_shndx)) { + Block *B = nullptr; + { + auto Blocks = GraphSec->blocks(); + assert(Blocks.begin() != Blocks.end() && "No blocks for section"); + assert(std::next(Blocks.begin()) == Blocks.end() && + "Multiple blocks for section"); + B = *Blocks.begin(); + } + + LLVM_DEBUG({ + dbgs() << " " << SymIndex + << ": Creating defined graph symbol for ELF symbol \"" << *Name + << "\"\n"; + }); + + if (Sym.getType() == ELF::STT_SECTION) + *Name = GraphSec->getName(); + + auto &GSym = + G->addDefinedSymbol(*B, Sym.getValue(), *Name, Sym.st_size, L, S, + Sym.getType() == ELF::STT_FUNC, false); + setGraphSymbol(SymIndex, GSym); + } + } else if (Sym.isUndefined() && Sym.isExternal()) { + LLVM_DEBUG({ + dbgs() << " " << SymIndex + << ": Creating external graph symbol for ELF symbol \"" << *Name + << "\"\n"; + }); + auto &GSym = G->addExternalSymbol(*Name, Sym.st_size, L); + setGraphSymbol(SymIndex, GSym); + } else { + LLVM_DEBUG({ + dbgs() << " " << SymIndex + << ": Not creating graph symbol for ELF symbol \"" << *Name + << "\" with unrecognized type\n"; + }); + } + } + + return Error::success(); +} } // end namespace jitlink } // end namespace llvm +#undef DEBUG_TYPE + #endif // LIB_EXECUTIONENGINE_JITLINK_ELFLINKGRAPHBUILDER_H diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp index 80f814f51fa53..a5aed6d252007 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp @@ -139,13 +139,6 @@ class PerGraphGOTAndPLTStubsBuilder_ELF_x86_64 mutable Section *StubsSection = nullptr; }; -const char *const DwarfSectionNames[] = { -#define HANDLE_DWARF_SECTION(ENUM_NAME, ELF_NAME, CMDLINE_NAME, OPTION) \ - ELF_NAME, -#include "llvm/BinaryFormat/Dwarf.def" -#undef HANDLE_DWARF_SECTION -}; - } // namespace const uint8_t PerGraphGOTAndPLTStubsBuilder_ELF_x86_64::NullGOTEntryContent[8] = @@ -153,7 +146,6 @@ const uint8_t PerGraphGOTAndPLTStubsBuilder_ELF_x86_64::NullGOTEntryContent[8] = const uint8_t PerGraphGOTAndPLTStubsBuilder_ELF_x86_64::StubContent[6] = { 0xFF, 0x25, 0x00, 0x00, 0x00, 0x00}; -static const char *CommonSectionName = "__common"; static Error optimizeELF_x86_64_GOTAndStubs(LinkGraph &G) { LLVM_DEBUG(dbgs() << "Optimizing GOT entries and stubs:\n"); @@ -229,35 +221,13 @@ static Error optimizeELF_x86_64_GOTAndStubs(LinkGraph &G) { return Error::success(); } -static bool isDwarfSection(StringRef SectionName) { - return llvm::is_contained(DwarfSectionNames, SectionName); -} - namespace llvm { namespace jitlink { // This should become a template as the ELFFile is so a lot of this could become // generic class ELFLinkGraphBuilder_x86_64 : public ELFLinkGraphBuilder { - private: - Section *CommonSection = nullptr; - - // TODO hack to get this working - // Find a better way - using SymbolTable = object::ELFFile::Elf_Shdr; - // For now we just assume - using SymbolMap = std::map; - SymbolMap JITSymbolTable; - - Section &getCommonSection() { - if (!CommonSection) { - auto Prot = static_cast( - sys::Memory::MF_READ | sys::Memory::MF_WRITE); - CommonSection = &G->createSection(CommonSectionName, Prot); - } - return *CommonSection; - } static Expected getRelocationKind(const uint32_t Type) { @@ -286,143 +256,11 @@ class ELFLinkGraphBuilder_x86_64 : public ELFLinkGraphBuilder { formatv("{0:d}", Type)); } - // This could be a template - object::ELFFile::Elf_Shdr_Range sections; - SymbolTable SymTab; - - bool isRelocatable() { return Obj.getHeader().e_type == llvm::ELF::ET_REL; } - - support::endianness - getEndianness(const object::ELFFile &Obj) { - return Obj.isLE() ? support::little : support::big; - } - - // This could also just become part of a template - unsigned getPointerSize(const object::ELFFile &Obj) { - return Obj.getHeader().getFileClass() == ELF::ELFCLASS64 ? 8 : 4; - } - - // We don't technically need this right now - // But for now going to keep it as it helps me to debug things - - Error createNormalizedSymbols() { - LLVM_DEBUG(dbgs() << "Creating normalized symbols...\n"); - - for (auto SecRef : sections) { - if (SecRef.sh_type != ELF::SHT_SYMTAB && - SecRef.sh_type != ELF::SHT_DYNSYM) - continue; - - auto Symbols = Obj.symbols(&SecRef); - // TODO: Currently I use this function to test things - // I also want to leave it to see if its common between MACH and elf - // so for now I just want to continue even if there is an error - if (errorToBool(Symbols.takeError())) - continue; - - auto StrTabSec = Obj.getSection(SecRef.sh_link); - if (!StrTabSec) - return StrTabSec.takeError(); - auto StringTable = Obj.getStringTable(**StrTabSec); - if (!StringTable) - return StringTable.takeError(); - - for (auto SymRef : *Symbols) { - Optional Name; - - if (auto NameOrErr = SymRef.getName(*StringTable)) - Name = *NameOrErr; - else - return NameOrErr.takeError(); - - LLVM_DEBUG({ - dbgs() << " value = " << formatv("{0:x16}", SymRef.getValue()) - << ", type = " << formatv("{0:x2}", SymRef.getType()) - << ", binding = " << formatv("{0:x2}", SymRef.getBinding()) - << ", size = " - << formatv("{0:x16}", static_cast(SymRef.st_size)) - << ", info = " << formatv("{0:x2}", SymRef.st_info) - << " :" << (Name ? *Name : "") << "\n"; - }); - } - } - return Error::success(); - } - - Error createNormalizedSections() { - LLVM_DEBUG(dbgs() << "Creating normalized sections...\n"); - for (auto &SecRef : sections) { - auto Name = Obj.getSectionName(SecRef); - if (!Name) - return Name.takeError(); - - // Skip Dwarf sections. - if (isDwarfSection(*Name)) { - LLVM_DEBUG({ - dbgs() << *Name - << " is a debug section: No graph section will be created.\n"; - }); - continue; - } - - sys::Memory::ProtectionFlags Prot; - if (SecRef.sh_flags & ELF::SHF_EXECINSTR) { - Prot = static_cast(sys::Memory::MF_READ | - sys::Memory::MF_EXEC); - } else { - Prot = static_cast(sys::Memory::MF_READ | - sys::Memory::MF_WRITE); - } - uint64_t Address = SecRef.sh_addr; - uint64_t Size = SecRef.sh_size; - uint64_t Flags = SecRef.sh_flags; - uint64_t Alignment = SecRef.sh_addralign; - const char *Data = nullptr; - // for now we just use this to skip the "undefined" section, probably need - // to revist - if (Size == 0) - continue; - - // FIXME: Use flags. - (void)Flags; - - LLVM_DEBUG({ - dbgs() << " " << *Name << ": " << formatv("{0:x16}", Address) << " -- " - << formatv("{0:x16}", Address + Size) << ", align: " << Alignment - << " Flags: " << formatv("{0:x}", Flags) << "\n"; - }); - - if (SecRef.sh_type != ELF::SHT_NOBITS) { - // .sections() already checks that the data is not beyond the end of - // file - auto contents = Obj.getSectionContentsAsArray(SecRef); - if (!contents) - return contents.takeError(); - - Data = contents->data(); - // TODO protection flags. - // for now everything is - auto §ion = G->createSection(*Name, Prot); - // Do this here because we have it, but move it into graphify later - G->createContentBlock(section, ArrayRef(Data, Size), Address, - Alignment, 0); - if (SecRef.sh_type == ELF::SHT_SYMTAB) - // TODO: Dynamic? - SymTab = SecRef; - } else { - auto &Section = G->createSection(*Name, Prot); - G->createZeroFillBlock(Section, Size, Address, Alignment, 0); - } - } - - return Error::success(); - } - - Error addRelocations() { + Error addRelocations() override { LLVM_DEBUG(dbgs() << "Adding relocations\n"); // TODO a partern is forming of iterate some sections but only give me // ones I am interested, i should abstract that concept some where - for (auto &SecRef : sections) { + for (auto &SecRef : Sections) { if (SecRef.sh_type != ELF::SHT_RELA && SecRef.sh_type != ELF::SHT_REL) continue; // TODO can the elf obj file do this for me? @@ -477,19 +315,20 @@ class ELFLinkGraphBuilder_x86_64 : public ELFLinkGraphBuilder { << "Name: " << Obj.getRelocationTypeName(Type) << "\n"; }); auto SymbolIndex = Rela.getSymbol(false); - auto Symbol = Obj.getRelocationSymbol(Rela, &SymTab); + auto Symbol = Obj.getRelocationSymbol(Rela, SymTabSec); if (!Symbol) return Symbol.takeError(); auto BlockToFix = *(JITSection->blocks().begin()); - auto *TargetSymbol = JITSymbolTable[SymbolIndex]; + auto *TargetSymbol = getGraphSymbol(SymbolIndex); if (!TargetSymbol) { return make_error( "Could not find symbol at given index, did you add it to " - "JITSymbolTable? index: " + std::to_string(SymbolIndex) - + ", shndx: " + std::to_string((*Symbol)->st_shndx) + - " Size of table: " + std::to_string(JITSymbolTable.size()), + "JITSymbolTable? index: " + + std::to_string(SymbolIndex) + + ", shndx: " + std::to_string((*Symbol)->st_shndx) + + " Size of table: " + std::to_string(GraphSymbols.size()), llvm::inconvertibleErrorCode()); } uint64_t Addend = Rela.r_addend; @@ -518,201 +357,11 @@ class ELFLinkGraphBuilder_x86_64 : public ELFLinkGraphBuilder { return Error::success(); } - Error graphifyRegularSymbols() { - - // TODO: ELF supports beyond SHN_LORESERVE, - // need to perf test how a vector vs map handles those cases - - std::vector::Elf_Shdr_Range *>> - SecIndexToSymbols; - - LLVM_DEBUG(dbgs() << "Creating graph symbols...\n"); - - for (auto SecRef : sections) { - - if (SecRef.sh_type != ELF::SHT_SYMTAB && - SecRef.sh_type != ELF::SHT_DYNSYM) - continue; - auto Symbols = Obj.symbols(&SecRef); - if (!Symbols) - return Symbols.takeError(); - - auto StrTabSec = Obj.getSection(SecRef.sh_link); - if (!StrTabSec) - return StrTabSec.takeError(); - auto StringTable = Obj.getStringTable(**StrTabSec); - if (!StringTable) - return StringTable.takeError(); - auto Name = Obj.getSectionName(SecRef); - if (!Name) - return Name.takeError(); - - LLVM_DEBUG(dbgs() << "Processing symbol section " << *Name << ":\n"); - - auto Section = G->findSectionByName(*Name); - if (!Section) - return make_error("Could not find a section " + - *Name, - llvm::inconvertibleErrorCode()); - // we only have one for now - auto blocks = Section->blocks(); - if (blocks.empty()) - return make_error("Section has no block", - llvm::inconvertibleErrorCode()); - int SymbolIndex = -1; - for (auto SymRef : *Symbols) { - ++SymbolIndex; - auto Type = SymRef.getType(); - - if (Type == ELF::STT_FILE || SymbolIndex == 0) - continue; - // these should do it for now - // if(Type != ELF::STT_NOTYPE && - // Type != ELF::STT_OBJECT && - // Type != ELF::STT_FUNC && - // Type != ELF::STT_SECTION && - // Type != ELF::STT_COMMON) { - // continue; - // } - auto Name = SymRef.getName(*StringTable); - // I am not sure on If this is going to hold as an invariant. Revisit. - if (!Name) - return Name.takeError(); - - if (SymRef.isCommon()) { - // Symbols in SHN_COMMON refer to uninitialized data. The st_value - // field holds alignment constraints. - Symbol &S = - G->addCommonSymbol(*Name, Scope::Default, getCommonSection(), 0, - SymRef.st_size, SymRef.getValue(), false); - JITSymbolTable[SymbolIndex] = &S; - continue; - } - - // Map Visibility and Binding to Scope and Linkage: - Linkage L = Linkage::Strong; - Scope S = Scope::Default; - - switch (SymRef.getBinding()) { - case ELF::STB_LOCAL: - S = Scope::Local; - break; - case ELF::STB_GLOBAL: - // Nothing to do here. - break; - case ELF::STB_WEAK: - L = Linkage::Weak; - break; - default: - return make_error("Unrecognized symbol binding for " + - *Name, - inconvertibleErrorCode()); - } - - switch (SymRef.getVisibility()) { - case ELF::STV_DEFAULT: - case ELF::STV_PROTECTED: - // FIXME: Make STV_DEFAULT symbols pre-emptible? This probably needs - // Orc support. - // Otherwise nothing to do here. - break; - case ELF::STV_HIDDEN: - // Default scope -> Hidden scope. No effect on local scope. - if (S == Scope::Default) - S = Scope::Hidden; - break; - case ELF::STV_INTERNAL: - return make_error("Unrecognized symbol visibility for " + - *Name, - inconvertibleErrorCode()); - } - - if (SymRef.isDefined() && - (Type == ELF::STT_NOTYPE || Type == ELF::STT_FUNC || - Type == ELF::STT_OBJECT || Type == ELF::STT_SECTION)) { - - auto DefinedSection = Obj.getSection(SymRef.st_shndx); - if (!DefinedSection) - return DefinedSection.takeError(); - auto sectName = Obj.getSectionName(**DefinedSection); - if (!sectName) - return Name.takeError(); - - // Skip debug section symbols. - if (isDwarfSection(*sectName)) - continue; - - auto JitSection = G->findSectionByName(*sectName); - if (!JitSection) - return make_error( - "Could not find the JitSection " + *sectName, - llvm::inconvertibleErrorCode()); - auto bs = JitSection->blocks(); - if (bs.empty()) - return make_error( - "Section has no block", llvm::inconvertibleErrorCode()); - - auto *B = *bs.begin(); - LLVM_DEBUG({ dbgs() << " " << *Name << " at index " << SymbolIndex << "\n"; }); - if (SymRef.getType() == ELF::STT_SECTION) - *Name = *sectName; - auto &Sym = G->addDefinedSymbol( - *B, SymRef.getValue(), *Name, SymRef.st_size, L, S, - SymRef.getType() == ELF::STT_FUNC, false); - JITSymbolTable[SymbolIndex] = &Sym; - } else if (SymRef.isUndefined() && SymRef.isExternal()) { - auto &Sym = G->addExternalSymbol(*Name, SymRef.st_size, L); - JITSymbolTable[SymbolIndex] = &Sym; - } else - LLVM_DEBUG({ - dbgs() - << "Not creating graph symbol for normalized symbol at index " - << SymbolIndex << ", \"" << *Name << "\"\n"; - }); - - // TODO: The following has to be implmented. - // leaving commented out to save time for future patchs - /* - G->addAbsoluteSymbol(*Name, SymRef.getValue(), SymRef.st_size, - Linkage::Strong, Scope::Default, false); - */ - } - } - return Error::success(); - } - public: ELFLinkGraphBuilder_x86_64(StringRef FileName, const object::ELFFile &Obj) : ELFLinkGraphBuilder(Obj, Triple("x86_64-unknown-linux"), FileName, getELFX86RelocationKindName) {} - - Expected> buildGraph() { - // Sanity check: we only operate on relocatable objects. - if (!isRelocatable()) - return make_error("Object is not a relocatable ELF"); - - auto Secs = Obj.sections(); - - if (!Secs) { - return Secs.takeError(); - } - sections = *Secs; - - if (auto Err = createNormalizedSections()) - return std::move(Err); - - if (auto Err = createNormalizedSymbols()) - return std::move(Err); - - if (auto Err = graphifyRegularSymbols()) - return std::move(Err); - - if (auto Err = addRelocations()) - return std::move(Err); - - return std::move(G); - } }; class ELFJITLinker_x86_64 : public JITLinker { diff --git a/llvm/test/ExecutionEngine/JITLink/X86/ELF_skip_debug_sections.s b/llvm/test/ExecutionEngine/JITLink/X86/ELF_skip_debug_sections.s index 53132c4a987b7..acd3ae7ad8b1f 100644 --- a/llvm/test/ExecutionEngine/JITLink/X86/ELF_skip_debug_sections.s +++ b/llvm/test/ExecutionEngine/JITLink/X86/ELF_skip_debug_sections.s @@ -4,7 +4,7 @@ # # Check that debug sections are not emitted. # -# CHECK: .debug_info is a debug section: No graph section will be created. +# CHECK: ".debug_info" is a debug section: No graph section will be created. .text .file "ELF_skip_debug_sections.c" From 473a21c0e9684b19016fd7367b28cc0ff12e928d Mon Sep 17 00:00:00 2001 From: River Riddle Date: Mon, 28 Jun 2021 22:46:05 +0000 Subject: [PATCH 123/619] [vscode-mlir] Add support for restarting the server on setting/server changes This revision adds detection for changes to either the mlir-lsp-server binary or the setting, and prompts the user to restart the server. Whether the user gets prompted or not is a configurable setting in the extension, and this setting may updated based on the user response to the prompt. Differential Revision: https://reviews.llvm.org/D104501 --- mlir/utils/vscode/package-lock.json | 298 +++++++++++++++++++++++++ mlir/utils/vscode/package.json | 25 ++- mlir/utils/vscode/src/config.ts | 16 ++ mlir/utils/vscode/src/configWatcher.ts | 81 +++++++ mlir/utils/vscode/src/extension.ts | 55 +---- mlir/utils/vscode/src/mlirContext.ts | 62 +++++ 6 files changed, 491 insertions(+), 46 deletions(-) create mode 100644 mlir/utils/vscode/src/config.ts create mode 100644 mlir/utils/vscode/src/configWatcher.ts create mode 100644 mlir/utils/vscode/src/mlirContext.ts diff --git a/mlir/utils/vscode/package-lock.json b/mlir/utils/vscode/package-lock.json index 0b6e1341260e7..00ede875a3ac1 100644 --- a/mlir/utils/vscode/package-lock.json +++ b/mlir/utils/vscode/package-lock.json @@ -8,10 +8,12 @@ "name": "mlir", "version": "0.0.1", "dependencies": { + "chokidar": "3.5.2", "vscode-languageclient": "^5.2.1", "vscode-languageserver-types": "3.16.0" }, "devDependencies": { + "@types/chokidar": "2.1.3", "@types/mocha": "^5.2.0", "@types/node": "^8.0.0", "@types/vscode": "1.52.*", @@ -61,6 +63,16 @@ "node": ">= 6" } }, + "node_modules/@types/chokidar": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/@types/chokidar/-/chokidar-2.1.3.tgz", + "integrity": "sha512-6qK3xoLLAhQVTucQGHTySwOVA1crHRXnJeLwqK6KIFkkKa2aoMFXh+WEi8PotxDtvN6MQJLyYN9ag9P6NLV81w==", + "deprecated": "This is a stub types definition. chokidar provides its own type definitions, so you do not need this installed.", + "dev": true, + "dependencies": { + "chokidar": "*" + } + }, "node_modules/@types/mocha": { "version": "5.2.7", "resolved": "https://registry.npmjs.org/@types/mocha/-/mocha-5.2.7.tgz", @@ -103,6 +115,18 @@ "node": ">=4" } }, + "node_modules/anymatch": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/anymatch/-/anymatch-3.1.2.tgz", + "integrity": "sha512-P43ePfOAIupkguHUycrc4qJ9kz8ZiuOUijaETwX7THt0Y/GNK7v0aa8rY816xWjZ7rJdA5XdMcpVFTKMq+RvWg==", + "dependencies": { + "normalize-path": "^3.0.0", + "picomatch": "^2.0.4" + }, + "engines": { + "node": ">= 8" + } + }, "node_modules/argparse": { "version": "1.0.10", "resolved": "https://registry.npmjs.org/argparse/-/argparse-1.0.10.tgz", @@ -156,6 +180,14 @@ "node": "*" } }, + "node_modules/binary-extensions": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-2.2.0.tgz", + "integrity": "sha512-jDctJ/IVQbZoJykoeHbhXpOlNBqGNcwXJKJog42E5HDPUwQTSdjCHdihjj0DlnheQ7blbT6dHOafNAiS8ooQKA==", + "engines": { + "node": ">=8" + } + }, "node_modules/bluebird": { "version": "3.4.7", "resolved": "https://registry.npmjs.org/bluebird/-/bluebird-3.4.7.tgz", @@ -178,6 +210,17 @@ "concat-map": "0.0.1" } }, + "node_modules/braces": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.2.tgz", + "integrity": "sha512-b8um+L1RzM3WDSzvhm6gIz1yfTbBt6YTlcEKAvsmqCZZFw46z626lVj9j1yEPW33H5H+lBQpZMP1k8l+78Ha0A==", + "dependencies": { + "fill-range": "^7.0.1" + }, + "engines": { + "node": ">=8" + } + }, "node_modules/buffer-crc32": { "version": "0.2.13", "resolved": "https://registry.npmjs.org/buffer-crc32/-/buffer-crc32-0.2.13.tgz", @@ -296,6 +339,27 @@ "integrity": "sha512-gS9GVHRU+RGn5KQM2rllAlR3dU6m7AcpJKdtH8gFvQiC4Otgk98XnmMU+nZenHt/+VhnBPWwgrJsyrdcw6i23w==", "dev": true }, + "node_modules/chokidar": { + "version": "3.5.2", + "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.5.2.tgz", + "integrity": "sha512-ekGhOnNVPgT77r4K/U3GDhu+FQ2S8TnK/s2KbIGXi0SZWuwkZ2QNyfWdZW+TVfn84DpEP7rLeCt2UI6bJ8GwbQ==", + "dependencies": { + "anymatch": "~3.1.2", + "braces": "~3.0.2", + "fsevents": "~2.3.2", + "glob-parent": "~5.1.2", + "is-binary-path": "~2.1.0", + "is-glob": "~4.0.1", + "normalize-path": "~3.0.0", + "readdirp": "~3.6.0" + }, + "engines": { + "node": ">= 8.10.0" + }, + "optionalDependencies": { + "fsevents": "~2.3.2" + } + }, "node_modules/clang-format": { "version": "1.4.0", "resolved": "https://registry.npmjs.org/clang-format/-/clang-format-1.4.0.tgz", @@ -509,12 +573,36 @@ "pend": "~1.2.0" } }, + "node_modules/fill-range": { + "version": "7.0.1", + "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz", + "integrity": "sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ==", + "dependencies": { + "to-regex-range": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, "node_modules/fs.realpath": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", "integrity": "sha1-FQStJSMVjKpA20onh8sBQRmU6k8=", "dev": true }, + "node_modules/fsevents": { + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz", + "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==", + "hasInstallScript": true, + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^8.16.0 || ^10.6.0 || >=11.0.0" + } + }, "node_modules/fstream": { "version": "1.0.12", "resolved": "https://registry.npmjs.org/fstream/-/fstream-1.0.12.tgz", @@ -582,6 +670,17 @@ "url": "https://github.com/sponsors/isaacs" } }, + "node_modules/glob-parent": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz", + "integrity": "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==", + "dependencies": { + "is-glob": "^4.0.1" + }, + "engines": { + "node": ">= 6" + } + }, "node_modules/graceful-fs": { "version": "4.2.6", "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.6.tgz", @@ -692,6 +791,17 @@ "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", "dev": true }, + "node_modules/is-binary-path": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/is-binary-path/-/is-binary-path-2.1.0.tgz", + "integrity": "sha512-ZMERYes6pDydyuGidse7OsHxtbI7WVeUEozgR/g7rd0xUimYNlvZRE/K2MgZTjWy725IfelLeVcEM97mmtRGXw==", + "dependencies": { + "binary-extensions": "^2.0.0" + }, + "engines": { + "node": ">=8" + } + }, "node_modules/is-core-module": { "version": "2.2.0", "resolved": "https://registry.npmjs.org/is-core-module/-/is-core-module-2.2.0.tgz", @@ -704,6 +814,33 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/is-extglob": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", + "integrity": "sha1-qIwCU1eR8C7TfHahueqXc8gz+MI=", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/is-glob": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.1.tgz", + "integrity": "sha512-5G0tKtBTFImOqDnLB2hG6Bp2qcKEFduo4tZu9MT/H6NQv/ghhy30o55ufafxJ/LdH79LLs2Kfrn85TLKyA7BUg==", + "dependencies": { + "is-extglob": "^2.1.1" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/is-number": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/is-number/-/is-number-7.0.0.tgz", + "integrity": "sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==", + "engines": { + "node": ">=0.12.0" + } + }, "node_modules/isarray": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz", @@ -841,6 +978,14 @@ "integrity": "sha512-nnbWWOkoWyUsTjKrhgD0dcz22mdkSnpYqbEjIm2nhwhuxlSkpywJmBo8h0ZqJdkp73mb90SssHkN4rsRaBAfAA==", "dev": true }, + "node_modules/normalize-path": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/normalize-path/-/normalize-path-3.0.0.tgz", + "integrity": "sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==", + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/nth-check": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.0.0.tgz", @@ -944,6 +1089,17 @@ "integrity": "sha1-elfrVQpng/kRUzH89GY9XI4AelA=", "dev": true }, + "node_modules/picomatch": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.0.tgz", + "integrity": "sha512-lY1Q/PiJGC2zOv/z391WOTD+Z02bCgsFfvxoXXf6h7kv9o+WmsmzYqrAwY63sNgOxE4xEdq0WyUnXfKeBrSvYw==", + "engines": { + "node": ">=8.6" + }, + "funding": { + "url": "https://github.com/sponsors/jonschlinkert" + } + }, "node_modules/process-nextick-args": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.1.tgz", @@ -992,6 +1148,17 @@ "util-deprecate": "~1.0.1" } }, + "node_modules/readdirp": { + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/readdirp/-/readdirp-3.6.0.tgz", + "integrity": "sha512-hOS089on8RduqdbhvQ5Z37A0ESjsqz6qnRcffsMU3495FuTdqSm+7bhJ29JvIOsBDEEnan5DPu9t3To9VRlMzA==", + "dependencies": { + "picomatch": "^2.2.1" + }, + "engines": { + "node": ">=8.10.0" + } + }, "node_modules/resolve": { "version": "1.20.0", "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.20.0.tgz", @@ -1093,6 +1260,17 @@ "node": ">=8.17.0" } }, + "node_modules/to-regex-range": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz", + "integrity": "sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==", + "dependencies": { + "is-number": "^7.0.0" + }, + "engines": { + "node": ">=8.0" + } + }, "node_modules/traverse": { "version": "0.3.9", "resolved": "https://registry.npmjs.org/traverse/-/traverse-0.3.9.tgz", @@ -1381,6 +1559,15 @@ "integrity": "sha512-RbzJvlNzmRq5c3O09UipeuXno4tA1FE6ikOjxZK0tuxVv3412l64l5t1W5pj4+rJq9vpkm/kwiR07aZXnsKPxw==", "dev": true }, + "@types/chokidar": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/@types/chokidar/-/chokidar-2.1.3.tgz", + "integrity": "sha512-6qK3xoLLAhQVTucQGHTySwOVA1crHRXnJeLwqK6KIFkkKa2aoMFXh+WEi8PotxDtvN6MQJLyYN9ag9P6NLV81w==", + "dev": true, + "requires": { + "chokidar": "*" + } + }, "@types/mocha": { "version": "5.2.7", "resolved": "https://registry.npmjs.org/@types/mocha/-/mocha-5.2.7.tgz", @@ -1417,6 +1604,15 @@ "color-convert": "^1.9.0" } }, + "anymatch": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/anymatch/-/anymatch-3.1.2.tgz", + "integrity": "sha512-P43ePfOAIupkguHUycrc4qJ9kz8ZiuOUijaETwX7THt0Y/GNK7v0aa8rY816xWjZ7rJdA5XdMcpVFTKMq+RvWg==", + "requires": { + "normalize-path": "^3.0.0", + "picomatch": "^2.0.4" + } + }, "argparse": { "version": "1.0.10", "resolved": "https://registry.npmjs.org/argparse/-/argparse-1.0.10.tgz", @@ -1464,6 +1660,11 @@ "chainsaw": "~0.1.0" } }, + "binary-extensions": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-2.2.0.tgz", + "integrity": "sha512-jDctJ/IVQbZoJykoeHbhXpOlNBqGNcwXJKJog42E5HDPUwQTSdjCHdihjj0DlnheQ7blbT6dHOafNAiS8ooQKA==" + }, "bluebird": { "version": "3.4.7", "resolved": "https://registry.npmjs.org/bluebird/-/bluebird-3.4.7.tgz", @@ -1486,6 +1687,14 @@ "concat-map": "0.0.1" } }, + "braces": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.2.tgz", + "integrity": "sha512-b8um+L1RzM3WDSzvhm6gIz1yfTbBt6YTlcEKAvsmqCZZFw46z626lVj9j1yEPW33H5H+lBQpZMP1k8l+78Ha0A==", + "requires": { + "fill-range": "^7.0.1" + } + }, "buffer-crc32": { "version": "0.2.13", "resolved": "https://registry.npmjs.org/buffer-crc32/-/buffer-crc32-0.2.13.tgz", @@ -1576,6 +1785,21 @@ "domutils": "^2.7.0" } }, + "chokidar": { + "version": "3.5.2", + "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.5.2.tgz", + "integrity": "sha512-ekGhOnNVPgT77r4K/U3GDhu+FQ2S8TnK/s2KbIGXi0SZWuwkZ2QNyfWdZW+TVfn84DpEP7rLeCt2UI6bJ8GwbQ==", + "requires": { + "anymatch": "~3.1.2", + "braces": "~3.0.2", + "fsevents": "~2.3.2", + "glob-parent": "~5.1.2", + "is-binary-path": "~2.1.0", + "is-glob": "~4.0.1", + "normalize-path": "~3.0.0", + "readdirp": "~3.6.0" + } + }, "clang-format": { "version": "1.4.0", "resolved": "https://registry.npmjs.org/clang-format/-/clang-format-1.4.0.tgz", @@ -1733,12 +1957,26 @@ "pend": "~1.2.0" } }, + "fill-range": { + "version": "7.0.1", + "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz", + "integrity": "sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ==", + "requires": { + "to-regex-range": "^5.0.1" + } + }, "fs.realpath": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", "integrity": "sha1-FQStJSMVjKpA20onh8sBQRmU6k8=", "dev": true }, + "fsevents": { + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz", + "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==", + "optional": true + }, "fstream": { "version": "1.0.12", "resolved": "https://registry.npmjs.org/fstream/-/fstream-1.0.12.tgz", @@ -1793,6 +2031,14 @@ "path-is-absolute": "^1.0.0" } }, + "glob-parent": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz", + "integrity": "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==", + "requires": { + "is-glob": "^4.0.1" + } + }, "graceful-fs": { "version": "4.2.6", "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.6.tgz", @@ -1875,6 +2121,14 @@ "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", "dev": true }, + "is-binary-path": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/is-binary-path/-/is-binary-path-2.1.0.tgz", + "integrity": "sha512-ZMERYes6pDydyuGidse7OsHxtbI7WVeUEozgR/g7rd0xUimYNlvZRE/K2MgZTjWy725IfelLeVcEM97mmtRGXw==", + "requires": { + "binary-extensions": "^2.0.0" + } + }, "is-core-module": { "version": "2.2.0", "resolved": "https://registry.npmjs.org/is-core-module/-/is-core-module-2.2.0.tgz", @@ -1884,6 +2138,24 @@ "has": "^1.0.3" } }, + "is-extglob": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", + "integrity": "sha1-qIwCU1eR8C7TfHahueqXc8gz+MI=" + }, + "is-glob": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.1.tgz", + "integrity": "sha512-5G0tKtBTFImOqDnLB2hG6Bp2qcKEFduo4tZu9MT/H6NQv/ghhy30o55ufafxJ/LdH79LLs2Kfrn85TLKyA7BUg==", + "requires": { + "is-extglob": "^2.1.1" + } + }, + "is-number": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/is-number/-/is-number-7.0.0.tgz", + "integrity": "sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==" + }, "isarray": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz", @@ -2002,6 +2274,11 @@ "integrity": "sha512-nnbWWOkoWyUsTjKrhgD0dcz22mdkSnpYqbEjIm2nhwhuxlSkpywJmBo8h0ZqJdkp73mb90SssHkN4rsRaBAfAA==", "dev": true }, + "normalize-path": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/normalize-path/-/normalize-path-3.0.0.tgz", + "integrity": "sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==" + }, "nth-check": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.0.0.tgz", @@ -2090,6 +2367,11 @@ "integrity": "sha1-elfrVQpng/kRUzH89GY9XI4AelA=", "dev": true }, + "picomatch": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.0.tgz", + "integrity": "sha512-lY1Q/PiJGC2zOv/z391WOTD+Z02bCgsFfvxoXXf6h7kv9o+WmsmzYqrAwY63sNgOxE4xEdq0WyUnXfKeBrSvYw==" + }, "process-nextick-args": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.1.tgz", @@ -2129,6 +2411,14 @@ "util-deprecate": "~1.0.1" } }, + "readdirp": { + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/readdirp/-/readdirp-3.6.0.tgz", + "integrity": "sha512-hOS089on8RduqdbhvQ5Z37A0ESjsqz6qnRcffsMU3495FuTdqSm+7bhJ29JvIOsBDEEnan5DPu9t3To9VRlMzA==", + "requires": { + "picomatch": "^2.2.1" + } + }, "resolve": { "version": "1.20.0", "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.20.0.tgz", @@ -2209,6 +2499,14 @@ "rimraf": "^3.0.0" } }, + "to-regex-range": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz", + "integrity": "sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==", + "requires": { + "is-number": "^7.0.0" + } + }, "traverse": { "version": "0.3.9", "resolved": "https://registry.npmjs.org/traverse/-/traverse-0.3.9.tgz", diff --git a/mlir/utils/vscode/package.json b/mlir/utils/vscode/package.json index 6d6813020782c..e580aaa94466f 100644 --- a/mlir/utils/vscode/package.json +++ b/mlir/utils/vscode/package.json @@ -44,9 +44,30 @@ "scope": "resource", "type": "string", "description": "The file path of the mlir-lsp-server executable." + }, + "mlir.onSettingsChanged": { + "type": "string", + "default": "prompt", + "description": "Action taken when a setting change requires a server restart to take effect.", + "enum": [ + "prompt", + "restart", + "ignore" + ], + "enumDescriptions": [ + "Prompt the user for restarting the server", + "Automatically restart the server", + "Do nothing" + ] } } - } + }, + "commands": [ + { + "command": "mlir.restart", + "title": "mlir: Restart language server" + } + ] }, "scripts": { "vscode:prepublish": "tsc -p ./", @@ -56,6 +77,7 @@ "package": "vsce package" }, "devDependencies": { + "@types/chokidar": "2.1.3", "@types/mocha": "^5.2.0", "@types/node": "^8.0.0", "@types/vscode": "1.52.*", @@ -67,6 +89,7 @@ "vscode-test": "^1.3.0" }, "dependencies": { + "chokidar": "3.5.2", "vscode-languageclient": "^5.2.1", "vscode-languageserver-types": "3.16.0" } diff --git a/mlir/utils/vscode/src/config.ts b/mlir/utils/vscode/src/config.ts new file mode 100644 index 0000000000000..58f45ccbc8c02 --- /dev/null +++ b/mlir/utils/vscode/src/config.ts @@ -0,0 +1,16 @@ +import * as vscode from 'vscode'; + +/** + * Gets the config value `mlir.`. + */ +export function get(key: string): T { + return vscode.workspace.getConfiguration('mlir').get(key); +} + +/** + * Sets the config value `mlir.`. + */ +export function update(key: string, value: T, + target?: vscode.ConfigurationTarget) { + return vscode.workspace.getConfiguration('mlir').update(key, value, target); +} diff --git a/mlir/utils/vscode/src/configWatcher.ts b/mlir/utils/vscode/src/configWatcher.ts new file mode 100644 index 0000000000000..47d450086804a --- /dev/null +++ b/mlir/utils/vscode/src/configWatcher.ts @@ -0,0 +1,81 @@ +import * as chokidar from 'chokidar'; +import * as path from 'path'; +import * as vscode from 'vscode'; + +import * as config from './config'; +import {MLIRContext} from './mlirContext'; + +/** + * Prompt the user to see if we should restart the server. + */ +async function promptRestart(settingName: string, promptMessage: string) { + switch (config.get(settingName)) { + case 'restart': + vscode.commands.executeCommand('mlir.restart'); + break; + case 'ignore': + break; + case 'prompt': + default: + switch (await vscode.window.showInformationMessage( + promptMessage, 'Yes', 'Yes, always', 'No, never')) { + case 'Yes': + vscode.commands.executeCommand('mlir.restart'); + break; + case 'Yes, always': + vscode.commands.executeCommand('mlir.restart'); + config.update(settingName, 'restart', + vscode.ConfigurationTarget.Global); + break; + case 'No, never': + config.update(settingName, 'ignore', + vscode.ConfigurationTarget.Global); + break; + default: + break; + } + break; + } +} + +/** + * Activate the watchers that track configuration changes which decide when to + * restart the server. + */ +export function activate(mlirContext: MLIRContext) { + // When a configuration change happens, check to see if we should restart the + // server. + mlirContext.subscriptions.push(vscode.workspace.onDidChangeConfiguration(event => { + const settings: string[] = [ 'server_path' ]; + for (const setting of settings) { + const expandedSetting = `mlir.${setting}`; + if (event.affectsConfiguration(expandedSetting)) { + promptRestart( + 'onSettingsChanged', + `setting '${ + expandedSetting}' has changed. Do you want to reload the server?`); + break; + } + } + })); + + // Track the server file in case it changes. We use `fs` here because the + // server may not be in a workspace directory. + const userDefinedServerPath = config.get('server_path'); + const serverPath = + path.resolve((userDefinedServerPath === '') ? 'mlir-lsp-server' + : userDefinedServerPath); + const fileWatcherConfig = { + disableGlobbing : true, + followSymlinks : true, + ignoreInitial : true, + }; + const fileWatcher = chokidar.watch(serverPath, fileWatcherConfig); + fileWatcher.on('all', (_event, _filename, _details) => { + promptRestart( + 'onSettingsChanged', + 'MLIR language server binary has changed. Do you want to reload the server?'); + }); + mlirContext.subscriptions.push( + new vscode.Disposable(() => { fileWatcher.close(); })); +} diff --git a/mlir/utils/vscode/src/extension.ts b/mlir/utils/vscode/src/extension.ts index db4904e5c24a6..2220c50327e52 100644 --- a/mlir/utils/vscode/src/extension.ts +++ b/mlir/utils/vscode/src/extension.ts @@ -1,56 +1,21 @@ import * as vscode from 'vscode'; -import * as vscodelc from 'vscode-languageclient'; -let client: vscodelc.LanguageClient; +import {MLIRContext} from './mlirContext'; /** * This method is called when the extension is activated. The extension is * activated the very first time a command is executed. */ export function activate(context: vscode.ExtensionContext) { - // Get the path of the mlir-lsp-server that is used to provide language - // functionality. - const config = vscode.workspace.getConfiguration('mlir'); - const userDefinedServerPath = config.get('server_path'); - const serverPath = (userDefinedServerPath === '') ? 'mlir-lsp-server' - : userDefinedServerPath; + const mlirContext = new MLIRContext(); + context.subscriptions.push(mlirContext); - // Configure the server options. - const serverOptions: vscodelc.ServerOptions = { - run : { - command : serverPath, - transport : vscodelc.TransportKind.stdio, - args : [] - }, - debug : { - command : serverPath, - transport : vscodelc.TransportKind.stdio, - args : [] - } - }; + // Initialize the commands of the extension. + context.subscriptions.push( + vscode.commands.registerCommand('mlir.restart', async () => { + mlirContext.dispose(); + await mlirContext.activate(); + })); - // Configure the client options. - const clientOptions: vscodelc.LanguageClientOptions = { - documentSelector : [ {scheme : 'file', language : 'mlir'} ], - synchronize : { - // Notify the server about file changes to *.mlir files contained in the - // workspace. - fileEvents : vscode.workspace.createFileSystemWatcher('**/*.mlir') - } - }; - - // Create the language client and start the client. - client = new vscodelc.LanguageClient('mlir-lsp', 'MLIR Language Client', - serverOptions, clientOptions); - client.start(); -} - -/** - * This method is called when the extension is deactivated. - */ -export function deactivate(): Thenable|undefined { - if (!client) { - return undefined; - } - return client.stop(); + mlirContext.activate(); } diff --git a/mlir/utils/vscode/src/mlirContext.ts b/mlir/utils/vscode/src/mlirContext.ts new file mode 100644 index 0000000000000..3b582187c3730 --- /dev/null +++ b/mlir/utils/vscode/src/mlirContext.ts @@ -0,0 +1,62 @@ +import * as vscode from 'vscode'; +import * as vscodelc from 'vscode-languageclient'; + +import * as config from './config'; +import * as configWatcher from './configWatcher'; + +/** + * This class manages all of the MLIR extension state, + * including the language client. + */ +export class MLIRContext implements vscode.Disposable { + subscriptions: vscode.Disposable[] = []; + client!: vscodelc.LanguageClient; + + /** + * Activate the MLIR context, and start the language client. + */ + async activate() { + // Get the path of the mlir-lsp-server that is used to provide language + // functionality. + const userDefinedServerPath = config.get('server_path'); + const serverPath = (userDefinedServerPath === '') ? 'mlir-lsp-server' + : userDefinedServerPath; + + // Configure the server options. + const serverOptions: vscodelc.ServerOptions = { + run : { + command : serverPath, + transport : vscodelc.TransportKind.stdio, + args : [] + }, + debug : { + command : serverPath, + transport : vscodelc.TransportKind.stdio, + args : [] + } + }; + + // Configure the client options. + const clientOptions: vscodelc.LanguageClientOptions = { + documentSelector : [ {scheme : 'file', language : 'mlir'} ], + synchronize : { + // Notify the server about file changes to *.mlir files contained in the + // workspace. + fileEvents : vscode.workspace.createFileSystemWatcher('**/*.mlir') + } + }; + + // Create the language client and start the client. + this.client = new vscodelc.LanguageClient( + 'mlir-lsp', 'MLIR Language Client', serverOptions, clientOptions); + this.subscriptions.push(this.client.start()); + + // Watch for configuration changes. + configWatcher.activate(this); + } + + dispose() { + this.subscriptions.forEach((d) => { d.dispose(); }); + this.subscriptions = []; + } +} From d77ccfdc72182cf7ca1bbf6b8b47e062766a9f1f Mon Sep 17 00:00:00 2001 From: Greg Clayton Date: Thu, 17 Jun 2021 14:22:24 -0700 Subject: [PATCH 124/619] Create synthetic symbol names on demand to improve memory consumption and startup times. This fix was created after profiling the target creation of a large C/C++/ObjC application that contained almost 4,000,000 redacted symbol names. The symbol table parsing code was creating names for each of these synthetic symbols and adding them to the name indexes. The code was also adding the object file basename to the end of the symbol name which doesn't allow symbols from different shared libraries to share the names in the constant string pool. Prior to this fix this was creating 180MB of "___lldb_unnamed_symbol" symbol names and was taking a long time to generate each name, add them to the string pool and then add each of these names to the name index. This patch fixes the issue by: - not adding a name to synthetic symbols at creation time, and allows name to be dynamically generated when accessed - doesn't add synthetic symbol names to the name indexes, but catches this special case as name lookup time. Users won't typically set breakpoints or lookup these synthetic names, but support was added to do the lookup in case it does happen - removes the object file baseanme from the generated names to allow the names to be shared in the constant string pool Prior to this fix the startup times for a large application was: 12.5 seconds (cold file caches) 8.5 seconds (warm file caches) After this fix: 9.7 seconds (cold file caches) 5.7 seconds (warm file caches) The names of the symbols are auto generated by appending the symbol's UserID to the end of the "___lldb_unnamed_symbol" string and is only done when the name is requested from a synthetic symbol if it has no name. Differential Revision: https://reviews.llvm.org/D104488 --- lldb/include/lldb/Symbol/ObjectFile.h | 2 - lldb/include/lldb/Symbol/Symbol.h | 24 +++++-- lldb/include/lldb/Symbol/Symtab.h | 20 ++++++ .../Plugins/ObjectFile/ELF/ObjectFileELF.cpp | 64 ++++++++++--------- .../ObjectFile/Mach-O/ObjectFileMachO.cpp | 6 +- lldb/source/Symbol/ObjectFile.cpp | 10 --- lldb/source/Symbol/Symbol.cpp | 40 +++++++++--- lldb/source/Symbol/Symtab.cpp | 38 +++++++++-- 8 files changed, 140 insertions(+), 64 deletions(-) diff --git a/lldb/include/lldb/Symbol/ObjectFile.h b/lldb/include/lldb/Symbol/ObjectFile.h index 1e29cf53b78b3..dc83565c7db52 100644 --- a/lldb/include/lldb/Symbol/ObjectFile.h +++ b/lldb/include/lldb/Symbol/ObjectFile.h @@ -712,8 +712,6 @@ class ObjectFile : public std::enable_shared_from_this, /// false otherwise. bool SetModulesArchitecture(const ArchSpec &new_arch); - ConstString GetNextSyntheticSymbolName(); - static lldb::DataBufferSP MapFileData(const FileSpec &file, uint64_t Size, uint64_t Offset); diff --git a/lldb/include/lldb/Symbol/Symbol.h b/lldb/include/lldb/Symbol/Symbol.h index 3abe3114863de..be3e8abefa490 100644 --- a/lldb/include/lldb/Symbol/Symbol.h +++ b/lldb/include/lldb/Symbol/Symbol.h @@ -113,14 +113,20 @@ class Symbol : public SymbolContextScope { lldb::LanguageType GetLanguage() const { // TODO: See if there is a way to determine the language for a symbol // somehow, for now just return our best guess - return m_mangled.GuessLanguage(); + return GetMangled().GuessLanguage(); } void SetID(uint32_t uid) { m_uid = uid; } - Mangled &GetMangled() { return m_mangled; } + Mangled &GetMangled() { + SynthesizeNameIfNeeded(); + return m_mangled; + } - const Mangled &GetMangled() const { return m_mangled; } + const Mangled &GetMangled() const { + SynthesizeNameIfNeeded(); + return m_mangled; + } ConstString GetReExportedSymbolName() const; @@ -166,9 +172,9 @@ class Symbol : public SymbolContextScope { bool IsTrampoline() const; bool IsIndirect() const; - + bool IsWeak() const { return m_is_weak; } - + void SetIsWeak (bool b) { m_is_weak = b; } bool GetByteSizeIsValid() const { return m_size_is_valid; } @@ -223,6 +229,10 @@ class Symbol : public SymbolContextScope { bool ContainsFileAddress(lldb::addr_t file_addr) const; + static llvm::StringRef GetSyntheticSymbolPrefix() { + return "___lldb_unnamed_symbol"; + } + protected: // This is the internal guts of ResolveReExportedSymbol, it assumes // reexport_name is not null, and that module_spec is valid. We track the @@ -233,6 +243,8 @@ class Symbol : public SymbolContextScope { lldb_private::ModuleSpec &module_spec, lldb_private::ModuleList &seen_modules) const; + void SynthesizeNameIfNeeded() const; + uint32_t m_uid = UINT32_MAX; // User ID (usually the original symbol table index) uint16_t m_type_data = 0; // data specific to m_type @@ -258,7 +270,7 @@ class Symbol : public SymbolContextScope { // doing name lookups m_is_weak : 1, m_type : 6; // Values from the lldb::SymbolType enum. - Mangled m_mangled; // uniqued symbol name/mangled name pair + mutable Mangled m_mangled; // uniqued symbol name/mangled name pair AddressRange m_addr_range; // Contains the value, or the section offset // address when the value is an address in a // section, and the size (if any) diff --git a/lldb/include/lldb/Symbol/Symtab.h b/lldb/include/lldb/Symbol/Symtab.h index fbfa3a5e0cec7..e1ad0dfd2eb8d 100644 --- a/lldb/include/lldb/Symbol/Symtab.h +++ b/lldb/include/lldb/Symbol/Symtab.h @@ -219,6 +219,26 @@ class Symtab { return false; } + /// A helper function that looks up full function names. + /// + /// We generate unique names for synthetic symbols so that users can look + /// them up by name when needed. But because doing so is uncommon in normal + /// debugger use, we trade off some performance at lookup time for faster + /// symbol table building by detecting these symbols and generating their + /// names lazily, rather than adding them to the normal symbol indexes. This + /// function does the job of first consulting the name indexes, and if that + /// fails it extracts the information it needs from the synthetic name and + /// locates the symbol. + /// + /// @param[in] symbol_name The symbol name to search for. + /// + /// @param[out] indexes The vector if symbol indexes to update with results. + /// + /// @returns The number of indexes added to the index vector. Zero if no + /// matches were found. + uint32_t GetNameIndexes(ConstString symbol_name, + std::vector &indexes); + void SymbolIndicesToSymbolContextList(std::vector &symbol_indexes, SymbolContextList &sc_list); diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp index be73d38961ea6..edf87f036f0f5 100644 --- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp +++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp @@ -1880,7 +1880,7 @@ void ObjectFileELF::CreateSections(SectionList &unified_section_list) { unified_section_list.AddSection(symtab_section_sp); } } - } + } } std::shared_ptr ObjectFileELF::GetGnuDebugDataObjectFile() { @@ -2813,20 +2813,24 @@ Symtab *ObjectFileELF::GetSymtab() { if (is_valid_entry_point && !m_symtab_up->FindSymbolContainingFileAddress( entry_point_file_addr)) { uint64_t symbol_id = m_symtab_up->GetNumSymbols(); - Symbol symbol(symbol_id, - GetNextSyntheticSymbolName().GetCString(), // Symbol name. - eSymbolTypeCode, // Type of this symbol. - true, // Is this globally visible? - false, // Is this symbol debug info? - false, // Is this symbol a trampoline? - true, // Is this symbol artificial? - entry_point_addr.GetSection(), // Section where this - // symbol is defined. - 0, // Offset in section or symbol value. - 0, // Size. - false, // Size is valid. - false, // Contains linker annotations? - 0); // Symbol flags. + // Don't set the name for any synthetic symbols, the Symbol + // object will generate one if needed when the name is accessed + // via accessors. + SectionSP section_sp = entry_point_addr.GetSection(); + Symbol symbol( + /*symID=*/symbol_id, + /*name=*/llvm::StringRef(), // Name will be auto generated. + /*type=*/eSymbolTypeCode, + /*external=*/true, + /*is_debug=*/false, + /*is_trampoline=*/false, + /*is_artificial=*/true, + /*section_sp=*/section_sp, + /*offset=*/entry_point_addr.GetOffset(), + /*size=*/0, // FDE can span multiple symbols so don't use its size. + /*size_is_valid=*/false, + /*contains_linker_annotations=*/false, + /*flags=*/0); m_symtab_up->AddSymbol(symbol); // When the entry point is arm thumb we need to explicitly set its // class address to reflect that. This is important because expression @@ -2917,22 +2921,24 @@ void ObjectFileELF::ParseUnwindSymbols(Symtab *symbol_table, section_list->FindSectionContainingFileAddress(file_addr); if (section_sp) { addr_t offset = file_addr - section_sp->GetFileAddress(); - const char *symbol_name = GetNextSyntheticSymbolName().GetCString(); uint64_t symbol_id = ++last_symbol_id; + // Don't set the name for any synthetic symbols, the Symbol + // object will generate one if needed when the name is accessed + // via accessors. Symbol eh_symbol( - symbol_id, // Symbol table index. - symbol_name, // Symbol name. - eSymbolTypeCode, // Type of this symbol. - true, // Is this globally visible? - false, // Is this symbol debug info? - false, // Is this symbol a trampoline? - true, // Is this symbol artificial? - section_sp, // Section in which this symbol is defined or null. - offset, // Offset in section or symbol value. - 0, // Size: Don't specify the size as an FDE can - false, // Size is valid: cover multiple symbols. - false, // Contains linker annotations? - 0); // Symbol flags. + /*symID=*/symbol_id, + /*name=*/llvm::StringRef(), // Name will be auto generated. + /*type=*/eSymbolTypeCode, + /*external=*/true, + /*is_debug=*/false, + /*is_trampoline=*/false, + /*is_artificial=*/true, + /*section_sp=*/section_sp, + /*offset=*/offset, + /*size=*/0, // FDE can span multiple symbols so don't use its size. + /*size_is_valid=*/false, + /*contains_linker_annotations=*/false, + /*flags=*/0); new_symbols.push_back(eh_symbol); } } diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp index e7652cffb1c81..72389e9fd5c67 100644 --- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp +++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp @@ -4696,8 +4696,10 @@ size_t ObjectFileMachO::ParseSymtab() { symbol_byte_size = section_end_file_addr - symbol_file_addr; } sym[sym_idx].SetID(synthetic_sym_id++); - sym[sym_idx].GetMangled().SetDemangledName( - GetNextSyntheticSymbolName()); + // Don't set the name for any synthetic symbols, the Symbol + // object will generate one if needed when the name is accessed + // via accessors. + sym[sym_idx].GetMangled().SetDemangledName(ConstString()); sym[sym_idx].SetType(eSymbolTypeCode); sym[sym_idx].SetIsSynthetic(true); sym[sym_idx].GetAddressRef() = symbol_addr; diff --git a/lldb/source/Symbol/ObjectFile.cpp b/lldb/source/Symbol/ObjectFile.cpp index b0fdd50b3c0f1..101af01341a20 100644 --- a/lldb/source/Symbol/ObjectFile.cpp +++ b/lldb/source/Symbol/ObjectFile.cpp @@ -616,16 +616,6 @@ ObjectFile::GetSymbolTypeFromName(llvm::StringRef name, return symbol_type_hint; } -ConstString ObjectFile::GetNextSyntheticSymbolName() { - llvm::SmallString<256> name; - llvm::raw_svector_ostream os(name); - ConstString file_name = GetModule()->GetFileSpec().GetFilename(); - ++m_synthetic_symbol_idx; - os << "___lldb_unnamed_symbol" << m_synthetic_symbol_idx << "$$" - << file_name.GetStringRef(); - return ConstString(os.str()); -} - std::vector ObjectFile::GetLoadableData(Target &target) { std::vector loadables; diff --git a/lldb/source/Symbol/Symbol.cpp b/lldb/source/Symbol/Symbol.cpp index a25911d1734da..b24372795ad55 100644 --- a/lldb/source/Symbol/Symbol.cpp +++ b/lldb/source/Symbol/Symbol.cpp @@ -56,8 +56,8 @@ Symbol::Symbol(uint32_t symID, const Mangled &mangled, SymbolType type, m_size_is_synthesized(false), m_size_is_valid(size_is_valid || range.GetByteSize() > 0), m_demangled_is_synthesized(false), - m_contains_linker_annotations(contains_linker_annotations), - m_is_weak(false), m_type(type), m_mangled(mangled), m_addr_range(range), + m_contains_linker_annotations(contains_linker_annotations), + m_is_weak(false), m_type(type), m_mangled(mangled), m_addr_range(range), m_flags(flags) {} Symbol::Symbol(const Symbol &rhs) @@ -119,7 +119,7 @@ bool Symbol::ValueIsAddress() const { } ConstString Symbol::GetDisplayName() const { - return m_mangled.GetDisplayDemangledName(); + return GetMangled().GetDisplayDemangledName(); } ConstString Symbol::GetReExportedSymbolName() const { @@ -202,7 +202,7 @@ void Symbol::GetDescription(Stream *s, lldb::DescriptionLevel level, s->Printf(", value = 0x%16.16" PRIx64, m_addr_range.GetBaseAddress().GetOffset()); } - ConstString demangled = m_mangled.GetDemangledName(); + ConstString demangled = GetMangled().GetDemangledName(); if (demangled) s->Printf(", name=\"%s\"", demangled.AsCString()); if (m_mangled.GetMangledName()) @@ -218,7 +218,7 @@ void Symbol::Dump(Stream *s, Target *target, uint32_t index, // Make sure the size of the symbol is up to date before dumping GetByteSize(); - ConstString name = m_mangled.GetName(name_preference); + ConstString name = GetMangled().GetName(name_preference); if (ValueIsAddress()) { if (!m_addr_range.GetBaseAddress().Dump(s, nullptr, Address::DumpStyleFileAddress)) @@ -330,9 +330,11 @@ uint32_t Symbol::GetPrologueByteSize() { } bool Symbol::Compare(ConstString name, SymbolType type) const { - if (type == eSymbolTypeAny || m_type == type) - return m_mangled.GetMangledName() == name || - m_mangled.GetDemangledName() == name; + if (type == eSymbolTypeAny || m_type == type) { + const Mangled &mangled = GetMangled(); + return mangled.GetMangledName() == name || + mangled.GetDemangledName() == name; + } return false; } @@ -495,10 +497,10 @@ lldb::addr_t Symbol::GetLoadAddress(Target *target) const { return LLDB_INVALID_ADDRESS; } -ConstString Symbol::GetName() const { return m_mangled.GetName(); } +ConstString Symbol::GetName() const { return GetMangled().GetName(); } ConstString Symbol::GetNameNoArguments() const { - return m_mangled.GetName(Mangled::ePreferDemangledWithoutArguments); + return GetMangled().GetName(Mangled::ePreferDemangledWithoutArguments); } lldb::addr_t Symbol::ResolveCallableAddress(Target &target) const { @@ -565,3 +567,21 @@ bool Symbol::GetDisassembly(const ExecutionContext &exe_ctx, const char *flavor, bool Symbol::ContainsFileAddress(lldb::addr_t file_addr) const { return m_addr_range.ContainsFileAddress(file_addr); } + +void Symbol::SynthesizeNameIfNeeded() const { + if (m_is_synthetic && !m_mangled) { + // Synthetic symbol names don't mean anything, but they do uniquely + // identify individual symbols so we give them a unique name. The name + // starts with the synthetic symbol prefix, followed by a unique number. + // Typically the UserID of a real symbol is the symbol table index of the + // symbol in the object file's symbol table(s), so it will be the same + // every time you read in the object file. We want the same persistence for + // synthetic symbols so that users can identify them across multiple debug + // sessions, to understand crashes in those symbols and to reliably set + // breakpoints on them. + llvm::SmallString<256> name; + llvm::raw_svector_ostream os(name); + os << GetSyntheticSymbolPrefix() << GetID(); + m_mangled.SetDemangledName(ConstString(os.str())); + } +} diff --git a/lldb/source/Symbol/Symtab.cpp b/lldb/source/Symbol/Symtab.cpp index 14360aa69a72c..d859d8e251299 100644 --- a/lldb/source/Symbol/Symtab.cpp +++ b/lldb/source/Symbol/Symtab.cpp @@ -301,7 +301,7 @@ void Symtab::InitNameIndexes() { // the trampoline symbols to be searchable by name we can remove this and // then possibly add a new bool to any of the Symtab functions that // lookup symbols by name to indicate if they want trampolines. - if (symbol->IsTrampoline()) + if (symbol->IsTrampoline() || symbol->IsSynthetic()) continue; // If the symbol's name string matched a Mangled::ManglingScheme, it is @@ -628,6 +628,36 @@ void Symtab::SortSymbolIndexesByValue(std::vector &indexes, } } +uint32_t Symtab::GetNameIndexes(ConstString symbol_name, + std::vector &indexes) { + auto &name_to_index = GetNameToSymbolIndexMap(lldb::eFunctionNameTypeNone); + const uint32_t count = name_to_index.GetValues(symbol_name, indexes); + if (count) + return count; + // Synthetic symbol names are not added to the name indexes, but they start + // with a prefix and end with a the symbol UserID. This allows users to find + // these symbols without having to add them to the name indexes. These + // queries will not happen very often since the names don't mean anything, so + // performance is not paramount in this case. + llvm::StringRef name = symbol_name.GetStringRef(); + // String the synthetic prefix if the name starts with it. + if (!name.consume_front(Symbol::GetSyntheticSymbolPrefix())) + return 0; // Not a synthetic symbol name + + // Extract the user ID from the symbol name + user_id_t uid = 0; + if (getAsUnsignedInteger(name, /*Radix=*/10, uid)) + return 0; // Failed to extract the user ID as an integer + Symbol *symbol = FindSymbolByID(uid); + if (symbol == nullptr) + return 0; + const uint32_t symbol_idx = GetIndexForSymbol(symbol); + if (symbol_idx == UINT32_MAX) + return 0; + indexes.push_back(symbol_idx); + return 1; +} + uint32_t Symtab::AppendSymbolIndexesWithName(ConstString symbol_name, std::vector &indexes) { std::lock_guard guard(m_mutex); @@ -637,8 +667,7 @@ uint32_t Symtab::AppendSymbolIndexesWithName(ConstString symbol_name, if (!m_name_indexes_computed) InitNameIndexes(); - auto &name_to_index = GetNameToSymbolIndexMap(lldb::eFunctionNameTypeNone); - return name_to_index.GetValues(symbol_name, indexes); + return GetNameIndexes(symbol_name, indexes); } return 0; } @@ -655,10 +684,9 @@ uint32_t Symtab::AppendSymbolIndexesWithName(ConstString symbol_name, if (!m_name_indexes_computed) InitNameIndexes(); - auto &name_to_index = GetNameToSymbolIndexMap(lldb::eFunctionNameTypeNone); std::vector all_name_indexes; const size_t name_match_count = - name_to_index.GetValues(symbol_name, all_name_indexes); + GetNameIndexes(symbol_name, all_name_indexes); for (size_t i = 0; i < name_match_count; ++i) { if (CheckSymbolAtIndex(all_name_indexes[i], symbol_debug_type, symbol_visibility)) From 323bcbdba0e6ffa206a4575ce90e5056e8e77c09 Mon Sep 17 00:00:00 2001 From: Greg Clayton Date: Mon, 28 Jun 2021 18:12:05 -0700 Subject: [PATCH 125/619] Fix buildbot failure after https://reviews.llvm.org/D104488. --- lldb/source/Symbol/Symtab.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/source/Symbol/Symtab.cpp b/lldb/source/Symbol/Symtab.cpp index d859d8e251299..89e75c28cb9b6 100644 --- a/lldb/source/Symbol/Symtab.cpp +++ b/lldb/source/Symbol/Symtab.cpp @@ -645,7 +645,7 @@ uint32_t Symtab::GetNameIndexes(ConstString symbol_name, return 0; // Not a synthetic symbol name // Extract the user ID from the symbol name - user_id_t uid = 0; + unsigned long long uid = 0; if (getAsUnsignedInteger(name, /*Radix=*/10, uid)) return 0; // Failed to extract the user ID as an integer Symbol *symbol = FindSymbolByID(uid); From ab546ead3bf720b1789ddb2e093c398409ec3679 Mon Sep 17 00:00:00 2001 From: Greg Clayton Date: Mon, 28 Jun 2021 16:32:36 -0700 Subject: [PATCH 126/619] Fix a case where multiple symbols with zero size would cause duplicate entries in gsym files. Symbol tables can have symbols with no size in mach-o files that were failing to get combined into a single entry. This resulted in many duplicate entries for the same address and made gsym files larger. Differential Revision: https://reviews.llvm.org/D105068 --- llvm/lib/DebugInfo/GSYM/GsymCreator.cpp | 10 ++++++--- llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp | 24 ++++++++++++++++++++++ 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp index 5d0f01382c2ec..1c20a59469dc2 100644 --- a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp +++ b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp @@ -224,9 +224,13 @@ llvm::Error GsymCreator::finalize(llvm::raw_ostream &OS) { Funcs.erase( removeIfBinary(Funcs.begin(), Funcs.end(), [&](const auto &Prev, const auto &Curr) { - if (Prev.Range.intersects(Curr.Range)) { - // Overlapping address ranges. - if (Prev.Range == Curr.Range) { + // Empty ranges won't intersect, but we still need to + // catch the case where we have multiple symbols at the + // same address and coalesce them. + const bool ranges_equal = Prev.Range == Curr.Range; + if (ranges_equal || Prev.Range.intersects(Curr.Range)) { + // Overlapping ranges or empty identical ranges. + if (ranges_equal) { // Same address range. Check if one is from debug // info and the other is from a symbol table. If // so, then keep the one with debug info. Our diff --git a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp index caefd500ecdd5..2376b09999053 100644 --- a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp +++ b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp @@ -2518,3 +2518,27 @@ TEST(GSYMTest, TestDWARFDeadStripAddr8) { StringRef MethodName = GR->getString(ExpFI->Name); EXPECT_EQ(MethodName, "main"); } + +TEST(GSYMTest, TestGsymCreatorMultipleSymbolsWithNoSize) { + // Multiple symbols at the same address with zero size were being emitted + // instead of being combined into a single entry. This function tests to make + // sure we only get one symbol. + uint8_t UUID[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + GsymCreator GC; + GC.setUUID(UUID); + constexpr uint64_t BaseAddr = 0x1000; + constexpr uint8_t AddrOffSize = 1; + const uint32_t Func1Name = GC.insertString("foo"); + const uint32_t Func2Name = GC.insertString("bar"); + GC.addFunctionInfo(FunctionInfo(BaseAddr, 0, Func1Name)); + GC.addFunctionInfo(FunctionInfo(BaseAddr, 0, Func2Name)); + Error Err = GC.finalize(llvm::nulls()); + ASSERT_FALSE(Err); + TestEncodeDecode(GC, llvm::support::little, GSYM_VERSION, AddrOffSize, + BaseAddr, + 1, // NumAddresses + ArrayRef(UUID)); + TestEncodeDecode(GC, llvm::support::big, GSYM_VERSION, AddrOffSize, BaseAddr, + 1, // NumAddresses + ArrayRef(UUID)); +} From 0d6e4199e32a3a5942f920bf13c0a0ddf10d2579 Mon Sep 17 00:00:00 2001 From: harsh-nod Date: Mon, 28 Jun 2021 18:40:49 -0700 Subject: [PATCH 127/619] [mlir][vector] Order parallel indices before transposing the input in multireductions The current code does not preserve the order of the parallel dimensions when doing multi-reductions and thus we can end up in scenarios where the result shape does not match the desired shape after reduction. This patch fixes that by ensuring that the parallel indices are in order and then concatenates them to the reduction dimensions so that the reduction dimensions are innermost. Differential Revision: https://reviews.llvm.org/D104884 --- mlir/lib/Dialect/Vector/VectorTransforms.cpp | 50 ++++++++----------- .../vector-multi-reduction-lowering.mlir | 45 ++++++++++++++++- 2 files changed, 65 insertions(+), 30 deletions(-) diff --git a/mlir/lib/Dialect/Vector/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/VectorTransforms.cpp index e04d48d6ca840..3a10fb3de6416 100644 --- a/mlir/lib/Dialect/Vector/VectorTransforms.cpp +++ b/mlir/lib/Dialect/Vector/VectorTransforms.cpp @@ -37,6 +37,7 @@ #include "mlir/IR/Types.h" #include "mlir/Interfaces/VectorInterfaces.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -3915,42 +3916,33 @@ struct InnerDimReductionConversion auto loc = multiReductionOp.getLoc(); auto srcRank = multiReductionOp.getSourceVectorType().getRank(); - auto reductionDims = llvm::to_vector<4>( - llvm::map_range(multiReductionOp.reduction_dims().cast(), - [](Attribute attr) -> int64_t { - return attr.cast().getInt(); - })); - llvm::sort(reductionDims); - - int64_t reductionSize = multiReductionOp.reduction_dims().size(); - - // Fails if already inner most reduction. - bool innerMostReduction = true; - for (int i = 0; i < reductionSize; ++i) { - if (reductionDims[reductionSize - i - 1] != srcRank - i - 1) { - innerMostReduction = false; - } + // Separate reduction and parallel dims + auto reductionDimsRange = + multiReductionOp.reduction_dims().getAsValueRange(); + auto reductionDims = llvm::to_vector<4>(llvm::map_range( + reductionDimsRange, [](APInt a) { return a.getZExtValue(); })); + llvm::SmallDenseSet reductionDimsSet(reductionDims.begin(), + reductionDims.end()); + int64_t reductionSize = reductionDims.size(); + SmallVector parallelDims; + for (int64_t i = 0; i < srcRank; i++) { + if (!reductionDimsSet.contains(i)) + parallelDims.push_back(i); } - if (innerMostReduction) - return failure(); - // Permutes the indices so reduction dims are inner most dims. - SmallVector indices; - for (int i = 0; i < srcRank; ++i) { - indices.push_back(i); - } - int ir = reductionSize - 1; - int id = srcRank - 1; - while (ir >= 0) { - std::swap(indices[reductionDims[ir--]], indices[id--]); - } + // Add transpose only if inner-most dimensions are not reductions + if (parallelDims == + llvm::to_vector<4>(llvm::seq(0, parallelDims.size()))) + return failure(); - // Sets inner most dims as reduction. + SmallVector indices; + indices.append(parallelDims.begin(), parallelDims.end()); + indices.append(reductionDims.begin(), reductionDims.end()); + auto transposeOp = rewriter.create(loc, src, indices); SmallVector reductionMask(srcRank, false); for (int i = 0; i < reductionSize; ++i) { reductionMask[srcRank - i - 1] = true; } - auto transposeOp = rewriter.create(loc, src, indices); rewriter.replaceOpWithNewOp( multiReductionOp, transposeOp.result(), reductionMask, multiReductionOp.kind()); diff --git a/mlir/test/Dialect/Vector/vector-multi-reduction-lowering.mlir b/mlir/test/Dialect/Vector/vector-multi-reduction-lowering.mlir index 6cfc4e035719d..4121262722e34 100644 --- a/mlir/test/Dialect/Vector/vector-multi-reduction-lowering.mlir +++ b/mlir/test/Dialect/Vector/vector-multi-reduction-lowering.mlir @@ -61,6 +61,49 @@ func @vector_multi_reduction_transposed(%arg0: vector<2x3x4x5xf32>) -> vector<2x // CHECK-LABEL: func @vector_multi_reduction_transposed // CHECK-SAME: %[[INPUT:.+]]: vector<2x3x4x5xf32> // CHECK: %[[TRANSPOSED_INPUT:.+]] = vector.transpose %[[INPUT]], [0, 3, 1, 2] : vector<2x3x4x5xf32> to vector<2x5x3x4xf32> -// CHEKC: vector.shape_cast %[[TRANSPOSED_INPUT]] : vector<2x5x3x4xf32> to vector<10x12xf32> +// CHECK: vector.shape_cast %[[TRANSPOSED_INPUT]] : vector<2x5x3x4xf32> to vector<10x12xf32> // CHECK: %[[RESULT:.+]] = vector.shape_cast %{{.*}} : vector<10xf32> to vector<2x5xf32> // CHECK: return %[[RESULT]] + +func @vector_multi_reduction_ordering(%arg0: vector<3x2x4xf32>) -> vector<2x4xf32> { + %0 = vector.multi_reduction #vector.kind, %arg0 [0] : vector<3x2x4xf32> to vector<2x4xf32> + return %0 : vector<2x4xf32> +} +// CHECK-LABEL: func @vector_multi_reduction_ordering +// CHECK-SAME: %[[INPUT:.+]]: vector<3x2x4xf32> +// CHECK: %[[RESULT_VEC_0:.+]] = constant dense<{{.*}}> : vector<8xf32> +// CHECK: %[[C0:.+]] = constant 0 : i32 +// CHECK: %[[C1:.+]] = constant 1 : i32 +// CHECK: %[[C2:.+]] = constant 2 : i32 +// CHECK: %[[C3:.+]] = constant 3 : i32 +// CHECK: %[[C4:.+]] = constant 4 : i32 +// CHECK: %[[C5:.+]] = constant 5 : i32 +// CHECK: %[[C6:.+]] = constant 6 : i32 +// CHECK: %[[C7:.+]] = constant 7 : i32 +// CHECK: %[[TRANSPOSED_INPUT:.+]] = vector.transpose %[[INPUT]], [1, 2, 0] : vector<3x2x4xf32> to vector<2x4x3xf32> +// CHECK: %[[V0:.+]] = vector.extract %[[TRANSPOSED_INPUT]][0, 0] +// CHECK: %[[RV0:.+]] = vector.reduction "mul", %[[V0]] : vector<3xf32> into f32 +// CHECK: %[[RESULT_VEC_1:.+]] = vector.insertelement %[[RV0:.+]], %[[RESULT_VEC_0]][%[[C0]] : i32] : vector<8xf32> +// CHECK: %[[V1:.+]] = vector.extract %[[TRANSPOSED_INPUT]][0, 1] +// CHECK: %[[RV1:.+]] = vector.reduction "mul", %[[V1]] : vector<3xf32> into f32 +// CHECK: %[[RESULT_VEC_2:.+]] = vector.insertelement %[[RV1:.+]], %[[RESULT_VEC_1]][%[[C1]] : i32] : vector<8xf32> +// CHECK: %[[V2:.+]] = vector.extract %[[TRANSPOSED_INPUT]][0, 2] +// CHECK: %[[RV2:.+]] = vector.reduction "mul", %[[V2]] : vector<3xf32> into f32 +// CHECK: %[[RESULT_VEC_3:.+]] = vector.insertelement %[[RV2:.+]], %[[RESULT_VEC_2]][%[[C2]] : i32] : vector<8xf32> +// CHECK: %[[V3:.+]] = vector.extract %[[TRANSPOSED_INPUT]][0, 3] +// CHECK: %[[RV3:.+]] = vector.reduction "mul", %[[V3]] : vector<3xf32> into f32 +// CHECK: %[[RESULT_VEC_4:.+]] = vector.insertelement %[[RV3:.+]], %[[RESULT_VEC_3]][%[[C3]] : i32] : vector<8xf32> +// CHECK: %[[V4:.+]] = vector.extract %[[TRANSPOSED_INPUT]][1, 0] +// CHECK: %[[RV4:.+]] = vector.reduction "mul", %[[V4]] : vector<3xf32> into f32 +// CHECK: %[[RESULT_VEC_5:.+]] = vector.insertelement %[[RV4:.+]], %[[RESULT_VEC_4]][%[[C4]] : i32] : vector<8xf32> +// CHECK: %[[V5:.+]] = vector.extract %[[TRANSPOSED_INPUT]][1, 1] +// CHECK: %[[RV5:.+]] = vector.reduction "mul", %[[V5]] : vector<3xf32> into f32 +// CHECK: %[[RESULT_VEC_6:.+]] = vector.insertelement %[[RV5:.+]], %[[RESULT_VEC_5]][%[[C5]] : i32] : vector<8xf32> +// CHECK: %[[V6:.+]] = vector.extract %[[TRANSPOSED_INPUT]][1, 2] +// CHECK: %[[RV6:.+]] = vector.reduction "mul", %[[V6]] : vector<3xf32> into f32 +// CHECK: %[[RESULT_VEC_7:.+]] = vector.insertelement %[[RV6:.+]], %[[RESULT_VEC_6]][%[[C6]] : i32] : vector<8xf32> +// CHECK: %[[V7:.+]] = vector.extract %[[TRANSPOSED_INPUT]][1, 3] +// CHECK: %[[RV7:.+]] = vector.reduction "mul", %[[V7]] : vector<3xf32> into f32 +// CHECK: %[[RESULT_VEC:.+]] = vector.insertelement %[[RV7:.+]], %[[RESULT_VEC_7]][%[[C7]] : i32] : vector<8xf32> +// CHECK: %[[RESHAPED_VEC:.+]] = vector.shape_cast %[[RESULT_VEC]] : vector<8xf32> to vector<2x4xf32> +// CHECK: return %[[RESHAPED_VEC]] From a8a6e5b094aac642f436390294ec837400c521bb Mon Sep 17 00:00:00 2001 From: Leonard Grey Date: Mon, 28 Jun 2021 22:22:21 -0400 Subject: [PATCH 128/619] [lld-macho] Preserve alignment for non-deduplicated cstrings Fixes PR50637. Downstream bug: https://crbug.com/1218958 Currently, we split __cstring along symbol boundaries with .subsections_via_symbols when not deduplicating, and along null bytes when deduplicating. This change splits along null bytes unconditionally, and preserves original alignment in the non- deduplicated case. Removing subsections-section-relocs.s because with this change, __cstring is never reordered based on the order file. Differential Revision: https://reviews.llvm.org/D104919 --- lld/MachO/InputFiles.cpp | 7 ++- lld/MachO/InputSection.cpp | 4 +- lld/MachO/InputSection.h | 15 ++++-- lld/MachO/SyntheticSections.cpp | 54 ++++++++++++++++----- lld/MachO/SyntheticSections.h | 19 ++++++-- lld/MachO/Writer.cpp | 6 ++- lld/test/MachO/dead-strip-align.s | 46 ++++++++++++++++++ lld/test/MachO/subsections-section-relocs.s | 52 -------------------- 8 files changed, 125 insertions(+), 78 deletions(-) create mode 100644 lld/test/MachO/dead-strip-align.s delete mode 100644 lld/test/MachO/subsections-section-relocs.s diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp index 32527c7f4c7b8..4025a4c55ab1d 100644 --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -262,10 +262,9 @@ void ObjFile::parseSections(ArrayRef
sections) { uint32_t align = 1 << sec.align; uint32_t flags = sec.flags; - if (config->dedupLiterals && - (sectionType(sec.flags) == S_CSTRING_LITERALS || - isWordLiteralSection(sec.flags))) { - if (sec.nreloc) + if (sectionType(sec.flags) == S_CSTRING_LITERALS || + (config->dedupLiterals && isWordLiteralSection(sec.flags))) { + if (sec.nreloc && config->dedupLiterals) fatal(toString(this) + " contains relocations in " + sec.segname + "," + sec.sectname + ", so LLD cannot deduplicate literals. Try re-running without " diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp index a961807abd230..740eea6d8fd41 100644 --- a/lld/MachO/InputSection.cpp +++ b/lld/MachO/InputSection.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "InputSection.h" +#include "Config.h" #include "InputFiles.h" #include "OutputSegment.h" #include "Symbols.h" @@ -156,7 +157,8 @@ void CStringInputSection::splitIntoPieces() { if (end == StringRef::npos) fatal(toString(this) + ": string is not null terminated"); size_t size = end + 1; - pieces.emplace_back(off, xxHash64(s.substr(0, size))); + uint32_t hash = config->dedupLiterals ? xxHash64(s.substr(0, size)) : 0; + pieces.emplace_back(off, hash); s = s.substr(size); off += size; } diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h index 3dd31d27be919..9eea39105f147 100644 --- a/lld/MachO/InputSection.h +++ b/lld/MachO/InputSection.h @@ -145,6 +145,7 @@ struct StringPiece { // Offset from the start of the containing input section. uint32_t inSecOff; uint32_t live : 1; + // Only set if deduplicating literals uint32_t hash : 31; // Offset from the start of the containing output section. uint64_t outSecOff = 0; @@ -180,14 +181,20 @@ class CStringInputSection final : public InputSection { // Split at each null byte. void splitIntoPieces(); - // Returns i'th piece as a CachedHashStringRef. This function is very hot when - // string merging is enabled, so we want to inline. LLVM_ATTRIBUTE_ALWAYS_INLINE - llvm::CachedHashStringRef getCachedHashStringRef(size_t i) const { + StringRef getStringRef(size_t i) const { size_t begin = pieces[i].inSecOff; size_t end = (pieces.size() - 1 == i) ? data.size() : pieces[i + 1].inSecOff; - return {toStringRef(data.slice(begin, end - begin)), pieces[i].hash}; + return toStringRef(data.slice(begin, end - begin)); + } + + // Returns i'th piece as a CachedHashStringRef. This function is very hot when + // string merging is enabled, so we want to inline. + LLVM_ATTRIBUTE_ALWAYS_INLINE + llvm::CachedHashStringRef getCachedHashStringRef(size_t i) const { + assert(config->dedupLiterals); + return {getStringRef(i), pieces[i].hash}; } static bool classof(const InputSection *isec) { diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp index 81fec04275295..c5d77142fda06 100644 --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -1152,6 +1152,45 @@ void BitcodeBundleSection::writeTo(uint8_t *buf) const { remove(xarPath); } +CStringSection::CStringSection() + : SyntheticSection(segment_names::text, section_names::cString) { + flags = S_CSTRING_LITERALS; +} + +void CStringSection::addInput(CStringInputSection *isec) { + isec->parent = this; + inputs.push_back(isec); + if (isec->align > align) + align = isec->align; +} + +void CStringSection::writeTo(uint8_t *buf) const { + for (const CStringInputSection *isec : inputs) { + for (size_t i = 0, e = isec->pieces.size(); i != e; ++i) { + if (!isec->pieces[i].live) + continue; + StringRef string = isec->getStringRef(i); + memcpy(buf + isec->pieces[i].outSecOff, string.data(), string.size()); + } + } +} + +void CStringSection::finalizeContents() { + uint64_t offset = 0; + for (CStringInputSection *isec : inputs) { + for (size_t i = 0, e = isec->pieces.size(); i != e; ++i) { + if (!isec->pieces[i].live) + continue; + uint32_t pieceAlign = MinAlign(isec->pieces[i].inSecOff, align); + offset = alignTo(offset, pieceAlign); + isec->pieces[i].outSecOff = offset; + isec->isFinal = true; + StringRef string = isec->getStringRef(i); + offset += string.size(); + } + } + size = offset; +} // Mergeable cstring literals are found under the __TEXT,__cstring section. In // contrast to ELF, which puts strings that need different alignments into // different sections, clang's Mach-O backend puts them all in one section. @@ -1176,19 +1215,10 @@ void BitcodeBundleSection::writeTo(uint8_t *buf) const { // deduplication of differently-aligned strings. Finally, the overhead is not // huge: using 16-byte alignment (vs no alignment) is only a 0.5% size overhead // when linking chromium_framework on x86_64. -CStringSection::CStringSection() - : SyntheticSection(segment_names::text, section_names::cString), - builder(StringTableBuilder::RAW, /*Alignment=*/16) { - align = 16; - flags = S_CSTRING_LITERALS; -} +DeduplicatedCStringSection::DeduplicatedCStringSection() + : builder(StringTableBuilder::RAW, /*Alignment=*/16) {} -void CStringSection::addInput(CStringInputSection *isec) { - isec->parent = this; - inputs.push_back(isec); -} - -void CStringSection::finalizeContents() { +void DeduplicatedCStringSection::finalizeContents() { // Add all string pieces to the string table builder to create section // contents. for (const CStringInputSection *isec : inputs) diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h index a5f6ea9a6e1f4..95a09a010e274 100644 --- a/lld/MachO/SyntheticSections.h +++ b/lld/MachO/SyntheticSections.h @@ -518,17 +518,28 @@ class BitcodeBundleSection final : public SyntheticSection { uint64_t xarSize; }; -class CStringSection final : public SyntheticSection { +class CStringSection : public SyntheticSection { public: CStringSection(); void addInput(CStringInputSection *); - uint64_t getSize() const override { return builder.getSize(); } - void finalizeContents(); + uint64_t getSize() const override { return size; } + virtual void finalizeContents(); bool isNeeded() const override { return !inputs.empty(); } - void writeTo(uint8_t *buf) const override { builder.write(buf); } + void writeTo(uint8_t *buf) const override; std::vector inputs; +private: + uint64_t size; +}; + +class DeduplicatedCStringSection final : public CStringSection { +public: + DeduplicatedCStringSection(); + uint64_t getSize() const override { return builder.getSize(); } + void finalizeContents() override; + void writeTo(uint8_t *buf) const override { builder.write(buf); } + private: llvm::StringTableBuilder builder; }; diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp index ffe5668a877bd..5520e65d881f2 100644 --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -1149,7 +1149,11 @@ template void macho::writeResult() { Writer().run(); } void macho::createSyntheticSections() { in.header = make(); - in.cStringSection = config->dedupLiterals ? make() : nullptr; + if (config->dedupLiterals) { + in.cStringSection = make(); + } else { + in.cStringSection = make(); + } in.wordLiteralSection = config->dedupLiterals ? make() : nullptr; in.rebase = make(); diff --git a/lld/test/MachO/dead-strip-align.s b/lld/test/MachO/dead-strip-align.s new file mode 100644 index 0000000000000..46452079cb365 --- /dev/null +++ b/lld/test/MachO/dead-strip-align.s @@ -0,0 +1,46 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o +# RUN: %lld -lSystem -o %t.out %t.o -dead_strip +# RUN: llvm-otool -l %t.out | FileCheck --check-prefix=SECT %s +# RUN: llvm-otool -vs __TEXT __cstring %t.out | FileCheck %s + +# SECT: sectname __cstring +# SECT-NEXT: segname __TEXT +# SECT-NEXT: addr +# SECT-NEXT: size +# SECT-NEXT: offset +# SECT-NEXT: align 2^4 (16) + +# CHECK: 0 \303Q043\005\376\334\272\230vT2\020\001 +# CHECK: 8 def + +.section __TEXT,__cstring,cstring_literals +.globl _foo +_foo: # Dead. External, has symbol table entry, gets stripped. + .asciz "asdf" + +.globl _hi +_hi: + .asciz "hi" # External, has symbol table entry. + +.p2align 4 +L_internal_aligned_16: # Has no symbol table entry. + .asciz "\303Q043\005\376\334\272\230vT2\020\001" + +L_internal_nonaligned: + .asciz "abc" + +.p2align 3 +L_internal_aligned_8: + .asciz "def" + +.text +.globl _main +_main: + movq _hi(%rip), %rax + movq L_internal_nonaligned(%rip), %rax + movq L_internal_aligned_8(%rip), %rax + movaps L_internal_aligned_16(%rip), %xmm0 + retq + +.subsections_via_symbols diff --git a/lld/test/MachO/subsections-section-relocs.s b/lld/test/MachO/subsections-section-relocs.s deleted file mode 100644 index 84baa4784202a..0000000000000 --- a/lld/test/MachO/subsections-section-relocs.s +++ /dev/null @@ -1,52 +0,0 @@ -# REQUIRES: x86 -# RUN: rm -rf %t; split-file %s %t -# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o - -# RUN: %lld -o %t/test %t/test.o -order_file %t/order-file -# RUN: llvm-objdump --section-headers -d --no-show-raw-insn %t/test | FileCheck %s -# CHECK-LABEL: Sections: -# CHECK: __cstring {{[^ ]*}} {{0*}}[[#%x, CSTRING_ADDR:]] -# CHECK-LABEL: Disassembly of section __TEXT,__text: -## L._str should end up at CSTRING_ADDR + 4, and leaq is 7 bytes long so we -## have RIP = ADDR + 7 -# CHECK: [[#%x, ADDR:]]: leaq -# CHECK-SAME: [[#%u, CSTRING_ADDR + 4 - ADDR - 7]](%rip), %rsi {{.*}} <_bar_str+0x4> - -# RUN: llvm-readobj --string-dump=__cstring %t/test | FileCheck %s --check-prefix=STRINGS -# STRINGS: bar -# STRINGS: Private symbol -# STRINGS: foo - -#--- order-file -_bar_str -_foo_str - -#--- test.s -.text -.globl _main, _foo_str, _bar_str - -_main: - leaq L_.str(%rip), %rsi - mov $0, %rax - ret - -.section __TEXT,__cstring -_foo_str: - .asciz "foo" - -_bar_str: - .asciz "bar" - -## References to this generate a section relocation -## N.B.: ld64 doesn't actually reorder symbols in __cstring based on the order -## file. Our implementation only does does so if --no-literal-merge is -## specified. I'm not sure how else to test section relocations that -## target an address inside a relocated symbol: using a non-__cstring -## section would cause llvm-mc to emit a symbol relocation instead using -## the nearest symbol. It might be more consistent for LLD to disable -## symbol-based cstring reordering altogether and leave this functionality -## untested, at least until we find a real-world use case... -L_.str: - .asciz "Private symbol" - -.subsections_via_symbols From 31eb696fc4cd3b1ed8054d88af54f214c0f92989 Mon Sep 17 00:00:00 2001 From: Xun Li Date: Mon, 28 Jun 2021 19:28:27 -0700 Subject: [PATCH 129/619] [Coroutines] Remove CoroElide from O0 pipeline CoroElide pass works only when a post-split coroutine is inlined into another post-split coroutine. In O0, there is no inlining after CoroSplit, and hence no CoroElide can happen. It's useless to put CoroElide pass in the O0 pipeline and it will never be triggered (unless I miss anything). Differential Revision: https://reviews.llvm.org/D105066 --- .../CodeGenCoroutines/coro-newpm-pipeline.cpp | 16 ++++++++-------- llvm/lib/Passes/PassBuilder.cpp | 1 - llvm/test/Transforms/Coroutines/smoketest.ll | 18 +++++++++--------- 3 files changed, 17 insertions(+), 18 deletions(-) diff --git a/clang/test/CodeGenCoroutines/coro-newpm-pipeline.cpp b/clang/test/CodeGenCoroutines/coro-newpm-pipeline.cpp index 83f8121296690..91e0fb3042b9d 100644 --- a/clang/test/CodeGenCoroutines/coro-newpm-pipeline.cpp +++ b/clang/test/CodeGenCoroutines/coro-newpm-pipeline.cpp @@ -3,23 +3,23 @@ // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm-bc -o /dev/null \ // RUN: -fexperimental-new-pass-manager -fdebug-pass-manager -fcoroutines-ts \ -// RUN: -O0 %s 2>&1 | FileCheck %s +// RUN: -O0 %s 2>&1 | FileCheck %s --check-prefixes=CHECK-ALL // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm-bc -o /dev/null \ // RUN: -fexperimental-new-pass-manager -fdebug-pass-manager -fcoroutines-ts \ -// RUN: -O1 %s 2>&1 | FileCheck %s +// RUN: -O1 %s 2>&1 | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-OPT // -// CHECK: Running pass:{{.*}}CoroEarlyPass +// CHECK-ALL: Running pass:{{.*}}CoroEarlyPass // // The first coro-split pass enqueues a second run of the entire CGSCC pipeline. -// CHECK: Running pass: CoroSplitPass on (_Z3foov) -// CHECK: Running pass:{{.*}}CoroElidePass{{.*}} on {{.*}}_Z3foov{{.*}} +// CHECK-ALL: Running pass: CoroSplitPass on (_Z3foov) +// CHECK-OPT: Running pass:{{.*}}CoroElidePass{{.*}} on {{.*}}_Z3foov{{.*}} // // The second coro-split pass splits coroutine 'foo' into funclets // 'foo.resume', 'foo.destroy', and 'foo.cleanup'. -// CHECK: Running pass: CoroSplitPass on (_Z3foov) -// CHECK: Running pass:{{.*}}CoroElidePass{{.*}} on {{.*}}_Z3foov{{.*}} +// CHECK-ALL: Running pass: CoroSplitPass on (_Z3foov) +// CHECK-OPT: Running pass:{{.*}}CoroElidePass{{.*}} on {{.*}}_Z3foov{{.*}} // -// CHECK: Running pass:{{.*}}CoroCleanupPass +// CHECK-ALL: Running pass:{{.*}}CoroCleanupPass namespace std { namespace experimental { diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 49f6c1049625f..2db8b451bf16d 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -1986,7 +1986,6 @@ ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level, CGSCCPassManager CGPM; CGPM.addPass(CoroSplitPass()); - CGPM.addPass(createCGSCCToFunctionPassAdaptor(CoroElidePass())); MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM))); MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass())); diff --git a/llvm/test/Transforms/Coroutines/smoketest.ll b/llvm/test/Transforms/Coroutines/smoketest.ll index bb8d26783ca9d..bd122ff00180c 100644 --- a/llvm/test/Transforms/Coroutines/smoketest.ll +++ b/llvm/test/Transforms/Coroutines/smoketest.ll @@ -2,21 +2,21 @@ ; levels and -enable-coroutines adds coroutine passes to the pipeline. ; ; RUN: opt < %s -disable-output -passes='default' -enable-coroutines \ -; RUN: -debug-pass-manager 2>&1 | FileCheck %s +; RUN: -debug-pass-manager 2>&1 | FileCheck %s --check-prefixes=CHECK-ALL ; RUN: opt < %s -disable-output -passes='default' -enable-coroutines \ -; RUN: -debug-pass-manager 2>&1 | FileCheck %s +; RUN: -debug-pass-manager 2>&1 | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-OPT ; RUN: opt < %s -disable-output -passes='default' -enable-coroutines \ -; RUN: -debug-pass-manager 2>&1 | FileCheck %s +; RUN: -debug-pass-manager 2>&1 | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-OPT ; RUN: opt < %s -disable-output -passes='default' -enable-coroutines \ -; RUN: -debug-pass-manager 2>&1 | FileCheck %s +; RUN: -debug-pass-manager 2>&1 | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-OPT ; RUN: opt < %s -disable-output -debug-pass-manager \ ; RUN: -passes='function(coro-early),cgscc(coro-split),function(coro-elide,coro-cleanup)' 2>&1 \ -; RUN: | FileCheck %s +; RUN: | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-OPT -; CHECK: CoroEarlyPass -; CHECK: CoroSplitPass -; CHECK: CoroElidePass -; CHECK: CoroCleanupPass +; CHECK-ALL: CoroEarlyPass +; CHECK-ALL: CoroSplitPass +; CHECK-OPT: CoroElidePass +; CHECK-ALL: CoroCleanupPass define void @foo() { ret void From e818eface8034040fbea7ce2f05761944b2d53b1 Mon Sep 17 00:00:00 2001 From: Michael Liao Date: Tue, 25 May 2021 20:21:21 -0400 Subject: [PATCH 130/619] [MIRParser] Add machine metadata. - Add standalone metadata parsing support so that machine metadata nodes could be populated before and accessed during MIR is parsed. Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D103282 --- .../include/llvm/CodeGen/MIRParser/MIParser.h | 8 + llvm/lib/CodeGen/MIRParser/MILexer.cpp | 1 + llvm/lib/CodeGen/MIRParser/MILexer.h | 3 + llvm/lib/CodeGen/MIRParser/MIParser.cpp | 159 ++++++++++++++- llvm/lib/CodeGen/MIRParser/MIRParser.cpp | 33 ++++ .../MIR/AArch64/machine-metadata-error.mir | 25 +++ .../CodeGen/MIR/AArch64/machine-metadata.mir | 166 ++++++++++++++++ .../MIR/AMDGPU/machine-metadata-error.mir | 25 +++ .../CodeGen/MIR/AMDGPU/machine-metadata.mir | 181 ++++++++++++++++++ .../MIR/Generic/machine-metadata-err0.mir | 15 ++ .../MIR/Generic/machine-metadata-err1.mir | 15 ++ .../MIR/Generic/machine-metadata-err2.mir | 15 ++ .../MIR/Generic/machine-metadata-err3.mir | 16 ++ .../MIR/Generic/machine-metadata-err4.mir | 15 ++ .../MIR/Generic/machine-metadata-err5.mir | 15 ++ .../MIR/Generic/machine-metadata-err6.mir | 15 ++ .../MIR/Generic/machine-metadata-err7.mir | 15 ++ .../MIR/Generic/machine-metadata-err8.mir | 15 ++ .../MIR/X86/machine-metadata-error.mir | 25 +++ .../test/CodeGen/MIR/X86/machine-metadata.mir | 172 +++++++++++++++++ 20 files changed, 932 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/MIR/AArch64/machine-metadata-error.mir create mode 100644 llvm/test/CodeGen/MIR/AArch64/machine-metadata.mir create mode 100644 llvm/test/CodeGen/MIR/AMDGPU/machine-metadata-error.mir create mode 100644 llvm/test/CodeGen/MIR/AMDGPU/machine-metadata.mir create mode 100644 llvm/test/CodeGen/MIR/Generic/machine-metadata-err0.mir create mode 100644 llvm/test/CodeGen/MIR/Generic/machine-metadata-err1.mir create mode 100644 llvm/test/CodeGen/MIR/Generic/machine-metadata-err2.mir create mode 100644 llvm/test/CodeGen/MIR/Generic/machine-metadata-err3.mir create mode 100644 llvm/test/CodeGen/MIR/Generic/machine-metadata-err4.mir create mode 100644 llvm/test/CodeGen/MIR/Generic/machine-metadata-err5.mir create mode 100644 llvm/test/CodeGen/MIR/Generic/machine-metadata-err6.mir create mode 100644 llvm/test/CodeGen/MIR/Generic/machine-metadata-err7.mir create mode 100644 llvm/test/CodeGen/MIR/Generic/machine-metadata-err8.mir create mode 100644 llvm/test/CodeGen/MIR/X86/machine-metadata-error.mir create mode 100644 llvm/test/CodeGen/MIR/X86/machine-metadata.mir diff --git a/llvm/include/llvm/CodeGen/MIRParser/MIParser.h b/llvm/include/llvm/CodeGen/MIRParser/MIParser.h index e3f7e3df3f1a9..b01a0c7aa073c 100644 --- a/llvm/include/llvm/CodeGen/MIRParser/MIParser.h +++ b/llvm/include/llvm/CodeGen/MIRParser/MIParser.h @@ -18,6 +18,8 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/Register.h" #include "llvm/Support/Allocator.h" +#include "llvm/Support/SMLoc.h" +#include namespace llvm { @@ -164,6 +166,9 @@ struct PerFunctionMIParsingState { const SlotMapping &IRSlots; PerTargetMIParsingState &Target; + std::map MachineMetadataNodes; + std::map> MachineForwardRefMDNodes; + DenseMap MBBSlots; DenseMap VRegInfos; StringMap VRegInfosNamed; @@ -233,6 +238,9 @@ bool parseStackObjectReference(PerFunctionMIParsingState &PFS, int &FI, bool parseMDNode(PerFunctionMIParsingState &PFS, MDNode *&Node, StringRef Src, SMDiagnostic &Error); +bool parseMachineMetadata(PerFunctionMIParsingState &PFS, StringRef Src, + SMRange SourceRange, SMDiagnostic &Error); + } // end namespace llvm #endif // LLVM_CODEGEN_MIRPARSER_MIPARSER_H diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/llvm/lib/CodeGen/MIRParser/MILexer.cpp index e6866f04986f0..87fde7d39a60d 100644 --- a/llvm/lib/CodeGen/MIRParser/MILexer.cpp +++ b/llvm/lib/CodeGen/MIRParser/MILexer.cpp @@ -273,6 +273,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("bbsections", MIToken::kw_bbsections) .Case("unknown-size", MIToken::kw_unknown_size) .Case("unknown-address", MIToken::kw_unknown_address) + .Case("distinct", MIToken::kw_distinct) .Default(MIToken::Identifier); } diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.h b/llvm/lib/CodeGen/MIRParser/MILexer.h index 5d7ea8fb66284..68425b41c3fb1 100644 --- a/llvm/lib/CodeGen/MIRParser/MILexer.h +++ b/llvm/lib/CodeGen/MIRParser/MILexer.h @@ -129,6 +129,9 @@ struct MIToken { kw_unknown_size, kw_unknown_address, + // Metadata types. + kw_distinct, + // Named metadata keywords md_tbaa, md_alias_scope, diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp index 745ba4dc7b91d..1368663e93b31 100644 --- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -395,6 +395,7 @@ class MIParser { MachineFunction &MF; SMDiagnostic &Error; StringRef Source, CurrentSource; + SMRange SourceRange; MIToken Token; PerFunctionMIParsingState &PFS; /// Maps from slot numbers to function's unnamed basic blocks. @@ -403,6 +404,8 @@ class MIParser { public: MIParser(PerFunctionMIParsingState &PFS, SMDiagnostic &Error, StringRef Source); + MIParser(PerFunctionMIParsingState &PFS, SMDiagnostic &Error, + StringRef Source, SMRange SourceRange); /// \p SkipChar gives the number of characters to skip before looking /// for the next token. @@ -428,6 +431,10 @@ class MIParser { bool parseStandaloneRegister(Register &Reg); bool parseStandaloneStackObject(int &FI); bool parseStandaloneMDNode(MDNode *&Node); + bool parseMachineMetadata(); + bool parseMDTuple(MDNode *&MD, bool IsDistinct); + bool parseMDNodeVector(SmallVectorImpl &Elts); + bool parseMetadata(Metadata *&MD); bool parseBasicBlockDefinition(DenseMap &MBBSlots); @@ -550,6 +557,10 @@ class MIParser { /// parseStringConstant /// ::= StringConstant bool parseStringConstant(std::string &Result); + + /// Map the location in the MI string to the corresponding location specified + /// in `SourceRange`. + SMLoc mapSMLoc(StringRef::iterator Loc); }; } // end anonymous namespace @@ -559,6 +570,11 @@ MIParser::MIParser(PerFunctionMIParsingState &PFS, SMDiagnostic &Error, : MF(PFS.MF), Error(Error), Source(Source), CurrentSource(Source), PFS(PFS) {} +MIParser::MIParser(PerFunctionMIParsingState &PFS, SMDiagnostic &Error, + StringRef Source, SMRange SourceRange) + : MF(PFS.MF), Error(Error), Source(Source), CurrentSource(Source), + SourceRange(SourceRange), PFS(PFS) {} + void MIParser::lex(unsigned SkipChar) { CurrentSource = lexMIToken( CurrentSource.slice(SkipChar, StringRef::npos), Token, @@ -584,6 +600,13 @@ bool MIParser::error(StringRef::iterator Loc, const Twine &Msg) { return true; } +SMLoc MIParser::mapSMLoc(StringRef::iterator Loc) { + assert(SourceRange.isValid() && "Invalid source range"); + assert(Loc >= Source.data() && Loc <= (Source.data() + Source.size())); + return SMLoc::getFromPointer(SourceRange.Start.getPointer() + + (Loc - Source.data())); +} + typedef function_ref ErrorCallbackType; @@ -1172,6 +1195,130 @@ bool MIParser::parseStandaloneMDNode(MDNode *&Node) { return false; } +bool MIParser::parseMachineMetadata() { + lex(); + if (Token.isNot(MIToken::exclaim)) + return error("expected a metadata node"); + + lex(); + if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isSigned()) + return error("expected metadata id after '!'"); + unsigned ID = 0; + if (getUnsigned(ID)) + return true; + lex(); + if (expectAndConsume(MIToken::equal)) + return true; + bool IsDistinct = Token.is(MIToken::kw_distinct); + if (IsDistinct) + lex(); + if (Token.isNot(MIToken::exclaim)) + return error("expected a metadata node"); + lex(); + + MDNode *MD; + if (parseMDTuple(MD, IsDistinct)) + return true; + + auto FI = PFS.MachineForwardRefMDNodes.find(ID); + if (FI != PFS.MachineForwardRefMDNodes.end()) { + FI->second.first->replaceAllUsesWith(MD); + PFS.MachineForwardRefMDNodes.erase(FI); + + assert(PFS.MachineMetadataNodes[ID] == MD && "Tracking VH didn't work"); + } else { + if (PFS.MachineMetadataNodes.count(ID)) + return error("Metadata id is already used"); + PFS.MachineMetadataNodes[ID].reset(MD); + } + + return false; +} + +bool MIParser::parseMDTuple(MDNode *&MD, bool IsDistinct) { + SmallVector Elts; + if (parseMDNodeVector(Elts)) + return true; + MD = (IsDistinct ? MDTuple::getDistinct + : MDTuple::get)(MF.getFunction().getContext(), Elts); + return false; +} + +bool MIParser::parseMDNodeVector(SmallVectorImpl &Elts) { + if (Token.isNot(MIToken::lbrace)) + return error("expected '{' here"); + lex(); + + if (Token.is(MIToken::rbrace)) { + lex(); + return false; + } + + do { + Metadata *MD; + if (parseMetadata(MD)) + return true; + + Elts.push_back(MD); + + if (Token.isNot(MIToken::comma)) + break; + lex(); + } while (true); + + if (Token.isNot(MIToken::rbrace)) + return error("expected end of metadata node"); + lex(); + + return false; +} + +// ::= !42 +// ::= !"string" +bool MIParser::parseMetadata(Metadata *&MD) { + if (Token.isNot(MIToken::exclaim)) + return error("expected '!' here"); + lex(); + + if (Token.is(MIToken::StringConstant)) { + std::string Str; + if (parseStringConstant(Str)) + return true; + MD = MDString::get(MF.getFunction().getContext(), Str); + return false; + } + + if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isSigned()) + return error("expected metadata id after '!'"); + + SMLoc Loc = mapSMLoc(Token.location()); + + unsigned ID = 0; + if (getUnsigned(ID)) + return true; + lex(); + + auto NodeInfo = PFS.IRSlots.MetadataNodes.find(ID); + if (NodeInfo != PFS.IRSlots.MetadataNodes.end()) { + MD = NodeInfo->second.get(); + return false; + } + // Check machine metadata. + NodeInfo = PFS.MachineMetadataNodes.find(ID); + if (NodeInfo != PFS.MachineMetadataNodes.end()) { + MD = NodeInfo->second.get(); + return false; + } + // Forward reference. + auto &FwdRef = PFS.MachineForwardRefMDNodes[ID]; + FwdRef = std::make_pair( + MDTuple::getTemporary(MF.getFunction().getContext(), None), Loc); + PFS.MachineMetadataNodes[ID].reset(FwdRef.first.get()); + MD = FwdRef.first.get(); + + return false; +} + static const char *printImplicitRegisterFlag(const MachineOperand &MO) { assert(MO.isImplicit()); return MO.isDef() ? "implicit-def" : "implicit"; @@ -2014,8 +2161,11 @@ bool MIParser::parseMDNode(MDNode *&Node) { if (getUnsigned(ID)) return true; auto NodeInfo = PFS.IRSlots.MetadataNodes.find(ID); - if (NodeInfo == PFS.IRSlots.MetadataNodes.end()) - return error(Loc, "use of undefined metadata '!" + Twine(ID) + "'"); + if (NodeInfo == PFS.IRSlots.MetadataNodes.end()) { + NodeInfo = PFS.MachineMetadataNodes.find(ID); + if (NodeInfo == PFS.MachineMetadataNodes.end()) + return error(Loc, "use of undefined metadata '!" + Twine(ID) + "'"); + } lex(); Node = NodeInfo->second.get(); return false; @@ -3281,6 +3431,11 @@ bool llvm::parseMDNode(PerFunctionMIParsingState &PFS, return MIParser(PFS, Error, Src).parseStandaloneMDNode(Node); } +bool llvm::parseMachineMetadata(PerFunctionMIParsingState &PFS, StringRef Src, + SMRange SrcRange, SMDiagnostic &Error) { + return MIParser(PFS, Error, Src, SrcRange).parseMachineMetadata(); +} + bool MIRFormatter::parseIRValue(StringRef Src, MachineFunction &MF, PerFunctionMIParsingState &PFS, const Value *&V, ErrorCallbackType ErrorCallback) { diff --git a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp index 88dc4571b2d77..58ce95aaf023c 100644 --- a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp @@ -143,6 +143,10 @@ class MIRParserImpl { bool initializeJumpTableInfo(PerFunctionMIParsingState &PFS, const yaml::MachineJumpTable &YamlJTI); + bool parseMachineMetadataNodes(PerFunctionMIParsingState &PFS, + MachineFunction &MF, + const yaml::MachineFunction &YMF); + private: bool parseMDNode(PerFunctionMIParsingState &PFS, MDNode *&Node, const yaml::StringValue &Source); @@ -151,6 +155,9 @@ class MIRParserImpl { MachineBasicBlock *&MBB, const yaml::StringValue &Source); + bool parseMachineMetadata(PerFunctionMIParsingState &PFS, + const yaml::StringValue &Source); + /// Return a MIR diagnostic converted from an MI string diagnostic. SMDiagnostic diagFromMIStringDiag(const SMDiagnostic &Error, SMRange SourceRange); @@ -457,6 +464,9 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF, if (initializeConstantPool(PFS, *ConstantPool, YamlMF)) return true; } + if (!YamlMF.MachineMetadataNodes.empty() && + parseMachineMetadataNodes(PFS, MF, YamlMF)) + return true; StringRef BlockStr = YamlMF.Body.Value.Value; SMDiagnostic Error; @@ -920,6 +930,29 @@ bool MIRParserImpl::parseMBBReference(PerFunctionMIParsingState &PFS, return false; } +bool MIRParserImpl::parseMachineMetadata(PerFunctionMIParsingState &PFS, + const yaml::StringValue &Source) { + SMDiagnostic Error; + if (llvm::parseMachineMetadata(PFS, Source.Value, Source.SourceRange, Error)) + return error(Error, Source.SourceRange); + return false; +} + +bool MIRParserImpl::parseMachineMetadataNodes( + PerFunctionMIParsingState &PFS, MachineFunction &MF, + const yaml::MachineFunction &YMF) { + for (auto &MDS : YMF.MachineMetadataNodes) { + if (parseMachineMetadata(PFS, MDS)) + return true; + } + // Report missing definitions from forward referenced nodes. + if (!PFS.MachineForwardRefMDNodes.empty()) + return error(PFS.MachineForwardRefMDNodes.begin()->second.second, + "use of undefined metadata '!" + + Twine(PFS.MachineForwardRefMDNodes.begin()->first) + "'"); + return false; +} + SMDiagnostic MIRParserImpl::diagFromMIStringDiag(const SMDiagnostic &Error, SMRange SourceRange) { assert(SourceRange.isValid() && "Invalid source range"); diff --git a/llvm/test/CodeGen/MIR/AArch64/machine-metadata-error.mir b/llvm/test/CodeGen/MIR/AArch64/machine-metadata-error.mir new file mode 100644 index 0000000000000..faf9c6a73ab03 --- /dev/null +++ b/llvm/test/CodeGen/MIR/AArch64/machine-metadata-error.mir @@ -0,0 +1,25 @@ +# RUN: not llc -run-pass none -o /dev/null %s 2>&1 | FileCheck %s +# This test ensures that the MIR parser detects errors when parsing machine +# metadata. +--- | + target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + target triple = "aarch64-unknown-linux-gnu" + + define i32 @test_memcpy(i32* nocapture %p, i32* nocapture readonly %q) { + ret i32 0 + } +... +--- +name: test_memcpy +machineMetadataNodes: + - '!9 = distinct !{!9, !7, !"Dst"}' + - '!10 = !{!9}' + - '!6 = distinct !{!6, !7, !"Src"}' + - '!5 = !{!6}' + - '!7 = distinct !{!7, !"MemcpyLoweringDomain"}' +body: | + bb.0 (%ir-block.0): + ; CHECK: [[@LINE+1]]:78: use of undefined metadata '!11' + %2:fpr128 = LDRQui %0, 1 :: (load 16, align 4, !alias.scope !5, !noalias !11) + +... diff --git a/llvm/test/CodeGen/MIR/AArch64/machine-metadata.mir b/llvm/test/CodeGen/MIR/AArch64/machine-metadata.mir new file mode 100644 index 0000000000000..1dfc45d6546a8 --- /dev/null +++ b/llvm/test/CodeGen/MIR/AArch64/machine-metadata.mir @@ -0,0 +1,166 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64-linux-gnu -run-pass=none -o - %s | FileCheck %s +--- | + ; ModuleID = 'test/CodeGen/AArch64/memcpy-scoped-aa.ll' + source_filename = "test/CodeGen/AArch64/memcpy-scoped-aa.ll" + target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + target triple = "aarch64-unknown-linux-gnu" + + define i32 @test_memcpy(i32* nocapture %p, i32* nocapture readonly %q) { + %p0 = bitcast i32* %p to i8* + %add.ptr = getelementptr inbounds i32, i32* %p, i64 4 + %p1 = bitcast i32* %add.ptr to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 4 dereferenceable(16) %p0, i8* noundef nonnull align 4 dereferenceable(16) %p1, i64 16, i1 false), !alias.scope !0, !noalias !3 + %v0 = load i32, i32* %q, align 4, !alias.scope !3, !noalias !0 + %q1 = getelementptr inbounds i32, i32* %q, i64 1 + %v1 = load i32, i32* %q1, align 4, !alias.scope !3, !noalias !0 + %add = add i32 %v0, %v1 + ret i32 %add + } + + define i32 @test_memcpy_inline(i32* nocapture %p, i32* nocapture readonly %q) { + %p0 = bitcast i32* %p to i8* + %add.ptr = getelementptr inbounds i32, i32* %p, i64 4 + %p1 = bitcast i32* %add.ptr to i8* + tail call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* noundef nonnull align 4 dereferenceable(16) %p0, i8* noundef nonnull align 4 dereferenceable(16) %p1, i64 16, i1 false), !alias.scope !0, !noalias !3 + %v0 = load i32, i32* %q, align 4, !alias.scope !3, !noalias !0 + %q1 = getelementptr inbounds i32, i32* %q, i64 1 + %v1 = load i32, i32* %q1, align 4, !alias.scope !3, !noalias !0 + %add = add i32 %v0, %v1 + ret i32 %add + } + + define i32 @test_mempcpy(i32* nocapture %p, i32* nocapture readonly %q) { + %p0 = bitcast i32* %p to i8* + %add.ptr = getelementptr inbounds i32, i32* %p, i64 4 + %p1 = bitcast i32* %add.ptr to i8* + %call = tail call i8* @mempcpy(i8* noundef nonnull align 4 dereferenceable(16) %p0, i8* noundef nonnull align 4 dereferenceable(16) %p1, i64 16), !alias.scope !0, !noalias !3 + %v0 = load i32, i32* %q, align 4, !alias.scope !3, !noalias !0 + %q1 = getelementptr inbounds i32, i32* %q, i64 1 + %v1 = load i32, i32* %q1, align 4, !alias.scope !3, !noalias !0 + %add = add i32 %v0, %v1 + ret i32 %add + } + + ; Function Attrs: argmemonly nofree nounwind willreturn + declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #0 + + ; Function Attrs: argmemonly nofree nounwind willreturn + declare void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64 immarg, i1 immarg) #0 + + declare i8* @mempcpy(i8*, i8*, i64) + + attributes #0 = { argmemonly nofree nounwind willreturn } + + !0 = !{!1} + !1 = distinct !{!1, !2, !"bax: %p"} + !2 = distinct !{!2, !"bax"} + !3 = !{!4} + !4 = distinct !{!4, !2, !"bax: %q"} + +... +--- +name: test_memcpy +machineMetadataNodes: + - '!9 = distinct !{!9, !7, !"Dst"}' + - '!10 = !{!1, !9}' + - '!6 = distinct !{!6, !7, !"Src"}' + - '!5 = !{!1, !6}' + - '!11 = !{!4, !6}' + - '!8 = !{!4, !9}' + - '!7 = distinct !{!7, !"MemcpyLoweringDomain"}' +body: | + bb.0 (%ir-block.0): + liveins: $x0, $x1 + + ; CHECK-LABEL: name: test_memcpy + ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x1 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64common = COPY $x0 + ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 1 :: (load 16 from %ir.p1, align 4, !alias.scope !5, !noalias !8) + ; CHECK: STRQui killed [[LDRQui]], [[COPY1]], 0 :: (store 16 into %ir.p0, align 4, !alias.scope !10, !noalias !11) + ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 0 :: (load 4 from %ir.q, !alias.scope !3, !noalias !0) + ; CHECK: [[LDRWui1:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 1 :: (load 4 from %ir.q1, !alias.scope !3, !noalias !0) + ; CHECK: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr killed [[LDRWui]], killed [[LDRWui1]] + ; CHECK: $w0 = COPY [[ADDWrr]] + ; CHECK: RET_ReallyLR implicit $w0 + %1:gpr64common = COPY $x1 + %0:gpr64common = COPY $x0 + %2:fpr128 = LDRQui %0, 1 :: (load 16 from %ir.p1, align 4, !alias.scope !5, !noalias !8) + STRQui killed %2, %0, 0 :: (store 16 into %ir.p0, align 4, !alias.scope !10, !noalias !11) + %3:gpr32 = LDRWui %1, 0 :: (load 4 from %ir.q, !alias.scope !3, !noalias !0) + %4:gpr32 = LDRWui %1, 1 :: (load 4 from %ir.q1, !alias.scope !3, !noalias !0) + %5:gpr32 = ADDWrr killed %3, killed %4 + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... +--- +name: test_memcpy_inline +machineMetadataNodes: + - '!10 = !{!1, !9}' + - '!5 = !{!1, !6}' + - '!8 = !{!4, !9}' + - '!9 = distinct !{!9, !7, !"Dst"}' + - '!11 = !{!4, !6}' + - '!7 = distinct !{!7, !"MemcpyLoweringDomain"}' + - '!6 = distinct !{!6, !7, !"Src"}' +body: | + bb.0 (%ir-block.0): + liveins: $x0, $x1 + + ; CHECK-LABEL: name: test_memcpy_inline + ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x1 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64common = COPY $x0 + ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 1 :: (load 16 from %ir.p1, align 4, !alias.scope !5, !noalias !8) + ; CHECK: STRQui killed [[LDRQui]], [[COPY1]], 0 :: (store 16 into %ir.p0, align 4, !alias.scope !10, !noalias !11) + ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 0 :: (load 4 from %ir.q, !alias.scope !3, !noalias !0) + ; CHECK: [[LDRWui1:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 1 :: (load 4 from %ir.q1, !alias.scope !3, !noalias !0) + ; CHECK: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr killed [[LDRWui]], killed [[LDRWui1]] + ; CHECK: $w0 = COPY [[ADDWrr]] + ; CHECK: RET_ReallyLR implicit $w0 + %1:gpr64common = COPY $x1 + %0:gpr64common = COPY $x0 + %2:fpr128 = LDRQui %0, 1 :: (load 16 from %ir.p1, align 4, !alias.scope !5, !noalias !8) + STRQui killed %2, %0, 0 :: (store 16 into %ir.p0, align 4, !alias.scope !10, !noalias !11) + %3:gpr32 = LDRWui %1, 0 :: (load 4 from %ir.q, !alias.scope !3, !noalias !0) + %4:gpr32 = LDRWui %1, 1 :: (load 4 from %ir.q1, !alias.scope !3, !noalias !0) + %5:gpr32 = ADDWrr killed %3, killed %4 + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... +--- +name: test_mempcpy +machineMetadataNodes: + - '!9 = distinct !{!9, !7, !"Dst"}' + - '!8 = !{!4, !9}' + - '!5 = !{!1, !6}' + - '!6 = distinct !{!6, !7, !"Src"}' + - '!11 = !{!4, !6}' + - '!7 = distinct !{!7, !"MemcpyLoweringDomain"}' + - '!10 = !{!1, !9}' +body: | + bb.0 (%ir-block.0): + liveins: $x0, $x1 + + ; CHECK-LABEL: name: test_mempcpy + ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x1 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64common = COPY $x0 + ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 1 :: (load 16 from %ir.p1, align 1, !alias.scope !5, !noalias !8) + ; CHECK: STRQui killed [[LDRQui]], [[COPY1]], 0 :: (store 16 into %ir.p0, align 1, !alias.scope !10, !noalias !11) + ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 0 :: (load 4 from %ir.q, !alias.scope !3, !noalias !0) + ; CHECK: [[LDRWui1:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 1 :: (load 4 from %ir.q1, !alias.scope !3, !noalias !0) + ; CHECK: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr killed [[LDRWui]], killed [[LDRWui1]] + ; CHECK: $w0 = COPY [[ADDWrr]] + ; CHECK: RET_ReallyLR implicit $w0 + %1:gpr64common = COPY $x1 + %0:gpr64common = COPY $x0 + %2:fpr128 = LDRQui %0, 1 :: (load 16 from %ir.p1, align 1, !alias.scope !5, !noalias !8) + STRQui killed %2, %0, 0 :: (store 16 into %ir.p0, align 1, !alias.scope !10, !noalias !11) + %3:gpr32 = LDRWui %1, 0 :: (load 4 from %ir.q, !alias.scope !3, !noalias !0) + %4:gpr32 = LDRWui %1, 1 :: (load 4 from %ir.q1, !alias.scope !3, !noalias !0) + %5:gpr32 = ADDWrr killed %3, killed %4 + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-metadata-error.mir b/llvm/test/CodeGen/MIR/AMDGPU/machine-metadata-error.mir new file mode 100644 index 0000000000000..60dabbd7178a5 --- /dev/null +++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-metadata-error.mir @@ -0,0 +1,25 @@ +# RUN: not llc -run-pass none -o /dev/null %s 2>&1 | FileCheck %s +# This test ensures that the MIR parser detects errors when parsing machine +# metadata. +--- | + target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" + target triple = "amdgcn-amd-amdhsa" + + define i32 @test_memcpy(i32 addrspace(1)* nocapture %p, i32 addrspace(1)* nocapture readonly %q) #0 { + ret i32 0 + } +... +--- +name: test_memcpy +machineMetadataNodes: + - '!9 = distinct !{!9, !7, !"Dst"}' + - '!6 = distinct !{!6, !7, !"Src"}' + - '!5 = !{!6}' + - '!10 = !{!9}' + - '!7 = distinct !{!7, !"MemcpyLoweringDomain"}' +body: | + bb.0 (%ir-block.0): + ; CHECK: [[@LINE+1]]:113: use of undefined metadata '!11' + %8:vreg_128 = GLOBAL_LOAD_DWORDX4 %9, 16, 0, implicit $exec :: (load 16, align 4, !alias.scope !5, !noalias !11, addrspace 1) + +... diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-metadata.mir b/llvm/test/CodeGen/MIR/AMDGPU/machine-metadata.mir new file mode 100644 index 0000000000000..74ea99465185a --- /dev/null +++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-metadata.mir @@ -0,0 +1,181 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -run-pass=none -o - %s | FileCheck %s +--- | + ; ModuleID = 'test/CodeGen/AMDGPU/memcpy-scoped-aa.ll' + source_filename = "test/CodeGen/AMDGPU/memcpy-scoped-aa.ll" + target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" + target triple = "amdgcn-amd-amdhsa" + + define i32 @test_memcpy(i32 addrspace(1)* nocapture %p, i32 addrspace(1)* nocapture readonly %q) #0 { + %p0 = bitcast i32 addrspace(1)* %p to i8 addrspace(1)* + %add.ptr = getelementptr inbounds i32, i32 addrspace(1)* %p, i64 4 + %p1 = bitcast i32 addrspace(1)* %add.ptr to i8 addrspace(1)* + tail call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* noundef nonnull align 4 dereferenceable(16) %p0, i8 addrspace(1)* noundef nonnull align 4 dereferenceable(16) %p1, i64 16, i1 false), !alias.scope !0, !noalias !3 + %1 = bitcast i32 addrspace(1)* %q to <2 x i32> addrspace(1)* + %2 = load <2 x i32>, <2 x i32> addrspace(1)* %1, align 4, !alias.scope !3, !noalias !0 + %v01 = extractelement <2 x i32> %2, i32 0 + %v12 = extractelement <2 x i32> %2, i32 1 + %add = add i32 %v01, %v12 + ret i32 %add + } + + define i32 @test_memcpy_inline(i32 addrspace(1)* nocapture %p, i32 addrspace(1)* nocapture readonly %q) #0 { + %p0 = bitcast i32 addrspace(1)* %p to i8 addrspace(1)* + %add.ptr = getelementptr inbounds i32, i32 addrspace(1)* %p, i64 4 + %p1 = bitcast i32 addrspace(1)* %add.ptr to i8 addrspace(1)* + tail call void @llvm.memcpy.inline.p1i8.p1i8.i64(i8 addrspace(1)* noundef nonnull align 4 dereferenceable(16) %p0, i8 addrspace(1)* noundef nonnull align 4 dereferenceable(16) %p1, i64 16, i1 false), !alias.scope !0, !noalias !3 + %1 = bitcast i32 addrspace(1)* %q to <2 x i32> addrspace(1)* + %2 = load <2 x i32>, <2 x i32> addrspace(1)* %1, align 4, !alias.scope !3, !noalias !0 + %v01 = extractelement <2 x i32> %2, i32 0 + %v12 = extractelement <2 x i32> %2, i32 1 + %add = add i32 %v01, %v12 + ret i32 %add + } + + ; Function Attrs: argmemonly nofree nounwind willreturn + declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* noalias nocapture writeonly, i8 addrspace(1)* noalias nocapture readonly, i64, i1 immarg) #1 + + ; Function Attrs: argmemonly nofree nounwind willreturn + declare void @llvm.memcpy.inline.p1i8.p1i8.i64(i8 addrspace(1)* noalias nocapture writeonly, i8 addrspace(1)* noalias nocapture readonly, i64 immarg, i1 immarg) #1 + + ; Function Attrs: convergent nounwind willreturn + declare { i1, i32 } @llvm.amdgcn.if.i32(i1) #2 + + ; Function Attrs: convergent nounwind willreturn + declare { i1, i32 } @llvm.amdgcn.else.i32.i32(i32) #2 + + ; Function Attrs: convergent nounwind readnone willreturn + declare i32 @llvm.amdgcn.if.break.i32(i1, i32) #3 + + ; Function Attrs: convergent nounwind willreturn + declare i1 @llvm.amdgcn.loop.i32(i32) #2 + + ; Function Attrs: convergent nounwind willreturn + declare void @llvm.amdgcn.end.cf.i32(i32) #2 + + attributes #0 = { "target-cpu"="gfx1010" } + attributes #1 = { argmemonly nofree nounwind willreturn "target-cpu"="gfx1010" } + attributes #2 = { convergent nounwind willreturn } + attributes #3 = { convergent nounwind readnone willreturn } + + !0 = !{!1} + !1 = distinct !{!1, !2, !"bax: %p"} + !2 = distinct !{!2, !"bax"} + !3 = !{!4} + !4 = distinct !{!4, !2, !"bax: %q"} + +... +--- +name: test_memcpy +machineMetadataNodes: + - '!9 = distinct !{!9, !7, !"Dst"}' + - '!6 = distinct !{!6, !7, !"Src"}' + - '!11 = !{!4, !6}' + - '!5 = !{!1, !6}' + - '!8 = !{!4, !9}' + - '!10 = !{!1, !9}' + - '!7 = distinct !{!7, !"MemcpyLoweringDomain"}' +body: | + bb.0 (%ir-block.0): + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + + ; CHECK-LABEL: name: test_memcpy + ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; CHECK: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] + ; CHECK: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY5]], 16, 0, implicit $exec :: (load 16 from %ir.p1, align 4, !alias.scope !5, !noalias !8, addrspace 1) + ; CHECK: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] + ; CHECK: GLOBAL_STORE_DWORDX4 [[COPY6]], killed [[GLOBAL_LOAD_DWORDX4_]], 0, 0, implicit $exec :: (store 16 into %ir.p0, align 4, !alias.scope !10, !noalias !11, addrspace 1) + ; CHECK: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] + ; CHECK: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 killed [[COPY7]], 0, 0, implicit $exec :: (load 8 from %ir.1, align 4, !alias.scope !3, !noalias !0, addrspace 1) + ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub0 + ; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub1 + ; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[COPY8]], killed [[COPY9]], 0, implicit $exec + ; CHECK: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: $vgpr0 = COPY [[V_ADD_U32_e64_]] + ; CHECK: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY10]] + ; CHECK: S_SETPC_B64_return [[COPY11]], implicit $vgpr0 + %4:sreg_64 = COPY $sgpr30_sgpr31 + %3:vgpr_32 = COPY $vgpr3 + %2:vgpr_32 = COPY $vgpr2 + %1:vgpr_32 = COPY $vgpr1 + %0:vgpr_32 = COPY $vgpr0 + %17:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1 + %18:vreg_64 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1 + %9:vreg_64 = COPY %18 + %8:vreg_128 = GLOBAL_LOAD_DWORDX4 %9, 16, 0, implicit $exec :: (load 16 from %ir.p1, align 4, !alias.scope !5, !noalias !8, addrspace 1) + %10:vreg_64 = COPY %18 + GLOBAL_STORE_DWORDX4 %10, killed %8, 0, 0, implicit $exec :: (store 16 into %ir.p0, align 4, !alias.scope !10, !noalias !11, addrspace 1) + %12:vreg_64 = COPY %17 + %11:vreg_64 = GLOBAL_LOAD_DWORDX2 killed %12, 0, 0, implicit $exec :: (load 8 from %ir.1, align 4, !alias.scope !3, !noalias !0, addrspace 1) + %13:vgpr_32 = COPY %11.sub0 + %14:vgpr_32 = COPY %11.sub1 + %15:vgpr_32 = V_ADD_U32_e64 killed %13, killed %14, 0, implicit $exec + %5:ccr_sgpr_64 = COPY %4 + $vgpr0 = COPY %15 + %16:ccr_sgpr_64 = COPY %5 + S_SETPC_B64_return %16, implicit $vgpr0 + +... +--- +name: test_memcpy_inline +machineMetadataNodes: + - '!6 = distinct !{!6, !7, !"Src"}' + - '!7 = distinct !{!7, !"MemcpyLoweringDomain"}' + - '!9 = distinct !{!9, !7, !"Dst"}' + - '!11 = !{!4, !6}' + - '!5 = !{!1, !6}' + - '!8 = !{!4, !9}' + - '!10 = !{!1, !9}' +body: | + bb.0 (%ir-block.0): + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + + ; CHECK-LABEL: name: test_memcpy_inline + ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; CHECK: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] + ; CHECK: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY5]], 16, 0, implicit $exec :: (load 16 from %ir.p1, align 4, !alias.scope !5, !noalias !8, addrspace 1) + ; CHECK: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] + ; CHECK: GLOBAL_STORE_DWORDX4 [[COPY6]], killed [[GLOBAL_LOAD_DWORDX4_]], 0, 0, implicit $exec :: (store 16 into %ir.p0, align 4, !alias.scope !10, !noalias !11, addrspace 1) + ; CHECK: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] + ; CHECK: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 killed [[COPY7]], 0, 0, implicit $exec :: (load 8 from %ir.1, align 4, !alias.scope !3, !noalias !0, addrspace 1) + ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub0 + ; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub1 + ; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[COPY8]], killed [[COPY9]], 0, implicit $exec + ; CHECK: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: $vgpr0 = COPY [[V_ADD_U32_e64_]] + ; CHECK: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY10]] + ; CHECK: S_SETPC_B64_return [[COPY11]], implicit $vgpr0 + %4:sreg_64 = COPY $sgpr30_sgpr31 + %3:vgpr_32 = COPY $vgpr3 + %2:vgpr_32 = COPY $vgpr2 + %1:vgpr_32 = COPY $vgpr1 + %0:vgpr_32 = COPY $vgpr0 + %17:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1 + %18:vreg_64 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1 + %9:vreg_64 = COPY %18 + %8:vreg_128 = GLOBAL_LOAD_DWORDX4 %9, 16, 0, implicit $exec :: (load 16 from %ir.p1, align 4, !alias.scope !5, !noalias !8, addrspace 1) + %10:vreg_64 = COPY %18 + GLOBAL_STORE_DWORDX4 %10, killed %8, 0, 0, implicit $exec :: (store 16 into %ir.p0, align 4, !alias.scope !10, !noalias !11, addrspace 1) + %12:vreg_64 = COPY %17 + %11:vreg_64 = GLOBAL_LOAD_DWORDX2 killed %12, 0, 0, implicit $exec :: (load 8 from %ir.1, align 4, !alias.scope !3, !noalias !0, addrspace 1) + %13:vgpr_32 = COPY %11.sub0 + %14:vgpr_32 = COPY %11.sub1 + %15:vgpr_32 = V_ADD_U32_e64 killed %13, killed %14, 0, implicit $exec + %5:ccr_sgpr_64 = COPY %4 + $vgpr0 = COPY %15 + %16:ccr_sgpr_64 = COPY %5 + S_SETPC_B64_return %16, implicit $vgpr0 + +... diff --git a/llvm/test/CodeGen/MIR/Generic/machine-metadata-err0.mir b/llvm/test/CodeGen/MIR/Generic/machine-metadata-err0.mir new file mode 100644 index 0000000000000..0502ac90e51eb --- /dev/null +++ b/llvm/test/CodeGen/MIR/Generic/machine-metadata-err0.mir @@ -0,0 +1,15 @@ +# RUN: not llc -run-pass none -o /dev/null %s 2>&1 | FileCheck %s +# This test ensures that the MIR parser detects errors when parsing machine +# metadata. + +--- | + define i32 @t0() { + ret i32 0 + } +... +--- +name: t0 +machineMetadataNodes: + - '9 = distinct !{!9, !7, !"Dst"}' +... +# CHECK: [[@LINE-2]]:6: expected a metadata node diff --git a/llvm/test/CodeGen/MIR/Generic/machine-metadata-err1.mir b/llvm/test/CodeGen/MIR/Generic/machine-metadata-err1.mir new file mode 100644 index 0000000000000..4ac5202527d2f --- /dev/null +++ b/llvm/test/CodeGen/MIR/Generic/machine-metadata-err1.mir @@ -0,0 +1,15 @@ +# RUN: not llc -run-pass none -o /dev/null %s 2>&1 | FileCheck %s +# This test ensures that the MIR parser detects errors when parsing machine +# metadata. + +--- | + define i32 @t0() { + ret i32 0 + } +... +--- +name: t0 +machineMetadataNodes: + - '! = distinct !{!9, !7, !"Dst"}' +... +# CHECK: [[@LINE-2]]:8: expected metadata id after '!' diff --git a/llvm/test/CodeGen/MIR/Generic/machine-metadata-err2.mir b/llvm/test/CodeGen/MIR/Generic/machine-metadata-err2.mir new file mode 100644 index 0000000000000..0e731b12c6456 --- /dev/null +++ b/llvm/test/CodeGen/MIR/Generic/machine-metadata-err2.mir @@ -0,0 +1,15 @@ +# RUN: not llc -run-pass none -o /dev/null %s 2>&1 | FileCheck %s +# This test ensures that the MIR parser detects errors when parsing machine +# metadata. + +--- | + define i32 @t0() { + ret i32 0 + } +... +--- +name: t0 +machineMetadataNodes: + - '!9 = distinct {!9, !7, !"Dst"}' +... +# CHECK: [[@LINE-2]]:20: expected a metadata node diff --git a/llvm/test/CodeGen/MIR/Generic/machine-metadata-err3.mir b/llvm/test/CodeGen/MIR/Generic/machine-metadata-err3.mir new file mode 100644 index 0000000000000..deb06f5f4abaa --- /dev/null +++ b/llvm/test/CodeGen/MIR/Generic/machine-metadata-err3.mir @@ -0,0 +1,16 @@ +# RUN: not llc -run-pass none -o /dev/null %s 2>&1 | FileCheck %s +# This test ensures that the MIR parser detects errors when parsing machine +# metadata. + +--- | + define i32 @t0() { + ret i32 0 + } +... +--- +name: t0 +machineMetadataNodes: + - '!9 = distinct !{!9, !7, !"Dst"}' + - '!9 = distinct !{!9, !7, !"Src"}' +... +# CHECK: [[@LINE-2]]:37: Metadata id is already used diff --git a/llvm/test/CodeGen/MIR/Generic/machine-metadata-err4.mir b/llvm/test/CodeGen/MIR/Generic/machine-metadata-err4.mir new file mode 100644 index 0000000000000..a28f631b0df00 --- /dev/null +++ b/llvm/test/CodeGen/MIR/Generic/machine-metadata-err4.mir @@ -0,0 +1,15 @@ +# RUN: not llc -run-pass none -o /dev/null %s 2>&1 | FileCheck %s +# This test ensures that the MIR parser detects errors when parsing machine +# metadata. + +--- | + define i32 @t0() { + ret i32 0 + } +... +--- +name: t0 +machineMetadataNodes: + - '!9 = distinct !!9, !7, !"Dst"}' +... +# CHECK: [[@LINE-2]]:21: expected '{' here diff --git a/llvm/test/CodeGen/MIR/Generic/machine-metadata-err5.mir b/llvm/test/CodeGen/MIR/Generic/machine-metadata-err5.mir new file mode 100644 index 0000000000000..68ab8bc58d864 --- /dev/null +++ b/llvm/test/CodeGen/MIR/Generic/machine-metadata-err5.mir @@ -0,0 +1,15 @@ +# RUN: not llc -run-pass none -o /dev/null %s 2>&1 | FileCheck %s +# This test ensures that the MIR parser detects errors when parsing machine +# metadata. + +--- | + define i32 @t0() { + ret i32 0 + } +... +--- +name: t0 +machineMetadataNodes: + - '!9 = distinct !{!9, !7, !"Dst"' +... +# CHECK: [[@LINE-2]]:36: expected end of metadata node diff --git a/llvm/test/CodeGen/MIR/Generic/machine-metadata-err6.mir b/llvm/test/CodeGen/MIR/Generic/machine-metadata-err6.mir new file mode 100644 index 0000000000000..51cca1b259cd0 --- /dev/null +++ b/llvm/test/CodeGen/MIR/Generic/machine-metadata-err6.mir @@ -0,0 +1,15 @@ +# RUN: not llc -run-pass none -o /dev/null %s 2>&1 | FileCheck %s +# This test ensures that the MIR parser detects errors when parsing machine +# metadata. + +--- | + define i32 @t0() { + ret i32 0 + } +... +--- +name: t0 +machineMetadataNodes: + - '!9 = distinct !{9, !7, !"Dst"}' +... +# CHECK: [[@LINE-2]]:22: expected '!' here diff --git a/llvm/test/CodeGen/MIR/Generic/machine-metadata-err7.mir b/llvm/test/CodeGen/MIR/Generic/machine-metadata-err7.mir new file mode 100644 index 0000000000000..0cc5ef1b8af83 --- /dev/null +++ b/llvm/test/CodeGen/MIR/Generic/machine-metadata-err7.mir @@ -0,0 +1,15 @@ +# RUN: not llc -run-pass none -o /dev/null %s 2>&1 | FileCheck %s +# This test ensures that the MIR parser detects errors when parsing machine +# metadata. + +--- | + define i32 @t0() { + ret i32 0 + } +... +--- +name: t0 +machineMetadataNodes: + - '!9 = distinct !{!, !7, !"Dst"}' +... +# CHECK: [[@LINE-2]]:23: expected metadata id after '!' diff --git a/llvm/test/CodeGen/MIR/Generic/machine-metadata-err8.mir b/llvm/test/CodeGen/MIR/Generic/machine-metadata-err8.mir new file mode 100644 index 0000000000000..1d51dbc5d659d --- /dev/null +++ b/llvm/test/CodeGen/MIR/Generic/machine-metadata-err8.mir @@ -0,0 +1,15 @@ +# RUN: not llc -run-pass none -o /dev/null %s 2>&1 | FileCheck %s +# This test ensures that the MIR parser detects errors when parsing machine +# metadata. + +--- | + define i32 @t0() { + ret i32 0 + } +... +--- +name: t0 +machineMetadataNodes: + - '!9 = distinct !{!9, !7, !"Dst"}' +... +# CHECK: [[@LINE-2]]:26: use of undefined metadata '!7' diff --git a/llvm/test/CodeGen/MIR/X86/machine-metadata-error.mir b/llvm/test/CodeGen/MIR/X86/machine-metadata-error.mir new file mode 100644 index 0000000000000..e3ab673c4d127 --- /dev/null +++ b/llvm/test/CodeGen/MIR/X86/machine-metadata-error.mir @@ -0,0 +1,25 @@ +# RUN: not llc -run-pass none -o /dev/null %s 2>&1 | FileCheck %s +# This test ensures that the MIR parser detects errors when parsing machine +# metadata. +--- | + target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + target triple = "x86_64-unknown-linux-gnu" + + define i32 @test_memcpy(i32* nocapture %p, i32* nocapture readonly %q) { + ret i32 0 + } +... +--- +name: test_memcpy +machineMetadataNodes: + - '!7 = distinct !{!7, !"MemcpyLoweringDomain"}' + - '!9 = distinct !{!9, !7, !"Dst"}' + - '!5 = !{!6}' + - '!6 = distinct !{!6, !7, !"Src"}' + - '!10 = !{!9}' +body: | + bb.0 (%ir-block.0): + ; CHECK: [[@LINE+1]]:96: use of undefined metadata '!11' + %2:gr64 = MOV64rm %0, 1, $noreg, 16, $noreg :: (load 8, align 4, !alias.scope !5, !noalias !11) + +... diff --git a/llvm/test/CodeGen/MIR/X86/machine-metadata.mir b/llvm/test/CodeGen/MIR/X86/machine-metadata.mir new file mode 100644 index 0000000000000..b4993fcc59634 --- /dev/null +++ b/llvm/test/CodeGen/MIR/X86/machine-metadata.mir @@ -0,0 +1,172 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=x86_64-linux-gnu -run-pass=none -o - %s | FileCheck %s +--- | + ; ModuleID = 'test/CodeGen/X86/memcpy-scoped-aa.ll' + source_filename = "test/CodeGen/X86/memcpy-scoped-aa.ll" + target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + target triple = "x86_64-unknown-linux-gnu" + + define i32 @test_memcpy(i32* nocapture %p, i32* nocapture readonly %q) { + %p0 = bitcast i32* %p to i8* + %add.ptr = getelementptr inbounds i32, i32* %p, i64 4 + %p1 = bitcast i32* %add.ptr to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 4 dereferenceable(16) %p0, i8* noundef nonnull align 4 dereferenceable(16) %p1, i64 16, i1 false), !alias.scope !0, !noalias !3 + %v0 = load i32, i32* %q, align 4, !alias.scope !3, !noalias !0 + %q1 = getelementptr inbounds i32, i32* %q, i64 1 + %v1 = load i32, i32* %q1, align 4, !alias.scope !3, !noalias !0 + %add = add i32 %v0, %v1 + ret i32 %add + } + + define i32 @test_memcpy_inline(i32* nocapture %p, i32* nocapture readonly %q) { + %p0 = bitcast i32* %p to i8* + %add.ptr = getelementptr inbounds i32, i32* %p, i64 4 + %p1 = bitcast i32* %add.ptr to i8* + tail call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* noundef nonnull align 4 dereferenceable(16) %p0, i8* noundef nonnull align 4 dereferenceable(16) %p1, i64 16, i1 false), !alias.scope !0, !noalias !3 + %v0 = load i32, i32* %q, align 4, !alias.scope !3, !noalias !0 + %q1 = getelementptr inbounds i32, i32* %q, i64 1 + %v1 = load i32, i32* %q1, align 4, !alias.scope !3, !noalias !0 + %add = add i32 %v0, %v1 + ret i32 %add + } + + define i32 @test_mempcpy(i32* nocapture %p, i32* nocapture readonly %q) { + %p0 = bitcast i32* %p to i8* + %add.ptr = getelementptr inbounds i32, i32* %p, i64 4 + %p1 = bitcast i32* %add.ptr to i8* + %call = tail call i8* @mempcpy(i8* noundef nonnull align 4 dereferenceable(16) %p0, i8* noundef nonnull align 4 dereferenceable(16) %p1, i64 16), !alias.scope !0, !noalias !3 + %v0 = load i32, i32* %q, align 4, !alias.scope !3, !noalias !0 + %q1 = getelementptr inbounds i32, i32* %q, i64 1 + %v1 = load i32, i32* %q1, align 4, !alias.scope !3, !noalias !0 + %add = add i32 %v0, %v1 + ret i32 %add + } + + ; Function Attrs: argmemonly nofree nounwind willreturn + declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #0 + + ; Function Attrs: argmemonly nofree nounwind willreturn + declare void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64 immarg, i1 immarg) #0 + + declare i8* @mempcpy(i8*, i8*, i64) + + attributes #0 = { argmemonly nofree nounwind willreturn } + + !0 = !{!1} + !1 = distinct !{!1, !2, !"bax: %p"} + !2 = distinct !{!2, !"bax"} + !3 = !{!4} + !4 = distinct !{!4, !2, !"bax: %q"} + +... +--- +name: test_memcpy +machineMetadataNodes: + - '!7 = distinct !{!7, !"MemcpyLoweringDomain"}' + - '!9 = distinct !{!9, !7, !"Dst"}' + - '!8 = !{!4, !9}' + - '!5 = !{!1, !6}' + - '!6 = distinct !{!6, !7, !"Src"}' + - '!11 = !{!4, !6}' + - '!10 = !{!1, !9}' +body: | + bb.0 (%ir-block.0): + liveins: $rdi, $rsi + + ; CHECK-LABEL: name: test_memcpy + ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi + ; CHECK: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi + ; CHECK: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 16, $noreg :: (load 8 from %ir.p1, align 4, !alias.scope !5, !noalias !8) + ; CHECK: [[MOV64rm1:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 24, $noreg :: (load 8 from %ir.p1 + 8, align 4, !alias.scope !5, !noalias !8) + ; CHECK: MOV64mr [[COPY1]], 1, $noreg, 8, $noreg, killed [[MOV64rm1]] :: (store 8 into %ir.p0 + 8, align 4, !alias.scope !10, !noalias !11) + ; CHECK: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, killed [[MOV64rm]] :: (store 8 into %ir.p0, align 4, !alias.scope !10, !noalias !11) + ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load 4 from %ir.q, !alias.scope !3, !noalias !0) + ; CHECK: [[ADD32rm:%[0-9]+]]:gr32 = ADD32rm [[MOV32rm]], [[COPY]], 1, $noreg, 4, $noreg, implicit-def dead $eflags :: (load 4 from %ir.q1, !alias.scope !3, !noalias !0) + ; CHECK: $eax = COPY [[ADD32rm]] + ; CHECK: RET 0, $eax + %1:gr64 = COPY $rsi + %0:gr64 = COPY $rdi + %2:gr64 = MOV64rm %0, 1, $noreg, 16, $noreg :: (load 8 from %ir.p1, align 4, !alias.scope !5, !noalias !8) + %3:gr64 = MOV64rm %0, 1, $noreg, 24, $noreg :: (load 8 from %ir.p1 + 8, align 4, !alias.scope !5, !noalias !8) + MOV64mr %0, 1, $noreg, 8, $noreg, killed %3 :: (store 8 into %ir.p0 + 8, align 4, !alias.scope !10, !noalias !11) + MOV64mr %0, 1, $noreg, 0, $noreg, killed %2 :: (store 8 into %ir.p0, align 4, !alias.scope !10, !noalias !11) + %4:gr32 = MOV32rm %1, 1, $noreg, 0, $noreg :: (load 4 from %ir.q, !alias.scope !3, !noalias !0) + %5:gr32 = ADD32rm %4, %1, 1, $noreg, 4, $noreg, implicit-def dead $eflags :: (load 4 from %ir.q1, !alias.scope !3, !noalias !0) + $eax = COPY %5 + RET 0, $eax + +... +--- +name: test_memcpy_inline +machineMetadataNodes: + - '!8 = !{!4, !9}' + - '!9 = distinct !{!9, !7, !"Dst"}' + - '!5 = !{!1, !6}' + - '!7 = distinct !{!7, !"MemcpyLoweringDomain"}' + - '!11 = !{!4, !6}' + - '!10 = !{!1, !9}' + - '!6 = distinct !{!6, !7, !"Src"}' +body: | + bb.0 (%ir-block.0): + liveins: $rdi, $rsi + + ; CHECK-LABEL: name: test_memcpy_inline + ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi + ; CHECK: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi + ; CHECK: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 16, $noreg :: (load 8 from %ir.p1, align 4, !alias.scope !5, !noalias !8) + ; CHECK: [[MOV64rm1:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 24, $noreg :: (load 8 from %ir.p1 + 8, align 4, !alias.scope !5, !noalias !8) + ; CHECK: MOV64mr [[COPY1]], 1, $noreg, 8, $noreg, killed [[MOV64rm1]] :: (store 8 into %ir.p0 + 8, align 4, !alias.scope !10, !noalias !11) + ; CHECK: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, killed [[MOV64rm]] :: (store 8 into %ir.p0, align 4, !alias.scope !10, !noalias !11) + ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load 4 from %ir.q, !alias.scope !3, !noalias !0) + ; CHECK: [[ADD32rm:%[0-9]+]]:gr32 = ADD32rm [[MOV32rm]], [[COPY]], 1, $noreg, 4, $noreg, implicit-def dead $eflags :: (load 4 from %ir.q1, !alias.scope !3, !noalias !0) + ; CHECK: $eax = COPY [[ADD32rm]] + ; CHECK: RET 0, $eax + %1:gr64 = COPY $rsi + %0:gr64 = COPY $rdi + %2:gr64 = MOV64rm %0, 1, $noreg, 16, $noreg :: (load 8 from %ir.p1, align 4, !alias.scope !5, !noalias !8) + %3:gr64 = MOV64rm %0, 1, $noreg, 24, $noreg :: (load 8 from %ir.p1 + 8, align 4, !alias.scope !5, !noalias !8) + MOV64mr %0, 1, $noreg, 8, $noreg, killed %3 :: (store 8 into %ir.p0 + 8, align 4, !alias.scope !10, !noalias !11) + MOV64mr %0, 1, $noreg, 0, $noreg, killed %2 :: (store 8 into %ir.p0, align 4, !alias.scope !10, !noalias !11) + %4:gr32 = MOV32rm %1, 1, $noreg, 0, $noreg :: (load 4 from %ir.q, !alias.scope !3, !noalias !0) + %5:gr32 = ADD32rm %4, %1, 1, $noreg, 4, $noreg, implicit-def dead $eflags :: (load 4 from %ir.q1, !alias.scope !3, !noalias !0) + $eax = COPY %5 + RET 0, $eax + +... +--- +name: test_mempcpy +machineMetadataNodes: + - '!5 = !{!1, !6}' + - '!8 = !{!4, !9}' + - '!11 = !{!4, !6}' + - '!10 = !{!1, !9}' + - '!7 = distinct !{!7, !"MemcpyLoweringDomain"}' + - '!6 = distinct !{!6, !7, !"Src"}' + - '!9 = distinct !{!9, !7, !"Dst"}' +body: | + bb.0 (%ir-block.0): + liveins: $rdi, $rsi + + ; CHECK-LABEL: name: test_mempcpy + ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi + ; CHECK: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi + ; CHECK: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 16, $noreg :: (load 8 from %ir.p1, align 1, !alias.scope !5, !noalias !8) + ; CHECK: [[MOV64rm1:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 24, $noreg :: (load 8 from %ir.p1 + 8, align 1, !alias.scope !5, !noalias !8) + ; CHECK: MOV64mr [[COPY1]], 1, $noreg, 8, $noreg, killed [[MOV64rm1]] :: (store 8 into %ir.p0 + 8, align 1, !alias.scope !10, !noalias !11) + ; CHECK: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, killed [[MOV64rm]] :: (store 8 into %ir.p0, align 1, !alias.scope !10, !noalias !11) + ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load 4 from %ir.q, !alias.scope !3, !noalias !0) + ; CHECK: [[ADD32rm:%[0-9]+]]:gr32 = ADD32rm [[MOV32rm]], [[COPY]], 1, $noreg, 4, $noreg, implicit-def dead $eflags :: (load 4 from %ir.q1, !alias.scope !3, !noalias !0) + ; CHECK: $eax = COPY [[ADD32rm]] + ; CHECK: RET 0, $eax + %1:gr64 = COPY $rsi + %0:gr64 = COPY $rdi + %2:gr64 = MOV64rm %0, 1, $noreg, 16, $noreg :: (load 8 from %ir.p1, align 1, !alias.scope !5, !noalias !8) + %3:gr64 = MOV64rm %0, 1, $noreg, 24, $noreg :: (load 8 from %ir.p1 + 8, align 1, !alias.scope !5, !noalias !8) + MOV64mr %0, 1, $noreg, 8, $noreg, killed %3 :: (store 8 into %ir.p0 + 8, align 1, !alias.scope !10, !noalias !11) + MOV64mr %0, 1, $noreg, 0, $noreg, killed %2 :: (store 8 into %ir.p0, align 1, !alias.scope !10, !noalias !11) + %4:gr32 = MOV32rm %1, 1, $noreg, 0, $noreg :: (load 4 from %ir.q, !alias.scope !3, !noalias !0) + %5:gr32 = ADD32rm %4, %1, 1, $noreg, 4, $noreg, implicit-def dead $eflags :: (load 4 from %ir.q1, !alias.scope !3, !noalias !0) + $eax = COPY %5 + RET 0, $eax + +... From 948308ef34dc7da8bb741a85eb9941cc2b05d227 Mon Sep 17 00:00:00 2001 From: Michael Liao Date: Mon, 28 Jun 2021 22:48:26 -0400 Subject: [PATCH 131/619] Fix `-Wunused-variable` warning. NFC. --- clang/lib/CodeGen/CGCall.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 35b34179cc231..4ff6c632b61df 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -2173,7 +2173,7 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, // Add "sample-profile-suffix-elision-policy" attribute for internal linkage // functions with -funique-internal-linkage-names. if (TargetDecl && CodeGenOpts.UniqueInternalLinkageNames) { - if (auto *Fn = dyn_cast(TargetDecl)) { + if (isa(TargetDecl)) { if (this->getFunctionLinkage(CalleeInfo.getCalleeDecl()) == llvm::GlobalValue::InternalLinkage) FuncAttrs.addAttribute("sample-profile-suffix-elision-policy", From 42c05ed8beb264ccae0b471ca67ad3d7a6aeaa0c Mon Sep 17 00:00:00 2001 From: Greg Clayton Date: Mon, 28 Jun 2021 19:59:24 -0700 Subject: [PATCH 132/619] Fix failing tests after https://reviews.llvm.org/D104488. Synthetic names no longer have the shared library name appended to the end. --- lldb/test/Shell/ObjectFile/ELF/eh_frame-symbols.yaml | 4 ++-- lldb/test/Shell/SymbolFile/Breakpad/symtab.test | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lldb/test/Shell/ObjectFile/ELF/eh_frame-symbols.yaml b/lldb/test/Shell/ObjectFile/ELF/eh_frame-symbols.yaml index 6178a45de1b59..0dcc9fb76bd4f 100644 --- a/lldb/test/Shell/ObjectFile/ELF/eh_frame-symbols.yaml +++ b/lldb/test/Shell/ObjectFile/ELF/eh_frame-symbols.yaml @@ -3,8 +3,8 @@ # CHECK: Index UserID DSX Type File Address/Value Load Address Size Flags Name # CHECK: [ 0] 1 SourceFile 0x0000000000000000 0x0000000000000000 0x00000004 - -# CHECK: [ 1] 2 SX Code 0x0000000000201180 0x0000000000000010 0x00000000 ___lldb_unnamed_symbol1$${{.*}} -# CHECK: [ 2] 3 SX Code 0x0000000000201190 0x0000000000000006 0x00000000 ___lldb_unnamed_symbol2$${{.*}} +# CHECK: [ 1] 2 SX Code 0x0000000000201180 0x0000000000000010 0x00000000 ___lldb_unnamed_symbol{{[0-9]*}} +# CHECK: [ 2] 3 SX Code 0x0000000000201190 0x0000000000000006 0x00000000 ___lldb_unnamed_symbol{{[0-9]*}} --- !ELF FileHeader: diff --git a/lldb/test/Shell/SymbolFile/Breakpad/symtab.test b/lldb/test/Shell/SymbolFile/Breakpad/symtab.test index 1eb03fa43deb0..788dafe248d50 100644 --- a/lldb/test/Shell/SymbolFile/Breakpad/symtab.test +++ b/lldb/test/Shell/SymbolFile/Breakpad/symtab.test @@ -5,7 +5,7 @@ # CHECK-LABEL: (lldb) image dump symtab symtab.out # CHECK: Symtab, file = {{.*}}symtab.out, num_symbols = 5: # CHECK: Index UserID DSX Type File Address/Value Load Address Size Flags Name -# CHECK: [ 0] 0 SX Code 0x0000000000400000 0x00000000000000b0 0x00000000 ___lldb_unnamed_symbol{{[0-9]*}}$$symtab.out +# CHECK: [ 0] 0 SX Code 0x0000000000400000 0x00000000000000b0 0x00000000 ___lldb_unnamed_symbol{{[0-9]*}} # CHECK: [ 1] 0 X Code 0x00000000004000b0 0x000000000000000c 0x00000000 f1_func # CHECK: [ 2] 0 X Code 0x00000000004000a0 0x000000000000000d 0x00000000 func_only # CHECK: [ 3] 0 X Code 0x00000000004000c0 0x0000000000000010 0x00000000 f2 From ae79854e1c597962b74cb13293fcd5d31a2ca9bc Mon Sep 17 00:00:00 2001 From: Kai Luo Date: Tue, 29 Jun 2021 03:13:55 +0000 Subject: [PATCH 133/619] [AIX][compiler-rt] Deliver libatomic.a at top level library directory Install libatomic.a in top level library directory so that compiler can find it in search directories. Reviewed By: jsji Differential Revision: https://reviews.llvm.org/D104908 --- compiler-rt/cmake/Modules/CompilerRTAIXUtils.cmake | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/compiler-rt/cmake/Modules/CompilerRTAIXUtils.cmake b/compiler-rt/cmake/Modules/CompilerRTAIXUtils.cmake index 983d97df2d295..3b61430f471ec 100644 --- a/compiler-rt/cmake/Modules/CompilerRTAIXUtils.cmake +++ b/compiler-rt/cmake/Modules/CompilerRTAIXUtils.cmake @@ -57,8 +57,16 @@ macro(archive_aix_libatomic name) if(shared_libraries_to_archive) set(output_dir "") set(install_dir "") - get_compiler_rt_output_dir(${COMPILER_RT_DEFAULT_TARGET_ARCH} output_dir) - get_compiler_rt_install_dir(${COMPILER_RT_DEFAULT_TARGET_ARCH} install_dir) + # If LLVM defines top level library directory, we want to deliver + # libatomic.a at top level. See `llvm/cmake/modules/AddLLVM.cmake' + # setting _install_rpath on AIX for reference. + if(LLVM_LIBRARY_OUTPUT_INTDIR AND CMAKE_INSTALL_PREFIX) + set(output_dir "${LLVM_LIBRARY_OUTPUT_INTDIR}") + set(install_dir "${CMAKE_INSTALL_PREFIX}/lib${LLVM_LIBDIR_SUFFIX}") + else() + get_compiler_rt_output_dir(${COMPILER_RT_DEFAULT_TARGET_ARCH} output_dir) + get_compiler_rt_install_dir(${COMPILER_RT_DEFAULT_TARGET_ARCH} install_dir) + endif() add_custom_command(OUTPUT "${output_dir}/libatomic.a" COMMAND ${CMAKE_AR} -X32_64 r "${output_dir}/libatomic.a" ${shared_libraries_to_archive} From c94c8d8b5d999c97ea424b35a1cb540d2a1d3bc6 Mon Sep 17 00:00:00 2001 From: Ben Shi Date: Tue, 29 Jun 2021 11:15:00 +0800 Subject: [PATCH 134/619] [AVR][clang] Fix wrong calling convention in functions return struct type According to AVR ABI (https://gcc.gnu.org/wiki/avr-gcc), returned struct value within size 1-8 bytes should be returned directly (via register r18-r25), while larger ones should be returned via an implicit struct pointer argument. Reviewed By: dylanmckay Differential Revision: https://reviews.llvm.org/D99237 --- clang/lib/CodeGen/TargetInfo.cpp | 29 +++++++++++++++++++++++++++-- clang/test/CodeGen/avr/struct.c | 26 ++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 2 deletions(-) create mode 100644 clang/test/CodeGen/avr/struct.c diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index e9565a5aef63d..035c131dacb7d 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -8154,14 +8154,39 @@ void M68kTargetCodeGenInfo::setTargetAttributes( } //===----------------------------------------------------------------------===// -// AVR ABI Implementation. +// AVR ABI Implementation. Documented at +// https://gcc.gnu.org/wiki/avr-gcc#Calling_Convention +// https://gcc.gnu.org/wiki/avr-gcc#Reduced_Tiny //===----------------------------------------------------------------------===// namespace { +class AVRABIInfo : public DefaultABIInfo { +public: + AVRABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) {} + + ABIArgInfo classifyReturnType(QualType Ty) const { + // A return struct with size less than or equal to 8 bytes is returned + // directly via registers R18-R25. + if (isAggregateTypeForABI(Ty) && getContext().getTypeSize(Ty) <= 64) + return ABIArgInfo::getDirect(); + else + return DefaultABIInfo::classifyReturnType(Ty); + } + + // Just copy the original implementation of DefaultABIInfo::computeInfo(), + // since DefaultABIInfo::classify{Return,Argument}Type() are not virtual. + void computeInfo(CGFunctionInfo &FI) const override { + if (!getCXXABI().classifyReturnType(FI)) + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + for (auto &I : FI.arguments()) + I.info = classifyArgumentType(I.type); + } +}; + class AVRTargetCodeGenInfo : public TargetCodeGenInfo { public: AVRTargetCodeGenInfo(CodeGenTypes &CGT) - : TargetCodeGenInfo(std::make_unique(CGT)) {} + : TargetCodeGenInfo(std::make_unique(CGT)) {} LangAS getGlobalVarAddressSpace(CodeGenModule &CGM, const VarDecl *D) const override { diff --git a/clang/test/CodeGen/avr/struct.c b/clang/test/CodeGen/avr/struct.c new file mode 100644 index 0000000000000..cb4e84522df63 --- /dev/null +++ b/clang/test/CodeGen/avr/struct.c @@ -0,0 +1,26 @@ +// RUN: %clang_cc1 -triple avr -emit-llvm %s -o - | FileCheck %s + +// Structure that is more than 8 bytes. +struct s10 { + int a, b, c, d, e; +}; + +// Structure that is less than 8 bytes. +struct s06 { + int a, b, c; +}; + +struct s10 foo10(int a, int b, int c) { + struct s10 a0; + return a0; +} + +struct s06 foo06(int a, int b, int c) { + struct s06 a0; + return a0; +} + +// CHECK: %struct.s10 = type { i16, i16, i16, i16, i16 } +// CHECK: %struct.s06 = type { i16, i16, i16 } +// CHECK: define{{.*}} void @foo10(%struct.s10* {{.*}}, i16 %a, i16 %b, i16 %c) +// CHECK: define{{.*}} %struct.s06 @foo06(i16 %a, i16 %b, i16 %c) From 6d234a6908646cbdefcbbb4c0ea1ff2cf4a5482f Mon Sep 17 00:00:00 2001 From: Xiang1 Zhang Date: Wed, 23 Jun 2021 13:54:10 +0800 Subject: [PATCH 135/619] [X86] Zero some outputs of Kelocker intrinsics in error case Reviewed By: WangPengfei Differential Revision: https://reviews.llvm.org/D104766 --- clang/lib/CodeGen/CGBuiltin.cpp | 66 +- clang/lib/Headers/keylockerintrin.h | 30 + clang/test/CodeGen/X86/keylocker.c | 1539 ++++++++++++++++++++++----- 3 files changed, 1387 insertions(+), 248 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 2e9454921ffa8..9579d706b2ae5 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -14736,27 +14736,56 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_aesenc256kl_u8: case X86::BI__builtin_ia32_aesdec256kl_u8: { Intrinsic::ID IID; + StringRef StrNoErr, StrErr, StrEnd; switch (BuiltinID) { default: llvm_unreachable("Unexpected builtin"); case X86::BI__builtin_ia32_aesenc128kl_u8: IID = Intrinsic::x86_aesenc128kl; + StrNoErr = "aesenc128kl_no_error"; + StrErr = "aesenc128kl_error"; + StrEnd = "aesenc128kl_end"; break; case X86::BI__builtin_ia32_aesdec128kl_u8: IID = Intrinsic::x86_aesdec128kl; + StrNoErr = "aesdec128kl_no_error"; + StrErr = "aesdec128kl_error"; + StrEnd = "aesdec128kl_end"; break; case X86::BI__builtin_ia32_aesenc256kl_u8: IID = Intrinsic::x86_aesenc256kl; + StrNoErr = "aesenc256kl_no_error"; + StrErr = "aesenc256kl_error"; + StrEnd = "aesenc256kl_end"; break; case X86::BI__builtin_ia32_aesdec256kl_u8: IID = Intrinsic::x86_aesdec256kl; + StrNoErr = "aesdec256kl_no_error"; + StrErr = "aesdec256kl_error"; + StrEnd = "aesdec256kl_end"; break; } Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[1], Ops[2]}); - Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1), - Ops[0]); + BasicBlock *NoError = createBasicBlock(StrNoErr, this->CurFn); + BasicBlock *Error = createBasicBlock(StrErr, this->CurFn); + BasicBlock *End = createBasicBlock(StrEnd, this->CurFn); + + Value *Ret = Builder.CreateExtractValue(Call, 0); + Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty()); + Value *Out = Builder.CreateExtractValue(Call, 1); + Builder.CreateCondBr(Succ, NoError, Error); + + Builder.SetInsertPoint(NoError); + Builder.CreateDefaultAlignedStore(Out, Ops[0]); + Builder.CreateBr(End); + Builder.SetInsertPoint(Error); + Constant *Zero = llvm::Constant::getNullValue(Out->getType()); + Builder.CreateDefaultAlignedStore(Zero, Ops[0]); + Builder.CreateBr(End); + + Builder.SetInsertPoint(End); return Builder.CreateExtractValue(Call, 0); } case X86::BI__builtin_ia32_aesencwide128kl_u8: @@ -14764,18 +14793,31 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_aesencwide256kl_u8: case X86::BI__builtin_ia32_aesdecwide256kl_u8: { Intrinsic::ID IID; + StringRef StrNoErr, StrErr, StrEnd; switch (BuiltinID) { case X86::BI__builtin_ia32_aesencwide128kl_u8: IID = Intrinsic::x86_aesencwide128kl; + StrNoErr = "aesencwide128kl_no_error"; + StrErr = "aesencwide128kl_error"; + StrEnd = "aesencwide128kl_end"; break; case X86::BI__builtin_ia32_aesdecwide128kl_u8: IID = Intrinsic::x86_aesdecwide128kl; + StrNoErr = "aesdecwide128kl_no_error"; + StrErr = "aesdecwide128kl_error"; + StrEnd = "aesdecwide128kl_end"; break; case X86::BI__builtin_ia32_aesencwide256kl_u8: IID = Intrinsic::x86_aesencwide256kl; + StrNoErr = "aesencwide256kl_no_error"; + StrErr = "aesencwide256kl_error"; + StrEnd = "aesencwide256kl_end"; break; case X86::BI__builtin_ia32_aesdecwide256kl_u8: IID = Intrinsic::x86_aesdecwide256kl; + StrNoErr = "aesdecwide256kl_no_error"; + StrErr = "aesdecwide256kl_error"; + StrEnd = "aesdecwide256kl_end"; break; } @@ -14789,12 +14831,32 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), InOps); + BasicBlock *NoError = createBasicBlock(StrNoErr, this->CurFn); + BasicBlock *Error = createBasicBlock(StrErr, this->CurFn); + BasicBlock *End = createBasicBlock(StrEnd, this->CurFn); + + Value *Ret = Builder.CreateExtractValue(Call, 0); + Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty()); + Builder.CreateCondBr(Succ, NoError, Error); + + Builder.SetInsertPoint(NoError); for (int i = 0; i != 8; ++i) { Value *Extract = Builder.CreateExtractValue(Call, i + 1); Value *Ptr = Builder.CreateConstGEP1_32(Ops[0], i); Builder.CreateAlignedStore(Extract, Ptr, Align(16)); } + Builder.CreateBr(End); + + Builder.SetInsertPoint(Error); + for (int i = 0; i != 8; ++i) { + Value *Out = Builder.CreateExtractValue(Call, i + 1); + Constant *Zero = llvm::Constant::getNullValue(Out->getType()); + Value *Ptr = Builder.CreateConstGEP1_32(Ops[0], i); + Builder.CreateAlignedStore(Zero, Ptr, Align(16)); + } + Builder.CreateBr(End); + Builder.SetInsertPoint(End); return Builder.CreateExtractValue(Call, 0); } } diff --git a/clang/lib/Headers/keylockerintrin.h b/clang/lib/Headers/keylockerintrin.h index c15d39c8e3928..68b0a5689618a 100644 --- a/clang/lib/Headers/keylockerintrin.h +++ b/clang/lib/Headers/keylockerintrin.h @@ -230,10 +230,12 @@ _mm_aesenc128kl_u8(__m128i* __odata, __m128i __idata, const void *__h) { /// HandleKeyType (Handle[511:0]) != HANDLE_KEY_TYPE_AES256 ) /// IF (IllegalHandle) /// ZF := 1 +/// MEM[__odata+127:__odata] := 0 /// ELSE /// (UnwrappedKey, Authentic) := UnwrapKeyAndAuthenticate512 (Handle[511:0], IWKey) /// IF (Authentic == 0) /// ZF := 1 +/// MEM[__odata+127:__odata] := 0 /// ELSE /// MEM[__odata+127:__odata] := AES256Encrypt (__idata[127:0], UnwrappedKey) /// ZF := 0 @@ -267,10 +269,12 @@ _mm_aesenc256kl_u8(__m128i* __odata, __m128i __idata, const void *__h) { /// HandleKeyType (Handle[383:0]) != HANDLE_KEY_TYPE_AES128) /// IF (IllegalHandle) /// ZF := 1 +/// MEM[__odata+127:__odata] := 0 /// ELSE /// (UnwrappedKey, Authentic) := UnwrapKeyAndAuthenticate384 (Handle[383:0], IWKey) /// IF (Authentic == 0) /// ZF := 1 +/// MEM[__odata+127:__odata] := 0 /// ELSE /// MEM[__odata+127:__odata] := AES128Decrypt (__idata[127:0], UnwrappedKey) /// ZF := 0 @@ -304,10 +308,12 @@ _mm_aesdec128kl_u8(__m128i* __odata, __m128i __idata, const void *__h) { /// HandleKeyType (Handle[511:0]) != HANDLE_KEY_TYPE_AES256) /// IF (IllegalHandle) /// ZF := 1 +/// MEM[__odata+127:__odata] := 0 /// ELSE /// (UnwrappedKey, Authentic) := UnwrapKeyAndAuthenticate512 (Handle[511:0], IWKey) /// IF (Authentic == 0) /// ZF := 1 +/// MEM[__odata+127:__odata] := 0 /// ELSE /// MEM[__odata+127:__odata] := AES256Decrypt (__idata[127:0], UnwrappedKey) /// ZF := 0 @@ -354,10 +360,16 @@ _mm_aesdec256kl_u8(__m128i* __odata, __m128i __idata, const void *__h) { /// HandleKeyType (Handle[383:0]) != HANDLE_KEY_TYPE_AES128 ) /// IF (IllegalHandle) /// ZF := 1 +/// FOR i := 0 to 7 +/// __odata[i] := 0 +/// ENDFOR /// ELSE /// (UnwrappedKey, Authentic) := UnwrapKeyAndAuthenticate384 (Handle[383:0], IWKey) /// IF Authentic == 0 /// ZF := 1 +/// FOR i := 0 to 7 +/// __odata[i] := 0 +/// ENDFOR /// ELSE /// FOR i := 0 to 7 /// __odata[i] := AES128Encrypt (__idata[i], UnwrappedKey) @@ -394,10 +406,16 @@ _mm_aesencwide128kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* /// HandleKeyType (Handle[511:0]) != HANDLE_KEY_TYPE_AES512 ) /// IF (IllegalHandle) /// ZF := 1 +/// FOR i := 0 to 7 +/// __odata[i] := 0 +/// ENDFOR /// ELSE /// (UnwrappedKey, Authentic) := UnwrapKeyAndAuthenticate512 (Handle[511:0], IWKey) /// IF Authentic == 0 /// ZF := 1 +/// FOR i := 0 to 7 +/// __odata[i] := 0 +/// ENDFOR /// ELSE /// FOR i := 0 to 7 /// __odata[i] := AES256Encrypt (__idata[i], UnwrappedKey) @@ -434,10 +452,16 @@ _mm_aesencwide256kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* /// HandleKeyType (Handle) != HANDLE_KEY_TYPE_AES128 ) /// IF (IllegalHandle) /// ZF := 1 +/// FOR i := 0 to 7 +/// __odata[i] := 0 +/// ENDFOR /// ELSE /// (UnwrappedKey, Authentic) := UnwrapKeyAndAuthenticate384 (Handle[383:0], IWKey) /// IF Authentic == 0 /// ZF := 1 +/// FOR i := 0 to 7 +/// __odata[i] := 0 +/// ENDFOR /// ELSE /// FOR i := 0 to 7 /// __odata[i] := AES128Decrypt (__idata[i], UnwrappedKey) @@ -474,10 +498,16 @@ _mm_aesdecwide128kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* /// HandleKeyType (Handle) != HANDLE_KEY_TYPE_AES512 ) /// If (IllegalHandle) /// ZF := 1 +/// FOR i := 0 to 7 +/// __odata[i] := 0 +/// ENDFOR /// ELSE /// (UnwrappedKey, Authentic) := UnwrapKeyAndAuthenticate512 (Handle[511:0], IWKey) /// IF Authentic == 0 /// ZF := 1 +/// FOR i := 0 to 7 +/// __odata[i] := 0 +/// ENDFOR /// ELSE /// FOR i := 0 to 7 /// __odata[i] := AES256Decrypt (__idata[i], UnwrappedKey) diff --git a/clang/test/CodeGen/X86/keylocker.c b/clang/test/CodeGen/X86/keylocker.c index b87fe22d77617..ded6e57bfb8b6 100644 --- a/clang/test/CodeGen/X86/keylocker.c +++ b/clang/test/CodeGen/X86/keylocker.c @@ -1,292 +1,1339 @@ -// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +kl -target-feature +widekl -emit-llvm -o - -Wall -Werror | FileCheck %s -// RUN: %clang_cc1 %s -ffreestanding -triple=i386-unknown-unknown -target-feature +kl -target-feature +widekl -emit-llvm -o - -Wall -Werror | FileCheck %s -// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +widekl -emit-llvm -o - -Wall -Werror | FileCheck %s -// RUN: %clang_cc1 %s -ffreestanding -triple=i386-unknown-unknown -target-feature +widekl -emit-llvm -o - -Wall -Werror | FileCheck %s +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 %s -O0 -ffreestanding -triple=x86_64-unknown-unknown -target-feature +kl -target-feature +widekl -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=CHECK64 +// RUN: %clang_cc1 %s -O0 -ffreestanding -triple=i386-unknown-unknown -target-feature +kl -target-feature +widekl -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=CHECK32 #include +// CHECK64-LABEL: @test_loadiwkey( +// CHECK64-NEXT: entry: +// CHECK64-NEXT: [[__CTL_ADDR_I:%.*]] = alloca i32, align 4 +// CHECK64-NEXT: [[__INTKEY_ADDR_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK64-NEXT: [[__ENKEY_LO_ADDR_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK64-NEXT: [[__ENKEY_HI_ADDR_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK64-NEXT: [[CTL_ADDR:%.*]] = alloca i32, align 4 +// CHECK64-NEXT: [[INTKEY_ADDR:%.*]] = alloca <2 x i64>, align 16 +// CHECK64-NEXT: [[ENKEY_LO_ADDR:%.*]] = alloca <2 x i64>, align 16 +// CHECK64-NEXT: [[ENKEY_HI_ADDR:%.*]] = alloca <2 x i64>, align 16 +// CHECK64-NEXT: store i32 [[CTL:%.*]], i32* [[CTL_ADDR]], align 4 +// CHECK64-NEXT: store <2 x i64> [[INTKEY:%.*]], <2 x i64>* [[INTKEY_ADDR]], align 16 +// CHECK64-NEXT: store <2 x i64> [[ENKEY_LO:%.*]], <2 x i64>* [[ENKEY_LO_ADDR]], align 16 +// CHECK64-NEXT: store <2 x i64> [[ENKEY_HI:%.*]], <2 x i64>* [[ENKEY_HI_ADDR]], align 16 +// CHECK64-NEXT: [[TMP0:%.*]] = load i32, i32* [[CTL_ADDR]], align 4 +// CHECK64-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[INTKEY_ADDR]], align 16 +// CHECK64-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[ENKEY_LO_ADDR]], align 16 +// CHECK64-NEXT: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ENKEY_HI_ADDR]], align 16 +// CHECK64-NEXT: store i32 [[TMP0]], i32* [[__CTL_ADDR_I]], align 4 +// CHECK64-NEXT: store <2 x i64> [[TMP1]], <2 x i64>* [[__INTKEY_ADDR_I]], align 16 +// CHECK64-NEXT: store <2 x i64> [[TMP2]], <2 x i64>* [[__ENKEY_LO_ADDR_I]], align 16 +// CHECK64-NEXT: store <2 x i64> [[TMP3]], <2 x i64>* [[__ENKEY_HI_ADDR_I]], align 16 +// CHECK64-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[__INTKEY_ADDR_I]], align 16 +// CHECK64-NEXT: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[__ENKEY_LO_ADDR_I]], align 16 +// CHECK64-NEXT: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[__ENKEY_HI_ADDR_I]], align 16 +// CHECK64-NEXT: [[TMP7:%.*]] = load i32, i32* [[__CTL_ADDR_I]], align 4 +// CHECK64-NEXT: call void @llvm.x86.loadiwkey(<2 x i64> [[TMP4]], <2 x i64> [[TMP5]], <2 x i64> [[TMP6]], i32 [[TMP7]]) #[[ATTR1:[0-9]+]] +// CHECK64-NEXT: ret void +// +// CHECK32-LABEL: @test_loadiwkey( +// CHECK32-NEXT: entry: +// CHECK32-NEXT: [[__CTL_ADDR_I:%.*]] = alloca i32, align 4 +// CHECK32-NEXT: [[__INTKEY_ADDR_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK32-NEXT: [[__ENKEY_LO_ADDR_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK32-NEXT: [[__ENKEY_HI_ADDR_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK32-NEXT: [[CTL_ADDR:%.*]] = alloca i32, align 4 +// CHECK32-NEXT: [[INTKEY_ADDR:%.*]] = alloca <2 x i64>, align 16 +// CHECK32-NEXT: [[ENKEY_LO_ADDR:%.*]] = alloca <2 x i64>, align 16 +// CHECK32-NEXT: [[ENKEY_HI_ADDR:%.*]] = alloca <2 x i64>, align 16 +// CHECK32-NEXT: store i32 [[CTL:%.*]], i32* [[CTL_ADDR]], align 4 +// CHECK32-NEXT: store <2 x i64> [[INTKEY:%.*]], <2 x i64>* [[INTKEY_ADDR]], align 16 +// CHECK32-NEXT: store <2 x i64> [[ENKEY_LO:%.*]], <2 x i64>* [[ENKEY_LO_ADDR]], align 16 +// CHECK32-NEXT: store <2 x i64> [[ENKEY_HI:%.*]], <2 x i64>* [[ENKEY_HI_ADDR]], align 16 +// CHECK32-NEXT: [[TMP0:%.*]] = load i32, i32* [[CTL_ADDR]], align 4 +// CHECK32-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[INTKEY_ADDR]], align 16 +// CHECK32-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[ENKEY_LO_ADDR]], align 16 +// CHECK32-NEXT: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ENKEY_HI_ADDR]], align 16 +// CHECK32-NEXT: store i32 [[TMP0]], i32* [[__CTL_ADDR_I]], align 4 +// CHECK32-NEXT: store <2 x i64> [[TMP1]], <2 x i64>* [[__INTKEY_ADDR_I]], align 16 +// CHECK32-NEXT: store <2 x i64> [[TMP2]], <2 x i64>* [[__ENKEY_LO_ADDR_I]], align 16 +// CHECK32-NEXT: store <2 x i64> [[TMP3]], <2 x i64>* [[__ENKEY_HI_ADDR_I]], align 16 +// CHECK32-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[__INTKEY_ADDR_I]], align 16 +// CHECK32-NEXT: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[__ENKEY_LO_ADDR_I]], align 16 +// CHECK32-NEXT: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[__ENKEY_HI_ADDR_I]], align 16 +// CHECK32-NEXT: [[TMP7:%.*]] = load i32, i32* [[__CTL_ADDR_I]], align 4 +// CHECK32-NEXT: call void @llvm.x86.loadiwkey(<2 x i64> [[TMP4]], <2 x i64> [[TMP5]], <2 x i64> [[TMP6]], i32 [[TMP7]]) #[[ATTR1:[0-9]+]] +// CHECK32-NEXT: ret void +// void test_loadiwkey(unsigned int ctl, __m128i intkey, __m128i enkey_lo, __m128i enkey_hi) { - //CHECK-LABEL: @test_loadiwkey - //CHECK: @llvm.x86.loadiwkey _mm_loadiwkey(ctl, intkey, enkey_lo, enkey_hi); } +// CHECK64-LABEL: @test_encodekey128_u32( +// CHECK64-NEXT: entry: +// CHECK64-NEXT: [[__HTYPE_ADDR_I:%.*]] = alloca i32, align 4 +// CHECK64-NEXT: [[__KEY_ADDR_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK64-NEXT: [[__H_ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK64-NEXT: [[HTYPE_ADDR:%.*]] = alloca i32, align 4 +// CHECK64-NEXT: [[KEY_ADDR:%.*]] = alloca <2 x i64>, align 16 +// CHECK64-NEXT: [[H_ADDR:%.*]] = alloca i8*, align 8 +// CHECK64-NEXT: store i32 [[HTYPE:%.*]], i32* [[HTYPE_ADDR]], align 4 +// CHECK64-NEXT: store <2 x i64> [[KEY:%.*]], <2 x i64>* [[KEY_ADDR]], align 16 +// CHECK64-NEXT: store i8* [[H:%.*]], i8** [[H_ADDR]], align 8 +// CHECK64-NEXT: [[TMP0:%.*]] = load i32, i32* [[HTYPE_ADDR]], align 4 +// CHECK64-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[KEY_ADDR]], align 16 +// CHECK64-NEXT: [[TMP2:%.*]] = load i8*, i8** [[H_ADDR]], align 8 +// CHECK64-NEXT: store i32 [[TMP0]], i32* [[__HTYPE_ADDR_I]], align 4 +// CHECK64-NEXT: store <2 x i64> [[TMP1]], <2 x i64>* [[__KEY_ADDR_I]], align 16 +// CHECK64-NEXT: store i8* [[TMP2]], i8** [[__H_ADDR_I]], align 8 +// CHECK64-NEXT: [[TMP3:%.*]] = load i32, i32* [[__HTYPE_ADDR_I]], align 4 +// CHECK64-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[__KEY_ADDR_I]], align 16 +// CHECK64-NEXT: [[TMP5:%.*]] = load i8*, i8** [[__H_ADDR_I]], align 8 +// CHECK64-NEXT: [[TMP6:%.*]] = call { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey128(i32 [[TMP3]], <2 x i64> [[TMP4]]) #[[ATTR1]] +// CHECK64-NEXT: [[TMP7:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP6]], 1 +// CHECK64-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP5]] to <2 x i64>* +// CHECK64-NEXT: store <2 x i64> [[TMP7]], <2 x i64>* [[TMP8]], align 1 +// CHECK64-NEXT: [[TMP9:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP6]], 2 +// CHECK64-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[TMP5]], i32 16 +// CHECK64-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to <2 x i64>* +// CHECK64-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* [[TMP11]], align 1 +// CHECK64-NEXT: [[TMP12:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP6]], 3 +// CHECK64-NEXT: [[TMP13:%.*]] = getelementptr i8, i8* [[TMP5]], i32 32 +// CHECK64-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to <2 x i64>* +// CHECK64-NEXT: store <2 x i64> [[TMP12]], <2 x i64>* [[TMP14]], align 1 +// CHECK64-NEXT: [[TMP15:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP6]], 4 +// CHECK64-NEXT: [[TMP16:%.*]] = getelementptr i8, i8* [[TMP5]], i32 48 +// CHECK64-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP16]] to <2 x i64>* +// CHECK64-NEXT: store <2 x i64> [[TMP15]], <2 x i64>* [[TMP17]], align 1 +// CHECK64-NEXT: [[TMP18:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP6]], 5 +// CHECK64-NEXT: [[TMP19:%.*]] = getelementptr i8, i8* [[TMP5]], i32 64 +// CHECK64-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to <2 x i64>* +// CHECK64-NEXT: store <2 x i64> [[TMP18]], <2 x i64>* [[TMP20]], align 1 +// CHECK64-NEXT: [[TMP21:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP6]], 6 +// CHECK64-NEXT: [[TMP22:%.*]] = getelementptr i8, i8* [[TMP5]], i32 80 +// CHECK64-NEXT: [[TMP23:%.*]] = bitcast i8* [[TMP22]] to <2 x i64>* +// CHECK64-NEXT: store <2 x i64> [[TMP21]], <2 x i64>* [[TMP23]], align 1 +// CHECK64-NEXT: [[TMP24:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP6]], 0 +// CHECK64-NEXT: ret i32 [[TMP24]] +// +// CHECK32-LABEL: @test_encodekey128_u32( +// CHECK32-NEXT: entry: +// CHECK32-NEXT: [[__HTYPE_ADDR_I:%.*]] = alloca i32, align 4 +// CHECK32-NEXT: [[__KEY_ADDR_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK32-NEXT: [[__H_ADDR_I:%.*]] = alloca i8*, align 4 +// CHECK32-NEXT: [[HTYPE_ADDR:%.*]] = alloca i32, align 4 +// CHECK32-NEXT: [[KEY_ADDR:%.*]] = alloca <2 x i64>, align 16 +// CHECK32-NEXT: [[H_ADDR:%.*]] = alloca i8*, align 4 +// CHECK32-NEXT: store i32 [[HTYPE:%.*]], i32* [[HTYPE_ADDR]], align 4 +// CHECK32-NEXT: store <2 x i64> [[KEY:%.*]], <2 x i64>* [[KEY_ADDR]], align 16 +// CHECK32-NEXT: store i8* [[H:%.*]], i8** [[H_ADDR]], align 4 +// CHECK32-NEXT: [[TMP0:%.*]] = load i32, i32* [[HTYPE_ADDR]], align 4 +// CHECK32-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[KEY_ADDR]], align 16 +// CHECK32-NEXT: [[TMP2:%.*]] = load i8*, i8** [[H_ADDR]], align 4 +// CHECK32-NEXT: store i32 [[TMP0]], i32* [[__HTYPE_ADDR_I]], align 4 +// CHECK32-NEXT: store <2 x i64> [[TMP1]], <2 x i64>* [[__KEY_ADDR_I]], align 16 +// CHECK32-NEXT: store i8* [[TMP2]], i8** [[__H_ADDR_I]], align 4 +// CHECK32-NEXT: [[TMP3:%.*]] = load i32, i32* [[__HTYPE_ADDR_I]], align 4 +// CHECK32-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[__KEY_ADDR_I]], align 16 +// CHECK32-NEXT: [[TMP5:%.*]] = load i8*, i8** [[__H_ADDR_I]], align 4 +// CHECK32-NEXT: [[TMP6:%.*]] = call { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey128(i32 [[TMP3]], <2 x i64> [[TMP4]]) #[[ATTR1]] +// CHECK32-NEXT: [[TMP7:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP6]], 1 +// CHECK32-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP5]] to <2 x i64>* +// CHECK32-NEXT: store <2 x i64> [[TMP7]], <2 x i64>* [[TMP8]], align 1 +// CHECK32-NEXT: [[TMP9:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP6]], 2 +// CHECK32-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[TMP5]], i32 16 +// CHECK32-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to <2 x i64>* +// CHECK32-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* [[TMP11]], align 1 +// CHECK32-NEXT: [[TMP12:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP6]], 3 +// CHECK32-NEXT: [[TMP13:%.*]] = getelementptr i8, i8* [[TMP5]], i32 32 +// CHECK32-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to <2 x i64>* +// CHECK32-NEXT: store <2 x i64> [[TMP12]], <2 x i64>* [[TMP14]], align 1 +// CHECK32-NEXT: [[TMP15:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP6]], 4 +// CHECK32-NEXT: [[TMP16:%.*]] = getelementptr i8, i8* [[TMP5]], i32 48 +// CHECK32-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP16]] to <2 x i64>* +// CHECK32-NEXT: store <2 x i64> [[TMP15]], <2 x i64>* [[TMP17]], align 1 +// CHECK32-NEXT: [[TMP18:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP6]], 5 +// CHECK32-NEXT: [[TMP19:%.*]] = getelementptr i8, i8* [[TMP5]], i32 64 +// CHECK32-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to <2 x i64>* +// CHECK32-NEXT: store <2 x i64> [[TMP18]], <2 x i64>* [[TMP20]], align 1 +// CHECK32-NEXT: [[TMP21:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP6]], 6 +// CHECK32-NEXT: [[TMP22:%.*]] = getelementptr i8, i8* [[TMP5]], i32 80 +// CHECK32-NEXT: [[TMP23:%.*]] = bitcast i8* [[TMP22]] to <2 x i64>* +// CHECK32-NEXT: store <2 x i64> [[TMP21]], <2 x i64>* [[TMP23]], align 1 +// CHECK32-NEXT: [[TMP24:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP6]], 0 +// CHECK32-NEXT: ret i32 [[TMP24]] +// unsigned int test_encodekey128_u32(unsigned int htype, __m128i key, void *h) { - //CHECK-LABEL: @test_encodekey128_u32 - //CHECK: call { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey128(i32 %{{.*}}, <2 x i64> %{{.*}}) - //CHECK: extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 1 - //CHECK: itcast i8* %{{.*}} to <2 x i64>* - //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}} - //CHECK: extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 2 - //CHECK: getelementptr i8, i8* %{{.*}}, i32 16 - //CHECK: bitcast i8* %{{.*}} to <2 x i64>* - //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}} - //CHECK: extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 3 - //CHECK: getelementptr i8, i8* %{{.*}}, i32 32 - //CHECK: bitcast i8* %{{.*}} to <2 x i64>* - //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}} - //CHECK: extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 4 - //CHECK: getelementptr i8, i8* %{{.*}}, i32 48 - //CHECK: bitcast i8* %{{.*}} to <2 x i64>* - //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}} - //CHECK: extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 5 - //CHECK: getelementptr i8, i8* %{{.*}}, i32 64 - //CHECK: bitcast i8* %{{.*}} to <2 x i64>* - //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}} - //CHECK: extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 6 - //CHECK: getelementptr i8, i8* %{{.*}}, i32 80 - //CHECK: bitcast i8* %{{.*}} to <2 x i64>* - //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}} - //CHECK: extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 0 return _mm_encodekey128_u32(htype, key, h); } +// CHECK64-LABEL: @test_encodekey256_u32( +// CHECK64-NEXT: entry: +// CHECK64-NEXT: [[__HTYPE_ADDR_I:%.*]] = alloca i32, align 4 +// CHECK64-NEXT: [[__KEY_LO_ADDR_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK64-NEXT: [[__KEY_HI_ADDR_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK64-NEXT: [[__H_ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK64-NEXT: [[HTYPE_ADDR:%.*]] = alloca i32, align 4 +// CHECK64-NEXT: [[KEY_LO_ADDR:%.*]] = alloca <2 x i64>, align 16 +// CHECK64-NEXT: [[KEY_HI_ADDR:%.*]] = alloca <2 x i64>, align 16 +// CHECK64-NEXT: [[H_ADDR:%.*]] = alloca i8*, align 8 +// CHECK64-NEXT: store i32 [[HTYPE:%.*]], i32* [[HTYPE_ADDR]], align 4 +// CHECK64-NEXT: store <2 x i64> [[KEY_LO:%.*]], <2 x i64>* [[KEY_LO_ADDR]], align 16 +// CHECK64-NEXT: store <2 x i64> [[KEY_HI:%.*]], <2 x i64>* [[KEY_HI_ADDR]], align 16 +// CHECK64-NEXT: store i8* [[H:%.*]], i8** [[H_ADDR]], align 8 +// CHECK64-NEXT: [[TMP0:%.*]] = load i32, i32* [[HTYPE_ADDR]], align 4 +// CHECK64-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[KEY_LO_ADDR]], align 16 +// CHECK64-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[KEY_HI_ADDR]], align 16 +// CHECK64-NEXT: [[TMP3:%.*]] = load i8*, i8** [[H_ADDR]], align 8 +// CHECK64-NEXT: store i32 [[TMP0]], i32* [[__HTYPE_ADDR_I]], align 4 +// CHECK64-NEXT: store <2 x i64> [[TMP1]], <2 x i64>* [[__KEY_LO_ADDR_I]], align 16 +// CHECK64-NEXT: store <2 x i64> [[TMP2]], <2 x i64>* [[__KEY_HI_ADDR_I]], align 16 +// CHECK64-NEXT: store i8* [[TMP3]], i8** [[__H_ADDR_I]], align 8 +// CHECK64-NEXT: [[TMP4:%.*]] = load i32, i32* [[__HTYPE_ADDR_I]], align 4 +// CHECK64-NEXT: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[__KEY_LO_ADDR_I]], align 16 +// CHECK64-NEXT: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[__KEY_HI_ADDR_I]], align 16 +// CHECK64-NEXT: [[TMP7:%.*]] = load i8*, i8** [[__H_ADDR_I]], align 8 +// CHECK64-NEXT: [[TMP8:%.*]] = call { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey256(i32 [[TMP4]], <2 x i64> [[TMP5]], <2 x i64> [[TMP6]]) #[[ATTR1]] +// CHECK64-NEXT: [[TMP9:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP8]], 1 +// CHECK64-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP7]] to <2 x i64>* +// CHECK64-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* [[TMP10]], align 1 +// CHECK64-NEXT: [[TMP11:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP8]], 2 +// CHECK64-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[TMP7]], i32 16 +// CHECK64-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to <2 x i64>* +// CHECK64-NEXT: store <2 x i64> [[TMP11]], <2 x i64>* [[TMP13]], align 1 +// CHECK64-NEXT: [[TMP14:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP8]], 3 +// CHECK64-NEXT: [[TMP15:%.*]] = getelementptr i8, i8* [[TMP7]], i32 32 +// CHECK64-NEXT: [[TMP16:%.*]] = bitcast i8* [[TMP15]] to <2 x i64>* +// CHECK64-NEXT: store <2 x i64> [[TMP14]], <2 x i64>* [[TMP16]], align 1 +// CHECK64-NEXT: [[TMP17:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP8]], 4 +// CHECK64-NEXT: [[TMP18:%.*]] = getelementptr i8, i8* [[TMP7]], i32 48 +// CHECK64-NEXT: [[TMP19:%.*]] = bitcast i8* [[TMP18]] to <2 x i64>* +// CHECK64-NEXT: store <2 x i64> [[TMP17]], <2 x i64>* [[TMP19]], align 1 +// CHECK64-NEXT: [[TMP20:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP8]], 5 +// CHECK64-NEXT: [[TMP21:%.*]] = getelementptr i8, i8* [[TMP7]], i32 64 +// CHECK64-NEXT: [[TMP22:%.*]] = bitcast i8* [[TMP21]] to <2 x i64>* +// CHECK64-NEXT: store <2 x i64> [[TMP20]], <2 x i64>* [[TMP22]], align 1 +// CHECK64-NEXT: [[TMP23:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP8]], 6 +// CHECK64-NEXT: [[TMP24:%.*]] = getelementptr i8, i8* [[TMP7]], i32 80 +// CHECK64-NEXT: [[TMP25:%.*]] = bitcast i8* [[TMP24]] to <2 x i64>* +// CHECK64-NEXT: store <2 x i64> [[TMP23]], <2 x i64>* [[TMP25]], align 1 +// CHECK64-NEXT: [[TMP26:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP8]], 7 +// CHECK64-NEXT: [[TMP27:%.*]] = getelementptr i8, i8* [[TMP7]], i32 96 +// CHECK64-NEXT: [[TMP28:%.*]] = bitcast i8* [[TMP27]] to <2 x i64>* +// CHECK64-NEXT: store <2 x i64> [[TMP26]], <2 x i64>* [[TMP28]], align 1 +// CHECK64-NEXT: [[TMP29:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP8]], 0 +// CHECK64-NEXT: ret i32 [[TMP29]] +// +// CHECK32-LABEL: @test_encodekey256_u32( +// CHECK32-NEXT: entry: +// CHECK32-NEXT: [[__HTYPE_ADDR_I:%.*]] = alloca i32, align 4 +// CHECK32-NEXT: [[__KEY_LO_ADDR_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK32-NEXT: [[__KEY_HI_ADDR_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK32-NEXT: [[__H_ADDR_I:%.*]] = alloca i8*, align 4 +// CHECK32-NEXT: [[HTYPE_ADDR:%.*]] = alloca i32, align 4 +// CHECK32-NEXT: [[KEY_LO_ADDR:%.*]] = alloca <2 x i64>, align 16 +// CHECK32-NEXT: [[KEY_HI_ADDR:%.*]] = alloca <2 x i64>, align 16 +// CHECK32-NEXT: [[H_ADDR:%.*]] = alloca i8*, align 4 +// CHECK32-NEXT: store i32 [[HTYPE:%.*]], i32* [[HTYPE_ADDR]], align 4 +// CHECK32-NEXT: store <2 x i64> [[KEY_LO:%.*]], <2 x i64>* [[KEY_LO_ADDR]], align 16 +// CHECK32-NEXT: store <2 x i64> [[KEY_HI:%.*]], <2 x i64>* [[KEY_HI_ADDR]], align 16 +// CHECK32-NEXT: store i8* [[H:%.*]], i8** [[H_ADDR]], align 4 +// CHECK32-NEXT: [[TMP0:%.*]] = load i32, i32* [[HTYPE_ADDR]], align 4 +// CHECK32-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[KEY_LO_ADDR]], align 16 +// CHECK32-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[KEY_HI_ADDR]], align 16 +// CHECK32-NEXT: [[TMP3:%.*]] = load i8*, i8** [[H_ADDR]], align 4 +// CHECK32-NEXT: store i32 [[TMP0]], i32* [[__HTYPE_ADDR_I]], align 4 +// CHECK32-NEXT: store <2 x i64> [[TMP1]], <2 x i64>* [[__KEY_LO_ADDR_I]], align 16 +// CHECK32-NEXT: store <2 x i64> [[TMP2]], <2 x i64>* [[__KEY_HI_ADDR_I]], align 16 +// CHECK32-NEXT: store i8* [[TMP3]], i8** [[__H_ADDR_I]], align 4 +// CHECK32-NEXT: [[TMP4:%.*]] = load i32, i32* [[__HTYPE_ADDR_I]], align 4 +// CHECK32-NEXT: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[__KEY_LO_ADDR_I]], align 16 +// CHECK32-NEXT: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[__KEY_HI_ADDR_I]], align 16 +// CHECK32-NEXT: [[TMP7:%.*]] = load i8*, i8** [[__H_ADDR_I]], align 4 +// CHECK32-NEXT: [[TMP8:%.*]] = call { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey256(i32 [[TMP4]], <2 x i64> [[TMP5]], <2 x i64> [[TMP6]]) #[[ATTR1]] +// CHECK32-NEXT: [[TMP9:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP8]], 1 +// CHECK32-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP7]] to <2 x i64>* +// CHECK32-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* [[TMP10]], align 1 +// CHECK32-NEXT: [[TMP11:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP8]], 2 +// CHECK32-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[TMP7]], i32 16 +// CHECK32-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to <2 x i64>* +// CHECK32-NEXT: store <2 x i64> [[TMP11]], <2 x i64>* [[TMP13]], align 1 +// CHECK32-NEXT: [[TMP14:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP8]], 3 +// CHECK32-NEXT: [[TMP15:%.*]] = getelementptr i8, i8* [[TMP7]], i32 32 +// CHECK32-NEXT: [[TMP16:%.*]] = bitcast i8* [[TMP15]] to <2 x i64>* +// CHECK32-NEXT: store <2 x i64> [[TMP14]], <2 x i64>* [[TMP16]], align 1 +// CHECK32-NEXT: [[TMP17:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP8]], 4 +// CHECK32-NEXT: [[TMP18:%.*]] = getelementptr i8, i8* [[TMP7]], i32 48 +// CHECK32-NEXT: [[TMP19:%.*]] = bitcast i8* [[TMP18]] to <2 x i64>* +// CHECK32-NEXT: store <2 x i64> [[TMP17]], <2 x i64>* [[TMP19]], align 1 +// CHECK32-NEXT: [[TMP20:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP8]], 5 +// CHECK32-NEXT: [[TMP21:%.*]] = getelementptr i8, i8* [[TMP7]], i32 64 +// CHECK32-NEXT: [[TMP22:%.*]] = bitcast i8* [[TMP21]] to <2 x i64>* +// CHECK32-NEXT: store <2 x i64> [[TMP20]], <2 x i64>* [[TMP22]], align 1 +// CHECK32-NEXT: [[TMP23:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP8]], 6 +// CHECK32-NEXT: [[TMP24:%.*]] = getelementptr i8, i8* [[TMP7]], i32 80 +// CHECK32-NEXT: [[TMP25:%.*]] = bitcast i8* [[TMP24]] to <2 x i64>* +// CHECK32-NEXT: store <2 x i64> [[TMP23]], <2 x i64>* [[TMP25]], align 1 +// CHECK32-NEXT: [[TMP26:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP8]], 7 +// CHECK32-NEXT: [[TMP27:%.*]] = getelementptr i8, i8* [[TMP7]], i32 96 +// CHECK32-NEXT: [[TMP28:%.*]] = bitcast i8* [[TMP27]] to <2 x i64>* +// CHECK32-NEXT: store <2 x i64> [[TMP26]], <2 x i64>* [[TMP28]], align 1 +// CHECK32-NEXT: [[TMP29:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP8]], 0 +// CHECK32-NEXT: ret i32 [[TMP29]] +// unsigned int test_encodekey256_u32(unsigned int htype, __m128i key_lo, __m128i key_hi, void *h) { - //CHECK-LABEL: @test_encodekey256_u32 - //CHECK: call { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey256(i32 %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) - //CHECK: extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 1 - //CHECK: itcast i8* %{{.*}} to <2 x i64>* - //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}} - //CHECK: extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 2 - //CHECK: getelementptr i8, i8* %{{.*}}, i32 16 - //CHECK: bitcast i8* %{{.*}} to <2 x i64>* - //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}} - //CHECK: extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 3 - //CHECK: getelementptr i8, i8* %{{.*}}, i32 32 - //CHECK: bitcast i8* %{{.*}} to <2 x i64>* - //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}} - //CHECK: extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 4 - //CHECK: getelementptr i8, i8* %{{.*}}, i32 48 - //CHECK: bitcast i8* %{{.*}} to <2 x i64>* - //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}} - //CHECK: extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 5 - //CHECK: getelementptr i8, i8* %{{.*}}, i32 64 - //CHECK: bitcast i8* %{{.*}} to <2 x i64>* - //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}} - //CHECK: extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 6 - //CHECK: getelementptr i8, i8* %{{.*}}, i32 80 - //CHECK: bitcast i8* %{{.*}} to <2 x i64>* - //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}} - //CHECK: extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 7 - //CHECK: getelementptr i8, i8* %{{.*}}, i32 96 - //CHECK: bitcast i8* %{{.*}} to <2 x i64>* - //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}} - //CHECK: extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 0 return _mm_encodekey256_u32(htype, key_lo, key_hi, h); } +// CHECK64-LABEL: @test_mm_aesenc256kl_u8( +// CHECK64-NEXT: entry: +// CHECK64-NEXT: [[__ODATA_ADDR_I:%.*]] = alloca <2 x i64>*, align 8 +// CHECK64-NEXT: [[__IDATA_ADDR_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK64-NEXT: [[__H_ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK64-NEXT: [[ODATA_ADDR:%.*]] = alloca <2 x i64>*, align 8 +// CHECK64-NEXT: [[IDATA_ADDR:%.*]] = alloca <2 x i64>, align 16 +// CHECK64-NEXT: [[H_ADDR:%.*]] = alloca i8*, align 8 +// CHECK64-NEXT: store <2 x i64>* [[ODATA:%.*]], <2 x i64>** [[ODATA_ADDR]], align 8 +// CHECK64-NEXT: store <2 x i64> [[IDATA:%.*]], <2 x i64>* [[IDATA_ADDR]], align 16 +// CHECK64-NEXT: store i8* [[H:%.*]], i8** [[H_ADDR]], align 8 +// CHECK64-NEXT: [[TMP0:%.*]] = load <2 x i64>*, <2 x i64>** [[ODATA_ADDR]], align 8 +// CHECK64-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[IDATA_ADDR]], align 16 +// CHECK64-NEXT: [[TMP2:%.*]] = load i8*, i8** [[H_ADDR]], align 8 +// CHECK64-NEXT: store <2 x i64>* [[TMP0]], <2 x i64>** [[__ODATA_ADDR_I]], align 8 +// CHECK64-NEXT: store <2 x i64> [[TMP1]], <2 x i64>* [[__IDATA_ADDR_I]], align 16 +// CHECK64-NEXT: store i8* [[TMP2]], i8** [[__H_ADDR_I]], align 8 +// CHECK64-NEXT: [[TMP3:%.*]] = load <2 x i64>*, <2 x i64>** [[__ODATA_ADDR_I]], align 8 +// CHECK64-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[__IDATA_ADDR_I]], align 16 +// CHECK64-NEXT: [[TMP5:%.*]] = load i8*, i8** [[__H_ADDR_I]], align 8 +// CHECK64-NEXT: [[TMP6:%.*]] = call { i8, <2 x i64> } @llvm.x86.aesenc256kl(<2 x i64> [[TMP4]], i8* [[TMP5]]) #[[ATTR1]] +// CHECK64-NEXT: [[TMP7:%.*]] = extractvalue { i8, <2 x i64> } [[TMP6]], 0 +// CHECK64-NEXT: [[TMP8:%.*]] = trunc i8 [[TMP7]] to i1 +// CHECK64-NEXT: [[TMP9:%.*]] = extractvalue { i8, <2 x i64> } [[TMP6]], 1 +// CHECK64-NEXT: br i1 [[TMP8]], label [[AESENC256KL_NO_ERROR_I:%.*]], label [[AESENC256KL_ERROR_I:%.*]] +// CHECK64: aesenc256kl_no_error.i: +// CHECK64-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* [[TMP3]], align 16 +// CHECK64-NEXT: br label [[_MM_AESENC256KL_U8_EXIT:%.*]] +// CHECK64: aesenc256kl_error.i: +// CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP3]], align 16 +// CHECK64-NEXT: br label [[_MM_AESENC256KL_U8_EXIT]] +// CHECK64: _mm_aesenc256kl_u8.exit: +// CHECK64-NEXT: [[TMP10:%.*]] = extractvalue { i8, <2 x i64> } [[TMP6]], 0 +// CHECK64-NEXT: ret i8 [[TMP10]] +// +// CHECK32-LABEL: @test_mm_aesenc256kl_u8( +// CHECK32-NEXT: entry: +// CHECK32-NEXT: [[__ODATA_ADDR_I:%.*]] = alloca <2 x i64>*, align 4 +// CHECK32-NEXT: [[__IDATA_ADDR_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK32-NEXT: [[__H_ADDR_I:%.*]] = alloca i8*, align 4 +// CHECK32-NEXT: [[ODATA_ADDR:%.*]] = alloca <2 x i64>*, align 4 +// CHECK32-NEXT: [[IDATA_ADDR:%.*]] = alloca <2 x i64>, align 16 +// CHECK32-NEXT: [[H_ADDR:%.*]] = alloca i8*, align 4 +// CHECK32-NEXT: store <2 x i64>* [[ODATA:%.*]], <2 x i64>** [[ODATA_ADDR]], align 4 +// CHECK32-NEXT: store <2 x i64> [[IDATA:%.*]], <2 x i64>* [[IDATA_ADDR]], align 16 +// CHECK32-NEXT: store i8* [[H:%.*]], i8** [[H_ADDR]], align 4 +// CHECK32-NEXT: [[TMP0:%.*]] = load <2 x i64>*, <2 x i64>** [[ODATA_ADDR]], align 4 +// CHECK32-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[IDATA_ADDR]], align 16 +// CHECK32-NEXT: [[TMP2:%.*]] = load i8*, i8** [[H_ADDR]], align 4 +// CHECK32-NEXT: store <2 x i64>* [[TMP0]], <2 x i64>** [[__ODATA_ADDR_I]], align 4 +// CHECK32-NEXT: store <2 x i64> [[TMP1]], <2 x i64>* [[__IDATA_ADDR_I]], align 16 +// CHECK32-NEXT: store i8* [[TMP2]], i8** [[__H_ADDR_I]], align 4 +// CHECK32-NEXT: [[TMP3:%.*]] = load <2 x i64>*, <2 x i64>** [[__ODATA_ADDR_I]], align 4 +// CHECK32-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[__IDATA_ADDR_I]], align 16 +// CHECK32-NEXT: [[TMP5:%.*]] = load i8*, i8** [[__H_ADDR_I]], align 4 +// CHECK32-NEXT: [[TMP6:%.*]] = call { i8, <2 x i64> } @llvm.x86.aesenc256kl(<2 x i64> [[TMP4]], i8* [[TMP5]]) #[[ATTR1]] +// CHECK32-NEXT: [[TMP7:%.*]] = extractvalue { i8, <2 x i64> } [[TMP6]], 0 +// CHECK32-NEXT: [[TMP8:%.*]] = trunc i8 [[TMP7]] to i1 +// CHECK32-NEXT: [[TMP9:%.*]] = extractvalue { i8, <2 x i64> } [[TMP6]], 1 +// CHECK32-NEXT: br i1 [[TMP8]], label [[AESENC256KL_NO_ERROR_I:%.*]], label [[AESENC256KL_ERROR_I:%.*]] +// CHECK32: aesenc256kl_no_error.i: +// CHECK32-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* [[TMP3]], align 16 +// CHECK32-NEXT: br label [[_MM_AESENC256KL_U8_EXIT:%.*]] +// CHECK32: aesenc256kl_error.i: +// CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP3]], align 16 +// CHECK32-NEXT: br label [[_MM_AESENC256KL_U8_EXIT]] +// CHECK32: _mm_aesenc256kl_u8.exit: +// CHECK32-NEXT: [[TMP10:%.*]] = extractvalue { i8, <2 x i64> } [[TMP6]], 0 +// CHECK32-NEXT: ret i8 [[TMP10]] +// unsigned char test_mm_aesenc256kl_u8(__m128i *odata, __m128i idata, const void *h) { - //CHECK-LABEL: @test_mm_aesenc256kl_u8 - //CHECK: call { i8, <2 x i64> } @llvm.x86.aesenc256kl(<2 x i64> %{{.*}}, i8* %{{.*}}) - //CHECK: extractvalue { i8, <2 x i64> } %{{.*}}, 1 - //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 - //CHECK: extractvalue { i8, <2 x i64> } %{{.*}}, 0 return _mm_aesenc256kl_u8(odata, idata, h); } +// CHECK64-LABEL: @test_mm_aesdec256kl_u8( +// CHECK64-NEXT: entry: +// CHECK64-NEXT: [[__ODATA_ADDR_I:%.*]] = alloca <2 x i64>*, align 8 +// CHECK64-NEXT: [[__IDATA_ADDR_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK64-NEXT: [[__H_ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK64-NEXT: [[ODATA_ADDR:%.*]] = alloca <2 x i64>*, align 8 +// CHECK64-NEXT: [[IDATA_ADDR:%.*]] = alloca <2 x i64>, align 16 +// CHECK64-NEXT: [[H_ADDR:%.*]] = alloca i8*, align 8 +// CHECK64-NEXT: store <2 x i64>* [[ODATA:%.*]], <2 x i64>** [[ODATA_ADDR]], align 8 +// CHECK64-NEXT: store <2 x i64> [[IDATA:%.*]], <2 x i64>* [[IDATA_ADDR]], align 16 +// CHECK64-NEXT: store i8* [[H:%.*]], i8** [[H_ADDR]], align 8 +// CHECK64-NEXT: [[TMP0:%.*]] = load <2 x i64>*, <2 x i64>** [[ODATA_ADDR]], align 8 +// CHECK64-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[IDATA_ADDR]], align 16 +// CHECK64-NEXT: [[TMP2:%.*]] = load i8*, i8** [[H_ADDR]], align 8 +// CHECK64-NEXT: store <2 x i64>* [[TMP0]], <2 x i64>** [[__ODATA_ADDR_I]], align 8 +// CHECK64-NEXT: store <2 x i64> [[TMP1]], <2 x i64>* [[__IDATA_ADDR_I]], align 16 +// CHECK64-NEXT: store i8* [[TMP2]], i8** [[__H_ADDR_I]], align 8 +// CHECK64-NEXT: [[TMP3:%.*]] = load <2 x i64>*, <2 x i64>** [[__ODATA_ADDR_I]], align 8 +// CHECK64-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[__IDATA_ADDR_I]], align 16 +// CHECK64-NEXT: [[TMP5:%.*]] = load i8*, i8** [[__H_ADDR_I]], align 8 +// CHECK64-NEXT: [[TMP6:%.*]] = call { i8, <2 x i64> } @llvm.x86.aesdec256kl(<2 x i64> [[TMP4]], i8* [[TMP5]]) #[[ATTR1]] +// CHECK64-NEXT: [[TMP7:%.*]] = extractvalue { i8, <2 x i64> } [[TMP6]], 0 +// CHECK64-NEXT: [[TMP8:%.*]] = trunc i8 [[TMP7]] to i1 +// CHECK64-NEXT: [[TMP9:%.*]] = extractvalue { i8, <2 x i64> } [[TMP6]], 1 +// CHECK64-NEXT: br i1 [[TMP8]], label [[AESDEC256KL_NO_ERROR_I:%.*]], label [[AESDEC256KL_ERROR_I:%.*]] +// CHECK64: aesdec256kl_no_error.i: +// CHECK64-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* [[TMP3]], align 16 +// CHECK64-NEXT: br label [[_MM_AESDEC256KL_U8_EXIT:%.*]] +// CHECK64: aesdec256kl_error.i: +// CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP3]], align 16 +// CHECK64-NEXT: br label [[_MM_AESDEC256KL_U8_EXIT]] +// CHECK64: _mm_aesdec256kl_u8.exit: +// CHECK64-NEXT: [[TMP10:%.*]] = extractvalue { i8, <2 x i64> } [[TMP6]], 0 +// CHECK64-NEXT: ret i8 [[TMP10]] +// +// CHECK32-LABEL: @test_mm_aesdec256kl_u8( +// CHECK32-NEXT: entry: +// CHECK32-NEXT: [[__ODATA_ADDR_I:%.*]] = alloca <2 x i64>*, align 4 +// CHECK32-NEXT: [[__IDATA_ADDR_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK32-NEXT: [[__H_ADDR_I:%.*]] = alloca i8*, align 4 +// CHECK32-NEXT: [[ODATA_ADDR:%.*]] = alloca <2 x i64>*, align 4 +// CHECK32-NEXT: [[IDATA_ADDR:%.*]] = alloca <2 x i64>, align 16 +// CHECK32-NEXT: [[H_ADDR:%.*]] = alloca i8*, align 4 +// CHECK32-NEXT: store <2 x i64>* [[ODATA:%.*]], <2 x i64>** [[ODATA_ADDR]], align 4 +// CHECK32-NEXT: store <2 x i64> [[IDATA:%.*]], <2 x i64>* [[IDATA_ADDR]], align 16 +// CHECK32-NEXT: store i8* [[H:%.*]], i8** [[H_ADDR]], align 4 +// CHECK32-NEXT: [[TMP0:%.*]] = load <2 x i64>*, <2 x i64>** [[ODATA_ADDR]], align 4 +// CHECK32-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[IDATA_ADDR]], align 16 +// CHECK32-NEXT: [[TMP2:%.*]] = load i8*, i8** [[H_ADDR]], align 4 +// CHECK32-NEXT: store <2 x i64>* [[TMP0]], <2 x i64>** [[__ODATA_ADDR_I]], align 4 +// CHECK32-NEXT: store <2 x i64> [[TMP1]], <2 x i64>* [[__IDATA_ADDR_I]], align 16 +// CHECK32-NEXT: store i8* [[TMP2]], i8** [[__H_ADDR_I]], align 4 +// CHECK32-NEXT: [[TMP3:%.*]] = load <2 x i64>*, <2 x i64>** [[__ODATA_ADDR_I]], align 4 +// CHECK32-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[__IDATA_ADDR_I]], align 16 +// CHECK32-NEXT: [[TMP5:%.*]] = load i8*, i8** [[__H_ADDR_I]], align 4 +// CHECK32-NEXT: [[TMP6:%.*]] = call { i8, <2 x i64> } @llvm.x86.aesdec256kl(<2 x i64> [[TMP4]], i8* [[TMP5]]) #[[ATTR1]] +// CHECK32-NEXT: [[TMP7:%.*]] = extractvalue { i8, <2 x i64> } [[TMP6]], 0 +// CHECK32-NEXT: [[TMP8:%.*]] = trunc i8 [[TMP7]] to i1 +// CHECK32-NEXT: [[TMP9:%.*]] = extractvalue { i8, <2 x i64> } [[TMP6]], 1 +// CHECK32-NEXT: br i1 [[TMP8]], label [[AESDEC256KL_NO_ERROR_I:%.*]], label [[AESDEC256KL_ERROR_I:%.*]] +// CHECK32: aesdec256kl_no_error.i: +// CHECK32-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* [[TMP3]], align 16 +// CHECK32-NEXT: br label [[_MM_AESDEC256KL_U8_EXIT:%.*]] +// CHECK32: aesdec256kl_error.i: +// CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP3]], align 16 +// CHECK32-NEXT: br label [[_MM_AESDEC256KL_U8_EXIT]] +// CHECK32: _mm_aesdec256kl_u8.exit: +// CHECK32-NEXT: [[TMP10:%.*]] = extractvalue { i8, <2 x i64> } [[TMP6]], 0 +// CHECK32-NEXT: ret i8 [[TMP10]] +// unsigned char test_mm_aesdec256kl_u8(__m128i *odata, __m128i idata, const void *h) { - //CHECK-LABEL: @test_mm_aesdec256kl_u8 - //CHECK: call { i8, <2 x i64> } @llvm.x86.aesdec256kl(<2 x i64> %{{.*}}, i8* %{{.*}}) - //CHECK: extractvalue { i8, <2 x i64> } %{{.*}}, 1 - //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 - //CHECK: extractvalue { i8, <2 x i64> } %{{.*}}, 0 return _mm_aesdec256kl_u8(odata, idata, h); } +// CHECK64-LABEL: @test_mm_aesenc128kl_u8( +// CHECK64-NEXT: entry: +// CHECK64-NEXT: [[__ODATA_ADDR_I:%.*]] = alloca <2 x i64>*, align 8 +// CHECK64-NEXT: [[__IDATA_ADDR_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK64-NEXT: [[__H_ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK64-NEXT: [[ODATA_ADDR:%.*]] = alloca <2 x i64>*, align 8 +// CHECK64-NEXT: [[IDATA_ADDR:%.*]] = alloca <2 x i64>, align 16 +// CHECK64-NEXT: [[H_ADDR:%.*]] = alloca i8*, align 8 +// CHECK64-NEXT: store <2 x i64>* [[ODATA:%.*]], <2 x i64>** [[ODATA_ADDR]], align 8 +// CHECK64-NEXT: store <2 x i64> [[IDATA:%.*]], <2 x i64>* [[IDATA_ADDR]], align 16 +// CHECK64-NEXT: store i8* [[H:%.*]], i8** [[H_ADDR]], align 8 +// CHECK64-NEXT: [[TMP0:%.*]] = load <2 x i64>*, <2 x i64>** [[ODATA_ADDR]], align 8 +// CHECK64-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[IDATA_ADDR]], align 16 +// CHECK64-NEXT: [[TMP2:%.*]] = load i8*, i8** [[H_ADDR]], align 8 +// CHECK64-NEXT: store <2 x i64>* [[TMP0]], <2 x i64>** [[__ODATA_ADDR_I]], align 8 +// CHECK64-NEXT: store <2 x i64> [[TMP1]], <2 x i64>* [[__IDATA_ADDR_I]], align 16 +// CHECK64-NEXT: store i8* [[TMP2]], i8** [[__H_ADDR_I]], align 8 +// CHECK64-NEXT: [[TMP3:%.*]] = load <2 x i64>*, <2 x i64>** [[__ODATA_ADDR_I]], align 8 +// CHECK64-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[__IDATA_ADDR_I]], align 16 +// CHECK64-NEXT: [[TMP5:%.*]] = load i8*, i8** [[__H_ADDR_I]], align 8 +// CHECK64-NEXT: [[TMP6:%.*]] = call { i8, <2 x i64> } @llvm.x86.aesenc128kl(<2 x i64> [[TMP4]], i8* [[TMP5]]) #[[ATTR1]] +// CHECK64-NEXT: [[TMP7:%.*]] = extractvalue { i8, <2 x i64> } [[TMP6]], 0 +// CHECK64-NEXT: [[TMP8:%.*]] = trunc i8 [[TMP7]] to i1 +// CHECK64-NEXT: [[TMP9:%.*]] = extractvalue { i8, <2 x i64> } [[TMP6]], 1 +// CHECK64-NEXT: br i1 [[TMP8]], label [[AESENC128KL_NO_ERROR_I:%.*]], label [[AESENC128KL_ERROR_I:%.*]] +// CHECK64: aesenc128kl_no_error.i: +// CHECK64-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* [[TMP3]], align 16 +// CHECK64-NEXT: br label [[_MM_AESENC128KL_U8_EXIT:%.*]] +// CHECK64: aesenc128kl_error.i: +// CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP3]], align 16 +// CHECK64-NEXT: br label [[_MM_AESENC128KL_U8_EXIT]] +// CHECK64: _mm_aesenc128kl_u8.exit: +// CHECK64-NEXT: [[TMP10:%.*]] = extractvalue { i8, <2 x i64> } [[TMP6]], 0 +// CHECK64-NEXT: ret i8 [[TMP10]] +// +// CHECK32-LABEL: @test_mm_aesenc128kl_u8( +// CHECK32-NEXT: entry: +// CHECK32-NEXT: [[__ODATA_ADDR_I:%.*]] = alloca <2 x i64>*, align 4 +// CHECK32-NEXT: [[__IDATA_ADDR_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK32-NEXT: [[__H_ADDR_I:%.*]] = alloca i8*, align 4 +// CHECK32-NEXT: [[ODATA_ADDR:%.*]] = alloca <2 x i64>*, align 4 +// CHECK32-NEXT: [[IDATA_ADDR:%.*]] = alloca <2 x i64>, align 16 +// CHECK32-NEXT: [[H_ADDR:%.*]] = alloca i8*, align 4 +// CHECK32-NEXT: store <2 x i64>* [[ODATA:%.*]], <2 x i64>** [[ODATA_ADDR]], align 4 +// CHECK32-NEXT: store <2 x i64> [[IDATA:%.*]], <2 x i64>* [[IDATA_ADDR]], align 16 +// CHECK32-NEXT: store i8* [[H:%.*]], i8** [[H_ADDR]], align 4 +// CHECK32-NEXT: [[TMP0:%.*]] = load <2 x i64>*, <2 x i64>** [[ODATA_ADDR]], align 4 +// CHECK32-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[IDATA_ADDR]], align 16 +// CHECK32-NEXT: [[TMP2:%.*]] = load i8*, i8** [[H_ADDR]], align 4 +// CHECK32-NEXT: store <2 x i64>* [[TMP0]], <2 x i64>** [[__ODATA_ADDR_I]], align 4 +// CHECK32-NEXT: store <2 x i64> [[TMP1]], <2 x i64>* [[__IDATA_ADDR_I]], align 16 +// CHECK32-NEXT: store i8* [[TMP2]], i8** [[__H_ADDR_I]], align 4 +// CHECK32-NEXT: [[TMP3:%.*]] = load <2 x i64>*, <2 x i64>** [[__ODATA_ADDR_I]], align 4 +// CHECK32-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[__IDATA_ADDR_I]], align 16 +// CHECK32-NEXT: [[TMP5:%.*]] = load i8*, i8** [[__H_ADDR_I]], align 4 +// CHECK32-NEXT: [[TMP6:%.*]] = call { i8, <2 x i64> } @llvm.x86.aesenc128kl(<2 x i64> [[TMP4]], i8* [[TMP5]]) #[[ATTR1]] +// CHECK32-NEXT: [[TMP7:%.*]] = extractvalue { i8, <2 x i64> } [[TMP6]], 0 +// CHECK32-NEXT: [[TMP8:%.*]] = trunc i8 [[TMP7]] to i1 +// CHECK32-NEXT: [[TMP9:%.*]] = extractvalue { i8, <2 x i64> } [[TMP6]], 1 +// CHECK32-NEXT: br i1 [[TMP8]], label [[AESENC128KL_NO_ERROR_I:%.*]], label [[AESENC128KL_ERROR_I:%.*]] +// CHECK32: aesenc128kl_no_error.i: +// CHECK32-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* [[TMP3]], align 16 +// CHECK32-NEXT: br label [[_MM_AESENC128KL_U8_EXIT:%.*]] +// CHECK32: aesenc128kl_error.i: +// CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP3]], align 16 +// CHECK32-NEXT: br label [[_MM_AESENC128KL_U8_EXIT]] +// CHECK32: _mm_aesenc128kl_u8.exit: +// CHECK32-NEXT: [[TMP10:%.*]] = extractvalue { i8, <2 x i64> } [[TMP6]], 0 +// CHECK32-NEXT: ret i8 [[TMP10]] +// unsigned char test_mm_aesenc128kl_u8(__m128i *odata, __m128i idata, const void *h) { - //CHECK-LABEL: @test_mm_aesenc128kl_u8 - //CHECK: call { i8, <2 x i64> } @llvm.x86.aesenc128kl(<2 x i64> %{{.*}}, i8* %{{.*}}) - //CHECK: extractvalue { i8, <2 x i64> } %{{.*}}, 1 - //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 - //CHECK: extractvalue { i8, <2 x i64> } %{{.*}}, 0 return _mm_aesenc128kl_u8(odata, idata, h); } +// CHECK64-LABEL: @test_mm_aesdec128kl_u8( +// CHECK64-NEXT: entry: +// CHECK64-NEXT: [[__ODATA_ADDR_I:%.*]] = alloca <2 x i64>*, align 8 +// CHECK64-NEXT: [[__IDATA_ADDR_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK64-NEXT: [[__H_ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK64-NEXT: [[ODATA_ADDR:%.*]] = alloca <2 x i64>*, align 8 +// CHECK64-NEXT: [[IDATA_ADDR:%.*]] = alloca <2 x i64>, align 16 +// CHECK64-NEXT: [[H_ADDR:%.*]] = alloca i8*, align 8 +// CHECK64-NEXT: store <2 x i64>* [[ODATA:%.*]], <2 x i64>** [[ODATA_ADDR]], align 8 +// CHECK64-NEXT: store <2 x i64> [[IDATA:%.*]], <2 x i64>* [[IDATA_ADDR]], align 16 +// CHECK64-NEXT: store i8* [[H:%.*]], i8** [[H_ADDR]], align 8 +// CHECK64-NEXT: [[TMP0:%.*]] = load <2 x i64>*, <2 x i64>** [[ODATA_ADDR]], align 8 +// CHECK64-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[IDATA_ADDR]], align 16 +// CHECK64-NEXT: [[TMP2:%.*]] = load i8*, i8** [[H_ADDR]], align 8 +// CHECK64-NEXT: store <2 x i64>* [[TMP0]], <2 x i64>** [[__ODATA_ADDR_I]], align 8 +// CHECK64-NEXT: store <2 x i64> [[TMP1]], <2 x i64>* [[__IDATA_ADDR_I]], align 16 +// CHECK64-NEXT: store i8* [[TMP2]], i8** [[__H_ADDR_I]], align 8 +// CHECK64-NEXT: [[TMP3:%.*]] = load <2 x i64>*, <2 x i64>** [[__ODATA_ADDR_I]], align 8 +// CHECK64-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[__IDATA_ADDR_I]], align 16 +// CHECK64-NEXT: [[TMP5:%.*]] = load i8*, i8** [[__H_ADDR_I]], align 8 +// CHECK64-NEXT: [[TMP6:%.*]] = call { i8, <2 x i64> } @llvm.x86.aesdec128kl(<2 x i64> [[TMP4]], i8* [[TMP5]]) #[[ATTR1]] +// CHECK64-NEXT: [[TMP7:%.*]] = extractvalue { i8, <2 x i64> } [[TMP6]], 0 +// CHECK64-NEXT: [[TMP8:%.*]] = trunc i8 [[TMP7]] to i1 +// CHECK64-NEXT: [[TMP9:%.*]] = extractvalue { i8, <2 x i64> } [[TMP6]], 1 +// CHECK64-NEXT: br i1 [[TMP8]], label [[AESDEC128KL_NO_ERROR_I:%.*]], label [[AESDEC128KL_ERROR_I:%.*]] +// CHECK64: aesdec128kl_no_error.i: +// CHECK64-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* [[TMP3]], align 16 +// CHECK64-NEXT: br label [[_MM_AESDEC128KL_U8_EXIT:%.*]] +// CHECK64: aesdec128kl_error.i: +// CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP3]], align 16 +// CHECK64-NEXT: br label [[_MM_AESDEC128KL_U8_EXIT]] +// CHECK64: _mm_aesdec128kl_u8.exit: +// CHECK64-NEXT: [[TMP10:%.*]] = extractvalue { i8, <2 x i64> } [[TMP6]], 0 +// CHECK64-NEXT: ret i8 [[TMP10]] +// +// CHECK32-LABEL: @test_mm_aesdec128kl_u8( +// CHECK32-NEXT: entry: +// CHECK32-NEXT: [[__ODATA_ADDR_I:%.*]] = alloca <2 x i64>*, align 4 +// CHECK32-NEXT: [[__IDATA_ADDR_I:%.*]] = alloca <2 x i64>, align 16 +// CHECK32-NEXT: [[__H_ADDR_I:%.*]] = alloca i8*, align 4 +// CHECK32-NEXT: [[ODATA_ADDR:%.*]] = alloca <2 x i64>*, align 4 +// CHECK32-NEXT: [[IDATA_ADDR:%.*]] = alloca <2 x i64>, align 16 +// CHECK32-NEXT: [[H_ADDR:%.*]] = alloca i8*, align 4 +// CHECK32-NEXT: store <2 x i64>* [[ODATA:%.*]], <2 x i64>** [[ODATA_ADDR]], align 4 +// CHECK32-NEXT: store <2 x i64> [[IDATA:%.*]], <2 x i64>* [[IDATA_ADDR]], align 16 +// CHECK32-NEXT: store i8* [[H:%.*]], i8** [[H_ADDR]], align 4 +// CHECK32-NEXT: [[TMP0:%.*]] = load <2 x i64>*, <2 x i64>** [[ODATA_ADDR]], align 4 +// CHECK32-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[IDATA_ADDR]], align 16 +// CHECK32-NEXT: [[TMP2:%.*]] = load i8*, i8** [[H_ADDR]], align 4 +// CHECK32-NEXT: store <2 x i64>* [[TMP0]], <2 x i64>** [[__ODATA_ADDR_I]], align 4 +// CHECK32-NEXT: store <2 x i64> [[TMP1]], <2 x i64>* [[__IDATA_ADDR_I]], align 16 +// CHECK32-NEXT: store i8* [[TMP2]], i8** [[__H_ADDR_I]], align 4 +// CHECK32-NEXT: [[TMP3:%.*]] = load <2 x i64>*, <2 x i64>** [[__ODATA_ADDR_I]], align 4 +// CHECK32-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[__IDATA_ADDR_I]], align 16 +// CHECK32-NEXT: [[TMP5:%.*]] = load i8*, i8** [[__H_ADDR_I]], align 4 +// CHECK32-NEXT: [[TMP6:%.*]] = call { i8, <2 x i64> } @llvm.x86.aesdec128kl(<2 x i64> [[TMP4]], i8* [[TMP5]]) #[[ATTR1]] +// CHECK32-NEXT: [[TMP7:%.*]] = extractvalue { i8, <2 x i64> } [[TMP6]], 0 +// CHECK32-NEXT: [[TMP8:%.*]] = trunc i8 [[TMP7]] to i1 +// CHECK32-NEXT: [[TMP9:%.*]] = extractvalue { i8, <2 x i64> } [[TMP6]], 1 +// CHECK32-NEXT: br i1 [[TMP8]], label [[AESDEC128KL_NO_ERROR_I:%.*]], label [[AESDEC128KL_ERROR_I:%.*]] +// CHECK32: aesdec128kl_no_error.i: +// CHECK32-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* [[TMP3]], align 16 +// CHECK32-NEXT: br label [[_MM_AESDEC128KL_U8_EXIT:%.*]] +// CHECK32: aesdec128kl_error.i: +// CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP3]], align 16 +// CHECK32-NEXT: br label [[_MM_AESDEC128KL_U8_EXIT]] +// CHECK32: _mm_aesdec128kl_u8.exit: +// CHECK32-NEXT: [[TMP10:%.*]] = extractvalue { i8, <2 x i64> } [[TMP6]], 0 +// CHECK32-NEXT: ret i8 [[TMP10]] +// unsigned char test_mm_aesdec128kl_u8(__m128i *odata, __m128i idata, const void *h) { - //CHECK-LABEL: @test_mm_aesdec128kl_u8 - //CHECK: call { i8, <2 x i64> } @llvm.x86.aesdec128kl(<2 x i64> %{{.*}}, i8* %{{.*}}) - //CHECK: extractvalue { i8, <2 x i64> } %{{.*}}, 1 - //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 - //CHECK: extractvalue { i8, <2 x i64> } %{{.*}}, 0 return _mm_aesdec128kl_u8(odata, idata, h); } +// CHECK64-LABEL: @test__mm_aesencwide128kl_u8( +// CHECK64-NEXT: entry: +// CHECK64-NEXT: [[__ODATA_ADDR_I:%.*]] = alloca <2 x i64>*, align 8 +// CHECK64-NEXT: [[__IDATA_ADDR_I:%.*]] = alloca <2 x i64>*, align 8 +// CHECK64-NEXT: [[__H_ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK64-NEXT: [[ODATA_ADDR:%.*]] = alloca <2 x i64>*, align 8 +// CHECK64-NEXT: [[IDATA_ADDR:%.*]] = alloca <2 x i64>*, align 8 +// CHECK64-NEXT: [[H_ADDR:%.*]] = alloca i8*, align 8 +// CHECK64-NEXT: store <2 x i64>* [[ODATA:%.*]], <2 x i64>** [[ODATA_ADDR]], align 8 +// CHECK64-NEXT: store <2 x i64>* [[IDATA:%.*]], <2 x i64>** [[IDATA_ADDR]], align 8 +// CHECK64-NEXT: store i8* [[H:%.*]], i8** [[H_ADDR]], align 8 +// CHECK64-NEXT: [[TMP0:%.*]] = load <2 x i64>*, <2 x i64>** [[ODATA_ADDR]], align 8 +// CHECK64-NEXT: [[TMP1:%.*]] = load <2 x i64>*, <2 x i64>** [[IDATA_ADDR]], align 8 +// CHECK64-NEXT: [[TMP2:%.*]] = load i8*, i8** [[H_ADDR]], align 8 +// CHECK64-NEXT: store <2 x i64>* [[TMP0]], <2 x i64>** [[__ODATA_ADDR_I]], align 8 +// CHECK64-NEXT: store <2 x i64>* [[TMP1]], <2 x i64>** [[__IDATA_ADDR_I]], align 8 +// CHECK64-NEXT: store i8* [[TMP2]], i8** [[__H_ADDR_I]], align 8 +// CHECK64-NEXT: [[TMP3:%.*]] = load <2 x i64>*, <2 x i64>** [[__ODATA_ADDR_I]], align 8 +// CHECK64-NEXT: [[TMP4:%.*]] = load <2 x i64>*, <2 x i64>** [[__IDATA_ADDR_I]], align 8 +// CHECK64-NEXT: [[TMP5:%.*]] = load i8*, i8** [[__H_ADDR_I]], align 8 +// CHECK64-NEXT: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[TMP4]], align 16 +// CHECK64-NEXT: [[TMP7:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 1 +// CHECK64-NEXT: [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* [[TMP7]], align 16 +// CHECK64-NEXT: [[TMP9:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 2 +// CHECK64-NEXT: [[TMP10:%.*]] = load <2 x i64>, <2 x i64>* [[TMP9]], align 16 +// CHECK64-NEXT: [[TMP11:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 3 +// CHECK64-NEXT: [[TMP12:%.*]] = load <2 x i64>, <2 x i64>* [[TMP11]], align 16 +// CHECK64-NEXT: [[TMP13:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 4 +// CHECK64-NEXT: [[TMP14:%.*]] = load <2 x i64>, <2 x i64>* [[TMP13]], align 16 +// CHECK64-NEXT: [[TMP15:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 5 +// CHECK64-NEXT: [[TMP16:%.*]] = load <2 x i64>, <2 x i64>* [[TMP15]], align 16 +// CHECK64-NEXT: [[TMP17:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 6 +// CHECK64-NEXT: [[TMP18:%.*]] = load <2 x i64>, <2 x i64>* [[TMP17]], align 16 +// CHECK64-NEXT: [[TMP19:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 7 +// CHECK64-NEXT: [[TMP20:%.*]] = load <2 x i64>, <2 x i64>* [[TMP19]], align 16 +// CHECK64-NEXT: [[TMP21:%.*]] = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide128kl(i8* [[TMP5]], <2 x i64> [[TMP6]], <2 x i64> [[TMP8]], <2 x i64> [[TMP10]], <2 x i64> [[TMP12]], <2 x i64> [[TMP14]], <2 x i64> [[TMP16]], <2 x i64> [[TMP18]], <2 x i64> [[TMP20]]) #[[ATTR1]] +// CHECK64-NEXT: [[TMP22:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 0 +// CHECK64-NEXT: [[TMP23:%.*]] = trunc i8 [[TMP22]] to i1 +// CHECK64-NEXT: br i1 [[TMP23]], label [[AESENCWIDE128KL_NO_ERROR_I:%.*]], label [[AESENCWIDE128KL_ERROR_I:%.*]] +// CHECK64: aesencwide128kl_no_error.i: +// CHECK64-NEXT: [[TMP24:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 1 +// CHECK64-NEXT: store <2 x i64> [[TMP24]], <2 x i64>* [[TMP3]], align 16 +// CHECK64-NEXT: [[TMP25:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 2 +// CHECK64-NEXT: [[TMP26:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 1 +// CHECK64-NEXT: store <2 x i64> [[TMP25]], <2 x i64>* [[TMP26]], align 16 +// CHECK64-NEXT: [[TMP27:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 3 +// CHECK64-NEXT: [[TMP28:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 2 +// CHECK64-NEXT: store <2 x i64> [[TMP27]], <2 x i64>* [[TMP28]], align 16 +// CHECK64-NEXT: [[TMP29:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 4 +// CHECK64-NEXT: [[TMP30:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 3 +// CHECK64-NEXT: store <2 x i64> [[TMP29]], <2 x i64>* [[TMP30]], align 16 +// CHECK64-NEXT: [[TMP31:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 5 +// CHECK64-NEXT: [[TMP32:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 4 +// CHECK64-NEXT: store <2 x i64> [[TMP31]], <2 x i64>* [[TMP32]], align 16 +// CHECK64-NEXT: [[TMP33:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 6 +// CHECK64-NEXT: [[TMP34:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 5 +// CHECK64-NEXT: store <2 x i64> [[TMP33]], <2 x i64>* [[TMP34]], align 16 +// CHECK64-NEXT: [[TMP35:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 7 +// CHECK64-NEXT: [[TMP36:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 6 +// CHECK64-NEXT: store <2 x i64> [[TMP35]], <2 x i64>* [[TMP36]], align 16 +// CHECK64-NEXT: [[TMP37:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 8 +// CHECK64-NEXT: [[TMP38:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 7 +// CHECK64-NEXT: store <2 x i64> [[TMP37]], <2 x i64>* [[TMP38]], align 16 +// CHECK64-NEXT: br label [[_MM_AESENCWIDE128KL_U8_EXIT:%.*]] +// CHECK64: aesencwide128kl_error.i: +// CHECK64-NEXT: [[TMP39:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 1 +// CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP3]], align 16 +// CHECK64-NEXT: [[TMP40:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 2 +// CHECK64-NEXT: [[TMP41:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 1 +// CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP41]], align 16 +// CHECK64-NEXT: [[TMP42:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 3 +// CHECK64-NEXT: [[TMP43:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 2 +// CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP43]], align 16 +// CHECK64-NEXT: [[TMP44:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 4 +// CHECK64-NEXT: [[TMP45:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 3 +// CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP45]], align 16 +// CHECK64-NEXT: [[TMP46:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 5 +// CHECK64-NEXT: [[TMP47:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 4 +// CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP47]], align 16 +// CHECK64-NEXT: [[TMP48:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 6 +// CHECK64-NEXT: [[TMP49:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 5 +// CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP49]], align 16 +// CHECK64-NEXT: [[TMP50:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 7 +// CHECK64-NEXT: [[TMP51:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 6 +// CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP51]], align 16 +// CHECK64-NEXT: [[TMP52:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 8 +// CHECK64-NEXT: [[TMP53:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 7 +// CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP53]], align 16 +// CHECK64-NEXT: br label [[_MM_AESENCWIDE128KL_U8_EXIT]] +// CHECK64: _mm_aesencwide128kl_u8.exit: +// CHECK64-NEXT: [[TMP54:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 0 +// CHECK64-NEXT: ret i8 [[TMP54]] +// +// CHECK32-LABEL: @test__mm_aesencwide128kl_u8( +// CHECK32-NEXT: entry: +// CHECK32-NEXT: [[__ODATA_ADDR_I:%.*]] = alloca <2 x i64>*, align 4 +// CHECK32-NEXT: [[__IDATA_ADDR_I:%.*]] = alloca <2 x i64>*, align 4 +// CHECK32-NEXT: [[__H_ADDR_I:%.*]] = alloca i8*, align 4 +// CHECK32-NEXT: [[ODATA_ADDR:%.*]] = alloca <2 x i64>*, align 4 +// CHECK32-NEXT: [[IDATA_ADDR:%.*]] = alloca <2 x i64>*, align 4 +// CHECK32-NEXT: [[H_ADDR:%.*]] = alloca i8*, align 4 +// CHECK32-NEXT: store <2 x i64>* [[ODATA:%.*]], <2 x i64>** [[ODATA_ADDR]], align 4 +// CHECK32-NEXT: store <2 x i64>* [[IDATA:%.*]], <2 x i64>** [[IDATA_ADDR]], align 4 +// CHECK32-NEXT: store i8* [[H:%.*]], i8** [[H_ADDR]], align 4 +// CHECK32-NEXT: [[TMP0:%.*]] = load <2 x i64>*, <2 x i64>** [[ODATA_ADDR]], align 4 +// CHECK32-NEXT: [[TMP1:%.*]] = load <2 x i64>*, <2 x i64>** [[IDATA_ADDR]], align 4 +// CHECK32-NEXT: [[TMP2:%.*]] = load i8*, i8** [[H_ADDR]], align 4 +// CHECK32-NEXT: store <2 x i64>* [[TMP0]], <2 x i64>** [[__ODATA_ADDR_I]], align 4 +// CHECK32-NEXT: store <2 x i64>* [[TMP1]], <2 x i64>** [[__IDATA_ADDR_I]], align 4 +// CHECK32-NEXT: store i8* [[TMP2]], i8** [[__H_ADDR_I]], align 4 +// CHECK32-NEXT: [[TMP3:%.*]] = load <2 x i64>*, <2 x i64>** [[__ODATA_ADDR_I]], align 4 +// CHECK32-NEXT: [[TMP4:%.*]] = load <2 x i64>*, <2 x i64>** [[__IDATA_ADDR_I]], align 4 +// CHECK32-NEXT: [[TMP5:%.*]] = load i8*, i8** [[__H_ADDR_I]], align 4 +// CHECK32-NEXT: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[TMP4]], align 16 +// CHECK32-NEXT: [[TMP7:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 1 +// CHECK32-NEXT: [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* [[TMP7]], align 16 +// CHECK32-NEXT: [[TMP9:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 2 +// CHECK32-NEXT: [[TMP10:%.*]] = load <2 x i64>, <2 x i64>* [[TMP9]], align 16 +// CHECK32-NEXT: [[TMP11:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 3 +// CHECK32-NEXT: [[TMP12:%.*]] = load <2 x i64>, <2 x i64>* [[TMP11]], align 16 +// CHECK32-NEXT: [[TMP13:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 4 +// CHECK32-NEXT: [[TMP14:%.*]] = load <2 x i64>, <2 x i64>* [[TMP13]], align 16 +// CHECK32-NEXT: [[TMP15:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 5 +// CHECK32-NEXT: [[TMP16:%.*]] = load <2 x i64>, <2 x i64>* [[TMP15]], align 16 +// CHECK32-NEXT: [[TMP17:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 6 +// CHECK32-NEXT: [[TMP18:%.*]] = load <2 x i64>, <2 x i64>* [[TMP17]], align 16 +// CHECK32-NEXT: [[TMP19:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 7 +// CHECK32-NEXT: [[TMP20:%.*]] = load <2 x i64>, <2 x i64>* [[TMP19]], align 16 +// CHECK32-NEXT: [[TMP21:%.*]] = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide128kl(i8* [[TMP5]], <2 x i64> [[TMP6]], <2 x i64> [[TMP8]], <2 x i64> [[TMP10]], <2 x i64> [[TMP12]], <2 x i64> [[TMP14]], <2 x i64> [[TMP16]], <2 x i64> [[TMP18]], <2 x i64> [[TMP20]]) #[[ATTR1]] +// CHECK32-NEXT: [[TMP22:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 0 +// CHECK32-NEXT: [[TMP23:%.*]] = trunc i8 [[TMP22]] to i1 +// CHECK32-NEXT: br i1 [[TMP23]], label [[AESENCWIDE128KL_NO_ERROR_I:%.*]], label [[AESENCWIDE128KL_ERROR_I:%.*]] +// CHECK32: aesencwide128kl_no_error.i: +// CHECK32-NEXT: [[TMP24:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 1 +// CHECK32-NEXT: store <2 x i64> [[TMP24]], <2 x i64>* [[TMP3]], align 16 +// CHECK32-NEXT: [[TMP25:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 2 +// CHECK32-NEXT: [[TMP26:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 1 +// CHECK32-NEXT: store <2 x i64> [[TMP25]], <2 x i64>* [[TMP26]], align 16 +// CHECK32-NEXT: [[TMP27:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 3 +// CHECK32-NEXT: [[TMP28:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 2 +// CHECK32-NEXT: store <2 x i64> [[TMP27]], <2 x i64>* [[TMP28]], align 16 +// CHECK32-NEXT: [[TMP29:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 4 +// CHECK32-NEXT: [[TMP30:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 3 +// CHECK32-NEXT: store <2 x i64> [[TMP29]], <2 x i64>* [[TMP30]], align 16 +// CHECK32-NEXT: [[TMP31:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 5 +// CHECK32-NEXT: [[TMP32:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 4 +// CHECK32-NEXT: store <2 x i64> [[TMP31]], <2 x i64>* [[TMP32]], align 16 +// CHECK32-NEXT: [[TMP33:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 6 +// CHECK32-NEXT: [[TMP34:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 5 +// CHECK32-NEXT: store <2 x i64> [[TMP33]], <2 x i64>* [[TMP34]], align 16 +// CHECK32-NEXT: [[TMP35:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 7 +// CHECK32-NEXT: [[TMP36:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 6 +// CHECK32-NEXT: store <2 x i64> [[TMP35]], <2 x i64>* [[TMP36]], align 16 +// CHECK32-NEXT: [[TMP37:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 8 +// CHECK32-NEXT: [[TMP38:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 7 +// CHECK32-NEXT: store <2 x i64> [[TMP37]], <2 x i64>* [[TMP38]], align 16 +// CHECK32-NEXT: br label [[_MM_AESENCWIDE128KL_U8_EXIT:%.*]] +// CHECK32: aesencwide128kl_error.i: +// CHECK32-NEXT: [[TMP39:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 1 +// CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP3]], align 16 +// CHECK32-NEXT: [[TMP40:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 2 +// CHECK32-NEXT: [[TMP41:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 1 +// CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP41]], align 16 +// CHECK32-NEXT: [[TMP42:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 3 +// CHECK32-NEXT: [[TMP43:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 2 +// CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP43]], align 16 +// CHECK32-NEXT: [[TMP44:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 4 +// CHECK32-NEXT: [[TMP45:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 3 +// CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP45]], align 16 +// CHECK32-NEXT: [[TMP46:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 5 +// CHECK32-NEXT: [[TMP47:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 4 +// CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP47]], align 16 +// CHECK32-NEXT: [[TMP48:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 6 +// CHECK32-NEXT: [[TMP49:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 5 +// CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP49]], align 16 +// CHECK32-NEXT: [[TMP50:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 7 +// CHECK32-NEXT: [[TMP51:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 6 +// CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP51]], align 16 +// CHECK32-NEXT: [[TMP52:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 8 +// CHECK32-NEXT: [[TMP53:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 7 +// CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP53]], align 16 +// CHECK32-NEXT: br label [[_MM_AESENCWIDE128KL_U8_EXIT]] +// CHECK32: _mm_aesencwide128kl_u8.exit: +// CHECK32-NEXT: [[TMP54:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 0 +// CHECK32-NEXT: ret i8 [[TMP54]] +// unsigned char test__mm_aesencwide128kl_u8(__m128i odata[8], const __m128i idata[8], const void* h) { - //CHECK-LABEL: @test__mm_aesencwide128kl - //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 1 - //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 2 - //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 3 - //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 4 - //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 5 - //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 6 - //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 7 - //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 - //CHECK: call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide128kl(i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) - //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 1 - //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 - //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 2 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 1 - //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 - //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 3 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 2 - //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 - //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 4 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 3 - //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 - //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 5 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 4 - //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 - //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 6 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 5 - //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 - //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 7 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 6 - //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 - //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 8 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 7 - //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 - //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 0 return _mm_aesencwide128kl_u8(odata, idata, h); } +// CHECK64-LABEL: @test__mm_aesdecwide128kl_u8( +// CHECK64-NEXT: entry: +// CHECK64-NEXT: [[__ODATA_ADDR_I:%.*]] = alloca <2 x i64>*, align 8 +// CHECK64-NEXT: [[__IDATA_ADDR_I:%.*]] = alloca <2 x i64>*, align 8 +// CHECK64-NEXT: [[__H_ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK64-NEXT: [[ODATA_ADDR:%.*]] = alloca <2 x i64>*, align 8 +// CHECK64-NEXT: [[IDATA_ADDR:%.*]] = alloca <2 x i64>*, align 8 +// CHECK64-NEXT: [[H_ADDR:%.*]] = alloca i8*, align 8 +// CHECK64-NEXT: store <2 x i64>* [[ODATA:%.*]], <2 x i64>** [[ODATA_ADDR]], align 8 +// CHECK64-NEXT: store <2 x i64>* [[IDATA:%.*]], <2 x i64>** [[IDATA_ADDR]], align 8 +// CHECK64-NEXT: store i8* [[H:%.*]], i8** [[H_ADDR]], align 8 +// CHECK64-NEXT: [[TMP0:%.*]] = load <2 x i64>*, <2 x i64>** [[ODATA_ADDR]], align 8 +// CHECK64-NEXT: [[TMP1:%.*]] = load <2 x i64>*, <2 x i64>** [[IDATA_ADDR]], align 8 +// CHECK64-NEXT: [[TMP2:%.*]] = load i8*, i8** [[H_ADDR]], align 8 +// CHECK64-NEXT: store <2 x i64>* [[TMP0]], <2 x i64>** [[__ODATA_ADDR_I]], align 8 +// CHECK64-NEXT: store <2 x i64>* [[TMP1]], <2 x i64>** [[__IDATA_ADDR_I]], align 8 +// CHECK64-NEXT: store i8* [[TMP2]], i8** [[__H_ADDR_I]], align 8 +// CHECK64-NEXT: [[TMP3:%.*]] = load <2 x i64>*, <2 x i64>** [[__ODATA_ADDR_I]], align 8 +// CHECK64-NEXT: [[TMP4:%.*]] = load <2 x i64>*, <2 x i64>** [[__IDATA_ADDR_I]], align 8 +// CHECK64-NEXT: [[TMP5:%.*]] = load i8*, i8** [[__H_ADDR_I]], align 8 +// CHECK64-NEXT: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[TMP4]], align 16 +// CHECK64-NEXT: [[TMP7:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 1 +// CHECK64-NEXT: [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* [[TMP7]], align 16 +// CHECK64-NEXT: [[TMP9:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 2 +// CHECK64-NEXT: [[TMP10:%.*]] = load <2 x i64>, <2 x i64>* [[TMP9]], align 16 +// CHECK64-NEXT: [[TMP11:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 3 +// CHECK64-NEXT: [[TMP12:%.*]] = load <2 x i64>, <2 x i64>* [[TMP11]], align 16 +// CHECK64-NEXT: [[TMP13:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 4 +// CHECK64-NEXT: [[TMP14:%.*]] = load <2 x i64>, <2 x i64>* [[TMP13]], align 16 +// CHECK64-NEXT: [[TMP15:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 5 +// CHECK64-NEXT: [[TMP16:%.*]] = load <2 x i64>, <2 x i64>* [[TMP15]], align 16 +// CHECK64-NEXT: [[TMP17:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 6 +// CHECK64-NEXT: [[TMP18:%.*]] = load <2 x i64>, <2 x i64>* [[TMP17]], align 16 +// CHECK64-NEXT: [[TMP19:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 7 +// CHECK64-NEXT: [[TMP20:%.*]] = load <2 x i64>, <2 x i64>* [[TMP19]], align 16 +// CHECK64-NEXT: [[TMP21:%.*]] = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide128kl(i8* [[TMP5]], <2 x i64> [[TMP6]], <2 x i64> [[TMP8]], <2 x i64> [[TMP10]], <2 x i64> [[TMP12]], <2 x i64> [[TMP14]], <2 x i64> [[TMP16]], <2 x i64> [[TMP18]], <2 x i64> [[TMP20]]) #[[ATTR1]] +// CHECK64-NEXT: [[TMP22:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 0 +// CHECK64-NEXT: [[TMP23:%.*]] = trunc i8 [[TMP22]] to i1 +// CHECK64-NEXT: br i1 [[TMP23]], label [[AESDECWIDE128KL_NO_ERROR_I:%.*]], label [[AESDECWIDE128KL_ERROR_I:%.*]] +// CHECK64: aesdecwide128kl_no_error.i: +// CHECK64-NEXT: [[TMP24:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 1 +// CHECK64-NEXT: store <2 x i64> [[TMP24]], <2 x i64>* [[TMP3]], align 16 +// CHECK64-NEXT: [[TMP25:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 2 +// CHECK64-NEXT: [[TMP26:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 1 +// CHECK64-NEXT: store <2 x i64> [[TMP25]], <2 x i64>* [[TMP26]], align 16 +// CHECK64-NEXT: [[TMP27:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 3 +// CHECK64-NEXT: [[TMP28:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 2 +// CHECK64-NEXT: store <2 x i64> [[TMP27]], <2 x i64>* [[TMP28]], align 16 +// CHECK64-NEXT: [[TMP29:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 4 +// CHECK64-NEXT: [[TMP30:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 3 +// CHECK64-NEXT: store <2 x i64> [[TMP29]], <2 x i64>* [[TMP30]], align 16 +// CHECK64-NEXT: [[TMP31:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 5 +// CHECK64-NEXT: [[TMP32:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 4 +// CHECK64-NEXT: store <2 x i64> [[TMP31]], <2 x i64>* [[TMP32]], align 16 +// CHECK64-NEXT: [[TMP33:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 6 +// CHECK64-NEXT: [[TMP34:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 5 +// CHECK64-NEXT: store <2 x i64> [[TMP33]], <2 x i64>* [[TMP34]], align 16 +// CHECK64-NEXT: [[TMP35:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 7 +// CHECK64-NEXT: [[TMP36:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 6 +// CHECK64-NEXT: store <2 x i64> [[TMP35]], <2 x i64>* [[TMP36]], align 16 +// CHECK64-NEXT: [[TMP37:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 8 +// CHECK64-NEXT: [[TMP38:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 7 +// CHECK64-NEXT: store <2 x i64> [[TMP37]], <2 x i64>* [[TMP38]], align 16 +// CHECK64-NEXT: br label [[_MM_AESDECWIDE128KL_U8_EXIT:%.*]] +// CHECK64: aesdecwide128kl_error.i: +// CHECK64-NEXT: [[TMP39:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 1 +// CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP3]], align 16 +// CHECK64-NEXT: [[TMP40:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 2 +// CHECK64-NEXT: [[TMP41:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 1 +// CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP41]], align 16 +// CHECK64-NEXT: [[TMP42:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 3 +// CHECK64-NEXT: [[TMP43:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 2 +// CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP43]], align 16 +// CHECK64-NEXT: [[TMP44:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 4 +// CHECK64-NEXT: [[TMP45:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 3 +// CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP45]], align 16 +// CHECK64-NEXT: [[TMP46:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 5 +// CHECK64-NEXT: [[TMP47:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 4 +// CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP47]], align 16 +// CHECK64-NEXT: [[TMP48:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 6 +// CHECK64-NEXT: [[TMP49:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 5 +// CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP49]], align 16 +// CHECK64-NEXT: [[TMP50:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 7 +// CHECK64-NEXT: [[TMP51:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 6 +// CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP51]], align 16 +// CHECK64-NEXT: [[TMP52:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 8 +// CHECK64-NEXT: [[TMP53:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 7 +// CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP53]], align 16 +// CHECK64-NEXT: br label [[_MM_AESDECWIDE128KL_U8_EXIT]] +// CHECK64: _mm_aesdecwide128kl_u8.exit: +// CHECK64-NEXT: [[TMP54:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 0 +// CHECK64-NEXT: ret i8 [[TMP54]] +// +// CHECK32-LABEL: @test__mm_aesdecwide128kl_u8( +// CHECK32-NEXT: entry: +// CHECK32-NEXT: [[__ODATA_ADDR_I:%.*]] = alloca <2 x i64>*, align 4 +// CHECK32-NEXT: [[__IDATA_ADDR_I:%.*]] = alloca <2 x i64>*, align 4 +// CHECK32-NEXT: [[__H_ADDR_I:%.*]] = alloca i8*, align 4 +// CHECK32-NEXT: [[ODATA_ADDR:%.*]] = alloca <2 x i64>*, align 4 +// CHECK32-NEXT: [[IDATA_ADDR:%.*]] = alloca <2 x i64>*, align 4 +// CHECK32-NEXT: [[H_ADDR:%.*]] = alloca i8*, align 4 +// CHECK32-NEXT: store <2 x i64>* [[ODATA:%.*]], <2 x i64>** [[ODATA_ADDR]], align 4 +// CHECK32-NEXT: store <2 x i64>* [[IDATA:%.*]], <2 x i64>** [[IDATA_ADDR]], align 4 +// CHECK32-NEXT: store i8* [[H:%.*]], i8** [[H_ADDR]], align 4 +// CHECK32-NEXT: [[TMP0:%.*]] = load <2 x i64>*, <2 x i64>** [[ODATA_ADDR]], align 4 +// CHECK32-NEXT: [[TMP1:%.*]] = load <2 x i64>*, <2 x i64>** [[IDATA_ADDR]], align 4 +// CHECK32-NEXT: [[TMP2:%.*]] = load i8*, i8** [[H_ADDR]], align 4 +// CHECK32-NEXT: store <2 x i64>* [[TMP0]], <2 x i64>** [[__ODATA_ADDR_I]], align 4 +// CHECK32-NEXT: store <2 x i64>* [[TMP1]], <2 x i64>** [[__IDATA_ADDR_I]], align 4 +// CHECK32-NEXT: store i8* [[TMP2]], i8** [[__H_ADDR_I]], align 4 +// CHECK32-NEXT: [[TMP3:%.*]] = load <2 x i64>*, <2 x i64>** [[__ODATA_ADDR_I]], align 4 +// CHECK32-NEXT: [[TMP4:%.*]] = load <2 x i64>*, <2 x i64>** [[__IDATA_ADDR_I]], align 4 +// CHECK32-NEXT: [[TMP5:%.*]] = load i8*, i8** [[__H_ADDR_I]], align 4 +// CHECK32-NEXT: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[TMP4]], align 16 +// CHECK32-NEXT: [[TMP7:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 1 +// CHECK32-NEXT: [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* [[TMP7]], align 16 +// CHECK32-NEXT: [[TMP9:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 2 +// CHECK32-NEXT: [[TMP10:%.*]] = load <2 x i64>, <2 x i64>* [[TMP9]], align 16 +// CHECK32-NEXT: [[TMP11:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 3 +// CHECK32-NEXT: [[TMP12:%.*]] = load <2 x i64>, <2 x i64>* [[TMP11]], align 16 +// CHECK32-NEXT: [[TMP13:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 4 +// CHECK32-NEXT: [[TMP14:%.*]] = load <2 x i64>, <2 x i64>* [[TMP13]], align 16 +// CHECK32-NEXT: [[TMP15:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 5 +// CHECK32-NEXT: [[TMP16:%.*]] = load <2 x i64>, <2 x i64>* [[TMP15]], align 16 +// CHECK32-NEXT: [[TMP17:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 6 +// CHECK32-NEXT: [[TMP18:%.*]] = load <2 x i64>, <2 x i64>* [[TMP17]], align 16 +// CHECK32-NEXT: [[TMP19:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 7 +// CHECK32-NEXT: [[TMP20:%.*]] = load <2 x i64>, <2 x i64>* [[TMP19]], align 16 +// CHECK32-NEXT: [[TMP21:%.*]] = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide128kl(i8* [[TMP5]], <2 x i64> [[TMP6]], <2 x i64> [[TMP8]], <2 x i64> [[TMP10]], <2 x i64> [[TMP12]], <2 x i64> [[TMP14]], <2 x i64> [[TMP16]], <2 x i64> [[TMP18]], <2 x i64> [[TMP20]]) #[[ATTR1]] +// CHECK32-NEXT: [[TMP22:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 0 +// CHECK32-NEXT: [[TMP23:%.*]] = trunc i8 [[TMP22]] to i1 +// CHECK32-NEXT: br i1 [[TMP23]], label [[AESDECWIDE128KL_NO_ERROR_I:%.*]], label [[AESDECWIDE128KL_ERROR_I:%.*]] +// CHECK32: aesdecwide128kl_no_error.i: +// CHECK32-NEXT: [[TMP24:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 1 +// CHECK32-NEXT: store <2 x i64> [[TMP24]], <2 x i64>* [[TMP3]], align 16 +// CHECK32-NEXT: [[TMP25:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 2 +// CHECK32-NEXT: [[TMP26:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 1 +// CHECK32-NEXT: store <2 x i64> [[TMP25]], <2 x i64>* [[TMP26]], align 16 +// CHECK32-NEXT: [[TMP27:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 3 +// CHECK32-NEXT: [[TMP28:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 2 +// CHECK32-NEXT: store <2 x i64> [[TMP27]], <2 x i64>* [[TMP28]], align 16 +// CHECK32-NEXT: [[TMP29:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 4 +// CHECK32-NEXT: [[TMP30:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 3 +// CHECK32-NEXT: store <2 x i64> [[TMP29]], <2 x i64>* [[TMP30]], align 16 +// CHECK32-NEXT: [[TMP31:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 5 +// CHECK32-NEXT: [[TMP32:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 4 +// CHECK32-NEXT: store <2 x i64> [[TMP31]], <2 x i64>* [[TMP32]], align 16 +// CHECK32-NEXT: [[TMP33:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 6 +// CHECK32-NEXT: [[TMP34:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 5 +// CHECK32-NEXT: store <2 x i64> [[TMP33]], <2 x i64>* [[TMP34]], align 16 +// CHECK32-NEXT: [[TMP35:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 7 +// CHECK32-NEXT: [[TMP36:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 6 +// CHECK32-NEXT: store <2 x i64> [[TMP35]], <2 x i64>* [[TMP36]], align 16 +// CHECK32-NEXT: [[TMP37:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 8 +// CHECK32-NEXT: [[TMP38:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 7 +// CHECK32-NEXT: store <2 x i64> [[TMP37]], <2 x i64>* [[TMP38]], align 16 +// CHECK32-NEXT: br label [[_MM_AESDECWIDE128KL_U8_EXIT:%.*]] +// CHECK32: aesdecwide128kl_error.i: +// CHECK32-NEXT: [[TMP39:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 1 +// CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP3]], align 16 +// CHECK32-NEXT: [[TMP40:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 2 +// CHECK32-NEXT: [[TMP41:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 1 +// CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP41]], align 16 +// CHECK32-NEXT: [[TMP42:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 3 +// CHECK32-NEXT: [[TMP43:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 2 +// CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP43]], align 16 +// CHECK32-NEXT: [[TMP44:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 4 +// CHECK32-NEXT: [[TMP45:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 3 +// CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP45]], align 16 +// CHECK32-NEXT: [[TMP46:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 5 +// CHECK32-NEXT: [[TMP47:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 4 +// CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP47]], align 16 +// CHECK32-NEXT: [[TMP48:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 6 +// CHECK32-NEXT: [[TMP49:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 5 +// CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP49]], align 16 +// CHECK32-NEXT: [[TMP50:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 7 +// CHECK32-NEXT: [[TMP51:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 6 +// CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP51]], align 16 +// CHECK32-NEXT: [[TMP52:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 8 +// CHECK32-NEXT: [[TMP53:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 7 +// CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP53]], align 16 +// CHECK32-NEXT: br label [[_MM_AESDECWIDE128KL_U8_EXIT]] +// CHECK32: _mm_aesdecwide128kl_u8.exit: +// CHECK32-NEXT: [[TMP54:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 0 +// CHECK32-NEXT: ret i8 [[TMP54]] +// unsigned char test__mm_aesdecwide128kl_u8(__m128i odata[8], const __m128i idata[8], const void* h) { - //CHECK-LABEL: @test__mm_aesdecwide128kl - //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 1 - //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 2 - //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 3 - //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 4 - //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 5 - //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 6 - //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 7 - //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 - //CHECK: call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide128kl(i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) - //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 1 - //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 - //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 2 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 1 - //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 - //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 3 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 2 - //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 - //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 4 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 3 - //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 - //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 5 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 4 - //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 - //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 6 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 5 - //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 - //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 7 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 6 - //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 - //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 8 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 7 - //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 - //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 0 return _mm_aesdecwide128kl_u8(odata, idata, h); } +// CHECK64-LABEL: @test__mm_aesencwide256kl_u8( +// CHECK64-NEXT: entry: +// CHECK64-NEXT: [[__ODATA_ADDR_I:%.*]] = alloca <2 x i64>*, align 8 +// CHECK64-NEXT: [[__IDATA_ADDR_I:%.*]] = alloca <2 x i64>*, align 8 +// CHECK64-NEXT: [[__H_ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK64-NEXT: [[ODATA_ADDR:%.*]] = alloca <2 x i64>*, align 8 +// CHECK64-NEXT: [[IDATA_ADDR:%.*]] = alloca <2 x i64>*, align 8 +// CHECK64-NEXT: [[H_ADDR:%.*]] = alloca i8*, align 8 +// CHECK64-NEXT: store <2 x i64>* [[ODATA:%.*]], <2 x i64>** [[ODATA_ADDR]], align 8 +// CHECK64-NEXT: store <2 x i64>* [[IDATA:%.*]], <2 x i64>** [[IDATA_ADDR]], align 8 +// CHECK64-NEXT: store i8* [[H:%.*]], i8** [[H_ADDR]], align 8 +// CHECK64-NEXT: [[TMP0:%.*]] = load <2 x i64>*, <2 x i64>** [[ODATA_ADDR]], align 8 +// CHECK64-NEXT: [[TMP1:%.*]] = load <2 x i64>*, <2 x i64>** [[IDATA_ADDR]], align 8 +// CHECK64-NEXT: [[TMP2:%.*]] = load i8*, i8** [[H_ADDR]], align 8 +// CHECK64-NEXT: store <2 x i64>* [[TMP0]], <2 x i64>** [[__ODATA_ADDR_I]], align 8 +// CHECK64-NEXT: store <2 x i64>* [[TMP1]], <2 x i64>** [[__IDATA_ADDR_I]], align 8 +// CHECK64-NEXT: store i8* [[TMP2]], i8** [[__H_ADDR_I]], align 8 +// CHECK64-NEXT: [[TMP3:%.*]] = load <2 x i64>*, <2 x i64>** [[__ODATA_ADDR_I]], align 8 +// CHECK64-NEXT: [[TMP4:%.*]] = load <2 x i64>*, <2 x i64>** [[__IDATA_ADDR_I]], align 8 +// CHECK64-NEXT: [[TMP5:%.*]] = load i8*, i8** [[__H_ADDR_I]], align 8 +// CHECK64-NEXT: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[TMP4]], align 16 +// CHECK64-NEXT: [[TMP7:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 1 +// CHECK64-NEXT: [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* [[TMP7]], align 16 +// CHECK64-NEXT: [[TMP9:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 2 +// CHECK64-NEXT: [[TMP10:%.*]] = load <2 x i64>, <2 x i64>* [[TMP9]], align 16 +// CHECK64-NEXT: [[TMP11:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 3 +// CHECK64-NEXT: [[TMP12:%.*]] = load <2 x i64>, <2 x i64>* [[TMP11]], align 16 +// CHECK64-NEXT: [[TMP13:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 4 +// CHECK64-NEXT: [[TMP14:%.*]] = load <2 x i64>, <2 x i64>* [[TMP13]], align 16 +// CHECK64-NEXT: [[TMP15:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 5 +// CHECK64-NEXT: [[TMP16:%.*]] = load <2 x i64>, <2 x i64>* [[TMP15]], align 16 +// CHECK64-NEXT: [[TMP17:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 6 +// CHECK64-NEXT: [[TMP18:%.*]] = load <2 x i64>, <2 x i64>* [[TMP17]], align 16 +// CHECK64-NEXT: [[TMP19:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 7 +// CHECK64-NEXT: [[TMP20:%.*]] = load <2 x i64>, <2 x i64>* [[TMP19]], align 16 +// CHECK64-NEXT: [[TMP21:%.*]] = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide256kl(i8* [[TMP5]], <2 x i64> [[TMP6]], <2 x i64> [[TMP8]], <2 x i64> [[TMP10]], <2 x i64> [[TMP12]], <2 x i64> [[TMP14]], <2 x i64> [[TMP16]], <2 x i64> [[TMP18]], <2 x i64> [[TMP20]]) #[[ATTR1]] +// CHECK64-NEXT: [[TMP22:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 0 +// CHECK64-NEXT: [[TMP23:%.*]] = trunc i8 [[TMP22]] to i1 +// CHECK64-NEXT: br i1 [[TMP23]], label [[AESENCWIDE256KL_NO_ERROR_I:%.*]], label [[AESENCWIDE256KL_ERROR_I:%.*]] +// CHECK64: aesencwide256kl_no_error.i: +// CHECK64-NEXT: [[TMP24:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 1 +// CHECK64-NEXT: store <2 x i64> [[TMP24]], <2 x i64>* [[TMP3]], align 16 +// CHECK64-NEXT: [[TMP25:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 2 +// CHECK64-NEXT: [[TMP26:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 1 +// CHECK64-NEXT: store <2 x i64> [[TMP25]], <2 x i64>* [[TMP26]], align 16 +// CHECK64-NEXT: [[TMP27:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 3 +// CHECK64-NEXT: [[TMP28:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 2 +// CHECK64-NEXT: store <2 x i64> [[TMP27]], <2 x i64>* [[TMP28]], align 16 +// CHECK64-NEXT: [[TMP29:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 4 +// CHECK64-NEXT: [[TMP30:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 3 +// CHECK64-NEXT: store <2 x i64> [[TMP29]], <2 x i64>* [[TMP30]], align 16 +// CHECK64-NEXT: [[TMP31:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 5 +// CHECK64-NEXT: [[TMP32:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 4 +// CHECK64-NEXT: store <2 x i64> [[TMP31]], <2 x i64>* [[TMP32]], align 16 +// CHECK64-NEXT: [[TMP33:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 6 +// CHECK64-NEXT: [[TMP34:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 5 +// CHECK64-NEXT: store <2 x i64> [[TMP33]], <2 x i64>* [[TMP34]], align 16 +// CHECK64-NEXT: [[TMP35:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 7 +// CHECK64-NEXT: [[TMP36:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 6 +// CHECK64-NEXT: store <2 x i64> [[TMP35]], <2 x i64>* [[TMP36]], align 16 +// CHECK64-NEXT: [[TMP37:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 8 +// CHECK64-NEXT: [[TMP38:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 7 +// CHECK64-NEXT: store <2 x i64> [[TMP37]], <2 x i64>* [[TMP38]], align 16 +// CHECK64-NEXT: br label [[_MM_AESENCWIDE256KL_U8_EXIT:%.*]] +// CHECK64: aesencwide256kl_error.i: +// CHECK64-NEXT: [[TMP39:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 1 +// CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP3]], align 16 +// CHECK64-NEXT: [[TMP40:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 2 +// CHECK64-NEXT: [[TMP41:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 1 +// CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP41]], align 16 +// CHECK64-NEXT: [[TMP42:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 3 +// CHECK64-NEXT: [[TMP43:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 2 +// CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP43]], align 16 +// CHECK64-NEXT: [[TMP44:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 4 +// CHECK64-NEXT: [[TMP45:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 3 +// CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP45]], align 16 +// CHECK64-NEXT: [[TMP46:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 5 +// CHECK64-NEXT: [[TMP47:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 4 +// CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP47]], align 16 +// CHECK64-NEXT: [[TMP48:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 6 +// CHECK64-NEXT: [[TMP49:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 5 +// CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP49]], align 16 +// CHECK64-NEXT: [[TMP50:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 7 +// CHECK64-NEXT: [[TMP51:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 6 +// CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP51]], align 16 +// CHECK64-NEXT: [[TMP52:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 8 +// CHECK64-NEXT: [[TMP53:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 7 +// CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP53]], align 16 +// CHECK64-NEXT: br label [[_MM_AESENCWIDE256KL_U8_EXIT]] +// CHECK64: _mm_aesencwide256kl_u8.exit: +// CHECK64-NEXT: [[TMP54:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 0 +// CHECK64-NEXT: ret i8 [[TMP54]] +// +// CHECK32-LABEL: @test__mm_aesencwide256kl_u8( +// CHECK32-NEXT: entry: +// CHECK32-NEXT: [[__ODATA_ADDR_I:%.*]] = alloca <2 x i64>*, align 4 +// CHECK32-NEXT: [[__IDATA_ADDR_I:%.*]] = alloca <2 x i64>*, align 4 +// CHECK32-NEXT: [[__H_ADDR_I:%.*]] = alloca i8*, align 4 +// CHECK32-NEXT: [[ODATA_ADDR:%.*]] = alloca <2 x i64>*, align 4 +// CHECK32-NEXT: [[IDATA_ADDR:%.*]] = alloca <2 x i64>*, align 4 +// CHECK32-NEXT: [[H_ADDR:%.*]] = alloca i8*, align 4 +// CHECK32-NEXT: store <2 x i64>* [[ODATA:%.*]], <2 x i64>** [[ODATA_ADDR]], align 4 +// CHECK32-NEXT: store <2 x i64>* [[IDATA:%.*]], <2 x i64>** [[IDATA_ADDR]], align 4 +// CHECK32-NEXT: store i8* [[H:%.*]], i8** [[H_ADDR]], align 4 +// CHECK32-NEXT: [[TMP0:%.*]] = load <2 x i64>*, <2 x i64>** [[ODATA_ADDR]], align 4 +// CHECK32-NEXT: [[TMP1:%.*]] = load <2 x i64>*, <2 x i64>** [[IDATA_ADDR]], align 4 +// CHECK32-NEXT: [[TMP2:%.*]] = load i8*, i8** [[H_ADDR]], align 4 +// CHECK32-NEXT: store <2 x i64>* [[TMP0]], <2 x i64>** [[__ODATA_ADDR_I]], align 4 +// CHECK32-NEXT: store <2 x i64>* [[TMP1]], <2 x i64>** [[__IDATA_ADDR_I]], align 4 +// CHECK32-NEXT: store i8* [[TMP2]], i8** [[__H_ADDR_I]], align 4 +// CHECK32-NEXT: [[TMP3:%.*]] = load <2 x i64>*, <2 x i64>** [[__ODATA_ADDR_I]], align 4 +// CHECK32-NEXT: [[TMP4:%.*]] = load <2 x i64>*, <2 x i64>** [[__IDATA_ADDR_I]], align 4 +// CHECK32-NEXT: [[TMP5:%.*]] = load i8*, i8** [[__H_ADDR_I]], align 4 +// CHECK32-NEXT: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[TMP4]], align 16 +// CHECK32-NEXT: [[TMP7:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 1 +// CHECK32-NEXT: [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* [[TMP7]], align 16 +// CHECK32-NEXT: [[TMP9:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 2 +// CHECK32-NEXT: [[TMP10:%.*]] = load <2 x i64>, <2 x i64>* [[TMP9]], align 16 +// CHECK32-NEXT: [[TMP11:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 3 +// CHECK32-NEXT: [[TMP12:%.*]] = load <2 x i64>, <2 x i64>* [[TMP11]], align 16 +// CHECK32-NEXT: [[TMP13:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 4 +// CHECK32-NEXT: [[TMP14:%.*]] = load <2 x i64>, <2 x i64>* [[TMP13]], align 16 +// CHECK32-NEXT: [[TMP15:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 5 +// CHECK32-NEXT: [[TMP16:%.*]] = load <2 x i64>, <2 x i64>* [[TMP15]], align 16 +// CHECK32-NEXT: [[TMP17:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 6 +// CHECK32-NEXT: [[TMP18:%.*]] = load <2 x i64>, <2 x i64>* [[TMP17]], align 16 +// CHECK32-NEXT: [[TMP19:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 7 +// CHECK32-NEXT: [[TMP20:%.*]] = load <2 x i64>, <2 x i64>* [[TMP19]], align 16 +// CHECK32-NEXT: [[TMP21:%.*]] = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide256kl(i8* [[TMP5]], <2 x i64> [[TMP6]], <2 x i64> [[TMP8]], <2 x i64> [[TMP10]], <2 x i64> [[TMP12]], <2 x i64> [[TMP14]], <2 x i64> [[TMP16]], <2 x i64> [[TMP18]], <2 x i64> [[TMP20]]) #[[ATTR1]] +// CHECK32-NEXT: [[TMP22:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 0 +// CHECK32-NEXT: [[TMP23:%.*]] = trunc i8 [[TMP22]] to i1 +// CHECK32-NEXT: br i1 [[TMP23]], label [[AESENCWIDE256KL_NO_ERROR_I:%.*]], label [[AESENCWIDE256KL_ERROR_I:%.*]] +// CHECK32: aesencwide256kl_no_error.i: +// CHECK32-NEXT: [[TMP24:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 1 +// CHECK32-NEXT: store <2 x i64> [[TMP24]], <2 x i64>* [[TMP3]], align 16 +// CHECK32-NEXT: [[TMP25:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 2 +// CHECK32-NEXT: [[TMP26:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 1 +// CHECK32-NEXT: store <2 x i64> [[TMP25]], <2 x i64>* [[TMP26]], align 16 +// CHECK32-NEXT: [[TMP27:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 3 +// CHECK32-NEXT: [[TMP28:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 2 +// CHECK32-NEXT: store <2 x i64> [[TMP27]], <2 x i64>* [[TMP28]], align 16 +// CHECK32-NEXT: [[TMP29:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 4 +// CHECK32-NEXT: [[TMP30:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 3 +// CHECK32-NEXT: store <2 x i64> [[TMP29]], <2 x i64>* [[TMP30]], align 16 +// CHECK32-NEXT: [[TMP31:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 5 +// CHECK32-NEXT: [[TMP32:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 4 +// CHECK32-NEXT: store <2 x i64> [[TMP31]], <2 x i64>* [[TMP32]], align 16 +// CHECK32-NEXT: [[TMP33:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 6 +// CHECK32-NEXT: [[TMP34:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 5 +// CHECK32-NEXT: store <2 x i64> [[TMP33]], <2 x i64>* [[TMP34]], align 16 +// CHECK32-NEXT: [[TMP35:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 7 +// CHECK32-NEXT: [[TMP36:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 6 +// CHECK32-NEXT: store <2 x i64> [[TMP35]], <2 x i64>* [[TMP36]], align 16 +// CHECK32-NEXT: [[TMP37:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 8 +// CHECK32-NEXT: [[TMP38:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 7 +// CHECK32-NEXT: store <2 x i64> [[TMP37]], <2 x i64>* [[TMP38]], align 16 +// CHECK32-NEXT: br label [[_MM_AESENCWIDE256KL_U8_EXIT:%.*]] +// CHECK32: aesencwide256kl_error.i: +// CHECK32-NEXT: [[TMP39:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 1 +// CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP3]], align 16 +// CHECK32-NEXT: [[TMP40:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 2 +// CHECK32-NEXT: [[TMP41:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 1 +// CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP41]], align 16 +// CHECK32-NEXT: [[TMP42:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 3 +// CHECK32-NEXT: [[TMP43:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 2 +// CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP43]], align 16 +// CHECK32-NEXT: [[TMP44:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 4 +// CHECK32-NEXT: [[TMP45:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 3 +// CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP45]], align 16 +// CHECK32-NEXT: [[TMP46:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 5 +// CHECK32-NEXT: [[TMP47:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 4 +// CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP47]], align 16 +// CHECK32-NEXT: [[TMP48:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 6 +// CHECK32-NEXT: [[TMP49:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 5 +// CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP49]], align 16 +// CHECK32-NEXT: [[TMP50:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 7 +// CHECK32-NEXT: [[TMP51:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 6 +// CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP51]], align 16 +// CHECK32-NEXT: [[TMP52:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 8 +// CHECK32-NEXT: [[TMP53:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 7 +// CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP53]], align 16 +// CHECK32-NEXT: br label [[_MM_AESENCWIDE256KL_U8_EXIT]] +// CHECK32: _mm_aesencwide256kl_u8.exit: +// CHECK32-NEXT: [[TMP54:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 0 +// CHECK32-NEXT: ret i8 [[TMP54]] +// unsigned char test__mm_aesencwide256kl_u8(__m128i odata[8], const __m128i idata[8], const void* h) { - //CHECK-LABEL: @test__mm_aesencwide256kl - //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 1 - //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 2 - //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 3 - //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 4 - //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 5 - //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 6 - //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 7 - //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 - //CHECK: call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide256kl(i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) - //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 1 - //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 - //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 2 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 1 - //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 - //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 3 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 2 - //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 - //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 4 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 3 - //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 - //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 5 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 4 - //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 - //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 6 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 5 - //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 - //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 7 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 6 - //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 - //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 8 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 7 - //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 - //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 0 return _mm_aesencwide256kl_u8(odata, idata, h); } +// CHECK64-LABEL: @test__mm_aesdecwide256kl_u8( +// CHECK64-NEXT: entry: +// CHECK64-NEXT: [[__ODATA_ADDR_I:%.*]] = alloca <2 x i64>*, align 8 +// CHECK64-NEXT: [[__IDATA_ADDR_I:%.*]] = alloca <2 x i64>*, align 8 +// CHECK64-NEXT: [[__H_ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK64-NEXT: [[ODATA_ADDR:%.*]] = alloca <2 x i64>*, align 8 +// CHECK64-NEXT: [[IDATA_ADDR:%.*]] = alloca <2 x i64>*, align 8 +// CHECK64-NEXT: [[H_ADDR:%.*]] = alloca i8*, align 8 +// CHECK64-NEXT: store <2 x i64>* [[ODATA:%.*]], <2 x i64>** [[ODATA_ADDR]], align 8 +// CHECK64-NEXT: store <2 x i64>* [[IDATA:%.*]], <2 x i64>** [[IDATA_ADDR]], align 8 +// CHECK64-NEXT: store i8* [[H:%.*]], i8** [[H_ADDR]], align 8 +// CHECK64-NEXT: [[TMP0:%.*]] = load <2 x i64>*, <2 x i64>** [[ODATA_ADDR]], align 8 +// CHECK64-NEXT: [[TMP1:%.*]] = load <2 x i64>*, <2 x i64>** [[IDATA_ADDR]], align 8 +// CHECK64-NEXT: [[TMP2:%.*]] = load i8*, i8** [[H_ADDR]], align 8 +// CHECK64-NEXT: store <2 x i64>* [[TMP0]], <2 x i64>** [[__ODATA_ADDR_I]], align 8 +// CHECK64-NEXT: store <2 x i64>* [[TMP1]], <2 x i64>** [[__IDATA_ADDR_I]], align 8 +// CHECK64-NEXT: store i8* [[TMP2]], i8** [[__H_ADDR_I]], align 8 +// CHECK64-NEXT: [[TMP3:%.*]] = load <2 x i64>*, <2 x i64>** [[__ODATA_ADDR_I]], align 8 +// CHECK64-NEXT: [[TMP4:%.*]] = load <2 x i64>*, <2 x i64>** [[__IDATA_ADDR_I]], align 8 +// CHECK64-NEXT: [[TMP5:%.*]] = load i8*, i8** [[__H_ADDR_I]], align 8 +// CHECK64-NEXT: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[TMP4]], align 16 +// CHECK64-NEXT: [[TMP7:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 1 +// CHECK64-NEXT: [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* [[TMP7]], align 16 +// CHECK64-NEXT: [[TMP9:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 2 +// CHECK64-NEXT: [[TMP10:%.*]] = load <2 x i64>, <2 x i64>* [[TMP9]], align 16 +// CHECK64-NEXT: [[TMP11:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 3 +// CHECK64-NEXT: [[TMP12:%.*]] = load <2 x i64>, <2 x i64>* [[TMP11]], align 16 +// CHECK64-NEXT: [[TMP13:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 4 +// CHECK64-NEXT: [[TMP14:%.*]] = load <2 x i64>, <2 x i64>* [[TMP13]], align 16 +// CHECK64-NEXT: [[TMP15:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 5 +// CHECK64-NEXT: [[TMP16:%.*]] = load <2 x i64>, <2 x i64>* [[TMP15]], align 16 +// CHECK64-NEXT: [[TMP17:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 6 +// CHECK64-NEXT: [[TMP18:%.*]] = load <2 x i64>, <2 x i64>* [[TMP17]], align 16 +// CHECK64-NEXT: [[TMP19:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 7 +// CHECK64-NEXT: [[TMP20:%.*]] = load <2 x i64>, <2 x i64>* [[TMP19]], align 16 +// CHECK64-NEXT: [[TMP21:%.*]] = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide256kl(i8* [[TMP5]], <2 x i64> [[TMP6]], <2 x i64> [[TMP8]], <2 x i64> [[TMP10]], <2 x i64> [[TMP12]], <2 x i64> [[TMP14]], <2 x i64> [[TMP16]], <2 x i64> [[TMP18]], <2 x i64> [[TMP20]]) #[[ATTR1]] +// CHECK64-NEXT: [[TMP22:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 0 +// CHECK64-NEXT: [[TMP23:%.*]] = trunc i8 [[TMP22]] to i1 +// CHECK64-NEXT: br i1 [[TMP23]], label [[AESDECWIDE256KL_NO_ERROR_I:%.*]], label [[AESDECWIDE256KL_ERROR_I:%.*]] +// CHECK64: aesdecwide256kl_no_error.i: +// CHECK64-NEXT: [[TMP24:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 1 +// CHECK64-NEXT: store <2 x i64> [[TMP24]], <2 x i64>* [[TMP3]], align 16 +// CHECK64-NEXT: [[TMP25:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 2 +// CHECK64-NEXT: [[TMP26:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 1 +// CHECK64-NEXT: store <2 x i64> [[TMP25]], <2 x i64>* [[TMP26]], align 16 +// CHECK64-NEXT: [[TMP27:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 3 +// CHECK64-NEXT: [[TMP28:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 2 +// CHECK64-NEXT: store <2 x i64> [[TMP27]], <2 x i64>* [[TMP28]], align 16 +// CHECK64-NEXT: [[TMP29:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 4 +// CHECK64-NEXT: [[TMP30:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 3 +// CHECK64-NEXT: store <2 x i64> [[TMP29]], <2 x i64>* [[TMP30]], align 16 +// CHECK64-NEXT: [[TMP31:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 5 +// CHECK64-NEXT: [[TMP32:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 4 +// CHECK64-NEXT: store <2 x i64> [[TMP31]], <2 x i64>* [[TMP32]], align 16 +// CHECK64-NEXT: [[TMP33:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 6 +// CHECK64-NEXT: [[TMP34:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 5 +// CHECK64-NEXT: store <2 x i64> [[TMP33]], <2 x i64>* [[TMP34]], align 16 +// CHECK64-NEXT: [[TMP35:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 7 +// CHECK64-NEXT: [[TMP36:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 6 +// CHECK64-NEXT: store <2 x i64> [[TMP35]], <2 x i64>* [[TMP36]], align 16 +// CHECK64-NEXT: [[TMP37:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 8 +// CHECK64-NEXT: [[TMP38:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 7 +// CHECK64-NEXT: store <2 x i64> [[TMP37]], <2 x i64>* [[TMP38]], align 16 +// CHECK64-NEXT: br label [[_MM_AESDECWIDE256KL_U8_EXIT:%.*]] +// CHECK64: aesdecwide256kl_error.i: +// CHECK64-NEXT: [[TMP39:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 1 +// CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP3]], align 16 +// CHECK64-NEXT: [[TMP40:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 2 +// CHECK64-NEXT: [[TMP41:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 1 +// CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP41]], align 16 +// CHECK64-NEXT: [[TMP42:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 3 +// CHECK64-NEXT: [[TMP43:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 2 +// CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP43]], align 16 +// CHECK64-NEXT: [[TMP44:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 4 +// CHECK64-NEXT: [[TMP45:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 3 +// CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP45]], align 16 +// CHECK64-NEXT: [[TMP46:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 5 +// CHECK64-NEXT: [[TMP47:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 4 +// CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP47]], align 16 +// CHECK64-NEXT: [[TMP48:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 6 +// CHECK64-NEXT: [[TMP49:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 5 +// CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP49]], align 16 +// CHECK64-NEXT: [[TMP50:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 7 +// CHECK64-NEXT: [[TMP51:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 6 +// CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP51]], align 16 +// CHECK64-NEXT: [[TMP52:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 8 +// CHECK64-NEXT: [[TMP53:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 7 +// CHECK64-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP53]], align 16 +// CHECK64-NEXT: br label [[_MM_AESDECWIDE256KL_U8_EXIT]] +// CHECK64: _mm_aesdecwide256kl_u8.exit: +// CHECK64-NEXT: [[TMP54:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 0 +// CHECK64-NEXT: ret i8 [[TMP54]] +// +// CHECK32-LABEL: @test__mm_aesdecwide256kl_u8( +// CHECK32-NEXT: entry: +// CHECK32-NEXT: [[__ODATA_ADDR_I:%.*]] = alloca <2 x i64>*, align 4 +// CHECK32-NEXT: [[__IDATA_ADDR_I:%.*]] = alloca <2 x i64>*, align 4 +// CHECK32-NEXT: [[__H_ADDR_I:%.*]] = alloca i8*, align 4 +// CHECK32-NEXT: [[ODATA_ADDR:%.*]] = alloca <2 x i64>*, align 4 +// CHECK32-NEXT: [[IDATA_ADDR:%.*]] = alloca <2 x i64>*, align 4 +// CHECK32-NEXT: [[H_ADDR:%.*]] = alloca i8*, align 4 +// CHECK32-NEXT: store <2 x i64>* [[ODATA:%.*]], <2 x i64>** [[ODATA_ADDR]], align 4 +// CHECK32-NEXT: store <2 x i64>* [[IDATA:%.*]], <2 x i64>** [[IDATA_ADDR]], align 4 +// CHECK32-NEXT: store i8* [[H:%.*]], i8** [[H_ADDR]], align 4 +// CHECK32-NEXT: [[TMP0:%.*]] = load <2 x i64>*, <2 x i64>** [[ODATA_ADDR]], align 4 +// CHECK32-NEXT: [[TMP1:%.*]] = load <2 x i64>*, <2 x i64>** [[IDATA_ADDR]], align 4 +// CHECK32-NEXT: [[TMP2:%.*]] = load i8*, i8** [[H_ADDR]], align 4 +// CHECK32-NEXT: store <2 x i64>* [[TMP0]], <2 x i64>** [[__ODATA_ADDR_I]], align 4 +// CHECK32-NEXT: store <2 x i64>* [[TMP1]], <2 x i64>** [[__IDATA_ADDR_I]], align 4 +// CHECK32-NEXT: store i8* [[TMP2]], i8** [[__H_ADDR_I]], align 4 +// CHECK32-NEXT: [[TMP3:%.*]] = load <2 x i64>*, <2 x i64>** [[__ODATA_ADDR_I]], align 4 +// CHECK32-NEXT: [[TMP4:%.*]] = load <2 x i64>*, <2 x i64>** [[__IDATA_ADDR_I]], align 4 +// CHECK32-NEXT: [[TMP5:%.*]] = load i8*, i8** [[__H_ADDR_I]], align 4 +// CHECK32-NEXT: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[TMP4]], align 16 +// CHECK32-NEXT: [[TMP7:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 1 +// CHECK32-NEXT: [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* [[TMP7]], align 16 +// CHECK32-NEXT: [[TMP9:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 2 +// CHECK32-NEXT: [[TMP10:%.*]] = load <2 x i64>, <2 x i64>* [[TMP9]], align 16 +// CHECK32-NEXT: [[TMP11:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 3 +// CHECK32-NEXT: [[TMP12:%.*]] = load <2 x i64>, <2 x i64>* [[TMP11]], align 16 +// CHECK32-NEXT: [[TMP13:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 4 +// CHECK32-NEXT: [[TMP14:%.*]] = load <2 x i64>, <2 x i64>* [[TMP13]], align 16 +// CHECK32-NEXT: [[TMP15:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 5 +// CHECK32-NEXT: [[TMP16:%.*]] = load <2 x i64>, <2 x i64>* [[TMP15]], align 16 +// CHECK32-NEXT: [[TMP17:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 6 +// CHECK32-NEXT: [[TMP18:%.*]] = load <2 x i64>, <2 x i64>* [[TMP17]], align 16 +// CHECK32-NEXT: [[TMP19:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP4]], i32 7 +// CHECK32-NEXT: [[TMP20:%.*]] = load <2 x i64>, <2 x i64>* [[TMP19]], align 16 +// CHECK32-NEXT: [[TMP21:%.*]] = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide256kl(i8* [[TMP5]], <2 x i64> [[TMP6]], <2 x i64> [[TMP8]], <2 x i64> [[TMP10]], <2 x i64> [[TMP12]], <2 x i64> [[TMP14]], <2 x i64> [[TMP16]], <2 x i64> [[TMP18]], <2 x i64> [[TMP20]]) #[[ATTR1]] +// CHECK32-NEXT: [[TMP22:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 0 +// CHECK32-NEXT: [[TMP23:%.*]] = trunc i8 [[TMP22]] to i1 +// CHECK32-NEXT: br i1 [[TMP23]], label [[AESDECWIDE256KL_NO_ERROR_I:%.*]], label [[AESDECWIDE256KL_ERROR_I:%.*]] +// CHECK32: aesdecwide256kl_no_error.i: +// CHECK32-NEXT: [[TMP24:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 1 +// CHECK32-NEXT: store <2 x i64> [[TMP24]], <2 x i64>* [[TMP3]], align 16 +// CHECK32-NEXT: [[TMP25:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 2 +// CHECK32-NEXT: [[TMP26:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 1 +// CHECK32-NEXT: store <2 x i64> [[TMP25]], <2 x i64>* [[TMP26]], align 16 +// CHECK32-NEXT: [[TMP27:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 3 +// CHECK32-NEXT: [[TMP28:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 2 +// CHECK32-NEXT: store <2 x i64> [[TMP27]], <2 x i64>* [[TMP28]], align 16 +// CHECK32-NEXT: [[TMP29:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 4 +// CHECK32-NEXT: [[TMP30:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 3 +// CHECK32-NEXT: store <2 x i64> [[TMP29]], <2 x i64>* [[TMP30]], align 16 +// CHECK32-NEXT: [[TMP31:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 5 +// CHECK32-NEXT: [[TMP32:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 4 +// CHECK32-NEXT: store <2 x i64> [[TMP31]], <2 x i64>* [[TMP32]], align 16 +// CHECK32-NEXT: [[TMP33:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 6 +// CHECK32-NEXT: [[TMP34:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 5 +// CHECK32-NEXT: store <2 x i64> [[TMP33]], <2 x i64>* [[TMP34]], align 16 +// CHECK32-NEXT: [[TMP35:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 7 +// CHECK32-NEXT: [[TMP36:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 6 +// CHECK32-NEXT: store <2 x i64> [[TMP35]], <2 x i64>* [[TMP36]], align 16 +// CHECK32-NEXT: [[TMP37:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 8 +// CHECK32-NEXT: [[TMP38:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 7 +// CHECK32-NEXT: store <2 x i64> [[TMP37]], <2 x i64>* [[TMP38]], align 16 +// CHECK32-NEXT: br label [[_MM_AESDECWIDE256KL_U8_EXIT:%.*]] +// CHECK32: aesdecwide256kl_error.i: +// CHECK32-NEXT: [[TMP39:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 1 +// CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP3]], align 16 +// CHECK32-NEXT: [[TMP40:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 2 +// CHECK32-NEXT: [[TMP41:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 1 +// CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP41]], align 16 +// CHECK32-NEXT: [[TMP42:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 3 +// CHECK32-NEXT: [[TMP43:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 2 +// CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP43]], align 16 +// CHECK32-NEXT: [[TMP44:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 4 +// CHECK32-NEXT: [[TMP45:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 3 +// CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP45]], align 16 +// CHECK32-NEXT: [[TMP46:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 5 +// CHECK32-NEXT: [[TMP47:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 4 +// CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP47]], align 16 +// CHECK32-NEXT: [[TMP48:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 6 +// CHECK32-NEXT: [[TMP49:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 5 +// CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP49]], align 16 +// CHECK32-NEXT: [[TMP50:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 7 +// CHECK32-NEXT: [[TMP51:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 6 +// CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP51]], align 16 +// CHECK32-NEXT: [[TMP52:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 8 +// CHECK32-NEXT: [[TMP53:%.*]] = getelementptr <2 x i64>, <2 x i64>* [[TMP3]], i32 7 +// CHECK32-NEXT: store <2 x i64> zeroinitializer, <2 x i64>* [[TMP53]], align 16 +// CHECK32-NEXT: br label [[_MM_AESDECWIDE256KL_U8_EXIT]] +// CHECK32: _mm_aesdecwide256kl_u8.exit: +// CHECK32-NEXT: [[TMP54:%.*]] = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], 0 +// CHECK32-NEXT: ret i8 [[TMP54]] +// unsigned char test__mm_aesdecwide256kl_u8(__m128i odata[8], const __m128i idata[8], const void* h) { - //CHECK-LABEL: @test__mm_aesdecwide256kl - //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 1 - //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 2 - //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 3 - //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 4 - //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 5 - //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 6 - //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 7 - //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 - //CHECK: call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide256kl(i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) - //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 1 - //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 - //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 2 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 1 - //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 - //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 3 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 2 - //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 - //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 4 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 3 - //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 - //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 5 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 4 - //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 - //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 6 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 5 - //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 - //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 7 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 6 - //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 - //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 8 - //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 7 - //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 - //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 0 return _mm_aesdecwide256kl_u8(odata, idata, h); } From c94cf97b53566a26245c54ea0c41b0dc83daf8a0 Mon Sep 17 00:00:00 2001 From: Alexander Shaposhnikov Date: Mon, 28 Jun 2021 22:50:39 -0700 Subject: [PATCH 136/619] [llvm-objcopy][MachO] Minor code cleanup Remove unnecessary template in MachOReader.cpp. NFC. --- llvm/tools/llvm-objcopy/MachO/MachOReader.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp b/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp index 050dd976f3f35..4b0aeb9702679 100644 --- a/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp +++ b/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp @@ -28,7 +28,7 @@ void MachOReader::readHeader(Object &O) const { } template -static Section constructSectionCommon(SectionType Sec, uint32_t Index) { +static Section constructSectionCommon(const SectionType &Sec, uint32_t Index) { StringRef SegName(Sec.segname, strnlen(Sec.segname, sizeof(Sec.segname))); StringRef SectName(Sec.sectname, strnlen(Sec.sectname, sizeof(Sec.sectname))); Section S(SegName, SectName); @@ -46,14 +46,11 @@ static Section constructSectionCommon(SectionType Sec, uint32_t Index) { return S; } -template -Section constructSection(SectionType Sec, uint32_t Index); - -template <> Section constructSection(MachO::section Sec, uint32_t Index) { +static Section constructSection(const MachO::section &Sec, uint32_t Index) { return constructSectionCommon(Sec, Index); } -template <> Section constructSection(MachO::section_64 Sec, uint32_t Index) { +static Section constructSection(const MachO::section_64 &Sec, uint32_t Index) { Section S = constructSectionCommon(Sec, Index); S.Reserved3 = Sec.reserved3; return S; From e1b8fde1cbfc031d67d9b316f1b4ac3ab9c8801f Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Mon, 28 Jun 2021 22:55:55 -0700 Subject: [PATCH 137/619] Revert "[Clang] Add option to handle behaviour of vector bool/vector pixel." This reverts commit c3fe847f9d90de5a6a76fd1d5f5823ab4719accc. Tests fail in non-asserts builds because they assume named IR, by the looks of it (testing for the "entry" label, for instance). I don't know enough about the update_cc_test_checks.py stuff to know how to manually fix these tests, so reverting for now. --- .../clang/Basic/DiagnosticSemaKinds.td | 6 - clang/include/clang/Basic/LangOptions.def | 2 - clang/include/clang/Basic/LangOptions.h | 12 -- clang/include/clang/Driver/Options.td | 12 -- clang/lib/Driver/ToolChains/Clang.cpp | 1 - clang/lib/Sema/SemaExpr.cpp | 29 +-- .../vector-compat-pixel-bool-ternary.c | 104 ---------- clang/test/CodeGen/vector-compat-pixel-bool.c | 94 --------- clang/test/CodeGen/vector-compat-ternary.c | 180 ------------------ clang/test/CodeGen/vector-compat.c | 162 ---------------- 10 files changed, 5 insertions(+), 597 deletions(-) delete mode 100644 clang/test/CodeGen/vector-compat-pixel-bool-ternary.c delete mode 100644 clang/test/CodeGen/vector-compat-pixel-bool.c delete mode 100644 clang/test/CodeGen/vector-compat-ternary.c delete mode 100644 clang/test/CodeGen/vector-compat.c diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 70a22fd2506a3..b5b8bc6aa3c57 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -7441,12 +7441,6 @@ def warn_deprecated_volatile_structured_binding : Warning< "volatile qualifier in structured binding declaration is deprecated">, InGroup; -def warn_deprecated_altivec_src_compat : Warning< - "Current handling of vector bool and vector pixel types in this context are " - "deprecated. The default behaviour will soon change to that implied by the " - "'-altivec-compat=xl' option">, - InGroup>; - def err_catch_incomplete_ptr : Error< "cannot catch pointer to incomplete type %0">; def err_catch_incomplete_ref : Error< diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index 465bad8d7d112..b6d9160f89a00 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -126,8 +126,6 @@ LANGOPT(WritableStrings , 1, 0, "writable string support") LANGOPT(ConstStrings , 1, 0, "const-qualified string support") ENUM_LANGOPT(LaxVectorConversions, LaxVectorConversionKind, 2, LaxVectorConversionKind::All, "lax vector conversions") -ENUM_LANGOPT(AltivecSrcCompat, AltivecSrcCompatKind, 2, - AltivecSrcCompatKind::Default, "Altivec source compatibility") LANGOPT(ConvergentFunctions, 1, 1, "Assume convergent functions") LANGOPT(AltiVec , 1, 0, "AltiVec-style vector initializers") LANGOPT(ZVector , 1, 0, "System z vector extensions") diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h index d04ce52a550ef..d618daf3d23c2 100644 --- a/clang/include/clang/Basic/LangOptions.h +++ b/clang/include/clang/Basic/LangOptions.h @@ -244,18 +244,6 @@ class LangOptions : public LangOptionsBase { All, }; - enum class AltivecSrcCompatKind { - // All vector compares produce scalars except vector pixel and vector bool. - // The types vector pixel and vector bool return vector results. - Mixed, - // All vector compares produce vector results as in GCC. - GCC, - // All vector compares produce scalars as in XL. - XL, - // Default clang behaviour. - Default = Mixed, - }; - enum class SignReturnAddressScopeKind { /// No signing for any function. None, diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index f1455f5461990..0122afd2eeada 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3823,18 +3823,6 @@ def u : JoinedOrSeparate<["-"], "u">, Group; def v : Flag<["-"], "v">, Flags<[CC1Option, CoreOption]>, HelpText<"Show commands to run and use verbose output">, MarshallingInfoFlag>; -def altivec_src_compat : Joined<["-"], "faltivec-src-compat=">, - Flags<[CC1Option]>, Group, - HelpText<"Source-level compatibility for Altivec vectors (for PowerPC " - "targets). This includes results of vector comparison (scalar for " - "'xl', vector for 'gcc') as well as behavior when initializing with " - "a scalar (splatting for 'xl', element zero only for 'gcc'). For " - "'mixed', the compatibility is as 'gcc' for 'vector bool/vector " - "pixel' and as 'xl' for other types. Current default is 'mixed'.">, - Values<"mixed,gcc,xl">, - NormalizedValuesScope<"LangOptions::AltivecSrcCompatKind">, - NormalizedValues<["Mixed", "GCC", "XL"]>, - MarshallingInfoEnum, "Mixed">; def verify_debug_info : Flag<["--"], "verify-debug-info">, Flags<[NoXarchOption]>, HelpText<"Verify the binary representation of debug output">; def weak_l : Joined<["-"], "weak-l">, Flags<[LinkerInput]>; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index c265e1c4e53cb..a3f0ec577379e 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5816,7 +5816,6 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, (Args.hasArg(options::OPT_mkernel) && types::isCXX(InputType))) CmdArgs.push_back("-fapple-kext"); - Args.AddLastArg(CmdArgs, options::OPT_altivec_src_compat); Args.AddLastArg(CmdArgs, options::OPT_flax_vector_conversions_EQ); Args.AddLastArg(CmdArgs, options::OPT_fobjc_sender_dependent_dispatch); Args.AddLastArg(CmdArgs, options::OPT_fdiagnostics_print_source_range_info); diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 6031dff673351..728d7b61d4a86 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -12224,30 +12224,11 @@ QualType Sema::CheckVectorCompareOperands(ExprResult &LHS, ExprResult &RHS, QualType LHSType = LHS.get()->getType(); - // Determine the return type of a vector compare. By default clang will return - // a scalar for all vector compares except vector bool and vector pixel. - // With the gcc compiler we will always return a vector type and with the xl - // compiler we will always return a scalar type. This switch allows choosing - // which behavior is prefered. - if (getLangOpts().AltiVec) { - switch (getLangOpts().getAltivecSrcCompat()) { - case LangOptions::AltivecSrcCompatKind::Mixed: - // If AltiVec, the comparison results in a numeric type, i.e. - // bool for C++, int for C - if (vType->castAs()->getVectorKind() == - VectorType::AltiVecVector) - return Context.getLogicalOperationType(); - else - Diag(Loc, diag::warn_deprecated_altivec_src_compat); - break; - case LangOptions::AltivecSrcCompatKind::GCC: - // For GCC we always return the vector type. - break; - case LangOptions::AltivecSrcCompatKind::XL: - return Context.getLogicalOperationType(); - break; - } - } + // If AltiVec, the comparison results in a numeric type, i.e. + // bool for C++, int for C + if (getLangOpts().AltiVec && + vType->castAs()->getVectorKind() == VectorType::AltiVecVector) + return Context.getLogicalOperationType(); // For non-floating point types, check for self-comparisons of the form // x == x, x != x, x < x, etc. These always evaluate to a constant, and diff --git a/clang/test/CodeGen/vector-compat-pixel-bool-ternary.c b/clang/test/CodeGen/vector-compat-pixel-bool-ternary.c deleted file mode 100644 index 20da809602126..0000000000000 --- a/clang/test/CodeGen/vector-compat-pixel-bool-ternary.c +++ /dev/null @@ -1,104 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: not %clang_cc1 -target-feature +altivec -target-feature +vsx \ -// RUN: -faltivec-src-compat=mixed -triple powerpc-unknown-unknown -S -emit-llvm %s -o - 2>&1 | FileCheck %s --check-prefix=ERROR -// RUN: not %clang_cc1 -target-feature +altivec -target-feature +vsx \ -// RUN: -faltivec-src-compat=gcc -triple powerpc-unknown-unknown -S -emit-llvm %s -o - 2>&1| FileCheck %s --check-prefix=ERROR -// RUN: %clang_cc1 -target-feature +altivec -target-feature +vsx \ -// RUN: -faltivec-src-compat=xl -triple powerpc-unknown-unknown -S -emit-llvm %s -o - | FileCheck %s -// RUN: %clang -mcpu=pwr8 -faltivec-src-compat=xl --target=powerpc-unknown-unknown -S -emit-llvm %s -o - | FileCheck %s -// RUN: %clang -mcpu=pwr9 -faltivec-src-compat=xl --target=powerpc-unknown-unknown -S -emit-llvm %s -o - | FileCheck %s - -// CHECK-LABEL: @bi8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <16 x i8>, align 16 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <16 x i8>, align 16 -// CHECK-NEXT: store <16 x i8> [[A:%.*]], <16 x i8>* [[A_ADDR]], align 16 -// CHECK-NEXT: store <16 x i8> [[B:%.*]], <16 x i8>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ppc.altivec.vcmpequb.p(i32 2, <16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i64 -// CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 3, i32 7 -// CHECK-NEXT: ret i32 [[COND]] -// -// ERROR: error: used type '__attribute__((__vector_size__(16 * sizeof(char)))) char' (vector of 16 'char' values) where arithmetic or pointer type is required -int bi8(vector bool char a, vector bool char b) { - return a == b ? 3 : 7; -} - -// CHECK-LABEL: @bi16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x i16>, align 16 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x i16>, align 16 -// CHECK-NEXT: store <8 x i16> [[A:%.*]], <8 x i16>* [[A_ADDR]], align 16 -// CHECK-NEXT: store <8 x i16> [[B:%.*]], <8 x i16>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, <8 x i16>* [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ppc.altivec.vcmpequh.p(i32 2, <8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i64 -// CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 3, i32 7 -// CHECK-NEXT: ret i32 [[COND]] -// -// ERROR: error: used type '__attribute__((__vector_size__(8 * sizeof(short)))) short' (vector of 8 'short' values) where arithmetic or pointer type is required -int bi16(vector bool short a, vector bool short b) { - return a == b ? 3 : 7; -} - -// CHECK-LABEL: @bi32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x i32>, align 16 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x i32>, align 16 -// CHECK-NEXT: store <4 x i32> [[A:%.*]], <4 x i32>* [[A_ADDR]], align 16 -// CHECK-NEXT: store <4 x i32> [[B:%.*]], <4 x i32>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ppc.altivec.vcmpequw.p(i32 2, <4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i64 -// CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 3, i32 7 -// CHECK-NEXT: ret i32 [[COND]] -// -// ERROR: error: used type '__attribute__((__vector_size__(4 * sizeof(long)))) long' (vector of 4 'long' values) where arithmetic or pointer type is required -int bi32(vector bool int a, vector bool int b) { - return a == b ? 3 : 7; -} - -// CHECK-LABEL: @bi64( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: store <2 x i64> [[A:%.*]], <2 x i64>* [[A_ADDR]], align 16 -// CHECK-NEXT: store <2 x i64> [[B:%.*]], <2 x i64>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ppc.altivec.vcmpequd.p(i32 2, <2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i64 -// CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 3, i32 7 -// CHECK-NEXT: ret i32 [[COND]] -// -// ERROR: error: used type '__attribute__((__vector_size__(2 * sizeof(long long)))) long long' (vector of 2 'long long' values) where arithmetic or pointer type is required -int bi64(vector bool long long a, vector bool long long b) { - return a == b ? 3 : 7; -} - -// CHECK-LABEL: @VecPixel( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x i16>, align 16 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x i16>, align 16 -// CHECK-NEXT: store <8 x i16> [[A:%.*]], <8 x i16>* [[A_ADDR]], align 16 -// CHECK-NEXT: store <8 x i16> [[B:%.*]], <8 x i16>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, <8 x i16>* [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ppc.altivec.vcmpequh.p(i32 2, <8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i64 -// CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 3, i32 7 -// CHECK-NEXT: ret i32 [[COND]] -// -// ERROR: error: used type '__attribute__((__vector_size__(8 * sizeof(short)))) short' (vector of 8 'short' values) where arithmetic or pointer type is required -int VecPixel(vector pixel a, vector pixel b) { - return a == b ? 3 : 7; -} diff --git a/clang/test/CodeGen/vector-compat-pixel-bool.c b/clang/test/CodeGen/vector-compat-pixel-bool.c deleted file mode 100644 index 292431f5b07f6..0000000000000 --- a/clang/test/CodeGen/vector-compat-pixel-bool.c +++ /dev/null @@ -1,94 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -target-feature +altivec -target-feature +vsx \ -// RUN: -faltivec-src-compat=mixed -triple powerpc-unknown-unknown -S -emit-llvm %s -o - | FileCheck %s -// RUN: %clang_cc1 -target-feature +altivec -target-feature +vsx \ -// RUN: -faltivec-src-compat=gcc -triple powerpc-unknown-unknown -S -emit-llvm %s -o - | FileCheck %s -// RUN: not %clang_cc1 -target-feature +altivec -target-feature +vsx \ -// RUN: -faltivec-src-compat=xl -triple powerpc-unknown-unknown -S -emit-llvm %s -o - 2>&1 | FileCheck %s --check-prefix=ERROR -// RUN: %clang -mcpu=pwr8 -faltivec-src-compat=gcc --target=powerpc-unknown-unknown -S -emit-llvm %s -o - | FileCheck %s -// RUN: %clang -mcpu=pwr9 -faltivec-src-compat=gcc --target=powerpc-unknown-unknown -S -emit-llvm %s -o - | FileCheck %s - -// CHECK-LABEL: @bi8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <16 x i8>, align 16 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <16 x i8>, align 16 -// CHECK-NEXT: store <16 x i8> [[A:%.*]], <16 x i8>* [[A_ADDR]], align 16 -// CHECK-NEXT: store <16 x i8> [[B:%.*]], <16 x i8>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[CMP:%.*]] = icmp eq <16 x i8> [[TMP0]], [[TMP1]] -// CHECK-NEXT: [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i8> -// CHECK-NEXT: ret <16 x i8> [[SEXT]] -// -// ERROR: returning 'int' from a function with incompatible result type -vector unsigned char bi8(vector bool char a, vector bool char b) { - return a == b; -} - -// CHECK-LABEL: @bi16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x i16>, align 16 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x i16>, align 16 -// CHECK-NEXT: store <8 x i16> [[A:%.*]], <8 x i16>* [[A_ADDR]], align 16 -// CHECK-NEXT: store <8 x i16> [[B:%.*]], <8 x i16>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, <8 x i16>* [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[CMP:%.*]] = icmp eq <8 x i16> [[TMP0]], [[TMP1]] -// CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i16> -// CHECK-NEXT: ret <8 x i16> [[SEXT]] -// -// ERROR: returning 'int' from a function with incompatible result type -vector bool short bi16(vector bool short a, vector bool short b) { - return a == b; -} - -// CHECK-LABEL: @bi32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x i32>, align 16 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x i32>, align 16 -// CHECK-NEXT: store <4 x i32> [[A:%.*]], <4 x i32>* [[A_ADDR]], align 16 -// CHECK-NEXT: store <4 x i32> [[B:%.*]], <4 x i32>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i32> [[TMP0]], [[TMP1]] -// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[SEXT]] -// -// ERROR: returning 'int' from a function with incompatible result type -vector bool int bi32(vector bool int a, vector bool int b) { - return a == b; -} - -// CHECK-LABEL: @bi64( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: store <2 x i64> [[A:%.*]], <2 x i64>* [[A_ADDR]], align 16 -// CHECK-NEXT: store <2 x i64> [[B:%.*]], <2 x i64>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i64> [[TMP0]], [[TMP1]] -// CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64> -// CHECK-NEXT: ret <2 x i64> [[SEXT]] -// -// ERROR: returning 'int' from a function with incompatible result type -vector long long bi64(vector bool long long a, vector bool long long b) { - return a == b; -} - -// CHECK-LABEL: @VecPixel( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x i16>, align 16 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x i16>, align 16 -// CHECK-NEXT: store <8 x i16> [[A:%.*]], <8 x i16>* [[A_ADDR]], align 16 -// CHECK-NEXT: store <8 x i16> [[B:%.*]], <8 x i16>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, <8 x i16>* [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[CMP:%.*]] = icmp eq <8 x i16> [[TMP0]], [[TMP1]] -// CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i16> -// CHECK-NEXT: ret <8 x i16> [[SEXT]] -// -// ERROR: returning 'int' from a function with incompatible result type -vector pixel VecPixel(vector pixel a, vector pixel b) { - return a == b; -} diff --git a/clang/test/CodeGen/vector-compat-ternary.c b/clang/test/CodeGen/vector-compat-ternary.c deleted file mode 100644 index 9a7d9d9585131..0000000000000 --- a/clang/test/CodeGen/vector-compat-ternary.c +++ /dev/null @@ -1,180 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -target-feature +altivec -target-feature +vsx \ -// RUN: -faltivec-src-compat=mixed -triple powerpc-unknown-unknown -S -emit-llvm %s -o - | FileCheck %s -// RUN: not %clang_cc1 -target-feature +altivec -target-feature +vsx \ -// RUN: -faltivec-src-compat=gcc -triple powerpc-unknown-unknown -S -emit-llvm %s -o - 2>&1 | FileCheck %s --check-prefix=ERROR -// RUN: %clang_cc1 -target-feature +altivec -target-feature +vsx \ -// RUN: -faltivec-src-compat=xl -triple powerpc-unknown-unknown -S -emit-llvm %s -o - | FileCheck %s -// RUN: %clang -mcpu=pwr8 -faltivec-src-compat=xl --target=powerpc-unknown-unknown -S -emit-llvm %s -o - | FileCheck %s -// RUN: %clang -mcpu=pwr9 -faltivec-src-compat=xl --target=powerpc-unknown-unknown -S -emit-llvm %s -o - | FileCheck %s - -// CHECK-LABEL: @ui8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <16 x i8>, align 16 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <16 x i8>, align 16 -// CHECK-NEXT: store <16 x i8> [[A:%.*]], <16 x i8>* [[A_ADDR]], align 16 -// CHECK-NEXT: store <16 x i8> [[B:%.*]], <16 x i8>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ppc.altivec.vcmpequb.p(i32 2, <16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i64 -// CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 3, i32 7 -// CHECK-NEXT: ret i32 [[COND]] -// -// ERROR: error: used type '__attribute__((__vector_size__(16 * sizeof(char)))) char' (vector of 16 'char' values) where arithmetic or pointer type is required -int ui8(vector unsigned char a, vector unsigned char b) { - return a == b ? 3 : 7; -} - -// CHECK-LABEL: @si8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <16 x i8>, align 16 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <16 x i8>, align 16 -// CHECK-NEXT: store <16 x i8> [[A:%.*]], <16 x i8>* [[A_ADDR]], align 16 -// CHECK-NEXT: store <16 x i8> [[B:%.*]], <16 x i8>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ppc.altivec.vcmpequb.p(i32 2, <16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) -// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i64 -// CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 3, i32 7 -// CHECK-NEXT: ret i32 [[COND]] -// -// ERROR: error: used type '__attribute__((__vector_size__(16 * sizeof(char)))) char' (vector of 16 'char' values) where arithmetic or pointer type is required -int si8(vector signed char a, vector signed char b) { - return a == b ? 3 : 7; -} - -// CHECK-LABEL: @ui16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x i16>, align 16 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x i16>, align 16 -// CHECK-NEXT: store <8 x i16> [[A:%.*]], <8 x i16>* [[A_ADDR]], align 16 -// CHECK-NEXT: store <8 x i16> [[B:%.*]], <8 x i16>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, <8 x i16>* [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ppc.altivec.vcmpequh.p(i32 2, <8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i64 -// CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 3, i32 7 -// CHECK-NEXT: ret i32 [[COND]] -// -// ERROR: error: used type '__attribute__((__vector_size__(8 * sizeof(short)))) short' (vector of 8 'short' values) where arithmetic or pointer type is required -int ui16(vector unsigned short a, vector unsigned short b) { - return a == b ? 3 : 7; -} - -// CHECK-LABEL: @si16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x i16>, align 16 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x i16>, align 16 -// CHECK-NEXT: store <8 x i16> [[A:%.*]], <8 x i16>* [[A_ADDR]], align 16 -// CHECK-NEXT: store <8 x i16> [[B:%.*]], <8 x i16>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, <8 x i16>* [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ppc.altivec.vcmpequh.p(i32 2, <8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) -// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i64 -// CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 3, i32 7 -// CHECK-NEXT: ret i32 [[COND]] -// -// ERROR: error: used type '__attribute__((__vector_size__(8 * sizeof(short)))) short' (vector of 8 'short' values) where arithmetic or pointer type is required -int si16(vector signed short a, vector signed short b) { - return a == b ? 3 : 7; -} - -// CHECK-LABEL: @ui32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x i32>, align 16 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x i32>, align 16 -// CHECK-NEXT: store <4 x i32> [[A:%.*]], <4 x i32>* [[A_ADDR]], align 16 -// CHECK-NEXT: store <4 x i32> [[B:%.*]], <4 x i32>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ppc.altivec.vcmpequw.p(i32 2, <4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i64 -// CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 3, i32 7 -// CHECK-NEXT: ret i32 [[COND]] -// -// ERROR: error: used type '__attribute__((__vector_size__(4 * sizeof(long)))) long' (vector of 4 'long' values) where arithmetic or pointer type is required -int ui32(vector unsigned int a, vector unsigned int b) { - return a == b ? 3 : 7; -} - -// CHECK-LABEL: @si32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x i32>, align 16 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x i32>, align 16 -// CHECK-NEXT: store <4 x i32> [[A:%.*]], <4 x i32>* [[A_ADDR]], align 16 -// CHECK-NEXT: store <4 x i32> [[B:%.*]], <4 x i32>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ppc.altivec.vcmpequw.p(i32 2, <4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) -// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i64 -// CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 3, i32 7 -// CHECK-NEXT: ret i32 [[COND]] -// -// ERROR: error: used type '__attribute__((__vector_size__(4 * sizeof(long)))) long' (vector of 4 'long' values) where arithmetic or pointer type is required -int si32(vector signed int a, vector signed int b) { - return a == b ? 3 : 7; -} - -// CHECK-LABEL: @si64( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: store <2 x i64> [[A:%.*]], <2 x i64>* [[A_ADDR]], align 16 -// CHECK-NEXT: store <2 x i64> [[B:%.*]], <2 x i64>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ppc.altivec.vcmpequd.p(i32 2, <2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) -// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i64 -// CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 3, i32 7 -// CHECK-NEXT: ret i32 [[COND]] -// -// ERROR: error: used type '__attribute__((__vector_size__(2 * sizeof(long long)))) long long' (vector of 2 'long long' values) where arithmetic or pointer type is required -int si64(vector long long a, vector long long b) { - return a == b ? 3 : 7; -} - -// CHECK-LABEL: @f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: store <4 x float> [[A:%.*]], <4 x float>* [[A_ADDR]], align 16 -// CHECK-NEXT: store <4 x float> [[B:%.*]], <4 x float>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ppc.altivec.vcmpeqfp.p(i32 2, <4 x float> [[TMP0]], <4 x float> [[TMP1]]) -// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i64 -// CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 3, i32 7 -// CHECK-NEXT: ret i32 [[COND]] -// -// ERROR: error: used type '__attribute__((__vector_size__(4 * sizeof(long)))) long' (vector of 4 'long' values) where arithmetic or pointer type is required -int f32(vector float a, vector float b) { - return a == b ? 3 : 7; -} - -// CHECK-LABEL: @f64( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: store <2 x double> [[A:%.*]], <2 x double>* [[A_ADDR]], align 16 -// CHECK-NEXT: store <2 x double> [[B:%.*]], <2 x double>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, <2 x double>* [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ppc.vsx.xvcmpeqdp.p(i32 2, <2 x double> [[TMP0]], <2 x double> [[TMP1]]) -// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i64 -// CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 3, i32 7 -// CHECK-NEXT: ret i32 [[COND]] -// -// ERROR: error: used type '__attribute__((__vector_size__(2 * sizeof(long long)))) long long' (vector of 2 'long long' values) where arithmetic or pointer type is required -int f64(vector double a, vector double b) { - return a == b ? 3 : 7; -} diff --git a/clang/test/CodeGen/vector-compat.c b/clang/test/CodeGen/vector-compat.c deleted file mode 100644 index 5f9f8d1db3570..0000000000000 --- a/clang/test/CodeGen/vector-compat.c +++ /dev/null @@ -1,162 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: not %clang_cc1 -target-feature +altivec -target-feature +vsx \ -// RUN: -faltivec-src-compat=mixed -triple powerpc-unknown-unknown -S -emit-llvm %s -o - 2>&1 | FileCheck %s --check-prefix=ERROR -// RUN: %clang_cc1 -target-feature +altivec -target-feature +vsx \ -// RUN: -faltivec-src-compat=gcc -triple powerpc-unknown-unknown -S -emit-llvm %s -o - | FileCheck %s -// RUN: not %clang_cc1 -target-feature +altivec -target-feature +vsx \ -// RUN: -faltivec-src-compat=xl -triple powerpc-unknown-unknown -S -emit-llvm %s -o - 2>&1 | FileCheck %s --check-prefix=ERROR -// RUN: %clang -mcpu=pwr8 -faltivec-src-compat=gcc --target=powerpc-unknown-unknown -S -emit-llvm %s -o - | FileCheck %s -// RUN: %clang -mcpu=pwr9 -faltivec-src-compat=gcc --target=powerpc-unknown-unknown -S -emit-llvm %s -o - | FileCheck %s - -// CHECK-LABEL: @ui8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <16 x i8>, align 16 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <16 x i8>, align 16 -// CHECK-NEXT: store <16 x i8> [[A:%.*]], <16 x i8>* [[A_ADDR]], align 16 -// CHECK-NEXT: store <16 x i8> [[B:%.*]], <16 x i8>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[CMP:%.*]] = icmp eq <16 x i8> [[TMP0]], [[TMP1]] -// CHECK-NEXT: [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i8> -// CHECK-NEXT: ret <16 x i8> [[SEXT]] -// -// ERROR: returning 'int' from a function with incompatible result type -vector unsigned char ui8(vector unsigned char a, vector unsigned char b) { - return a == b; -} - -// CHECK-LABEL: @si8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <16 x i8>, align 16 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <16 x i8>, align 16 -// CHECK-NEXT: store <16 x i8> [[A:%.*]], <16 x i8>* [[A_ADDR]], align 16 -// CHECK-NEXT: store <16 x i8> [[B:%.*]], <16 x i8>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[CMP:%.*]] = icmp eq <16 x i8> [[TMP0]], [[TMP1]] -// CHECK-NEXT: [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i8> -// CHECK-NEXT: ret <16 x i8> [[SEXT]] -// -// ERROR: returning 'int' from a function with incompatible result type -vector signed char si8(vector signed char a, vector signed char b) { - return a == b; -} - -// CHECK-LABEL: @ui16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x i16>, align 16 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x i16>, align 16 -// CHECK-NEXT: store <8 x i16> [[A:%.*]], <8 x i16>* [[A_ADDR]], align 16 -// CHECK-NEXT: store <8 x i16> [[B:%.*]], <8 x i16>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, <8 x i16>* [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[CMP:%.*]] = icmp eq <8 x i16> [[TMP0]], [[TMP1]] -// CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i16> -// CHECK-NEXT: ret <8 x i16> [[SEXT]] -// -// ERROR: returning 'int' from a function with incompatible result type -vector unsigned short ui16(vector unsigned short a, vector unsigned short b) { - return a == b; -} - -// CHECK-LABEL: @si16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x i16>, align 16 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x i16>, align 16 -// CHECK-NEXT: store <8 x i16> [[A:%.*]], <8 x i16>* [[A_ADDR]], align 16 -// CHECK-NEXT: store <8 x i16> [[B:%.*]], <8 x i16>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, <8 x i16>* [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[CMP:%.*]] = icmp eq <8 x i16> [[TMP0]], [[TMP1]] -// CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i16> -// CHECK-NEXT: ret <8 x i16> [[SEXT]] -// -// ERROR: returning 'int' from a function with incompatible result type -vector signed short si16(vector signed short a, vector signed short b) { - return a == b; -} - -// CHECK-LABEL: @ui32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x i32>, align 16 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x i32>, align 16 -// CHECK-NEXT: store <4 x i32> [[A:%.*]], <4 x i32>* [[A_ADDR]], align 16 -// CHECK-NEXT: store <4 x i32> [[B:%.*]], <4 x i32>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i32> [[TMP0]], [[TMP1]] -// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[SEXT]] -// -// ERROR: returning 'int' from a function with incompatible result type -vector unsigned int ui32(vector unsigned int a, vector unsigned int b) { - return a == b; -} - -// CHECK-LABEL: @si32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x i32>, align 16 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x i32>, align 16 -// CHECK-NEXT: store <4 x i32> [[A:%.*]], <4 x i32>* [[A_ADDR]], align 16 -// CHECK-NEXT: store <4 x i32> [[B:%.*]], <4 x i32>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i32> [[TMP0]], [[TMP1]] -// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[SEXT]] -// -// ERROR: returning 'int' from a function with incompatible result type -vector signed int si32(vector signed int a, vector signed int b) { - return a == b; -} - -// CHECK-LABEL: @si64( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x i64>, align 16 -// CHECK-NEXT: store <2 x i64> [[A:%.*]], <2 x i64>* [[A_ADDR]], align 16 -// CHECK-NEXT: store <2 x i64> [[B:%.*]], <2 x i64>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i64> [[TMP0]], [[TMP1]] -// CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64> -// CHECK-NEXT: ret <2 x i64> [[SEXT]] -// -// ERROR: returning 'int' from a function with incompatible result type -vector long long si64(vector long long a, vector long long b) { - return a == b; -} - -// CHECK-LABEL: @f32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: store <4 x float> [[A:%.*]], <4 x float>* [[A_ADDR]], align 16 -// CHECK-NEXT: store <4 x float> [[B:%.*]], <4 x float>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[CMP:%.*]] = fcmp oeq <4 x float> [[TMP0]], [[TMP1]] -// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[SEXT]] -// -// ERROR: returning 'int' from a function with incompatible result type -vector int f32(vector float a, vector float b) { - return a == b; -} - -// CHECK-LABEL: @f64( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x double>, align 16 -// CHECK-NEXT: store <2 x double> [[A:%.*]], <2 x double>* [[A_ADDR]], align 16 -// CHECK-NEXT: store <2 x double> [[B:%.*]], <2 x double>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, <2 x double>* [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[B_ADDR]], align 16 -// CHECK-NEXT: [[CMP:%.*]] = fcmp oeq <2 x double> [[TMP0]], [[TMP1]] -// CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64> -// CHECK-NEXT: ret <2 x i64> [[SEXT]] -// -// ERROR: returning 'int' from a function with incompatible result type -vector long long f64(vector double a, vector double b) { - return a == b; -} From 487f74a6c4151d13d3a7b54ee4ab7beaf3e87487 Mon Sep 17 00:00:00 2001 From: Siva Chandra Date: Mon, 28 Jun 2021 22:24:00 -0700 Subject: [PATCH 138/619] [libc][Obvious] Fix typo in implementation of aarch64 clearExcept. Instead of reading and updating the status word, control word was being updated. --- libc/utils/FPUtil/aarch64/FEnv.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libc/utils/FPUtil/aarch64/FEnv.h b/libc/utils/FPUtil/aarch64/FEnv.h index 327ce07265682..93af9a219d036 100644 --- a/libc/utils/FPUtil/aarch64/FEnv.h +++ b/libc/utils/FPUtil/aarch64/FEnv.h @@ -90,10 +90,10 @@ static inline int disableExcept(int excepts) { } static inline int clearExcept(int excepts) { - uint32_t controlWord = FEnv::getControlWord(); + uint32_t statusWord = FEnv::getStatusWord(); uint32_t toClear = FEnv::getStatusValueForExcept(excepts); - controlWord &= ~(toClear << FEnv::ExceptionStatusFlagsBitPosition); - FEnv::writeStatusWord(controlWord); + statusWord &= ~(toClear << FEnv::ExceptionStatusFlagsBitPosition); + FEnv::writeStatusWord(statusWord); return 0; } From d15663710c91204e5987279c517a10f653efa2d9 Mon Sep 17 00:00:00 2001 From: Alexander Belyaev Date: Tue, 29 Jun 2021 08:55:25 +0200 Subject: [PATCH 139/619] Revert "[mlir] Skip scalar operands when tiling to linalg.tiled_loop." This reverts commit 69046b4a79e2670053362112aa467f89faf9e53e. It did not really break anything, but it was decided to allow scalars and other non-shaped operands for tiled_loop. --- mlir/lib/Dialect/Linalg/Utils/Utils.cpp | 16 +++------------- mlir/test/Dialect/Linalg/tile-tensors.mlir | 14 -------------- 2 files changed, 3 insertions(+), 27 deletions(-) diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp index c82e7eb10df21..9d7286c08dedf 100644 --- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp @@ -284,19 +284,6 @@ void GenerateLoopNest::doit( SmallVector lbs, ubs, steps; unpackRanges(loopRanges, lbs, ubs, steps); - auto dropNonShapedValues = - [](ArrayRef operands) -> SmallVector { - SmallVector filteredOperands; - for (OpOperand *operand : operands) { - Type type = operand->get().getType(); - if (type.isa()) - filteredOperands.push_back(operand->get()); - } - return filteredOperands; - }; - auto inputOperands = dropNonShapedValues(linalgOp.getInputOperands()); - auto outputOperands = dropNonShapedValues(linalgOp.getOutputOperands()); - auto wrappedBuilderFn = [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange ivs, ValueRange inputs, ValueRange outputs) { @@ -305,6 +292,9 @@ void GenerateLoopNest::doit( bodyBuilderFn(nestedBuilder, nestedLoc, ivs, outputTensors); nestedBuilder.create(nestedLoc, results); }; + + SmallVector inputOperands = linalgOp.getInputOperands(); + SmallVector outputOperands = linalgOp.getOutputOperands(); auto tiledLoop = b.create(loc, lbs, ubs, steps, inputOperands, outputOperands, b.getArrayAttr(iteratorTypes), wrappedBuilderFn); diff --git a/mlir/test/Dialect/Linalg/tile-tensors.mlir b/mlir/test/Dialect/Linalg/tile-tensors.mlir index 63bddb5a16055..f446d9da9179d 100644 --- a/mlir/test/Dialect/Linalg/tile-tensors.mlir +++ b/mlir/test/Dialect/Linalg/tile-tensors.mlir @@ -130,17 +130,3 @@ func @generic_op_tensors( // TLOOP-SAME: ins (%{{.*}} = %[[ARG_0]]: [[TY]], %{{.*}} = %[[ARG_1]]: [[TY]]) // TLOOP-SAME: outs (%{{.*}} = %[[INIT]]: [[TY]]) // TLOOP-SAME: distribution["block_x", "block_y", "none"] { - - -func @fill(%arg0 : tensor) -> tensor { - %c0 = constant 0.0 : f32 - %0 = linalg.fill(%c0, %arg0) : f32, tensor -> tensor - return %0 : tensor -} -// CHECK-LABEL: func @fill - -// TLOOP-LABEL: func @fill -// TLOOP-NOT: ins -// TLOOP: tensor.extract_slice -// TLOOP-NEXT: linalg.fill -// TLOOP-NEXT: tensor.insert_slice From a2a4bc561ddf61bd5104674072c79fede3380ab1 Mon Sep 17 00:00:00 2001 From: Tobias Gysi Date: Tue, 29 Jun 2021 06:54:39 +0000 Subject: [PATCH 140/619] [mlir][linalg] All StructuredOp parameters are inputs or outputs. Adapt the StructuredOp verifier to ensure all operands are either in the input or the output group. The change is possible after adding support for scalar input operands (https://reviews.llvm.org/D104220). Differential Revision: https://reviews.llvm.org/D104783 --- .../Dialect/Linalg/IR/LinalgInterfaces.td | 24 +++---------------- .../Dialect/Linalg/IR/LinalgInterfaces.cpp | 11 +++++---- mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 3 --- .../Dialect/Linalg/Transforms/Bufferize.cpp | 2 -- .../Transforms/ComprehensiveBufferize.cpp | 2 -- mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp | 4 ---- mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp | 2 -- .../Dialect/Linalg/Transforms/Transforms.cpp | 2 -- 8 files changed, 9 insertions(+), 41 deletions(-) diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td index ad91e23607141..e1f096d194b2a 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td @@ -253,7 +253,7 @@ def LinalgStructuredInterface : OpInterface<"LinalgOp"> { /*args=*/(ins), /*methodBody=*/"", /*defaultImplementation=*/[{ - return getNumInputs() + getNumOutputs(); + return this->getOperation()->getNumOperands(); }] >, //===------------------------------------------------------------------===// @@ -346,8 +346,7 @@ def LinalgStructuredInterface : OpInterface<"LinalgOp"> { result.reserve(numOutputs); llvm::transform( this->getOperation()->getOpOperands() - .drop_front(getNumInputs()) - .take_front(numOutputs), + .take_back(numOutputs), std::back_inserter(result), [](OpOperand &opOperand) { return &opOperand; }); return result; @@ -458,8 +457,7 @@ def LinalgStructuredInterface : OpInterface<"LinalgOp"> { OpOperandVector result; result.reserve(numInputsAndOutputs); llvm::transform( - this->getOperation()->getOpOperands() - .take_front(numInputsAndOutputs), + this->getOperation()->getOpOperands(), std::back_inserter(result), [](OpOperand &opOperand) { return &opOperand; }); return result; @@ -928,22 +926,6 @@ def LinalgStructuredInterface : OpInterface<"LinalgOp"> { /// `createFlatListOfOperandStaticDims`. SmallVector computeStaticLoopSizes(); - /// Returns all the operands past the inputs, output_buffers and - /// init_tensors operands. Asserts that these operands are value types to - /// allow transformations like tiling to just use the values when cloning - /// `linalgOp`. - Operation::operand_range getAssumedNonShapedOperands() { - Operation::operand_range res{ - getOperation()->getOperands().begin() + getNumInputsAndOutputs(), - getOperation()->getOperands().end()}; - for (Type t : TypeRange{res}) { - (void)t; - assert((t.isSignlessIntOrIndexOrFloat() || t.template isa()) - &&"expected scalar or vector type"); - } - return res; - } - /// Returns the value that expresses the shape of the output in terms of /// shape of the input operands where possible LogicalResult reifyReturnTypeShapesPerResultDim(OpBuilder &b, diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp index 45a9f8eb15c7e..e83c62425af4d 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp @@ -318,14 +318,15 @@ LogicalResult LinalgOp::reifyReturnTypeShapesPerResultDim( LogicalResult mlir::linalg::detail::verifyStructuredOpInterface(Operation *op) { LinalgOp linalgOp = cast(op); - // Expect at least one input/output operand. + // Expect at least one output operand. // This means an op that constructs a tensor out of indices cannot be a // LinalgOp at the moment. For now this will have to be a special op until we // have output shape operands that are not tensors. - int64_t numInputsAndOutputs = linalgOp.getNumInputsAndOutputs(); - if (numInputsAndOutputs == 0) - return op->emitOpError("expected at least one input/output operand"); - if (failed(OpTrait::impl::verifyAtLeastNOperands(op, numInputsAndOutputs))) + int64_t numInputs = linalgOp.getNumInputs(); + int64_t numOutputs = linalgOp.getNumOutputs(); + if (numOutputs == 0) + return op->emitOpError("expected at least one output operand"); + if (failed(OpTrait::impl::verifyNOperands(op, numInputs + numOutputs))) return failure(); // Should have at least one output tensor per result tensor. // Can also have outbut buffers that do not correspond to results. diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp index 11cb3e15c0e0c..f4524f19f3f14 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -3038,8 +3038,6 @@ struct FoldTensorCastOp : public OpInterfaceRewritePattern { : opOperand->get()); newResultTypes.push_back(newOperands.back().getType()); } - auto extraOperands = op.getAssumedNonShapedOperands(); - newOperands.append(extraOperands.begin(), extraOperands.end()); // Clone op. Operation *newOp = op.clone(rewriter, op->getLoc(), newResultTypes, newOperands); @@ -3109,7 +3107,6 @@ struct DeduplicateInputs : public OpInterfaceRewritePattern { newOperands.push_back(opOperand->get()); SmallVector outputOperands = op.getOutputOperands(); llvm::append_range(newOperands, outputOperands); - llvm::append_range(newOperands, op.getAssumedNonShapedOperands()); // Repair the indexing maps by filtering out the ones that have been // eliminated. diff --git a/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp index 414aa632d4e86..fba709a871525 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp @@ -119,8 +119,6 @@ static void finalizeBufferAllocation(ConversionPatternRewriter &rewriter, assert(!isa(linalgOp.getOperation())); SmallVector newOperands = inputs; newOperands.append(outputs.begin(), outputs.end()); - auto otherOperands = linalgOp.getAssumedNonShapedOperands(); - newOperands.append(otherOperands.begin(), otherOperands.end()); linalgOp.clone(rewriter, linalgOp.getLoc(), /*resultTypes=*/ArrayRef{}, newOperands); // Replace the results of the old op with the new output buffers. diff --git a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp index c951e70f18d83..287d2d47ca7fe 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp @@ -1241,8 +1241,6 @@ static LogicalResult bufferize(OpBuilder &b, LinalgOp op, // Clone the newly bufferized op. SmallVector newOperands = newInputBuffers; newOperands.append(newOutputBuffers.begin(), newOutputBuffers.end()); - auto otherOperands = op.getAssumedNonShapedOperands(); - newOperands.append(otherOperands.begin(), otherOperands.end()); op.clone(b, loc, /*resultTypes=*/TypeRange{}, newOperands); // Replace the results of the old op with the new output buffers. diff --git a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp index 0ff0594168a0b..d5964951f3c36 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp @@ -205,10 +205,6 @@ static LinalgOp fuse(OpBuilder &b, LinalgOp producer, getTiledOperands(b, producer), ivs, tileSizes, sizeBounds)); - // Append the other operands. - auto operands = producer.getAssumedNonShapedOperands(); - clonedShapes.append(operands.begin(), operands.end()); - // Iterate over the results in order. // Extract the subtensor type from the linearized range. // Since we do not enforce any canonicalizations on the fly, this is always diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp index a9366d1a271d3..b6420f7b104bc 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp @@ -242,8 +242,6 @@ tileLinalgOpImpl(OpBuilder &b, LinalgOp op, ValueRange tileSizes, applyMapToValues(b, loc, shapeSizesToLoopsMap, allShapeSizes); SmallVector tiledOperands = makeTiledShapes( b, loc, op, operands, interchangedIvs, tileSizes, sizeBounds); - auto nonShapedOperands = op.getAssumedNonShapedOperands(); - tiledOperands.append(nonShapedOperands.begin(), nonShapedOperands.end()); // TODO: use an interface/adaptor to avoid leaking position in // `tiledOperands`. diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp index 79335c3629d70..f1c8a6f7b0fd6 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp @@ -190,8 +190,6 @@ static LogicalResult rewriteAsPaddedOp(PatternRewriter &rewriter, // Clone `opToPad` to operate on the statically padded shapes. auto resultTensorTypes = ValueRange(newOperands).take_back(opToPad.getNumOutputs()).getTypes(); - ValueRange otherOperands = opToPad.getAssumedNonShapedOperands(); - newOperands.append(otherOperands.begin(), otherOperands.end()); linalg::LinalgOp paddedOp = opToPad.clone(rewriter, loc, resultTensorTypes, newOperands); From d8faf03807ac059f669ddea8742dd540e58e45be Mon Sep 17 00:00:00 2001 From: Tianqing Wang Date: Tue, 29 Jun 2021 15:34:30 +0800 Subject: [PATCH 141/619] [X86] Add -mgeneral-regs-only support. Reviewed By: pengfei Differential Revision: https://reviews.llvm.org/D103943 --- clang/include/clang/Driver/Options.td | 4 +-- clang/lib/Basic/Targets/X86.cpp | 22 +++++++++++++--- clang/lib/Driver/ToolChains/Arch/X86.cpp | 21 ++++++++++++++- .../attr-target-general-regs-only-x86.c | 14 ++++++++++ clang/test/Driver/x86-mgeneral-regs-only.c | 26 +++++++++++++++++++ 5 files changed, 80 insertions(+), 7 deletions(-) create mode 100644 clang/test/CodeGen/attr-target-general-regs-only-x86.c create mode 100644 clang/test/Driver/x86-mgeneral-regs-only.c diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 0122afd2eeada..af004e0c28b54 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3207,8 +3207,8 @@ defm aapcs_bitfield_width : BoolOption<"f", "aapcs-bitfield-width", " volatile bit-field width is dictated by the field container type. (ARM only).">>, Group; -def mgeneral_regs_only : Flag<["-"], "mgeneral-regs-only">, Group, - HelpText<"Generate code which only uses the general purpose registers (AArch64 only)">; +def mgeneral_regs_only : Flag<["-"], "mgeneral-regs-only">, Group, + HelpText<"Generate code which only uses the general purpose registers (AArch64/x86 only)">; def mfix_cortex_a53_835769 : Flag<["-"], "mfix-cortex-a53-835769">, Group, HelpText<"Workaround Cortex-A53 erratum 835769 (AArch64 only)">; diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index 3143a70adf858..9db96c20250f6 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -117,7 +117,20 @@ bool X86TargetInfo::initFeatureMap( for (auto &F : CPUFeatures) setFeatureEnabled(Features, F, true); - if (!TargetInfo::initFeatureMap(Features, Diags, CPU, FeaturesVec)) + std::vector UpdatedFeaturesVec; + for (const auto &Feature : FeaturesVec) { + // Expand general-regs-only to -x86, -mmx and -sse + if (Feature == "+general-regs-only") { + UpdatedFeaturesVec.push_back("-x87"); + UpdatedFeaturesVec.push_back("-mmx"); + UpdatedFeaturesVec.push_back("-sse"); + continue; + } + + UpdatedFeaturesVec.push_back(Feature); + } + + if (!TargetInfo::initFeatureMap(Features, Diags, CPU, UpdatedFeaturesVec)) return false; // Can't do this earlier because we need to be able to explicitly enable @@ -126,20 +139,20 @@ bool X86TargetInfo::initFeatureMap( // Enable popcnt if sse4.2 is enabled and popcnt is not explicitly disabled. auto I = Features.find("sse4.2"); if (I != Features.end() && I->getValue() && - llvm::find(FeaturesVec, "-popcnt") == FeaturesVec.end()) + llvm::find(UpdatedFeaturesVec, "-popcnt") == UpdatedFeaturesVec.end()) Features["popcnt"] = true; // Additionally, if SSE is enabled and mmx is not explicitly disabled, // then enable MMX. I = Features.find("sse"); if (I != Features.end() && I->getValue() && - llvm::find(FeaturesVec, "-mmx") == FeaturesVec.end()) + llvm::find(UpdatedFeaturesVec, "-mmx") == UpdatedFeaturesVec.end()) Features["mmx"] = true; // Enable xsave if avx is enabled and xsave is not explicitly disabled. I = Features.find("avx"); if (I != Features.end() && I->getValue() && - llvm::find(FeaturesVec, "-xsave") == FeaturesVec.end()) + llvm::find(UpdatedFeaturesVec, "-xsave") == UpdatedFeaturesVec.end()) Features["xsave"] = true; return true; @@ -866,6 +879,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const { .Case("fma4", true) .Case("fsgsbase", true) .Case("fxsr", true) + .Case("general-regs-only", true) .Case("gfni", true) .Case("hreset", true) .Case("invpcid", true) diff --git a/clang/lib/Driver/ToolChains/Arch/X86.cpp b/clang/lib/Driver/ToolChains/Arch/X86.cpp index 94a53f9d9e467..12749c7ec871c 100644 --- a/clang/lib/Driver/ToolChains/Arch/X86.cpp +++ b/clang/lib/Driver/ToolChains/Arch/X86.cpp @@ -213,5 +213,24 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple, // Now add any that the user explicitly requested on the command line, // which may override the defaults. - handleTargetFeaturesGroup(Args, Features, options::OPT_m_x86_Features_Group); + for (const Arg *A : Args.filtered(options::OPT_m_x86_Features_Group, + options::OPT_mgeneral_regs_only)) { + StringRef Name = A->getOption().getName(); + A->claim(); + + // Skip over "-m". + assert(Name.startswith("m") && "Invalid feature name."); + Name = Name.substr(1); + + // Replace -mgeneral-regs-only with -x87, -mmx, -sse + if (A->getOption().getID() == options::OPT_mgeneral_regs_only) { + Features.insert(Features.end(), {"-x87", "-mmx", "-sse"}); + continue; + } + + bool IsNegative = Name.startswith("no-"); + if (IsNegative) + Name = Name.substr(3); + Features.push_back(Args.MakeArgString((IsNegative ? "-" : "+") + Name)); + } } diff --git a/clang/test/CodeGen/attr-target-general-regs-only-x86.c b/clang/test/CodeGen/attr-target-general-regs-only-x86.c new file mode 100644 index 0000000000000..f7fbd0bb27bf7 --- /dev/null +++ b/clang/test/CodeGen/attr-target-general-regs-only-x86.c @@ -0,0 +1,14 @@ +// Test general-regs-only target attribute on x86 + +// RUN: %clang_cc1 -triple i386-unknown-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o - | FileCheck %s + +// CHECK: define{{.*}} void @f() [[GPR_ATTRS:#[0-9]+]] +void __attribute__((target("general-regs-only"))) f() { } +// CHECK: define{{.*}} void @f_before() [[GPR_ATTRS:#[0-9]+]] +void __attribute__((target("avx2,general-regs-only"))) f_before() { } +// CHECK: define{{.*}} void @f_after() [[AVX2_ATTRS:#[0-9]+]] +void __attribute__((target("general-regs-only,avx2"))) f_after() { } + +// CHECK: attributes [[GPR_ATTRS]] = { {{.*}} "target-features"="{{.*}}-avx{{.*}}-avx2{{.*}}-avx512f{{.*}}-sse{{.*}}-sse2{{.*}}-ssse3{{.*}}-x87{{.*}}" +// CHECK: attributes [[AVX2_ATTRS]] = { {{.*}} "target-features"="{{.*}}+avx{{.*}}+avx2{{.*}}+sse{{.*}}+sse2{{.*}}+ssse3{{.*}}-avx512f{{.*}}-x87{{.*}}" diff --git a/clang/test/Driver/x86-mgeneral-regs-only.c b/clang/test/Driver/x86-mgeneral-regs-only.c new file mode 100644 index 0000000000000..35f96795eb9de --- /dev/null +++ b/clang/test/Driver/x86-mgeneral-regs-only.c @@ -0,0 +1,26 @@ +// Test the -mgeneral-regs-only option on x86 + +// RUN: %clang -target i386-unknown-linux-gnu -mgeneral-regs-only %s -### 2>&1 | FileCheck --check-prefix=CMD %s +// RUN: %clang -target x86_64-unknown-linux-gnu -mgeneral-regs-only %s -### 2>&1 | FileCheck --check-prefix=CMD %s +// RUN: %clang -target i386-unknown-linux-gnu -mavx2 -mgeneral-regs-only %s -### 2>&1 | FileCheck --check-prefixes=CMD,CMD-BEFORE %s +// RUN: %clang -target x86_64-unknown-linux-gnu -mavx2 -mgeneral-regs-only %s -### 2>&1 | FileCheck --check-prefixes=CMD,CMD-BEFORE %s +// RUN: %clang -target i386-unknown-linux-gnu -mgeneral-regs-only -mavx2 %s -### 2>&1 | FileCheck --check-prefixes=CMD,CMD-AFTER %s +// RUN: %clang -target x86_64-unknown-linux-gnu -mgeneral-regs-only -mavx2 %s -### 2>&1 | FileCheck --check-prefixes=CMD,CMD-AFTER %s + +// RUN: %clang -target i386-unknown-linux-gnu -mgeneral-regs-only -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-GPR %s +// RUN: %clang -target x86_64-unknown-linux-gnu -mgeneral-regs-only -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-GPR %s +// RUN: %clang -target i386-unknown-linux-gnu -mavx2 -mgeneral-regs-only -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-GPR %s +// RUN: %clang -target x86_64-unknown-linux-gnu -mavx2 -mgeneral-regs-only -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-GPR %s +// RUN: %clang -target i386-unknown-linux-gnu -mgeneral-regs-only -mavx2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-AVX2 %s +// RUN: %clang -target x86_64-unknown-linux-gnu -mgeneral-regs-only -mavx2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-AVX2 %s + +// CMD-BEFORE: "-target-feature" "+avx2" +// CMD: "-target-feature" "-x87" +// CMD: "-target-feature" "-mmx" +// CMD: "-target-feature" "-sse" +// CMD-AFTER: "-target-feature" "+avx2" + +void foo() { } + +// IR-GPR: attributes {{.*}} = { {{.*}} "target-features"="{{.*}}-avx{{.*}}-avx2{{.*}}-avx512f{{.*}}-sse{{.*}}-sse2{{.*}}-ssse3{{.*}}-x87{{.*}}" +// IR-AVX2: attributes {{.*}} = { {{.*}} "target-features"="{{.*}}+avx{{.*}}+avx2{{.*}}+sse{{.*}}+sse2{{.*}}+ssse3{{.*}}-avx512f{{.*}}-x87{{.*}}" From dcfc2c3fac980b137415c17f2f19c06c3e2bd7fb Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Mon, 28 Jun 2021 16:40:19 +0100 Subject: [PATCH 142/619] [NFC] Remove shadowed variable in InnerLoopVectorizer::createInductionVariable Avoid creating a IRBuilder stack variable with the same name as the class member. --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index e609fddbe1220..3988d2ff27368 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3071,7 +3071,9 @@ PHINode *InnerLoopVectorizer::createInductionVariable(Loop *L, Value *Start, if (!Latch) Latch = Header; - IRBuilder<> Builder(&*Header->getFirstInsertionPt()); + IRBuilder<>::InsertPointGuard Guard(Builder); + Builder.SetInsertPoint(&*Header->getFirstInsertionPt()); + Instruction *OldInst = getDebugLocFromInstOrOperands(OldInduction); setDebugLocFromInst(Builder, OldInst); auto *Induction = Builder.CreatePHI(Start->getType(), 2, "index"); From 6229369e5089b295203424a077c60a7449aab803 Mon Sep 17 00:00:00 2001 From: Alexander Shaposhnikov Date: Tue, 29 Jun 2021 01:18:48 -0700 Subject: [PATCH 143/619] Revert "[llvm-objcopy][MachO] Minor code cleanup" This reverts commit c94cf97b53566a26245c54ea0c41b0dc83daf8a0 since it appears to have broken linaro-clang-armv7-quick build bot and needs further investigation. --- llvm/tools/llvm-objcopy/MachO/MachOReader.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp b/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp index 4b0aeb9702679..050dd976f3f35 100644 --- a/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp +++ b/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp @@ -28,7 +28,7 @@ void MachOReader::readHeader(Object &O) const { } template -static Section constructSectionCommon(const SectionType &Sec, uint32_t Index) { +static Section constructSectionCommon(SectionType Sec, uint32_t Index) { StringRef SegName(Sec.segname, strnlen(Sec.segname, sizeof(Sec.segname))); StringRef SectName(Sec.sectname, strnlen(Sec.sectname, sizeof(Sec.sectname))); Section S(SegName, SectName); @@ -46,11 +46,14 @@ static Section constructSectionCommon(const SectionType &Sec, uint32_t Index) { return S; } -static Section constructSection(const MachO::section &Sec, uint32_t Index) { +template +Section constructSection(SectionType Sec, uint32_t Index); + +template <> Section constructSection(MachO::section Sec, uint32_t Index) { return constructSectionCommon(Sec, Index); } -static Section constructSection(const MachO::section_64 &Sec, uint32_t Index) { +template <> Section constructSection(MachO::section_64 Sec, uint32_t Index) { Section S = constructSectionCommon(Sec, Index); S.Reserved3 = Sec.reserved3; return S; From 51d969dc27a80704038b653537fc12a31f4c31f0 Mon Sep 17 00:00:00 2001 From: Soham Dixit Date: Tue, 29 Jun 2021 09:18:21 +0100 Subject: [PATCH 144/619] [DebugInfo] Bug 41152 - Improve dumping of empty location expressions Fixes PR41152 (https://bugs.llvm.org/show_bug.cgi?id=41152). Reviewed by: jhenderson, dblaikie, SouraVX Differential Revision: https://reviews.llvm.org/D103502 --- llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp | 3 +++ .../DebugInfo/X86/dwarf-empty-expression.s | 23 +++++++++++++++++++ .../MC/X86/dwarf-size-field-overflow.test | 2 +- 3 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 llvm/test/DebugInfo/X86/dwarf-empty-expression.s diff --git a/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp b/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp index d5015f00f1cfb..4b9be85f68853 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp @@ -326,6 +326,9 @@ void DWARFExpression::print(raw_ostream &OS, DIDumpOptions DumpOpts, bool IsEH) const { uint32_t EntryValExprSize = 0; uint64_t EntryValStartOffset = 0; + if (Data.getData().empty()) + OS << ""; + for (auto &Op : *this) { if (!Op.print(OS, DumpOpts, this, RegInfo, U, IsEH)) { uint64_t FailOffset = Op.getEndOffset(); diff --git a/llvm/test/DebugInfo/X86/dwarf-empty-expression.s b/llvm/test/DebugInfo/X86/dwarf-empty-expression.s new file mode 100644 index 0000000000000..7af0187827d8b --- /dev/null +++ b/llvm/test/DebugInfo/X86/dwarf-empty-expression.s @@ -0,0 +1,23 @@ +# RUN: llvm-mc -triple x86_64-unknown-linux -filetype=obj %s -o %t.o +# RUN: llvm-dwarfdump %t.o --debug-loclists | FileCheck %s + +# CHECK: DW_LLE_offset_pair (0x0000000000000000, 0x0000000000000000): + +.Lfunc_begin0: +.Ltmp1: +.section .debug_loclists, "",@progbits + .long .Ldebug_list_header_end0-.Ldebug_list_header_start0 # Length +.Ldebug_list_header_start0: + .short 5 # Version + .byte 8 # Address size + .byte 0 # Segment selector size + .long 1 # Offset entry count +.Lloclists_table_base0: + .long .Ldebug_loc0-.Lloclists_table_base0 +.Ldebug_loc0: + .byte 4 # DW_LLE_offset_pair + .uleb128 .Lfunc_begin0-.Lfunc_begin0 # starting offset + .uleb128 .Ltmp1-.Lfunc_begin0 # ending offset + .byte 0 ### empty + .byte 0 # DW_LLE_end_of_list +.Ldebug_list_header_end0: diff --git a/llvm/test/MC/X86/dwarf-size-field-overflow.test b/llvm/test/MC/X86/dwarf-size-field-overflow.test index 807de9aead27b..1a0fb0b6f8fa3 100644 --- a/llvm/test/MC/X86/dwarf-size-field-overflow.test +++ b/llvm/test/MC/X86/dwarf-size-field-overflow.test @@ -6,7 +6,7 @@ # # CHECK: 0x0000004d: DW_TAG_formal_parameter # CHECK-NEXT: DW_AT_location (0x00000000 -# CHECK-NEXT: [0x0000000000000000, 0x0000000000000008): ) +# CHECK-NEXT: [0x0000000000000000, 0x0000000000000008): ) # CHECK-NEXT: DW_AT_name ("self") import sys From 91fa3565da16f77e07270e5323874abc22661cb0 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 29 Jun 2021 08:56:50 +0100 Subject: [PATCH 145/619] [BasicAA] Be more careful with modulo ops on VariableGEPIndex. (V * Scale) % X may not produce the same result for any possible value of V, e.g. if the multiplication overflows. This means we currently incorrectly determine NoAlias in some cases. This patch updates LinearExpression to track whether the expression has NSW and uses that to adjust the scale used for alias checks. Reviewed By: nikic Differential Revision: https://reviews.llvm.org/D99424 --- .../llvm/Analysis/BasicAliasAnalysis.h | 3 + llvm/lib/Analysis/BasicAliasAnalysis.cpp | 59 ++++++++++++------- llvm/test/Analysis/BasicAA/gep-modulo.ll | 8 ++- 3 files changed, 45 insertions(+), 25 deletions(-) diff --git a/llvm/include/llvm/Analysis/BasicAliasAnalysis.h b/llvm/include/llvm/Analysis/BasicAliasAnalysis.h index 1468ad89c333f..991c0cbb642aa 100644 --- a/llvm/include/llvm/Analysis/BasicAliasAnalysis.h +++ b/llvm/include/llvm/Analysis/BasicAliasAnalysis.h @@ -116,6 +116,9 @@ class BasicAAResult : public AAResultBase { // Context instruction to use when querying information about this index. const Instruction *CxtI; + /// True if all operations in this expression are NSW. + bool IsNSW; + void dump() const { print(dbgs()); dbgs() << "\n"; diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp index 356259fe5a7a8..da489b8d457fb 100644 --- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -284,11 +284,14 @@ struct LinearExpression { APInt Scale; APInt Offset; + /// True if all operations in this expression are NSW. + bool IsNSW; + LinearExpression(const ExtendedValue &Val, const APInt &Scale, - const APInt &Offset) - : Val(Val), Scale(Scale), Offset(Offset) {} + const APInt &Offset, bool IsNSW) + : Val(Val), Scale(Scale), Offset(Offset), IsNSW(IsNSW) {} - LinearExpression(const ExtendedValue &Val) : Val(Val) { + LinearExpression(const ExtendedValue &Val) : Val(Val), IsNSW(true) { unsigned BitWidth = Val.getBitWidth(); Scale = APInt(BitWidth, 1); Offset = APInt(BitWidth, 0); @@ -307,7 +310,7 @@ static LinearExpression GetLinearExpression( if (const ConstantInt *Const = dyn_cast(Val.V)) return LinearExpression(Val, APInt(Val.getBitWidth(), 0), - Val.evaluateWith(Const->getValue())); + Val.evaluateWith(Const->getValue()), true); if (const BinaryOperator *BOp = dyn_cast(Val.V)) { if (ConstantInt *RHSC = dyn_cast(BOp->getOperand(1))) { @@ -322,6 +325,7 @@ static LinearExpression GetLinearExpression( if (!Val.canDistributeOver(NUW, NSW)) return Val; + LinearExpression E(Val); switch (BOp->getOpcode()) { default: // We don't understand this instruction, so we can't decompose it any @@ -336,23 +340,26 @@ static LinearExpression GetLinearExpression( LLVM_FALLTHROUGH; case Instruction::Add: { - LinearExpression E = GetLinearExpression( - Val.withValue(BOp->getOperand(0)), DL, Depth + 1, AC, DT); + E = GetLinearExpression(Val.withValue(BOp->getOperand(0)), DL, + Depth + 1, AC, DT); E.Offset += RHS; - return E; + E.IsNSW &= NSW; + break; } case Instruction::Sub: { - LinearExpression E = GetLinearExpression( - Val.withValue(BOp->getOperand(0)), DL, Depth + 1, AC, DT); + E = GetLinearExpression(Val.withValue(BOp->getOperand(0)), DL, + Depth + 1, AC, DT); E.Offset -= RHS; - return E; + E.IsNSW &= NSW; + break; } case Instruction::Mul: { - LinearExpression E = GetLinearExpression( - Val.withValue(BOp->getOperand(0)), DL, Depth + 1, AC, DT); + E = GetLinearExpression(Val.withValue(BOp->getOperand(0)), DL, + Depth + 1, AC, DT); E.Offset *= RHS; E.Scale *= RHS; - return E; + E.IsNSW &= NSW; + break; } case Instruction::Shl: // We're trying to linearize an expression of the kind: @@ -363,12 +370,14 @@ static LinearExpression GetLinearExpression( if (RHS.getLimitedValue() > Val.getBitWidth()) return Val; - LinearExpression E = GetLinearExpression( - Val.withValue(BOp->getOperand(0)), DL, Depth + 1, AC, DT); + E = GetLinearExpression(Val.withValue(BOp->getOperand(0)), DL, + Depth + 1, AC, DT); E.Offset <<= RHS.getLimitedValue(); E.Scale <<= RHS.getLimitedValue(); - return E; + E.IsNSW &= NSW; + break; } + return E; } } @@ -578,8 +587,8 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL, Scale = adjustToPointerSize(Scale, PointerSize); if (!!Scale) { - VariableGEPIndex Entry = {LE.Val.V, LE.Val.ZExtBits, LE.Val.SExtBits, - Scale, CxtI}; + VariableGEPIndex Entry = { + LE.Val.V, LE.Val.ZExtBits, LE.Val.SExtBits, Scale, CxtI, LE.IsNSW}; Decomposed.VarIndices.push_back(Entry); } } @@ -1138,7 +1147,11 @@ AliasResult BasicAAResult::aliasGEP( bool AllNonNegative = DecompGEP1.Offset.isNonNegative(); bool AllNonPositive = DecompGEP1.Offset.isNonPositive(); for (unsigned i = 0, e = DecompGEP1.VarIndices.size(); i != e; ++i) { - const APInt &Scale = DecompGEP1.VarIndices[i].Scale; + APInt Scale = DecompGEP1.VarIndices[i].Scale; + if (!DecompGEP1.VarIndices[i].IsNSW) + Scale = APInt::getOneBitSet(Scale.getBitWidth(), + Scale.countTrailingZeros()); + if (i == 0) GCD = Scale.abs(); else @@ -1701,9 +1714,10 @@ void BasicAAResult::GetIndexDifference( // If we found it, subtract off Scale V's from the entry in Dest. If it // goes to zero, remove the entry. - if (Dest[j].Scale != Scale) + if (Dest[j].Scale != Scale) { Dest[j].Scale -= Scale; - else + Dest[j].IsNSW = false; + } else Dest.erase(Dest.begin() + j); Scale = 0; break; @@ -1711,7 +1725,8 @@ void BasicAAResult::GetIndexDifference( // If we didn't consume this entry, add it to the end of the Dest list. if (!!Scale) { - VariableGEPIndex Entry = {V, ZExtBits, SExtBits, -Scale, Src[i].CxtI}; + VariableGEPIndex Entry = {V, ZExtBits, SExtBits, + -Scale, Src[i].CxtI, Src[i].IsNSW}; Dest.push_back(Entry); } } diff --git a/llvm/test/Analysis/BasicAA/gep-modulo.ll b/llvm/test/Analysis/BasicAA/gep-modulo.ll index 79782fad44872..e009ce498b06b 100644 --- a/llvm/test/Analysis/BasicAA/gep-modulo.ll +++ b/llvm/test/Analysis/BasicAA/gep-modulo.ll @@ -70,7 +70,7 @@ define void @may_overflow_mul_sub_i64([16 x i8]* %ptr, i64 %idx) { ; CHECK-LABEL: Function: may_overflow_mul_sub_i64: 3 pointers, 0 call sites ; CHECK-NEXT: MayAlias: [16 x i8]* %ptr, i8* %gep.idx ; CHECK-NEXT: PartialAlias (off 3): [16 x i8]* %ptr, i8* %gep.3 -; CHECK-NEXT: NoAlias: i8* %gep.3, i8* %gep.idx +; CHECK-NEXT: MayAlias: i8* %gep.3, i8* %gep.idx ; %mul = mul i64 %idx, 5 %sub = sub i64 %mul, 1 @@ -115,7 +115,7 @@ define void @only_nuw_mul_sub_i64([16 x i8]* %ptr, i64 %idx) { ; CHECK-LABEL: Function: only_nuw_mul_sub_i64: 3 pointers, 0 call sites ; CHECK-NEXT: MayAlias: [16 x i8]* %ptr, i8* %gep.idx ; CHECK-NEXT: PartialAlias (off 3): [16 x i8]* %ptr, i8* %gep.3 -; CHECK-NEXT: NoAlias: i8* %gep.3, i8* %gep.idx +; CHECK-NEXT: MayAlias: i8* %gep.3, i8* %gep.idx ; %mul = mul nuw i64 %idx, 5 %sub = sub nuw i64 %mul, 1 @@ -126,6 +126,8 @@ define void @only_nuw_mul_sub_i64([16 x i8]* %ptr, i64 %idx) { ret void } +; Even though the mul and sub may overflow %gep.idx and %gep.3 cannot alias +; because we multiply by a power-of-2. define void @may_overflow_mul_pow2_sub_i64([16 x i8]* %ptr, i64 %idx) { ; CHECK-LABEL: Function: may_overflow_mul_pow2_sub_i64: 3 pointers, 0 call sites ; CHECK-NEXT: MayAlias: [16 x i8]* %ptr, i8* %gep.idx @@ -259,7 +261,7 @@ define void @may_overflow_pointer_diff([16 x i8]* %ptr, i64 %idx) { ; CHECK-LABEL: Function: may_overflow_pointer_diff: 3 pointers, 0 call sites ; CHECK-NEXT: MayAlias: [16 x i8]* %ptr, i8* %gep.mul.1 ; CHECK-NEXT: MayAlias: [16 x i8]* %ptr, i8* %gep.sub.2 -; CHECK-NEXT: NoAlias: i8* %gep.mul.1, i8* %gep.sub.2 +; CHECK-NEXT: MayAlias: i8* %gep.mul.1, i8* %gep.sub.2 ; %mul.1 = mul i64 %idx, 6148914691236517207 %gep.mul.1 = getelementptr [16 x i8], [16 x i8]* %ptr, i32 0, i64 %mul.1 From b8bac6b33c576dec88add0e6a68e96219e278c8a Mon Sep 17 00:00:00 2001 From: Diana Picus Date: Fri, 11 Jun 2021 08:05:30 +0000 Subject: [PATCH 146/619] [flang] Add runtime interface for SYSTEM_CLOCK SYSTEM_CLOCK may take up to 3 optional parameters, all of which are INTENT(OUT). The COUNT and COUNT_MAX parameters are integer scalars, while COUNT_RATE may be a real or integer scalar. This patch breaks up the interface into 3 different functions, one for each parameter. All 3 return integers. It is up to lowering to convert the results to the preferred type. Differential Revision: https://reviews.llvm.org/D104851 --- flang/runtime/time-intrinsic.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/flang/runtime/time-intrinsic.h b/flang/runtime/time-intrinsic.h index 835f24c0b324e..391d72cef18fa 100644 --- a/flang/runtime/time-intrinsic.h +++ b/flang/runtime/time-intrinsic.h @@ -22,6 +22,12 @@ extern "C" { // real kind. double RTNAME(CpuTime)(); +// Interface for the SYSTEM_CLOCK intrinsic. We break it up into 3 distinct +// function calls, one for each of SYSTEM_CLOCK's optional output arguments. +// Lowering will have to cast the results to whatever type it prefers. +CppTypeFor RTNAME(SystemClockCount)(); +CppTypeFor RTNAME(SystemClockCountRate)(); +CppTypeFor RTNAME(SystemClockCountMax)(); } // extern "C" } // namespace Fortran::runtime #endif // FORTRAN_RUNTIME_TIME_INTRINSIC_H_ From 4d8871a898b30f11c905b27954c18d826c0953c9 Mon Sep 17 00:00:00 2001 From: Bruno De Fraine Date: Tue, 29 Jun 2021 09:46:27 +0200 Subject: [PATCH 147/619] PR50767: clear non-distinct debuginfo for function with nodebug definition after undecorated declaration Fix suggested by Yuanfang Chen: Non-distinct debuginfo is attached to the function due to the undecorated declaration. Later, when seeing the function definition and `nodebug` attribute, the non-distinct debuginfo should be cleared. Reviewed By: dblaikie Differential Revision: https://reviews.llvm.org/D104777 --- clang/lib/CodeGen/CodeGenFunction.cpp | 10 ++++++-- clang/test/CodeGen/attr-nodebug2.c | 34 +++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 2 deletions(-) create mode 100644 clang/test/CodeGen/attr-nodebug2.c diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index 0ca94657e8153..578b8a811817e 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -1304,8 +1304,14 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn, QualType ResTy = BuildFunctionArgList(GD, Args); // Check if we should generate debug info for this function. - if (FD->hasAttr()) - DebugInfo = nullptr; // disable debug info indefinitely for this function + if (FD->hasAttr()) { + // Clear non-distinct debug info that was possibly attached to the function + // due to an earlier declaration without the nodebug attribute + if (Fn) + Fn->setSubprogram(nullptr); + // Disable debug info indefinitely for this function + DebugInfo = nullptr; + } // The function might not have a body if we're generating thunks for a // function declaration. diff --git a/clang/test/CodeGen/attr-nodebug2.c b/clang/test/CodeGen/attr-nodebug2.c new file mode 100644 index 0000000000000..fd6eca1f74323 --- /dev/null +++ b/clang/test/CodeGen/attr-nodebug2.c @@ -0,0 +1,34 @@ +// RUN: %clang_cc1 -x c -debug-info-kind=limited -debugger-tuning=gdb -dwarf-version=4 -O -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -x c++ -debug-info-kind=limited -debugger-tuning=gdb -dwarf-version=4 -O -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s + +#ifdef __cplusplus +extern "C" { +#endif + +void t1(); + +void use() { t1(); } + +__attribute__((nodebug)) void t1() { + int a = 10; + a++; +} + +#ifdef __cplusplus +} +#endif + +// CHECK-LABEL: define{{.*}} void @use() +// CHECK-SAME: !dbg +// CHECK-SAME: { +// CHECK: !dbg +// CHECK: } + +// PR50767 Function __attribute__((nodebug)) inconsistency causes crash +// illegal (non-distinct) !dbg metadata was being added to _Z2t1v definition + +// CHECK-LABEL: define{{.*}} void @t1() +// CHECK-NOT: !dbg +// CHECK-SAME: { +// CHECK-NOT: !dbg +// CHECK: } From 6178ddcd2b70e6a187e1960f00f5985c390a80a0 Mon Sep 17 00:00:00 2001 From: Tianqing Wang Date: Tue, 29 Jun 2021 16:06:45 +0800 Subject: [PATCH 148/619] [X86] Add a test to reveal a bug in CMOV conversion. CMOV conversion first rewrites all CMOVs with memory load to branches. Then runs a second pass to convert other CMOVs in loops if profitable. But the first pass doesn't add new basic blocks to MachineLoopInfo, CMOVs in these blocks are ignored in the subsequent pass. Reviewed By: pengfei Differential Revision: https://reviews.llvm.org/D104692 --- llvm/test/CodeGen/X86/x86-cmov-converter.ll | 57 +++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/llvm/test/CodeGen/X86/x86-cmov-converter.ll b/llvm/test/CodeGen/X86/x86-cmov-converter.ll index 1f7565008c0f9..10cb3a503cb5e 100644 --- a/llvm/test/CodeGen/X86/x86-cmov-converter.ll +++ b/llvm/test/CodeGen/X86/x86-cmov-converter.ll @@ -483,4 +483,61 @@ entry: ret i32 %r } +@begin = external global i32* +@end = external global i32* + +define void @test_memoperand_loop(i32 %data) #0 { +; CHECK-LABEL: test_memoperand_loop: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq begin@GOTPCREL(%rip), %r8 +; CHECK-NEXT: movq (%r8), %rax +; CHECK-NEXT: movq end@GOTPCREL(%rip), %rcx +; CHECK-NEXT: movq (%rcx), %rdx +; CHECK-NEXT: xorl %esi, %esi +; CHECK-NEXT: movq %rax, %rcx +entry: + %begin = load i32*, i32** @begin, align 8 + %end = load i32*, i32** @end, align 8 + br label %loop.body + +; CHECK-NEXT: .LBB13_1: # %loop.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: addq $8, %rcx +; CHECK-NEXT: cmpq %rdx, %rcx +; CHECK-NEXT: ja .LBB13_3 +; CHECK-NEXT: # %bb.2: # %loop.body +; CHECK-NEXT: # in Loop: Header=BB13_1 Depth=1 +; CHECK-NEXT: movq (%r8), %rcx +; CHECK-NEXT: .LBB13_3: # %loop.body +; CHECK-NEXT: # in Loop: Header=BB13_1 Depth=1 +; CHECK-NEXT: movl %edi, (%rcx) +; CHECK-NEXT: addq $8, %rcx +; CHECK-NEXT: cmpq %rdx, %rcx +; CHECK-NEXT: cmovbeq %rax, %rcx +; CHECK-NEXT: movl %edi, (%rcx) +; CHECK-NEXT: addl $1, %esi +; CHECK-NEXT: cmpl $1024, %esi # imm = 0x400 +; CHECK-NEXT: jl .LBB13_1 +loop.body: + %phi.iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.body ] + %phi.ptr = phi i32* [ %begin, %entry ], [ %dst2, %loop.body ] + %gep1 = getelementptr inbounds i32, i32 *%phi.ptr, i64 2 + %cmp1 = icmp ugt i32* %gep1, %end + %begin_dup = load i32*, i32** @begin, align 8 + %dst1 = select i1 %cmp1, i32* %gep1, i32* %begin_dup + store i32 %data, i32 *%dst1, align 4 + %gep2 = getelementptr inbounds i32, i32 *%dst1, i64 2 + %cmp2 = icmp ugt i32* %gep2, %end + %dst2 = select i1 %cmp2, i32* %gep2, i32* %begin + store i32 %data, i32 *%dst2, align 4 + %iv.next = add i32 %phi.iv, 1 + %cond = icmp slt i32 %iv.next, 1024 + br i1 %cond, label %loop.body, label %exit + +; CHECK-NEXT: # %bb.4: # %exit +; CHECK-NEXT: retq +exit: + ret void +} + attributes #0 = {"target-cpu"="x86-64"} From fe1874dd2dd99c9811db515a2957e2a42f9f6868 Mon Sep 17 00:00:00 2001 From: Med Ismail Bennani Date: Mon, 28 Jun 2021 17:41:26 +0000 Subject: [PATCH 149/619] [lldb/Interpreter] Add setting to set session transcript save directory This patch introduces a new interpreter setting `interpreter.save-session-directory` so the user can specify a directory where the session transcripts will be saved. If not set, the session transcript are saved on a temporary file. rdar://72902842 Differential Revision: https://reviews.llvm.org/D105030 Signed-off-by: Med Ismail Bennani --- .../lldb/Interpreter/CommandInterpreter.h | 3 +++ .../source/Interpreter/CommandInterpreter.cpp | 22 ++++++++++++++++--- .../Interpreter/InterpreterProperties.td | 3 +++ .../commands/session/save/TestSessionSave.py | 19 +++++++++++++++- 4 files changed, 43 insertions(+), 4 deletions(-) diff --git a/lldb/include/lldb/Interpreter/CommandInterpreter.h b/lldb/include/lldb/Interpreter/CommandInterpreter.h index a8475ca610463..6430773de1b64 100644 --- a/lldb/include/lldb/Interpreter/CommandInterpreter.h +++ b/lldb/include/lldb/Interpreter/CommandInterpreter.h @@ -493,6 +493,9 @@ class CommandInterpreter : public Broadcaster, bool GetSaveSessionOnQuit() const; void SetSaveSessionOnQuit(bool enable); + FileSpec GetSaveSessionDirectory() const; + void SetSaveSessionDirectory(llvm::StringRef path); + bool GetEchoCommands() const; void SetEchoCommands(bool enable); diff --git a/lldb/source/Interpreter/CommandInterpreter.cpp b/lldb/source/Interpreter/CommandInterpreter.cpp index 2e07ff5703ff2..68e8edfc90583 100644 --- a/lldb/source/Interpreter/CommandInterpreter.cpp +++ b/lldb/source/Interpreter/CommandInterpreter.cpp @@ -160,6 +160,16 @@ void CommandInterpreter::SetSaveSessionOnQuit(bool enable) { m_collection_sp->SetPropertyAtIndexAsBoolean(nullptr, idx, enable); } +FileSpec CommandInterpreter::GetSaveSessionDirectory() const { + const uint32_t idx = ePropertySaveSessionDirectory; + return m_collection_sp->GetPropertyAtIndexAsFileSpec(nullptr, idx); +} + +void CommandInterpreter::SetSaveSessionDirectory(llvm::StringRef path) { + const uint32_t idx = ePropertySaveSessionDirectory; + m_collection_sp->SetPropertyAtIndexAsString(nullptr, idx, path); +} + bool CommandInterpreter::GetEchoCommands() const { const uint32_t idx = ePropertyEchoCommands; return m_collection_sp->GetPropertyAtIndexAsBoolean( @@ -2925,9 +2935,15 @@ bool CommandInterpreter::SaveTranscript( std::string now = llvm::to_string(std::chrono::system_clock::now()); std::replace(now.begin(), now.end(), ' ', '_'); const std::string file_name = "lldb_session_" + now + ".log"; - FileSpec tmp = HostInfo::GetGlobalTempDir(); - tmp.AppendPathComponent(file_name); - output_file = tmp.GetPath(); + + FileSpec save_location = GetSaveSessionDirectory(); + + if (!save_location) + save_location = HostInfo::GetGlobalTempDir(); + + FileSystem::Instance().Resolve(save_location); + save_location.AppendPathComponent(file_name); + output_file = save_location.GetPath(); } auto error_out = [&](llvm::StringRef error_message, std::string description) { diff --git a/lldb/source/Interpreter/InterpreterProperties.td b/lldb/source/Interpreter/InterpreterProperties.td index 1148c1b01def5..1c6f0206c489d 100644 --- a/lldb/source/Interpreter/InterpreterProperties.td +++ b/lldb/source/Interpreter/InterpreterProperties.td @@ -13,6 +13,9 @@ let Definition = "interpreter" in { Global, DefaultFalse, Desc<"If true, LLDB will save the session's transcripts before quitting.">; + def SaveSessionDirectory: Property<"save-session-directory", "FileSpec">, + DefaultStringValue<"">, + Desc<"A path where LLDB will save the session's transcripts. This is particularly useful when you can't set the session file, for example when using `save-session-on-quit`.">; def StopCmdSourceOnError: Property<"stop-command-source-on-error", "Boolean">, Global, DefaultTrue, diff --git a/lldb/test/API/commands/session/save/TestSessionSave.py b/lldb/test/API/commands/session/save/TestSessionSave.py index ec244a42efcac..e144ed19d1c50 100644 --- a/lldb/test/API/commands/session/save/TestSessionSave.py +++ b/lldb/test/API/commands/session/save/TestSessionSave.py @@ -1,7 +1,7 @@ """ Test the session save feature """ - +import os import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * @@ -72,3 +72,20 @@ def test_session_save(self): lines = raw.splitlines()[:-1] for line in lines: self.assertIn(line, content) + + td = tempfile.TemporaryDirectory() + res = lldb.SBCommandReturnObject() + interpreter.HandleCommand('settings set interpreter.save-session-directory ' + td.name, res) + self.assertTrue(res.Succeeded()) + + res = lldb.SBCommandReturnObject() + interpreter.HandleCommand('session save', res) + self.assertTrue(res.Succeeded()) + raw += self.raw_transcript_builder(cmd, res) + + with open(os.path.join(td.name, os.listdir(td.name)[0]), "r") as file: + content = file.read() + # Exclude last line, since session won't record it's own output + lines = raw.splitlines()[:-1] + for line in lines: + self.assertIn(line, content) From d6b64612bd92cda8b53ef348fb578983124c600f Mon Sep 17 00:00:00 2001 From: Med Ismail Bennani Date: Mon, 28 Jun 2021 19:27:55 +0000 Subject: [PATCH 150/619] [lldb/Interpreter] Fix session-save-on-quit when using ^D Previously, when `interpreter.save-session-on-quit` was enabled, lldb would save the session transcript only when running the `quit` command. This patch changes that so the transcripts are saved when the debugger object is destroyed if the setting is enabled. rdar://72902650 Differential Revision: https://reviews.llvm.org/D105038 Signed-off-by: Med Ismail Bennani --- lldb/source/Commands/CommandObjectQuit.cpp | 3 -- lldb/source/Core/Debugger.cpp | 12 ++++++ .../source/Interpreter/CommandInterpreter.cpp | 1 + .../commands/session/save/TestSessionSave.py | 37 ++++++++++++++++++- 4 files changed, 49 insertions(+), 4 deletions(-) diff --git a/lldb/source/Commands/CommandObjectQuit.cpp b/lldb/source/Commands/CommandObjectQuit.cpp index e4de347870753..6ac04290f603f 100644 --- a/lldb/source/Commands/CommandObjectQuit.cpp +++ b/lldb/source/Commands/CommandObjectQuit.cpp @@ -101,8 +101,5 @@ bool CommandObjectQuit::DoExecute(Args &command, CommandReturnObject &result) { m_interpreter.BroadcastEvent(event_type); result.SetStatus(eReturnStatusQuit); - if (m_interpreter.GetSaveSessionOnQuit()) - m_interpreter.SaveTranscript(result); - return true; } diff --git a/lldb/source/Core/Debugger.cpp b/lldb/source/Core/Debugger.cpp index 735e6f43ac69f..12210ed541bc1 100644 --- a/lldb/source/Core/Debugger.cpp +++ b/lldb/source/Core/Debugger.cpp @@ -23,6 +23,7 @@ #include "lldb/Host/Terminal.h" #include "lldb/Host/ThreadLauncher.h" #include "lldb/Interpreter/CommandInterpreter.h" +#include "lldb/Interpreter/CommandReturnObject.h" #include "lldb/Interpreter/OptionValue.h" #include "lldb/Interpreter/OptionValueProperties.h" #include "lldb/Interpreter/OptionValueSInt64.h" @@ -604,6 +605,17 @@ void Debugger::Destroy(DebuggerSP &debugger_sp) { if (!debugger_sp) return; + CommandInterpreter &cmd_interpreter = debugger_sp->GetCommandInterpreter(); + + if (cmd_interpreter.GetSaveSessionOnQuit()) { + CommandReturnObject result(debugger_sp->GetUseColor()); + cmd_interpreter.SaveTranscript(result); + if (result.Succeeded()) + debugger_sp->GetOutputStream() << result.GetOutputData() << '\n'; + else + debugger_sp->GetErrorStream() << result.GetErrorData() << '\n'; + } + debugger_sp->Clear(); if (g_debugger_list_ptr && g_debugger_list_mutex_ptr) { diff --git a/lldb/source/Interpreter/CommandInterpreter.cpp b/lldb/source/Interpreter/CommandInterpreter.cpp index 68e8edfc90583..00e9ccb762c32 100644 --- a/lldb/source/Interpreter/CommandInterpreter.cpp +++ b/lldb/source/Interpreter/CommandInterpreter.cpp @@ -2974,6 +2974,7 @@ bool CommandInterpreter::SaveTranscript( return error_out("Unable to write to destination file", "Bytes written do not match transcript size."); + result.SetStatus(eReturnStatusSuccessFinishNoResult); result.AppendMessageWithFormat("Session's transcripts saved to %s\n", output_file->c_str()); diff --git a/lldb/test/API/commands/session/save/TestSessionSave.py b/lldb/test/API/commands/session/save/TestSessionSave.py index e144ed19d1c50..2e25047e501a7 100644 --- a/lldb/test/API/commands/session/save/TestSessionSave.py +++ b/lldb/test/API/commands/session/save/TestSessionSave.py @@ -2,6 +2,8 @@ Test the session save feature """ import os +import tempfile + import lldb from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * @@ -57,7 +59,6 @@ def test_session_save(self): self.assertFalse(res.Succeeded()) raw += self.raw_transcript_builder(cmd, res) - import tempfile tf = tempfile.NamedTemporaryFile() output_file = tf.name @@ -89,3 +90,37 @@ def test_session_save(self): lines = raw.splitlines()[:-1] for line in lines: self.assertIn(line, content) + + @skipIfWindows + @skipIfReproducer + @no_debug_info_test + def test_session_save_on_quit(self): + raw = "" + interpreter = self.dbg.GetCommandInterpreter() + + td = tempfile.TemporaryDirectory() + + settings = [ + 'settings set interpreter.echo-commands true', + 'settings set interpreter.echo-comment-commands true', + 'settings set interpreter.stop-command-source-on-error false', + 'settings set interpreter.save-session-on-quit true', + 'settings set interpreter.save-session-directory ' + td.name, + ] + + for setting in settings: + res = lldb.SBCommandReturnObject() + interpreter.HandleCommand(setting, res) + raw += self.raw_transcript_builder(setting, res) + + self.dbg.Destroy(self.dbg) + + with open(os.path.join(td.name, os.listdir(td.name)[0]), "r") as file: + content = file.read() + # Exclude last line, since session won't record it's own output + lines = raw.splitlines()[:-1] + for line in lines: + self.assertIn(line, content) + + + From 78d309ce197c30593450e792b0c2dc7a575f0050 Mon Sep 17 00:00:00 2001 From: Ole Strohm Date: Wed, 23 Jun 2021 10:52:58 +0100 Subject: [PATCH 151/619] [OpenCL] Fix qualifiers check on binding references to temporaries Fix the qualifiers check from PR49733. Fixes: PR49733 Reviewed By: Anastasia Differential Revision: https://reviews.llvm.org/D103962 --- clang/lib/Sema/SemaInit.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp index f916299e4f560..240188f60592b 100644 --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -5065,9 +5065,9 @@ static void TryReferenceInitializationCore(Sema &S, // than, cv2; otherwise, the program is ill-formed. unsigned T1CVRQuals = T1Quals.getCVRQualifiers(); unsigned T2CVRQuals = T2Quals.getCVRQualifiers(); - if ((RefRelationship == Sema::Ref_Related && - (T1CVRQuals | T2CVRQuals) != T1CVRQuals) || - !T1Quals.isAddressSpaceSupersetOf(T2Quals)) { + if (RefRelationship == Sema::Ref_Related && + ((T1CVRQuals | T2CVRQuals) != T1CVRQuals || + !T1Quals.isAddressSpaceSupersetOf(T2Quals))) { Sequence.SetFailed(InitializationSequence::FK_ReferenceInitDropsQualifiers); return; } From 159024ce231502d4d68825c35c3548a14577f0fd Mon Sep 17 00:00:00 2001 From: Valeriy Savchenko Date: Fri, 18 Jun 2021 14:16:18 +0300 Subject: [PATCH 152/619] [analyzer] Implement getType for SVal This commit adds a function to the top-class of SVal hierarchy to provide type information about the value. That can be extremely useful when this is the only piece of information that the user is actually caring about. Additionally, this commit introduces a testing framework for writing unit-tests for symbolic values. Differential Revision: https://reviews.llvm.org/D104550 --- .../Core/PathSensitive/BasicValueFactory.h | 2 + .../StaticAnalyzer/Core/PathSensitive/SVals.h | 13 + clang/lib/StaticAnalyzer/Core/SVals.cpp | 59 +++ clang/unittests/StaticAnalyzer/CMakeLists.txt | 3 +- clang/unittests/StaticAnalyzer/SValTest.cpp | 366 ++++++++++++++++++ 5 files changed, 442 insertions(+), 1 deletion(-) create mode 100644 clang/unittests/StaticAnalyzer/SValTest.cpp diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/BasicValueFactory.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/BasicValueFactory.h index f59b254094db8..bb598af681666 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/BasicValueFactory.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/BasicValueFactory.h @@ -52,6 +52,8 @@ class CompoundValData : public llvm::FoldingSetNode { iterator begin() const { return L.begin(); } iterator end() const { return L.end(); } + QualType getType() const { return T; } + static void Profile(llvm::FoldingSetNodeID& ID, QualType T, llvm::ImmutableList L); diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h index b1c33713febd9..6199c8d8d179c 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h @@ -201,6 +201,19 @@ class SVal { SymExpr::symbol_iterator symbol_end() const { return SymExpr::symbol_end(); } + + /// Try to get a reasonable type for the given value. + /// + /// \returns The best approximation of the value type or Null. + /// In theory, all symbolic values should be typed, but this function + /// is still a WIP and might have a few blind spots. + /// + /// \note This function should not be used when the user has access to the + /// bound expression AST node as well, since AST always has exact types. + /// + /// \note Loc values are interpreted as pointer rvalues for the purposes of + /// this method. + QualType getType(const ASTContext &) const; }; inline raw_ostream &operator<<(raw_ostream &os, clang::ento::SVal V) { diff --git a/clang/lib/StaticAnalyzer/Core/SVals.cpp b/clang/lib/StaticAnalyzer/Core/SVals.cpp index 252596887e4f1..117546e43b1a1 100644 --- a/clang/lib/StaticAnalyzer/Core/SVals.cpp +++ b/clang/lib/StaticAnalyzer/Core/SVals.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h" +#include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/Expr.h" @@ -21,6 +22,7 @@ #include "clang/StaticAnalyzer/Core/PathSensitive/BasicValueFactory.h" #include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h" #include "clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/SValVisitor.h" #include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h" #include "clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h" #include "llvm/ADT/Optional.h" @@ -136,6 +138,63 @@ const MemRegion *SVal::getAsRegion() const { return nullptr; } +namespace { +class TypeRetrievingVisitor + : public FullSValVisitor { +private: + const ASTContext &Context; + +public: + TypeRetrievingVisitor(const ASTContext &Context) : Context(Context) {} + + QualType VisitLocMemRegionVal(loc::MemRegionVal MRV) { + return Visit(MRV.getRegion()); + } + QualType VisitLocGotoLabel(loc::GotoLabel GL) { + return QualType{Context.VoidPtrTy}; + } + template QualType VisitConcreteInt(ConcreteInt CI) { + const llvm::APSInt &Value = CI.getValue(); + return Context.getIntTypeForBitwidth(Value.getBitWidth(), Value.isSigned()); + } + QualType VisitLocConcreteInt(loc::ConcreteInt CI) { + return VisitConcreteInt(CI); + } + QualType VisitNonLocConcreteInt(nonloc::ConcreteInt CI) { + return VisitConcreteInt(CI); + } + QualType VisitNonLocLocAsInteger(nonloc::LocAsInteger LI) { + QualType NestedType = Visit(LI.getLoc()); + if (NestedType.isNull()) + return NestedType; + + return Context.getIntTypeForBitwidth(LI.getNumBits(), + NestedType->isSignedIntegerType()); + } + QualType VisitNonLocCompoundVal(nonloc::CompoundVal CV) { + return CV.getValue()->getType(); + } + QualType VisitNonLocLazyCompoundVal(nonloc::LazyCompoundVal LCV) { + return LCV.getRegion()->getValueType(); + } + QualType VisitNonLocSymbolVal(nonloc::SymbolVal SV) { + return Visit(SV.getSymbol()); + } + QualType VisitSymbolicRegion(const SymbolicRegion *SR) { + return Visit(SR->getSymbol()); + } + QualType VisitTypedRegion(const TypedRegion *TR) { + return TR->getLocationType(); + } + QualType VisitSymExpr(const SymExpr *SE) { return SE->getType(); } +}; +} // end anonymous namespace + +QualType SVal::getType(const ASTContext &Context) const { + TypeRetrievingVisitor TRV{Context}; + return TRV.Visit(*this); +} + const MemRegion *loc::MemRegionVal::stripCasts(bool StripBaseCasts) const { const MemRegion *R = getRegion(); return R ? R->StripCasts(StripBaseCasts) : nullptr; diff --git a/clang/unittests/StaticAnalyzer/CMakeLists.txt b/clang/unittests/StaticAnalyzer/CMakeLists.txt index 0e6d8763d96d2..4de6bec4d2167 100644 --- a/clang/unittests/StaticAnalyzer/CMakeLists.txt +++ b/clang/unittests/StaticAnalyzer/CMakeLists.txt @@ -11,8 +11,9 @@ add_clang_unittest(StaticAnalysisTests ParamRegionTest.cpp RangeSetTest.cpp RegisterCustomCheckersTest.cpp - StoreTest.cpp + StoreTest.cpp SymbolReaperTest.cpp + SValTest.cpp TestReturnValueUnderConstruction.cpp ) diff --git a/clang/unittests/StaticAnalyzer/SValTest.cpp b/clang/unittests/StaticAnalyzer/SValTest.cpp new file mode 100644 index 0000000000000..0956984868384 --- /dev/null +++ b/clang/unittests/StaticAnalyzer/SValTest.cpp @@ -0,0 +1,366 @@ +//===- unittests/StaticAnalyzer/SvalTest.cpp ------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "CheckerRegistration.h" + +#include "clang/AST/ASTContext.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclGroup.h" +#include "clang/AST/RecursiveASTVisitor.h" +#include "clang/AST/Stmt.h" +#include "clang/AST/Type.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h" +#include "clang/StaticAnalyzer/Frontend/AnalysisConsumer.h" +#include "clang/StaticAnalyzer/Frontend/CheckerRegistry.h" +#include "clang/Tooling/Tooling.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/raw_ostream.h" +#include "gtest/gtest.h" + +namespace clang { + +// getType() tests include whole bunch of type comparisons, +// so when something is wrong, it's good to have gtest telling us +// what are those types. +LLVM_ATTRIBUTE_UNUSED std::ostream &operator<<(std::ostream &OS, + const QualType &T) { + return OS << T.getAsString(); +} + +LLVM_ATTRIBUTE_UNUSED std::ostream &operator<<(std::ostream &OS, + const CanQualType &T) { + return OS << QualType{T}; +} + +namespace ento { +namespace { + +//===----------------------------------------------------------------------===// +// Testing framework implementation +//===----------------------------------------------------------------------===// + +/// A simple map from variable names to symbolic values used to init them. +using SVals = llvm::StringMap; + +/// SValCollector is the barebone of all tests. +/// +/// It is implemented as a checker and reacts to binds, so we find +/// symbolic values of interest, and to end analysis, where we actually +/// can test whatever we gathered. +class SValCollector : public Checker { +public: + void checkBind(SVal Loc, SVal Val, const Stmt *S, CheckerContext &C) const { + // Skip instantly if we finished testing. + // Also, we care only for binds happening in variable initializations. + if (Tested || !isa(S)) + return; + + if (const auto *VR = llvm::dyn_cast_or_null(Loc.getAsRegion())) { + CollectedSVals[VR->getDescriptiveName(false)] = Val; + } + } + + void checkEndAnalysis(ExplodedGraph &G, BugReporter &B, + ExprEngine &Engine) const { + if (!Tested) { + test(Engine, Engine.getContext()); + Tested = true; + CollectedSVals.clear(); + } + } + + /// Helper function for tests to access bound symbolic values. + SVal getByName(StringRef Name) const { return CollectedSVals[Name]; } + +private: + /// Entry point for tests. + virtual void test(ExprEngine &Engine, const ASTContext &Context) const = 0; + + mutable bool Tested = false; + mutable SVals CollectedSVals; +}; + +// SVAL_TEST is a combined way of providing a short code snippet and +// to test some programmatic predicates on symbolic values produced by the +// engine for the actual code. +// +// Each test has a NAME. One can think of it as a name for normal gtests. +// +// Each test should provide a CODE snippet. Code snippets might contain any +// valid C/C++, but have ONLY ONE defined function. There are no requirements +// about function's name or parameters. It can even be a class method. The +// body of the function must contain a set of variable declarations. Each +// variable declaration gets bound to a symbolic value, so for the following +// example: +// +// int x = ; +// +// `x` will be bound to whatever symbolic value the engine produced for . +// LIVENESS and REASSIGNMENTS don't affect this binding. +// +// During the test the actual values can be accessed via `getByName` function, +// and, for the `x`-bound value, one must use "x" as its name. +// +// Example: +// SVAL_TEST(SimpleSValTest, R"( +// void foo() { +// int x = 42; +// })") { +// SVal X = getByName("x"); +// EXPECT_TRUE(X.isConstant(42)); +// } +#define SVAL_TEST(NAME, CODE) \ + class NAME##SValCollector final : public SValCollector { \ + public: \ + void test(ExprEngine &Engine, const ASTContext &Context) const override; \ + }; \ + \ + void add##NAME##SValCollector(AnalysisASTConsumer &AnalysisConsumer, \ + AnalyzerOptions &AnOpts) { \ + AnOpts.CheckersAndPackages = {{"test.##NAME##SValCollector", true}}; \ + AnalysisConsumer.AddCheckerRegistrationFn([](CheckerRegistry &Registry) { \ + Registry.addChecker("test.##NAME##SValCollector", \ + "Description", ""); \ + }); \ + } \ + \ + TEST(SValTest, NAME) { runCheckerOnCode(CODE); } \ + void NAME##SValCollector::test(ExprEngine &Engine, \ + const ASTContext &Context) const + +//===----------------------------------------------------------------------===// +// Actual tests +//===----------------------------------------------------------------------===// + +SVAL_TEST(GetConstType, R"( +void foo() { + int x = 42; + int *y = nullptr; +})") { + SVal X = getByName("x"); + ASSERT_FALSE(X.getType(Context).isNull()); + EXPECT_EQ(Context.IntTy, X.getType(Context)); + + SVal Y = getByName("y"); + ASSERT_FALSE(Y.getType(Context).isNull()); + EXPECT_EQ(Context.getUIntPtrType(), Y.getType(Context)); +} + +SVAL_TEST(GetLocAsIntType, R"( +void foo(int *x) { + long int a = (long int)x; + unsigned b = (long unsigned)&a; + int c = (long int)nullptr; +})") { + SVal A = getByName("a"); + ASSERT_FALSE(A.getType(Context).isNull()); + // TODO: Turn it into signed long + EXPECT_EQ(Context.UnsignedLongTy, A.getType(Context)); + + SVal B = getByName("b"); + ASSERT_FALSE(B.getType(Context).isNull()); + EXPECT_EQ(Context.UnsignedIntTy, B.getType(Context)); + + SVal C = getByName("c"); + ASSERT_FALSE(C.getType(Context).isNull()); + EXPECT_EQ(Context.IntTy, C.getType(Context)); +} + +SVAL_TEST(GetSymExprType, R"( +void foo(int a, int b) { + int x = a; + int y = a + b; + long z = a; +})") { + QualType Int = Context.IntTy; + + SVal X = getByName("x"); + ASSERT_FALSE(X.getType(Context).isNull()); + EXPECT_EQ(Int, X.getType(Context)); + + SVal Y = getByName("y"); + ASSERT_FALSE(Y.getType(Context).isNull()); + EXPECT_EQ(Int, Y.getType(Context)); + + // TODO: Change to Long when we support symbolic casts + SVal Z = getByName("z"); + ASSERT_FALSE(Z.getType(Context).isNull()); + EXPECT_EQ(Int, Z.getType(Context)); +} + +SVAL_TEST(GetPointerType, R"( +int *bar(); +int &foobar(); +struct Z { + int a; + int *b; +}; +void foo(int x, int *y, Z z) { + int &a = x; + int &b = *y; + int &c = *bar(); + int &d = foobar(); + int &e = z.a; + int &f = *z.b; +})") { + QualType Int = Context.IntTy; + + SVal A = getByName("a"); + ASSERT_FALSE(A.getType(Context).isNull()); + const auto *APtrTy = dyn_cast(A.getType(Context)); + ASSERT_NE(APtrTy, nullptr); + EXPECT_EQ(Int, APtrTy->getPointeeType()); + + SVal B = getByName("b"); + ASSERT_FALSE(B.getType(Context).isNull()); + const auto *BPtrTy = dyn_cast(B.getType(Context)); + ASSERT_NE(BPtrTy, nullptr); + EXPECT_EQ(Int, BPtrTy->getPointeeType()); + + SVal C = getByName("c"); + ASSERT_FALSE(C.getType(Context).isNull()); + const auto *CPtrTy = dyn_cast(C.getType(Context)); + ASSERT_NE(CPtrTy, nullptr); + EXPECT_EQ(Int, CPtrTy->getPointeeType()); + + SVal D = getByName("d"); + ASSERT_FALSE(D.getType(Context).isNull()); + const auto *DRefTy = dyn_cast(D.getType(Context)); + ASSERT_NE(DRefTy, nullptr); + EXPECT_EQ(Int, DRefTy->getPointeeType()); + + SVal E = getByName("e"); + ASSERT_FALSE(E.getType(Context).isNull()); + const auto *EPtrTy = dyn_cast(E.getType(Context)); + ASSERT_NE(EPtrTy, nullptr); + EXPECT_EQ(Int, EPtrTy->getPointeeType()); + + SVal F = getByName("f"); + ASSERT_FALSE(F.getType(Context).isNull()); + const auto *FPtrTy = dyn_cast(F.getType(Context)); + ASSERT_NE(FPtrTy, nullptr); + EXPECT_EQ(Int, FPtrTy->getPointeeType()); +} + +SVAL_TEST(GetCompoundType, R"( +struct TestStruct { + int a, b; +}; +union TestUnion { + int a; + float b; + TestStruct c; +}; +void foo(int x) { + int a[] = {1, x, 2}; + TestStruct b = {x, 42}; + TestUnion c = {42}; + TestUnion d = {.c=b}; +} +)") { + SVal A = getByName("a"); + ASSERT_FALSE(A.getType(Context).isNull()); + const auto *AArrayType = dyn_cast(A.getType(Context)); + ASSERT_NE(AArrayType, nullptr); + EXPECT_EQ(Context.IntTy, AArrayType->getElementType()); + + SVal B = getByName("b"); + ASSERT_FALSE(B.getType(Context).isNull()); + const auto *BRecordType = dyn_cast(B.getType(Context)); + ASSERT_NE(BRecordType, nullptr); + EXPECT_EQ("TestStruct", BRecordType->getDecl()->getName()); + + SVal C = getByName("c"); + ASSERT_FALSE(C.getType(Context).isNull()); + const auto *CRecordType = dyn_cast(C.getType(Context)); + ASSERT_NE(CRecordType, nullptr); + EXPECT_EQ("TestUnion", CRecordType->getDecl()->getName()); + + auto D = getByName("d").getAs(); + ASSERT_TRUE(D.hasValue()); + auto Begin = D->begin(); + ASSERT_NE(D->end(), Begin); + ++Begin; + ASSERT_EQ(D->end(), Begin); + auto LD = D->begin()->getAs(); + ASSERT_TRUE(LD.hasValue()); + auto LDT = LD->getType(Context); + ASSERT_FALSE(LDT.isNull()); + const auto *DRecordType = dyn_cast(LDT); + ASSERT_NE(DRecordType, nullptr); + EXPECT_EQ("TestStruct", DRecordType->getDecl()->getName()); +} + +SVAL_TEST(GetStringType, R"( +void foo() { + const char *a = "Hello, world!"; +} +)") { + SVal A = getByName("a"); + ASSERT_FALSE(A.getType(Context).isNull()); + const auto *APtrTy = dyn_cast(A.getType(Context)); + ASSERT_NE(APtrTy, nullptr); + EXPECT_EQ(Context.CharTy, APtrTy->getPointeeType()); +} + +SVAL_TEST(GetThisType, R"( +class TestClass { + void foo(); +}; +void TestClass::foo() { + const auto *a = this; +} +)") { + SVal A = getByName("a"); + ASSERT_FALSE(A.getType(Context).isNull()); + const auto *APtrTy = dyn_cast(A.getType(Context)); + ASSERT_NE(APtrTy, nullptr); + const auto *ARecordType = dyn_cast(APtrTy->getPointeeType()); + ASSERT_NE(ARecordType, nullptr); + EXPECT_EQ("TestClass", ARecordType->getDecl()->getName()); +} + +SVAL_TEST(GetFunctionPtrType, R"( +void bar(); +void foo() { + auto *a = &bar; +} +)") { + SVal A = getByName("a"); + ASSERT_FALSE(A.getType(Context).isNull()); + const auto *APtrTy = dyn_cast(A.getType(Context)); + ASSERT_NE(APtrTy, nullptr); + ASSERT_TRUE(isa(APtrTy->getPointeeType())); +} + +SVAL_TEST(GetLabelType, R"( +void foo() { + entry: + void *a = &&entry; + char *b = (char *)&&entry; +} +)") { + SVal A = getByName("a"); + ASSERT_FALSE(A.getType(Context).isNull()); + EXPECT_EQ(Context.VoidPtrTy, A.getType(Context)); + + SVal B = getByName("a"); + ASSERT_FALSE(B.getType(Context).isNull()); + // TODO: Change to CharTy when we support symbolic casts + EXPECT_EQ(Context.VoidPtrTy, B.getType(Context)); +} + +} // namespace +} // namespace ento +} // namespace clang From fc6a5d85ea2e18886f352f1ac5f9399ddeede336 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Tue, 29 Jun 2021 09:44:26 +0000 Subject: [PATCH 153/619] [gn build] Port 159024ce2315 --- llvm/utils/gn/secondary/clang/unittests/StaticAnalyzer/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/clang/unittests/StaticAnalyzer/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/StaticAnalyzer/BUILD.gn index dcd3fa8ed5d6a..86bc4531be120 100644 --- a/llvm/utils/gn/secondary/clang/unittests/StaticAnalyzer/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/unittests/StaticAnalyzer/BUILD.gn @@ -18,6 +18,7 @@ unittest("StaticAnalysisTests") { "ParamRegionTest.cpp", "RangeSetTest.cpp", "RegisterCustomCheckersTest.cpp", + "SValTest.cpp", "SymbolReaperTest.cpp", "TestReturnValueUnderConstruction.cpp", ] From c76fe67a7be02c7a44fb8cb64586cc4417321190 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Tue, 29 Jun 2021 09:44:27 +0000 Subject: [PATCH 154/619] [gn build] Port 9b02a9b40150 --- llvm/utils/gn/secondary/clang/unittests/StaticAnalyzer/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/clang/unittests/StaticAnalyzer/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/StaticAnalyzer/BUILD.gn index 86bc4531be120..2db08c2a9d254 100644 --- a/llvm/utils/gn/secondary/clang/unittests/StaticAnalyzer/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/unittests/StaticAnalyzer/BUILD.gn @@ -19,6 +19,7 @@ unittest("StaticAnalysisTests") { "RangeSetTest.cpp", "RegisterCustomCheckersTest.cpp", "SValTest.cpp", + "StoreTest.cpp", "SymbolReaperTest.cpp", "TestReturnValueUnderConstruction.cpp", ] From b2842298cebf420ecb3750bf309021a7f37870c1 Mon Sep 17 00:00:00 2001 From: Valeriy Savchenko Date: Tue, 29 Jun 2021 12:59:41 +0300 Subject: [PATCH 155/619] [analyzer] Fix SValTest for LocAsInt test --- clang/unittests/StaticAnalyzer/SValTest.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/unittests/StaticAnalyzer/SValTest.cpp b/clang/unittests/StaticAnalyzer/SValTest.cpp index 0956984868384..ea10d69d2804e 100644 --- a/clang/unittests/StaticAnalyzer/SValTest.cpp +++ b/clang/unittests/StaticAnalyzer/SValTest.cpp @@ -166,7 +166,7 @@ void foo(int *x) { SVal A = getByName("a"); ASSERT_FALSE(A.getType(Context).isNull()); // TODO: Turn it into signed long - EXPECT_EQ(Context.UnsignedLongTy, A.getType(Context)); + EXPECT_EQ(Context.getUIntPtrType(), A.getType(Context)); SVal B = getByName("b"); ASSERT_FALSE(B.getType(Context).isNull()); From 371ee32e01a788a6dfc62cb7b10a94b80fe28425 Mon Sep 17 00:00:00 2001 From: David Green Date: Tue, 29 Jun 2021 11:03:19 +0100 Subject: [PATCH 156/619] [ARM] Fold extract of ARM_BUILD_VECTOR This adds a small fold for extract (ARM_BUILD_VECTOR) to fold to the original node. This can help simplify the resulting codegen in some cases. Differential Revision: https://reviews.llvm.org/D104860 --- llvm/lib/Target/ARM/ARMISelLowering.cpp | 7 + .../CodeGen/ARM/big-endian-vector-callee.ll | 24 +- .../CodeGen/ARM/big-endian-vector-caller.ll | 24 +- llvm/test/CodeGen/Thumb2/mve-shuffle.ll | 90 ++-- .../test/CodeGen/Thumb2/mve-soft-float-abi.ll | 96 ++-- llvm/test/CodeGen/Thumb2/mve-vld3.ll | 454 ++++++++--------- llvm/test/CodeGen/Thumb2/mve-vld4.ll | 82 ++-- llvm/test/CodeGen/Thumb2/mve-vmull-splat.ll | 148 +++--- llvm/test/CodeGen/Thumb2/mve-vst3.ll | 460 +++++++++--------- llvm/test/CodeGen/Thumb2/mve-vst4.ll | 95 ++-- 10 files changed, 729 insertions(+), 751 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 0bd4306309f28..43b8cec412f85 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -14659,6 +14659,13 @@ static SDValue PerformExtractEltCombine(SDNode *N, return X; } + // extract ARM_BUILD_VECTOR -> x + if (Op0->getOpcode() == ARMISD::BUILD_VECTOR && + isa(N->getOperand(1)) && + N->getConstantOperandVal(1) < Op0.getNumOperands()) { + return Op0.getOperand(N->getConstantOperandVal(1)); + } + // extract(bitcast(BUILD_VECTOR(VMOVDRR(a, b), ..))) -> a or b if (Op0.getValueType() == MVT::v4i32 && isa(N->getOperand(1)) && diff --git a/llvm/test/CodeGen/ARM/big-endian-vector-callee.ll b/llvm/test/CodeGen/ARM/big-endian-vector-callee.ll index c0c9d71e197f9..cb1da99d3fc37 100644 --- a/llvm/test/CodeGen/ARM/big-endian-vector-callee.ll +++ b/llvm/test/CodeGen/ARM/big-endian-vector-callee.ll @@ -1290,10 +1290,10 @@ define <2 x double> @test_v2f64_f128(fp128 %p) { ; SOFT-NEXT: vmov.32 d17[1], r3 ; SOFT-NEXT: vmov.32 d16[1], r1 ; SOFT-NEXT: vrev64.32 q8, q8 -; SOFT-NEXT: vadd.f64 d19, d17, d17 ; SOFT-NEXT: vadd.f64 d18, d16, d16 +; SOFT-NEXT: vadd.f64 d16, d17, d17 ; SOFT-NEXT: vmov r1, r0, d18 -; SOFT-NEXT: vmov r3, r2, d19 +; SOFT-NEXT: vmov r3, r2, d16 ; SOFT-NEXT: add sp, sp, #16 ; SOFT-NEXT: pop {r11, pc} ; @@ -1326,10 +1326,10 @@ define <2 x double> @test_v2f64_v2i64(<2 x i64> %p) { ; SOFT-NEXT: vmov d17, r3, r2 ; SOFT-NEXT: vmov d16, r1, r0 ; SOFT-NEXT: vadd.i64 q8, q8, q8 -; SOFT-NEXT: vadd.f64 d19, d17, d17 ; SOFT-NEXT: vadd.f64 d18, d16, d16 +; SOFT-NEXT: vadd.f64 d16, d17, d17 ; SOFT-NEXT: vmov r1, r0, d18 -; SOFT-NEXT: vmov r3, r2, d19 +; SOFT-NEXT: vmov r3, r2, d16 ; SOFT-NEXT: bx lr ; ; HARD-LABEL: test_v2f64_v2i64: @@ -1352,10 +1352,10 @@ define <2 x double> @test_v2f64_v4f32(<4 x float> %p) { ; SOFT-NEXT: vrev64.32 q8, q8 ; SOFT-NEXT: vadd.f32 q8, q8, q8 ; SOFT-NEXT: vrev64.32 q8, q8 -; SOFT-NEXT: vadd.f64 d19, d17, d17 ; SOFT-NEXT: vadd.f64 d18, d16, d16 +; SOFT-NEXT: vadd.f64 d16, d17, d17 ; SOFT-NEXT: vmov r1, r0, d18 -; SOFT-NEXT: vmov r3, r2, d19 +; SOFT-NEXT: vmov r3, r2, d16 ; SOFT-NEXT: bx lr ; ; HARD-LABEL: test_v2f64_v4f32: @@ -1380,10 +1380,10 @@ define <2 x double> @test_v2f64_v4i32(<4 x i32> %p) { ; SOFT-NEXT: vrev64.32 q8, q8 ; SOFT-NEXT: vadd.i32 q8, q8, q8 ; SOFT-NEXT: vrev64.32 q8, q8 -; SOFT-NEXT: vadd.f64 d19, d17, d17 ; SOFT-NEXT: vadd.f64 d18, d16, d16 +; SOFT-NEXT: vadd.f64 d16, d17, d17 ; SOFT-NEXT: vmov r1, r0, d18 -; SOFT-NEXT: vmov r3, r2, d19 +; SOFT-NEXT: vmov r3, r2, d16 ; SOFT-NEXT: bx lr ; ; HARD-LABEL: test_v2f64_v4i32: @@ -1408,10 +1408,10 @@ define <2 x double> @test_v2f64_v8i16(<8 x i16> %p) { ; SOFT-NEXT: vrev64.16 q8, q8 ; SOFT-NEXT: vadd.i16 q8, q8, q8 ; SOFT-NEXT: vrev64.16 q8, q8 -; SOFT-NEXT: vadd.f64 d19, d17, d17 ; SOFT-NEXT: vadd.f64 d18, d16, d16 +; SOFT-NEXT: vadd.f64 d16, d17, d17 ; SOFT-NEXT: vmov r1, r0, d18 -; SOFT-NEXT: vmov r3, r2, d19 +; SOFT-NEXT: vmov r3, r2, d16 ; SOFT-NEXT: bx lr ; ; HARD-LABEL: test_v2f64_v8i16: @@ -1436,10 +1436,10 @@ define <2 x double> @test_v2f64_v16i8(<16 x i8> %p) { ; SOFT-NEXT: vrev64.8 q8, q8 ; SOFT-NEXT: vadd.i8 q8, q8, q8 ; SOFT-NEXT: vrev64.8 q8, q8 -; SOFT-NEXT: vadd.f64 d19, d17, d17 ; SOFT-NEXT: vadd.f64 d18, d16, d16 +; SOFT-NEXT: vadd.f64 d16, d17, d17 ; SOFT-NEXT: vmov r1, r0, d18 -; SOFT-NEXT: vmov r3, r2, d19 +; SOFT-NEXT: vmov r3, r2, d16 ; SOFT-NEXT: bx lr ; ; HARD-LABEL: test_v2f64_v16i8: diff --git a/llvm/test/CodeGen/ARM/big-endian-vector-caller.ll b/llvm/test/CodeGen/ARM/big-endian-vector-caller.ll index bf4e8a918b2df..7aaf4ae0bfb53 100644 --- a/llvm/test/CodeGen/ARM/big-endian-vector-caller.ll +++ b/llvm/test/CodeGen/ARM/big-endian-vector-caller.ll @@ -1686,10 +1686,10 @@ define void @test_f128_v2f64(<2 x double>* %p, fp128* %q) { ; SOFT-NEXT: sub sp, sp, #16 ; SOFT-NEXT: vld1.64 {d16, d17}, [r0] ; SOFT-NEXT: mov r4, r1 -; SOFT-NEXT: vadd.f64 d19, d17, d17 ; SOFT-NEXT: vadd.f64 d18, d16, d16 +; SOFT-NEXT: vadd.f64 d16, d17, d17 ; SOFT-NEXT: vmov r1, r0, d18 -; SOFT-NEXT: vmov r3, r2, d19 +; SOFT-NEXT: vmov r3, r2, d16 ; SOFT-NEXT: bl test_f128_v2f64_helper ; SOFT-NEXT: stm sp, {r0, r1, r2, r3} ; SOFT-NEXT: bl __addtf3 @@ -2272,10 +2272,10 @@ define void @test_v2i64_v2f64(<2 x double>* %p, <2 x i64>* %q) { ; SOFT-NEXT: push {r4, lr} ; SOFT-NEXT: vld1.64 {d16, d17}, [r0] ; SOFT-NEXT: mov r4, r1 -; SOFT-NEXT: vadd.f64 d19, d17, d17 ; SOFT-NEXT: vadd.f64 d18, d16, d16 +; SOFT-NEXT: vadd.f64 d16, d17, d17 ; SOFT-NEXT: vmov r1, r0, d18 -; SOFT-NEXT: vmov r3, r2, d19 +; SOFT-NEXT: vmov r3, r2, d16 ; SOFT-NEXT: bl test_v2i64_v2f64_helper ; SOFT-NEXT: vmov d17, r3, r2 ; SOFT-NEXT: vmov d16, r1, r0 @@ -2528,10 +2528,10 @@ define void @test_v4f32_v2f64(<2 x double>* %p, <4 x float>* %q) { ; SOFT-NEXT: push {r4, lr} ; SOFT-NEXT: vld1.64 {d16, d17}, [r0] ; SOFT-NEXT: mov r4, r1 -; SOFT-NEXT: vadd.f64 d19, d17, d17 ; SOFT-NEXT: vadd.f64 d18, d16, d16 +; SOFT-NEXT: vadd.f64 d16, d17, d17 ; SOFT-NEXT: vmov r1, r0, d18 -; SOFT-NEXT: vmov r3, r2, d19 +; SOFT-NEXT: vmov r3, r2, d16 ; SOFT-NEXT: bl test_v4f32_v2f64_helper ; SOFT-NEXT: vmov d17, r3, r2 ; SOFT-NEXT: vmov d16, r1, r0 @@ -2800,10 +2800,10 @@ define void @test_v4i32_v2f64(<2 x double>* %p, <4 x i32>* %q) { ; SOFT-NEXT: push {r4, lr} ; SOFT-NEXT: vld1.64 {d16, d17}, [r0] ; SOFT-NEXT: mov r4, r1 -; SOFT-NEXT: vadd.f64 d19, d17, d17 ; SOFT-NEXT: vadd.f64 d18, d16, d16 +; SOFT-NEXT: vadd.f64 d16, d17, d17 ; SOFT-NEXT: vmov r1, r0, d18 -; SOFT-NEXT: vmov r3, r2, d19 +; SOFT-NEXT: vmov r3, r2, d16 ; SOFT-NEXT: bl test_v4i32_v2f64_helper ; SOFT-NEXT: vmov d17, r3, r2 ; SOFT-NEXT: vmov d16, r1, r0 @@ -3072,10 +3072,10 @@ define void @test_v8i16_v2f64(<2 x double>* %p, <8 x i16>* %q) { ; SOFT-NEXT: push {r4, lr} ; SOFT-NEXT: vld1.64 {d16, d17}, [r0] ; SOFT-NEXT: mov r4, r1 -; SOFT-NEXT: vadd.f64 d19, d17, d17 ; SOFT-NEXT: vadd.f64 d18, d16, d16 +; SOFT-NEXT: vadd.f64 d16, d17, d17 ; SOFT-NEXT: vmov r1, r0, d18 -; SOFT-NEXT: vmov r3, r2, d19 +; SOFT-NEXT: vmov r3, r2, d16 ; SOFT-NEXT: bl test_v8i16_v2f64_helper ; SOFT-NEXT: vmov d17, r3, r2 ; SOFT-NEXT: vmov d16, r1, r0 @@ -3344,10 +3344,10 @@ define void @test_v16i8_v2f64(<2 x double>* %p, <16 x i8>* %q) { ; SOFT-NEXT: push {r4, lr} ; SOFT-NEXT: vld1.64 {d16, d17}, [r0] ; SOFT-NEXT: mov r4, r1 -; SOFT-NEXT: vadd.f64 d19, d17, d17 ; SOFT-NEXT: vadd.f64 d18, d16, d16 +; SOFT-NEXT: vadd.f64 d16, d17, d17 ; SOFT-NEXT: vmov r1, r0, d18 -; SOFT-NEXT: vmov r3, r2, d19 +; SOFT-NEXT: vmov r3, r2, d16 ; SOFT-NEXT: bl test_v16i8_v2f64_helper ; SOFT-NEXT: vmov d17, r3, r2 ; SOFT-NEXT: vmov d16, r1, r0 diff --git a/llvm/test/CodeGen/Thumb2/mve-shuffle.ll b/llvm/test/CodeGen/Thumb2/mve-shuffle.ll index 46c3cebb76860..b66e7b24536cf 100644 --- a/llvm/test/CodeGen/Thumb2/mve-shuffle.ll +++ b/llvm/test/CodeGen/Thumb2/mve-shuffle.ll @@ -164,23 +164,22 @@ define arm_aapcs_vfpcc <4 x i32> @shuffle4step_i32(<16 x i32> %src) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} -; CHECK-NEXT: vmov.f32 s18, s11 +; CHECK-NEXT: vmov.f32 s16, s3 ; CHECK-NEXT: vmov.f32 s20, s2 -; CHECK-NEXT: vmov.f32 s19, s15 +; CHECK-NEXT: vmov.f32 s17, s7 ; CHECK-NEXT: vmov.f32 s21, s6 -; CHECK-NEXT: vmov.f32 s16, s3 -; CHECK-NEXT: vmov.f32 s11, s14 +; CHECK-NEXT: vmov.f32 s18, s11 ; CHECK-NEXT: vmov.f32 s22, s10 -; CHECK-NEXT: vmov.f32 s17, s7 +; CHECK-NEXT: vmov.f32 s19, s15 ; CHECK-NEXT: vmov.f32 s23, s14 ; CHECK-NEXT: vadd.i32 q4, q5, q4 -; CHECK-NEXT: vmov.f32 s22, s9 -; CHECK-NEXT: vmov.f32 s23, s13 ; CHECK-NEXT: vmov.f32 s20, s1 -; CHECK-NEXT: vmov.f32 s2, s8 -; CHECK-NEXT: vmov.f32 s3, s12 ; CHECK-NEXT: vmov.f32 s21, s5 ; CHECK-NEXT: vmov.f32 s1, s4 +; CHECK-NEXT: vmov.f32 s22, s9 +; CHECK-NEXT: vmov.f32 s2, s8 +; CHECK-NEXT: vmov.f32 s23, s13 +; CHECK-NEXT: vmov.f32 s3, s12 ; CHECK-NEXT: vadd.i32 q0, q0, q5 ; CHECK-NEXT: vadd.i32 q0, q0, q4 ; CHECK-NEXT: vpop {d8, d9, d10, d11} @@ -359,36 +358,36 @@ entry: define arm_aapcs_vfpcc <8 x i16> @shuffle3step_i16(<32 x i16> %src) { ; CHECK-LABEL: shuffle3step_i16: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .vsave {d8, d9, d10, d11} -; CHECK-NEXT: vpush {d8, d9, d10, d11} -; CHECK-NEXT: vmovx.f16 s16, s2 -; CHECK-NEXT: vmov.f32 s12, s1 +; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: vmovx.f16 s16, s1 +; CHECK-NEXT: vmov.f32 s12, s0 ; CHECK-NEXT: vins.f16 s12, s16 -; CHECK-NEXT: vmovx.f16 s16, s5 -; CHECK-NEXT: vmov.f32 s13, s4 -; CHECK-NEXT: vmovx.f16 s20, s11 +; CHECK-NEXT: vmovx.f16 s16, s4 +; CHECK-NEXT: vmov.f32 s13, s3 +; CHECK-NEXT: vmovx.f16 s20, s5 ; CHECK-NEXT: vins.f16 s13, s16 -; CHECK-NEXT: vmov.f32 s19, s10 -; CHECK-NEXT: vins.f16 s19, s20 -; CHECK-NEXT: vmov.f32 s14, s7 -; CHECK-NEXT: vmov.f32 s18, s8 +; CHECK-NEXT: vmovx.f16 s16, s7 +; CHECK-NEXT: vmov.f32 s14, s6 ; CHECK-NEXT: vmov.u16 r0, q1[5] -; CHECK-NEXT: vmov q5, q4 -; CHECK-NEXT: vmovnb.i32 q5, q3 -; CHECK-NEXT: vmov.f32 s14, s22 -; CHECK-NEXT: vmovx.f16 s20, s1 -; CHECK-NEXT: vmov.f32 s15, s19 -; CHECK-NEXT: vmov.f32 s16, s0 -; CHECK-NEXT: vins.f16 s16, s20 -; CHECK-NEXT: vmovx.f16 s20, s4 -; CHECK-NEXT: vmov.f32 s17, s3 +; CHECK-NEXT: vins.f16 s14, s16 +; CHECK-NEXT: vmovx.f16 s16, s2 +; CHECK-NEXT: vins.f16 s1, s16 +; CHECK-NEXT: vmov.f32 s17, s4 ; CHECK-NEXT: vins.f16 s17, s20 -; CHECK-NEXT: vmovx.f16 s20, s7 -; CHECK-NEXT: vmov.f32 s18, s6 -; CHECK-NEXT: vins.f16 s18, s20 ; CHECK-NEXT: vmovx.f16 s20, s10 -; CHECK-NEXT: vmov.f32 s19, s9 -; CHECK-NEXT: vins.f16 s19, s20 +; CHECK-NEXT: vmov.f32 s15, s9 +; CHECK-NEXT: vins.f16 s15, s20 +; CHECK-NEXT: vmovx.f16 s20, s11 +; CHECK-NEXT: vins.f16 s10, s20 +; CHECK-NEXT: vmov.f32 s16, s1 +; CHECK-NEXT: vmov.f32 s23, s10 +; CHECK-NEXT: vmov.f32 s22, s8 +; CHECK-NEXT: vmov.f32 s18, s7 +; CHECK-NEXT: vmov q6, q5 +; CHECK-NEXT: vmovnb.i32 q6, q4 +; CHECK-NEXT: vmov.f32 s18, s26 +; CHECK-NEXT: vmov.f32 s19, s23 ; CHECK-NEXT: vmovx.f16 s20, s0 ; CHECK-NEXT: vins.f16 s20, s2 ; CHECK-NEXT: vmovx.f16 s21, s3 @@ -401,9 +400,9 @@ define arm_aapcs_vfpcc <8 x i16> @shuffle3step_i16(<32 x i16> %src) { ; CHECK-NEXT: vmovnb.i32 q1, q5 ; CHECK-NEXT: vmov.f32 s22, s6 ; CHECK-NEXT: vmov.f32 s23, s3 -; CHECK-NEXT: vadd.i16 q0, q4, q5 -; CHECK-NEXT: vadd.i16 q0, q0, q3 -; CHECK-NEXT: vpop {d8, d9, d10, d11} +; CHECK-NEXT: vadd.i16 q0, q3, q5 +; CHECK-NEXT: vadd.i16 q0, q0, q4 +; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: bx lr entry: %s1 = shufflevector <32 x i16> %src, <32 x i16> undef, <8 x i32> @@ -1193,23 +1192,22 @@ define arm_aapcs_vfpcc <4 x float> @shuffle4step_f32(<16 x float> %src) { ; CHECKFP: @ %bb.0: @ %entry ; CHECKFP-NEXT: .vsave {d8, d9, d10, d11} ; CHECKFP-NEXT: vpush {d8, d9, d10, d11} -; CHECKFP-NEXT: vmov.f32 s18, s11 +; CHECKFP-NEXT: vmov.f32 s16, s3 ; CHECKFP-NEXT: vmov.f32 s20, s2 -; CHECKFP-NEXT: vmov.f32 s19, s15 +; CHECKFP-NEXT: vmov.f32 s17, s7 ; CHECKFP-NEXT: vmov.f32 s21, s6 -; CHECKFP-NEXT: vmov.f32 s16, s3 -; CHECKFP-NEXT: vmov.f32 s11, s14 +; CHECKFP-NEXT: vmov.f32 s18, s11 ; CHECKFP-NEXT: vmov.f32 s22, s10 -; CHECKFP-NEXT: vmov.f32 s17, s7 +; CHECKFP-NEXT: vmov.f32 s19, s15 ; CHECKFP-NEXT: vmov.f32 s23, s14 ; CHECKFP-NEXT: vadd.f32 q4, q5, q4 -; CHECKFP-NEXT: vmov.f32 s22, s9 -; CHECKFP-NEXT: vmov.f32 s23, s13 ; CHECKFP-NEXT: vmov.f32 s20, s1 -; CHECKFP-NEXT: vmov.f32 s2, s8 -; CHECKFP-NEXT: vmov.f32 s3, s12 ; CHECKFP-NEXT: vmov.f32 s21, s5 ; CHECKFP-NEXT: vmov.f32 s1, s4 +; CHECKFP-NEXT: vmov.f32 s22, s9 +; CHECKFP-NEXT: vmov.f32 s2, s8 +; CHECKFP-NEXT: vmov.f32 s23, s13 +; CHECKFP-NEXT: vmov.f32 s3, s12 ; CHECKFP-NEXT: vadd.f32 q0, q0, q5 ; CHECKFP-NEXT: vadd.f32 q0, q0, q4 ; CHECKFP-NEXT: vpop {d8, d9, d10, d11} diff --git a/llvm/test/CodeGen/Thumb2/mve-soft-float-abi.ll b/llvm/test/CodeGen/Thumb2/mve-soft-float-abi.ll index 1fdb6d84e9ca8..488a20bc9602f 100644 --- a/llvm/test/CodeGen/Thumb2/mve-soft-float-abi.ll +++ b/llvm/test/CodeGen/Thumb2/mve-soft-float-abi.ll @@ -348,24 +348,27 @@ entry: define <4 x float> @vector_add_f32(<4 x float> %lhs, <4 x float> %rhs) { ; CHECK-MVE-LABEL: vector_add_f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: .save {r4, r5, r7, lr} -; CHECK-MVE-NEXT: push {r4, r5, r7, lr} -; CHECK-MVE-NEXT: .vsave {d8, d9, d10, d11, d12, d13} -; CHECK-MVE-NEXT: vpush {d8, d9, d10, d11, d12, d13} -; CHECK-MVE-NEXT: vmov d13, r2, r3 -; CHECK-MVE-NEXT: vmov d12, r0, r1 -; CHECK-MVE-NEXT: add r1, sp, #64 -; CHECK-MVE-NEXT: vldrw.u32 q5, [r1] -; CHECK-MVE-NEXT: vmov r4, r0, d13 -; CHECK-MVE-NEXT: vmov r5, r1, d11 +; CHECK-MVE-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-MVE-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-MVE-NEXT: .pad #4 +; CHECK-MVE-NEXT: sub sp, #4 +; CHECK-MVE-NEXT: .vsave {d8, d9, d10, d11} +; CHECK-MVE-NEXT: vpush {d8, d9, d10, d11} +; CHECK-MVE-NEXT: mov r4, r0 +; CHECK-MVE-NEXT: add r0, sp, #56 +; CHECK-MVE-NEXT: vldrw.u32 q5, [r0] +; CHECK-MVE-NEXT: mov r6, r1 +; CHECK-MVE-NEXT: mov r0, r3 +; CHECK-MVE-NEXT: mov r5, r2 +; CHECK-MVE-NEXT: vmov r7, r1, d11 ; CHECK-MVE-NEXT: bl __aeabi_fadd ; CHECK-MVE-NEXT: vmov s19, r0 -; CHECK-MVE-NEXT: mov r0, r4 -; CHECK-MVE-NEXT: mov r1, r5 +; CHECK-MVE-NEXT: mov r0, r5 +; CHECK-MVE-NEXT: mov r1, r7 ; CHECK-MVE-NEXT: bl __aeabi_fadd -; CHECK-MVE-NEXT: vmov s18, r0 -; CHECK-MVE-NEXT: vmov r4, r0, d12 ; CHECK-MVE-NEXT: vmov r5, r1, d10 +; CHECK-MVE-NEXT: vmov s18, r0 +; CHECK-MVE-NEXT: mov r0, r6 ; CHECK-MVE-NEXT: bl __aeabi_fadd ; CHECK-MVE-NEXT: vmov s17, r0 ; CHECK-MVE-NEXT: mov r0, r4 @@ -374,8 +377,9 @@ define <4 x float> @vector_add_f32(<4 x float> %lhs, <4 x float> %rhs) { ; CHECK-MVE-NEXT: vmov s16, r0 ; CHECK-MVE-NEXT: vmov r2, r3, d9 ; CHECK-MVE-NEXT: vmov r0, r1, d8 -; CHECK-MVE-NEXT: vpop {d8, d9, d10, d11, d12, d13} -; CHECK-MVE-NEXT: pop {r4, r5, r7, pc} +; CHECK-MVE-NEXT: vpop {d8, d9, d10, d11} +; CHECK-MVE-NEXT: add sp, #4 +; CHECK-MVE-NEXT: pop {r4, r5, r6, r7, pc} ; ; CHECK-BE-LABEL: vector_add_f32: ; CHECK-BE: @ %bb.0: @ %entry @@ -432,10 +436,10 @@ define <2 x double> @vector_add_f64(<2 x double> %lhs, <2 x double> %rhs) { ; CHECK-MVE-NEXT: push {r4, r5, r6, r7, lr} ; CHECK-MVE-NEXT: .pad #4 ; CHECK-MVE-NEXT: sub sp, #4 -; CHECK-MVE-NEXT: .vsave {d8, d9, d10, d11} -; CHECK-MVE-NEXT: vpush {d8, d9, d10, d11} +; CHECK-MVE-NEXT: .vsave {d8, d9} +; CHECK-MVE-NEXT: vpush {d8, d9} ; CHECK-MVE-NEXT: mov r5, r0 -; CHECK-MVE-NEXT: add r0, sp, #56 +; CHECK-MVE-NEXT: add r0, sp, #40 ; CHECK-MVE-NEXT: vldrw.u32 q4, [r0] ; CHECK-MVE-NEXT: mov r4, r2 ; CHECK-MVE-NEXT: mov r6, r3 @@ -445,14 +449,14 @@ define <2 x double> @vector_add_f64(<2 x double> %lhs, <2 x double> %rhs) { ; CHECK-MVE-NEXT: mov r1, r6 ; CHECK-MVE-NEXT: bl __aeabi_dadd ; CHECK-MVE-NEXT: vmov r2, r3, d8 -; CHECK-MVE-NEXT: vmov d11, r0, r1 +; CHECK-MVE-NEXT: mov r4, r0 +; CHECK-MVE-NEXT: mov r6, r1 ; CHECK-MVE-NEXT: mov r0, r5 ; CHECK-MVE-NEXT: mov r1, r7 ; CHECK-MVE-NEXT: bl __aeabi_dadd -; CHECK-MVE-NEXT: vmov d10, r0, r1 -; CHECK-MVE-NEXT: vmov r2, r3, d11 -; CHECK-MVE-NEXT: vmov r0, r1, d10 -; CHECK-MVE-NEXT: vpop {d8, d9, d10, d11} +; CHECK-MVE-NEXT: mov r2, r4 +; CHECK-MVE-NEXT: mov r3, r6 +; CHECK-MVE-NEXT: vpop {d8, d9} ; CHECK-MVE-NEXT: add sp, #4 ; CHECK-MVE-NEXT: pop {r4, r5, r6, r7, pc} ; @@ -462,10 +466,10 @@ define <2 x double> @vector_add_f64(<2 x double> %lhs, <2 x double> %rhs) { ; CHECK-BE-NEXT: push {r4, r5, r6, r7, lr} ; CHECK-BE-NEXT: .pad #4 ; CHECK-BE-NEXT: sub sp, #4 -; CHECK-BE-NEXT: .vsave {d8, d9, d10, d11} -; CHECK-BE-NEXT: vpush {d8, d9, d10, d11} +; CHECK-BE-NEXT: .vsave {d8, d9} +; CHECK-BE-NEXT: vpush {d8, d9} ; CHECK-BE-NEXT: mov r5, r0 -; CHECK-BE-NEXT: add r0, sp, #56 +; CHECK-BE-NEXT: add r0, sp, #40 ; CHECK-BE-NEXT: vldrb.u8 q0, [r0] ; CHECK-BE-NEXT: mov r6, r2 ; CHECK-BE-NEXT: mov r4, r3 @@ -476,14 +480,14 @@ define <2 x double> @vector_add_f64(<2 x double> %lhs, <2 x double> %rhs) { ; CHECK-BE-NEXT: mov r1, r4 ; CHECK-BE-NEXT: bl __aeabi_dadd ; CHECK-BE-NEXT: vmov r3, r2, d8 -; CHECK-BE-NEXT: vmov d11, r1, r0 +; CHECK-BE-NEXT: mov r4, r0 +; CHECK-BE-NEXT: mov r6, r1 ; CHECK-BE-NEXT: mov r0, r5 ; CHECK-BE-NEXT: mov r1, r7 ; CHECK-BE-NEXT: bl __aeabi_dadd -; CHECK-BE-NEXT: vmov d10, r1, r0 -; CHECK-BE-NEXT: vmov r3, r2, d11 -; CHECK-BE-NEXT: vmov r1, r0, d10 -; CHECK-BE-NEXT: vpop {d8, d9, d10, d11} +; CHECK-BE-NEXT: mov r2, r4 +; CHECK-BE-NEXT: mov r3, r6 +; CHECK-BE-NEXT: vpop {d8, d9} ; CHECK-BE-NEXT: add sp, #4 ; CHECK-BE-NEXT: pop {r4, r5, r6, r7, pc} ; @@ -495,24 +499,22 @@ define <2 x double> @vector_add_f64(<2 x double> %lhs, <2 x double> %rhs) { ; CHECK-FP-NEXT: sub sp, #4 ; CHECK-FP-NEXT: .vsave {d8, d9} ; CHECK-FP-NEXT: vpush {d8, d9} -; CHECK-FP-NEXT: mov r5, r0 -; CHECK-FP-NEXT: add r0, sp, #40 -; CHECK-FP-NEXT: vldrw.u32 q4, [r0] -; CHECK-FP-NEXT: mov r4, r2 -; CHECK-FP-NEXT: mov r6, r3 -; CHECK-FP-NEXT: mov r7, r1 -; CHECK-FP-NEXT: vmov r2, r3, d9 -; CHECK-FP-NEXT: mov r0, r4 -; CHECK-FP-NEXT: mov r1, r6 -; CHECK-FP-NEXT: bl __aeabi_dadd +; CHECK-FP-NEXT: mov r5, r2 +; CHECK-FP-NEXT: add r2, sp, #40 +; CHECK-FP-NEXT: vldrw.u32 q4, [r2] +; CHECK-FP-NEXT: mov r4, r3 ; CHECK-FP-NEXT: vmov r2, r3, d8 -; CHECK-FP-NEXT: vmov d9, r0, r1 -; CHECK-FP-NEXT: mov r0, r5 -; CHECK-FP-NEXT: mov r1, r7 ; CHECK-FP-NEXT: bl __aeabi_dadd -; CHECK-FP-NEXT: vmov d8, r0, r1 ; CHECK-FP-NEXT: vmov r2, r3, d9 -; CHECK-FP-NEXT: vmov r0, r1, d8 +; CHECK-FP-NEXT: mov r6, r0 +; CHECK-FP-NEXT: mov r7, r1 +; CHECK-FP-NEXT: mov r0, r5 +; CHECK-FP-NEXT: mov r1, r4 +; CHECK-FP-NEXT: bl __aeabi_dadd +; CHECK-FP-NEXT: mov r2, r0 +; CHECK-FP-NEXT: mov r3, r1 +; CHECK-FP-NEXT: mov r0, r6 +; CHECK-FP-NEXT: mov r1, r7 ; CHECK-FP-NEXT: vpop {d8, d9} ; CHECK-FP-NEXT: add sp, #4 ; CHECK-FP-NEXT: pop {r4, r5, r6, r7, pc} diff --git a/llvm/test/CodeGen/Thumb2/mve-vld3.ll b/llvm/test/CodeGen/Thumb2/mve-vld3.ll index 423f796e97753..b998d62b0d9c6 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vld3.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vld3.ll @@ -39,20 +39,20 @@ define void @vld3_v4i32(<12 x i32> *%src, <4 x i32> *%dst) { ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vldrw.u32 q0, [r0, #16] -; CHECK-NEXT: vldrw.u32 q2, [r0, #32] -; CHECK-NEXT: vmov.f64 d6, d2 -; CHECK-NEXT: vmov.f32 s16, s5 -; CHECK-NEXT: vmov.f32 s13, s7 -; CHECK-NEXT: vmov.f32 s17, s0 -; CHECK-NEXT: vmov.f32 s14, s2 -; CHECK-NEXT: vmov.f32 s18, s3 +; CHECK-NEXT: vldrw.u32 q4, [r0, #32] +; CHECK-NEXT: vmov.f64 d4, d2 +; CHECK-NEXT: vmov.f32 s12, s5 +; CHECK-NEXT: vmov.f32 s9, s7 +; CHECK-NEXT: vmov.f32 s13, s0 +; CHECK-NEXT: vmov.f32 s10, s2 +; CHECK-NEXT: vmov.f32 s14, s3 ; CHECK-NEXT: vmov.f32 s0, s6 -; CHECK-NEXT: vmov.f32 s2, s8 -; CHECK-NEXT: vmov.f32 s19, s10 -; CHECK-NEXT: vmov.f32 s15, s9 -; CHECK-NEXT: vadd.i32 q3, q3, q4 -; CHECK-NEXT: vmov.f32 s3, s11 -; CHECK-NEXT: vadd.i32 q0, q3, q0 +; CHECK-NEXT: vmov.f32 s2, s16 +; CHECK-NEXT: vmov.f32 s15, s18 +; CHECK-NEXT: vmov.f32 s11, s17 +; CHECK-NEXT: vadd.i32 q2, q2, q3 +; CHECK-NEXT: vmov.f32 s3, s19 +; CHECK-NEXT: vadd.i32 q0, q2, q0 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: bx lr @@ -74,36 +74,36 @@ define void @vld3_v8i32(<24 x i32> *%src, <8 x i32> *%dst) { ; CHECK-NEXT: vpush {d8, d9, d10, d11} ; CHECK-NEXT: vldrw.u32 q1, [r0, #48] ; CHECK-NEXT: vldrw.u32 q0, [r0, #64] -; CHECK-NEXT: vldrw.u32 q2, [r0, #80] -; CHECK-NEXT: vmov.f64 d6, d2 -; CHECK-NEXT: vmov.f32 s16, s5 -; CHECK-NEXT: vmov.f32 s13, s7 -; CHECK-NEXT: vmov.f32 s17, s0 -; CHECK-NEXT: vmov.f32 s14, s2 -; CHECK-NEXT: vmov.f32 s18, s3 +; CHECK-NEXT: vldrw.u32 q4, [r0, #80] +; CHECK-NEXT: vmov.f64 d4, d2 +; CHECK-NEXT: vmov.f32 s12, s5 +; CHECK-NEXT: vmov.f32 s9, s7 +; CHECK-NEXT: vmov.f32 s13, s0 +; CHECK-NEXT: vmov.f32 s10, s2 +; CHECK-NEXT: vmov.f32 s14, s3 ; CHECK-NEXT: vmov.f32 s0, s6 ; CHECK-NEXT: vldrw.u32 q1, [r0, #16] -; CHECK-NEXT: vmov.f32 s2, s8 -; CHECK-NEXT: vmov.f32 s19, s10 -; CHECK-NEXT: vmov.f32 s15, s9 -; CHECK-NEXT: vmov.f32 s3, s11 -; CHECK-NEXT: vadd.i32 q3, q3, q4 -; CHECK-NEXT: vadd.i32 q0, q3, q0 -; CHECK-NEXT: vldrw.u32 q3, [r0] -; CHECK-NEXT: vldrw.u32 q2, [r0, #32] +; CHECK-NEXT: vmov.f32 s2, s16 +; CHECK-NEXT: vmov.f32 s15, s18 +; CHECK-NEXT: vmov.f32 s11, s17 +; CHECK-NEXT: vadd.i32 q2, q2, q3 +; CHECK-NEXT: vmov.f32 s3, s19 +; CHECK-NEXT: vadd.i32 q0, q2, q0 +; CHECK-NEXT: vldrw.u32 q2, [r0] +; CHECK-NEXT: vldrw.u32 q3, [r0, #32] ; CHECK-NEXT: vstrw.32 q0, [r1, #16] -; CHECK-NEXT: vmov.f32 s16, s13 -; CHECK-NEXT: vmov.f64 d10, d6 +; CHECK-NEXT: vmov.f32 s16, s9 +; CHECK-NEXT: vmov.f64 d10, d4 ; CHECK-NEXT: vmov.f32 s17, s4 -; CHECK-NEXT: vmov.f32 s21, s15 +; CHECK-NEXT: vmov.f32 s21, s11 ; CHECK-NEXT: vmov.f32 s18, s7 ; CHECK-NEXT: vmov.f32 s22, s6 -; CHECK-NEXT: vmov.f32 s4, s14 -; CHECK-NEXT: vmov.f32 s6, s8 -; CHECK-NEXT: vmov.f32 s19, s10 -; CHECK-NEXT: vmov.f32 s23, s9 +; CHECK-NEXT: vmov.f32 s4, s10 +; CHECK-NEXT: vmov.f32 s6, s12 +; CHECK-NEXT: vmov.f32 s19, s14 +; CHECK-NEXT: vmov.f32 s23, s13 ; CHECK-NEXT: vadd.i32 q4, q5, q4 -; CHECK-NEXT: vmov.f32 s7, s11 +; CHECK-NEXT: vmov.f32 s7, s15 ; CHECK-NEXT: vadd.i32 q1, q4, q1 ; CHECK-NEXT: vstrw.32 q1, [r1] ; CHECK-NEXT: vpop {d8, d9, d10, d11} @@ -126,70 +126,70 @@ define void @vld3_v16i32(<48 x i32> *%src, <16 x i32> *%dst) { ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: vldrw.u32 q1, [r0, #48] ; CHECK-NEXT: vldrw.u32 q0, [r0, #64] -; CHECK-NEXT: vldrw.u32 q2, [r0, #80] -; CHECK-NEXT: vmov.f64 d6, d2 -; CHECK-NEXT: vmov.f32 s16, s5 -; CHECK-NEXT: vmov.f32 s13, s7 -; CHECK-NEXT: vmov.f32 s17, s0 -; CHECK-NEXT: vmov.f32 s14, s2 -; CHECK-NEXT: vmov.f32 s18, s3 +; CHECK-NEXT: vldrw.u32 q4, [r0, #80] +; CHECK-NEXT: vldrw.u32 q6, [r0, #176] +; CHECK-NEXT: vmov.f64 d4, d2 +; CHECK-NEXT: vmov.f32 s12, s5 +; CHECK-NEXT: vmov.f32 s9, s7 +; CHECK-NEXT: vmov.f32 s13, s0 +; CHECK-NEXT: vmov.f32 s10, s2 +; CHECK-NEXT: vmov.f32 s14, s3 ; CHECK-NEXT: vmov.f32 s0, s6 ; CHECK-NEXT: vldrw.u32 q1, [r0, #16] -; CHECK-NEXT: vmov.f32 s2, s8 -; CHECK-NEXT: vmov.f32 s19, s10 -; CHECK-NEXT: vmov.f32 s15, s9 -; CHECK-NEXT: vmov.f32 s3, s11 -; CHECK-NEXT: vadd.i32 q3, q3, q4 -; CHECK-NEXT: vadd.i32 q0, q3, q0 -; CHECK-NEXT: vldrw.u32 q3, [r0] -; CHECK-NEXT: vldrw.u32 q2, [r0, #32] -; CHECK-NEXT: vmov.f32 s16, s13 -; CHECK-NEXT: vmov.f64 d10, d6 +; CHECK-NEXT: vmov.f32 s2, s16 +; CHECK-NEXT: vmov.f32 s15, s18 +; CHECK-NEXT: vmov.f32 s11, s17 +; CHECK-NEXT: vadd.i32 q2, q2, q3 +; CHECK-NEXT: vmov.f32 s3, s19 +; CHECK-NEXT: vadd.i32 q0, q2, q0 +; CHECK-NEXT: vldrw.u32 q2, [r0] +; CHECK-NEXT: vldrw.u32 q3, [r0, #32] +; CHECK-NEXT: vmov.f32 s16, s9 +; CHECK-NEXT: vmov.f64 d10, d4 ; CHECK-NEXT: vmov.f32 s17, s4 -; CHECK-NEXT: vmov.f32 s21, s15 +; CHECK-NEXT: vmov.f32 s21, s11 ; CHECK-NEXT: vmov.f32 s18, s7 ; CHECK-NEXT: vmov.f32 s22, s6 -; CHECK-NEXT: vmov.f32 s4, s14 -; CHECK-NEXT: vldrw.u32 q3, [r0, #144] -; CHECK-NEXT: vmov.f32 s6, s8 -; CHECK-NEXT: vmov.f32 s19, s10 -; CHECK-NEXT: vmov.f32 s23, s9 -; CHECK-NEXT: vmov.f32 s7, s11 +; CHECK-NEXT: vmov.f32 s4, s10 ; CHECK-NEXT: vldrw.u32 q2, [r0, #160] +; CHECK-NEXT: vmov.f32 s6, s12 +; CHECK-NEXT: vmov.f32 s19, s14 +; CHECK-NEXT: vmov.f32 s23, s13 +; CHECK-NEXT: vmov.f32 s7, s15 +; CHECK-NEXT: vldrw.u32 q3, [r0, #144] ; CHECK-NEXT: vadd.i32 q4, q5, q4 -; CHECK-NEXT: vmov.f64 d10, d6 +; CHECK-NEXT: vmov.f32 s20, s13 ; CHECK-NEXT: vadd.i32 q1, q4, q1 -; CHECK-NEXT: vldrw.u32 q4, [r0, #176] -; CHECK-NEXT: vmov.f32 s24, s13 -; CHECK-NEXT: vmov.f32 s21, s15 -; CHECK-NEXT: vmov.f32 s25, s8 -; CHECK-NEXT: vmov.f32 s22, s10 -; CHECK-NEXT: vmov.f32 s26, s11 +; CHECK-NEXT: vmov.f64 d8, d6 +; CHECK-NEXT: vmov.f32 s17, s15 +; CHECK-NEXT: vmov.f32 s21, s8 +; CHECK-NEXT: vmov.f32 s18, s10 +; CHECK-NEXT: vmov.f32 s22, s11 ; CHECK-NEXT: vmov.f32 s8, s14 ; CHECK-NEXT: vldrw.u32 q3, [r0, #112] -; CHECK-NEXT: vmov.f32 s10, s16 -; CHECK-NEXT: vmov.f32 s27, s18 -; CHECK-NEXT: vmov.f32 s23, s17 -; CHECK-NEXT: vmov.f32 s11, s19 -; CHECK-NEXT: vadd.i32 q5, q5, q6 -; CHECK-NEXT: vadd.i32 q2, q5, q2 -; CHECK-NEXT: vldrw.u32 q5, [r0, #96] -; CHECK-NEXT: vldrw.u32 q4, [r0, #128] +; CHECK-NEXT: vmov.f32 s10, s24 +; CHECK-NEXT: vmov.f32 s23, s26 +; CHECK-NEXT: vmov.f32 s19, s25 +; CHECK-NEXT: vadd.i32 q4, q4, q5 +; CHECK-NEXT: vmov.f32 s11, s27 +; CHECK-NEXT: vadd.i32 q2, q4, q2 +; CHECK-NEXT: vldrw.u32 q4, [r0, #96] +; CHECK-NEXT: vldrw.u32 q5, [r0, #128] ; CHECK-NEXT: vstrw.32 q2, [r1, #48] -; CHECK-NEXT: vmov.f32 s24, s21 +; CHECK-NEXT: vmov.f32 s24, s17 ; CHECK-NEXT: vstrw.32 q0, [r1, #16] -; CHECK-NEXT: vmov.f64 d14, d10 +; CHECK-NEXT: vmov.f64 d14, d8 ; CHECK-NEXT: vstrw.32 q1, [r1] ; CHECK-NEXT: vmov.f32 s25, s12 -; CHECK-NEXT: vmov.f32 s29, s23 +; CHECK-NEXT: vmov.f32 s29, s19 ; CHECK-NEXT: vmov.f32 s26, s15 ; CHECK-NEXT: vmov.f32 s30, s14 -; CHECK-NEXT: vmov.f32 s12, s22 -; CHECK-NEXT: vmov.f32 s14, s16 -; CHECK-NEXT: vmov.f32 s27, s18 -; CHECK-NEXT: vmov.f32 s31, s17 +; CHECK-NEXT: vmov.f32 s12, s18 +; CHECK-NEXT: vmov.f32 s14, s20 +; CHECK-NEXT: vmov.f32 s27, s22 +; CHECK-NEXT: vmov.f32 s31, s21 ; CHECK-NEXT: vadd.i32 q6, q7, q6 -; CHECK-NEXT: vmov.f32 s15, s19 +; CHECK-NEXT: vmov.f32 s15, s23 ; CHECK-NEXT: vadd.i32 q3, q6, q3 ; CHECK-NEXT: vstrw.32 q3, [r1, #32] ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} @@ -358,53 +358,53 @@ define void @vld3_v16i16(<48 x i16> *%src, <16 x i16> *%dst) { ; CHECK-NEXT: .pad #16 ; CHECK-NEXT: sub sp, #16 ; CHECK-NEXT: vldrw.u32 q1, [r0, #48] -; CHECK-NEXT: vmovx.f16 s8, s6 -; CHECK-NEXT: vmov.f32 s0, s5 +; CHECK-NEXT: vmov.f64 d0, d2 +; CHECK-NEXT: vmovx.f16 s8, s5 ; CHECK-NEXT: vins.f16 s0, s8 ; CHECK-NEXT: vldrw.u32 q2, [r0, #64] -; CHECK-NEXT: vmovx.f16 s12, s9 -; CHECK-NEXT: vmov.f32 s1, s8 +; CHECK-NEXT: vmov.f32 s1, s7 +; CHECK-NEXT: vmovx.f16 s12, s8 +; CHECK-NEXT: vmovx.f16 s16, s9 ; CHECK-NEXT: vins.f16 s1, s12 -; CHECK-NEXT: vldrw.u32 q3, [r0, #80] -; CHECK-NEXT: vmov.f32 s2, s11 +; CHECK-NEXT: vmovx.f16 s12, s11 +; CHECK-NEXT: vmov.f32 s2, s10 ; CHECK-NEXT: vmov.u16 r2, q2[5] -; CHECK-NEXT: vmovx.f16 s20, s15 -; CHECK-NEXT: vmov.f32 s19, s14 -; CHECK-NEXT: vins.f16 s19, s20 -; CHECK-NEXT: vmov.f32 s18, s12 -; CHECK-NEXT: vmov q5, q4 -; CHECK-NEXT: vmovnb.i32 q5, q0 -; CHECK-NEXT: vmov.f32 s2, s22 -; CHECK-NEXT: vmovx.f16 s20, s5 -; CHECK-NEXT: vmov.f32 s3, s19 -; CHECK-NEXT: vmov.f64 d8, d2 -; CHECK-NEXT: vins.f16 s16, s20 -; CHECK-NEXT: vmovx.f16 s20, s8 -; CHECK-NEXT: vmov.f32 s17, s7 -; CHECK-NEXT: vins.f16 s17, s20 -; CHECK-NEXT: vmovx.f16 s20, s11 -; CHECK-NEXT: vmov.f32 s18, s10 +; CHECK-NEXT: vins.f16 s2, s12 +; CHECK-NEXT: vmovx.f16 s12, s6 +; CHECK-NEXT: vins.f16 s5, s12 +; CHECK-NEXT: vmov.f32 s13, s8 +; CHECK-NEXT: vins.f16 s13, s16 +; CHECK-NEXT: vldrw.u32 q4, [r0, #80] +; CHECK-NEXT: vmov.f32 s12, s5 +; CHECK-NEXT: vmovx.f16 s20, s18 +; CHECK-NEXT: vmov.f32 s3, s17 +; CHECK-NEXT: vins.f16 s3, s20 +; CHECK-NEXT: vmovx.f16 s20, s19 ; CHECK-NEXT: vins.f16 s18, s20 -; CHECK-NEXT: vmovx.f16 s20, s14 -; CHECK-NEXT: vmov.f32 s19, s13 -; CHECK-NEXT: vins.f16 s19, s20 +; CHECK-NEXT: vmov.f32 s14, s11 +; CHECK-NEXT: vmov.f32 s23, s18 +; CHECK-NEXT: vmov.f32 s22, s16 +; CHECK-NEXT: vmov q6, q5 +; CHECK-NEXT: vmovnb.i32 q6, q3 +; CHECK-NEXT: vmov.f32 s14, s26 +; CHECK-NEXT: vmov.f32 s15, s23 ; CHECK-NEXT: vmovx.f16 s20, s4 ; CHECK-NEXT: vins.f16 s20, s6 ; CHECK-NEXT: vmovx.f16 s21, s7 -; CHECK-NEXT: vins.f16 s6, s12 -; CHECK-NEXT: vmovx.f16 s7, s13 +; CHECK-NEXT: vins.f16 s6, s16 +; CHECK-NEXT: vmovx.f16 s7, s17 ; CHECK-NEXT: vins.f16 s21, s9 -; CHECK-NEXT: vins.f16 s7, s15 +; CHECK-NEXT: vins.f16 s7, s19 ; CHECK-NEXT: vmov.16 q5[4], r2 ; CHECK-NEXT: vmov q2, q1 ; CHECK-NEXT: vmovnb.i32 q2, q5 ; CHECK-NEXT: vmov.f32 s22, s10 ; CHECK-NEXT: vldrw.u32 q2, [r0] ; CHECK-NEXT: vmov.f32 s23, s7 -; CHECK-NEXT: vadd.i16 q1, q4, q5 -; CHECK-NEXT: vmovx.f16 s12, s10 -; CHECK-NEXT: vadd.i16 q0, q1, q0 +; CHECK-NEXT: vadd.i16 q0, q0, q5 ; CHECK-NEXT: vmov.f32 s4, s9 +; CHECK-NEXT: vadd.i16 q0, q0, q3 +; CHECK-NEXT: vmovx.f16 s12, s10 ; CHECK-NEXT: vins.f16 s4, s12 ; CHECK-NEXT: vldrw.u32 q3, [r0, #16] ; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill @@ -900,20 +900,20 @@ define void @vld3_v4f32(<12 x float> *%src, <4 x float> *%dst) { ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vldrw.u32 q0, [r0, #16] -; CHECK-NEXT: vldrw.u32 q2, [r0, #32] -; CHECK-NEXT: vmov.f64 d6, d2 -; CHECK-NEXT: vmov.f32 s16, s5 -; CHECK-NEXT: vmov.f32 s13, s7 -; CHECK-NEXT: vmov.f32 s17, s0 -; CHECK-NEXT: vmov.f32 s14, s2 -; CHECK-NEXT: vmov.f32 s18, s3 +; CHECK-NEXT: vldrw.u32 q4, [r0, #32] +; CHECK-NEXT: vmov.f64 d4, d2 +; CHECK-NEXT: vmov.f32 s12, s5 +; CHECK-NEXT: vmov.f32 s9, s7 +; CHECK-NEXT: vmov.f32 s13, s0 +; CHECK-NEXT: vmov.f32 s10, s2 +; CHECK-NEXT: vmov.f32 s14, s3 ; CHECK-NEXT: vmov.f32 s0, s6 -; CHECK-NEXT: vmov.f32 s2, s8 -; CHECK-NEXT: vmov.f32 s19, s10 -; CHECK-NEXT: vmov.f32 s15, s9 -; CHECK-NEXT: vadd.f32 q3, q3, q4 -; CHECK-NEXT: vmov.f32 s3, s11 -; CHECK-NEXT: vadd.f32 q0, q3, q0 +; CHECK-NEXT: vmov.f32 s2, s16 +; CHECK-NEXT: vmov.f32 s15, s18 +; CHECK-NEXT: vmov.f32 s11, s17 +; CHECK-NEXT: vadd.f32 q2, q2, q3 +; CHECK-NEXT: vmov.f32 s3, s19 +; CHECK-NEXT: vadd.f32 q0, q2, q0 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: bx lr @@ -935,36 +935,36 @@ define void @vld3_v8f32(<24 x float> *%src, <8 x float> *%dst) { ; CHECK-NEXT: vpush {d8, d9, d10, d11} ; CHECK-NEXT: vldrw.u32 q1, [r0, #48] ; CHECK-NEXT: vldrw.u32 q0, [r0, #64] -; CHECK-NEXT: vldrw.u32 q2, [r0, #80] -; CHECK-NEXT: vmov.f64 d6, d2 -; CHECK-NEXT: vmov.f32 s16, s5 -; CHECK-NEXT: vmov.f32 s13, s7 -; CHECK-NEXT: vmov.f32 s17, s0 -; CHECK-NEXT: vmov.f32 s14, s2 -; CHECK-NEXT: vmov.f32 s18, s3 +; CHECK-NEXT: vldrw.u32 q4, [r0, #80] +; CHECK-NEXT: vmov.f64 d4, d2 +; CHECK-NEXT: vmov.f32 s12, s5 +; CHECK-NEXT: vmov.f32 s9, s7 +; CHECK-NEXT: vmov.f32 s13, s0 +; CHECK-NEXT: vmov.f32 s10, s2 +; CHECK-NEXT: vmov.f32 s14, s3 ; CHECK-NEXT: vmov.f32 s0, s6 ; CHECK-NEXT: vldrw.u32 q1, [r0, #16] -; CHECK-NEXT: vmov.f32 s2, s8 -; CHECK-NEXT: vmov.f32 s19, s10 -; CHECK-NEXT: vmov.f32 s15, s9 -; CHECK-NEXT: vmov.f32 s3, s11 -; CHECK-NEXT: vadd.f32 q3, q3, q4 -; CHECK-NEXT: vadd.f32 q0, q3, q0 -; CHECK-NEXT: vldrw.u32 q3, [r0] -; CHECK-NEXT: vldrw.u32 q2, [r0, #32] +; CHECK-NEXT: vmov.f32 s2, s16 +; CHECK-NEXT: vmov.f32 s15, s18 +; CHECK-NEXT: vmov.f32 s11, s17 +; CHECK-NEXT: vadd.f32 q2, q2, q3 +; CHECK-NEXT: vmov.f32 s3, s19 +; CHECK-NEXT: vadd.f32 q0, q2, q0 +; CHECK-NEXT: vldrw.u32 q2, [r0] +; CHECK-NEXT: vldrw.u32 q3, [r0, #32] ; CHECK-NEXT: vstrw.32 q0, [r1, #16] -; CHECK-NEXT: vmov.f32 s16, s13 -; CHECK-NEXT: vmov.f64 d10, d6 +; CHECK-NEXT: vmov.f32 s16, s9 +; CHECK-NEXT: vmov.f64 d10, d4 ; CHECK-NEXT: vmov.f32 s17, s4 -; CHECK-NEXT: vmov.f32 s21, s15 +; CHECK-NEXT: vmov.f32 s21, s11 ; CHECK-NEXT: vmov.f32 s18, s7 ; CHECK-NEXT: vmov.f32 s22, s6 -; CHECK-NEXT: vmov.f32 s4, s14 -; CHECK-NEXT: vmov.f32 s6, s8 -; CHECK-NEXT: vmov.f32 s19, s10 -; CHECK-NEXT: vmov.f32 s23, s9 +; CHECK-NEXT: vmov.f32 s4, s10 +; CHECK-NEXT: vmov.f32 s6, s12 +; CHECK-NEXT: vmov.f32 s19, s14 +; CHECK-NEXT: vmov.f32 s23, s13 ; CHECK-NEXT: vadd.f32 q4, q5, q4 -; CHECK-NEXT: vmov.f32 s7, s11 +; CHECK-NEXT: vmov.f32 s7, s15 ; CHECK-NEXT: vadd.f32 q1, q4, q1 ; CHECK-NEXT: vstrw.32 q1, [r1] ; CHECK-NEXT: vpop {d8, d9, d10, d11} @@ -987,70 +987,70 @@ define void @vld3_v16f32(<48 x float> *%src, <16 x float> *%dst) { ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: vldrw.u32 q1, [r0, #48] ; CHECK-NEXT: vldrw.u32 q0, [r0, #64] -; CHECK-NEXT: vldrw.u32 q2, [r0, #80] -; CHECK-NEXT: vmov.f64 d6, d2 -; CHECK-NEXT: vmov.f32 s16, s5 -; CHECK-NEXT: vmov.f32 s13, s7 -; CHECK-NEXT: vmov.f32 s17, s0 -; CHECK-NEXT: vmov.f32 s14, s2 -; CHECK-NEXT: vmov.f32 s18, s3 +; CHECK-NEXT: vldrw.u32 q4, [r0, #80] +; CHECK-NEXT: vldrw.u32 q6, [r0, #176] +; CHECK-NEXT: vmov.f64 d4, d2 +; CHECK-NEXT: vmov.f32 s12, s5 +; CHECK-NEXT: vmov.f32 s9, s7 +; CHECK-NEXT: vmov.f32 s13, s0 +; CHECK-NEXT: vmov.f32 s10, s2 +; CHECK-NEXT: vmov.f32 s14, s3 ; CHECK-NEXT: vmov.f32 s0, s6 ; CHECK-NEXT: vldrw.u32 q1, [r0, #16] -; CHECK-NEXT: vmov.f32 s2, s8 -; CHECK-NEXT: vmov.f32 s19, s10 -; CHECK-NEXT: vmov.f32 s15, s9 -; CHECK-NEXT: vmov.f32 s3, s11 -; CHECK-NEXT: vadd.f32 q3, q3, q4 -; CHECK-NEXT: vadd.f32 q0, q3, q0 -; CHECK-NEXT: vldrw.u32 q3, [r0] -; CHECK-NEXT: vldrw.u32 q2, [r0, #32] -; CHECK-NEXT: vmov.f32 s16, s13 -; CHECK-NEXT: vmov.f64 d10, d6 +; CHECK-NEXT: vmov.f32 s2, s16 +; CHECK-NEXT: vmov.f32 s15, s18 +; CHECK-NEXT: vmov.f32 s11, s17 +; CHECK-NEXT: vadd.f32 q2, q2, q3 +; CHECK-NEXT: vmov.f32 s3, s19 +; CHECK-NEXT: vadd.f32 q0, q2, q0 +; CHECK-NEXT: vldrw.u32 q2, [r0] +; CHECK-NEXT: vldrw.u32 q3, [r0, #32] +; CHECK-NEXT: vmov.f32 s16, s9 +; CHECK-NEXT: vmov.f64 d10, d4 ; CHECK-NEXT: vmov.f32 s17, s4 -; CHECK-NEXT: vmov.f32 s21, s15 +; CHECK-NEXT: vmov.f32 s21, s11 ; CHECK-NEXT: vmov.f32 s18, s7 ; CHECK-NEXT: vmov.f32 s22, s6 -; CHECK-NEXT: vmov.f32 s4, s14 -; CHECK-NEXT: vldrw.u32 q3, [r0, #144] -; CHECK-NEXT: vmov.f32 s6, s8 -; CHECK-NEXT: vmov.f32 s19, s10 -; CHECK-NEXT: vmov.f32 s23, s9 -; CHECK-NEXT: vmov.f32 s7, s11 +; CHECK-NEXT: vmov.f32 s4, s10 ; CHECK-NEXT: vldrw.u32 q2, [r0, #160] +; CHECK-NEXT: vmov.f32 s6, s12 +; CHECK-NEXT: vmov.f32 s19, s14 +; CHECK-NEXT: vmov.f32 s23, s13 +; CHECK-NEXT: vmov.f32 s7, s15 +; CHECK-NEXT: vldrw.u32 q3, [r0, #144] ; CHECK-NEXT: vadd.f32 q4, q5, q4 -; CHECK-NEXT: vmov.f64 d10, d6 +; CHECK-NEXT: vmov.f32 s20, s13 ; CHECK-NEXT: vadd.f32 q1, q4, q1 -; CHECK-NEXT: vldrw.u32 q4, [r0, #176] -; CHECK-NEXT: vmov.f32 s24, s13 -; CHECK-NEXT: vmov.f32 s21, s15 -; CHECK-NEXT: vmov.f32 s25, s8 -; CHECK-NEXT: vmov.f32 s22, s10 -; CHECK-NEXT: vmov.f32 s26, s11 +; CHECK-NEXT: vmov.f64 d8, d6 +; CHECK-NEXT: vmov.f32 s17, s15 +; CHECK-NEXT: vmov.f32 s21, s8 +; CHECK-NEXT: vmov.f32 s18, s10 +; CHECK-NEXT: vmov.f32 s22, s11 ; CHECK-NEXT: vmov.f32 s8, s14 ; CHECK-NEXT: vldrw.u32 q3, [r0, #112] -; CHECK-NEXT: vmov.f32 s10, s16 -; CHECK-NEXT: vmov.f32 s27, s18 -; CHECK-NEXT: vmov.f32 s23, s17 -; CHECK-NEXT: vmov.f32 s11, s19 -; CHECK-NEXT: vadd.f32 q5, q5, q6 -; CHECK-NEXT: vadd.f32 q2, q5, q2 -; CHECK-NEXT: vldrw.u32 q5, [r0, #96] -; CHECK-NEXT: vldrw.u32 q4, [r0, #128] +; CHECK-NEXT: vmov.f32 s10, s24 +; CHECK-NEXT: vmov.f32 s23, s26 +; CHECK-NEXT: vmov.f32 s19, s25 +; CHECK-NEXT: vadd.f32 q4, q4, q5 +; CHECK-NEXT: vmov.f32 s11, s27 +; CHECK-NEXT: vadd.f32 q2, q4, q2 +; CHECK-NEXT: vldrw.u32 q4, [r0, #96] +; CHECK-NEXT: vldrw.u32 q5, [r0, #128] ; CHECK-NEXT: vstrw.32 q2, [r1, #48] -; CHECK-NEXT: vmov.f32 s24, s21 +; CHECK-NEXT: vmov.f32 s24, s17 ; CHECK-NEXT: vstrw.32 q0, [r1, #16] -; CHECK-NEXT: vmov.f64 d14, d10 +; CHECK-NEXT: vmov.f64 d14, d8 ; CHECK-NEXT: vstrw.32 q1, [r1] ; CHECK-NEXT: vmov.f32 s25, s12 -; CHECK-NEXT: vmov.f32 s29, s23 +; CHECK-NEXT: vmov.f32 s29, s19 ; CHECK-NEXT: vmov.f32 s26, s15 ; CHECK-NEXT: vmov.f32 s30, s14 -; CHECK-NEXT: vmov.f32 s12, s22 -; CHECK-NEXT: vmov.f32 s14, s16 -; CHECK-NEXT: vmov.f32 s27, s18 -; CHECK-NEXT: vmov.f32 s31, s17 +; CHECK-NEXT: vmov.f32 s12, s18 +; CHECK-NEXT: vmov.f32 s14, s20 +; CHECK-NEXT: vmov.f32 s27, s22 +; CHECK-NEXT: vmov.f32 s31, s21 ; CHECK-NEXT: vadd.f32 q6, q7, q6 -; CHECK-NEXT: vmov.f32 s15, s19 +; CHECK-NEXT: vmov.f32 s15, s23 ; CHECK-NEXT: vadd.f32 q3, q6, q3 ; CHECK-NEXT: vstrw.32 q3, [r1, #32] ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} @@ -1147,41 +1147,41 @@ define void @vld3_v8f16(<24 x half> *%src, <8 x half> *%dst) { ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: vldrw.u32 q0, [r0] ; CHECK-NEXT: vldrw.u32 q4, [r0, #16] +; CHECK-NEXT: vldrw.u32 q3, [r0, #32] ; CHECK-NEXT: vmovx.f16 s8, s2 ; CHECK-NEXT: vmov.f32 s4, s1 ; CHECK-NEXT: vins.f16 s4, s8 ; CHECK-NEXT: vmovx.f16 s8, s17 ; CHECK-NEXT: vmov.f32 s5, s16 -; CHECK-NEXT: vmovx.f16 s24, s1 +; CHECK-NEXT: vmovx.f16 s20, s15 ; CHECK-NEXT: vins.f16 s5, s8 -; CHECK-NEXT: vldrw.u32 q2, [r0, #32] +; CHECK-NEXT: vmov.f32 s11, s14 +; CHECK-NEXT: vins.f16 s11, s20 ; CHECK-NEXT: vmov.f32 s6, s19 -; CHECK-NEXT: vmovx.f16 s26, s16 -; CHECK-NEXT: vmovx.f16 s20, s11 -; CHECK-NEXT: vmov.f32 s15, s10 -; CHECK-NEXT: vins.f16 s15, s20 -; CHECK-NEXT: vmovx.f16 s20, s8 +; CHECK-NEXT: vmovx.f16 s20, s12 +; CHECK-NEXT: vmov.f32 s28, s18 ; CHECK-NEXT: vins.f16 s6, s20 ; CHECK-NEXT: vmovx.f16 s20, s19 -; CHECK-NEXT: vmov.f32 s28, s18 -; CHECK-NEXT: vmovx.f16 s30, s10 ; CHECK-NEXT: vins.f16 s28, s20 +; CHECK-NEXT: vmovx.f16 s24, s1 ; CHECK-NEXT: vmovx.f16 s20, s0 ; CHECK-NEXT: vins.f16 s0, s24 ; CHECK-NEXT: vins.f16 s20, s2 +; CHECK-NEXT: vmovx.f16 s26, s16 ; CHECK-NEXT: vmovx.f16 s21, s3 ; CHECK-NEXT: vins.f16 s3, s26 ; CHECK-NEXT: vins.f16 s21, s17 -; CHECK-NEXT: vmov.f32 s14, s8 -; CHECK-NEXT: vmovx.f16 s23, s9 +; CHECK-NEXT: vmovx.f16 s30, s14 +; CHECK-NEXT: vmovx.f16 s23, s13 +; CHECK-NEXT: vmov.f32 s10, s12 ; CHECK-NEXT: vmov.f32 s1, s3 -; CHECK-NEXT: vins.f16 s9, s30 -; CHECK-NEXT: vins.f16 s23, s11 -; CHECK-NEXT: vmovx.f16 s22, s18 +; CHECK-NEXT: vins.f16 s13, s30 +; CHECK-NEXT: vins.f16 s23, s15 ; CHECK-NEXT: vmov.f32 s2, s28 -; CHECK-NEXT: vins.f16 s22, s8 -; CHECK-NEXT: vmov.f32 s3, s9 -; CHECK-NEXT: vmov.f32 s7, s15 +; CHECK-NEXT: vmovx.f16 s22, s18 +; CHECK-NEXT: vmov.f32 s3, s13 +; CHECK-NEXT: vins.f16 s22, s12 +; CHECK-NEXT: vmov.f32 s7, s11 ; CHECK-NEXT: vadd.f16 q0, q0, q5 ; CHECK-NEXT: vadd.f16 q0, q0, q1 ; CHECK-NEXT: vstrw.32 q0, [r1] @@ -1204,24 +1204,24 @@ define void @vld3_v16f16(<48 x half> *%src, <16 x half> *%dst) { ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: vldrw.u32 q0, [r0, #48] -; CHECK-NEXT: vldrw.u32 q4, [r0, #64] +; CHECK-NEXT: vldrw.u32 q3, [r0, #64] ; CHECK-NEXT: vmovx.f16 s8, s2 ; CHECK-NEXT: vmov.f32 s4, s1 ; CHECK-NEXT: vins.f16 s4, s8 -; CHECK-NEXT: vmovx.f16 s8, s17 -; CHECK-NEXT: vmov.f32 s5, s16 +; CHECK-NEXT: vmovx.f16 s8, s13 +; CHECK-NEXT: vmov.f32 s5, s12 ; CHECK-NEXT: vmovx.f16 s24, s1 ; CHECK-NEXT: vins.f16 s5, s8 ; CHECK-NEXT: vldrw.u32 q2, [r0, #80] -; CHECK-NEXT: vmov.f32 s6, s19 -; CHECK-NEXT: vmovx.f16 s26, s16 +; CHECK-NEXT: vmov.f32 s6, s15 +; CHECK-NEXT: vmovx.f16 s26, s12 ; CHECK-NEXT: vmovx.f16 s20, s11 -; CHECK-NEXT: vmov.f32 s15, s10 -; CHECK-NEXT: vins.f16 s15, s20 +; CHECK-NEXT: vmov.f32 s19, s10 +; CHECK-NEXT: vins.f16 s19, s20 ; CHECK-NEXT: vmovx.f16 s20, s8 ; CHECK-NEXT: vins.f16 s6, s20 -; CHECK-NEXT: vmovx.f16 s20, s19 -; CHECK-NEXT: vmov.f32 s28, s18 +; CHECK-NEXT: vmovx.f16 s20, s15 +; CHECK-NEXT: vmov.f32 s28, s14 ; CHECK-NEXT: vmovx.f16 s30, s10 ; CHECK-NEXT: vins.f16 s28, s20 ; CHECK-NEXT: vmovx.f16 s20, s0 @@ -1229,17 +1229,17 @@ define void @vld3_v16f16(<48 x half> *%src, <16 x half> *%dst) { ; CHECK-NEXT: vins.f16 s20, s2 ; CHECK-NEXT: vmovx.f16 s21, s3 ; CHECK-NEXT: vins.f16 s3, s26 -; CHECK-NEXT: vins.f16 s21, s17 -; CHECK-NEXT: vmov.f32 s14, s8 +; CHECK-NEXT: vins.f16 s21, s13 +; CHECK-NEXT: vmov.f32 s18, s8 ; CHECK-NEXT: vmovx.f16 s23, s9 ; CHECK-NEXT: vmov.f32 s1, s3 ; CHECK-NEXT: vins.f16 s9, s30 ; CHECK-NEXT: vins.f16 s23, s11 -; CHECK-NEXT: vmovx.f16 s22, s18 +; CHECK-NEXT: vmovx.f16 s22, s14 ; CHECK-NEXT: vmov.f32 s2, s28 ; CHECK-NEXT: vins.f16 s22, s8 ; CHECK-NEXT: vmov.f32 s3, s9 -; CHECK-NEXT: vmov.f32 s7, s15 +; CHECK-NEXT: vmov.f32 s7, s19 ; CHECK-NEXT: vadd.f16 q0, q0, q5 ; CHECK-NEXT: vadd.f16 q1, q0, q1 ; CHECK-NEXT: vldrw.u32 q0, [r0] diff --git a/llvm/test/CodeGen/Thumb2/mve-vld4.ll b/llvm/test/CodeGen/Thumb2/mve-vld4.ll index 92a25743a6b82..c1b984761dcdf 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vld4.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vld4.ll @@ -193,28 +193,27 @@ define void @vld4_v4i32_align1(<16 x i32> *%src, <4 x i32> *%dst) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} -; CHECK-NEXT: vldrb.u8 q0, [r0] -; CHECK-NEXT: vldrb.u8 q3, [r0, #32] -; CHECK-NEXT: vldrb.u8 q1, [r0, #48] -; CHECK-NEXT: vldrb.u8 q2, [r0, #16] -; CHECK-NEXT: vmov.f32 s18, s15 -; CHECK-NEXT: vmov.f64 d10, d1 -; CHECK-NEXT: vmov.f32 s19, s7 -; CHECK-NEXT: vmov.f32 s21, s10 -; CHECK-NEXT: vmov.f32 s16, s3 -; CHECK-NEXT: vmov.f32 s15, s6 -; CHECK-NEXT: vmov.f32 s22, s14 -; CHECK-NEXT: vmov.f32 s17, s11 -; CHECK-NEXT: vmov.f32 s23, s6 +; CHECK-NEXT: vldrb.u8 q2, [r0] +; CHECK-NEXT: vldrb.u8 q3, [r0, #16] +; CHECK-NEXT: vldrb.u8 q1, [r0, #32] +; CHECK-NEXT: vldrb.u8 q0, [r0, #48] +; CHECK-NEXT: vmov.f32 s16, s11 +; CHECK-NEXT: vmov.f64 d10, d5 +; CHECK-NEXT: vmov.f32 s17, s15 +; CHECK-NEXT: vmov.f32 s21, s14 +; CHECK-NEXT: vmov.f32 s18, s7 +; CHECK-NEXT: vmov.f32 s22, s6 +; CHECK-NEXT: vmov.f32 s19, s3 +; CHECK-NEXT: vmov.f32 s23, s2 ; CHECK-NEXT: vadd.i32 q4, q5, q4 -; CHECK-NEXT: vmov.f32 s22, s13 -; CHECK-NEXT: vmov.f32 s23, s5 -; CHECK-NEXT: vmov.f32 s20, s1 -; CHECK-NEXT: vmov.f32 s2, s12 -; CHECK-NEXT: vmov.f32 s3, s4 -; CHECK-NEXT: vmov.f32 s21, s9 -; CHECK-NEXT: vmov.f32 s1, s8 -; CHECK-NEXT: vadd.i32 q0, q0, q5 +; CHECK-NEXT: vmov.f32 s20, s9 +; CHECK-NEXT: vmov.f32 s21, s13 +; CHECK-NEXT: vmov.f32 s9, s12 +; CHECK-NEXT: vmov.f32 s22, s5 +; CHECK-NEXT: vmov.f32 s10, s4 +; CHECK-NEXT: vmov.f32 s23, s1 +; CHECK-NEXT: vmov.f32 s11, s0 +; CHECK-NEXT: vadd.i32 q0, q2, q5 ; CHECK-NEXT: vadd.i32 q0, q0, q4 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: vpop {d8, d9, d10, d11} @@ -992,28 +991,27 @@ define void @vld4_v4f32_align1(<16 x float> *%src, <4 x float> *%dst) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} -; CHECK-NEXT: vldrb.u8 q0, [r0] -; CHECK-NEXT: vldrb.u8 q3, [r0, #32] -; CHECK-NEXT: vldrb.u8 q1, [r0, #48] -; CHECK-NEXT: vldrb.u8 q2, [r0, #16] -; CHECK-NEXT: vmov.f32 s18, s15 -; CHECK-NEXT: vmov.f64 d10, d1 -; CHECK-NEXT: vmov.f32 s19, s7 -; CHECK-NEXT: vmov.f32 s21, s10 -; CHECK-NEXT: vmov.f32 s16, s3 -; CHECK-NEXT: vmov.f32 s15, s6 -; CHECK-NEXT: vmov.f32 s22, s14 -; CHECK-NEXT: vmov.f32 s17, s11 -; CHECK-NEXT: vmov.f32 s23, s6 +; CHECK-NEXT: vldrb.u8 q2, [r0] +; CHECK-NEXT: vldrb.u8 q3, [r0, #16] +; CHECK-NEXT: vldrb.u8 q1, [r0, #32] +; CHECK-NEXT: vldrb.u8 q0, [r0, #48] +; CHECK-NEXT: vmov.f32 s16, s11 +; CHECK-NEXT: vmov.f64 d10, d5 +; CHECK-NEXT: vmov.f32 s17, s15 +; CHECK-NEXT: vmov.f32 s21, s14 +; CHECK-NEXT: vmov.f32 s18, s7 +; CHECK-NEXT: vmov.f32 s22, s6 +; CHECK-NEXT: vmov.f32 s19, s3 +; CHECK-NEXT: vmov.f32 s23, s2 ; CHECK-NEXT: vadd.f32 q4, q5, q4 -; CHECK-NEXT: vmov.f32 s22, s13 -; CHECK-NEXT: vmov.f32 s23, s5 -; CHECK-NEXT: vmov.f32 s20, s1 -; CHECK-NEXT: vmov.f32 s2, s12 -; CHECK-NEXT: vmov.f32 s3, s4 -; CHECK-NEXT: vmov.f32 s21, s9 -; CHECK-NEXT: vmov.f32 s1, s8 -; CHECK-NEXT: vadd.f32 q0, q0, q5 +; CHECK-NEXT: vmov.f32 s20, s9 +; CHECK-NEXT: vmov.f32 s21, s13 +; CHECK-NEXT: vmov.f32 s9, s12 +; CHECK-NEXT: vmov.f32 s22, s5 +; CHECK-NEXT: vmov.f32 s10, s4 +; CHECK-NEXT: vmov.f32 s23, s1 +; CHECK-NEXT: vmov.f32 s11, s0 +; CHECK-NEXT: vadd.f32 q0, q2, q5 ; CHECK-NEXT: vadd.f32 q0, q0, q4 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: vpop {d8, d9, d10, d11} diff --git a/llvm/test/CodeGen/Thumb2/mve-vmull-splat.ll b/llvm/test/CodeGen/Thumb2/mve-vmull-splat.ll index 418c56d7b1c1f..8004aad599b90 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vmull-splat.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vmull-splat.ll @@ -190,17 +190,15 @@ entry: define arm_aapcs_vfpcc <4 x i64> @sext32_0213_0ext(<8 x i32> %src1, i32 %src2) { ; CHECK-LABEL: sext32_0213_0ext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.f32 s4, s0 +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov.f32 s16, s1 ; CHECK-NEXT: vmov q3[2], q3[0], r0, r0 -; CHECK-NEXT: vmov.f32 s5, s2 -; CHECK-NEXT: vmov.f32 s6, s1 -; CHECK-NEXT: vmov.f32 s7, s3 -; CHECK-NEXT: vmov.f32 s8, s4 -; CHECK-NEXT: vmov.f32 s10, s5 -; CHECK-NEXT: vmullb.s32 q0, q2, q3 -; CHECK-NEXT: vmov.f32 s8, s6 -; CHECK-NEXT: vmov.f32 s10, s7 -; CHECK-NEXT: vmullb.s32 q1, q2, q3 +; CHECK-NEXT: vmov.f32 s18, s3 +; CHECK-NEXT: vmullb.s32 q2, q0, q3 +; CHECK-NEXT: vmullb.s32 q1, q4, q3 +; CHECK-NEXT: vmov q0, q2 +; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: bx lr entry: %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> @@ -215,17 +213,15 @@ entry: define arm_aapcs_vfpcc <4 x i64> @sext32_0ext_0213(<8 x i32> %src1, i32 %src2) { ; CHECK-LABEL: sext32_0ext_0213: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.f32 s4, s0 +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov.f32 s16, s1 ; CHECK-NEXT: vmov q3[2], q3[0], r0, r0 -; CHECK-NEXT: vmov.f32 s5, s2 -; CHECK-NEXT: vmov.f32 s6, s1 -; CHECK-NEXT: vmov.f32 s7, s3 -; CHECK-NEXT: vmov.f32 s8, s4 -; CHECK-NEXT: vmov.f32 s10, s5 -; CHECK-NEXT: vmullb.s32 q0, q3, q2 -; CHECK-NEXT: vmov.f32 s8, s6 -; CHECK-NEXT: vmov.f32 s10, s7 -; CHECK-NEXT: vmullb.s32 q1, q3, q2 +; CHECK-NEXT: vmov.f32 s18, s3 +; CHECK-NEXT: vmullb.s32 q2, q3, q0 +; CHECK-NEXT: vmullb.s32 q1, q3, q4 +; CHECK-NEXT: vmov q0, q2 +; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: bx lr entry: %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> @@ -242,18 +238,13 @@ define arm_aapcs_vfpcc <4 x i64> @sext32_0213_ext0(<8 x i32> %src1, i32 %src2) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, r5, r7, lr} ; CHECK-NEXT: push {r4, r5, r7, lr} -; CHECK-NEXT: vmov.f32 s4, s0 -; CHECK-NEXT: vmov.f32 s5, s2 -; CHECK-NEXT: vmov.f32 s6, s1 -; CHECK-NEXT: vmov.f32 s7, s3 -; CHECK-NEXT: vmov.f32 s0, s4 -; CHECK-NEXT: vmov.f32 s2, s5 -; CHECK-NEXT: vmov r3, s0 -; CHECK-NEXT: vmov.f32 s8, s6 +; CHECK-NEXT: vmov q1, q0 +; CHECK-NEXT: vmov r1, s6 +; CHECK-NEXT: vmov r3, s4 +; CHECK-NEXT: vmov.f32 s8, s5 ; CHECK-NEXT: vmov.f32 s10, s7 -; CHECK-NEXT: vmov r1, s2 -; CHECK-NEXT: umull r2, r5, r3, r0 ; CHECK-NEXT: umull lr, r12, r1, r0 +; CHECK-NEXT: umull r2, r5, r3, r0 ; CHECK-NEXT: vmov q0[2], q0[0], r2, lr ; CHECK-NEXT: asrs r2, r0, #31 ; CHECK-NEXT: mla r4, r1, r2, r12 @@ -291,19 +282,14 @@ define arm_aapcs_vfpcc <4 x i64> @sext32_ext0_0213(<8 x i32> %src1, i32 %src2) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, r5, r7, lr} ; CHECK-NEXT: push {r4, r5, r7, lr} -; CHECK-NEXT: vmov.f32 s4, s0 +; CHECK-NEXT: vmov q1, q0 ; CHECK-NEXT: asrs r4, r0, #31 -; CHECK-NEXT: vmov.f32 s5, s2 -; CHECK-NEXT: vmov.f32 s6, s1 -; CHECK-NEXT: vmov.f32 s7, s3 -; CHECK-NEXT: vmov.f32 s0, s4 -; CHECK-NEXT: vmov.f32 s2, s5 -; CHECK-NEXT: vmov r3, s0 -; CHECK-NEXT: vmov.f32 s8, s6 +; CHECK-NEXT: vmov r1, s6 +; CHECK-NEXT: vmov r3, s4 +; CHECK-NEXT: vmov.f32 s8, s5 ; CHECK-NEXT: vmov.f32 s10, s7 -; CHECK-NEXT: vmov r1, s2 -; CHECK-NEXT: umull r2, r5, r0, r3 ; CHECK-NEXT: umull lr, r12, r0, r1 +; CHECK-NEXT: umull r2, r5, r0, r3 ; CHECK-NEXT: vmov q0[2], q0[0], r2, lr ; CHECK-NEXT: asrs r2, r1, #31 ; CHECK-NEXT: mla r2, r0, r2, r12 @@ -488,17 +474,15 @@ entry: define arm_aapcs_vfpcc <4 x i64> @zext32_0213_0ext(<8 x i32> %src1, i32 %src2) { ; CHECK-LABEL: zext32_0213_0ext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.f32 s4, s0 +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov.f32 s16, s1 ; CHECK-NEXT: vmov q3[2], q3[0], r0, r0 -; CHECK-NEXT: vmov.f32 s5, s2 -; CHECK-NEXT: vmov.f32 s6, s1 -; CHECK-NEXT: vmov.f32 s7, s3 -; CHECK-NEXT: vmov.f32 s8, s4 -; CHECK-NEXT: vmov.f32 s10, s5 -; CHECK-NEXT: vmullb.u32 q0, q2, q3 -; CHECK-NEXT: vmov.f32 s8, s6 -; CHECK-NEXT: vmov.f32 s10, s7 -; CHECK-NEXT: vmullb.u32 q1, q2, q3 +; CHECK-NEXT: vmov.f32 s18, s3 +; CHECK-NEXT: vmullb.u32 q2, q0, q3 +; CHECK-NEXT: vmullb.u32 q1, q4, q3 +; CHECK-NEXT: vmov q0, q2 +; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: bx lr entry: %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> @@ -513,17 +497,15 @@ entry: define arm_aapcs_vfpcc <4 x i64> @zext32_0ext_0213(<8 x i32> %src1, i32 %src2) { ; CHECK-LABEL: zext32_0ext_0213: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.f32 s4, s0 +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov.f32 s16, s1 ; CHECK-NEXT: vmov q3[2], q3[0], r0, r0 -; CHECK-NEXT: vmov.f32 s5, s2 -; CHECK-NEXT: vmov.f32 s6, s1 -; CHECK-NEXT: vmov.f32 s7, s3 -; CHECK-NEXT: vmov.f32 s8, s4 -; CHECK-NEXT: vmov.f32 s10, s5 -; CHECK-NEXT: vmullb.u32 q0, q3, q2 -; CHECK-NEXT: vmov.f32 s8, s6 -; CHECK-NEXT: vmov.f32 s10, s7 -; CHECK-NEXT: vmullb.u32 q1, q3, q2 +; CHECK-NEXT: vmov.f32 s18, s3 +; CHECK-NEXT: vmullb.u32 q2, q3, q0 +; CHECK-NEXT: vmullb.u32 q1, q3, q4 +; CHECK-NEXT: vmov q0, q2 +; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: bx lr entry: %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> @@ -538,22 +520,17 @@ entry: define arm_aapcs_vfpcc <4 x i64> @zext32_0213_ext0(<8 x i32> %src1, i32 %src2) { ; CHECK-LABEL: zext32_0213_ext0: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.f32 s4, s0 -; CHECK-NEXT: vmov.f32 s5, s2 -; CHECK-NEXT: vmov.f32 s6, s1 -; CHECK-NEXT: vmov.f32 s7, s3 -; CHECK-NEXT: vmov.f32 s0, s4 -; CHECK-NEXT: vmov.f32 s2, s5 -; CHECK-NEXT: vmov r3, s0 -; CHECK-NEXT: vmov.f32 s8, s6 -; CHECK-NEXT: vmov.f32 s10, s7 ; CHECK-NEXT: vmov r1, s2 -; CHECK-NEXT: umull r3, r2, r3, r0 +; CHECK-NEXT: vmov r3, s0 +; CHECK-NEXT: vmov.f32 s4, s1 +; CHECK-NEXT: vmov.f32 s6, s3 ; CHECK-NEXT: umull r1, r12, r1, r0 -; CHECK-NEXT: vmov q0[2], q0[0], r3, r1 -; CHECK-NEXT: vmov r1, s10 -; CHECK-NEXT: vmov r3, s8 -; CHECK-NEXT: vmov q0[3], q0[1], r2, r12 +; CHECK-NEXT: umull r3, r2, r3, r0 +; CHECK-NEXT: vmov q2[2], q2[0], r3, r1 +; CHECK-NEXT: vmov r1, s6 +; CHECK-NEXT: vmov r3, s4 +; CHECK-NEXT: vmov q2[3], q2[1], r2, r12 +; CHECK-NEXT: vmov q0, q2 ; CHECK-NEXT: umull r1, r2, r1, r0 ; CHECK-NEXT: umull r0, r3, r3, r0 ; CHECK-NEXT: vmov q1[2], q1[0], r0, r1 @@ -572,22 +549,17 @@ entry: define arm_aapcs_vfpcc <4 x i64> @zext32_ext0_0213(<8 x i32> %src1, i32 %src2) { ; CHECK-LABEL: zext32_ext0_0213: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.f32 s4, s0 -; CHECK-NEXT: vmov.f32 s5, s2 -; CHECK-NEXT: vmov.f32 s6, s1 -; CHECK-NEXT: vmov.f32 s7, s3 -; CHECK-NEXT: vmov.f32 s0, s4 -; CHECK-NEXT: vmov.f32 s2, s5 -; CHECK-NEXT: vmov r3, s0 -; CHECK-NEXT: vmov.f32 s8, s6 -; CHECK-NEXT: vmov.f32 s10, s7 ; CHECK-NEXT: vmov r1, s2 -; CHECK-NEXT: umull r3, r2, r0, r3 +; CHECK-NEXT: vmov r3, s0 +; CHECK-NEXT: vmov.f32 s4, s1 +; CHECK-NEXT: vmov.f32 s6, s3 ; CHECK-NEXT: umull r1, r12, r0, r1 -; CHECK-NEXT: vmov q0[2], q0[0], r3, r1 -; CHECK-NEXT: vmov r1, s10 -; CHECK-NEXT: vmov r3, s8 -; CHECK-NEXT: vmov q0[3], q0[1], r2, r12 +; CHECK-NEXT: umull r3, r2, r0, r3 +; CHECK-NEXT: vmov q2[2], q2[0], r3, r1 +; CHECK-NEXT: vmov r1, s6 +; CHECK-NEXT: vmov r3, s4 +; CHECK-NEXT: vmov q2[3], q2[1], r2, r12 +; CHECK-NEXT: vmov q0, q2 ; CHECK-NEXT: umull r1, r2, r0, r1 ; CHECK-NEXT: umull r0, r3, r0, r3 ; CHECK-NEXT: vmov q1[2], q1[0], r0, r1 diff --git a/llvm/test/CodeGen/Thumb2/mve-vst3.ll b/llvm/test/CodeGen/Thumb2/mve-vst3.ll index c1827f0c91886..1e46dd1b256f5 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vst3.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vst3.ll @@ -345,61 +345,61 @@ define void @vst3_v8i16(<8 x i16> *%src, <24 x i16> *%dst) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12} -; CHECK-NEXT: vldrw.u32 q3, [r0] +; CHECK-NEXT: vldrw.u32 q2, [r0] ; CHECK-NEXT: vldrw.u32 q1, [r0, #16] -; CHECK-NEXT: vmov.f64 d0, d6 +; CHECK-NEXT: vmov.f64 d0, d4 ; CHECK-NEXT: vmov.u16 r2, q1[1] -; CHECK-NEXT: vmovx.f16 s20, s12 +; CHECK-NEXT: vmovx.f16 s20, s8 ; CHECK-NEXT: vins.f16 s0, s4 -; CHECK-NEXT: vmov.f32 s8, s13 -; CHECK-NEXT: vins.f16 s8, s5 +; CHECK-NEXT: vmov.f32 s12, s9 +; CHECK-NEXT: vins.f16 s12, s5 ; CHECK-NEXT: vmov.16 q0[4], r2 -; CHECK-NEXT: vmov.f32 s3, s8 -; CHECK-NEXT: vldrw.u32 q2, [r0, #32] -; CHECK-NEXT: vmov.f32 s1, s12 -; CHECK-NEXT: vmov.f32 s17, s8 -; CHECK-NEXT: vmov.f32 s18, s8 +; CHECK-NEXT: vmov.f32 s3, s12 +; CHECK-NEXT: vldrw.u32 q3, [r0, #32] +; CHECK-NEXT: vmov.f32 s1, s8 +; CHECK-NEXT: vmov.f32 s17, s12 +; CHECK-NEXT: vmov.f32 s18, s12 ; CHECK-NEXT: vins.f16 s17, s20 ; CHECK-NEXT: vmovx.f16 s20, s18 ; CHECK-NEXT: vins.f16 s2, s20 -; CHECK-NEXT: vmovx.f16 s20, s10 +; CHECK-NEXT: vmovx.f16 s20, s14 ; CHECK-NEXT: vmov.f32 s18, s2 ; CHECK-NEXT: vmov.f32 s1, s17 ; CHECK-NEXT: vmov.f32 s2, s18 ; CHECK-NEXT: vmovx.f16 s16, s6 ; CHECK-NEXT: vins.f16 s16, s20 -; CHECK-NEXT: vmovx.f16 s20, s11 +; CHECK-NEXT: vmovx.f16 s20, s15 ; CHECK-NEXT: vins.f16 s17, s7 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: vmovx.f16 s19, s7 ; CHECK-NEXT: vrev32.16 q1, q1 ; CHECK-NEXT: vins.f16 s19, s20 -; CHECK-NEXT: vmov.f32 s21, s15 -; CHECK-NEXT: vmov.f32 s18, s11 +; CHECK-NEXT: vmov.f32 s21, s11 +; CHECK-NEXT: vmov.f32 s18, s15 ; CHECK-NEXT: vmovx.f16 s24, s17 -; CHECK-NEXT: vmov.f32 s22, s15 +; CHECK-NEXT: vmov.f32 s22, s11 ; CHECK-NEXT: vins.f16 s21, s24 ; CHECK-NEXT: vmovx.f16 s24, s22 ; CHECK-NEXT: vins.f16 s18, s24 -; CHECK-NEXT: vmov.f32 s8, s9 +; CHECK-NEXT: vmov.f32 s12, s13 ; CHECK-NEXT: vmov.f32 s22, s18 ; CHECK-NEXT: vmov.f32 s17, s21 ; CHECK-NEXT: vmov.f32 s18, s22 -; CHECK-NEXT: vmovx.f16 s20, s13 -; CHECK-NEXT: vins.f16 s8, s20 -; CHECK-NEXT: vmovx.f16 s20, s14 -; CHECK-NEXT: vins.f16 s10, s20 +; CHECK-NEXT: vmovx.f16 s20, s9 +; CHECK-NEXT: vins.f16 s12, s20 +; CHECK-NEXT: vmovx.f16 s20, s10 +; CHECK-NEXT: vins.f16 s14, s20 ; CHECK-NEXT: vstrw.32 q4, [r1, #32] -; CHECK-NEXT: vmov.f32 s11, s10 -; CHECK-NEXT: vmov.f32 s10, s14 -; CHECK-NEXT: vmovx.f16 s12, s9 -; CHECK-NEXT: vins.f16 s5, s12 -; CHECK-NEXT: vmovx.f16 s12, s6 -; CHECK-NEXT: vins.f16 s10, s12 -; CHECK-NEXT: vmov.f32 s6, s10 -; CHECK-NEXT: vmov.f32 s9, s5 -; CHECK-NEXT: vmov.f32 s10, s6 -; CHECK-NEXT: vstrw.32 q2, [r1, #16] +; CHECK-NEXT: vmov.f32 s15, s14 +; CHECK-NEXT: vmov.f32 s14, s10 +; CHECK-NEXT: vmovx.f16 s8, s13 +; CHECK-NEXT: vins.f16 s5, s8 +; CHECK-NEXT: vmovx.f16 s8, s6 +; CHECK-NEXT: vins.f16 s14, s8 +; CHECK-NEXT: vmov.f32 s6, s14 +; CHECK-NEXT: vmov.f32 s13, s5 +; CHECK-NEXT: vmov.f32 s14, s6 +; CHECK-NEXT: vstrw.32 q3, [r1, #16] ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12} ; CHECK-NEXT: bx lr entry: @@ -423,20 +423,19 @@ define void @vst3_v16i16(<16 x i16> *%src, <48 x i16> *%dst) { ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: .pad #80 ; CHECK-NEXT: sub sp, #80 -; CHECK-NEXT: vldrw.u32 q1, [r0, #48] +; CHECK-NEXT: vldrw.u32 q5, [r0, #48] ; CHECK-NEXT: vldrw.u32 q3, [r0, #80] ; CHECK-NEXT: vldrw.u32 q6, [r0, #32] -; CHECK-NEXT: vldrw.u32 q5, [r0, #64] ; CHECK-NEXT: vmovx.f16 s0, s14 -; CHECK-NEXT: vmovx.f16 s8, s6 +; CHECK-NEXT: vmovx.f16 s8, s22 ; CHECK-NEXT: vins.f16 s8, s0 ; CHECK-NEXT: vmovx.f16 s0, s15 -; CHECK-NEXT: vins.f16 s9, s7 -; CHECK-NEXT: vstrw.32 q1, [sp, #48] @ 16-byte Spill -; CHECK-NEXT: vmovx.f16 s11, s7 +; CHECK-NEXT: vins.f16 s9, s23 ; CHECK-NEXT: vmov.u16 r2, q6[1] +; CHECK-NEXT: vmovx.f16 s11, s23 +; CHECK-NEXT: vstrw.32 q6, [sp, #48] @ 16-byte Spill ; CHECK-NEXT: vins.f16 s11, s0 -; CHECK-NEXT: vstrw.32 q6, [sp] @ 16-byte Spill +; CHECK-NEXT: vstrw.32 q5, [sp] @ 16-byte Spill ; CHECK-NEXT: vmov.f32 s10, s15 ; CHECK-NEXT: vmovx.f16 s4, s9 ; CHECK-NEXT: vmov q4, q2 @@ -457,95 +456,99 @@ define void @vst3_v16i16(<16 x i16> *%src, <48 x i16> *%dst) { ; CHECK-NEXT: vmov.16 q2[4], r2 ; CHECK-NEXT: vmov.f32 s11, s5 ; CHECK-NEXT: vins.f16 s11, s25 -; CHECK-NEXT: vmov.f32 s18, s2 +; CHECK-NEXT: vldrw.u32 q6, [r0, #64] ; CHECK-NEXT: vmov.f32 s9, s4 -; CHECK-NEXT: vstrw.32 q4, [sp, #16] @ 16-byte Spill -; CHECK-NEXT: vmov.f32 s5, s20 -; CHECK-NEXT: vldrw.u32 q4, [sp, #32] @ 16-byte Reload -; CHECK-NEXT: vmov.f32 s6, s20 +; CHECK-NEXT: vmov.u16 r0, q5[1] +; CHECK-NEXT: vmov.f32 s5, s24 +; CHECK-NEXT: vmov.f32 s6, s24 ; CHECK-NEXT: vins.f16 s5, s28 ; CHECK-NEXT: vmovx.f16 s28, s6 ; CHECK-NEXT: vins.f16 s10, s28 -; CHECK-NEXT: vmov.f64 d14, d8 +; CHECK-NEXT: vmov.f32 s18, s2 ; CHECK-NEXT: vmov.f32 s6, s10 +; CHECK-NEXT: vstrw.32 q4, [sp, #16] @ 16-byte Spill ; CHECK-NEXT: vmov.f32 s9, s5 -; CHECK-NEXT: vmov.f32 s0, s17 +; CHECK-NEXT: vldrw.u32 q4, [sp, #48] @ 16-byte Reload ; CHECK-NEXT: vmov.f32 s10, s6 -; CHECK-NEXT: vldrw.u32 q1, [sp, #48] @ 16-byte Reload +; CHECK-NEXT: vldrw.u32 q1, [sp, #32] @ 16-byte Reload ; CHECK-NEXT: vstrw.32 q2, [r1] -; CHECK-NEXT: vins.f16 s28, s4 -; CHECK-NEXT: vmov.u16 r0, q1[1] -; CHECK-NEXT: vins.f16 s0, s5 +; CHECK-NEXT: vmov.f64 d14, d2 +; CHECK-NEXT: vins.f16 s28, s20 +; CHECK-NEXT: vmov.f32 s0, s5 +; CHECK-NEXT: vins.f16 s0, s21 ; CHECK-NEXT: vmov.16 q7[4], r0 ; CHECK-NEXT: vmov.f32 s31, s0 -; CHECK-NEXT: vmovx.f16 s4, s16 +; CHECK-NEXT: vldrw.u32 q5, [sp, #64] @ 16-byte Reload ; CHECK-NEXT: vmov.f32 s1, s12 +; CHECK-NEXT: vmov.f32 s29, s4 +; CHECK-NEXT: vmovx.f16 s4, s4 ; CHECK-NEXT: vmov.f32 s2, s12 ; CHECK-NEXT: vins.f16 s1, s4 -; CHECK-NEXT: vmov.f32 s29, s16 ; CHECK-NEXT: vmovx.f16 s4, s2 -; CHECK-NEXT: vldrw.u32 q4, [sp, #64] @ 16-byte Reload ; CHECK-NEXT: vins.f16 s30, s4 -; CHECK-NEXT: vmovx.f16 s4, s22 +; CHECK-NEXT: vmovx.f16 s4, s26 ; CHECK-NEXT: vmov.f32 s2, s30 ; CHECK-NEXT: vmov.f32 s29, s1 ; CHECK-NEXT: vmov.f32 s12, s13 ; CHECK-NEXT: vmov.f32 s30, s2 -; CHECK-NEXT: vmovx.f16 s0, s26 +; CHECK-NEXT: vmovx.f16 s0, s18 ; CHECK-NEXT: vins.f16 s0, s4 -; CHECK-NEXT: vmovx.f16 s4, s23 -; CHECK-NEXT: vins.f16 s1, s27 +; CHECK-NEXT: vmov q1, q4 +; CHECK-NEXT: vins.f16 s1, s7 ; CHECK-NEXT: vstrw.32 q7, [r1, #48] -; CHECK-NEXT: vmovx.f16 s3, s27 +; CHECK-NEXT: vmovx.f16 s3, s7 +; CHECK-NEXT: vmovx.f16 s4, s27 ; CHECK-NEXT: vins.f16 s3, s4 -; CHECK-NEXT: vmov.f32 s5, s19 -; CHECK-NEXT: vmov.f32 s2, s23 -; CHECK-NEXT: vmovx.f16 s24, s1 -; CHECK-NEXT: vmov.f32 s6, s19 +; CHECK-NEXT: vmov.f32 s5, s23 +; CHECK-NEXT: vmov.f32 s2, s27 +; CHECK-NEXT: vmovx.f16 s16, s1 +; CHECK-NEXT: vmov.f32 s6, s23 +; CHECK-NEXT: vins.f16 s5, s16 ; CHECK-NEXT: vldrw.u32 q4, [sp, #32] @ 16-byte Reload -; CHECK-NEXT: vins.f16 s5, s24 -; CHECK-NEXT: vmovx.f16 s24, s6 -; CHECK-NEXT: vins.f16 s2, s24 -; CHECK-NEXT: vmovx.f16 s24, s17 +; CHECK-NEXT: vmovx.f16 s20, s6 +; CHECK-NEXT: vmov.f32 s24, s25 +; CHECK-NEXT: vins.f16 s2, s20 +; CHECK-NEXT: vmovx.f16 s20, s17 +; CHECK-NEXT: vins.f16 s12, s20 +; CHECK-NEXT: vmovx.f16 s20, s18 +; CHECK-NEXT: vins.f16 s14, s20 ; CHECK-NEXT: vmov.f32 s6, s2 -; CHECK-NEXT: vins.f16 s12, s24 -; CHECK-NEXT: vmovx.f16 s24, s18 -; CHECK-NEXT: vmov.f32 s1, s5 -; CHECK-NEXT: vins.f16 s14, s24 -; CHECK-NEXT: vldrw.u32 q6, [sp, #48] @ 16-byte Reload ; CHECK-NEXT: vmov.f32 s15, s14 ; CHECK-NEXT: vmov.f32 s14, s18 ; CHECK-NEXT: vmovx.f16 s16, s13 -; CHECK-NEXT: vrev32.16 q6, q6 -; CHECK-NEXT: vmov.f32 s20, s21 -; CHECK-NEXT: vins.f16 s25, s16 -; CHECK-NEXT: vmovx.f16 s16, s26 +; CHECK-NEXT: vstr s16, [sp, #32] @ 4-byte Spill +; CHECK-NEXT: vldrw.u32 q4, [sp] @ 16-byte Reload +; CHECK-NEXT: vmov.f32 s1, s5 +; CHECK-NEXT: vrev32.16 q5, q4 +; CHECK-NEXT: vldr s16, [sp, #32] @ 4-byte Reload +; CHECK-NEXT: vins.f16 s21, s16 +; CHECK-NEXT: vmovx.f16 s16, s22 ; CHECK-NEXT: vins.f16 s14, s16 ; CHECK-NEXT: vldrw.u32 q4, [sp, #64] @ 16-byte Reload ; CHECK-NEXT: vmov.f32 s2, s6 ; CHECK-NEXT: vmovx.f16 s4, s17 -; CHECK-NEXT: vmov.f32 s26, s14 -; CHECK-NEXT: vins.f16 s20, s4 +; CHECK-NEXT: vmov.f32 s22, s14 +; CHECK-NEXT: vins.f16 s24, s4 ; CHECK-NEXT: vmovx.f16 s4, s18 -; CHECK-NEXT: vins.f16 s22, s4 -; CHECK-NEXT: vldrw.u32 q1, [sp] @ 16-byte Reload -; CHECK-NEXT: vmov.f32 s23, s22 +; CHECK-NEXT: vins.f16 s26, s4 +; CHECK-NEXT: vmov.f32 s13, s21 +; CHECK-NEXT: vmov.f32 s27, s26 ; CHECK-NEXT: vstrw.32 q0, [r1, #32] -; CHECK-NEXT: vmov.f32 s22, s18 -; CHECK-NEXT: vmovx.f16 s16, s21 -; CHECK-NEXT: vrev32.16 q1, q1 -; CHECK-NEXT: vmov.f32 s13, s25 -; CHECK-NEXT: vins.f16 s5, s16 -; CHECK-NEXT: vmovx.f16 s16, s6 -; CHECK-NEXT: vins.f16 s22, s16 +; CHECK-NEXT: vmov.f32 s26, s18 +; CHECK-NEXT: vldrw.u32 q4, [sp, #48] @ 16-byte Reload +; CHECK-NEXT: vmovx.f16 s4, s25 ; CHECK-NEXT: vldrw.u32 q0, [sp, #16] @ 16-byte Reload -; CHECK-NEXT: vmov.f32 s6, s22 -; CHECK-NEXT: vmov.f32 s21, s5 -; CHECK-NEXT: vstrw.32 q0, [r1, #80] -; CHECK-NEXT: vmov.f32 s14, s26 +; CHECK-NEXT: vrev32.16 q4, q4 +; CHECK-NEXT: vins.f16 s17, s4 +; CHECK-NEXT: vmovx.f16 s4, s18 +; CHECK-NEXT: vins.f16 s26, s4 +; CHECK-NEXT: vmov.f32 s14, s22 +; CHECK-NEXT: vmov.f32 s18, s26 ; CHECK-NEXT: vstrw.32 q3, [r1, #64] -; CHECK-NEXT: vmov.f32 s22, s6 -; CHECK-NEXT: vstrw.32 q5, [r1, #16] +; CHECK-NEXT: vmov.f32 s25, s17 +; CHECK-NEXT: vstrw.32 q0, [r1, #80] +; CHECK-NEXT: vmov.f32 s26, s18 +; CHECK-NEXT: vstrw.32 q6, [r1, #16] ; CHECK-NEXT: add sp, #80 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: bx lr @@ -1378,60 +1381,60 @@ define void @vst3_v8f16(<8 x half> *%src, <24 x half> *%dst) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14} -; CHECK-NEXT: vldrw.u32 q3, [r0] +; CHECK-NEXT: vldrw.u32 q2, [r0] ; CHECK-NEXT: vldrw.u32 q5, [r0, #16] -; CHECK-NEXT: vmov.f64 d0, d6 +; CHECK-NEXT: vmov.f64 d0, d4 ; CHECK-NEXT: vmovx.f16 s6, s20 -; CHECK-NEXT: vmovx.f16 s8, s12 -; CHECK-NEXT: vmov.f32 s4, s13 +; CHECK-NEXT: vmovx.f16 s12, s8 +; CHECK-NEXT: vmov.f32 s4, s9 ; CHECK-NEXT: vins.f16 s0, s20 ; CHECK-NEXT: vmov r2, s6 ; CHECK-NEXT: vins.f16 s4, s21 ; CHECK-NEXT: vmov.16 q0[4], r2 ; CHECK-NEXT: vmov.f32 s3, s4 ; CHECK-NEXT: vldrw.u32 q1, [r0, #32] -; CHECK-NEXT: vmov.f32 s1, s12 +; CHECK-NEXT: vmov.f32 s1, s8 ; CHECK-NEXT: vmov.f32 s17, s4 ; CHECK-NEXT: vmovx.f16 s24, s7 ; CHECK-NEXT: vmov.f32 s18, s4 -; CHECK-NEXT: vins.f16 s17, s8 -; CHECK-NEXT: vmovx.f16 s8, s18 -; CHECK-NEXT: vins.f16 s2, s8 -; CHECK-NEXT: vmovx.f16 s11, s23 -; CHECK-NEXT: vins.f16 s11, s24 +; CHECK-NEXT: vins.f16 s17, s12 +; CHECK-NEXT: vmovx.f16 s12, s18 +; CHECK-NEXT: vins.f16 s2, s12 +; CHECK-NEXT: vmovx.f16 s15, s23 +; CHECK-NEXT: vins.f16 s15, s24 ; CHECK-NEXT: vmovx.f16 s24, s6 -; CHECK-NEXT: vmovx.f16 s8, s22 +; CHECK-NEXT: vmovx.f16 s12, s22 ; CHECK-NEXT: vmov.f32 s18, s2 -; CHECK-NEXT: vins.f16 s8, s24 -; CHECK-NEXT: vmov.f32 s25, s15 -; CHECK-NEXT: vins.f16 s9, s23 -; CHECK-NEXT: vmov.f32 s26, s15 -; CHECK-NEXT: vmov.f32 s10, s7 -; CHECK-NEXT: vmovx.f16 s28, s9 +; CHECK-NEXT: vins.f16 s12, s24 +; CHECK-NEXT: vmov.f32 s25, s11 +; CHECK-NEXT: vins.f16 s13, s23 +; CHECK-NEXT: vmov.f32 s26, s11 +; CHECK-NEXT: vmov.f32 s14, s7 +; CHECK-NEXT: vmovx.f16 s28, s13 ; CHECK-NEXT: vins.f16 s25, s28 ; CHECK-NEXT: vmovx.f16 s28, s26 -; CHECK-NEXT: vins.f16 s10, s28 -; CHECK-NEXT: vmovx.f16 s28, s13 +; CHECK-NEXT: vins.f16 s14, s28 +; CHECK-NEXT: vmovx.f16 s28, s9 ; CHECK-NEXT: vmov.f32 s4, s5 ; CHECK-NEXT: vrev32.16 q5, q5 ; CHECK-NEXT: vins.f16 s4, s28 -; CHECK-NEXT: vmovx.f16 s28, s14 +; CHECK-NEXT: vmovx.f16 s28, s10 ; CHECK-NEXT: vins.f16 s6, s28 -; CHECK-NEXT: vmov.f32 s26, s10 +; CHECK-NEXT: vmov.f32 s26, s14 ; CHECK-NEXT: vmov.f32 s7, s6 -; CHECK-NEXT: vmov.f32 s6, s14 -; CHECK-NEXT: vmovx.f16 s12, s5 -; CHECK-NEXT: vins.f16 s21, s12 -; CHECK-NEXT: vmovx.f16 s12, s22 -; CHECK-NEXT: vins.f16 s6, s12 +; CHECK-NEXT: vmov.f32 s6, s10 +; CHECK-NEXT: vmovx.f16 s8, s5 +; CHECK-NEXT: vins.f16 s21, s8 +; CHECK-NEXT: vmovx.f16 s8, s22 +; CHECK-NEXT: vins.f16 s6, s8 ; CHECK-NEXT: vmov.f32 s1, s17 ; CHECK-NEXT: vmov.f32 s22, s6 -; CHECK-NEXT: vmov.f32 s9, s25 +; CHECK-NEXT: vmov.f32 s13, s25 ; CHECK-NEXT: vmov.f32 s5, s21 ; CHECK-NEXT: vmov.f32 s2, s18 -; CHECK-NEXT: vmov.f32 s10, s26 +; CHECK-NEXT: vmov.f32 s14, s26 ; CHECK-NEXT: vstrw.32 q0, [r1] -; CHECK-NEXT: vstrw.32 q2, [r1, #32] +; CHECK-NEXT: vstrw.32 q3, [r1, #32] ; CHECK-NEXT: vmov.f32 s6, s22 ; CHECK-NEXT: vstrw.32 q1, [r1, #16] ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14} @@ -1457,144 +1460,147 @@ define void @vst3_v16f16(<16 x half> *%src, <48 x half> *%dst) { ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: .pad #128 ; CHECK-NEXT: sub sp, #128 -; CHECK-NEXT: vldrw.u32 q1, [r0, #32] -; CHECK-NEXT: vldrw.u32 q7, [r0, #64] -; CHECK-NEXT: vldrw.u32 q5, [r0, #80] -; CHECK-NEXT: vmovx.f16 s0, s31 -; CHECK-NEXT: vmovx.f16 s11, s7 -; CHECK-NEXT: vins.f16 s11, s0 -; CHECK-NEXT: vmovx.f16 s0, s30 -; CHECK-NEXT: vmovx.f16 s8, s6 -; CHECK-NEXT: vmov q4, q1 -; CHECK-NEXT: vins.f16 s8, s0 -; CHECK-NEXT: vstrw.32 q4, [sp, #48] @ 16-byte Spill -; CHECK-NEXT: vins.f16 s9, s7 -; CHECK-NEXT: vmov.f32 s10, s31 -; CHECK-NEXT: vmovx.f16 s0, s9 -; CHECK-NEXT: vmov q3, q2 -; CHECK-NEXT: vldrw.u32 q2, [r0] -; CHECK-NEXT: vmov.f32 s5, s11 -; CHECK-NEXT: vmov q6, q2 -; CHECK-NEXT: vmov.f32 s6, s11 -; CHECK-NEXT: vldrw.u32 q2, [r0, #48] +; CHECK-NEXT: vldrw.u32 q3, [r0, #32] +; CHECK-NEXT: vldrw.u32 q4, [r0, #64] +; CHECK-NEXT: vldrw.u32 q6, [r0] +; CHECK-NEXT: vldrw.u32 q5, [r0, #16] +; CHECK-NEXT: vmovx.f16 s0, s19 +; CHECK-NEXT: vmovx.f16 s7, s15 +; CHECK-NEXT: vins.f16 s7, s0 +; CHECK-NEXT: vmovx.f16 s0, s18 +; CHECK-NEXT: vmovx.f16 s4, s14 +; CHECK-NEXT: vstrw.32 q5, [sp, #64] @ 16-byte Spill +; CHECK-NEXT: vins.f16 s4, s0 +; CHECK-NEXT: vmov.f64 d14, d12 +; CHECK-NEXT: vins.f16 s5, s15 +; CHECK-NEXT: vstrw.32 q3, [sp] @ 16-byte Spill +; CHECK-NEXT: vmov.f32 s6, s19 +; CHECK-NEXT: vmovx.f16 s0, s5 +; CHECK-NEXT: vmov q2, q1 +; CHECK-NEXT: vmov.f32 s5, s27 +; CHECK-NEXT: vmov.f32 s6, s27 +; CHECK-NEXT: vins.f16 s28, s12 ; CHECK-NEXT: vins.f16 s5, s0 ; CHECK-NEXT: vmovx.f16 s0, s6 -; CHECK-NEXT: vstrw.32 q1, [sp, #64] @ 16-byte Spill -; CHECK-NEXT: vldrw.u32 q1, [r0, #16] -; CHECK-NEXT: vins.f16 s14, s0 +; CHECK-NEXT: vins.f16 s10, s0 +; CHECK-NEXT: vstrw.32 q1, [sp, #32] @ 16-byte Spill +; CHECK-NEXT: vmov.f64 d2, d10 +; CHECK-NEXT: vstrw.32 q2, [sp, #16] @ 16-byte Spill +; CHECK-NEXT: vldrw.u32 q2, [r0, #48] ; CHECK-NEXT: vmovx.f16 s2, s8 -; CHECK-NEXT: vstrw.32 q3, [sp, #32] @ 16-byte Spill -; CHECK-NEXT: vmov.f64 d6, d2 -; CHECK-NEXT: vstrw.32 q1, [sp, #80] @ 16-byte Spill -; CHECK-NEXT: vstrw.32 q6, [sp, #16] @ 16-byte Spill -; CHECK-NEXT: vstrw.32 q2, [sp] @ 16-byte Spill -; CHECK-NEXT: vmov.f32 s0, s5 -; CHECK-NEXT: vins.f16 s12, s8 +; CHECK-NEXT: vstrw.32 q2, [sp, #48] @ 16-byte Spill +; CHECK-NEXT: vmov.f32 s0, s21 +; CHECK-NEXT: vins.f16 s4, s8 ; CHECK-NEXT: vmov r2, s2 ; CHECK-NEXT: vins.f16 s0, s9 -; CHECK-NEXT: vmov.16 q3[4], r2 -; CHECK-NEXT: vmovx.f16 s2, s16 -; CHECK-NEXT: vmov.f32 s15, s0 -; CHECK-NEXT: vmovx.f16 s0, s4 -; CHECK-NEXT: vmov.f32 s13, s4 +; CHECK-NEXT: vmov.16 q1[4], r2 +; CHECK-NEXT: vmovx.f16 s2, s12 +; CHECK-NEXT: vmov.f32 s7, s0 +; CHECK-NEXT: vmovx.f16 s0, s20 ; CHECK-NEXT: vmov.f32 s5, s20 -; CHECK-NEXT: vmov.f32 s6, s20 -; CHECK-NEXT: vins.f16 s5, s0 -; CHECK-NEXT: vmovx.f16 s0, s6 -; CHECK-NEXT: vstrw.32 q1, [sp, #112] @ 16-byte Spill -; CHECK-NEXT: vmov q1, q6 -; CHECK-NEXT: vins.f16 s14, s0 -; CHECK-NEXT: vmov.f32 s0, s5 -; CHECK-NEXT: vins.f16 s24, s16 +; CHECK-NEXT: vldrw.u32 q5, [r0, #80] ; CHECK-NEXT: vmov r0, s2 -; CHECK-NEXT: vins.f16 s0, s17 -; CHECK-NEXT: vmov.16 q6[4], r0 -; CHECK-NEXT: vmov.f32 s27, s0 -; CHECK-NEXT: vmovx.f16 s0, s4 -; CHECK-NEXT: vmov.f32 s25, s4 -; CHECK-NEXT: vmov.f32 s5, s28 -; CHECK-NEXT: vmov.f32 s6, s28 +; CHECK-NEXT: vmov.f32 s9, s20 +; CHECK-NEXT: vmov.16 q7[4], r0 +; CHECK-NEXT: vmov.f32 s10, s20 +; CHECK-NEXT: vins.f16 s9, s0 +; CHECK-NEXT: vmovx.f16 s0, s10 +; CHECK-NEXT: vins.f16 s6, s0 +; CHECK-NEXT: vmov.f32 s0, s25 +; CHECK-NEXT: vstrw.32 q2, [sp, #96] @ 16-byte Spill +; CHECK-NEXT: vmov q2, q4 +; CHECK-NEXT: vins.f16 s0, s13 +; CHECK-NEXT: vstrw.32 q1, [sp, #112] @ 16-byte Spill +; CHECK-NEXT: vmov.f32 s5, s8 +; CHECK-NEXT: vldrw.u32 q3, [sp, #48] @ 16-byte Reload +; CHECK-NEXT: vmov.f32 s31, s0 +; CHECK-NEXT: vmovx.f16 s0, s24 +; CHECK-NEXT: vmov.f32 s6, s8 ; CHECK-NEXT: vins.f16 s5, s0 +; CHECK-NEXT: vmov.f32 s29, s24 ; CHECK-NEXT: vmovx.f16 s0, s6 -; CHECK-NEXT: vstrw.32 q1, [sp, #96] @ 16-byte Spill -; CHECK-NEXT: vins.f16 s26, s0 +; CHECK-NEXT: vstrw.32 q1, [sp, #80] @ 16-byte Spill +; CHECK-NEXT: vins.f16 s30, s0 ; CHECK-NEXT: vmovx.f16 s0, s22 -; CHECK-NEXT: vmovx.f16 s4, s10 +; CHECK-NEXT: vmovx.f16 s4, s14 +; CHECK-NEXT: vmov.f32 s8, s9 ; CHECK-NEXT: vins.f16 s4, s0 ; CHECK-NEXT: vmovx.f16 s0, s23 -; CHECK-NEXT: vmovx.f16 s7, s11 -; CHECK-NEXT: vmov.f32 s28, s29 +; CHECK-NEXT: vmovx.f16 s7, s15 ; CHECK-NEXT: vins.f16 s7, s0 -; CHECK-NEXT: vins.f16 s5, s11 -; CHECK-NEXT: vldrw.u32 q2, [sp, #80] @ 16-byte Reload +; CHECK-NEXT: vins.f16 s5, s15 +; CHECK-NEXT: vldrw.u32 q3, [sp, #64] @ 16-byte Reload ; CHECK-NEXT: vmov.f32 s6, s23 ; CHECK-NEXT: vmovx.f16 s16, s5 -; CHECK-NEXT: vmov.f32 s1, s11 -; CHECK-NEXT: vmov.f32 s2, s11 +; CHECK-NEXT: vmov.f32 s1, s15 +; CHECK-NEXT: vmov.f32 s2, s15 ; CHECK-NEXT: vins.f16 s1, s16 ; CHECK-NEXT: vmovx.f16 s16, s2 ; CHECK-NEXT: vins.f16 s6, s16 -; CHECK-NEXT: vmovx.f16 s16, s9 +; CHECK-NEXT: vmovx.f16 s16, s13 ; CHECK-NEXT: vmov.f32 s20, s21 ; CHECK-NEXT: vins.f16 s20, s16 -; CHECK-NEXT: vmovx.f16 s16, s10 +; CHECK-NEXT: vmovx.f16 s16, s14 ; CHECK-NEXT: vins.f16 s22, s16 -; CHECK-NEXT: vldrw.u32 q2, [sp] @ 16-byte Reload -; CHECK-NEXT: vldrw.u32 q4, [sp, #80] @ 16-byte Reload +; CHECK-NEXT: vldrw.u32 q4, [sp, #112] @ 16-byte Reload +; CHECK-NEXT: vldrw.u32 q3, [sp, #96] @ 16-byte Reload ; CHECK-NEXT: vmov.f32 s23, s22 -; CHECK-NEXT: vrev32.16 q2, q2 +; CHECK-NEXT: vmov.f32 s14, s18 +; CHECK-NEXT: vstrw.32 q3, [sp, #96] @ 16-byte Spill +; CHECK-NEXT: vldrw.u32 q3, [sp, #80] @ 16-byte Reload +; CHECK-NEXT: vmov.f32 s14, s30 +; CHECK-NEXT: vstrw.32 q3, [sp, #80] @ 16-byte Spill +; CHECK-NEXT: vldrw.u32 q3, [sp, #64] @ 16-byte Reload ; CHECK-NEXT: vmov.f32 s2, s6 -; CHECK-NEXT: vmov.f32 s22, s18 -; CHECK-NEXT: vmovx.f16 s16, s21 -; CHECK-NEXT: vins.f16 s9, s16 -; CHECK-NEXT: vldrw.u32 q4, [sp, #112] @ 16-byte Reload -; CHECK-NEXT: vmov.f32 s18, s14 -; CHECK-NEXT: vstrw.32 q2, [sp, #80] @ 16-byte Spill -; CHECK-NEXT: vstrw.32 q4, [sp, #112] @ 16-byte Spill -; CHECK-NEXT: vldrw.u32 q4, [sp, #96] @ 16-byte Reload -; CHECK-NEXT: vmov.f32 s18, s26 -; CHECK-NEXT: vstrw.32 q4, [sp, #96] @ 16-byte Spill -; CHECK-NEXT: vmovx.f16 s16, s10 -; CHECK-NEXT: vins.f16 s22, s16 -; CHECK-NEXT: vldrw.u32 q4, [sp, #16] @ 16-byte Reload +; CHECK-NEXT: vmov.f32 s22, s14 +; CHECK-NEXT: vmovx.f16 s12, s21 +; CHECK-NEXT: vstr s12, [sp, #64] @ 4-byte Spill +; CHECK-NEXT: vldrw.u32 q3, [sp, #48] @ 16-byte Reload ; CHECK-NEXT: vmov.f32 s5, s1 -; CHECK-NEXT: vmovx.f16 s8, s17 +; CHECK-NEXT: vrev32.16 q4, q3 +; CHECK-NEXT: vldr s12, [sp, #64] @ 4-byte Reload +; CHECK-NEXT: vins.f16 s17, s12 +; CHECK-NEXT: vmovx.f16 s12, s18 +; CHECK-NEXT: vins.f16 s22, s12 +; CHECK-NEXT: vmovx.f16 s12, s25 ; CHECK-NEXT: vmov.f32 s6, s2 -; CHECK-NEXT: vins.f16 s28, s8 -; CHECK-NEXT: vmovx.f16 s0, s18 -; CHECK-NEXT: vins.f16 s30, s0 -; CHECK-NEXT: vldrw.u32 q0, [sp, #48] @ 16-byte Reload -; CHECK-NEXT: vmov.f32 s31, s30 -; CHECK-NEXT: vldrw.u32 q2, [sp, #32] @ 16-byte Reload -; CHECK-NEXT: vmov.f32 s30, s18 -; CHECK-NEXT: vmovx.f16 s16, s29 -; CHECK-NEXT: vrev32.16 q0, q0 +; CHECK-NEXT: vins.f16 s8, s12 +; CHECK-NEXT: vmovx.f16 s0, s26 +; CHECK-NEXT: vmov.f32 s18, s22 +; CHECK-NEXT: vins.f16 s10, s0 +; CHECK-NEXT: vldrw.u32 q0, [sp] @ 16-byte Reload +; CHECK-NEXT: vmov.f32 s11, s10 ; CHECK-NEXT: vstrw.32 q1, [r1, #80] -; CHECK-NEXT: vins.f16 s1, s16 -; CHECK-NEXT: vmovx.f16 s16, s2 -; CHECK-NEXT: vins.f16 s30, s16 -; CHECK-NEXT: vldrw.u32 q4, [sp, #96] @ 16-byte Reload -; CHECK-NEXT: vmov.f32 s2, s30 -; CHECK-NEXT: vmov.f32 s25, s17 -; CHECK-NEXT: vmov.f32 s26, s18 -; CHECK-NEXT: vldrw.u32 q4, [sp, #112] @ 16-byte Reload +; CHECK-NEXT: vmov.f32 s10, s26 +; CHECK-NEXT: vrev32.16 q6, q0 +; CHECK-NEXT: vmovx.f16 s12, s9 +; CHECK-NEXT: vldrw.u32 q0, [sp, #80] @ 16-byte Reload +; CHECK-NEXT: vins.f16 s25, s12 +; CHECK-NEXT: vmovx.f16 s12, s26 +; CHECK-NEXT: vins.f16 s10, s12 ; CHECK-NEXT: vmov.f32 s29, s1 -; CHECK-NEXT: vstrw.32 q6, [r1] -; CHECK-NEXT: vmov.f32 s13, s17 -; CHECK-NEXT: vmov.f32 s14, s18 -; CHECK-NEXT: vldrw.u32 q4, [sp, #64] @ 16-byte Reload -; CHECK-NEXT: vmov.f32 s18, s10 -; CHECK-NEXT: vstrw.32 q3, [r1, #48] -; CHECK-NEXT: vmov.f32 s9, s17 +; CHECK-NEXT: vldrw.u32 q3, [sp, #96] @ 16-byte Reload ; CHECK-NEXT: vmov.f32 s30, s2 -; CHECK-NEXT: vstrw.32 q7, [r1, #16] -; CHECK-NEXT: vmov.f32 s10, s18 -; CHECK-NEXT: vldrw.u32 q4, [sp, #80] @ 16-byte Reload -; CHECK-NEXT: vmov.f32 s18, s22 -; CHECK-NEXT: vstrw.32 q2, [r1, #32] +; CHECK-NEXT: vldrw.u32 q0, [sp, #112] @ 16-byte Reload +; CHECK-NEXT: vmov.f32 s26, s10 +; CHECK-NEXT: vmov.f32 s1, s13 +; CHECK-NEXT: vstrw.32 q7, [r1] +; CHECK-NEXT: vmov.f32 s2, s14 +; CHECK-NEXT: vldrw.u32 q3, [sp, #16] @ 16-byte Reload +; CHECK-NEXT: vstrw.32 q0, [sp, #112] @ 16-byte Spill +; CHECK-NEXT: vldrw.u32 q0, [sp, #32] @ 16-byte Reload +; CHECK-NEXT: vmov.f32 s2, s14 +; CHECK-NEXT: vmov.f32 s13, s1 ; CHECK-NEXT: vmov.f32 s21, s17 +; CHECK-NEXT: vmov.f32 s9, s25 ; CHECK-NEXT: vmov.f32 s22, s18 +; CHECK-NEXT: vmov.f32 s10, s26 ; CHECK-NEXT: vstrw.32 q5, [r1, #64] +; CHECK-NEXT: vstrw.32 q2, [r1, #16] +; CHECK-NEXT: vmov.f32 s14, s2 +; CHECK-NEXT: vldrw.u32 q0, [sp, #112] @ 16-byte Reload +; CHECK-NEXT: vstrw.32 q3, [r1, #32] +; CHECK-NEXT: vstrw.32 q0, [r1, #48] ; CHECK-NEXT: add sp, #128 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: bx lr diff --git a/llvm/test/CodeGen/Thumb2/mve-vst4.ll b/llvm/test/CodeGen/Thumb2/mve-vst4.ll index cb933dc41f15a..4d1e12f0c5efd 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vst4.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vst4.ll @@ -11,23 +11,18 @@ define void @vst4_v2i32(<2 x i32> *%src, <8 x i32> *%dst) { ; CHECK-NEXT: ldrd lr, r12, [r0] ; CHECK-NEXT: ldrd r3, r2, [r0, #8] ; CHECK-NEXT: ldrd r4, r0, [r0, #16] -; CHECK-NEXT: vmov q1[2], q1[0], r4, r0 -; CHECK-NEXT: vmov.f64 d0, d2 -; CHECK-NEXT: vmov.f32 s1, s6 -; CHECK-NEXT: vmov.f32 s2, s4 -; CHECK-NEXT: vmov.f32 s3, s6 ; CHECK-NEXT: vmov q1[2], q1[0], lr, r3 ; CHECK-NEXT: vmov q1[3], q1[1], r12, r2 +; CHECK-NEXT: vmov q0[2], q0[0], r4, r0 ; CHECK-NEXT: vmov.f64 d4, d2 ; CHECK-NEXT: vmov.f32 s9, s6 ; CHECK-NEXT: vmov.f32 s10, s0 -; CHECK-NEXT: vmov.f32 s11, s2 +; CHECK-NEXT: vmov.f32 s11, s0 +; CHECK-NEXT: vmov.f32 s0, s5 ; CHECK-NEXT: vstrw.32 q2, [r1] -; CHECK-NEXT: vmov.f32 s8, s5 -; CHECK-NEXT: vmov.f32 s9, s7 -; CHECK-NEXT: vmov.f32 s10, s1 -; CHECK-NEXT: vmov.f32 s11, s3 -; CHECK-NEXT: vstrw.32 q2, [r1, #16] +; CHECK-NEXT: vmov.f32 s1, s7 +; CHECK-NEXT: vmov.f32 s3, s2 +; CHECK-NEXT: vstrw.32 q0, [r1, #16] ; CHECK-NEXT: pop {r4, pc} entry: %s1 = getelementptr <2 x i32>, <2 x i32>* %src, i32 0 @@ -208,30 +203,30 @@ define void @vst4_v4i32_align1(<4 x i32> *%src, <16 x i32> *%dst) { ; CHECK-NEXT: .vsave {d8, d9, d10} ; CHECK-NEXT: vpush {d8, d9, d10} ; CHECK-NEXT: vldrw.u32 q0, [r0, #32] -; CHECK-NEXT: vldrw.u32 q4, [r0] +; CHECK-NEXT: vldrw.u32 q1, [r0, #16] ; CHECK-NEXT: vmov r2, r3, d1 ; CHECK-NEXT: vmov r12, lr, d0 -; CHECK-NEXT: vldrw.u32 q0, [r0, #16] -; CHECK-NEXT: vmov.f64 d2, d8 -; CHECK-NEXT: vmov.f32 s5, s0 -; CHECK-NEXT: vmov s10, r2 -; CHECK-NEXT: vmov s14, r3 -; CHECK-NEXT: vmov.f32 s8, s18 -; CHECK-NEXT: vmov s20, lr -; CHECK-NEXT: vmov.f32 s9, s2 -; CHECK-NEXT: vmov s6, r12 -; CHECK-NEXT: vmov.f32 s0, s17 -; CHECK-NEXT: vmov.f32 s12, s19 -; CHECK-NEXT: vmov.f32 s13, s3 +; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: vmov s14, r2 +; CHECK-NEXT: vmov s18, r3 +; CHECK-NEXT: vmov s10, lr +; CHECK-NEXT: vmov s20, r12 +; CHECK-NEXT: vmov.f32 s16, s3 +; CHECK-NEXT: vmov.f32 s12, s2 +; CHECK-NEXT: vmov.f32 s8, s1 +; CHECK-NEXT: vmov.f32 s1, s4 +; CHECK-NEXT: vmov.f32 s17, s7 +; CHECK-NEXT: vmov.f32 s13, s6 +; CHECK-NEXT: vmov.f32 s9, s5 ; CHECK-NEXT: vmov.f32 s2, s20 +; CHECK-NEXT: vmov.f32 s19, s18 ; CHECK-NEXT: vmov.f32 s15, s14 +; CHECK-NEXT: vstrb.8 q4, [r1, #48] ; CHECK-NEXT: vmov.f32 s11, s10 -; CHECK-NEXT: vstrb.8 q3, [r1, #48] +; CHECK-NEXT: vstrb.8 q3, [r1, #32] ; CHECK-NEXT: vmov.f32 s3, s20 -; CHECK-NEXT: vstrb.8 q2, [r1, #32] -; CHECK-NEXT: vmov.f32 s7, s6 -; CHECK-NEXT: vstrb.8 q0, [r1, #16] -; CHECK-NEXT: vstrb.8 q1, [r1] +; CHECK-NEXT: vstrb.8 q2, [r1, #16] +; CHECK-NEXT: vstrb.8 q0, [r1] ; CHECK-NEXT: vpop {d8, d9, d10} ; CHECK-NEXT: pop {r7, pc} entry: @@ -975,30 +970,30 @@ define void @vst4_v4f32_align1(<4 x float> *%src, <16 x float> *%dst) { ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12} ; CHECK-NEXT: vldrw.u32 q0, [r0, #32] -; CHECK-NEXT: vldrw.u32 q5, [r0] -; CHECK-NEXT: vldrw.u32 q1, [r0, #16] +; CHECK-NEXT: vldrw.u32 q2, [r0, #16] +; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vmov r2, r3, d1 ; CHECK-NEXT: vmov r12, lr, d0 -; CHECK-NEXT: vmov.f64 d4, d10 -; CHECK-NEXT: vmov.f32 s9, s4 -; CHECK-NEXT: vmov s14, r2 -; CHECK-NEXT: vmov s18, r3 -; CHECK-NEXT: vmov.f32 s12, s22 -; CHECK-NEXT: vmov s24, lr -; CHECK-NEXT: vmov.f32 s13, s6 -; CHECK-NEXT: vmov.f32 s4, s21 -; CHECK-NEXT: vmov.f32 s16, s23 -; CHECK-NEXT: vmov.f32 s17, s7 -; CHECK-NEXT: vmov s10, r12 +; CHECK-NEXT: vmov s18, r2 +; CHECK-NEXT: vmov s22, r3 +; CHECK-NEXT: vmov s14, lr +; CHECK-NEXT: vmov s24, r12 +; CHECK-NEXT: vmov.f32 s20, s7 +; CHECK-NEXT: vmov.f32 s16, s6 +; CHECK-NEXT: vmov.f32 s12, s5 +; CHECK-NEXT: vmov.f32 s5, s8 +; CHECK-NEXT: vmov.f32 s21, s11 +; CHECK-NEXT: vmov.f32 s13, s9 +; CHECK-NEXT: vmov.f32 s17, s10 ; CHECK-NEXT: vmov.f32 s6, s24 -; CHECK-NEXT: vmov.f32 s19, s18 -; CHECK-NEXT: vmov.f32 s15, s2 -; CHECK-NEXT: vstrb.8 q4, [r1, #48] -; CHECK-NEXT: vmov.f32 s7, s24 -; CHECK-NEXT: vstrb.8 q3, [r1, #32] -; CHECK-NEXT: vmov.f32 s11, s0 -; CHECK-NEXT: vstrb.8 q1, [r1, #16] -; CHECK-NEXT: vstrb.8 q2, [r1] +; CHECK-NEXT: vmov.f32 s23, s22 +; CHECK-NEXT: vmov.f32 s19, s2 +; CHECK-NEXT: vstrb.8 q5, [r1, #48] +; CHECK-NEXT: vmov.f32 s15, s14 +; CHECK-NEXT: vstrb.8 q4, [r1, #32] +; CHECK-NEXT: vmov.f32 s7, s0 +; CHECK-NEXT: vstrb.8 q3, [r1, #16] +; CHECK-NEXT: vstrb.8 q1, [r1] ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12} ; CHECK-NEXT: pop {r7, pc} entry: From 7dec20dbb6ae4a095e586c94eca59008506b5de5 Mon Sep 17 00:00:00 2001 From: Chia-hung Duan Date: Tue, 29 Jun 2021 18:03:30 +0800 Subject: [PATCH 157/619] [mlir-reduce] Add doc for usage of mlir-reduce Reviewed By: jpienaar Differential Revision: https://reviews.llvm.org/D103683 --- mlir/docs/Tools/mlir-reduce.md | 126 +++++++++++++++++++++++++++++++++ 1 file changed, 126 insertions(+) create mode 100644 mlir/docs/Tools/mlir-reduce.md diff --git a/mlir/docs/Tools/mlir-reduce.md b/mlir/docs/Tools/mlir-reduce.md new file mode 100644 index 0000000000000..eb38d7222e4fd --- /dev/null +++ b/mlir/docs/Tools/mlir-reduce.md @@ -0,0 +1,126 @@ +# MLIR Reduce + +[TOC] + +An MLIR input may trigger bugs after series of transformations. To root cause +the problem or help verification after fixes, developers want to be able to +reduce the size of a reproducer for a bug. This document describes +`mlir-reduce`, which is similar to +[bugpoint](https://llvm.org/docs/CommandGuide/bugpoint.html), a tool that can +reduce the size of the input needed to trigger the error. + +`mlir-reduce` supports reducing the input in several ways, including simply +deleting code not required to reproduce an error, applying the reducer +patterns heuristically or run with optimization passes to reduce the input. To +use it, the first thing you need to do is, provide a command which tells if an +input is interesting, e.g., exhibits the characteristics that you would like to +focus on. For example, you may want to see if `mlir-opt` invocation fails after +it runs on the certain MLIR input. Afterwards, select your reduction strategy +then `mlir-reduce` will do the remining works for you. + +## How to Use it + +`mlir-reduce` adopts reduction-tree algorithm to reduce the input. it generates +several reduced outputs and do the further reduction in between them according +to the tree traversal strategy. The different strategies may lead to different +result and different time complexity. You can run as +`-reduction-tree='traversal-mode=0'` to select the mode for example. + +### Write the script for testing interesting + +As mentioned, you need to provide a command to specify `mlir-reduce` which case +you're interesting. For each intermediate output generated during reduction, +`mlir-reduce` will run the command over the it, the script should returns 1 for +interesting case, 0 otherwise. The sample script, + +```shell +mlir-opt -convert-vector-to-spirv $1 | grep "failed to materialize" +if [[ $? -eq 1 ]]; then + exit 1 +else + exit 0 +fi +``` + +The sample usage will be like, note that the `test` argument is part of the mode +argument. + +```shell +mlir-reduce $INPUT -reduction-tree='traversal-mode=0 test=$TEST_SCRIPT' +``` + +## Available reduction strategies + +### Operation elimination + +`mlir-reduce` will try to remove the operations directly. This is the most +aggressive reduction as it may result in an invalid output as long as it ends up +retaining the error message that the test script is interesting. To avoid that, +`mlir-reduce` always checks the validity and it expects the user will provide a +valid input as well. + +### Rewrite patterns into simpler forms + +In some cases, rewrite an operation into a simpler or smaller form can still +retain the interestingness. For example, `mlir-reduce` will try to rewrite a +`tensor` with unknown rank into a constant rank one like +`tensor<1xi32>`. Not only produce a simpler operation, it may introduce further +reduction chances because of precise type information. + +MLIR supports dialects and `mlir-reduce` supports rewrite patterns for every +dialect as well. Which means you can have the dialect specific rewrite patterns. +To do that, you need to implement the `DialectReductionPatternInterface`. For +example, + +```c++ +#include "mlir/Reducer/ReductionPatternInterface.h" + +struct MyReductionPatternInterface : public DialectReductionPatternInterface { + virtual void + populateReductionPatterns(RewritePatternSet &patterns) const final { + populateMyReductionPatterns(patterns); + } +} +``` + +`mlir-reduce` will call `populateReductionPatterns` to collect the reduction +rewrite patterns provided by each dialect. Here's a hint, if you use +[DRR](../DeclarativeRewrites.md) to write the reduction patterns, you can +leverage the method `populateWithGenerated` generated by `mlir-tblgen`. + +### Reduce with built-in optimization passes + +MLIR provides amount of transformation passes and some of them are useful for +reducing the input size, e.g., Symbol-DCE. `mlir-reduce` will schedule them +along with above two strategies. + +## Build a custom mlir-reduce + +In the cases of, 1. have defined a custom syntax, 2. the failure is specific to +certain dialects or 3. there's a dialect specific reducer patterns, you need to +build your own `mlir-reduce`. Link it with `MLIRReduceLib` and implement it +like, + +```c++ +#include "mlir/Tools/mlir-reduce/MlirReduceMain.h" +using namespace mlir; + +int main(int argc, char **argv) { + DialectRegistry registry; + registerMyDialects(registry); + // Register the DialectReductionPatternInterface if any. + MLIRContext context(registry); + return failed(mlirReduceMain(argc, argv, context)); +} + +``` + +## Future works + +`mlir-reduce` is missing several features, + +* `-reduction-tree` now only supports `Single-Path` traversal mode, extends it +with different traveral strategies may reduce the input better. +* Produce the optimial result when interruped. The reduction process may take +a quite long time, it'll be better to get an optimal result so far while an +interrup is triggered. From 6cf6f6f65fde9638a2ca64cd8013d0d0ab1d473c Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Tue, 29 Jun 2021 13:25:32 +0300 Subject: [PATCH 158/619] [NFC][InstCombine] foldAggregateConstructionIntoAggregateReuse(): cast to Instruction eagerly In all of these, the value must be an instruction for us to succeed anyway, so change it to maybe hopefully make further changes more straight-forward. --- .../InstCombine/InstCombineVectorOps.cpp | 27 +++++++++---------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 1f9be3bbf3792..c207f079bfce3 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -796,12 +796,12 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse( // Try to find a value of each element of an aggregate. // FIXME: deal with more complex, not one-dimensional, aggregate types - SmallVector, 2> AggElts(NumAggElts, NotFound); + SmallVector, 2> AggElts(NumAggElts, NotFound); // Do we know values for each element of the aggregate? auto KnowAllElts = [&AggElts]() { return all_of(AggElts, - [](Optional Elt) { return Elt != NotFound; }); + [](Optional Elt) { return Elt != NotFound; }); }; int Depth = 0; @@ -816,7 +816,11 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse( Depth < DepthLimit && CurrIVI && !KnowAllElts(); CurrIVI = dyn_cast(CurrIVI->getAggregateOperand()), ++Depth) { - Value *InsertedValue = CurrIVI->getInsertedValueOperand(); + auto *InsertedValue = + dyn_cast(CurrIVI->getInsertedValueOperand()); + if (!InsertedValue) + return nullptr; // Inserted value must be produced by an instruction. + ArrayRef Indices = CurrIVI->getIndices(); // Don't bother with more than single-level aggregates. @@ -826,7 +830,7 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse( // Now, we may have already previously recorded the value for this element // of an aggregate. If we did, that means the CurrIVI will later be // overwritten with the already-recorded value. But if not, let's record it! - Optional &Elt = AggElts[Indices.front()]; + Optional &Elt = AggElts[Indices.front()]; Elt = Elt.getValueOr(InsertedValue); // FIXME: should we handle chain-terminating undef base operand? @@ -870,15 +874,15 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse( // If found, return the source aggregate from which the extraction was. // If \p PredBB is provided, does PHI translation of an \p Elt first. auto FindSourceAggregate = - [&](Value *Elt, unsigned EltIdx, Optional UseBB, + [&](Instruction *Elt, unsigned EltIdx, Optional UseBB, Optional PredBB) -> Optional { // For now(?), only deal with, at most, a single level of PHI indirection. if (UseBB && PredBB) - Elt = Elt->DoPHITranslation(*UseBB, *PredBB); + Elt = dyn_cast(Elt->DoPHITranslation(*UseBB, *PredBB)); // FIXME: deal with multiple levels of PHI indirection? // Did we find an extraction? - auto *EVI = dyn_cast(Elt); + auto *EVI = dyn_cast_or_null(Elt); if (!EVI) return NotFound; @@ -966,13 +970,8 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse( // they all should be defined in the same basic block. BasicBlock *UseBB = nullptr; - for (const Optional &Elt : AggElts) { - // If this element's value was not defined by an instruction, ignore it. - auto *I = dyn_cast(*Elt); - if (!I) - continue; - // Otherwise, in which basic block is this instruction located? - BasicBlock *BB = I->getParent(); + for (const Optional &I : AggElts) { + BasicBlock *BB = (*I)->getParent(); // If it's the first instruction we've encountered, record the basic block. if (!UseBB) { UseBB = BB; From 293064222a013055cda9240647110f5bcf8e1f31 Mon Sep 17 00:00:00 2001 From: Butygin Date: Sat, 26 Jun 2021 13:05:29 +0300 Subject: [PATCH 159/619] [mlir] Add MemoryEffects::Allocate to memref::CloneOp Without it BufferDeallocationPass process only CloneOps created during pass itself and ignore all CloneOps that were already present in IR. For our specific usecase: ``` func @dealloc_existing_clones(%arg0: memref, %arg1: memref) -> memref { return %arg0 : memref } ``` Input arguments will be freed immediately after return from function and we want to prolong lifetime for the returned argument. To achieve this we explicitly add clones to all input memrefs and expect that BufferDeallocationPass will add correct deallocs to them (unnessesary clone+dealloc pairs will be canonicalized away later). Differential Revision: https://reviews.llvm.org/D104973 --- mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp | 2 ++ mlir/test/Transforms/buffer-deallocation.mlir | 15 +++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp index 6f358d834beed..c9df5fc4678ad 100644 --- a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp +++ b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp @@ -514,6 +514,8 @@ void CloneOp::getEffects( SideEffects::DefaultResource::get()); effects.emplace_back(MemoryEffects::Write::get(), output(), SideEffects::DefaultResource::get()); + effects.emplace_back(MemoryEffects::Allocate::get(), output(), + SideEffects::DefaultResource::get()); } namespace { diff --git a/mlir/test/Transforms/buffer-deallocation.mlir b/mlir/test/Transforms/buffer-deallocation.mlir index 77945113e1647..7bc335a93ae50 100644 --- a/mlir/test/Transforms/buffer-deallocation.mlir +++ b/mlir/test/Transforms/buffer-deallocation.mlir @@ -1207,3 +1207,18 @@ func @noRegionBranchOpInterface() { }) : () -> (i32) "test.terminator"() : () -> () } + +// ----- + +// CHECK-LABEL: func @dealloc_existing_clones +// CHECK: (%[[ARG0:.*]]: memref, %[[ARG1:.*]]: memref) +// CHECK: %[[RES0:.*]] = memref.clone %[[ARG0]] +// CHECK: %[[RES1:.*]] = memref.clone %[[ARG1]] +// CHECK-NOT: memref.dealloc %[[RES0]] +// CHECK: memref.dealloc %[[RES1]] +// CHECK: return %[[RES0]] +func @dealloc_existing_clones(%arg0: memref, %arg1: memref) -> memref { + %0 = memref.clone %arg0 : memref to memref + %1 = memref.clone %arg1 : memref to memref + return %0 : memref +} From 303b6d5e981947cff7e12626669c1fbeef046f18 Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Thu, 17 Jun 2021 09:48:30 +0100 Subject: [PATCH 160/619] [LoopVectorize] Add support for scalable vectorization of invariant stores Previously in setCostBasedWideningDecision if we encountered an invariant store we just assumed that we could scalarize the store and called getUniformMemOpCost to get the associated cost. However, for scalable vectors this is not an option because it is not currently possibly to scalarize the store. At the moment we crash in VPReplicateRecipe::execute when trying to scalarize the store. Therefore, I have changed setCostBasedWideningDecision so that if we are storing a scalable vector out to a uniform address and the target supports scatter instructions, then we should use those instead. Tests have been added here: Transforms/LoopVectorize/AArch64/sve-inv-store.ll Differential Revision: https://reviews.llvm.org/D104624 --- .../Transforms/Vectorize/LoopVectorize.cpp | 13 +++- .../LoopVectorize/AArch64/sve-inv-store.ll | 70 +++++++++++++++++++ 2 files changed, 81 insertions(+), 2 deletions(-) create mode 100644 llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 3988d2ff27368..f22e73719a99f 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7346,8 +7346,17 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) { // relying on instcombine to remove them. // Load: Scalar load + broadcast // Store: Scalar store + isLoopInvariantStoreValue ? 0 : extract - InstructionCost Cost = getUniformMemOpCost(&I, VF); - setWideningDecision(&I, VF, CM_Scalarize, Cost); + InstructionCost Cost; + if (isa(&I) && VF.isScalable() && + isLegalGatherOrScatter(&I)) { + Cost = getGatherScatterCost(&I, VF); + setWideningDecision(&I, VF, CM_GatherScatter, Cost); + } else { + assert((isa(&I) || !VF.isScalable()) && + "Cannot yet scalarize uniform stores"); + Cost = getUniformMemOpCost(&I, VF); + setWideningDecision(&I, VF, CM_Scalarize, Cost); + } continue; } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll new file mode 100644 index 0000000000000..0e02af631d205 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll @@ -0,0 +1,70 @@ +; RUN: opt -loop-vectorize -scalable-vectorization=on -S < %s | FileCheck %s + +target triple = "aarch64-unknown-linux-gnu" + +define void @inv_store_i16(i16* noalias %dst, i16* noalias readonly %src, i64 %N) #0 { +; CHECK-LABEL: @inv_store_i16( +; CHECK: vector.ph: +; CHECK: %[[TMP1:.*]] = insertelement poison, i16* %dst, i32 0 +; CHECK-NEXT: %[[SPLAT_PTRS:.*]] = shufflevector %[[TMP1]], poison, zeroinitializer +; CHECK: vector.body: +; CHECK: %[[VECLOAD:.*]] = load , * %{{.*}}, align 2 +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i16.nxv4p0i16( %[[VECLOAD]], %[[SPLAT_PTRS]], i32 2 +entry: + br label %for.body14 + +for.body14: ; preds = %for.body14, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body14 ] + %arrayidx = getelementptr inbounds i16, i16* %src, i64 %indvars.iv + %ld = load i16, i16* %arrayidx + store i16 %ld, i16* %dst, align 2 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %N + br i1 %exitcond.not, label %for.inc24, label %for.body14, !llvm.loop !0 + +for.inc24: ; preds = %for.body14, %for.body + ret void +} + + +define void @cond_inv_store_i32(i32* noalias %dst, i32* noalias readonly %src, i64 %N) #0 { +; CHECK-LABEL: @cond_inv_store_i32( +; CHECK: vector.ph: +; CHECK: %[[TMP1:.*]] = insertelement poison, i32* %dst, i32 0 +; CHECK-NEXT: %[[SPLAT_PTRS:.*]] = shufflevector %[[TMP1]], poison, zeroinitializer +; CHECK: vector.body: +; CHECK: %[[VECLOAD:.*]] = load , * %{{.*}}, align 4 +; CHECK-NEXT: %[[MASK:.*]] = icmp sgt %[[VECLOAD]], shufflevector ( insertelement ( poison, i32 0, i32 0), poison, zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32( %[[VECLOAD]], %[[SPLAT_PTRS]], i32 4, %[[MASK]]) +entry: + br label %for.body + +for.body: ; preds = %entry, %for.inc + %i.09 = phi i64 [ %inc, %for.inc ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %src, i64 %i.09 + %0 = load i32, i32* %arrayidx, align 4 + %cmp1 = icmp sgt i32 %0, 0 + br i1 %cmp1, label %if.then, label %for.inc + +if.then: ; preds = %for.body + store i32 %0, i32* %dst, align 4 + br label %for.inc + +for.inc: ; preds = %for.body, %if.then + %inc = add nuw nsw i64 %i.09, 1 + %exitcond.not = icmp eq i64 %inc, %N + br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 + +for.end: ; preds = %for.inc, %entry + ret void +} + +attributes #0 = { "target-features"="+neon,+sve" } + +!0 = distinct !{!0, !1, !2, !3, !4, !5} +!1 = !{!"llvm.loop.mustprogress"} +!2 = !{!"llvm.loop.vectorize.width", i32 4} +!3 = !{!"llvm.loop.vectorize.scalable.enable", i1 true} +!4 = !{!"llvm.loop.vectorize.enable", i1 true} +!5 = !{!"llvm.loop.interleave.count", i32 1} + From 400509238a0ff71f62fbf3a5d0cb576bc163b4b8 Mon Sep 17 00:00:00 2001 From: Florian Mayer Date: Tue, 29 Jun 2021 11:56:19 +0100 Subject: [PATCH 161/619] Revert "[hwasan] print exact mismatch offset for short granules." Broke x86 LAM bot. This reverts commit 2a60ab76a796637d49bf1c7191f5b5a0c92f81bc. --- compiler-rt/lib/hwasan/hwasan_report.cpp | 19 ++------------- .../TestCases/heap-buffer-overflow-into.c | 23 ++++--------------- .../hwasan/TestCases/heap-buffer-overflow.c | 2 -- .../test/hwasan/TestCases/mem-intrinsics.c | 2 +- 4 files changed, 7 insertions(+), 39 deletions(-) diff --git a/compiler-rt/lib/hwasan/hwasan_report.cpp b/compiler-rt/lib/hwasan/hwasan_report.cpp index b6f968ea10457..715b4e05992a6 100644 --- a/compiler-rt/lib/hwasan/hwasan_report.cpp +++ b/compiler-rt/lib/hwasan/hwasan_report.cpp @@ -630,24 +630,9 @@ void ReportTagMismatch(StackTrace *stack, uptr tagged_addr, uptr access_size, Printf("%s of size %zu at %p tags: %02x/%02x (ptr/mem) in thread T%zd\n", is_store ? "WRITE" : "READ", access_size, untagged_addr, ptr_tag, mem_tag, t->unique_id()); - if (mem_tag < kShadowAlignment) { - tag_t *granule_ptr = reinterpret_cast((untagged_addr + offset) & - ~(kShadowAlignment - 1)); - // If offset is 0, (untagged_addr + offset) is not aligned to granules. - // This is the offset of the leftmost accessed byte within the bad granule. - u8 in_granule_offset = (untagged_addr + offset) & (kShadowAlignment - 1); - // The first mismatch was a short granule that matched the ptr_tag. - if (granule_ptr[kShadowAlignment - 1] == ptr_tag) { - // If the access starts after the end of the short granule, then the first - // bad byte is the first byte of the access; otherwise it is the first - // byte past the end of the short granule - if (mem_tag > in_granule_offset) { - offset += mem_tag - in_granule_offset; - } - } - } if (offset != 0) - Printf("Invalid access starting at offset %zu\n", offset); + Printf("Invalid access starting at offset [%zu, %zu)\n", offset, + Min(access_size, static_cast(offset) + (1 << kShadowScale))); Printf("%s", d.Default()); stack->Print(); diff --git a/compiler-rt/test/hwasan/TestCases/heap-buffer-overflow-into.c b/compiler-rt/test/hwasan/TestCases/heap-buffer-overflow-into.c index 8526c81f4cd7d..af4256b84db03 100644 --- a/compiler-rt/test/hwasan/TestCases/heap-buffer-overflow-into.c +++ b/compiler-rt/test/hwasan/TestCases/heap-buffer-overflow-into.c @@ -1,8 +1,5 @@ // RUN: %clang_hwasan %s -o %t -// RUN: not %run %t 5 10 2>&1 | FileCheck %s --check-prefix=CHECK5 -// RUN: not %run %t 7 10 2>&1 | FileCheck %s --check-prefix=CHECK7 -// RUN: not %run %t 8 20 2>&1 | FileCheck %s --check-prefix=CHECK8 -// RUN: not %run %t 32 20 2>&1 | FileCheck %s --check-prefix=CHECK32 +// RUN: not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK // REQUIRES: stable-runtime @@ -13,20 +10,8 @@ int main(int argc, char **argv) { __hwasan_enable_allocator_tagging(); - if (argc < 2) { - fprintf(stderr, "Invalid number of arguments."); - abort(); - } - int read_offset = argc < 2 ? 5 : atoi(argv[1]); - int size = argc < 3 ? 10 : atoi(argv[2]); - char *volatile x = (char *)malloc(size); - memset(x + read_offset, 0, 26); - // CHECK5: Invalid access starting at offset 5 - // CHECK5: is located 5 bytes inside 10-byte region - // CHECK7: Invalid access starting at offset 3 - // CHECK7: is located 7 bytes inside 10-byte region - // CHECK8: Invalid access starting at offset 12 - // CHECK8: is located 8 bytes inside 20-byte region - // CHECK32: is located 12 bytes to the right of 20-byte region + char *volatile x = (char *)malloc(10); + memset(x + 5, 0, 26); + // CHECK: is located 5 bytes inside 10-byte region free(x); } diff --git a/compiler-rt/test/hwasan/TestCases/heap-buffer-overflow.c b/compiler-rt/test/hwasan/TestCases/heap-buffer-overflow.c index 8e8719a7f65c4..67398141209af 100644 --- a/compiler-rt/test/hwasan/TestCases/heap-buffer-overflow.c +++ b/compiler-rt/test/hwasan/TestCases/heap-buffer-overflow.c @@ -52,14 +52,12 @@ int main(int argc, char **argv) { // CHECKM: is located 0 bytes to the right of 1000000-byte region // // CHECK31: tags: [[TAG:..]]/0e (ptr/mem) -// CHECK31-NOT: Invalid access starting at offset // CHECK31: is located 1 bytes to the right of 30-byte region // CHECK31: Memory tags around the buggy address // CHECK31: [0e] // CHECK31: Tags for short granules around the buggy address // CHECK31: {{\[}}[[TAG]]] // -// CHECK20-NOT: Invalid access starting at offset // CHECK20: is located 10 bytes to the right of 20-byte region [0x{{.*}}0,0x{{.*}}4) free(x); } diff --git a/compiler-rt/test/hwasan/TestCases/mem-intrinsics.c b/compiler-rt/test/hwasan/TestCases/mem-intrinsics.c index 44b9fd67cbcc6..28568c828cea1 100644 --- a/compiler-rt/test/hwasan/TestCases/mem-intrinsics.c +++ b/compiler-rt/test/hwasan/TestCases/mem-intrinsics.c @@ -23,7 +23,7 @@ int main() { write(STDOUT_FILENO, "recovered\n", 10); // WRITE: ERROR: HWAddressSanitizer: tag-mismatch on address // WRITE: WRITE of size 32 at {{.*}} tags: [[PTR_TAG:..]]/[[MEM_TAG:..]] (ptr/mem) - // WRITE: Invalid access starting at offset 16 + // WRITE: Invalid access starting at offset [16, 32) // WRITE: Memory tags around the buggy address (one tag corresponds to 16 bytes): // WRITE: =>{{.*}}[[PTR_TAG]]{{[[:space:]]\[}}[[MEM_TAG]] // WRITE-NOT: recovered From b458bb8c04cd5ed025884d424f386a00c9c6857e Mon Sep 17 00:00:00 2001 From: Florian Mayer Date: Mon, 21 Jun 2021 19:55:31 +0100 Subject: [PATCH 162/619] [hwasan] Display causes in order of probability. A heap or global buffer that is far away from the faulting address is unlikely to be the cause, especially if there is a potential use-after-free as well, so we want to show it after the other causes. Reviewed By: eugenis Differential Revision: https://reviews.llvm.org/D104781 --- compiler-rt/lib/hwasan/hwasan_report.cpp | 180 +++++++++++------- compiler-rt/test/hwasan/TestCases/global.c | 1 + .../hwasan/TestCases/heap-buffer-overflow.c | 8 + compiler-rt/test/hwasan/TestCases/stack-oob.c | 1 + compiler-rt/test/hwasan/TestCases/stack-uar.c | 3 +- .../test/hwasan/TestCases/thread-uaf.c | 1 + .../TestCases/use-after-free-and-overflow.c | 61 ++++++ .../test/hwasan/TestCases/use-after-free.c | 5 +- 8 files changed, 183 insertions(+), 77 deletions(-) create mode 100644 compiler-rt/test/hwasan/TestCases/use-after-free-and-overflow.c diff --git a/compiler-rt/lib/hwasan/hwasan_report.cpp b/compiler-rt/lib/hwasan/hwasan_report.cpp index 715b4e05992a6..00a78193e3a31 100644 --- a/compiler-rt/lib/hwasan/hwasan_report.cpp +++ b/compiler-rt/lib/hwasan/hwasan_report.cpp @@ -296,6 +296,75 @@ static uptr GetGlobalSizeFromDescriptor(uptr ptr) { return 0; } +static void ShowCandidate(uptr untagged_addr, tag_t *candidate, tag_t *left, + tag_t *right) { + Decorator d; + uptr mem = ShadowToMem(reinterpret_cast(candidate)); + HwasanChunkView chunk = FindHeapChunkByAddress(mem); + if (chunk.IsAllocated()) { + uptr offset; + const char *whence; + if (untagged_addr < chunk.End() && untagged_addr >= chunk.Beg()) { + offset = untagged_addr - chunk.Beg(); + whence = "inside"; + } else if (candidate == left) { + offset = untagged_addr - chunk.End(); + whence = "to the right of"; + } else { + offset = chunk.Beg() - untagged_addr; + whence = "to the left of"; + } + Printf("%s", d.Error()); + Printf("\nCause: heap-buffer-overflow\n"); + Printf("%s", d.Default()); + Printf("%s", d.Location()); + Printf("%p is located %zd bytes %s %zd-byte region [%p,%p)\n", + untagged_addr, offset, whence, chunk.UsedSize(), chunk.Beg(), + chunk.End()); + Printf("%s", d.Allocation()); + Printf("allocated here:\n"); + Printf("%s", d.Default()); + GetStackTraceFromId(chunk.GetAllocStackId()).Print(); + return; + } + // Check whether the address points into a loaded library. If so, this is + // most likely a global variable. + const char *module_name; + uptr module_address; + Symbolizer *sym = Symbolizer::GetOrInit(); + if (sym->GetModuleNameAndOffsetForPC(mem, &module_name, &module_address)) { + Printf("%s", d.Error()); + Printf("\nCause: global-overflow\n"); + Printf("%s", d.Default()); + DataInfo info; + Printf("%s", d.Location()); + if (sym->SymbolizeData(mem, &info) && info.start) { + Printf( + "%p is located %zd bytes to the %s of %zd-byte global variable " + "%s [%p,%p) in %s\n", + untagged_addr, + candidate == left ? untagged_addr - (info.start + info.size) + : info.start - untagged_addr, + candidate == left ? "right" : "left", info.size, info.name, + info.start, info.start + info.size, module_name); + } else { + uptr size = GetGlobalSizeFromDescriptor(mem); + if (size == 0) + // We couldn't find the size of the global from the descriptors. + Printf("%p is located to the %s of a global variable in (%s+0x%x)\n", + untagged_addr, candidate == left ? "right" : "left", module_name, + module_address); + else + Printf( + "%p is located to the %s of a %zd-byte global variable in " + "(%s+0x%x)\n", + untagged_addr, candidate == left ? "right" : "left", size, + module_name, module_address); + } + Printf("%s", d.Default()); + } +} + void PrintAddressDescription( uptr tagged_addr, uptr access_size, StackAllocationsRingBuffer *current_stack_allocations) { @@ -324,7 +393,8 @@ void PrintAddressDescription( tag_t addr_tag = GetTagFromPointer(tagged_addr); tag_t *tag_ptr = reinterpret_cast(MemToShadow(untagged_addr)); tag_t *candidate = nullptr, *left = tag_ptr, *right = tag_ptr; - for (int i = 0; i < 1000; i++) { + uptr candidate_distance = 0; + for (; candidate_distance < 1000; candidate_distance++) { if (TagsEqual(addr_tag, left)) { candidate = left; break; @@ -337,68 +407,32 @@ void PrintAddressDescription( ++right; } - if (candidate) { - uptr mem = ShadowToMem(reinterpret_cast(candidate)); - HwasanChunkView chunk = FindHeapChunkByAddress(mem); - if (chunk.IsAllocated()) { - uptr offset; - const char *whence; - if (untagged_addr < chunk.End() && untagged_addr >= chunk.Beg()) { - offset = untagged_addr - chunk.Beg(); - whence = "inside"; - } else if (candidate == left) { - offset = untagged_addr - chunk.End(); - whence = "to the right of"; - } else { - offset = chunk.Beg() - untagged_addr; - whence = "to the left of"; - } + constexpr auto kCloseCandidateDistance = 1; + + if (candidate && candidate_distance <= kCloseCandidateDistance) { + ShowCandidate(untagged_addr, candidate, left, right); + num_descriptions_printed++; + } + + hwasanThreadList().VisitAllLiveThreads([&](Thread *t) { + if (t->AddrIsInStack(untagged_addr)) { + // TODO(fmayer): figure out how to distinguish use-after-return and + // stack-buffer-overflow. + Printf("%s", d.Error()); + Printf("\nCause: stack tag-mismatch\n"); Printf("%s", d.Location()); - Printf("%p is located %zd bytes %s %zd-byte region [%p,%p)\n", - untagged_addr, offset, whence, chunk.UsedSize(), chunk.Beg(), - chunk.End()); - Printf("%s", d.Allocation()); - Printf("allocated here:\n"); + Printf("Address %p is located in stack of thread T%zd\n", untagged_addr, + t->unique_id()); Printf("%s", d.Default()); - GetStackTraceFromId(chunk.GetAllocStackId()).Print(); + t->Announce(); + + auto *sa = (t == GetCurrentThread() && current_stack_allocations) + ? current_stack_allocations + : t->stack_allocations(); + PrintStackAllocations(sa, addr_tag, untagged_addr); num_descriptions_printed++; - } else { - // Check whether the address points into a loaded library. If so, this is - // most likely a global variable. - const char *module_name; - uptr module_address; - Symbolizer *sym = Symbolizer::GetOrInit(); - if (sym->GetModuleNameAndOffsetForPC(mem, &module_name, - &module_address)) { - DataInfo info; - if (sym->SymbolizeData(mem, &info) && info.start) { - Printf( - "%p is located %zd bytes to the %s of %zd-byte global variable " - "%s [%p,%p) in %s\n", - untagged_addr, - candidate == left ? untagged_addr - (info.start + info.size) - : info.start - untagged_addr, - candidate == left ? "right" : "left", info.size, info.name, - info.start, info.start + info.size, module_name); - } else { - uptr size = GetGlobalSizeFromDescriptor(mem); - if (size == 0) - // We couldn't find the size of the global from the descriptors. - Printf( - "%p is located to the %s of a global variable in (%s+0x%x)\n", - untagged_addr, candidate == left ? "right" : "left", - module_name, module_address); - else - Printf( - "%p is located to the %s of a %zd-byte global variable in " - "(%s+0x%x)\n", - untagged_addr, candidate == left ? "right" : "left", size, - module_name, module_address); - } - num_descriptions_printed++; - } } - } + }); hwasanThreadList().VisitAllLiveThreads([&](Thread *t) { // Scan all threads' ring buffers to find if it's a heap-use-after-free. @@ -407,6 +441,8 @@ void PrintAddressDescription( if (FindHeapAllocation(t->heap_allocations(), tagged_addr, &har, &ring_index, &num_matching_addrs, &num_matching_addrs_4b)) { + Printf("%s", d.Error()); + Printf("\nCause: use-after-free\n"); Printf("%s", d.Location()); Printf("%p is located %zd bytes inside of %zd-byte region [%p,%p)\n", untagged_addr, untagged_addr - UntagAddr(har.tagged_addr), @@ -433,29 +469,25 @@ void PrintAddressDescription( t->Announce(); num_descriptions_printed++; } - - // Very basic check for stack memory. - if (t->AddrIsInStack(untagged_addr)) { - Printf("%s", d.Location()); - Printf("Address %p is located in stack of thread T%zd\n", untagged_addr, - t->unique_id()); - Printf("%s", d.Default()); - t->Announce(); - - auto *sa = (t == GetCurrentThread() && current_stack_allocations) - ? current_stack_allocations - : t->stack_allocations(); - PrintStackAllocations(sa, addr_tag, untagged_addr); - num_descriptions_printed++; - } }); + if (candidate && num_descriptions_printed == 0) { + ShowCandidate(untagged_addr, candidate, left, right); + num_descriptions_printed++; + } + // Print the remaining threads, as an extra information, 1 line per thread. hwasanThreadList().VisitAllLiveThreads([&](Thread *t) { t->Announce(); }); if (!num_descriptions_printed) // We exhausted our possibilities. Bail out. Printf("HWAddressSanitizer can not describe address in more detail.\n"); + if (num_descriptions_printed > 1) { + Printf( + "There are %d potential causes, printed above in order " + "of likeliness.", + num_descriptions_printed); + } } void ReportStats() {} diff --git a/compiler-rt/test/hwasan/TestCases/global.c b/compiler-rt/test/hwasan/TestCases/global.c index e1a7fd72f2b88..82fca892c23ab 100644 --- a/compiler-rt/test/hwasan/TestCases/global.c +++ b/compiler-rt/test/hwasan/TestCases/global.c @@ -10,6 +10,7 @@ int x = 1; int main(int argc, char **argv) { + // CHECK: Cause: global-overflow // RSYM: is located 0 bytes to the right of 4-byte global variable x {{.*}} in {{.*}}global.c.tmp // RNOSYM: is located to the right of a 4-byte global variable in ({{.*}}global.c.tmp+{{.*}}) // LSYM: is located 4 bytes to the left of 4-byte global variable x {{.*}} in {{.*}}global.c.tmp diff --git a/compiler-rt/test/hwasan/TestCases/heap-buffer-overflow.c b/compiler-rt/test/hwasan/TestCases/heap-buffer-overflow.c index 67398141209af..8d41ac51dd1e4 100644 --- a/compiler-rt/test/hwasan/TestCases/heap-buffer-overflow.c +++ b/compiler-rt/test/hwasan/TestCases/heap-buffer-overflow.c @@ -31,6 +31,7 @@ int main(int argc, char **argv) { if (size == 1000000) { fprintf(stderr, "is a large allocated heap chunk; size: 1003520 offset: %d\n", offset); + fprintf(stderr, "Cause: heap-buffer-overflow\n"); fprintf(stderr, "is located %s of 1000000-byte region\n", offset == -30 ? "30 bytes to the left" : "0 bytes to the right"); return -1; @@ -38,26 +39,33 @@ int main(int argc, char **argv) { #endif // CHECK40: allocated heap chunk; size: 32 offset: 8 +// CHECK40: Cause: heap-buffer-overflow // CHECK40: is located 10 bytes to the right of 30-byte region // // CHECK80: allocated heap chunk; size: 32 offset: 16 +// CHECK80: Cause: heap-buffer-overflow // CHECK80: is located 50 bytes to the right of 30-byte region // +// CHECKm30: Cause: heap-buffer-overflow // CHECKm30: is located 30 bytes to the left of 30-byte region // // CHECKMm30: is a large allocated heap chunk; size: 1003520 offset: -30 +// CHECKMm30: Cause: heap-buffer-overflow // CHECKMm30: is located 30 bytes to the left of 1000000-byte region // // CHECKM: is a large allocated heap chunk; size: 1003520 offset: 1000000 +// CHECKM: Cause: heap-buffer-overflow // CHECKM: is located 0 bytes to the right of 1000000-byte region // // CHECK31: tags: [[TAG:..]]/0e (ptr/mem) +// CHECK31: Cause: heap-buffer-overflow // CHECK31: is located 1 bytes to the right of 30-byte region // CHECK31: Memory tags around the buggy address // CHECK31: [0e] // CHECK31: Tags for short granules around the buggy address // CHECK31: {{\[}}[[TAG]]] // +// CHECK20: Cause: heap-buffer-overflow // CHECK20: is located 10 bytes to the right of 20-byte region [0x{{.*}}0,0x{{.*}}4) free(x); } diff --git a/compiler-rt/test/hwasan/TestCases/stack-oob.c b/compiler-rt/test/hwasan/TestCases/stack-oob.c index 1088300c74bc1..7dfd7deb2767e 100644 --- a/compiler-rt/test/hwasan/TestCases/stack-oob.c +++ b/compiler-rt/test/hwasan/TestCases/stack-oob.c @@ -27,6 +27,7 @@ int main() { // CHECK: READ of size 1 at // CHECK: #0 {{.*}} in f{{.*}}stack-oob.c:[[@LINE-6]] + // CHECK: Cause: stack tag-mismatch // CHECK: is located in stack of threa // CHECK: SUMMARY: HWAddressSanitizer: tag-mismatch {{.*}} in f diff --git a/compiler-rt/test/hwasan/TestCases/stack-uar.c b/compiler-rt/test/hwasan/TestCases/stack-uar.c index 113f36c319204..9a81dfc3d6bce 100644 --- a/compiler-rt/test/hwasan/TestCases/stack-uar.c +++ b/compiler-rt/test/hwasan/TestCases/stack-uar.c @@ -30,9 +30,10 @@ int main() { return *p; // CHECK: READ of size 1 at // CHECK: #0 {{.*}} in main{{.*}}stack-uar.c:[[@LINE-2]] + // CHECK: Cause: stack tag-mismatch // CHECK: is located in stack of thread // CHECK: Potentially referenced stack objects: - // CHECK-NEXT: zzz in buggy {{.*}}stack-uar.c:[[@LINE-19]] + // CHECK-NEXT: zzz in buggy {{.*}}stack-uar.c:[[@LINE-20]] // CHECK-NEXT: Memory tags around the buggy address // NOSYM: Previously allocated frames: diff --git a/compiler-rt/test/hwasan/TestCases/thread-uaf.c b/compiler-rt/test/hwasan/TestCases/thread-uaf.c index 7051b2632e606..c368882f45896 100644 --- a/compiler-rt/test/hwasan/TestCases/thread-uaf.c +++ b/compiler-rt/test/hwasan/TestCases/thread-uaf.c @@ -30,6 +30,7 @@ void *Use(void *arg) { // CHECK: ERROR: HWAddressSanitizer: tag-mismatch on address // CHECK: WRITE of size 1 {{.*}} in thread T3 // CHECK: thread-uaf.c:[[@LINE-3]] + // CHECK: Cause: use-after-free // CHECK: freed by thread T2 here // CHECK: in Deallocate // CHECK: previously allocated here: diff --git a/compiler-rt/test/hwasan/TestCases/use-after-free-and-overflow.c b/compiler-rt/test/hwasan/TestCases/use-after-free-and-overflow.c new file mode 100644 index 0000000000000..c08b00fc35ace --- /dev/null +++ b/compiler-rt/test/hwasan/TestCases/use-after-free-and-overflow.c @@ -0,0 +1,61 @@ +// Checks that we do not print a faraway buffer overrun if we find a +// use-after-free. +// RUN: %clang_hwasan -O0 %s -o %t +// RUN: not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK +// REQUIRES: stable-runtime + +#include +#include +#include + +#define ALLOC_ATTEMPTS 256 + +char *Untag(void *x) { + return (char *)__hwasan_tag_pointer(x, 0); +} + +void *FindMatch(void *ptrs[ALLOC_ATTEMPTS], void *value) { + for (int i = 0; i < ALLOC_ATTEMPTS; ++i) { + if (!ptrs[i]) + return NULL; + int distance = Untag(value) - Untag(ptrs[i]); + // Leave at least one granule of gap to the allocation. + if (abs(distance) < 1000 && abs(distance) > 32) + return ptrs[i]; + } + return NULL; +} + +int main(int argc, char **argv) { + __hwasan_enable_allocator_tagging(); + void *ptrs[ALLOC_ATTEMPTS] = {}; + // Find two allocations that are close enough so that they would be + // candidates as buffer overflows for each other. + void *one; + void *other; + for (int i = 0; i < ALLOC_ATTEMPTS; ++i) { + one = malloc(16); + other = FindMatch(ptrs, one); + ptrs[i] = one; + if (other) + break; + } + if (!other) { + fprintf(stderr, "Could not find closeby allocations.\n"); + abort(); + } + __hwasan_tag_memory(Untag(one), 3, 16); + __hwasan_tag_memory(Untag(other), 3, 16); + // Tag potential adjaceant allocations with a mismatching tag, otherwise this + // test would flake. + __hwasan_tag_memory(Untag(one) + 16, 4, 16); + __hwasan_tag_memory(Untag(one) - 16, 4, 16); + void *retagged_one = __hwasan_tag_pointer(one, 3); + free(retagged_one); + volatile char *ptr = (char *)retagged_one; + *ptr = 1; +} + +// CHECK-NOT: Cause: heap-buffer-overflow +// CHECK: Cause: use-after-free +// CHECK-NOT: Cause: heap-buffer-overflow diff --git a/compiler-rt/test/hwasan/TestCases/use-after-free.c b/compiler-rt/test/hwasan/TestCases/use-after-free.c index ed4512387cc84..608f588944536 100644 --- a/compiler-rt/test/hwasan/TestCases/use-after-free.c +++ b/compiler-rt/test/hwasan/TestCases/use-after-free.c @@ -24,15 +24,16 @@ int main() { // CHECK: #{{[0-9]}} {{.*}} in main {{.*}}use-after-free.c:[[@LINE-2]] // Offset is 5 or 11 depending on left/right alignment. // CHECK: is a small unallocated heap chunk; size: 32 offset: {{5|11}} + // CHECK: Cause: use-after-free // CHECK: is located 5 bytes inside of 10-byte region // // CHECK: freed by thread {{.*}} here: // CHECK: #0 {{.*}} in {{.*}}free{{.*}} {{.*}}hwasan_allocation_functions.cpp - // CHECK: #1 {{.*}} in main {{.*}}use-after-free.c:[[@LINE-14]] + // CHECK: #1 {{.*}} in main {{.*}}use-after-free.c:[[@LINE-15]] // CHECK: previously allocated here: // CHECK: #0 {{.*}} in {{.*}}malloc{{.*}} {{.*}}hwasan_allocation_functions.cpp - // CHECK: #1 {{.*}} in main {{.*}}use-after-free.c:[[@LINE-19]] + // CHECK: #1 {{.*}} in main {{.*}}use-after-free.c:[[@LINE-20]] // CHECK: Memory tags around the buggy address (one tag corresponds to 16 bytes): // CHECK: =>{{.*}}[[MEM_TAG]] // CHECK: SUMMARY: HWAddressSanitizer: tag-mismatch From 2098c5dfbd621f5d51eba90810da083c1eb407fc Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Tue, 29 Jun 2021 05:25:44 -0700 Subject: [PATCH 163/619] [InstCombine]Add a test for reductions after shuffles, NFC. --- .../InstCombine/reduction-shufflevector.ll | 281 ++++++++++++++++++ 1 file changed, 281 insertions(+) create mode 100644 llvm/test/Transforms/InstCombine/reduction-shufflevector.ll diff --git a/llvm/test/Transforms/InstCombine/reduction-shufflevector.ll b/llvm/test/Transforms/InstCombine/reduction-shufflevector.ll new file mode 100644 index 0000000000000..1ecdb386ac1ab --- /dev/null +++ b/llvm/test/Transforms/InstCombine/reduction-shufflevector.ll @@ -0,0 +1,281 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -S | FileCheck %s + +define i32 @reduce_add(<4 x i32> %x) { +; CHECK-LABEL: @reduce_add( +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[X:%.*]]) +; CHECK-NEXT: ret i32 [[RES]] +; + %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> + %res = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %shuf) + ret i32 %res +} + +define i32 @reduce_or(<4 x i32> %x) { +; CHECK-LABEL: @reduce_or( +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[SHUF]]) +; CHECK-NEXT: ret i32 [[RES]] +; + %shuf = shufflevector <4 x i32> poison, <4 x i32> %x, <4 x i32> + %res = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %shuf) + ret i32 %res +} + +define i32 @reduce_and(<4 x i32> %x) { +; CHECK-LABEL: @reduce_and( +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[SHUF]]) +; CHECK-NEXT: ret i32 [[RES]] +; + %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> + %res = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %shuf) + ret i32 %res +} + +define i32 @reduce_xor(<4 x i32> %x) { +; CHECK-LABEL: @reduce_xor( +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[SHUF]]) +; CHECK-NEXT: ret i32 [[RES]] +; + %shuf = shufflevector <4 x i32> poison, <4 x i32> %x, <4 x i32> + %res = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %shuf) + ret i32 %res +} + +define i32 @reduce_umax(<4 x i32> %x) { +; CHECK-LABEL: @reduce_umax( +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[SHUF]]) +; CHECK-NEXT: ret i32 [[RES]] +; + %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> + %res = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %shuf) + ret i32 %res +} + +define i32 @reduce_umin(<4 x i32> %x) { +; CHECK-LABEL: @reduce_umin( +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[SHUF]]) +; CHECK-NEXT: ret i32 [[RES]] +; + %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> + %res = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %shuf) + ret i32 %res +} + +define i32 @reduce_smax(<4 x i32> %x) { +; CHECK-LABEL: @reduce_smax( +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[SHUF]]) +; CHECK-NEXT: ret i32 [[RES]] +; + %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> + %res = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %shuf) + ret i32 %res +} + +define i32 @reduce_smin(<4 x i32> %x) { +; CHECK-LABEL: @reduce_smin( +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[SHUF]]) +; CHECK-NEXT: ret i32 [[RES]] +; + %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> + %res = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %shuf) + ret i32 %res +} + +define float @reduce_fmax(<4 x float> %x) { +; CHECK-LABEL: @reduce_fmax( +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[SHUF]]) +; CHECK-NEXT: ret float [[RES]] +; + %shuf = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> + %res = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %shuf) + ret float %res +} + +define float @reduce_fmin(<4 x float> %x) { +; CHECK-LABEL: @reduce_fmin( +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[SHUF]]) +; CHECK-NEXT: ret float [[RES]] +; + %shuf = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> + %res = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %shuf) + ret float %res +} + +define float @reduce_fadd(float %a, <4 x float> %x) { +; CHECK-LABEL: @reduce_fadd( +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = call reassoc float @llvm.vector.reduce.fadd.v4f32(float [[A:%.*]], <4 x float> [[SHUF]]) +; CHECK-NEXT: ret float [[RES]] +; + %shuf = shufflevector <4 x float> %x, <4 x float> %x, <4 x i32> + %res = call reassoc float @llvm.vector.reduce.fadd.v4f32(float %a, <4 x float> %shuf) + ret float %res +} + +define float @reduce_fmul(float %a, <4 x float> %x) { +; CHECK-LABEL: @reduce_fmul( +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = call reassoc float @llvm.vector.reduce.fmul.v4f32(float [[A:%.*]], <4 x float> [[SHUF]]) +; CHECK-NEXT: ret float [[RES]] +; + %shuf = shufflevector <4 x float> %x, <4 x float> zeroinitializer, <4 x i32> + %res = call reassoc float @llvm.vector.reduce.fmul.v4f32(float %a, <4 x float> %shuf) + ret float %res +} + +; Failed cases +; TODO: simplify the reductions for shuffles resulting in undef/poison elements. + +define i32 @reduce_add_failed(<4 x i32> %x) { +; CHECK-LABEL: @reduce_add_failed( +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[SHUF]]) +; CHECK-NEXT: ret i32 [[RES]] +; + %shuf = shufflevector <4 x i32> %x, <4 x i32> %x, <4 x i32> + %res = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %shuf) + ret i32 %res +} + +define i32 @reduce_or_failed(<4 x i32> %x) { +; CHECK-LABEL: @reduce_or_failed( +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> , <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[SHUF]]) +; CHECK-NEXT: ret i32 [[RES]] +; + %shuf = shufflevector <4 x i32> %x, <4 x i32> zeroinitializer, <4 x i32> + %res = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %shuf) + ret i32 %res +} + +define i32 @reduce_and_failed(<4 x i32> %x) { +; CHECK-LABEL: @reduce_and_failed( +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[SHUF]]) +; CHECK-NEXT: ret i32 [[RES]] +; + %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> + %res = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %shuf) + ret i32 %res +} + +define i32 @reduce_xor_failed(<4 x i32> %x) { +; CHECK-LABEL: @reduce_xor_failed( +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[SHUF]]) +; CHECK-NEXT: ret i32 [[RES]] +; + %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> + %res = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %shuf) + ret i32 %res +} + +define i32 @reduce_umax_failed(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @reduce_umax_failed( +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[SHUF]]) +; CHECK-NEXT: ret i32 [[RES]] +; + %shuf = shufflevector <2 x i32> %x, <2 x i32> %y, <4 x i32> + %res = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %shuf) + ret i32 %res +} + +define i32 @reduce_umin_failed(<2 x i32> %x) { +; CHECK-LABEL: @reduce_umin_failed( +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[SHUF]]) +; CHECK-NEXT: ret i32 [[RES]] +; + %shuf = shufflevector <2 x i32> %x, <2 x i32> poison, <4 x i32> + %res = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %shuf) + ret i32 %res +} + +define i32 @reduce_smax_failed(<8 x i32> %x) { +; CHECK-LABEL: @reduce_smax_failed( +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <8 x i32> [[X:%.*]], <8 x i32> poison, <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[SHUF]]) +; CHECK-NEXT: ret i32 [[RES]] +; + %shuf = shufflevector <8 x i32> %x, <8 x i32> poison, <4 x i32> + %res = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %shuf) + ret i32 %res +} + +define i32 @reduce_smin_failed(<8 x i32> %x) { +; CHECK-LABEL: @reduce_smin_failed( +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <8 x i32> [[X:%.*]], <8 x i32> undef, <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[SHUF]]) +; CHECK-NEXT: ret i32 [[RES]] +; + %shuf = shufflevector <8 x i32> %x, <8 x i32> %x, <4 x i32> + %res = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %shuf) + ret i32 %res +} + +define float @reduce_fmax_failed(<4 x float> %x) { +; CHECK-LABEL: @reduce_fmax_failed( +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[SHUF]]) +; CHECK-NEXT: ret float [[RES]] +; + %shuf = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> + %res = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %shuf) + ret float %res +} + +define float @reduce_fmin_failed(<4 x float> %x) { +; CHECK-LABEL: @reduce_fmin_failed( +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[SHUF]]) +; CHECK-NEXT: ret float [[RES]] +; + %shuf = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> + %res = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %shuf) + ret float %res +} + +define float @reduce_fadd_failed(float %a, <4 x float> %x) { +; CHECK-LABEL: @reduce_fadd_failed( +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = call float @llvm.vector.reduce.fadd.v4f32(float [[A:%.*]], <4 x float> [[SHUF]]) +; CHECK-NEXT: ret float [[RES]] +; + %shuf = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> + %res = call float @llvm.vector.reduce.fadd.v4f32(float %a, <4 x float> %shuf) + ret float %res +} + +define float @reduce_fmul_failed(float %a, <2 x float> %x) { +; CHECK-LABEL: @reduce_fmul_failed( +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = call float @llvm.vector.reduce.fmul.v4f32(float [[A:%.*]], <4 x float> [[SHUF]]) +; CHECK-NEXT: ret float [[RES]] +; + %shuf = shufflevector <2 x float> %x, <2 x float> poison, <4 x i32> + %res = call float @llvm.vector.reduce.fmul.v4f32(float %a, <4 x float> %shuf) + ret float %res +} + +declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a) +declare i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %a) +declare i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %a) +declare i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %a) +declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %a) +declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %a) +declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a) +declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %a) +declare float @llvm.vector.reduce.fmax.v4f32(<4 x float> %a) +declare float @llvm.vector.reduce.fmin.v4f32(<4 x float> %a) +declare float @llvm.vector.reduce.fadd.v4f32(float %a, <4 x float> %b) +declare float @llvm.vector.reduce.fmul.v4f32(float %a, <4 x float> %b) From 47215e1c6250298aa9db59b3b06f832fcd23be01 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 29 Jun 2021 11:54:52 +0100 Subject: [PATCH 164/619] [LV] Fix crash when target instruction for sinking is dead. This patch fixes a crash when the target instruction for sinking is dead. In that case, no recipe is created and trying to get the recipe for it results in a crash. To ensure all sink targets are alive, find & use the first previous alive instruction. Note that the case where the sink source is dead is already handled. Found by https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=35320 Reviewed By: Ayal Differential Revision: https://reviews.llvm.org/D104603 --- .../Transforms/Vectorize/LoopVectorize.cpp | 18 +++++++++ .../LoopVectorize/first-order-recurrence.ll | 39 +++++++++++++++++++ 2 files changed, 57 insertions(+) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index f22e73719a99f..bb0cb5e6ba6d8 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9040,6 +9040,24 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF, for (Instruction *I : DeadInstructions) SinkAfter.erase(I); + // Cannot sink instructions after dead instructions (there won't be any + // recipes for them). Instead, find the first non-dead previous instruction. + for (auto &P : Legal->getSinkAfter()) { + Instruction *SinkTarget = P.second; + Instruction *FirstInst = &*SinkTarget->getParent()->begin(); + (void)FirstInst; + while (DeadInstructions.contains(SinkTarget)) { + assert( + SinkTarget != FirstInst && + "Must find a live instruction (at least the one feeding the " + "first-order recurrence PHI) before reaching beginning of the block"); + SinkTarget = SinkTarget->getPrevNode(); + assert(SinkTarget != P.first && + "sink source equals target, no sinking required"); + } + P.second = SinkTarget; + } + auto MaxVFPlusOne = MaxVF.getWithIncrement(1); for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFPlusOne);) { VFRange SubRange = {VF, MaxVFPlusOne}; diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll index 1389d28d382e3..dac449440069f 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll @@ -895,4 +895,43 @@ bb: br i1 %tmp9, label %bb1, label %bb2, !prof !2 } +; %vec.dead will be marked as dead instruction in the vector loop and no recipe +; will be created for it. Make sure a valid sink target is used. +define void @sink_after_dead_inst(i32* %A.ptr) { +; CHECK-LABEL: @sink_after_dead_inst +; CHECK-LABEL: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %vector.ph ], [ [[INDEX_NEXT]], %vector.body ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = zext i32 [[INDEX]] to i64 +; CHECK-NEXT: [[SEXT:%.*]] = shl i64 [[OFFSET_IDX]], 48 +; CHECK-NEXT: [[SHIFT:%.*]] = ashr exact i64 [[SEXT]], 48 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* %A.ptr, i64 [[SHIFT]] +; CHECK-NEXT: [[CAST:%.*]] = bitcast i32* [[GEP]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* [[CAST]], align 4 +; CHECK-NEXT: [[INDEX_NEXT:%.*]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 +; CHECK-NEXT: br i1 [[EC]], label %middle.block, label %vector.body + +entry: + br label %loop + +loop: + %iv = phi i16 [ 0, %entry ], [ %iv.next, %loop ] + %for = phi i32 [ 0, %entry ], [ %for.prev, %loop ] + %cmp = icmp eq i32 %for, 15 + %C = icmp eq i1 %cmp, true + %vec.dead = and i1 %C, 1 + %iv.next = add i16 %iv, 1 + %B1 = or i16 %iv.next, %iv.next + %B3 = and i1 %cmp, %C + %for.prev = zext i16 %B1 to i32 + + %ext = zext i1 %B3 to i32 + %A.gep = getelementptr i32, i32* %A.ptr, i16 %iv + store i32 0, i32* %A.gep + br i1 %vec.dead, label %for.end, label %loop + +for.end: + ret void +} + !2 = !{!"branch_weights", i32 1, i32 1} From 355216380b9c11e5d7a16ac20619cf16b1c0151c Mon Sep 17 00:00:00 2001 From: Alex Zinenko Date: Tue, 29 Jun 2021 14:13:01 +0200 Subject: [PATCH 165/619] [mlir] Remove SDBM This data structure and algorithm collection is no longer in use. Reviewed By: bondhugula Differential Revision: https://reviews.llvm.org/D105102 --- mlir/include/mlir/Dialect/SDBM/SDBM.h | 197 ----- mlir/include/mlir/Dialect/SDBM/SDBMDialect.h | 37 - mlir/include/mlir/Dialect/SDBM/SDBMExpr.h | 576 --------------- mlir/include/mlir/InitAllDialects.h | 2 - mlir/lib/Dialect/CMakeLists.txt | 1 - mlir/lib/Dialect/SDBM/CMakeLists.txt | 11 - mlir/lib/Dialect/SDBM/SDBM.cpp | 551 -------------- mlir/lib/Dialect/SDBM/SDBMDialect.cpp | 23 - mlir/lib/Dialect/SDBM/SDBMExpr.cpp | 732 ------------------- mlir/lib/Dialect/SDBM/SDBMExprDetail.h | 137 ---- mlir/test/CMakeLists.txt | 2 - mlir/test/SDBM/CMakeLists.txt | 19 - mlir/test/SDBM/lit.local.cfg | 1 - mlir/test/SDBM/sdbm-api-test.cpp | 201 ----- mlir/test/lit.cfg.py | 1 - mlir/test/mlir-opt/commandline.mlir | 1 - mlir/unittests/CMakeLists.txt | 1 - mlir/unittests/SDBM/CMakeLists.txt | 7 - mlir/unittests/SDBM/SDBMTest.cpp | 449 ------------ 19 files changed, 2949 deletions(-) delete mode 100644 mlir/include/mlir/Dialect/SDBM/SDBM.h delete mode 100644 mlir/include/mlir/Dialect/SDBM/SDBMDialect.h delete mode 100644 mlir/include/mlir/Dialect/SDBM/SDBMExpr.h delete mode 100644 mlir/lib/Dialect/SDBM/CMakeLists.txt delete mode 100644 mlir/lib/Dialect/SDBM/SDBM.cpp delete mode 100644 mlir/lib/Dialect/SDBM/SDBMDialect.cpp delete mode 100644 mlir/lib/Dialect/SDBM/SDBMExpr.cpp delete mode 100644 mlir/lib/Dialect/SDBM/SDBMExprDetail.h delete mode 100644 mlir/test/SDBM/CMakeLists.txt delete mode 100644 mlir/test/SDBM/lit.local.cfg delete mode 100644 mlir/test/SDBM/sdbm-api-test.cpp delete mode 100644 mlir/unittests/SDBM/CMakeLists.txt delete mode 100644 mlir/unittests/SDBM/SDBMTest.cpp diff --git a/mlir/include/mlir/Dialect/SDBM/SDBM.h b/mlir/include/mlir/Dialect/SDBM/SDBM.h deleted file mode 100644 index f4e6be874fdca..0000000000000 --- a/mlir/include/mlir/Dialect/SDBM/SDBM.h +++ /dev/null @@ -1,197 +0,0 @@ -//===- SDBM.h - MLIR SDBM declaration ---------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// A striped difference-bound matrix (SDBM) is a set in Z^N (or R^N) defined -// as {(x_1, ... x_n) | f(x_1, ... x_n) >= 0} where f is an SDBM expression. -// -//===----------------------------------------------------------------------===// - -#ifndef MLIR_DIALECT_SDBM_SDBM_H -#define MLIR_DIALECT_SDBM_SDBM_H - -#include "mlir/Support/LLVM.h" -#include "llvm/ADT/DenseMap.h" - -namespace mlir { - -class MLIRContext; -class SDBMDialect; -class SDBMExpr; -class SDBMTermExpr; - -/// A utility class for SDBM to represent an integer with potentially infinite -/// positive value. This uses the largest value of int64_t to represent infinity -/// and redefines the arithmetic operators so that the infinity "saturates": -/// inf + x = inf, -/// inf - x = inf. -/// If a sum of two finite values reaches the largest value of int64_t, the -/// behavior of IntInfty is undefined (in practice, it asserts), similarly to -/// regular signed integer overflow. -class IntInfty { -public: - constexpr static int64_t infty = std::numeric_limits::max(); - - /*implicit*/ IntInfty(int64_t v) : value(v) {} - - IntInfty &operator=(int64_t v) { - value = v; - return *this; - } - - static IntInfty infinity() { return IntInfty(infty); } - - int64_t getValue() const { return value; } - explicit operator int64_t() const { return value; } - - bool isFinite() { return value != infty; } - -private: - int64_t value; -}; - -inline IntInfty operator+(IntInfty lhs, IntInfty rhs) { - if (!lhs.isFinite() || !rhs.isFinite()) - return IntInfty::infty; - - // Check for overflows, treating the sum of two values adding up to INT_MAX as - // overflow. Convert values to unsigned to get an extra bit and avoid the - // undefined behavior of signed integer overflows. - assert((lhs.getValue() <= 0 || rhs.getValue() <= 0 || - static_cast(lhs.getValue()) + - static_cast(rhs.getValue()) < - static_cast(std::numeric_limits::max())) && - "IntInfty overflow"); - // Check for underflows by converting values to unsigned to avoid undefined - // behavior of signed integers perform the addition (bitwise result is same - // because numbers are required to be two's complement in C++) and check if - // the sign bit remains negative. - assert((lhs.getValue() >= 0 || rhs.getValue() >= 0 || - ((static_cast(lhs.getValue()) + - static_cast(rhs.getValue())) >> - 63) == 1) && - "IntInfty underflow"); - - return lhs.getValue() + rhs.getValue(); -} - -inline bool operator<(IntInfty lhs, IntInfty rhs) { - return lhs.getValue() < rhs.getValue(); -} - -inline bool operator<=(IntInfty lhs, IntInfty rhs) { - return lhs.getValue() <= rhs.getValue(); -} - -inline bool operator==(IntInfty lhs, IntInfty rhs) { - return lhs.getValue() == rhs.getValue(); -} - -inline bool operator!=(IntInfty lhs, IntInfty rhs) { return !(lhs == rhs); } - -/// Striped difference-bound matrix is a representation of an integer set bound -/// by a system of SDBMExprs interpreted as inequalities "expr <= 0". -class SDBM { -public: - /// Obtain an SDBM from a list of SDBM expressions treated as inequalities and - /// equalities with zero. - static SDBM get(ArrayRef inequalities, - ArrayRef equalities); - - void getSDBMExpressions(SDBMDialect *dialect, - SmallVectorImpl &inequalities, - SmallVectorImpl &equalities); - - void print(raw_ostream &os); - void dump(); - - IntInfty operator()(int i, int j) { return at(i, j); } - -private: - /// Get the given element of the difference bounds matrix. First index - /// corresponds to the negative term of the difference, second index - /// corresponds to the positive term of the difference. - IntInfty &at(int i, int j) { return matrix[i * getNumVariables() + j]; } - - /// Populate `inequalities` and `equalities` based on the values at(row,col) - /// and at(col,row) of the DBM. Depending on the values being finite and - /// being subsumed by stripe expressions, this may or may not add elements to - /// the lists of equalities and inequalities. - void convertDBMElement(unsigned row, unsigned col, SDBMTermExpr rowExpr, - SDBMTermExpr colExpr, - SmallVectorImpl &inequalities, - SmallVectorImpl &equalities); - - /// Populate `inequalities` based on the value at(pos,pos) of the DBM. Only - /// adds new inequalities if the inequality is not trivially true. - void convertDBMDiagonalElement(unsigned pos, SDBMTermExpr expr, - SmallVectorImpl &inequalities); - - /// Get the total number of elements in the matrix. - unsigned getNumVariables() const { - return 1 + numDims + numSymbols + numTemporaries; - } - - /// Get the position in the matrix that corresponds to the given dimension. - unsigned getDimPosition(unsigned position) const { return 1 + position; } - - /// Get the position in the matrix that corresponds to the given symbol. - unsigned getSymbolPosition(unsigned position) const { - return 1 + numDims + position; - } - - /// Get the position in the matrix that corresponds to the given temporary. - unsigned getTemporaryPosition(unsigned position) const { - return 1 + numDims + numSymbols + position; - } - - /// Number of dimensions in the system, - unsigned numDims; - /// Number of symbols in the system. - unsigned numSymbols; - /// Number of temporary variables in the system. - unsigned numTemporaries; - - /// Difference bounds matrix, stored as a linearized row-major vector. - /// Each value in this matrix corresponds to an inequality - /// - /// v@col - v@row <= at(row, col) - /// - /// where v@col and v@row are the variables that correspond to the linearized - /// position in the matrix. The positions correspond to - /// - /// - constant 0 (producing constraints v@col <= X and -v@row <= Y); - /// - SDBM expression dimensions (d0, d1, ...); - /// - SDBM expression symbols (s0, s1, ...); - /// - temporary variables (t0, t1, ...). - /// - /// Temporary variables are introduced to represent expressions that are not - /// trivially a difference between two variables. For example, if one side of - /// a difference expression is itself a stripe expression, it will be replaced - /// with a temporary variable assigned equal to this expression. - /// - /// Infinite entries in the matrix correspond correspond to an absence of a - /// constraint: - /// - /// v@col - v@row <= infinity - /// - /// is trivially true. Negated values at symmetric positions in the matrix - /// allow one to couple two inequalities into a single equality. - std::vector matrix; - - /// The mapping between the indices of variables in the DBM and the stripe - /// expressions they are equal to. These expressions are stored as they - /// appeared when constructing an SDBM from a SDBMExprs, in particular no - /// temporaries can appear in these expressions. This removes the need to - /// iteratively substitute definitions of the temporaries in the reverse - /// conversion. - DenseMap stripeToPoint; -}; - -} // namespace mlir - -#endif // MLIR_DIALECT_SDBM_SDBM_H diff --git a/mlir/include/mlir/Dialect/SDBM/SDBMDialect.h b/mlir/include/mlir/Dialect/SDBM/SDBMDialect.h deleted file mode 100644 index 85cfe91d2c9b9..0000000000000 --- a/mlir/include/mlir/Dialect/SDBM/SDBMDialect.h +++ /dev/null @@ -1,37 +0,0 @@ -//===- SDBMDialect.h - Dialect for striped DBMs -----------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef MLIR_DIALECT_SDBM_SDBMDIALECT_H -#define MLIR_DIALECT_SDBM_SDBMDIALECT_H - -#include "mlir/IR/Dialect.h" -#include "mlir/Support/StorageUniquer.h" - -namespace mlir { -class MLIRContext; - -class SDBMDialect : public Dialect { -public: - SDBMDialect(MLIRContext *context); - - /// Since there are no other virtual methods in this derived class, override - /// the destructor so that key methods get defined in the corresponding - /// module. - ~SDBMDialect() override; - - static StringRef getDialectNamespace() { return "sdbm"; } - - /// Get the uniquer for SDBM expressions. This should not be used directly. - StorageUniquer &getUniquer() { return uniquer; } - -private: - StorageUniquer uniquer; -}; -} // namespace mlir - -#endif // MLIR_DIALECT_SDBM_SDBMDIALECT_H diff --git a/mlir/include/mlir/Dialect/SDBM/SDBMExpr.h b/mlir/include/mlir/Dialect/SDBM/SDBMExpr.h deleted file mode 100644 index 7b51b892384e1..0000000000000 --- a/mlir/include/mlir/Dialect/SDBM/SDBMExpr.h +++ /dev/null @@ -1,576 +0,0 @@ -//===- SDBMExpr.h - MLIR SDBM Expression ------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// A striped difference-bound matrix (SDBM) expression is a constant expression, -// an identifier, a binary expression with constant RHS and +, stripe operators -// or a difference expression between two identifiers. -// -//===----------------------------------------------------------------------===// - -#ifndef MLIR_DIALECT_SDBM_SDBMEXPR_H -#define MLIR_DIALECT_SDBM_SDBMEXPR_H - -#include "mlir/Support/LLVM.h" -#include "llvm/ADT/DenseMapInfo.h" - -namespace mlir { - -class AffineExpr; -class MLIRContext; - -enum class SDBMExprKind { Add, Stripe, Diff, Constant, DimId, SymbolId, Neg }; - -namespace detail { -struct SDBMExprStorage; -struct SDBMBinaryExprStorage; -struct SDBMDiffExprStorage; -struct SDBMTermExprStorage; -struct SDBMConstantExprStorage; -struct SDBMNegExprStorage; -} // namespace detail - -class SDBMConstantExpr; -class SDBMDialect; -class SDBMDimExpr; -class SDBMSymbolExpr; -class SDBMTermExpr; - -/// Striped Difference-Bounded Matrix (SDBM) expression is a base left-hand side -/// expression for the SDBM framework. SDBM expressions are a subset of affine -/// expressions supporting low-complexity algorithms for the operations used in -/// loop transformations. In particular, are supported: -/// - constant expressions; -/// - single variables (dimensions and symbols) with +1 or -1 coefficient; -/// - stripe expressions: "x # C", where "x" is a single variable or another -/// stripe expression, "#" is the stripe operator, and "C" is a constant -/// expression; "#" is defined as x - x mod C. -/// - sum expressions between single variable/stripe expressions and constant -/// expressions; -/// - difference expressions between single variable/stripe expressions. -/// `SDBMExpr` class hierarchy provides a type-safe interface to constructing -/// and operating on SDBM expressions. For example, it requires the LHS of a -/// sum expression to be a single variable or a stripe expression. These -/// restrictions are intended to force the caller to perform the necessary -/// simplifications to stay within the SDBM domain, because SDBM expressions do -/// not combine in more cases than they do. This choice may be reconsidered in -/// the future. -/// -/// SDBM expressions are grouped into the following structure -/// - expression -/// - varying -/// - direct -/// - sum <- (term, constant) -/// - term -/// - symbol -/// - dimension -/// - stripe <- (direct, constant) -/// - negation <- (direct) -/// - difference <- (direct, term) -/// - constant -/// The notation <- (...) denotes the types of subexpressions a compound -/// expression can combine. The tree of subexpressions essentially imposes the -/// following canonicalization rules: -/// - constants are always folded; -/// - constants can only appear on the RHS of an expression; -/// - double negation must be elided; -/// - an additive constant term is only allowed in a sum expression, and -/// should be sunk into the nearest such expression in the tree; -/// - zero constant expression can only appear at the top level. -/// -/// `SDBMExpr` and derived classes are thin wrappers around a pointer owned by -/// an MLIRContext, and should be used by-value. They are uniqued in the -/// MLIRContext and immortal. -class SDBMExpr { -public: - using ImplType = detail::SDBMExprStorage; - SDBMExpr() : impl(nullptr) {} - /* implicit */ SDBMExpr(ImplType *expr) : impl(expr) {} - - /// SDBM expressions are thin wrappers around a unique'ed immutable pointer, - /// which makes them trivially assignable and trivially copyable. - SDBMExpr(const SDBMExpr &) = default; - SDBMExpr &operator=(const SDBMExpr &) = default; - - /// SDBM expressions can be compared straight-forwardly. - bool operator==(const SDBMExpr &other) const { return impl == other.impl; } - bool operator!=(const SDBMExpr &other) const { return !(*this == other); } - - /// SDBM expressions are convertible to `bool`: null expressions are converted - /// to false, non-null expressions are converted to true. - explicit operator bool() const { return impl != nullptr; } - bool operator!() const { return !static_cast(*this); } - - /// Negate the given SDBM expression. - SDBMExpr operator-(); - - /// Prints the SDBM expression. - void print(raw_ostream &os) const; - void dump() const; - - /// LLVM-style casts. - template bool isa() const { return U::isClassFor(*this); } - template U dyn_cast() const { - if (!isa()) - return {}; - return U(const_cast(this)->impl); - } - template U cast() const { - assert(isa() && "cast to incorrect subtype"); - return U(const_cast(this)->impl); - } - - /// Support for LLVM hashing. - ::llvm::hash_code hash_value() const { return ::llvm::hash_value(impl); } - - /// Returns the kind of the SDBM expression. - SDBMExprKind getKind() const; - - /// Returns the MLIR context in which this expression lives. - MLIRContext *getContext() const; - - /// Returns the SDBM dialect instance. - SDBMDialect *getDialect() const; - - /// Convert the SDBM expression into an Affine expression. This always - /// succeeds because SDBM are a subset of affine. - AffineExpr getAsAffineExpr() const; - - /// Try constructing an SDBM expression from the given affine expression. - /// This may fail if the affine expression is not representable as SDBM, in - /// which case llvm::None is returned. The conversion procedure recognizes - /// (nested) multiplicative ((x floordiv B) * B) and additive (x - x mod B) - /// patterns for the stripe expression. - static Optional tryConvertAffineExpr(AffineExpr affine); - -protected: - ImplType *impl; -}; - -/// SDBM constant expression, wraps a 64-bit integer. -class SDBMConstantExpr : public SDBMExpr { -public: - using ImplType = detail::SDBMConstantExprStorage; - - using SDBMExpr::SDBMExpr; - - /// Obtain or create a constant expression unique'ed in the given dialect - /// (which belongs to a context). - static SDBMConstantExpr get(SDBMDialect *dialect, int64_t value); - - static bool isClassFor(const SDBMExpr &expr) { - return expr.getKind() == SDBMExprKind::Constant; - } - - int64_t getValue() const; -}; - -/// SDBM varying expression can be one of: -/// - input variable expression; -/// - stripe expression; -/// - negation (product with -1) of either of the above. -/// - sum of a varying and a constant expression -/// - difference between varying expressions -class SDBMVaryingExpr : public SDBMExpr { -public: - using ImplType = detail::SDBMExprStorage; - using SDBMExpr::SDBMExpr; - - static bool isClassFor(const SDBMExpr &expr) { - return expr.getKind() == SDBMExprKind::DimId || - expr.getKind() == SDBMExprKind::SymbolId || - expr.getKind() == SDBMExprKind::Neg || - expr.getKind() == SDBMExprKind::Stripe || - expr.getKind() == SDBMExprKind::Add || - expr.getKind() == SDBMExprKind::Diff; - } -}; - -/// SDBM direct expression includes exactly one variable (symbol or dimension), -/// which is not negated in the expression. It can be one of: -/// - term expression; -/// - sum expression. -class SDBMDirectExpr : public SDBMVaryingExpr { -public: - using SDBMVaryingExpr::SDBMVaryingExpr; - - /// If this is a sum expression, return its variable part, otherwise return - /// self. - SDBMTermExpr getTerm(); - - /// If this is a sum expression, return its constant part, otherwise return 0. - int64_t getConstant(); - - static bool isClassFor(const SDBMExpr &expr) { - return expr.getKind() == SDBMExprKind::DimId || - expr.getKind() == SDBMExprKind::SymbolId || - expr.getKind() == SDBMExprKind::Stripe || - expr.getKind() == SDBMExprKind::Add; - } -}; - -/// SDBM term expression can be one of: -/// - single variable expression; -/// - stripe expression. -/// Stripe expressions are treated as terms since, in the SDBM domain, they are -/// attached to temporary variables and can appear anywhere a variable can. -class SDBMTermExpr : public SDBMDirectExpr { -public: - using SDBMDirectExpr::SDBMDirectExpr; - - static bool isClassFor(const SDBMExpr &expr) { - return expr.getKind() == SDBMExprKind::DimId || - expr.getKind() == SDBMExprKind::SymbolId || - expr.getKind() == SDBMExprKind::Stripe; - } -}; - -/// SDBM sum expression. LHS is a term expression and RHS is a constant. -class SDBMSumExpr : public SDBMDirectExpr { -public: - using ImplType = detail::SDBMBinaryExprStorage; - using SDBMDirectExpr::SDBMDirectExpr; - - /// Obtain or create a sum expression unique'ed in the given context. - static SDBMSumExpr get(SDBMTermExpr lhs, SDBMConstantExpr rhs); - - static bool isClassFor(const SDBMExpr &expr) { - SDBMExprKind kind = expr.getKind(); - return kind == SDBMExprKind::Add; - } - - SDBMTermExpr getLHS() const; - SDBMConstantExpr getRHS() const; -}; - -/// SDBM difference expression. LHS is a direct expression, i.e. it may be a -/// sum of a term and a constant. RHS is a term expression. Thus the -/// expression (t1 - t2 + C) with term expressions t1,t2 is represented as -/// diff(sum(t1, C), t2) -/// and it is possible to extract the constant factor without negating it. -class SDBMDiffExpr : public SDBMVaryingExpr { -public: - using ImplType = detail::SDBMDiffExprStorage; - using SDBMVaryingExpr::SDBMVaryingExpr; - - /// Obtain or create a difference expression unique'ed in the given context. - static SDBMDiffExpr get(SDBMDirectExpr lhs, SDBMTermExpr rhs); - - static bool isClassFor(const SDBMExpr &expr) { - return expr.getKind() == SDBMExprKind::Diff; - } - - SDBMDirectExpr getLHS() const; - SDBMTermExpr getRHS() const; -}; - -/// SDBM stripe expression "x # C" where "x" is a term expression, "C" is a -/// constant expression and "#" is the stripe operator defined as: -/// x # C = x - x mod C. -class SDBMStripeExpr : public SDBMTermExpr { -public: - using ImplType = detail::SDBMBinaryExprStorage; - using SDBMTermExpr::SDBMTermExpr; - - static bool isClassFor(const SDBMExpr &expr) { - return expr.getKind() == SDBMExprKind::Stripe; - } - - static SDBMStripeExpr get(SDBMDirectExpr var, SDBMConstantExpr stripeFactor); - - SDBMDirectExpr getLHS() const; - SDBMConstantExpr getStripeFactor() const; -}; - -/// SDBM "input" variable expression can be either a dimension identifier or -/// a symbol identifier. When used to define SDBM functions, dimensions are -/// interpreted as function arguments while symbols are treated as unknown but -/// constant values, hence the name. -class SDBMInputExpr : public SDBMTermExpr { -public: - using ImplType = detail::SDBMTermExprStorage; - using SDBMTermExpr::SDBMTermExpr; - - static bool isClassFor(const SDBMExpr &expr) { - return expr.getKind() == SDBMExprKind::DimId || - expr.getKind() == SDBMExprKind::SymbolId; - } - - unsigned getPosition() const; -}; - -/// SDBM dimension expression. Dimensions correspond to function arguments -/// when defining functions using SDBM expressions. -class SDBMDimExpr : public SDBMInputExpr { -public: - using ImplType = detail::SDBMTermExprStorage; - using SDBMInputExpr::SDBMInputExpr; - - /// Obtain or create a dimension expression unique'ed in the given dialect - /// (which belongs to a context). - static SDBMDimExpr get(SDBMDialect *dialect, unsigned position); - - static bool isClassFor(const SDBMExpr &expr) { - return expr.getKind() == SDBMExprKind::DimId; - } -}; - -/// SDBM symbol expression. Symbols correspond to symbolic constants when -/// defining functions using SDBM expressions. -class SDBMSymbolExpr : public SDBMInputExpr { -public: - using ImplType = detail::SDBMTermExprStorage; - using SDBMInputExpr::SDBMInputExpr; - - /// Obtain or create a symbol expression unique'ed in the given dialect (which - /// belongs to a context). - static SDBMSymbolExpr get(SDBMDialect *dialect, unsigned position); - - static bool isClassFor(const SDBMExpr &expr) { - return expr.getKind() == SDBMExprKind::SymbolId; - } -}; - -/// Negation of an SDBM variable expression. Equivalent to multiplying the -/// expression with -1 (SDBM does not support other coefficients that 1 and -1). -class SDBMNegExpr : public SDBMVaryingExpr { -public: - using ImplType = detail::SDBMNegExprStorage; - using SDBMVaryingExpr::SDBMVaryingExpr; - - /// Obtain or create a negation expression unique'ed in the given context. - static SDBMNegExpr get(SDBMDirectExpr var); - - static bool isClassFor(const SDBMExpr &expr) { - return expr.getKind() == SDBMExprKind::Neg; - } - - SDBMDirectExpr getVar() const; -}; - -/// A visitor class for SDBM expressions. Calls the kind-specific function -/// depending on the kind of expression it visits. -template class SDBMVisitor { -public: - /// Visit the given SDBM expression, dispatching to kind-specific functions. - Result visit(SDBMExpr expr) { - auto *derived = static_cast(this); - switch (expr.getKind()) { - case SDBMExprKind::Add: - case SDBMExprKind::Diff: - case SDBMExprKind::DimId: - case SDBMExprKind::SymbolId: - case SDBMExprKind::Neg: - case SDBMExprKind::Stripe: - return derived->visitVarying(expr.cast()); - case SDBMExprKind::Constant: - return derived->visitConstant(expr.cast()); - } - - llvm_unreachable("unsupported SDBM expression kind"); - } - - /// Traverse the SDBM expression tree calling `visit` on each node - /// in depth-first preorder. - void walkPreorder(SDBMExpr expr) { return walk(expr); } - - /// Traverse the SDBM expression tree calling `visit` on each node in - /// depth-first postorder. - void walkPostorder(SDBMExpr expr) { return walk(expr); } - -protected: - /// Default visitors do nothing. - void visitSum(SDBMSumExpr) {} - void visitDiff(SDBMDiffExpr) {} - void visitStripe(SDBMStripeExpr) {} - void visitDim(SDBMDimExpr) {} - void visitSymbol(SDBMSymbolExpr) {} - void visitNeg(SDBMNegExpr) {} - void visitConstant(SDBMConstantExpr) {} - - /// Default implementation of visitDirect dispatches to the dedicated for sums - /// or delegates to visitTerm for the other expression kinds. Concrete - /// visitors can overload it. - Result visitDirect(SDBMDirectExpr expr) { - auto *derived = static_cast(this); - if (auto sum = expr.dyn_cast()) - return derived->visitSum(sum); - else - return derived->visitTerm(expr.cast()); - } - - /// Default implementation of visitTerm dispatches to the special functions - /// for stripes and other variables. Concrete visitors can override it. - Result visitTerm(SDBMTermExpr expr) { - auto *derived = static_cast(this); - if (expr.getKind() == SDBMExprKind::Stripe) - return derived->visitStripe(expr.cast()); - else - return derived->visitInput(expr.cast()); - } - - /// Default implementation of visitInput dispatches to the special - /// functions for dimensions or symbols. Concrete visitors can override it to - /// visit all variables instead. - Result visitInput(SDBMInputExpr expr) { - auto *derived = static_cast(this); - if (expr.getKind() == SDBMExprKind::DimId) - return derived->visitDim(expr.cast()); - else - return derived->visitSymbol(expr.cast()); - } - - /// Default implementation of visitVarying dispatches to the special - /// functions for variables and negations thereof. Concrete visitors can - /// override it to visit all variables and negations instead. - Result visitVarying(SDBMVaryingExpr expr) { - auto *derived = static_cast(this); - if (auto var = expr.dyn_cast()) - return derived->visitDirect(var); - else if (auto neg = expr.dyn_cast()) - return derived->visitNeg(neg); - else if (auto diff = expr.dyn_cast()) - return derived->visitDiff(diff); - - llvm_unreachable("unhandled subtype of varying SDBM expression"); - } - - template void walk(SDBMExpr expr) { - if (isPreorder) - visit(expr); - if (auto sumExpr = expr.dyn_cast()) { - walk(sumExpr.getLHS()); - walk(sumExpr.getRHS()); - } else if (auto diffExpr = expr.dyn_cast()) { - walk(diffExpr.getLHS()); - walk(diffExpr.getRHS()); - } else if (auto stripeExpr = expr.dyn_cast()) { - walk(stripeExpr.getLHS()); - walk(stripeExpr.getStripeFactor()); - } else if (auto negExpr = expr.dyn_cast()) { - walk(negExpr.getVar()); - } - if (!isPreorder) - visit(expr); - } -}; - -/// Overloaded arithmetic operators for SDBM expressions asserting that their -/// arguments have the proper SDBM expression subtype. Perform canonicalization -/// and constant folding on these expressions. -namespace ops_assertions { - -/// Add two SDBM expressions. At least one of the expressions must be a -/// constant or a negation, but both expressions cannot be negations -/// simultaneously. -SDBMExpr operator+(SDBMExpr lhs, SDBMExpr rhs); -inline SDBMExpr operator+(SDBMExpr lhs, int64_t rhs) { - return lhs + SDBMConstantExpr::get(lhs.getDialect(), rhs); -} -inline SDBMExpr operator+(int64_t lhs, SDBMExpr rhs) { - return SDBMConstantExpr::get(rhs.getDialect(), lhs) + rhs; -} - -/// Subtract an SDBM expression from another SDBM expression. Both expressions -/// must not be difference expressions. -SDBMExpr operator-(SDBMExpr lhs, SDBMExpr rhs); -inline SDBMExpr operator-(SDBMExpr lhs, int64_t rhs) { - return lhs - SDBMConstantExpr::get(lhs.getDialect(), rhs); -} -inline SDBMExpr operator-(int64_t lhs, SDBMExpr rhs) { - return SDBMConstantExpr::get(rhs.getDialect(), lhs) - rhs; -} - -/// Construct a stripe expression from a positive expression and a positive -/// constant stripe factor. -SDBMExpr stripe(SDBMExpr expr, SDBMExpr factor); -inline SDBMExpr stripe(SDBMExpr expr, int64_t factor) { - return stripe(expr, SDBMConstantExpr::get(expr.getDialect(), factor)); -} -} // namespace ops_assertions - -} // end namespace mlir - -namespace llvm { -// SDBMExpr hash just like pointers. -template <> struct DenseMapInfo { - static mlir::SDBMExpr getEmptyKey() { - auto *pointer = llvm::DenseMapInfo::getEmptyKey(); - return mlir::SDBMExpr(static_cast(pointer)); - } - static mlir::SDBMExpr getTombstoneKey() { - auto *pointer = llvm::DenseMapInfo::getTombstoneKey(); - return mlir::SDBMExpr(static_cast(pointer)); - } - static unsigned getHashValue(mlir::SDBMExpr expr) { - return expr.hash_value(); - } - static bool isEqual(mlir::SDBMExpr lhs, mlir::SDBMExpr rhs) { - return lhs == rhs; - } -}; - -// SDBMDirectExpr hash just like pointers. -template <> struct DenseMapInfo { - static mlir::SDBMDirectExpr getEmptyKey() { - auto *pointer = llvm::DenseMapInfo::getEmptyKey(); - return mlir::SDBMDirectExpr( - static_cast(pointer)); - } - static mlir::SDBMDirectExpr getTombstoneKey() { - auto *pointer = llvm::DenseMapInfo::getTombstoneKey(); - return mlir::SDBMDirectExpr( - static_cast(pointer)); - } - static unsigned getHashValue(mlir::SDBMDirectExpr expr) { - return expr.hash_value(); - } - static bool isEqual(mlir::SDBMDirectExpr lhs, mlir::SDBMDirectExpr rhs) { - return lhs == rhs; - } -}; - -// SDBMTermExpr hash just like pointers. -template <> struct DenseMapInfo { - static mlir::SDBMTermExpr getEmptyKey() { - auto *pointer = llvm::DenseMapInfo::getEmptyKey(); - return mlir::SDBMTermExpr(static_cast(pointer)); - } - static mlir::SDBMTermExpr getTombstoneKey() { - auto *pointer = llvm::DenseMapInfo::getTombstoneKey(); - return mlir::SDBMTermExpr(static_cast(pointer)); - } - static unsigned getHashValue(mlir::SDBMTermExpr expr) { - return expr.hash_value(); - } - static bool isEqual(mlir::SDBMTermExpr lhs, mlir::SDBMTermExpr rhs) { - return lhs == rhs; - } -}; - -// SDBMConstantExpr hash just like pointers. -template <> struct DenseMapInfo { - static mlir::SDBMConstantExpr getEmptyKey() { - auto *pointer = llvm::DenseMapInfo::getEmptyKey(); - return mlir::SDBMConstantExpr( - static_cast(pointer)); - } - static mlir::SDBMConstantExpr getTombstoneKey() { - auto *pointer = llvm::DenseMapInfo::getTombstoneKey(); - return mlir::SDBMConstantExpr( - static_cast(pointer)); - } - static unsigned getHashValue(mlir::SDBMConstantExpr expr) { - return expr.hash_value(); - } - static bool isEqual(mlir::SDBMConstantExpr lhs, mlir::SDBMConstantExpr rhs) { - return lhs == rhs; - } -}; -} // namespace llvm - -#endif // MLIR_DIALECT_SDBM_SDBMEXPR_H diff --git a/mlir/include/mlir/InitAllDialects.h b/mlir/include/mlir/InitAllDialects.h index c52dae3fd1b51..5cf0429942ca7 100644 --- a/mlir/include/mlir/InitAllDialects.h +++ b/mlir/include/mlir/InitAllDialects.h @@ -35,7 +35,6 @@ #include "mlir/Dialect/PDLInterp/IR/PDLInterp.h" #include "mlir/Dialect/Quant/QuantOps.h" #include "mlir/Dialect/SCF/SCF.h" -#include "mlir/Dialect/SDBM/SDBMDialect.h" #include "mlir/Dialect/SPIRV/IR/SPIRVDialect.h" #include "mlir/Dialect/Shape/IR/Shape.h" #include "mlir/Dialect/SparseTensor/IR/SparseTensor.h" @@ -75,7 +74,6 @@ inline void registerAllDialects(DialectRegistry ®istry) { vector::VectorDialect, NVVM::NVVMDialect, ROCDL::ROCDLDialect, - SDBMDialect, shape::ShapeDialect, sparse_tensor::SparseTensorDialect, tensor::TensorDialect, diff --git a/mlir/lib/Dialect/CMakeLists.txt b/mlir/lib/Dialect/CMakeLists.txt index de946beef0d9f..8a6f08ab3b837 100644 --- a/mlir/lib/Dialect/CMakeLists.txt +++ b/mlir/lib/Dialect/CMakeLists.txt @@ -17,7 +17,6 @@ add_subdirectory(PDL) add_subdirectory(PDLInterp) add_subdirectory(Quant) add_subdirectory(SCF) -add_subdirectory(SDBM) add_subdirectory(Shape) add_subdirectory(SparseTensor) add_subdirectory(SPIRV) diff --git a/mlir/lib/Dialect/SDBM/CMakeLists.txt b/mlir/lib/Dialect/SDBM/CMakeLists.txt deleted file mode 100644 index db2b9ac85472c..0000000000000 --- a/mlir/lib/Dialect/SDBM/CMakeLists.txt +++ /dev/null @@ -1,11 +0,0 @@ -add_mlir_dialect_library(MLIRSDBM - SDBM.cpp - SDBMDialect.cpp - SDBMExpr.cpp - - ADDITIONAL_HEADER_DIRS - ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/SDBM - - LINK_LIBS PUBLIC - MLIRIR - ) diff --git a/mlir/lib/Dialect/SDBM/SDBM.cpp b/mlir/lib/Dialect/SDBM/SDBM.cpp deleted file mode 100644 index df24e77bc4f29..0000000000000 --- a/mlir/lib/Dialect/SDBM/SDBM.cpp +++ /dev/null @@ -1,551 +0,0 @@ -//===- SDBM.cpp - MLIR SDBM implementation --------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// A striped difference-bound matrix (SDBM) is a set in Z^N (or R^N) defined -// as {(x_1, ... x_n) | f(x_1, ... x_n) >= 0} where f is an SDBM expression. -// -//===----------------------------------------------------------------------===// - -#include "mlir/Dialect/SDBM/SDBM.h" -#include "mlir/Dialect/SDBM/SDBMExpr.h" - -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/Support/FormatVariadic.h" -#include "llvm/Support/raw_ostream.h" - -using namespace mlir; - -// Helper function for SDBM construction that collects information necessary to -// start building an SDBM in one sweep. In particular, it records the largest -// position of a dimension in `dim`, that of a symbol in `symbol` as well as -// collects all unique stripe expressions in `stripes`. Uses SetVector to -// ensure these expressions always have the same order. -static void collectSDBMBuildInfo(SDBMExpr expr, int &dim, int &symbol, - llvm::SmallSetVector &stripes) { - struct Visitor : public SDBMVisitor { - void visitDim(SDBMDimExpr dimExpr) { - int p = dimExpr.getPosition(); - if (p > maxDimPosition) - maxDimPosition = p; - } - void visitSymbol(SDBMSymbolExpr symbExpr) { - int p = symbExpr.getPosition(); - if (p > maxSymbPosition) - maxSymbPosition = p; - } - void visitStripe(SDBMStripeExpr stripeExpr) { stripes.insert(stripeExpr); } - - Visitor(llvm::SmallSetVector &stripes) : stripes(stripes) {} - - int maxDimPosition = -1; - int maxSymbPosition = -1; - llvm::SmallSetVector &stripes; - }; - - Visitor visitor(stripes); - visitor.walkPostorder(expr); - dim = std::max(dim, visitor.maxDimPosition); - symbol = std::max(symbol, visitor.maxSymbPosition); -} - -namespace { -// Utility class for SDBMBuilder. Represents a value that can be inserted in -// the SDB matrix that corresponds to "v0 - v1 + C <= 0", where v0 and v1 is -// any combination of the positive and negative positions. Since multiple -// variables can be declared equal to the same stripe expression, the -// constraints on this expression must be reflected to all these variables. For -// example, if -// d0 = s0 # 42 -// d1 = s0 # 42 -// d2 = s1 # 2 -// d3 = s1 # 2 -// the constraint -// s0 # 42 - s1 # 2 <= C -// should be reflected in the DB matrix as -// d0 - d2 <= C -// d1 - d2 <= C -// d0 - d3 <= C -// d1 - d3 <= C -// since the DB matrix has no knowledge of the transitive equality between d0, -// d1 and s0 # 42 as well as between d2, d3 and s1 # 2. This knowledge can be -// obtained by computing a transitive closure, which is impossible until the -// DBM is actually built. -struct SDBMBuilderResult { - // Positions in the matrix of the variables taken with the "+" sign in the - // difference expression, 0 if it is a constant rather than a variable. - SmallVector positivePos; - - // Positions in the matrix of the variables taken with the "-" sign in the - // difference expression, 0 if it is a constant rather than a variable. - SmallVector negativePos; - - // Constant value in the difference expression. - int64_t value = 0; -}; - -// Visitor for building an SDBM from SDBM expressions. After traversing an SDBM -// expression, produces an update to the SDB matrix specifying the positions in -// the matrix and the negated value that should be stored. Both the positive -// and the negative positions may be lists of indices in cases where multiple -// variables are equal to the same stripe expression. In such cases, the update -// applies to the cross product of positions because elements involved in the -// update are (transitively) equal and should have the same constraints, but we -// may not have an explicit equality for them. -struct SDBMBuilder : public SDBMVisitor { -public: - // A difference expression produces both the positive and the negative - // coordinate in the matrix, recursively traversing the LHS and the RHS. The - // value is the difference between values obtained from LHS and RHS. - SDBMBuilderResult visitDiff(SDBMDiffExpr diffExpr) { - auto lhs = visit(diffExpr.getLHS()); - auto rhs = visit(diffExpr.getRHS()); - assert(lhs.negativePos.size() == 1 && lhs.negativePos[0] == 0 && - "unexpected negative expression in a difference expression"); - assert(rhs.negativePos.size() == 1 && lhs.negativePos[0] == 0 && - "unexpected negative expression in a difference expression"); - - SDBMBuilderResult result; - result.positivePos = lhs.positivePos; - result.negativePos = rhs.positivePos; - result.value = lhs.value - rhs.value; - return result; - } - - // An input expression is always taken with the "+" sign and therefore - // produces a positive coordinate keeping the negative coordinate zero for an - // eventual constant. - SDBMBuilderResult visitInput(SDBMInputExpr expr) { - SDBMBuilderResult r; - r.positivePos.push_back(linearPosition(expr)); - r.negativePos.push_back(0); - return r; - } - - // A stripe expression is always equal to one or more variables, which may be - // temporaries, and appears with a "+" sign in the SDBM expression tree. Take - // the positions of the corresponding variables as positive coordinates. - SDBMBuilderResult visitStripe(SDBMStripeExpr expr) { - SDBMBuilderResult r; - assert(pointExprToStripe.count(expr)); - r.positivePos = pointExprToStripe[expr]; - r.negativePos.push_back(0); - return r; - } - - // A constant expression has both coordinates at zero. - SDBMBuilderResult visitConstant(SDBMConstantExpr expr) { - SDBMBuilderResult r; - r.positivePos.push_back(0); - r.negativePos.push_back(0); - r.value = expr.getValue(); - return r; - } - - // A negation expression swaps the positive and the negative coordinates - // and also negates the constant value. - SDBMBuilderResult visitNeg(SDBMNegExpr expr) { - SDBMBuilderResult result = visit(expr.getVar()); - std::swap(result.positivePos, result.negativePos); - result.value = -result.value; - return result; - } - - // The RHS of a sum expression must be a constant and therefore must have both - // positive and negative coordinates at zero. Take the sum of the values - // between LHS and RHS and keep LHS coordinates. - SDBMBuilderResult visitSum(SDBMSumExpr expr) { - auto lhs = visit(expr.getLHS()); - auto rhs = visit(expr.getRHS()); - for (auto pos : rhs.negativePos) { - (void)pos; - assert(pos == 0 && "unexpected variable on the RHS of SDBM sum"); - } - for (auto pos : rhs.positivePos) { - (void)pos; - assert(pos == 0 && "unexpected variable on the RHS of SDBM sum"); - } - - lhs.value += rhs.value; - return lhs; - } - - SDBMBuilder(DenseMap> &pointExprToStripe, - function_ref callback) - : pointExprToStripe(pointExprToStripe), linearPosition(callback) {} - - DenseMap> &pointExprToStripe; - function_ref linearPosition; -}; -} // namespace - -SDBM SDBM::get(ArrayRef inequalities, ArrayRef equalities) { - SDBM result; - - // TODO: consider detecting equalities in the list of inequalities. - // This is potentially expensive and requires to - // - create a list of negated inequalities (may allocate under lock); - // - perform a pairwise comparison of direct and negated inequalities; - // - copy the lists of equalities and inequalities, and move entries between - // them; - // only for the purpose of sparing a temporary variable in cases where an - // implicit equality between a variable and a stripe expression is present in - // the input. - - // Do the first sweep over (in)equalities to collect the information necessary - // to allocate the SDB matrix (number of dimensions, symbol and temporary - // variables required for stripe expressions). - llvm::SmallSetVector stripes; - int maxDim = -1; - int maxSymbol = -1; - for (auto expr : inequalities) - collectSDBMBuildInfo(expr, maxDim, maxSymbol, stripes); - for (auto expr : equalities) - collectSDBMBuildInfo(expr, maxDim, maxSymbol, stripes); - // Indexing of dimensions starts with 0, obtain the number of dimensions by - // incrementing the maximal position of the dimension seen in expressions. - result.numDims = maxDim + 1; - result.numSymbols = maxSymbol + 1; - result.numTemporaries = 0; - - // Helper function that returns the position of the variable represented by - // an SDBM input expression. - auto linearPosition = [result](SDBMInputExpr expr) { - if (expr.isa()) - return result.getDimPosition(expr.getPosition()); - return result.getSymbolPosition(expr.getPosition()); - }; - - // Check if some stripe expressions are equal to another variable. In - // particular, look for the equalities of the form - // d0 - stripe-expression = 0, or - // stripe-expression - d0 = 0. - // There may be multiple variables that are equal to the same stripe - // expression. Keep track of those in pointExprToStripe. - // There may also be multiple stripe expressions equal to the same variable. - // Introduce a temporary variable for each of those. - DenseMap> pointExprToStripe; - unsigned numTemporaries = 0; - - auto updateStripePointMaps = [&numTemporaries, &result, &pointExprToStripe, - linearPosition](SDBMInputExpr input, - SDBMExpr expr) { - unsigned position = linearPosition(input); - if (result.stripeToPoint.count(position) && - result.stripeToPoint[position] != expr) { - position = result.getNumVariables() + numTemporaries++; - } - pointExprToStripe[expr].push_back(position); - result.stripeToPoint.insert(std::make_pair(position, expr)); - }; - - for (auto eq : equalities) { - auto diffExpr = eq.dyn_cast(); - if (!diffExpr) - continue; - - auto lhs = diffExpr.getLHS(); - auto rhs = diffExpr.getRHS(); - auto lhsInput = lhs.dyn_cast(); - auto rhsInput = rhs.dyn_cast(); - - if (lhsInput && stripes.count(rhs)) - updateStripePointMaps(lhsInput, rhs); - if (rhsInput && stripes.count(lhs)) - updateStripePointMaps(rhsInput, lhs); - } - - // Assign the remaining stripe expressions to temporary variables. These - // expressions are the ones that could not be associated with an existing - // variable in the previous step. - for (auto expr : stripes) { - if (pointExprToStripe.count(expr)) - continue; - unsigned position = result.getNumVariables() + numTemporaries++; - pointExprToStripe[expr].push_back(position); - result.stripeToPoint.insert(std::make_pair(position, expr)); - } - - // Create the DBM matrix, initialized to infinity values for the least tight - // possible bound (x - y <= infinity is always true). - result.numTemporaries = numTemporaries; - result.matrix.resize(result.getNumVariables() * result.getNumVariables(), - IntInfty::infinity()); - - SDBMBuilder builder(pointExprToStripe, linearPosition); - - // Only keep the tightest constraint. Since we transform everything into - // less-than-or-equals-to inequalities, keep the smallest constant. For - // example, if we have d0 - d1 <= 42 and d0 - d1 <= 2, we keep the latter. - // Note that the input expressions are in the shape of d0 - d1 + -42 <= 0 - // so we negate the value before storing it. - // In case where the positive and the negative positions are equal, the - // corresponding expression has the form d0 - d0 + -42 <= 0. If the constant - // value is positive, the set defined by SDBM is trivially empty. We store - // this value anyway and continue processing to maintain the correspondence - // between the matrix form and the list-of-SDBMExpr form. - // TODO: we may want to reconsider this once we have canonicalization - // or simplification in place - auto updateMatrix = [](SDBM &sdbm, const SDBMBuilderResult &r) { - for (auto positivePos : r.positivePos) { - for (auto negativePos : r.negativePos) { - auto &m = sdbm.at(negativePos, positivePos); - m = m < -r.value ? m : -r.value; - } - } - }; - - // Do the second sweep on (in)equalities, updating the SDB matrix to reflect - // the constraints. - for (auto ineq : inequalities) - updateMatrix(result, builder.visit(ineq)); - - // An equality f(x) = 0 is represented as a pair of inequalities {f(x) >= 0; - // f(x) <= 0} or, alternatively, {-f(x) <= 0 and f(x) <= 0}. - for (auto eq : equalities) { - updateMatrix(result, builder.visit(eq)); - updateMatrix(result, builder.visit(-eq)); - } - - // Add the inequalities induced by stripe equalities. - // t = x # C => t <= x <= t + C - 1 - // which is equivalent to - // {t - x <= 0; - // x - t - (C - 1) <= 0}. - for (const auto &pair : result.stripeToPoint) { - auto stripe = pair.second.cast(); - SDBMBuilderResult update = builder.visit(stripe.getLHS()); - assert(update.negativePos.size() == 1 && update.negativePos[0] == 0 && - "unexpected negated variable in stripe expression"); - assert(update.value == 0 && - "unexpected non-zero value in stripe expression"); - update.negativePos.clear(); - update.negativePos.push_back(pair.first); - update.value = -(stripe.getStripeFactor().getValue() - 1); - updateMatrix(result, update); - - std::swap(update.negativePos, update.positivePos); - update.value = 0; - updateMatrix(result, update); - } - - return result; -} - -// Given a row and a column position in the square DBM, insert one equality -// or up to two inequalities that correspond the entries (col, row) and (row, -// col) in the DBM. `rowExpr` and `colExpr` contain the expressions such that -// colExpr - rowExpr <= V where V is the value at (row, col) in the DBM. -// If one of the expressions is derived from another using a stripe operation, -// check if the inequalities induced by the stripe operation subsume the -// inequalities defined in the DBM and if so, elide these inequalities. -void SDBM::convertDBMElement(unsigned row, unsigned col, SDBMTermExpr rowExpr, - SDBMTermExpr colExpr, - SmallVectorImpl &inequalities, - SmallVectorImpl &equalities) { - using ops_assertions::operator+; - using ops_assertions::operator-; - - auto diffIJValue = at(col, row); - auto diffJIValue = at(row, col); - - // If symmetric entries are opposite, the corresponding expressions are equal. - if (diffIJValue.isFinite() && - diffIJValue.getValue() == -diffJIValue.getValue()) { - equalities.push_back(rowExpr - colExpr - diffIJValue.getValue()); - return; - } - - // Given an inequality x0 - x1 <= A, check if x0 is a stripe variable derived - // from x1: x0 = x1 # B. If so, it would imply the constraints - // x0 <= x1 <= x0 + (B - 1) <=> x0 - x1 <= 0 and x1 - x0 <= (B - 1). - // Therefore, if A >= 0, this inequality is subsumed by that implied - // by the stripe equality and thus can be elided. - // Similarly, check if x1 is a stripe variable derived from x0: x1 = x0 # C. - // If so, it would imply the constraints x1 <= x0 <= x1 + (C - 1) <=> - // <=> x1 - x0 <= 0 and x0 - x1 <= (C - 1). Therefore, if A >= (C - 1), this - // inequality can be elided. - // - // Note: x0 and x1 may be a stripe expressions themselves, we rely on stripe - // expressions being stored without temporaries on the RHS and being passed - // into this function as is. - auto canElide = [this](unsigned x0, unsigned x1, SDBMExpr x0Expr, - SDBMExpr x1Expr, int64_t value) { - if (stripeToPoint.count(x0)) { - auto stripe = stripeToPoint[x0].cast(); - SDBMDirectExpr var = stripe.getLHS(); - if (x1Expr == var && value >= 0) - return true; - } - if (stripeToPoint.count(x1)) { - auto stripe = stripeToPoint[x1].cast(); - SDBMDirectExpr var = stripe.getLHS(); - if (x0Expr == var && value >= stripe.getStripeFactor().getValue() - 1) - return true; - } - return false; - }; - - // Check row - col. - if (diffIJValue.isFinite() && - !canElide(row, col, rowExpr, colExpr, diffIJValue.getValue())) { - inequalities.push_back(rowExpr - colExpr - diffIJValue.getValue()); - } - // Check col - row. - if (diffJIValue.isFinite() && - !canElide(col, row, colExpr, rowExpr, diffJIValue.getValue())) { - inequalities.push_back(colExpr - rowExpr - diffJIValue.getValue()); - } -} - -// The values on the main diagonal correspond to the upper bound on the -// difference between a variable and itself: d0 - d0 <= C, or alternatively -// to -C <= 0. Only construct the inequalities when C is negative, which -// are trivially false but necessary for the returned system of inequalities -// to indicate that the set it defines is empty. -void SDBM::convertDBMDiagonalElement(unsigned pos, SDBMTermExpr expr, - SmallVectorImpl &inequalities) { - auto selfDifference = at(pos, pos); - if (selfDifference.isFinite() && selfDifference < 0) { - auto selfDifferenceValueExpr = - SDBMConstantExpr::get(expr.getDialect(), -selfDifference.getValue()); - inequalities.push_back(selfDifferenceValueExpr); - } -} - -void SDBM::getSDBMExpressions(SDBMDialect *dialect, - SmallVectorImpl &inequalities, - SmallVectorImpl &equalities) { - using ops_assertions::operator-; - using ops_assertions::operator+; - - // Helper function that creates an SDBMInputExpr given the linearized position - // of variable in the DBM. - auto getInput = [dialect, this](unsigned matrixPos) -> SDBMInputExpr { - if (matrixPos < numDims) - return SDBMDimExpr::get(dialect, matrixPos); - return SDBMSymbolExpr::get(dialect, matrixPos - numDims); - }; - - // The top-left value corresponds to inequality 0 <= C. If C is negative, the - // set defined by SDBM is trivially empty and we add the constraint -C <= 0 to - // the list of inequalities. Otherwise, the constraint is trivially true and - // we ignore it. - auto difference = at(0, 0); - if (difference.isFinite() && difference < 0) { - inequalities.push_back( - SDBMConstantExpr::get(dialect, -difference.getValue())); - } - - // Traverse the segment of the matrix that involves non-temporary variables. - unsigned numTrueVariables = numDims + numSymbols; - for (unsigned i = 0; i < numTrueVariables; ++i) { - // The first row and column represent numerical upper and lower bound on - // each variable. Transform them into inequalities if they are finite. - auto upperBound = at(0, 1 + i); - auto lowerBound = at(1 + i, 0); - auto inputExpr = getInput(i); - if (upperBound.isFinite() && - upperBound.getValue() == -lowerBound.getValue()) { - equalities.push_back(inputExpr - upperBound.getValue()); - } else if (upperBound.isFinite()) { - inequalities.push_back(inputExpr - upperBound.getValue()); - } else if (lowerBound.isFinite()) { - inequalities.push_back(-inputExpr - lowerBound.getValue()); - } - - // Introduce trivially false inequalities if required by diagonal elements. - convertDBMDiagonalElement(1 + i, inputExpr, inequalities); - - // Introduce equalities or inequalities between non-temporary variables. - for (unsigned j = 0; j < i; ++j) { - convertDBMElement(1 + i, 1 + j, getInput(i), getInput(j), inequalities, - equalities); - } - } - - // Add equalities for stripe expressions that define non-temporary - // variables. Temporary variables will be substituted into their uses and - // should not appear in the resulting equalities. - for (const auto &stripePair : stripeToPoint) { - unsigned position = stripePair.first; - if (position < 1 + numTrueVariables) { - equalities.push_back(getInput(position - 1) - stripePair.second); - } - } - - // Add equalities / inequalities involving temporaries by replacing the - // temporaries with stripe expressions that define them. - for (unsigned i = 1 + numTrueVariables, e = getNumVariables(); i < e; ++i) { - // Mixed constraints involving one temporary (j) and one non-temporary (i) - // variable. - for (unsigned j = 0; j < numTrueVariables; ++j) { - convertDBMElement(i, 1 + j, stripeToPoint[i].cast(), - getInput(j), inequalities, equalities); - } - - // Constraints involving only temporary variables. - for (unsigned j = 1 + numTrueVariables; j < i; ++j) { - convertDBMElement(i, j, stripeToPoint[i].cast(), - stripeToPoint[j].cast(), inequalities, - equalities); - } - - // Introduce trivially false inequalities if required by diagonal elements. - convertDBMDiagonalElement(i, stripeToPoint[i].cast(), - inequalities); - } -} - -void SDBM::print(raw_ostream &os) { - unsigned numVariables = getNumVariables(); - - // Helper function that prints the name of the variable given its linearized - // position in the DBM. - auto getVarName = [this](unsigned matrixPos) -> std::string { - if (matrixPos == 0) - return "cst"; - matrixPos -= 1; - if (matrixPos < numDims) - return std::string(llvm::formatv("d{0}", matrixPos)); - matrixPos -= numDims; - if (matrixPos < numSymbols) - return std::string(llvm::formatv("s{0}", matrixPos)); - matrixPos -= numSymbols; - return std::string(llvm::formatv("t{0}", matrixPos)); - }; - - // Header row. - os << " cst"; - for (unsigned i = 1; i < numVariables; ++i) { - os << llvm::formatv(" {0,4}", getVarName(i)); - } - os << '\n'; - - // Data rows. - for (unsigned i = 0; i < numVariables; ++i) { - os << llvm::formatv("{0,-4}", getVarName(i)); - for (unsigned j = 0; j < numVariables; ++j) { - IntInfty value = operator()(i, j); - if (!value.isFinite()) - os << " inf"; - else - os << llvm::formatv(" {0,4}", value.getValue()); - } - os << '\n'; - } - - // Explanation of temporaries. - for (const auto &pair : stripeToPoint) { - os << getVarName(pair.first) << " = "; - pair.second.print(os); - os << '\n'; - } -} - -void SDBM::dump() { print(llvm::errs()); } diff --git a/mlir/lib/Dialect/SDBM/SDBMDialect.cpp b/mlir/lib/Dialect/SDBM/SDBMDialect.cpp deleted file mode 100644 index 4e3e050b4a4f8..0000000000000 --- a/mlir/lib/Dialect/SDBM/SDBMDialect.cpp +++ /dev/null @@ -1,23 +0,0 @@ -//===- SDBMDialect.cpp - MLIR SDBM Dialect --------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "mlir/Dialect/SDBM/SDBMDialect.h" -#include "SDBMExprDetail.h" - -using namespace mlir; - -SDBMDialect::SDBMDialect(MLIRContext *context) - : Dialect(getDialectNamespace(), context, TypeID::get()) { - uniquer.registerParametricStorageType(); - uniquer.registerParametricStorageType(); - uniquer.registerParametricStorageType(); - uniquer.registerParametricStorageType(); - uniquer.registerParametricStorageType(); -} - -SDBMDialect::~SDBMDialect() = default; diff --git a/mlir/lib/Dialect/SDBM/SDBMExpr.cpp b/mlir/lib/Dialect/SDBM/SDBMExpr.cpp deleted file mode 100644 index 5adcbcc78d524..0000000000000 --- a/mlir/lib/Dialect/SDBM/SDBMExpr.cpp +++ /dev/null @@ -1,732 +0,0 @@ -//===- SDBMExpr.cpp - MLIR SDBM Expression implementation -----------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// A striped difference-bound matrix (SDBM) expression is a constant expression, -// an identifier, a binary expression with constant RHS and +, stripe operators -// or a difference expression between two identifiers. -// -//===----------------------------------------------------------------------===// - -#include "mlir/Dialect/SDBM/SDBMExpr.h" -#include "SDBMExprDetail.h" -#include "mlir/Dialect/SDBM/SDBMDialect.h" -#include "mlir/IR/AffineExpr.h" -#include "mlir/IR/AffineExprVisitor.h" - -#include "llvm/Support/raw_ostream.h" - -using namespace mlir; - -namespace { -/// A simple compositional matcher for AffineExpr -/// -/// Example usage: -/// -/// ```c++ -/// AffineExprMatcher x, C, m; -/// AffineExprMatcher pattern1 = ((x % C) * m) + x; -/// AffineExprMatcher pattern2 = x + ((x % C) * m); -/// if (pattern1.match(expr) || pattern2.match(expr)) { -/// ... -/// } -/// ``` -class AffineExprMatcherStorage; -class AffineExprMatcher { -public: - AffineExprMatcher(); - AffineExprMatcher(const AffineExprMatcher &other); - - AffineExprMatcher operator+(AffineExprMatcher other) { - return AffineExprMatcher(AffineExprKind::Add, *this, other); - } - AffineExprMatcher operator*(AffineExprMatcher other) { - return AffineExprMatcher(AffineExprKind::Mul, *this, other); - } - AffineExprMatcher floorDiv(AffineExprMatcher other) { - return AffineExprMatcher(AffineExprKind::FloorDiv, *this, other); - } - AffineExprMatcher ceilDiv(AffineExprMatcher other) { - return AffineExprMatcher(AffineExprKind::CeilDiv, *this, other); - } - AffineExprMatcher operator%(AffineExprMatcher other) { - return AffineExprMatcher(AffineExprKind::Mod, *this, other); - } - - AffineExpr match(AffineExpr expr); - AffineExpr matched(); - Optional getMatchedConstantValue(); - -private: - AffineExprMatcher(AffineExprKind k, AffineExprMatcher a, AffineExprMatcher b); - AffineExprKind kind; // only used to match in binary op cases. - // A shared_ptr allows multiple references to same matcher storage without - // worrying about ownership or dealing with an arena. To be cleaned up if we - // go with this. - std::shared_ptr storage; -}; - -class AffineExprMatcherStorage { -public: - AffineExprMatcherStorage() {} - AffineExprMatcherStorage(const AffineExprMatcherStorage &other) - : subExprs(other.subExprs.begin(), other.subExprs.end()), - matched(other.matched) {} - AffineExprMatcherStorage(ArrayRef exprs) - : subExprs(exprs.begin(), exprs.end()) {} - AffineExprMatcherStorage(AffineExprMatcher &a, AffineExprMatcher &b) - : subExprs({a, b}) {} - SmallVector subExprs; - AffineExpr matched; -}; -} // namespace - -AffineExprMatcher::AffineExprMatcher() - : kind(AffineExprKind::Constant), storage(new AffineExprMatcherStorage()) {} - -AffineExprMatcher::AffineExprMatcher(const AffineExprMatcher &other) - : kind(other.kind), storage(other.storage) {} - -Optional AffineExprMatcher::getMatchedConstantValue() { - if (auto cst = storage->matched.dyn_cast()) - return cst.getValue(); - return None; -} - -AffineExpr AffineExprMatcher::match(AffineExpr expr) { - if (kind > AffineExprKind::LAST_AFFINE_BINARY_OP) { - if (storage->matched) - if (storage->matched != expr) - return AffineExpr(); - storage->matched = expr; - return storage->matched; - } - if (kind != expr.getKind()) { - return AffineExpr(); - } - if (auto bin = expr.dyn_cast()) { - if (!storage->subExprs.empty() && - !storage->subExprs[0].match(bin.getLHS())) { - return AffineExpr(); - } - if (!storage->subExprs.empty() && - !storage->subExprs[1].match(bin.getRHS())) { - return AffineExpr(); - } - if (storage->matched) - if (storage->matched != expr) - return AffineExpr(); - storage->matched = expr; - return storage->matched; - } - llvm_unreachable("binary expected"); -} - -AffineExpr AffineExprMatcher::matched() { return storage->matched; } - -AffineExprMatcher::AffineExprMatcher(AffineExprKind k, AffineExprMatcher a, - AffineExprMatcher b) - : kind(k), storage(new AffineExprMatcherStorage(a, b)) { - storage->subExprs.push_back(a); - storage->subExprs.push_back(b); -} - -//===----------------------------------------------------------------------===// -// SDBMExpr -//===----------------------------------------------------------------------===// - -SDBMExprKind SDBMExpr::getKind() const { return impl->getKind(); } - -MLIRContext *SDBMExpr::getContext() const { - return impl->dialect->getContext(); -} - -SDBMDialect *SDBMExpr::getDialect() const { return impl->dialect; } - -void SDBMExpr::print(raw_ostream &os) const { - struct Printer : public SDBMVisitor { - Printer(raw_ostream &ostream) : prn(ostream) {} - - void visitSum(SDBMSumExpr expr) { - visit(expr.getLHS()); - prn << " + "; - visit(expr.getRHS()); - } - void visitDiff(SDBMDiffExpr expr) { - visit(expr.getLHS()); - prn << " - "; - visit(expr.getRHS()); - } - void visitDim(SDBMDimExpr expr) { prn << 'd' << expr.getPosition(); } - void visitSymbol(SDBMSymbolExpr expr) { prn << 's' << expr.getPosition(); } - void visitStripe(SDBMStripeExpr expr) { - SDBMDirectExpr lhs = expr.getLHS(); - bool isTerm = lhs.isa(); - if (!isTerm) - prn << '('; - visit(lhs); - if (!isTerm) - prn << ')'; - prn << " # "; - visitConstant(expr.getStripeFactor()); - } - void visitNeg(SDBMNegExpr expr) { - bool isSum = expr.getVar().isa(); - prn << '-'; - if (isSum) - prn << '('; - visit(expr.getVar()); - if (isSum) - prn << ')'; - } - void visitConstant(SDBMConstantExpr expr) { prn << expr.getValue(); } - - raw_ostream &prn; - }; - Printer printer(os); - printer.visit(*this); -} - -void SDBMExpr::dump() const { - print(llvm::errs()); - llvm::errs() << '\n'; -} - -namespace { -// Helper class to perform negation of an SDBM expression. -struct SDBMNegator : public SDBMVisitor { - // Any term expression is wrapped into a negation expression. - // -(x) = -x - SDBMExpr visitDirect(SDBMDirectExpr expr) { return SDBMNegExpr::get(expr); } - // A negation expression is unwrapped. - // -(-x) = x - SDBMExpr visitNeg(SDBMNegExpr expr) { return expr.getVar(); } - // The value of the constant is negated. - SDBMExpr visitConstant(SDBMConstantExpr expr) { - return SDBMConstantExpr::get(expr.getDialect(), -expr.getValue()); - } - - // Terms of a difference are interchanged. Since only the LHS of a diff - // expression is allowed to be a sum with a constant, we need to recreate the - // sum with the negated value: - // -((x + C) - y) = (y - C) - x. - SDBMExpr visitDiff(SDBMDiffExpr expr) { - // If the LHS is just a term, we can do straightforward interchange. - if (auto term = expr.getLHS().dyn_cast()) - return SDBMDiffExpr::get(expr.getRHS(), term); - - auto sum = expr.getLHS().cast(); - auto cst = visitConstant(sum.getRHS()).cast(); - return SDBMDiffExpr::get(SDBMSumExpr::get(expr.getRHS(), cst), - sum.getLHS()); - } -}; -} // namespace - -SDBMExpr SDBMExpr::operator-() { return SDBMNegator().visit(*this); } - -//===----------------------------------------------------------------------===// -// SDBMSumExpr -//===----------------------------------------------------------------------===// - -SDBMSumExpr SDBMSumExpr::get(SDBMTermExpr lhs, SDBMConstantExpr rhs) { - assert(lhs && "expected SDBM variable expression"); - assert(rhs && "expected SDBM constant"); - - // If LHS of a sum is another sum, fold the constant RHS parts. - if (auto lhsSum = lhs.dyn_cast()) { - lhs = lhsSum.getLHS(); - rhs = SDBMConstantExpr::get(rhs.getDialect(), - rhs.getValue() + lhsSum.getRHS().getValue()); - } - - StorageUniquer &uniquer = lhs.getDialect()->getUniquer(); - return uniquer.get( - /*initFn=*/{}, static_cast(SDBMExprKind::Add), lhs, rhs); -} - -SDBMTermExpr SDBMSumExpr::getLHS() const { - return static_cast(impl)->lhs.cast(); -} - -SDBMConstantExpr SDBMSumExpr::getRHS() const { - return static_cast(impl)->rhs; -} - -AffineExpr SDBMExpr::getAsAffineExpr() const { - struct Converter : public SDBMVisitor { - AffineExpr visitSum(SDBMSumExpr expr) { - AffineExpr lhs = visit(expr.getLHS()), rhs = visit(expr.getRHS()); - return lhs + rhs; - } - - AffineExpr visitStripe(SDBMStripeExpr expr) { - AffineExpr lhs = visit(expr.getLHS()), - rhs = visit(expr.getStripeFactor()); - return lhs - (lhs % rhs); - } - - AffineExpr visitDiff(SDBMDiffExpr expr) { - AffineExpr lhs = visit(expr.getLHS()), rhs = visit(expr.getRHS()); - return lhs - rhs; - } - - AffineExpr visitDim(SDBMDimExpr expr) { - return getAffineDimExpr(expr.getPosition(), expr.getContext()); - } - - AffineExpr visitSymbol(SDBMSymbolExpr expr) { - return getAffineSymbolExpr(expr.getPosition(), expr.getContext()); - } - - AffineExpr visitNeg(SDBMNegExpr expr) { - return getAffineBinaryOpExpr(AffineExprKind::Mul, - getAffineConstantExpr(-1, expr.getContext()), - visit(expr.getVar())); - } - - AffineExpr visitConstant(SDBMConstantExpr expr) { - return getAffineConstantExpr(expr.getValue(), expr.getContext()); - } - } converter; - return converter.visit(*this); -} - -// Given a direct expression `expr`, add the given constant to it and pass the -// resulting expression to `builder` before returning its result. If the -// expression is already a sum expression, update its constant and extract the -// LHS if the constant becomes zero. Otherwise, construct a sum expression. -template -static Result addConstantAndSink(SDBMDirectExpr expr, int64_t constant, - bool negated, - function_ref builder) { - SDBMDialect *dialect = expr.getDialect(); - if (auto sumExpr = expr.dyn_cast()) { - if (negated) - constant = sumExpr.getRHS().getValue() - constant; - else - constant += sumExpr.getRHS().getValue(); - - if (constant != 0) { - auto sum = SDBMSumExpr::get(sumExpr.getLHS(), - SDBMConstantExpr::get(dialect, constant)); - return builder(sum); - } else { - return builder(sumExpr.getLHS()); - } - } - if (constant != 0) - return builder(SDBMSumExpr::get( - expr.cast(), - SDBMConstantExpr::get(dialect, negated ? -constant : constant))); - return expr; -} - -// Construct an expression lhs + constant while maintaining the canonical form -// of the SDBM expressions, in particular sink the constant expression to the -// nearest sum expression in the left subtree of the expression tree. -static SDBMExpr addConstant(SDBMVaryingExpr lhs, int64_t constant) { - if (auto lhsDiff = lhs.dyn_cast()) - return addConstantAndSink( - lhsDiff.getLHS(), constant, /*negated=*/false, - [lhsDiff](SDBMDirectExpr e) { - return SDBMDiffExpr::get(e, lhsDiff.getRHS()); - }); - if (auto lhsNeg = lhs.dyn_cast()) - return addConstantAndSink( - lhsNeg.getVar(), constant, /*negated=*/true, - [](SDBMDirectExpr e) { return SDBMNegExpr::get(e); }); - if (auto lhsSum = lhs.dyn_cast()) - return addConstantAndSink(lhsSum, constant, /*negated=*/false, - [](SDBMDirectExpr e) { return e; }); - if (constant != 0) - return SDBMSumExpr::get(lhs.cast(), - SDBMConstantExpr::get(lhs.getDialect(), constant)); - return lhs; -} - -// Build a difference expression given a direct expression and a negation -// expression. -static SDBMExpr buildDiffExpr(SDBMDirectExpr lhs, SDBMNegExpr rhs) { - // Fold (x + C) - (x + D) = C - D. - if (lhs.getTerm() == rhs.getVar().getTerm()) - return SDBMConstantExpr::get( - lhs.getDialect(), lhs.getConstant() - rhs.getVar().getConstant()); - - return SDBMDiffExpr::get( - addConstantAndSink(lhs, -rhs.getVar().getConstant(), - /*negated=*/false, - [](SDBMDirectExpr e) { return e; }), - rhs.getVar().getTerm()); -} - -// Try folding an expression (lhs + rhs) where at least one of the operands -// contains a negated variable, i.e. is a negation or a difference expression. -static SDBMExpr foldSumDiff(SDBMExpr lhs, SDBMExpr rhs) { - // If exactly one of LHS, RHS is a negation expression, we can construct - // a difference expression, which is a special kind in SDBM. - auto lhsDirect = lhs.dyn_cast(); - auto rhsDirect = rhs.dyn_cast(); - auto lhsNeg = lhs.dyn_cast(); - auto rhsNeg = rhs.dyn_cast(); - - if (lhsDirect && rhsNeg) - return buildDiffExpr(lhsDirect, rhsNeg); - if (lhsNeg && rhsDirect) - return buildDiffExpr(rhsDirect, lhsNeg); - - // If a subexpression appears in a diff expression on the LHS(RHS) of a - // sum expression where it also appears on the RHS(LHS) with the opposite - // sign, we can simplify it away and obtain the SDBM form. - auto lhsDiff = lhs.dyn_cast(); - auto rhsDiff = rhs.dyn_cast(); - - // -(x + A) + ((x + B) - y) = -(y + (A - B)) - if (lhsNeg && rhsDiff && - lhsNeg.getVar().getTerm() == rhsDiff.getLHS().getTerm()) { - int64_t constant = - lhsNeg.getVar().getConstant() - rhsDiff.getLHS().getConstant(); - // RHS of the diff is a term expression, its sum with a constant is a direct - // expression. - return SDBMNegExpr::get( - addConstant(rhsDiff.getRHS(), constant).cast()); - } - - // (x + A) + ((y + B) - x) = (y + B) + A. - if (lhsDirect && rhsDiff && lhsDirect.getTerm() == rhsDiff.getRHS()) - return addConstant(rhsDiff.getLHS(), lhsDirect.getConstant()); - - // ((x + A) - y) + (-(x + B)) = -(y + (B - A)). - if (lhsDiff && rhsNeg && - lhsDiff.getLHS().getTerm() == rhsNeg.getVar().getTerm()) { - int64_t constant = - rhsNeg.getVar().getConstant() - lhsDiff.getLHS().getConstant(); - // RHS of the diff is a term expression, its sum with a constant is a direct - // expression. - return SDBMNegExpr::get( - addConstant(lhsDiff.getRHS(), constant).cast()); - } - - // ((x + A) - y) + (y + B) = (x + A) + B. - if (rhsDirect && lhsDiff && rhsDirect.getTerm() == lhsDiff.getRHS()) - return addConstant(lhsDiff.getLHS(), rhsDirect.getConstant()); - - return {}; -} - -Optional SDBMExpr::tryConvertAffineExpr(AffineExpr affine) { - struct Converter : public AffineExprVisitor { - SDBMExpr visitAddExpr(AffineBinaryOpExpr expr) { - auto lhs = visit(expr.getLHS()), rhs = visit(expr.getRHS()); - if (!lhs || !rhs) - return {}; - - // In a "add" AffineExpr, the constant always appears on the right. If - // there were two constants, they would have been folded away. - assert(!lhs.isa() && "non-canonical affine expression"); - - // If RHS is a constant, we can always extend the SDBM expression to - // include it by sinking the constant into the nearest sum expression. - if (auto rhsConstant = rhs.dyn_cast()) { - int64_t constant = rhsConstant.getValue(); - auto varying = lhs.dyn_cast(); - assert(varying && "unexpected uncanonicalized sum of constants"); - return addConstant(varying, constant); - } - - // Try building a difference expression if one of the values is negated, - // or check if a difference on either hand side cancels out the outer term - // so as to remain correct within SDBM. Return null otherwise. - return foldSumDiff(lhs, rhs); - } - - SDBMExpr visitMulExpr(AffineBinaryOpExpr expr) { - // Attempt to recover a stripe expression "x # C = (x floordiv C) * C". - AffineExprMatcher x, C; - AffineExprMatcher pattern = (x.floorDiv(C)) * C; - if (pattern.match(expr)) { - if (SDBMExpr converted = visit(x.matched())) { - if (auto varConverted = converted.dyn_cast()) - // TODO: return varConverted.stripe(C.getConstantValue()); - return SDBMStripeExpr::get( - varConverted, - SDBMConstantExpr::get(dialect, - C.getMatchedConstantValue().getValue())); - } - } - - auto lhs = visit(expr.getLHS()), rhs = visit(expr.getRHS()); - if (!lhs || !rhs) - return {}; - - // In a "mul" AffineExpr, the constant always appears on the right. If - // there were two constants, they would have been folded away. - assert(!lhs.isa() && "non-canonical affine expression"); - auto rhsConstant = rhs.dyn_cast(); - if (!rhsConstant) - return {}; - - // The only supported "multiplication" expression is an SDBM is dimension - // negation, that is a product of dimension and constant -1. - if (rhsConstant.getValue() != -1) - return {}; - - if (auto lhsVar = lhs.dyn_cast()) - return SDBMNegExpr::get(lhsVar); - if (auto lhsDiff = lhs.dyn_cast()) - return SDBMNegator().visitDiff(lhsDiff); - - // Other multiplications are not allowed in SDBM. - return {}; - } - - SDBMExpr visitModExpr(AffineBinaryOpExpr expr) { - auto lhs = visit(expr.getLHS()), rhs = visit(expr.getRHS()); - if (!lhs || !rhs) - return {}; - - // 'mod' can only be converted to SDBM if its LHS is a direct expression - // and its RHS is a constant. Then it `x mod c = x - x stripe c`. - auto rhsConstant = rhs.dyn_cast(); - auto lhsVar = lhs.dyn_cast(); - if (!lhsVar || !rhsConstant) - return {}; - return SDBMDiffExpr::get(lhsVar, - SDBMStripeExpr::get(lhsVar, rhsConstant)); - } - - // `a floordiv b = (a stripe b) / b`, but we have no division in SDBM - SDBMExpr visitFloorDivExpr(AffineBinaryOpExpr expr) { return {}; } - SDBMExpr visitCeilDivExpr(AffineBinaryOpExpr expr) { return {}; } - - // Dimensions, symbols and constants are converted trivially. - SDBMExpr visitConstantExpr(AffineConstantExpr expr) { - return SDBMConstantExpr::get(dialect, expr.getValue()); - } - SDBMExpr visitDimExpr(AffineDimExpr expr) { - return SDBMDimExpr::get(dialect, expr.getPosition()); - } - SDBMExpr visitSymbolExpr(AffineSymbolExpr expr) { - return SDBMSymbolExpr::get(dialect, expr.getPosition()); - } - - SDBMDialect *dialect; - } converter; - converter.dialect = affine.getContext()->getOrLoadDialect(); - - if (auto result = converter.visit(affine)) - return result; - return None; -} - -//===----------------------------------------------------------------------===// -// SDBMDiffExpr -//===----------------------------------------------------------------------===// - -SDBMDiffExpr SDBMDiffExpr::get(SDBMDirectExpr lhs, SDBMTermExpr rhs) { - assert(lhs && "expected SDBM dimension"); - assert(rhs && "expected SDBM dimension"); - - StorageUniquer &uniquer = lhs.getDialect()->getUniquer(); - return uniquer.get(/*initFn=*/{}, lhs, rhs); -} - -SDBMDirectExpr SDBMDiffExpr::getLHS() const { - return static_cast(impl)->lhs; -} - -SDBMTermExpr SDBMDiffExpr::getRHS() const { - return static_cast(impl)->rhs; -} - -//===----------------------------------------------------------------------===// -// SDBMDirectExpr -//===----------------------------------------------------------------------===// - -SDBMTermExpr SDBMDirectExpr::getTerm() { - if (auto sum = dyn_cast()) - return sum.getLHS(); - return cast(); -} - -int64_t SDBMDirectExpr::getConstant() { - if (auto sum = dyn_cast()) - return sum.getRHS().getValue(); - return 0; -} - -//===----------------------------------------------------------------------===// -// SDBMStripeExpr -//===----------------------------------------------------------------------===// - -SDBMStripeExpr SDBMStripeExpr::get(SDBMDirectExpr var, - SDBMConstantExpr stripeFactor) { - assert(var && "expected SDBM variable expression"); - assert(stripeFactor && "expected non-null stripe factor"); - if (stripeFactor.getValue() <= 0) - llvm::report_fatal_error("non-positive stripe factor"); - - StorageUniquer &uniquer = var.getDialect()->getUniquer(); - return uniquer.get( - /*initFn=*/{}, static_cast(SDBMExprKind::Stripe), var, - stripeFactor); -} - -SDBMDirectExpr SDBMStripeExpr::getLHS() const { - if (SDBMVaryingExpr lhs = static_cast(impl)->lhs) - return lhs.cast(); - return {}; -} - -SDBMConstantExpr SDBMStripeExpr::getStripeFactor() const { - return static_cast(impl)->rhs; -} - -//===----------------------------------------------------------------------===// -// SDBMInputExpr -//===----------------------------------------------------------------------===// - -unsigned SDBMInputExpr::getPosition() const { - return static_cast(impl)->position; -} - -//===----------------------------------------------------------------------===// -// SDBMDimExpr -//===----------------------------------------------------------------------===// - -SDBMDimExpr SDBMDimExpr::get(SDBMDialect *dialect, unsigned position) { - assert(dialect && "expected non-null dialect"); - - auto assignDialect = [dialect](detail::SDBMTermExprStorage *storage) { - storage->dialect = dialect; - }; - - StorageUniquer &uniquer = dialect->getUniquer(); - return uniquer.get( - assignDialect, static_cast(SDBMExprKind::DimId), position); -} - -//===----------------------------------------------------------------------===// -// SDBMSymbolExpr -//===----------------------------------------------------------------------===// - -SDBMSymbolExpr SDBMSymbolExpr::get(SDBMDialect *dialect, unsigned position) { - assert(dialect && "expected non-null dialect"); - - auto assignDialect = [dialect](detail::SDBMTermExprStorage *storage) { - storage->dialect = dialect; - }; - - StorageUniquer &uniquer = dialect->getUniquer(); - return uniquer.get( - assignDialect, static_cast(SDBMExprKind::SymbolId), position); -} - -//===----------------------------------------------------------------------===// -// SDBMConstantExpr -//===----------------------------------------------------------------------===// - -SDBMConstantExpr SDBMConstantExpr::get(SDBMDialect *dialect, int64_t value) { - assert(dialect && "expected non-null dialect"); - - auto assignCtx = [dialect](detail::SDBMConstantExprStorage *storage) { - storage->dialect = dialect; - }; - - StorageUniquer &uniquer = dialect->getUniquer(); - return uniquer.get(assignCtx, value); -} - -int64_t SDBMConstantExpr::getValue() const { - return static_cast(impl)->constant; -} - -//===----------------------------------------------------------------------===// -// SDBMNegExpr -//===----------------------------------------------------------------------===// - -SDBMNegExpr SDBMNegExpr::get(SDBMDirectExpr var) { - assert(var && "expected non-null SDBM direct expression"); - - StorageUniquer &uniquer = var.getDialect()->getUniquer(); - return uniquer.get(/*initFn=*/{}, var); -} - -SDBMDirectExpr SDBMNegExpr::getVar() const { - return static_cast(impl)->expr; -} - -SDBMExpr mlir::ops_assertions::operator+(SDBMExpr lhs, SDBMExpr rhs) { - if (auto folded = foldSumDiff(lhs, rhs)) - return folded; - assert(!(lhs.isa() && rhs.isa()) && - "a sum of negated expressions is a negation of a sum of variables and " - "not a correct SDBM"); - - // Fold (x - y) + (y - x) = 0. - auto lhsDiff = lhs.dyn_cast(); - auto rhsDiff = rhs.dyn_cast(); - if (lhsDiff && rhsDiff) { - if (lhsDiff.getLHS() == rhsDiff.getRHS() && - lhsDiff.getRHS() == rhsDiff.getLHS()) - return SDBMConstantExpr::get(lhs.getDialect(), 0); - } - - // If LHS is a constant and RHS is not, swap the order to get into a supported - // sum case. From now on, RHS must be a constant. - auto lhsConstant = lhs.dyn_cast(); - auto rhsConstant = rhs.dyn_cast(); - if (!rhsConstant && lhsConstant) { - std::swap(lhs, rhs); - std::swap(lhsConstant, rhsConstant); - } - assert(rhsConstant && "at least one operand must be a constant"); - - // Constant-fold if LHS is also a constant. - if (lhsConstant) - return SDBMConstantExpr::get(lhs.getDialect(), lhsConstant.getValue() + - rhsConstant.getValue()); - return addConstant(lhs.cast(), rhsConstant.getValue()); -} - -SDBMExpr mlir::ops_assertions::operator-(SDBMExpr lhs, SDBMExpr rhs) { - // Fold x - x == 0. - if (lhs == rhs) - return SDBMConstantExpr::get(lhs.getDialect(), 0); - - // LHS and RHS may be constants. - auto lhsConstant = lhs.dyn_cast(); - auto rhsConstant = rhs.dyn_cast(); - - // Constant fold if both LHS and RHS are constants. - if (lhsConstant && rhsConstant) - return SDBMConstantExpr::get(lhs.getDialect(), lhsConstant.getValue() - - rhsConstant.getValue()); - - // Replace a difference with a sum with a negated value if one of LHS and RHS - // is a constant: - // x - C == x + (-C); - // C - x == -x + C. - // This calls into operator+ for further simplification. - if (rhsConstant) - return lhs + (-rhsConstant); - if (lhsConstant) - return -rhs + lhsConstant; - - return buildDiffExpr(lhs.cast(), (-rhs).cast()); -} - -SDBMExpr mlir::ops_assertions::stripe(SDBMExpr expr, SDBMExpr factor) { - auto constantFactor = factor.cast(); - assert(constantFactor.getValue() > 0 && "non-positive stripe"); - - // Fold x # 1 = x. - if (constantFactor.getValue() == 1) - return expr; - - return SDBMStripeExpr::get(expr.cast(), constantFactor); -} diff --git a/mlir/lib/Dialect/SDBM/SDBMExprDetail.h b/mlir/lib/Dialect/SDBM/SDBMExprDetail.h deleted file mode 100644 index 8d91334c807e0..0000000000000 --- a/mlir/lib/Dialect/SDBM/SDBMExprDetail.h +++ /dev/null @@ -1,137 +0,0 @@ -//===- SDBMExprDetail.h - MLIR SDBM Expression storage details --*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This holds implementation details of SDBMExpr, in particular underlying -// storage types. -// -//===----------------------------------------------------------------------===// - -#ifndef MLIR_IR_SDBMEXPRDETAIL_H -#define MLIR_IR_SDBMEXPRDETAIL_H - -#include "mlir/Dialect/SDBM/SDBMExpr.h" -#include "mlir/Support/StorageUniquer.h" - -namespace mlir { - -class SDBMDialect; - -namespace detail { - -// Base storage class for SDBMExpr. -struct SDBMExprStorage : public StorageUniquer::BaseStorage { - SDBMExprKind getKind() { return kind; } - - SDBMDialect *dialect; - SDBMExprKind kind; -}; - -// Storage class for SDBM sum and stripe expressions. -struct SDBMBinaryExprStorage : public SDBMExprStorage { - using KeyTy = std::tuple; - - bool operator==(const KeyTy &key) const { - return static_cast(std::get<0>(key)) == kind && - std::get<1>(key) == lhs && std::get<2>(key) == rhs; - } - - static SDBMBinaryExprStorage * - construct(StorageUniquer::StorageAllocator &allocator, const KeyTy &key) { - auto *result = allocator.allocate(); - result->lhs = std::get<1>(key); - result->rhs = std::get<2>(key); - result->dialect = result->lhs.getDialect(); - result->kind = static_cast(std::get<0>(key)); - return result; - } - - SDBMDirectExpr lhs; - SDBMConstantExpr rhs; -}; - -// Storage class for SDBM difference expressions. -struct SDBMDiffExprStorage : public SDBMExprStorage { - using KeyTy = std::pair; - - bool operator==(const KeyTy &key) const { - return std::get<0>(key) == lhs && std::get<1>(key) == rhs; - } - - static SDBMDiffExprStorage * - construct(StorageUniquer::StorageAllocator &allocator, const KeyTy &key) { - auto *result = allocator.allocate(); - result->lhs = std::get<0>(key); - result->rhs = std::get<1>(key); - result->dialect = result->lhs.getDialect(); - result->kind = SDBMExprKind::Diff; - return result; - } - - SDBMDirectExpr lhs; - SDBMTermExpr rhs; -}; - -// Storage class for SDBM constant expressions. -struct SDBMConstantExprStorage : public SDBMExprStorage { - using KeyTy = int64_t; - - bool operator==(const KeyTy &key) const { return constant == key; } - - static SDBMConstantExprStorage * - construct(StorageUniquer::StorageAllocator &allocator, const KeyTy &key) { - auto *result = allocator.allocate(); - result->constant = key; - result->kind = SDBMExprKind::Constant; - return result; - } - - int64_t constant; -}; - -// Storage class for SDBM dimension and symbol expressions. -struct SDBMTermExprStorage : public SDBMExprStorage { - using KeyTy = std::pair; - - bool operator==(const KeyTy &key) const { - return kind == static_cast(key.first) && - position == key.second; - } - - static SDBMTermExprStorage * - construct(StorageUniquer::StorageAllocator &allocator, const KeyTy &key) { - auto *result = allocator.allocate(); - result->kind = static_cast(key.first); - result->position = key.second; - return result; - } - - unsigned position; -}; - -// Storage class for SDBM negation expressions. -struct SDBMNegExprStorage : public SDBMExprStorage { - using KeyTy = SDBMDirectExpr; - - bool operator==(const KeyTy &key) const { return key == expr; } - - static SDBMNegExprStorage * - construct(StorageUniquer::StorageAllocator &allocator, const KeyTy &key) { - auto *result = allocator.allocate(); - result->expr = key; - result->dialect = key.getDialect(); - result->kind = SDBMExprKind::Neg; - return result; - } - - SDBMDirectExpr expr; -}; - -} // end namespace detail -} // end namespace mlir - -#endif // MLIR_IR_SDBMEXPRDETAIL_H diff --git a/mlir/test/CMakeLists.txt b/mlir/test/CMakeLists.txt index 5ce620c6c2b6e..416cfee7efade 100644 --- a/mlir/test/CMakeLists.txt +++ b/mlir/test/CMakeLists.txt @@ -1,5 +1,4 @@ add_subdirectory(CAPI) -add_subdirectory(SDBM) add_subdirectory(lib) if(MLIR_ENABLE_BINDINGS_PYTHON) @@ -75,7 +74,6 @@ set(MLIR_TEST_DEPENDS mlir-lsp-server mlir-opt mlir-reduce - mlir-sdbm-api-test mlir-tblgen mlir-translate mlir_runner_utils diff --git a/mlir/test/SDBM/CMakeLists.txt b/mlir/test/SDBM/CMakeLists.txt deleted file mode 100644 index 633fae707c855..0000000000000 --- a/mlir/test/SDBM/CMakeLists.txt +++ /dev/null @@ -1,19 +0,0 @@ -set(LLVM_LINK_COMPONENTS - Core - Support - ) - -add_llvm_executable(mlir-sdbm-api-test - sdbm-api-test.cpp -) - -llvm_update_compile_flags(mlir-sdbm-api-test) - -target_link_libraries(mlir-sdbm-api-test - PRIVATE - MLIRIR - MLIRSDBM - MLIRSupport -) - -target_include_directories(mlir-sdbm-api-test PRIVATE ..) diff --git a/mlir/test/SDBM/lit.local.cfg b/mlir/test/SDBM/lit.local.cfg deleted file mode 100644 index 81261555b4246..0000000000000 --- a/mlir/test/SDBM/lit.local.cfg +++ /dev/null @@ -1 +0,0 @@ -config.suffixes.add('.cpp') diff --git a/mlir/test/SDBM/sdbm-api-test.cpp b/mlir/test/SDBM/sdbm-api-test.cpp deleted file mode 100644 index 027c584c74097..0000000000000 --- a/mlir/test/SDBM/sdbm-api-test.cpp +++ /dev/null @@ -1,201 +0,0 @@ -//===- sdbm-api-test.cpp - Tests for SDBM expression APIs -----------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// RUN: mlir-sdbm-api-test | FileCheck %s - -#include "mlir/Dialect/SDBM/SDBM.h" -#include "mlir/Dialect/SDBM/SDBMDialect.h" -#include "mlir/Dialect/SDBM/SDBMExpr.h" -#include "mlir/IR/MLIRContext.h" - -#include "llvm/Support/raw_ostream.h" - -#include "APITest.h" - -using namespace mlir; - - -static MLIRContext *ctx() { - static thread_local MLIRContext context; - static thread_local bool once = - (context.getOrLoadDialect(), true); - (void)once; - return &context; -} - -static SDBMDialect *dialect() { - static thread_local SDBMDialect *d = nullptr; - if (!d) { - d = ctx()->getOrLoadDialect(); - } - return d; -} - -static SDBMExpr dim(unsigned pos) { return SDBMDimExpr::get(dialect(), pos); } - -static SDBMExpr symb(unsigned pos) { - return SDBMSymbolExpr::get(dialect(), pos); -} - -namespace { - -using namespace mlir::ops_assertions; - -TEST_FUNC(SDBM_SingleConstraint) { - // Build an SDBM defined by - // d0 - 3 <= 0 <=> d0 <= 3. - auto sdbm = SDBM::get(dim(0) - 3, llvm::None); - - // CHECK: cst d0 - // CHECK-NEXT: cst inf 3 - // CHECK-NEXT: d0 inf inf - sdbm.print(llvm::outs()); -} - -TEST_FUNC(SDBM_Equality) { - // Build an SDBM defined by - // - // d0 - d1 - 3 = 0 - // <=> {d0 - d1 - 3 <= 0 and d0 - d1 - 3 >= 0} - // <=> {d0 - d1 <= 3 and d1 - d0 <= -3}. - auto sdbm = SDBM::get(llvm::None, dim(0) - dim(1) - 3); - - // CHECK: cst d0 d1 - // CHECK-NEXT: cst inf inf inf - // CHECK-NEXT: d0 inf inf -3 - // CHECK-NEXT: d1 inf 3 inf - sdbm.print(llvm::outs()); -} - -TEST_FUNC(SDBM_TrivialSimplification) { - // Build an SDBM defined by - // - // d0 - 3 <= 0 <=> d0 <= 3 - // d0 - 5 <= 0 <=> d0 <= 5 - // - // which should get simplified on construction to only the former. - auto sdbm = SDBM::get({dim(0) - 3, dim(0) - 5}, llvm::None); - - // CHECK: cst d0 - // CHECK-NEXT: cst inf 3 - // CHECK-NEXT: d0 inf inf - sdbm.print(llvm::outs()); -} - -TEST_FUNC(SDBM_StripeInducedIneqs) { - // Build an SDBM defined by d1 = d0 # 3, which induces the constraints - // - // d1 - d0 <= 0 - // d0 - d1 <= 3 - 1 = 2 - auto sdbm = SDBM::get(llvm::None, dim(1) - stripe(dim(0), 3)); - - // CHECK: cst d0 d1 - // CHECK-NEXT: cst inf inf inf - // CHECK-NEXT: d0 inf inf 0 - // CHECK-NEXT: d1 inf 2 0 - // CHECK-NEXT: d1 = d0 # 3 - sdbm.print(llvm::outs()); -} - -TEST_FUNC(SDBM_StripeTemporaries) { - // Build an SDBM defined by d0 # 3 <= 0, which creates a temporary - // t0 = d0 # 3 leading to a constraint t0 <= 0 and the stripe-induced - // constraints - // - // t0 - d0 <= 0 - // d0 - t0 <= 3 - 1 = 2 - auto sdbm = SDBM::get(stripe(dim(0), 3), llvm::None); - - // CHECK: cst d0 t0 - // CHECK-NEXT: cst inf inf 0 - // CHECK-NEXT: d0 inf inf 0 - // CHECK-NEXT: t0 inf 2 inf - // CHECK-NEXT: t0 = d0 # 3 - sdbm.print(llvm::outs()); -} - -TEST_FUNC(SDBM_ElideInducedInequalities) { - // Build an SDBM defined by a single stripe equality d0 = s0 # 3 and make sure - // the induced inequalities are not present after converting the SDBM back - // into lists of expressions. - auto sdbm = SDBM::get(llvm::None, {dim(0) - stripe(symb(0), 3)}); - - SmallVector eqs, ineqs; - sdbm.getSDBMExpressions(dialect(), ineqs, eqs); - // CHECK-EMPTY: - for (auto ineq : ineqs) - ineq.print(llvm::outs() << '\n'); - llvm::outs() << "\n"; - - // CHECK: d0 - s0 # 3 - // CHECK-EMPTY: - for (auto eq : eqs) - eq.print(llvm::outs() << '\n'); - llvm::outs() << "\n\n"; -} - -TEST_FUNC(SDBM_StripeTightening) { - // Build an SDBM defined by - // - // d0 = s0 # 3 # 5 - // s0 # 3 # 5 - d1 + 42 = 0 - // s0 # 3 - d0 <= 2 - // - // where the last inequality is tighter than that induced by the first stripe - // equality (s0 # 3 - d0 <= 5 - 1 = 4). Check that the conversion from SDBM - // back to the lists of constraints conserves both the stripe equality and the - // tighter inequality. - auto s = stripe(stripe(symb(0), 3), 5); - auto tight = stripe(symb(0), 3) - dim(0) - 2; - auto sdbm = SDBM::get({tight}, {s - dim(0), s - dim(1) + 42}); - - SmallVector eqs, ineqs; - sdbm.getSDBMExpressions(dialect(), ineqs, eqs); - // CHECK: s0 # 3 + -2 - d0 - // CHECK-EMPTY: - for (auto ineq : ineqs) - ineq.print(llvm::outs() << '\n'); - llvm::outs() << "\n"; - - // CHECK-DAG: d1 + -42 - d0 - // CHECK-DAG: d0 - s0 # 3 # 5 - for (auto eq : eqs) - eq.print(llvm::outs() << '\n'); - llvm::outs() << "\n\n"; -} - -TEST_FUNC(SDBM_StripeTransitive) { - // Build an SDBM defined by - // - // d0 = d1 # 3 - // d0 = d2 # 7 - // - // where the same dimension is declared equal to two stripe expressions over - // different variables. This is practically handled by introducing a - // temporary variable for the second stripe expression and adding an equality - // constraint between this variable and the original dimension variable. - auto sdbm = SDBM::get( - llvm::None, {stripe(dim(1), 3) - dim(0), stripe(dim(2), 7) - dim(0)}); - - // CHECK: cst d0 d1 d2 t0 - // CHECK-NEXT: cst inf inf inf inf inf - // CHECK-NEXT: d0 inf 0 2 inf 0 - // CHECK-NEXT: d1 inf 0 inf inf inf - // CHECK-NEXT: d2 inf inf inf inf 0 - // CHECK-NEXT: t0 inf 0 inf 6 inf - // CHECK-NEXT: t0 = d2 # 7 - // CHECK-NEXT: d0 = d1 # 3 - sdbm.print(llvm::outs()); -} - -} // end namespace - -int main() { - RUN_TESTS(); - return 0; -} diff --git a/mlir/test/lit.cfg.py b/mlir/test/lit.cfg.py index 83048dd99603a..dd38b8fec864f 100644 --- a/mlir/test/lit.cfg.py +++ b/mlir/test/lit.cfg.py @@ -65,7 +65,6 @@ 'mlir-linalg-ods-gen', 'mlir-linalg-ods-yaml-gen', 'mlir-reduce', - 'mlir-sdbm-api-test', ] # The following tools are optional diff --git a/mlir/test/mlir-opt/commandline.mlir b/mlir/test/mlir-opt/commandline.mlir index 95c476a841633..e42118d86b5dc 100644 --- a/mlir/test/mlir-opt/commandline.mlir +++ b/mlir/test/mlir-opt/commandline.mlir @@ -21,7 +21,6 @@ // CHECK-NEXT: quant // CHECK-NEXT: rocdl // CHECK-NEXT: scf -// CHECK-NEXT: sdbm // CHECK-NEXT: shape // CHECK-NEXT: sparse_tensor // CHECK-NEXT: spv diff --git a/mlir/unittests/CMakeLists.txt b/mlir/unittests/CMakeLists.txt index a8e9212ee2559..45558b6d3dcee 100644 --- a/mlir/unittests/CMakeLists.txt +++ b/mlir/unittests/CMakeLists.txt @@ -11,5 +11,4 @@ add_subdirectory(Interfaces) add_subdirectory(IR) add_subdirectory(Pass) add_subdirectory(Rewrite) -add_subdirectory(SDBM) add_subdirectory(TableGen) diff --git a/mlir/unittests/SDBM/CMakeLists.txt b/mlir/unittests/SDBM/CMakeLists.txt deleted file mode 100644 index d86f9dda38025..0000000000000 --- a/mlir/unittests/SDBM/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -add_mlir_unittest(MLIRSDBMTests - SDBMTest.cpp -) -target_link_libraries(MLIRSDBMTests - PRIVATE - MLIRSDBM -) diff --git a/mlir/unittests/SDBM/SDBMTest.cpp b/mlir/unittests/SDBM/SDBMTest.cpp deleted file mode 100644 index c907aed6258a4..0000000000000 --- a/mlir/unittests/SDBM/SDBMTest.cpp +++ /dev/null @@ -1,449 +0,0 @@ -//===- SDBMTest.cpp - SDBM expression unit tests --------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "mlir/Dialect/SDBM/SDBM.h" -#include "mlir/Dialect/SDBM/SDBMDialect.h" -#include "mlir/Dialect/SDBM/SDBMExpr.h" -#include "mlir/IR/AffineExpr.h" -#include "mlir/IR/MLIRContext.h" -#include "gtest/gtest.h" - -#include "llvm/ADT/DenseSet.h" - -using namespace mlir; - - -static MLIRContext *ctx() { - static thread_local MLIRContext context; - context.getOrLoadDialect(); - return &context; -} - -static SDBMDialect *dialect() { - static thread_local SDBMDialect *d = nullptr; - if (!d) { - d = ctx()->getOrLoadDialect(); - } - return d; -} - -static SDBMExpr dim(unsigned pos) { return SDBMDimExpr::get(dialect(), pos); } - -static SDBMExpr symb(unsigned pos) { - return SDBMSymbolExpr::get(dialect(), pos); -} - -namespace { - -using namespace mlir::ops_assertions; - -TEST(SDBMOperators, Add) { - auto expr = dim(0) + 42; - auto sumExpr = expr.dyn_cast(); - ASSERT_TRUE(sumExpr); - EXPECT_EQ(sumExpr.getLHS(), dim(0)); - EXPECT_EQ(sumExpr.getRHS().getValue(), 42); -} - -TEST(SDBMOperators, AddFolding) { - auto constant = SDBMConstantExpr::get(dialect(), 2) + 42; - auto constantExpr = constant.dyn_cast(); - ASSERT_TRUE(constantExpr); - EXPECT_EQ(constantExpr.getValue(), 44); - - auto expr = (dim(0) + 10) + 32; - auto sumExpr = expr.dyn_cast(); - ASSERT_TRUE(sumExpr); - EXPECT_EQ(sumExpr.getRHS().getValue(), 42); - - expr = dim(0) + SDBMNegExpr::get(SDBMDimExpr::get(dialect(), 1)); - auto diffExpr = expr.dyn_cast(); - ASSERT_TRUE(diffExpr); - EXPECT_EQ(diffExpr.getLHS(), dim(0)); - EXPECT_EQ(diffExpr.getRHS(), dim(1)); - - auto inverted = SDBMNegExpr::get(SDBMDimExpr::get(dialect(), 1)) + dim(0); - EXPECT_EQ(inverted, expr); - - // Check that opposite values cancel each other, and that we elide the zero - // constant. - expr = dim(0) + 42; - auto onlyDim = expr - 42; - EXPECT_EQ(onlyDim, dim(0)); - - // Check that we can sink a constant under a negation. - expr = -(dim(0) + 2); - auto negatedSum = (expr + 10).dyn_cast(); - ASSERT_TRUE(negatedSum); - auto sum = negatedSum.getVar().dyn_cast(); - ASSERT_TRUE(sum); - EXPECT_EQ(sum.getRHS().getValue(), -8); - - // Sum with zero is the same as the original expression. - EXPECT_EQ(dim(0) + 0, dim(0)); - - // Sum of opposite differences is zero. - auto diffOfDiffs = - ((dim(0) - dim(1)) + (dim(1) - dim(0))).dyn_cast(); - EXPECT_EQ(diffOfDiffs.getValue(), 0); -} - -TEST(SDBMOperators, AddNegativeTerms) { - const int64_t A = 7; - const int64_t B = -5; - auto x = SDBMDimExpr::get(dialect(), 0); - auto y = SDBMDimExpr::get(dialect(), 1); - - // Check the simplification patterns in addition where one of the variables is - // cancelled out and the result remains an SDBM. - EXPECT_EQ(-(x + A) + ((x + B) - y), -(y + (A - B))); - EXPECT_EQ((x + A) + ((y + B) - x), (y + B) + A); - EXPECT_EQ(((x + A) - y) + (-(x + B)), -(y + (B - A))); - EXPECT_EQ(((x + A) - y) + (y + B), (x + A) + B); -} - -TEST(SDBMOperators, Diff) { - auto expr = dim(0) - dim(1); - auto diffExpr = expr.dyn_cast(); - ASSERT_TRUE(diffExpr); - EXPECT_EQ(diffExpr.getLHS(), dim(0)); - EXPECT_EQ(diffExpr.getRHS(), dim(1)); -} - -TEST(SDBMOperators, DiffFolding) { - auto constant = SDBMConstantExpr::get(dialect(), 10) - 3; - auto constantExpr = constant.dyn_cast(); - ASSERT_TRUE(constantExpr); - EXPECT_EQ(constantExpr.getValue(), 7); - - auto expr = dim(0) - 3; - auto sumExpr = expr.dyn_cast(); - ASSERT_TRUE(sumExpr); - EXPECT_EQ(sumExpr.getRHS().getValue(), -3); - - auto zero = dim(0) - dim(0); - constantExpr = zero.dyn_cast(); - ASSERT_TRUE(constantExpr); - EXPECT_EQ(constantExpr.getValue(), 0); - - // Check that the constant terms in difference-of-sums are folded. - // (d0 - 3) - (d1 - 5) = (d0 + 2) - d1 - auto diffOfSums = ((dim(0) - 3) - (dim(1) - 5)).dyn_cast(); - ASSERT_TRUE(diffOfSums); - auto lhs = diffOfSums.getLHS().dyn_cast(); - ASSERT_TRUE(lhs); - EXPECT_EQ(lhs.getLHS(), dim(0)); - EXPECT_EQ(lhs.getRHS().getValue(), 2); - EXPECT_EQ(diffOfSums.getRHS(), dim(1)); - - // Check that identical dimensions with opposite signs cancel each other. - auto cstOnly = ((dim(0) + 42) - dim(0)).dyn_cast(); - ASSERT_TRUE(cstOnly); - EXPECT_EQ(cstOnly.getValue(), 42); - - // Check that identical terms in sum of diffs cancel out. - auto dimOnly = (-dim(0) + (dim(0) - dim(1))); - EXPECT_EQ(dimOnly, -dim(1)); - dimOnly = (dim(0) - dim(1)) + (-dim(0)); - EXPECT_EQ(dimOnly, -dim(1)); - dimOnly = (dim(0) - dim(1)) + dim(1); - EXPECT_EQ(dimOnly, dim(0)); - dimOnly = dim(0) + (dim(1) - dim(0)); - EXPECT_EQ(dimOnly, dim(1)); - - // Top-level zero constant is fine. - cstOnly = (-symb(1) + symb(1)).dyn_cast(); - ASSERT_TRUE(cstOnly); - EXPECT_EQ(cstOnly.getValue(), 0); -} - -TEST(SDBMOperators, Negate) { - auto sum = dim(0) + 3; - auto negated = (-sum).dyn_cast(); - ASSERT_TRUE(negated); - EXPECT_EQ(negated.getVar(), sum); -} - -TEST(SDBMOperators, Stripe) { - auto expr = stripe(dim(0), 3); - auto stripeExpr = expr.dyn_cast(); - ASSERT_TRUE(stripeExpr); - EXPECT_EQ(stripeExpr.getLHS(), dim(0)); - EXPECT_EQ(stripeExpr.getStripeFactor().getValue(), 3); -} - -TEST(SDBM, RoundTripEqs) { - // Build an SDBM defined by - // - // d0 = s0 # 3 # 5 - // s0 # 3 # 5 - d1 + 42 = 0 - // - // and perform a double round-trip between the "list of equalities" and SDBM - // representation. After the first round-trip, the equalities may be - // different due to simplification or equivalent substitutions (e.g., the - // second equality may become d0 - d1 + 42 = 0). However, there should not - // be any further simplification after the second round-trip, - - // Build the SDBM from a pair of equalities and extract back the lists of - // inequalities and equalities. Check that all equalities are properly - // detected and none of them decayed into inequalities. - auto s = stripe(stripe(symb(0), 3), 5); - auto sdbm = SDBM::get(llvm::None, {s - dim(0), s - dim(1) + 42}); - SmallVector eqs, ineqs; - sdbm.getSDBMExpressions(dialect(), ineqs, eqs); - ASSERT_TRUE(ineqs.empty()); - - // Do the second round-trip. - auto sdbm2 = SDBM::get(llvm::None, eqs); - SmallVector eqs2, ineqs2; - sdbm2.getSDBMExpressions(dialect(), ineqs2, eqs2); - ASSERT_EQ(eqs.size(), eqs2.size()); - - // Check that the sets of equalities are equal, their order is not relevant. - llvm::DenseSet eqSet, eq2Set; - eqSet.insert(eqs.begin(), eqs.end()); - eq2Set.insert(eqs2.begin(), eqs2.end()); - EXPECT_EQ(eqSet, eq2Set); -} - -TEST(SDBMExpr, Constant) { - // We can create constants and query them. - auto expr = SDBMConstantExpr::get(dialect(), 42); - EXPECT_EQ(expr.getValue(), 42); - - // Two separately created constants with identical values are trivially equal. - auto expr2 = SDBMConstantExpr::get(dialect(), 42); - EXPECT_EQ(expr, expr2); - - // Hierarchy is okay. - auto generic = static_cast(expr); - EXPECT_TRUE(generic.isa()); -} - -TEST(SDBMExpr, Dim) { - // We can create dimension expressions and query them. - auto expr = SDBMDimExpr::get(dialect(), 0); - EXPECT_EQ(expr.getPosition(), 0u); - - // Two separately created dimensions with the same position are trivially - // equal. - auto expr2 = SDBMDimExpr::get(dialect(), 0); - EXPECT_EQ(expr, expr2); - - // Hierarchy is okay. - auto generic = static_cast(expr); - EXPECT_TRUE(generic.isa()); - EXPECT_TRUE(generic.isa()); - EXPECT_TRUE(generic.isa()); - EXPECT_TRUE(generic.isa()); - EXPECT_TRUE(generic.isa()); - - // Dimensions are not Symbols. - auto symbol = SDBMSymbolExpr::get(dialect(), 0); - EXPECT_NE(expr, symbol); - EXPECT_FALSE(expr.isa()); -} - -TEST(SDBMExpr, Symbol) { - // We can create symbol expressions and query them. - auto expr = SDBMSymbolExpr::get(dialect(), 0); - EXPECT_EQ(expr.getPosition(), 0u); - - // Two separately created symbols with the same position are trivially equal. - auto expr2 = SDBMSymbolExpr::get(dialect(), 0); - EXPECT_EQ(expr, expr2); - - // Hierarchy is okay. - auto generic = static_cast(expr); - EXPECT_TRUE(generic.isa()); - EXPECT_TRUE(generic.isa()); - EXPECT_TRUE(generic.isa()); - EXPECT_TRUE(generic.isa()); - EXPECT_TRUE(generic.isa()); - - // Dimensions are not Symbols. - auto symbol = SDBMDimExpr::get(dialect(), 0); - EXPECT_NE(expr, symbol); - EXPECT_FALSE(expr.isa()); -} - -TEST(SDBMExpr, Stripe) { - auto cst2 = SDBMConstantExpr::get(dialect(), 2); - auto cst0 = SDBMConstantExpr::get(dialect(), 0); - auto var = SDBMSymbolExpr::get(dialect(), 0); - - // We can create stripe expressions and query them. - auto expr = SDBMStripeExpr::get(var, cst2); - EXPECT_EQ(expr.getLHS(), var); - EXPECT_EQ(expr.getStripeFactor(), cst2); - - // Two separately created stripe expressions with the same LHS and RHS are - // trivially equal. - auto expr2 = SDBMStripeExpr::get(SDBMSymbolExpr::get(dialect(), 0), cst2); - EXPECT_EQ(expr, expr2); - - // Stripes can be nested. - SDBMStripeExpr::get(expr, SDBMConstantExpr::get(dialect(), 4)); - - // Non-positive stripe factors are not allowed. - EXPECT_DEATH(SDBMStripeExpr::get(var, cst0), "non-positive"); - - // Stripes can have sums on the LHS. - SDBMStripeExpr::get(SDBMSumExpr::get(var, cst2), cst2); - - // Hierarchy is okay. - auto generic = static_cast(expr); - EXPECT_TRUE(generic.isa()); - EXPECT_TRUE(generic.isa()); - EXPECT_TRUE(generic.isa()); - EXPECT_TRUE(generic.isa()); -} - -TEST(SDBMExpr, Neg) { - auto cst2 = SDBMConstantExpr::get(dialect(), 2); - auto var = SDBMSymbolExpr::get(dialect(), 0); - auto stripe = SDBMStripeExpr::get(var, cst2); - - // We can create negation expressions and query them. - auto expr = SDBMNegExpr::get(var); - EXPECT_EQ(expr.getVar(), var); - auto expr2 = SDBMNegExpr::get(stripe); - EXPECT_EQ(expr2.getVar(), stripe); - - // Neg expressions are trivially comparable. - EXPECT_EQ(expr, SDBMNegExpr::get(var)); - - // Hierarchy is okay. - auto generic = static_cast(expr); - EXPECT_TRUE(generic.isa()); - EXPECT_TRUE(generic.isa()); -} - -TEST(SDBMExpr, Sum) { - auto cst2 = SDBMConstantExpr::get(dialect(), 2); - auto var = SDBMSymbolExpr::get(dialect(), 0); - auto stripe = SDBMStripeExpr::get(var, cst2); - - // We can create sum expressions and query them. - auto expr = SDBMSumExpr::get(var, cst2); - EXPECT_EQ(expr.getLHS(), var); - EXPECT_EQ(expr.getRHS(), cst2); - auto expr2 = SDBMSumExpr::get(stripe, cst2); - EXPECT_EQ(expr2.getLHS(), stripe); - EXPECT_EQ(expr2.getRHS(), cst2); - - // Sum expressions are trivially comparable. - EXPECT_EQ(expr, SDBMSumExpr::get(var, cst2)); - - // Hierarchy is okay. - auto generic = static_cast(expr); - EXPECT_TRUE(generic.isa()); - EXPECT_TRUE(generic.isa()); - EXPECT_TRUE(generic.isa()); -} - -TEST(SDBMExpr, Diff) { - auto cst2 = SDBMConstantExpr::get(dialect(), 2); - auto var = SDBMSymbolExpr::get(dialect(), 0); - auto stripe = SDBMStripeExpr::get(var, cst2); - - // We can create sum expressions and query them. - auto expr = SDBMDiffExpr::get(var, stripe); - EXPECT_EQ(expr.getLHS(), var); - EXPECT_EQ(expr.getRHS(), stripe); - auto expr2 = SDBMDiffExpr::get(stripe, var); - EXPECT_EQ(expr2.getLHS(), stripe); - EXPECT_EQ(expr2.getRHS(), var); - - // Sum expressions are trivially comparable. - EXPECT_EQ(expr, SDBMDiffExpr::get(var, stripe)); - - // Hierarchy is okay. - auto generic = static_cast(expr); - EXPECT_TRUE(generic.isa()); - EXPECT_TRUE(generic.isa()); -} - -TEST(SDBMExpr, AffineRoundTrip) { - // Build an expression (s0 - s0 # 2) - auto cst2 = SDBMConstantExpr::get(dialect(), 2); - auto var = SDBMSymbolExpr::get(dialect(), 0); - auto stripe = SDBMStripeExpr::get(var, cst2); - auto expr = SDBMDiffExpr::get(var, stripe); - - // Check that it can be converted to AffineExpr and back, i.e. stripe - // detection works correctly. - Optional roundtripped = - SDBMExpr::tryConvertAffineExpr(expr.getAsAffineExpr()); - ASSERT_TRUE(roundtripped.hasValue()); - EXPECT_EQ(roundtripped, static_cast(expr)); - - // Check that (s0 # 2 # 5) can be converted to AffineExpr, i.e. stripe - // detection supports nested expressions. - auto cst5 = SDBMConstantExpr::get(dialect(), 5); - auto outerStripe = SDBMStripeExpr::get(stripe, cst5); - roundtripped = SDBMExpr::tryConvertAffineExpr(outerStripe.getAsAffineExpr()); - ASSERT_TRUE(roundtripped.hasValue()); - EXPECT_EQ(roundtripped, static_cast(outerStripe)); - - // Check that ((s0 + 2) # 5) can be round-tripped through AffineExpr, i.e. - // stripe detection supports sum expressions. - auto inner = SDBMSumExpr::get(var, cst2); - auto stripeSum = SDBMStripeExpr::get(inner, cst5); - roundtripped = SDBMExpr::tryConvertAffineExpr(stripeSum.getAsAffineExpr()); - ASSERT_TRUE(roundtripped.hasValue()); - EXPECT_EQ(roundtripped, static_cast(stripeSum)); - - // Check that (s0 # 2 # 5 - s0 # 2) + 2 can be converted as an example of a - // deeper expression tree. - auto sum = SDBMSumExpr::get(outerStripe, cst2); - auto diff = SDBMDiffExpr::get(sum, stripe); - roundtripped = SDBMExpr::tryConvertAffineExpr(diff.getAsAffineExpr()); - ASSERT_TRUE(roundtripped.hasValue()); - EXPECT_EQ(roundtripped, static_cast(diff)); - - // Check a nested stripe-sum combination. - auto cst7 = SDBMConstantExpr::get(dialect(), 7); - auto nestedStripe = - SDBMStripeExpr::get(SDBMSumExpr::get(stripeSum, cst2), cst7); - diff = SDBMDiffExpr::get(nestedStripe, stripe); - roundtripped = SDBMExpr::tryConvertAffineExpr(diff.getAsAffineExpr()); - ASSERT_TRUE(roundtripped.hasValue()); - EXPECT_EQ(roundtripped, static_cast(diff)); -} - -TEST(SDBMExpr, MatchStripeMulPattern) { - // Make sure conversion from AffineExpr recognizes multiplicative stripe - // pattern (x floordiv B) * B == x # B. - auto cst = getAffineConstantExpr(42, ctx()); - auto dim = getAffineDimExpr(0, ctx()); - auto floor = dim.floorDiv(cst); - auto mul = cst * floor; - Optional converted = SDBMStripeExpr::tryConvertAffineExpr(mul); - ASSERT_TRUE(converted.hasValue()); - EXPECT_TRUE(converted->isa()); -} - -TEST(SDBMExpr, NonSDBM) { - auto d0 = getAffineDimExpr(0, ctx()); - auto d1 = getAffineDimExpr(1, ctx()); - auto sum = d0 + d1; - auto c2 = getAffineConstantExpr(2, ctx()); - auto prod = d0 * c2; - auto ceildiv = d1.ceilDiv(c2); - - // The following are not valid SDBM expressions: - // - a sum of two variables - EXPECT_FALSE(SDBMExpr::tryConvertAffineExpr(sum).hasValue()); - // - a variable with coefficient other than 1 or -1 - EXPECT_FALSE(SDBMExpr::tryConvertAffineExpr(prod).hasValue()); - // - a ceildiv expression - EXPECT_FALSE(SDBMExpr::tryConvertAffineExpr(ceildiv).hasValue()); -} - -} // end namespace From 8ca04b05133b9fc1b891585e9dfd6e30790998ba Mon Sep 17 00:00:00 2001 From: Felipe de Azevedo Piovezan Date: Tue, 29 Jun 2021 14:57:16 +0200 Subject: [PATCH 166/619] [mlir] Add support for LLVM's dso_local attr This patch brings support for setting runtime preemption specifiers of LLVM's GlobalValues. In LLVM semantics, if the `dso_local` attribute is not explicitly requested, then it is inferred based on linkage and visibility. We model this same behavior with a UnitAttribute: if it is present, then we explicitly request the GlobalValue to marked as `dso_local`, otherwise we rely on the GlobalValue itself to make this decision. Reviewed By: ftynse Differential Revision: https://reviews.llvm.org/D104983 --- mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td | 4 ++++ mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp | 2 +- .../Conversion/StandardToLLVM/StandardToLLVM.cpp | 6 +++--- mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp | 8 ++++++-- mlir/lib/Target/LLVMIR/ModuleTranslation.cpp | 11 +++++++++++ mlir/test/Target/LLVMIR/llvmir.mlir | 16 ++++++++++++++++ 6 files changed, 41 insertions(+), 6 deletions(-) diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td index f714126ce9209..ed5f74c421614 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td @@ -907,6 +907,7 @@ def LLVM_GlobalOp : LLVM_Op<"mlir.global", UnitAttr:$constant, StrAttr:$sym_name, Linkage:$linkage, + UnitAttr:$dso_local, OptionalAttr:$value, OptionalAttr:$alignment, DefaultValuedAttr, "0">:$addr_space, @@ -1017,6 +1018,7 @@ def LLVM_GlobalOp : LLVM_Op<"mlir.global", "StringRef":$name, "Attribute":$value, CArg<"uint64_t", "0">:$alignment, CArg<"unsigned", "0">:$addrSpace, + CArg<"bool", "false">:$dsoLocal, CArg<"ArrayRef", "{}">:$attrs)> ]; @@ -1081,6 +1083,7 @@ def LLVM_LLVMFuncOp : LLVM_Op<"func", }]; let arguments = (ins DefaultValuedAttr:$linkage, + UnitAttr:$dso_local, OptionalAttr:$personality, OptionalAttr:$passthrough); @@ -1091,6 +1094,7 @@ def LLVM_LLVMFuncOp : LLVM_Op<"func", let builders = [ OpBuilder<(ins "StringRef":$name, "Type":$type, CArg<"Linkage", "Linkage::External">:$linkage, + CArg<"bool", "false">:$dsoLocal, CArg<"ArrayRef", "{}">:$attrs, CArg<"ArrayRef", "{}">:$argAttrs)> ]; diff --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp index 912f58d5cbff2..98aa0e0aea1e0 100644 --- a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp +++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp @@ -70,7 +70,7 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, attributes.emplace_back(kernelAttributeName, rewriter.getUnitAttr()); auto llvmFuncOp = rewriter.create( gpuFuncOp.getLoc(), gpuFuncOp.getName(), funcType, - LLVM::Linkage::External, attributes); + LLVM::Linkage::External, /*dsoLocal*/ false, attributes); { // Insert operations that correspond to converted workgroup and private diff --git a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp index eb390bf8844fa..f09f2a062a7fa 100644 --- a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp +++ b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp @@ -1266,7 +1266,7 @@ static void wrapForExternalCallers(OpBuilder &rewriter, Location loc, typeConverter.convertFunctionTypeCWrapper(type); auto wrapperFuncOp = rewriter.create( loc, llvm::formatv("_mlir_ciface_{0}", funcOp.getName()).str(), - wrapperFuncType, LLVM::Linkage::External, attributes); + wrapperFuncType, LLVM::Linkage::External, /*dsoLocal*/ false, attributes); OpBuilder::InsertionGuard guard(rewriter); rewriter.setInsertionPointToStart(wrapperFuncOp.addEntryBlock()); @@ -1330,7 +1330,7 @@ static void wrapExternalFunction(OpBuilder &builder, Location loc, // Create the auxiliary function. auto wrapperFunc = builder.create( loc, llvm::formatv("_mlir_ciface_{0}", funcOp.getName()).str(), - wrapperType, LLVM::Linkage::External, attributes); + wrapperType, LLVM::Linkage::External, /*dsoLocal*/ false, attributes); builder.setInsertionPointToStart(newFuncOp.addEntryBlock()); @@ -1441,7 +1441,7 @@ struct FuncOpConversionBase : public ConvertOpToLLVMPattern { // functions have linkage. auto newFuncOp = rewriter.create( funcOp.getLoc(), funcOp.getName(), llvmType, LLVM::Linkage::External, - attributes); + /*dsoLocal*/ false, attributes); rewriter.inlineRegionBefore(funcOp.getBody(), newFuncOp.getBody(), newFuncOp.end()); if (failed(rewriter.convertRegionTypes(&newFuncOp.getBody(), *typeConverter, diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp index 95f3460960649..8b9a8fa74b8c7 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp @@ -1267,7 +1267,7 @@ static StringRef getUnnamedAddrAttrName() { return "unnamed_addr"; } void GlobalOp::build(OpBuilder &builder, OperationState &result, Type type, bool isConstant, Linkage linkage, StringRef name, Attribute value, uint64_t alignment, unsigned addrSpace, - ArrayRef attrs) { + bool dsoLocal, ArrayRef attrs) { result.addAttribute(SymbolTable::getSymbolAttrName(), builder.getStringAttr(name)); result.addAttribute("type", TypeAttr::get(type)); @@ -1275,6 +1275,8 @@ void GlobalOp::build(OpBuilder &builder, OperationState &result, Type type, result.addAttribute("constant", builder.getUnitAttr()); if (value) result.addAttribute("value", value); + if (dsoLocal) + result.addAttribute("dso_local", builder.getUnitAttr()); // Only add an alignment attribute if the "alignment" input // is different from 0. The value must also be a power of two, but @@ -1756,7 +1758,7 @@ Block *LLVMFuncOp::addEntryBlock() { void LLVMFuncOp::build(OpBuilder &builder, OperationState &result, StringRef name, Type type, LLVM::Linkage linkage, - ArrayRef attrs, + bool dsoLocal, ArrayRef attrs, ArrayRef argAttrs) { result.addRegion(); result.addAttribute(SymbolTable::getSymbolAttrName(), @@ -1765,6 +1767,8 @@ void LLVMFuncOp::build(OpBuilder &builder, OperationState &result, result.addAttribute(getLinkageAttrName(), builder.getI64IntegerAttr(static_cast(linkage))); result.attributes.append(attrs.begin(), attrs.end()); + if (dsoLocal) + result.addAttribute("dso_local", builder.getUnitAttr()); if (argAttrs.empty()) return; diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp index 0d1ae3fd3c362..bbff0b0956c19 100644 --- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp @@ -425,6 +425,14 @@ static bool shouldDropGlobalInitializer(llvm::GlobalValue::LinkageTypes linkage, linkage == llvm::GlobalVariable::ExternalWeakLinkage; } +/// Sets the runtime preemption specifier of `gv` to dso_local if +/// `dsoLocalRequested` is true, otherwise it is left unchanged. +static void addRuntimePreemptionSpecifier(bool dsoLocalRequested, + llvm::GlobalValue *gv) { + if (dsoLocalRequested) + gv->setDSOLocal(true); +} + /// Create named global variables that correspond to llvm.mlir.global /// definitions. LogicalResult ModuleTranslation::convertGlobals() { @@ -458,6 +466,8 @@ LogicalResult ModuleTranslation::convertGlobals() { if (op.section().hasValue()) var->setSection(*op.section()); + addRuntimePreemptionSpecifier(op.dso_local(), var); + Optional alignment = op.alignment(); if (alignment.hasValue()) var->setAlignment(llvm::MaybeAlign(alignment.getValue())); @@ -687,6 +697,7 @@ LogicalResult ModuleTranslation::convertFunctionSignatures() { llvm::Function *llvmFunc = cast(llvmFuncCst.getCallee()); llvmFunc->setLinkage(convertLinkageToLLVM(function.linkage())); mapFunction(function.getName(), llvmFunc); + addRuntimePreemptionSpecifier(function.dso_local(), llvmFunc); // Forward the pass-through attributes to LLVM. if (failed(forwardPassthroughAttributes(function.getLoc(), diff --git a/mlir/test/Target/LLVMIR/llvmir.mlir b/mlir/test/Target/LLVMIR/llvmir.mlir index cb5358fde9cc5..551fa7e76a93c 100644 --- a/mlir/test/Target/LLVMIR/llvmir.mlir +++ b/mlir/test/Target/LLVMIR/llvmir.mlir @@ -79,6 +79,13 @@ llvm.mlir.global private local_unnamed_addr constant @local_unnamed_addr(42 : i6 // CHECK: @unnamed_addr = private unnamed_addr constant i64 42 llvm.mlir.global private unnamed_addr constant @unnamed_addr(42 : i64) : i64 +// +// dso_local attribute. +// + +llvm.mlir.global @has_dso_local(42 : i64) {dso_local} : i64 +// CHECK: @has_dso_local = dso_local global i64 42 + // // Section attribute. // @@ -428,6 +435,15 @@ llvm.func internal @func_internal() { llvm.return } +// +// dso_local attribute. +// + +// CHECK: define dso_local void @dso_local_func +llvm.func @dso_local_func() attributes {dso_local} { + llvm.return +} + // // MemRef type conversion, allocation and communication with functions. // From 8a3365fba2e9f4489780c9c9a6c356748b72e8c1 Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Tue, 29 Jun 2021 13:47:00 +0100 Subject: [PATCH 167/619] Revert "[NFC] Remove shadowed variable in InnerLoopVectorizer::createInductionVariable" This reverts commit dcfc2c3fac980b137415c17f2f19c06c3e2bd7fb. --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index bb0cb5e6ba6d8..f99352a3f075f 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3071,9 +3071,7 @@ PHINode *InnerLoopVectorizer::createInductionVariable(Loop *L, Value *Start, if (!Latch) Latch = Header; - IRBuilder<>::InsertPointGuard Guard(Builder); - Builder.SetInsertPoint(&*Header->getFirstInsertionPt()); - + IRBuilder<> Builder(&*Header->getFirstInsertionPt()); Instruction *OldInst = getDebugLocFromInstOrOperands(OldInduction); setDebugLocFromInst(Builder, OldInst); auto *Induction = Builder.CreatePHI(Start->getType(), 2, "index"); From 1092357ccdc9b12e1b129bf34819d16dbe14532f Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 29 Jun 2021 14:11:46 +0100 Subject: [PATCH 168/619] [SCCP] Add tests with urem/srem with 2 constant operands. Reduced test case for PR49731. --- .../SCCP/binaryops-range-special-cases.ll | 56 +++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/llvm/test/Transforms/SCCP/binaryops-range-special-cases.ll b/llvm/test/Transforms/SCCP/binaryops-range-special-cases.ll index f7fdc1ed5e64b..59c1aad6d1f79 100644 --- a/llvm/test/Transforms/SCCP/binaryops-range-special-cases.ll +++ b/llvm/test/Transforms/SCCP/binaryops-range-special-cases.ll @@ -95,3 +95,59 @@ bb3: call void @use(i1 %c.1) ret void } + +define void @urem_cmp_constants() { +; CHECK-LABEL: @urem_cmp_constants( +; CHECK-NEXT: [[UREM_1:%.*]] = urem i16 12704, 12704 +; CHECK-NEXT: [[C_1:%.*]] = icmp eq i16 [[UREM_1]], 0 +; CHECK-NEXT: call void @use(i1 [[C_1]]) +; CHECK-NEXT: [[C_2:%.*]] = icmp eq i16 [[UREM_1]], 1 +; CHECK-NEXT: call void @use(i1 [[C_2]]) +; CHECK-NEXT: [[UREM_2:%.*]] = urem i16 12704, 3 +; CHECK-NEXT: [[C_3:%.*]] = icmp eq i16 [[UREM_2]], 2 +; CHECK-NEXT: call void @use(i1 [[C_3]]) +; CHECK-NEXT: [[C_4:%.*]] = icmp eq i16 [[UREM_2]], 1 +; CHECK-NEXT: call void @use(i1 [[C_4]]) +; CHECK-NEXT: ret void +; + %sel = select i1 false, i16 0, i16 12704 + %urem.1 = urem i16 %sel, 12704 + %c.1 = icmp eq i16 %urem.1, 0 + call void @use(i1 %c.1) + %c.2 = icmp eq i16 %urem.1, 1 + call void @use(i1 %c.2) + %urem.2 = urem i16 %sel, 3 + %c.3 = icmp eq i16 %urem.2, 2 + call void @use(i1 %c.3) + %c.4 = icmp eq i16 %urem.2, 1 + call void @use(i1 %c.4) + ret void +} + +define void @srem_cmp_constants() { +; CHECK-LABEL: @srem_cmp_constants( +; CHECK-NEXT: [[SREM_1:%.*]] = srem i16 12704, 12704 +; CHECK-NEXT: [[C_1:%.*]] = icmp eq i16 [[SREM_1]], 0 +; CHECK-NEXT: call void @use(i1 [[C_1]]) +; CHECK-NEXT: [[C_2:%.*]] = icmp eq i16 [[SREM_1]], 1 +; CHECK-NEXT: call void @use(i1 [[C_2]]) +; CHECK-NEXT: [[SREM_2:%.*]] = srem i16 12704, 3 +; CHECK-NEXT: [[C_3:%.*]] = icmp eq i16 [[SREM_2]], 2 +; CHECK-NEXT: call void @use(i1 [[C_3]]) +; CHECK-NEXT: [[C_4:%.*]] = icmp eq i16 [[SREM_2]], 1 +; CHECK-NEXT: call void @use(i1 [[C_4]]) +; CHECK-NEXT: ret void +; + %sel = select i1 false, i16 0, i16 12704 + %srem.1 = srem i16 %sel, 12704 + %c.1 = icmp eq i16 %srem.1, 0 + call void @use(i1 %c.1) + %c.2 = icmp eq i16 %srem.1, 1 + call void @use(i1 %c.2) + %srem.2 = srem i16 %sel, 3 + %c.3 = icmp eq i16 %srem.2, 2 + call void @use(i1 %c.3) + %c.4 = icmp eq i16 %srem.2, 1 + call void @use(i1 %c.4) + ret void +} From c82957e79236f9f5ef2598ab86138d43fd987932 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Tue, 29 Jun 2021 14:19:01 +0100 Subject: [PATCH 169/619] ARM: fix vacuously true assertion to actually check what it should. NFC. --- llvm/lib/Target/ARM/ARMFrameLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp index 42a99d7b48434..a2ad626313294 100644 --- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -1011,7 +1011,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, } if (ReservedArgStack || IncomingArgStackToRestore) { - assert(ReservedArgStack + IncomingArgStackToRestore >= 0 && + assert((int)ReservedArgStack + IncomingArgStackToRestore >= 0 && "attempting to restore negative stack amount"); emitSPUpdate(isARM, MBB, MBBI, dl, TII, ReservedArgStack + IncomingArgStackToRestore, From aed0a08c69cfb274a1e005e3bd9865ac15c2dfb7 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Tue, 29 Jun 2021 09:21:33 -0400 Subject: [PATCH 170/619] [lld/mac] Make symbol table order deterministic SymtabSection::emitStabs() writes the symbol table in the order of externalSymbols, which has the order of symtab->getSymbols(), which is just the order symbols are added to the symbol table. In practice, symbols in the symbol files of input .o files are sorted, but since that's not guaranteed we sort them in ObjFile::parseSymbols(). To make sure several symbols with the same address keep the order they're in the input file, we have to use stable_sort(). In practice, std::sort() on already-sorted inputs won't change the order of just adjacent elements, and while in theory std::sort() could use a random pivot, in practice the code should be deterministic as it was previously too. But now lld/test/MachO/stabs.s passes with LLVM_ENABLE_EXPENSIVE_CHECKS=ON (the last test that was failing with that set). Fixes a regression from D99972. While here, remove an empty section in stabs.s and move .subsections_via_symbols to the end where it usually is (this part no behavior change). Differential Revision: https://reviews.llvm.org/D105071 --- lld/MachO/InputFiles.cpp | 2 +- lld/test/MachO/stabs.s | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp index 4025a4c55ab1d..f75c65f9370d8 100644 --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -601,7 +601,7 @@ void ObjFile::parseSymbols(ArrayRef sectionHeaders, continue; std::vector &symbolIndices = symbolsBySection[i]; - llvm::sort(symbolIndices, [&](uint32_t lhs, uint32_t rhs) { + llvm::stable_sort(symbolIndices, [&](uint32_t lhs, uint32_t rhs) { return nList[lhs].n_value < nList[rhs].n_value; }); uint64_t sectionAddr = sectionHeaders[i].addr; diff --git a/lld/test/MachO/stabs.s b/lld/test/MachO/stabs.s index f6c529774de64..bafd6e2c6a9ca 100644 --- a/lld/test/MachO/stabs.s +++ b/lld/test/MachO/stabs.s @@ -191,14 +191,14 @@ Ldebug_info_start0: .long Lset3 .byte 0 ## End Of Children Mark Ldebug_info_end0: -.subsections_via_symbols -.section __DWARF,__debug_line,regular,debug .section OTHER,more_text,regular,pure_instructions .globl _fun _fun: ret +.subsections_via_symbols + #--- foo.s .text .globl _foo @@ -240,13 +240,13 @@ Ldebug_info_start0: .long Lset3 .byte 0 ## End Of Children Mark Ldebug_info_end0: -.subsections_via_symbols -.section __DWARF,__debug_line,regular,debug .section __DWARF,__debug_aranges,regular,debug ltmp1: .byte 0 +.subsections_via_symbols + #--- no-debug.s ## This file has no debug info. .text From b661d9f9c35e5d5689ec20b825de203c08404c9a Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Thu, 24 Jun 2021 13:58:57 +0000 Subject: [PATCH 171/619] [mlir][Linalg] NFC - Drop AliasInfo::existsNonDominatingRead The case where a non-dominating read can be found is captured by slightly generalizing `AliasInfo::wouldCreaateReadAfterWriteInterference`. This simplification will make it easier to implement bufferization across function call. APIs are also simplified were possible. Differential revision: https://reviews.llvm.org/D104845 --- .../Transforms/ComprehensiveBufferize.cpp | 195 +++++++++--------- 1 file changed, 94 insertions(+), 101 deletions(-) diff --git a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp index 287d2d47ca7fe..3875965e9b92c 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp @@ -391,6 +391,35 @@ static OpResult getInplaceableOpResult(OpOperand &opOperand) { // clang-format on } +/// Determine which OpOperand* will alias with `result` if the op is bufferized +/// in place. +/// Return None if the owner of `opOperand` does not have known +/// bufferization aliasing behavior, which indicates that the op must allocate +/// all of its tensor results. +/// TODO: in the future this may need to evolve towards a list of OpOperand*. +static Optional getAliasingOpOperand(OpResult result) { + if (!hasKnownBufferizationAliasingBehavior(result.getDefiningOp())) + return None; + return TypeSwitch(result.getDefiningOp()) + .Case([&](LinalgOp op) { + return op.getOutputTensorOperands()[result.getResultNumber()]; + }) + .Case([&](ExtractSliceOp op) { return &op->getOpOperand(0); }) + .Case([&](InsertSliceOp op) { return &op->getOpOperand(1); }) + .Case([&](vector::TransferWriteOp op) { return &op->getOpOperand(1); }) + // In the case of scf::ForOp, this currently assumes the iter_args / yield + // are 1-1. This may fail and is verified at the end. + // TODO: update this. + .Case([&](scf::ForOp op) { + return &op.getIterOpOperands()[result.getResultNumber()]; + }) + .Default([&](Operation *op) { + op->dump(); + llvm_unreachable("unexpected defining op"); + return nullptr; + }); +} + /// Determine which OpResult will alias with `opOperand` if the op is bufferized /// in place. This is a superset of `getInplaceableOpResult`. /// Return None if the owner of `opOperand` does not have known @@ -512,7 +541,7 @@ class BufferizationAliasInfo { void bufferizeOutOfPlace(OpResult result); /// Return true if it is possible to find an inplace write W among the uses of - /// aliasInfo[rootWrite], and a read R among the uses of aliasInfo[rootRead], + /// aliasInfo[result], and a read R among the uses of aliasInfo[result], /// such that W and R interfere. /// Such a (W, R) pair is an interference to the inplace bufferization of /// rootWrite when: @@ -522,15 +551,9 @@ class BufferizationAliasInfo { /// C interleaved between W and R (i.e. W -> C -> R where -> denotes /// dominance). bool - wouldCreateReadAfterWriteInterference(Value rootWrite, Value rootRead, - Operation *opToBufferize, + wouldCreateReadAfterWriteInterference(OpResult result, const DominanceInfo &domInfo) const; - /// Return true if we find any read to opOperand.get() or any of its aliases, - /// that does not dominate opOperand.getOwner(). - bool existsNonDominatingRead(OpOperand &opOperand, - const DominanceInfo &domInfo) const; - /// Return true if `v1` and `v2` bufferize to equivalent buffers. bool areEquivalentBufferizedValues(Value v1, Value v2) const { return equivalentInfo.getLeaderValue(v1) == @@ -612,8 +635,8 @@ class BufferizationAliasInfo { /// /// Capture possible cases where `aliasingWriteOp(alias(%rootWrite))` has no /// visible effect on `aliasingReadOp(alias(%rootRead))`. - bool isClobberedWriteBeforeRead(Operation *opToBufferize, Value rootRead, - Value rootWrite, OpOperand &aliasingRead, + bool isClobberedWriteBeforeRead(Operation *opToBufferize, + OpOperand &aliasingRead, OpOperand &aliasingWrite, const DominanceInfo &domInfo) const; @@ -736,59 +759,77 @@ void BufferizationAliasInfo::bufferizeOutOfPlace(OpResult result) { setInPlaceOpResult(result, InPlaceSpec::False); } -/// Return true if merging the alias sets of `rootWrite` and `rootRead` would -/// result in a semantic change in the program (i.e. RAW violation). -/// -/// This is the case when one can find an inplace write W among the aliases -/// `rootWrite`, that may become an interference if W were to be bufferized -/// inplace. A potential interference would be with respect to a read R among -/// the aliases of `rootRead`. -/// +/// Return true if it is possible to find an inplace write W among the uses of +/// aliasInfo[result], and a read R among the uses of aliasInfo[result], +/// such that W and R interfere. /// Such a (W, R) pair is an interference to the inplace bufferization of -/// rootWrite when R does not properly dominate W (i.e. W may come before R -/// along some control-flow path). +/// rootWrite when: +/// 1. R is not known properly dominate W (i.e. the effects of the write may +/// be visible from R). +/// 2. one cannot find an intermediate clobbering write `C` to W, such that +/// C interleaved between W and R (i.e. W -> C -> R where -> denotes +/// dominance). bool BufferizationAliasInfo::wouldCreateReadAfterWriteInterference( - Value rootWrite, Value rootRead, Operation *opToBufferize, - const DominanceInfo &domInfo) const { + OpResult result, const DominanceInfo &domInfo) const { + Optional maybeAliasingOperand = getAliasingOpOperand(result); + if (!maybeAliasingOperand) + return false; + + Operation *opToBufferize = result.getDefiningOp(); + Value root = (*maybeAliasingOperand)->get(); LDBG("----Start wouldCreateReadAfterWriteInterference\n"); + LDBG("--------rootValue: " << root << "\n"); - // Collect all the inplace write uses of some alias of `rootWrite`. + // Collect: + // 1. all the inplace write uses of some alias of `root`. + // 2. all the write uses that belong to `opToBufferize`. + // opToBufferize is not yet inplace, we want to determine if it can be inplace + // so we also consider all its write uses, not just the inplace ones. DenseSet usesWrite; - auto &aliasListWrite = getAliasInfoRef(rootWrite); - for (Value vWrite : aliasListWrite) { + for (Value vWrite : getAliasInfoRef(root)) { for (auto &uWrite : vWrite.getUses()) { - if (!bufferizesToMemoryWrite(uWrite, InPlaceSpec::True)) + if (!bufferizesToMemoryWrite(uWrite)) continue; - usesWrite.insert(&uWrite); + if (uWrite.getOwner() == opToBufferize || + bufferizesToMemoryWrite(uWrite, InPlaceSpec::True)) + usesWrite.insert(&uWrite); } } - - // Collect all the read uses of some alias of `rootRead`. + for (Value vWrite : getAliasInfoRef(result)) + for (auto &uWrite : vWrite.getUses()) + if (bufferizesToMemoryWrite(uWrite, InPlaceSpec::True)) + usesWrite.insert(&uWrite); + + // Collect all the reads of some alias of `root`. + // opToBufferize is not yet inplace, we want to determine if it can be inplace + // so we also consider all read uses of its result. DenseSet usesRead; - auto &aliasListRead = getAliasInfoRef(rootRead); - for (Value vRead : aliasListRead) { - for (auto &uRead : vRead.getUses()) { - if (!bufferizesToMemoryRead(uRead)) - continue; - usesRead.insert(&uRead); - } - } + auto &aliasListRead = getAliasInfoRef(root); + for (Value vRead : aliasListRead) + for (auto &uRead : vRead.getUses()) + if (bufferizesToMemoryRead(uRead)) + usesRead.insert(&uRead); + for (Value vRead : getAliasInfoRef(result)) + for (auto &uRead : vRead.getUses()) + if (bufferizesToMemoryRead(uRead)) + usesRead.insert(&uRead); for (OpOperand *uRead : usesRead) { Operation *aliasingReadOp = uRead->getOwner(); LDBG("----++++aliasRead #" << uRead->getOperandNumber() << " in: " << *aliasingReadOp << '\n'); for (OpOperand *uWrite : usesWrite) { - // Don't consider self-use of the same operand. - // Uses within the same op is fine though. + // Don't consider self-use of the same operand for interference. + // Multiple different uses within the same op is fair game though. if (uWrite == uRead) continue; + Operation *aliasingWriteOp = uWrite->getOwner(); LDBG("---- aliasWrite #" << uWrite->getOperandNumber() << " in: " << *aliasingWriteOp << '\n'); - // If read and written value already alias, no interference would be added - // by bufferizing inplace. - if (getAliasInfoRef(uRead->get()).contains(uWrite->get())) + // If the candidate write is the one that produces the read value (in the + // SSA def-use sense), this is not considered an interference. + if (getInplaceableOpResult(*uWrite) == uRead->get()) continue; // If aliasingReadOp properly dominates aliasingWriteOp, the read cannot // be affected by the write: there is no interference. @@ -801,12 +842,8 @@ bool BufferizationAliasInfo::wouldCreateReadAfterWriteInterference( << "): " << *aliasingReadOp << '\n'); LDBG(" Interfering write (op #" << uWrite->getOperandNumber() << "): " << *aliasingWriteOp << '\n'); - LDBG(" aliases rootRead: " << rootRead << '\n'); - LDBG(" aliases rootWrite: " << rootWrite << '\n'); LDBG("---->opportunity to clobber RaW interference\n"); - if (isClobberedWriteBeforeRead(opToBufferize, rootRead, rootWrite, *uRead, - *uWrite, domInfo)) { - + if (isClobberedWriteBeforeRead(opToBufferize, *uRead, *uWrite, domInfo)) { LDBG("---->clobbered! -> skip\n"); continue; } @@ -819,35 +856,6 @@ bool BufferizationAliasInfo::wouldCreateReadAfterWriteInterference( return false; } -/// Return true if we find any read to opOperand.get() or any of its aliases, -/// that does not dominate opOperand.getOwner(). -bool BufferizationAliasInfo::existsNonDominatingRead( - OpOperand &opOperand, const DominanceInfo &domInfo) const { - LDBG("----Start existsNonDominatingRead\n"); - Operation *op = opOperand.getOwner(); - for (Value alias : getAliasInfoRef(opOperand.get())) { - for (OpOperand &wantReadUse : alias.getUses()) { - LDBG("--------current operand #" << wantReadUse.getOperandNumber() << ": " - << *(wantReadUse.getOwner()) << '\n'); - if (!bufferizesToMemoryRead(wantReadUse)) { - LDBG("------------not a read -> skip\n"); - continue; - } - if (&wantReadUse == &opOperand) { - LDBG("------------self-read is not an interference -> skip\n"); - continue; - } - if (domInfo.properlyDominates(wantReadUse.getOwner(), op)) { - LDBG("------------read properly dominates -> skip\n"); - continue; - } - LDBG("----found interfering read of " << wantReadUse.get() << '\n'); - return true; - } - } - return false; -} - /// Return true if the source of a `insertSliceOp` bufferizes to an /// equivalent ExtractSliceOp. bool BufferizationAliasInfo::isSourceEquivalentToAMatchingExtractSliceOp( @@ -981,20 +989,13 @@ bool BufferizationAliasInfo::existsInterleavedValueClobber( /// 3. Clobbers the write that would be interfering with the read. /// bool BufferizationAliasInfo::isClobberedWriteBeforeRead( - Operation *opToBufferize, Value rootRead, Value rootWrite, - OpOperand &aliasingRead, OpOperand &aliasingWrite, + Operation *opToBufferize, OpOperand &aliasingRead, OpOperand &aliasingWrite, const DominanceInfo &domInfo) const { Operation *aliasingReadOp = aliasingRead.getOwner(); Operation *aliasingWriteOp = aliasingWrite.getOwner(); assert(!domInfo.properlyDominates(aliasingReadOp, aliasingWriteOp) && "Unexpected aliasingReadOp properly dominates aliasingWriteOp"); - assert(((rootRead.isa() && - rootRead.getDefiningOp() == opToBufferize) || - (rootWrite.isa() && - rootWrite.getDefiningOp() == opToBufferize)) && - "Expected rootRead or rootWrite to be produced by opToBufferize"); - // Bail if the write does not dominate the read: it may clobber but only on // a strict subset of paths, which is not enough for safety. if (!domInfo.dominates(aliasingWriteOp, aliasingReadOp)) { @@ -1581,14 +1582,9 @@ bufferizableInPlaceAnalysis(ExtractSliceOp extractSliceOp, // an interfering write? OpResult r = extractSliceOp->getResult(0); OpOperand &s = extractSliceOp->getOpOperand(0); - bool foundInterference = wouldCreateAliasingWriteToNonWriteableBuffer || - // Do not consider (s, s) and (r, r) as all the - // aliasings already exist by construction; we are - // interested in new interfering aliases only. - aliasInfo.wouldCreateReadAfterWriteInterference( - s.get(), r, extractSliceOp, domInfo) || - aliasInfo.wouldCreateReadAfterWriteInterference( - r, s.get(), extractSliceOp, domInfo); + bool foundInterference = + wouldCreateAliasingWriteToNonWriteableBuffer || + aliasInfo.wouldCreateReadAfterWriteInterference(r, domInfo); if (foundInterference) aliasInfo.bufferizeOutOfPlace(r); else @@ -1618,8 +1614,10 @@ bufferizableInPlaceAnalysis(OpOperand &operand, OpResult result, << result << '\n'); // `result` must bufferize to a writeable buffer to be a candidate. - // This means the use->def chain not backpropagate to a function that is - // not inplaceable or to a constant op to be considered. + // This means the operand must not alias either: + // 1. a function bbArg that is not inplaceable or + // 2. a constant op. + // to be considered for inplace bufferization bool wouldCreateAliasingWriteToNonWriteableBuffer = aliasInfo.aliasesNonWriteableBuffer(operand); if (wouldCreateAliasingWriteToNonWriteableBuffer) @@ -1627,15 +1625,10 @@ bufferizableInPlaceAnalysis(OpOperand &operand, OpResult result, else LDBG("->bufferizes to writeable inplace buffer\n"); - Value s = operand.get(), r = result; + assert(result == getInplaceableOpResult(operand)); bool foundInterference = wouldCreateAliasingWriteToNonWriteableBuffer || - aliasInfo.existsNonDominatingRead(operand, domInfo) || - // Do not consider (s, s) and (r, r) as all the aliasings already - // exist by construction; we are interested in new interfering aliases - // only. - aliasInfo.wouldCreateReadAfterWriteInterference(s, r, op, domInfo) || - aliasInfo.wouldCreateReadAfterWriteInterference(r, s, op, domInfo); + aliasInfo.wouldCreateReadAfterWriteInterference(result, domInfo); if (foundInterference) aliasInfo.bufferizeOutOfPlace(result); From 9dde51416209a5552156384b9c2b08b676818d70 Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Tue, 29 Jun 2021 14:28:49 +0100 Subject: [PATCH 172/619] [NFC] Remove shadowed variable in InnerLoopVectorizer::createInductionVariable Avoid creating a IRBuilder stack variable with the same name as the class member. --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index f99352a3f075f..c79c57cb2bdb5 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3071,7 +3071,13 @@ PHINode *InnerLoopVectorizer::createInductionVariable(Loop *L, Value *Start, if (!Latch) Latch = Header; - IRBuilder<> Builder(&*Header->getFirstInsertionPt()); + // Set the Builder to a valid Block pointer as the existing one could get + // deleted below. + Builder.SetInsertPoint(&*LoopVectorBody->getFirstInsertionPt()); + + IRBuilder<>::InsertPointGuard Guard(Builder); + Builder.SetInsertPoint(&*Header->getFirstInsertionPt()); + Instruction *OldInst = getDebugLocFromInstOrOperands(OldInduction); setDebugLocFromInst(Builder, OldInst); auto *Induction = Builder.CreatePHI(Start->getType(), 2, "index"); From f0d6c9156b129597f2215b4123ebaae8a3eb57a3 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 29 Jun 2021 14:36:20 +0100 Subject: [PATCH 173/619] [X86] Add cmov i33 sgt test case Suggested on D101074 - add a 'icmp sgt i64 %0, -2147483649' comparison that can fold to 'icmp sge i64 %0, -2147483648' on D101074 allowing i32 immediate folding --- llvm/test/CodeGen/X86/cmov.ll | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/llvm/test/CodeGen/X86/cmov.ll b/llvm/test/CodeGen/X86/cmov.ll index 1f4d6d8c6ac57..9aaf8eb5463c3 100644 --- a/llvm/test/CodeGen/X86/cmov.ll +++ b/llvm/test/CodeGen/X86/cmov.ll @@ -198,6 +198,19 @@ define i8 @test7(i1 inreg %c, i8 inreg %a, i8 inreg %b) nounwind { ret i8 %d } +define i64 @test8(i64 %0, i64 %1, i64 %2) { +; CHECK-LABEL: test8: +; CHECK: # %bb.0: +; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: movabsq $-2147483649, %rcx # imm = 0xFFFFFFFF7FFFFFFF +; CHECK-NEXT: cmpq %rcx, %rdi +; CHECK-NEXT: cmovleq %rdx, %rax +; CHECK-NEXT: retq + %4 = icmp sgt i64 %0, -2147483649 + %5 = select i1 %4, i64 %1, i64 %2 + ret i64 %5 +} + define i32 @smin(i32 %x) { ; CHECK-LABEL: smin: ; CHECK: # %bb.0: From c85175c5f6a96e44b3b343fa7b26d66bbc0b4973 Mon Sep 17 00:00:00 2001 From: Ben Shi Date: Tue, 29 Jun 2021 21:44:50 +0800 Subject: [PATCH 174/619] [AVR] Fix a bug in prologue of ISR The r1 register should be cleared in prologue of ISR as it is used as constant zero. Reviewed By: dylanmckay Differential Revision: https://reviews.llvm.org/D99467 --- llvm/lib/Target/AVR/AVRFrameLowering.cpp | 5 +++++ llvm/test/CodeGen/AVR/interrupts.ll | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/llvm/lib/Target/AVR/AVRFrameLowering.cpp b/llvm/lib/Target/AVR/AVRFrameLowering.cpp index e99801032037c..89ed30e8bcdb0 100644 --- a/llvm/lib/Target/AVR/AVRFrameLowering.cpp +++ b/llvm/lib/Target/AVR/AVRFrameLowering.cpp @@ -83,6 +83,11 @@ void AVRFrameLowering::emitPrologue(MachineFunction &MF, .addReg(AVR::R0, RegState::Kill) .addReg(AVR::R0, RegState::Kill) .setMIFlag(MachineInstr::FrameSetup); + BuildMI(MBB, MBBI, DL, TII.get(AVR::EORRdRr)) + .addReg(AVR::R1, RegState::Define) + .addReg(AVR::R1, RegState::Kill) + .addReg(AVR::R1, RegState::Kill) + .setMIFlag(MachineInstr::FrameSetup); } // Early exit if the frame pointer is not needed in this function. diff --git a/llvm/test/CodeGen/AVR/interrupts.ll b/llvm/test/CodeGen/AVR/interrupts.ll index c6550a0fb6ae5..e112de0a0ddd8 100644 --- a/llvm/test/CodeGen/AVR/interrupts.ll +++ b/llvm/test/CodeGen/AVR/interrupts.ll @@ -8,6 +8,7 @@ define avr_intrcc void @interrupt_handler() { ; CHECK-NEXT: in r0, 63 ; CHECK-NEXT: push r0 ; CHECK: clr r0 +; CHECK-NEXT: clr r1 ; CHECK: pop r0 ; CHECK-NEXT: out 63, r0 ; CHECK-NEXT: pop r1 @@ -24,6 +25,7 @@ define void @interrupt_handler_via_ir_attribute() #0 { ; CHECK-NEXT: in r0, 63 ; CHECK-NEXT: push r0 ; CHECK: clr r0 +; CHECK-NEXT: clr r1 ; CHECK: pop r0 ; CHECK-NEXT: out 63, r0 ; CHECK-NEXT: pop r1 @@ -40,6 +42,7 @@ define avr_signalcc void @signal_handler() { ; CHECK-NEXT: in r0, 63 ; CHECK-NEXT: push r0 ; CHECK: clr r0 +; CHECK-NEXT: clr r1 ; CHECK: pop r0 ; CHECK-NEXT: out 63, r0 ; CHECK-NEXT: pop r1 @@ -56,6 +59,7 @@ define void @signal_handler_via_attribute() #1 { ; CHECK-NEXT: in r0, 63 ; CHECK-NEXT: push r0 ; CHECK: clr r0 +; CHECK-NEXT: clr r1 ; CHECK: pop r0 ; CHECK-NEXT: out 63, r0 ; CHECK-NEXT: pop r1 @@ -72,6 +76,7 @@ define avr_intrcc void @interrupt_alloca() { ; CHECK-NEXT: in r0, 63 ; CHECK-NEXT: push r0 ; CHECK: clr r0 +; CHECK-NEXT: clr r1 ; CHECK: push r28 ; CHECK-NEXT: push r29 ; CHECK-NEXT: in r28, 61 From 010108bb2c88511f7bb4f432b9c365e6aef81468 Mon Sep 17 00:00:00 2001 From: Jeremy Morse Date: Tue, 29 Jun 2021 13:48:49 +0100 Subject: [PATCH 175/619] [DebugInstrRef][3/3] Follow DBG_PHI instructions through LiveDebugValues This patch reads machine value numbers from DBG_PHI instructions (marking where SSA PHIs used to be), and matches them up with DBG_INSTR_REF instructions that refer to them. Essentially they are two separate parts of a DBG_VALUE: the place to read the value (register and program position), and where the variable is assigned that value. Sometimes these DBG_PHIs can be duplicated, usually by tail duplication. This corresponds to the SSA structure of the program being destroyed, and the original PHI being split. When this happens: run LLVMs standard SSAUpdater utility, to work out what values should appear in which blocks. The majority of this patch is boilerplate to make use of SSAUpdater. If there are any additional PHIs on the path between multiple DBG_PHIs and their using DBG_INSTR_REF, their existance is validated, just in case a value gets clobbered along the way (see dbg-phis-with-loops.mir for several examples). Differential Revision: https://reviews.llvm.org/D86814 --- .../LiveDebugValues/InstrRefBasedImpl.cpp | 522 +++++++++++++++++- .../MIR/InstrRef/dbg-phis-in-ldv.mir | 162 ++++++ .../MIR/InstrRef/dbg-phis-merging-in-ldv.mir | 199 +++++++ .../MIR/InstrRef/dbg-phis-with-loops.mir | 205 +++++++ 4 files changed, 1076 insertions(+), 12 deletions(-) create mode 100644 llvm/test/DebugInfo/MIR/InstrRef/dbg-phis-in-ldv.mir create mode 100644 llvm/test/DebugInfo/MIR/InstrRef/dbg-phis-merging-in-ldv.mir create mode 100644 llvm/test/DebugInfo/MIR/InstrRef/dbg-phis-with-loops.mir diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp index 75a0ab641e435..8e0588241bb2b 100644 --- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp @@ -148,6 +148,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" @@ -184,6 +185,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/TypeSize.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/SSAUpdaterImpl.h" #include #include #include @@ -199,6 +201,8 @@ using namespace llvm; +// SSAUpdaterImple sets DEBUG_TYPE, change it. +#undef DEBUG_TYPE #define DEBUG_TYPE "livedebugvalues" // Act more like the VarLoc implementation, by propagating some locations too @@ -1329,6 +1333,7 @@ class InstrRefBasedLDV : public LDVImpl { const TargetRegisterInfo *TRI; const TargetInstrInfo *TII; const TargetFrameLowering *TFI; + const MachineFrameInfo *MFI; BitVector CalleeSavedRegs; LexicalScopes LS; TargetPassConfig *TPC; @@ -1369,6 +1374,23 @@ class InstrRefBasedLDV : public LDVImpl { /// instruction numbers in DBG_INSTR_REFs into machine value numbers. std::map DebugInstrNumToInstr; + /// Record of where we observed a DBG_PHI instruction. + class DebugPHIRecord { + public: + uint64_t InstrNum; ///< Instruction number of this DBG_PHI. + MachineBasicBlock *MBB; ///< Block where DBG_PHI occurred. + ValueIDNum ValueRead; ///< The value number read by the DBG_PHI. + LocIdx ReadLoc; ///< Register/Stack location the DBG_PHI reads. + + operator unsigned() const { return InstrNum; } + }; + + /// Map from instruction numbers defined by DBG_PHIs to a record of what that + /// DBG_PHI read and where. Populated and edited during the machine value + /// location problem -- we use LLVMs SSA Updater to fix changes by + /// optimizations that destroy PHI instructions. + SmallVector DebugPHINumToValue; + // Map of overlapping variable fragments. OverlapMap OverlapFragments; VarToFragments SeenFragments; @@ -1395,7 +1417,8 @@ class InstrRefBasedLDV : public LDVImpl { SpillLoc extractSpillBaseRegAndOffset(const MachineInstr &MI); /// Observe a single instruction while stepping through a block. - void process(MachineInstr &MI); + void process(MachineInstr &MI, ValueIDNum **MLiveOuts = nullptr, + ValueIDNum **MLiveIns = nullptr); /// Examines whether \p MI is a DBG_VALUE and notifies trackers. /// \returns true if MI was recognized and processed. @@ -1403,7 +1426,13 @@ class InstrRefBasedLDV : public LDVImpl { /// Examines whether \p MI is a DBG_INSTR_REF and notifies trackers. /// \returns true if MI was recognized and processed. - bool transferDebugInstrRef(MachineInstr &MI); + bool transferDebugInstrRef(MachineInstr &MI, ValueIDNum **MLiveOuts, + ValueIDNum **MLiveIns); + + /// Stores value-information about where this PHI occurred, and what + /// instruction number is associated with it. + /// \returns true if MI was recognized and processed. + bool transferDebugPHI(MachineInstr &MI); /// Examines whether \p MI is copy instruction, and notifies trackers. /// \returns true if MI was recognized and processed. @@ -1422,6 +1451,18 @@ class InstrRefBasedLDV : public LDVImpl { void accumulateFragmentMap(MachineInstr &MI); + /// Determine the machine value number referred to by (potentially several) + /// DBG_PHI instructions. Block duplication and tail folding can duplicate + /// DBG_PHIs, shifting the position where values in registers merge, and + /// forming another mini-ssa problem to solve. + /// \p Here the position of a DBG_INSTR_REF seeking a machine value number + /// \p InstrNum Debug instruction number defined by DBG_PHI instructions. + /// \returns The machine value number at position Here, or None. + Optional resolveDbgPHIs(MachineFunction &MF, + ValueIDNum **MLiveOuts, + ValueIDNum **MLiveIns, MachineInstr &Here, + uint64_t InstrNum); + /// Step through the function, recording register definitions and movements /// in an MLocTracker. Convert the observations into a per-block transfer /// function in \p MLocTransfer, suitable for using with the machine value @@ -1524,7 +1565,7 @@ class InstrRefBasedLDV : public LDVImpl { /// right now "order of appearence in function, when explored in RPO", so /// that we can compare explictly against VarLocBasedImpl. void emitLocations(MachineFunction &MF, LiveInsT SavedLiveIns, - ValueIDNum **MInLocs, + ValueIDNum **MOutLocs, ValueIDNum **MInLocs, DenseMap &AllVarsNumbering); /// Boilerplate computation of some initial sets, artifical blocks and @@ -1637,7 +1678,9 @@ bool InstrRefBasedLDV::transferDebugValue(const MachineInstr &MI) { return true; } -bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI) { +bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI, + ValueIDNum **MLiveOuts, + ValueIDNum **MLiveIns) { if (!MI.isDebugRef()) return false; @@ -1679,8 +1722,10 @@ bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI) { Optional NewID = None; // Try to lookup the instruction number, and find the machine value number - // that it defines. + // that it defines. It could be an instruction, or a PHI. auto InstrIt = DebugInstrNumToInstr.find(InstNo); + auto PHIIt = std::lower_bound(DebugPHINumToValue.begin(), + DebugPHINumToValue.end(), InstNo); if (InstrIt != DebugInstrNumToInstr.end()) { const MachineInstr &TargetInstr = *InstrIt->second.first; uint64_t BlockNo = TargetInstr.getParent()->getNumber(); @@ -1695,6 +1740,11 @@ bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI) { unsigned LocID = MTracker->getLocID(MO.getReg(), false); LocIdx L = MTracker->LocIDToLocIdx[LocID]; NewID = ValueIDNum(BlockNo, InstrIt->second.second, L); + } else if (PHIIt != DebugPHINumToValue.end() && PHIIt->InstrNum == InstNo) { + // It's actually a PHI value. Which value it is might not be obvious, use + // the resolver helper to find out. + NewID = resolveDbgPHIs(*MI.getParent()->getParent(), MLiveOuts, MLiveIns, + MI, InstNo); } // We, we have a value number or None. Tell the variable value tracker about @@ -1749,6 +1799,55 @@ bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI) { MachineInstr *DbgMI = MTracker->emitLoc(FoundLoc, V, Properties); TTracker->PendingDbgValues.push_back(DbgMI); TTracker->flushDbgValues(MI.getIterator(), nullptr); + return true; +} + +bool InstrRefBasedLDV::transferDebugPHI(MachineInstr &MI) { + if (!MI.isDebugPHI()) + return false; + + // Analyse these only when solving the machine value location problem. + if (VTracker || TTracker) + return true; + + // First operand is the value location, either a stack slot or register. + // Second is the debug instruction number of the original PHI. + const MachineOperand &MO = MI.getOperand(0); + unsigned InstrNum = MI.getOperand(1).getImm(); + + if (MO.isReg()) { + // The value is whatever's currently in the register. Read and record it, + // to be analysed later. + Register Reg = MO.getReg(); + ValueIDNum Num = MTracker->readReg(Reg); + auto PHIRec = DebugPHIRecord( + {InstrNum, MI.getParent(), Num, MTracker->lookupOrTrackRegister(Reg)}); + DebugPHINumToValue.push_back(PHIRec); + } else { + // The value is whatever's in this stack slot. + assert(MO.isFI()); + unsigned FI = MO.getIndex(); + + // If the stack slot is dead, then this was optimized away. + // FIXME: stack slot colouring should account for slots that get merged. + if (MFI->isDeadObjectIndex(FI)) + return true; + + // Identify this spill slot. + Register Base; + StackOffset Offs = TFI->getFrameIndexReference(*MI.getMF(), FI, Base); + SpillLoc SL = {Base, Offs}; + Optional Num = MTracker->readSpill(SL); + + if (!Num) + // Nothing ever writes to this slot. Curious, but nothing we can do. + return true; + + // Record this DBG_PHI for later analysis. + auto DbgPHI = DebugPHIRecord( + {InstrNum, MI.getParent(), *Num, *MTracker->getSpillMLoc(SL)}); + DebugPHINumToValue.push_back(DbgPHI); + } return true; } @@ -2121,13 +2220,16 @@ void InstrRefBasedLDV::accumulateFragmentMap(MachineInstr &MI) { AllSeenFragments.insert(ThisFragment); } -void InstrRefBasedLDV::process(MachineInstr &MI) { +void InstrRefBasedLDV::process(MachineInstr &MI, ValueIDNum **MLiveOuts, + ValueIDNum **MLiveIns) { // Try to interpret an MI as a debug or transfer instruction. Only if it's // none of these should we interpret it's register defs as new value // definitions. if (transferDebugValue(MI)) return; - if (transferDebugInstrRef(MI)) + if (transferDebugInstrRef(MI, MLiveOuts, MLiveIns)) + return; + if (transferDebugPHI(MI)) return; if (transferRegisterCopy(MI)) return; @@ -3123,8 +3225,8 @@ void InstrRefBasedLDV::dump_mloc_transfer( #endif void InstrRefBasedLDV::emitLocations( - MachineFunction &MF, LiveInsT SavedLiveIns, ValueIDNum **MInLocs, - DenseMap &AllVarsNumbering) { + MachineFunction &MF, LiveInsT SavedLiveIns, ValueIDNum **MOutLocs, + ValueIDNum **MInLocs, DenseMap &AllVarsNumbering) { TTracker = new TransferTracker(TII, MTracker, MF, *TRI, CalleeSavedRegs); unsigned NumLocs = MTracker->getNumLocs(); @@ -3141,7 +3243,7 @@ void InstrRefBasedLDV::emitLocations( CurBB = bbnum; CurInst = 1; for (auto &MI : MBB) { - process(MI); + process(MI, MOutLocs, MInLocs); TTracker->checkInstForNewValues(CurInst, MI.getIterator()); ++CurInst; } @@ -3219,6 +3321,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, TII = MF.getSubtarget().getInstrInfo(); TFI = MF.getSubtarget().getFrameLowering(); TFI->getCalleeSaves(MF, CalleeSavedRegs); + MFI = &MF.getFrameInfo(); LS.initialize(MF); MTracker = @@ -3261,6 +3364,21 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, // dataflow problem. mlocDataflow(MInLocs, MOutLocs, MLocTransfer); + // Patch up debug phi numbers, turning unknown block-live-in values into + // either live-through machine values, or PHIs. + for (auto &DBG_PHI : DebugPHINumToValue) { + // Identify unresolved block-live-ins. + ValueIDNum &Num = DBG_PHI.ValueRead; + if (!Num.isPHI()) + continue; + + unsigned BlockNo = Num.getBlock(); + LocIdx LocNo = Num.getLoc(); + Num = MInLocs[BlockNo][LocNo.asU64()]; + } + // Later, we'll be looking up ranges of instruction numbers. + llvm::sort(DebugPHINumToValue); + // Walk back through each block / instruction, collecting DBG_VALUE // instructions and recording what machine value their operands refer to. for (auto &OrderPair : OrderToBB) { @@ -3271,7 +3389,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, MTracker->loadFromArray(MInLocs[CurBB], CurBB); CurInst = 1; for (auto &MI : MBB) { - process(MI); + process(MI, MOutLocs, MInLocs); ++CurInst; } MTracker->reset(); @@ -3326,7 +3444,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, // Using the computed value locations and variable values for each block, // create the DBG_VALUE instructions representing the extended variable // locations. - emitLocations(MF, SavedLiveIns, MInLocs, AllVarsNumbering); + emitLocations(MF, SavedLiveIns, MOutLocs, MInLocs, AllVarsNumbering); for (int Idx = 0; Idx < MaxNumBlocks; ++Idx) { delete[] MOutLocs[Idx]; @@ -3349,6 +3467,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, BBToOrder.clear(); BBNumToRPO.clear(); DebugInstrNumToInstr.clear(); + DebugPHINumToValue.clear(); return Changed; } @@ -3356,3 +3475,382 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, LDVImpl *llvm::makeInstrRefBasedLiveDebugValues() { return new InstrRefBasedLDV(); } + +namespace { +class LDVSSABlock; +class LDVSSAUpdater; + +// Pick a type to identify incoming block values as we construct SSA. We +// can't use anything more robust than an integer unfortunately, as SSAUpdater +// expects to zero-initialize the type. +typedef uint64_t BlockValueNum; + +/// Represents an SSA PHI node for the SSA updater class. Contains the block +/// this PHI is in, the value number it would have, and the expected incoming +/// values from parent blocks. +class LDVSSAPhi { +public: + SmallVector, 4> IncomingValues; + LDVSSABlock *ParentBlock; + BlockValueNum PHIValNum; + LDVSSAPhi(BlockValueNum PHIValNum, LDVSSABlock *ParentBlock) + : ParentBlock(ParentBlock), PHIValNum(PHIValNum) {} + + LDVSSABlock *getParent() { return ParentBlock; } +}; + +/// Thin wrapper around a block predecessor iterator. Only difference from a +/// normal block iterator is that it dereferences to an LDVSSABlock. +class LDVSSABlockIterator { +public: + MachineBasicBlock::pred_iterator PredIt; + LDVSSAUpdater &Updater; + + LDVSSABlockIterator(MachineBasicBlock::pred_iterator PredIt, + LDVSSAUpdater &Updater) + : PredIt(PredIt), Updater(Updater) {} + + bool operator!=(const LDVSSABlockIterator &OtherIt) const { + return OtherIt.PredIt != PredIt; + } + + LDVSSABlockIterator &operator++() { + ++PredIt; + return *this; + } + + LDVSSABlock *operator*(); +}; + +/// Thin wrapper around a block for SSA Updater interface. Necessary because +/// we need to track the PHI value(s) that we may have observed as necessary +/// in this block. +class LDVSSABlock { +public: + MachineBasicBlock &BB; + LDVSSAUpdater &Updater; + using PHIListT = SmallVector; + /// List of PHIs in this block. There should only ever be one. + PHIListT PHIList; + + LDVSSABlock(MachineBasicBlock &BB, LDVSSAUpdater &Updater) + : BB(BB), Updater(Updater) {} + + LDVSSABlockIterator succ_begin() { + return LDVSSABlockIterator(BB.succ_begin(), Updater); + } + + LDVSSABlockIterator succ_end() { + return LDVSSABlockIterator(BB.succ_end(), Updater); + } + + /// SSAUpdater has requested a PHI: create that within this block record. + LDVSSAPhi *newPHI(BlockValueNum Value) { + PHIList.emplace_back(Value, this); + return &PHIList.back(); + } + + /// SSAUpdater wishes to know what PHIs already exist in this block. + PHIListT &phis() { return PHIList; } +}; + +/// Utility class for the SSAUpdater interface: tracks blocks, PHIs and values +/// while SSAUpdater is exploring the CFG. It's passed as a handle / baton to +// SSAUpdaterTraits. +class LDVSSAUpdater { +public: + /// Map of value numbers to PHI records. + DenseMap PHIs; + /// Map of which blocks generate Undef values -- blocks that are not + /// dominated by any Def. + DenseMap UndefMap; + /// Map of machine blocks to our own records of them. + DenseMap BlockMap; + /// Machine location where any PHI must occur. + LocIdx Loc; + /// Table of live-in machine value numbers for blocks / locations. + ValueIDNum **MLiveIns; + + LDVSSAUpdater(LocIdx L, ValueIDNum **MLiveIns) : Loc(L), MLiveIns(MLiveIns) {} + + void reset() { + PHIs.clear(); + UndefMap.clear(); + BlockMap.clear(); + } + + ~LDVSSAUpdater() { reset(); } + + /// For a given MBB, create a wrapper block for it. Stores it in the + /// LDVSSAUpdater block map. + LDVSSABlock *getSSALDVBlock(MachineBasicBlock *BB) { + auto it = BlockMap.find(BB); + if (it == BlockMap.end()) { + BlockMap[BB] = new LDVSSABlock(*BB, *this); + it = BlockMap.find(BB); + } + return it->second; + } + + /// Find the live-in value number for the given block. Looks up the value at + /// the PHI location on entry. + BlockValueNum getValue(LDVSSABlock *LDVBB) { + return MLiveIns[LDVBB->BB.getNumber()][Loc.asU64()].asU64(); + } +}; + +LDVSSABlock *LDVSSABlockIterator::operator*() { + return Updater.getSSALDVBlock(*PredIt); +} + +} // namespace + +namespace llvm { + +raw_ostream &operator<<(raw_ostream &out, const LDVSSAPhi &PHI) { + out << "SSALDVPHI " << PHI.PHIValNum; + return out; +} + +/// Template specialization to give SSAUpdater access to CFG and value +/// information. SSAUpdater calls methods in these traits, passing in the +/// LDVSSAUpdater object, to learn about blocks and the values they define. +/// It also provides methods to create PHI nodes and track them. +template <> class SSAUpdaterTraits { +public: + using BlkT = LDVSSABlock; + using ValT = BlockValueNum; + using PhiT = LDVSSAPhi; + using BlkSucc_iterator = LDVSSABlockIterator; + + // Methods to access block successors -- dereferencing to our wrapper class. + static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return BB->succ_begin(); } + static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return BB->succ_end(); } + + /// Iterator for PHI operands. + class PHI_iterator { + private: + LDVSSAPhi *PHI; + unsigned Idx; + + public: + explicit PHI_iterator(LDVSSAPhi *P) // begin iterator + : PHI(P), Idx(0) {} + PHI_iterator(LDVSSAPhi *P, bool) // end iterator + : PHI(P), Idx(PHI->IncomingValues.size()) {} + + PHI_iterator &operator++() { + Idx++; + return *this; + } + bool operator==(const PHI_iterator &X) const { return Idx == X.Idx; } + bool operator!=(const PHI_iterator &X) const { return !operator==(X); } + + BlockValueNum getIncomingValue() { return PHI->IncomingValues[Idx].second; } + + LDVSSABlock *getIncomingBlock() { return PHI->IncomingValues[Idx].first; } + }; + + static inline PHI_iterator PHI_begin(PhiT *PHI) { return PHI_iterator(PHI); } + + static inline PHI_iterator PHI_end(PhiT *PHI) { + return PHI_iterator(PHI, true); + } + + /// FindPredecessorBlocks - Put the predecessors of BB into the Preds + /// vector. + static void FindPredecessorBlocks(LDVSSABlock *BB, + SmallVectorImpl *Preds) { + for (MachineBasicBlock::pred_iterator PI = BB->BB.pred_begin(), + E = BB->BB.pred_end(); + PI != E; ++PI) + Preds->push_back(BB->Updater.getSSALDVBlock(*PI)); + } + + /// GetUndefVal - Normally creates an IMPLICIT_DEF instruction with a new + /// register. For LiveDebugValues, represents a block identified as not having + /// any DBG_PHI predecessors. + static BlockValueNum GetUndefVal(LDVSSABlock *BB, LDVSSAUpdater *Updater) { + // Create a value number for this block -- it needs to be unique and in the + // "undef" collection, so that we know it's not real. Use a number + // representing a PHI into this block. + BlockValueNum Num = ValueIDNum(BB->BB.getNumber(), 0, Updater->Loc).asU64(); + Updater->UndefMap[&BB->BB] = Num; + return Num; + } + + /// CreateEmptyPHI - Create a (representation of a) PHI in the given block. + /// SSAUpdater will populate it with information about incoming values. The + /// value number of this PHI is whatever the machine value number problem + /// solution determined it to be. This includes non-phi values if SSAUpdater + /// tries to create a PHI where the incoming values are identical. + static BlockValueNum CreateEmptyPHI(LDVSSABlock *BB, unsigned NumPreds, + LDVSSAUpdater *Updater) { + BlockValueNum PHIValNum = Updater->getValue(BB); + LDVSSAPhi *PHI = BB->newPHI(PHIValNum); + Updater->PHIs[PHIValNum] = PHI; + return PHIValNum; + } + + /// AddPHIOperand - Add the specified value as an operand of the PHI for + /// the specified predecessor block. + static void AddPHIOperand(LDVSSAPhi *PHI, BlockValueNum Val, LDVSSABlock *Pred) { + PHI->IncomingValues.push_back(std::make_pair(Pred, Val)); + } + + /// ValueIsPHI - Check if the instruction that defines the specified value + /// is a PHI instruction. + static LDVSSAPhi *ValueIsPHI(BlockValueNum Val, LDVSSAUpdater *Updater) { + auto PHIIt = Updater->PHIs.find(Val); + if (PHIIt == Updater->PHIs.end()) + return nullptr; + return PHIIt->second; + } + + /// ValueIsNewPHI - Like ValueIsPHI but also check if the PHI has no source + /// operands, i.e., it was just added. + static LDVSSAPhi *ValueIsNewPHI(BlockValueNum Val, LDVSSAUpdater *Updater) { + LDVSSAPhi *PHI = ValueIsPHI(Val, Updater); + if (PHI && PHI->IncomingValues.size() == 0) + return PHI; + return nullptr; + } + + /// GetPHIValue - For the specified PHI instruction, return the value + /// that it defines. + static BlockValueNum GetPHIValue(LDVSSAPhi *PHI) { return PHI->PHIValNum; } +}; + +} // end namespace llvm + +Optional InstrRefBasedLDV::resolveDbgPHIs(MachineFunction &MF, + ValueIDNum **MLiveOuts, + ValueIDNum **MLiveIns, + MachineInstr &Here, + uint64_t InstrNum) { + // Pick out records of DBG_PHI instructions that have been observed. If there + // are none, then we cannot compute a value number. + auto RangePair = std::equal_range(DebugPHINumToValue.begin(), + DebugPHINumToValue.end(), InstrNum); + auto LowerIt = RangePair.first; + auto UpperIt = RangePair.second; + + // No DBG_PHI means there can be no location. + if (LowerIt == UpperIt) + return None; + + // If there's only one DBG_PHI, then that is our value number. + if (std::distance(LowerIt, UpperIt) == 1) + return LowerIt->ValueRead; + + auto DBGPHIRange = make_range(LowerIt, UpperIt); + + // Pick out the location (physreg, slot) where any PHIs must occur. It's + // technically possible for us to merge values in different registers in each + // block, but highly unlikely that LLVM will generate such code after register + // allocation. + LocIdx Loc = LowerIt->ReadLoc; + + // We have several DBG_PHIs, and a use position (the Here inst). All each + // DBG_PHI does is identify a value at a program position. We can treat each + // DBG_PHI like it's a Def of a value, and the use position is a Use of a + // value, just like SSA. We use the bulk-standard LLVM SSA updater class to + // determine which Def is used at the Use, and any PHIs that happen along + // the way. + // Adapted LLVM SSA Updater: + LDVSSAUpdater Updater(Loc, MLiveIns); + // Map of which Def or PHI is the current value in each block. + DenseMap AvailableValues; + // Set of PHIs that we have created along the way. + SmallVector CreatedPHIs; + + // Each existing DBG_PHI is a Def'd value under this model. Record these Defs + // for the SSAUpdater. + for (const auto &DBG_PHI : DBGPHIRange) { + LDVSSABlock *Block = Updater.getSSALDVBlock(DBG_PHI.MBB); + const ValueIDNum &Num = DBG_PHI.ValueRead; + AvailableValues.insert(std::make_pair(Block, Num.asU64())); + } + + LDVSSABlock *HereBlock = Updater.getSSALDVBlock(Here.getParent()); + const auto &AvailIt = AvailableValues.find(HereBlock); + if (AvailIt != AvailableValues.end()) { + // Actually, we already know what the value is -- the Use is in the same + // block as the Def. + return ValueIDNum::fromU64(AvailIt->second); + } + + // Otherwise, we must use the SSA Updater. It will identify the value number + // that we are to use, and the PHIs that must happen along the way. + SSAUpdaterImpl Impl(&Updater, &AvailableValues, &CreatedPHIs); + BlockValueNum ResultInt = Impl.GetValue(Updater.getSSALDVBlock(Here.getParent())); + ValueIDNum Result = ValueIDNum::fromU64(ResultInt); + + // We have the number for a PHI, or possibly live-through value, to be used + // at this Use. There are a number of things we have to check about it though: + // * Does any PHI use an 'Undef' (like an IMPLICIT_DEF) value? If so, this + // Use was not completely dominated by DBG_PHIs and we should abort. + // * Are the Defs or PHIs clobbered in a block? SSAUpdater isn't aware that + // we've left SSA form. Validate that the inputs to each PHI are the + // expected values. + // * Is a PHI we've created actually a merging of values, or are all the + // predecessor values the same, leading to a non-PHI machine value number? + // (SSAUpdater doesn't know that either). Remap validated PHIs into the + // the ValidatedValues collection below to sort this out. + DenseMap ValidatedValues; + + // Define all the input DBG_PHI values in ValidatedValues. + for (const auto &DBG_PHI : DBGPHIRange) { + LDVSSABlock *Block = Updater.getSSALDVBlock(DBG_PHI.MBB); + const ValueIDNum &Num = DBG_PHI.ValueRead; + ValidatedValues.insert(std::make_pair(Block, Num)); + } + + // Sort PHIs to validate into RPO-order. + SmallVector SortedPHIs; + for (auto &PHI : CreatedPHIs) + SortedPHIs.push_back(PHI); + + std::sort( + SortedPHIs.begin(), SortedPHIs.end(), [&](LDVSSAPhi *A, LDVSSAPhi *B) { + return BBToOrder[&A->getParent()->BB] < BBToOrder[&B->getParent()->BB]; + }); + + for (auto &PHI : SortedPHIs) { + ValueIDNum ThisBlockValueNum = + MLiveIns[PHI->ParentBlock->BB.getNumber()][Loc.asU64()]; + + // Are all these things actually defined? + for (auto &PHIIt : PHI->IncomingValues) { + // Any undef input means DBG_PHIs didn't dominate the use point. + if (Updater.UndefMap.find(&PHIIt.first->BB) != Updater.UndefMap.end()) + return None; + + ValueIDNum ValueToCheck; + ValueIDNum *BlockLiveOuts = MLiveOuts[PHIIt.first->BB.getNumber()]; + + auto VVal = ValidatedValues.find(PHIIt.first); + if (VVal == ValidatedValues.end()) { + // We cross a loop, and this is a backedge. LLVMs tail duplication + // happens so late that DBG_PHI instructions should not be able to + // migrate into loops -- meaning we can only be live-through this + // loop. + ValueToCheck = ThisBlockValueNum; + } else { + // Does the block have as a live-out, in the location we're examining, + // the value that we expect? If not, it's been moved or clobbered. + ValueToCheck = VVal->second; + } + + if (BlockLiveOuts[Loc.asU64()] != ValueToCheck) + return None; + } + + // Record this value as validated. + ValidatedValues.insert({PHI->ParentBlock, ThisBlockValueNum}); + } + + // All the PHIs are valid: we can return what the SSAUpdater said our value + // number was. + return Result; +} diff --git a/llvm/test/DebugInfo/MIR/InstrRef/dbg-phis-in-ldv.mir b/llvm/test/DebugInfo/MIR/InstrRef/dbg-phis-in-ldv.mir new file mode 100644 index 0000000000000..93a82ee8f1d06 --- /dev/null +++ b/llvm/test/DebugInfo/MIR/InstrRef/dbg-phis-in-ldv.mir @@ -0,0 +1,162 @@ +# RUN: llc %s -o - -mtriple=x86_64-unknown-unknown \ +# RUN: -experimental-debug-variable-locations -run-pass=livedebugvalues\ +# RUN: | FileCheck %s +# +# Test that a DBG_INSTR_REF that refers to a DBG_PHI, will be translated into a +# DBG_VALUE of the value read at that DBG_PHI. Same original code as +# phi-coalescing.mir. +# +--- | + ; ModuleID = 'phi-coalescing.mir' + source_filename = "test.c" + target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + target triple = "x86_64-unknown-linux-gnu" + + define dso_local i32 @foo(i64 %bar, i64 %baz) !dbg !7 { + entry: + call void @llvm.dbg.value(metadata i64 %bar, metadata !12, metadata !DIExpression()), !dbg !13 + call void @llvm.dbg.value(metadata i64 %baz, metadata !14, metadata !DIExpression()), !dbg !13 + call void @ext(i64 %bar), !dbg !15 + %add = add nsw i64 %bar, 12, !dbg !16 + call void @llvm.dbg.value(metadata i64 %add, metadata !12, metadata !DIExpression()), !dbg !13 + call void @ext(i64 %add), !dbg !17 + %call = call i64 @getlong(), !dbg !18 + %tobool = icmp ne i64 %call, 0, !dbg !18 + br i1 %tobool, label %if.then, label %if.end, !dbg !20 + + if.then: ; preds = %entry + %add1 = add nsw i64 %add, 1, !dbg !21 + call void @llvm.dbg.value(metadata i64 %add1, metadata !12, metadata !DIExpression()), !dbg !13 + br label %if.end, !dbg !22 + + if.end: ; preds = %if.then, %entry + %bar.addr.0 = phi i64 [ %add1, %if.then ], [ %add, %entry ], !dbg !13 + call void @llvm.dbg.value(metadata i64 %bar.addr.0, metadata !12, metadata !DIExpression()), !dbg !13 + %add2 = add nsw i64 %bar.addr.0, %baz, !dbg !23 + call void @llvm.dbg.value(metadata i64 %add2, metadata !12, metadata !DIExpression()), !dbg !13 + call void @ext(i64 %add2), !dbg !24 + %conv = trunc i64 %add2 to i32, !dbg !25 + ret i32 %conv, !dbg !26 + } + + ; Function Attrs: nounwind readnone speculatable willreturn + declare void @llvm.dbg.declare(metadata, metadata, metadata) + + declare dso_local void @ext(i64) + + declare dso_local i64 @getlong() + + ; Function Attrs: nounwind readnone speculatable willreturn + declare void @llvm.dbg.value(metadata, metadata, metadata) + + !llvm.dbg.cu = !{!0} + !llvm.module.flags = !{!3, !4, !5} + !llvm.ident = !{!6} + + !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) + !1 = !DIFile(filename: "test.c", directory: "/tmp/out.c") + !2 = !{} + !3 = !{i32 7, !"Dwarf Version", i32 4} + !4 = !{i32 2, !"Debug Info Version", i32 3} + !5 = !{i32 1, !"wchar_size", i32 4} + !6 = !{!""} + !7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 3, type: !8, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) + !8 = !DISubroutineType(types: !9) + !9 = !{!10, !11, !11} + !10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) + !11 = !DIBasicType(name: "long int", size: 64, encoding: DW_ATE_signed) + !12 = !DILocalVariable(name: "bar", arg: 1, scope: !7, file: !1, line: 3, type: !11) + !13 = !DILocation(line: 0, scope: !7) + !14 = !DILocalVariable(name: "baz", arg: 2, scope: !7, file: !1, line: 3, type: !11) + !15 = !DILocation(line: 4, column: 3, scope: !7) + !16 = !DILocation(line: 5, column: 7, scope: !7) + !17 = !DILocation(line: 6, column: 3, scope: !7) + !18 = !DILocation(line: 8, column: 7, scope: !19) + !19 = distinct !DILexicalBlock(scope: !7, file: !1, line: 8, column: 7) + !20 = !DILocation(line: 8, column: 7, scope: !7) + !21 = !DILocation(line: 9, column: 9, scope: !19) + !22 = !DILocation(line: 9, column: 5, scope: !19) + !23 = !DILocation(line: 11, column: 7, scope: !7) + !24 = !DILocation(line: 12, column: 3, scope: !7) + !25 = !DILocation(line: 13, column: 10, scope: !7) + !26 = !DILocation(line: 13, column: 3, scope: !7) + +... +--- +name: foo +alignment: 16 +tracksRegLiveness: true +liveins: + - { reg: '$rdi' } + - { reg: '$rsi' } +frameInfo: + stackSize: 24 + offsetAdjustment: -24 + maxAlignment: 1 + adjustsStack: true + hasCalls: true + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 16 +fixedStack: + - { id: 0, type: spill-slot, offset: -24, size: 8, alignment: 8, callee-saved-register: '$rbx' } + - { id: 1, type: spill-slot, offset: -16, size: 8, alignment: 16, callee-saved-register: '$r14' } +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $rdi, $rsi, $r14, $rbx + + frame-setup PUSH64r killed $r14, implicit-def $rsp, implicit $rsp + CFI_INSTRUCTION def_cfa_offset 16 + frame-setup PUSH64r killed $rbx, implicit-def $rsp, implicit $rsp + CFI_INSTRUCTION def_cfa_offset 24 + frame-setup PUSH64r undef $rax, implicit-def $rsp, implicit $rsp + CFI_INSTRUCTION def_cfa_offset 32 + CFI_INSTRUCTION offset $rbx, -24 + CFI_INSTRUCTION offset $r14, -16 + $r14 = MOV64rr $rsi + $rbx = MOV64rr $rdi + CALL64pcrel32 @ext, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, debug-location !15 + renamable $rbx = ADD64ri32 killed renamable $rbx, 12, implicit-def $eflags, debug-location !16 + $rdi = MOV64rr $rbx, debug-location !17 + CALL64pcrel32 @ext, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, debug-location !17 + CALL64pcrel32 @getlong, csr_64, implicit $rsp, implicit $ssp, implicit-def $rax, debug-location !18 + CMP64ri8 killed renamable $rax, 0, implicit-def $eflags, debug-location !18 + JCC_1 %bb.2, 4, implicit $eflags, debug-location !20 + + bb.1.if.then: + liveins: $rbx, $r14 + + renamable $rbx = ADD64ri32 killed renamable $rbx, 1, implicit-def $eflags, debug-location !21 + + bb.2.if.end: + liveins: $rbx, $r14 + + DBG_PHI $rbx, 1 + $rax = COPY $rbx + $rbx = MOV64ri 0 + DBG_INSTR_REF 1, 0, !12, !DIExpression(), debug-location !13 + + ; This sequence should mark the contents of rbx on block entry as being the + ; value for the variable at this DBG_INSTR_REF. We've force it to be in + ; $rax now, so we should see a DBG_VALUE for rax: + ; CHECK: DBG_PHI $rbx, 1 + ; CHECK-NEXT: $rax = COPY $rbx + ; CHECK-NEXT: $rbx = MOV64ri 0 + ; CHECK-NEXT: DBG_INSTR_REF 1, 0 + ; CHECK-NEXT: DBG_VALUE $rax, $noreg + + $rbx = COPY $rax + renamable $rbx = ADD64rr killed renamable $rbx, killed renamable $r14, implicit-def $eflags, debug-location !23 + DBG_VALUE $rbx, $noreg, !12, !DIExpression(), debug-location !13 + $rdi = MOV64rr $rbx, debug-location !24 + CALL64pcrel32 @ext, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, debug-location !24 + $eax = MOV32rr $ebx, implicit killed $rbx, debug-location !26 + $rsp = frame-destroy ADD64ri8 $rsp, 8, implicit-def dead $eflags, debug-location !26 + CFI_INSTRUCTION def_cfa_offset 24, debug-location !26 + $rbx = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !26 + CFI_INSTRUCTION def_cfa_offset 16, debug-location !26 + $r14 = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !26 + CFI_INSTRUCTION def_cfa_offset 8, debug-location !26 + RETQ implicit $eax, debug-location !26 + +... diff --git a/llvm/test/DebugInfo/MIR/InstrRef/dbg-phis-merging-in-ldv.mir b/llvm/test/DebugInfo/MIR/InstrRef/dbg-phis-merging-in-ldv.mir new file mode 100644 index 0000000000000..b46c4284c31ac --- /dev/null +++ b/llvm/test/DebugInfo/MIR/InstrRef/dbg-phis-merging-in-ldv.mir @@ -0,0 +1,199 @@ +# RUN: llc %s -o - -mtriple=x86_64-unknown-unknown \ +# RUN: -experimental-debug-variable-locations -run-pass=livedebugvalues \ +# RUN: | FileCheck %s --check-prefix=CHECK +# +# Test that, in a scenario where tail duplication has duplicated DBG_PHI +# instructions, we can reconstruct the value to refer to. This includes cases +# where the DBG_PHIs refer to the same value, to values that later merge, and +# value that become unavailable. +--- | + ; ModuleID = 'before.mir' + source_filename = "test.c" + target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + target triple = "x86_64-unknown-linux-gnu" + + define dso_local i32 @foo(i64 %bar, i64 %baz) !dbg !7 { + entry: + call void @llvm.dbg.value(metadata i64 %bar, metadata !12, metadata !DIExpression()), !dbg !13 + call void @llvm.dbg.value(metadata i64 %baz, metadata !14, metadata !DIExpression()), !dbg !13 + call void @ext(i64 %bar), !dbg !15 + %add = add nsw i64 %bar, 12, !dbg !16 + call void @llvm.dbg.value(metadata i64 %add, metadata !12, metadata !DIExpression()), !dbg !13 + call void @ext(i64 %add), !dbg !17 + %add1 = add nsw i64 %baz, 1, !dbg !18 + call void @llvm.dbg.value(metadata i64 %add1, metadata !14, metadata !DIExpression()), !dbg !13 + %call = call i64 @getlong(), !dbg !19 + %tobool = icmp ne i64 %call, 0, !dbg !19 + br i1 %tobool, label %if.then, label %if.else, !dbg !21 + + if.then: ; preds = %entry + %add2 = add nsw i64 %add, 1, !dbg !22 + call void @llvm.dbg.value(metadata i64 %add2, metadata !12, metadata !DIExpression()), !dbg !13 + br label %if.end, !dbg !24 + + if.else: ; preds = %entry + %add3 = add nsw i64 %add, 2, !dbg !25 + call void @llvm.dbg.value(metadata i64 %add3, metadata !12, metadata !DIExpression()), !dbg !13 + br label %if.end + + if.end: ; preds = %if.else, %if.then + %bar.addr.0 = phi i64 [ %add2, %if.then ], [ %add3, %if.else ], !dbg !27 + call void @llvm.dbg.value(metadata i64 %bar.addr.0, metadata !12, metadata !DIExpression()), !dbg !13 + %add4 = add nsw i64 %bar.addr.0, %add1, !dbg !28 + call void @llvm.dbg.value(metadata i64 %add4, metadata !12, metadata !DIExpression()), !dbg !13 + call void @ext(i64 %add4), !dbg !29 + %conv = trunc i64 %add4 to i32, !dbg !30 + ret i32 %conv, !dbg !31 + } + + ; Function Attrs: nounwind readnone speculatable willreturn + declare void @llvm.dbg.declare(metadata, metadata, metadata) + + declare dso_local void @ext(i64) + + declare dso_local i64 @getlong() + + ; Function Attrs: nounwind readnone speculatable willreturn + declare void @llvm.dbg.value(metadata, metadata, metadata) + + !llvm.dbg.cu = !{!0} + !llvm.module.flags = !{!3, !4, !5} + !llvm.ident = !{!6} + + !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) + !1 = !DIFile(filename: "test.c", directory: ".") + !2 = !{} + !3 = !{i32 7, !"Dwarf Version", i32 4} + !4 = !{i32 2, !"Debug Info Version", i32 3} + !5 = !{i32 1, !"wchar_size", i32 4} + !6 = !{!"clang"} + !7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 3, type: !8, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) + !8 = !DISubroutineType(types: !9) + !9 = !{!10, !11, !11} + !10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) + !11 = !DIBasicType(name: "long int", size: 64, encoding: DW_ATE_signed) + !12 = !DILocalVariable(name: "bar", arg: 1, scope: !7, file: !1, line: 3, type: !11) + !13 = !DILocation(line: 0, scope: !7) + !14 = !DILocalVariable(name: "baz", arg: 2, scope: !7, file: !1, line: 3, type: !11) + !15 = !DILocation(line: 4, column: 3, scope: !7) + !16 = !DILocation(line: 5, column: 7, scope: !7) + !17 = !DILocation(line: 6, column: 3, scope: !7) + !18 = !DILocation(line: 7, column: 7, scope: !7) + !19 = !DILocation(line: 9, column: 7, scope: !20) + !20 = distinct !DILexicalBlock(scope: !7, file: !1, line: 9, column: 7) + !21 = !DILocation(line: 9, column: 7, scope: !7) + !22 = !DILocation(line: 10, column: 9, scope: !23) + !23 = distinct !DILexicalBlock(scope: !20, file: !1, line: 9, column: 18) + !24 = !DILocation(line: 11, column: 3, scope: !23) + !25 = !DILocation(line: 12, column: 9, scope: !26) + !26 = distinct !DILexicalBlock(scope: !20, file: !1, line: 11, column: 10) + !27 = !DILocation(line: 0, scope: !20) + !28 = !DILocation(line: 15, column: 7, scope: !7) + !29 = !DILocation(line: 16, column: 3, scope: !7) + !30 = !DILocation(line: 17, column: 10, scope: !7) + !31 = !DILocation(line: 17, column: 3, scope: !7) + +... +--- +name: foo +alignment: 16 +tracksRegLiveness: true +liveins: + - { reg: '$rdi' } + - { reg: '$rsi' } +frameInfo: + stackSize: 24 + offsetAdjustment: -24 + maxAlignment: 1 + adjustsStack: true + hasCalls: true + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 16 +fixedStack: + - { id: 0, type: spill-slot, offset: -24, size: 8, alignment: 8, callee-saved-register: '$rbx' } + - { id: 1, type: spill-slot, offset: -16, size: 8, alignment: 16, callee-saved-register: '$r14' } +machineFunctionInfo: {} +body: | + bb.0.entry: + successors: %bb.2, %bb.1 + liveins: $rdi, $rsi, $r14, $rbx + + frame-setup PUSH64r killed $r14, implicit-def $rsp, implicit $rsp + CFI_INSTRUCTION def_cfa_offset 16 + frame-setup PUSH64r killed $rbx, implicit-def $rsp, implicit $rsp + CFI_INSTRUCTION def_cfa_offset 24 + frame-setup PUSH64r undef $rax, implicit-def $rsp, implicit $rsp + CFI_INSTRUCTION def_cfa_offset 32 + CFI_INSTRUCTION offset $rbx, -24 + CFI_INSTRUCTION offset $r14, -16 + $r14 = MOV64rr $rsi + $rbx = MOV64rr $rdi + CALL64pcrel32 @ext, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, debug-location !15 + renamable $rbx = ADD64ri32 killed renamable $rbx, 12, implicit-def $eflags, debug-location !16 + $rdi = MOV64rr $rbx, debug-location !17 + CALL64pcrel32 @ext, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, debug-location !17 + renamable $r14 = ADD64ri32 killed renamable $r14, 1, implicit-def $eflags, debug-location !18 + CALL64pcrel32 @getlong, csr_64, implicit $rsp, implicit $ssp, implicit-def $rax, debug-location !19 + CMP64ri8 killed renamable $rax, 0, implicit-def $eflags, debug-location !19 + JCC_1 %bb.1, 5, implicit $eflags, debug-location !21 + + bb.2.if.else: + liveins: $rbx, $r14, $rax + + renamable $rbx = ADD64ri32 killed renamable $rbx, 2, implicit-def $eflags, debug-location !25 + DBG_PHI $r14, 1 + DBG_PHI $rbx, 2 + DBG_PHI $rax, 3 + $rax = MOV64ri 0 + JMP_1 %bb.3 + + bb.1.if.then: + liveins: $rbx, $r14, $rax + + renamable $rbx = ADD64ri32 killed renamable $rbx, 1, implicit-def $eflags, debug-location !22 + DBG_PHI $r14, 1 + DBG_PHI $rbx, 2 + DBG_PHI $rax, 3 + + bb.3.if.end: + liveins: $rbx, $r14 + + DBG_INSTR_REF 1, 0, !14, !DIExpression(), debug-location !13 + DBG_INSTR_REF 2, 0, !12, !DIExpression(), debug-location !13 + DBG_INSTR_REF 3, 0, !12, !DIExpression(), debug-location !13 + + ; Value number 1 is live-through the above control flow from the two + ; DBG_PHIs: + ; CHECK: DBG_INSTR_REF 1, 0 + ; CHECK-NEXT: DBG_VALUE $r14 + ; + ; While value number 2 has different defs that merge on entry to bb.3. + ; These are both in $rbx though, and we should find its location: + ; CHECK: DBG_INSTR_REF 2, 0 + ; CHECK-NEXT: DBG_VALUE $rbx + ; + ; Value number 3 cannot be resolved because $rax is clobbered in bb.2, + ; meaning the merged value in bb.3 is incorrect. It should produce a + ; DBG_VALUE $noreg. + ; CHECK: DBG_INSTR_REF 3, 0 + ; CHECK-NEXT: DBG_VALUE $noreg + + renamable $rbx = ADD64rr killed renamable $rbx, killed renamable $r14, implicit-def $eflags, debug-location !28 + DBG_INSTR_REF 2, 0, !12, !DIExpression(), debug-location !13 + + ; After clobbering rbx, the variable location should not be available. + ; CHECK: DBG_INSTR_REF 2, 0 + ; CHECK-NEXT: DBG_VALUE $noreg + + $rdi = MOV64rr $rbx, debug-location !29 + CALL64pcrel32 @ext, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, debug-location !29 + $eax = MOV32rr $ebx, implicit killed $rbx, debug-location !31 + $rsp = frame-destroy ADD64ri8 $rsp, 8, implicit-def dead $eflags, debug-location !31 + CFI_INSTRUCTION def_cfa_offset 24, debug-location !31 + $rbx = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !31 + CFI_INSTRUCTION def_cfa_offset 16, debug-location !31 + $r14 = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !31 + CFI_INSTRUCTION def_cfa_offset 8, debug-location !31 + RETQ implicit $eax, debug-location !31 + +... diff --git a/llvm/test/DebugInfo/MIR/InstrRef/dbg-phis-with-loops.mir b/llvm/test/DebugInfo/MIR/InstrRef/dbg-phis-with-loops.mir new file mode 100644 index 0000000000000..cba605089aa93 --- /dev/null +++ b/llvm/test/DebugInfo/MIR/InstrRef/dbg-phis-with-loops.mir @@ -0,0 +1,205 @@ +# RUN: llc %s -o - -mtriple=x86_64-unknown-unknown \ +# RUN: -experimental-debug-variable-locations -run-pass=livedebugvalues \ +# RUN: | FileCheck %s --check-prefix=CHECK +# +# Copy of dbg-phis-merging-in-ldv.mir, where I've added a loop in between the +# DBG_PHI "definitions" of values, and the DBG_INSTR_REFs where they're used. +# We should be able to traverse this obstacle. +--- | + ; ModuleID = 'before.mir' + source_filename = "test.c" + target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + target triple = "x86_64-unknown-linux-gnu" + + define dso_local i32 @foo(i64 %bar, i64 %baz) !dbg !7 { + entry: + call void @llvm.dbg.value(metadata i64 %bar, metadata !12, metadata !DIExpression()), !dbg !13 + call void @llvm.dbg.value(metadata i64 %baz, metadata !14, metadata !DIExpression()), !dbg !13 + call void @ext(i64 %bar), !dbg !15 + %add = add nsw i64 %bar, 12, !dbg !16 + call void @llvm.dbg.value(metadata i64 %add, metadata !12, metadata !DIExpression()), !dbg !13 + call void @ext(i64 %add), !dbg !17 + %add1 = add nsw i64 %baz, 1, !dbg !18 + call void @llvm.dbg.value(metadata i64 %add1, metadata !14, metadata !DIExpression()), !dbg !13 + %call = call i64 @getlong(), !dbg !19 + %tobool = icmp ne i64 %call, 0, !dbg !19 + br i1 %tobool, label %if.then, label %if.else, !dbg !21 + + if.then: ; preds = %entry + %add2 = add nsw i64 %add, 1, !dbg !22 + call void @llvm.dbg.value(metadata i64 %add2, metadata !12, metadata !DIExpression()), !dbg !13 + br label %if.end, !dbg !24 + + if.else: ; preds = %entry + %add3 = add nsw i64 %add, 2, !dbg !25 + call void @llvm.dbg.value(metadata i64 %add3, metadata !12, metadata !DIExpression()), !dbg !13 + br label %if.end + + if.end: ; preds = %if.else, %if.then + %bar.addr.0 = phi i64 [ %add2, %if.then ], [ %add3, %if.else ], !dbg !27 + call void @llvm.dbg.value(metadata i64 %bar.addr.0, metadata !12, metadata !DIExpression()), !dbg !13 + %add4 = add nsw i64 %bar.addr.0, %add1, !dbg !28 + call void @llvm.dbg.value(metadata i64 %add4, metadata !12, metadata !DIExpression()), !dbg !13 + call void @ext(i64 %add4), !dbg !29 + %conv = trunc i64 %add4 to i32, !dbg !30 + ret i32 %conv, !dbg !31 + } + + ; Function Attrs: nounwind readnone speculatable willreturn + declare void @llvm.dbg.declare(metadata, metadata, metadata) #0 + + declare dso_local void @ext(i64) + + declare dso_local i64 @getlong() + + ; Function Attrs: nounwind readnone speculatable willreturn + declare void @llvm.dbg.value(metadata, metadata, metadata) #0 + + attributes #0 = { nounwind readnone speculatable willreturn } + + !llvm.dbg.cu = !{!0} + !llvm.module.flags = !{!3, !4, !5} + !llvm.ident = !{!6} + + !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) + !1 = !DIFile(filename: "test.c", directory: ".") + !2 = !{} + !3 = !{i32 7, !"Dwarf Version", i32 4} + !4 = !{i32 2, !"Debug Info Version", i32 3} + !5 = !{i32 1, !"wchar_size", i32 4} + !6 = !{!"clang"} + !7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 3, type: !8, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) + !8 = !DISubroutineType(types: !9) + !9 = !{!10, !11, !11} + !10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) + !11 = !DIBasicType(name: "long int", size: 64, encoding: DW_ATE_signed) + !12 = !DILocalVariable(name: "bar", arg: 1, scope: !7, file: !1, line: 3, type: !11) + !13 = !DILocation(line: 0, scope: !7) + !14 = !DILocalVariable(name: "baz", arg: 2, scope: !7, file: !1, line: 3, type: !11) + !15 = !DILocation(line: 4, column: 3, scope: !7) + !16 = !DILocation(line: 5, column: 7, scope: !7) + !17 = !DILocation(line: 6, column: 3, scope: !7) + !18 = !DILocation(line: 7, column: 7, scope: !7) + !19 = !DILocation(line: 9, column: 7, scope: !20) + !20 = distinct !DILexicalBlock(scope: !7, file: !1, line: 9, column: 7) + !21 = !DILocation(line: 9, column: 7, scope: !7) + !22 = !DILocation(line: 10, column: 9, scope: !23) + !23 = distinct !DILexicalBlock(scope: !20, file: !1, line: 9, column: 18) + !24 = !DILocation(line: 11, column: 3, scope: !23) + !25 = !DILocation(line: 12, column: 9, scope: !26) + !26 = distinct !DILexicalBlock(scope: !20, file: !1, line: 11, column: 10) + !27 = !DILocation(line: 0, scope: !20) + !28 = !DILocation(line: 15, column: 7, scope: !7) + !29 = !DILocation(line: 16, column: 3, scope: !7) + !30 = !DILocation(line: 17, column: 10, scope: !7) + !31 = !DILocation(line: 17, column: 3, scope: !7) + +... +--- +name: foo +alignment: 16 +tracksRegLiveness: true +liveins: + - { reg: '$rdi' } + - { reg: '$rsi' } +frameInfo: + stackSize: 24 + offsetAdjustment: -24 + maxAlignment: 1 + adjustsStack: true + hasCalls: true + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 16 +fixedStack: + - { id: 0, type: spill-slot, offset: -24, size: 8, alignment: 8, callee-saved-register: '$rbx' } + - { id: 1, type: spill-slot, offset: -16, size: 8, alignment: 16, callee-saved-register: '$r14' } +machineFunctionInfo: {} +body: | + bb.0.entry: + successors: %bb.2, %bb.1 + liveins: $rdi, $rsi, $r14, $rbx + + frame-setup PUSH64r killed $r14, implicit-def $rsp, implicit $rsp + CFI_INSTRUCTION def_cfa_offset 16 + frame-setup PUSH64r killed $rbx, implicit-def $rsp, implicit $rsp + CFI_INSTRUCTION def_cfa_offset 24 + frame-setup PUSH64r undef $rax, implicit-def $rsp, implicit $rsp + CFI_INSTRUCTION def_cfa_offset 32 + CFI_INSTRUCTION offset $rbx, -24 + CFI_INSTRUCTION offset $r14, -16 + $r14 = MOV64rr $rsi + $rbx = MOV64rr $rdi + CALL64pcrel32 @ext, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, debug-location !15 + renamable $rbx = ADD64ri32 killed renamable $rbx, 12, implicit-def $eflags, debug-location !16 + $rdi = MOV64rr $rbx, debug-location !17 + CALL64pcrel32 @ext, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, debug-location !17 + renamable $r14 = ADD64ri32 killed renamable $r14, 1, implicit-def $eflags, debug-location !18 + CALL64pcrel32 @getlong, csr_64, implicit $rsp, implicit $ssp, implicit-def $rax, debug-location !19 + CMP64ri8 killed renamable $rax, 0, implicit-def $eflags, debug-location !19 + JCC_1 %bb.1, 5, implicit $eflags, debug-location !21 + + bb.2.if.else: + liveins: $rbx, $r14, $rax + + renamable $rbx = ADD64ri32 killed renamable $rbx, 2, implicit-def $eflags, debug-location !25 + DBG_PHI $r14, 1 + DBG_PHI $rbx, 2 + DBG_PHI $rax, 3 + $rax = MOV64ri 0 + JMP_1 %bb.3 + + bb.1.if.then: + liveins: $rbx, $r14, $rax + + renamable $rbx = ADD64ri32 killed renamable $rbx, 1, implicit-def $eflags, debug-location !22 + DBG_PHI $r14, 1 + DBG_PHI $rbx, 2 + DBG_PHI $rax, 3 + + bb.3: + $r15 = MOV64ri 0 + CMP64ri8 $r15, 0, implicit-def $eflags, debug-location !19 + JCC_1 %bb.3, 5, implicit $eflags, debug-location !21 + + bb.4: + liveins: $rbx, $r14 + + DBG_INSTR_REF 1, 0, !14, !DIExpression(), debug-location !13 + DBG_INSTR_REF 2, 0, !12, !DIExpression(), debug-location !13 + DBG_INSTR_REF 3, 0, !12, !DIExpression(), debug-location !13 + + ; Value number 1 is live-through the above control flow from the two + ; DBG_PHIs: + ; CHECK: DBG_INSTR_REF 1, 0 + ; CHECK-NEXT: DBG_VALUE $r14 + ; + ; While value number 2 has different defs that merge on entry to bb.3. + ; These are both in $rbx though, and we should find its location: + ; CHECK: DBG_INSTR_REF 2, 0 + ; CHECK-NEXT: DBG_VALUE $rbx + ; + ; Value number 3 cannot be resolved because $rax is clobbered in bb.2, + ; meaning the merged value in bb.3 is incorrect. It should produce a + ; DBG_VALUE $noreg. + ; CHECK: DBG_INSTR_REF 3, 0 + ; CHECK-NEXT: DBG_VALUE $noreg + + renamable $rbx = ADD64rr killed renamable $rbx, killed renamable $r14, implicit-def $eflags, debug-location !28 + DBG_INSTR_REF 2, 0, !12, !DIExpression(), debug-location !13 + + ; After clobbering rbx, the variable location should not be available. + ; CHECK: DBG_INSTR_REF 2, 0 + ; CHECK-NEXT: DBG_VALUE $noreg + + $rdi = MOV64rr $rbx, debug-location !29 + CALL64pcrel32 @ext, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, debug-location !29 + $eax = MOV32rr $ebx, implicit killed $rbx, debug-location !31 + $rsp = frame-destroy ADD64ri8 $rsp, 8, implicit-def dead $eflags, debug-location !31 + CFI_INSTRUCTION def_cfa_offset 24, debug-location !31 + $rbx = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !31 + CFI_INSTRUCTION def_cfa_offset 16, debug-location !31 + $r14 = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !31 + CFI_INSTRUCTION def_cfa_offset 8, debug-location !31 + RETQ implicit $eax, debug-location !31 + +... From c3d3defd11a73d86f2ec0a5e1d8af36c5486be49 Mon Sep 17 00:00:00 2001 From: Dylan Fleming Date: Tue, 29 Jun 2021 14:00:49 +0100 Subject: [PATCH 176/619] [SVE] Added CodeGen support for inserting an element into a predicate vector Reviewed By: sdesmalen Differential Revision: https://reviews.llvm.org/D104722 --- .../Target/AArch64/AArch64ISelLowering.cpp | 19 +- .../CodeGen/AArch64/sve-insert-element.ll | 176 ++++++++++++++++++ 2 files changed, 194 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 9ceb91ea8017a..b3edefe550f81 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1197,6 +1197,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::SELECT_CC, VT, Expand); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); // There are no legal MVT::nxv16f## based types. if (VT != MVT::nxv16i1) { @@ -10147,11 +10148,27 @@ SDValue AArch64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, // Check for non-constant or out of range lane. EVT VT = Op.getOperand(0).getValueType(); + + if (VT.getScalarType() == MVT::i1) { + EVT VectorVT = getPromotedVTForPredicate(VT); + SDLoc DL(Op); + SDValue ExtendedVector = + DAG.getAnyExtOrTrunc(Op.getOperand(0), DL, VectorVT); + SDValue ExtendedValue = + DAG.getAnyExtOrTrunc(Op.getOperand(1), DL, + VectorVT.getScalarType().getSizeInBits() < 32 + ? MVT::i32 + : VectorVT.getScalarType()); + ExtendedVector = + DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VectorVT, ExtendedVector, + ExtendedValue, Op.getOperand(2)); + return DAG.getAnyExtOrTrunc(ExtendedVector, DL, VT); + } + ConstantSDNode *CI = dyn_cast(Op.getOperand(2)); if (!CI || CI->getZExtValue() >= VT.getVectorNumElements()) return SDValue(); - // Insertion/extraction are legal for V128 types. if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 || diff --git a/llvm/test/CodeGen/AArch64/sve-insert-element.ll b/llvm/test/CodeGen/AArch64/sve-insert-element.ll index da56ae9ba027a..4ef66be15ac63 100644 --- a/llvm/test/CodeGen/AArch64/sve-insert-element.ll +++ b/llvm/test/CodeGen/AArch64/sve-insert-element.ll @@ -352,3 +352,179 @@ define @test_insert_with_index_nxv2f64(double %d, i64 %idx %res = insertelement undef, double %d, i64 %idx ret %res } + +;Predicate insert +define @test_predicate_insert_2xi1_immediate ( %val, i1 %elt) { +; CHECK-LABEL: test_predicate_insert_2xi1_immediate: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.d, p0/z, #1 // =0x1 +; CHECK-NEXT: ptrue p0.d, vl1 +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: mov z0.d, p0/m, x0 +; CHECK-NEXT: and z0.d, z0.d, #0x1 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 +; CHECK-NEXT: ret + %res = insertelement %val, i1 %elt, i32 0 + ret %res +} + +define @test_predicate_insert_4xi1_immediate ( %val, i1 %elt) { +; CHECK-LABEL: test_predicate_insert_4xi1_immediate: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #2 +; CHECK-NEXT: index z0.s, #0, #1 +; CHECK-NEXT: ptrue p1.s +; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: cmpeq p2.s, p1/z, z0.s, z1.s +; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1 +; CHECK-NEXT: mov z0.s, p2/m, w0 +; CHECK-NEXT: and z0.s, z0.s, #0x1 +; CHECK-NEXT: cmpne p0.s, p1/z, z0.s, #0 +; CHECK-NEXT: ret + %res = insertelement %val, i1 %elt, i32 2 + ret %res +} + +define @test_predicate_insert_8xi1_immediate ( %val, i32 %idx) { +; CHECK-LABEL: test_predicate_insert_8xi1_immediate: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: index z0.h, #0, #1 +; CHECK-NEXT: ptrue p1.h +; CHECK-NEXT: mov z2.h, w8 +; CHECK-NEXT: mov z1.h, p0/z, #1 // =0x1 +; CHECK-NEXT: cmpeq p0.h, p1/z, z0.h, z2.h +; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: mov z1.h, p0/m, w8 +; CHECK-NEXT: and z1.h, z1.h, #0x1 +; CHECK-NEXT: cmpne p0.h, p1/z, z1.h, #0 +; CHECK-NEXT: ret + %res = insertelement %val, i1 1, i32 %idx + ret %res +} + +define @test_predicate_insert_16xi1_immediate ( %val) { +; CHECK-LABEL: test_predicate_insert_16xi1_immediate: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w9, #4 +; CHECK-NEXT: index z0.b, #0, #1 +; CHECK-NEXT: ptrue p1.b +; CHECK-NEXT: mov z1.b, w9 +; CHECK-NEXT: mov w8, wzr +; CHECK-NEXT: cmpeq p2.b, p1/z, z0.b, z1.b +; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1 +; CHECK-NEXT: mov z0.b, p2/m, w8 +; CHECK-NEXT: and z0.b, z0.b, #0x1 +; CHECK-NEXT: cmpne p0.b, p1/z, z0.b, #0 +; CHECK-NEXT: ret + %res = insertelement %val, i1 0, i32 4 + ret %res +} + + +define @test_predicate_insert_2xi1( %val, i1 %elt, i32 %idx) { +; CHECK-LABEL: test_predicate_insert_2xi1: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: sxtw x8, w1 +; CHECK-NEXT: index z0.d, #0, #1 +; CHECK-NEXT: ptrue p1.d +; CHECK-NEXT: mov z1.d, x8 +; CHECK-NEXT: cmpeq p2.d, p1/z, z0.d, z1.d +; CHECK-NEXT: mov z0.d, p0/z, #1 // =0x1 +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: mov z0.d, p2/m, x0 +; CHECK-NEXT: and z0.d, z0.d, #0x1 +; CHECK-NEXT: cmpne p0.d, p1/z, z0.d, #0 +; CHECK-NEXT: ret + %res = insertelement %val, i1 %elt, i32 %idx + ret %res +} + +define @test_predicate_insert_4xi1( %val, i1 %elt, i32 %idx) { +; CHECK-LABEL: test_predicate_insert_4xi1: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: sxtw x8, w1 +; CHECK-NEXT: index z0.s, #0, #1 +; CHECK-NEXT: ptrue p1.s +; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: cmpeq p2.s, p1/z, z0.s, z1.s +; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1 +; CHECK-NEXT: mov z0.s, p2/m, w0 +; CHECK-NEXT: and z0.s, z0.s, #0x1 +; CHECK-NEXT: cmpne p0.s, p1/z, z0.s, #0 +; CHECK-NEXT: ret + %res = insertelement %val, i1 %elt, i32 %idx + ret %res +} +define @test_predicate_insert_8xi1( %val, i1 %elt, i32 %idx) { +; CHECK-LABEL: test_predicate_insert_8xi1: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: sxtw x8, w1 +; CHECK-NEXT: index z0.h, #0, #1 +; CHECK-NEXT: ptrue p1.h +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: cmpeq p2.h, p1/z, z0.h, z1.h +; CHECK-NEXT: mov z0.h, p0/z, #1 // =0x1 +; CHECK-NEXT: mov z0.h, p2/m, w0 +; CHECK-NEXT: and z0.h, z0.h, #0x1 +; CHECK-NEXT: cmpne p0.h, p1/z, z0.h, #0 +; CHECK-NEXT: ret + %res = insertelement %val, i1 %elt, i32 %idx + ret %res +} + +define @test_predicate_insert_16xi1( %val, i1 %elt, i32 %idx) { +; CHECK-LABEL: test_predicate_insert_16xi1: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: sxtw x8, w1 +; CHECK-NEXT: index z0.b, #0, #1 +; CHECK-NEXT: ptrue p1.b +; CHECK-NEXT: mov z1.b, w8 +; CHECK-NEXT: cmpeq p2.b, p1/z, z0.b, z1.b +; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1 +; CHECK-NEXT: mov z0.b, p2/m, w0 +; CHECK-NEXT: and z0.b, z0.b, #0x1 +; CHECK-NEXT: cmpne p0.b, p1/z, z0.b, #0 +; CHECK-NEXT: ret + %res = insertelement %val, i1 %elt, i32 %idx + ret %res +} + +define @test_predicate_insert_32xi1( %val, i1 %elt, i32 %idx) { +; CHECK-LABEL: test_predicate_insert_32xi1: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: rdvl x10, #2 +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: sxtw x9, w1 +; CHECK-NEXT: sub x10, x10, #1 // =1 +; CHECK-NEXT: cmp x9, x10 +; CHECK-NEXT: mov z0.b, p1/z, #1 // =0x1 +; CHECK-NEXT: ptrue p1.b +; CHECK-NEXT: mov x8, sp +; CHECK-NEXT: mov z1.b, p0/z, #1 // =0x1 +; CHECK-NEXT: csel x9, x9, x10, lo +; CHECK-NEXT: st1b { z0.b }, p1, [x8, #1, mul vl] +; CHECK-NEXT: st1b { z1.b }, p1, [sp] +; CHECK-NEXT: strb w0, [x8, x9] +; CHECK-NEXT: ld1b { z0.b }, p1/z, [sp] +; CHECK-NEXT: ld1b { z1.b }, p1/z, [x8, #1, mul vl] +; CHECK-NEXT: and z0.b, z0.b, #0x1 +; CHECK-NEXT: and z1.b, z1.b, #0x1 +; CHECK-NEXT: cmpne p0.b, p1/z, z0.b, #0 +; CHECK-NEXT: cmpne p1.b, p1/z, z1.b, #0 +; CHECK-NEXT: addvl sp, sp, #2 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = insertelement %val, i1 %elt, i32 %idx + ret %res +} From 9de63367d8a7ab9e2588de4eae5a5df6a1abd90f Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Tue, 29 Jun 2021 15:20:11 +0100 Subject: [PATCH 177/619] Revert "[NFC] Remove shadowed variable in InnerLoopVectorizer::createInductionVariable" This reverts commit 9dde51416209a5552156384b9c2b08b676818d70. --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index c79c57cb2bdb5..f99352a3f075f 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3071,13 +3071,7 @@ PHINode *InnerLoopVectorizer::createInductionVariable(Loop *L, Value *Start, if (!Latch) Latch = Header; - // Set the Builder to a valid Block pointer as the existing one could get - // deleted below. - Builder.SetInsertPoint(&*LoopVectorBody->getFirstInsertionPt()); - - IRBuilder<>::InsertPointGuard Guard(Builder); - Builder.SetInsertPoint(&*Header->getFirstInsertionPt()); - + IRBuilder<> Builder(&*Header->getFirstInsertionPt()); Instruction *OldInst = getDebugLocFromInstOrOperands(OldInduction); setDebugLocFromInst(Builder, OldInst); auto *Induction = Builder.CreatePHI(Start->getType(), 2, "index"); From 3dae01911b6902f00e80eb7ebdc2f55bb28f173e Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Tue, 29 Jun 2021 16:35:07 +0200 Subject: [PATCH 178/619] [analyzer] Make CheckerManager::hasPathSensitiveCheckers() complete again It turns out that the CheckerManager::hasPathSensitiveCheckers() missed checking for the BeginFunctionCheckers. It seems like other callbacks are also missing: - ObjCMessageNilCheckers - BeginFunctionCheckers - NewAllocatorCheckers - PointerEscapeCheckers - EndOfTranslationUnitCheckers In this patch, I wanted to use a fold-expression, but until C++17 arrives we are left with the old-school method. When I tried to write a unittest I observed an interesting behavior. I subscribed only to the BeginFunction event, it was not fired. However, when I also defined the PreCall with an empty handler, suddenly both fired. I could add this test demonstrating the issue, but I don't think it would serve much value in a long run. I don't expect regressions for this. However, I think it would be great to enforce the completeness of this list in a runtime check. I could not come up with a solution for this though. PS: Thank you @Szelethus for helping me debugging this. Differential Revision: https://reviews.llvm.org/D105101 Reviewed by: vsavchenko --- .../StaticAnalyzer/Core/CheckerManager.cpp | 29 +++++++++---------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Core/CheckerManager.cpp b/clang/lib/StaticAnalyzer/Core/CheckerManager.cpp index 86cecf6524f03..e09399a83589e 100644 --- a/clang/lib/StaticAnalyzer/Core/CheckerManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/CheckerManager.cpp @@ -33,21 +33,20 @@ using namespace clang; using namespace ento; bool CheckerManager::hasPathSensitiveCheckers() const { - return !StmtCheckers.empty() || - !PreObjCMessageCheckers.empty() || - !PostObjCMessageCheckers.empty() || - !PreCallCheckers.empty() || - !PostCallCheckers.empty() || - !LocationCheckers.empty() || - !BindCheckers.empty() || - !EndAnalysisCheckers.empty() || - !EndFunctionCheckers.empty() || - !BranchConditionCheckers.empty() || - !LiveSymbolsCheckers.empty() || - !DeadSymbolsCheckers.empty() || - !RegionChangesCheckers.empty() || - !EvalAssumeCheckers.empty() || - !EvalCallCheckers.empty(); + const auto IfAnyAreNonEmpty = [](const auto &... Callbacks) -> bool { + bool Result = false; + // FIXME: Use fold expressions in C++17. + LLVM_ATTRIBUTE_UNUSED int Unused[]{0, (Result |= !Callbacks.empty())...}; + return Result; + }; + return IfAnyAreNonEmpty( + StmtCheckers, PreObjCMessageCheckers, ObjCMessageNilCheckers, + PostObjCMessageCheckers, PreCallCheckers, PostCallCheckers, + LocationCheckers, BindCheckers, EndAnalysisCheckers, + BeginFunctionCheckers, EndFunctionCheckers, BranchConditionCheckers, + NewAllocatorCheckers, LiveSymbolsCheckers, DeadSymbolsCheckers, + RegionChangesCheckers, PointerEscapeCheckers, EvalAssumeCheckers, + EvalCallCheckers, EndOfTranslationUnitCheckers); } void CheckerManager::finishedCheckerRegistration() { From a33e12801279a947c74fdee2655b24480941fb39 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Thu, 24 Jun 2021 12:04:46 -0500 Subject: [PATCH 179/619] [InstCombine] Gracefully handle an alloca outside the alloca-AS While we might eventually want to disallow allocas that do not have the alloca-AS set, it seems undesirable to crash on them. Add a cast when required so that we can support such allocas (at least here). Differential Revision: https://reviews.llvm.org/D104866 --- .../InstCombineLoadStoreAlloca.cpp | 14 +++++++--- .../InstCombine/alloca-in-non-alloca-as.ll | 27 +++++++++++++++++++ 2 files changed, 38 insertions(+), 3 deletions(-) create mode 100644 llvm/test/Transforms/InstCombine/alloca-in-non-alloca-as.ll diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 61b0bc7320be2..73fdcc05de0af 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -199,13 +199,21 @@ static Instruction *simplifyAllocaArraySize(InstCombinerImpl &IC, Type *IdxTy = IC.getDataLayout().getIntPtrType(AI.getType()); Value *NullIdx = Constant::getNullValue(IdxTy); Value *Idx[2] = {NullIdx, NullIdx}; - Instruction *GEP = GetElementPtrInst::CreateInBounds( + Instruction *NewI = GetElementPtrInst::CreateInBounds( NewTy, New, Idx, New->getName() + ".sub"); - IC.InsertNewInstBefore(GEP, *It); + IC.InsertNewInstBefore(NewI, *It); + + // Gracefully handle allocas in other address spaces. + if (AI.getType()->getPointerAddressSpace() != + NewI->getType()->getPointerAddressSpace()) { + NewI = + CastInst::CreatePointerBitCastOrAddrSpaceCast(NewI, AI.getType()); + IC.InsertNewInstBefore(NewI, *It); + } // Now make everything use the getelementptr instead of the original // allocation. - return IC.replaceInstUsesWith(AI, GEP); + return IC.replaceInstUsesWith(AI, NewI); } } diff --git a/llvm/test/Transforms/InstCombine/alloca-in-non-alloca-as.ll b/llvm/test/Transforms/InstCombine/alloca-in-non-alloca-as.ll new file mode 100644 index 0000000000000..c1c45b8e5ab34 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/alloca-in-non-alloca-as.ll @@ -0,0 +1,27 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -S | FileCheck %s + +; Gracefully handle the alloca that is not in the alloca AS (=5) + +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" +target triple = "amdgcn-amd-amdhsa" + +declare void @use(i8*, i32**) + +define weak amdgpu_kernel void @__omp_offloading_802_ea0109_main_l8(i32* %a) { +; CHECK-LABEL: @__omp_offloading_802_ea0109_main_l8( +; CHECK-NEXT: .master: +; CHECK-NEXT: [[TMP0:%.*]] = alloca i32*, align 1, addrspace(5) +; CHECK-NEXT: [[DOTSUB:%.*]] = bitcast i32* addrspace(5)* [[TMP0]] to i8 addrspace(5)* +; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast i8 addrspace(5)* [[DOTSUB]] to i8* +; CHECK-NEXT: [[A_ON_STACK:%.*]] = addrspacecast i32* addrspace(5)* [[TMP0]] to i32** +; CHECK-NEXT: call void @use(i8* [[TMP1]], i32** [[A_ON_STACK]]) +; CHECK-NEXT: ret void +; +.master: + %0 = alloca i8, i64 8, align 1 + %a_on_stack = bitcast i8* %0 to i32** + store i32* undef, i32** %a_on_stack, align 8 + call void @use(i8* %0, i32** %a_on_stack) + ret void +} From 8dc9bb6d85ac04a07bab1a3309702c7bf7944e1e Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Fri, 25 Jun 2021 19:19:19 -0500 Subject: [PATCH 180/619] [Attributor][NFC] Clang format --- llvm/lib/Transforms/IPO/AttributorAttributes.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index d6e9e19b2edbc..25e0ff5599d43 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -8396,9 +8396,7 @@ AACallGraphNode *AACallEdgeIterator::operator*() const { &A.getOrCreateAAFor(IRPosition::function(**I)))); } -void AttributorCallGraph::print() { - llvm::WriteGraph(outs(), this); -} +void AttributorCallGraph::print() { llvm::WriteGraph(outs(), this); } const char AAReturnedValues::ID = 0; const char AANoUnwind::ID = 0; From 457bd5c8d52a110fac43d538a03e30ef49099974 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Thu, 10 Jun 2021 17:13:22 -0500 Subject: [PATCH 181/619] [Attributor] Teach AAPotentialValues about constant select conditions There was a TODO but now we actually check if the select condition is assumed constant and only look at the relevant operand. --- .../Transforms/IPO/AttributorAttributes.cpp | 49 ++++++++++++++----- .../Transforms/Attributor/value-simplify.ll | 31 ++++++++++++ 2 files changed, 67 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 25e0ff5599d43..5bcdf636ca1d7 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -7823,23 +7823,46 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl { if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy()) return indicatePessimisticFixpoint(); - // TODO: Use assumed simplified condition value - auto &LHSAA = A.getAAFor(*this, IRPosition::value(*LHS), - DepClassTy::REQUIRED); - if (!LHSAA.isValidState()) - return indicatePessimisticFixpoint(); + bool UsedAssumedInformation = false; + Optional C = A.getAssumedConstant(*SI->getCondition(), *this, + UsedAssumedInformation); + + // Check if we only need one operand. + bool OnlyLeft = false, OnlyRight = false; + if (C.hasValue() && *C && (*C)->isOneValue()) + OnlyLeft = true; + else if (C.hasValue() && *C && (*C)->isZeroValue()) + OnlyRight = true; + + const AAPotentialValues *LHSAA = nullptr, *RHSAA = nullptr; + if (!OnlyRight) { + LHSAA = &A.getAAFor(*this, IRPosition::value(*LHS), + DepClassTy::REQUIRED); + if (!LHSAA->isValidState()) + return indicatePessimisticFixpoint(); + } + if (!OnlyLeft) { + RHSAA = &A.getAAFor(*this, IRPosition::value(*RHS), + DepClassTy::REQUIRED); + if (!RHSAA->isValidState()) + return indicatePessimisticFixpoint(); + } - auto &RHSAA = A.getAAFor(*this, IRPosition::value(*RHS), - DepClassTy::REQUIRED); - if (!RHSAA.isValidState()) - return indicatePessimisticFixpoint(); + if (!LHSAA || !RHSAA) { + // select (true/false), lhs, rhs + auto *OpAA = LHSAA ? LHSAA : RHSAA; - if (LHSAA.undefIsContained() && RHSAA.undefIsContained()) + if (OpAA->undefIsContained()) + unionAssumedWithUndef(); + else + unionAssumed(*OpAA); + + } else if (LHSAA->undefIsContained() && RHSAA->undefIsContained()) { // select i1 *, undef , undef => undef unionAssumedWithUndef(); - else { - unionAssumed(LHSAA); - unionAssumed(RHSAA); + } else { + unionAssumed(*LHSAA); + unionAssumed(*RHSAA); } return AssumedBefore == getAssumed() ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED; diff --git a/llvm/test/Transforms/Attributor/value-simplify.ll b/llvm/test/Transforms/Attributor/value-simplify.ll index db4b8b6ef88f3..1d8b468e43133 100644 --- a/llvm/test/Transforms/Attributor/value-simplify.ll +++ b/llvm/test/Transforms/Attributor/value-simplify.ll @@ -1073,6 +1073,37 @@ b: ret i1 %cmp2 } +define i32 @test_select(i32 %c) { +; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn +; IS__TUNIT____-LABEL: define {{[^@]+}}@test_select +; IS__TUNIT____-SAME: (i32 [[C:%.*]]) #[[ATTR1]] { +; IS__TUNIT____-NEXT: [[CALL:%.*]] = call i32 @select() #[[ATTR1]] +; IS__TUNIT____-NEXT: ret i32 [[CALL]] +; +; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; IS__CGSCC____-LABEL: define {{[^@]+}}@test_select +; IS__CGSCC____-SAME: (i32 [[C:%.*]]) #[[ATTR1]] { +; IS__CGSCC____-NEXT: ret i32 42 +; + %call = call i32 @select(i1 1, i32 42, i32 %c) + ret i32 %call +} + +define internal i32 @select(i1 %a, i32 %b, i32 %c) { +; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn +; IS__TUNIT____-LABEL: define {{[^@]+}}@select +; IS__TUNIT____-SAME: () #[[ATTR1]] { +; IS__TUNIT____-NEXT: ret i32 42 +; +; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; IS__CGSCC____-LABEL: define {{[^@]+}}@select +; IS__CGSCC____-SAME: () #[[ATTR1]] { +; IS__CGSCC____-NEXT: ret i32 undef +; + %s = select i1 %a, i32 %b, i32 %c + ret i32 %s +} + define i1 @icmp() { ; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn ; IS__TUNIT____-LABEL: define {{[^@]+}}@icmp From dcbe58d94c843e443113bff5f60fbcccc4168714 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Sat, 26 Jun 2021 13:10:13 -0500 Subject: [PATCH 182/619] [Attributor][NFCI] Remove unneeded namespace --- llvm/lib/Transforms/IPO/AttributorAttributes.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 5bcdf636ca1d7..78c9a6b1ac1a5 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -141,8 +141,6 @@ PIPE_OPERATOR(AAFunctionReachability) #undef PIPE_OPERATOR } // namespace llvm -namespace { - /// Get pointer operand of memory accessing instruction. If \p I is /// not a memory accessing instruction, return nullptr. If \p AllowVolatile, /// is set to false and the instruction is volatile, return nullptr. @@ -6468,8 +6466,6 @@ void AAMemoryBehaviorFloating::analyzeUseIn(Attributor &A, const Use *U, removeAssumedBits(NO_WRITES); } -} // namespace - /// -------------------- Memory Locations Attributes --------------------------- /// Includes read-none, argmemonly, inaccessiblememonly, /// inaccessiblememorargmemonly From 7af91a2b8f06cfa603eff1514c8ee38bdf1811f1 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Sat, 26 Jun 2021 13:20:28 -0500 Subject: [PATCH 183/619] [Attributor][NFCI] Make the state of AAValueSimplify explicit As we have done with other states we want the AAValueSimplify state to be explicit to use it more easily in our helpers. --- llvm/include/llvm/Transforms/IPO/Attributor.h | 82 ++++++++++++++++++- llvm/lib/Transforms/IPO/Attributor.cpp | 4 +- .../Transforms/IPO/AttributorAttributes.cpp | 79 ++++++++---------- 3 files changed, 116 insertions(+), 49 deletions(-) diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index 5449003a98b34..393160d8b2e83 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -3341,10 +3341,86 @@ struct AANoCapture static const char ID; }; +struct ValueSimplifyStateType : public AbstractState { + + ValueSimplifyStateType(Type *Ty) : Ty(Ty) {} + + static ValueSimplifyStateType getBestState(Type *Ty) { + return ValueSimplifyStateType(Ty); + } + static ValueSimplifyStateType getBestState(const ValueSimplifyStateType &VS) { + return getBestState(VS.Ty); + } + + /// Return the worst possible representable state. + static ValueSimplifyStateType getWorstState(Type *Ty) { + ValueSimplifyStateType DS(Ty); + DS.indicatePessimisticFixpoint(); + return DS; + } + static ValueSimplifyStateType + getWorstState(const ValueSimplifyStateType &VS) { + return getWorstState(VS.Ty); + } + + /// See AbstractState::isValidState(...) + bool isValidState() const override { return BS.isValidState(); } + + /// See AbstractState::isAtFixpoint(...) + bool isAtFixpoint() const override { return BS.isAtFixpoint(); } + + /// Return the assumed state encoding. + ValueSimplifyStateType getAssumed() { return *this; } + const ValueSimplifyStateType &getAssumed() const { return *this; } + + /// See AbstractState::indicatePessimisticFixpoint(...) + ChangeStatus indicatePessimisticFixpoint() override { + return BS.indicatePessimisticFixpoint(); + } + + /// See AbstractState::indicateOptimisticFixpoint(...) + ChangeStatus indicateOptimisticFixpoint() override { + return BS.indicateOptimisticFixpoint(); + } + + /// "Clamp" this state with \p PVS. + ValueSimplifyStateType operator^=(const ValueSimplifyStateType &VS) { + BS ^= VS.BS; + unionAssumed(VS.SimplifiedAssociatedValue); + return *this; + } + + bool operator==(const ValueSimplifyStateType &RHS) const { + if (isValidState() != RHS.isValidState()) + return false; + if (!isValidState() && !RHS.isValidState()) + return true; + return SimplifiedAssociatedValue == RHS.SimplifiedAssociatedValue; + } + +protected: + /// The type of the original value. + Type *Ty; + + /// Merge \p Other into the currently assumed simplified value + bool unionAssumed(Optional Other); + + /// Helper to track validity and fixpoint + BooleanState BS; + + /// An assumed simplified value. Initially, it is set to Optional::None, which + /// means that the value is not clear under current assumption. If in the + /// pessimistic state, getAssumedSimplifiedValue doesn't return this value but + /// returns orignal associated value. + Optional SimplifiedAssociatedValue; +}; + /// An abstract interface for value simplify abstract attribute. -struct AAValueSimplify : public StateWrapper { - using Base = StateWrapper; - AAValueSimplify(const IRPosition &IRP, Attributor &A) : Base(IRP) {} +struct AAValueSimplify + : public StateWrapper { + using Base = StateWrapper; + AAValueSimplify(const IRPosition &IRP, Attributor &A) + : Base(IRP, IRP.getAssociatedType()) {} /// Create an abstract attribute view for the position \p IRP. static AAValueSimplify &createForPosition(const IRPosition &IRP, diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index 47d1bbc607fba..7114862653749 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -579,7 +579,7 @@ Attributor::getAssumedConstant(const Value &V, const AbstractAttribute &AA, AA, IRPosition::value(V, AA.getCallBaseContext()), DepClassTy::NONE); Optional SimplifiedV = ValueSimplifyAA.getAssumedSimplifiedValue(*this); - bool IsKnown = ValueSimplifyAA.isKnown(); + bool IsKnown = ValueSimplifyAA.isAtFixpoint(); UsedAssumedInformation |= !IsKnown; if (!SimplifiedV.hasValue()) { recordDependence(ValueSimplifyAA, AA, DepClassTy::OPTIONAL); @@ -618,7 +618,7 @@ Attributor::getAssumedSimplified(const IRPosition &IRP, getOrCreateAAFor(IRP, AA, DepClassTy::NONE); Optional SimplifiedV = ValueSimplifyAA.getAssumedSimplifiedValue(*this); - bool IsKnown = ValueSimplifyAA.isKnown(); + bool IsKnown = ValueSimplifyAA.isAtFixpoint(); UsedAssumedInformation |= !IsKnown; if (!SimplifiedV.hasValue()) { if (AA) diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 78c9a6b1ac1a5..8ae1cff4050dd 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -4512,6 +4512,32 @@ struct AANoCaptureCallSiteReturned final : AANoCaptureImpl { }; /// ------------------ Value Simplify Attribute ---------------------------- + +bool ValueSimplifyStateType::unionAssumed(Optional Other) { + // FIXME: Add a typecast support. + if (!Other.hasValue()) + return true; + + if (!Other.getValue()) + return false; + + Value &QueryingValueSimplifiedUnwrapped = *Other.getValue(); + + if (SimplifiedAssociatedValue.hasValue() && + !isa(SimplifiedAssociatedValue.getValue()) && + !isa(QueryingValueSimplifiedUnwrapped)) + return SimplifiedAssociatedValue == Other; + if (SimplifiedAssociatedValue.hasValue() && + isa(QueryingValueSimplifiedUnwrapped)) + return true; + + LLVM_DEBUG(dbgs() << "[ValueSimplify] is assumed to be " + << QueryingValueSimplifiedUnwrapped << "\n"); + + SimplifiedAssociatedValue = Other; + return true; +} + struct AAValueSimplifyImpl : AAValueSimplify { AAValueSimplifyImpl(const IRPosition &IRP, Attributor &A) : AAValueSimplify(IRP, A) {} @@ -4529,8 +4555,8 @@ struct AAValueSimplifyImpl : AAValueSimplify { if (SimplifiedAssociatedValue) errs() << "SAV: " << **SimplifiedAssociatedValue << " "; }); - return getAssumed() ? (getKnown() ? "simplified" : "maybe-simple") - : "not-simple"; + return isValidState() ? (isAtFixpoint() ? "simplified" : "maybe-simple") + : "not-simple"; } /// See AbstractAttribute::trackStatistics() @@ -4538,45 +4564,19 @@ struct AAValueSimplifyImpl : AAValueSimplify { /// See AAValueSimplify::getAssumedSimplifiedValue() Optional getAssumedSimplifiedValue(Attributor &A) const override { - if (!getAssumed()) + if (!isValidState()) return const_cast(&getAssociatedValue()); return SimplifiedAssociatedValue; } /// Helper function for querying AAValueSimplify and updating candicate. /// \param IRP The value position we are trying to unify with SimplifiedValue - /// \param AccumulatedSimplifiedValue Current simplification result. - static bool checkAndUpdate(Attributor &A, const AbstractAttribute &QueryingAA, - const IRPosition &IRP, - Optional &AccumulatedSimplifiedValue) { - // FIXME: Add a typecast support. + bool checkAndUpdate(Attributor &A, const AbstractAttribute &QueryingAA, + const IRPosition &IRP) { bool UsedAssumedInformation = false; Optional QueryingValueSimplified = A.getAssumedSimplified(IRP, QueryingAA, UsedAssumedInformation); - - if (!QueryingValueSimplified.hasValue()) - return true; - - if (!QueryingValueSimplified.getValue()) - return false; - - Value &QueryingValueSimplifiedUnwrapped = - *QueryingValueSimplified.getValue(); - - if (AccumulatedSimplifiedValue.hasValue() && - !isa(AccumulatedSimplifiedValue.getValue()) && - !isa(QueryingValueSimplifiedUnwrapped)) - return AccumulatedSimplifiedValue == QueryingValueSimplified; - if (AccumulatedSimplifiedValue.hasValue() && - isa(QueryingValueSimplifiedUnwrapped)) - return true; - - LLVM_DEBUG(dbgs() << "[ValueSimplify] " << IRP.getAssociatedValue() - << " is assumed to be " - << QueryingValueSimplifiedUnwrapped << "\n"); - - AccumulatedSimplifiedValue = QueryingValueSimplified; - return true; + return unionAssumed(QueryingValueSimplified); } /// Returns a candidate is found or not @@ -4645,13 +4645,6 @@ struct AAValueSimplifyImpl : AAValueSimplify { indicateOptimisticFixpoint(); return ChangeStatus::CHANGED; } - -protected: - // An assumed simplified value. Initially, it is set to Optional::None, which - // means that the value is not clear under current assumption. If in the - // pessimistic state, getAssumedSimplifiedValue doesn't return this value but - // returns orignal associated value. - Optional SimplifiedAssociatedValue; }; struct AAValueSimplifyArgument final : AAValueSimplifyImpl { @@ -4711,7 +4704,7 @@ struct AAValueSimplifyArgument final : AAValueSimplifyImpl { if (auto *C = dyn_cast(&ArgOp)) if (C->isThreadDependent()) return false; - return checkAndUpdate(A, *this, ACSArgPos, SimplifiedAssociatedValue); + return checkAndUpdate(A, *this, ACSArgPos); }; // Generate a answer specific to a call site context. @@ -4749,8 +4742,7 @@ struct AAValueSimplifyReturned : AAValueSimplifyImpl { auto PredForReturned = [&](Value &V) { return checkAndUpdate(A, *this, - IRPosition::value(V, getCallBaseContext()), - SimplifiedAssociatedValue); + IRPosition::value(V, getCallBaseContext())); }; if (!A.checkForAllReturnedValues(PredForReturned, *this)) @@ -4906,8 +4898,7 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl { return false; } return checkAndUpdate(A, *this, - IRPosition::value(V, getCallBaseContext()), - SimplifiedAssociatedValue); + IRPosition::value(V, getCallBaseContext())); }; bool Dummy = false; From bc8bb3df35223afeea62170b32f8ce9bd04255d4 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Tue, 29 Jun 2021 09:33:31 -0500 Subject: [PATCH 184/619] Revert "[omp] Fix build without ITT after D103121 changes" This reverts commit eab1fd389b61d236bf8df4d09f62dd18253a10bc. This commit fixed a problem with 25073a4ecfc9 (D103121) which is the one we actually need to revert to unblock non-X86 builds of OpenMP. Can be reapplied, or merged into, D103121 as it goes in again. --- openmp/runtime/src/kmp_barrier.cpp | 11 ++++++----- openmp/runtime/src/kmp_wait_release.h | 3 +-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/openmp/runtime/src/kmp_barrier.cpp b/openmp/runtime/src/kmp_barrier.cpp index a0a020bf9474e..134163e23fa7f 100644 --- a/openmp/runtime/src/kmp_barrier.cpp +++ b/openmp/runtime/src/kmp_barrier.cpp @@ -225,9 +225,10 @@ void __kmp_dist_barrier_wakeup(enum barrier_type bt, kmp_team_t *team, } } -static void __kmp_dist_barrier_gather( - enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, - void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) { +static void +__kmp_dist_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, + int tid, void (*reduce)(void *, void *) + USE_ITT_BUILD_ARG(void *itt_sync_obj)) { KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_dist_gather); kmp_team_t *team; distributedBarrier *b; @@ -403,7 +404,7 @@ static void __kmp_dist_barrier_release( if (KMP_COMPARE_AND_STORE_ACQ32(&(this_thr->th.th_used_in_team), 2, 0) || this_thr->th.th_used_in_team.load() == 0) { - my_flag.wait(this_thr, true USE_ITT_BUILD_ARG(itt_sync_obj)); + my_flag.wait(this_thr, true, itt_sync_obj); } #if USE_ITT_BUILD && USE_ITT_NOTIFY if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) { @@ -453,7 +454,7 @@ static void __kmp_dist_barrier_release( // Wait on go flag on team kmp_atomic_flag_64 my_flag( &(b->go[my_go_index].go), next_go, &(b->sleep[tid].sleep)); - my_flag.wait(this_thr, true USE_ITT_BUILD_ARG(itt_sync_obj)); + my_flag.wait(this_thr, true, itt_sync_obj); KMP_DEBUG_ASSERT(my_current_iter == b->iter[tid].iter || b->iter[tid].iter == 0); KMP_DEBUG_ASSERT(b->sleep[tid].sleep == false); diff --git a/openmp/runtime/src/kmp_wait_release.h b/openmp/runtime/src/kmp_wait_release.h index e63a93ce880f1..5c250d94886f6 100644 --- a/openmp/runtime/src/kmp_wait_release.h +++ b/openmp/runtime/src/kmp_wait_release.h @@ -943,8 +943,7 @@ class kmp_flag_oncore : public kmp_flag_native { } kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx) : kmp_flag_native(p), offset(idx), - flag_switch(false), - bt(bs_last_barrier) USE_ITT_BUILD_ARG(itt_sync_obj(nullptr)) {} + flag_switch(false), bt(bs_last_barrier), itt_sync_obj(nullptr) {} kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx, enum barrier_type bar_t, kmp_info_t *thr USE_ITT_BUILD_ARG(void *itt)) From 4eb90e893f82314def571f7129dfd88bd098208b Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Tue, 29 Jun 2021 09:34:53 -0500 Subject: [PATCH 185/619] Revert "[OpenMP] Add Two-level Distributed Barrier" This reverts commit 25073a4ecfc9b2e3cb76776185e63bfdb094cd98. This breaks non-x86 OpenMP builds for a while now. Until a solution is ready to be upstreamed we revert the feature and unblock those builds. See: https://reviews.llvm.org/rG25073a4ecfc9b2e3cb76776185e63bfdb094cd98#1005821 and https://reviews.llvm.org/rG25073a4ecfc9b2e3cb76776185e63bfdb094cd98#1005821 The currently proposed fix (D104788) seems not to be ready yet: https://reviews.llvm.org/D104788#2841928 --- openmp/runtime/src/i18n/en_US.txt | 1 - openmp/runtime/src/kmp.h | 33 - openmp/runtime/src/kmp_atomic.cpp | 6 +- openmp/runtime/src/kmp_barrier.cpp | 553 +---------------- openmp/runtime/src/kmp_barrier.h | 109 ---- openmp/runtime/src/kmp_global.cpp | 4 +- openmp/runtime/src/kmp_os.h | 21 - openmp/runtime/src/kmp_runtime.cpp | 248 +------- openmp/runtime/src/kmp_settings.cpp | 36 +- openmp/runtime/src/kmp_stats.h | 4 - openmp/runtime/src/kmp_str.cpp | 25 - openmp/runtime/src/kmp_str.h | 1 - openmp/runtime/src/kmp_tasking.cpp | 29 +- openmp/runtime/src/kmp_wait_release.cpp | 8 - openmp/runtime/src/kmp_wait_release.h | 724 +++++++++++----------- openmp/runtime/src/z_Linux_util.cpp | 90 +-- openmp/runtime/src/z_Windows_NT_util.cpp | 78 +-- openmp/runtime/test/barrier/omp_barrier.c | 2 - 18 files changed, 454 insertions(+), 1518 deletions(-) delete mode 100644 openmp/runtime/src/kmp_barrier.h diff --git a/openmp/runtime/src/i18n/en_US.txt b/openmp/runtime/src/i18n/en_US.txt index 435579fc7e7a7..0b5436fd5801b 100644 --- a/openmp/runtime/src/i18n/en_US.txt +++ b/openmp/runtime/src/i18n/en_US.txt @@ -269,7 +269,6 @@ Using_int_Value "%1$s value \"%2$d\" will be used." Using_uint_Value "%1$s value \"%2$u\" will be used." Using_uint64_Value "%1$s value \"%2$s\" will be used." Using_str_Value "%1$s value \"%2$s\" will be used." -BarrierPatternOverride "Mixing other barrier patterns with dist is prohibited. Using dist for all barrier patterns." MaxValueUsing "%1$s maximum value \"%2$d\" will be used." MinValueUsing "%1$s minimum value \"%2$d\" will be used." MemoryAllocFailed "Memory allocation failed." diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index 835d4ad55980b..05264f4433d3e 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -115,7 +115,6 @@ typedef unsigned int kmp_hwloc_depth_t; #include "kmp_debug.h" #include "kmp_lock.h" #include "kmp_version.h" -#include "kmp_barrier.h" #if USE_DEBUGGER #include "kmp_debugger.h" #endif @@ -264,7 +263,6 @@ typedef union kmp_root kmp_root_p; template class kmp_flag_32; template class kmp_flag_64; -template class kmp_atomic_flag_64; class kmp_flag_oncore; #ifdef __cplusplus @@ -1881,15 +1879,6 @@ typedef struct kmp_disp { 0 // Thread th_reap_state: not safe to reap (tasking) #define KMP_SAFE_TO_REAP 1 // Thread th_reap_state: safe to reap (not tasking) -// The flag_type describes the storage used for the flag. -enum flag_type { - flag32, /**< atomic 32 bit flags */ - flag64, /**< 64 bit flags */ - atomic_flag64, /**< atomic 64 bit flags */ - flag_oncore, /**< special 64-bit flag for on-core barrier (hierarchical) */ - flag_unset -}; - enum barrier_type { bs_plain_barrier = 0, /* 0, All non-fork/join barriers (except reduction barriers if enabled) */ @@ -1913,7 +1902,6 @@ typedef enum kmp_bar_pat { /* Barrier communication patterns */ bp_hyper_bar = 2, /* Hypercube-embedded tree with min branching factor 2^n */ bp_hierarchical_bar = 3, /* Machine hierarchy tree */ - bp_dist_bar = 4, /* Distributed barrier */ bp_last_bar /* Placeholder to mark the end */ } kmp_bar_pat_e; @@ -2638,7 +2626,6 @@ typedef struct KMP_ALIGN_CACHE kmp_base_info { /* while awaiting queuing lock acquire */ volatile void *th_sleep_loc; // this points at a kmp_flag - flag_type th_sleep_loc_type; // enum type of flag stored in th_sleep_loc ident_t *th_ident; unsigned th_x; // Random number generator data @@ -2659,9 +2646,6 @@ typedef struct KMP_ALIGN_CACHE kmp_base_info { written by the worker thread) */ kmp_uint8 th_active_in_pool; // included in count of #active threads in pool int th_active; // ! sleeping; 32 bits for TCR/TCW - std::atomic th_used_in_team; // Flag indicating use in team - // 0 = not used in team; 1 = used in team; - // 2 = transitioning to not used in team; 3 = transitioning to used in team struct cons_header *th_cons; // used for consistency check #if KMP_USE_HIER_SCHED // used for hierarchical scheduling @@ -2841,7 +2825,6 @@ typedef struct KMP_ALIGN_CACHE kmp_base_team { #if USE_ITT_BUILD void *t_stack_id; // team specific stack stitching id (for ittnotify) #endif /* USE_ITT_BUILD */ - distributedBarrier *b; // Distributed barrier data associated with team } kmp_base_team_t; union KMP_ALIGN_CACHE kmp_team { @@ -4143,26 +4126,18 @@ template extern void __kmp_suspend_32(int th_gtid, kmp_flag_32 *flag); template extern void __kmp_suspend_64(int th_gtid, kmp_flag_64 *flag); -template -extern void __kmp_atomic_suspend_64(int th_gtid, - kmp_atomic_flag_64 *flag); extern void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag); #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT template extern void __kmp_mwait_32(int th_gtid, kmp_flag_32 *flag); template extern void __kmp_mwait_64(int th_gtid, kmp_flag_64 *flag); -template -extern void __kmp_atomic_mwait_64(int th_gtid, kmp_atomic_flag_64 *flag); extern void __kmp_mwait_oncore(int th_gtid, kmp_flag_oncore *flag); #endif template extern void __kmp_resume_32(int target_gtid, kmp_flag_32 *flag); template extern void __kmp_resume_64(int target_gtid, kmp_flag_64 *flag); -template -extern void __kmp_atomic_resume_64(int target_gtid, - kmp_atomic_flag_64 *flag); extern void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag); template @@ -4181,14 +4156,6 @@ int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, void *itt_sync_obj, #endif /* USE_ITT_BUILD */ kmp_int32 is_constrained); -template -int __kmp_atomic_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, - kmp_atomic_flag_64 *flag, - int final_spin, int *thread_finished, -#if USE_ITT_BUILD - void *itt_sync_obj, -#endif /* USE_ITT_BUILD */ - kmp_int32 is_constrained); int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin, int *thread_finished, diff --git a/openmp/runtime/src/kmp_atomic.cpp b/openmp/runtime/src/kmp_atomic.cpp index fdcfc6ef540fe..fcc06216a4fa5 100644 --- a/openmp/runtime/src/kmp_atomic.cpp +++ b/openmp/runtime/src/kmp_atomic.cpp @@ -732,7 +732,7 @@ static inline kmp_cmplx128_a16_t operator/(kmp_cmplx128_a16_t &lhs, #define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) \ __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ - (*lhs) = (TYPE)((*lhs)OP rhs); \ + (*lhs) = (TYPE)((*lhs)OP((TYPE)rhs)); \ __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); // ------------------------------------------------------------------------ @@ -791,14 +791,14 @@ static inline kmp_cmplx128_a16_t operator/(kmp_cmplx128_a16_t &lhs, { \ TYPE old_value, new_value; \ old_value = *(TYPE volatile *)lhs; \ - new_value = (TYPE)(old_value OP rhs); \ + new_value = (TYPE)(old_value OP((TYPE)rhs)); \ while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ KMP_DO_PAUSE; \ \ old_value = *(TYPE volatile *)lhs; \ - new_value = (TYPE)(old_value OP rhs); \ + new_value = (TYPE)(old_value OP((TYPE)rhs)); \ } \ } diff --git a/openmp/runtime/src/kmp_barrier.cpp b/openmp/runtime/src/kmp_barrier.cpp index 134163e23fa7f..237d18a73dcd6 100644 --- a/openmp/runtime/src/kmp_barrier.cpp +++ b/openmp/runtime/src/kmp_barrier.cpp @@ -10,14 +10,12 @@ // //===----------------------------------------------------------------------===// +#include "kmp.h" #include "kmp_wait_release.h" -#include "kmp_barrier.h" #include "kmp_itt.h" #include "kmp_os.h" #include "kmp_stats.h" #include "ompt-specific.h" -// for distributed barrier -#include "kmp_affinity.h" #if KMP_MIC #include @@ -42,517 +40,6 @@ void __kmp_print_structure(void); // Forward declaration // ---------------------------- Barrier Algorithms ---------------------------- -// Distributed barrier - -// Compute how many threads to have polling each cache-line. -// We want to limit the number of writes to IDEAL_GO_RESOLUTION. -void distributedBarrier::computeVarsForN(size_t n) { - int nsockets = 1; - if (__kmp_topology) { - int socket_level = __kmp_topology->get_level(KMP_HW_SOCKET); - int core_level = __kmp_topology->get_level(KMP_HW_CORE); - int ncores_per_socket = - __kmp_topology->calculate_ratio(core_level, socket_level); - nsockets = __kmp_topology->get_count(socket_level); - - if (nsockets <= 0) - nsockets = 1; - if (ncores_per_socket <= 0) - ncores_per_socket = 1; - - threads_per_go = ncores_per_socket >> 1; - if (!fix_threads_per_go) { - // Minimize num_gos - if (threads_per_go > 4) { - if (KMP_OPTIMIZE_FOR_REDUCTIONS) { - threads_per_go = threads_per_go >> 1; - } - if (threads_per_go > 4 && nsockets == 1) - threads_per_go = threads_per_go >> 1; - } - } - if (threads_per_go == 0) - threads_per_go = 1; - fix_threads_per_go = true; - num_gos = n / threads_per_go; - if (n % threads_per_go) - num_gos++; - if (nsockets == 1 || num_gos == 1) - num_groups = 1; - else { - num_groups = num_gos / nsockets; - if (num_gos % nsockets) - num_groups++; - } - if (num_groups <= 0) - num_groups = 1; - gos_per_group = num_gos / num_groups; - if (num_gos % num_groups) - gos_per_group++; - threads_per_group = threads_per_go * gos_per_group; - } else { - num_gos = n / threads_per_go; - if (n % threads_per_go) - num_gos++; - if (num_gos == 1) - num_groups = 1; - else { - num_groups = num_gos / 2; - if (num_gos % 2) - num_groups++; - } - gos_per_group = num_gos / num_groups; - if (num_gos % num_groups) - gos_per_group++; - threads_per_group = threads_per_go * gos_per_group; - } -} - -void distributedBarrier::computeGo(size_t n) { - // Minimize num_gos - for (num_gos = 1;; num_gos++) - if (IDEAL_CONTENTION * num_gos >= n) - break; - threads_per_go = n / num_gos; - if (n % num_gos) - threads_per_go++; - while (num_gos > MAX_GOS) { - threads_per_go++; - num_gos = n / threads_per_go; - if (n % threads_per_go) - num_gos++; - } - computeVarsForN(n); -} - -// This function is to resize the barrier arrays when the new number of threads -// exceeds max_threads, which is the current size of all the arrays -void distributedBarrier::resize(size_t nthr) { - KMP_DEBUG_ASSERT(nthr > max_threads); - - // expand to requested size * 2 - max_threads = nthr * 2; - - // allocate arrays to new max threads - for (int i = 0; i < MAX_ITERS; ++i) { - if (flags[i]) - flags[i] = (flags_s *)KMP_INTERNAL_REALLOC(flags[i], - max_threads * sizeof(flags_s)); - else - flags[i] = (flags_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(flags_s)); - } - - if (go) - go = (go_s *)KMP_INTERNAL_REALLOC(go, max_threads * sizeof(go_s)); - else - go = (go_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(go_s)); - - if (iter) - iter = (iter_s *)KMP_INTERNAL_REALLOC(iter, max_threads * sizeof(iter_s)); - else - iter = (iter_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(iter_s)); - - if (sleep) - sleep = - (sleep_s *)KMP_INTERNAL_REALLOC(sleep, max_threads * sizeof(sleep_s)); - else - sleep = (sleep_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(sleep_s)); -} - -// This function is to set all the go flags that threads might be waiting -// on, and when blocktime is not infinite, it should be followed by a wake-up -// call to each thread -kmp_uint64 distributedBarrier::go_release() { - kmp_uint64 next_go = iter[0].iter + distributedBarrier::MAX_ITERS; - for (size_t j = 0; j < num_gos; j++) { - go[j].go.store(next_go); - } - return next_go; -} - -void distributedBarrier::go_reset() { - for (size_t j = 0; j < max_threads; ++j) { - for (size_t i = 0; i < distributedBarrier::MAX_ITERS; ++i) { - flags[i][j].stillNeed = 1; - } - go[j].go.store(0); - iter[j].iter = 0; - } -} - -// This function inits/re-inits the distributed barrier for a particular number -// of threads. If a resize of arrays is needed, it calls the resize function. -void distributedBarrier::init(size_t nthr) { - size_t old_max = max_threads; - if (nthr > max_threads) { // need more space in arrays - resize(nthr); - } - - for (size_t i = 0; i < max_threads; i++) { - for (size_t j = 0; j < distributedBarrier::MAX_ITERS; j++) { - flags[j][i].stillNeed = 1; - } - go[i].go.store(0); - iter[i].iter = 0; - if (i >= old_max) - sleep[i].sleep = false; - } - - // Recalculate num_gos, etc. based on new nthr - computeVarsForN(nthr); - - num_threads = nthr; - - if (team_icvs == NULL) - team_icvs = __kmp_allocate(sizeof(kmp_internal_control_t)); -} - -// This function is used only when KMP_BLOCKTIME is not infinite. -// static -void __kmp_dist_barrier_wakeup(enum barrier_type bt, kmp_team_t *team, - size_t start, size_t stop, size_t inc, - size_t tid) { - KMP_DEBUG_ASSERT(__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME); - if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)) - return; - - kmp_info_t **other_threads = team->t.t_threads; - for (size_t thr = start; thr < stop; thr += inc) { - KMP_DEBUG_ASSERT(other_threads[thr]); - int gtid = other_threads[thr]->th.th_info.ds.ds_gtid; - // Wake up worker regardless of if it appears to be sleeping or not - __kmp_atomic_resume_64(gtid, (kmp_atomic_flag_64<> *)NULL); - } -} - -static void -__kmp_dist_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, - int tid, void (*reduce)(void *, void *) - USE_ITT_BUILD_ARG(void *itt_sync_obj)) { - KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_dist_gather); - kmp_team_t *team; - distributedBarrier *b; - kmp_info_t **other_threads; - kmp_uint64 my_current_iter, my_next_iter; - kmp_uint32 nproc; - bool group_leader; - - team = this_thr->th.th_team; - nproc = this_thr->th.th_team_nproc; - other_threads = team->t.t_threads; - b = team->t.b; - my_current_iter = b->iter[tid].iter; - my_next_iter = (my_current_iter + 1) % distributedBarrier::MAX_ITERS; - group_leader = ((tid % b->threads_per_group) == 0); - - KA_TRACE(20, - ("__kmp_dist_barrier_gather: T#%d(%d:%d) enter; barrier type %d\n", - gtid, team->t.t_id, tid, bt)); - -#if USE_ITT_BUILD && USE_ITT_NOTIFY - // Barrier imbalance - save arrive time to the thread - if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) { - this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = - __itt_get_timestamp(); - } -#endif - - if (group_leader) { - // Start from the thread after the group leader - size_t group_start = tid + 1; - size_t group_end = tid + b->threads_per_group; - size_t threads_pending = 0; - - if (group_end > nproc) - group_end = nproc; - do { // wait for threads in my group - threads_pending = 0; - // Check all the flags every time to avoid branch misspredict - for (size_t thr = group_start; thr < group_end; thr++) { - // Each thread uses a different cache line - threads_pending += b->flags[my_current_iter][thr].stillNeed; - } - // Execute tasks here - if (__kmp_tasking_mode != tskm_immediate_exec) { - kmp_task_team_t *task_team = this_thr->th.th_task_team; - if (task_team != NULL) { - if (TCR_SYNC_4(task_team->tt.tt_active)) { - if (KMP_TASKING_ENABLED(task_team)) { - int tasks_completed = FALSE; - __kmp_atomic_execute_tasks_64( - this_thr, gtid, (kmp_atomic_flag_64<> *)NULL, FALSE, - &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0); - } else - this_thr->th.th_reap_state = KMP_SAFE_TO_REAP; - } - } else { - this_thr->th.th_reap_state = KMP_SAFE_TO_REAP; - } // if - } - if (TCR_4(__kmp_global.g.g_done)) { - if (__kmp_global.g.g_abort) - __kmp_abort_thread(); - break; - } else if (__kmp_tasking_mode != tskm_immediate_exec && - this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) { - this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP; - } - } while (threads_pending > 0); - - if (reduce) { // Perform reduction if needed - OMPT_REDUCTION_DECL(this_thr, gtid); - OMPT_REDUCTION_BEGIN; - // Group leader reduces all threads in group - for (size_t thr = group_start; thr < group_end; thr++) { - (*reduce)(this_thr->th.th_local.reduce_data, - other_threads[thr]->th.th_local.reduce_data); - } - OMPT_REDUCTION_END; - } - - // Set flag for next iteration - b->flags[my_next_iter][tid].stillNeed = 1; - // Each thread uses a different cache line; resets stillNeed to 0 to - // indicate it has reached the barrier - b->flags[my_current_iter][tid].stillNeed = 0; - - do { // wait for all group leaders - threads_pending = 0; - for (size_t thr = 0; thr < nproc; thr += b->threads_per_group) { - threads_pending += b->flags[my_current_iter][thr].stillNeed; - } - // Execute tasks here - if (__kmp_tasking_mode != tskm_immediate_exec) { - kmp_task_team_t *task_team = this_thr->th.th_task_team; - if (task_team != NULL) { - if (TCR_SYNC_4(task_team->tt.tt_active)) { - if (KMP_TASKING_ENABLED(task_team)) { - int tasks_completed = FALSE; - __kmp_atomic_execute_tasks_64( - this_thr, gtid, (kmp_atomic_flag_64<> *)NULL, FALSE, - &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0); - } else - this_thr->th.th_reap_state = KMP_SAFE_TO_REAP; - } - } else { - this_thr->th.th_reap_state = KMP_SAFE_TO_REAP; - } // if - } - if (TCR_4(__kmp_global.g.g_done)) { - if (__kmp_global.g.g_abort) - __kmp_abort_thread(); - break; - } else if (__kmp_tasking_mode != tskm_immediate_exec && - this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) { - this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP; - } - } while (threads_pending > 0); - - if (reduce) { // Perform reduction if needed - if (KMP_MASTER_TID(tid)) { // Master reduces over group leaders - OMPT_REDUCTION_DECL(this_thr, gtid); - OMPT_REDUCTION_BEGIN; - for (size_t thr = b->threads_per_group; thr < nproc; - thr += b->threads_per_group) { - (*reduce)(this_thr->th.th_local.reduce_data, - other_threads[thr]->th.th_local.reduce_data); - } - OMPT_REDUCTION_END; - } - } - } else { - // Set flag for next iteration - b->flags[my_next_iter][tid].stillNeed = 1; - // Each thread uses a different cache line; resets stillNeed to 0 to - // indicate it has reached the barrier - b->flags[my_current_iter][tid].stillNeed = 0; - } - - KMP_MFENCE(); - - KA_TRACE(20, - ("__kmp_dist_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n", - gtid, team->t.t_id, tid, bt)); -} - -static void __kmp_dist_barrier_release( - enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, - int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) { - KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_dist_release); - kmp_team_t *team; - distributedBarrier *b; - kmp_bstate_t *thr_bar; - kmp_uint64 my_current_iter, next_go; - size_t my_go_index; - bool group_leader; - - KA_TRACE(20, ("__kmp_dist_barrier_release: T#%d(%d) enter; barrier type %d\n", - gtid, tid, bt)); - - thr_bar = &this_thr->th.th_bar[bt].bb; - - if (!KMP_MASTER_TID(tid)) { - // workers and non-master group leaders need to check their presence in team - do { - if (this_thr->th.th_used_in_team.load() != 1 && - this_thr->th.th_used_in_team.load() != 3) { - // Thread is not in use in a team. Wait on location in tid's thread - // struct. The 0 value tells anyone looking that this thread is spinning - // or sleeping until this location becomes 3 again; 3 is the transition - // state to get to 1 which is waiting on go and being in the team - kmp_flag_32 my_flag(&(this_thr->th.th_used_in_team), 3); - if (KMP_COMPARE_AND_STORE_ACQ32(&(this_thr->th.th_used_in_team), 2, - 0) || - this_thr->th.th_used_in_team.load() == 0) { - my_flag.wait(this_thr, true, itt_sync_obj); - } -#if USE_ITT_BUILD && USE_ITT_NOTIFY - if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) { - // In fork barrier where we could not get the object reliably - itt_sync_obj = - __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1); - // Cancel wait on previous parallel region... - __kmp_itt_task_starting(itt_sync_obj); - - if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)) - return; - - itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); - if (itt_sync_obj != NULL) - // Call prepare as early as possible for "new" barrier - __kmp_itt_task_finished(itt_sync_obj); - } else -#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ - if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)) - return; - } - if (this_thr->th.th_used_in_team.load() != 1 && - this_thr->th.th_used_in_team.load() != 3) // spurious wake-up? - continue; - if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)) - return; - - // At this point, the thread thinks it is in use in a team, or in - // transition to be used in a team, but it might have reached this barrier - // before it was marked unused by the team. Unused threads are awoken and - // shifted to wait on local thread struct elsewhere. It also might reach - // this point by being picked up for use by a different team. Either way, - // we need to update the tid. - tid = __kmp_tid_from_gtid(gtid); - team = this_thr->th.th_team; - KMP_DEBUG_ASSERT(tid >= 0); - KMP_DEBUG_ASSERT(team); - b = team->t.b; - my_current_iter = b->iter[tid].iter; - next_go = my_current_iter + distributedBarrier::MAX_ITERS; - my_go_index = tid / b->threads_per_go; - if (this_thr->th.th_used_in_team.load() == 3) { - KMP_COMPARE_AND_STORE_ACQ32(&(this_thr->th.th_used_in_team), 3, 1); - } - // Check if go flag is set - if (b->go[my_go_index].go.load() != next_go) { - // Wait on go flag on team - kmp_atomic_flag_64 my_flag( - &(b->go[my_go_index].go), next_go, &(b->sleep[tid].sleep)); - my_flag.wait(this_thr, true, itt_sync_obj); - KMP_DEBUG_ASSERT(my_current_iter == b->iter[tid].iter || - b->iter[tid].iter == 0); - KMP_DEBUG_ASSERT(b->sleep[tid].sleep == false); - } - - if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)) - return; - // At this point, the thread's go location was set. This means the primary - // thread is safely in the barrier, and so this thread's data is - // up-to-date, but we should check again that this thread is really in - // use in the team, as it could have been woken up for the purpose of - // changing team size, or reaping threads at shutdown. - if (this_thr->th.th_used_in_team.load() == 1) - break; - } while (1); - - if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)) - return; - - group_leader = ((tid % b->threads_per_group) == 0); - if (group_leader) { - // Tell all the threads in my group they can go! - for (size_t go_idx = my_go_index + 1; - go_idx < my_go_index + b->gos_per_group; go_idx++) { - b->go[go_idx].go.store(next_go); - } - // Fence added so that workers can see changes to go. sfence inadequate. - KMP_MFENCE(); - } - -#if KMP_BARRIER_ICV_PUSH - if (propagate_icvs) { // copy ICVs to final dest - __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, - tid, FALSE); - copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, - (kmp_internal_control_t *)team->t.b->team_icvs); - copy_icvs(&thr_bar->th_fixed_icvs, - &team->t.t_implicit_task_taskdata[tid].td_icvs); - } -#endif - if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME && group_leader) { - // This thread is now awake and participating in the barrier; - // wake up the other threads in the group - size_t nproc = this_thr->th.th_team_nproc; - size_t group_end = tid + b->threads_per_group; - if (nproc < group_end) - group_end = nproc; - __kmp_dist_barrier_wakeup(bt, team, tid + 1, group_end, 1, tid); - } - } else { // Primary thread - team = this_thr->th.th_team; - b = team->t.b; - my_current_iter = b->iter[tid].iter; - next_go = my_current_iter + distributedBarrier::MAX_ITERS; -#if KMP_BARRIER_ICV_PUSH - if (propagate_icvs) { - // primary thread has ICVs in final destination; copy - copy_icvs(&thr_bar->th_fixed_icvs, - &team->t.t_implicit_task_taskdata[tid].td_icvs); - } -#endif - // Tell all the group leaders they can go! - for (size_t go_idx = 0; go_idx < b->num_gos; go_idx += b->gos_per_group) { - b->go[go_idx].go.store(next_go); - } - - if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { - // Wake-up the group leaders - size_t nproc = this_thr->th.th_team_nproc; - __kmp_dist_barrier_wakeup(bt, team, tid + b->threads_per_group, nproc, - b->threads_per_group, tid); - } - - // Tell all the threads in my group they can go! - for (size_t go_idx = 1; go_idx < b->gos_per_group; go_idx++) { - b->go[go_idx].go.store(next_go); - } - - // Fence added so that workers can see changes to go. sfence inadequate. - KMP_MFENCE(); - - if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { - // Wake-up the other threads in my group - size_t nproc = this_thr->th.th_team_nproc; - size_t group_end = tid + b->threads_per_group; - if (nproc < group_end) - group_end = nproc; - __kmp_dist_barrier_wakeup(bt, team, tid + 1, group_end, 1, tid); - } - } - // Update to next iteration - KMP_ASSERT(my_current_iter == b->iter[tid].iter); - b->iter[tid].iter = (b->iter[tid].iter + 1) % distributedBarrier::MAX_ITERS; - - KA_TRACE( - 20, ("__kmp_dist_barrier_release: T#%d(%d:%d) exit for barrier type %d\n", - gtid, team->t.t_id, tid, bt)); -} // Linear Barrier template @@ -1907,11 +1394,6 @@ static int __kmp_barrier_template(enum barrier_type bt, int gtid, int is_split, bt, this_thr, gtid, tid, reduce USE_ITT_BUILD_ARG(itt_sync_obj)); } else { switch (__kmp_barrier_gather_pattern[bt]) { - case bp_dist_bar: { - __kmp_dist_barrier_gather(bt, this_thr, gtid, tid, - reduce USE_ITT_BUILD_ARG(itt_sync_obj)); - break; - } case bp_hyper_bar: { // don't set branch bits to 0; use linear KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]); @@ -2025,12 +1507,6 @@ static int __kmp_barrier_template(enum barrier_type bt, int gtid, int is_split, bt, this_thr, gtid, tid, FALSE USE_ITT_BUILD_ARG(itt_sync_obj)); } else { switch (__kmp_barrier_release_pattern[bt]) { - case bp_dist_bar: { - KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]); - __kmp_dist_barrier_release(bt, this_thr, gtid, tid, - FALSE USE_ITT_BUILD_ARG(itt_sync_obj)); - break; - } case bp_hyper_bar: { KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]); __kmp_hyper_barrier_release(bt, this_thr, gtid, tid, @@ -2162,11 +1638,6 @@ void __kmp_end_split_barrier(enum barrier_type bt, int gtid) { if (!team->t.t_serialized) { if (KMP_MASTER_GTID(gtid)) { switch (__kmp_barrier_release_pattern[bt]) { - case bp_dist_bar: { - __kmp_dist_barrier_release(bt, this_thr, gtid, tid, - FALSE USE_ITT_BUILD_ARG(NULL)); - break; - } case bp_hyper_bar: { KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]); __kmp_hyper_barrier_release(bt, this_thr, gtid, tid, @@ -2278,8 +1749,8 @@ void __kmp_join_barrier(int gtid) { if (__kmp_tasking_mode == tskm_extra_barrier) { __kmp_tasking_barrier(team, this_thr, gtid); - KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) past tasking barrier\n", - gtid, team_id, tid)); + KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) past taking barrier\n", gtid, + team_id, tid)); } #ifdef KMP_DEBUG if (__kmp_tasking_mode != tskm_immediate_exec) { @@ -2288,9 +1759,8 @@ void __kmp_join_barrier(int gtid) { __kmp_gtid_from_thread(this_thr), team_id, team->t.t_task_team[this_thr->th.th_task_state], this_thr->th.th_task_team)); - if (this_thr->th.th_task_team) - KMP_DEBUG_ASSERT(this_thr->th.th_task_team == - team->t.t_task_team[this_thr->th.th_task_state]); + KMP_DEBUG_ASSERT(this_thr->th.th_task_team == + team->t.t_task_team[this_thr->th.th_task_state]); } #endif /* KMP_DEBUG */ @@ -2316,11 +1786,6 @@ void __kmp_join_barrier(int gtid) { #endif /* USE_ITT_BUILD */ switch (__kmp_barrier_gather_pattern[bs_forkjoin_barrier]) { - case bp_dist_bar: { - __kmp_dist_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, - NULL USE_ITT_BUILD_ARG(itt_sync_obj)); - break; - } case bp_hyper_bar: { KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]); __kmp_hyper_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, @@ -2366,7 +1831,8 @@ void __kmp_join_barrier(int gtid) { team_thread->th.th_stats->setIdleFlag(); if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME && team_thread->th.th_sleep_loc != NULL) - __kmp_null_resume_wrapper(team_thread); + __kmp_null_resume_wrapper(__kmp_gtid_from_thread(team_thread), + team_thread->th.th_sleep_loc); } #endif #if USE_ITT_BUILD @@ -2513,11 +1979,6 @@ void __kmp_fork_barrier(int gtid, int tid) { } // primary thread switch (__kmp_barrier_release_pattern[bs_forkjoin_barrier]) { - case bp_dist_bar: { - __kmp_dist_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, - TRUE USE_ITT_BUILD_ARG(NULL)); - break; - } case bp_hyper_bar: { KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]); __kmp_hyper_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, diff --git a/openmp/runtime/src/kmp_barrier.h b/openmp/runtime/src/kmp_barrier.h deleted file mode 100644 index 5510fcaebd0a4..0000000000000 --- a/openmp/runtime/src/kmp_barrier.h +++ /dev/null @@ -1,109 +0,0 @@ -/* - * kmp_barrier.h - */ - -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef KMP_BARRIER_H -#define KMP_BARRIER_H - -#include "kmp.h" - -// Use four cache lines: MLC tends to prefetch the next or previous cache line -// creating a possible fake conflict between cores, so this is the only way to -// guarantee that no such prefetch can happen. -#ifndef KMP_FOURLINE_ALIGN_CACHE -#define KMP_FOURLINE_ALIGN_CACHE KMP_ALIGN(4 * CACHE_LINE) -#endif - -#define KMP_OPTIMIZE_FOR_REDUCTIONS 0 - -class distributedBarrier { - struct flags_s { - kmp_uint32 volatile KMP_FOURLINE_ALIGN_CACHE stillNeed; - }; - - struct go_s { - std::atomic KMP_FOURLINE_ALIGN_CACHE go; - }; - - struct iter_s { - kmp_uint64 volatile KMP_FOURLINE_ALIGN_CACHE iter; - }; - - struct sleep_s { - std::atomic KMP_FOURLINE_ALIGN_CACHE sleep; - }; - - void init(size_t nthr); - void resize(size_t nthr); - void computeGo(size_t n); - void computeVarsForN(size_t n); - -public: - enum { - MAX_ITERS = 3, - MAX_GOS = 8, - IDEAL_GOS = 4, - IDEAL_CONTENTION = 16, - }; - - flags_s *flags[MAX_ITERS]; - go_s *go; - iter_s *iter; - sleep_s *sleep; - - size_t KMP_ALIGN_CACHE num_threads; // number of threads in barrier - size_t KMP_ALIGN_CACHE max_threads; // size of arrays in data structure - // number of go signals each requiring one write per iteration - size_t KMP_ALIGN_CACHE num_gos; - // number of groups of gos - size_t KMP_ALIGN_CACHE num_groups; - // threads per go signal - size_t KMP_ALIGN_CACHE threads_per_go; - bool KMP_ALIGN_CACHE fix_threads_per_go; - // threads per group - size_t KMP_ALIGN_CACHE threads_per_group; - // number of go signals in a group - size_t KMP_ALIGN_CACHE gos_per_group; - void *team_icvs; - - distributedBarrier() = delete; - ~distributedBarrier() = delete; - - // Used instead of constructor to create aligned data - static distributedBarrier *allocate(int nThreads) { - distributedBarrier *d = (distributedBarrier *)_mm_malloc( - sizeof(distributedBarrier), 4 * CACHE_LINE); - d->num_threads = 0; - d->max_threads = 0; - for (int i = 0; i < MAX_ITERS; ++i) - d->flags[i] = NULL; - d->go = NULL; - d->iter = NULL; - d->sleep = NULL; - d->team_icvs = NULL; - d->fix_threads_per_go = false; - // calculate gos and groups ONCE on base size - d->computeGo(nThreads); - d->init(nThreads); - return d; - } - - static void deallocate(distributedBarrier *db) { _mm_free(db); } - - void update_num_threads(size_t nthr) { init(nthr); } - - bool need_resize(size_t new_nthr) { return (new_nthr > max_threads); } - size_t get_num_threads() { return num_threads; } - kmp_uint64 go_release(); - void go_reset(); -}; - -#endif // KMP_BARRIER_H diff --git a/openmp/runtime/src/kmp_global.cpp b/openmp/runtime/src/kmp_global.cpp index b6babbe0e97e3..24de14fe8c33c 100644 --- a/openmp/runtime/src/kmp_global.cpp +++ b/openmp/runtime/src/kmp_global.cpp @@ -110,8 +110,8 @@ char const *__kmp_barrier_type_name[bs_last_barrier] = {"plain", "forkjoin" "reduction" #endif // KMP_FAST_REDUCTION_BARRIER }; -char const *__kmp_barrier_pattern_name[bp_last_bar] = { - "linear", "tree", "hyper", "hierarchical", "dist"}; +char const *__kmp_barrier_pattern_name[bp_last_bar] = {"linear", "tree", + "hyper", "hierarchical"}; int __kmp_allThreadsSpecified = 0; size_t __kmp_align_alloc = CACHE_LINE; diff --git a/openmp/runtime/src/kmp_os.h b/openmp/runtime/src/kmp_os.h index e203f876016cf..858acd9c1d7a1 100644 --- a/openmp/runtime/src/kmp_os.h +++ b/openmp/runtime/src/kmp_os.h @@ -1019,27 +1019,6 @@ extern kmp_real64 __kmp_xchg_real64(volatile kmp_real64 *p, kmp_real64 v); #define KMP_MB() /* nothing to do */ #endif -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 -#if KMP_COMPILER_ICC -#define KMP_MFENCE_() _mm_mfence() -#define KMP_SFENCE_() _mm_sfence() -#elif KMP_COMPILER_MSVC -#define KMP_MFENCE_() MemoryBarrier() -#define KMP_SFENCE_() MemoryBarrier() -#else -#define KMP_MFENCE_() __sync_synchronize() -#define KMP_SFENCE_() __sync_synchronize() -#endif -#define KMP_MFENCE() \ - if (UNLIKELY(!__kmp_cpuinfo.initialized)) { \ - __kmp_query_cpuid(&__kmp_cpuinfo); \ - } \ - if (__kmp_cpuinfo.sse2) { \ - KMP_MFENCE_(); \ - } -#define KMP_SFENCE() KMP_SFENCE_() -#endif - #ifndef KMP_IMB #define KMP_IMB() /* nothing to do */ #endif diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp index 40d2ed7f7a119..414e9ba4e36d7 100644 --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -109,10 +109,6 @@ static int __kmp_unregister_root_other_thread(int gtid); static void __kmp_reap_thread(kmp_info_t *thread, int is_root); kmp_info_t *__kmp_thread_pool_insert_pt = NULL; -void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads, - int new_nthreads); -void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads); - /* Calculate the identifier of the current thread */ /* fast (and somewhat portable) way to get unique identifier of executing thread. Returns KMP_GTID_DNE if we haven't been assigned a gtid. */ @@ -1210,7 +1206,7 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { this_thr->th.th_team = serial_team; serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid; - KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid, + KF_TRACE(10, ("__kmpc_serialized_parallel: T#d curtask=%p\n", global_tid, this_thr->th.th_current_task)); KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1); this_thr->th.th_current_task->td_flags.executing = 0; @@ -1569,24 +1565,15 @@ int __kmp_fork_call(ident_t *loc, int gtid, /* Change number of threads in the team if requested */ if (master_set_numthreads) { // The parallel has num_threads clause - if (master_set_numthreads <= master_th->th.th_teams_size.nth) { + if (master_set_numthreads < master_th->th.th_teams_size.nth) { // AC: only can reduce number of threads dynamically, can't increase kmp_info_t **other_threads = parent_team->t.t_threads; - // NOTE: if using distributed barrier, we need to run this code block - // even when the team size appears not to have changed from the max. - int old_proc = master_th->th.th_teams_size.nth; - if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == - bp_dist_bar) { - __kmp_resize_dist_barrier(parent_team, old_proc, - master_set_numthreads); - __kmp_add_threads_to_team(parent_team, master_set_numthreads); - } parent_team->t.t_nproc = master_set_numthreads; for (i = 0; i < master_set_numthreads; ++i) { other_threads[i]->th.th_team_nproc = master_set_numthreads; } + // Keep extra threads hot in the team for possible next parallels } - // Keep extra threads hot in the team for possible next parallels master_th->th.th_set_nproc = 0; } @@ -1650,9 +1637,6 @@ int __kmp_fork_call(ident_t *loc, int gtid, } #endif - // Need this to happen before we determine the number of threads, not while - // we are allocating the team - //__kmp_push_current_task_to_thread(master_th, parent_team, 0); int enter_teams = 0; if (parent_team->t.t_active_level >= master_th->th.th_current_task->td_icvs.max_active_levels) { @@ -1660,10 +1644,13 @@ int __kmp_fork_call(ident_t *loc, int gtid, } else { enter_teams = ((ap == NULL && active_level == 0) || (ap && teams_level > 0 && teams_level == level)); - nthreads = master_set_numthreads - ? master_set_numthreads - // TODO: get nproc directly from current task - : get__nproc_2(parent_team, master_tid); + nthreads = + master_set_numthreads + ? master_set_numthreads + : get__nproc_2( + parent_team, + master_tid); // TODO: get nproc directly from current task + // Check if we need to take forkjoin lock? (no need for serialized // parallel out of teams construct). This code moved here from // __kmp_reserve_threads() to speedup nested serialized parallels. @@ -1998,8 +1985,6 @@ int __kmp_fork_call(ident_t *loc, int gtid, #endif proc_bind, &new_icvs, argc USE_NESTED_HOT_ARG(master_th)); - if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) - copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs, &new_icvs); } else { /* allocate a new parallel team */ KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n")); @@ -2010,9 +1995,6 @@ int __kmp_fork_call(ident_t *loc, int gtid, proc_bind, &master_th->th.th_current_task->td_icvs, argc USE_NESTED_HOT_ARG(master_th)); - if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) - copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs, - &master_th->th.th_current_task->td_icvs); } KF_TRACE( 10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team)); @@ -2379,12 +2361,6 @@ void __kmp_join_call(ident_t *loc, int gtid parent_team->t.t_stack_id = NULL; } #endif - - if (team->t.t_nproc > 1 && - __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) { - team->t.b->update_num_threads(team->t.t_nproc); - __kmp_add_threads_to_team(team, team->t.t_nproc); - } } KMP_MB(); @@ -2672,9 +2648,6 @@ void __kmp_set_num_threads(int new_nth, int gtid) { __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock); - if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) { - __kmp_resize_dist_barrier(hot_team, hot_team->t.t_nproc, new_nth); - } // Release the extra threads we don't need any more. for (f = new_nth; f < hot_team->t.t_nproc; f++) { KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL); @@ -2694,11 +2667,6 @@ void __kmp_set_num_threads(int new_nth, int gtid) { } #endif - if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) { - hot_team->t.b->update_num_threads(new_nth); - __kmp_add_threads_to_team(hot_team, new_nth); - } - __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); // Update the t_nproc field in the threads that are still active. @@ -4146,6 +4114,7 @@ static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team, this_thr->th.th_team_nproc = team->t.t_nproc; this_thr->th.th_team_master = master; this_thr->th.th_team_serialized = team->t.t_serialized; + TCW_PTR(this_thr->th.th_sleep_loc, NULL); KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata); @@ -4314,12 +4283,6 @@ kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team, new_thr->th.th_task_state_top = 0; new_thr->th.th_task_state_stack_sz = 4; - if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) { - // Make sure pool thread has transitioned to waiting on own thread struct - KMP_DEBUG_ASSERT(new_thr->th.th_used_in_team.load() == 0); - // Thread activated in __kmp_allocate_team when increasing team size - } - #ifdef KMP_ADJUST_BLOCKTIME /* Adjust blocktime back to zero if necessary */ /* Middle initialization might not have occurred yet */ @@ -4487,9 +4450,6 @@ kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team, balign[b].bb.use_oncore_barrier = 0; } - TCW_PTR(new_thr->th.th_sleep_loc, NULL); - new_thr->th.th_sleep_loc_type = flag_unset; - new_thr->th.th_spin_here = FALSE; new_thr->th.th_next_waiting = 0; #if KMP_OS_UNIX @@ -5069,13 +5029,6 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc, } #endif - if (team->t.t_nproc != new_nproc && - __kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) { - // Distributed barrier may need a resize - int old_nthr = team->t.t_nproc; - __kmp_resize_dist_barrier(team, old_nthr, new_nproc); - } - // Has the number of threads changed? /* Let's assume the most common case is that the number of threads is unchanged, and put that case first. */ @@ -5125,11 +5078,6 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc, new_nproc)); team->t.t_size_changed = 1; - if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) { - // Barrier size already reduced earlier in this function - // Activate team threads via th_used_in_team - __kmp_add_threads_to_team(team, new_nproc); - } #if KMP_NESTED_HOT_TEAMS if (__kmp_hot_teams_mode == 0) { // AC: saved number of threads should correspond to team's value in this @@ -5206,7 +5154,7 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc, KA_TRACE(20, ("__kmp_allocate_team: increasing hot team thread count to %d\n", new_nproc)); - int old_nproc = team->t.t_nproc; // save old value and use to update only + team->t.t_size_changed = 1; #if KMP_NESTED_HOT_TEAMS @@ -5233,9 +5181,10 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc, KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1); team->t.t_nproc = new_nproc; // just get reserved threads involved } else { - // We may have some threads in reserve, but not enough; - // get reserved threads involved if any. - team->t.t_nproc = hot_teams[level].hot_team_nth; + // we may have some threads in reserve, but not enough + team->t.t_nproc = + hot_teams[level] + .hot_team_nth; // get reserved threads involved if any hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size #endif // KMP_NESTED_HOT_TEAMS if (team->t.t_max_nproc < new_nproc) { @@ -5290,12 +5239,8 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc, #if KMP_NESTED_HOT_TEAMS } // end of check of t_nproc vs. new_nproc vs. hot_team_nth #endif // KMP_NESTED_HOT_TEAMS - if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) { - // Barrier size already increased earlier in this function - // Activate team threads via th_used_in_team - __kmp_add_threads_to_team(team, new_nproc); - } /* make sure everyone is syncronized */ + int old_nproc = team->t.t_nproc; // save old value and use to update only // new threads below __kmp_initialize_team(team, new_nproc, new_icvs, root->r.r_uber_thread->th.th_ident); @@ -5399,13 +5344,6 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc, /* take this team from the team pool */ __kmp_team_pool = team->t.t_next_pool; - if (max_nproc > 1 && - __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) { - if (!team->t.b) { // Allocate barrier structure - team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub); - } - } - /* setup the team for fresh use */ __kmp_initialize_team(team, new_nproc, new_icvs, NULL); @@ -5461,12 +5399,6 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc, /* and set it up */ team->t.t_max_nproc = max_nproc; - if (max_nproc > 1 && - __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) { - // Allocate barrier structure - team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub); - } - /* NOTE well, for some reason allocating one big buffer and dividing it up seems to really hurt performance a lot on the P4, so, let's not use this */ __kmp_allocate_team_arrays(team, max_nproc); @@ -5623,43 +5555,10 @@ void __kmp_free_team(kmp_root_t *root, /* free the worker threads */ for (f = 1; f < team->t.t_nproc; ++f) { KMP_DEBUG_ASSERT(team->t.t_threads[f]); - if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) { - KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team), - 1, 2); - } __kmp_free_thread(team->t.t_threads[f]); - } - - if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) { - if (team->t.b) { - // wake up thread at old location - team->t.b->go_release(); - if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { - for (f = 1; f < team->t.t_nproc; ++f) { - if (team->t.b->sleep[f].sleep) { - __kmp_atomic_resume_64( - team->t.t_threads[f]->th.th_info.ds.ds_gtid, - (kmp_atomic_flag_64<> *)NULL); - } - } - } - // Wait for threads to be removed from team - for (int f = 1; f < team->t.t_nproc; ++f) { - while (team->t.t_threads[f]->th.th_used_in_team.load() != 0) - KMP_CPU_PAUSE(); - } - } - } - - for (f = 1; f < team->t.t_nproc; ++f) { team->t.t_threads[f] = NULL; } - if (team->t.t_max_nproc > 1 && - __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) { - distributedBarrier::deallocate(team->t.b); - team->t.b = NULL; - } /* put the team back in the team pool */ /* TODO limit size of team pool, call reap_team if pool too large */ team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool); @@ -6058,19 +5957,12 @@ static void __kmp_reap_thread(kmp_info_t *thread, int is_root) { KA_TRACE( 20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n", gtid)); - if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) { - while ( - !KMP_COMPARE_AND_STORE_ACQ32(&(thread->th.th_used_in_team), 0, 3)) - KMP_CPU_PAUSE(); - __kmp_resume_32(gtid, (kmp_flag_32 *)NULL); - } else { - /* Need release fence here to prevent seg faults for tree forkjoin - barrier (GEH) */ - ANNOTATE_HAPPENS_BEFORE(thread); - kmp_flag_64<> flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go, - thread); - __kmp_release_64(&flag); - } + /* Need release fence here to prevent seg faults for tree forkjoin barrier + * (GEH) */ + ANNOTATE_HAPPENS_BEFORE(thread); + kmp_flag_64<> flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go, + thread); + __kmp_release_64(&flag); } // Terminate OS thread. @@ -6944,8 +6836,8 @@ static void __kmp_do_serial_initialize(void) { #if KMP_FAST_REDUCTION_BARRIER #define kmp_reduction_barrier_gather_bb ((int)1) #define kmp_reduction_barrier_release_bb ((int)1) -#define kmp_reduction_barrier_gather_pat __kmp_barrier_gather_pat_dflt -#define kmp_reduction_barrier_release_pat __kmp_barrier_release_pat_dflt +#define kmp_reduction_barrier_gather_pat bp_hyper_bar +#define kmp_reduction_barrier_release_pat bp_hyper_bar #endif // KMP_FAST_REDUCTION_BARRIER for (i = bs_plain_barrier; i < bs_last_barrier; i++) { __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt; @@ -8802,96 +8694,6 @@ void __kmp_omp_display_env(int verbose) { __kmp_release_bootstrap_lock(&__kmp_initz_lock); } -// The team size is changing, so distributed barrier must be modified -void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads, - int new_nthreads) { - KMP_DEBUG_ASSERT(__kmp_barrier_release_pattern[bs_forkjoin_barrier] == - bp_dist_bar); - kmp_info_t **other_threads = team->t.t_threads; - - // We want all the workers to stop waiting on the barrier while we adjust the - // size of the team. - for (int f = 1; f < old_nthreads; ++f) { - KMP_DEBUG_ASSERT(other_threads[f] != NULL); - // Ignore threads that are already inactive or not present in the team - if (team->t.t_threads[f]->th.th_used_in_team.load() == 0) { - // teams construct causes thread_limit to get passed in, and some of - // those could be inactive; just ignore them - continue; - } - // If thread is transitioning still to in_use state, wait for it - if (team->t.t_threads[f]->th.th_used_in_team.load() == 3) { - while (team->t.t_threads[f]->th.th_used_in_team.load() == 3) - KMP_CPU_PAUSE(); - } - // The thread should be in_use now - KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 1); - // Transition to unused state - team->t.t_threads[f]->th.th_used_in_team.store(2); - KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 2); - } - // Release all the workers - kmp_uint64 new_value; // new value for go - new_value = team->t.b->go_release(); - - KMP_MFENCE(); - - // Workers should see transition status 2 and move to 0; but may need to be - // woken up first - size_t my_go_index; - int count = old_nthreads - 1; - while (count > 0) { - count = old_nthreads - 1; - for (int f = 1; f < old_nthreads; ++f) { - my_go_index = f / team->t.b->threads_per_go; - if (other_threads[f]->th.th_used_in_team.load() != 0) { - if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // Wake up the workers - kmp_atomic_flag_64<> *flag = (kmp_atomic_flag_64<> *)CCAST( - void *, other_threads[f]->th.th_sleep_loc); - __kmp_atomic_resume_64(other_threads[f]->th.th_info.ds.ds_gtid, flag); - } - } else { - KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 0); - count--; - } - } - } - // Now update the barrier size - team->t.b->update_num_threads(new_nthreads); - team->t.b->go_reset(); -} - -void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads) { - // Add the threads back to the team - KMP_DEBUG_ASSERT(team); - // Threads were paused and pointed at th_used_in_team temporarily during a - // resize of the team. We're going to set th_used_in_team to 3 to indicate to - // the thread that it should transition itself back into the team. Then, if - // blocktime isn't infinite, the thread could be sleeping, so we send a resume - // to wake it up. - for (int f = 1; f < new_nthreads; ++f) { - KMP_DEBUG_ASSERT(team->t.t_threads[f]); - KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team), 0, - 3); - if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // Wake up sleeping threads - __kmp_resume_32(team->t.t_threads[f]->th.th_info.ds.ds_gtid, - (kmp_flag_32 *)NULL); - } - } - // The threads should be transitioning to the team; when they are done, they - // should have set th_used_in_team to 1. This loop forces master to wait until - // all threads have moved into the team and are waiting in the barrier. - int count = new_nthreads - 1; - while (count > 0) { - count = new_nthreads - 1; - for (int f = 1; f < new_nthreads; ++f) { - if (team->t.t_threads[f]->th.th_used_in_team.load() == 1) { - count--; - } - } - } -} - // Globals and functions for hidden helper task kmp_info_t **__kmp_hidden_helper_threads; kmp_info_t *__kmp_hidden_helper_main_thread; diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp index a98a2a43b0d06..f287c27f29a58 100644 --- a/openmp/runtime/src/kmp_settings.cpp +++ b/openmp/runtime/src/kmp_settings.cpp @@ -1684,8 +1684,6 @@ static void __kmp_stg_parse_barrier_pattern(char const *name, char const *value, const char *var; /* ---------- Barrier method control ------------ */ - static int dist_req = 0, non_dist_req = 0; - static bool warn = 1; for (int i = bs_plain_barrier; i < bs_last_barrier; i++) { var = __kmp_barrier_pattern_env_name[i]; @@ -1697,11 +1695,6 @@ static void __kmp_stg_parse_barrier_pattern(char const *name, char const *value, for (j = bp_linear_bar; j < bp_last_bar; j++) { if (__kmp_match_with_sentinel(__kmp_barrier_pattern_name[j], value, 1, ',')) { - if (j == bp_dist_bar) { - dist_req++; - } else { - non_dist_req++; - } __kmp_barrier_gather_pattern[i] = (kmp_bar_pat_e)j; break; } @@ -1716,11 +1709,6 @@ static void __kmp_stg_parse_barrier_pattern(char const *name, char const *value, if (comma != NULL) { for (j = bp_linear_bar; j < bp_last_bar; j++) { if (__kmp_str_match(__kmp_barrier_pattern_name[j], 1, comma + 1)) { - if (j == bp_dist_bar) { - dist_req++; - } else { - non_dist_req++; - } __kmp_barrier_release_pattern[i] = (kmp_bar_pat_e)j; break; } @@ -1735,28 +1723,6 @@ static void __kmp_stg_parse_barrier_pattern(char const *name, char const *value, } } } - if ((dist_req == 0) && (non_dist_req != 0)) { - // Something was set to a barrier other than dist; set all others to hyper - for (int i = bs_plain_barrier; i < bs_last_barrier; i++) { - if (__kmp_barrier_release_pattern[i] == bp_dist_bar) - __kmp_barrier_release_pattern[i] = bp_hyper_bar; - if (__kmp_barrier_gather_pattern[i] == bp_dist_bar) - __kmp_barrier_gather_pattern[i] = bp_hyper_bar; - } - } else if (non_dist_req != 0) { - // some requests for dist, plus requests for others; set all to dist - if (non_dist_req > 0 && dist_req > 0 && warn) { - KMP_INFORM(BarrierPatternOverride, name, - __kmp_barrier_pattern_name[bp_dist_bar]); - warn = 0; - } - for (int i = bs_plain_barrier; i < bs_last_barrier; i++) { - if (__kmp_barrier_release_pattern[i] != bp_dist_bar) - __kmp_barrier_release_pattern[i] = bp_dist_bar; - if (__kmp_barrier_gather_pattern[i] != bp_dist_bar) - __kmp_barrier_gather_pattern[i] = bp_dist_bar; - } - } } // __kmp_stg_parse_barrier_pattern static void __kmp_stg_print_barrier_pattern(kmp_str_buf_t *buffer, @@ -1773,7 +1739,7 @@ static void __kmp_stg_print_barrier_pattern(kmp_str_buf_t *buffer, __kmp_str_buf_print(buffer, " %s='", __kmp_barrier_pattern_env_name[i]); } - KMP_DEBUG_ASSERT(j < bp_last_bar && k < bp_last_bar); + KMP_DEBUG_ASSERT(j < bs_last_barrier && k < bs_last_barrier); __kmp_str_buf_print(buffer, "%s,%s'\n", __kmp_barrier_pattern_name[j], __kmp_barrier_pattern_name[k]); } diff --git a/openmp/runtime/src/kmp_stats.h b/openmp/runtime/src/kmp_stats.h index 113221c066a33..4c5053df3fef1 100644 --- a/openmp/runtime/src/kmp_stats.h +++ b/openmp/runtime/src/kmp_stats.h @@ -246,8 +246,6 @@ enum stats_state_e { // KMP_tree_release -- time in __kmp_tree_barrier_release // KMP_hyper_gather -- time in __kmp_hyper_barrier_gather // KMP_hyper_release -- time in __kmp_hyper_barrier_release -// KMP_dist_gather -- time in __kmp_dist_barrier_gather -// KMP_dist_release -- time in __kmp_dist_barrier_release // clang-format off #define KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \ macro(KMP_fork_call, 0, arg) \ @@ -257,8 +255,6 @@ enum stats_state_e { macro(KMP_hier_release, 0, arg) \ macro(KMP_hyper_gather, 0, arg) \ macro(KMP_hyper_release, 0, arg) \ - macro(KMP_dist_gather, 0, arg) \ - macro(KMP_dist_release, 0, arg) \ macro(KMP_linear_gather, 0, arg) \ macro(KMP_linear_release, 0, arg) \ macro(KMP_tree_gather, 0, arg) \ diff --git a/openmp/runtime/src/kmp_str.cpp b/openmp/runtime/src/kmp_str.cpp index e64f989fbc698..ffce2b88ab35d 100644 --- a/openmp/runtime/src/kmp_str.cpp +++ b/openmp/runtime/src/kmp_str.cpp @@ -515,31 +515,6 @@ int __kmp_str_match(char const *target, int len, char const *data) { return ((len > 0) ? i >= len : (!target[i] && (len || !data[i]))); } // __kmp_str_match -// If data contains all of target, returns true, otherwise returns false. -// len should be the length of target -bool __kmp_str_contains(char const *target, int len, char const *data) { - int i = 0, j = 0, start = 0; - if (target == NULL || data == NULL) { - return FALSE; - } - while (target[i]) { - if (!data[j]) - return FALSE; - if (TOLOWER(target[i]) != TOLOWER(data[j])) { - j = start + 1; - start = j; - i = 0; - } else { - if (i == 0) - start = j; - j++; - i++; - } - } - - return i == len; -} // __kmp_str_contains - int __kmp_str_match_false(char const *data) { int result = __kmp_str_match("false", 1, data) || __kmp_str_match("off", 2, data) || diff --git a/openmp/runtime/src/kmp_str.h b/openmp/runtime/src/kmp_str.h index 855b5df55d692..ff6179908ef14 100644 --- a/openmp/runtime/src/kmp_str.h +++ b/openmp/runtime/src/kmp_str.h @@ -106,7 +106,6 @@ int __kmp_str_eqf(char const *lhs, char const *rhs); char *__kmp_str_format(char const *format, ...); void __kmp_str_free(char **str); int __kmp_str_match(char const *target, int len, char const *data); -bool __kmp_str_contains(char const *target, int len, char const *data); int __kmp_str_match_false(char const *data); int __kmp_str_match_true(char const *data); void __kmp_str_replace(char *str, char search_for, char replace_with); diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp index b1a1fb1798bef..62f0bdca4be93 100644 --- a/openmp/runtime/src/kmp_tasking.cpp +++ b/openmp/runtime/src/kmp_tasking.cpp @@ -2963,7 +2963,8 @@ static inline int __kmp_execute_tasks_template( (TCR_PTR(CCAST(void *, other_thread->th.th_sleep_loc)) != NULL)) { asleep = 1; - __kmp_null_resume_wrapper(other_thread); + __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread), + other_thread->th.th_sleep_loc); // A sleeping thread should not have any tasks on it's queue. // There is a slight possibility that it resumes, steals a task // from another thread, which spawns more tasks, all in the time @@ -3112,16 +3113,6 @@ int __kmp_execute_tasks_64( thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); } -template -int __kmp_atomic_execute_tasks_64( - kmp_info_t *thread, kmp_int32 gtid, kmp_atomic_flag_64 *flag, - int final_spin, int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), - kmp_int32 is_constrained) { - return __kmp_execute_tasks_template( - thread, gtid, flag, final_spin, - thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); -} - int __kmp_execute_tasks_oncore( kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin, int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), @@ -3148,14 +3139,6 @@ template int __kmp_execute_tasks_64(kmp_info_t *, kmp_int32, int *USE_ITT_BUILD_ARG(void *), kmp_int32); -template int __kmp_atomic_execute_tasks_64( - kmp_info_t *, kmp_int32, kmp_atomic_flag_64 *, int, - int *USE_ITT_BUILD_ARG(void *), kmp_int32); - -template int __kmp_atomic_execute_tasks_64( - kmp_info_t *, kmp_int32, kmp_atomic_flag_64 *, int, - int *USE_ITT_BUILD_ARG(void *), kmp_int32); - // __kmp_enable_tasking: Allocate task team and resume threads sleeping at the // next barrier so they can assist in executing enqueued tasks. // First thread in allocates the task team atomically. @@ -3194,7 +3177,7 @@ static void __kmp_enable_tasking(kmp_task_team_t *task_team, // tasks and execute them. In extra barrier mode, tasks do not sleep // at the separate tasking barrier, so this isn't a problem. for (i = 0; i < nthreads; i++) { - void *sleep_loc; + volatile void *sleep_loc; kmp_info_t *thread = threads_data[i].td.td_thr; if (i == this_thr->th.th_info.ds.ds_tid) { @@ -3211,7 +3194,7 @@ static void __kmp_enable_tasking(kmp_task_team_t *task_team, KF_TRACE(50, ("__kmp_enable_tasking: T#%d waking up thread T#%d\n", __kmp_gtid_from_thread(this_thr), __kmp_gtid_from_thread(thread))); - __kmp_null_resume_wrapper(thread); + __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc); } else { KF_TRACE(50, ("__kmp_enable_tasking: T#%d don't wake up thread T#%d\n", __kmp_gtid_from_thread(this_thr), @@ -3581,7 +3564,7 @@ void __kmp_wait_to_unref_task_teams(void) { __kmp_gtid_from_thread(thread))); if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { - void *sleep_loc; + volatile void *sleep_loc; // If the thread is sleeping, awaken it. if ((sleep_loc = TCR_PTR(CCAST(void *, thread->th.th_sleep_loc))) != NULL) { @@ -3589,7 +3572,7 @@ void __kmp_wait_to_unref_task_teams(void) { 10, ("__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n", __kmp_gtid_from_thread(thread), __kmp_gtid_from_thread(thread))); - __kmp_null_resume_wrapper(thread); + __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc); } } } diff --git a/openmp/runtime/src/kmp_wait_release.cpp b/openmp/runtime/src/kmp_wait_release.cpp index d41ddf231e3ff..cabb5722f4dcd 100644 --- a/openmp/runtime/src/kmp_wait_release.cpp +++ b/openmp/runtime/src/kmp_wait_release.cpp @@ -33,10 +33,6 @@ template void __kmp_mwait_64(int th_gtid, kmp_flag_64 *flag) { __kmp_mwait_template(th_gtid, flag); } -template -void __kmp_atomic_mwait_64(int th_gtid, kmp_atomic_flag_64 *flag) { - __kmp_mwait_template(th_gtid, flag); -} void __kmp_mwait_oncore(int th_gtid, kmp_flag_oncore *flag) { __kmp_mwait_template(th_gtid, flag); } @@ -44,8 +40,4 @@ void __kmp_mwait_oncore(int th_gtid, kmp_flag_oncore *flag) { template void __kmp_mwait_32(int, kmp_flag_32 *); template void __kmp_mwait_64(int, kmp_flag_64 *); template void __kmp_mwait_64(int, kmp_flag_64 *); -template void -__kmp_atomic_mwait_64(int, kmp_atomic_flag_64 *); -template void -__kmp_atomic_mwait_64(int, kmp_atomic_flag_64 *); #endif diff --git a/openmp/runtime/src/kmp_wait_release.h b/openmp/runtime/src/kmp_wait_release.h index 5c250d94886f6..d528ce9f18019 100644 --- a/openmp/runtime/src/kmp_wait_release.h +++ b/openmp/runtime/src/kmp_wait_release.h @@ -33,285 +33,96 @@ higher level operations such as barriers and fork/join. @{ */ +/*! + * The flag_type describes the storage used for the flag. + */ +enum flag_type { + flag32, /**< 32 bit flags */ + flag64, /**< 64 bit flags */ + flag_oncore /**< special 64-bit flag for on-core barrier (hierarchical) */ +}; + struct flag_properties { unsigned int type : 16; unsigned int reserved : 16; }; -template struct flag_traits {}; - -template <> struct flag_traits { - typedef kmp_uint32 flag_t; - static const flag_type t = flag32; - static inline flag_t tcr(flag_t f) { return TCR_4(f); } - static inline flag_t test_then_add4(volatile flag_t *f) { - return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f)); - } - static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { - return KMP_TEST_THEN_OR32(f, v); - } - static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { - return KMP_TEST_THEN_AND32(f, v); - } -}; - -template <> struct flag_traits { - typedef kmp_uint64 flag_t; - static const flag_type t = atomic_flag64; - static inline flag_t tcr(flag_t f) { return TCR_8(f); } - static inline flag_t test_then_add4(volatile flag_t *f) { - return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f)); - } - static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { - return KMP_TEST_THEN_OR64(f, v); - } - static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { - return KMP_TEST_THEN_AND64(f, v); - } -}; - -template <> struct flag_traits { - typedef kmp_uint64 flag_t; - static const flag_type t = flag64; - static inline flag_t tcr(flag_t f) { return TCR_8(f); } - static inline flag_t test_then_add4(volatile flag_t *f) { - return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f)); - } - static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { - return KMP_TEST_THEN_OR64(f, v); - } - static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { - return KMP_TEST_THEN_AND64(f, v); - } -}; - -template <> struct flag_traits { - typedef kmp_uint64 flag_t; - static const flag_type t = flag_oncore; - static inline flag_t tcr(flag_t f) { return TCR_8(f); } - static inline flag_t test_then_add4(volatile flag_t *f) { - return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f)); - } - static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { - return KMP_TEST_THEN_OR64(f, v); - } - static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { - return KMP_TEST_THEN_AND64(f, v); - } -}; - -/*! Base class for all flags */ -template class kmp_flag { -protected: - flag_properties t; /**< "Type" of the flag in loc */ - kmp_info_t *waiting_threads[1]; /**< Threads sleeping on this thread. */ - kmp_uint32 num_waiting_threads; /**< #threads sleeping on this thread. */ - std::atomic *sleepLoc; +/*! + * Base class for wait/release volatile flag + */ +template class kmp_flag_native { + volatile P *loc; + flag_properties t; public: - typedef flag_traits traits_type; - kmp_flag() : t({FlagType, 0U}), num_waiting_threads(0), sleepLoc(nullptr) {} - kmp_flag(int nwaiters) - : t({FlagType, 0U}), num_waiting_threads(nwaiters), sleepLoc(nullptr) {} - kmp_flag(std::atomic *sloc) - : t({FlagType, 0U}), num_waiting_threads(0), sleepLoc(sloc) {} - /*! @result the flag_type */ + typedef P flag_t; + kmp_flag_native(volatile P *p, flag_type ft) + : loc(p), t({(short unsigned int)ft, 0U}) {} + volatile P *get() { return loc; } + void *get_void_p() { return RCAST(void *, CCAST(P *, loc)); } + void set(volatile P *new_loc) { loc = new_loc; } flag_type get_type() { return (flag_type)(t.type); } - - /*! param i in index into waiting_threads - * @result the thread that is waiting at index i */ - kmp_info_t *get_waiter(kmp_uint32 i) { - KMP_DEBUG_ASSERT(i < num_waiting_threads); - return waiting_threads[i]; - } - /*! @result num_waiting_threads */ - kmp_uint32 get_num_waiters() { return num_waiting_threads; } - /*! @param thr in the thread which is now waiting - * Insert a waiting thread at index 0. */ - void set_waiter(kmp_info_t *thr) { - waiting_threads[0] = thr; - num_waiting_threads = 1; - } - enum barrier_type get_bt() { return bs_last_barrier; } + P load() { return *loc; } + void store(P val) { *loc = val; } }; -/*! Base class for wait/release volatile flag */ -template -class kmp_flag_native : public kmp_flag { -protected: - volatile PtrType *loc; - PtrType checker; /**< When flag==checker, it has been released. */ - typedef flag_traits traits_type; - -public: - typedef PtrType flag_t; - kmp_flag_native(volatile PtrType *p) : kmp_flag(), loc(p) {} - kmp_flag_native(volatile PtrType *p, kmp_info_t *thr) - : kmp_flag(1), loc(p) { - this->waiting_threads[0] = thr; - } - kmp_flag_native(volatile PtrType *p, PtrType c) - : kmp_flag(), loc(p), checker(c) {} - kmp_flag_native(volatile PtrType *p, PtrType c, std::atomic *sloc) - : kmp_flag(sloc), loc(p), checker(c) {} - volatile PtrType *get() { return loc; } - void *get_void_p() { return RCAST(void *, CCAST(PtrType *, loc)); } - void set(volatile PtrType *new_loc) { loc = new_loc; } - PtrType load() { return *loc; } - void store(PtrType val) { *loc = val; } - /*! @result true if the flag object has been released. */ - virtual bool done_check() { - if (Sleepable && !(this->sleepLoc)) - return (traits_type::tcr(*(this->get())) & ~KMP_BARRIER_SLEEP_STATE) == - checker; - else - return traits_type::tcr(*(this->get())) == checker; - } - /*! @param old_loc in old value of flag - * @result true if the flag's old value indicates it was released. */ - virtual bool done_check_val(PtrType old_loc) { return old_loc == checker; } - /*! @result true if the flag object is not yet released. - * Used in __kmp_wait_template like: - * @code - * while (flag.notdone_check()) { pause(); } - * @endcode */ - virtual bool notdone_check() { - return traits_type::tcr(*(this->get())) != checker; - } - /*! @result Actual flag value before release was applied. - * Trigger all waiting threads to run by modifying flag to release state. */ - void internal_release() { - (void)traits_type::test_then_add4((volatile PtrType *)this->get()); - } - /*! @result Actual flag value before sleep bit(s) set. - * Notes that there is at least one thread sleeping on the flag by setting - * sleep bit(s). */ - PtrType set_sleeping() { - if (this->sleepLoc) { - this->sleepLoc->store(true); - return *(this->get()); - } - return traits_type::test_then_or((volatile PtrType *)this->get(), - KMP_BARRIER_SLEEP_STATE); - } - /*! @result Actual flag value before sleep bit(s) cleared. - * Notes that there are no longer threads sleeping on the flag by clearing - * sleep bit(s). */ - void unset_sleeping() { - if (this->sleepLoc) { - this->sleepLoc->store(false); - return; - } - traits_type::test_then_and((volatile PtrType *)this->get(), - ~KMP_BARRIER_SLEEP_STATE); - } - /*! @param old_loc in old value of flag - * Test if there are threads sleeping on the flag's old value in old_loc. */ - bool is_sleeping_val(PtrType old_loc) { - if (this->sleepLoc) - return this->sleepLoc->load(); - return old_loc & KMP_BARRIER_SLEEP_STATE; - } - /*! Test whether there are threads sleeping on the flag. */ - bool is_sleeping() { - if (this->sleepLoc) - return this->sleepLoc->load(); - return is_sleeping_val(*(this->get())); - } - bool is_any_sleeping() { - if (this->sleepLoc) - return this->sleepLoc->load(); - return is_sleeping_val(*(this->get())); - } - kmp_uint8 *get_stolen() { return NULL; } -}; - -/*! Base class for wait/release atomic flag */ -template -class kmp_flag_atomic : public kmp_flag { -protected: - std::atomic *loc; /**< Pointer to flag location to wait on */ - PtrType checker; /**< Flag == checker means it has been released. */ +/*! + * Base class for wait/release atomic flag + */ +template class kmp_flag { + std::atomic

+ *loc; /**< Pointer to the flag storage that is modified by another thread + */ + flag_properties t; /**< "Type" of the flag in loc */ public: - typedef flag_traits traits_type; - typedef PtrType flag_t; - kmp_flag_atomic(std::atomic *p) : kmp_flag(), loc(p) {} - kmp_flag_atomic(std::atomic *p, kmp_info_t *thr) - : kmp_flag(1), loc(p) { - this->waiting_threads[0] = thr; - } - kmp_flag_atomic(std::atomic *p, PtrType c) - : kmp_flag(), loc(p), checker(c) {} - kmp_flag_atomic(std::atomic *p, PtrType c, std::atomic *sloc) - : kmp_flag(sloc), loc(p), checker(c) {} - /*! @result the pointer to the actual flag */ - std::atomic *get() { return loc; } - /*! @result void* pointer to the actual flag */ + typedef P flag_t; + kmp_flag(std::atomic

*p, flag_type ft) + : loc(p), t({(short unsigned int)ft, 0U}) {} + /*! + * @result the pointer to the actual flag + */ + std::atomic

*get() { return loc; } + /*! + * @result void* pointer to the actual flag + */ void *get_void_p() { return RCAST(void *, loc); } - /*! @param new_loc in set loc to point at new_loc */ - void set(std::atomic *new_loc) { loc = new_loc; } - /*! @result flag value */ - PtrType load() { return loc->load(std::memory_order_acquire); } - /*! @param val the new flag value to be stored */ - void store(PtrType val) { loc->store(val, std::memory_order_release); } - /*! @result true if the flag object has been released. */ - bool done_check() { - if (Sleepable && !(this->sleepLoc)) - return (this->load() & ~KMP_BARRIER_SLEEP_STATE) == checker; - else - return this->load() == checker; - } - /*! @param old_loc in old value of flag - * @result true if the flag's old value indicates it was released. */ - bool done_check_val(PtrType old_loc) { return old_loc == checker; } - /*! @result true if the flag object is not yet released. - * Used in __kmp_wait_template like: - * @code - * while (flag.notdone_check()) { pause(); } - * @endcode */ - bool notdone_check() { return this->load() != checker; } - /*! @result Actual flag value before release was applied. - * Trigger all waiting threads to run by modifying flag to release state. */ - void internal_release() { KMP_ATOMIC_ADD(this->get(), 4); } - /*! @result Actual flag value before sleep bit(s) set. - * Notes that there is at least one thread sleeping on the flag by setting - * sleep bit(s). */ - PtrType set_sleeping() { - if (this->sleepLoc) { - this->sleepLoc->store(true); - return *(this->get()); - } - return KMP_ATOMIC_OR(this->get(), KMP_BARRIER_SLEEP_STATE); - } - /*! @result Actual flag value before sleep bit(s) cleared. - * Notes that there are no longer threads sleeping on the flag by clearing - * sleep bit(s). */ - void unset_sleeping() { - if (this->sleepLoc) { - this->sleepLoc->store(false); - return; - } - KMP_ATOMIC_AND(this->get(), ~KMP_BARRIER_SLEEP_STATE); - } - /*! @param old_loc in old value of flag - * Test whether there are threads sleeping on flag's old value in old_loc. */ - bool is_sleeping_val(PtrType old_loc) { - if (this->sleepLoc) - return this->sleepLoc->load(); - return old_loc & KMP_BARRIER_SLEEP_STATE; - } - /*! Test whether there are threads sleeping on the flag. */ - bool is_sleeping() { - if (this->sleepLoc) - return this->sleepLoc->load(); - return is_sleeping_val(this->load()); - } - bool is_any_sleeping() { - if (this->sleepLoc) - return this->sleepLoc->load(); - return is_sleeping_val(this->load()); - } - kmp_uint8 *get_stolen() { return NULL; } + /*! + * @param new_loc in set loc to point at new_loc + */ + void set(std::atomic

*new_loc) { loc = new_loc; } + /*! + * @result the flag_type + */ + flag_type get_type() { return (flag_type)(t.type); } + /*! + * @result flag value + */ + P load() { return loc->load(std::memory_order_acquire); } + /*! + * @param val the new flag value to be stored + */ + void store(P val) { loc->store(val, std::memory_order_release); } + // Derived classes must provide the following: + /* + kmp_info_t * get_waiter(kmp_uint32 i); + kmp_uint32 get_num_waiters(); + bool done_check(); + bool done_check_val(P old_loc); + bool notdone_check(); + P internal_release(); + void suspend(int th_gtid); + void mwait(int th_gtid); + void resume(int th_gtid); + P set_sleeping(); + P unset_sleeping(); + bool is_sleeping(); + bool is_any_sleeping(); + bool is_sleeping_val(P old_loc); + int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, + int *thread_finished + USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 + is_constrained); + */ }; #if OMPT_SUPPORT @@ -453,9 +264,8 @@ final_spin=FALSE) ompt_entry_state = this_thr->th.ompt_thread_info.state; if (!final_spin || ompt_entry_state != ompt_state_wait_barrier_implicit || KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) { - ompt_lw_taskteam_t *team = NULL; - if (this_thr->th.th_team) - team = this_thr->th.th_team->t.ompt_serialized_team_info; + ompt_lw_taskteam_t *team = + this_thr->th.th_team->t.ompt_serialized_team_info; if (team) { tId = &(team->ompt_task_info.task_data); } else { @@ -530,11 +340,11 @@ final_spin=FALSE) disabled (KMP_TASKING=0). */ if (task_team != NULL) { if (TCR_SYNC_4(task_team->tt.tt_active)) { - if (KMP_TASKING_ENABLED(task_team)) { + if (KMP_TASKING_ENABLED(task_team)) flag->execute_tasks( this_thr, th_gtid, final_spin, &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0); - } else + else this_thr->th.th_reap_state = KMP_SAFE_TO_REAP; } else { KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)); @@ -747,7 +557,6 @@ static inline void __kmp_mwait_template(int th_gtid, C *flag) { else { // if flag changes here, wake-up happens immediately TCW_PTR(th->th.th_sleep_loc, (void *)flag); - th->th.th_sleep_loc_type = flag->get_type(); __kmp_unlock_suspend_mx(th); KF_TRACE(50, ("__kmp_mwait_template: T#%d calling mwait\n", th_gtid)); #if KMP_HAVE_UMWAIT @@ -765,7 +574,6 @@ static inline void __kmp_mwait_template(int th_gtid, C *flag) { if (flag->is_sleeping()) flag->unset_sleeping(); TCW_PTR(th->th.th_sleep_loc, NULL); - th->th.th_sleep_loc_type = flag_unset; } // Mark thread as active again th->th.th_active = TRUE; @@ -816,15 +624,251 @@ template static inline void __kmp_release_template(C *flag) { } } +template struct flag_traits {}; + +template <> struct flag_traits { + typedef kmp_uint32 flag_t; + static const flag_type t = flag32; + static inline flag_t tcr(flag_t f) { return TCR_4(f); } + static inline flag_t test_then_add4(volatile flag_t *f) { + return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f)); + } + static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { + return KMP_TEST_THEN_OR32(f, v); + } + static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { + return KMP_TEST_THEN_AND32(f, v); + } +}; + +template <> struct flag_traits { + typedef kmp_uint64 flag_t; + static const flag_type t = flag64; + static inline flag_t tcr(flag_t f) { return TCR_8(f); } + static inline flag_t test_then_add4(volatile flag_t *f) { + return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f)); + } + static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { + return KMP_TEST_THEN_OR64(f, v); + } + static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { + return KMP_TEST_THEN_AND64(f, v); + } +}; + +// Basic flag that does not use C11 Atomics +template +class kmp_basic_flag_native : public kmp_flag_native { + typedef flag_traits traits_type; + FlagType checker; /**< Value to compare flag to to check if flag has been + released. */ + kmp_info_t + *waiting_threads[1]; /**< Array of threads sleeping on this thread. */ + kmp_uint32 + num_waiting_threads; /**< Number of threads sleeping on this thread. */ +public: + kmp_basic_flag_native(volatile FlagType *p) + : kmp_flag_native(p, traits_type::t), num_waiting_threads(0) {} + kmp_basic_flag_native(volatile FlagType *p, kmp_info_t *thr) + : kmp_flag_native(p, traits_type::t), num_waiting_threads(1) { + waiting_threads[0] = thr; + } + kmp_basic_flag_native(volatile FlagType *p, FlagType c) + : kmp_flag_native(p, traits_type::t), checker(c), + num_waiting_threads(0) {} + /*! + * param i in index into waiting_threads + * @result the thread that is waiting at index i + */ + kmp_info_t *get_waiter(kmp_uint32 i) { + KMP_DEBUG_ASSERT(i < num_waiting_threads); + return waiting_threads[i]; + } + /*! + * @result num_waiting_threads + */ + kmp_uint32 get_num_waiters() { return num_waiting_threads; } + /*! + * @param thr in the thread which is now waiting + * + * Insert a waiting thread at index 0. + */ + void set_waiter(kmp_info_t *thr) { + waiting_threads[0] = thr; + num_waiting_threads = 1; + } + /*! + * @result true if the flag object has been released. + */ + bool done_check() { + if (Sleepable) + return (traits_type::tcr(*(this->get())) & ~KMP_BARRIER_SLEEP_STATE) == + checker; + else + return traits_type::tcr(*(this->get())) == checker; + } + /*! + * @param old_loc in old value of flag + * @result true if the flag's old value indicates it was released. + */ + bool done_check_val(FlagType old_loc) { return old_loc == checker; } + /*! + * @result true if the flag object is not yet released. + * Used in __kmp_wait_template like: + * @code + * while (flag.notdone_check()) { pause(); } + * @endcode + */ + bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; } + /*! + * @result Actual flag value before release was applied. + * Trigger all waiting threads to run by modifying flag to release state. + */ + void internal_release() { + (void)traits_type::test_then_add4((volatile FlagType *)this->get()); + } + /*! + * @result Actual flag value before sleep bit(s) set. + * Notes that there is at least one thread sleeping on the flag by setting + * sleep bit(s). + */ + FlagType set_sleeping() { + return traits_type::test_then_or((volatile FlagType *)this->get(), + KMP_BARRIER_SLEEP_STATE); + } + /*! + * @result Actual flag value before sleep bit(s) cleared. + * Notes that there are no longer threads sleeping on the flag by clearing + * sleep bit(s). + */ + FlagType unset_sleeping() { + return traits_type::test_then_and((volatile FlagType *)this->get(), + ~KMP_BARRIER_SLEEP_STATE); + } + /*! + * @param old_loc in old value of flag + * Test whether there are threads sleeping on the flag's old value in old_loc. + */ + bool is_sleeping_val(FlagType old_loc) { + return old_loc & KMP_BARRIER_SLEEP_STATE; + } + /*! + * Test whether there are threads sleeping on the flag. + */ + bool is_sleeping() { return is_sleeping_val(*(this->get())); } + bool is_any_sleeping() { return is_sleeping_val(*(this->get())); } + kmp_uint8 *get_stolen() { return NULL; } + enum barrier_type get_bt() { return bs_last_barrier; } +}; + +template +class kmp_basic_flag : public kmp_flag { + typedef flag_traits traits_type; + FlagType checker; /**< Value to compare flag to to check if flag has been + released. */ + kmp_info_t + *waiting_threads[1]; /**< Array of threads sleeping on this thread. */ + kmp_uint32 + num_waiting_threads; /**< Number of threads sleeping on this thread. */ +public: + kmp_basic_flag(std::atomic *p) + : kmp_flag(p, traits_type::t), num_waiting_threads(0) {} + kmp_basic_flag(std::atomic *p, kmp_info_t *thr) + : kmp_flag(p, traits_type::t), num_waiting_threads(1) { + waiting_threads[0] = thr; + } + kmp_basic_flag(std::atomic *p, FlagType c) + : kmp_flag(p, traits_type::t), checker(c), + num_waiting_threads(0) {} + /*! + * param i in index into waiting_threads + * @result the thread that is waiting at index i + */ + kmp_info_t *get_waiter(kmp_uint32 i) { + KMP_DEBUG_ASSERT(i < num_waiting_threads); + return waiting_threads[i]; + } + /*! + * @result num_waiting_threads + */ + kmp_uint32 get_num_waiters() { return num_waiting_threads; } + /*! + * @param thr in the thread which is now waiting + * + * Insert a waiting thread at index 0. + */ + void set_waiter(kmp_info_t *thr) { + waiting_threads[0] = thr; + num_waiting_threads = 1; + } + /*! + * @result true if the flag object has been released. + */ + bool done_check() { + if (Sleepable) + return (this->load() & ~KMP_BARRIER_SLEEP_STATE) == checker; + else + return this->load() == checker; + } + /*! + * @param old_loc in old value of flag + * @result true if the flag's old value indicates it was released. + */ + bool done_check_val(FlagType old_loc) { return old_loc == checker; } + /*! + * @result true if the flag object is not yet released. + * Used in __kmp_wait_template like: + * @code + * while (flag.notdone_check()) { pause(); } + * @endcode + */ + bool notdone_check() { return this->load() != checker; } + /*! + * @result Actual flag value before release was applied. + * Trigger all waiting threads to run by modifying flag to release state. + */ + void internal_release() { KMP_ATOMIC_ADD(this->get(), 4); } + /*! + * @result Actual flag value before sleep bit(s) set. + * Notes that there is at least one thread sleeping on the flag by setting + * sleep bit(s). + */ + FlagType set_sleeping() { + return KMP_ATOMIC_OR(this->get(), KMP_BARRIER_SLEEP_STATE); + } + /*! + * @result Actual flag value before sleep bit(s) cleared. + * Notes that there are no longer threads sleeping on the flag by clearing + * sleep bit(s). + */ + FlagType unset_sleeping() { + return KMP_ATOMIC_AND(this->get(), ~KMP_BARRIER_SLEEP_STATE); + } + /*! + * @param old_loc in old value of flag + * Test whether there are threads sleeping on the flag's old value in old_loc. + */ + bool is_sleeping_val(FlagType old_loc) { + return old_loc & KMP_BARRIER_SLEEP_STATE; + } + /*! + * Test whether there are threads sleeping on the flag. + */ + bool is_sleeping() { return is_sleeping_val(this->load()); } + bool is_any_sleeping() { return is_sleeping_val(this->load()); } + kmp_uint8 *get_stolen() { return NULL; } + enum barrier_type get_bt() { return bs_last_barrier; } +}; + template -class kmp_flag_32 : public kmp_flag_atomic { +class kmp_flag_32 : public kmp_basic_flag { public: kmp_flag_32(std::atomic *p) - : kmp_flag_atomic(p) {} + : kmp_basic_flag(p) {} kmp_flag_32(std::atomic *p, kmp_info_t *thr) - : kmp_flag_atomic(p, thr) {} + : kmp_basic_flag(p, thr) {} kmp_flag_32(std::atomic *p, kmp_uint32 c) - : kmp_flag_atomic(p, c) {} + : kmp_basic_flag(p, c) {} void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); } #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT void mwait(int th_gtid) { __kmp_mwait_32(th_gtid, this); } @@ -851,16 +895,14 @@ class kmp_flag_32 : public kmp_flag_atomic { }; template -class kmp_flag_64 : public kmp_flag_native { +class kmp_flag_64 : public kmp_basic_flag_native { public: kmp_flag_64(volatile kmp_uint64 *p) - : kmp_flag_native(p) {} + : kmp_basic_flag_native(p) {} kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr) - : kmp_flag_native(p, thr) {} + : kmp_basic_flag_native(p, thr) {} kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c) - : kmp_flag_native(p, c) {} - kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c, std::atomic *loc) - : kmp_flag_native(p, c, loc) {} + : kmp_basic_flag_native(p, c) {} void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); } #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT void mwait(int th_gtid) { __kmp_mwait_64(th_gtid, this); } @@ -886,52 +928,20 @@ class kmp_flag_64 : public kmp_flag_native { flag_type get_ptr_type() { return flag64; } }; -template -class kmp_atomic_flag_64 - : public kmp_flag_atomic { -public: - kmp_atomic_flag_64(std::atomic *p) - : kmp_flag_atomic(p) {} - kmp_atomic_flag_64(std::atomic *p, kmp_info_t *thr) - : kmp_flag_atomic(p, thr) {} - kmp_atomic_flag_64(std::atomic *p, kmp_uint64 c) - : kmp_flag_atomic(p, c) {} - kmp_atomic_flag_64(std::atomic *p, kmp_uint64 c, - std::atomic *loc) - : kmp_flag_atomic(p, c, loc) {} - void suspend(int th_gtid) { __kmp_atomic_suspend_64(th_gtid, this); } - void mwait(int th_gtid) { __kmp_atomic_mwait_64(th_gtid, this); } - void resume(int th_gtid) { __kmp_atomic_resume_64(th_gtid, this); } - int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, - int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), - kmp_int32 is_constrained) { - return __kmp_atomic_execute_tasks_64( - this_thr, gtid, this, final_spin, - thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); - } - bool wait(kmp_info_t *this_thr, - int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) { - if (final_spin) - return __kmp_wait_template( - this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj)); - else - return __kmp_wait_template( - this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj)); - } - void release() { __kmp_release_template(this); } - flag_type get_ptr_type() { return atomic_flag64; } -}; - // Hierarchical 64-bit on-core barrier instantiation -class kmp_flag_oncore : public kmp_flag_native { - kmp_uint32 offset; /**< Portion of flag of interest for an operation. */ +class kmp_flag_oncore : public kmp_flag_native { + kmp_uint64 checker; + kmp_info_t *waiting_threads[1]; + kmp_uint32 num_waiting_threads; + kmp_uint32 + offset; /**< Portion of flag that is of interest for an operation. */ bool flag_switch; /**< Indicates a switch in flag location. */ enum barrier_type bt; /**< Barrier type. */ - kmp_info_t *this_thr; /**< Thread to redirect to different flag location. */ + kmp_info_t *this_thr; /**< Thread that may be redirected to different flag + location. */ #if USE_ITT_BUILD - void *itt_sync_obj; /**< ITT object to pass to new flag location. */ + void * + itt_sync_obj; /**< ITT object that must be passed to new flag location. */ #endif unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) { return (RCAST(unsigned char *, CCAST(kmp_uint64 *, loc)))[offset]; @@ -939,17 +949,26 @@ class kmp_flag_oncore : public kmp_flag_native { public: kmp_flag_oncore(volatile kmp_uint64 *p) - : kmp_flag_native(p), flag_switch(false) { - } + : kmp_flag_native(p, flag_oncore), num_waiting_threads(0), + flag_switch(false) {} kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx) - : kmp_flag_native(p), offset(idx), - flag_switch(false), bt(bs_last_barrier), itt_sync_obj(nullptr) {} + : kmp_flag_native(p, flag_oncore), num_waiting_threads(0), + offset(idx), flag_switch(false) {} kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx, enum barrier_type bar_t, kmp_info_t *thr USE_ITT_BUILD_ARG(void *itt)) - : kmp_flag_native(p, c), offset(idx), - flag_switch(false), bt(bar_t), + : kmp_flag_native(p, flag_oncore), checker(c), + num_waiting_threads(0), offset(idx), flag_switch(false), bt(bar_t), this_thr(thr) USE_ITT_BUILD_ARG(itt_sync_obj(itt)) {} + kmp_info_t *get_waiter(kmp_uint32 i) { + KMP_DEBUG_ASSERT(i < num_waiting_threads); + return waiting_threads[i]; + } + kmp_uint32 get_num_waiters() { return num_waiting_threads; } + void set_waiter(kmp_info_t *thr) { + waiting_threads[0] = thr; + num_waiting_threads = 1; + } bool done_check_val(kmp_uint64 old_loc) { return byteref(&old_loc, offset) == checker; } @@ -978,6 +997,17 @@ class kmp_flag_oncore : public kmp_flag_native { KMP_TEST_THEN_OR64(get(), mask); } } + kmp_uint64 set_sleeping() { + return KMP_TEST_THEN_OR64(get(), KMP_BARRIER_SLEEP_STATE); + } + kmp_uint64 unset_sleeping() { + return KMP_TEST_THEN_AND64(get(), ~KMP_BARRIER_SLEEP_STATE); + } + bool is_sleeping_val(kmp_uint64 old_loc) { + return old_loc & KMP_BARRIER_SLEEP_STATE; + } + bool is_sleeping() { return is_sleeping_val(*get()); } + bool is_any_sleeping() { return is_sleeping_val(*get()); } void wait(kmp_info_t *this_thr, int final_spin) { if (final_spin) __kmp_wait_template( @@ -1008,39 +1038,27 @@ class kmp_flag_oncore : public kmp_flag_native { thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); #endif } + kmp_uint8 *get_stolen() { return NULL; } enum barrier_type get_bt() { return bt; } flag_type get_ptr_type() { return flag_oncore; } }; -static inline void __kmp_null_resume_wrapper(kmp_info_t *thr) { - int gtid = __kmp_gtid_from_thread(thr); - void *flag = CCAST(void *, thr->th.th_sleep_loc); - flag_type type = thr->th.th_sleep_loc_type; +// Used to wake up threads, volatile void* flag is usually the th_sleep_loc +// associated with int gtid. +static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) { if (!flag) return; - // Attempt to wake up a thread: examine its type and call appropriate template - switch (type) { + + switch (RCAST(kmp_flag_64<> *, CCAST(void *, flag))->get_type()) { case flag32: - __kmp_resume_32(gtid, RCAST(kmp_flag_32<> *, flag)); + __kmp_resume_32(gtid, (kmp_flag_32<> *)NULL); break; case flag64: - __kmp_resume_64(gtid, RCAST(kmp_flag_64<> *, flag)); - break; - case atomic_flag64: - __kmp_atomic_resume_64(gtid, RCAST(kmp_atomic_flag_64<> *, flag)); + __kmp_resume_64(gtid, (kmp_flag_64<> *)NULL); break; case flag_oncore: - __kmp_resume_oncore(gtid, RCAST(kmp_flag_oncore *, flag)); - break; -#ifdef KMP_DEBUG - case flag_unset: - KF_TRACE(100, ("__kmp_null_resume_wrapper: flag type %d is unset\n", type)); + __kmp_resume_oncore(gtid, (kmp_flag_oncore *)NULL); break; - default: - KF_TRACE(100, ("__kmp_null_resume_wrapper: flag type %d does not match any " - "known flag type\n", - type)); -#endif } } diff --git a/openmp/runtime/src/z_Linux_util.cpp b/openmp/runtime/src/z_Linux_util.cpp index 920de698ac43e..bd50987a857ac 100644 --- a/openmp/runtime/src/z_Linux_util.cpp +++ b/openmp/runtime/src/z_Linux_util.cpp @@ -1409,13 +1409,9 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) { /* TODO: shouldn't this use release semantics to ensure that __kmp_suspend_initialize_thread gets called first? */ old_spin = flag->set_sleeping(); - TCW_PTR(th->th.th_sleep_loc, (void *)flag); - th->th.th_sleep_loc_type = flag->get_type(); if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME && __kmp_pause_status != kmp_soft_paused) { flag->unset_sleeping(); - TCW_PTR(th->th.th_sleep_loc, NULL); - th->th.th_sleep_loc_type = flag_unset; __kmp_unlock_suspend_mx(th); return; } @@ -1423,10 +1419,8 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) { " was %x\n", th_gtid, flag->get(), flag->load(), old_spin)); - if (flag->done_check_val(old_spin) || flag->done_check()) { - flag->unset_sleeping(); - TCW_PTR(th->th.th_sleep_loc, NULL); - th->th.th_sleep_loc_type = flag_unset; + if (flag->done_check_val(old_spin)) { + old_spin = flag->unset_sleeping(); KF_TRACE(5, ("__kmp_suspend_template: T#%d false alarm, reset sleep bit " "for spin(%p)\n", th_gtid, flag->get())); @@ -1435,6 +1429,7 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) { "with low probability" return when the condition variable has not been signaled or broadcast */ int deactivated = FALSE; + TCW_PTR(th->th.th_sleep_loc, (void *)flag); while (flag->is_sleeping()) { #ifdef DEBUG_SUSPEND @@ -1456,9 +1451,6 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) { deactivated = TRUE; } - KMP_DEBUG_ASSERT(th->th.th_sleep_loc); - KMP_DEBUG_ASSERT(flag->get_type() == th->th.th_sleep_loc_type); - #if USE_SUSPEND_TIMEOUT struct timespec now; struct timeval tval; @@ -1488,18 +1480,6 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) { if ((status != 0) && (status != EINTR) && (status != ETIMEDOUT)) { KMP_SYSFAIL("pthread_cond_wait", status); } - - KMP_DEBUG_ASSERT(flag->get_type() == flag->get_ptr_type()); - - if (!flag->is_sleeping() && - ((status == EINTR) || (status == ETIMEDOUT))) { - // if interrupt or timeout, and thread is no longer sleeping, we need to - // make sure sleep_loc gets reset; however, this shouldn't be needed if - // we woke up with resume - flag->unset_sleeping(); - TCW_PTR(th->th.th_sleep_loc, NULL); - th->th.th_sleep_loc_type = flag_unset; - } #ifdef KMP_DEBUG if (status == ETIMEDOUT) { if (flag->is_sleeping()) { @@ -1509,8 +1489,6 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) { KF_TRACE(2, ("__kmp_suspend_template: T#%d timeout wakeup, sleep bit " "not set!\n", th_gtid)); - TCW_PTR(th->th.th_sleep_loc, NULL); - th->th.th_sleep_loc_type = flag_unset; } } else if (flag->is_sleeping()) { KF_TRACE(100, @@ -1528,13 +1506,6 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) { } } } - // We may have had the loop variable set before entering the loop body; - // so we need to reset sleep_loc. - TCW_PTR(th->th.th_sleep_loc, NULL); - th->th.th_sleep_loc_type = flag_unset; - - KMP_DEBUG_ASSERT(!flag->is_sleeping()); - KMP_DEBUG_ASSERT(!th->th.th_sleep_loc); #ifdef DEBUG_SUSPEND { char buffer[128]; @@ -1556,10 +1527,6 @@ template void __kmp_suspend_64(int th_gtid, kmp_flag_64 *flag) { __kmp_suspend_template(th_gtid, flag); } -template -void __kmp_atomic_suspend_64(int th_gtid, kmp_atomic_flag_64 *flag) { - __kmp_suspend_template(th_gtid, flag); -} void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) { __kmp_suspend_template(th_gtid, flag); } @@ -1567,10 +1534,6 @@ void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) { template void __kmp_suspend_32(int, kmp_flag_32 *); template void __kmp_suspend_64(int, kmp_flag_64 *); template void __kmp_suspend_64(int, kmp_flag_64 *); -template void -__kmp_atomic_suspend_64(int, kmp_atomic_flag_64 *); -template void -__kmp_atomic_suspend_64(int, kmp_atomic_flag_64 *); /* This routine signals the thread specified by target_gtid to wake up after setting the sleep bit indicated by the flag argument to FALSE. @@ -1593,50 +1556,36 @@ static inline void __kmp_resume_template(int target_gtid, C *flag) { __kmp_lock_suspend_mx(th); - if (!flag || flag != th->th.th_sleep_loc) { - // coming from __kmp_null_resume_wrapper, or thread is now sleeping on a - // different location; wake up at new location + if (!flag) { // coming from __kmp_null_resume_wrapper flag = (C *)CCAST(void *, th->th.th_sleep_loc); } // First, check if the flag is null or its type has changed. If so, someone // else woke it up. - if (!flag) { // Thread doesn't appear to be sleeping on anything + if (!flag || flag->get_type() != flag->get_ptr_type()) { // get_ptr_type + // simply shows what flag was cast to KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already " "awake: flag(%p)\n", - gtid, target_gtid, (void *)NULL)); - __kmp_unlock_suspend_mx(th); - return; - } else if (flag->get_type() != th->th.th_sleep_loc_type) { - // Flag type does not appear to match this function template; possibly the - // thread is sleeping on something else. Try null resume again. - KF_TRACE( - 5, - ("__kmp_resume_template: T#%d retrying, thread T#%d Mismatch flag(%p), " - "spin(%p) type=%d ptr_type=%d\n", - gtid, target_gtid, flag, flag->get(), flag->get_type(), - th->th.th_sleep_loc_type)); + gtid, target_gtid, NULL)); __kmp_unlock_suspend_mx(th); - __kmp_null_resume_wrapper(th); return; } else { // if multiple threads are sleeping, flag should be internally // referring to a specific thread here - if (!flag->is_sleeping()) { + typename C::flag_t old_spin = flag->unset_sleeping(); + if (!flag->is_sleeping_val(old_spin)) { KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already " - "awake: flag(%p): %u\n", - gtid, target_gtid, flag->get(), (unsigned int)flag->load())); + "awake: flag(%p): " + "%u => %u\n", + gtid, target_gtid, flag->get(), old_spin, flag->load())); __kmp_unlock_suspend_mx(th); return; } + KF_TRACE(5, ("__kmp_resume_template: T#%d about to wakeup T#%d, reset " + "sleep bit for flag's loc(%p): " + "%u => %u\n", + gtid, target_gtid, flag->get(), old_spin, flag->load())); } - KMP_DEBUG_ASSERT(flag); - flag->unset_sleeping(); TCW_PTR(th->th.th_sleep_loc, NULL); - th->th.th_sleep_loc_type = flag_unset; - - KF_TRACE(5, ("__kmp_resume_template: T#%d about to wakeup T#%d, reset " - "sleep bit for flag's loc(%p): %u\n", - gtid, target_gtid, flag->get(), (unsigned int)flag->load())); #ifdef DEBUG_SUSPEND { @@ -1662,19 +1611,12 @@ template void __kmp_resume_64(int target_gtid, kmp_flag_64 *flag) { __kmp_resume_template(target_gtid, flag); } -template -void __kmp_atomic_resume_64(int target_gtid, kmp_atomic_flag_64 *flag) { - __kmp_resume_template(target_gtid, flag); -} void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag) { __kmp_resume_template(target_gtid, flag); } template void __kmp_resume_32(int, kmp_flag_32 *); -template void __kmp_resume_32(int, kmp_flag_32 *); template void __kmp_resume_64(int, kmp_flag_64 *); -template void -__kmp_atomic_resume_64(int, kmp_atomic_flag_64 *); #if KMP_USE_MONITOR void __kmp_resume_monitor() { diff --git a/openmp/runtime/src/z_Windows_NT_util.cpp b/openmp/runtime/src/z_Windows_NT_util.cpp index 0a0801c7ece2f..320920283c9dc 100644 --- a/openmp/runtime/src/z_Windows_NT_util.cpp +++ b/openmp/runtime/src/z_Windows_NT_util.cpp @@ -240,12 +240,13 @@ static void __kmp_win32_cond_wait(kmp_win32_cond_t *cv, kmp_win32_mutex_t *mx, continue; } // condition fulfilled, exiting - flag->unset_sleeping(); + old_f = flag->unset_sleeping(); + KMP_DEBUG_ASSERT(old_f & KMP_BARRIER_SLEEP_STATE); TCW_PTR(th->th.th_sleep_loc, NULL); - th->th.th_sleep_loc_type = flag_unset; - KF_TRACE(50, ("__kmp_win32_cond_wait: exiting, condition " - "fulfilled: flag's loc(%p): %u\n", - flag->get(), (unsigned int)flag->load())); + KF_TRACE(50, + ("__kmp_win32_cond_wait: exiting, condition " + "fulfilled: flag's loc(%p): %u => %u\n", + flag->get(), (unsigned int)old_f, (unsigned int)flag->load())); __kmp_win32_mutex_lock(&cv->waiters_count_lock_); KMP_DEBUG_ASSERT(cv->waiters_count_ > 0); @@ -375,13 +376,9 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) { /* TODO: shouldn't this use release semantics to ensure that __kmp_suspend_initialize_thread gets called first? */ old_spin = flag->set_sleeping(); - TCW_PTR(th->th.th_sleep_loc, (void *)flag); - th->th.th_sleep_loc_type = flag->get_type(); if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME && __kmp_pause_status != kmp_soft_paused) { flag->unset_sleeping(); - TCW_PTR(th->th.th_sleep_loc, NULL); - th->th.th_sleep_loc_type = flag_unset; __kmp_unlock_suspend_mx(th); return; } @@ -390,10 +387,8 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) { " loc(%p)==%u\n", th_gtid, flag->get(), (unsigned int)flag->load())); - if (flag->done_check_val(old_spin) || flag->done_check()) { - flag->unset_sleeping(); - TCW_PTR(th->th.th_sleep_loc, NULL); - th->th.th_sleep_loc_type = flag_unset; + if (flag->done_check_val(old_spin)) { + old_spin = flag->unset_sleeping(); KF_TRACE(5, ("__kmp_suspend_template: T#%d false alarm, reset sleep bit " "for flag's loc(%p)\n", th_gtid, flag->get())); @@ -405,7 +400,7 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) { low probability" return when the condition variable has not been signaled or broadcast */ int deactivated = FALSE; - + TCW_PTR(th->th.th_sleep_loc, (void *)flag); while (flag->is_sleeping()) { KF_TRACE(15, ("__kmp_suspend_template: T#%d about to perform " "kmp_win32_cond_wait()\n", @@ -420,14 +415,13 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) { KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0); } deactivated = TRUE; + __kmp_win32_cond_wait(&th->th.th_suspend_cv, &th->th.th_suspend_mx, th, + flag); + } else { + __kmp_win32_cond_wait(&th->th.th_suspend_cv, &th->th.th_suspend_mx, th, + flag); } - KMP_DEBUG_ASSERT(th->th.th_sleep_loc); - KMP_DEBUG_ASSERT(th->th.th_sleep_loc_type == flag->get_type()); - - __kmp_win32_cond_wait(&th->th.th_suspend_cv, &th->th.th_suspend_mx, th, - flag); - #ifdef KMP_DEBUG if (flag->is_sleeping()) { KF_TRACE(100, @@ -437,14 +431,6 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) { } // while - // We may have had the loop variable set before entering the loop body; - // so we need to reset sleep_loc. - TCW_PTR(th->th.th_sleep_loc, NULL); - th->th.th_sleep_loc_type = flag_unset; - - KMP_DEBUG_ASSERT(!flag->is_sleeping()); - KMP_DEBUG_ASSERT(!th->th.th_sleep_loc); - // Mark the thread as active again (if it was previous marked as inactive) if (deactivated) { th->th.th_active = TRUE; @@ -467,10 +453,6 @@ template void __kmp_suspend_64(int th_gtid, kmp_flag_64 *flag) { __kmp_suspend_template(th_gtid, flag); } -template -void __kmp_atomic_suspend_64(int th_gtid, kmp_atomic_flag_64 *flag) { - __kmp_suspend_template(th_gtid, flag); -} void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) { __kmp_suspend_template(th_gtid, flag); } @@ -478,10 +460,6 @@ void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) { template void __kmp_suspend_32(int, kmp_flag_32 *); template void __kmp_suspend_64(int, kmp_flag_64 *); template void __kmp_suspend_64(int, kmp_flag_64 *); -template void -__kmp_atomic_suspend_64(int, kmp_atomic_flag_64 *); -template void -__kmp_atomic_suspend_64(int, kmp_atomic_flag_64 *); /* This routine signals the thread specified by target_gtid to wake up after setting the sleep bit indicated by the flag argument to FALSE */ @@ -499,35 +477,32 @@ static inline void __kmp_resume_template(int target_gtid, C *flag) { __kmp_suspend_initialize_thread(th); __kmp_lock_suspend_mx(th); - if (!flag || flag != th->th.th_sleep_loc) { - // coming from __kmp_null_resume_wrapper, or thread is now sleeping on a - // different location; wake up at new location + if (!flag) { // coming from __kmp_null_resume_wrapper flag = (C *)th->th.th_sleep_loc; } // First, check if the flag is null or its type has changed. If so, someone // else woke it up. - if (!flag || flag->get_type() != th->th.th_sleep_loc_type) { - // simply shows what flag was cast to + if (!flag || flag->get_type() != flag->get_ptr_type()) { // get_ptr_type + // simply shows what + // flag was cast to KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already " "awake: flag's loc(%p)\n", gtid, target_gtid, NULL)); __kmp_unlock_suspend_mx(th); return; } else { - if (!flag->is_sleeping()) { + typename C::flag_t old_spin = flag->unset_sleeping(); + if (!flag->is_sleeping_val(old_spin)) { KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already " - "awake: flag's loc(%p): %u\n", - gtid, target_gtid, flag->get(), (unsigned int)flag->load())); + "awake: flag's loc(%p): %u => %u\n", + gtid, target_gtid, flag->get(), (unsigned int)old_spin, + (unsigned int)flag->load())); __kmp_unlock_suspend_mx(th); return; } } - KMP_DEBUG_ASSERT(flag); - flag->unset_sleeping(); TCW_PTR(th->th.th_sleep_loc, NULL); - th->th.th_sleep_loc_type = flag_unset; - KF_TRACE(5, ("__kmp_resume_template: T#%d about to wakeup T#%d, reset sleep " "bit for flag's loc(%p)\n", gtid, target_gtid, flag->get())); @@ -548,19 +523,12 @@ template void __kmp_resume_64(int target_gtid, kmp_flag_64 *flag) { __kmp_resume_template(target_gtid, flag); } -template -void __kmp_atomic_resume_64(int target_gtid, kmp_atomic_flag_64 *flag) { - __kmp_resume_template(target_gtid, flag); -} void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag) { __kmp_resume_template(target_gtid, flag); } template void __kmp_resume_32(int, kmp_flag_32 *); -template void __kmp_resume_32(int, kmp_flag_32 *); template void __kmp_resume_64(int, kmp_flag_64 *); -template void -__kmp_atomic_resume_64(int, kmp_atomic_flag_64 *); void __kmp_yield() { Sleep(0); } diff --git a/openmp/runtime/test/barrier/omp_barrier.c b/openmp/runtime/test/barrier/omp_barrier.c index 052784e0de397..3da70db928905 100644 --- a/openmp/runtime/test/barrier/omp_barrier.c +++ b/openmp/runtime/test/barrier/omp_barrier.c @@ -2,8 +2,6 @@ // RUN: %libomp-compile && env KMP_BLOCKTIME=infinite %libomp-run // RUN: %libomp-compile && env KMP_PLAIN_BARRIER_PATTERN='hierarchical,hierarchical' KMP_FORKJOIN_BARRIER_PATTERN='hierarchical,hierarchical' %libomp-run // RUN: %libomp-compile && env KMP_BLOCKTIME=infinite KMP_PLAIN_BARRIER_PATTERN='hierarchical,hierarchical' KMP_FORKJOIN_BARRIER_PATTERN='hierarchical,hierarchical' %libomp-run -// RUN: %libomp-compile && env KMP_PLAIN_BARRIER_PATTERN='dist,dist' KMP_FORKJOIN_BARRIER_PATTERN='dist,dist' KMP_REDUCTION_BARRIER_PATTERN='dist,dist' %libomp-run -// RUN: %libomp-compile && env KMP_BLOCKTIME=infinite KMP_PLAIN_BARRIER_PATTERN='dist,dist' KMP_FORKJOIN_BARRIER_PATTERN='dist,dist' KMP_REDUCTION_BARRIER_PATTERN='dist,dist' %libomp-run #include #include "omp_testsuite.h" #include "omp_my_sleep.h" From bf9dcb4cd21513a2b90db8af4f7f0e6769afe205 Mon Sep 17 00:00:00 2001 From: Alex Zinenko Date: Tue, 29 Jun 2021 16:44:16 +0200 Subject: [PATCH 186/619] [mlir] silence -Wunused-variable in Linalg comprehensive bufferize --- mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp index 3875965e9b92c..996e90662a454 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp @@ -1605,6 +1605,7 @@ bufferizableInPlaceAnalysis(OpOperand &operand, OpResult result, Operation *op = result.getDefiningOp(); assert(result && !isa(op) && "expected OpResult not coming from a ExtractSliceOp"); + (void)op; int64_t resultNumber = result.getResultNumber(); (void)resultNumber; From e63b18bc84a27718266f5c838a572ba423f70a2c Mon Sep 17 00:00:00 2001 From: Jeremy Morse Date: Tue, 29 Jun 2021 15:40:43 +0100 Subject: [PATCH 187/619] Catch an extremely obvious memory leak, thanks asan https://lab.llvm.org/buildbot/#/builders/5/builds/9208 (dbg-phis-merging-in-ldv.mir and dbg-phis-with-loops.mir in the asan check stage) --- llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp index 8e0588241bb2b..75e551b665185 100644 --- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp @@ -3574,6 +3574,9 @@ class LDVSSAUpdater { LDVSSAUpdater(LocIdx L, ValueIDNum **MLiveIns) : Loc(L), MLiveIns(MLiveIns) {} void reset() { + for (auto &Block : BlockMap) + delete Block.second; + PHIs.clear(); UndefMap.clear(); BlockMap.clear(); From 7756216547e5cb5eca429e9d7d467058a1472a48 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Tue, 22 Jun 2021 13:43:48 -0400 Subject: [PATCH 188/619] [libc++] NFCI: Remove code duplication and obsolete declarations in wrap_iter Differential Revision: https://reviews.llvm.org/D105040 --- libcxx/include/__algorithm/inplace_merge.h | 5 +- libcxx/include/__iterator/wrap_iter.h | 217 +++------------------ 2 files changed, 35 insertions(+), 187 deletions(-) diff --git a/libcxx/include/__algorithm/inplace_merge.h b/libcxx/include/__algorithm/inplace_merge.h index 24ad36300fd04..c74633a74cf39 100644 --- a/libcxx/include/__algorithm/inplace_merge.h +++ b/libcxx/include/__algorithm/inplace_merge.h @@ -10,11 +10,12 @@ #define _LIBCPP___ALGORITHM_INPLACE_MERGE_H #include <__config> -#include <__algorithm/comp.h> #include <__algorithm/comp_ref_type.h> -#include <__algorithm/rotate.h> +#include <__algorithm/comp.h> #include <__algorithm/lower_bound.h> #include <__algorithm/min.h> +#include <__algorithm/move.h> +#include <__algorithm/rotate.h> #include <__algorithm/upper_bound.h> #include <__iterator/iterator_traits.h> #include <__utility/swap.h> diff --git a/libcxx/include/__iterator/wrap_iter.h b/libcxx/include/__iterator/wrap_iter.h index d3e6f1e9ef518..4f2228c893d76 100644 --- a/libcxx/include/__iterator/wrap_iter.h +++ b/libcxx/include/__iterator/wrap_iter.h @@ -25,61 +25,6 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD -template class __wrap_iter; - -template -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG -bool -operator==(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT; - -template -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG -bool -operator<(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT; - -template -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG -bool -operator!=(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT; - -template -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG -bool -operator>(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT; - -template -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG -bool -operator>=(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT; - -template -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG -bool -operator<=(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT; - -#ifndef _LIBCPP_CXX03_LANG -template -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG -auto -operator-(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT --> decltype(__x.base() - __y.base()); -#else -template -_LIBCPP_INLINE_VISIBILITY -typename __wrap_iter<_Iter1>::difference_type -operator-(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT; -#endif - -template -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG -__wrap_iter<_Iter> -operator+(typename __wrap_iter<_Iter>::difference_type, __wrap_iter<_Iter>) _NOEXCEPT; - -template _Op _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 copy(_Ip, _Ip, _Op); -template _B2 _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 copy_backward(_B1, _B1, _B2); -template _Op _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 move(_Ip, _Ip, _Op); -template _B2 _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 move_backward(_B1, _B1, _B2); - template class __wrap_iter { @@ -217,80 +162,18 @@ class __wrap_iter template friend class basic_string; template friend class _LIBCPP_TEMPLATE_VIS vector; template friend class _LIBCPP_TEMPLATE_VIS span; - - template - _LIBCPP_CONSTEXPR_IF_NODEBUG friend - bool - operator==(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT; - - template - _LIBCPP_CONSTEXPR_IF_NODEBUG friend - bool - operator<(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT; - - template - _LIBCPP_CONSTEXPR_IF_NODEBUG friend - bool - operator!=(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT; - - template - _LIBCPP_CONSTEXPR_IF_NODEBUG friend - bool - operator>(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT; - - template - _LIBCPP_CONSTEXPR_IF_NODEBUG friend - bool - operator>=(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT; - - template - _LIBCPP_CONSTEXPR_IF_NODEBUG friend - bool - operator<=(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT; - -#ifndef _LIBCPP_CXX03_LANG - template - _LIBCPP_CONSTEXPR_IF_NODEBUG friend - auto - operator-(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT - -> decltype(__x.base() - __y.base()); -#else - template - _LIBCPP_CONSTEXPR_IF_NODEBUG friend - typename __wrap_iter<_Iter1>::difference_type - operator-(const __wrap_iter<_Iter1>&, const __wrap_iter<_Iter2>&) _NOEXCEPT; -#endif - - template - _LIBCPP_CONSTEXPR_IF_NODEBUG friend - __wrap_iter<_Iter1> - operator+(typename __wrap_iter<_Iter1>::difference_type, __wrap_iter<_Iter1>) _NOEXCEPT; }; -#if _LIBCPP_STD_VER <= 17 -template -struct __is_cpp17_contiguous_iterator<__wrap_iter<_It> > : true_type {}; -#endif - -template -_LIBCPP_CONSTEXPR -decltype(_VSTD::__to_address(declval<_Iter>())) -__to_address(__wrap_iter<_Iter> __w) _NOEXCEPT { - return _VSTD::__to_address(__w.base()); -} - template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG -bool -operator==(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +bool operator==(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT { return __x.base() == __y.base(); } template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG -bool -operator<(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +bool operator<(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT { #if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__less_than_comparable(&__x, &__y), @@ -300,87 +183,42 @@ operator<(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXC } template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG -bool -operator!=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +bool operator!=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT { return !(__x == __y); } template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG -bool -operator>(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +bool operator>(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT { return __y < __x; } template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG -bool -operator>=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +bool operator>=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT { return !(__x < __y); } template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG -bool -operator<=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT -{ - return !(__y < __x); -} - -template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG -bool -operator!=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter1>& __y) _NOEXCEPT -{ - return !(__x == __y); -} - -template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG -bool -operator>(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter1>& __y) _NOEXCEPT -{ - return __y < __x; -} - -template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG -bool -operator>=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter1>& __y) _NOEXCEPT -{ - return !(__x < __y); -} - -template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG -bool -operator<=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter1>& __y) _NOEXCEPT +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +bool operator<=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT { return !(__y < __x); } -#ifndef _LIBCPP_CXX03_LANG template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG -auto -operator-(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT --> decltype(__x.base() - __y.base()) -{ -#if _LIBCPP_DEBUG_LEVEL == 2 - _LIBCPP_ASSERT(__get_const_db()->__less_than_comparable(&__x, &__y), - "Attempted to subtract incompatible iterators"); -#endif - return __x.base() - __y.base(); -} +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +#ifndef _LIBCPP_CXX03_LANG +auto operator-(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT + -> decltype(__x.base() - __y.base()) #else -template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG typename __wrap_iter<_Iter1>::difference_type operator-(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT +#endif // C++03 { #if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__less_than_comparable(&__x, &__y), @@ -388,18 +226,27 @@ operator-(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXC #endif return __x.base() - __y.base(); } -#endif -template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG -__wrap_iter<_Iter> -operator+(typename __wrap_iter<_Iter>::difference_type __n, - __wrap_iter<_Iter> __x) _NOEXCEPT +template +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +__wrap_iter<_Iter1> operator+(typename __wrap_iter<_Iter1>::difference_type __n, __wrap_iter<_Iter1> __x) _NOEXCEPT { __x += __n; return __x; } +#if _LIBCPP_STD_VER <= 17 +template +struct __is_cpp17_contiguous_iterator<__wrap_iter<_It> > : true_type {}; +#endif + +template +_LIBCPP_CONSTEXPR +decltype(_VSTD::__to_address(declval<_Iter>())) +__to_address(__wrap_iter<_Iter> __w) _NOEXCEPT { + return _VSTD::__to_address(__w.base()); +} + _LIBCPP_END_NAMESPACE_STD _LIBCPP_POP_MACROS From 58a230455bbc680fc12641d9231dce5dfb907e91 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Tue, 1 Jun 2021 17:16:11 -0400 Subject: [PATCH 189/619] [libc++] Serialize Lit parameters to make them available to from-scratch configs Before this patch, Lit parameters that were set as a result of CMake options were not made available to from-scratch configs. This patch serializes those parameters into the generated lit config file so that they are available to all configs. Differential Revision: https://reviews.llvm.org/D105047 --- libcxx/test/CMakeLists.txt | 33 ++++++++++++++++--- libcxx/test/configs/legacy.cfg.in | 9 ++--- .../test/configs/libcxx-trunk-shared.cfg.in | 5 ++- .../test/configs/libcxx-trunk-static.cfg.in | 5 ++- libcxxabi/test/CMakeLists.txt | 21 +++++++++++- libcxxabi/test/lit.site.cfg.in | 7 ++-- libunwind/test/CMakeLists.txt | 16 +++++++++ libunwind/test/libunwind/test/config.py | 2 +- libunwind/test/lit.site.cfg.in | 6 ++-- 9 files changed, 76 insertions(+), 28 deletions(-) diff --git a/libcxx/test/CMakeLists.txt b/libcxx/test/CMakeLists.txt index 429b7905fdce6..71a7d16772d35 100644 --- a/libcxx/test/CMakeLists.txt +++ b/libcxx/test/CMakeLists.txt @@ -55,9 +55,6 @@ if(LIBCXX_INCLUDE_TESTS AND NOT LIBCXX_LINK_TESTS_WITH_SHARED_LIBCXX AND NOT LIB message(FATAL_ERROR "LIBCXX_LINK_TESTS_WITH_SHARED_LIBCXX being OFF requires LIBCXX_ENABLE_STATIC to be ON") endif() -pythonize_bool(LIBCXX_ENABLE_EXCEPTIONS) -pythonize_bool(LIBCXX_ENABLE_EXPERIMENTAL_LIBRARY) -pythonize_bool(LIBCXX_ENABLE_RTTI) pythonize_bool(LIBCXX_ENABLE_SHARED) pythonize_bool(LIBCXX_LINK_TESTS_WITH_SHARED_LIBCXX) pythonize_bool(LIBCXX_LINK_TESTS_WITH_SHARED_LIBCXXABI) @@ -71,7 +68,6 @@ pythonize_bool(LIBCXX_HAS_ATOMIC_LIB) pythonize_bool(LIBCXX_HAVE_CXX_ATOMICS_WITH_LIB) pythonize_bool(LIBCXX_BUILD_EXTERNAL_THREAD_LIBRARY) pythonize_bool(LIBCXX_DEBUG_BUILD) -pythonize_bool(LIBCXX_ENABLE_DEBUG_MODE_SUPPORT) pythonize_bool(LIBCXX_ENABLE_PARALLEL_ALGORITHMS) # By default, for non-standalone builds, libcxx and libcxxabi share a library @@ -87,6 +83,35 @@ set(LIBCXX_EXECUTOR "\\\"${Python3_EXECUTABLE}\\\" ${CMAKE_CURRENT_LIST_DIR}/../ "Executor to use when running tests.") set(AUTO_GEN_COMMENT "## Autogenerated by libcxx configuration.\n# Do not edit!") +set(SERIALIZED_LIT_PARAMS "# Lit parameters serialized here for llvm-lit to pick them up\n") + +macro(serialize_lit_param param value) + string(APPEND SERIALIZED_LIT_PARAMS "config.${param} = ${value}\n") +endmacro() + +if (NOT LIBCXX_ENABLE_EXCEPTIONS) + serialize_lit_param(enable_exceptions False) +endif() + +if (NOT LIBCXX_ENABLE_EXPERIMENTAL_LIBRARY) + serialize_lit_param(enable_experimental False) +endif() + +if (NOT LIBCXX_ENABLE_RTTI) + serialize_lit_param(enable_rtti False) +endif() + +if (NOT LIBCXX_ENABLE_DEBUG_MODE_SUPPORT) + serialize_lit_param(enable_debug_tests False) +endif() + +if (TARGET_TRIPLE) + serialize_lit_param(target_triple "\"${TARGET_TRIPLE}\"") +endif() + +if (LLVM_USE_SANITIZER) + serialize_lit_param(use_sanitizer "\"${LLVM_USE_SANITIZER}\"") +endif() if (NOT DEFINED LIBCXX_TEST_DEPS) message(FATAL_ERROR "Expected LIBCXX_TEST_DEPS to be defined") diff --git a/libcxx/test/configs/legacy.cfg.in b/libcxx/test/configs/legacy.cfg.in index f9737e85e0dc6..adb813644fec7 100644 --- a/libcxx/test/configs/legacy.cfg.in +++ b/libcxx/test/configs/legacy.cfg.in @@ -1,5 +1,7 @@ @AUTO_GEN_COMMENT@ +@SERIALIZED_LIT_PARAMS@ + import os import site @@ -10,18 +12,11 @@ config.libcxx_src_root = "@LIBCXX_SOURCE_DIR@" config.libcxx_obj_root = "@LIBCXX_BINARY_DIR@" config.cxx_library_root = "@LIBCXX_LIBRARY_DIR@" config.abi_library_root = "@LIBCXX_CXX_ABI_LIBRARY_PATH@" -config.enable_exceptions = @LIBCXX_ENABLE_EXCEPTIONS@ -config.enable_debug_tests = @LIBCXX_ENABLE_DEBUG_MODE_SUPPORT@ -config.enable_experimental = @LIBCXX_ENABLE_EXPERIMENTAL_LIBRARY@ -config.enable_rtti = @LIBCXX_ENABLE_RTTI@ config.enable_shared = @LIBCXX_LINK_TESTS_WITH_SHARED_LIBCXX@ config.enable_32bit = @LIBCXX_BUILD_32_BITS@ config.cxx_abi = "@LIBCXX_CXX_ABI_LIBNAME@" -config.use_sanitizer = "@LLVM_USE_SANITIZER@" config.configuration_variant = "@LIBCXX_LIT_VARIANT@" config.host_triple = "@LLVM_HOST_TRIPLE@" -if "@TARGET_TRIPLE@": - config.target_triple = "@TARGET_TRIPLE@" config.sysroot = "@LIBCXX_SYSROOT@" config.gcc_toolchain = "@LIBCXX_GCC_TOOLCHAIN@" config.generate_coverage = @LIBCXX_GENERATE_COVERAGE@ diff --git a/libcxx/test/configs/libcxx-trunk-shared.cfg.in b/libcxx/test/configs/libcxx-trunk-shared.cfg.in index 127d824f3ecf8..4bb4c43dc752e 100644 --- a/libcxx/test/configs/libcxx-trunk-shared.cfg.in +++ b/libcxx/test/configs/libcxx-trunk-shared.cfg.in @@ -1,11 +1,12 @@ @AUTO_GEN_COMMENT@ +@SERIALIZED_LIT_PARAMS@ + LIBCXX_ROOT = "@LIBCXX_SOURCE_DIR@" INSTALL_ROOT = "@CMAKE_BINARY_DIR@" COMPILER = "@CMAKE_CXX_COMPILER@" EXEC_ROOT = "@LIBCXX_BINARY_DIR@" CMAKE_OSX_SYSROOT = "@CMAKE_OSX_SYSROOT@" -TARGET_TRIPLE = "@TARGET_TRIPLE@" import os import pipes @@ -23,8 +24,6 @@ config.test_source_root = os.path.join(LIBCXX_ROOT, 'test') config.test_format = libcxx.test.format.CxxStandardLibraryTest() config.recursiveExpansionLimit = 10 config.test_exec_root = EXEC_ROOT -if TARGET_TRIPLE: - config.target_triple = TARGET_TRIPLE # Configure basic substitutions runPy = os.path.join(LIBCXX_ROOT, 'utils', 'run.py') diff --git a/libcxx/test/configs/libcxx-trunk-static.cfg.in b/libcxx/test/configs/libcxx-trunk-static.cfg.in index 4a6b4f19ec458..5a111e05fe956 100644 --- a/libcxx/test/configs/libcxx-trunk-static.cfg.in +++ b/libcxx/test/configs/libcxx-trunk-static.cfg.in @@ -1,11 +1,12 @@ @AUTO_GEN_COMMENT@ +@SERIALIZED_LIT_PARAMS@ + LIBCXX_ROOT = "@LIBCXX_SOURCE_DIR@" INSTALL_ROOT = "@CMAKE_BINARY_DIR@" COMPILER = "@CMAKE_CXX_COMPILER@" EXEC_ROOT = "@LIBCXX_BINARY_DIR@" CMAKE_OSX_SYSROOT = "@CMAKE_OSX_SYSROOT@" -TARGET_TRIPLE = "@TARGET_TRIPLE@" import os import pipes @@ -23,8 +24,6 @@ config.test_source_root = os.path.join(LIBCXX_ROOT, 'test') config.test_format = libcxx.test.format.CxxStandardLibraryTest() config.recursiveExpansionLimit = 10 config.test_exec_root = EXEC_ROOT -if TARGET_TRIPLE: - config.target_triple = TARGET_TRIPLE # Configure basic substitutions runPy = os.path.join(LIBCXX_ROOT, 'utils', 'run.py') diff --git a/libcxxabi/test/CMakeLists.txt b/libcxxabi/test/CMakeLists.txt index d85dc412ed744..b571a2437cc8f 100644 --- a/libcxxabi/test/CMakeLists.txt +++ b/libcxxabi/test/CMakeLists.txt @@ -41,7 +41,6 @@ pythonize_bool(LIBCXXABI_BUILD_32_BITS) pythonize_bool(LIBCXX_ENABLE_SHARED) pythonize_bool(LIBCXXABI_ENABLE_SHARED) pythonize_bool(LIBCXXABI_ENABLE_THREADS) -pythonize_bool(LIBCXXABI_ENABLE_EXCEPTIONS) pythonize_bool(LIBCXXABI_USE_LLVM_UNWINDER) pythonize_bool(LIBCXXABI_USE_COMPILER_RT) pythonize_bool(LIBCXXABI_BUILD_EXTERNAL_THREAD_LIBRARY) @@ -71,6 +70,26 @@ if (NOT LIBCXXABI_STANDALONE_BUILD) endif() set(AUTO_GEN_COMMENT "## Autogenerated by libcxxabi configuration.\n# Do not edit!") +set(SERIALIZED_LIT_PARAMS "# Lit parameters serialized here for llvm-lit to pick them up\n") + +macro(serialize_lit_param param value) + string(APPEND SERIALIZED_LIT_PARAMS "config.${param} = ${value}\n") +endmacro() + +if (NOT LIBCXXABI_ENABLE_EXCEPTIONS) + serialize_lit_param(enable_exceptions False) +endif() + +serialize_lit_param(enable_experimental False) + +if (LLVM_USE_SANITIZER) + serialize_lit_param(use_sanitizer "\"${LLVM_USE_SANITIZER}\"") +endif() + +if (TARGET_TRIPLE) + serialize_lit_param(target_triple "\"${TARGET_TRIPLE}\"") +endif() + configure_lit_site_cfg( "${LIBCXXABI_TEST_CONFIG}" ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg diff --git a/libcxxabi/test/lit.site.cfg.in b/libcxxabi/test/lit.site.cfg.in index 6c4e944de556d..4015501de482e 100644 --- a/libcxxabi/test/lit.site.cfg.in +++ b/libcxxabi/test/lit.site.cfg.in @@ -1,5 +1,7 @@ @AUTO_GEN_COMMENT@ +@SERIALIZED_LIT_PARAMS@ + import os import site @@ -16,20 +18,15 @@ config.cxx_library_root = "@LIBCXXABI_LIBCXX_LIBRARY_PATH@" config.llvm_unwinder = @LIBCXXABI_USE_LLVM_UNWINDER@ config.builtins_library = "@LIBCXXABI_BUILTINS_LIBRARY@" config.enable_threads = @LIBCXXABI_ENABLE_THREADS@ -config.use_sanitizer = "@LLVM_USE_SANITIZER@" config.enable_32bit = @LIBCXXABI_BUILD_32_BITS@ config.target_info = "@LIBCXXABI_TARGET_INFO@" config.executor = "@LIBCXXABI_EXECUTOR@" config.libcxxabi_shared = @LIBCXXABI_LINK_TESTS_WITH_SHARED_LIBCXXABI@ config.enable_shared = @LIBCXXABI_LINK_TESTS_WITH_SHARED_LIBCXX@ -config.enable_exceptions = @LIBCXXABI_ENABLE_EXCEPTIONS@ config.host_triple = "@LLVM_HOST_TRIPLE@" -if "@TARGET_TRIPLE@": - config.target_triple = "@TARGET_TRIPLE@" config.sysroot = "@LIBCXXABI_SYSROOT@" config.gcc_toolchain = "@LIBCXXABI_GCC_TOOLCHAIN@" config.cxx_ext_threads = @LIBCXXABI_BUILD_EXTERNAL_THREAD_LIBRARY@ -config.enable_experimental = False config.pstl_src_root = "@ParallelSTL_SOURCE_DIR@" if @LIBCXX_ENABLE_PARALLEL_ALGORITHMS@ else None config.pstl_obj_root = "@ParallelSTL_BINARY_DIR@" if @LIBCXX_ENABLE_PARALLEL_ALGORITHMS@ else None diff --git a/libunwind/test/CMakeLists.txt b/libunwind/test/CMakeLists.txt index a5a739b5e74e7..24c769cb9de71 100644 --- a/libunwind/test/CMakeLists.txt +++ b/libunwind/test/CMakeLists.txt @@ -24,6 +24,22 @@ set(LIBUNWIND_EXECUTOR "${Python3_EXECUTABLE} ${LIBUNWIND_LIBCXX_PATH}/utils/run "Executor to use when running tests.") set(AUTO_GEN_COMMENT "## Autogenerated by libunwind configuration.\n# Do not edit!") +set(SERIALIZED_LIT_PARAMS "# Lit parameters serialized here for llvm-lit to pick them up\n") + +macro(serialize_lit_param param value) + string(APPEND SERIALIZED_LIT_PARAMS "config.${param} = ${value}\n") +endmacro() + +serialize_lit_param(enable_experimental False) + +if (LLVM_USE_SANITIZER) + serialize_lit_param(use_sanitizer "\"${LLVM_USE_SANITIZER}\"") +endif() + +if (TARGET_TRIPLE) + serialize_lit_param(target_triple "\"${TARGET_TRIPLE}\"") +endif() + configure_lit_site_cfg( "${LIBUNWIND_TEST_CONFIG}" ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg diff --git a/libunwind/test/libunwind/test/config.py b/libunwind/test/libunwind/test/config.py index b3f12bfe403c4..18919c247f203 100644 --- a/libunwind/test/libunwind/test/config.py +++ b/libunwind/test/libunwind/test/config.py @@ -45,7 +45,7 @@ def configure_compile_flags(self): self.cxx.compile_flags += ['-funwind-tables'] # Make symbols available in the tests. triple = self.get_lit_conf('target_triple', None) - if 'linux' in triple: + if triple is not None and 'linux' in triple: self.cxx.link_flags += ['-Wl,--export-dynamic'] if not self.get_lit_bool('enable_threads', True): self.cxx.compile_flags += ['-D_LIBUNWIND_HAS_NO_THREADS'] diff --git a/libunwind/test/lit.site.cfg.in b/libunwind/test/lit.site.cfg.in index 51b46316e4bd4..6f0c339b9ab66 100644 --- a/libunwind/test/lit.site.cfg.in +++ b/libunwind/test/lit.site.cfg.in @@ -1,5 +1,7 @@ @AUTO_GEN_COMMENT@ +@SERIALIZED_LIT_PARAMS@ + import os import site @@ -14,7 +16,6 @@ config.cxx_library_root = "@LIBUNWIND_LIBCXX_LIBRARY_PATH@" config.llvm_unwinder = True config.builtins_library = "@LIBUNWIND_BUILTINS_LIBRARY@" config.enable_threads = @LIBUNWIND_ENABLE_THREADS@ -config.use_sanitizer = "@LLVM_USE_SANITIZER@" config.enable_32bit = @LIBUNWIND_BUILD_32_BITS@ config.target_info = "@LIBUNWIND_TARGET_INFO@" config.test_linker_flags = "@LIBUNWIND_TEST_LINKER_FLAGS@" @@ -24,12 +25,9 @@ config.libunwind_shared = @LIBUNWIND_ENABLE_SHARED@ config.enable_shared = @LIBCXX_ENABLE_SHARED@ config.arm_ehabi = @LIBUNWIND_USES_ARM_EHABI@ config.host_triple = "@LLVM_HOST_TRIPLE@" -if "@TARGET_TRIPLE@": - config.target_triple = "@TARGET_TRIPLE@" config.sysroot = "@LIBUNWIND_SYSROOT@" config.gcc_toolchain = "@LIBUNWIND_GCC_TOOLCHAIN@" config.cxx_ext_threads = @LIBUNWIND_BUILD_EXTERNAL_THREAD_LIBRARY@ -config.enable_experimental = False site.addsitedir(os.path.join(config.libunwind_src_root, 'test')) site.addsitedir(os.path.join(config.libcxx_src_root, 'utils')) From c33ebad73516ffcf7b00821a430aa6a0199941f0 Mon Sep 17 00:00:00 2001 From: Pratyush Das Date: Tue, 29 Jun 2021 13:50:04 +0000 Subject: [PATCH 190/619] Print default template argument if manually specified in typedef declaration. If a default template type argument is manually specified to be of the default type, then it is committed when printing the template. Differential revision: https://reviews.llvm.org/D103040 --- clang/lib/AST/TypePrinter.cpp | 3 +-- clang/test/SemaTemplate/class-template-id.cpp | 4 ++-- .../SemaTemplate/default-arguments-ast-print.cpp | 12 ++++++++++++ 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp index 720cab917a22f..47e48506dc96d 100644 --- a/clang/lib/AST/TypePrinter.cpp +++ b/clang/lib/AST/TypePrinter.cpp @@ -1449,8 +1449,7 @@ void TypePrinter::printTemplateId(const TemplateSpecializationType *T, T->getTemplateName().print(OS, Policy); } - const TemplateParameterList *TPL = TD ? TD->getTemplateParameters() : nullptr; - printTemplateArgumentList(OS, T->template_arguments(), Policy, TPL); + printTemplateArgumentList(OS, T->template_arguments(), Policy); spaceBeforePlaceHolder(OS); } diff --git a/clang/test/SemaTemplate/class-template-id.cpp b/clang/test/SemaTemplate/class-template-id.cpp index b32a03e478d91..50cb3ef59ea41 100644 --- a/clang/test/SemaTemplate/class-template-id.cpp +++ b/clang/test/SemaTemplate/class-template-id.cpp @@ -9,9 +9,9 @@ A *foo(A *ptr, A const *ptr2, A *ptr3) { if (ptr) return ptr; // okay else if (ptr2) - return ptr2; // expected-error{{cannot initialize return object of type 'A *' with an lvalue of type 'const A *'}} + return ptr2; // expected-error{{cannot initialize return object of type 'A *' (aka 'A *') with an lvalue of type 'const A *'}} else { - return ptr3; // expected-error{{cannot initialize return object of type 'A *' with an lvalue of type 'A *'}} + return ptr3; // expected-error{{cannot initialize return object of type 'A *' (aka 'A *') with an lvalue of type 'A *'}} } } diff --git a/clang/test/SemaTemplate/default-arguments-ast-print.cpp b/clang/test/SemaTemplate/default-arguments-ast-print.cpp index 9ed17a79de0d6..4623f0a8cdf46 100644 --- a/clang/test/SemaTemplate/default-arguments-ast-print.cpp +++ b/clang/test/SemaTemplate/default-arguments-ast-print.cpp @@ -10,3 +10,15 @@ int Foo::method1() { // CHECK: int Foo::method1() return 10; } + +int test_typedef() { + typedef Foo TypedefArg; + // CHECK: typedef Foo TypedefArg; + return 10; +} + +int test_typedef2() { + typedef Foo TypedefArg; + // CHECK: typedef Foo TypedefArg; + return 10; +} From e49d65f36d66e247c148601f59edeb2e0b44d8dd Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Tue, 29 Jun 2021 07:54:53 -0700 Subject: [PATCH 191/619] [LV] Fix bug when unrolling (only) a loop with non-latch exit If we unroll a loop in the vectorizer (without vectorizing), and the cost model requires a epilogue be generated for correctness, the code generation must actually do so. The included test case on an unmodified opt will access memory one past the expected bound. As a result, this patch is fixing a latent miscompile. Differential Revision: https://reviews.llvm.org/D103700 --- .../Transforms/Vectorize/LoopVectorize.cpp | 35 ++++++++----------- .../LoopVectorize/unroll_nonlatch.ll | 16 ++++----- 2 files changed, 23 insertions(+), 28 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index f99352a3f075f..38a55d1281412 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1566,14 +1566,14 @@ class LoopVectorizationCostModel { /// Returns true if we're required to use a scalar epilogue for at least /// the final iteration of the original loop. - bool requiresScalarEpilogue() const { + bool requiresScalarEpilogue(ElementCount VF) const { if (!isScalarEpilogueAllowed()) return false; // If we might exit from anywhere but the latch, must run the exiting // iteration in scalar form. if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) return true; - return InterleaveInfo.requiresScalarEpilogue(); + return VF.isVector() && InterleaveInfo.requiresScalarEpilogue(); } /// Returns true if a scalar epilogue is not allowed due to optsize or a @@ -3181,18 +3181,13 @@ Value *InnerLoopVectorizer::getOrCreateVectorTripCount(Loop *L) { // unroll factor (number of SIMD instructions). Value *R = Builder.CreateURem(TC, Step, "n.mod.vf"); - // There are two cases where we need to ensure (at least) the last iteration - // runs in the scalar remainder loop. Thus, if the step evenly divides - // the trip count, we set the remainder to be equal to the step. If the step - // does not evenly divide the trip count, no adjustment is necessary since - // there will already be scalar iterations. Note that the minimum iterations - // check ensures that N >= Step. The cases are: - // 1) If there is a non-reversed interleaved group that may speculatively - // access memory out-of-bounds. - // 2) If any instruction may follow a conditionally taken exit. That is, if - // the loop contains multiple exiting blocks, or a single exiting block - // which is not the latch. - if (VF.isVector() && Cost->requiresScalarEpilogue()) { + // There are cases where we *must* run at least one iteration in the remainder + // loop. See the cost model for when this can happen. If the step evenly + // divides the trip count, we set the remainder to be equal to the step. If + // the step does not evenly divide the trip count, no adjustment is necessary + // since there will already be scalar iterations. Note that the minimum + // iterations check ensures that N >= Step. + if (Cost->requiresScalarEpilogue(VF)) { auto *IsZero = Builder.CreateICmpEQ(R, ConstantInt::get(R->getType(), 0)); R = Builder.CreateSelect(IsZero, Step, R); } @@ -3246,8 +3241,8 @@ void InnerLoopVectorizer::emitMinimumIterationCountCheck(Loop *L, // vector trip count is zero. This check also covers the case where adding one // to the backedge-taken count overflowed leading to an incorrect trip count // of zero. In this case we will also jump to the scalar loop. - auto P = Cost->requiresScalarEpilogue() ? ICmpInst::ICMP_ULE - : ICmpInst::ICMP_ULT; + auto P = Cost->requiresScalarEpilogue(VF) ? ICmpInst::ICMP_ULE + : ICmpInst::ICMP_ULT; // If tail is to be folded, vector loop takes care of all iterations. Value *CheckMinIters = Builder.getFalse(); @@ -8323,8 +8318,8 @@ BasicBlock *EpilogueVectorizerMainLoop::emitMinimumIterationCountCheck( // Generate code to check if the loop's trip count is less than VF * UF of the // main vector loop. - auto P = - Cost->requiresScalarEpilogue() ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT; + auto P = Cost->requiresScalarEpilogue(ForEpilogue ? EPI.EpilogueVF : VF) ? + ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT; Value *CheckMinIters = Builder.CreateICmp( P, Count, ConstantInt::get(Count->getType(), VFactor * UFactor), @@ -8467,8 +8462,8 @@ EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck( // Generate code to check if the loop's trip count is less than VF * UF of the // vector epilogue loop. - auto P = - Cost->requiresScalarEpilogue() ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT; + auto P = Cost->requiresScalarEpilogue(EPI.EpilogueVF) ? + ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT; Value *CheckMinIters = Builder.CreateICmp( P, Count, diff --git a/llvm/test/Transforms/LoopVectorize/unroll_nonlatch.ll b/llvm/test/Transforms/LoopVectorize/unroll_nonlatch.ll index 90bc2be334d86..b09b6c1457282 100644 --- a/llvm/test/Transforms/LoopVectorize/unroll_nonlatch.ll +++ b/llvm/test/Transforms/LoopVectorize/unroll_nonlatch.ll @@ -2,11 +2,11 @@ ; RUN: opt %s -S -loop-vectorize -force-vector-interleave=2 | FileCheck %s ; Demonstrate a case where we unroll a loop, but don't vectorize it. -; This currently reveals a miscompile. The original loop runs stores in -; the latch block on iterations 0 to 1022, and exits when %indvars.iv = 1023. -; Currently, the unrolled loop produced by the vectorizer runs the iteration -; where %indvar.iv = 1023 in the vector.body loop before exiting. This results -; in an out of bounds access.. +; The original loop runs stores in the latch block on iterations 0 to 1022, +; and exits when %indvars.iv = 1023. (That is, it actually runs the stores +; for an odd number of iterations.) If we unroll by two in the "vector.body" +; loop, we must exit to the epilogue on iteration with %indvars.iv = 1022 to +; avoid an out of bounds access. define void @test(double* %data) { ; CHECK-LABEL: @test( @@ -31,13 +31,13 @@ define void @test(double* %data) { ; CHECK-NEXT: store double [[TMP8]], double* [[TMP4]], align 8 ; CHECK-NEXT: store double [[TMP9]], double* [[TMP5]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1022 ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1022 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1022, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_LATCH:%.*]] ] From bd4bfe0e0c3342c613a23a6b2911a97c267a3f99 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 29 Jun 2021 16:04:35 +0100 Subject: [PATCH 192/619] [SCCP] Extend tests added 1092357ccdc9 in with UREM/SREM by 0. Add additional coverage for computing UREM/SREM C, 0. --- .../Transforms/SCCP/binaryops-range-special-cases.ll | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/llvm/test/Transforms/SCCP/binaryops-range-special-cases.ll b/llvm/test/Transforms/SCCP/binaryops-range-special-cases.ll index 59c1aad6d1f79..ad6d1e452a6a5 100644 --- a/llvm/test/Transforms/SCCP/binaryops-range-special-cases.ll +++ b/llvm/test/Transforms/SCCP/binaryops-range-special-cases.ll @@ -108,6 +108,9 @@ define void @urem_cmp_constants() { ; CHECK-NEXT: call void @use(i1 [[C_3]]) ; CHECK-NEXT: [[C_4:%.*]] = icmp eq i16 [[UREM_2]], 1 ; CHECK-NEXT: call void @use(i1 [[C_4]]) +; CHECK-NEXT: [[UREM_3:%.*]] = urem i16 12704, 0 +; CHECK-NEXT: [[C_5:%.*]] = icmp eq i16 [[UREM_3]], 1 +; CHECK-NEXT: call void @use(i1 [[C_5]]) ; CHECK-NEXT: ret void ; %sel = select i1 false, i16 0, i16 12704 @@ -121,6 +124,9 @@ define void @urem_cmp_constants() { call void @use(i1 %c.3) %c.4 = icmp eq i16 %urem.2, 1 call void @use(i1 %c.4) + %urem.3 = urem i16 %sel, 0 + %c.5 = icmp eq i16 %urem.3, 1 + call void @use(i1 %c.5) ret void } @@ -136,6 +142,9 @@ define void @srem_cmp_constants() { ; CHECK-NEXT: call void @use(i1 [[C_3]]) ; CHECK-NEXT: [[C_4:%.*]] = icmp eq i16 [[SREM_2]], 1 ; CHECK-NEXT: call void @use(i1 [[C_4]]) +; CHECK-NEXT: [[SREM_3:%.*]] = srem i16 12704, 0 +; CHECK-NEXT: [[C_5:%.*]] = icmp eq i16 [[SREM_3]], 1 +; CHECK-NEXT: call void @use(i1 [[C_5]]) ; CHECK-NEXT: ret void ; %sel = select i1 false, i16 0, i16 12704 @@ -149,5 +158,8 @@ define void @srem_cmp_constants() { call void @use(i1 %c.3) %c.4 = icmp eq i16 %srem.2, 1 call void @use(i1 %c.4) + %srem.3 = srem i16 %sel, 0 + %c.5 = icmp eq i16 %srem.3, 1 + call void @use(i1 %c.5) ret void } From e88ac7295fa298c336cb57d65bdcc72b45dbc0b0 Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Sat, 26 Jun 2021 07:34:43 -0700 Subject: [PATCH 193/619] [perf] Fix a data race in the PerfJITEventListener Concurrent JIT compilation + PerfJITEventListener triggers tsan error Reviewed By: cota Differential Revision: https://reviews.llvm.org/D104977 --- .../lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp b/llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp index d4c715cc59f61..4a236e183c8b6 100644 --- a/llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp +++ b/llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp @@ -284,6 +284,9 @@ void PerfJITEventListener::notifyObjectLoaded( NotifyCode(Name, *AddrOrErr, Size); } + // avoid races with writes + std::lock_guard Guard(Mutex); + Dumpstream->flush(); } From d03aa7d6b66fd741db2d937c18a6c6675037b888 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Tue, 29 Jun 2021 11:33:16 -0400 Subject: [PATCH 194/619] [libc++] NFCI: Remove __functional/search.h The __search helper function was once split into __functional for circular dependency reasons, however this is not an issue anymore now that we have finer grained headers. --- libcxx/include/CMakeLists.txt | 1 - libcxx/include/__algorithm/search.h | 76 +++++++++++++++++- libcxx/include/__functional/search.h | 102 ------------------------- libcxx/include/experimental/functional | 1 - libcxx/include/functional | 2 +- libcxx/include/module.modulemap | 1 - libcxx/include/regex | 1 - 7 files changed, 75 insertions(+), 109 deletions(-) delete mode 100644 libcxx/include/__functional/search.h diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index 3b03ec6685787..ec1e0762c6612 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -107,7 +107,6 @@ set(files __functional_base __functional_base_03 __functional/hash.h - __functional/search.h __functional/unary_function.h __functional/unwrap_ref.h __hash_table diff --git a/libcxx/include/__algorithm/search.h b/libcxx/include/__algorithm/search.h index 1b238fcab3e97..008b8ebb04adb 100644 --- a/libcxx/include/__algorithm/search.h +++ b/libcxx/include/__algorithm/search.h @@ -10,11 +10,11 @@ #ifndef _LIBCPP___ALGORITHM_SEARCH_H #define _LIBCPP___ALGORITHM_SEARCH_H -#include <__config> #include <__algorithm/comp.h> -#include <__functional/search.h> +#include <__config> #include <__iterator/iterator_traits.h> #include +#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header @@ -25,6 +25,78 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD +template +pair<_ForwardIterator1, _ForwardIterator1> + _LIBCPP_CONSTEXPR_AFTER_CXX11 __search(_ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _ForwardIterator2 __last2, + _BinaryPredicate __pred, forward_iterator_tag, forward_iterator_tag) { + if (__first2 == __last2) + return _VSTD::make_pair(__first1, __first1); // Everything matches an empty sequence + while (true) { + // Find first element in sequence 1 that matchs *__first2, with a mininum of loop checks + while (true) { + if (__first1 == __last1) // return __last1 if no element matches *__first2 + return _VSTD::make_pair(__last1, __last1); + if (__pred(*__first1, *__first2)) + break; + ++__first1; + } + // *__first1 matches *__first2, now match elements after here + _ForwardIterator1 __m1 = __first1; + _ForwardIterator2 __m2 = __first2; + while (true) { + if (++__m2 == __last2) // If pattern exhausted, __first1 is the answer (works for 1 element pattern) + return _VSTD::make_pair(__first1, __m1); + if (++__m1 == __last1) // Otherwise if source exhaused, pattern not found + return _VSTD::make_pair(__last1, __last1); + if (!__pred(*__m1, *__m2)) // if there is a mismatch, restart with a new __first1 + { + ++__first1; + break; + } // else there is a match, check next elements + } + } +} + +template +_LIBCPP_CONSTEXPR_AFTER_CXX11 pair<_RandomAccessIterator1, _RandomAccessIterator1> +__search(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, + _RandomAccessIterator2 __last2, _BinaryPredicate __pred, random_access_iterator_tag, + random_access_iterator_tag) { + typedef typename iterator_traits<_RandomAccessIterator1>::difference_type _D1; + typedef typename iterator_traits<_RandomAccessIterator2>::difference_type _D2; + // Take advantage of knowing source and pattern lengths. Stop short when source is smaller than pattern + const _D2 __len2 = __last2 - __first2; + if (__len2 == 0) + return _VSTD::make_pair(__first1, __first1); + const _D1 __len1 = __last1 - __first1; + if (__len1 < __len2) + return _VSTD::make_pair(__last1, __last1); + const _RandomAccessIterator1 __s = __last1 - (__len2 - 1); // Start of pattern match can't go beyond here + + while (true) { + while (true) { + if (__first1 == __s) + return _VSTD::make_pair(__last1, __last1); + if (__pred(*__first1, *__first2)) + break; + ++__first1; + } + + _RandomAccessIterator1 __m1 = __first1; + _RandomAccessIterator2 __m2 = __first2; + while (true) { + if (++__m2 == __last2) + return _VSTD::make_pair(__first1, __first1 + __len2); + ++__m1; // no need to check range on __m1 because __s guarantees we have enough source + if (!__pred(*__m1, *__m2)) { + ++__first1; + break; + } + } + } +} + template _LIBCPP_NODISCARD_EXT inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 _ForwardIterator1 search(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, diff --git a/libcxx/include/__functional/search.h b/libcxx/include/__functional/search.h deleted file mode 100644 index 061b30f0d0322..0000000000000 --- a/libcxx/include/__functional/search.h +++ /dev/null @@ -1,102 +0,0 @@ -// -*- C++ -*- -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef _LIBCPP___FUNCTIONAL___SEARCH_H -#define _LIBCPP___FUNCTIONAL___SEARCH_H - -#include <__config> -#include <__iterator/iterator_traits.h> -#include - -#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) -#pragma GCC system_header -#endif - -_LIBCPP_PUSH_MACROS -#include <__undef_macros> - -_LIBCPP_BEGIN_NAMESPACE_STD - -template -pair<_ForwardIterator1, _ForwardIterator1> - _LIBCPP_CONSTEXPR_AFTER_CXX11 __search(_ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator2 __last2, - _BinaryPredicate __pred, forward_iterator_tag, forward_iterator_tag) { - if (__first2 == __last2) - return _VSTD::make_pair(__first1, __first1); // Everything matches an empty sequence - while (true) { - // Find first element in sequence 1 that matchs *__first2, with a mininum of loop checks - while (true) { - if (__first1 == __last1) // return __last1 if no element matches *__first2 - return _VSTD::make_pair(__last1, __last1); - if (__pred(*__first1, *__first2)) - break; - ++__first1; - } - // *__first1 matches *__first2, now match elements after here - _ForwardIterator1 __m1 = __first1; - _ForwardIterator2 __m2 = __first2; - while (true) { - if (++__m2 == __last2) // If pattern exhausted, __first1 is the answer (works for 1 element pattern) - return _VSTD::make_pair(__first1, __m1); - if (++__m1 == __last1) // Otherwise if source exhaused, pattern not found - return _VSTD::make_pair(__last1, __last1); - if (!__pred(*__m1, *__m2)) // if there is a mismatch, restart with a new __first1 - { - ++__first1; - break; - } // else there is a match, check next elements - } - } -} - -template -_LIBCPP_CONSTEXPR_AFTER_CXX11 pair<_RandomAccessIterator1, _RandomAccessIterator1> -__search(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, - _RandomAccessIterator2 __last2, _BinaryPredicate __pred, random_access_iterator_tag, - random_access_iterator_tag) { - typedef typename iterator_traits<_RandomAccessIterator1>::difference_type _D1; - typedef typename iterator_traits<_RandomAccessIterator2>::difference_type _D2; - // Take advantage of knowing source and pattern lengths. Stop short when source is smaller than pattern - const _D2 __len2 = __last2 - __first2; - if (__len2 == 0) - return _VSTD::make_pair(__first1, __first1); - const _D1 __len1 = __last1 - __first1; - if (__len1 < __len2) - return _VSTD::make_pair(__last1, __last1); - const _RandomAccessIterator1 __s = __last1 - (__len2 - 1); // Start of pattern match can't go beyond here - - while (true) { - while (true) { - if (__first1 == __s) - return _VSTD::make_pair(__last1, __last1); - if (__pred(*__first1, *__first2)) - break; - ++__first1; - } - - _RandomAccessIterator1 __m1 = __first1; - _RandomAccessIterator2 __m2 = __first2; - while (true) { - if (++__m2 == __last2) - return _VSTD::make_pair(__first1, __first1 + __len2); - ++__m1; // no need to check range on __m1 because __s guarantees we have enough source - if (!__pred(*__m1, *__m2)) { - ++__first1; - break; - } - } - } -} - -_LIBCPP_END_NAMESPACE_STD - -_LIBCPP_POP_MACROS - -#endif // _LIBCPP___FUNCTIONAL___SEARCH_H diff --git a/libcxx/include/experimental/functional b/libcxx/include/experimental/functional index f6c1821da9c57..e18962002d4ae 100644 --- a/libcxx/include/experimental/functional +++ b/libcxx/include/experimental/functional @@ -86,7 +86,6 @@ inline namespace fundamentals_v1 { */ -#include <__functional/search.h> #include #include #include diff --git a/libcxx/include/functional b/libcxx/include/functional index 2b2dcd26ce46f..976b94585b0b8 100644 --- a/libcxx/include/functional +++ b/libcxx/include/functional @@ -487,11 +487,11 @@ POLICY: For non-variadic implementations, the number of arguments is limited */ +#include <__algorithm/search.h> #include <__config> #include <__debug> #include <__functional_base> #include <__functional/hash.h> -#include <__functional/search.h> #include <__functional/unary_function.h> #include <__functional/unwrap_ref.h> #include <__utility/forward.h> diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap index da0a988c00c87..9c6c678affbb6 100644 --- a/libcxx/include/module.modulemap +++ b/libcxx/include/module.modulemap @@ -409,7 +409,6 @@ module std [system] { export * module __functional { module hash { header "__functional/hash.h" } - module search { header "__functional/search.h" } module unary_function { header "__functional/unary_function.h" } module unwrap_ref { header "__functional/unwrap_ref.h" } } diff --git a/libcxx/include/regex b/libcxx/include/regex index 9e5c6ed39998b..55f1d34b51f5a 100644 --- a/libcxx/include/regex +++ b/libcxx/include/regex @@ -764,7 +764,6 @@ typedef regex_token_iterator wsregex_token_iterator; #include <__config> #include <__debug> -#include <__functional/search.h> #include <__iterator/wrap_iter.h> #include <__locale> #include From 2a063173c159cf80ebb0b8ee6f1b3db519c0f111 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Tue, 29 Jun 2021 15:41:00 +0000 Subject: [PATCH 195/619] [gn build] Port d03aa7d6b66f --- llvm/utils/gn/secondary/libcxx/include/BUILD.gn | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index 0a93519706be2..439ef44425dd4 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -164,7 +164,6 @@ if (current_toolchain == default_toolchain) { "__format/format_parse_context.h", "__function_like.h", "__functional/hash.h", - "__functional/search.h", "__functional/unary_function.h", "__functional/unwrap_ref.h", "__functional_03", From aaf6a7ac344c62e26903161ea3daa4b52c423369 Mon Sep 17 00:00:00 2001 From: David Green Date: Tue, 29 Jun 2021 16:51:47 +0100 Subject: [PATCH 196/619] [ARM] Extra test for gep immediate costs. NFC --- llvm/test/CodeGen/ARM/gep-imm.ll | 134 +++++++++++++++++++++++++++++++ 1 file changed, 134 insertions(+) create mode 100644 llvm/test/CodeGen/ARM/gep-imm.ll diff --git a/llvm/test/CodeGen/ARM/gep-imm.ll b/llvm/test/CodeGen/ARM/gep-imm.ll new file mode 100644 index 0000000000000..5358261426b47 --- /dev/null +++ b/llvm/test/CodeGen/ARM/gep-imm.ll @@ -0,0 +1,134 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv6m-none-eabi < %s | FileCheck %s --check-prefix=CHECKV6M +; RUN: llc -mtriple=thumbv7m-none-eabi < %s | FileCheck %s --check-prefix=CHECKV7M +; RUN: llc -mtriple=thumbv7a-none-eabi < %s | FileCheck %s --check-prefix=CHECKV7A + +define void @small(i32 %a, i32 %b, i32 *%c, i32* %d) { +; CHECKV6M-LABEL: small: +; CHECKV6M: @ %bb.0: @ %entry +; CHECKV6M-NEXT: str r1, [r3, #120] +; CHECKV6M-NEXT: str r0, [r3, #80] +; CHECKV6M-NEXT: str r0, [r2, #80] +; CHECKV6M-NEXT: bx lr +; +; CHECKV7M-LABEL: small: +; CHECKV7M: @ %bb.0: @ %entry +; CHECKV7M-NEXT: str r1, [r3, #120] +; CHECKV7M-NEXT: str r0, [r3, #80] +; CHECKV7M-NEXT: str r0, [r2, #80] +; CHECKV7M-NEXT: bx lr +; +; CHECKV7A-LABEL: small: +; CHECKV7A: @ %bb.0: @ %entry +; CHECKV7A-NEXT: str r1, [r3, #120] +; CHECKV7A-NEXT: str r0, [r3, #80] +; CHECKV7A-NEXT: str r0, [r2, #80] +; CHECKV7A-NEXT: bx lr +entry: + %arrayidx = getelementptr inbounds i32, i32* %d, i32 20 + store i32 %a, i32* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32, i32* %d, i32 30 + store i32 %b, i32* %arrayidx1, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %c, i32 20 + store i32 %a, i32* %arrayidx2, align 4 + ret void +} + +define void @large(i32 %a, i32 %b, i32 *%c, i32* %d) { +; CHECKV6M-LABEL: large: +; CHECKV6M: @ %bb.0: @ %entry +; CHECKV6M-NEXT: .save {r4, r5, r7, lr} +; CHECKV6M-NEXT: push {r4, r5, r7, lr} +; CHECKV6M-NEXT: movs r4, #125 +; CHECKV6M-NEXT: lsls r4, r4, #4 +; CHECKV6M-NEXT: lsls r4, r4, #2 +; CHECKV6M-NEXT: str r0, [r3, r4] +; CHECKV6M-NEXT: ldr r5, .LCPI1_0 +; CHECKV6M-NEXT: str r1, [r3, r5] +; CHECKV6M-NEXT: str r0, [r2, r4] +; CHECKV6M-NEXT: pop {r4, r5, r7, pc} +; CHECKV6M-NEXT: .p2align 2 +; CHECKV6M-NEXT: @ %bb.1: +; CHECKV6M-NEXT: .LCPI1_0: +; CHECKV6M-NEXT: .long 12000 @ 0x2ee0 +; +; CHECKV7M-LABEL: large: +; CHECKV7M: @ %bb.0: @ %entry +; CHECKV7M-NEXT: mov.w r12, #8000 +; CHECKV7M-NEXT: str.w r0, [r3, r12] +; CHECKV7M-NEXT: add.w r3, r3, #8000 +; CHECKV7M-NEXT: str.w r1, [r3, #4000] +; CHECKV7M-NEXT: str.w r0, [r2, r12] +; CHECKV7M-NEXT: bx lr +; +; CHECKV7A-LABEL: large: +; CHECKV7A: @ %bb.0: @ %entry +; CHECKV7A-NEXT: mov.w r12, #8000 +; CHECKV7A-NEXT: str.w r0, [r3, r12] +; CHECKV7A-NEXT: add.w r3, r3, #8000 +; CHECKV7A-NEXT: str.w r1, [r3, #4000] +; CHECKV7A-NEXT: str.w r0, [r2, r12] +; CHECKV7A-NEXT: bx lr +entry: + %arrayidx = getelementptr inbounds i32, i32* %d, i32 2000 + store i32 %a, i32* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32, i32* %d, i32 3000 + store i32 %b, i32* %arrayidx1, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %c, i32 2000 + store i32 %a, i32* %arrayidx2, align 4 + ret void +} + +define void @huge(i32 %a, i32 %b, i32 *%c, i32* %d) { +; CHECKV6M-LABEL: huge: +; CHECKV6M: @ %bb.0: @ %entry +; CHECKV6M-NEXT: .save {r4, r5, r7, lr} +; CHECKV6M-NEXT: push {r4, r5, r7, lr} +; CHECKV6M-NEXT: ldr r4, .LCPI2_0 +; CHECKV6M-NEXT: lsls r4, r4, #2 +; CHECKV6M-NEXT: str r0, [r3, r4] +; CHECKV6M-NEXT: ldr r5, .LCPI2_1 +; CHECKV6M-NEXT: str r1, [r3, r5] +; CHECKV6M-NEXT: str r0, [r2, r4] +; CHECKV6M-NEXT: pop {r4, r5, r7, pc} +; CHECKV6M-NEXT: .p2align 2 +; CHECKV6M-NEXT: @ %bb.1: +; CHECKV6M-NEXT: .LCPI2_0: +; CHECKV6M-NEXT: .long 200000 @ 0x30d40 +; CHECKV6M-NEXT: .LCPI2_1: +; CHECKV6M-NEXT: .long 1200000 @ 0x124f80 +; +; CHECKV7M-LABEL: huge: +; CHECKV7M: @ %bb.0: @ %entry +; CHECKV7M-NEXT: .save {r7, lr} +; CHECKV7M-NEXT: push {r7, lr} +; CHECKV7M-NEXT: movw r12, #3392 +; CHECKV7M-NEXT: movw lr, #20352 +; CHECKV7M-NEXT: movt r12, #3 +; CHECKV7M-NEXT: movt lr, #18 +; CHECKV7M-NEXT: str.w r0, [r3, r12, lsl #2] +; CHECKV7M-NEXT: str.w r1, [r3, lr] +; CHECKV7M-NEXT: str.w r0, [r2, r12, lsl #2] +; CHECKV7M-NEXT: pop {r7, pc} +; +; CHECKV7A-LABEL: huge: +; CHECKV7A: @ %bb.0: @ %entry +; CHECKV7A-NEXT: .save {r7, lr} +; CHECKV7A-NEXT: push {r7, lr} +; CHECKV7A-NEXT: movw r12, #3392 +; CHECKV7A-NEXT: movw lr, #20352 +; CHECKV7A-NEXT: movt r12, #3 +; CHECKV7A-NEXT: movt lr, #18 +; CHECKV7A-NEXT: str.w r0, [r3, r12, lsl #2] +; CHECKV7A-NEXT: str.w r1, [r3, lr] +; CHECKV7A-NEXT: str.w r0, [r2, r12, lsl #2] +; CHECKV7A-NEXT: pop {r7, pc} +entry: + %arrayidx = getelementptr inbounds i32, i32* %d, i32 200000 + store i32 %a, i32* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32, i32* %d, i32 300000 + store i32 %b, i32* %arrayidx1, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %c, i32 200000 + store i32 %a, i32* %arrayidx2, align 4 + ret void +} From f38a8b54ea31d6c915e2d6e259ebc083883e6f6d Mon Sep 17 00:00:00 2001 From: Piotr Sobczak Date: Tue, 29 Jun 2021 12:35:34 +0200 Subject: [PATCH 197/619] [AMDGPU] Fix 224-bit spills Related to D104622. Differential Revision: https://reviews.llvm.org/D105109 --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 4 + llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 15 ++++ llvm/test/CodeGen/AMDGPU/spill224.mir | 104 ++++++++++++++++++++++ 3 files changed, 123 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/spill224.mir diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 358abec1e06c2..4084619240c54 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -1394,6 +1394,8 @@ static unsigned getAGPRSpillSaveOpcode(unsigned Size) { return AMDGPU::SI_SPILL_A160_SAVE; case 24: return AMDGPU::SI_SPILL_A192_SAVE; + case 28: + return AMDGPU::SI_SPILL_A224_SAVE; case 32: return AMDGPU::SI_SPILL_A256_SAVE; case 64: @@ -1531,6 +1533,8 @@ static unsigned getAGPRSpillRestoreOpcode(unsigned Size) { return AMDGPU::SI_SPILL_A160_RESTORE; case 24: return AMDGPU::SI_SPILL_A192_RESTORE; + case 28: + return AMDGPU::SI_SPILL_A224_RESTORE; case 32: return AMDGPU::SI_SPILL_A256_RESTORE; case 64: diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index d44c8c48a2468..7fd0765cbbe66 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -814,6 +814,13 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) { case AMDGPU::SI_SPILL_A256_SAVE: case AMDGPU::SI_SPILL_A256_RESTORE: return 8; + case AMDGPU::SI_SPILL_S224_SAVE: + case AMDGPU::SI_SPILL_S224_RESTORE: + case AMDGPU::SI_SPILL_V224_SAVE: + case AMDGPU::SI_SPILL_V224_RESTORE: + case AMDGPU::SI_SPILL_A224_SAVE: + case AMDGPU::SI_SPILL_A224_RESTORE: + return 7; case AMDGPU::SI_SPILL_S192_SAVE: case AMDGPU::SI_SPILL_S192_RESTORE: case AMDGPU::SI_SPILL_V192_SAVE: @@ -1473,6 +1480,7 @@ bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex( case AMDGPU::SI_SPILL_S1024_SAVE: case AMDGPU::SI_SPILL_S512_SAVE: case AMDGPU::SI_SPILL_S256_SAVE: + case AMDGPU::SI_SPILL_S224_SAVE: case AMDGPU::SI_SPILL_S192_SAVE: case AMDGPU::SI_SPILL_S160_SAVE: case AMDGPU::SI_SPILL_S128_SAVE: @@ -1483,6 +1491,7 @@ bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex( case AMDGPU::SI_SPILL_S1024_RESTORE: case AMDGPU::SI_SPILL_S512_RESTORE: case AMDGPU::SI_SPILL_S256_RESTORE: + case AMDGPU::SI_SPILL_S224_RESTORE: case AMDGPU::SI_SPILL_S192_RESTORE: case AMDGPU::SI_SPILL_S160_RESTORE: case AMDGPU::SI_SPILL_S128_RESTORE: @@ -1519,6 +1528,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, case AMDGPU::SI_SPILL_S1024_SAVE: case AMDGPU::SI_SPILL_S512_SAVE: case AMDGPU::SI_SPILL_S256_SAVE: + case AMDGPU::SI_SPILL_S224_SAVE: case AMDGPU::SI_SPILL_S192_SAVE: case AMDGPU::SI_SPILL_S160_SAVE: case AMDGPU::SI_SPILL_S128_SAVE: @@ -1533,6 +1543,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, case AMDGPU::SI_SPILL_S1024_RESTORE: case AMDGPU::SI_SPILL_S512_RESTORE: case AMDGPU::SI_SPILL_S256_RESTORE: + case AMDGPU::SI_SPILL_S224_RESTORE: case AMDGPU::SI_SPILL_S192_RESTORE: case AMDGPU::SI_SPILL_S160_RESTORE: case AMDGPU::SI_SPILL_S128_RESTORE: @@ -1547,6 +1558,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, case AMDGPU::SI_SPILL_V1024_SAVE: case AMDGPU::SI_SPILL_V512_SAVE: case AMDGPU::SI_SPILL_V256_SAVE: + case AMDGPU::SI_SPILL_V224_SAVE: case AMDGPU::SI_SPILL_V192_SAVE: case AMDGPU::SI_SPILL_V160_SAVE: case AMDGPU::SI_SPILL_V128_SAVE: @@ -1556,6 +1568,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, case AMDGPU::SI_SPILL_A1024_SAVE: case AMDGPU::SI_SPILL_A512_SAVE: case AMDGPU::SI_SPILL_A256_SAVE: + case AMDGPU::SI_SPILL_A224_SAVE: case AMDGPU::SI_SPILL_A192_SAVE: case AMDGPU::SI_SPILL_A160_SAVE: case AMDGPU::SI_SPILL_A128_SAVE: @@ -1584,6 +1597,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, case AMDGPU::SI_SPILL_V128_RESTORE: case AMDGPU::SI_SPILL_V160_RESTORE: case AMDGPU::SI_SPILL_V192_RESTORE: + case AMDGPU::SI_SPILL_V224_RESTORE: case AMDGPU::SI_SPILL_V256_RESTORE: case AMDGPU::SI_SPILL_V512_RESTORE: case AMDGPU::SI_SPILL_V1024_RESTORE: @@ -1593,6 +1607,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, case AMDGPU::SI_SPILL_A128_RESTORE: case AMDGPU::SI_SPILL_A160_RESTORE: case AMDGPU::SI_SPILL_A192_RESTORE: + case AMDGPU::SI_SPILL_A224_RESTORE: case AMDGPU::SI_SPILL_A256_RESTORE: case AMDGPU::SI_SPILL_A512_RESTORE: case AMDGPU::SI_SPILL_A1024_RESTORE: { diff --git a/llvm/test/CodeGen/AMDGPU/spill224.mir b/llvm/test/CodeGen/AMDGPU/spill224.mir new file mode 100644 index 0000000000000..e8d6a80e84f95 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/spill224.mir @@ -0,0 +1,104 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=regallocfast -o - %s | FileCheck -check-prefix=SPILLED %s +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=regallocfast,si-lower-sgpr-spills -o - %s | FileCheck -check-prefix=EXPANDED %s + +# Make sure spill/restore of 224 bit registers works. + +--- +name: spill_restore_sgpr224 +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + stackPtrOffsetReg: $sgpr32 +body: | + ; SPILLED-LABEL: name: spill_restore_sgpr224 + ; SPILLED: bb.0: + ; SPILLED: successors: %bb.1(0x80000000) + ; SPILLED: S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 + ; SPILLED: SI_SPILL_S224_SAVE killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10, %stack.0, implicit $exec, implicit $sgpr32 :: (store 28 into %stack.0, align 4, addrspace 5) + ; SPILLED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc + ; SPILLED: bb.1: + ; SPILLED: successors: %bb.2(0x80000000) + ; SPILLED: S_NOP 1 + ; SPILLED: bb.2: + ; SPILLED: $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 = SI_SPILL_S224_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load 28 from %stack.0, align 4, addrspace 5) + ; SPILLED: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 + ; EXPANDED-LABEL: name: spill_restore_sgpr224 + ; EXPANDED: bb.0: + ; EXPANDED: successors: %bb.1(0x80000000) + ; EXPANDED: liveins: $vgpr0 + ; EXPANDED: S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 + ; EXPANDED: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, $vgpr0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 + ; EXPANDED: $vgpr0 = V_WRITELANE_B32 $sgpr5, 1, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 + ; EXPANDED: $vgpr0 = V_WRITELANE_B32 $sgpr6, 2, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 + ; EXPANDED: $vgpr0 = V_WRITELANE_B32 $sgpr7, 3, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 + ; EXPANDED: $vgpr0 = V_WRITELANE_B32 $sgpr8, 4, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 + ; EXPANDED: $vgpr0 = V_WRITELANE_B32 $sgpr9, 5, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 + ; EXPANDED: $vgpr0 = V_WRITELANE_B32 killed $sgpr10, 6, $vgpr0, implicit killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 + ; EXPANDED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc + ; EXPANDED: bb.1: + ; EXPANDED: successors: %bb.2(0x80000000) + ; EXPANDED: liveins: $vgpr0 + ; EXPANDED: S_NOP 1 + ; EXPANDED: bb.2: + ; EXPANDED: liveins: $vgpr0 + ; EXPANDED: $sgpr4 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 + ; EXPANDED: $sgpr5 = V_READLANE_B32 $vgpr0, 1 + ; EXPANDED: $sgpr6 = V_READLANE_B32 $vgpr0, 2 + ; EXPANDED: $sgpr7 = V_READLANE_B32 $vgpr0, 3 + ; EXPANDED: $sgpr8 = V_READLANE_B32 $vgpr0, 4 + ; EXPANDED: $sgpr9 = V_READLANE_B32 $vgpr0, 5 + ; EXPANDED: $sgpr10 = V_READLANE_B32 $vgpr0, 6 + ; EXPANDED: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 + bb.0: + S_NOP 0, implicit-def %0:sgpr_224 + S_CBRANCH_SCC1 implicit undef $scc, %bb.1 + + bb.1: + S_NOP 1 + + bb.2: + S_NOP 0, implicit %0 +... + +--- +name: spill_restore_vgpr224 +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + stackPtrOffsetReg: $sgpr32 +body: | + ; SPILLED-LABEL: name: spill_restore_vgpr224 + ; SPILLED: bb.0: + ; SPILLED: successors: %bb.1(0x80000000) + ; SPILLED: S_NOP 0, implicit-def renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; SPILLED: SI_SPILL_V224_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, %stack.0, $sgpr32, 0, implicit $exec :: (store 28 into %stack.0, align 4, addrspace 5) + ; SPILLED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc + ; SPILLED: bb.1: + ; SPILLED: successors: %bb.2(0x80000000) + ; SPILLED: S_NOP 1 + ; SPILLED: bb.2: + ; SPILLED: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = SI_SPILL_V224_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 28 from %stack.0, align 4, addrspace 5) + ; SPILLED: S_NOP 0, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; EXPANDED-LABEL: name: spill_restore_vgpr224 + ; EXPANDED: bb.0: + ; EXPANDED: successors: %bb.1(0x80000000) + ; EXPANDED: S_NOP 0, implicit-def renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + ; EXPANDED: SI_SPILL_V224_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, %stack.0, $sgpr32, 0, implicit $exec :: (store 28 into %stack.0, align 4, addrspace 5) + ; EXPANDED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc + ; EXPANDED: bb.1: + ; EXPANDED: successors: %bb.2(0x80000000) + ; EXPANDED: S_NOP 1 + ; EXPANDED: bb.2: + ; EXPANDED: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = SI_SPILL_V224_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 28 from %stack.0, align 4, addrspace 5) + ; EXPANDED: S_NOP 0, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 + bb.0: + S_NOP 0, implicit-def %0:vreg_224 + S_CBRANCH_SCC1 implicit undef $scc, %bb.1 + + bb.1: + S_NOP 1 + + bb.2: + S_NOP 0, implicit %0 +... From 56fa49878b71a1d92b9a93b586cff26829abe157 Mon Sep 17 00:00:00 2001 From: gbreynoo Date: Tue, 29 Jun 2021 17:03:21 +0100 Subject: [PATCH 198/619] [llvm-objdump] Add testing for --print-imm-hex, --headers, --section-headers and --private-headers llvm-objdump had some missing coverage that is fixed by this change: - A test specifically for --print-imm-hex, and coverage of --no-print-imm-hex - section-headers.test checks the aliases --headers or --section-headers - A test for the use of --private-headers for ELF that checks the output - A test for ELF program headers Differential Revision: https://reviews.llvm.org/D103974 --- .../llvm-objdump/ELF/private-headers.test | 65 ++++ .../llvm-objdump/ELF/program-headers.test | 324 ++++++++++++++++++ .../tools/llvm-objdump/X86/print-imm-hex.s | 29 ++ .../tools/llvm-objdump/section-headers.test | 6 + 4 files changed, 424 insertions(+) create mode 100644 llvm/test/tools/llvm-objdump/ELF/private-headers.test create mode 100644 llvm/test/tools/llvm-objdump/ELF/program-headers.test create mode 100644 llvm/test/tools/llvm-objdump/X86/print-imm-hex.s diff --git a/llvm/test/tools/llvm-objdump/ELF/private-headers.test b/llvm/test/tools/llvm-objdump/ELF/private-headers.test new file mode 100644 index 0000000000000..bf18f5810c571 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/ELF/private-headers.test @@ -0,0 +1,65 @@ +## Check that with ELF input --private-headers outputs the program header, +## dynamic section and version definitions. +# RUN: yaml2obj %s -o %t +# RUN: llvm-objdump --private-headers %t | FileCheck %s + +# CHECK: Program Header: +# CHECK-NEXT: LOAD off 0x00000000000000b0 vaddr 0x0000000000001000 paddr 0x0000000000001000 align 2**0 +# CHECK-NEXT: filesz 0x000000000000003d memsz 0x000000000000003d flags --- +# CHECK-NEXT: DYNAMIC off 0x00000000000000cd vaddr 0x000000000000101d paddr 0x000000000000101d align 2**0 +# CHECK-NEXT: filesz 0x0000000000000020 memsz 0x0000000000000020 flags --- +# CHECK: Dynamic Section: +# CHECK-NEXT: NEEDED bar +# CHECK: Version definitions: +# CHECK-NEXT: 1 0x01 0x075bcd15 foo +# CHECK-NEXT: 2 0x02 0x3ade68b1 VERSION_1 +# CHECK-NEXT: VERSION_2 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +Sections: + - Name: .dynstr + Type: SHT_STRTAB + Address: 0x1000 + Content: "0062617200666F6F0056455253494F4E5F320056455253494F4E5F3100" + # Content is: bar, foo, VERSION_2, VERSION_1 + - Name: .dynamic + Type: SHT_DYNAMIC + Flags: [ SHF_ALLOC ] + Entries: + - Tag: DT_NEEDED + Value: 0x1 + - Tag: DT_NULL + Value: 0x0 + - Name: .gnu.version_d + Type: SHT_GNU_verdef + Entries: + - Version: 1 + Flags: 1 + VersionNdx: 1 + Hash: 123456789 + Names: + - foo + - Version: 1 + Flags: 2 + VersionNdx: 2 + Hash: 987654321 + Names: + - VERSION_1 + - VERSION_2 +ProgramHeaders: + - Type: PT_LOAD + VAddr: 0x1000 + FirstSec: .dynstr + LastSec: .dynamic + - Type: PT_DYNAMIC + VAddr: 0x101D + FirstSec: .dynamic + LastSec: .dynamic +DynamicSymbols: + - Name: bar + Binding: STB_GLOBAL diff --git a/llvm/test/tools/llvm-objdump/ELF/program-headers.test b/llvm/test/tools/llvm-objdump/ELF/program-headers.test new file mode 100644 index 0000000000000..4c61e974d6db5 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/ELF/program-headers.test @@ -0,0 +1,324 @@ +## Check that program headers are output correctly + +# RUN: yaml2obj --docnum=1 -DBITS=32 -DMACHINE=EM_386 %s -o %t32.elf +# RUN: llvm-objdump --private-headers %t32.elf | FileCheck %s --check-prefixes=ELF32 + +# ELF32: Program Header: +# ELF32-NEXT: PHDR off 0x00000314 vaddr 0x00001000 paddr 0x00001000 align 2**0 +# ELF32-NEXT: filesz 0x00000003 memsz 0x00000003 flags -w- +# ELF32-NEXT: PHDR off 0x00000317 vaddr 0x00002000 paddr 0x00002000 align 2**0 +# ELF32-NEXT: filesz 0x00000007 memsz 0x00000007 flags --x +# ELF32-NEXT: UNKNOWN off 0x00000317 vaddr 0x00002000 paddr 0x00002000 align 2**0 +# ELF32-NEXT: filesz 0x00000007 memsz 0x00000007 flags --x +# ELF32-NEXT: DYNAMIC off 0x00000324 vaddr 0x00006000 paddr 0x00006000 align 2**0 +# ELF32-NEXT: filesz 0x00000010 memsz 0x00000010 flags rwx +# ELF32-NEXT: INTERP off 0x0000031e vaddr 0x00003000 paddr 0x00003000 align 2**0 +# ELF32-NEXT: filesz 0x00000004 memsz 0x00000004 flags rw- +# ELF32-NEXT: NOTE off 0x00000314 vaddr 0x00001000 paddr 0x00001000 align 2**0 +# ELF32-NEXT: filesz 0x00000003 memsz 0x00000003 flags --- +# ELF32-NEXT: UNKNOWN off 0x00000314 vaddr 0x00001000 paddr 0x00001000 align 2**0 +# ELF32-NEXT: filesz 0x00000001 memsz 0x00000001 flags --- +# ELF32-NEXT: TLS off 0x00000322 vaddr 0x00004000 paddr 0x00004000 align 2**0 +# ELF32-NEXT: filesz 0x00000001 memsz 0x00000001 flags --- +# ELF32-NEXT: UNKNOWN off 0x00000314 vaddr 0x00001000 paddr 0x00001000 align 2**0 +# ELF32-NEXT: filesz 0x00000003 memsz 0x00000003 flags --- +# ELF32-NEXT:EH_FRAME off 0x00000314 vaddr 0x00001000 paddr 0x00001000 align 2**0 +# ELF32-NEXT: filesz 0x00000003 memsz 0x00000003 flags --- +# ELF32-NEXT: UNKNOWN off 0x00000314 vaddr 0x00001000 paddr 0x00001000 align 2**0 +# ELF32-NEXT: filesz 0x00000003 memsz 0x00000003 flags --- +# ELF32-NEXT: STACK off 0x00000314 vaddr 0x00001000 paddr 0x00001000 align 2**0 +# ELF32-NEXT: filesz 0x00000003 memsz 0x00000003 flags --- +# ELF32-NEXT: RELRO off 0x00000314 vaddr 0x00001000 paddr 0x00001000 align 2**0 +# ELF32-NEXT: filesz 0x00000003 memsz 0x00000003 flags --- +# ELF32-NEXT:PROPERTY off 0x00000314 vaddr 0x00001000 paddr 0x00001000 align 2**0 +# ELF32-NEXT: filesz 0x00000003 memsz 0x00000003 flags --- +# ELF32-NEXT: OPENBSD_RANDOMIZE off 0x00000314 vaddr 0x00001000 paddr 0x00001000 align 2**0 +# ELF32-NEXT: filesz 0x00000003 memsz 0x00000003 flags --- +# ELF32-NEXT: OPENBSD_WXNEEDED off 0x00000314 vaddr 0x00001000 paddr 0x00001000 align 2**0 +# ELF32-NEXT: filesz 0x00000003 memsz 0x00000003 flags --- +# ELF32-NEXT: OPENBSD_BOOTDATA off 0x00000314 vaddr 0x00001000 paddr 0x00001000 align 2**0 +# ELF32-NEXT: filesz 0x00000003 memsz 0x00000003 flags --- +# ELF32-NEXT: UNKNOWN off 0x00000314 vaddr 0x00001000 paddr 0x00001000 align 2**0 +# ELF32-NEXT: filesz 0x00000003 memsz 0x00000003 flags --- +# ELF32-NEXT: UNKNOWN off 0x00000314 vaddr 0x00001000 paddr 0x00001000 align 2**0 +# ELF32-NEXT: filesz 0x00000003 memsz 0x00000003 flags --- +# ELF32-NEXT: UNKNOWN off 0x00000314 vaddr 0x00001000 paddr 0x00001000 align 2**0 +# ELF32-NEXT: filesz 0x00000003 memsz 0x00000003 flags --- +# ELF32-NEXT: UNKNOWN off 0x00000314 vaddr 0x00001000 paddr 0x00001000 align 2**0 +# ELF32-NEXT: filesz 0x00000003 memsz 0x00000003 flags --- +# ELF32-NEXT: UNKNOWN off 0x00000314 vaddr 0x00001000 paddr 0x00001000 align 2**0 +# ELF32-NEXT: filesz 0x00000003 memsz 0x00000003 flags --- +# ELF32-NEXT: UNKNOWN off 0x00000314 vaddr 0x00001000 paddr 0x00001000 align 2**0 +# ELF32-NEXT: filesz 0x00000003 memsz 0x00000003 flags --- +# ELF32-EMPTY: + +# RUN: yaml2obj --docnum=1 -DBITS=64 -DMACHINE=EM_X86_64 %s -o %t64.elf +# RUN: llvm-objdump --private-headers %t64.elf | FileCheck %s --check-prefixes=ELF64 + +# ELF64: Program Header: +# ELF64-NEXT: PHDR off 0x0000000000000548 vaddr 0x0000000000001000 paddr 0x0000000000001000 align 2**0 +# ELF64-NEXT: filesz 0x0000000000000003 memsz 0x0000000000000003 flags -w- +# ELF64-NEXT: PHDR off 0x000000000000054b vaddr 0x0000000000002000 paddr 0x0000000000002000 align 2**0 +# ELF64-NEXT: filesz 0x0000000000000007 memsz 0x0000000000000007 flags --x +# ELF64-NEXT: UNKNOWN off 0x000000000000054b vaddr 0x0000000000002000 paddr 0x0000000000002000 align 2**0 +# ELF64-NEXT: filesz 0x0000000000000007 memsz 0x0000000000000007 flags --x +# ELF64-NEXT: DYNAMIC off 0x0000000000000558 vaddr 0x0000000000006000 paddr 0x0000000000006000 align 2**0 +# ELF64-NEXT: filesz 0x0000000000000020 memsz 0x0000000000000020 flags rwx +# ELF64-NEXT: INTERP off 0x0000000000000552 vaddr 0x0000000000003000 paddr 0x0000000000003000 align 2**0 +# ELF64-NEXT: filesz 0x0000000000000004 memsz 0x0000000000000004 flags rw- +# ELF64-NEXT: NOTE off 0x0000000000000548 vaddr 0x0000000000001000 paddr 0x0000000000001000 align 2**0 +# ELF64-NEXT: filesz 0x0000000000000003 memsz 0x0000000000000003 flags --- +# ELF64-NEXT: UNKNOWN off 0x0000000000000548 vaddr 0x0000000000001000 paddr 0x0000000000001000 align 2**0 +# ELF64-NEXT: filesz 0x0000000000000001 memsz 0x0000000000000001 flags --- +# ELF64-NEXT: TLS off 0x0000000000000556 vaddr 0x0000000000004000 paddr 0x0000000000004000 align 2**0 +# ELF64-NEXT: filesz 0x0000000000000001 memsz 0x0000000000000001 flags --- +# ELF64-NEXT: UNKNOWN off 0x0000000000000548 vaddr 0x0000000000001000 paddr 0x0000000000001000 align 2**0 +# ELF64-NEXT: filesz 0x0000000000000003 memsz 0x0000000000000003 flags --- +# ELF64-NEXT:EH_FRAME off 0x0000000000000548 vaddr 0x0000000000001000 paddr 0x0000000000001000 align 2**0 +# ELF64-NEXT: filesz 0x0000000000000003 memsz 0x0000000000000003 flags --- +# ELF64-NEXT: UNKNOWN off 0x0000000000000548 vaddr 0x0000000000001000 paddr 0x0000000000001000 align 2**0 +# ELF64-NEXT: filesz 0x0000000000000003 memsz 0x0000000000000003 flags --- +# ELF64-NEXT: STACK off 0x0000000000000548 vaddr 0x0000000000001000 paddr 0x0000000000001000 align 2**0 +# ELF64-NEXT: filesz 0x0000000000000003 memsz 0x0000000000000003 flags --- +# ELF64-NEXT: RELRO off 0x0000000000000548 vaddr 0x0000000000001000 paddr 0x0000000000001000 align 2**0 +# ELF64-NEXT: filesz 0x0000000000000003 memsz 0x0000000000000003 flags --- +# ELF64-NEXT: PROPERTY off 0x0000000000000548 vaddr 0x0000000000001000 paddr 0x0000000000001000 align 2**0 +# ELF64-NEXT: filesz 0x0000000000000003 memsz 0x0000000000000003 flags --- +# ELF64-NEXT: OPENBSD_RANDOMIZE off 0x0000000000000548 vaddr 0x0000000000001000 paddr 0x0000000000001000 align 2**0 +# ELF64-NEXT: filesz 0x0000000000000003 memsz 0x0000000000000003 flags --- +# ELF64-NEXT: OPENBSD_WXNEEDED off 0x0000000000000548 vaddr 0x0000000000001000 paddr 0x0000000000001000 align 2**0 +# ELF64-NEXT: filesz 0x0000000000000003 memsz 0x0000000000000003 flags --- +# ELF64-NEXT: OPENBSD_BOOTDATA off 0x0000000000000548 vaddr 0x0000000000001000 paddr 0x0000000000001000 align 2**0 +# ELF64-NEXT: filesz 0x0000000000000003 memsz 0x0000000000000003 flags --- +# ELF64-NEXT: UNKNOWN off 0x0000000000000548 vaddr 0x0000000000001000 paddr 0x0000000000001000 align 2**0 +# ELF64-NEXT: filesz 0x0000000000000003 memsz 0x0000000000000003 flags --- +# ELF64-NEXT: UNKNOWN off 0x0000000000000548 vaddr 0x0000000000001000 paddr 0x0000000000001000 align 2**0 +# ELF64-NEXT: filesz 0x0000000000000003 memsz 0x0000000000000003 flags --- +# ELF64-NEXT: UNKNOWN off 0x0000000000000548 vaddr 0x0000000000001000 paddr 0x0000000000001000 align 2**0 +# ELF64-NEXT: filesz 0x0000000000000003 memsz 0x0000000000000003 flags --- +# ELF64-NEXT: UNKNOWN off 0x0000000000000548 vaddr 0x0000000000001000 paddr 0x0000000000001000 align 2**0 +# ELF64-NEXT: filesz 0x0000000000000003 memsz 0x0000000000000003 flags --- +# ELF64-NEXT: UNKNOWN off 0x0000000000000548 vaddr 0x0000000000001000 paddr 0x0000000000001000 align 2**0 +# ELF64-NEXT: filesz 0x0000000000000003 memsz 0x0000000000000003 flags --- +# ELF64-NEXT: UNKNOWN off 0x0000000000000548 vaddr 0x0000000000001000 paddr 0x0000000000001000 align 2**0 +# ELF64-NEXT: filesz 0x0000000000000003 memsz 0x0000000000000003 flags --- +# ELF64-EMPTY: + +--- !ELF +FileHeader: + Class: ELFCLASS[[BITS]] + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: [[MACHINE]] +Sections: + - Name: .foo.begin + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC ] + Address: 0x1000 + Size: 0x1 + - Name: .foo.end + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC ] + Size: 0x2 + - Name: .bar.begin + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC ] + Address: 0x2000 + Size: 0x3 + - Name: .bar.end + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC ] + Size: 0x4 + - Name: .interp + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC ] + Address: 0x3000 + Content: "41424300" ## "ABC" + - Name: .tls + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_TLS ] + Address: 0x4000 + Size: 0x1 + - Name: .unused + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_TLS ] + Address: 0x5000 + Size: 0x1 + - Name: .dynamic + Type: SHT_DYNAMIC + Flags: [ SHF_ALLOC ] + Address: 0x6000 + Entries: + - Tag: DT_NEEDED + Value: 0x1 + - Tag: DT_NULL + Value: 0x0 +ProgramHeaders: +## Case 1: an arbitrary segment with sections. + - Type: PT_PHDR + Flags: [ PF_W ] + VAddr: 0x1000 + FirstSec: .foo.begin + LastSec: .foo.end +## Case 2: another segment with different sections. + - Type: PT_PHDR + Flags: [ PF_X ] + VAddr: 0x2000 + FirstSec: .bar.begin + LastSec: .bar.end +## Case 3: the PT_NULL segment. + - Type: PT_NULL + Flags: [ PF_X ] + VAddr: 0x2000 + FirstSec: .bar.begin + LastSec: .bar.end +## Case 4: the PT_DYNAMIC segment. + - Type: PT_DYNAMIC + Flags: [ PF_R, PF_W, PF_X ] + VAddr: 0x6000 + FirstSec: .dynamic + LastSec: .dynamic +## Case 5: the PT_INTERP segment. + - Type: PT_INTERP + Flags: [ PF_R, PF_W ] + VAddr: 0x3000 + FirstSec: .interp + LastSec: .interp +## Case 6: the PT_NOTE segment. + - Type: PT_NOTE + VAddr: 0x1000 + FirstSec: .foo.begin + LastSec: .foo.end +## Case 7: the PT_SHLIB segment. + - Type: PT_SHLIB + VAddr: 0x1000 + FirstSec: .foo.begin + LastSec: .foo.begin +## Case 8: the PT_TLS segment. + - Type: PT_TLS + VAddr: 0x4000 + FirstSec: .tls + LastSec: .tls +## Case 9: the PT_LOOS segment. + - Type: 0x60000000 ## PT_LOOS + VAddr: 0x1000 + FirstSec: .foo.begin + LastSec: .foo.end +## Case 10: the PT_GNU_EH_FRAME segment. + - Type: PT_GNU_EH_FRAME + VAddr: 0x1000 + FirstSec: .foo.begin + LastSec: .foo.end +## Case 11: the PT_SUNW_UNWIND segment. + - Type: 0x6464e550 ## PT_SUNW_UNWIND + VAddr: 0x1000 + FirstSec: .foo.begin + LastSec: .foo.end +## Case 12: the PT_GNU_STACK segment. + - Type: PT_GNU_STACK + VAddr: 0x1000 + FirstSec: .foo.begin + LastSec: .foo.end +## Case 13: the PT_GNU_RELRO segment. + - Type: PT_GNU_RELRO + VAddr: 0x1000 + FirstSec: .foo.begin + LastSec: .foo.end +## Case 14: the PT_GNU_PROPERTY segment. + - Type: PT_GNU_PROPERTY + VAddr: 0x1000 + FirstSec: .foo.begin + LastSec: .foo.end +## Case 15: the PT_OPENBSD_RANDOMIZE segment. + - Type: 0x65a3dbe6 ## PT_OPENBSD_RANDOMIZE + VAddr: 0x1000 + FirstSec: .foo.begin + LastSec: .foo.end +## Case 16: the PT_OPENBSD_WXNEEDED segment. + - Type: 0x65a3dbe7 ## PT_OPENBSD_WXNEEDED + VAddr: 0x1000 + FirstSec: .foo.begin + LastSec: .foo.end +## Case 17: the PT_OPENBSD_BOOTDATA segment. + - Type: 0x65a41be6 ## PT_OPENBSD_BOOTDATA + VAddr: 0x1000 + FirstSec: .foo.begin + LastSec: .foo.end +## Case 18: the PT_HIOS segment. + - Type: 0x6fffffff ## PT_HIOS + VAddr: 0x1000 + FirstSec: .foo.begin + LastSec: .foo.end +## Case 19: the PT_LOPROC/PT_ARM_ARCHEXT/PT_MIPS_REGINFO segment. + - Type: 0x70000000 ## PT_LOPROC/PT_ARM_ARCHEXT/PT_MIPS_REGINFO + VAddr: 0x1000 + FirstSec: .foo.begin + LastSec: .foo.end +## Case 20: the PT_ARM_EXIDX/PT_MIPS_RTPROC segment. + - Type: 0x70000001 ## PT_ARM_EXIDX, PT_MIPS_RTPROC + VAddr: 0x1000 + FirstSec: .foo.begin + LastSec: .foo.end +## Case 20: the PT_MIPS_OPTIONS segment. + - Type: 0x70000002 ## PT_MIPS_OPTIONS + VAddr: 0x1000 + FirstSec: .foo.begin + LastSec: .foo.end +## Case 21: the PT_MIPS_ABIFLAGS segment. + - Type: 0x70000003 ## PT_MIPS_ABIFLAGS + VAddr: 0x1000 + FirstSec: .foo.begin + LastSec: .foo.end +## Case 22: the PT_HIPROC segment. + - Type: 0x7fffffff ## PT_HIPROC + VAddr: 0x1000 + FirstSec: .foo.begin + LastSec: .foo.end + +## Check we report an error / warning when we are unable to read program headers. +## Case A: the e_phentsize field is invalid. +# RUN: yaml2obj --docnum=2 -DPHENTSIZE=1 %s -o %t.phdr.err +# RUN: not llvm-objdump --private-headers %t.phdr.err 2>&1 | \ +# RUN: FileCheck %s -DFILE=%t.phdr.err --check-prefix=PHENTSIZE + +# PHENTSIZE: Program Header: +# PHENTSIZE-NEXT: warning: '[[FILE]]': unable to read program headers: invalid e_phentsize: 1 +# PHENTSIZE-NEXT: error: '[[FILE]]': invalid e_phentsize: 1 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + EPhEntSize: [[PHENTSIZE=]] + EPhOff: [[PHOFF=]] +Sections: + - Name: .foo + Type: SHT_PROGBITS +ProgramHeaders: + - Type: PT_PHDR + FirstSec: .foo + LastSec: .foo + +## Case B: the value of the e_phoff field is invalid. + +## Check that we do not report a warning when the program header table ends right before the end of the file. +## 0x160 + size of headers (56) == file size. +# RUN: yaml2obj --docnum=2 -DPHOFF=0x160 %s -o %t.phdr.no.err2 +# RUN: llvm-objdump %t.phdr.no.err2 --private-headers 2>&1 | FileCheck %s --implicit-check-not=warning: + +## Check we report a warning / error when e_phoff goes 1 byte past the end of the file. +# RUN: yaml2obj --docnum=2 -DPHOFF=0x161 %s -o %t.phdr.err2 +# RUN: not llvm-objdump --private-headers %t.phdr.err2 2>&1 | \ +# RUN: FileCheck %s -DFILE=%t.phdr.err2 --check-prefix=PHOFF -DOFF=0x161 + +# PHOFF: Program Header: +# PHOFF-NEXT: warning: '[[FILE]]': unable to read program headers: program headers are longer than binary of size 408: e_phoff = [[OFF]], e_phnum = 1, e_phentsize = 56 +# PHOFF-NEXT: error: '[[FILE]]': program headers are longer than binary of size 408: e_phoff = [[OFF]], e_phnum = 1, e_phentsize = 56 + + +## Check we report a warning / error when the value of e_phoff is so large that +## e_phoff + e_phnum * e_phentsize > UINT64_MAX. +# RUN: yaml2obj --docnum=2 -DPHOFF=0xffffffffffffffff %s -o %t.phdr.err3 +# RUN: not llvm-objdump --private-headers %t.phdr.err3 2>&1 | \ +# RUN: FileCheck %s -DFILE=%t.phdr.err3 --check-prefix=PHOFF -DOFF=0xffffffffffffffff diff --git a/llvm/test/tools/llvm-objdump/X86/print-imm-hex.s b/llvm/test/tools/llvm-objdump/X86/print-imm-hex.s new file mode 100644 index 0000000000000..8de49fd4c2a7c --- /dev/null +++ b/llvm/test/tools/llvm-objdump/X86/print-imm-hex.s @@ -0,0 +1,29 @@ +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t + +# RUN: llvm-objdump -d %t | FileCheck %s --check-prefix=NOPRINT +# RUN: llvm-objdump -d --print-imm-hex --no-print-imm-hex %t | FileCheck %s --check-prefix=NOPRINT +# RUN: llvm-objdump -d --no-print-imm-hex --print-imm-hex %t | FileCheck %s --check-prefix=PRINT + +.text + retq + movq 0x123456(%rip),%rax + movabs $0x5555555555555554,%rax + lwpval $0x0, 0x40(%rdx,%rax), %r15d + lwpins $0x0, 0x1cf01cf0, %r15d + .word 0xffff + +# NOPRINT: 0000000000000000 <.text>: +# NOPRINT-NEXT: 0: c3 retq +# NOPRINT-NEXT: 1: 48 8b 05 56 34 12 00 movq 1193046(%rip), %rax # 0x12345e <.text+0x12345e> +# NOPRINT-NEXT: 8: 48 b8 54 55 55 55 55 55 55 55 movabsq $6148914691236517204, %rax # imm = 0x5555555555555554 +# NOPRINT-NEXT: 12: 8f ea 00 12 4c 02 40 00 00 00 00 lwpval $0, 64(%rdx,%rax), %r15d +# NOPRINT-NEXT: 1d: 8f ea 00 12 04 25 f0 1c f0 1c 00 00 00 00 lwpins $0, 485498096, %r15d +# NOPRINT-NEXT: 2b: ff ff + +# PRINT: 0000000000000000 <.text>: +# PRINT-NEXT: 0: c3 retq +# PRINT-NEXT: 1: 48 8b 05 56 34 12 00 movq 0x123456(%rip), %rax # 0x12345e <.text+0x12345e> +# PRINT-NEXT: 8: 48 b8 54 55 55 55 55 55 55 55 movabsq $0x5555555555555554, %rax # imm = 0x5555555555555554 +# PRINT-NEXT: 12: 8f ea 00 12 4c 02 40 00 00 00 00 lwpval $0x0, 0x40(%rdx,%rax), %r15d +# PRINT-NEXT: 1d: 8f ea 00 12 04 25 f0 1c f0 1c 00 00 00 00 lwpins $0x0, 0x1cf01cf0, %r15d +# PRINT-NEXT: 2b: ff ff diff --git a/llvm/test/tools/llvm-objdump/section-headers.test b/llvm/test/tools/llvm-objdump/section-headers.test index f8159be030647..c850e3a3cb3f5 100644 --- a/llvm/test/tools/llvm-objdump/section-headers.test +++ b/llvm/test/tools/llvm-objdump/section-headers.test @@ -51,6 +51,12 @@ Sections: - Name: .debug_info Type: SHT_PROGBITS Flags: [ SHF_WRITE, SHF_ALLOC ] + +## Check that --section-headers and --headers are aliases for -h. +# RUN: llvm-objdump --section-headers --show-lma %t-whitespace.o \ +# RUN: | FileCheck %s --check-prefix=WHITESPACE --strict-whitespace +# RUN: llvm-objdump --headers --show-lma %t-whitespace.o \ +# RUN: | FileCheck %s --check-prefix=WHITESPACE --strict-whitespace ## The name field automatically expands past the default 13 columns when a ## section name is longer than that. From a37f558682e479686cf1644a6aa70b8f46fa1b6a Mon Sep 17 00:00:00 2001 From: gbreynoo Date: Tue, 29 Jun 2021 17:18:32 +0100 Subject: [PATCH 199/619] [llvm-objdump] Add --no-print-imm-hex to the command guide The option --no-print-imm-hex was not included in the command guide for llvm-objdump but appears in the help text. This commit adds it to the command guide. Differential Revision: https://reviews.llvm.org/D104717 --- llvm/docs/CommandGuide/llvm-objdump.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/llvm/docs/CommandGuide/llvm-objdump.rst b/llvm/docs/CommandGuide/llvm-objdump.rst index 28ac7e4abbc42..3b79b9c490744 100644 --- a/llvm/docs/CommandGuide/llvm-objdump.rst +++ b/llvm/docs/CommandGuide/llvm-objdump.rst @@ -169,6 +169,10 @@ OPTIONS When disassembling, do not print leading addresses. +.. option:: --no-print-imm-hex + + Do not use hex format for immediate values in disassembly output (default). + .. option:: --no-show-raw-insn When disassembling, do not print the raw bytes of each instruction. From a8f819c6d85e1990954d8846dac769bb789d2ba9 Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Sun, 27 Jun 2021 17:44:31 -0700 Subject: [PATCH 200/619] [mlir:Async] Remove async operations if it is statically known that the parallel operation has a single compute block Depends On D104850 Add a test that verifies that canonicalization removes all async overheads if it is statically known that the scf.parallel operation will be computed using a single block. Reviewed By: herhut Differential Revision: https://reviews.llvm.org/D104891 --- mlir/include/mlir/Dialect/Async/IR/Async.h | 1 + .../include/mlir/Dialect/Async/IR/AsyncOps.td | 2 + mlir/lib/Dialect/Async/IR/Async.cpp | 30 +++++++++++ .../Async/Transforms/AsyncParallelFor.cpp | 54 ++++++++++++++----- .../async-parallel-for-async-dispatch.mlir | 7 ++- .../async-parallel-for-canonicalize.mlir | 33 ++++++++++++ 6 files changed, 114 insertions(+), 13 deletions(-) create mode 100644 mlir/test/Dialect/Async/async-parallel-for-canonicalize.mlir diff --git a/mlir/include/mlir/Dialect/Async/IR/Async.h b/mlir/include/mlir/Dialect/Async/IR/Async.h index d84b8f8ea98a6..0783009d2855c 100644 --- a/mlir/include/mlir/Dialect/Async/IR/Async.h +++ b/mlir/include/mlir/Dialect/Async/IR/Async.h @@ -20,6 +20,7 @@ #include "mlir/IR/Dialect.h" #include "mlir/IR/OpDefinition.h" #include "mlir/IR/OpImplementation.h" +#include "mlir/IR/PatternMatch.h" #include "mlir/Interfaces/ControlFlowInterfaces.h" #include "mlir/Interfaces/SideEffectInterfaces.h" diff --git a/mlir/include/mlir/Dialect/Async/IR/AsyncOps.td b/mlir/include/mlir/Dialect/Async/IR/AsyncOps.td index f9ddd67a7961d..d168b8cefad8a 100644 --- a/mlir/include/mlir/Dialect/Async/IR/AsyncOps.td +++ b/mlir/include/mlir/Dialect/Async/IR/AsyncOps.td @@ -177,6 +177,8 @@ def Async_CreateGroupOp : Async_Op<"create_group", [NoSideEffect]> { let arguments = (ins Index:$size); let results = (outs Async_GroupType:$result); + let hasCanonicalizeMethod = 1; + let assemblyFormat = "$size `:` type($result) attr-dict"; } diff --git a/mlir/lib/Dialect/Async/IR/Async.cpp b/mlir/lib/Dialect/Async/IR/Async.cpp index a06b2b6664690..bd627edbd4271 100644 --- a/mlir/lib/Dialect/Async/IR/Async.cpp +++ b/mlir/lib/Dialect/Async/IR/Async.cpp @@ -245,6 +245,36 @@ static LogicalResult verify(ExecuteOp op) { return success(); } +//===----------------------------------------------------------------------===// +/// CreateGroupOp +//===----------------------------------------------------------------------===// + +LogicalResult CreateGroupOp::canonicalize(CreateGroupOp op, + PatternRewriter &rewriter) { + // Find all `await_all` users of the group. + llvm::SmallVector awaitAllUsers; + + auto isAwaitAll = [&](Operation *op) -> bool { + if (AwaitAllOp awaitAll = dyn_cast(op)) { + awaitAllUsers.push_back(awaitAll); + return true; + } + return false; + }; + + // Check if all users of the group are `await_all` operations. + if (!llvm::all_of(op->getUsers(), isAwaitAll)) + return failure(); + + // If group is only awaited without adding anything to it, we can safely erase + // the create operation and all users. + for (AwaitAllOp awaitAll : awaitAllUsers) + rewriter.eraseOp(awaitAll); + rewriter.eraseOp(op); + + return success(); +} + //===----------------------------------------------------------------------===// /// AwaitOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Async/Transforms/AsyncParallelFor.cpp b/mlir/lib/Dialect/Async/Transforms/AsyncParallelFor.cpp index 1d545a52f7152..a104fb73571d9 100644 --- a/mlir/lib/Dialect/Async/Transforms/AsyncParallelFor.cpp +++ b/mlir/lib/Dialect/Async/Transforms/AsyncParallelFor.cpp @@ -513,18 +513,48 @@ static void doAsyncDispatch(ImplicitLocOpBuilder &b, PatternRewriter &rewriter, Value groupSize = b.create(blockCount, c1); Value group = b.create(GroupType::get(ctx), groupSize); - // Pack the async dispath function operands to launch the work splitting. - SmallVector asyncDispatchOperands = {group, c0, blockCount, blockSize}; - asyncDispatchOperands.append(tripCounts); - asyncDispatchOperands.append(op.lowerBound().begin(), op.lowerBound().end()); - asyncDispatchOperands.append(op.upperBound().begin(), op.upperBound().end()); - asyncDispatchOperands.append(op.step().begin(), op.step().end()); - asyncDispatchOperands.append(parallelComputeFunction.captures); - - // Launch async dispatch function for [0, blockCount) range. - b.create(asyncDispatchFunction.sym_name(), - asyncDispatchFunction.getCallableResults(), - asyncDispatchOperands); + // Appends operands shared by async dispatch and parallel compute functions to + // the given operands vector. + auto appendBlockComputeOperands = [&](SmallVector &operands) { + operands.append(tripCounts); + operands.append(op.lowerBound().begin(), op.lowerBound().end()); + operands.append(op.upperBound().begin(), op.upperBound().end()); + operands.append(op.step().begin(), op.step().end()); + operands.append(parallelComputeFunction.captures); + }; + + // Check if the block size is one, in this case we can skip the async dispatch + // completely. If this will be known statically, then canonicalization will + // erase async group operations. + Value isSingleBlock = b.create(CmpIPredicate::eq, blockCount, c1); + + auto syncDispatch = [&](OpBuilder &nestedBuilder, Location loc) { + ImplicitLocOpBuilder nb(loc, nestedBuilder); + + // Call parallel compute function for the single block. + SmallVector operands = {c0, blockSize}; + appendBlockComputeOperands(operands); + + nb.create(parallelComputeFunction.func.sym_name(), + parallelComputeFunction.func.getCallableResults(), + operands); + nb.create(); + }; + + auto asyncDispatch = [&](OpBuilder &nestedBuilder, Location loc) { + ImplicitLocOpBuilder nb(loc, nestedBuilder); + + // Launch async dispatch function for [0, blockCount) range. + SmallVector operands = {group, c0, blockCount, blockSize}; + appendBlockComputeOperands(operands); + + nb.create(asyncDispatchFunction.sym_name(), + asyncDispatchFunction.getCallableResults(), operands); + nb.create(); + }; + + // Dispatch either single block compute function, or launch async dispatch. + b.create(TypeRange(), isSingleBlock, syncDispatch, asyncDispatch); // Wait for the completion of all parallel compute operations. b.create(group); diff --git a/mlir/test/Dialect/Async/async-parallel-for-async-dispatch.mlir b/mlir/test/Dialect/Async/async-parallel-for-async-dispatch.mlir index df538a4fc7661..a6e308e422e20 100644 --- a/mlir/test/Dialect/Async/async-parallel-for-async-dispatch.mlir +++ b/mlir/test/Dialect/Async/async-parallel-for-async-dispatch.mlir @@ -3,8 +3,13 @@ // CHECK-LABEL: @loop_1d func @loop_1d(%arg0: index, %arg1: index, %arg2: index, %arg3: memref) { + // CHECK: %[[C0:.*]] = constant 0 : index // CHECK: %[[GROUP:.*]] = async.create_group - // CHECK: call @async_dispatch_fn + // CHECK: scf.if {{.*}} { + // CHECK: call @parallel_compute_fn(%[[C0]] + // CHECK: } else { + // CHECK: call @async_dispatch_fn + // CHECK: } // CHECK: async.await_all %[[GROUP]] scf.parallel (%i) = (%arg0) to (%arg1) step (%arg2) { %one = constant 1.0 : f32 diff --git a/mlir/test/Dialect/Async/async-parallel-for-canonicalize.mlir b/mlir/test/Dialect/Async/async-parallel-for-canonicalize.mlir new file mode 100644 index 0000000000000..e26d99006b55a --- /dev/null +++ b/mlir/test/Dialect/Async/async-parallel-for-canonicalize.mlir @@ -0,0 +1,33 @@ +// RUN: mlir-opt %s \ +// RUN: -async-parallel-for=async-dispatch=true \ +// RUN: -canonicalize -inline -symbol-dce \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s \ +// RUN: -async-parallel-for=async-dispatch=false \ +// RUN: -canonicalize -inline -symbol-dce \ +// RUN: | FileCheck %s + +// Check that if we statically know that the parallel operation has a single +// block then all async operations will be canonicalized away and we will +// end up with a single synchonous compute function call. + +// CHECK-LABEL: @loop_1d( +// CHECK: %[[MEMREF:.*]]: memref +func @loop_1d(%arg0: memref) { + // CHECK-DAG: %[[C0:.*]] = constant 0 : index + // CHECK-DAG: %[[C1:.*]] = constant 1 : index + // CHECK-DAG: %[[C100:.*]] = constant 100 : index + // CHECK-DAG: %[[ONE:.*]] = constant 1.000000e+00 : f32 + // CHECK: scf.for %[[I:.*]] = %[[C0]] to %[[C100]] step %[[C1]] + // CHECK: memref.store %[[ONE]], %[[MEMREF]][%[[I]]] + %lb = constant 0 : index + %ub = constant 100 : index + %st = constant 1 : index + scf.parallel (%i) = (%lb) to (%ub) step (%st) { + %one = constant 1.0 : f32 + memref.store %one, %arg0[%i] : memref + } + + return +} From 6088f86a2e1ce3ab6fe9573e26fc63a5cf338fa9 Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Sun, 27 Jun 2021 14:25:50 -0700 Subject: [PATCH 201/619] [mlir:Async] Convert AsyncParallelFor pass to ModuleOp pass Depends On D104891 Outlining scf.parallel body as a function requires async-parallel-for pass to be a ModuleOp pass Reviewed By: bondhugula Differential Revision: https://reviews.llvm.org/D104998 --- mlir/include/mlir/Dialect/Async/Passes.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/include/mlir/Dialect/Async/Passes.td b/mlir/include/mlir/Dialect/Async/Passes.td index b770ac751ab13..e321747d4ec66 100644 --- a/mlir/include/mlir/Dialect/Async/Passes.td +++ b/mlir/include/mlir/Dialect/Async/Passes.td @@ -11,7 +11,7 @@ include "mlir/Pass/PassBase.td" -def AsyncParallelFor : Pass<"async-parallel-for"> { +def AsyncParallelFor : Pass<"async-parallel-for", "ModuleOp"> { let summary = "Convert scf.parallel operations to multiple async compute ops " "executed concurrently for non-overlapping iteration ranges"; let constructor = "mlir::createAsyncParallelForPass()"; From 9ccdaac8f9d5b06c35a18180c517342c435d75a1 Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Tue, 29 Jun 2021 09:30:54 -0700 Subject: [PATCH 202/619] [mlir:Async] Fix a bug in automatic refence counting around function calls Depends On D104998 Function calls "transfer ownership" to the callee and it puts additional constraints on the reference counting optimization pass Reviewed By: mehdi_amini Differential Revision: https://reviews.llvm.org/D104999 --- .../Transforms/AsyncRuntimeRefCountingOpt.cpp | 53 +++++++++++++++++++ .../Dialect/Async/Transforms/CMakeLists.txt | 3 +- .../Async/async-runtime-ref-counting-opt.mlir | 14 +++++ .../microbench-linalg-async-parallel-for.mlir | 2 +- .../microbench-scf-async-parallel-for.mlir | 6 +-- .../Async/CPU/test-async-parallel-for-1d.mlir | 4 +- .../Async/CPU/test-async-parallel-for-2d.mlir | 4 +- 7 files changed, 77 insertions(+), 9 deletions(-) diff --git a/mlir/lib/Dialect/Async/Transforms/AsyncRuntimeRefCountingOpt.cpp b/mlir/lib/Dialect/Async/Transforms/AsyncRuntimeRefCountingOpt.cpp index ccd81c61668e3..063c2050e37a8 100644 --- a/mlir/lib/Dialect/Async/Transforms/AsyncRuntimeRefCountingOpt.cpp +++ b/mlir/lib/Dialect/Async/Transforms/AsyncRuntimeRefCountingOpt.cpp @@ -13,6 +13,7 @@ #include "PassDetail.h" #include "mlir/Dialect/Async/IR/Async.h" #include "mlir/Dialect/Async/Passes.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" #include "llvm/ADT/SmallSet.h" #include "llvm/Support/Debug.h" @@ -109,6 +110,58 @@ LogicalResult AsyncRuntimeRefCountingOptPass::optimizeReferenceCounting( dropRef->isBeforeInBlock(addRef.getOperation())) continue; + // When reference counted value passed to a function as an argument, + // function takes ownership of +1 reference and it will drop it before + // returning. + // + // Example: + // + // %token = ... : !async.token + // + // async.runtime.add_ref %token {count = 1 : i32} : !async.token + // call @pass_token(%token: !async.token, ...) + // + // async.await %token : !async.token + // async.runtime.drop_ref %token {count = 1 : i32} : !async.token + // + // In this example if we'll cancel a pair of reference counting + // operations we might end up with a deallocated token when we'll + // reach `async.await` operation. + Operation *firstFunctionCallUser = nullptr; + Operation *lastNonFunctionCallUser = nullptr; + + for (Operation *user : info.users) { + // `user` operation lies after `addRef` ... + if (user == addRef || user->isBeforeInBlock(addRef)) + continue; + // ... and before `dropRef`. + if (user == dropRef || dropRef->isBeforeInBlock(user)) + break; + + // Find the first function call user of the reference counted value. + Operation *functionCall = dyn_cast(user); + if (functionCall && + (!firstFunctionCallUser || + functionCall->isBeforeInBlock(firstFunctionCallUser))) { + firstFunctionCallUser = functionCall; + continue; + } + + // Find the last regular user of the reference counted value. + if (!functionCall && + (!lastNonFunctionCallUser || + lastNonFunctionCallUser->isBeforeInBlock(user))) { + lastNonFunctionCallUser = user; + continue; + } + } + + // Non function call user after the function call user of the reference + // counted value. + if (firstFunctionCallUser && lastNonFunctionCallUser && + firstFunctionCallUser->isBeforeInBlock(lastNonFunctionCallUser)) + continue; + // Try to cancel the pair of `add_ref` and `drop_ref` operations. auto emplaced = cancellable.try_emplace(dropRef.getOperation(), addRef.getOperation()); diff --git a/mlir/lib/Dialect/Async/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Async/Transforms/CMakeLists.txt index 45fb77f443a00..9aea38b4c5e53 100644 --- a/mlir/lib/Dialect/Async/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Async/Transforms/CMakeLists.txt @@ -13,8 +13,9 @@ add_mlir_dialect_library(MLIRAsyncTransforms LINK_LIBS PUBLIC MLIRIR MLIRAsync - MLIRSCF MLIRPass + MLIRSCF + MLIRStandard MLIRTransforms MLIRTransformUtils ) diff --git a/mlir/test/Dialect/Async/async-runtime-ref-counting-opt.mlir b/mlir/test/Dialect/Async/async-runtime-ref-counting-opt.mlir index 9b6bb1a5e7515..5d32201e9b913 100644 --- a/mlir/test/Dialect/Async/async-runtime-ref-counting-opt.mlir +++ b/mlir/test/Dialect/Async/async-runtime-ref-counting-opt.mlir @@ -53,3 +53,17 @@ func @cancellable_operations_3(%arg0: !async.token) { // CHECK: return return } + +// CHECK-LABEL: @not_cancellable_operations_0 +func @not_cancellable_operations_0(%arg0: !async.token) { + // CHECK: add_ref + async.runtime.add_ref %arg0 {count = 1 : i32} : !async.token + // CHECK: call @consume_toke + call @consume_token(%arg0): (!async.token) -> () + // CHECK: async.runtime.await + async.runtime.await %arg0 : !async.token + // CHECK: async.runtime.drop_ref + async.runtime.drop_ref %arg0 {count = 1 : i32} : !async.token + // CHECK: return + return +} diff --git a/mlir/test/Integration/Dialect/Async/CPU/microbench-linalg-async-parallel-for.mlir b/mlir/test/Integration/Dialect/Async/CPU/microbench-linalg-async-parallel-for.mlir index 1ab6ff0630eda..772ae873c8e56 100644 --- a/mlir/test/Integration/Dialect/Async/CPU/microbench-linalg-async-parallel-for.mlir +++ b/mlir/test/Integration/Dialect/Async/CPU/microbench-linalg-async-parallel-for.mlir @@ -3,7 +3,7 @@ // RUN: -async-parallel-for \ // RUN: -async-to-async-runtime \ // RUN: -async-runtime-ref-counting \ -// FIXME: -async-runtime-ref-counting-opt \ +// RUN: -async-runtime-ref-counting-opt \ // RUN: -convert-async-to-llvm \ // RUN: -convert-scf-to-std \ // RUN: -std-expand \ diff --git a/mlir/test/Integration/Dialect/Async/CPU/microbench-scf-async-parallel-for.mlir b/mlir/test/Integration/Dialect/Async/CPU/microbench-scf-async-parallel-for.mlir index e2e69c65ba08c..56b090e1e7bf3 100644 --- a/mlir/test/Integration/Dialect/Async/CPU/microbench-scf-async-parallel-for.mlir +++ b/mlir/test/Integration/Dialect/Async/CPU/microbench-scf-async-parallel-for.mlir @@ -2,13 +2,13 @@ // RUN: -async-parallel-for \ // RUN: -async-to-async-runtime \ // RUN: -async-runtime-ref-counting \ -// FIXME: -async-runtime-ref-counting-opt \ +// RUN: -async-runtime-ref-counting-opt \ // RUN: -convert-async-to-llvm \ // RUN: -convert-linalg-to-loops \ // RUN: -convert-scf-to-std \ // RUN: -std-expand \ // RUN: -convert-vector-to-llvm \ -// RUN: -convert-std-to-llvm \ +// RUN: -convert-std-to-llvm -print-ir-after-all \ // RUN: | mlir-cpu-runner \ // RUN: -e entry -entry-point-result=void -O3 \ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ @@ -20,7 +20,7 @@ // RUN: -async-parallel-for=async-dispatch=false \ // RUN: -async-to-async-runtime \ // RUN: -async-runtime-ref-counting \ -// FIXME: -async-runtime-ref-counting-opt \ +// RUN: -async-runtime-ref-counting-opt \ // RUN: -convert-async-to-llvm \ // RUN: -convert-linalg-to-loops \ // RUN: -convert-scf-to-std \ diff --git a/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-1d.mlir b/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-1d.mlir index 76a6b2f270531..12b2be2627131 100644 --- a/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-1d.mlir +++ b/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-1d.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s -async-parallel-for \ // RUN: -async-to-async-runtime \ // RUN: -async-runtime-ref-counting \ -// FIXME: -async-runtime-ref-counting-opt \ +// RUN: -async-runtime-ref-counting-opt \ // RUN: -convert-async-to-llvm \ // RUN: -convert-scf-to-std \ // RUN: -convert-std-to-llvm \ @@ -16,7 +16,7 @@ // RUN: target-block-size=1" \ // RUN: -async-to-async-runtime \ // RUN: -async-runtime-ref-counting \ -// FIXME: -async-runtime-ref-counting-opt \ +// RUN: -async-runtime-ref-counting-opt \ // RUN: -convert-async-to-llvm \ // RUN: -convert-scf-to-std \ // RUN: -convert-std-to-llvm \ diff --git a/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-2d.mlir b/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-2d.mlir index 0443e46116920..b294b9ce4d26e 100644 --- a/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-2d.mlir +++ b/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-2d.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s -async-parallel-for \ // RUN: -async-to-async-runtime \ // RUN: -async-runtime-ref-counting \ -// FIXME: -async-runtime-ref-counting-opt \ +// RUN: -async-runtime-ref-counting-opt \ // RUN: -convert-async-to-llvm \ // RUN: -convert-scf-to-std \ // RUN: -convert-std-to-llvm \ @@ -16,7 +16,7 @@ // RUN: target-block-size=1" \ // RUN: -async-to-async-runtime \ // RUN: -async-runtime-ref-counting \ -// FIXME: -async-runtime-ref-counting-opt \ +// RUN: -async-runtime-ref-counting-opt \ // RUN: -convert-async-to-llvm \ // RUN: -convert-scf-to-std \ // RUN: -convert-std-to-llvm \ From 287847dace4408a0e4e4bfbed7db4a4d1f3eba29 Mon Sep 17 00:00:00 2001 From: Arthur O'Dwyer Date: Wed, 26 May 2021 08:59:16 -0400 Subject: [PATCH 203/619] [libc++] Update ABI docs. NFCI. Differential Revision: https://reviews.llvm.org/D103160 --- libcxx/docs/DesignDocs/ABIVersioning.rst | 27 +++++++++++++++--------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/libcxx/docs/DesignDocs/ABIVersioning.rst b/libcxx/docs/DesignDocs/ABIVersioning.rst index 5960dd18610c0..3b82f3cc60a44 100644 --- a/libcxx/docs/DesignDocs/ABIVersioning.rst +++ b/libcxx/docs/DesignDocs/ABIVersioning.rst @@ -3,15 +3,22 @@ Libc++ ABI stability ==================== -Libc++ aims to preserve stable ABI to avoid subtle bugs when code built to the old ABI -is linked with the code build to the new ABI. At the same time, libc++ allows ABI-breaking -improvements and bugfixes for the scenarios when ABI change is not a issue. +Libc++ aims to preserve a stable ABI to avoid subtle bugs when code built under the old ABI +is linked with code built under the new ABI. At the same time, libc++ wants to make +ABI-breaking improvements and bugfixes in scenarios where the user doesn't mind ABI breaks. -To support both cases, libc++ allows specifying the ABI version at the -build time. The version is defined with a cmake option -LIBCXX_ABI_VERSION. Another option LIBCXX_ABI_UNSTABLE can be used to -include all present ABI breaking features. These options translate -into C++ macro definitions _LIBCPP_ABI_VERSION, _LIBCPP_ABI_UNSTABLE. +To support both cases, libc++ allows specifying an ABI version at +build time. The version is defined with CMake option ``LIBCXX_ABI_VERSION``. +Currently supported values are ``1`` (the stable default) +and ``2`` (the unstable "next" version). At some point "ABI version 2" will be +frozen and new ABI-breaking changes will start being applied to version ``3``; +but this has not happened yet. -Any ABI-changing feature is placed under it's own macro, _LIBCPP_ABI_XXX, which is enabled -based on the value of _LIBCPP_ABI_VERSION. _LIBCPP_ABI_UNSTABLE, if set, enables all features at once. +To always use the most cutting-edge, most unstable ABI (which is currently ``2`` +but at some point will become ``3``), set the CMake option ``LIBCXX_ABI_UNSTABLE``. + +Internally, each ABI-changing feature is placed under its own C++ macro, +``_LIBCPP_ABI_XXX``. These macros' definitions are controlled by the C++ macro +``_LIBCPP_ABI_VERSION``, which is controlled by the ``LIBCXX_ABI_VERSION`` set +at build time. Libc++ does not intend users to interact with these C++ macros +directly. From 3b6dfa381edfc66864cfd6dbc2769ba645858120 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 28 Jun 2021 23:09:58 -0700 Subject: [PATCH 204/619] [RISCV] Protect the SHL/SRA/SRL handlers in LowerOperation against being called for an illegal i32 shift amount. It seems it is possible for DAG combine to create a shl with an i64 result type and an i32 shift amount. This is ok before type legalization since the type don't need to match in SelectionDAG. This results in type legalization calling LowerOperation to legalize just the amount. We weren't expecting this so we asserted for not finding a fixed vector shift. To fix this, I've added a check for the fixed vector case and returned SDValue() to get the default type legalizer. I've factored all shifts together and added a fixed vector specific handler to avoid repeating similar code for each in LowerOperation. The particular case I found was exposed by D104581, but the bad shift is created after that patch triggers. --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 23 +++++++++++++--- llvm/lib/Target/RISCV/RISCVISelLowering.h | 1 + llvm/test/CodeGen/RISCV/aext-to-sext.ll | 29 +++++++++++++++++++++ 3 files changed, 50 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 3cf3ad958f691..30c2224bf8b23 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -2432,11 +2432,14 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, case ISD::UREM: return lowerToScalableOp(Op, DAG, RISCVISD::UREM_VL); case ISD::SHL: - return lowerToScalableOp(Op, DAG, RISCVISD::SHL_VL); case ISD::SRA: - return lowerToScalableOp(Op, DAG, RISCVISD::SRA_VL); case ISD::SRL: - return lowerToScalableOp(Op, DAG, RISCVISD::SRL_VL); + if (Op.getSimpleValueType().isFixedLengthVector()) + return lowerFixedLengthVectorShiftToRVV(Op, DAG); + // This can be called for an i32 shift amount that needs to be promoted. + assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() && + "Unexpected custom legalisation"); + return SDValue(); case ISD::FADD: return lowerToScalableOp(Op, DAG, RISCVISD::FADD_VL); case ISD::FSUB: @@ -4290,6 +4293,20 @@ SDValue RISCVTargetLowering::lowerFixedLengthVectorLogicOpToRVV( return lowerToScalableOp(Op, DAG, VecOpc, /*HasMask*/ true); } +SDValue +RISCVTargetLowering::lowerFixedLengthVectorShiftToRVV(SDValue Op, + SelectionDAG &DAG) const { + unsigned Opc; + switch (Op.getOpcode()) { + default: llvm_unreachable("Unexpected opcode!"); + case ISD::SHL: Opc = RISCVISD::SHL_VL; break; + case ISD::SRA: Opc = RISCVISD::SRA_VL; break; + case ISD::SRL: Opc = RISCVISD::SRL_VL; break; + } + + return lowerToScalableOp(Op, DAG, Opc); +} + // Lower vector ABS to smax(X, sub(0, X)). SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index 8fc92f65c38f3..cd13e748fed37 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -552,6 +552,7 @@ class RISCVTargetLowering : public TargetLowering { SDValue lowerFixedLengthVectorLogicOpToRVV(SDValue Op, SelectionDAG &DAG, unsigned MaskOpc, unsigned VecOpc) const; + SDValue lowerFixedLengthVectorShiftToRVV(SDValue Op, SelectionDAG &DAG) const; SDValue lowerFixedLengthVectorSelectToRVV(SDValue Op, SelectionDAG &DAG) const; SDValue lowerToScalableOp(SDValue Op, SelectionDAG &DAG, unsigned NewOpc, diff --git a/llvm/test/CodeGen/RISCV/aext-to-sext.ll b/llvm/test/CodeGen/RISCV/aext-to-sext.ll index 45b30dd60a3d5..5265a085073a7 100644 --- a/llvm/test/CodeGen/RISCV/aext-to-sext.ll +++ b/llvm/test/CodeGen/RISCV/aext-to-sext.ll @@ -46,3 +46,32 @@ bb6: ; preds = %bb2, %bb } declare void @hoge() + +; This ends up creating a shl with a i64 result type, but an i32 shift amount. +; Because custom type legalization for i32 is enabled, this resulted in +; LowerOperation being called for the amount. This was not expected and +; triggered an assert. +define i32 @crash(i32 %x, i32 %y, i32 %z) { +; RV64I-LABEL: crash: +; RV64I: # %bb.0: +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: seqz a3, a0 +; RV64I-NEXT: addw a0, a1, a2 +; RV64I-NEXT: slli a1, a3, 3 +; RV64I-NEXT: .LBB1_1: # %bb +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: beq a0, a1, .LBB1_1 +; RV64I-NEXT: # %bb.2: # %bar +; RV64I-NEXT: ret + br label %bb + +bb: + %a = icmp eq i32 %x, 0 + %b = add i32 %y, %z + %c = select i1 %a, i32 8, i32 0 + %d = icmp eq i32 %b, %c + br i1 %d, label %bb, label %bar + +bar: + ret i32 %b +} From c00032321a6ae26f9c8056d024e262abf342631e Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Tue, 29 Jun 2021 19:00:30 +0200 Subject: [PATCH 205/619] [lldb] Skip TestPairFromStdModule for now I didn't get around to fix this change and the original commit itself seems fine, so this looks like an existing LLDB/Clang bug that was just uncovered by this change. Skipping while I'm investigating. --- .../import-std-module/pair/TestPairFromStdModule.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lldb/test/API/commands/expression/import-std-module/pair/TestPairFromStdModule.py b/lldb/test/API/commands/expression/import-std-module/pair/TestPairFromStdModule.py index d0449a4e7800d..121b6e7420349 100644 --- a/lldb/test/API/commands/expression/import-std-module/pair/TestPairFromStdModule.py +++ b/lldb/test/API/commands/expression/import-std-module/pair/TestPairFromStdModule.py @@ -13,6 +13,10 @@ class TestCase(TestBase): @add_test_categories(["libc++"]) @skipIf(compiler=no_match("clang")) + # FIXME: This regressed in 69d5a6662115499198ebfa07a081e98a6ce4b915 + # but needs further investigation for what underlying Clang/LLDB bug can't + # handle that code change. + @skipIf def test(self): self.build() From 5d933c0b0c741767f2af8218d55581557903a254 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Tue, 29 Jun 2021 16:57:52 +0000 Subject: [PATCH 206/619] [Flang][test] Fix Windows buildbot after D104930. Add REQUIRES: shell to the unpack.f90 test that executes a UNIX shell script. --- flang/test/Semantics/unpack.f90 | 1 + 1 file changed, 1 insertion(+) diff --git a/flang/test/Semantics/unpack.f90 b/flang/test/Semantics/unpack.f90 index d624f9c2e38a1..71bd21c7fb008 100644 --- a/flang/test/Semantics/unpack.f90 +++ b/flang/test/Semantics/unpack.f90 @@ -1,4 +1,5 @@ ! RUN: %S/test_errors.sh %s %t %flang_fc1 +! REQUIRES: shell ! UNPACK() intrinsic function error tests program test_unpack integer, dimension(2) :: vector = [343, 512] From 129ae515fba022353050e0f313b32595de9e4b39 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Mon, 28 Jun 2021 12:21:19 -0700 Subject: [PATCH 207/619] [INSTCOMBINE] Transform reduction(shuffle V, poison, unique_mask) to reduction(V). After SLP + LTO we may have have reduction(shuffle V, poison, mask). This can be simplified to just reduction(V) if the mask is only for single vector and just all elements from this vector are permuted, without reusing, replacing with undefs and/or other values, etc. Differential Revision: https://reviews.llvm.org/D105053 --- .../InstCombine/InstCombineCalls.cpp | 41 +++++++++++++++++++ .../InstCombine/reduction-shufflevector.ll | 35 ++++++---------- 2 files changed, 53 insertions(+), 23 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index fb3dfd89895be..552de8b072e39 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/Twine.h" @@ -1983,6 +1984,46 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { replaceInstUsesWith(CI, Res); return eraseInstFromFunction(CI); } + LLVM_FALLTHROUGH; + } + case Intrinsic::vector_reduce_add: + case Intrinsic::vector_reduce_mul: + case Intrinsic::vector_reduce_xor: + case Intrinsic::vector_reduce_umax: + case Intrinsic::vector_reduce_umin: + case Intrinsic::vector_reduce_smax: + case Intrinsic::vector_reduce_smin: + case Intrinsic::vector_reduce_fmax: + case Intrinsic::vector_reduce_fmin: + case Intrinsic::vector_reduce_fadd: + case Intrinsic::vector_reduce_fmul: { + bool CanBeReassociated = (IID != Intrinsic::vector_reduce_fadd && + IID != Intrinsic::vector_reduce_fmul) || + II->hasAllowReassoc(); + const unsigned ArgIdx = (IID == Intrinsic::vector_reduce_fadd || + IID == Intrinsic::vector_reduce_fmul) + ? 1 + : 0; + Value *Arg = II->getArgOperand(ArgIdx); + Value *V; + ArrayRef Mask; + if (!isa(Arg->getType()) || !CanBeReassociated || + !match(Arg, m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))) || + !cast(Arg)->isSingleSource()) + break; + int Sz = Mask.size(); + SmallBitVector UsedIndices(Sz); + for (int Idx : Mask) { + if (Idx == UndefMaskElem || UsedIndices.test(Idx)) + break; + UsedIndices.set(Idx); + } + // Can remove shuffle iff just shuffled elements, no repeats, undefs, or + // other changes. + if (UsedIndices.all()) { + replaceUse(II->getOperandUse(ArgIdx), V); + return nullptr; + } break; } default: { diff --git a/llvm/test/Transforms/InstCombine/reduction-shufflevector.ll b/llvm/test/Transforms/InstCombine/reduction-shufflevector.ll index 1ecdb386ac1ab..cf43f1bd626de 100644 --- a/llvm/test/Transforms/InstCombine/reduction-shufflevector.ll +++ b/llvm/test/Transforms/InstCombine/reduction-shufflevector.ll @@ -13,8 +13,7 @@ define i32 @reduce_add(<4 x i32> %x) { define i32 @reduce_or(<4 x i32> %x) { ; CHECK-LABEL: @reduce_or( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[SHUF]]) +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[X:%.*]]) ; CHECK-NEXT: ret i32 [[RES]] ; %shuf = shufflevector <4 x i32> poison, <4 x i32> %x, <4 x i32> @@ -24,8 +23,7 @@ define i32 @reduce_or(<4 x i32> %x) { define i32 @reduce_and(<4 x i32> %x) { ; CHECK-LABEL: @reduce_and( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[SHUF]]) +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[X:%.*]]) ; CHECK-NEXT: ret i32 [[RES]] ; %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> @@ -35,8 +33,7 @@ define i32 @reduce_and(<4 x i32> %x) { define i32 @reduce_xor(<4 x i32> %x) { ; CHECK-LABEL: @reduce_xor( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[SHUF]]) +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[X:%.*]]) ; CHECK-NEXT: ret i32 [[RES]] ; %shuf = shufflevector <4 x i32> poison, <4 x i32> %x, <4 x i32> @@ -46,8 +43,7 @@ define i32 @reduce_xor(<4 x i32> %x) { define i32 @reduce_umax(<4 x i32> %x) { ; CHECK-LABEL: @reduce_umax( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[SHUF]]) +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[X:%.*]]) ; CHECK-NEXT: ret i32 [[RES]] ; %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> @@ -57,8 +53,7 @@ define i32 @reduce_umax(<4 x i32> %x) { define i32 @reduce_umin(<4 x i32> %x) { ; CHECK-LABEL: @reduce_umin( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[SHUF]]) +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[X:%.*]]) ; CHECK-NEXT: ret i32 [[RES]] ; %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> @@ -68,8 +63,7 @@ define i32 @reduce_umin(<4 x i32> %x) { define i32 @reduce_smax(<4 x i32> %x) { ; CHECK-LABEL: @reduce_smax( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[SHUF]]) +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[X:%.*]]) ; CHECK-NEXT: ret i32 [[RES]] ; %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> @@ -79,8 +73,7 @@ define i32 @reduce_smax(<4 x i32> %x) { define i32 @reduce_smin(<4 x i32> %x) { ; CHECK-LABEL: @reduce_smin( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[SHUF]]) +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[X:%.*]]) ; CHECK-NEXT: ret i32 [[RES]] ; %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> @@ -90,19 +83,17 @@ define i32 @reduce_smin(<4 x i32> %x) { define float @reduce_fmax(<4 x float> %x) { ; CHECK-LABEL: @reduce_fmax( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> -; CHECK-NEXT: [[RES:%.*]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[SHUF]]) +; CHECK-NEXT: [[RES:%.*]] = call nnan nsz float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[X:%.*]]) ; CHECK-NEXT: ret float [[RES]] ; %shuf = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> - %res = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %shuf) + %res = call nsz nnan float @llvm.vector.reduce.fmax.v4f32(<4 x float> %shuf) ret float %res } define float @reduce_fmin(<4 x float> %x) { ; CHECK-LABEL: @reduce_fmin( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> -; CHECK-NEXT: [[RES:%.*]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[SHUF]]) +; CHECK-NEXT: [[RES:%.*]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[X:%.*]]) ; CHECK-NEXT: ret float [[RES]] ; %shuf = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> @@ -112,8 +103,7 @@ define float @reduce_fmin(<4 x float> %x) { define float @reduce_fadd(float %a, <4 x float> %x) { ; CHECK-LABEL: @reduce_fadd( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> undef, <4 x i32> -; CHECK-NEXT: [[RES:%.*]] = call reassoc float @llvm.vector.reduce.fadd.v4f32(float [[A:%.*]], <4 x float> [[SHUF]]) +; CHECK-NEXT: [[RES:%.*]] = call reassoc float @llvm.vector.reduce.fadd.v4f32(float [[A:%.*]], <4 x float> [[X:%.*]]) ; CHECK-NEXT: ret float [[RES]] ; %shuf = shufflevector <4 x float> %x, <4 x float> %x, <4 x i32> @@ -123,8 +113,7 @@ define float @reduce_fadd(float %a, <4 x float> %x) { define float @reduce_fmul(float %a, <4 x float> %x) { ; CHECK-LABEL: @reduce_fmul( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> -; CHECK-NEXT: [[RES:%.*]] = call reassoc float @llvm.vector.reduce.fmul.v4f32(float [[A:%.*]], <4 x float> [[SHUF]]) +; CHECK-NEXT: [[RES:%.*]] = call reassoc float @llvm.vector.reduce.fmul.v4f32(float [[A:%.*]], <4 x float> [[X:%.*]]) ; CHECK-NEXT: ret float [[RES]] ; %shuf = shufflevector <4 x float> %x, <4 x float> zeroinitializer, <4 x i32> From 952944c12c0aa917e97805e929b5cd4e40866f91 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Tue, 2 Mar 2021 16:48:09 -0800 Subject: [PATCH 208/619] [ObjC][ARC] Don't add operand bundle clang.arc.attachedcall to a call if the call already has the operand bundle This bug was causing the call to `replaceAllUsesWith` to crash because the old call instruction and the new call instruction were the same. rdar://74957948 Differential Revision: https://reviews.llvm.org/D97824 --- clang/lib/CodeGen/CGObjC.cpp | 6 +++++- clang/test/CodeGenObjCXX/arc-rv-attr.mm | 11 +++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 clang/test/CodeGenObjCXX/arc-rv-attr.mm diff --git a/clang/lib/CodeGen/CGObjC.cpp b/clang/lib/CodeGen/CGObjC.cpp index 6c36dde1f526d..63429b1d4f653 100644 --- a/clang/lib/CodeGen/CGObjC.cpp +++ b/clang/lib/CodeGen/CGObjC.cpp @@ -2939,8 +2939,12 @@ static llvm::Value *emitARCOperationAfterCall(CodeGenFunction &CGF, ValueTransform doAfterCall, ValueTransform doFallback) { CGBuilderTy::InsertPoint ip = CGF.Builder.saveIP(); + auto *callBase = dyn_cast(value); - if (llvm::CallInst *call = dyn_cast(value)) { + if (callBase && llvm::objcarc::hasAttachedCallOpBundle(callBase)) { + // Fall back if the call base has operand bundle "clang.arc.attachedcall". + value = doFallback(CGF, value); + } else if (llvm::CallInst *call = dyn_cast(value)) { // Place the retain immediately following the call. CGF.Builder.SetInsertPoint(call->getParent(), ++llvm::BasicBlock::iterator(call)); diff --git a/clang/test/CodeGenObjCXX/arc-rv-attr.mm b/clang/test/CodeGenObjCXX/arc-rv-attr.mm new file mode 100644 index 0000000000000..0f4519bd527d9 --- /dev/null +++ b/clang/test/CodeGenObjCXX/arc-rv-attr.mm @@ -0,0 +1,11 @@ +// RUN: %clang_cc1 -triple arm64-apple-ios9 -fobjc-runtime=ios-9.0 -fobjc-arc -std=c++11 -O -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK + +id foo(void); + +// CHECK-LABEL: define{{.*}} void @_Z14test_list_initv( +// CHECK: %[[CALL1:.*]] = call i8* @_Z3foov() [ "clang.arc.attachedcall"(i64 0) ] +// CHECK: call i8* @llvm.objc.retain(i8* %[[CALL1]]) + +void test_list_init() { + auto t = id{foo()}; +} From aaba37187fda7f5a7fdc4c1e6129cbaaa1bbf709 Mon Sep 17 00:00:00 2001 From: Melanie Blower Date: Tue, 29 Jun 2021 11:44:49 -0400 Subject: [PATCH 209/619] [clang][PATCH][nfc] Refactor TargetInfo::adjust to pass DiagnosticsEngine to allow diagnostics on target-unsupported options Reviewed By: aaron.ballman Differential Revision: https://reviews.llvm.org/D104729 --- clang/include/clang/Basic/TargetInfo.h | 2 +- clang/lib/Basic/TargetInfo.cpp | 2 +- clang/lib/Basic/Targets/AMDGPU.cpp | 4 ++-- clang/lib/Basic/Targets/AMDGPU.h | 2 +- clang/lib/Basic/Targets/PPC.cpp | 4 ++-- clang/lib/Basic/Targets/PPC.h | 2 +- clang/lib/Basic/Targets/SPIR.h | 4 ++-- clang/lib/Basic/Targets/WebAssembly.cpp | 3 ++- clang/lib/Basic/Targets/WebAssembly.h | 2 +- clang/lib/Frontend/ASTUnit.cpp | 2 +- clang/lib/Frontend/CompilerInstance.cpp | 4 ++-- clang/lib/Interpreter/Interpreter.cpp | 2 +- clang/tools/clang-import-test/clang-import-test.cpp | 2 +- .../Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp | 3 ++- .../Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.cpp | 2 +- 15 files changed, 21 insertions(+), 19 deletions(-) diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index d59bad30e7428..20f6afa76cbb3 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -1162,7 +1162,7 @@ class TargetInfo : public virtual TransferrableTargetInfo, /// Apply changes to the target information with respect to certain /// language options which change the target configuration and adjust /// the language based on the target options where applicable. - virtual void adjust(LangOptions &Opts); + virtual void adjust(DiagnosticsEngine &Diags, LangOptions &Opts); /// Adjust target options based on codegen options. virtual void adjustTargetOptions(const CodeGenOptions &CGOpts, diff --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp index e73b4a3a40c74..4c2859e5eda7f 100644 --- a/clang/lib/Basic/TargetInfo.cpp +++ b/clang/lib/Basic/TargetInfo.cpp @@ -346,7 +346,7 @@ bool TargetInfo::isTypeSigned(IntType T) { /// Apply changes to the target information with respect to certain /// language options which change the target configuration and adjust /// the language based on the target options where applicable. -void TargetInfo::adjust(LangOptions &Opts) { +void TargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) { if (Opts.NoBitFieldTypeAlign) UseBitFieldTypeAlignment = false; diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index 595132e2e70ba..fac786dbcf9e2 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -358,8 +358,8 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; } -void AMDGPUTargetInfo::adjust(LangOptions &Opts) { - TargetInfo::adjust(Opts); +void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) { + TargetInfo::adjust(Diags, Opts); // ToDo: There are still a few places using default address space as private // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL // can be removed from the following line. diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h index fe5c61c6ba2bb..244a6e0446905 100644 --- a/clang/lib/Basic/Targets/AMDGPU.h +++ b/clang/lib/Basic/Targets/AMDGPU.h @@ -93,7 +93,7 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo { void setAddressSpaceMap(bool DefaultIsPrivate); - void adjust(LangOptions &Opts) override; + void adjust(DiagnosticsEngine &Diags, LangOptions &Opts) override; uint64_t getPointerWidthV(unsigned AddrSpace) const override { if (isR600(getTriple())) diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp index 6860b5e5d02fa..d431dda970222 100644 --- a/clang/lib/Basic/Targets/PPC.cpp +++ b/clang/lib/Basic/Targets/PPC.cpp @@ -614,10 +614,10 @@ void PPCTargetInfo::fillValidCPUList(SmallVectorImpl &Values) const { Values.append(std::begin(ValidCPUNames), std::end(ValidCPUNames)); } -void PPCTargetInfo::adjust(LangOptions &Opts) { +void PPCTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) { if (HasAltivec) Opts.AltiVec = 1; - TargetInfo::adjust(Opts); + TargetInfo::adjust(Diags, Opts); if (LongDoubleFormat != &llvm::APFloat::IEEEdouble()) LongDoubleFormat = Opts.PPCIEEELongDouble ? &llvm::APFloat::IEEEquad() diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h index 554f2174fee00..18ee1194c759d 100644 --- a/clang/lib/Basic/Targets/PPC.h +++ b/clang/lib/Basic/Targets/PPC.h @@ -89,7 +89,7 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo { } // Set the language option for altivec based on our value. - void adjust(LangOptions &Opts) override; + void adjust(DiagnosticsEngine &Diags, LangOptions &Opts) override; // Note: GCC recognizes the following additional cpus: // 401, 403, 405, 405fp, 440fp, 464, 464fp, 476, 476fp, 505, 740, 801, diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h index c429b27709ecb..50f34abd66309 100644 --- a/clang/lib/Basic/Targets/SPIR.h +++ b/clang/lib/Basic/Targets/SPIR.h @@ -135,8 +135,8 @@ class LLVM_LIBRARY_VISIBILITY SPIRTargetInfo : public TargetInfo { AddrSpaceMap = DefaultIsGeneric ? &SPIRDefIsGenMap : &SPIRDefIsPrivMap; } - void adjust(LangOptions &Opts) override { - TargetInfo::adjust(Opts); + void adjust(DiagnosticsEngine &Diags, LangOptions &Opts) override { + TargetInfo::adjust(Diags, Opts); // FIXME: SYCL specification considers unannotated pointers and references // to be pointing to the generic address space. See section 5.9.3 of // SYCL 2020 specification. diff --git a/clang/lib/Basic/Targets/WebAssembly.cpp b/clang/lib/Basic/Targets/WebAssembly.cpp index 2a5055c3d534b..7ef79849cb75d 100644 --- a/clang/lib/Basic/Targets/WebAssembly.cpp +++ b/clang/lib/Basic/Targets/WebAssembly.cpp @@ -234,7 +234,8 @@ ArrayRef WebAssemblyTargetInfo::getTargetBuiltins() const { Builtin::FirstTSBuiltin); } -void WebAssemblyTargetInfo::adjust(LangOptions &Opts) { +void WebAssemblyTargetInfo::adjust(DiagnosticsEngine &Diags, + LangOptions &Opts) { // If the Atomics feature isn't available, turn off POSIXThreads and // ThreadModel, so that we don't predefine _REENTRANT or __STDCPP_THREADS__. if (!HasAtomics) { diff --git a/clang/lib/Basic/Targets/WebAssembly.h b/clang/lib/Basic/Targets/WebAssembly.h index 70115183e46b9..b29730c5d706b 100644 --- a/clang/lib/Basic/Targets/WebAssembly.h +++ b/clang/lib/Basic/Targets/WebAssembly.h @@ -138,7 +138,7 @@ class LLVM_LIBRARY_VISIBILITY WebAssemblyTargetInfo : public TargetInfo { bool hasProtectedVisibility() const override { return false; } - void adjust(LangOptions &Opts) override; + void adjust(DiagnosticsEngine &Diags, LangOptions &Opts) override; }; class LLVM_LIBRARY_VISIBILITY WebAssembly32TargetInfo diff --git a/clang/lib/Frontend/ASTUnit.cpp b/clang/lib/Frontend/ASTUnit.cpp index 988090a8b1b13..4f92833e4229c 100644 --- a/clang/lib/Frontend/ASTUnit.cpp +++ b/clang/lib/Frontend/ASTUnit.cpp @@ -588,7 +588,7 @@ class ASTInfoCollector : public ASTReaderListener { // // FIXME: We shouldn't need to do this, the target should be immutable once // created. This complexity should be lifted elsewhere. - Target->adjust(LangOpt); + Target->adjust(PP.getDiagnostics(), LangOpt); // Initialize the preprocessor. PP.Initialize(*Target); diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp index 063384130f730..2ae3be6814dec 100644 --- a/clang/lib/Frontend/CompilerInstance.cpp +++ b/clang/lib/Frontend/CompilerInstance.cpp @@ -142,7 +142,7 @@ bool CompilerInstance::createTarget() { // Inform the target of the language options. // FIXME: We shouldn't need to do this, the target should be immutable once // created. This complexity should be lifted elsewhere. - getTarget().adjust(getLangOpts()); + getTarget().adjust(getDiagnostics(), getLangOpts()); // Adjust target options based on codegen options. getTarget().adjustTargetOptions(getCodeGenOpts(), getTargetOpts()); @@ -457,7 +457,7 @@ void CompilerInstance::createPreprocessor(TranslationUnitKind TUKind) { getSourceManager(), *HeaderInfo, *this, /*IdentifierInfoLookup=*/nullptr, /*OwnsHeaderSearch=*/true, TUKind); - getTarget().adjust(getLangOpts()); + getTarget().adjust(getDiagnostics(), getLangOpts()); PP->Initialize(getTarget(), getAuxTarget()); if (PPOpts.DetailedRecord) diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp index 711a5e9ff0168..768847f9f0352 100644 --- a/clang/lib/Interpreter/Interpreter.cpp +++ b/clang/lib/Interpreter/Interpreter.cpp @@ -110,7 +110,7 @@ CreateCI(const llvm::opt::ArgStringList &Argv) { "Initialization failed. " "Target is missing"); - Clang->getTarget().adjust(Clang->getLangOpts()); + Clang->getTarget().adjust(Clang->getDiagnostics(), Clang->getLangOpts()); return std::move(Clang); } diff --git a/clang/tools/clang-import-test/clang-import-test.cpp b/clang/tools/clang-import-test/clang-import-test.cpp index df173cf49f35e..fa5d7a54f53b4 100644 --- a/clang/tools/clang-import-test/clang-import-test.cpp +++ b/clang/tools/clang-import-test/clang-import-test.cpp @@ -208,7 +208,7 @@ std::unique_ptr BuildCompilerInstance() { TargetInfo *TI = TargetInfo::CreateTargetInfo( Ins->getDiagnostics(), Ins->getInvocation().TargetOpts); Ins->setTarget(TI); - Ins->getTarget().adjust(Ins->getLangOpts()); + Ins->getTarget().adjust(Ins->getDiagnostics(), Ins->getLangOpts()); Ins->createFileManager(); Ins->createSourceManager(Ins->getFileManager()); Ins->createPreprocessor(TU_Complete); diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp index d38e64f9c5542..af44face09ed1 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp @@ -658,7 +658,8 @@ ClangExpressionParser::ClangExpressionParser( // // FIXME: We shouldn't need to do this, the target should be immutable once // created. This complexity should be lifted elsewhere. - m_compiler->getTarget().adjust(m_compiler->getLangOpts()); + m_compiler->getTarget().adjust(m_compiler->getDiagnostics(), + m_compiler->getLangOpts()); // 6. Set up the diagnostic buffer for reporting errors diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.cpp index c337ee9f79f6b..65f8a9dcdb004 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.cpp @@ -704,7 +704,7 @@ ClangModulesDeclVendor::Create(Target &target) { if (!instance->hasTarget()) return nullptr; - instance->getTarget().adjust(instance->getLangOpts()); + instance->getTarget().adjust(*diagnostics_engine, instance->getLangOpts()); if (!action->BeginSourceFile(*instance, instance->getFrontendOpts().Inputs[0])) From 3999dcae5e763adceb2c3bc1dbc8f2c005c808ef Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Tue, 29 Jun 2021 10:31:58 -0700 Subject: [PATCH 210/619] [Inline] prevent inlining on noprofile mismatch Similar to commit bc044a88ee3c ("[Inline] prevent inlining on stack protector mismatch") The noprofile function attribute is meant to prevent compiler instrumentation from being inserted into a function. Inlining may defeat the developer's intent. If the caller and callee don't either BOTH have the attribute or BOTH lack the attribute, suppress inline substitution. This matches behavior being proposed in GCC: https://gcc.gnu.org/pipermail/gcc-patches/2021-June/573511.html https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80223 Add LangRef entry for noprofile fn attr, similar to text added in D93422 and D104944. Reviewed By: MaskRay, melver, phosek Differential Revision: https://reviews.llvm.org/D104810 --- llvm/docs/LangRef.rst | 4 ++ llvm/include/llvm/IR/Attributes.td | 1 + .../Transforms/Inline/inline_noprofile.ll | 44 +++++++++++++++++++ 3 files changed, 49 insertions(+) create mode 100644 llvm/test/Transforms/Inline/inline_noprofile.ll diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 5bbcc1477c5d6..848ee2343b5e0 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -1656,6 +1656,10 @@ example: This attribute suppresses lazy symbol binding for the function. This may make calls to the function faster, at the cost of extra program startup time if the function is not called during program startup. +``noprofile`` + This function attribute prevents instrumentation based profiling, used for + coverage or profile based optimization, from being added to a function, + even when inlined. ``noredzone`` This attribute indicates that the code generator should not use a red zone, even if the target-specific ABI normally permits it. diff --git a/llvm/include/llvm/IR/Attributes.td b/llvm/include/llvm/IR/Attributes.td index 177471c69fafe..b3ea4f0b873c7 100644 --- a/llvm/include/llvm/IR/Attributes.td +++ b/llvm/include/llvm/IR/Attributes.td @@ -297,6 +297,7 @@ def : CompatRule<"isEqual">; def : CompatRule<"isEqual">; def : CompatRule<"isEqual">; def : CompatRule<"isEqual">; +def : CompatRule<"isEqual">; class MergeRule { // The name of the function called to merge the attributes of the caller and diff --git a/llvm/test/Transforms/Inline/inline_noprofile.ll b/llvm/test/Transforms/Inline/inline_noprofile.ll new file mode 100644 index 0000000000000..dd023b20126fe --- /dev/null +++ b/llvm/test/Transforms/Inline/inline_noprofile.ll @@ -0,0 +1,44 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=inline %s -S -pass-remarks-missed=inline 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-INLINE +; RUN: opt -passes=always-inline %s -S | FileCheck %s + +; Test that we don't inline when caller and callee don't have matching +; noprofile fn attrs. + +; CHECK-INLINE: profile not inlined into profile_caller because it should never be inlined (cost=never): conflicting attributes +; CHECK-INLINE: noprofile not inlined into noprofile_caller because it should never be inlined (cost=never): conflicting attributes + +define i32 @profile() { ret i32 42 } +define i32 @noprofile() noprofile { ret i32 43 } +define i32 @profile_aa() alwaysinline { ret i32 44 } +define i32 @noprofile_aa() noprofile alwaysinline { ret i32 45 } + +define i32 @profile_caller() noprofile { +; CHECK-LABEL: @profile_caller( +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @profile() +; CHECK-NEXT: ret i32 44 +; + call i32 @profile() + %2 = call i32 @profile_aa() + ret i32 %2 +} + +define i32 @noprofile_caller() { +; CHECK-LABEL: @noprofile_caller( +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @noprofile() +; CHECK-NEXT: ret i32 45 +; + call i32 @noprofile() + %2 = call i32 @noprofile_aa() + ret i32 %2 +} + +; Test that we do inline when caller and callee don't have matching +; noprofile fn attrs, when CallInst is alwaysinline. +define i32 @aa_callsite() { +; CHECK-INLINE-LABEL: @aa_callsite( +; CHECK-INLINE-NEXT: ret i32 43 +; + %1 = call i32 @noprofile() alwaysinline + ret i32 %1 +} From a562853a511b078912f3a9fccb4a27220ce75e9e Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Tue, 29 Jun 2021 13:52:26 -0400 Subject: [PATCH 211/619] [libc++] NFC: Fix return-by-const-value and pass-by-const-value typos While we can debate on the value of passing by const value, there is no arguing that it's confusing to do so in some circumstances, such as when marking a pointer parameter as being const (did you mean a pointer-to-const?). This commit fixes a few issues along those lines. --- libcxx/include/vector | 2 +- libcxx/src/string.cpp | 2 +- .../associative/map/map.cons/copy_assign.pass.cpp | 8 ++++---- .../string.view.find/find_last_not_of_char_size.pass.cpp | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/libcxx/include/vector b/libcxx/include/vector index 69babd04f7e6c..52ddd45ffa86b 100644 --- a/libcxx/include/vector +++ b/libcxx/include/vector @@ -144,7 +144,7 @@ public: public: reference(const reference&) noexcept; operator bool() const noexcept; - reference& operator=(const bool x) noexcept; + reference& operator=(bool x) noexcept; reference& operator=(const reference& x) noexcept; iterator operator&() const noexcept; void flip() noexcept; diff --git a/libcxx/src/string.cpp b/libcxx/src/string.cpp index 63b81d67049d4..97a773f79a3be 100644 --- a/libcxx/src/string.cpp +++ b/libcxx/src/string.cpp @@ -423,7 +423,7 @@ get_swprintf() } template -S i_to_string(const V v) +S i_to_string(V v) { // numeric_limits::digits10 returns value less on 1 than desired for unsigned numbers. // For example, for 1-byte unsigned value digits10 is 2 (999 can not be represented), diff --git a/libcxx/test/std/containers/associative/map/map.cons/copy_assign.pass.cpp b/libcxx/test/std/containers/associative/map/map.cons/copy_assign.pass.cpp index 50b44446d117a..bcae4b462a3cb 100644 --- a/libcxx/test/std/containers/associative/map/map.cons/copy_assign.pass.cpp +++ b/libcxx/test/std/containers/associative/map/map.cons/copy_assign.pass.cpp @@ -40,12 +40,12 @@ class counting_allocatorT { template bool operator==(const counting_allocatorT& other) const noexcept { return foo == other.foo; } template bool operator!=(const counting_allocatorT& other) const noexcept { return foo != other.foo; } - T * allocate(const size_t n) const { + T* allocate(size_t n) const { ca_allocs.push_back(foo); void * const pv = ::malloc(n * sizeof(T)); return static_cast(pv); } - void deallocate(T * const p, size_t) const noexcept { + void deallocate(T* p, size_t) const noexcept { ca_deallocs.push_back(foo); free(p); } @@ -63,12 +63,12 @@ class counting_allocatorF { template bool operator==(const counting_allocatorF& other) const noexcept { return foo == other.foo; } template bool operator!=(const counting_allocatorF& other) const noexcept { return foo != other.foo; } - T * allocate(const size_t n) const { + T* allocate(size_t n) const { ca_allocs.push_back(foo); void * const pv = ::malloc(n * sizeof(T)); return static_cast(pv); } - void deallocate(T * const p, size_t) const noexcept { + void deallocate(T* p, size_t) const noexcept { ca_deallocs.push_back(foo); free(p); } diff --git a/libcxx/test/std/strings/string.view/string.view.find/find_last_not_of_char_size.pass.cpp b/libcxx/test/std/strings/string.view/string.view.find/find_last_not_of_char_size.pass.cpp index d2cf8d5d94f9b..798c899e80f28 100644 --- a/libcxx/test/std/strings/string.view/string.view.find/find_last_not_of_char_size.pass.cpp +++ b/libcxx/test/std/strings/string.view/string.view.find/find_last_not_of_char_size.pass.cpp @@ -8,7 +8,7 @@ // -// const size_type find_last_not_of(charT c, size_type pos = npos) const; +// size_type find_last_not_of(charT c, size_type pos = npos) const; #include #include From 71be4db05bbdcc8a9bbe01f54cf273b530327ec7 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 29 Jun 2021 10:56:01 -0700 Subject: [PATCH 212/619] [lldb] Check for the mangled symbol name for objc_copyRealizedClassList_nolock When we check whether the Objective-C SPI is available, we need to check for the mangled symbol name. Unlike `objc_copyRealizedClassList`, which is C exported, the `nolock` variant is not. Differential revision: https://reviews.llvm.org/D105136 --- .../ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp index a3a0827cfe65a..2ea7640ed737b 100644 --- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp +++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp @@ -668,7 +668,7 @@ AppleObjCRuntimeV2::AppleObjCRuntimeV2(Process *process, static const ConstString g_gdb_object_getClass("gdb_object_getClass"); m_has_object_getClass = HasSymbol(g_gdb_object_getClass); static const ConstString g_objc_copyRealizedClassList( - "objc_copyRealizedClassList_nolock"); + "_ZL33objc_copyRealizedClassList_nolockPj"); m_has_objc_copyRealizedClassList = HasSymbol(g_objc_copyRealizedClassList); RegisterObjCExceptionRecognizer(process); From 913229983633cd4c19b9e5534018f9a42e274b30 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 29 Jun 2021 10:38:47 -0700 Subject: [PATCH 213/619] [LegalizeTypes][VE] Don't Expand BITREVERSE/BSWAP during type legalization promotion if they will be promoted for NVT in op legalization. We were trying to expand these if they were going to be expanded in op legalization so that we generated the minimum number of operations. We failed to take into account that NVT could be promoted to another legal type in op legalization. Hoping this fixes the issue on the VE target reported as a follow up to D96681. The check line changes were taken from before 1e46b6f4012399a2fef5fbbb4ed06fc919835414 so this patch does appear to improve some cases that had previously regressed. --- .../SelectionDAG/LegalizeIntegerTypes.cpp | 5 +++-- llvm/test/CodeGen/VE/Scalar/bitreverse.ll | 18 ++++++++++++------ 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 997cc39d709ae..27bc2d8c05f7f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -465,7 +465,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) { // If we expand later we'll end up with more operations since we lost the // original type. We only do this for scalars since we have a shuffle // based lowering for vectors in LegalizeVectorOps. - if (!OVT.isVector() && !TLI.isOperationLegalOrCustom(ISD::BSWAP, NVT)) { + if (!OVT.isVector() && + !TLI.isOperationLegalOrCustomOrPromote(ISD::BSWAP, NVT)) { if (SDValue Res = TLI.expandBSWAP(N, DAG)) return DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Res); } @@ -487,7 +488,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) { // original type. We only do this for scalars since we have a shuffle // based lowering for vectors in LegalizeVectorOps. if (!OVT.isVector() && OVT.isSimple() && - !TLI.isOperationLegalOrCustom(ISD::BITREVERSE, NVT)) { + !TLI.isOperationLegalOrCustomOrPromote(ISD::BITREVERSE, NVT)) { if (SDValue Res = TLI.expandBITREVERSE(N, DAG)) return DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Res); } diff --git a/llvm/test/CodeGen/VE/Scalar/bitreverse.ll b/llvm/test/CodeGen/VE/Scalar/bitreverse.ll index cceca6824adb4..208c207ff5139 100644 --- a/llvm/test/CodeGen/VE/Scalar/bitreverse.ll +++ b/llvm/test/CodeGen/VE/Scalar/bitreverse.ll @@ -49,9 +49,9 @@ define zeroext i32 @func32z(i32 zeroext %p) { define signext i16 @func16s(i16 signext %p) { ; CHECK-LABEL: func16s: ; CHECK: # %bb.0: -; CHECK-NEXT: bswp %s0, %s0, 1 -; CHECK-NEXT: and %s0, %s0, (32)0 -; CHECK-NEXT: srl %s1, %s0, 12 +; CHECK-NEXT: brv %s0, %s0 +; CHECK-NEXT: sra.l %s0, %s0, 48 +; CHECK-NEXT: b.l.t (, %s10) %r = tail call i16 @llvm.bitreverse.i16(i16 %p) ret i16 %r } @@ -59,9 +59,9 @@ define signext i16 @func16s(i16 signext %p) { define zeroext i16 @func16z(i16 zeroext %p) { ; CHECK-LABEL: func16z: ; CHECK: # %bb.0: -; CHECK-NEXT: bswp %s0, %s0, 1 -; CHECK-NEXT: and %s0, %s0, (32)0 -; CHECK-NEXT: srl %s1, %s0, 12 +; CHECK-NEXT: brv %s0, %s0 +; CHECK-NEXT: srl %s0, %s0, 48 +; CHECK-NEXT: b.l.t (, %s10) %r = tail call i16 @llvm.bitreverse.i16(i16 %p) ret i16 %r } @@ -69,6 +69,9 @@ define zeroext i16 @func16z(i16 zeroext %p) { define signext i8 @func8s(i8 signext %p) { ; CHECK-LABEL: func8s: ; CHECK: # %bb.0: +; CHECK-NEXT: brv %s0, %s0 +; CHECK-NEXT: sra.l %s0, %s0, 56 +; CHECK-NEXT: b.l.t (, %s10) %r = tail call i8 @llvm.bitreverse.i8(i8 %p) ret i8 %r } @@ -76,6 +79,9 @@ define signext i8 @func8s(i8 signext %p) { define zeroext i8 @func8z(i8 zeroext %p) { ; CHECK-LABEL: func8z: ; CHECK: # %bb.0: +; CHECK-NEXT: brv %s0, %s0 +; CHECK-NEXT: srl %s0, %s0, 56 +; CHECK-NEXT: b.l.t (, %s10) %r = tail call i8 @llvm.bitreverse.i8(i8 %p) ret i8 %r } From f8aaec19e674c44bfffd2b31611ad1eecc4698bd Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 25 Jun 2021 22:14:37 +0200 Subject: [PATCH 214/619] [OpaquePtr] Support forward references in textual IR Currently, LLParser will create a Function/GlobalVariable forward reference based on the desired pointer type and then modify it when it is declared. With opaque pointers, we generally do not know the correct type to use until we see the declaration. Solve this by creating the forward reference with a dummy type, and then performing a RAUW with the correct Function/GlobalVariable when it is declared. The approach is adopted from https://github.com/TNorthover/llvm-project/commit/b5b55963f62038319fa7a8b1b232226ba1d8ef3c. This results in a change to the use list order, which is why we see test changes on some module passes that are not stable under use list reordering. Differential Revision: https://reviews.llvm.org/D104950 --- llvm/lib/AsmParser/LLParser.cpp | 119 ++++++++++-------- llvm/lib/IR/AsmWriter.cpp | 6 +- .../WebAssembly/add-prototypes-conflict.ll | 6 +- llvm/test/Other/force-opaque-ptrs.ll | 7 ++ .../Attributor/IPConstantProp/PR16052.ll | 24 +++- llvm/test/Transforms/Attributor/misc.ll | 1 + .../function-specialization3.ll | 8 +- .../function-specialization4.ll | 4 +- .../LowerTypeTests/function-weak.ll | 8 +- .../LowerTypeTests/icall-branch-funnel.ll | 4 +- .../OpenMP/parallel_deletion_remarks.ll | 4 +- .../WholeProgramDevirt/branch-funnel.ll | 5 +- .../virtual-const-prop-begin.ll | 18 +-- .../virtual-const-prop-check.ll | 22 ++-- .../virtual-const-prop-end.ll | 18 +-- 15 files changed, 145 insertions(+), 109 deletions(-) diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index ccbc031736c3a..30057866fb3ed 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -1001,10 +1001,12 @@ bool LLParser::parseIndirectSymbol(const std::string &Name, LocTy NameLoc, // See if the alias was forward referenced, if so, prepare to replace the // forward reference. if (!Name.empty()) { - GVal = M->getNamedValue(Name); - if (GVal) { - if (!ForwardRefVals.erase(Name)) - return error(NameLoc, "redefinition of global '@" + Name + "'"); + auto I = ForwardRefVals.find(Name); + if (I != ForwardRefVals.end()) { + GVal = I->second.first; + ForwardRefVals.erase(Name); + } else if (M->getNamedValue(Name)) { + return error(NameLoc, "redefinition of global '@" + Name + "'"); } } else { auto I = ForwardRefValIDs.find(NumberedVals.size()); @@ -1126,10 +1128,12 @@ bool LLParser::parseGlobal(const std::string &Name, LocTy NameLoc, // See if the global was forward referenced, if so, use the global. if (!Name.empty()) { - GVal = M->getNamedValue(Name); - if (GVal) { - if (!ForwardRefVals.erase(Name)) - return error(NameLoc, "redefinition of global '@" + Name + "'"); + auto I = ForwardRefVals.find(Name); + if (I != ForwardRefVals.end()) { + GVal = I->second.first; + ForwardRefVals.erase(I); + } else if (M->getNamedValue(Name)) { + return error(NameLoc, "redefinition of global '@" + Name + "'"); } } else { auto I = ForwardRefValIDs.find(NumberedVals.size()); @@ -1139,22 +1143,9 @@ bool LLParser::parseGlobal(const std::string &Name, LocTy NameLoc, } } - GlobalVariable *GV; - if (!GVal) { - GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage, nullptr, - Name, nullptr, GlobalVariable::NotThreadLocal, - AddrSpace); - } else { - if (GVal->getValueType() != Ty) - return error( - TyLoc, - "forward reference and definition of global have different types"); - - GV = cast(GVal); - - // Move the forward-reference to the correct spot in the module. - M->getGlobalList().splice(M->global_end(), M->getGlobalList(), GV); - } + GlobalVariable *GV = new GlobalVariable( + *M, Ty, false, GlobalValue::ExternalLinkage, nullptr, Name, nullptr, + GlobalVariable::NotThreadLocal, AddrSpace); if (Name.empty()) NumberedVals.push_back(GV); @@ -1171,6 +1162,16 @@ bool LLParser::parseGlobal(const std::string &Name, LocTy NameLoc, GV->setThreadLocalMode(TLM); GV->setUnnamedAddr(UnnamedAddr); + if (GVal) { + if (!GVal->getType()->isOpaque() && GVal->getValueType() != Ty) + return error( + TyLoc, + "forward reference and definition of global have different types"); + + GVal->replaceAllUsesWith(GV); + GVal->eraseFromParent(); + } + // parse attributes on the global. while (Lex.getKind() == lltok::comma) { Lex.Lex(); @@ -1459,14 +1460,21 @@ bool LLParser::parseFnAttributeValuePairs(AttrBuilder &B, // GlobalValue Reference/Resolution Routines. //===----------------------------------------------------------------------===// -static inline GlobalValue *createGlobalFwdRef(Module *M, PointerType *PTy, - const std::string &Name) { - if (auto *FT = dyn_cast(PTy->getElementType())) +static inline GlobalValue *createGlobalFwdRef(Module *M, PointerType *PTy) { + // For opaque pointers, the used global type does not matter. We will later + // RAUW it with a global/function of the correct type. + if (PTy->isOpaque()) + return new GlobalVariable(*M, Type::getInt8Ty(M->getContext()), false, + GlobalValue::ExternalWeakLinkage, nullptr, "", + nullptr, GlobalVariable::NotThreadLocal, + PTy->getAddressSpace()); + + if (auto *FT = dyn_cast(PTy->getPointerElementType())) return Function::Create(FT, GlobalValue::ExternalWeakLinkage, - PTy->getAddressSpace(), Name, M); + PTy->getAddressSpace(), "", M); else - return new GlobalVariable(*M, PTy->getElementType(), false, - GlobalValue::ExternalWeakLinkage, nullptr, Name, + return new GlobalVariable(*M, PTy->getPointerElementType(), false, + GlobalValue::ExternalWeakLinkage, nullptr, "", nullptr, GlobalVariable::NotThreadLocal, PTy->getAddressSpace()); } @@ -1518,7 +1526,7 @@ GlobalValue *LLParser::getGlobalVal(const std::string &Name, Type *Ty, checkValidVariableType(Loc, "@" + Name, Ty, Val, IsCall)); // Otherwise, create a new forward reference for this value and remember it. - GlobalValue *FwdVal = createGlobalFwdRef(M, PTy, Name); + GlobalValue *FwdVal = createGlobalFwdRef(M, PTy); ForwardRefVals[Name] = std::make_pair(FwdVal, Loc); return FwdVal; } @@ -1547,7 +1555,7 @@ GlobalValue *LLParser::getGlobalVal(unsigned ID, Type *Ty, LocTy Loc, checkValidVariableType(Loc, "@" + Twine(ID), Ty, Val, IsCall)); // Otherwise, create a new forward reference for this value and remember it. - GlobalValue *FwdVal = createGlobalFwdRef(M, PTy, ""); + GlobalValue *FwdVal = createGlobalFwdRef(M, PTy); ForwardRefValIDs[ID] = std::make_pair(FwdVal, Loc); return FwdVal; } @@ -5876,24 +5884,27 @@ bool LLParser::parseFunctionHeader(Function *&Fn, bool IsDefine) { PointerType *PFT = PointerType::get(FT, AddrSpace); Fn = nullptr; + GlobalValue *FwdFn = nullptr; if (!FunctionName.empty()) { // If this was a definition of a forward reference, remove the definition // from the forward reference table and fill in the forward ref. auto FRVI = ForwardRefVals.find(FunctionName); if (FRVI != ForwardRefVals.end()) { - Fn = M->getFunction(FunctionName); - if (!Fn) - return error(FRVI->second.second, "invalid forward reference to " - "function as global value!"); - if (Fn->getType() != PFT) - return error(FRVI->second.second, - "invalid forward reference to " - "function '" + - FunctionName + - "' with wrong type: " - "expected '" + - getTypeString(PFT) + "' but was '" + - getTypeString(Fn->getType()) + "'"); + FwdFn = FRVI->second.first; + if (!FwdFn->getType()->isOpaque()) { + if (!FwdFn->getType()->getPointerElementType()->isFunctionTy()) + return error(FRVI->second.second, "invalid forward reference to " + "function as global value!"); + if (FwdFn->getType() != PFT) + return error(FRVI->second.second, + "invalid forward reference to " + "function '" + + FunctionName + + "' with wrong type: " + "expected '" + + getTypeString(PFT) + "' but was '" + + getTypeString(FwdFn->getType()) + "'"); + } ForwardRefVals.erase(FRVI); } else if ((Fn = M->getFunction(FunctionName))) { // Reject redefinitions. @@ -5908,23 +5919,20 @@ bool LLParser::parseFunctionHeader(Function *&Fn, bool IsDefine) { // types agree. auto I = ForwardRefValIDs.find(NumberedVals.size()); if (I != ForwardRefValIDs.end()) { - Fn = cast(I->second.first); - if (Fn->getType() != PFT) + FwdFn = cast(I->second.first); + if (!FwdFn->getType()->isOpaque() && FwdFn->getType() != PFT) return error(NameLoc, "type of definition and forward reference of '@" + Twine(NumberedVals.size()) + "' disagree: " "expected '" + getTypeString(PFT) + "' but was '" + - getTypeString(Fn->getType()) + "'"); + getTypeString(FwdFn->getType()) + "'"); ForwardRefValIDs.erase(I); } } - if (!Fn) - Fn = Function::Create(FT, GlobalValue::ExternalLinkage, AddrSpace, - FunctionName, M); - else // Move the forward-reference to the correct spot in the module. - M->getFunctionList().splice(M->end(), M->getFunctionList(), Fn); + Fn = Function::Create(FT, GlobalValue::ExternalLinkage, AddrSpace, + FunctionName, M); assert(Fn->getAddressSpace() == AddrSpace && "Created function in wrong AS"); @@ -5962,6 +5970,11 @@ bool LLParser::parseFunctionHeader(Function *&Fn, bool IsDefine) { "redefinition of argument '%" + ArgList[i].Name + "'"); } + if (FwdFn) { + FwdFn->replaceAllUsesWith(Fn); + FwdFn->eraseFromParent(); + } + if (IsDefine) return false; diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index b0ee6aa276b53..d62f8f40b571c 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -185,8 +185,10 @@ predictValueUseListOrder(const Value *V, unsigned ID, const OrderMap &OM) { // We may have lost some users. return {}; - bool GetsReversed = - !isa(V) && !isa(V) && !isa(V); + // When referencing a value before its declaration, a temporary value is + // created, which will later be RAUWed with the actual value. This reverses + // the use list. This happens for all values apart from basic blocks. + bool GetsReversed = !isa(V); if (auto *BA = dyn_cast(V)) ID = OM.lookup(BA->getBasicBlock()); llvm::sort(List, [&](const Entry &L, const Entry &R) { diff --git a/llvm/test/CodeGen/WebAssembly/add-prototypes-conflict.ll b/llvm/test/CodeGen/WebAssembly/add-prototypes-conflict.ll index 3b84797d8fdbb..914ac0c89cebe 100644 --- a/llvm/test/CodeGen/WebAssembly/add-prototypes-conflict.ll +++ b/llvm/test/CodeGen/WebAssembly/add-prototypes-conflict.ll @@ -7,15 +7,15 @@ target triple = "wasm32-unknown-unknown" ; WARNING: warning: prototype-less function used with conflicting signatures: foo ; CHECK-LABEL: @call_with_conflicting_prototypes -; CHECK: %call1 = call i64 bitcast (i64 (i32, i32)* @foo to i64 (i32)*)(i32 42) -; CHECK: %call2 = call i64 @foo(i32 42, i32 43) +; CHECK: %call1 = call i64 @foo(i32 42) +; CHECK: %call2 = call i64 bitcast (i64 (i32)* @foo to i64 (i32, i32)*)(i32 42, i32 43) define void @call_with_conflicting_prototypes() { %call1 = call i64 bitcast (i64 (...)* @foo to i64 (i32)*)(i32 42) %call2 = call i64 bitcast (i64 (...)* @foo to i64 (i32, i32)*)(i32 42, i32 43) ret void } -; CHECK: declare extern_weak i64 @foo(i32, i32) +; CHECK: declare extern_weak i64 @foo(i32) declare extern_weak i64 @foo(...) #1 ; CHECK-NOT: attributes {{.*}} = { {{.*}}"no-prototype"{{.*}} } diff --git a/llvm/test/Other/force-opaque-ptrs.ll b/llvm/test/Other/force-opaque-ptrs.ll index fa83bb56080e5..442ef0ac9eb0a 100644 --- a/llvm/test/Other/force-opaque-ptrs.ll +++ b/llvm/test/Other/force-opaque-ptrs.ll @@ -20,8 +20,15 @@ define void @f(i32* %p) { ; CHECK-LABEL: define {{[^@]+}}@f ; CHECK-SAME: (ptr [[P:%.*]]) { ; CHECK-NEXT: [[A:%.*]] = alloca i17, align 4 +; CHECK-NEXT: call void @fn.fwd(i32 0) +; CHECK-NEXT: store i32 0, ptr @g.fwd, align 4 ; CHECK-NEXT: ret void ; %a = alloca i17 + call void @fn.fwd(i32 0) + store i32 0, i32* @g.fwd ret void } + +@g.fwd = global i32 0 +declare void @fn.fwd(i32) diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/PR16052.ll b/llvm/test/Transforms/Attributor/IPConstantProp/PR16052.ll index 972c9fc51ad92..b8aa61b69212a 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/PR16052.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/PR16052.ll @@ -15,11 +15,23 @@ define i64 @fn2() { ; IS__TUNIT____-NEXT: [[CALL2:%.*]] = call i64 @fn1(i64 undef) #[[ATTR0]], !range [[RNG0:![0-9]+]] ; IS__TUNIT____-NEXT: ret i64 [[CALL2]] ; -; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn -; IS__CGSCC____-LABEL: define {{[^@]+}}@fn2 -; IS__CGSCC____-SAME: () #[[ATTR0:[0-9]+]] { -; IS__CGSCC____-NEXT: entry: -; IS__CGSCC____-NEXT: ret i64 undef +; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@fn2 +; IS__CGSCC_OPM-SAME: () #[[ATTR0:[0-9]+]] { +; IS__CGSCC_OPM-NEXT: entry: +; IS__CGSCC_OPM-NEXT: [[CONV:%.*]] = sext i32 undef to i64 +; IS__CGSCC_OPM-NEXT: [[DIV:%.*]] = sdiv i64 8, [[CONV]] +; IS__CGSCC_OPM-NEXT: [[CALL2:%.*]] = call i64 @fn1(i64 undef) #[[ATTR1:[0-9]+]] +; IS__CGSCC_OPM-NEXT: ret i64 [[CALL2]] +; +; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@fn2 +; IS__CGSCC_NPM-SAME: () #[[ATTR0:[0-9]+]] { +; IS__CGSCC_NPM-NEXT: entry: +; IS__CGSCC_NPM-NEXT: [[CONV:%.*]] = sext i32 undef to i64 +; IS__CGSCC_NPM-NEXT: [[DIV:%.*]] = sdiv i64 8, [[CONV]] +; IS__CGSCC_NPM-NEXT: [[CALL2:%.*]] = call i64 @fn1(i64 undef) #[[ATTR1:[0-9]+]], !range [[RNG0:![0-9]+]] +; IS__CGSCC_NPM-NEXT: ret i64 [[CALL2]] ; entry: %conv = sext i32 undef to i64 @@ -40,7 +52,7 @@ define i64 @fn2b(i32 %arg) { ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@fn2b -; IS__CGSCC____-SAME: (i32 [[ARG:%.*]]) #[[ATTR0]] { +; IS__CGSCC____-SAME: (i32 [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { ; IS__CGSCC____-NEXT: entry: ; IS__CGSCC____-NEXT: [[CONV:%.*]] = sext i32 [[ARG]] to i64 ; IS__CGSCC____-NEXT: [[DIV:%.*]] = sdiv i64 8, [[CONV]] diff --git a/llvm/test/Transforms/Attributor/misc.ll b/llvm/test/Transforms/Attributor/misc.ll index c0a1125b0cd3b..858aabfe27286 100644 --- a/llvm/test/Transforms/Attributor/misc.ll +++ b/llvm/test/Transforms/Attributor/misc.ll @@ -26,6 +26,7 @@ define internal void @internal(void (i8*)* %fp) { ; IS__CGSCC____-SAME: (void (i8*)* noundef nonnull [[FP:%.*]]) { ; IS__CGSCC____-NEXT: entry: ; IS__CGSCC____-NEXT: [[A:%.*]] = alloca i32, align 4 +; IS__CGSCC____-NEXT: [[TMP:%.*]] = bitcast i32* [[A]] to i8* ; IS__CGSCC____-NEXT: call void @foo(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[A]]) #[[ATTR1:[0-9]+]] ; IS__CGSCC____-NEXT: call void [[FP]](i8* bitcast (void (i32*)* @foo to i8*)) ; IS__CGSCC____-NEXT: call void @callback1(void (i32*)* noundef nonnull @foo) diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization3.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization3.ll index 2e7e457398185..de9417331637d 100644 --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization3.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization3.ll @@ -14,8 +14,8 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" define dso_local i32 @bar(i32 %x, i32 %y) { ; COMMON-LABEL: @bar -; FORCE: %call = call i32 @foo.2(i32 %x, i32* @A) -; FORCE: %call1 = call i32 @foo.1(i32 %y, i32* @B) +; FORCE: %call = call i32 @foo.1(i32 %x, i32* @A) +; FORCE: %call1 = call i32 @foo.2(i32 %y, i32* @B) ; DISABLED-NOT: %call1 = call i32 @foo.1( entry: %tobool = icmp ne i32 %x, 0 @@ -36,14 +36,14 @@ return: ; FORCE: define internal i32 @foo.1(i32 %x, i32* %b) { ; FORCE-NEXT: entry: -; FORCE-NEXT: %0 = load i32, i32* @B, align 4 +; FORCE-NEXT: %0 = load i32, i32* @A, align 4 ; FORCE-NEXT: %add = add nsw i32 %x, %0 ; FORCE-NEXT: ret i32 %add ; FORCE-NEXT: } ; FORCE: define internal i32 @foo.2(i32 %x, i32* %b) { ; FORCE-NEXT: entry: -; FORCE-NEXT: %0 = load i32, i32* @A, align 4 +; FORCE-NEXT: %0 = load i32, i32* @B, align 4 ; FORCE-NEXT: %add = add nsw i32 %x, %0 ; FORCE-NEXT: ret i32 %add ; FORCE-NEXT: } diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll index 783472d840ced..9adb928dff3ba 100644 --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll @@ -43,7 +43,7 @@ entry: ; CHECK: define internal i32 @foo.1(i32 %x, i32* %b, i32* %c) { ; CHECK-NEXT: entry: -; CHECK-NEXT: %0 = load i32, i32* @B, align 4 +; CHECK-NEXT: %0 = load i32, i32* @A, align 4 ; CHECK-NEXT: %add = add nsw i32 %x, %0 ; CHECK-NEXT: %1 = load i32, i32* %c, align 4 ; CHECK-NEXT: %add1 = add nsw i32 %add, %1 @@ -52,7 +52,7 @@ entry: ; CHECK: define internal i32 @foo.2(i32 %x, i32* %b, i32* %c) { ; CHECK-NEXT: entry: -; CHECK-NEXT: %0 = load i32, i32* @A, align 4 +; CHECK-NEXT: %0 = load i32, i32* @B, align 4 ; CHECK-NEXT: %add = add nsw i32 %x, %0 ; CHECK-NEXT: %1 = load i32, i32* %c, align 4 ; CHECK-NEXT: %add1 = add nsw i32 %add, %1 diff --git a/llvm/test/Transforms/LowerTypeTests/function-weak.ll b/llvm/test/Transforms/LowerTypeTests/function-weak.ll index 17f54be9df83e..5580d20be7b26 100644 --- a/llvm/test/Transforms/LowerTypeTests/function-weak.ll +++ b/llvm/test/Transforms/LowerTypeTests/function-weak.ll @@ -55,11 +55,11 @@ define i1 @foo(i8* %p) { ; CHECK: define internal void @__cfi_global_var_init() section ".text.startup" { ; CHECK-NEXT: entry: -; CHECK-NEXT: store { void ()*, void ()*, i32 } { void ()* select (i1 icmp ne (void ()* @f, void ()* null), void ()* @[[JT]], void ()* null), void ()* select (i1 icmp ne (void ()* @f, void ()* null), void ()* @[[JT]], void ()* null), i32 42 }, { void ()*, void ()*, i32 }* @s, align 8 -; CHECK-NEXT: store void ()* bitcast (i8* getelementptr (i8, i8* bitcast (void ()* select (i1 icmp ne (void ()* @f, void ()* null), void ()* @[[JT]], void ()* null) to i8*), i64 42) to void ()*), void ()** @x4, align 8 -; CHECK-NEXT: store void ()* select (i1 icmp ne (void ()* @f, void ()* null), void ()* @[[JT]], void ()* null), void ()** @x3, align 8 -; CHECK-NEXT: store void ()* select (i1 icmp ne (void ()* @f, void ()* null), void ()* @[[JT]], void ()* null), void ()** @x2, align 8 ; CHECK-NEXT: store void ()* select (i1 icmp ne (void ()* @f, void ()* null), void ()* @[[JT]], void ()* null), void ()** @x, align 8 +; CHECK-NEXT: store void ()* select (i1 icmp ne (void ()* @f, void ()* null), void ()* @[[JT]], void ()* null), void ()** @x2, align 8 +; CHECK-NEXT: store void ()* select (i1 icmp ne (void ()* @f, void ()* null), void ()* @[[JT]], void ()* null), void ()** @x3, align 8 +; CHECK-NEXT: store void ()* bitcast (i8* getelementptr (i8, i8* bitcast (void ()* select (i1 icmp ne (void ()* @f, void ()* null), void ()* @[[JT]], void ()* null) to i8*), i64 42) to void ()*), void ()** @x4, align 8 +; CHECK-NEXT: store { void ()*, void ()*, i32 } { void ()* select (i1 icmp ne (void ()* @f, void ()* null), void ()* @[[JT]], void ()* null), void ()* select (i1 icmp ne (void ()* @f, void ()* null), void ()* @[[JT]], void ()* null), i32 42 }, { void ()*, void ()*, i32 }* @s, align 8 ; CHECK-NEXT: ret void ; CHECK-NEXT: } diff --git a/llvm/test/Transforms/LowerTypeTests/icall-branch-funnel.ll b/llvm/test/Transforms/LowerTypeTests/icall-branch-funnel.ll index 6cd81f275babf..8f2659b6db97a 100644 --- a/llvm/test/Transforms/LowerTypeTests/icall-branch-funnel.ll +++ b/llvm/test/Transforms/LowerTypeTests/icall-branch-funnel.ll @@ -4,10 +4,10 @@ target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux" ; CHECK: @0 = private constant { i32, [0 x i8], i32 } { i32 1, [0 x i8] zeroinitializer, i32 2 } -; CHECK: @f1 = alias void (), void ()* @.cfi.jumptable -; CHECK: @f2 = alias void (), bitcast ([8 x i8]* getelementptr inbounds ([2 x [8 x i8]], [2 x [8 x i8]]* bitcast (void ()* @.cfi.jumptable to [2 x [8 x i8]]*), i64 0, i64 1) to void ()*) ; CHECK: @g1 = alias i32, getelementptr inbounds ({ i32, [0 x i8], i32 }, { i32, [0 x i8], i32 }* @0, i32 0, i32 0) ; CHECK: @g2 = alias i32, getelementptr inbounds ({ i32, [0 x i8], i32 }, { i32, [0 x i8], i32 }* @0, i32 0, i32 2) +; CHECK: @f1 = alias void (), void ()* @.cfi.jumptable +; CHECK: @f2 = alias void (), bitcast ([8 x i8]* getelementptr inbounds ([2 x [8 x i8]], [2 x [8 x i8]]* bitcast (void ()* @.cfi.jumptable to [2 x [8 x i8]]*), i64 0, i64 1) to void ()*) @g1 = constant i32 1 @g2 = constant i32 2 diff --git a/llvm/test/Transforms/OpenMP/parallel_deletion_remarks.ll b/llvm/test/Transforms/OpenMP/parallel_deletion_remarks.ll index ae81f12fcfed4..70854d44eb6ff 100644 --- a/llvm/test/Transforms/OpenMP/parallel_deletion_remarks.ll +++ b/llvm/test/Transforms/OpenMP/parallel_deletion_remarks.ll @@ -23,9 +23,9 @@ target triple = "x86_64-pc-linux-gnu" ; ; This will delete all but the first parallel region -; CHECK: remark: parallel_deletion_remarks.c:14:1: Parallel region in delete_parallel deleted -; CHECK: remark: parallel_deletion_remarks.c:12:1: Parallel region in delete_parallel deleted ; CHECK: remark: parallel_deletion_remarks.c:10:1: Parallel region in delete_parallel deleted +; CHECK: remark: parallel_deletion_remarks.c:12:1: Parallel region in delete_parallel deleted +; CHECK: remark: parallel_deletion_remarks.c:14:1: Parallel region in delete_parallel deleted define dso_local void @delete_parallel() local_unnamed_addr !dbg !15 { call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)), !dbg !18 call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)), !dbg !19 diff --git a/llvm/test/Transforms/WholeProgramDevirt/branch-funnel.ll b/llvm/test/Transforms/WholeProgramDevirt/branch-funnel.ll index bf7c8547f2710..1d147067e92bb 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/branch-funnel.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/branch-funnel.ll @@ -147,10 +147,11 @@ define i32 @fn3(i8* %obj) #0 { ret i32 %result } -; CHECK-LABEL: define internal void @branch_funnel(i8* -; CHECK: define hidden void @__typeid_typeid1_0_branch_funnel(i8* nest %0, ...) +; CHECK-LABEL: define hidden void @__typeid_typeid1_0_branch_funnel(i8* nest %0, ...) ; CHECK-NEXT: musttail call void (...) @llvm.icall.branch.funnel(i8* %0, i8* bitcast ([1 x i8*]* {{(nonnull )?}}@vt1_1 to i8*), i32 (i8*, i32)* {{(nonnull )?}}@vf1_1, i8* bitcast ([1 x i8*]* {{(nonnull )?}}@vt1_2 to i8*), i32 (i8*, i32)* {{(nonnull )?}}@vf1_2, ...) +; CHECK: define internal void @branch_funnel(i8* + declare i1 @llvm.type.test(i8*, metadata) declare void @llvm.assume(i1) diff --git a/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-begin.ll b/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-begin.ll index 22426dfc4497e..30fd7ebcd50c2 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-begin.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-begin.ll @@ -3,28 +3,28 @@ target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" -; CHECK: [[VT1DATA:@[^ ]*]] = private constant { [8 x i8], [3 x i8*], [0 x i8] } { [8 x i8] c"\00\00\00\01\01\00\00\00", [3 x i8*] [i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i32 (i8*)* @vf1i32 to i8*)], [0 x i8] zeroinitializer }, section "vt1sec", !type [[T8:![0-9]+]] +; CHECK: [[VT1DATA:@[^ ]*]] = private constant { [8 x i8], [3 x i8*], [0 x i8] } { [8 x i8] c"\00\00\00\01\00\00\00\02", [3 x i8*] [i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i32 (i8*)* @vf1i32 to i8*)], [0 x i8] zeroinitializer }, section "vt1sec", !type [[T8:![0-9]+]] @vt1 = constant [3 x i8*] [ i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i32 (i8*)* @vf1i32 to i8*) ], section "vt1sec", !type !0 -; CHECK: [[VT2DATA:@[^ ]*]] = private constant { [8 x i8], [3 x i8*], [0 x i8] } { [8 x i8] c"\00\00\00\02\02\00\00\00", [3 x i8*] [i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i32 (i8*)* @vf2i32 to i8*)], [0 x i8] zeroinitializer }, !type [[T8]] +; CHECK: [[VT2DATA:@[^ ]*]] = private constant { [8 x i8], [3 x i8*], [0 x i8] } { [8 x i8] c"\00\00\00\02\00\00\00\01", [3 x i8*] [i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i32 (i8*)* @vf2i32 to i8*)], [0 x i8] zeroinitializer }, !type [[T8]] @vt2 = constant [3 x i8*] [ i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i32 (i8*)* @vf2i32 to i8*) ], !type !0 -; CHECK: [[VT3DATA:@[^ ]*]] = private constant { [5 x i8], [3 x i8*], [0 x i8] } { [5 x i8] c"\01\03\00\00\00", [3 x i8*] [i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i32 (i8*)* @vf3i32 to i8*)], [0 x i8] zeroinitializer }, align 1, !type [[T5:![0-9]+]] +; CHECK: [[VT3DATA:@[^ ]*]] = private constant { [5 x i8], [3 x i8*], [0 x i8] } { [5 x i8] c"\03\00\00\00\02", [3 x i8*] [i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i32 (i8*)* @vf3i32 to i8*)], [0 x i8] zeroinitializer }, align 1, !type [[T5:![0-9]+]] @vt3 = constant [3 x i8*] [ i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i32 (i8*)* @vf3i32 to i8*) ], align 1, !type !0 -; CHECK: [[VT4DATA:@[^ ]*]] = private constant { [16 x i8], [3 x i8*], [0 x i8] } { [16 x i8] c"\00\00\00\00\00\00\00\00\00\00\00\02\04\00\00\00", [3 x i8*] [i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i32 (i8*)* @vf4i32 to i8*)], [0 x i8] zeroinitializer }, align 16, !type [[T16:![0-9]+]] +; CHECK: [[VT4DATA:@[^ ]*]] = private constant { [16 x i8], [3 x i8*], [0 x i8] } { [16 x i8] c"\00\00\00\00\00\00\00\00\00\00\00\04\00\00\00\01", [3 x i8*] [i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i32 (i8*)* @vf4i32 to i8*)], [0 x i8] zeroinitializer }, align 16, !type [[T16:![0-9]+]] @vt4 = constant [3 x i8*] [ i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i1 (i8*)* @vf0i1 to i8*), @@ -79,9 +79,9 @@ define i1 @call1(i8* %obj) { %fptrptr = getelementptr [3 x i8*], [3 x i8*]* %vtable, i32 0, i32 0 %fptr = load i8*, i8** %fptrptr %fptr_casted = bitcast i8* %fptr to i1 (i8*)* - ; CHECK: [[VTGEP1:%[^ ]*]] = getelementptr i8, i8* [[VT1]], i32 -5 + ; CHECK: [[VTGEP1:%[^ ]*]] = getelementptr i8, i8* [[VT1]], i32 -1 ; CHECK: [[VTLOAD1:%[^ ]*]] = load i8, i8* [[VTGEP1]] - ; CHECK: [[VTAND1:%[^ ]*]] = and i8 [[VTLOAD1]], 2 + ; CHECK: [[VTAND1:%[^ ]*]] = and i8 [[VTLOAD1]], 1 ; CHECK: [[VTCMP1:%[^ ]*]] = icmp ne i8 [[VTAND1]], 0 %result = call i1 %fptr_casted(i8* %obj) ; CHECK: ret i1 [[VTCMP1]] @@ -100,9 +100,9 @@ define i1 @call2(i8* %obj) { %fptrptr = getelementptr [3 x i8*], [3 x i8*]* %vtable, i32 0, i32 1 %fptr = load i8*, i8** %fptrptr %fptr_casted = bitcast i8* %fptr to i1 (i8*)* - ; CHECK: [[VTGEP2:%[^ ]*]] = getelementptr i8, i8* [[VT2]], i32 -5 + ; CHECK: [[VTGEP2:%[^ ]*]] = getelementptr i8, i8* [[VT2]], i32 -1 ; CHECK: [[VTLOAD2:%[^ ]*]] = load i8, i8* [[VTGEP2]] - ; CHECK: [[VTAND2:%[^ ]*]] = and i8 [[VTLOAD2]], 1 + ; CHECK: [[VTAND2:%[^ ]*]] = and i8 [[VTLOAD2]], 2 ; CHECK: [[VTCMP2:%[^ ]*]] = icmp ne i8 [[VTAND2]], 0 %result = call i1 %fptr_casted(i8* %obj) ; CHECK: ret i1 [[VTCMP2]] @@ -121,7 +121,7 @@ define i32 @call3(i8* %obj) { %fptrptr = getelementptr [3 x i8*], [3 x i8*]* %vtable, i32 0, i32 2 %fptr = load i8*, i8** %fptrptr %fptr_casted = bitcast i8* %fptr to i32 (i8*)* - ; CHECK: [[VTGEP3:%[^ ]*]] = getelementptr i8, i8* [[VT3]], i32 -4 + ; CHECK: [[VTGEP3:%[^ ]*]] = getelementptr i8, i8* [[VT3]], i32 -5 ; CHECK: [[VTBC3:%[^ ]*]] = bitcast i8* [[VTGEP3]] to i32* ; CHECK: [[VTLOAD3:%[^ ]*]] = load i32, i32* [[VTBC3]] %result = call i32 %fptr_casted(i8* %obj) diff --git a/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-check.ll b/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-check.ll index de432260f40e9..cbdbfe44dc612 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-check.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-check.ll @@ -11,9 +11,9 @@ target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" -; CHECK: remark: :0:0: virtual-const-prop: devirtualized a call to vf1i32 -; CHECK: remark: :0:0: virtual-const-prop-1-bit: devirtualized a call to vf1i1 ; CHECK: remark: :0:0: virtual-const-prop-1-bit: devirtualized a call to vf0i1 +; CHECK: remark: :0:0: virtual-const-prop-1-bit: devirtualized a call to vf1i1 +; CHECK: remark: :0:0: virtual-const-prop: devirtualized a call to vf1i32 ; CHECK: remark: :0:0: devirtualized vf0i1 ; CHECK: remark: :0:0: devirtualized vf1i1 ; CHECK: remark: :0:0: devirtualized vf1i32 @@ -31,28 +31,28 @@ target triple = "x86_64-unknown-linux-gnu" ; SKIP-ALL-NOT: devirtualized -; CHECK: [[VT1DATA:@[^ ]*]] = private constant { [8 x i8], [3 x i8*], [0 x i8] } { [8 x i8] c"\00\00\00\01\01\00\00\00", [3 x i8*] [i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i32 (i8*)* @vf1i32 to i8*)], [0 x i8] zeroinitializer }, section "vt1sec", !type [[T8:![0-9]+]] +; CHECK: [[VT1DATA:@[^ ]*]] = private constant { [8 x i8], [3 x i8*], [0 x i8] } { [8 x i8] c"\00\00\00\01\00\00\00\02", [3 x i8*] [i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i32 (i8*)* @vf1i32 to i8*)], [0 x i8] zeroinitializer }, section "vt1sec", !type [[T8:![0-9]+]] @vt1 = constant [3 x i8*] [ i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i32 (i8*)* @vf1i32 to i8*) ], section "vt1sec", !type !0 -; CHECK: [[VT2DATA:@[^ ]*]] = private constant { [8 x i8], [3 x i8*], [0 x i8] } { [8 x i8] c"\00\00\00\02\02\00\00\00", [3 x i8*] [i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i32 (i8*)* @vf2i32 to i8*)], [0 x i8] zeroinitializer }, !type [[T8]] +; CHECK: [[VT2DATA:@[^ ]*]] = private constant { [8 x i8], [3 x i8*], [0 x i8] } { [8 x i8] c"\00\00\00\02\00\00\00\01", [3 x i8*] [i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i32 (i8*)* @vf2i32 to i8*)], [0 x i8] zeroinitializer }, !type [[T8]] @vt2 = constant [3 x i8*] [ i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i32 (i8*)* @vf2i32 to i8*) ], !type !0 -; CHECK: [[VT3DATA:@[^ ]*]] = private constant { [8 x i8], [3 x i8*], [0 x i8] } { [8 x i8] c"\00\00\00\01\03\00\00\00", [3 x i8*] [i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i32 (i8*)* @vf3i32 to i8*)], [0 x i8] zeroinitializer }, !type [[T8]] +; CHECK: [[VT3DATA:@[^ ]*]] = private constant { [8 x i8], [3 x i8*], [0 x i8] } { [8 x i8] c"\00\00\00\03\00\00\00\02", [3 x i8*] [i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i32 (i8*)* @vf3i32 to i8*)], [0 x i8] zeroinitializer }, !type [[T8]] @vt3 = constant [3 x i8*] [ i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i32 (i8*)* @vf3i32 to i8*) ], !type !0 -; CHECK: [[VT4DATA:@[^ ]*]] = private constant { [8 x i8], [3 x i8*], [0 x i8] } { [8 x i8] c"\00\00\00\02\04\00\00\00", [3 x i8*] [i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i32 (i8*)* @vf4i32 to i8*)], [0 x i8] zeroinitializer }, !type [[T8]] +; CHECK: [[VT4DATA:@[^ ]*]] = private constant { [8 x i8], [3 x i8*], [0 x i8] } { [8 x i8] c"\00\00\00\04\00\00\00\01", [3 x i8*] [i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i32 (i8*)* @vf4i32 to i8*)], [0 x i8] zeroinitializer }, !type [[T8]] @vt4 = constant [3 x i8*] [ i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i1 (i8*)* @vf0i1 to i8*), @@ -104,9 +104,9 @@ define i1 @call1(i8* %obj) { %pair = call {i8*, i1} @llvm.type.checked.load(i8* %vtablei8, i32 0, metadata !"typeid") %fptr = extractvalue {i8*, i1} %pair, 0 %fptr_casted = bitcast i8* %fptr to i1 (i8*)* - ; CHECK: [[VTGEP1:%[^ ]*]] = getelementptr i8, i8* [[VT1]], i32 -5 + ; CHECK: [[VTGEP1:%[^ ]*]] = getelementptr i8, i8* [[VT1]], i32 -1 ; CHECK: [[VTLOAD1:%[^ ]*]] = load i8, i8* [[VTGEP1]] - ; CHECK: [[VTAND1:%[^ ]*]] = and i8 [[VTLOAD1]], 2 + ; CHECK: [[VTAND1:%[^ ]*]] = and i8 [[VTLOAD1]], 1 ; CHECK: [[VTCMP1:%[^ ]*]] = icmp ne i8 [[VTAND1]], 0 %result = call i1 %fptr_casted(i8* %obj) ; CHECK: [[AND1:%[^ ]*]] = and i1 [[VTCMP1]], true @@ -125,9 +125,9 @@ define i1 @call2(i8* %obj) { %pair = call {i8*, i1} @llvm.type.checked.load(i8* %vtablei8, i32 8, metadata !"typeid") %fptr = extractvalue {i8*, i1} %pair, 0 %fptr_casted = bitcast i8* %fptr to i1 (i8*)* - ; CHECK: [[VTGEP2:%[^ ]*]] = getelementptr i8, i8* [[VT2]], i32 -5 + ; CHECK: [[VTGEP2:%[^ ]*]] = getelementptr i8, i8* [[VT2]], i32 -1 ; CHECK: [[VTLOAD2:%[^ ]*]] = load i8, i8* [[VTGEP2]] - ; CHECK: [[VTAND2:%[^ ]*]] = and i8 [[VTLOAD2]], 1 + ; CHECK: [[VTAND2:%[^ ]*]] = and i8 [[VTLOAD2]], 2 ; CHECK: [[VTCMP2:%[^ ]*]] = icmp ne i8 [[VTAND2]], 0 %result = call i1 %fptr_casted(i8* %obj) ; CHECK: [[AND2:%[^ ]*]] = and i1 [[VTCMP2]], true @@ -146,7 +146,7 @@ define i32 @call3(i8* %obj) { %pair = call {i8*, i1} @llvm.type.checked.load(i8* %vtablei8, i32 16, metadata !"typeid") %fptr = extractvalue {i8*, i1} %pair, 0 %fptr_casted = bitcast i8* %fptr to i32 (i8*)* - ; CHECK: [[VTGEP3:%[^ ]*]] = getelementptr i8, i8* [[VT3]], i32 -4 + ; CHECK: [[VTGEP3:%[^ ]*]] = getelementptr i8, i8* [[VT3]], i32 -5 ; CHECK: [[VTBC3:%[^ ]*]] = bitcast i8* [[VTGEP3]] to i32* ; CHECK: [[VTLOAD3:%[^ ]*]] = load i32, i32* [[VTBC3]] %result = call i32 %fptr_casted(i8* %obj) diff --git a/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-end.ll b/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-end.ll index b3cb4de7240ea..9b66ef0cb2f5e 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-end.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-end.ll @@ -3,7 +3,7 @@ target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" -; CHECK: [[VT1DATA:@[^ ]*]] = private constant { [0 x i8], [4 x i8*], [5 x i8] } { [0 x i8] zeroinitializer, [4 x i8*] [i8* null, i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i32 (i8*)* @vf1i32 to i8*)], [5 x i8] c"\01\00\00\00\01" }, !type [[T8:![0-9]+]] +; CHECK: [[VT1DATA:@[^ ]*]] = private constant { [0 x i8], [4 x i8*], [5 x i8] } { [0 x i8] zeroinitializer, [4 x i8*] [i8* null, i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i32 (i8*)* @vf1i32 to i8*)], [5 x i8] c"\02\01\00\00\00" }, !type [[T8:![0-9]+]] @vt1 = constant [4 x i8*] [ i8* null, i8* bitcast (i1 (i8*)* @vf0i1 to i8*), @@ -11,14 +11,14 @@ i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i32 (i8*)* @vf1i32 to i8*) ], !type !1 -; CHECK: [[VT2DATA:@[^ ]*]] = private constant { [0 x i8], [3 x i8*], [5 x i8] } { [0 x i8] zeroinitializer, [3 x i8*] [i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i32 (i8*)* @vf2i32 to i8*)], [5 x i8] c"\02\00\00\00\02" }, !type [[T0:![0-9]+]] +; CHECK: [[VT2DATA:@[^ ]*]] = private constant { [0 x i8], [3 x i8*], [5 x i8] } { [0 x i8] zeroinitializer, [3 x i8*] [i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i32 (i8*)* @vf2i32 to i8*)], [5 x i8] c"\01\02\00\00\00" }, !type [[T0:![0-9]+]] @vt2 = constant [3 x i8*] [ i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i32 (i8*)* @vf2i32 to i8*) ], !type !0 -; CHECK: [[VT3DATA:@[^ ]*]] = private constant { [0 x i8], [4 x i8*], [5 x i8] } { [0 x i8] zeroinitializer, [4 x i8*] [i8* null, i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i32 (i8*)* @vf3i32 to i8*)], [5 x i8] c"\03\00\00\00\01" }, !type [[T8]] +; CHECK: [[VT3DATA:@[^ ]*]] = private constant { [0 x i8], [4 x i8*], [5 x i8] } { [0 x i8] zeroinitializer, [4 x i8*] [i8* null, i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i32 (i8*)* @vf3i32 to i8*)], [5 x i8] c"\02\03\00\00\00" }, !type [[T8]] @vt3 = constant [4 x i8*] [ i8* null, i8* bitcast (i1 (i8*)* @vf0i1 to i8*), @@ -26,7 +26,7 @@ i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i32 (i8*)* @vf3i32 to i8*) ], !type !1 -; CHECK: [[VT4DATA:@[^ ]*]] = private constant { [0 x i8], [3 x i8*], [5 x i8] } { [0 x i8] zeroinitializer, [3 x i8*] [i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i32 (i8*)* @vf4i32 to i8*)], [5 x i8] c"\04\00\00\00\02" }, !type [[T0]] +; CHECK: [[VT4DATA:@[^ ]*]] = private constant { [0 x i8], [3 x i8*], [5 x i8] } { [0 x i8] zeroinitializer, [3 x i8*] [i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i32 (i8*)* @vf4i32 to i8*)], [5 x i8] c"\01\04\00\00\00" }, !type [[T0]] @vt4 = constant [3 x i8*] [ i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i1 (i8*)* @vf0i1 to i8*), @@ -74,9 +74,9 @@ define i1 @call1(i8* %obj) { %fptrptr = getelementptr [3 x i8*], [3 x i8*]* %vtable, i32 0, i32 0 %fptr = load i8*, i8** %fptrptr %fptr_casted = bitcast i8* %fptr to i1 (i8*)* - ; CHECK: [[VTGEP1:%[^ ]*]] = getelementptr i8, i8* [[VT1]], i32 28 + ; CHECK: [[VTGEP1:%[^ ]*]] = getelementptr i8, i8* [[VT1]], i32 24 ; CHECK: [[VTLOAD1:%[^ ]*]] = load i8, i8* [[VTGEP1]] - ; CHECK: [[VTAND1:%[^ ]*]] = and i8 [[VTLOAD1]], 2 + ; CHECK: [[VTAND1:%[^ ]*]] = and i8 [[VTLOAD1]], 1 ; CHECK: [[VTCMP1:%[^ ]*]] = icmp ne i8 [[VTAND1]], 0 %result = call i1 %fptr_casted(i8* %obj) ; CHECK: ret i1 [[VTCMP1]] @@ -95,9 +95,9 @@ define i1 @call2(i8* %obj) { %fptrptr = getelementptr [3 x i8*], [3 x i8*]* %vtable, i32 0, i32 1 %fptr = load i8*, i8** %fptrptr %fptr_casted = bitcast i8* %fptr to i1 (i8*)* - ; CHECK: [[VTGEP2:%[^ ]*]] = getelementptr i8, i8* [[VT2]], i32 28 + ; CHECK: [[VTGEP2:%[^ ]*]] = getelementptr i8, i8* [[VT2]], i32 24 ; CHECK: [[VTLOAD2:%[^ ]*]] = load i8, i8* [[VTGEP2]] - ; CHECK: [[VTAND2:%[^ ]*]] = and i8 [[VTLOAD2]], 1 + ; CHECK: [[VTAND2:%[^ ]*]] = and i8 [[VTLOAD2]], 2 ; CHECK: [[VTCMP2:%[^ ]*]] = icmp ne i8 [[VTAND2]], 0 %result = call i1 %fptr_casted(i8* %obj) ; CHECK: ret i1 [[VTCMP2]] @@ -116,7 +116,7 @@ define i32 @call3(i8* %obj) { %fptrptr = getelementptr [3 x i8*], [3 x i8*]* %vtable, i32 0, i32 2 %fptr = load i8*, i8** %fptrptr %fptr_casted = bitcast i8* %fptr to i32 (i8*)* - ; CHECK: [[VTGEP3:%[^ ]*]] = getelementptr i8, i8* [[VT3]], i32 24 + ; CHECK: [[VTGEP3:%[^ ]*]] = getelementptr i8, i8* [[VT3]], i32 25 ; CHECK: [[VTBC3:%[^ ]*]] = bitcast i8* [[VTGEP3]] to i32* ; CHECK: [[VTLOAD3:%[^ ]*]] = load i32, i32* [[VTBC3]] %result = call i32 %fptr_casted(i8* %obj) From 7b639f50959d046dbfe6b8d4aa9e1071594ca361 Mon Sep 17 00:00:00 2001 From: Jacob Hegna Date: Tue, 29 Jun 2021 18:14:24 +0000 Subject: [PATCH 215/619] [NFC] clang-format on InlineCost.cpp and InlineAdvisor.h. --- llvm/include/llvm/Analysis/InlineAdvisor.h | 8 +--- llvm/lib/Analysis/InlineCost.cpp | 43 ++++++++++------------ 2 files changed, 22 insertions(+), 29 deletions(-) diff --git a/llvm/include/llvm/Analysis/InlineAdvisor.h b/llvm/include/llvm/Analysis/InlineAdvisor.h index 469560cd8c8d8..c27aaf0db8f2f 100644 --- a/llvm/include/llvm/Analysis/InlineAdvisor.h +++ b/llvm/include/llvm/Analysis/InlineAdvisor.h @@ -10,9 +10,9 @@ #define LLVM_ANALYSIS_INLINEADVISOR_H #include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/PassManager.h" -#include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h" #include #include @@ -36,11 +36,7 @@ class OptimizationRemarkEmitter; /// requires the full C Tensorflow API library, and evaluates models /// dynamically. This mode also permits generating training logs, for offline /// training. -enum class InliningAdvisorMode : int { - Default, - Release, - Development -}; +enum class InliningAdvisorMode : int { Default, Release, Development }; class InlineAdvisor; /// Capture state between an inlining decision having had been made, and diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp index 69cf28049b38e..92b0fbd840860 100644 --- a/llvm/lib/Analysis/InlineCost.cpp +++ b/llvm/lib/Analysis/InlineCost.cpp @@ -410,19 +410,18 @@ class CallAnalyzer : public InstVisitor { bool visitUnreachableInst(UnreachableInst &I); public: - CallAnalyzer( - Function &Callee, CallBase &Call, const TargetTransformInfo &TTI, - function_ref GetAssumptionCache, - function_ref GetBFI = nullptr, - ProfileSummaryInfo *PSI = nullptr, - OptimizationRemarkEmitter *ORE = nullptr) + CallAnalyzer(Function &Callee, CallBase &Call, const TargetTransformInfo &TTI, + function_ref GetAssumptionCache, + function_ref GetBFI = nullptr, + ProfileSummaryInfo *PSI = nullptr, + OptimizationRemarkEmitter *ORE = nullptr) : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI), PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE), CandidateCall(Call), EnableLoadElimination(true) {} InlineResult analyze(); - Optional getSimplifiedValue(Instruction *I) { + Optional getSimplifiedValue(Instruction *I) { if (SimplifiedValues.find(I) != SimplifiedValues.end()) return SimplifiedValues[I]; return None; @@ -950,8 +949,8 @@ void CallAnalyzer::disableSROAForArg(AllocaInst *SROAArg) { disableLoadElimination(); } -void InlineCostAnnotationWriter::emitInstructionAnnot(const Instruction *I, - formatted_raw_ostream &OS) { +void InlineCostAnnotationWriter::emitInstructionAnnot( + const Instruction *I, formatted_raw_ostream &OS) { // The cost of inlining of the given instruction is printed always. // The threshold delta is printed only when it is non-zero. It happens // when we decided to give a bonus at a particular instruction. @@ -1056,8 +1055,8 @@ bool CallAnalyzer::visitAlloca(AllocaInst &I) { // is needed to track stack usage during inlining. Type *Ty = I.getAllocatedType(); AllocatedSize = SaturatingMultiplyAdd( - AllocSize->getLimitedValue(), DL.getTypeAllocSize(Ty).getKnownMinSize(), - AllocatedSize); + AllocSize->getLimitedValue(), + DL.getTypeAllocSize(Ty).getKnownMinSize(), AllocatedSize); if (AllocatedSize > InlineConstants::MaxSimplifiedDynamicAllocaToInline) HasDynamicAlloca = true; return false; @@ -1210,11 +1209,11 @@ bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) { if (!DisableGEPConstOperand) if (simplifyInstruction(I, [&](SmallVectorImpl &COps) { - SmallVector Indices; - for (unsigned int Index = 1 ; Index < COps.size() ; ++Index) + SmallVector Indices; + for (unsigned int Index = 1; Index < COps.size(); ++Index) Indices.push_back(COps[Index]); - return ConstantExpr::getGetElementPtr(I.getSourceElementType(), COps[0], - Indices, I.isInBounds()); + return ConstantExpr::getGetElementPtr( + I.getSourceElementType(), COps[0], Indices, I.isInBounds()); })) return true; @@ -1949,9 +1948,9 @@ bool CallAnalyzer::visitSelectInst(SelectInst &SI) { } // Select condition is a constant. - Value *SelectedV = CondC->isAllOnesValue() - ? TrueVal - : (CondC->isNullValue()) ? FalseVal : nullptr; + Value *SelectedV = CondC->isAllOnesValue() ? TrueVal + : (CondC->isNullValue()) ? FalseVal + : nullptr; if (!SelectedV) { // Condition is a vector constant that is not all 1s or all 0s. If all // operands are constants, ConstantExpr::getSelect() can handle the cases @@ -2416,9 +2415,7 @@ void InlineCostCallAnalyzer::print() { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Dump stats about this call's analysis. -LLVM_DUMP_METHOD void InlineCostCallAnalyzer::dump() { - print(); -} +LLVM_DUMP_METHOD void InlineCostCallAnalyzer::dump() { print(); } #endif /// Test that there are no attribute conflicts between Caller and Callee @@ -2772,8 +2769,8 @@ PreservedAnalyses InlineCostAnnotationPrinterPass::run(Function &F, FunctionAnalysisManager &FAM) { PrintInstructionComments = true; - std::function GetAssumptionCache = [&]( - Function &F) -> AssumptionCache & { + std::function GetAssumptionCache = + [&](Function &F) -> AssumptionCache & { return FAM.getResult(F); }; Module *M = F.getParent(); From 69937a8080bc4828f0b317cd54a03ed2704b661a Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 29 Jun 2021 11:23:30 -0700 Subject: [PATCH 216/619] [llvm-objcopy][MachO] Support ARM64_RELOC_ADDEND An ARM64_RELOC_ADDEND relocation reuses the symbol field for the addend value. We should pass through such relocations. Reviewed By: alexander-shaposhnikov Differential Revision: https://reviews.llvm.org/D104967 --- .../test/tools/llvm-objcopy/MachO/arm64-relocs.s | 16 ++++++++++++++++ llvm/tools/llvm-objcopy/MachO/MachOReader.cpp | 7 ++++++- llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp | 2 +- llvm/tools/llvm-objcopy/MachO/Object.h | 3 +++ 4 files changed, 26 insertions(+), 2 deletions(-) create mode 100644 llvm/test/tools/llvm-objcopy/MachO/arm64-relocs.s diff --git a/llvm/test/tools/llvm-objcopy/MachO/arm64-relocs.s b/llvm/test/tools/llvm-objcopy/MachO/arm64-relocs.s new file mode 100644 index 0000000000000..269926666eb37 --- /dev/null +++ b/llvm/test/tools/llvm-objcopy/MachO/arm64-relocs.s @@ -0,0 +1,16 @@ +# REQUIRES: aarch64-registered-target + +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t +# RUN: llvm-objcopy %t %t.copy +# RUN: cmp %t %t.copy + +.text +.globl _foo, _bar +_foo: + ## ARM64_RELOC_ADDEND and ARM64_RELOC_BRANCH26 + bl _bar + 123 + +_bar: + ret + +.subsections_via_symbols diff --git a/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp b/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp index 050dd976f3f35..d1f87bde12403 100644 --- a/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp +++ b/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp @@ -94,6 +94,7 @@ Expected>> static extractSections( S.Content = StringRef(reinterpret_cast(Data->data()), Data->size()); + const uint32_t CPUType = MachOObj.getHeader().cputype; S.Relocations.reserve(S.NReloc); for (auto RI = MachOObj.section_rel_begin(SecRef->getRawDataRefImpl()), RE = MachOObj.section_rel_end(SecRef->getRawDataRefImpl()); @@ -102,6 +103,10 @@ Expected>> static extractSections( R.Symbol = nullptr; // We'll fill this field later. R.Info = MachOObj.getRelocation(RI->getRawDataRefImpl()); R.Scattered = MachOObj.isRelocationScattered(R.Info); + unsigned Type = MachOObj.getAnyRelocationType(R.Info); + // TODO Support CPU_TYPE_ARM. + R.IsAddend = !R.Scattered && (CPUType == MachO::CPU_TYPE_ARM64 && + Type == MachO::ARM64_RELOC_ADDEND); R.Extern = !R.Scattered && MachOObj.getPlainRelocationExternal(R.Info); S.Relocations.push_back(R); } @@ -222,7 +227,7 @@ void MachOReader::setSymbolInRelocationInfo(Object &O) const { for (LoadCommand &LC : O.LoadCommands) for (std::unique_ptr

&Sec : LC.Sections) for (auto &Reloc : Sec->Relocations) - if (!Reloc.Scattered) { + if (!Reloc.Scattered && !Reloc.IsAddend) { const uint32_t SymbolNum = Reloc.getPlainRelocationSymbolNum(MachOObj.isLittleEndian()); if (Reloc.Extern) { diff --git a/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp b/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp index 483703de352f8..24a9d28dfbd96 100644 --- a/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp +++ b/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp @@ -261,7 +261,7 @@ void MachOWriter::writeSections() { Sec->Content.size()); for (size_t Index = 0; Index < Sec->Relocations.size(); ++Index) { RelocationInfo RelocInfo = Sec->Relocations[Index]; - if (!RelocInfo.Scattered) { + if (!RelocInfo.Scattered && !RelocInfo.IsAddend) { const uint32_t SymbolNum = RelocInfo.Extern ? (*RelocInfo.Symbol)->Index : (*RelocInfo.Sec)->Index; diff --git a/llvm/tools/llvm-objcopy/MachO/Object.h b/llvm/tools/llvm-objcopy/MachO/Object.h index 0bb4b344b2eb0..978bd80e97b39 100644 --- a/llvm/tools/llvm-objcopy/MachO/Object.h +++ b/llvm/tools/llvm-objcopy/MachO/Object.h @@ -180,6 +180,9 @@ struct RelocationInfo { Optional Sec; // True if Info is a scattered_relocation_info. bool Scattered; + // True if the type is an ADDEND. r_symbolnum holds the addend instead of a + // symbol index. + bool IsAddend; // True if the r_symbolnum points to a section number (i.e. r_extern=0). bool Extern; MachO::any_relocation_info Info; From c4de78e91c9341b5b1abf927da15e0956a484b79 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 29 Jun 2021 20:29:10 +0200 Subject: [PATCH 217/619] [SanitizerCoverage] Fix global type check with opaque pointers The code was previously relying on the fact that an incorrectly typed global would result in the insertion of a BitCast constant expression. With opaque pointers, this is no longer the case, so we should check the type explicitly. --- llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp | 2 +- .../SanitizerCoverage/stack-depth-variable-declared-by-user.ll | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp index 52670dad777e0..d8720c37305b6 100644 --- a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp +++ b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp @@ -469,7 +469,7 @@ bool ModuleSanitizerCoverage::instrumentModule( Constant *SanCovLowestStackConstant = M.getOrInsertGlobal(SanCovLowestStackName, IntptrTy); SanCovLowestStack = dyn_cast(SanCovLowestStackConstant); - if (!SanCovLowestStack) { + if (!SanCovLowestStack || SanCovLowestStack->getValueType() != IntptrTy) { C->emitError(StringRef("'") + SanCovLowestStackName + "' should not be declared by the user"); return true; diff --git a/llvm/test/Instrumentation/SanitizerCoverage/stack-depth-variable-declared-by-user.ll b/llvm/test/Instrumentation/SanitizerCoverage/stack-depth-variable-declared-by-user.ll index f44c3e0c458f5..e3d225fd1db68 100644 --- a/llvm/test/Instrumentation/SanitizerCoverage/stack-depth-variable-declared-by-user.ll +++ b/llvm/test/Instrumentation/SanitizerCoverage/stack-depth-variable-declared-by-user.ll @@ -4,6 +4,8 @@ ; RUN: -sanitizer-coverage-stack-depth -S 2>&1 -enable-new-pm=0 | FileCheck %s ; RUN: not opt < %s -passes='module(sancov-module)' -sanitizer-coverage-level=1 \ ; RUN: -sanitizer-coverage-stack-depth -S 2>&1 | FileCheck %s +; RUN: not opt < %s -passes='module(sancov-module)' -sanitizer-coverage-level=1 \ +; RUN: -sanitizer-coverage-stack-depth -force-opaque-pointers -S 2>&1 | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" From a9854045f6b657ea1658d3518ac88b5eb45e2eeb Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 29 Jun 2021 11:50:31 -0700 Subject: [PATCH 218/619] [test] Change -t to --syms and -s to -S for llvm-readobj RUN lines -s and -t will be changed to improve consistency with llvm-readelf. The inconsistency issue regularly contributes to confusion using the two tools. --- llvm/test/CodeGen/AMDGPU/amdpal-elf.ll | 4 ++-- llvm/test/CodeGen/AMDGPU/hsa.ll | 4 ++-- llvm/test/CodeGen/AMDGPU/lds-relocs.ll | 2 +- llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll | 2 +- llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll | 2 +- llvm/test/CodeGen/PowerPC/aix-xcoff-lower-comm.ll | 2 +- llvm/test/CodeGen/PowerPC/aix-xcoff-reloc-symb.mir | 2 +- llvm/test/CodeGen/PowerPC/aix-xcoff-reloc.ll | 2 +- llvm/test/MC/AMDGPU/elf-lds.s | 2 +- llvm/test/MC/AMDGPU/hsa-gfx10.s | 2 +- llvm/test/MC/ELF/section-relro.ll | 2 +- 11 files changed, 13 insertions(+), 13 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-elf.ll b/llvm/test/CodeGen/AMDGPU/amdpal-elf.ll index 961ada4da6d1f..3b4ecad617cb3 100644 --- a/llvm/test/CodeGen/AMDGPU/amdpal-elf.ll +++ b/llvm/test/CodeGen/AMDGPU/amdpal-elf.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=kaveri -filetype=obj | llvm-readobj -symbols -s -sd - | FileCheck --check-prefix=ELF %s -; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=kaveri | llvm-mc -filetype=obj -triple amdgcn--amdpal -mcpu=kaveri | llvm-readobj -symbols -s -sd - | FileCheck %s --check-prefix=ELF +; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=kaveri -filetype=obj | llvm-readobj -S --sd --syms - | FileCheck --check-prefix=ELF %s +; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=kaveri | llvm-mc -filetype=obj -triple amdgcn--amdpal -mcpu=kaveri | llvm-readobj -S --sd --syms - | FileCheck %s --check-prefix=ELF ; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 | FileCheck --check-prefix=GFX10 %s ; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 | FileCheck --check-prefix=GFX10 %s diff --git a/llvm/test/CodeGen/AMDGPU/hsa.ll b/llvm/test/CodeGen/AMDGPU/hsa.ll index 862586a356ccd..26c2f035e1545 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa.ll @@ -2,8 +2,8 @@ ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -mattr=-flat-for-global | FileCheck --check-prefix=HSA-CI %s ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo --amdhsa-code-object-version=2 | FileCheck --check-prefix=HSA %s ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo --amdhsa-code-object-version=2 -mattr=-flat-for-global | FileCheck --check-prefix=HSA-VI %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri -filetype=obj --amdhsa-code-object-version=2 | llvm-readobj -symbols -s -sd - | FileCheck --check-prefix=ELF %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 | llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 | llvm-readobj -symbols -s -sd - | FileCheck %s --check-prefix=ELF +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri -filetype=obj --amdhsa-code-object-version=2 | llvm-readobj -S --sd --syms - | FileCheck --check-prefix=ELF %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 | llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 | llvm-readobj -S --sd --syms - | FileCheck %s --check-prefix=ELF ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 | FileCheck --check-prefix=GFX10 --check-prefix=GFX10-W32 %s ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 | FileCheck --check-prefix=GFX10 --check-prefix=GFX10-W64 %s diff --git a/llvm/test/CodeGen/AMDGPU/lds-relocs.ll b/llvm/test/CodeGen/AMDGPU/lds-relocs.ll index a4a8e078cb587..ae062bd06a0f3 100644 --- a/llvm/test/CodeGen/AMDGPU/lds-relocs.ll +++ b/llvm/test/CodeGen/AMDGPU/lds-relocs.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -amdgpu-enable-lower-module-lds=0 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -amdgpu-enable-lower-module-lds=0 -filetype=obj < %s | llvm-readobj -r -t - | FileCheck -check-prefixes=ELF %s +; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -amdgpu-enable-lower-module-lds=0 -filetype=obj < %s | llvm-readobj -r --syms - | FileCheck -check-prefixes=ELF %s @lds.external = external unnamed_addr addrspace(3) global [0 x i32] @lds.defined = unnamed_addr addrspace(3) global [8 x i32] undef, align 8 diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll index 63c21d6fe7284..4b753ac53cf16 100644 --- a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll +++ b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll @@ -1,7 +1,7 @@ ; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec -mtriple powerpc-ibm-aix-xcoff \ ; RUN: -xcoff-traceback-table=false --code-model=large -filetype=obj -o %t.o < %s ; RUN: llvm-readobj --relocs --expand-relocs %t.o | FileCheck --check-prefix=RELOC %s -; RUN: llvm-readobj -t %t.o | FileCheck --check-prefix=SYM %s +; RUN: llvm-readobj --syms %t.o | FileCheck --check-prefix=SYM %s ; RUN: llvm-objdump -D -r --symbol-description %t.o | FileCheck --check-prefix=DIS %s @GInit = global double 1.000000e+00, align 8 diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll index 8b0d5363b730b..97fed400922cf 100644 --- a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll +++ b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll @@ -1,7 +1,7 @@ ; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec -mtriple powerpc-ibm-aix-xcoff \ ; RUN: -xcoff-traceback-table=false -data-sections=false -filetype=obj -o %t.o < %s ; RUN: llvm-readobj --relocs --expand-relocs %t.o | FileCheck --check-prefix=RELOC %s -; RUN: llvm-readobj -t %t.o | FileCheck --check-prefix=SYM %s +; RUN: llvm-readobj --syms %t.o | FileCheck --check-prefix=SYM %s ; RUN: llvm-objdump -D -r --symbol-description %t.o | FileCheck --check-prefix=DIS %s @const_ivar = constant i32 6, align 4 diff --git a/llvm/test/CodeGen/PowerPC/aix-xcoff-lower-comm.ll b/llvm/test/CodeGen/PowerPC/aix-xcoff-lower-comm.ll index 54b915d3ccfb5..3ec5bfcb1d8b8 100644 --- a/llvm/test/CodeGen/PowerPC/aix-xcoff-lower-comm.ll +++ b/llvm/test/CodeGen/PowerPC/aix-xcoff-lower-comm.ll @@ -4,7 +4,7 @@ ; RUN: FileCheck --check-prefixes=CHECK,ASM64 %s ; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mtriple powerpc-ibm-aix-xcoff -data-sections=false -filetype=obj -o %t.o < %s -; RUN: llvm-readobj -r --expand-relocs -t %t.o | FileCheck --check-prefixes=RELOC,SYM %s +; RUN: llvm-readobj -r --expand-relocs --syms %t.o | FileCheck --check-prefixes=RELOC,SYM %s ; RUN: not --crash llc -verify-machineinstrs -mcpu=pwr4 -mtriple powerpc64-ibm-aix-xcoff \ ; RUN: -data-sections=false -filetype=obj < %s 2>&1 | \ diff --git a/llvm/test/CodeGen/PowerPC/aix-xcoff-reloc-symb.mir b/llvm/test/CodeGen/PowerPC/aix-xcoff-reloc-symb.mir index 73ade8c4139e2..c8ee10f7ac247 100644 --- a/llvm/test/CodeGen/PowerPC/aix-xcoff-reloc-symb.mir +++ b/llvm/test/CodeGen/PowerPC/aix-xcoff-reloc-symb.mir @@ -1,6 +1,6 @@ # RUN: llc -verify-machineinstrs -mcpu=pwr4 -mtriple powerpc-ibm-aix-xcoff -x mir -verify-machineinstrs \ # RUN: -xcoff-traceback-table=false -start-after=lazy-machine-block-freq -filetype=obj -o %t.o < %s -# RUN: llvm-readobj --relocs --expand-relocs -t %t.o | FileCheck --check-prefixes=RELOC,SYM %s +# RUN: llvm-readobj --relocs --expand-relocs --syms %t.o | FileCheck --check-prefixes=RELOC,SYM %s # RUN: llvm-objdump -D %t.o | FileCheck --check-prefix=DIS %s --- diff --git a/llvm/test/CodeGen/PowerPC/aix-xcoff-reloc.ll b/llvm/test/CodeGen/PowerPC/aix-xcoff-reloc.ll index 72752576c90ff..6ce251bb49fd8 100644 --- a/llvm/test/CodeGen/PowerPC/aix-xcoff-reloc.ll +++ b/llvm/test/CodeGen/PowerPC/aix-xcoff-reloc.ll @@ -3,7 +3,7 @@ ; RUN: llvm-readobj --section-headers --file-header %t.o | \ ; RUN: FileCheck --check-prefix=OBJ %s ; RUN: llvm-readobj --relocs --expand-relocs %t.o | FileCheck --check-prefix=RELOC %s -; RUN: llvm-readobj -t %t.o | FileCheck --check-prefix=SYM %s +; RUN: llvm-readobj --syms %t.o | FileCheck --check-prefix=SYM %s ; RUN: llvm-objdump -D %t.o | FileCheck --check-prefix=DIS %s ; RUN: llvm-objdump -r %t.o | FileCheck --check-prefix=DIS_REL %s diff --git a/llvm/test/MC/AMDGPU/elf-lds.s b/llvm/test/MC/AMDGPU/elf-lds.s index deb6ba0aad0c7..b2b4ad6120f1b 100644 --- a/llvm/test/MC/AMDGPU/elf-lds.s +++ b/llvm/test/MC/AMDGPU/elf-lds.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -filetype=obj -triple amdgcn-- -mcpu gfx900 %s -o - | llvm-readobj -t -r - | FileCheck %s +// RUN: llvm-mc -filetype=obj -triple amdgcn-- -mcpu gfx900 %s -o - | llvm-readobj -r --syms - | FileCheck %s .text .globl test_kernel diff --git a/llvm/test/MC/AMDGPU/hsa-gfx10.s b/llvm/test/MC/AMDGPU/hsa-gfx10.s index 938e48e6341e4..a7bd3863083e2 100644 --- a/llvm/test/MC/AMDGPU/hsa-gfx10.s +++ b/llvm/test/MC/AMDGPU/hsa-gfx10.s @@ -1,5 +1,5 @@ // RUN: llvm-mc -triple amdgcn--amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=2 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck %s --check-prefix=ASM -// RUN: llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=2 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s | llvm-readobj -symbols -s -sd - | FileCheck %s --check-prefix=ELF +// RUN: llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=2 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s | llvm-readobj -S --sd --syms - | FileCheck %s --check-prefix=ELF // ELF: Section { // ELF: Name: .text diff --git a/llvm/test/MC/ELF/section-relro.ll b/llvm/test/MC/ELF/section-relro.ll index ebc8b18e1007d..d4bacda4b4747 100644 --- a/llvm/test/MC/ELF/section-relro.ll +++ b/llvm/test/MC/ELF/section-relro.ll @@ -1,6 +1,6 @@ ; Tests that data and relro are correctly placed in sections ; specified by "#pragma clang section" -; RUN: llc -filetype=obj -mtriple x86_64-unknown-linux %s -o - | llvm-readobj -S -t - | FileCheck %s +; RUN: llc -filetype=obj -mtriple x86_64-unknown-linux %s -o - | llvm-readobj -S --syms - | FileCheck %s target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux" From c4a00ed85192b31b1fe9c5c3e94f96f826937825 Mon Sep 17 00:00:00 2001 From: Leonard Chan Date: Wed, 23 Jun 2021 16:18:44 -0700 Subject: [PATCH 219/619] [NFC][compiler-rt][hwasan] Move GetCurrentThread to hwasan.cpp We can reuse the same implementation for getting the current thread on fuchsia. Differential Revision: https://reviews.llvm.org/D104824 --- compiler-rt/lib/hwasan/hwasan.cpp | 8 ++++++++ compiler-rt/lib/hwasan/hwasan_linux.cpp | 8 -------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/compiler-rt/lib/hwasan/hwasan.cpp b/compiler-rt/lib/hwasan/hwasan.cpp index 0401783281015..0cae96f35d35b 100644 --- a/compiler-rt/lib/hwasan/hwasan.cpp +++ b/compiler-rt/lib/hwasan/hwasan.cpp @@ -228,6 +228,14 @@ void HwasanTagMismatch(uptr addr, uptr access_info, uptr *registers_frame, __builtin_unreachable(); } +Thread *GetCurrentThread() { + uptr *ThreadLongPtr = GetCurrentThreadLongPtr(); + if (UNLIKELY(*ThreadLongPtr == 0)) + return nullptr; + auto *R = (StackAllocationsRingBuffer *)ThreadLongPtr; + return hwasanThreadList().GetThreadByBufferAddress((uptr)R->Next()); +} + } // namespace __hwasan using namespace __hwasan; diff --git a/compiler-rt/lib/hwasan/hwasan_linux.cpp b/compiler-rt/lib/hwasan/hwasan_linux.cpp index 12bea5ca08444..02672030886ca 100644 --- a/compiler-rt/lib/hwasan/hwasan_linux.cpp +++ b/compiler-rt/lib/hwasan/hwasan_linux.cpp @@ -338,14 +338,6 @@ void AndroidTestTlsSlot() { void AndroidTestTlsSlot() {} #endif -Thread *GetCurrentThread() { - uptr *ThreadLongPtr = GetCurrentThreadLongPtr(); - if (UNLIKELY(*ThreadLongPtr == 0)) - return nullptr; - auto *R = (StackAllocationsRingBuffer *)ThreadLongPtr; - return hwasanThreadList().GetThreadByBufferAddress((uptr)R->Next()); -} - static AccessInfo GetAccessInfo(siginfo_t *info, ucontext_t *uc) { // Access type is passed in a platform dependent way (see below) and encoded // as 0xXY, where X&1 is 1 for store, 0 for load, and X&2 is 1 if the error is From d4dcb55c7050fd908af2378fa551078d859d994f Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 29 Jun 2021 11:56:26 -0700 Subject: [PATCH 220/619] [llvm-readobj] Make -s and -t match llvm-readelf llvm-readobj is an internal testing tool for binary formats. Its output and command line options do not need to be stable. It isn't supposed to be part of a build process. llvm-readelf was created as a user-facing utility and its interface intends to be compatible with GNU readelf (unless there are good reasons not to). The two tools have mostly compatible options. -s and -t are noticeable exceptions due to history. I think the cost of keeping the inconsistency overweighs the little history-compatible benefit and hinders transition from cl::opt to OptTable, so let's change it. Reviewed By: jhenderson Differential Revision: https://reviews.llvm.org/D105055 --- llvm/docs/CommandGuide/llvm-readobj.rst | 4 +-- llvm/docs/ReleaseNotes.rst | 5 ++++ llvm/test/tools/llvm-readobj/ELF/merged.test | 2 +- .../test/tools/llvm-readobj/ELF/sections.test | 4 --- llvm/test/tools/llvm-readobj/ELF/symbols.test | 10 +++---- llvm/test/tools/llvm-readobj/basic.test | 6 ++--- .../yaml2obj/ELF/duplicate-symbol-names.yaml | 2 +- llvm/tools/llvm-readobj/llvm-readobj.cpp | 27 +++++-------------- 8 files changed, 21 insertions(+), 39 deletions(-) diff --git a/llvm/docs/CommandGuide/llvm-readobj.rst b/llvm/docs/CommandGuide/llvm-readobj.rst index 201d1df803c04..84f0aa4235149 100644 --- a/llvm/docs/CommandGuide/llvm-readobj.rst +++ b/llvm/docs/CommandGuide/llvm-readobj.rst @@ -95,7 +95,7 @@ file formats. Display the relocation entries in the file. -.. option:: --sections, --section-headers, -s, -S +.. option:: --sections, --section-headers, -S Display all sections. @@ -123,7 +123,7 @@ file formats. Display the specified section(s) as a list of strings. ``section`` may be a section index or section name. -.. option:: --symbols, --syms, -t +.. option:: --symbols, --syms, -s Display the symbol table. diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index ad3905080338b..3a18d36670108 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -164,6 +164,11 @@ Changes to the LLVM tools ``--x86-asm-syntax`` is a deprecated internal option which will be removed in LLVM 14.0.0. (`D101695 `_) +* The llvm-readobj short aliases ``-s`` (previously ``--sections``) and ``-t`` + (previously ``--syms``) have been changed to ``--syms`` and + ``--section-details`` respectively, to match llvm-readelf. + (`D105055 `_) + Changes to LLDB --------------------------------- diff --git a/llvm/test/tools/llvm-readobj/ELF/merged.test b/llvm/test/tools/llvm-readobj/ELF/merged.test index 5b464ebbdce7c..c3df02691f610 100644 --- a/llvm/test/tools/llvm-readobj/ELF/merged.test +++ b/llvm/test/tools/llvm-readobj/ELF/merged.test @@ -52,7 +52,7 @@ DynamicSymbols: ## llvm-readobj does not support merged args, because it also supports some old ## flags (-st, -sd, etc.), and it would be confusing if only some merged args ## were supported. -# RUN: not llvm-readobj -aeWhSrnudlVgIs %t.o 2>&1 | FileCheck %s --check-prefix=UNKNOWN +# RUN: not llvm-readobj -aeWhSsrnudlVgIS %t.o 2>&1 | FileCheck %s --check-prefix=UNKNOWN # CHECK-NOT: Unknown command line argument # UNKNOWN: for the --section-headers option: may only occur zero or one times! diff --git a/llvm/test/tools/llvm-readobj/ELF/sections.test b/llvm/test/tools/llvm-readobj/ELF/sections.test index efd268bcd8e97..73b0cd21617b7 100644 --- a/llvm/test/tools/llvm-readobj/ELF/sections.test +++ b/llvm/test/tools/llvm-readobj/ELF/sections.test @@ -10,16 +10,12 @@ # RUN: llvm-readobj --sections %t64 > %t64.llvm.sections # RUN: llvm-readobj -S %t64 > %t64.llvm.upper.s # RUN: cmp %t64.llvm.sections %t64.llvm.upper.s -# RUN: llvm-readobj -s %t64 > %t64.llvm.lower.s -# RUN: cmp %t64.llvm.sections %t64.llvm.lower.s # RUN: llvm-readobj --section-headers %t64 > %t64.llvm.section-headers # RUN: cmp %t64.llvm.sections %t64.llvm.section-headers # RUN: llvm-readobj --sections %t32 > %t32.llvm.sections # RUN: llvm-readobj -S %t32 > %t32.llvm.upper.s # RUN: cmp %t32.llvm.sections %t32.llvm.upper.s -# RUN: llvm-readobj -s %t32 > %t32.llvm.lower.s -# RUN: cmp %t32.llvm.sections %t32.llvm.lower.s # RUN: llvm-readobj --section-headers %t32 > %t32.llvm.section-headers # RUN: cmp %t32.llvm.sections %t32.llvm.section-headers diff --git a/llvm/test/tools/llvm-readobj/ELF/symbols.test b/llvm/test/tools/llvm-readobj/ELF/symbols.test index bd126b2f03337..fd4c07c5cceb7 100644 --- a/llvm/test/tools/llvm-readobj/ELF/symbols.test +++ b/llvm/test/tools/llvm-readobj/ELF/symbols.test @@ -70,17 +70,13 @@ # RUN: llvm-readobj --symbols %t64 > %t.symbols # RUN: llvm-readobj --syms %t64 > %t.syms # RUN: cmp %t.symbols %t.syms -# RUN: llvm-readobj -t %t64 > %t.t -# RUN: cmp %t.symbols %t.t -# RUN: llvm-readelf -s --elf-output-style=LLVM %t64 > %t.lowers -# RUN: cmp %t.symbols %t.lowers +# RUN: llvm-readobj -s %t64 | diff %t.symbols - +# RUN: llvm-readelf -s --elf-output-style=LLVM %t64 | diff %t.symbols - # RUN: llvm-readelf --symbols %t64 > %t.symbols.gnu # RUN: llvm-readelf --syms %t64 > %t.syms.gnu # RUN: cmp %t.symbols.gnu %t.syms.gnu - -## -s is an llvm-readobj option to dump sections. -# RUN: llvm-readobj -s --elf-output-style=GNU %t64 | FileCheck %s --implicit-check-not="Symbol table" +# RUN: llvm-readelf -s %t64 | diff %t.symbols.gnu - ## Case 3: Test that both regular and dynamic symbols are dumped when `--symbols` and `--dyn-symbols` ## are specified together. Note that the order is different for different styles. diff --git a/llvm/test/tools/llvm-readobj/basic.test b/llvm/test/tools/llvm-readobj/basic.test index 08c83ad91000f..a30ac4eaedef2 100644 --- a/llvm/test/tools/llvm-readobj/basic.test +++ b/llvm/test/tools/llvm-readobj/basic.test @@ -54,8 +54,6 @@ HELP: OVERVIEW: LLVM Object Reader OBJ: llvm-readobj{{.*}} [options] ELF: llvm-readelf{{.*}} [options] HELP: OPTIONS: -OBJ: -s - Alias for --section-headers -OBJ: -t - Alias for --symbols -ELF: -s - Alias for --symbols -ELF: -t - Alias for --section-details +HELP -s - Alias for --symbols +HELP -t - Alias for --section-details HELP: @FILE diff --git a/llvm/test/tools/yaml2obj/ELF/duplicate-symbol-names.yaml b/llvm/test/tools/yaml2obj/ELF/duplicate-symbol-names.yaml index 75a25f34ac32e..d4883d642fb95 100644 --- a/llvm/test/tools/yaml2obj/ELF/duplicate-symbol-names.yaml +++ b/llvm/test/tools/yaml2obj/ELF/duplicate-symbol-names.yaml @@ -2,7 +2,7 @@ ## containing symbols with duplicate names (but different name suffixes). # RUN: yaml2obj --docnum=1 %s -o %t1 -# RUN: llvm-readobj -t %t1 | FileCheck %s --check-prefix=CASE1 +# RUN: llvm-readobj --syms %t1 | FileCheck %s --check-prefix=CASE1 # CASE1: Name: localfoo (1) # CASE1: Name: localfoo (1) diff --git a/llvm/tools/llvm-readobj/llvm-readobj.cpp b/llvm/tools/llvm-readobj/llvm-readobj.cpp index 86f9a0a855663..8d07688dd07fd 100644 --- a/llvm/tools/llvm-readobj/llvm-readobj.cpp +++ b/llvm/tools/llvm-readobj/llvm-readobj.cpp @@ -142,6 +142,10 @@ namespace opts { // Also -t in llvm-readelf mode. cl::opt SectionDetails("section-details", cl::desc("Display the section details")); + static cl::alias SectionDetailsShort("t", + cl::desc("Alias for --section-details"), + cl::aliasopt(SectionDetails), + cl::NotHidden); // --symbols // Also -s in llvm-readelf mode, or -t in llvm-readobj mode. @@ -151,6 +155,9 @@ namespace opts { "symbol table when using GNU output style for ELF")); cl::alias SymbolsGNU("syms", cl::desc("Alias for --symbols"), cl::aliasopt(Symbols)); + static cl::alias SymbolsShort("s", cl::desc("Alias for --symbols"), + cl::aliasopt(Symbols), cl::NotHidden, + cl::Grouping); // --dyn-symbols, --dyn-syms // Also --dt in llvm-readobj mode. @@ -694,16 +701,6 @@ static void dumpInput(StringRef File, ScopedPrinter &Writer) { /// Registers aliases that should only be allowed by readobj. static void registerReadobjAliases() { - // -s has meant --sections for a very long time in llvm-readobj despite - // meaning --symbols in readelf. - static cl::alias SectionsShort("s", cl::desc("Alias for --section-headers"), - cl::aliasopt(opts::SectionHeaders), - cl::NotHidden); - - // llvm-readelf reserves it for --section-details. - static cl::alias SymbolsShort("t", cl::desc("Alias for --symbols"), - cl::aliasopt(opts::Symbols), cl::NotHidden); - // The following two-letter aliases are only provided for readobj, as readelf // allows single-letter args to be grouped together. static cl::alias SectionRelocationsShort( @@ -721,16 +718,6 @@ static void registerReadobjAliases() { /// Registers aliases that should only be allowed by readelf. static void registerReadelfAliases() { - // -s is here because for readobj it means --sections. - static cl::alias SymbolsShort("s", cl::desc("Alias for --symbols"), - cl::aliasopt(opts::Symbols), cl::NotHidden, - cl::Grouping); - - // -t is here because for readobj it is an alias for --symbols. - static cl::alias SectionDetailsShort( - "t", cl::desc("Alias for --section-details"), - cl::aliasopt(opts::SectionDetails), cl::NotHidden); - // Allow all single letter flags to be grouped together. for (auto &OptEntry : cl::getRegisteredOptions()) { StringRef ArgName = OptEntry.getKey(); From 8e74668e96da5c38c7fabb1881a6510d6a09112d Mon Sep 17 00:00:00 2001 From: Leonard Chan Date: Wed, 23 Jun 2021 16:12:52 -0700 Subject: [PATCH 221/619] [NFC][compiler-rt][hwasan] Re-use ring buffer size calculation Users can call HwasanThreadList::GetRingBufferSize rather than RingBufferSize to prevent having to do the calculation in RingBufferSize. This will be useful for Fuchsia where we plan to initialize the stack ring buffer separately from the rest of thread initialization. Differential Revision: https://reviews.llvm.org/D104823 --- compiler-rt/lib/hwasan/hwasan_thread_list.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/compiler-rt/lib/hwasan/hwasan_thread_list.h b/compiler-rt/lib/hwasan/hwasan_thread_list.h index 8e6c8adf1e598..15916a802d6ee 100644 --- a/compiler-rt/lib/hwasan/hwasan_thread_list.h +++ b/compiler-rt/lib/hwasan/hwasan_thread_list.h @@ -171,6 +171,8 @@ class HwasanThreadList { return stats_; } + uptr GetRingBufferSize() const { return ring_buffer_size_; } + private: Thread *AllocThread() { SpinMutexLock l(&free_space_mutex_); From 90dfd059198ed94334f9b1ccfd29b566feb75e8b Mon Sep 17 00:00:00 2001 From: Stefan Pintilie Date: Tue, 29 Jun 2021 14:01:48 -0500 Subject: [PATCH 222/619] [Clang] Add option to handle behaviour of vector bool/vector pixel. Added the option `-altivec-src-compat=[mixed,gcc,xl]`. The default at this time is `mixed`. The default behavior for clang is for all vector compares to return a scalar unless the vectors being compared are vector bool or vector pixel. In that case the compare returns a vector. With the gcc case all vector compares return vectors and in the xl case all vector compares return scalars. This patch does not change the default behavior of clang. This option will be used in future patches to implement behaviour compatibility for the vector bool/pixel types. Reviewed By: bmahjour Differential Revision: https://reviews.llvm.org/D103615 --- .../clang/Basic/DiagnosticSemaKinds.td | 6 + clang/include/clang/Basic/LangOptions.def | 2 + clang/include/clang/Basic/LangOptions.h | 12 ++ clang/include/clang/Driver/Options.td | 12 ++ clang/lib/Driver/ToolChains/Clang.cpp | 1 + clang/lib/Sema/SemaExpr.cpp | 29 ++- .../vector-compat-pixel-bool-ternary.c | 98 ++++++++++ clang/test/CodeGen/vector-compat-pixel-bool.c | 88 +++++++++ clang/test/CodeGen/vector-compat-ternary.c | 170 ++++++++++++++++++ clang/test/CodeGen/vector-compat.c | 152 ++++++++++++++++ 10 files changed, 565 insertions(+), 5 deletions(-) create mode 100644 clang/test/CodeGen/vector-compat-pixel-bool-ternary.c create mode 100644 clang/test/CodeGen/vector-compat-pixel-bool.c create mode 100644 clang/test/CodeGen/vector-compat-ternary.c create mode 100644 clang/test/CodeGen/vector-compat.c diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index b5b8bc6aa3c57..70a22fd2506a3 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -7441,6 +7441,12 @@ def warn_deprecated_volatile_structured_binding : Warning< "volatile qualifier in structured binding declaration is deprecated">, InGroup; +def warn_deprecated_altivec_src_compat : Warning< + "Current handling of vector bool and vector pixel types in this context are " + "deprecated. The default behaviour will soon change to that implied by the " + "'-altivec-compat=xl' option">, + InGroup>; + def err_catch_incomplete_ptr : Error< "cannot catch pointer to incomplete type %0">; def err_catch_incomplete_ref : Error< diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index b6d9160f89a00..465bad8d7d112 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -126,6 +126,8 @@ LANGOPT(WritableStrings , 1, 0, "writable string support") LANGOPT(ConstStrings , 1, 0, "const-qualified string support") ENUM_LANGOPT(LaxVectorConversions, LaxVectorConversionKind, 2, LaxVectorConversionKind::All, "lax vector conversions") +ENUM_LANGOPT(AltivecSrcCompat, AltivecSrcCompatKind, 2, + AltivecSrcCompatKind::Default, "Altivec source compatibility") LANGOPT(ConvergentFunctions, 1, 1, "Assume convergent functions") LANGOPT(AltiVec , 1, 0, "AltiVec-style vector initializers") LANGOPT(ZVector , 1, 0, "System z vector extensions") diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h index d618daf3d23c2..d04ce52a550ef 100644 --- a/clang/include/clang/Basic/LangOptions.h +++ b/clang/include/clang/Basic/LangOptions.h @@ -244,6 +244,18 @@ class LangOptions : public LangOptionsBase { All, }; + enum class AltivecSrcCompatKind { + // All vector compares produce scalars except vector pixel and vector bool. + // The types vector pixel and vector bool return vector results. + Mixed, + // All vector compares produce vector results as in GCC. + GCC, + // All vector compares produce scalars as in XL. + XL, + // Default clang behaviour. + Default = Mixed, + }; + enum class SignReturnAddressScopeKind { /// No signing for any function. None, diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index af004e0c28b54..301f59207c4a5 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3823,6 +3823,18 @@ def u : JoinedOrSeparate<["-"], "u">, Group; def v : Flag<["-"], "v">, Flags<[CC1Option, CoreOption]>, HelpText<"Show commands to run and use verbose output">, MarshallingInfoFlag>; +def altivec_src_compat : Joined<["-"], "faltivec-src-compat=">, + Flags<[CC1Option]>, Group, + HelpText<"Source-level compatibility for Altivec vectors (for PowerPC " + "targets). This includes results of vector comparison (scalar for " + "'xl', vector for 'gcc') as well as behavior when initializing with " + "a scalar (splatting for 'xl', element zero only for 'gcc'). For " + "'mixed', the compatibility is as 'gcc' for 'vector bool/vector " + "pixel' and as 'xl' for other types. Current default is 'mixed'.">, + Values<"mixed,gcc,xl">, + NormalizedValuesScope<"LangOptions::AltivecSrcCompatKind">, + NormalizedValues<["Mixed", "GCC", "XL"]>, + MarshallingInfoEnum, "Mixed">; def verify_debug_info : Flag<["--"], "verify-debug-info">, Flags<[NoXarchOption]>, HelpText<"Verify the binary representation of debug output">; def weak_l : Joined<["-"], "weak-l">, Flags<[LinkerInput]>; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index a3f0ec577379e..c265e1c4e53cb 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5816,6 +5816,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, (Args.hasArg(options::OPT_mkernel) && types::isCXX(InputType))) CmdArgs.push_back("-fapple-kext"); + Args.AddLastArg(CmdArgs, options::OPT_altivec_src_compat); Args.AddLastArg(CmdArgs, options::OPT_flax_vector_conversions_EQ); Args.AddLastArg(CmdArgs, options::OPT_fobjc_sender_dependent_dispatch); Args.AddLastArg(CmdArgs, options::OPT_fdiagnostics_print_source_range_info); diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 728d7b61d4a86..6031dff673351 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -12224,11 +12224,30 @@ QualType Sema::CheckVectorCompareOperands(ExprResult &LHS, ExprResult &RHS, QualType LHSType = LHS.get()->getType(); - // If AltiVec, the comparison results in a numeric type, i.e. - // bool for C++, int for C - if (getLangOpts().AltiVec && - vType->castAs()->getVectorKind() == VectorType::AltiVecVector) - return Context.getLogicalOperationType(); + // Determine the return type of a vector compare. By default clang will return + // a scalar for all vector compares except vector bool and vector pixel. + // With the gcc compiler we will always return a vector type and with the xl + // compiler we will always return a scalar type. This switch allows choosing + // which behavior is prefered. + if (getLangOpts().AltiVec) { + switch (getLangOpts().getAltivecSrcCompat()) { + case LangOptions::AltivecSrcCompatKind::Mixed: + // If AltiVec, the comparison results in a numeric type, i.e. + // bool for C++, int for C + if (vType->castAs()->getVectorKind() == + VectorType::AltiVecVector) + return Context.getLogicalOperationType(); + else + Diag(Loc, diag::warn_deprecated_altivec_src_compat); + break; + case LangOptions::AltivecSrcCompatKind::GCC: + // For GCC we always return the vector type. + break; + case LangOptions::AltivecSrcCompatKind::XL: + return Context.getLogicalOperationType(); + break; + } + } // For non-floating point types, check for self-comparisons of the form // x == x, x != x, x < x, etc. These always evaluate to a constant, and diff --git a/clang/test/CodeGen/vector-compat-pixel-bool-ternary.c b/clang/test/CodeGen/vector-compat-pixel-bool-ternary.c new file mode 100644 index 0000000000000..9b383f4cb20bf --- /dev/null +++ b/clang/test/CodeGen/vector-compat-pixel-bool-ternary.c @@ -0,0 +1,98 @@ +// RUN: not %clang_cc1 -target-feature +altivec -target-feature +vsx \ +// RUN: -faltivec-src-compat=mixed -triple powerpc-unknown-unknown -S -emit-llvm %s -o - 2>&1 | FileCheck %s --check-prefix=ERROR +// RUN: not %clang_cc1 -target-feature +altivec -target-feature +vsx \ +// RUN: -faltivec-src-compat=gcc -triple powerpc-unknown-unknown -S -emit-llvm %s -o - 2>&1| FileCheck %s --check-prefix=ERROR +// RUN: %clang_cc1 -target-feature +altivec -target-feature +vsx \ +// RUN: -faltivec-src-compat=xl -triple powerpc-unknown-unknown -S -emit-llvm %s -o - | FileCheck %s +// RUN: %clang -mcpu=pwr8 -faltivec-src-compat=xl --target=powerpc-unknown-unknown -S -emit-llvm %s -o - | FileCheck %s +// RUN: %clang -mcpu=pwr9 -faltivec-src-compat=xl --target=powerpc-unknown-unknown -S -emit-llvm %s -o - | FileCheck %s + +// CHECK-LABEL: @bi8( +// CHECK: [[A_ADDR:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: store <16 x i8> [[A:%.*]], <16 x i8>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <16 x i8> [[B:%.*]], <16 x i8>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ppc.altivec.vcmpequb.p(i32 2, <16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i64 +// CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 3, i32 7 +// CHECK-NEXT: ret i32 [[COND]] +// +// ERROR: error: used type '__attribute__((__vector_size__(16 * sizeof(char)))) char' (vector of 16 'char' values) where arithmetic or pointer type is required +int bi8(vector bool char a, vector bool char b) { + return a == b ? 3 : 7; +} + +// CHECK-LABEL: @bi16( +// CHECK: [[A_ADDR:%.*]] = alloca <8 x i16>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x i16>, align 16 +// CHECK-NEXT: store <8 x i16> [[A:%.*]], <8 x i16>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <8 x i16> [[B:%.*]], <8 x i16>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, <8 x i16>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ppc.altivec.vcmpequh.p(i32 2, <8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i64 +// CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 3, i32 7 +// CHECK-NEXT: ret i32 [[COND]] +// +// ERROR: error: used type '__attribute__((__vector_size__(8 * sizeof(short)))) short' (vector of 8 'short' values) where arithmetic or pointer type is required +int bi16(vector bool short a, vector bool short b) { + return a == b ? 3 : 7; +} + +// CHECK-LABEL: @bi32( +// CHECK: [[A_ADDR:%.*]] = alloca <4 x i32>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x i32>, align 16 +// CHECK-NEXT: store <4 x i32> [[A:%.*]], <4 x i32>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <4 x i32> [[B:%.*]], <4 x i32>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ppc.altivec.vcmpequw.p(i32 2, <4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i64 +// CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 3, i32 7 +// CHECK-NEXT: ret i32 [[COND]] +// +// ERROR: error: used type '__attribute__((__vector_size__(4 * sizeof(long)))) long' (vector of 4 'long' values) where arithmetic or pointer type is required +int bi32(vector bool int a, vector bool int b) { + return a == b ? 3 : 7; +} + +// CHECK-LABEL: @bi64( +// CHECK: [[A_ADDR:%.*]] = alloca <2 x i64>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x i64>, align 16 +// CHECK-NEXT: store <2 x i64> [[A:%.*]], <2 x i64>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <2 x i64> [[B:%.*]], <2 x i64>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ppc.altivec.vcmpequd.p(i32 2, <2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i64 +// CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 3, i32 7 +// CHECK-NEXT: ret i32 [[COND]] +// +// ERROR: error: used type '__attribute__((__vector_size__(2 * sizeof(long long)))) long long' (vector of 2 'long long' values) where arithmetic or pointer type is required +int bi64(vector bool long long a, vector bool long long b) { + return a == b ? 3 : 7; +} + +// CHECK-LABEL: @VecPixel( +// CHECK: [[A_ADDR:%.*]] = alloca <8 x i16>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x i16>, align 16 +// CHECK-NEXT: store <8 x i16> [[A:%.*]], <8 x i16>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <8 x i16> [[B:%.*]], <8 x i16>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, <8 x i16>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ppc.altivec.vcmpequh.p(i32 2, <8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i64 +// CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 3, i32 7 +// CHECK-NEXT: ret i32 [[COND]] +// +// ERROR: error: used type '__attribute__((__vector_size__(8 * sizeof(short)))) short' (vector of 8 'short' values) where arithmetic or pointer type is required +int VecPixel(vector pixel a, vector pixel b) { + return a == b ? 3 : 7; +} diff --git a/clang/test/CodeGen/vector-compat-pixel-bool.c b/clang/test/CodeGen/vector-compat-pixel-bool.c new file mode 100644 index 0000000000000..d8179ad96ee8d --- /dev/null +++ b/clang/test/CodeGen/vector-compat-pixel-bool.c @@ -0,0 +1,88 @@ +// RUN: %clang_cc1 -target-feature +altivec -target-feature +vsx \ +// RUN: -faltivec-src-compat=mixed -triple powerpc-unknown-unknown -S -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -target-feature +altivec -target-feature +vsx \ +// RUN: -faltivec-src-compat=gcc -triple powerpc-unknown-unknown -S -emit-llvm %s -o - | FileCheck %s +// RUN: not %clang_cc1 -target-feature +altivec -target-feature +vsx \ +// RUN: -faltivec-src-compat=xl -triple powerpc-unknown-unknown -S -emit-llvm %s -o - 2>&1 | FileCheck %s --check-prefix=ERROR +// RUN: %clang -mcpu=pwr8 -faltivec-src-compat=gcc --target=powerpc-unknown-unknown -S -emit-llvm %s -o - | FileCheck %s +// RUN: %clang -mcpu=pwr9 -faltivec-src-compat=gcc --target=powerpc-unknown-unknown -S -emit-llvm %s -o - | FileCheck %s + +// CHECK-LABEL: @bi8( +// CHECK: [[A_ADDR:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: store <16 x i8> [[A:%.*]], <16 x i8>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <16 x i8> [[B:%.*]], <16 x i8>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[CMP:%.*]] = icmp eq <16 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i8> +// CHECK-NEXT: ret <16 x i8> [[SEXT]] +// +// ERROR: returning 'int' from a function with incompatible result type +vector unsigned char bi8(vector bool char a, vector bool char b) { + return a == b; +} + +// CHECK-LABEL: @bi16( +// CHECK: [[A_ADDR:%.*]] = alloca <8 x i16>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x i16>, align 16 +// CHECK-NEXT: store <8 x i16> [[A:%.*]], <8 x i16>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <8 x i16> [[B:%.*]], <8 x i16>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, <8 x i16>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[CMP:%.*]] = icmp eq <8 x i16> [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i16> +// CHECK-NEXT: ret <8 x i16> [[SEXT]] +// +// ERROR: returning 'int' from a function with incompatible result type +vector bool short bi16(vector bool short a, vector bool short b) { + return a == b; +} + +// CHECK-LABEL: @bi32( +// CHECK: [[A_ADDR:%.*]] = alloca <4 x i32>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x i32>, align 16 +// CHECK-NEXT: store <4 x i32> [[A:%.*]], <4 x i32>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <4 x i32> [[B:%.*]], <4 x i32>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i32> [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[SEXT]] +// +// ERROR: returning 'int' from a function with incompatible result type +vector bool int bi32(vector bool int a, vector bool int b) { + return a == b; +} + +// CHECK-LABEL: @bi64( +// CHECK: [[A_ADDR:%.*]] = alloca <2 x i64>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x i64>, align 16 +// CHECK-NEXT: store <2 x i64> [[A:%.*]], <2 x i64>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <2 x i64> [[B:%.*]], <2 x i64>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i64> [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64> +// CHECK-NEXT: ret <2 x i64> [[SEXT]] +// +// ERROR: returning 'int' from a function with incompatible result type +vector long long bi64(vector bool long long a, vector bool long long b) { + return a == b; +} + +// CHECK-LABEL: @VecPixel( +// CHECK: [[A_ADDR:%.*]] = alloca <8 x i16>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x i16>, align 16 +// CHECK-NEXT: store <8 x i16> [[A:%.*]], <8 x i16>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <8 x i16> [[B:%.*]], <8 x i16>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, <8 x i16>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[CMP:%.*]] = icmp eq <8 x i16> [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i16> +// CHECK-NEXT: ret <8 x i16> [[SEXT]] +// +// ERROR: returning 'int' from a function with incompatible result type +vector pixel VecPixel(vector pixel a, vector pixel b) { + return a == b; +} diff --git a/clang/test/CodeGen/vector-compat-ternary.c b/clang/test/CodeGen/vector-compat-ternary.c new file mode 100644 index 0000000000000..5b57980a7c2d5 --- /dev/null +++ b/clang/test/CodeGen/vector-compat-ternary.c @@ -0,0 +1,170 @@ +// RUN: %clang_cc1 -target-feature +altivec -target-feature +vsx \ +// RUN: -faltivec-src-compat=mixed -triple powerpc-unknown-unknown -S -emit-llvm %s -o - | FileCheck %s +// RUN: not %clang_cc1 -target-feature +altivec -target-feature +vsx \ +// RUN: -faltivec-src-compat=gcc -triple powerpc-unknown-unknown -S -emit-llvm %s -o - 2>&1 | FileCheck %s --check-prefix=ERROR +// RUN: %clang_cc1 -target-feature +altivec -target-feature +vsx \ +// RUN: -faltivec-src-compat=xl -triple powerpc-unknown-unknown -S -emit-llvm %s -o - | FileCheck %s +// RUN: %clang -mcpu=pwr8 -faltivec-src-compat=xl --target=powerpc-unknown-unknown -S -emit-llvm %s -o - | FileCheck %s +// RUN: %clang -mcpu=pwr9 -faltivec-src-compat=xl --target=powerpc-unknown-unknown -S -emit-llvm %s -o - | FileCheck %s + +// CHECK-LABEL: @ui8( +// CHECK: [[A_ADDR:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: store <16 x i8> [[A:%.*]], <16 x i8>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <16 x i8> [[B:%.*]], <16 x i8>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ppc.altivec.vcmpequb.p(i32 2, <16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i64 +// CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 3, i32 7 +// CHECK-NEXT: ret i32 [[COND]] +// +// ERROR: error: used type '__attribute__((__vector_size__(16 * sizeof(char)))) char' (vector of 16 'char' values) where arithmetic or pointer type is required +int ui8(vector unsigned char a, vector unsigned char b) { + return a == b ? 3 : 7; +} + +// CHECK-LABEL: @si8( +// CHECK: [[A_ADDR:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: store <16 x i8> [[A:%.*]], <16 x i8>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <16 x i8> [[B:%.*]], <16 x i8>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ppc.altivec.vcmpequb.p(i32 2, <16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i64 +// CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 3, i32 7 +// CHECK-NEXT: ret i32 [[COND]] +// +// ERROR: error: used type '__attribute__((__vector_size__(16 * sizeof(char)))) char' (vector of 16 'char' values) where arithmetic or pointer type is required +int si8(vector signed char a, vector signed char b) { + return a == b ? 3 : 7; +} + +// CHECK-LABEL: @ui16( +// CHECK: [[A_ADDR:%.*]] = alloca <8 x i16>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x i16>, align 16 +// CHECK-NEXT: store <8 x i16> [[A:%.*]], <8 x i16>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <8 x i16> [[B:%.*]], <8 x i16>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, <8 x i16>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ppc.altivec.vcmpequh.p(i32 2, <8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i64 +// CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 3, i32 7 +// CHECK-NEXT: ret i32 [[COND]] +// +// ERROR: error: used type '__attribute__((__vector_size__(8 * sizeof(short)))) short' (vector of 8 'short' values) where arithmetic or pointer type is required +int ui16(vector unsigned short a, vector unsigned short b) { + return a == b ? 3 : 7; +} + +// CHECK-LABEL: @si16( +// CHECK: [[A_ADDR:%.*]] = alloca <8 x i16>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x i16>, align 16 +// CHECK-NEXT: store <8 x i16> [[A:%.*]], <8 x i16>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <8 x i16> [[B:%.*]], <8 x i16>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, <8 x i16>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ppc.altivec.vcmpequh.p(i32 2, <8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i64 +// CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 3, i32 7 +// CHECK-NEXT: ret i32 [[COND]] +// +// ERROR: error: used type '__attribute__((__vector_size__(8 * sizeof(short)))) short' (vector of 8 'short' values) where arithmetic or pointer type is required +int si16(vector signed short a, vector signed short b) { + return a == b ? 3 : 7; +} + +// CHECK-LABEL: @ui32( +// CHECK: [[A_ADDR:%.*]] = alloca <4 x i32>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x i32>, align 16 +// CHECK-NEXT: store <4 x i32> [[A:%.*]], <4 x i32>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <4 x i32> [[B:%.*]], <4 x i32>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ppc.altivec.vcmpequw.p(i32 2, <4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i64 +// CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 3, i32 7 +// CHECK-NEXT: ret i32 [[COND]] +// +// ERROR: error: used type '__attribute__((__vector_size__(4 * sizeof(long)))) long' (vector of 4 'long' values) where arithmetic or pointer type is required +int ui32(vector unsigned int a, vector unsigned int b) { + return a == b ? 3 : 7; +} + +// CHECK-LABEL: @si32( +// CHECK: [[A_ADDR:%.*]] = alloca <4 x i32>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x i32>, align 16 +// CHECK-NEXT: store <4 x i32> [[A:%.*]], <4 x i32>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <4 x i32> [[B:%.*]], <4 x i32>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ppc.altivec.vcmpequw.p(i32 2, <4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i64 +// CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 3, i32 7 +// CHECK-NEXT: ret i32 [[COND]] +// +// ERROR: error: used type '__attribute__((__vector_size__(4 * sizeof(long)))) long' (vector of 4 'long' values) where arithmetic or pointer type is required +int si32(vector signed int a, vector signed int b) { + return a == b ? 3 : 7; +} + +// CHECK-LABEL: @si64( +// CHECK: [[A_ADDR:%.*]] = alloca <2 x i64>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x i64>, align 16 +// CHECK-NEXT: store <2 x i64> [[A:%.*]], <2 x i64>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <2 x i64> [[B:%.*]], <2 x i64>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ppc.altivec.vcmpequd.p(i32 2, <2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i64 +// CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 3, i32 7 +// CHECK-NEXT: ret i32 [[COND]] +// +// ERROR: error: used type '__attribute__((__vector_size__(2 * sizeof(long long)))) long long' (vector of 2 'long long' values) where arithmetic or pointer type is required +int si64(vector long long a, vector long long b) { + return a == b ? 3 : 7; +} + +// CHECK-LABEL: @f32( +// CHECK: [[A_ADDR:%.*]] = alloca <4 x float>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x float>, align 16 +// CHECK-NEXT: store <4 x float> [[A:%.*]], <4 x float>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <4 x float> [[B:%.*]], <4 x float>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ppc.altivec.vcmpeqfp.p(i32 2, <4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i64 +// CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 3, i32 7 +// CHECK-NEXT: ret i32 [[COND]] +// +// ERROR: error: used type '__attribute__((__vector_size__(4 * sizeof(long)))) long' (vector of 4 'long' values) where arithmetic or pointer type is required +int f32(vector float a, vector float b) { + return a == b ? 3 : 7; +} + +// CHECK-LABEL: @f64( +// CHECK: [[A_ADDR:%.*]] = alloca <2 x double>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x double>, align 16 +// CHECK-NEXT: store <2 x double> [[A:%.*]], <2 x double>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <2 x double> [[B:%.*]], <2 x double>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, <2 x double>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ppc.vsx.xvcmpeqdp.p(i32 2, <2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i64 +// CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 3, i32 7 +// CHECK-NEXT: ret i32 [[COND]] +// +// ERROR: error: used type '__attribute__((__vector_size__(2 * sizeof(long long)))) long long' (vector of 2 'long long' values) where arithmetic or pointer type is required +int f64(vector double a, vector double b) { + return a == b ? 3 : 7; +} diff --git a/clang/test/CodeGen/vector-compat.c b/clang/test/CodeGen/vector-compat.c new file mode 100644 index 0000000000000..023f1e8f36999 --- /dev/null +++ b/clang/test/CodeGen/vector-compat.c @@ -0,0 +1,152 @@ +// RUN: not %clang_cc1 -target-feature +altivec -target-feature +vsx \ +// RUN: -faltivec-src-compat=mixed -triple powerpc-unknown-unknown -S -emit-llvm %s -o - 2>&1 | FileCheck %s --check-prefix=ERROR +// RUN: %clang_cc1 -target-feature +altivec -target-feature +vsx \ +// RUN: -faltivec-src-compat=gcc -triple powerpc-unknown-unknown -S -emit-llvm %s -o - | FileCheck %s +// RUN: not %clang_cc1 -target-feature +altivec -target-feature +vsx \ +// RUN: -faltivec-src-compat=xl -triple powerpc-unknown-unknown -S -emit-llvm %s -o - 2>&1 | FileCheck %s --check-prefix=ERROR +// RUN: %clang -mcpu=pwr8 -faltivec-src-compat=gcc --target=powerpc-unknown-unknown -S -emit-llvm %s -o - | FileCheck %s +// RUN: %clang -mcpu=pwr9 -faltivec-src-compat=gcc --target=powerpc-unknown-unknown -S -emit-llvm %s -o - | FileCheck %s + +// CHECK-LABEL: @ui8( +// CHECK: [[A_ADDR:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: store <16 x i8> [[A:%.*]], <16 x i8>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <16 x i8> [[B:%.*]], <16 x i8>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[CMP:%.*]] = icmp eq <16 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i8> +// CHECK-NEXT: ret <16 x i8> [[SEXT]] +// +// ERROR: returning 'int' from a function with incompatible result type +vector unsigned char ui8(vector unsigned char a, vector unsigned char b) { + return a == b; +} + +// CHECK-LABEL: @si8( +// CHECK: [[A_ADDR:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <16 x i8>, align 16 +// CHECK-NEXT: store <16 x i8> [[A:%.*]], <16 x i8>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <16 x i8> [[B:%.*]], <16 x i8>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, <16 x i8>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[CMP:%.*]] = icmp eq <16 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i8> +// CHECK-NEXT: ret <16 x i8> [[SEXT]] +// +// ERROR: returning 'int' from a function with incompatible result type +vector signed char si8(vector signed char a, vector signed char b) { + return a == b; +} + +// CHECK-LABEL: @ui16( +// CHECK: [[A_ADDR:%.*]] = alloca <8 x i16>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x i16>, align 16 +// CHECK-NEXT: store <8 x i16> [[A:%.*]], <8 x i16>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <8 x i16> [[B:%.*]], <8 x i16>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, <8 x i16>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[CMP:%.*]] = icmp eq <8 x i16> [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i16> +// CHECK-NEXT: ret <8 x i16> [[SEXT]] +// +// ERROR: returning 'int' from a function with incompatible result type +vector unsigned short ui16(vector unsigned short a, vector unsigned short b) { + return a == b; +} + +// CHECK-LABEL: @si16( +// CHECK: [[A_ADDR:%.*]] = alloca <8 x i16>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x i16>, align 16 +// CHECK-NEXT: store <8 x i16> [[A:%.*]], <8 x i16>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <8 x i16> [[B:%.*]], <8 x i16>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, <8 x i16>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[CMP:%.*]] = icmp eq <8 x i16> [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i16> +// CHECK-NEXT: ret <8 x i16> [[SEXT]] +// +// ERROR: returning 'int' from a function with incompatible result type +vector signed short si16(vector signed short a, vector signed short b) { + return a == b; +} + +// CHECK-LABEL: @ui32( +// CHECK: [[A_ADDR:%.*]] = alloca <4 x i32>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x i32>, align 16 +// CHECK-NEXT: store <4 x i32> [[A:%.*]], <4 x i32>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <4 x i32> [[B:%.*]], <4 x i32>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i32> [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[SEXT]] +// +// ERROR: returning 'int' from a function with incompatible result type +vector unsigned int ui32(vector unsigned int a, vector unsigned int b) { + return a == b; +} + +// CHECK-LABEL: @si32( +// CHECK: [[A_ADDR:%.*]] = alloca <4 x i32>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x i32>, align 16 +// CHECK-NEXT: store <4 x i32> [[A:%.*]], <4 x i32>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <4 x i32> [[B:%.*]], <4 x i32>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i32> [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[SEXT]] +// +// ERROR: returning 'int' from a function with incompatible result type +vector signed int si32(vector signed int a, vector signed int b) { + return a == b; +} + +// CHECK-LABEL: @si64( +// CHECK: [[A_ADDR:%.*]] = alloca <2 x i64>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x i64>, align 16 +// CHECK-NEXT: store <2 x i64> [[A:%.*]], <2 x i64>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <2 x i64> [[B:%.*]], <2 x i64>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i64> [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64> +// CHECK-NEXT: ret <2 x i64> [[SEXT]] +// +// ERROR: returning 'int' from a function with incompatible result type +vector long long si64(vector long long a, vector long long b) { + return a == b; +} + +// CHECK-LABEL: @f32( +// CHECK: [[A_ADDR:%.*]] = alloca <4 x float>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x float>, align 16 +// CHECK-NEXT: store <4 x float> [[A:%.*]], <4 x float>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <4 x float> [[B:%.*]], <4 x float>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[CMP:%.*]] = fcmp oeq <4 x float> [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[SEXT]] +// +// ERROR: returning 'int' from a function with incompatible result type +vector int f32(vector float a, vector float b) { + return a == b; +} + +// CHECK-LABEL: @f64( +// CHECK: [[A_ADDR:%.*]] = alloca <2 x double>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x double>, align 16 +// CHECK-NEXT: store <2 x double> [[A:%.*]], <2 x double>* [[A_ADDR]], align 16 +// CHECK-NEXT: store <2 x double> [[B:%.*]], <2 x double>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, <2 x double>* [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[B_ADDR]], align 16 +// CHECK-NEXT: [[CMP:%.*]] = fcmp oeq <2 x double> [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64> +// CHECK-NEXT: ret <2 x i64> [[SEXT]] +// +// ERROR: returning 'int' from a function with incompatible result type +vector long long f64(vector double a, vector double b) { + return a == b; +} From a77524cd2c20f4d0aa4ddd164f12f7af64b5bfc8 Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Tue, 29 Jun 2021 15:39:14 +0000 Subject: [PATCH 223/619] [mlir][Linalg] Add a ComprehensiveModuleBufferizePass and support for CallOp analysis(9/n) This revision adds the minimal plumbing to create a simple ComprehensiveModuleBufferizePass that can behave conservatively in the presence of CallOps. A topological sort of caller/callee is performed and, if the call-graph is cycle-free, analysis can proceed. Differential revision: https://reviews.llvm.org/D104859 --- mlir/include/mlir/Dialect/Linalg/Passes.h | 8 + mlir/include/mlir/Dialect/Linalg/Passes.td | 21 +- mlir/include/mlir/IR/FunctionSupport.h | 4 + .../Transforms/ComprehensiveBufferize.cpp | 242 +++++++++++++++--- ...mprehensive-module-bufferize-analysis.mlir | 84 ++++++ ...omprehensive-module-bufferize-invalid.mlir | 15 ++ 6 files changed, 336 insertions(+), 38 deletions(-) create mode 100644 mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis.mlir create mode 100644 mlir/test/Dialect/Linalg/comprehensive-module-bufferize-invalid.mlir diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.h b/mlir/include/mlir/Dialect/Linalg/Passes.h index b3433e5c58a53..d80eb9a0652de 100644 --- a/mlir/include/mlir/Dialect/Linalg/Passes.h +++ b/mlir/include/mlir/Dialect/Linalg/Passes.h @@ -62,6 +62,14 @@ std::unique_ptr> createConvertLinalgToAffineLoopsPass(); /// b) whose buffer uses would be free of memory hazards. std::unique_ptr createLinalgComprehensiveFuncBufferizePass(); +/// This pass implements a cross-dialect bufferization approach and performs an +/// analysis to determine which op operands and results may be bufferized in the +/// same buffers. The analysis is performed on topologically sorted CallOp and +/// FuncOp within a module. It provides analyses and bufferization across +/// function boundaries. Within a single function body, the bufferization used +/// is that provided by `LinalgComprehensiveFuncBufferizePass`. +std::unique_ptr createLinalgComprehensiveModuleBufferizePass(); + /// Create a pass to convert Linalg operations which work on tensors to use /// buffers instead. std::unique_ptr> createLinalgBufferizePass(); diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.td b/mlir/include/mlir/Dialect/Linalg/Passes.td index d36d655638a2d..3d9833061a090 100644 --- a/mlir/include/mlir/Dialect/Linalg/Passes.td +++ b/mlir/include/mlir/Dialect/Linalg/Passes.td @@ -32,7 +32,7 @@ def LinalgComprehensiveFuncBufferize : This pass implements a cross-dialect bufferization approach and performs an analysis to determine which op operands and results may be bufferized in the same buffers. The analysis is performed on SSA use-def chains starting from - function operands that are annotated with the 'inplaceable' attribute + function operands that are annotated with the 'inplaceable' attribute. }]; let options = [ Option<"testAnalysisOnly", "test-analysis-only", "bool", @@ -42,6 +42,25 @@ def LinalgComprehensiveFuncBufferize : let constructor = "mlir::createLinalgComprehensiveFuncBufferizePass()"; } +def LinalgComprehensiveModuleBufferize : + Pass<"linalg-comprehensive-module-bufferize", "ModuleOp"> { + let summary = "Bufferize (tensor into memref) for a Module."; + let description = [{ + This pass implements a cross-dialect bufferization approach and performs an + analysis to determine which op operands and results may be bufferized in the + same buffers. The analysis is performed on topologically sorted CallOp and + FuncOp within a module. It provides analyses and bufferization across + function boundaries. Within a single function body, the bufferization used + is that provided by `-linalg-comprehensive-func-bufferize`. + }]; + let options = [ + Option<"testAnalysisOnly", "test-analysis-only", "bool", + /*default=*/"false", + "Only runs inplaceability analysis (for testing purposes only)"> + ]; + let constructor = "mlir::createLinalgComprehensiveModuleBufferizePass()"; +} + def LinalgFoldUnitExtentDims : FunctionPass<"linalg-fold-unit-extent-dims"> { let summary = "Remove unit-extent dimension in Linalg ops on tensors"; let constructor = "mlir::createLinalgFoldUnitExtentDimsPass()"; diff --git a/mlir/include/mlir/IR/FunctionSupport.h b/mlir/include/mlir/IR/FunctionSupport.h index c081fb24bf268..f27c857a175a6 100644 --- a/mlir/include/mlir/IR/FunctionSupport.h +++ b/mlir/include/mlir/IR/FunctionSupport.h @@ -375,6 +375,10 @@ class FunctionLike : public OpTrait::TraitBase { /// attribute that was erased, or nullptr if there was no attribute with such /// name. Attribute removeArgAttr(unsigned index, Identifier name); + Attribute removeArgAttr(unsigned index, StringRef name) { + return removeArgAttr( + index, Identifier::get(name, this->getOperation()->getContext())); + } //===--------------------------------------------------------------------===// // Result Attributes diff --git a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp index 996e90662a454..dd49ae43d8fe6 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp @@ -16,7 +16,7 @@ // Composability with extensible set of ops is not a first-class concern. // // Bufferization occurs by: -// a. performing an inPlace analysis `inPlaceAnalysisFuncOpInternals` +// a. performing an inPlace analysis `inPlaceAnalysisFuncOpBody` // which marks each operation within the function with the // `kInPlaceResultsAttrName` attribute. // b. traversing each operation in the function and rewriting it in @@ -132,6 +132,19 @@ using namespace tensor; #define DBGS() (llvm::dbgs() << '[' << DEBUG_TYPE << "] ") #define LDBG(X) LLVM_DEBUG(DBGS() << X) +//===----------------------------------------------------------------------===// +// Generic helpers. +//===----------------------------------------------------------------------===// + +/// Return the FuncOp called by `callOp`. +static FuncOp getCalledFunction(CallOpInterface callOp) { + SymbolRefAttr sym = callOp.getCallableForCallee().dyn_cast(); + if (!sym) + return nullptr; + return dyn_cast_or_null( + SymbolTable::lookupNearestSymbolFrom(callOp, sym)); +} + //===----------------------------------------------------------------------===// // Bufferization-specific BlockAndValueMapping support with debugging. //===----------------------------------------------------------------------===// @@ -167,6 +180,7 @@ static Value lookup(BlockAndValueMapping &bvm, Value key) { parentOp = key.getDefiningOp()->getParentOfType(); } LDBG("In func:\n" << *parentOp << "NO VALUE FOR KEY: " << key << '\n'); + (void)parentOp; return Value(); } @@ -276,6 +290,25 @@ static InPlaceSpec getInPlace(BlockArgument bbArg) { return InPlaceSpec::None; } +/// Set the attribute that triggers inplace bufferization on a FuncOp argument +/// `bbArg`. +static void +setInPlaceFuncArgument(BlockArgument bbArg, + InPlaceSpec inPlaceSpec = InPlaceSpec::True) { + auto funcOp = cast(bbArg.getOwner()->getParentOp()); + funcOp.setArgAttr( + bbArg.getArgNumber(), LinalgDialect::kInplaceableAttrName, + BoolAttr::get(bbArg.getContext(), inPlaceSpec == InPlaceSpec::True)); +} + +/// Remove the attribute that triggers inplace bufferization on a FuncOp +/// argument `bbArg`. +static void removeInPlaceFuncArgument(BlockArgument bbArg) { + auto funcOp = cast(bbArg.getOwner()->getParentOp()); + funcOp.removeArgAttr(bbArg.getArgNumber(), + LinalgDialect::kInplaceableAttrName); +} + LLVM_ATTRIBUTE_UNUSED static InPlaceSpec getInPlace(Value v) { if (auto bbArg = v.dyn_cast()) return getInPlace(bbArg); @@ -305,7 +338,8 @@ LLVM_ATTRIBUTE_UNUSED static InPlaceSpec getInPlace(Value v) { static bool hasKnownBufferizationAliasingBehavior(Operation *op) { return // clang-format off - isa(opOperand.getOwner())) return false; + // CallOpInterface alone doesn't bufferize to a memory read, one of the uses + // of the matching bbArg may. It is the responsibility of the caller to + // inspect bbArgs. In the absence of a BufferizationAliasInfo, we need to be + // conservative. + if (auto callOp = dyn_cast(opOperand.getOwner())) + return true; if (auto linalgOp = dyn_cast(opOperand.getOwner())) return linalgOp.isInputTensor(&opOperand) || linalgOp.isInitTensor(&opOperand); @@ -473,6 +517,19 @@ static bool bufferizesToMemoryRead(OpOperand &opOperand) { static bool bufferizesToMemoryWrite(OpOperand &opOperand, InPlaceSpec inPlaceSpec = InPlaceSpec::None) { + // These terminators are not writes. + if (isa(opOperand.getOwner())) + return false; + // ExtractSliceOp alone doesn't bufferize to a memory write, one of its uses + // may. + if (isa(opOperand.getOwner())) + return false; + // CallOpInterface alone doesn't bufferize to a memory write, one of the uses + // of the matching bbArg may. It is the responsibility of the caller to + // inspect bbArgs. In the absence of a BufferizationAliasInfo, we need to be + // conservative. + if (auto callOp = dyn_cast(opOperand.getOwner())) + return true; Optional maybeOpResult = getAliasingOpResult(opOperand); // Unknown op that returns a tensor. The inplace analysis does not support // it. Conservatively return true. @@ -482,13 +539,6 @@ bufferizesToMemoryWrite(OpOperand &opOperand, // This does not bufferize to a write. if (!*maybeOpResult) return false; - // These terminators are not writes. - if (isa(opOperand.getOwner())) - return false; - // ExtractSliceOp alone doesn't bufferize to a memory write, one of its uses - // may. - if (maybeOpResult->getDefiningOp()) - return false; // If we have a matching OpResult, this is a write. // Additionally allow to restrict to only inPlace write, if so specified. return inPlaceSpec == InPlaceSpec::None || @@ -521,7 +571,11 @@ class BufferizationAliasInfo { Equivalent }; - explicit BufferizationAliasInfo(FuncOp funcOp); + explicit BufferizationAliasInfo(Operation *rootOp); + + /// Add a new entry for `v` in the `aliasInfo` and `equivalentInfo`. In the + /// beginning the alias and equivalence sets only contain `v` itself. + void createAliasInfoEntry(Value v); /// Return true if the buffer to which `operand` would bufferize aliases a /// buffer that is known to not be writeable. This implies that the matching @@ -664,33 +718,28 @@ class BufferizationAliasInfo { }; } // namespace -BufferizationAliasInfo::BufferizationAliasInfo(FuncOp funcOp) { - funcOp.walk([&](Operation *op) { - for (Value v : op->getResults()) { - if (!v.getType().isa()) - continue; - assert(getInPlace(v) == InPlaceSpec::None && - "unexpected inplace in analysis."); - DenseSet selfSet; - selfSet.insert(v); - aliasInfo.try_emplace(v, selfSet); - equivalentInfo.insert(v); - } - for (Region &r : op->getRegions()) { - for (Block &b : r.getBlocks()) { - for (auto bbArg : b.getArguments()) { - if (!bbArg.getType().isa()) - continue; - DenseSet selfSet; - selfSet.insert(bbArg); - aliasInfo.try_emplace(bbArg, selfSet); - equivalentInfo.insert(bbArg); - } - } - } +BufferizationAliasInfo::BufferizationAliasInfo(Operation *rootOp) { + rootOp->walk([&](Operation *op) { + for (Value v : op->getResults()) + if (v.getType().isa()) + createAliasInfoEntry(v); + for (Region &r : op->getRegions()) + for (Block &b : r.getBlocks()) + for (auto bbArg : b.getArguments()) + if (bbArg.getType().isa()) + createAliasInfoEntry(bbArg); }); } +/// Add a new entry for `v` in the `aliasInfo` and `equivalentInfo`. In the +/// beginning the alias and equivalence sets only contain `v` itself. +void BufferizationAliasInfo::createAliasInfoEntry(Value v) { + DenseSet selfSet; + selfSet.insert(v); + aliasInfo.try_emplace(v, selfSet); + equivalentInfo.insert(v); +} + /// Return true if the buffer to which `operand` would bufferize aliases a /// buffer that is known to not be writeable. This implies that the matching /// OpResult cannot be bufferized inplace. @@ -1679,8 +1728,8 @@ bufferizationSanityCheck(scf::YieldOp yieldOp, /// Analyze the `funcOp` body to determine which OpResults are inplaceable. static LogicalResult -inPlaceAnalysisFuncOpInternals(FuncOp funcOp, BufferizationAliasInfo &aliasInfo, - const DominanceInfo &domInfo) { +inPlaceAnalysisFuncOpBody(FuncOp funcOp, BufferizationAliasInfo &aliasInfo, + const DominanceInfo &domInfo) { LLVM_DEBUG(llvm::dbgs() << "\n\n"); LDBG("Begin InPlaceAnalysisFuncOpInternals:\n" << funcOp << '\n'); assert(funcOp && funcOp->getNumRegions() > 0 && !funcOp.body().empty() && @@ -1816,7 +1865,7 @@ void LinalgComprehensiveFuncBufferize::runOnFunction() { BufferizationAliasInfo aliasInfo(funcOp); // If the analysis fails, just return. This is expected to reset the IR and no // single OpResult should be marked inPlace. - if (failed(inPlaceAnalysisFuncOpInternals(funcOp, aliasInfo, domInfo))) { + if (failed(inPlaceAnalysisFuncOpBody(funcOp, aliasInfo, domInfo))) { signalPassFailure(); return; } @@ -1836,3 +1885,122 @@ void LinalgComprehensiveFuncBufferize::runOnFunction() { std::unique_ptr mlir::createLinalgComprehensiveFuncBufferizePass() { return std::make_unique(); } + +//===----------------------------------------------------------------------===// +// Bufferization entry-point for modules. +//===----------------------------------------------------------------------===// + +/// Store all functions of the `moduleOp` in `orderedFuncOps`, sorted by +/// callee-caller order (i.e. callees without callers first). +/// Store the map of FuncOp to all its callers in `callerMap`. +/// Return `failure()` if a cycle of calls is detected or if we are unable to +/// retrieve the called FuncOp from any CallOpInterface. +static LogicalResult +getFuncOpsOrderedByCalls(ModuleOp moduleOp, + SmallVectorImpl &orderedFuncOps, + DenseMap> &callerMap) { + // For each FuncOp, the set of functions called by it (i.e. the union of + // symbols of all nested CallOpInterfaceOp). + DenseMap> calledBy; + // For each FuncOp, the number of CallOpInterface it contains. + DenseMap numberCallOpsContainedInFuncOp; + WalkResult res = moduleOp.walk([&](FuncOp funcOp) { + numberCallOpsContainedInFuncOp[funcOp] = 0; + return funcOp.walk([&](CallOpInterface callOp) { + FuncOp calledFunction = getCalledFunction(callOp); + if (!calledFunction) + return WalkResult::interrupt(); + auto it = callerMap.try_emplace(calledFunction, DenseSet{}); + it.first->getSecond().insert(callOp); + if (calledBy[calledFunction].count(funcOp) == 0) { + calledBy[calledFunction].insert(funcOp); + numberCallOpsContainedInFuncOp[funcOp]++; + } + return WalkResult::advance(); + }); + }); + if (res.wasInterrupted()) + return failure(); + // Iteratively remove function operation that do not call any of the + // functions remaining in the callCounter map and add them to the worklist. + while (!numberCallOpsContainedInFuncOp.empty()) { + auto it = llvm::find_if(numberCallOpsContainedInFuncOp, + [](auto entry) { return entry.getSecond() == 0; }); + if (it == numberCallOpsContainedInFuncOp.end()) + return moduleOp.emitOpError( + "expected callgraph to be free of circular dependencies."); + orderedFuncOps.push_back(it->getFirst()); + for (auto callee : calledBy[it->getFirst()]) + numberCallOpsContainedInFuncOp[callee]--; + numberCallOpsContainedInFuncOp.erase(it); + } + return success(); +} + +namespace { +struct LinalgComprehensiveModuleBufferize + : public LinalgComprehensiveModuleBufferizeBase< + LinalgComprehensiveModuleBufferize> { + + void runOnOperation() override; + + void getDependentDialects(DialectRegistry ®istry) const override { + registry.insert(); + } +}; +} // end namespace + +void LinalgComprehensiveModuleBufferize::runOnOperation() { + ModuleOp moduleOp = getOperation(); + + SmallVector orderedFuncOps; + DenseMap> callerMap; + if (failed(getFuncOpsOrderedByCalls(moduleOp, orderedFuncOps, callerMap))) + return signalPassFailure(); + + DominanceInfo domInfo(moduleOp); + BufferizationAliasInfo aliasInfo(moduleOp); + // Interestingly, all function args that are not visible outside of a module + // can be fully bufferized inplace by guaranteeing the CallOp is bufferized + // inplace. Therefore, we just bufferize funcOp as if none of its results were + // inplaceable, detect which operands are cloned internally and decide what to + // do at call sites. + for (FuncOp funcOp : orderedFuncOps) { + // No body => no analysis. + if (funcOp.body().empty()) + continue; + + // In a first approximation: + // ========================= + // If the function is called, we can allocate on the caller side which lets + // us force inplace arguments at function boundaries. + // TODO: do not rely on this behavior. + if (callerMap.find(funcOp) != callerMap.end()) + for (BlockArgument bbArg : funcOp.getArguments()) + if (bbArg.getType().isa()) + setInPlaceFuncArgument(bbArg); + + // If the analysis fails, just return. + if (failed(inPlaceAnalysisFuncOpBody(funcOp, aliasInfo, domInfo))) { + signalPassFailure(); + return; + } + + // TODO: Bufferization phase. + } + // Don't drop the attributes if we only want to report the analysis. + if (testAnalysisOnly) + return; + + // Post-pass cleanup of inplaceable attributes. + moduleOp.walk( + [&](Operation *op) { op->removeAttr(kInPlaceResultsAttrName); }); + moduleOp.walk([&](FuncOp op) { + for (BlockArgument bbArg : op.getArguments()) + removeInPlaceFuncArgument(bbArg); + }); +} + +std::unique_ptr mlir::createLinalgComprehensiveModuleBufferizePass() { + return std::make_unique(); +} diff --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis.mlir b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis.mlir new file mode 100644 index 0000000000000..108119467ea63 --- /dev/null +++ b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis.mlir @@ -0,0 +1,84 @@ +// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize=test-analysis-only -split-input-file | FileCheck %s + +func private @foo(tensor<64xf32>) + +// CHECK-LABEL: dependence_through_call +func @dependence_through_call(%I : tensor<64xf32> {linalg.inplaceable = true}) { + %f1 = constant 1.000000e+00 : f32 + %f2 = constant 2.000000e+00 : f32 + + // 2. %B already bufferizes inplace, %A would alias and have a different + // value. The calls to `foo` are determined to read conservatively, so %A + // cannot bufferize inplace. + // CHECK: fill + // CHECK-SAME: {__inplace_results_attr__ = ["false"]} + %A = linalg.fill(%f1, %I) : f32, tensor<64xf32> -> tensor<64xf32> + + // 1. Bufferizes inplace: no alias to %A is yet possible. + // CHECK: fill + // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + %B = linalg.fill(%f2, %I) : f32, tensor<64xf32> -> tensor<64xf32> + + call @foo(%A) : (tensor<64xf32>) -> () + call @foo(%B) : (tensor<64xf32>) -> () + + return +} + +// ----- + +func private @foo(tensor<64xf32>) + +func private @bar(%A : tensor<64xf32>) { + call @foo(%A) : (tensor<64xf32>) -> () + return +} + +func @read_dependence_through_scf_and_call( + %I : tensor<64xf32> {linalg.inplaceable = true}, + %I2 : tensor<64xf32> {linalg.inplaceable = true}) { + %c0 = constant 0 : index + %c1 = constant 1 : index + %c10 = constant 10 : index + %f1 = constant 1.000000e+00 : f32 + %f2 = constant 2.000000e+00 : f32 + + // 5. %B bufferizes inplace, %A would alias and have a different value. + // The calls to `foo` are determined to read conservatively, so %A cannot + // bufferize inplace. + // CHECK: fill + // CHECK-SAME: {__inplace_results_attr__ = ["false"]} + %A = linalg.fill(%f1, %I) : f32, tensor<64xf32> -> tensor<64xf32> + + // 4. Bufferizes inplace: no alias to %A is yet possible. + // CHECK: fill + // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + %B = linalg.fill(%f2, %I) : f32, tensor<64xf32> -> tensor<64xf32> + + // 3. Does not read or write, bufferizes inplace. + // CHECK: scf.for + // CHECK: {__inplace_results_attr__ = ["true", "true"]} + %r:2 = scf.for %i = %c0 to %c10 step %c1 iter_args(%0 = %A, %1 = %B) + -> (tensor<64xf32>, tensor<64xf32>) + { + scf.yield %0, %1 : tensor<64xf32>, tensor<64xf32> + } + call @foo(%r#0) : (tensor<64xf32>) -> () + call @foo(%r#1) : (tensor<64xf32>) -> () + + // 2. %B2 already bufferizes inplace, %A2 would alias and have a different + // value. The calls to `foo` are determined to read conservatively, so %A2 + // cannot bufferize inplace. + // CHECK: fill + // CHECK-SAME: {__inplace_results_attr__ = ["false"]} + %A2 = linalg.fill(%f1, %I2) : f32, tensor<64xf32> -> tensor<64xf32> + + // 1. Bufferizes inplace: no alias to %A2 is yet possible. + // CHECK: fill + // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + %B2 = linalg.fill(%f2, %I2) : f32, tensor<64xf32> -> tensor<64xf32> + + call @bar(%A2) : (tensor<64xf32>) -> () + call @bar(%B2) : (tensor<64xf32>) -> () + return +} diff --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-invalid.mlir b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-invalid.mlir new file mode 100644 index 0000000000000..0e378a89ef58c --- /dev/null +++ b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-invalid.mlir @@ -0,0 +1,15 @@ +// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize -split-input-file -verify-diagnostics + +// ----- + +// expected-error @-3 {{expected callgraph to be free of circular dependencies}} + +func @foo() { + call @bar() : () -> () + return +} + +func @bar() { + call @foo() : () -> () + return +} From f57b2420b2235eca00d5c085a7ef084433140452 Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Tue, 29 Jun 2021 12:12:15 -0700 Subject: [PATCH 224/619] [mlir:Async] Add an async reference counting pass based on the user defined policy Depends On D104999 Automatic reference counting based on the liveness analysis can add a lot of reference counting overhead at runtime. If the IR is known to be constrained to few particular "shapes", it's much more efficient to provide a custom reference counting policy that will specify where it is required to update the async value reference count. Reviewed By: mehdi_amini Differential Revision: https://reviews.llvm.org/D105037 --- mlir/include/mlir/Dialect/Async/Passes.h | 2 + mlir/include/mlir/Dialect/Async/Passes.td | 32 +++ .../Transforms/AsyncRuntimeRefCounting.cpp | 216 ++++++++++++++---- ...ync-runtime-policy-based-ref-counting.mlir | 47 ++++ .../Async/CPU/test-async-parallel-for-1d.mlir | 12 + .../Async/CPU/test-async-parallel-for-2d.mlir | 12 + 6 files changed, 274 insertions(+), 47 deletions(-) create mode 100644 mlir/test/Dialect/Async/async-runtime-policy-based-ref-counting.mlir diff --git a/mlir/include/mlir/Dialect/Async/Passes.h b/mlir/include/mlir/Dialect/Async/Passes.h index 5d0a7f66cf774..ce85ffa296472 100644 --- a/mlir/include/mlir/Dialect/Async/Passes.h +++ b/mlir/include/mlir/Dialect/Async/Passes.h @@ -29,6 +29,8 @@ std::unique_ptr createAsyncRuntimeRefCountingPass(); std::unique_ptr createAsyncRuntimeRefCountingOptPass(); +std::unique_ptr createAsyncRuntimePolicyBasedRefCountingPass(); + //===----------------------------------------------------------------------===// // Registration //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/Async/Passes.td b/mlir/include/mlir/Dialect/Async/Passes.td index e321747d4ec66..913ecee43097c 100644 --- a/mlir/include/mlir/Dialect/Async/Passes.td +++ b/mlir/include/mlir/Dialect/Async/Passes.td @@ -66,4 +66,36 @@ def AsyncRuntimeRefCountingOpt : Pass<"async-runtime-ref-counting-opt"> { let dependentDialects = ["async::AsyncDialect"]; } +def AsyncRuntimePolicyBasedRefCounting + : Pass<"async-runtime-policy-based-ref-counting"> { + let summary = "Policy based reference counting for Async runtime operations"; + let description = [{ + This pass works at the async runtime abtraction level, after all + `async.execute` and `async.await` operations are lowered to the async + runtime API calls, and async coroutine operations. + + This pass doesn't rely on reference counted values liveness analysis, and + instead uses simple policy to create reference counting operations. If the + program violates any of the assumptions, then this pass might lead to + memory leaks or runtime errors. + + The default reference counting policy assumptions: + 1. Async token can be awaited or added to the group only once. + 2. Async value or group can be awaited only once. + + Under these assumptions reference counting only needs to drop reference: + 1. After `async.runtime.await` operation for async tokens and groups + (until error handling is not implemented for the sync await). + 2. After `async.runtime.is_error` operation for async tokens and groups + (this is the last operation in the coroutine resume function). + 3. After `async.runtime.load` operation for async values. + + This pass introduces significanly less runtime overhead compared to the + automatic reference counting. + }]; + + let constructor = "mlir::createAsyncRuntimePolicyBasedRefCountingPass()"; + let dependentDialects = ["async::AsyncDialect"]; +} + #endif // MLIR_DIALECT_ASYNC_PASSES diff --git a/mlir/lib/Dialect/Async/Transforms/AsyncRuntimeRefCounting.cpp b/mlir/lib/Dialect/Async/Transforms/AsyncRuntimeRefCounting.cpp index 15fd4f3f87650..17e768cee74ba 100644 --- a/mlir/lib/Dialect/Async/Transforms/AsyncRuntimeRefCounting.cpp +++ b/mlir/lib/Dialect/Async/Transforms/AsyncRuntimeRefCounting.cpp @@ -26,6 +26,79 @@ using namespace mlir::async; #define DEBUG_TYPE "async-runtime-ref-counting" +//===----------------------------------------------------------------------===// +// Utility functions shared by reference counting passes. +//===----------------------------------------------------------------------===// + +// Drop the reference count immediately if the value has no uses. +static LogicalResult dropRefIfNoUses(Value value, unsigned count = 1) { + if (!value.getUses().empty()) + return failure(); + + OpBuilder b(value.getContext()); + + // Set insertion point after the operation producing a value, or at the + // beginning of the block if the value defined by the block argument. + if (Operation *op = value.getDefiningOp()) + b.setInsertionPointAfter(op); + else + b.setInsertionPointToStart(value.getParentBlock()); + + b.create(value.getLoc(), value, b.getI32IntegerAttr(1)); + return success(); +} + +// Calls `addRefCounting` for every reference counted value defined by the +// operation `op` (block arguments and values defined in nested regions). +static LogicalResult walkReferenceCountedValues( + Operation *op, llvm::function_ref addRefCounting) { + // Check that we do not have high level async operations in the IR because + // otherwise reference counting will produce incorrect results after high + // level async operations will be lowered to `async.runtime` + WalkResult checkNoAsyncWalk = op->walk([&](Operation *op) -> WalkResult { + if (!isa(op)) + return WalkResult::advance(); + + return op->emitError() + << "async operations must be lowered to async runtime operations"; + }); + + if (checkNoAsyncWalk.wasInterrupted()) + return failure(); + + // Add reference counting to block arguments. + WalkResult blockWalk = op->walk([&](Block *block) -> WalkResult { + for (BlockArgument arg : block->getArguments()) + if (isRefCounted(arg.getType())) + if (failed(addRefCounting(arg))) + return WalkResult::interrupt(); + + return WalkResult::advance(); + }); + + if (blockWalk.wasInterrupted()) + return failure(); + + // Add reference counting to operation results. + WalkResult opWalk = op->walk([&](Operation *op) -> WalkResult { + for (unsigned i = 0; i < op->getNumResults(); ++i) + if (isRefCounted(op->getResultTypes()[i])) + if (failed(addRefCounting(op->getResult(i)))) + return WalkResult::interrupt(); + + return WalkResult::advance(); + }); + + if (opWalk.wasInterrupted()) + return failure(); + + return success(); +} + +//===----------------------------------------------------------------------===// +// Automatic reference counting based on the liveness analysis. +//===----------------------------------------------------------------------===// + namespace { class AsyncRuntimeRefCountingPass @@ -356,21 +429,9 @@ AsyncRuntimeRefCountingPass::addDropRefInDivergentLivenessSuccessor( LogicalResult AsyncRuntimeRefCountingPass::addAutomaticRefCounting(Value value) { - OpBuilder builder(value.getContext()); - Location loc = value.getLoc(); - - // Set inserton point after the operation producing a value, or at the - // beginning of the block if the value defined by the block argument. - if (Operation *op = value.getDefiningOp()) - builder.setInsertionPointAfter(op); - else - builder.setInsertionPointToStart(value.getParentBlock()); - - // Drop the reference count immediately if the value has no uses. - if (value.getUses().empty()) { - builder.create(loc, value, builder.getI32IntegerAttr(1)); + // Short-circuit reference counting for values without uses. + if (succeeded(dropRefIfNoUses(value))) return success(); - } // Add `drop_ref` operations based on the liveness analysis. if (failed(addDropRefAfterLastUse(value))) @@ -388,53 +449,114 @@ AsyncRuntimeRefCountingPass::addAutomaticRefCounting(Value value) { } void AsyncRuntimeRefCountingPass::runOnOperation() { - Operation *op = getOperation(); + auto functor = [&](Value value) { return addAutomaticRefCounting(value); }; + if (failed(walkReferenceCountedValues(getOperation(), functor))) + signalPassFailure(); +} - // Check that we do not have high level async operations in the IR because - // otherwise automatic reference counting will produce incorrect results after - // execute operations will be lowered to `async.runtime` - WalkResult executeOpWalk = op->walk([&](Operation *op) -> WalkResult { - if (!isa(op)) - return WalkResult::advance(); +//===----------------------------------------------------------------------===// +// Reference counting based on the user defined policy. +//===----------------------------------------------------------------------===// - return op->emitError() - << "async operations must be lowered to async runtime operations"; - }); +namespace { - if (executeOpWalk.wasInterrupted()) { - signalPassFailure(); - return; - } +class AsyncRuntimePolicyBasedRefCountingPass + : public AsyncRuntimePolicyBasedRefCountingBase< + AsyncRuntimePolicyBasedRefCountingPass> { +public: + AsyncRuntimePolicyBasedRefCountingPass() { initializeDefaultPolicy(); } - // Add reference counting to block arguments. - WalkResult blockWalk = op->walk([&](Block *block) -> WalkResult { - for (BlockArgument arg : block->getArguments()) - if (isRefCounted(arg.getType())) - if (failed(addAutomaticRefCounting(arg))) - return WalkResult::interrupt(); + void runOnOperation() override; - return WalkResult::advance(); - }); +private: + // Adds a reference counting operations for all uses of the `value` according + // to the reference counting policy. + LogicalResult addRefCounting(Value value); - if (blockWalk.wasInterrupted()) { - signalPassFailure(); - return; + void initializeDefaultPolicy(); + + llvm::SmallVector(OpOperand &)>> policy; +}; + +} // namespace + +LogicalResult +AsyncRuntimePolicyBasedRefCountingPass::addRefCounting(Value value) { + // Short-circuit reference counting for values without uses. + if (succeeded(dropRefIfNoUses(value))) + return success(); + + OpBuilder b(value.getContext()); + + // Consult the user defined policy for every value use. + for (OpOperand &operand : value.getUses()) { + Location loc = operand.getOwner()->getLoc(); + + for (auto &func : policy) { + FailureOr refCount = func(operand); + if (failed(refCount)) + return failure(); + + int cnt = refCount.getValue(); + + // Create `add_ref` operation before the operand owner. + if (cnt > 0) { + b.setInsertionPoint(operand.getOwner()); + b.create(loc, value, b.getI32IntegerAttr(cnt)); + } + + // Create `drop_ref` operation after the operand owner. + if (cnt < 0) { + b.setInsertionPointAfter(operand.getOwner()); + b.create(loc, value, b.getI32IntegerAttr(-cnt)); + } + } } - // Add reference counting to operation results. - WalkResult opWalk = op->walk([&](Operation *op) -> WalkResult { - for (unsigned i = 0; i < op->getNumResults(); ++i) - if (isRefCounted(op->getResultTypes()[i])) - if (failed(addAutomaticRefCounting(op->getResult(i)))) - return WalkResult::interrupt(); + return success(); +} - return WalkResult::advance(); +void AsyncRuntimePolicyBasedRefCountingPass::initializeDefaultPolicy() { + policy.push_back([](OpOperand &operand) -> FailureOr { + Operation *op = operand.getOwner(); + Type type = operand.get().getType(); + + bool isToken = type.isa(); + bool isGroup = type.isa(); + bool isValue = type.isa(); + + // Drop reference after async token or group await (sync await) + if (auto await = dyn_cast(op)) + return (isToken || isGroup) ? -1 : 0; + + // Drop reference after async token or group error check (coro await). + if (auto await = dyn_cast(op)) + return (isToken || isGroup) ? -1 : 0; + + // Drop reference after async value load. + if (auto load = dyn_cast(op)) + return isValue ? -1 : 0; + + // Drop reference after async token added to the group. + if (auto add = dyn_cast(op)) + return isToken ? -1 : 0; + + return 0; }); +} - if (opWalk.wasInterrupted()) +void AsyncRuntimePolicyBasedRefCountingPass::runOnOperation() { + auto functor = [&](Value value) { return addRefCounting(value); }; + if (failed(walkReferenceCountedValues(getOperation(), functor))) signalPassFailure(); } +//----------------------------------------------------------------------------// + std::unique_ptr mlir::createAsyncRuntimeRefCountingPass() { return std::make_unique(); } + +std::unique_ptr mlir::createAsyncRuntimePolicyBasedRefCountingPass() { + return std::make_unique(); +} diff --git a/mlir/test/Dialect/Async/async-runtime-policy-based-ref-counting.mlir b/mlir/test/Dialect/Async/async-runtime-policy-based-ref-counting.mlir new file mode 100644 index 0000000000000..54640f552798d --- /dev/null +++ b/mlir/test/Dialect/Async/async-runtime-policy-based-ref-counting.mlir @@ -0,0 +1,47 @@ +// RUN: mlir-opt %s -async-runtime-policy-based-ref-counting | FileCheck %s + +// CHECK-LABEL: @token_await +// CHECK: %[[TOKEN:.*]]: !async.token +func @token_await(%arg0: !async.token) { + // CHECK: async.runtime.await %[[TOKEN]] + // CHECK: async.runtime.drop_ref %[[TOKEN]] {count = 1 : i32} + async.runtime.await %arg0 : !async.token + return +} + +// CHECK-LABEL: @group_await +// CHECK: %[[GROUP:.*]]: !async.group +func @group_await(%arg0: !async.group) { + // CHECK: async.runtime.await %[[GROUP]] + // CHECK: async.runtime.drop_ref %[[GROUP]] {count = 1 : i32} + async.runtime.await %arg0 : !async.group + return +} + +// CHECK-LABEL: @add_token_to_group +// CHECK: %[[GROUP:.*]]: !async.group +// CHECK: %[[TOKEN:.*]]: !async.token +func @add_token_to_group(%arg0: !async.group, %arg1: !async.token) { + // CHECK: async.runtime.add_to_group %[[TOKEN]], %[[GROUP]] + // CHECK: async.runtime.drop_ref %[[TOKEN]] {count = 1 : i32} + async.runtime.add_to_group %arg1, %arg0 : !async.token + return +} + +// CHECK-LABEL: @value_load +// CHECK: %[[VALUE:.*]]: !async.value +func @value_load(%arg0: !async.value) { + // CHECK: async.runtime.load %[[VALUE]] + // CHECK: async.runtime.drop_ref %[[VALUE]] {count = 1 : i32} + %0 = async.runtime.load %arg0 : !async.value + return +} + +// CHECK-LABEL: @error_check +// CHECK: %[[TOKEN:.*]]: !async.token +func @error_check(%arg0: !async.token) { + // CHECK: async.runtime.is_error %[[TOKEN]] + // CHECK: async.runtime.drop_ref %[[TOKEN]] {count = 1 : i32} + %0 = async.runtime.is_error %arg0 : !async.token + return +} diff --git a/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-1d.mlir b/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-1d.mlir index 12b2be2627131..6c2758c484f79 100644 --- a/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-1d.mlir +++ b/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-1d.mlir @@ -11,6 +11,18 @@ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_async_runtime%shlibext\ // RUN: | FileCheck %s --dump-input=always +// RUN: mlir-opt %s -async-parallel-for \ +// RUN: -async-to-async-runtime \ +// RUN: -async-runtime-policy-based-ref-counting \ +// RUN: -convert-async-to-llvm \ +// RUN: -convert-scf-to-std \ +// RUN: -convert-std-to-llvm \ +// RUN: | mlir-cpu-runner \ +// RUN: -e entry -entry-point-result=void -O0 \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_async_runtime%shlibext\ +// RUN: | FileCheck %s --dump-input=always + // RUN: mlir-opt %s -async-parallel-for="async-dispatch=false \ // RUN: num-workers=20 \ // RUN: target-block-size=1" \ diff --git a/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-2d.mlir b/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-2d.mlir index b294b9ce4d26e..d8f99d061b7d4 100644 --- a/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-2d.mlir +++ b/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-2d.mlir @@ -11,6 +11,18 @@ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_async_runtime%shlibext\ // RUN: | FileCheck %s --dump-input=always +// RUN: mlir-opt %s -async-parallel-for \ +// RUN: -async-to-async-runtime \ +// RUN: -async-runtime-policy-based-ref-counting \ +// RUN: -convert-async-to-llvm \ +// RUN: -convert-scf-to-std \ +// RUN: -convert-std-to-llvm \ +// RUN: | mlir-cpu-runner \ +// RUN: -e entry -entry-point-result=void -O0 \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_async_runtime%shlibext\ +// RUN: | FileCheck %s --dump-input=always + // RUN: mlir-opt %s -async-parallel-for="async-dispatch=false \ // RUN: num-workers=20 \ // RUN: target-block-size=1" \ From c1194c2ec35029f96ce75ab54555dccf2b7e8681 Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Tue, 29 Jun 2021 12:56:15 -0700 Subject: [PATCH 225/619] [mlir:Async] Change async-parallel-for block size/count calculation Depends On D105037 Avoid creating too many tasks when the number of workers is large. Reviewed By: herhut Differential Revision: https://reviews.llvm.org/D105126 --- .../Async/Transforms/AsyncParallelFor.cpp | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Dialect/Async/Transforms/AsyncParallelFor.cpp b/mlir/lib/Dialect/Async/Transforms/AsyncParallelFor.cpp index a104fb73571d9..373ee8b01dca9 100644 --- a/mlir/lib/Dialect/Async/Transforms/AsyncParallelFor.cpp +++ b/mlir/lib/Dialect/Async/Transforms/AsyncParallelFor.cpp @@ -653,9 +653,19 @@ AsyncParallelForRewrite::matchAndRewrite(scf::ParallelOp op, for (size_t i = 1; i < tripCounts.size(); ++i) tripCount = b.create(tripCount, tripCounts[i]); + // With large number of threads the value of creating many compute blocks + // is reduced because the problem typically becomes memory bound. For small + // number of threads it helps with stragglers. + float overshardingFactor = numWorkerThreads <= 4 ? 8.0 + : numWorkerThreads <= 8 ? 4.0 + : numWorkerThreads <= 16 ? 2.0 + : numWorkerThreads <= 32 ? 1.0 + : numWorkerThreads <= 64 ? 0.8 + : 0.6; + // Do not overload worker threads with too many compute blocks. - Value maxComputeBlocks = - b.create(numWorkerThreads * kMaxOversharding); + Value maxComputeBlocks = b.create( + std::max(1, static_cast(numWorkerThreads * overshardingFactor))); // Target block size from the pass parameters. Value targetComputeBlockSize = b.create(targetBlockSize); @@ -668,7 +678,11 @@ AsyncParallelForRewrite::matchAndRewrite(scf::ParallelOp op, Value bs1 = b.create(CmpIPredicate::sge, bs0, targetComputeBlockSize); Value bs2 = b.create(bs1, bs0, targetComputeBlockSize); Value bs3 = b.create(CmpIPredicate::sle, tripCount, bs2); - Value blockSize = b.create(bs3, tripCount, bs2); + Value blockSize0 = b.create(bs3, tripCount, bs2); + Value blockCount0 = b.create(tripCount, blockSize0); + + // Compute balanced block size for the estimated block count. + Value blockSize = b.create(tripCount, blockCount0); Value blockCount = b.create(tripCount, blockSize); // Create a parallel compute function that takes a block id and computes the From bb2cfca2f3237d7f722e95d4cab9f3d71f728c9c Mon Sep 17 00:00:00 2001 From: Stella Stamenova Date: Tue, 29 Jun 2021 12:09:56 -0700 Subject: [PATCH 226/619] Revert D104488 and friends since it broke the windows bot Reverts commits: "Fix failing tests after https://reviews.llvm.org/D104488." "Fix buildbot failure after https://reviews.llvm.org/D104488." "Create synthetic symbol names on demand to improve memory consumption and startup times." This series of commits broke the windows lldb bot and then failed to fix all of the failing tests. --- lldb/include/lldb/Symbol/ObjectFile.h | 2 + lldb/include/lldb/Symbol/Symbol.h | 24 ++----- lldb/include/lldb/Symbol/Symtab.h | 20 ------ .../Plugins/ObjectFile/ELF/ObjectFileELF.cpp | 64 +++++++++---------- .../ObjectFile/Mach-O/ObjectFileMachO.cpp | 6 +- lldb/source/Symbol/ObjectFile.cpp | 10 +++ lldb/source/Symbol/Symbol.cpp | 40 +++--------- lldb/source/Symbol/Symtab.cpp | 38 ++--------- .../ObjectFile/ELF/eh_frame-symbols.yaml | 4 +- .../Shell/SymbolFile/Breakpad/symtab.test | 2 +- 10 files changed, 67 insertions(+), 143 deletions(-) diff --git a/lldb/include/lldb/Symbol/ObjectFile.h b/lldb/include/lldb/Symbol/ObjectFile.h index dc83565c7db52..1e29cf53b78b3 100644 --- a/lldb/include/lldb/Symbol/ObjectFile.h +++ b/lldb/include/lldb/Symbol/ObjectFile.h @@ -712,6 +712,8 @@ class ObjectFile : public std::enable_shared_from_this, /// false otherwise. bool SetModulesArchitecture(const ArchSpec &new_arch); + ConstString GetNextSyntheticSymbolName(); + static lldb::DataBufferSP MapFileData(const FileSpec &file, uint64_t Size, uint64_t Offset); diff --git a/lldb/include/lldb/Symbol/Symbol.h b/lldb/include/lldb/Symbol/Symbol.h index be3e8abefa490..3abe3114863de 100644 --- a/lldb/include/lldb/Symbol/Symbol.h +++ b/lldb/include/lldb/Symbol/Symbol.h @@ -113,20 +113,14 @@ class Symbol : public SymbolContextScope { lldb::LanguageType GetLanguage() const { // TODO: See if there is a way to determine the language for a symbol // somehow, for now just return our best guess - return GetMangled().GuessLanguage(); + return m_mangled.GuessLanguage(); } void SetID(uint32_t uid) { m_uid = uid; } - Mangled &GetMangled() { - SynthesizeNameIfNeeded(); - return m_mangled; - } + Mangled &GetMangled() { return m_mangled; } - const Mangled &GetMangled() const { - SynthesizeNameIfNeeded(); - return m_mangled; - } + const Mangled &GetMangled() const { return m_mangled; } ConstString GetReExportedSymbolName() const; @@ -172,9 +166,9 @@ class Symbol : public SymbolContextScope { bool IsTrampoline() const; bool IsIndirect() const; - + bool IsWeak() const { return m_is_weak; } - + void SetIsWeak (bool b) { m_is_weak = b; } bool GetByteSizeIsValid() const { return m_size_is_valid; } @@ -229,10 +223,6 @@ class Symbol : public SymbolContextScope { bool ContainsFileAddress(lldb::addr_t file_addr) const; - static llvm::StringRef GetSyntheticSymbolPrefix() { - return "___lldb_unnamed_symbol"; - } - protected: // This is the internal guts of ResolveReExportedSymbol, it assumes // reexport_name is not null, and that module_spec is valid. We track the @@ -243,8 +233,6 @@ class Symbol : public SymbolContextScope { lldb_private::ModuleSpec &module_spec, lldb_private::ModuleList &seen_modules) const; - void SynthesizeNameIfNeeded() const; - uint32_t m_uid = UINT32_MAX; // User ID (usually the original symbol table index) uint16_t m_type_data = 0; // data specific to m_type @@ -270,7 +258,7 @@ class Symbol : public SymbolContextScope { // doing name lookups m_is_weak : 1, m_type : 6; // Values from the lldb::SymbolType enum. - mutable Mangled m_mangled; // uniqued symbol name/mangled name pair + Mangled m_mangled; // uniqued symbol name/mangled name pair AddressRange m_addr_range; // Contains the value, or the section offset // address when the value is an address in a // section, and the size (if any) diff --git a/lldb/include/lldb/Symbol/Symtab.h b/lldb/include/lldb/Symbol/Symtab.h index e1ad0dfd2eb8d..fbfa3a5e0cec7 100644 --- a/lldb/include/lldb/Symbol/Symtab.h +++ b/lldb/include/lldb/Symbol/Symtab.h @@ -219,26 +219,6 @@ class Symtab { return false; } - /// A helper function that looks up full function names. - /// - /// We generate unique names for synthetic symbols so that users can look - /// them up by name when needed. But because doing so is uncommon in normal - /// debugger use, we trade off some performance at lookup time for faster - /// symbol table building by detecting these symbols and generating their - /// names lazily, rather than adding them to the normal symbol indexes. This - /// function does the job of first consulting the name indexes, and if that - /// fails it extracts the information it needs from the synthetic name and - /// locates the symbol. - /// - /// @param[in] symbol_name The symbol name to search for. - /// - /// @param[out] indexes The vector if symbol indexes to update with results. - /// - /// @returns The number of indexes added to the index vector. Zero if no - /// matches were found. - uint32_t GetNameIndexes(ConstString symbol_name, - std::vector &indexes); - void SymbolIndicesToSymbolContextList(std::vector &symbol_indexes, SymbolContextList &sc_list); diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp index edf87f036f0f5..be73d38961ea6 100644 --- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp +++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp @@ -1880,7 +1880,7 @@ void ObjectFileELF::CreateSections(SectionList &unified_section_list) { unified_section_list.AddSection(symtab_section_sp); } } - } + } } std::shared_ptr ObjectFileELF::GetGnuDebugDataObjectFile() { @@ -2813,24 +2813,20 @@ Symtab *ObjectFileELF::GetSymtab() { if (is_valid_entry_point && !m_symtab_up->FindSymbolContainingFileAddress( entry_point_file_addr)) { uint64_t symbol_id = m_symtab_up->GetNumSymbols(); - // Don't set the name for any synthetic symbols, the Symbol - // object will generate one if needed when the name is accessed - // via accessors. - SectionSP section_sp = entry_point_addr.GetSection(); - Symbol symbol( - /*symID=*/symbol_id, - /*name=*/llvm::StringRef(), // Name will be auto generated. - /*type=*/eSymbolTypeCode, - /*external=*/true, - /*is_debug=*/false, - /*is_trampoline=*/false, - /*is_artificial=*/true, - /*section_sp=*/section_sp, - /*offset=*/entry_point_addr.GetOffset(), - /*size=*/0, // FDE can span multiple symbols so don't use its size. - /*size_is_valid=*/false, - /*contains_linker_annotations=*/false, - /*flags=*/0); + Symbol symbol(symbol_id, + GetNextSyntheticSymbolName().GetCString(), // Symbol name. + eSymbolTypeCode, // Type of this symbol. + true, // Is this globally visible? + false, // Is this symbol debug info? + false, // Is this symbol a trampoline? + true, // Is this symbol artificial? + entry_point_addr.GetSection(), // Section where this + // symbol is defined. + 0, // Offset in section or symbol value. + 0, // Size. + false, // Size is valid. + false, // Contains linker annotations? + 0); // Symbol flags. m_symtab_up->AddSymbol(symbol); // When the entry point is arm thumb we need to explicitly set its // class address to reflect that. This is important because expression @@ -2921,24 +2917,22 @@ void ObjectFileELF::ParseUnwindSymbols(Symtab *symbol_table, section_list->FindSectionContainingFileAddress(file_addr); if (section_sp) { addr_t offset = file_addr - section_sp->GetFileAddress(); + const char *symbol_name = GetNextSyntheticSymbolName().GetCString(); uint64_t symbol_id = ++last_symbol_id; - // Don't set the name for any synthetic symbols, the Symbol - // object will generate one if needed when the name is accessed - // via accessors. Symbol eh_symbol( - /*symID=*/symbol_id, - /*name=*/llvm::StringRef(), // Name will be auto generated. - /*type=*/eSymbolTypeCode, - /*external=*/true, - /*is_debug=*/false, - /*is_trampoline=*/false, - /*is_artificial=*/true, - /*section_sp=*/section_sp, - /*offset=*/offset, - /*size=*/0, // FDE can span multiple symbols so don't use its size. - /*size_is_valid=*/false, - /*contains_linker_annotations=*/false, - /*flags=*/0); + symbol_id, // Symbol table index. + symbol_name, // Symbol name. + eSymbolTypeCode, // Type of this symbol. + true, // Is this globally visible? + false, // Is this symbol debug info? + false, // Is this symbol a trampoline? + true, // Is this symbol artificial? + section_sp, // Section in which this symbol is defined or null. + offset, // Offset in section or symbol value. + 0, // Size: Don't specify the size as an FDE can + false, // Size is valid: cover multiple symbols. + false, // Contains linker annotations? + 0); // Symbol flags. new_symbols.push_back(eh_symbol); } } diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp index 72389e9fd5c67..e7652cffb1c81 100644 --- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp +++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp @@ -4696,10 +4696,8 @@ size_t ObjectFileMachO::ParseSymtab() { symbol_byte_size = section_end_file_addr - symbol_file_addr; } sym[sym_idx].SetID(synthetic_sym_id++); - // Don't set the name for any synthetic symbols, the Symbol - // object will generate one if needed when the name is accessed - // via accessors. - sym[sym_idx].GetMangled().SetDemangledName(ConstString()); + sym[sym_idx].GetMangled().SetDemangledName( + GetNextSyntheticSymbolName()); sym[sym_idx].SetType(eSymbolTypeCode); sym[sym_idx].SetIsSynthetic(true); sym[sym_idx].GetAddressRef() = symbol_addr; diff --git a/lldb/source/Symbol/ObjectFile.cpp b/lldb/source/Symbol/ObjectFile.cpp index 101af01341a20..b0fdd50b3c0f1 100644 --- a/lldb/source/Symbol/ObjectFile.cpp +++ b/lldb/source/Symbol/ObjectFile.cpp @@ -616,6 +616,16 @@ ObjectFile::GetSymbolTypeFromName(llvm::StringRef name, return symbol_type_hint; } +ConstString ObjectFile::GetNextSyntheticSymbolName() { + llvm::SmallString<256> name; + llvm::raw_svector_ostream os(name); + ConstString file_name = GetModule()->GetFileSpec().GetFilename(); + ++m_synthetic_symbol_idx; + os << "___lldb_unnamed_symbol" << m_synthetic_symbol_idx << "$$" + << file_name.GetStringRef(); + return ConstString(os.str()); +} + std::vector ObjectFile::GetLoadableData(Target &target) { std::vector loadables; diff --git a/lldb/source/Symbol/Symbol.cpp b/lldb/source/Symbol/Symbol.cpp index b24372795ad55..a25911d1734da 100644 --- a/lldb/source/Symbol/Symbol.cpp +++ b/lldb/source/Symbol/Symbol.cpp @@ -56,8 +56,8 @@ Symbol::Symbol(uint32_t symID, const Mangled &mangled, SymbolType type, m_size_is_synthesized(false), m_size_is_valid(size_is_valid || range.GetByteSize() > 0), m_demangled_is_synthesized(false), - m_contains_linker_annotations(contains_linker_annotations), - m_is_weak(false), m_type(type), m_mangled(mangled), m_addr_range(range), + m_contains_linker_annotations(contains_linker_annotations), + m_is_weak(false), m_type(type), m_mangled(mangled), m_addr_range(range), m_flags(flags) {} Symbol::Symbol(const Symbol &rhs) @@ -119,7 +119,7 @@ bool Symbol::ValueIsAddress() const { } ConstString Symbol::GetDisplayName() const { - return GetMangled().GetDisplayDemangledName(); + return m_mangled.GetDisplayDemangledName(); } ConstString Symbol::GetReExportedSymbolName() const { @@ -202,7 +202,7 @@ void Symbol::GetDescription(Stream *s, lldb::DescriptionLevel level, s->Printf(", value = 0x%16.16" PRIx64, m_addr_range.GetBaseAddress().GetOffset()); } - ConstString demangled = GetMangled().GetDemangledName(); + ConstString demangled = m_mangled.GetDemangledName(); if (demangled) s->Printf(", name=\"%s\"", demangled.AsCString()); if (m_mangled.GetMangledName()) @@ -218,7 +218,7 @@ void Symbol::Dump(Stream *s, Target *target, uint32_t index, // Make sure the size of the symbol is up to date before dumping GetByteSize(); - ConstString name = GetMangled().GetName(name_preference); + ConstString name = m_mangled.GetName(name_preference); if (ValueIsAddress()) { if (!m_addr_range.GetBaseAddress().Dump(s, nullptr, Address::DumpStyleFileAddress)) @@ -330,11 +330,9 @@ uint32_t Symbol::GetPrologueByteSize() { } bool Symbol::Compare(ConstString name, SymbolType type) const { - if (type == eSymbolTypeAny || m_type == type) { - const Mangled &mangled = GetMangled(); - return mangled.GetMangledName() == name || - mangled.GetDemangledName() == name; - } + if (type == eSymbolTypeAny || m_type == type) + return m_mangled.GetMangledName() == name || + m_mangled.GetDemangledName() == name; return false; } @@ -497,10 +495,10 @@ lldb::addr_t Symbol::GetLoadAddress(Target *target) const { return LLDB_INVALID_ADDRESS; } -ConstString Symbol::GetName() const { return GetMangled().GetName(); } +ConstString Symbol::GetName() const { return m_mangled.GetName(); } ConstString Symbol::GetNameNoArguments() const { - return GetMangled().GetName(Mangled::ePreferDemangledWithoutArguments); + return m_mangled.GetName(Mangled::ePreferDemangledWithoutArguments); } lldb::addr_t Symbol::ResolveCallableAddress(Target &target) const { @@ -567,21 +565,3 @@ bool Symbol::GetDisassembly(const ExecutionContext &exe_ctx, const char *flavor, bool Symbol::ContainsFileAddress(lldb::addr_t file_addr) const { return m_addr_range.ContainsFileAddress(file_addr); } - -void Symbol::SynthesizeNameIfNeeded() const { - if (m_is_synthetic && !m_mangled) { - // Synthetic symbol names don't mean anything, but they do uniquely - // identify individual symbols so we give them a unique name. The name - // starts with the synthetic symbol prefix, followed by a unique number. - // Typically the UserID of a real symbol is the symbol table index of the - // symbol in the object file's symbol table(s), so it will be the same - // every time you read in the object file. We want the same persistence for - // synthetic symbols so that users can identify them across multiple debug - // sessions, to understand crashes in those symbols and to reliably set - // breakpoints on them. - llvm::SmallString<256> name; - llvm::raw_svector_ostream os(name); - os << GetSyntheticSymbolPrefix() << GetID(); - m_mangled.SetDemangledName(ConstString(os.str())); - } -} diff --git a/lldb/source/Symbol/Symtab.cpp b/lldb/source/Symbol/Symtab.cpp index 89e75c28cb9b6..14360aa69a72c 100644 --- a/lldb/source/Symbol/Symtab.cpp +++ b/lldb/source/Symbol/Symtab.cpp @@ -301,7 +301,7 @@ void Symtab::InitNameIndexes() { // the trampoline symbols to be searchable by name we can remove this and // then possibly add a new bool to any of the Symtab functions that // lookup symbols by name to indicate if they want trampolines. - if (symbol->IsTrampoline() || symbol->IsSynthetic()) + if (symbol->IsTrampoline()) continue; // If the symbol's name string matched a Mangled::ManglingScheme, it is @@ -628,36 +628,6 @@ void Symtab::SortSymbolIndexesByValue(std::vector &indexes, } } -uint32_t Symtab::GetNameIndexes(ConstString symbol_name, - std::vector &indexes) { - auto &name_to_index = GetNameToSymbolIndexMap(lldb::eFunctionNameTypeNone); - const uint32_t count = name_to_index.GetValues(symbol_name, indexes); - if (count) - return count; - // Synthetic symbol names are not added to the name indexes, but they start - // with a prefix and end with a the symbol UserID. This allows users to find - // these symbols without having to add them to the name indexes. These - // queries will not happen very often since the names don't mean anything, so - // performance is not paramount in this case. - llvm::StringRef name = symbol_name.GetStringRef(); - // String the synthetic prefix if the name starts with it. - if (!name.consume_front(Symbol::GetSyntheticSymbolPrefix())) - return 0; // Not a synthetic symbol name - - // Extract the user ID from the symbol name - unsigned long long uid = 0; - if (getAsUnsignedInteger(name, /*Radix=*/10, uid)) - return 0; // Failed to extract the user ID as an integer - Symbol *symbol = FindSymbolByID(uid); - if (symbol == nullptr) - return 0; - const uint32_t symbol_idx = GetIndexForSymbol(symbol); - if (symbol_idx == UINT32_MAX) - return 0; - indexes.push_back(symbol_idx); - return 1; -} - uint32_t Symtab::AppendSymbolIndexesWithName(ConstString symbol_name, std::vector &indexes) { std::lock_guard guard(m_mutex); @@ -667,7 +637,8 @@ uint32_t Symtab::AppendSymbolIndexesWithName(ConstString symbol_name, if (!m_name_indexes_computed) InitNameIndexes(); - return GetNameIndexes(symbol_name, indexes); + auto &name_to_index = GetNameToSymbolIndexMap(lldb::eFunctionNameTypeNone); + return name_to_index.GetValues(symbol_name, indexes); } return 0; } @@ -684,9 +655,10 @@ uint32_t Symtab::AppendSymbolIndexesWithName(ConstString symbol_name, if (!m_name_indexes_computed) InitNameIndexes(); + auto &name_to_index = GetNameToSymbolIndexMap(lldb::eFunctionNameTypeNone); std::vector all_name_indexes; const size_t name_match_count = - GetNameIndexes(symbol_name, all_name_indexes); + name_to_index.GetValues(symbol_name, all_name_indexes); for (size_t i = 0; i < name_match_count; ++i) { if (CheckSymbolAtIndex(all_name_indexes[i], symbol_debug_type, symbol_visibility)) diff --git a/lldb/test/Shell/ObjectFile/ELF/eh_frame-symbols.yaml b/lldb/test/Shell/ObjectFile/ELF/eh_frame-symbols.yaml index 0dcc9fb76bd4f..6178a45de1b59 100644 --- a/lldb/test/Shell/ObjectFile/ELF/eh_frame-symbols.yaml +++ b/lldb/test/Shell/ObjectFile/ELF/eh_frame-symbols.yaml @@ -3,8 +3,8 @@ # CHECK: Index UserID DSX Type File Address/Value Load Address Size Flags Name # CHECK: [ 0] 1 SourceFile 0x0000000000000000 0x0000000000000000 0x00000004 - -# CHECK: [ 1] 2 SX Code 0x0000000000201180 0x0000000000000010 0x00000000 ___lldb_unnamed_symbol{{[0-9]*}} -# CHECK: [ 2] 3 SX Code 0x0000000000201190 0x0000000000000006 0x00000000 ___lldb_unnamed_symbol{{[0-9]*}} +# CHECK: [ 1] 2 SX Code 0x0000000000201180 0x0000000000000010 0x00000000 ___lldb_unnamed_symbol1$${{.*}} +# CHECK: [ 2] 3 SX Code 0x0000000000201190 0x0000000000000006 0x00000000 ___lldb_unnamed_symbol2$${{.*}} --- !ELF FileHeader: diff --git a/lldb/test/Shell/SymbolFile/Breakpad/symtab.test b/lldb/test/Shell/SymbolFile/Breakpad/symtab.test index 788dafe248d50..1eb03fa43deb0 100644 --- a/lldb/test/Shell/SymbolFile/Breakpad/symtab.test +++ b/lldb/test/Shell/SymbolFile/Breakpad/symtab.test @@ -5,7 +5,7 @@ # CHECK-LABEL: (lldb) image dump symtab symtab.out # CHECK: Symtab, file = {{.*}}symtab.out, num_symbols = 5: # CHECK: Index UserID DSX Type File Address/Value Load Address Size Flags Name -# CHECK: [ 0] 0 SX Code 0x0000000000400000 0x00000000000000b0 0x00000000 ___lldb_unnamed_symbol{{[0-9]*}} +# CHECK: [ 0] 0 SX Code 0x0000000000400000 0x00000000000000b0 0x00000000 ___lldb_unnamed_symbol{{[0-9]*}}$$symtab.out # CHECK: [ 1] 0 X Code 0x00000000004000b0 0x000000000000000c 0x00000000 f1_func # CHECK: [ 2] 0 X Code 0x00000000004000a0 0x000000000000000d 0x00000000 func_only # CHECK: [ 3] 0 X Code 0x00000000004000c0 0x0000000000000010 0x00000000 f2 From 485cc55edfb875628e19bb6d9de4706af2865d3e Mon Sep 17 00:00:00 2001 From: Stella Laurenzo Date: Mon, 28 Jun 2021 22:54:11 +0000 Subject: [PATCH 227/619] [mlir] Generare .cpp.inc files for dialects. * Previously, we were only generating .h.inc files. We foresee the need to also generate implementations and this is a step towards that. * Discussed in https://llvm.discourse.group/t/generating-cpp-inc-files-for-dialects/3732/2 * Deviates from the discussion above by generating a default constructor in the .cpp.inc file (and adding a tablegen bit that disables this in case if this is user provided). * Generating the destructor started as a way to flush out the missing includes (produces a link error), but it is a strict improvement on its own that is worth doing (i.e. by emitting key methods in the .cpp file, we root vtables in one translation unit, which is a non-controversial improvement). Differential Revision: https://reviews.llvm.org/D105070 --- mlir/cmake/modules/AddMLIR.cmake | 1 + .../lib/Standalone/StandaloneDialect.cpp | 2 + .../toy/Ch2/include/toy/CMakeLists.txt | 1 + mlir/examples/toy/Ch2/mlir/Dialect.cpp | 2 + .../toy/Ch3/include/toy/CMakeLists.txt | 1 + mlir/examples/toy/Ch3/mlir/Dialect.cpp | 2 + .../toy/Ch4/include/toy/CMakeLists.txt | 1 + mlir/examples/toy/Ch4/mlir/Dialect.cpp | 2 + .../toy/Ch5/include/toy/CMakeLists.txt | 1 + mlir/examples/toy/Ch5/mlir/Dialect.cpp | 2 + .../toy/Ch6/include/toy/CMakeLists.txt | 1 + mlir/examples/toy/Ch6/mlir/Dialect.cpp | 2 + .../toy/Ch7/include/toy/CMakeLists.txt | 1 + mlir/examples/toy/Ch7/mlir/Dialect.cpp | 2 + .../mlir/Dialect/LLVMIR/CMakeLists.txt | 1 + .../mlir/Dialect/OpenACC/CMakeLists.txt | 1 + .../mlir/Dialect/OpenMP/CMakeLists.txt | 1 + .../Dialect/StandardOps/IR/CMakeLists.txt | 1 + mlir/include/mlir/IR/CMakeLists.txt | 1 + mlir/include/mlir/IR/OpBase.td | 4 + mlir/include/mlir/TableGen/Dialect.h | 6 +- mlir/lib/Dialect/AMX/IR/AMXDialect.cpp | 2 + mlir/lib/Dialect/Affine/IR/AffineOps.cpp | 2 + .../lib/Dialect/ArmNeon/IR/ArmNeonDialect.cpp | 2 + mlir/lib/Dialect/ArmSVE/IR/ArmSVEDialect.cpp | 2 + mlir/lib/Dialect/Async/IR/Async.cpp | 2 + .../lib/Dialect/Complex/IR/ComplexDialect.cpp | 2 + mlir/lib/Dialect/DLTI/DLTI.cpp | 2 + mlir/lib/Dialect/EmitC/IR/EmitC.cpp | 2 + mlir/lib/Dialect/GPU/IR/GPUDialect.cpp | 2 + mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp | 2 + mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp | 2 + mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp | 2 + mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 2 + mlir/lib/Dialect/Math/IR/MathDialect.cpp | 2 + mlir/lib/Dialect/MemRef/IR/MemRefDialect.cpp | 2 + mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp | 2 + mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 1 + mlir/lib/Dialect/PDL/IR/PDL.cpp | 2 + mlir/lib/Dialect/PDLInterp/IR/PDLInterp.cpp | 2 + mlir/lib/Dialect/Quant/IR/QuantOps.cpp | 2 + mlir/lib/Dialect/SCF/SCF.cpp | 2 + mlir/lib/Dialect/SPIRV/IR/SPIRVDialect.cpp | 2 + mlir/lib/Dialect/Shape/IR/Shape.cpp | 2 + .../SparseTensor/IR/SparseTensorDialect.cpp | 2 + mlir/lib/Dialect/StandardOps/IR/Ops.cpp | 2 + mlir/lib/Dialect/Tensor/IR/TensorDialect.cpp | 2 + mlir/lib/Dialect/Tosa/IR/TosaOps.cpp | 2 + mlir/lib/Dialect/Vector/VectorOps.cpp | 1 + .../Dialect/X86Vector/IR/X86VectorDialect.cpp | 2 + mlir/lib/IR/BuiltinDialect.cpp | 2 + mlir/lib/TableGen/Dialect.cpp | 4 + mlir/test/lib/Dialect/Test/CMakeLists.txt | 1 + mlir/test/lib/Dialect/Test/TestDialect.cpp | 2 + mlir/test/lib/Dialect/Test/TestOps.td | 3 +- mlir/tools/mlir-tblgen/DialectGen.cpp | 96 +++++++--- .../llvm-project-overlay/mlir/BUILD.bazel | 174 ++++++++++++++++++ .../mlir/test/BUILD.bazel | 7 + 58 files changed, 355 insertions(+), 26 deletions(-) diff --git a/mlir/cmake/modules/AddMLIR.cmake b/mlir/cmake/modules/AddMLIR.cmake index 81bb528bc11d3..109ac46ab6d26 100644 --- a/mlir/cmake/modules/AddMLIR.cmake +++ b/mlir/cmake/modules/AddMLIR.cmake @@ -15,6 +15,7 @@ function(add_mlir_dialect dialect dialect_namespace) mlir_tablegen(${dialect}Types.h.inc -gen-typedef-decls) mlir_tablegen(${dialect}Types.cpp.inc -gen-typedef-defs) mlir_tablegen(${dialect}Dialect.h.inc -gen-dialect-decls -dialect=${dialect_namespace}) + mlir_tablegen(${dialect}Dialect.cpp.inc -gen-dialect-defs -dialect=${dialect_namespace}) add_public_tablegen_target(MLIR${dialect}IncGen) add_dependencies(mlir-headers MLIR${dialect}IncGen) endfunction() diff --git a/mlir/examples/standalone/lib/Standalone/StandaloneDialect.cpp b/mlir/examples/standalone/lib/Standalone/StandaloneDialect.cpp index acdf88ab9b43c..cdd9337fcf98b 100644 --- a/mlir/examples/standalone/lib/Standalone/StandaloneDialect.cpp +++ b/mlir/examples/standalone/lib/Standalone/StandaloneDialect.cpp @@ -12,6 +12,8 @@ using namespace mlir; using namespace mlir::standalone; +#include "Standalone/StandaloneOpsDialect.cpp.inc" + //===----------------------------------------------------------------------===// // Standalone dialect. //===----------------------------------------------------------------------===// diff --git a/mlir/examples/toy/Ch2/include/toy/CMakeLists.txt b/mlir/examples/toy/Ch2/include/toy/CMakeLists.txt index 26a0eb1f8e1b4..301baafb4412d 100644 --- a/mlir/examples/toy/Ch2/include/toy/CMakeLists.txt +++ b/mlir/examples/toy/Ch2/include/toy/CMakeLists.txt @@ -2,4 +2,5 @@ set(LLVM_TARGET_DEFINITIONS Ops.td) mlir_tablegen(Ops.h.inc -gen-op-decls) mlir_tablegen(Ops.cpp.inc -gen-op-defs) mlir_tablegen(Dialect.h.inc -gen-dialect-decls) +mlir_tablegen(Dialect.cpp.inc -gen-dialect-defs) add_public_tablegen_target(ToyCh2OpsIncGen) diff --git a/mlir/examples/toy/Ch2/mlir/Dialect.cpp b/mlir/examples/toy/Ch2/mlir/Dialect.cpp index 9327aaf8431a4..5213d336d5482 100644 --- a/mlir/examples/toy/Ch2/mlir/Dialect.cpp +++ b/mlir/examples/toy/Ch2/mlir/Dialect.cpp @@ -20,6 +20,8 @@ using namespace mlir; using namespace mlir::toy; +#include "toy/Dialect.cpp.inc" + //===----------------------------------------------------------------------===// // ToyDialect //===----------------------------------------------------------------------===// diff --git a/mlir/examples/toy/Ch3/include/toy/CMakeLists.txt b/mlir/examples/toy/Ch3/include/toy/CMakeLists.txt index 15bb98ca9600a..a233f5d6da08f 100644 --- a/mlir/examples/toy/Ch3/include/toy/CMakeLists.txt +++ b/mlir/examples/toy/Ch3/include/toy/CMakeLists.txt @@ -2,4 +2,5 @@ set(LLVM_TARGET_DEFINITIONS Ops.td) mlir_tablegen(Ops.h.inc -gen-op-decls) mlir_tablegen(Ops.cpp.inc -gen-op-defs) mlir_tablegen(Dialect.h.inc -gen-dialect-decls) +mlir_tablegen(Dialect.cpp.inc -gen-dialect-defs) add_public_tablegen_target(ToyCh3OpsIncGen) diff --git a/mlir/examples/toy/Ch3/mlir/Dialect.cpp b/mlir/examples/toy/Ch3/mlir/Dialect.cpp index 9327aaf8431a4..5213d336d5482 100644 --- a/mlir/examples/toy/Ch3/mlir/Dialect.cpp +++ b/mlir/examples/toy/Ch3/mlir/Dialect.cpp @@ -20,6 +20,8 @@ using namespace mlir; using namespace mlir::toy; +#include "toy/Dialect.cpp.inc" + //===----------------------------------------------------------------------===// // ToyDialect //===----------------------------------------------------------------------===// diff --git a/mlir/examples/toy/Ch4/include/toy/CMakeLists.txt b/mlir/examples/toy/Ch4/include/toy/CMakeLists.txt index f2de562aaa353..acf7e311ffd92 100644 --- a/mlir/examples/toy/Ch4/include/toy/CMakeLists.txt +++ b/mlir/examples/toy/Ch4/include/toy/CMakeLists.txt @@ -3,6 +3,7 @@ set(LLVM_TARGET_DEFINITIONS Ops.td) mlir_tablegen(Ops.h.inc -gen-op-decls) mlir_tablegen(Ops.cpp.inc -gen-op-defs) mlir_tablegen(Dialect.h.inc -gen-dialect-decls) +mlir_tablegen(Dialect.cpp.inc -gen-dialect-defs) add_public_tablegen_target(ToyCh4OpsIncGen) # Most dialects should use add_mlir_interfaces(). diff --git a/mlir/examples/toy/Ch4/mlir/Dialect.cpp b/mlir/examples/toy/Ch4/mlir/Dialect.cpp index dd82e0409ab16..ff1d4cdcd2a9a 100644 --- a/mlir/examples/toy/Ch4/mlir/Dialect.cpp +++ b/mlir/examples/toy/Ch4/mlir/Dialect.cpp @@ -21,6 +21,8 @@ using namespace mlir; using namespace mlir::toy; +#include "toy/Dialect.cpp.inc" + //===----------------------------------------------------------------------===// // ToyInlinerInterface //===----------------------------------------------------------------------===// diff --git a/mlir/examples/toy/Ch5/include/toy/CMakeLists.txt b/mlir/examples/toy/Ch5/include/toy/CMakeLists.txt index 10313c9d91ccd..7d51dd9c17c73 100644 --- a/mlir/examples/toy/Ch5/include/toy/CMakeLists.txt +++ b/mlir/examples/toy/Ch5/include/toy/CMakeLists.txt @@ -3,6 +3,7 @@ set(LLVM_TARGET_DEFINITIONS Ops.td) mlir_tablegen(Ops.h.inc -gen-op-decls) mlir_tablegen(Ops.cpp.inc -gen-op-defs) mlir_tablegen(Dialect.h.inc -gen-dialect-decls) +mlir_tablegen(Dialect.cpp.inc -gen-dialect-defs) add_public_tablegen_target(ToyCh5OpsIncGen) # Most dialects should use add_mlir_interfaces(). diff --git a/mlir/examples/toy/Ch5/mlir/Dialect.cpp b/mlir/examples/toy/Ch5/mlir/Dialect.cpp index 18d5985042faa..89e7529a4a948 100644 --- a/mlir/examples/toy/Ch5/mlir/Dialect.cpp +++ b/mlir/examples/toy/Ch5/mlir/Dialect.cpp @@ -21,6 +21,8 @@ using namespace mlir; using namespace mlir::toy; +#include "toy/Dialect.cpp.inc" + //===----------------------------------------------------------------------===// // ToyInlinerInterface //===----------------------------------------------------------------------===// diff --git a/mlir/examples/toy/Ch6/include/toy/CMakeLists.txt b/mlir/examples/toy/Ch6/include/toy/CMakeLists.txt index 4c54020302a5d..03e8a2a470078 100644 --- a/mlir/examples/toy/Ch6/include/toy/CMakeLists.txt +++ b/mlir/examples/toy/Ch6/include/toy/CMakeLists.txt @@ -3,6 +3,7 @@ set(LLVM_TARGET_DEFINITIONS Ops.td) mlir_tablegen(Ops.h.inc -gen-op-decls) mlir_tablegen(Ops.cpp.inc -gen-op-defs) mlir_tablegen(Dialect.h.inc -gen-dialect-decls) +mlir_tablegen(Dialect.cpp.inc -gen-dialect-defs) add_public_tablegen_target(ToyCh6OpsIncGen) # Most dialects should use add_mlir_interfaces(). diff --git a/mlir/examples/toy/Ch6/mlir/Dialect.cpp b/mlir/examples/toy/Ch6/mlir/Dialect.cpp index 18d5985042faa..89e7529a4a948 100644 --- a/mlir/examples/toy/Ch6/mlir/Dialect.cpp +++ b/mlir/examples/toy/Ch6/mlir/Dialect.cpp @@ -21,6 +21,8 @@ using namespace mlir; using namespace mlir::toy; +#include "toy/Dialect.cpp.inc" + //===----------------------------------------------------------------------===// // ToyInlinerInterface //===----------------------------------------------------------------------===// diff --git a/mlir/examples/toy/Ch7/include/toy/CMakeLists.txt b/mlir/examples/toy/Ch7/include/toy/CMakeLists.txt index 3ff7633f8ddef..7712e42ad8093 100644 --- a/mlir/examples/toy/Ch7/include/toy/CMakeLists.txt +++ b/mlir/examples/toy/Ch7/include/toy/CMakeLists.txt @@ -3,6 +3,7 @@ set(LLVM_TARGET_DEFINITIONS Ops.td) mlir_tablegen(Ops.h.inc -gen-op-decls) mlir_tablegen(Ops.cpp.inc -gen-op-defs) mlir_tablegen(Dialect.h.inc -gen-dialect-decls) +mlir_tablegen(Dialect.cpp.inc -gen-dialect-defs) add_public_tablegen_target(ToyCh7OpsIncGen) # Most dialects should use add_mlir_interfaces(). diff --git a/mlir/examples/toy/Ch7/mlir/Dialect.cpp b/mlir/examples/toy/Ch7/mlir/Dialect.cpp index 28f5435f7e982..30d473f7bec20 100644 --- a/mlir/examples/toy/Ch7/mlir/Dialect.cpp +++ b/mlir/examples/toy/Ch7/mlir/Dialect.cpp @@ -22,6 +22,8 @@ using namespace mlir; using namespace mlir::toy; +#include "toy/Dialect.cpp.inc" + //===----------------------------------------------------------------------===// // ToyInlinerInterface //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt b/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt index ff3c8e2eeba54..91754f16e8a37 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt @@ -9,6 +9,7 @@ set(LLVM_TARGET_DEFINITIONS LLVMOps.td) mlir_tablegen(LLVMOps.h.inc -gen-op-decls) mlir_tablegen(LLVMOps.cpp.inc -gen-op-defs) mlir_tablegen(LLVMOpsDialect.h.inc -gen-dialect-decls) +mlir_tablegen(LLVMOpsDialect.cpp.inc -gen-dialect-defs) mlir_tablegen(LLVMOpsEnums.h.inc -gen-enum-decls) mlir_tablegen(LLVMOpsEnums.cpp.inc -gen-enum-defs) add_public_tablegen_target(MLIRLLVMOpsIncGen) diff --git a/mlir/include/mlir/Dialect/OpenACC/CMakeLists.txt b/mlir/include/mlir/Dialect/OpenACC/CMakeLists.txt index 525851588562a..241f939aedbfe 100644 --- a/mlir/include/mlir/Dialect/OpenACC/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/OpenACC/CMakeLists.txt @@ -4,6 +4,7 @@ add_public_tablegen_target(acc_common_td) set(LLVM_TARGET_DEFINITIONS OpenACCOps.td) mlir_tablegen(OpenACCOpsDialect.h.inc -gen-dialect-decls -dialect=acc) +mlir_tablegen(OpenACCOpsDialect.cpp.inc -gen-dialect-defs -dialect=acc) mlir_tablegen(OpenACCOps.h.inc -gen-op-decls) mlir_tablegen(OpenACCOps.cpp.inc -gen-op-defs) mlir_tablegen(OpenACCOpsEnums.h.inc -gen-enum-decls) diff --git a/mlir/include/mlir/Dialect/OpenMP/CMakeLists.txt b/mlir/include/mlir/Dialect/OpenMP/CMakeLists.txt index bbcebc023c7fb..90614993cacf6 100644 --- a/mlir/include/mlir/Dialect/OpenMP/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/OpenMP/CMakeLists.txt @@ -4,6 +4,7 @@ add_public_tablegen_target(omp_common_td) set(LLVM_TARGET_DEFINITIONS OpenMPOps.td) mlir_tablegen(OpenMPOpsDialect.h.inc -gen-dialect-decls -dialect=omp) +mlir_tablegen(OpenMPOpsDialect.cpp.inc -gen-dialect-defs -dialect=omp) mlir_tablegen(OpenMPOps.h.inc -gen-op-decls) mlir_tablegen(OpenMPOps.cpp.inc -gen-op-defs) mlir_tablegen(OpenMPOpsEnums.h.inc -gen-enum-decls) diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/CMakeLists.txt b/mlir/include/mlir/Dialect/StandardOps/IR/CMakeLists.txt index 7bc39e93d913b..c1756e94ba128 100644 --- a/mlir/include/mlir/Dialect/StandardOps/IR/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/StandardOps/IR/CMakeLists.txt @@ -2,6 +2,7 @@ set(LLVM_TARGET_DEFINITIONS Ops.td) mlir_tablegen(Ops.h.inc -gen-op-decls) mlir_tablegen(Ops.cpp.inc -gen-op-defs) mlir_tablegen(OpsDialect.h.inc -gen-dialect-decls) +mlir_tablegen(OpsDialect.cpp.inc -gen-dialect-defs) mlir_tablegen(OpsEnums.h.inc -gen-enum-decls) mlir_tablegen(OpsEnums.cpp.inc -gen-enum-defs) add_public_tablegen_target(MLIRStandardOpsIncGen) diff --git a/mlir/include/mlir/IR/CMakeLists.txt b/mlir/include/mlir/IR/CMakeLists.txt index 1a6b9c942d3f6..2757f3d6ead59 100644 --- a/mlir/include/mlir/IR/CMakeLists.txt +++ b/mlir/include/mlir/IR/CMakeLists.txt @@ -9,6 +9,7 @@ add_public_tablegen_target(MLIRBuiltinAttributesIncGen) set(LLVM_TARGET_DEFINITIONS BuiltinDialect.td) mlir_tablegen(BuiltinDialect.h.inc -gen-dialect-decls) +mlir_tablegen(BuiltinDialect.cpp.inc -gen-dialect-defs) add_public_tablegen_target(MLIRBuiltinDialectIncGen) set(LLVM_TARGET_DEFINITIONS BuiltinLocationAttributes.td) diff --git a/mlir/include/mlir/IR/OpBase.td b/mlir/include/mlir/IR/OpBase.td index 2b17731e95d02..59890190d2c66 100644 --- a/mlir/include/mlir/IR/OpBase.td +++ b/mlir/include/mlir/IR/OpBase.td @@ -267,6 +267,10 @@ class Dialect { // If this dialect overrides the hook for materializing constants. bit hasConstantMaterializer = 0; + /// If the dialect definition provides a non-default destructor. + /// If false, a default destructor implementation will be generated. + bit hasNonDefaultDestructor = 0; + // If this dialect overrides the hook for verifying operation attributes. bit hasOperationAttrVerify = 0; diff --git a/mlir/include/mlir/TableGen/Dialect.h b/mlir/include/mlir/TableGen/Dialect.h index 609bf4e2ec466..4c5af8eba7d14 100644 --- a/mlir/include/mlir/TableGen/Dialect.h +++ b/mlir/include/mlir/TableGen/Dialect.h @@ -54,9 +54,13 @@ class Dialect { /// Returns true if this dialect has a canonicalizer. bool hasCanonicalizer() const; - // Returns true if this dialect has a constant materializer. + /// Returns true if this dialect has a constant materializer. bool hasConstantMaterializer() const; + /// Returns true if the destructor definition is provided explicitly or + /// false if a default should be generated. + bool hasNonDefaultDestructor() const; + /// Returns true if this dialect has an operation attribute verifier. bool hasOperationAttrVerify() const; diff --git a/mlir/lib/Dialect/AMX/IR/AMXDialect.cpp b/mlir/lib/Dialect/AMX/IR/AMXDialect.cpp index ab98820b2ecbc..c5cf1f41d7098 100644 --- a/mlir/lib/Dialect/AMX/IR/AMXDialect.cpp +++ b/mlir/lib/Dialect/AMX/IR/AMXDialect.cpp @@ -18,6 +18,8 @@ using namespace mlir; +#include "mlir/Dialect/AMX/AMXDialect.cpp.inc" + void amx::AMXDialect::initialize() { addOperations< #define GET_OP_LIST diff --git a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp index 1106636ea1437..5d4db0f1039a8 100644 --- a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp +++ b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp @@ -26,6 +26,8 @@ using namespace mlir; #define DEBUG_TYPE "affine-analysis" +#include "mlir/Dialect/Affine/IR/AffineOpsDialect.cpp.inc" + /// A utility function to check if a value is defined at the top level of /// `region` or is an argument of `region`. A value of index type defined at the /// top level of a `AffineScope` region is always a valid symbol for all diff --git a/mlir/lib/Dialect/ArmNeon/IR/ArmNeonDialect.cpp b/mlir/lib/Dialect/ArmNeon/IR/ArmNeonDialect.cpp index b8b8ebd35e688..b4f85adcad87c 100644 --- a/mlir/lib/Dialect/ArmNeon/IR/ArmNeonDialect.cpp +++ b/mlir/lib/Dialect/ArmNeon/IR/ArmNeonDialect.cpp @@ -18,6 +18,8 @@ using namespace mlir; +#include "mlir/Dialect/ArmNeon/ArmNeonDialect.cpp.inc" + void arm_neon::ArmNeonDialect::initialize() { addOperations< #define GET_OP_LIST diff --git a/mlir/lib/Dialect/ArmSVE/IR/ArmSVEDialect.cpp b/mlir/lib/Dialect/ArmSVE/IR/ArmSVEDialect.cpp index 5e5ce6ed63bc4..6a89c4ea57bf5 100644 --- a/mlir/lib/Dialect/ArmSVE/IR/ArmSVEDialect.cpp +++ b/mlir/lib/Dialect/ArmSVE/IR/ArmSVEDialect.cpp @@ -22,6 +22,8 @@ using namespace mlir; using namespace arm_sve; +#include "mlir/Dialect/ArmSVE/ArmSVEDialect.cpp.inc" + static Type getI1SameShape(Type type); static void buildScalableCmpIOp(OpBuilder &build, OperationState &result, CmpIPredicate predicate, Value lhs, Value rhs); diff --git a/mlir/lib/Dialect/Async/IR/Async.cpp b/mlir/lib/Dialect/Async/IR/Async.cpp index bd627edbd4271..3e325e21b1c56 100644 --- a/mlir/lib/Dialect/Async/IR/Async.cpp +++ b/mlir/lib/Dialect/Async/IR/Async.cpp @@ -14,6 +14,8 @@ using namespace mlir; using namespace mlir::async; +#include "mlir/Dialect/Async/IR/AsyncOpsDialect.cpp.inc" + void AsyncDialect::initialize() { addOperations< #define GET_OP_LIST diff --git a/mlir/lib/Dialect/Complex/IR/ComplexDialect.cpp b/mlir/lib/Dialect/Complex/IR/ComplexDialect.cpp index 44330361e95df..0a61ceadcfa9c 100644 --- a/mlir/lib/Dialect/Complex/IR/ComplexDialect.cpp +++ b/mlir/lib/Dialect/Complex/IR/ComplexDialect.cpp @@ -8,6 +8,8 @@ #include "mlir/Dialect/Complex/IR/Complex.h" +#include "mlir/Dialect/Complex/IR/ComplexOpsDialect.cpp.inc" + void mlir::complex::ComplexDialect::initialize() { addOperations< #define GET_OP_LIST diff --git a/mlir/lib/Dialect/DLTI/DLTI.cpp b/mlir/lib/Dialect/DLTI/DLTI.cpp index 2567be64ac1ad..20615124bd91d 100644 --- a/mlir/lib/Dialect/DLTI/DLTI.cpp +++ b/mlir/lib/Dialect/DLTI/DLTI.cpp @@ -16,6 +16,8 @@ using namespace mlir; +#include "mlir/Dialect/DLTI/DLTIDialect.cpp.inc" + //===----------------------------------------------------------------------===// // DataLayoutEntryAttr //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/EmitC/IR/EmitC.cpp b/mlir/lib/Dialect/EmitC/IR/EmitC.cpp index 364c247f75e4f..c0fa74908c40e 100644 --- a/mlir/lib/Dialect/EmitC/IR/EmitC.cpp +++ b/mlir/lib/Dialect/EmitC/IR/EmitC.cpp @@ -14,6 +14,8 @@ using namespace mlir; using namespace mlir::emitc; +#include "mlir/Dialect/EmitC/IR/EmitCDialect.cpp.inc" + //===----------------------------------------------------------------------===// // EmitCDialect //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp index f0845c45cd63e..12730d78d2530 100644 --- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp +++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp @@ -29,6 +29,8 @@ using namespace mlir; using namespace mlir::gpu; +#include "mlir/Dialect/GPU/GPUOpsDialect.cpp.inc" + //===----------------------------------------------------------------------===// // MMAMatrixType //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp index 8b9a8fa74b8c7..856c41c2d0e9b 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp @@ -36,6 +36,8 @@ using namespace mlir; using namespace mlir::LLVM; +#include "mlir/Dialect/LLVMIR/LLVMOpsDialect.cpp.inc" + static constexpr const char kVolatileAttrName[] = "volatile_"; static constexpr const char kNonTemporalAttrName[] = "nontemporal"; diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp index c16e1c2f7af4f..da5c07abe6d8b 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp @@ -30,6 +30,8 @@ using namespace mlir; using namespace NVVM; +#include "mlir/Dialect/LLVMIR/NVVMOpsDialect.cpp.inc" + //===----------------------------------------------------------------------===// // Printing/parsing for NVVM ops //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp index f54fcdbca3190..fcce8902d8404 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp @@ -30,6 +30,8 @@ using namespace mlir; using namespace ROCDL; +#include "mlir/Dialect/LLVMIR/ROCDLOpsDialect.cpp.inc" + //===----------------------------------------------------------------------===// // Parsing for ROCDL ops //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp index f4524f19f3f14..904f6dbede70b 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -36,6 +36,8 @@ using namespace mlir; using namespace mlir::linalg; +#include "mlir/Dialect/Linalg/IR/LinalgOpsDialect.cpp.inc" + /// Forward declarations. /// Generic entry point to create the block for the region of a LinalgOp. diff --git a/mlir/lib/Dialect/Math/IR/MathDialect.cpp b/mlir/lib/Dialect/Math/IR/MathDialect.cpp index 98ab368988d1d..eb21400491745 100644 --- a/mlir/lib/Dialect/Math/IR/MathDialect.cpp +++ b/mlir/lib/Dialect/Math/IR/MathDialect.cpp @@ -12,6 +12,8 @@ using namespace mlir; using namespace mlir::math; +#include "mlir/Dialect/Math/IR/MathOpsDialect.cpp.inc" + namespace { /// This class defines the interface for handling inlining with math /// operations. diff --git a/mlir/lib/Dialect/MemRef/IR/MemRefDialect.cpp b/mlir/lib/Dialect/MemRef/IR/MemRefDialect.cpp index ed82a4beefd1c..b6d5754fde0d0 100644 --- a/mlir/lib/Dialect/MemRef/IR/MemRefDialect.cpp +++ b/mlir/lib/Dialect/MemRef/IR/MemRefDialect.cpp @@ -12,6 +12,8 @@ using namespace mlir; using namespace mlir::memref; +#include "mlir/Dialect/MemRef/IR/MemRefOpsDialect.cpp.inc" + //===----------------------------------------------------------------------===// // MemRefDialect Dialect Interfaces //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp index f823041f29221..92f8aaa8126a2 100644 --- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp +++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp @@ -17,6 +17,8 @@ using namespace mlir; using namespace acc; +#include "mlir/Dialect/OpenACC/OpenACCOpsDialect.cpp.inc" + //===----------------------------------------------------------------------===// // OpenACC operations //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 9160ab9318b6a..30a138e6a5a27 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -22,6 +22,7 @@ #include "llvm/ADT/StringSwitch.h" #include +#include "mlir/Dialect/OpenMP/OpenMPOpsDialect.cpp.inc" #include "mlir/Dialect/OpenMP/OpenMPOpsEnums.cpp.inc" using namespace mlir; diff --git a/mlir/lib/Dialect/PDL/IR/PDL.cpp b/mlir/lib/Dialect/PDL/IR/PDL.cpp index 8164c89dac544..3cb5b1bd0d082 100644 --- a/mlir/lib/Dialect/PDL/IR/PDL.cpp +++ b/mlir/lib/Dialect/PDL/IR/PDL.cpp @@ -16,6 +16,8 @@ using namespace mlir; using namespace mlir::pdl; +#include "mlir/Dialect/PDL/IR/PDLOpsDialect.cpp.inc" + //===----------------------------------------------------------------------===// // PDLDialect //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/PDLInterp/IR/PDLInterp.cpp b/mlir/lib/Dialect/PDLInterp/IR/PDLInterp.cpp index a93f3c48503cf..d149ef58586e5 100644 --- a/mlir/lib/Dialect/PDLInterp/IR/PDLInterp.cpp +++ b/mlir/lib/Dialect/PDLInterp/IR/PDLInterp.cpp @@ -14,6 +14,8 @@ using namespace mlir; using namespace mlir::pdl_interp; +#include "mlir/Dialect/PDLInterp/IR/PDLInterpOpsDialect.cpp.inc" + //===----------------------------------------------------------------------===// // PDLInterp Dialect //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Quant/IR/QuantOps.cpp b/mlir/lib/Dialect/Quant/IR/QuantOps.cpp index fa64e4dd5411e..fd0beadcd192f 100644 --- a/mlir/lib/Dialect/Quant/IR/QuantOps.cpp +++ b/mlir/lib/Dialect/Quant/IR/QuantOps.cpp @@ -23,6 +23,8 @@ using namespace mlir; using namespace mlir::quant; using namespace mlir::quant::detail; +#include "mlir/Dialect/Quant/QuantOpsDialect.cpp.inc" + void QuantizationDialect::initialize() { addTypes(); diff --git a/mlir/lib/Dialect/SCF/SCF.cpp b/mlir/lib/Dialect/SCF/SCF.cpp index 99d2386ced1b1..75f8430bd3e03 100644 --- a/mlir/lib/Dialect/SCF/SCF.cpp +++ b/mlir/lib/Dialect/SCF/SCF.cpp @@ -18,6 +18,8 @@ using namespace mlir; using namespace mlir::scf; +#include "mlir/Dialect/SCF/SCFOpsDialect.cpp.inc" + //===----------------------------------------------------------------------===// // SCFDialect Dialect Interfaces //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/SPIRV/IR/SPIRVDialect.cpp b/mlir/lib/Dialect/SPIRV/IR/SPIRVDialect.cpp index 7da2d5ab9f2c1..6537710b92e09 100644 --- a/mlir/lib/Dialect/SPIRV/IR/SPIRVDialect.cpp +++ b/mlir/lib/Dialect/SPIRV/IR/SPIRVDialect.cpp @@ -33,6 +33,8 @@ using namespace mlir; using namespace mlir::spirv; +#include "mlir/Dialect/SPIRV/IR/SPIRVOpsDialect.cpp.inc" + //===----------------------------------------------------------------------===// // InlinerInterface //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Shape/IR/Shape.cpp b/mlir/lib/Dialect/Shape/IR/Shape.cpp index 158a6d5763fbb..f75bfc5894b6a 100644 --- a/mlir/lib/Dialect/Shape/IR/Shape.cpp +++ b/mlir/lib/Dialect/Shape/IR/Shape.cpp @@ -24,6 +24,8 @@ using namespace mlir; using namespace mlir::shape; +#include "mlir/Dialect/Shape/IR/ShapeOpsDialect.cpp.inc" + namespace { #include "ShapeCanonicalization.inc" } diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp index 7bde51d2dbab1..e07dfdcb7f0cd 100644 --- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp +++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp @@ -16,6 +16,8 @@ using namespace mlir; using namespace mlir::sparse_tensor; +#include "mlir/Dialect/SparseTensor/IR/SparseTensorOpsDialect.cpp.inc" + //===----------------------------------------------------------------------===// // TensorDialect Attribute Methods. //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp index f6abfc4060d0a..49c285b7fd63c 100644 --- a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp +++ b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp @@ -28,6 +28,8 @@ #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" +#include "mlir/Dialect/StandardOps/IR/OpsDialect.cpp.inc" + // Pull in all enum type definitions and utility function declarations. #include "mlir/Dialect/StandardOps/IR/OpsEnums.cpp.inc" diff --git a/mlir/lib/Dialect/Tensor/IR/TensorDialect.cpp b/mlir/lib/Dialect/Tensor/IR/TensorDialect.cpp index 46a348bca8f9a..860e0b84cfe66 100644 --- a/mlir/lib/Dialect/Tensor/IR/TensorDialect.cpp +++ b/mlir/lib/Dialect/Tensor/IR/TensorDialect.cpp @@ -12,6 +12,8 @@ using namespace mlir; using namespace mlir::tensor; +#include "mlir/Dialect/Tensor/IR/TensorOpsDialect.cpp.inc" + //===----------------------------------------------------------------------===// // TensorDialect Dialect Interfaces //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp index 83a89f3af80d6..39b864ff62c02 100644 --- a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp +++ b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp @@ -23,6 +23,8 @@ using namespace mlir; using namespace mlir::tosa; +#include "mlir/Dialect/Tosa/IR/TosaOpsDialect.cpp.inc" + //===----------------------------------------------------------------------===// // Tosa dialect structs and interface includes. //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Vector/VectorOps.cpp b/mlir/lib/Dialect/Vector/VectorOps.cpp index 92a672050a19e..c35413289b195 100644 --- a/mlir/lib/Dialect/Vector/VectorOps.cpp +++ b/mlir/lib/Dialect/Vector/VectorOps.cpp @@ -32,6 +32,7 @@ #include "llvm/ADT/bit.h" #include +#include "mlir/Dialect/Vector/VectorOpsDialect.cpp.inc" // Pull in all enum type and utility function definitions. #include "mlir/Dialect/Vector/VectorOpsEnums.cpp.inc" diff --git a/mlir/lib/Dialect/X86Vector/IR/X86VectorDialect.cpp b/mlir/lib/Dialect/X86Vector/IR/X86VectorDialect.cpp index 5c80fa3692fef..a9de16d5cb8a0 100644 --- a/mlir/lib/Dialect/X86Vector/IR/X86VectorDialect.cpp +++ b/mlir/lib/Dialect/X86Vector/IR/X86VectorDialect.cpp @@ -18,6 +18,8 @@ using namespace mlir; +#include "mlir/Dialect/X86Vector/X86VectorDialect.cpp.inc" + void x86vector::X86VectorDialect::initialize() { addOperations< #define GET_OP_LIST diff --git a/mlir/lib/IR/BuiltinDialect.cpp b/mlir/lib/IR/BuiltinDialect.cpp index 728443e8b64fb..25e853f141bec 100644 --- a/mlir/lib/IR/BuiltinDialect.cpp +++ b/mlir/lib/IR/BuiltinDialect.cpp @@ -27,6 +27,8 @@ using namespace mlir; // Builtin Dialect //===----------------------------------------------------------------------===// +#include "mlir/IR/BuiltinDialect.cpp.inc" + namespace { struct BuiltinOpAsmDialectInterface : public OpAsmDialectInterface { using OpAsmDialectInterface::OpAsmDialectInterface; diff --git a/mlir/lib/TableGen/Dialect.cpp b/mlir/lib/TableGen/Dialect.cpp index 0cdd9d6d856e1..59e7593c9425a 100644 --- a/mlir/lib/TableGen/Dialect.cpp +++ b/mlir/lib/TableGen/Dialect.cpp @@ -69,6 +69,10 @@ bool Dialect::hasConstantMaterializer() const { return def->getValueAsBit("hasConstantMaterializer"); } +bool Dialect::hasNonDefaultDestructor() const { + return def->getValueAsBit("hasNonDefaultDestructor"); +} + bool Dialect::hasOperationAttrVerify() const { return def->getValueAsBit("hasOperationAttrVerify"); } diff --git a/mlir/test/lib/Dialect/Test/CMakeLists.txt b/mlir/test/lib/Dialect/Test/CMakeLists.txt index a591ab5dd543a..5f37b09dda4c8 100644 --- a/mlir/test/lib/Dialect/Test/CMakeLists.txt +++ b/mlir/test/lib/Dialect/Test/CMakeLists.txt @@ -28,6 +28,7 @@ set(LLVM_TARGET_DEFINITIONS TestOps.td) mlir_tablegen(TestOps.h.inc -gen-op-decls) mlir_tablegen(TestOps.cpp.inc -gen-op-defs) mlir_tablegen(TestOpsDialect.h.inc -gen-dialect-decls -dialect=test) +mlir_tablegen(TestOpsDialect.cpp.inc -gen-dialect-defs -dialect=test) mlir_tablegen(TestOpEnums.h.inc -gen-enum-decls) mlir_tablegen(TestOpEnums.cpp.inc -gen-enum-defs) mlir_tablegen(TestOpStructs.h.inc -gen-struct-attr-decls) diff --git a/mlir/test/lib/Dialect/Test/TestDialect.cpp b/mlir/test/lib/Dialect/Test/TestDialect.cpp index ca6f18019a2ac..991d8fa48093f 100644 --- a/mlir/test/lib/Dialect/Test/TestDialect.cpp +++ b/mlir/test/lib/Dialect/Test/TestDialect.cpp @@ -26,6 +26,8 @@ using namespace mlir; using namespace mlir::test; +#include "TestOpsDialect.cpp.inc" + void mlir::test::registerTestDialect(DialectRegistry ®istry) { registry.insert(); } diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td index 0f1775f7a78f1..c59b9faa47fdd 100644 --- a/mlir/test/lib/Dialect/Test/TestOps.td +++ b/mlir/test/lib/Dialect/Test/TestOps.td @@ -31,6 +31,7 @@ def Test_Dialect : Dialect { let hasRegionArgAttrVerify = 1; let hasRegionResultAttrVerify = 1; let hasOperationInterfaceFallback = 1; + let hasNonDefaultDestructor = 1; let dependentDialects = ["::mlir::DLTIDialect"]; let extraClassDeclaration = [{ @@ -47,8 +48,6 @@ def Test_Dialect : Dialect { getParseOperationHook(StringRef opName) const override; LogicalResult printOperation(Operation *op, OpAsmPrinter &printer) const override; - - ~TestDialect(); private: // Storage for a custom fallback interface. void *fallbackEffectOpInterfaces; diff --git a/mlir/tools/mlir-tblgen/DialectGen.cpp b/mlir/tools/mlir-tblgen/DialectGen.cpp index dbbc32b3b0f4e..2ebabc5dd171c 100644 --- a/mlir/tools/mlir-tblgen/DialectGen.cpp +++ b/mlir/tools/mlir-tblgen/DialectGen.cpp @@ -17,6 +17,7 @@ #include "mlir/TableGen/OpClass.h" #include "mlir/TableGen/Operator.h" #include "mlir/TableGen/Trait.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/Sequence.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/CommandLine.h" @@ -54,6 +55,29 @@ filterForDialect(ArrayRef records, Dialect &dialect) { DialectFilterIterator(records.end(), records.end(), filterFn)}; } +static Optional +findSelectedDialect(ArrayRef dialectDefs) { + // Select the dialect to gen for. + if (dialectDefs.size() == 1 && selectedDialect.getNumOccurrences() == 0) { + return Dialect(dialectDefs.front()); + } + + if (selectedDialect.getNumOccurrences() == 0) { + llvm::errs() << "when more than 1 dialect is present, one must be selected " + "via '-dialect'\n"; + return llvm::None; + } + + auto dialectIt = llvm::find_if(dialectDefs, [](const llvm::Record *def) { + return Dialect(def).getName() == selectedDialect; + }); + if (dialectIt == dialectDefs.end()) { + llvm::errs() << "selected dialect with '-dialect' does not exist\n"; + return llvm::None; + } + return Dialect(*dialectIt); +} + //===----------------------------------------------------------------------===// // GEN: Dialect declarations //===----------------------------------------------------------------------===// @@ -72,9 +96,11 @@ class {0} : public ::mlir::Dialect { {2} initialize(); } + void initialize(); friend class ::mlir::MLIRContext; public: + ~{0}() override; static constexpr ::llvm::StringLiteral getDialectNamespace() { return ::llvm::StringLiteral("{1}"); } @@ -210,34 +236,52 @@ static bool emitDialectDecls(const llvm::RecordKeeper &recordKeeper, raw_ostream &os) { emitSourceFileHeader("Dialect Declarations", os); - auto defs = recordKeeper.getAllDerivedDefinitions("Dialect"); - if (defs.empty()) + auto dialectDefs = recordKeeper.getAllDerivedDefinitions("Dialect"); + if (dialectDefs.empty()) return false; - // Select the dialect to gen for. - const llvm::Record *dialectDef = nullptr; - if (defs.size() == 1 && selectedDialect.getNumOccurrences() == 0) { - dialectDef = defs.front(); - } else if (selectedDialect.getNumOccurrences() == 0) { - llvm::errs() << "when more than 1 dialect is present, one must be selected " - "via '-dialect'"; + Optional dialect = findSelectedDialect(dialectDefs); + if (!dialect) return true; - } else { - auto dialectIt = llvm::find_if(defs, [](const llvm::Record *def) { - return Dialect(def).getName() == selectedDialect; - }); - if (dialectIt == defs.end()) { - llvm::errs() << "selected dialect with '-dialect' does not exist"; - return true; - } - dialectDef = *dialectIt; - } - auto attrDefs = recordKeeper.getAllDerivedDefinitions("DialectAttr"); auto typeDefs = recordKeeper.getAllDerivedDefinitions("DialectType"); - Dialect dialect(dialectDef); - emitDialectDecl(dialect, filterForDialect(attrDefs, dialect), - filterForDialect(typeDefs, dialect), os); + emitDialectDecl(*dialect, filterForDialect(attrDefs, *dialect), + filterForDialect(typeDefs, *dialect), os); + return false; +} + +//===----------------------------------------------------------------------===// +// GEN: Dialect definitions +//===----------------------------------------------------------------------===// + +/// The code block to generate a default desturctor definition. +/// +/// {0}: The name of the dialect class. +static const char *const dialectDestructorStr = R"( +{0}::~{0}() = default; + +)"; + +static void emitDialectDef(Dialect &dialect, raw_ostream &os) { + // Emit all nested namespaces. + NamespaceEmitter nsEmitter(os, dialect); + + if (!dialect.hasNonDefaultDestructor()) + os << llvm::formatv(dialectDestructorStr, dialect.getCppClassName()); +} + +static bool emitDialectDefs(const llvm::RecordKeeper &recordKeeper, + raw_ostream &os) { + emitSourceFileHeader("Dialect Definitions", os); + + auto dialectDefs = recordKeeper.getAllDerivedDefinitions("Dialect"); + if (dialectDefs.empty()) + return false; + + Optional dialect = findSelectedDialect(dialectDefs); + if (!dialect) + return true; + emitDialectDef(*dialect, os); return false; } @@ -250,3 +294,9 @@ static mlir::GenRegistration [](const llvm::RecordKeeper &records, raw_ostream &os) { return emitDialectDecls(records, os); }); + +static mlir::GenRegistration + genDialectDefs("gen-dialect-defs", "Generate dialect definitions", + [](const llvm::RecordKeeper &records, raw_ostream &os) { + return emitDialectDefs(records, os); + }); diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index a9211f1216495..e1119061e12c3 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -129,6 +129,10 @@ gentbl_cc_library( ["-gen-dialect-decls"], "include/mlir/IR/BuiltinDialect.h.inc", ), + ( + ["-gen-dialect-defs"], + "include/mlir/IR/BuiltinDialect.cpp.inc", + ), ], tblgen = ":mlir-tblgen", td_file = "include/mlir/IR/BuiltinDialect.td", @@ -778,6 +782,10 @@ gentbl_cc_library( ["-gen-dialect-decls"], "include/mlir/Dialect/Affine/IR/AffineOpsDialect.h.inc", ), + ( + ["-gen-dialect-defs"], + "include/mlir/Dialect/Affine/IR/AffineOpsDialect.cpp.inc", + ), ], tblgen = ":mlir-tblgen", td_file = "include/mlir/Dialect/Affine/IR/AffineOps.td", @@ -847,6 +855,10 @@ gentbl_cc_library( ["-gen-dialect-decls"], "include/mlir/Dialect/EmitC/IR/EmitCDialect.h.inc", ), + ( + ["-gen-dialect-defs"], + "include/mlir/Dialect/EmitC/IR/EmitCDialect.cpp.inc", + ), ( ["-gen-op-decls"], "include/mlir/Dialect/EmitC/IR/EmitC.h.inc", @@ -904,6 +916,10 @@ gentbl_cc_library( ["-gen-dialect-decls"], "include/mlir/Dialect/Async/IR/AsyncOpsDialect.h.inc", ), + ( + ["-gen-dialect-defs"], + "include/mlir/Dialect/Async/IR/AsyncOpsDialect.cpp.inc", + ), ( ["-gen-typedef-decls"], "include/mlir/Dialect/Async/IR/AsyncOpsTypes.h.inc", @@ -975,6 +991,13 @@ gentbl_cc_library( ], "include/mlir/Dialect/ArmNeon/ArmNeonDialect.h.inc", ), + ( + [ + "-gen-dialect-defs", + "-dialect=arm_neon", + ], + "include/mlir/Dialect/ArmNeon/ArmNeonDialect.cpp.inc", + ), ( ["-gen-op-decls"], "include/mlir/Dialect/ArmNeon/ArmNeon.h.inc", @@ -1093,6 +1116,13 @@ gentbl_cc_library( ], "include/mlir/Dialect/ArmSVE/ArmSVEDialect.h.inc", ), + ( + [ + "-gen-dialect-defs", + "-dialect=arm_sve", + ], + "include/mlir/Dialect/ArmSVE/ArmSVEDialect.cpp.inc", + ), ], tblgen = ":mlir-tblgen", td_file = "include/mlir/Dialect/ArmSVE/ArmSVE.td", @@ -1173,6 +1203,13 @@ gentbl_cc_library( ], "include/mlir/Dialect/AMX/AMXDialect.h.inc", ), + ( + [ + "-gen-dialect-defs", + "-dialect=amx", + ], + "include/mlir/Dialect/AMX/AMXDialect.cpp.inc", + ), ( ["-gen-op-decls"], "include/mlir/Dialect/AMX/AMX.h.inc", @@ -1261,6 +1298,13 @@ gentbl_cc_library( ], "include/mlir/Dialect/X86Vector/X86VectorDialect.h.inc", ), + ( + [ + "-gen-dialect-defs", + "-dialect=x86vector", + ], + "include/mlir/Dialect/X86Vector/X86VectorDialect.cpp.inc", + ), ( ["-gen-op-decls"], "include/mlir/Dialect/X86Vector/X86Vector.h.inc", @@ -1355,6 +1399,10 @@ gentbl_cc_library( ["-gen-dialect-decls"], "include/mlir/Dialect/SCF/SCFOpsDialect.h.inc", ), + ( + ["-gen-dialect-defs"], + "include/mlir/Dialect/SCF/SCFOpsDialect.cpp.inc", + ), ], tblgen = ":mlir-tblgen", td_file = "include/mlir/Dialect/SCF/SCFOps.td", @@ -1446,6 +1494,13 @@ gentbl_cc_library( ], "include/mlir/Dialect/SparseTensor/IR/SparseTensorOpsDialect.h.inc", ), + ( + [ + "-gen-dialect-defs", + "-dialect=sparse_tensor", + ], + "include/mlir/Dialect/SparseTensor/IR/SparseTensorOpsDialect.cpp.inc", + ), ( ["-gen-op-decls"], "include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.h.inc", @@ -1586,6 +1641,10 @@ gentbl_cc_library( ["-gen-dialect-decls"], "include/mlir/Dialect/StandardOps/IR/OpsDialect.h.inc", ), + ( + ["-gen-dialect-defs"], + "include/mlir/Dialect/StandardOps/IR/OpsDialect.cpp.inc", + ), ( ["-gen-enum-decls"], "include/mlir/Dialect/StandardOps/IR/OpsEnums.h.inc", @@ -2060,6 +2119,10 @@ gentbl_cc_library( ["-gen-dialect-decls"], "include/mlir/Dialect/Shape/IR/ShapeOpsDialect.h.inc", ), + ( + ["-gen-dialect-defs"], + "include/mlir/Dialect/Shape/IR/ShapeOpsDialect.cpp.inc", + ), ], tblgen = ":mlir-tblgen", td_file = "include/mlir/Dialect/Shape/IR/ShapeOps.td", @@ -2516,6 +2579,13 @@ gentbl_cc_library( ], "include/mlir/Dialect/GPU/GPUOpsDialect.h.inc", ), + ( + [ + "-gen-dialect-defs", + "-dialect=gpu", + ], + "include/mlir/Dialect/GPU/GPUOpsDialect.cpp.inc", + ), ( ["-gen-op-interface-decls"], "include/mlir/Dialect/GPU/GPUOpInterfaces.h.inc", @@ -2953,6 +3023,10 @@ gentbl_cc_library( ["-gen-dialect-decls"], "include/mlir/Dialect/LLVMIR/LLVMOpsDialect.h.inc", ), + ( + ["-gen-dialect-defs"], + "include/mlir/Dialect/LLVMIR/LLVMOpsDialect.cpp.inc", + ), ( ["-gen-enum-decls"], "include/mlir/Dialect/LLVMIR/LLVMOpsEnums.h.inc", @@ -3037,6 +3111,13 @@ gentbl_cc_library( ], "include/mlir/Dialect/LLVMIR/NVVMOpsDialect.h.inc", ), + ( + [ + "-gen-dialect-defs", + "-dialect=nvvm", + ], + "include/mlir/Dialect/LLVMIR/NVVMOpsDialect.cpp.inc", + ), ], tblgen = ":mlir-tblgen", td_file = "include/mlir/Dialect/LLVMIR/NVVMOps.td", @@ -3105,6 +3186,13 @@ gentbl_cc_library( ], "include/mlir/Dialect/LLVMIR/ROCDLOpsDialect.h.inc", ), + ( + [ + "-gen-dialect-defs", + "-dialect=rocdl", + ], + "include/mlir/Dialect/LLVMIR/ROCDLOpsDialect.cpp.inc", + ), ], tblgen = ":mlir-tblgen", td_file = "include/mlir/Dialect/LLVMIR/ROCDLOps.td", @@ -3175,6 +3263,10 @@ gentbl_cc_library( ["-gen-dialect-decls"], "include/mlir/Dialect/PDL/IR/PDLOpsDialect.h.inc", ), + ( + ["-gen-dialect-defs"], + "include/mlir/Dialect/PDL/IR/PDLOpsDialect.cpp.inc", + ), ], tblgen = ":mlir-tblgen", td_file = "include/mlir/Dialect/PDL/IR/PDLOps.td", @@ -3250,6 +3342,13 @@ gentbl_cc_library( ], "include/mlir/Dialect/PDLInterp/IR/PDLInterpOpsDialect.h.inc", ), + ( + [ + "-gen-dialect-defs", + "-dialect=pdl_interp", + ], + "include/mlir/Dialect/PDLInterp/IR/PDLInterpOpsDialect.cpp.inc", + ), ], tblgen = ":mlir-tblgen", td_file = "include/mlir/Dialect/PDLInterp/IR/PDLInterpOps.td", @@ -3284,6 +3383,10 @@ gentbl_cc_library( ["-gen-dialect-decls"], "include/mlir/Dialect/SPIRV/IR/SPIRVOpsDialect.h.inc", ), + ( + ["-gen-dialect-defs"], + "include/mlir/Dialect/SPIRV/IR/SPIRVOpsDialect.cpp.inc", + ), ( ["-gen-op-doc"], "g3doc/Dialects/SPIRV/SPIRVOps.md", @@ -3645,6 +3748,13 @@ gentbl_cc_library( ], "include/mlir/Dialect/Tensor/IR/TensorOpsDialect.h.inc", ), + ( + [ + "-gen-dialect-defs", + "-dialect=tensor", + ], + "include/mlir/Dialect/Tensor/IR/TensorOpsDialect.cpp.inc", + ), ( ["-gen-op-decls"], "include/mlir/Dialect/Tensor/IR/TensorOps.h.inc", @@ -5152,6 +5262,13 @@ gentbl_cc_library( ], "include/mlir/Dialect/OpenACC/OpenACCOpsDialect.h.inc", ), + ( + [ + "-gen-dialect-defs", + "-dialect=acc", + ], + "include/mlir/Dialect/OpenACC/OpenACCOpsDialect.cpp.inc", + ), ( ["-gen-op-decls"], "include/mlir/Dialect/OpenACC/OpenACCOps.h.inc", @@ -5259,6 +5376,13 @@ gentbl_cc_library( ], "include/mlir/Dialect/OpenMP/OpenMPOpsDialect.h.inc", ), + ( + [ + "-gen-dialect-defs", + "-dialect=omp", + ], + "include/mlir/Dialect/OpenMP/OpenMPOpsDialect.cpp.inc", + ), ( ["-gen-op-doc"], "g3doc/Dialects/OpenMP/OpenMPOps.md", @@ -5390,6 +5514,10 @@ gentbl_cc_library( ["-gen-dialect-decls"], "include/mlir/Dialect/Quant/QuantOpsDialect.h.inc", ), + ( + ["-gen-dialect-defs"], + "include/mlir/Dialect/Quant/QuantOpsDialect.cpp.inc", + ), ( ["-gen-op-doc"], "g3doc/Dialects/QuantOps/QuantOps.md", @@ -5488,6 +5616,13 @@ gentbl_cc_library( ], "include/mlir/Dialect/Linalg/IR/LinalgOpsDialect.h.inc", ), + ( + [ + "-gen-dialect-defs", + "-dialect=linalg", + ], + "include/mlir/Dialect/Linalg/IR/LinalgOpsDialect.cpp.inc", + ), ], tblgen = ":mlir-tblgen", td_file = "include/mlir/Dialect/Linalg/IR/LinalgOps.td", @@ -5826,6 +5961,13 @@ gentbl_cc_library( ], "include/mlir/Dialect/Vector/VectorOpsDialect.h.inc", ), + ( + [ + "-gen-dialect-defs", + "-dialect=vector", + ], + "include/mlir/Dialect/Vector/VectorOpsDialect.cpp.inc", + ), ( ["-gen-enum-decls"], "include/mlir/Dialect/Vector/VectorOpsEnums.h.inc", @@ -5973,6 +6115,10 @@ gentbl_cc_library( ["-gen-dialect-decls"], "include/mlir/Dialect/Tosa/IR/TosaOpsDialect.h.inc", ), + ( + ["-gen-dialect-defs"], + "include/mlir/Dialect/Tosa/IR/TosaOpsDialect.cpp.inc", + ), ( ["-gen-op-doc"], "g3doc/Dialects/Tosa/TosaOps.md", @@ -6147,6 +6293,13 @@ gentbl_cc_library( ], "include/mlir/Dialect/Complex/IR/ComplexOpsDialect.h.inc", ), + ( + [ + "-gen-dialect-defs", + "-dialect=complex", + ], + "include/mlir/Dialect/Complex/IR/ComplexOpsDialect.cpp.inc", + ), ], tblgen = ":mlir-tblgen", td_file = "include/mlir/Dialect/Complex/IR/ComplexBase.td", @@ -6295,6 +6448,13 @@ gentbl_cc_library( ], "include/mlir/Dialect/Math/IR/MathOpsDialect.h.inc", ), + ( + [ + "-gen-dialect-defs", + "-dialect=math", + ], + "include/mlir/Dialect/Math/IR/MathOpsDialect.cpp.inc", + ), ], tblgen = ":mlir-tblgen", td_file = "include/mlir/Dialect/Math/IR/MathBase.td", @@ -6418,6 +6578,13 @@ gentbl_cc_library( ], "include/mlir/Dialect/MemRef/IR/MemRefOpsDialect.h.inc", ), + ( + [ + "-gen-dialect-defs", + "-dialect=memref", + ], + "include/mlir/Dialect/MemRef/IR/MemRefOpsDialect.cpp.inc", + ), ], tblgen = ":mlir-tblgen", td_file = "include/mlir/Dialect/MemRef/IR/MemRefBase.td", @@ -6535,6 +6702,13 @@ gentbl_cc_library( ], "include/mlir/Dialect/DLTI/DLTIDialect.h.inc", ), + ( + [ + "-gen-dialect-defs", + "-dialect=dlti", + ], + "include/mlir/Dialect/DLTI/DLTIDialect.cpp.inc", + ), ], tblgen = ":mlir-tblgen", td_file = "include/mlir/Dialect/DLTI/DLTIBase.td", diff --git a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel index 4d20116b0d0da..b8f590a1b9841 100644 --- a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel @@ -71,6 +71,13 @@ gentbl_cc_library( ], "lib/Dialect/Test/TestOpsDialect.h.inc", ), + ( + [ + "-gen-dialect-defs", + "-dialect=test", + ], + "lib/Dialect/Test/TestOpsDialect.cpp.inc", + ), ( ["-gen-enum-decls"], "lib/Dialect/Test/TestOpEnums.h.inc", From ae6648cee01bc5c390b74dacd1851cf5df74599b Mon Sep 17 00:00:00 2001 From: Jianzhou Zhao Date: Tue, 29 Jun 2021 06:17:00 +0000 Subject: [PATCH 228/619] [dfsan] Expose dfsan_get_track_origins to get origin tracking status This allows application code checks if origin tracking is on before printing out traces. -dfsan-track-origins can be 0,1,2. The current code only distinguishes 1 and 2 in compile time, but not at runtime. Made runtime distinguish 1 and 2 too. Reviewed By: browneee Differential Revision: https://reviews.llvm.org/D105128 --- .../include/sanitizer/dfsan_interface.h | 6 +++++ compiler-rt/lib/dfsan/dfsan.cpp | 27 ++++++++++--------- compiler-rt/lib/dfsan/done_abilist.txt | 3 ++- .../test/dfsan/dfsan_get_track_origins.c | 13 +++++++++ .../Instrumentation/DataFlowSanitizer.cpp | 3 ++- .../DataFlowSanitizer/basic.ll | 6 +++-- 6 files changed, 41 insertions(+), 17 deletions(-) create mode 100644 compiler-rt/test/dfsan/dfsan_get_track_origins.c diff --git a/compiler-rt/include/sanitizer/dfsan_interface.h b/compiler-rt/include/sanitizer/dfsan_interface.h index ea2283284e096..cd3b6d6e2b163 100644 --- a/compiler-rt/include/sanitizer/dfsan_interface.h +++ b/compiler-rt/include/sanitizer/dfsan_interface.h @@ -141,6 +141,12 @@ size_t dfsan_sprint_stack_trace(char *out_buf, size_t out_buf_size); /// Retrieves the very first origin associated with the data at the given /// address. dfsan_origin dfsan_get_init_origin(const void *addr); + +/// Returns the value of -dfsan-track-origins. +/// * 0: do not track origins. +/// * 1: track origins at memory store operations. +/// * 2: track origins at memory load and store operations. +int dfsan_get_track_origins(void); #ifdef __cplusplus } // extern "C" diff --git a/compiler-rt/lib/dfsan/dfsan.cpp b/compiler-rt/lib/dfsan/dfsan.cpp index a029500c5fa13..6f9ae141d7ab6 100644 --- a/compiler-rt/lib/dfsan/dfsan.cpp +++ b/compiler-rt/lib/dfsan/dfsan.cpp @@ -55,10 +55,11 @@ SANITIZER_INTERFACE_ATTRIBUTE THREADLOCAL u32 // Instrumented code may set this value in terms of -dfsan-track-origins. // * undefined or 0: do not track origins. // * 1: track origins at memory store operations. -// * 2: TODO: track origins at memory store operations and callsites. +// * 2: track origins at memory load and store operations. +// TODO: track callsites. extern "C" SANITIZER_WEAK_ATTRIBUTE const int __dfsan_track_origins; -int __dfsan_get_track_origins() { +extern "C" SANITIZER_INTERFACE_ATTRIBUTE int dfsan_get_track_origins() { return &__dfsan_track_origins ? __dfsan_track_origins : 0; } @@ -446,7 +447,7 @@ void dfsan_copy_memory(void *dst, const void *src, uptr size) { internal_memcpy(dst, src, size); internal_memcpy((void *)shadow_for(dst), (const void *)shadow_for(src), size * sizeof(dfsan_label)); - if (__dfsan_get_track_origins()) + if (dfsan_get_track_origins()) dfsan_mem_origin_transfer(dst, src, size); } @@ -514,12 +515,12 @@ void SetShadow(dfsan_label label, void *addr, uptr size, dfsan_origin origin) { if (0 != label) { const uptr beg_shadow_addr = (uptr)__dfsan::shadow_for(addr); WriteShadowWithSize(label, beg_shadow_addr, size); - if (__dfsan_get_track_origins()) + if (dfsan_get_track_origins()) SetOrigin(addr, size, origin); return; } - if (__dfsan_get_track_origins()) + if (dfsan_get_track_origins()) ReleaseOrigins(addr, size); ReleaseOrClearShadows(addr, size); @@ -533,7 +534,7 @@ extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __dfsan_set_label( SANITIZER_INTERFACE_ATTRIBUTE void dfsan_set_label(dfsan_label label, void *addr, uptr size) { dfsan_origin init_origin = 0; - if (label && __dfsan_get_track_origins()) { + if (label && dfsan_get_track_origins()) { GET_CALLER_PC_BP; GET_STORE_STACK_TRACE_PC_BP(pc, bp); init_origin = ChainOrigin(0, &stack, true); @@ -546,7 +547,7 @@ void dfsan_add_label(dfsan_label label, void *addr, uptr size) { if (0 == label) return; - if (__dfsan_get_track_origins()) { + if (dfsan_get_track_origins()) { GET_CALLER_PC_BP; GET_STORE_STACK_TRACE_PC_BP(pc, bp); dfsan_origin init_origin = ChainOrigin(0, &stack, true); @@ -648,7 +649,7 @@ void PrintInvalidOriginWarning(dfsan_label label, const void *address) { bool PrintOriginTraceToStr(const void *addr, const char *description, InternalScopedString *out) { CHECK(out); - CHECK(__dfsan_get_track_origins()); + CHECK(dfsan_get_track_origins()); Decorator d; const dfsan_label label = *__dfsan::shadow_for(addr); @@ -687,7 +688,7 @@ bool PrintOriginTraceToStr(const void *addr, const char *description, extern "C" SANITIZER_INTERFACE_ATTRIBUTE void dfsan_print_origin_trace( const void *addr, const char *description) { - if (!__dfsan_get_track_origins()) { + if (!dfsan_get_track_origins()) { PrintNoOriginTrackingWarning(); return; } @@ -713,7 +714,7 @@ dfsan_sprint_origin_trace(const void *addr, const char *description, char *out_buf, size_t out_buf_size) { CHECK(out_buf); - if (!__dfsan_get_track_origins()) { + if (!dfsan_get_track_origins()) { PrintNoOriginTrackingWarning(); return 0; } @@ -742,7 +743,7 @@ dfsan_sprint_origin_trace(const void *addr, const char *description, extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_origin dfsan_get_init_origin(const void *addr) { - if (!__dfsan_get_track_origins()) + if (!dfsan_get_track_origins()) return 0; const dfsan_label label = *__dfsan::shadow_for(addr); @@ -829,7 +830,7 @@ void dfsan_clear_thread_local_state() { internal_memset(__dfsan_arg_tls, 0, sizeof(__dfsan_arg_tls)); internal_memset(__dfsan_retval_tls, 0, sizeof(__dfsan_retval_tls)); - if (__dfsan_get_track_origins()) { + if (dfsan_get_track_origins()) { internal_memset(__dfsan_arg_origin_tls, 0, sizeof(__dfsan_arg_origin_tls)); internal_memset(&__dfsan_retval_origin_tls, 0, sizeof(__dfsan_retval_origin_tls)); @@ -995,7 +996,7 @@ static void DFsanInit(int argc, char **argv, char **envp) { CheckASLR(); - InitShadow(__dfsan_get_track_origins()); + InitShadow(dfsan_get_track_origins()); initialize_interceptors(); diff --git a/compiler-rt/lib/dfsan/done_abilist.txt b/compiler-rt/lib/dfsan/done_abilist.txt index ca26eab406b03..111c7d581e0ab 100644 --- a/compiler-rt/lib/dfsan/done_abilist.txt +++ b/compiler-rt/lib/dfsan/done_abilist.txt @@ -38,7 +38,8 @@ fun:dfsan_get_origin=uninstrumented fun:dfsan_get_origin=custom fun:dfsan_get_init_origin=uninstrumented fun:dfsan_get_init_origin=discard - +fun:dfsan_get_track_origins=uninstrumented +fun:dfsan_get_track_origins=discard ############################################################################### # glibc diff --git a/compiler-rt/test/dfsan/dfsan_get_track_origins.c b/compiler-rt/test/dfsan/dfsan_get_track_origins.c new file mode 100644 index 0000000000000..4013fed5bdaa2 --- /dev/null +++ b/compiler-rt/test/dfsan/dfsan_get_track_origins.c @@ -0,0 +1,13 @@ +// RUN: %clang_dfsan -DTRACK_ORIGINS=2 -mllvm -dfsan-track-origins=2 %s -o %t && %run %t +// RUN: %clang_dfsan -DTRACK_ORIGINS=1 -mllvm -dfsan-track-origins=1 %s -o %t && %run %t +// RUN: %clang_dfsan -DTRACK_ORIGINS=0 %s -o %t && %run %t +// +// REQUIRES: x86_64-target-arch + +#include + +#include + +int main(int argc, char *argv[]) { + assert(dfsan_get_track_origins() == TRACK_ORIGINS); +} diff --git a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index 6588c88111fc7..63aa84e4a77cd 100644 --- a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -1397,7 +1397,8 @@ bool DataFlowSanitizer::runImpl(Module &M) { Changed = true; return new GlobalVariable( M, OriginTy, true, GlobalValue::WeakODRLinkage, - ConstantInt::getSigned(OriginTy, shouldTrackOrigins()), + ConstantInt::getSigned(OriginTy, + shouldTrackOrigins() ? ClTrackOrigins : 0), "__dfsan_track_origins"); }); diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/basic.ll b/llvm/test/Instrumentation/DataFlowSanitizer/basic.ll index 40e63f6e63621..8f11036b691bb 100644 --- a/llvm/test/Instrumentation/DataFlowSanitizer/basic.ll +++ b/llvm/test/Instrumentation/DataFlowSanitizer/basic.ll @@ -1,5 +1,6 @@ ; RUN: opt < %s -dfsan -S | FileCheck %s --check-prefixes=CHECK,CHECK_NO_ORIGIN -DSHADOW_XOR_MASK=87960930222080 --dump-input-context=100 -; RUN: opt < %s -dfsan -dfsan-track-origins=1 -S | FileCheck %s --check-prefixes=CHECK,CHECK_ORIGIN -DSHADOW_XOR_MASK=87960930222080 --dump-input-context=100 +; RUN: opt < %s -dfsan -dfsan-track-origins=1 -S | FileCheck %s --check-prefixes=CHECK,CHECK_ORIGIN1 -DSHADOW_XOR_MASK=87960930222080 --dump-input-context=100 +; RUN: opt < %s -dfsan -dfsan-track-origins=2 -S | FileCheck %s --check-prefixes=CHECK_ORIGIN2 -DSHADOW_XOR_MASK=87960930222080 --dump-input-context=100 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -8,7 +9,8 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK: @__dfsan_arg_origin_tls = external thread_local(initialexec) global [200 x i32] ; CHECK: @__dfsan_retval_origin_tls = external thread_local(initialexec) global i32 ; CHECK_NO_ORIGIN: @__dfsan_track_origins = weak_odr constant i32 0 -; CHECK_ORIGIN: @__dfsan_track_origins = weak_odr constant i32 1 +; CHECK_ORIGIN1: @__dfsan_track_origins = weak_odr constant i32 1 +; CHECK_ORIGIN2: @__dfsan_track_origins = weak_odr constant i32 2 ; CHECK: @__dfsan_shadow_width_bits = weak_odr constant i32 [[#SBITS:]] ; CHECK: @__dfsan_shadow_width_bytes = weak_odr constant i32 [[#SBYTES:]] From 9952d591ccc49cbcbf9c89d5191e6111c44703a6 Mon Sep 17 00:00:00 2001 From: Stella Stamenova Date: Tue, 29 Jun 2021 13:39:18 -0700 Subject: [PATCH 229/619] [lldb] Fix globals-bss.cpp which was broken in https://reviews.llvm.org/D105055 -S replaced -s, so the test needs to be updated to use the new option --- lldb/test/Shell/SymbolFile/NativePDB/globals-bss.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/test/Shell/SymbolFile/NativePDB/globals-bss.cpp b/lldb/test/Shell/SymbolFile/NativePDB/globals-bss.cpp index 3744a2837055d..9c65c26499cd1 100644 --- a/lldb/test/Shell/SymbolFile/NativePDB/globals-bss.cpp +++ b/lldb/test/Shell/SymbolFile/NativePDB/globals-bss.cpp @@ -4,7 +4,7 @@ // Make sure we can read variables from BSS // RUN: %clang_cl --target=x86_64-windows-msvc -Od -Z7 -c /Fo%t.obj -- %s // RUN: lld-link -debug:full -nodefaultlib -entry:main %t.obj -out:%t.exe -pdb:%t.pdb -// RUN: llvm-readobj -s %t.exe | FileCheck --check-prefix=BSS %s +// RUN: llvm-readobj -S %t.exe | FileCheck --check-prefix=BSS %s // RUN: env LLDB_USE_NATIVE_PDB_READER=1 %lldb -f %t.exe -s \ // RUN: %p/Inputs/globals-bss.lldbinit 2>&1 | FileCheck %s From 58af0d567d88eb5a7eec436886da066308d7a39e Mon Sep 17 00:00:00 2001 From: Siva Chandra Reddy Date: Tue, 29 Jun 2021 20:27:28 +0000 Subject: [PATCH 230/619] [libc] Allow target architecture independent configs Previously, we required entrypoints.txt for every target architecture supported by a target OS. With this change, we allow architecture independent config for a target OS. That is, if an architecture specific entrypoints.txt is missing, then a generic entrypoints.txt for that target OS will be used. Reviewed By: caitlyncano Differential Revision: https://reviews.llvm.org/D105147 --- libc/CMakeLists.txt | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/libc/CMakeLists.txt b/libc/CMakeLists.txt index 30ffa35111db9..5f891510fbb5a 100644 --- a/libc/CMakeLists.txt +++ b/libc/CMakeLists.txt @@ -77,8 +77,20 @@ include(CMakeParseArguments) include(LLVMLibCRules) include(LLVMLibCCheckCpuFeatures) -include("${LIBC_SOURCE_DIR}/config/${LIBC_TARGET_OS}/${LIBC_TARGET_ARCHITECTURE}/entrypoints.txt") -include("${LIBC_SOURCE_DIR}/config/${LIBC_TARGET_OS}/${LIBC_TARGET_ARCHITECTURE}/headers.txt") +if(EXISTS "${LIBC_SOURCE_DIR}/config/${LIBC_TARGET_OS}/${LIBC_TARGET_ARCHITECTURE}/entrypoints.txt") + set(entrypoint_file "${LIBC_SOURCE_DIR}/config/${LIBC_TARGET_OS}/${LIBC_TARGET_ARCHITECTURE}/entrypoints.txt") +elseif(EXISTS "${LIBC_SOURCE_DIR}/config/${LIBC_TARGET_OS}/entrypoints.txt") + set(entrypoint_file "${LIBC_SOURCE_DIR}/config/${LIBC_TARGET_OS}/entrypoints.txt") +else() + message(FATAL_ERROR "entrypoints.txt file for the target platform not found.") +endif() +include(${entrypoint_file}) + +if(EXISTS "${LIBC_SOURCE_DIR}/config/${LIBC_TARGET_OS}/${LIBC_TARGET_ARCHITECTURE}/headers.txt") + include("${LIBC_SOURCE_DIR}/config/${LIBC_TARGET_OS}/${LIBC_TARGET_ARCHITECTURE}/headers.txt") +elseif(EXISTS "${LIBC_SOURCE_DIR}/config/${LIBC_TARGET_OS}/headers.txt") + include("${LIBC_SOURCE_DIR}/config/${LIBC_TARGET_OS}//headers.txt") +endif() set(TARGET_ENTRYPOINT_NAME_LIST "") foreach(entrypoint IN LISTS TARGET_LLVMLIBC_ENTRYPOINTS) From 2240b41ee4f30fe938975677a0a5a2c5c26d271b Mon Sep 17 00:00:00 2001 From: Dhruva Chakrabarti Date: Mon, 28 Jun 2021 17:52:01 -0700 Subject: [PATCH 231/619] [libomptarget] [amdgpu] Fix default setting of max flat workgroup size When max flat workgroup size is not specified, it is set to the default workgroup size. This prevents kernel launch with a workgroup size larger than the default. The fix is to ignore a size of 0 and treat it as unspecified. Reviewed By: JonChesterfield Differential Revision: https://reviews.llvm.org/D105073 --- openmp/libomptarget/plugins/amdgpu/src/rtl.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp index 9a07d26546bbc..aaa68121db105 100644 --- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp @@ -1707,10 +1707,9 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id, // Get ExecMode ExecModeVal = KernDescVal.Mode; DP("ExecModeVal %d\n", ExecModeVal); - if (KernDescVal.WG_Size == 0) { - KernDescVal.WG_Size = RTLDeviceInfoTy::Default_WG_Size; - DP("Setting KernDescVal.WG_Size to default %d\n", KernDescVal.WG_Size); - } + // If KernDescVal.WG_Size is 0, it is equivalent to not + // specified. Hence, max_flat_workgroup_size is filtered out in + // getLaunchVals WGSizeVal = KernDescVal.WG_Size; DP("WGSizeVal %d\n", WGSizeVal); check("Loading KernDesc computation property", err); @@ -1920,7 +1919,7 @@ void getLaunchVals(int &threadsPerGroup, int &num_groups, int ConstWGSize, } } // check flat_max_work_group_size attr here - if (threadsPerGroup > ConstWGSize) { + if (ConstWGSize > 0 && threadsPerGroup > ConstWGSize) { threadsPerGroup = ConstWGSize; DP("Reduced threadsPerGroup to flat-attr-group-size limit %d\n", threadsPerGroup); From c8a9c78e170e3b972041b301a50d0456afe83d10 Mon Sep 17 00:00:00 2001 From: Stella Stamenova Date: Tue, 29 Jun 2021 13:54:48 -0700 Subject: [PATCH 232/619] [lldb] Fix debug_loc.s which was broken after https://reviews.llvm.org/D103502 An empty location is now printed as --- lldb/test/Shell/SymbolFile/DWARF/x86/debug_loc.s | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/debug_loc.s b/lldb/test/Shell/SymbolFile/DWARF/x86/debug_loc.s index 3d78469f6306d..5b8d1bc328559 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/debug_loc.s +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/debug_loc.s @@ -14,11 +14,11 @@ # CHECK-LABEL: image lookup -v -a 0 # CHECK: Variable: {{.*}}, name = "x0", type = "int", location = DW_OP_reg5 RDI, -# CHECK: Variable: {{.*}}, name = "x1", type = "int", location = , +# CHECK: Variable: {{.*}}, name = "x1", type = "int", location = , # CHECK-LABEL: image lookup -v -a 2 # CHECK: Variable: {{.*}}, name = "x0", type = "int", location = DW_OP_reg0 RAX, -# CHECK: Variable: {{.*}}, name = "x1", type = "int", location = , +# CHECK: Variable: {{.*}}, name = "x1", type = "int", location = , # CHECK: Variable: {{.*}}, name = "x3", type = "int", location = DW_OP_reg1 RDX, # CHECK-LABEL: image dump symfile From 6d72845a8517eea6a69a493351fb4f03f3c10c21 Mon Sep 17 00:00:00 2001 From: Alexander Shaposhnikov Date: Tue, 29 Jun 2021 13:46:20 -0700 Subject: [PATCH 233/619] [llvm-objcopy][MachO] Code cleanup 1. Remove unnecessary templates. 2. Fix potentially unaligned reads inside constructSection. Test plan: make check-all Differential revision: https://reviews.llvm.org/D105089 --- llvm/tools/llvm-objcopy/MachO/MachOReader.cpp | 34 ++++++++----------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp b/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp index d1f87bde12403..da47e4bc61f25 100644 --- a/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp +++ b/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp @@ -28,7 +28,7 @@ void MachOReader::readHeader(Object &O) const { } template -static Section constructSectionCommon(SectionType Sec, uint32_t Index) { +static Section constructSectionCommon(const SectionType &Sec, uint32_t Index) { StringRef SegName(Sec.segname, strnlen(Sec.segname, sizeof(Sec.segname))); StringRef SectName(Sec.sectname, strnlen(Sec.sectname, sizeof(Sec.sectname))); Section S(SegName, SectName); @@ -46,14 +46,11 @@ static Section constructSectionCommon(SectionType Sec, uint32_t Index) { return S; } -template -Section constructSection(SectionType Sec, uint32_t Index); - -template <> Section constructSection(MachO::section Sec, uint32_t Index) { +Section constructSection(const MachO::section &Sec, uint32_t Index) { return constructSectionCommon(Sec, Index); } -template <> Section constructSection(MachO::section_64 Sec, uint32_t Index) { +Section constructSection(const MachO::section_64 &Sec, uint32_t Index) { Section S = constructSectionCommon(Sec, Index); S.Reserved3 = Sec.reserved3; return S; @@ -63,21 +60,20 @@ template Expected>> static extractSections( const object::MachOObjectFile::LoadCommandInfo &LoadCmd, const object::MachOObjectFile &MachOObj, uint32_t &NextSectionIndex) { - auto End = LoadCmd.Ptr + LoadCmd.C.cmdsize; - const SectionType *Curr = - reinterpret_cast(LoadCmd.Ptr + sizeof(SegmentType)); std::vector> Sections; - for (; reinterpret_cast(Curr) < End; Curr++) { - if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) { - SectionType Sec; - memcpy((void *)&Sec, Curr, sizeof(SectionType)); + for (auto Curr = reinterpret_cast(LoadCmd.Ptr + + sizeof(SegmentType)), + End = reinterpret_cast(LoadCmd.Ptr + + LoadCmd.C.cmdsize); + Curr < End; ++Curr) { + SectionType Sec; + memcpy((void *)&Sec, Curr, sizeof(SectionType)); + + if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) MachO::swapStruct(Sec); - Sections.push_back( - std::make_unique
(constructSection(Sec, NextSectionIndex))); - } else { - Sections.push_back( - std::make_unique
(constructSection(*Curr, NextSectionIndex))); - } + + Sections.push_back( + std::make_unique
(constructSection(Sec, NextSectionIndex))); Section &S = *Sections.back(); From ac2bec5addd2f96e976242bc8e0d93725fe3d2fd Mon Sep 17 00:00:00 2001 From: "Duncan P. N. Exon Smith" Date: Tue, 29 Jun 2021 12:49:57 -0700 Subject: [PATCH 234/619] OpaquePtr: Support i32** with --force-opaque-pointers 4506f614cb6983a16d117cf77a968608e66d7a5c fixed parsing of textual IR to reject `ptr*`, but broke the auto-conversion of `i32**` to `ptr` with `--force-opaque-pointers`. Get that working again by refactoring LLParser::parseType to only send `ptr`-spelled pointers into the type suffix logic when it's the return of a function type. This also rejects `ptr addrspace(3) addrspace(2)`, which 1e6303e60ca5af4fbe7ca728572fd65666a98271 invadvertently started accepting. Just the default top-level error message for the double-addrspace since I had trouble thinking of something nice; probably it's fine as is (it doesn't look valid the way that `ptr*` does). Differential Revision: https://reviews.llvm.org/D105146 --- llvm/lib/AsmParser/LLParser.cpp | 15 +++++++++++++-- .../invalid-opaque-ptr-double-addrspace.ll | 4 ++++ llvm/test/Assembler/opaque-ptr.ll | 7 +++++++ llvm/test/Other/force-opaque-ptrs.ll | 12 ++++++++++++ 4 files changed, 36 insertions(+), 2 deletions(-) create mode 100644 llvm/test/Assembler/invalid-opaque-ptr-double-addrspace.ll diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index 30057866fb3ed..f9f73d2a4ffd4 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -2586,11 +2586,24 @@ bool LLParser::parseType(Type *&Result, const Twine &Msg, bool AllowVoid) { } } + // Handle (explicit) opaque pointer types (not --force-opaque-pointers). + // + // Type ::= ptr ('addrspace' '(' uint32 ')')? if (Result->isOpaquePointerTy()) { unsigned AddrSpace; if (parseOptionalAddrSpace(AddrSpace)) return true; Result = PointerType::get(getContext(), AddrSpace); + + // Give a nice error for 'ptr*'. + if (Lex.getKind() == lltok::star) + return tokError("ptr* is invalid - use ptr instead"); + + // Fall through to parsing the type suffixes only if this 'ptr' is a + // function return. Otherwise, return success, implicitly rejecting other + // suffixes. + if (Lex.getKind() != lltok::lparen) + return false; } // parse the type suffixes. @@ -2608,8 +2621,6 @@ bool LLParser::parseType(Type *&Result, const Twine &Msg, bool AllowVoid) { return tokError("basic block pointers are invalid"); if (Result->isVoidTy()) return tokError("pointers to void are invalid - use i8* instead"); - if (Result->isOpaquePointerTy()) - return tokError("ptr* is invalid - use ptr instead"); if (!PointerType::isValidElementType(Result)) return tokError("pointer to this type is invalid"); Result = PointerType::getUnqual(Result); diff --git a/llvm/test/Assembler/invalid-opaque-ptr-double-addrspace.ll b/llvm/test/Assembler/invalid-opaque-ptr-double-addrspace.ll new file mode 100644 index 0000000000000..733c43d8f3c1c --- /dev/null +++ b/llvm/test/Assembler/invalid-opaque-ptr-double-addrspace.ll @@ -0,0 +1,4 @@ +; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s + +; CHECK: expected top-level entity +@g1 = external global ptr addrspace(3) addrspace(4) diff --git a/llvm/test/Assembler/opaque-ptr.ll b/llvm/test/Assembler/opaque-ptr.ll index 9167ca9e45866..5ee57fae18f06 100644 --- a/llvm/test/Assembler/opaque-ptr.ll +++ b/llvm/test/Assembler/opaque-ptr.ll @@ -4,6 +4,13 @@ ; CHECK: @global = external global ptr @global = external global ptr +; CHECK: @fptr1 = external global ptr ()* +; CHECK: @fptr2 = external global ptr () addrspace(1)* +; CHECK: @fptr3 = external global ptr () addrspace(1)* addrspace(2)* +@fptr1 = external global ptr ()* +@fptr2 = external global ptr () addrspace(1)* +@fptr3 = external global ptr () addrspace(1)* addrspace(2)* + ; CHECK: define ptr @f(ptr %a) { ; CHECK: %b = bitcast ptr %a to ptr ; CHECK: ret ptr %b diff --git a/llvm/test/Other/force-opaque-ptrs.ll b/llvm/test/Other/force-opaque-ptrs.ll index 442ef0ac9eb0a..779c5a158bf5a 100644 --- a/llvm/test/Other/force-opaque-ptrs.ll +++ b/llvm/test/Other/force-opaque-ptrs.ll @@ -32,3 +32,15 @@ define void @f(i32* %p) { @g.fwd = global i32 0 declare void @fn.fwd(i32) + +define void @f2(i32** %p) { +; CHECK-LABEL: define {{[^@]+}}@f2( +; CHECK-SAME: ptr {{%.*}}) { + unreachable +} + +define void @f3(i32 addrspace(1)* addrspace(2)* %p) { +; CHECK-LABEL: define {{[^@]+}}@f3( +; CHECK-SAME: ptr addrspace(2) {{%.*}}) { + unreachable +} From 8d21d5472501460933e78aead04cf59579025ba4 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Tue, 29 Jun 2021 14:22:26 -0700 Subject: [PATCH 235/619] [CodeGen] Stop creating fake FunctionDecls when generating IR for functions implicitly generated by the compiler These fake functions would cause clang to crash if the changes proposed in https://reviews.llvm.org/D98799 were made. --- clang/lib/CodeGen/CGBuiltin.cpp | 10 +------ clang/lib/CodeGen/CGNonTrivialStruct.cpp | 8 ++---- clang/lib/CodeGen/ItaniumCXXABI.cpp | 28 ++++++------------- clang/test/CodeGen/constructor-attribute.c | 10 +++++-- clang/test/CodeGen/debug-info-oslog.c | 2 +- .../nontrivial-c-struct-exception.m | 5 ++++ 6 files changed, 24 insertions(+), 39 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 9579d706b2ae5..2f2d5e6c83d77 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -1681,7 +1681,6 @@ llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction( } QualType ReturnTy = Ctx.VoidTy; - QualType FuncionTy = Ctx.getFunctionType(ReturnTy, ArgTys, {}); // The helper function has linkonce_odr linkage to enable the linker to merge // identical functions. To ensure the merging always happens, 'noinline' is @@ -1701,14 +1700,7 @@ llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction( Fn->addFnAttr(llvm::Attribute::NoInline); auto NL = ApplyDebugLocation::CreateEmpty(*this); - IdentifierInfo *II = &Ctx.Idents.get(Name); - FunctionDecl *FD = FunctionDecl::Create( - Ctx, Ctx.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II, - FuncionTy, nullptr, SC_PrivateExtern, false, false); - // Avoid generating debug location info for the function. - FD->setImplicit(); - - StartFunction(FD, ReturnTy, Fn, FI, Args); + StartFunction(GlobalDecl(), ReturnTy, Fn, FI, Args); // Create a scope with an artificial location for the body of this function. auto AL = ApplyDebugLocation::CreateArtificial(*this); diff --git a/clang/lib/CodeGen/CGNonTrivialStruct.cpp b/clang/lib/CodeGen/CGNonTrivialStruct.cpp index 9c6bbbc048b72..ad505fc5a0d4f 100644 --- a/clang/lib/CodeGen/CGNonTrivialStruct.cpp +++ b/clang/lib/CodeGen/CGNonTrivialStruct.cpp @@ -472,14 +472,10 @@ template struct GenFuncBase { F->setVisibility(llvm::GlobalValue::HiddenVisibility); CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, F, /*IsThunk=*/false); CGM.SetLLVMFunctionAttributesForDefinition(nullptr, F); - IdentifierInfo *II = &Ctx.Idents.get(FuncName); - FunctionDecl *FD = FunctionDecl::Create( - Ctx, Ctx.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), - II, Ctx.getFunctionType(Ctx.VoidTy, llvm::None, {}), nullptr, - SC_PrivateExtern, false, false); CodeGenFunction NewCGF(CGM); setCGF(&NewCGF); - CGF->StartFunction(FD, Ctx.VoidTy, F, FI, Args); + CGF->StartFunction(GlobalDecl(), Ctx.VoidTy, F, FI, Args); + auto AL = ApplyDebugLocation::CreateArtificial(*CGF); std::array Addrs = getParamAddrs(std::make_index_sequence{}, Alignments, Args, CGF); asDerived().visitStructFields(QT, CharUnits::Zero(), Addrs); diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index 183ec7c2842f2..cf277ca347e43 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -2611,15 +2611,6 @@ static llvm::Function *createGlobalInitOrCleanupFn(CodeGen::CodeGenModule &CGM, return GlobalInitOrCleanupFn; } -static FunctionDecl * -createGlobalInitOrCleanupFnDecl(CodeGen::CodeGenModule &CGM, StringRef FnName) { - ASTContext &Ctx = CGM.getContext(); - QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, llvm::None, {}); - return FunctionDecl::Create( - Ctx, Ctx.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), - &Ctx.Idents.get(FnName), FunctionTy, nullptr, SC_Static, false, false); -} - void CodeGenModule::unregisterGlobalDtorsWithUnAtExit() { for (const auto &I : DtorsUsingAtExit) { int Priority = I.first; @@ -2629,13 +2620,11 @@ void CodeGenModule::unregisterGlobalDtorsWithUnAtExit() { llvm::Function *GlobalCleanupFn = createGlobalInitOrCleanupFn(*this, GlobalCleanupFnName); - FunctionDecl *GlobalCleanupFD = - createGlobalInitOrCleanupFnDecl(*this, GlobalCleanupFnName); - CodeGenFunction CGF(*this); - CGF.StartFunction(GlobalDecl(GlobalCleanupFD), getContext().VoidTy, - GlobalCleanupFn, getTypes().arrangeNullaryFunction(), - FunctionArgList(), SourceLocation(), SourceLocation()); + CGF.StartFunction(GlobalDecl(), getContext().VoidTy, GlobalCleanupFn, + getTypes().arrangeNullaryFunction(), FunctionArgList(), + SourceLocation(), SourceLocation()); + auto AL = ApplyDebugLocation::CreateArtificial(CGF); // Get the destructor function type, void(*)(void). llvm::FunctionType *dtorFuncTy = llvm::FunctionType::get(CGF.VoidTy, false); @@ -2688,13 +2677,12 @@ void CodeGenModule::registerGlobalDtorsWithAtExit() { std::string("__GLOBAL_init_") + llvm::to_string(Priority); llvm::Function *GlobalInitFn = createGlobalInitOrCleanupFn(*this, GlobalInitFnName); - FunctionDecl *GlobalInitFD = - createGlobalInitOrCleanupFnDecl(*this, GlobalInitFnName); CodeGenFunction CGF(*this); - CGF.StartFunction(GlobalDecl(GlobalInitFD), getContext().VoidTy, - GlobalInitFn, getTypes().arrangeNullaryFunction(), - FunctionArgList(), SourceLocation(), SourceLocation()); + CGF.StartFunction(GlobalDecl(), getContext().VoidTy, GlobalInitFn, + getTypes().arrangeNullaryFunction(), FunctionArgList(), + SourceLocation(), SourceLocation()); + auto AL = ApplyDebugLocation::CreateArtificial(CGF); // Since constructor functions are run in non-descending order of their // priorities, destructors are registered in non-descending order of their diff --git a/clang/test/CodeGen/constructor-attribute.c b/clang/test/CodeGen/constructor-attribute.c index e58143a2165a1..f7c9c202f4978 100644 --- a/clang/test/CodeGen/constructor-attribute.c +++ b/clang/test/CodeGen/constructor-attribute.c @@ -1,5 +1,5 @@ // RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm -o - %s | FileCheck --check-prefix=CHECK --check-prefix=WITHOUTATEXIT %s -// RUN: %clang_cc1 -triple x86_64-apple-darwin -fregister-global-dtors-with-atexit -emit-llvm -o - %s | FileCheck --check-prefix=CHECK --check-prefix=CXAATEXIT --check-prefix=WITHATEXIT %s +// RUN: %clang_cc1 -triple x86_64-apple-darwin -fregister-global-dtors-with-atexit -debug-info-kind=line-tables-only -emit-llvm -o - %s | FileCheck --check-prefix=CHECK --check-prefix=CXAATEXIT --check-prefix=WITHATEXIT %s // RUN: %clang_cc1 -triple x86_64-apple-darwin -fno-use-cxa-atexit -fregister-global-dtors-with-atexit -emit-llvm -o - %s | FileCheck --check-prefix=CHECK --check-prefix=ATEXIT --check-prefix=WITHATEXIT %s // WITHOUTATEXIT: global_ctors{{.*}}@A{{.*}}@C @@ -19,8 +19,9 @@ // CHECK: define internal i32 @foo() // WITHOUTATEXIT-NOT: define -// WITHATEXIT: define internal void @__GLOBAL_init_123(){{.*}}section "__TEXT,__StaticInit,regular,pure_instructions" -// CXAATEXIT: call i32 @__cxa_atexit(void (i8*)* bitcast (void ()* @E to void (i8*)*), i8* null, i8* @__dso_handle) +// CXAATEXIT: define internal void @__GLOBAL_init_123(){{.*}}section "__TEXT,__StaticInit,regular,pure_instructions" !dbg ![[GLOBAL_INIT_SP:.*]] { +// ATEXIT: define internal void @__GLOBAL_init_123(){{.*}}section "__TEXT,__StaticInit,regular,pure_instructions" +// CXAATEXIT: call i32 @__cxa_atexit(void (i8*)* bitcast (void ()* @E to void (i8*)*), i8* null, i8* @__dso_handle) {{.*}}, !dbg ![[GLOBAL_INIT_LOC:.*]] // CXAATEXIT: call i32 @__cxa_atexit(void (i8*)* bitcast (void ()* @G to void (i8*)*), i8* null, i8* @__dso_handle) // ATEXIT: call i32 @atexit(void ()* @E) // ATEXIT: call i32 @atexit(void ()* @G) @@ -82,3 +83,6 @@ static void D() { int main() { return 0; } + +// CXAATEXIT: ![[GLOBAL_INIT_SP]] = distinct !DISubprogram(linkageName: "__GLOBAL_init_123", +// CXAATEXIT: ![[GLOBAL_INIT_LOC]] = !DILocation(line: 0, scope: ![[GLOBAL_INIT_SP]]) diff --git a/clang/test/CodeGen/debug-info-oslog.c b/clang/test/CodeGen/debug-info-oslog.c index 11a1cd64dedff..49c361ae9d5aa 100644 --- a/clang/test/CodeGen/debug-info-oslog.c +++ b/clang/test/CodeGen/debug-info-oslog.c @@ -10,6 +10,6 @@ void test_builtin_os_log(void *buf, int i, const char *data) { // This helper is going to be uniqued, so it should not have a line // number between file and type. -// CHECK: distinct !DISubprogram(name: "__os_log_helper_1_0_1_4_0", +// CHECK: distinct !DISubprogram(linkageName: "__os_log_helper_1_0_1_4_0", // CHECK-SAME: file: !{{.*}}, type // CHECK-SAME: flags: DIFlagArtificial diff --git a/clang/test/CodeGenObjC/nontrivial-c-struct-exception.m b/clang/test/CodeGenObjC/nontrivial-c-struct-exception.m index 10fc3a3db6b61..7fd0f23943c09 100644 --- a/clang/test/CodeGenObjC/nontrivial-c-struct-exception.m +++ b/clang/test/CodeGenObjC/nontrivial-c-struct-exception.m @@ -54,6 +54,9 @@ void testStrongException(void) { // CHECK: resume +// CHECK: define{{.*}} void @__destructor_8_w8({{.*}} !dbg ![[DTOR_SP:.*]] { +// CHECK: load i8**, i8*** {{.*}}, !dbg ![[DTOR_LOC:.*]] + Weak genWeak(void); void calleeWeak(Weak, Weak); @@ -63,3 +66,5 @@ void testWeakException(void) { // CHECK-DAG: [[ARTIFICIAL_LOC_1]] = !DILocation(line: 0 // CHECK-DAG: [[ARTIFICIAL_LOC_2]] = !DILocation(line: 0 +// CHECK: ![[DTOR_SP]] = distinct !DISubprogram(linkageName: "__destructor_8_w8", +// CHECK: ![[DTOR_LOC]] = !DILocation(line: 0, scope: ![[DTOR_SP]]) From 49fa6abf7472022d7bf1fb05df3033a7bd1ff0de Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 8 Jun 2021 18:23:34 -0400 Subject: [PATCH 236/619] Revert "GlobalISel: Use MMO helper for getting the size in bits" This reverts commit dc98adfb448bdb845605185bb173e99614a17790. This should still be done, but this is currently causing some commit ordering issues. --- llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp index 0e004a7fc1178..c757cb65947e6 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp @@ -352,7 +352,8 @@ LegalizerInfo::getAction(const MachineInstr &MI, SmallVector MemDescrs; for (const auto &MMO : MI.memoperands()) - MemDescrs.push_back({MMO->getSizeInBits(), 8 * MMO->getAlign().value(), + MemDescrs.push_back({8 * MMO->getSize() /* in bits */, + 8 * MMO->getAlign().value(), MMO->getSuccessOrdering()}); return getAction({MI.getOpcode(), Types, MemDescrs}); From 990278d026d680942c859be70836ad34a9a716f7 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 19 May 2021 22:06:14 -0400 Subject: [PATCH 237/619] CodeGen: Store LLT instead of uint64_t in MachineMemOperand GlobalISel is relying on regular MachineMemOperands to track all of the memory properties of accesses. Just the raw byte size is insufficent to disambiguate all situations. For example, if we need to split an unaligned extending load, we need to know the number of bits in the original source value and can't infer it from the result type. This is also a problem for extending vector loads. This does decrease the maximum representable size from the full uint64_t bytes to a maximum of 16-bits. No in tree testcases hit this, other than places using UINT64_MAX for unknown sizes. This may be an issue for G_MEMCPY and co., although they can just use unknown size for large static sizes. This also has potential for backend abuse by relying on the type when it really shouldn't be relevant after selection. This does not include the necessary MIR printer/parser changes to represent this. --- llvm/include/llvm/CodeGen/MachineFunction.h | 16 +- llvm/include/llvm/CodeGen/MachineMemOperand.h | 29 +++- llvm/include/llvm/Support/LowLevelTypeImpl.h | 1 + llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 38 +++-- .../CodeGen/GlobalISel/MachineIRBuilder.cpp | 12 +- llvm/lib/CodeGen/LowLevelType.cpp | 4 +- llvm/lib/CodeGen/MachineFunction.cpp | 22 ++- llvm/lib/CodeGen/MachineOperand.cpp | 19 ++- .../AArch64/GISel/AArch64LegalizerInfo.cpp | 9 +- llvm/lib/Target/ARM/ARMCallLowering.cpp | 3 +- .../test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll | 152 +----------------- .../test/CodeGen/AMDGPU/GlobalISel/udivrem.ll | 122 +------------- 12 files changed, 123 insertions(+), 304 deletions(-) diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h index 01e3bc03255c1..e9ce813428dc4 100644 --- a/llvm/include/llvm/CodeGen/MachineFunction.h +++ b/llvm/include/llvm/CodeGen/MachineFunction.h @@ -865,12 +865,23 @@ class MachineFunction { AtomicOrdering Ordering = AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic); + MachineMemOperand *getMachineMemOperand( + MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, + Align base_alignment, const AAMDNodes &AAInfo = AAMDNodes(), + const MDNode *Ranges = nullptr, SyncScope::ID SSID = SyncScope::System, + AtomicOrdering Ordering = AtomicOrdering::NotAtomic, + AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic); + /// getMachineMemOperand - Allocate a new MachineMemOperand by copying /// an existing one, adjusting by an offset and using the given size. /// MachineMemOperands are owned by the MachineFunction and need not be /// explicitly deallocated. MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO, - int64_t Offset, uint64_t Size); + int64_t Offset, LLT Ty); + MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO, + int64_t Offset, uint64_t Size) { + return getMachineMemOperand(MMO, Offset, LLT::scalar(8 * Size)); + } /// getMachineMemOperand - Allocate a new MachineMemOperand by copying /// an existing one, replacing only the MachinePointerInfo and size. @@ -879,6 +890,9 @@ class MachineFunction { MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO, const MachinePointerInfo &PtrInfo, uint64_t Size); + MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO, + const MachinePointerInfo &PtrInfo, + LLT Ty); /// Allocate a new MachineMemOperand by copying an existing one, /// replacing only AliasAnalysis information. MachineMemOperands are owned diff --git a/llvm/include/llvm/CodeGen/MachineMemOperand.h b/llvm/include/llvm/CodeGen/MachineMemOperand.h index 0bef1c1035d79..8c1fb1d7785ad 100644 --- a/llvm/include/llvm/CodeGen/MachineMemOperand.h +++ b/llvm/include/llvm/CodeGen/MachineMemOperand.h @@ -22,6 +22,7 @@ #include "llvm/IR/Value.h" // PointerLikeTypeTraits #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/DataTypes.h" +#include "llvm/Support/LowLevelTypeImpl.h" namespace llvm { @@ -168,7 +169,11 @@ class MachineMemOperand { }; MachinePointerInfo PtrInfo; - uint64_t Size; + + /// Track the memory type of the access. An access size which is unknown or + /// too large to be represented by LLT should use the invalid LLT. + LLT MemoryType; + Flags FlagVals; Align BaseAlign; MachineAtomicInfo AtomicInfo; @@ -187,6 +192,12 @@ class MachineMemOperand { SyncScope::ID SSID = SyncScope::System, AtomicOrdering Ordering = AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic); + MachineMemOperand(MachinePointerInfo PtrInfo, Flags flags, LLT type, Align a, + const AAMDNodes &AAInfo = AAMDNodes(), + const MDNode *Ranges = nullptr, + SyncScope::ID SSID = SyncScope::System, + AtomicOrdering Ordering = AtomicOrdering::NotAtomic, + AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic); const MachinePointerInfo &getPointerInfo() const { return PtrInfo; } @@ -217,11 +228,23 @@ class MachineMemOperand { unsigned getAddrSpace() const { return PtrInfo.getAddrSpace(); } + /// Return the memory type of the memory reference. This should only be relied + /// on for GlobalISel G_* operation legalization. + LLT getMemoryType() const { return MemoryType; } + /// Return the size in bytes of the memory reference. - uint64_t getSize() const { return Size; } + uint64_t getSize() const { + return MemoryType.isValid() ? MemoryType.getSizeInBytes() : ~UINT64_C(0); + } /// Return the size in bits of the memory reference. - uint64_t getSizeInBits() const { return Size * 8; } + uint64_t getSizeInBits() const { + return MemoryType.isValid() ? MemoryType.getSizeInBits() : ~UINT64_C(0); + } + + LLT getType() const { + return MemoryType; + } /// Return the minimum known alignment in bytes of the actual memory /// reference. diff --git a/llvm/include/llvm/Support/LowLevelTypeImpl.h b/llvm/include/llvm/Support/LowLevelTypeImpl.h index 379e37101e9a4..3d631dc6d5307 100644 --- a/llvm/include/llvm/Support/LowLevelTypeImpl.h +++ b/llvm/include/llvm/Support/LowLevelTypeImpl.h @@ -366,6 +366,7 @@ class LLT { } } +public: uint64_t getUniqueRAWLLTData() const { return ((uint64_t)RawData) << 2 | ((uint64_t)IsPointer) << 1 | ((uint64_t)IsVector); diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 42c63849910e4..a3a8f10c011df 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -1300,7 +1300,7 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) { AAMDNodes AAMetadata; LI.getAAMetadata(AAMetadata); auto MMO = MF->getMachineMemOperand( - Ptr, Flags, MRI->getType(Regs[i]).getSizeInBytes(), + Ptr, Flags, MRI->getType(Regs[i]), commonAlignment(BaseAlign, Offsets[i] / 8), AAMetadata, Ranges, LI.getSyncScopeID(), LI.getOrdering()); MIRBuilder.buildLoad(Regs[i], Addr, *MMO); @@ -1342,7 +1342,7 @@ bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) { AAMDNodes AAMetadata; SI.getAAMetadata(AAMetadata); auto MMO = MF->getMachineMemOperand( - Ptr, Flags, MRI->getType(Vals[i]).getSizeInBytes(), + Ptr, Flags, MRI->getType(Vals[i]), commonAlignment(BaseAlign, Offsets[i] / 8), AAMetadata, nullptr, SI.getSyncScopeID(), SI.getOrdering()); MIRBuilder.buildStore(Vals[i], Addr, *MMO); @@ -1627,12 +1627,14 @@ void IRTranslator::getStackGuard(Register DstReg, if (!Global) return; + unsigned AddrSpace = Global->getType()->getPointerAddressSpace(); + LLT PtrTy = LLT::pointer(AddrSpace, DL->getPointerSizeInBits(AddrSpace)); + MachinePointerInfo MPInfo(Global); auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable; - MachineMemOperand *MemRef = - MF->getMachineMemOperand(MPInfo, Flags, DL->getPointerSizeInBits() / 8, - DL->getPointerABIAlignment(0)); + MachineMemOperand *MemRef = MF->getMachineMemOperand( + MPInfo, Flags, PtrTy, DL->getPointerABIAlignment(AddrSpace)); MIB.setMemRefs({MemRef}); } @@ -2067,7 +2069,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, *MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore | MachineMemOperand::MOVolatile, - PtrTy.getSizeInBits() / 8, Align(8))); + PtrTy, Align(8))); return true; } case Intrinsic::stacksave: { @@ -2364,10 +2366,11 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { if (TLI.getTgtMemIntrinsic(Info, CI, *MF, ID)) { Align Alignment = Info.align.getValueOr( DL->getABITypeAlign(Info.memVT.getTypeForEVT(F->getContext()))); - - uint64_t Size = Info.memVT.getStoreSize(); + LLT MemTy = Info.memVT.isSimple() + ? getLLTForMVT(Info.memVT.getSimpleVT()) + : LLT::scalar(Info.memVT.getStoreSizeInBits()); MIB.addMemOperand(MF->getMachineMemOperand(MachinePointerInfo(Info.ptrVal), - Info.flags, Size, Alignment)); + Info.flags, MemTy, Alignment)); } return true; @@ -2733,9 +2736,6 @@ bool IRTranslator::translateAtomicCmpXchg(const User &U, auto &TLI = *MF->getSubtarget().getTargetLowering(); auto Flags = TLI.getAtomicMemOperandFlags(I, *DL); - Type *ResType = I.getType(); - Type *ValType = ResType->Type::getStructElementType(0); - auto Res = getOrCreateVRegs(I); Register OldValRes = Res[0]; Register SuccessRes = Res[1]; @@ -2749,9 +2749,9 @@ bool IRTranslator::translateAtomicCmpXchg(const User &U, MIRBuilder.buildAtomicCmpXchgWithSuccess( OldValRes, SuccessRes, Addr, Cmp, NewVal, *MF->getMachineMemOperand( - MachinePointerInfo(I.getPointerOperand()), Flags, - DL->getTypeStoreSize(ValType), getMemOpAlign(I), AAMetadata, nullptr, - I.getSyncScopeID(), I.getSuccessOrdering(), I.getFailureOrdering())); + MachinePointerInfo(I.getPointerOperand()), Flags, MRI->getType(Cmp), + getMemOpAlign(I), AAMetadata, nullptr, I.getSyncScopeID(), + I.getSuccessOrdering(), I.getFailureOrdering())); return true; } @@ -2761,8 +2761,6 @@ bool IRTranslator::translateAtomicRMW(const User &U, auto &TLI = *MF->getSubtarget().getTargetLowering(); auto Flags = TLI.getAtomicMemOperandFlags(I, *DL); - Type *ResType = I.getType(); - Register Res = getOrCreateVReg(I); Register Addr = getOrCreateVReg(*I.getPointerOperand()); Register Val = getOrCreateVReg(*I.getValOperand()); @@ -2818,9 +2816,9 @@ bool IRTranslator::translateAtomicRMW(const User &U, MIRBuilder.buildAtomicRMW( Opcode, Res, Addr, Val, *MF->getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), - Flags, DL->getTypeStoreSize(ResType), - getMemOpAlign(I), AAMetadata, nullptr, - I.getSyncScopeID(), I.getOrdering())); + Flags, MRI->getType(Val), getMemOpAlign(I), + AAMetadata, nullptr, I.getSyncScopeID(), + I.getOrdering())); return true; } diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 3352d1989d157..54ac62793b08d 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -347,10 +347,9 @@ MachineIRBuilder::buildLoad(const DstOp &Dst, const SrcOp &Addr, MMOFlags |= MachineMemOperand::MOLoad; assert((MMOFlags & MachineMemOperand::MOStore) == 0); - uint64_t Size = MemoryLocation::getSizeOrUnknown( - TypeSize::Fixed(Dst.getLLTTy(*getMRI()).getSizeInBytes())); + LLT Ty = Dst.getLLTTy(*getMRI()); MachineMemOperand *MMO = - getMF().getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo); + getMF().getMachineMemOperand(PtrInfo, MMOFlags, Ty, Alignment, AAInfo); return buildLoad(Dst, Addr, *MMO); } @@ -373,7 +372,7 @@ MachineInstrBuilder MachineIRBuilder::buildLoadFromOffset( MachineMemOperand &BaseMMO, int64_t Offset) { LLT LoadTy = Dst.getLLTTy(*getMRI()); MachineMemOperand *OffsetMMO = - getMF().getMachineMemOperand(&BaseMMO, Offset, LoadTy.getSizeInBytes()); + getMF().getMachineMemOperand(&BaseMMO, Offset, LoadTy); if (Offset == 0) // This may be a size or type changing load. return buildLoad(Dst, BasePtr, *OffsetMMO); @@ -406,10 +405,9 @@ MachineIRBuilder::buildStore(const SrcOp &Val, const SrcOp &Addr, MMOFlags |= MachineMemOperand::MOStore; assert((MMOFlags & MachineMemOperand::MOLoad) == 0); - uint64_t Size = MemoryLocation::getSizeOrUnknown( - TypeSize::Fixed(Val.getLLTTy(*getMRI()).getSizeInBytes())); + LLT Ty = Val.getLLTTy(*getMRI()); MachineMemOperand *MMO = - getMF().getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo); + getMF().getMachineMemOperand(PtrInfo, MMOFlags, Ty, Alignment, AAInfo); return buildStore(Val, Addr, *MMO); } diff --git a/llvm/lib/CodeGen/LowLevelType.cpp b/llvm/lib/CodeGen/LowLevelType.cpp index 1cfd4fee8e5bb..62e9c6b629d3b 100644 --- a/llvm/lib/CodeGen/LowLevelType.cpp +++ b/llvm/lib/CodeGen/LowLevelType.cpp @@ -56,8 +56,8 @@ LLT llvm::getLLTForMVT(MVT Ty) { if (!Ty.isVector()) return LLT::scalar(Ty.getSizeInBits()); - return LLT::vector(Ty.getVectorElementCount(), - Ty.getVectorElementType().getSizeInBits()); + return LLT::scalarOrVector(Ty.getVectorElementCount(), + Ty.getVectorElementType().getSizeInBits()); } const llvm::fltSemantics &llvm::getFltSemanticForLLT(LLT Ty) { diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp index dff38f09022e5..39feb92a9752f 100644 --- a/llvm/lib/CodeGen/MachineFunction.cpp +++ b/llvm/lib/CodeGen/MachineFunction.cpp @@ -437,6 +437,16 @@ MachineMemOperand *MachineFunction::getMachineMemOperand( SSID, Ordering, FailureOrdering); } +MachineMemOperand *MachineFunction::getMachineMemOperand( + MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, + Align base_alignment, const AAMDNodes &AAInfo, const MDNode *Ranges, + SyncScope::ID SSID, AtomicOrdering Ordering, + AtomicOrdering FailureOrdering) { + return new (Allocator) + MachineMemOperand(PtrInfo, f, MemTy, base_alignment, AAInfo, Ranges, SSID, + Ordering, FailureOrdering); +} + MachineMemOperand *MachineFunction::getMachineMemOperand( const MachineMemOperand *MMO, const MachinePointerInfo &PtrInfo, uint64_t Size) { return new (Allocator) @@ -445,9 +455,17 @@ MachineMemOperand *MachineFunction::getMachineMemOperand( MMO->getSuccessOrdering(), MMO->getFailureOrdering()); } +MachineMemOperand *MachineFunction::getMachineMemOperand( + const MachineMemOperand *MMO, const MachinePointerInfo &PtrInfo, LLT Ty) { + return new (Allocator) + MachineMemOperand(PtrInfo, MMO->getFlags(), Ty, MMO->getBaseAlign(), + AAMDNodes(), nullptr, MMO->getSyncScopeID(), + MMO->getSuccessOrdering(), MMO->getFailureOrdering()); +} + MachineMemOperand * MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO, - int64_t Offset, uint64_t Size) { + int64_t Offset, LLT Ty) { const MachinePointerInfo &PtrInfo = MMO->getPointerInfo(); // If there is no pointer value, the offset isn't tracked so we need to adjust @@ -459,7 +477,7 @@ MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO, // Do not preserve ranges, since we don't necessarily know what the high bits // are anymore. return new (Allocator) MachineMemOperand( - PtrInfo.getWithOffset(Offset), MMO->getFlags(), Size, Alignment, + PtrInfo.getWithOffset(Offset), MMO->getFlags(), Ty, Alignment, MMO->getAAInfo(), nullptr, MMO->getSyncScopeID(), MMO->getSuccessOrdering(), MMO->getFailureOrdering()); } diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp index 79f59cf67e5e2..b020c42375769 100644 --- a/llvm/lib/CodeGen/MachineOperand.cpp +++ b/llvm/lib/CodeGen/MachineOperand.cpp @@ -1023,13 +1023,12 @@ MachinePointerInfo MachinePointerInfo::getUnknownStack(MachineFunction &MF) { } MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f, - uint64_t s, Align a, - const AAMDNodes &AAInfo, + LLT type, Align a, const AAMDNodes &AAInfo, const MDNode *Ranges, SyncScope::ID SSID, AtomicOrdering Ordering, AtomicOrdering FailureOrdering) - : PtrInfo(ptrinfo), Size(s), FlagVals(f), BaseAlign(a), AAInfo(AAInfo), - Ranges(Ranges) { + : PtrInfo(ptrinfo), MemoryType(type), FlagVals(f), BaseAlign(a), + AAInfo(AAInfo), Ranges(Ranges) { assert((PtrInfo.V.isNull() || PtrInfo.V.is() || isa(PtrInfo.V.get()->getType())) && "invalid pointer value"); @@ -1043,11 +1042,21 @@ MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f, assert(getFailureOrdering() == FailureOrdering && "Value truncated"); } +MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f, + uint64_t s, Align a, + const AAMDNodes &AAInfo, + const MDNode *Ranges, SyncScope::ID SSID, + AtomicOrdering Ordering, + AtomicOrdering FailureOrdering) + : MachineMemOperand(ptrinfo, f, + s == ~UINT64_C(0) ? LLT() : LLT::scalar(8 * s), a, + AAInfo, Ranges, SSID, Ordering, FailureOrdering) {} + /// Profile - Gather unique data for the object. /// void MachineMemOperand::Profile(FoldingSetNodeID &ID) const { ID.AddInteger(getOffset()); - ID.AddInteger(Size); + ID.AddInteger(getMemoryType().getUniqueRAWLLTData()); ID.AddPointer(getOpaqueValue()); ID.AddInteger(getFlags()); ID.AddInteger(getBaseAlign().value()); diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 0f778284ade0b..80f7cdded3e89 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -1007,7 +1007,7 @@ bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI, auto List = MIRBuilder.buildLoad( PtrTy, ListPtr, *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad, - PtrSize, PtrAlign)); + PtrTy, PtrAlign)); MachineInstrBuilder DstPtr; if (Alignment > PtrAlign) { @@ -1019,11 +1019,12 @@ bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI, } else DstPtr = List; - uint64_t ValSize = MRI.getType(Dst).getSizeInBits() / 8; + LLT ValTy = MRI.getType(Dst); + uint64_t ValSize = ValTy.getSizeInBits() / 8; MIRBuilder.buildLoad( Dst, DstPtr, *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad, - ValSize, std::max(Alignment, PtrAlign))); + ValTy, std::max(Alignment, PtrAlign))); auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrAlign)); @@ -1032,7 +1033,7 @@ bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI, MIRBuilder.buildStore(NewList, ListPtr, *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOStore, - PtrSize, PtrAlign)); + PtrTy, PtrAlign)); MI.eraseFromParent(); return true; diff --git a/llvm/lib/Target/ARM/ARMCallLowering.cpp b/llvm/lib/Target/ARM/ARMCallLowering.cpp index 78fcbbb632c9d..ee600477ed9fc 100644 --- a/llvm/lib/Target/ARM/ARMCallLowering.cpp +++ b/llvm/lib/Target/ARM/ARMCallLowering.cpp @@ -128,8 +128,7 @@ struct ARMOutgoingValueHandler : public CallLowering::OutgoingValueHandler { Register ExtReg = extendRegister(ValVReg, VA); auto MMO = MIRBuilder.getMF().getMachineMemOperand( - MPO, MachineMemOperand::MOStore, VA.getLocVT().getStoreSize(), - Align(1)); + MPO, MachineMemOperand::MOStore, LLT(VA.getLocVT()), Align(1)); MIRBuilder.buildStore(ExtReg, Addr, *MMO); } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll index 984b6a21e3adc..ce1066abc377c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll @@ -3330,147 +3330,11 @@ define amdgpu_kernel void @sdivrem_i3(i3 addrspace(1)* %out0, i3 addrspace(1)* % ret void } -define amdgpu_kernel void @sdivrem_i27(i27 addrspace(1)* %out0, i27 addrspace(1)* %out1, i27 %x, i27 %y) { -; GFX8-LABEL: sdivrem_i27: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x10 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_bfe_i32 s1, s1, 0x1b0000 -; GFX8-NEXT: s_ashr_i32 s6, s1, 31 -; GFX8-NEXT: s_add_i32 s1, s1, s6 -; GFX8-NEXT: s_xor_b32 s7, s1, s6 -; GFX8-NEXT: v_cvt_f32_u32_e32 v0, s7 -; GFX8-NEXT: s_sub_i32 s1, 0, s7 -; GFX8-NEXT: s_bfe_i32 s0, s0, 0x1b0000 -; GFX8-NEXT: s_ashr_i32 s8, s0, 31 -; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX8-NEXT: s_add_i32 s0, s0, s8 -; GFX8-NEXT: s_xor_b32 s9, s0, s8 -; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 -; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX8-NEXT: v_mul_lo_u32 v1, s1, v0 -; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GFX8-NEXT: s_xor_b32 s4, s8, s6 -; GFX8-NEXT: v_mul_hi_u32 v1, v0, v1 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1 -; GFX8-NEXT: v_mul_hi_u32 v2, s9, v0 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: v_mov_b32_e32 v0, s0 -; GFX8-NEXT: v_mov_b32_e32 v1, s1 -; GFX8-NEXT: v_mul_lo_u32 v3, v2, s7 -; GFX8-NEXT: v_add_u32_e32 v4, vcc, 1, v2 -; GFX8-NEXT: v_sub_u32_e32 v3, vcc, s9, v3 -; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s7, v3 -; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc -; GFX8-NEXT: v_subrev_u32_e64 v4, s[0:1], s7, v3 -; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc -; GFX8-NEXT: v_add_u32_e32 v4, vcc, 1, v2 -; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s7, v3 -; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc -; GFX8-NEXT: v_subrev_u32_e64 v4, s[0:1], s7, v3 -; GFX8-NEXT: v_xor_b32_e32 v2, s4, v2 -; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc -; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, s4, v2 -; GFX8-NEXT: v_xor_b32_e32 v3, s8, v3 -; GFX8-NEXT: flat_store_dword v[0:1], v2 -; GFX8-NEXT: v_mov_b32_e32 v0, s2 -; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, s8, v3 -; GFX8-NEXT: v_mov_b32_e32 v1, s3 -; GFX8-NEXT: flat_store_dword v[0:1], v3 -; GFX8-NEXT: s_endpgm -; -; GFX9-LABEL: sdivrem_i27: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x10 -; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_bfe_i32 s1, s1, 0x1b0000 -; GFX9-NEXT: s_ashr_i32 s6, s1, 31 -; GFX9-NEXT: s_add_i32 s1, s1, s6 -; GFX9-NEXT: s_xor_b32 s7, s1, s6 -; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s7 -; GFX9-NEXT: s_sub_i32 s1, 0, s7 -; GFX9-NEXT: s_bfe_i32 s0, s0, 0x1b0000 -; GFX9-NEXT: s_ashr_i32 s8, s0, 31 -; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX9-NEXT: s_add_i32 s0, s0, s8 -; GFX9-NEXT: s_xor_b32 s9, s0, s8 -; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 -; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX9-NEXT: v_mul_lo_u32 v1, s1, v0 -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GFX9-NEXT: s_xor_b32 s4, s8, s6 -; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1 -; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 -; GFX9-NEXT: v_mul_hi_u32 v0, s9, v0 -; GFX9-NEXT: v_mul_lo_u32 v1, v0, s7 -; GFX9-NEXT: v_add_u32_e32 v3, 1, v0 -; GFX9-NEXT: v_sub_u32_e32 v1, s9, v1 -; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s7, v1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc -; GFX9-NEXT: v_subrev_u32_e32 v3, s7, v1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; GFX9-NEXT: v_add_u32_e32 v3, 1, v0 -; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s7, v1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc -; GFX9-NEXT: v_subrev_u32_e32 v3, s7, v1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; GFX9-NEXT: v_xor_b32_e32 v0, s4, v0 -; GFX9-NEXT: v_xor_b32_e32 v1, s8, v1 -; GFX9-NEXT: v_subrev_u32_e32 v0, s4, v0 -; GFX9-NEXT: v_subrev_u32_e32 v1, s8, v1 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_store_dword v2, v0, s[0:1] -; GFX9-NEXT: global_store_dword v2, v1, s[2:3] -; GFX9-NEXT: s_endpgm -; -; GFX10-LABEL: sdivrem_i27: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x10 -; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_bfe_i32 s1, s1, 0x1b0000 -; GFX10-NEXT: s_bfe_i32 s0, s0, 0x1b0000 -; GFX10-NEXT: s_ashr_i32 s6, s1, 31 -; GFX10-NEXT: s_ashr_i32 s8, s0, 31 -; GFX10-NEXT: s_add_i32 s1, s1, s6 -; GFX10-NEXT: s_add_i32 s0, s0, s8 -; GFX10-NEXT: s_xor_b32 s7, s1, s6 -; GFX10-NEXT: s_xor_b32 s0, s0, s8 -; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s7 -; GFX10-NEXT: s_sub_i32 s1, 0, s7 -; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 -; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX10-NEXT: v_mul_lo_u32 v1, s1, v0 -; GFX10-NEXT: v_mul_hi_u32 v1, v0, v1 -; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1 -; GFX10-NEXT: v_mul_hi_u32 v0, s0, v0 -; GFX10-NEXT: v_mul_lo_u32 v1, v0, s7 -; GFX10-NEXT: v_add_nc_u32_e32 v2, 1, v0 -; GFX10-NEXT: v_sub_nc_u32_e32 v1, s0, v1 -; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GFX10-NEXT: s_xor_b32 s4, s8, s6 -; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s7, v1 -; GFX10-NEXT: v_subrev_nc_u32_e32 v3, s7, v1 -; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo -; GFX10-NEXT: v_add_nc_u32_e32 v2, 1, v0 -; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s7, v1 -; GFX10-NEXT: v_subrev_nc_u32_e32 v3, s7, v1 -; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo -; GFX10-NEXT: v_mov_b32_e32 v2, 0 -; GFX10-NEXT: v_xor_b32_e32 v0, s4, v0 -; GFX10-NEXT: v_xor_b32_e32 v1, s8, v1 -; GFX10-NEXT: v_subrev_nc_u32_e32 v0, s4, v0 -; GFX10-NEXT: v_subrev_nc_u32_e32 v1, s8, v1 -; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: global_store_dword v2, v0, s[0:1] -; GFX10-NEXT: global_store_dword v2, v1, s[2:3] -; GFX10-NEXT: s_endpgm - %div = sdiv i27 %x, %y - store i27 %div, i27 addrspace(1)* %out0 - %rem = srem i27 %x, %y - store i27 %rem, i27 addrspace(1)* %out1 - ret void -} +; FIXME: Reenable test +; define amdgpu_kernel void @sdivrem_i27(i27 addrspace(1)* %out0, i27 addrspace(1)* %out1, i27 %x, i27 %y) { +; %div = sdiv i27 %x, %y +; store i27 %div, i27 addrspace(1)* %out0 +; %rem = srem i27 %x, %y +; store i27 %rem, i27 addrspace(1)* %out1 +; ret void +; } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll index 89c4569aaa291..0cb228f6c651d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll @@ -2634,117 +2634,11 @@ define amdgpu_kernel void @udivrem_i3(i3 addrspace(1)* %out0, i3 addrspace(1)* % ret void } -define amdgpu_kernel void @udivrem_i27(i27 addrspace(1)* %out0, i27 addrspace(1)* %out1, i27 %x, i27 %y) { -; GFX8-LABEL: udivrem_i27: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x10 -; GFX8-NEXT: s_mov_b32 s2, 0x7ffffff -; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_and_b32 s6, s1, s2 -; GFX8-NEXT: v_cvt_f32_u32_e32 v0, s6 -; GFX8-NEXT: s_sub_i32 s1, 0, s6 -; GFX8-NEXT: s_and_b32 s7, s0, s2 -; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 -; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX8-NEXT: v_mul_lo_u32 v1, s1, v0 -; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GFX8-NEXT: v_mul_hi_u32 v1, v0, v1 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1 -; GFX8-NEXT: v_mul_hi_u32 v2, s7, v0 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: v_mov_b32_e32 v0, s0 -; GFX8-NEXT: v_mov_b32_e32 v1, s1 -; GFX8-NEXT: v_mul_lo_u32 v3, v2, s6 -; GFX8-NEXT: v_add_u32_e32 v4, vcc, 1, v2 -; GFX8-NEXT: v_sub_u32_e32 v3, vcc, s7, v3 -; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s6, v3 -; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc -; GFX8-NEXT: v_subrev_u32_e64 v4, s[0:1], s6, v3 -; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc -; GFX8-NEXT: v_add_u32_e32 v4, vcc, 1, v2 -; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s6, v3 -; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc -; GFX8-NEXT: v_subrev_u32_e64 v4, s[0:1], s6, v3 -; GFX8-NEXT: flat_store_dword v[0:1], v2 -; GFX8-NEXT: v_mov_b32_e32 v0, s2 -; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc -; GFX8-NEXT: v_mov_b32_e32 v1, s3 -; GFX8-NEXT: flat_store_dword v[0:1], v3 -; GFX8-NEXT: s_endpgm -; -; GFX9-LABEL: udivrem_i27: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x10 -; GFX9-NEXT: s_mov_b32 s2, 0x7ffffff -; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_and_b32 s6, s1, s2 -; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s6 -; GFX9-NEXT: s_sub_i32 s1, 0, s6 -; GFX9-NEXT: s_and_b32 s7, s0, s2 -; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 -; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX9-NEXT: v_mul_lo_u32 v1, s1, v0 -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1 -; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 -; GFX9-NEXT: v_mul_hi_u32 v0, s7, v0 -; GFX9-NEXT: v_mul_lo_u32 v1, v0, s6 -; GFX9-NEXT: v_add_u32_e32 v3, 1, v0 -; GFX9-NEXT: v_sub_u32_e32 v1, s7, v1 -; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s6, v1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc -; GFX9-NEXT: v_subrev_u32_e32 v3, s6, v1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; GFX9-NEXT: v_add_u32_e32 v3, 1, v0 -; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s6, v1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc -; GFX9-NEXT: v_subrev_u32_e32 v3, s6, v1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_store_dword v2, v0, s[0:1] -; GFX9-NEXT: global_store_dword v2, v1, s[2:3] -; GFX9-NEXT: s_endpgm -; -; GFX10-LABEL: udivrem_i27: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x10 -; GFX10-NEXT: s_mov_b32 s2, 0x7ffffff -; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_and_b32 s6, s1, s2 -; GFX10-NEXT: s_and_b32 s0, s0, s2 -; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s6 -; GFX10-NEXT: s_sub_i32 s1, 0, s6 -; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 -; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX10-NEXT: v_mul_lo_u32 v1, s1, v0 -; GFX10-NEXT: v_mul_hi_u32 v1, v0, v1 -; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1 -; GFX10-NEXT: v_mul_hi_u32 v0, s0, v0 -; GFX10-NEXT: v_mul_lo_u32 v1, v0, s6 -; GFX10-NEXT: v_add_nc_u32_e32 v2, 1, v0 -; GFX10-NEXT: v_sub_nc_u32_e32 v1, s0, v1 -; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s6, v1 -; GFX10-NEXT: v_subrev_nc_u32_e32 v3, s6, v1 -; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo -; GFX10-NEXT: v_add_nc_u32_e32 v2, 1, v0 -; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s6, v1 -; GFX10-NEXT: v_subrev_nc_u32_e32 v3, s6, v1 -; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo -; GFX10-NEXT: v_mov_b32_e32 v2, 0 -; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo -; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: global_store_dword v2, v0, s[0:1] -; GFX10-NEXT: global_store_dword v2, v1, s[2:3] -; GFX10-NEXT: s_endpgm - %div = udiv i27 %x, %y - store i27 %div, i27 addrspace(1)* %out0 - %rem = urem i27 %x, %y - store i27 %rem, i27 addrspace(1)* %out1 - ret void -} +; FIXME: Reenable test +; define amdgpu_kernel void @udivrem_i27(i27 addrspace(1)* %out0, i27 addrspace(1)* %out1, i27 %x, i27 %y) { +; %div = udiv i27 %x, %y +; store i27 %div, i27 addrspace(1)* %out0 +; %rem = urem i27 %x, %y +; store i27 %rem, i27 addrspace(1)* %out1 +; ret void +; } From b810600a93a1f426389ffe93c609af9b659a8430 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 29 Jun 2021 23:43:58 +0200 Subject: [PATCH 238/619] [Test] Regenerate test checks (NFC) Make these follow the update_test_checks.py format. --- llvm/test/Other/force-opaque-ptrs.ll | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/llvm/test/Other/force-opaque-ptrs.ll b/llvm/test/Other/force-opaque-ptrs.ll index 779c5a158bf5a..1a0738dfcd637 100644 --- a/llvm/test/Other/force-opaque-ptrs.ll +++ b/llvm/test/Other/force-opaque-ptrs.ll @@ -34,13 +34,17 @@ define void @f(i32* %p) { declare void @fn.fwd(i32) define void @f2(i32** %p) { -; CHECK-LABEL: define {{[^@]+}}@f2( -; CHECK-SAME: ptr {{%.*}}) { +; CHECK-LABEL: define {{[^@]+}}@f2 +; CHECK-SAME: (ptr [[P:%.*]]) { +; CHECK-NEXT: unreachable +; unreachable } define void @f3(i32 addrspace(1)* addrspace(2)* %p) { -; CHECK-LABEL: define {{[^@]+}}@f3( -; CHECK-SAME: ptr addrspace(2) {{%.*}}) { +; CHECK-LABEL: define {{[^@]+}}@f3 +; CHECK-SAME: (ptr addrspace(2) [[P:%.*]]) { +; CHECK-NEXT: unreachable +; unreachable } From cb3580e7ad247dfdcf2ad279895f52bb73c4cee4 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Tue, 29 Jun 2021 11:38:18 -0700 Subject: [PATCH 239/619] [OpaquePtr][BitcodeWriter] Handle attributes with types For example, byval. Skip the type attribute auto-upgrade if we already have the type. I've actually seen this error of the ValueEnumerator missing a type attribute's type in a non-opaque pointer context. Reviewed By: nikic Differential Revision: https://reviews.llvm.org/D105138 --- llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 3 +++ llvm/lib/Bitcode/Writer/ValueEnumerator.cpp | 5 +++++ llvm/test/Assembler/opaque-ptr.ll | 5 +++++ 3 files changed, 13 insertions(+) diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 8f8bd73f2082c..854243ee95bc6 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -3333,6 +3333,9 @@ Error BitcodeReader::parseFunctionRecord(ArrayRef Record) { if (!Func->hasParamAttribute(i, Kind)) continue; + if (Func->getParamAttribute(i, Kind).getValueAsType()) + continue; + Func->removeParamAttr(i, Kind); Type *PTy = cast(FTy)->getParamType(i); diff --git a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp index 2be3ca741f165..d86db61ee1f46 100644 --- a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -1045,6 +1045,11 @@ void ValueEnumerator::EnumerateAttributes(AttributeList PAL) { if (Entry == 0) { AttributeGroups.push_back(Pair); Entry = AttributeGroups.size(); + + for (Attribute Attr : AS) { + if (Attr.isTypeAttribute()) + EnumerateType(Attr.getValueAsType()); + } } } } diff --git a/llvm/test/Assembler/opaque-ptr.ll b/llvm/test/Assembler/opaque-ptr.ll index 5ee57fae18f06..c168fda39bf70 100644 --- a/llvm/test/Assembler/opaque-ptr.ll +++ b/llvm/test/Assembler/opaque-ptr.ll @@ -141,3 +141,8 @@ cleanup: cleanup ret void } + +; CHECK: define void @byval(ptr byval({ i32, i32 }) %0) +define void @byval(ptr byval({ i32, i32 }) %0) { + ret void +} From f664e2ec371f61b69e11147d7f9e045083335917 Mon Sep 17 00:00:00 2001 From: Aaron Puchert Date: Tue, 29 Jun 2021 23:46:43 +0200 Subject: [PATCH 240/619] Thread safety analysis: Always warn when dropping locks on back edges We allow branches to join where one holds a managed lock but the other doesn't, but we can't do so for back edges: because there we can't drop them from the lockset, as we have already analyzed the loop with the larger lockset. So we can't allow dropping managed locks on back edges. We move the managed() check from handleRemovalFromIntersection up to intersectAndWarn, where we additionally check if we're on a back edge if we're removing from the first lock set (the entry set of the next block) but not if we're removing from the second lock set (the exit set of the previous block). Now that the order of arguments matters, I had to swap them in one invocation, which also causes some minor differences in the tests. Reviewed By: delesley Differential Revision: https://reviews.llvm.org/D104261 --- clang/lib/Analysis/ThreadSafety.cpp | 11 +++-- .../SemaCXX/warn-thread-safety-analysis.cpp | 47 +++++++++++++++++-- 2 files changed, 49 insertions(+), 9 deletions(-) diff --git a/clang/lib/Analysis/ThreadSafety.cpp b/clang/lib/Analysis/ThreadSafety.cpp index 3eb1b640e7290..b09de2bd71f24 100644 --- a/clang/lib/Analysis/ThreadSafety.cpp +++ b/clang/lib/Analysis/ThreadSafety.cpp @@ -865,7 +865,7 @@ class LockableFactEntry : public FactEntry { handleRemovalFromIntersection(const FactSet &FSet, FactManager &FactMan, SourceLocation JoinLoc, LockErrorKind LEK, ThreadSafetyHandler &Handler) const override { - if (!managed() && !asserted() && !negative() && !isUniversal()) { + if (!asserted() && !negative() && !isUniversal()) { Handler.handleMutexHeldEndOfScope("mutex", toString(), loc(), JoinLoc, LEK); } @@ -2239,7 +2239,7 @@ void ThreadSafetyAnalyzer::intersectAndWarn(FactSet &FSet1, if (Iter1 != FSet1.end()) { if (join(FactMan[*Iter1], LDat2) && LEK1 == LEK_LockedSomePredecessors) *Iter1 = Fact; - } else { + } else if (!LDat2.managed()) { LDat2.handleRemovalFromIntersection(FSet2, FactMan, JoinLoc, LEK1, Handler); } @@ -2251,8 +2251,9 @@ void ThreadSafetyAnalyzer::intersectAndWarn(FactSet &FSet1, const FactEntry *LDat2 = FSet2.findLock(FactMan, *LDat1); if (!LDat2) { - LDat1->handleRemovalFromIntersection(FSet1Orig, FactMan, JoinLoc, LEK2, - Handler); + if (!LDat1->managed() || LEK2 == LEK_LockedSomeLoopIterations) + LDat1->handleRemovalFromIntersection(FSet1Orig, FactMan, JoinLoc, LEK2, + Handler); if (LEK2 == LEK_LockedSomePredecessors) FSet1.removeLock(FactMan, *LDat1); } @@ -2528,7 +2529,7 @@ void ThreadSafetyAnalyzer::runAnalysis(AnalysisDeclContext &AC) { CFGBlock *FirstLoopBlock = *SI; CFGBlockInfo *PreLoop = &BlockInfo[FirstLoopBlock->getBlockID()]; CFGBlockInfo *LoopEnd = &BlockInfo[CurrBlockID]; - intersectAndWarn(LoopEnd->ExitSet, PreLoop->EntrySet, PreLoop->EntryLoc, + intersectAndWarn(PreLoop->EntrySet, LoopEnd->ExitSet, PreLoop->EntryLoc, LEK_LockedSomeLoopIterations); } } diff --git a/clang/test/SemaCXX/warn-thread-safety-analysis.cpp b/clang/test/SemaCXX/warn-thread-safety-analysis.cpp index 8e8bb6f45dde4..e9d41da80517c 100644 --- a/clang/test/SemaCXX/warn-thread-safety-analysis.cpp +++ b/clang/test/SemaCXX/warn-thread-safety-analysis.cpp @@ -636,11 +636,11 @@ void shared_fun_0() { void shared_fun_1() { sls_mu.ReaderLock(); // \ - // expected-warning {{mutex 'sls_mu' is acquired exclusively and shared in the same scope}} + // expected-note {{the other acquisition of mutex 'sls_mu' is here}} do { sls_mu.Unlock(); sls_mu.Lock(); // \ - // expected-note {{the other acquisition of mutex 'sls_mu' is here}} + // expected-warning {{mutex 'sls_mu' is acquired exclusively and shared in the same scope}} } while (getBool()); sls_mu.Unlock(); } @@ -695,11 +695,11 @@ void shared_fun_11() { void shared_bad_0() { sls_mu.Lock(); // \ - // expected-warning {{mutex 'sls_mu' is acquired exclusively and shared in the same scope}} + // expected-note {{the other acquisition of mutex 'sls_mu' is here}} do { sls_mu.Unlock(); sls_mu.ReaderLock(); // \ - // expected-note {{the other acquisition of mutex 'sls_mu' is here}} + // expected-warning {{mutex 'sls_mu' is acquired exclusively and shared in the same scope}} } while (getBool()); sls_mu.Unlock(); } @@ -2773,6 +2773,45 @@ void unlockJoin() { x = 2; // expected-warning {{writing variable 'x' requires holding mutex 'mu' exclusively}} } +void loopAcquire() { + RelockableMutexLock scope(&mu, DeferTraits{}); + for (unsigned i = 1; i < 10; ++i) + scope.Lock(); // We could catch this double lock with negative capabilities. +} + +void loopRelease() { + RelockableMutexLock scope(&mu, ExclusiveTraits{}); // expected-note {{mutex acquired here}} + // We have to warn on this join point despite the lock being managed ... + for (unsigned i = 1; i < 10; ++i) { // expected-warning {{expecting mutex 'mu' to be held at start of each loop}} + x = 1; // ... because we might miss that this doesn't always happen under lock. + if (i == 5) + scope.Unlock(); + } +} + +void loopAcquireContinue() { + RelockableMutexLock scope(&mu, DeferTraits{}); + for (unsigned i = 1; i < 10; ++i) { + x = 1; // expected-warning {{writing variable 'x' requires holding mutex 'mu' exclusively}} + if (i == 5) { + scope.Lock(); + continue; + } + } +} + +void loopReleaseContinue() { + RelockableMutexLock scope(&mu, ExclusiveTraits{}); // expected-note {{mutex acquired here}} + // We have to warn on this join point despite the lock being managed ... + for (unsigned i = 1; i < 10; ++i) { + x = 1; // ... because we might miss that this doesn't always happen under lock. + if (i == 5) { + scope.Unlock(); + continue; // expected-warning {{expecting mutex 'mu' to be held at start of each loop}} + } + } +} + void exclusiveSharedJoin() { RelockableMutexLock scope(&mu, DeferTraits{}); if (b) From e0b90771c318625e51c34c67db3f3dfbbb686df8 Mon Sep 17 00:00:00 2001 From: Aaron Puchert Date: Tue, 29 Jun 2021 23:51:52 +0200 Subject: [PATCH 241/619] Thread safety analysis: Rename parameters of ThreadSafetyAnalyzer::intersectAndWarn (NFC) In D104261 we made the parameters' meaning slightly more specific, this changes their names accordingly. In all uses we're building a new lock set by intersecting existing locksets. The first (modifiable) argument is the new lock set being built, the second (non-modifiable) argument is the exit set of a preceding block. Reviewed By: aaron.ballman, delesley Differential Revision: https://reviews.llvm.org/D104649 --- clang/lib/Analysis/ThreadSafety.cpp | 75 +++++++++++++++-------------- 1 file changed, 38 insertions(+), 37 deletions(-) diff --git a/clang/lib/Analysis/ThreadSafety.cpp b/clang/lib/Analysis/ThreadSafety.cpp index b09de2bd71f24..5b2c882c4235a 100644 --- a/clang/lib/Analysis/ThreadSafety.cpp +++ b/clang/lib/Analysis/ThreadSafety.cpp @@ -1052,13 +1052,13 @@ class ThreadSafetyAnalyzer { bool join(const FactEntry &a, const FactEntry &b); - void intersectAndWarn(FactSet &FSet1, const FactSet &FSet2, - SourceLocation JoinLoc, LockErrorKind LEK1, - LockErrorKind LEK2); + void intersectAndWarn(FactSet &EntrySet, const FactSet &ExitSet, + SourceLocation JoinLoc, LockErrorKind EntryLEK, + LockErrorKind ExitLEK); - void intersectAndWarn(FactSet &FSet1, const FactSet &FSet2, - SourceLocation JoinLoc, LockErrorKind LEK1) { - intersectAndWarn(FSet1, FSet2, JoinLoc, LEK1, LEK1); + void intersectAndWarn(FactSet &EntrySet, const FactSet &ExitSet, + SourceLocation JoinLoc, LockErrorKind LEK) { + intersectAndWarn(EntrySet, ExitSet, JoinLoc, LEK, LEK); } void runAnalysis(AnalysisDeclContext &AC); @@ -2219,43 +2219,44 @@ bool ThreadSafetyAnalyzer::join(const FactEntry &A, const FactEntry &B) { /// are the same. In the event of a difference, we use the intersection of these /// two locksets at the start of D. /// -/// \param FSet1 The first lockset. -/// \param FSet2 The second lockset. +/// \param EntrySet A lockset for entry into a (possibly new) block. +/// \param ExitSet The lockset on exiting a preceding block. /// \param JoinLoc The location of the join point for error reporting -/// \param LEK1 The error message to report if a mutex is missing from LSet1 -/// \param LEK2 The error message to report if a mutex is missing from Lset2 -void ThreadSafetyAnalyzer::intersectAndWarn(FactSet &FSet1, - const FactSet &FSet2, +/// \param EntryLEK The warning if a mutex is missing from \p EntrySet. +/// \param ExitLEK The warning if a mutex is missing from \p ExitSet. +void ThreadSafetyAnalyzer::intersectAndWarn(FactSet &EntrySet, + const FactSet &ExitSet, SourceLocation JoinLoc, - LockErrorKind LEK1, - LockErrorKind LEK2) { - FactSet FSet1Orig = FSet1; - - // Find locks in FSet2 that conflict or are not in FSet1, and warn. - for (const auto &Fact : FSet2) { - const FactEntry &LDat2 = FactMan[Fact]; - - FactSet::iterator Iter1 = FSet1.findLockIter(FactMan, LDat2); - if (Iter1 != FSet1.end()) { - if (join(FactMan[*Iter1], LDat2) && LEK1 == LEK_LockedSomePredecessors) - *Iter1 = Fact; - } else if (!LDat2.managed()) { - LDat2.handleRemovalFromIntersection(FSet2, FactMan, JoinLoc, LEK1, - Handler); + LockErrorKind EntryLEK, + LockErrorKind ExitLEK) { + FactSet EntrySetOrig = EntrySet; + + // Find locks in ExitSet that conflict or are not in EntrySet, and warn. + for (const auto &Fact : ExitSet) { + const FactEntry &ExitFact = FactMan[Fact]; + + FactSet::iterator EntryIt = EntrySet.findLockIter(FactMan, ExitFact); + if (EntryIt != EntrySet.end()) { + if (join(FactMan[*EntryIt], ExitFact) && + EntryLEK == LEK_LockedSomePredecessors) + *EntryIt = Fact; + } else if (!ExitFact.managed()) { + ExitFact.handleRemovalFromIntersection(ExitSet, FactMan, JoinLoc, + EntryLEK, Handler); } } - // Find locks in FSet1 that are not in FSet2, and remove them. - for (const auto &Fact : FSet1Orig) { - const FactEntry *LDat1 = &FactMan[Fact]; - const FactEntry *LDat2 = FSet2.findLock(FactMan, *LDat1); + // Find locks in EntrySet that are not in ExitSet, and remove them. + for (const auto &Fact : EntrySetOrig) { + const FactEntry *EntryFact = &FactMan[Fact]; + const FactEntry *ExitFact = ExitSet.findLock(FactMan, *EntryFact); - if (!LDat2) { - if (!LDat1->managed() || LEK2 == LEK_LockedSomeLoopIterations) - LDat1->handleRemovalFromIntersection(FSet1Orig, FactMan, JoinLoc, LEK2, - Handler); - if (LEK2 == LEK_LockedSomePredecessors) - FSet1.removeLock(FactMan, *LDat1); + if (!ExitFact) { + if (!EntryFact->managed() || ExitLEK == LEK_LockedSomeLoopIterations) + EntryFact->handleRemovalFromIntersection(EntrySetOrig, FactMan, JoinLoc, + ExitLEK, Handler); + if (ExitLEK == LEK_LockedSomePredecessors) + EntrySet.removeLock(FactMan, *EntryFact); } } } From a346372200e7b2b99631bd90691678d5ca03fdd1 Mon Sep 17 00:00:00 2001 From: Adrian Prantl Date: Tue, 29 Jun 2021 14:59:12 -0700 Subject: [PATCH 242/619] Change PathMappingList::FindFile to return an optional result (NFC) This is an NFC modernization refactoring that replaces the combination of a bool return + reference argument, with an Optional return value. Differential Revision: https://reviews.llvm.org/D104405 --- lldb/include/lldb/Target/PathMappingList.h | 9 ++------- lldb/source/Core/Module.cpp | 6 +++++- lldb/source/Core/SourceManager.cpp | 18 +++++++++++------- lldb/source/Symbol/LineEntry.cpp | 8 ++++---- lldb/source/Target/PathMappingList.cpp | 18 +++++++++--------- 5 files changed, 31 insertions(+), 28 deletions(-) diff --git a/lldb/include/lldb/Target/PathMappingList.h b/lldb/include/lldb/Target/PathMappingList.h index 5d8e2a1b4d242..46d7a427d3071 100644 --- a/lldb/include/lldb/Target/PathMappingList.h +++ b/lldb/include/lldb/Target/PathMappingList.h @@ -90,14 +90,9 @@ class PathMappingList { /// \param[in] orig_spec /// The original source file path to try and remap. /// - /// \param[out] new_spec - /// The newly remapped filespec that is guaranteed to exist. - /// /// \return - /// /b true if \a orig_spec was successfully located and - /// \a new_spec is filled in with an existing file spec, - /// \b false otherwise. - bool FindFile(const FileSpec &orig_spec, FileSpec &new_spec) const; + /// The newly remapped filespec that is guaranteed to exist. + llvm::Optional FindFile(const FileSpec &orig_spec) const; uint32_t FindIndexForPath(ConstString path) const; diff --git a/lldb/source/Core/Module.cpp b/lldb/source/Core/Module.cpp index 6502518f9247f..af7128496812d 100644 --- a/lldb/source/Core/Module.cpp +++ b/lldb/source/Core/Module.cpp @@ -1598,7 +1598,11 @@ bool Module::MatchesModuleSpec(const ModuleSpec &module_ref) { bool Module::FindSourceFile(const FileSpec &orig_spec, FileSpec &new_spec) const { std::lock_guard guard(m_mutex); - return m_source_mappings.FindFile(orig_spec, new_spec); + if (auto remapped = m_source_mappings.FindFile(orig_spec)) { + new_spec = *remapped; + return true; + } + return false; } bool Module::RemapSourceFile(llvm::StringRef path, diff --git a/lldb/source/Core/SourceManager.cpp b/lldb/source/Core/SourceManager.cpp index 691bda6592e2e..3d51d42897c23 100644 --- a/lldb/source/Core/SourceManager.cpp +++ b/lldb/source/Core/SourceManager.cpp @@ -441,13 +441,17 @@ void SourceManager::File::CommonInitializer(const FileSpec &file_spec, } // Try remapping if m_file_spec does not correspond to an existing file. if (!FileSystem::Instance().Exists(m_file_spec)) { - FileSpec new_file_spec; - // Check target specific source remappings first, then fall back to - // modules objects can have individual path remappings that were - // detected when the debug info for a module was found. then - if (target->GetSourcePathMap().FindFile(m_file_spec, new_file_spec) || - target->GetImages().FindSourceFile(m_file_spec, new_file_spec)) { - m_file_spec = new_file_spec; + // Check target specific source remappings (i.e., the + // target.source-map setting), then fall back to the module + // specific remapping (i.e., the .dSYM remapping dictionary). + auto remapped = target->GetSourcePathMap().FindFile(m_file_spec); + if (!remapped) { + FileSpec new_spec; + if (target->GetImages().FindSourceFile(m_file_spec, new_spec)) + remapped = new_spec; + } + if (remapped) { + m_file_spec = *remapped; m_mod_time = FileSystem::Instance().GetModificationTime(m_file_spec); } } diff --git a/lldb/source/Symbol/LineEntry.cpp b/lldb/source/Symbol/LineEntry.cpp index 58bf8509a9589..1b2801cd03683 100644 --- a/lldb/source/Symbol/LineEntry.cpp +++ b/lldb/source/Symbol/LineEntry.cpp @@ -252,9 +252,9 @@ AddressRange LineEntry::GetSameLineContiguousAddressRange( void LineEntry::ApplyFileMappings(lldb::TargetSP target_sp) { if (target_sp) { - // Apply any file remappings to our file - FileSpec new_file_spec; - if (target_sp->GetSourcePathMap().FindFile(original_file, new_file_spec)) - file = new_file_spec; + // Apply any file remappings to our file. + if (auto new_file_spec = + target_sp->GetSourcePathMap().FindFile(original_file)) + file = *new_file_spec; } } diff --git a/lldb/source/Target/PathMappingList.cpp b/lldb/source/Target/PathMappingList.cpp index b6dbf551ea57d..f9d415bcf15d7 100644 --- a/lldb/source/Target/PathMappingList.cpp +++ b/lldb/source/Target/PathMappingList.cpp @@ -194,16 +194,16 @@ bool PathMappingList::ReverseRemapPath(const FileSpec &file, FileSpec &fixed) co return false; } -bool PathMappingList::FindFile(const FileSpec &orig_spec, - FileSpec &new_spec) const { +llvm::Optional +PathMappingList::FindFile(const FileSpec &orig_spec) const { if (m_pairs.empty()) - return false; - + return {}; + std::string orig_path = orig_spec.GetPath(); if (orig_path.empty()) - return false; - + return {}; + bool orig_is_relative = orig_spec.IsRelative(); for (auto entry : m_pairs) { @@ -228,15 +228,15 @@ bool PathMappingList::FindFile(const FileSpec &orig_spec, continue; if (orig_ref.consume_front(prefix_ref)) { + FileSpec new_spec; new_spec.SetFile(entry.second.GetCString(), FileSpec::Style::native); new_spec.AppendPathComponent(orig_ref); if (FileSystem::Instance().Exists(new_spec)) - return true; + return new_spec; } } - new_spec.Clear(); - return false; + return {}; } bool PathMappingList::Replace(ConstString path, From 302b1b97180907011aae610b9f51d4b9186c9821 Mon Sep 17 00:00:00 2001 From: Adrian Prantl Date: Tue, 29 Jun 2021 15:14:31 -0700 Subject: [PATCH 243/619] Express PathMappingList::FindFile() in terms of PathMappingList::RemapPath() NFC. This patch replaces the function body FindFile() with a call to RemapPath(), since the two functions implement the same functionality. Differential Revision: https://reviews.llvm.org/D104406 --- lldb/source/Target/PathMappingList.cpp | 44 +++----------------------- 1 file changed, 4 insertions(+), 40 deletions(-) diff --git a/lldb/source/Target/PathMappingList.cpp b/lldb/source/Target/PathMappingList.cpp index f9d415bcf15d7..8a8cc1c8ab9be 100644 --- a/lldb/source/Target/PathMappingList.cpp +++ b/lldb/source/Target/PathMappingList.cpp @@ -194,48 +194,12 @@ bool PathMappingList::ReverseRemapPath(const FileSpec &file, FileSpec &fixed) co return false; } -llvm::Optional -PathMappingList::FindFile(const FileSpec &orig_spec) const { - if (m_pairs.empty()) - return {}; - - std::string orig_path = orig_spec.GetPath(); - - if (orig_path.empty()) - return {}; - - bool orig_is_relative = orig_spec.IsRelative(); - for (auto entry : m_pairs) { - llvm::StringRef orig_ref(orig_path); - llvm::StringRef prefix_ref = entry.first.GetStringRef(); - if (orig_ref.size() < prefix_ref.size()) - continue; - // We consider a relative prefix or one of just "." to - // mean "only apply to relative paths". - bool prefix_is_relative = false; - - if (prefix_ref == ".") { - prefix_is_relative = true; - // Remove the "." since it will have been removed from the - // FileSpec paths already. - prefix_ref = prefix_ref.drop_front(); - } else { - FileSpec prefix_spec(prefix_ref, FileSpec::Style::native); - prefix_is_relative = prefix_spec.IsRelative(); - } - if (prefix_is_relative != orig_is_relative) - continue; +llvm::Optional PathMappingList::FindFile(const FileSpec &orig_spec) const { + if (auto remapped = RemapPath(orig_spec.GetPath())) + if (FileSystem::Instance().Exists(*remapped)) + return remapped; - if (orig_ref.consume_front(prefix_ref)) { - FileSpec new_spec; - new_spec.SetFile(entry.second.GetCString(), FileSpec::Style::native); - new_spec.AppendPathComponent(orig_ref); - if (FileSystem::Instance().Exists(new_spec)) - return new_spec; - } - } - return {}; } From a0e1b11fac7a1599faec21d13fae45c8571de02c Mon Sep 17 00:00:00 2001 From: Adrian Prantl Date: Tue, 29 Jun 2021 15:19:31 -0700 Subject: [PATCH 244/619] Modernize Module::RemapFile to return an Optional (NFC) This addresses feedback raised in https://reviews.llvm.org/D104404. Differential Revision: https://reviews.llvm.org/D104724 --- lldb/include/lldb/Core/Module.h | 9 +++------ lldb/source/Core/Module.cpp | 11 ++++------- .../Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp | 12 +++++++----- 3 files changed, 14 insertions(+), 18 deletions(-) diff --git a/lldb/include/lldb/Core/Module.h b/lldb/include/lldb/Core/Module.h index a9ec1e890dabd..dd7100c4616c3 100644 --- a/lldb/include/lldb/Core/Module.h +++ b/lldb/include/lldb/Core/Module.h @@ -850,13 +850,10 @@ class Module : public std::enable_shared_from_this, /// \param[in] path /// The original source file path to try and remap. /// - /// \param[out] new_path - /// The newly remapped filespec that is may or may not exist. - /// /// \return - /// /b true if \a path was successfully located and \a new_path - /// is filled in with a new source path, \b false otherwise. - bool RemapSourceFile(llvm::StringRef path, std::string &new_path) const; + /// The newly remapped filespec that is may or may not exist if + /// \a path was successfully located. + llvm::Optional RemapSourceFile(llvm::StringRef path) const; bool RemapSourceFile(const char *, std::string &) const = delete; /// Update the ArchSpec to a more specific variant. diff --git a/lldb/source/Core/Module.cpp b/lldb/source/Core/Module.cpp index af7128496812d..fb805353e47b8 100644 --- a/lldb/source/Core/Module.cpp +++ b/lldb/source/Core/Module.cpp @@ -1605,14 +1605,11 @@ bool Module::FindSourceFile(const FileSpec &orig_spec, return false; } -bool Module::RemapSourceFile(llvm::StringRef path, - std::string &new_path) const { +llvm::Optional Module::RemapSourceFile(llvm::StringRef path) const { std::lock_guard guard(m_mutex); - if (auto remapped = m_source_mappings.RemapPath(path)) { - new_path = remapped->GetPath(); - return true; - } - return false; + if (auto remapped = m_source_mappings.RemapPath(path)) + return remapped->GetPath(); + return {}; } void Module::RegisterXcodeSDK(llvm::StringRef sdk_name, llvm::StringRef sysroot) { diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index ca02d64709e89..6549e4fca0f95 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -240,9 +240,12 @@ ParseSupportFilesFromPrologue(const lldb::ModuleSP &module, const size_t number_of_files = prologue.FileNames.size(); for (size_t idx = first_file; idx <= number_of_files; ++idx) { std::string remapped_file; - if (auto file_path = GetFileByIndex(prologue, idx, compile_dir, style)) - if (!module->RemapSourceFile(llvm::StringRef(*file_path), remapped_file)) + if (auto file_path = GetFileByIndex(prologue, idx, compile_dir, style)) { + if (auto remapped = module->RemapSourceFile(llvm::StringRef(*file_path))) + remapped_file = *remapped; + else remapped_file = std::move(*file_path); + } // Unconditionally add an entry, so the indices match up. support_files.EmplaceBack(remapped_file, style); @@ -681,9 +684,8 @@ static void MakeAbsoluteAndRemap(FileSpec &file_spec, DWARFUnit &dwarf_cu, // files are NFS mounted. file_spec.MakeAbsolute(dwarf_cu.GetCompilationDirectory()); - std::string remapped_file; - if (module_sp->RemapSourceFile(file_spec.GetPath(), remapped_file)) - file_spec.SetFile(remapped_file, FileSpec::Style::native); + if (auto remapped_file = module_sp->RemapSourceFile(file_spec.GetPath())) + file_spec.SetFile(*remapped_file, FileSpec::Style::native); } lldb::CompUnitSP SymbolFileDWARF::ParseCompileUnit(DWARFCompileUnit &dwarf_cu) { From 21e013303bb7d0dbb9106283af0fb966fe45af42 Mon Sep 17 00:00:00 2001 From: Adrian Prantl Date: Tue, 29 Jun 2021 15:24:36 -0700 Subject: [PATCH 245/619] Improve path remapping in cross-debugging scenarios This patch implements a slight improvement when debugging across platforms and remapping source paths that are in a non-native format. See the unit test for examples. rdar://79205675 Differential Revision: https://reviews.llvm.org/D104407 --- lldb/source/Target/PathMappingList.cpp | 23 +++++++++++++-- lldb/unittests/Target/PathMappingListTest.cpp | 28 ++++++++++++++++++- 2 files changed, 47 insertions(+), 4 deletions(-) diff --git a/lldb/source/Target/PathMappingList.cpp b/lldb/source/Target/PathMappingList.cpp index 8a8cc1c8ab9be..86b0ad5f933ff 100644 --- a/lldb/source/Target/PathMappingList.cpp +++ b/lldb/source/Target/PathMappingList.cpp @@ -152,6 +152,18 @@ bool PathMappingList::RemapPath(ConstString path, return false; } +/// Append components to path, applying style. +static void AppendPathComponents(FileSpec &path, llvm::StringRef components, + llvm::sys::path::Style style) { + auto component = llvm::sys::path::begin(components, style); + auto e = llvm::sys::path::end(components); + while (component != e && + llvm::sys::path::is_separator(*component->data(), style)) + ++component; + for (; component != e; ++component) + path.AppendPathComponent(*component); +} + llvm::Optional PathMappingList::RemapPath(llvm::StringRef path) const { if (m_pairs.empty() || path.empty()) @@ -175,7 +187,9 @@ PathMappingList::RemapPath(llvm::StringRef path) const { continue; } FileSpec remapped(it.second.GetStringRef()); - remapped.AppendPathComponent(path); + auto orig_style = FileSpec::GuessPathStyle(prefix).getValueOr( + llvm::sys::path::Style::native); + AppendPathComponents(remapped, path, orig_style); return remapped; } return {}; @@ -187,8 +201,11 @@ bool PathMappingList::ReverseRemapPath(const FileSpec &file, FileSpec &fixed) co for (const auto &it : m_pairs) { if (!path_ref.consume_front(it.second.GetStringRef())) continue; - fixed.SetFile(it.first.GetStringRef(), FileSpec::Style::native); - fixed.AppendPathComponent(path_ref); + auto orig_file = it.first.GetStringRef(); + auto orig_style = FileSpec::GuessPathStyle(orig_file).getValueOr( + llvm::sys::path::Style::native); + fixed.SetFile(orig_file, orig_style); + AppendPathComponents(fixed, path_ref, orig_style); return true; } return false; diff --git a/lldb/unittests/Target/PathMappingListTest.cpp b/lldb/unittests/Target/PathMappingListTest.cpp index 66fd97c17f624..90b6f1134a2b6 100644 --- a/lldb/unittests/Target/PathMappingListTest.cpp +++ b/lldb/unittests/Target/PathMappingListTest.cpp @@ -6,9 +6,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/ArrayRef.h" #include "lldb/Target/PathMappingList.h" #include "lldb/Utility/FileSpec.h" +#include "llvm/ADT/ArrayRef.h" #include "gtest/gtest.h" #include @@ -19,6 +19,8 @@ struct Matches { FileSpec original; FileSpec remapped; Matches(const char *o, const char *r) : original(o), remapped(r) {} + Matches(const char *o, llvm::sys::path::Style style, const char *r) + : original(o, style), remapped(r) {} }; } // namespace @@ -112,3 +114,27 @@ TEST(PathMappingListTest, RemapRoot) { }; TestPathMappings(map, matches, fails); } + +#ifndef _WIN32 +TEST(PathMappingListTest, CrossPlatformTests) { + PathMappingList map; + map.Append(ConstString(R"(C:\old)"), ConstString("/new"), false); + Matches matches[] = { + {R"(C:\old)", llvm::sys::path::Style::windows, "/new"}, + {R"(C:\old\)", llvm::sys::path::Style::windows, "/new"}, + {R"(C:\old\foo\.)", llvm::sys::path::Style::windows, "/new/foo"}, + {R"(C:\old\foo.c)", llvm::sys::path::Style::windows, "/new/foo.c"}, + {R"(C:\old\foo.c\.)", llvm::sys::path::Style::windows, "/new/foo.c"}, + {R"(C:\old\.\foo.c)", llvm::sys::path::Style::windows, "/new/foo.c"}, + }; + ConstString fails[] = { + ConstString("/foo"), + ConstString("/"), + ConstString("foo.c"), + ConstString("./foo.c"), + ConstString("../foo.c"), + ConstString("../bar/foo.c"), + }; + TestPathMappings(map, matches, fails); +} +#endif From e0b713a0357aa31ef906111115d4e881503e56ba Mon Sep 17 00:00:00 2001 From: Dhruva Chakrabarti Date: Tue, 29 Jun 2021 15:07:57 -0700 Subject: [PATCH 246/619] [libomptarget] [amdgpu] Change default number of teams per computation unit This patch is related to https://reviews.llvm.org/D98832. Based on discussions there, I decided to separate out the teams default as this patch. This change is to increase the number of teams per computation unit so as to provide more wavefronts for hiding latency. This change improves performance for some programs, including 20-50% for some Stream benchmarks. Reviewed By: JonChesterfield Differential Revision: https://reviews.llvm.org/D99003 --- openmp/libomptarget/plugins/amdgpu/src/rtl.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp index aaa68121db105..be64ed2bbfa41 100644 --- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp @@ -70,6 +70,10 @@ hostrpc_assign_buffer(hsa_agent_t, hsa_queue_t *, uint32_t device_id) { } } +// Heuristic parameters used for kernel launch +// Number of teams per CU to allow scheduling flexibility +static const unsigned DefaultTeamsPerCU = 4; + int print_kernel_trace; #ifdef OMPTARGET_DEBUG @@ -1083,7 +1087,7 @@ int32_t __tgt_rtl_init_device(int device_id) { DeviceInfo.EnvNumTeams); } else { char *TeamsPerCUEnvStr = getenv("OMP_TARGET_TEAMS_PER_PROC"); - int TeamsPerCU = 1; // default number of teams per CU is 1 + int TeamsPerCU = DefaultTeamsPerCU; if (TeamsPerCUEnvStr) { TeamsPerCU = std::stoi(TeamsPerCUEnvStr); } From 3644726a78e37823b1687a7aa8d186e91570ffe2 Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Mon, 28 Jun 2021 15:43:10 -0700 Subject: [PATCH 247/619] [Clang][NVPTX] Add NVPTX intrinsics and builtins for CUDA PTX 6.5 and 7.0 WMMA and MMA instructions Adds NVPTX builtins and intrinsics for the CUDA PTX `wmma.load`, `wmma.store`, `wmma.mma`, and `mma` instructions added in PTX 6.5 and 7.0. PTX ISA description of - `wmma.load`: https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#warp-level-matrix-instructions-wmma-ld - `wmma.store`: https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#warp-level-matrix-instructions-wmma-st - `wmma.mma`: https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#warp-level-matrix-instructions-wmma-mma - `mma`: https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#warp-level-matrix-instructions-mma Overview of `wmma.mma` and `mma` matrix shape/type combinations added with specific PTX versions: https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#warp-level-matrix-shape Authored-by: Steffen Larsen Co-Authored-by: Stuart Adams Reviewed By: tra Differential Revision: https://reviews.llvm.org/D104847 --- clang/include/clang/Basic/BuiltinsNVPTX.def | 23 + clang/lib/CodeGen/CGBuiltin.cpp | 220 ++++++---- clang/test/CodeGen/builtins-nvptx-mma.cu | 169 +++++++- clang/test/CodeGen/builtins-nvptx-mma.py | 114 +++-- llvm/include/llvm/IR/IntrinsicsNVVM.td | 409 +++++++++++++----- llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 94 +++- llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 1 + llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 207 ++++++--- llvm/test/CodeGen/NVPTX/lit.local.cfg | 1 + llvm/test/CodeGen/NVPTX/wmma.py | 454 ++++++++++++++++---- 10 files changed, 1323 insertions(+), 369 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.def b/clang/include/clang/Basic/BuiltinsNVPTX.def index 98f3c659b7cec..e815138a15c15 100644 --- a/clang/include/clang/Basic/BuiltinsNVPTX.def +++ b/clang/include/clang/Basic/BuiltinsNVPTX.def @@ -759,6 +759,29 @@ TARGET_BUILTIN(__imma_m8n8k32_mma_s4, "vi*iC*iC*iC*IiIi", "", AND(SM_75,PTX63)) TARGET_BUILTIN(__imma_m8n8k32_mma_u4, "vi*iC*iC*iC*IiIi", "", AND(SM_75,PTX63)) TARGET_BUILTIN(__imma_m8n8k32_st_c_i32, "vi*iC*UiIi", "", AND(SM_75,PTX63)) +// Builtins to support double and alternate float WMMA instructions on sm_80 +TARGET_BUILTIN(__dmma_m8n8k4_ld_a, "vd*dC*UiIi", "", AND(SM_80,PTX70)) +TARGET_BUILTIN(__dmma_m8n8k4_ld_b, "vd*dC*UiIi", "", AND(SM_80,PTX70)) +TARGET_BUILTIN(__dmma_m8n8k4_ld_c, "vd*dC*UiIi", "", AND(SM_80,PTX70)) +TARGET_BUILTIN(__dmma_m8n8k4_st_c_f64, "vd*dC*UiIi", "", AND(SM_80,PTX70)) +TARGET_BUILTIN(__dmma_m8n8k4_mma_f64, "vd*dC*dC*dC*IiIi", "", AND(SM_80,PTX70)) + +TARGET_BUILTIN(__mma_bf16_m16n16k16_ld_a, "vi*iC*UiIi", "", AND(SM_80,PTX70)) +TARGET_BUILTIN(__mma_bf16_m16n16k16_ld_b, "vi*iC*UiIi", "", AND(SM_80,PTX70)) +TARGET_BUILTIN(__mma_bf16_m16n16k16_mma_f32, "vf*iC*iC*fC*IiIi", "", AND(SM_80,PTX70)) +TARGET_BUILTIN(__mma_bf16_m8n32k16_ld_a, "vi*iC*UiIi", "", AND(SM_80,PTX70)) +TARGET_BUILTIN(__mma_bf16_m8n32k16_ld_b, "vi*iC*UiIi", "", AND(SM_80,PTX70)) +TARGET_BUILTIN(__mma_bf16_m8n32k16_mma_f32, "vf*iC*iC*fC*IiIi", "", AND(SM_80,PTX70)) +TARGET_BUILTIN(__mma_bf16_m32n8k16_ld_a, "vi*iC*UiIi", "", AND(SM_80,PTX70)) +TARGET_BUILTIN(__mma_bf16_m32n8k16_ld_b, "vi*iC*UiIi", "", AND(SM_80,PTX70)) +TARGET_BUILTIN(__mma_bf16_m32n8k16_mma_f32, "vf*iC*iC*fC*IiIi", "", AND(SM_80,PTX70)) + +TARGET_BUILTIN(__mma_tf32_m16n16k8_ld_a, "vi*iC*UiIi", "", AND(SM_80,PTX70)) +TARGET_BUILTIN(__mma_tf32_m16n16k8_ld_b, "vi*iC*UiIi", "", AND(SM_80,PTX70)) +TARGET_BUILTIN(__mma_tf32_m16n16k8_ld_c, "vf*fC*UiIi", "", AND(SM_80,PTX70)) +TARGET_BUILTIN(__mma_m16n16k8_st_c_f32, "vf*fC*UiIi", "", AND(SM_80,PTX70)) +TARGET_BUILTIN(__mma_tf32_m16n16k8_mma_f32, "vf*iC*iC*fC*IiIi", "", AND(SM_80,PTX70)) + // Async Copy TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive, "vWi*", "", AND(SM_80,PTX70)) TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive_shared, "vWi*3", "", AND(SM_80,PTX70)) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 2f2d5e6c83d77..3fc9ba414397e 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -16402,6 +16402,34 @@ static NVPTXMmaLdstInfo getNVPTXMmaLdstInfo(unsigned BuiltinID) { case NVPTX::BI__bmma_m8n8k128_ld_c: return MMA_LDST(2, m8n8k128_load_c_s32); + // Double MMA loads + case NVPTX::BI__dmma_m8n8k4_ld_a: + return MMA_LDST(1, m8n8k4_load_a_f64); + case NVPTX::BI__dmma_m8n8k4_ld_b: + return MMA_LDST(1, m8n8k4_load_b_f64); + case NVPTX::BI__dmma_m8n8k4_ld_c: + return MMA_LDST(2, m8n8k4_load_c_f64); + + // Alternate float MMA loads + case NVPTX::BI__mma_bf16_m16n16k16_ld_a: + return MMA_LDST(4, m16n16k16_load_a_bf16); + case NVPTX::BI__mma_bf16_m16n16k16_ld_b: + return MMA_LDST(4, m16n16k16_load_b_bf16); + case NVPTX::BI__mma_bf16_m8n32k16_ld_a: + return MMA_LDST(2, m8n32k16_load_a_bf16); + case NVPTX::BI__mma_bf16_m8n32k16_ld_b: + return MMA_LDST(8, m8n32k16_load_b_bf16); + case NVPTX::BI__mma_bf16_m32n8k16_ld_a: + return MMA_LDST(8, m32n8k16_load_a_bf16); + case NVPTX::BI__mma_bf16_m32n8k16_ld_b: + return MMA_LDST(2, m32n8k16_load_b_bf16); + case NVPTX::BI__mma_tf32_m16n16k8_ld_a: + return MMA_LDST(4, m16n16k8_load_a_tf32); + case NVPTX::BI__mma_tf32_m16n16k8_ld_b: + return MMA_LDST(2, m16n16k8_load_b_tf32); + case NVPTX::BI__mma_tf32_m16n16k8_ld_c: + return MMA_LDST(8, m16n16k8_load_c_f32); + // NOTE: We need to follow inconsitent naming scheme used by NVCC. Unlike // PTX and LLVM IR where stores always use fragment D, NVCC builtins always // use fragment C for both loads and stores. @@ -16433,6 +16461,14 @@ static NVPTXMmaLdstInfo getNVPTXMmaLdstInfo(unsigned BuiltinID) { case NVPTX::BI__bmma_m8n8k128_st_c_i32: return MMA_LDST(2, m8n8k128_store_d_s32); + // Double MMA store + case NVPTX::BI__dmma_m8n8k4_st_c_f64: + return MMA_LDST(2, m8n8k4_store_d_f64); + + // Alternate float MMA store + case NVPTX::BI__mma_m16n16k8_st_c_f32: + return MMA_LDST(8, m16n16k8_store_d_f32); + default: llvm_unreachable("Unknown MMA builtin"); } @@ -16446,10 +16482,14 @@ struct NVPTXMmaInfo { unsigned NumEltsB; unsigned NumEltsC; unsigned NumEltsD; + + // Variants are ordered by layout-A/layout-B/satf, where 'row' has priority + // over 'col' for layout. The index of non-satf variants is expected to match + // the undocumented layout constants used by CUDA's mma.hpp. std::array Variants; unsigned getMMAIntrinsic(int Layout, bool Satf) { - unsigned Index = Layout * 2 + Satf; + unsigned Index = Layout + 4 * Satf; if (Index >= Variants.size()) return 0; return Variants[Index]; @@ -16460,93 +16500,107 @@ struct NVPTXMmaInfo { // Layout and Satf, 0 otherwise. static NVPTXMmaInfo getNVPTXMmaInfo(unsigned BuiltinID) { // clang-format off -#define MMA_VARIANTS(geom, type) {{ \ +#define MMA_VARIANTS(geom, type) \ Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type, \ - Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \ Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \ - Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \ Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type, \ + Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type +#define MMA_SATF_VARIANTS(geom, type) \ + MMA_VARIANTS(geom, type), \ + Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \ + Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \ Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \ - Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type, \ - Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite \ - }} + Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite // Sub-integer MMA only supports row.col layout. -#define MMA_VARIANTS_I4(geom, type) {{ \ - 0, \ +#define MMA_VARIANTS_I4(geom, type) \ 0, \ Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \ - Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \ 0, \ 0, \ 0, \ - 0 \ - }} -// b1 MMA does not support .satfinite. -#define MMA_VARIANTS_B1(geom, type) {{ \ + Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \ 0, \ + 0 +// b1 MMA does not support .satfinite. +#define MMA_VARIANTS_B1(geom, type) \ 0, \ Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \ 0, \ 0, \ 0, \ 0, \ - 0 \ - }} - // clang-format on - switch (BuiltinID) { - // FP MMA - // Note that 'type' argument of MMA_VARIANT uses D_C notation, while - // NumEltsN of return value are ordered as A,B,C,D. - case NVPTX::BI__hmma_m16n16k16_mma_f16f16: - return {8, 8, 4, 4, MMA_VARIANTS(m16n16k16, f16_f16)}; - case NVPTX::BI__hmma_m16n16k16_mma_f32f16: - return {8, 8, 4, 8, MMA_VARIANTS(m16n16k16, f32_f16)}; - case NVPTX::BI__hmma_m16n16k16_mma_f16f32: - return {8, 8, 8, 4, MMA_VARIANTS(m16n16k16, f16_f32)}; - case NVPTX::BI__hmma_m16n16k16_mma_f32f32: - return {8, 8, 8, 8, MMA_VARIANTS(m16n16k16, f32_f32)}; - case NVPTX::BI__hmma_m32n8k16_mma_f16f16: - return {8, 8, 4, 4, MMA_VARIANTS(m32n8k16, f16_f16)}; - case NVPTX::BI__hmma_m32n8k16_mma_f32f16: - return {8, 8, 4, 8, MMA_VARIANTS(m32n8k16, f32_f16)}; - case NVPTX::BI__hmma_m32n8k16_mma_f16f32: - return {8, 8, 8, 4, MMA_VARIANTS(m32n8k16, f16_f32)}; - case NVPTX::BI__hmma_m32n8k16_mma_f32f32: - return {8, 8, 8, 8, MMA_VARIANTS(m32n8k16, f32_f32)}; - case NVPTX::BI__hmma_m8n32k16_mma_f16f16: - return {8, 8, 4, 4, MMA_VARIANTS(m8n32k16, f16_f16)}; - case NVPTX::BI__hmma_m8n32k16_mma_f32f16: - return {8, 8, 4, 8, MMA_VARIANTS(m8n32k16, f32_f16)}; - case NVPTX::BI__hmma_m8n32k16_mma_f16f32: - return {8, 8, 8, 4, MMA_VARIANTS(m8n32k16, f16_f32)}; - case NVPTX::BI__hmma_m8n32k16_mma_f32f32: - return {8, 8, 8, 8, MMA_VARIANTS(m8n32k16, f32_f32)}; - - // Integer MMA - case NVPTX::BI__imma_m16n16k16_mma_s8: - return {2, 2, 8, 8, MMA_VARIANTS(m16n16k16, s8)}; - case NVPTX::BI__imma_m16n16k16_mma_u8: - return {2, 2, 8, 8, MMA_VARIANTS(m16n16k16, u8)}; - case NVPTX::BI__imma_m32n8k16_mma_s8: - return {4, 1, 8, 8, MMA_VARIANTS(m32n8k16, s8)}; - case NVPTX::BI__imma_m32n8k16_mma_u8: - return {4, 1, 8, 8, MMA_VARIANTS(m32n8k16, u8)}; - case NVPTX::BI__imma_m8n32k16_mma_s8: - return {1, 4, 8, 8, MMA_VARIANTS(m8n32k16, s8)}; - case NVPTX::BI__imma_m8n32k16_mma_u8: - return {1, 4, 8, 8, MMA_VARIANTS(m8n32k16, u8)}; - - // Sub-integer MMA - case NVPTX::BI__imma_m8n8k32_mma_s4: - return {1, 1, 2, 2, MMA_VARIANTS_I4(m8n8k32, s4)}; - case NVPTX::BI__imma_m8n8k32_mma_u4: - return {1, 1, 2, 2, MMA_VARIANTS_I4(m8n8k32, u4)}; - case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1: - return {1, 1, 2, 2, MMA_VARIANTS_B1(m8n8k128, b1)}; - default: - llvm_unreachable("Unexpected builtin ID."); - } + 0, \ + 0 + // clang-format on + switch (BuiltinID) { + // FP MMA + // Note that 'type' argument of MMA_SATF_VARIANTS uses D_C notation, while + // NumEltsN of return value are ordered as A,B,C,D. + case NVPTX::BI__hmma_m16n16k16_mma_f16f16: + return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f16)}}}; + case NVPTX::BI__hmma_m16n16k16_mma_f32f16: + return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f16)}}}; + case NVPTX::BI__hmma_m16n16k16_mma_f16f32: + return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f32)}}}; + case NVPTX::BI__hmma_m16n16k16_mma_f32f32: + return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f32)}}}; + case NVPTX::BI__hmma_m32n8k16_mma_f16f16: + return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f16)}}}; + case NVPTX::BI__hmma_m32n8k16_mma_f32f16: + return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f16)}}}; + case NVPTX::BI__hmma_m32n8k16_mma_f16f32: + return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f32)}}}; + case NVPTX::BI__hmma_m32n8k16_mma_f32f32: + return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f32)}}}; + case NVPTX::BI__hmma_m8n32k16_mma_f16f16: + return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f16)}}}; + case NVPTX::BI__hmma_m8n32k16_mma_f32f16: + return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f16)}}}; + case NVPTX::BI__hmma_m8n32k16_mma_f16f32: + return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f32)}}}; + case NVPTX::BI__hmma_m8n32k16_mma_f32f32: + return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f32)}}}; + + // Integer MMA + case NVPTX::BI__imma_m16n16k16_mma_s8: + return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, s8)}}}; + case NVPTX::BI__imma_m16n16k16_mma_u8: + return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, u8)}}}; + case NVPTX::BI__imma_m32n8k16_mma_s8: + return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, s8)}}}; + case NVPTX::BI__imma_m32n8k16_mma_u8: + return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, u8)}}}; + case NVPTX::BI__imma_m8n32k16_mma_s8: + return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, s8)}}}; + case NVPTX::BI__imma_m8n32k16_mma_u8: + return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, u8)}}}; + + // Sub-integer MMA + case NVPTX::BI__imma_m8n8k32_mma_s4: + return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, s4)}}}; + case NVPTX::BI__imma_m8n8k32_mma_u4: + return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, u4)}}}; + case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1: + return {1, 1, 2, 2, {{MMA_VARIANTS_B1(m8n8k128, b1)}}}; + + // Double MMA + case NVPTX::BI__dmma_m8n8k4_mma_f64: + return {1, 1, 2, 2, {{MMA_VARIANTS(m8n8k4, f64)}}}; + + // Alternate FP MMA + case NVPTX::BI__mma_bf16_m16n16k16_mma_f32: + return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k16, bf16)}}}; + case NVPTX::BI__mma_bf16_m8n32k16_mma_f32: + return {2, 8, 8, 8, {{MMA_VARIANTS(m8n32k16, bf16)}}}; + case NVPTX::BI__mma_bf16_m32n8k16_mma_f32: + return {8, 2, 8, 8, {{MMA_VARIANTS(m32n8k16, bf16)}}}; + case NVPTX::BI__mma_tf32_m16n16k8_mma_f32: + return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k8, tf32)}}}; + default: + llvm_unreachable("Unexpected builtin ID."); + } #undef MMA_VARIANTS +#undef MMA_SATF_VARIANTS #undef MMA_VARIANTS_I4 #undef MMA_VARIANTS_B1 } @@ -16844,7 +16898,20 @@ CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { case NVPTX::BI__bmma_m8n8k128_ld_a_b1: case NVPTX::BI__bmma_m8n8k128_ld_b_b1: case NVPTX::BI__bmma_m8n8k128_ld_c: - { + // Double MMA loads. + case NVPTX::BI__dmma_m8n8k4_ld_a: + case NVPTX::BI__dmma_m8n8k4_ld_b: + case NVPTX::BI__dmma_m8n8k4_ld_c: + // Alternate float MMA loads. + case NVPTX::BI__mma_bf16_m16n16k16_ld_a: + case NVPTX::BI__mma_bf16_m16n16k16_ld_b: + case NVPTX::BI__mma_bf16_m8n32k16_ld_a: + case NVPTX::BI__mma_bf16_m8n32k16_ld_b: + case NVPTX::BI__mma_bf16_m32n8k16_ld_a: + case NVPTX::BI__mma_bf16_m32n8k16_ld_b: + case NVPTX::BI__mma_tf32_m16n16k8_ld_a: + case NVPTX::BI__mma_tf32_m16n16k8_ld_b: + case NVPTX::BI__mma_tf32_m16n16k8_ld_c: { Address Dst = EmitPointerWithAlignment(E->getArg(0)); Value *Src = EmitScalarExpr(E->getArg(1)); Value *Ldm = EmitScalarExpr(E->getArg(2)); @@ -16889,7 +16956,9 @@ CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { case NVPTX::BI__imma_m32n8k16_st_c_i32: case NVPTX::BI__imma_m8n32k16_st_c_i32: case NVPTX::BI__imma_m8n8k32_st_c_i32: - case NVPTX::BI__bmma_m8n8k128_st_c_i32: { + case NVPTX::BI__bmma_m8n8k128_st_c_i32: + case NVPTX::BI__dmma_m8n8k4_st_c_f64: + case NVPTX::BI__mma_m16n16k8_st_c_f32: { Value *Dst = EmitScalarExpr(E->getArg(0)); Address Src = EmitPointerWithAlignment(E->getArg(1)); Value *Ldm = EmitScalarExpr(E->getArg(2)); @@ -16941,7 +17010,12 @@ CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { case NVPTX::BI__imma_m8n32k16_mma_u8: case NVPTX::BI__imma_m8n8k32_mma_s4: case NVPTX::BI__imma_m8n8k32_mma_u4: - case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1: { + case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1: + case NVPTX::BI__dmma_m8n8k4_mma_f64: + case NVPTX::BI__mma_bf16_m16n16k16_mma_f32: + case NVPTX::BI__mma_bf16_m8n32k16_mma_f32: + case NVPTX::BI__mma_bf16_m32n8k16_mma_f32: + case NVPTX::BI__mma_tf32_m16n16k8_mma_f32: { Address Dst = EmitPointerWithAlignment(E->getArg(0)); Address SrcA = EmitPointerWithAlignment(E->getArg(1)); Address SrcB = EmitPointerWithAlignment(E->getArg(2)); diff --git a/clang/test/CodeGen/builtins-nvptx-mma.cu b/clang/test/CodeGen/builtins-nvptx-mma.cu index cc31f6f4779a5..7e9bac86792d2 100644 --- a/clang/test/CodeGen/builtins-nvptx-mma.cu +++ b/clang/test/CodeGen/builtins-nvptx-mma.cu @@ -3,21 +3,20 @@ // *** DO NOT EDIT *** // // This test has been automatically generated by -// builtins-nvtx-mma.py --ptx=63 --gpu-arch=75 +// builtins-nvtx-mma.py --ptx=70 --gpu-arch=80 // -// Make sure we can handle all builtins available on sm_75 with PTX63 -// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -target-cpu sm_75 \ -// RUN: -fcuda-is-device -target-feature +ptx63 \ -// RUN: -DPTX=63 -DSM=75 \ +// Make sure we can handle all builtins available on sm_80 with PTX70 +// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -target-cpu sm_80 \ +// RUN: -fcuda-is-device -target-feature +ptx70 \ +// RUN: -DPTX=70 -DSM=80 \ // RUN: -S -emit-llvm -o - -x cuda %s \ -// RUN: | FileCheck -check-prefixes=CHECK_PTX61_SM70,CHECK_PTX63_SM75,CHECK_PTX63_SM72,CHECK_PTX60_SM70 %s +// RUN: | FileCheck -check-prefixes=CHECK_PTX70_SM80,CHECK_PTX60_SM70,CHECK_PTX63_SM72,CHECK_PTX61_SM70,CHECK_PTX63_SM75 %s // Verify that all builtins have correct constraints. // RUN: %clang_cc1 -triple nvptx-unknown-unknown \ // RUN: -target-cpu sm_60 -target-feature +ptx42 \ -// RUN: -DPTX=63 -DSM=75 -fcuda-is-device -S -o /dev/null -x cuda \ +// RUN: -DPTX=70 -DSM=80 -fcuda-is-device -S -o /dev/null -x cuda \ // RUN: -verify %s - #if !defined(CUDA_VERSION) #define __device__ __attribute__((device)) #define __global__ __attribute__((global)) @@ -29,8 +28,8 @@ typedef unsigned long long uint64_t; // CHECK-LABEL: test_wmma_buitins __device__ void test_wmma_buitins(int *src, int *dst, - float *fsrc, float *fdst, int ldm) { - + float *fsrc, float *fdst, + double *dsrc, double *ddst, int ldm) { #if (PTX >= 60) && (SM >= 70) @@ -751,5 +750,153 @@ __device__ void test_wmma_buitins(int *src, int *dst, // CHECK_PTX63_SM75: call {{.*}} @llvm.nvvm.wmma.m8n8k32.mma.row.col.u4.satfinite // expected-error-re@+1 {{'__imma_m8n8k32_mma_u4' needs target feature (sm_75{{.*}},(ptx63{{.*}}}} __imma_m8n8k32_mma_u4(dst, src, src, src, 1, 1); -#endif // (PTX >= 63) && (SM >= 75) +#endif // (PTX >= 63) && (SM >= 75) + +#if (PTX >= 70) && (SM >= 80) + + // CHECK_PTX70_SM80: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.a.col.stride.bf16 + // expected-error-re@+1 {{'__mma_bf16_m16n16k16_ld_a' needs target feature (sm_80{{.*}},(ptx70{{.*}}}} + __mma_bf16_m16n16k16_ld_a(dst, src, ldm, 1); + // CHECK_PTX70_SM80: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.bf16 + // expected-error-re@+1 {{'__mma_bf16_m16n16k16_ld_a' needs target feature (sm_80{{.*}},(ptx70{{.*}}}} + __mma_bf16_m16n16k16_ld_a(dst, src, ldm, 0); + // CHECK_PTX70_SM80: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.b.col.stride.bf16 + // expected-error-re@+1 {{'__mma_bf16_m16n16k16_ld_b' needs target feature (sm_80{{.*}},(ptx70{{.*}}}} + __mma_bf16_m16n16k16_ld_b(dst, src, ldm, 1); + // CHECK_PTX70_SM80: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.b.row.stride.bf16 + // expected-error-re@+1 {{'__mma_bf16_m16n16k16_ld_b' needs target feature (sm_80{{.*}},(ptx70{{.*}}}} + __mma_bf16_m16n16k16_ld_b(dst, src, ldm, 0); + // CHECK_PTX70_SM80: call {{.*}} @llvm.nvvm.wmma.m16n16k8.load.a.col.stride.tf32 + // expected-error-re@+1 {{'__mma_tf32_m16n16k8_ld_a' needs target feature (sm_80{{.*}},(ptx70{{.*}}}} + __mma_tf32_m16n16k8_ld_a(dst, src, ldm, 1); + // CHECK_PTX70_SM80: call {{.*}} @llvm.nvvm.wmma.m16n16k8.load.a.row.stride.tf32 + // expected-error-re@+1 {{'__mma_tf32_m16n16k8_ld_a' needs target feature (sm_80{{.*}},(ptx70{{.*}}}} + __mma_tf32_m16n16k8_ld_a(dst, src, ldm, 0); + // CHECK_PTX70_SM80: call {{.*}} @llvm.nvvm.wmma.m16n16k8.load.b.col.stride.tf32 + // expected-error-re@+1 {{'__mma_tf32_m16n16k8_ld_b' needs target feature (sm_80{{.*}},(ptx70{{.*}}}} + __mma_tf32_m16n16k8_ld_b(dst, src, ldm, 1); + // CHECK_PTX70_SM80: call {{.*}} @llvm.nvvm.wmma.m16n16k8.load.b.row.stride.tf32 + // expected-error-re@+1 {{'__mma_tf32_m16n16k8_ld_b' needs target feature (sm_80{{.*}},(ptx70{{.*}}}} + __mma_tf32_m16n16k8_ld_b(dst, src, ldm, 0); + // CHECK_PTX70_SM80: call {{.*}} @llvm.nvvm.wmma.m16n16k8.load.c.col.stride.f32 + // expected-error-re@+1 {{'__mma_tf32_m16n16k8_ld_c' needs target feature (sm_80{{.*}},(ptx70{{.*}}}} + __mma_tf32_m16n16k8_ld_c(fdst, fsrc, ldm, 1); + // CHECK_PTX70_SM80: call {{.*}} @llvm.nvvm.wmma.m16n16k8.load.c.row.stride.f32 + // expected-error-re@+1 {{'__mma_tf32_m16n16k8_ld_c' needs target feature (sm_80{{.*}},(ptx70{{.*}}}} + __mma_tf32_m16n16k8_ld_c(fdst, fsrc, ldm, 0); + // CHECK_PTX70_SM80: call {{.*}} @llvm.nvvm.wmma.m16n16k8.store.d.col.stride.f32 + // expected-error-re@+1 {{'__mma_m16n16k8_st_c_f32' needs target feature (sm_80{{.*}},(ptx70{{.*}}}} + __mma_m16n16k8_st_c_f32(fdst, fsrc, ldm, 1); + // CHECK_PTX70_SM80: call {{.*}} @llvm.nvvm.wmma.m16n16k8.store.d.row.stride.f32 + // expected-error-re@+1 {{'__mma_m16n16k8_st_c_f32' needs target feature (sm_80{{.*}},(ptx70{{.*}}}} + __mma_m16n16k8_st_c_f32(fdst, fsrc, ldm, 0); + // CHECK_PTX70_SM80: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.a.col.stride.bf16 + // expected-error-re@+1 {{'__mma_bf16_m32n8k16_ld_a' needs target feature (sm_80{{.*}},(ptx70{{.*}}}} + __mma_bf16_m32n8k16_ld_a(dst, src, ldm, 1); + // CHECK_PTX70_SM80: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.a.row.stride.bf16 + // expected-error-re@+1 {{'__mma_bf16_m32n8k16_ld_a' needs target feature (sm_80{{.*}},(ptx70{{.*}}}} + __mma_bf16_m32n8k16_ld_a(dst, src, ldm, 0); + // CHECK_PTX70_SM80: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.b.col.stride.bf16 + // expected-error-re@+1 {{'__mma_bf16_m32n8k16_ld_b' needs target feature (sm_80{{.*}},(ptx70{{.*}}}} + __mma_bf16_m32n8k16_ld_b(dst, src, ldm, 1); + // CHECK_PTX70_SM80: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.b.row.stride.bf16 + // expected-error-re@+1 {{'__mma_bf16_m32n8k16_ld_b' needs target feature (sm_80{{.*}},(ptx70{{.*}}}} + __mma_bf16_m32n8k16_ld_b(dst, src, ldm, 0); + // CHECK_PTX70_SM80: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.a.col.stride.bf16 + // expected-error-re@+1 {{'__mma_bf16_m8n32k16_ld_a' needs target feature (sm_80{{.*}},(ptx70{{.*}}}} + __mma_bf16_m8n32k16_ld_a(dst, src, ldm, 1); + // CHECK_PTX70_SM80: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.a.row.stride.bf16 + // expected-error-re@+1 {{'__mma_bf16_m8n32k16_ld_a' needs target feature (sm_80{{.*}},(ptx70{{.*}}}} + __mma_bf16_m8n32k16_ld_a(dst, src, ldm, 0); + // CHECK_PTX70_SM80: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.b.col.stride.bf16 + // expected-error-re@+1 {{'__mma_bf16_m8n32k16_ld_b' needs target feature (sm_80{{.*}},(ptx70{{.*}}}} + __mma_bf16_m8n32k16_ld_b(dst, src, ldm, 1); + // CHECK_PTX70_SM80: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.b.row.stride.bf16 + // expected-error-re@+1 {{'__mma_bf16_m8n32k16_ld_b' needs target feature (sm_80{{.*}},(ptx70{{.*}}}} + __mma_bf16_m8n32k16_ld_b(dst, src, ldm, 0); + // CHECK_PTX70_SM80: call {{.*}} @llvm.nvvm.wmma.m8n8k4.load.a.col.stride.f64 + // expected-error-re@+1 {{'__dmma_m8n8k4_ld_a' needs target feature (sm_80{{.*}},(ptx70{{.*}}}} + __dmma_m8n8k4_ld_a(ddst, dsrc, ldm, 1); + // CHECK_PTX70_SM80: call {{.*}} @llvm.nvvm.wmma.m8n8k4.load.a.row.stride.f64 + // expected-error-re@+1 {{'__dmma_m8n8k4_ld_a' needs target feature (sm_80{{.*}},(ptx70{{.*}}}} + __dmma_m8n8k4_ld_a(ddst, dsrc, ldm, 0); + // CHECK_PTX70_SM80: call {{.*}} @llvm.nvvm.wmma.m8n8k4.load.b.col.stride.f64 + // expected-error-re@+1 {{'__dmma_m8n8k4_ld_b' needs target feature (sm_80{{.*}},(ptx70{{.*}}}} + __dmma_m8n8k4_ld_b(ddst, dsrc, ldm, 1); + // CHECK_PTX70_SM80: call {{.*}} @llvm.nvvm.wmma.m8n8k4.load.b.row.stride.f64 + // expected-error-re@+1 {{'__dmma_m8n8k4_ld_b' needs target feature (sm_80{{.*}},(ptx70{{.*}}}} + __dmma_m8n8k4_ld_b(ddst, dsrc, ldm, 0); + // CHECK_PTX70_SM80: call {{.*}} @llvm.nvvm.wmma.m8n8k4.load.c.col.stride.f64 + // expected-error-re@+1 {{'__dmma_m8n8k4_ld_c' needs target feature (sm_80{{.*}},(ptx70{{.*}}}} + __dmma_m8n8k4_ld_c(ddst, dsrc, ldm, 1); + // CHECK_PTX70_SM80: call {{.*}} @llvm.nvvm.wmma.m8n8k4.load.c.row.stride.f64 + // expected-error-re@+1 {{'__dmma_m8n8k4_ld_c' needs target feature (sm_80{{.*}},(ptx70{{.*}}}} + __dmma_m8n8k4_ld_c(ddst, dsrc, ldm, 0); + // CHECK_PTX70_SM80: call {{.*}} @llvm.nvvm.wmma.m8n8k4.store.d.col.stride.f64 + // expected-error-re@+1 {{'__dmma_m8n8k4_st_c_f64' needs target feature (sm_80{{.*}},(ptx70{{.*}}}} + __dmma_m8n8k4_st_c_f64(ddst, dsrc, ldm, 1); + // CHECK_PTX70_SM80: call {{.*}} @llvm.nvvm.wmma.m8n8k4.store.d.row.stride.f64 + // expected-error-re@+1 {{'__dmma_m8n8k4_st_c_f64' needs target feature (sm_80{{.*}},(ptx70{{.*}}}} + __dmma_m8n8k4_st_c_f64(ddst, dsrc, ldm, 0); + // CHECK_PTX70_SM80: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.bf16 + // expected-error-re@+1 {{'__mma_bf16_m16n16k16_mma_f32' needs target feature (sm_80{{.*}},(ptx70{{.*}}}} + __mma_bf16_m16n16k16_mma_f32(fdst, src, src, fsrc, 3, 0); + // CHECK_PTX70_SM80: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.bf16 + // expected-error-re@+1 {{'__mma_bf16_m16n16k16_mma_f32' needs target feature (sm_80{{.*}},(ptx70{{.*}}}} + __mma_bf16_m16n16k16_mma_f32(fdst, src, src, fsrc, 2, 0); + // CHECK_PTX70_SM80: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.bf16 + // expected-error-re@+1 {{'__mma_bf16_m16n16k16_mma_f32' needs target feature (sm_80{{.*}},(ptx70{{.*}}}} + __mma_bf16_m16n16k16_mma_f32(fdst, src, src, fsrc, 1, 0); + // CHECK_PTX70_SM80: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.bf16 + // expected-error-re@+1 {{'__mma_bf16_m16n16k16_mma_f32' needs target feature (sm_80{{.*}},(ptx70{{.*}}}} + __mma_bf16_m16n16k16_mma_f32(fdst, src, src, fsrc, 0, 0); + // CHECK_PTX70_SM80: call {{.*}} @llvm.nvvm.wmma.m16n16k8.mma.col.col.tf32 + // expected-error-re@+1 {{'__mma_tf32_m16n16k8_mma_f32' needs target feature (sm_80{{.*}},(ptx70{{.*}}}} + __mma_tf32_m16n16k8_mma_f32(fdst, src, src, fsrc, 3, 0); + // CHECK_PTX70_SM80: call {{.*}} @llvm.nvvm.wmma.m16n16k8.mma.col.row.tf32 + // expected-error-re@+1 {{'__mma_tf32_m16n16k8_mma_f32' needs target feature (sm_80{{.*}},(ptx70{{.*}}}} + __mma_tf32_m16n16k8_mma_f32(fdst, src, src, fsrc, 2, 0); + // CHECK_PTX70_SM80: call {{.*}} @llvm.nvvm.wmma.m16n16k8.mma.row.col.tf32 + // expected-error-re@+1 {{'__mma_tf32_m16n16k8_mma_f32' needs target feature (sm_80{{.*}},(ptx70{{.*}}}} + __mma_tf32_m16n16k8_mma_f32(fdst, src, src, fsrc, 1, 0); + // CHECK_PTX70_SM80: call {{.*}} @llvm.nvvm.wmma.m16n16k8.mma.row.row.tf32 + // expected-error-re@+1 {{'__mma_tf32_m16n16k8_mma_f32' needs target feature (sm_80{{.*}},(ptx70{{.*}}}} + __mma_tf32_m16n16k8_mma_f32(fdst, src, src, fsrc, 0, 0); + // CHECK_PTX70_SM80: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.bf16 + // expected-error-re@+1 {{'__mma_bf16_m32n8k16_mma_f32' needs target feature (sm_80{{.*}},(ptx70{{.*}}}} + __mma_bf16_m32n8k16_mma_f32(fdst, src, src, fsrc, 3, 0); + // CHECK_PTX70_SM80: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.bf16 + // expected-error-re@+1 {{'__mma_bf16_m32n8k16_mma_f32' needs target feature (sm_80{{.*}},(ptx70{{.*}}}} + __mma_bf16_m32n8k16_mma_f32(fdst, src, src, fsrc, 2, 0); + // CHECK_PTX70_SM80: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.bf16 + // expected-error-re@+1 {{'__mma_bf16_m32n8k16_mma_f32' needs target feature (sm_80{{.*}},(ptx70{{.*}}}} + __mma_bf16_m32n8k16_mma_f32(fdst, src, src, fsrc, 1, 0); + // CHECK_PTX70_SM80: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.bf16 + // expected-error-re@+1 {{'__mma_bf16_m32n8k16_mma_f32' needs target feature (sm_80{{.*}},(ptx70{{.*}}}} + __mma_bf16_m32n8k16_mma_f32(fdst, src, src, fsrc, 0, 0); + // CHECK_PTX70_SM80: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.bf16 + // expected-error-re@+1 {{'__mma_bf16_m8n32k16_mma_f32' needs target feature (sm_80{{.*}},(ptx70{{.*}}}} + __mma_bf16_m8n32k16_mma_f32(fdst, src, src, fsrc, 3, 0); + // CHECK_PTX70_SM80: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.bf16 + // expected-error-re@+1 {{'__mma_bf16_m8n32k16_mma_f32' needs target feature (sm_80{{.*}},(ptx70{{.*}}}} + __mma_bf16_m8n32k16_mma_f32(fdst, src, src, fsrc, 2, 0); + // CHECK_PTX70_SM80: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.bf16 + // expected-error-re@+1 {{'__mma_bf16_m8n32k16_mma_f32' needs target feature (sm_80{{.*}},(ptx70{{.*}}}} + __mma_bf16_m8n32k16_mma_f32(fdst, src, src, fsrc, 1, 0); + // CHECK_PTX70_SM80: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.bf16 + // expected-error-re@+1 {{'__mma_bf16_m8n32k16_mma_f32' needs target feature (sm_80{{.*}},(ptx70{{.*}}}} + __mma_bf16_m8n32k16_mma_f32(fdst, src, src, fsrc, 0, 0); + // CHECK_PTX70_SM80: call {{.*}} @llvm.nvvm.wmma.m8n8k4.mma.col.col.f64 + // expected-error-re@+1 {{'__dmma_m8n8k4_mma_f64' needs target feature (sm_80{{.*}},(ptx70{{.*}}}} + __dmma_m8n8k4_mma_f64(ddst, dsrc, dsrc, dsrc, 3, 0); + // CHECK_PTX70_SM80: call {{.*}} @llvm.nvvm.wmma.m8n8k4.mma.col.row.f64 + // expected-error-re@+1 {{'__dmma_m8n8k4_mma_f64' needs target feature (sm_80{{.*}},(ptx70{{.*}}}} + __dmma_m8n8k4_mma_f64(ddst, dsrc, dsrc, dsrc, 2, 0); + // CHECK_PTX70_SM80: call {{.*}} @llvm.nvvm.wmma.m8n8k4.mma.row.col.f64 + // expected-error-re@+1 {{'__dmma_m8n8k4_mma_f64' needs target feature (sm_80{{.*}},(ptx70{{.*}}}} + __dmma_m8n8k4_mma_f64(ddst, dsrc, dsrc, dsrc, 1, 0); + // CHECK_PTX70_SM80: call {{.*}} @llvm.nvvm.wmma.m8n8k4.mma.row.row.f64 + // expected-error-re@+1 {{'__dmma_m8n8k4_mma_f64' needs target feature (sm_80{{.*}},(ptx70{{.*}}}} + __dmma_m8n8k4_mma_f64(ddst, dsrc, dsrc, dsrc, 0, 0); +#endif // (PTX >= 70) && (SM >= 80) } diff --git a/clang/test/CodeGen/builtins-nvptx-mma.py b/clang/test/CodeGen/builtins-nvptx-mma.py index 1b395fc4f33b1..2ffc21b12fb06 100644 --- a/clang/test/CodeGen/builtins-nvptx-mma.py +++ b/clang/test/CodeGen/builtins-nvptx-mma.py @@ -47,7 +47,13 @@ def make_ldst_ops(geoms, frags, types): in product(geoms, frags, types)] def get_mma_ops(): - return (make_mma_ops(["m16n16k16", "m32n8k16", "m8n32k16"], + return (make_mma_ops(["m16n16k8"], + ["tf32"], [], ["f32"], []) + + make_mma_ops(["m16n16k16", "m32n8k16", "m8n32k16"], + ["bf16"], [], ["f32"], []) + + make_mma_ops(["m8n8k4"], + ["f64"], [], ["f64"], []) + + make_mma_ops(["m16n16k16", "m32n8k16", "m8n32k16"], ["f16"], [], ["f16", "f32"], ["f16", "f32"]) + make_mma_ops(["m16n16k16", "m32n8k16", "m8n32k16"], ["s8", "u8"], [], ["s32"], []) + @@ -55,14 +61,18 @@ def get_mma_ops(): ["s4", "u4"], [], ["s32"], []) + make_mma_ops(["m8n8k128"], ["b1"], [], ["s32"], [])) + def get_ldst_ops(): return (make_ldst_ops(["m16n16k16", "m32n8k16", "m8n32k16"], - ["a", "b"], ["f16", "u8", "s8"]) + + ["a", "b"], ["f16", "u8", "s8", "bf16"]) + make_ldst_ops(["m16n16k16", "m32n8k16", "m8n32k16"], ["c", "d"], ["f16", "f32", "s32"]) + make_ldst_ops(["m8n8k32"], ["a", "b"], ["s4","u4"]) + make_ldst_ops(["m8n8k128"], ["a", "b"], ["b1"]) + - make_ldst_ops(["m8n8k32", "m8n8k128"], ["c", "d"], ["s32"])) + make_ldst_ops(["m8n8k32", "m8n8k128"], ["c", "d"], ["s32"]) + + make_ldst_ops(["m8n8k4"], ["a", "b", "c", "d"], ["f64"]) + + make_ldst_ops(["m16n16k8"], ["a", "b"], ["tf32"]) + + make_ldst_ops(["m16n16k8"], ["c", "d"], ["f32"])) def is_geom_supported(geom): # geometries for FP and ints. @@ -73,6 +83,8 @@ def is_geom_supported(geom): return ptx_version >= 63 and gpu_arch >= 75 if geom == "m16n16k16": return ptx_version >= 60 + if geom in ["m16n16k8", "m8n8k4"]: + return ptx_version >= 70 and gpu_arch >= 80 assert(False) # Unexpected geometry. def is_type_supported(ptx_type): @@ -80,16 +92,24 @@ def is_type_supported(ptx_type): return ptx_version >= 63 and gpu_arch >= 72 if ptx_type in ["s4", "u4", "b1"]: return ptx_version >= 63 and gpu_arch >= 75 + if ptx_type in ["bf16", "tf32", "f64"]: + return ptx_version >= 70 and gpu_arch >= 80 return ptx_version >= 60 and gpu_arch >= 70 +def is_rnd_supported(op): + # rnd is only supported for FP64 WMMA + return op.a.ptx_type == "f64" + def is_mma_variant_supported(op, layout_a, layout_b, satf): if not (is_type_supported(op.a.ptx_type) and is_geom_supported(op.a.geom)): return False - # sub-integer require row/col layout, and no satf. + + if satf and not op.a.ptx_type in ["f16", "s8", "u8", "s4", "u4"]: + return False + + # sub-integer types require row/col layout. if op.a.ptx_type in ["s4", "u4", "b1"]: - if op.a.ptx_type == "b1" and satf: - return False return layout_a == "row" and layout_b == "col" return True @@ -98,7 +118,7 @@ def is_ldst_variant_supported(frag, layout): and is_geom_supported(frag.geom)): return False if frag.ptx_type in ["s4", "u4", "b1"]: - # sub-integer require sm_75 and ptx63, row/col layout for a/b. + # sub-integer types require sm_75 and ptx63, row/col layout for a/b. return ((frag.frag == "a" and layout == "row") or (frag.frag == "b" and layout == "col") or frag.frag in ["c", "d"]) @@ -109,12 +129,21 @@ def get_builtin_prefix(frag): if frag.geom in ["m16n16k16", "m32n8k16", "m8n32k16"]: if frag.ptx_type in ["f16", "f32"]: prefix = "__hmma" + elif frag.ptx_type == "bf16": + prefix = "__mma_bf16" else: prefix = "__imma" elif frag.geom == "m8n8k32": prefix = "__imma" # sub-integers elif frag.geom == "m8n8k128": prefix = "__bmma" + elif frag.geom == "m8n8k4": + prefix = "__dmma" + elif frag.geom == "m16n16k8": + if frag.ptx_type == "f32": + prefix = "__mma" + else: + prefix = "__mma_tf32" assert prefix return prefix @@ -123,10 +152,13 @@ def get_ldst_builtin_name(frag): if prefix == "__hmma": suffix = "" if frag.frag in ["a","b"] else frag.ptx_type - elif prefix in ["__imma", "__bmma"]: - suffix = "" if frag.frag in ["c"] else frag.ptx_type + elif prefix in ["__dmma", "__mma_bf16", "__mma_tf32"]: + suffix = "" if frag.frag in ["a","b","c"] else frag.ptx_type + else: + suffix = "" if frag.frag == "c" else frag.ptx_type if suffix == "s32": suffix = "i32" + if frag.frag == "d": ifrag = "c" op = "st" @@ -143,6 +175,8 @@ def get_mma_builtin_name(op): if prefix == "__hmma": suffix = op.d.ptx_type + op.c.ptx_type + elif prefix in ["__mma_bf16", "__mma_tf32"]: + suffix = op.d.ptx_type else: suffix = op.a.ptx_type @@ -151,8 +185,9 @@ def get_mma_builtin_name(op): suffix) return name - def get_required_sm(frag): + if frag.ptx_type in ["f64", "bf16", "tf32"]: + return 80 if frag.ptx_type in ["u4", "s4", "b1"]: return 75 if frag.ptx_type in ["s8", "u8"]: @@ -163,18 +198,34 @@ def get_required_sm(frag): else: # s8/u8 return 72 if frag.ptx_type in ["f16", "f32"]: - return 70 + if frag.geom == "m16n16k8": + return 80 + else: + return 70 assert(False) def get_required_ptx(frag): + if frag.ptx_type in ["f64", "bf16", "tf32"]: + return 70 if frag.ptx_type in ["f16", "f32"]: - return 60 if frag.geom == "m16n16k16" else 61 + if frag.geom == "m16n16k16": + return 60 + if frag.geom == "m16n16k8": + return 70 + return 61 return 63 +def get_src_dst_prefix(ptx_type): + if ptx_type == "f32": + return "f" + if ptx_type == "f64": + return "d" + return "" + def gen_wmma_ldst_tests(results): load_template = """ // CHECK${check_suffix}: call {{.*}} @${intrinsic} - // expected-error-re@+1 {{'${builtin}' needs target feature sm_${min_sm}{{.*}},ptx${min_ptx}{{.*}}}} + // expected-error-re@+1 {{'${builtin}' needs target feature (sm_${min_sm}{{.*}},(ptx${min_ptx}{{.*}}}} ${builtin}(${dst}, ${src}, ldm, ${blayout}); """.rstrip() intrinsic_template = "llvm.nvvm.wmma.${geom}.${op}.${frag}.${ilayout}.stride.${itype}" @@ -184,7 +235,7 @@ def gen_wmma_ldst_tests(results): if not is_ldst_variant_supported(frag, layout): continue - is_fp = frag.ptx_type == "f32" + src_dst_prefix = get_src_dst_prefix(frag.ptx_type) min_sm = get_required_sm(frag) min_ptx = get_required_ptx(frag) params = { @@ -192,8 +243,8 @@ def gen_wmma_ldst_tests(results): "builtin" : get_ldst_builtin_name(frag), "min_ptx" : min_ptx, "min_sm" : min_sm, - "dst": "fdst" if is_fp else "dst", - "src": "fsrc" if is_fp else "src", + "dst": src_dst_prefix + "dst", + "src": src_dst_prefix + "src", "blayout" : 0 if layout == "row" else 1, "intrinsic" : Template(intrinsic_template).substitute({ "frag" : frag.frag, @@ -208,12 +259,12 @@ def gen_wmma_ldst_tests(results): return results def mma_signature(op): - if op.a.ptx_type in ["s8", "u8", "s4", "u4", "b1"]: - # int and sub-int ops are identified by input type. - return op.a.ptx_type - else: - # the rest are FP ops identified by accumulator & result type. + if op.a.ptx_type == "f16": + # FP16 ops identified by accumulator & result type. return "%s.%s" % (op.d.ptx_type, op.c.ptx_type) + else: + # other ops are identified by input type. + return op.a.ptx_type # Get numeric value for rowcol parameter of the builtin # AFAICT it uses the encoding accepted by NVVM intrinsics: @@ -229,8 +280,8 @@ def get_ilayout(a, b): def gen_wmma_mma_tests(results): mma_template = """ // CHECK${check_suffix}: call {{.*}} @${intrinsic} - // expected-error-re@+1 {{'${builtin}' needs target feature sm_${min_sm}{{.*}},ptx${min_ptx}{{.*}}}} - ${builtin}(${dst}, ${asrc}, ${asrc}, ${csrc}, ${ilayout}${maybe_isatf}); + // expected-error-re@+1 {{'${builtin}' needs target feature (sm_${min_sm}{{.*}},(ptx${min_ptx}{{.*}}}} + ${builtin}(${dst}, ${asrc}, ${asrc}, ${csrc}, ${ilayout}${maybe_satf}); """.rstrip() intrinsic_template = "llvm.nvvm.wmma.${geom}.mma.${alayout}.${blayout}.${intrinsic_signature}${satf}" @@ -243,9 +294,9 @@ def gen_wmma_mma_tests(results): if not is_mma_variant_supported(op, alayout, blayout, satf): continue - a_is_fp = op.a.ptx_type == "f32" - c_is_fp = op.c.ptx_type == "f32" - d_is_fp = op.d.ptx_type == "f32" + asrc_prefix = get_src_dst_prefix(op.a.ptx_type) + csrc_prefix = get_src_dst_prefix(op.c.ptx_type) + ddst_prefix = get_src_dst_prefix(op.d.ptx_type) min_sm = get_required_sm(op.a) min_ptx = get_required_ptx(op.a) if op.a.ptx_type == "b1": # .b1 MMA has no satf argument. @@ -257,11 +308,11 @@ def gen_wmma_mma_tests(results): "builtin" : get_mma_builtin_name(op), "min_ptx" : min_ptx, "min_sm" : min_sm, - "dst": "fdst" if d_is_fp else "dst", - "asrc": "fsrc" if a_is_fp else "src", - "csrc": "fsrc" if c_is_fp else "src", + "dst": ddst_prefix + "dst", + "asrc": asrc_prefix + "src", + "csrc": csrc_prefix + "src", "ilayout" : get_ilayout(alayout, blayout), - "maybe_isatf" : isatf_arg, + "maybe_satf" : isatf_arg, "intrinsic" : Template(intrinsic_template).substitute({ "geom" : op.a.geom, "alayout" : alayout, @@ -322,7 +373,8 @@ def supported_variants(ptx, sm, results): // CHECK-LABEL: test_wmma_buitins __device__ void test_wmma_buitins(int *src, int *dst, - float *fsrc, float *fdst, int ldm) { + float *fsrc, float *fdst, + double *dsrc, double *ddst, int ldm) { """); for (ptx, sm), tests in sorted(results.items()): diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td index 71e31b14f4c3b..3ce9dfb1bb807 100644 --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -52,13 +52,27 @@ class WMMA_REGS { string gft = Geom#":"#Frag#":"#ptx_elt_type; string ft = frag#":"#ptx_elt_type; list regs = !cond( - // mma.sync.m8n8k4 uses smaller a/b fragments than wmma fp ops + // mma fp ops use smaller fragments than wmma fp ops !eq(gft,"m8n8k4:a:f16") : !listsplat(llvm_v2f16_ty, 2), !eq(gft,"m8n8k4:b:f16") : !listsplat(llvm_v2f16_ty, 2), - - // fp16 -> fp16/fp32 @ m16n16k16/m8n32k16/m32n8k16 - // All currently supported geometries use the same fragment format, - // so we only need to consider {fragment, type}. + !eq(gft,"m16n8k8:a:f16") : !listsplat(llvm_v2f16_ty, 2), + !eq(gft,"m16n8k8:b:f16") : [llvm_v2f16_ty], + !eq(gft,"m16n8k8:c:f16") : !listsplat(llvm_v2f16_ty, 2), + !eq(gft,"m16n8k8:d:f16") : !listsplat(llvm_v2f16_ty, 2), + !eq(gft,"m16n8k8:c:f32") : !listsplat(llvm_float_ty, 4), + !eq(gft,"m16n8k8:d:f32") : !listsplat(llvm_float_ty, 4), + !eq(gft,"m16n8k16:a:f16") : !listsplat(llvm_v2f16_ty, 4), + !eq(gft,"m16n8k16:b:f16") : !listsplat(llvm_v2f16_ty, 2), + !eq(gft,"m16n8k16:c:f16") : !listsplat(llvm_v2f16_ty, 2), + !eq(gft,"m16n8k16:d:f16") : !listsplat(llvm_v2f16_ty, 2), + !eq(gft,"m16n8k16:c:f32") : !listsplat(llvm_float_ty, 4), + !eq(gft,"m16n8k16:d:f32") : !listsplat(llvm_float_ty, 4), + !eq(gft,"m16n8k4:c:f32") : !listsplat(llvm_float_ty, 4), + !eq(gft,"m16n8k4:d:f32") : !listsplat(llvm_float_ty, 4), + + // wmma fp16 -> fp16/fp32 @ m16n16k16/m8n32k16/m32n8k16 + // All other supported geometries use the same fragment format for f32 and + // f16, so we only need to consider {fragment, type}. !eq(ft,"a:f16") : !listsplat(llvm_v2f16_ty, 8), !eq(ft,"b:f16") : !listsplat(llvm_v2f16_ty, 8), !eq(ft,"c:f16") : !listsplat(llvm_v2f16_ty, 4), @@ -66,7 +80,36 @@ class WMMA_REGS { !eq(ft,"c:f32") : !listsplat(llvm_float_ty, 8), !eq(ft,"d:f32") : !listsplat(llvm_float_ty, 8), - // u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16 + // wmma tf32 -> s32 @ m16n16k8 + !eq(gft,"m16n16k8:a:tf32") : !listsplat(llvm_i32_ty, 4), + !eq(gft,"m16n16k8:b:tf32") : !listsplat(llvm_i32_ty, 4), + + // mma tf32 -> s32 @ m16n16k8/m16n8k8 + !eq(gft,"m16n8k4:a:tf32") : !listsplat(llvm_i32_ty, 2), + !eq(gft,"m16n8k4:b:tf32") : [llvm_i32_ty], + !eq(gft,"m16n8k8:a:tf32") : !listsplat(llvm_i32_ty, 4), + !eq(gft,"m16n8k8:b:tf32") : !listsplat(llvm_i32_ty, 2), + + !eq(gft,"m8n8k4:a:f64") : [llvm_double_ty], + !eq(gft,"m8n8k4:b:f64") : [llvm_double_ty], + !eq(gft,"m8n8k4:c:f64") : !listsplat(llvm_double_ty, 2), + !eq(gft,"m8n8k4:d:f64") : !listsplat(llvm_double_ty, 2), + + // wmma bf16 -> s32 @ m16n16k16/m8n32k16/m32n8k16 + !eq(gft,"m16n16k16:a:bf16") : !listsplat(llvm_i32_ty, 4), + !eq(gft,"m16n16k16:b:bf16") : !listsplat(llvm_i32_ty, 4), + !eq(gft,"m8n32k16:a:bf16") : !listsplat(llvm_i32_ty, 2), + !eq(gft,"m8n32k16:b:bf16") : !listsplat(llvm_i32_ty, 8), + !eq(gft,"m32n8k16:a:bf16") : !listsplat(llvm_i32_ty, 8), + !eq(gft,"m32n8k16:b:bf16") : !listsplat(llvm_i32_ty, 2), + + // mma bf16 -> s32 @ m16n8k16/m16n8k8 + !eq(gft,"m16n8k16:a:bf16") : !listsplat(llvm_i32_ty, 4), + !eq(gft,"m16n8k16:b:bf16") : !listsplat(llvm_i32_ty, 2), + !eq(gft,"m16n8k8:a:bf16") : !listsplat(llvm_i32_ty, 2), + !eq(gft,"m16n8k8:b:bf16") : [llvm_i32_ty], + + // wmma u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16 !eq(gft,"m16n16k16:a:u8") : !listsplat(llvm_i32_ty, 2), !eq(gft,"m16n16k16:a:s8") : !listsplat(llvm_i32_ty, 2), !eq(gft,"m16n16k16:b:u8") : !listsplat(llvm_i32_ty, 2), @@ -88,17 +131,65 @@ class WMMA_REGS { !eq(gft,"m32n8k16:c:s32") : !listsplat(llvm_i32_ty, 8), !eq(gft,"m32n8k16:d:s32") : !listsplat(llvm_i32_ty, 8), - // u4/s4/b1 -> s32 @ m8n8k32 (u4/s4), m8n8k128(b1) - !eq(gft,"m8n8k128:a:b1") : [llvm_i32_ty], + // mma u8/s8 -> s32 @ m8n8k16/m16n8k16/m16n8k32 + !eq(gft,"m8n8k16:a:u8") : [llvm_i32_ty], + !eq(gft,"m8n8k16:a:s8") : [llvm_i32_ty], + !eq(gft,"m8n8k16:b:u8") : [llvm_i32_ty], + !eq(gft,"m8n8k16:b:s8") : [llvm_i32_ty], + !eq(gft,"m8n8k16:c:s32") : !listsplat(llvm_i32_ty, 2), + !eq(gft,"m8n8k16:d:s32") : !listsplat(llvm_i32_ty, 2), + + !eq(gft,"m16n8k16:a:u8") : !listsplat(llvm_i32_ty, 2), + !eq(gft,"m16n8k16:a:s8") : !listsplat(llvm_i32_ty, 2), + !eq(gft,"m16n8k16:b:u8") : [llvm_i32_ty], + !eq(gft,"m16n8k16:b:s8") : [llvm_i32_ty], + !eq(gft,"m16n8k16:c:s32") : !listsplat(llvm_i32_ty, 4), + !eq(gft,"m16n8k16:d:s32") : !listsplat(llvm_i32_ty, 4), + + !eq(gft,"m16n8k32:a:u8") : !listsplat(llvm_i32_ty, 4), + !eq(gft,"m16n8k32:a:s8") : !listsplat(llvm_i32_ty, 4), + !eq(gft,"m16n8k32:b:u8") : !listsplat(llvm_i32_ty, 2), + !eq(gft,"m16n8k32:b:s8") : !listsplat(llvm_i32_ty, 2), + !eq(gft,"m16n8k32:c:s32") : !listsplat(llvm_i32_ty, 4), + !eq(gft,"m16n8k32:d:s32") : !listsplat(llvm_i32_ty, 4), + + // wmma/mma u4/s4 -> s32 @ m8n8k32 (u4/s4) !eq(gft,"m8n8k32:a:u4") : [llvm_i32_ty], !eq(gft,"m8n8k32:a:s4") : [llvm_i32_ty], - !eq(gft,"m8n8k128:b:b1") : [llvm_i32_ty], !eq(gft,"m8n8k32:b:u4") : [llvm_i32_ty], !eq(gft,"m8n8k32:b:s4") : [llvm_i32_ty], - !eq(gft,"m8n8k128:c:s32") : !listsplat(llvm_i32_ty, 2), - !eq(gft,"m8n8k128:d:s32") : !listsplat(llvm_i32_ty, 2), !eq(gft,"m8n8k32:c:s32") : !listsplat(llvm_i32_ty, 2), !eq(gft,"m8n8k32:d:s32") : !listsplat(llvm_i32_ty, 2), + + !eq(gft,"m16n8k32:a:u4") : !listsplat(llvm_i32_ty, 2), + !eq(gft,"m16n8k32:a:s4") : !listsplat(llvm_i32_ty, 2), + !eq(gft,"m16n8k32:b:u4") : [llvm_i32_ty], + !eq(gft,"m16n8k32:b:s4") : [llvm_i32_ty], + !eq(gft,"m16n8k32:c:s32") : !listsplat(llvm_i32_ty, 4), + !eq(gft,"m16n8k32:d:s32") : !listsplat(llvm_i32_ty, 4), + + !eq(gft,"m16n8k64:a:u4") : !listsplat(llvm_i32_ty, 4), + !eq(gft,"m16n8k64:a:s4") : !listsplat(llvm_i32_ty, 4), + !eq(gft,"m16n8k64:b:u4") : !listsplat(llvm_i32_ty, 2), + !eq(gft,"m16n8k64:b:s4") : !listsplat(llvm_i32_ty, 2), + !eq(gft,"m16n8k64:c:s32") : !listsplat(llvm_i32_ty, 4), + !eq(gft,"m16n8k64:d:s32") : !listsplat(llvm_i32_ty, 4), + + // wmma/mma b1 -> s32 @ m8n8k128(b1) + !eq(gft,"m8n8k128:a:b1") : [llvm_i32_ty], + !eq(gft,"m8n8k128:b:b1") : [llvm_i32_ty], + !eq(gft,"m8n8k128:c:s32") : !listsplat(llvm_i32_ty, 2), + !eq(gft,"m8n8k128:d:s32") : !listsplat(llvm_i32_ty, 2), + + !eq(gft,"m16n8k128:a:b1") : !listsplat(llvm_i32_ty, 2), + !eq(gft,"m16n8k128:b:b1") : [llvm_i32_ty], + !eq(gft,"m16n8k128:c:s32") : !listsplat(llvm_i32_ty, 4), + !eq(gft,"m16n8k128:d:s32") : !listsplat(llvm_i32_ty, 4), + + !eq(gft,"m16n8k256:a:b1") : !listsplat(llvm_i32_ty, 4), + !eq(gft,"m16n8k256:b:b1") : !listsplat(llvm_i32_ty, 2), + !eq(gft,"m16n8k256:c:s32") : !listsplat(llvm_i32_ty, 4), + !eq(gft,"m16n8k256:d:s32") : !listsplat(llvm_i32_ty, 4), ); } @@ -125,35 +216,40 @@ class WMMA_NAME_LDST { class MMA_SIGNATURE { list id_frags = !cond( - // int and sub-int ops are identified by input type. - !eq(A.ptx_elt_type, "s8") : [A], - !eq(A.ptx_elt_type, "u8") : [A], - !eq(A.ptx_elt_type, "s4") : [A], - !eq(A.ptx_elt_type, "u4") : [A], - !eq(A.ptx_elt_type, "b1") : [A], - // the rest are FP ops identified by accumulator & result type. - true: [D, C] + // FP16 ops are identified by accumulator & result type. + !eq(A.ptx_elt_type, "f16") : [D, C], + // other ops are identified by input types. + !ne(A.ptx_elt_type, B.ptx_elt_type): [A, B], + true: [A] ); string ret = !foldl("", id_frags, a, b, !strconcat(a, ".", b.ptx_elt_type)); } -class WMMA_NAME_MMA { +class WMMA_NAME { string signature = MMA_SIGNATURE.ret; - string llvm = !if( - !eq(A.geom, "m8n8k4"), - "llvm.nvvm.mma.m8n8k4" - # "." # ALayout - # "." # BLayout - # signature, - "llvm.nvvm.wmma." - # A.geom - # ".mma" - # "." # ALayout - # "." # BLayout - # signature - # !if(Satfinite, ".satfinite", "")); + string llvm = "llvm.nvvm.wmma." + # A.geom + # ".mma" + # "." # ALayout + # "." # BLayout + # !if(!ne(Rnd, ""), !strconcat(".", Rnd), "") + # signature + # !if(Satfinite, ".satfinite", ""); + + string record = !subst(".", "_", + !subst("llvm.", "int_", llvm)); +} +class MMA_NAME { + string signature = MMA_SIGNATURE.ret; + string llvm = "llvm.nvvm.mma." + # A.geom + # "." # ALayout + # "." # BLayout + # !if(Satfinite, ".satfinite", "") + # signature; string record = !subst(".", "_", !subst("llvm.", "int_", llvm)); } @@ -188,14 +284,18 @@ class MMA_LDST_OPS Geom, list Frags, list Types> { list ops = !foreach(x, ret, x.gft); } - - // Creates list of valid combinations of fragments. This is the master list that // drives generation of corresponding intrinsics and instructions. class NVVM_MMA_OPS { - list> fp_mma_ops = MMA_OPS< + list> tf32_wmma_ops = MMA_OPS< + ["m16n16k8"], + ["tf32"], [], ["f32"], []>.ret; + list> bf16_wmma_ops = MMA_OPS< + ["m16n16k16", "m32n8k16", "m8n32k16"], + ["bf16"], [], ["f32"], []>.ret; + list> f64_wmma_ops = MMA_OPS< ["m8n8k4"], - ["f16"], [], ["f16", "f32"], ["f16", "f32"]>.ret; + ["f64"], [], ["f64"], []>.ret; list> fp_wmma_ops = MMA_OPS< ["m16n16k16", "m32n8k16", "m8n32k16"], ["f16"], [], ["f16", "f32"], ["f16", "f32"]>.ret; @@ -208,16 +308,50 @@ class NVVM_MMA_OPS { list> bit_wmma_ops = MMA_OPS< ["m8n8k128"], ["b1"], [], ["s32"], []>.ret; + list> all_wmma_ops = !listconcat( + tf32_wmma_ops, bf16_wmma_ops, f64_wmma_ops, + fp_wmma_ops, int_wmma_ops, subint_wmma_ops, bit_wmma_ops); + + list> tf32_mma_ops = MMA_OPS< + ["m16n8k4", "m16n8k8"], + ["tf32"], [], ["f32"], []>.ret; + list> bf16_mma_ops = MMA_OPS< + ["m16n8k16", "m16n8k8"], + ["bf16"], [], ["f32"], []>.ret; + list> f64_mma_ops = MMA_OPS< + ["m8n8k4"], + ["f64"], [], ["f64"], []>.ret; + list> fp_mma_ops = MMA_OPS< + ["m8n8k4", "m16n8k8", "m16n8k16"], + ["f16"], [], ["f16", "f32"], ["f16", "f32"]>.ret; + list> int_mma_ops = MMA_OPS< + ["m8n8k16", "m16n8k16", "m16n8k32"], + ["s8", "u8"], ["s8", "u8"], ["s32"], []>.ret; + list> subint_mma_ops = MMA_OPS< + ["m8n8k32", "m16n8k32", "m16n8k64"], + ["s4", "u4"], ["s4", "u4"], ["s32"], []>.ret; + list> bit_mma_ops = MMA_OPS< + ["m8n8k128", "m16n8k128", "m16n8k256"], + ["b1"], [], ["s32"], []>.ret; list> all_mma_ops = !listconcat( - fp_mma_ops, fp_wmma_ops, int_wmma_ops, - subint_wmma_ops, bit_wmma_ops); + tf32_mma_ops, bf16_mma_ops, f64_mma_ops, + fp_mma_ops, int_mma_ops, subint_mma_ops, bit_mma_ops); list ldst_ab_ops = MMA_LDST_OPS< ["m16n16k16", "m32n8k16", "m8n32k16"], - ["a", "b"], ["f16", "u8", "s8"]>.ret; + ["a", "b"], ["f16", "u8", "s8", "bf16"]>.ret; list ldst_cd_ops = MMA_LDST_OPS< ["m16n16k16", "m32n8k16", "m8n32k16"], ["c", "d"], ["f16", "f32", "s32"]>.ret; + list ldst_tf32_ab_ops = MMA_LDST_OPS< + ["m16n16k8"], + ["a", "b"], ["tf32"]>.ret; + list ldst_tf32_cd_ops = MMA_LDST_OPS< + ["m16n16k8"], + ["c", "d"], ["f32"]>.ret; + list ldst_f64_abcd_ops = MMA_LDST_OPS< + ["m8n8k4"], + ["a", "b", "c", "d"], ["f64"]>.ret; list ldst_subint_ab_ops = MMA_LDST_OPS< ["m8n8k32"], ["a", "b"], ["s4","u4"]>.ret; list ldst_bit_ab_ops = MMA_LDST_OPS< @@ -225,6 +359,9 @@ class NVVM_MMA_OPS { list ldst_subint_cd_ops = MMA_LDST_OPS< ["m8n8k32", "m8n8k128"], ["c", "d"], ["s32"]>.ret; list all_ldst_ops = !listconcat(ldst_ab_ops, ldst_cd_ops, + ldst_tf32_ab_ops, + ldst_tf32_cd_ops, + ldst_f64_abcd_ops, ldst_subint_ab_ops, ldst_bit_ab_ops, ldst_subint_cd_ops); @@ -235,69 +372,110 @@ class NVVM_MMA_OPS { def NVVM_MMA_OPS : NVVM_MMA_OPS; -// Returns true if this combination of layout/satf is supported; false otherwise. -// MMA ops must provide all parameters. Loads and stores -- only frags and layout_a. -// The class is used to prevent generation of records for the unsupported variants. + +// Returns true if this combination of fragment and layout for WMMA load/store +// ops is supported; false otherwise. +// E.g. +// if NVVM_WMMA_LDST_SUPPORTED<...>.ret then +// def : FOO<>; // The record will only be defined for supported ops. +// +class NVVM_WMMA_LDST_SUPPORTED { + string f = frag.frag; + string t = frag.ptx_elt_type; + + bit ret = !cond( + // Sub-int load and store requires A fragment to be of row layout and B + // fragments to be of column layout. + !and(!or(!eq(t, "b1"), + !eq(t, "u4"), + !eq(t, "s4")), + !or(!and(!eq(f, "a"), + !ne(layout, "row")), + !and(!eq(f, "b"), + !ne(layout, "col")))) : false, + true: true + ); +} + +// Returns true if this combination of layout/satf/rnd for WMMA ops is +// supported; false otherwise. +// E.g. +// if NVVM_WMMA_SUPPORTED<...>.ret then +// def : FOO<>; // The record will only be defined for supported ops. +// +class NVVM_WMMA_SUPPORTED frags, string layout_a, string layout_b, int satf, string rnd> { + // WMMA ops check both layouts. + string layout = layout_a # ":" # layout_b; + string t = frags[0].ptx_elt_type; + + bit ret = !cond( + // only f64 wmma functions support rnd options + // any non f64 type that uses a rnd value is invalid + !and(!ne(t, "f64"), !ne(rnd, "")) : false, + + // satf is only valid for select types + !and(!eq(satf, 1), + !ne(t, "s8"), + !ne(t, "u8"), + !ne(t, "s4"), + !ne(t, "u4"), + !ne(t, "f16")): false, + + // Sub-int wmma requires row/column layout + !and(!or(!eq(t, "s4"), + !eq(t, "u4"), + !eq(t, "b1")), + !ne(layout, "row:col")) : false, + true: true + ); +} + +// Returns true if this combination of layout/satf for MMA ops is supported; +// false otherwise. // E.g. // if NVVM_MMA_SUPPORTED<...>.ret then // def : FOO<>; // The record will only be defined for supported ops. // -class NVVM_MMA_SUPPORTED frags, string layout_a, string layout_b="-", int satf=-1> { +class NVVM_MMA_SUPPORTED frags, string layout_a, string layout_b, int satf> { // MMA ops check both layouts. - string mma = frags[0].ptx_elt_type - # ":" # layout_a - # ":" # layout_b; - // Load ops only need type/fragment/layout. - string ld = frags[0].ptx_elt_type - # ":" # frags[0].frag - # ":" # layout_a - ; - string ldf = frags[0].ptx_elt_type - # ":" # frags[0].frag - ; - string t = frags[0].ptx_elt_type; + string layout = layout_a # ":" # layout_b; + string a_type = frags[0].ptx_elt_type; + string b_type = frags[1].ptx_elt_type; + string c_type = frags[2].ptx_elt_type; + string d_type = frags[3].ptx_elt_type; + string geom = frags[0].geom; // gcd is a shortcut used to identify instructions that depend on - // geom+frag_c+frag_d. Not all instances of this class have all fragments - // specified. If there are not enough fragments, the tail evaluates to '?'. - string gcd = frags[0].geom - # ":" - # !if(!eq(!size(frags), 4), - frags[2].ptx_elt_type # frags[3].ptx_elt_type, - "?"); + // geom+frag_c+frag_d. + string gcd = geom # ":" # c_type # d_type; bit ret = !cond( - // Sub-int MMA only supports fixed A/B layout. - // b1 does not support .satf. - !eq(mma#":"#satf, "b1:row:col:0") : true, - // mma.m8n8k4 has no .satf modifier. - !and(!eq(frags[0].geom, "m8n8k4"), - !ne(satf, 0)): false, - - // mma.m8n8k4 has no C=f32 D=f16 variant. + + // Limit satf to valid types + !and(!eq(satf, 1), + !ne(a_type, "s8"), + !ne(a_type, "u8"), + !ne(a_type, "s4"), + !ne(a_type, "u4")): false, + + // m8n8k4 has no C=f32 D=f16 variant. !eq(gcd, "m8n8k4:f32f16"): false, - !eq(mma, "s4:row:col") : true, - !eq(mma, "u4:row:col") : true, - !eq(mma, "s4:row:col") : true, - !eq(mma, "u4:row:col") : true, - // Sub-int load/stores have fixed layout for A and B. - !and(!eq(layout_b, "-"), // It's a Load or Store op - !or(!eq(ld, "b1:a:row"), - !eq(ld, "b1:b:col"), - !eq(ldf, "b1:c"), - !eq(ldf, "b1:d"), - !eq(ld, "s4:a:row"), - !eq(ld, "s4:b:col"), - !eq(ldf, "s4:c"), - !eq(ldf, "s4:d"), - !eq(ld, "u4:a:row"), - !eq(ld, "u4:b:col"), - !eq(ldf, "u4:c"), - !eq(ldf, "u4:d"))) : true, - // All other sub-int ops are not supported. - !eq(t, "b1") : false, - !eq(t, "s4") : false, - !eq(t, "u4") : false, - // All other (non sub-int) are OK. + + // only m8n8k4 for f16 does not require row:col layout + !and(!ne(layout, "row:col"), + !or(!ne(geom, "m8n8k4"), + !ne(a_type, "f16"))) : false, + + // m16n8k8 requires A and B to be the same type and C and D to be the same + // type. + !and(!eq(geom, "m16n8k8"), + !or(!ne(a_type, b_type), + !ne(c_type, d_type))): false, + + // m16n8k8 requires C and D to be the same type. + !and(!eq(geom, "m16n8k8"), + !ne(c_type, d_type)): false, + + // All other are OK. true: true ); } @@ -4271,36 +4449,59 @@ class NVVM_WMMA_ST foreach layout = ["row", "col"] in { foreach stride = [0, 1] in { foreach frag = NVVM_MMA_OPS.all_ld_ops in - if NVVM_MMA_SUPPORTED<[frag], layout>.ret then + if NVVM_WMMA_LDST_SUPPORTED.ret then def WMMA_NAME_LDST<"load", frag, layout, stride>.record : NVVM_WMMA_LD; foreach frag = NVVM_MMA_OPS.all_st_ops in - if NVVM_MMA_SUPPORTED<[frag], layout>.ret then + if NVVM_WMMA_LDST_SUPPORTED.ret then def WMMA_NAME_LDST<"store", frag, layout, stride>.record : NVVM_WMMA_ST; } } // WMMA.MMA -class NVVM_WMMA_MMA : Intrinsic.llvm>; + WMMA_NAME.llvm>; + +foreach layout_a = ["row", "col"] in { + foreach layout_b = ["row", "col"] in { + foreach satf = [0, 1] in { + foreach rnd = ["", "rn", "rz", "rm", "rp"] in { + foreach op = NVVM_MMA_OPS.all_wmma_ops in { + if NVVM_WMMA_SUPPORTED.ret then { + def WMMA_NAME.record + : NVVM_WMMA_MMA; + } + } // op + } // rnd + } // satf + } // layout_b +} // layout_a + +// MMA +class NVVM_MMA + : Intrinsic.llvm>; foreach layout_a = ["row", "col"] in { foreach layout_b = ["row", "col"] in { foreach satf = [0, 1] in { foreach op = NVVM_MMA_OPS.all_mma_ops in { if NVVM_MMA_SUPPORTED.ret then { - def WMMA_NAME_MMA.record - : NVVM_WMMA_MMA; + def MMA_NAME.record + : NVVM_MMA; } - } + } // op } // satf } // layout_b } // layout_a diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index d431f20d066f6..d4842c953ce7a 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -3490,6 +3490,10 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_row_stride: case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_row_stride: case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_row: + case Intrinsic::nvvm_wmma_m8n32k16_load_a_bf16_col: + case Intrinsic::nvvm_wmma_m8n32k16_load_a_bf16_col_stride: + case Intrinsic::nvvm_wmma_m8n32k16_load_a_bf16_row: + case Intrinsic::nvvm_wmma_m8n32k16_load_a_bf16_row_stride: case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_col: case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_col_stride: case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_col_stride: @@ -3497,7 +3501,11 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_row: case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_row_stride: case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_row_stride: - case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_row: { + case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_row: + case Intrinsic::nvvm_wmma_m32n8k16_load_b_bf16_col: + case Intrinsic::nvvm_wmma_m32n8k16_load_b_bf16_col_stride: + case Intrinsic::nvvm_wmma_m32n8k16_load_b_bf16_row: + case Intrinsic::nvvm_wmma_m32n8k16_load_b_bf16_row_stride: { Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::v2i32; Info.ptrVal = I.getArgOperand(0); @@ -3515,6 +3523,14 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_row_stride: case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_row_stride: case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_row: + case Intrinsic::nvvm_wmma_m16n16k16_load_a_bf16_col: + case Intrinsic::nvvm_wmma_m16n16k16_load_a_bf16_col_stride: + case Intrinsic::nvvm_wmma_m16n16k16_load_a_bf16_row: + case Intrinsic::nvvm_wmma_m16n16k16_load_a_bf16_row_stride: + case Intrinsic::nvvm_wmma_m16n16k8_load_a_tf32_col: + case Intrinsic::nvvm_wmma_m16n16k8_load_a_tf32_col_stride: + case Intrinsic::nvvm_wmma_m16n16k8_load_a_tf32_row: + case Intrinsic::nvvm_wmma_m16n16k8_load_a_tf32_row_stride: case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_col: case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_col_stride: @@ -3523,7 +3539,15 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_row: case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_row_stride: case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_row_stride: - case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_row: { + case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_row: + case Intrinsic::nvvm_wmma_m16n16k16_load_b_bf16_col: + case Intrinsic::nvvm_wmma_m16n16k16_load_b_bf16_col_stride: + case Intrinsic::nvvm_wmma_m16n16k16_load_b_bf16_row: + case Intrinsic::nvvm_wmma_m16n16k16_load_b_bf16_row_stride: + case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_col: + case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_col_stride: + case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_row: + case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_row_stride: { Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::v4i32; Info.ptrVal = I.getArgOperand(0); @@ -3603,7 +3627,11 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_col: case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_row: case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_col_stride: - case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_row_stride: { + case Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_row_stride: + case Intrinsic::nvvm_wmma_m16n16k8_load_c_f32_col: + case Intrinsic::nvvm_wmma_m16n16k8_load_c_f32_row: + case Intrinsic::nvvm_wmma_m16n16k8_load_c_f32_col_stride: + case Intrinsic::nvvm_wmma_m16n16k8_load_c_f32_row_stride: { Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::v8f32; Info.ptrVal = I.getArgOperand(0); @@ -3613,6 +3641,16 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( return true; } + case Intrinsic::nvvm_wmma_m32n8k16_load_a_bf16_col: + case Intrinsic::nvvm_wmma_m32n8k16_load_a_bf16_col_stride: + case Intrinsic::nvvm_wmma_m32n8k16_load_a_bf16_row: + case Intrinsic::nvvm_wmma_m32n8k16_load_a_bf16_row_stride: + + case Intrinsic::nvvm_wmma_m8n32k16_load_b_bf16_col: + case Intrinsic::nvvm_wmma_m8n32k16_load_b_bf16_col_stride: + case Intrinsic::nvvm_wmma_m8n32k16_load_b_bf16_row: + case Intrinsic::nvvm_wmma_m8n32k16_load_b_bf16_row_stride: + case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_col: case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_col_stride: case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_row: @@ -3651,6 +3689,37 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( return true; } + case Intrinsic::nvvm_wmma_m8n8k4_load_a_f64_col: + case Intrinsic::nvvm_wmma_m8n8k4_load_a_f64_col_stride: + case Intrinsic::nvvm_wmma_m8n8k4_load_a_f64_row: + case Intrinsic::nvvm_wmma_m8n8k4_load_a_f64_row_stride: + + case Intrinsic::nvvm_wmma_m8n8k4_load_b_f64_col: + case Intrinsic::nvvm_wmma_m8n8k4_load_b_f64_col_stride: + case Intrinsic::nvvm_wmma_m8n8k4_load_b_f64_row: + case Intrinsic::nvvm_wmma_m8n8k4_load_b_f64_row_stride: { + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::f64; + Info.ptrVal = I.getArgOperand(0); + Info.offset = 0; + Info.flags = MachineMemOperand::MOLoad; + Info.align = Align(8); + return true; + } + + case Intrinsic::nvvm_wmma_m8n8k4_load_c_f64_col: + case Intrinsic::nvvm_wmma_m8n8k4_load_c_f64_col_stride: + case Intrinsic::nvvm_wmma_m8n8k4_load_c_f64_row: + case Intrinsic::nvvm_wmma_m8n8k4_load_c_f64_row_stride: { + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::v2f64; + Info.ptrVal = I.getArgOperand(0); + Info.offset = 0; + Info.flags = MachineMemOperand::MOLoad; + Info.align = Align(16); + return true; + } + case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_col: case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_row: case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_col_stride: @@ -3683,7 +3752,11 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_col: case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_row: case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_col_stride: - case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_row_stride: { + case Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_row_stride: + case Intrinsic::nvvm_wmma_m16n16k8_store_d_f32_col: + case Intrinsic::nvvm_wmma_m16n16k8_store_d_f32_row: + case Intrinsic::nvvm_wmma_m16n16k8_store_d_f32_col_stride: + case Intrinsic::nvvm_wmma_m16n16k8_store_d_f32_row_stride: { Info.opc = ISD::INTRINSIC_VOID; Info.memVT = MVT::v8f32; Info.ptrVal = I.getArgOperand(0); @@ -3731,6 +3804,19 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( return true; } + case Intrinsic::nvvm_wmma_m8n8k4_store_d_f64_col: + case Intrinsic::nvvm_wmma_m8n8k4_store_d_f64_col_stride: + case Intrinsic::nvvm_wmma_m8n8k4_store_d_f64_row: + case Intrinsic::nvvm_wmma_m8n8k4_store_d_f64_row_stride: { + Info.opc = ISD::INTRINSIC_VOID; + Info.memVT = MVT::v2f64; + Info.ptrVal = I.getArgOperand(0); + Info.offset = 0; + Info.flags = MachineMemOperand::MOStore; + Info.align = Align(16); + return true; + } + case Intrinsic::nvvm_atomic_load_inc_32: case Intrinsic::nvvm_atomic_load_dec_32: diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index 5622d5a6fdac5..ab93bf16d4919 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -144,6 +144,7 @@ def hasPTX60 : Predicate<"Subtarget->getPTXVersion() >= 60">; def hasPTX61 : Predicate<"Subtarget->getPTXVersion() >= 61">; def hasPTX63 : Predicate<"Subtarget->getPTXVersion() >= 63">; def hasPTX64 : Predicate<"Subtarget->getPTXVersion() >= 64">; +def hasPTX65 : Predicate<"Subtarget->getPTXVersion() >= 65">; def hasPTX70 : Predicate<"Subtarget->getPTXVersion() >= 70">; def hasSM30 : Predicate<"Subtarget->getSmVersion() >= 30">; diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index 1aaa9f0dd127d..798538410b104 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -1943,21 +1943,21 @@ multiclass VLDU_G_ELE_V2 { !strconcat("ldu.global.", TyStr), []>; } -multiclass VLDU_G_ELE_V4 { +multiclass VLDU_G_ELE_V4 { def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, - regclass:$dst4), (ins Int32Regs:$src), + regclass:$dst4), (ins Int32Regs:$src), !strconcat("ldu.global.", TyStr), []>; def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, - regclass:$dst4), (ins Int64Regs:$src), + regclass:$dst4), (ins Int64Regs:$src), !strconcat("ldu.global.", TyStr), []>; def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, - regclass:$dst4), (ins MEMri:$src), + regclass:$dst4), (ins MEMri:$src), !strconcat("ldu.global.", TyStr), []>; def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, - regclass:$dst4), (ins MEMri64:$src), + regclass:$dst4), (ins MEMri64:$src), !strconcat("ldu.global.", TyStr), []>; def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, - regclass:$dst4), (ins imemAny:$src), + regclass:$dst4), (ins imemAny:$src), !strconcat("ldu.global.", TyStr), []>; } @@ -1997,7 +1997,7 @@ defm INT_PTX_LDU_G_v4f32_ELE //----------------------------------- -// Support for ldg on sm_35 or later +// Support for ldg on sm_35 or later //----------------------------------- // Don't annotate ld.global.nc as mayLoad, because these loads go through the @@ -2045,7 +2045,7 @@ defm INT_PTX_LDG_GLOBAL_p64 // vector -// Elementized vector ldg +// Elementized vector ldg multiclass VLDG_G_ELE_V2 { def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), (ins Int32Regs:$src), @@ -2064,21 +2064,21 @@ multiclass VLDG_G_ELE_V2 { !strconcat("ld.global.nc.", TyStr), []>; } -multiclass VLDG_G_ELE_V4 { +multiclass VLDG_G_ELE_V4 { def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, - regclass:$dst4), (ins Int32Regs:$src), + regclass:$dst4), (ins Int32Regs:$src), !strconcat("ld.global.nc.", TyStr), []>; def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, - regclass:$dst4), (ins Int64Regs:$src), + regclass:$dst4), (ins Int64Regs:$src), !strconcat("ld.global.nc.", TyStr), []>; def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, - regclass:$dst4), (ins MEMri:$src), + regclass:$dst4), (ins MEMri:$src), !strconcat("ld.global.nc.", TyStr), []>; def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, - regclass:$dst4), (ins MEMri64:$src), + regclass:$dst4), (ins MEMri64:$src), !strconcat("ld.global.nc.", TyStr), []>; def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, - regclass:$dst4), (ins imemAny:$src), + regclass:$dst4), (ins imemAny:$src), !strconcat("ld.global.nc.", TyStr), []>; } @@ -7568,12 +7568,15 @@ def INT_PTX_SREG_WARPSIZE : // In addition to target-independent fields provided by WMMA_REGS, it adds // the fields commonly used to implement specific PTX instruction -- register // types and names, constraints, parts of assembly, etc. -class WMMA_REGINFO +class WMMA_REGINFO : WMMA_REGS { // NVPTX register types used to carry fragment data. NVPTXRegClass regclass = !cond( !eq(ptx_elt_type, "f16") : Float16x2Regs, !eq(ptx_elt_type, "f32") : Float32Regs, + !eq(ptx_elt_type, "f64") : Float64Regs, + !eq(ptx_elt_type, "bf16") : Int32Regs, + !eq(ptx_elt_type, "tf32") : Int32Regs, !eq(ptx_elt_type, "s32") : Int32Regs, !eq(ptx_elt_type, "s8") : Int32Regs, !eq(ptx_elt_type, "u8") : Int32Regs, @@ -7602,6 +7605,9 @@ class WMMA_REGINFO !or(!eq(ptx_elt_type, "f16"), !eq(ptx_elt_type, "f32"))) : [hasSM70, hasPTX60], + !and(!eq(geom,"m8n8k4"), + !eq(ptx_elt_type, "f64")) : [hasSM80, hasPTX70], + // fp16 -> fp16/fp32 @ m8n32k16/m32n8k16 !and(!or(!eq(geom, "m8n32k16"), !eq(geom, "m32n8k16")), @@ -7616,11 +7622,46 @@ class WMMA_REGINFO !eq(ptx_elt_type, "s8"), !eq(ptx_elt_type, "s32"))) : [hasSM72, hasPTX63], - // u4/s4/b1 -> s32 @ m8n8k32 (u4/s4), m8n8k128(b1) - !or(!eq(geom,"m8n8k128"), - !eq(geom,"m8n8k32")) : [hasSM75, hasPTX63], + !and(!or(!eq(geom,"m16n16k16"), + !eq(geom,"m8n32k16"), + !eq(geom,"m32n8k16")), + !eq(ptx_elt_type, "bf16")) : [hasSM80, hasPTX70], + + !and(!eq(geom,"m16n16k8"), + !eq(ptx_elt_type, "tf32")) : [hasSM80, hasPTX70], + + !and(!eq(geom,"m16n16k8"), + !eq(ptx_elt_type, "f32")) : [hasSM80, hasPTX70], + + // b1 -> s32 @ m8n8k128(b1) + !and(!ne(op,"mma"), + !eq(geom,"m8n8k128")) : [hasSM75, hasPTX63], + + // u4/s4 -> s32 @ m8n8k32 (u4/s4) + !and(!ne(op,"mma"), + !eq(geom,"m8n8k32")) : [hasSM75, hasPTX63], + + !or(!eq(geom,"m16n8k8"), + !eq(geom,"m8n8k16")) : [hasSM75, hasPTX65], - !eq(geom, "m8n8k4") : [hasSM70, hasPTX64]); + !and(!ne(ptx_elt_type,"f64"), + !eq(geom, "m8n8k4")) : [hasSM70, hasPTX64], + + // mma m8n8k32 requires higher PTX version + !and(!eq(op,"mma"), + !eq(geom,"m8n8k32")) : [hasSM75, hasPTX65], + + !and(!eq(ptx_elt_type,"f64"), + !eq(geom, "m8n8k4")) : [hasSM80, hasPTX70], + + !and(!eq(op,"mma"), + !or(!eq(geom, "m16n8k16"), + !eq(geom, "m16n8k4"), + !eq(geom, "m16n8k32"), + !eq(geom, "m16n8k64"), + !eq(geom, "m8n8k128"), + !eq(geom, "m16n8k128"), + !eq(geom, "m16n8k256"))) : [hasSM80, hasPTX70]); // template DAGs for instruction inputs/output. dag Outs = !dag(outs, ptx_regs, reg_names); @@ -7744,11 +7785,11 @@ defset list MMA_LDSTs = { foreach space = [".global", ".shared", ""] in { foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in { foreach frag = NVVM_MMA_OPS.all_ld_ops in - if NVVM_MMA_SUPPORTED<[frag], layout>.ret then - def : WMMA_LOAD, layout, space, stride, addr>; + if NVVM_WMMA_LDST_SUPPORTED.ret then + def : WMMA_LOAD, layout, space, stride, addr>; foreach frag = NVVM_MMA_OPS.all_st_ops in - if NVVM_MMA_SUPPORTED<[frag], layout>.ret then - def : WMMA_STORE_D, layout, space, stride, addr>; + if NVVM_WMMA_LDST_SUPPORTED.ret then + def : WMMA_STORE_D, layout, space, stride, addr>; } // addr } // space } // stride @@ -7758,46 +7799,84 @@ defset list MMA_LDSTs = { // WMMA.MMA class WMMA_MMA - : WMMA_INSTR.record, - [FragA.Ins, FragB.Ins, FragC.Ins]>, + string ALayout, string BLayout, int Satfinite, string rnd> + : WMMA_INSTR.record, + [FragA.Ins, FragB.Ins, FragC.Ins]>, // Requires does not seem to have effect on Instruction w/o Patterns. // We set it here anyways and propagate to the Pat<> we construct below. Requires { let OutOperandList = FragD.Outs; let InOperandList = !con(Args, (ins MmaCode:$ptx)); string TypeList = !cond( - !eq(FragD.geom, "m8n8k4") : "." # FragD.ptx_elt_type - # ".f16.f16." - # FragC.ptx_elt_type, - !eq(FragD.ptx_elt_type, "s32") : ".s32" - # "." # FragA.ptx_elt_type - # "." # FragB.ptx_elt_type - # ".s32", - 1: "." # FragD.ptx_elt_type # "." # FragC.ptx_elt_type, + !eq(FragA.ptx_elt_type, "f16") : "." # FragD.ptx_elt_type + # "." # FragC.ptx_elt_type, + 1: "." # FragD.ptx_elt_type + # "." # FragA.ptx_elt_type + # "." # FragB.ptx_elt_type + # "." # FragC.ptx_elt_type, ); - let AsmString = !if(!eq(FragA.geom, "m8n8k4"), - "mma.sync.aligned.m8n8k4" - # "." # ALayout - # "." # BLayout - # TypeList # "\n\t\t" - # FragD.regstring # ",\n\t\t" - # FragA.regstring # ",\n\t\t" - # FragB.regstring # ",\n\t\t" - # FragC.regstring # ";", - "wmma.mma" - # !if(!eq(FragA.ptx_elt_type, "b1"), ".xor.popc", "") - # ".sync" - # "${ptx:aligned}" - # "." # ALayout - # "." # BLayout - # "." # FragA.geom - # TypeList - # !if(Satfinite, ".satfinite", "") # "\n\t\t" - # FragD.regstring # ",\n\t\t" - # FragA.regstring # ",\n\t\t" - # FragB.regstring # ",\n\t\t" - # FragC.regstring # ";"); + let AsmString = "wmma.mma" + # !if(!eq(FragA.ptx_elt_type, "b1"), ".xor.popc", "") + # ".sync" + # "${ptx:aligned}" + # "." # ALayout + # "." # BLayout + # "." # FragA.geom + # !if(!ne(rnd, ""), !strconcat(".", rnd), "") + # TypeList + # !if(Satfinite, ".satfinite", "") # "\n\t\t" + # FragD.regstring # ",\n\t\t" + # FragA.regstring # ",\n\t\t" + # FragB.regstring # ",\n\t\t" + # FragC.regstring # ";"; +} + +defset list WMMAs = { + foreach layout_a = ["row", "col"] in { + foreach layout_b = ["row", "col"] in { + foreach satf = [0, 1] in { + foreach rnd = ["", "rn", "rz", "rm", "rp"] in { + foreach op = NVVM_MMA_OPS.all_wmma_ops in { + if NVVM_WMMA_SUPPORTED.ret then { + def : WMMA_MMA, + WMMA_REGINFO, + WMMA_REGINFO, + WMMA_REGINFO, + layout_a, layout_b, satf, rnd>; + } + } // op + } // rnd + } // satf + } // layout_b + } // layout_a +} // defset + +// MMA +class MMA + : WMMA_INSTR.record, + [FragA.Ins, FragB.Ins, FragC.Ins]>, + // Requires does not seem to have effect on Instruction w/o Patterns. + // We set it here anyways and propagate to the Pat<> we construct below. + Requires { + let OutOperandList = FragD.Outs; + let InOperandList = !con(Args, (ins MmaCode:$ptx)); + string TypeList = "." # FragD.ptx_elt_type + # "." # FragA.ptx_elt_type + # "." # FragB.ptx_elt_type + # "." # FragC.ptx_elt_type; + let AsmString = "mma.sync.aligned." + # FragA.geom + # "." # ALayout + # "." # BLayout + # !if(Satfinite, ".satfinite", "") + # TypeList + # !if(!eq(FragA.ptx_elt_type, "b1"), ".xor.popc", "") # "\n\t\t" + # FragD.regstring # ",\n\t\t" + # FragA.regstring # ",\n\t\t" + # FragB.regstring # ",\n\t\t" + # FragC.regstring # ";"; } defset list MMAs = { @@ -7806,11 +7885,11 @@ defset list MMAs = { foreach satf = [0, 1] in { foreach op = NVVM_MMA_OPS.all_mma_ops in { if NVVM_MMA_SUPPORTED.ret then { - def : WMMA_MMA, - WMMA_REGINFO, - WMMA_REGINFO, - WMMA_REGINFO, - layout_a, layout_b, satf>; + def : MMA, + WMMA_REGINFO, + WMMA_REGINFO, + WMMA_REGINFO, + layout_a, layout_b, satf>; } } // op } // satf @@ -7822,12 +7901,12 @@ defset list MMAs = { // Constructing non-flat DAGs is still a pain. I can't !subst a dag node with a // dag, so the ptx.version must be appended *after* foreach replaces 'ins' with // the instruction record. -class WMMA_PAT +class MMA_PAT : Pat, Requires; // Build intrinsic->instruction patterns for all MMA instructions. -foreach mma = !listconcat(MMAs, MMA_LDSTs) in - def : WMMA_PAT; +foreach mma = !listconcat(MMAs, WMMAs, MMA_LDSTs) in + def : MMA_PAT; diff --git a/llvm/test/CodeGen/NVPTX/lit.local.cfg b/llvm/test/CodeGen/NVPTX/lit.local.cfg index 2cb98eb371b21..8354800109ebd 100644 --- a/llvm/test/CodeGen/NVPTX/lit.local.cfg +++ b/llvm/test/CodeGen/NVPTX/lit.local.cfg @@ -1,2 +1,3 @@ if not 'NVPTX' in config.root.targets: config.unsupported = True +config.suffixes.add('.py') diff --git a/llvm/test/CodeGen/NVPTX/wmma.py b/llvm/test/CodeGen/NVPTX/wmma.py index 8c140c4d93108..8a808bd377eb9 100644 --- a/llvm/test/CodeGen/NVPTX/wmma.py +++ b/llvm/test/CodeGen/NVPTX/wmma.py @@ -6,7 +6,7 @@ # RUN: FileCheck %t-ptx60-sm_70.ll < %t-ptx60-sm_70.ll \ # RUN: --check-prefixes=INTRINSICS,M16N16 # RUN: FileCheck %t-ptx60-sm_70.ll < %t-ptx60-sm_70.ll \ -# RUN: --check-prefixes=INTRINSICS,NOEXTGEOM,NOINT,NOSUBINT,NOMMA +# RUN: --check-prefixes=INTRINSICS,NOEXTGEOM,NOINT,NOSUBINT,NOMMA,NODOUBLE,NOALTFLOAT # RUN: llc < %t-ptx60-sm_70.ll -march=nvptx64 -mcpu=sm_70 -mattr=+ptx60 \ # RUN: | FileCheck %t-ptx60-sm_70.ll @@ -15,7 +15,7 @@ # RUN: FileCheck %t-ptx61-sm_70.ll < %t-ptx61-sm_70.ll \ # RUN: --check-prefixes=INTRINSICS,M16N16,EXTGEOM # RUN: FileCheck %t-ptx61-sm_70.ll < %t-ptx61-sm_70.ll \ -# RUN: --check-prefixes=INTRINSICS,NOINT,NOSUBINT,NOMMA +# RUN: --check-prefixes=INTRINSICS,NOINT,NOSUBINT,NOMMA,NODOUBLE,NOALTFLOAT # RUN: llc < %t-ptx61-sm_70.ll -march=nvptx64 -mcpu=sm_70 -mattr=+ptx61 \ # RUN: | FileCheck %t-ptx61-sm_70.ll @@ -24,7 +24,7 @@ # RUN: FileCheck %t-ptx63-sm_72.ll < %t-ptx63-sm_72.ll \ # RUN: --check-prefixes=INTRINSICS,M16N16,EXTGEOM,INT # RUN: FileCheck %t-ptx63-sm_72.ll < %t-ptx63-sm_72.ll \ -# RUN: --check-prefixes=INTRINSICS,NOSUBINT,NOMMA +# RUN: --check-prefixes=INTRINSICS,NOSUBINT,NOMMA,NODOUBLE,NOALTFLOAT # RUN: llc < %t-ptx63-sm_72.ll -march=nvptx64 -mcpu=sm_72 -mattr=+ptx63 \ # RUN: | FileCheck %t-ptx63-sm_72.ll @@ -33,7 +33,7 @@ # RUN: FileCheck %t-ptx63-sm_75.ll < %t-ptx63-sm_75.ll \ # RUN: --check-prefixes=INTRINSICS,M16N16,EXTGEOM,INT,SUBINT # RUN: FileCheck %t-ptx63-sm_75.ll < %t-ptx63-sm_75.ll \ -# RUN: --check-prefixes=INTRINSICS,NOMMA +# RUN: --check-prefixes=INTRINSICS,NOMMA,NODOUBLE,NOALTFLOAT # RUN: llc < %t-ptx63-sm_75.ll -march=nvptx64 -mcpu=sm_75 -mattr=+ptx63 \ # RUN: | FileCheck %t-ptx63-sm_75.ll @@ -42,10 +42,28 @@ # RUN: FileCheck %t-ptx64-sm_70.ll < %t-ptx64-sm_70.ll \ # RUN: --check-prefixes=INTRINSICS,M16N16,EXTGEOM,MMA # RUN: FileCheck %t-ptx64-sm_70.ll < %t-ptx64-sm_70.ll \ -# RUN: --check-prefixes=INTRINSICS,NOINT,NOSUBINT +# RUN: --check-prefixes=INTRINSICS,NOINT,NOSUBINT,NODOUBLE,NOALTFLOAT # RUN: llc < %t-ptx64-sm_70.ll -march=nvptx64 -mcpu=sm_70 -mattr=+ptx64 \ # RUN: | FileCheck %t-ptx64-sm_70.ll +# Check all variants of instructions supported by PTX65 on SM75+ +# RUN: python %s --ptx=65 --gpu-arch=75 > %t-ptx65-sm_75.ll +# RUN: FileCheck %t-ptx65-sm_75.ll < %t-ptx65-sm_75.ll \ +# RUN: --check-prefixes=INTRINSICS,M16N16,EXTGEOM,INT,SUBINT,MMA,PTX65MMA +# RUN: FileCheck %t-ptx65-sm_75.ll < %t-ptx65-sm_75.ll \ +# RUN: --check-prefixes=INTRINSICS +# RUN: llc < %t-ptx65-sm_75.ll -march=nvptx64 -mcpu=sm_75 -mattr=+ptx65 \ +# RUN: | FileCheck %t-ptx65-sm_75.ll + +# Check all variants of instructions supported by PTX70 on SM80+ +# RUN: python %s --ptx=70 --gpu-arch=80 > %t-ptx70-sm_80.ll +# RUN: FileCheck %t-ptx70-sm_80.ll < %t-ptx70-sm_80.ll \ +# RUN: --check-prefixes=INTRINSICS,M16N16,EXTGEOM,INT,SUBINT,MMA,ALTFLOAT,DOUBLE,PTX65MMA,PTX70MMA +# RUN: FileCheck %t-ptx70-sm_80.ll < %t-ptx70-sm_80.ll \ +# RUN: --check-prefixes=INTRINSICS +# RUN: llc < %t-ptx70-sm_80.ll -march=nvptx64 -mcpu=sm_80 -mattr=+ptx70 \ +# RUN: | FileCheck %t-ptx70-sm_80.ll + from __future__ import print_function import argparse @@ -56,19 +74,23 @@ class MMAType: def __init__(self, ptx_type): self.ptx_type = ptx_type self.llvm_type = { - "f16" : "<2 x half>", - "f32" : "float", - "s32" : "i32", - "s8" : "i32", - "u8" : "i32", - "s4" : "i32", - "u4" : "i32", - "b1" : "i32", + "f16" : "<2 x half>", + "f32" : "float", + "f64" : "double", + "s32" : "i32", + "s8" : "i32", + "u8" : "i32", + "s4" : "i32", + "u4" : "i32", + "b1" : "i32", + "bf16" : "i32", + "tf32" : "i32", }[ptx_type]; self.ptx_reg_pattern = { "f16" : "%hh[0-9]+", "f32" : "%f[0-9]+", + "f64" : "%fd[0-9]+", }.get(ptx_type, "%r[0-9]+") def __repr__(self): @@ -78,16 +100,8 @@ class MMAFrag: def __init__(self, geom, frag, ptx_elt_type): self.geom = geom self.frag = frag - self.is_mma = True if geom == "m8n8k4" else False; self.mma_type = MMAType(ptx_elt_type); self.nregs = { - "a:f16" : 2 if self.is_mma else 8, - "b:f16" : 2 if self.is_mma else 8, - "c:f16" : 4, - "d:f16" : 4, - "c:f32" : 8, - "d:f32" : 8, - }.get("%s:%s" % (frag, ptx_elt_type), { # u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16 "m16n16k16:a:u8" : 2, "m16n16k16:a:s8" : 2, @@ -110,18 +124,123 @@ def __init__(self, geom, frag, ptx_elt_type): "m32n8k16:c:s32" : 8, "m32n8k16:d:s32" : 8, - # u4/s4/b1 -> s32 @ m8n8k32 (u4/s4), m8n8k128(b1) - "m8n8k128:a:b1" : 1, + "m8n8k16:a:u8": 1, + "m8n8k16:a:s8": 1, + "m8n8k16:b:u8": 1, + "m8n8k16:b:s8": 1, + "m8n8k16:c:s32": 2, + "m8n8k16:d:s32": 2, + + "m16n8k16:a:u8": 2, + "m16n8k16:a:s8": 2, + "m16n8k16:b:u8": 1, + "m16n8k16:b:s8": 1, + "m16n8k16:c:s32": 4, + "m16n8k16:d:s32": 4, + + "m16n8k32:a:u8": 4, + "m16n8k32:a:s8": 4, + "m16n8k32:b:u8": 2, + "m16n8k32:b:s8": 2, + "m16n8k32:c:s32": 4, + "m16n8k32:d:s32": 4, + + # u4/s4 -> s32 @ m8n8k32 (u4/s4) "m8n8k32:a:u4" : 1, "m8n8k32:a:s4" : 1, - "m8n8k128:b:b1" : 1, "m8n8k32:b:u4" : 1, "m8n8k32:b:s4" : 1, - "m8n8k128:c:s32" : 2, - "m8n8k128:d:s32" : 2, "m8n8k32:c:s32" : 2, "m8n8k32:d:s32" : 2, - }.get("%s:%s:%s" % (geom, frag, ptx_elt_type), None)); + + "m16n8k32:a:u4" : 2, + "m16n8k32:a:s4" : 2, + "m16n8k32:b:u4" : 1, + "m16n8k32:b:s4" : 1, + "m16n8k32:c:s32" : 4, + "m16n8k32:d:s32" : 4, + + "m16n8k64:a:u4" : 4, + "m16n8k64:a:s4" : 4, + "m16n8k64:b:u4" : 2, + "m16n8k64:b:s4" : 2, + "m16n8k64:c:s32" : 4, + "m16n8k64:d:s32" : 4, + + # b1 -> s32 @ m8n8k128(b1) + "m8n8k128:a:b1" : 1, + "m8n8k128:b:b1" : 1, + "m8n8k128:c:s32" : 2, + "m8n8k128:d:s32" : 2, + + "m16n8k128:a:b1" : 2, + "m16n8k128:b:b1" : 1, + "m16n8k128:c:s32" : 4, + "m16n8k128:d:s32" : 4, + + "m16n8k256:a:b1" : 4, + "m16n8k256:b:b1" : 2, + "m16n8k256:c:s32" : 4, + "m16n8k256:d:s32" : 4, + + # bf16 -> s32 @ m16n16k16/m8n32k16/m32n8k16 + "m16n16k16:a:bf16" : 4, + "m16n16k16:b:bf16" : 4, + "m8n32k16:a:bf16" : 2, + "m8n32k16:b:bf16" : 8, + "m32n8k16:a:bf16" : 8, + "m32n8k16:b:bf16" : 2, + + "m16n8k16:a:bf16" : 4, + "m16n8k16:b:bf16" : 2, + "m16n8k16:c:f32" : 4, + "m16n8k16:d:f32" : 4, + "m16n8k8:a:bf16" : 2, + "m16n8k8:b:bf16" : 1, + "m16n8k8:c:f32" : 4, + "m16n8k8:d:f32" : 4, + + "m8n8k4:a:f64" : 1, + "m8n8k4:b:f64" : 1, + "m8n8k4:c:f64" : 2, + "m8n8k4:d:f64" : 2, + + # tf32 -> s32 @ m16n16k8 + "m16n16k8:a:tf32" : 4, + "m16n16k8:b:tf32" : 4, + + "m16n8k4:a:tf32" : 2, + "m16n8k4:b:tf32" : 1, + "m16n8k4:c:f32" : 4, + "m16n8k4:d:f32" : 4, + "m16n8k8:a:tf32" : 4, + "m16n8k8:b:tf32" : 2, + "m16n8k8:c:f32" : 4, + "m16n8k8:d:f32" : 4, + + "m8n8k4:a:f16": 2, + "m8n8k4:b:f16": 2, + "m16n8k8:a:f16": 2, + "m16n8k8:b:f16": 1, + "m16n8k8:c:f16": 2, + "m16n8k8:d:f16": 2, + "m16n8k8:c:f32": 4, + "m16n8k8:d:f32": 4, + "m16n8k16:a:f16": 4, + "m16n8k16:b:f16": 2, + "m16n8k16:c:f16": 2, + "m16n8k16:d:f16": 2, + "m16n8k16:c:f32": 4, + "m16n8k16:d:f32": 4, + }.get("%s:%s:%s" % (geom, frag, ptx_elt_type), { + # All other FP shape/fragment/type combinations have the same size + "a:f16" : 8, + "b:f16" : 8, + "c:f16" : 4, + "d:f16" : 4, + "c:f32" : 8, + "d:f32" : 8, + }.get("%s:%s" % (frag, ptx_elt_type), None)) assert(self.nregs); def __repr__(self): @@ -153,9 +272,13 @@ def make_ldst_ops(geoms, frags, types): return [MMAFrag(geom, frag, ptx_type) for (geom, frag, ptx_type) in product(geoms, frags, types)] -def get_mma_ops(): - return (make_mma_ops(["m8n8k4"], - ["f16"], [], ["f16", "f32"], ["f16", "f32"]) + +def get_wmma_ops(): + return (make_mma_ops(["m16n16k8"], + ["tf32"], [], ["f32"], []) + + make_mma_ops(["m16n16k16", "m32n8k16", "m8n32k16"], + ["bf16"], [], ["f32"], []) + + make_mma_ops(["m8n8k4"], + ["f64"], [], ["f64"], []) + make_mma_ops(["m16n16k16", "m32n8k16", "m8n32k16"], ["f16"], [], ["f16", "f32"], ["f16", "f32"]) + make_mma_ops(["m16n16k16", "m32n8k16", "m8n32k16"], @@ -164,20 +287,38 @@ def get_mma_ops(): ["s4", "u4"], [], ["s32"], []) + make_mma_ops(["m8n8k128"], ["b1"], [], ["s32"], [])) + +def get_mma_ops(): + return (make_mma_ops(["m8n8k4"], + ["f64"], [], ["f64"], []) + + make_mma_ops(["m16n8k4", "m16n8k8"], + ["tf32"], [], ["f32"], []) + + make_mma_ops(["m16n8k16", "m16n8k8"], + ["bf16"], [], ["f32"], []) + + make_mma_ops(["m8n8k4", "m16n8k8", "m16n8k16"], + ["f16"], [], ["f16", "f32"], ["f16", "f32"]) + + make_mma_ops(["m8n8k16", "m16n8k16", "m16n8k32"], + ["s8", "u8"], ["s8", "u8"], ["s32"], []) + + make_mma_ops(["m8n8k32", "m16n8k32", "m16n8k64"], + ["s4", "u4"], ["s4", "u4"], ["s32"], []) + + make_mma_ops(["m8n8k128", "m16n8k128", "m16n8k256"], + ["b1"], [], ["s32"], [])) + def get_ldst_ops(kind): ldst_ops = (make_ldst_ops(["m16n16k16", "m32n8k16", "m8n32k16"], - ["a", "b"], ["f16", "u8", "s8"]) + + ["a", "b"], ["f16", "u8", "s8", "bf16"]) + make_ldst_ops(["m16n16k16", "m32n8k16", "m8n32k16"], ["c", "d"], ["f16", "f32", "s32"]) + make_ldst_ops(["m8n8k32"], ["a", "b"], ["s4","u4"]) + make_ldst_ops(["m8n8k128"], ["a", "b"], ["b1"]) + - make_ldst_ops(["m8n8k32", "m8n8k128"], ["c", "d"], ["s32"])) + make_ldst_ops(["m8n8k32", "m8n8k128"], ["c", "d"], ["s32"]) + + make_ldst_ops(["m8n8k4"], ["a", "b", "c", "d"], ["f64"]) + + make_ldst_ops(["m16n16k8"], ["a", "b"], ["tf32"]) + + make_ldst_ops(["m16n16k8"], ["c", "d"], ["f32"])) return [ x for x in ldst_ops if (x.frag == "d") == (kind == "store")] -def is_geom_supported(geom): +def is_wmma_geom_supported(geom): # geometries for FP and ints. - if geom == "m8n8k4": - return ptx_version >= 64 if geom in ["m8n32k16", "m32n8k16"]: return ptx_version >= 61 # geometries for sub-ints. @@ -185,6 +326,21 @@ def is_geom_supported(geom): return ptx_version >= 63 and gpu_arch >= 75 if geom == "m16n16k16": return ptx_version >= 60 + if geom == "m16n8k8": + return ptx_version >= 65 + if geom in ["m16n16k8", "m8n8k4"]: + return ptx_version >= 70 + assert(False) # Unexpected geometry. + +def is_mma_geom_supported(geom): + # geometries for FP and ints. + if geom == "m8n8k4": + return ptx_version >= 64 + if geom in ["m16n8k8", "m8n8k16", "m8n8k32"]: + return ptx_version >= 65 + if geom in ["m16n8k16", "m16n8k4", "m16n8k32", "m16n8k64", "m8n8k128", + "m16n8k128", "m16n8k256"]: + return ptx_version >= 70 assert(False) # Unexpected geometry. def is_type_supported(ptx_type): @@ -192,30 +348,63 @@ def is_type_supported(ptx_type): return ptx_version >= 63 and gpu_arch >= 72 if ptx_type in ["s4", "u4", "b1"]: return ptx_version >= 63 and gpu_arch >= 75 + if ptx_type in ["bf16", "tf32", "f64"]: + return ptx_version >= 70 return ptx_version >= 60 and gpu_arch >= 70 +def is_wmma_variant_supported(op, layout_a, layout_b, rnd, satf): + if not (is_type_supported(op.a.mma_type.ptx_type) + and is_wmma_geom_supported(op.a.geom)): + return False + + # rnd is only supported for FP64 WMMA + if rnd and op.a.mma_type.ptx_type != "f64": + return False + + if satf: + # satfinite for floating points was removed in PTX 6.5 + if op.a.mma_type.ptx_type == "f16" and ptx_version >= 65: + return False + if not op.a.mma_type.ptx_type in ["f16", "s8", "u8", "s4", "u4"]: + return False + + # sub-integer require row/col layout. + if op.a.mma_type.ptx_type in ["s4", "u4", "b1"]: + return layout_a == "row" and layout_b == "col" + return True def is_mma_variant_supported(op, layout_a, layout_b, satf): if not (is_type_supported(op.a.mma_type.ptx_type) - and is_geom_supported(op.a.geom)): + and is_mma_geom_supported(op.a.geom)): + return False + + if satf and not op.a.mma_type.ptx_type in ["s8", "u8", "s4", "u4"]: + return False + + # If the type of C is f32 then so must the type of D + if (op.a.geom == "m8n8k4" and op.c.mma_type.ptx_type == "f32" + and op.d.mma_type.ptx_type != "f32"): return False - if op.a.geom == "m8n8k4": - if satf: + + # A and B type must be the same. C and D type must be the same + if (op.a.geom == "m16n8k8" + and (op.a.mma_type.ptx_type != op.b.mma_type.ptx_type + or op.c.mma_type.ptx_type != op.d.mma_type.ptx_type)): return False - if op.c.mma_type.ptx_type == "f32": - # If C is f32, D must be, too. - return op.d.mma_type.ptx_type == "f32" - # sub-integer require row/col layout, and no satf. - if op.a.mma_type.ptx_type in ["s4", "u4", "b1"]: - if op.a.mma_type.ptx_type == "b1" and satf: + # C and D type must be the same + if (op.a.geom == "m16n8k16" + and op.c.mma_type.ptx_type != op.d.mma_type.ptx_type): return False + + # Require row/col layout for all MMA except m8n8k4 on FP16 + if not (op.a.geom == "m8n8k4" and op.a.mma_type.ptx_type == "f16"): return layout_a == "row" and layout_b == "col" return True def is_ldst_variant_supported(frag, layout): if not (is_type_supported(frag.mma_type.ptx_type) - and is_geom_supported(frag.geom)): + and is_wmma_geom_supported(frag.geom)): return False if frag.mma_type.ptx_type in ["s4", "u4", "b1"]: # sub-integer require sm_75 and ptx63, row/col layout for a/b. @@ -396,24 +585,37 @@ def gen_wmma_store_tests(): return generated_items def mma_signature(op): - if op.a.mma_type.ptx_type in ["s8", "u8", "s4", "u4", "b1"]: - # int and sub-int ops are identified by input type. - return op.a.mma_type.ptx_type - else: - # the rest are FP ops identified by accumulator & result type. + if op.a.mma_type.ptx_type == "f16": + # FP16 ops identified by accumulator & result type. return "%s.%s" % (op.d.mma_type.ptx_type, op.c.mma_type.ptx_type) + elif op.a.mma_type.ptx_type != op.b.mma_type.ptx_type: + # other ops are identified by input types. + return "%s.%s" % (op.a.mma_type.ptx_type, op.b.mma_type.ptx_type) + else: + # if input types are the same, it only appears once. + return op.a.mma_type.ptx_type def mma_ptx_signature(op): - if op.a.mma_type.ptx_type in ["s8", "u8", "s4", "u4", "b1"]: - # int and sub-int instructions encode all four types as D.A.B.C - return ".".join(x.mma_type.ptx_type for x in (op.d, op.a, op.b, op.c)) - if op.a.geom == "m8n8k4": - return "%s.f16.f16.%s" % (op.d.mma_type.ptx_type, op.c.mma_type.ptx_type) + # Encode all four types as D.A.B.C + return ".".join(x.mma_type.ptx_type for x in (op.d, op.a, op.b, op.c)) + +def wmma_signature(op): + if op.a.mma_type.ptx_type == "f16": + # FP16 ops identified by accumulator & result type. + return "%s.%s" % (op.d.mma_type.ptx_type, op.c.mma_type.ptx_type) else: - # the rest are FP instructions use D.C + # other ops are identified by input type. + return op.a.mma_type.ptx_type + +def wmma_ptx_signature(op): + if op.a.mma_type.ptx_type == "f16": + # FP16 instructions use D.C return "%s.%s" % (op.d.mma_type.ptx_type, op.c.mma_type.ptx_type) + else: + # other instructions encode all four types as D.A.B.C + return ".".join(x.mma_type.ptx_type for x in (op.d, op.a, op.b, op.c)) -def gen_wmma_mma_tests(): +def common_mma_test_gen(params, op, intrinsic_template, instruction_template): mma_template = """ declare ${ret_ty} @${intrinsic}( ${args}); @@ -431,10 +633,61 @@ def gen_wmma_mma_tests(): ret ${ret_ty} %r; } """ - wmma_intrinsic_template = "llvm.nvvm.wmma.${geom}.mma.${alayout}.${blayout}.${intrinsic_signature}${satf}" - wmma_instruction_template = "wmma.mma${mma_variant}.sync${aligned}.${alayout}.${blayout}.${geom}.${ptx_signature}${satf}" - mma_intrinsic_template = "llvm.nvvm.mma.${geom}.${alayout}.${blayout}.${intrinsic_signature}" - mma_instruction_template = "mma.sync${aligned}.${geom}.${alayout}.${blayout}.${ptx_signature}" + + test_params = params + test_params["intrinsic"] = Template(intrinsic_template).substitute(params) + test_params["function"] = test_params["intrinsic"].replace(".", "_") + test_params["instruction"] = Template(instruction_template).substitute(params) + test_params["ret_ty"] = make_wmma_ld_ret_ty(op.d) + test_params["check_a"] = check_pattern(op.a) + test_params["check_b"] = check_pattern(op.b) + test_params["check_c"] = check_pattern(op.c) + test_params["check_d"] = check_pattern(op.d) + args = ",\n ".join(make_wmma_slice_args(frag) + for frag in (op.a, op.b, op.c)) + test_params["args"] = args + print(Template(mma_template).substitute(test_params)) + return (test_params["intrinsic"], test_params["instruction"]) + +def gen_wmma_mma_tests(): + wmma_intrinsic_template = "llvm.nvvm.wmma.${geom}.mma.${alayout}.${blayout}${rnd}.${intrinsic_signature}${satf}" + wmma_instruction_template = "wmma.mma${mma_variant}.sync${aligned}.${alayout}.${blayout}.${geom}${rnd}.${ptx_signature}${satf}" + + generated_items=[] + + for op, alayout, blayout, rnd, satf in product( + get_wmma_ops(), + ["row","col"], + ["row","col"], + [".rn", ".rz", ".rm", ".rp", ""], + [".satfinite", ""]): + + if not is_wmma_variant_supported(op, alayout, blayout, rnd, satf): + continue + + params = { + "aligned" : ".aligned" if ptx_version >= 63 else "", + "alayout" : alayout, + "blayout" : blayout, + "intrinsic_signature" : wmma_signature(op), + "ptx_signature" : wmma_ptx_signature(op), + "satf" : satf, + "rnd" : rnd, + "geom" : op.a.geom, + "mma_variant" : ".xor.popc" if op.a.mma_type.ptx_type == "b1" else "", + } + + intrinsic_template = wmma_intrinsic_template + instruction_template = wmma_instruction_template + + generated_items.append(common_mma_test_gen(params, op, + intrinsic_template, instruction_template)) + + return generated_items + +def gen_mma_tests(): + mma_intrinsic_template = "llvm.nvvm.mma.${geom}.${alayout}.${blayout}${satf}.${intrinsic_signature}" + mma_instruction_template = "mma.sync${aligned}.${geom}.${alayout}.${blayout}${satf}.${ptx_signature}${mma_variant}" generated_items=[] @@ -458,28 +711,11 @@ def gen_wmma_mma_tests(): "mma_variant" : ".xor.popc" if op.a.mma_type.ptx_type == "b1" else "", } - if op.a.geom == "m8n8k4": - intrinsic_template = mma_intrinsic_template - instruction_template = mma_instruction_template - else: - intrinsic_template = wmma_intrinsic_template - instruction_template = wmma_instruction_template + intrinsic_template = mma_intrinsic_template + instruction_template = mma_instruction_template - test_params = params - test_params["intrinsic"] = Template(intrinsic_template).substitute(params) - test_params["function"] = test_params["intrinsic"].replace(".", "_") - test_params["instruction"] = Template(instruction_template).substitute(params) - test_params["ret_ty"] = make_wmma_ld_ret_ty(op.d) - test_params["check_a"] = check_pattern(op.a) - test_params["check_b"] = check_pattern(op.b) - test_params["check_c"] = check_pattern(op.c) - test_params["check_d"] = check_pattern(op.d) - args = ",\n ".join(make_wmma_slice_args(frag) - for frag in (op.a, op.b, op.c)) - test_params["args"] = args - print(Template(mma_template).substitute(test_params)) - generated_items.append((test_params["intrinsic"], - test_params["instruction"])) + generated_items.append(common_mma_test_gen(params, op, + intrinsic_template, instruction_template)) return generated_items @@ -497,6 +733,8 @@ def gen_check_unsupported_ops(items): ; NOINT-NOT: .{{s32|s8}} ; NOSUBINT-NOT: {{s4|u4|b1}} ; NOMMA-NOT: .m8n8k4. +; NOALTFLOAT-NOT: .{{bf16|tf32}} +; NODOUBLE-NOT: .f64 ; M16N16-DAG: m16n16k16.load.{{[ab].*}}.f16.p ; M16N16-DAG: m16n16k16.{{load|store}}.{{[cd].*\.(f16|f32)}}.p @@ -543,10 +781,61 @@ def gen_check_unsupported_ops(items): ; SUBINT-DAG: m8n8k32.mma.{{.*}}.s4 ; SUBINT-DAG: m8n8k128.mma.{{.*}}.b1 +; ALTFLOAT-DAG: m16n16k16.load.{{[ab].*}}.bf16.p +; ALTFLOAT-DAG: m8n32k16.load.{{[ab].*}}.bf16.p +; ALTFLOAT-DAG: m32n8k16.load.{{[ab].*}}.bf16.p +; ALTFLOAT-DAG: m16n16k8.load.{{[ab].*}}.tf32.p +; ALTFLOAT-DAG: m16n16k16.mma.{{.*}}.bf16 +; ALTFLOAT-DAG: m8n32k16.mma.{{.*}}.bf16 +; ALTFLOAT-DAG: m32n8k16.mma.{{.*}}.bf16 +; ALTFLOAT-DAG: m16n16k8.mma.{{.*}}.tf32 + +; DOUBLE-DAG: m8n8k4.load.{{[abc].*}}.f64.p +; DOUBLE-DAG: m8n8k4.store.d.{{.*}}.f64.p +; DOUBLE-DAG: m8n8k4.mma.{{.*}}.f64 + ; MMA-DAG: mma.m8n8k4.{{.*}}.f16.f32 ; MMA-DAG: mma.m8n8k4.{{.*}}.f32.f16 ; MMA-DAG: mma.m8n8k4.{{.*}}.f16.f16 ; MMA-DAG: mma.m8n8k4.{{.*}}.f32.f32 + +; PTX65MMA-DAG: mma.m16n8k8.row.col.f16.f16 +; PTX65MMA-DAG: mma.m16n8k8.row.col.f32.f32 +; PTX65MMA-DAG: mma.m8n8k16.row.col{{.*}}.u8.u8 +; PTX65MMA-DAG: mma.m8n8k16.row.col{{.*}}.s8.s8 +; PTX65MMA-DAG: mma.m8n8k16.row.col{{.*}}.s8.u8 +; PTX65MMA-DAG: mma.m8n8k16.row.col{{.*}}.u8.s8 +; PTX65MMA-DAG: mma.m8n8k32.row.col{{.*}}.u4.u4 +; PTX65MMA-DAG: mma.m8n8k32.row.col{{.*}}.s4.s4 +; PTX65MMA-DAG: mma.m8n8k32.row.col{{.*}}.s4.u4 +; PTX65MMA-DAG: mma.m8n8k32.row.col{{.*}}.u4.s4 + +; PTX70MMA-DAG: mma.m8n8k4.row.col.f64 +; PTX70MMA-DAG: mma.m16n8k4.row.col.tf32 +; PTX70MMA-DAG: mma.m16n8k8.row.col.tf32 +; PTX70MMA-DAG: mma.m16n8k16.row.col.bf16 +; PTX70MMA-DAG: mma.m16n8k8.row.col.bf16 +; PTX70MMA-DAG: mma.m16n8k16.row.col.f16.f16 +; PTX70MMA-DAG: mma.m16n8k16.row.col.f32.f32 +; PTX70MMA-DAG: mma.m16n8k16.row.col{{.*}}.u8.u8 +; PTX70MMA-DAG: mma.m16n8k16.row.col{{.*}}.s8.s8 +; PTX70MMA-DAG: mma.m16n8k16.row.col{{.*}}.s8.u8 +; PTX70MMA-DAG: mma.m16n8k16.row.col{{.*}}.u8.s8 +; PTX70MMA-DAG: mma.m16n8k32.row.col{{.*}}.u8.u8 +; PTX70MMA-DAG: mma.m16n8k32.row.col{{.*}}.s8.s8 +; PTX70MMA-DAG: mma.m16n8k32.row.col{{.*}}.s8.u8 +; PTX70MMA-DAG: mma.m16n8k32.row.col{{.*}}.u8.s8 +; PTX70MMA-DAG: mma.m16n8k32.row.col{{.*}}.u4.u4 +; PTX70MMA-DAG: mma.m16n8k32.row.col{{.*}}.s4.s4 +; PTX70MMA-DAG: mma.m16n8k32.row.col{{.*}}.s4.u4 +; PTX70MMA-DAG: mma.m16n8k32.row.col{{.*}}.u4.s4 +; PTX70MMA-DAG: mma.m16n8k64.row.col{{.*}}.u4.u4 +; PTX70MMA-DAG: mma.m16n8k64.row.col{{.*}}.s4.s4 +; PTX70MMA-DAG: mma.m16n8k64.row.col{{.*}}.s4.u4 +; PTX70MMA-DAG: mma.m16n8k64.row.col{{.*}}.u4.s4 +; PTX70MMA-DAG: mma.m8n8k128.row.col.b1 +; PTX70MMA-DAG: mma.m16n8k128.row.col.b1 +; PTX70MMA-DAG: mma.m16n8k256.row.col.b1 ; """) @@ -561,6 +850,7 @@ def gen_tests(): items = gen_wmma_load_tests() items += gen_wmma_store_tests() items += gen_wmma_mma_tests() + items += gen_mma_tests() gen_check_unsupported_ops(items) parser = argparse.ArgumentParser() From 1df981f43ae9041ed326a3d806fcbb8278211ca4 Mon Sep 17 00:00:00 2001 From: Lei Huang Date: Tue, 29 Jun 2021 18:03:23 -0500 Subject: [PATCH 248/619] Revert "Attempt to disable MLIR JIT tests on PowerPC to unbreak the bot" This reverts commit 652f4b5140e231b679564a86019307291f7bf7cc. Re-enable MLLIR JIT tests. The MLIR Bot was updated to export LD_LIBRARY_PATH=/usr/lib64, which seem to fix this issue. --- mlir/test/Unit/lit.cfg.py | 4 ---- mlir/test/Unit/lit.site.cfg.py.in | 1 - mlir/test/mlir-cpu-runner/lit.local.cfg | 4 ---- 3 files changed, 9 deletions(-) diff --git a/mlir/test/Unit/lit.cfg.py b/mlir/test/Unit/lit.cfg.py index 7cde5003bc07c..d645971074f54 100644 --- a/mlir/test/Unit/lit.cfg.py +++ b/mlir/test/Unit/lit.cfg.py @@ -37,7 +37,3 @@ for symbolizer in ['ASAN_SYMBOLIZER_PATH', 'MSAN_SYMBOLIZER_PATH']: if symbolizer in os.environ: config.environment[symbolizer] = os.environ[symbolizer] - -# FIXME: PPC needs to be switched to use the large code model -if 'powerpc' in config.host_triple: - config.unsupported = True diff --git a/mlir/test/Unit/lit.site.cfg.py.in b/mlir/test/Unit/lit.site.cfg.py.in index 813a8e297275d..5ad2f7dda8c25 100644 --- a/mlir/test/Unit/lit.site.cfg.py.in +++ b/mlir/test/Unit/lit.site.cfg.py.in @@ -11,7 +11,6 @@ config.shlibdir = "@SHLIBDIR@" config.mlir_src_root = "@MLIR_SOURCE_DIR@" config.mlir_obj_root = "@MLIR_BINARY_DIR@" config.mlir_tools_dir = "@MLIR_TOOLS_DIR@" -config.host_triple = "@LLVM_HOST_TRIPLE@" # Support substitution of the tools_dir and build_mode with user parameters. # This is used when we can't determine the tool dir at configuration time. diff --git a/mlir/test/mlir-cpu-runner/lit.local.cfg b/mlir/test/mlir-cpu-runner/lit.local.cfg index 2bf36f7688185..012da916f226b 100644 --- a/mlir/test/mlir-cpu-runner/lit.local.cfg +++ b/mlir/test/mlir-cpu-runner/lit.local.cfg @@ -4,10 +4,6 @@ import sys if sys.platform == 'win32': config.unsupported = True -# FIXME: PPC needs to be switched to use the large code model -if 'powerpc' in config.host_triple: - config.unsupported = True - # Requires a non-empty default triple for these tests. # Passing ` -DLLVM_DEFAULT_TARGET_TRIPLE="" ` when the # host target isn't available is how LLVM filters From 6cda73e3c44968eb6fff4b73cb6f1d0ef7d861f4 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Tue, 29 Jun 2021 16:27:24 -0700 Subject: [PATCH 249/619] [CodeGen] Add ParmVarDecls to FunctionDecls that are created to generate ObjC property getter/setter functions This is needed to prevent clang from crashing when we make the changes proposed in https://reviews.llvm.org/D98799. Differential Revision: https://reviews.llvm.org/D104883 --- clang/lib/CodeGen/CGObjC.cpp | 44 +++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/clang/lib/CodeGen/CGObjC.cpp b/clang/lib/CodeGen/CGObjC.cpp index 63429b1d4f653..b865780ffe93c 100644 --- a/clang/lib/CodeGen/CGObjC.cpp +++ b/clang/lib/CodeGen/CGObjC.cpp @@ -3698,12 +3698,18 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction( FunctionTy, nullptr, SC_Static, false, false); FunctionArgList args; - ImplicitParamDecl DstDecl(C, FD, SourceLocation(), /*Id=*/nullptr, DestTy, - ImplicitParamDecl::Other); - args.push_back(&DstDecl); - ImplicitParamDecl SrcDecl(C, FD, SourceLocation(), /*Id=*/nullptr, SrcTy, - ImplicitParamDecl::Other); - args.push_back(&SrcDecl); + ParmVarDecl *Params[2]; + ParmVarDecl *DstDecl = ParmVarDecl::Create( + C, FD, SourceLocation(), SourceLocation(), nullptr, DestTy, + C.getTrivialTypeSourceInfo(DestTy, SourceLocation()), SC_None, + /*DefArg=*/nullptr); + args.push_back(Params[0] = DstDecl); + ParmVarDecl *SrcDecl = ParmVarDecl::Create( + C, FD, SourceLocation(), SourceLocation(), nullptr, SrcTy, + C.getTrivialTypeSourceInfo(SrcTy, SourceLocation()), SC_None, + /*DefArg=*/nullptr); + args.push_back(Params[1] = SrcDecl); + FD->setParams(Params); const CGFunctionInfo &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, args); @@ -3719,12 +3725,12 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction( StartFunction(FD, ReturnTy, Fn, FI, args); - DeclRefExpr DstExpr(C, &DstDecl, false, DestTy, VK_PRValue, SourceLocation()); + DeclRefExpr DstExpr(C, DstDecl, false, DestTy, VK_PRValue, SourceLocation()); UnaryOperator *DST = UnaryOperator::Create( C, &DstExpr, UO_Deref, DestTy->getPointeeType(), VK_LValue, OK_Ordinary, SourceLocation(), false, FPOptionsOverride()); - DeclRefExpr SrcExpr(C, &SrcDecl, false, SrcTy, VK_PRValue, SourceLocation()); + DeclRefExpr SrcExpr(C, SrcDecl, false, SrcTy, VK_PRValue, SourceLocation()); UnaryOperator *SRC = UnaryOperator::Create( C, &SrcExpr, UO_Deref, SrcTy->getPointeeType(), VK_LValue, OK_Ordinary, SourceLocation(), false, FPOptionsOverride()); @@ -3782,12 +3788,18 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction( FunctionTy, nullptr, SC_Static, false, false); FunctionArgList args; - ImplicitParamDecl DstDecl(C, FD, SourceLocation(), /*Id=*/nullptr, DestTy, - ImplicitParamDecl::Other); - args.push_back(&DstDecl); - ImplicitParamDecl SrcDecl(C, FD, SourceLocation(), /*Id=*/nullptr, SrcTy, - ImplicitParamDecl::Other); - args.push_back(&SrcDecl); + ParmVarDecl *Params[2]; + ParmVarDecl *DstDecl = ParmVarDecl::Create( + C, FD, SourceLocation(), SourceLocation(), nullptr, DestTy, + C.getTrivialTypeSourceInfo(DestTy, SourceLocation()), SC_None, + /*DefArg=*/nullptr); + args.push_back(Params[0] = DstDecl); + ParmVarDecl *SrcDecl = ParmVarDecl::Create( + C, FD, SourceLocation(), SourceLocation(), nullptr, SrcTy, + C.getTrivialTypeSourceInfo(SrcTy, SourceLocation()), SC_None, + /*DefArg=*/nullptr); + args.push_back(Params[1] = SrcDecl); + FD->setParams(Params); const CGFunctionInfo &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, args); @@ -3802,7 +3814,7 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction( StartFunction(FD, ReturnTy, Fn, FI, args); - DeclRefExpr SrcExpr(getContext(), &SrcDecl, false, SrcTy, VK_PRValue, + DeclRefExpr SrcExpr(getContext(), SrcDecl, false, SrcTy, VK_PRValue, SourceLocation()); UnaryOperator *SRC = UnaryOperator::Create( @@ -3829,7 +3841,7 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction( CXXConstExpr->getConstructionKind(), SourceRange()); - DeclRefExpr DstExpr(getContext(), &DstDecl, false, DestTy, VK_PRValue, + DeclRefExpr DstExpr(getContext(), DstDecl, false, DestTy, VK_PRValue, SourceLocation()); RValue DV = EmitAnyExpr(&DstExpr); From 632e15e766ee625ae367b2e872f3df903e507bfb Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Tue, 29 Jun 2021 16:39:05 -0700 Subject: [PATCH 250/619] Conditionalize function only used in an assert to address -Wunused-function --- .../bugprone/EasilySwappableParametersCheck.cpp | 4 ---- llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp | 10 +++++++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.cpp index 72f5b25e9f66a..8e972298adcec 100644 --- a/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.cpp @@ -167,10 +167,6 @@ static inline std::string formatMixFlags(MixFlags F) { return Str.str().str(); } -#else - -static inline std::string formatMixFlags(MixFlags F); - #endif // NDEBUG /// The results of the steps of an Implicit Conversion Sequence is saved in diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp index 75e551b665185..c29900b2c694d 100644 --- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp @@ -3606,15 +3606,19 @@ LDVSSABlock *LDVSSABlockIterator::operator*() { return Updater.getSSALDVBlock(*PredIt); } -} // namespace - -namespace llvm { +#ifndef NDEBUG raw_ostream &operator<<(raw_ostream &out, const LDVSSAPhi &PHI) { out << "SSALDVPHI " << PHI.PHIValNum; return out; } +#endif + +} // namespace + +namespace llvm { + /// Template specialization to give SSAUpdater access to CFG and value /// information. SSAUpdater calls methods in these traits, passing in the /// LDVSSAUpdater object, to learn about blocks and the values they define. From 98b9fc9b93d7116cec9b661f809441488c3894cb Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Tue, 29 Jun 2021 17:09:39 -0700 Subject: [PATCH 251/619] [Test] delete LPM RUNs in inline_nossp.ll This test was modified in D104958. Invoking opt with -{passname} (vs -passes={passname}) without -enable-new-pm={0|1} is now ambiguous and dependent on how LLVM was configured. Drop the LPM runs rather than fix since there unlikely to be any users still on LPM that rely on the behavior in this test. See also: https://lists.llvm.org/pipermail/llvm-dev/2021-June/151553.html Reviewed By: MaskRay Differential Revision: https://reviews.llvm.org/D105154 --- llvm/test/Transforms/Inline/inline_nossp.ll | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/test/Transforms/Inline/inline_nossp.ll b/llvm/test/Transforms/Inline/inline_nossp.ll index 2a4c8c65f8929..24fdab0b9f13b 100644 --- a/llvm/test/Transforms/Inline/inline_nossp.ll +++ b/llvm/test/Transforms/Inline/inline_nossp.ll @@ -1,7 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -inline -o - -S %s -pass-remarks-missed=inline 2>&1 | FileCheck --check-prefixes=CHECK,CHECK-INLINE %s ; RUN: opt -passes='cgscc(inline)' %s -S -pass-remarks-missed=inline 2>&1 | FileCheck --check-prefixes=CHECK,CHECK-INLINE %s -; RUN: opt -always-inline -o - -S %s | FileCheck %s ; RUN: opt -passes=always-inline -o - -S %s | FileCheck %s ; CHECK-INLINE: ssp not inlined into nossp_caller because it should never be inlined (cost=never): stack protected callee but caller requested no stack protector From c8164d0276b97679e80db01adc860271ab4a5d11 Mon Sep 17 00:00:00 2001 From: Greg Clayton Date: Tue, 29 Jun 2021 13:12:36 -0700 Subject: [PATCH 252/619] Create synthetic symbol names on demand to improve memory consumption and startup times. This fix was created after profiling the target creation of a large C/C++/ObjC application that contained almost 4,000,000 redacted symbol names. The symbol table parsing code was creating names for each of these synthetic symbols and adding them to the name indexes. The code was also adding the object file basename to the end of the symbol name which doesn't allow symbols from different shared libraries to share the names in the constant string pool. Prior to this fix this was creating 180MB of "___lldb_unnamed_symbol" symbol names and was taking a long time to generate each name, add them to the string pool and then add each of these names to the name index. This patch fixes the issue by: - not adding a name to synthetic symbols at creation time, and allows name to be dynamically generated when accessed - doesn't add synthetic symbol names to the name indexes, but catches this special case as name lookup time. Users won't typically set breakpoints or lookup these synthetic names, but support was added to do the lookup in case it does happen - removes the object file baseanme from the generated names to allow the names to be shared in the constant string pool Prior to this fix the startup times for a large application was: 12.5 seconds (cold file caches) 8.5 seconds (warm file caches) After this fix: 9.7 seconds (cold file caches) 5.7 seconds (warm file caches) The names of the symbols are auto generated by appending the symbol's UserID to the end of the "___lldb_unnamed_symbol" string and is only done when the name is requested from a synthetic symbol if it has no name. Differential Revision: https://reviews.llvm.org/D105160 --- lldb/include/lldb/Symbol/ObjectFile.h | 2 - lldb/include/lldb/Symbol/Symbol.h | 24 +++++-- lldb/include/lldb/Symbol/Symtab.h | 20 ++++++ .../Plugins/ObjectFile/ELF/ObjectFileELF.cpp | 72 ++++++++++--------- .../ObjectFile/Mach-O/ObjectFileMachO.cpp | 6 +- lldb/source/Symbol/ObjectFile.cpp | 10 --- lldb/source/Symbol/Symbol.cpp | 40 ++++++++--- lldb/source/Symbol/Symtab.cpp | 38 ++++++++-- .../ObjectFile/ELF/eh_frame-symbols.yaml | 4 +- .../Shell/SymbolFile/Breakpad/symtab.test | 2 +- 10 files changed, 148 insertions(+), 70 deletions(-) diff --git a/lldb/include/lldb/Symbol/ObjectFile.h b/lldb/include/lldb/Symbol/ObjectFile.h index 1e29cf53b78b3..dc83565c7db52 100644 --- a/lldb/include/lldb/Symbol/ObjectFile.h +++ b/lldb/include/lldb/Symbol/ObjectFile.h @@ -712,8 +712,6 @@ class ObjectFile : public std::enable_shared_from_this, /// false otherwise. bool SetModulesArchitecture(const ArchSpec &new_arch); - ConstString GetNextSyntheticSymbolName(); - static lldb::DataBufferSP MapFileData(const FileSpec &file, uint64_t Size, uint64_t Offset); diff --git a/lldb/include/lldb/Symbol/Symbol.h b/lldb/include/lldb/Symbol/Symbol.h index 3abe3114863de..be3e8abefa490 100644 --- a/lldb/include/lldb/Symbol/Symbol.h +++ b/lldb/include/lldb/Symbol/Symbol.h @@ -113,14 +113,20 @@ class Symbol : public SymbolContextScope { lldb::LanguageType GetLanguage() const { // TODO: See if there is a way to determine the language for a symbol // somehow, for now just return our best guess - return m_mangled.GuessLanguage(); + return GetMangled().GuessLanguage(); } void SetID(uint32_t uid) { m_uid = uid; } - Mangled &GetMangled() { return m_mangled; } + Mangled &GetMangled() { + SynthesizeNameIfNeeded(); + return m_mangled; + } - const Mangled &GetMangled() const { return m_mangled; } + const Mangled &GetMangled() const { + SynthesizeNameIfNeeded(); + return m_mangled; + } ConstString GetReExportedSymbolName() const; @@ -166,9 +172,9 @@ class Symbol : public SymbolContextScope { bool IsTrampoline() const; bool IsIndirect() const; - + bool IsWeak() const { return m_is_weak; } - + void SetIsWeak (bool b) { m_is_weak = b; } bool GetByteSizeIsValid() const { return m_size_is_valid; } @@ -223,6 +229,10 @@ class Symbol : public SymbolContextScope { bool ContainsFileAddress(lldb::addr_t file_addr) const; + static llvm::StringRef GetSyntheticSymbolPrefix() { + return "___lldb_unnamed_symbol"; + } + protected: // This is the internal guts of ResolveReExportedSymbol, it assumes // reexport_name is not null, and that module_spec is valid. We track the @@ -233,6 +243,8 @@ class Symbol : public SymbolContextScope { lldb_private::ModuleSpec &module_spec, lldb_private::ModuleList &seen_modules) const; + void SynthesizeNameIfNeeded() const; + uint32_t m_uid = UINT32_MAX; // User ID (usually the original symbol table index) uint16_t m_type_data = 0; // data specific to m_type @@ -258,7 +270,7 @@ class Symbol : public SymbolContextScope { // doing name lookups m_is_weak : 1, m_type : 6; // Values from the lldb::SymbolType enum. - Mangled m_mangled; // uniqued symbol name/mangled name pair + mutable Mangled m_mangled; // uniqued symbol name/mangled name pair AddressRange m_addr_range; // Contains the value, or the section offset // address when the value is an address in a // section, and the size (if any) diff --git a/lldb/include/lldb/Symbol/Symtab.h b/lldb/include/lldb/Symbol/Symtab.h index fbfa3a5e0cec7..e1ad0dfd2eb8d 100644 --- a/lldb/include/lldb/Symbol/Symtab.h +++ b/lldb/include/lldb/Symbol/Symtab.h @@ -219,6 +219,26 @@ class Symtab { return false; } + /// A helper function that looks up full function names. + /// + /// We generate unique names for synthetic symbols so that users can look + /// them up by name when needed. But because doing so is uncommon in normal + /// debugger use, we trade off some performance at lookup time for faster + /// symbol table building by detecting these symbols and generating their + /// names lazily, rather than adding them to the normal symbol indexes. This + /// function does the job of first consulting the name indexes, and if that + /// fails it extracts the information it needs from the synthetic name and + /// locates the symbol. + /// + /// @param[in] symbol_name The symbol name to search for. + /// + /// @param[out] indexes The vector if symbol indexes to update with results. + /// + /// @returns The number of indexes added to the index vector. Zero if no + /// matches were found. + uint32_t GetNameIndexes(ConstString symbol_name, + std::vector &indexes); + void SymbolIndicesToSymbolContextList(std::vector &symbol_indexes, SymbolContextList &sc_list); diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp index be73d38961ea6..a5e86f0c2c1b7 100644 --- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp +++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp @@ -1880,7 +1880,7 @@ void ObjectFileELF::CreateSections(SectionList &unified_section_list) { unified_section_list.AddSection(symtab_section_sp); } } - } + } } std::shared_ptr ObjectFileELF::GetGnuDebugDataObjectFile() { @@ -2813,31 +2813,37 @@ Symtab *ObjectFileELF::GetSymtab() { if (is_valid_entry_point && !m_symtab_up->FindSymbolContainingFileAddress( entry_point_file_addr)) { uint64_t symbol_id = m_symtab_up->GetNumSymbols(); - Symbol symbol(symbol_id, - GetNextSyntheticSymbolName().GetCString(), // Symbol name. - eSymbolTypeCode, // Type of this symbol. - true, // Is this globally visible? - false, // Is this symbol debug info? - false, // Is this symbol a trampoline? - true, // Is this symbol artificial? - entry_point_addr.GetSection(), // Section where this - // symbol is defined. - 0, // Offset in section or symbol value. - 0, // Size. - false, // Size is valid. - false, // Contains linker annotations? - 0); // Symbol flags. - m_symtab_up->AddSymbol(symbol); + // Don't set the name for any synthetic symbols, the Symbol + // object will generate one if needed when the name is accessed + // via accessors. + SectionSP section_sp = entry_point_addr.GetSection(); + Symbol symbol( + /*symID=*/symbol_id, + /*name=*/llvm::StringRef(), // Name will be auto generated. + /*type=*/eSymbolTypeCode, + /*external=*/true, + /*is_debug=*/false, + /*is_trampoline=*/false, + /*is_artificial=*/true, + /*section_sp=*/section_sp, + /*offset=*/0, + /*size=*/0, // FDE can span multiple symbols so don't use its size. + /*size_is_valid=*/false, + /*contains_linker_annotations=*/false, + /*flags=*/0); // When the entry point is arm thumb we need to explicitly set its // class address to reflect that. This is important because expression // evaluation relies on correctly setting a breakpoint at this // address. if (arch.GetMachine() == llvm::Triple::arm && - (entry_point_file_addr & 1)) + (entry_point_file_addr & 1)) { + symbol.GetAddressRef().SetOffset(entry_point_addr.GetOffset() ^ 1); m_address_class_map[entry_point_file_addr ^ 1] = AddressClass::eCodeAlternateISA; - else + } else { m_address_class_map[entry_point_file_addr] = AddressClass::eCode; + } + m_symtab_up->AddSymbol(symbol); } } @@ -2917,22 +2923,24 @@ void ObjectFileELF::ParseUnwindSymbols(Symtab *symbol_table, section_list->FindSectionContainingFileAddress(file_addr); if (section_sp) { addr_t offset = file_addr - section_sp->GetFileAddress(); - const char *symbol_name = GetNextSyntheticSymbolName().GetCString(); uint64_t symbol_id = ++last_symbol_id; + // Don't set the name for any synthetic symbols, the Symbol + // object will generate one if needed when the name is accessed + // via accessors. Symbol eh_symbol( - symbol_id, // Symbol table index. - symbol_name, // Symbol name. - eSymbolTypeCode, // Type of this symbol. - true, // Is this globally visible? - false, // Is this symbol debug info? - false, // Is this symbol a trampoline? - true, // Is this symbol artificial? - section_sp, // Section in which this symbol is defined or null. - offset, // Offset in section or symbol value. - 0, // Size: Don't specify the size as an FDE can - false, // Size is valid: cover multiple symbols. - false, // Contains linker annotations? - 0); // Symbol flags. + /*symID=*/symbol_id, + /*name=*/llvm::StringRef(), // Name will be auto generated. + /*type=*/eSymbolTypeCode, + /*external=*/true, + /*is_debug=*/false, + /*is_trampoline=*/false, + /*is_artificial=*/true, + /*section_sp=*/section_sp, + /*offset=*/offset, + /*size=*/0, // FDE can span multiple symbols so don't use its size. + /*size_is_valid=*/false, + /*contains_linker_annotations=*/false, + /*flags=*/0); new_symbols.push_back(eh_symbol); } } diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp index e7652cffb1c81..72389e9fd5c67 100644 --- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp +++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp @@ -4696,8 +4696,10 @@ size_t ObjectFileMachO::ParseSymtab() { symbol_byte_size = section_end_file_addr - symbol_file_addr; } sym[sym_idx].SetID(synthetic_sym_id++); - sym[sym_idx].GetMangled().SetDemangledName( - GetNextSyntheticSymbolName()); + // Don't set the name for any synthetic symbols, the Symbol + // object will generate one if needed when the name is accessed + // via accessors. + sym[sym_idx].GetMangled().SetDemangledName(ConstString()); sym[sym_idx].SetType(eSymbolTypeCode); sym[sym_idx].SetIsSynthetic(true); sym[sym_idx].GetAddressRef() = symbol_addr; diff --git a/lldb/source/Symbol/ObjectFile.cpp b/lldb/source/Symbol/ObjectFile.cpp index b0fdd50b3c0f1..101af01341a20 100644 --- a/lldb/source/Symbol/ObjectFile.cpp +++ b/lldb/source/Symbol/ObjectFile.cpp @@ -616,16 +616,6 @@ ObjectFile::GetSymbolTypeFromName(llvm::StringRef name, return symbol_type_hint; } -ConstString ObjectFile::GetNextSyntheticSymbolName() { - llvm::SmallString<256> name; - llvm::raw_svector_ostream os(name); - ConstString file_name = GetModule()->GetFileSpec().GetFilename(); - ++m_synthetic_symbol_idx; - os << "___lldb_unnamed_symbol" << m_synthetic_symbol_idx << "$$" - << file_name.GetStringRef(); - return ConstString(os.str()); -} - std::vector ObjectFile::GetLoadableData(Target &target) { std::vector loadables; diff --git a/lldb/source/Symbol/Symbol.cpp b/lldb/source/Symbol/Symbol.cpp index a25911d1734da..b24372795ad55 100644 --- a/lldb/source/Symbol/Symbol.cpp +++ b/lldb/source/Symbol/Symbol.cpp @@ -56,8 +56,8 @@ Symbol::Symbol(uint32_t symID, const Mangled &mangled, SymbolType type, m_size_is_synthesized(false), m_size_is_valid(size_is_valid || range.GetByteSize() > 0), m_demangled_is_synthesized(false), - m_contains_linker_annotations(contains_linker_annotations), - m_is_weak(false), m_type(type), m_mangled(mangled), m_addr_range(range), + m_contains_linker_annotations(contains_linker_annotations), + m_is_weak(false), m_type(type), m_mangled(mangled), m_addr_range(range), m_flags(flags) {} Symbol::Symbol(const Symbol &rhs) @@ -119,7 +119,7 @@ bool Symbol::ValueIsAddress() const { } ConstString Symbol::GetDisplayName() const { - return m_mangled.GetDisplayDemangledName(); + return GetMangled().GetDisplayDemangledName(); } ConstString Symbol::GetReExportedSymbolName() const { @@ -202,7 +202,7 @@ void Symbol::GetDescription(Stream *s, lldb::DescriptionLevel level, s->Printf(", value = 0x%16.16" PRIx64, m_addr_range.GetBaseAddress().GetOffset()); } - ConstString demangled = m_mangled.GetDemangledName(); + ConstString demangled = GetMangled().GetDemangledName(); if (demangled) s->Printf(", name=\"%s\"", demangled.AsCString()); if (m_mangled.GetMangledName()) @@ -218,7 +218,7 @@ void Symbol::Dump(Stream *s, Target *target, uint32_t index, // Make sure the size of the symbol is up to date before dumping GetByteSize(); - ConstString name = m_mangled.GetName(name_preference); + ConstString name = GetMangled().GetName(name_preference); if (ValueIsAddress()) { if (!m_addr_range.GetBaseAddress().Dump(s, nullptr, Address::DumpStyleFileAddress)) @@ -330,9 +330,11 @@ uint32_t Symbol::GetPrologueByteSize() { } bool Symbol::Compare(ConstString name, SymbolType type) const { - if (type == eSymbolTypeAny || m_type == type) - return m_mangled.GetMangledName() == name || - m_mangled.GetDemangledName() == name; + if (type == eSymbolTypeAny || m_type == type) { + const Mangled &mangled = GetMangled(); + return mangled.GetMangledName() == name || + mangled.GetDemangledName() == name; + } return false; } @@ -495,10 +497,10 @@ lldb::addr_t Symbol::GetLoadAddress(Target *target) const { return LLDB_INVALID_ADDRESS; } -ConstString Symbol::GetName() const { return m_mangled.GetName(); } +ConstString Symbol::GetName() const { return GetMangled().GetName(); } ConstString Symbol::GetNameNoArguments() const { - return m_mangled.GetName(Mangled::ePreferDemangledWithoutArguments); + return GetMangled().GetName(Mangled::ePreferDemangledWithoutArguments); } lldb::addr_t Symbol::ResolveCallableAddress(Target &target) const { @@ -565,3 +567,21 @@ bool Symbol::GetDisassembly(const ExecutionContext &exe_ctx, const char *flavor, bool Symbol::ContainsFileAddress(lldb::addr_t file_addr) const { return m_addr_range.ContainsFileAddress(file_addr); } + +void Symbol::SynthesizeNameIfNeeded() const { + if (m_is_synthetic && !m_mangled) { + // Synthetic symbol names don't mean anything, but they do uniquely + // identify individual symbols so we give them a unique name. The name + // starts with the synthetic symbol prefix, followed by a unique number. + // Typically the UserID of a real symbol is the symbol table index of the + // symbol in the object file's symbol table(s), so it will be the same + // every time you read in the object file. We want the same persistence for + // synthetic symbols so that users can identify them across multiple debug + // sessions, to understand crashes in those symbols and to reliably set + // breakpoints on them. + llvm::SmallString<256> name; + llvm::raw_svector_ostream os(name); + os << GetSyntheticSymbolPrefix() << GetID(); + m_mangled.SetDemangledName(ConstString(os.str())); + } +} diff --git a/lldb/source/Symbol/Symtab.cpp b/lldb/source/Symbol/Symtab.cpp index 14360aa69a72c..d859d8e251299 100644 --- a/lldb/source/Symbol/Symtab.cpp +++ b/lldb/source/Symbol/Symtab.cpp @@ -301,7 +301,7 @@ void Symtab::InitNameIndexes() { // the trampoline symbols to be searchable by name we can remove this and // then possibly add a new bool to any of the Symtab functions that // lookup symbols by name to indicate if they want trampolines. - if (symbol->IsTrampoline()) + if (symbol->IsTrampoline() || symbol->IsSynthetic()) continue; // If the symbol's name string matched a Mangled::ManglingScheme, it is @@ -628,6 +628,36 @@ void Symtab::SortSymbolIndexesByValue(std::vector &indexes, } } +uint32_t Symtab::GetNameIndexes(ConstString symbol_name, + std::vector &indexes) { + auto &name_to_index = GetNameToSymbolIndexMap(lldb::eFunctionNameTypeNone); + const uint32_t count = name_to_index.GetValues(symbol_name, indexes); + if (count) + return count; + // Synthetic symbol names are not added to the name indexes, but they start + // with a prefix and end with a the symbol UserID. This allows users to find + // these symbols without having to add them to the name indexes. These + // queries will not happen very often since the names don't mean anything, so + // performance is not paramount in this case. + llvm::StringRef name = symbol_name.GetStringRef(); + // String the synthetic prefix if the name starts with it. + if (!name.consume_front(Symbol::GetSyntheticSymbolPrefix())) + return 0; // Not a synthetic symbol name + + // Extract the user ID from the symbol name + user_id_t uid = 0; + if (getAsUnsignedInteger(name, /*Radix=*/10, uid)) + return 0; // Failed to extract the user ID as an integer + Symbol *symbol = FindSymbolByID(uid); + if (symbol == nullptr) + return 0; + const uint32_t symbol_idx = GetIndexForSymbol(symbol); + if (symbol_idx == UINT32_MAX) + return 0; + indexes.push_back(symbol_idx); + return 1; +} + uint32_t Symtab::AppendSymbolIndexesWithName(ConstString symbol_name, std::vector &indexes) { std::lock_guard guard(m_mutex); @@ -637,8 +667,7 @@ uint32_t Symtab::AppendSymbolIndexesWithName(ConstString symbol_name, if (!m_name_indexes_computed) InitNameIndexes(); - auto &name_to_index = GetNameToSymbolIndexMap(lldb::eFunctionNameTypeNone); - return name_to_index.GetValues(symbol_name, indexes); + return GetNameIndexes(symbol_name, indexes); } return 0; } @@ -655,10 +684,9 @@ uint32_t Symtab::AppendSymbolIndexesWithName(ConstString symbol_name, if (!m_name_indexes_computed) InitNameIndexes(); - auto &name_to_index = GetNameToSymbolIndexMap(lldb::eFunctionNameTypeNone); std::vector all_name_indexes; const size_t name_match_count = - name_to_index.GetValues(symbol_name, all_name_indexes); + GetNameIndexes(symbol_name, all_name_indexes); for (size_t i = 0; i < name_match_count; ++i) { if (CheckSymbolAtIndex(all_name_indexes[i], symbol_debug_type, symbol_visibility)) diff --git a/lldb/test/Shell/ObjectFile/ELF/eh_frame-symbols.yaml b/lldb/test/Shell/ObjectFile/ELF/eh_frame-symbols.yaml index 6178a45de1b59..0dcc9fb76bd4f 100644 --- a/lldb/test/Shell/ObjectFile/ELF/eh_frame-symbols.yaml +++ b/lldb/test/Shell/ObjectFile/ELF/eh_frame-symbols.yaml @@ -3,8 +3,8 @@ # CHECK: Index UserID DSX Type File Address/Value Load Address Size Flags Name # CHECK: [ 0] 1 SourceFile 0x0000000000000000 0x0000000000000000 0x00000004 - -# CHECK: [ 1] 2 SX Code 0x0000000000201180 0x0000000000000010 0x00000000 ___lldb_unnamed_symbol1$${{.*}} -# CHECK: [ 2] 3 SX Code 0x0000000000201190 0x0000000000000006 0x00000000 ___lldb_unnamed_symbol2$${{.*}} +# CHECK: [ 1] 2 SX Code 0x0000000000201180 0x0000000000000010 0x00000000 ___lldb_unnamed_symbol{{[0-9]*}} +# CHECK: [ 2] 3 SX Code 0x0000000000201190 0x0000000000000006 0x00000000 ___lldb_unnamed_symbol{{[0-9]*}} --- !ELF FileHeader: diff --git a/lldb/test/Shell/SymbolFile/Breakpad/symtab.test b/lldb/test/Shell/SymbolFile/Breakpad/symtab.test index 1eb03fa43deb0..788dafe248d50 100644 --- a/lldb/test/Shell/SymbolFile/Breakpad/symtab.test +++ b/lldb/test/Shell/SymbolFile/Breakpad/symtab.test @@ -5,7 +5,7 @@ # CHECK-LABEL: (lldb) image dump symtab symtab.out # CHECK: Symtab, file = {{.*}}symtab.out, num_symbols = 5: # CHECK: Index UserID DSX Type File Address/Value Load Address Size Flags Name -# CHECK: [ 0] 0 SX Code 0x0000000000400000 0x00000000000000b0 0x00000000 ___lldb_unnamed_symbol{{[0-9]*}}$$symtab.out +# CHECK: [ 0] 0 SX Code 0x0000000000400000 0x00000000000000b0 0x00000000 ___lldb_unnamed_symbol{{[0-9]*}} # CHECK: [ 1] 0 X Code 0x00000000004000b0 0x000000000000000c 0x00000000 f1_func # CHECK: [ 2] 0 X Code 0x00000000004000a0 0x000000000000000d 0x00000000 func_only # CHECK: [ 3] 0 X Code 0x00000000004000c0 0x0000000000000010 0x00000000 f2 From 43f6dad2344247976d5777f56a1fc29e39c6c717 Mon Sep 17 00:00:00 2001 From: Greg Clayton Date: Tue, 29 Jun 2021 18:03:25 -0700 Subject: [PATCH 253/619] Fix buildbot compile error for https://reviews.llvm.org/D105160. --- lldb/source/Symbol/Symtab.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/source/Symbol/Symtab.cpp b/lldb/source/Symbol/Symtab.cpp index d859d8e251299..89e75c28cb9b6 100644 --- a/lldb/source/Symbol/Symtab.cpp +++ b/lldb/source/Symbol/Symtab.cpp @@ -645,7 +645,7 @@ uint32_t Symtab::GetNameIndexes(ConstString symbol_name, return 0; // Not a synthetic symbol name // Extract the user ID from the symbol name - user_id_t uid = 0; + unsigned long long uid = 0; if (getAsUnsignedInteger(name, /*Radix=*/10, uid)) return 0; // Failed to extract the user ID as an integer Symbol *symbol = FindSymbolByID(uid); From 814dffa4b7edb36d3b05c6b96591330e33a82204 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 29 Jun 2021 18:47:55 -0700 Subject: [PATCH 254/619] [llvm-objcopy][MachO] Support LC_LINKER_OPTIMIZATION_HINT load command The load command is currently specific to arm64 and holds information for instruction rewriting, e.g. converting a GOT load to an ADR to compute a local address. (On ELF the information is usually conveyed by relocations, e.g. R_X86_64_REX_GOTPCRELX, R_PPC64_TOC16_HA) Reviewed By: alexander-shaposhnikov Differential Revision: https://reviews.llvm.org/D104968 --- .../MachO/lc-linker-optimization-hint.s | 25 +++++++++++++++++++ .../llvm-objcopy/MachO/MachOLayoutBuilder.cpp | 11 ++++++-- llvm/tools/llvm-objcopy/MachO/MachOReader.cpp | 9 +++++++ llvm/tools/llvm-objcopy/MachO/MachOReader.h | 1 + llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp | 25 +++++++++++++++++++ llvm/tools/llvm-objcopy/MachO/MachOWriter.h | 1 + llvm/tools/llvm-objcopy/MachO/Object.cpp | 3 +++ llvm/tools/llvm-objcopy/MachO/Object.h | 3 +++ 8 files changed, 76 insertions(+), 2 deletions(-) create mode 100644 llvm/test/tools/llvm-objcopy/MachO/lc-linker-optimization-hint.s diff --git a/llvm/test/tools/llvm-objcopy/MachO/lc-linker-optimization-hint.s b/llvm/test/tools/llvm-objcopy/MachO/lc-linker-optimization-hint.s new file mode 100644 index 0000000000000..2c2ec6f7be935 --- /dev/null +++ b/llvm/test/tools/llvm-objcopy/MachO/lc-linker-optimization-hint.s @@ -0,0 +1,25 @@ +# REQUIRES: aarch64-registered-target +## Test that we can copy LC_LINKER_OPTIMIZATION_HINT. + +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t.o +# RUN: llvm-objdump --macho --link-opt-hints - < %t.o > %tloh.txt +# RUN: FileCheck --input-file=%tloh.txt %s + +# CHECK: Linker optimiztion hints (8 total bytes) +# CHECK-NEXT: identifier 7 AdrpAdd + +# RUN: llvm-objcopy %t.o %t.copy.o +# RUN: llvm-objdump --macho --link-opt-hints - < %t.copy.o | diff %tloh.txt - + +.text +.align 2 +_test: +L1: + adrp x0, _foo@PAGE +L2: + add x0, x0, _foo@PAGEOFF +.loh AdrpAdd L1, L2 + +.data +_foo: + .long 0 diff --git a/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp b/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp index 8dc4259418400..6ed21806fe5ed 100644 --- a/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp +++ b/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp @@ -251,7 +251,10 @@ Error MachOLayoutBuilder::layoutTail(uint64_t Offset) { uint64_t StartOfFunctionStarts = StartOfExportTrie + O.Exports.Trie.size(); uint64_t StartOfDataInCode = StartOfFunctionStarts + O.FunctionStarts.Data.size(); - uint64_t StartOfSymbols = StartOfDataInCode + O.DataInCode.Data.size(); + uint64_t StartOfLinkerOptimizationHint = + StartOfDataInCode + O.DataInCode.Data.size(); + uint64_t StartOfSymbols = + StartOfLinkerOptimizationHint + O.LinkerOptimizationHint.Data.size(); uint64_t StartOfIndirectSymbols = StartOfSymbols + NListSize * O.SymTable.Symbols.size(); uint64_t StartOfSymbolStrings = @@ -320,6 +323,11 @@ Error MachOLayoutBuilder::layoutTail(uint64_t Offset) { MLC.linkedit_data_command_data.dataoff = StartOfDataInCode; MLC.linkedit_data_command_data.datasize = O.DataInCode.Data.size(); break; + case MachO::LC_LINKER_OPTIMIZATION_HINT: + MLC.linkedit_data_command_data.dataoff = StartOfLinkerOptimizationHint; + MLC.linkedit_data_command_data.datasize = + O.LinkerOptimizationHint.Data.size(); + break; case MachO::LC_FUNCTION_STARTS: MLC.linkedit_data_command_data.dataoff = StartOfFunctionStarts; MLC.linkedit_data_command_data.datasize = O.FunctionStarts.Data.size(); @@ -355,7 +363,6 @@ Error MachOLayoutBuilder::layoutTail(uint64_t Offset) { // LC_ENCRYPT_INFO/LC_ENCRYPTION_INFO_64 need to be adjusted. case MachO::LC_ENCRYPTION_INFO: case MachO::LC_ENCRYPTION_INFO_64: - case MachO::LC_LINKER_OPTIMIZATION_HINT: case MachO::LC_LOAD_DYLINKER: case MachO::LC_MAIN: case MachO::LC_RPATH: diff --git a/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp b/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp index da47e4bc61f25..7d1c29b42c2e4 100644 --- a/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp +++ b/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp @@ -151,6 +151,9 @@ Error MachOReader::readLoadCommands(Object &O) const { case MachO::LC_DATA_IN_CODE: O.DataInCodeCommandIndex = O.LoadCommands.size(); break; + case MachO::LC_LINKER_OPTIMIZATION_HINT: + O.LinkerOptimizationHintCommandIndex = O.LoadCommands.size(); + break; case MachO::LC_FUNCTION_STARTS: O.FunctionStartsCommandIndex = O.LoadCommands.size(); break; @@ -276,6 +279,11 @@ void MachOReader::readDataInCodeData(Object &O) const { return readLinkData(O, O.DataInCodeCommandIndex, O.DataInCode); } +void MachOReader::readLinkerOptimizationHint(Object &O) const { + return readLinkData(O, O.LinkerOptimizationHintCommandIndex, + O.LinkerOptimizationHint); +} + void MachOReader::readFunctionStartsData(Object &O) const { return readLinkData(O, O.FunctionStartsCommandIndex, O.FunctionStarts); } @@ -330,6 +338,7 @@ Expected> MachOReader::create() const { readExportInfo(*Obj); readCodeSignature(*Obj); readDataInCodeData(*Obj); + readLinkerOptimizationHint(*Obj); readFunctionStartsData(*Obj); readIndirectSymbolTable(*Obj); readSwiftVersion(*Obj); diff --git a/llvm/tools/llvm-objcopy/MachO/MachOReader.h b/llvm/tools/llvm-objcopy/MachO/MachOReader.h index b446e02865e57..ca3a0214cb6d3 100644 --- a/llvm/tools/llvm-objcopy/MachO/MachOReader.h +++ b/llvm/tools/llvm-objcopy/MachO/MachOReader.h @@ -39,6 +39,7 @@ class MachOReader : public Reader { void readLinkData(Object &O, Optional LCIndex, LinkData &LD) const; void readCodeSignature(Object &O) const; void readDataInCodeData(Object &O) const; + void readLinkerOptimizationHint(Object &O) const; void readFunctionStartsData(Object &O) const; void readIndirectSymbolTable(Object &O) const; void readSwiftVersion(Object &O) const; diff --git a/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp b/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp index 24a9d28dfbd96..295098ed41183 100644 --- a/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp +++ b/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp @@ -107,6 +107,16 @@ size_t MachOWriter::totalSize() const { LinkEditDataCommand.datasize); } + if (O.LinkerOptimizationHintCommandIndex) { + const MachO::linkedit_data_command &LinkEditDataCommand = + O.LoadCommands[*O.LinkerOptimizationHintCommandIndex] + .MachOLoadCommand.linkedit_data_command_data; + + if (LinkEditDataCommand.dataoff) + Ends.push_back(LinkEditDataCommand.dataoff + + LinkEditDataCommand.datasize); + } + if (O.FunctionStartsCommandIndex) { const MachO::linkedit_data_command &LinkEditDataCommand = O.LoadCommands[*O.FunctionStartsCommandIndex] @@ -421,6 +431,11 @@ void MachOWriter::writeDataInCodeData() { return writeLinkData(O.DataInCodeCommandIndex, O.DataInCode); } +void MachOWriter::writeLinkerOptimizationHint() { + return writeLinkData(O.LinkerOptimizationHintCommandIndex, + O.LinkerOptimizationHint); +} + void MachOWriter::writeFunctionStartsData() { return writeLinkData(O.FunctionStartsCommandIndex, O.FunctionStarts); } @@ -490,6 +505,16 @@ void MachOWriter::writeTail() { &MachOWriter::writeDataInCodeData); } + if (O.LinkerOptimizationHintCommandIndex) { + const MachO::linkedit_data_command &LinkEditDataCommand = + O.LoadCommands[*O.LinkerOptimizationHintCommandIndex] + .MachOLoadCommand.linkedit_data_command_data; + + if (LinkEditDataCommand.dataoff) + Queue.emplace_back(LinkEditDataCommand.dataoff, + &MachOWriter::writeLinkerOptimizationHint); + } + if (O.FunctionStartsCommandIndex) { const MachO::linkedit_data_command &LinkEditDataCommand = O.LoadCommands[*O.FunctionStartsCommandIndex] diff --git a/llvm/tools/llvm-objcopy/MachO/MachOWriter.h b/llvm/tools/llvm-objcopy/MachO/MachOWriter.h index 24d36712a2a69..c8c06d644e9f7 100644 --- a/llvm/tools/llvm-objcopy/MachO/MachOWriter.h +++ b/llvm/tools/llvm-objcopy/MachO/MachOWriter.h @@ -48,6 +48,7 @@ class MachOWriter { void writeLinkData(Optional LCIndex, const LinkData &LD); void writeCodeSignatureData(); void writeDataInCodeData(); + void writeLinkerOptimizationHint(); void writeFunctionStartsData(); void writeTail(); diff --git a/llvm/tools/llvm-objcopy/MachO/Object.cpp b/llvm/tools/llvm-objcopy/MachO/Object.cpp index c82dae6d98ada..b4f98fa84cb59 100644 --- a/llvm/tools/llvm-objcopy/MachO/Object.cpp +++ b/llvm/tools/llvm-objcopy/MachO/Object.cpp @@ -46,6 +46,9 @@ void Object::updateLoadCommandIndexes() { case MachO::LC_DATA_IN_CODE: DataInCodeCommandIndex = Index; break; + case MachO::LC_LINKER_OPTIMIZATION_HINT: + LinkerOptimizationHintCommandIndex = Index; + break; case MachO::LC_FUNCTION_STARTS: FunctionStartsCommandIndex = Index; break; diff --git a/llvm/tools/llvm-objcopy/MachO/Object.h b/llvm/tools/llvm-objcopy/MachO/Object.h index 978bd80e97b39..207502e2241b8 100644 --- a/llvm/tools/llvm-objcopy/MachO/Object.h +++ b/llvm/tools/llvm-objcopy/MachO/Object.h @@ -313,6 +313,7 @@ struct Object { ExportInfo Exports; IndirectSymbolTable IndirectSymTable; LinkData DataInCode; + LinkData LinkerOptimizationHint; LinkData FunctionStarts; LinkData CodeSignature; @@ -328,6 +329,8 @@ struct Object { Optional DySymTabCommandIndex; /// The index LC_DATA_IN_CODE load comamnd if present. Optional DataInCodeCommandIndex; + /// The index of LC_LINKER_OPTIMIZATIN_HINT load comamnd if present. + Optional LinkerOptimizationHintCommandIndex; /// The index LC_FUNCTION_STARTS load comamnd if present. Optional FunctionStartsCommandIndex; From 1d9539cf49a585e7c3cd8faa1b8e7291e0ce285c Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Wed, 30 Jun 2021 11:20:51 +0800 Subject: [PATCH 255/619] [Coroutine] Add statistics for the number of elided coroutine Now we lack a benchmark to measure the performance change for each commit. Since coro elide is the main optimization in coroutine module, I wonder it may be an estimation to count the number of elided coroutine in private code bases. e.g., for a certain commit, if we found that the number of elided goes down, we could find it before the commit check-in. Reviewed By: lxfind Differential Revision: https://reviews.llvm.org/D105095 --- llvm/lib/Transforms/Coroutines/CoroElide.cpp | 4 ++++ llvm/test/Transforms/Coroutines/coro-elide.ll | 5 +++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Coroutines/CoroElide.cpp b/llvm/lib/Transforms/Coroutines/CoroElide.cpp index 9f0adae58948a..18bd56c45de19 100644 --- a/llvm/lib/Transforms/Coroutines/CoroElide.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroElide.cpp @@ -9,6 +9,7 @@ #include "llvm/Transforms/Coroutines/CoroElide.h" #include "CoroInternal.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/IR/Dominators.h" @@ -21,6 +22,8 @@ using namespace llvm; #define DEBUG_TYPE "coro-elide" +STATISTIC(NumOfCoroElided, "The # of coroutine get elided."); + namespace { // Created on demand if the coro-elide pass has work to do. struct Lowerer : coro::LowererBase { @@ -344,6 +347,7 @@ bool Lowerer::processCoroId(CoroIdInst *CoroId, AAResults &AA, elideHeapAllocations(CoroId->getFunction(), FrameSizeAndAlign.first, FrameSizeAndAlign.second, AA); coro::replaceCoroFree(CoroId, /*Elide=*/true); + NumOfCoroElided++; } return true; diff --git a/llvm/test/Transforms/Coroutines/coro-elide.ll b/llvm/test/Transforms/Coroutines/coro-elide.ll index 674996b79e923..040a1a05d5c6d 100644 --- a/llvm/test/Transforms/Coroutines/coro-elide.ll +++ b/llvm/test/Transforms/Coroutines/coro-elide.ll @@ -1,8 +1,8 @@ ; Tests that the coro.destroy and coro.resume are devirtualized where possible, ; SCC pipeline restarts and inlines the direct calls. ; RUN: opt < %s -S \ -; RUN: -passes='cgscc(repeat<2>(inline,function(coro-elide,dce)))' \ -; RUN: | FileCheck %s +; RUN: -passes='cgscc(repeat<2>(inline,function(coro-elide,dce)))' -stats \ +; RUN: 2>&1 | FileCheck %s --check-prefixes=CHECK,STATS declare void @print(i32) nounwind @@ -165,3 +165,4 @@ declare i8* @llvm.coro.begin(token, i8*) declare i8* @llvm.coro.frame() declare i8* @llvm.coro.subfn.addr(i8*, i8) declare i1 @llvm.coro.alloc(token) +; STATS: 2 coro-elide - The # of coroutine get elided. \ No newline at end of file From 801c2b9bbaad778fd4f9fb25b4ab2bd8742a5a3b Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Wed, 30 Jun 2021 11:24:44 +0800 Subject: [PATCH 256/619] [FuncSpec] Add an option to specializing literal constant Now the option is off by default. Since we are not sure if this option would make the compile time increase aggressively. Although we tested it on SPEC2017, we may need to test more to make it on by default. Reviewed By: SjoerdMeijer Differential Revision: https://reviews.llvm.org/D104365 --- .../Transforms/IPO/FunctionSpecialization.cpp | 20 ++++----- ...nction-specialization-constant-integers.ll | 44 +++++++++++++++++++ 2 files changed, 53 insertions(+), 11 deletions(-) create mode 100644 llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-integers.ll diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp index 8c1a78a88ec5a..f61f4312b7776 100644 --- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp +++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp @@ -12,7 +12,7 @@ // // Current limitations: // - It does not handle specialization of recursive functions, -// - It does not yet handle integer constants, and integer ranges, +// - It does not yet handle integer ranges. // - Only 1 argument per function is specialised, // - The cost-model could be further looked into, // - We are not yet caching analysis results. @@ -64,6 +64,10 @@ static cl::opt cl::desc("Average loop iteration count cost"), cl::init(10)); +static cl::opt EnableSpecializationForLiteralConstant( + "function-specialization-for-literal-constant", cl::init(false), cl::Hidden, + cl::desc("Make function specialization available for literal constant.")); + // Helper to check if \p LV is either overdefined or a constant int. static bool isOverdefined(const ValueLatticeElement &LV) { return !LV.isUnknownOrUndef() && !LV.isConstant(); @@ -485,17 +489,11 @@ class FunctionSpecializer { } } - // Get the lattice value for the value the call site passes to the - // argument. If this value is not constant, move on to the next call - // site. Additionally, set the AllConstant flag to false. - if (V != A && !Solver.getLatticeValueFor(V).isConstant()) { + if (isa(V) && (Solver.getLatticeValueFor(V).isConstant() || + EnableSpecializationForLiteralConstant)) + Constants.push_back(cast(V)); + else AllConstant = false; - continue; - } - - // Add the constant to the set. - if (auto *C = dyn_cast(CS.getArgOperand(A->getArgNo()))) - Constants.push_back(C); } // If the argument can only take on constant values, AllConstant will be diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-integers.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-integers.ll new file mode 100644 index 0000000000000..598f73691ba74 --- /dev/null +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-integers.ll @@ -0,0 +1,44 @@ +; RUN: opt -function-specialization -function-specialization-for-literal-constant=true -S < %s | FileCheck %s + +; Check that the literal constant parameter could be specialized. +; CHECK: @foo.1( +; CHECK: @foo.2( + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + +declare i32 @getValue() +declare i1 @getCond() + +define internal i32 @foo(i1 %break_cond) { +entry: + br label %loop.entry + +loop.entry: + br label %loop2.entry + +loop2.entry: + br label %loop2.body + +loop2.body: + %value = call i32 @getValue() + br i1 %break_cond, label %loop2.end, label %return + +loop2.end: + %cond.end = call i1 @getCond() + br i1 %cond.end, label %loop2.entry, label %loop.end + +loop.end: + %cond2.end = call i1 @getCond() + br i1 %cond2.end, label %loop.entry, label %return + +return: + ret i32 %value +} + +define dso_local i32 @bar(i32 %x, i32 %y) { +entry: + %retval.1 = call i32 @foo(i1 1) + %retval.2 = call i32 @foo(i1 0) + %retval = add nsw i32 %retval.1, %retval.2 + ret i32 %retval +} \ No newline at end of file From 7f19aa73c265b966a5ecac4abe78b862426e28b5 Mon Sep 17 00:00:00 2001 From: Tony Tye Date: Fri, 7 May 2021 20:55:23 +0000 Subject: [PATCH 257/619] [AMDGPU] Update gfx90a memory model support Update AMDGPU gfx90a memory model to make coarse grain memory allocations consistent when fine grained system scope atomic acquire and release is performed. Reviewed By: rampitec Differential Revision: https://reviews.llvm.org/D105137 --- llvm/docs/AMDGPUUsage.rst | 202 +++++++++++++----- llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp | 74 +++++++ .../CodeGen/AMDGPU/fp64-atomics-gfx90a.ll | 16 ++ llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll | 4 + .../CodeGen/AMDGPU/memory-legalizer-fence.ll | 28 +++ .../AMDGPU/memory-legalizer-flat-system.ll | 176 +++++++++++++++ .../AMDGPU/memory-legalizer-global-system.ll | 176 +++++++++++++++ 7 files changed, 625 insertions(+), 51 deletions(-) diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index d414b06edb10f..e63af4a5e8206 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -6093,10 +6093,10 @@ For GFX90A: ensures a previous vector memory operation has completed before executing a subsequent vector memory or LDS operation and so can be used to meet the requirements of acquire and release. - * The L2 cache of one agent can be kept coherent with other agents by using - the MTYPE CC (cache-coherent) with the PTE C-bit for memory local to the L2, - and MTYPE UC (uncached) with the PTE C-bit set for memory not local to the - L2. + * The L2 cache of one agent can be kept coherent with other agents by: + using the MTYPE RW (read-write) or MTYPE CC (cache-coherent) with the PTE + C-bit for memory local to the L2; and using the MTYPE NC (non-coherent) with + the PTE C-bit set or MTYPE UC (uncached) for memory not local to the L2. * Any local memory cache lines will be automatically invalidated by writes from CUs associated with other L2 caches, or writes from the CPU, due to @@ -6108,13 +6108,21 @@ For GFX90A: the CPU cache due to the L2 probe filter and and the PTE C-bit being set. * Since all work-groups on the same agent share the same L2, no L2 invalidation or writeback is required for coherence. - * Since local memory reads and writes of work-groups in different agents - access memory using MTYPE CC, no L2 invalidate or writeback is required - for coherence. MTYPE CC causes write through to DRAM and local reads to be - invalidated by remote writes with with the PTE C-bit. - * Since remote memory reads and writes of work-groups in different agents - access memory using MTYPE UC, no L2 invalidate or writeback is required - for coherence. MTYPE UC causes direct accesses to DRAM. + * To ensure coherence of local and remote memory writes of work-groups in + different agents a ``buffer_wbl2`` is required. It will writeback dirty L2 + cache lines of MTYPE RW (used for local coarse grain memory) and MTYPE NC + ()used for remote coarse grain memory). Note that MTYPE CC (used for local + fine grain memory) causes write through to DRAM, and MTYPE UC (used for + remote fine grain memory) bypasses the L2, so both will never result in + dirty L2 cache lines. + * To ensure coherence of local and remote memory reads of work-groups in + different agents a ``buffer_invl2`` is required. It will invalidate L2 + cache lines with MTYPE NC (used for remote coarse grain memory). Note that + MTYPE CC (used for local fine grain memory) and MTYPE RW (used for local + coarse memory) cause local reads to be invalidated by remote writes with + with the PTE C-bit so these cache lines are not invalidated. Note that + MTYPE UC (used for remote fine grain memory) bypasses the L2, so will + never result in L2 cache lines that need to be invalidated. * PCIe access from the GPU to the CPU memory is kept coherent by using the MTYPE UC (uncached) which bypasses the L2. @@ -6384,14 +6392,15 @@ in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx90a-table`. 2. s_waitcnt vmcnt(0) - Must happen before - following + following buffer_invl2 and buffer_wbinvl1_vol. - Ensures the load has completed before invalidating the cache. - 3. buffer_wbinvl1_vol + 3. buffer_invl2; + buffer_wbinvl1_vol - Must happen before any following @@ -6401,7 +6410,9 @@ in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx90a-table`. - Ensures that following loads will not see - stale L1 global data. + stale L1 global data, + nor see stale L2 MTYPE + NC global data. MTYPE RW and CC memory will never be stale in L2 due to the memory probes. @@ -6444,13 +6455,15 @@ in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx90a-table`. lgkmcnt(0). - Must happen before following + buffer_invl2 and buffer_wbinvl1_vol. - Ensures the flat_load has completed before invalidating the caches. - 3. buffer_wbinvl1_vol + 3. buffer_invl2; + buffer_wbinvl1_vol - Must happen before any following @@ -6459,8 +6472,10 @@ in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx90a-table`. atomic/atomicrmw. - Ensures that following - L1 loads will not see - stale global data. + loads will not see + stale L1 global data, + nor see stale L2 MTYPE + NC global data. MTYPE RW and CC memory will never be stale in L2 due to the memory probes. @@ -6579,7 +6594,7 @@ in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx90a-table`. 2. s_waitcnt vmcnt(0) - Must happen before - following + following buffer_invl2 and buffer_wbinvl1_vol. - Ensures the atomicrmw has @@ -6587,7 +6602,8 @@ in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx90a-table`. invalidating the caches. - 3. buffer_wbinvl1_vol + 3. buffer_invl2; + buffer_wbinvl1_vol - Must happen before any following @@ -6597,8 +6613,10 @@ in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx90a-table`. - Ensures that following loads will not see - stale L1 global data. - MTYPE RW and CC L2 memory + stale L1 global data, + nor see stale L2 MTYPE + NC global data. + MTYPE RW and CC memory will never be stale in L2 due to the memory probes. @@ -6641,6 +6659,7 @@ in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx90a-table`. lgkmcnt(0). - Must happen before following + buffer_invl2 and buffer_wbinvl1_vol. - Ensures the atomicrmw has @@ -6648,7 +6667,8 @@ in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx90a-table`. invalidating the caches. - 3. buffer_wbinvl1_vol + 3. buffer_invl2; + buffer_wbinvl1_vol - Must happen before any following @@ -6658,7 +6678,9 @@ in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx90a-table`. - Ensures that following loads will not see - stale L1 global data. + stale L1 global data, + nor see stale L2 MTYPE + NC global data. MTYPE RW and CC memory will never be stale in L2 due to the memory probes. @@ -6734,7 +6756,7 @@ in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx90a-table`. value read by the fence-paired-atomic. - 3. buffer_wbinvl1_vol + 2. buffer_wbinvl1_vol - If not TgSplit execution mode, omit. @@ -6872,7 +6894,7 @@ in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx90a-table`. termed the fence-paired-atomic). - Must happen before - the following + the following buffer_invl2 and buffer_wbinvl1_vol. - Ensures that the fence-paired atomic @@ -6887,7 +6909,8 @@ in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx90a-table`. the fence-paired-atomic. - 2. buffer_wbinvl1_vol + 2. buffer_invl2; + buffer_wbinvl1_vol - Must happen before any following global/generic @@ -6897,7 +6920,9 @@ in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx90a-table`. - Ensures that following loads will not see - stale L1 global data. + stale L1 global data, + nor see stale L2 MTYPE + NC global data. MTYPE RW and CC memory will never be stale in L2 due to the memory probes. @@ -6991,8 +7016,18 @@ in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx90a-table`. released. 2. buffer/global/flat_store - store atomic release - system - global 1. s_waitcnt lgkmcnt(0) & - - generic vmcnt(0) + store atomic release - system - global 1. buffer_wbl2 + - generic + - Must happen before + following s_waitcnt. + - Performs L2 writeback to + ensure previous + global/generic + store/atomicrmw are + visible at system scope. + + 2. s_waitcnt lgkmcnt(0) & + vmcnt(0) - If TgSplit execution mode, omit lgkmcnt(0). @@ -7035,7 +7070,7 @@ in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx90a-table`. store that is being released. - 2. buffer/global/flat_store + 3. buffer/global/flat_store atomicrmw release - singlethread - global 1. buffer/global/flat_atomic - wavefront - generic atomicrmw release - singlethread - local *If TgSplit execution mode, @@ -7123,8 +7158,18 @@ in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx90a-table`. is being released. 2. buffer/global/flat_atomic - atomicrmw release - system - global 1. s_waitcnt lgkmcnt(0) & - - generic vmcnt(0) + atomicrmw release - system - global 1. buffer_wbl2 + - generic + - Must happen before + following s_waitcnt. + - Performs L2 writeback to + ensure previous + global/generic + store/atomicrmw are + visible at system scope. + + 2. s_waitcnt lgkmcnt(0) & + vmcnt(0) - If TgSplit execution mode, omit lgkmcnt(0). @@ -7165,7 +7210,7 @@ in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx90a-table`. store that is being released. - 2. buffer/global/flat_atomic + 3. buffer/global/flat_atomic fence release - singlethread *none* *none* - wavefront fence release - workgroup *none* 1. s_waitcnt lgkm/vmcnt(0) @@ -7298,7 +7343,20 @@ in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx90a-table`. following fence-paired-atomic. - fence release - system *none* 1. s_waitcnt lgkmcnt(0) & + fence release - system *none* 1. buffer_wbl2 + + - If OpenCL and + address space is + local, omit. + - Must happen before + following s_waitcnt. + - Performs L2 writeback to + ensure previous + global/generic + store/atomicrmw are + visible at system scope. + + 2. s_waitcnt lgkmcnt(0) & vmcnt(0) - If TgSplit execution mode, @@ -7588,7 +7646,17 @@ in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx90a-table`. will not see stale global data. - atomicrmw acq_rel - system - global 1. s_waitcnt lgkmcnt(0) & + atomicrmw acq_rel - system - global 1. buffer_wbl2 + + - Must happen before + following s_waitcnt. + - Performs L2 writeback to + ensure previous + global/generic + store/atomicrmw are + visible at system scope. + + 2. s_waitcnt lgkmcnt(0) & vmcnt(0) - If TgSplit execution mode, @@ -7629,11 +7697,11 @@ in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx90a-table`. atomicrmw that is being released. - 2. buffer/global_atomic - 3. s_waitcnt vmcnt(0) + 3. buffer/global_atomic + 4. s_waitcnt vmcnt(0) - Must happen before - following + following buffer_invl2 and buffer_wbinvl1_vol. - Ensures the atomicrmw has @@ -7641,7 +7709,8 @@ in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx90a-table`. invalidating the caches. - 4. buffer_wbinvl1_vol + 5. buffer_invl2; + buffer_wbinvl1_vol - Must happen before any following @@ -7651,7 +7720,9 @@ in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx90a-table`. - Ensures that following loads will not see - stale L1 global data. + stale L1 global data, + nor see stale L2 MTYPE + NC global data. MTYPE RW and CC memory will never be stale in L2 due to the memory probes. @@ -7726,7 +7797,17 @@ in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx90a-table`. will not see stale global data. - atomicrmw acq_rel - system - generic 1. s_waitcnt lgkmcnt(0) & + atomicrmw acq_rel - system - generic 1. buffer_wbl2 + + - Must happen before + following s_waitcnt. + - Performs L2 writeback to + ensure previous + global/generic + store/atomicrmw are + visible at system scope. + + 2. s_waitcnt lgkmcnt(0) & vmcnt(0) - If TgSplit execution mode, @@ -7767,8 +7848,8 @@ in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx90a-table`. atomicrmw that is being released. - 2. flat_atomic - 3. s_waitcnt vmcnt(0) & + 3. flat_atomic + 4. s_waitcnt vmcnt(0) & lgkmcnt(0) - If TgSplit execution mode, @@ -7776,7 +7857,7 @@ in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx90a-table`. - If OpenCL, omit lgkmcnt(0). - Must happen before - following + following buffer_invl2 and buffer_wbinvl1_vol. - Ensures the atomicrmw has @@ -7784,7 +7865,8 @@ in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx90a-table`. invalidating the caches. - 4. buffer_wbinvl1_vol + 5. buffer_invl2; + buffer_wbinvl1_vol - Must happen before any following @@ -7794,7 +7876,9 @@ in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx90a-table`. - Ensures that following loads will not see - stale L1 global data. + stale L1 global data, + nor see stale L2 MTYPE + NC global data. MTYPE RW and CC memory will never be stale in L2 due to the memory probes. @@ -7902,7 +7986,7 @@ in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx90a-table`. the acquire-fence-paired-atomic. - 3. buffer_wbinvl1_vol + 2. buffer_wbinvl1_vol - If not TgSplit execution mode, omit. @@ -8007,7 +8091,20 @@ in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx90a-table`. requirements of acquire. - fence acq_rel - system *none* 1. s_waitcnt lgkmcnt(0) & + fence acq_rel - system *none* 1. buffer_wbl2 + + - If OpenCL and + address space is + local, omit. + - Must happen before + following s_waitcnt. + - Performs L2 writeback to + ensure previous + global/generic + store/atomicrmw are + visible at system scope. + + 2. s_waitcnt lgkmcnt(0) & vmcnt(0) - If TgSplit execution mode, @@ -8048,7 +8145,7 @@ in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx90a-table`. atomic/store atomic/atomicrmw. - Must happen before - the following + the following buffer_invl2 and buffer_wbinvl1_vol. - Ensures that the preceding @@ -8087,7 +8184,8 @@ in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx90a-table`. requirements of release. - 2. buffer_wbinvl1_vol + 3. buffer_invl2; + buffer_wbinvl1_vol - Must happen before any following @@ -8098,7 +8196,9 @@ in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx90a-table`. - Ensures that following loads will not see - stale L1 global data. + stale L1 global data, + nor see stale L2 MTYPE + NC global data. MTYPE RW and CC memory will never be stale in L2 due to the memory probes. diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp index 5147a516807d5..71be73c2f0e49 100644 --- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp +++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp @@ -452,6 +452,12 @@ class SIGfx90ACacheControl : public SIGfx7CacheControl { SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace, Position Pos) const override; + + bool insertRelease(MachineBasicBlock::iterator &MI, + SIAtomicScope Scope, + SIAtomicAddrSpace AddrSpace, + bool IsCrossAddrSpaceOrdering, + Position Pos) const override; }; class SIGfx10CacheControl : public SIGfx7CacheControl { @@ -1265,9 +1271,26 @@ bool SIGfx90ACacheControl::insertAcquire(MachineBasicBlock::iterator &MI, bool Changed = false; + MachineBasicBlock &MBB = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + + if (Pos == Position::AFTER) + ++MI; + if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) { switch (Scope) { case SIAtomicScope::SYSTEM: + // Ensures that following loads will not see stale remote VMEM data or + // stale local VMEM data with MTYPE NC. Local VMEM data with MTYPE RW and + // CC will never be stale due to the local memory probes. + BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_INVL2)); + // Inserting a "S_WAITCNT vmcnt(0)" after is not required because the + // hardware does not reorder memory operations by the same wave with + // respect to a preceding "BUFFER_INVL2". The invalidate is guaranteed to + // remove any cache lines of earlier writes by the same wave and ensures + // later reads by the same wave will refetch the cache lines. + Changed = true; + break; case SIAtomicScope::AGENT: // Same as GFX7. break; @@ -1297,11 +1320,62 @@ bool SIGfx90ACacheControl::insertAcquire(MachineBasicBlock::iterator &MI, /// Other address spaces do not have a cache. + if (Pos == Position::AFTER) + --MI; + Changed |= SIGfx7CacheControl::insertAcquire(MI, Scope, AddrSpace, Pos); return Changed; } +bool SIGfx90ACacheControl::insertRelease(MachineBasicBlock::iterator &MI, + SIAtomicScope Scope, + SIAtomicAddrSpace AddrSpace, + bool IsCrossAddrSpaceOrdering, + Position Pos) const { + bool Changed = false; + + MachineBasicBlock &MBB = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + + if (Pos == Position::AFTER) + ++MI; + + if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) { + switch (Scope) { + case SIAtomicScope::SYSTEM: + // Inserting a "S_WAITCNT vmcnt(0)" before is not required because the + // hardware does not reorder memory operations by the same wave with + // respect to a following "BUFFER_WBL2". The "BUFFER_WBL2" is guaranteed + // to initiate writeback of any dirty cache lines of earlier writes by the + // same wave. A "S_WAITCNT vmcnt(0)" is needed after to ensure the + // writeback has completed. + BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBL2)); + // Followed by same as GFX7, which will ensure the necessary "S_WAITCNT + // vmcnt(0)" needed by the "BUFFER_WBL2". + Changed = true; + break; + case SIAtomicScope::AGENT: + case SIAtomicScope::WORKGROUP: + case SIAtomicScope::WAVEFRONT: + case SIAtomicScope::SINGLETHREAD: + // Same as GFX7. + break; + default: + llvm_unreachable("Unsupported synchronization scope"); + } + } + + if (Pos == Position::AFTER) + --MI; + + Changed |= + SIGfx7CacheControl::insertRelease(MI, Scope, AddrSpace, + IsCrossAddrSpaceOrdering, Pos); + + return Changed; +} + bool SIGfx10CacheControl::enableLoadCacheBypass( const MachineBasicBlock::iterator &MI, SIAtomicScope Scope, diff --git a/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll b/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll index 1ba4fca34efbe..785add35fc197 100644 --- a/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll +++ b/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll @@ -424,9 +424,11 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(double addrspace(1)* ; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0 ; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0 +; GFX90A-NEXT: buffer_wbl2 ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[0:1] glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: buffer_invl2 ; GFX90A-NEXT: buffer_wbinvl1_vol ; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] ; GFX90A-NEXT: s_or_b64 s[2:3], vcc, s[2:3] @@ -470,9 +472,11 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(double addrsp ; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX90A-NEXT: v_mov_b32_e32 v4, 0 ; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0 +; GFX90A-NEXT: buffer_wbl2 ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[0:1] glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: buffer_invl2 ; GFX90A-NEXT: buffer_wbinvl1_vol ; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] ; GFX90A-NEXT: s_or_b64 s[2:3], vcc, s[2:3] @@ -526,9 +530,11 @@ define double @global_atomic_fadd_f64_rtn_pat(double addrspace(1)* %ptr, double ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1] ; GFX90A-NEXT: v_add_f64 v[2:3], v[4:5], 4.0 +; GFX90A-NEXT: buffer_wbl2 ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: buffer_invl2 ; GFX90A-NEXT: buffer_wbinvl1_vol ; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5] ; GFX90A-NEXT: s_or_b64 s[4:5], vcc, s[4:5] @@ -571,9 +577,11 @@ define double @global_atomic_fadd_f64_rtn_pat_system(double addrspace(1)* %ptr, ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1] ; GFX90A-NEXT: v_add_f64 v[2:3], v[4:5], 4.0 +; GFX90A-NEXT: buffer_wbl2 ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: buffer_invl2 ; GFX90A-NEXT: buffer_wbinvl1_vol ; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5] ; GFX90A-NEXT: s_or_b64 s[4:5], vcc, s[4:5] @@ -655,9 +663,11 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(double* %ptr) #1 { ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0 ; GFX90A-NEXT: v_pk_mov_b32 v[4:5], s[0:1], s[0:1] op_sel:[0,1] +; GFX90A-NEXT: buffer_wbl2 ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: buffer_invl2 ; GFX90A-NEXT: buffer_wbinvl1_vol ; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] ; GFX90A-NEXT: s_or_b64 s[2:3], vcc, s[2:3] @@ -702,9 +712,11 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(double* %ptr) # ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0 ; GFX90A-NEXT: v_pk_mov_b32 v[4:5], s[0:1], s[0:1] op_sel:[0,1] +; GFX90A-NEXT: buffer_wbl2 ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: buffer_invl2 ; GFX90A-NEXT: buffer_wbinvl1_vol ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] @@ -730,9 +742,11 @@ define double @flat_atomic_fadd_f64_rtn_pat(double* %ptr) #1 { ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1] ; GFX90A-NEXT: v_add_f64 v[2:3], v[4:5], 4.0 +; GFX90A-NEXT: buffer_wbl2 ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5] glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: buffer_invl2 ; GFX90A-NEXT: buffer_wbinvl1_vol ; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5] ; GFX90A-NEXT: s_or_b64 s[4:5], vcc, s[4:5] @@ -775,9 +789,11 @@ define double @flat_atomic_fadd_f64_rtn_pat_system(double* %ptr) #1 { ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1] ; GFX90A-NEXT: v_add_f64 v[2:3], v[4:5], 4.0 +; GFX90A-NEXT: buffer_wbl2 ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5] glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: buffer_invl2 ; GFX90A-NEXT: buffer_wbinvl1_vol ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5] diff --git a/llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll b/llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll index a0d5cef78d608..56c76c6d4d8ef 100644 --- a/llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll +++ b/llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll @@ -70,9 +70,11 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32(float addrspace(1)* %ptr) ; GFX90A-NEXT: v_mov_b32_e32 v1, v0 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NEXT: v_add_f32_e32 v0, 4.0, v1 +; GFX90A-NEXT: buffer_wbl2 ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: buffer_invl2 ; GFX90A-NEXT: buffer_wbinvl1_vol ; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 ; GFX90A-NEXT: s_or_b64 s[2:3], vcc, s[2:3] @@ -527,9 +529,11 @@ define amdgpu_kernel void @global_atomic_fadd_ret_f32_system(float addrspace(1)* ; GFX90A-NEXT: v_mov_b32_e32 v1, v0 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NEXT: v_add_f32_e32 v0, 4.0, v1 +; GFX90A-NEXT: buffer_wbl2 ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: buffer_invl2 ; GFX90A-NEXT: buffer_wbinvl1_vol ; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 ; GFX90A-NEXT: s_or_b64 s[2:3], vcc, s[2:3] diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll index c1a35169d4346..8e20ea43fed9d 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll @@ -1275,13 +1275,17 @@ define amdgpu_kernel void @system_acquire_fence() { ; ; GFX90A-NOTTGSPLIT-LABEL: system_acquire_fence: ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX90A-TGSPLIT-LABEL: system_acquire_fence: ; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm entry: @@ -1319,11 +1323,13 @@ define amdgpu_kernel void @system_release_fence() { ; ; GFX90A-NOTTGSPLIT-LABEL: system_release_fence: ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX90A-TGSPLIT-LABEL: system_release_fence: ; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: s_endpgm entry: @@ -1367,13 +1373,17 @@ define amdgpu_kernel void @system_acq_rel_fence() { ; ; GFX90A-NOTTGSPLIT-LABEL: system_acq_rel_fence: ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX90A-TGSPLIT-LABEL: system_acq_rel_fence: ; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm entry: @@ -1417,13 +1427,17 @@ define amdgpu_kernel void @system_seq_cst_fence() { ; ; GFX90A-NOTTGSPLIT-LABEL: system_seq_cst_fence: ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX90A-TGSPLIT-LABEL: system_seq_cst_fence: ; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm entry: @@ -1467,13 +1481,17 @@ define amdgpu_kernel void @system_one_as_acquire_fence() { ; ; GFX90A-NOTTGSPLIT-LABEL: system_one_as_acquire_fence: ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX90A-TGSPLIT-LABEL: system_one_as_acquire_fence: ; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm entry: @@ -1511,11 +1529,13 @@ define amdgpu_kernel void @system_one_as_release_fence() { ; ; GFX90A-NOTTGSPLIT-LABEL: system_one_as_release_fence: ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX90A-TGSPLIT-LABEL: system_one_as_release_fence: ; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: s_endpgm entry: @@ -1559,13 +1579,17 @@ define amdgpu_kernel void @system_one_as_acq_rel_fence() { ; ; GFX90A-NOTTGSPLIT-LABEL: system_one_as_acq_rel_fence: ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX90A-TGSPLIT-LABEL: system_one_as_acq_rel_fence: ; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm entry: @@ -1609,13 +1633,17 @@ define amdgpu_kernel void @system_one_as_seq_cst_fence() { ; ; GFX90A-NOTTGSPLIT-LABEL: system_one_as_seq_cst_fence: ; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX90A-TGSPLIT-LABEL: system_one_as_seq_cst_fence: ; GFX90A-TGSPLIT: ; %bb.0: ; %entry +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm entry: diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll index 1abe2dc3b5c6c..fb993ed5b8d7e 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll @@ -242,6 +242,7 @@ define amdgpu_kernel void @flat_system_acquire_load( ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 ; GFX90A-NOTTGSPLIT-NEXT: flat_load_dword v0, v[0:1] glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, s3 @@ -256,6 +257,7 @@ define amdgpu_kernel void @flat_system_acquire_load( ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 ; GFX90A-TGSPLIT-NEXT: flat_load_dword v0, v[0:1] glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, s3 @@ -341,6 +343,7 @@ define amdgpu_kernel void @flat_system_seq_cst_load( ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_load_dword v0, v[0:1] glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, s3 @@ -356,6 +359,7 @@ define amdgpu_kernel void @flat_system_seq_cst_load( ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_load_dword v0, v[0:1] glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, s3 @@ -572,6 +576,7 @@ define amdgpu_kernel void @flat_system_release_store( ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -583,6 +588,7 @@ define amdgpu_kernel void @flat_system_release_store( ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -652,6 +658,7 @@ define amdgpu_kernel void @flat_system_seq_cst_store( ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -663,6 +670,7 @@ define amdgpu_kernel void @flat_system_seq_cst_store( ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -811,6 +819,7 @@ define amdgpu_kernel void @flat_system_acquire_atomicrmw( ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -823,6 +832,7 @@ define amdgpu_kernel void @flat_system_acquire_atomicrmw( ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm i32* %out, i32 %in) { @@ -891,6 +901,7 @@ define amdgpu_kernel void @flat_system_release_atomicrmw( ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -902,6 +913,7 @@ define amdgpu_kernel void @flat_system_release_atomicrmw( ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -982,9 +994,11 @@ define amdgpu_kernel void @flat_system_acq_rel_atomicrmw( ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -995,9 +1009,11 @@ define amdgpu_kernel void @flat_system_acq_rel_atomicrmw( ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm i32* %out, i32 %in) { @@ -1077,9 +1093,11 @@ define amdgpu_kernel void @flat_system_seq_cst_atomicrmw( ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -1090,9 +1108,11 @@ define amdgpu_kernel void @flat_system_seq_cst_atomicrmw( ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm i32* %out, i32 %in) { @@ -1170,6 +1190,7 @@ define amdgpu_kernel void @flat_system_acquire_ret_atomicrmw( ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -1183,6 +1204,7 @@ define amdgpu_kernel void @flat_system_acquire_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -1266,9 +1288,11 @@ define amdgpu_kernel void @flat_system_acq_rel_ret_atomicrmw( ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -1280,9 +1304,11 @@ define amdgpu_kernel void @flat_system_acq_rel_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -1366,9 +1392,11 @@ define amdgpu_kernel void @flat_system_seq_cst_ret_atomicrmw( ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -1380,9 +1408,11 @@ define amdgpu_kernel void @flat_system_seq_cst_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -1557,6 +1587,7 @@ define amdgpu_kernel void @flat_system_acquire_monotonic_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -1569,6 +1600,7 @@ define amdgpu_kernel void @flat_system_acquire_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm i32* %out, i32 %in, i32 %old) { @@ -1650,6 +1682,7 @@ define amdgpu_kernel void @flat_system_release_monotonic_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -1661,6 +1694,7 @@ define amdgpu_kernel void @flat_system_release_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -1754,9 +1788,11 @@ define amdgpu_kernel void @flat_system_acq_rel_monotonic_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -1767,9 +1803,11 @@ define amdgpu_kernel void @flat_system_acq_rel_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm i32* %out, i32 %in, i32 %old) { @@ -1862,9 +1900,11 @@ define amdgpu_kernel void @flat_system_seq_cst_monotonic_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -1875,9 +1915,11 @@ define amdgpu_kernel void @flat_system_seq_cst_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm i32* %out, i32 %in, i32 %old) { @@ -1966,6 +2008,7 @@ define amdgpu_kernel void @flat_system_acquire_acquire_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -1978,6 +2021,7 @@ define amdgpu_kernel void @flat_system_acquire_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm i32* %out, i32 %in, i32 %old) { @@ -2070,9 +2114,11 @@ define amdgpu_kernel void @flat_system_release_acquire_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -2083,9 +2129,11 @@ define amdgpu_kernel void @flat_system_release_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm i32* %out, i32 %in, i32 %old) { @@ -2178,9 +2226,11 @@ define amdgpu_kernel void @flat_system_acq_rel_acquire_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -2191,9 +2241,11 @@ define amdgpu_kernel void @flat_system_acq_rel_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm i32* %out, i32 %in, i32 %old) { @@ -2286,9 +2338,11 @@ define amdgpu_kernel void @flat_system_seq_cst_acquire_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -2299,9 +2353,11 @@ define amdgpu_kernel void @flat_system_seq_cst_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm i32* %out, i32 %in, i32 %old) { @@ -2394,9 +2450,11 @@ define amdgpu_kernel void @flat_system_seq_cst_seq_cst_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -2407,9 +2465,11 @@ define amdgpu_kernel void @flat_system_seq_cst_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm i32* %out, i32 %in, i32 %old) { @@ -2508,6 +2568,7 @@ define amdgpu_kernel void @flat_system_acquire_monotonic_ret_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -2521,6 +2582,7 @@ define amdgpu_kernel void @flat_system_acquire_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -2626,9 +2688,11 @@ define amdgpu_kernel void @flat_system_acq_rel_monotonic_ret_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -2640,9 +2704,11 @@ define amdgpu_kernel void @flat_system_acq_rel_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -2748,9 +2814,11 @@ define amdgpu_kernel void @flat_system_seq_cst_monotonic_ret_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -2762,9 +2830,11 @@ define amdgpu_kernel void @flat_system_seq_cst_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -2866,6 +2936,7 @@ define amdgpu_kernel void @flat_system_acquire_acquire_ret_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -2879,6 +2950,7 @@ define amdgpu_kernel void @flat_system_acquire_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -2984,9 +3056,11 @@ define amdgpu_kernel void @flat_system_release_acquire_ret_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -2998,9 +3072,11 @@ define amdgpu_kernel void @flat_system_release_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -3106,9 +3182,11 @@ define amdgpu_kernel void @flat_system_acq_rel_acquire_ret_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -3120,9 +3198,11 @@ define amdgpu_kernel void @flat_system_acq_rel_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -3228,9 +3308,11 @@ define amdgpu_kernel void @flat_system_seq_cst_acquire_ret_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -3242,9 +3324,11 @@ define amdgpu_kernel void @flat_system_seq_cst_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -3350,9 +3434,11 @@ define amdgpu_kernel void @flat_system_seq_cst_seq_cst_ret_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -3364,9 +3450,11 @@ define amdgpu_kernel void @flat_system_seq_cst_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -3619,6 +3707,7 @@ define amdgpu_kernel void @flat_system_one_as_acquire_load( ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s1 ; GFX90A-NOTTGSPLIT-NEXT: flat_load_dword v0, v[0:1] glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, s3 @@ -3634,6 +3723,7 @@ define amdgpu_kernel void @flat_system_one_as_acquire_load( ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s1 ; GFX90A-TGSPLIT-NEXT: flat_load_dword v0, v[0:1] glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, s3 @@ -3723,6 +3813,7 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_load( ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_load_dword v0, v[0:1] glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v3, s3 @@ -3739,6 +3830,7 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_load( ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_load_dword v0, v[0:1] glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v3, s3 @@ -3955,6 +4047,7 @@ define amdgpu_kernel void @flat_system_one_as_release_store( ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -3966,6 +4059,7 @@ define amdgpu_kernel void @flat_system_one_as_release_store( ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -4035,6 +4129,7 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_store( ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -4046,6 +4141,7 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_store( ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -4192,6 +4288,7 @@ define amdgpu_kernel void @flat_system_one_as_acquire_atomicrmw( ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -4204,6 +4301,7 @@ define amdgpu_kernel void @flat_system_one_as_acquire_atomicrmw( ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm i32* %out, i32 %in) { @@ -4272,6 +4370,7 @@ define amdgpu_kernel void @flat_system_one_as_release_atomicrmw( ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -4283,6 +4382,7 @@ define amdgpu_kernel void @flat_system_one_as_release_atomicrmw( ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -4361,9 +4461,11 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_atomicrmw( ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -4374,9 +4476,11 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_atomicrmw( ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm i32* %out, i32 %in) { @@ -4454,9 +4558,11 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_atomicrmw( ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -4467,9 +4573,11 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_atomicrmw( ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm i32* %out, i32 %in) { @@ -4550,6 +4658,7 @@ define amdgpu_kernel void @flat_system_one_as_acquire_ret_atomicrmw( ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 @@ -4564,6 +4673,7 @@ define amdgpu_kernel void @flat_system_one_as_acquire_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -4650,9 +4760,11 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_ret_atomicrmw( ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 @@ -4665,9 +4777,11 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -4754,9 +4868,11 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_ret_atomicrmw( ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 @@ -4769,9 +4885,11 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, s2 +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -4944,6 +5062,7 @@ define amdgpu_kernel void @flat_system_one_as_acquire_monotonic_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -4956,6 +5075,7 @@ define amdgpu_kernel void @flat_system_one_as_acquire_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm i32* %out, i32 %in, i32 %old) { @@ -5037,6 +5157,7 @@ define amdgpu_kernel void @flat_system_one_as_release_monotonic_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -5048,6 +5169,7 @@ define amdgpu_kernel void @flat_system_one_as_release_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -5139,9 +5261,11 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_monotonic_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -5152,9 +5276,11 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm i32* %out, i32 %in, i32 %old) { @@ -5245,9 +5371,11 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_monotonic_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -5258,9 +5386,11 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm i32* %out, i32 %in, i32 %old) { @@ -5347,6 +5477,7 @@ define amdgpu_kernel void @flat_system_one_as_acquire_acquire_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -5359,6 +5490,7 @@ define amdgpu_kernel void @flat_system_one_as_acquire_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm i32* %out, i32 %in, i32 %old) { @@ -5449,9 +5581,11 @@ define amdgpu_kernel void @flat_system_one_as_release_acquire_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -5462,9 +5596,11 @@ define amdgpu_kernel void @flat_system_one_as_release_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm i32* %out, i32 %in, i32 %old) { @@ -5555,9 +5691,11 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_acquire_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -5568,9 +5706,11 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm i32* %out, i32 %in, i32 %old) { @@ -5661,9 +5801,11 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_acquire_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -5674,9 +5816,11 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm i32* %out, i32 %in, i32 %old) { @@ -5767,9 +5911,11 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_seq_cst_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -5780,9 +5926,11 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v[0:1], v[2:3] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm i32* %out, i32 %in, i32 %old) { @@ -5885,6 +6033,7 @@ define amdgpu_kernel void @flat_system_one_as_acquire_monotonic_ret_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 @@ -5899,6 +6048,7 @@ define amdgpu_kernel void @flat_system_one_as_acquire_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -6008,9 +6158,11 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 @@ -6023,9 +6175,11 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -6135,9 +6289,11 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 @@ -6150,9 +6306,11 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -6258,6 +6416,7 @@ define amdgpu_kernel void @flat_system_one_as_acquire_acquire_ret_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 @@ -6272,6 +6431,7 @@ define amdgpu_kernel void @flat_system_one_as_acquire_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -6381,9 +6541,11 @@ define amdgpu_kernel void @flat_system_one_as_release_acquire_ret_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 @@ -6396,9 +6558,11 @@ define amdgpu_kernel void @flat_system_one_as_release_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -6508,9 +6672,11 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 @@ -6523,9 +6689,11 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -6635,9 +6803,11 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 @@ -6650,9 +6820,11 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -6762,9 +6934,11 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 @@ -6777,9 +6951,11 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 ; GFX90A-TGSPLIT-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll index a8be20c343eb2..358a7bb37d00c 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll @@ -276,6 +276,7 @@ define amdgpu_kernel void @global_system_acquire_load( ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -287,6 +288,7 @@ define amdgpu_kernel void @global_system_acquire_load( ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -383,6 +385,7 @@ define amdgpu_kernel void @global_system_seq_cst_load( ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -394,6 +397,7 @@ define amdgpu_kernel void @global_system_seq_cst_load( ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -636,6 +640,7 @@ define amdgpu_kernel void @global_system_release_store( ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -647,6 +652,7 @@ define amdgpu_kernel void @global_system_release_store( ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -726,6 +732,7 @@ define amdgpu_kernel void @global_system_seq_cst_store( ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -737,6 +744,7 @@ define amdgpu_kernel void @global_system_seq_cst_store( ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -903,6 +911,7 @@ define amdgpu_kernel void @global_system_acquire_atomicrmw( ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -915,6 +924,7 @@ define amdgpu_kernel void @global_system_acquire_atomicrmw( ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm i32 addrspace(1)* %out, i32 %in) { @@ -993,6 +1003,7 @@ define amdgpu_kernel void @global_system_release_atomicrmw( ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -1004,6 +1015,7 @@ define amdgpu_kernel void @global_system_release_atomicrmw( ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -1094,9 +1106,11 @@ define amdgpu_kernel void @global_system_acq_rel_atomicrmw( ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -1107,9 +1121,11 @@ define amdgpu_kernel void @global_system_acq_rel_atomicrmw( ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm i32 addrspace(1)* %out, i32 %in) { @@ -1199,9 +1215,11 @@ define amdgpu_kernel void @global_system_seq_cst_atomicrmw( ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -1212,9 +1230,11 @@ define amdgpu_kernel void @global_system_seq_cst_atomicrmw( ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm i32 addrspace(1)* %out, i32 %in) { @@ -1304,6 +1324,7 @@ define amdgpu_kernel void @global_system_acquire_ret_atomicrmw( ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -1317,6 +1338,7 @@ define amdgpu_kernel void @global_system_acquire_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -1413,9 +1435,11 @@ define amdgpu_kernel void @global_system_acq_rel_ret_atomicrmw( ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -1427,9 +1451,11 @@ define amdgpu_kernel void @global_system_acq_rel_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -1526,9 +1552,11 @@ define amdgpu_kernel void @global_system_seq_cst_ret_atomicrmw( ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -1540,9 +1568,11 @@ define amdgpu_kernel void @global_system_seq_cst_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -1725,6 +1755,7 @@ define amdgpu_kernel void @global_system_acquire_monotonic_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -1737,6 +1768,7 @@ define amdgpu_kernel void @global_system_acquire_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm i32 addrspace(1)* %out, i32 %in, i32 %old) { @@ -1823,6 +1855,7 @@ define amdgpu_kernel void @global_system_release_monotonic_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -1834,6 +1867,7 @@ define amdgpu_kernel void @global_system_release_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -1932,9 +1966,11 @@ define amdgpu_kernel void @global_system_acq_rel_monotonic_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -1945,9 +1981,11 @@ define amdgpu_kernel void @global_system_acq_rel_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm i32 addrspace(1)* %out, i32 %in, i32 %old) { @@ -2045,9 +2083,11 @@ define amdgpu_kernel void @global_system_seq_cst_monotonic_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -2058,9 +2098,11 @@ define amdgpu_kernel void @global_system_seq_cst_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm i32 addrspace(1)* %out, i32 %in, i32 %old) { @@ -2153,6 +2195,7 @@ define amdgpu_kernel void @global_system_acquire_acquire_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -2165,6 +2208,7 @@ define amdgpu_kernel void @global_system_acquire_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm i32 addrspace(1)* %out, i32 %in, i32 %old) { @@ -2262,9 +2306,11 @@ define amdgpu_kernel void @global_system_release_acquire_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -2275,9 +2321,11 @@ define amdgpu_kernel void @global_system_release_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm i32 addrspace(1)* %out, i32 %in, i32 %old) { @@ -2375,9 +2423,11 @@ define amdgpu_kernel void @global_system_acq_rel_acquire_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -2388,9 +2438,11 @@ define amdgpu_kernel void @global_system_acq_rel_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm i32 addrspace(1)* %out, i32 %in, i32 %old) { @@ -2488,9 +2540,11 @@ define amdgpu_kernel void @global_system_seq_cst_acquire_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -2501,9 +2555,11 @@ define amdgpu_kernel void @global_system_seq_cst_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm i32 addrspace(1)* %out, i32 %in, i32 %old) { @@ -2601,9 +2657,11 @@ define amdgpu_kernel void @global_system_seq_cst_seq_cst_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -2614,9 +2672,11 @@ define amdgpu_kernel void @global_system_seq_cst_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm i32 addrspace(1)* %out, i32 %in, i32 %old) { @@ -2716,6 +2776,7 @@ define amdgpu_kernel void @global_system_acquire_monotonic_ret_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -2729,6 +2790,7 @@ define amdgpu_kernel void @global_system_acquire_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -2836,9 +2898,11 @@ define amdgpu_kernel void @global_system_acq_rel_monotonic_ret_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -2850,9 +2914,11 @@ define amdgpu_kernel void @global_system_acq_rel_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -2960,9 +3026,11 @@ define amdgpu_kernel void @global_system_seq_cst_monotonic_ret_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -2974,9 +3042,11 @@ define amdgpu_kernel void @global_system_seq_cst_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -3079,6 +3149,7 @@ define amdgpu_kernel void @global_system_acquire_acquire_ret_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -3092,6 +3163,7 @@ define amdgpu_kernel void @global_system_acquire_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -3199,9 +3271,11 @@ define amdgpu_kernel void @global_system_release_acquire_ret_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -3213,9 +3287,11 @@ define amdgpu_kernel void @global_system_release_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -3323,9 +3399,11 @@ define amdgpu_kernel void @global_system_acq_rel_acquire_ret_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -3337,9 +3415,11 @@ define amdgpu_kernel void @global_system_acq_rel_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -3447,9 +3527,11 @@ define amdgpu_kernel void @global_system_seq_cst_acquire_ret_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -3461,9 +3543,11 @@ define amdgpu_kernel void @global_system_seq_cst_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -3571,9 +3655,11 @@ define amdgpu_kernel void @global_system_seq_cst_seq_cst_ret_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -3585,9 +3671,11 @@ define amdgpu_kernel void @global_system_seq_cst_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -3869,6 +3957,7 @@ define amdgpu_kernel void @global_system_one_as_acquire_load( ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -3880,6 +3969,7 @@ define amdgpu_kernel void @global_system_one_as_acquire_load( ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -3976,6 +4066,7 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_load( ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -3987,6 +4078,7 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_load( ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -4229,6 +4321,7 @@ define amdgpu_kernel void @global_system_one_as_release_store( ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -4240,6 +4333,7 @@ define amdgpu_kernel void @global_system_one_as_release_store( ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -4319,6 +4413,7 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_store( ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -4330,6 +4425,7 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_store( ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -4496,6 +4592,7 @@ define amdgpu_kernel void @global_system_one_as_acquire_atomicrmw( ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -4508,6 +4605,7 @@ define amdgpu_kernel void @global_system_one_as_acquire_atomicrmw( ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm i32 addrspace(1)* %out, i32 %in) { @@ -4586,6 +4684,7 @@ define amdgpu_kernel void @global_system_one_as_release_atomicrmw( ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -4597,6 +4696,7 @@ define amdgpu_kernel void @global_system_one_as_release_atomicrmw( ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -4687,9 +4787,11 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_atomicrmw( ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -4700,9 +4802,11 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_atomicrmw( ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm i32 addrspace(1)* %out, i32 %in) { @@ -4792,9 +4896,11 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_atomicrmw( ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -4805,9 +4911,11 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_atomicrmw( ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v0, v1, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm i32 addrspace(1)* %out, i32 %in) { @@ -4897,6 +5005,7 @@ define amdgpu_kernel void @global_system_one_as_acquire_ret_atomicrmw( ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -4910,6 +5019,7 @@ define amdgpu_kernel void @global_system_one_as_acquire_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -5006,9 +5116,11 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_ret_atomicrmw( ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -5020,9 +5132,11 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -5119,9 +5233,11 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_ret_atomicrmw( ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -5133,9 +5249,11 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_ret_atomicrmw( ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v1, s2 +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[0:1] glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: global_store_dword v0, v1, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -5318,6 +5436,7 @@ define amdgpu_kernel void @global_system_one_as_acquire_monotonic_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -5330,6 +5449,7 @@ define amdgpu_kernel void @global_system_one_as_acquire_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm i32 addrspace(1)* %out, i32 %in, i32 %old) { @@ -5416,6 +5536,7 @@ define amdgpu_kernel void @global_system_one_as_release_monotonic_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -5427,6 +5548,7 @@ define amdgpu_kernel void @global_system_one_as_release_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -5525,9 +5647,11 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_monotonic_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -5538,9 +5662,11 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm i32 addrspace(1)* %out, i32 %in, i32 %old) { @@ -5638,9 +5764,11 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_monotonic_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -5651,9 +5779,11 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_monotonic_cmpxchg( ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm i32 addrspace(1)* %out, i32 %in, i32 %old) { @@ -5746,6 +5876,7 @@ define amdgpu_kernel void @global_system_one_as_acquire_acquire_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -5758,6 +5889,7 @@ define amdgpu_kernel void @global_system_one_as_acquire_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm i32 addrspace(1)* %out, i32 %in, i32 %old) { @@ -5855,9 +5987,11 @@ define amdgpu_kernel void @global_system_one_as_release_acquire_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -5868,9 +6002,11 @@ define amdgpu_kernel void @global_system_one_as_release_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm i32 addrspace(1)* %out, i32 %in, i32 %old) { @@ -5968,9 +6104,11 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_acquire_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -5981,9 +6119,11 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm i32 addrspace(1)* %out, i32 %in, i32 %old) { @@ -6081,9 +6221,11 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_acquire_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -6094,9 +6236,11 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_acquire_cmpxchg( ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm i32 addrspace(1)* %out, i32 %in, i32 %old) { @@ -6194,9 +6338,11 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_seq_cst_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm ; @@ -6207,9 +6353,11 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_seq_cst_cmpxchg( ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v2, v[0:1], s[0:1] offset:16 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: s_endpgm i32 addrspace(1)* %out, i32 %in, i32 %old) { @@ -6309,6 +6457,7 @@ define amdgpu_kernel void @global_system_one_as_acquire_monotonic_ret_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -6322,6 +6471,7 @@ define amdgpu_kernel void @global_system_one_as_acquire_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -6429,9 +6579,11 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -6443,9 +6595,11 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -6553,9 +6707,11 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -6567,9 +6723,11 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -6672,6 +6830,7 @@ define amdgpu_kernel void @global_system_one_as_acquire_acquire_ret_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -6685,6 +6844,7 @@ define amdgpu_kernel void @global_system_one_as_acquire_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -6792,9 +6952,11 @@ define amdgpu_kernel void @global_system_one_as_release_acquire_ret_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -6806,9 +6968,11 @@ define amdgpu_kernel void @global_system_one_as_release_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -6916,9 +7080,11 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -6930,9 +7096,11 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -7040,9 +7208,11 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -7054,9 +7224,11 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_endpgm @@ -7164,9 +7336,11 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX90A-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2 ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2 ; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] ; GFX90A-NOTTGSPLIT-NEXT: s_endpgm @@ -7178,9 +7352,11 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX90A-TGSPLIT-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-TGSPLIT-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] +; GFX90A-TGSPLIT-NEXT: buffer_wbl2 ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX90A-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 glc ; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0) +; GFX90A-TGSPLIT-NEXT: buffer_invl2 ; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol ; GFX90A-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] ; GFX90A-TGSPLIT-NEXT: s_endpgm From 8b8f5c54d56daa9491c97a105bc996916c39927e Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Wed, 30 Jun 2021 04:08:36 +0000 Subject: [PATCH 258/619] Fix test pass registration to use the new API / not use the deprecated one (NFC) --- mlir/test/lib/IR/TestDiagnostics.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mlir/test/lib/IR/TestDiagnostics.cpp b/mlir/test/lib/IR/TestDiagnostics.cpp index 0021e0d383d91..bb7610789c1bd 100644 --- a/mlir/test/lib/IR/TestDiagnostics.cpp +++ b/mlir/test/lib/IR/TestDiagnostics.cpp @@ -19,6 +19,10 @@ using namespace mlir; namespace { struct TestDiagnosticFilterPass : public PassWrapper> { + StringRef getArgument() const final { return "test-diagnostic-filter"; } + StringRef getDescription() const final { + return "Test diagnostic filtering support."; + } TestDiagnosticFilterPass() {} TestDiagnosticFilterPass(const TestDiagnosticFilterPass &) {} @@ -58,8 +62,7 @@ struct TestDiagnosticFilterPass namespace mlir { namespace test { void registerTestDiagnosticsPass() { - PassRegistration( - "test-diagnostic-filter", "Test diagnostic filtering support."); + PassRegistration{}; } } // namespace test } // namespace mlir From 338a3f495ea4765b05612598e732c0d45dff3edc Mon Sep 17 00:00:00 2001 From: Kai Luo Date: Wed, 30 Jun 2021 04:39:31 +0000 Subject: [PATCH 259/619] [PowerPC][AIX] Pre-commit tracetable test for D100167. NFC. --- .../CodeGen/PowerPC/aix-tracetable-csr.ll | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 llvm/test/CodeGen/PowerPC/aix-tracetable-csr.ll diff --git a/llvm/test/CodeGen/PowerPC/aix-tracetable-csr.ll b/llvm/test/CodeGen/PowerPC/aix-tracetable-csr.ll new file mode 100644 index 0000000000000..616a94f9a16b1 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-tracetable-csr.ll @@ -0,0 +1,24 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff < %s | \ +; RUN: FileCheck --check-prefix=AIX-64 %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix-xcoff < %s | \ +; RUN: FileCheck --check-prefix=AIX-32 %s + +%0 = type { i8*, i8*, i8*, i8*, i8*, i32, i32, i32, i16, i16, [4 x i64] } +%1 = type { [167 x i64] } +%2 = type { [179 x i64] } +%3 = type { i64, void (i32, %3*)*, i64, i64 } + +declare i32 @wibble(%1*) local_unnamed_addr #0 + +declare hidden fastcc i32 @spam(%1*, %2*, %3*) unnamed_addr #0 + +; Function Attrs: nounwind +define void @baz(%3* %0) local_unnamed_addr #2 { +; AIX-64: std 30 +; AIX-64: .byte 0x02 # -HasVectorInfo, -HasExtensionTable, NumOfGPRsSaved = 2 +; AIX-32: stw 30 +; AIX-32: .byte 0x02 # -HasVectorInfo, -HasExtensionTable, NumOfGPRsSaved = 2 + %2 = call signext i32 @wibble(%1* nonnull undef) #2 + %3 = call fastcc zeroext i32 @spam(%1* nonnull undef, %2* nonnull undef, %3* nonnull %0) + unreachable +} From 071d26f8082391612f3a3f71b1135cbdceb0a30a Mon Sep 17 00:00:00 2001 From: Uday Bondhugula Date: Wed, 30 Jun 2021 09:50:33 +0530 Subject: [PATCH 260/619] [MLIR] Fix generateCopyForMemRefRegion Fix generateCopyForMemRefRegion for a missing check: in some cases, when the thing to generate copies for itself is empty, no fast buffer/copy loops would have been allocated/generated. Add an extra assertion there while at this. Differential Revision: https://reviews.llvm.org/D105170 --- mlir/lib/Transforms/Utils/LoopUtils.cpp | 8 ++++++-- mlir/test/Dialect/Affine/affine-data-copy.mlir | 13 +++++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/mlir/lib/Transforms/Utils/LoopUtils.cpp b/mlir/lib/Transforms/Utils/LoopUtils.cpp index 5e71e706ff7f3..ac3f87ea25f41 100644 --- a/mlir/lib/Transforms/Utils/LoopUtils.cpp +++ b/mlir/lib/Transforms/Utils/LoopUtils.cpp @@ -2912,8 +2912,12 @@ LogicalResult mlir::generateCopyForMemRegion( if (failed(err)) return err; - result.alloc = - fastBufferMap.find(memrefRegion.memref)->second.getDefiningOp(); + const auto &en = fastBufferMap.find(memrefRegion.memref); + // In some cases (empty loops), no copy generation would have happened. + if (en == fastBufferMap.end()) + return failure(); + result.alloc = en->second.getDefiningOp(); + assert(result.alloc && "fast buffer expected to be locally allocated"); assert(copyNests.size() <= 1 && "At most one copy nest is expected."); result.copyNest = copyNests.empty() ? nullptr : *copyNests.begin(); return success(); diff --git a/mlir/test/Dialect/Affine/affine-data-copy.mlir b/mlir/test/Dialect/Affine/affine-data-copy.mlir index 11288784a4e91..243f9d0b65319 100644 --- a/mlir/test/Dialect/Affine/affine-data-copy.mlir +++ b/mlir/test/Dialect/Affine/affine-data-copy.mlir @@ -270,3 +270,16 @@ func @max_lower_bound(%M: memref<2048x516xf64>, %i : index, %j : index) { // CHECK-NEXT: } // CHECK-NEXT: } // CHECK-NEXT: memref.dealloc %[[BUF]] : memref<2048x6xf64> + +// ----- + +// CHECK-LABEL: func @empty_loop +func @empty_loop(%arg0: memref<1024x1024xf64>) { + // Empty loop - so no copy generation happens. + affine.for %i = 0 to 0 { + affine.load %arg0[0, %i] : memref<1024x1024xf64> + } + return + // CHECK-NOT: memref.alloc + // CHECK: return +} From 1f169a774cb865659cefe085e70a56a884e3711e Mon Sep 17 00:00:00 2001 From: Kai Luo Date: Wed, 30 Jun 2021 05:36:26 +0000 Subject: [PATCH 261/619] [PowerPC][AIX] Re-generate test aix-framepointer-save-restore.ll. NFC. --- .../PowerPC/aix-framepointer-save-restore.ll | 126 ++++++++++++++++-- 1 file changed, 116 insertions(+), 10 deletions(-) diff --git a/llvm/test/CodeGen/PowerPC/aix-framepointer-save-restore.ll b/llvm/test/CodeGen/PowerPC/aix-framepointer-save-restore.ll index 4b3759f3e931e..e92789c51300d 100644 --- a/llvm/test/CodeGen/PowerPC/aix-framepointer-save-restore.ll +++ b/llvm/test/CodeGen/PowerPC/aix-framepointer-save-restore.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs < %s -mcpu=pwr4 -mattr=-altivec \ ; RUN: -mtriple=powerpc-ibm-aix-xcoff | \ ; RUN: FileCheck %s -check-prefix=AIX32 @@ -9,19 +10,124 @@ declare void @clobber(i32*) define dso_local float @frameptr_only(i32 %n, float %f) { +; AIX32-LABEL: frameptr_only: +; AIX32: # %bb.0: # %entry +; AIX32-NEXT: mflr 0 +; AIX32-NEXT: stw 31, -12(1) +; AIX32-NEXT: stw 0, 8(1) +; AIX32-NEXT: stwu 1, -80(1) +; AIX32-NEXT: slwi 3, 3, 2 +; AIX32-NEXT: mr 31, 1 +; AIX32-NEXT: stfd 31, 72(31) # 8-byte Folded Spill +; AIX32-NEXT: fmr 31, 1 +; AIX32-NEXT: addi 3, 3, 15 +; AIX32-NEXT: addi 4, 31, 80 +; AIX32-NEXT: rlwinm 3, 3, 0, 0, 27 +; AIX32-NEXT: neg 3, 3 +; AIX32-NEXT: stwux 4, 1, 3 +; AIX32-NEXT: addi 3, 1, 64 +; AIX32-NEXT: bl .clobber[PR] +; AIX32-NEXT: nop +; AIX32-NEXT: fmr 1, 31 +; AIX32-NEXT: lfd 31, 72(31) # 8-byte Folded Reload +; AIX32-NEXT: lwz 1, 0(1) +; AIX32-NEXT: lwz 0, 8(1) +; AIX32-NEXT: mtlr 0 +; AIX32-NEXT: lwz 31, -12(1) +; AIX32-NEXT: blr +; +; AIX64-LABEL: frameptr_only: +; AIX64: # %bb.0: # %entry +; AIX64-NEXT: mflr 0 +; AIX64-NEXT: std 31, -16(1) +; AIX64-NEXT: std 0, 16(1) +; AIX64-NEXT: stdu 1, -144(1) +; AIX64-NEXT: rldic 3, 3, 2, 30 +; AIX64-NEXT: mr 31, 1 +; AIX64-NEXT: stfd 31, 136(31) # 8-byte Folded Spill +; AIX64-NEXT: fmr 31, 1 +; AIX64-NEXT: addi 3, 3, 15 +; AIX64-NEXT: addi 4, 31, 144 +; AIX64-NEXT: rldicl 3, 3, 60, 4 +; AIX64-NEXT: rldicl 3, 3, 4, 29 +; AIX64-NEXT: neg 3, 3 +; AIX64-NEXT: stdux 4, 1, 3 +; AIX64-NEXT: addi 3, 1, 112 +; AIX64-NEXT: bl .clobber[PR] +; AIX64-NEXT: nop +; AIX64-NEXT: fmr 1, 31 +; AIX64-NEXT: lfd 31, 136(31) # 8-byte Folded Reload +; AIX64-NEXT: ld 1, 0(1) +; AIX64-NEXT: ld 0, 16(1) +; AIX64-NEXT: mtlr 0 +; AIX64-NEXT: ld 31, -16(1) +; AIX64-NEXT: blr entry: %0 = alloca i32, i32 %n call void @clobber(i32* %0) ret float %f } -; AIX32: stw 31, -12(1) -; AIX32: stwu 1, -80(1) -; AIX32: lwz 1, 0(1) -; AIX32: lwz 31, -12(1) - -; AIX64: std 31, -16(1) -; AIX64: stdu 1, -144(1) -; AIX64: ld 1, 0(1) -; AIX64: ld 31, -16(1) - +define dso_local void @frameptr_realigned(i32 %n) { +; AIX32-LABEL: frameptr_realigned: +; AIX32: # %bb.0: +; AIX32-NEXT: mflr 0 +; AIX32-NEXT: stw 31, -4(1) +; AIX32-NEXT: stw 30, -8(1) +; AIX32-NEXT: mr 30, 1 +; AIX32-NEXT: stw 0, 8(1) +; AIX32-NEXT: clrlwi 0, 1, 26 +; AIX32-NEXT: subfic 0, 0, -192 +; AIX32-NEXT: stwux 1, 1, 0 +; AIX32-NEXT: slwi 3, 3, 2 +; AIX32-NEXT: lwz 4, 0(1) +; AIX32-NEXT: li 5, -64 +; AIX32-NEXT: addi 3, 3, 15 +; AIX32-NEXT: mr 31, 1 +; AIX32-NEXT: rlwinm 3, 3, 0, 0, 27 +; AIX32-NEXT: neg 3, 3 +; AIX32-NEXT: and 5, 3, 5 +; AIX32-NEXT: stwux 4, 1, 5 +; AIX32-NEXT: addi 3, 1, 64 +; AIX32-NEXT: bl .clobber[PR] +; AIX32-NEXT: nop +; AIX32-NEXT: mr 1, 30 +; AIX32-NEXT: lwz 0, 8(1) +; AIX32-NEXT: mtlr 0 +; AIX32-NEXT: lwz 31, -4(1) +; AIX32-NEXT: lwz 30, -8(1) +; AIX32-NEXT: blr +; +; AIX64-LABEL: frameptr_realigned: +; AIX64: # %bb.0: +; AIX64-NEXT: mflr 0 +; AIX64-NEXT: std 31, -8(1) +; AIX64-NEXT: std 30, -16(1) +; AIX64-NEXT: mr 30, 1 +; AIX64-NEXT: std 0, 16(1) +; AIX64-NEXT: clrldi 0, 1, 58 +; AIX64-NEXT: subfic 0, 0, -256 +; AIX64-NEXT: stdux 1, 1, 0 +; AIX64-NEXT: rldic 3, 3, 2, 30 +; AIX64-NEXT: ld 4, 0(1) +; AIX64-NEXT: li 5, -64 +; AIX64-NEXT: addi 3, 3, 15 +; AIX64-NEXT: mr 31, 1 +; AIX64-NEXT: rldicl 3, 3, 60, 4 +; AIX64-NEXT: rldicl 3, 3, 4, 29 +; AIX64-NEXT: neg 3, 3 +; AIX64-NEXT: and 5, 3, 5 +; AIX64-NEXT: stdux 4, 1, 5 +; AIX64-NEXT: addi 3, 1, 128 +; AIX64-NEXT: bl .clobber[PR] +; AIX64-NEXT: nop +; AIX64-NEXT: mr 1, 30 +; AIX64-NEXT: ld 0, 16(1) +; AIX64-NEXT: mtlr 0 +; AIX64-NEXT: ld 31, -8(1) +; AIX64-NEXT: ld 30, -16(1) +; AIX64-NEXT: blr + %ptr = alloca i32, i32 %n, align 64 + call void @clobber(i32* %ptr) + ret void +} From fceaf8621179aa758c44f3eaee02d789abfd455b Mon Sep 17 00:00:00 2001 From: Dmitry Polukhin Date: Tue, 29 Jun 2021 05:57:14 -0700 Subject: [PATCH 262/619] [clang] Fix UB when string.front() is used for the empty string Compilation database might have empty string as a command line argument. But ExpandResponseFilesDatabase::expand doesn't expect this and assumes that string.front() can be used for any argument. It is undefined behaviour if string is empty. With debug build mode it causes crash in clangd. Test Plan: check-clang Differential Revision: https://reviews.llvm.org/D105120 --- .../ExpandResponseFilesCompilationDatabase.cpp | 3 ++- clang/unittests/Tooling/CompilationDatabaseTest.cpp | 11 +++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/clang/lib/Tooling/ExpandResponseFilesCompilationDatabase.cpp b/clang/lib/Tooling/ExpandResponseFilesCompilationDatabase.cpp index a825370afcf56..29787b8a88942 100644 --- a/clang/lib/Tooling/ExpandResponseFilesCompilationDatabase.cpp +++ b/clang/lib/Tooling/ExpandResponseFilesCompilationDatabase.cpp @@ -54,7 +54,8 @@ class ExpandResponseFilesDatabase : public CompilationDatabase { Argv.reserve(Cmd.CommandLine.size()); for (auto &Arg : Cmd.CommandLine) { Argv.push_back(Arg.c_str()); - SeenRSPFile |= Arg.front() == '@'; + if (!Arg.empty()) + SeenRSPFile |= Arg.front() == '@'; } if (!SeenRSPFile) continue; diff --git a/clang/unittests/Tooling/CompilationDatabaseTest.cpp b/clang/unittests/Tooling/CompilationDatabaseTest.cpp index 9a04de32c852d..218a352f86f06 100644 --- a/clang/unittests/Tooling/CompilationDatabaseTest.cpp +++ b/clang/unittests/Tooling/CompilationDatabaseTest.cpp @@ -700,6 +700,10 @@ class MemDBTest : public ::testing::Test { SmallVector Argv = {Clang, File, "-D", File}; llvm::SplitString(Flags, Argv); + // Trim double quotation from the argumnets if any. + for (auto *It = Argv.begin(); It != Argv.end(); ++It) + *It = It->trim("\""); + SmallString<32> Dir; llvm::sys::path::system_temp_directory(false, Dir); @@ -962,5 +966,12 @@ TEST_F(ExpandResponseFilesTest, ExpandResponseFiles) { EXPECT_EQ(getCommand("bar.cpp"), "clang bar.cpp -D bar.cpp -Dflag"); } +TEST_F(ExpandResponseFilesTest, ExpandResponseFilesEmptyArgument) { + addFile(path(StringRef("rsp1.rsp")), "-Dflag"); + + add("foo.cpp", "clang", "@rsp1.rsp \"\""); + EXPECT_EQ(getCommand("foo.cpp"), "clang foo.cpp -D foo.cpp -Dflag "); +} + } // end namespace tooling } // end namespace clang From b062fff87adcfa2e252cbce43d92b61b76614bd5 Mon Sep 17 00:00:00 2001 From: Sjoerd Meijer Date: Tue, 29 Jun 2021 11:33:14 +0100 Subject: [PATCH 263/619] Recommit "[AArch64] Custom lower <4 x i8> loads" This recommits D104782 including a fix for adding a wrong operand to the new load node. Differential Revision: https://reviews.llvm.org/D105110 --- .../Target/AArch64/AArch64ISelLowering.cpp | 43 +++- llvm/lib/Target/AArch64/AArch64ISelLowering.h | 1 + llvm/test/CodeGen/AArch64/aarch64-load-ext.ll | 223 ++++++++++++++++-- llvm/test/CodeGen/AArch64/arm64-vshift.ll | 33 +-- llvm/test/CodeGen/AArch64/neon-extload.ll | 145 ------------ llvm/test/CodeGen/AArch64/sadd_sat_vec.ll | 20 +- llvm/test/CodeGen/AArch64/ssub_sat_vec.ll | 20 +- llvm/test/CodeGen/AArch64/uadd_sat_vec.ll | 20 +- llvm/test/CodeGen/AArch64/usub_sat_vec.ll | 20 +- 9 files changed, 280 insertions(+), 245 deletions(-) delete mode 100644 llvm/test/CodeGen/AArch64/neon-extload.ll diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index b3edefe550f81..5b0e7d743b088 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1131,6 +1131,13 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::VSCALE, MVT::i32, Custom); setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom); + + setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Custom); + setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Custom); + setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Custom); + setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i8, Custom); + setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8, Custom); + setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i8, Custom); } if (Subtarget->hasSVE()) { @@ -4477,6 +4484,40 @@ SDValue AArch64TargetLowering::LowerSTORE(SDValue Op, return SDValue(); } +// Custom lowering for extending v4i8 vector loads. +SDValue AArch64TargetLowering::LowerLOAD(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + LoadSDNode *LoadNode = cast(Op); + assert(LoadNode && "Expected custom lowering of a load node"); + EVT VT = Op->getValueType(0); + assert((VT == MVT::v4i16 || VT == MVT::v4i32) && "Expected v4i16 or v4i32"); + + if (LoadNode->getMemoryVT() != MVT::v4i8) + return SDValue(); + + unsigned ExtType; + if (LoadNode->getExtensionType() == ISD::SEXTLOAD) + ExtType = ISD::SIGN_EXTEND; + else if (LoadNode->getExtensionType() == ISD::ZEXTLOAD || + LoadNode->getExtensionType() == ISD::EXTLOAD) + ExtType = ISD::ZERO_EXTEND; + else + return SDValue(); + + SDValue Load = DAG.getLoad(MVT::f32, DL, LoadNode->getChain(), + LoadNode->getBasePtr(), MachinePointerInfo()); + SDValue Chain = Load.getValue(1); + SDValue Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f32, Load); + SDValue BC = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Vec); + SDValue Ext = DAG.getNode(ExtType, DL, MVT::v8i16, BC); + Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Ext, + DAG.getConstant(0, DL, MVT::i64)); + if (VT == MVT::v4i32) + Ext = DAG.getNode(ExtType, DL, MVT::v4i32, Ext); + return DAG.getMergeValues({Ext, Chain}, DL); +} + // Generate SUBS and CSEL for integer abs. SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const { MVT VT = Op.getSimpleValueType(); @@ -4720,7 +4761,7 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, case ISD::LOAD: if (useSVEForFixedLengthVectorVT(Op.getValueType())) return LowerFixedLengthVectorLoadToSVE(Op, DAG); - llvm_unreachable("Unexpected request to lower ISD::LOAD"); + return LowerLOAD(Op, DAG); case ISD::ADD: return LowerToPredicatedOp(Op, DAG, AArch64ISD::ADD_PRED); case ISD::AND: diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index f3b2da8304303..7daa61996739f 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -851,6 +851,7 @@ class AArch64TargetLowering : public TargetLowering { SmallVectorImpl &InVals, bool isThisReturn, SDValue ThisVal) const; + SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll b/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll index 308352e3e2277..1bbab3879dc35 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll @@ -86,27 +86,222 @@ define <2 x i8> @test3(<2 x i8>* %v2i8_ptr) { define <4 x i8> @test4(<4 x i8>* %v4i8_ptr) { ; CHECK-LE-LABEL: test4: ; CHECK-LE: // %bb.0: -; CHECK-LE-NEXT: ld1 { v0.b }[0], [x0] -; CHECK-LE-NEXT: add x8, x0, #1 // =1 -; CHECK-LE-NEXT: ld1 { v0.b }[2], [x8] -; CHECK-LE-NEXT: add x8, x0, #2 // =2 -; CHECK-LE-NEXT: ld1 { v0.b }[4], [x8] -; CHECK-LE-NEXT: add x8, x0, #3 // =3 -; CHECK-LE-NEXT: ld1 { v0.b }[6], [x8] +; CHECK-LE-NEXT: ldr s0, [x0] +; CHECK-LE-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-LE-NEXT: ret ; ; CHECK-BE-LABEL: test4: ; CHECK-BE: // %bb.0: -; CHECK-BE-NEXT: ld1 { v0.b }[0], [x0] -; CHECK-BE-NEXT: add x8, x0, #1 // =1 -; CHECK-BE-NEXT: ld1 { v0.b }[2], [x8] -; CHECK-BE-NEXT: add x8, x0, #2 // =2 -; CHECK-BE-NEXT: ld1 { v0.b }[4], [x8] -; CHECK-BE-NEXT: add x8, x0, #3 // =3 -; CHECK-BE-NEXT: ld1 { v0.b }[6], [x8] +; CHECK-BE-NEXT: ldr s0, [x0] +; CHECK-BE-NEXT: rev32 v0.8b, v0.8b +; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-BE-NEXT: rev64 v0.4h, v0.4h ; CHECK-BE-NEXT: ret %v4i8 = load <4 x i8>, <4 x i8>* %v4i8_ptr ret <4 x i8> %v4i8 } + +define <4 x i32> @fsext_v4i32(<4 x i8>* %a) { +; CHECK-LE-LABEL: fsext_v4i32: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: ldr s0, [x0] +; CHECK-LE-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-LE-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: fsext_v4i32: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: ldr s0, [x0] +; CHECK-BE-NEXT: rev32 v0.8b, v0.8b +; CHECK-BE-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-BE-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-BE-NEXT: rev64 v0.4s, v0.4s +; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-BE-NEXT: ret + %x = load <4 x i8>, <4 x i8>* %a + %y = sext <4 x i8> %x to <4 x i32> + ret <4 x i32> %y +} + +define <4 x i32> @fzext_v4i32(<4 x i8>* %a) { +; CHECK-LE-LABEL: fzext_v4i32: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: ldr s0, [x0] +; CHECK-LE-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-LE-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: fzext_v4i32: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: ldr s0, [x0] +; CHECK-BE-NEXT: rev32 v0.8b, v0.8b +; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-BE-NEXT: rev64 v0.4s, v0.4s +; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-BE-NEXT: ret + %x = load <4 x i8>, <4 x i8>* %a + %y = zext <4 x i8> %x to <4 x i32> + ret <4 x i32> %y +} + +; TODO: This codegen could just be: +; ldrb w0, [x0] +; +define i32 @loadExti32(<4 x i8>* %ref) { +; CHECK-LE-LABEL: loadExti32: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: ldr s0, [x0] +; CHECK-LE-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-LE-NEXT: umov w8, v0.h[0] +; CHECK-LE-NEXT: and w0, w8, #0xff +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: loadExti32: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: ldr s0, [x0] +; CHECK-BE-NEXT: rev32 v0.8b, v0.8b +; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-BE-NEXT: umov w8, v0.h[0] +; CHECK-BE-NEXT: and w0, w8, #0xff +; CHECK-BE-NEXT: ret + %a = load <4 x i8>, <4 x i8>* %ref + %vecext = extractelement <4 x i8> %a, i32 0 + %conv = zext i8 %vecext to i32 + ret i32 %conv +} + +define <4 x i16> @fsext_v4i16(<4 x i8>* %a) { +; CHECK-LE-LABEL: fsext_v4i16: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: ldr s0, [x0] +; CHECK-LE-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: fsext_v4i16: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: ldr s0, [x0] +; CHECK-BE-NEXT: rev32 v0.8b, v0.8b +; CHECK-BE-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-BE-NEXT: rev64 v0.4h, v0.4h +; CHECK-BE-NEXT: ret + %x = load <4 x i8>, <4 x i8>* %a + %y = sext <4 x i8> %x to <4 x i16> + ret <4 x i16> %y +} + +define <4 x i16> @fzext_v4i16(<4 x i8>* %a) { +; CHECK-LE-LABEL: fzext_v4i16: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: ldr s0, [x0] +; CHECK-LE-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: fzext_v4i16: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: ldr s0, [x0] +; CHECK-BE-NEXT: rev32 v0.8b, v0.8b +; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-BE-NEXT: rev64 v0.4h, v0.4h +; CHECK-BE-NEXT: ret + %x = load <4 x i8>, <4 x i8>* %a + %y = zext <4 x i8> %x to <4 x i16> + ret <4 x i16> %y +} + +define <4 x i16> @anyext_v4i16(<4 x i8> *%a, <4 x i8> *%b) { +; CHECK-LE-LABEL: anyext_v4i16: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: ldr s0, [x0] +; CHECK-LE-NEXT: ldr s1, [x1] +; CHECK-LE-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-LE-NEXT: ushll v1.8h, v1.8b, #0 +; CHECK-LE-NEXT: add v0.4h, v0.4h, v1.4h +; CHECK-LE-NEXT: shl v0.4h, v0.4h, #8 +; CHECK-LE-NEXT: sshr v0.4h, v0.4h, #8 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: anyext_v4i16: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: ldr s0, [x0] +; CHECK-BE-NEXT: ldr s1, [x1] +; CHECK-BE-NEXT: rev32 v0.8b, v0.8b +; CHECK-BE-NEXT: rev32 v1.8b, v1.8b +; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-BE-NEXT: ushll v1.8h, v1.8b, #0 +; CHECK-BE-NEXT: add v0.4h, v0.4h, v1.4h +; CHECK-BE-NEXT: shl v0.4h, v0.4h, #8 +; CHECK-BE-NEXT: sshr v0.4h, v0.4h, #8 +; CHECK-BE-NEXT: rev64 v0.4h, v0.4h +; CHECK-BE-NEXT: ret + %x = load <4 x i8>, <4 x i8>* %a, align 4 + %y = load <4 x i8>, <4 x i8>* %b, align 4 + %z = add <4 x i8> %x, %y + %s = sext <4 x i8> %z to <4 x i16> + ret <4 x i16> %s +} + +define <4 x i32> @anyext_v4i32(<4 x i8> *%a, <4 x i8> *%b) { +; CHECK-LE-LABEL: anyext_v4i32: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: ldr s0, [x0] +; CHECK-LE-NEXT: ldr s1, [x1] +; CHECK-LE-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-LE-NEXT: ushll v1.8h, v1.8b, #0 +; CHECK-LE-NEXT: add v0.4h, v0.4h, v1.4h +; CHECK-LE-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-LE-NEXT: shl v0.4s, v0.4s, #24 +; CHECK-LE-NEXT: sshr v0.4s, v0.4s, #24 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: anyext_v4i32: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: ldr s0, [x0] +; CHECK-BE-NEXT: ldr s1, [x1] +; CHECK-BE-NEXT: rev32 v0.8b, v0.8b +; CHECK-BE-NEXT: rev32 v1.8b, v1.8b +; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-BE-NEXT: ushll v1.8h, v1.8b, #0 +; CHECK-BE-NEXT: add v0.4h, v0.4h, v1.4h +; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-BE-NEXT: shl v0.4s, v0.4s, #24 +; CHECK-BE-NEXT: sshr v0.4s, v0.4s, #24 +; CHECK-BE-NEXT: rev64 v0.4s, v0.4s +; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-BE-NEXT: ret + %x = load <4 x i8>, <4 x i8>* %a, align 4 + %y = load <4 x i8>, <4 x i8>* %b, align 4 + %z = add <4 x i8> %x, %y + %s = sext <4 x i8> %z to <4 x i32> + ret <4 x i32> %s +} + +define <4 x i8> @bitcast(i32 %0) { +; CHECK-LE-LABEL: bitcast: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: sub sp, sp, #16 // =16 +; CHECK-LE-NEXT: .cfi_def_cfa_offset 16 +; CHECK-LE-NEXT: str w0, [sp, #12] +; CHECK-LE-NEXT: ldr s0, [sp, #12] +; CHECK-LE-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-LE-NEXT: add sp, sp, #16 // =16 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: bitcast: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: sub sp, sp, #16 // =16 +; CHECK-BE-NEXT: .cfi_def_cfa_offset 16 +; CHECK-BE-NEXT: str w0, [sp, #12] +; CHECK-BE-NEXT: ldr s0, [sp, #12] +; CHECK-BE-NEXT: rev32 v0.8b, v0.8b +; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-BE-NEXT: rev64 v0.4h, v0.4h +; CHECK-BE-NEXT: add sp, sp, #16 // =16 +; CHECK-BE-NEXT: ret + %2 = bitcast i32 %0 to <4 x i8> + ret <4 x i8> %2 +} diff --git a/llvm/test/CodeGen/AArch64/arm64-vshift.ll b/llvm/test/CodeGen/AArch64/arm64-vshift.ll index c63f3399e636f..07b257043426d 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vshift.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vshift.ll @@ -1494,17 +1494,12 @@ define <8 x i16> @neon.ushl8h_no_constant_shift(<8 x i8>* %A) nounwind { } define <4 x i32> @neon.ushl8h_constant_shift_extend_not_2x(<4 x i8>* %A) nounwind { -;CHECK-LABEL: @neon.ushl8h_constant_shift_extend_not_2x -;CHECK-NOT: ushll.8h v0, -;CHECK: ldrb w8, [x0] -;CHECK: fmov s0, w8 -;CHECK: ldrb w8, [x0, #1] -;CHECK: mov.s v0[1], w8 -;CHECK: ldrb w8, [x0, #2] -;CHECK: mov.s v0[2], w8 -;CHECK: ldrb w8, [x0, #3] -;CHECK: mov.s v0[3], w8 -;CHECK: shl.4s v0, v0, #1 +; CHECK-LABEL: neon.ushl8h_constant_shift_extend_not_2x: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr s0, [x0] +; CHECK-NEXT: ushll.8h v0, v0, #0 +; CHECK-NEXT: ushll.4s v0, v0, #1 +; CHECK-NEXT: ret %tmp1 = load <4 x i8>, <4 x i8>* %A %tmp2 = zext <4 x i8> %tmp1 to <4 x i32> %tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %tmp2, <4 x i32> ) @@ -1637,16 +1632,12 @@ define <8 x i16> @neon.sshll8h_constant_shift(<8 x i8>* %A) nounwind { } define <4 x i32> @neon.sshl4s_wrong_ext_constant_shift(<4 x i8>* %A) nounwind { -;CHECK-LABEL: neon.sshl4s_wrong_ext_constant_shift -;CHECK: ldrsb w8, [x0] -;CHECK-NEXT: fmov s0, w8 -;CHECK-NEXT: ldrsb w8, [x0, #1] -;CHECK-NEXT: mov.s v0[1], w8 -;CHECK-NEXT: ldrsb w8, [x0, #2] -;CHECK-NEXT: mov.s v0[2], w8 -;CHECK-NEXT: ldrsb w8, [x0, #3] -;CHECK-NEXT: mov.s v0[3], w8 -;CHECK-NEXT: shl.4s v0, v0, #1 +; CHECK-LABEL: neon.sshl4s_wrong_ext_constant_shift: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr s0, [x0] +; CHECK-NEXT: sshll.8h v0, v0, #0 +; CHECK-NEXT: sshll.4s v0, v0, #1 +; CHECK-NEXT: ret %tmp1 = load <4 x i8>, <4 x i8>* %A %tmp2 = sext <4 x i8> %tmp1 to <4 x i32> %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp2, <4 x i32> ) diff --git a/llvm/test/CodeGen/AArch64/neon-extload.ll b/llvm/test/CodeGen/AArch64/neon-extload.ll deleted file mode 100644 index 321a1babb411d..0000000000000 --- a/llvm/test/CodeGen/AArch64/neon-extload.ll +++ /dev/null @@ -1,145 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=LE -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=BE - -define <4 x i32> @fsext_v4i32(<4 x i8>* %a) { -; LE-LABEL: fsext_v4i32: -; LE: // %bb.0: -; LE-NEXT: ldrsb w8, [x0] -; LE-NEXT: ldrsb w9, [x0, #1] -; LE-NEXT: ldrsb w10, [x0, #2] -; LE-NEXT: ldrsb w11, [x0, #3] -; LE-NEXT: fmov s0, w8 -; LE-NEXT: mov v0.s[1], w9 -; LE-NEXT: mov v0.s[2], w10 -; LE-NEXT: mov v0.s[3], w11 -; LE-NEXT: ret -; -; BE-LABEL: fsext_v4i32: -; BE: // %bb.0: -; BE-NEXT: ldrsb w8, [x0] -; BE-NEXT: ldrsb w9, [x0, #1] -; BE-NEXT: ldrsb w10, [x0, #2] -; BE-NEXT: ldrsb w11, [x0, #3] -; BE-NEXT: fmov s0, w8 -; BE-NEXT: mov v0.s[1], w9 -; BE-NEXT: mov v0.s[2], w10 -; BE-NEXT: mov v0.s[3], w11 -; BE-NEXT: rev64 v0.4s, v0.4s -; BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; BE-NEXT: ret - %x = load <4 x i8>, <4 x i8>* %a - %y = sext <4 x i8> %x to <4 x i32> - ret <4 x i32> %y -} - -define <4 x i32> @fzext_v4i32(<4 x i8>* %a) { -; LE-LABEL: fzext_v4i32: -; LE: // %bb.0: -; LE-NEXT: ldrb w8, [x0] -; LE-NEXT: ldrb w9, [x0, #1] -; LE-NEXT: ldrb w10, [x0, #2] -; LE-NEXT: ldrb w11, [x0, #3] -; LE-NEXT: fmov s0, w8 -; LE-NEXT: mov v0.s[1], w9 -; LE-NEXT: mov v0.s[2], w10 -; LE-NEXT: mov v0.s[3], w11 -; LE-NEXT: ret -; -; BE-LABEL: fzext_v4i32: -; BE: // %bb.0: -; BE-NEXT: ldrb w8, [x0] -; BE-NEXT: ldrb w9, [x0, #1] -; BE-NEXT: ldrb w10, [x0, #2] -; BE-NEXT: ldrb w11, [x0, #3] -; BE-NEXT: fmov s0, w8 -; BE-NEXT: mov v0.s[1], w9 -; BE-NEXT: mov v0.s[2], w10 -; BE-NEXT: mov v0.s[3], w11 -; BE-NEXT: rev64 v0.4s, v0.4s -; BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; BE-NEXT: ret - %x = load <4 x i8>, <4 x i8>* %a - %y = zext <4 x i8> %x to <4 x i32> - ret <4 x i32> %y -} - -define i32 @loadExt.i32(<4 x i8>* %ref) { -; CHECK-LABEL: loadExt.i32: -; CHECK: ldrb -; LE-LABEL: loadExt.i32: -; LE: // %bb.0: -; LE-NEXT: ldrb w0, [x0] -; LE-NEXT: ret -; -; BE-LABEL: loadExt.i32: -; BE: // %bb.0: -; BE-NEXT: ldrb w0, [x0] -; BE-NEXT: ret - %a = load <4 x i8>, <4 x i8>* %ref - %vecext = extractelement <4 x i8> %a, i32 0 - %conv = zext i8 %vecext to i32 - ret i32 %conv -} - -define <4 x i16> @fsext_v4i16(<4 x i8>* %a) { -; LE-LABEL: fsext_v4i16: -; LE: // %bb.0: -; LE-NEXT: ldrsb w8, [x0] -; LE-NEXT: ldrsb w9, [x0, #1] -; LE-NEXT: ldrsb w10, [x0, #2] -; LE-NEXT: ldrsb w11, [x0, #3] -; LE-NEXT: fmov s0, w8 -; LE-NEXT: mov v0.h[1], w9 -; LE-NEXT: mov v0.h[2], w10 -; LE-NEXT: mov v0.h[3], w11 -; LE-NEXT: // kill: def $d0 killed $d0 killed $q0 -; LE-NEXT: ret -; -; BE-LABEL: fsext_v4i16: -; BE: // %bb.0: -; BE-NEXT: ldrsb w8, [x0] -; BE-NEXT: ldrsb w9, [x0, #1] -; BE-NEXT: ldrsb w10, [x0, #2] -; BE-NEXT: ldrsb w11, [x0, #3] -; BE-NEXT: fmov s0, w8 -; BE-NEXT: mov v0.h[1], w9 -; BE-NEXT: mov v0.h[2], w10 -; BE-NEXT: mov v0.h[3], w11 -; BE-NEXT: rev64 v0.4h, v0.4h -; BE-NEXT: ret - %x = load <4 x i8>, <4 x i8>* %a - %y = sext <4 x i8> %x to <4 x i16> - ret <4 x i16> %y -} - -define <4 x i16> @fzext_v4i16(<4 x i8>* %a) { -; LE-LABEL: fzext_v4i16: -; LE: // %bb.0: -; LE-NEXT: ldrb w8, [x0] -; LE-NEXT: ldrb w9, [x0, #1] -; LE-NEXT: ldrb w10, [x0, #2] -; LE-NEXT: ldrb w11, [x0, #3] -; LE-NEXT: fmov s0, w8 -; LE-NEXT: mov v0.h[1], w9 -; LE-NEXT: mov v0.h[2], w10 -; LE-NEXT: mov v0.h[3], w11 -; LE-NEXT: // kill: def $d0 killed $d0 killed $q0 -; LE-NEXT: ret -; -; BE-LABEL: fzext_v4i16: -; BE: // %bb.0: -; BE-NEXT: ldrb w8, [x0] -; BE-NEXT: ldrb w9, [x0, #1] -; BE-NEXT: ldrb w10, [x0, #2] -; BE-NEXT: ldrb w11, [x0, #3] -; BE-NEXT: fmov s0, w8 -; BE-NEXT: mov v0.h[1], w9 -; BE-NEXT: mov v0.h[2], w10 -; BE-NEXT: mov v0.h[3], w11 -; BE-NEXT: rev64 v0.4h, v0.4h -; BE-NEXT: ret - %x = load <4 x i8>, <4 x i8>* %a - %y = zext <4 x i8> %x to <4 x i16> - ret <4 x i16> %y -} diff --git a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll index cefd4758b3747..9c654f6719b18 100644 --- a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll @@ -112,22 +112,10 @@ define void @v8i8(<8 x i8>* %px, <8 x i8>* %py, <8 x i8>* %pz) nounwind { define void @v4i8(<4 x i8>* %px, <4 x i8>* %py, <4 x i8>* %pz) nounwind { ; CHECK-LABEL: v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrsb w8, [x0] -; CHECK-NEXT: ldrsb w9, [x1] -; CHECK-NEXT: ldrsb w10, [x0, #1] -; CHECK-NEXT: ldrsb w11, [x1, #1] -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fmov s1, w9 -; CHECK-NEXT: ldrsb w8, [x0, #2] -; CHECK-NEXT: ldrsb w9, [x1, #2] -; CHECK-NEXT: mov v0.h[1], w10 -; CHECK-NEXT: mov v1.h[1], w11 -; CHECK-NEXT: ldrsb w10, [x0, #3] -; CHECK-NEXT: ldrsb w11, [x1, #3] -; CHECK-NEXT: mov v0.h[2], w8 -; CHECK-NEXT: mov v1.h[2], w9 -; CHECK-NEXT: mov v0.h[3], w10 -; CHECK-NEXT: mov v1.h[3], w11 +; CHECK-NEXT: ldr s0, [x0] +; CHECK-NEXT: ldr s1, [x1] +; CHECK-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-NEXT: sshll v1.8h, v1.8b, #0 ; CHECK-NEXT: shl v1.4h, v1.4h, #8 ; CHECK-NEXT: shl v0.4h, v0.4h, #8 ; CHECK-NEXT: sqadd v0.4h, v0.4h, v1.4h diff --git a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll index 17af8a11aeee5..7c2e2330608e8 100644 --- a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll @@ -113,22 +113,10 @@ define void @v8i8(<8 x i8>* %px, <8 x i8>* %py, <8 x i8>* %pz) nounwind { define void @v4i8(<4 x i8>* %px, <4 x i8>* %py, <4 x i8>* %pz) nounwind { ; CHECK-LABEL: v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrsb w8, [x0] -; CHECK-NEXT: ldrsb w9, [x1] -; CHECK-NEXT: ldrsb w10, [x0, #1] -; CHECK-NEXT: ldrsb w11, [x1, #1] -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fmov s1, w9 -; CHECK-NEXT: ldrsb w8, [x0, #2] -; CHECK-NEXT: ldrsb w9, [x1, #2] -; CHECK-NEXT: mov v0.h[1], w10 -; CHECK-NEXT: mov v1.h[1], w11 -; CHECK-NEXT: ldrsb w10, [x0, #3] -; CHECK-NEXT: ldrsb w11, [x1, #3] -; CHECK-NEXT: mov v0.h[2], w8 -; CHECK-NEXT: mov v1.h[2], w9 -; CHECK-NEXT: mov v0.h[3], w10 -; CHECK-NEXT: mov v1.h[3], w11 +; CHECK-NEXT: ldr s0, [x0] +; CHECK-NEXT: ldr s1, [x1] +; CHECK-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-NEXT: sshll v1.8h, v1.8b, #0 ; CHECK-NEXT: shl v1.4h, v1.4h, #8 ; CHECK-NEXT: shl v0.4h, v0.4h, #8 ; CHECK-NEXT: sqsub v0.4h, v0.4h, v1.4h diff --git a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll index 21427a6a92d7e..2b52e4c934c9d 100644 --- a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll @@ -112,23 +112,11 @@ define void @v8i8(<8 x i8>* %px, <8 x i8>* %py, <8 x i8>* %pz) nounwind { define void @v4i8(<4 x i8>* %px, <4 x i8>* %py, <4 x i8>* %pz) nounwind { ; CHECK-LABEL: v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrb w8, [x0] -; CHECK-NEXT: ldrb w9, [x1] -; CHECK-NEXT: ldrb w10, [x0, #1] -; CHECK-NEXT: ldrb w11, [x1, #1] -; CHECK-NEXT: ldrb w12, [x0, #2] -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: ldrb w8, [x1, #2] -; CHECK-NEXT: fmov s1, w9 -; CHECK-NEXT: mov v0.h[1], w10 -; CHECK-NEXT: ldrb w9, [x0, #3] -; CHECK-NEXT: ldrb w10, [x1, #3] -; CHECK-NEXT: mov v1.h[1], w11 -; CHECK-NEXT: mov v0.h[2], w12 -; CHECK-NEXT: mov v1.h[2], w8 -; CHECK-NEXT: mov v0.h[3], w9 -; CHECK-NEXT: mov v1.h[3], w10 +; CHECK-NEXT: ldr s0, [x0] +; CHECK-NEXT: ldr s1, [x1] ; CHECK-NEXT: movi d2, #0xff00ff00ff00ff +; CHECK-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-NEXT: ushll v1.8h, v1.8b, #0 ; CHECK-NEXT: add v0.4h, v0.4h, v1.4h ; CHECK-NEXT: umin v0.4h, v0.4h, v2.4h ; CHECK-NEXT: xtn v0.8b, v0.8h diff --git a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll index a0ab8040e8fc0..63bbac3be3fb8 100644 --- a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll @@ -113,22 +113,10 @@ define void @v8i8(<8 x i8>* %px, <8 x i8>* %py, <8 x i8>* %pz) nounwind { define void @v4i8(<4 x i8>* %px, <4 x i8>* %py, <4 x i8>* %pz) nounwind { ; CHECK-LABEL: v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrb w8, [x0] -; CHECK-NEXT: ldrb w9, [x1] -; CHECK-NEXT: ldrb w10, [x0, #1] -; CHECK-NEXT: ldrb w11, [x1, #1] -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fmov s1, w9 -; CHECK-NEXT: ldrb w8, [x0, #2] -; CHECK-NEXT: ldrb w9, [x1, #2] -; CHECK-NEXT: mov v0.h[1], w10 -; CHECK-NEXT: mov v1.h[1], w11 -; CHECK-NEXT: ldrb w10, [x0, #3] -; CHECK-NEXT: ldrb w11, [x1, #3] -; CHECK-NEXT: mov v0.h[2], w8 -; CHECK-NEXT: mov v1.h[2], w9 -; CHECK-NEXT: mov v0.h[3], w10 -; CHECK-NEXT: mov v1.h[3], w11 +; CHECK-NEXT: ldr s0, [x0] +; CHECK-NEXT: ldr s1, [x1] +; CHECK-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-NEXT: ushll v1.8h, v1.8b, #0 ; CHECK-NEXT: uqsub v0.4h, v0.4h, v1.4h ; CHECK-NEXT: xtn v0.8b, v0.8h ; CHECK-NEXT: str s0, [x2] From db2de8d7f1eb37f5a7c1d2de61cdd9d79647ea78 Mon Sep 17 00:00:00 2001 From: Stephan Herhut Date: Tue, 29 Jun 2021 16:47:58 +0200 Subject: [PATCH 264/619] [mlir][llvm] Add a test for memref.copy lowering to llvm This was missing and also there was a bug in the lowering itself, which went unnoticed due to it. Differential Revision: https://reviews.llvm.org/D105122 --- .../Dialect/LLVMIR/IR/FunctionCallUtils.cpp | 2 +- mlir/test/mlir-cpu-runner/copy.mlir | 49 +++++++++++++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) create mode 100644 mlir/test/mlir-cpu-runner/copy.mlir diff --git a/mlir/lib/Dialect/LLVMIR/IR/FunctionCallUtils.cpp b/mlir/lib/Dialect/LLVMIR/IR/FunctionCallUtils.cpp index 47a5851b51f2e..a4c8b741a3884 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/FunctionCallUtils.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/FunctionCallUtils.cpp @@ -35,7 +35,7 @@ static constexpr llvm::StringRef kPrintNewline = "printNewline"; static constexpr llvm::StringRef kMalloc = "malloc"; static constexpr llvm::StringRef kAlignedAlloc = "aligned_alloc"; static constexpr llvm::StringRef kFree = "free"; -static constexpr llvm::StringRef kMemRefCopy = "memref_copy"; +static constexpr llvm::StringRef kMemRefCopy = "memrefCopy"; /// Generic print function lookupOrCreate helper. LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreateFn(ModuleOp moduleOp, StringRef name, diff --git a/mlir/test/mlir-cpu-runner/copy.mlir b/mlir/test/mlir-cpu-runner/copy.mlir new file mode 100644 index 0000000000000..508e7b264ae1c --- /dev/null +++ b/mlir/test/mlir-cpu-runner/copy.mlir @@ -0,0 +1,49 @@ +// RUN: mlir-opt %s -convert-scf-to-std -convert-std-to-llvm \ +// RUN: | mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext,%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \ +// RUN: | FileCheck %s + +func private @print_memref_f32(memref<*xf32>) attributes { llvm.emit_c_interface } + +func @main() -> () { + %c0 = constant 0 : index + %c1 = constant 1 : index + + // Initialize input. + %input = memref.alloc() : memref<2x3xf32> + %dim_x = memref.dim %input, %c0 : memref<2x3xf32> + %dim_y = memref.dim %input, %c1 : memref<2x3xf32> + scf.parallel (%i, %j) = (%c0, %c0) to (%dim_x, %dim_y) step (%c1, %c1) { + %prod = muli %i, %dim_y : index + %val = addi %prod, %j : index + %val_i64 = index_cast %val : index to i64 + %val_f32 = sitofp %val_i64 : i64 to f32 + memref.store %val_f32, %input[%i, %j] : memref<2x3xf32> + } + %unranked_input = memref.cast %input : memref<2x3xf32> to memref<*xf32> + call @print_memref_f32(%unranked_input) : (memref<*xf32>) -> () + // CHECK: rank = 2 offset = 0 sizes = [2, 3] strides = [3, 1] + // CHECK-NEXT: [0, 1, 2] + // CHECK-NEXT: [3, 4, 5] + + %copy = memref.alloc() : memref<2x3xf32> + memref.copy %input, %copy : memref<2x3xf32> to memref<2x3xf32> + %unranked_copy = memref.cast %copy : memref<2x3xf32> to memref<*xf32> + call @print_memref_f32(%unranked_copy) : (memref<*xf32>) -> () + // CHECK: rank = 2 offset = 0 sizes = [2, 3] strides = [3, 1] + // CHECK-NEXT: [0, 1, 2] + // CHECK-NEXT: [3, 4, 5] + + %copy_two = memref.alloc() : memref<3x2xf32> + %copy_two_casted = memref.reinterpret_cast %copy_two to offset: [0], sizes: [2,3], strides:[1, 2] + : memref<3x2xf32> to memref<2x3xf32> + memref.copy %input, %copy_two_casted : memref<2x3xf32> to memref<2x3xf32> + %unranked_copy_two = memref.cast %copy_two : memref<3x2xf32> to memref<*xf32> + call @print_memref_f32(%unranked_copy_two) : (memref<*xf32>) -> () + // CHECK: rank = 2 offset = 0 sizes = [3, 2] strides = [2, 1] + // CHECK-NEXT: [0, 3] + // CHECK-NEXT: [1, 4] + // CHECK-NEXT: [2, 5] + + return +} From 17bcae8906949374495fb396b0baf540feafb1a5 Mon Sep 17 00:00:00 2001 From: Igor Kudrin Date: Wed, 30 Jun 2021 15:54:53 +0700 Subject: [PATCH 265/619] [ARM][NFC] Remove an unused method `ARMInstPrinter::printMveAddrModeQOperand()` was added in D62680, but was never used. It looks like `printT2AddrModeImm8Operand()` is used instead. Differential Revision: https://reviews.llvm.org/D105124 --- .../Target/ARM/MCTargetDesc/ARMInstPrinter.cpp | 16 ---------------- .../lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h | 2 -- 2 files changed, 18 deletions(-) diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp index 464fd01b56aeb..aa0828ea0c02c 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp @@ -622,22 +622,6 @@ void ARMInstPrinter::printMveAddrModeRQOperand(const MCInst *MI, unsigned OpNum, O << "]" << markup(">"); } -void ARMInstPrinter::printMveAddrModeQOperand(const MCInst *MI, unsigned OpNum, - const MCSubtargetInfo &STI, - raw_ostream &O) { - const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum + 1); - - O << markup(""); - - O << "]" << markup(">"); -} - void ARMInstPrinter::printLdStmModeOperand(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O) { diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h index d975d799e0791..0c686e434197c 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h @@ -274,8 +274,6 @@ class ARMInstPrinter : public MCInstPrinter { template void printMveAddrModeRQOperand(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); - void printMveAddrModeQOperand(const MCInst *MI, unsigned OpNum, - const MCSubtargetInfo &STI, raw_ostream &O); void printMveSaturateOp(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); private: From 4361bd9b7b38c73b69f9a37e52d0b72989e84947 Mon Sep 17 00:00:00 2001 From: Tobias Gysi Date: Wed, 30 Jun 2021 08:59:22 +0000 Subject: [PATCH 266/619] [mlir][linalg][python] Explicit shape and dimension order in OpDSL. Extend the OpDSL syntax with an optional `domain` function to specify an explicit dimension order. The extension is needed to provide more control over the dimension order instead of deducing it implicitly depending on the formulation of the tensor comprehension. Additionally, the patch also ensures the symbols are ordered according to the operand definitions of the operation. Differential Revision: https://reviews.llvm.org/D105117 --- .../Linalg/IR/LinalgNamedStructuredOps.yaml | 53 +++++++++--------- .../linalg/opdsl/lang/comprehension.py | 18 ++++--- .../mlir/dialects/linalg/opdsl/lang/config.py | 54 +++++++++++++++---- .../mlir/dialects/linalg/opdsl/lang/dsl.py | 8 +++ .../linalg/opdsl/ops/core_named_ops.py | 7 +++ .../python/dialects/linalg/opdsl/arguments.py | 12 ++--- .../linalg/opdsl/emit_structured_generic.py | 20 +++---- .../dialects/linalg/opdsl/interfaces.py | 8 +-- .../linalg/opdsl/shape_maps_iteration.py | 22 ++++---- 9 files changed, 132 insertions(+), 70 deletions(-) diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml index 82e4d01c4a72c..e536b44fe6fb2 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml @@ -1,3 +1,4 @@ + --- !LinalgOpConfig metadata: !LinalgOpMetadata name: matmul @@ -15,17 +16,17 @@ structured_op: !LinalgStructuredOpConfig name: A usage: InputOperand type_var: T1 - shape_map: affine_map<()[s0, s1, s2] -> (s0, s2)> + shape_map: affine_map<()[s0, s1, s2] -> (s0, s1)> - !LinalgOperandDefConfig name: B usage: InputOperand type_var: T2 - shape_map: affine_map<()[s0, s1, s2] -> (s2, s1)> + shape_map: affine_map<()[s0, s1, s2] -> (s1, s2)> - !LinalgOperandDefConfig name: C usage: OutputOperand type_var: U - shape_map: affine_map<()[s0, s1, s2] -> (s0, s1)> + shape_map: affine_map<()[s0, s1, s2] -> (s0, s2)> indexing_maps: !LinalgIndexingMapsConfig static_indexing_maps: - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d2)> @@ -77,17 +78,17 @@ structured_op: !LinalgStructuredOpConfig name: A usage: InputOperand type_var: T1 - shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s1, s3)> + shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s1, s2)> - !LinalgOperandDefConfig name: B usage: InputOperand type_var: T2 - shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s3, s2)> + shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s2, s3)> - !LinalgOperandDefConfig name: C usage: OutputOperand type_var: U - shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s1, s2)> + shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s1, s3)> indexing_maps: !LinalgIndexingMapsConfig static_indexing_maps: - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d1, d3)> @@ -201,17 +202,17 @@ structured_op: !LinalgStructuredOpConfig name: y usage: InputOperand type_var: T1 - shape_map: affine_map<()[s0, s1] -> (s1)> + shape_map: affine_map<()[s0, s1] -> (s0)> - !LinalgOperandDefConfig name: A usage: InputOperand type_var: T2 - shape_map: affine_map<()[s0, s1] -> (s1, s0)> + shape_map: affine_map<()[s0, s1] -> (s0, s1)> - !LinalgOperandDefConfig name: x usage: OutputOperand type_var: U - shape_map: affine_map<()[s0, s1] -> (s0)> + shape_map: affine_map<()[s0, s1] -> (s1)> indexing_maps: !LinalgIndexingMapsConfig static_indexing_maps: - affine_map<(d0, d1)[s0, s1] -> (d1)> @@ -321,19 +322,19 @@ structured_op: !LinalgStructuredOpConfig usage: InputOperand type_var: T1 shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] -> - (s0, s4, s5, s3)> + (s0, s1, s2, s3)> - !LinalgOperandDefConfig name: K usage: InputOperand type_var: T2 shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] -> - (s6, s7, s3)> + (s4, s5, s3)> - !LinalgOperandDefConfig name: O usage: OutputOperand type_var: U shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] -> - (s0, s1, s2, s3)> + (s0, s6, s7, s3)> - !LinalgOperandDefConfig name: strides usage: IndexAttribute @@ -349,18 +350,18 @@ structured_op: !LinalgStructuredOpConfig indexing_maps: !LinalgIndexingMapsConfig static_indexing_maps: - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, - s10, s11] -> (d0, d1 * s8 + d4 * s10, d2 * s9 + d5 * s11, d3)> + s10, s11] -> (d0, d1 * s8 + d3 * s10, d2 * s9 + d4 * s11, d5)> - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, - s10, s11] -> (d4, d5, d3)> + s10, s11] -> (d3, d4, d5)> - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, - s10, s11] -> (d0, d1, d2, d3)> + s10, s11] -> (d0, d1, d2, d5)> iterator_types: - parallel - parallel - parallel - - parallel - reduction - reduction + - parallel assignments: - !ScalarAssign arg: O @@ -402,45 +403,45 @@ structured_op: !LinalgStructuredOpConfig usage: InputOperand type_var: T1 shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] -> - (s0, s4, s5, s3)> + (s0, s1, s2, s3)> - !LinalgOperandDefConfig name: K usage: InputOperand type_var: T2 shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] -> - (s10, s11)> + (s4, s5)> - !LinalgOperandDefConfig name: O usage: OutputOperand type_var: U shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] -> - (s0, s1, s2, s3)> + (s0, s6, s7, s3)> - !LinalgOperandDefConfig name: strides usage: IndexAttribute type_var: I64 attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] - -> (s6, s7)> + -> (s8, s9)> - !LinalgOperandDefConfig name: dilations usage: IndexAttribute type_var: I64 attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] - -> (s8, s9)> + -> (s10, s11)> indexing_maps: !LinalgIndexingMapsConfig static_indexing_maps: - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, - s10, s11] -> (d2, d3 * s6 + d0 * s8, d4 * s7 + d1 * s9, d5)> + s10, s11] -> (d0, d1 * s8 + d3 * s10, d2 * s9 + d4 * s11, d5)> - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, - s10, s11] -> (d0, d1)> + s10, s11] -> (d3, d4)> - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, - s10, s11] -> (d2, d3, d4, d5)> + s10, s11] -> (d0, d1, d2, d5)> iterator_types: - - reduction - - reduction - parallel - parallel - parallel + - reduction + - reduction - parallel assignments: - !ScalarAssign diff --git a/mlir/python/mlir/dialects/linalg/opdsl/lang/comprehension.py b/mlir/python/mlir/dialects/linalg/opdsl/lang/comprehension.py index e89885e975d65..1f9230de397a2 100644 --- a/mlir/python/mlir/dialects/linalg/opdsl/lang/comprehension.py +++ b/mlir/python/mlir/dialects/linalg/opdsl/lang/comprehension.py @@ -32,13 +32,13 @@ def visit_tensor_exprs(self, callback): """Visits all tensor expression reachable by the expression.""" callback(self) - def _get_all_dim_defs(self) -> Set[DimDef]: - """Recursively gets all DimDef affine expressions that are referenced.""" + def collect_dim_uses(self, uses: Set["DimDef"]): + """Collects all DimDefs reachable through this expression.""" results = set() def visit_dim_def(dim_def): if isinstance(dim_def, DimDef): - results.add(dim_def) + uses.add(dim_def) def visit_affine_exprs(expr): if isinstance(expr, TensorUse): @@ -49,7 +49,6 @@ def visit_affine_exprs(expr): ind.visit_affine_exprs(visit_dim_def) self.visit_tensor_exprs(visit_affine_exprs) - return results def collect_tensor_uses(self, uses: Set["TensorUse"]): """Collects all TensorUses reachable through this expression.""" @@ -126,8 +125,10 @@ def _compute_reduce_dims(self, rhs: TensorExpression) -> Set[DimDef]: reduced into. Any indices referenced on the rhs and not in self are considered reduction dims and will be ordered as encountered on the rhs. """ - rhs_dims = rhs._get_all_dim_defs() - lhs_dims = self._get_all_dim_defs() + rhs_dims = set() + lhs_dims = set() + rhs.collect_dim_uses(rhs_dims) + self.collect_dim_uses(lhs_dims) return rhs_dims - lhs_dims def __repr__(self): @@ -202,7 +203,7 @@ def __init__(self, f"number of index_dims {len(index_dims)}") if index_dims and any(not isinstance(dim, DimDef) for dim in index_dims): raise ValueError(f"TensorDef requires index dims of type DimDef but " - f"got {type(index_dims)}") + f"got {index_dims}") kind = OperandKind.OutputTensor if output else OperandKind.InputTensor self.operand_def = OperandDef( kind, type_var, size_exprs=shape, index_dims=index_dims) @@ -273,7 +274,7 @@ class AttributeDef: def __init__(self, *sizes: SymbolDef): if any(not isinstance(size, SymbolDef) for size in sizes): raise ValueError(f"AttributeDef requires sizes of type SymbolDef but got " - f"{type(sizes)}") + f"{sizes}") self.operand_def = OperandDef(OperandKind.Attribute, I64, size_exprs=sizes) @@ -516,6 +517,7 @@ def __init__(self, self.metadata = OpMetadataDef( name=name, cpp_class_name=cpp_class_name, doc=doc) self.registered_operands = dict() # type: Dict[str, OperandDef] + self.domain = list() # type: List[DimDef] self.comprehensions = list() # type: List[Comprehension] self._affine_state = AffineBuildState() diff --git a/mlir/python/mlir/dialects/linalg/opdsl/lang/config.py b/mlir/python/mlir/dialects/linalg/opdsl/lang/config.py index 78e6f1d6a3083..f6d5248ea00fb 100644 --- a/mlir/python/mlir/dialects/linalg/opdsl/lang/config.py +++ b/mlir/python/mlir/dialects/linalg/opdsl/lang/config.py @@ -115,6 +115,7 @@ class LinalgStructuredOpConfig(YAMLObject): def __init__(self, comprehension: Comprehension, + domain: Sequence[DimDef], registered_operands: Sequence[OperandDef], context: Optional[_ir.Context] = None): self.context = context if context is not None else _ir.Context() @@ -123,10 +124,11 @@ def __init__(self, self.operands = dict() # type: Dict[OperandDef, OperandDefConfig] self.uses = dict() # type: Dict[TensorUse, TensorUseConfig] - # Compute the ordered set of writes and collect the tensor, capture, and - # index uses. + # Compute the ordered set of writes and collect the tensor, capture, dims, + # and index uses. collected_tensor_uses = set() collected_scalar_uses = set() + collected_dim_uses = set() collected_indices = set() for write_use, read_use in zip(comprehension.definitions, comprehension.values): @@ -136,8 +138,28 @@ def __init__(self, collected_tensor_uses.add(write_use) read_use.collect_tensor_uses(collected_tensor_uses) read_use.collect_scalar_uses(collected_scalar_uses) + read_use.collect_dim_uses(collected_dim_uses) + write_use.collect_dim_uses(collected_dim_uses) read_use.collect_indices(collected_indices) + # Set domain to the sorted list of uses if no domain annotation is given. + if not domain: + domain = sorted(collected_dim_uses, key=lambda dim: dim.dimname) + + # Verify the domain dimensions match the used dimensions. + if (len(domain) != len(collected_dim_uses) or + any(dim not in collected_dim_uses for dim in domain)): + raise ValueError(f"Expected the annotated domain dimensions {domain} to " + f"match the set of dimension used by the tensor " + f"comprehension {collected_dim_uses}") + + # Instantiate the dimensions in the given order. + with self.context: + local_state = AffineBuildState( + global_state=self.affine_state, allow_new_symbols=False) + for dim in domain: + dim.build(state=local_state) + # Collect all attribute definitions. collected_attr_defs = list() for operand in registered_operands: @@ -148,18 +170,32 @@ def __init__(self, collected_index_defs = list() for operand in registered_operands: if operand.index_dims: + if any(dim not in collected_dim_uses for dim in operand.index_dims): + raise ValueError(f"Expected all index dims {operand.index_dims} of " + f"operand {operand.name} to have uses.") collected_index_defs.append(operand) - # Add all definitions before uses, so process twice. + # Collect the operand definitions of all tensor/scalar uses, attributes, and + # shape-only tensors. + all_operand_defs = list() for use in collected_tensor_uses: - self.add_operand(use.operand_def) + all_operand_defs.append(use.operand_def) for use in collected_scalar_uses: - self.add_operand(use.operand_def) + all_operand_defs.append(use.operand_def) for definition in collected_attr_defs: - self.add_operand(definition) + all_operand_defs.append(definition) + for definition in collected_index_defs: + all_operand_defs.append(definition) + + # Add all operands in registration order to ensure the symbols are + # registered in the order they appear. + all_operand_defs = sorted( + all_operand_defs, key=lambda operand_def: operand_def.registered_index) + for operand_def in all_operand_defs: + self.add_operand(operand_def) + + # Add all shape-only tensor index_dim annotations and all tensor uses. for definition in collected_index_defs: - if definition not in self.operands: - self.add_operand(definition) self.add_indexed_operand(definition) for use in collected_tensor_uses: self.add_tensor_use(use) @@ -396,7 +432,7 @@ def from_linalg_op_def( LinalgOpConfig( tc_op_def.metadata, structured_op=LinalgStructuredOpConfig( - tc_op_def.comprehensions[0], + tc_op_def.comprehensions[0], tc_op_def.domain, tc_op_def.registered_operands.values(), context)), ] diff --git a/mlir/python/mlir/dialects/linalg/opdsl/lang/dsl.py b/mlir/python/mlir/dialects/linalg/opdsl/lang/dsl.py index 6dbda1bb7ecbe..1b42b57670448 100644 --- a/mlir/python/mlir/dialects/linalg/opdsl/lang/dsl.py +++ b/mlir/python/mlir/dialects/linalg/opdsl/lang/dsl.py @@ -132,3 +132,11 @@ def linalg_structured_op(dsl_func=None, def implements(*interfaces: OpInterfaceDef): current_op_def().metadata.implements.extend(interfaces) + + +def domain(*dimensions: DimDef): + if current_op_def().domain: + raise ValueError(f"Expected only one set of domain dimensions per operator") + if any(not isinstance(dim, DimDef) for dim in dimensions): + raise ValueError(f"Expected dimensions of type DimDef but got {dimensions}") + current_op_def().domain.extend(dimensions) diff --git a/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py b/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py index 253fca4b41690..5867109279aa4 100644 --- a/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py +++ b/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py @@ -16,6 +16,7 @@ def matmul( Numeric casting is performed on the operands to the inner multiply, promoting them to the same data type as the accumulator/output. """ + domain(D.m, D.n, D.k) implements(ContractionOpInterface) C[D.m, D.n] += cast(U, A[D.m, D.k]) * cast(U, B[D.k, D.n]) @@ -30,6 +31,7 @@ def batch_matmul( Numeric casting is performed on the operands to the inner multiply, promoting them to the same data type as the accumulator/output. """ + domain(D.b, D.m, D.n, D.k) implements(ContractionOpInterface) C[D.b, D.m, D.n] += cast(U, A[D.b, D.m, D.k]) * cast(U, B[D.b, D.k, D.n]) @@ -44,6 +46,7 @@ def matvec( Numeric casting is performed on the operands to the inner multiply, promoting them to the same data type as the accumulator/output. """ + domain(D.m, D.n) implements(ContractionOpInterface) x[D.m] += cast(U, A[D.m, D.n]) * cast(U, y[D.n]) @@ -58,6 +61,7 @@ def vecmat( Numeric casting is performed on the operands to the inner multiply, promoting them to the same data type as the accumulator/output. """ + domain(D.n, D.m) implements(ContractionOpInterface) x[D.n] += cast(U, y[D.m]) * cast(U, A[D.m, D.n]) @@ -86,6 +90,7 @@ def depthwise_conv_2d_input_nhwc_filter_hwc_poly( Numeric casting is performed on the operands to the inner multiply, promoting them to the same data type as the accumulator/output. """ + domain(D.n, D.oh, D.ow, D.kh, D.kw, D.c) O[D.n, D.oh, D.ow, D.c] += cast( U, I[D.n, D.oh * S.SH + D.kh * S.DH, D.ow * S.SW + D.kw * S.DW, D.c]) * cast(U, K[D.kh, D.kw, D.c]) @@ -103,6 +108,7 @@ def pooling_nhwc_sum_poly( Numeric casting is performed on the input operand, promoting it to the same data type as the accumulator/output. """ + domain(D.n, D.oh, D.ow, D.kh, D.kw, D.c) O[D.n, D.oh, D.ow, D.c] += cast( U, I[D.n, D.oh * S.SH + D.kh * S.DH, D.ow * S.SW + D.kw * S.DW, D.c]) @@ -123,6 +129,7 @@ def fill_rng_2d( element seed the random number generation. The min and max operands limit the range of the generated random numbers. """ + domain(D.m, D.n) multiplier = cast(I32, const(1103515245)) increment = cast(I32, const(12345)) rand1 = (cast(I32, index(D.m)) + seed) * multiplier + increment diff --git a/mlir/test/python/dialects/linalg/opdsl/arguments.py b/mlir/test/python/dialects/linalg/opdsl/arguments.py index a70e3cdeca99b..572c811d93a4b 100644 --- a/mlir/test/python/dialects/linalg/opdsl/arguments.py +++ b/mlir/test/python/dialects/linalg/opdsl/arguments.py @@ -9,15 +9,15 @@ # CHECK: name: A # CHECK: usage: InputOperand # CHECK: type_var: T -# CHECK: shape_map: affine_map<()[s0, s1, s2] -> (s0, s2)> +# CHECK: shape_map: affine_map<()[s0, s1, s2] -> (s0, s1)> # CHECK: name: B # CHECK: usage: InputOperand # CHECK: type_var: T -# CHECK: shape_map: affine_map<()[s0, s1, s2] -> (s2, s1)> +# CHECK: shape_map: affine_map<()[s0, s1, s2] -> (s1, s2)> # CHECK: name: C # CHECK: usage: OutputOperand # CHECK: type_var: U -# CHECK: shape_map: affine_map<()[s0, s1, s2] -> (s0, s1)> +# CHECK: shape_map: affine_map<()[s0, s1, s2] -> (s0, s2)> @linalg_structured_op def matmul( A=TensorDef(T, S.M, S.K), @@ -44,11 +44,11 @@ def fill(value=ScalarDef(T), O=TensorDef(T, S.M, S.K, output=True)): # CHECK: name: I # CHECK: usage: InputOperand # CHECK: type_var: T -# CHECK: shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s2, s3)> +# CHECK: shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1)> # CHECK: name: O # CHECK: usage: OutputOperand # CHECK: type_var: T -# CHECK: shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1)> +# CHECK: shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s2, s3)> # CHECK: name: strides # CHECK: usage: IndexAttribute # CHECK: type_var: I64 @@ -58,4 +58,4 @@ def strided_copy( I=TensorDef(T, S.IH, S.IW), O=TensorDef(T, S.OH, S.OW, output=True), strides=AttributeDef(S.SH, S.SW)): - O[D.oh, D.ow] = I[D.h * S.SH, D.w * S.SW] + O[D.oh, D.ow] = I[D.oh * S.SH, D.ow * S.SW] diff --git a/mlir/test/python/dialects/linalg/opdsl/emit_structured_generic.py b/mlir/test/python/dialects/linalg/opdsl/emit_structured_generic.py index cbe88dd043f73..f7db532dced5c 100644 --- a/mlir/test/python/dialects/linalg/opdsl/emit_structured_generic.py +++ b/mlir/test/python/dialects/linalg/opdsl/emit_structured_generic.py @@ -16,6 +16,7 @@ def matmul_mono( A=TensorDef(T, S.M, S.K), B=TensorDef(T, S.K, S.N), C=TensorDef(T, S.M, S.N, output=True)): + domain(D.m, D.n, D.k) C[D.m, D.n] += A[D.m, D.k] * B[D.k, D.n] @@ -24,6 +25,7 @@ def matmul_poly( A=TensorDef(T1, S.M, S.K), B=TensorDef(T2, S.K, S.N), C=TensorDef(U, S.M, S.N, output=True)): + domain(D.m, D.n, D.k) C[D.m, D.n] += cast(U, A[D.m, D.k]) * cast(U, B[D.k, D.n]) @@ -34,6 +36,7 @@ def conv_poly( O=TensorDef(U, S.N, S.OH, S.OW, S.C, output=True), strides=AttributeDef(S.SH, S.SW), dilations=AttributeDef(S.DH, S.DW)): + domain(D.n, D.oh, D.ow, D.kh, D.kw, D.c) O[D.n, D.oh, D.ow, D.c] += cast( U, I[D.n, D.oh * S.SH + D.kh * S.DH, D.ow * S.SW + D.kw * S.DW, D.c]) * cast(U, K[D.kh, D.kw, D.c]) @@ -46,6 +49,7 @@ def pooling_poly( O=TensorDef(U, S.N, S.OH, S.OW, S.C, output=True), strides=AttributeDef(S.SH, S.SW), dilations=AttributeDef(S.DH, S.DW)): + domain(D.n, D.oh, D.ow, D.kh, D.kw, D.c) O[D.n, D.oh, D.ow, D.c] += cast( U, I[D.n, D.oh * S.SH + D.kh * S.DH, D.ow * S.SW + D.kw * S.DW, D.c]) @@ -84,14 +88,12 @@ def fill_rng_poly( # CHECK: #[[$MUL_MAP_C:.+]] = affine_map<(d0, d1, d2) -> (d0, d1)> # Convolution indexing maps. - # CHECK: #[[$CONV_MAP_I:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1 * 2 + d4, d2 * 4 + d5 * 2, d3)> - # CHECK: #[[$CONV_MAP_K:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d4, d5, d3)> - # CHECK: #[[$CONV_MAP_O:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)> + # CHECK: #[[$CONV_MAP_I:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1 * 2 + d3, d2 * 4 + d4 * 2, d5)> + # CHECK: #[[$CONV_MAP_K:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d3, d4, d5)> + # CHECK: #[[$CONV_MAP_O:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d5)> # Pooling indexing maps. - # CHECK: #[[$POOL_MAP_I:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d2, d3 * 2 + d0, d4 * 4 + d1 * 2, d5)> - # CHECK: #[[$POOL_MAP_K:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1)> - # CHECK: #[[$POOL_MAP_O:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d2, d3, d4, d5)> + # CHECK: #[[$POOL_MAP_K:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d3, d4)> # CHECK-LABEL: func @test_matmul_mono # CHECK-SAME: %[[A:.+]]: tensor<4x16xf32> @@ -197,7 +199,7 @@ def test_f64f64f32_matmul(lhs, rhs, init_result): # CHECK-LABEL: @test_f32i32_conv # CHECK: linalg.generic # CHECK-SAME: indexing_maps = [#[[$CONV_MAP_I]], #[[$CONV_MAP_K]], #[[$CONV_MAP_O]]] - # CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"] + # CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction", "reduction", "parallel"] # CHECK: ^{{.*}}(%[[IN:.+]]: f32, %[[FILTER:.+]]: f32, %[[OUT:.+]]: i32) # CHECK-NEXT: %[[IN_CAST:.+]] = fptosi %[[IN:.+]] : f32 to i32 # CHECK-NEXT: %[[FILTER_CAST:.+]] = fptosi %[[FILTER:.+]] : f32 to i32 @@ -215,8 +217,8 @@ def test_f32i32_conv(input, filter, init_result): # CHECK-LABEL: @test_f32i32_pooling # CHECK: linalg.generic - # CHECK-SAME: indexing_maps = [#[[$POOL_MAP_I]], #[[$POOL_MAP_K]], #[[$POOL_MAP_O]]] - # CHECK-SAME: iterator_types = ["reduction", "reduction", "parallel", "parallel", "parallel", "parallel"] + # CHECK-SAME: indexing_maps = [#[[$CONV_MAP_I]], #[[$POOL_MAP_K]], #[[$CONV_MAP_O]]] + # CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction", "reduction", "parallel"] # CHECK: ^{{.*}}(%[[IN:.+]]: f32, %[[SHAPE:.+]]: f32, %[[OUT:.+]]: i32) # CHECK-NEXT: %[[IN_CAST:.+]] = fptosi %[[IN:.+]] : f32 to i32 # CHECK-NEXT: %[[SUM:.+]] = addi %[[OUT]], %[[IN_CAST]] : i32 diff --git a/mlir/test/python/dialects/linalg/opdsl/interfaces.py b/mlir/test/python/dialects/linalg/opdsl/interfaces.py index 46689a07bbbb9..6d75bfcbeefd4 100644 --- a/mlir/test/python/dialects/linalg/opdsl/interfaces.py +++ b/mlir/test/python/dialects/linalg/opdsl/interfaces.py @@ -2,13 +2,15 @@ from mlir.dialects.linalg.opdsl.lang import * + # CHECK: --- # CHECK-LABEL: matmul # CHECK: implements: # CHECK-NEXT: - LinalgContractionOpInterface @linalg_structured_op -def matmul(A=TensorDef(T, S.M, S.K), - B=TensorDef(T, S.K, S.N), - C=TensorDef(U, S.M, S.N, output=True)): +def matmul( + A=TensorDef(T, S.M, S.K), + B=TensorDef(T, S.K, S.N), + C=TensorDef(U, S.M, S.N, output=True)): implements(ContractionOpInterface) C[D.m, D.n] += cast(U, A[D.m, D.k]) * cast(U, B[D.k, D.n]) diff --git a/mlir/test/python/dialects/linalg/opdsl/shape_maps_iteration.py b/mlir/test/python/dialects/linalg/opdsl/shape_maps_iteration.py index 2933852f97cfe..fbb82f79f6d9e 100644 --- a/mlir/test/python/dialects/linalg/opdsl/shape_maps_iteration.py +++ b/mlir/test/python/dialects/linalg/opdsl/shape_maps_iteration.py @@ -7,9 +7,9 @@ # dims auto discovered emits the right shape, indexing maps and iterator types. # CHECK: --- # CHECK-LABEL: matmul -# CHECK: shape_map: affine_map<()[s0, s1, s2] -> (s0, s2)> -# CHECK: shape_map: affine_map<()[s0, s1, s2] -> (s2, s1)> # CHECK: shape_map: affine_map<()[s0, s1, s2] -> (s0, s1)> +# CHECK: shape_map: affine_map<()[s0, s1, s2] -> (s1, s2)> +# CHECK: shape_map: affine_map<()[s0, s1, s2] -> (s0, s2)> # CHECK: static_indexing_maps: # CHECK-NEXT: - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d2)> # CHECK-NEXT: - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d2, d1)> @@ -23,6 +23,7 @@ def matmul( A=TensorDef(T, S.M, S.K), B=TensorDef(T, S.K, S.N), C=TensorDef(U, S.M, S.N, output=True)): + domain(D.m, D.n, D.k) C[D.m, D.n] += cast(U, A[D.m, D.k]) * cast(U, B[D.k, D.n]) @@ -43,22 +44,25 @@ def matmul( def dot(A=TensorDef(T, S.M), B=TensorDef(T, S.M), C=TensorDef(U, output=True)): C[None] += cast(U, A[D.m]) * cast(U, B[D.m]) + # Verifies that the index_dims of shape-only operands translate to correct # indexing maps. # CHECK: --- # CHECK-LABEL: pool +# CHECK: shape_map: affine_map<()[s0, s1, s2] -> (s0)> # CHECK: shape_map: affine_map<()[s0, s1, s2] -> (s1)> # CHECK: shape_map: affine_map<()[s0, s1, s2] -> (s2)> -# CHECK: shape_map: affine_map<()[s0, s1, s2] -> (s0)> # CHECK: static_indexing_maps: -# CHECK-NEXT: - affine_map<(d0, d1)[s0, s1, s2] -> (d1 * 2 + d0)> -# CHECK-NEXT: - affine_map<(d0, d1)[s0, s1, s2] -> (d0)> +# CHECK-NEXT: - affine_map<(d0, d1)[s0, s1, s2] -> (d0 * 2 + d1)> # CHECK-NEXT: - affine_map<(d0, d1)[s0, s1, s2] -> (d1)> +# CHECK-NEXT: - affine_map<(d0, d1)[s0, s1, s2] -> (d0)> # CHECK: iterator_types: -# CHECK-NEXT: - reduction # CHECK-NEXT: - parallel +# CHECK-NEXT: - reduction @linalg_structured_op -def pool(I=TensorDef(T, S.I), - K=TensorDef(T, S.K, index_dims=[D.k]), - O=TensorDef(U, S.O, output=True)): +def pool( + I=TensorDef(T, S.I), + K=TensorDef(T, S.K, index_dims=[D.k]), + O=TensorDef(U, S.O, output=True)): + domain(D.o, D.k) O[D.o] += cast(U, I[D.o * 2 + D.k]) From 657e067bb58c585400d62d6b260301ca0fdb7b48 Mon Sep 17 00:00:00 2001 From: Igor Kudrin Date: Wed, 30 Jun 2021 16:34:52 +0700 Subject: [PATCH 267/619] [ARMInstPrinter] Print the target address of a branch instruction This follows other patches that changed printing immediate values of branch instructions to target addresses, see D76580 (x86), D76591 (PPC), D77853 (AArch64). As observing immediate values might sometimes be useful, they are printed as comments for branch instructions. // llvm-objdump -d output (before) 000200b4 <_start>: 200b4: ff ff ff fa blx #-4 000200b8 : 200b8: ff f7 fc ef blx #-8 <_start> // llvm-objdump -d output (after) 000200b4 <_start>: 200b4: ff ff ff fa blx 0x200b8 @ imm = #-4 000200b8 : 200b8: ff f7 fc ef blx 0x200b4 <_start> @ imm = #-8 // GNU objdump -d. 000200b4 <_start>: 200b4: faffffff blx 200b8 000200b8 : 200b8: f7ff effc blx 200b4 <_start> Differential Revision: https://reviews.llvm.org/D104701 --- lld/test/COFF/arm-thumb-thunks-multipass.s | 4 +- lld/test/COFF/arm-thumb-thunks.s | 8 +- lld/test/COFF/armnt-blx23t.test | 18 ++-- lld/test/COFF/armnt-branch24t.test | 10 +-- lld/test/COFF/delayimports-armnt.yaml | 5 +- lld/test/ELF/arm-bl-v6-inrange.s | 4 +- lld/test/ELF/arm-bl-v6.s | 4 +- lld/test/ELF/arm-blx.s | 38 ++++----- lld/test/ELF/arm-branch-rangethunk.s | 23 ++--- .../ELF/arm-branch-undef-weak-plt-thunk.s | 4 +- lld/test/ELF/arm-branch.s | 28 +++---- lld/test/ELF/arm-exidx-canunwind.s | 4 +- lld/test/ELF/arm-exidx-gc.s | 4 +- lld/test/ELF/arm-extreme-range-pi-thunk.s | 9 +- lld/test/ELF/arm-fix-cortex-a8-blx.s | 4 +- lld/test/ELF/arm-fix-cortex-a8-nopatch.s | 16 ++-- lld/test/ELF/arm-fix-cortex-a8-plt.s | 4 +- lld/test/ELF/arm-fix-cortex-a8-recognize.s | 34 ++++---- lld/test/ELF/arm-fix-cortex-a8-thunk-align.s | 2 +- lld/test/ELF/arm-fix-cortex-a8-thunk.s | 8 +- lld/test/ELF/arm-force-pi-thunk.s | 18 ++-- lld/test/ELF/arm-gnu-ifunc-plt.s | 8 +- lld/test/ELF/arm-gnu-ifunc.s | 5 +- lld/test/ELF/arm-long-thunk-converge.s | 4 +- lld/test/ELF/arm-plt-reloc.s | 49 +++++------ lld/test/ELF/arm-thumb-branch.s | 20 +++-- lld/test/ELF/arm-thumb-condbranch-thunk.s | 24 +++--- lld/test/ELF/arm-thumb-interwork-abs.s | 12 +-- lld/test/ELF/arm-thumb-interwork-ifunc.s | 10 +-- lld/test/ELF/arm-thumb-interwork-notfunc.s | 84 +++++++++---------- lld/test/ELF/arm-thumb-interwork-shared.s | 8 +- lld/test/ELF/arm-thumb-interwork-thunk-v5.s | 12 +-- lld/test/ELF/arm-thumb-interwork-thunk.s | 68 +++++++-------- lld/test/ELF/arm-thumb-mix-range-thunk-os.s | 32 +++---- lld/test/ELF/arm-thumb-narrow-branch-check.s | 16 ++-- lld/test/ELF/arm-thumb-no-undefined-thunk.s | 8 +- lld/test/ELF/arm-thumb-plt-range-thunk-os.s | 12 +-- lld/test/ELF/arm-thumb-plt-reloc.s | 39 ++++----- lld/test/ELF/arm-thumb-range-thunk-os.s | 36 ++++---- lld/test/ELF/arm-thumb-thunk-empty-pass.s | 6 +- lld/test/ELF/arm-thumb-thunk-v6m.s | 4 +- .../ELF/arm-thumb-undefined-weak-narrow.test | 2 +- lld/test/ELF/arm-thumb-undefined-weak.s | 8 +- lld/test/ELF/arm-thunk-arm-thumb-reuse.s | 8 +- lld/test/ELF/arm-thunk-largesection.s | 6 +- lld/test/ELF/arm-thunk-linkerscript-dotexpr.s | 16 ++-- lld/test/ELF/arm-thunk-linkerscript-large.s | 26 +++--- lld/test/ELF/arm-thunk-linkerscript-orphan.s | 12 +-- lld/test/ELF/arm-thunk-linkerscript-sort.s | 6 +- lld/test/ELF/arm-thunk-linkerscript.s | 16 ++-- lld/test/ELF/arm-thunk-many-passes.s | 36 ++++---- lld/test/ELF/arm-thunk-multipass-plt.s | 7 +- lld/test/ELF/arm-thunk-multipass.s | 10 +-- lld/test/ELF/arm-thunk-nosuitable.s | 4 +- lld/test/ELF/arm-thunk-re-add.s | 6 +- lld/test/ELF/arm-undefined-weak.s | 6 +- .../ARM/MCTargetDesc/ARMInstPrinter.cpp | 15 ++++ .../Target/ARM/MCTargetDesc/ARMInstPrinter.h | 6 +- .../ARM/MCTargetDesc/ARMMCTargetDesc.cpp | 43 +++++----- .../Target/ARM/MCTargetDesc/ARMMCTargetDesc.h | 3 + .../CodeGen/ARM/Windows/division-range.ll | 3 +- llvm/test/MC/ARM/arm-macho-calls.s | 4 +- llvm/test/MC/ARM/branch-disassemble.s | 4 +- llvm/test/MC/ARM/coff-relocations.s | 8 +- llvm/test/MC/ARM/thumb-cb-thumbfunc.s | 2 +- llvm/test/MC/ARM/thumb1-relax-bcc.s | 2 +- llvm/test/MC/ARM/thumb1-relax-br.s | 4 +- llvm/test/MC/ARM/thumb2-b.w-target.s | 4 +- llvm/test/MC/ARM/thumb2-cbn-to-next-inst.s | 4 +- llvm/test/MC/Disassembler/ARM/mve-lol.txt | 20 ++--- .../llvm-objdump/ELF/ARM/branch-symbols.s | 46 +++++----- .../ELF/ARM/invalid-instruction.s | 2 +- .../tools/llvm-objdump/ELF/ARM/tblxi-target.s | 8 +- .../llvm-objdump/ELF/ARM/unknown-instr.test | 2 +- 74 files changed, 529 insertions(+), 526 deletions(-) diff --git a/lld/test/COFF/arm-thumb-thunks-multipass.s b/lld/test/COFF/arm-thumb-thunks-multipass.s index ddee007b8ba87..267b225b984bb 100644 --- a/lld/test/COFF/arm-thumb-thunks-multipass.s +++ b/lld/test/COFF/arm-thumb-thunks-multipass.s @@ -51,8 +51,8 @@ far_func\i: .endr bx lr -// FUNC01: 403000: 41 f0 fc 87 bne.w #8184 <.text+0x3ffc> -// FUNC01: 403004: 41 f0 ff 87 bne.w #8190 <.text+0x4006> +// FUNC01: 403000: 41 f0 fc 87 bne.w 0x404ffc <.text+0x3ffc> +// FUNC01: 403004: 41 f0 ff 87 bne.w 0x405006 <.text+0x4006> // Check that we only have two thunks here, even if we created the first // thunk twice (once in the first pass, then thrown away and recreated diff --git a/lld/test/COFF/arm-thumb-thunks.s b/lld/test/COFF/arm-thumb-thunks.s index 0437a4d7501b5..868f3685bc2a0 100644 --- a/lld/test/COFF/arm-thumb-thunks.s +++ b/lld/test/COFF/arm-thumb-thunks.s @@ -48,9 +48,9 @@ func2: "??_C@string2": .asciz "bar" -// MAIN: 401000: 40 f0 05 80 bne.w #10 <.text+0xe> -// MAIN: 401004: 40 f0 08 80 bne.w #16 <.text+0x18> -// MAIN: 401008: 40 f0 01 80 bne.w #2 <.text+0xe> +// MAIN: 401000: 40 f0 05 80 bne.w 0x40100e <.text+0xe> +// MAIN: 401004: 40 f0 08 80 bne.w 0x401018 <.text+0x18> +// MAIN: 401008: 40 f0 01 80 bne.w 0x40100e <.text+0xe> // MAIN: 40100c: 70 47 bx lr // func1 thunk // MAIN: 40100e: 40 f2 08 0c movw r12, #8 @@ -61,7 +61,7 @@ func2: // MAIN: 40101c: c0 f2 20 0c movt r12, #32 // MAIN: 401020: e7 44 add pc, r12 -// FUNC1: 501022: 40 f0 01 80 bne.w #2 <.text+0x100028> +// FUNC1: 501022: 40 f0 01 80 bne.w 0x501028 <.text+0x100028> // FUNC1: 501026: 70 47 bx lr // func2 thunk // FUNC1: 501028: 4f f6 fe 7c movw r12, #65534 diff --git a/lld/test/COFF/armnt-blx23t.test b/lld/test/COFF/armnt-blx23t.test index a7a2181f21f65..66a5222d42fec 100644 --- a/lld/test/COFF/armnt-blx23t.test +++ b/lld/test/COFF/armnt-blx23t.test @@ -12,20 +12,20 @@ # BEFORE: 4: 2d e9 00 48 push.w {r11, lr} # BEFORE: 8: eb 46 mov r11, sp # BEFORE: a: 20 20 movs r0, #32 -# BEFORE: c: 00 f0 00 f8 bl #0 +# BEFORE: c: 00 f0 00 f8 bl {{.+}} @ imm = #0 # BEFORE: 10: 01 30 adds r0, #1 # BEFORE: 12: bd e8 00 88 pop.w {r11, pc} # AFTER: Disassembly of section .text: # AFTER-EMPTY: -# AFTER: 1000: 70 47 bx lr -# AFTER: 1002: 00 bf nop -# AFTER: 1004: 2d e9 00 48 push.w {r11, lr} -# AFTER: 1008: eb 46 mov r11, sp -# AFTER: 100a: 20 20 movs r0, #32 -# AFTER: 100c: ff f7 f8 ff bl #-16 -# AFTER: 1010: 01 30 adds r0, #1 -# AFTER: 1012: bd e8 00 88 pop.w {r11, pc} +# AFTER: 401000: 70 47 bx lr +# AFTER: 401002: 00 bf nop +# AFTER: 401004: 2d e9 00 48 push.w {r11, lr} +# AFTER: 401008: eb 46 mov r11, sp +# AFTER: 40100a: 20 20 movs r0, #32 +# AFTER: 40100c: ff f7 f8 ff bl 0x401000 <.text> +# AFTER: 401010: 01 30 adds r0, #1 +# AFTER: 401012: bd e8 00 88 pop.w {r11, pc} --- !COFF header: diff --git a/lld/test/COFF/armnt-branch24t.test b/lld/test/COFF/armnt-branch24t.test index 9b07bd44fa76c..9a8f46334ea5b 100644 --- a/lld/test/COFF/armnt-branch24t.test +++ b/lld/test/COFF/armnt-branch24t.test @@ -10,15 +10,15 @@ # BEFORE: 0: 70 47 bx lr # BEFORE: 2: 00 bf nop # BEFORE: 4: 20 20 movs r0, #32 -# BEFORE: 6: 00 f0 00 b8 b.w #0 +# BEFORE: 6: 00 f0 00 b8 b.w {{.+}} @ imm = #0 # AFTER: Disassembly of section .text: # AFTER-EMPTY: # AFTER: <.text>: -# AFTER: 1000: 70 47 bx lr -# AFTER: 1002: 00 bf nop -# AFTER: 1004: 20 20 movs r0, #32 -# AFTER: 1006: ff f7 fb bf b.w #-10 +# AFTER: 401000: 70 47 bx lr +# AFTER: 401002: 00 bf nop +# AFTER: 401004: 20 20 movs r0, #32 +# AFTER: 401006: ff f7 fb bf b.w 0x401000 <.text> --- !COFF header: diff --git a/lld/test/COFF/delayimports-armnt.yaml b/lld/test/COFF/delayimports-armnt.yaml index 048752f76bcee..42ed4053254bc 100644 --- a/lld/test/COFF/delayimports-armnt.yaml +++ b/lld/test/COFF/delayimports-armnt.yaml @@ -51,16 +51,17 @@ # BASEREL-NEXT: } # BASEREL-NEXT: ] # +# DISASM: 00401000 <.text>: # DISASM: 40100c: 43 f2 08 0c movw r12, #12296 # DISASM-NEXT: c0 f2 40 0c movt r12, #64 -# DISASM-NEXT: 00 f0 00 b8 b.w #0 +# DISASM-NEXT: 00 f0 00 b8 b.w {{.+}} @ imm = #0 # DISASM-NEXT: 2d e9 0f 48 push.w {r0, r1, r2, r3, r11, lr} # DISASM-NEXT: 0d f2 10 0b addw r11, sp, #16 # DISASM-NEXT: 2d ed 10 0b vpush {d0, d1, d2, d3, d4, d5, d6, d7} # DISASM-NEXT: 61 46 mov r1, r12 # DISASM-NEXT: 42 f2 00 00 movw r0, #8192 # DISASM-NEXT: c0 f2 40 00 movt r0, #64 -# DISASM-NEXT: ff f7 e7 ff bl #-50 +# DISASM-NEXT: ff f7 e7 ff bl 0x401000 <.text> # DISASM-NEXT: 84 46 mov r12, r0 # DISASM-NEXT: bd ec 10 0b vpop {d0, d1, d2, d3, d4, d5, d6, d7} # DISASM-NEXT: bd e8 0f 48 pop.w {r0, r1, r2, r3, r11, lr} diff --git a/lld/test/ELF/arm-bl-v6-inrange.s b/lld/test/ELF/arm-bl-v6-inrange.s index 7a0abf276ae7c..fe7cba55c5bdc 100644 --- a/lld/test/ELF/arm-bl-v6-inrange.s +++ b/lld/test/ELF/arm-bl-v6-inrange.s @@ -34,8 +34,8 @@ thumbfunc: // CHECK-NEXT: Disassembly of section .caller: // CHECK-EMPTY: // CHECK-NEXT: <_start>: -// CHECK-NEXT: 500000: 00 f4 00 f8 bl #-4194304 -// CHECK-NEXT: 500004: ff f3 fe ef blx #4194300 +// CHECK-NEXT: 500000: 00 f4 00 f8 bl 0x100004 +// CHECK-NEXT: 500004: ff f3 fe ef blx 0x900004 // CHECK-NEXT: 500008: 70 47 bx lr .arm diff --git a/lld/test/ELF/arm-bl-v6.s b/lld/test/ELF/arm-bl-v6.s index e8b2668a74cdf..3d0c052fbc96b 100644 --- a/lld/test/ELF/arm-bl-v6.s +++ b/lld/test/ELF/arm-bl-v6.s @@ -28,7 +28,7 @@ _start: // CHECK-ARM1: Disassembly of section .text: // CHECK-ARM1-EMPTY: // CHECK-ARM1-NEXT: <_start>: -// CHECK-ARM1-NEXT: 21000: 00 00 00 fa blx #0 +// CHECK-ARM1-NEXT: 21000: 00 00 00 fa blx 0x21008 // CHECK-ARM1-NEXT: 21004: 1e ff 2f e1 bx lr .thumb .section .text.2, "ax", %progbits @@ -38,7 +38,7 @@ thumbfunc: bl farthumbfunc // CHECK-THUMB1: : -// CHECK-THUMB1-NEXT: 21008: 00 f2 00 e8 blx #2097152 +// CHECK-THUMB1-NEXT: 21008: 00 f2 00 e8 blx 0x22100c <__ARMv5ABSLongThunk_farthumbfunc> /// 6 Megabytes, enough to make farthumbfunc out of range of caller /// on a v6 Arm, but not on a v7 Arm. diff --git a/lld/test/ELF/arm-blx.s b/lld/test/ELF/arm-blx.s index 5be3faa4c947f..25e269113afb2 100644 --- a/lld/test/ELF/arm-blx.s +++ b/lld/test/ELF/arm-blx.s @@ -11,8 +11,8 @@ // RUN: ld.lld --script %t.script %t %tfar -o %t2 // RUN: llvm-objdump -d --triple=armv7a-none-linux-gnueabi %t2 | FileCheck %s -// Test BLX instruction is chosen for ARM BL/BLX instruction and Thumb callee -// Using two callees to ensure at least one has 2-byte alignment. +/// Test BLX instruction is chosen for ARM BL/BLX instruction and Thumb callee +/// Using two callees to ensure at least one has 2-byte alignment. .syntax unified .thumb .section .callee_low, "ax",%progbits @@ -48,7 +48,7 @@ _start: blx callee_high2 bl blx_far blx blx_far2 -// blx to ARM instruction should be written as a BL +/// blx to ARM instruction should be written as a BL bl callee_arm_low blx callee_arm_low bl callee_arm_high @@ -87,22 +87,22 @@ callee_arm_high: // CHECK: Disassembly of section .caller: // CHECK-EMPTY: // CHECK-NEXT: <_start>: -// CHECK-NEXT: 10000: 2b c0 ff fa blx #-65364 -// CHECK-NEXT: 10004: 2a c0 ff fa blx #-65368 -// CHECK-NEXT: 10008: 29 c0 ff fb blx #-65370 -// CHECK-NEXT: 1000c: 28 c0 ff fb blx #-65374 -// CHECK-NEXT: 10010: 3a 00 00 fa blx #232 -// CHECK-NEXT: 10014: 39 00 00 fa blx #228 -// CHECK-NEXT: 10018: 38 00 00 fb blx #226 -// CHECK-NEXT: 1001c: 37 00 00 fb blx #222 -// 10020 + 1FFFFFC + 8 = 0x2010024 = blx_far -// CHECK-NEXT: 10020: ff ff 7f fa blx #33554428 -// 10024 + 1FFFFFC + 8 = 0x2010028 = blx_far2 -// CHECK-NEXT: 10024: ff ff 7f fa blx #33554428 -// CHECK-NEXT: 10028: 34 c0 ff eb bl #-65328 -// CHECK-NEXT: 1002c: 33 c0 ff eb bl #-65332 -// CHECK-NEXT: 10030: 72 00 00 eb bl #456 -// CHECK-NEXT: 10034: 71 00 00 eb bl #452 +// CHECK-NEXT: 10000: 2b c0 ff fa blx 0xb4 +// CHECK-NEXT: 10004: 2a c0 ff fa blx 0xb4 +// CHECK-NEXT: 10008: 29 c0 ff fb blx 0xb6 +// CHECK-NEXT: 1000c: 28 c0 ff fb blx 0xb6 +// CHECK-NEXT: 10010: 3a 00 00 fa blx 0x10100 +// CHECK-NEXT: 10014: 39 00 00 fa blx 0x10100 +// CHECK-NEXT: 10018: 38 00 00 fb blx 0x10102 +// CHECK-NEXT: 1001c: 37 00 00 fb blx 0x10102 +/// 0x2010024 = blx_far +// CHECK-NEXT: 10020: ff ff 7f fa blx 0x2010024 +/// 0x2010028 = blx_far2 +// CHECK-NEXT: 10024: ff ff 7f fa blx 0x2010028 +// CHECK-NEXT: 10028: 34 c0 ff eb bl 0x100 +// CHECK-NEXT: 1002c: 33 c0 ff eb bl 0x100 +// CHECK-NEXT: 10030: 72 00 00 eb bl 0x10200 +// CHECK-NEXT: 10034: 71 00 00 eb bl 0x10200 // CHECK-NEXT: 10038: 1e ff 2f e1 bx lr // CHECK: Disassembly of section .callee3: diff --git a/lld/test/ELF/arm-branch-rangethunk.s b/lld/test/ELF/arm-branch-rangethunk.s index 7a208006e9c56..28a5c615027c8 100644 --- a/lld/test/ELF/arm-branch-rangethunk.s +++ b/lld/test/ELF/arm-branch-rangethunk.s @@ -19,20 +19,23 @@ _start: beq too_far3 // SHORT: 00030000 <_start>: -// SHORT-NEXT: 30000: bl #4 <__ARMv7ABSLongThunk_too_far1> -// SHORT-NEXT: 30004: b #4 <__ARMv7ABSLongThunk_too_far2> -// SHORT-NEXT: 30008: beq #4 <__ARMv7ABSLongThunk_too_far3> +// SHORT-NEXT: 30000: bl 0x3000c <__ARMv7ABSLongThunk_too_far1> +// SHORT-NEXT: 30004: b 0x30010 <__ARMv7ABSLongThunk_too_far2> +// SHORT-NEXT: 30008: beq 0x30014 <__ARMv7ABSLongThunk_too_far3> // SHORT: 0003000c <__ARMv7ABSLongThunk_too_far1>: -// SHORT-NEXT: 3000c: b #33554420 <__ARMv7ABSLongThunk_too_far3+0x1fffff4> +/// 0x2030008 = too_far1 +// SHORT-NEXT: 3000c: b 0x2030008 // SHORT: 00030010 <__ARMv7ABSLongThunk_too_far2>: -// SHORT-NEXT: 30010: b #33554420 <__ARMv7ABSLongThunk_too_far3+0x1fffff8> +/// 0x203000c = too_far2 +// SHORT-NEXT: 30010: b 0x203000c // SHORT: 00030014 <__ARMv7ABSLongThunk_too_far3>: -// SHORT-NEXT: 30014: b #33554420 <__ARMv7ABSLongThunk_too_far3+0x1fffffc> +/// 0x2030010 = too_far3 +// SHORT-NEXT: 30014: b 0x2030010 // LONG: 00030000 <_start>: -// LONG-NEXT: 30000: bl #4 <__ARMv7ABSLongThunk_too_far1> -// LONG-NEXT: 30004: b #12 <__ARMv7ABSLongThunk_too_far2> -// LONG-NEXT: 30008: beq #20 <__ARMv7ABSLongThunk_too_far3> +// LONG-NEXT: 30000: bl 0x3000c <__ARMv7ABSLongThunk_too_far1> +// LONG-NEXT: 30004: b 0x30018 <__ARMv7ABSLongThunk_too_far2> +// LONG-NEXT: 30008: beq 0x30024 <__ARMv7ABSLongThunk_too_far3> // LONG: 0003000c <__ARMv7ABSLongThunk_too_far1>: // LONG-NEXT: 3000c: movw r12, #20 // LONG-NEXT: 30010: movt r12, #515 @@ -44,4 +47,4 @@ _start: // LONG: 00030024 <__ARMv7ABSLongThunk_too_far3>: // LONG-NEXT: 30024: movw r12, #44 // LONG-NEXT: 30028: movt r12, #515 -// LONG-NEXT: 3002c: bx r12 \ No newline at end of file +// LONG-NEXT: 3002c: bx r12 diff --git a/lld/test/ELF/arm-branch-undef-weak-plt-thunk.s b/lld/test/ELF/arm-branch-undef-weak-plt-thunk.s index 1f6acd2ca5216..ca71f45b4a2b7 100644 --- a/lld/test/ELF/arm-branch-undef-weak-plt-thunk.s +++ b/lld/test/ELF/arm-branch-undef-weak-plt-thunk.s @@ -24,8 +24,8 @@ _start: // CHECK: Disassembly of section .text: // CHECK-EMPTY: // CHECK-NEXT: <_start>: -// CHECK-NEXT: 201e4: 00 00 00 ea b #0 <__ARMv7ABSLongThunk_undefined_weak_we_expect_a_plt_entry_for> -// CHECK-NEXT: 201e8: 02 00 00 eb bl #8 <__ARMv7ABSLongThunk_bar2> +// CHECK-NEXT: 201e4: 00 00 00 ea b 0x201ec <__ARMv7ABSLongThunk_undefined_weak_we_expect_a_plt_entry_for> +// CHECK-NEXT: 201e8: 02 00 00 eb bl 0x201f8 <__ARMv7ABSLongThunk_bar2> // CHECK: <__ARMv7ABSLongThunk_undefined_weak_we_expect_a_plt_entry_for>: // CHECK-NEXT: 201ec: 30 c2 00 e3 movw r12, #560 // CHECK-NEXT: 201f0: 02 c2 40 e3 movt r12, #514 diff --git a/lld/test/ELF/arm-branch.s b/lld/test/ELF/arm-branch.s index 4d3b6a5adec6c..411d6dc9fbee9 100644 --- a/lld/test/ELF/arm-branch.s +++ b/lld/test/ELF/arm-branch.s @@ -44,22 +44,14 @@ callee_high: bx lr // CHECK: 00010000 <_start>: -/// S(callee_low) = 0xb4; P = 0x10000; A = -8; S + A - P = -0xff54 = -65364 -// CHECK-NEXT: 10000: bl #-65364 -/// S(callee_low) = 0xb4; P = 0x10004; A = -8; S + A - P = -0xff58 = -65368 -// CHECK-NEXT: 10004: b #-65368 -/// S(callee_low) = 0xb4; P = 0x10008; A = -8; S + A - P = -0xff5c = -65372 -// CHECK-NEXT: 10008: beq #-65372 -/// S(callee_high) = 0x10028; P = 0x1000c; A = -8; S + A - P = 0x14 = 20 -// CHECK-NEXT: 1000c: bl #20 -/// S(callee_high) = 0x10028; P = 0x10010; A = -8; S + A - P = 0x10 = 16 -// CHECK-NEXT: 10010: b #16 -/// S(callee_high) = 0x10028; P = 0x10014; A = -8; S + A - P = 0x0c = 12 -// CHECK-NEXT: 10014: bne #12 -/// S(far) = 0x201001c; P = 0x10018; A = -8; S + A - P = 0x1fffffc = 33554428 -// CHECK-NEXT: 10018: bl #33554428 -/// S(far) = 0x201001c; P = 0x1001c; A = -8; S + A - P = 0x1fffff8 = 33554424 -// CHECK-NEXT: 1001c: b #33554424 -/// S(far) = 0x201001c; P = 0x10020; A = -8; S + A - P = 0x1fffff4 = 33554420 -// CHECK-NEXT: 10020: bgt #33554420 +// CHECK-NEXT: 10000: bl 0xb4 +// CHECK-NEXT: 10004: b 0xb4 +// CHECK-NEXT: 10008: beq 0xb4 +// CHECK-NEXT: 1000c: bl 0x10028 +// CHECK-NEXT: 10010: b 0x10028 +// CHECK-NEXT: 10014: bne 0x10028 +/// 0x201001c = far +// CHECK-NEXT: 10018: bl 0x201001c +// CHECK-NEXT: 1001c: b 0x201001c +// CHECK-NEXT: 10020: bgt 0x201001c // CHECK-NEXT: 10024: bx lr diff --git a/lld/test/ELF/arm-exidx-canunwind.s b/lld/test/ELF/arm-exidx-canunwind.s index 0c87b828cd818..e219c31d47f88 100644 --- a/lld/test/ELF/arm-exidx-canunwind.s +++ b/lld/test/ELF/arm-exidx-canunwind.s @@ -55,8 +55,8 @@ _start: // CHECK: Disassembly of section .text: // CHECK-EMPTY: // CHECK-NEXT: <_start>: -// CHECK-NEXT: 20108: bl #4 -// CHECK-NEXT: bl #4 +// CHECK-NEXT: 20108: bl 0x20114 +// CHECK-NEXT: bl 0x20118 // CHECK-NEXT: bx lr // CHECK: : // CHECK-NEXT: 20114: bx lr diff --git a/lld/test/ELF/arm-exidx-gc.s b/lld/test/ELF/arm-exidx-gc.s index 4b04b246827cd..5c172b1f1392b 100644 --- a/lld/test/ELF/arm-exidx-gc.s +++ b/lld/test/ELF/arm-exidx-gc.s @@ -93,8 +93,8 @@ _start: // CHECK: Disassembly of section .text: // CHECK-EMPTY: // CHECK-NEXT: <_start>: -// CHECK-NEXT: 2010c: bl #4 -// CHECK-NEXT: 20110: bl #4 +// CHECK-NEXT: 2010c: bl 0x20118 +// CHECK-NEXT: 20110: bl 0x2011c // CHECK-NEXT: 20114: bx lr // CHECK: : // CHECK-NEXT: 20118: bx lr diff --git a/lld/test/ELF/arm-extreme-range-pi-thunk.s b/lld/test/ELF/arm-extreme-range-pi-thunk.s index cba81ef30510f..e6486331c5b98 100644 --- a/lld/test/ELF/arm-extreme-range-pi-thunk.s +++ b/lld/test/ELF/arm-extreme-range-pi-thunk.s @@ -34,7 +34,7 @@ high: // CHECK: Disassembly of section .text_low: // CHECK-EMPTY: // CHECK-NEXT: <_start>: -// CHECK-NEXT: 130: bl #0 <__ARMV7PILongThunk_high> +// CHECK-NEXT: 130: bl 0x138 <__ARMV7PILongThunk_high> // CHECK-NEXT: 134: bx lr // CHECK: <__ARMV7PILongThunk_high>: @@ -47,7 +47,7 @@ high: // CHECK: Disassembly of section .text_high: // CHECK-EMPTY: // CHECK-NEXT: : -// CHECK-NEXT: f0000000: bl #0 <__ARMV7PILongThunk__start> +// CHECK-NEXT: f0000000: bl 0xf0000008 <__ARMV7PILongThunk__start> // CHECK-NEXT: f0000004: bx lr // CHECK: <__ARMV7PILongThunk__start>: @@ -61,9 +61,8 @@ high: // CHECK-THUMB: Disassembly of section .text_low: // CHECK-THUMB-EMPTY: // CHECK-THUMB-NEXT: <_start>: -// CHECK-THUMB-NEXT: 130: bl #4 +// CHECK-THUMB-NEXT: 130: bl 0x138 <__ThumbV7PILongThunk_high> // CHECK-THUMB-NEXT: 134: bx lr -// CHECK-THUMB-NEXT: 136: bmi #-88 // CHECK-THUMB: <__ThumbV7PILongThunk_high>: // CHECK-THUMB-NEXT: 138: movw r12, #65213 @@ -75,7 +74,7 @@ high: // CHECK-THUMB: Disassembly of section .text_high: // CHECK-THUMB-EMPTY: // CHECK-THUMB-NEXT: : -// CHECK-THUMB-NEXT: f0000000: bl #4 +// CHECK-THUMB-NEXT: f0000000: bl 0xf0000008 <__ThumbV7PILongThunk__start> // CHECK-THUMB-NEXT: f0000004: bx lr // CHECK-THUMB: <__ThumbV7PILongThunk__start>: diff --git a/lld/test/ELF/arm-fix-cortex-a8-blx.s b/lld/test/ELF/arm-fix-cortex-a8-blx.s index 636094fd798bb..6000f0f3dfcbf 100644 --- a/lld/test/ELF/arm-fix-cortex-a8-blx.s +++ b/lld/test/ELF/arm-fix-cortex-a8-blx.s @@ -28,6 +28,6 @@ _start: .inst.n 0xe800 // CHECK-PATCH: 21ffa: nop.w -// CHECK-PATCH-NEXT: 21ffe: blx #4 +// CHECK-PATCH-NEXT: 21ffe: blx 0x22004 <__CortexA8657417_21FFE> // CHECK-PATCH: 00022004 <__CortexA8657417_21FFE>: -// CHECK-PATCH-NEXT: 22004: b #-4104 +// CHECK-PATCH-NEXT: 22004: b 0x21004 <{{.+}}> @ imm = #-4104 diff --git a/lld/test/ELF/arm-fix-cortex-a8-nopatch.s b/lld/test/ELF/arm-fix-cortex-a8-nopatch.s index 2f9e1962a868b..0dad04779f0ed 100644 --- a/lld/test/ELF/arm-fix-cortex-a8-nopatch.s +++ b/lld/test/ELF/arm-fix-cortex-a8-nopatch.s @@ -28,8 +28,8 @@ target: b.w target // CALLSITE1: 00021ffa : -// CALLSITE1-NEXT: 21ffa: b.w #-4 -// CALLSITE1-NEXT: 21ffe: b.w #-8 +// CALLSITE1-NEXT: 21ffa: b.w 0x21ffa +// CALLSITE1-NEXT: 21ffe: b.w 0x21ffa .space 4088 .type target2, %function @@ -43,7 +43,7 @@ target2: // CALLSITE2: 00022ffa : // CALLSITE2-NEXT: 22ffa: nop // CALLSITE2-NEXT: 22ffc: nop -// CALLSITE2-NEXT: 22ffe: bl #-8 +// CALLSITE2-NEXT: 22ffe: bl 0x22ffa .space 4088 .type target3, %function @@ -56,7 +56,7 @@ target3: // CALLSITE3: 00023ffa : // CALLSITE3-NEXT: 23ffa: nop.w -// CALLSITE3-NEXT: 23ffe: beq.w #-4104 +// CALLSITE3-NEXT: 23ffe: beq.w 0x22ffa .space 4088 .type source4, %function @@ -71,7 +71,7 @@ target4: // CALLSITE4: 00024ffa : // CALLSITE4-NEXT: 24ffa: nop.w -// CALLSITE4-NEXT: 24ffe: beq.w #0 +// CALLSITE4-NEXT: 24ffe: beq.w 0x25002 // CALLSITE4: 00025002 : // CALLSITE4-NEXT: 25002: nop.w @@ -90,7 +90,7 @@ source5: beq.w target5 // CALLSITE5: 00025ffe : -// CALLSITE5-NEXT: 25ffe: beq.w #-8 +// CALLSITE5-NEXT: 25ffe: beq.w 0x25ffa /// Edge case where two word sequence starts at offset 0xffc, check that /// we don't match. In this case the branch will be completely in the 2nd @@ -105,7 +105,7 @@ target6: bl target6 // CALLSITE6: 00027000 : -// CALLSITE6-NEXT: 27000: bl #-4 +// CALLSITE6-NEXT: 27000: bl 0x27000 /// Edge case where two word sequence starts at offset 0xffe, check that /// we don't match. In this case the branch will be completely in the 2nd @@ -120,4 +120,4 @@ target7: bl target7 // CALLSITE7: 00028002 : -// CALLSITE7: 28002: bl #-4 +// CALLSITE7: 28002: bl 0x28002 diff --git a/lld/test/ELF/arm-fix-cortex-a8-plt.s b/lld/test/ELF/arm-fix-cortex-a8-plt.s index 183aa6f1ce513..8599b398b5263 100644 --- a/lld/test/ELF/arm-fix-cortex-a8-plt.s +++ b/lld/test/ELF/arm-fix-cortex-a8-plt.s @@ -34,6 +34,6 @@ source: // CHECK: 00002ffa : // CHECK-NEXT: 2ffa: nop.w -// CHECK-NEXT: 2ffe: blx #4 +// CHECK-NEXT: 2ffe: blx 0x3004 <__CortexA8657417_2FFE> // CHECK: 00003004 <__CortexA8657417_2FFE>: -// CHECK-NEXT: 3004: b #-4076 +// CHECK-NEXT: 3004: b 0x2020 diff --git a/lld/test/ELF/arm-fix-cortex-a8-recognize.s b/lld/test/ELF/arm-fix-cortex-a8-recognize.s index 7b1f08b16a72d..f5ffe49e77ac6 100644 --- a/lld/test/ELF/arm-fix-cortex-a8-recognize.s +++ b/lld/test/ELF/arm-fix-cortex-a8-recognize.s @@ -55,11 +55,11 @@ target: // CALLSITE1: 00021ffa : // CALLSITE1-NEXT: 21ffa: nop.w -// CALLSITE1-NEXT: 21ffe: b.w #28674 +// CALLSITE1-NEXT: 21ffe: b.w 0x29004 <__CortexA8657417_21FFE> /// Expect no patch when doing a relocatable link ld -r. // CHECK-RELOCATABLE: 00000ffa : // CHECK-RELOCATABLE-NEXT: ffa: nop.w -// CHECK-RELOCATABLE-NEXT: ffe: b.w #-4 +// CHECK-RELOCATABLE-NEXT: ffe: b.w {{.+}} @ imm = #-4 .space 4088 .type target2, %function @@ -72,7 +72,7 @@ target2: // CALLSITE2: 00022ffa : // CALLSITE2-NEXT: 22ffa: nop.w -// CALLSITE2-NEXT: 22ffe: bl #24582 +// CALLSITE2-NEXT: 22ffe: bl 0x29008 <__CortexA8657417_22FFE> .space 4088 .type target3, %function @@ -85,7 +85,7 @@ target3: // CALLSITE3: 00023ffa : // CALLSITE3-NEXT: 23ffa: nop.w -// CALLSITE3-NEXT: 23ffe: beq.w #20490 +// CALLSITE3-NEXT: 23ffe: beq.w 0x2900c <__CortexA8657417_23FFE> .space 4082 .type target4, %function @@ -106,7 +106,7 @@ target4: // CALLSITE4-NEXT: 24ff4: bx lr // CALLSITE4: 24ff8: 00 00 .short 0x0000 // CALLSITE4: 24ffa: nop.w -// CALLSITE4-NEXT: 24ffe: blx #16400 +// CALLSITE4-NEXT: 24ffe: blx 0x29010 <__CortexA8657417_24FFE> /// Separate sections for source and destination of branches to force /// a relocation. @@ -126,7 +126,7 @@ target5: /// Target = 0x19014 __CortexA8657417_16FFE // CALLSITE5: 25ffa: nop.w -// CALLSITE5-NEXT: 25ffe: b.w #12306 +// CALLSITE5-NEXT: 25ffe: b.w 0x29014 <__CortexA8657417_25FFE> .section .text.2, "ax", %progbits .balign 2 @@ -144,7 +144,7 @@ target6: /// Target = 0x19018 __CortexA8657417_17FFE // CALLSITE6: 26ffa: nop.w -// CALLSITE6-NEXT: 26ffe: bl #8214 +// CALLSITE6-NEXT: 26ffe: bl 0x29018 <__CortexA8657417_26FFE> .section .text.4, "ax", %progbits .global target7 @@ -160,7 +160,7 @@ target7: bne.w target7 // CALLSITE7: 27ffa: nop.w -// CALLSITE7-NEXT: 27ffe: bne.w #4122 +// CALLSITE7-NEXT: 27ffe: bne.w 0x2901c <__CortexA8657417_27FFE> .section .text.6, "ax", %progbits .space 4082 @@ -184,28 +184,28 @@ target8: // CALLSITE8-NEXT: 28ff4: bx lr // CALLSITE8: 28ff8: 00 00 .short 0x0000 // CALLSITE8: 28ffa: nop.w -// CALLSITE8-NEXT: 28ffe: blx #32 +// CALLSITE8-NEXT: 28ffe: blx 0x29020 <__CortexA8657417_28FFE> // CHECK-PATCHES: 00029004 <__CortexA8657417_21FFE>: -// CHECK-PATCHES-NEXT: 29004: b.w #-28686 +// CHECK-PATCHES-NEXT: 29004: b.w 0x21ffa // CHECK-PATCHES: 00029008 <__CortexA8657417_22FFE>: -// CHECK-PATCHES-NEXT: 29008: b.w #-24594 +// CHECK-PATCHES-NEXT: 29008: b.w 0x22ffa // CHECK-PATCHES: 0002900c <__CortexA8657417_23FFE>: -// CHECK-PATCHES-NEXT: 2900c: b.w #-20502 +// CHECK-PATCHES-NEXT: 2900c: b.w 0x23ffa // CHECK-PATCHES: 00029010 <__CortexA8657417_24FFE>: -// CHECK-PATCHES-NEXT: 29010: b #-16420 +// CHECK-PATCHES-NEXT: 29010: b 0x24ff4 // CHECK-PATCHES: 00029014 <__CortexA8657417_25FFE>: -// CHECK-PATCHES-NEXT: 29014: b.w #-16406 +// CHECK-PATCHES-NEXT: 29014: b.w 0x25002 // CHECK-PATCHES: 00029018 <__CortexA8657417_26FFE>: -// CHECK-PATCHES-NEXT: 29018: b.w #-12314 +// CHECK-PATCHES-NEXT: 29018: b.w 0x26002 // CHECK-PATCHES: 0002901c <__CortexA8657417_27FFE>: -// CHECK-PATCHES-NEXT: 2901c: b.w #-8222 +// CHECK-PATCHES-NEXT: 2901c: b.w 0x27002 // CHECK-PATCHES: 00029020 <__CortexA8657417_28FFE>: -// CHECK-PATCHES-NEXT: 29020: b #-52 +// CHECK-PATCHES-NEXT: 29020: b 0x28ff4 diff --git a/lld/test/ELF/arm-fix-cortex-a8-thunk-align.s b/lld/test/ELF/arm-fix-cortex-a8-thunk-align.s index 183b397733e3d..61f615433f8d8 100644 --- a/lld/test/ELF/arm-fix-cortex-a8-thunk-align.s +++ b/lld/test/ELF/arm-fix-cortex-a8-thunk-align.s @@ -28,7 +28,7 @@ thumb_target: // CHECK-NEXT: add r12, pc // CHECK-NEXT: bx r12 // CHECK: 00013004 <__CortexA8657417_11FFE>: -// CHECK-NEXT: 13004: b.w #-8196 +// CHECK-NEXT: 13004: b.w 0x11004 .section .text.02 /// Take us over thunk section spacing .space 16 * 1024 * 1024 diff --git a/lld/test/ELF/arm-fix-cortex-a8-thunk.s b/lld/test/ELF/arm-fix-cortex-a8-thunk.s index c5d77e078b06c..ad493a81ceff1 100644 --- a/lld/test/ELF/arm-fix-cortex-a8-thunk.s +++ b/lld/test/ELF/arm-fix-cortex-a8-thunk.s @@ -45,19 +45,19 @@ target: /// Expect erratum patch inserted here // CHECK: 00110ffa : // CHECK-NEXT: 110ffa: nop.w -// CHECK-NEXT: bl #2 +// CHECK-NEXT: bl 0x111004 <__CortexA8657417_110FFE> // CHECK: 00111004 <__CortexA8657417_110FFE>: -// CHECK-NEXT: 111004: b.w #-14 +// CHECK-NEXT: 111004: b.w 0x110ffa /// Expect range extension thunk here. // CHECK: 00111008 <__ThumbV7PILongThunk_early>: -// CHECK-NEXT: 111008: b.w #-1048582 +// CHECK-NEXT: 111008: b.w 0x11006 .section .text.04, "ax", %progbits /// The erratum patch will push this branch out of range, so another /// range extension thunk will be needed. beq.w early -// CHECK: 11100c: beq.w #-8 +// CHECK: 11100c: beq.w 0x111008 <__ThumbV7PILongThunk_early> .section .text.05, "ax", %progbits .arm diff --git a/lld/test/ELF/arm-force-pi-thunk.s b/lld/test/ELF/arm-force-pi-thunk.s index 582d1e4babdae..caff2e69fb6aa 100644 --- a/lld/test/ELF/arm-force-pi-thunk.s +++ b/lld/test/ELF/arm-force-pi-thunk.s @@ -35,9 +35,9 @@ low_target2: // CHECK-NEXT: <_start>: // CHECK-NEXT: 94: 70 47 bx lr // CHECK: : -// CHECK-NEXT: 96: 00 f0 03 f8 bl #6 -// CHECK-NEXT: 9a: 00 f0 07 f8 bl #14 -// CHECK-NEXT: 9e: d4 d4 bmi #-88 +// CHECK-NEXT: 96: 00 f0 03 f8 bl 0xa0 <__ThumbV7PILongThunk_high_target> +// CHECK-NEXT: 9a: 00 f0 07 f8 bl 0xac <__ThumbV7PILongThunk_high_target2> +// CHECK-NEXT: 9e: d4 d4 // CHECK: <__ThumbV7PILongThunk_high_target>: // CHECK-NEXT: a0: 4f f6 55 7c movw r12, #65365 // CHECK-NEXT: a4: c0 f2 ff 1c movt r12, #511 @@ -49,8 +49,8 @@ low_target2: // CHECK-NEXT: b4: fc 44 add r12, pc // CHECK-NEXT: b6: 60 47 bx r12 // CHECK: : -// CHECK-NEXT: b8: ff f7 f2 ff bl #-28 -// CHECK-NEXT: bc: ff f7 f6 ff bl #-20 +// CHECK-NEXT: b8: ff f7 f2 ff bl 0xa0 <__ThumbV7PILongThunk_high_target> +// CHECK-NEXT: bc: ff f7 f6 ff bl 0xac <__ThumbV7PILongThunk_high_target2> .section .text_high, "ax", %progbits @@ -72,8 +72,8 @@ high_target2: // CHECK: Disassembly of section .text_high: // CHECK-EMPTY: // CHECK-NEXT: : -// CHECK-NEXT: 2000000: 00 f0 02 f8 bl #4 -// CHECK-NEXT: 2000004: 00 f0 06 f8 bl #12 +// CHECK-NEXT: 2000000: 00 f0 02 f8 bl 0x2000008 <__ThumbV7PILongThunk_low_target> +// CHECK-NEXT: 2000004: 00 f0 06 f8 bl 0x2000014 <__ThumbV7PILongThunk_low_target2> // CHECK: <__ThumbV7PILongThunk_low_target>: // CHECK-NEXT: 2000008: 40 f2 83 0c movw r12, #131 // CHECK-NEXT: 200000c: cf f6 00 6c movt r12, #65024 @@ -85,5 +85,5 @@ high_target2: // CHECK-NEXT: 200001c: fc 44 add r12, pc // CHECK-NEXT: 200001e: 60 47 bx r12 // CHECK: : -// CHECK-NEXT: 2000020: ff f7 f2 ff bl #-28 -// CHECK-NEXT: 2000024: ff f7 f6 ff bl #-20 +// CHECK-NEXT: 2000020: ff f7 f2 ff bl 0x2000008 <__ThumbV7PILongThunk_low_target> +// CHECK-NEXT: 2000024: ff f7 f6 ff bl 0x2000014 <__ThumbV7PILongThunk_low_target2> diff --git a/lld/test/ELF/arm-gnu-ifunc-plt.s b/lld/test/ELF/arm-gnu-ifunc-plt.s index ca986f6c54e02..a1e09712e4b3b 100644 --- a/lld/test/ELF/arm-gnu-ifunc-plt.s +++ b/lld/test/ELF/arm-gnu-ifunc-plt.s @@ -35,13 +35,13 @@ // DISASM: : // DISASM-NEXT: 201e0: bx lr // DISASM: <_start>: -// DISASM-NEXT: 201e4: bl #84 -// DISASM-NEXT: 201e8: bl #96 +// DISASM-NEXT: 201e4: bl 0x20240 +// DISASM-NEXT: 201e8: bl 0x20250 // DISASM: <$d.1>: // DISASM-NEXT: 201ec: 00 00 00 00 .word 0x00000000 // DISASM-NEXT: 201f0: 04 00 00 00 .word 0x00000004 -// DISASM: 201f4: bl #36 -// DISASM-NEXT: 201f8: bl #48 +// DISASM: 201f4: bl 0x20220 +// DISASM-NEXT: 201f8: bl 0x20230 // DISASM-EMPTY: // DISASM-NEXT: Disassembly of section .plt: // DISASM-EMPTY: diff --git a/lld/test/ELF/arm-gnu-ifunc.s b/lld/test/ELF/arm-gnu-ifunc.s index de7a196ea5daa..bbd932ad7b442 100644 --- a/lld/test/ELF/arm-gnu-ifunc.s +++ b/lld/test/ELF/arm-gnu-ifunc.s @@ -118,8 +118,8 @@ _start: // DISASM: : // DISASM-NEXT: 20108: bx lr // DISASM: <_start>: -// DISASM-NEXT: 2010c: bl #28 -// DISASM-NEXT: 20110: bl #40 +// DISASM-NEXT: 2010c: bl 0x20130 +// DISASM-NEXT: 20110: bl 0x20140 // 1 * 65536 + 244 = 0x100f4 __rel_iplt_start // DISASM-NEXT: 20114: movw r0, #244 // DISASM-NEXT: 20118: movt r0, #1 @@ -141,4 +141,3 @@ _start: // DISASM-NEXT: 20148: ldr pc, [r12, #12]! // DISASM: <$d>: // DISASM-NEXT: 2014c: d4 d4 d4 d4 .word 0xd4d4d4d4 - diff --git a/lld/test/ELF/arm-long-thunk-converge.s b/lld/test/ELF/arm-long-thunk-converge.s index f88a16c3d6f5c..19daeca276dfd 100644 --- a/lld/test/ELF/arm-long-thunk-converge.s +++ b/lld/test/ELF/arm-long-thunk-converge.s @@ -10,7 +10,7 @@ // CHECK1-NEXT: 4: 00 c2 40 e3 movt r12, #512 // CHECK1-NEXT: 8: 1c ff 2f e1 bx r12 // CHECK1: : -// CHECK1-NEXT: c: fb ff ff eb bl #-20 +// CHECK1-NEXT: c: fb ff ff eb bl 0x0 <__ARMv7ABSLongThunk_bar> .section .foo,"ax",%progbits,unique,1 foo: @@ -21,7 +21,7 @@ bl bar // CHECK2-NEXT: 2000004: 00 c0 40 e3 movt r12, #0 // CHECK2-NEXT: 2000008: 1c ff 2f e1 bx r12 // CHECK2: : -// CHECK2-NEXT: 200000c: fb ff ff eb bl #-20 <__ARMv7ABSLongThunk_foo> +// CHECK2-NEXT: 200000c: fb ff ff eb bl 0x2000000 <__ARMv7ABSLongThunk_foo> .section .bar,"ax",%progbits,unique,1 bar: diff --git a/lld/test/ELF/arm-plt-reloc.s b/lld/test/ELF/arm-plt-reloc.s index 17d2be5d2fb55..d5bcf682bb173 100644 --- a/lld/test/ELF/arm-plt-reloc.s +++ b/lld/test/ELF/arm-plt-reloc.s @@ -28,9 +28,9 @@ _start: // CHECK: : // CHECK-NEXT: 200bc: bx lr // CHECK: <_start>: -// CHECK-NEXT: 200c0: b #-20 -// CHECK-NEXT: 200c4: bl #-20 -// CHECK-NEXT: 200c8: beq #-20 +// CHECK-NEXT: 200c0: b 0x200b4 +// CHECK-NEXT: 200c4: bl 0x200b8 +// CHECK-NEXT: 200c8: beq 0x200bc // Expect PLT entries as symbols can be preempted // The .got.plt and .plt displacement is small so we can use small PLT entries. @@ -43,20 +43,17 @@ _start: // DSO: : // DSO-NEXT: 1021c: bx lr // DSO: <_start>: -// S(0x10214) - P(0x10220) + A(-8) = 0x2c = 32 -// DSO-NEXT: 10220: b #40 -// S(0x10218) - P(0x10224) + A(-8) = 0x38 = 56 -// DSO-NEXT: 10224: bl #52 -// S(0x1021c) - P(0x10228) + A(-8) = 0x44 = 68 -// DSO-NEXT: 10228: beq #64 +// DSO-NEXT: 10220: b 0x10250 +// DSO-NEXT: 10224: bl 0x10260 +// DSO-NEXT: 10228: beq 0x10270 // DSO-EMPTY: // DSO-NEXT: Disassembly of section .plt: // DSO-EMPTY: // DSO-NEXT: <$a>: // DSO-NEXT: 10230: str lr, [sp, #-4]! -// (0x10234 + 8) + (0 RoR 12) + 8192 + 164 = 0x32e0 = .got.plt[2] +// (0x10234 + 8) + (0 RoR 12) + (32 RoR 20 = 0x20000) + 164 = 0x302e0 = .got.plt[2] // DSO-NEXT: 10234: add lr, pc, #0, #12 -// DSO-NEXT: 10238: add lr, lr, #32 +// DSO-NEXT: 10238: add lr, lr, #32, #20 // DSO-NEXT: 1023c: ldr pc, [lr, #164]! // DSO: <$d>: // DSO-NEXT: 10240: d4 d4 d4 d4 .word 0xd4d4d4d4 @@ -64,23 +61,23 @@ _start: // DSO-NEXT: 10248: d4 d4 d4 d4 .word 0xd4d4d4d4 // DSO-NEXT: 1024c: d4 d4 d4 d4 .word 0xd4d4d4d4 // DSO: <$a>: -// (0x10250 + 8) + (0 RoR 12) + 8192 + 140 = 0x32e4 +// (0x10250 + 8) + (0 RoR 12) + (32 RoR 20 = 0x20000) + 140 = 0x302e4 // DSO-NEXT: 10250: add r12, pc, #0, #12 -// DSO-NEXT: 10254: add r12, r12, #32 +// DSO-NEXT: 10254: add r12, r12, #32, #20 // DSO-NEXT: 10258: ldr pc, [r12, #140]! // DSO: <$d>: // DSO-NEXT: 1025c: d4 d4 d4 d4 .word 0xd4d4d4d4 // DSO: <$a>: -// (0x10260 + 8) + (0 RoR 12) + 8192 + 128 = 0x32e8 +// (0x10260 + 8) + (0 RoR 12) + (32 RoR 20 = 0x20000) + 128 = 0x302e8 // DSO-NEXT: 10260: add r12, pc, #0, #12 -// DSO-NEXT: 10264: add r12, r12, #32 +// DSO-NEXT: 10264: add r12, r12, #32, #20 // DSO-NEXT: 10268: ldr pc, [r12, #128]! // DSO: <$d>: // DSO-NEXT: 1026c: d4 d4 d4 d4 .word 0xd4d4d4d4 // DSO: <$a>: -// (0x10270 + 8) + (0 RoR 12) + 8192 + 116 = 0x32ec +// (0x10270 + 8) + (0 RoR 12) + (32 RoR 20 = 0x20000) + 116 = 0x302ec // DSO-NEXT: 10270: add r12, pc, #0, #12 -// DSO-NEXT: 10274: add r12, r12, #32 +// DSO-NEXT: 10274: add r12, r12, #32, #20 // DSO-NEXT: 10278: ldr pc, [r12, #116]! // DSO: <$d>: // DSO-NEXT: 1027c: d4 d4 d4 d4 .word 0xd4d4d4d4 @@ -126,9 +123,9 @@ _start: // CHECKHIGH: : // CHECKHIGH-NEXT: 1008: bx lr // CHECKHIGH: <_start>: -// CHECKHIGH-NEXT: 100c: b #4108 <$a> -// CHECKHIGH-NEXT: 1010: bl #4120 <$a> -// CHECKHIGH-NEXT: 1014: beq #4132 <$a> +// CHECKHIGH-NEXT: 100c: b 0x2020 +// CHECKHIGH-NEXT: 1010: bl 0x2030 +// CHECKHIGH-NEXT: 1014: beq 0x2040 // CHECKHIGH-EMPTY: // CHECKHIGH-NEXT: Disassembly of section .plt: // CHECKHIGH-EMPTY: @@ -194,9 +191,9 @@ _start: // CHECKLONG: : // CHECKLONG-NEXT: 1008: bx lr // CHECKLONG: <_start>: -// CHECKLONG-NEXT: 100c: b #4108 <$a> -// CHECKLONG-NEXT: 1010: bl #4120 <$a> -// CHECKLONG-NEXT: 1014: beq #4132 <$a> +// CHECKLONG-NEXT: 100c: b 0x2020 +// CHECKLONG-NEXT: 1010: bl 0x2030 +// CHECKLONG-NEXT: 1014: beq 0x2040 // CHECKLONG-EMPTY: // CHECKLONG-NEXT: Disassembly of section .plt: // CHECKLONG-EMPTY: @@ -263,9 +260,9 @@ _start: // CHECKMIX: : // CHECKMIX-NEXT: 1008: bx lr // CHECKMIX: <_start>: -// CHECKMIX-NEXT: 100c: b #4108 <$a> -// CHECKMIX-NEXT: 1010: bl #4120 <$a> -// CHECKMIX-NEXT: 1014: beq #4132 <$a> +// CHECKMIX-NEXT: 100c: b 0x2020 +// CHECKMIX-NEXT: 1010: bl 0x2030 +// CHECKMIX-NEXT: 1014: beq 0x2040 // CHECKMIX-EMPTY: // CHECKMIX-NEXT: Disassembly of section .plt: // CHECKMIX-EMPTY: diff --git a/lld/test/ELF/arm-thumb-branch.s b/lld/test/ELF/arm-thumb-branch.s index e0396b30db5f8..e440fd24fffc0 100644 --- a/lld/test/ELF/arm-thumb-branch.s +++ b/lld/test/ELF/arm-thumb-branch.s @@ -47,15 +47,17 @@ callee_high: // CHECK-NEXT: Disassembly of section .caller: // CHECK-EMPTY: // CHECK-NEXT: <_start>: -// CHECK-NEXT: 10000: f0 f7 58 f8 bl #-65360 -// CHECK-NEXT: 10004: f0 f7 56 b8 b.w #-65364 -// CHECK-NEXT: 10008: 30 f4 54 a8 beq.w #-65368 -// CHECK-NEXT: 1000c: 00 f0 0c f8 bl #24 -// CHECK-NEXT: 10010: 00 f0 0a b8 b.w #20 -// CHECK-NEXT: 10014: 40 f0 08 80 bne.w #16 -// CHECK-NEXT: 10018: ff f3 ff d7 bl #16777214 -// CHECK-NEXT: 1001c: ff f3 fd 97 b.w #16777210 -// CHECK-NEXT: 10020: 3f f3 ff af bgt.w #1048574 +// CHECK-NEXT: 10000: f0 f7 58 f8 bl 0xb4 +// CHECK-NEXT: 10004: f0 f7 56 b8 b.w 0xb4 +// CHECK-NEXT: 10008: 30 f4 54 a8 beq.w 0xb4 +// CHECK-NEXT: 1000c: 00 f0 0c f8 bl 0x10028 +// CHECK-NEXT: 10010: 00 f0 0a b8 b.w 0x10028 +// CHECK-NEXT: 10014: 40 f0 08 80 bne.w 0x10028 +/// far_uncond = 0x101001b +// CHECK-NEXT: 10018: ff f3 ff d7 bl 0x101001a +// CHECK-NEXT: 1001c: ff f3 fd 97 b.w 0x101001a +/// far_cond = 0x110023 +// CHECK-NEXT: 10020: 3f f3 ff af bgt.w 0x110022 // CHECK-NEXT: 10024: 70 47 bx lr // CHECK-NEXT: 10026: // CHECK-EMPTY: diff --git a/lld/test/ELF/arm-thumb-condbranch-thunk.s b/lld/test/ELF/arm-thumb-condbranch-thunk.s index 3a20f4e6af255..bd69e7a8601b4 100644 --- a/lld/test/ELF/arm-thumb-condbranch-thunk.s +++ b/lld/test/ELF/arm-thumb-condbranch-thunk.s @@ -37,11 +37,11 @@ _start: // CHECK1-EMPTY: // CHECK1-NEXT: : // CHECK1-NEXT: 80000: 70 47 bx lr -// CHECK1-NEXT: 80002: 7f f3 ff d7 bl #0xf7fffe +// CHECK1-NEXT: 80002: 7f f3 ff d7 bl 0x1000004 <__Thumbv7ABSLongThunk_tfunc33> // CHECK1: <__Thumbv7ABSLongThunk_tfunc05>: -// CHECK1-NEXT: 80008: 7f f2 fa bf b.w #0x27fff4 +// CHECK1-NEXT: 80008: 7f f2 fa bf b.w 0x300000 // CHECK1: <__Thumbv7ABSLongThunk_tfunc00>: -// CHECK1-NEXT: 8000c: ff f7 f8 bf b.w #-0x10 +// CHECK1-NEXT: 8000c: ff f7 f8 bf b.w 0x80000 FUNCTION 01 // tfunc02 is within range of tfunc02 beq.w tfunc02 @@ -50,15 +50,15 @@ _start: bne.w tfunc05 // CHECK2: : // CHECK2-NEXT: 100000: 70 47 bx lr -// CHECK2-NEXT: 100002: 3f f0 fd a7 beq.w #0x7fffa -// CHECK2-NEXT: 100006: 7f f4 ff a7 bne.w #-0x80002 <__Thumbv7ABSLongThunk_tfunc05> +// CHECK2-NEXT: 100002: 3f f0 fd a7 beq.w 0x180000 +// CHECK2-NEXT: 100006: 7f f4 ff a7 bne.w 0x80008 <__Thumbv7ABSLongThunk_tfunc05> FUNCTION 02 // We can reach the Thunk Section created for bne.w tfunc05 bne.w tfunc05 beq.w tfunc00 // CHECK3: 180000: 70 47 bx lr -// CHECK3-NEXT: 180002: 40 f4 01 80 bne.w #-0xffffe <__Thumbv7ABSLongThunk_tfunc05> -// CHECK3-NEXT: 180006: 00 f4 01 80 beq.w #-0xffffe <__Thumbv7ABSLongThunk_tfunc00> +// CHECK3-NEXT: 180002: 40 f4 01 80 bne.w 0x80008 <__Thumbv7ABSLongThunk_tfunc05> +// CHECK3-NEXT: 180006: 00 f4 01 80 beq.w 0x8000c <__Thumbv7ABSLongThunk_tfunc00> FUNCTION 03 FUNCTION 04 FUNCTION 05 @@ -67,13 +67,13 @@ _start: FUNCTION 08 FUNCTION 09 // CHECK4: <__Thumbv7ABSLongThunk_tfunc03>: -// CHECK4-NEXT: 500004: ff f4 fc bf b.w #-0x300008 +// CHECK4-NEXT: 500004: ff f4 fc bf b.w 0x200000 FUNCTION 10 // We can't reach any Thunk Section, create a new one beq.w tfunc03 // CHECK5: : // CHECK5-NEXT: 580000: 70 47 bx lr -// CHECK5-NEXT: 580002: 3f f4 ff a7 beq.w #-0x80002 <__Thumbv7ABSLongThunk_tfunc03> +// CHECK5-NEXT: 580002: 3f f4 ff a7 beq.w 0x500004 <__Thumbv7ABSLongThunk_tfunc03> FUNCTION 11 FUNCTION 12 FUNCTION 13 @@ -96,13 +96,13 @@ _start: FUNCTION 30 FUNCTION 31 // CHECK6: <__Thumbv7ABSLongThunk_tfunc33>: -// CHECK6-NEXT: 1000004: ff f0 fc bf b.w #0xffff8 +// CHECK6-NEXT: 1000004: ff f0 fc bf b.w 0x1100000 // CHECK6: <__Thumbv7ABSLongThunk_tfunc00>: -// CHECK6-NEXT: 1000008: 7f f4 fa 97 b.w #-0xf8000c +// CHECK6-NEXT: 1000008: 7f f4 fa 97 b.w 0x80000 FUNCTION 32 FUNCTION 33 // We should be able to reach an existing ThunkSection. b.w tfunc00 // CHECK7: : // CHECK7-NEXT: 1100000: 70 47 bx lr -// CHECK7-NEXT: 1100002: 00 f7 01 b8 b.w #-0xffffe <__Thumbv7ABSLongThunk_tfunc00> +// CHECK7-NEXT: 1100002: 00 f7 01 b8 b.w 0x1000008 <__Thumbv7ABSLongThunk_tfunc00> diff --git a/lld/test/ELF/arm-thumb-interwork-abs.s b/lld/test/ELF/arm-thumb-interwork-abs.s index 6dd2b87e5690b..8010cc3625b24 100644 --- a/lld/test/ELF/arm-thumb-interwork-abs.s +++ b/lld/test/ELF/arm-thumb-interwork-abs.s @@ -28,11 +28,11 @@ thumb_caller: // WARN: branch and link relocation: R_ARM_THM_CALL to non STT_FUNC symbol: sym interworking not performed; consider using directive '.type sym, %function' to give symbol type STT_FUNC if interworking between ARM and Thumb is required // CHECK: 00021000 : -// CHECK-NEXT: 21000: b #-57352 -// CHECK-NEXT: 21004: bl #-57356 -// CHECK-NEXT: 21008: blx #-57360 +// CHECK-NEXT: 21000: b 0x13000 +// CHECK-NEXT: 21004: bl 0x13000 +// CHECK-NEXT: 21008: blx 0x13000 // CHECK: 0002100c : -// CHECK-NEXT: 2100c: b.w #-57360 -// CHECK-NEXT: 21010: bl #-57364 -// CHECK-NEXT: 21014: blx #-57364 +// CHECK-NEXT: 2100c: b.w 0x13000 +// CHECK-NEXT: 21010: bl 0x13000 +// CHECK-NEXT: 21014: blx 0x13004 diff --git a/lld/test/ELF/arm-thumb-interwork-ifunc.s b/lld/test/ELF/arm-thumb-interwork-ifunc.s index f77439c6c50b4..9e1d23abd106f 100644 --- a/lld/test/ELF/arm-thumb-interwork-ifunc.s +++ b/lld/test/ELF/arm-thumb-interwork-ifunc.s @@ -42,13 +42,13 @@ thumb_caller: bl foo // CHECK: 00021004 <_start>: -// CHECK-NEXT: b #36 <$a> -// CHECK-NEXT: bl #32 <$a> +// CHECK-NEXT: b 0x21030 +// CHECK-NEXT: bl 0x21030 // CHECK: 0002100c : -// CHECK-NEXT: b.w #8 -// CHECK-NEXT: b.w #4 -// CHECK-NEXT: blx #24 +// CHECK-NEXT: b.w 0x21018 <__Thumbv7ABSLongThunk_foo> +// CHECK-NEXT: b.w 0x21018 <__Thumbv7ABSLongThunk_foo> +// CHECK-NEXT: blx 0x21030 // CHECK: 00021018 <__Thumbv7ABSLongThunk_foo>: // CHECK-NEXT: movw r12, #4144 diff --git a/lld/test/ELF/arm-thumb-interwork-notfunc.s b/lld/test/ELF/arm-thumb-interwork-notfunc.s index 1cccf70b56df3..d6b6a3190c689 100644 --- a/lld/test/ELF/arm-thumb-interwork-notfunc.s +++ b/lld/test/ELF/arm-thumb-interwork-notfunc.s @@ -95,47 +95,47 @@ thumb_caller: blx thumb_func_with_explicit_notype // CHECK: 00021008 <_start>: -// CHECK-NEXT: 21008: b #-16 -// CHECK-NEXT: 2100c: b #-20 -// CHECK-NEXT: 21010: b #-24 -// CHECK-NEXT: 21014: b #-24 -// CHECK-NEXT: 21018: b #-28 -// CHECK-NEXT: 2101c: b #-32 -// CHECK-NEXT: 21020: bl #-40 -// CHECK-NEXT: 21024: bl #-44 -// CHECK-NEXT: 21028: bl #-48 -// CHECK-NEXT: 2102c: bl #-48 -// CHECK-NEXT: 21030: bl #-52 -// CHECK-NEXT: 21034: bl #-56 -// CHECK-NEXT: 21038: blx #-64 -// CHECK-NEXT: 2103c: blx #-68 -// CHECK-NEXT: 21040: blx #-72 -// CHECK-NEXT: 21044: blx #-72 -// CHECK-NEXT: 21048: blx #-76 -// CHECK-NEXT: 2104c: blx #-80 +// CHECK-NEXT: 21008: b 0x21000 +// CHECK-NEXT: 2100c: b 0x21000 +// CHECK-NEXT: 21010: b 0x21000 +// CHECK-NEXT: 21014: b 0x21004 +// CHECK-NEXT: 21018: b 0x21004 +// CHECK-NEXT: 2101c: b 0x21004 +// CHECK-NEXT: 21020: bl 0x21000 +// CHECK-NEXT: 21024: bl 0x21000 +// CHECK-NEXT: 21028: bl 0x21000 +// CHECK-NEXT: 2102c: bl 0x21004 +// CHECK-NEXT: 21030: bl 0x21004 +// CHECK-NEXT: 21034: bl 0x21004 +// CHECK-NEXT: 21038: blx 0x21000 +// CHECK-NEXT: 2103c: blx 0x21000 +// CHECK-NEXT: 21040: blx 0x21000 +// CHECK-NEXT: 21044: blx 0x21004 +// CHECK-NEXT: 21048: blx 0x21004 +// CHECK-NEXT: 2104c: blx 0x21004 // CHECK: 00021050 : -// CHECK-NEXT: 21050: b.w #-84 -// CHECK-NEXT: 21054: b.w #-88 -// CHECK-NEXT: 21058: b.w #-92 -// CHECK-NEXT: 2105c: b.w #-92 -// CHECK-NEXT: 21060: b.w #-96 -// CHECK-NEXT: 21064: b.w #-100 -// CHECK-NEXT: 21068: beq.w #-108 -// CHECK-NEXT: 2106c: beq.w #-112 -// CHECK-NEXT: 21070: beq.w #-116 -// CHECK-NEXT: 21074: beq.w #-116 -// CHECK-NEXT: 21078: beq.w #-120 -// CHECK-NEXT: 2107c: beq.w #-124 -// CHECK-NEXT: 21080: bl #-132 -// CHECK-NEXT: 21084: bl #-136 -// CHECK-NEXT: 21088: bl #-140 -// CHECK-NEXT: 2108c: bl #-140 -// CHECK-NEXT: 21090: bl #-144 -// CHECK-NEXT: 21094: bl #-148 -// CHECK-NEXT: 21098: blx #-156 -// CHECK-NEXT: 2109c: blx #-160 -// CHECK-NEXT: 210a0: blx #-164 -// CHECK-NEXT: 210a4: blx #-164 -// CHECK-NEXT: 210a8: blx #-168 -// CHECK-NEXT: 210ac: blx #-172 +// CHECK-NEXT: 21050: b.w 0x21000 +// CHECK-NEXT: 21054: b.w 0x21000 +// CHECK-NEXT: 21058: b.w 0x21000 +// CHECK-NEXT: 2105c: b.w 0x21004 +// CHECK-NEXT: 21060: b.w 0x21004 +// CHECK-NEXT: 21064: b.w 0x21004 +// CHECK-NEXT: 21068: beq.w 0x21000 +// CHECK-NEXT: 2106c: beq.w 0x21000 +// CHECK-NEXT: 21070: beq.w 0x21000 +// CHECK-NEXT: 21074: beq.w 0x21004 +// CHECK-NEXT: 21078: beq.w 0x21004 +// CHECK-NEXT: 2107c: beq.w 0x21004 +// CHECK-NEXT: 21080: bl 0x21000 +// CHECK-NEXT: 21084: bl 0x21000 +// CHECK-NEXT: 21088: bl 0x21000 +// CHECK-NEXT: 2108c: bl 0x21004 +// CHECK-NEXT: 21090: bl 0x21004 +// CHECK-NEXT: 21094: bl 0x21004 +// CHECK-NEXT: 21098: blx 0x21000 +// CHECK-NEXT: 2109c: blx 0x21000 +// CHECK-NEXT: 210a0: blx 0x21000 +// CHECK-NEXT: 210a4: blx 0x21004 +// CHECK-NEXT: 210a8: blx 0x21004 +// CHECK-NEXT: 210ac: blx 0x21004 diff --git a/lld/test/ELF/arm-thumb-interwork-shared.s b/lld/test/ELF/arm-thumb-interwork-shared.s index 45ebcf5d08169..d2bbf2c3107a9 100644 --- a/lld/test/ELF/arm-thumb-interwork-shared.s +++ b/lld/test/ELF/arm-thumb-interwork-shared.s @@ -19,10 +19,10 @@ sym1: // CHECK: Disassembly of section .text: // CHECK-EMPTY: // CHECK-NEXT: : -// CHECK-NEXT: 101e0: b.w #12 <__ThumbV7PILongThunk_elsewhere> -// CHECK-NEXT: b.w #20 <__ThumbV7PILongThunk_weakref> -// CHECK-NEXT: blx #68 -// CHECK-NEXT: blx #80 +// CHECK-NEXT: 101e0: b.w 0x101f0 <__ThumbV7PILongThunk_elsewhere> +// CHECK-NEXT: b.w 0x101fc <__ThumbV7PILongThunk_weakref> +// CHECK-NEXT: blx 0x10230 +// CHECK-NEXT: blx 0x10240 // CHECK: <__ThumbV7PILongThunk_elsewhere>: // CHECK-NEXT: 101f0: movw r12, #52 // CHECK-NEXT: movt r12, #0 diff --git a/lld/test/ELF/arm-thumb-interwork-thunk-v5.s b/lld/test/ELF/arm-thumb-interwork-thunk-v5.s index 4d6e89df18110..8144ad5d20f0b 100644 --- a/lld/test/ELF/arm-thumb-interwork-thunk-v5.s +++ b/lld/test/ELF/arm-thumb-interwork-thunk-v5.s @@ -27,9 +27,9 @@ _start: bx lr // CHECK: <_start>: -// CHECK-NEXT: 21000: 03 00 00 ea b #12 <__ARMv5ABSLongThunk_thumb_func> -// CHECK-NEXT: 21004: 01 00 00 fa blx #4 -// CHECK-NEXT: 21008: 00 00 00 fa blx #0 +// CHECK-NEXT: 21000: 03 00 00 ea b 0x21014 <__ARMv5ABSLongThunk_thumb_func> +// CHECK-NEXT: 21004: 01 00 00 fa blx 0x21010 +// CHECK-NEXT: 21008: 00 00 00 fa blx 0x21010 // CHECK-NEXT: 2100c: 1e ff 2f e1 bx lr // CHECK: : @@ -41,9 +41,9 @@ _start: // CHECK-NEXT: 21018: 11 10 02 00 .word 0x00021011 // CHECK-PI: <_start>: -// CHECK-PI-NEXT: 11000: 03 00 00 ea b #12 <__ARMV5PILongThunk_thumb_func> -// CHECK-PI-NEXT: 11004: 01 00 00 fa blx #4 -// CHECK-PI-NEXT: 11008: 00 00 00 fa blx #0 +// CHECK-PI-NEXT: 11000: 03 00 00 ea b 0x11014 <__ARMV5PILongThunk_thumb_func> +// CHECK-PI-NEXT: 11004: 01 00 00 fa blx 0x11010 +// CHECK-PI-NEXT: 11008: 00 00 00 fa blx 0x11010 // CHECK-PI-NEXT: 1100c: 1e ff 2f e1 bx lr // CHECK-PI: : diff --git a/lld/test/ELF/arm-thumb-interwork-thunk.s b/lld/test/ELF/arm-thumb-interwork-thunk.s index 0b2f9220ab75c..5caf857f06ad4 100644 --- a/lld/test/ELF/arm-thumb-interwork-thunk.s +++ b/lld/test/ELF/arm-thumb-interwork-thunk.s @@ -80,15 +80,15 @@ arm_caller: // CHECK-ARM-ABS-ARM: Disassembly of section .arm_caller: // CHECK-ARM-ABS-ARM-EMPTY: // CHECK-ARM-ABS-ARM-NEXT: : -// CHECK-ARM-ABS-ARM-NEXT: 1300: 3e ff ff fa blx #-776 -// CHECK-ARM-ABS-ARM-NEXT: 1304: 3d ff ff fa blx #-780 -// CHECK-ARM-ABS-ARM-NEXT: 1308: 06 00 00 ea b #24 <__ARMv7ABSLongThunk_thumb_callee1> -// CHECK-ARM-ABS-ARM-NEXT: 130c: 05 00 00 ea b #20 <__ARMv7ABSLongThunk_thumb_callee1> -// CHECK-ARM-ABS-ARM-NEXT: 1310: 07 00 00 ea b #28 <__ARMv7ABSLongThunk_thumb_callee2> -// CHECK-ARM-ABS-ARM-NEXT: 1314: 09 00 00 ea b #36 <__ARMv7ABSLongThunk_thumb_callee3> -// CHECK-ARM-ABS-ARM-NEXT: 1318: 78 ff ff ea b #-544 -// CHECK-ARM-ABS-ARM-NEXT: 131c: b7 00 00 0a beq #732 -// CHECK-ARM-ABS-ARM-NEXT: 1320: b7 00 00 1a bne #732 +// CHECK-ARM-ABS-ARM-NEXT: 1300: 3e ff ff fa blx 0x1000 +// CHECK-ARM-ABS-ARM-NEXT: 1304: 3d ff ff fa blx 0x1000 +// CHECK-ARM-ABS-ARM-NEXT: 1308: 06 00 00 ea b 0x1328 <__ARMv7ABSLongThunk_thumb_callee1> +// CHECK-ARM-ABS-ARM-NEXT: 130c: 05 00 00 ea b 0x1328 <__ARMv7ABSLongThunk_thumb_callee1> +// CHECK-ARM-ABS-ARM-NEXT: 1310: 07 00 00 ea b 0x1334 <__ARMv7ABSLongThunk_thumb_callee2> +// CHECK-ARM-ABS-ARM-NEXT: 1314: 09 00 00 ea b 0x1340 <__ARMv7ABSLongThunk_thumb_callee3> +// CHECK-ARM-ABS-ARM-NEXT: 1318: 78 ff ff ea b 0x1100 +// CHECK-ARM-ABS-ARM-NEXT: 131c: b7 00 00 0a beq 0x1600 +// CHECK-ARM-ABS-ARM-NEXT: 1320: b7 00 00 1a bne 0x1604 // CHECK-ARM-ABS-ARM-NEXT: 1324: 1e ff 2f e1 bx lr // CHECK-ARM-ABS-ARM: <__ARMv7ABSLongThunk_thumb_callee1>: // 0x1001 = thumb_callee1 @@ -109,15 +109,15 @@ arm_caller: // CHECK-PI-ARM: Disassembly of section .arm_caller: // CHECK-PI-ARM-EMPTY: // CHECK-PI-ARM-NEXT: : -// CHECK-PI-ARM-NEXT: 1300: 3e ff ff fa blx #-776 -// CHECK-PI-ARM-NEXT: 1304: 3d ff ff fa blx #-780 -// CHECK-PI-ARM-NEXT: 1308: 06 00 00 ea b #24 <__ARMV7PILongThunk_thumb_callee1> -// CHECK-PI-ARM-NEXT: 130c: 05 00 00 ea b #20 <__ARMV7PILongThunk_thumb_callee1> -// CHECK-PI-ARM-NEXT: 1310: 08 00 00 ea b #32 <__ARMV7PILongThunk_thumb_callee2> -// CHECK-PI-ARM-NEXT: 1314: 0b 00 00 ea b #44 <__ARMV7PILongThunk_thumb_callee3> -// CHECK-PI-ARM-NEXT: 1318: 78 ff ff ea b #-544 -// CHECK-PI-ARM-NEXT: 131c: b7 00 00 0a beq #732 -// CHECK-PI-ARM-NEXT: 1320: b7 00 00 1a bne #732 +// CHECK-PI-ARM-NEXT: 1300: 3e ff ff fa blx 0x1000 +// CHECK-PI-ARM-NEXT: 1304: 3d ff ff fa blx 0x1000 +// CHECK-PI-ARM-NEXT: 1308: 06 00 00 ea b 0x1328 <__ARMV7PILongThunk_thumb_callee1> +// CHECK-PI-ARM-NEXT: 130c: 05 00 00 ea b 0x1328 <__ARMV7PILongThunk_thumb_callee1> +// CHECK-PI-ARM-NEXT: 1310: 08 00 00 ea b 0x1338 <__ARMV7PILongThunk_thumb_callee2> +// CHECK-PI-ARM-NEXT: 1314: 0b 00 00 ea b 0x1348 <__ARMV7PILongThunk_thumb_callee3> +// CHECK-PI-ARM-NEXT: 1318: 78 ff ff ea b 0x1100 +// CHECK-PI-ARM-NEXT: 131c: b7 00 00 0a beq 0x1600 +// CHECK-PI-ARM-NEXT: 1320: b7 00 00 1a bne 0x1604 // CHECK-PI-ARM-NEXT: 1324: 1e ff 2f e1 bx lr // CHECK-PI-ARM: <__ARMV7PILongThunk_thumb_callee1>: // 0x1330 + 8 - 0x337 = 0x1001 = thumb_callee1 @@ -188,14 +188,14 @@ thumb_caller: // CHECK-ABS-THUMB: Disassembly of section .thumb_caller: // CHECK-ABS-THUMB-EMPTY: // CHECK-ABS-THUMB-NEXT: : -// CHECK-ABS-THUMB-NEXT: 1400: ff f7 7e ee blx #-772 -// CHECK-ABS-THUMB-NEXT: 1404: ff f7 7c ee blx #-776 -// CHECK-ABS-THUMB-NEXT: 1408: 00 f0 0a b8 b.w #20 <__Thumbv7ABSLongThunk_arm_callee1> -// CHECK-ABS-THUMB-NEXT: 140c: 00 f0 0d b8 b.w #26 <__Thumbv7ABSLongThunk_arm_callee2> -// CHECK-ABS-THUMB-NEXT: 1410: 00 f0 10 b8 b.w #32 <__Thumbv7ABSLongThunk_arm_callee3> -// CHECK-ABS-THUMB-NEXT: 1414: 00 f0 04 80 beq.w #8 <__Thumbv7ABSLongThunk_arm_callee1> -// CHECK-ABS-THUMB-NEXT: 1418: 00 f0 07 80 beq.w #14 <__Thumbv7ABSLongThunk_arm_callee2> -// CHECK-ABS-THUMB-NEXT: 141c: 40 f0 0a 80 bne.w #20 <__Thumbv7ABSLongThunk_arm_callee3> +// CHECK-ABS-THUMB-NEXT: 1400: ff f7 7e ee blx 0x1100 +// CHECK-ABS-THUMB-NEXT: 1404: ff f7 7c ee blx 0x1100 +// CHECK-ABS-THUMB-NEXT: 1408: 00 f0 0a b8 b.w 0x1420 <__Thumbv7ABSLongThunk_arm_callee1> +// CHECK-ABS-THUMB-NEXT: 140c: 00 f0 0d b8 b.w 0x142a <__Thumbv7ABSLongThunk_arm_callee2> +// CHECK-ABS-THUMB-NEXT: 1410: 00 f0 10 b8 b.w 0x1434 <__Thumbv7ABSLongThunk_arm_callee3> +// CHECK-ABS-THUMB-NEXT: 1414: 00 f0 04 80 beq.w 0x1420 <__Thumbv7ABSLongThunk_arm_callee1> +// CHECK-ABS-THUMB-NEXT: 1418: 00 f0 07 80 beq.w 0x142a <__Thumbv7ABSLongThunk_arm_callee2> +// CHECK-ABS-THUMB-NEXT: 141c: 40 f0 0a 80 bne.w 0x1434 <__Thumbv7ABSLongThunk_arm_callee3> // CHECK-ABS-THUMB: <__Thumbv7ABSLongThunk_arm_callee1>: // 0x1100 = arm_callee1 // CHECK-ABS-THUMB-NEXT: 1420: 41 f2 00 1c movw r12, #4352 @@ -215,14 +215,14 @@ thumb_caller: // CHECK-PI-THUMB: Disassembly of section .thumb_caller: // CHECK-PI-THUMB-EMPTY: // CHECK-PI-THUMB-NEXT: : -// CHECK-PI-THUMB-NEXT: 1400: ff f7 7e ee blx #-772 -// CHECK-PI-THUMB-NEXT: 1404: ff f7 7c ee blx #-776 -// CHECK-PI-THUMB-NEXT: 1408: 00 f0 0a b8 b.w #20 <__ThumbV7PILongThunk_arm_callee1> -// CHECK-PI-THUMB-NEXT: 140c: 00 f0 0e b8 b.w #28 <__ThumbV7PILongThunk_arm_callee2> -// CHECK-PI-THUMB-NEXT: 1410: 00 f0 12 b8 b.w #36 <__ThumbV7PILongThunk_arm_callee3> -// CHECK-PI-THUMB-NEXT: 1414: 00 f0 04 80 beq.w #8 <__ThumbV7PILongThunk_arm_callee1> -// CHECK-PI-THUMB-NEXT: 1418: 00 f0 08 80 beq.w #16 <__ThumbV7PILongThunk_arm_callee2> -// CHECK-PI-THUMB-NEXT: 141c: 40 f0 0c 80 bne.w #24 <__ThumbV7PILongThunk_arm_callee3> +// CHECK-PI-THUMB-NEXT: 1400: ff f7 7e ee blx 0x1100 +// CHECK-PI-THUMB-NEXT: 1404: ff f7 7c ee blx 0x1100 +// CHECK-PI-THUMB-NEXT: 1408: 00 f0 0a b8 b.w 0x1420 <__ThumbV7PILongThunk_arm_callee1> +// CHECK-PI-THUMB-NEXT: 140c: 00 f0 0e b8 b.w 0x142c <__ThumbV7PILongThunk_arm_callee2> +// CHECK-PI-THUMB-NEXT: 1410: 00 f0 12 b8 b.w 0x1438 <__ThumbV7PILongThunk_arm_callee3> +// CHECK-PI-THUMB-NEXT: 1414: 00 f0 04 80 beq.w 0x1420 <__ThumbV7PILongThunk_arm_callee1> +// CHECK-PI-THUMB-NEXT: 1418: 00 f0 08 80 beq.w 0x142c <__ThumbV7PILongThunk_arm_callee2> +// CHECK-PI-THUMB-NEXT: 141c: 40 f0 0c 80 bne.w 0x1438 <__ThumbV7PILongThunk_arm_callee3> // CHECK-PI-THUMB: <__ThumbV7PILongThunk_arm_callee1>: // 0x1428 + 4 - 0x32c = 0x1100 = arm_callee1 // CHECK-PI-THUMB-NEXT: 1420: 4f f6 d4 4c movw r12, #64724 diff --git a/lld/test/ELF/arm-thumb-mix-range-thunk-os.s b/lld/test/ELF/arm-thumb-mix-range-thunk-os.s index 33b698d6d886b..5a54c1bed4dea 100644 --- a/lld/test/ELF/arm-thumb-mix-range-thunk-os.s +++ b/lld/test/ELF/arm-thumb-mix-range-thunk-os.s @@ -62,12 +62,12 @@ _start: bne afunc32 // CHECK1: : // CHECK1-NEXT: 100000: 1e ff 2f e1 bx lr -// CHECK1-NEXT: 100004: fd ff 7b fa blx #32505844 -// CHECK1-NEXT: 100008: fd ff 3b ea b #15728628 -// CHECK1-NEXT: 10000c: fc ff 3b 0a beq #15728624 -// CHECK1-NEXT: 100010: fa ff 7f eb bl #33554408 -// CHECK1-NEXT: 100014: f9 ff 7f ea b #33554404 -// CHECK1-NEXT: 100018: f8 ff 7f 1a bne #33554400 +// CHECK1-NEXT: 100004: fd ff 7b fa blx 0x2000000 +// CHECK1-NEXT: 100008: fd ff 3b ea b 0x1000004 <__ARMv7ABSLongThunk_tfunc31> +// CHECK1-NEXT: 10000c: fc ff 3b 0a beq 0x1000004 <__ARMv7ABSLongThunk_tfunc31> +// CHECK1-NEXT: 100010: fa ff 7f eb bl 0x2100000 +// CHECK1-NEXT: 100014: f9 ff 7f ea b 0x2100000 +// CHECK1-NEXT: 100018: f8 ff 7f 1a bne 0x2100000 THUMBFUNCTION 01 // Expect Thumb bl to be in range (can use blx to change state) bl afunc14 @@ -75,8 +75,8 @@ _start: b.w afunc14 // CHECK2: : // CHECK2-NEXT: 200000: 70 47 bx lr -// CHECK2-NEXT: 200002: ff f0 fe c7 blx #13631484 -// CHECK2-NEXT: 200006: 00 f2 03 90 b.w #14680070 <__Thumbv7ABSLongThunk_afunc14> +// CHECK2-NEXT: 200002: ff f0 fe c7 blx 0xf00000 +// CHECK2-NEXT: 200006: 00 f2 03 90 b.w 0x1000010 <__Thumbv7ABSLongThunk_afunc14> ARMFUNCTION 02 THUMBFUNCTION 03 @@ -127,12 +127,12 @@ _start: bl afunc00 // CHECK6: : // CHECK6-NEXT: 2200000: 70 47 bx lr -// CHECK6-NEXT: 2200002: ff f4 ff ff bl #-3145730 +// CHECK6-NEXT: 2200002: ff f4 ff ff bl 0x1f00004 <__Thumbv7ABSLongThunk_afunc00> ARMFUNCTION 34 // Out of range, can reach earlier Thunk Section // CHECK7: : // CHECK7-NEXT: 2300000: 1e ff 2f e1 bx lr -// CHECK7-NEXT: 2300004: fe ff ef fa blx #-4194312 <__Thumbv7ABSLongThunk_afunc00 +// CHECK7-NEXT: 2300004: fe ff ef fa blx 0x1f00004 <__Thumbv7ABSLongThunk_afunc00> bl afunc00 THUMBFUNCTION 35 ARMFUNCTION 36 @@ -161,7 +161,7 @@ _start: // CHECK9-NEXT: 3300008: c0 f2 30 2c movt r12, #560 // CHECK9-NEXT: 330000c: 60 47 bx r12 // CHECK9: <__Thumbv7ABSLongThunk_tfunc35>: -// CHECK9-NEXT: 330000e: ff f4 f7 97 b.w #-15728658 +// CHECK9-NEXT: 330000e: ff f4 f7 97 b.w 0x2400000 THUMBFUNCTION 51 ARMFUNCTION 52 THUMBFUNCTION 53 @@ -180,14 +180,14 @@ _start: bl afunc34 b tfunc35 // CHECK10: : -// CHECK10-NEXT: 4100000: 1e ff 2f e1 bx lr -// CHECK10-NEXT: 4100004: fd ff 87 eb bl #-31457292 -// CHECK10-NEXT: 4100008: fd ff b3 ea b #-19922956 <__ARMv7ABSLongThunk_tfunc35> +// CHECK10-NEXT: 4100000: 1e ff 2f e1 bx lr +// CHECK10-NEXT: 4100004: fd ff 87 eb bl 0x2300000 +// CHECK10-NEXT: 4100008: fd ff b3 ea b 0x2e00004 <__ARMv7ABSLongThunk_tfunc35> THUMBFUNCTION 65 // afunc34 and tfunc35 are both out of range bl afunc34 bl tfunc35 // CHECK11: : // CHECK11: 4200000: 70 47 bx lr -// CHECK11-NEXT: 4200002: ff f4 ff d7 bl #-15728642 -// CHECK11-NEXT: 4200006: 00 f5 02 d0 bl #-15728636 +// CHECK11-NEXT: 4200002: ff f4 ff d7 bl 0x3300004 <__Thumbv7ABSLongThunk_afunc34> +// CHECK11-NEXT: 4200006: 00 f5 02 d0 bl 0x330000e <__Thumbv7ABSLongThunk_tfunc35> diff --git a/lld/test/ELF/arm-thumb-narrow-branch-check.s b/lld/test/ELF/arm-thumb-narrow-branch-check.s index 6dc8c0b2978c1..687ace98f37ee 100644 --- a/lld/test/ELF/arm-thumb-narrow-branch-check.s +++ b/lld/test/ELF/arm-thumb-narrow-branch-check.s @@ -57,14 +57,12 @@ callee_high_far = 0x180d // CHECK-NEXT: Disassembly of section .caller: // CHECK-EMPTY: // CHECK-NEXT: : -// 1004 - 0x800 (2048) + 4 = 0x808 = callee_low_far -// CHECK-NEXT: 1004: 00 e4 b #-2048 -// 1006 - 0xa (10) + 4 = 0x1000 = callee_low -// CHECK-NEXT: 1006: fb e7 b #-10 -// 1008 + 4 + 4 = 0x1010 = callee_high -// CHECK-NEXT: 1008: 02 e0 b #4 -// 100a + 0x7fe (2046) + 4 = 0x180c = callee_high_far -// CHECK-NEXT: 100a: ff e3 b #2046 +/// callee_low_far = 0x809 +// CHECK-NEXT: 1004: 00 e4 b 0x808 +// CHECK-NEXT: 1006: fb e7 b 0x1000 +// CHECK-NEXT: 1008: 02 e0 b 0x1010 +/// callee_high_far = 0x180d +// CHECK-NEXT: 100a: ff e3 b 0x180c // CHECK-NEXT: 100c: 70 47 bx lr // CHECK-NEXT: 100e: 00 bf nop // CHECK-EMPTY: @@ -76,5 +74,5 @@ callee_high_far = 0x180d // CHECK-NEXT: Disassembly of section .text: // CHECK-EMPTY: // CHECK-NEXT: <_start>: -// CHECK-NEXT: 1014: ff f7 f6 ff bl #-20 +// CHECK-NEXT: 1014: ff f7 f6 ff bl 0x1004 // CHECK-NEXT: 1018: 70 47 bx lr diff --git a/lld/test/ELF/arm-thumb-no-undefined-thunk.s b/lld/test/ELF/arm-thumb-no-undefined-thunk.s index 77af78ce68179..c84c16ec69041 100644 --- a/lld/test/ELF/arm-thumb-no-undefined-thunk.s +++ b/lld/test/ELF/arm-thumb-no-undefined-thunk.s @@ -19,7 +19,7 @@ _start: // CHECK: Disassembly of section .text: // CHECK-EMPTY: // CHECK-NEXT: <_start>: -// 0x110b8 = next instruction -// CHECK: 200b4: {{.*}} bl #0 -// CHECK-NEXT: 200b8: {{.*}} b.w #0 <_start+0x8> -// CHECK-NEXT: 200bc: {{.*}} b.w #0 <_start+0xc> +// 0x200b8 = next instruction +// CHECK: 200b4: {{.*}} bl 0x200b8 <_start+0x4> @ imm = #0 +// CHECK-NEXT: 200b8: {{.*}} b.w 0x200bc <_start+0x8> @ imm = #0 +// CHECK-NEXT: 200bc: {{.*}} b.w 0x200c0 <_start+0xc> @ imm = #0 diff --git a/lld/test/ELF/arm-thumb-plt-range-thunk-os.s b/lld/test/ELF/arm-thumb-plt-range-thunk-os.s index 0a84b32cdcd3b..e0c1a0a9a7219 100644 --- a/lld/test/ELF/arm-thumb-plt-range-thunk-os.s +++ b/lld/test/ELF/arm-thumb-plt-range-thunk-os.s @@ -38,13 +38,13 @@ preemptible: // CHECK1: Disassembly of section .text: // CHECK1-EMPTY: // CHECK1-NEXT: : -// CHECK1-NEXT: 2000000: 00 f0 00 d8 bl #8388608 -// CHECK1-NEXT: 2000004: 00 f0 04 d8 bl #8388616 +// CHECK1-NEXT: 2000000: 00 f0 00 d8 bl 0x2800004 <__ThumbV7PILongThunk_elsewhere> +// CHECK1-NEXT: 2000004: 00 f0 04 d8 bl 0x2800010 <__ThumbV7PILongThunk_preemptible> // CHECK1-NEXT: 2000008: 70 47 bx lr // CHECK1: : -// CHECK1-NEXT: 200000a: 00 f0 07 d8 bl #8388622 -// CHECK1-NEXT: 200000e: 00 f0 0b d8 bl #8388630 -// CHECK1-NEXT: 2000012: 00 f0 09 d8 bl #8388626 +// CHECK1-NEXT: 200000a: 00 f0 07 d8 bl 0x280001c <__ThumbV7PILongThunk_far_preemptible> +// CHECK1-NEXT: 200000e: 00 f0 0b d8 bl 0x2800028 <__ThumbV7PILongThunk_far_nonpreemptible> +// CHECK1-NEXT: 2000012: 00 f0 09 d8 bl 0x2800028 <__ThumbV7PILongThunk_far_nonpreemptible> // CHECK1-NEXT: 2000016: 70 47 bx lr .section .text.2, "ax", %progbits @@ -83,7 +83,7 @@ far_nonpreemptible_alias: bl elsewhere // CHECK3: : -// CHECK3: 4000000: 00 f0 16 e8 blx #44 +// CHECK3: 4000000: 00 f0 16 e8 blx 0x4000030 // CHECK4: Disassembly of section .plt: // CHECK4-EMPTY: diff --git a/lld/test/ELF/arm-thumb-plt-reloc.s b/lld/test/ELF/arm-thumb-plt-reloc.s index aa561771f14ba..22e4282579003 100644 --- a/lld/test/ELF/arm-thumb-plt-reloc.s +++ b/lld/test/ELF/arm-thumb-plt-reloc.s @@ -32,12 +32,9 @@ _start: // CHECK-NEXT: 200b8: 70 47 bx lr // CHECK-NEXT: 200ba: d4 d4 // CHECK: <_start>: -// . + 4 -12 = 0x200b4 = func1 -// CHECK-NEXT: 200bc: ff f7 fa ff bl #-12 -// . + 4 -14 = 0x200b6 = func2 -// CHECK-NEXT: 200c0: ff f7 f9 ff bl #-14 -// . + 4 -16 = 0x200b8 = func3 -// CHECK-NEXT: 200c4: ff f7 f8 ff bl #-16 +// CHECK-NEXT: 200bc: ff f7 fa ff bl 0x200b4 +// CHECK-NEXT: 200c0: ff f7 f9 ff bl 0x200b6 +// CHECK-NEXT: 200c4: ff f7 f8 ff bl 0x200b8 // Expect PLT entries as symbols can be preempted // .text is Thumb and .plt is ARM, llvm-objdump can currently only disassemble @@ -50,21 +47,21 @@ _start: // DSO-NEXT: 10216: 70 47 bx lr // DSO: : // DSO-NEXT: 10218: 70 47 bx lr -// DSO-NEXT: 1021a: d4 d4 bmi #-88 +// DSO-NEXT: 1021a: d4 d4 // DSO: <_start>: -// . + 48 + 4 = 0x10250 = PLT func1 -// DSO-NEXT: 1021c: 00 f0 18 e8 blx #48 -// . + 60 + 4 = 0x10260 = PLT func2 -// DSO-NEXT: 10220: 00 f0 1e e8 blx #60 -// . + 72 + 4 = 0x10270 = PLT func3 -// DSO-NEXT: 10224: 00 f0 24 e8 blx #72 +// 0x10250 = PLT func1 +// DSO-NEXT: 1021c: 00 f0 18 e8 blx 0x10250 +// 0x10260 = PLT func2 +// DSO-NEXT: 10220: 00 f0 1e e8 blx 0x10260 +// 0x10270 = PLT func3 +// DSO-NEXT: 10224: 00 f0 24 e8 blx 0x10270 // DSO: Disassembly of section .plt: // DSO-EMPTY: // DSO-NEXT: <$a>: // DSO-NEXT: 10230: 04 e0 2d e5 str lr, [sp, #-4]! -// (0x10234 + 8) + (0 RoR 12) + 8192 + 164 = 0x32e0 = .got.plt[3] +// (0x10234 + 8) + (0 RoR 12) + (32 RoR 20 = 0x20000) + 164 = 0x302e0 = .got.plt[2] // DSO-NEXT: 10234: 00 e6 8f e2 add lr, pc, #0, #12 -// DSO-NEXT: 10238: 20 ea 8e e2 add lr, lr, #32 +// DSO-NEXT: 10238: 20 ea 8e e2 add lr, lr, #32, #20 // DSO-NEXT: 1023c: a4 f0 be e5 ldr pc, [lr, #164]! // DSO: <$d>: @@ -73,23 +70,23 @@ _start: // DSO-NEXT: 10248: d4 d4 d4 d4 .word 0xd4d4d4d4 // DSO-NEXT: 1024c: d4 d4 d4 d4 .word 0xd4d4d4d4 // DSO: <$a>: -// (0x10250 + 8) + (0 RoR 12) + 8192 + 140 = 0x32e4 +// (0x10250 + 8) + (0 RoR 12) + (32 RoR 20 = 0x20000) + 140 = 0x302e4 // DSO-NEXT: 10250: 00 c6 8f e2 add r12, pc, #0, #12 -// DSO-NEXT: 10254: 20 ca 8c e2 add r12, r12, #32 +// DSO-NEXT: 10254: 20 ca 8c e2 add r12, r12, #32, #20 // DSO-NEXT: 10258: 8c f0 bc e5 ldr pc, [r12, #140]! // DSO: <$d>: // DSO-NEXT: 1025c: d4 d4 d4 d4 .word 0xd4d4d4d4 // DSO: <$a>: -// (0x10260 + 8) + (0 RoR 12) + 8192 + 128 = 0x32e8 +// (0x10260 + 8) + (0 RoR 12) + (32 RoR 20 = 0x20000) + 128 = 0x302e8 // DSO-NEXT: 10260: 00 c6 8f e2 add r12, pc, #0, #12 -// DSO-NEXT: 10264: 20 ca 8c e2 add r12, r12, #32 +// DSO-NEXT: 10264: 20 ca 8c e2 add r12, r12, #32, #20 // DSO-NEXT: 10268: 80 f0 bc e5 ldr pc, [r12, #128]! // DSO: <$d>: // DSO-NEXT: 1026c: d4 d4 d4 d4 .word 0xd4d4d4d4 // DSO: <$a>: -// (0x10270 + 8) + (0 RoR 12) + 8192 + 116 = 0x32ec +// (0x10270 + 8) + (0 RoR 12) + (32 RoR 20 = 0x20000) + 116 = 0x302ec // DSO-NEXT: 10270: 00 c6 8f e2 add r12, pc, #0, #12 -// DSO-NEXT: 10274: 20 ca 8c e2 add r12, r12, #32 +// DSO-NEXT: 10274: 20 ca 8c e2 add r12, r12, #32, #20 // DSO-NEXT: 10278: 74 f0 bc e5 ldr pc, [r12, #116]! // DSO: <$d>: // DSO-NEXT: 1027c: d4 d4 d4 d4 .word 0xd4d4d4d4 diff --git a/lld/test/ELF/arm-thumb-range-thunk-os.s b/lld/test/ELF/arm-thumb-range-thunk-os.s index ba89159aeac46..0205d773b35fa 100644 --- a/lld/test/ELF/arm-thumb-range-thunk-os.s +++ b/lld/test/ELF/arm-thumb-range-thunk-os.s @@ -45,9 +45,9 @@ _start: // CHECK1: Disassembly of section .text: // CHECK1-EMPTY: // CHECK1-NEXT: <_start>: -// CHECK1-NEXT: 100000: ff f0 fe ff bl #1048572 -// CHECK1-NEXT: 100004: ff f3 fc d7 bl #16777208 -// CHECK1-NEXT: 100008: ff f2 fc d7 bl #15728632 +// CHECK1-NEXT: 100000: ff f0 fe ff bl 0x200000 +// CHECK1-NEXT: 100004: ff f3 fc d7 bl 0x1100000 +// CHECK1-NEXT: 100008: ff f2 fc d7 bl 0x1000004 <__Thumbv7ABSLongThunk_tfunc16> FUNCTION 00 // CHECK2: : @@ -61,7 +61,7 @@ _start: b.w tfunc28 // CHECK4: : // CHECK4-NEXT: 400000: 70 47 bx lr -// CHECK4-NEXT: 400002: 00 f0 01 90 b.w #12582914 <__Thumbv7ABSLongThunk_tfunc28> +// CHECK4-NEXT: 400002: 00 f0 01 90 b.w 0x1000008 <__Thumbv7ABSLongThunk_tfunc28> FUNCTION 03 FUNCTION 04 FUNCTION 05 @@ -76,9 +76,9 @@ _start: FUNCTION 14 // Expect precreated ThunkSection here // CHECK5: <__Thumbv7ABSLongThunk_tfunc16>: -// CHECK5-NEXT: 1000004: ff f1 fc bf b.w #2097144 +// CHECK5-NEXT: 1000004: ff f1 fc bf b.w 0x1200000 // CHECK5: <__Thumbv7ABSLongThunk_tfunc28>: -// CHECK5-NEXT: 1000008: ff f1 fa 97 b.w #14680052 +// CHECK5-NEXT: 1000008: ff f1 fa 97 b.w 0x1e00000 // CHECK5: <__Thumbv7ABSLongThunk_tfunc32>: // CHECK5-NEXT: 100000c: 40 f2 01 0c movw r12, #1 // CHECK5-NEXT: 1000010: c0 f2 20 2c movt r12, #544 @@ -88,7 +88,7 @@ _start: // CHECK5-NEXT: 100001a: c0 f2 30 2c movt r12, #560 // CHECK5-NEXT: 100001e: 60 47 bx r12 // CHECK5: <__Thumbv7ABSLongThunk_tfunc02>: -// CHECK5-NEXT: 1000020: ff f7 ee 97 b.w #-12582948 +// CHECK5-NEXT: 1000020: ff f7 ee 97 b.w 0x400000 FUNCTION 15 // tfunc00 and tfunc01 are < 16Mb away, expect no range extension thunks bl tfunc00 @@ -99,18 +99,18 @@ _start: bl tfunc33 // CHECK6: : // CHECK6-NEXT: 1100000: 70 47 bx lr -// CHECK6-NEXT: 1100002: ff f4 fd d7 bl #-15728646 -// CHECK6-NEXT: 1100006: ff f5 fb d7 bl #-14680074 -// CHECK6-NEXT: 110000a: ff f6 ff ff bl #-1048578 -// CHECK6-NEXT: 110000e: 00 f7 02 f8 bl #-1048572 +// CHECK6-NEXT: 1100002: ff f4 fd d7 bl 0x200000 +// CHECK6-NEXT: 1100006: ff f5 fb d7 bl 0x300000 +// CHECK6-NEXT: 110000a: ff f6 ff ff bl 0x100000c <__Thumbv7ABSLongThunk_tfunc32> +// CHECK6-NEXT: 110000e: 00 f7 02 f8 bl 0x1000016 <__Thumbv7ABSLongThunk_tfunc33> FUNCTION 16 FUNCTION 17 FUNCTION 18 // Expect another precreated thunk section here // CHECK7: <__Thumbv7ABSLongThunk_tfunc15>: -// CHECK7-NEXT: 1400004: ff f4 fc bf b.w #-3145736 +// CHECK7-NEXT: 1400004: ff f4 fc bf b.w 0x1100000 // CHECK7: <__Thumbv7ABSLongThunk_tfunc16>: -// CHECK7-NEXT: 1400008: ff f5 fa bf b.w #-2097164 +// CHECK7-NEXT: 1400008: ff f5 fa bf b.w 0x1200000 FUNCTION 19 FUNCTION 20 FUNCTION 21 @@ -125,7 +125,7 @@ _start: // section // CHECK8: : // CHECK8-NEXT: 1e00000: 70 47 bx lr -// CHECK8-NEXT: 1e00002: 00 f6 0d 90 b.w #-14680038 <__Thumbv7ABSLongThunk_tfunc02> +// CHECK8-NEXT: 1e00002: 00 f6 0d 90 b.w 0x1000020 <__Thumbv7ABSLongThunk_tfunc02> b.w tfunc02 FUNCTION 29 @@ -138,13 +138,13 @@ _start: bl tfunc16 // CHECK9: : // CHECK9: 2200000: 70 47 bx lr -// CHECK9-NEXT: 2200002: ff f5 ff d7 bl #-14680066 -// CHECK9-NEXT: 2200006: ff f5 ff d7 bl #-14680066 +// CHECK9-NEXT: 2200002: ff f5 ff d7 bl 0x1400004 <__Thumbv7ABSLongThunk_tfunc15> +// CHECK9-NEXT: 2200006: ff f5 ff d7 bl 0x1400008 <__Thumbv7ABSLongThunk_tfunc16> FUNCTION 33 bl tfunc15 bl tfunc16 // CHECK10: : // CHECK10: 2300000: 70 47 bx lr -// CHECK10-NEXT: 2300002: ff f4 ff d7 bl #-15728642 -// CHECK10-NEXT: 2300006: ff f4 ff d7 bl #-15728642 +// CHECK10-NEXT: 2300002: ff f4 ff d7 bl 0x1400004 <__Thumbv7ABSLongThunk_tfunc15> +// CHECK10-NEXT: 2300006: ff f4 ff d7 bl 0x1400008 <__Thumbv7ABSLongThunk_tfunc16> diff --git a/lld/test/ELF/arm-thumb-thunk-empty-pass.s b/lld/test/ELF/arm-thumb-thunk-empty-pass.s index 35eecc49eda92..2d05ac7b78459 100644 --- a/lld/test/ELF/arm-thumb-thunk-empty-pass.s +++ b/lld/test/ELF/arm-thumb-thunk-empty-pass.s @@ -18,13 +18,13 @@ foo: // CHECK: Disassembly of section .text: // CHECK-EMPTY: // CHECK-NEXT: <_start>: -// CHECK-NEXT: 200b4: ff f7 fe ff bl #-4 +// CHECK-NEXT: 200b4: ff f7 fe ff bl 0x200b4 <_start> // CHECK: <__Thumbv7ABSLongThunk__start>: -// CHECK-NEXT: 200b8: ff f7 fc bf b.w #-8 <_start> +// CHECK-NEXT: 200b8: ff f7 fc bf b.w 0x200b4 <_start> // CHECK: <__Thumbv7ABSLongThunk__start>: // CHECK: 10200bc: 40 f2 b5 0c movw r12, #181 // CHECK-NEXT: 10200c0: c0 f2 02 0c movt r12, #2 // CHECK-NEXT: 10200c4: 60 47 bx r12 // CHECK: : -// CHECK-NEXT: 10200c6: ff f7 f9 ff bl #-14 +// CHECK-NEXT: 10200c6: ff f7 f9 ff bl 0x10200bc <__Thumbv7ABSLongThunk__start> diff --git a/lld/test/ELF/arm-thumb-thunk-v6m.s b/lld/test/ELF/arm-thumb-thunk-v6m.s index 6036febf8e63d..ff6968698d93f 100644 --- a/lld/test/ELF/arm-thumb-thunk-v6m.s +++ b/lld/test/ELF/arm-thumb-thunk-v6m.s @@ -33,7 +33,7 @@ far: // CHECK: Disassembly of section .text_low: // CHECK-EMPTY: // CHECK-NEXT: <_start>: -// CHECK-NEXT: 94: 00 f0 00 f8 bl #0 +// CHECK-NEXT: 94: 00 f0 00 f8 bl 0x98 <__Thumbv6MABSLongThunk_far> // CHECK: <__Thumbv6MABSLongThunk_far>: // CHECK-NEXT: 98: 03 b4 push {r0, r1} // CHECK-NEXT: 9a: 01 48 ldr r0, [pc, #4] @@ -48,7 +48,7 @@ far: // CHECK-PI: Disassembly of section .text_low: // CHECK-PI-EMPTY: // CHECK-PI-NEXT: <_start>: -// CHECK-PI-NEXT: 130: 00 f0 00 f8 bl #0 +// CHECK-PI-NEXT: 130: 00 f0 00 f8 bl 0x134 <__Thumbv6MPILongThunk_far> // CHECK-PI: <__Thumbv6MPILongThunk_far>: // CHECK-PI-NEXT: 134: 01 b4 push {r0} // CHECK-PI-NEXT: 136: 02 48 ldr r0, [pc, #8] diff --git a/lld/test/ELF/arm-thumb-undefined-weak-narrow.test b/lld/test/ELF/arm-thumb-undefined-weak-narrow.test index 1de346d8b7a0b..dbb587568aee0 100644 --- a/lld/test/ELF/arm-thumb-undefined-weak-narrow.test +++ b/lld/test/ELF/arm-thumb-undefined-weak-narrow.test @@ -6,7 +6,7 @@ # CHECK: Disassembly of section .text: # CHECK-EMPTY: # CHECK-NEXT: <_start>: -# CHECK-NEXT: ff e7 b #-2 +# CHECK-NEXT: ff e7 b 0x200b6 <_start+0x2> @ imm = #-2 # Test the R_ARM_THM_JUMP11 relocation (102) to an undefined weak reference # It should resolve to the next instruction, which is an offset of -2 which diff --git a/lld/test/ELF/arm-thumb-undefined-weak.s b/lld/test/ELF/arm-thumb-undefined-weak.s index 6cc26cfb87847..1738ca7fa1e01 100644 --- a/lld/test/ELF/arm-thumb-undefined-weak.s +++ b/lld/test/ELF/arm-thumb-undefined-weak.s @@ -37,11 +37,11 @@ _start: .reloc 0x1c, R_ARM_THM_PC12, target // CHECK: Disassembly of section .text: // CHECK-EMPTY: -// CHECK: 200b4: {{.*}} beq.w #0 <_start+0x4> -// CHECK-NEXT: 200b8: {{.*}} b.w #0 <_start+0x8> -// CHECK-NEXT: 200bc: {{.*}} bl #0 +// CHECK: 200b4: {{.*}} beq.w 0x200b8 <_start+0x4> @ imm = #0 +// CHECK-NEXT: 200b8: {{.*}} b.w 0x200bc <_start+0x8> @ imm = #0 +// CHECK-NEXT: 200bc: {{.*}} bl 0x200c0 <_start+0xc> @ imm = #0 /// blx is transformed into bl so we don't change state -// CHECK-NEXT: 200c0: {{.*}} bl #0 +// CHECK-NEXT: 200c0: {{.*}} bl 0x200c4 <_start+0x10> @ imm = #0 // CHECK-NEXT: 200c4: {{.*}} movt r0, #0 // CHECK-NEXT: 200c8: {{.*}} movw r0, #0 // CHECK-NEXT: 200cc: {{.*}} adr.w r0, #-4 diff --git a/lld/test/ELF/arm-thunk-arm-thumb-reuse.s b/lld/test/ELF/arm-thunk-arm-thumb-reuse.s index 5b77c4fcf5672..86901fa3769e6 100644 --- a/lld/test/ELF/arm-thunk-arm-thumb-reuse.s +++ b/lld/test/ELF/arm-thunk-arm-thumb-reuse.s @@ -29,12 +29,12 @@ _start: bl far2 // CHECK: 00010000 <_start>: -// CHECK-NEXT: 10000: bl #8 <__ARMv7ABSLongThunk_far> +// CHECK-NEXT: 10000: bl 0x10010 <__ARMv7ABSLongThunk_far> // CHECK: 00010004 <$t.1>: -// CHECK-NEXT: 10004: blx #8 -// CHECK-NEXT: 10008: bl #16 +// CHECK-NEXT: 10004: blx 0x10010 <__ARMv7ABSLongThunk_far> +// CHECK-NEXT: 10008: bl 0x1001c <__Thumbv7ABSLongThunk_far2> // CHECK: 0001000c <$a.2>: -// CHECK-NEXT: 1000c: blx #8 <__Thumbv7ABSLongThunk_far2> +// CHECK-NEXT: 1000c: blx 0x1001c <__Thumbv7ABSLongThunk_far2> // CHECK: 00010010 <__ARMv7ABSLongThunk_far>: // CHECK-NEXT: 10010: movw r12, #0 // CHECK-NEXT: 10014: movt r12, #4096 diff --git a/lld/test/ELF/arm-thunk-largesection.s b/lld/test/ELF/arm-thunk-largesection.s index f51d8dc4187fa..0aa02b125f05d 100644 --- a/lld/test/ELF/arm-thunk-largesection.s +++ b/lld/test/ELF/arm-thunk-largesection.s @@ -22,7 +22,7 @@ _start: // CHECK2: <__Thumbv7ABSLongThunk__start>: -// CHECK2-NEXT: 22004: b.w #-4104 <_start> +// CHECK2-NEXT: 22004: b.w 0x21000 <_start> /// Gigantic section where we need a ThunkSection either side of it .section .text.large1, "ax", %progbits @@ -32,8 +32,8 @@ _start: .space (16 * 1024 * 1024) - 4 bl _start .space (16 * 1024 * 1024) - 16 -// CHECK3: 1021ff8: bl #-16777208 -// CHECK4: 2021ff8: bl #16777200 +// CHECK3: 1021ff8: bl 0x22004 <__Thumbv7ABSLongThunk__start> +// CHECK4: 2021ff8: bl 0x3021fec <__Thumbv7ABSLongThunk__start> // CHECK5: <__Thumbv7ABSLongThunk__start>: // CHECK5-NEXT: 3021fec: movw r12, #4097 diff --git a/lld/test/ELF/arm-thunk-linkerscript-dotexpr.s b/lld/test/ELF/arm-thunk-linkerscript-dotexpr.s index 62b5ea0ebc956..08edcdf7b0afa 100644 --- a/lld/test/ELF/arm-thunk-linkerscript-dotexpr.s +++ b/lld/test/ELF/arm-thunk-linkerscript-dotexpr.s @@ -32,8 +32,8 @@ low_target2: // CHECK1-NEXT: <_start>: // CHECK1-NEXT: 94: 70 47 bx lr // CHECK1: : -// CHECK1-NEXT: 96: 00 f0 03 f8 bl #6 -// CHECK1-NEXT: 9a: 00 f0 06 f8 bl #12 +// CHECK1-NEXT: 96: 00 f0 03 f8 bl 0xa0 <__Thumbv7ABSLongThunk_high_target> +// CHECK1-NEXT: 9a: 00 f0 06 f8 bl 0xaa <__Thumbv7ABSLongThunk_high_target2> // CHECK1: <__Thumbv7ABSLongThunk_high_target>: // CHECK1-NEXT: a0: 40 f2 bd 0c movw r12, #189 // CHECK1-NEXT: a4: c0 f2 00 2c movt r12, #512 @@ -43,8 +43,8 @@ low_target2: // CHECK1-NEXT: ae: c0 f2 00 2c movt r12, #512 // CHECK1-NEXT: b2: 60 47 bx r12 // CHECK1: : -// CHECK1-NEXT: b4: ff f7 f4 ff bl #-24 -// CHECK1-NEXT: b8: ff f7 f7 ff bl #-18 +// CHECK1-NEXT: b4: ff f7 f4 ff bl 0xa0 <__Thumbv7ABSLongThunk_high_target> +// CHECK1-NEXT: b8: ff f7 f7 ff bl 0xaa <__Thumbv7ABSLongThunk_high_target2> .section .text_high, "ax", %progbits .thumb @@ -63,8 +63,8 @@ high_target2: bl low_target2 // CHECK2: : -// CHECK2-NEXT: 20000bc: 00 f0 02 f8 bl #4 -// CHECK2-NEXT: 20000c0: 00 f0 05 f8 bl #10 +// CHECK2-NEXT: 20000bc: 00 f0 02 f8 bl 0x20000c4 <__Thumbv7ABSLongThunk_low_target> +// CHECK2-NEXT: 20000c0: 00 f0 05 f8 bl 0x20000ce <__Thumbv7ABSLongThunk_low_target2> // CHECK2: <__Thumbv7ABSLongThunk_low_target>: // CHECK2-NEXT: 20000c4: 40 f2 97 0c movw r12, #151 // CHECK2-NEXT: 20000c8: c0 f2 00 0c movt r12, #0 @@ -74,5 +74,5 @@ high_target2: // CHECK2-NEXT: 20000d2: c0 f2 00 0c movt r12, #0 // CHECK2-NEXT: 20000d6: 60 47 bx r12 // CHECK2: : -// CHECK2-NEXT: 20000d8: ff f7 f4 ff bl #-24 -// CHECK2-NEXT: 20000dc: ff f7 f7 ff bl #-18 +// CHECK2-NEXT: 20000d8: ff f7 f4 ff bl 0x20000c4 <__Thumbv7ABSLongThunk_low_target> +// CHECK2-NEXT: 20000dc: ff f7 f7 ff bl 0x20000ce <__Thumbv7ABSLongThunk_low_target2> diff --git a/lld/test/ELF/arm-thunk-linkerscript-large.s b/lld/test/ELF/arm-thunk-linkerscript-large.s index 580719f6e11de..4143dfc2204ff 100644 --- a/lld/test/ELF/arm-thunk-linkerscript-large.s +++ b/lld/test/ELF/arm-thunk-linkerscript-large.s @@ -55,8 +55,8 @@ _start: // CHECK1: Disassembly of section .text: // CHECK1-EMPTY: // CHECK1-NEXT: <_start>: -// CHECK1-NEXT: 100000: ff f0 fe ff bl #1048572 -// CHECK1-NEXT: 100004: 00 f0 00 f8 bl #0 +// CHECK1-NEXT: 100000: ff f0 fe ff bl 0x200000 +// CHECK1-NEXT: 100004: 00 f0 00 f8 bl 0x100008 <__Thumbv7ABSLongThunk_tfunch31> // CHECK1: <__Thumbv7ABSLongThunk_tfunch31>: // CHECK1-NEXT: 100008: 40 f2 01 0c movw r12, #1 // CHECK1-NEXT: 10000c: c0 f2 10 4c movt r12, #1040 @@ -70,8 +70,8 @@ _start: // CHECK2-EMPTY: // CHECK2-NEXT: : // CHECK2-NEXT: 200000: 70 47 bx lr -// CHECK2-NEXT: 200002: ff f0 ff df bl #9437182 -// CHECK2-NEXT: 200006: ff f6 ff ff bl #-1048578 +// CHECK2-NEXT: 200002: ff f0 ff df bl 0xb00004 <__Thumbv7ABSLongThunk_tfuncl24> +// CHECK2-NEXT: 200006: ff f6 ff ff bl 0x100008 <__Thumbv7ABSLongThunk_tfunch31> FUNCTIONL 01 FUNCTIONL 02 FUNCTIONL 03 @@ -82,7 +82,7 @@ _start: FUNCTIONL 08 FUNCTIONL 09 // CHECK3: <__Thumbv7ABSLongThunk_tfuncl24>: -// CHECK3-NEXT: b00004: ff f2 fc 97 b.w #15728632 +// CHECK3-NEXT: b00004: ff f2 fc 97 b.w 0x1a00000 FUNCTIONL 10 FUNCTIONL 11 FUNCTIONL 12 @@ -110,9 +110,9 @@ _start: bl tfuncl24 // Shouldn't need a thunk bl tfunch00 -// CHECK4: 2100002: 00 f0 05 f8 bl #10 -// CHECK4-NEXT: 2100006: ff f4 fb f7 bl #-7340042 -// CHECK4-NEXT: 210000a: ff f0 f9 ff bl #1048562 +// CHECK4: 2100002: 00 f0 05 f8 bl 0x2100010 <__Thumbv7ABSLongThunk_tfuncl00> +// CHECK4-NEXT: 2100006: ff f4 fb f7 bl 0x1a00000 +// CHECK4-NEXT: 210000a: ff f0 f9 ff bl 0x2200000 // CHECK4: <__Thumbv7ABSLongThunk_tfuncl00>: // CHECK4-NEXT: 2100010: 40 f2 01 0c movw r12, #1 // CHECK4-NEXT: 2100014: c0 f2 20 0c movt r12, #32 @@ -127,9 +127,9 @@ _start: // CHECK5-EMPTY: // CHECK5-NEXT: : // CHECK5-NEXT: 2200000: 70 47 bx lr -// CHECK5-NEXT: 2200002: 00 f7 05 f8 bl #-1048566 -// CHECK5-NEXT: 2200006: ff f7 fb df bl #-8388618 -// CHECK5-NEXT: 220000a: ff f6 f9 ff bl #-1048590 +// CHECK5-NEXT: 2200002: 00 f7 05 f8 bl 0x2100010 <__Thumbv7ABSLongThunk_tfuncl00> +// CHECK5-NEXT: 2200006: ff f7 fb df bl 0x1a00000 +// CHECK5-NEXT: 220000a: ff f6 f9 ff bl 0x2100000 FUNCTIONH 01 FUNCTIONH 02 FUNCTIONH 03 @@ -166,8 +166,8 @@ _start: bl tfunch00 // CHECK6: : // CHECK6-NEXT: 4100000: 70 47 bx lr -// CHECK6-NEXT: 4100002: 00 f0 03 f8 bl #6 -// CHECK6-NEXT: 4100006: 00 f0 06 f8 bl #12 +// CHECK6-NEXT: 4100002: 00 f0 03 f8 bl 0x410000c <__Thumbv7ABSLongThunk_tfuncl00> +// CHECK6-NEXT: 4100006: 00 f0 06 f8 bl 0x4100016 <__Thumbv7ABSLongThunk_tfunch00> // CHECK6: <__Thumbv7ABSLongThunk_tfuncl00>: // CHECK6-NEXT: 410000c: 40 f2 01 0c movw r12, #1 // CHECK6-NEXT: 4100010: c0 f2 20 0c movt r12, #32 diff --git a/lld/test/ELF/arm-thunk-linkerscript-orphan.s b/lld/test/ELF/arm-thunk-linkerscript-orphan.s index 103e2c66c98f5..b9d52edeb3cf5 100644 --- a/lld/test/ELF/arm-thunk-linkerscript-orphan.s +++ b/lld/test/ELF/arm-thunk-linkerscript-orphan.s @@ -22,8 +22,8 @@ low_target: // CHECK-NEXT: <_start>: // CHECK-NEXT: 100000: 70 47 bx lr // CHECK: : -// CHECK-NEXT: 100002: 00 f0 03 f8 bl #6 -// CHECK-NEXT: 100006: 00 f0 06 f8 bl #12 +// CHECK-NEXT: 100002: 00 f0 03 f8 bl 0x10000c <__Thumbv7ABSLongThunk_high_target> +// CHECK-NEXT: 100006: 00 f0 06 f8 bl 0x100016 <__Thumbv7ABSLongThunk_orphan_target> // CHECK: <__Thumbv7ABSLongThunk_high_target>: // CHECK-NEXT: 10000c: 40 f2 01 0c movw r12, #1 // CHECK-NEXT: 100010: c0 f2 00 2c movt r12, #512 @@ -42,8 +42,8 @@ high_target: // CHECK: Disassembly of section .text_high: // CHECK-EMPTY: // CHECK-NEXT: : -// CHECK-NEXT: 2000000: 00 f0 02 f8 bl #4 -// CHECK-NEXT: 2000004: 00 f0 06 f8 bl #12 +// CHECK-NEXT: 2000000: 00 f0 02 f8 bl 0x2000008 <__Thumbv7ABSLongThunk_low_target> +// CHECK-NEXT: 2000004: 00 f0 06 f8 bl 0x2000014 // CHECK: <__Thumbv7ABSLongThunk_low_target>: // CHECK-NEXT: 2000008: 40 f2 03 0c movw r12, #3 // CHECK-NEXT: 200000c: c0 f2 10 0c movt r12, #16 @@ -59,8 +59,8 @@ orphan_target: // CHECK: Disassembly of section orphan: // CHECK-EMPTY: // CHECK-NEXT: : -// CHECK-NEXT: 2000014: ff f7 f8 ff bl #-16 -// CHECK-NEXT: 2000018: ff f7 f2 ff bl #-28 +// CHECK-NEXT: 2000014: ff f7 f8 ff bl 0x2000008 <__Thumbv7ABSLongThunk_low_target> +// CHECK-NEXT: 2000018: ff f7 f2 ff bl 0x2000000 .data .word 10 diff --git a/lld/test/ELF/arm-thunk-linkerscript-sort.s b/lld/test/ELF/arm-thunk-linkerscript-sort.s index 511ec52d3e4bd..cdb1ee517996d 100644 --- a/lld/test/ELF/arm-thunk-linkerscript-sort.s +++ b/lld/test/ELF/arm-thunk-linkerscript-sort.s @@ -41,7 +41,7 @@ tfunc\suff\(): FUNCTION 16 FUNCTION 15 // CHECK2: <__Thumbv7ABSLongThunk_tfunc31>: -// CHECK2-NEXT: 1000004: ff f3 fc 97 b.w #16777208 +// CHECK2-NEXT: 1000004: ff f3 fc 97 b.w 0x2000000 FUNCTION 14 FUNCTION 13 FUNCTION 12 @@ -65,5 +65,5 @@ _start: bl tfunc01 bl tfunc31 // CHECK1: <_start>: -// CHECK1-NEXT: 100000: ff f0 fe ff bl #1048572 -// CHECK1-NEXT: 100004: ff f2 fe d7 bl #15728636 +// CHECK1-NEXT: 100000: ff f0 fe ff bl 0x200000 +// CHECK1-NEXT: 100004: ff f2 fe d7 bl 0x1000004 <__Thumbv7ABSLongThunk_tfunc31> diff --git a/lld/test/ELF/arm-thunk-linkerscript.s b/lld/test/ELF/arm-thunk-linkerscript.s index a08aadc795e75..0536657283a8d 100644 --- a/lld/test/ELF/arm-thunk-linkerscript.s +++ b/lld/test/ELF/arm-thunk-linkerscript.s @@ -32,8 +32,8 @@ low_target2: // CHECK-NEXT: <_start>: // CHECK-NEXT: 94: 70 47 bx lr // CHECK: : -// CHECK-NEXT: 96: 00 f0 03 f8 bl #6 -// CHECK-NEXT: 9a: 00 f0 06 f8 bl #12 +// CHECK-NEXT: 96: 00 f0 03 f8 bl 0xa0 <__Thumbv7ABSLongThunk_high_target> +// CHECK-NEXT: 9a: 00 f0 06 f8 bl 0xaa <__Thumbv7ABSLongThunk_high_target2> // CHECK: <__Thumbv7ABSLongThunk_high_target>: // CHECK-NEXT: a0: 40 f2 01 0c movw r12, #1 // CHECK-NEXT: a4: c0 f2 00 2c movt r12, #512 @@ -43,8 +43,8 @@ low_target2: // CHECK-NEXT: ae: c0 f2 00 2c movt r12, #512 // CHECK-NEXT: b2: 60 47 bx r12 // CHECK: : -// CHECK-NEXT: b4: ff f7 f4 ff bl #-24 -// CHECK-NEXT: b8: ff f7 f7 ff bl #-18 +// CHECK-NEXT: b4: ff f7 f4 ff bl 0xa0 <__Thumbv7ABSLongThunk_high_target> +// CHECK-NEXT: b8: ff f7 f7 ff bl 0xaa <__Thumbv7ABSLongThunk_high_target2> .section .text_high, "ax", %progbits .thumb @@ -65,8 +65,8 @@ high_target2: // CHECK: Disassembly of section .text_high: // CHECK-EMPTY: // CHECK-NEXT: : -// CHECK-NEXT: 2000000: 00 f0 02 f8 bl #4 -// CHECK-NEXT: 2000004: 00 f0 05 f8 bl #10 +// CHECK-NEXT: 2000000: 00 f0 02 f8 bl 0x2000008 <__Thumbv7ABSLongThunk_low_target> +// CHECK-NEXT: 2000004: 00 f0 05 f8 bl 0x2000012 <__Thumbv7ABSLongThunk_low_target2> // CHECK: <__Thumbv7ABSLongThunk_low_target>: // CHECK-NEXT: 2000008: 40 f2 97 0c movw r12, #151 // CHECK-NEXT: 200000c: c0 f2 00 0c movt r12, #0 @@ -76,5 +76,5 @@ high_target2: // CHECK-NEXT: 2000016: c0 f2 00 0c movt r12, #0 // CHECK-NEXT: 200001a: 60 47 bx r12 // CHECK: : -// CHECK-NEXT: 200001c: ff f7 f4 ff bl #-24 -// CHECK-NEXT: 2000020: ff f7 f7 ff bl #-18 +// CHECK-NEXT: 200001c: ff f7 f4 ff bl 0x2000008 <__Thumbv7ABSLongThunk_low_target> +// CHECK-NEXT: 2000020: ff f7 f7 ff bl 0x2000012 <__Thumbv7ABSLongThunk_low_target2> diff --git a/lld/test/ELF/arm-thunk-many-passes.s b/lld/test/ELF/arm-thunk-many-passes.s index 9e8f428bb01f8..b4c35c8da27fa 100644 --- a/lld/test/ELF/arm-thunk-many-passes.s +++ b/lld/test/ELF/arm-thunk-many-passes.s @@ -35,24 +35,24 @@ // CHECK-ELF-NEXT: Value: 0x101104C // CHECK: 00011000 <_start>: -// CHECK-NEXT: 11000: b.w #14680132 <__Thumbv7ABSLongThunk_f2> -// CHECK-NEXT: 11004: b.w #14680128 <__Thumbv7ABSLongThunk_f2> -// CHECK-NEXT: 11008: b.w #14680128 <__Thumbv7ABSLongThunk_f3> -// CHECK-NEXT: 1100c: b.w #14680124 <__Thumbv7ABSLongThunk_f3> -// CHECK-NEXT: 11010: b.w #14680124 <__Thumbv7ABSLongThunk_f4> -// CHECK-NEXT: 11014: b.w #14680120 <__Thumbv7ABSLongThunk_f4> -// CHECK-NEXT: 11018: b.w #14680120 <__Thumbv7ABSLongThunk_f5> -// CHECK-NEXT: 1101c: b.w #14680116 <__Thumbv7ABSLongThunk_f5> -// CHECK-NEXT: 11020: b.w #14680116 <__Thumbv7ABSLongThunk_f6> -// CHECK-NEXT: 11024: b.w #14680112 <__Thumbv7ABSLongThunk_f6> -// CHECK-NEXT: 11028: b.w #14680112 <__Thumbv7ABSLongThunk_f7> -// CHECK-NEXT: 1102c: b.w #14680108 <__Thumbv7ABSLongThunk_f7> -// CHECK-NEXT: 11030: b.w #14680108 <__Thumbv7ABSLongThunk_f8> -// CHECK-NEXT: 11034: b.w #14680104 <__Thumbv7ABSLongThunk_f8> -// CHECK-NEXT: 11038: b.w #14680104 <__Thumbv7ABSLongThunk_f9> -// CHECK-NEXT: 1103c: b.w #14680100 <__Thumbv7ABSLongThunk_f9> -// CHECK-NEXT: 11040: b.w #14680100 <__Thumbv7ABSLongThunk_f10> -// CHECK-NEXT: 11044: b.w #14680096 <__Thumbv7ABSLongThunk_f10> +// CHECK-NEXT: 11000: b.w 0xe11048 <__Thumbv7ABSLongThunk_f2> +// CHECK-NEXT: 11004: b.w 0xe11048 <__Thumbv7ABSLongThunk_f2> +// CHECK-NEXT: 11008: b.w 0xe1104c <__Thumbv7ABSLongThunk_f3> +// CHECK-NEXT: 1100c: b.w 0xe1104c <__Thumbv7ABSLongThunk_f3> +// CHECK-NEXT: 11010: b.w 0xe11050 <__Thumbv7ABSLongThunk_f4> +// CHECK-NEXT: 11014: b.w 0xe11050 <__Thumbv7ABSLongThunk_f4> +// CHECK-NEXT: 11018: b.w 0xe11054 <__Thumbv7ABSLongThunk_f5> +// CHECK-NEXT: 1101c: b.w 0xe11054 <__Thumbv7ABSLongThunk_f5> +// CHECK-NEXT: 11020: b.w 0xe11058 <__Thumbv7ABSLongThunk_f6> +// CHECK-NEXT: 11024: b.w 0xe11058 <__Thumbv7ABSLongThunk_f6> +// CHECK-NEXT: 11028: b.w 0xe1105c <__Thumbv7ABSLongThunk_f7> +// CHECK-NEXT: 1102c: b.w 0xe1105c <__Thumbv7ABSLongThunk_f7> +// CHECK-NEXT: 11030: b.w 0xe11060 <__Thumbv7ABSLongThunk_f8> +// CHECK-NEXT: 11034: b.w 0xe11060 <__Thumbv7ABSLongThunk_f8> +// CHECK-NEXT: 11038: b.w 0xe11064 <__Thumbv7ABSLongThunk_f9> +// CHECK-NEXT: 1103c: b.w 0xe11064 <__Thumbv7ABSLongThunk_f9> +// CHECK-NEXT: 11040: b.w 0xe11068 <__Thumbv7ABSLongThunk_f10> +// CHECK-NEXT: 11044: b.w 0xe11068 <__Thumbv7ABSLongThunk_f10> .thumb diff --git a/lld/test/ELF/arm-thunk-multipass-plt.s b/lld/test/ELF/arm-thunk-multipass-plt.s index 328a85d0e07a0..d1cd7b8e1093e 100644 --- a/lld/test/ELF/arm-thunk-multipass-plt.s +++ b/lld/test/ELF/arm-thunk-multipass-plt.s @@ -41,9 +41,9 @@ needsplt: .section .text.07, "ax", %progbits .space (1024 * 1024) -/// 0x70000c + 8 + 0x60002c = 0xd00040 = preemptible@plt +/// 0xd00040 = preemptible@plt // CHECK: 0070000c <__ARMV5PILongThunk_preemptible>: -// CHECK-NEXT: 70000c: b #6291500 +// CHECK-NEXT: 70000c: b 0xd00040 .section .text.08, "ax", %progbits .space (1024 * 1024) - 4 @@ -52,8 +52,7 @@ needsplt: .balign 2 bl preemptible bl preemptible2 -/// 0x80000c + 4 - 100004 = 0x70000c = __ARMv5PILongThunk_preemptible -// CHECK-CALL: 80000c: blx #-1048580 +// CHECK-CALL: 80000c: blx 0x70000c <__ARMV5PILongThunk_preemptible> .balign 2 .globl preemptible .type preemptible, %function diff --git a/lld/test/ELF/arm-thunk-multipass.s b/lld/test/ELF/arm-thunk-multipass.s index 5e8d9a239942f..3e82b565ce2db 100644 --- a/lld/test/ELF/arm-thunk-multipass.s +++ b/lld/test/ELF/arm-thunk-multipass.s @@ -27,8 +27,8 @@ _start: b.w arm_target // arm_target is in range but needs an interworking thunk // CHECK1: <_start>: -// CHECK1-NEXT: 100002: 00 f3 06 d0 bl #15728652 -// CHECK1-NEXT: 100006: ff f2 ff 97 b.w #15728638 <__Thumbv7ABSLongThunk_arm_target> +// CHECK1-NEXT: 100002: 00 f3 06 d0 bl 0x1000012 <__Thumbv7ABSLongThunk_target> +// CHECK1-NEXT: 100006: ff f2 ff 97 b.w 0x1000008 <__Thumbv7ABSLongThunk_arm_target> nop nop nop @@ -64,9 +64,9 @@ arm_target: // CHECK2-NEXT: 100000c: c0 f2 00 1c movt r12, #256 // CHECK2-NEXT: 1000010: 60 47 bx r12 // CHECK2: <__Thumbv7ABSLongThunk_target>: -// CHECK2-NEXT: 1000012: ff f0 ff bf b.w #1048574 +// CHECK2-NEXT: 1000012: ff f0 ff bf b.w 0x1100014 // CHECK2: <__Thumbv7ABSLongThunk_target2>: -// CHECK2-NEXT: 1000016: ff f4 fc 97 b.w #-15728648 +// CHECK2-NEXT: 1000016: ff f4 fc 97 b.w 0x100012 .section .text.17, "ax", %progbits // Just enough space so that bl target is in range if no extension thunks are @@ -86,7 +86,7 @@ target: nop bx lr // CHECK3: : -// CHECK3-NEXT: 1100014: ff f6 ff ff bl #-1048578 +// CHECK3-NEXT: 1100014: ff f6 ff ff bl 0x1000016 <__Thumbv7ABSLongThunk_target2> // CHECK3-NEXT: 1100018: 00 bf nop // CHECK3-NEXT: 110001a: 00 bf nop // CHECK3-NEXT: 110001c: 70 47 bx lr diff --git a/lld/test/ELF/arm-thunk-nosuitable.s b/lld/test/ELF/arm-thunk-nosuitable.s index 1eed54b4fbc29..749b53fe0fbe3 100644 --- a/lld/test/ELF/arm-thunk-nosuitable.s +++ b/lld/test/ELF/arm-thunk-nosuitable.s @@ -20,9 +20,9 @@ _start: bx lr // CHECK: <_start>: -// CHECK-NEXT: 2200b4: 00 f0 00 80 beq.w #0 +// CHECK-NEXT: 2200b4: 00 f0 00 80 beq.w 0x2200b8 <__Thumbv7ABSLongThunk_target> // CHECK: <__Thumbv7ABSLongThunk_target>: -// CHECK-NEXT: 2200b8: 00 f0 01 90 b.w #12582914 +// CHECK-NEXT: 2200b8: 00 f0 01 90 b.w 0xe200be // CHECK: 2200bc: 70 47 bx lr .section .text.2, "ax", %progbits diff --git a/lld/test/ELF/arm-thunk-re-add.s b/lld/test/ELF/arm-thunk-re-add.s index 9d95e916297ba..1c6c3582b9b83 100644 --- a/lld/test/ELF/arm-thunk-re-add.s +++ b/lld/test/ELF/arm-thunk-re-add.s @@ -94,9 +94,9 @@ callers: // CHECK2-NEXT: 1100010: fc 44 add r12, pc // CHECK2-NEXT: 1100012: 60 47 bx r12 // CHECK2: : -// CHECK2-NEXT: 1100014: ff f6 f6 bf b.w #-1048596 <__ThumbV7PILongThunk_imported> -// CHECK2-NEXT: 1100018: 3f f4 f6 af beq.w #-20 <__ThumbV7PILongThunk_imported> -// CHECK2-NEXT: 110001c: ff f6 f8 bf b.w #-1048592 <__ThumbV7PILongThunk_imported2> +// CHECK2-NEXT: 1100014: ff f6 f6 bf b.w 0x1000004 <__ThumbV7PILongThunk_imported> +// CHECK2-NEXT: 1100018: 3f f4 f6 af beq.w 0x1100008 <__ThumbV7PILongThunk_imported> +// CHECK2-NEXT: 110001c: ff f6 f8 bf b.w 0x1000010 <__ThumbV7PILongThunk_imported2> // CHECK3: Disassembly of section .plt: // CHECK3-EMPTY: diff --git a/lld/test/ELF/arm-undefined-weak.s b/lld/test/ELF/arm-undefined-weak.s index 32ab54765947e..8e4ccff7b6c42 100644 --- a/lld/test/ELF/arm-undefined-weak.s +++ b/lld/test/ELF/arm-undefined-weak.s @@ -33,9 +33,9 @@ _start: // CHECK: Disassembly of section .text: // CHECK-EMPTY: // CHECK-NEXT: 100100b4 <_start>: -// CHECK-NEXT: 100100b4: b #-4 -// CHECK-NEXT: 100100b8: bl #-4 -// CHECK-NEXT: 100100bc: bl #-4 +// CHECK-NEXT: 100100b4: b {{.+}} @ imm = #-4 +// CHECK-NEXT: 100100b8: bl {{.+}} @ imm = #-4 +// CHECK-NEXT: 100100bc: bl {{.+}} @ imm = #-4 // CHECK-NEXT: 100100c0: movt r0, #0 // CHECK-NEXT: 100100c4: movw r0, #0 // CHECK: 100100c8: 00 00 00 00 .word 0x00000000 diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp index aa0828ea0c02c..15bbc7d503184 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp @@ -17,6 +17,7 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrAnalysis.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" @@ -348,6 +349,20 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, } } +void ARMInstPrinter::printOperand(const MCInst *MI, uint64_t Address, + unsigned OpNum, const MCSubtargetInfo &STI, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNum); + if (!Op.isImm() || !PrintBranchImmAsAddress || getUseMarkup()) + return printOperand(MI, OpNum, STI, O); + uint64_t Target = ARM_MC::evaluateBranchTarget(MII.get(MI->getOpcode()), + Address, Op.getImm()); + Target &= 0xffffffff; + O << formatHex(Target); + if (CommentStream) + *CommentStream << "imm = #" << formatImm(Op.getImm()) << '\n'; +} + void ARMInstPrinter::printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O) { diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h index 0c686e434197c..aab5e13545c19 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h @@ -44,10 +44,8 @@ class ARMInstPrinter : public MCInstPrinter { void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); - void printOperand(const MCInst *MI, uint64_t /*Address*/, unsigned OpNum, - const MCSubtargetInfo &STI, raw_ostream &O) { - printOperand(MI, OpNum, STI, O); - } + void printOperand(const MCInst *MI, uint64_t Address, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printSORegRegOperand(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 3fad668a89a44..87cce08b1ce4f 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -195,6 +195,24 @@ bool ARM_MC::isCPSRDefined(const MCInst &MI, const MCInstrInfo *MCII) { return false; } +uint64_t ARM_MC::evaluateBranchTarget(const MCInstrDesc &InstDesc, + uint64_t Addr, int64_t Imm) { + // For ARM instructions the PC offset is 8 bytes, for Thumb instructions it + // is 4 bytes. + uint64_t Offset = + ((InstDesc.TSFlags & ARMII::FormMask) == ARMII::ThumbFrm) ? 4 : 8; + + // A Thumb instruction BLX(i) can be 16-bit aligned while targets Arm code + // which is 32-bit aligned. The target address for the case is calculated as + // targetAddress = Align(PC,4) + imm32; + // where + // Align(x, y) = y * (x DIV y); + if (InstDesc.getOpcode() == ARM::tBLXi) + Addr &= ~0x3; + + return Addr + Imm + Offset; +} + MCSubtargetInfo *ARM_MC::createARMMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { std::string ArchFS = ARM_MC::ParseARMTriple(TT, CPU); @@ -413,32 +431,15 @@ class ARMMCInstrAnalysis : public MCInstrAnalysis { const MCInstrDesc &Desc = Info->get(Inst.getOpcode()); // Find the PC-relative immediate operand in the instruction. - bool FoundImm = false; - int64_t Imm; for (unsigned OpNum = 0; OpNum < Desc.getNumOperands(); ++OpNum) { if (Inst.getOperand(OpNum).isImm() && Desc.OpInfo[OpNum].OperandType == MCOI::OPERAND_PCREL) { - Imm = Inst.getOperand(OpNum).getImm(); - FoundImm = true; + int64_t Imm = Inst.getOperand(OpNum).getImm(); + Target = ARM_MC::evaluateBranchTarget(Desc, Addr, Imm); + return true; } } - if (!FoundImm) - return false; - - // For ARM instructions the PC offset is 8 bytes, for Thumb instructions it - // is 4 bytes. - uint64_t Offset = ((Desc.TSFlags & ARMII::FormMask) == ARMII::ThumbFrm) ? 4 : 8; - - // A Thumb instruction BLX(i) can be 16-bit aligned while targets Arm code - // which is 32-bit aligned. The target address for the case is calculated as - // targetAddress = Align(PC,4) + imm32; - // where - // Align(x, y) = y * (x DIV y); - if (Inst.getOpcode() == ARM::tBLXi) - Addr &= ~0x3; - - Target = Addr + Imm + Offset; - return true; + return false; } }; diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h index f08d78b670786..7ccdc6f855007 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h @@ -57,6 +57,9 @@ bool isLDMBaseRegInList(const Inst &MI) { return false; } +uint64_t evaluateBranchTarget(const MCInstrDesc &InstDesc, uint64_t Addr, + int64_t Imm); + /// Create a ARM MCSubtargetInfo instance. This is exposed so Asm parser, etc. /// do not need to go through TargetRegistry. MCSubtargetInfo *createARMMCSubtargetInfo(const Triple &TT, StringRef CPU, diff --git a/llvm/test/CodeGen/ARM/Windows/division-range.ll b/llvm/test/CodeGen/ARM/Windows/division-range.ll index c341042ad0b60..699346e1f34bc 100644 --- a/llvm/test/CodeGen/ARM/Windows/division-range.ll +++ b/llvm/test/CodeGen/ARM/Windows/division-range.ll @@ -10,6 +10,5 @@ entry: } ; CHECK: cmp r1, #0 -; CHECK: beq # +; CHECK: beq {{.+}} @ imm = # ; CHECK: bl - diff --git a/llvm/test/MC/ARM/arm-macho-calls.s b/llvm/test/MC/ARM/arm-macho-calls.s index 9b0a564ce260d..f3f96ebb65941 100644 --- a/llvm/test/MC/ARM/arm-macho-calls.s +++ b/llvm/test/MC/ARM/arm-macho-calls.s @@ -2,9 +2,9 @@ @ RUN: llvm-objdump -d -r %t | FileCheck %s @ CHECK: <_func>: -@ CHECK: bl #0 <_func+0x8> +@ CHECK: bl 0x8 <_func+0x8> @ imm = #0 @ CHECK: ARM_RELOC_BR24 __text -@ CHECK: bl #-12 <_func> +@ CHECK: bl 0x0 <_func> @ imm = #-12 @ CHECK: ARM_RELOC_BR24 _elsewhere .global _func _func: diff --git a/llvm/test/MC/ARM/branch-disassemble.s b/llvm/test/MC/ARM/branch-disassemble.s index 33fedc3bb685c..c6cb4ffa5edc9 100644 --- a/llvm/test/MC/ARM/branch-disassemble.s +++ b/llvm/test/MC/ARM/branch-disassemble.s @@ -7,8 +7,8 @@ @ RUN: | FileCheck %s -check-prefix CHECK-THUMB b.w .Lbranch -@ CHECK-ARM: b #4 <$a.0+0xc> -@ CHECK-THUMB: b.w #8 <$t.0+0xc> +@ CHECK-ARM: b 0xc <$a.0+0xc> @ imm = #4 +@ CHECK-THUMB: b.w 0xc <$t.0+0xc> @ imm = #8 adds r0, r1, #42 adds r1, r2, #42 .Lbranch: diff --git a/llvm/test/MC/ARM/coff-relocations.s b/llvm/test/MC/ARM/coff-relocations.s index 32a95f973c0f2..08e3cd9aefa2f 100644 --- a/llvm/test/MC/ARM/coff-relocations.s +++ b/llvm/test/MC/ARM/coff-relocations.s @@ -18,28 +18,28 @@ branch24t_0: b target @ CHECK-ENCODING-LABEL: : -@ CHECK-ENCODING-NEXT: b.w #0 +@ CHECK-ENCODING-NEXT: b.w {{.+}} @ imm = #0 .thumb_func branch24t_1: bl target @ CHECK-ENCODING-LABEL: : -@ CHECK-ENCODING-NEXR: bl #0 +@ CHECK-ENCODING-NEXR: bl {{.+}} @ imm = #0 .thumb_func branch20t: bcc target @ CHECK-ENCODING-LABEL: : -@ CHECK-ENCODING-NEXT: blo.w #0 +@ CHECK-ENCODING-NEXT: blo.w {{.+}} @ imm = #0 .thumb_func blx23t: blx target @ CHECK-ENCODING-LABEL: : -@ CHECK-ENCODING-NEXT: blx #0 +@ CHECK-ENCODING-NEXT: blx {{.+}} @ imm = #0 .thumb_func mov32t: diff --git a/llvm/test/MC/ARM/thumb-cb-thumbfunc.s b/llvm/test/MC/ARM/thumb-cb-thumbfunc.s index faff72e252e53..d59918ca35d3c 100644 --- a/llvm/test/MC/ARM/thumb-cb-thumbfunc.s +++ b/llvm/test/MC/ARM/thumb-cb-thumbfunc.s @@ -1,7 +1,7 @@ @ RUN: llvm-mc -triple thumbv7-apple-macho -filetype=obj -o %t %s @ RUN: llvm-objdump -d --triple=thumbv7 %t | FileCheck %s -@ CHECK: cbnz r0, #0 +@ CHECK: cbnz r0, 0x4 @ imm = #0 .thumb_func label4 cbnz r0, label4 .space 2 diff --git a/llvm/test/MC/ARM/thumb1-relax-bcc.s b/llvm/test/MC/ARM/thumb1-relax-bcc.s index d746c92a5f4d0..b2de0a344bddd 100644 --- a/llvm/test/MC/ARM/thumb1-relax-bcc.s +++ b/llvm/test/MC/ARM/thumb1-relax-bcc.s @@ -8,5 +8,5 @@ _func1: bne _func2 @ CHECK-ERROR: unsupported relocation on symbol -@ CHECK-ELF: 7f f4 fe af bne.w #-4 +@ CHECK-ELF: 7f f4 fe af bne.w {{.+}} @ imm = #-4 @ CHECK-ELF-NEXT: R_ARM_THM_JUMP19 _func2 diff --git a/llvm/test/MC/ARM/thumb1-relax-br.s b/llvm/test/MC/ARM/thumb1-relax-br.s index 550737e80c5dc..4c0ecb33ff33a 100644 --- a/llvm/test/MC/ARM/thumb1-relax-br.s +++ b/llvm/test/MC/ARM/thumb1-relax-br.s @@ -12,8 +12,8 @@ _func1: @ CHECK-ERROR: unsupported relocation on symbol -@ CHECK-MACHO: ff f7 fe bf b.w #-4 +@ CHECK-MACHO: ff f7 fe bf b.w {{.+}} @ imm = #-4 @ CHECK-MACHO-NEXT: ARM_THUMB_RELOC_BR22 -@ CHECK-ELF: ff f7 fe bf b.w #-4 +@ CHECK-ELF: ff f7 fe bf b.w {{.+}} @ imm = #-4 @ CHECK-ELF-NEXT: R_ARM_THM_JUMP24 _func2 diff --git a/llvm/test/MC/ARM/thumb2-b.w-target.s b/llvm/test/MC/ARM/thumb2-b.w-target.s index 1323730552f0a..7ae1a3beaac3c 100644 --- a/llvm/test/MC/ARM/thumb2-b.w-target.s +++ b/llvm/test/MC/ARM/thumb2-b.w-target.s @@ -3,8 +3,8 @@ .syntax unified // CHECK-LABEL: start -// CHECK-NEXT: b.w #16777208 -// CHECK-NEXT: b.w #2 +// CHECK-NEXT: b.w {{.+}} @ imm = #16777208 +// CHECK-NEXT: b.w {{.+}} @ imm = #2 start: b.w start - 1f + 0x1000000 1: diff --git a/llvm/test/MC/ARM/thumb2-cbn-to-next-inst.s b/llvm/test/MC/ARM/thumb2-cbn-to-next-inst.s index 52ceafea27de0..12b6b57aac22c 100644 --- a/llvm/test/MC/ARM/thumb2-cbn-to-next-inst.s +++ b/llvm/test/MC/ARM/thumb2-cbn-to-next-inst.s @@ -23,11 +23,11 @@ L4: @ CHECK: 0: 02 eb 03 01 add.w r1, r2, r3 @ CHECK: 4: 00 bf nop @ CHECK: 6: 05 eb 06 04 add.w r4, r5, r6 -@ CHECK: a: 0a b9 cbnz r2, #2 +@ CHECK: a: 0a b9 cbnz r2, 0x10 @ imm = #2 @ CHECK: c: a8 eb 09 07 sub.w r7, r8, r9 @ CHECK: 10: 08 eb 09 07 add.w r7, r8, r9 @ CHECK: 14: 00 bf nop @ CHECK: 16: 0b eb 0c 0a add.w r10, r11, r12 -@ CHECK: 1a: 0a b1 cbz r2, #2 +@ CHECK: 1a: 0a b1 cbz r2, 0x20 @ imm = #2 @ CHECK: 1c: a8 eb 09 07 sub.w r7, r8, r9 @ CHECK: 20: 04 eb 05 03 add.w r3, r4, r5 diff --git a/llvm/test/MC/Disassembler/ARM/mve-lol.txt b/llvm/test/MC/Disassembler/ARM/mve-lol.txt index 1646b7f252364..99cbf5079b8c0 100644 --- a/llvm/test/MC/Disassembler/ARM/mve-lol.txt +++ b/llvm/test/MC/Disassembler/ARM/mve-lol.txt @@ -5,45 +5,45 @@ # checking that we see branch targets annotations like <$t.0+0xc> in the # disassembly. -# CHECK: wls lr, r3, #8 <$t.0+0xc> +# CHECK: wls lr, r3, 0xc <$t.0+0xc> @ imm = #8 # CHECK: vmov q0, q1 -# CHECK: le lr, #-8 <$t.0+0x4> +# CHECK: le lr, 0x4 <$t.0+0x4> @ imm = #-8 wls lr, r3, #8 vmov q0, q1 le lr, #-8 -# CHECK: wlstp.8 lr, r3, #8 <$t.0+0x18> +# CHECK: wlstp.8 lr, r3, 0x18 <$t.0+0x18> @ imm = #8 # CHECK: vmov q0, q1 -# CHECK: letp lr, #-8 <$t.0+0x10> +# CHECK: letp lr, 0x10 <$t.0+0x10> @ imm = #-8 wlstp.8 lr, r3, #8 vmov q0, q1 letp lr, #-8 -# CHECK: wlstp.16 lr, r3, #8 <$t.0+0x24> +# CHECK: wlstp.16 lr, r3, 0x24 <$t.0+0x24> @ imm = #8 # CHECK: vmov q0, q1 -# CHECK: letp lr, #-8 <$t.0+0x1c> +# CHECK: letp lr, 0x1c <$t.0+0x1c> @ imm = #-8 wlstp.16 lr, r3, #8 vmov q0, q1 letp lr, #-8 -# CHECK: wlstp.32 lr, r3, #8 <$t.0+0x30> +# CHECK: wlstp.32 lr, r3, 0x30 <$t.0+0x30> @ imm = #8 # CHECK: vmov q0, q1 -# CHECK: letp lr, #-8 <$t.0+0x28> +# CHECK: letp lr, 0x28 <$t.0+0x28> @ imm = #-8 wlstp.32 lr, r3, #8 vmov q0, q1 letp lr, #-8 -# CHECK: wlstp.64 lr, r3, #8 <$t.0+0x3c> +# CHECK: wlstp.64 lr, r3, 0x3c <$t.0+0x3c> @ imm = #8 # CHECK: vmov q0, q1 -# CHECK: letp lr, #-8 <$t.0+0x34> +# CHECK: letp lr, 0x34 <$t.0+0x34> @ imm = #-8 wlstp.64 lr, r3, #8 vmov q0, q1 diff --git a/llvm/test/tools/llvm-objdump/ELF/ARM/branch-symbols.s b/llvm/test/tools/llvm-objdump/ELF/ARM/branch-symbols.s index d967c21ae0488..c2c46c54ccdf8 100644 --- a/llvm/test/tools/llvm-objdump/ELF/ARM/branch-symbols.s +++ b/llvm/test/tools/llvm-objdump/ELF/ARM/branch-symbols.s @@ -7,8 +7,8 @@ foo: .arm b foo ble foo -@ CHECK: 0: b #-8 -@ CHECK: 4: ble #-12 +@ CHECK: 0: b 0x0 @ imm = #-8 +@ CHECK: 4: ble 0x0 @ imm = #-12 .thumb b foo @@ -19,29 +19,29 @@ foo: le lr, foo cbz r0, bar cbnz r0, bar -@ CHECK: 8: b #-12 -@ CHECK: a: b.w #-14 -@ CHECK: e: ble #-18 -@ CHECK: 10: ble.w #-20 -@ CHECK: 14: le #-24 -@ CHECK: 18: le lr, #-28 -@ CHECK: 1c: cbz r0, #40 -@ CHECK: 1e: cbnz r0, #38 +@ CHECK: 8: b 0x0 @ imm = #-12 +@ CHECK: a: b.w 0x0 @ imm = #-14 +@ CHECK: e: ble 0x0 @ imm = #-18 +@ CHECK: 10: ble.w 0x0 @ imm = #-20 +@ CHECK: 14: le 0x0 @ imm = #-24 +@ CHECK: 18: le lr, 0x0 @ imm = #-28 +@ CHECK: 1c: cbz r0, 0x48 @ imm = #40 +@ CHECK: 1e: cbnz r0, 0x48 @ imm = #38 // Calls without relocations (these offsets al correspond to label foo). .arm bl #-40 blx #-44 bleq #-48 -@ CHECK: 20: bl #-40 -@ CHECK: 24: blx #-44 -@ CHECK: 28: bleq #-48 +@ CHECK: 20: bl 0x0 @ imm = #-40 +@ CHECK: 24: blx 0x0 @ imm = #-44 +@ CHECK: 28: bleq 0x0 @ imm = #-48 .thumb bl #-48 blx #-52 -@ CHECK: 2c: bl #-48 -@ CHECK: 30: blx #-52 +@ CHECK: 2c: bl 0x0 @ imm = #-48 +@ CHECK: 30: blx 0x0 @ imm = #-52 // Calls with relocations. These currently emit a reference to their own // location, because we don't take relocations into account when printing @@ -50,21 +50,19 @@ foo: bl baz blx baz bleq baz -@ CHECK: 34: bl #-8 <$a.4> +@ CHECK: 34: bl {{.+}} @ imm = #-8 @ CHECK: 00000034: R_ARM_CALL baz -@ CHECK: 38: blx #-8 <$a.4+0x4> +@ CHECK: 38: blx {{.+}} @ imm = #-8 @ CHECK: 00000038: R_ARM_CALL baz -@ CHECK: 3c: bleq #-8 <$a.4+0x8> +@ CHECK: 3c: bleq {{.+}} @ imm = #-8 @ CHECK: 0000003c: R_ARM_JUMP24 baz .thumb bl baz blx baz -@ CHECK: 40: bl #-4 <$t.5> -@ CHECK: 00000040: R_ARM_THM_CALL baz -@ CHECK: 44: blx #-4 <$t.5+0x4> -@ CHECK: 00000044: R_ARM_THM_CALL baz +@ CHECK: 40: bl {{.+}} @ imm = #-4 +@ CHECK: 00000040: R_ARM_THM_CALL baz +@ CHECK: 44: blx {{.+}} @ imm = #-4 +@ CHECK: 00000044: R_ARM_THM_CALL baz bar: - - diff --git a/llvm/test/tools/llvm-objdump/ELF/ARM/invalid-instruction.s b/llvm/test/tools/llvm-objdump/ELF/ARM/invalid-instruction.s index a63300cadffea..2e6364514cf6a 100644 --- a/llvm/test/tools/llvm-objdump/ELF/ARM/invalid-instruction.s +++ b/llvm/test/tools/llvm-objdump/ELF/ARM/invalid-instruction.s @@ -5,5 +5,5 @@ .inst 0xffffffff l0: -@CHECK: 0: 00 00 00 ea b #0 +@CHECK: 0: 00 00 00 ea b 0x8 @ imm = #0 @CHECK-NEXT: 4: ff ff ff ff diff --git a/llvm/test/tools/llvm-objdump/ELF/ARM/tblxi-target.s b/llvm/test/tools/llvm-objdump/ELF/ARM/tblxi-target.s index 096c1a3a24026..ef6b87827838b 100644 --- a/llvm/test/tools/llvm-objdump/ELF/ARM/tblxi-target.s +++ b/llvm/test/tools/llvm-objdump/ELF/ARM/tblxi-target.s @@ -5,10 +5,12 @@ # RUN: llvm-objdump -dr - --triple armv8a --no-show-raw-insn | \ # RUN: FileCheck %s -# CHECK: : +# CHECK: 00000000 : +# CHECK: 00000004 : # CHECK-NEXT: 4: nop -# CHECK-NEXT: 6: blx #-8 -# CHECK-NEXT: a: blx #4 +# CHECK-NEXT: 6: blx 0x0 @ imm = #-8 +# CHECK-NEXT: a: blx 0x10 @ imm = #4 +# CHECK: 00000010 : .arm foo: diff --git a/llvm/test/tools/llvm-objdump/ELF/ARM/unknown-instr.test b/llvm/test/tools/llvm-objdump/ELF/ARM/unknown-instr.test index cb990b85dfce5..77972f9107969 100644 --- a/llvm/test/tools/llvm-objdump/ELF/ARM/unknown-instr.test +++ b/llvm/test/tools/llvm-objdump/ELF/ARM/unknown-instr.test @@ -14,7 +14,7 @@ # CHECK: 00000000 <.text>: # CHECK-NEXT: 0: cb -# CHECK-NEXT: 1: f3 f7 8b be b.w #-49898 +# CHECK-NEXT: 1: f3 f7 8b be b.w 0xffff3d1b <{{.+}}> @ imm = #-49898 --- !ELF FileHeader: From c818cb96ad4aa65bceadc72199677c852e8c22bd Mon Sep 17 00:00:00 2001 From: Valeriy Savchenko Date: Wed, 30 Jun 2021 12:49:31 +0300 Subject: [PATCH 268/619] [analyzer][satest][NFC] Relax dependencies requirements --- clang/utils/analyzer/Dockerfile | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/clang/utils/analyzer/Dockerfile b/clang/utils/analyzer/Dockerfile index f74ff8aa95c25..bb1dd60eeb9b8 100644 --- a/clang/utils/analyzer/Dockerfile +++ b/clang/utils/analyzer/Dockerfile @@ -13,16 +13,16 @@ RUN apt-add-repository -y 'deb https://apt.kitware.com/ubuntu/ bionic main' # test system dependencies RUN apt-get update && apt-get install -y \ - git=1:2.17.1-1ubuntu0.7 \ - gettext=0.19.8.1-6ubuntu0.3 \ + git=1:2.17.1* \ + gettext=0.19.8.1* \ python3=3.6.7-1~18.04 \ - python3-pip=9.0.1-2.3~ubuntu1.18.04.1 \ - cmake=3.17.3-0kitware1 \ + python3-pip=9.0.1-2.3* \ + cmake=3.20.5* \ ninja-build=1.8.2-1 # box2d dependencies RUN apt-get install -y \ - libx11-dev=2:1.6.4-3ubuntu0.2 \ + libx11-dev=2:1.6.4-3* \ libxrandr-dev=2:1.5.1-1 \ libxinerama-dev=2:1.1.3-1 \ libxcursor-dev=1:1.1.15-1 \ @@ -35,22 +35,22 @@ RUN apt-get install -y \ # simbody dependencies RUN apt-get install -y \ - liblapack-dev=3.7.1-4ubuntu1 + liblapack-dev=3.7.1-4* # drogon dependencies RUN apt-get install -y \ - libjsonrpccpp-dev=0.7.0-1build2 \ - uuid-dev=2.31.1-0.4ubuntu3.6 + libjsonrpccpp-dev=0.7.0-1* \ + uuid-dev=2.31.1-0.4* # tmux dependencies RUN apt-get install -y \ autotools-dev=20180224.1 \ - automake=1:1.15.1-3ubuntu2 \ - libncurses5-dev=6.1-1ubuntu1.18.04 \ - libevent-dev=2.1.8-stable-4build1 \ - pkg-config=0.29.1-0ubuntu2 \ + automake=1:1.15.1-3* \ + libncurses5-dev=6.1-1* \ + libevent-dev=2.1.8* \ + pkg-config=0.29.1-0* \ flex=2.6.4-6 \ - bison=2:3.0.4.dfsg-1build1 + bison=2:3.0.4.* RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 1 From 2da58826a5e0716163d3b91afcde1218a211d714 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Fri, 25 Jun 2021 17:08:36 +0100 Subject: [PATCH 269/619] [TableGen] Allow identical MnemonicAliases with no predicate My use case for this is illustrated in the test case: I want to define the same instruction twice with different (disjoint) predicates, because the instruction has different operands on different subtargets. It's convenient to do this with a multiclass that also defines an alias for the instruction. Previously tablegen would complain if this alias was defined twice with no predicate. One way to fix this would be to add a predicate on each definition of the alias, matching the predicate on the instruction. But this (a) is slightly awkward to do in the real world use case I had, and (b) leads to an inefficient matcher that will do something like this: if (Mnemonic == "foo_alias") { if (Features.test(Feature_Subtarget1Bit)) Mnemonic == "foo"; else if (Features.test(Feature_Subtarget2Bit)) Mnemonic == "foo"; return; } It would be more efficient to skip the feature tests and return "foo" unconditionally. Overall it seems better to allow multiple definitions of the identical alias with no predicate. Differential Revision: https://reviews.llvm.org/D105033 --- llvm/test/TableGen/MnemonicAlias.td | 41 +++++++++++++++++++++++ llvm/utils/TableGen/AsmMatcherEmitter.cpp | 12 ++++--- 2 files changed, 49 insertions(+), 4 deletions(-) create mode 100644 llvm/test/TableGen/MnemonicAlias.td diff --git a/llvm/test/TableGen/MnemonicAlias.td b/llvm/test/TableGen/MnemonicAlias.td new file mode 100644 index 0000000000000..518454a7ca63f --- /dev/null +++ b/llvm/test/TableGen/MnemonicAlias.td @@ -0,0 +1,41 @@ +// RUN: llvm-tblgen -gen-asm-matcher -I %p/../../include %s | FileCheck %s + +include "llvm/Target/Target.td" + +def ArchInstrInfo : InstrInfo { } + +def Arch : Target { + let InstructionSet = ArchInstrInfo; +} + +def Reg : Register<"reg">; +def RegClass : RegisterClass<"foo", [i32], 0, (add Reg)>; + +def AsmCond1 : SubtargetFeature<"cond1", "cond1", "true", "">; +def AsmCond2 : SubtargetFeature<"cond2", "cond2", "true", "">; + +def Subtarget1 : Predicate<"Pred1">, AssemblerPredicate<(all_of AsmCond1)>; +def Subtarget2 : Predicate<"Pred2">, AssemblerPredicate<(all_of AsmCond2)>; + +multiclass DefInstruction { + def "" : Instruction { + let Size = 2; + let OutOperandList = outs; + let InOperandList = ins; + let AsmString = name; + let Predicates = [pred]; + } + def : MnemonicAlias; +} + +defm FooInst1 : DefInstruction<"foo", (outs), (ins), Subtarget1>; + +defm FooInst2 : DefInstruction<"foo", (outs), (ins), Subtarget2>; + +// Check that applyMnemonicAliases maps "foo_alias" to "foo" once only and +// without checking any predicates. + +// CHECK: if (memcmp(Mnemonic.data()+0, "foo_alias", 9) != 0) +// CHECK-NEXT: break; +// CHECK-NEXT: Mnemonic = "foo"; // "foo_alias" +// CHECK-NEXT: return; diff --git a/llvm/utils/TableGen/AsmMatcherEmitter.cpp b/llvm/utils/TableGen/AsmMatcherEmitter.cpp index 96159a60c665a..00bdd127e3c28 100644 --- a/llvm/utils/TableGen/AsmMatcherEmitter.cpp +++ b/llvm/utils/TableGen/AsmMatcherEmitter.cpp @@ -2749,10 +2749,14 @@ static void emitMnemonicAliasVariant(raw_ostream &OS,const AsmMatcherInfo &Info, // If this unconditionally matches, remember it for later and diagnose // duplicates. if (FeatureMask.empty()) { - if (AliasWithNoPredicate != -1) { - // We can't have two aliases from the same mnemonic with no predicate. - PrintError(ToVec[AliasWithNoPredicate]->getLoc(), - "two MnemonicAliases with the same 'from' mnemonic!"); + if (AliasWithNoPredicate != -1 && + R->getValueAsString("ToMnemonic") != + ToVec[AliasWithNoPredicate]->getValueAsString("ToMnemonic")) { + // We can't have two different aliases from the same mnemonic with no + // predicate. + PrintError( + ToVec[AliasWithNoPredicate]->getLoc(), + "two different MnemonicAliases with the same 'from' mnemonic!"); PrintFatalError(R->getLoc(), "this is the other MnemonicAlias."); } From a24f104645f3faf4d0e4ea6047f933caddb85de5 Mon Sep 17 00:00:00 2001 From: Florian Mayer Date: Tue, 29 Jun 2021 20:11:41 +0100 Subject: [PATCH 270/619] [MTE] Remove redundant helper function. Looking at PostDominatorTree::dominates, we can see that has the same logic (with the addition of handling Phi nodes - which are not used as inputs in this pass) as the helper function. Reviewed By: eugenis Differential Revision: https://reviews.llvm.org/D105141 --- .../Target/AArch64/AArch64StackTagging.cpp | 20 +------------------ 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64StackTagging.cpp b/llvm/lib/Target/AArch64/AArch64StackTagging.cpp index 33224c658c805..7008b188e1f1c 100644 --- a/llvm/lib/Target/AArch64/AArch64StackTagging.cpp +++ b/llvm/lib/Target/AArch64/AArch64StackTagging.cpp @@ -520,24 +520,6 @@ void AArch64StackTagging::alignAndPadAlloca(AllocaInfo &Info) { Info.AI = NewAI; } -// Helper function to check for post-dominance. -static bool postDominates(const PostDominatorTree *PDT, const IntrinsicInst *A, - const IntrinsicInst *B) { - const BasicBlock *ABB = A->getParent(); - const BasicBlock *BBB = B->getParent(); - - if (ABB != BBB) - return PDT->dominates(ABB, BBB); - - for (const Instruction &I : *ABB) { - if (&I == B) - return true; - if (&I == A) - return false; - } - llvm_unreachable("Corrupt instruction list"); -} - // FIXME: check for MTE extension bool AArch64StackTagging::runOnFunction(Function &Fn) { if (!Fn.hasFnAttribute(Attribute::SanitizeMemTag)) @@ -666,7 +648,7 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) { tagAlloca(AI, Start->getNextNode(), Start->getArgOperand(1), Size); // We need to ensure that if we tag some object, we certainly untag it // before the function exits. - if (PDT != nullptr && postDominates(PDT, End, Start)) { + if (PDT != nullptr && PDT->dominates(End, Start)) { untagAlloca(AI, End, Size); } else { SmallVector ReachableRetVec; From 7b7b5b5a2669ff40882c1a5e1d92a75e15b925c4 Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Tue, 29 Jun 2021 16:12:17 +0100 Subject: [PATCH 271/619] [NFC] Rename shadowed variable in InnerLoopVectorizer::createInductionVariable Avoid creating a IRBuilder stack variable with the same name as the class member. --- .../Transforms/Vectorize/LoopVectorize.cpp | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 38a55d1281412..57abd0d26f5b5 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3071,13 +3071,13 @@ PHINode *InnerLoopVectorizer::createInductionVariable(Loop *L, Value *Start, if (!Latch) Latch = Header; - IRBuilder<> Builder(&*Header->getFirstInsertionPt()); + IRBuilder<> B(&*Header->getFirstInsertionPt()); Instruction *OldInst = getDebugLocFromInstOrOperands(OldInduction); - setDebugLocFromInst(Builder, OldInst); - auto *Induction = Builder.CreatePHI(Start->getType(), 2, "index"); + setDebugLocFromInst(B, OldInst); + auto *Induction = B.CreatePHI(Start->getType(), 2, "index"); - Builder.SetInsertPoint(Latch->getTerminator()); - setDebugLocFromInst(Builder, OldInst); + B.SetInsertPoint(Latch->getTerminator()); + setDebugLocFromInst(B, OldInst); // Create i+1 and fill the PHINode. // @@ -3086,14 +3086,13 @@ PHINode *InnerLoopVectorizer::createInductionVariable(Loop *L, Value *Start, // Start % Step == 0 and End % Step == 0. We exit the vector loop if %IV + // %Step == %End. Hence we must exit the loop before %IV + %Step unsigned // overflows and we can mark the induction increment as NUW. - Value *Next = - Builder.CreateAdd(Induction, Step, "index.next", - /*NUW=*/!Cost->foldTailByMasking(), /*NSW=*/false); + Value *Next = B.CreateAdd(Induction, Step, "index.next", + /*NUW=*/!Cost->foldTailByMasking(), /*NSW=*/false); Induction->addIncoming(Start, L->getLoopPreheader()); Induction->addIncoming(Next, Latch); // Create the compare. - Value *ICmp = Builder.CreateICmpEQ(Next, End); - Builder.CreateCondBr(ICmp, L->getUniqueExitBlock(), Header); + Value *ICmp = B.CreateICmpEQ(Next, End); + B.CreateCondBr(ICmp, L->getUniqueExitBlock(), Header); // Now we have two terminators. Remove the old one from the block. Latch->getTerminator()->eraseFromParent(); From ad8494c021d711779900bf63f01423f615b413a4 Mon Sep 17 00:00:00 2001 From: Florian Mayer Date: Mon, 28 Jun 2021 14:19:43 +0100 Subject: [PATCH 272/619] [hwasan] Make sure we retag with a new tag on free. Reviewed By: eugenis Differential Revision: https://reviews.llvm.org/D105021 --- compiler-rt/lib/hwasan/hwasan_allocator.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/compiler-rt/lib/hwasan/hwasan_allocator.cpp b/compiler-rt/lib/hwasan/hwasan_allocator.cpp index 21563be36ebef..e53de53e46d5c 100644 --- a/compiler-rt/lib/hwasan/hwasan_allocator.cpp +++ b/compiler-rt/lib/hwasan/hwasan_allocator.cpp @@ -196,6 +196,7 @@ static void HwasanDeallocate(StackTrace *stack, void *tagged_ptr) { : tagged_ptr; void *aligned_ptr = reinterpret_cast( RoundDownTo(reinterpret_cast(untagged_ptr), kShadowAlignment)); + tag_t pointer_tag = GetTagFromPointer(reinterpret_cast(tagged_ptr)); Metadata *meta = reinterpret_cast(allocator.GetMetaData(aligned_ptr)); uptr orig_size = meta->get_requested_size(); @@ -236,7 +237,8 @@ static void HwasanDeallocate(StackTrace *stack, void *tagged_ptr) { // The tag can be zero if tagging is disabled on this thread. do { tag = t->GenerateRandomTag(/*num_bits=*/8); - } while (UNLIKELY(tag < kShadowAlignment && tag != 0)); + } while ( + UNLIKELY((tag < kShadowAlignment || tag == pointer_tag) && tag != 0)); } else { static_assert(kFallbackFreeTag >= kShadowAlignment, "fallback tag must not be a short granule tag."); From 611a02cce509d304ebf66054b7816f1e0b5a792c Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 30 Jun 2021 09:45:50 +0100 Subject: [PATCH 273/619] [ConstantRanges] Use APInt for constant case for urem/srem. Currently UREM & SREM on constant ranges produces overly pessimistic results for single element constant ranges. Delegate to APInt's implementation if both operands are single element constant ranges. We already do something similar for other binary operators, like binary AND. Fixes PR49731. Reviewed By: lebedev.ri Differential Revision: https://reviews.llvm.org/D105115 --- llvm/lib/IR/ConstantRange.cpp | 18 ++++++++++++ .../SCCP/binaryops-range-special-cases.ll | 28 ++++++------------- 2 files changed, 26 insertions(+), 20 deletions(-) diff --git a/llvm/lib/IR/ConstantRange.cpp b/llvm/lib/IR/ConstantRange.cpp index b38599fa7d982..0649776dbc22b 100644 --- a/llvm/lib/IR/ConstantRange.cpp +++ b/llvm/lib/IR/ConstantRange.cpp @@ -1221,6 +1221,15 @@ ConstantRange ConstantRange::urem(const ConstantRange &RHS) const { if (isEmptySet() || RHS.isEmptySet() || RHS.getUnsignedMax().isNullValue()) return getEmpty(); + if (const APInt *RHSInt = RHS.getSingleElement()) { + // UREM by null is UB. + if (RHSInt->isNullValue()) + return getEmpty(); + // Use APInt's implementation of UREM for single element ranges. + if (const APInt *LHSInt = getSingleElement()) + return {LHSInt->urem(*RHSInt)}; + } + // L % R for L < R is L. if (getUnsignedMax().ult(RHS.getUnsignedMin())) return *this; @@ -1234,6 +1243,15 @@ ConstantRange ConstantRange::srem(const ConstantRange &RHS) const { if (isEmptySet() || RHS.isEmptySet()) return getEmpty(); + if (const APInt *RHSInt = RHS.getSingleElement()) { + // SREM by null is UB. + if (RHSInt->isNullValue()) + return getEmpty(); + // Use APInt's implementation of SREM for single element ranges. + if (const APInt *LHSInt = getSingleElement()) + return {LHSInt->srem(*RHSInt)}; + } + ConstantRange AbsRHS = RHS.abs(); APInt MinAbsRHS = AbsRHS.getUnsignedMin(); APInt MaxAbsRHS = AbsRHS.getUnsignedMax(); diff --git a/llvm/test/Transforms/SCCP/binaryops-range-special-cases.ll b/llvm/test/Transforms/SCCP/binaryops-range-special-cases.ll index ad6d1e452a6a5..ad4ab37dfc60f 100644 --- a/llvm/test/Transforms/SCCP/binaryops-range-special-cases.ll +++ b/llvm/test/Transforms/SCCP/binaryops-range-special-cases.ll @@ -98,16 +98,10 @@ bb3: define void @urem_cmp_constants() { ; CHECK-LABEL: @urem_cmp_constants( -; CHECK-NEXT: [[UREM_1:%.*]] = urem i16 12704, 12704 -; CHECK-NEXT: [[C_1:%.*]] = icmp eq i16 [[UREM_1]], 0 -; CHECK-NEXT: call void @use(i1 [[C_1]]) -; CHECK-NEXT: [[C_2:%.*]] = icmp eq i16 [[UREM_1]], 1 -; CHECK-NEXT: call void @use(i1 [[C_2]]) -; CHECK-NEXT: [[UREM_2:%.*]] = urem i16 12704, 3 -; CHECK-NEXT: [[C_3:%.*]] = icmp eq i16 [[UREM_2]], 2 -; CHECK-NEXT: call void @use(i1 [[C_3]]) -; CHECK-NEXT: [[C_4:%.*]] = icmp eq i16 [[UREM_2]], 1 -; CHECK-NEXT: call void @use(i1 [[C_4]]) +; CHECK-NEXT: call void @use(i1 true) +; CHECK-NEXT: call void @use(i1 false) +; CHECK-NEXT: call void @use(i1 true) +; CHECK-NEXT: call void @use(i1 false) ; CHECK-NEXT: [[UREM_3:%.*]] = urem i16 12704, 0 ; CHECK-NEXT: [[C_5:%.*]] = icmp eq i16 [[UREM_3]], 1 ; CHECK-NEXT: call void @use(i1 [[C_5]]) @@ -132,16 +126,10 @@ define void @urem_cmp_constants() { define void @srem_cmp_constants() { ; CHECK-LABEL: @srem_cmp_constants( -; CHECK-NEXT: [[SREM_1:%.*]] = srem i16 12704, 12704 -; CHECK-NEXT: [[C_1:%.*]] = icmp eq i16 [[SREM_1]], 0 -; CHECK-NEXT: call void @use(i1 [[C_1]]) -; CHECK-NEXT: [[C_2:%.*]] = icmp eq i16 [[SREM_1]], 1 -; CHECK-NEXT: call void @use(i1 [[C_2]]) -; CHECK-NEXT: [[SREM_2:%.*]] = srem i16 12704, 3 -; CHECK-NEXT: [[C_3:%.*]] = icmp eq i16 [[SREM_2]], 2 -; CHECK-NEXT: call void @use(i1 [[C_3]]) -; CHECK-NEXT: [[C_4:%.*]] = icmp eq i16 [[SREM_2]], 1 -; CHECK-NEXT: call void @use(i1 [[C_4]]) +; CHECK-NEXT: call void @use(i1 true) +; CHECK-NEXT: call void @use(i1 false) +; CHECK-NEXT: call void @use(i1 true) +; CHECK-NEXT: call void @use(i1 false) ; CHECK-NEXT: [[SREM_3:%.*]] = srem i16 12704, 0 ; CHECK-NEXT: [[C_5:%.*]] = icmp eq i16 [[SREM_3]], 1 ; CHECK-NEXT: call void @use(i1 [[C_5]]) From a7ed55f64c5fdce9af3257458779402fb9de1f8b Mon Sep 17 00:00:00 2001 From: madhur13490 Date: Fri, 18 Jun 2021 13:44:54 +0530 Subject: [PATCH 274/619] [AMDGPU] Simplify getReservedNumSGPRs This is a followup patch on D103636 where it seemed checking on amdgpu-calls and amdgpu-stack-objects is unnecessary. Removing these checks didn't regress any tests functionally. Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D104513 --- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 37480ffbf05a4..e67a76eeb4cb2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -821,15 +821,17 @@ unsigned GCNSubtarget::getReservedNumSGPRs(const MachineFunction &MF) const { unsigned GCNSubtarget::getReservedNumSGPRs(const Function &F) const { // The logic to detect if the function has - // flat scratch init is same as how MachineFunctionInfo derives. + // flat scratch init is slightly different than how + // SIMachineFunctionInfo constructor derives. + // We don't use amdgpu-calls, amdgpu-stack-objects + // attributes and isAmdHsaOrMesa here as it doesn't really matter. + // TODO: Outline this derivation logic and have just + // one common function in the backend to avoid duplication. + bool isEntry = AMDGPU::isEntryFunctionCC(F.getCallingConv()); bool FunctionHasFlatScratchInit = false; - bool HasCalls = F.hasFnAttribute("amdgpu-calls"); - bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects"); - if (hasFlatAddressSpace() && AMDGPU::isEntryFunctionCC(F.getCallingConv()) && - (isAmdHsaOrMesa(F) || enableFlatScratch()) && - !flatScratchIsArchitected()) { - if (HasCalls || HasStackObjects || enableFlatScratch()) - FunctionHasFlatScratchInit = true; + if (hasFlatAddressSpace() && isEntry && !flatScratchIsArchitected() && + enableFlatScratch()) { + FunctionHasFlatScratchInit = true; } return getBaseReservedNumSGPRs(FunctionHasFlatScratchInit); } From 2fd75507d1855300d0a59451337d0a55b081887c Mon Sep 17 00:00:00 2001 From: Zhouyi Zhou Date: Wed, 30 Jun 2021 19:46:35 +0800 Subject: [PATCH 275/619] [clang] NFC: add line break at the end of if expressions Hi, In function TransformTemplateArgument, would it be better to add line break at the end of "if" expressions? I use clang-format to do the job for me. Thanks a lot Reviewed By: pengfei Differential Revision: https://reviews.llvm.org/D104604 --- clang/lib/Sema/TreeTransform.h | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 7b9f6a85260f1..70ba631dbfc6c 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -4322,10 +4322,10 @@ void TreeTransform::InventTemplateArgumentLoc( Arg, QualType(), getDerived().getBaseLocation()); } -template +template bool TreeTransform::TransformTemplateArgument( - const TemplateArgumentLoc &Input, - TemplateArgumentLoc &Output, bool Uneval) { + const TemplateArgumentLoc &Input, TemplateArgumentLoc &Output, + bool Uneval) { const TemplateArgument &Arg = Input.getArgument(); switch (Arg.getKind()) { case TemplateArgument::Null: @@ -4374,7 +4374,8 @@ bool TreeTransform::TransformTemplateArgument( DI = InventTypeSourceInfo(Input.getArgument().getAsType()); DI = getDerived().TransformType(DI); - if (!DI) return true; + if (!DI) + return true; Output = TemplateArgumentLoc(TemplateArgument(DI->getType()), DI); return false; @@ -4390,9 +4391,8 @@ bool TreeTransform::TransformTemplateArgument( CXXScopeSpec SS; SS.Adopt(QualifierLoc); - TemplateName Template - = getDerived().TransformTemplateName(SS, Arg.getAsTemplate(), - Input.getTemplateNameLoc()); + TemplateName Template = getDerived().TransformTemplateName( + SS, Arg.getAsTemplate(), Input.getTemplateNameLoc()); if (Template.isNull()) return true; @@ -4414,11 +4414,13 @@ bool TreeTransform::TransformTemplateArgument( Sema::ExpressionEvaluationContextRecord::EK_TemplateArgument); Expr *InputExpr = Input.getSourceExpression(); - if (!InputExpr) InputExpr = Input.getArgument().getAsExpr(); + if (!InputExpr) + InputExpr = Input.getArgument().getAsExpr(); ExprResult E = getDerived().TransformExpr(InputExpr); E = SemaRef.ActOnConstantExpression(E); - if (E.isInvalid()) return true; + if (E.isInvalid()) + return true; Output = TemplateArgumentLoc(TemplateArgument(E.get()), E.get()); return false; } From 7fab1146e42ca76a78cccd0aa274168c628d01de Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Tue, 29 Jun 2021 12:26:37 -0700 Subject: [PATCH 276/619] [OPENMP]Fix PR50929: Ignored initializer clause in user-defined reduction. No need to try to create the default constructor for private copy, it will be called automatically in the initializer of the declare reduction. Fixes balance between constructors/destructors calls. Differential Revision: https://reviews.llvm.org/D105143 --- clang/lib/Sema/SemaOpenMP.cpp | 1 - .../test/OpenMP/for_reduction_codegen_UDR.cpp | 1280 ++++++++--------- 2 files changed, 560 insertions(+), 721 deletions(-) diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 44acb570885ba..7fddff7992fc1 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -17069,7 +17069,6 @@ static bool actOnOMPReductionKindClause( auto *DRDRef = DeclareReductionRef.getAs(); auto *DRD = cast(DRDRef->getDecl()); if (DRD->getInitializer()) { - S.ActOnUninitializedDecl(PrivateVD); Init = DRDRef; RHSVD->setInit(DRDRef); RHSVD->setInitStyle(VarDecl::CallInit); diff --git a/clang/test/OpenMP/for_reduction_codegen_UDR.cpp b/clang/test/OpenMP/for_reduction_codegen_UDR.cpp index d778cab63507b..d33753f0383e8 100644 --- a/clang/test/OpenMP/for_reduction_codegen_UDR.cpp +++ b/clang/test/OpenMP/for_reduction_codegen_UDR.cpp @@ -510,7 +510,6 @@ int main() { // CHECK1-NEXT: store %struct.S* [[S]], %struct.S** [[S_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S*, %struct.S** [[S_ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S* nonnull align 4 dereferenceable(12) [[S1]]) // CHECK1-NEXT: call void @.omp_initializer.(%struct.S* [[S1]], %struct.S* [[TMP0]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: @@ -790,7 +789,6 @@ int main() { // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: call void @.omp_initializer..3(float* [[T_VAR3]], float* [[TMP0]]) // CHECK1-NEXT: [[TMP7:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP1]], align 8 -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S.0* nonnull align 4 dereferenceable(12) [[VAR4]]) // CHECK1-NEXT: [[TMP8:%.*]] = bitcast %struct.S.0* [[VAR4]] to i8* // CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[TMP8]], i64 4 // CHECK1-NEXT: [[TMP9:%.*]] = bitcast i8* [[ADD_PTR]] to %struct.BaseS1* @@ -799,7 +797,6 @@ int main() { // CHECK1-NEXT: [[TMP11:%.*]] = bitcast i8* [[ADD_PTR5]] to %struct.BaseS1* // CHECK1-NEXT: call void @.omp_initializer..5(%struct.BaseS1* [[TMP9]], %struct.BaseS1* [[TMP11]]) // CHECK1-NEXT: store %struct.S.0* [[VAR4]], %struct.S.0** [[_TMP6]], align 8 -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S.0* nonnull align 4 dereferenceable(12) [[VAR17]]) // CHECK1-NEXT: call void @.omp_initializer..7(%struct.S.0* [[VAR17]], %struct.S.0* [[TMP2]]) // CHECK1-NEXT: [[TMP12:%.*]] = load float, float* @.init, align 4 // CHECK1-NEXT: store float [[TMP12]], float* [[T_VAR18]], align 4 @@ -1170,24 +1167,12 @@ int main() { // CHECK1-NEXT: [[TMP29:%.*]] = mul nuw i64 [[TMP28]], ptrtoint (%struct.S.0* getelementptr ([[STRUCT_S_0]], %struct.S.0* null, i32 1) to i64) // CHECK1-NEXT: [[VLA16:%.*]] = alloca [[STRUCT_S_0]], i64 [[TMP28]], align 16 // CHECK1-NEXT: store i64 [[TMP28]], i64* [[__VLA_EXPR1]], align 8 -// CHECK1-NEXT: [[ISEMPTY:%.*]] = icmp eq i64 [[TMP28]], 0 -// CHECK1-NEXT: br i1 [[ISEMPTY]], label [[ARRAYCTOR_CONT:%.*]], label [[NEW_CTORLOOP:%.*]] -// CHECK1: new.ctorloop: -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[VLA16]], i64 [[TMP28]] -// CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] -// CHECK1: arrayctor.loop: -// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[VLA16]], [[NEW_CTORLOOP]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S.0* nonnull align 4 dereferenceable(12) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] -// CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT]], label [[ARRAYCTOR_LOOP]] -// CHECK1: arrayctor.cont: // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[VLA16]], i64 [[TMP28]] // CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY17:%.*]] = icmp eq %struct.S.0* [[VLA16]], [[TMP30]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY17]], label [[OMP_ARRAYINIT_DONE25:%.*]], label [[OMP_ARRAYINIT_BODY18:%.*]] // CHECK1: omp.arrayinit.body18: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST19:%.*]] = phi %struct.S.0* [ [[ARRAYIDX10]], [[ARRAYCTOR_CONT]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT22:%.*]], [[OMP_ARRAYINIT_BODY18]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST20:%.*]] = phi %struct.S.0* [ [[VLA16]], [[ARRAYCTOR_CONT]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT23:%.*]], [[OMP_ARRAYINIT_BODY18]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST19:%.*]] = phi %struct.S.0* [ [[ARRAYIDX10]], [[OMP_ARRAYINIT_DONE]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT22:%.*]], [[OMP_ARRAYINIT_BODY18]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST20:%.*]] = phi %struct.S.0* [ [[VLA16]], [[OMP_ARRAYINIT_DONE]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT23:%.*]], [[OMP_ARRAYINIT_BODY18]] ] // CHECK1-NEXT: [[TMP31:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST20]] to i8* // CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[TMP31]], i64 4 // CHECK1-NEXT: [[TMP32:%.*]] = bitcast i8* [[ADD_PTR]] to %struct.BaseS1* @@ -1511,35 +1496,25 @@ int main() { // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [4 x %struct.S.0]], [10 x [4 x %struct.S.0]]* [[ARRS5]], i32 0, i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 40 -// CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] -// CHECK1: arrayctor.loop: -// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[OMP_ARRAYINIT_DONE]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S.0* nonnull align 4 dereferenceable(12) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] -// CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] -// CHECK1: arrayctor.cont: -// CHECK1-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [10 x [4 x %struct.S.0]], [10 x [4 x %struct.S.0]]* [[ARRS5]], i32 0, i32 0, i32 0 // CHECK1-NEXT: [[TMP9:%.*]] = bitcast [10 x [4 x %struct.S.0]]* [[TMP3]] to %struct.S.0* -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN6]], i64 40 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY7:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN6]], [[TMP10]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY7]], label [[OMP_ARRAYINIT_DONE15:%.*]], label [[OMP_ARRAYINIT_BODY8:%.*]] -// CHECK1: omp.arrayinit.body8: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi %struct.S.0* [ [[TMP9]], [[ARRAYCTOR_CONT]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT12:%.*]], [[OMP_ARRAYINIT_BODY8]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN6]], [[ARRAYCTOR_CONT]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT13:%.*]], [[OMP_ARRAYINIT_BODY8]] ] -// CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST10]] to i8* +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 40 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY6:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP10]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY6]], label [[OMP_ARRAYINIT_DONE14:%.*]], label [[OMP_ARRAYINIT_BODY7:%.*]] +// CHECK1: omp.arrayinit.body7: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST8:%.*]] = phi %struct.S.0* [ [[TMP9]], [[OMP_ARRAYINIT_DONE]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYINIT_BODY7]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST9:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[OMP_ARRAYINIT_DONE]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT12:%.*]], [[OMP_ARRAYINIT_BODY7]] ] +// CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST9]] to i8* // CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[TMP11]], i64 4 // CHECK1-NEXT: [[TMP12:%.*]] = bitcast i8* [[ADD_PTR]] to %struct.BaseS1* -// CHECK1-NEXT: [[TMP13:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST9]] to i8* -// CHECK1-NEXT: [[ADD_PTR11:%.*]] = getelementptr inbounds i8, i8* [[TMP13]], i64 4 -// CHECK1-NEXT: [[TMP14:%.*]] = bitcast i8* [[ADD_PTR11]] to %struct.BaseS1* +// CHECK1-NEXT: [[TMP13:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST8]] to i8* +// CHECK1-NEXT: [[ADD_PTR10:%.*]] = getelementptr inbounds i8, i8* [[TMP13]], i64 4 +// CHECK1-NEXT: [[TMP14:%.*]] = bitcast i8* [[ADD_PTR10]] to %struct.BaseS1* // CHECK1-NEXT: call void @.omp_initializer..5(%struct.BaseS1* [[TMP12]], %struct.BaseS1* [[TMP14]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT12]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT13]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE14:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT13]], [[TMP10]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYINIT_DONE15]], label [[OMP_ARRAYINIT_BODY8]] -// CHECK1: omp.arrayinit.done15: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST8]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT12]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST9]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT12]], [[TMP10]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYINIT_DONE14]], label [[OMP_ARRAYINIT_BODY7]] +// CHECK1: omp.arrayinit.done14: // CHECK1-NEXT: [[LHS_BEGIN:%.*]] = bitcast [10 x [4 x %struct.S.0]]* [[TMP3]] to %struct.S.0* // CHECK1-NEXT: [[RHS_BEGIN:%.*]] = bitcast [10 x [4 x %struct.S.0]]* [[ARRS5]] to %struct.S.0* // CHECK1-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -1562,8 +1537,8 @@ int main() { // CHECK1: omp.inner.for.cond: // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP16:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK1-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[CMP15:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: br i1 [[CMP15]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: @@ -1575,17 +1550,17 @@ int main() { // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[VLA3]], i64 [[TMP23]] // CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX17]], align 4 +// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX16]], align 4 // CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP25]], 1 -// CHECK1-NEXT: store i32 [[INC]], i32* [[ARRAYIDX17]], align 4 +// CHECK1-NEXT: store i32 [[INC]], i32* [[ARRAYIDX16]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: // CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP26]], 1 -// CHECK1-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP26]], 1 +// CHECK1-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1613,88 +1588,88 @@ int main() { // CHECK1: .omp.reduction.case1: // CHECK1-NEXT: [[TMP39:%.*]] = getelementptr i32, i32* [[TMP2]], i64 [[TMP6]] // CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i32* [[TMP2]], [[TMP39]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE23:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE22:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST19:%.*]] = phi i32* [ [[VLA3]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST20:%.*]] = phi i32* [ [[TMP2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT21:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: call void @.omp_combiner..10(i32* [[OMP_ARRAYCPY_DESTELEMENTPAST20]], i32* [[OMP_ARRAYCPY_SRCELEMENTPAST19]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT21]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST20]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST19]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE22:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT21]], [[TMP39]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE22]], label [[OMP_ARRAYCPY_DONE23]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done23: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST18:%.*]] = phi i32* [ [[VLA3]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST19:%.*]] = phi i32* [ [[TMP2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT20:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: call void @.omp_combiner..10(i32* [[OMP_ARRAYCPY_DESTELEMENTPAST19]], i32* [[OMP_ARRAYCPY_SRCELEMENTPAST18]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT20]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST19]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST18]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE21:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT20]], [[TMP39]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_DONE22]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done22: // CHECK1-NEXT: [[TMP40:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[LHS_BEGIN]], i64 40 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY24:%.*]] = icmp eq %struct.S.0* [[LHS_BEGIN]], [[TMP40]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY24]], label [[OMP_ARRAYCPY_DONE33:%.*]], label [[OMP_ARRAYCPY_BODY25:%.*]] -// CHECK1: omp.arraycpy.body25: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST26:%.*]] = phi %struct.S.0* [ [[RHS_BEGIN]], [[OMP_ARRAYCPY_DONE23]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT31:%.*]], [[OMP_ARRAYCPY_BODY25]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST27:%.*]] = phi %struct.S.0* [ [[LHS_BEGIN]], [[OMP_ARRAYCPY_DONE23]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT30:%.*]], [[OMP_ARRAYCPY_BODY25]] ] -// CHECK1-NEXT: [[TMP41:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST27]] to i8* -// CHECK1-NEXT: [[ADD_PTR28:%.*]] = getelementptr inbounds i8, i8* [[TMP41]], i64 4 -// CHECK1-NEXT: [[TMP42:%.*]] = bitcast i8* [[ADD_PTR28]] to %struct.BaseS1* -// CHECK1-NEXT: [[TMP43:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST26]] to i8* -// CHECK1-NEXT: [[ADD_PTR29:%.*]] = getelementptr inbounds i8, i8* [[TMP43]], i64 4 -// CHECK1-NEXT: [[TMP44:%.*]] = bitcast i8* [[ADD_PTR29]] to %struct.BaseS1* +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY23:%.*]] = icmp eq %struct.S.0* [[LHS_BEGIN]], [[TMP40]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY23]], label [[OMP_ARRAYCPY_DONE32:%.*]], label [[OMP_ARRAYCPY_BODY24:%.*]] +// CHECK1: omp.arraycpy.body24: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST25:%.*]] = phi %struct.S.0* [ [[RHS_BEGIN]], [[OMP_ARRAYCPY_DONE22]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT30:%.*]], [[OMP_ARRAYCPY_BODY24]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST26:%.*]] = phi %struct.S.0* [ [[LHS_BEGIN]], [[OMP_ARRAYCPY_DONE22]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT29:%.*]], [[OMP_ARRAYCPY_BODY24]] ] +// CHECK1-NEXT: [[TMP41:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST26]] to i8* +// CHECK1-NEXT: [[ADD_PTR27:%.*]] = getelementptr inbounds i8, i8* [[TMP41]], i64 4 +// CHECK1-NEXT: [[TMP42:%.*]] = bitcast i8* [[ADD_PTR27]] to %struct.BaseS1* +// CHECK1-NEXT: [[TMP43:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST25]] to i8* +// CHECK1-NEXT: [[ADD_PTR28:%.*]] = getelementptr inbounds i8, i8* [[TMP43]], i64 4 +// CHECK1-NEXT: [[TMP44:%.*]] = bitcast i8* [[ADD_PTR28]] to %struct.BaseS1* // CHECK1-NEXT: call void @.omp_combiner..4(%struct.BaseS1* [[TMP42]], %struct.BaseS1* [[TMP44]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT30]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST27]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT31]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST26]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE32:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT30]], [[TMP40]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE32]], label [[OMP_ARRAYCPY_DONE33]], label [[OMP_ARRAYCPY_BODY25]] -// CHECK1: omp.arraycpy.done33: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT29]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT30]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST25]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE31:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT29]], [[TMP40]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE31]], label [[OMP_ARRAYCPY_DONE32]], label [[OMP_ARRAYCPY_BODY24]] +// CHECK1: omp.arraycpy.done32: // CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP36]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: // CHECK1-NEXT: [[TMP45:%.*]] = getelementptr i32, i32* [[TMP2]], i64 [[TMP6]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY34:%.*]] = icmp eq i32* [[TMP2]], [[TMP45]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY34]], label [[OMP_ARRAYCPY_DONE41:%.*]], label [[OMP_ARRAYCPY_BODY35:%.*]] -// CHECK1: omp.arraycpy.body35: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST36:%.*]] = phi i32* [ [[VLA3]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT39:%.*]], [[OMP_ARRAYCPY_BODY35]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST37:%.*]] = phi i32* [ [[TMP2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT38:%.*]], [[OMP_ARRAYCPY_BODY35]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY33:%.*]] = icmp eq i32* [[TMP2]], [[TMP45]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY33]], label [[OMP_ARRAYCPY_DONE40:%.*]], label [[OMP_ARRAYCPY_BODY34:%.*]] +// CHECK1: omp.arraycpy.body34: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST35:%.*]] = phi i32* [ [[VLA3]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT38:%.*]], [[OMP_ARRAYCPY_BODY34]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST36:%.*]] = phi i32* [ [[TMP2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT37:%.*]], [[OMP_ARRAYCPY_BODY34]] ] // CHECK1-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 // CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP47]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @.omp_combiner..10(i32* [[OMP_ARRAYCPY_DESTELEMENTPAST37]], i32* [[OMP_ARRAYCPY_SRCELEMENTPAST36]]) +// CHECK1-NEXT: call void @.omp_combiner..10(i32* [[OMP_ARRAYCPY_DESTELEMENTPAST36]], i32* [[OMP_ARRAYCPY_SRCELEMENTPAST35]]) // CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP47]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT38]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST37]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT39]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST36]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE40:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT38]], [[TMP45]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE40]], label [[OMP_ARRAYCPY_DONE41]], label [[OMP_ARRAYCPY_BODY35]] -// CHECK1: omp.arraycpy.done41: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT37]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST36]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT38]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST35]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE39:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT37]], [[TMP45]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE39]], label [[OMP_ARRAYCPY_DONE40]], label [[OMP_ARRAYCPY_BODY34]] +// CHECK1: omp.arraycpy.done40: // CHECK1-NEXT: [[TMP48:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[LHS_BEGIN]], i64 40 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY42:%.*]] = icmp eq %struct.S.0* [[LHS_BEGIN]], [[TMP48]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY42]], label [[OMP_ARRAYCPY_DONE51:%.*]], label [[OMP_ARRAYCPY_BODY43:%.*]] -// CHECK1: omp.arraycpy.body43: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST44:%.*]] = phi %struct.S.0* [ [[RHS_BEGIN]], [[OMP_ARRAYCPY_DONE41]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT49:%.*]], [[OMP_ARRAYCPY_BODY43]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST45:%.*]] = phi %struct.S.0* [ [[LHS_BEGIN]], [[OMP_ARRAYCPY_DONE41]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT48:%.*]], [[OMP_ARRAYCPY_BODY43]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY41:%.*]] = icmp eq %struct.S.0* [[LHS_BEGIN]], [[TMP48]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY41]], label [[OMP_ARRAYCPY_DONE50:%.*]], label [[OMP_ARRAYCPY_BODY42:%.*]] +// CHECK1: omp.arraycpy.body42: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST43:%.*]] = phi %struct.S.0* [ [[RHS_BEGIN]], [[OMP_ARRAYCPY_DONE40]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT48:%.*]], [[OMP_ARRAYCPY_BODY42]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST44:%.*]] = phi %struct.S.0* [ [[LHS_BEGIN]], [[OMP_ARRAYCPY_DONE40]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT47:%.*]], [[OMP_ARRAYCPY_BODY42]] ] // CHECK1-NEXT: [[TMP49:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP50:%.*]] = load i32, i32* [[TMP49]], align 4 // CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP50]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[TMP51:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST45]] to i8* -// CHECK1-NEXT: [[ADD_PTR46:%.*]] = getelementptr inbounds i8, i8* [[TMP51]], i64 4 -// CHECK1-NEXT: [[TMP52:%.*]] = bitcast i8* [[ADD_PTR46]] to %struct.BaseS1* -// CHECK1-NEXT: [[TMP53:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST44]] to i8* -// CHECK1-NEXT: [[ADD_PTR47:%.*]] = getelementptr inbounds i8, i8* [[TMP53]], i64 4 -// CHECK1-NEXT: [[TMP54:%.*]] = bitcast i8* [[ADD_PTR47]] to %struct.BaseS1* +// CHECK1-NEXT: [[TMP51:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST44]] to i8* +// CHECK1-NEXT: [[ADD_PTR45:%.*]] = getelementptr inbounds i8, i8* [[TMP51]], i64 4 +// CHECK1-NEXT: [[TMP52:%.*]] = bitcast i8* [[ADD_PTR45]] to %struct.BaseS1* +// CHECK1-NEXT: [[TMP53:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST43]] to i8* +// CHECK1-NEXT: [[ADD_PTR46:%.*]] = getelementptr inbounds i8, i8* [[TMP53]], i64 4 +// CHECK1-NEXT: [[TMP54:%.*]] = bitcast i8* [[ADD_PTR46]] to %struct.BaseS1* // CHECK1-NEXT: call void @.omp_combiner..4(%struct.BaseS1* [[TMP52]], %struct.BaseS1* [[TMP54]]) // CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP50]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT48]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST45]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT49]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST44]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE50:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT48]], [[TMP48]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE50]], label [[OMP_ARRAYCPY_DONE51]], label [[OMP_ARRAYCPY_BODY43]] -// CHECK1: omp.arraycpy.done51: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT47]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST44]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT48]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST43]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE49:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT47]], [[TMP48]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE49]], label [[OMP_ARRAYCPY_DONE50]], label [[OMP_ARRAYCPY_BODY42]] +// CHECK1: omp.arraycpy.done50: // CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP36]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[ARRAY_BEGIN52:%.*]] = getelementptr inbounds [10 x [4 x %struct.S.0]], [10 x [4 x %struct.S.0]]* [[ARRS5]], i32 0, i32 0, i32 0 -// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN52]], i64 40 +// CHECK1-NEXT: [[ARRAY_BEGIN51:%.*]] = getelementptr inbounds [10 x [4 x %struct.S.0]], [10 x [4 x %struct.S.0]]* [[ARRS5]], i32 0, i32 0, i32 0 +// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN51]], i64 40 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP55]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S.0* nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN52]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE53:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done53: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN51]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE52:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done52: // CHECK1-NEXT: [[TMP56:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 // CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP56]]) // CHECK1-NEXT: [[TMP57:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -1806,24 +1781,12 @@ int main() { // CHECK1-NEXT: store i8* [[TMP11]], i8** [[SAVED_STACK]], align 8 // CHECK1-NEXT: [[VLA:%.*]] = alloca [[STRUCT_S_0]], i64 [[TMP9]], align 16 // CHECK1-NEXT: store i64 [[TMP9]], i64* [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[ISEMPTY:%.*]] = icmp eq i64 [[TMP9]], 0 -// CHECK1-NEXT: br i1 [[ISEMPTY]], label [[ARRAYCTOR_CONT:%.*]], label [[NEW_CTORLOOP:%.*]] -// CHECK1: new.ctorloop: -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[VLA]], i64 [[TMP9]] -// CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] -// CHECK1: arrayctor.loop: -// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[VLA]], [[NEW_CTORLOOP]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S.0* nonnull align 4 dereferenceable(12) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] -// CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT]], label [[ARRAYCTOR_LOOP]] -// CHECK1: arrayctor.cont: // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[VLA]], i64 [[TMP9]] // CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[VLA]], [[TMP12]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAYIDX1]], [[ARRAYCTOR_CONT]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[VLA]], [[ARRAYCTOR_CONT]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT5:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAYIDX1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[VLA]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT5:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: [[TMP13:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* // CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[TMP13]], i64 4 // CHECK1-NEXT: [[TMP14:%.*]] = bitcast i8* [[ADD_PTR]] to %struct.BaseS1* @@ -2034,32 +1997,22 @@ int main() { // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [5 x %struct.S.0], [5 x %struct.S.0]* [[TMP0]], i64 0, i64 0 // CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [5 x %struct.S.0], [5 x %struct.S.0]* [[TMP0]], i64 0, i64 4 // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [5 x %struct.S.0], [5 x %struct.S.0]* [[VVAR22]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 5 -// CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] -// CHECK1: arrayctor.loop: -// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S.0* nonnull align 4 dereferenceable(12) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] -// CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] -// CHECK1: arrayctor.cont: -// CHECK1-NEXT: [[ARRAY_BEGIN3:%.*]] = getelementptr inbounds [5 x %struct.S.0], [5 x %struct.S.0]* [[VVAR22]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN3]], i64 5 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN3]], [[TMP1]] +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 5 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP1]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAYIDX]], [[ARRAYCTOR_CONT]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN3]], [[ARRAYCTOR_CONT]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT5:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAYIDX]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: [[TMP2:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* // CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i64 4 // CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[ADD_PTR]] to %struct.BaseS1* // CHECK1-NEXT: [[TMP4:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK1-NEXT: [[ADD_PTR4:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i64 4 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[ADD_PTR4]] to %struct.BaseS1* +// CHECK1-NEXT: [[ADD_PTR3:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i64 4 +// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[ADD_PTR3]] to %struct.BaseS1* // CHECK1-NEXT: call void @.omp_initializer..5(%struct.BaseS1* [[TMP3]], %struct.BaseS1* [[TMP5]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT5]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT5]], [[TMP1]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP1]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: // CHECK1-NEXT: [[TMP6:%.*]] = bitcast [5 x %struct.S.0]* [[TMP0]] to %struct.S.0* @@ -2091,8 +2044,8 @@ int main() { // CHECK1: omp.inner.for.cond: // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: @@ -2105,8 +2058,8 @@ int main() { // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: // CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2128,60 +2081,60 @@ int main() { // CHECK1: .omp.reduction.case1: // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAYIDX]], i64 5 // CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAYIDX]], [[TMP31]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST8:%.*]] = phi %struct.S.0* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST9:%.*]] = phi %struct.S.0* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP32:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST9]] to i8* -// CHECK1-NEXT: [[ADD_PTR10:%.*]] = getelementptr inbounds i8, i8* [[TMP32]], i64 4 -// CHECK1-NEXT: [[TMP33:%.*]] = bitcast i8* [[ADD_PTR10]] to %struct.BaseS1* -// CHECK1-NEXT: [[TMP34:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST8]] to i8* -// CHECK1-NEXT: [[ADD_PTR11:%.*]] = getelementptr inbounds i8, i8* [[TMP34]], i64 4 -// CHECK1-NEXT: [[TMP35:%.*]] = bitcast i8* [[ADD_PTR11]] to %struct.BaseS1* +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST7:%.*]] = phi %struct.S.0* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST8:%.*]] = phi %struct.S.0* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP32:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST8]] to i8* +// CHECK1-NEXT: [[ADD_PTR9:%.*]] = getelementptr inbounds i8, i8* [[TMP32]], i64 4 +// CHECK1-NEXT: [[TMP33:%.*]] = bitcast i8* [[ADD_PTR9]] to %struct.BaseS1* +// CHECK1-NEXT: [[TMP34:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST7]] to i8* +// CHECK1-NEXT: [[ADD_PTR10:%.*]] = getelementptr inbounds i8, i8* [[TMP34]], i64 4 +// CHECK1-NEXT: [[TMP35:%.*]] = bitcast i8* [[ADD_PTR10]] to %struct.BaseS1* // CHECK1-NEXT: call void @.omp_combiner..4(%struct.BaseS1* [[TMP33]], %struct.BaseS1* [[TMP35]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT12]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST9]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST8]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT12]], [[TMP31]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done14: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST7]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE12:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP31]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done13: // CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP28]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: // CHECK1-NEXT: [[TMP36:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAYIDX]], i64 5 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY15:%.*]] = icmp eq %struct.S.0* [[ARRAYIDX]], [[TMP36]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY15]], label [[OMP_ARRAYCPY_DONE24:%.*]], label [[OMP_ARRAYCPY_BODY16:%.*]] -// CHECK1: omp.arraycpy.body16: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST17:%.*]] = phi %struct.S.0* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT22:%.*]], [[OMP_ARRAYCPY_BODY16]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST18:%.*]] = phi %struct.S.0* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT21:%.*]], [[OMP_ARRAYCPY_BODY16]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY14:%.*]] = icmp eq %struct.S.0* [[ARRAYIDX]], [[TMP36]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY14]], label [[OMP_ARRAYCPY_DONE23:%.*]], label [[OMP_ARRAYCPY_BODY15:%.*]] +// CHECK1: omp.arraycpy.body15: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST16:%.*]] = phi %struct.S.0* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT21:%.*]], [[OMP_ARRAYCPY_BODY15]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST17:%.*]] = phi %struct.S.0* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT20:%.*]], [[OMP_ARRAYCPY_BODY15]] ] // CHECK1-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 // CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[TMP39:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST18]] to i8* -// CHECK1-NEXT: [[ADD_PTR19:%.*]] = getelementptr inbounds i8, i8* [[TMP39]], i64 4 -// CHECK1-NEXT: [[TMP40:%.*]] = bitcast i8* [[ADD_PTR19]] to %struct.BaseS1* -// CHECK1-NEXT: [[TMP41:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST17]] to i8* -// CHECK1-NEXT: [[ADD_PTR20:%.*]] = getelementptr inbounds i8, i8* [[TMP41]], i64 4 -// CHECK1-NEXT: [[TMP42:%.*]] = bitcast i8* [[ADD_PTR20]] to %struct.BaseS1* +// CHECK1-NEXT: [[TMP39:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST17]] to i8* +// CHECK1-NEXT: [[ADD_PTR18:%.*]] = getelementptr inbounds i8, i8* [[TMP39]], i64 4 +// CHECK1-NEXT: [[TMP40:%.*]] = bitcast i8* [[ADD_PTR18]] to %struct.BaseS1* +// CHECK1-NEXT: [[TMP41:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST16]] to i8* +// CHECK1-NEXT: [[ADD_PTR19:%.*]] = getelementptr inbounds i8, i8* [[TMP41]], i64 4 +// CHECK1-NEXT: [[TMP42:%.*]] = bitcast i8* [[ADD_PTR19]] to %struct.BaseS1* // CHECK1-NEXT: call void @.omp_combiner..4(%struct.BaseS1* [[TMP40]], %struct.BaseS1* [[TMP42]]) // CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT21]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST18]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT22]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST17]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE23:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT21]], [[TMP36]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE23]], label [[OMP_ARRAYCPY_DONE24]], label [[OMP_ARRAYCPY_BODY16]] -// CHECK1: omp.arraycpy.done24: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT20]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST17]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT21]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST16]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE22:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT20]], [[TMP36]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE22]], label [[OMP_ARRAYCPY_DONE23]], label [[OMP_ARRAYCPY_BODY15]] +// CHECK1: omp.arraycpy.done23: // CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP28]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[ARRAY_BEGIN25:%.*]] = getelementptr inbounds [5 x %struct.S.0], [5 x %struct.S.0]* [[VVAR22]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN25]], i64 5 +// CHECK1-NEXT: [[ARRAY_BEGIN24:%.*]] = getelementptr inbounds [5 x %struct.S.0], [5 x %struct.S.0]* [[VVAR22]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN24]], i64 5 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP43]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S.0* nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN25]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE26:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done26: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN24]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE25:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done25: // CHECK1-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4 // CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP45]]) @@ -2241,7 +2194,7 @@ int main() { // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VAR34:%.*]] = alloca [2 x %struct.S.0], align 16 -// CHECK1-NEXT: [[_TMP8:%.*]] = alloca [4 x %struct.S.0]*, align 8 +// CHECK1-NEXT: [[_TMP7:%.*]] = alloca [4 x %struct.S.0]*, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -2260,32 +2213,22 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = load [4 x %struct.S.0]*, [4 x %struct.S.0]** [[_TMP1]], align 8 // CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x %struct.S.0], [4 x %struct.S.0]* [[TMP3]], i64 0, i64 2 // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[VAR34]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] -// CHECK1: arrayctor.loop: -// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S.0* nonnull align 4 dereferenceable(12) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] -// CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] -// CHECK1: arrayctor.cont: -// CHECK1-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[VAR34]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN5]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN5]], [[TMP4]] +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP4]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAYIDX]], [[ARRAYCTOR_CONT]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN5]], [[ARRAYCTOR_CONT]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT7:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAYIDX]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT6:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: [[TMP5:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* // CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[TMP5]], i64 4 // CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[ADD_PTR]] to %struct.BaseS1* // CHECK1-NEXT: [[TMP7:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK1-NEXT: [[ADD_PTR6:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 4 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[ADD_PTR6]] to %struct.BaseS1* +// CHECK1-NEXT: [[ADD_PTR5:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 4 +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[ADD_PTR5]] to %struct.BaseS1* // CHECK1-NEXT: call void @.omp_initializer..5(%struct.BaseS1* [[TMP6]], %struct.BaseS1* [[TMP8]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT7]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT7]], [[TMP4]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT6]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT6]], [[TMP4]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: // CHECK1-NEXT: [[TMP9:%.*]] = load [4 x %struct.S.0]*, [4 x %struct.S.0]** [[_TMP1]], align 8 @@ -2297,7 +2240,7 @@ int main() { // CHECK1-NEXT: [[TMP15:%.*]] = bitcast [2 x %struct.S.0]* [[VAR34]] to %struct.S.0* // CHECK1-NEXT: [[TMP16:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[TMP15]], i64 [[TMP14]] // CHECK1-NEXT: [[TMP17:%.*]] = bitcast %struct.S.0* [[TMP16]] to [4 x %struct.S.0]* -// CHECK1-NEXT: store [4 x %struct.S.0]* [[TMP17]], [4 x %struct.S.0]** [[_TMP8]], align 8 +// CHECK1-NEXT: store [4 x %struct.S.0]* [[TMP17]], [4 x %struct.S.0]** [[_TMP7]], align 8 // CHECK1-NEXT: [[RHS_BEGIN:%.*]] = bitcast [2 x %struct.S.0]* [[VAR34]] to %struct.S.0* // CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 @@ -2319,8 +2262,8 @@ int main() { // CHECK1: omp.inner.for.cond: // CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] -// CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: @@ -2333,8 +2276,8 @@ int main() { // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: // CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP26]], 1 -// CHECK1-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP26]], 1 +// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2356,60 +2299,60 @@ int main() { // CHECK1: .omp.reduction.case1: // CHECK1-NEXT: [[TMP35:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAYIDX]], i64 2 // CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAYIDX]], [[TMP35]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE17:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE16:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST11:%.*]] = phi %struct.S.0* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST12:%.*]] = phi %struct.S.0* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP36:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST12]] to i8* -// CHECK1-NEXT: [[ADD_PTR13:%.*]] = getelementptr inbounds i8, i8* [[TMP36]], i64 4 -// CHECK1-NEXT: [[TMP37:%.*]] = bitcast i8* [[ADD_PTR13]] to %struct.BaseS1* -// CHECK1-NEXT: [[TMP38:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST11]] to i8* -// CHECK1-NEXT: [[ADD_PTR14:%.*]] = getelementptr inbounds i8, i8* [[TMP38]], i64 4 -// CHECK1-NEXT: [[TMP39:%.*]] = bitcast i8* [[ADD_PTR14]] to %struct.BaseS1* +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST10:%.*]] = phi %struct.S.0* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST11:%.*]] = phi %struct.S.0* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT14:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP36:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST11]] to i8* +// CHECK1-NEXT: [[ADD_PTR12:%.*]] = getelementptr inbounds i8, i8* [[TMP36]], i64 4 +// CHECK1-NEXT: [[TMP37:%.*]] = bitcast i8* [[ADD_PTR12]] to %struct.BaseS1* +// CHECK1-NEXT: [[TMP38:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST10]] to i8* +// CHECK1-NEXT: [[ADD_PTR13:%.*]] = getelementptr inbounds i8, i8* [[TMP38]], i64 4 +// CHECK1-NEXT: [[TMP39:%.*]] = bitcast i8* [[ADD_PTR13]] to %struct.BaseS1* // CHECK1-NEXT: call void @.omp_combiner..4(%struct.BaseS1* [[TMP37]], %struct.BaseS1* [[TMP39]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST12]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST11]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE16:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP35]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done17: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT14]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST11]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST10]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE15:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT14]], [[TMP35]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE15]], label [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done16: // CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP32]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: // CHECK1-NEXT: [[TMP40:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAYIDX]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY18:%.*]] = icmp eq %struct.S.0* [[ARRAYIDX]], [[TMP40]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY18]], label [[OMP_ARRAYCPY_DONE27:%.*]], label [[OMP_ARRAYCPY_BODY19:%.*]] -// CHECK1: omp.arraycpy.body19: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST20:%.*]] = phi %struct.S.0* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT25:%.*]], [[OMP_ARRAYCPY_BODY19]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST21:%.*]] = phi %struct.S.0* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY19]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY17:%.*]] = icmp eq %struct.S.0* [[ARRAYIDX]], [[TMP40]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY17]], label [[OMP_ARRAYCPY_DONE26:%.*]], label [[OMP_ARRAYCPY_BODY18:%.*]] +// CHECK1: omp.arraycpy.body18: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST19:%.*]] = phi %struct.S.0* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY18]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST20:%.*]] = phi %struct.S.0* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT23:%.*]], [[OMP_ARRAYCPY_BODY18]] ] // CHECK1-NEXT: [[TMP41:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[TMP41]], align 4 // CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP42]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[TMP43:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST21]] to i8* -// CHECK1-NEXT: [[ADD_PTR22:%.*]] = getelementptr inbounds i8, i8* [[TMP43]], i64 4 -// CHECK1-NEXT: [[TMP44:%.*]] = bitcast i8* [[ADD_PTR22]] to %struct.BaseS1* -// CHECK1-NEXT: [[TMP45:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST20]] to i8* -// CHECK1-NEXT: [[ADD_PTR23:%.*]] = getelementptr inbounds i8, i8* [[TMP45]], i64 4 -// CHECK1-NEXT: [[TMP46:%.*]] = bitcast i8* [[ADD_PTR23]] to %struct.BaseS1* +// CHECK1-NEXT: [[TMP43:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST20]] to i8* +// CHECK1-NEXT: [[ADD_PTR21:%.*]] = getelementptr inbounds i8, i8* [[TMP43]], i64 4 +// CHECK1-NEXT: [[TMP44:%.*]] = bitcast i8* [[ADD_PTR21]] to %struct.BaseS1* +// CHECK1-NEXT: [[TMP45:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST19]] to i8* +// CHECK1-NEXT: [[ADD_PTR22:%.*]] = getelementptr inbounds i8, i8* [[TMP45]], i64 4 +// CHECK1-NEXT: [[TMP46:%.*]] = bitcast i8* [[ADD_PTR22]] to %struct.BaseS1* // CHECK1-NEXT: call void @.omp_combiner..4(%struct.BaseS1* [[TMP44]], %struct.BaseS1* [[TMP46]]) // CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP42]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT24]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT25]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST20]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE26:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT24]], [[TMP40]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_DONE27]], label [[OMP_ARRAYCPY_BODY19]] -// CHECK1: omp.arraycpy.done27: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT23]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST20]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT24]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST19]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE25:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT23]], [[TMP40]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE25]], label [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_BODY18]] +// CHECK1: omp.arraycpy.done26: // CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP32]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[ARRAY_BEGIN28:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[VAR34]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN28]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN27:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[VAR34]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN27]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP47]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S.0* nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN28]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE29:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done29: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN27]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE28:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done28: // CHECK1-NEXT: [[TMP48:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP49:%.*]] = load i32, i32* [[TMP48]], align 4 // CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP49]]) @@ -2468,7 +2411,7 @@ int main() { // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP6:%.*]] = alloca [4 x %struct.S.0]*, align 8 +// CHECK1-NEXT: [[_TMP5:%.*]] = alloca [4 x %struct.S.0]*, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -2488,36 +2431,26 @@ int main() { // CHECK1-NEXT: [[DOTVAR3__VOID_ADDR:%.*]] = call i8* @__kmpc_alloc(i32 [[TMP4]], i64 48, i8* inttoptr (i64 6 to i8*)) // CHECK1-NEXT: [[DOTVAR3__ADDR:%.*]] = bitcast i8* [[DOTVAR3__VOID_ADDR]] to [4 x %struct.S.0]* // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [4 x %struct.S.0], [4 x %struct.S.0]* [[DOTVAR3__ADDR]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 4 -// CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] -// CHECK1: arrayctor.loop: -// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(%struct.S.0* nonnull align 4 dereferenceable(12) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] -// CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] -// CHECK1: arrayctor.cont: -// CHECK1-NEXT: [[ARRAY_BEGIN3:%.*]] = getelementptr inbounds [4 x %struct.S.0], [4 x %struct.S.0]* [[DOTVAR3__ADDR]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = bitcast [4 x %struct.S.0]* [[TMP2]] to %struct.S.0* -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN3]], i64 4 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN3]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 4 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP5]], [[ARRAYCTOR_CONT]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN3]], [[ARRAYCTOR_CONT]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT5:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: [[TMP7:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* // CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 4 // CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[ADD_PTR]] to %struct.BaseS1* // CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK1-NEXT: [[ADD_PTR4:%.*]] = getelementptr inbounds i8, i8* [[TMP9]], i64 4 -// CHECK1-NEXT: [[TMP10:%.*]] = bitcast i8* [[ADD_PTR4]] to %struct.BaseS1* +// CHECK1-NEXT: [[ADD_PTR3:%.*]] = getelementptr inbounds i8, i8* [[TMP9]], i64 4 +// CHECK1-NEXT: [[TMP10:%.*]] = bitcast i8* [[ADD_PTR3]] to %struct.BaseS1* // CHECK1-NEXT: call void @.omp_initializer..5(%struct.BaseS1* [[TMP8]], %struct.BaseS1* [[TMP10]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT5]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT5]], [[TMP6]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: store [4 x %struct.S.0]* [[DOTVAR3__ADDR]], [4 x %struct.S.0]** [[_TMP6]], align 8 +// CHECK1-NEXT: store [4 x %struct.S.0]* [[DOTVAR3__ADDR]], [4 x %struct.S.0]** [[_TMP5]], align 8 // CHECK1-NEXT: [[LHS_BEGIN:%.*]] = bitcast [4 x %struct.S.0]* [[TMP2]] to %struct.S.0* // CHECK1-NEXT: [[RHS_BEGIN:%.*]] = bitcast [4 x %struct.S.0]* [[DOTVAR3__ADDR]] to %struct.S.0* // CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) @@ -2538,8 +2471,8 @@ int main() { // CHECK1: omp.inner.for.cond: // CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: @@ -2552,8 +2485,8 @@ int main() { // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2571,58 +2504,58 @@ int main() { // CHECK1: .omp.reduction.case1: // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[LHS_BEGIN]], i64 4 // CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[LHS_BEGIN]], [[TMP22]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE15:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi %struct.S.0* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi %struct.S.0* [ [[LHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT13:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST10]] to i8* -// CHECK1-NEXT: [[ADD_PTR11:%.*]] = getelementptr inbounds i8, i8* [[TMP23]], i64 4 -// CHECK1-NEXT: [[TMP24:%.*]] = bitcast i8* [[ADD_PTR11]] to %struct.BaseS1* -// CHECK1-NEXT: [[TMP25:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST9]] to i8* -// CHECK1-NEXT: [[ADD_PTR12:%.*]] = getelementptr inbounds i8, i8* [[TMP25]], i64 4 -// CHECK1-NEXT: [[TMP26:%.*]] = bitcast i8* [[ADD_PTR12]] to %struct.BaseS1* +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST8:%.*]] = phi %struct.S.0* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST9:%.*]] = phi %struct.S.0* [ [[LHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST9]] to i8* +// CHECK1-NEXT: [[ADD_PTR10:%.*]] = getelementptr inbounds i8, i8* [[TMP23]], i64 4 +// CHECK1-NEXT: [[TMP24:%.*]] = bitcast i8* [[ADD_PTR10]] to %struct.BaseS1* +// CHECK1-NEXT: [[TMP25:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST8]] to i8* +// CHECK1-NEXT: [[ADD_PTR11:%.*]] = getelementptr inbounds i8, i8* [[TMP25]], i64 4 +// CHECK1-NEXT: [[TMP26:%.*]] = bitcast i8* [[ADD_PTR11]] to %struct.BaseS1* // CHECK1-NEXT: call void @.omp_combiner..4(%struct.BaseS1* [[TMP24]], %struct.BaseS1* [[TMP26]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT13]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE14:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT13]], [[TMP22]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_DONE15]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done15: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT12]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST9]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST8]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT12]], [[TMP22]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done14: // CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: // CHECK1-NEXT: [[TMP27:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[LHS_BEGIN]], i64 4 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY16:%.*]] = icmp eq %struct.S.0* [[LHS_BEGIN]], [[TMP27]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY16]], label [[OMP_ARRAYCPY_DONE25:%.*]], label [[OMP_ARRAYCPY_BODY17:%.*]] -// CHECK1: omp.arraycpy.body17: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST18:%.*]] = phi %struct.S.0* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT23:%.*]], [[OMP_ARRAYCPY_BODY17]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST19:%.*]] = phi %struct.S.0* [ [[LHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT22:%.*]], [[OMP_ARRAYCPY_BODY17]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY15:%.*]] = icmp eq %struct.S.0* [[LHS_BEGIN]], [[TMP27]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY15]], label [[OMP_ARRAYCPY_DONE24:%.*]], label [[OMP_ARRAYCPY_BODY16:%.*]] +// CHECK1: omp.arraycpy.body16: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST17:%.*]] = phi %struct.S.0* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT22:%.*]], [[OMP_ARRAYCPY_BODY16]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST18:%.*]] = phi %struct.S.0* [ [[LHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT21:%.*]], [[OMP_ARRAYCPY_BODY16]] ] // CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[TMP28:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST19]] to i8* -// CHECK1-NEXT: [[ADD_PTR20:%.*]] = getelementptr inbounds i8, i8* [[TMP28]], i64 4 -// CHECK1-NEXT: [[TMP29:%.*]] = bitcast i8* [[ADD_PTR20]] to %struct.BaseS1* -// CHECK1-NEXT: [[TMP30:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST18]] to i8* -// CHECK1-NEXT: [[ADD_PTR21:%.*]] = getelementptr inbounds i8, i8* [[TMP30]], i64 4 -// CHECK1-NEXT: [[TMP31:%.*]] = bitcast i8* [[ADD_PTR21]] to %struct.BaseS1* +// CHECK1-NEXT: [[TMP28:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST18]] to i8* +// CHECK1-NEXT: [[ADD_PTR19:%.*]] = getelementptr inbounds i8, i8* [[TMP28]], i64 4 +// CHECK1-NEXT: [[TMP29:%.*]] = bitcast i8* [[ADD_PTR19]] to %struct.BaseS1* +// CHECK1-NEXT: [[TMP30:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST17]] to i8* +// CHECK1-NEXT: [[ADD_PTR20:%.*]] = getelementptr inbounds i8, i8* [[TMP30]], i64 4 +// CHECK1-NEXT: [[TMP31:%.*]] = bitcast i8* [[ADD_PTR20]] to %struct.BaseS1* // CHECK1-NEXT: call void @.omp_combiner..4(%struct.BaseS1* [[TMP29]], %struct.BaseS1* [[TMP31]]) // CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT22]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST19]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT23]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST18]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE24:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT22]], [[TMP27]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE24]], label [[OMP_ARRAYCPY_DONE25]], label [[OMP_ARRAYCPY_BODY17]] -// CHECK1: omp.arraycpy.done25: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT21]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST18]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT22]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST17]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE23:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT21]], [[TMP27]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE23]], label [[OMP_ARRAYCPY_DONE24]], label [[OMP_ARRAYCPY_BODY16]] +// CHECK1: omp.arraycpy.done24: // CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[ARRAY_BEGIN26:%.*]] = getelementptr inbounds [4 x %struct.S.0], [4 x %struct.S.0]* [[DOTVAR3__ADDR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN26]], i64 4 +// CHECK1-NEXT: [[ARRAY_BEGIN25:%.*]] = getelementptr inbounds [4 x %struct.S.0], [4 x %struct.S.0]* [[DOTVAR3__ADDR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN25]], i64 4 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP32]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(%struct.S.0* nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN26]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE27:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done27: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN25]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE26:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done26: // CHECK1-NEXT: [[TMP33:%.*]] = bitcast [4 x %struct.S.0]* [[DOTVAR3__ADDR]] to i8* // CHECK1-NEXT: call void @__kmpc_free(i32 [[TMP4]], i8* [[TMP33]], i8* inttoptr (i64 6 to i8*)) // CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP4]]) @@ -2843,7 +2776,6 @@ int main() { // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: call void @.omp_initializer..25(i32* [[T_VAR3]], i32* [[TMP0]]) // CHECK1-NEXT: [[TMP7:%.*]] = load %struct.S*, %struct.S** [[_TMP1]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S* nonnull align 4 dereferenceable(12) [[VAR4]]) // CHECK1-NEXT: [[TMP8:%.*]] = bitcast %struct.S* [[VAR4]] to i8* // CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[TMP8]], i64 4 // CHECK1-NEXT: [[TMP9:%.*]] = bitcast i8* [[ADD_PTR]] to %struct.BaseS1* @@ -2852,7 +2784,6 @@ int main() { // CHECK1-NEXT: [[TMP11:%.*]] = bitcast i8* [[ADD_PTR5]] to %struct.BaseS1* // CHECK1-NEXT: call void @.omp_initializer..5(%struct.BaseS1* [[TMP9]], %struct.BaseS1* [[TMP11]]) // CHECK1-NEXT: store %struct.S* [[VAR4]], %struct.S** [[_TMP6]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S* nonnull align 4 dereferenceable(12) [[VAR17]]) // CHECK1-NEXT: call void @.omp_initializer..27(%struct.S* [[VAR17]], %struct.S* [[TMP2]]) // CHECK1-NEXT: call void @.omp_initializer..29(i32* [[T_VAR18]], i32* [[TMP3]]) // CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -3296,32 +3227,22 @@ int main() { // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [42 x %struct.S], [42 x %struct.S]* [[TMP0]], i64 0, i64 1 // CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [42 x %struct.S], [42 x %struct.S]* [[TMP0]], i64 0, i64 40 // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [40 x %struct.S], [40 x %struct.S]* [[ARR4]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 40 -// CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] -// CHECK1: arrayctor.loop: -// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S* nonnull align 4 dereferenceable(12) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYCTOR_CUR]], i64 1 -// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] -// CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] -// CHECK1: arrayctor.cont: -// CHECK1-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [40 x %struct.S], [40 x %struct.S]* [[ARR4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN5]], i64 40 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 40 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAYIDX]], [[ARRAYCTOR_CONT]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN5]], [[ARRAYCTOR_CONT]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT7:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAYIDX]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT6:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: [[TMP7:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* // CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 4 // CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[ADD_PTR]] to %struct.BaseS1* // CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK1-NEXT: [[ADD_PTR6:%.*]] = getelementptr inbounds i8, i8* [[TMP9]], i64 4 -// CHECK1-NEXT: [[TMP10:%.*]] = bitcast i8* [[ADD_PTR6]] to %struct.BaseS1* +// CHECK1-NEXT: [[ADD_PTR5:%.*]] = getelementptr inbounds i8, i8* [[TMP9]], i64 4 +// CHECK1-NEXT: [[TMP10:%.*]] = bitcast i8* [[ADD_PTR5]] to %struct.BaseS1* // CHECK1-NEXT: call void @.omp_initializer..37(%struct.BaseS1* [[TMP8]], %struct.BaseS1* [[TMP10]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT7]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT7]], [[TMP6]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT6]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT6]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: // CHECK1-NEXT: [[TMP11:%.*]] = bitcast [42 x %struct.S]* [[TMP0]] to %struct.S* @@ -3353,8 +3274,8 @@ int main() { // CHECK1: omp.inner.for.cond: // CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: @@ -3365,20 +3286,20 @@ int main() { // CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP2]], align 4 // CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[I]], align 4 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP1]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP27]], i32* [[ARRAYIDX9]], align 4 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP1]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP27]], i32* [[ARRAYIDX8]], align 4 // CHECK1-NEXT: [[TMP29:%.*]] = load %struct.S*, %struct.S** [[_TMP1]], align 8 // CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP3]], i64 0, i64 [[IDXPROM10]] -// CHECK1-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(12) %struct.S* @_ZN1SIiEaSERKS0_(%struct.S* nonnull align 4 dereferenceable(12) [[ARRAYIDX11]], %struct.S* nonnull align 4 dereferenceable(12) [[TMP29]]) +// CHECK1-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP3]], i64 0, i64 [[IDXPROM9]] +// CHECK1-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(12) %struct.S* @_ZN1SIiEaSERKS0_(%struct.S* nonnull align 4 dereferenceable(12) [[ARRAYIDX10]], %struct.S* nonnull align 4 dereferenceable(12) [[TMP29]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: // CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP31]], 1 -// CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3400,60 +3321,60 @@ int main() { // CHECK1: .omp.reduction.case1: // CHECK1-NEXT: [[TMP40:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAYIDX]], i64 40 // CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAYIDX]], [[TMP40]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE19:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST13:%.*]] = phi %struct.S* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi %struct.S* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP41:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST14]] to i8* -// CHECK1-NEXT: [[ADD_PTR15:%.*]] = getelementptr inbounds i8, i8* [[TMP41]], i64 4 -// CHECK1-NEXT: [[TMP42:%.*]] = bitcast i8* [[ADD_PTR15]] to %struct.BaseS1* -// CHECK1-NEXT: [[TMP43:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST13]] to i8* -// CHECK1-NEXT: [[ADD_PTR16:%.*]] = getelementptr inbounds i8, i8* [[TMP43]], i64 4 -// CHECK1-NEXT: [[TMP44:%.*]] = bitcast i8* [[ADD_PTR16]] to %struct.BaseS1* +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST12:%.*]] = phi %struct.S* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST13:%.*]] = phi %struct.S* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP41:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST13]] to i8* +// CHECK1-NEXT: [[ADD_PTR14:%.*]] = getelementptr inbounds i8, i8* [[TMP41]], i64 4 +// CHECK1-NEXT: [[TMP42:%.*]] = bitcast i8* [[ADD_PTR14]] to %struct.BaseS1* +// CHECK1-NEXT: [[TMP43:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST12]] to i8* +// CHECK1-NEXT: [[ADD_PTR15:%.*]] = getelementptr inbounds i8, i8* [[TMP43]], i64 4 +// CHECK1-NEXT: [[TMP44:%.*]] = bitcast i8* [[ADD_PTR15]] to %struct.BaseS1* // CHECK1-NEXT: call void @.omp_combiner..36(%struct.BaseS1* [[TMP42]], %struct.BaseS1* [[TMP44]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST13]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP40]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done19: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT16]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST13]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST12]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP40]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done18: // CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP37]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: // CHECK1-NEXT: [[TMP45:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAYIDX]], i64 40 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY20:%.*]] = icmp eq %struct.S* [[ARRAYIDX]], [[TMP45]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY20]], label [[OMP_ARRAYCPY_DONE29:%.*]], label [[OMP_ARRAYCPY_BODY21:%.*]] -// CHECK1: omp.arraycpy.body21: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST22:%.*]] = phi %struct.S* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT27:%.*]], [[OMP_ARRAYCPY_BODY21]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST23:%.*]] = phi %struct.S* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT26:%.*]], [[OMP_ARRAYCPY_BODY21]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY19:%.*]] = icmp eq %struct.S* [[ARRAYIDX]], [[TMP45]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY19]], label [[OMP_ARRAYCPY_DONE28:%.*]], label [[OMP_ARRAYCPY_BODY20:%.*]] +// CHECK1: omp.arraycpy.body20: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST21:%.*]] = phi %struct.S* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT26:%.*]], [[OMP_ARRAYCPY_BODY20]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST22:%.*]] = phi %struct.S* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT25:%.*]], [[OMP_ARRAYCPY_BODY20]] ] // CHECK1-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 // CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP47]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[TMP48:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST23]] to i8* -// CHECK1-NEXT: [[ADD_PTR24:%.*]] = getelementptr inbounds i8, i8* [[TMP48]], i64 4 -// CHECK1-NEXT: [[TMP49:%.*]] = bitcast i8* [[ADD_PTR24]] to %struct.BaseS1* -// CHECK1-NEXT: [[TMP50:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST22]] to i8* -// CHECK1-NEXT: [[ADD_PTR25:%.*]] = getelementptr inbounds i8, i8* [[TMP50]], i64 4 -// CHECK1-NEXT: [[TMP51:%.*]] = bitcast i8* [[ADD_PTR25]] to %struct.BaseS1* +// CHECK1-NEXT: [[TMP48:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST22]] to i8* +// CHECK1-NEXT: [[ADD_PTR23:%.*]] = getelementptr inbounds i8, i8* [[TMP48]], i64 4 +// CHECK1-NEXT: [[TMP49:%.*]] = bitcast i8* [[ADD_PTR23]] to %struct.BaseS1* +// CHECK1-NEXT: [[TMP50:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST21]] to i8* +// CHECK1-NEXT: [[ADD_PTR24:%.*]] = getelementptr inbounds i8, i8* [[TMP50]], i64 4 +// CHECK1-NEXT: [[TMP51:%.*]] = bitcast i8* [[ADD_PTR24]] to %struct.BaseS1* // CHECK1-NEXT: call void @.omp_combiner..36(%struct.BaseS1* [[TMP49]], %struct.BaseS1* [[TMP51]]) // CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP47]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT26]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT27]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST22]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE28:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT26]], [[TMP45]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE28]], label [[OMP_ARRAYCPY_DONE29]], label [[OMP_ARRAYCPY_BODY21]] -// CHECK1: omp.arraycpy.done29: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT25]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT26]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST21]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE27:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT25]], [[TMP45]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE27]], label [[OMP_ARRAYCPY_DONE28]], label [[OMP_ARRAYCPY_BODY20]] +// CHECK1: omp.arraycpy.done28: // CHECK1-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP37]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[ARRAY_BEGIN30:%.*]] = getelementptr inbounds [40 x %struct.S], [40 x %struct.S]* [[ARR4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN30]], i64 40 +// CHECK1-NEXT: [[ARRAY_BEGIN29:%.*]] = getelementptr inbounds [40 x %struct.S], [40 x %struct.S]* [[ARR4]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN29]], i64 40 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP52]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(%struct.S* nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN30]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE31:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done31: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN29]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE30:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done30: // CHECK1-NEXT: [[TMP53:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP54:%.*]] = load i32, i32* [[TMP53]], align 4 // CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP54]]) @@ -3581,7 +3502,6 @@ int main() { // CHECK2-NEXT: store %struct.S* [[S]], %struct.S** [[S_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load %struct.S*, %struct.S** [[S_ADDR]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: call void @_ZN1SIiEC1Ev(%struct.S* nonnull align 4 dereferenceable(12) [[S1]]) // CHECK2-NEXT: call void @.omp_initializer.(%struct.S* [[S1]], %struct.S* [[TMP0]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: @@ -3861,7 +3781,6 @@ int main() { // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK2-NEXT: call void @.omp_initializer..3(float* [[T_VAR3]], float* [[TMP0]]) // CHECK2-NEXT: [[TMP7:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP1]], align 8 -// CHECK2-NEXT: call void @_ZN1SIfEC1Ev(%struct.S.0* nonnull align 4 dereferenceable(12) [[VAR4]]) // CHECK2-NEXT: [[TMP8:%.*]] = bitcast %struct.S.0* [[VAR4]] to i8* // CHECK2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[TMP8]], i64 4 // CHECK2-NEXT: [[TMP9:%.*]] = bitcast i8* [[ADD_PTR]] to %struct.BaseS1* @@ -3870,7 +3789,6 @@ int main() { // CHECK2-NEXT: [[TMP11:%.*]] = bitcast i8* [[ADD_PTR5]] to %struct.BaseS1* // CHECK2-NEXT: call void @.omp_initializer..5(%struct.BaseS1* [[TMP9]], %struct.BaseS1* [[TMP11]]) // CHECK2-NEXT: store %struct.S.0* [[VAR4]], %struct.S.0** [[_TMP6]], align 8 -// CHECK2-NEXT: call void @_ZN1SIfEC1Ev(%struct.S.0* nonnull align 4 dereferenceable(12) [[VAR17]]) // CHECK2-NEXT: call void @.omp_initializer..7(%struct.S.0* [[VAR17]], %struct.S.0* [[TMP2]]) // CHECK2-NEXT: [[TMP12:%.*]] = load float, float* @.init, align 4 // CHECK2-NEXT: store float [[TMP12]], float* [[T_VAR18]], align 4 @@ -4241,24 +4159,12 @@ int main() { // CHECK2-NEXT: [[TMP29:%.*]] = mul nuw i64 [[TMP28]], ptrtoint (%struct.S.0* getelementptr ([[STRUCT_S_0]], %struct.S.0* null, i32 1) to i64) // CHECK2-NEXT: [[VLA16:%.*]] = alloca [[STRUCT_S_0]], i64 [[TMP28]], align 16 // CHECK2-NEXT: store i64 [[TMP28]], i64* [[__VLA_EXPR1]], align 8 -// CHECK2-NEXT: [[ISEMPTY:%.*]] = icmp eq i64 [[TMP28]], 0 -// CHECK2-NEXT: br i1 [[ISEMPTY]], label [[ARRAYCTOR_CONT:%.*]], label [[NEW_CTORLOOP:%.*]] -// CHECK2: new.ctorloop: -// CHECK2-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[VLA16]], i64 [[TMP28]] -// CHECK2-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] -// CHECK2: arrayctor.loop: -// CHECK2-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[VLA16]], [[NEW_CTORLOOP]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK2-NEXT: call void @_ZN1SIfEC1Ev(%struct.S.0* nonnull align 4 dereferenceable(12) [[ARRAYCTOR_CUR]]) -// CHECK2-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK2-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] -// CHECK2-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT]], label [[ARRAYCTOR_LOOP]] -// CHECK2: arrayctor.cont: // CHECK2-NEXT: [[TMP30:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[VLA16]], i64 [[TMP28]] // CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY17:%.*]] = icmp eq %struct.S.0* [[VLA16]], [[TMP30]] // CHECK2-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY17]], label [[OMP_ARRAYINIT_DONE25:%.*]], label [[OMP_ARRAYINIT_BODY18:%.*]] // CHECK2: omp.arrayinit.body18: -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST19:%.*]] = phi %struct.S.0* [ [[ARRAYIDX10]], [[ARRAYCTOR_CONT]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT22:%.*]], [[OMP_ARRAYINIT_BODY18]] ] -// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST20:%.*]] = phi %struct.S.0* [ [[VLA16]], [[ARRAYCTOR_CONT]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT23:%.*]], [[OMP_ARRAYINIT_BODY18]] ] +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST19:%.*]] = phi %struct.S.0* [ [[ARRAYIDX10]], [[OMP_ARRAYINIT_DONE]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT22:%.*]], [[OMP_ARRAYINIT_BODY18]] ] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST20:%.*]] = phi %struct.S.0* [ [[VLA16]], [[OMP_ARRAYINIT_DONE]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT23:%.*]], [[OMP_ARRAYINIT_BODY18]] ] // CHECK2-NEXT: [[TMP31:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST20]] to i8* // CHECK2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[TMP31]], i64 4 // CHECK2-NEXT: [[TMP32:%.*]] = bitcast i8* [[ADD_PTR]] to %struct.BaseS1* @@ -4582,35 +4488,25 @@ int main() { // CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK2: omp.arrayinit.done: // CHECK2-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [4 x %struct.S.0]], [10 x [4 x %struct.S.0]]* [[ARRS5]], i32 0, i32 0, i32 0 -// CHECK2-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 40 -// CHECK2-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] -// CHECK2: arrayctor.loop: -// CHECK2-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[OMP_ARRAYINIT_DONE]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK2-NEXT: call void @_ZN1SIfEC1Ev(%struct.S.0* nonnull align 4 dereferenceable(12) [[ARRAYCTOR_CUR]]) -// CHECK2-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK2-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] -// CHECK2-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] -// CHECK2: arrayctor.cont: -// CHECK2-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [10 x [4 x %struct.S.0]], [10 x [4 x %struct.S.0]]* [[ARRS5]], i32 0, i32 0, i32 0 // CHECK2-NEXT: [[TMP9:%.*]] = bitcast [10 x [4 x %struct.S.0]]* [[TMP3]] to %struct.S.0* -// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN6]], i64 40 -// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY7:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN6]], [[TMP10]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY7]], label [[OMP_ARRAYINIT_DONE15:%.*]], label [[OMP_ARRAYINIT_BODY8:%.*]] -// CHECK2: omp.arrayinit.body8: -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi %struct.S.0* [ [[TMP9]], [[ARRAYCTOR_CONT]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT12:%.*]], [[OMP_ARRAYINIT_BODY8]] ] -// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN6]], [[ARRAYCTOR_CONT]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT13:%.*]], [[OMP_ARRAYINIT_BODY8]] ] -// CHECK2-NEXT: [[TMP11:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST10]] to i8* +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 40 +// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY6:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP10]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY6]], label [[OMP_ARRAYINIT_DONE14:%.*]], label [[OMP_ARRAYINIT_BODY7:%.*]] +// CHECK2: omp.arrayinit.body7: +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST8:%.*]] = phi %struct.S.0* [ [[TMP9]], [[OMP_ARRAYINIT_DONE]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYINIT_BODY7]] ] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST9:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[OMP_ARRAYINIT_DONE]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT12:%.*]], [[OMP_ARRAYINIT_BODY7]] ] +// CHECK2-NEXT: [[TMP11:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST9]] to i8* // CHECK2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[TMP11]], i64 4 // CHECK2-NEXT: [[TMP12:%.*]] = bitcast i8* [[ADD_PTR]] to %struct.BaseS1* -// CHECK2-NEXT: [[TMP13:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST9]] to i8* -// CHECK2-NEXT: [[ADD_PTR11:%.*]] = getelementptr inbounds i8, i8* [[TMP13]], i64 4 -// CHECK2-NEXT: [[TMP14:%.*]] = bitcast i8* [[ADD_PTR11]] to %struct.BaseS1* +// CHECK2-NEXT: [[TMP13:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST8]] to i8* +// CHECK2-NEXT: [[ADD_PTR10:%.*]] = getelementptr inbounds i8, i8* [[TMP13]], i64 4 +// CHECK2-NEXT: [[TMP14:%.*]] = bitcast i8* [[ADD_PTR10]] to %struct.BaseS1* // CHECK2-NEXT: call void @.omp_initializer..5(%struct.BaseS1* [[TMP12]], %struct.BaseS1* [[TMP14]]) -// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT12]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT13]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE14:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT13]], [[TMP10]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYINIT_DONE15]], label [[OMP_ARRAYINIT_BODY8]] -// CHECK2: omp.arrayinit.done15: +// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST8]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT12]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST9]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT12]], [[TMP10]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYINIT_DONE14]], label [[OMP_ARRAYINIT_BODY7]] +// CHECK2: omp.arrayinit.done14: // CHECK2-NEXT: [[LHS_BEGIN:%.*]] = bitcast [10 x [4 x %struct.S.0]]* [[TMP3]] to %struct.S.0* // CHECK2-NEXT: [[RHS_BEGIN:%.*]] = bitcast [10 x [4 x %struct.S.0]]* [[ARRS5]] to %struct.S.0* // CHECK2-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -4633,8 +4529,8 @@ int main() { // CHECK2: omp.inner.for.cond: // CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP16:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK2-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK2-NEXT: [[CMP15:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK2-NEXT: br i1 [[CMP15]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK2: omp.inner.for.cond.cleanup: // CHECK2-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: @@ -4646,17 +4542,17 @@ int main() { // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[VLA3]], i64 [[TMP23]] // CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK2-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX17]], align 4 +// CHECK2-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i64 [[IDXPROM]] +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX16]], align 4 // CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP25]], 1 -// CHECK2-NEXT: store i32 [[INC]], i32* [[ARRAYIDX17]], align 4 +// CHECK2-NEXT: store i32 [[INC]], i32* [[ARRAYIDX16]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: // CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP26]], 1 -// CHECK2-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP26]], 1 +// CHECK2-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -4684,88 +4580,88 @@ int main() { // CHECK2: .omp.reduction.case1: // CHECK2-NEXT: [[TMP39:%.*]] = getelementptr i32, i32* [[TMP2]], i64 [[TMP6]] // CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i32* [[TMP2]], [[TMP39]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE23:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE22:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK2: omp.arraycpy.body: -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST19:%.*]] = phi i32* [ [[VLA3]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST20:%.*]] = phi i32* [ [[TMP2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT21:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK2-NEXT: call void @.omp_combiner..10(i32* [[OMP_ARRAYCPY_DESTELEMENTPAST20]], i32* [[OMP_ARRAYCPY_SRCELEMENTPAST19]]) -// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT21]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST20]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST19]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE22:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT21]], [[TMP39]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE22]], label [[OMP_ARRAYCPY_DONE23]], label [[OMP_ARRAYCPY_BODY]] -// CHECK2: omp.arraycpy.done23: +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST18:%.*]] = phi i32* [ [[VLA3]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST19:%.*]] = phi i32* [ [[TMP2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT20:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK2-NEXT: call void @.omp_combiner..10(i32* [[OMP_ARRAYCPY_DESTELEMENTPAST19]], i32* [[OMP_ARRAYCPY_SRCELEMENTPAST18]]) +// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT20]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST19]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST18]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE21:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT20]], [[TMP39]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_DONE22]], label [[OMP_ARRAYCPY_BODY]] +// CHECK2: omp.arraycpy.done22: // CHECK2-NEXT: [[TMP40:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[LHS_BEGIN]], i64 40 -// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY24:%.*]] = icmp eq %struct.S.0* [[LHS_BEGIN]], [[TMP40]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY24]], label [[OMP_ARRAYCPY_DONE33:%.*]], label [[OMP_ARRAYCPY_BODY25:%.*]] -// CHECK2: omp.arraycpy.body25: -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST26:%.*]] = phi %struct.S.0* [ [[RHS_BEGIN]], [[OMP_ARRAYCPY_DONE23]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT31:%.*]], [[OMP_ARRAYCPY_BODY25]] ] -// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST27:%.*]] = phi %struct.S.0* [ [[LHS_BEGIN]], [[OMP_ARRAYCPY_DONE23]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT30:%.*]], [[OMP_ARRAYCPY_BODY25]] ] -// CHECK2-NEXT: [[TMP41:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST27]] to i8* -// CHECK2-NEXT: [[ADD_PTR28:%.*]] = getelementptr inbounds i8, i8* [[TMP41]], i64 4 -// CHECK2-NEXT: [[TMP42:%.*]] = bitcast i8* [[ADD_PTR28]] to %struct.BaseS1* -// CHECK2-NEXT: [[TMP43:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST26]] to i8* -// CHECK2-NEXT: [[ADD_PTR29:%.*]] = getelementptr inbounds i8, i8* [[TMP43]], i64 4 -// CHECK2-NEXT: [[TMP44:%.*]] = bitcast i8* [[ADD_PTR29]] to %struct.BaseS1* +// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY23:%.*]] = icmp eq %struct.S.0* [[LHS_BEGIN]], [[TMP40]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY23]], label [[OMP_ARRAYCPY_DONE32:%.*]], label [[OMP_ARRAYCPY_BODY24:%.*]] +// CHECK2: omp.arraycpy.body24: +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST25:%.*]] = phi %struct.S.0* [ [[RHS_BEGIN]], [[OMP_ARRAYCPY_DONE22]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT30:%.*]], [[OMP_ARRAYCPY_BODY24]] ] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST26:%.*]] = phi %struct.S.0* [ [[LHS_BEGIN]], [[OMP_ARRAYCPY_DONE22]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT29:%.*]], [[OMP_ARRAYCPY_BODY24]] ] +// CHECK2-NEXT: [[TMP41:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST26]] to i8* +// CHECK2-NEXT: [[ADD_PTR27:%.*]] = getelementptr inbounds i8, i8* [[TMP41]], i64 4 +// CHECK2-NEXT: [[TMP42:%.*]] = bitcast i8* [[ADD_PTR27]] to %struct.BaseS1* +// CHECK2-NEXT: [[TMP43:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST25]] to i8* +// CHECK2-NEXT: [[ADD_PTR28:%.*]] = getelementptr inbounds i8, i8* [[TMP43]], i64 4 +// CHECK2-NEXT: [[TMP44:%.*]] = bitcast i8* [[ADD_PTR28]] to %struct.BaseS1* // CHECK2-NEXT: call void @.omp_combiner..4(%struct.BaseS1* [[TMP42]], %struct.BaseS1* [[TMP44]]) -// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT30]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST27]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT31]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST26]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE32:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT30]], [[TMP40]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE32]], label [[OMP_ARRAYCPY_DONE33]], label [[OMP_ARRAYCPY_BODY25]] -// CHECK2: omp.arraycpy.done33: +// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT29]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT30]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST25]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE31:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT29]], [[TMP40]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE31]], label [[OMP_ARRAYCPY_DONE32]], label [[OMP_ARRAYCPY_BODY24]] +// CHECK2: omp.arraycpy.done32: // CHECK2-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP36]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.case2: // CHECK2-NEXT: [[TMP45:%.*]] = getelementptr i32, i32* [[TMP2]], i64 [[TMP6]] -// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY34:%.*]] = icmp eq i32* [[TMP2]], [[TMP45]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY34]], label [[OMP_ARRAYCPY_DONE41:%.*]], label [[OMP_ARRAYCPY_BODY35:%.*]] -// CHECK2: omp.arraycpy.body35: -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST36:%.*]] = phi i32* [ [[VLA3]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT39:%.*]], [[OMP_ARRAYCPY_BODY35]] ] -// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST37:%.*]] = phi i32* [ [[TMP2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT38:%.*]], [[OMP_ARRAYCPY_BODY35]] ] +// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY33:%.*]] = icmp eq i32* [[TMP2]], [[TMP45]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY33]], label [[OMP_ARRAYCPY_DONE40:%.*]], label [[OMP_ARRAYCPY_BODY34:%.*]] +// CHECK2: omp.arraycpy.body34: +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST35:%.*]] = phi i32* [ [[VLA3]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT38:%.*]], [[OMP_ARRAYCPY_BODY34]] ] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST36:%.*]] = phi i32* [ [[TMP2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT37:%.*]], [[OMP_ARRAYCPY_BODY34]] ] // CHECK2-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 // CHECK2-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP47]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK2-NEXT: call void @.omp_combiner..10(i32* [[OMP_ARRAYCPY_DESTELEMENTPAST37]], i32* [[OMP_ARRAYCPY_SRCELEMENTPAST36]]) +// CHECK2-NEXT: call void @.omp_combiner..10(i32* [[OMP_ARRAYCPY_DESTELEMENTPAST36]], i32* [[OMP_ARRAYCPY_SRCELEMENTPAST35]]) // CHECK2-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP47]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT38]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST37]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT39]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST36]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE40:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT38]], [[TMP45]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE40]], label [[OMP_ARRAYCPY_DONE41]], label [[OMP_ARRAYCPY_BODY35]] -// CHECK2: omp.arraycpy.done41: +// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT37]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST36]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT38]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST35]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE39:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT37]], [[TMP45]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE39]], label [[OMP_ARRAYCPY_DONE40]], label [[OMP_ARRAYCPY_BODY34]] +// CHECK2: omp.arraycpy.done40: // CHECK2-NEXT: [[TMP48:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[LHS_BEGIN]], i64 40 -// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY42:%.*]] = icmp eq %struct.S.0* [[LHS_BEGIN]], [[TMP48]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY42]], label [[OMP_ARRAYCPY_DONE51:%.*]], label [[OMP_ARRAYCPY_BODY43:%.*]] -// CHECK2: omp.arraycpy.body43: -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST44:%.*]] = phi %struct.S.0* [ [[RHS_BEGIN]], [[OMP_ARRAYCPY_DONE41]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT49:%.*]], [[OMP_ARRAYCPY_BODY43]] ] -// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST45:%.*]] = phi %struct.S.0* [ [[LHS_BEGIN]], [[OMP_ARRAYCPY_DONE41]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT48:%.*]], [[OMP_ARRAYCPY_BODY43]] ] +// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY41:%.*]] = icmp eq %struct.S.0* [[LHS_BEGIN]], [[TMP48]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY41]], label [[OMP_ARRAYCPY_DONE50:%.*]], label [[OMP_ARRAYCPY_BODY42:%.*]] +// CHECK2: omp.arraycpy.body42: +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST43:%.*]] = phi %struct.S.0* [ [[RHS_BEGIN]], [[OMP_ARRAYCPY_DONE40]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT48:%.*]], [[OMP_ARRAYCPY_BODY42]] ] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST44:%.*]] = phi %struct.S.0* [ [[LHS_BEGIN]], [[OMP_ARRAYCPY_DONE40]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT47:%.*]], [[OMP_ARRAYCPY_BODY42]] ] // CHECK2-NEXT: [[TMP49:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: [[TMP50:%.*]] = load i32, i32* [[TMP49]], align 4 // CHECK2-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP50]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK2-NEXT: [[TMP51:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST45]] to i8* -// CHECK2-NEXT: [[ADD_PTR46:%.*]] = getelementptr inbounds i8, i8* [[TMP51]], i64 4 -// CHECK2-NEXT: [[TMP52:%.*]] = bitcast i8* [[ADD_PTR46]] to %struct.BaseS1* -// CHECK2-NEXT: [[TMP53:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST44]] to i8* -// CHECK2-NEXT: [[ADD_PTR47:%.*]] = getelementptr inbounds i8, i8* [[TMP53]], i64 4 -// CHECK2-NEXT: [[TMP54:%.*]] = bitcast i8* [[ADD_PTR47]] to %struct.BaseS1* +// CHECK2-NEXT: [[TMP51:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST44]] to i8* +// CHECK2-NEXT: [[ADD_PTR45:%.*]] = getelementptr inbounds i8, i8* [[TMP51]], i64 4 +// CHECK2-NEXT: [[TMP52:%.*]] = bitcast i8* [[ADD_PTR45]] to %struct.BaseS1* +// CHECK2-NEXT: [[TMP53:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST43]] to i8* +// CHECK2-NEXT: [[ADD_PTR46:%.*]] = getelementptr inbounds i8, i8* [[TMP53]], i64 4 +// CHECK2-NEXT: [[TMP54:%.*]] = bitcast i8* [[ADD_PTR46]] to %struct.BaseS1* // CHECK2-NEXT: call void @.omp_combiner..4(%struct.BaseS1* [[TMP52]], %struct.BaseS1* [[TMP54]]) // CHECK2-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP50]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT48]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST45]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT49]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST44]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE50:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT48]], [[TMP48]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE50]], label [[OMP_ARRAYCPY_DONE51]], label [[OMP_ARRAYCPY_BODY43]] -// CHECK2: omp.arraycpy.done51: +// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT47]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST44]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT48]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST43]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE49:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT47]], [[TMP48]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE49]], label [[OMP_ARRAYCPY_DONE50]], label [[OMP_ARRAYCPY_BODY42]] +// CHECK2: omp.arraycpy.done50: // CHECK2-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP36]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.default: -// CHECK2-NEXT: [[ARRAY_BEGIN52:%.*]] = getelementptr inbounds [10 x [4 x %struct.S.0]], [10 x [4 x %struct.S.0]]* [[ARRS5]], i32 0, i32 0, i32 0 -// CHECK2-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN52]], i64 40 +// CHECK2-NEXT: [[ARRAY_BEGIN51:%.*]] = getelementptr inbounds [10 x [4 x %struct.S.0]], [10 x [4 x %struct.S.0]]* [[ARRS5]], i32 0, i32 0, i32 0 +// CHECK2-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN51]], i64 40 // CHECK2-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK2: arraydestroy.body: // CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP55]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK2-NEXT: call void @_ZN1SIfED1Ev(%struct.S.0* nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN52]] -// CHECK2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE53:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK2: arraydestroy.done53: +// CHECK2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN51]] +// CHECK2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE52:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK2: arraydestroy.done52: // CHECK2-NEXT: [[TMP56:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 // CHECK2-NEXT: call void @llvm.stackrestore(i8* [[TMP56]]) // CHECK2-NEXT: [[TMP57:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -4877,24 +4773,12 @@ int main() { // CHECK2-NEXT: store i8* [[TMP11]], i8** [[SAVED_STACK]], align 8 // CHECK2-NEXT: [[VLA:%.*]] = alloca [[STRUCT_S_0]], i64 [[TMP9]], align 16 // CHECK2-NEXT: store i64 [[TMP9]], i64* [[__VLA_EXPR0]], align 8 -// CHECK2-NEXT: [[ISEMPTY:%.*]] = icmp eq i64 [[TMP9]], 0 -// CHECK2-NEXT: br i1 [[ISEMPTY]], label [[ARRAYCTOR_CONT:%.*]], label [[NEW_CTORLOOP:%.*]] -// CHECK2: new.ctorloop: -// CHECK2-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[VLA]], i64 [[TMP9]] -// CHECK2-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] -// CHECK2: arrayctor.loop: -// CHECK2-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[VLA]], [[NEW_CTORLOOP]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK2-NEXT: call void @_ZN1SIfEC1Ev(%struct.S.0* nonnull align 4 dereferenceable(12) [[ARRAYCTOR_CUR]]) -// CHECK2-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK2-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] -// CHECK2-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT]], label [[ARRAYCTOR_LOOP]] -// CHECK2: arrayctor.cont: // CHECK2-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[VLA]], i64 [[TMP9]] // CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[VLA]], [[TMP12]] // CHECK2-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK2: omp.arrayinit.body: -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAYIDX1]], [[ARRAYCTOR_CONT]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] -// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[VLA]], [[ARRAYCTOR_CONT]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT5:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAYIDX1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[VLA]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT5:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK2-NEXT: [[TMP13:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* // CHECK2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[TMP13]], i64 4 // CHECK2-NEXT: [[TMP14:%.*]] = bitcast i8* [[ADD_PTR]] to %struct.BaseS1* @@ -5105,32 +4989,22 @@ int main() { // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [5 x %struct.S.0], [5 x %struct.S.0]* [[TMP0]], i64 0, i64 0 // CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [5 x %struct.S.0], [5 x %struct.S.0]* [[TMP0]], i64 0, i64 4 // CHECK2-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [5 x %struct.S.0], [5 x %struct.S.0]* [[VVAR22]], i32 0, i32 0 -// CHECK2-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 5 -// CHECK2-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] -// CHECK2: arrayctor.loop: -// CHECK2-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK2-NEXT: call void @_ZN1SIfEC1Ev(%struct.S.0* nonnull align 4 dereferenceable(12) [[ARRAYCTOR_CUR]]) -// CHECK2-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK2-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] -// CHECK2-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] -// CHECK2: arrayctor.cont: -// CHECK2-NEXT: [[ARRAY_BEGIN3:%.*]] = getelementptr inbounds [5 x %struct.S.0], [5 x %struct.S.0]* [[VVAR22]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN3]], i64 5 -// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN3]], [[TMP1]] +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 5 +// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP1]] // CHECK2-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK2: omp.arrayinit.body: -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAYIDX]], [[ARRAYCTOR_CONT]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] -// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN3]], [[ARRAYCTOR_CONT]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT5:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAYIDX]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK2-NEXT: [[TMP2:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* // CHECK2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i64 4 // CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[ADD_PTR]] to %struct.BaseS1* // CHECK2-NEXT: [[TMP4:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK2-NEXT: [[ADD_PTR4:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i64 4 -// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[ADD_PTR4]] to %struct.BaseS1* +// CHECK2-NEXT: [[ADD_PTR3:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i64 4 +// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[ADD_PTR3]] to %struct.BaseS1* // CHECK2-NEXT: call void @.omp_initializer..5(%struct.BaseS1* [[TMP3]], %struct.BaseS1* [[TMP5]]) // CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT5]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT5]], [[TMP1]] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP1]] // CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK2: omp.arrayinit.done: // CHECK2-NEXT: [[TMP6:%.*]] = bitcast [5 x %struct.S.0]* [[TMP0]] to %struct.S.0* @@ -5162,8 +5036,8 @@ int main() { // CHECK2: omp.inner.for.cond: // CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] -// CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK2: omp.inner.for.cond.cleanup: // CHECK2-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: @@ -5176,8 +5050,8 @@ int main() { // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: // CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK2-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -5199,60 +5073,60 @@ int main() { // CHECK2: .omp.reduction.case1: // CHECK2-NEXT: [[TMP31:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAYIDX]], i64 5 // CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAYIDX]], [[TMP31]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK2: omp.arraycpy.body: -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST8:%.*]] = phi %struct.S.0* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST9:%.*]] = phi %struct.S.0* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK2-NEXT: [[TMP32:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST9]] to i8* -// CHECK2-NEXT: [[ADD_PTR10:%.*]] = getelementptr inbounds i8, i8* [[TMP32]], i64 4 -// CHECK2-NEXT: [[TMP33:%.*]] = bitcast i8* [[ADD_PTR10]] to %struct.BaseS1* -// CHECK2-NEXT: [[TMP34:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST8]] to i8* -// CHECK2-NEXT: [[ADD_PTR11:%.*]] = getelementptr inbounds i8, i8* [[TMP34]], i64 4 -// CHECK2-NEXT: [[TMP35:%.*]] = bitcast i8* [[ADD_PTR11]] to %struct.BaseS1* +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST7:%.*]] = phi %struct.S.0* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST8:%.*]] = phi %struct.S.0* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK2-NEXT: [[TMP32:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST8]] to i8* +// CHECK2-NEXT: [[ADD_PTR9:%.*]] = getelementptr inbounds i8, i8* [[TMP32]], i64 4 +// CHECK2-NEXT: [[TMP33:%.*]] = bitcast i8* [[ADD_PTR9]] to %struct.BaseS1* +// CHECK2-NEXT: [[TMP34:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST7]] to i8* +// CHECK2-NEXT: [[ADD_PTR10:%.*]] = getelementptr inbounds i8, i8* [[TMP34]], i64 4 +// CHECK2-NEXT: [[TMP35:%.*]] = bitcast i8* [[ADD_PTR10]] to %struct.BaseS1* // CHECK2-NEXT: call void @.omp_combiner..4(%struct.BaseS1* [[TMP33]], %struct.BaseS1* [[TMP35]]) -// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT12]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST9]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST8]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT12]], [[TMP31]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY]] -// CHECK2: omp.arraycpy.done14: +// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST7]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE12:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP31]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] +// CHECK2: omp.arraycpy.done13: // CHECK2-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP28]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.case2: // CHECK2-NEXT: [[TMP36:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAYIDX]], i64 5 -// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY15:%.*]] = icmp eq %struct.S.0* [[ARRAYIDX]], [[TMP36]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY15]], label [[OMP_ARRAYCPY_DONE24:%.*]], label [[OMP_ARRAYCPY_BODY16:%.*]] -// CHECK2: omp.arraycpy.body16: -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST17:%.*]] = phi %struct.S.0* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT22:%.*]], [[OMP_ARRAYCPY_BODY16]] ] -// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST18:%.*]] = phi %struct.S.0* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT21:%.*]], [[OMP_ARRAYCPY_BODY16]] ] +// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY14:%.*]] = icmp eq %struct.S.0* [[ARRAYIDX]], [[TMP36]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY14]], label [[OMP_ARRAYCPY_DONE23:%.*]], label [[OMP_ARRAYCPY_BODY15:%.*]] +// CHECK2: omp.arraycpy.body15: +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST16:%.*]] = phi %struct.S.0* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT21:%.*]], [[OMP_ARRAYCPY_BODY15]] ] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST17:%.*]] = phi %struct.S.0* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT20:%.*]], [[OMP_ARRAYCPY_BODY15]] ] // CHECK2-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 // CHECK2-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK2-NEXT: [[TMP39:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST18]] to i8* -// CHECK2-NEXT: [[ADD_PTR19:%.*]] = getelementptr inbounds i8, i8* [[TMP39]], i64 4 -// CHECK2-NEXT: [[TMP40:%.*]] = bitcast i8* [[ADD_PTR19]] to %struct.BaseS1* -// CHECK2-NEXT: [[TMP41:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST17]] to i8* -// CHECK2-NEXT: [[ADD_PTR20:%.*]] = getelementptr inbounds i8, i8* [[TMP41]], i64 4 -// CHECK2-NEXT: [[TMP42:%.*]] = bitcast i8* [[ADD_PTR20]] to %struct.BaseS1* +// CHECK2-NEXT: [[TMP39:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST17]] to i8* +// CHECK2-NEXT: [[ADD_PTR18:%.*]] = getelementptr inbounds i8, i8* [[TMP39]], i64 4 +// CHECK2-NEXT: [[TMP40:%.*]] = bitcast i8* [[ADD_PTR18]] to %struct.BaseS1* +// CHECK2-NEXT: [[TMP41:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST16]] to i8* +// CHECK2-NEXT: [[ADD_PTR19:%.*]] = getelementptr inbounds i8, i8* [[TMP41]], i64 4 +// CHECK2-NEXT: [[TMP42:%.*]] = bitcast i8* [[ADD_PTR19]] to %struct.BaseS1* // CHECK2-NEXT: call void @.omp_combiner..4(%struct.BaseS1* [[TMP40]], %struct.BaseS1* [[TMP42]]) // CHECK2-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP38]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT21]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST18]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT22]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST17]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE23:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT21]], [[TMP36]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE23]], label [[OMP_ARRAYCPY_DONE24]], label [[OMP_ARRAYCPY_BODY16]] -// CHECK2: omp.arraycpy.done24: +// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT20]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST17]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT21]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST16]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE22:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT20]], [[TMP36]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE22]], label [[OMP_ARRAYCPY_DONE23]], label [[OMP_ARRAYCPY_BODY15]] +// CHECK2: omp.arraycpy.done23: // CHECK2-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP28]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.default: -// CHECK2-NEXT: [[ARRAY_BEGIN25:%.*]] = getelementptr inbounds [5 x %struct.S.0], [5 x %struct.S.0]* [[VVAR22]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN25]], i64 5 +// CHECK2-NEXT: [[ARRAY_BEGIN24:%.*]] = getelementptr inbounds [5 x %struct.S.0], [5 x %struct.S.0]* [[VVAR22]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN24]], i64 5 // CHECK2-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK2: arraydestroy.body: // CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP43]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK2-NEXT: call void @_ZN1SIfED1Ev(%struct.S.0* nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN25]] -// CHECK2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE26:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK2: arraydestroy.done26: +// CHECK2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN24]] +// CHECK2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE25:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK2: arraydestroy.done25: // CHECK2-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4 // CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP45]]) @@ -5312,7 +5186,7 @@ int main() { // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[VAR34:%.*]] = alloca [2 x %struct.S.0], align 16 -// CHECK2-NEXT: [[_TMP8:%.*]] = alloca [4 x %struct.S.0]*, align 8 +// CHECK2-NEXT: [[_TMP7:%.*]] = alloca [4 x %struct.S.0]*, align 8 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -5331,32 +5205,22 @@ int main() { // CHECK2-NEXT: [[TMP3:%.*]] = load [4 x %struct.S.0]*, [4 x %struct.S.0]** [[_TMP1]], align 8 // CHECK2-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x %struct.S.0], [4 x %struct.S.0]* [[TMP3]], i64 0, i64 2 // CHECK2-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[VAR34]], i32 0, i32 0 -// CHECK2-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 -// CHECK2-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] -// CHECK2: arrayctor.loop: -// CHECK2-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK2-NEXT: call void @_ZN1SIfEC1Ev(%struct.S.0* nonnull align 4 dereferenceable(12) [[ARRAYCTOR_CUR]]) -// CHECK2-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK2-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] -// CHECK2-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] -// CHECK2: arrayctor.cont: -// CHECK2-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[VAR34]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN5]], i64 2 -// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN5]], [[TMP4]] +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP4]] // CHECK2-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK2: omp.arrayinit.body: -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAYIDX]], [[ARRAYCTOR_CONT]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] -// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN5]], [[ARRAYCTOR_CONT]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT7:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAYIDX]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT6:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK2-NEXT: [[TMP5:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* // CHECK2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[TMP5]], i64 4 // CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[ADD_PTR]] to %struct.BaseS1* // CHECK2-NEXT: [[TMP7:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK2-NEXT: [[ADD_PTR6:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 4 -// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[ADD_PTR6]] to %struct.BaseS1* +// CHECK2-NEXT: [[ADD_PTR5:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 4 +// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[ADD_PTR5]] to %struct.BaseS1* // CHECK2-NEXT: call void @.omp_initializer..5(%struct.BaseS1* [[TMP6]], %struct.BaseS1* [[TMP8]]) // CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT7]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT7]], [[TMP4]] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT6]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT6]], [[TMP4]] // CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK2: omp.arrayinit.done: // CHECK2-NEXT: [[TMP9:%.*]] = load [4 x %struct.S.0]*, [4 x %struct.S.0]** [[_TMP1]], align 8 @@ -5368,7 +5232,7 @@ int main() { // CHECK2-NEXT: [[TMP15:%.*]] = bitcast [2 x %struct.S.0]* [[VAR34]] to %struct.S.0* // CHECK2-NEXT: [[TMP16:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[TMP15]], i64 [[TMP14]] // CHECK2-NEXT: [[TMP17:%.*]] = bitcast %struct.S.0* [[TMP16]] to [4 x %struct.S.0]* -// CHECK2-NEXT: store [4 x %struct.S.0]* [[TMP17]], [4 x %struct.S.0]** [[_TMP8]], align 8 +// CHECK2-NEXT: store [4 x %struct.S.0]* [[TMP17]], [4 x %struct.S.0]** [[_TMP7]], align 8 // CHECK2-NEXT: [[RHS_BEGIN:%.*]] = bitcast [2 x %struct.S.0]* [[VAR34]] to %struct.S.0* // CHECK2-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 @@ -5390,8 +5254,8 @@ int main() { // CHECK2: omp.inner.for.cond: // CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] -// CHECK2-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK2-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK2-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK2: omp.inner.for.cond.cleanup: // CHECK2-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: @@ -5404,8 +5268,8 @@ int main() { // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: // CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP26]], 1 -// CHECK2-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP26]], 1 +// CHECK2-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -5427,60 +5291,60 @@ int main() { // CHECK2: .omp.reduction.case1: // CHECK2-NEXT: [[TMP35:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAYIDX]], i64 2 // CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAYIDX]], [[TMP35]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE17:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE16:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK2: omp.arraycpy.body: -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST11:%.*]] = phi %struct.S.0* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST12:%.*]] = phi %struct.S.0* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK2-NEXT: [[TMP36:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST12]] to i8* -// CHECK2-NEXT: [[ADD_PTR13:%.*]] = getelementptr inbounds i8, i8* [[TMP36]], i64 4 -// CHECK2-NEXT: [[TMP37:%.*]] = bitcast i8* [[ADD_PTR13]] to %struct.BaseS1* -// CHECK2-NEXT: [[TMP38:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST11]] to i8* -// CHECK2-NEXT: [[ADD_PTR14:%.*]] = getelementptr inbounds i8, i8* [[TMP38]], i64 4 -// CHECK2-NEXT: [[TMP39:%.*]] = bitcast i8* [[ADD_PTR14]] to %struct.BaseS1* +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST10:%.*]] = phi %struct.S.0* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST11:%.*]] = phi %struct.S.0* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT14:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK2-NEXT: [[TMP36:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST11]] to i8* +// CHECK2-NEXT: [[ADD_PTR12:%.*]] = getelementptr inbounds i8, i8* [[TMP36]], i64 4 +// CHECK2-NEXT: [[TMP37:%.*]] = bitcast i8* [[ADD_PTR12]] to %struct.BaseS1* +// CHECK2-NEXT: [[TMP38:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST10]] to i8* +// CHECK2-NEXT: [[ADD_PTR13:%.*]] = getelementptr inbounds i8, i8* [[TMP38]], i64 4 +// CHECK2-NEXT: [[TMP39:%.*]] = bitcast i8* [[ADD_PTR13]] to %struct.BaseS1* // CHECK2-NEXT: call void @.omp_combiner..4(%struct.BaseS1* [[TMP37]], %struct.BaseS1* [[TMP39]]) -// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST12]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST11]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE16:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP35]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_BODY]] -// CHECK2: omp.arraycpy.done17: +// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT14]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST11]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST10]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE15:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT14]], [[TMP35]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE15]], label [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_BODY]] +// CHECK2: omp.arraycpy.done16: // CHECK2-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP32]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.case2: // CHECK2-NEXT: [[TMP40:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAYIDX]], i64 2 -// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY18:%.*]] = icmp eq %struct.S.0* [[ARRAYIDX]], [[TMP40]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY18]], label [[OMP_ARRAYCPY_DONE27:%.*]], label [[OMP_ARRAYCPY_BODY19:%.*]] -// CHECK2: omp.arraycpy.body19: -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST20:%.*]] = phi %struct.S.0* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT25:%.*]], [[OMP_ARRAYCPY_BODY19]] ] -// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST21:%.*]] = phi %struct.S.0* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY19]] ] +// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY17:%.*]] = icmp eq %struct.S.0* [[ARRAYIDX]], [[TMP40]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY17]], label [[OMP_ARRAYCPY_DONE26:%.*]], label [[OMP_ARRAYCPY_BODY18:%.*]] +// CHECK2: omp.arraycpy.body18: +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST19:%.*]] = phi %struct.S.0* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY18]] ] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST20:%.*]] = phi %struct.S.0* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT23:%.*]], [[OMP_ARRAYCPY_BODY18]] ] // CHECK2-NEXT: [[TMP41:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: [[TMP42:%.*]] = load i32, i32* [[TMP41]], align 4 // CHECK2-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP42]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK2-NEXT: [[TMP43:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST21]] to i8* -// CHECK2-NEXT: [[ADD_PTR22:%.*]] = getelementptr inbounds i8, i8* [[TMP43]], i64 4 -// CHECK2-NEXT: [[TMP44:%.*]] = bitcast i8* [[ADD_PTR22]] to %struct.BaseS1* -// CHECK2-NEXT: [[TMP45:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST20]] to i8* -// CHECK2-NEXT: [[ADD_PTR23:%.*]] = getelementptr inbounds i8, i8* [[TMP45]], i64 4 -// CHECK2-NEXT: [[TMP46:%.*]] = bitcast i8* [[ADD_PTR23]] to %struct.BaseS1* +// CHECK2-NEXT: [[TMP43:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST20]] to i8* +// CHECK2-NEXT: [[ADD_PTR21:%.*]] = getelementptr inbounds i8, i8* [[TMP43]], i64 4 +// CHECK2-NEXT: [[TMP44:%.*]] = bitcast i8* [[ADD_PTR21]] to %struct.BaseS1* +// CHECK2-NEXT: [[TMP45:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST19]] to i8* +// CHECK2-NEXT: [[ADD_PTR22:%.*]] = getelementptr inbounds i8, i8* [[TMP45]], i64 4 +// CHECK2-NEXT: [[TMP46:%.*]] = bitcast i8* [[ADD_PTR22]] to %struct.BaseS1* // CHECK2-NEXT: call void @.omp_combiner..4(%struct.BaseS1* [[TMP44]], %struct.BaseS1* [[TMP46]]) // CHECK2-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP42]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT24]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT25]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST20]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE26:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT24]], [[TMP40]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_DONE27]], label [[OMP_ARRAYCPY_BODY19]] -// CHECK2: omp.arraycpy.done27: +// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT23]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST20]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT24]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST19]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE25:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT23]], [[TMP40]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE25]], label [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_BODY18]] +// CHECK2: omp.arraycpy.done26: // CHECK2-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP32]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.default: -// CHECK2-NEXT: [[ARRAY_BEGIN28:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[VAR34]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN28]], i64 2 +// CHECK2-NEXT: [[ARRAY_BEGIN27:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[VAR34]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN27]], i64 2 // CHECK2-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK2: arraydestroy.body: // CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP47]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK2-NEXT: call void @_ZN1SIfED1Ev(%struct.S.0* nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN28]] -// CHECK2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE29:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK2: arraydestroy.done29: +// CHECK2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN27]] +// CHECK2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE28:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK2: arraydestroy.done28: // CHECK2-NEXT: [[TMP48:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: [[TMP49:%.*]] = load i32, i32* [[TMP48]], align 4 // CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP49]]) @@ -5539,7 +5403,7 @@ int main() { // CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[_TMP6:%.*]] = alloca [4 x %struct.S.0]*, align 8 +// CHECK2-NEXT: [[_TMP5:%.*]] = alloca [4 x %struct.S.0]*, align 8 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -5559,36 +5423,26 @@ int main() { // CHECK2-NEXT: [[DOTVAR3__VOID_ADDR:%.*]] = call i8* @__kmpc_alloc(i32 [[TMP4]], i64 48, i8* inttoptr (i64 6 to i8*)) // CHECK2-NEXT: [[DOTVAR3__ADDR:%.*]] = bitcast i8* [[DOTVAR3__VOID_ADDR]] to [4 x %struct.S.0]* // CHECK2-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [4 x %struct.S.0], [4 x %struct.S.0]* [[DOTVAR3__ADDR]], i32 0, i32 0 -// CHECK2-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 4 -// CHECK2-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] -// CHECK2: arrayctor.loop: -// CHECK2-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK2-NEXT: call void @_ZN1SIfEC1Ev(%struct.S.0* nonnull align 4 dereferenceable(12) [[ARRAYCTOR_CUR]]) -// CHECK2-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYCTOR_CUR]], i64 1 -// CHECK2-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] -// CHECK2-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] -// CHECK2: arrayctor.cont: -// CHECK2-NEXT: [[ARRAY_BEGIN3:%.*]] = getelementptr inbounds [4 x %struct.S.0], [4 x %struct.S.0]* [[DOTVAR3__ADDR]], i32 0, i32 0 // CHECK2-NEXT: [[TMP5:%.*]] = bitcast [4 x %struct.S.0]* [[TMP2]] to %struct.S.0* -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN3]], i64 4 -// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN3]], [[TMP6]] +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 4 +// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP6]] // CHECK2-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK2: omp.arrayinit.body: -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP5]], [[ARRAYCTOR_CONT]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] -// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN3]], [[ARRAYCTOR_CONT]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT5:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK2-NEXT: [[TMP7:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* // CHECK2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 4 // CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[ADD_PTR]] to %struct.BaseS1* // CHECK2-NEXT: [[TMP9:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK2-NEXT: [[ADD_PTR4:%.*]] = getelementptr inbounds i8, i8* [[TMP9]], i64 4 -// CHECK2-NEXT: [[TMP10:%.*]] = bitcast i8* [[ADD_PTR4]] to %struct.BaseS1* +// CHECK2-NEXT: [[ADD_PTR3:%.*]] = getelementptr inbounds i8, i8* [[TMP9]], i64 4 +// CHECK2-NEXT: [[TMP10:%.*]] = bitcast i8* [[ADD_PTR3]] to %struct.BaseS1* // CHECK2-NEXT: call void @.omp_initializer..5(%struct.BaseS1* [[TMP8]], %struct.BaseS1* [[TMP10]]) // CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT5]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT5]], [[TMP6]] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP6]] // CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK2: omp.arrayinit.done: -// CHECK2-NEXT: store [4 x %struct.S.0]* [[DOTVAR3__ADDR]], [4 x %struct.S.0]** [[_TMP6]], align 8 +// CHECK2-NEXT: store [4 x %struct.S.0]* [[DOTVAR3__ADDR]], [4 x %struct.S.0]** [[_TMP5]], align 8 // CHECK2-NEXT: [[LHS_BEGIN:%.*]] = bitcast [4 x %struct.S.0]* [[TMP2]] to %struct.S.0* // CHECK2-NEXT: [[RHS_BEGIN:%.*]] = bitcast [4 x %struct.S.0]* [[DOTVAR3__ADDR]] to %struct.S.0* // CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) @@ -5609,8 +5463,8 @@ int main() { // CHECK2: omp.inner.for.cond: // CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK2: omp.inner.for.cond.cleanup: // CHECK2-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: @@ -5623,8 +5477,8 @@ int main() { // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -5642,58 +5496,58 @@ int main() { // CHECK2: .omp.reduction.case1: // CHECK2-NEXT: [[TMP22:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[LHS_BEGIN]], i64 4 // CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[LHS_BEGIN]], [[TMP22]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE15:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK2: omp.arraycpy.body: -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi %struct.S.0* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi %struct.S.0* [ [[LHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT13:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK2-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST10]] to i8* -// CHECK2-NEXT: [[ADD_PTR11:%.*]] = getelementptr inbounds i8, i8* [[TMP23]], i64 4 -// CHECK2-NEXT: [[TMP24:%.*]] = bitcast i8* [[ADD_PTR11]] to %struct.BaseS1* -// CHECK2-NEXT: [[TMP25:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST9]] to i8* -// CHECK2-NEXT: [[ADD_PTR12:%.*]] = getelementptr inbounds i8, i8* [[TMP25]], i64 4 -// CHECK2-NEXT: [[TMP26:%.*]] = bitcast i8* [[ADD_PTR12]] to %struct.BaseS1* +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST8:%.*]] = phi %struct.S.0* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST9:%.*]] = phi %struct.S.0* [ [[LHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK2-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST9]] to i8* +// CHECK2-NEXT: [[ADD_PTR10:%.*]] = getelementptr inbounds i8, i8* [[TMP23]], i64 4 +// CHECK2-NEXT: [[TMP24:%.*]] = bitcast i8* [[ADD_PTR10]] to %struct.BaseS1* +// CHECK2-NEXT: [[TMP25:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST8]] to i8* +// CHECK2-NEXT: [[ADD_PTR11:%.*]] = getelementptr inbounds i8, i8* [[TMP25]], i64 4 +// CHECK2-NEXT: [[TMP26:%.*]] = bitcast i8* [[ADD_PTR11]] to %struct.BaseS1* // CHECK2-NEXT: call void @.omp_combiner..4(%struct.BaseS1* [[TMP24]], %struct.BaseS1* [[TMP26]]) -// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT13]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE14:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT13]], [[TMP22]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_DONE15]], label [[OMP_ARRAYCPY_BODY]] -// CHECK2: omp.arraycpy.done15: +// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT12]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST9]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST8]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT12]], [[TMP22]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY]] +// CHECK2: omp.arraycpy.done14: // CHECK2-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.case2: // CHECK2-NEXT: [[TMP27:%.*]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[LHS_BEGIN]], i64 4 -// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY16:%.*]] = icmp eq %struct.S.0* [[LHS_BEGIN]], [[TMP27]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY16]], label [[OMP_ARRAYCPY_DONE25:%.*]], label [[OMP_ARRAYCPY_BODY17:%.*]] -// CHECK2: omp.arraycpy.body17: -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST18:%.*]] = phi %struct.S.0* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT23:%.*]], [[OMP_ARRAYCPY_BODY17]] ] -// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST19:%.*]] = phi %struct.S.0* [ [[LHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT22:%.*]], [[OMP_ARRAYCPY_BODY17]] ] +// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY15:%.*]] = icmp eq %struct.S.0* [[LHS_BEGIN]], [[TMP27]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY15]], label [[OMP_ARRAYCPY_DONE24:%.*]], label [[OMP_ARRAYCPY_BODY16:%.*]] +// CHECK2: omp.arraycpy.body16: +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST17:%.*]] = phi %struct.S.0* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT22:%.*]], [[OMP_ARRAYCPY_BODY16]] ] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST18:%.*]] = phi %struct.S.0* [ [[LHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT21:%.*]], [[OMP_ARRAYCPY_BODY16]] ] // CHECK2-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK2-NEXT: [[TMP28:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST19]] to i8* -// CHECK2-NEXT: [[ADD_PTR20:%.*]] = getelementptr inbounds i8, i8* [[TMP28]], i64 4 -// CHECK2-NEXT: [[TMP29:%.*]] = bitcast i8* [[ADD_PTR20]] to %struct.BaseS1* -// CHECK2-NEXT: [[TMP30:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST18]] to i8* -// CHECK2-NEXT: [[ADD_PTR21:%.*]] = getelementptr inbounds i8, i8* [[TMP30]], i64 4 -// CHECK2-NEXT: [[TMP31:%.*]] = bitcast i8* [[ADD_PTR21]] to %struct.BaseS1* +// CHECK2-NEXT: [[TMP28:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST18]] to i8* +// CHECK2-NEXT: [[ADD_PTR19:%.*]] = getelementptr inbounds i8, i8* [[TMP28]], i64 4 +// CHECK2-NEXT: [[TMP29:%.*]] = bitcast i8* [[ADD_PTR19]] to %struct.BaseS1* +// CHECK2-NEXT: [[TMP30:%.*]] = bitcast %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST17]] to i8* +// CHECK2-NEXT: [[ADD_PTR20:%.*]] = getelementptr inbounds i8, i8* [[TMP30]], i64 4 +// CHECK2-NEXT: [[TMP31:%.*]] = bitcast i8* [[ADD_PTR20]] to %struct.BaseS1* // CHECK2-NEXT: call void @.omp_combiner..4(%struct.BaseS1* [[TMP29]], %struct.BaseS1* [[TMP31]]) // CHECK2-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT22]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST19]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT23]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST18]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE24:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT22]], [[TMP27]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE24]], label [[OMP_ARRAYCPY_DONE25]], label [[OMP_ARRAYCPY_BODY17]] -// CHECK2: omp.arraycpy.done25: +// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT21]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST18]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT22]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST17]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE23:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT21]], [[TMP27]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE23]], label [[OMP_ARRAYCPY_DONE24]], label [[OMP_ARRAYCPY_BODY16]] +// CHECK2: omp.arraycpy.done24: // CHECK2-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.default: -// CHECK2-NEXT: [[ARRAY_BEGIN26:%.*]] = getelementptr inbounds [4 x %struct.S.0], [4 x %struct.S.0]* [[DOTVAR3__ADDR]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN26]], i64 4 +// CHECK2-NEXT: [[ARRAY_BEGIN25:%.*]] = getelementptr inbounds [4 x %struct.S.0], [4 x %struct.S.0]* [[DOTVAR3__ADDR]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN25]], i64 4 // CHECK2-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK2: arraydestroy.body: // CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP32]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK2-NEXT: call void @_ZN1SIfED1Ev(%struct.S.0* nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN26]] -// CHECK2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE27:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK2: arraydestroy.done27: +// CHECK2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN25]] +// CHECK2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE26:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK2: arraydestroy.done26: // CHECK2-NEXT: [[TMP33:%.*]] = bitcast [4 x %struct.S.0]* [[DOTVAR3__ADDR]] to i8* // CHECK2-NEXT: call void @__kmpc_free(i32 [[TMP4]], i8* [[TMP33]], i8* inttoptr (i64 6 to i8*)) // CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP4]]) @@ -5914,7 +5768,6 @@ int main() { // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK2-NEXT: call void @.omp_initializer..25(i32* [[T_VAR3]], i32* [[TMP0]]) // CHECK2-NEXT: [[TMP7:%.*]] = load %struct.S*, %struct.S** [[_TMP1]], align 8 -// CHECK2-NEXT: call void @_ZN1SIiEC1Ev(%struct.S* nonnull align 4 dereferenceable(12) [[VAR4]]) // CHECK2-NEXT: [[TMP8:%.*]] = bitcast %struct.S* [[VAR4]] to i8* // CHECK2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[TMP8]], i64 4 // CHECK2-NEXT: [[TMP9:%.*]] = bitcast i8* [[ADD_PTR]] to %struct.BaseS1* @@ -5923,7 +5776,6 @@ int main() { // CHECK2-NEXT: [[TMP11:%.*]] = bitcast i8* [[ADD_PTR5]] to %struct.BaseS1* // CHECK2-NEXT: call void @.omp_initializer..5(%struct.BaseS1* [[TMP9]], %struct.BaseS1* [[TMP11]]) // CHECK2-NEXT: store %struct.S* [[VAR4]], %struct.S** [[_TMP6]], align 8 -// CHECK2-NEXT: call void @_ZN1SIiEC1Ev(%struct.S* nonnull align 4 dereferenceable(12) [[VAR17]]) // CHECK2-NEXT: call void @.omp_initializer..27(%struct.S* [[VAR17]], %struct.S* [[TMP2]]) // CHECK2-NEXT: call void @.omp_initializer..29(i32* [[T_VAR18]], i32* [[TMP3]]) // CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -6367,32 +6219,22 @@ int main() { // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [42 x %struct.S], [42 x %struct.S]* [[TMP0]], i64 0, i64 1 // CHECK2-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [42 x %struct.S], [42 x %struct.S]* [[TMP0]], i64 0, i64 40 // CHECK2-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [40 x %struct.S], [40 x %struct.S]* [[ARR4]], i32 0, i32 0 -// CHECK2-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 40 -// CHECK2-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] -// CHECK2: arrayctor.loop: -// CHECK2-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] -// CHECK2-NEXT: call void @_ZN1SIiEC1Ev(%struct.S* nonnull align 4 dereferenceable(12) [[ARRAYCTOR_CUR]]) -// CHECK2-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYCTOR_CUR]], i64 1 -// CHECK2-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %struct.S* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] -// CHECK2-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] -// CHECK2: arrayctor.cont: -// CHECK2-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [40 x %struct.S], [40 x %struct.S]* [[ARR4]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN5]], i64 40 -// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN5]], [[TMP6]] +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 40 +// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP6]] // CHECK2-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK2: omp.arrayinit.body: -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAYIDX]], [[ARRAYCTOR_CONT]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] -// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN5]], [[ARRAYCTOR_CONT]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT7:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAYIDX]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT6:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK2-NEXT: [[TMP7:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]] to i8* // CHECK2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 4 // CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[ADD_PTR]] to %struct.BaseS1* // CHECK2-NEXT: [[TMP9:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]] to i8* -// CHECK2-NEXT: [[ADD_PTR6:%.*]] = getelementptr inbounds i8, i8* [[TMP9]], i64 4 -// CHECK2-NEXT: [[TMP10:%.*]] = bitcast i8* [[ADD_PTR6]] to %struct.BaseS1* +// CHECK2-NEXT: [[ADD_PTR5:%.*]] = getelementptr inbounds i8, i8* [[TMP9]], i64 4 +// CHECK2-NEXT: [[TMP10:%.*]] = bitcast i8* [[ADD_PTR5]] to %struct.BaseS1* // CHECK2-NEXT: call void @.omp_initializer..37(%struct.BaseS1* [[TMP8]], %struct.BaseS1* [[TMP10]]) // CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT7]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT7]], [[TMP6]] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT6]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT6]], [[TMP6]] // CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK2: omp.arrayinit.done: // CHECK2-NEXT: [[TMP11:%.*]] = bitcast [42 x %struct.S]* [[TMP0]] to %struct.S* @@ -6424,8 +6266,8 @@ int main() { // CHECK2: omp.inner.for.cond: // CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] -// CHECK2-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK2-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK2: omp.inner.for.cond.cleanup: // CHECK2-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: @@ -6436,20 +6278,20 @@ int main() { // CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP2]], align 4 // CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[I]], align 4 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 -// CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP1]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 [[TMP27]], i32* [[ARRAYIDX9]], align 4 +// CHECK2-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP1]], i64 0, i64 [[IDXPROM]] +// CHECK2-NEXT: store i32 [[TMP27]], i32* [[ARRAYIDX8]], align 4 // CHECK2-NEXT: [[TMP29:%.*]] = load %struct.S*, %struct.S** [[_TMP1]], align 8 // CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK2-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP3]], i64 0, i64 [[IDXPROM10]] -// CHECK2-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(12) %struct.S* @_ZN1SIiEaSERKS0_(%struct.S* nonnull align 4 dereferenceable(12) [[ARRAYIDX11]], %struct.S* nonnull align 4 dereferenceable(12) [[TMP29]]) +// CHECK2-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK2-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP3]], i64 0, i64 [[IDXPROM9]] +// CHECK2-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(12) %struct.S* @_ZN1SIiEaSERKS0_(%struct.S* nonnull align 4 dereferenceable(12) [[ARRAYIDX10]], %struct.S* nonnull align 4 dereferenceable(12) [[TMP29]]) // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: // CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP31]], 1 -// CHECK2-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK2-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -6471,60 +6313,60 @@ int main() { // CHECK2: .omp.reduction.case1: // CHECK2-NEXT: [[TMP40:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAYIDX]], i64 40 // CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAYIDX]], [[TMP40]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE19:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK2: omp.arraycpy.body: -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST13:%.*]] = phi %struct.S* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi %struct.S* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK2-NEXT: [[TMP41:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST14]] to i8* -// CHECK2-NEXT: [[ADD_PTR15:%.*]] = getelementptr inbounds i8, i8* [[TMP41]], i64 4 -// CHECK2-NEXT: [[TMP42:%.*]] = bitcast i8* [[ADD_PTR15]] to %struct.BaseS1* -// CHECK2-NEXT: [[TMP43:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST13]] to i8* -// CHECK2-NEXT: [[ADD_PTR16:%.*]] = getelementptr inbounds i8, i8* [[TMP43]], i64 4 -// CHECK2-NEXT: [[TMP44:%.*]] = bitcast i8* [[ADD_PTR16]] to %struct.BaseS1* +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST12:%.*]] = phi %struct.S* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST13:%.*]] = phi %struct.S* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK2-NEXT: [[TMP41:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST13]] to i8* +// CHECK2-NEXT: [[ADD_PTR14:%.*]] = getelementptr inbounds i8, i8* [[TMP41]], i64 4 +// CHECK2-NEXT: [[TMP42:%.*]] = bitcast i8* [[ADD_PTR14]] to %struct.BaseS1* +// CHECK2-NEXT: [[TMP43:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST12]] to i8* +// CHECK2-NEXT: [[ADD_PTR15:%.*]] = getelementptr inbounds i8, i8* [[TMP43]], i64 4 +// CHECK2-NEXT: [[TMP44:%.*]] = bitcast i8* [[ADD_PTR15]] to %struct.BaseS1* // CHECK2-NEXT: call void @.omp_combiner..36(%struct.BaseS1* [[TMP42]], %struct.BaseS1* [[TMP44]]) -// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST13]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP40]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY]] -// CHECK2: omp.arraycpy.done19: +// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT16]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST13]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST12]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP40]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY]] +// CHECK2: omp.arraycpy.done18: // CHECK2-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP37]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.case2: // CHECK2-NEXT: [[TMP45:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[ARRAYIDX]], i64 40 -// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY20:%.*]] = icmp eq %struct.S* [[ARRAYIDX]], [[TMP45]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY20]], label [[OMP_ARRAYCPY_DONE29:%.*]], label [[OMP_ARRAYCPY_BODY21:%.*]] -// CHECK2: omp.arraycpy.body21: -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST22:%.*]] = phi %struct.S* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT27:%.*]], [[OMP_ARRAYCPY_BODY21]] ] -// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST23:%.*]] = phi %struct.S* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT26:%.*]], [[OMP_ARRAYCPY_BODY21]] ] +// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY19:%.*]] = icmp eq %struct.S* [[ARRAYIDX]], [[TMP45]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY19]], label [[OMP_ARRAYCPY_DONE28:%.*]], label [[OMP_ARRAYCPY_BODY20:%.*]] +// CHECK2: omp.arraycpy.body20: +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST21:%.*]] = phi %struct.S* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT26:%.*]], [[OMP_ARRAYCPY_BODY20]] ] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST22:%.*]] = phi %struct.S* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT25:%.*]], [[OMP_ARRAYCPY_BODY20]] ] // CHECK2-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 // CHECK2-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP47]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK2-NEXT: [[TMP48:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST23]] to i8* -// CHECK2-NEXT: [[ADD_PTR24:%.*]] = getelementptr inbounds i8, i8* [[TMP48]], i64 4 -// CHECK2-NEXT: [[TMP49:%.*]] = bitcast i8* [[ADD_PTR24]] to %struct.BaseS1* -// CHECK2-NEXT: [[TMP50:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST22]] to i8* -// CHECK2-NEXT: [[ADD_PTR25:%.*]] = getelementptr inbounds i8, i8* [[TMP50]], i64 4 -// CHECK2-NEXT: [[TMP51:%.*]] = bitcast i8* [[ADD_PTR25]] to %struct.BaseS1* +// CHECK2-NEXT: [[TMP48:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST22]] to i8* +// CHECK2-NEXT: [[ADD_PTR23:%.*]] = getelementptr inbounds i8, i8* [[TMP48]], i64 4 +// CHECK2-NEXT: [[TMP49:%.*]] = bitcast i8* [[ADD_PTR23]] to %struct.BaseS1* +// CHECK2-NEXT: [[TMP50:%.*]] = bitcast %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST21]] to i8* +// CHECK2-NEXT: [[ADD_PTR24:%.*]] = getelementptr inbounds i8, i8* [[TMP50]], i64 4 +// CHECK2-NEXT: [[TMP51:%.*]] = bitcast i8* [[ADD_PTR24]] to %struct.BaseS1* // CHECK2-NEXT: call void @.omp_combiner..36(%struct.BaseS1* [[TMP49]], %struct.BaseS1* [[TMP51]]) // CHECK2-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP47]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var) -// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT26]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT27]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST22]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE28:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT26]], [[TMP45]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE28]], label [[OMP_ARRAYCPY_DONE29]], label [[OMP_ARRAYCPY_BODY21]] -// CHECK2: omp.arraycpy.done29: +// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT25]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT26]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST21]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE27:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT25]], [[TMP45]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE27]], label [[OMP_ARRAYCPY_DONE28]], label [[OMP_ARRAYCPY_BODY20]] +// CHECK2: omp.arraycpy.done28: // CHECK2-NEXT: call void @__kmpc_end_reduce(%struct.ident_t* @[[GLOB3]], i32 [[TMP37]], [8 x i32]* @.gomp_critical_user_.reduction.var) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.default: -// CHECK2-NEXT: [[ARRAY_BEGIN30:%.*]] = getelementptr inbounds [40 x %struct.S], [40 x %struct.S]* [[ARR4]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN30]], i64 40 +// CHECK2-NEXT: [[ARRAY_BEGIN29:%.*]] = getelementptr inbounds [40 x %struct.S], [40 x %struct.S]* [[ARR4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN29]], i64 40 // CHECK2-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK2: arraydestroy.body: // CHECK2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP52]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK2-NEXT: call void @_ZN1SIiED1Ev(%struct.S* nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN30]] -// CHECK2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE31:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK2: arraydestroy.done31: +// CHECK2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN29]] +// CHECK2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE30:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK2: arraydestroy.done30: // CHECK2-NEXT: [[TMP53:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: [[TMP54:%.*]] = load i32, i32* [[TMP53]], align 4 // CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP54]]) @@ -6627,7 +6469,6 @@ int main() { // CHECK3-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4 // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S* nonnull align 4 dereferenceable(12) [[S]]) // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(%struct.S* nonnull align 4 dereferenceable(12) [[S1]]) // CHECK3-NEXT: call void @.omp_initializer.(%struct.S* [[S1]], %struct.S* [[S]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: @@ -7265,7 +7106,6 @@ int main() { // CHECK4-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4 // CHECK4-NEXT: call void @_ZN1SIiEC1Ev(%struct.S* nonnull align 4 dereferenceable(12) [[S]]) // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: call void @_ZN1SIiEC1Ev(%struct.S* nonnull align 4 dereferenceable(12) [[S1]]) // CHECK4-NEXT: call void @.omp_initializer.(%struct.S* [[S1]], %struct.S* [[S]]) // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: From fcd0cb39214e5abc89bfb1926397fe668172ecaa Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 30 Jun 2021 11:36:06 +0100 Subject: [PATCH 277/619] Fix MSVC "32-bit shift implicitly converted to 64 bits" warning. --- llvm/lib/Target/Hexagon/HexagonISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp index 7126977a00f6d..280140f009209 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -1920,7 +1920,7 @@ HexagonTargetLowering::validateConstPtrAlignment(SDValue Ptr, Align NeedAlign, return; unsigned Addr = CA->getZExtValue(); Align HaveAlign = - Addr != 0 ? Align(1u << countTrailingZeros(Addr)) : NeedAlign; + Addr != 0 ? Align(1ull << countTrailingZeros(Addr)) : NeedAlign; if (HaveAlign < NeedAlign) { std::string ErrMsg; raw_string_ostream O(ErrMsg); From f7ce532d622dc26eddd25f87faec0ff35dc0c2e9 Mon Sep 17 00:00:00 2001 From: Saiyedul Islam Date: Wed, 9 Jun 2021 18:49:45 +0530 Subject: [PATCH 278/619] [clang-offload-bundler] Add unbundling of archives containing bundled object files into device specific archives This patch adds unbundling support of an archive file. It takes an archive file along with a set of offload targets as input. Output is a device specific archive for each given offload target. Input archive contains bundled code objects bundled using clang-offload-bundler. Each generated device specific archive contains a set of device code object files which are named as -. Entries in input archive can be of any binary type which is supported by clang-offload-bundler, like *.bc. Output archives will contain files in same type. Example Usuage: clang-offload-bundler --unbundle --inputs=lib-generic.a -type=a -targets=openmp-amdgcn-amdhsa--gfx906,openmp-amdgcn-amdhsa--gfx908 -outputs=devicelib-gfx906.a,deviceLib-gfx908.a Reviewed By: jdoerfert, yaxunl Differential Revision: https://reviews.llvm.org/D93525 --- clang/docs/ClangOffloadBundler.rst | 10 +- clang/lib/Driver/ToolChains/Clang.cpp | 30 +- clang/test/Driver/clang-offload-bundler.c | 44 ++- clang/test/Driver/hip-rdc-device-only.hip | 8 +- .../Driver/hip-toolchain-rdc-separate.hip | 12 +- .../ClangOffloadBundler.cpp | 352 ++++++++++++++++-- 6 files changed, 383 insertions(+), 73 deletions(-) diff --git a/clang/docs/ClangOffloadBundler.rst b/clang/docs/ClangOffloadBundler.rst index 68c5116b235f4..c92d8a94cfb54 100644 --- a/clang/docs/ClangOffloadBundler.rst +++ b/clang/docs/ClangOffloadBundler.rst @@ -121,7 +121,15 @@ Where: ============= ============================================================== **target-triple** - The target triple of the code object. + The target triple of the code object: + +.. code:: + + --- + +It is required to have all four components present, if target-id is present. +Components are hyphen separated. If a component is not specified then the +empty string must be used in its place. **target-id** The canonical target ID of the code object. Present only if the target diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index c265e1c4e53cb..00939eae42998 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -7629,10 +7629,16 @@ void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA, }); } Triples += Action::GetOffloadKindName(CurKind); - Triples += '-'; - Triples += CurTC->getTriple().normalize(); - if (CurKind == Action::OFK_HIP && CurDep->getOffloadingArch()) { - Triples += '-'; + Triples += "-"; + std::string NormalizedTriple = CurTC->getTriple().normalize(); + Triples += NormalizedTriple; + + if (CurDep->getOffloadingArch() != nullptr) { + // If OffloadArch is present it can only appear as the 6th hypen + // sepearated field of Bundle Entry ID. So, pad required number of + // hyphens in Triple. + for (int i = 4 - StringRef(NormalizedTriple).count("-"); i > 0; i--) + Triples += "-"; Triples += CurDep->getOffloadingArch(); } } @@ -7702,11 +7708,17 @@ void OffloadBundler::ConstructJobMultipleOutputs( auto &Dep = DepInfo[I]; Triples += Action::GetOffloadKindName(Dep.DependentOffloadKind); - Triples += '-'; - Triples += Dep.DependentToolChain->getTriple().normalize(); - if (Dep.DependentOffloadKind == Action::OFK_HIP && - !Dep.DependentBoundArch.empty()) { - Triples += '-'; + Triples += "-"; + std::string NormalizedTriple = + Dep.DependentToolChain->getTriple().normalize(); + Triples += NormalizedTriple; + + if (!Dep.DependentBoundArch.empty()) { + // If OffloadArch is present it can only appear as the 6th hypen + // sepearated field of Bundle Entry ID. So, pad required number of + // hyphens in Triple. + for (int i = 4 - StringRef(NormalizedTriple).count("-"); i > 0; i--) + Triples += "-"; Triples += Dep.DependentBoundArch; } } diff --git a/clang/test/Driver/clang-offload-bundler.c b/clang/test/Driver/clang-offload-bundler.c index faa6c5161a8f9..e1afa19570ec3 100644 --- a/clang/test/Driver/clang-offload-bundler.c +++ b/clang/test/Driver/clang-offload-bundler.c @@ -46,6 +46,7 @@ // CK-HELP: {{.*}}bc {{.*}}- llvm-bc // CK-HELP: {{.*}}s {{.*}}- assembler // CK-HELP: {{.*}}o {{.*}}- object +// CK-HELP: {{.*}}a {{.*}}- archive of objects // CK-HELP: {{.*}}gch {{.*}}- precompiled-header // CK-HELP: {{.*}}ast {{.*}}- clang AST file // CK-HELP: {{.*}}-unbundle {{.*}}- Unbundle bundled file into several output files. @@ -103,6 +104,9 @@ // RUN: not clang-offload-bundler -type=i -targets=host-%itanium_abi_triple,host-%itanium_abi_triple,openmp-x86_64-pc-linux-gnu -inputs=%t.i,%t.tgt1,%t.tgt2 -outputs=%t.bundle.i 2>&1 | FileCheck %s --check-prefix CK-ERR9B // CK-ERR9B: error: Duplicate targets are not allowed +// RUN: not clang-offload-bundler -type=a -targets=hxst-powerpcxxle-ibm-linux-gnu,openxp-pxxerpc64le-ibm-linux-gnu,xpenmp-x86_xx-pc-linux-gnu -inputs=%t.i,%t.tgt1,%t.tgt2 -outputs=%t.bundle.i 2>&1 | FileCheck %s --check-prefix CK-ERR10A +// CK-ERR10A: error: Archive files are only supported for unbundling + // // Check text bundle. This is a readable format, so we check for the format we expect to find. // @@ -313,30 +317,30 @@ // // Check error due to missing bundles // -// RUN: clang-offload-bundler -type=bc -targets=host-%itanium_abi_triple,hip-amdgcn-amd-amdhsa-gfx900 -inputs=%t.bc,%t.tgt1 -outputs=%t.hip.bundle.bc +// RUN: clang-offload-bundler -type=bc -targets=host-%itanium_abi_triple,hip-amdgcn-amd-amdhsa--gfx900 -inputs=%t.bc,%t.tgt1 -outputs=%t.hip.bundle.bc // RUN: not clang-offload-bundler -type=bc -inputs=%t.hip.bundle.bc -outputs=%t.tmp.bc -unbundle \ -// RUN: -targets=hip-amdgcn-amd-amdhsa-gfx906 \ +// RUN: -targets=hip-amdgcn-amd-amdhsa--gfx906 \ // RUN: 2>&1 | FileCheck -check-prefix=MISS1 %s // RUN: not clang-offload-bundler -type=bc -inputs=%t.hip.bundle.bc -outputs=%t.tmp.bc,%t.tmp2.bc -unbundle \ -// RUN: -targets=hip-amdgcn-amd-amdhsa-gfx906,hip-amdgcn-amd-amdhsa-gfx900 \ +// RUN: -targets=hip-amdgcn-amd-amdhsa--gfx906,hip-amdgcn-amd-amdhsa--gfx900 \ // RUN: 2>&1 | FileCheck -check-prefix=MISS1 %s -// MISS1: error: Can't find bundles for hip-amdgcn-amd-amdhsa-gfx906 +// MISS1: error: Can't find bundles for hip-amdgcn-amd-amdhsa--gfx906 // RUN: not clang-offload-bundler -type=bc -inputs=%t.hip.bundle.bc -outputs=%t.tmp.bc,%t.tmp2.bc -unbundle \ -// RUN: -targets=hip-amdgcn-amd-amdhsa-gfx906,hip-amdgcn-amd-amdhsa-gfx803 \ +// RUN: -targets=hip-amdgcn-amd-amdhsa--gfx906,hip-amdgcn-amd-amdhsa--gfx803 \ // RUN: 2>&1 | FileCheck -check-prefix=MISS2 %s -// MISS2: error: Can't find bundles for hip-amdgcn-amd-amdhsa-gfx803 and hip-amdgcn-amd-amdhsa-gfx906 +// MISS2: error: Can't find bundles for hip-amdgcn-amd-amdhsa--gfx803 and hip-amdgcn-amd-amdhsa--gfx906 // RUN: not clang-offload-bundler -type=bc -inputs=%t.hip.bundle.bc -outputs=%t.tmp.bc,%t.tmp2.bc,%t.tmp3.bc -unbundle \ -// RUN: -targets=hip-amdgcn-amd-amdhsa-gfx906,hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx1010 \ +// RUN: -targets=hip-amdgcn-amd-amdhsa--gfx906,hip-amdgcn-amd-amdhsa--gfx803,hip-amdgcn-amd-amdhsa--gfx1010 \ // RUN: 2>&1 | FileCheck -check-prefix=MISS3 %s -// MISS3: error: Can't find bundles for hip-amdgcn-amd-amdhsa-gfx1010, hip-amdgcn-amd-amdhsa-gfx803, and hip-amdgcn-amd-amdhsa-gfx906 +// MISS3: error: Can't find bundles for hip-amdgcn-amd-amdhsa--gfx1010, hip-amdgcn-amd-amdhsa--gfx803, and hip-amdgcn-amd-amdhsa--gfx906 // // Check error due to duplicate targets // -// RUN: not clang-offload-bundler -type=bc -targets=host-%itanium_abi_triple,hip-amdgcn-amd-amdhsa-gfx900,hip-amdgcn-amd-amdhsa-gfx900 \ +// RUN: not clang-offload-bundler -type=bc -targets=host-%itanium_abi_triple,hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx900 \ // RUN: -inputs=%t.bc,%t.tgt1,%t.tgt1 -outputs=%t.hip.bundle.bc 2>&1 | FileCheck -check-prefix=DUP %s // RUN: not clang-offload-bundler -type=bc -inputs=%t.hip.bundle.bc -outputs=%t.tmp.bc,%t.tmp2.bc -unbundle \ -// RUN: -targets=hip-amdgcn-amd-amdhsa-gfx906,hip-amdgcn-amd-amdhsa-gfx906 \ +// RUN: -targets=hip-amdgcn-amd-amdhsa--gfx906,hip-amdgcn-amd-amdhsa--gfx906 \ // RUN: 2>&1 | FileCheck -check-prefix=DUP %s // DUP: error: Duplicate targets are not allowed // @@ -364,17 +368,29 @@ // // Check bundling without host target is allowed for HIP. // -// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa-gfx900,hip-amdgcn-amd-amdhsa-gfx906 \ +// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \ // RUN: -inputs=%t.tgt1,%t.tgt2 -outputs=%t.hip.bundle.bc // RUN: clang-offload-bundler -type=bc -list -inputs=%t.hip.bundle.bc | FileCheck -check-prefix=NOHOST %s -// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa-gfx900,hip-amdgcn-amd-amdhsa-gfx906 \ +// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \ // RUN: -outputs=%t.res.tgt1,%t.res.tgt2 -inputs=%t.hip.bundle.bc -unbundle // RUN: diff %t.tgt1 %t.res.tgt1 // RUN: diff %t.tgt2 %t.res.tgt2 // // NOHOST-NOT: host- -// NOHOST-DAG: hip-amdgcn-amd-amdhsa-gfx900 -// NOHOST-DAG: hip-amdgcn-amd-amdhsa-gfx906 +// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx900 +// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx906 +// Check archive unbundling +// +// Create few code object bundles and archive them to create an input archive +// RUN: clang-offload-bundler -type=o -targets=host-%itanium_abi_triple,openmp-amdgcn-amd-amdhsa--gfx906,openmp-amdgcn-amd-amdhsa--gfx908 -inputs=%t.o,%t.tgt1,%t.tgt2 -outputs=%t.simple.bundle +// RUN: clang-offload-bundler -type=o -targets=host-%itanium_abi_triple,openmp-amdgcn-amd-amdhsa--gfx903 -inputs=%t.o,%t.tgt1 -outputs=%t.simple1.bundle +// RUN: llvm-ar cr %t.input-archive.a %t.simple.bundle %t.simple1.bundle + +// RUN: clang-offload-bundler -unbundle -type=a -targets=openmp-amdgcn-amd-amdhsa--gfx906,openmp-amdgcn-amd-amdhsa--gfx908 -inputs=%t.input-archive.a -outputs=%t-archive-gfx906-simple.a,%t-archive-gfx908-simple.a +// RUN: llvm-ar t %t-archive-gfx906-simple.a | FileCheck %s -check-prefix=GFX906 +// GFX906: simple-openmp-amdgcn-amd-amdhsa--gfx906 +// RUN: llvm-ar t %t-archive-gfx908-simple.a | FileCheck %s -check-prefix=GFX908 +// GFX908-NOT: {{gfx906}} // Some code so that we can create a binary out of this file. int A = 0; diff --git a/clang/test/Driver/hip-rdc-device-only.hip b/clang/test/Driver/hip-rdc-device-only.hip index ca8d54ea633e2..a95f636d777c1 100644 --- a/clang/test/Driver/hip-rdc-device-only.hip +++ b/clang/test/Driver/hip-rdc-device-only.hip @@ -82,7 +82,7 @@ // COMMON-SAME: {{.*}} {{".*a.cu"}} // COMMON: "{{.*}}clang-offload-bundler" "-type={{(bc|ll)}}" -// COMMON-SAME: "-targets=hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900" +// COMMON-SAME: "-targets=hip-amdgcn-amd-amdhsa--gfx803,hip-amdgcn-amd-amdhsa--gfx900" // COMMON-SAME: "-outputs=a-hip-amdgcn-amd-amdhsa.{{(bc|ll)}}" // COMMON: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" @@ -112,7 +112,7 @@ // COMMON-SAME: {{.*}} {{".*b.hip"}} // COMMON: "{{.*}}clang-offload-bundler" "-type={{(bc|ll)}}" -// COMMON-SAME: "-targets=hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900" +// COMMON-SAME: "-targets=hip-amdgcn-amd-amdhsa--gfx803,hip-amdgcn-amd-amdhsa--gfx900" // COMMON-SAME: "-outputs=b-hip-amdgcn-amd-amdhsa.{{(bc|ll)}}" // SAVETEMP: [[CLANG:".*clang.*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu" @@ -142,7 +142,7 @@ // SAVETEMP-SAME: {{.*}} "-o" {{"a.*.ll"}} "-x" "ir" [[A_GFX900_TMP_BC]] // SAVETEMP: "{{.*}}clang-offload-bundler" "-type=ll" -// SAVETEMP-SAME: "-targets=hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900" +// SAVETEMP-SAME: "-targets=hip-amdgcn-amd-amdhsa--gfx803,hip-amdgcn-amd-amdhsa--gfx900" // SAVETEMP-SAME: "-outputs=a-hip-amdgcn-amd-amdhsa.ll" // SAVETEMP: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu" @@ -172,7 +172,7 @@ // SAVETEMP-SAME: {{.*}} "-o" {{"b.*.ll"}} "-x" "ir" [[B_GFX900_TMP_BC]] // SAVETEMP: "{{.*}}clang-offload-bundler" "-type=ll" -// SAVETEMP-SAME: "-targets=hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900" +// SAVETEMP-SAME: "-targets=hip-amdgcn-amd-amdhsa--gfx803,hip-amdgcn-amd-amdhsa--gfx900" // SAVETEMP-SAME: "-outputs=b-hip-amdgcn-amd-amdhsa.ll" // FAIL: error: cannot specify -o when generating multiple output files diff --git a/clang/test/Driver/hip-toolchain-rdc-separate.hip b/clang/test/Driver/hip-toolchain-rdc-separate.hip index 698ee14e74dc9..cdddbcc8fd216 100644 --- a/clang/test/Driver/hip-toolchain-rdc-separate.hip +++ b/clang/test/Driver/hip-toolchain-rdc-separate.hip @@ -44,7 +44,7 @@ // CHECK-SAME: {{.*}} [[A_SRC]] // CHECK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o" -// CHECK-SAME: "-targets=hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900,host-x86_64-unknown-linux-gnu" +// CHECK-SAME: "-targets=hip-amdgcn-amd-amdhsa--gfx803,hip-amdgcn-amd-amdhsa--gfx900,host-x86_64-unknown-linux-gnu" // CHECK-SAME: "-outputs=[[A_O:.*a.o]]" "-inputs=[[A_BC1]],[[A_BC2]],[[A_OBJ_HOST]]" // CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" @@ -79,7 +79,7 @@ // CHECK-SAME: {{.*}} [[B_SRC]] // CHECK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o" -// CHECK-SAME: "-targets=hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900,host-x86_64-unknown-linux-gnu" +// CHECK-SAME: "-targets=hip-amdgcn-amd-amdhsa--gfx803,hip-amdgcn-amd-amdhsa--gfx900,host-x86_64-unknown-linux-gnu" // CHECK-SAME: "-outputs=[[B_O:.*b.o]]" "-inputs=[[B_BC1]],[[B_BC2]],[[B_OBJ_HOST]]" // RUN: touch %T/a.o @@ -91,22 +91,22 @@ // RUN: 2>&1 | FileCheck -check-prefix=LINK %s // LINK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o" -// LINK-SAME: "-targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900" +// LINK-SAME: "-targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa--gfx803,hip-amdgcn-amd-amdhsa--gfx900" // LINK-SAME: "-inputs=[[A_O:.*a.o]]" "-outputs=[[A_OBJ_HOST:.*o]],{{.*o}},{{.*o}}" // LINK: "-unbundle" "-allow-missing-bundles" // LINK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o" -// LINK-SAME: "-targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900" +// LINK-SAME: "-targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa--gfx803,hip-amdgcn-amd-amdhsa--gfx900" // LINK-SAME: "-inputs=[[B_O:.*b.o]]" "-outputs=[[B_OBJ_HOST:.*o]],{{.*o}},{{.*o}}" // LINK: "-unbundle" "-allow-missing-bundles" // LINK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o" -// LINK-SAME: "-targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900" +// LINK-SAME: "-targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa--gfx803,hip-amdgcn-amd-amdhsa--gfx900" // LINK-SAME: "-inputs=[[A_O]]" "-outputs={{.*o}},[[A_BC1:.*o]],[[A_BC2:.*o]]" // LINK: "-unbundle" "-allow-missing-bundles" // LINK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o" -// LINK-SAME: "-targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900" +// LINK-SAME: "-targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa--gfx803,hip-amdgcn-amd-amdhsa--gfx900" // LINK-SAME: "-inputs=[[B_O]]" "-outputs={{.*o}},[[B_BC1:.*o]],[[B_BC2:.*o]]" // LINK: "-unbundle" "-allow-missing-bundles" diff --git a/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp b/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp index a049ae725c89a..cb4f50ee1ed46 100644 --- a/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp +++ b/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp @@ -22,14 +22,18 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" +#include "llvm/Object/Archive.h" +#include "llvm/Object/ArchiveWriter.h" #include "llvm/Object/Binary.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/Errc.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/Host.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" #include "llvm/Support/Program.h" @@ -82,6 +86,7 @@ static cl::opt " bc - llvm-bc\n" " s - assembler\n" " o - object\n" + " a - archive of objects\n" " gch - precompiled-header\n" " ast - clang AST file"), cl::cat(ClangOffloadBundlerCategory)); @@ -123,20 +128,49 @@ static bool AllowNoHost = false; /// Path to the current binary. static std::string BundlerExecutable; -/// Obtain the offload kind and real machine triple out of the target -/// information specified by the user. -static void getOffloadKindAndTriple(StringRef Target, StringRef &OffloadKind, - StringRef &Triple) { - auto KindTriplePair = Target.split('-'); - OffloadKind = KindTriplePair.first; - Triple = KindTriplePair.second; -} -static bool hasHostKind(StringRef Target) { +/// Obtain the offload kind, real machine triple, and an optional GPUArch +/// out of the target information specified by the user. +/// Bundle Entry ID (or, Offload Target String) has following components: +/// * Offload Kind - Host, OpenMP, or HIP +/// * Triple - Standard LLVM Triple +/// * GPUArch (Optional) - Processor name, like gfx906 or sm_30 +/// In presence of Proc, the Triple should contain separator "-" for all +/// standard four components, even if they are empty. +struct OffloadTargetInfo { StringRef OffloadKind; - StringRef Triple; - getOffloadKindAndTriple(Target, OffloadKind, Triple); - return OffloadKind == "host"; -} + llvm::Triple Triple; + StringRef GPUArch; + + OffloadTargetInfo(const StringRef Target) { + SmallVector Components; + Target.split(Components, '-', 5); + Components.resize(6); + this->OffloadKind = Components[0]; + this->Triple = llvm::Triple(Components[1], Components[2], Components[3], + Components[4]); + this->GPUArch = Components[5]; + } + + bool hasHostKind() const { return this->OffloadKind == "host"; } + + bool isOffloadKindValid() const { + return OffloadKind == "host" || OffloadKind == "openmp" || + OffloadKind == "hip" || OffloadKind == "hipv4"; + } + + bool isTripleValid() const { + return !Triple.str().empty() && Triple.getArch() != Triple::UnknownArch; + } + + bool operator==(const OffloadTargetInfo &Target) const { + return OffloadKind == Target.OffloadKind && + Triple.isCompatibleWith(Target.Triple) && GPUArch == Target.GPUArch; + } + + std::string str() { + return Twine(OffloadKind + "-" + Triple.str() + "-" + GPUArch).str(); + } +}; /// Generic file handler interface. class FileHandler { @@ -163,7 +197,7 @@ class FileHandler { virtual Error ReadBundleEnd(MemoryBuffer &Input) = 0; /// Read the current bundle and write the result into the stream \a OS. - virtual Error ReadBundle(raw_fd_ostream &OS, MemoryBuffer &Input) = 0; + virtual Error ReadBundle(raw_ostream &OS, MemoryBuffer &Input) = 0; /// Write the header of the bundled file to \a OS based on the information /// gathered from \a Inputs. @@ -378,7 +412,7 @@ class BinaryFileHandler final : public FileHandler { return Error::success(); } - Error ReadBundle(raw_fd_ostream &OS, MemoryBuffer &Input) final { + Error ReadBundle(raw_ostream &OS, MemoryBuffer &Input) final { assert(CurBundleInfo != BundlesInfo.end() && "Invalid reader info!"); StringRef FC = Input.getBuffer(); OS.write(FC.data() + CurBundleInfo->second.Offset, @@ -541,7 +575,7 @@ class ObjectFileHandler final : public FileHandler { Error ReadBundleEnd(MemoryBuffer &Input) final { return Error::success(); } - Error ReadBundle(raw_fd_ostream &OS, MemoryBuffer &Input) final { + Error ReadBundle(raw_ostream &OS, MemoryBuffer &Input) final { Expected ContentOrErr = CurrentSection->getContents(); if (!ContentOrErr) return ContentOrErr.takeError(); @@ -717,7 +751,7 @@ class TextFileHandler final : public FileHandler { return Error::success(); } - Error ReadBundle(raw_fd_ostream &OS, MemoryBuffer &Input) final { + Error ReadBundle(raw_ostream &OS, MemoryBuffer &Input) final { StringRef FC = Input.getBuffer(); size_t BundleStart = ReadChars; @@ -812,6 +846,8 @@ CreateFileHandler(MemoryBuffer &FirstInput) { return std::make_unique(/*Comment=*/"#"); if (FilesType == "o") return CreateObjectFileHandler(FirstInput); + if (FilesType == "a") + return CreateObjectFileHandler(FirstInput); if (FilesType == "gch") return std::make_unique(); if (FilesType == "ast") @@ -956,7 +992,8 @@ static Error UnbundleFiles() { Worklist.erase(Output); // Record if we found the host bundle. - if (hasHostKind(CurTriple)) + auto OffloadInfo = OffloadTargetInfo(CurTriple); + if (OffloadInfo.hasHostKind()) FoundHostBundle = true; } @@ -989,7 +1026,8 @@ static Error UnbundleFiles() { return createFileError(E.second, EC); // If this entry has a host kind, copy the input file to the output file. - if (hasHostKind(E.first())) + auto OffloadInfo = OffloadTargetInfo(E.getKey()); + if (OffloadInfo.hasHostKind()) OutputFile.write(Input.getBufferStart(), Input.getBufferSize()); } return Error::success(); @@ -1012,6 +1050,241 @@ static Error UnbundleFiles() { return Error::success(); } +static Archive::Kind getDefaultArchiveKindForHost() { + return Triple(sys::getDefaultTargetTriple()).isOSDarwin() ? Archive::K_DARWIN + : Archive::K_GNU; +} + +/// @brief Checks if a code object \p CodeObjectInfo is compatible with a given +/// target \p TargetInfo. +/// @link https://clang.llvm.org/docs/ClangOffloadBundler.html#bundle-entry-id +bool isCodeObjectCompatible(OffloadTargetInfo &CodeObjectInfo, + OffloadTargetInfo &TargetInfo) { + + // Compatible in case of exact match. + if (CodeObjectInfo == TargetInfo) { + DEBUG_WITH_TYPE( + "CodeObjectCompatibility", + dbgs() << "Compatible: Exact match: " << CodeObjectInfo.str() << "\n"); + return true; + } + + // Incompatible if Kinds or Triples mismatch. + if (CodeObjectInfo.OffloadKind != TargetInfo.OffloadKind || + !CodeObjectInfo.Triple.isCompatibleWith(TargetInfo.Triple)) { + DEBUG_WITH_TYPE( + "CodeObjectCompatibility", + dbgs() << "Incompatible: Kind/Triple mismatch \t[CodeObject: " + << CodeObjectInfo.str() << "]\t:\t[Target: " << TargetInfo.str() + << "]\n"); + return false; + } + + // Incompatible if GPUArch mismatch. + if (CodeObjectInfo.GPUArch != TargetInfo.GPUArch) { + DEBUG_WITH_TYPE("CodeObjectCompatibility", + dbgs() << "Incompatible: GPU Arch mismatch \t[CodeObject: " + << CodeObjectInfo.str() + << "]\t:\t[Target: " << TargetInfo.str() << "]\n"); + return false; + } + + DEBUG_WITH_TYPE( + "CodeObjectCompatibility", + dbgs() << "Compatible: Code Objects are compatible \t[CodeObject: " + << CodeObjectInfo.str() << "]\t:\t[Target: " << TargetInfo.str() + << "]\n"); + return true; +} + +/// @brief Computes a list of targets among all given targets which are +/// compatible with this code object +/// @param [in] Code Object \p CodeObject +/// @param [out] List of all compatible targets \p CompatibleTargets among all +/// given targets +/// @return false, if no compatible target is found. +static bool +getCompatibleOffloadTargets(OffloadTargetInfo &CodeObjectInfo, + SmallVectorImpl &CompatibleTargets) { + if (!CompatibleTargets.empty()) { + DEBUG_WITH_TYPE("CodeObjectCompatibility", + dbgs() << "CompatibleTargets list should be empty\n"); + return false; + } + for (auto &Target : TargetNames) { + auto TargetInfo = OffloadTargetInfo(Target); + if (isCodeObjectCompatible(CodeObjectInfo, TargetInfo)) + CompatibleTargets.push_back(Target); + } + return !CompatibleTargets.empty(); +} + +/// UnbundleArchive takes an archive file (".a") as input containing bundled +/// code object files, and a list of offload targets (not host), and extracts +/// the code objects into a new archive file for each offload target. Each +/// resulting archive file contains all code object files corresponding to that +/// particular offload target. The created archive file does not +/// contain an index of the symbols and code object files are named as +/// <->, with ':' replaced with '_'. +static Error UnbundleArchive() { + std::vector> ArchiveBuffers; + + /// Map of target names with list of object files that will form the device + /// specific archive for that target + StringMap> OutputArchivesMap; + + // Map of target names and output archive filenames + StringMap TargetOutputFileNameMap; + + auto Output = OutputFileNames.begin(); + for (auto &Target : TargetNames) { + TargetOutputFileNameMap[Target] = *Output; + ++Output; + } + + StringRef IFName = InputFileNames.front(); + ErrorOr> BufOrErr = + MemoryBuffer::getFileOrSTDIN(IFName, -1, false); + if (std::error_code EC = BufOrErr.getError()) + return createFileError(InputFileNames.front(), EC); + + ArchiveBuffers.push_back(std::move(*BufOrErr)); + Expected> LibOrErr = + Archive::create(ArchiveBuffers.back()->getMemBufferRef()); + if (!LibOrErr) + return LibOrErr.takeError(); + + auto Archive = std::move(*LibOrErr); + + Error ArchiveErr = Error::success(); + auto ChildEnd = Archive->child_end(); + + /// Iterate over all bundled code object files in the input archive. + for (auto ArchiveIter = Archive->child_begin(ArchiveErr); + ArchiveIter != ChildEnd; ++ArchiveIter) { + if (ArchiveErr) + return ArchiveErr; + auto ArchiveChildNameOrErr = (*ArchiveIter).getName(); + if (!ArchiveChildNameOrErr) + return ArchiveChildNameOrErr.takeError(); + + StringRef BundledObjectFile = sys::path::filename(*ArchiveChildNameOrErr); + + auto CodeObjectBufferRefOrErr = (*ArchiveIter).getMemoryBufferRef(); + if (!CodeObjectBufferRefOrErr) + return CodeObjectBufferRefOrErr.takeError(); + + auto CodeObjectBuffer = + MemoryBuffer::getMemBuffer(*CodeObjectBufferRefOrErr, false); + + Expected> FileHandlerOrErr = + CreateFileHandler(*CodeObjectBuffer); + if (!FileHandlerOrErr) + return FileHandlerOrErr.takeError(); + + std::unique_ptr &FileHandler = *FileHandlerOrErr; + assert(FileHandler && + "FileHandle creation failed for file in the archive!"); + + if (Error ReadErr = FileHandler.get()->ReadHeader(*CodeObjectBuffer)) + return ReadErr; + + Expected> CurBundleIDOrErr = + FileHandler->ReadBundleStart(*CodeObjectBuffer); + if (!CurBundleIDOrErr) + return CurBundleIDOrErr.takeError(); + + Optional OptionalCurBundleID = *CurBundleIDOrErr; + // No device code in this child, skip. + if (!OptionalCurBundleID.hasValue()) + continue; + StringRef CodeObject = *OptionalCurBundleID; + + // Process all bundle entries (CodeObjects) found in this child of input + // archive. + while (!CodeObject.empty()) { + SmallVector CompatibleTargets; + auto CodeObjectInfo = OffloadTargetInfo(CodeObject); + if (CodeObjectInfo.hasHostKind()) { + // Do nothing, we don't extract host code yet. + } else if (getCompatibleOffloadTargets(CodeObjectInfo, + CompatibleTargets)) { + std::string BundleData; + raw_string_ostream DataStream(BundleData); + if (Error Err = + FileHandler.get()->ReadBundle(DataStream, *CodeObjectBuffer)) + return Err; + + for (auto &CompatibleTarget : CompatibleTargets) { + SmallString<128> BundledObjectFileName; + BundledObjectFileName.assign(BundledObjectFile); + auto OutputBundleName = + Twine(llvm::sys::path::stem(BundledObjectFileName) + "-" + + CodeObject) + .str(); + // Replace ':' in optional target feature list with '_' to ensure + // cross-platform validity. + std::replace(OutputBundleName.begin(), OutputBundleName.end(), ':', + '_'); + + std::unique_ptr MemBuf = MemoryBuffer::getMemBufferCopy( + DataStream.str(), OutputBundleName); + ArchiveBuffers.push_back(std::move(MemBuf)); + llvm::MemoryBufferRef MemBufRef = + MemoryBufferRef(*(ArchiveBuffers.back())); + + // For inserting > entry in + // OutputArchivesMap. + if (OutputArchivesMap.find(CompatibleTarget) == + OutputArchivesMap.end()) { + + std::vector ArchiveMembers; + ArchiveMembers.push_back(NewArchiveMember(MemBufRef)); + OutputArchivesMap.insert_or_assign(CompatibleTarget, + std::move(ArchiveMembers)); + } else { + OutputArchivesMap[CompatibleTarget].push_back( + NewArchiveMember(MemBufRef)); + } + } + } + + if (Error Err = FileHandler.get()->ReadBundleEnd(*CodeObjectBuffer)) + return Err; + + Expected> NextTripleOrErr = + FileHandler->ReadBundleStart(*CodeObjectBuffer); + if (!NextTripleOrErr) + return NextTripleOrErr.takeError(); + + CodeObject = ((*NextTripleOrErr).hasValue()) ? **NextTripleOrErr : ""; + } // End of processing of all bundle entries of this child of input archive. + } // End of while over children of input archive. + + assert(!ArchiveErr && "Error occured while reading archive!"); + + /// Write out an archive for each target + for (auto &Target : TargetNames) { + StringRef FileName = TargetOutputFileNameMap[Target]; + StringMapIterator> CurArchiveMembers = + OutputArchivesMap.find(Target); + if (CurArchiveMembers != OutputArchivesMap.end()) { + if (Error WriteErr = writeArchive(FileName, CurArchiveMembers->getValue(), + true, getDefaultArchiveKindForHost(), + true, false, nullptr)) + return WriteErr; + } else if (!AllowMissingBundles) { + std::string ErrMsg = + Twine("no compatible code object found for the target '" + Target + + "' in heterogenous archive library: " + IFName) + .str(); + return createStringError(inconvertibleErrorCode(), ErrMsg); + } + } + + return Error::success(); +} + static void PrintVersion(raw_ostream &OS) { OS << clang::getClangToolFullVersion("clang-offload-bundler") << '\n'; } @@ -1096,6 +1369,11 @@ int main(int argc, const char **argv) { "match in unbundling mode")); } } else { + if (FilesType == "a") { + reportError(createStringError(errc::invalid_argument, + "Archive files are only supported " + "for unbundling")); + } if (OutputFileNames.size() != 1) { reportError(createStringError( errc::invalid_argument, @@ -1121,40 +1399,28 @@ int main(int argc, const char **argv) { } ParsedTargets.insert(Target); - StringRef Kind; - StringRef Triple; - getOffloadKindAndTriple(Target, Kind, Triple); - - bool KindIsValid = !Kind.empty(); - KindIsValid = KindIsValid && StringSwitch(Kind) - .Case("host", true) - .Case("openmp", true) - .Case("hip", true) - .Case("hipv4", true) - .Default(false); - - bool TripleIsValid = !Triple.empty(); - llvm::Triple T(Triple); - TripleIsValid &= T.getArch() != Triple::UnknownArch; + auto OffloadInfo = OffloadTargetInfo(Target); + bool KindIsValid = OffloadInfo.isOffloadKindValid(); + bool TripleIsValid = OffloadInfo.isTripleValid(); if (!KindIsValid || !TripleIsValid) { SmallVector Buf; raw_svector_ostream Msg(Buf); Msg << "invalid target '" << Target << "'"; if (!KindIsValid) - Msg << ", unknown offloading kind '" << Kind << "'"; + Msg << ", unknown offloading kind '" << OffloadInfo.OffloadKind << "'"; if (!TripleIsValid) - Msg << ", unknown target triple '" << Triple << "'"; + Msg << ", unknown target triple '" << OffloadInfo.Triple.str() << "'"; reportError(createStringError(errc::invalid_argument, Msg.str())); } - if (KindIsValid && Kind == "host") { + if (KindIsValid && OffloadInfo.hasHostKind()) { ++HostTargetNum; // Save the index of the input that refers to the host. HostInputIndex = Index; } - if (Kind != "hip" && Kind != "hipv4") + if (OffloadInfo.OffloadKind != "hip" && OffloadInfo.OffloadKind != "hipv4") HIPOnly = false; ++Index; @@ -1174,6 +1440,14 @@ int main(int argc, const char **argv) { Twine(HostTargetNum))); } - doWork([]() { return Unbundle ? UnbundleFiles() : BundleFiles(); }); + doWork([]() { + if (Unbundle) { + if (FilesType == "a") + return UnbundleArchive(); + else + return UnbundleFiles(); + } else + return BundleFiles(); + }); return 0; } From 42d99bc3767644311707c66033b6fc8a4eeba56a Mon Sep 17 00:00:00 2001 From: Tobias Gysi Date: Wed, 30 Jun 2021 12:26:33 +0000 Subject: [PATCH 279/619] [mlir][linalg][python] Update the OpDSL doc (NFC). Update the OpDSL documentation to reflect recent changes. In particular, the updated documentation discusses: - Attributes used to parameterize index expressions - Shape-only tensor support - Scalar parameters Differential Revision: https://reviews.llvm.org/D105123 --- mlir/docs/Tools/LinalgOpDsl.md | 190 ++++++++++++++++++++++----------- 1 file changed, 128 insertions(+), 62 deletions(-) diff --git a/mlir/docs/Tools/LinalgOpDsl.md b/mlir/docs/Tools/LinalgOpDsl.md index 3ae9c9b7f45fd..f7be38ec6f656 100644 --- a/mlir/docs/Tools/LinalgOpDsl.md +++ b/mlir/docs/Tools/LinalgOpDsl.md @@ -3,30 +3,30 @@ Python based DSL for authoring Linalg op definitions and generating `linalg.generic` IR based on them for samples. -The tool `linalg_opdsl` provides a high level DSL for constructing -structured op definitions in a way that can be exported to built-in, named -structured ops via the above YAML-based definitions or used interactively to -emit corresponding `linalg.generic` IR for the composition. +The tool `linalg_opdsl` provides a high level DSL for constructing structured op +definitions in a way that can be exported to built-in, named structured ops via +the above YAML-based definitions or used interactively to emit corresponding +`linalg.generic` IR for the composition. ## Basic usage The tool is bundled with the MLIR Python bindings. To use from the CMake build tree, MLIR must be build with Python bindings enabled (`-DMLIR_ENALBE_BINDINGS_PYTHON=ON`). Then add the `python` directory in the -build tree to your `PYTHONPATH` environment variable (i.e. -`export PYTHONPATH=$PWD/build/python`). Optionally, use an installed MLIR -package, if available, to avoid building. +build tree to your `PYTHONPATH` environment variable (i.e. `export +PYTHONPATH=$PWD/build/python`). Optionally, use an installed MLIR package, if +available, to avoid building. ```shell # Dump the `core_named_ops.py` module as YAML. python -m mlir.dialects.linalg.opdsl.dump_oplib .ops.core_named_ops ``` -The tool is meant for use during both development and runtime, but not as -a build tool of the core compiler: in order to export static named op -definitions to be built as part of the compiler, the corresponding Linalg -dialect YAML file must be updated and reviewed. TODO: Develop a script to -automate op updates to these files. +The tool is meant for use during both development and runtime, but not as a +build tool of the core compiler: in order to export static named op definitions +to be built as part of the compiler, the corresponding Linalg dialect YAML file +must be updated and reviewed. TODO: Develop a script to automate op updates to +these files. ## Language Guide @@ -53,104 +53,170 @@ def matmul(A=TensorDef(T1, S.M, S.K), Numeric casting is performed on the operands to the inner multiply, promoting them to the same data type as the accumulator/output. """ + domain(D.m, D.n, D.k) implements(ContractionOpInterface) C[D.m, D.n] += cast(U, A[D.m, D.k]) * cast(U, B[D.k, D.n]) ``` -Here we have a simple type polymorphic contraction that takes arguments `A` -and `B` and outputs `C`. Each is bound to a `TensorDef`, which specifies: +Here we have a simple type polymorphic contraction that takes arguments `A` and +`B` and outputs `C`. Each is bound to a `TensorDef`, which specifies: -* The symbolic element type (`T1`, `T2`, `U` above). -* Symbolic shape expressions with symbols that are bound globally for the op ( -note that in this simple example, the shape expressions are just symbol -references, but they are permitted to be a constrained set of affine -expressions). -* Usage (`output=True`). +* The symbolic element type (`T1`, `T2`, `U` above). +* Symbolic shape expressions with symbols that are bound globally for the op ( + note that in this simple example, the shape expressions are just symbol + references, but they are permitted to be a constrained set of affine + expressions). +* Usage (`output=True`). The docstring will be transferred to the op definition verbatim. +An explicit iteration domain dimension order can be declared for the op via +`domain(D.d0[, D.d1...])`. + Special identifying op interfaces can be declared for the op via `implements(interface1[, interface2...])`. ## Parameters -Structured operations can take two types of parameters namely input/output -tensors and captures. Assignment expressions index the tensor parameters to -access the individual elements, while captures are scalars that can be -accessed directly. +Structured operations take two types of runtime parameters namely scalars and +tensors. While scalars are inputs only, a tensor may be marked as an output. +Assignment expressions index the tensor parameters to access the individual +elements, while scalars can be accessed directly. The following example demonstrates the use of the two parameter types: ```python @linalg_structured_op -def copy_and_scale(I=TensorDef(T, S.M, S.K), - O=TensorDef(T, S.M, S.K, output=True), - val=CaptureDef(T)): - """Scale the input by the captured value and store the result""" +def copy_and_scale(val=ScalarDef(T), + I=TensorDef(T, S.M, S.K), + O=TensorDef(T, S.M, S.K, output=True)): + """Scale the input by the scalar value and store the result""" O[D.m, D.n] = I[D.m, D.n] * val ``` -The operation scales the input tensor `I` scales its elements by the value -`val` and writes the result to the output tensor `out`. The capture `val` is -bound to a `CaptureDef`, which specifies the type of the captured value. The -tensors are bound to a `TensorDef` as demonstrated by the matmul example. All -parameters appear in the parameter list of the operation: +The operation scales the input tensor `I` scales its elements by the value `val` +and writes the result to the output tensor `out`. The scalar `val` is bound to a +`ScalarDef`, which specifies the type of the scalar operand. The tensors are +bound to a `TensorDef` as demonstrated by the matmul example. All parameters +appear in the parameter list of the operation: + +```python +fill(val, in_tensor, outs=[out_tensor]) +``` + +## Attributes + +Attributes are compile-time constant parameters only accessible in index +expressions. They can be used to parameterize the access pattern of a structured +operation, for example, by setting its strides. They cannot take part in the +actual computation. + +The following example demonstrates the use of attributes: + +```python +@linalg_structured_op +def strided_copy(I=TensorDef(T, S.IH, S.IW), + O=TensorDef(T, S.OH, S.OW, output=True), + strides=AttributeDef(S.SH, S.SW)): + """Copy a subset of the input tensor elements to the output tensor""" + O[D.oh, D.ow] = I[D.oh * S.SH, D.ow * S.SW] +``` + +The operation implements a strided copy from the input tensor `I` to the output +tensor `O`. The `strides` attribute is bound to an `AttributeDef`. It defines +the symbols `S.SH` and `S.SW`, which are used to index the input tensor `I`. +When instantiating the operation, the attribute is set using a named argument: ```python -fill(in_tensor, outs=[out_tensor], captures=[captured_val]) +strided_copy(in_tensor, outs=[out_tensor], strides=[1,2]) ``` +The `strides` vector elements substitute the symbols `S.SH` and `S.SW` in the +index expressions of the operation instance. + +Attributes are currently limited to integer vectors and only accessible in index +expressions. An operation may have multiple attributes all of them placed at the +end of the parameter list after the output tensors. + +## Shape-Only Tensors + +Structured operations derive the iteration space given the sizes of the input +and output tensors. Certain operations need shape-only tensors that are not +accessed and exist purely for the sake of specifying the iteration domain. An +example is the pooling operation that takes a shape-only tensor to define the +iteration space of the reduction. As shape-only tensors have no uses, the +`TensorDef` takes an additional optional `index_dims` parameter to map the shape +to index dimensions. + +The following example demonstrates the index dimension annotation: + +```python +@linalg_structured_op +def pooling_poly( + I=TensorDef(T1, S.N, S.H, S.W, S.C), + K=TensorDef(T2, S.KH, S.KW, index_dims=[D.kh, D.kw]), + O=TensorDef(U, S.N, S.OH, S.OW, S.C, output=True), + strides=AttributeDef(S.SH, S.SW), + dilations=AttributeDef(S.DH, S.DW)): + O[D.n, D.oh, D.ow, D.c] += \ + cast(U, I[D.n, D.oh * S.SH + D.kh * S.DH, D.ow * S.SW + D.kw * S.DW, D.c]) +``` + +The pooling operation does not access the shape-only tensor `K`. Instead, the +shapes `S.KH` and `S.KW` specify the iteration domain for the reduction +dimensions `D.kh` and `D.kw`. + ## Assignments -The bulk of language consists of assignment expressions of the form above. -The iteration dimension order is determined lexically based on the order -encountered in the expression (following operator precedence if math operators -are used). TODO: Introduce a directive to fix the dimension bindings. +The bulk of language consists of assignment expressions of the form above. The +iteration dimension order is determined lexically based on the order encountered +in the expression (following operator precedence if math operators are used). +TODO: Introduce a directive to fix the dimension bindings. Reduction dimensions are inferred to be any dimensions on the RHS that are not on the LHS. A number of arithmetic primitive functions are supported: -* `PrimFn.add(a, b)` (also via overloading the binary `+` operator) -* `PrimFn.exp(a)` -* `PrimFn.log(a)` -* `PrimFn.mul(a, b)` (also via overloading the binary `*` operator) -* `PrimFn.max(a, b)` -* `PrimFn.sub(a, b)` (also via overloading the binary `-` operator) +* `PrimFn.add(a, b)` (also via overloading the binary `+` operator) +* `PrimFn.exp(a)` +* `PrimFn.log(a)` +* `PrimFn.mul(a, b)` (also via overloading the binary `*` operator) +* `PrimFn.max(a, b)` +* `PrimFn.sub(a, b)` (also via overloading the binary `-` operator) Reduction functions can appear as the outer-most function on the RHS: -* `ReduceFn.add` (also overloading the inplace `+=` on a LHS) -* `ReduceFn.mul` -* `ReduceFn.max` +* `ReduceFn.add` (also overloading the inplace `+=` on a LHS) +* `ReduceFn.mul` +* `ReduceFn.max` There are also special forms: -* `cast(TypeVar, operand)` casts the `operand` to the target type `TypeVar`. -* `const(TypeVar, value)` returns a constant value of type `TypeVar`. -* `index(dim)` returns the iteration index in the given dimension `dim`. +* `cast(TypeVar, operand)` casts the `operand` to the target type `TypeVar`. +* `const(TypeVar, value)` returns a constant value of type `TypeVar`. +* `index(dim)` returns the iteration index in the given dimension `dim`. ## Types -All types in assignment expressions are late bound based on actual input -and output types of constructed ops. An exception are predefined types such as +All types in assignment expressions are late bound based on actual input and +output types of constructed ops. An exception are predefined types such as `I32`, `I64`, `F32`, and `F64`. These hardwired types enable intermediate -computations with a type that is independent of the input and output types. -For example, parts of floating point computation may require double precision +computations with a type that is independent of the input and output types. For +example, parts of floating point computation may require double precision arithmetic despite all inputs and outputs being single precision values. -Assignment expressions with no `cast` calls will generally require uniform -types throughout and will fail to verify if violated. The presence of a -`cast` allows for a limited form of numeric type conversion between element -types that can be derived from inputs and outputs (and in the future, -attributes). `cast` calls with a `TypeVar` first argument are emitted as -`symbolic_cast` primitives in the YAML definition. +Assignment expressions with no `cast` calls will generally require uniform types +throughout and will fail to verify if violated. The presence of a `cast` allows +for a limited form of numeric type conversion between element types that can be +derived from inputs and outputs (and in the future, attributes). `cast` calls +with a `TypeVar` first argument are emitted as `symbolic_cast` primitives in the +YAML definition. Casting will perform `int<->float` and `index->int` type conversions and will perform any necessary extension or truncation within type family. Note that presently, any integer type is assumed to be signed for the purpose of -determining how to extend or truncate. Supporting unsigned integer types is -left for future work. +determining how to extend or truncate. Supporting unsigned integer types is left +for future work. Not all functions are applicable for all numeric types, and on mismatch, op verification will fail. From 002911503fe29ac30ef0aa0a15b624982814548b Mon Sep 17 00:00:00 2001 From: Bradley Smith Date: Mon, 28 Jun 2021 13:39:07 +0100 Subject: [PATCH 280/619] [TargetLowering][AArch64][SVE] Take into account accessed type when clamping address When clamping the index for a memory access to a stacked vector we must take into account the entire type being accessed, not just assume that we are accessing only a single element. Differential Revision: https://reviews.llvm.org/D105016 --- llvm/include/llvm/CodeGen/TargetLowering.h | 8 +++ llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 14 ++-- .../SelectionDAG/LegalizeVectorTypes.cpp | 3 +- .../CodeGen/SelectionDAG/TargetLowering.cpp | 40 +++++++---- .../CodeGen/AArch64/split-vector-insert.ll | 4 +- .../CodeGen/AArch64/sve-extract-vector.ll | 67 ++++++++++++++++--- .../test/CodeGen/AArch64/sve-insert-vector.ll | 62 +++++++++++++++-- 7 files changed, 166 insertions(+), 32 deletions(-) diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 00ec95a85f8f3..47d6ca43a5ac3 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -4488,6 +4488,14 @@ class TargetLowering : public TargetLoweringBase { SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const; + /// Get a pointer to a sub-vector of type \p SubVecVT at index \p Idx located + /// in memory for a vector of type \p VecVT starting at a base address of + /// \p VecPtr. If \p Idx plus the size of \p SubVecVT is out of bounds the + /// returned pointer is unspecified, but the value returned will be such that + /// the entire subvector would be within the vector bounds. + SDValue getVectorSubVecPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, + EVT SubVecVT, SDValue Index) const; + /// Method for building the DAG expansion of ISD::[US][MIN|MAX]. This /// method accepts integers as its arguments. SDValue expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const; diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 8392a5def8059..d92b23f56e4de 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1370,17 +1370,19 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { MachinePointerInfo()); } - StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); - SDValue NewLoad; - if (Op.getValueType().isVector()) + if (Op.getValueType().isVector()) { + StackPtr = TLI.getVectorSubVecPointer(DAG, StackPtr, VecVT, + Op.getValueType(), Idx); NewLoad = DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, MachinePointerInfo()); - else + } else { + StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); NewLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, MachinePointerInfo(), VecVT.getVectorElementType()); + } // Replace the chain going out of the store, by the one out of the load. DAG.ReplaceAllUsesOfValueWith(Ch, SDValue(NewLoad.getNode(), 1)); @@ -1405,6 +1407,7 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { // Store the value to a temporary stack slot, then LOAD the returned part. EVT VecVT = Vec.getValueType(); + EVT SubVecVT = Part.getValueType(); SDValue StackPtr = DAG.CreateStackTemporary(VecVT); int FI = cast(StackPtr.getNode())->getIndex(); MachinePointerInfo PtrInfo = @@ -1414,7 +1417,8 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo); // Then store the inserted part. - SDValue SubStackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); + SDValue SubStackPtr = + TLI.getVectorSubVecPointer(DAG, StackPtr, VecVT, SubVecVT, Idx); // Store the subvector. Ch = DAG.getStore( diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 7bc8d1d2333bd..c018cfd0a2ead 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1315,7 +1315,8 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, SmallestAlign); // Store the new subvector into the specified index. - SDValue SubVecPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); + SDValue SubVecPtr = + TLI.getVectorSubVecPointer(DAG, StackPtr, VecVT, SubVecVT, Idx); Store = DAG.getStore(Store, dl, SubVec, SubVecPtr, MachinePointerInfo::getUnknownStack(MF)); diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 801a994e7fdb4..a5e3cc23972ee 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -7781,39 +7781,51 @@ TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask, return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment); } -static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, - SDValue Idx, - EVT VecVT, - const SDLoc &dl) { +static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx, + EVT VecVT, const SDLoc &dl, + unsigned NumSubElts) { if (!VecVT.isScalableVector() && isa(Idx)) return Idx; EVT IdxVT = Idx.getValueType(); unsigned NElts = VecVT.getVectorMinNumElements(); if (VecVT.isScalableVector()) { - // If this is a constant index and we know the value is less than the - // minimum number of elements then it's safe to return Idx. + // If this is a constant index and we know the value plus the number of the + // elements in the subvector minus one is less than the minimum number of + // elements then it's safe to return Idx. if (auto *IdxCst = dyn_cast(Idx)) - if (IdxCst->getZExtValue() < NElts) + if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts) return Idx; SDValue VS = DAG.getVScale(dl, IdxVT, APInt(IdxVT.getFixedSizeInBits(), NElts)); - SDValue Sub = - DAG.getNode(ISD::SUB, dl, IdxVT, VS, DAG.getConstant(1, dl, IdxVT)); + unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT; + SDValue Sub = DAG.getNode(SubOpcode, dl, IdxVT, VS, + DAG.getConstant(NumSubElts, dl, IdxVT)); return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub); } - if (isPowerOf2_32(NElts)) { + if (isPowerOf2_32(NElts) && NumSubElts == 1) { APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), Log2_32(NElts)); return DAG.getNode(ISD::AND, dl, IdxVT, Idx, DAG.getConstant(Imm, dl, IdxVT)); } + unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0; return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, - DAG.getConstant(NElts - 1, dl, IdxVT)); + DAG.getConstant(MaxIndex, dl, IdxVT)); } SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const { + return getVectorSubVecPointer( + DAG, VecPtr, VecVT, + EVT::getVectorVT(*DAG.getContext(), VecVT.getVectorElementType(), 1), + Index); +} + +SDValue TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG, + SDValue VecPtr, EVT VecVT, + EVT SubVecVT, + SDValue Index) const { SDLoc dl(Index); // Make sure the index type is big enough to compute in. Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType()); @@ -7825,7 +7837,11 @@ SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG, assert(EltSize * 8 == EltVT.getFixedSizeInBits() && "Converting bits to bytes lost precision"); - Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl); + assert(SubVecVT.isFixedLengthVector() && + SubVecVT.getVectorElementType() == EltVT && + "Sub-vector must be a fixed vector with matching element type"); + Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl, + SubVecVT.getVectorNumElements()); EVT IdxVT = Index.getValueType(); diff --git a/llvm/test/CodeGen/AArch64/split-vector-insert.ll b/llvm/test/CodeGen/AArch64/split-vector-insert.ll index b3bf4ac9975bb..ee69b7945fa45 100644 --- a/llvm/test/CodeGen/AArch64/split-vector-insert.ll +++ b/llvm/test/CodeGen/AArch64/split-vector-insert.ll @@ -24,7 +24,7 @@ define @test_nxv2i64_v8i64( %a, <8 x i64> % ; CHECK-NEXT: cntd x9 ; CHECK-NEXT: st1d { z0.d }, p0, [sp] ; CHECK-NEXT: str q1, [sp] -; CHECK-NEXT: sub x9, x9, #1 // =1 +; CHECK-NEXT: sub x9, x9, #2 // =2 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp] ; CHECK-NEXT: mov w8, #2 ; CHECK-NEXT: cmp x9, #2 // =2 @@ -74,7 +74,7 @@ define @test_nxv2f64_v8f64( %a, <8 x ; CHECK-NEXT: cntd x9 ; CHECK-NEXT: st1d { z0.d }, p0, [sp] ; CHECK-NEXT: str q1, [sp] -; CHECK-NEXT: sub x9, x9, #1 // =1 +; CHECK-NEXT: sub x9, x9, #2 // =2 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp] ; CHECK-NEXT: mov w8, #2 ; CHECK-NEXT: cmp x9, #2 // =2 diff --git a/llvm/test/CodeGen/AArch64/sve-extract-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-vector.ll index 928407a5f919a..8940441764567 100644 --- a/llvm/test/CodeGen/AArch64/sve-extract-vector.ll +++ b/llvm/test/CodeGen/AArch64/sve-extract-vector.ll @@ -18,7 +18,7 @@ define <2 x i64> @extract_v2i64_nxv2i64_idx2( %vec) nounwind { ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: cntd x9 -; CHECK-NEXT: sub x9, x9, #1 // =1 +; CHECK-NEXT: sub x9, x9, #2 // =2 ; CHECK-NEXT: mov w8, #2 ; CHECK-NEXT: cmp x9, #2 // =2 ; CHECK-NEXT: ptrue p0.d @@ -30,8 +30,8 @@ define <2 x i64> @extract_v2i64_nxv2i64_idx2( %vec) nounwind { ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret -%retval = call <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64( %vec, i64 2) -ret <2 x i64> %retval + %retval = call <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64( %vec, i64 2) + ret <2 x i64> %retval } ; Should codegen to a nop, since idx is zero. @@ -40,8 +40,8 @@ define <4 x i32> @extract_v4i32_nxv4i32( %vec) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret -%retval = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32( %vec, i64 0) -ret <4 x i32> %retval + %retval = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32( %vec, i64 0) + ret <4 x i32> %retval } ; Goes through memory currently; idx != 0. @@ -51,7 +51,7 @@ define <4 x i32> @extract_v4i32_nxv4i32_idx4( %vec) nounwind { ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: cntw x9 -; CHECK-NEXT: sub x9, x9, #1 // =1 +; CHECK-NEXT: sub x9, x9, #4 // =4 ; CHECK-NEXT: mov w8, #4 ; CHECK-NEXT: cmp x9, #4 // =4 ; CHECK-NEXT: ptrue p0.s @@ -84,7 +84,7 @@ define <8 x i16> @extract_v8i16_nxv8i16_idx8( %vec) nounwind { ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: cnth x9 -; CHECK-NEXT: sub x9, x9, #1 // =1 +; CHECK-NEXT: sub x9, x9, #8 // =8 ; CHECK-NEXT: mov w8, #8 ; CHECK-NEXT: cmp x9, #8 // =8 ; CHECK-NEXT: ptrue p0.h @@ -117,7 +117,7 @@ define <16 x i8> @extract_v16i8_nxv16i8_idx16( %vec) nounwind ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: rdvl x9, #1 -; CHECK-NEXT: sub x9, x9, #1 // =1 +; CHECK-NEXT: sub x9, x9, #16 // =16 ; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: mov w8, #16 ; CHECK-NEXT: cmp x9, #16 // =16 @@ -151,11 +151,62 @@ define @extract_nxv1i16_nxv6i16( %vec) noun ret %retval } +; Fixed length clamping + +define <2 x i64> @extract_fixed_v2i64_nxv2i64( %vec) nounwind #0 { +; CHECK-LABEL: extract_fixed_v2i64_nxv2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: cntd x9 +; CHECK-NEXT: sub x9, x9, #2 // =2 +; CHECK-NEXT: mov w8, #2 +; CHECK-NEXT: cmp x9, #2 // =2 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: csel x8, x9, x8, lo +; CHECK-NEXT: st1d { z0.d }, p0, [sp] +; CHECK-NEXT: lsl x8, x8, #3 +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: ldr q0, [x9, x8] +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %retval = call <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64( %vec, i64 2) + ret <2 x i64> %retval +} + +define <4 x i64> @extract_fixed_v4i64_nxv2i64( %vec) nounwind #0 { +; CHECK-LABEL: extract_fixed_v4i64_nxv2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: cntd x9 +; CHECK-NEXT: subs x9, x9, #4 // =4 +; CHECK-NEXT: csel x9, xzr, x9, lo +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov w10, #4 +; CHECK-NEXT: cmp x9, #4 // =4 +; CHECK-NEXT: ptrue p1.d, vl4 +; CHECK-NEXT: st1d { z0.d }, p0, [sp] +; CHECK-NEXT: csel x9, x9, x10, lo +; CHECK-NEXT: mov x10, sp +; CHECK-NEXT: ld1d { z0.d }, p1/z, [x10, x9, lsl #3] +; CHECK-NEXT: st1d { z0.d }, p1, [x8] +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %retval = call <4 x i64> @llvm.experimental.vector.extract.v4i64.nxv2i64( %vec, i64 4) + ret <4 x i64> %retval +} + +attributes #0 = { vscale_range(2,2) } declare <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64(, i64) declare <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(, i64) declare <8 x i16> @llvm.experimental.vector.extract.v8i16.nxv8i16(, i64) declare <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv16i8(, i64) +declare <4 x i64> @llvm.experimental.vector.extract.v4i64.nxv2i64(, i64) + declare @llvm.experimental.vector.extract.nxv1i32.nxv4i32(, i64) declare @llvm.experimental.vector.extract.nxv1i16.nxv6i16(, i64) diff --git a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll index 2816e97e09865..669c65e1e4a89 100644 --- a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll +++ b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll @@ -23,7 +23,7 @@ define @insert_v2i64_nxv2i64_idx2( %vec, <2 ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: cntd x9 -; CHECK-NEXT: sub x9, x9, #1 // =1 +; CHECK-NEXT: sub x9, x9, #2 // =2 ; CHECK-NEXT: mov w8, #2 ; CHECK-NEXT: cmp x9, #2 // =2 ; CHECK-NEXT: csel x8, x9, x8, lo @@ -62,7 +62,7 @@ define @insert_v4i32_nxv4i32_idx4( %vec, <4 ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: cntw x9 -; CHECK-NEXT: sub x9, x9, #1 // =1 +; CHECK-NEXT: sub x9, x9, #4 // =4 ; CHECK-NEXT: mov w8, #4 ; CHECK-NEXT: cmp x9, #4 // =4 ; CHECK-NEXT: csel x8, x9, x8, lo @@ -101,7 +101,7 @@ define @insert_v8i16_nxv8i16_idx8( %vec, <8 ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: cnth x9 -; CHECK-NEXT: sub x9, x9, #1 // =1 +; CHECK-NEXT: sub x9, x9, #8 // =8 ; CHECK-NEXT: mov w8, #8 ; CHECK-NEXT: cmp x9, #8 // =8 ; CHECK-NEXT: csel x8, x9, x8, lo @@ -140,7 +140,7 @@ define @insert_v16i8_nxv16i8_idx16( %vec, < ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: rdvl x9, #1 -; CHECK-NEXT: sub x9, x9, #1 // =1 +; CHECK-NEXT: sub x9, x9, #16 // =16 ; CHECK-NEXT: mov w8, #16 ; CHECK-NEXT: cmp x9, #16 // =16 ; CHECK-NEXT: ptrue p0.b @@ -299,12 +299,66 @@ entry: ret %retval } +; Fixed length clamping + +define @insert_fixed_v2i64_nxv2i64( %vec, <2 x i64> %subvec) nounwind #0 { +; CHECK-LABEL: insert_fixed_v2i64_nxv2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: cntd x9 +; CHECK-NEXT: sub x9, x9, #2 // =2 +; CHECK-NEXT: mov w8, #2 +; CHECK-NEXT: cmp x9, #2 // =2 +; CHECK-NEXT: csel x8, x9, x8, lo +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: lsl x8, x8, #3 +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: st1d { z0.d }, p0, [sp] +; CHECK-NEXT: str q1, [x9, x8] +; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp] +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %retval = call @llvm.experimental.vector.insert.nxv2i64.v2i64( %vec, <2 x i64> %subvec, i64 2) + ret %retval +} + +define @insert_fixed_v4i64_nxv2i64( %vec, <4 x i64>* %ptr) nounwind #0 { +; CHECK-LABEL: insert_fixed_v4i64_nxv2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: cntd x8 +; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0] +; CHECK-NEXT: subs x8, x8, #4 // =4 +; CHECK-NEXT: csel x8, xzr, x8, lo +; CHECK-NEXT: mov w9, #4 +; CHECK-NEXT: cmp x8, #4 // =4 +; CHECK-NEXT: ptrue p1.d +; CHECK-NEXT: csel x8, x8, x9, lo +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: st1d { z0.d }, p1, [sp] +; CHECK-NEXT: st1d { z1.d }, p0, [x9, x8, lsl #3] +; CHECK-NEXT: ld1d { z0.d }, p1/z, [sp] +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %subvec = load <4 x i64>, <4 x i64>* %ptr + %retval = call @llvm.experimental.vector.insert.nxv2i64.v4i64( %vec, <4 x i64> %subvec, i64 4) + ret %retval +} + +attributes #0 = { vscale_range(2,2) } declare @llvm.experimental.vector.insert.nxv2i64.v2i64(, <2 x i64>, i64) declare @llvm.experimental.vector.insert.nxv4i32.v4i32(, <4 x i32>, i64) declare @llvm.experimental.vector.insert.nxv8i16.v8i16(, <8 x i16>, i64) declare @llvm.experimental.vector.insert.nxv16i8.v16i8(, <16 x i8>, i64) +declare @llvm.experimental.vector.insert.nxv2i64.v4i64(, <4 x i64>, i64) + declare @llvm.experimental.vector.insert.nxv8i64.nxv16i64(, , i64) declare @llvm.experimental.vector.insert.v2i64.nxv16i64(, <2 x i64>, i64) declare @llvm.experimental.vector.insert.nxv4i32.nxv1i32(, , i64) From dd4d3f74062c64d57a5881dceac028ec6c231b73 Mon Sep 17 00:00:00 2001 From: Peter Smith Date: Fri, 25 Jun 2021 10:52:15 +0100 Subject: [PATCH 281/619] [LLD][ELF][ARM] Fix case of patched unrelocated BLX There are a couple of problems with the code to patch unrelocated BLX instructions: 1. The calculation of the PC needs to take into account the alignment of the instruction. The Thumb BLX uses alignDown(PC, 4) for the source address. 2. The calculation of the PC bias is hard-coded to 4 which works for Thumb, but when there is a BLX the branch will be in Arm state so it needs an 8 byte PC bias. No asssembler generates an unrelocated BLX instruction so these problems do not affect real world programs. However we should still fix them. Differential Revision: https://reviews.llvm.org/D104905 --- lld/ELF/ARMErrataFix.cpp | 15 ++++++++++++++- lld/test/ELF/arm-fix-cortex-a8-blx.s | 2 +- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/lld/ELF/ARMErrataFix.cpp b/lld/ELF/ARMErrataFix.cpp index 86b822f02fd55..77623780ffa5a 100644 --- a/lld/ELF/ARMErrataFix.cpp +++ b/lld/ELF/ARMErrataFix.cpp @@ -164,6 +164,15 @@ static uint64_t getThumbDestAddr(uint64_t sourceAddr, uint32_t instr) { offset = target->getImplicitAddend(buf, R_ARM_THM_JUMP24); else offset = target->getImplicitAddend(buf, R_ARM_THM_CALL); + // A BLX instruction from Thumb to Arm may have an address that is + // not 4-byte aligned. As Arm instructions are always 4-byte aligned + // the instruction is calculated (from Arm ARM): + // targetAddress = Align(PC, 4) + imm32 + // where + // Align(x, y) = y * (x Div y) + // which corresponds to alignDown. + if (isBLX(instr)) + sourceAddr = alignDown(sourceAddr, 4); return sourceAddr + offset + 4; } @@ -185,7 +194,11 @@ void Patch657417Section::writeTo(uint8_t *buf) { // We cannot use the instruction in the patchee section as this will have // been altered to point to us! uint64_t s = getThumbDestAddr(getBranchAddr(), instr); - uint64_t p = getVA(4); + // A BLX changes the state of the branch in the patch to Arm state, which + // has a PC Bias of 8, whereas in all other cases the branch is in Thumb + // state with a PC Bias of 4. + uint64_t pcBias = isBLX(instr) ? 8 : 4; + uint64_t p = getVA(pcBias); target->relocateNoSym(buf, isARM ? R_ARM_JUMP24 : R_ARM_THM_JUMP24, s - p); } diff --git a/lld/test/ELF/arm-fix-cortex-a8-blx.s b/lld/test/ELF/arm-fix-cortex-a8-blx.s index 6000f0f3dfcbf..10c9d2aeb4685 100644 --- a/lld/test/ELF/arm-fix-cortex-a8-blx.s +++ b/lld/test/ELF/arm-fix-cortex-a8-blx.s @@ -30,4 +30,4 @@ _start: // CHECK-PATCH: 21ffa: nop.w // CHECK-PATCH-NEXT: 21ffe: blx 0x22004 <__CortexA8657417_21FFE> // CHECK-PATCH: 00022004 <__CortexA8657417_21FFE>: -// CHECK-PATCH-NEXT: 22004: b 0x21004 <{{.+}}> @ imm = #-4104 +// CHECK-PATCH-NEXT: 22004: b 0x21000 <_start> From e585b332e42321870b2cf1eeefcd68b0717570e2 Mon Sep 17 00:00:00 2001 From: alex-t Date: Wed, 30 Jun 2021 15:48:02 +0300 Subject: [PATCH 282/619] [AMDGPU] PHI node cost should not be counted for the size and latency. Details: https://reviews.llvm.org/D96805 changed the GCNTTIImpl::getCFInstrCost to return 1 for the PHI nodes for the TTI::TCK_CodeSize and TTI::TCK_SizeAndLatency. This is incorrect because the value moves that are the result of the PHI lowering are inserted into the basic block predecessors - not into the block itself. As a result of this change LoopRotate and LoopUnroll were broken because of the incorrect Loop header and loop body size/cost estimation. Reviewed By: rampitec Differential Revision: https://reviews.llvm.org/D105104 --- llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp | 4 ---- llvm/test/Analysis/CostModel/AMDGPU/control-flow.ll | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index fe138fd7bf326..67886b9681aeb 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -837,10 +837,6 @@ InstructionCost GCNTTIImpl::getCFInstrCost(unsigned Opcode, } case Instruction::Ret: return SCost ? 1 : 10; - case Instruction::PHI: - // TODO: 1. A prediction phi won't be eliminated? - // 2. Estimate data copy instructions in this case. - return 1; } return BaseT::getCFInstrCost(Opcode, CostKind, I); } diff --git a/llvm/test/Analysis/CostModel/AMDGPU/control-flow.ll b/llvm/test/Analysis/CostModel/AMDGPU/control-flow.ll index ef469ccb48278..88c4956f3fc0e 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/control-flow.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/control-flow.ll @@ -8,7 +8,7 @@ ; SPEED: estimated cost of 10 for instruction: ret void ; SIZE: estimated cost of 5 for instruction: br i1 ; SIZE: estimated cost of 1 for instruction: br label -; SIZE: estimated cost of 1 for instruction: %phi = phi i32 [ +; SIZE: estimated cost of 0 for instruction: %phi = phi i32 [ ; SIZE: estimated cost of 1 for instruction: ret void define amdgpu_kernel void @test_br_cost(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 { bb0: From fc1cb3104b6bde0cbba8e92ec97878076c39737d Mon Sep 17 00:00:00 2001 From: Peter Smith Date: Fri, 25 Jun 2021 11:39:47 +0100 Subject: [PATCH 283/619] [LLD][ELF][ARM] Tidy up test to hook up missing filecheck patterns [NFC] A couple of filecheck patterns had not been hooked up with the patterns suffering from some drift. As this test is old and llvm-objdump has improved a lot, take this opportunity to hide the instruction encoding. I've also taken out a lot of the explanatory comments that llvm-objdump improvements make redundant, as these comments oftern don't get updated when addresses change. Differential Revision: https://reviews.llvm.org/D104907 --- lld/test/ELF/arm-thumb-interwork-thunk.s | 451 +++++++++++------------ 1 file changed, 220 insertions(+), 231 deletions(-) diff --git a/lld/test/ELF/arm-thumb-interwork-thunk.s b/lld/test/ELF/arm-thumb-interwork-thunk.s index 5caf857f06ad4..e14ebc04f9c58 100644 --- a/lld/test/ELF/arm-thumb-interwork-thunk.s +++ b/lld/test/ELF/arm-thumb-interwork-thunk.s @@ -11,23 +11,23 @@ // RUN: .R_ARM_THM_JUMP_callee_2 : { *(.R_ARM_THM_JUMP_callee_high) } \ // RUN: .got.plt 0x18b4 : { } } " > %t.script // RUN: ld.lld --script %t.script %t -o %t2 -// RUN: llvm-objdump -d --triple=thumbv7a-none-linux-gnueabi %t2 | FileCheck --check-prefix=CHECK-THUMB --check-prefix=CHECK-ABS-THUMB %s -// RUN: llvm-objdump -d --triple=armv7a-none-linux-gnueabi %t2 | FileCheck --check-prefix=CHECK-ARM --check-prefix=CHECK-ARM-ABS-ARM %s +// RUN: llvm-objdump -d --no-show-raw-insn --triple=thumbv7a-none-linux-gnueabi %t2 | FileCheck --check-prefix=CHECK-THUMB --check-prefix=CHECK-ABS-THUMB %s +// RUN: llvm-objdump -d --no-show-raw-insn --triple=armv7a-none-linux-gnueabi %t2 | FileCheck --check-prefix=CHECK-ARM --check-prefix=CHECK-ARM-ABS-ARM %s // RUN: ld.lld --script %t.script %t -pie -o %t3 +// RUN: llvm-objdump -d --no-show-raw-insn --triple=thumbv7a-none-linux-gnueabi %t3 | FileCheck --check-prefix=CHECK-THUMB --check-prefix=CHECK-PI-THUMB %s +// RUN: llvm-objdump -d --no-show-raw-insn --triple=armv7a-none-linux-gnueabi %t3 | FileCheck --check-prefix=CHECK-ARM --check-prefix=CHECK-PI-ARM %s // RUN: ld.lld --script %t.script %t --shared -o %t4 -// RUN: llvm-objdump -d --triple=thumbv7a-none-linux-gnueabi %t3 | FileCheck --check-prefix=CHECK-THUMB --check-prefix=CHECK-PI-THUMB %s -// RUN: llvm-objdump -d --triple=armv7a-none-linux-gnueabi %t3 | FileCheck --check-prefix=CHECK-ARM --check-prefix=CHECK-PI-ARM %s // RUN: llvm-readobj -S -r %t4 | FileCheck -check-prefix=CHECK-DSO-REL %s - -// Test ARM Thumb Interworking -// The file is linked and checked 3 times to check the following contexts -// - Absolute executables, absolute Thunks are used. -// - Position independent executables, position independent Thunks are used. -// - Shared object, position independent Thunks to PLT entries are used. +// RUN: llvm-objdump -d --no-show-raw-insn --triple=armv7a-none-linux-gnueabi %t4 | FileCheck -check-prefix=CHECK-ARM-PLT %s +/// Test ARM Thumb Interworking +/// The file is linked and checked 3 times to check the following contexts +/// - Absolute executables, absolute Thunks are used. +/// - Position independent executables, position independent Thunks are used. +/// - Shared object, position independent Thunks to PLT entries are used. .syntax unified -// Target Sections for thunks at a lower address than the callers. +/// Target Sections for thunks at a lower address than the callers. .section .R_ARM_JUMP24_callee_low, "ax", %progbits .thumb .balign 0x1000 @@ -39,7 +39,7 @@ thumb_callee1: // CHECK-THUMB: Disassembly of section .R_ARM_JUMP24_callee_1: // CHECK-THUMB-EMPTY: // CHECK-THUMB: : -// CHECK-THUMB: 1000: 70 47 bx +// CHECK-THUMB: 1000: bx lr .section .R_ARM_THM_JUMP_callee_low, "ax", %progbits .arm .balign 0x100 @@ -49,30 +49,30 @@ arm_callee1: bx lr // Disassembly of section .R_ARM_THM_JUMP_callee_1: // CHECK-ARM: : -// CHECK-ARM-NEXT: 1100: 1e ff 2f e1 bx lr +// CHECK-ARM-NEXT: 1100: bx lr - // Calling sections - // At present ARM and Thumb interworking thunks are always added to the calling - // section. +/// Calling sections +/// At present ARM and Thumb interworking thunks are always added to the calling +/// section. .section .arm_caller, "ax", %progbits .arm .balign 0x100 .globl arm_caller .type arm_caller, %function arm_caller: - // If target supports BLX and target is in range we don't need an - // interworking thunk for a BL or BLX instruction. +/// If target supports BLX and target is in range we don't need an +/// interworking thunk for a BL or BLX instruction. bl thumb_callee1 blx thumb_callee1 - // A B instruction can't be transformed into a BLX and needs an interworking - // thunk +/// A B instruction can't be transformed into a BLX and needs an interworking +/// thunk. b thumb_callee1 - // As long as the thunk is in range it can be reused +/// As long as the thunk is in range it can be reused. b thumb_callee1 - // There can be more than one thunk associated with a section +/// There can be more than one thunk associated with a section. b thumb_callee2 b thumb_callee3 - // In range ARM targets do not require interworking thunks +/// In range ARM targets do not require interworking thunks. b arm_callee1 beq arm_callee2 bne arm_callee3 @@ -80,88 +80,75 @@ arm_caller: // CHECK-ARM-ABS-ARM: Disassembly of section .arm_caller: // CHECK-ARM-ABS-ARM-EMPTY: // CHECK-ARM-ABS-ARM-NEXT: : -// CHECK-ARM-ABS-ARM-NEXT: 1300: 3e ff ff fa blx 0x1000 -// CHECK-ARM-ABS-ARM-NEXT: 1304: 3d ff ff fa blx 0x1000 -// CHECK-ARM-ABS-ARM-NEXT: 1308: 06 00 00 ea b 0x1328 <__ARMv7ABSLongThunk_thumb_callee1> -// CHECK-ARM-ABS-ARM-NEXT: 130c: 05 00 00 ea b 0x1328 <__ARMv7ABSLongThunk_thumb_callee1> -// CHECK-ARM-ABS-ARM-NEXT: 1310: 07 00 00 ea b 0x1334 <__ARMv7ABSLongThunk_thumb_callee2> -// CHECK-ARM-ABS-ARM-NEXT: 1314: 09 00 00 ea b 0x1340 <__ARMv7ABSLongThunk_thumb_callee3> -// CHECK-ARM-ABS-ARM-NEXT: 1318: 78 ff ff ea b 0x1100 -// CHECK-ARM-ABS-ARM-NEXT: 131c: b7 00 00 0a beq 0x1600 -// CHECK-ARM-ABS-ARM-NEXT: 1320: b7 00 00 1a bne 0x1604 -// CHECK-ARM-ABS-ARM-NEXT: 1324: 1e ff 2f e1 bx lr +// CHECK-ARM-ABS-ARM-NEXT: 1300: blx 0x1000 +// CHECK-ARM-ABS-ARM-NEXT: 1304: blx 0x1000 +// CHECK-ARM-ABS-ARM-NEXT: 1308: b 0x1328 <__ARMv7ABSLongThunk_thumb_callee1> +// CHECK-ARM-ABS-ARM-NEXT: 130c: b 0x1328 <__ARMv7ABSLongThunk_thumb_callee1> +// CHECK-ARM-ABS-ARM-NEXT: 1310: b 0x1334 <__ARMv7ABSLongThunk_thumb_callee2> +// CHECK-ARM-ABS-ARM-NEXT: 1314: b 0x1340 <__ARMv7ABSLongThunk_thumb_callee3> +// CHECK-ARM-ABS-ARM-NEXT: 1318: b 0x1100 +// CHECK-ARM-ABS-ARM-NEXT: 131c: beq 0x1600 +// CHECK-ARM-ABS-ARM-NEXT: 1320: bne 0x1604 +// CHECK-ARM-ABS-ARM-NEXT: 1324: bx lr // CHECK-ARM-ABS-ARM: <__ARMv7ABSLongThunk_thumb_callee1>: // 0x1001 = thumb_callee1 -// CHECK-ARM-ABS-ARM-NEXT: 1328: 01 c0 01 e3 movw r12, #4097 -// CHECK-ARM-ABS-ARM-NEXT: 132c: 00 c0 40 e3 movt r12, #0 -// CHECK-ARM-ABS-ARM-NEXT: 1330: 1c ff 2f e1 bx r12 +// CHECK-ARM-ABS-ARM-NEXT: 1328: movw r12, #4097 +// CHECK-ARM-ABS-ARM-NEXT: 132c: movt r12, #0 +// CHECK-ARM-ABS-ARM-NEXT: 1330: bx r12 // 0x1501 = thumb_callee2 // CHECK-ARM-ABS-ARM: <__ARMv7ABSLongThunk_thumb_callee2>: -// CHECK-ARM-ABS-ARM-NEXT: 1334: 01 c5 01 e3 movw r12, #5377 -// CHECK-ARM-ABS-ARM-NEXT: 1338: 00 c0 40 e3 movt r12, #0 -// CHECK-ARM-ABS-ARM-NEXT: 133c: 1c ff 2f e1 bx r12 +// CHECK-ARM-ABS-ARM-NEXT: 1334: movw r12, #5377 +// CHECK-ARM-ABS-ARM-NEXT: 1338: movt r12, #0 +// CHECK-ARM-ABS-ARM-NEXT: 133c: bx r12 // 0x1503 = thumb_callee3 // CHECK-ARM-ABS-ARM: <__ARMv7ABSLongThunk_thumb_callee3>: -// CHECK-ARM-ABS-ARM-NEXT: 1340: 03 c5 01 e3 movw r12, #5379 -// CHECK-ARM-ABS-ARM-NEXT: 1344: 00 c0 40 e3 movt r12, #0 -// CHECK-ARM-ABS-ARM-NEXT: 1348: 1c ff 2f e1 bx r12 +// CHECK-ARM-ABS-ARM-NEXT: 1340: movw r12, #5379 +// CHECK-ARM-ABS-ARM-NEXT: 1344: movt r12, #0 +// CHECK-ARM-ABS-ARM-NEXT: 1348: bx r12 // CHECK-PI-ARM: Disassembly of section .arm_caller: // CHECK-PI-ARM-EMPTY: // CHECK-PI-ARM-NEXT: : -// CHECK-PI-ARM-NEXT: 1300: 3e ff ff fa blx 0x1000 -// CHECK-PI-ARM-NEXT: 1304: 3d ff ff fa blx 0x1000 -// CHECK-PI-ARM-NEXT: 1308: 06 00 00 ea b 0x1328 <__ARMV7PILongThunk_thumb_callee1> -// CHECK-PI-ARM-NEXT: 130c: 05 00 00 ea b 0x1328 <__ARMV7PILongThunk_thumb_callee1> -// CHECK-PI-ARM-NEXT: 1310: 08 00 00 ea b 0x1338 <__ARMV7PILongThunk_thumb_callee2> -// CHECK-PI-ARM-NEXT: 1314: 0b 00 00 ea b 0x1348 <__ARMV7PILongThunk_thumb_callee3> -// CHECK-PI-ARM-NEXT: 1318: 78 ff ff ea b 0x1100 -// CHECK-PI-ARM-NEXT: 131c: b7 00 00 0a beq 0x1600 -// CHECK-PI-ARM-NEXT: 1320: b7 00 00 1a bne 0x1604 -// CHECK-PI-ARM-NEXT: 1324: 1e ff 2f e1 bx lr +// CHECK-PI-ARM-NEXT: 1300: blx 0x1000 +// CHECK-PI-ARM-NEXT: 1304: blx 0x1000 +// CHECK-PI-ARM-NEXT: 1308: b 0x1328 <__ARMV7PILongThunk_thumb_callee1> +// CHECK-PI-ARM-NEXT: 130c: b 0x1328 <__ARMV7PILongThunk_thumb_callee1> +// CHECK-PI-ARM-NEXT: 1310: b 0x1338 <__ARMV7PILongThunk_thumb_callee2> +// CHECK-PI-ARM-NEXT: 1314: b 0x1348 <__ARMV7PILongThunk_thumb_callee3> +// CHECK-PI-ARM-NEXT: 1318: b 0x1100 +// CHECK-PI-ARM-NEXT: 131c: beq 0x1600 +// CHECK-PI-ARM-NEXT: 1320: bne 0x1604 +// CHECK-PI-ARM-NEXT: 1324: bx lr // CHECK-PI-ARM: <__ARMV7PILongThunk_thumb_callee1>: -// 0x1330 + 8 - 0x337 = 0x1001 = thumb_callee1 -// CHECK-PI-ARM-NEXT: 1328: c9 cc 0f e3 movw r12, #64713 -// CHECK-PI-ARM-NEXT: 132c: ff cf 4f e3 movt r12, #65535 -// CHECK-PI-ARM-NEXT: 1330: 0f c0 8c e0 add r12, r12, pc -// CHECK-PI-ARM-NEXT: 1334: 1c ff 2f e1 bx r12 +// CHECK-PI-ARM-NEXT: 1328: movw r12, #64713 +// CHECK-PI-ARM-NEXT: 132c: movt r12, #65535 +// CHECK-PI-ARM-NEXT: 1330: add r12, r12, pc +// CHECK-PI-ARM-NEXT: 1334: bx r12 // CHECK-PI-ARM: <__ARMV7PILongThunk_thumb_callee2>: - -// CHECK-PI-ARM-NEXT: 1338: b9 c1 00 e3 movw r12, #441 -// CHECK-PI-ARM-NEXT: 133c: 00 c0 40 e3 movt r12, #0 -// CHECK-PI-ARM-NEXT: 1340: 0f c0 8c e0 add r12, r12, pc -// CHECK-PI-ARM-NEXT: 1344: 1c ff 2f e1 bx r12 +// CHECK-PI-ARM-NEXT: 1338: movw r12, #441 +// CHECK-PI-ARM-NEXT: 133c: movt r12, #0 +// CHECK-PI-ARM-NEXT: 1340: add r12, r12, pc +// CHECK-PI-ARM-NEXT: 1344: bx r12 // CHECK-PI-ARM: <__ARMV7PILongThunk_thumb_callee3>: -// 0x1340 + 8 + 0x1b9 = 0x1501 -// CHECK-PI-ARM-NEXT: 1348: ab c1 00 e3 movw r12, #427 -// CHECK-PI-ARM-NEXT: 134c: 00 c0 40 e3 movt r12, #0 -// CHECK-PI-ARM-NEXT: 1350: 0f c0 8c e0 add r12, r12, pc -// CHECK-PI-ARM-NEXT: 1354: 1c ff 2f e1 bx r12 -// 1350 + 8 + 0x1ab = 0x1503 +// CHECK-PI-ARM-NEXT: 1348: movw r12, #427 +// CHECK-PI-ARM-NEXT: 134c: movt r12, #0 +// CHECK-PI-ARM-NEXT: 1350: add r12, r12, pc +// CHECK-PI-ARM-NEXT: 1354: bx r12 -// All PLT entries are ARM, no need for interworking thunks -// CHECK-PI-ARM-PLT: Disassembly of section .arm_caller: -// CHECK-PI-ARM-PLT-EMPTY: -// CHECK-PI-ARM-PLT-NEXT: arm_caller: -// 0x17e4 PLT(thumb_callee1) -// CHECK-PI-ARM-PLT-NEXT: 1300: 37 01 00 eb bl #1244 -// 0x17e4 PLT(thumb_callee1) -// CHECK-PI-ARM-PLT-NEXT: 1304: 36 01 00 eb bl #1240 -// 0x17e4 PLT(thumb_callee1) -// CHECK-PI-ARM-PLT-NEXT: 1308: 35 01 00 ea b #1236 -// 0x17e4 PLT(thumb_callee1) -// CHECK-PI-ARM-PLT-NEXT: 130c: 34 01 00 ea b #1232 -// 0x17f4 PLT(thumb_callee2) -// CHECK-PI-ARM-PLT-NEXT: 1310: 37 01 00 ea b #1244 -// 0x1804 PLT(thumb_callee3) -// CHECK-PI-ARM-PLT-NEXT: 1314: 3a 01 00 ea b #1256 -// 0x1814 PLT(arm_callee1) -// CHECK-PI-ARM-PLT-NEXT: 1318: 3d 01 00 ea b #1268 -// 0x1824 PLT(arm_callee2) -// CHECK-PI-ARM-PLT-NEXT: 131c: 40 01 00 0a beq #1280 -// 0x1834 PLT(arm_callee3) -// CHECK-PI-ARM-PLT-NEXT: 1320: 43 01 00 1a bne #1292 -// CHECK-PI-ARM-PLT-NEXT: 1324: 1e ff 2f e1 bx lr +/// All PLT entries are ARM, callers via PLT no need for interworking thunks. +// CHECK-ARM-PLT: Disassembly of section .arm_caller: +// CHECK-ARM-PLT-EMPTY: +// CHECK-ARM-PLT-NEXT: : +// CHECK-ARM-PLT-NEXT: 1300: bl 0x1650 +// CHECK-ARM-PLT-NEXT: 1304: bl 0x1650 +// CHECK-ARM-PLT-NEXT: 1308: b 0x1650 +// CHECK-ARM-PLT-NEXT: 130c: b 0x1650 +// CHECK-ARM-PLT-NEXT: 1310: b 0x1660 +// CHECK-ARM-PLT-NEXT: 1314: b 0x1670 +// CHECK-ARM-PLT-NEXT: 1318: b 0x1680 +// CHECK-ARM-PLT-NEXT: 131c: beq 0x1690 +// CHECK-ARM-PLT-NEXT: 1320: bne 0x16a0 +// CHECK-ARM-PLT-NEXT: 1324: bx lr .section .thumb_caller, "ax", %progbits .balign 0x100 @@ -169,115 +156,89 @@ arm_caller: .globl thumb_caller .type thumb_caller, %function thumb_caller: - // If target supports BLX and target is in range we don't need an - // interworking thunk for a BL or BLX instruction. +/// If target supports BLX and target is in range we don't need an +/// interworking thunk for a BL or BLX instruction. bl arm_callee1 blx arm_callee1 - // A B instruction can't be transformed into a BLX and needs an interworking - // thunk +/// A B instruction can't be transformed into a BLX and needs an interworking +/// thunk b.w arm_callee1 - // As long as the thunk is in range it can be reused +/// As long as the thunk is in range it can be reused b.w arm_callee2 - // There can be more than one thunk associated with a section +/// There can be more than one thunk associated with a section b.w arm_callee3 - // Conditional branches also require interworking thunks, they can use the - // same interworking thunks. +/// Conditional branches also require interworking thunks, they can use the +/// same interworking thunks. beq.w arm_callee1 beq.w arm_callee2 bne.w arm_callee3 // CHECK-ABS-THUMB: Disassembly of section .thumb_caller: // CHECK-ABS-THUMB-EMPTY: // CHECK-ABS-THUMB-NEXT: : -// CHECK-ABS-THUMB-NEXT: 1400: ff f7 7e ee blx 0x1100 -// CHECK-ABS-THUMB-NEXT: 1404: ff f7 7c ee blx 0x1100 -// CHECK-ABS-THUMB-NEXT: 1408: 00 f0 0a b8 b.w 0x1420 <__Thumbv7ABSLongThunk_arm_callee1> -// CHECK-ABS-THUMB-NEXT: 140c: 00 f0 0d b8 b.w 0x142a <__Thumbv7ABSLongThunk_arm_callee2> -// CHECK-ABS-THUMB-NEXT: 1410: 00 f0 10 b8 b.w 0x1434 <__Thumbv7ABSLongThunk_arm_callee3> -// CHECK-ABS-THUMB-NEXT: 1414: 00 f0 04 80 beq.w 0x1420 <__Thumbv7ABSLongThunk_arm_callee1> -// CHECK-ABS-THUMB-NEXT: 1418: 00 f0 07 80 beq.w 0x142a <__Thumbv7ABSLongThunk_arm_callee2> -// CHECK-ABS-THUMB-NEXT: 141c: 40 f0 0a 80 bne.w 0x1434 <__Thumbv7ABSLongThunk_arm_callee3> +// CHECK-ABS-THUMB-NEXT: 1400: blx 0x1100 +// CHECK-ABS-THUMB-NEXT: 1404: blx 0x1100 +// CHECK-ABS-THUMB-NEXT: 1408: b.w 0x1420 <__Thumbv7ABSLongThunk_arm_callee1> +// CHECK-ABS-THUMB-NEXT: 140c: b.w 0x142a <__Thumbv7ABSLongThunk_arm_callee2> +// CHECK-ABS-THUMB-NEXT: 1410: b.w 0x1434 <__Thumbv7ABSLongThunk_arm_callee3> +// CHECK-ABS-THUMB-NEXT: 1414: beq.w 0x1420 <__Thumbv7ABSLongThunk_arm_callee1> +// CHECK-ABS-THUMB-NEXT: 1418: beq.w 0x142a <__Thumbv7ABSLongThunk_arm_callee2> +// CHECK-ABS-THUMB-NEXT: 141c: bne.w 0x1434 <__Thumbv7ABSLongThunk_arm_callee3> // CHECK-ABS-THUMB: <__Thumbv7ABSLongThunk_arm_callee1>: -// 0x1100 = arm_callee1 -// CHECK-ABS-THUMB-NEXT: 1420: 41 f2 00 1c movw r12, #4352 -// CHECK-ABS-THUMB-NEXT: 1424: c0 f2 00 0c movt r12, #0 -// CHECK-ABS-THUMB-NEXT: 1428: 60 47 bx r12 +// CHECK-ABS-THUMB-NEXT: 1420: movw r12, #4352 +// CHECK-ABS-THUMB-NEXT: 1424: movt r12, #0 +// CHECK-ABS-THUMB-NEXT: 1428: bx r12 // CHECK-ABS-THUMB: <__Thumbv7ABSLongThunk_arm_callee2>: -// 0x1600 = arm_callee2 -// CHECK-ABS-THUMB-NEXT: 142a: 41 f2 00 6c movw r12, #5632 -// CHECK-ABS-THUMB-NEXT: 142e: c0 f2 00 0c movt r12, #0 -// CHECK-ABS-THUMB-NEXT: 1432: 60 47 bx r12 -// 0x1604 = arm_callee3 +// CHECK-ABS-THUMB-NEXT: 142a: movw r12, #5632 +// CHECK-ABS-THUMB-NEXT: 142e: movt r12, #0 +// CHECK-ABS-THUMB-NEXT: 1432: bx r12 // CHECK-ABS-THUMB: <__Thumbv7ABSLongThunk_arm_callee3>: -// CHECK-ABS-THUMB-NEXT: 1434: 41 f2 04 6c movw r12, #5636 -// CHECK-ABS-THUMB-NEXT: 1438: c0 f2 00 0c movt r12, #0 -// CHECK-ABS-THUMB-NEXT: 143c: 60 47 bx r12 +// CHECK-ABS-THUMB-NEXT: 1434: movw r12, #5636 +// CHECK-ABS-THUMB-NEXT: 1438: movt r12, #0 +// CHECK-ABS-THUMB-NEXT: 143c: bx r12 // CHECK-PI-THUMB: Disassembly of section .thumb_caller: // CHECK-PI-THUMB-EMPTY: // CHECK-PI-THUMB-NEXT: : -// CHECK-PI-THUMB-NEXT: 1400: ff f7 7e ee blx 0x1100 -// CHECK-PI-THUMB-NEXT: 1404: ff f7 7c ee blx 0x1100 -// CHECK-PI-THUMB-NEXT: 1408: 00 f0 0a b8 b.w 0x1420 <__ThumbV7PILongThunk_arm_callee1> -// CHECK-PI-THUMB-NEXT: 140c: 00 f0 0e b8 b.w 0x142c <__ThumbV7PILongThunk_arm_callee2> -// CHECK-PI-THUMB-NEXT: 1410: 00 f0 12 b8 b.w 0x1438 <__ThumbV7PILongThunk_arm_callee3> -// CHECK-PI-THUMB-NEXT: 1414: 00 f0 04 80 beq.w 0x1420 <__ThumbV7PILongThunk_arm_callee1> -// CHECK-PI-THUMB-NEXT: 1418: 00 f0 08 80 beq.w 0x142c <__ThumbV7PILongThunk_arm_callee2> -// CHECK-PI-THUMB-NEXT: 141c: 40 f0 0c 80 bne.w 0x1438 <__ThumbV7PILongThunk_arm_callee3> +// CHECK-PI-THUMB-NEXT: 1400: blx 0x1100 +// CHECK-PI-THUMB-NEXT: 1404: blx 0x1100 +// CHECK-PI-THUMB-NEXT: 1408: b.w 0x1420 <__ThumbV7PILongThunk_arm_callee1> +// CHECK-PI-THUMB-NEXT: 140c: b.w 0x142c <__ThumbV7PILongThunk_arm_callee2> +// CHECK-PI-THUMB-NEXT: 1410: b.w 0x1438 <__ThumbV7PILongThunk_arm_callee3> +// CHECK-PI-THUMB-NEXT: 1414: beq.w 0x1420 <__ThumbV7PILongThunk_arm_callee1> +// CHECK-PI-THUMB-NEXT: 1418: beq.w 0x142c <__ThumbV7PILongThunk_arm_callee2> +// CHECK-PI-THUMB-NEXT: 141c: bne.w 0x1438 <__ThumbV7PILongThunk_arm_callee3> // CHECK-PI-THUMB: <__ThumbV7PILongThunk_arm_callee1>: -// 0x1428 + 4 - 0x32c = 0x1100 = arm_callee1 -// CHECK-PI-THUMB-NEXT: 1420: 4f f6 d4 4c movw r12, #64724 -// CHECK-PI-THUMB-NEXT: 1424: cf f6 ff 7c movt r12, #65535 -// CHECK-PI-THUMB-NEXT: 1428: fc 44 add r12, pc -// CHECK-PI-THUMB-NEXT: 142a: 60 47 bx r12 +// CHECK-PI-THUMB-NEXT: 1420: movw r12, #64724 +// CHECK-PI-THUMB-NEXT: 1424: movt r12, #65535 +// CHECK-PI-THUMB-NEXT: 1428: add r12, pc +// CHECK-PI-THUMB-NEXT: 142a: bx r12 // CHECK-PI-THUMB: <__ThumbV7PILongThunk_arm_callee2>: -// 0x1434 + 4 + 0x1c8 = 0x1600 = arm_callee2 -// CHECK-PI-THUMB-NEXT: 142c: 40 f2 c8 1c movw r12, #456 -// CHECK-PI-THUMB-NEXT: 1430: c0 f2 00 0c movt r12, #0 -// CHECK-PI-THUMB-NEXT: 1434: fc 44 add r12, pc -// CHECK-PI-THUMB-NEXT: 1436: 60 47 bx r12 +// CHECK-PI-THUMB-NEXT: 142c: movw r12, #456 +// CHECK-PI-THUMB-NEXT: 1430: movt r12, #0 +// CHECK-PI-THUMB-NEXT: 1434: add r12, pc +// CHECK-PI-THUMB-NEXT: 1436: bx r12 // CHECK-PI-THUMB: <__ThumbV7PILongThunk_arm_callee3>: -// 0x1440 + 4 + 0x1c0 = 0x1604 = arm_callee3 -// CHECK-PI-THUMB-NEXT: 1438: 40 f2 c0 1c movw r12, #448 -// CHECK-PI-THUMB-NEXT: 143c: c0 f2 00 0c movt r12, #0 -// CHECK-PI-THUMB-NEXT: 1440: fc 44 add r12, pc -// CHECK-PI-THUMB-NEXT: 1442: 60 47 bx r12 +// CHECK-PI-THUMB-NEXT: 1438: movw r12, #448 +// CHECK-PI-THUMB-NEXT: 143c: movt r12, #0 +// CHECK-PI-THUMB-NEXT: 1440: add r12, pc +// CHECK-PI-THUMB-NEXT: 1442: bx r12 -// CHECK-PI-THUMB-PLT: Disassembly of section .arm_caller: -// CHECK-PI-THUMB-PLT-EMPTY: -// CHECK-PI-THUMB-PLT-NEXT: thumb_caller: -// 0x1400 + 4 + 0x410 = 0x1814 = PLT(arm_callee1) -// CHECK-PI-THUMB-PLT-NEXT: 1400: 00 f0 08 ea blx #1040 -// 0x1404 + 4 + 0x40c = 0x1814 = PLT(arm_callee1) -// CHECK-PI-THUMB-PLT-NEXT: 1404: 00 f0 06 ea blx #1036 -// 0x1408 + 4 + 0x14 = 0x1420 = IWV(PLT(arm_callee1) -// CHECK-PI-THUMB-PLT-NEXT: 1408: 00 f0 0a b8 b.w #20 -// 0x140c + 4 + 0x1c = 0x142c = IWV(PLT(arm_callee2) -// CHECK-PI-THUMB-PLT-NEXT: 140c: 00 f0 0e b8 b.w #28 -// 0x1410 + 4 + 0x24 = 0x1438 = IWV(PLT(arm_callee3) -// CHECK-PI-THUMB-PLT-NEXT: 1410: 00 f0 12 b8 b.w #36 -// 0x1414 + 4 + 8 = 0x1420 = IWV(PLT(arm_callee1) -// CHECK-PI-THUMB-PLT-NEXT: 1414: 00 f0 04 80 beq.w #8 -// 0x1418 + 4 + 0x10 = 0x142c = IWV(PLT(arm_callee2) -// CHECK-PI-THUMB-PLT-NEXT: 1418: 00 f0 08 80 beq.w #16 -// 0x141c + 4 + 0x18 = 0x1438 = IWV(PLT(arm_callee3) -// CHECK-PI-THUMB-PLT-NEXT: 141c: 40 f0 0c 80 bne.w #24 -// 0x1428 + 4 + 0x3e8 = 0x1814 = PLT(arm_callee1) -// CHECK-PI-THUMB-PLT-NEXT: 1420: 40 f2 e8 3c movw r12, #1000 -// CHECK-PI-THUMB-PLT-NEXT: 1424: c0 f2 00 0c movt r12, #0 -// CHECK-PI-THUMB-PLT-NEXT: 1428: fc 44 add r12, pc -// CHECK-PI-THUMB-PLT-NEXT: 142a: 60 47 bx r12 -// 0x1434 + 4 + 0x3ec = 0x1824 = PLT(arm_callee2) -// CHECK-PI-THUMB-PLT-NEXT: 142c: 40 f2 ec 3c movw r12, #1004 -// CHECK-PI-THUMB-PLT-NEXT: 1430: c0 f2 00 0c movt r12, #0 -// CHECK-PI-THUMB-PLT-NEXT: 1434: fc 44 add r12, pc -// CHECK-PI-THUMB-PLT-NEXT: 1436: 60 47 bx r12 -// 0x1440 + 4 + 0x3f0 = 0x1834 = PLT(arm_callee3) -// CHECK-PI-THUMB-PLT-NEXT: 1438: 40 f2 f0 3c movw r12, #1008 -// CHECK-PI-THUMB-PLT-NEXT: 143c: c0 f2 00 0c movt r12, #0 -// CHECK-PI-THUMB-PLT-NEXT: 1440: fc 44 add r12, pc -// CHECK-PI-THUMB-PLT-NEXT: 1442: 60 47 bx r12 +/// Thumb calls need to change state to reach PLT +/// bl can change to blx to PLT entries, branches +/// need a state change thunk. +// CHECK-ARM-PLT: Disassembly of section .thumb_caller: +// CHECK-ARM-PLT-EMPTY: +// CHECK-ARM-PLT-NEXT: : +// CHECK-ARM-PLT-NEXT: 1400: blx 0x1680 +// CHECK-ARM-PLT-NEXT: 1404: blx 0x1680 +// CHECK-ARM-PLT-NEXT: 1408: b.w 0x1420 <__ThumbV7PILongThunk_arm_callee1> +// CHECK-ARM-PLT-NEXT: 140c: b.w 0x142c <__ThumbV7PILongThunk_arm_callee2> +// CHECK-ARM-PLT-NEXT: 1410: b.w 0x1438 <__ThumbV7PILongThunk_arm_callee3> +// CHECK-ARM-PLT-NEXT: 1414: beq.w 0x1420 <__ThumbV7PILongThunk_arm_callee1> +// CHECK-ARM-PLT-NEXT: 1418: beq.w 0x142c <__ThumbV7PILongThunk_arm_callee2> +// CHECK-ARM-PLT-NEXT: 141c: bne.w 0x1438 <__ThumbV7PILongThunk_arm_callee3> -// Target Sections for thunks at a higher address than the callers. +/// Target Sections for thunks at a higher address than the callers. .section .R_ARM_JUMP24_callee_high, "ax", %progbits .thumb .balign 0x100 @@ -293,9 +254,9 @@ thumb_callee3: // CHECK-THUMB: Disassembly of section .R_ARM_JUMP24_callee_2: // CHECK-THUMB-EMPTY: // CHECK-THUMB-NEXT: : -// CHECK-THUMB-NEXT: 1500: 70 47 bx lr +// CHECK-THUMB-NEXT: 1500: bx lr // CHECK-THUMB: : -// CHECK-THUMB-NEXT: 1502: 70 47 bx lr +// CHECK-THUMB-NEXT: 1502: bx lr .section .R_ARM_THM_JUMP_callee_high, "ax", %progbits .arm @@ -311,11 +272,11 @@ arm_callee3: // CHECK-ARM: Disassembly of section .R_ARM_THM_JUMP_callee_2: // CHECK-ARM-EMPTY: // CHECK-ARM-NEXT: : -// CHECK-ARM-NEXT: 1600: 1e ff 2f e1 bx lr +// CHECK-ARM-NEXT: 1600: bx lr // CHECK-ARM: : -// CHECK-ARM-NEXT: 1604: 1e ff 2f e1 bx lr +// CHECK-ARM-NEXT: 1604: bx lr -// _start section just calls the arm and thumb calling sections +/// _start section just calls the arm and thumb calling sections .text .arm .globl _start @@ -326,55 +287,83 @@ _start: bl thumb_caller bx lr - -// CHECK-PI-ARM-PLT: Disassembly of section .plt: -// CHECK-PI-ARM-PLT-EMPTY: -// CHECK-PI-ARM-PLT-NEXT: .plt: -// CHECK-PI-ARM-PLT-NEXT: 17b0: 04 e0 2d e5 str lr, [sp, #-4]! -// CHECK-PI-ARM-PLT-NEXT: 17b4: 04 e0 9f e5 ldr lr, [pc, #4] -// CHECK-PI-ARM-PLT-NEXT: 17b8: 0e e0 8f e0 add lr, pc, lr -// CHECK-PI-ARM-PLT-NEXT: 17bc: 08 f0 be e5 ldr pc, [lr, #8]! -// CHECK-PI-ARM-PLT-NEXT: 17c0: d4 00 00 00 -// 0x17c8 + 8 + 0xd0 = 0x18a0 arm_caller -// CHECK-PI-ARM-PLT-NEXT: 17c4: 04 c0 9f e5 ldr r12, [pc, #4] -// CHECK-PI-ARM-PLT-NEXT: 17c8: 0f c0 8c e0 add r12, r12, pc -// CHECK-PI-ARM-PLT-NEXT: 17cc: 00 f0 9c e5 ldr pc, [r12] -// CHECK-PI-ARM-PLT-NEXT: 17d0: d0 00 00 00 -// 0x17d8 + 8 + 0xc4 = 0x18a4 thumb_caller -// CHECK-PI-ARM-PLT-NEXT: 17d4: 04 c0 9f e5 ldr r12, [pc, #4] -// CHECK-PI-ARM-PLT-NEXT: 17d8: 0f c0 8c e0 add r12, r12, pc -// CHECK-PI-ARM-PLT-NEXT: 17dc: 00 f0 9c e5 ldr pc, [r12] -// CHECK-PI-ARM-PLT-NEXT: 17e0: c4 00 00 00 -// 0x17e8 + 8 + 0xb8 = 0x18a8 thumb_callee1 -// CHECK-PI-ARM-PLT-NEXT: 17e4: 04 c0 9f e5 ldr r12, [pc, #4] -// CHECK-PI-ARM-PLT-NEXT: 17e8: 0f c0 8c e0 add r12, r12, pc -// CHECK-PI-ARM-PLT-NEXT: 17ec: 00 f0 9c e5 ldr pc, [r12] -// CHECK-PI-ARM-PLT-NEXT: 17f0: b8 00 00 00 -// 0x17f8 + 8 + 0xac = 0x18ac thumb_callee2 -// CHECK-PI-ARM-PLT-NEXT: 17f4: 04 c0 9f e5 ldr r12, [pc, #4] -// CHECK-PI-ARM-PLT-NEXT: 17f8: 0f c0 8c e0 add r12, r12, pc -// CHECK-PI-ARM-PLT-NEXT: 17fc: 00 f0 9c e5 ldr pc, [r12] -// CHECK-PI-ARM-PLT-NEXT: 1800: ac 00 00 00 -// 0x1808 + 8 + 0xa0 = 0x18b0 thumb_callee3 -// CHECK-PI-ARM-PLT-NEXT: 1804: 04 c0 9f e5 ldr r12, [pc, #4] -// CHECK-PI-ARM-PLT-NEXT: 1808: 0f c0 8c e0 add r12, r12, pc -// CHECK-PI-ARM-PLT-NEXT: 180c: 00 f0 9c e5 ldr pc, [r12] -// CHECK-PI-ARM-PLT-NEXT: 1810: a0 00 00 00 -// 0x1818 + 8 + 0x94 = 0x18b4 arm_callee1 -// CHECK-PI-ARM-PLT-NEXT: 1814: 04 c0 9f e5 ldr r12, [pc, #4] -// CHECK-PI-ARM-PLT-NEXT: 1818: 0f c0 8c e0 add r12, r12, pc -// CHECK-PI-ARM-PLT-NEXT: 181c: 00 f0 9c e5 ldr pc, [r12] -// CHECK-PI-ARM-PLT-NEXT: 1820: 94 00 00 00 -// 0x1828 + 8 + 0x88 = 0x18b8 arm_callee2 -// CHECK-PI-ARM-PLT-NEXT: 1824: 04 c0 9f e5 ldr r12, [pc, #4] -// CHECK-PI-ARM-PLT-NEXT: 1828: 0f c0 8c e0 add r12, r12, pc -// CHECK-PI-ARM-PLT-NEXT: 182c: 00 f0 9c e5 ldr pc, [r12] -// CHECK-PI-ARM-PLT-NEXT: 1830: 88 00 00 00 -// 0x1838 + 8 + 0x7c = 0x18bc arm_callee3 -// CHECK-PI-ARM-PLT-NEXT: 1834: 04 c0 9f e5 ldr r12, [pc, #4] -// CHECK-PI-ARM-PLT-NEXT: 1838: 0f c0 8c e0 add r12, r12, pc -// CHECK-PI-ARM-PLT-NEXT: 183c: 00 f0 9c e5 ldr pc, [r12] -// CHECK-PI-ARM-PLT-NEXT: 1840: 7c 00 00 00 +// CHECK-ARM-PLT: Disassembly of section .plt: +// CHECK-ARM-PLT-EMPTY: +// CHECK-ARM-PLT-NEXT: 00001610 <$a>: +// CHECK-ARM-PLT-NEXT: 1610: str lr, [sp, #-4]! +// CHECK-ARM-PLT-NEXT: 1614: add lr, pc, #0, #12 +// CHECK-ARM-PLT-NEXT: 1618: add lr, lr, #0, #20 +// CHECK-ARM-PLT-NEXT: 161c: ldr pc, [lr, #672]! +// CHECK-ARM-PLT-EMPTY: +// CHECK-ARM-PLT-NEXT: 00001620 <$d>: +// CHECK-ARM-PLT-NEXT: 1620: d4 d4 d4 d4 .word 0xd4d4d4d4 +// CHECK-ARM-PLT-NEXT: 1624: d4 d4 d4 d4 .word 0xd4d4d4d4 +// CHECK-ARM-PLT-NEXT: 1628: d4 d4 d4 d4 .word 0xd4d4d4d4 +// CHECK-ARM-PLT-NEXT: 162c: d4 d4 d4 d4 .word 0xd4d4d4d4 +// CHECK-ARM-PLT-EMPTY: +// CHECK-ARM-PLT-NEXT: 00001630 <$a>: +// CHECK-ARM-PLT-NEXT: 1630: add r12, pc, #0, #12 +// CHECK-ARM-PLT-NEXT: 1634: add r12, r12, #0, #20 +// CHECK-ARM-PLT-NEXT: 1638: ldr pc, [r12, #648]! +// CHECK-ARM-PLT-EMPTY: +// CHECK-ARM-PLT-NEXT: 0000163c <$d>: +// CHECK-ARM-PLT-NEXT: 163c: d4 d4 d4 d4 .word 0xd4d4d4d4 +// CHECK-ARM-PLT-EMPTY: +// CHECK-ARM-PLT-NEXT: 00001640 <$a>: +// CHECK-ARM-PLT-NEXT: 1640: add r12, pc, #0, #12 +// CHECK-ARM-PLT-NEXT: 1644: add r12, r12, #0, #20 +// CHECK-ARM-PLT-NEXT: 1648: ldr pc, [r12, #636]! +// CHECK-ARM-PLT-EMPTY: +// CHECK-ARM-PLT-NEXT: 0000164c <$d>: +// CHECK-ARM-PLT-NEXT: 164c: d4 d4 d4 d4 .word 0xd4d4d4d4 +// CHECK-ARM-PLT-EMPTY: +// CHECK-ARM-PLT-NEXT: 00001650 <$a>: +// CHECK-ARM-PLT-NEXT: 1650: add r12, pc, #0, #12 +// CHECK-ARM-PLT-NEXT: 1654: add r12, r12, #0, #20 +// CHECK-ARM-PLT-NEXT: 1658: ldr pc, [r12, #624]! +// CHECK-ARM-PLT-EMPTY: +// CHECK-ARM-PLT-NEXT: 0000165c <$d>: +// CHECK-ARM-PLT-NEXT: 165c: d4 d4 d4 d4 .word 0xd4d4d4d4 +// CHECK-ARM-PLT-EMPTY: +// CHECK-ARM-PLT-NEXT: 00001660 <$a>: +// CHECK-ARM-PLT-NEXT: 1660: add r12, pc, #0, #12 +// CHECK-ARM-PLT-NEXT: 1664: add r12, r12, #0, #20 +// CHECK-ARM-PLT-NEXT: 1668: ldr pc, [r12, #612]! +// CHECK-ARM-PLT-EMPTY: +// CHECK-ARM-PLT-NEXT: 0000166c <$d>: +// CHECK-ARM-PLT-NEXT: 166c: d4 d4 d4 d4 .word 0xd4d4d4d4 +// CHECK-ARM-PLT-EMPTY: +// CHECK-ARM-PLT-NEXT: 00001670 <$a>: +// CHECK-ARM-PLT-NEXT: 1670: add r12, pc, #0, #12 +// CHECK-ARM-PLT-NEXT: 1674: add r12, r12, #0, #20 +// CHECK-ARM-PLT-NEXT: 1678: ldr pc, [r12, #600]! +// CHECK-ARM-PLT-EMPTY: +// CHECK-ARM-PLT-NEXT: 0000167c <$d>: +// CHECK-ARM-PLT-NEXT: 167c: d4 d4 d4 d4 .word 0xd4d4d4d4 +// CHECK-ARM-PLT-EMPTY: +// CHECK-ARM-PLT-NEXT: 00001680 <$a>: +// CHECK-ARM-PLT-NEXT: 1680: add r12, pc, #0, #12 +// CHECK-ARM-PLT-NEXT: 1684: add r12, r12, #0, #20 +// CHECK-ARM-PLT-NEXT: 1688: ldr pc, [r12, #588]! +// CHECK-ARM-PLT-EMPTY: +// CHECK-ARM-PLT-NEXT: 0000168c <$d>: +// CHECK-ARM-PLT-NEXT: 168c: d4 d4 d4 d4 .word 0xd4d4d4d4 +// CHECK-ARM-PLT-EMPTY: +// CHECK-ARM-PLT-NEXT: 00001690 <$a>: +// CHECK-ARM-PLT-NEXT: 1690: add r12, pc, #0, #12 +// CHECK-ARM-PLT-NEXT: 1694: add r12, r12, #0, #20 +// CHECK-ARM-PLT-NEXT: 1698: ldr pc, [r12, #576]! +// CHECK-ARM-PLT-EMPTY: +// CHECK-ARM-PLT-NEXT: 0000169c <$d>: +// CHECK-ARM-PLT-NEXT: 169c: d4 d4 d4 d4 .word 0xd4d4d4d4 +// CHECK-ARM-PLT-EMPTY: +// CHECK-ARM-PLT-NEXT: 000016a0 <$a>: +// CHECK-ARM-PLT-NEXT: 16a0: add r12, pc, #0, #12 +// CHECK-ARM-PLT-NEXT: 16a4: add r12, r12, #0, #20 +// CHECK-ARM-PLT-NEXT: 16a8: ldr pc, [r12, #564]! +// CHECK-ARM-PLT-EMPTY: +// CHECK-ARM-PLT-NEXT: 000016ac <$d>: +// CHECK-ARM-PLT-NEXT: 16ac: d4 d4 d4 d4 .word 0xd4d4d4d4 // CHECK-DSO-REL: 0x18C0 R_ARM_JUMP_SLOT arm_caller // CHECK-DSO-REL-NEXT: 0x18C4 R_ARM_JUMP_SLOT thumb_caller From 8db9cb262fa159b2f8742d5652aec45d2c3713b2 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 30 Jun 2021 13:28:09 +0100 Subject: [PATCH 284/619] [Matrix] Add tests for hoisting address computations. --- .../multiply-fused-dominance.ll | 300 ++++++++++++++++++ 1 file changed, 300 insertions(+) create mode 100644 llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-dominance.ll diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-dominance.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-dominance.ll new file mode 100644 index 0000000000000..567b66002b322 --- /dev/null +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-dominance.ll @@ -0,0 +1,300 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -lower-matrix-intrinsics -fuse-matrix-use-loops=false -fuse-matrix-tile-size=1 -matrix-allow-contract -force-fuse-matrix -instcombine -verify-dom-info %s -S | FileCheck %s +; RUN: opt -passes=lower-matrix-intrinsics,instcombine -fuse-matrix-use-loops=false -fuse-matrix-tile-size=1 -matrix-allow-contract -force-fuse-matrix -verify-dom-info %s -S | FileCheck %s + +; REQUIRES: aarch64-registered-target + +target datalayout = "e-m:o-i64:64-f80:128-n8:4:32:64-S128" +target triple = "aarch64-apple-ios" + +define void @multiply_can_hoist_cast(<4 x double>* noalias %A, <4 x double> * %B, [4 x double]* %C) { +; CHECK-LABEL: @multiply_can_hoist_cast( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VEC_CAST:%.*]] = bitcast <4 x double>* [[A:%.*]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST]], align 8 +; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr <4 x double>, <4 x double>* [[A]], i64 0, i64 2 +; CHECK-NEXT: [[VEC_CAST1:%.*]] = bitcast double* [[VEC_GEP]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD2:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST1]], align 8 +; CHECK-NEXT: [[VEC_CAST3:%.*]] = bitcast <4 x double>* [[B:%.*]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD4:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST3]], align 8 +; CHECK-NEXT: [[VEC_GEP5:%.*]] = getelementptr <4 x double>, <4 x double>* [[B]], i64 0, i64 2 +; CHECK-NEXT: [[VEC_CAST6:%.*]] = bitcast double* [[VEC_GEP5]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD7:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST6]], align 8 +; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[COL_LOAD4]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP0:%.*]] = fmul contract <2 x double> [[COL_LOAD]], [[SPLAT_SPLAT]] +; CHECK-NEXT: [[SPLAT_SPLAT10:%.*]] = shufflevector <2 x double> [[COL_LOAD4]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD2]], <2 x double> [[SPLAT_SPLAT10]], <2 x double> [[TMP0]]) +; CHECK-NEXT: [[SPLAT_SPLAT13:%.*]] = shufflevector <2 x double> [[COL_LOAD7]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = fmul contract <2 x double> [[COL_LOAD]], [[SPLAT_SPLAT13]] +; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <2 x double> [[COL_LOAD7]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD2]], <2 x double> [[SPLAT_SPLAT16]], <2 x double> [[TMP2]]) +; CHECK-NEXT: [[VEC_CAST17:%.*]] = bitcast [4 x double]* [[C:%.*]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP1]], <2 x double>* [[VEC_CAST17]], align 8 +; CHECK-NEXT: [[VEC_GEP18:%.*]] = getelementptr [4 x double], [4 x double]* [[C]], i64 0, i64 2 +; CHECK-NEXT: [[VEC_CAST19:%.*]] = bitcast double* [[VEC_GEP18]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[VEC_CAST19]], align 8 +; CHECK-NEXT: ret void +; +entry: + %a = load <4 x double>, <4 x double>* %A, align 8 + %b = load <4 x double>, <4 x double>* %B, align 8 + %c = call <4 x double> @llvm.matrix.multiply(<4 x double> %a, <4 x double> %b, i32 2, i32 2, i32 2) + %c.cast = bitcast [4 x double]* %C to <4 x double>* + store <4 x double> %c, <4 x double>* %c.cast, align 8 + ret void +} + +define void @multiply_can_hoist_multiple_insts(<4 x double>* noalias %A, <4 x double> * %B, [4 x double]* %C) { +; CHECK-LABEL: @multiply_can_hoist_multiple_insts( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VEC_CAST:%.*]] = bitcast <4 x double>* [[A:%.*]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST]], align 8 +; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr <4 x double>, <4 x double>* [[A]], i64 0, i64 2 +; CHECK-NEXT: [[VEC_CAST1:%.*]] = bitcast double* [[VEC_GEP]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD2:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST1]], align 8 +; CHECK-NEXT: [[VEC_CAST3:%.*]] = bitcast <4 x double>* [[B:%.*]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD4:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST3]], align 8 +; CHECK-NEXT: [[VEC_GEP5:%.*]] = getelementptr <4 x double>, <4 x double>* [[B]], i64 0, i64 2 +; CHECK-NEXT: [[VEC_CAST6:%.*]] = bitcast double* [[VEC_GEP5]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD7:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST6]], align 8 +; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[COL_LOAD4]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP0:%.*]] = fmul contract <2 x double> [[COL_LOAD]], [[SPLAT_SPLAT]] +; CHECK-NEXT: [[SPLAT_SPLAT10:%.*]] = shufflevector <2 x double> [[COL_LOAD4]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD2]], <2 x double> [[SPLAT_SPLAT10]], <2 x double> [[TMP0]]) +; CHECK-NEXT: [[SPLAT_SPLAT13:%.*]] = shufflevector <2 x double> [[COL_LOAD7]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = fmul contract <2 x double> [[COL_LOAD]], [[SPLAT_SPLAT13]] +; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <2 x double> [[COL_LOAD7]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD2]], <2 x double> [[SPLAT_SPLAT16]], <2 x double> [[TMP2]]) +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [4 x double], [4 x double]* [[C:%.*]], i64 2, i64 0 +; CHECK-NEXT: [[VEC_CAST17:%.*]] = bitcast double* [[TMP4]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP1]], <2 x double>* [[VEC_CAST17]], align 8 +; CHECK-NEXT: [[VEC_GEP18:%.*]] = getelementptr [4 x double], [4 x double]* [[C]], i64 2, i64 2 +; CHECK-NEXT: [[VEC_CAST19:%.*]] = bitcast double* [[VEC_GEP18]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[VEC_CAST19]], align 8 +; CHECK-NEXT: ret void +; +entry: + %a = load <4 x double>, <4 x double>* %A, align 8 + %b = load <4 x double>, <4 x double>* %B, align 8 + %c = call <4 x double> @llvm.matrix.multiply(<4 x double> %a, <4 x double> %b, i32 2, i32 2, i32 2) + %gep = getelementptr [4 x double], [4 x double]* %C, i32 2 + %c.cast = bitcast [4 x double]* %gep to <4 x double>* + store <4 x double> %c, <4 x double>* %c.cast, align 8 + ret void +} + +; Make sure the correct instruction order is preserved when hoisting. +define void @multiply_can_hoist_multiple_insts2(<4 x double>* noalias %A, <4 x double> * %B, [4 x double]* %C) { +; CHECK-LABEL: @multiply_can_hoist_multiple_insts2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VEC_CAST:%.*]] = bitcast <4 x double>* [[A:%.*]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST]], align 8 +; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr <4 x double>, <4 x double>* [[A]], i64 0, i64 2 +; CHECK-NEXT: [[VEC_CAST1:%.*]] = bitcast double* [[VEC_GEP]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD2:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST1]], align 8 +; CHECK-NEXT: [[VEC_CAST3:%.*]] = bitcast <4 x double>* [[B:%.*]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD4:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST3]], align 8 +; CHECK-NEXT: [[VEC_GEP5:%.*]] = getelementptr <4 x double>, <4 x double>* [[B]], i64 0, i64 2 +; CHECK-NEXT: [[VEC_CAST6:%.*]] = bitcast double* [[VEC_GEP5]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD7:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST6]], align 8 +; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[COL_LOAD4]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP0:%.*]] = fmul contract <2 x double> [[COL_LOAD]], [[SPLAT_SPLAT]] +; CHECK-NEXT: [[SPLAT_SPLAT10:%.*]] = shufflevector <2 x double> [[COL_LOAD4]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD2]], <2 x double> [[SPLAT_SPLAT10]], <2 x double> [[TMP0]]) +; CHECK-NEXT: [[SPLAT_SPLAT13:%.*]] = shufflevector <2 x double> [[COL_LOAD7]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = fmul contract <2 x double> [[COL_LOAD]], [[SPLAT_SPLAT13]] +; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <2 x double> [[COL_LOAD7]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD2]], <2 x double> [[SPLAT_SPLAT16]], <2 x double> [[TMP2]]) +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [4 x double], [4 x double]* [[C:%.*]], i64 42, i64 0 +; CHECK-NEXT: [[VEC_CAST17:%.*]] = bitcast double* [[TMP4]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP1]], <2 x double>* [[VEC_CAST17]], align 8 +; CHECK-NEXT: [[VEC_GEP18:%.*]] = getelementptr [4 x double], [4 x double]* [[C]], i64 42, i64 2 +; CHECK-NEXT: [[VEC_CAST19:%.*]] = bitcast double* [[VEC_GEP18]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[VEC_CAST19]], align 8 +; CHECK-NEXT: ret void +; +entry: + %a = load <4 x double>, <4 x double>* %A, align 8 + %b = load <4 x double>, <4 x double>* %B, align 8 + %c = call <4 x double> @llvm.matrix.multiply(<4 x double> %a, <4 x double> %b, i32 2, i32 2, i32 2) + %c.cast = bitcast [4 x double]* %C to <4 x double>* + %off.0 = add i32 10, 10 + %off.1 = add i32 %off.0, 2 + %off.2 = add i32 %off.0, %off.1 + %gep.1 = getelementptr <4 x double>, <4 x double>* %c.cast, i32 %off.2 + store <4 x double> %c, <4 x double>* %gep.1, align 8 + ret void +} + +define void @multiply_dont_hoist_phi(<4 x double>* noalias %A, <4 x double> * %B, [4 x double]* %C) { +; CHECK-LABEL: @multiply_dont_hoist_phi( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VEC_CAST:%.*]] = bitcast <4 x double>* [[A:%.*]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST]], align 8 +; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr <4 x double>, <4 x double>* [[A]], i64 0, i64 2 +; CHECK-NEXT: [[VEC_CAST1:%.*]] = bitcast double* [[VEC_GEP]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD2:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST1]], align 8 +; CHECK-NEXT: [[VEC_CAST3:%.*]] = bitcast <4 x double>* [[B:%.*]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD4:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST3]], align 8 +; CHECK-NEXT: [[VEC_GEP5:%.*]] = getelementptr <4 x double>, <4 x double>* [[B]], i64 0, i64 2 +; CHECK-NEXT: [[VEC_CAST6:%.*]] = bitcast double* [[VEC_GEP5]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD7:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST6]], align 8 +; CHECK-NEXT: br label [[NEXT:%.*]] +; CHECK: next: +; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <2 x double> [[COL_LOAD7]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[SPLAT_SPLAT13:%.*]] = shufflevector <2 x double> [[COL_LOAD7]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP0:%.*]] = fmul contract <2 x double> [[COL_LOAD]], [[SPLAT_SPLAT13]] +; CHECK-NEXT: [[TMP1:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD2]], <2 x double> [[SPLAT_SPLAT16]], <2 x double> [[TMP0]]) +; CHECK-NEXT: [[SPLAT_SPLAT10:%.*]] = shufflevector <2 x double> [[COL_LOAD4]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[COL_LOAD4]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = fmul contract <2 x double> [[COL_LOAD]], [[SPLAT_SPLAT]] +; CHECK-NEXT: [[TMP3:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD2]], <2 x double> [[SPLAT_SPLAT10]], <2 x double> [[TMP2]]) +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [4 x double], [4 x double]* [[C:%.*]], i64 26, i64 0 +; CHECK-NEXT: [[VEC_CAST17:%.*]] = bitcast double* [[TMP4]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[VEC_CAST17]], align 8 +; CHECK-NEXT: [[VEC_GEP18:%.*]] = getelementptr [4 x double], [4 x double]* [[C]], i64 26, i64 2 +; CHECK-NEXT: [[VEC_CAST19:%.*]] = bitcast double* [[VEC_GEP18]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP1]], <2 x double>* [[VEC_CAST19]], align 8 +; CHECK-NEXT: ret void +; +entry: + %a = load <4 x double>, <4 x double>* %A, align 8 + %b = load <4 x double>, <4 x double>* %B, align 8 + %c = call <4 x double> @llvm.matrix.multiply(<4 x double> %a, <4 x double> %b, i32 2, i32 2, i32 2) + %c.cast = bitcast [4 x double]* %C to <4 x double>* + br label %next + +next: + %p = phi i32 [ 2, %entry ] + %off.0 = add i32 10, %p + %off.1 = add i32 %off.0, 2 + %off.2 = add i32 %off.0, %off.1 + %gep.1 = getelementptr <4 x double>, <4 x double>* %c.cast, i32 %off.2 + store <4 x double> %c, <4 x double>* %gep.1, align 8 + ret void +} + +; The address load may alias, so avoid moving it for now. +define void @multiply_dont_hoist_cast_due_to_operand(<4 x double>* noalias %A, <4 x double> * %B, [4 x double]** %C.ptr) { +; CHECK-LABEL: @multiply_dont_hoist_cast_due_to_operand( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VEC_CAST:%.*]] = bitcast <4 x double>* [[A:%.*]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST]], align 8 +; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr <4 x double>, <4 x double>* [[A]], i64 0, i64 2 +; CHECK-NEXT: [[VEC_CAST1:%.*]] = bitcast double* [[VEC_GEP]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD2:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST1]], align 8 +; CHECK-NEXT: [[VEC_CAST3:%.*]] = bitcast <4 x double>* [[B:%.*]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD4:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST3]], align 8 +; CHECK-NEXT: [[VEC_GEP5:%.*]] = getelementptr <4 x double>, <4 x double>* [[B]], i64 0, i64 2 +; CHECK-NEXT: [[VEC_CAST6:%.*]] = bitcast double* [[VEC_GEP5]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD7:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST6]], align 8 +; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[COL_LOAD4]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP0:%.*]] = fmul contract <2 x double> [[COL_LOAD]], [[SPLAT_SPLAT]] +; CHECK-NEXT: [[SPLAT_SPLAT10:%.*]] = shufflevector <2 x double> [[COL_LOAD4]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD2]], <2 x double> [[SPLAT_SPLAT10]], <2 x double> [[TMP0]]) +; CHECK-NEXT: [[SPLAT_SPLAT13:%.*]] = shufflevector <2 x double> [[COL_LOAD7]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = fmul contract <2 x double> [[COL_LOAD]], [[SPLAT_SPLAT13]] +; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <2 x double> [[COL_LOAD7]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD2]], <2 x double> [[SPLAT_SPLAT16]], <2 x double> [[TMP2]]) +; CHECK-NEXT: [[TMP4:%.*]] = bitcast [4 x double]** [[C_PTR:%.*]] to double** +; CHECK-NEXT: [[C2021:%.*]] = load double*, double** [[TMP4]], align 8 +; CHECK-NEXT: [[VEC_CAST17:%.*]] = bitcast double* [[C2021]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP1]], <2 x double>* [[VEC_CAST17]], align 8 +; CHECK-NEXT: [[VEC_GEP18:%.*]] = getelementptr double, double* [[C2021]], i64 2 +; CHECK-NEXT: [[VEC_CAST19:%.*]] = bitcast double* [[VEC_GEP18]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[VEC_CAST19]], align 8 +; CHECK-NEXT: ret void +; +entry: + %a = load <4 x double>, <4 x double>* %A, align 8 + %b = load <4 x double>, <4 x double>* %B, align 8 + %c = call <4 x double> @llvm.matrix.multiply(<4 x double> %a, <4 x double> %b, i32 2, i32 2, i32 2) + %C = load [4 x double]*, [4 x double]** %C.ptr + %c.cast = bitcast [4 x double]* %C to <4 x double>* + store <4 x double> %c, <4 x double>* %c.cast, align 8 + ret void +} + +; The address load may alias, so avoid moving it for now. +define void @multiply_dont_hoist_load(<4 x double>* noalias %A, <4 x double> * %B, <4 x double>** %C.ptr) { +; CHECK-LABEL: @multiply_dont_hoist_load( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VEC_CAST:%.*]] = bitcast <4 x double>* [[A:%.*]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST]], align 8 +; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr <4 x double>, <4 x double>* [[A]], i64 0, i64 2 +; CHECK-NEXT: [[VEC_CAST1:%.*]] = bitcast double* [[VEC_GEP]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD2:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST1]], align 8 +; CHECK-NEXT: [[VEC_CAST3:%.*]] = bitcast <4 x double>* [[B:%.*]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD4:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST3]], align 8 +; CHECK-NEXT: [[VEC_GEP5:%.*]] = getelementptr <4 x double>, <4 x double>* [[B]], i64 0, i64 2 +; CHECK-NEXT: [[VEC_CAST6:%.*]] = bitcast double* [[VEC_GEP5]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD7:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST6]], align 8 +; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[COL_LOAD4]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP0:%.*]] = fmul contract <2 x double> [[COL_LOAD]], [[SPLAT_SPLAT]] +; CHECK-NEXT: [[SPLAT_SPLAT10:%.*]] = shufflevector <2 x double> [[COL_LOAD4]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD2]], <2 x double> [[SPLAT_SPLAT10]], <2 x double> [[TMP0]]) +; CHECK-NEXT: [[SPLAT_SPLAT13:%.*]] = shufflevector <2 x double> [[COL_LOAD7]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = fmul contract <2 x double> [[COL_LOAD]], [[SPLAT_SPLAT13]] +; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <2 x double> [[COL_LOAD7]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD2]], <2 x double> [[SPLAT_SPLAT16]], <2 x double> [[TMP2]]) +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x double>** [[C_PTR:%.*]] to double** +; CHECK-NEXT: [[C20:%.*]] = load double*, double** [[TMP4]], align 8 +; CHECK-NEXT: [[VEC_CAST17:%.*]] = bitcast double* [[C20]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP1]], <2 x double>* [[VEC_CAST17]], align 8 +; CHECK-NEXT: [[VEC_GEP18:%.*]] = getelementptr double, double* [[C20]], i64 2 +; CHECK-NEXT: [[VEC_CAST19:%.*]] = bitcast double* [[VEC_GEP18]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[VEC_CAST19]], align 8 +; CHECK-NEXT: ret void +; +entry: + %a = load <4 x double>, <4 x double>* %A, align 8 + %b = load <4 x double>, <4 x double>* %B, align 8 + %c = call <4 x double> @llvm.matrix.multiply(<4 x double> %a, <4 x double> %b, i32 2, i32 2, i32 2) + %C = load <4 x double>*, <4 x double>** %C.ptr + store <4 x double> %c, <4 x double>* %C, align 8 + ret void +} + +; The call to @get_adress may clobber memory, avoid moving it for now. +define void @multiply_dont_hoist_call(<4 x double>* noalias %A, <4 x double> * %B) { +; CHECK-LABEL: @multiply_dont_hoist_call( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VEC_CAST:%.*]] = bitcast <4 x double>* [[A:%.*]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST]], align 8 +; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr <4 x double>, <4 x double>* [[A]], i64 0, i64 2 +; CHECK-NEXT: [[VEC_CAST1:%.*]] = bitcast double* [[VEC_GEP]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD2:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST1]], align 8 +; CHECK-NEXT: [[VEC_CAST3:%.*]] = bitcast <4 x double>* [[B:%.*]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD4:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST3]], align 8 +; CHECK-NEXT: [[VEC_GEP5:%.*]] = getelementptr <4 x double>, <4 x double>* [[B]], i64 0, i64 2 +; CHECK-NEXT: [[VEC_CAST6:%.*]] = bitcast double* [[VEC_GEP5]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD7:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST6]], align 8 +; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[COL_LOAD4]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP0:%.*]] = fmul contract <2 x double> [[COL_LOAD]], [[SPLAT_SPLAT]] +; CHECK-NEXT: [[SPLAT_SPLAT10:%.*]] = shufflevector <2 x double> [[COL_LOAD4]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD2]], <2 x double> [[SPLAT_SPLAT10]], <2 x double> [[TMP0]]) +; CHECK-NEXT: [[SPLAT_SPLAT13:%.*]] = shufflevector <2 x double> [[COL_LOAD7]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = fmul contract <2 x double> [[COL_LOAD]], [[SPLAT_SPLAT13]] +; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <2 x double> [[COL_LOAD7]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD2]], <2 x double> [[SPLAT_SPLAT16]], <2 x double> [[TMP2]]) +; CHECK-NEXT: [[C:%.*]] = call <4 x double>* @get_address() +; CHECK-NEXT: [[VEC_CAST17:%.*]] = bitcast <4 x double>* [[C]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP1]], <2 x double>* [[VEC_CAST17]], align 8 +; CHECK-NEXT: [[VEC_GEP18:%.*]] = getelementptr <4 x double>, <4 x double>* [[C]], i64 0, i64 2 +; CHECK-NEXT: [[VEC_CAST19:%.*]] = bitcast double* [[VEC_GEP18]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[VEC_CAST19]], align 8 +; CHECK-NEXT: ret void +; +entry: + %a = load <4 x double>, <4 x double>* %A, align 8 + %b = load <4 x double>, <4 x double>* %B, align 8 + %c = call <4 x double> @llvm.matrix.multiply(<4 x double> %a, <4 x double> %b, i32 2, i32 2, i32 2) + %C = call <4 x double>* @get_address() + store <4 x double> %c, <4 x double>* %C, align 8 + ret void +} + +declare <4 x double>* @get_address() + + +declare <4 x double> @llvm.matrix.multiply(<4 x double>, <4 x double>, i32, i32, i32) From 0cd8422e8caa4aeccbf9b9c92c9acb23a4ffa9c3 Mon Sep 17 00:00:00 2001 From: "William S. Moses" Date: Thu, 10 Jun 2021 15:12:04 -0400 Subject: [PATCH 285/619] [MLIR] Eliminate unnecessary affine stores Deduce circumstances where an affine load could not possibly be read by an operation (such as an affine load), and if so, eliminate the load Differential Revision: https://reviews.llvm.org/D105041 --- .../Transforms/AffineScalarReplacement.cpp | 60 +++++++++++++++++++ mlir/test/Dialect/Affine/scalrep.mlir | 35 +++++++++++ 2 files changed, 95 insertions(+) diff --git a/mlir/lib/Dialect/Affine/Transforms/AffineScalarReplacement.cpp b/mlir/lib/Dialect/Affine/Transforms/AffineScalarReplacement.cpp index 5be0dcdaea157..b6cce790f715d 100644 --- a/mlir/lib/Dialect/Affine/Transforms/AffineScalarReplacement.cpp +++ b/mlir/lib/Dialect/Affine/Transforms/AffineScalarReplacement.cpp @@ -68,6 +68,11 @@ struct AffineScalarReplacement void loadCSE(AffineReadOpInterface loadOp, SmallVectorImpl &loadOpsToErase, DominanceInfo &domInfo); + + void findUnusedStore(AffineWriteOpInterface storeOp, + SmallVectorImpl &storeOpsToErase, + SmallPtrSetImpl &memrefsToErase, + PostDominanceInfo &postDominanceInfo); }; } // end anonymous namespace @@ -256,6 +261,51 @@ bool hasNoInterveningEffect(Operation *start, T memOp) { return !hasSideEffect; } +// This attempts to find stores which have no impact on the final result. +// A writing op writeA will be eliminated if there exists an op writeB if +// 1) writeA and writeB have mathematically equivalent affine access functions. +// 2) writeB postdominates writeA. +// 3) There is no potential read between writeA and writeB. +void AffineScalarReplacement::findUnusedStore( + AffineWriteOpInterface writeA, SmallVectorImpl &opsToErase, + SmallPtrSetImpl &memrefsToErase, + PostDominanceInfo &postDominanceInfo) { + + for (Operation *user : writeA.getMemRef().getUsers()) { + // Only consider writing operations. + auto writeB = dyn_cast(user); + if (!writeB) + continue; + + // The operations must be distinct. + if (writeB == writeA) + continue; + + // Both operations must lie in the same region. + if (writeB->getParentRegion() != writeA->getParentRegion()) + continue; + + // Both operations must write to the same memory. + MemRefAccess srcAccess(writeB); + MemRefAccess destAccess(writeA); + + if (srcAccess != destAccess) + continue; + + // writeB must postdominate writeA. + if (!postDominanceInfo.postDominates(writeB, writeA)) + continue; + + // There cannot be an operation which reads from memory between + // the two writes. + if (!hasNoInterveningEffect(writeA, writeB)) + continue; + + opsToErase.push_back(writeA); + break; + } +} + /// Attempt to eliminate loadOp by replacing it with a value stored into memory /// which the load is guaranteed to retrieve. This check involves three /// components: 1) The store and load must be on the same location 2) The store @@ -394,6 +444,7 @@ void AffineScalarReplacement::runOnFunction() { SmallPtrSet memrefsToErase; auto &domInfo = getAnalysis(); + auto &postDomInfo = getAnalysis(); // Walk all load's and perform store to load forwarding. f.walk([&](AffineReadOpInterface loadOp) { @@ -404,6 +455,15 @@ void AffineScalarReplacement::runOnFunction() { }); // Erase all load op's whose results were replaced with store fwd'ed ones. + for (auto *op : opsToErase) + op->erase(); + opsToErase.clear(); + + // Walk all store's and perform unused store elimination + f.walk([&](AffineWriteOpInterface storeOp) { + findUnusedStore(storeOp, opsToErase, memrefsToErase, postDomInfo); + }); + // Erase all store op's which don't impact the program for (auto *op : opsToErase) op->erase(); diff --git a/mlir/test/Dialect/Affine/scalrep.mlir b/mlir/test/Dialect/Affine/scalrep.mlir index 452ff0939a185..308186fe676db 100644 --- a/mlir/test/Dialect/Affine/scalrep.mlir +++ b/mlir/test/Dialect/Affine/scalrep.mlir @@ -642,3 +642,38 @@ func @overlap_no_fwd(%N : index) -> f32 { // CHECK-NEXT: return %{{.*}} : f32 } +// CHECK-LABEL: func @redundant_store_elim + +func @redundant_store_elim(%out : memref<512xf32>) { + %cf1 = constant 1.0 : f32 + %cf2 = constant 2.0 : f32 + affine.for %i = 0 to 16 { + affine.store %cf1, %out[32*%i] : memref<512xf32> + affine.store %cf2, %out[32*%i] : memref<512xf32> + } + return +} + +// CHECK: affine.for +// CHECK-NEXT: affine.store +// CHECK-NEXT: } + +// CHECK-LABEL: func @redundant_store_elim_fail + +func @redundant_store_elim_fail(%out : memref<512xf32>) { + %cf1 = constant 1.0 : f32 + %cf2 = constant 2.0 : f32 + affine.for %i = 0 to 16 { + affine.store %cf1, %out[32*%i] : memref<512xf32> + "test.use"(%out) : (memref<512xf32>) -> () + affine.store %cf2, %out[32*%i] : memref<512xf32> + } + return +} + +// CHECK: affine.for +// CHECK-NEXT: affine.store +// CHECK-NEXT: "test.use" +// CHECK-NEXT: affine.store +// CHECK-NEXT: } + From 0edb87773b1a086cb5bb960f7f4f6ca815bc8bb4 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Tue, 29 Jun 2021 17:05:31 -0400 Subject: [PATCH 286/619] [OpenMP] Add additional remarks for OpenMPOpt This patch adds additional remarks, suggesting the use of `noescape` for failed globalization and indicating when internalization failed. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D105150 --- .../Transforms/IPO/AttributorAttributes.cpp | 5 +++-- llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 22 ++++++++++++++----- .../Transforms/OpenMP/remove_globalization.ll | 2 +- .../OpenMP/single_threaded_execution.ll | 15 +++++++++++++ 4 files changed, 36 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 8ae1cff4050dd..5a750b0619652 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -5200,8 +5200,9 @@ ChangeStatus AAHeapToStackImpl::updateImpl(Attributor &A) { auto Remark = [&](OptimizationRemarkMissed ORM) { return ORM << "Could not move globalized variable to the stack. " << "Variable is potentially " - << ((!NoCaptureAA.isAssumedNoCapture()) ? "captured." - : "freed."); + << (!NoCaptureAA.isAssumedNoCapture() ? "captured. " + : "freed. ") + << "Mark as noescape to override."; }; LibFunc IsAllocShared; diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index ef86fe04708e3..c6b77a3c87c7b 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -2634,6 +2634,8 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) { if (DisableOpenMPOptimizations) return PreservedAnalyses::all(); + FunctionAnalysisManager &FAM = + AM.getResult(M).getManager(); KernelSet Kernels = getDeviceKernels(M); auto IsCalled = [&](Function &F) { @@ -2645,14 +2647,27 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) { return false; }; + auto EmitRemark = [&](Function &F) { + auto &ORE = FAM.getResult(F); + ORE.emit([&]() { + OptimizationRemarkMissed ORM(DEBUG_TYPE, "InternalizationFailure", &F); + return ORM << "Could not internalize function. " + << "Some optimizations may not be possible."; + }); + }; + // Create internal copies of each function if this is a kernel Module. This // allows iterprocedural passes to see every call edge. DenseSet InternalizedFuncs; if (isOpenMPDevice(M)) for (Function &F : M) - if (!F.isDeclaration() && !Kernels.contains(&F) && IsCalled(F)) - if (Attributor::internalizeFunction(F, /* Force */ true)) + if (!F.isDeclaration() && !Kernels.contains(&F) && IsCalled(F)) { + if (Attributor::internalizeFunction(F, /* Force */ true)) { InternalizedFuncs.insert(&F); + } else if (!F.hasLocalLinkage()) { + EmitRemark(F); + } + } // Look at every function in the Module unless it was internalized. SmallVector SCC; @@ -2663,9 +2678,6 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) { if (SCC.empty()) return PreservedAnalyses::all(); - FunctionAnalysisManager &FAM = - AM.getResult(M).getManager(); - AnalysisGetter AG(FAM); auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & { diff --git a/llvm/test/Transforms/OpenMP/remove_globalization.ll b/llvm/test/Transforms/OpenMP/remove_globalization.ll index 62dc54f52b5fe..0635ec6b7813c 100644 --- a/llvm/test/Transforms/OpenMP/remove_globalization.ll +++ b/llvm/test/Transforms/OpenMP/remove_globalization.ll @@ -4,7 +4,7 @@ target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" target triple = "nvptx64" -; CHECK-REMARKS: remark: remove_globalization.c:4:2: Could not move globalized variable to the stack. Variable is potentially captured. +; CHECK-REMARKS: remark: remove_globalization.c:4:2: Could not move globalized variable to the stack. Variable is potentially captured. Mark as noescape to override. ; CHECK-REMARKS: remark: remove_globalization.c:2:2: Moving globalized variable to the stack. ; CHECK-REMARKS: remark: remove_globalization.c:6:2: Moving globalized variable to the stack. diff --git a/llvm/test/Transforms/OpenMP/single_threaded_execution.ll b/llvm/test/Transforms/OpenMP/single_threaded_execution.ll index b3d535bd510c9..f5ae28ea351f6 100644 --- a/llvm/test/Transforms/OpenMP/single_threaded_execution.ll +++ b/llvm/test/Transforms/OpenMP/single_threaded_execution.ll @@ -1,4 +1,5 @@ ; RUN: opt -passes=openmp-opt -debug-only=openmp-opt -disable-output < %s 2>&1 | FileCheck %s +; RUN: opt -passes=openmp-opt -pass-remarks-missed=openmp-opt -disable-output < %s 2>&1 | FileCheck %s --check-prefix=REMARKS ; REQUIRES: asserts ; ModuleID = 'single_threaded_exeuction.c' @@ -9,6 +10,8 @@ define weak void @kernel() { ret void } +; REMARKS: remark: single_threaded_execution.c:1:0: Could not internalize function. Some optimizations may not be possible. + ; CHECK-NOT: [openmp-opt] Basic block @nvptx entry is executed by a single thread. ; CHECK: [openmp-opt] Basic block @nvptx if.then is executed by a single thread. ; CHECK-NOT: [openmp-opt] Basic block @nvptx if.end is executed by a single thread. @@ -22,6 +25,7 @@ entry: if.then: call void @foo() call void @bar() + call void @baz() br label %if.end if.end: @@ -41,6 +45,7 @@ entry: if.then: call void @foo() call void @bar() + call void @baz() br label %if.end if.end: @@ -61,6 +66,13 @@ entry: ret void } +; CHECK-NOT: [openmp-opt] Basic block @baz entry is executed by a single thread. +; Function Attrs: noinline +define weak void @baz() !dbg !8 { +entry: + ret void +} + declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() declare i32 @llvm.amdgcn.workitem.id.x() @@ -80,3 +92,6 @@ declare void @__kmpc_kernel_init(i32, i16) !5 = !{i32 7, !"openmp", i32 50} !6 = !{i32 7, !"openmp-device", i32 50} !7 = !{void ()* @kernel, !"kernel", i32 1} +!8 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 8, type: !9, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!9 = !DISubroutineType(types: !2) +!10 = !DILocation(line: 5, column: 7, scope: !8) From e773216f46368cd927a3c67bfa2516913acb75e7 Mon Sep 17 00:00:00 2001 From: Melanie Blower Date: Mon, 28 Jun 2021 12:45:56 -0400 Subject: [PATCH 287/619] [clang][patch] Add builtin __arithmetic_fence and option fprotect-parens This patch adds a new clang builtin, __arithmetic_fence. The purpose of the builtin is to provide the user fine control, at the expression level, over floating point optimization when -ffast-math (-ffp-model=fast) is enabled. The builtin prevents the optimizer from rearranging floating point expression evaluation. The new option fprotect-parens has the same effect on parenthesized expressions, forcing the optimizer to respect the parentheses. Reviewed By: aaron.ballman, kpn Differential Revision: https://reviews.llvm.org/D100118 --- clang/docs/UsersManual.rst | 20 +++++ clang/include/clang/Basic/Builtins.def | 3 + .../clang/Basic/DiagnosticSemaKinds.td | 3 + clang/include/clang/Basic/LangOptions.def | 2 + clang/include/clang/Basic/TargetInfo.h | 3 + clang/include/clang/Driver/Options.td | 9 ++- clang/include/clang/Sema/Sema.h | 4 + clang/lib/AST/ExprConstant.cpp | 3 + clang/lib/Basic/TargetInfo.cpp | 5 ++ clang/lib/Basic/Targets/X86.h | 2 + clang/lib/CodeGen/CGBuiltin.cpp | 30 ++++++++ clang/lib/Driver/ToolChains/Clang.cpp | 5 ++ clang/lib/Sema/SemaChecking.cpp | 27 +++++++ clang/lib/Sema/SemaCoroutine.cpp | 32 ++------ clang/lib/Sema/SemaExpr.cpp | 27 +++++++ clang/test/AST/arithmetic-fence-builtin.c | 46 ++++++++++++ clang/test/CodeGen/arithmetic-fence-builtin.c | 74 +++++++++++++++++++ clang/test/Driver/clang_f_opts.c | 3 +- clang/test/Sema/arithmetic-fence-builtin.c | 48 ++++++++++++ 19 files changed, 318 insertions(+), 28 deletions(-) create mode 100644 clang/test/AST/arithmetic-fence-builtin.c create mode 100644 clang/test/CodeGen/arithmetic-fence-builtin.c create mode 100644 clang/test/Sema/arithmetic-fence-builtin.c diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index 244212a1336db..9e8bac635337e 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -1478,6 +1478,26 @@ Note that floating-point operations performed as part of constant initialization * ``maytrap`` The compiler avoids transformations that may raise exceptions that would not have been raised by the original code. Constant folding performed by the compiler is exempt from this option. * ``strict`` The compiler ensures that all transformations strictly preserve the floating point exception semantics of the original code. +.. option:: -f[no-]protect-parens: + + This option pertains to floating-point types, complex types with + floating-point components, and vectors of these types. Some arithmetic + expression transformations that are mathematically correct and permissible + according to the C and C++ language standards may be incorrect when dealing + with floating-point types, such as reassociation and distribution. Further, + the optimizer may ignore parentheses when computing arithmetic expressions + in circumstances where the parenthesized and unparenthesized expression + express the same mathematical value. For example (a+b)+c is the same + mathematical value as a+(b+c), but the optimizer is free to evaluate the + additions in any order regardless of the parentheses. When enabled, this + option forces the optimizer to honor the order of operations with respect + to parentheses in all circumstances. + + Note that floating-point contraction (option `-ffp-contract=`) is disabled + when `-fprotect-parens` is enabled. Also note that in safe floating-point + modes, such as `-ffp-model=precise` or `-ffp-model=strict`, this option + has no effect because the optimizer is prohibited from making unsafe + transformations. .. _fp-constant-eval: diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def index 5a9d0a0018292..33d3e6dc4e7db 100644 --- a/clang/include/clang/Basic/Builtins.def +++ b/clang/include/clang/Basic/Builtins.def @@ -1657,6 +1657,9 @@ BUILTIN(__builtin_ms_va_start, "vc*&.", "nt") BUILTIN(__builtin_ms_va_end, "vc*&", "n") BUILTIN(__builtin_ms_va_copy, "vc*&c*&", "n") +// Arithmetic Fence: to prevent FP reordering and reassociation optimizations +LANGBUILTIN(__arithmetic_fence, "v.", "t", ALL_LANGUAGES) + #undef BUILTIN #undef LIBBUILTIN #undef LANGBUILTIN diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 70a22fd2506a3..22c2a1a39ea13 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -8530,6 +8530,9 @@ def err_typecheck_expect_scalar_operand : Error< "operand of type %0 where arithmetic or pointer type is required">; def err_typecheck_cond_incompatible_operands : Error< "incompatible operand types%diff{ ($ and $)|}0,1">; +def err_typecheck_expect_flt_or_vector : Error< + "invalid operand of type %0 where floating, complex or " + "a vector of such types is required">; def err_cast_selector_expr : Error< "cannot type cast @selector expression">; def ext_typecheck_cond_incompatible_pointers : ExtWarn< diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index 465bad8d7d112..b18e957a58f4c 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -199,6 +199,8 @@ COMPATIBLE_LANGOPT(Deprecated , 1, 0, "__DEPRECATED predefined macro") COMPATIBLE_LANGOPT(FastMath , 1, 0, "fast FP math optimizations, and __FAST_MATH__ predefined macro") COMPATIBLE_LANGOPT(FiniteMathOnly , 1, 0, "__FINITE_MATH_ONLY__ predefined macro") COMPATIBLE_LANGOPT(UnsafeFPMath , 1, 0, "Unsafe Floating Point Math") +COMPATIBLE_LANGOPT(ProtectParens , 1, 0, "optimizer honors parentheses " + "when floating-point expressions are evaluated") BENIGN_LANGOPT(AllowFPReassoc , 1, 0, "Permit Floating Point reassociation") BENIGN_LANGOPT(NoHonorNaNs , 1, 0, "Permit Floating Point optimization without regard to NaN") BENIGN_LANGOPT(NoHonorInfs , 1, 0, "Permit Floating Point optimization without regard to infinities") diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index 20f6afa76cbb3..4f0cbf986b31b 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -1424,6 +1424,9 @@ class TargetInfo : public virtual TransferrableTargetInfo, /// Whether the option -fextend-arguments={32,64} is supported on the target. virtual bool supportsExtendIntArgs() const { return false; } + /// Controls if __arithmetic_fence is supported in the targeted backend. + virtual bool checkArithmeticFenceSupported() const { return false; } + /// Gets the default calling convention for the given target and /// declaration context. virtual CallingConv getDefaultCallingConv() const { diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 301f59207c4a5..41b7299b02745 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1762,6 +1762,13 @@ defm strict_float_cast_overflow : BoolFOption<"strict-float-cast-overflow", " of the target's native float-to-int conversion instructions">, PosFlag>; +defm protect_parens : BoolFOption<"protect-parens", + LangOpts<"ProtectParens">, DefaultFalse, + PosFlag, + NegFlag>; + def ffor_scope : Flag<["-"], "ffor-scope">, Group; def fno_for_scope : Flag<["-"], "fno-for-scope">, Group; @@ -4408,7 +4415,7 @@ defm integer_4_integer_8 : BooleanFFlag<"integer-4-integer-8">, Group, Group; defm module_private : BooleanFFlag<"module-private">, Group; defm pack_derived : BooleanFFlag<"pack-derived">, Group; -defm protect_parens : BooleanFFlag<"protect-parens">, Group; +//defm protect_parens : BooleanFFlag<"protect-parens">, Group; defm range_check : BooleanFFlag<"range-check">, Group; defm real_4_real_10 : BooleanFFlag<"real-4-real-10">, Group; defm real_4_real_16 : BooleanFFlag<"real-4-real-16">, Group; diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 4c3a7035bcc94..3f7db9bc5be8b 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -36,6 +36,7 @@ #include "clang/AST/TypeLoc.h" #include "clang/AST/TypeOrdering.h" #include "clang/Basic/BitmaskEnum.h" +#include "clang/Basic/Builtins.h" #include "clang/Basic/ExpressionTraits.h" #include "clang/Basic/Module.h" #include "clang/Basic/OpenCLOptions.h" @@ -5424,6 +5425,8 @@ class Sema final { Expr *ExecConfig = nullptr, bool IsExecConfig = false, bool AllowRecovery = false); + Expr *BuildBuiltinCallExpr(SourceLocation Loc, Builtin::ID Id, + MultiExprArg CallArgs); enum class AtomicArgumentOrder { API, AST }; ExprResult BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange, @@ -12583,6 +12586,7 @@ class Sema final { private: bool SemaBuiltinPrefetch(CallExpr *TheCall); bool SemaBuiltinAllocaWithAlign(CallExpr *TheCall); + bool SemaBuiltinArithmeticFence(CallExpr *TheCall); bool SemaBuiltinAssume(CallExpr *TheCall); bool SemaBuiltinAssumeAligned(CallExpr *TheCall); bool SemaBuiltinLongjmp(CallExpr *TheCall); diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index a4d8fec09748d..01c0168d61a40 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -13692,6 +13692,9 @@ bool FloatExprEvaluator::VisitCallExpr(const CallExpr *E) { Result.changeSign(); return true; + case Builtin::BI__arithmetic_fence: + return EvaluateFloat(E->getArg(0), Result, Info); + // FIXME: Builtin::BI__builtin_powi // FIXME: Builtin::BI__builtin_powif // FIXME: Builtin::BI__builtin_powil diff --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp index 4c2859e5eda7f..88086fa2fed74 100644 --- a/clang/lib/Basic/TargetInfo.cpp +++ b/clang/lib/Basic/TargetInfo.cpp @@ -430,6 +430,11 @@ void TargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) { // its corresponding signed type. PaddingOnUnsignedFixedPoint |= Opts.PaddingOnUnsignedFixedPoint; CheckFixedPointBits(); + + if (Opts.ProtectParens && !checkArithmeticFenceSupported()) { + Diags.Report(diag::err_opt_not_valid_on_target) << "-fprotect-parens"; + Opts.ProtectParens = false; + } } bool TargetInfo::initFeatureMap( diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h index 7639ea835ebc7..e798962617a30 100644 --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -362,6 +362,8 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { } } + bool checkArithmeticFenceSupported() const override { return true; } + CallingConv getDefaultCallingConv() const override { return CC_C; } diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 3fc9ba414397e..0e13b55b0cc57 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -2825,6 +2825,36 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Function *FnAssume = CGM.getIntrinsic(Intrinsic::assume); return RValue::get(Builder.CreateCall(FnAssume, ArgValue)); } + case Builtin::BI__arithmetic_fence: { + // Create the builtin call if FastMath is selected, and the target + // supports the builtin, otherwise just return the argument. + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); + llvm::FastMathFlags FMF = Builder.getFastMathFlags(); + bool isArithmeticFenceEnabled = + FMF.allowReassoc() && + getContext().getTargetInfo().checkArithmeticFenceSupported(); + QualType ArgType = E->getArg(0)->getType(); + if (ArgType->isComplexType()) { + if (isArithmeticFenceEnabled) { + QualType ElementType = ArgType->castAs()->getElementType(); + ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); + Value *Real = Builder.CreateArithmeticFence(ComplexVal.first, + ConvertType(ElementType)); + Value *Imag = Builder.CreateArithmeticFence(ComplexVal.second, + ConvertType(ElementType)); + return RValue::getComplex(std::make_pair(Real, Imag)); + } + ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); + Value *Real = ComplexVal.first; + Value *Imag = ComplexVal.second; + return RValue::getComplex(std::make_pair(Real, Imag)); + } + Value *ArgValue = EmitScalarExpr(E->getArg(0)); + if (isArithmeticFenceEnabled) + return RValue::get( + Builder.CreateArithmeticFence(ArgValue, ConvertType(ArgType))); + return RValue::get(ArgValue); + } case Builtin::BI__builtin_bswap16: case Builtin::BI__builtin_bswap32: case Builtin::BI__builtin_bswap64: { diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 00939eae42998..98023334d8805 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -4975,6 +4975,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, false)) CmdArgs.push_back("-fsplit-stack"); + // -fprotect-parens=0 is default. + if (Args.hasFlag(options::OPT_fprotect_parens, + options::OPT_fno_protect_parens, false)) + CmdArgs.push_back("-fprotect-parens"); + RenderFloatingPointOptions(TC, D, OFastEnabled, Args, CmdArgs, JA); if (Arg *A = Args.getLastArg(options::OPT_fextend_args_EQ)) { diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 17eeebb0c6799..4d04f3017a2ef 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -1554,6 +1554,10 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, Diag(TheCall->getBeginLoc(), diag::warn_alloca) << TheCall->getDirectCallee(); break; + case Builtin::BI__arithmetic_fence: + if (SemaBuiltinArithmeticFence(TheCall)) + return ExprError(); + break; case Builtin::BI__assume: case Builtin::BI__builtin_assume: if (SemaBuiltinAssume(TheCall)) @@ -6549,6 +6553,29 @@ bool Sema::SemaBuiltinPrefetch(CallExpr *TheCall) { return false; } +/// SemaBuiltinArithmeticFence - Handle __arithmetic_fence. +bool Sema::SemaBuiltinArithmeticFence(CallExpr *TheCall) { + if (!Context.getTargetInfo().checkArithmeticFenceSupported()) + return Diag(TheCall->getBeginLoc(), diag::err_builtin_target_unsupported) + << SourceRange(TheCall->getBeginLoc(), TheCall->getEndLoc()); + if (checkArgCount(*this, TheCall, 1)) + return true; + Expr *Arg = TheCall->getArg(0); + if (Arg->isInstantiationDependent()) + return false; + + QualType ArgTy = Arg->getType(); + if (!ArgTy->hasFloatingRepresentation()) + return Diag(TheCall->getEndLoc(), diag::err_typecheck_expect_flt_or_vector) + << ArgTy; + if (Arg->isLValue()) { + ExprResult FirstArg = DefaultLvalueConversion(Arg); + TheCall->setArg(0, FirstArg.get()); + } + TheCall->setType(TheCall->getArg(0)->getType()); + return false; +} + /// SemaBuiltinAssume - Handle __assume (MS Extension). // __assume does not evaluate its arguments, and should warn if its argument // has side effects. diff --git a/clang/lib/Sema/SemaCoroutine.cpp b/clang/lib/Sema/SemaCoroutine.cpp index cec80436d575e..31a4092b5b604 100644 --- a/clang/lib/Sema/SemaCoroutine.cpp +++ b/clang/lib/Sema/SemaCoroutine.cpp @@ -291,26 +291,6 @@ static ExprResult buildOperatorCoawaitCall(Sema &SemaRef, Scope *S, cast(R.get())); } -static Expr *buildBuiltinCall(Sema &S, SourceLocation Loc, Builtin::ID Id, - MultiExprArg CallArgs) { - StringRef Name = S.Context.BuiltinInfo.getName(Id); - LookupResult R(S, &S.Context.Idents.get(Name), Loc, Sema::LookupOrdinaryName); - S.LookupName(R, S.TUScope, /*AllowBuiltinCreation=*/true); - - auto *BuiltInDecl = R.getAsSingle(); - assert(BuiltInDecl && "failed to find builtin declaration"); - - ExprResult DeclRef = - S.BuildDeclRefExpr(BuiltInDecl, BuiltInDecl->getType(), VK_LValue, Loc); - assert(DeclRef.isUsable() && "Builtin reference cannot fail"); - - ExprResult Call = - S.BuildCallExpr(/*Scope=*/nullptr, DeclRef.get(), Loc, CallArgs, Loc); - - assert(!Call.isInvalid() && "Call to builtin cannot fail!"); - return Call.get(); -} - static ExprResult buildCoroutineHandle(Sema &S, QualType PromiseType, SourceLocation Loc) { QualType CoroHandleType = lookupCoroutineHandleType(S, PromiseType, Loc); @@ -327,7 +307,7 @@ static ExprResult buildCoroutineHandle(Sema &S, QualType PromiseType, } Expr *FramePtr = - buildBuiltinCall(S, Loc, Builtin::BI__builtin_coro_frame, {}); + S.BuildBuiltinCallExpr(Loc, Builtin::BI__builtin_coro_frame, {}); CXXScopeSpec SS; ExprResult FromAddr = @@ -404,8 +384,8 @@ static Expr *maybeTailCall(Sema &S, QualType RetType, Expr *E, // the resume call and return instruction, which would interfere with the // musttail call contract. JustAddress = S.MaybeCreateExprWithCleanups(JustAddress); - return buildBuiltinCall(S, Loc, Builtin::BI__builtin_coro_resume, - JustAddress); + return S.BuildBuiltinCallExpr(Loc, Builtin::BI__builtin_coro_resume, + JustAddress); } /// Build calls to await_ready, await_suspend, and await_resume for a co_await @@ -1357,10 +1337,10 @@ bool CoroutineStmtBuilder::makeNewAndDeleteExpr() { return false; Expr *FramePtr = - buildBuiltinCall(S, Loc, Builtin::BI__builtin_coro_frame, {}); + S.BuildBuiltinCallExpr(Loc, Builtin::BI__builtin_coro_frame, {}); Expr *FrameSize = - buildBuiltinCall(S, Loc, Builtin::BI__builtin_coro_size, {}); + S.BuildBuiltinCallExpr(Loc, Builtin::BI__builtin_coro_size, {}); // Make new call. @@ -1389,7 +1369,7 @@ bool CoroutineStmtBuilder::makeNewAndDeleteExpr() { return false; Expr *CoroFree = - buildBuiltinCall(S, Loc, Builtin::BI__builtin_coro_free, {FramePtr}); + S.BuildBuiltinCallExpr(Loc, Builtin::BI__builtin_coro_free, {FramePtr}); SmallVector DeleteArgs{CoroFree}; diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 6031dff673351..3df74b5ea9dbc 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -4054,6 +4054,10 @@ ExprResult Sema::ActOnNumericConstant(const Token &Tok, Scope *UDLScope) { ExprResult Sema::ActOnParenExpr(SourceLocation L, SourceLocation R, Expr *E) { assert(E && "ActOnParenExpr() missing expr"); + QualType ExprTy = E->getType(); + if (getLangOpts().ProtectParens && CurFPFeatures.getAllowFPReassociate() && + !E->isLValue() && ExprTy->hasFloatingRepresentation()) + return BuildBuiltinCallExpr(R, Builtin::BI__arithmetic_fence, E); return new (Context) ParenExpr(L, R, E); } @@ -6560,6 +6564,29 @@ ExprResult Sema::BuildCallExpr(Scope *Scope, Expr *Fn, SourceLocation LParenLoc, ExecConfig, IsExecConfig); } +/// BuildBuiltinCallExpr - Create a call to a builtin function specified by Id +// with the specified CallArgs +Expr *Sema::BuildBuiltinCallExpr(SourceLocation Loc, Builtin::ID Id, + MultiExprArg CallArgs) { + StringRef Name = Context.BuiltinInfo.getName(Id); + LookupResult R(*this, &Context.Idents.get(Name), Loc, + Sema::LookupOrdinaryName); + LookupName(R, TUScope, /*AllowBuiltinCreation=*/true); + + auto *BuiltInDecl = R.getAsSingle(); + assert(BuiltInDecl && "failed to find builtin declaration"); + + ExprResult DeclRef = + BuildDeclRefExpr(BuiltInDecl, BuiltInDecl->getType(), VK_LValue, Loc); + assert(DeclRef.isUsable() && "Builtin reference cannot fail"); + + ExprResult Call = + BuildCallExpr(/*Scope=*/nullptr, DeclRef.get(), Loc, CallArgs, Loc); + + assert(!Call.isInvalid() && "Call to builtin cannot fail!"); + return Call.get(); +} + /// Parse a __builtin_astype expression. /// /// __builtin_astype( value, dst type ) diff --git a/clang/test/AST/arithmetic-fence-builtin.c b/clang/test/AST/arithmetic-fence-builtin.c new file mode 100644 index 0000000000000..46666b3c8bed5 --- /dev/null +++ b/clang/test/AST/arithmetic-fence-builtin.c @@ -0,0 +1,46 @@ +// Tests without serialization: +// RUN: %clang_cc1 -ast-dump -triple i386-pc-linux-gnu %s \ +// RUN: | FileCheck %s --strict-whitespace --check-prefixes=CHECK,CHECK1 +// +// RUN: %clang_cc1 -ast-dump -triple i386-pc-linux-gnu -DFAST -mreassociate %s \ +// RUN: | FileCheck %s --strict-whitespace --check-prefixes=CHECK,CHECK1 +// +// RUN: %clang_cc1 -ast-dump -triple i386-pc-linux-gnu -DFAST -mreassociate %s \ +// RUN: -fprotect-parens \ +// RUN: | FileCheck %s --strict-whitespace --check-prefixes=CHECK,CHECK2 +// +// Tests with serialization: +// RUN: %clang_cc1 -ast-dump -triple i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -triple i386-pc-linux-gnu -include-pch %t -ast-dump-all /dev/null \ +// RUN: | FileCheck %s --strict-whitespace +// +// RUN: %clang_cc1 -ast-dump -triple i386-pc-linux-gnu -DFAST -mreassociate %s \ +// RUN: -emit-pch -o %t +// RUN: %clang_cc1 -triple i386-pc-linux-gnu -include-pch %t -ast-dump-all /dev/null \ +// RUN: | FileCheck %s --strict-whitespace --check-prefixes=CHECK,CHECK1 +// +// RUN: %clang_cc1 -ast-dump -triple i386-pc-linux-gnu -DFAST -mreassociate %s \ +// RUN: -fprotect-parens \ +// RUN: -emit-pch -o %t +// RUN: %clang_cc1 -triple i386-pc-linux-gnu -include-pch %t -ast-dump-all /dev/null -fprotect-parens\ +// RUN: | FileCheck %s --strict-whitespace --check-prefixes=CHECK,CHECK2 + +// +int v; +int addit(float a, float b) { + + v = __arithmetic_fence(a + b); + + v = (a + b); + + return 0; +} +//CHECK:| `-CompoundStmt {{.*}} +//CHECK-NEXT:| |-BinaryOperator {{.*}} 'int' '=' +//CHECK-NEXT:| | |-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'v' 'int' +//CHECK-NEXT:| | `-ImplicitCastExpr {{.*}} +//CHECK-NEXT:| | `-CallExpr {{.*}} 'float' +//CHECK-NEXT:| | |-ImplicitCastExpr {{.*}} +//CHECK-NEXT:| | | `-DeclRefExpr {{.*}}' Function {{.*}} '__arithmetic_fence'{{.*}} +//CHECK1-NOT:| | | `-DeclRefExpr {{.*}}' Function{{.*}} '__arithmetic_fence' 'void ()' +//CHECK2:| | | `-DeclRefExpr {{.*}} Function{{.*}} '__arithmetic_fence' 'void ()' diff --git a/clang/test/CodeGen/arithmetic-fence-builtin.c b/clang/test/CodeGen/arithmetic-fence-builtin.c new file mode 100644 index 0000000000000..6b5b5b4b9cefa --- /dev/null +++ b/clang/test/CodeGen/arithmetic-fence-builtin.c @@ -0,0 +1,74 @@ +// Test with fast math +// RUN: %clang_cc1 -triple i386-pc-linux-gnu -emit-llvm -DFAST \ +// RUN: -mreassociate \ +// RUN: -o - %s | FileCheck --check-prefixes CHECK,CHECKFAST,CHECKNP %s +// +// Test with fast math and fprotect-parens +// RUN: %clang_cc1 -triple i386-pc-linux-gnu -emit-llvm -DFAST \ +// RUN: -mreassociate -fprotect-parens -ffp-contract=on\ +// RUN: -o - %s | FileCheck --check-prefixes CHECK,CHECKFAST,CHECKPP %s +// +// Test without fast math: llvm intrinsic not created +// RUN: %clang_cc1 -triple i386-pc-linux-gnu -emit-llvm -fprotect-parens\ +// RUN: -o - %s | FileCheck --implicit-check-not="llvm.arithmetic.fence" %s +// +int v; +int addit(float a, float b) { + // CHECK: define {{.*}}@addit(float %a, float %b) #0 { + _Complex double cd, cd1; + cd = __arithmetic_fence(cd1); + // CHECKFAST: call{{.*}} double @llvm.arithmetic.fence.f64({{.*}}real) + // CHECKFAST: call{{.*}} double @llvm.arithmetic.fence.f64({{.*}}imag) + // Vector should be supported. + typedef float __v2f32 __attribute__((__vector_size__(8))); + __v2f32 vec1, vec2; + vec1 = __arithmetic_fence(vec2); + // CHECKFAST: call{{.*}} <2 x float> @llvm.arithmetic.fence.v2f32 + vec2 = (vec2 + vec1); + // CHECKPP: call{{.*}} <2 x float> @llvm.arithmetic.fence.v2f32 + + v = __arithmetic_fence(a + b); + // CHECKFAST: call{{.*}} float @llvm.arithmetic.fence.f32(float %add{{.*}}) + + v = (a + b); + // CHECKPP: call{{.*}} float @llvm.arithmetic.fence.f32(float %add{{.*}}) + v = a + (b*b); + // CHECKPP: fmul reassoc + // CHECKPP-NEXT: call{{.*}} float @llvm.arithmetic.fence.f32(float %mul) + // CHECKNP: fmul + // CHECKNP: fadd + v = b + a*a; + // CHECKPP: call{{.*}} float @llvm.fmuladd.f32 + // CHECKNP: fmul + // CHECKNP: fadd + v = b + __arithmetic_fence(a*a); // Fence blocks recognition of FMA + // CHECKPP: fmul + // CHECKNP: fmul + + b = (a); + (a) = b; + // CHECK-NEXT fptosi + // CHECK-NEXT store i32 + // CHECK-NEXT load float + // CHECK-NEXT store float + // CHECK-NEXT load float + // CHECK-NEXT store float + return 0; + // CHECK-NEXT ret i32 0 +} +int addit1(int a, int b) { + // CHECK: define {{.*}}@addit1(i32 %a, i32 %b{{.*}} + v = (a + b); + // CHECK-NOT: call{{.*}} float @llvm.arithmetic.fence.int(float %add) + return 0; +} +#ifdef FAST +#pragma float_control(precise, on) +int subit(float a, float b, float *fp) { + // CHECKFAST: define {{.*}}@subit(float %a, float %b{{.*}} + *fp = __arithmetic_fence(a - b); + *fp = (a + b); + // CHECK-NOT: call{{.*}} float @llvm.arithmetic.fence.f32(float %add) + return 0; +} +#endif diff --git a/clang/test/Driver/clang_f_opts.c b/clang/test/Driver/clang_f_opts.c index a255f68713aec..d729378403f3f 100644 --- a/clang/test/Driver/clang_f_opts.c +++ b/clang/test/Driver/clang_f_opts.c @@ -1,13 +1,14 @@ // REQUIRES: clang-driver // RUN: %clang -### -S -fasm -fblocks -fbuiltin -fno-math-errno -fcommon -fpascal-strings -fno-blocks -fno-builtin -fmath-errno -fno-common -fno-pascal-strings -fblocks -fbuiltin -fmath-errno -fcommon -fpascal-strings -fsplit-stack %s 2>&1 | FileCheck -check-prefix=CHECK-OPTIONS1 %s -// RUN: %clang -### -S -fasm -fblocks -fbuiltin -fno-math-errno -fcommon -fpascal-strings -fno-asm -fno-blocks -fno-builtin -fmath-errno -fno-common -fno-pascal-strings -fno-show-source-location -fshort-enums %s 2>&1 | FileCheck -check-prefix=CHECK-OPTIONS2 %s +// RUN: %clang -### -S -fasm -fblocks -fbuiltin -fno-math-errno -fcommon -fpascal-strings -fno-asm -fno-blocks -fno-builtin -fmath-errno -fno-common -fno-pascal-strings -fno-show-source-location -fshort-enums -fprotect-parens %s 2>&1 | FileCheck -check-prefix=CHECK-OPTIONS2 %s // CHECK-OPTIONS1: -fsplit-stack // CHECK-OPTIONS1: -fgnu-keywords // CHECK-OPTIONS1: -fblocks // CHECK-OPTIONS1: -fpascal-strings +// CHECK-OPTIONS2: -fprotect-parens // CHECK-OPTIONS2: -fmath-errno // CHECK-OPTIONS2: -fno-gnu-keywords // CHECK-OPTIONS2: -fno-builtin diff --git a/clang/test/Sema/arithmetic-fence-builtin.c b/clang/test/Sema/arithmetic-fence-builtin.c new file mode 100644 index 0000000000000..4f4f0a02cde9e --- /dev/null +++ b/clang/test/Sema/arithmetic-fence-builtin.c @@ -0,0 +1,48 @@ +// RUN: %clang_cc1 -triple i386-pc-linux-gnu -emit-llvm -o - -verify -x c++ %s +// RUN: %clang_cc1 -triple ppc64le -DPPC -emit-llvm -o - -verify -x c++ %s +// RUN: not %clang_cc1 -triple ppc64le -DPPC -emit-llvm -o - -x c++ %s \ +// RUN: -fprotect-parens 2>&1 | FileCheck -check-prefix=PPC %s +#ifndef PPC +int v; +template T addT(T a, T b) { + T *q = __arithmetic_fence(&a); + // expected-error@-1 {{invalid operand of type 'float *' where floating, complex or a vector of such types is required}} + // expected-error@-2 {{invalid operand of type 'int *' where floating, complex or a vector of such types is required}} + return __arithmetic_fence(a + b); + // expected-error@-1 {{invalid operand of type 'int' where floating, complex or a vector of such types is required}} +} +int addit(int a, int b) { + float x, y; + typedef struct { + int a, b; + } stype; + stype s; + s = __arithmetic_fence(s); // expected-error {{invalid operand of type 'stype' where floating, complex or a vector of such types is required}} + x = __arithmetic_fence(); // expected-error {{too few arguments to function call, expected 1, have 0}} + x = __arithmetic_fence(x, y); // expected-error {{too many arguments to function call, expected 1, have 2}} + // Complex is supported. + _Complex double cd, cd1; + cd = __arithmetic_fence(cd1); + // Vector is supported. + typedef float __v4hi __attribute__((__vector_size__(8))); + __v4hi vec1, vec2; + vec1 = __arithmetic_fence(vec2); + + v = __arithmetic_fence(a + b); // expected-error {{invalid operand of type 'int' where floating, complex or a vector of such types is required}} + float f = addT(a, b); // expected-note {{in instantiation of function template specialization 'addT' requested here}} + int i = addT(1, 2); // expected-note {{in instantiation of function template specialization 'addT' requested here}} + constexpr float d = 1.0 + 2.0; + constexpr float c = __arithmetic_fence(1.0 + 2.0); + constexpr float e = __arithmetic_fence(d); + return 0; +} +bool func(float f1, float f2, float f3) { + return (f1 == f2 && f1 == f3) || f2 == f3; // Should not warn here +} +static_assert( __arithmetic_fence(1.0 + 2.0), "message" ); +#else +float addit(float a, float b) { + return __arithmetic_fence(a+b); // expected-error {{builtin is not supported on this target}} +} +#endif +//PPC: error: option '-fprotect-parens' cannot be specified on this target From dfb34c0df9d00a10f2563e989f257d3736969784 Mon Sep 17 00:00:00 2001 From: "William S. Moses" Date: Fri, 25 Jun 2021 19:40:35 -0400 Subject: [PATCH 288/619] [MLIR][SCF] Inline ExecuteRegion if parent can contain multiple blocks The executeregionop is used to allow multiple blocks within SCF constructs. If the container allows multiple blocks, inline the region Differential Revision: https://reviews.llvm.org/D104960 --- mlir/include/mlir/Dialect/SCF/SCFOps.td | 6 -- mlir/lib/Dialect/SCF/SCF.cpp | 77 ++++++++++++++++++++++++- mlir/test/Dialect/SCF/canonicalize.mlir | 69 ++++++++++++++++++++++ 3 files changed, 143 insertions(+), 9 deletions(-) diff --git a/mlir/include/mlir/Dialect/SCF/SCFOps.td b/mlir/include/mlir/Dialect/SCF/SCFOps.td index c10441f59bd55..9f039b6fcda68 100644 --- a/mlir/include/mlir/Dialect/SCF/SCFOps.td +++ b/mlir/include/mlir/Dialect/SCF/SCFOps.td @@ -108,14 +108,8 @@ def ExecuteRegionOp : SCF_Op<"execute_region"> { let regions = (region AnyRegion:$region); - // TODO: If the parent is a func like op (which would be the case if all other - // ops are from the std dialect), the inliner logic could be readily used to - // inline. let hasCanonicalizer = 1; - // TODO: can fold if it returns a constant. - // TODO: Single block execute_region ops can be readily inlined irrespective - // of which op is a parent. Add a fold for this. let hasFolder = 0; } diff --git a/mlir/lib/Dialect/SCF/SCF.cpp b/mlir/lib/Dialect/SCF/SCF.cpp index 75f8430bd3e03..ef1238da5f348 100644 --- a/mlir/lib/Dialect/SCF/SCF.cpp +++ b/mlir/lib/Dialect/SCF/SCF.cpp @@ -145,23 +145,94 @@ static LogicalResult verify(ExecuteRegionOp op) { // // "test.foo"() : () -> () // %x = "test.val"() : () -> i64 -// "test.bar"(%v) : (i64) -> () +// "test.bar"(%x) : (i64) -> () // struct SingleBlockExecuteInliner : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(ExecuteRegionOp op, PatternRewriter &rewriter) const override { - if (op.region().getBlocks().size() != 1) + if (!llvm::hasSingleElement(op.region())) return failure(); replaceOpWithRegion(rewriter, op, op.region()); return success(); } }; +// Inline an ExecuteRegionOp if its parent can contain multiple blocks. +// TODO generalize the conditions for operations which can be inlined into. +// func @func_execute_region_elim() { +// "test.foo"() : () -> () +// %v = scf.execute_region -> i64 { +// %c = "test.cmp"() : () -> i1 +// cond_br %c, ^bb2, ^bb3 +// ^bb2: +// %x = "test.val1"() : () -> i64 +// br ^bb4(%x : i64) +// ^bb3: +// %y = "test.val2"() : () -> i64 +// br ^bb4(%y : i64) +// ^bb4(%z : i64): +// scf.yield %z : i64 +// } +// "test.bar"(%v) : (i64) -> () +// return +// } +// +// becomes +// +// func @func_execute_region_elim() { +// "test.foo"() : () -> () +// %c = "test.cmp"() : () -> i1 +// cond_br %c, ^bb1, ^bb2 +// ^bb1: // pred: ^bb0 +// %x = "test.val1"() : () -> i64 +// br ^bb3(%x : i64) +// ^bb2: // pred: ^bb0 +// %y = "test.val2"() : () -> i64 +// br ^bb3(%y : i64) +// ^bb3(%z: i64): // 2 preds: ^bb1, ^bb2 +// "test.bar"(%z) : (i64) -> () +// return +// } +// +struct MultiBlockExecuteInliner : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(ExecuteRegionOp op, + PatternRewriter &rewriter) const override { + if (!isa(op->getParentOp())) + return failure(); + + Block *prevBlock = op->getBlock(); + Block *postBlock = rewriter.splitBlock(prevBlock, op->getIterator()); + rewriter.setInsertionPointToEnd(prevBlock); + + rewriter.create(op.getLoc(), &op.region().front()); + + for (Block &blk : op.region()) { + if (YieldOp yieldOp = dyn_cast(blk.getTerminator())) { + rewriter.setInsertionPoint(yieldOp); + rewriter.create(yieldOp.getLoc(), postBlock, + yieldOp.results()); + rewriter.eraseOp(yieldOp); + } + } + + rewriter.inlineRegionBefore(op.region(), postBlock); + SmallVector blockArgs; + + for (auto res : op.getResults()) + blockArgs.push_back(postBlock->addArgument(res.getType())); + + rewriter.replaceOp(op, blockArgs); + return success(); + } +}; + void ExecuteRegionOp::getCanonicalizationPatterns(RewritePatternSet &results, MLIRContext *context) { - results.add(context); + results.add(context); } //===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/SCF/canonicalize.mlir b/mlir/test/Dialect/SCF/canonicalize.mlir index 8692f2d9705e0..3120a331303ed 100644 --- a/mlir/test/Dialect/SCF/canonicalize.mlir +++ b/mlir/test/Dialect/SCF/canonicalize.mlir @@ -948,3 +948,72 @@ func @execute_region_elim() { // CHECK-NEXT: "test.bar"(%[[VAL]]) : (i64) -> () // CHECK-NEXT: } + +// ----- + +// CHECK-LABEL: func @func_execute_region_elim +func @func_execute_region_elim() { + "test.foo"() : () -> () + %v = scf.execute_region -> i64 { + %c = "test.cmp"() : () -> i1 + cond_br %c, ^bb2, ^bb3 + ^bb2: + %x = "test.val1"() : () -> i64 + br ^bb4(%x : i64) + ^bb3: + %y = "test.val2"() : () -> i64 + br ^bb4(%y : i64) + ^bb4(%z : i64): + scf.yield %z : i64 + } + "test.bar"(%v) : (i64) -> () + return +} + +// CHECK-NOT: execute_region +// CHECK: "test.foo" +// CHECK: %[[cmp:.+]] = "test.cmp" +// CHECK: cond_br %[[cmp]], ^[[bb1:.+]], ^[[bb2:.+]] +// CHECK: ^[[bb1]]: +// CHECK: %[[x:.+]] = "test.val1" +// CHECK: br ^[[bb3:.+]](%[[x]] : i64) +// CHECK: ^[[bb2]]: +// CHECK: %[[y:.+]] = "test.val2" +// CHECK: br ^[[bb3]](%[[y:.+]] : i64) +// CHECK: ^[[bb3]](%[[z:.+]]: i64): +// CHECK: "test.bar"(%[[z]]) +// CHECK: return + + +// ----- + +// CHECK-LABEL: func @func_execute_region_elim_multi_yield +func @func_execute_region_elim_multi_yield() { + "test.foo"() : () -> () + %v = scf.execute_region -> i64 { + %c = "test.cmp"() : () -> i1 + cond_br %c, ^bb2, ^bb3 + ^bb2: + %x = "test.val1"() : () -> i64 + scf.yield %x : i64 + ^bb3: + %y = "test.val2"() : () -> i64 + scf.yield %y : i64 + } + "test.bar"(%v) : (i64) -> () + return +} + +// CHECK-NOT: execute_region +// CHECK: "test.foo" +// CHECK: %[[cmp:.+]] = "test.cmp" +// CHECK: cond_br %[[cmp]], ^[[bb1:.+]], ^[[bb2:.+]] +// CHECK: ^[[bb1]]: +// CHECK: %[[x:.+]] = "test.val1" +// CHECK: br ^[[bb3:.+]](%[[x]] : i64) +// CHECK: ^[[bb2]]: +// CHECK: %[[y:.+]] = "test.val2" +// CHECK: br ^[[bb3]](%[[y:.+]] : i64) +// CHECK: ^[[bb3]](%[[z:.+]]: i64): +// CHECK: "test.bar"(%[[z]]) +// CHECK: return From ad4152d1b833f4696d6f9deccc021f240bab3d9d Mon Sep 17 00:00:00 2001 From: "William S. Moses" Date: Wed, 30 Jun 2021 10:09:42 -0400 Subject: [PATCH 289/619] [MLIR] Update description of SCF.execute_region op See https://reviews.llvm.org/D104865 --- mlir/include/mlir/Dialect/SCF/SCFOps.td | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/mlir/include/mlir/Dialect/SCF/SCFOps.td b/mlir/include/mlir/Dialect/SCF/SCFOps.td index 9f039b6fcda68..feb862d31ae04 100644 --- a/mlir/include/mlir/Dialect/SCF/SCFOps.td +++ b/mlir/include/mlir/Dialect/SCF/SCFOps.td @@ -62,16 +62,13 @@ def ConditionOp : SCF_Op<"condition", def ExecuteRegionOp : SCF_Op<"execute_region"> { let summary = "operation that executes its region exactly once"; let description = [{ - The `execute_region` operation executes the region held exactly once. The op - cannot have any operands, nor does its region have any arguments. All SSA - values that dominate the op can be accessed inside. The op's region can have - multiple blocks and the blocks can have terminators the same way as FuncOp. - The values returned from this op's region define the op's results. The op - primarily provides control flow encapsulation and isolation from a parent - op's control flow restrictions if any; for example, it allows representation - of inlined calls in the inside of structured control flow ops with - restrictions like affine.for/if, scf.for/if ops, and thus the optimization - of IR in such a mixed form. + The `execute_region` operation is used to allow multiple blocks within SCF + and other operations which can hold only one block. The `execute_region` + operation executes the region held exactly once and cannot have any operands. + As such, its region has no arguments. All SSA values that dominate the op can + be accessed inside the op. The op's region can have multiple blocks and the + blocks can have multiple distinct terminators. Values returned from this op's + region define the op's results. Example: From db86e5c91477286b6432a75857edf012652c58d9 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Wed, 30 Jun 2021 10:21:33 -0400 Subject: [PATCH 290/619] Revert "[Coroutine] Add statistics for the number of elided coroutine" This reverts commit 1d9539cf49a585e7c3cd8faa1b8e7291e0ce285c. Test fails in LLVM_ENABLE_ASSERTIONS=OFF builds (such as regular release builds). --- llvm/lib/Transforms/Coroutines/CoroElide.cpp | 4 ---- llvm/test/Transforms/Coroutines/coro-elide.ll | 5 ++--- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/Coroutines/CoroElide.cpp b/llvm/lib/Transforms/Coroutines/CoroElide.cpp index 18bd56c45de19..9f0adae58948a 100644 --- a/llvm/lib/Transforms/Coroutines/CoroElide.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroElide.cpp @@ -9,7 +9,6 @@ #include "llvm/Transforms/Coroutines/CoroElide.h" #include "CoroInternal.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/IR/Dominators.h" @@ -22,8 +21,6 @@ using namespace llvm; #define DEBUG_TYPE "coro-elide" -STATISTIC(NumOfCoroElided, "The # of coroutine get elided."); - namespace { // Created on demand if the coro-elide pass has work to do. struct Lowerer : coro::LowererBase { @@ -347,7 +344,6 @@ bool Lowerer::processCoroId(CoroIdInst *CoroId, AAResults &AA, elideHeapAllocations(CoroId->getFunction(), FrameSizeAndAlign.first, FrameSizeAndAlign.second, AA); coro::replaceCoroFree(CoroId, /*Elide=*/true); - NumOfCoroElided++; } return true; diff --git a/llvm/test/Transforms/Coroutines/coro-elide.ll b/llvm/test/Transforms/Coroutines/coro-elide.ll index 040a1a05d5c6d..674996b79e923 100644 --- a/llvm/test/Transforms/Coroutines/coro-elide.ll +++ b/llvm/test/Transforms/Coroutines/coro-elide.ll @@ -1,8 +1,8 @@ ; Tests that the coro.destroy and coro.resume are devirtualized where possible, ; SCC pipeline restarts and inlines the direct calls. ; RUN: opt < %s -S \ -; RUN: -passes='cgscc(repeat<2>(inline,function(coro-elide,dce)))' -stats \ -; RUN: 2>&1 | FileCheck %s --check-prefixes=CHECK,STATS +; RUN: -passes='cgscc(repeat<2>(inline,function(coro-elide,dce)))' \ +; RUN: | FileCheck %s declare void @print(i32) nounwind @@ -165,4 +165,3 @@ declare i8* @llvm.coro.begin(token, i8*) declare i8* @llvm.coro.frame() declare i8* @llvm.coro.subfn.addr(i8*, i8) declare i1 @llvm.coro.alloc(token) -; STATS: 2 coro-elide - The # of coroutine get elided. \ No newline at end of file From 47941d601debe2d543a5f9452c777072c708ccdd Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 30 Jun 2021 15:01:51 +0100 Subject: [PATCH 291/619] [CostModel][X86] Adjust fp<->int vXi32 AVX1+ costs based on llvm-mca reports Based off the worse case numbers generated by D103695, the AVX1/2/512 sitofp/uitofp/fptosi/fptoui costs were higher than necessary (based off instruction counts instead of actual throughput). The SSE costs still need further fixes, but I hit an issue with the order in which SSE costs are checked - we need to check CUSTOM costs (with non-legal types) first, and then fallback to LEGALIZED types. I'm looking at this now, and this should let us start thinning out a lot of the duplicates in the costs tables. Then we can finally start work on vXi64 / vXi16 / vXi8 / vXi1 integers, which should let us look at sub-128-bit vectorization (D103925). --- .../lib/Target/X86/X86TargetTransformInfo.cpp | 51 ++++++- llvm/test/Analysis/CostModel/X86/cast.ll | 133 ++++++++++++------ llvm/test/Analysis/CostModel/X86/fptosi.ll | 47 ++++--- llvm/test/Analysis/CostModel/X86/fptoui.ll | 47 ++++--- llvm/test/Analysis/CostModel/X86/sitofp.ll | 100 ++++++++----- llvm/test/Analysis/CostModel/X86/uitofp.ll | 31 ++-- .../Transforms/SLPVectorizer/X86/fptoui.ll | 70 +++++---- .../SLPVectorizer/X86/sitofp-inseltpoison.ll | 22 +-- .../Transforms/SLPVectorizer/X86/sitofp.ll | 22 +-- .../Transforms/SLPVectorizer/X86/uitofp.ll | 34 +++-- 10 files changed, 369 insertions(+), 188 deletions(-) diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 9425027699646..d245324cc9ce6 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -1696,8 +1696,10 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f64, 3 }, { ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f64, 3 }, + { ISD::FP_TO_SINT, MVT::v8i32, MVT::v8f64, 1 }, { ISD::FP_TO_SINT, MVT::v16i8, MVT::v16f32, 3 }, { ISD::FP_TO_SINT, MVT::v16i16, MVT::v16f32, 3 }, + { ISD::FP_TO_SINT, MVT::v16i32, MVT::v16f64, 3 }, { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f64, 1 }, { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f64, 3 }, @@ -1822,7 +1824,7 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 5 }, { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i16, 2 }, { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 2 }, - { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 2 }, + { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 }, { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 }, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 1 }, @@ -1878,7 +1880,27 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, { ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, 3 }, { ISD::FP_ROUND, MVT::v8f32, MVT::v8f64, 3 }, - { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 5 }, + { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f64, 1 }, + { ISD::FP_TO_SINT, MVT::v8i32, MVT::v8f32, 1 }, + { ISD::FP_TO_SINT, MVT::v8i32, MVT::v8f64, 3 }, + + { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 4 }, + { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 7 }, + { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 4 }, + { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f64, 7 }, + { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f32, 4 }, + { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f64, 15 }, + + { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i32, 1 }, + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 1 }, + { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i32, 3 }, + + { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 2 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 2 }, + { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 2 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 }, + { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i32, 4 }, }; static const TypeConversionCostTblEntry AVXConversionTbl[] = { @@ -1928,9 +1950,9 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 3 }, { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i16, 3 }, { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 }, - { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, - { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i32, 1 }, - { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 1 }, + { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i32, 2 }, + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 }, + { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i32, 4 }, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 7 }, { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i1, 7 }, @@ -1941,10 +1963,11 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 }, { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i16, 2 }, { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 }, - { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 6 }, - { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 6 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 4 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 5 }, { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 6 }, { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 8 }, + { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i32, 10 }, { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 5 }, { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 6 }, // The generic code to compute the scalar overhead is currently broken. @@ -1958,13 +1981,21 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 4 }, { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f64, 3 }, { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f64, 2 }, + { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f64, 2 }, { ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 3 }, + { ISD::FP_TO_SINT, MVT::v8i32, MVT::v8f32, 2 }, + { ISD::FP_TO_SINT, MVT::v8i32, MVT::v8f64, 5 }, + { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 5 }, + { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 9 }, + { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 5 }, { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f64, 3 }, { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f64, 2 }, + { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f64, 9 }, { ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f32, 4 }, { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 3 }, { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f32, 9 }, + { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f64, 19 }, // This node is expanded into scalarized operations but BasicTTI is overly // optimistic estimating its cost. It computes 3 per element (one // vector-extract, one scalar conversion and one vector-insert). The @@ -2030,6 +2061,12 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 6 }, { ISD::TRUNCATE, MVT::v2i8, MVT::v2i64, 1 }, // PSHUFB + { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 1 }, + { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i32, 2 }, + { ISD::UINT_TO_FP, MVT::f32, MVT::i64, 4 }, { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 4 }, diff --git a/llvm/test/Analysis/CostModel/X86/cast.ll b/llvm/test/Analysis/CostModel/X86/cast.ll index 305ca54aa4b11..60fd218a19f6c 100644 --- a/llvm/test/Analysis/CostModel/X86/cast.ll +++ b/llvm/test/Analysis/CostModel/X86/cast.ll @@ -372,27 +372,49 @@ define i32 @masks4(<4 x i1> %in) { } define void @sitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) { -; SSE-LABEL: 'sitofp4' -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %A1 = sitofp <4 x i1> %a to <4 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %A2 = sitofp <4 x i1> %a to <4 x double> -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = sitofp <4 x i8> %b to <4 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %B2 = sitofp <4 x i8> %b to <4 x double> -; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %C1 = sitofp <4 x i16> %c to <4 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %C2 = sitofp <4 x i16> %c to <4 x double> -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %D1 = sitofp <4 x i32> %d to <4 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %D2 = sitofp <4 x i32> %d to <4 x double> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; SSE2-LABEL: 'sitofp4' +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %A1 = sitofp <4 x i1> %a to <4 x float> +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %A2 = sitofp <4 x i1> %a to <4 x double> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = sitofp <4 x i8> %b to <4 x float> +; SSE2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %B2 = sitofp <4 x i8> %b to <4 x double> +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %C1 = sitofp <4 x i16> %c to <4 x float> +; SSE2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %C2 = sitofp <4 x i16> %c to <4 x double> +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %D1 = sitofp <4 x i32> %d to <4 x float> +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %D2 = sitofp <4 x i32> %d to <4 x double> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE41-LABEL: 'sitofp4' +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A1 = sitofp <4 x i1> %a to <4 x float> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A2 = sitofp <4 x i1> %a to <4 x double> +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = sitofp <4 x i8> %b to <4 x float> +; SSE41-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %B2 = sitofp <4 x i8> %b to <4 x double> +; SSE41-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %C1 = sitofp <4 x i16> %c to <4 x float> +; SSE41-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %C2 = sitofp <4 x i16> %c to <4 x double> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D1 = sitofp <4 x i32> %d to <4 x float> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D2 = sitofp <4 x i32> %d to <4 x double> +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX1-LABEL: 'sitofp4' +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %A1 = sitofp <4 x i1> %a to <4 x float> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %A2 = sitofp <4 x i1> %a to <4 x double> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %B1 = sitofp <4 x i8> %b to <4 x float> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %B2 = sitofp <4 x i8> %b to <4 x double> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C1 = sitofp <4 x i16> %c to <4 x float> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C2 = sitofp <4 x i16> %c to <4 x double> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D1 = sitofp <4 x i32> %d to <4 x float> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %D2 = sitofp <4 x i32> %d to <4 x double> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; AVX-LABEL: 'sitofp4' -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %A1 = sitofp <4 x i1> %a to <4 x float> -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %A2 = sitofp <4 x i1> %a to <4 x double> -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %B1 = sitofp <4 x i8> %b to <4 x float> -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %B2 = sitofp <4 x i8> %b to <4 x double> -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C1 = sitofp <4 x i16> %c to <4 x float> -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C2 = sitofp <4 x i16> %c to <4 x double> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D1 = sitofp <4 x i32> %d to <4 x float> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D2 = sitofp <4 x i32> %d to <4 x double> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; AVX2-LABEL: 'sitofp4' +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %A1 = sitofp <4 x i1> %a to <4 x float> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %A2 = sitofp <4 x i1> %a to <4 x double> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %B1 = sitofp <4 x i8> %b to <4 x float> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %B2 = sitofp <4 x i8> %b to <4 x double> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C1 = sitofp <4 x i16> %c to <4 x float> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C2 = sitofp <4 x i16> %c to <4 x double> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D1 = sitofp <4 x i32> %d to <4 x float> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D2 = sitofp <4 x i32> %d to <4 x double> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'sitofp4' ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %A1 = sitofp <4 x i1> %a to <4 x float> @@ -417,19 +439,33 @@ define void @sitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) { } define void @sitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) { -; SSE-LABEL: 'sitofp8' -; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %A1 = sitofp <8 x i1> %a to <8 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %D1 = sitofp <8 x i32> %d to <8 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; SSE2-LABEL: 'sitofp8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %A1 = sitofp <8 x i1> %a to <8 x float> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float> +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float> +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %D1 = sitofp <8 x i32> %d to <8 x float> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE41-LABEL: 'sitofp8' +; SSE41-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %A1 = sitofp <8 x i1> %a to <8 x float> +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float> +; SSE41-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float> +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %D1 = sitofp <8 x i32> %d to <8 x float> +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX1-LABEL: 'sitofp8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A1 = sitofp <8 x i1> %a to <8 x float> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float> +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %D1 = sitofp <8 x i32> %d to <8 x float> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; AVX-LABEL: 'sitofp8' -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A1 = sitofp <8 x i1> %a to <8 x float> -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float> -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D1 = sitofp <8 x i32> %d to <8 x float> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; AVX2-LABEL: 'sitofp8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A1 = sitofp <8 x i1> %a to <8 x float> +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float> +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D1 = sitofp <8 x i32> %d to <8 x float> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'sitofp8' ; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A1 = sitofp <8 x i1> %a to <8 x float> @@ -457,16 +493,27 @@ define void @uitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) { ; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %D2 = uitofp <4 x i32> %d to <4 x double> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; AVX-LABEL: 'uitofp4' -; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %A1 = uitofp <4 x i1> %a to <4 x float> -; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %A2 = uitofp <4 x i1> %a to <4 x double> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %B1 = uitofp <4 x i8> %b to <4 x float> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %B2 = uitofp <4 x i8> %b to <4 x double> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C1 = uitofp <4 x i16> %c to <4 x float> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C2 = uitofp <4 x i16> %c to <4 x double> -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %D1 = uitofp <4 x i32> %d to <4 x float> -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %D2 = uitofp <4 x i32> %d to <4 x double> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; AVX1-LABEL: 'uitofp4' +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %A1 = uitofp <4 x i1> %a to <4 x float> +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %A2 = uitofp <4 x i1> %a to <4 x double> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %B1 = uitofp <4 x i8> %b to <4 x float> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %B2 = uitofp <4 x i8> %b to <4 x double> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C1 = uitofp <4 x i16> %c to <4 x float> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C2 = uitofp <4 x i16> %c to <4 x double> +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %D1 = uitofp <4 x i32> %d to <4 x float> +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %D2 = uitofp <4 x i32> %d to <4 x double> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX2-LABEL: 'uitofp4' +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %A1 = uitofp <4 x i1> %a to <4 x float> +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %A2 = uitofp <4 x i1> %a to <4 x double> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %B1 = uitofp <4 x i8> %b to <4 x float> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %B2 = uitofp <4 x i8> %b to <4 x double> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C1 = uitofp <4 x i16> %c to <4 x float> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C2 = uitofp <4 x i16> %c to <4 x double> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %D1 = uitofp <4 x i32> %d to <4 x float> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %D2 = uitofp <4 x i32> %d to <4 x double> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'uitofp4' ; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %A1 = uitofp <4 x i1> %a to <4 x float> @@ -509,7 +556,7 @@ define void @uitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) { ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %A1 = uitofp <8 x i1> %a to <8 x float> ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %B1 = uitofp <8 x i8> %b to <8 x float> ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %C1 = uitofp <8 x i16> %c to <8 x float> -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %D1 = uitofp <8 x i32> %d to <8 x float> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %D1 = uitofp <8 x i32> %d to <8 x float> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'uitofp8' diff --git a/llvm/test/Analysis/CostModel/X86/fptosi.ll b/llvm/test/Analysis/CostModel/X86/fptosi.ll index 3ada6d104b612..fd84c5eafdae3 100644 --- a/llvm/test/Analysis/CostModel/X86/fptosi.ll +++ b/llvm/test/Analysis/CostModel/X86/fptosi.ll @@ -1,14 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py ; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2 ; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42 -; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx | FileCheck %s --check-prefixes=AVX -; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 ; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F ; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512,AVX512DQ ; ; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=SSE,SLM ; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,SSE42 -; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=btver2 | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,AVX1 define i32 @fptosi_double_i64(i32 %arg) { ; SSE2-LABEL: 'fptosi_double_i64' @@ -68,12 +68,19 @@ define i32 @fptosi_double_i32(i32 %arg) { ; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; -; AVX-LABEL: 'fptosi_double_i32' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi double undef to i32 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = fptosi <4 x double> undef to <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; AVX1-LABEL: 'fptosi_double_i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi double undef to i32 +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = fptosi <4 x double> undef to <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX2-LABEL: 'fptosi_double_i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi double undef to i32 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = fptosi <4 x double> undef to <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'fptosi_double_i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi double undef to i32 @@ -227,13 +234,21 @@ define i32 @fptosi_float_i32(i32 %arg) { ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = fptosi <16 x float> undef to <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; -; AVX-LABEL: 'fptosi_float_i32' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi float undef to i32 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = fptosi <2 x float> undef to <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = fptosi <4 x float> undef to <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = fptosi <8 x float> undef to <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = fptosi <16 x float> undef to <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; AVX1-LABEL: 'fptosi_float_i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi float undef to i32 +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = fptosi <2 x float> undef to <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = fptosi <4 x float> undef to <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = fptosi <8 x float> undef to <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = fptosi <16 x float> undef to <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX2-LABEL: 'fptosi_float_i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi float undef to i32 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = fptosi <2 x float> undef to <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = fptosi <4 x float> undef to <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = fptosi <8 x float> undef to <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = fptosi <16 x float> undef to <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'fptosi_float_i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi float undef to i32 diff --git a/llvm/test/Analysis/CostModel/X86/fptoui.ll b/llvm/test/Analysis/CostModel/X86/fptoui.ll index 85de13eda3648..390aeaaef4fd6 100644 --- a/llvm/test/Analysis/CostModel/X86/fptoui.ll +++ b/llvm/test/Analysis/CostModel/X86/fptoui.ll @@ -1,14 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py ; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2 ; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42 -; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx | FileCheck %s --check-prefixes=AVX -; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 ; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F ; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512,AVX512DQ ; ; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=SSE,SLM ; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,SSE42 -; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=btver2 | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,AVX1 define i32 @fptoui_double_i64(i32 %arg) { ; SSE2-LABEL: 'fptoui_double_i64' @@ -75,12 +75,19 @@ define i32 @fptoui_double_i32(i32 %arg) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; -; AVX-LABEL: 'fptoui_double_i32' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui double undef to i32 -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = fptoui <2 x double> undef to <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = fptoui <4 x double> undef to <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; AVX1-LABEL: 'fptoui_double_i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui double undef to i32 +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = fptoui <2 x double> undef to <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = fptoui <4 x double> undef to <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX2-LABEL: 'fptoui_double_i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui double undef to i32 +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = fptoui <2 x double> undef to <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = fptoui <4 x double> undef to <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'fptoui_double_i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui double undef to i32 @@ -241,13 +248,21 @@ define i32 @fptoui_float_i32(i32 %arg) { ; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; -; AVX-LABEL: 'fptoui_float_i32' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui float undef to i32 -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = fptoui <2 x float> undef to <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = fptoui <4 x float> undef to <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = fptoui <8 x float> undef to <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; AVX1-LABEL: 'fptoui_float_i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui float undef to i32 +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = fptoui <2 x float> undef to <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = fptoui <4 x float> undef to <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = fptoui <8 x float> undef to <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX2-LABEL: 'fptoui_float_i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui float undef to i32 +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = fptoui <2 x float> undef to <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = fptoui <4 x float> undef to <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = fptoui <8 x float> undef to <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'fptoui_float_i32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui float undef to i32 diff --git a/llvm/test/Analysis/CostModel/X86/sitofp.ll b/llvm/test/Analysis/CostModel/X86/sitofp.ll index a6e34a80c47e9..718467c1e7832 100644 --- a/llvm/test/Analysis/CostModel/X86/sitofp.ll +++ b/llvm/test/Analysis/CostModel/X86/sitofp.ll @@ -1,14 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=SSE -; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE -; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx | FileCheck %s --check-prefixes=AVX -; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2 +; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 ; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F ; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512,AVX512DQ ; -; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=SSE -; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=SSE -; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=btver2 | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,AVX1 define i32 @sitofp_i8_double() { ; SSE-LABEL: 'sitofp_i8_double' @@ -69,23 +69,37 @@ define i32 @sitofp_i16_double() { } define i32 @sitofp_i32_double() { -; SSE-LABEL: 'sitofp_i32_double' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double -; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double> -; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double> -; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; SSE2-LABEL: 'sitofp_i32_double' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double> +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double> +; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; -; AVX-LABEL: 'sitofp_i32_double' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double -; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double> -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; SSE42-LABEL: 'sitofp_i32_double' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX1-LABEL: 'sitofp_i32_double' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX2-LABEL: 'sitofp_i32_double' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double> +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'sitofp_i32_double' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double -; AVX512-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef @@ -200,25 +214,41 @@ define i32 @sitofp_i16_float() { } define i32 @sitofp_i32_float() { -; SSE-LABEL: 'sitofp_i32_float' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f32 = sitofp i32 undef to float -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v4i32_v4f32 = sitofp <4 x i32> undef to <4 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cvt_v8i32_v8f32 = sitofp <8 x i32> undef to <8 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v16i32_v16f32 = sitofp <16 x i32> undef to <16 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; SSE2-LABEL: 'sitofp_i32_float' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f32 = sitofp i32 undef to float +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float> +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v4i32_v4f32 = sitofp <4 x i32> undef to <4 x float> +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cvt_v8i32_v8f32 = sitofp <8 x i32> undef to <8 x float> +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v16i32_v16f32 = sitofp <16 x i32> undef to <16 x float> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; -; AVX-LABEL: 'sitofp_i32_float' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f32 = sitofp i32 undef to float -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f32 = sitofp <4 x i32> undef to <4 x float> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v8i32_v8f32 = sitofp <8 x i32> undef to <8 x float> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v16i32_v16f32 = sitofp <16 x i32> undef to <16 x float> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; SSE42-LABEL: 'sitofp_i32_float' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f32 = sitofp i32 undef to float +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f32 = sitofp <4 x i32> undef to <4 x float> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v8i32_v8f32 = sitofp <8 x i32> undef to <8 x float> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v16i32_v16f32 = sitofp <16 x i32> undef to <16 x float> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX1-LABEL: 'sitofp_i32_float' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f32 = sitofp i32 undef to float +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f32 = sitofp <4 x i32> undef to <4 x float> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v8i32_v8f32 = sitofp <8 x i32> undef to <8 x float> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v16i32_v16f32 = sitofp <16 x i32> undef to <16 x float> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX2-LABEL: 'sitofp_i32_float' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f32 = sitofp i32 undef to float +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f32 = sitofp <4 x i32> undef to <4 x float> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v8i32_v8f32 = sitofp <8 x i32> undef to <8 x float> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v16i32_v16f32 = sitofp <16 x i32> undef to <16 x float> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'sitofp_i32_float' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f32 = sitofp i32 undef to float -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f32 = sitofp <4 x i32> undef to <4 x float> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v8i32_v8f32 = sitofp <8 x i32> undef to <8 x float> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v16i32_v16f32 = sitofp <16 x i32> undef to <16 x float> diff --git a/llvm/test/Analysis/CostModel/X86/uitofp.ll b/llvm/test/Analysis/CostModel/X86/uitofp.ll index 42d1e59753dfb..deb6bd496e13e 100644 --- a/llvm/test/Analysis/CostModel/X86/uitofp.ll +++ b/llvm/test/Analysis/CostModel/X86/uitofp.ll @@ -76,12 +76,19 @@ define i32 @uitofp_i32_double() { ; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %cvt_v8i32_v8f64 = uitofp <8 x i32> undef to <8 x double> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; -; AVX-LABEL: 'uitofp_i32_double' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = uitofp i32 undef to double -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cvt_v2i32_v2f64 = uitofp <2 x i32> undef to <2 x double> -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cvt_v4i32_v4f64 = uitofp <4 x i32> undef to <4 x double> -; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cvt_v8i32_v8f64 = uitofp <8 x i32> undef to <8 x double> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; AVX1-LABEL: 'uitofp_i32_double' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = uitofp i32 undef to double +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f64 = uitofp <2 x i32> undef to <2 x double> +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cvt_v4i32_v4f64 = uitofp <4 x i32> undef to <4 x double> +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cvt_v8i32_v8f64 = uitofp <8 x i32> undef to <8 x double> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX2-LABEL: 'uitofp_i32_double' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = uitofp i32 undef to double +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i32_v2f64 = uitofp <2 x i32> undef to <2 x double> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i32_v4f64 = uitofp <4 x i32> undef to <4 x double> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v8i32_v8f64 = uitofp <8 x i32> undef to <8 x double> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'uitofp_i32_double' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = uitofp i32 undef to double @@ -218,22 +225,22 @@ define i32 @uitofp_i32_float() { ; AVX1-LABEL: 'uitofp_i32_float' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f32 = uitofp i32 undef to float ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f32 = uitofp <2 x i32> undef to <2 x float> -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cvt_v4i32_v4f32 = uitofp <4 x i32> undef to <4 x float> +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v4i32_v4f32 = uitofp <4 x i32> undef to <4 x float> ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v8i32_v8f32 = uitofp <8 x i32> undef to <8 x float> ; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cvt_v16i32_v16f32 = uitofp <16 x i32> undef to <16 x float> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'uitofp_i32_float' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f32 = uitofp i32 undef to float -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f32 = uitofp <2 x i32> undef to <2 x float> -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cvt_v4i32_v4f32 = uitofp <4 x i32> undef to <4 x float> -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v8i32_v8f32 = uitofp <8 x i32> undef to <8 x float> -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cvt_v16i32_v16f32 = uitofp <16 x i32> undef to <16 x float> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v2i32_v2f32 = uitofp <2 x i32> undef to <2 x float> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i32_v4f32 = uitofp <4 x i32> undef to <4 x float> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v8i32_v8f32 = uitofp <8 x i32> undef to <8 x float> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v16i32_v16f32 = uitofp <16 x i32> undef to <16 x float> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'uitofp_i32_float' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f32 = uitofp i32 undef to float -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v2i32_v2f32 = uitofp <2 x i32> undef to <2 x float> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i32_v2f32 = uitofp <2 x i32> undef to <2 x float> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f32 = uitofp <4 x i32> undef to <4 x float> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v8i32_v8f32 = uitofp <8 x i32> undef to <8 x float> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v16i32_v16f32 = uitofp <16 x i32> undef to <16 x float> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/fptoui.ll b/llvm/test/Transforms/SLPVectorizer/X86/fptoui.ll index 9544a4a820c5b..5b5f5b051bae4 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/fptoui.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/fptoui.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -mtriple=x86_64-unknown -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,SSE -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256NODQ -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=bdver1 -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256NODQ -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256NODQ +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256NODQ,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=bdver1 -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256NODQ,XOP +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256NODQ,AVX2 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -mattr=-prefer-256-bit -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512,AVX512F ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -mattr=+prefer-256-bit -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512,AVX256DQ @@ -144,32 +144,44 @@ define void @fptoui_8f64_8i32() #0 { ; SSE-NEXT: store i32 [[CVT7]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 7), align 4 ; SSE-NEXT: ret void ; -; AVX256NODQ-LABEL: @fptoui_8f64_8i32( -; AVX256NODQ-NEXT: [[A0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8 -; AVX256NODQ-NEXT: [[A1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8 -; AVX256NODQ-NEXT: [[A2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8 -; AVX256NODQ-NEXT: [[A3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8 -; AVX256NODQ-NEXT: [[A4:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8 -; AVX256NODQ-NEXT: [[A5:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8 -; AVX256NODQ-NEXT: [[A6:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8 -; AVX256NODQ-NEXT: [[A7:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8 -; AVX256NODQ-NEXT: [[CVT0:%.*]] = fptoui double [[A0]] to i32 -; AVX256NODQ-NEXT: [[CVT1:%.*]] = fptoui double [[A1]] to i32 -; AVX256NODQ-NEXT: [[CVT2:%.*]] = fptoui double [[A2]] to i32 -; AVX256NODQ-NEXT: [[CVT3:%.*]] = fptoui double [[A3]] to i32 -; AVX256NODQ-NEXT: [[CVT4:%.*]] = fptoui double [[A4]] to i32 -; AVX256NODQ-NEXT: [[CVT5:%.*]] = fptoui double [[A5]] to i32 -; AVX256NODQ-NEXT: [[CVT6:%.*]] = fptoui double [[A6]] to i32 -; AVX256NODQ-NEXT: [[CVT7:%.*]] = fptoui double [[A7]] to i32 -; AVX256NODQ-NEXT: store i32 [[CVT0]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 0), align 4 -; AVX256NODQ-NEXT: store i32 [[CVT1]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 1), align 4 -; AVX256NODQ-NEXT: store i32 [[CVT2]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 2), align 4 -; AVX256NODQ-NEXT: store i32 [[CVT3]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 3), align 4 -; AVX256NODQ-NEXT: store i32 [[CVT4]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 4), align 4 -; AVX256NODQ-NEXT: store i32 [[CVT5]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 5), align 4 -; AVX256NODQ-NEXT: store i32 [[CVT6]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 6), align 4 -; AVX256NODQ-NEXT: store i32 [[CVT7]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 7), align 4 -; AVX256NODQ-NEXT: ret void +; AVX1-LABEL: @fptoui_8f64_8i32( +; AVX1-NEXT: [[A0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8 +; AVX1-NEXT: [[A1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8 +; AVX1-NEXT: [[A2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8 +; AVX1-NEXT: [[A3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8 +; AVX1-NEXT: [[A4:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8 +; AVX1-NEXT: [[A5:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8 +; AVX1-NEXT: [[A6:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8 +; AVX1-NEXT: [[A7:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8 +; AVX1-NEXT: [[CVT0:%.*]] = fptoui double [[A0]] to i32 +; AVX1-NEXT: [[CVT1:%.*]] = fptoui double [[A1]] to i32 +; AVX1-NEXT: [[CVT2:%.*]] = fptoui double [[A2]] to i32 +; AVX1-NEXT: [[CVT3:%.*]] = fptoui double [[A3]] to i32 +; AVX1-NEXT: [[CVT4:%.*]] = fptoui double [[A4]] to i32 +; AVX1-NEXT: [[CVT5:%.*]] = fptoui double [[A5]] to i32 +; AVX1-NEXT: [[CVT6:%.*]] = fptoui double [[A6]] to i32 +; AVX1-NEXT: [[CVT7:%.*]] = fptoui double [[A7]] to i32 +; AVX1-NEXT: store i32 [[CVT0]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 0), align 4 +; AVX1-NEXT: store i32 [[CVT1]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 1), align 4 +; AVX1-NEXT: store i32 [[CVT2]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 2), align 4 +; AVX1-NEXT: store i32 [[CVT3]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 3), align 4 +; AVX1-NEXT: store i32 [[CVT4]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 4), align 4 +; AVX1-NEXT: store i32 [[CVT5]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 5), align 4 +; AVX1-NEXT: store i32 [[CVT6]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 6), align 4 +; AVX1-NEXT: store i32 [[CVT7]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 7), align 4 +; AVX1-NEXT: ret void +; +; XOP-LABEL: @fptoui_8f64_8i32( +; XOP-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @src64 to <8 x double>*), align 8 +; XOP-NEXT: [[TMP2:%.*]] = fptoui <8 x double> [[TMP1]] to <8 x i32> +; XOP-NEXT: store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([16 x i32]* @dst32 to <8 x i32>*), align 4 +; XOP-NEXT: ret void +; +; AVX2-LABEL: @fptoui_8f64_8i32( +; AVX2-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @src64 to <8 x double>*), align 8 +; AVX2-NEXT: [[TMP2:%.*]] = fptoui <8 x double> [[TMP1]] to <8 x i32> +; AVX2-NEXT: store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([16 x i32]* @dst32 to <8 x i32>*), align 4 +; AVX2-NEXT: ret void ; ; AVX512-LABEL: @fptoui_8f64_8i32( ; AVX512-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @src64 to <8 x double>*), align 8 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/sitofp-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/sitofp-inseltpoison.ll index 7b4677cc7e19b..4b4b01ba8ce15 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/sitofp-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/sitofp-inseltpoison.ll @@ -216,14 +216,20 @@ define void @sitofp_8i64_8f64() #0 { } define void @sitofp_2i32_2f64() #0 { -; CHECK-LABEL: @sitofp_2i32_2f64( -; CHECK-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64 -; CHECK-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4 -; CHECK-NEXT: [[CVT0:%.*]] = sitofp i32 [[LD0]] to double -; CHECK-NEXT: [[CVT1:%.*]] = sitofp i32 [[LD1]] to double -; CHECK-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 -; CHECK-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 -; CHECK-NEXT: ret void +; SSE-LABEL: @sitofp_2i32_2f64( +; SSE-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64 +; SSE-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4 +; SSE-NEXT: [[CVT0:%.*]] = sitofp i32 [[LD0]] to double +; SSE-NEXT: [[CVT1:%.*]] = sitofp i32 [[LD1]] to double +; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 +; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 +; SSE-NEXT: ret void +; +; AVX-LABEL: @sitofp_2i32_2f64( +; AVX-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([16 x i32]* @src32 to <2 x i32>*), align 64 +; AVX-NEXT: [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x double> +; AVX-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64 +; AVX-NEXT: ret void ; %ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64 %ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll b/llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll index f187f6385f57a..ef63088afbb7a 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll @@ -216,14 +216,20 @@ define void @sitofp_8i64_8f64() #0 { } define void @sitofp_2i32_2f64() #0 { -; CHECK-LABEL: @sitofp_2i32_2f64( -; CHECK-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64 -; CHECK-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4 -; CHECK-NEXT: [[CVT0:%.*]] = sitofp i32 [[LD0]] to double -; CHECK-NEXT: [[CVT1:%.*]] = sitofp i32 [[LD1]] to double -; CHECK-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 -; CHECK-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 -; CHECK-NEXT: ret void +; SSE-LABEL: @sitofp_2i32_2f64( +; SSE-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64 +; SSE-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4 +; SSE-NEXT: [[CVT0:%.*]] = sitofp i32 [[LD0]] to double +; SSE-NEXT: [[CVT1:%.*]] = sitofp i32 [[LD1]] to double +; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 +; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 +; SSE-NEXT: ret void +; +; AVX-LABEL: @sitofp_2i32_2f64( +; AVX-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([16 x i32]* @src32 to <2 x i32>*), align 64 +; AVX-NEXT: [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x double> +; AVX-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64 +; AVX-NEXT: ret void ; %ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64 %ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/uitofp.ll b/llvm/test/Transforms/SLPVectorizer/X86/uitofp.ll index e07743d27834d..dfc7d64103390 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/uitofp.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/uitofp.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -mtriple=x86_64-unknown -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256 --check-prefix=AVX256NODQ -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=bdver1 -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256 --check-prefix=AVX256NODQ -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256 --check-prefix=AVX256NODQ -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -mattr=-prefer-256-bit -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -mattr=+prefer-256-bit -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256 --check-prefix=AVX256DQ +; RUN: opt < %s -mtriple=x86_64-unknown -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,SSE +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256,AVX256NODQ,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=bdver1 -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256,AVX256NODQ,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256,AVX256NODQ,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -mattr=-prefer-256-bit -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -mattr=+prefer-256-bit -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256,AVX256DQ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @@ -135,14 +135,20 @@ define void @uitofp_2i32_2f64() #0 { ; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 ; SSE-NEXT: ret void ; -; AVX256NODQ-LABEL: @uitofp_2i32_2f64( -; AVX256NODQ-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64 -; AVX256NODQ-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4 -; AVX256NODQ-NEXT: [[CVT0:%.*]] = uitofp i32 [[LD0]] to double -; AVX256NODQ-NEXT: [[CVT1:%.*]] = uitofp i32 [[LD1]] to double -; AVX256NODQ-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 -; AVX256NODQ-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 -; AVX256NODQ-NEXT: ret void +; AVX1-LABEL: @uitofp_2i32_2f64( +; AVX1-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64 +; AVX1-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4 +; AVX1-NEXT: [[CVT0:%.*]] = uitofp i32 [[LD0]] to double +; AVX1-NEXT: [[CVT1:%.*]] = uitofp i32 [[LD1]] to double +; AVX1-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 +; AVX1-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 +; AVX1-NEXT: ret void +; +; AVX2-LABEL: @uitofp_2i32_2f64( +; AVX2-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([16 x i32]* @src32 to <2 x i32>*), align 64 +; AVX2-NEXT: [[TMP2:%.*]] = uitofp <2 x i32> [[TMP1]] to <2 x double> +; AVX2-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64 +; AVX2-NEXT: ret void ; ; AVX512-LABEL: @uitofp_2i32_2f64( ; AVX512-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([16 x i32]* @src32 to <2 x i32>*), align 64 From 90f244c8432357337a12b2582b20e2aedeb3e742 Mon Sep 17 00:00:00 2001 From: Florian Mayer Date: Wed, 30 Jun 2021 14:48:57 +0100 Subject: [PATCH 292/619] [hwasan] Add missing newline in report. Reviewed By: glider Differential Revision: https://reviews.llvm.org/D105190 --- compiler-rt/lib/hwasan/hwasan_report.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler-rt/lib/hwasan/hwasan_report.cpp b/compiler-rt/lib/hwasan/hwasan_report.cpp index 00a78193e3a31..d4d836bd48940 100644 --- a/compiler-rt/lib/hwasan/hwasan_report.cpp +++ b/compiler-rt/lib/hwasan/hwasan_report.cpp @@ -485,7 +485,7 @@ void PrintAddressDescription( if (num_descriptions_printed > 1) { Printf( "There are %d potential causes, printed above in order " - "of likeliness.", + "of likeliness.\n", num_descriptions_printed); } } From fec521a7b206815ad995f7247e671a8f25d144fc Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 18 Jun 2021 13:33:14 -0400 Subject: [PATCH 293/619] [lit] Add the ability to parse regexes in Lit boolean expressions This patch augments Lit with the ability to parse regular expressions in boolean expressions. This includes REQUIRES:, XFAIL:, UNSUPPORTED:, and all other special Lit markup that evaluates to a boolean expression. Regular expressions can be specified by enclosing them in {{...}}, similarly to how FileCheck handles such regular expressions. The regular expression can either be on its own, or it can be part of an identifier. For example, a match expression like {{.+}}-apple-darwin{{.+}} would match the following variables: x86_64-apple-darwin20.0 arm64-apple-darwin20.0 arm64-apple-darwin22.0 etc... In the long term, this could be used to remove the need to handle the target triple specially when parsing boolean expressions. Differential Revision: https://reviews.llvm.org/D104572 --- llvm/docs/TestingGuide.rst | 8 +- llvm/utils/lit/lit/BooleanExpression.py | 84 +++++++++++++++---- llvm/utils/lit/lit/Test.py | 4 +- .../tests/Inputs/show-used-features/mixed.txt | 6 +- llvm/utils/lit/tests/show-used-features.py | 3 + 5 files changed, 80 insertions(+), 25 deletions(-) diff --git a/llvm/docs/TestingGuide.rst b/llvm/docs/TestingGuide.rst index 4ec6a3d52b072..cd2c012d95738 100644 --- a/llvm/docs/TestingGuide.rst +++ b/llvm/docs/TestingGuide.rst @@ -459,8 +459,12 @@ will be a failure if its execution succeeds. ``REQUIRES`` and ``UNSUPPORTED`` and ``XFAIL`` all accept a comma-separated list of boolean expressions. The values in each expression may be: -- Features added to ``config.available_features`` by - configuration files such as ``lit.cfg``. +- Features added to ``config.available_features`` by configuration files such as ``lit.cfg``. + String comparison of features is case-sensitive. Furthermore, a boolean expression can + contain any Python regular expression enclosed in ``{{ }}``, in which case the boolean + expression is satisfied if any feature matches the regular expression. Regular + expressions can appear inside an identifier, so for example ``he{{l+}}o`` would match + ``helo``, ``hello``, ``helllo``, and so on. - Substrings of the target triple (``UNSUPPORTED`` and ``XFAIL`` only). | ``REQUIRES`` enables the test if all expressions are true. diff --git a/llvm/utils/lit/lit/BooleanExpression.py b/llvm/utils/lit/lit/BooleanExpression.py index 34e07fc1b8e5e..ff5352778e99c 100644 --- a/llvm/utils/lit/lit/BooleanExpression.py +++ b/llvm/utils/lit/lit/BooleanExpression.py @@ -4,18 +4,24 @@ class BooleanExpression: # A simple evaluator of boolean expressions. # # Grammar: - # expr :: or_expr - # or_expr :: and_expr ('||' and_expr)* - # and_expr :: not_expr ('&&' not_expr)* - # not_expr :: '!' not_expr - # '(' or_expr ')' - # identifier - # identifier :: [-+=._a-zA-Z0-9]+ + # expr :: or_expr + # or_expr :: and_expr ('||' and_expr)* + # and_expr :: not_expr ('&&' not_expr)* + # not_expr :: '!' not_expr + # '(' or_expr ')' + # match_expr + # match_expr :: braced_regex + # identifier + # braced_regex match_expr + # identifier match_expr + # identifier :: [-+=._a-zA-Z0-9]+ + # braced_regex :: '{{' python_regex '}}' # Evaluates `string` as a boolean expression. # Returns True or False. Throws a ValueError on syntax error. # # Variables in `variables` are true. + # Regexes that match any variable in `variables` are true. # Substrings of `triple` are true. # 'true' is true. # All other identifiers are false. @@ -41,7 +47,7 @@ def __init__(self, string, variables, triple=""): END = object() # Tokenization pattern. - Pattern = re.compile(r'\A\s*([()]|[-+=._a-zA-Z0-9]+|&&|\|\||!)\s*(.*)\Z') + Pattern = re.compile(r'\A\s*([()]|&&|\|\||!|(?:[-+=._a-zA-Z0-9]+|\{\{.+?\}\})+)\s*(.*)\Z') @staticmethod def tokenize(string): @@ -80,12 +86,24 @@ def expect(self, t): (self.quote(t), self.quote(self.token))) @staticmethod - def isIdentifier(token): + def isMatchExpression(token): if (token is BooleanExpression.END or token == '&&' or token == '||' or token == '!' or token == '(' or token == ')'): return False return True + def parseMATCH(self): + regex = '' + for part in filter(None, re.split(r'(\{\{.+?\}\})', self.token)): + if part.startswith('{{'): + assert part.endswith('}}') + regex += '(?:{})'.format(part[2:-2]) + else: + regex += re.escape(part) + regex = re.compile(regex) + self.value = self.token in self.triple or any(regex.fullmatch(var) for var in self.variables) + self.token = next(self.tokens) + def parseNOT(self): if self.accept('!'): self.parseNOT() @@ -93,13 +111,11 @@ def parseNOT(self): elif self.accept('('): self.parseOR() self.expect(')') - elif not BooleanExpression.isIdentifier(self.token): - raise ValueError("expected: '!' or '(' or identifier\nhave: %s" % + elif not BooleanExpression.isMatchExpression(self.token): + raise ValueError("expected: '!', '(', '{{', or identifier\nhave: %s" % self.quote(self.token)) else: - self.value = (self.token in self.variables or - self.token in self.triple) - self.token = next(self.tokens) + self.parseMATCH() def parseAND(self): self.parseNOT() @@ -143,12 +159,20 @@ def test_variables(self): self.assertTrue(BooleanExpression.evaluate('under_score', variables)) self.assertTrue(BooleanExpression.evaluate('e=quals', variables)) self.assertTrue(BooleanExpression.evaluate('d1g1ts', variables)) + self.assertTrue(BooleanExpression.evaluate('{{its.+}}', variables)) + self.assertTrue(BooleanExpression.evaluate('{{false-[lo]+-true}}', variables)) + self.assertTrue(BooleanExpression.evaluate('{{(true|false)-lol-(true|false)}}', variables)) + self.assertTrue(BooleanExpression.evaluate('d1g{{[0-9]}}ts', variables)) + self.assertTrue(BooleanExpression.evaluate('d1g{{[0-9]}}t{{[a-z]}}', variables)) + self.assertTrue(BooleanExpression.evaluate('{{d}}1g{{[0-9]}}t{{[a-z]}}', variables)) + self.assertTrue(BooleanExpression.evaluate('d1{{(g|1)+}}ts', variables)) self.assertFalse(BooleanExpression.evaluate('false', variables)) self.assertFalse(BooleanExpression.evaluate('True', variables)) self.assertFalse(BooleanExpression.evaluate('true-ish', variables)) self.assertFalse(BooleanExpression.evaluate('not_true', variables)) self.assertFalse(BooleanExpression.evaluate('tru', variables)) + self.assertFalse(BooleanExpression.evaluate('{{its-true.+}}', variables)) def test_triple(self): triple = 'arch-vendor-os' @@ -159,6 +183,21 @@ def test_triple(self): self.assertTrue(BooleanExpression.evaluate('-os', {}, triple)) self.assertFalse(BooleanExpression.evaluate('arch-os', {}, triple)) + # When matching against the triple, a regex is treated as an identifier and checked + # for a literal match. This preserves existing behavior before regexes were introduced. + self.assertFalse(BooleanExpression.evaluate('arch-{{vendor}}-os', {}, triple)) + self.assertTrue(BooleanExpression.evaluate('arch-{{vendor}}-os', {}, 'arch-{{vendor}}-os')) + + def test_matching(self): + expr1 = 'linux && (target={{aarch64-.+}} || target={{x86_64-.+}})' + self.assertTrue(BooleanExpression.evaluate(expr1, {'linux', 'target=x86_64-unknown-linux-gnu'})) + self.assertFalse(BooleanExpression.evaluate(expr1, {'linux', 'target=i386-unknown-linux-gnu'})) + + expr2 = 'use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} && !no-exceptions' + self.assertTrue(BooleanExpression.evaluate(expr2, {'use_system_cxx_lib', 'target=arm64-apple-macosx10.12'})) + self.assertFalse(BooleanExpression.evaluate(expr2, {'use_system_cxx_lib', 'target=arm64-apple-macosx10.12', 'no-exceptions'})) + self.assertFalse(BooleanExpression.evaluate(expr2, {'use_system_cxx_lib', 'target=arm64-apple-macosx10.15'})) + def test_operators(self): self.assertTrue(BooleanExpression.evaluate('true || true', {})) self.assertTrue(BooleanExpression.evaluate('true || false', {})) @@ -206,17 +245,17 @@ def test_errors(self): "in expression: 'true and true'") self.checkException("|| true", - "expected: '!' or '(' or identifier\n" + + "expected: '!', '(', '{{', or identifier\n" + "have: '||'\n" + "in expression: '|| true'") self.checkException("true &&", - "expected: '!' or '(' or identifier\n" + + "expected: '!', '(', '{{', or identifier\n" + "have: \n" + "in expression: 'true &&'") self.checkException("", - "expected: '!' or '(' or identifier\n" + + "expected: '!', '(', '{{', or identifier\n" + "have: \n" + "in expression: ''") @@ -244,9 +283,18 @@ def test_errors(self): "in expression: 'true (true)'") self.checkException("( )", - "expected: '!' or '(' or identifier\n" + + "expected: '!', '(', '{{', or identifier\n" + "have: ')'\n" + "in expression: '( )'") + self.checkException("abc{{def", + "couldn't parse text: '{{def'\n" + + "in expression: 'abc{{def'") + + self.checkException("{{}}", + "couldn't parse text: '{{}}'\n" + + "in expression: '{{}}'") + + if __name__ == '__main__': unittest.main() diff --git a/llvm/utils/lit/lit/Test.py b/llvm/utils/lit/lit/Test.py index 7cc610bf56bde..45ab12a85bd5e 100644 --- a/llvm/utils/lit/lit/Test.py +++ b/llvm/utils/lit/lit/Test.py @@ -408,5 +408,5 @@ def getUsedFeatures(self): BooleanExpression.tokenize(expr) for expr in boolean_expressions if expr != '*' ) - identifiers = set(filter(BooleanExpression.isIdentifier, tokens)) - return identifiers + matchExpressions = set(filter(BooleanExpression.isMatchExpression, tokens)) + return matchExpressions diff --git a/llvm/utils/lit/tests/Inputs/show-used-features/mixed.txt b/llvm/utils/lit/tests/Inputs/show-used-features/mixed.txt index 1de0f7442a086..309b3eaaa7698 100644 --- a/llvm/utils/lit/tests/Inputs/show-used-features/mixed.txt +++ b/llvm/utils/lit/tests/Inputs/show-used-features/mixed.txt @@ -1,4 +1,4 @@ -// REQUIRES: my-require-feature-2 || my-require-feature-3 -// UNSUPPORTED: my-unsupported-feature-2, my-unsupported-feature-3 -// XFAIL: my-xfail-feature-2, my-xfail-feature-3 +// REQUIRES: my-require-feature-2 || my-require-feature-3, my-{{[require]*}}-feature-4 +// UNSUPPORTED: my-unsupported-feature-2, my-unsupported-feature-3 && !my-{{[unsupported]*}}-feature-4 +// XFAIL: my-xfail-feature-2, my-xfail-feature-3, my-{{[xfail]*}}-feature-4 diff --git a/llvm/utils/lit/tests/show-used-features.py b/llvm/utils/lit/tests/show-used-features.py index 069ee08196701..b88c68faca981 100644 --- a/llvm/utils/lit/tests/show-used-features.py +++ b/llvm/utils/lit/tests/show-used-features.py @@ -4,3 +4,6 @@ # CHECK: my-require-feature-1 my-require-feature-2 my-require-feature-3 # CHECK: my-unsupported-feature-1 my-unsupported-feature-2 my-unsupported-feature-3 # CHECK: my-xfail-feature-1 my-xfail-feature-2 my-xfail-feature-3 +# CHECK: {{my-[{][{]\[require\]\*[}][}]-feature-4}} +# CHECK: {{my-[{][{]\[unsupported\]\*[}][}]-feature-4}} +# CHECK: {{my-[{][{]\[xfail\]\*[}][}]-feature-4}} From 0f1f92156f3caafe9567b853ecb57212b709e68d Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 29 Jun 2021 16:21:26 -0700 Subject: [PATCH 294/619] [ARM] Fix incorrect assignment of Changed variable in MVEGatherScatterLowering::optimiseOffsets. I believe this Changed flag should be initialized to false, otherwise the if (!Changed) is always dead. This doesn't manifest in a functional issue because the PHINode checks will fail if nothing changed. They are identical to the earlier checks that must have already failed to get into this else block. While there remove an else after return to reduce indentation. Differential Revision: https://reviews.llvm.org/D105159 --- .../Target/ARM/MVEGatherScatterLowering.cpp | 20 +++++++++---------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp b/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp index 74e0fb4ac5756..339ca18179b33 100644 --- a/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp +++ b/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp @@ -952,25 +952,23 @@ bool MVEGatherScatterLowering::optimiseOffsets(Value *Offsets, BasicBlock *BB, Phi = cast(Offs->getOperand(1)); OffsSecondOp = 0; } else { - bool Changed = true; + bool Changed = false; if (isa(Offs->getOperand(0)) && L->contains(cast(Offs->getOperand(0)))) Changed |= optimiseOffsets(Offs->getOperand(0), BB, LI); if (isa(Offs->getOperand(1)) && L->contains(cast(Offs->getOperand(1)))) Changed |= optimiseOffsets(Offs->getOperand(1), BB, LI); - if (!Changed) { + if (!Changed) return false; + if (isa(Offs->getOperand(0))) { + Phi = cast(Offs->getOperand(0)); + OffsSecondOp = 1; + } else if (isa(Offs->getOperand(1))) { + Phi = cast(Offs->getOperand(1)); + OffsSecondOp = 0; } else { - if (isa(Offs->getOperand(0))) { - Phi = cast(Offs->getOperand(0)); - OffsSecondOp = 1; - } else if (isa(Offs->getOperand(1))) { - Phi = cast(Offs->getOperand(1)); - OffsSecondOp = 0; - } else { - return false; - } + return false; } } // A phi node we want to perform this function on should be from the From c69cfbfd71626e0ebcfc1fc79014ee0fea25103b Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Wed, 30 Jun 2021 11:11:52 -0400 Subject: [PATCH 295/619] [libc++] Remove broken links and outdated information in the docs The various design docs have been moved to RST, and the linked blog post does not apply anymore since libc++ is the default library used by Clang on Apple platforms. --- libcxx/docs/index.rst | 6 ------ 1 file changed, 6 deletions(-) diff --git a/libcxx/docs/index.rst b/libcxx/docs/index.rst index c93fbc7e5d654..7e5cb089ba8e1 100644 --- a/libcxx/docs/index.rst +++ b/libcxx/docs/index.rst @@ -178,12 +178,6 @@ Design Documents DesignDocs/UniquePtrTrivialAbi DesignDocs/VisibilityMacros -* ` design `_ -* ` design `_ -* `Notes by Marshall Clow`__ - -.. __: https://cplusplusmusings.wordpress.com/2012/07/05/clang-and-standard-libraries-on-mac-os-x/ - Build Bots and Test Coverage ---------------------------- From 5e630a9202a363aae37e4e10655e894490f150f9 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Wed, 30 Jun 2021 08:26:18 -0700 Subject: [PATCH 296/619] [test] precommit a test for missing (0 /u %x) SCEV fold --- llvm/test/Analysis/ScalarEvolution/fold.ll | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/llvm/test/Analysis/ScalarEvolution/fold.ll b/llvm/test/Analysis/ScalarEvolution/fold.ll index d55651cef3c64..72b62ecaeffc1 100644 --- a/llvm/test/Analysis/ScalarEvolution/fold.ll +++ b/llvm/test/Analysis/ScalarEvolution/fold.ll @@ -127,3 +127,11 @@ define i64 @test10(i64 %a, i64 %b) { ; CHECK-NEXT: --> 0 ret i64 %t2 } + +define i64 @test11(i64 %a) { +; CHECK-LABEL: @test11 + %t0 = udiv i64 0, %a +; CHECK: %t0 +; CHECK-NEXT: --> (0 /u %a) + ret i64 %t0 +} From 14d8f1546a0483ef54a7a09442848d0a470c94c0 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Wed, 30 Jun 2021 08:31:13 -0700 Subject: [PATCH 297/619] [SCEV] Fold (0 udiv %x) to 0 We have analogous rules in instsimplify, etc.., but were missing the same in SCEV. The fold is near trivial, but came up in the context of a larger change. --- llvm/lib/Analysis/ScalarEvolution.cpp | 5 +++++ llvm/test/Analysis/ScalarEvolution/fold.ll | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 990f3d6040f4c..97ea60f93c541 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -3268,6 +3268,11 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + // 0 udiv Y == 0 + if (const SCEVConstant *LHSC = dyn_cast(LHS)) + if (LHSC->getValue()->isZero()) + return LHS; + if (const SCEVConstant *RHSC = dyn_cast(RHS)) { if (RHSC->getValue()->isOne()) return LHS; // X udiv 1 --> x diff --git a/llvm/test/Analysis/ScalarEvolution/fold.ll b/llvm/test/Analysis/ScalarEvolution/fold.ll index 72b62ecaeffc1..c23029d521d4d 100644 --- a/llvm/test/Analysis/ScalarEvolution/fold.ll +++ b/llvm/test/Analysis/ScalarEvolution/fold.ll @@ -132,6 +132,6 @@ define i64 @test11(i64 %a) { ; CHECK-LABEL: @test11 %t0 = udiv i64 0, %a ; CHECK: %t0 -; CHECK-NEXT: --> (0 /u %a) +; CHECK-NEXT: --> 0 ret i64 %t0 } From bb41f8569138f9f87baf7f4b4e26b3cdcdfd42c6 Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Wed, 16 Jun 2021 15:24:23 +0200 Subject: [PATCH 298/619] [clangd] Correct SelectionTree behavior around anonymous field access. struct A { struct { int b; }; }; A().^b; This should be considered a reference to b, but currently it's considered a reference to the anonymous struct field. Fixes https://github.com/clangd/clangd/issues/798 Differential Revision: https://reviews.llvm.org/D104376 --- clang-tools-extra/clangd/Selection.cpp | 29 ++++++++++++++++--- .../clangd/unittests/XRefsTests.cpp | 12 ++++++++ 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/clang-tools-extra/clangd/Selection.cpp b/clang-tools-extra/clangd/Selection.cpp index 017f4b22861b5..ad41dec9f20f8 100644 --- a/clang-tools-extra/clangd/Selection.cpp +++ b/clang-tools-extra/clangd/Selection.cpp @@ -57,6 +57,27 @@ void recordMetrics(const SelectionTree &S, const LangOptions &Lang) { SelectionUsedRecovery.record(0, LanguageLabel); // unused. } +SourceRange getSourceRange(const DynTypedNode &N) { + // MemberExprs to implicitly access anonymous fields should not claim any + // tokens for themselves. Given: + // struct A { struct { int b; }; }; + // The clang AST reports the following nodes for an access to b: + // A().b; + // [----] MemberExpr, base = A()., member = b + // [----] MemberExpr: base = A(), member = + // [-] CXXConstructExpr + // For our purposes, we don't want the second MemberExpr to own any tokens, + // so we reduce its range to match the CXXConstructExpr. + // (It's not clear that changing the clang AST would be correct in general). + if (const auto *ME = N.get()) { + if (!ME->getMemberDecl()->getDeclName()) + return ME->getBase() + ? getSourceRange(DynTypedNode::create(*ME->getBase())) + : SourceRange(); + } + return N.getSourceRange(); +} + // An IntervalSet maintains a set of disjoint subranges of an array. // // Initially, it contains the entire array. @@ -608,7 +629,7 @@ class SelectionVisitor : public RecursiveASTVisitor { // An optimization for a common case: nodes outside macro expansions that // don't intersect the selection may be recursively skipped. bool canSafelySkipNode(const DynTypedNode &N) { - SourceRange S = N.getSourceRange(); + SourceRange S = getSourceRange(N); if (auto *TL = N.get()) { // FIXME: TypeLoc::getBeginLoc()/getEndLoc() are pretty fragile // heuristics. We should consider only pruning critical TypeLoc nodes, to @@ -665,7 +686,7 @@ class SelectionVisitor : public RecursiveASTVisitor { void pop() { Node &N = *Stack.top(); dlog("{1}pop: {0}", printNodeToString(N.ASTNode, PrintPolicy), indent(-1)); - claimRange(N.ASTNode.getSourceRange(), N.Selected); + claimRange(getSourceRange(N.ASTNode), N.Selected); if (N.Selected == NoTokens) N.Selected = SelectionTree::Unselected; if (N.Selected || !N.Children.empty()) { @@ -868,13 +889,13 @@ const DeclContext &SelectionTree::Node::getDeclContext() const { const SelectionTree::Node &SelectionTree::Node::ignoreImplicit() const { if (Children.size() == 1 && - Children.front()->ASTNode.getSourceRange() == ASTNode.getSourceRange()) + getSourceRange(Children.front()->ASTNode) == getSourceRange(ASTNode)) return Children.front()->ignoreImplicit(); return *this; } const SelectionTree::Node &SelectionTree::Node::outerImplicit() const { - if (Parent && Parent->ASTNode.getSourceRange() == ASTNode.getSourceRange()) + if (Parent && getSourceRange(Parent->ASTNode) == getSourceRange(ASTNode)) return Parent->outerImplicit(); return *this; } diff --git a/clang-tools-extra/clangd/unittests/XRefsTests.cpp b/clang-tools-extra/clangd/unittests/XRefsTests.cpp index 8c37532507d45..166e0674afea6 100644 --- a/clang-tools-extra/clangd/unittests/XRefsTests.cpp +++ b/clang-tools-extra/clangd/unittests/XRefsTests.cpp @@ -420,6 +420,18 @@ TEST(LocateSymbol, All) { // $def is the definition location (if absent, symbol has no definition) // unnamed range becomes both $decl and $def. const char *Tests[] = { + R"cpp( + struct X { + union { + int [[a]]; + float b; + }; + }; + int test(X &x) { + return x.^a; + } + )cpp", + R"cpp(// Local variable int main() { int [[bonjour]]; From 0c96a92d8666b8eb69eb1275aed572f857182d9a Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Thu, 15 Apr 2021 14:29:57 +0200 Subject: [PATCH 299/619] [clangd] Log feature configuration (linux+asan+grpc) of the clangd build Included in logs, --version, remote index queries, and LSP serverInfo. Differential Revision: https://reviews.llvm.org/D100553 --- clang-tools-extra/clangd/CMakeLists.txt | 1 + clang-tools-extra/clangd/ClangdLSPServer.cpp | 5 +- clang-tools-extra/clangd/ClangdLSPServer.h | 1 - clang-tools-extra/clangd/ConfigCompile.cpp | 2 +- clang-tools-extra/clangd/Features.cpp | 55 +++++++++++++++++++ clang-tools-extra/clangd/Features.h | 29 ++++++++++ clang-tools-extra/clangd/Features.inc.in | 1 + .../clangd/index/remote/Client.cpp | 5 +- .../clangd/index/remote/server/Server.cpp | 2 +- clang-tools-extra/clangd/tool/ClangdMain.cpp | 9 +-- .../clangd/unittests/ConfigCompileTests.cpp | 2 +- 11 files changed, 100 insertions(+), 12 deletions(-) create mode 100644 clang-tools-extra/clangd/Features.cpp create mode 100644 clang-tools-extra/clangd/Features.h diff --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt index 671e55e8622d3..b983b71cc90f4 100644 --- a/clang-tools-extra/clangd/CMakeLists.txt +++ b/clang-tools-extra/clangd/CMakeLists.txt @@ -64,6 +64,7 @@ add_clang_library(clangDaemon DumpAST.cpp ExpectedTypes.cpp FeatureModule.cpp + Features.cpp FindSymbols.cpp FindTarget.cpp FileDistance.cpp diff --git a/clang-tools-extra/clangd/ClangdLSPServer.cpp b/clang-tools-extra/clangd/ClangdLSPServer.cpp index f70fd0018cfdf..9214bcbe66bca 100644 --- a/clang-tools-extra/clangd/ClangdLSPServer.cpp +++ b/clang-tools-extra/clangd/ClangdLSPServer.cpp @@ -12,6 +12,7 @@ #include "Diagnostics.h" #include "DraftStore.h" #include "DumpAST.h" +#include "Features.h" #include "GlobalCompilationDatabase.h" #include "LSPBinder.h" #include "Protocol.h" @@ -24,7 +25,6 @@ #include "support/MemoryTree.h" #include "support/Trace.h" #include "clang/AST/ASTContext.h" -#include "clang/Basic/Version.h" #include "clang/Tooling/Core/Replacement.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/Optional.h" @@ -620,7 +620,8 @@ void ClangdLSPServer::onInitialize(const InitializeParams &Params, llvm::json::Object Result{ {{"serverInfo", llvm::json::Object{{"name", "clangd"}, - {"version", getClangToolFullVersion("clangd")}}}, + {"version", llvm::formatv("{0} {1}", versionString(), + featureString())}}}, {"capabilities", std::move(ServerCaps)}}}; if (Opts.Encoding) Result["offsetEncoding"] = *Opts.Encoding; diff --git a/clang-tools-extra/clangd/ClangdLSPServer.h b/clang-tools-extra/clangd/ClangdLSPServer.h index 8c43d18502875..4c195df6f893c 100644 --- a/clang-tools-extra/clangd/ClangdLSPServer.h +++ b/clang-tools-extra/clangd/ClangdLSPServer.h @@ -11,7 +11,6 @@ #include "ClangdServer.h" #include "DraftStore.h" -#include "Features.inc" #include "FindSymbols.h" #include "GlobalCompilationDatabase.h" #include "LSPBinder.h" diff --git a/clang-tools-extra/clangd/ConfigCompile.cpp b/clang-tools-extra/clangd/ConfigCompile.cpp index 438dd74d866c6..4eaff343b2290 100644 --- a/clang-tools-extra/clangd/ConfigCompile.cpp +++ b/clang-tools-extra/clangd/ConfigCompile.cpp @@ -28,7 +28,7 @@ #include "ConfigFragment.h" #include "ConfigProvider.h" #include "Diagnostics.h" -#include "Features.inc" +#include "Features.h" #include "TidyProvider.h" #include "support/Logger.h" #include "support/Path.h" diff --git a/clang-tools-extra/clangd/Features.cpp b/clang-tools-extra/clangd/Features.cpp new file mode 100644 index 0000000000000..17f475fc4c22b --- /dev/null +++ b/clang-tools-extra/clangd/Features.cpp @@ -0,0 +1,55 @@ +//===--- Features.cpp - Compile-time configuration ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Features.h" +#include "clang/Basic/Version.h" +#include "llvm/Support/Compiler.h" + +namespace clang { +namespace clangd { + +std::string versionString() { return clang::getClangToolFullVersion("clangd"); } + +std::string featureString() { + return +#if defined(_WIN32) + "windows" +#elif defined(__APPLE__) + "mac" +#elif defined(__linux__) + "linux" +#elif defined(LLVM_ON_UNIX) + "unix" +#else + "unknown" +#endif + +#ifndef NDEBUG + "+debug" +#endif +#if LLVM_ADDRESS_SANITIZER_BUILD + "+asan" +#endif +#if LLVM_THREAD_SANITIZER_BUILD + "+tsan" +#endif +#if LLVM_MEMORY_SANITIZER_BUILD + "+msan" +#endif + +#if CLANGD_ENABLE_REMOTE + "+grpc" +#endif +#if CLANGD_BUILD_XPC + "+xpc" +#endif + ; +} + +} // namespace clangd +} // namespace clang diff --git a/clang-tools-extra/clangd/Features.h b/clang-tools-extra/clangd/Features.h new file mode 100644 index 0000000000000..6fa3618578632 --- /dev/null +++ b/clang-tools-extra/clangd/Features.h @@ -0,0 +1,29 @@ +//===--- Features.h - Compile-time configuration ------------------*-C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_FEATURES_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_FEATURES_H +#include + +// Export constants like CLANGD_BUILD_XPC +#include "Features.inc" + +namespace clang { +namespace clangd { + +// Returns a version string for clangd, e.g. "clangd 10.0.0" +std::string versionString(); + +// Returns a string describing the compile-time configuration. +// e.g. mac+debug+asan+grpc +std::string featureString(); + +} // namespace clangd +} // namespace clang + +#endif diff --git a/clang-tools-extra/clangd/Features.inc.in b/clang-tools-extra/clangd/Features.inc.in index 5dfde58890b7e..72464d89b830e 100644 --- a/clang-tools-extra/clangd/Features.inc.in +++ b/clang-tools-extra/clangd/Features.inc.in @@ -1,3 +1,4 @@ +// IWYU pragma: private, include "Features.h" #define CLANGD_BUILD_XPC @CLANGD_BUILD_XPC@ #define CLANGD_ENABLE_REMOTE @CLANGD_ENABLE_REMOTE@ #define ENABLE_GRPC_REFLECTION @ENABLE_GRPC_REFLECTION@ diff --git a/clang-tools-extra/clangd/index/remote/Client.cpp b/clang-tools-extra/clangd/index/remote/Client.cpp index b92c6520f61c0..ac6b7c0c829ff 100644 --- a/clang-tools-extra/clangd/index/remote/Client.cpp +++ b/clang-tools-extra/clangd/index/remote/Client.cpp @@ -9,12 +9,12 @@ #include #include "Client.h" +#include "Features.h" #include "Service.grpc.pb.h" #include "index/Index.h" #include "marshalling/Marshalling.h" #include "support/Logger.h" #include "support/Trace.h" -#include "clang/Basic/Version.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Error.h" @@ -72,7 +72,8 @@ class IndexClient : public clangd::SymbolIndex { const auto RPCRequest = ProtobufMarshaller->toProtobuf(Request); SPAN_ATTACH(Tracer, "Request", RPCRequest.DebugString()); grpc::ClientContext Context; - Context.AddMetadata("version", clang::getClangToolFullVersion("clangd")); + Context.AddMetadata("version", versionString()); + Context.AddMetadata("features", featureString()); std::chrono::system_clock::time_point StartTime = std::chrono::system_clock::now(); auto Deadline = StartTime + DeadlineWaitingTime; diff --git a/clang-tools-extra/clangd/index/remote/server/Server.cpp b/clang-tools-extra/clangd/index/remote/server/Server.cpp index 04ad0b2a1936f..d2f96ba1a1b53 100644 --- a/clang-tools-extra/clangd/index/remote/server/Server.cpp +++ b/clang-tools-extra/clangd/index/remote/server/Server.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "Features.inc" +#include "Features.h" #include "Index.pb.h" #include "MonitoringService.grpc.pb.h" #include "MonitoringService.pb.h" diff --git a/clang-tools-extra/clangd/tool/ClangdMain.cpp b/clang-tools-extra/clangd/tool/ClangdMain.cpp index 6d70a9cf03f6e..8db52c65061c8 100644 --- a/clang-tools-extra/clangd/tool/ClangdMain.cpp +++ b/clang-tools-extra/clangd/tool/ClangdMain.cpp @@ -10,7 +10,7 @@ #include "CodeComplete.h" #include "Config.h" #include "ConfigProvider.h" -#include "Features.inc" +#include "Features.h" #include "PathMapping.h" #include "Protocol.h" #include "TidyProvider.h" @@ -26,7 +26,6 @@ #include "support/Shutdown.h" #include "support/ThreadsafeFS.h" #include "support/Trace.h" -#include "clang/Basic/Version.h" #include "clang/Format/Format.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallString.h" @@ -679,7 +678,8 @@ int main(int argc, char *argv[]) { llvm::sys::PrintStackTraceOnErrorSignal(argv[0]); llvm::sys::SetInterruptFunction(&requestShutdown); llvm::cl::SetVersionPrinter([](llvm::raw_ostream &OS) { - OS << clang::getClangToolFullVersion("clangd") << "\n"; + OS << versionString() << "\n" + << "Features: " << featureString() << "\n"; }); const char *FlagsEnvVar = "CLANGD_FLAGS"; const char *Overview = @@ -784,7 +784,8 @@ clangd accepts flags on the commandline, and in the CLANGD_FLAGS environment var StreamLogger Logger(llvm::errs(), LogLevel); LoggingSession LoggingSession(Logger); // Write some initial logs before we start doing any real work. - log("{0}", clang::getClangToolFullVersion("clangd")); + log("{0}", versionString()); + log("Features: {0}", featureString()); log("PID: {0}", llvm::sys::Process::getProcessId()); { SmallString<128> CWD; diff --git a/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp b/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp index 381180381f36f..93b85d8c0b5dc 100644 --- a/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp +++ b/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp @@ -9,7 +9,7 @@ #include "Config.h" #include "ConfigFragment.h" #include "ConfigTesting.h" -#include "Features.inc" +#include "Features.h" #include "TestFS.h" #include "clang/Basic/DiagnosticSema.h" #include "llvm/ADT/None.h" From b447445eaa6f8ff826a7eab276c10bc6f133aeb0 Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Thu, 11 Mar 2021 01:20:36 +0100 Subject: [PATCH 300/619] [clangd] Show padding following a field on field hover. This displays as: `Size: 4 bytes (+4 padding)` Also stop showing (byte) offset/size for bitfields. They're not meaningful and using them to calculate padding is dangerous! Differential Revision: https://reviews.llvm.org/D98377 --- clang-tools-extra/clangd/Hover.cpp | 36 +++++++++++++--- clang-tools-extra/clangd/Hover.h | 2 + .../clangd/unittests/HoverTests.cpp | 41 ++++++++++++++++++- 3 files changed, 71 insertions(+), 8 deletions(-) diff --git a/clang-tools-extra/clangd/Hover.cpp b/clang-tools-extra/clangd/Hover.cpp index 8b14777173a0f..c71a8c40ce94a 100644 --- a/clang-tools-extra/clangd/Hover.cpp +++ b/clang-tools-extra/clangd/Hover.cpp @@ -28,6 +28,7 @@ #include "clang/AST/ExprCXX.h" #include "clang/AST/OperationKinds.h" #include "clang/AST/PrettyPrinter.h" +#include "clang/AST/RecordLayout.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/Type.h" #include "clang/Basic/SourceLocation.h" @@ -770,10 +771,30 @@ void addLayoutInfo(const NamedDecl &ND, HoverInfo &HI) { const auto *Record = FD->getParent(); if (Record) Record = Record->getDefinition(); - if (Record && !Record->isInvalidDecl() && !Record->isDependentType()) { - HI.Offset = Ctx.getFieldOffset(FD) / 8; - if (auto Size = Ctx.getTypeSizeInCharsIfKnown(FD->getType())) - HI.Size = Size->getQuantity(); + if (Record && !Record->isInvalidDecl() && !Record->isDependentType() && + !FD->isBitField()) { + const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(Record); + HI.Offset = Layout.getFieldOffset(FD->getFieldIndex()) / 8; + if (auto Size = Ctx.getTypeSizeInCharsIfKnown(FD->getType())) { + HI.Size = FD->isZeroSize(Ctx) ? 0 : Size->getQuantity(); + unsigned EndOfField = *HI.Offset + *HI.Size; + + // Calculate padding following the field. + if (!Record->isUnion() && + FD->getFieldIndex() + 1 < Layout.getFieldCount()) { + // Measure padding up to the next class field. + unsigned NextOffset = + Layout.getFieldOffset(FD->getFieldIndex() + 1) / 8; + if (NextOffset >= EndOfField) // next field could be a bitfield! + HI.Padding = NextOffset - EndOfField; + } else { + // Measure padding up to the end of the object. + HI.Padding = Layout.getSize().getQuantity() - EndOfField; + } + } + // Offset in a union is always zero, so not really useful to report. + if (Record->isUnion()) + HI.Offset.reset(); } return; } @@ -1013,9 +1034,12 @@ markup::Document HoverInfo::present() const { Output.addParagraph().appendText( llvm::formatv("Offset: {0} byte{1}", *Offset, *Offset == 1 ? "" : "s") .str()); - if (Size) - Output.addParagraph().appendText( + if (Size) { + auto &P = Output.addParagraph().appendText( llvm::formatv("Size: {0} byte{1}", *Size, *Size == 1 ? "" : "s").str()); + if (Padding && *Padding != 0) + P.appendText(llvm::formatv(" (+{0} padding)", *Padding).str()); + } if (CalleeArgInfo) { assert(CallPassType); diff --git a/clang-tools-extra/clangd/Hover.h b/clang-tools-extra/clangd/Hover.h index 2f2afbf6723bf..44ee9b7d79797 100644 --- a/clang-tools-extra/clangd/Hover.h +++ b/clang-tools-extra/clangd/Hover.h @@ -77,6 +77,8 @@ struct HoverInfo { llvm::Optional Size; /// Contains the offset of fields within the enclosing class. llvm::Optional Offset; + /// Contains the padding following a field within the enclosing class. + llvm::Optional Padding; // Set when symbol is inside function call. Contains information extracted // from the callee definition about the argument this is passed as. llvm::Optional CalleeArgInfo; diff --git a/clang-tools-extra/clangd/unittests/HoverTests.cpp b/clang-tools-extra/clangd/unittests/HoverTests.cpp index c2645b99926c3..9089a4859c144 100644 --- a/clang-tools-extra/clangd/unittests/HoverTests.cpp +++ b/clang-tools-extra/clangd/unittests/HoverTests.cpp @@ -68,8 +68,9 @@ TEST(Hover, Structured) { // Field {R"cpp( namespace ns1 { namespace ns2 { - struct Foo { + class Foo { char [[b^ar]]; + double y[2]; }; }} )cpp", @@ -82,6 +83,41 @@ TEST(Hover, Structured) { HI.Type = "char"; HI.Offset = 0; HI.Size = 1; + HI.Padding = 7; + HI.AccessSpecifier = "private"; + }}, + // Union field + {R"cpp( + union Foo { + char [[b^ar]]; + double y[2]; + }; + )cpp", + [](HoverInfo &HI) { + HI.NamespaceScope = ""; + HI.LocalScope = "Foo::"; + HI.Name = "bar"; + HI.Kind = index::SymbolKind::Field; + HI.Definition = "char bar"; + HI.Type = "char"; + HI.Size = 1; + HI.Padding = 15; + HI.AccessSpecifier = "public"; + }}, + // Bitfield + {R"cpp( + struct Foo { + int [[^x]] : 1; + int y : 1; + }; + )cpp", + [](HoverInfo &HI) { + HI.NamespaceScope = ""; + HI.LocalScope = "Foo::"; + HI.Name = "x"; + HI.Kind = index::SymbolKind::Field; + HI.Definition = "int x : 1"; + HI.Type = "int"; HI.AccessSpecifier = "public"; }}, // Local to class method. @@ -2558,13 +2594,14 @@ template class Foo {})", HI.Definition = "def"; HI.Size = 4; HI.Offset = 12; + HI.Padding = 4; }, R"(field foo Type: type Value = value Offset: 12 bytes -Size: 4 bytes +Size: 4 bytes (+4 padding) // In test::Bar def)", From ecabc6684f23cb65925d62fac9a14ab52d07951a Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Wed, 30 Jun 2021 10:55:14 -0400 Subject: [PATCH 301/619] [OpenMP] Change analysis remarks to not emit on cold functions The remarks will trigger on some functions that are marked cold, such as the `__muldc3` intrinsic functions. Change the remarks to avoid these functions. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D105196 --- llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 6 +++--- .../OpenMP/single_threaded_execution.ll | 21 ++++++++++++++----- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index c6b77a3c87c7b..5c4be0b8b9bda 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -2650,8 +2650,8 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) { auto EmitRemark = [&](Function &F) { auto &ORE = FAM.getResult(F); ORE.emit([&]() { - OptimizationRemarkMissed ORM(DEBUG_TYPE, "InternalizationFailure", &F); - return ORM << "Could not internalize function. " + OptimizationRemarkAnalysis ORA(DEBUG_TYPE, "InternalizationFailure", &F); + return ORA << "Could not internalize function. " << "Some optimizations may not be possible."; }); }; @@ -2664,7 +2664,7 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) { if (!F.isDeclaration() && !Kernels.contains(&F) && IsCalled(F)) { if (Attributor::internalizeFunction(F, /* Force */ true)) { InternalizedFuncs.insert(&F); - } else if (!F.hasLocalLinkage()) { + } else if (!F.hasLocalLinkage() && !F.hasFnAttribute(Attribute::Cold)) { EmitRemark(F); } } diff --git a/llvm/test/Transforms/OpenMP/single_threaded_execution.ll b/llvm/test/Transforms/OpenMP/single_threaded_execution.ll index f5ae28ea351f6..5fff563d364d8 100644 --- a/llvm/test/Transforms/OpenMP/single_threaded_execution.ll +++ b/llvm/test/Transforms/OpenMP/single_threaded_execution.ll @@ -1,5 +1,5 @@ ; RUN: opt -passes=openmp-opt -debug-only=openmp-opt -disable-output < %s 2>&1 | FileCheck %s -; RUN: opt -passes=openmp-opt -pass-remarks-missed=openmp-opt -disable-output < %s 2>&1 | FileCheck %s --check-prefix=REMARKS +; RUN: opt -passes=openmp-opt -pass-remarks-analysis=openmp-opt -disable-output < %s 2>&1 | FileCheck %s --check-prefix=REMARKS ; REQUIRES: asserts ; ModuleID = 'single_threaded_exeuction.c' @@ -11,6 +11,7 @@ define weak void @kernel() { } ; REMARKS: remark: single_threaded_execution.c:1:0: Could not internalize function. Some optimizations may not be possible. +; REMARKS-NOT: remark: single_threaded_execution.c:1:0: Could not internalize function. Some optimizations may not be possible. ; CHECK-NOT: [openmp-opt] Basic block @nvptx entry is executed by a single thread. ; CHECK: [openmp-opt] Basic block @nvptx if.then is executed by a single thread. @@ -26,6 +27,7 @@ if.then: call void @foo() call void @bar() call void @baz() + call void @cold() br label %if.end if.end: @@ -46,6 +48,7 @@ if.then: call void @foo() call void @bar() call void @baz() + call void @cold() br label %if.end if.end: @@ -73,17 +76,25 @@ entry: ret void } +; CHECK-NOT: [openmp-opt] Basic block @cold entry is executed by a single thread. +; Function Attrs: cold convergent noinline nounwind optnone mustprogress +define weak void @cold() #0 !dbg !9 { +entry: + ret void +} + declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() declare i32 @llvm.amdgcn.workitem.id.x() declare void @__kmpc_kernel_init(i32, i16) +attributes #0 = { cold noinline } + !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!3, !4, !5, !6} !nvvm.annotations = !{!7} - !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) !1 = !DIFile(filename: "single_threaded_execution.c", directory: "/tmp/single_threaded_execution.c") !2 = !{} @@ -92,6 +103,6 @@ declare void @__kmpc_kernel_init(i32, i16) !5 = !{i32 7, !"openmp", i32 50} !6 = !{i32 7, !"openmp-device", i32 50} !7 = !{void ()* @kernel, !"kernel", i32 1} -!8 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 8, type: !9, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) -!9 = !DISubroutineType(types: !2) -!10 = !DILocation(line: 5, column: 7, scope: !8) +!8 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 8, type: !10, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!9 = distinct !DISubprogram(name: "cold", scope: !1, file: !1, line: 8, type: !10, scopeLine: 2, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!10 = !DISubroutineType(types: !2) From 49555441628a0ec620581bba371e6bb20c2b3f5f Mon Sep 17 00:00:00 2001 From: Jeremy Morse Date: Tue, 29 Jun 2021 18:50:24 +0100 Subject: [PATCH 302/619] [LiveDebugValues][InstrRef][1/2] Recover more clobbered variable locations In various circumstances, when we clobber a register there may be alternative locations that the value is live in. The classic example would be a value loaded from the stack, and then clobbered: the value is still available on the stack. InstrRefBasedLDV was coping with this at block starts where it's forced to pick a location, however it wasn't searching for alternative locations when values were clobbered. This patch notifies the "Transfer Tracker" object when clobbers occur, and it's able to find alternatives and issue DBG_VALUEs for that location. See: the added test. Differential Revision: https://reviews.llvm.org/D88405 --- .../LiveDebugValues/InstrRefBasedImpl.cpp | 96 +++++++++++++++-- .../LiveDebugValues/LiveDebugValues.cpp | 10 ++ .../livedebugvalues_instrref_tolocs.mir | 2 +- .../livedebugvalues_recover_clobbers.mir | 100 ++++++++++++++++++ .../MIR/X86/livedebugvalues_load_in_loop.mir | 1 + 5 files changed, 198 insertions(+), 11 deletions(-) create mode 100644 llvm/test/DebugInfo/MIR/InstrRef/livedebugvalues_recover_clobbers.mir diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp index c29900b2c694d..83c3ceccf0640 100644 --- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp @@ -1225,26 +1225,63 @@ class TransferTracker { } } - /// Explicitly terminate variable locations based on \p mloc. Creates undef - /// DBG_VALUEs for any variables that were located there, and clears - /// #ActiveMLoc / #ActiveVLoc tracking information for that location. - void clobberMloc(LocIdx MLoc, MachineBasicBlock::iterator Pos) { - assert(MTracker->isSpill(MLoc)); + /// Account for a location \p mloc being clobbered. Examine the variable + /// locations that will be terminated: and try to recover them by using + /// another location. Optionally, given \p MakeUndef, emit a DBG_VALUE to + /// explicitly terminate a location if it can't be recovered. + void clobberMloc(LocIdx MLoc, MachineBasicBlock::iterator Pos, + bool MakeUndef = true) { auto ActiveMLocIt = ActiveMLocs.find(MLoc); if (ActiveMLocIt == ActiveMLocs.end()) return; + // What was the old variable value? + ValueIDNum OldValue = VarLocs[MLoc.asU64()]; VarLocs[MLoc.asU64()] = ValueIDNum::EmptyValue; + // Examine the remaining variable locations: if we can find the same value + // again, we can recover the location. + Optional NewLoc = None; + for (auto Loc : MTracker->locations()) + if (Loc.Value == OldValue) + NewLoc = Loc.Idx; + + // If there is no location, and we weren't asked to make the variable + // explicitly undef, then stop here. + if (!NewLoc && !MakeUndef) + return; + + // Examine all the variables based on this location. + DenseSet NewMLocs; for (auto &Var : ActiveMLocIt->second) { auto ActiveVLocIt = ActiveVLocs.find(Var); - // Create an undef. We can't feed in a nullptr DIExpression alas, - // so use the variables last expression. Pass None as the location. + // Re-state the variable location: if there's no replacement then NewLoc + // is None and a $noreg DBG_VALUE will be created. Otherwise, a DBG_VALUE + // identifying the alternative location will be emitted. const DIExpression *Expr = ActiveVLocIt->second.Properties.DIExpr; DbgValueProperties Properties(Expr, false); - PendingDbgValues.push_back(MTracker->emitLoc(None, Var, Properties)); - ActiveVLocs.erase(ActiveVLocIt); + PendingDbgValues.push_back(MTracker->emitLoc(NewLoc, Var, Properties)); + + // Update machine locations <=> variable locations maps. Defer updating + // ActiveMLocs to avoid invalidaing the ActiveMLocIt iterator. + if (!NewLoc) { + ActiveVLocs.erase(ActiveVLocIt); + } else { + ActiveVLocIt->second.Loc = *NewLoc; + NewMLocs.insert(Var); + } } + + // Commit any deferred ActiveMLoc changes. + if (!NewMLocs.empty()) + for (auto &Var : NewMLocs) + ActiveMLocs[*NewLoc].insert(Var); + + // We lazily track what locations have which values; if we've found a new + // location for the clobbered value, remember it. + if (NewLoc) + VarLocs[NewLoc->asU64()] = OldValue; + flushDbgValues(Pos, nullptr); ActiveMLocIt->second.clear(); @@ -1899,6 +1936,32 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) { for (auto *MO : RegMaskPtrs) MTracker->writeRegMask(MO, CurBB, CurInst); + + if (!TTracker) + return; + + // When committing variable values to locations: tell transfer tracker that + // we've clobbered things. It may be able to recover the variable from a + // different location. + + // Inform TTracker about any direct clobbers. + for (uint32_t DeadReg : DeadRegs) { + LocIdx Loc = MTracker->lookupOrTrackRegister(DeadReg); + TTracker->clobberMloc(Loc, MI.getIterator(), false); + } + + // Look for any clobbers performed by a register mask. Only test locations + // that are actually being tracked. + for (auto L : MTracker->locations()) { + // Stack locations can't be clobbered by regmasks. + if (MTracker->isSpill(L.Idx)) + continue; + + Register Reg = MTracker->LocIdxToLocID[L.Idx]; + for (auto *MO : RegMaskPtrs) + if (MO->clobbersPhysReg(Reg)) + TTracker->clobberMloc(L.Idx, MI.getIterator(), false); + } } void InstrRefBasedLDV::performCopy(Register SrcRegNum, Register DstRegNum) { @@ -2046,8 +2109,12 @@ bool InstrRefBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI) { if (TTracker) { Optional MLoc = MTracker->getSpillMLoc(*Loc); - if (MLoc) + if (MLoc) { + // Un-set this location before clobbering, so that we don't salvage + // the variable location back to the same place. + MTracker->setMLoc(*MLoc, ValueIDNum::EmptyValue); TTracker->clobberMloc(*MLoc, MI.getIterator()); + } } } @@ -2162,6 +2229,15 @@ bool InstrRefBasedLDV::transferRegisterCopy(MachineInstr &MI) { if (EmulateOldLDV && SrcReg != DestReg) MTracker->defReg(SrcReg, CurBB, CurInst); + // Finally, the copy might have clobbered variables based on the destination + // register. Tell TTracker about it, in case a backup location exists. + if (TTracker) { + for (MCRegAliasIterator RAI(DestReg, TRI, true); RAI.isValid(); ++RAI) { + LocIdx ClobberedLoc = MTracker->getRegMLoc(*RAI); + TTracker->clobberMloc(ClobberedLoc, MI.getIterator(), false); + } + } + return true; } diff --git a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp index 770c46ec84369..38e803d1abb55 100644 --- a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp @@ -14,6 +14,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetMachine.h" /// \file LiveDebugValues.cpp @@ -33,6 +34,12 @@ using namespace llvm; +static cl::opt + ForceInstrRefLDV("force-instr-ref-livedebugvalues", cl::Hidden, + cl::desc("Use instruction-ref based LiveDebugValues with " + "normal DBG_VALUE inputs"), + cl::init(false)); + /// Generic LiveDebugValues pass. Calls through to VarLocBasedLDV or /// InstrRefBasedLDV to perform location propagation, via the LDVImpl /// base class. @@ -87,6 +94,9 @@ bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) { InstrRefBased = TM.Options.ValueTrackingVariableLocations; } + // Allow the user to force selection of InstrRef LDV. + InstrRefBased |= ForceInstrRefLDV; + if (InstrRefBased) TheImpl = llvm::makeInstrRefBasedLiveDebugValues(); else diff --git a/llvm/test/DebugInfo/MIR/InstrRef/livedebugvalues_instrref_tolocs.mir b/llvm/test/DebugInfo/MIR/InstrRef/livedebugvalues_instrref_tolocs.mir index 9f77add7bc137..578cac9dc0ec4 100644 --- a/llvm/test/DebugInfo/MIR/InstrRef/livedebugvalues_instrref_tolocs.mir +++ b/llvm/test/DebugInfo/MIR/InstrRef/livedebugvalues_instrref_tolocs.mir @@ -42,7 +42,7 @@ body: | $rbx = COPY killed $rax, debug-location !17 $rax = MOV64ri 1, debug-location !17 - ; Presently, this COPY isn't followed. Dealing with that is future work. + ; CHECK: DBG_VALUE $rbx, $noreg DBG_INSTR_REF 2, 0, !16, !DIExpression(), debug-location !17 ; No instruction is labelled with the number "2". This should produce an diff --git a/llvm/test/DebugInfo/MIR/InstrRef/livedebugvalues_recover_clobbers.mir b/llvm/test/DebugInfo/MIR/InstrRef/livedebugvalues_recover_clobbers.mir new file mode 100644 index 0000000000000..3d45a548e26e9 --- /dev/null +++ b/llvm/test/DebugInfo/MIR/InstrRef/livedebugvalues_recover_clobbers.mir @@ -0,0 +1,100 @@ +--- | + ; RUN: llc %s -march=x86-64 -run-pass=livedebugvalues -o - -force-instr-ref-livedebugvalues=1 -emulate-old-livedebugvalues=0 | FileCheck %s -implicit-check-not=DBG_VALUE + + ;; When using instruction referencing LiveDebugValues, when a register gets + ;; clobbered, we should transfer variable locations to backup locations, if + ;; one is available. + ;; I've written this test in terms of DBG_VALUEs rather than DBG_INSTR_REFs + ;; as this is purely a LiveDebugValues feature, and should work without the + ;; need to use any other instructoin referencing work. + + declare i32 @use() local_unnamed_addr; + + define i32 @_Z8bb_to_bb() local_unnamed_addr !dbg !12 { + entry: + br label %bb1, !dbg !17 + bb1: + br label %bb2, !dbg !17 + bb2: + br label %bb3, !dbg !17 + bb3: + br label %bb3, !dbg !17 + bb4: + br label %bb3, !dbg !17 + bb5: + ret i32 0, !dbg !17 + } + + !llvm.dbg.cu = !{!0} + !llvm.module.flags = !{!7, !8, !9, !10} + !llvm.ident = !{!11} + !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 10.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, globals: !3, debugInfoForProfiling: true, nameTableKind: None) + !1 = !DIFile(filename: "main.cpp", directory: "F:\") + !2 = !{} + !3 = !{!4} + !4 = !DIGlobalVariableExpression(var: !5, expr: !DIExpression()) + !5 = distinct !DIGlobalVariable(name: "start", scope: !0, file: !1, line: 4, type: !6, isLocal: false, isDefinition: true) + !6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) + !7 = !{i32 2, !"Dwarf Version", i32 4} + !8 = !{i32 2, !"Debug Info Version", i32 3} + !9 = !{i32 1, !"wchar_size", i32 2} + !10 = !{i32 7, !"PIC Level", i32 2} + !11 = !{!"clang version 10.0.0"} + !12 = distinct !DISubprogram(name: "bb_to_bb", linkageName: "bb_to_bb", scope: !1, file: !1, line: 6, type: !13, scopeLine: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !15) + !13 = !DISubroutineType(types: !14) + !14 = !{!6, !6} + !15 = !{!16} + !16 = !DILocalVariable(name: "myVar", scope: !12, file: !1, line: 7, type: !6) + !17 = !DILocation(line: 10, scope: !12) + +... +--- +name: _Z8bb_to_bb +stack: + - { id: 0, type: spill-slot, offset: -12, size: 4, alignment: 4 } +body: | + bb.0.entry: + $eax = MOV32ri 0, debug-location !17 + $eax = COPY $ebx + DBG_VALUE $eax, $noreg, !16, !DIExpression(), debug-location !17 + ;; Over-write eax, we should recover its location as being in ebx. + $eax = MOV32ri 0, debug-location !17 + + ; CHECK: DBG_VALUE $eax + ; CHECK-NEXT: $eax = MOV32ri 0 + ; CHECK-NEXT: DBG_VALUE $ebx + + ;; The same should occur for spills. + $ebx = MOV32ri 2, debug-location !17 + MOV32mr $rsp, 1, _, -12, _, killed $ebx :: (store 4 into %stack.0) + DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + $ebx = MOV32ri 0, debug-location !17 + + ; CHECK-NEXT: $ebx = MOV32ri 2 + ; CHECK-NEXT: MOV32mr $rsp + ; CHECK-NEXT: DBG_VALUE $ebx + ; CHECK-NEXT: $ebx = MOV32ri + ; CHECK-NEXT: DBG_VALUE $rsp + + ;; Now test copies and register masks. + $eax = COPY $ebx + DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + ;; Overwrite ebx with a copy. + $ecx = MOV32ri 1, debug-location !17 + $ebx = COPY $ecx + + ; CHECK: DBG_VALUE $ebx + ; CHECK-NEXT: $ecx = MOV32ri + ; CHECK-NEXT: $ebx = COPY + ; CHECK-NEXT: DBG_VALUE $eax + + ;; Similarly, with a register mask + $ebx = COPY $eax + CALL64pcrel32 @use, csr_64, implicit $rsp, implicit $edi, implicit-def $rsp, debug-location !17 + + ; CHECK-NEXT: $ebx = COPY $eax + ; CHECK-NEXT: CALL64pcrel32 + ; CHECK-NEXT: DBG_VALUE $ebx + + RETQ $eax, debug-location !17 +... diff --git a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_load_in_loop.mir b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_load_in_loop.mir index 97af3bf502196..c3ef29b528beb 100644 --- a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_load_in_loop.mir +++ b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_load_in_loop.mir @@ -11,6 +11,7 @@ ; CHECK-LABEL: bb.0.entry: ; CHECK: DBG_VALUE $rdi, $noreg, !16, !DIExpression() + ; CHECK: DBG_VALUE $rbp, $noreg, !16, !DIExpression() ; CHECK-LABEL: bb.1.bb1: ; CHECK: DBG_VALUE $rbp, $noreg, !16, !DIExpression() ; CHECK: DBG_VALUE $rbp, $noreg, !16, !DIExpression() From 0596f7d828436e7db85154f2815eb3ff32d505af Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Wed, 30 Jun 2021 15:57:43 +0000 Subject: [PATCH 303/619] [gn build] Port 0c96a92d8666 --- llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn index 72d7bf34de38c..bf6c59417907c 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn @@ -86,6 +86,7 @@ static_library("clangd") { "ExpectedTypes.cpp", "FS.cpp", "FeatureModule.cpp", + "Features.cpp", "FileDistance.cpp", "FindSymbols.cpp", "FindTarget.cpp", From 439284194959a896eab273c5c2cda2b983aa85af Mon Sep 17 00:00:00 2001 From: thomasraoux Date: Wed, 30 Jun 2021 00:02:47 -0700 Subject: [PATCH 304/619] [mlir][VectorToGPU] Support converting vetor.broadcast to MMA op Differential Revision: https://reviews.llvm.org/D105175 --- .../Conversion/VectorToGPU/VectorToGPU.cpp | 33 +++++++++++++++++-- .../VectorToGPU/vector-to-mma-ops.mlir | 18 ++++++++++ 2 files changed, 48 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp b/mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp index 869301fe260bb..7298b93aae707 100644 --- a/mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp +++ b/mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp @@ -123,6 +123,12 @@ static bool constantSupportsMMAMatrixType(ConstantOp constantOp) { return constantOp.value().isa(); } +/// Return true if this is a broadcast from scalar to a 2D vector. +static bool broadcastSupportsMMAMatrixType(vector::BroadcastOp broadcastOp) { + return broadcastOp.getVectorType().getRank() == 2 && + broadcastOp.source().getType().isa(); +} + static bool supportsMMaMatrixType(Operation *op) { if (isa(op)) return true; @@ -134,6 +140,8 @@ static bool supportsMMaMatrixType(Operation *op) { return contractSupportsMMAMatrixType(contract); if (auto constant = dyn_cast(op)) return constantSupportsMMAMatrixType(constant); + if (auto broadcast = dyn_cast(op)) + return broadcastSupportsMMAMatrixType(broadcast); return false; } @@ -141,8 +149,11 @@ static bool supportsMMaMatrixType(Operation *op) { // slice can be converted to MMA operations. static SetVector getOpToConvert(mlir::Operation *op) { auto hasVectorDest = [](Operation *op) { - return op->getNumResults() == 0 || - llvm::any_of(op->getResultTypes(), + return llvm::any_of(op->getResultTypes(), + [](Type t) { return t.isa(); }); + }; + auto hasVectorSrc = [](Operation *op) { + return llvm::any_of(op->getOperandTypes(), [](Type t) { return t.isa(); }); }; SetVector opToConvert; @@ -150,7 +161,7 @@ static SetVector getOpToConvert(mlir::Operation *op) { if (opToConvert.contains(contract.getOperation())) return; SetVector dependentOps = - getSlice(contract, hasVectorDest, hasVectorDest); + getSlice(contract, hasVectorDest, hasVectorSrc); // If any instruction cannot use MMA matrix type drop the whole // chaine. MMA matrix are stored in an opaque type so they cannot be used // by all operations. @@ -329,6 +340,20 @@ static void convertConstantOp(ConstantOp op, valueMapping[op.getResult()] = matrix; } +/// Convert a vector.broadcast from scalar to a SubgroupMmaConstantMatrix op. +static void convertBroadcastOp(vector::BroadcastOp op, + llvm::DenseMap &valueMapping) { + assert(broadcastSupportsMMAMatrixType(op)); + OpBuilder b(op); + const char *fragType = inferFragType(op); + auto vecType = op.getVectorType(); + gpu::MMAMatrixType type = gpu::MMAMatrixType::get( + vecType.getShape(), vecType.getElementType(), llvm::StringRef(fragType)); + auto matrix = b.create(op.getLoc(), type, + op.source()); + valueMapping[op.getResult()] = matrix; +} + // Replace ForOp with a new ForOp with extra operands. The YieldOp is not // updated and needs to be updated separatly for the loop to be correct. static scf::ForOp replaceForOpWithNewSignature(OpBuilder &b, scf::ForOp loop, @@ -416,6 +441,8 @@ void convertVectorToMMAOps(FuncOp funcOp) { convertContractOp(contractOp, valueMapping); } else if (auto constantOp = dyn_cast(op)) { convertConstantOp(constantOp, valueMapping); + } else if (auto broadcastOp = dyn_cast(op)) { + convertBroadcastOp(broadcastOp, valueMapping); } else if (auto forOp = dyn_cast(op)) { convertForOp(forOp, valueMapping); } else if (auto yiledOp = dyn_cast(op)) { diff --git a/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir b/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir index a7fa5796efc58..db7087fe54573 100644 --- a/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir +++ b/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir @@ -41,6 +41,24 @@ func @matmul_cst(%arg0: memref<16x16xf16>, %arg1: memref<16x16xf16>, %arg2: memr return } +// CHECK-LABEL: func @matmul_broadcast +// CHECK-SAME: (%{{.*}}: memref<16x16xf16>, %{{.*}}: memref<16x16xf16>, %{{.*}}: memref<16x16xf16>, %[[F:.*]]: f16) +// CHECK-DAG: %[[C:.+]] = gpu.subgroup_mma_constant_matrix %[[F]] : !gpu.mma_matrix<16x16xf16, "COp"> +// CHECK-DAG: %[[A:.+]] = gpu.subgroup_mma_load_matrix %{{.*}}[%{{.*}}, %{{.*}}] {leadDimension = 16 : index} : memref<16x16xf16> -> !gpu.mma_matrix<16x16xf16, "AOp"> +// CHECK-DAG: %[[B:.+]] = gpu.subgroup_mma_load_matrix %{{.*}}[%c0, %c0] {leadDimension = 16 : index} : memref<16x16xf16> -> !gpu.mma_matrix<16x16xf16, "BOp"> +// CHECK: %[[D:.+]] = gpu.subgroup_mma_compute %[[A]], %[[B]], %[[C]] : !gpu.mma_matrix<16x16xf16, "AOp">, !gpu.mma_matrix<16x16xf16, "BOp"> -> !gpu.mma_matrix<16x16xf16, "COp"> +// CHECK: gpu.subgroup_mma_store_matrix %[[D]], %{{.*}}[%{{.*}}, %{{.*}}] {leadDimension = 16 : index} : !gpu.mma_matrix<16x16xf16, "COp">, memref<16x16xf16> +func @matmul_broadcast(%arg0: memref<16x16xf16>, %arg1: memref<16x16xf16>, %arg2: memref<16x16xf16>, %f: f16) { + %C = vector.broadcast %f : f16 to vector<16x16xf16> + %c0 = constant 0 : index + %cst = constant 0.000000e+00 : f16 + %A = vector.transfer_read %arg0[%c0, %c0], %cst {in_bounds = [true, true]} : memref<16x16xf16>, vector<16x16xf16> + %B = vector.transfer_read %arg1[%c0, %c0], %cst {permutation_map = #map0, in_bounds = [true, true]} : memref<16x16xf16>, vector<16x16xf16> + %D = vector.contract {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind} %A, %B, %C : vector<16x16xf16>, vector<16x16xf16> into vector<16x16xf16> + vector.transfer_write %D, %arg2[%c0, %c0] {in_bounds = [true, true]} : vector<16x16xf16>, memref<16x16xf16> + return +} + // CHECK-LABEL: func @matmul_loop // CHECK: %[[C:.+]] = gpu.subgroup_mma_load_matrix %{{.*}}[%{{.*}}, %{{.*}}] {leadDimension = 128 : index} : memref<128x128xf16> -> !gpu.mma_matrix<16x16xf16, "COp"> // CHECK: %[[ACC:.+]] = scf.for {{.*}} iter_args(%[[ACC1:.+]] = %[[C]]) -> (!gpu.mma_matrix<16x16xf16, "COp">) { From 0298f2cfb1df80741a08fb7cd1eec9da70ed3441 Mon Sep 17 00:00:00 2001 From: thomasraoux Date: Wed, 30 Jun 2021 00:00:11 -0700 Subject: [PATCH 305/619] [mlir] Fix wrong type in WmmaConstantOpToNVVMLowering InsertElement takes a scalar integer attribute not an array of integer. Differential Revision: https://reviews.llvm.org/D105174 --- mlir/lib/Conversion/GPUToNVVM/WmmaOpsToNvvm.cpp | 2 +- mlir/test/Conversion/GPUToNVVM/wmma-ops-to-nvvm.mlir | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Conversion/GPUToNVVM/WmmaOpsToNvvm.cpp b/mlir/lib/Conversion/GPUToNVVM/WmmaOpsToNvvm.cpp index d46a185dec22c..d955673d4b898 100644 --- a/mlir/lib/Conversion/GPUToNVVM/WmmaOpsToNvvm.cpp +++ b/mlir/lib/Conversion/GPUToNVVM/WmmaOpsToNvvm.cpp @@ -371,7 +371,7 @@ struct WmmaConstantOpToNVVMLowering for (int64_t vecEl = 0; vecEl < vecType.getNumElements(); vecEl++) { Value idx = rewriter.create( loc, typeConverter->convertType(rewriter.getIntegerType(32)), - rewriter.getI32ArrayAttr(vecEl)); + rewriter.getI32IntegerAttr(vecEl)); vecCst = rewriter.create(loc, vecType, vecCst, cst, idx); } diff --git a/mlir/test/Conversion/GPUToNVVM/wmma-ops-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/wmma-ops-to-nvvm.mlir index f692dffdfcbad..6eb641b0e897a 100644 --- a/mlir/test/Conversion/GPUToNVVM/wmma-ops-to-nvvm.mlir +++ b/mlir/test/Conversion/GPUToNVVM/wmma-ops-to-nvvm.mlir @@ -160,9 +160,9 @@ gpu.module @test_module { // CHECK-LABEL: func @gpu_wmma_constant_op // CHECK: %[[CST:.+]] = llvm.mlir.constant(1.000000e+00 : f16) : f16 // CHECK: %[[V0:.+]] = llvm.mlir.undef : vector<2xf16> -// CHECK: %[[C0:.+]] = llvm.mlir.constant([0 : i32]) : i32 +// CHECK: %[[C0:.+]] = llvm.mlir.constant(0 : i32) : i32 // CHECK: %[[V1:.+]] = llvm.insertelement %[[CST]], %[[V0]][%[[C0]] : i32] : vector<2xf16> -// CHECK: %[[C1:.+]] = llvm.mlir.constant([1 : i32]) : i32 +// CHECK: %[[C1:.+]] = llvm.mlir.constant(1 : i32) : i32 // CHECK: %[[V2:.+]] = llvm.insertelement %[[CST]], %[[V1]][%[[C1]] : i32] : vector<2xf16> // CHECK: %[[M0:.+]] = llvm.mlir.undef : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK: %[[M1:.+]] = llvm.insertvalue %[[V2]], %[[M0]][0 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> From 05915400b7f9933b95686116f2dc1370e7f96cfb Mon Sep 17 00:00:00 2001 From: Caroline Tice Date: Tue, 29 Jun 2021 14:50:10 -0700 Subject: [PATCH 306/619] [lldb] Replace SVE_PT* macros in NativeRegisterContextLinux_arm64.{cpp,h} with their equivalent defintions in LinuxPTraceDefines_arm64sve.h Commit 090306fc80dbf (August 2020) changed most of the arm64 SVE_PT* macros, but apparently did not make the changes in the NativeRegisterContextLinux_arm64.* files (or those files were pulled over from someplace else after that commit). This change replaces the macros NativeRegisterContextLinux_arm64.cpp with the replacement definitions in LinuxPTraceDefines_arm64sve.h. It also includes LinuxPTraceDefines_arm64sve.h in NativeRegisterContextLinux_arm64.h. Differential Revision: https://reviews.llvm.org/D104826 --- .../NativeRegisterContextLinux_arm64.cpp | 36 ++++++++++--------- .../Linux/NativeRegisterContextLinux_arm64.h | 3 +- 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.cpp b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.cpp index f78c0d2bb32fe..a0672a635937f 100644 --- a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.cpp +++ b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.cpp @@ -58,7 +58,7 @@ NativeRegisterContextLinux::CreateHostNativeRegisterContextLinux( switch (target_arch.GetMachine()) { case llvm::Triple::arm: return std::make_unique(target_arch, - native_thread); + native_thread); case llvm::Triple::aarch64: { // Configure register sets supported by this AArch64 target. // Read SVE header to check for SVE support. @@ -207,15 +207,15 @@ NativeRegisterContextLinux_arm64::ReadRegister(const RegisterInfo *reg_info, if (reg == GetRegisterInfo().GetRegNumFPSR()) { sve_reg_num = reg; if (m_sve_state == SVEState::Full) - offset = SVE_PT_SVE_FPSR_OFFSET(sve_vq_from_vl(m_sve_header.vl)); + offset = sve::PTraceFPSROffset(sve::vq_from_vl(m_sve_header.vl)); else if (m_sve_state == SVEState::FPSIMD) - offset = SVE_PT_FPSIMD_OFFSET + (32 * 16); + offset = sve::ptrace_fpsimd_offset + (32 * 16); } else if (reg == GetRegisterInfo().GetRegNumFPCR()) { sve_reg_num = reg; if (m_sve_state == SVEState::Full) - offset = SVE_PT_SVE_FPCR_OFFSET(sve_vq_from_vl(m_sve_header.vl)); + offset = sve::PTraceFPCROffset(sve::vq_from_vl(m_sve_header.vl)); else if (m_sve_state == SVEState::FPSIMD) - offset = SVE_PT_FPSIMD_OFFSET + (32 * 16) + 4; + offset = sve::ptrace_fpsimd_offset + (32 * 16) + 4; } else { // Extract SVE Z register value register number for this reg_info if (reg_info->value_regs && @@ -341,15 +341,15 @@ Status NativeRegisterContextLinux_arm64::WriteRegister( if (reg == GetRegisterInfo().GetRegNumFPSR()) { sve_reg_num = reg; if (m_sve_state == SVEState::Full) - offset = SVE_PT_SVE_FPSR_OFFSET(sve_vq_from_vl(m_sve_header.vl)); + offset = sve::PTraceFPSROffset(sve::vq_from_vl(m_sve_header.vl)); else if (m_sve_state == SVEState::FPSIMD) - offset = SVE_PT_FPSIMD_OFFSET + (32 * 16); + offset = sve::ptrace_fpsimd_offset + (32 * 16); } else if (reg == GetRegisterInfo().GetRegNumFPCR()) { sve_reg_num = reg; if (m_sve_state == SVEState::Full) - offset = SVE_PT_SVE_FPCR_OFFSET(sve_vq_from_vl(m_sve_header.vl)); + offset = sve::PTraceFPCROffset(sve::vq_from_vl(m_sve_header.vl)); else if (m_sve_state == SVEState::FPSIMD) - offset = SVE_PT_FPSIMD_OFFSET + (32 * 16) + 4; + offset = sve::ptrace_fpsimd_offset + (32 * 16) + 4; } else { // Extract SVE Z register value register number for this reg_info if (reg_info->value_regs && @@ -824,19 +824,21 @@ void NativeRegisterContextLinux_arm64::ConfigureRegisterContext() { if (error.Success()) { // If SVE is enabled thread can switch between SVEState::FPSIMD and // SVEState::Full on every stop. - if ((m_sve_header.flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD) + if ((m_sve_header.flags & sve::ptrace_regs_mask) == + sve::ptrace_regs_fpsimd) m_sve_state = SVEState::FPSIMD; - else if ((m_sve_header.flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE) + else if ((m_sve_header.flags & sve::ptrace_regs_mask) == + sve::ptrace_regs_sve) m_sve_state = SVEState::Full; // On every stop we configure SVE vector length by calling // ConfigureVectorLength regardless of current SVEState of this thread. uint32_t vq = RegisterInfoPOSIX_arm64::eVectorQuadwordAArch64SVE; if (sve_vl_valid(m_sve_header.vl)) - vq = sve_vq_from_vl(m_sve_header.vl); + vq = sve::vq_from_vl(m_sve_header.vl); GetRegisterInfo().ConfigureVectorLength(vq); - m_sve_ptrace_payload.resize(SVE_PT_SIZE(vq, SVE_PT_REGS_SVE)); + m_sve_ptrace_payload.resize(sve::PTraceSize(vq, sve::ptrace_regs_sve)); } } } @@ -852,19 +854,19 @@ uint32_t NativeRegisterContextLinux_arm64::CalculateSVEOffset( uint32_t sve_reg_offset = LLDB_INVALID_INDEX32; if (m_sve_state == SVEState::FPSIMD) { const uint32_t reg = reg_info->kinds[lldb::eRegisterKindLLDB]; - sve_reg_offset = - SVE_PT_FPSIMD_OFFSET + (reg - GetRegisterInfo().GetRegNumSVEZ0()) * 16; + sve_reg_offset = sve::ptrace_fpsimd_offset + + (reg - GetRegisterInfo().GetRegNumSVEZ0()) * 16; } else if (m_sve_state == SVEState::Full) { uint32_t sve_z0_offset = GetGPRSize() + 16; sve_reg_offset = - SVE_SIG_REGS_OFFSET + reg_info->byte_offset - sve_z0_offset; + sve::SigRegsOffset() + reg_info->byte_offset - sve_z0_offset; } return sve_reg_offset; } void *NativeRegisterContextLinux_arm64::GetSVEBuffer() { if (m_sve_state == SVEState::FPSIMD) - return m_sve_ptrace_payload.data() + SVE_PT_FPSIMD_OFFSET; + return m_sve_ptrace_payload.data() + sve::ptrace_fpsimd_offset; return m_sve_ptrace_payload.data(); } diff --git a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.h b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.h index f19b047380ce4..6b56660fb80cd 100644 --- a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.h +++ b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.h @@ -12,6 +12,7 @@ #define lldb_NativeRegisterContextLinux_arm64_h #include "Plugins/Process/Linux/NativeRegisterContextLinux.h" +#include "Plugins/Process/Utility/LinuxPTraceDefines_arm64sve.h" #include "Plugins/Process/Utility/NativeRegisterContextDBReg_arm64.h" #include "Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h" @@ -90,7 +91,7 @@ class NativeRegisterContextLinux_arm64 m_fpr; // floating-point registers including extended register sets. SVEState m_sve_state; - struct user_sve_header m_sve_header; + struct sve::user_sve_header m_sve_header; std::vector m_sve_ptrace_payload; bool m_refresh_hwdebug_info; From 570984204f24c326699dedcc05793b77b013f068 Mon Sep 17 00:00:00 2001 From: David Goldman Date: Fri, 11 Jun 2021 10:16:19 -0400 Subject: [PATCH 307/619] [clangd] Fix highlighting for implicit ObjC property refs Objective-C lets you use the `self.prop` syntax as sugar for both `[self prop]` and `[self setProp:]`, but clangd previously did not provide a semantic token for `prop`. Now, we provide a semantic token, treating it like a normal property except it's backed by a `ObjCMethodDecl` instead of a `ObjCPropertyDecl`. Differential Revision: https://reviews.llvm.org/D104117 --- .../clangd/SemanticHighlighting.cpp | 36 +++++++++++++++++++ .../unittests/SemanticHighlightingTests.cpp | 7 +++- 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/clang-tools-extra/clangd/SemanticHighlighting.cpp b/clang-tools-extra/clangd/SemanticHighlighting.cpp index bb192596f8c52..b49eb785f2deb 100644 --- a/clang-tools-extra/clangd/SemanticHighlighting.cpp +++ b/clang-tools-extra/clangd/SemanticHighlighting.cpp @@ -556,6 +556,42 @@ class CollectExtraHighlightings return true; } + // Objective-C allows you to use property syntax `self.prop` as sugar for + // `[self prop]` and `[self setProp:]` when there's no explicit `@property` + // for `prop` as well as for class properties. We treat this like a property + // even though semantically it's equivalent to a method expression. + void highlightObjCImplicitPropertyRef(const ObjCMethodDecl *OMD, + SourceLocation Loc) { + auto &Tok = H.addToken(Loc, HighlightingKind::Field) + .addModifier(HighlightingModifier::ClassScope); + if (OMD->isClassMethod()) + Tok.addModifier(HighlightingModifier::Static); + if (isDefaultLibrary(OMD)) + Tok.addModifier(HighlightingModifier::DefaultLibrary); + } + + bool VisitObjCPropertyRefExpr(ObjCPropertyRefExpr *OPRE) { + // We need to handle implicit properties here since they will appear to + // reference `ObjCMethodDecl` via an implicit `ObjCMessageExpr`, so normal + // highlighting will not work. + if (!OPRE->isImplicitProperty()) + return true; + // A single property expr can reference both a getter and setter, but we can + // only provide a single semantic token, so prefer the getter. In most cases + // the end result should be the same, although it's technically possible + // that the user defines a setter for a system SDK. + if (OPRE->isMessagingGetter()) { + highlightObjCImplicitPropertyRef(OPRE->getImplicitPropertyGetter(), + OPRE->getLocation()); + return true; + } + if (OPRE->isMessagingSetter()) { + highlightObjCImplicitPropertyRef(OPRE->getImplicitPropertySetter(), + OPRE->getLocation()); + } + return true; + } + bool VisitOverloadExpr(OverloadExpr *E) { if (!E->decls().empty()) return true; // handled by findExplicitReferences. diff --git a/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp b/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp index 02ab6bef0a817..a0212856427d6 100644 --- a/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp +++ b/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp @@ -696,11 +696,16 @@ sizeof...($TemplateParameter[[Elements]]); int $Field_decl[[_someProperty]]; } @property(nonatomic, assign) int $Field_decl[[someProperty]]; + @property(readonly, class) $Class[[Foo]] *$Field_decl_readonly_static[[sharedInstance]]; @end @implementation $Class_decl[[Foo]] @synthesize someProperty = _someProperty; + - (int)$Method_decl[[otherMethod]] { + return 0; + } - (int)$Method_decl[[doSomething]] { - self.$Field[[someProperty]] = self.$Field[[someProperty]] + 1; + $Class[[Foo]].$Field_static[[sharedInstance]].$Field[[someProperty]] = 1; + self.$Field[[someProperty]] = self.$Field[[someProperty]] + self.$Field[[otherMethod]] + 1; self->$Field[[_someProperty]] = $Field[[_someProperty]] + 1; } @end From 7b06bfc49ec9d560fa50996ecf7f16b7c2f47c9d Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 30 Jun 2021 09:43:28 -0700 Subject: [PATCH 308/619] [ELF] -pie: produce dynamic relocations for absolute relocations referencing undef weak See the comment for my understanding of -no-pie and -shared expectation. -no-pie has freedom on choices. We choose dynamic relocations to be consistent with the handling of GOT-generating relocations. Note: GNU ld has arch-varying behaviors and its x86 -pie has a very complex rule: if there is at least one GOT-generating or PLT-generating relocation and -z dynamic-undefined-weak (enabled by default) is in effect, generate a dynamic relocation. We don't emulate its rule. Reviewed By: peter.smith Differential Revision: https://reviews.llvm.org/D105164 --- lld/ELF/Relocations.cpp | 15 +++++++++++---- lld/test/ELF/weak-undef-rw.s | 19 +++++++++++++++---- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index ce08ead3fa539..85755985146cb 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -1147,11 +1147,18 @@ static void processRelocAux(InputSectionBase &sec, RelExpr expr, RelType type, // relocation will be created, pass the control to relocateAlloc() or // relocateNonAlloc() to resolve it. // - // The behavior of an undefined weak reference is implementation defined. If - // the relocation is to a weak undef, and we are producing an executable, let - // relocate{,Non}Alloc() resolve it. + // The behavior of an undefined weak reference is implementation defined. For + // non-link-time constants, we resolve relocations statically (let + // relocate{,Non}Alloc() resolve them) for -no-pie and try producing dynamic + // relocations for -pie and -shared. + // + // The general expectation of -no-pie static linking is that there is no + // dynamic relocation (except IRELATIVE). Emitting dynamic relocations for + // -shared matches the spirit of its -z undefs default. -pie has freedom on + // choices, and we choose dynamic relocations to be consistent with the + // handling of GOT-generating relocations. if (isStaticLinkTimeConstant(expr, type, sym, sec, offset) || - (!config->shared && sym.isUndefWeak())) { + (!config->isPic && sym.isUndefWeak())) { sec.relocations.push_back({expr, type, offset, addend, &sym}); return; } diff --git a/lld/test/ELF/weak-undef-rw.s b/lld/test/ELF/weak-undef-rw.s index e8d9515305c7d..26cc74788bc24 100644 --- a/lld/test/ELF/weak-undef-rw.s +++ b/lld/test/ELF/weak-undef-rw.s @@ -1,13 +1,24 @@ # REQUIRES: x86 # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o # RUN: ld.lld %t.o -o %t --export-dynamic -# RUN: llvm-readelf -r %t | FileCheck %s +# RUN: llvm-readelf -r %t | FileCheck %s --check-prefix=NOPIC +# RUN: ld.lld %t.o -o %t.pie -pie +# RUN: llvm-readobj -r %t.pie | FileCheck %s --check-prefix=PIC +# RUN: ld.lld %t.o -o %t.so -shared +# RUN: llvm-readobj -r %t.so | FileCheck %s --check-prefix=PIC ## gABI leaves the behavior of weak undefined references implementation defined. -## We choose to resolve it statically and not create a dynamic relocation for -## implementation simplicity. This also matches ld.bfd and gold. +## We choose to resolve them statically for -no-pie and produce dynamic relocations +## for -pie and -shared. +## +## Note: Some ports of GNU ld support -z nodynamic-undefined-weak that we don't +## implement. -# CHECK: no relocations +# NOPIC: no relocations + +# PIC: .rela.dyn { +# PIC-NEXT: R_X86_64_64 foobar 0x0 +# PIC-NEXT: } .global _start _start: From b56e5f8a10c1ec4fd3750bdd269fbad778820326 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Wed, 30 Jun 2021 12:52:01 -0400 Subject: [PATCH 309/619] [clangd] Unbreak mac build after 0c96a92d8666b8 That commit removed the include of Features.inc from ClangdLSPServer.h, but ClangdMain.cpp relied on this include to pull in Features.inc for the #if at the bottom of Transport.h. Since the include is needed in Transport.h, just add it to there directly. --- clang-tools-extra/clangd/Transport.h | 1 + 1 file changed, 1 insertion(+) diff --git a/clang-tools-extra/clangd/Transport.h b/clang-tools-extra/clangd/Transport.h index ae6da722d91b1..b3db4eba85f93 100644 --- a/clang-tools-extra/clangd/Transport.h +++ b/clang-tools-extra/clangd/Transport.h @@ -18,6 +18,7 @@ #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_TRANSPORT_H_ #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_TRANSPORT_H_ +#include "Features.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/JSON.h" #include "llvm/Support/raw_ostream.h" From f6db88535cb4c47bec8cf117f549f4ad27dcd6d6 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Wed, 30 Jun 2021 12:58:59 -0400 Subject: [PATCH 310/619] [gn build] add dep needed after b56e5f8a10c1e --- llvm/utils/gn/secondary/clang-tools-extra/clangd/xpc/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clangd/xpc/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clangd/xpc/BUILD.gn index 0d375392ae257..921e0dbedeb54 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clangd/xpc/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clangd/xpc/BUILD.gn @@ -16,6 +16,7 @@ static_library("transport") { deps = [ ":conversions", "//clang-tools-extra/clangd", + "//clang-tools-extra/clangd:features", "//clang-tools-extra/clangd/support", "//llvm/lib/Support", ] From 434bd5bf5479615ea42e64a80ff994059f31f5f4 Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Sat, 21 Mar 2020 17:06:39 -0400 Subject: [PATCH 311/619] [AMDGPU] Add builtin functions image_bvh_intersect_ray Reviewed by: Stanislav Mekhanoshin, Matt Arsenault Differential Revision: https://reviews.llvm.org/D104946 --- clang/include/clang/Basic/BuiltinsAMDGPU.def | 11 ++++ clang/lib/CodeGen/CGBuiltin.cpp | 17 ++++++ .../builtins-amdgcn-raytracing.cl | 61 +++++++++++++++++++ 3 files changed, 89 insertions(+) create mode 100644 clang/test/CodeGenOpenCL/builtins-amdgcn-raytracing.cl diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index f9d079accb56f..3570431d952cb 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -215,6 +215,17 @@ TARGET_BUILTIN(__builtin_amdgcn_permlane16, "UiUiUiUiUiIbIb", "nc", "gfx10-insts TARGET_BUILTIN(__builtin_amdgcn_permlanex16, "UiUiUiUiUiIbIb", "nc", "gfx10-insts") TARGET_BUILTIN(__builtin_amdgcn_mov_dpp8, "UiUiIUi", "nc", "gfx10-insts") +//===----------------------------------------------------------------------===// +// Raytracing builtins. +// By default the 1st argument is i32 and the 4/5-th arguments are float4. +// Postfix l indicates the 1st argument is i64. +// Postfix h indicates the 4/5-th arguments are half4. +//===----------------------------------------------------------------------===// +TARGET_BUILTIN(__builtin_amdgcn_image_bvh_intersect_ray, "V4UiUifV4fV4fV4fV4Ui", "nc", "gfx10-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_bvh_intersect_ray_h, "V4UiUifV4fV4hV4hV4Ui", "nc", "gfx10-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_bvh_intersect_ray_l, "V4UiWUifV4fV4fV4fV4Ui", "nc", "gfx10-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_bvh_intersect_ray_lh, "V4UiWUifV4fV4hV4hV4Ui", "nc", "gfx10-insts") + //===----------------------------------------------------------------------===// // Special builtins. //===----------------------------------------------------------------------===// diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 0e13b55b0cc57..9061abceab3a0 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -15850,6 +15850,23 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, CI->setConvergent(); return CI; } + case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray: + case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h: + case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l: + case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_lh: { + llvm::Value *NodePtr = EmitScalarExpr(E->getArg(0)); + llvm::Value *RayExtent = EmitScalarExpr(E->getArg(1)); + llvm::Value *RayOrigin = EmitScalarExpr(E->getArg(2)); + llvm::Value *RayDir = EmitScalarExpr(E->getArg(3)); + llvm::Value *RayInverseDir = EmitScalarExpr(E->getArg(4)); + llvm::Value *TextureDescr = EmitScalarExpr(E->getArg(5)); + + Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_image_bvh_intersect_ray, + {NodePtr->getType(), RayDir->getType()}); + return Builder.CreateCall(F, {NodePtr, RayExtent, RayOrigin, RayDir, + RayInverseDir, TextureDescr}); + } + // amdgcn workitem case AMDGPU::BI__builtin_amdgcn_workitem_id_x: return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024); diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raytracing.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-raytracing.cl new file mode 100644 index 0000000000000..805d17a392b31 --- /dev/null +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-raytracing.cl @@ -0,0 +1,61 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1030 -S \ +// RUN: -emit-llvm -cl-std=CL2.0 -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1030 -S \ +// RUN: -cl-std=CL2.0 -o - %s | FileCheck -check-prefix=ISA %s + +// Test llvm.amdgcn.image.bvh.intersect.ray intrinsic. + +// The clang builtin functions __builtin_amdgcn_image_bvh_intersect_ray* use +// postfixes to indicate the types of the 1st, 4th, and 5th arguments. +// By default, the 1st argument is i32, the 4/5-th arguments are float4. +// Postfix l indicates the 1st argument is i64 and postfix h indicates +// the 4/5-th arguments are half4. + +typedef unsigned int uint; +typedef unsigned long ulong; +typedef float float4 __attribute__((ext_vector_type(4))); +typedef double double4 __attribute__((ext_vector_type(4))); +typedef half half4 __attribute__((ext_vector_type(4))); +typedef uint uint4 __attribute__((ext_vector_type(4))); + +// CHECK: call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f32 +// ISA: image_bvh_intersect_ray +void test_image_bvh_intersect_ray(global uint4* out, uint node_ptr, + float ray_extent, float4 ray_origin, float4 ray_dir, float4 ray_inv_dir, + uint4 texture_descr) +{ + *out = __builtin_amdgcn_image_bvh_intersect_ray(node_ptr, ray_extent, + ray_origin, ray_dir, ray_inv_dir, texture_descr); +} + +// CHECK: call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f16 +// ISA: image_bvh_intersect_ray +void test_image_bvh_intersect_ray_h(global uint4* out, uint node_ptr, + float ray_extent, float4 ray_origin, half4 ray_dir, half4 ray_inv_dir, + uint4 texture_descr) +{ + *out = __builtin_amdgcn_image_bvh_intersect_ray_h(node_ptr, ray_extent, + ray_origin, ray_dir, ray_inv_dir, texture_descr); +} + +// CHECK: call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f32 +// ISA: image_bvh_intersect_ray +void test_image_bvh_intersect_ray_l(global uint4* out, ulong node_ptr, + float ray_extent, float4 ray_origin, float4 ray_dir, float4 ray_inv_dir, + uint4 texture_descr) +{ + *out = __builtin_amdgcn_image_bvh_intersect_ray_l(node_ptr, ray_extent, + ray_origin, ray_dir, ray_inv_dir, texture_descr); +} + +// CHECK: call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f16 +// ISA: image_bvh_intersect_ray +void test_image_bvh_intersect_ray_lh(global uint4* out, ulong node_ptr, + float ray_extent, float4 ray_origin, half4 ray_dir, half4 ray_inv_dir, + uint4 texture_descr) +{ + *out = __builtin_amdgcn_image_bvh_intersect_ray_lh(node_ptr, ray_extent, + ray_origin, ray_dir, ray_inv_dir, texture_descr); +} + From f617ab10445148ae44d67484f9dc9486efcfbcc8 Mon Sep 17 00:00:00 2001 From: Tomas Matheson Date: Fri, 25 Jun 2021 14:37:02 +0100 Subject: [PATCH 312/619] [NPM] Resolve llvmGetPassPluginInfo to the plugin being loaded Dynamically loaded plugins for the new pass manager are initialised by calling llvmGetPassPluginInfo. This is defined as a weak symbol so that it is continually redefined by each plugin that is loaded. When loading a plugin from a shared library, the intention is that llvmGetPassPluginInfo will be resolved to the definition in the most recent plugin. However, using a global search for this resolution can fail in situations where multiple plugins are loaded. Currently: * If a plugin does not define llvmGetPassPluginInfo, then it will be silently resolved to the previous plugin's definition. * If loading the same plugin twice with another in between, e.g. plugin A/plugin B/plugin A, then the second load of plugin A will resolve to llvmGetPassPluginInfo in plugin B. * The previous case can also occur when a dynamic library defines both NPM and legacy plugins; the legacy plugins are loaded first and then with `-fplugin=A -fpass-plugin=B -fpass-plugin=A`: A will be loaded as a legacy plugin and define llvmGetPassPluginInfo; B will be loaded and redefine it; and finally when A is loaded as an NPM plugin it will be resolved to the definition from B. Instead of searching globally, restrict the symbol lookup to the library that is currently being loaded. Differential Revision: https://reviews.llvm.org/D104916 --- llvm/lib/Passes/PassPlugin.cpp | 5 +- llvm/unittests/Passes/CMakeLists.txt | 26 ++++---- llvm/unittests/Passes/DoublerPlugin.cpp | 44 ++++++++++++++ llvm/unittests/Passes/PluginsTest.cpp | 79 +++++++++++++++++++++++++ llvm/unittests/Passes/TestPlugin.cpp | 28 ++++----- 5 files changed, 155 insertions(+), 27 deletions(-) create mode 100644 llvm/unittests/Passes/DoublerPlugin.cpp diff --git a/llvm/lib/Passes/PassPlugin.cpp b/llvm/lib/Passes/PassPlugin.cpp index ceefa25a703b6..6182cbbb1fddd 100644 --- a/llvm/lib/Passes/PassPlugin.cpp +++ b/llvm/lib/Passes/PassPlugin.cpp @@ -23,8 +23,11 @@ Expected PassPlugin::Load(const std::string &Filename) { inconvertibleErrorCode()); PassPlugin P{Filename, Library}; + + // llvmGetPassPluginInfo should be resolved to the definition from the plugin + // we are currently loading. intptr_t getDetailsFn = - (intptr_t)Library.SearchForAddressOfSymbol("llvmGetPassPluginInfo"); + (intptr_t)Library.getAddressOfSymbol("llvmGetPassPluginInfo"); if (!getDetailsFn) // If the symbol isn't found, this is probably a legacy plugin, which is an diff --git a/llvm/unittests/Passes/CMakeLists.txt b/llvm/unittests/Passes/CMakeLists.txt index 50bf3901ab1ea..075b47c8d07e6 100644 --- a/llvm/unittests/Passes/CMakeLists.txt +++ b/llvm/unittests/Passes/CMakeLists.txt @@ -1,5 +1,5 @@ # Needed by LLVM's CMake checks because this file defines multiple targets. -set(LLVM_OPTIONAL_SOURCES PluginsTest.cpp TestPlugin.cpp PassBuilderBindingsTest.cpp) +set(LLVM_OPTIONAL_SOURCES PluginsTest.cpp TestPlugin.cpp DoublerPlugin.cpp PassBuilderBindingsTest.cpp) # If plugins are disabled, this test will disable itself at runtime. Otherwise, # reconfiguring with plugins disabled will leave behind a stale executable. @@ -20,19 +20,20 @@ if (NOT WIN32) target_link_libraries(PluginsTests PRIVATE LLVMTestingSupport) set(LLVM_LINK_COMPONENTS) - add_llvm_library(TestPlugin MODULE BUILDTREE_ONLY - TestPlugin.cpp - ) + foreach(PLUGIN TestPlugin DoublerPlugin) + add_llvm_library(${PLUGIN} MODULE BUILDTREE_ONLY ${PLUGIN}.cpp) - # Put plugin next to the unit test executable. - set_output_directory(TestPlugin - BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR} - LIBRARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR} - ) - set_target_properties(TestPlugin PROPERTIES FOLDER "Tests") + # Put PLUGIN next to the unit test executable. + set_output_directory(${PLUGIN} + BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR} + LIBRARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR} + ) + set_target_properties(${PLUGIN} PROPERTIES FOLDER "Tests") + + add_dependencies(${PLUGIN} intrinsics_gen) + add_dependencies(PluginsTests ${PLUGIN}) + endforeach() - add_dependencies(TestPlugin intrinsics_gen) - add_dependencies(PluginsTests TestPlugin) endif() set(LLVM_LINK_COMPONENTS Support Passes Core Target native AllTargetsInfos) @@ -40,3 +41,4 @@ add_llvm_unittest(PassesBindingsTests PassBuilderBindingsTest.cpp ) target_link_libraries(PassesBindingsTests PRIVATE LLVMTestingSupport) + diff --git a/llvm/unittests/Passes/DoublerPlugin.cpp b/llvm/unittests/Passes/DoublerPlugin.cpp new file mode 100644 index 0000000000000..57d48261796e2 --- /dev/null +++ b/llvm/unittests/Passes/DoublerPlugin.cpp @@ -0,0 +1,44 @@ +//===- unittests/Passes/DoublerPlugin.cpp +//--------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Passes/PassBuilder.h" +#include "llvm/Passes/PassPlugin.h" + +using namespace llvm; + +struct DoublerModulePass : public PassInfoMixin { + + // Double the value of the initializer + PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM) { + auto *GV = cast(M.getNamedValue("doubleme")); + auto *Init = GV->getInitializer(); + auto *Init2 = ConstantExpr::getAdd(Init, Init); + GV->setInitializer(Init2); + + return PreservedAnalyses::none(); + } + + static void registerCallbacks(PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &PM, + ArrayRef InnerPipeline) { + if (Name == "doubler-pass") { + PM.addPass(DoublerModulePass()); + return true; + } + return false; + }); + } +}; + +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() { + return {LLVM_PLUGIN_API_VERSION, "DoublerPlugin", "2.2-unit", + DoublerModulePass::registerCallbacks}; +} diff --git a/llvm/unittests/Passes/PluginsTest.cpp b/llvm/unittests/Passes/PluginsTest.cpp index 9fa5a0bdaf983..49fc8284f5e63 100644 --- a/llvm/unittests/Passes/PluginsTest.cpp +++ b/llvm/unittests/Passes/PluginsTest.cpp @@ -7,7 +7,9 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/CGSCCPassManager.h" +#include "llvm/AsmParser/Parser.h" #include "llvm/Config/config.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/PassManager.h" #include "llvm/Passes/PassBuilder.h" #include "llvm/Passes/PassPlugin.h" @@ -58,3 +60,80 @@ TEST(PluginsTests, LoadPlugin) { Plugin->registerPassBuilderCallbacks(PB); ASSERT_THAT_ERROR(PB.parsePassPipeline(PM, "plugin-pass"), Succeeded()); } + +// Test that llvmGetPassPluginInfo from DoublerPlugin is called twice with +// -fpass-plugin=DoublerPlugin -fpass-plugin=TestPlugin +// -fpass-plugin=DoublerPlugin. +TEST(PluginsTests, LoadMultiplePlugins) { +#if !defined(LLVM_ENABLE_PLUGINS) + // Disable the test if plugins are disabled. + return; +#endif + + auto DoublerPluginPath = LibPath("DoublerPlugin"); + auto TestPluginPath = LibPath("TestPlugin"); + ASSERT_NE("", DoublerPluginPath); + ASSERT_NE("", TestPluginPath); + + Expected DoublerPlugin1 = PassPlugin::Load(DoublerPluginPath); + ASSERT_TRUE(!!DoublerPlugin1) + << "Plugin path: " << DoublerPlugin1->getFilename(); + + Expected TestPlugin = PassPlugin::Load(TestPluginPath); + ASSERT_TRUE(!!TestPlugin) << "Plugin path: " << TestPlugin->getFilename(); + + // If llvmGetPassPluginInfo is resolved as a weak symbol taking into account + // all loaded symbols, the second call to PassPlugin::Load will actually + // return the llvmGetPassPluginInfo from the most recently loaded plugin, in + // this case TestPlugin. + Expected DoublerPlugin2 = PassPlugin::Load(DoublerPluginPath); + ASSERT_TRUE(!!DoublerPlugin2) + << "Plugin path: " << DoublerPlugin2->getFilename(); + + ASSERT_EQ("DoublerPlugin", DoublerPlugin1->getPluginName()); + ASSERT_EQ("2.2-unit", DoublerPlugin1->getPluginVersion()); + ASSERT_EQ(TEST_PLUGIN_NAME, TestPlugin->getPluginName()); + ASSERT_EQ(TEST_PLUGIN_VERSION, TestPlugin->getPluginVersion()); + // Check that the plugin name/version is set correctly when loaded a second + // time + ASSERT_EQ("DoublerPlugin", DoublerPlugin2->getPluginName()); + ASSERT_EQ("2.2-unit", DoublerPlugin2->getPluginVersion()); + + PassBuilder PB; + ModulePassManager PM; + const char *PipelineText = "module(doubler-pass,plugin-pass,doubler-pass)"; + ASSERT_THAT_ERROR(PB.parsePassPipeline(PM, PipelineText), Failed()); + TestPlugin->registerPassBuilderCallbacks(PB); + DoublerPlugin1->registerPassBuilderCallbacks(PB); + DoublerPlugin2->registerPassBuilderCallbacks(PB); + ASSERT_THAT_ERROR(PB.parsePassPipeline(PM, PipelineText), Succeeded()); + + LLVMContext C; + SMDiagnostic Err; + std::unique_ptr M = + parseAssemblyString(R"IR(@doubleme = constant i32 7)IR", Err, C); + + // Check that the initial value is 7 + { + auto *GV = M->getNamedValue("doubleme"); + auto *Init = cast(GV)->getInitializer(); + auto *CI = cast(Init); + ASSERT_EQ(CI->getSExtValue(), 7); + } + + ModuleAnalysisManager MAM; + // Register required pass instrumentation analysis. + MAM.registerPass([&] { return PassInstrumentationAnalysis(); }); + PM.run(*M, MAM); + + // Check that the final value is 28 because DoublerPlugin::run was called + // twice, indicating that the llvmGetPassPluginInfo and registerCallbacks + // were correctly called. + { + // Check the value was doubled twice + auto *GV = M->getNamedValue("doubleme"); + auto *Init = cast(GV)->getInitializer(); + auto *CI = cast(Init); + ASSERT_EQ(CI->getSExtValue(), 28); + } +} diff --git a/llvm/unittests/Passes/TestPlugin.cpp b/llvm/unittests/Passes/TestPlugin.cpp index e0ae861f16fc1..edc71aaf3ff07 100644 --- a/llvm/unittests/Passes/TestPlugin.cpp +++ b/llvm/unittests/Passes/TestPlugin.cpp @@ -1,4 +1,4 @@ -//===- unittests/Passes/Plugins/Plugin.cpp --------------------------------===// +//===- unittests/Passes/TestPlugin.cpp --------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -17,22 +17,22 @@ struct TestModulePass : public PassInfoMixin { PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM) { return PreservedAnalyses::all(); } -}; -void registerCallbacks(PassBuilder &PB) { - PB.registerPipelineParsingCallback( - [](StringRef Name, ModulePassManager &PM, - ArrayRef InnerPipeline) { - if (Name == "plugin-pass") { - PM.addPass(TestModulePass()); - return true; - } - return false; - }); -} + static void registerCallbacks(PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &PM, + ArrayRef InnerPipeline) { + if (Name == "plugin-pass") { + PM.addPass(TestModulePass()); + return true; + } + return false; + }); + } +}; extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK llvmGetPassPluginInfo() { return {LLVM_PLUGIN_API_VERSION, TEST_PLUGIN_NAME, TEST_PLUGIN_VERSION, - registerCallbacks}; + TestModulePass::registerCallbacks}; } From c4fc2cb5b2d98125e9035d9498640c7d6f17c8da Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Wed, 30 Jun 2021 10:18:38 -0700 Subject: [PATCH 313/619] [instcombine] umin(x, 1) == zext(x != 0) We already implemented this for the select form, but the intrinsic form was missing. Note that this doesn't change poison behavior as 1 is non-poison, and the optimized form is still poison exactly when x is. --- .../InstCombine/InstCombineCalls.cpp | 11 ++++- .../InstCombine/minmax-intrinsics.ll | 41 +++++++++++++++++++ 2 files changed, 51 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 552de8b072e39..5060b45ad6b7f 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -956,8 +956,17 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { break; } - case Intrinsic::umax: case Intrinsic::umin: { + Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1); + // umin(x, 1) == zext(x != 0) + if (match(I1, m_One())) { + Value *Zero = Constant::getNullValue(I0->getType()); + Value *Cmp = Builder.CreateICmpNE(I0, Zero); + return CastInst::Create(Instruction::ZExt, Cmp, II->getType()); + } + LLVM_FALLTHROUGH; + } + case Intrinsic::umax: { Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1); Value *X, *Y; if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_ZExt(m_Value(Y))) && diff --git a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll index 3b2279a92bf7c..0dd429cb589ef 100644 --- a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll @@ -817,3 +817,44 @@ define i8 @clamp_two_vals_smin_smax_edge(i8 %x) { %r = call i8 @llvm.smax.i8(i8 %m, i8 127) ret i8 %r } + + +define i8 @umin_non_zero_idiom1(i8 %a) { +; CHECK-LABEL: @umin_non_zero_idiom1( +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i8 [[A:%.*]], 0 +; CHECK-NEXT: [[RES:%.*]] = zext i1 [[TMP1]] to i8 +; CHECK-NEXT: ret i8 [[RES]] +; + %res = call i8 @llvm.umin.i8(i8 %a, i8 1) + ret i8 %res +} + +define i8 @umin_non_zero_idiom2(i8 %a) { +; CHECK-LABEL: @umin_non_zero_idiom2( +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i8 [[A:%.*]], 0 +; CHECK-NEXT: [[RES:%.*]] = zext i1 [[TMP1]] to i8 +; CHECK-NEXT: ret i8 [[RES]] +; + %res = call i8 @llvm.umin.i8(i8 1, i8 %a) + ret i8 %res +} + +define <3 x i8> @umin_non_zero_idiom3(<3 x i8> %a) { +; CHECK-LABEL: @umin_non_zero_idiom3( +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <3 x i8> [[A:%.*]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = zext <3 x i1> [[TMP1]] to <3 x i8> +; CHECK-NEXT: ret <3 x i8> [[RES]] +; + %res = call <3 x i8> @llvm.umin.v3i8(<3 x i8> %a, <3 x i8> ) + ret <3 x i8> %res +} + +define <3 x i8> @umin_non_zero_idiom4(<3 x i8> %a) { +; CHECK-LABEL: @umin_non_zero_idiom4( +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <3 x i8> [[A:%.*]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = zext <3 x i1> [[TMP1]] to <3 x i8> +; CHECK-NEXT: ret <3 x i8> [[RES]] +; + %res = call <3 x i8> @llvm.umin.v3i8(<3 x i8> %a, <3 x i8> ) + ret <3 x i8> %res +} From 9474ddc3ac8637596f87dd796864353317622672 Mon Sep 17 00:00:00 2001 From: Siva Chandra Reddy Date: Mon, 28 Jun 2021 23:52:01 +0000 Subject: [PATCH 314/619] [libc] Fix feclearexcept for x86_64. Previously, feclearexcept cleared all exceptions irrespective of the argument. This change brings it in line with the aarch64 flavors wherein only those exceptions listed in the argument will be cleared. Reviewed By: lntue Differential Revision: https://reviews.llvm.org/D105081 --- libc/test/src/fenv/CMakeLists.txt | 11 +++ libc/test/src/fenv/feclearexcept_test.cpp | 83 +++++++++++++++++++++++ libc/utils/FPUtil/x86_64/FEnv.h | 10 ++- 3 files changed, 98 insertions(+), 6 deletions(-) create mode 100644 libc/test/src/fenv/feclearexcept_test.cpp diff --git a/libc/test/src/fenv/CMakeLists.txt b/libc/test/src/fenv/CMakeLists.txt index 851db03985698..1fa3e687e5cbb 100644 --- a/libc/test/src/fenv/CMakeLists.txt +++ b/libc/test/src/fenv/CMakeLists.txt @@ -60,6 +60,17 @@ add_libc_unittest( libc.utils.FPUtil.fputil ) +add_libc_unittest( + feclearexcept_test + SUITE + libc_fenv_unittests + SRCS + feclearexcept_test.cpp + DEPENDS + libc.src.fenv.feclearexcept + libc.utils.FPUtil.fputil +) + if (NOT LLVM_USE_SANITIZER) # Sanitizers don't like SIGFPE. So, we will run the # tests which raise SIGFPE only in non-sanitizer builds. diff --git a/libc/test/src/fenv/feclearexcept_test.cpp b/libc/test/src/fenv/feclearexcept_test.cpp new file mode 100644 index 0000000000000..dfdc29dc85fba --- /dev/null +++ b/libc/test/src/fenv/feclearexcept_test.cpp @@ -0,0 +1,83 @@ +//===-- Unittests for feclearexcept with exceptions enabled ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/fenv/feclearexcept.h" + +#include "utils/FPUtil/FEnv.h" +#include "utils/UnitTest/Test.h" + +#include +#include + +TEST(LlvmLibcFEnvTest, ClearTest) { + uint16_t excepts[] = {FE_DIVBYZERO, FE_INVALID, FE_INEXACT, FE_OVERFLOW, + FE_UNDERFLOW}; + __llvm_libc::fputil::disableExcept(FE_ALL_EXCEPT); + __llvm_libc::fputil::clearExcept(FE_ALL_EXCEPT); + + for (uint16_t e : excepts) + ASSERT_EQ(__llvm_libc::fputil::testExcept(e), 0); + + __llvm_libc::fputil::raiseExcept(FE_ALL_EXCEPT); + for (uint16_t e : excepts) { + // We clear one exception and test to verify that it was cleared. + __llvm_libc::feclearexcept(e); + ASSERT_EQ(uint16_t(__llvm_libc::fputil::testExcept(FE_ALL_EXCEPT)), + uint16_t(FE_ALL_EXCEPT & ~e)); + // After clearing, we raise the exception again. + __llvm_libc::fputil::raiseExcept(e); + } + + for (uint16_t e1 : excepts) { + for (uint16_t e2 : excepts) { + __llvm_libc::feclearexcept(e1 | e2); + ASSERT_EQ(uint16_t(__llvm_libc::fputil::testExcept(FE_ALL_EXCEPT)), + uint16_t(FE_ALL_EXCEPT & ~(e1 | e2))); + __llvm_libc::fputil::raiseExcept(e1 | e2); + } + } + + for (uint16_t e1 : excepts) { + for (uint16_t e2 : excepts) { + for (uint16_t e3 : excepts) { + __llvm_libc::feclearexcept(e1 | e2 | e3); + ASSERT_EQ(uint16_t(__llvm_libc::fputil::testExcept(FE_ALL_EXCEPT)), + uint16_t(FE_ALL_EXCEPT & ~(e1 | e2 | e3))); + __llvm_libc::fputil::raiseExcept(e1 | e2 | e3); + } + } + } + + for (uint16_t e1 : excepts) { + for (uint16_t e2 : excepts) { + for (uint16_t e3 : excepts) { + for (uint16_t e4 : excepts) { + __llvm_libc::feclearexcept(e1 | e2 | e3 | e4); + ASSERT_EQ(uint16_t(__llvm_libc::fputil::testExcept(FE_ALL_EXCEPT)), + uint16_t(FE_ALL_EXCEPT & ~(e1 | e2 | e3 | e4))); + __llvm_libc::fputil::raiseExcept(e1 | e2 | e3 | e4); + } + } + } + } + + for (uint16_t e1 : excepts) { + for (uint16_t e2 : excepts) { + for (uint16_t e3 : excepts) { + for (uint16_t e4 : excepts) { + for (uint16_t e5 : excepts) { + __llvm_libc::feclearexcept(e1 | e2 | e3 | e4 | e5); + ASSERT_EQ(uint16_t(__llvm_libc::fputil::testExcept(FE_ALL_EXCEPT)), + uint16_t(FE_ALL_EXCEPT & ~(e1 | e2 | e3 | e4 | e5))); + __llvm_libc::fputil::raiseExcept(e1 | e2 | e3 | e4 | e5); + } + } + } + } + } +} diff --git a/libc/utils/FPUtil/x86_64/FEnv.h b/libc/utils/FPUtil/x86_64/FEnv.h index f654f0bab2d51..cd11c388f41f4 100644 --- a/libc/utils/FPUtil/x86_64/FEnv.h +++ b/libc/utils/FPUtil/x86_64/FEnv.h @@ -188,12 +188,10 @@ static inline int disableExcept(int excepts) { } static inline int clearExcept(int excepts) { - // An instruction to write to x87 status word ins't available. So, we - // just clear all of the x87 exceptions. - // TODO: One can potentially use fegetenv/fesetenv to clear only the - // listed exceptions in the x87 status word. We can do this if it is - // really required. - internal::clearX87Exceptions(); + internal::X87StateDescriptor state; + internal::getX87StateDescriptor(state); + state.StatusWord &= ~internal::getStatusValueForExcept(excepts); + internal::writeX87StateDescriptor(state); uint32_t mxcsr = internal::getMXCSR(); mxcsr &= ~internal::getStatusValueForExcept(excepts); From 0c2f40f91622c57ff343a47594bd84a747afe6e3 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Wed, 30 Jun 2021 10:25:41 -0700 Subject: [PATCH 315/619] [instcombine] Precommit tests for umin(a,b) ne/eq 0 fold --- .../InstCombine/minmax-intrinsics.ll | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll index 0dd429cb589ef..963c2bc476665 100644 --- a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll @@ -858,3 +858,49 @@ define <3 x i8> @umin_non_zero_idiom4(<3 x i8> %a) { %res = call <3 x i8> @llvm.umin.v3i8(<3 x i8> %a, <3 x i8> ) ret <3 x i8> %res } + +define i1 @umin_eq_zero(i8 %a, i8 %b) { +; CHECK-LABEL: @umin_eq_zero( +; CHECK-NEXT: [[UMIN:%.*]] = call i8 @llvm.umin.i8(i8 [[A:%.*]], i8 [[B:%.*]]) +; CHECK-NEXT: [[RES:%.*]] = icmp eq i8 [[UMIN]], 0 +; CHECK-NEXT: ret i1 [[RES]] +; + %umin = call i8 @llvm.umin.i8(i8 %a, i8 %b) + %res = icmp eq i8 %umin, 0 + ret i1 %res +} + +define <3 x i1> @umin_eq_zero2(<3 x i8> %a, <3 x i8> %b) { +; CHECK-LABEL: @umin_eq_zero2( +; CHECK-NEXT: [[UMIN:%.*]] = call <3 x i8> @llvm.umin.v3i8(<3 x i8> [[A:%.*]], <3 x i8> [[B:%.*]]) +; CHECK-NEXT: [[RES:%.*]] = icmp eq <3 x i8> [[UMIN]], zeroinitializer +; CHECK-NEXT: ret <3 x i1> [[RES]] +; + + %umin = call <3 x i8> @llvm.umin.v3i8(<3 x i8> %a, <3 x i8> %b) + %res = icmp eq <3 x i8> %umin, zeroinitializer + ret <3 x i1> %res +} + +define i1 @umin_ne_zero(i8 %a, i8 %b) { +; CHECK-LABEL: @umin_ne_zero( +; CHECK-NEXT: [[UMIN:%.*]] = call i8 @llvm.umin.i8(i8 [[A:%.*]], i8 [[B:%.*]]) +; CHECK-NEXT: [[RES:%.*]] = icmp ne i8 [[UMIN]], 0 +; CHECK-NEXT: ret i1 [[RES]] +; + %umin = call i8 @llvm.umin.i8(i8 %a, i8 %b) + %res = icmp ne i8 %umin, 0 + ret i1 %res +} + +define <3 x i1> @umin_ne_zero2(<3 x i8> %a, <3 x i8> %b) { +; CHECK-LABEL: @umin_ne_zero2( +; CHECK-NEXT: [[UMIN:%.*]] = call <3 x i8> @llvm.umin.v3i8(<3 x i8> [[A:%.*]], <3 x i8> [[B:%.*]]) +; CHECK-NEXT: [[RES:%.*]] = icmp ne <3 x i8> [[UMIN]], zeroinitializer +; CHECK-NEXT: ret <3 x i1> [[RES]] +; + + %umin = call <3 x i8> @llvm.umin.v3i8(<3 x i8> %a, <3 x i8> %b) + %res = icmp ne <3 x i8> %umin, zeroinitializer + ret <3 x i1> %res +} From 804dc3dcf27d10d4cd0af06fdf2999ea81ba751f Mon Sep 17 00:00:00 2001 From: Siva Chandra Reddy Date: Tue, 29 Jun 2021 06:37:56 +0000 Subject: [PATCH 316/619] [libc] Clear all exceptions before setting in fesetexceptflag. Previously, exceptions from the flag were being added. This patch changes it such that only the exceptions in the flag will be set. Reviewed By: lntue Differential Revision: https://reviews.llvm.org/D105085 --- libc/src/fenv/fesetexceptflag.cpp | 1 + libc/test/src/fenv/exception_flags_test.cpp | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/libc/src/fenv/fesetexceptflag.cpp b/libc/src/fenv/fesetexceptflag.cpp index 1968ebea5a60b..9ee6205d3a6cc 100644 --- a/libc/src/fenv/fesetexceptflag.cpp +++ b/libc/src/fenv/fesetexceptflag.cpp @@ -21,6 +21,7 @@ LLVM_LIBC_FUNCTION(int, fesetexceptflag, static_assert(sizeof(int) >= sizeof(fexcept_t), "fexcept_t value cannot fit in an int value."); int excepts_to_set = static_cast(*flagp) & excepts; + fputil::clearExcept(FE_ALL_EXCEPT); return fputil::setExcept(excepts_to_set); } diff --git a/libc/test/src/fenv/exception_flags_test.cpp b/libc/test/src/fenv/exception_flags_test.cpp index e492a21c1b1e2..bb3ddaa7148ac 100644 --- a/libc/test/src/fenv/exception_flags_test.cpp +++ b/libc/test/src/fenv/exception_flags_test.cpp @@ -42,4 +42,15 @@ TEST(LlvmLibcFenvTest, GetExceptFlagAndSetExceptFlag) { // Cleanup __llvm_libc::fputil::clearExcept(e); } + + // Next, we will raise one exception and save the flags. + __llvm_libc::fputil::raiseExcept(FE_INVALID); + fexcept_t eflags; + __llvm_libc::fegetexceptflag(&eflags, FE_ALL_EXCEPT); + // Clear all exceptions and raise two other exceptions. + __llvm_libc::fputil::clearExcept(FE_ALL_EXCEPT); + __llvm_libc::fputil::raiseExcept(FE_OVERFLOW | FE_INEXACT); + // When we set the flags and test, we should only see FE_INVALID. + __llvm_libc::fesetexceptflag(&eflags, FE_ALL_EXCEPT); + EXPECT_EQ(__llvm_libc::fputil::testExcept(FE_ALL_EXCEPT), FE_INVALID); } From 230df8a419f244827895ffe5b6db298a209b741a Mon Sep 17 00:00:00 2001 From: Siva Chandra Reddy Date: Mon, 28 Jun 2021 07:25:21 +0000 Subject: [PATCH 317/619] [libc] Allow reading and writing __FE_DENORM if available on x86_64. Some libcs define __FE_DENORM on x86_64. This change allows reading the bits corresponding to that non-standard exception. Reviewed By: lntue Differential Revision: https://reviews.llvm.org/D105004 --- libc/utils/FPUtil/x86_64/FEnv.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/libc/utils/FPUtil/x86_64/FEnv.h b/libc/utils/FPUtil/x86_64/FEnv.h index cd11c388f41f4..47e9dce7508e1 100644 --- a/libc/utils/FPUtil/x86_64/FEnv.h +++ b/libc/utils/FPUtil/x86_64/FEnv.h @@ -43,7 +43,10 @@ static constexpr uint16_t MXCSRRoundingControlBitPosition = 13; // encoding as well as the same bit positions. struct ExceptionFlags { static constexpr uint16_t Invalid = 0x1; - static constexpr uint16_t Denormal = 0x2; // This flag is not used + // Some libcs define __FE_DENORM corresponding to the denormal input + // exception and include it in FE_ALL_EXCEPTS. We define and use it to + // support compiling against headers provided by such libcs. + static constexpr uint16_t Denormal = 0x2; static constexpr uint16_t DivByZero = 0x4; static constexpr uint16_t Overflow = 0x8; static constexpr uint16_t Underflow = 0x10; @@ -62,6 +65,9 @@ static inline uint16_t getStatusValueForExcept(int excepts) { // We will make use of the fact that exception control bits are single // bit flags in the control registers. return (excepts & FE_INVALID ? ExceptionFlags::Invalid : 0) | +#ifdef __FE_DENORM + (excepts & __FE_DENORM ? ExceptionFalgs::Denormal : 0) | +#endif // __FE_DENORM (excepts & FE_DIVBYZERO ? ExceptionFlags::DivByZero : 0) | (excepts & FE_OVERFLOW ? ExceptionFlags::Overflow : 0) | (excepts & FE_UNDERFLOW ? ExceptionFlags::Underflow : 0) | @@ -70,6 +76,9 @@ static inline uint16_t getStatusValueForExcept(int excepts) { static inline int exceptionStatusToMacro(uint16_t status) { return (status & ExceptionFlags::Invalid ? FE_INVALID : 0) | +#ifdef __FE_DENORM + (status & ExceptionFalgs::Denormal ? __FE_DENORM : 0) | +#endif // __FE_DENORM (status & ExceptionFlags::DivByZero ? FE_DIVBYZERO : 0) | (status & ExceptionFlags::Overflow ? FE_OVERFLOW : 0) | (status & ExceptionFlags::Underflow ? FE_UNDERFLOW : 0) | From 36bd25db3dd5b76f3ab31fc95bd04f18a8af6347 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Wed, 30 Jun 2021 13:00:08 -0400 Subject: [PATCH 318/619] [InstCombine][test] add tests for icmp with constant and offset; NFC --- llvm/test/Transforms/InstCombine/icmp-add.ll | 46 ++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/icmp-add.ll b/llvm/test/Transforms/InstCombine/icmp-add.ll index a35d546f23ae4..e6cd8e74d7a91 100644 --- a/llvm/test/Transforms/InstCombine/icmp-add.ll +++ b/llvm/test/Transforms/InstCombine/icmp-add.ll @@ -747,3 +747,49 @@ define i1 @with_nuw_large_negative(i8 %x, i8 %y) { %tobool = icmp eq i8 %t2, %t1 ret i1 %tobool } + +define i1 @ugt_offset(i8 %a) { +; CHECK-LABEL: @ugt_offset( +; CHECK-NEXT: [[T:%.*]] = add i8 [[A:%.*]], 124 +; CHECK-NEXT: [[OV:%.*]] = icmp ugt i8 [[T]], -5 +; CHECK-NEXT: ret i1 [[OV]] +; + %t = add i8 %a, 124 + %ov = icmp ugt i8 %t, 251 + ret i1 %ov +} + +define i1 @ugt_offset_use(i32 %a) { +; CHECK-LABEL: @ugt_offset_use( +; CHECK-NEXT: [[T:%.*]] = add i32 [[A:%.*]], 42 +; CHECK-NEXT: call void @use(i32 [[T]]) +; CHECK-NEXT: [[OV:%.*]] = icmp ugt i32 [[T]], -2147483607 +; CHECK-NEXT: ret i1 [[OV]] +; + %t = add i32 %a, 42 + call void @use(i32 %t) + %ov = icmp ugt i32 %t, 2147483689 + ret i1 %ov +} + +define <2 x i1> @ugt_offset_splat(<2 x i5> %a) { +; CHECK-LABEL: @ugt_offset_splat( +; CHECK-NEXT: [[T:%.*]] = add <2 x i5> [[A:%.*]], +; CHECK-NEXT: [[OV:%.*]] = icmp ugt <2 x i5> [[T]], +; CHECK-NEXT: ret <2 x i1> [[OV]] +; + %t = add <2 x i5> %a, + %ov = icmp ugt <2 x i5> %t, + ret <2 x i1> %ov +} + +define i1 @ugt_wrong_offset(i8 %a) { +; CHECK-LABEL: @ugt_wrong_offset( +; CHECK-NEXT: [[T:%.*]] = add i8 [[A:%.*]], 123 +; CHECK-NEXT: [[OV:%.*]] = icmp ugt i8 [[T]], -5 +; CHECK-NEXT: ret i1 [[OV]] +; + %t = add i8 %a, 123 + %ov = icmp ugt i8 %t, 251 + ret i1 %ov +} From c7b658aeb526c3e68b0cea89f0746f45b5366827 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Wed, 30 Jun 2021 13:32:53 -0400 Subject: [PATCH 319/619] [InstCombine] fold icmp of offset value with constant There must be a better way to describe this pattern in words? (X + C2) >u C --> X getType(); CmpInst::Predicate Pred = Cmp.getPredicate(); + // Fold an unsigned compare with offset to signed compare: + // (X + C2) >u C --> X getScalarSizeInBits())) + return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantInt::get(Ty, -(*C2))); + // If the add does not wrap, we can always adjust the compare by subtracting // the constants. Equality comparisons are handled elsewhere. SGE/SLE/UGE/ULE // are canonicalized to SGT/SLT/UGT/ULT. diff --git a/llvm/test/Transforms/InstCombine/icmp-add.ll b/llvm/test/Transforms/InstCombine/icmp-add.ll index e6cd8e74d7a91..d611a80027cf4 100644 --- a/llvm/test/Transforms/InstCombine/icmp-add.ll +++ b/llvm/test/Transforms/InstCombine/icmp-add.ll @@ -750,8 +750,7 @@ define i1 @with_nuw_large_negative(i8 %x, i8 %y) { define i1 @ugt_offset(i8 %a) { ; CHECK-LABEL: @ugt_offset( -; CHECK-NEXT: [[T:%.*]] = add i8 [[A:%.*]], 124 -; CHECK-NEXT: [[OV:%.*]] = icmp ugt i8 [[T]], -5 +; CHECK-NEXT: [[OV:%.*]] = icmp slt i8 [[A:%.*]], -124 ; CHECK-NEXT: ret i1 [[OV]] ; %t = add i8 %a, 124 @@ -763,7 +762,7 @@ define i1 @ugt_offset_use(i32 %a) { ; CHECK-LABEL: @ugt_offset_use( ; CHECK-NEXT: [[T:%.*]] = add i32 [[A:%.*]], 42 ; CHECK-NEXT: call void @use(i32 [[T]]) -; CHECK-NEXT: [[OV:%.*]] = icmp ugt i32 [[T]], -2147483607 +; CHECK-NEXT: [[OV:%.*]] = icmp slt i32 [[A]], -42 ; CHECK-NEXT: ret i1 [[OV]] ; %t = add i32 %a, 42 @@ -774,8 +773,7 @@ define i1 @ugt_offset_use(i32 %a) { define <2 x i1> @ugt_offset_splat(<2 x i5> %a) { ; CHECK-LABEL: @ugt_offset_splat( -; CHECK-NEXT: [[T:%.*]] = add <2 x i5> [[A:%.*]], -; CHECK-NEXT: [[OV:%.*]] = icmp ugt <2 x i5> [[T]], +; CHECK-NEXT: [[OV:%.*]] = icmp slt <2 x i5> [[A:%.*]], ; CHECK-NEXT: ret <2 x i1> [[OV]] ; %t = add <2 x i5> %a, @@ -783,6 +781,8 @@ define <2 x i1> @ugt_offset_splat(<2 x i5> %a) { ret <2 x i1> %ov } +; negative test - constants must differ by SMAX + define i1 @ugt_wrong_offset(i8 %a) { ; CHECK-LABEL: @ugt_wrong_offset( ; CHECK-NEXT: [[T:%.*]] = add i8 [[A:%.*]], 123 From 59fa435ea66629b4c45d9e6b62fa6cc1cdf5d5aa Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 30 Jun 2021 16:22:53 +0100 Subject: [PATCH 320/619] [X86] Canonicalize SGT/UGT compares with constants to use SGE/UGE to reduce the number of EFLAGs reads. (PR48760) This demonstrates a possible fix for PR48760 - for compares with constants, canonicalize the SGT/UGT condition code to use SGE/UGE which should reduce the number of EFLAGs bits we need to read. As discussed on PR48760, some EFLAG bits are treated independently which can require additional uops to merge together for certain CMOVcc/SETcc/etc. modes. I've limited this to cases where the constant increment doesn't result in a larger encoding or additional i64 constant materializations. Differential Revision: https://reviews.llvm.org/D101074 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 52 ++++++++++++++++ .../CodeGen/X86/2008-09-11-CoalescerBug2.ll | 4 +- llvm/test/CodeGen/X86/atomic-eflags-reuse.ll | 30 +++++---- llvm/test/CodeGen/X86/cmov.ll | 9 ++- .../X86/lack-of-signed-truncation-check.ll | 48 +++++++------- llvm/test/CodeGen/X86/mul-constant-result.ll | 8 +-- llvm/test/CodeGen/X86/or-branch.ll | 12 ++-- llvm/test/CodeGen/X86/pr45995-2.ll | 4 +- llvm/test/CodeGen/X86/pr5145.ll | 8 +-- llvm/test/CodeGen/X86/sadd_sat.ll | 8 +-- llvm/test/CodeGen/X86/sadd_sat_plus.ll | 8 +-- llvm/test/CodeGen/X86/sdiv_fix_sat.ll | 48 +++++++------- llvm/test/CodeGen/X86/select.ll | 16 ++--- llvm/test/CodeGen/X86/select_const.ll | 12 ++-- llvm/test/CodeGen/X86/setcc-logic.ll | 4 +- llvm/test/CodeGen/X86/setcc.ll | 4 +- llvm/test/CodeGen/X86/smul_fix_sat.ll | 62 +++++++++---------- .../CodeGen/X86/smul_fix_sat_constants.ll | 8 +-- llvm/test/CodeGen/X86/srem-seteq.ll | 32 +++++----- llvm/test/CodeGen/X86/ssub_sat.ll | 8 +-- llvm/test/CodeGen/X86/ssub_sat_plus.ll | 8 +-- llvm/test/CodeGen/X86/umul_fix_sat.ll | 52 ++++++++-------- .../CodeGen/X86/urem-seteq-illegal-types.ll | 28 ++++----- llvm/test/CodeGen/X86/urem-seteq.ll | 32 +++++----- .../CodeGen/X86/vector-mulfix-legalize.ll | 32 +++++----- llvm/test/CodeGen/X86/zext-sext.ll | 4 +- 26 files changed, 295 insertions(+), 246 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 66c8943b1bcb0..b7d272c5aab87 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -23469,6 +23469,33 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { } if (Op0.getSimpleValueType().isInteger()) { + // Attempt to canonicalize SGT/UGT -> SGE/UGE compares with constant which + // reduces the number of EFLAGs bit reads (the GE conditions don't read ZF), + // this may translate to less uops depending on uarch implementation. The + // equivalent for SLE/ULE -> SLT/ULT isn't likely to happen as we already + // canonicalize to that CondCode. + // NOTE: Only do this if incrementing the constant doesn't increase the bit + // encoding size - so it must either already be a i8 or i32 immediate, or it + // shrinks down to that. We don't do this for any i64's to avoid additional + // constant materializations. + // TODO: Can we move this to TranslateX86CC to handle jumps/branches too? + if (auto *Op1C = dyn_cast(Op1)) { + const APInt &Op1Val = Op1C->getAPIntValue(); + if (!Op1Val.isNullValue()) { + // Ensure the constant+1 doesn't overflow. + if ((CC == ISD::CondCode::SETGT && !Op1Val.isMaxSignedValue()) || + (CC == ISD::CondCode::SETUGT && !Op1Val.isMaxValue())) { + APInt Op1ValPlusOne = Op1Val + 1; + if (Op1ValPlusOne.isSignedIntN(32) && + (!Op1Val.isSignedIntN(8) || Op1ValPlusOne.isSignedIntN(8))) { + Op1 = DAG.getConstant(Op1ValPlusOne, dl, Op0.getValueType()); + CC = CC == ISD::CondCode::SETGT ? ISD::CondCode::SETGE + : ISD::CondCode::SETUGE; + } + } + } + } + SDValue X86CC; SDValue EFLAGS = emitFlagsForSetcc(Op0, Op1, CC, dl, DAG, X86CC); SDValue Res = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, X86CC, EFLAGS); @@ -42054,6 +42081,31 @@ static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC, APInt Comparison = CmpRHSC->getAPIntValue(); APInt NegAddend = -Addend; + // See if we can adjust the CC to make the comparison match the negated + // addend. + if (Comparison != NegAddend) { + APInt IncComparison = Comparison + 1; + if (IncComparison == NegAddend) { + if (CC == X86::COND_A && !Comparison.isMaxValue()) { + Comparison = IncComparison; + CC = X86::COND_AE; + } else if (CC == X86::COND_LE && !Comparison.isMaxSignedValue()) { + Comparison = IncComparison; + CC = X86::COND_L; + } + } + APInt DecComparison = Comparison - 1; + if (DecComparison == NegAddend) { + if (CC == X86::COND_AE && !Comparison.isMinValue()) { + Comparison = DecComparison; + CC = X86::COND_A; + } else if (CC == X86::COND_L && !Comparison.isMinSignedValue()) { + Comparison = DecComparison; + CC = X86::COND_LE; + } + } + } + // If the addend is the negation of the comparison value, then we can do // a full comparison by emitting the atomic arithmetic as a locked sub. if (Comparison == NegAddend) { diff --git a/llvm/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll b/llvm/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll index c21b6c294b58b..dff4baf6859f1 100644 --- a/llvm/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll +++ b/llvm/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll @@ -13,8 +13,8 @@ define i32 @func_44(i16 signext %p_46) nounwind { ; SOURCE-SCHED-NEXT: movl g_5, %eax ; SOURCE-SCHED-NEXT: sarl %eax ; SOURCE-SCHED-NEXT: xorl %ecx, %ecx -; SOURCE-SCHED-NEXT: cmpl $1, %eax -; SOURCE-SCHED-NEXT: setg %cl +; SOURCE-SCHED-NEXT: cmpl $2, %eax +; SOURCE-SCHED-NEXT: setge %cl ; SOURCE-SCHED-NEXT: movb g_73, %dl ; SOURCE-SCHED-NEXT: xorl %eax, %eax ; SOURCE-SCHED-NEXT: subb {{[0-9]+}}(%esp), %al diff --git a/llvm/test/CodeGen/X86/atomic-eflags-reuse.ll b/llvm/test/CodeGen/X86/atomic-eflags-reuse.ll index b5a27892ad2f2..5e5e0eed6ca0a 100644 --- a/llvm/test/CodeGen/X86/atomic-eflags-reuse.ll +++ b/llvm/test/CodeGen/X86/atomic-eflags-reuse.ll @@ -55,7 +55,7 @@ define i32 @test_sub_1_cmov_sle(i64* %p, i32 %a0, i32 %a1) #0 { ; SLOWINCDEC-LABEL: test_sub_1_cmov_sle: ; SLOWINCDEC: # %bb.0: # %entry ; SLOWINCDEC-NEXT: movl %esi, %eax -; SLOWINCDEC-NEXT: lock addq $-1, (%rdi) +; SLOWINCDEC-NEXT: lock subq $1, (%rdi) ; SLOWINCDEC-NEXT: cmovgel %edx, %eax ; SLOWINCDEC-NEXT: retq entry: @@ -298,16 +298,18 @@ entry: ret i8 %tmp2 } -; FIXME: This test canonicalizes in a way that hides the fact that the -; comparison can be folded into the atomic subtract. define i8 @test_sub_1_cmp_1_setcc_sle(i64* %p) #0 { -; CHECK-LABEL: test_sub_1_cmp_1_setcc_sle: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movq $-1, %rax -; CHECK-NEXT: lock xaddq %rax, (%rdi) -; CHECK-NEXT: cmpq $2, %rax -; CHECK-NEXT: setl %al -; CHECK-NEXT: retq +; FASTINCDEC-LABEL: test_sub_1_cmp_1_setcc_sle: +; FASTINCDEC: # %bb.0: # %entry +; FASTINCDEC-NEXT: lock decq (%rdi) +; FASTINCDEC-NEXT: setle %al +; FASTINCDEC-NEXT: retq +; +; SLOWINCDEC-LABEL: test_sub_1_cmp_1_setcc_sle: +; SLOWINCDEC: # %bb.0: # %entry +; SLOWINCDEC-NEXT: lock subq $1, (%rdi) +; SLOWINCDEC-NEXT: setle %al +; SLOWINCDEC-NEXT: retq entry: %tmp0 = atomicrmw sub i64* %p, i64 1 seq_cst %tmp1 = icmp sle i64 %tmp0, 1 @@ -328,15 +330,11 @@ entry: ret i8 %tmp2 } -; FIXME: This test canonicalizes in a way that hides the fact that the -; comparison can be folded into the atomic subtract. define i8 @test_sub_3_cmp_3_setcc_uge(i64* %p) #0 { ; CHECK-LABEL: test_sub_3_cmp_3_setcc_uge: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movq $-3, %rax -; CHECK-NEXT: lock xaddq %rax, (%rdi) -; CHECK-NEXT: cmpq $2, %rax -; CHECK-NEXT: seta %al +; CHECK-NEXT: lock subq $3, (%rdi) +; CHECK-NEXT: setae %al ; CHECK-NEXT: retq entry: %tmp0 = atomicrmw sub i64* %p, i64 3 seq_cst diff --git a/llvm/test/CodeGen/X86/cmov.ll b/llvm/test/CodeGen/X86/cmov.ll index 9aaf8eb5463c3..bc42a2b5a7368 100644 --- a/llvm/test/CodeGen/X86/cmov.ll +++ b/llvm/test/CodeGen/X86/cmov.ll @@ -159,8 +159,8 @@ define i32 @test5(i32* nocapture %P) nounwind readonly { ; CHECK-LABEL: test5: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: cmpl $41, (%rdi) -; CHECK-NEXT: setg %al +; CHECK-NEXT: cmpl $42, (%rdi) +; CHECK-NEXT: setge %al ; CHECK-NEXT: orl $-2, %eax ; CHECK-NEXT: retq entry: @@ -202,9 +202,8 @@ define i64 @test8(i64 %0, i64 %1, i64 %2) { ; CHECK-LABEL: test8: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %rax -; CHECK-NEXT: movabsq $-2147483649, %rcx # imm = 0xFFFFFFFF7FFFFFFF -; CHECK-NEXT: cmpq %rcx, %rdi -; CHECK-NEXT: cmovleq %rdx, %rax +; CHECK-NEXT: cmpq $-2147483648, %rdi # imm = 0x80000000 +; CHECK-NEXT: cmovlq %rdx, %rax ; CHECK-NEXT: retq %4 = icmp sgt i64 %0, -2147483649 %5 = select i1 %4, i64 %1, i64 %2 diff --git a/llvm/test/CodeGen/X86/lack-of-signed-truncation-check.ll b/llvm/test/CodeGen/X86/lack-of-signed-truncation-check.ll index 3b0c5b89c57ac..7bef94cca0d35 100644 --- a/llvm/test/CodeGen/X86/lack-of-signed-truncation-check.ll +++ b/llvm/test/CodeGen/X86/lack-of-signed-truncation-check.ll @@ -465,16 +465,16 @@ define i1 @add_ugecmp_bad_i16_i8_add(i16 %x, i16 %y) nounwind { ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X86-NEXT: addw {{[0-9]+}}(%esp), %ax ; X86-NEXT: movzwl %ax, %eax -; X86-NEXT: cmpl $255, %eax -; X86-NEXT: seta %al +; X86-NEXT: cmpl $256, %eax # imm = 0x100 +; X86-NEXT: setae %al ; X86-NEXT: retl ; ; X64-LABEL: add_ugecmp_bad_i16_i8_add: ; X64: # %bb.0: ; X64-NEXT: addl %esi, %edi ; X64-NEXT: movzwl %di, %eax -; X64-NEXT: cmpl $255, %eax -; X64-NEXT: seta %al +; X64-NEXT: cmpl $256, %eax # imm = 0x100 +; X64-NEXT: setae %al ; X64-NEXT: retq %tmp0 = add i16 %x, %y %tmp1 = icmp uge i16 %tmp0, 256 ; 1U << 8 @@ -530,16 +530,16 @@ define i1 @add_ugecmp_bad_i16_i8_c0notpoweroftwo(i16 %x) nounwind { ; X86-NEXT: movl $192, %eax ; X86-NEXT: addl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movzwl %ax, %eax -; X86-NEXT: cmpl $255, %eax -; X86-NEXT: seta %al +; X86-NEXT: cmpl $256, %eax # imm = 0x100 +; X86-NEXT: setae %al ; X86-NEXT: retl ; ; X64-LABEL: add_ugecmp_bad_i16_i8_c0notpoweroftwo: ; X64: # %bb.0: ; X64-NEXT: addl $192, %edi ; X64-NEXT: movzwl %di, %eax -; X64-NEXT: cmpl $255, %eax -; X64-NEXT: seta %al +; X64-NEXT: cmpl $256, %eax # imm = 0x100 +; X64-NEXT: setae %al ; X64-NEXT: retq %tmp0 = add i16 %x, 192 ; (1U << (8-1)) + (1U << (8-1-1)) %tmp1 = icmp uge i16 %tmp0, 256 ; 1U << 8 @@ -553,16 +553,16 @@ define i1 @add_ugecmp_bad_i16_i8_c1notpoweroftwo(i16 %x) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: subl $-128, %eax ; X86-NEXT: movzwl %ax, %eax -; X86-NEXT: cmpl $767, %eax # imm = 0x2FF -; X86-NEXT: seta %al +; X86-NEXT: cmpl $768, %eax # imm = 0x300 +; X86-NEXT: setae %al ; X86-NEXT: retl ; ; X64-LABEL: add_ugecmp_bad_i16_i8_c1notpoweroftwo: ; X64: # %bb.0: ; X64-NEXT: subl $-128, %edi ; X64-NEXT: movzwl %di, %eax -; X64-NEXT: cmpl $767, %eax # imm = 0x2FF -; X64-NEXT: seta %al +; X64-NEXT: cmpl $768, %eax # imm = 0x300 +; X64-NEXT: setae %al ; X64-NEXT: retq %tmp0 = add i16 %x, 128 ; 1U << (8-1) %tmp1 = icmp uge i16 %tmp0, 768 ; (1U << 8)) + (1U << (8+1)) @@ -576,16 +576,16 @@ define i1 @add_ugecmp_bad_i16_i8_magic(i16 %x) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: addl $64, %eax ; X86-NEXT: movzwl %ax, %eax -; X86-NEXT: cmpl $255, %eax -; X86-NEXT: seta %al +; X86-NEXT: cmpl $256, %eax # imm = 0x100 +; X86-NEXT: setae %al ; X86-NEXT: retl ; ; X64-LABEL: add_ugecmp_bad_i16_i8_magic: ; X64: # %bb.0: ; X64-NEXT: addl $64, %edi ; X64-NEXT: movzwl %di, %eax -; X64-NEXT: cmpl $255, %eax -; X64-NEXT: seta %al +; X64-NEXT: cmpl $256, %eax # imm = 0x100 +; X64-NEXT: setae %al ; X64-NEXT: retq %tmp0 = add i16 %x, 64 ; 1U << (8-1-1) %tmp1 = icmp uge i16 %tmp0, 256 ; 1U << 8 @@ -598,15 +598,15 @@ define i1 @add_ugecmp_bad_i16_i4(i16 %x) nounwind { ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: addl $8, %eax -; X86-NEXT: cmpw $15, %ax -; X86-NEXT: seta %al +; X86-NEXT: cmpw $16, %ax +; X86-NEXT: setae %al ; X86-NEXT: retl ; ; X64-LABEL: add_ugecmp_bad_i16_i4: ; X64: # %bb.0: ; X64-NEXT: addl $8, %edi -; X64-NEXT: cmpw $15, %di -; X64-NEXT: seta %al +; X64-NEXT: cmpw $16, %di +; X64-NEXT: setae %al ; X64-NEXT: retq %tmp0 = add i16 %x, 8 ; 1U << (4-1) %tmp1 = icmp uge i16 %tmp0, 16 ; 1U << 4 @@ -620,16 +620,16 @@ define i1 @add_ugecmp_bad_i24_i8(i24 %x) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: subl $-128, %eax ; X86-NEXT: andl $16777215, %eax # imm = 0xFFFFFF -; X86-NEXT: cmpl $255, %eax -; X86-NEXT: seta %al +; X86-NEXT: cmpl $256, %eax # imm = 0x100 +; X86-NEXT: setae %al ; X86-NEXT: retl ; ; X64-LABEL: add_ugecmp_bad_i24_i8: ; X64: # %bb.0: ; X64-NEXT: subl $-128, %edi ; X64-NEXT: andl $16777215, %edi # imm = 0xFFFFFF -; X64-NEXT: cmpl $255, %edi -; X64-NEXT: seta %al +; X64-NEXT: cmpl $256, %edi # imm = 0x100 +; X64-NEXT: setae %al ; X64-NEXT: retq %tmp0 = add i24 %x, 128 ; 1U << (8-1) %tmp1 = icmp uge i24 %tmp0, 256 ; 1U << 8 diff --git a/llvm/test/CodeGen/X86/mul-constant-result.ll b/llvm/test/CodeGen/X86/mul-constant-result.ll index ab0f0789aaca9..25b11d1bca3e3 100644 --- a/llvm/test/CodeGen/X86/mul-constant-result.ll +++ b/llvm/test/CodeGen/X86/mul-constant-result.ll @@ -13,10 +13,10 @@ define i32 @mult(i32, i32) local_unnamed_addr #0 { ; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: .cfi_offset %esi, -8 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: cmpl $1, %edx +; X86-NEXT: cmpl $2, %edx ; X86-NEXT: movl $1, %eax ; X86-NEXT: movl $1, %esi -; X86-NEXT: jg .LBB0_2 +; X86-NEXT: jge .LBB0_2 ; X86-NEXT: # %bb.1: ; X86-NEXT: movl %edx, %esi ; X86-NEXT: .LBB0_2: @@ -188,10 +188,10 @@ define i32 @mult(i32, i32) local_unnamed_addr #0 { ; X64-HSW-LABEL: mult: ; X64-HSW: # %bb.0: ; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi -; X64-HSW-NEXT: cmpl $1, %esi +; X64-HSW-NEXT: cmpl $2, %esi ; X64-HSW-NEXT: movl $1, %ecx ; X64-HSW-NEXT: movl %esi, %eax -; X64-HSW-NEXT: cmovgl %ecx, %eax +; X64-HSW-NEXT: cmovgel %ecx, %eax ; X64-HSW-NEXT: testl %esi, %esi ; X64-HSW-NEXT: cmovel %ecx, %eax ; X64-HSW-NEXT: decl %edi diff --git a/llvm/test/CodeGen/X86/or-branch.ll b/llvm/test/CodeGen/X86/or-branch.ll index 1f71b97d2befb..c9f6e3e49632b 100644 --- a/llvm/test/CodeGen/X86/or-branch.ll +++ b/llvm/test/CodeGen/X86/or-branch.ll @@ -19,8 +19,8 @@ define void @foo(i32 %X, i32 %Y, i32 %Z) nounwind { ; JUMP1: # %bb.0: # %entry ; JUMP1-NEXT: cmpl $0, {{[0-9]+}}(%esp) ; JUMP1-NEXT: setne %al -; JUMP1-NEXT: cmpl $4, {{[0-9]+}}(%esp) -; JUMP1-NEXT: setg %cl +; JUMP1-NEXT: cmpl $5, {{[0-9]+}}(%esp) +; JUMP1-NEXT: setge %cl ; JUMP1-NEXT: testb %al, %cl ; JUMP1-NEXT: jne .LBB0_1 ; JUMP1-NEXT: # %bb.2: # %cond_true @@ -49,8 +49,8 @@ define void @unpredictable(i32 %X, i32 %Y, i32 %Z) nounwind { ; JUMP2: # %bb.0: # %entry ; JUMP2-NEXT: cmpl $0, {{[0-9]+}}(%esp) ; JUMP2-NEXT: setne %al -; JUMP2-NEXT: cmpl $4, {{[0-9]+}}(%esp) -; JUMP2-NEXT: setg %cl +; JUMP2-NEXT: cmpl $5, {{[0-9]+}}(%esp) +; JUMP2-NEXT: setge %cl ; JUMP2-NEXT: testb %al, %cl ; JUMP2-NEXT: jne .LBB1_1 ; JUMP2-NEXT: # %bb.2: # %cond_true @@ -62,8 +62,8 @@ define void @unpredictable(i32 %X, i32 %Y, i32 %Z) nounwind { ; JUMP1: # %bb.0: # %entry ; JUMP1-NEXT: cmpl $0, {{[0-9]+}}(%esp) ; JUMP1-NEXT: setne %al -; JUMP1-NEXT: cmpl $4, {{[0-9]+}}(%esp) -; JUMP1-NEXT: setg %cl +; JUMP1-NEXT: cmpl $5, {{[0-9]+}}(%esp) +; JUMP1-NEXT: setge %cl ; JUMP1-NEXT: testb %al, %cl ; JUMP1-NEXT: jne .LBB1_1 ; JUMP1-NEXT: # %bb.2: # %cond_true diff --git a/llvm/test/CodeGen/X86/pr45995-2.ll b/llvm/test/CodeGen/X86/pr45995-2.ll index 2d24138251e2c..27f1ef4cc7015 100644 --- a/llvm/test/CodeGen/X86/pr45995-2.ll +++ b/llvm/test/CodeGen/X86/pr45995-2.ll @@ -5,8 +5,8 @@ define <4 x i1> @selecter(i64 %0) { ; CHECK-LABEL: selecter: ; CHECK: # %bb.0: ; CHECK-NEXT: xor eax, eax -; CHECK-NEXT: cmp rdi, 1 -; CHECK-NEXT: setg al +; CHECK-NEXT: cmp rdi, 2 +; CHECK-NEXT: setge al ; CHECK-NEXT: lea eax, [rax + 2*rax] ; CHECK-NEXT: kmovd k0, eax ; CHECK-NEXT: vpmovm2d xmm0, k0 diff --git a/llvm/test/CodeGen/X86/pr5145.ll b/llvm/test/CodeGen/X86/pr5145.ll index be1610a59a2bd..5858649b9ceb9 100644 --- a/llvm/test/CodeGen/X86/pr5145.ll +++ b/llvm/test/CodeGen/X86/pr5145.ll @@ -9,10 +9,10 @@ define void @atomic_maxmin_i8() { ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_1: # %atomicrmw.start ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: cmpb $5, %al +; CHECK-NEXT: cmpb $6, %al ; CHECK-NEXT: movzbl %al, %eax ; CHECK-NEXT: movl $5, %ecx -; CHECK-NEXT: cmovgl %eax, %ecx +; CHECK-NEXT: cmovgel %eax, %ecx ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: lock cmpxchgb %cl, sc8(%rip) ; CHECK-NEXT: jne .LBB0_1 @@ -33,10 +33,10 @@ define void @atomic_maxmin_i8() { ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_5: # %atomicrmw.start8 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: cmpb $7, %al +; CHECK-NEXT: cmpb $8, %al ; CHECK-NEXT: movzbl %al, %eax ; CHECK-NEXT: movl $7, %ecx -; CHECK-NEXT: cmoval %eax, %ecx +; CHECK-NEXT: cmovael %eax, %ecx ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: lock cmpxchgb %cl, sc8(%rip) ; CHECK-NEXT: jne .LBB0_5 diff --git a/llvm/test/CodeGen/X86/sadd_sat.ll b/llvm/test/CodeGen/X86/sadd_sat.ll index 6f16bd15c0e74..e866c82564425 100644 --- a/llvm/test/CodeGen/X86/sadd_sat.ll +++ b/llvm/test/CodeGen/X86/sadd_sat.ll @@ -151,9 +151,9 @@ define signext i4 @func3(i4 signext %x, i4 signext %y) nounwind { ; X86-NEXT: cmpb $7, %al ; X86-NEXT: movl $7, %eax ; X86-NEXT: cmovll %ecx, %eax -; X86-NEXT: cmpb $-8, %al +; X86-NEXT: cmpb $-7, %al ; X86-NEXT: movl $248, %ecx -; X86-NEXT: cmovgl %eax, %ecx +; X86-NEXT: cmovgel %eax, %ecx ; X86-NEXT: movsbl %cl, %eax ; X86-NEXT: retl ; @@ -164,9 +164,9 @@ define signext i4 @func3(i4 signext %x, i4 signext %y) nounwind { ; X64-NEXT: cmpb $7, %al ; X64-NEXT: movl $7, %ecx ; X64-NEXT: cmovll %eax, %ecx -; X64-NEXT: cmpb $-8, %cl +; X64-NEXT: cmpb $-7, %cl ; X64-NEXT: movl $248, %eax -; X64-NEXT: cmovgl %ecx, %eax +; X64-NEXT: cmovgel %ecx, %eax ; X64-NEXT: movsbl %al, %eax ; X64-NEXT: retq %tmp = call i4 @llvm.sadd.sat.i4(i4 %x, i4 %y); diff --git a/llvm/test/CodeGen/X86/sadd_sat_plus.ll b/llvm/test/CodeGen/X86/sadd_sat_plus.ll index f6f2eeaa5489a..e50f6d5b6c553 100644 --- a/llvm/test/CodeGen/X86/sadd_sat_plus.ll +++ b/llvm/test/CodeGen/X86/sadd_sat_plus.ll @@ -165,9 +165,9 @@ define signext i4 @func4(i4 signext %x, i4 signext %y, i4 signext %z) nounwind { ; X86-NEXT: cmpb $7, %al ; X86-NEXT: movl $7, %eax ; X86-NEXT: cmovll %ecx, %eax -; X86-NEXT: cmpb $-8, %al +; X86-NEXT: cmpb $-7, %al ; X86-NEXT: movl $248, %ecx -; X86-NEXT: cmovgl %eax, %ecx +; X86-NEXT: cmovgel %eax, %ecx ; X86-NEXT: movsbl %cl, %eax ; X86-NEXT: retl ; @@ -183,9 +183,9 @@ define signext i4 @func4(i4 signext %x, i4 signext %y, i4 signext %z) nounwind { ; X64-NEXT: cmpb $7, %al ; X64-NEXT: movl $7, %ecx ; X64-NEXT: cmovll %eax, %ecx -; X64-NEXT: cmpb $-8, %cl +; X64-NEXT: cmpb $-7, %cl ; X64-NEXT: movl $248, %eax -; X64-NEXT: cmovgl %ecx, %eax +; X64-NEXT: cmovgel %ecx, %eax ; X64-NEXT: movsbl %al, %eax ; X64-NEXT: retq %a = mul i4 %y, %z diff --git a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll index f22812c148355..26cbe90d8759c 100644 --- a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll +++ b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll @@ -33,9 +33,9 @@ define i16 @func(i16 %x, i16 %y) nounwind { ; X64-NEXT: cmpl $65535, %edi # imm = 0xFFFF ; X64-NEXT: movl $65535, %ecx # imm = 0xFFFF ; X64-NEXT: cmovll %edi, %ecx -; X64-NEXT: cmpl $-65536, %ecx # imm = 0xFFFF0000 +; X64-NEXT: cmpl $-65535, %ecx # imm = 0xFFFF0001 ; X64-NEXT: movl $-65536, %eax # imm = 0xFFFF0000 -; X64-NEXT: cmovgl %ecx, %eax +; X64-NEXT: cmovgel %ecx, %eax ; X64-NEXT: shrl %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq @@ -64,9 +64,9 @@ define i16 @func(i16 %x, i16 %y) nounwind { ; X86-NEXT: cmpl $65535, %edi # imm = 0xFFFF ; X86-NEXT: movl $65535, %ecx # imm = 0xFFFF ; X86-NEXT: cmovll %edi, %ecx -; X86-NEXT: cmpl $-65536, %ecx # imm = 0xFFFF0000 +; X86-NEXT: cmpl $-65535, %ecx # imm = 0xFFFF0001 ; X86-NEXT: movl $-65536, %eax # imm = 0xFFFF0000 -; X86-NEXT: cmovgl %ecx, %eax +; X86-NEXT: cmovgel %ecx, %eax ; X86-NEXT: shrl %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: popl %esi @@ -103,9 +103,9 @@ define i16 @func2(i8 %x, i8 %y) nounwind { ; X64-NEXT: cmpl $16383, %edi # imm = 0x3FFF ; X64-NEXT: movl $16383, %ecx # imm = 0x3FFF ; X64-NEXT: cmovll %edi, %ecx -; X64-NEXT: cmpl $-16384, %ecx # imm = 0xC000 +; X64-NEXT: cmpl $-16383, %ecx # imm = 0xC001 ; X64-NEXT: movl $-16384, %eax # imm = 0xC000 -; X64-NEXT: cmovgl %ecx, %eax +; X64-NEXT: cmovgel %ecx, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq ; @@ -133,9 +133,9 @@ define i16 @func2(i8 %x, i8 %y) nounwind { ; X86-NEXT: cmpl $16383, %edi # imm = 0x3FFF ; X86-NEXT: movl $16383, %ecx # imm = 0x3FFF ; X86-NEXT: cmovll %edi, %ecx -; X86-NEXT: cmpl $-16384, %ecx # imm = 0xC000 +; X86-NEXT: cmpl $-16383, %ecx # imm = 0xC001 ; X86-NEXT: movl $-16384, %eax # imm = 0xC000 -; X86-NEXT: cmovgl %ecx, %eax +; X86-NEXT: cmovgel %ecx, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi @@ -175,9 +175,9 @@ define i16 @func3(i15 %x, i8 %y) nounwind { ; X64-NEXT: movl $16383, %ecx # imm = 0x3FFF ; X64-NEXT: cmovll %esi, %ecx ; X64-NEXT: movswl %cx, %eax -; X64-NEXT: cmpl $-16384, %eax # imm = 0xC000 +; X64-NEXT: cmpl $-16383, %eax # imm = 0xC001 ; X64-NEXT: movl $49152, %eax # imm = 0xC000 -; X64-NEXT: cmovgl %ecx, %eax +; X64-NEXT: cmovgel %ecx, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq ; @@ -210,9 +210,9 @@ define i16 @func3(i15 %x, i8 %y) nounwind { ; X86-NEXT: movl $16383, %ecx # imm = 0x3FFF ; X86-NEXT: cmovll %edi, %ecx ; X86-NEXT: movswl %cx, %eax -; X86-NEXT: cmpl $-16384, %eax # imm = 0xC000 +; X86-NEXT: cmpl $-16383, %eax # imm = 0xC001 ; X86-NEXT: movl $49152, %eax # imm = 0xC000 -; X86-NEXT: cmovgl %ecx, %eax +; X86-NEXT: cmovgel %ecx, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi @@ -253,9 +253,9 @@ define i4 @func4(i4 %x, i4 %y) nounwind { ; X64-NEXT: cmpb $7, %dil ; X64-NEXT: movl $7, %ecx ; X64-NEXT: cmovll %edi, %ecx -; X64-NEXT: cmpb $-8, %cl +; X64-NEXT: cmpb $-7, %cl ; X64-NEXT: movl $248, %eax -; X64-NEXT: cmovgl %ecx, %eax +; X64-NEXT: cmovgel %ecx, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: popq %rbx ; X64-NEXT: retq @@ -288,9 +288,9 @@ define i4 @func4(i4 %x, i4 %y) nounwind { ; X86-NEXT: cmpb $7, %al ; X86-NEXT: movl $7, %ecx ; X86-NEXT: cmovll %eax, %ecx -; X86-NEXT: cmpb $-8, %cl +; X86-NEXT: cmpb $-7, %cl ; X86-NEXT: movl $248, %eax -; X86-NEXT: cmovgl %ecx, %eax +; X86-NEXT: cmovgel %ecx, %eax ; X86-NEXT: # kill: def $al killed $al killed $eax ; X86-NEXT: popl %esi ; X86-NEXT: retl @@ -351,8 +351,8 @@ define i64 @func5(i64 %x, i64 %y) nounwind { ; X64-NEXT: cmovgq %rcx, %rbx ; X64-NEXT: testq %rbp, %rbp ; X64-NEXT: cmovnsq %rbp, %rcx -; X64-NEXT: cmpq $-2, %rbp -; X64-NEXT: cmovleq %rax, %rbx +; X64-NEXT: cmpq $-1, %rbp +; X64-NEXT: cmovlq %rax, %rbx ; X64-NEXT: shrdq $1, %rcx, %rbx ; X64-NEXT: movq %rbx, %rax ; X64-NEXT: addq $24, %rsp @@ -473,9 +473,9 @@ define i64 @func5(i64 %x, i64 %y) nounwind { ; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X86-NEXT: cmovnel %eax, %esi ; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT: cmpl $-2147483648, %edx # imm = 0x80000000 +; X86-NEXT: cmpl $-2147483647, %edx # imm = 0x80000001 ; X86-NEXT: movl $-2147483648, %eax # imm = 0x80000000 -; X86-NEXT: cmoval %edx, %eax +; X86-NEXT: cmovael %edx, %eax ; X86-NEXT: movl %edx, %ecx ; X86-NEXT: sarl $31, %ecx ; X86-NEXT: andl %esi, %ecx @@ -523,9 +523,9 @@ define i18 @func6(i16 %x, i16 %y) nounwind { ; X64-NEXT: cmpl $131071, %edi # imm = 0x1FFFF ; X64-NEXT: movl $131071, %ecx # imm = 0x1FFFF ; X64-NEXT: cmovll %edi, %ecx -; X64-NEXT: cmpl $-131072, %ecx # imm = 0xFFFE0000 +; X64-NEXT: cmpl $-131071, %ecx # imm = 0xFFFE0001 ; X64-NEXT: movl $-131072, %eax # imm = 0xFFFE0000 -; X64-NEXT: cmovgl %ecx, %eax +; X64-NEXT: cmovgel %ecx, %eax ; X64-NEXT: retq ; ; X86-LABEL: func6: @@ -552,9 +552,9 @@ define i18 @func6(i16 %x, i16 %y) nounwind { ; X86-NEXT: cmpl $131071, %edi # imm = 0x1FFFF ; X86-NEXT: movl $131071, %ecx # imm = 0x1FFFF ; X86-NEXT: cmovll %edi, %ecx -; X86-NEXT: cmpl $-131072, %ecx # imm = 0xFFFE0000 +; X86-NEXT: cmpl $-131071, %ecx # imm = 0xFFFE0001 ; X86-NEXT: movl $-131072, %eax # imm = 0xFFFE0000 -; X86-NEXT: cmovgl %ecx, %eax +; X86-NEXT: cmovgel %ecx, %eax ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx diff --git a/llvm/test/CodeGen/X86/select.ll b/llvm/test/CodeGen/X86/select.ll index 1ef10c894a019..7f66b456d576c 100644 --- a/llvm/test/CodeGen/X86/select.ll +++ b/llvm/test/CodeGen/X86/select.ll @@ -1202,9 +1202,9 @@ define void @clamp_i8(i32 %src, i8* %dst) { define void @clamp(i32 %src, i16* %dst) { ; GENERIC-LABEL: clamp: ; GENERIC: ## %bb.0: -; GENERIC-NEXT: cmpl $32767, %edi ## imm = 0x7FFF +; GENERIC-NEXT: cmpl $32768, %edi ## imm = 0x8000 ; GENERIC-NEXT: movl $32767, %eax ## imm = 0x7FFF -; GENERIC-NEXT: cmovlel %edi, %eax +; GENERIC-NEXT: cmovll %edi, %eax ; GENERIC-NEXT: cmpl $-32768, %eax ## imm = 0x8000 ; GENERIC-NEXT: movl $32768, %ecx ## imm = 0x8000 ; GENERIC-NEXT: cmovgel %eax, %ecx @@ -1213,10 +1213,10 @@ define void @clamp(i32 %src, i16* %dst) { ; ; ATOM-LABEL: clamp: ; ATOM: ## %bb.0: -; ATOM-NEXT: cmpl $32767, %edi ## imm = 0x7FFF +; ATOM-NEXT: cmpl $32768, %edi ## imm = 0x8000 ; ATOM-NEXT: movl $32767, %eax ## imm = 0x7FFF ; ATOM-NEXT: movl $32768, %ecx ## imm = 0x8000 -; ATOM-NEXT: cmovlel %edi, %eax +; ATOM-NEXT: cmovll %edi, %eax ; ATOM-NEXT: cmpl $-32768, %eax ## imm = 0x8000 ; ATOM-NEXT: cmovgel %eax, %ecx ; ATOM-NEXT: movw %cx, (%rsi) @@ -1226,9 +1226,9 @@ define void @clamp(i32 %src, i16* %dst) { ; ATHLON: ## %bb.0: ; ATHLON-NEXT: movl {{[0-9]+}}(%esp), %eax ; ATHLON-NEXT: movl {{[0-9]+}}(%esp), %ecx -; ATHLON-NEXT: cmpl $32767, %ecx ## imm = 0x7FFF +; ATHLON-NEXT: cmpl $32768, %ecx ## imm = 0x8000 ; ATHLON-NEXT: movl $32767, %edx ## imm = 0x7FFF -; ATHLON-NEXT: cmovlel %ecx, %edx +; ATHLON-NEXT: cmovll %ecx, %edx ; ATHLON-NEXT: cmpl $-32768, %edx ## imm = 0x8000 ; ATHLON-NEXT: movl $32768, %ecx ## imm = 0x8000 ; ATHLON-NEXT: cmovgel %edx, %ecx @@ -1237,9 +1237,9 @@ define void @clamp(i32 %src, i16* %dst) { ; ; MCU-LABEL: clamp: ; MCU: # %bb.0: -; MCU-NEXT: cmpl $32767, %eax # imm = 0x7FFF +; MCU-NEXT: cmpl $32768, %eax # imm = 0x8000 ; MCU-NEXT: movl $32767, %ecx # imm = 0x7FFF -; MCU-NEXT: jg .LBB22_2 +; MCU-NEXT: jge .LBB22_2 ; MCU-NEXT: # %bb.1: ; MCU-NEXT: movl %eax, %ecx ; MCU-NEXT: .LBB22_2: diff --git a/llvm/test/CodeGen/X86/select_const.ll b/llvm/test/CodeGen/X86/select_const.ll index 852032ad268d5..32a827f4160ec 100644 --- a/llvm/test/CodeGen/X86/select_const.ll +++ b/llvm/test/CodeGen/X86/select_const.ll @@ -267,8 +267,8 @@ define i64 @sel_1_2(i64 %x, i64 %y) { define i8 @sel_1_neg1(i32 %x) { ; CHECK-LABEL: sel_1_neg1: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpl $42, %edi -; CHECK-NEXT: setg %al +; CHECK-NEXT: cmpl $43, %edi +; CHECK-NEXT: setge %al ; CHECK-NEXT: shlb $2, %al ; CHECK-NEXT: decb %al ; CHECK-NEXT: retq @@ -299,8 +299,8 @@ define i32 @sel_1_neg1_32(i32 %x) { ; CHECK-LABEL: sel_1_neg1_32: ; CHECK: # %bb.0: ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: cmpl $42, %edi -; CHECK-NEXT: setg %al +; CHECK-NEXT: cmpl $43, %edi +; CHECK-NEXT: setge %al ; CHECK-NEXT: leal -1(%rax,%rax,8), %eax ; CHECK-NEXT: retq %cmp = icmp sgt i32 %x, 42 @@ -378,10 +378,10 @@ define i64 @select_pow2_diff_neg_invert(i1 zeroext %cond) { define i8 @sel_67_neg125(i32 %x) { ; CHECK-LABEL: sel_67_neg125: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpl $42, %edi +; CHECK-NEXT: cmpl $43, %edi ; CHECK-NEXT: movl $67, %ecx ; CHECK-NEXT: movl $131, %eax -; CHECK-NEXT: cmovgl %ecx, %eax +; CHECK-NEXT: cmovgel %ecx, %eax ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %cmp = icmp sgt i32 %x, 42 diff --git a/llvm/test/CodeGen/X86/setcc-logic.ll b/llvm/test/CodeGen/X86/setcc-logic.ll index c82a7df7b3e5a..3e7d7d32e0e76 100644 --- a/llvm/test/CodeGen/X86/setcc-logic.ll +++ b/llvm/test/CodeGen/X86/setcc-logic.ll @@ -456,8 +456,8 @@ define zeroext i1 @ne_neg1_and_ne_zero(i64 %x) nounwind { ; CHECK-LABEL: ne_neg1_and_ne_zero: ; CHECK: # %bb.0: ; CHECK-NEXT: incq %rdi -; CHECK-NEXT: cmpq $1, %rdi -; CHECK-NEXT: seta %al +; CHECK-NEXT: cmpq $2, %rdi +; CHECK-NEXT: setae %al ; CHECK-NEXT: retq %cmp1 = icmp ne i64 %x, -1 %cmp2 = icmp ne i64 %x, 0 diff --git a/llvm/test/CodeGen/X86/setcc.ll b/llvm/test/CodeGen/X86/setcc.ll index 3a386da4503ee..2bbc9ffe6168f 100644 --- a/llvm/test/CodeGen/X86/setcc.ll +++ b/llvm/test/CodeGen/X86/setcc.ll @@ -6,8 +6,8 @@ define zeroext i16 @t1(i16 zeroext %x) nounwind readnone ssp { ; CHECK-LABEL: t1: ; CHECK: ## %bb.0: ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: cmpw $26, %di -; CHECK-NEXT: seta %al +; CHECK-NEXT: cmpw $27, %di +; CHECK-NEXT: setae %al ; CHECK-NEXT: shll $5, %eax ; CHECK-NEXT: retq %t0 = icmp ugt i16 %x, 26 diff --git a/llvm/test/CodeGen/X86/smul_fix_sat.ll b/llvm/test/CodeGen/X86/smul_fix_sat.ll index 4d60536636596..757763d407b24 100644 --- a/llvm/test/CodeGen/X86/smul_fix_sat.ll +++ b/llvm/test/CodeGen/X86/smul_fix_sat.ll @@ -16,9 +16,9 @@ define i32 @func(i32 %x, i32 %y) nounwind { ; X64-NEXT: movq %rcx, %rax ; X64-NEXT: shrq $32, %rax ; X64-NEXT: shrdl $2, %eax, %ecx -; X64-NEXT: cmpl $1, %eax +; X64-NEXT: cmpl $2, %eax ; X64-NEXT: movl $2147483647, %edx # imm = 0x7FFFFFFF -; X64-NEXT: cmovlel %ecx, %edx +; X64-NEXT: cmovll %ecx, %edx ; X64-NEXT: cmpl $-2, %eax ; X64-NEXT: movl $-2147483648, %eax # imm = 0x80000000 ; X64-NEXT: cmovgel %edx, %eax @@ -29,9 +29,9 @@ define i32 @func(i32 %x, i32 %y) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: imull {{[0-9]+}}(%esp) ; X86-NEXT: shrdl $2, %edx, %eax -; X86-NEXT: cmpl $1, %edx +; X86-NEXT: cmpl $2, %edx ; X86-NEXT: movl $2147483647, %ecx # imm = 0x7FFFFFFF -; X86-NEXT: cmovgl %ecx, %eax +; X86-NEXT: cmovgel %ecx, %eax ; X86-NEXT: cmpl $-2, %edx ; X86-NEXT: movl $-2147483648, %ecx # imm = 0x80000000 ; X86-NEXT: cmovll %ecx, %eax @@ -46,9 +46,9 @@ define i64 @func2(i64 %x, i64 %y) nounwind { ; X64-NEXT: movq %rdi, %rax ; X64-NEXT: imulq %rsi ; X64-NEXT: shrdq $2, %rdx, %rax -; X64-NEXT: cmpq $1, %rdx +; X64-NEXT: cmpq $2, %rdx ; X64-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF -; X64-NEXT: cmovgq %rcx, %rax +; X64-NEXT: cmovgeq %rcx, %rax ; X64-NEXT: cmpq $-2, %rdx ; X64-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 ; X64-NEXT: cmovlq %rcx, %rax @@ -100,8 +100,8 @@ define i64 @func2(i64 %x, i64 %y) nounwind { ; X86-NEXT: testl %esi, %esi ; X86-NEXT: setg %bl ; X86-NEXT: sete %bh -; X86-NEXT: cmpl $1, %ebp -; X86-NEXT: seta %dl +; X86-NEXT: cmpl $2, %ebp +; X86-NEXT: setae %dl ; X86-NEXT: andb %bh, %dl ; X86-NEXT: orb %bl, %dl ; X86-NEXT: shrdl $2, %eax, %ecx @@ -148,9 +148,9 @@ define i4 @func3(i4 %x, i4 %y) nounwind { ; X64-NEXT: shlb $6, %dl ; X64-NEXT: orb %al, %dl ; X64-NEXT: movzbl %dl, %eax -; X64-NEXT: cmpb $1, %cl +; X64-NEXT: cmpb $2, %cl ; X64-NEXT: movl $127, %edx -; X64-NEXT: cmovlel %eax, %edx +; X64-NEXT: cmovll %eax, %edx ; X64-NEXT: cmpb $-2, %cl ; X64-NEXT: movl $128, %eax ; X64-NEXT: cmovgel %edx, %eax @@ -173,9 +173,9 @@ define i4 @func3(i4 %x, i4 %y) nounwind { ; X86-NEXT: shrb $2, %al ; X86-NEXT: orb %cl, %al ; X86-NEXT: movzbl %al, %ecx -; X86-NEXT: cmpb $1, %ah +; X86-NEXT: cmpb $2, %ah ; X86-NEXT: movl $127, %edx -; X86-NEXT: cmovlel %ecx, %edx +; X86-NEXT: cmovll %ecx, %edx ; X86-NEXT: cmpb $-2, %ah ; X86-NEXT: movl $128, %eax ; X86-NEXT: cmovgel %edx, %eax @@ -199,9 +199,9 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: shrq $32, %rcx ; X64-NEXT: shrdl $2, %ecx, %edx -; X64-NEXT: cmpl $1, %ecx +; X64-NEXT: cmpl $2, %ecx ; X64-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; X64-NEXT: cmovgl %eax, %edx +; X64-NEXT: cmovgel %eax, %edx ; X64-NEXT: cmpl $-2, %ecx ; X64-NEXT: movl $-2147483648, %ecx # imm = 0x80000000 ; X64-NEXT: cmovll %ecx, %edx @@ -216,8 +216,8 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X64-NEXT: movq %rsi, %rdx ; X64-NEXT: shrq $32, %rdx ; X64-NEXT: shrdl $2, %edx, %esi -; X64-NEXT: cmpl $1, %edx -; X64-NEXT: cmovgl %eax, %esi +; X64-NEXT: cmpl $2, %edx +; X64-NEXT: cmovgel %eax, %esi ; X64-NEXT: cmpl $-2, %edx ; X64-NEXT: cmovll %ecx, %esi ; X64-NEXT: movd %esi, %xmm3 @@ -230,8 +230,8 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X64-NEXT: movq %rsi, %rdx ; X64-NEXT: shrq $32, %rdx ; X64-NEXT: shrdl $2, %edx, %esi -; X64-NEXT: cmpl $1, %edx -; X64-NEXT: cmovgl %eax, %esi +; X64-NEXT: cmpl $2, %edx +; X64-NEXT: cmovgel %eax, %esi ; X64-NEXT: cmpl $-2, %edx ; X64-NEXT: cmovll %ecx, %esi ; X64-NEXT: movd %esi, %xmm2 @@ -245,8 +245,8 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X64-NEXT: movq %rsi, %rdx ; X64-NEXT: shrq $32, %rdx ; X64-NEXT: shrdl $2, %edx, %esi -; X64-NEXT: cmpl $1, %edx -; X64-NEXT: cmovgl %eax, %esi +; X64-NEXT: cmpl $2, %edx +; X64-NEXT: cmovgel %eax, %esi ; X64-NEXT: cmpl $-2, %edx ; X64-NEXT: cmovll %ecx, %esi ; X64-NEXT: movd %esi, %xmm0 @@ -267,9 +267,9 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X86-NEXT: imull {{[0-9]+}}(%esp) ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: shrdl $2, %edx, %ecx -; X86-NEXT: cmpl $1, %edx +; X86-NEXT: cmpl $2, %edx ; X86-NEXT: movl $2147483647, %ebp # imm = 0x7FFFFFFF -; X86-NEXT: cmovgl %ebp, %ecx +; X86-NEXT: cmovgel %ebp, %ecx ; X86-NEXT: cmpl $-2, %edx ; X86-NEXT: movl $-2147483648, %esi # imm = 0x80000000 ; X86-NEXT: cmovll %esi, %ecx @@ -277,23 +277,23 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X86-NEXT: imull {{[0-9]+}}(%esp) ; X86-NEXT: movl %eax, %edi ; X86-NEXT: shrdl $2, %edx, %edi -; X86-NEXT: cmpl $1, %edx -; X86-NEXT: cmovgl %ebp, %edi +; X86-NEXT: cmpl $2, %edx +; X86-NEXT: cmovgel %ebp, %edi ; X86-NEXT: cmpl $-2, %edx ; X86-NEXT: cmovll %esi, %edi ; X86-NEXT: movl %ebx, %eax ; X86-NEXT: imull {{[0-9]+}}(%esp) ; X86-NEXT: movl %eax, %ebx ; X86-NEXT: shrdl $2, %edx, %ebx -; X86-NEXT: cmpl $1, %edx -; X86-NEXT: cmovgl %ebp, %ebx +; X86-NEXT: cmpl $2, %edx +; X86-NEXT: cmovgel %ebp, %ebx ; X86-NEXT: cmpl $-2, %edx ; X86-NEXT: cmovll %esi, %ebx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: imull {{[0-9]+}}(%esp) ; X86-NEXT: shrdl $2, %edx, %eax -; X86-NEXT: cmpl $1, %edx -; X86-NEXT: cmovgl %ebp, %eax +; X86-NEXT: cmpl $2, %edx +; X86-NEXT: cmovgel %ebp, %eax ; X86-NEXT: cmpl $-2, %edx ; X86-NEXT: cmovll %esi, %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -713,11 +713,11 @@ define i64 @func8(i64 %x, i64 %y) nounwind { ; X86-NEXT: cmovnsl %ecx, %edx ; X86-NEXT: shrdl $31, %edx, %eax ; X86-NEXT: shrdl $31, %edi, %edx -; X86-NEXT: cmpl $1073741823, %edi # imm = 0x3FFFFFFF +; X86-NEXT: cmpl $1073741824, %edi # imm = 0x40000000 ; X86-NEXT: movl $2147483647, %ecx # imm = 0x7FFFFFFF -; X86-NEXT: cmovgl %ecx, %edx +; X86-NEXT: cmovgel %ecx, %edx ; X86-NEXT: movl $-1, %ecx -; X86-NEXT: cmovgl %ecx, %eax +; X86-NEXT: cmovgel %ecx, %eax ; X86-NEXT: xorl %ecx, %ecx ; X86-NEXT: cmpl $-1073741824, %edi # imm = 0xC0000000 ; X86-NEXT: cmovll %ecx, %eax diff --git a/llvm/test/CodeGen/X86/smul_fix_sat_constants.ll b/llvm/test/CodeGen/X86/smul_fix_sat_constants.ll index b8a46567e75b6..af8353c5a5653 100644 --- a/llvm/test/CodeGen/X86/smul_fix_sat_constants.ll +++ b/llvm/test/CodeGen/X86/smul_fix_sat_constants.ll @@ -15,10 +15,10 @@ define i64 @func() nounwind { ; X64-NEXT: movl $2, %ecx ; X64-NEXT: movl $3, %eax ; X64-NEXT: imulq %rcx -; X64-NEXT: cmpq $1, %rdx +; X64-NEXT: cmpq $2, %rdx ; X64-NEXT: movabsq $9223372036854775807, %rax # imm = 0x7FFFFFFFFFFFFFFF ; X64-NEXT: movl $1, %ecx -; X64-NEXT: cmovgq %rax, %rcx +; X64-NEXT: cmovgeq %rax, %rcx ; X64-NEXT: cmpq $-2, %rdx ; X64-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 ; X64-NEXT: cmovgeq %rcx, %rax @@ -51,9 +51,9 @@ define i64 @func3() nounwind { ; X64-NEXT: movl $2, %edx ; X64-NEXT: movq %rcx, %rax ; X64-NEXT: imulq %rdx -; X64-NEXT: cmpq $1, %rdx +; X64-NEXT: cmpq $2, %rdx ; X64-NEXT: movabsq $4611686018427387903, %rsi # imm = 0x3FFFFFFFFFFFFFFF -; X64-NEXT: cmovgq %rcx, %rsi +; X64-NEXT: cmovgeq %rcx, %rsi ; X64-NEXT: cmpq $-2, %rdx ; X64-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 ; X64-NEXT: cmovgeq %rsi, %rax diff --git a/llvm/test/CodeGen/X86/srem-seteq.ll b/llvm/test/CodeGen/X86/srem-seteq.ll index 67fe5f4c5e447..dfa1472b62fe5 100644 --- a/llvm/test/CodeGen/X86/srem-seteq.ll +++ b/llvm/test/CodeGen/X86/srem-seteq.ll @@ -116,8 +116,8 @@ define i16 @test_srem_even(i16 %X) nounwind { ; X86-NEXT: rorw %ax ; X86-NEXT: movzwl %ax, %ecx ; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl $4680, %ecx # imm = 0x1248 -; X86-NEXT: seta %al +; X86-NEXT: cmpl $4681, %ecx # imm = 0x1249 +; X86-NEXT: setae %al ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; @@ -128,8 +128,8 @@ define i16 @test_srem_even(i16 %X) nounwind { ; X64-NEXT: rorw %ax ; X64-NEXT: movzwl %ax, %ecx ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl $4680, %ecx # imm = 0x1248 -; X64-NEXT: seta %al +; X64-NEXT: cmpl $4681, %ecx # imm = 0x1249 +; X64-NEXT: setae %al ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %srem = srem i16 %X, 14 @@ -229,8 +229,8 @@ define i32 @test_srem_odd_setne(i32 %X) nounwind { ; X86-NEXT: imull $-858993459, {{[0-9]+}}(%esp), %ecx # imm = 0xCCCCCCCD ; X86-NEXT: addl $429496729, %ecx # imm = 0x19999999 ; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl $858993458, %ecx # imm = 0x33333332 -; X86-NEXT: seta %al +; X86-NEXT: cmpl $858993459, %ecx # imm = 0x33333333 +; X86-NEXT: setae %al ; X86-NEXT: retl ; ; X64-LABEL: test_srem_odd_setne: @@ -238,8 +238,8 @@ define i32 @test_srem_odd_setne(i32 %X) nounwind { ; X64-NEXT: imull $-858993459, %edi, %ecx # imm = 0xCCCCCCCD ; X64-NEXT: addl $429496729, %ecx # imm = 0x19999999 ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl $858993458, %ecx # imm = 0x33333332 -; X64-NEXT: seta %al +; X64-NEXT: cmpl $858993459, %ecx # imm = 0x33333333 +; X64-NEXT: setae %al ; X64-NEXT: retq %srem = srem i32 %X, 5 %cmp = icmp ne i32 %srem, 0 @@ -254,8 +254,8 @@ define i32 @test_srem_negative_odd(i32 %X) nounwind { ; X86-NEXT: imull $-858993459, {{[0-9]+}}(%esp), %ecx # imm = 0xCCCCCCCD ; X86-NEXT: addl $429496729, %ecx # imm = 0x19999999 ; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl $858993458, %ecx # imm = 0x33333332 -; X86-NEXT: seta %al +; X86-NEXT: cmpl $858993459, %ecx # imm = 0x33333333 +; X86-NEXT: setae %al ; X86-NEXT: retl ; ; X64-LABEL: test_srem_negative_odd: @@ -263,8 +263,8 @@ define i32 @test_srem_negative_odd(i32 %X) nounwind { ; X64-NEXT: imull $-858993459, %edi, %ecx # imm = 0xCCCCCCCD ; X64-NEXT: addl $429496729, %ecx # imm = 0x19999999 ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl $858993458, %ecx # imm = 0x33333332 -; X64-NEXT: seta %al +; X64-NEXT: cmpl $858993459, %ecx # imm = 0x33333333 +; X64-NEXT: setae %al ; X64-NEXT: retq %srem = srem i32 %X, -5 %cmp = icmp ne i32 %srem, 0 @@ -278,8 +278,8 @@ define i32 @test_srem_negative_even(i32 %X) nounwind { ; X86-NEXT: addl $306783378, %ecx # imm = 0x12492492 ; X86-NEXT: rorl %ecx ; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl $306783378, %ecx # imm = 0x12492492 -; X86-NEXT: seta %al +; X86-NEXT: cmpl $306783379, %ecx # imm = 0x12492493 +; X86-NEXT: setae %al ; X86-NEXT: retl ; ; X64-LABEL: test_srem_negative_even: @@ -288,8 +288,8 @@ define i32 @test_srem_negative_even(i32 %X) nounwind { ; X64-NEXT: addl $306783378, %ecx # imm = 0x12492492 ; X64-NEXT: rorl %ecx ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl $306783378, %ecx # imm = 0x12492492 -; X64-NEXT: seta %al +; X64-NEXT: cmpl $306783379, %ecx # imm = 0x12492493 +; X64-NEXT: setae %al ; X64-NEXT: retq %srem = srem i32 %X, -14 %cmp = icmp ne i32 %srem, 0 diff --git a/llvm/test/CodeGen/X86/ssub_sat.ll b/llvm/test/CodeGen/X86/ssub_sat.ll index 0a72b8a01b612..bdb45877c913b 100644 --- a/llvm/test/CodeGen/X86/ssub_sat.ll +++ b/llvm/test/CodeGen/X86/ssub_sat.ll @@ -140,9 +140,9 @@ define signext i4 @func3(i4 signext %x, i4 signext %y) nounwind { ; X86-NEXT: cmpb $7, %al ; X86-NEXT: movl $7, %eax ; X86-NEXT: cmovll %ecx, %eax -; X86-NEXT: cmpb $-8, %al +; X86-NEXT: cmpb $-7, %al ; X86-NEXT: movl $248, %ecx -; X86-NEXT: cmovgl %eax, %ecx +; X86-NEXT: cmovgel %eax, %ecx ; X86-NEXT: movsbl %cl, %eax ; X86-NEXT: retl ; @@ -153,9 +153,9 @@ define signext i4 @func3(i4 signext %x, i4 signext %y) nounwind { ; X64-NEXT: cmpb $7, %al ; X64-NEXT: movl $7, %ecx ; X64-NEXT: cmovll %eax, %ecx -; X64-NEXT: cmpb $-8, %cl +; X64-NEXT: cmpb $-7, %cl ; X64-NEXT: movl $248, %eax -; X64-NEXT: cmovgl %ecx, %eax +; X64-NEXT: cmovgel %ecx, %eax ; X64-NEXT: movsbl %al, %eax ; X64-NEXT: retq %tmp = call i4 @llvm.ssub.sat.i4(i4 %x, i4 %y) diff --git a/llvm/test/CodeGen/X86/ssub_sat_plus.ll b/llvm/test/CodeGen/X86/ssub_sat_plus.ll index 8f2774b27a14b..dc0804994a777 100644 --- a/llvm/test/CodeGen/X86/ssub_sat_plus.ll +++ b/llvm/test/CodeGen/X86/ssub_sat_plus.ll @@ -155,9 +155,9 @@ define signext i4 @func4(i4 signext %x, i4 signext %y, i4 signext %z) nounwind { ; X86-NEXT: cmpb $7, %cl ; X86-NEXT: movl $7, %ecx ; X86-NEXT: cmovll %eax, %ecx -; X86-NEXT: cmpb $-8, %cl +; X86-NEXT: cmpb $-7, %cl ; X86-NEXT: movl $248, %eax -; X86-NEXT: cmovgl %ecx, %eax +; X86-NEXT: cmovgel %ecx, %eax ; X86-NEXT: movsbl %al, %eax ; X86-NEXT: retl ; @@ -173,9 +173,9 @@ define signext i4 @func4(i4 signext %x, i4 signext %y, i4 signext %z) nounwind { ; X64-NEXT: cmpb $7, %al ; X64-NEXT: movl $7, %ecx ; X64-NEXT: cmovll %eax, %ecx -; X64-NEXT: cmpb $-8, %cl +; X64-NEXT: cmpb $-7, %cl ; X64-NEXT: movl $248, %eax -; X64-NEXT: cmovgl %ecx, %eax +; X64-NEXT: cmovgel %ecx, %eax ; X64-NEXT: movsbl %al, %eax ; X64-NEXT: retq %a = mul i4 %y, %z diff --git a/llvm/test/CodeGen/X86/umul_fix_sat.ll b/llvm/test/CodeGen/X86/umul_fix_sat.ll index 8a4331a998d96..ad980b961bc6a 100644 --- a/llvm/test/CodeGen/X86/umul_fix_sat.ll +++ b/llvm/test/CodeGen/X86/umul_fix_sat.ll @@ -16,9 +16,9 @@ define i32 @func(i32 %x, i32 %y) nounwind { ; X64-NEXT: movq %rcx, %rax ; X64-NEXT: shrq $32, %rax ; X64-NEXT: shrdl $2, %eax, %ecx -; X64-NEXT: cmpl $3, %eax +; X64-NEXT: cmpl $4, %eax ; X64-NEXT: movl $-1, %eax -; X64-NEXT: cmovbel %ecx, %eax +; X64-NEXT: cmovbl %ecx, %eax ; X64-NEXT: retq ; ; X86-LABEL: func: @@ -26,9 +26,9 @@ define i32 @func(i32 %x, i32 %y) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) ; X86-NEXT: shrdl $2, %edx, %eax -; X86-NEXT: cmpl $3, %edx +; X86-NEXT: cmpl $4, %edx ; X86-NEXT: movl $-1, %ecx -; X86-NEXT: cmoval %ecx, %eax +; X86-NEXT: cmovael %ecx, %eax ; X86-NEXT: retl %tmp = call i32 @llvm.umul.fix.sat.i32(i32 %x, i32 %y, i32 2) ret i32 %tmp @@ -40,9 +40,9 @@ define i64 @func2(i64 %x, i64 %y) nounwind { ; X64-NEXT: movq %rdi, %rax ; X64-NEXT: mulq %rsi ; X64-NEXT: shrdq $2, %rdx, %rax -; X64-NEXT: cmpq $3, %rdx +; X64-NEXT: cmpq $4, %rdx ; X64-NEXT: movq $-1, %rcx -; X64-NEXT: cmovaq %rcx, %rax +; X64-NEXT: cmovaeq %rcx, %rax ; X64-NEXT: retq ; ; X86-LABEL: func2: @@ -105,9 +105,9 @@ define i4 @func3(i4 %x, i4 %y) nounwind { ; X64-NEXT: shlb $6, %dl ; X64-NEXT: orb %cl, %dl ; X64-NEXT: movzbl %dl, %ecx -; X64-NEXT: cmpb $3, %al +; X64-NEXT: cmpb $4, %al ; X64-NEXT: movl $255, %eax -; X64-NEXT: cmovbel %ecx, %eax +; X64-NEXT: cmovbl %ecx, %eax ; X64-NEXT: shrb $4, %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq @@ -126,9 +126,9 @@ define i4 @func3(i4 %x, i4 %y) nounwind { ; X86-NEXT: shrb $2, %al ; X86-NEXT: orb %cl, %al ; X86-NEXT: movzbl %al, %ecx -; X86-NEXT: cmpb $3, %ah +; X86-NEXT: cmpb $4, %ah ; X86-NEXT: movl $255, %eax -; X86-NEXT: cmovbel %ecx, %eax +; X86-NEXT: cmovbl %ecx, %eax ; X86-NEXT: shrb $4, %al ; X86-NEXT: # kill: def $al killed $al killed $eax ; X86-NEXT: retl @@ -147,9 +147,9 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X64-NEXT: movq %rcx, %rax ; X64-NEXT: shrq $32, %rax ; X64-NEXT: shrdl $2, %eax, %ecx -; X64-NEXT: cmpl $3, %eax +; X64-NEXT: cmpl $4, %eax ; X64-NEXT: movl $-1, %eax -; X64-NEXT: cmoval %eax, %ecx +; X64-NEXT: cmovael %eax, %ecx ; X64-NEXT: movd %ecx, %xmm2 ; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3] ; X64-NEXT: movd %xmm3, %ecx @@ -159,8 +159,8 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: shrq $32, %rcx ; X64-NEXT: shrdl $2, %ecx, %edx -; X64-NEXT: cmpl $3, %ecx -; X64-NEXT: cmoval %eax, %edx +; X64-NEXT: cmpl $4, %ecx +; X64-NEXT: cmovael %eax, %edx ; X64-NEXT: movd %edx, %xmm3 ; X64-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] ; X64-NEXT: movd %xmm1, %ecx @@ -169,8 +169,8 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: shrq $32, %rcx ; X64-NEXT: shrdl $2, %ecx, %edx -; X64-NEXT: cmpl $3, %ecx -; X64-NEXT: cmoval %eax, %edx +; X64-NEXT: cmpl $4, %ecx +; X64-NEXT: cmovael %eax, %edx ; X64-NEXT: movd %edx, %xmm2 ; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,1,1] ; X64-NEXT: movd %xmm1, %ecx @@ -180,8 +180,8 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: shrq $32, %rcx ; X64-NEXT: shrdl $2, %ecx, %edx -; X64-NEXT: cmpl $3, %ecx -; X64-NEXT: cmoval %eax, %edx +; X64-NEXT: cmpl $4, %ecx +; X64-NEXT: cmovael %eax, %edx ; X64-NEXT: movd %edx, %xmm0 ; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] ; X64-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] @@ -201,26 +201,26 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X86-NEXT: mull {{[0-9]+}}(%esp) ; X86-NEXT: movl %eax, %esi ; X86-NEXT: shrdl $2, %edx, %esi -; X86-NEXT: cmpl $3, %edx +; X86-NEXT: cmpl $4, %edx ; X86-NEXT: movl $-1, %ecx -; X86-NEXT: cmoval %ecx, %esi +; X86-NEXT: cmovael %ecx, %esi ; X86-NEXT: movl %ebp, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) ; X86-NEXT: movl %eax, %ebp ; X86-NEXT: shrdl $2, %edx, %ebp -; X86-NEXT: cmpl $3, %edx -; X86-NEXT: cmoval %ecx, %ebp +; X86-NEXT: cmpl $4, %edx +; X86-NEXT: cmovael %ecx, %ebp ; X86-NEXT: movl %ebx, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) ; X86-NEXT: movl %eax, %ebx ; X86-NEXT: shrdl $2, %edx, %ebx -; X86-NEXT: cmpl $3, %edx -; X86-NEXT: cmoval %ecx, %ebx +; X86-NEXT: cmpl $4, %edx +; X86-NEXT: cmovael %ecx, %ebx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) ; X86-NEXT: shrdl $2, %edx, %eax -; X86-NEXT: cmpl $3, %edx -; X86-NEXT: cmoval %ecx, %eax +; X86-NEXT: cmpl $4, %edx +; X86-NEXT: cmovael %ecx, %eax ; X86-NEXT: movl %eax, 12(%edi) ; X86-NEXT: movl %ebx, 8(%edi) ; X86-NEXT: movl %ebp, 4(%edi) diff --git a/llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll index ab5371554576b..125fe41ecf805 100644 --- a/llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll @@ -66,8 +66,8 @@ define i1 @test_urem_odd_setne(i4 %X) nounwind { ; X86-NEXT: leal (%eax,%eax,2), %ecx ; X86-NEXT: leal (%eax,%ecx,4), %eax ; X86-NEXT: andb $15, %al -; X86-NEXT: cmpb $3, %al -; X86-NEXT: seta %al +; X86-NEXT: cmpb $4, %al +; X86-NEXT: setae %al ; X86-NEXT: retl ; ; X64-LABEL: test_urem_odd_setne: @@ -76,8 +76,8 @@ define i1 @test_urem_odd_setne(i4 %X) nounwind { ; X64-NEXT: leal (%rdi,%rdi,2), %eax ; X64-NEXT: leal (%rdi,%rax,4), %eax ; X64-NEXT: andb $15, %al -; X64-NEXT: cmpb $3, %al -; X64-NEXT: seta %al +; X64-NEXT: cmpb $4, %al +; X64-NEXT: setae %al ; X64-NEXT: retq %urem = urem i4 %X, 5 %cmp = icmp ne i4 %urem, 0 @@ -89,16 +89,16 @@ define i1 @test_urem_negative_odd(i9 %X) nounwind { ; X86: # %bb.0: ; X86-NEXT: imull $307, {{[0-9]+}}(%esp), %eax # imm = 0x133 ; X86-NEXT: andl $511, %eax # imm = 0x1FF -; X86-NEXT: cmpw $1, %ax -; X86-NEXT: seta %al +; X86-NEXT: cmpw $2, %ax +; X86-NEXT: setae %al ; X86-NEXT: retl ; ; X64-LABEL: test_urem_negative_odd: ; X64: # %bb.0: ; X64-NEXT: imull $307, %edi, %eax # imm = 0x133 ; X64-NEXT: andl $511, %eax # imm = 0x1FF -; X64-NEXT: cmpw $1, %ax -; X64-NEXT: seta %al +; X64-NEXT: cmpw $2, %ax +; X64-NEXT: setae %al ; X64-NEXT: retq %urem = urem i9 %X, -5 %cmp = icmp ne i9 %urem, 0 @@ -115,18 +115,18 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind { ; X86-NEXT: shrl %eax ; X86-NEXT: orl %ecx, %eax ; X86-NEXT: andl $2047, %eax # imm = 0x7FF -; X86-NEXT: cmpl $341, %eax # imm = 0x155 -; X86-NEXT: seta %al +; X86-NEXT: cmpl $342, %eax # imm = 0x156 +; X86-NEXT: setae %al ; X86-NEXT: imull $1463, {{[0-9]+}}(%esp), %ecx # imm = 0x5B7 ; X86-NEXT: addl $-1463, %ecx # imm = 0xFA49 ; X86-NEXT: andl $2047, %ecx # imm = 0x7FF -; X86-NEXT: cmpl $292, %ecx # imm = 0x124 -; X86-NEXT: seta %dl +; X86-NEXT: cmpl $293, %ecx # imm = 0x125 +; X86-NEXT: setae %dl ; X86-NEXT: imull $819, {{[0-9]+}}(%esp), %ecx # imm = 0x333 ; X86-NEXT: addl $-1638, %ecx # imm = 0xF99A ; X86-NEXT: andl $2047, %ecx # imm = 0x7FF -; X86-NEXT: cmpw $1, %cx -; X86-NEXT: seta %cl +; X86-NEXT: cmpw $2, %cx +; X86-NEXT: setae %cl ; X86-NEXT: retl ; ; SSE2-LABEL: test_urem_vec: diff --git a/llvm/test/CodeGen/X86/urem-seteq.ll b/llvm/test/CodeGen/X86/urem-seteq.ll index 21aed941b06a3..214a5162fd13a 100644 --- a/llvm/test/CodeGen/X86/urem-seteq.ll +++ b/llvm/test/CodeGen/X86/urem-seteq.ll @@ -107,8 +107,8 @@ define i16 @test_urem_even(i16 %X) nounwind { ; X86-NEXT: rorw %ax ; X86-NEXT: movzwl %ax, %ecx ; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl $4681, %ecx # imm = 0x1249 -; X86-NEXT: seta %al +; X86-NEXT: cmpl $4682, %ecx # imm = 0x124A +; X86-NEXT: setae %al ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; @@ -118,8 +118,8 @@ define i16 @test_urem_even(i16 %X) nounwind { ; X64-NEXT: rorw %ax ; X64-NEXT: movzwl %ax, %ecx ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl $4681, %ecx # imm = 0x1249 -; X64-NEXT: seta %al +; X64-NEXT: cmpl $4682, %ecx # imm = 0x124A +; X64-NEXT: setae %al ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %urem = urem i16 %X, 14 @@ -212,16 +212,16 @@ define i32 @test_urem_odd_setne(i32 %X) nounwind { ; X86: # %bb.0: ; X86-NEXT: imull $-858993459, {{[0-9]+}}(%esp), %ecx # imm = 0xCCCCCCCD ; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl $858993459, %ecx # imm = 0x33333333 -; X86-NEXT: seta %al +; X86-NEXT: cmpl $858993460, %ecx # imm = 0x33333334 +; X86-NEXT: setae %al ; X86-NEXT: retl ; ; X64-LABEL: test_urem_odd_setne: ; X64: # %bb.0: ; X64-NEXT: imull $-858993459, %edi, %ecx # imm = 0xCCCCCCCD ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl $858993459, %ecx # imm = 0x33333333 -; X64-NEXT: seta %al +; X64-NEXT: cmpl $858993460, %ecx # imm = 0x33333334 +; X64-NEXT: setae %al ; X64-NEXT: retq %urem = urem i32 %X, 5 %cmp = icmp ne i32 %urem, 0 @@ -235,16 +235,16 @@ define i32 @test_urem_negative_odd(i32 %X) nounwind { ; X86: # %bb.0: ; X86-NEXT: imull $858993459, {{[0-9]+}}(%esp), %ecx # imm = 0x33333333 ; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl $1, %ecx -; X86-NEXT: seta %al +; X86-NEXT: cmpl $2, %ecx +; X86-NEXT: setae %al ; X86-NEXT: retl ; ; X64-LABEL: test_urem_negative_odd: ; X64: # %bb.0: ; X64-NEXT: imull $858993459, %edi, %ecx # imm = 0x33333333 ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl $1, %ecx -; X64-NEXT: seta %al +; X64-NEXT: cmpl $2, %ecx +; X64-NEXT: setae %al ; X64-NEXT: retq %urem = urem i32 %X, -5 %cmp = icmp ne i32 %urem, 0 @@ -257,8 +257,8 @@ define i32 @test_urem_negative_even(i32 %X) nounwind { ; X86-NEXT: imull $-920350135, {{[0-9]+}}(%esp), %ecx # imm = 0xC9249249 ; X86-NEXT: rorl %ecx ; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl $1, %ecx -; X86-NEXT: seta %al +; X86-NEXT: cmpl $2, %ecx +; X86-NEXT: setae %al ; X86-NEXT: retl ; ; X64-LABEL: test_urem_negative_even: @@ -266,8 +266,8 @@ define i32 @test_urem_negative_even(i32 %X) nounwind { ; X64-NEXT: imull $-920350135, %edi, %ecx # imm = 0xC9249249 ; X64-NEXT: rorl %ecx ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl $1, %ecx -; X64-NEXT: seta %al +; X64-NEXT: cmpl $2, %ecx +; X64-NEXT: setae %al ; X64-NEXT: retq %urem = urem i32 %X, -14 %cmp = icmp ne i32 %urem, 0 diff --git a/llvm/test/CodeGen/X86/vector-mulfix-legalize.ll b/llvm/test/CodeGen/X86/vector-mulfix-legalize.ll index 0eafd8d644dd1..b1a7a2485701a 100644 --- a/llvm/test/CodeGen/X86/vector-mulfix-legalize.ll +++ b/llvm/test/CodeGen/X86/vector-mulfix-legalize.ll @@ -50,9 +50,9 @@ define <4 x i16> @smulfixsat(<4 x i16> %a) { ; CHECK-NEXT: shrl $16, %edx ; CHECK-NEXT: shldw $1, %cx, %dx ; CHECK-NEXT: sarl $16, %ecx -; CHECK-NEXT: cmpl $16383, %ecx # imm = 0x3FFF +; CHECK-NEXT: cmpl $16384, %ecx # imm = 0x4000 ; CHECK-NEXT: movl $32767, %r8d # imm = 0x7FFF -; CHECK-NEXT: cmovgl %r8d, %edx +; CHECK-NEXT: cmovgel %r8d, %edx ; CHECK-NEXT: cmpl $-16384, %ecx # imm = 0xC000 ; CHECK-NEXT: movl $32768, %ecx # imm = 0x8000 ; CHECK-NEXT: cmovll %ecx, %edx @@ -63,8 +63,8 @@ define <4 x i16> @smulfixsat(<4 x i16> %a) { ; CHECK-NEXT: leal (%rdi,%rdi), %esi ; CHECK-NEXT: shrdw $15, %ax, %si ; CHECK-NEXT: sarl $15, %edi -; CHECK-NEXT: cmpl $16383, %edi # imm = 0x3FFF -; CHECK-NEXT: cmovgl %r8d, %esi +; CHECK-NEXT: cmpl $16384, %edi # imm = 0x4000 +; CHECK-NEXT: cmovgel %r8d, %esi ; CHECK-NEXT: cmpl $-16384, %edi # imm = 0xC000 ; CHECK-NEXT: cmovll %ecx, %esi ; CHECK-NEXT: movd %xmm0, %eax @@ -73,8 +73,8 @@ define <4 x i16> @smulfixsat(<4 x i16> %a) { ; CHECK-NEXT: shrl $16, %edi ; CHECK-NEXT: shldw $1, %ax, %di ; CHECK-NEXT: sarl $16, %eax -; CHECK-NEXT: cmpl $16383, %eax # imm = 0x3FFF -; CHECK-NEXT: cmovgl %r8d, %edi +; CHECK-NEXT: cmpl $16384, %eax # imm = 0x4000 +; CHECK-NEXT: cmovgel %r8d, %edi ; CHECK-NEXT: cmpl $-16384, %eax # imm = 0xC000 ; CHECK-NEXT: cmovll %ecx, %edi ; CHECK-NEXT: movzwl %di, %eax @@ -88,8 +88,8 @@ define <4 x i16> @smulfixsat(<4 x i16> %a) { ; CHECK-NEXT: leal (,%rax,4), %esi ; CHECK-NEXT: shrdw $15, %dx, %si ; CHECK-NEXT: sarl $14, %eax -; CHECK-NEXT: cmpl $16383, %eax # imm = 0x3FFF -; CHECK-NEXT: cmovgl %r8d, %esi +; CHECK-NEXT: cmpl $16384, %eax # imm = 0x4000 +; CHECK-NEXT: cmovgel %r8d, %esi ; CHECK-NEXT: cmpl $-16384, %eax # imm = 0xC000 ; CHECK-NEXT: cmovll %ecx, %esi ; CHECK-NEXT: pinsrw $3, %esi, %xmm1 @@ -109,23 +109,23 @@ define <4 x i16> @umulfixsat(<4 x i16> %a) { ; CHECK-NEXT: shrl $16, %edx ; CHECK-NEXT: movl %edx, %ecx ; CHECK-NEXT: shldw $1, %ax, %cx -; CHECK-NEXT: cmpl $32767, %edx # imm = 0x7FFF +; CHECK-NEXT: cmpl $32768, %edx # imm = 0x8000 ; CHECK-NEXT: movl $65535, %eax # imm = 0xFFFF -; CHECK-NEXT: cmoval %eax, %ecx +; CHECK-NEXT: cmovael %eax, %ecx ; CHECK-NEXT: pextrw $1, %xmm0, %edx ; CHECK-NEXT: addl %edx, %edx ; CHECK-NEXT: movl %edx, %esi ; CHECK-NEXT: shrl $16, %esi ; CHECK-NEXT: movl %esi, %edi ; CHECK-NEXT: shldw $1, %dx, %di -; CHECK-NEXT: cmpl $32767, %esi # imm = 0x7FFF -; CHECK-NEXT: cmoval %eax, %edi +; CHECK-NEXT: cmpl $32768, %esi # imm = 0x8000 +; CHECK-NEXT: cmovael %eax, %edi ; CHECK-NEXT: movd %xmm0, %edx ; CHECK-NEXT: xorl %esi, %esi ; CHECK-NEXT: shldw $1, %dx, %si -; CHECK-NEXT: movl $32767, %edx # imm = 0x7FFF +; CHECK-NEXT: movl $32768, %edx # imm = 0x8000 ; CHECK-NEXT: negl %edx -; CHECK-NEXT: cmoval %eax, %esi +; CHECK-NEXT: cmovael %eax, %esi ; CHECK-NEXT: movzwl %si, %edx ; CHECK-NEXT: movd %edx, %xmm1 ; CHECK-NEXT: pinsrw $1, %edi, %xmm1 @@ -136,8 +136,8 @@ define <4 x i16> @umulfixsat(<4 x i16> %a) { ; CHECK-NEXT: shrl $16, %edx ; CHECK-NEXT: movl %edx, %esi ; CHECK-NEXT: shldw $1, %cx, %si -; CHECK-NEXT: cmpl $32767, %edx # imm = 0x7FFF -; CHECK-NEXT: cmoval %eax, %esi +; CHECK-NEXT: cmpl $32768, %edx # imm = 0x8000 +; CHECK-NEXT: cmovael %eax, %esi ; CHECK-NEXT: pinsrw $3, %esi, %xmm1 ; CHECK-NEXT: movdqa %xmm1, %xmm0 ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/zext-sext.ll b/llvm/test/CodeGen/X86/zext-sext.ll index 84096e3b6805d..95b93921f0253 100644 --- a/llvm/test/CodeGen/X86/zext-sext.ll +++ b/llvm/test/CodeGen/X86/zext-sext.ll @@ -23,10 +23,10 @@ define void @func([40 x i16]* %a, i32* %b, i16** %c, i64* %d) nounwind { ; CHECK-NEXT: cmpl $-8608074, %eax # imm = 0xFF7CA6B6 ; CHECK-NEXT: movslq %eax, %rdi ; CHECK-NEXT: setl %dl -; CHECK-NEXT: cmpl $2138875573, %eax # imm = 0x7F7CA6B5 +; CHECK-NEXT: cmpl $2138875574, %eax # imm = 0x7F7CA6B6 ; CHECK-NEXT: movq %rdi, %r8 ; CHECK-NEXT: leal -1(%rdx,%rdx), %edx -; CHECK-NEXT: cmovlel %edx, %esi +; CHECK-NEXT: cmovll %edx, %esi ; CHECK-NEXT: subq %rax, %r8 ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: cmpl $1, %esi From 9a9e6189d7973c16535233b5efcb38d13a651c56 Mon Sep 17 00:00:00 2001 From: zhijian Date: Wed, 30 Jun 2021 13:56:37 -0400 Subject: [PATCH 321/619] [AIX][XCOFF][BUG-Fixed] need to switch back to text section after emit a dumy eh structure Summary: in the patch https://reviews.llvm.org/D103651 [AIX][XCOFF] generate eh_info when vector registers are saved according to the traceback table. when generate eh_info, it switch to other section, when it done, it need to switch back to text section again. Reviewers: Jason Liu Differential Revision: https://reviews.llvm.org/105195 --- llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 1 + .../CodeGen/PowerPC/aix-emit-tracebacktable-clobber-register.ll | 2 ++ 2 files changed, 3 insertions(+) diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index b64a1ac41678c..1b4b8c168b8a5 100644 --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -1928,6 +1928,7 @@ void PPCAIXAsmPrinter::emitFunctionBodyEnd() { OutStreamer->emitIntValue(0, PointerSize); OutStreamer->emitIntValue(0, PointerSize); + OutStreamer->SwitchSection(MF->getSection()); } } diff --git a/llvm/test/CodeGen/PowerPC/aix-emit-tracebacktable-clobber-register.ll b/llvm/test/CodeGen/PowerPC/aix-emit-tracebacktable-clobber-register.ll index d7ee82ea3a71f..f1c399409196b 100644 --- a/llvm/test/CodeGen/PowerPC/aix-emit-tracebacktable-clobber-register.ll +++ b/llvm/test/CodeGen/PowerPC/aix-emit-tracebacktable-clobber-register.ll @@ -90,6 +90,8 @@ entry: ; COMMON-NEXT: .align 2 ; COMMON-NEXT: .vbyte 4, 0 ; COMMON-NEXT: .vbyte 4, 0 +; CHECK-ASM-NEXT: .csect .text[PR],2 +; CHECK-FUNC-NEXT: .csect .foov[PR],2 ; COMMON-NEXT: # -- End function ; COMMON: .toc ; COMMON: L..C2: From 4339d3bd84a9bc1b5ecc58ddfc935d53e9de4fd4 Mon Sep 17 00:00:00 2001 From: Michael Liao Date: Wed, 30 Jun 2021 14:03:14 -0400 Subject: [PATCH 322/619] Fix shared build. --- llvm/unittests/Passes/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/unittests/Passes/CMakeLists.txt b/llvm/unittests/Passes/CMakeLists.txt index 075b47c8d07e6..7c09053c7ab16 100644 --- a/llvm/unittests/Passes/CMakeLists.txt +++ b/llvm/unittests/Passes/CMakeLists.txt @@ -17,7 +17,7 @@ if (NOT WIN32) PluginsTest.cpp ) export_executable_symbols_for_plugins(PluginsTests) - target_link_libraries(PluginsTests PRIVATE LLVMTestingSupport) + target_link_libraries(PluginsTests PRIVATE LLVMAsmParser LLVMTestingSupport) set(LLVM_LINK_COMPONENTS) foreach(PLUGIN TestPlugin DoublerPlugin) From cd76f43b4995cf01bae9f97a54ca0e79c2355032 Mon Sep 17 00:00:00 2001 From: David Green Date: Wed, 30 Jun 2021 19:19:03 +0100 Subject: [PATCH 323/619] [ARM] Set the immediate cost of GEP operands to 0 This prevents constant gep operands from being hoisted by the Constant Hoisting pass, leaving them to CodegenPrepare which can usually do a better job at splitting large offsets. This can, in general, improve performance and decrease codesize, especially for v6m where many constants have a high cost. Differential Revision: https://reviews.llvm.org/D104877 --- .../lib/Target/ARM/ARMTargetTransformInfo.cpp | 5 ++ llvm/test/CodeGen/ARM/gep-imm.ll | 74 +++++++++---------- .../ConstantHoisting/ARM/gep-struct-index.ll | 9 +-- 3 files changed, 42 insertions(+), 46 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index 6c393405a1859..7410d8d1eabe4 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -337,6 +337,11 @@ InstructionCost ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, Idx == 1) return 0; + // Leave any gep offsets for the CodeGenPrepare, which will do a better job at + // splitting any large offsets. + if (Opcode == Instruction::GetElementPtr && Idx != 0) + return 0; + if (Opcode == Instruction::And) { // UXTB/UXTH if (Imm == 255 || Imm == 65535) diff --git a/llvm/test/CodeGen/ARM/gep-imm.ll b/llvm/test/CodeGen/ARM/gep-imm.ll index 5358261426b47..20218725f8a40 100644 --- a/llvm/test/CodeGen/ARM/gep-imm.ll +++ b/llvm/test/CodeGen/ARM/gep-imm.ll @@ -37,16 +37,15 @@ entry: define void @large(i32 %a, i32 %b, i32 *%c, i32* %d) { ; CHECKV6M-LABEL: large: ; CHECKV6M: @ %bb.0: @ %entry -; CHECKV6M-NEXT: .save {r4, r5, r7, lr} -; CHECKV6M-NEXT: push {r4, r5, r7, lr} -; CHECKV6M-NEXT: movs r4, #125 -; CHECKV6M-NEXT: lsls r4, r4, #4 -; CHECKV6M-NEXT: lsls r4, r4, #2 -; CHECKV6M-NEXT: str r0, [r3, r4] -; CHECKV6M-NEXT: ldr r5, .LCPI1_0 -; CHECKV6M-NEXT: str r1, [r3, r5] -; CHECKV6M-NEXT: str r0, [r2, r4] -; CHECKV6M-NEXT: pop {r4, r5, r7, pc} +; CHECKV6M-NEXT: .save {r4, lr} +; CHECKV6M-NEXT: push {r4, lr} +; CHECKV6M-NEXT: ldr r4, .LCPI1_0 +; CHECKV6M-NEXT: str r1, [r3, r4] +; CHECKV6M-NEXT: movs r1, #125 +; CHECKV6M-NEXT: lsls r1, r1, #6 +; CHECKV6M-NEXT: str r0, [r3, r1] +; CHECKV6M-NEXT: str r0, [r2, r1] +; CHECKV6M-NEXT: pop {r4, pc} ; CHECKV6M-NEXT: .p2align 2 ; CHECKV6M-NEXT: @ %bb.1: ; CHECKV6M-NEXT: .LCPI1_0: @@ -82,47 +81,42 @@ entry: define void @huge(i32 %a, i32 %b, i32 *%c, i32* %d) { ; CHECKV6M-LABEL: huge: ; CHECKV6M: @ %bb.0: @ %entry -; CHECKV6M-NEXT: .save {r4, r5, r7, lr} -; CHECKV6M-NEXT: push {r4, r5, r7, lr} +; CHECKV6M-NEXT: .save {r4, lr} +; CHECKV6M-NEXT: push {r4, lr} ; CHECKV6M-NEXT: ldr r4, .LCPI2_0 -; CHECKV6M-NEXT: lsls r4, r4, #2 -; CHECKV6M-NEXT: str r0, [r3, r4] -; CHECKV6M-NEXT: ldr r5, .LCPI2_1 -; CHECKV6M-NEXT: str r1, [r3, r5] -; CHECKV6M-NEXT: str r0, [r2, r4] -; CHECKV6M-NEXT: pop {r4, r5, r7, pc} +; CHECKV6M-NEXT: str r1, [r3, r4] +; CHECKV6M-NEXT: ldr r1, .LCPI2_1 +; CHECKV6M-NEXT: str r0, [r3, r1] +; CHECKV6M-NEXT: str r0, [r2, r1] +; CHECKV6M-NEXT: pop {r4, pc} ; CHECKV6M-NEXT: .p2align 2 ; CHECKV6M-NEXT: @ %bb.1: ; CHECKV6M-NEXT: .LCPI2_0: -; CHECKV6M-NEXT: .long 200000 @ 0x30d40 -; CHECKV6M-NEXT: .LCPI2_1: ; CHECKV6M-NEXT: .long 1200000 @ 0x124f80 +; CHECKV6M-NEXT: .LCPI2_1: +; CHECKV6M-NEXT: .long 800000 @ 0xc3500 ; ; CHECKV7M-LABEL: huge: ; CHECKV7M: @ %bb.0: @ %entry -; CHECKV7M-NEXT: .save {r7, lr} -; CHECKV7M-NEXT: push {r7, lr} -; CHECKV7M-NEXT: movw r12, #3392 -; CHECKV7M-NEXT: movw lr, #20352 -; CHECKV7M-NEXT: movt r12, #3 -; CHECKV7M-NEXT: movt lr, #18 -; CHECKV7M-NEXT: str.w r0, [r3, r12, lsl #2] -; CHECKV7M-NEXT: str.w r1, [r3, lr] -; CHECKV7M-NEXT: str.w r0, [r2, r12, lsl #2] -; CHECKV7M-NEXT: pop {r7, pc} +; CHECKV7M-NEXT: movw r12, #20352 +; CHECKV7M-NEXT: movt r12, #18 +; CHECKV7M-NEXT: str.w r1, [r3, r12] +; CHECKV7M-NEXT: movw r1, #13568 +; CHECKV7M-NEXT: movt r1, #12 +; CHECKV7M-NEXT: str r0, [r3, r1] +; CHECKV7M-NEXT: str r0, [r2, r1] +; CHECKV7M-NEXT: bx lr ; ; CHECKV7A-LABEL: huge: ; CHECKV7A: @ %bb.0: @ %entry -; CHECKV7A-NEXT: .save {r7, lr} -; CHECKV7A-NEXT: push {r7, lr} -; CHECKV7A-NEXT: movw r12, #3392 -; CHECKV7A-NEXT: movw lr, #20352 -; CHECKV7A-NEXT: movt r12, #3 -; CHECKV7A-NEXT: movt lr, #18 -; CHECKV7A-NEXT: str.w r0, [r3, r12, lsl #2] -; CHECKV7A-NEXT: str.w r1, [r3, lr] -; CHECKV7A-NEXT: str.w r0, [r2, r12, lsl #2] -; CHECKV7A-NEXT: pop {r7, pc} +; CHECKV7A-NEXT: movw r12, #20352 +; CHECKV7A-NEXT: movt r12, #18 +; CHECKV7A-NEXT: str.w r1, [r3, r12] +; CHECKV7A-NEXT: movw r1, #13568 +; CHECKV7A-NEXT: movt r1, #12 +; CHECKV7A-NEXT: str r0, [r3, r1] +; CHECKV7A-NEXT: str r0, [r2, r1] +; CHECKV7A-NEXT: bx lr entry: %arrayidx = getelementptr inbounds i32, i32* %d, i32 200000 store i32 %a, i32* %arrayidx, align 4 diff --git a/llvm/test/Transforms/ConstantHoisting/ARM/gep-struct-index.ll b/llvm/test/Transforms/ConstantHoisting/ARM/gep-struct-index.ll index 45f4500b37c17..70a01a660b626 100644 --- a/llvm/test/Transforms/ConstantHoisting/ARM/gep-struct-index.ll +++ b/llvm/test/Transforms/ConstantHoisting/ARM/gep-struct-index.ll @@ -19,14 +19,11 @@ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } -; Indices for GEPs that index into a struct type should not be hoisted. +; Indices for GEPs should not be hoisted. define i32 @test1(%T* %P) nounwind { ; CHECK-LABEL: @test1 -; CHECK: %const = bitcast i32 256 to i32 -; CHECK: %addr1 = getelementptr %T, %T* %P, i32 %const, i32 256 -; CHECK: %addr2 = getelementptr %T, %T* %P, i32 %const, i32 256 -; The first index into the pointer is hoisted, but the second one into the -; struct isn't. +; CHECK: %addr1 = getelementptr %T, %T* %P, i32 256, i32 256 +; CHECK: %addr2 = getelementptr %T, %T* %P, i32 256, i32 256 %addr1 = getelementptr %T, %T* %P, i32 256, i32 256 %tmp1 = load i32, i32* %addr1 %addr2 = getelementptr %T, %T* %P, i32 256, i32 256 From 9b0ddc2662addaa563b632c577996e515e1a7802 Mon Sep 17 00:00:00 2001 From: Leonard Chan Date: Wed, 30 Jun 2021 11:20:49 -0700 Subject: [PATCH 324/619] [clang][Fuchsia] Remove relative-vtables multilibs As of D102374, relative vtables is enabled on Fuchsia by default, so we don't need any of the RV multilibs. Differential revision: https://reviews.llvm.org/D105145 --- clang/cmake/caches/Fuchsia-stage2.cmake | 28 +------------------------ 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/clang/cmake/caches/Fuchsia-stage2.cmake b/clang/cmake/caches/Fuchsia-stage2.cmake index eb001ef6579ce..c031465002cca 100644 --- a/clang/cmake/caches/Fuchsia-stage2.cmake +++ b/clang/cmake/caches/Fuchsia-stage2.cmake @@ -222,41 +222,15 @@ if(FUCHSIA_SDK) set(RUNTIMES_${target}+asan+noexcept_LIBCXXABI_ENABLE_EXCEPTIONS OFF CACHE BOOL "") set(RUNTIMES_${target}+asan+noexcept_LIBCXX_ENABLE_EXCEPTIONS OFF CACHE BOOL "") - set(RUNTIMES_${target}+relative-vtables_LLVM_BUILD_COMPILER_RT OFF CACHE BOOL "") - set(RUNTIMES_${target}+relative-vtables_CMAKE_CXX_FLAGS "${FUCHSIA_${target}_COMPILER_FLAGS} -Xclang -fexperimental-relative-c++-abi-vtables" CACHE STRING "") - - set(RUNTIMES_${target}+relative-vtables+asan_LLVM_BUILD_COMPILER_RT OFF CACHE BOOL "") - set(RUNTIMES_${target}+relative-vtables+asan_LLVM_USE_SANITIZER "Address" CACHE STRING "") - set(RUNTIMES_${target}+relative-vtables+asan_LIBCXXABI_ENABLE_NEW_DELETE_DEFINITIONS OFF CACHE BOOL "") - set(RUNTIMES_${target}+relative-vtables+asan_LIBCXX_ENABLE_NEW_DELETE_DEFINITIONS OFF CACHE BOOL "") - set(RUNTIMES_${target}+relative-vtables+asan_CMAKE_CXX_FLAGS "${FUCHSIA_${target}_COMPILER_FLAGS} -Xclang -fexperimental-relative-c++-abi-vtables" CACHE STRING "") - - set(RUNTIMES_${target}+relative-vtables+noexcept_LLVM_BUILD_COMPILER_RT OFF CACHE BOOL "") - set(RUNTIMES_${target}+relative-vtables+noexcept_CMAKE_CXX_FLAGS "${FUCHSIA_${target}_COMPILER_FLAGS} -Xclang -fexperimental-relative-c++-abi-vtables" CACHE STRING "") - set(RUNTIMES_${target}+relative-vtables+noexcept_LIBCXXABI_ENABLE_EXCEPTIONS OFF CACHE BOOL "") - set(RUNTIMES_${target}+relative-vtables+noexcept_LIBCXX_ENABLE_EXCEPTIONS OFF CACHE BOOL "") - - set(RUNTIMES_${target}+relative-vtables+asan+noexcept_LLVM_BUILD_COMPILER_RT OFF CACHE BOOL "") - set(RUNTIMES_${target}+relative-vtables+asan+noexcept_LLVM_USE_SANITIZER "Address" CACHE STRING "") - set(RUNTIMES_${target}+relative-vtables+asan+noexcept_LIBCXXABI_ENABLE_NEW_DELETE_DEFINITIONS OFF CACHE BOOL "") - set(RUNTIMES_${target}+relative-vtables+asan+noexcept_LIBCXX_ENABLE_NEW_DELETE_DEFINITIONS OFF CACHE BOOL "") - set(RUNTIMES_${target}+relative-vtables+asan+noexcept_LIBCXXABI_ENABLE_EXCEPTIONS OFF CACHE BOOL "") - set(RUNTIMES_${target}+relative-vtables+asan+noexcept_LIBCXX_ENABLE_EXCEPTIONS OFF CACHE BOOL "") - set(RUNTIMES_${target}+relative-vtables+asan+noexcept_CMAKE_CXX_FLAGS "${FUCHSIA_${target}_COMPILER_FLAGS} -Xclang -fexperimental-relative-c++-abi-vtables" CACHE STRING "") - # Use .build-id link. list(APPEND RUNTIME_BUILD_ID_LINK "${target}") endforeach() - set(LLVM_RUNTIME_MULTILIBS "asan;noexcept;compat;asan+noexcept;relative-vtables;relative-vtables+noexcept;relative-vtables+asan;relative-vtables+asan+noexcept" CACHE STRING "") + set(LLVM_RUNTIME_MULTILIBS "asan;noexcept;compat;asan+noexcept" CACHE STRING "") set(LLVM_RUNTIME_MULTILIB_asan_TARGETS "x86_64-unknown-fuchsia;aarch64-unknown-fuchsia" CACHE STRING "") set(LLVM_RUNTIME_MULTILIB_noexcept_TARGETS "x86_64-unknown-fuchsia;aarch64-unknown-fuchsia" CACHE STRING "") set(LLVM_RUNTIME_MULTILIB_compat_TARGETS "x86_64-unknown-fuchsia;aarch64-unknown-fuchsia" CACHE STRING "") set(LLVM_RUNTIME_MULTILIB_asan+noexcept_TARGETS "x86_64-unknown-fuchsia;aarch64-unknown-fuchsia" CACHE STRING "") - set(LLVM_RUNTIME_MULTILIB_relative-vtables_TARGETS "x86_64-unknown-fuchsia;aarch64-unknown-fuchsia" CACHE STRING "") - set(LLVM_RUNTIME_MULTILIB_relative-vtables+noexcept_TARGETS "x86_64-unknown-fuchsia;aarch64-unknown-fuchsia" CACHE STRING "") - set(LLVM_RUNTIME_MULTILIB_relative-vtables+asan_TARGETS "x86_64-unknown-fuchsia;aarch64-unknown-fuchsia" CACHE STRING "") - set(LLVM_RUNTIME_MULTILIB_relative-vtables+asan+noexcept_TARGETS "x86_64-unknown-fuchsia;aarch64-unknown-fuchsia" CACHE STRING "") endif() set(LLVM_BUILTIN_TARGETS "${BUILTIN_TARGETS}" CACHE STRING "") From 03051f7ac8a3e2eda44b8280290c90863a064f0e Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 30 Jun 2021 11:35:50 -0700 Subject: [PATCH 325/619] [ELF] Preserve section order within an INSERT AFTER command For ``` SECTIONS { text.0 : {} text.1 : {} text.2 : {} } INSERT AFTER .data; ``` the current order is `.data text.2 text.1 text.0`. It makes more sense to preserve the specified order and thus improve compatibility with GNU ld. For ``` SECTIONS { text.0 : {} } INSERT AFTER .data; SECTIONS { text.3 : {} } INSERT AFTER .data; ``` GNU ld somehow collects sections with `INSERT AFTER .data` together (IMO inconsistent) but I think it makes more sense to execute the commands in order and get `.data text.3 text.0` instead. Reviewed By: peter.smith Differential Revision: https://reviews.llvm.org/D105158 --- lld/ELF/LinkerScript.cpp | 28 ++++++----- lld/ELF/LinkerScript.h | 2 +- lld/ELF/ScriptParser.cpp | 5 +- lld/test/ELF/linkerscript/insert-multi.test | 53 +++++++++++++++++++++ 4 files changed, 74 insertions(+), 14 deletions(-) create mode 100644 lld/test/ELF/linkerscript/insert-multi.test diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp index 6deb9a98cc4bb..a938984ad945e 100644 --- a/lld/ELF/LinkerScript.cpp +++ b/lld/ELF/LinkerScript.cpp @@ -251,30 +251,34 @@ getChangedSymbolAssignment(const SymbolAssignmentMap &oldValues) { // Process INSERT [AFTER|BEFORE] commands. For each command, we move the // specified output section to the designated place. void LinkerScript::processInsertCommands() { + std::vector moves; for (const InsertCommand &cmd : insertCommands) { - // If cmd.os is empty, it may have been discarded by - // adjustSectionsBeforeSorting(). We do not handle such output sections. - auto from = llvm::find_if(sectionCommands, [&](BaseCommand *base) { - return isa(base) && - cast(base)->name == cmd.name; - }); - if (from == sectionCommands.end()) - continue; - OutputSection *osec = cast(*from); - sectionCommands.erase(from); + for (StringRef name : cmd.names) { + // If base is empty, it may have been discarded by + // adjustSectionsBeforeSorting(). We do not handle such output sections. + auto from = llvm::find_if(sectionCommands, [&](BaseCommand *base) { + return isa(base) && + cast(base)->name == name; + }); + if (from == sectionCommands.end()) + continue; + moves.push_back(cast(*from)); + sectionCommands.erase(from); + } auto insertPos = llvm::find_if(sectionCommands, [&cmd](BaseCommand *base) { auto *to = dyn_cast(base); return to != nullptr && to->name == cmd.where; }); if (insertPos == sectionCommands.end()) { - error("unable to insert " + osec->name + + error("unable to insert " + cmd.names[0] + (cmd.isAfter ? " after " : " before ") + cmd.where); } else { if (cmd.isAfter) ++insertPos; - sectionCommands.insert(insertPos, osec); + sectionCommands.insert(insertPos, moves.begin(), moves.end()); } + moves.clear(); } } diff --git a/lld/ELF/LinkerScript.h b/lld/ELF/LinkerScript.h index 14cd145af5613..0592c52acb84d 100644 --- a/lld/ELF/LinkerScript.h +++ b/lld/ELF/LinkerScript.h @@ -227,7 +227,7 @@ struct ByteCommand : BaseCommand { }; struct InsertCommand { - StringRef name; + std::vector names; bool isAfter; StringRef where; }; diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp index 3d0d720014f25..4726dd1c6a2c9 100644 --- a/lld/ELF/ScriptParser.cpp +++ b/lld/ELF/ScriptParser.cpp @@ -596,9 +596,12 @@ void ScriptParser::readSections() { else if (!consume("BEFORE")) setError("expected AFTER/BEFORE, but got '" + next() + "'"); StringRef where = next(); + std::vector names; for (BaseCommand *cmd : v) if (auto *os = dyn_cast(cmd)) - script->insertCommands.push_back({os->name, isAfter, where}); + names.push_back(os->name); + if (!names.empty()) + script->insertCommands.push_back({std::move(names), isAfter, where}); } void ScriptParser::readTarget() { diff --git a/lld/test/ELF/linkerscript/insert-multi.test b/lld/test/ELF/linkerscript/insert-multi.test new file mode 100644 index 0000000000000..88c065371de0e --- /dev/null +++ b/lld/test/ELF/linkerscript/insert-multi.test @@ -0,0 +1,53 @@ +# REQUIRES: x86 +## Sections in an INSERT command are in a unit. Their order is preserved. + +# RUN: split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=x86_64 %t/a.s -o %t/a.o +# RUN: ld.lld -T %t/a.lds %t/a.o -o %t1 +# RUN: llvm-readelf -S -l %t1 | FileCheck %s + +# CHECK: Name +# CHECK-NEXT: NULL +# CHECK-NEXT: text.3 +# CHECK-NEXT: text.4 +# CHECK-NEXT: text.5 +# CHECK-NEXT: .text +# CHECK-NEXT: .data +# CHECK-NEXT: text.6 +# CHECK-NEXT: text.7 +# CHECK-NEXT: text.8 +# CHECK-NEXT: text.0 +# CHECK-NEXT: text.1 +# CHECK-NEXT: text.2 + +#--- a.lds +SECTIONS { + text.0 : {} + text.1 : {} + text.2 : {} +} INSERT AFTER .data; + +SECTIONS { + text.3 : {} + text.4 : {} + text.5 : {} +} INSERT BEFORE .text; + +SECTIONS { + text.6 : {} + text.7 : {} + text.8 : {} +} INSERT AFTER .data; + +#--- a.s +.text; nop +.section text.0,"ax"; nop +.section text.1,"ax"; nop +.section text.2,"ax"; nop +.section text.3,"ax"; nop +.section text.4,"ax"; nop +.section text.5,"ax"; nop +.section text.6,"ax"; nop +.section text.7,"ax"; nop +.section text.8,"ax"; nop +.data; .byte 0 From 2c4f5690ab5e435691aafe554725dbbd521b3754 Mon Sep 17 00:00:00 2001 From: Ahmed Taei Date: Tue, 22 Jun 2021 12:50:10 -0700 Subject: [PATCH 326/619] Add linalg.batch_matvec named op Similarly to batch_mat vec outer most dim is a batching dim and this op does |b| matrix-vector-products : C[b, i] = sum_k(A[b, i, k] * B[b, k]) Reviewed By: rsuderman Differential Revision: https://reviews.llvm.org/D104739 --- .../Linalg/IR/LinalgNamedStructuredOps.yaml | 62 +++++++++++++++++++ .../linalg/opdsl/ops/core_named_ops.py | 15 +++++ .../Dialect/Linalg/generalize-named-ops.mlir | 25 ++++++++ 3 files changed, 102 insertions(+) diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml index e536b44fe6fb2..8781e16bba34e 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml @@ -247,6 +247,68 @@ structured_op: !LinalgStructuredOpConfig - !ScalarExpression scalar_arg: A --- !LinalgOpConfig +metadata: !LinalgOpMetadata + name: batch_matvec + cpp_class_name: BatchMatvecOp + doc: |- + Performs a batched matrix-vector multiplication. + + Numeric casting is performed on the operands to the inner multiply, promoting + them to the same data type as the accumulator/output. + implements: + - LinalgContractionOpInterface +structured_op: !LinalgStructuredOpConfig + args: + - !LinalgOperandDefConfig + name: A + usage: InputOperand + type_var: T1 + shape_map: affine_map<()[s0, s1, s2] -> (s0, s1, s2)> + - !LinalgOperandDefConfig + name: B + usage: InputOperand + type_var: T2 + shape_map: affine_map<()[s0, s1, s2] -> (s0, s2)> + - !LinalgOperandDefConfig + name: C + usage: OutputOperand + type_var: U + shape_map: affine_map<()[s0, s1, s2] -> (s0, s1)> + indexing_maps: !LinalgIndexingMapsConfig + static_indexing_maps: + - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d1, d2)> + - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d2)> + - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d1)> + iterator_types: + - parallel + - parallel + - reduction + assignments: + - !ScalarAssign + arg: C + value: !ScalarExpression + scalar_apply: + fn_name: add + operands: + - !ScalarExpression + scalar_arg: C + - !ScalarExpression + scalar_apply: + fn_name: mul + operands: + - !ScalarExpression + symbolic_cast: + type_var: U + operands: + - !ScalarExpression + scalar_arg: A + - !ScalarExpression + symbolic_cast: + type_var: U + operands: + - !ScalarExpression + scalar_arg: B +--- !LinalgOpConfig metadata: !LinalgOpMetadata name: dot cpp_class_name: DotOp diff --git a/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py b/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py index 5867109279aa4..561cd2e7d08db 100644 --- a/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py +++ b/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py @@ -66,6 +66,21 @@ def vecmat( x[D.n] += cast(U, y[D.m]) * cast(U, A[D.m, D.n]) +@linalg_structured_op +def batch_matvec( + A=TensorDef(T1, Batch, S.M, S.K), + B=TensorDef(T2, Batch, S.K), + C=TensorDef(U, Batch, S.M, output=True)): + """Performs a batched matrix-vector multiplication. + + Numeric casting is performed on the operands to the inner multiply, promoting + them to the same data type as the accumulator/output. + """ + domain(D.b, D.m, D.k) + implements(ContractionOpInterface) + C[D.b, D.m] += cast(U, A[D.b, D.m, D.k]) * cast(U, B[D.b, D.k]) + + @linalg_structured_op def dot( A=TensorDef(T1, S.M), B=TensorDef(T2, S.M), C=TensorDef(U, output=True)): diff --git a/mlir/test/Dialect/Linalg/generalize-named-ops.mlir b/mlir/test/Dialect/Linalg/generalize-named-ops.mlir index 412309a0f7434..405c7b156da6b 100644 --- a/mlir/test/Dialect/Linalg/generalize-named-ops.mlir +++ b/mlir/test/Dialect/Linalg/generalize-named-ops.mlir @@ -490,3 +490,28 @@ func @generalize_fill(%output: memref, %value : f32) { // CHECK: ^{{.+}}(%[[BBARG0:.+]]: f32, %[[BBARG1:.+]]: f32) // CHECK-NEXT: linalg.yield %[[BBARG0]] : f32 + +// ----- + +func @generalize_batch_matm_vec(%lhs : memref, %rhs: memref, %out: memref) { + linalg.batch_matvec ins(%lhs, %rhs: memref, memref) + outs(%out: memref) + return +} +// CHECK: #[[MAP0:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)> +// CHECK: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d0, d2)> +// CHECK: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d0, d1)> + +// CHECK: @generalize_batch_matm_vec + +// CHECK: linalg.generic +// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] +// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"]} +// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref, memref) +// CHECK-SAME: outs(%{{.+}} : memref) +// CHECK: ^{{.+}}(%[[BBARG0:.+]]: i8, %[[BBARG1:.+]]: i8, %[[BBARG2:.+]]: f32) +// CHECK: %[[BBARG0_F32:.+]] = sitofp %[[BBARG0]] : i8 to f32 +// CHECK: %[[BBARG1_F32:.+]] = sitofp %[[BBARG1]] : i8 to f32 +// CHECK: %[[MUL:.+]] = mulf %[[BBARG0_F32]], %[[BBARG1_F32]] +// CHECK: %[[ADD:.+]] = addf %[[BBARG2]], %[[MUL]] +// CHECK: linalg.yield %[[ADD]] : f32 From 822b92aae439c4ba2946980c8a27bd2c8a62d90c Mon Sep 17 00:00:00 2001 From: Xun Li Date: Wed, 30 Jun 2021 11:38:14 -0700 Subject: [PATCH 327/619] [Coroutines] Add the newly generated SCCs back to the CGSCC work queue after CoroSplit actually happened Relevant discussion can be found at: https://lists.llvm.org/pipermail/llvm-dev/2021-January/148197.html In the existing design, An SCC that contains a coroutine will go through the folloing passes: Inliner -> CoroSplitPass (fake) -> FunctionSimplificationPipeline -> Inliner -> CoroSplitPass (real) -> FunctionSimplificationPipeline The first CoroSplitPass doesn't do anything other than putting the SCC back to the queue so that the entire pipeline can repeat. As you can see, we run Inliner twice on the SCC consecutively without doing any real split, which is unnecessary and likely unintended. What we really wanted is this: Inliner -> FunctionSimplificationPipeline -> CoroSplitPass -> FunctionSimplificationPipeline (note that we don't really need to run Inliner again on the ramp function after split). Hence the way we do it here is to move CoroSplitPass to the end of the CGSCC pipeline, make it once for real, insert the newly generated SCCs (the clones) back to the pipeline so that they can be optimized, and also add a function simplification pipeline after CoroSplit to optimize the post-split ramp function. This approach also conforms to how the new pass manager works instead of relying on an adhoc post split cleanup, making it ready for full switch to new pass manager eventually. By looking at some of the changes to the tests, we can already observe that this changes allows for more optimizations applied to coroutines. Reviewed By: aeubanks, ChuanqiXu Differential Revision: https://reviews.llvm.org/D95807 --- .../CodeGenCoroutines/coro-newpm-pipeline.cpp | 8 +- llvm/lib/Passes/PassBuilder.cpp | 6 +- llvm/lib/Transforms/Coroutines/CoroSplit.cpp | 34 ++--- llvm/test/Transforms/Coroutines/ArgAddr.ll | 2 +- .../Coroutines/coro-alloc-with-param-O0.ll | 8 +- .../Coroutines/coro-alloc-with-param-O2.ll | 2 +- .../Transforms/Coroutines/coro-alloca-01.ll | 2 +- .../Transforms/Coroutines/coro-alloca-02.ll | 2 +- .../Transforms/Coroutines/coro-alloca-03.ll | 2 +- .../Transforms/Coroutines/coro-alloca-04.ll | 4 +- .../Transforms/Coroutines/coro-alloca-05.ll | 8 +- .../Transforms/Coroutines/coro-alloca-06.ll | 2 +- .../Transforms/Coroutines/coro-alloca-07.ll | 2 +- .../Transforms/Coroutines/coro-alloca-08.ll | 2 +- llvm/test/Transforms/Coroutines/coro-async.ll | 2 +- .../Transforms/Coroutines/coro-byval-param.ll | 2 +- .../Coroutines/coro-catchswitch-cleanuppad.ll | 5 +- .../Transforms/Coroutines/coro-catchswitch.ll | 2 +- llvm/test/Transforms/Coroutines/coro-debug.ll | 2 +- .../Coroutines/coro-eh-aware-edge-split-00.ll | 2 +- .../Coroutines/coro-eh-aware-edge-split-01.ll | 2 +- .../Coroutines/coro-eh-aware-edge-split-02.ll | 2 +- .../Coroutines/coro-frame-arrayalloca.ll | 2 +- .../Coroutines/coro-frame-reuse-alloca-00.ll | 2 +- .../Coroutines/coro-frame-reuse-alloca-01.ll | 2 +- .../Coroutines/coro-frame-reuse-alloca-02.ll | 2 +- .../Coroutines/coro-frame-reuse-alloca-03.ll | 2 +- .../Coroutines/coro-frame-reuse-alloca-04.ll | 2 +- .../Coroutines/coro-frame-reuse-alloca-05.ll | 2 +- .../Coroutines/coro-frame-unreachable.ll | 2 +- llvm/test/Transforms/Coroutines/coro-frame.ll | 2 +- .../Transforms/Coroutines/coro-materialize.ll | 2 +- .../Transforms/Coroutines/coro-padding.ll | 2 +- .../Transforms/Coroutines/coro-param-copy.ll | 2 +- .../Coroutines/coro-retcon-alloca.ll | 22 +-- .../Coroutines/coro-retcon-frame.ll | 2 +- .../Coroutines/coro-retcon-once-value.ll | 16 +-- .../Coroutines/coro-retcon-once-value2.ll | 68 ++++----- .../Coroutines/coro-retcon-resume-values.ll | 48 +++++-- .../Coroutines/coro-retcon-resume-values2.ll | 129 +++++++++--------- .../Coroutines/coro-retcon-unreachable.ll | 15 +- .../Coroutines/coro-retcon-value.ll | 11 +- .../test/Transforms/Coroutines/coro-retcon.ll | 80 +++++------ .../Coroutines/coro-spill-after-phi.ll | 40 ++++-- .../Coroutines/coro-spill-corobegin.ll | 2 +- .../coro-spill-defs-before-corobegin.ll | 6 +- .../Coroutines/coro-spill-promise.ll | 2 +- .../Transforms/Coroutines/coro-split-00.ll | 2 +- .../Transforms/Coroutines/coro-split-02.ll | 3 +- .../Transforms/Coroutines/coro-split-alloc.ll | 2 +- .../Transforms/Coroutines/coro-split-dbg.ll | 2 +- .../Transforms/Coroutines/coro-split-eh-00.ll | 11 +- .../Transforms/Coroutines/coro-split-eh-01.ll | 3 +- .../Coroutines/coro-split-hidden.ll | 2 +- .../Coroutines/coro-split-musttail.ll | 2 +- .../Coroutines/coro-split-musttail1.ll | 2 +- .../Coroutines/coro-split-musttail2.ll | 2 +- .../Coroutines/coro-split-musttail3.ll | 2 +- .../Coroutines/coro-split-recursive.ll | 2 +- .../Coroutines/coro-split-sink-lifetime-01.ll | 3 +- .../Coroutines/coro-split-sink-lifetime-02.ll | 4 +- .../Coroutines/coro-split-sink-lifetime-03.ll | 2 +- .../Coroutines/coro-split-sink-lifetime-04.ll | 2 +- .../Transforms/Coroutines/coro-swifterror.ll | 4 +- .../Transforms/Coroutines/coro-zero-alloca.ll | 2 +- llvm/test/Transforms/Coroutines/no-suspend.ll | 2 +- .../Transforms/Coroutines/restart-trigger.ll | 7 +- llvm/test/Transforms/Coroutines/smoketest.ll | 8 +- 68 files changed, 322 insertions(+), 315 deletions(-) diff --git a/clang/test/CodeGenCoroutines/coro-newpm-pipeline.cpp b/clang/test/CodeGenCoroutines/coro-newpm-pipeline.cpp index 91e0fb3042b9d..869e98ecdb9ec 100644 --- a/clang/test/CodeGenCoroutines/coro-newpm-pipeline.cpp +++ b/clang/test/CodeGenCoroutines/coro-newpm-pipeline.cpp @@ -10,12 +10,6 @@ // // CHECK-ALL: Running pass:{{.*}}CoroEarlyPass // -// The first coro-split pass enqueues a second run of the entire CGSCC pipeline. -// CHECK-ALL: Running pass: CoroSplitPass on (_Z3foov) -// CHECK-OPT: Running pass:{{.*}}CoroElidePass{{.*}} on {{.*}}_Z3foov{{.*}} -// -// The second coro-split pass splits coroutine 'foo' into funclets -// 'foo.resume', 'foo.destroy', and 'foo.cleanup'. // CHECK-ALL: Running pass: CoroSplitPass on (_Z3foov) // CHECK-OPT: Running pass:{{.*}}CoroElidePass{{.*}} on {{.*}}_Z3foov{{.*}} // @@ -27,7 +21,7 @@ namespace experimental { struct handle {}; struct awaitable { - bool await_ready() noexcept { return true; } + bool await_ready() noexcept { return false; } void await_suspend(handle) noexcept {} bool await_resume() noexcept { return true; } }; diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 2db8b451bf16d..229fd453c8967 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -1002,9 +1002,6 @@ PassBuilder::buildInlinerPipeline(OptimizationLevel Level, if (AttributorRun & AttributorRunOption::CGSCC) MainCGPipeline.addPass(AttributorCGSCCPass()); - if (PTO.Coroutines) - MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0)); - // Now deduce any function attributes based in the current code. MainCGPipeline.addPass(PostOrderFunctionAttrsPass()); @@ -1026,6 +1023,9 @@ PassBuilder::buildInlinerPipeline(OptimizationLevel Level, MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor( buildFunctionSimplificationPipeline(Level, Phase))); + if (PTO.Coroutines) + MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0)); + return MIWP; } diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp index 545b74e01126c..e4bf5e3f4d537 100644 --- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -1134,17 +1134,6 @@ static void postSplitCleanup(Function &F) { // pass to FPM below because it will also verify all the global data. if (verifyFunction(F, &errs())) report_fatal_error("Broken function"); - - legacy::FunctionPassManager FPM(F.getParent()); - - FPM.add(createSCCPPass()); - FPM.add(createCFGSimplificationPass()); - FPM.add(createEarlyCSEPass()); - FPM.add(createCFGSimplificationPass()); - - FPM.doInitialization(); - FPM.run(F); - FPM.doFinalization(); } // Assuming we arrived at the block NewBlock from Prev instruction, store @@ -2119,28 +2108,21 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C, // Split all the coroutines. for (LazyCallGraph::Node *N : Coroutines) { Function &F = N->getFunction(); - Attribute Attr = F.getFnAttribute(CORO_PRESPLIT_ATTR); - StringRef Value = Attr.getValueAsString(); LLVM_DEBUG(dbgs() << "CoroSplit: Processing coroutine '" << F.getName() - << "' state: " << Value << "\n"); - if (Value == UNPREPARED_FOR_SPLIT) { - // Enqueue a second iteration of the CGSCC pipeline on this SCC. - UR.CWorklist.insert(&C); - F.addFnAttr(CORO_PRESPLIT_ATTR, PREPARED_FOR_SPLIT); - continue; - } + << "' state: " + << F.getFnAttribute(CORO_PRESPLIT_ATTR).getValueAsString() + << "\n"); F.removeFnAttr(CORO_PRESPLIT_ATTR); SmallVector Clones; const coro::Shape Shape = splitCoroutine(F, Clones, ReuseFrameSlot); updateCallGraphAfterCoroutineSplit(*N, Shape, Clones, C, CG, AM, UR, FAM); - if ((Shape.ABI == coro::ABI::Async || Shape.ABI == coro::ABI::Retcon || - Shape.ABI == coro::ABI::RetconOnce) && - !Shape.CoroSuspends.empty()) { - // Run the CGSCC pipeline on the newly split functions. - // All clones will be in the same RefSCC, so choose a random clone. - UR.RCWorklist.insert(CG.lookupRefSCC(CG.get(*Clones[0]))); + if (!Shape.CoroSuspends.empty()) { + // Run the CGSCC pipeline on the original and newly split functions. + UR.CWorklist.insert(&C); + for (Function *Clone : Clones) + UR.CWorklist.insert(CG.lookupSCC(CG.get(*Clone))); } } diff --git a/llvm/test/Transforms/Coroutines/ArgAddr.ll b/llvm/test/Transforms/Coroutines/ArgAddr.ll index 9f5afc9581126..d11f2fb762e9a 100644 --- a/llvm/test/Transforms/Coroutines/ArgAddr.ll +++ b/llvm/test/Transforms/Coroutines/ArgAddr.ll @@ -1,6 +1,6 @@ ; Need to move users of allocas that were moved into the coroutine frame after ; coro.begin. -; RUN: opt < %s -passes=coro-split -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s define nonnull i8* @f(i32 %n) "coroutine.presplit"="1" { ; CHECK-LABEL: @f( diff --git a/llvm/test/Transforms/Coroutines/coro-alloc-with-param-O0.ll b/llvm/test/Transforms/Coroutines/coro-alloc-with-param-O0.ll index 8fe570c1f5b71..b3a69c85d7eaa 100644 --- a/llvm/test/Transforms/Coroutines/coro-alloc-with-param-O0.ll +++ b/llvm/test/Transforms/Coroutines/coro-alloc-with-param-O0.ll @@ -1,6 +1,6 @@ ; Check that we can handle the case when both alloc function and ; the user body consume the same argument. -; RUN: opt < %s -passes=coro-split -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s ; using copy of this (as it would happen under -O0) define i8* @f_copy(i64 %this_arg) "coroutine.presplit"="1" { @@ -33,15 +33,17 @@ suspend: ; See that %this is spilled into the frame ; CHECK-LABEL: define i8* @f_copy(i64 %this_arg) +; CHECK: %this.addr = alloca i64, align 8 +; CHECK: store i64 %this_arg, i64* %this.addr, align 4 ; CHECK: %this.spill.addr = getelementptr inbounds %f_copy.Frame, %f_copy.Frame* %FramePtr, i32 0, i32 2 ; CHECK: store i64 %this_arg, i64* %this.spill.addr -; CHECK: ret i8* %hdl +; CHECK: ret i8* %hdl ; See that %this was loaded from the frame ; CHECK-LABEL: @f_copy.resume( ; CHECK: %this.reload = load i64, i64* %this.reload.addr ; CHECK: call void @print2(i64 %this.reload) -; CHECK: ret void +; CHECK: ret void declare i8* @llvm.coro.free(token, i8*) declare i32 @llvm.coro.size.i32() diff --git a/llvm/test/Transforms/Coroutines/coro-alloc-with-param-O2.ll b/llvm/test/Transforms/Coroutines/coro-alloc-with-param-O2.ll index 4082a8a6b3d81..7e85b9e07fc52 100644 --- a/llvm/test/Transforms/Coroutines/coro-alloc-with-param-O2.ll +++ b/llvm/test/Transforms/Coroutines/coro-alloc-with-param-O2.ll @@ -1,6 +1,6 @@ ; Check that we can handle the case when both alloc function and ; the user body consume the same argument. -; RUN: opt < %s -passes=coro-split -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s ; using this directly (as it would happen under -O2) define i8* @f_direct(i64 %this) "coroutine.presplit"="1" { diff --git a/llvm/test/Transforms/Coroutines/coro-alloca-01.ll b/llvm/test/Transforms/Coroutines/coro-alloca-01.ll index 1f7aeddc30691..4371bd3024283 100644 --- a/llvm/test/Transforms/Coroutines/coro-alloca-01.ll +++ b/llvm/test/Transforms/Coroutines/coro-alloca-01.ll @@ -1,6 +1,6 @@ ; Tests that CoroSplit can succesfully determine allocas should live on the frame ; if their aliases are used across suspension points through PHINode. -; RUN: opt < %s -passes=coro-split -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s define i8* @f(i1 %n) "coroutine.presplit"="1" { entry: diff --git a/llvm/test/Transforms/Coroutines/coro-alloca-02.ll b/llvm/test/Transforms/Coroutines/coro-alloca-02.ll index 067de387892a0..276daab648411 100644 --- a/llvm/test/Transforms/Coroutines/coro-alloca-02.ll +++ b/llvm/test/Transforms/Coroutines/coro-alloca-02.ll @@ -1,6 +1,6 @@ ; Tests that if an alloca is escaped through storing the address, ; the alloac will be put on the frame. -; RUN: opt < %s -passes=coro-split -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s define i8* @f() "coroutine.presplit"="1" { entry: diff --git a/llvm/test/Transforms/Coroutines/coro-alloca-03.ll b/llvm/test/Transforms/Coroutines/coro-alloca-03.ll index 419fe2fc30d84..e90464746bcd9 100644 --- a/llvm/test/Transforms/Coroutines/coro-alloca-03.ll +++ b/llvm/test/Transforms/Coroutines/coro-alloca-03.ll @@ -1,5 +1,5 @@ ; Tests that allocas escaped through function calls will live on the frame. -; RUN: opt < %s -passes=coro-split -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s define i8* @f() "coroutine.presplit"="1" { entry: diff --git a/llvm/test/Transforms/Coroutines/coro-alloca-04.ll b/llvm/test/Transforms/Coroutines/coro-alloca-04.ll index aef1c5d4688ac..7e3a852adbc01 100644 --- a/llvm/test/Transforms/Coroutines/coro-alloca-04.ll +++ b/llvm/test/Transforms/Coroutines/coro-alloca-04.ll @@ -1,6 +1,6 @@ ; Tests that CoroSplit can succesfully determine allocas should live on the frame ; if their aliases are used across suspension points through PHINode. -; RUN: opt < %s -passes=coro-split -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s define i8* @f(i1 %n) "coroutine.presplit"="1" { entry: @@ -45,7 +45,7 @@ suspend: ; CHECK-NEXT: %0 = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 2 ; CHECK-NEXT: %1 = bitcast i64* %0 to i8* ; CHECK-NEXT: %2 = bitcast i8* %1 to i32* -; CHECK-NEXT: %alias_phi.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 3 +; CHECK: %alias_phi.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 3 ; CHECK-NEXT: store i32* %2, i32** %alias_phi.spill.addr declare i8* @llvm.coro.free(token, i8*) diff --git a/llvm/test/Transforms/Coroutines/coro-alloca-05.ll b/llvm/test/Transforms/Coroutines/coro-alloca-05.ll index a8d6a579fd390..af4f73d1c243e 100644 --- a/llvm/test/Transforms/Coroutines/coro-alloca-05.ll +++ b/llvm/test/Transforms/Coroutines/coro-alloca-05.ll @@ -1,6 +1,6 @@ ; Tests that allocas after coro.begin are properly that do not need to ; live on the frame are properly moved to the .resume function. -; RUN: opt < %s -passes=coro-split -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s define i8* @f() "coroutine.presplit"="1" { entry: @@ -31,10 +31,10 @@ suspend: ; CHECK-NEXT: entry.resume: ; CHECK-NEXT: [[VFRAME:%.*]] = bitcast %f.Frame* [[FRAMEPTR:%.*]] to i8* ; CHECK-NEXT: [[X:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[X_VALUE:%.*]] = load i32, i32* [[X]], align 4 +; CHECK: [[X_VALUE:%.*]] = load i32, i32* [[X]], align 4 ; CHECK-NEXT: call void @print(i32 [[X_VALUE]]) -; CHECK-NEXT: call void @free(i8* [[VFRAME]]) -; CHECK-NEXT: ret void +; CHECK: call void @free(i8* [[VFRAME]]) +; CHECK: ret void declare i8* @llvm.coro.free(token, i8*) declare i32 @llvm.coro.size.i32() diff --git a/llvm/test/Transforms/Coroutines/coro-alloca-06.ll b/llvm/test/Transforms/Coroutines/coro-alloca-06.ll index 9251be631bc4b..9adc65416f727 100644 --- a/llvm/test/Transforms/Coroutines/coro-alloca-06.ll +++ b/llvm/test/Transforms/Coroutines/coro-alloca-06.ll @@ -1,6 +1,6 @@ ; Test that in some simple cases allocas will not live on the frame even ; though their pointers are stored. -; RUN: opt < %s -passes=coro-split -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s %handle = type { i8* } diff --git a/llvm/test/Transforms/Coroutines/coro-alloca-07.ll b/llvm/test/Transforms/Coroutines/coro-alloca-07.ll index 78c8ba8eb139a..c02b6b13f1c39 100644 --- a/llvm/test/Transforms/Coroutines/coro-alloca-07.ll +++ b/llvm/test/Transforms/Coroutines/coro-alloca-07.ll @@ -1,6 +1,6 @@ ; Tests that CoroSplit can succesfully determine allocas should live on the frame ; if their aliases are used across suspension points through PHINode. -; RUN: opt < %s -passes=coro-split -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s define i8* @f(i1 %n) "coroutine.presplit"="1" { entry: diff --git a/llvm/test/Transforms/Coroutines/coro-alloca-08.ll b/llvm/test/Transforms/Coroutines/coro-alloca-08.ll index a2c2342961dc8..4baa867c8c449 100644 --- a/llvm/test/Transforms/Coroutines/coro-alloca-08.ll +++ b/llvm/test/Transforms/Coroutines/coro-alloca-08.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -passes=coro-split -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s %"struct.std::coroutine_handle" = type { i8* } %"struct.std::coroutine_handle.0" = type { %"struct.std::coroutine_handle" } diff --git a/llvm/test/Transforms/Coroutines/coro-async.ll b/llvm/test/Transforms/Coroutines/coro-async.ll index 27d07e3ec7f7b..d8f75af2f29a9 100644 --- a/llvm/test/Transforms/Coroutines/coro-async.ll +++ b/llvm/test/Transforms/Coroutines/coro-async.ll @@ -120,7 +120,7 @@ define void @my_async_function_pa(i8* %ctxt, %async.task* %task, %async.actor* % ; CHECK-LABEL: define swiftcc void @my_async_function(i8* swiftasync %async.ctxt, %async.task* %task, %async.actor* %actor) ; CHECK-SAME: !dbg ![[SP1:[0-9]+]] { -; CHECK: entry: +; CHECK: coro.return: ; CHECK: [[FRAMEPTR:%.*]] = getelementptr inbounds i8, i8* %async.ctxt, i64 128 ; CHECK: [[ACTOR_SPILL_ADDR:%.*]] = getelementptr inbounds i8, i8* %async.ctxt, i64 152 ; CHECK: [[CAST1:%.*]] = bitcast i8* [[ACTOR_SPILL_ADDR]] to %async.actor** diff --git a/llvm/test/Transforms/Coroutines/coro-byval-param.ll b/llvm/test/Transforms/Coroutines/coro-byval-param.ll index 6c3c4582fc8be..d331f24e6ddab 100644 --- a/llvm/test/Transforms/Coroutines/coro-byval-param.ll +++ b/llvm/test/Transforms/Coroutines/coro-byval-param.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -passes=coro-split -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s %promise_type = type { i8 } %struct.A = type <{ i64, i64, i32, [4 x i8] }> diff --git a/llvm/test/Transforms/Coroutines/coro-catchswitch-cleanuppad.ll b/llvm/test/Transforms/Coroutines/coro-catchswitch-cleanuppad.ll index a1e183cc5a1a1..7805983920a9c 100644 --- a/llvm/test/Transforms/Coroutines/coro-catchswitch-cleanuppad.ll +++ b/llvm/test/Transforms/Coroutines/coro-catchswitch-cleanuppad.ll @@ -1,6 +1,6 @@ ; Tests the PHI nodes in cleanuppads for catchswitch instructions are correctly ; split up. -; RUN: opt < %s -passes=coro-split -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s declare i32 @__CxxFrameHandler3(...) define i8* @f2(i1 %val) "coroutine.presplit"="1" personality i32 (...)* @__CxxFrameHandler3 { @@ -106,11 +106,10 @@ declare void @llvm.coro.destroy(i8*) declare token @llvm.coro.id(i32, i8*, i8*, i8*) declare i1 @llvm.coro.alloc(token) declare i8* @llvm.coro.begin(token, i8*) -declare i1 @llvm.coro.end(i8*, i1) +declare i1 @llvm.coro.end(i8*, i1) declare noalias i8* @malloc(i32) declare void @print(i32) declare void @free(i8*) declare i32 @f() - diff --git a/llvm/test/Transforms/Coroutines/coro-catchswitch.ll b/llvm/test/Transforms/Coroutines/coro-catchswitch.ll index ef4310b041673..ff591194a46aa 100644 --- a/llvm/test/Transforms/Coroutines/coro-catchswitch.ll +++ b/llvm/test/Transforms/Coroutines/coro-catchswitch.ll @@ -1,5 +1,5 @@ ; Verifies that we can insert the spill for a PHI preceding the catchswitch -; RUN: opt < %s -passes=coro-split -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32" target triple = "i686-pc-windows-msvc" diff --git a/llvm/test/Transforms/Coroutines/coro-debug.ll b/llvm/test/Transforms/Coroutines/coro-debug.ll index 89c4c96455b17..9ae279ca62030 100644 --- a/llvm/test/Transforms/Coroutines/coro-debug.ll +++ b/llvm/test/Transforms/Coroutines/coro-debug.ll @@ -1,5 +1,5 @@ ; Tests that debug information is sane after coro-split -; RUN: opt < %s -passes=coro-split -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s source_filename = "simple-repro.c" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/Coroutines/coro-eh-aware-edge-split-00.ll b/llvm/test/Transforms/Coroutines/coro-eh-aware-edge-split-00.ll index b35c8167033de..fdb1289df3a43 100644 --- a/llvm/test/Transforms/Coroutines/coro-eh-aware-edge-split-00.ll +++ b/llvm/test/Transforms/Coroutines/coro-eh-aware-edge-split-00.ll @@ -1,5 +1,5 @@ ; Check that we can handle edge splits leading into a landingpad -; RUN: opt < %s -passes=coro-split -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/Coroutines/coro-eh-aware-edge-split-01.ll b/llvm/test/Transforms/Coroutines/coro-eh-aware-edge-split-01.ll index 0ddb27a3e9340..0e71236b3331b 100644 --- a/llvm/test/Transforms/Coroutines/coro-eh-aware-edge-split-01.ll +++ b/llvm/test/Transforms/Coroutines/coro-eh-aware-edge-split-01.ll @@ -1,5 +1,5 @@ ; Check that we can handle edge splits leading into a landingpad -; RUN: opt < %s -passes=coro-split -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/Coroutines/coro-eh-aware-edge-split-02.ll b/llvm/test/Transforms/Coroutines/coro-eh-aware-edge-split-02.ll index 54dd60c06b2bb..ec13b06969d4d 100644 --- a/llvm/test/Transforms/Coroutines/coro-eh-aware-edge-split-02.ll +++ b/llvm/test/Transforms/Coroutines/coro-eh-aware-edge-split-02.ll @@ -1,5 +1,5 @@ ; Check that we can handle edge splits leading into a landingpad -; RUN: opt < %s -passes=coro-split -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/Coroutines/coro-frame-arrayalloca.ll b/llvm/test/Transforms/Coroutines/coro-frame-arrayalloca.ll index 1d496d6a69486..d57c4142df7a4 100644 --- a/llvm/test/Transforms/Coroutines/coro-frame-arrayalloca.ll +++ b/llvm/test/Transforms/Coroutines/coro-frame-arrayalloca.ll @@ -1,5 +1,5 @@ ; Check that we can handle spills of array allocas -; RUN: opt < %s -passes=coro-split -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s declare void @consume.double.ptr(double*) declare void @consume.i32.ptr(i32*) diff --git a/llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-00.ll b/llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-00.ll index 54131a6a34c67..9670be6680afb 100644 --- a/llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-00.ll +++ b/llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-00.ll @@ -1,5 +1,5 @@ ; Check that we can handle spills of array allocas -; RUN: opt < %s -passes=coro-split -reuse-storage-in-coroutine-frame -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -reuse-storage-in-coroutine-frame -S | FileCheck %s %struct.big_structure = type { [500 x i8] } declare void @consume(%struct.big_structure*) diff --git a/llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-01.ll b/llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-01.ll index 39757a760b999..d745d90977b71 100644 --- a/llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-01.ll +++ b/llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-01.ll @@ -1,6 +1,6 @@ ; Tests that variables in a Corotuine whose lifetime range is not overlapping each other ; re-use the same slot in Coroutine frame. -; RUN: opt < %s -passes=coro-split -reuse-storage-in-coroutine-frame -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -reuse-storage-in-coroutine-frame -S | FileCheck %s %"struct.task::promise_type" = type { i8 } %struct.awaitable = type { i8 } %struct.big_structure = type { [500 x i8] } diff --git a/llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-02.ll b/llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-02.ll index 15a6591268472..c251509191a9d 100644 --- a/llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-02.ll +++ b/llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-02.ll @@ -1,6 +1,6 @@ ; Tests that variables of different type in a Corotuine whose lifetime range is not overlapping each other ; re-use the same slot in Coroutine frame. -; RUN: opt < %s -passes=coro-split -reuse-storage-in-coroutine-frame -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -reuse-storage-in-coroutine-frame -S | FileCheck %s %"struct.task::promise_type" = type { i8 } %struct.awaitable = type { i8 } %struct.big_structure = type { [500 x i8] } diff --git a/llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-03.ll b/llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-03.ll index e1e6001d4e797..18a0098038722 100644 --- a/llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-03.ll +++ b/llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-03.ll @@ -1,5 +1,5 @@ ; Check that we should not reuse alloca sotrage in O0. -; RUN: opt < %s -passes=coro-split -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s %struct.big_structure = type { [500 x i8] } declare void @consume(%struct.big_structure*) diff --git a/llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-04.ll b/llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-04.ll index a11863fe418ea..bfc5f4678658a 100644 --- a/llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-04.ll +++ b/llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-04.ll @@ -1,6 +1,6 @@ ; Tests that variables of different type with incompatible alignment in a Corotuine whose lifetime ; range is not overlapping each other should not re-use the same slot in Coroutine frame. -; RUN: opt < %s -passes=coro-split -reuse-storage-in-coroutine-frame -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -reuse-storage-in-coroutine-frame -S | FileCheck %s %"struct.task::promise_type" = type { i8 } %struct.awaitable = type { i8 } %struct.big_structure = type { [500 x i8] } diff --git a/llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-05.ll b/llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-05.ll index 66985408367f2..f336097a0045e 100644 --- a/llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-05.ll +++ b/llvm/test/Transforms/Coroutines/coro-frame-reuse-alloca-05.ll @@ -1,6 +1,6 @@ ; Tests that variables of different type with incompatible alignment in a Corotuine whose ; lifetime range is not overlapping each other re-use the same slot in CorotuineFrame. -; RUN: opt < %s -passes=coro-split -reuse-storage-in-coroutine-frame -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -reuse-storage-in-coroutine-frame -S | FileCheck %s %"struct.task::promise_type" = type { i8 } %struct.awaitable = type { i8 } %struct.big_structure = type { [500 x i8] } diff --git a/llvm/test/Transforms/Coroutines/coro-frame-unreachable.ll b/llvm/test/Transforms/Coroutines/coro-frame-unreachable.ll index 12ab7ddf02779..a3ef133e8e11f 100644 --- a/llvm/test/Transforms/Coroutines/coro-frame-unreachable.ll +++ b/llvm/test/Transforms/Coroutines/coro-frame-unreachable.ll @@ -1,5 +1,5 @@ ; Check that coro-split doesn't choke on intrinsics in unreachable blocks -; RUN: opt < %s -passes=coro-split -S +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S define i8* @f(i1 %arg) "coroutine.presplit"="1" personality i32 0 { entry: diff --git a/llvm/test/Transforms/Coroutines/coro-frame.ll b/llvm/test/Transforms/Coroutines/coro-frame.ll index 68de2658419aa..86cf40c8019be 100644 --- a/llvm/test/Transforms/Coroutines/coro-frame.ll +++ b/llvm/test/Transforms/Coroutines/coro-frame.ll @@ -1,5 +1,5 @@ ; Check that we can handle spills of the result of the invoke instruction -; RUN: opt < %s -passes=coro-split -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s define i8* @f(i64 %this) "coroutine.presplit"="1" personality i32 0 { entry: diff --git a/llvm/test/Transforms/Coroutines/coro-materialize.ll b/llvm/test/Transforms/Coroutines/coro-materialize.ll index 2d7e187409e2b..fcc1f89655879 100644 --- a/llvm/test/Transforms/Coroutines/coro-materialize.ll +++ b/llvm/test/Transforms/Coroutines/coro-materialize.ll @@ -1,5 +1,5 @@ ; Verifies that we materialize instruction across suspend points -; RUN: opt < %s -passes=coro-split -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s define i8* @f(i32 %n) "coroutine.presplit"="1" { entry: diff --git a/llvm/test/Transforms/Coroutines/coro-padding.ll b/llvm/test/Transforms/Coroutines/coro-padding.ll index 16e6d7e51d996..e8801c8e8e1ca 100644 --- a/llvm/test/Transforms/Coroutines/coro-padding.ll +++ b/llvm/test/Transforms/Coroutines/coro-padding.ll @@ -1,6 +1,6 @@ ; Check that we will insert the correct padding if natural alignment of the ; spilled data does not match the alignment specified in alloca instruction. -; RUN: opt < %s -passes=coro-split -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s %PackedStruct = type <{ i64 }> diff --git a/llvm/test/Transforms/Coroutines/coro-param-copy.ll b/llvm/test/Transforms/Coroutines/coro-param-copy.ll index 4db2ae66d538e..b5a4e7059b387 100644 --- a/llvm/test/Transforms/Coroutines/coro-param-copy.ll +++ b/llvm/test/Transforms/Coroutines/coro-param-copy.ll @@ -1,6 +1,6 @@ ; Check that we create copy the data from the alloca into the coroutine ; frame slot if it was written to. -; RUN: opt < %s -passes=coro-split -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s define i8* @f() "coroutine.presplit"="1" { entry: diff --git a/llvm/test/Transforms/Coroutines/coro-retcon-alloca.ll b/llvm/test/Transforms/Coroutines/coro-retcon-alloca.ll index 6f82c7773f2d4..bf2862fcac2bd 100644 --- a/llvm/test/Transforms/Coroutines/coro-retcon-alloca.ll +++ b/llvm/test/Transforms/Coroutines/coro-retcon-alloca.ll @@ -6,11 +6,11 @@ target datalayout = "p:64:64:64" declare {i8*, i8*, i32} @prototype_f(i8*, i1) define {i8*, i8*, i32} @f(i8* %buffer, i32 %n) { ; CHECK-LABEL: @f( -; CHECK-NEXT: entry: +; CHECK-NEXT: coro.return: ; CHECK-NEXT: [[N_VAL_SPILL_ADDR:%.*]] = getelementptr inbounds i8, i8* [[BUFFER:%.*]], i64 8 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[N_VAL_SPILL_ADDR]] to i32* ; CHECK-NEXT: store i32 [[N:%.*]], i32* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = tail call i8* @allocate(i32 [[N]]) +; CHECK-NEXT: [[TMP1:%.*]] = tail call i8* @allocate(i32 [[N]]) #[[ATTR0:[0-9]+]] ; CHECK-NEXT: [[DOTSPILL_ADDR:%.*]] = bitcast i8* [[BUFFER]] to i8** ; CHECK-NEXT: store i8* [[TMP1]], i8** [[DOTSPILL_ADDR]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { i8*, i8*, i32 } { i8* bitcast ({ i8*, i8*, i32 } (i8*, i1)* @f.resume.0 to i8*), i8* undef, i32 undef }, i8* [[TMP1]], 1 @@ -44,12 +44,12 @@ cleanup: declare {i8*, i32} @prototype_g(i8*, i1) define {i8*, i32} @g(i8* %buffer, i32 %n) { ; CHECK-LABEL: @g( -; CHECK-NEXT: entry: +; CHECK-NEXT: coro.return: ; CHECK-NEXT: [[N_VAL_SPILL_ADDR:%.*]] = bitcast i8* [[BUFFER:%.*]] to i32* ; CHECK-NEXT: store i32 [[N:%.*]], i32* [[N_VAL_SPILL_ADDR]], align 4 ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64 ; CHECK-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[TMP0]], align 8 -; CHECK-NEXT: call void @use(i8* nonnull [[TMP1]]) +; CHECK-NEXT: tail call void @use(i8* nonnull [[TMP1]]) ; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { i8*, i32 } { i8* bitcast ({ i8*, i32 } (i8*, i1)* @g.resume.0 to i8*), i32 undef }, i32 [[N]], 1 ; CHECK-NEXT: ret { i8*, i32 } [[TMP2]] ; @@ -81,9 +81,9 @@ cleanup: declare {i8*, i32} @prototype_h(i8*, i1) define {i8*, i32} @h(i8* %buffer, i32 %n) { ; CHECK-LABEL: @h( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[N_VAL_SPILL_ADDR:%.*]] = bitcast i8* [[BUFFER:%.*]] to i32* -; CHECK-NEXT: store i32 [[N:%.*]], i32* [[N_VAL_SPILL_ADDR]], align 4 +; CHECK-NEXT: coro.return: +; CHECK-NEXT: [[N_SPILL_ADDR:%.*]] = bitcast i8* [[BUFFER:%.*]] to i32* +; CHECK-NEXT: store i32 [[N:%.*]], i32* [[N_SPILL_ADDR]], align 4 ; CHECK-NEXT: [[TMP0:%.*]] = insertvalue { i8*, i32 } { i8* bitcast ({ i8*, i32 } (i8*, i1)* @h.resume.0 to i8*), i32 undef }, i32 [[N]], 1 ; CHECK-NEXT: ret { i8*, i32 } [[TMP0]] ; @@ -115,7 +115,7 @@ cleanup: declare {i8*, i32} @prototype_i(i8*) define {i8*, i32} @i(i8* %buffer, i32 %n) { ; CHECK-LABEL: @i( -; CHECK-NEXT: entry: +; CHECK-NEXT: coro.return: ; CHECK-NEXT: [[N_VAL_SPILL_ADDR:%.*]] = bitcast i8* [[BUFFER:%.*]] to i32* ; CHECK-NEXT: store i32 [[N:%.*]], i32* [[N_VAL_SPILL_ADDR]], align 4 ; CHECK-NEXT: [[TMP0:%.*]] = insertvalue { i8*, i32 } { i8* bitcast ({ i8*, i32 } (i8*)* @i.resume.0 to i8*), i32 undef }, i32 [[N]], 1 @@ -148,7 +148,7 @@ loop2: declare {i8*, i32} @prototype_j(i8*) define {i8*, i32} @j(i8* %buffer, i32 %n) { ; CHECK-LABEL: @j( -; CHECK-NEXT: entry: +; CHECK-NEXT: coro.return: ; CHECK-NEXT: [[N_VAL_SPILL_ADDR:%.*]] = bitcast i8* [[BUFFER:%.*]] to i32* ; CHECK-NEXT: store i32 [[N:%.*]], i32* [[N_VAL_SPILL_ADDR]], align 4 ; CHECK-NEXT: [[TMP0:%.*]] = insertvalue { i8*, i32 } { i8* bitcast ({ i8*, i32 } (i8*)* @j.resume.0 to i8*), i32 undef }, i32 [[N]], 1 @@ -183,7 +183,7 @@ end: declare i32 @getSize() define {i8*, i32} @k(i8* %buffer, i32 %n, i1 %cond) { ; CHECK-LABEL: @k( -; CHECK-NEXT: entry: +; CHECK-NEXT: PostSpill: ; CHECK-NEXT: [[SIZE:%.*]] = tail call i32 @getSize() ; CHECK-NEXT: br i1 [[COND:%.*]], label [[ALLOCA_BLOCK:%.*]], label [[CORO_RETURN:%.*]] ; CHECK: coro.return: @@ -192,7 +192,7 @@ define {i8*, i32} @k(i8* %buffer, i32 %n, i1 %cond) { ; CHECK: alloca_block: ; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[SIZE]] to i64 ; CHECK-NEXT: [[TMP2:%.*]] = alloca i8, i64 [[TMP1]], align 8 -; CHECK-NEXT: call void @use(i8* nonnull [[TMP2]]) +; CHECK-NEXT: tail call void @use(i8* nonnull [[TMP2]]) ; CHECK-NEXT: br label [[CORO_RETURN]] ; entry: diff --git a/llvm/test/Transforms/Coroutines/coro-retcon-frame.ll b/llvm/test/Transforms/Coroutines/coro-retcon-frame.ll index 30a6a08224c95..3380f4ace769b 100644 --- a/llvm/test/Transforms/Coroutines/coro-retcon-frame.ll +++ b/llvm/test/Transforms/Coroutines/coro-retcon-frame.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -passes=coro-split -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s target datalayout = "p:64:64:64" diff --git a/llvm/test/Transforms/Coroutines/coro-retcon-once-value.ll b/llvm/test/Transforms/Coroutines/coro-retcon-once-value.ll index e897be14f2a11..9e25c5682feee 100644 --- a/llvm/test/Transforms/Coroutines/coro-retcon-once-value.ll +++ b/llvm/test/Transforms/Coroutines/coro-retcon-once-value.ll @@ -6,16 +6,16 @@ target triple = "x86_64-apple-macosx10.12.0" define {i8*, i32} @f(i8* %buffer, i32* %array) { ; CHECK-LABEL: @f( -; CHECK-NEXT: entry: +; CHECK-NEXT: PostSpill: ; CHECK-NEXT: [[ARRAY_SPILL_ADDR:%.*]] = bitcast i8* [[BUFFER:%.*]] to i32** ; CHECK-NEXT: store i32* [[ARRAY:%.*]], i32** [[ARRAY_SPILL_ADDR]], align 8 ; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[ARRAY]], align 4 ; CHECK-NEXT: [[LOAD_POS:%.*]] = icmp sgt i32 [[LOAD]], 0 -; CHECK-NEXT: [[TMP0:%.*]] = select i1 [[LOAD_POS]], i32 [[LOAD]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[LOAD_POS]], i8* bitcast (void (i8*, i1)* @f.resume.0 to i8*), i8* bitcast (void (i8*, i1)* @f.resume.1 to i8*) -; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { i8*, i32 } undef, i8* [[TMP1]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertvalue { i8*, i32 } [[TMP2]], i32 [[TMP0]], 1 -; CHECK-NEXT: ret { i8*, i32 } [[TMP3]] +; CHECK-NEXT: [[SPEC_SELECT4:%.*]] = select i1 [[LOAD_POS]], i32 [[LOAD]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = select i1 [[LOAD_POS]], i8* bitcast (void (i8*, i1)* @f.resume.0 to i8*), i8* bitcast (void (i8*, i1)* @f.resume.1 to i8*) +; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { i8*, i32 } undef, i8* [[TMP0]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { i8*, i32 } [[TMP1]], i32 [[SPEC_SELECT4]], 1 +; CHECK-NEXT: ret { i8*, i32 } [[TMP2]] ; entry: %id = call token @llvm.coro.id.retcon.once(i32 8, i32 8, i8* %buffer, i8* bitcast (void (i8*, i1)* @prototype to i8*), i8* bitcast (i8* (i32)* @allocate to i8*), i8* bitcast (void (i8*)* @deallocate to i8*)) @@ -56,8 +56,8 @@ define void @test(i32* %array) { ; CHECK-NEXT: store i32* [[ARRAY:%.*]], i32** [[TMP0]], align 8 ; CHECK-NEXT: [[LOAD_I:%.*]] = load i32, i32* [[ARRAY]], align 4 ; CHECK-NEXT: [[LOAD_POS_I:%.*]] = icmp sgt i32 [[LOAD_I]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[LOAD_POS_I]], i32 [[LOAD_I]], i32 0 -; CHECK-NEXT: call void @print(i32 [[TMP1]]) +; CHECK-NEXT: [[SPEC_SELECT4_I:%.*]] = select i1 [[LOAD_POS_I]], i32 [[LOAD_I]], i32 0 +; CHECK-NEXT: call void @print(i32 [[SPEC_SELECT4_I]]) ; CHECK-NEXT: [[CONT_CAST:%.*]] = select i1 [[LOAD_POS_I]], void (i8*, i1)* @f.resume.0, void (i8*, i1)* @f.resume.1 ; CHECK-NEXT: call void [[CONT_CAST]](i8* nonnull [[DOTSUB]], i1 zeroext false) ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/Coroutines/coro-retcon-once-value2.ll b/llvm/test/Transforms/Coroutines/coro-retcon-once-value2.ll index f2c0714b0ec3d..a3637af82b174 100644 --- a/llvm/test/Transforms/Coroutines/coro-retcon-once-value2.ll +++ b/llvm/test/Transforms/Coroutines/coro-retcon-once-value2.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -passes='cgscc(coro-split),function(coro-cleanup)' -S | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse,coro-cleanup' -S | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.12.0" @@ -22,39 +23,7 @@ cleanup: unreachable } -; CHECK-LABEL: define { i8*, i32* } @f(i8* %buffer, i32* %ptr) -; CHECK-NEXT: entry: -; CHECK-NEXT: [[ALLOC:%.*]] = call i8* @allocate(i32 16) -; CHECK-NEXT: [[T0:%.*]] = bitcast i8* %buffer to i8** -; CHECK-NEXT: store i8* [[ALLOC]], i8** [[T0]] -; CHECK-NEXT: [[FRAME:%.*]] = bitcast i8* [[ALLOC]] to [[FRAME_T:%.*]]* -; CHECK-NEXT: %temp = getelementptr inbounds [[FRAME_T]], [[FRAME_T]]* [[FRAME]], i32 0, i32 1 -; CHECK-NEXT: [[SPILL:%.*]] = getelementptr inbounds [[FRAME_T]], [[FRAME_T]]* [[FRAME]], i32 0, i32 0 -; CHECK-NEXT: store i32* %ptr, i32** [[SPILL]] -; CHECK-NEXT: %oldvalue = load i32, i32* %ptr -; CHECK-NEXT: store i32 %oldvalue, i32* %temp -; CHECK-NEXT: [[T0:%.*]] = insertvalue { i8*, i32* } { i8* bitcast (void (i8*, i1)* @f.resume.0 to i8*), i32* undef }, i32* %temp, 1 -; CHECK-NEXT: ret { i8*, i32* } [[T0]] -; CHECK-NEXT: } -; CHECK-LABEL: define internal void @f.resume.0(i8* noalias nonnull align 8 dereferenceable(8) %0, i1 zeroext %1) -; CHECK-NEXT: : -; CHECK-NEXT: [[T0:%.*]] = bitcast i8* %0 to [[FRAME_T:%.*]]** -; CHECK-NEXT: [[FRAME:%.*]] = load [[FRAME_T]]*, [[FRAME_T]]** [[T0]] -; CHECK-NEXT: bitcast [[FRAME_T]]* [[FRAME]] to i8* -; CHECK-NEXT: [[TEMP_SLOT:%.*]] = getelementptr inbounds [[FRAME_T]], [[FRAME_T]]* [[FRAME]], i32 0, i32 1 -; CHECK-NEXT: br i1 %1, -; CHECK: : -; CHECK-NEXT: [[PTR_SLOT:%.*]] = getelementptr inbounds [[FRAME_T]], [[FRAME_T]]* [[FRAME]], i32 0, i32 0 -; CHECK-NEXT: [[PTR_RELOAD:%.*]] = load i32*, i32** [[PTR_SLOT]] -; CHECK-NEXT: %newvalue = load i32, i32* [[TEMP_SLOT]] -; CHECK-NEXT: store i32 %newvalue, i32* [[PTR_RELOAD]] -; CHECK-NEXT: br label -; CHECK: : -; CHECK-NEXT: [[T0:%.*]] = bitcast [[FRAME_T]]* [[FRAME]] to i8* -; CHECK-NEXT: call fastcc void @deallocate(i8* [[T0]]) -; CHECK-NEXT: ret void -; CHECK-NEXT: } declare token @llvm.coro.id.retcon.once(i32, i32, i8*, i8*, i8*, i8*) declare i8* @llvm.coro.begin(token, i8*) @@ -67,4 +36,35 @@ declare noalias i8* @allocate(i32 %size) declare fastcc void @deallocate(i8* %ptr) declare void @print(i32) - +; CHECK-LABEL: @f( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = call i8* @allocate(i32 16) +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BUFFER:%.*]] to i8** +; CHECK-NEXT: store i8* [[TMP0]], i8** [[TMP1]], align 8 +; CHECK-NEXT: [[FRAMEPTR:%.*]] = bitcast i8* [[TMP0]] to %f.Frame* +; CHECK-NEXT: [[TEMP:%.*]] = getelementptr inbounds [[F_FRAME:%.*]], %f.Frame* [[FRAMEPTR]], i32 0, i32 1 +; CHECK-NEXT: [[PTR_SPILL_ADDR:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR]], i32 0, i32 0 +; CHECK-NEXT: store i32* [[PTR:%.*]], i32** [[PTR_SPILL_ADDR]], align 8 +; CHECK-NEXT: [[OLDVALUE:%.*]] = load i32, i32* [[PTR]], align 4 +; CHECK-NEXT: store i32 [[OLDVALUE]], i32* [[TEMP]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { i8*, i32* } { i8* bitcast (void (i8*, i1)* @f.resume.0 to i8*), i32* undef }, i32* [[TEMP]], 1 +; CHECK-NEXT: ret { i8*, i32* } [[TMP2]] +; +; +; CHECK-LABEL: @f.resume.0( +; CHECK-NEXT: entryresume.0: +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP0:%.*]] to %f.Frame** +; CHECK-NEXT: [[FRAMEPTR:%.*]] = load %f.Frame*, %f.Frame** [[TMP2]], align 8 +; CHECK-NEXT: [[TEMP:%.*]] = getelementptr inbounds [[F_FRAME:%.*]], %f.Frame* [[FRAMEPTR]], i32 0, i32 1 +; CHECK-NEXT: br i1 [[TMP1:%.*]], label [[COROEND:%.*]], label [[CONT:%.*]] +; CHECK: cont: +; CHECK-NEXT: [[PTR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR]], i32 0, i32 0 +; CHECK-NEXT: [[PTR_RELOAD:%.*]] = load i32*, i32** [[PTR_RELOAD_ADDR]], align 8 +; CHECK-NEXT: [[NEWVALUE:%.*]] = load i32, i32* [[TEMP]], align 4 +; CHECK-NEXT: store i32 [[NEWVALUE]], i32* [[PTR_RELOAD]], align 4 +; CHECK-NEXT: br label [[COROEND]] +; CHECK: CoroEnd: +; CHECK-NEXT: [[TMP3:%.*]] = bitcast %f.Frame* [[FRAMEPTR]] to i8* +; CHECK-NEXT: call fastcc void @deallocate(i8* [[TMP3]]) +; CHECK-NEXT: ret void +; diff --git a/llvm/test/Transforms/Coroutines/coro-retcon-resume-values.ll b/llvm/test/Transforms/Coroutines/coro-retcon-resume-values.ll index a6e7eae252319..1981f75ed5460 100644 --- a/llvm/test/Transforms/Coroutines/coro-retcon-resume-values.ll +++ b/llvm/test/Transforms/Coroutines/coro-retcon-resume-values.ll @@ -3,9 +3,12 @@ define i8* @f(i8* %buffer, i32 %n) { ; CHECK-LABEL: @f( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[N_VAL_SPILL_ADDR:%.*]] = bitcast i8* [[BUFFER:%.*]] to i32* -; CHECK-NEXT: store i32 [[N:%.*]], i32* [[N_VAL_SPILL_ADDR]], align 4 +; CHECK-NEXT: coro.return: +; CHECK-NEXT: [[TMP0:%.*]] = tail call i8* @allocate(i32 12) +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BUFFER:%.*]] to i8** +; CHECK-NEXT: store i8* [[TMP0]], i8** [[TMP1]], align 8 +; CHECK-NEXT: [[N_SPILL_ADDR:%.*]] = bitcast i8* [[TMP0]] to i32* +; CHECK-NEXT: store i32 [[N:%.*]], i32* [[N_SPILL_ADDR]], align 4 ; CHECK-NEXT: ret i8* bitcast (i8* (i8*, i32, i1)* @f.resume.0 to i8*) ; entry: @@ -35,11 +38,40 @@ cleanup: define i32 @main() { ; CHECK-LABEL: @main( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = alloca [8 x i8], align 4 -; CHECK-NEXT: [[DOTSUB:%.*]] = getelementptr inbounds [8 x i8], [8 x i8]* [[TMP0]], i64 0, i64 0 -; CHECK-NEXT: [[N_VAL_RELOAD_ADDR_I1:%.*]] = bitcast [8 x i8]* [[TMP0]] to i32* -; CHECK-NEXT: store i32 7, i32* [[N_VAL_RELOAD_ADDR_I1]], align 4, !alias.scope !0 -; CHECK-NEXT: call void @print(i32 7), !noalias !3 +; CHECK-NEXT: [[TMP0:%.*]] = alloca i8*, align 8 +; CHECK-NEXT: [[DOTSUB:%.*]] = bitcast i8** [[TMP0]] to i8* +; CHECK-NEXT: [[TMP1:%.*]] = tail call i8* @allocate(i32 12) +; CHECK-NEXT: store i8* [[TMP1]], i8** [[TMP0]], align 8 +; CHECK-NEXT: [[N_SPILL_ADDR_I:%.*]] = bitcast i8* [[TMP1]] to i32* +; CHECK-NEXT: store i32 1, i32* [[N_SPILL_ADDR_I]], align 4 +; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META0:![0-9]+]]) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8** [[TMP0]] to %f.Frame** +; CHECK-NEXT: [[FRAMEPTR_I:%.*]] = load %f.Frame*, %f.Frame** [[TMP2]], align 8, !alias.scope !0 +; CHECK-NEXT: [[N_RELOAD_ADDR9_I:%.*]] = getelementptr inbounds [[F_FRAME:%.*]], %f.Frame* [[FRAMEPTR_I]], i64 0, i32 0 +; CHECK-NEXT: [[N_RELOAD10_I:%.*]] = load i32, i32* [[N_RELOAD_ADDR9_I]], align 4, !noalias !0 +; CHECK-NEXT: [[N_VAL3_SPILL_ADDR_I:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR_I]], i64 0, i32 1 +; CHECK-NEXT: store i32 [[N_RELOAD10_I]], i32* [[N_VAL3_SPILL_ADDR_I]], align 4, !noalias !0 +; CHECK-NEXT: [[INPUT_SPILL_ADDR_I:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR_I]], i64 0, i32 2 +; CHECK-NEXT: store i32 2, i32* [[INPUT_SPILL_ADDR_I]], align 4, !noalias !0 +; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +; CHECK-NEXT: [[FRAMEPTR_I1:%.*]] = load %f.Frame*, %f.Frame** [[TMP2]], align 8, !alias.scope !3 +; CHECK-NEXT: [[INPUT_RELOAD_ADDR_I:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR_I1]], i64 0, i32 2 +; CHECK-NEXT: [[INPUT_RELOAD_I:%.*]] = load i32, i32* [[INPUT_RELOAD_ADDR_I]], align 4, !noalias !3 +; CHECK-NEXT: [[N_VAL3_RELOAD_ADDR_I:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR_I1]], i64 0, i32 1 +; CHECK-NEXT: [[N_VAL3_RELOAD_I:%.*]] = load i32, i32* [[N_VAL3_RELOAD_ADDR_I]], align 4, !noalias !3 +; CHECK-NEXT: [[SUM8_I:%.*]] = add i32 [[N_VAL3_RELOAD_I]], [[INPUT_RELOAD_I]] +; CHECK-NEXT: store i32 [[SUM8_I]], i32* [[N_VAL3_RELOAD_ADDR_I]], align 4, !noalias !3 +; CHECK-NEXT: store i32 4, i32* [[INPUT_RELOAD_ADDR_I]], align 4, !noalias !3 +; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +; CHECK-NEXT: [[FRAMEPTR_I4:%.*]] = load %f.Frame*, %f.Frame** [[TMP2]], align 8, !alias.scope !6 +; CHECK-NEXT: [[INPUT_RELOAD_ADDR13_I:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR_I4]], i64 0, i32 2 +; CHECK-NEXT: [[INPUT_RELOAD14_I:%.*]] = load i32, i32* [[INPUT_RELOAD_ADDR13_I]], align 4, !noalias !6 +; CHECK-NEXT: [[N_VAL3_RELOAD_ADDR11_I:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR_I4]], i64 0, i32 1 +; CHECK-NEXT: [[N_VAL3_RELOAD12_I:%.*]] = load i32, i32* [[N_VAL3_RELOAD_ADDR11_I]], align 4, !noalias !6 +; CHECK-NEXT: [[SUM7_I:%.*]] = add i32 [[N_VAL3_RELOAD12_I]], [[INPUT_RELOAD14_I]] +; CHECK-NEXT: call void @print(i32 [[SUM7_I]]), !noalias !6 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast %f.Frame* [[FRAMEPTR_I4]] to i8* +; CHECK-NEXT: call void @deallocate(i8* [[TMP3]]), !noalias !6 ; CHECK-NEXT: ret i32 0 ; entry: diff --git a/llvm/test/Transforms/Coroutines/coro-retcon-resume-values2.ll b/llvm/test/Transforms/Coroutines/coro-retcon-resume-values2.ll index 6e36b632fad2f..5d0ed292d8fcd 100644 --- a/llvm/test/Transforms/Coroutines/coro-retcon-resume-values2.ll +++ b/llvm/test/Transforms/Coroutines/coro-retcon-resume-values2.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -passes='coro-split,coro-cleanup' -S | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse,simplify-cfg,coro-cleanup' -S | FileCheck %s define i8* @f(i8* %buffer, i32 %n) "coroutine.presplit"="1" { entry: @@ -18,72 +19,6 @@ entry: unreachable } -; CHECK-LABEL: define i8* @f(i8* %buffer, i32 %n) -; CHECK-NEXT: entry: -; CHECK-NEXT: [[ALLOC:%.*]] = call i8* @allocate(i32 20) -; CHECK-NEXT: [[T0:%.*]] = bitcast i8* %buffer to i8** -; CHECK-NEXT: store i8* [[ALLOC]], i8** [[T0]] -; CHECK-NEXT: [[FRAME:%.*]] = bitcast i8* [[ALLOC]] to [[FRAME_T:%.*]]* -; CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[FRAME_T]], [[FRAME_T]]* [[FRAME]], i32 0, i32 0 -; CHECK-NEXT: store i32 %n, i32* [[T0]] -; CHECK-NEXT: ret i8* bitcast (i8* (i8*, i32)* @f.resume.0 to i8*) -; CHECK-NEXT: } - -; CHECK-LABEL: define internal i8* @f.resume.0(i8* noalias nonnull align 4 dereferenceable(8) %0, i32 %1) -; CHECK-NEXT: : -; CHECK-NEXT: [[T0:%.*]] = bitcast i8* %0 to [[FRAME_T:%.*]]** -; CHECK-NEXT: [[FRAME:%.*]] = load [[FRAME_T]]*, [[FRAME_T]]** [[T0]] -; CHECK-NEXT: [[VFRAME:%.*]] = bitcast [[FRAME_T]]* [[FRAME]] to i8* -; CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[FRAME_T]], [[FRAME_T]]* [[FRAME]], i32 0, i32 1 -; CHECK-NEXT: store i32 %1, i32* [[T0]] -; CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[FRAME_T]], [[FRAME_T]]* [[FRAME]], i32 0, i32 0 -; CHECK-NEXT: [[N:%.*]] = load i32, i32* [[T0]] -; CHECK-NEXT: %sum0 = call i32 @add(i32 [[N]], i32 %1) -; CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[FRAME_T]], [[FRAME_T]]* [[FRAME]], i32 0, i32 2 -; CHECK-NEXT: store i32 %sum0, i32* [[T0]] -; CHECK-NEXT: [[CONT:%.*]] = bitcast i8* (i8*, i32)* @f.resume.1 to i8* -; CHECK-NEXT: ret i8* [[CONT]] -; CHECK-NEXT: } - -; CHECK-LABEL: define internal i8* @f.resume.1(i8* noalias nonnull align 4 dereferenceable(8) %0, i32 %1) -; CHECK-NEXT: : -; CHECK-NEXT: [[T0:%.*]] = bitcast i8* %0 to [[FRAME_T:%.*]]** -; CHECK-NEXT: [[FRAME:%.*]] = load [[FRAME_T]]*, [[FRAME_T]]** [[T0]] -; CHECK-NEXT: [[VFRAME:%.*]] = bitcast [[FRAME_T]]* [[FRAME]] to i8* -; CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[FRAME_T]], [[FRAME_T]]* [[FRAME]], i32 0, i32 3 -; CHECK-NEXT: store i32 %1, i32* [[T0]] -; CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[FRAME_T]], [[FRAME_T]]* [[FRAME]], i32 0, i32 2 -; CHECK-NEXT: [[SUM0:%.*]] = load i32, i32* [[T0]] -; CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[FRAME_T]], [[FRAME_T]]* [[FRAME]], i32 0, i32 1 -; CHECK-NEXT: [[VALUE0:%.*]] = load i32, i32* [[T0]] -; CHECK-NEXT: %sum1 = call i32 @add(i32 [[SUM0]], i32 [[VALUE0]]) -; CHECK-NEXT: %sum2 = call i32 @add(i32 %sum1, i32 %1) -; CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[FRAME_T]], [[FRAME_T]]* [[FRAME]], i32 0, i32 4 -; CHECK-NEXT: store i32 %sum2, i32* [[T0]] -; CHECK-NEXT: [[CONT:%.*]] = bitcast i8* (i8*, i32)* @f.resume.2 to i8* -; CHECK-NEXT: ret i8* [[CONT]] -; CHECK-NEXT: } - -; CHECK-LABEL: define internal i8* @f.resume.2(i8* noalias nonnull align 4 dereferenceable(8) %0, i32 %1) -; CHECK-NEXT: : -; CHECK-NEXT: [[T0:%.*]] = bitcast i8* %0 to [[FRAME_T:%.*]]** -; CHECK-NEXT: [[FRAME:%.*]] = load [[FRAME_T]]*, [[FRAME_T]]** [[T0]] -; CHECK-NEXT: [[VFRAME:%.*]] = bitcast [[FRAME_T]]* [[FRAME]] to i8* -; CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[FRAME_T]], [[FRAME_T]]* [[FRAME]], i32 0, i32 4 -; CHECK-NEXT: [[SUM2:%.*]] = load i32, i32* [[T0]] -; CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[FRAME_T]], [[FRAME_T]]* [[FRAME]], i32 0, i32 3 -; CHECK-NEXT: [[VALUE1:%.*]] = load i32, i32* [[T0]] -; CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[FRAME_T]], [[FRAME_T]]* [[FRAME]], i32 0, i32 1 -; CHECK-NEXT: [[VALUE0:%.*]] = load i32, i32* [[T0]] -; CHECK-NEXT: %sum3 = call i32 @add(i32 [[SUM2]], i32 [[VALUE0]]) -; CHECK-NEXT: %sum4 = call i32 @add(i32 %sum3, i32 [[VALUE1]]) -; CHECK-NEXT: %sum5 = call i32 @add(i32 %sum4, i32 %1) -; CHECK-NEXT: call void @print(i32 %sum5) -; CHECK-NEXT: [[CONT:%.*]] = bitcast [[FRAME_T]]* [[FRAME]] to i8* -; CHECK-NEXT: call void @deallocate(i8* [[CONT]]) -; CHECK-NEXT: ret i8* null -; CHECK-NEXT: } - declare token @llvm.coro.id.retcon(i32, i32, i8*, i8*, i8*, i8*) declare i8* @llvm.coro.begin(token, i8*) declare i32 @llvm.coro.suspend.retcon.i32(...) @@ -98,3 +33,63 @@ declare void @deallocate(i8* %ptr) declare i32 @add(i32, i32) declare void @print(i32) +; CHECK-LABEL: @f( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = call i8* @allocate(i32 20) +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BUFFER:%.*]] to i8** +; CHECK-NEXT: store i8* [[TMP0]], i8** [[TMP1]], align 8 +; CHECK-NEXT: [[FRAMEPTR:%.*]] = bitcast i8* [[TMP0]] to %f.Frame* +; CHECK-NEXT: [[N_SPILL_ADDR:%.*]] = getelementptr inbounds [[F_FRAME:%.*]], %f.Frame* [[FRAMEPTR]], i32 0, i32 0 +; CHECK-NEXT: store i32 [[N:%.*]], i32* [[N_SPILL_ADDR]], align 4 +; CHECK-NEXT: ret i8* bitcast (i8* (i8*, i32)* @f.resume.0 to i8*) +; +; +; CHECK-LABEL: @f.resume.0( +; CHECK-NEXT: entryresume.0: +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP0:%.*]] to %f.Frame** +; CHECK-NEXT: [[FRAMEPTR:%.*]] = load %f.Frame*, %f.Frame** [[TMP2]], align 8 +; CHECK-NEXT: [[VALUE0_SPILL_ADDR:%.*]] = getelementptr inbounds [[F_FRAME:%.*]], %f.Frame* [[FRAMEPTR]], i32 0, i32 1 +; CHECK-NEXT: store i32 [[TMP1:%.*]], i32* [[VALUE0_SPILL_ADDR]], align 4 +; CHECK-NEXT: [[N_RELOAD_ADDR:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR]], i32 0, i32 0 +; CHECK-NEXT: [[N_RELOAD:%.*]] = load i32, i32* [[N_RELOAD_ADDR]], align 4 +; CHECK-NEXT: [[SUM0:%.*]] = call i32 @add(i32 [[N_RELOAD]], i32 [[TMP1]]) +; CHECK-NEXT: [[SUM0_SPILL_ADDR:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR]], i32 0, i32 2 +; CHECK-NEXT: store i32 [[SUM0]], i32* [[SUM0_SPILL_ADDR]], align 4 +; CHECK-NEXT: ret i8* bitcast (i8* (i8*, i32)* @f.resume.1 to i8*) +; +; +; CHECK-LABEL: @f.resume.1( +; CHECK-NEXT: entryresume.1: +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP0:%.*]] to %f.Frame** +; CHECK-NEXT: [[FRAMEPTR:%.*]] = load %f.Frame*, %f.Frame** [[TMP2]], align 8 +; CHECK-NEXT: [[VALUE1_SPILL_ADDR:%.*]] = getelementptr inbounds [[F_FRAME:%.*]], %f.Frame* [[FRAMEPTR]], i32 0, i32 3 +; CHECK-NEXT: store i32 [[TMP1:%.*]], i32* [[VALUE1_SPILL_ADDR]], align 4 +; CHECK-NEXT: [[SUM0_RELOAD_ADDR:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR]], i32 0, i32 2 +; CHECK-NEXT: [[SUM0_RELOAD:%.*]] = load i32, i32* [[SUM0_RELOAD_ADDR]], align 4 +; CHECK-NEXT: [[VALUE0_RELOAD_ADDR5:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR]], i32 0, i32 1 +; CHECK-NEXT: [[VALUE0_RELOAD6:%.*]] = load i32, i32* [[VALUE0_RELOAD_ADDR5]], align 4 +; CHECK-NEXT: [[SUM1:%.*]] = call i32 @add(i32 [[SUM0_RELOAD]], i32 [[VALUE0_RELOAD6]]) +; CHECK-NEXT: [[SUM2:%.*]] = call i32 @add(i32 [[SUM1]], i32 [[TMP1]]) +; CHECK-NEXT: [[SUM2_SPILL_ADDR:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR]], i32 0, i32 4 +; CHECK-NEXT: store i32 [[SUM2]], i32* [[SUM2_SPILL_ADDR]], align 4 +; CHECK-NEXT: ret i8* bitcast (i8* (i8*, i32)* @f.resume.2 to i8*) +; +; +; CHECK-LABEL: @f.resume.2( +; CHECK-NEXT: entryresume.2: +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP0:%.*]] to %f.Frame** +; CHECK-NEXT: [[FRAMEPTR:%.*]] = load %f.Frame*, %f.Frame** [[TMP2]], align 8 +; CHECK-NEXT: [[SUM2_RELOAD_ADDR:%.*]] = getelementptr inbounds [[F_FRAME:%.*]], %f.Frame* [[FRAMEPTR]], i32 0, i32 4 +; CHECK-NEXT: [[SUM2_RELOAD:%.*]] = load i32, i32* [[SUM2_RELOAD_ADDR]], align 4 +; CHECK-NEXT: [[VALUE1_RELOAD_ADDR:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR]], i32 0, i32 3 +; CHECK-NEXT: [[VALUE1_RELOAD:%.*]] = load i32, i32* [[VALUE1_RELOAD_ADDR]], align 4 +; CHECK-NEXT: [[VALUE0_RELOAD_ADDR:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR]], i32 0, i32 1 +; CHECK-NEXT: [[VALUE0_RELOAD:%.*]] = load i32, i32* [[VALUE0_RELOAD_ADDR]], align 4 +; CHECK-NEXT: [[SUM3:%.*]] = call i32 @add(i32 [[SUM2_RELOAD]], i32 [[VALUE0_RELOAD]]) +; CHECK-NEXT: [[SUM4:%.*]] = call i32 @add(i32 [[SUM3]], i32 [[VALUE1_RELOAD]]) +; CHECK-NEXT: [[SUM5:%.*]] = call i32 @add(i32 [[SUM4]], i32 [[TMP1:%.*]]) +; CHECK-NEXT: call void @print(i32 [[SUM5]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast %f.Frame* [[FRAMEPTR]] to i8* +; CHECK-NEXT: call void @deallocate(i8* [[TMP3]]) +; CHECK-NEXT: ret i8* null +; diff --git a/llvm/test/Transforms/Coroutines/coro-retcon-unreachable.ll b/llvm/test/Transforms/Coroutines/coro-retcon-unreachable.ll index 38f22dd2d3e2a..17795f068ad3a 100644 --- a/llvm/test/Transforms/Coroutines/coro-retcon-unreachable.ll +++ b/llvm/test/Transforms/Coroutines/coro-retcon-unreachable.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -passes='function(coro-early),cgscc(coro-split)' -S | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes='function(coro-early),cgscc(coro-split),function(simplify-cfg,early-cse)' -S | FileCheck %s target datalayout = "E-p:64:64" %swift.type = type { i64 } @@ -7,6 +8,13 @@ target datalayout = "E-p:64:64" %TSi = type <{ i64 }> define hidden swiftcc { i8*, %swift.opaque* } @no_suspends(i8* %buffer, i64 %arg) #1 { +; CHECK-LABEL: @no_suspends( +; CHECK-NEXT: AllocaSpillBB: +; CHECK-NEXT: [[ID:%.*]] = call token @llvm.coro.id.retcon.once(i32 32, i32 8, i8* [[BUFFER:%.*]], i8* bitcast (void (i8*, i1)* @prototype to i8*), i8* bitcast (i8* (i64)* @malloc to i8*), i8* bitcast (void (i8*)* @free to i8*)) +; CHECK-NEXT: call void @print(i64 [[ARG:%.*]]) +; CHECK-NEXT: call void @llvm.trap() +; CHECK-NEXT: unreachable +; %id = call token @llvm.coro.id.retcon.once(i32 32, i32 8, i8* %buffer, i8* bitcast (void (i8*, i1)* @prototype to i8*), i8* bitcast (i8* (i64)* @malloc to i8*), i8* bitcast (void (i8*)* @free to i8*)) %begin = call i8* @llvm.coro.begin(token %id, i8* null) call void @print(i64 %arg) @@ -18,11 +26,6 @@ bb1: call i1 @llvm.coro.end(i8* %begin, i1 false) unreachable } -; CHECK-LABEL: define hidden swiftcc { i8*, %swift.opaque* } @no_suspends( -; CHECK: call token @llvm.coro.id.retcon.once -; CHECK-NEXT: call void @print(i64 %arg) -; CHECK-NEXT: call void @llvm.trap() -; CHECK-NEXT: unreachable declare swiftcc void @prototype(i8* noalias dereferenceable(32), i1) declare void @print(i64) diff --git a/llvm/test/Transforms/Coroutines/coro-retcon-value.ll b/llvm/test/Transforms/Coroutines/coro-retcon-value.ll index 7e9282ef1b360..36cc659ff44da 100644 --- a/llvm/test/Transforms/Coroutines/coro-retcon-value.ll +++ b/llvm/test/Transforms/Coroutines/coro-retcon-value.ll @@ -4,7 +4,7 @@ define {i8*, i32} @f(i8* %buffer, i32 %n) { ; CHECK-LABEL: @f( -; CHECK-NEXT: entry: +; CHECK-NEXT: coro.return: ; CHECK-NEXT: [[N_VAL_SPILL_ADDR:%.*]] = bitcast i8* [[BUFFER:%.*]] to i32* ; CHECK-NEXT: store i32 [[N:%.*]], i32* [[N_VAL_SPILL_ADDR]], align 4 ; CHECK-NEXT: [[TMP0:%.*]] = insertvalue { i8*, i32 } { i8* bitcast ({ i8*, i32 } (i8*, i8)* @f.resume.0 to i8*), i32 undef }, i32 [[N]], 1 @@ -44,10 +44,10 @@ define i32 @main() { ; CHECK-NEXT: [[INC_I:%.*]] = add i32 [[N_VAL_RELOAD_I]], 1 ; CHECK-NEXT: store i32 [[INC_I]], i32* [[N_VAL_SPILL_ADDR_I]], align 4, !alias.scope !0 ; CHECK-NEXT: call void @print(i32 [[INC_I]]) -; CHECK-NEXT: [[N_VAL_RELOAD_I1:%.*]] = load i32, i32* [[N_VAL_SPILL_ADDR_I]], align 4, !alias.scope !3 -; CHECK-NEXT: [[INC_I2:%.*]] = add i32 [[N_VAL_RELOAD_I1]], 1 -; CHECK-NEXT: store i32 [[INC_I2]], i32* [[N_VAL_SPILL_ADDR_I]], align 4, !alias.scope !3 -; CHECK-NEXT: call void @print(i32 [[INC_I2]]) +; CHECK-NEXT: [[N_VAL_RELOAD_I3:%.*]] = load i32, i32* [[N_VAL_SPILL_ADDR_I]], align 4, !alias.scope !3 +; CHECK-NEXT: [[INC_I4:%.*]] = add i32 [[N_VAL_RELOAD_I3]], 1 +; CHECK-NEXT: store i32 [[INC_I4]], i32* [[N_VAL_SPILL_ADDR_I]], align 4, !alias.scope !3 +; CHECK-NEXT: call void @print(i32 [[INC_I4]]) ; CHECK-NEXT: ret i32 0 ; entry: @@ -89,4 +89,3 @@ declare noalias i8* @allocate(i32 %size) declare void @deallocate(i8* %ptr) declare void @print(i32) - diff --git a/llvm/test/Transforms/Coroutines/coro-retcon.ll b/llvm/test/Transforms/Coroutines/coro-retcon.ll index c5c97deb135f3..5c9e33897139b 100644 --- a/llvm/test/Transforms/Coroutines/coro-retcon.ll +++ b/llvm/test/Transforms/Coroutines/coro-retcon.ll @@ -1,14 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; First example from Doc/Coroutines.rst (two block loop) converted to retcon -; RUN: opt < %s -enable-coroutines -passes='default' -S | FileCheck --check-prefixes=ALL,NEWPM %s +; RUN: opt < %s -enable-coroutines -passes='default' -S | FileCheck %s define i8* @f(i8* %buffer, i32 %n) { -; ALL-LABEL: @f( -; ALL-NEXT: entry: -; ALL-NEXT: [[N_VAL_SPILL_ADDR:%.*]] = bitcast i8* [[BUFFER:%.*]] to i32* -; ALL-NEXT: store i32 [[N:%.*]], i32* [[N_VAL_SPILL_ADDR]], align 4 -; ALL-NEXT: tail call void @print(i32 [[N]]) -; ALL-NEXT: ret i8* bitcast (i8* (i8*, i1)* @f.resume.0 to i8*) +; CHECK-LABEL: @f( +; CHECK-NEXT: coro.return: +; CHECK-NEXT: [[N_VAL_SPILL_ADDR:%.*]] = bitcast i8* [[BUFFER:%.*]] to i32* +; CHECK-NEXT: store i32 [[N:%.*]], i32* [[N_VAL_SPILL_ADDR]], align 4 +; CHECK-NEXT: tail call void @print(i32 [[N]]) +; CHECK-NEXT: ret i8* bitcast (i8* (i8*, i1)* @f.resume.0 to i8*) ; entry: %id = call token @llvm.coro.id.retcon(i32 8, i32 4, i8* %buffer, i8* bitcast (i8* (i8*, i1)* @prototype to i8*), i8* bitcast (i8* (i32)* @allocate to i8*), i8* bitcast (void (i8*)* @deallocate to i8*)) @@ -33,23 +33,23 @@ cleanup: define i32 @main() { -; ALL-LABEL: @main( -; ALL-NEXT: entry: -; ALL-NEXT: [[TMP0:%.*]] = alloca [8 x i8], align 4 -; ALL-NEXT: [[DOTSUB:%.*]] = getelementptr inbounds [8 x i8], [8 x i8]* [[TMP0]], i64 0, i64 0 -; ALL-NEXT: [[N_VAL_SPILL_ADDR_I:%.*]] = bitcast [8 x i8]* [[TMP0]] to i32* -; ALL-NEXT: store i32 4, i32* [[N_VAL_SPILL_ADDR_I]], align 4 -; ALL-NEXT: call void @print(i32 4) -; ALL-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META0:![0-9]+]]) -; ALL-NEXT: [[N_VAL_RELOAD_I:%.*]] = load i32, i32* [[N_VAL_SPILL_ADDR_I]], align 4, !alias.scope !0 -; ALL-NEXT: [[INC_I:%.*]] = add i32 [[N_VAL_RELOAD_I]], 1 -; ALL-NEXT: store i32 [[INC_I]], i32* [[N_VAL_SPILL_ADDR_I]], align 4, !alias.scope !0 -; ALL-NEXT: call void @print(i32 [[INC_I]]), !noalias !0 -; ALL-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) -; ALL-NEXT: [[N_VAL_RELOAD_I1:%.*]] = load i32, i32* [[N_VAL_SPILL_ADDR_I]], align 4, !alias.scope !3 -; ALL-NEXT: [[INC_I2:%.*]] = add i32 [[N_VAL_RELOAD_I1]], 1 -; ALL-NEXT: call void @print(i32 [[INC_I2]]), !noalias !3 -; ALL-NEXT: ret i32 0 +; CHECK-LABEL: @main( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = alloca [8 x i8], align 4 +; CHECK-NEXT: [[DOTSUB:%.*]] = getelementptr inbounds [8 x i8], [8 x i8]* [[TMP0]], i64 0, i64 0 +; CHECK-NEXT: [[N_VAL_SPILL_ADDR_I:%.*]] = bitcast [8 x i8]* [[TMP0]] to i32* +; CHECK-NEXT: store i32 4, i32* [[N_VAL_SPILL_ADDR_I]], align 4 +; CHECK-NEXT: call void @print(i32 4) +; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META0:![0-9]+]]) +; CHECK-NEXT: [[N_VAL_RELOAD_I:%.*]] = load i32, i32* [[N_VAL_SPILL_ADDR_I]], align 4, !alias.scope !0 +; CHECK-NEXT: [[INC_I:%.*]] = add i32 [[N_VAL_RELOAD_I]], 1 +; CHECK-NEXT: store i32 [[INC_I]], i32* [[N_VAL_SPILL_ADDR_I]], align 4, !alias.scope !0 +; CHECK-NEXT: call void @print(i32 [[INC_I]]), !noalias !0 +; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +; CHECK-NEXT: [[N_VAL_RELOAD_I3:%.*]] = load i32, i32* [[N_VAL_SPILL_ADDR_I]], align 4, !alias.scope !3 +; CHECK-NEXT: [[INC_I4:%.*]] = add i32 [[N_VAL_RELOAD_I3]], 1 +; CHECK-NEXT: call void @print(i32 [[INC_I4]]), !noalias !3 +; CHECK-NEXT: ret i32 0 ; entry: %0 = alloca [8 x i8], align 4 @@ -70,27 +70,16 @@ entry: ; to some sort of phase-ordering thing. define hidden { i8*, i8* } @g(i8* %buffer, i16* %ptr) { -; OLDPM-LABEL: @g( -; OLDPM-NEXT: entry: -; OLDPM-NEXT: [[TMP0:%.*]] = tail call i8* @allocate(i32 8) #[[ATTR0:[0-9]+]] -; OLDPM-NEXT: [[TMP1:%.*]] = bitcast i8* [[BUFFER:%.*]] to i8** -; OLDPM-NEXT: store i8* [[TMP0]], i8** [[TMP1]], align 8 -; OLDPM-NEXT: [[PTR_SPILL_ADDR:%.*]] = bitcast i8* [[TMP0]] to i16** -; OLDPM-NEXT: store i16* [[PTR:%.*]], i16** [[PTR_SPILL_ADDR]], align 8 -; OLDPM-NEXT: [[TMP2:%.*]] = bitcast i16* [[PTR]] to i8* -; OLDPM-NEXT: [[TMP3:%.*]] = insertvalue { i8*, i8* } { i8* bitcast ({ i8*, i8* } (i8*, i1)* @g.resume.0 to i8*), i8* undef }, i8* [[TMP2]], 1 -; OLDPM-NEXT: ret { i8*, i8* } [[TMP3]] -; -; NEWPM-LABEL: @g( -; NEWPM-NEXT: entry: -; NEWPM-NEXT: [[TMP0:%.*]] = tail call i8* @allocate(i32 8) -; NEWPM-NEXT: [[TMP1:%.*]] = bitcast i8* [[BUFFER:%.*]] to i8** -; NEWPM-NEXT: store i8* [[TMP0]], i8** [[TMP1]], align 8 -; NEWPM-NEXT: [[PTR_SPILL_ADDR:%.*]] = bitcast i8* [[TMP0]] to i16** -; NEWPM-NEXT: store i16* [[PTR:%.*]], i16** [[PTR_SPILL_ADDR]], align 8 -; NEWPM-NEXT: [[TMP2:%.*]] = bitcast i16* [[PTR]] to i8* -; NEWPM-NEXT: [[TMP3:%.*]] = insertvalue { i8*, i8* } { i8* bitcast ({ i8*, i8* } (i8*, i1)* @g.resume.0 to i8*), i8* undef }, i8* [[TMP2]], 1 -; NEWPM-NEXT: ret { i8*, i8* } [[TMP3]] +; CHECK-LABEL: @g( +; CHECK-NEXT: coro.return: +; CHECK-NEXT: [[TMP0:%.*]] = tail call i8* @allocate(i32 8) #[[ATTR0:[0-9]+]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BUFFER:%.*]] to i8** +; CHECK-NEXT: store i8* [[TMP0]], i8** [[TMP1]], align 8 +; CHECK-NEXT: [[PTR_SPILL_ADDR:%.*]] = bitcast i8* [[TMP0]] to i16** +; CHECK-NEXT: store i16* [[PTR:%.*]], i16** [[PTR_SPILL_ADDR]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[PTR]] to i8* +; CHECK-NEXT: [[TMP3:%.*]] = insertvalue { i8*, i8* } { i8* bitcast ({ i8*, i8* } (i8*, i1)* @g.resume.0 to i8*), i8* undef }, i8* [[TMP2]], 1 +; CHECK-NEXT: ret { i8*, i8* } [[TMP3]] ; entry: %id = call token @llvm.coro.id.retcon(i32 8, i32 4, i8* %buffer, i8* bitcast ({ i8*, i8* } (i8*, i1)* @g_prototype to i8*), i8* bitcast (i8* (i32)* @allocate to i8*), i8* bitcast (void (i8*)* @deallocate to i8*)) @@ -123,4 +112,3 @@ declare noalias i8* @allocate(i32 %size) declare void @deallocate(i8* %ptr) declare void @print(i32) - diff --git a/llvm/test/Transforms/Coroutines/coro-spill-after-phi.ll b/llvm/test/Transforms/Coroutines/coro-spill-after-phi.ll index 0e510389ad8e1..3947ec0561ad9 100644 --- a/llvm/test/Transforms/Coroutines/coro-spill-after-phi.ll +++ b/llvm/test/Transforms/Coroutines/coro-spill-after-phi.ll @@ -1,7 +1,31 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; Verifies that we insert spills of PHI instruction _after) all PHI Nodes -; RUN: opt < %s -passes=coro-split -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse,simplify-cfg' -S | FileCheck %s + +; Verifies that the both phis are stored correctly in the coroutine frame +; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i32, i32, i1 } define i8* @f(i1 %n) "coroutine.presplit"="1" { +; CHECK-LABEL: @f( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ID:%.*]] = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* bitcast ([3 x void (%f.Frame*)*]* @f.resumers to i8*)) +; CHECK-NEXT: [[ALLOC:%.*]] = call i8* @malloc(i32 32) +; CHECK-NEXT: [[HDL:%.*]] = call noalias nonnull i8* @llvm.coro.begin(token [[ID]], i8* [[ALLOC]]) +; CHECK-NEXT: [[FRAMEPTR:%.*]] = bitcast i8* [[HDL]] to %f.Frame* +; CHECK-NEXT: [[RESUME_ADDR:%.*]] = getelementptr inbounds [[F_FRAME:%.*]], %f.Frame* [[FRAMEPTR]], i32 0, i32 0 +; CHECK-NEXT: store void (%f.Frame*)* @f.resume, void (%f.Frame*)** [[RESUME_ADDR]], align 8 +; CHECK-NEXT: [[DESTROY_ADDR:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR]], i32 0, i32 1 +; CHECK-NEXT: store void (%f.Frame*)* @f.destroy, void (%f.Frame*)** [[DESTROY_ADDR]], align 8 +; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[N:%.*]], i32 0, i32 2 +; CHECK-NEXT: [[SPEC_SELECT5:%.*]] = select i1 [[N]], i32 1, i32 3 +; CHECK-NEXT: [[PHI2_SPILL_ADDR:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR]], i32 0, i32 3 +; CHECK-NEXT: store i32 [[SPEC_SELECT5]], i32* [[PHI2_SPILL_ADDR]], align 4 +; CHECK-NEXT: [[PHI1_SPILL_ADDR:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR]], i32 0, i32 2 +; CHECK-NEXT: store i32 [[SPEC_SELECT]], i32* [[PHI1_SPILL_ADDR]], align 4 +; CHECK-NEXT: [[INDEX_ADDR4:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR]], i32 0, i32 4 +; CHECK-NEXT: store i1 false, i1* [[INDEX_ADDR4]], align 1 +; CHECK-NEXT: ret i8* [[HDL]] +; entry: %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) %size = call i32 @llvm.coro.size.i32() @@ -17,7 +41,7 @@ begin: %sp1 = call i8 @llvm.coro.suspend(token none, i1 false) switch i8 %sp1, label %suspend [i8 0, label %resume - i8 1, label %cleanup] + i8 1, label %cleanup] resume: call i32 @print(i32 %phi1) call i32 @print(i32 %phi2) @@ -32,18 +56,6 @@ suspend: ret i8* %hdl } -; Verifies that the both phis are stored correctly in the coroutine frame -; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i32, i32, i1 } -; CHECK-LABEL: @f( -; CHECK: store void (%f.Frame*)* @f.destroy, void (%f.Frame*)** %destroy.addr -; CHECK: %phi1 = select i1 %n, i32 0, i32 2 -; CHECK: %phi2 = select i1 %n, i32 1, i32 3 -; CHECK: %phi2.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 3 -; CHECK: store i32 %phi2, i32* %phi2.spill.addr -; CHECK: %phi1.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 2 -; CHECK: store i32 %phi1, i32* %phi1.spill.addr -; CHECK: ret i8* %hdl - declare i8* @llvm.coro.free(token, i8*) declare i32 @llvm.coro.size.i32() declare i8 @llvm.coro.suspend(token, i1) diff --git a/llvm/test/Transforms/Coroutines/coro-spill-corobegin.ll b/llvm/test/Transforms/Coroutines/coro-spill-corobegin.ll index 5ad8e07bf4644..e361699c24bc1 100644 --- a/llvm/test/Transforms/Coroutines/coro-spill-corobegin.ll +++ b/llvm/test/Transforms/Coroutines/coro-spill-corobegin.ll @@ -1,5 +1,5 @@ ; Check that we can spills coro.begin from an inlined inner coroutine. -; RUN: opt < %s -passes=coro-split -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s %g.Frame = type { void (%g.Frame*)*, void (%g.Frame*)*, i32, i1, i32 } diff --git a/llvm/test/Transforms/Coroutines/coro-spill-defs-before-corobegin.ll b/llvm/test/Transforms/Coroutines/coro-spill-defs-before-corobegin.ll index 0b8021757c2f4..783621862234b 100644 --- a/llvm/test/Transforms/Coroutines/coro-spill-defs-before-corobegin.ll +++ b/llvm/test/Transforms/Coroutines/coro-spill-defs-before-corobegin.ll @@ -1,5 +1,5 @@ ; Verifies that phi and invoke definitions before CoroBegin are spilled properly. -; RUN: opt < %s -passes=coro-split -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse,simplify-cfg' -S | FileCheck %s define i8* @f(i1 %n) "coroutine.presplit"="1" personality i32 0 { entry: @@ -51,7 +51,7 @@ lpad: ; CHECK-LABEL: @f( ; CHECK: %alloc = call i8* @malloc(i32 32) ; CHECK-NEXT: %flag = call i1 @check(i8* %alloc) -; CHECK-NEXT: %value_phi = select i1 %flag, i32 0, i32 1 +; CHECK-NEXT: %spec.select = select i1 %flag, i32 0, i32 1 ; CHECK-NEXT: %value_invoke = call i32 @calc() ; CHECK-NEXT: %hdl = call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %alloc) @@ -59,7 +59,7 @@ lpad: ; CHECK-NEXT: %value_invoke.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 3 ; CHECK-NEXT: store i32 %value_invoke, i32* %value_invoke.spill.addr ; CHECK-NEXT: %value_phi.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 2 -; CHECK-NEXT: store i32 %value_phi, i32* %value_phi.spill.addr +; CHECK-NEXT: store i32 %spec.select, i32* %value_phi.spill.addr declare i8* @llvm.coro.free(token, i8*) declare i32 @llvm.coro.size.i32() diff --git a/llvm/test/Transforms/Coroutines/coro-spill-promise.ll b/llvm/test/Transforms/Coroutines/coro-spill-promise.ll index 6a226b1285af7..2eb3454cf6b7d 100644 --- a/llvm/test/Transforms/Coroutines/coro-spill-promise.ll +++ b/llvm/test/Transforms/Coroutines/coro-spill-promise.ll @@ -1,5 +1,5 @@ ; Check that promise object is reloaded from the correct index of the coro frame. -; RUN: opt < %s -passes=coro-split -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s %"class.task::promise_type" = type { [64 x i8] } diff --git a/llvm/test/Transforms/Coroutines/coro-split-00.ll b/llvm/test/Transforms/Coroutines/coro-split-00.ll index 114c1d2918562..eac0d177d8a76 100644 --- a/llvm/test/Transforms/Coroutines/coro-split-00.ll +++ b/llvm/test/Transforms/Coroutines/coro-split-00.ll @@ -1,5 +1,5 @@ ; Tests that coro-split pass splits the coroutine into f, f.resume and f.destroy -; RUN: opt < %s -passes=coro-split -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s define i8* @f() "coroutine.presplit"="1" { entry: diff --git a/llvm/test/Transforms/Coroutines/coro-split-02.ll b/llvm/test/Transforms/Coroutines/coro-split-02.ll index 88e5eb505d9ec..caaf0e3e7d1a8 100644 --- a/llvm/test/Transforms/Coroutines/coro-split-02.ll +++ b/llvm/test/Transforms/Coroutines/coro-split-02.ll @@ -1,7 +1,7 @@ ; Tests that coro-split can handle the case when a code after coro.suspend uses ; a value produces between coro.save and coro.suspend (%Result.i19) ; and checks whether stray coro.saves are properly removed -; RUN: opt < %s -passes=coro-split -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse,simplify-cfg' -S | FileCheck %s %"struct.std::coroutine_handle" = type { i8* } %"struct.std::coroutine_handle.0" = type { %"struct.std::coroutine_handle" } @@ -68,4 +68,3 @@ declare i8* @llvm.coro.free(token, i8* nocapture readonly) #2 declare i1 @llvm.coro.end(i8*, i1) #3 declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #4 declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #4 - diff --git a/llvm/test/Transforms/Coroutines/coro-split-alloc.ll b/llvm/test/Transforms/Coroutines/coro-split-alloc.ll index 0581076ad76fb..09ee4443c2ade 100644 --- a/llvm/test/Transforms/Coroutines/coro-split-alloc.ll +++ b/llvm/test/Transforms/Coroutines/coro-split-alloc.ll @@ -1,5 +1,5 @@ ; Tests that coro-split passes initialized values to coroutine frame allocator. -; RUN: opt < %s -passes=coro-split -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s define i8* @f(i32 %argument) "coroutine.presplit"="1" { entry: diff --git a/llvm/test/Transforms/Coroutines/coro-split-dbg.ll b/llvm/test/Transforms/Coroutines/coro-split-dbg.ll index c8e1ec121d0d1..b64b0e5a6c902 100644 --- a/llvm/test/Transforms/Coroutines/coro-split-dbg.ll +++ b/llvm/test/Transforms/Coroutines/coro-split-dbg.ll @@ -1,6 +1,6 @@ ; Make sure that coro-split correctly deals with debug information. ; The test here is simply that it does not result in bad IR that will crash opt. -; RUN: opt < %s -passes=coro-split -disable-output +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -disable-output source_filename = "coro.c" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/Coroutines/coro-split-eh-00.ll b/llvm/test/Transforms/Coroutines/coro-split-eh-00.ll index eb222250824b4..7b9ca3d7ec1fe 100644 --- a/llvm/test/Transforms/Coroutines/coro-split-eh-00.ll +++ b/llvm/test/Transforms/Coroutines/coro-split-eh-00.ll @@ -1,6 +1,6 @@ ; Tests that coro-split removes cleanup code after coro.end in resume functions ; and retains it in the start function. -; RUN: opt < %s -passes=coro-split -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s define i8* @f(i1 %val) "coroutine.presplit"="1" personality i32 3 { entry: @@ -9,15 +9,15 @@ entry: call void @print(i32 0) br i1 %val, label %resume, label %susp -susp: +susp: %0 = call i8 @llvm.coro.suspend(token none, i1 false) - switch i8 %0, label %suspend [i8 0, label %resume + switch i8 %0, label %suspend [i8 0, label %resume i8 1, label %suspend] resume: invoke void @print(i32 1) to label %suspend unwind label %lpad suspend: - call i1 @llvm.coro.end(i8* %hdl, i1 0) + call i1 @llvm.coro.end(i8* %hdl, i1 0) call void @print(i32 0) ; should not be present in f.resume ret i8* %hdl @@ -78,9 +78,8 @@ declare void @llvm.coro.destroy(i8*) declare token @llvm.coro.id(i32, i8*, i8*, i8*) declare i8* @llvm.coro.alloc(token) declare i8* @llvm.coro.begin(token, i8*) -declare i1 @llvm.coro.end(i8*, i1) +declare i1 @llvm.coro.end(i8*, i1) declare noalias i8* @malloc(i32) declare void @print(i32) declare void @free(i8*) - diff --git a/llvm/test/Transforms/Coroutines/coro-split-eh-01.ll b/llvm/test/Transforms/Coroutines/coro-split-eh-01.ll index 3d5bbf1a48535..f57fe6d3a519a 100644 --- a/llvm/test/Transforms/Coroutines/coro-split-eh-01.ll +++ b/llvm/test/Transforms/Coroutines/coro-split-eh-01.ll @@ -1,6 +1,6 @@ ; Tests that coro-split removes cleanup code after coro.end in resume functions ; and retains it in the start function. -; RUN: opt < %s -passes=coro-split -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s define i8* @f2(i1 %val) "coroutine.presplit"="1" personality i32 4 { entry: @@ -77,4 +77,3 @@ declare i1 @llvm.coro.end(i8*, i1) declare noalias i8* @malloc(i32) declare void @print(i32) declare void @free(i8*) - diff --git a/llvm/test/Transforms/Coroutines/coro-split-hidden.ll b/llvm/test/Transforms/Coroutines/coro-split-hidden.ll index e4bff51fc444e..d4f058f5d3756 100644 --- a/llvm/test/Transforms/Coroutines/coro-split-hidden.ll +++ b/llvm/test/Transforms/Coroutines/coro-split-hidden.ll @@ -1,7 +1,7 @@ ; Tests that coro-split can convert functions with hidden visibility. ; These may be generated by a frontend such as Clang, when inlining with ; '-fvisibility-inlines-hidden'. -; RUN: opt < %s -passes=coro-split -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s define hidden i8* @f() "coroutine.presplit"="1" { entry: diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail.ll index 2b832eb77bd76..b069f5d123de0 100644 --- a/llvm/test/Transforms/Coroutines/coro-split-musttail.ll +++ b/llvm/test/Transforms/Coroutines/coro-split-musttail.ll @@ -1,6 +1,6 @@ ; Tests that coro-split will convert coro.resume followed by a suspend to a ; musttail call. -; RUN: opt < %s -passes=coro-split -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s define void @f() #0 { entry: diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail1.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail1.ll index 0e1f0cebbbd71..0b1e3f2dd55b5 100644 --- a/llvm/test/Transforms/Coroutines/coro-split-musttail1.ll +++ b/llvm/test/Transforms/Coroutines/coro-split-musttail1.ll @@ -1,6 +1,6 @@ ; Tests that coro-split will convert coro.resume followed by a suspend to a ; musttail call. -; RUN: opt < %s -passes=coro-split -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s define void @f() #0 { entry: diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail2.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail2.ll index 4b3e5020fcd29..08d9cf11a58ae 100644 --- a/llvm/test/Transforms/Coroutines/coro-split-musttail2.ll +++ b/llvm/test/Transforms/Coroutines/coro-split-musttail2.ll @@ -1,6 +1,6 @@ ; Tests that coro-split will convert coro.resume followed by a suspend to a ; musttail call. -; RUN: opt < %s -passes=coro-split -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s define void @fakeresume1(i8*) { entry: diff --git a/llvm/test/Transforms/Coroutines/coro-split-musttail3.ll b/llvm/test/Transforms/Coroutines/coro-split-musttail3.ll index e59ebe4fde2a2..b6b527a613e91 100644 --- a/llvm/test/Transforms/Coroutines/coro-split-musttail3.ll +++ b/llvm/test/Transforms/Coroutines/coro-split-musttail3.ll @@ -1,6 +1,6 @@ ; Tests that coro-split will convert coro.resume followed by a suspend to a ; musttail call. -; RUN: opt < %s -passes=coro-split -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s define void @f() #0 { entry: diff --git a/llvm/test/Transforms/Coroutines/coro-split-recursive.ll b/llvm/test/Transforms/Coroutines/coro-split-recursive.ll index 0b6909fe4e6ee..43d1c116e4a09 100644 --- a/llvm/test/Transforms/Coroutines/coro-split-recursive.ll +++ b/llvm/test/Transforms/Coroutines/coro-split-recursive.ll @@ -1,4 +1,4 @@ -; RUN: opt -passes='default' -enable-coroutines -S < %s | FileCheck %s +; RUN: opt -passes='function(coro-early),cgscc(coro-split)' -S < %s | FileCheck %s declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*) diff --git a/llvm/test/Transforms/Coroutines/coro-split-sink-lifetime-01.ll b/llvm/test/Transforms/Coroutines/coro-split-sink-lifetime-01.ll index df125e1fefdd8..d4c33fb93b42b 100644 --- a/llvm/test/Transforms/Coroutines/coro-split-sink-lifetime-01.ll +++ b/llvm/test/Transforms/Coroutines/coro-split-sink-lifetime-01.ll @@ -1,6 +1,6 @@ ; Tests that coro-split will optimize the lifetime.start maker of each local variable, ; sink them to the places after the suspend block. -; RUN: opt < %s -passes=coro-split -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse,simplify-cfg' -S | FileCheck %s %"struct.std::coroutine_handle" = type { i8* } %"struct.std::coroutine_handle.0" = type { %"struct.std::coroutine_handle" } @@ -67,4 +67,3 @@ declare i8* @llvm.coro.free(token, i8* nocapture readonly) #2 declare i1 @llvm.coro.end(i8*, i1) #3 declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #4 declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #4 - diff --git a/llvm/test/Transforms/Coroutines/coro-split-sink-lifetime-02.ll b/llvm/test/Transforms/Coroutines/coro-split-sink-lifetime-02.ll index 68e44c53af6ba..ad157f94caddb 100644 --- a/llvm/test/Transforms/Coroutines/coro-split-sink-lifetime-02.ll +++ b/llvm/test/Transforms/Coroutines/coro-split-sink-lifetime-02.ll @@ -1,6 +1,6 @@ ; Tests that coro-split will optimize the lifetime.start maker of each local variable, ; sink them to the places after the suspend block. -; RUN: opt < %s -passes=coro-split -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s %"struct.std::coroutine_handle" = type { i8* } %"struct.std::coroutine_handle.0" = type { %"struct.std::coroutine_handle" } @@ -58,7 +58,6 @@ exit: ; CHECK: %[[VAL:testval.+]] = getelementptr inbounds %a.Frame ; CHECK-NOT: call void @llvm.lifetime.start.p0i8(i64 4, i8* %{{.*}}) ; CHECK: %test = load i32, i32* %[[VAL]] -; CHECK-NOT: %test = load i32, i32* %testval declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*) declare i1 @llvm.coro.alloc(token) #3 @@ -74,4 +73,3 @@ declare i8* @llvm.coro.free(token, i8* nocapture readonly) #2 declare i1 @llvm.coro.end(i8*, i1) #3 declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #4 declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #4 - diff --git a/llvm/test/Transforms/Coroutines/coro-split-sink-lifetime-03.ll b/llvm/test/Transforms/Coroutines/coro-split-sink-lifetime-03.ll index 561df83430249..31913669ff9c8 100644 --- a/llvm/test/Transforms/Coroutines/coro-split-sink-lifetime-03.ll +++ b/llvm/test/Transforms/Coroutines/coro-split-sink-lifetime-03.ll @@ -1,6 +1,6 @@ ; Corresponding to coro-split-sink-lifetime-01.ll. This file tests that whether the CoroFrame ; pass knows the operand of lifetime.start intrinsic may be GEP as well. -; RUN: opt < %s -passes=coro-split -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse,simplify-cfg' -S | FileCheck %s %"struct.std::coroutine_handle" = type { i8* } %"struct.std::coroutine_handle.0" = type { %"struct.std::coroutine_handle" } diff --git a/llvm/test/Transforms/Coroutines/coro-split-sink-lifetime-04.ll b/llvm/test/Transforms/Coroutines/coro-split-sink-lifetime-04.ll index 5dbde15f495a2..c3be84a202a41 100644 --- a/llvm/test/Transforms/Coroutines/coro-split-sink-lifetime-04.ll +++ b/llvm/test/Transforms/Coroutines/coro-split-sink-lifetime-04.ll @@ -1,6 +1,6 @@ ; Tests that coro-split will optimize the lifetime.start maker of each local variable, ; sink them to the places after the suspend block. -; RUN: opt < %s -passes=coro-split -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse,simplify-cfg' -S | FileCheck %s %"struct.std::coroutine_handle" = type { i8* } %"struct.std::coroutine_handle.0" = type { %"struct.std::coroutine_handle" } diff --git a/llvm/test/Transforms/Coroutines/coro-swifterror.ll b/llvm/test/Transforms/Coroutines/coro-swifterror.ll index e1161c1d2f968..f19b003564912 100644 --- a/llvm/test/Transforms/Coroutines/coro-swifterror.ll +++ b/llvm/test/Transforms/Coroutines/coro-swifterror.ll @@ -4,7 +4,7 @@ target datalayout = "E-p:32:32" define i8* @f(i8* %buffer, i32 %n, i8** swifterror %errorslot) { ; CHECK-LABEL: @f( -; CHECK-NEXT: entry: +; CHECK-NEXT: coro.return: ; CHECK-NEXT: [[N_VAL_SPILL_ADDR:%.*]] = bitcast i8* [[BUFFER:%.*]] to i32* ; CHECK-NEXT: store i32 [[N:%.*]], i32* [[N_VAL_SPILL_ADDR]], align 4 ; CHECK-NEXT: tail call void @print(i32 [[N]]) @@ -44,7 +44,7 @@ cleanup: define i8* @g(i8* %buffer, i32 %n) { ; CHECK-LABEL: @g( -; CHECK-NEXT: entry: +; CHECK-NEXT: coro.return: ; CHECK-NEXT: [[TMP0:%.*]] = alloca swifterror i8*, align 4 ; CHECK-NEXT: [[N_VAL_SPILL_ADDR:%.*]] = bitcast i8* [[BUFFER:%.*]] to i32* ; CHECK-NEXT: store i32 [[N:%.*]], i32* [[N_VAL_SPILL_ADDR]], align 4 diff --git a/llvm/test/Transforms/Coroutines/coro-zero-alloca.ll b/llvm/test/Transforms/Coroutines/coro-zero-alloca.ll index f686309e191bc..da35653bc2306 100644 --- a/llvm/test/Transforms/Coroutines/coro-zero-alloca.ll +++ b/llvm/test/Transforms/Coroutines/coro-zero-alloca.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -passes=coro-split -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse' -S | FileCheck %s declare i8* @malloc(i64) declare void @free(i8*) diff --git a/llvm/test/Transforms/Coroutines/no-suspend.ll b/llvm/test/Transforms/Coroutines/no-suspend.ll index 41b09dd861798..3d31396df79a9 100644 --- a/llvm/test/Transforms/Coroutines/no-suspend.ll +++ b/llvm/test/Transforms/Coroutines/no-suspend.ll @@ -1,5 +1,5 @@ ; Test no suspend coroutines -; RUN: opt < %s -passes=coro-split -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(coro-split),simplify-cfg,early-cse,simplify-cfg' -S | FileCheck %s ; Coroutine with no-suspends will turn into: ; diff --git a/llvm/test/Transforms/Coroutines/restart-trigger.ll b/llvm/test/Transforms/Coroutines/restart-trigger.ll index 99731a0d52727..e95bb6f1f415e 100644 --- a/llvm/test/Transforms/Coroutines/restart-trigger.ll +++ b/llvm/test/Transforms/Coroutines/restart-trigger.ll @@ -1,11 +1,14 @@ ; REQUIRES: asserts ; The following tests use the new pass manager, and verify that the coroutine ; passes re-run the CGSCC pipeline. -; RUN: opt < %s -S -passes='default' -enable-coroutines -debug-only=coro-split 2>&1 | FileCheck %s -; RUN: opt < %s -S -passes='default' -enable-coroutines -debug-only=coro-split 2>&1 | FileCheck %s +; RUN: opt < %s -S -passes='default' -enable-coroutines -debug-only=coro-split 2>&1 | FileCheck --check-prefix=CHECK-NEWPM %s +; RUN: opt < %s -S -passes='default' -enable-coroutines -debug-only=coro-split 2>&1 | FileCheck --check-prefix=CHECK-NEWPM %s ; CHECK: CoroSplit: Processing coroutine 'f' state: 0 ; CHECK-NEXT: CoroSplit: Processing coroutine 'f' state: 1 +; CHECK-NEWPM: CoroSplit: Processing coroutine 'f' state: 0 +; CHECK-NEWPM-NOT: CoroSplit: Processing coroutine 'f' state: 1 + define void @f() { %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) diff --git a/llvm/test/Transforms/Coroutines/smoketest.ll b/llvm/test/Transforms/Coroutines/smoketest.ll index bd122ff00180c..10c0f6bbd1ced 100644 --- a/llvm/test/Transforms/Coroutines/smoketest.ll +++ b/llvm/test/Transforms/Coroutines/smoketest.ll @@ -10,12 +10,16 @@ ; RUN: opt < %s -disable-output -passes='default' -enable-coroutines \ ; RUN: -debug-pass-manager 2>&1 | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-OPT ; RUN: opt < %s -disable-output -debug-pass-manager \ -; RUN: -passes='function(coro-early),cgscc(coro-split),function(coro-elide,coro-cleanup)' 2>&1 \ +; RUN: -passes='function(coro-early),function(coro-elide),cgscc(coro-split),function(coro-cleanup)' 2>&1 \ ; RUN: | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-OPT +; note that we run CoroElidePass before CoroSplitPass. This is because CoroElidePass is part of +; function simplification pipeline, which runs before CoroSplitPass. And since @foo is not +; a coroutine, it won't be put back into the CGSCC, and hence won't trigger a CoroElidePass +; after CoroSplitPass. ; CHECK-ALL: CoroEarlyPass -; CHECK-ALL: CoroSplitPass ; CHECK-OPT: CoroElidePass +; CHECK-ALL: CoroSplitPass ; CHECK-ALL: CoroCleanupPass define void @foo() { From 381ded345bdd59edcea9a87f399f67b18be7622a Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Mon, 28 Jun 2021 13:24:24 -0700 Subject: [PATCH 328/619] [AMDGPU] Add S_MOV_B64_IMM_PSEUDO for wide constants This is to allow 64 bit constant rematerialization. If a constant is split into two separate moves initializing sub0 and sub1 like now RA cannot rematerizalize a 64 bit register. This gives 10-20% uplift in a set of huge apps heavily using double precession math. Fixes: SWDEV-292645 Differential Revision: https://reviews.llvm.org/D104874 --- llvm/lib/Target/AMDGPU/AMDGPU.h | 4 + .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 11 ++ llvm/lib/Target/AMDGPU/CMakeLists.txt | 1 + .../Target/AMDGPU/GCNPreRAOptimizations.cpp | 162 ++++++++++++++++++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 24 +++ llvm/lib/Target/AMDGPU/SIInstructions.td | 12 ++ .../AMDGPU/GlobalISel/cvt_f32_ubyte.ll | 16 +- .../AMDGPU/GlobalISel/extractelement.ll | 55 +++--- .../CodeGen/AMDGPU/GlobalISel/floor.f64.ll | 24 +-- llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll | 48 ++---- llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll | 56 +++--- .../llvm.amdgcn.global.atomic.csub.ll | 6 +- .../CodeGen/AMDGPU/GlobalISel/mubuf-global.ll | 12 +- .../CodeGen/AMDGPU/GlobalISel/roundeven.ll | 2 +- .../CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll | 9 +- .../CodeGen/AMDGPU/GlobalISel/srem.i64.ll | 9 +- .../CodeGen/AMDGPU/GlobalISel/udiv.i64.ll | 9 +- .../CodeGen/AMDGPU/GlobalISel/urem.i64.ll | 9 +- .../AMDGPU/amdgpu-codegenprepare-idiv.ll | 134 +++++++++++---- .../CodeGen/AMDGPU/combine-sreg64-inits.mir | 98 +++++++++++ llvm/test/CodeGen/AMDGPU/ds_write2.ll | 6 +- llvm/test/CodeGen/AMDGPU/inline-asm.ll | 3 +- .../test/CodeGen/AMDGPU/inline-constraints.ll | 11 +- llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll | 12 +- .../CodeGen/AMDGPU/insert_vector_elt.v2i16.ll | 18 +- llvm/test/CodeGen/AMDGPU/llc-pipeline.ll | 5 +- llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll | 2 +- llvm/test/CodeGen/AMDGPU/packed-fp32.ll | 5 +- .../AMDGPU/promote-alloca-vector-to-vector.ll | 6 +- .../CodeGen/AMDGPU/remat-fp64-constants.ll | 45 +++++ llvm/test/CodeGen/AMDGPU/sdiv64.ll | 6 +- llvm/test/CodeGen/AMDGPU/shift-i128.ll | 3 +- llvm/test/CodeGen/AMDGPU/shl.ll | 24 ++- llvm/test/CodeGen/AMDGPU/sopk-compares.ll | 8 +- llvm/test/CodeGen/AMDGPU/srem64.ll | 6 +- llvm/test/CodeGen/AMDGPU/udiv64.ll | 6 +- llvm/test/CodeGen/AMDGPU/urem64.ll | 6 +- .../test/CodeGen/AMDGPU/wwm-reserved-spill.ll | 2 +- 38 files changed, 606 insertions(+), 269 deletions(-) create mode 100644 llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp create mode 100644 llvm/test/CodeGen/AMDGPU/combine-sreg64-inits.mir create mode 100644 llvm/test/CodeGen/AMDGPU/remat-fp64-constants.ll diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index fa3c7e657fc1f..49ea80a6dd671 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -75,6 +75,7 @@ FunctionPass *createAMDGPURewriteOutArgumentsPass(); ModulePass *createAMDGPUReplaceLDSUseWithPointerPass(); ModulePass *createAMDGPULowerModuleLDSPass(); FunctionPass *createSIModeRegisterPass(); +FunctionPass *createGCNPreRAOptimizationsPass(); struct AMDGPUSimplifyLibCallsPass : PassInfoMixin { AMDGPUSimplifyLibCallsPass(TargetMachine &TM) : TM(TM) {} @@ -348,6 +349,9 @@ extern char &AMDGPUOpenCLEnqueuedBlockLoweringID; void initializeGCNNSAReassignPass(PassRegistry &); extern char &GCNNSAReassignID; +void initializeGCNPreRAOptimizationsPass(PassRegistry &); +extern char &GCNPreRAOptimizationsID; + namespace AMDGPU { enum TargetIndex { TI_CONSTDATA_START, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 82d0f832f6a61..a34449c57b3d0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -208,6 +208,11 @@ static cl::opt EnableLowerModuleLDS( cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS), cl::init(true), cl::Hidden); +static cl::opt EnablePreRAOptimizations( + "amdgpu-enable-pre-ra-optimizations", + cl::desc("Enable Pre-RA optimizations pass"), cl::init(true), + cl::Hidden); + extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { // Register the target RegisterTargetMachine X(getTheAMDGPUTarget()); @@ -275,6 +280,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { initializeAMDGPUSimplifyLibCallsPass(*PR); initializeAMDGPUPrintfRuntimeBindingPass(*PR); initializeGCNNSAReassignPass(*PR); + initializeGCNPreRAOptimizationsPass(*PR); } static std::unique_ptr createTLOF(const Triple &TT) { @@ -1191,6 +1197,11 @@ void GCNPassConfig::addOptimizedRegAlloc() { if (OptExecMaskPreRA) insertPass(&MachineSchedulerID, &SIOptimizeExecMaskingPreRAID); + if (EnablePreRAOptimizations.getNumOccurrences() + ? EnablePreRAOptimizations + : TM->getOptLevel() > CodeGenOpt::Less) + insertPass(&RenameIndependentSubregsID, &GCNPreRAOptimizationsID); + // This is not an essential optimization and it has a noticeable impact on // compilation time, so we only enable it from O2. if (TM->getOptLevel() > CodeGenOpt::Less) diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt index 0e3ea8d313a26..21a6e39781f0e 100644 --- a/llvm/lib/Target/AMDGPU/CMakeLists.txt +++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt @@ -143,6 +143,7 @@ add_llvm_target(AMDGPUCodeGen GCNILPSched.cpp GCNNSAReassign.cpp GCNDPPCombine.cpp + GCNPreRAOptimizations.cpp SIModeRegister.cpp LINK_COMPONENTS diff --git a/llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp b/llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp new file mode 100644 index 0000000000000..a51399d7da5f8 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp @@ -0,0 +1,162 @@ +//===-- GCNPreRAOptimizations.cpp -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This pass combines split register tuple initialization into a single psuedo: +/// +/// undef %0.sub1:sreg_64 = S_MOV_B32 1 +/// %0.sub0:sreg_64 = S_MOV_B32 2 +/// => +/// %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 0x200000001 +/// +/// This is to allow rematerialization of a value instead of spilling. It is +/// supposed to be done after register coalescer to allow it to do its job and +/// before actual register allocation to allow rematerialization. +/// +/// Right now the pass only handles 64 bit SGPRs with immediate initializers, +/// although the same shall be possible with other register classes and +/// instructions if necessary. +/// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/InitializePasses.h" + +using namespace llvm; + +#define DEBUG_TYPE "amdgpu-pre-ra-optimizations" + +namespace { + +class GCNPreRAOptimizations : public MachineFunctionPass { +private: + const SIInstrInfo *TII; + MachineRegisterInfo *MRI; + LiveIntervals *LIS; + + bool processReg(Register Reg); + +public: + static char ID; + + GCNPreRAOptimizations() : MachineFunctionPass(ID) { + initializeGCNPreRAOptimizationsPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + StringRef getPassName() const override { + return "AMDGPU Pre-RA optimizations"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } +}; + +} // End anonymous namespace. + +INITIALIZE_PASS_BEGIN(GCNPreRAOptimizations, DEBUG_TYPE, + "AMDGPU Pre-RA optimizations", false, false) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_END(GCNPreRAOptimizations, DEBUG_TYPE, "Pre-RA optimizations", + false, false) + +char GCNPreRAOptimizations::ID = 0; + +char &llvm::GCNPreRAOptimizationsID = GCNPreRAOptimizations::ID; + +FunctionPass *llvm::createGCNPreRAOptimizationsPass() { + return new GCNPreRAOptimizations(); +} + +bool GCNPreRAOptimizations::processReg(Register Reg) { + MachineInstr *Def0 = nullptr; + MachineInstr *Def1 = nullptr; + uint64_t Init = 0; + + for (MachineInstr &I : MRI->def_instructions(Reg)) { + if (I.getOpcode() != AMDGPU::S_MOV_B32 || I.getOperand(0).getReg() != Reg || + !I.getOperand(1).isImm() || I.getNumOperands() != 2) + return false; + + switch (I.getOperand(0).getSubReg()) { + default: + return false; + case AMDGPU::sub0: + if (Def0) + return false; + Def0 = &I; + Init |= I.getOperand(1).getImm() & 0xffffffff; + break; + case AMDGPU::sub1: + if (Def1) + return false; + Def1 = &I; + Init |= static_cast(I.getOperand(1).getImm()) << 32; + break; + } + } + + if (!Def0 || !Def1 || Def0->getParent() != Def1->getParent()) + return false; + + LLVM_DEBUG(dbgs() << "Combining:\n " << *Def0 << " " << *Def1 + << " =>\n"); + + if (SlotIndex::isEarlierInstr(LIS->getInstructionIndex(*Def1), + LIS->getInstructionIndex(*Def0))) + std::swap(Def0, Def1); + + LIS->RemoveMachineInstrFromMaps(*Def0); + LIS->RemoveMachineInstrFromMaps(*Def1); + auto NewI = BuildMI(*Def0->getParent(), *Def0, Def0->getDebugLoc(), + TII->get(AMDGPU::S_MOV_B64_IMM_PSEUDO), Reg) + .addImm(Init); + + Def0->eraseFromParent(); + Def1->eraseFromParent(); + LIS->InsertMachineInstrInMaps(*NewI); + LIS->removeInterval(Reg); + LIS->createAndComputeVirtRegInterval(Reg); + + LLVM_DEBUG(dbgs() << " " << *NewI); + + return true; +} + +bool GCNPreRAOptimizations::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(MF.getFunction())) + return false; + + const GCNSubtarget &ST = MF.getSubtarget(); + TII = ST.getInstrInfo(); + MRI = &MF.getRegInfo(); + LIS = &getAnalysis(); + const SIRegisterInfo *TRI = ST.getRegisterInfo(); + + bool Changed = false; + + for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) { + Register Reg = Register::index2VirtReg(I); + if (!LIS->hasInterval(Reg)) + continue; + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + if (RC->MC->getSizeInBits() != 64 || !TRI->isSGPRClass(RC)) + continue; + Changed |= processReg(Reg); + } + + return Changed; +} diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 4084619240c54..7fd275bd0ade9 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -1765,6 +1765,30 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { expandMovDPP64(MI); break; } + case AMDGPU::S_MOV_B64_IMM_PSEUDO: { + const MachineOperand &SrcOp = MI.getOperand(1); + assert(!SrcOp.isFPImm()); + APInt Imm(64, SrcOp.getImm()); + if (Imm.isIntN(32) || isInlineConstant(Imm)) { + MI.setDesc(get(AMDGPU::S_MOV_B64)); + break; + } + + Register Dst = MI.getOperand(0).getReg(); + Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0); + Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1); + + APInt Lo(32, Imm.getLoBits(32).getZExtValue()); + APInt Hi(32, Imm.getHiBits(32).getZExtValue()); + BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DstLo) + .addImm(Lo.getSExtValue()) + .addReg(Dst, RegState::Implicit | RegState::Define); + BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DstHi) + .addImm(Hi.getSExtValue()) + .addReg(Dst, RegState::Implicit | RegState::Define); + MI.eraseFromParent(); + break; + } case AMDGPU::V_SET_INACTIVE_B32: { unsigned NotOpc = ST.isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64; unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index ad6d69468ec91..1d086d14a00cc 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -111,6 +111,18 @@ def V_MOV_B64_DPP_PSEUDO : VOP_DPP_Pseudo <"v_mov_b64_dpp", VOP_I64_I64> { let Size = 16; // Requires two 8-byte v_mov_b32_dpp to complete. } +// 64-bit scalar move immediate instruction. This is used to avoid subregs +// initialization and allow rematerialization. +def S_MOV_B64_IMM_PSEUDO : SPseudoInstSI <(outs SReg_64:$sdst), + (ins i64imm:$src0)> { + let isReMaterializable = 1; + let isAsCheapAsAMove = 1; + let isMoveImm = 1; + let SchedRW = [WriteSALU, Write64Bit]; + let Size = 16; // Needs maximum 2 s_mov_b32 instructions 8 byte long each. + let Uses = []; +} + // Pseudoinstruction for @llvm.amdgcn.wqm. It is turned into a copy after the // WQM pass processes it. def WQM : PseudoInstSI <(outs unknown:$vdst), (ins unknown:$src0)>; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll index cbca2a96f1d40..660eb5bd537cc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll @@ -1097,11 +1097,11 @@ define float @v_test_sitofp_i64_byte_to_f32(i64 %arg0) { ; SI-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] ; SI-NEXT: v_and_b32_e32 v1, 0x7fffffff, v3 ; SI-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; SI-NEXT: s_mov_b32 s4, 0 ; SI-NEXT: v_and_b32_e32 v3, s6, v3 +; SI-NEXT: s_movk_i32 s5, 0x80 ; SI-NEXT: v_lshrrev_b32_e32 v1, 8, v1 ; SI-NEXT: v_lshlrev_b32_e32 v0, 23, v0 -; SI-NEXT: s_mov_b32 s4, 0 -; SI-NEXT: s_movk_i32 s5, 0x80 ; SI-NEXT: v_or_b32_e32 v0, v0, v1 ; SI-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[2:3] ; SI-NEXT: v_and_b32_e32 v1, 1, v0 @@ -1129,11 +1129,11 @@ define float @v_test_sitofp_i64_byte_to_f32(i64 %arg0) { ; VI-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] ; VI-NEXT: v_and_b32_e32 v1, 0x7fffffff, v3 ; VI-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; VI-NEXT: s_mov_b32 s4, 0 ; VI-NEXT: v_and_b32_e32 v3, s6, v3 +; VI-NEXT: s_movk_i32 s5, 0x80 ; VI-NEXT: v_lshrrev_b32_e32 v1, 8, v1 ; VI-NEXT: v_lshlrev_b32_e32 v0, 23, v0 -; VI-NEXT: s_mov_b32 s4, 0 -; VI-NEXT: s_movk_i32 s5, 0x80 ; VI-NEXT: v_or_b32_e32 v0, v0, v1 ; VI-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[2:3] ; VI-NEXT: v_and_b32_e32 v1, 1, v0 @@ -1165,10 +1165,10 @@ define float @v_test_uitofp_i64_byte_to_f32(i64 %arg0) { ; SI-NEXT: v_and_b32_e32 v1, 0x7fffffff, v3 ; SI-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc ; SI-NEXT: v_and_b32_e32 v3, s4, v3 -; SI-NEXT: v_lshrrev_b32_e32 v1, 8, v1 -; SI-NEXT: v_lshlrev_b32_e32 v0, 23, v0 ; SI-NEXT: s_mov_b32 s4, 0 ; SI-NEXT: s_movk_i32 s5, 0x80 +; SI-NEXT: v_lshrrev_b32_e32 v1, 8, v1 +; SI-NEXT: v_lshlrev_b32_e32 v0, 23, v0 ; SI-NEXT: v_or_b32_e32 v0, v0, v1 ; SI-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[2:3] ; SI-NEXT: v_and_b32_e32 v1, 1, v0 @@ -1195,10 +1195,10 @@ define float @v_test_uitofp_i64_byte_to_f32(i64 %arg0) { ; VI-NEXT: v_and_b32_e32 v1, 0x7fffffff, v3 ; VI-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc ; VI-NEXT: v_and_b32_e32 v3, s4, v3 -; VI-NEXT: v_lshrrev_b32_e32 v1, 8, v1 -; VI-NEXT: v_lshlrev_b32_e32 v0, 23, v0 ; VI-NEXT: s_mov_b32 s4, 0 ; VI-NEXT: s_movk_i32 s5, 0x80 +; VI-NEXT: v_lshrrev_b32_e32 v1, 8, v1 +; VI-NEXT: v_lshlrev_b32_e32 v0, 23, v0 ; VI-NEXT: v_or_b32_e32 v0, v0, v1 ; VI-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[2:3] ; VI-NEXT: v_and_b32_e32 v1, 1, v0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll index a944adb4375e9..2dedb531bc1bb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll @@ -2751,9 +2751,9 @@ define amdgpu_kernel void @dyn_extract_v5f64_s_s(double addrspace(1)* %out, i32 ; GPRIDX-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0 ; GPRIDX-NEXT: s_load_dword s8, s[4:5], 0x8 ; GPRIDX-NEXT: s_mov_b32 s0, 0 +; GPRIDX-NEXT: s_mov_b32 s1, 0x40140000 ; GPRIDX-NEXT: s_mov_b32 s3, 0x40080000 ; GPRIDX-NEXT: s_mov_b32 s2, s0 -; GPRIDX-NEXT: s_mov_b32 s1, 0x40140000 ; GPRIDX-NEXT: s_waitcnt lgkmcnt(0) ; GPRIDX-NEXT: s_cmp_eq_u32 s8, 1 ; GPRIDX-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0 @@ -2842,9 +2842,9 @@ define amdgpu_kernel void @dyn_extract_v5f64_s_s(double addrspace(1)* %out, i32 ; MOVREL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0 ; MOVREL-NEXT: s_load_dword s8, s[4:5], 0x8 ; MOVREL-NEXT: s_mov_b32 s0, 0 +; MOVREL-NEXT: s_mov_b32 s1, 0x40140000 ; MOVREL-NEXT: s_mov_b32 s3, 0x40080000 ; MOVREL-NEXT: s_mov_b32 s2, s0 -; MOVREL-NEXT: s_mov_b32 s1, 0x40140000 ; MOVREL-NEXT: s_waitcnt lgkmcnt(0) ; MOVREL-NEXT: s_cmp_eq_u32 s8, 1 ; MOVREL-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0 @@ -2935,9 +2935,9 @@ define amdgpu_kernel void @dyn_extract_v5f64_s_s(double addrspace(1)* %out, i32 ; GFX10-NEXT: s_load_dword s8, s[4:5], 0x8 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX10-NEXT: s_mov_b32 s2, 0 +; GFX10-NEXT: s_mov_b32 s3, 0x40140000 ; GFX10-NEXT: s_mov_b32 s5, 0x40080000 ; GFX10-NEXT: s_mov_b32 s4, s2 -; GFX10-NEXT: s_mov_b32 s3, 0x40140000 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_cmp_eq_u32 s8, 1 @@ -3837,21 +3837,21 @@ define amdgpu_kernel void @dyn_extract_v4f64_s_s_s(double addrspace(1)* %out, i3 ; GPRIDX-NEXT: runtime_loader_kernel_symbol = 0 ; GPRIDX-NEXT: .end_amd_kernel_code_t ; GPRIDX-NEXT: ; %bb.0: ; %entry -; GPRIDX-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GPRIDX-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GPRIDX-NEXT: s_load_dword s6, s[4:5], 0x8 -; GPRIDX-NEXT: s_mov_b32 s0, 0 -; GPRIDX-NEXT: s_mov_b32 s1, 0x40080000 +; GPRIDX-NEXT: s_mov_b32 s2, 0 +; GPRIDX-NEXT: s_mov_b32 s3, 0x40080000 ; GPRIDX-NEXT: v_mov_b32_e32 v2, 0 ; GPRIDX-NEXT: s_waitcnt lgkmcnt(0) ; GPRIDX-NEXT: s_cmp_eq_u32 s6, 1 ; GPRIDX-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0 ; GPRIDX-NEXT: s_cmp_eq_u32 s6, 2 -; GPRIDX-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5] +; GPRIDX-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5] ; GPRIDX-NEXT: s_cmp_eq_u32 s6, 3 -; GPRIDX-NEXT: s_cselect_b64 s[0:1], 4.0, s[0:1] -; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 -; GPRIDX-NEXT: v_mov_b32_e32 v1, s1 -; GPRIDX-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] +; GPRIDX-NEXT: s_cselect_b64 s[2:3], 4.0, s[2:3] +; GPRIDX-NEXT: v_mov_b32_e32 v0, s2 +; GPRIDX-NEXT: v_mov_b32_e32 v1, s3 +; GPRIDX-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GPRIDX-NEXT: s_endpgm ; ; MOVREL-LABEL: dyn_extract_v4f64_s_s_s: @@ -3924,21 +3924,21 @@ define amdgpu_kernel void @dyn_extract_v4f64_s_s_s(double addrspace(1)* %out, i3 ; MOVREL-NEXT: runtime_loader_kernel_symbol = 0 ; MOVREL-NEXT: .end_amd_kernel_code_t ; MOVREL-NEXT: ; %bb.0: ; %entry -; MOVREL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; MOVREL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; MOVREL-NEXT: s_load_dword s6, s[4:5], 0x8 -; MOVREL-NEXT: s_mov_b32 s0, 0 -; MOVREL-NEXT: s_mov_b32 s1, 0x40080000 +; MOVREL-NEXT: s_mov_b32 s2, 0 +; MOVREL-NEXT: s_mov_b32 s3, 0x40080000 ; MOVREL-NEXT: s_waitcnt lgkmcnt(0) -; MOVREL-NEXT: v_mov_b32_e32 v2, s2 +; MOVREL-NEXT: v_mov_b32_e32 v3, s1 ; MOVREL-NEXT: s_cmp_eq_u32 s6, 1 ; MOVREL-NEXT: s_cselect_b64 s[4:5], 2.0, 1.0 ; MOVREL-NEXT: s_cmp_eq_u32 s6, 2 -; MOVREL-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5] +; MOVREL-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5] ; MOVREL-NEXT: s_cmp_eq_u32 s6, 3 -; MOVREL-NEXT: s_cselect_b64 s[0:1], 4.0, s[0:1] -; MOVREL-NEXT: v_mov_b32_e32 v0, s0 -; MOVREL-NEXT: v_mov_b32_e32 v1, s1 -; MOVREL-NEXT: v_mov_b32_e32 v3, s3 +; MOVREL-NEXT: s_cselect_b64 s[2:3], 4.0, s[2:3] +; MOVREL-NEXT: v_mov_b32_e32 v0, s2 +; MOVREL-NEXT: v_mov_b32_e32 v1, s3 +; MOVREL-NEXT: v_mov_b32_e32 v2, s0 ; MOVREL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; MOVREL-NEXT: s_endpgm ; @@ -4078,8 +4078,7 @@ define i32 @v_extract_v64i32_32(<64 x i32> addrspace(1)* %ptr) { ; MOVREL-LABEL: v_extract_v64i32_32: ; MOVREL: ; %bb.0: ; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; MOVREL-NEXT: s_movk_i32 s4, 0x80 -; MOVREL-NEXT: s_mov_b32 s5, 0 +; MOVREL-NEXT: s_mov_b64 s[4:5], 0x80 ; MOVREL-NEXT: v_mov_b32_e32 v2, s4 ; MOVREL-NEXT: v_mov_b32_e32 v3, s5 ; MOVREL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 @@ -4112,8 +4111,7 @@ define i32 @v_extract_v64i32_33(<64 x i32> addrspace(1)* %ptr) { ; MOVREL-LABEL: v_extract_v64i32_33: ; MOVREL: ; %bb.0: ; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; MOVREL-NEXT: s_movk_i32 s4, 0x80 -; MOVREL-NEXT: s_mov_b32 s5, 0 +; MOVREL-NEXT: s_mov_b64 s[4:5], 0x80 ; MOVREL-NEXT: v_mov_b32_e32 v2, s4 ; MOVREL-NEXT: v_mov_b32_e32 v3, s5 ; MOVREL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 @@ -4140,8 +4138,7 @@ define i32 @v_extract_v64i32_37(<64 x i32> addrspace(1)* %ptr) { ; GPRIDX-LABEL: v_extract_v64i32_37: ; GPRIDX: ; %bb.0: ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GPRIDX-NEXT: s_movk_i32 s4, 0x80 -; GPRIDX-NEXT: s_mov_b32 s5, 0 +; GPRIDX-NEXT: s_mov_b64 s[4:5], 0x80 ; GPRIDX-NEXT: v_mov_b32_e32 v2, s4 ; GPRIDX-NEXT: v_mov_b32_e32 v3, s5 ; GPRIDX-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 @@ -4154,8 +4151,7 @@ define i32 @v_extract_v64i32_37(<64 x i32> addrspace(1)* %ptr) { ; MOVREL-LABEL: v_extract_v64i32_37: ; MOVREL: ; %bb.0: ; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; MOVREL-NEXT: s_movk_i32 s4, 0x80 -; MOVREL-NEXT: s_mov_b32 s5, 0 +; MOVREL-NEXT: s_mov_b64 s[4:5], 0x80 ; MOVREL-NEXT: v_mov_b32_e32 v2, s4 ; MOVREL-NEXT: v_mov_b32_e32 v3, s5 ; MOVREL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 @@ -4171,8 +4167,7 @@ define i32 @v_extract_v64i32_37(<64 x i32> addrspace(1)* %ptr) { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_movk_i32 s4, 0x80 -; GFX10-NEXT: s_mov_b32 s5, 0 +; GFX10-NEXT: s_mov_b64 s[4:5], 0x80 ; GFX10-NEXT: v_mov_b32_e32 v2, s4 ; GFX10-NEXT: v_mov_b32_e32 v3, s5 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/floor.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/floor.f64.ll index a4f5948da2c69..d7ea1404b1175 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/floor.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/floor.f64.ll @@ -7,8 +7,8 @@ define double @v_floor_f64_ieee(double %x) { ; GFX6-LABEL: v_floor_f64_ieee: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: v_fract_f64_e32 v[2:3], v[0:1] ; GFX6-NEXT: s_mov_b32 s4, -1 +; GFX6-NEXT: v_fract_f64_e32 v[2:3], v[0:1] ; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff ; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5] ; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1] @@ -30,8 +30,8 @@ define double @v_floor_f64_ieee_nnan(double %x) { ; GFX6-LABEL: v_floor_f64_ieee_nnan: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: v_fract_f64_e32 v[2:3], v[0:1] ; GFX6-NEXT: s_mov_b32 s4, -1 +; GFX6-NEXT: v_fract_f64_e32 v[2:3], v[0:1] ; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff ; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5] ; GFX6-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3] @@ -50,8 +50,8 @@ define double @v_floor_f64_ieee_fneg(double %x) { ; GFX6-LABEL: v_floor_f64_ieee_fneg: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: v_fract_f64_e64 v[2:3], -v[0:1] ; GFX6-NEXT: s_mov_b32 s4, -1 +; GFX6-NEXT: v_fract_f64_e64 v[2:3], -v[0:1] ; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff ; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5] ; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1] @@ -74,8 +74,8 @@ define double @v_floor_f64_nonieee(double %x) #1 { ; GFX6-LABEL: v_floor_f64_nonieee: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: v_fract_f64_e32 v[2:3], v[0:1] ; GFX6-NEXT: s_mov_b32 s4, -1 +; GFX6-NEXT: v_fract_f64_e32 v[2:3], v[0:1] ; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff ; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5] ; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1] @@ -97,8 +97,8 @@ define double @v_floor_f64_nonieee_nnan(double %x) #1 { ; GFX6-LABEL: v_floor_f64_nonieee_nnan: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: v_fract_f64_e32 v[2:3], v[0:1] ; GFX6-NEXT: s_mov_b32 s4, -1 +; GFX6-NEXT: v_fract_f64_e32 v[2:3], v[0:1] ; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff ; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5] ; GFX6-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3] @@ -117,8 +117,8 @@ define double @v_floor_f64_non_ieee_fneg(double %x) #1 { ; GFX6-LABEL: v_floor_f64_non_ieee_fneg: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: v_fract_f64_e64 v[2:3], -v[0:1] ; GFX6-NEXT: s_mov_b32 s4, -1 +; GFX6-NEXT: v_fract_f64_e64 v[2:3], -v[0:1] ; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff ; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5] ; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1] @@ -141,8 +141,8 @@ define double @v_floor_f64_fabs(double %x) { ; GFX6-LABEL: v_floor_f64_fabs: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: v_fract_f64_e64 v[2:3], |v[0:1]| ; GFX6-NEXT: s_mov_b32 s4, -1 +; GFX6-NEXT: v_fract_f64_e64 v[2:3], |v[0:1]| ; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff ; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5] ; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1] @@ -170,8 +170,8 @@ define double @v_floor_f64_fneg_fabs(double %x) { ; GFX6-LABEL: v_floor_f64_fneg_fabs: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: v_fract_f64_e64 v[2:3], -|v[0:1]| ; GFX6-NEXT: s_mov_b32 s4, -1 +; GFX6-NEXT: v_fract_f64_e64 v[2:3], -|v[0:1]| ; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff ; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5] ; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1] @@ -194,8 +194,8 @@ define double @v_floor_f64_fneg_fabs(double %x) { define amdgpu_ps <2 x float> @s_floor_f64(double inreg %x) { ; GFX6-LABEL: s_floor_f64: ; GFX6: ; %bb.0: -; GFX6-NEXT: v_fract_f64_e32 v[0:1], s[2:3] ; GFX6-NEXT: s_mov_b32 s0, -1 +; GFX6-NEXT: v_fract_f64_e32 v[0:1], s[2:3] ; GFX6-NEXT: s_mov_b32 s1, 0x3fefffff ; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], s[0:1] ; GFX6-NEXT: v_cmp_o_f64_e64 vcc, s[2:3], s[2:3] @@ -218,8 +218,8 @@ define amdgpu_ps <2 x float> @s_floor_f64(double inreg %x) { define amdgpu_ps <2 x float> @s_floor_f64_fneg(double inreg %x) { ; GFX6-LABEL: s_floor_f64_fneg: ; GFX6: ; %bb.0: -; GFX6-NEXT: v_fract_f64_e64 v[0:1], -s[2:3] ; GFX6-NEXT: s_mov_b32 s0, -1 +; GFX6-NEXT: v_fract_f64_e64 v[0:1], -s[2:3] ; GFX6-NEXT: s_mov_b32 s1, 0x3fefffff ; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], s[0:1] ; GFX6-NEXT: v_cmp_o_f64_e64 vcc, s[2:3], s[2:3] @@ -243,8 +243,8 @@ define amdgpu_ps <2 x float> @s_floor_f64_fneg(double inreg %x) { define amdgpu_ps <2 x float> @s_floor_f64_fabs(double inreg %x) { ; GFX6-LABEL: s_floor_f64_fabs: ; GFX6: ; %bb.0: -; GFX6-NEXT: v_fract_f64_e64 v[0:1], |s[2:3]| ; GFX6-NEXT: s_mov_b32 s0, -1 +; GFX6-NEXT: v_fract_f64_e64 v[0:1], |s[2:3]| ; GFX6-NEXT: s_mov_b32 s1, 0x3fefffff ; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], s[0:1] ; GFX6-NEXT: v_cmp_o_f64_e64 vcc, s[2:3], s[2:3] @@ -268,8 +268,8 @@ define amdgpu_ps <2 x float> @s_floor_f64_fabs(double inreg %x) { define amdgpu_ps <2 x float> @s_floor_f64_fneg_fabs(double inreg %x) { ; GFX6-LABEL: s_floor_f64_fneg_fabs: ; GFX6: ; %bb.0: -; GFX6-NEXT: v_fract_f64_e64 v[0:1], -|s[2:3]| ; GFX6-NEXT: s_mov_b32 s0, -1 +; GFX6-NEXT: v_fract_f64_e64 v[0:1], -|s[2:3]| ; GFX6-NEXT: s_mov_b32 s1, 0x3fefffff ; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], s[0:1] ; GFX6-NEXT: v_cmp_o_f64_e64 vcc, s[2:3], s[2:3] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll index cbeb0140efd00..e852f9a4a4c18 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll @@ -4703,8 +4703,7 @@ define <2 x i64> @v_fshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %amt) { define amdgpu_ps i128 @s_fshl_i128(i128 inreg %lhs, i128 inreg %rhs, i128 inreg %amt) { ; GFX6-LABEL: s_fshl_i128: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_movk_i32 s10, 0x7f -; GFX6-NEXT: s_mov_b32 s11, 0 +; GFX6-NEXT: s_mov_b64 s[10:11], 0x7f ; GFX6-NEXT: s_and_b64 s[12:13], s[8:9], s[10:11] ; GFX6-NEXT: s_andn2_b64 s[8:9], s[10:11], s[8:9] ; GFX6-NEXT: s_sub_i32 s9, s12, 64 @@ -4751,8 +4750,7 @@ define amdgpu_ps i128 @s_fshl_i128(i128 inreg %lhs, i128 inreg %rhs, i128 inreg ; ; GFX8-LABEL: s_fshl_i128: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_movk_i32 s10, 0x7f -; GFX8-NEXT: s_mov_b32 s11, 0 +; GFX8-NEXT: s_mov_b64 s[10:11], 0x7f ; GFX8-NEXT: s_and_b64 s[12:13], s[8:9], s[10:11] ; GFX8-NEXT: s_andn2_b64 s[8:9], s[10:11], s[8:9] ; GFX8-NEXT: s_sub_i32 s9, s12, 64 @@ -4799,8 +4797,7 @@ define amdgpu_ps i128 @s_fshl_i128(i128 inreg %lhs, i128 inreg %rhs, i128 inreg ; ; GFX9-LABEL: s_fshl_i128: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_movk_i32 s10, 0x7f -; GFX9-NEXT: s_mov_b32 s11, 0 +; GFX9-NEXT: s_mov_b64 s[10:11], 0x7f ; GFX9-NEXT: s_and_b64 s[12:13], s[8:9], s[10:11] ; GFX9-NEXT: s_andn2_b64 s[8:9], s[10:11], s[8:9] ; GFX9-NEXT: s_sub_i32 s9, s12, 64 @@ -4847,8 +4844,7 @@ define amdgpu_ps i128 @s_fshl_i128(i128 inreg %lhs, i128 inreg %rhs, i128 inreg ; ; GFX10-LABEL: s_fshl_i128: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_movk_i32 s10, 0x7f -; GFX10-NEXT: s_mov_b32 s11, 0 +; GFX10-NEXT: s_mov_b64 s[10:11], 0x7f ; GFX10-NEXT: s_and_b64 s[12:13], s[8:9], s[10:11] ; GFX10-NEXT: s_andn2_b64 s[8:9], s[10:11], s[8:9] ; GFX10-NEXT: s_sub_i32 s9, s12, 64 @@ -5321,8 +5317,7 @@ define amdgpu_ps <4 x float> @v_fshl_i128_ssv(i128 inreg %lhs, i128 inreg %rhs, define amdgpu_ps <4 x float> @v_fshl_i128_svs(i128 inreg %lhs, i128 %rhs, i128 inreg %amt) { ; GFX6-LABEL: v_fshl_i128_svs: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_movk_i32 s6, 0x7f -; GFX6-NEXT: s_mov_b32 s7, 0 +; GFX6-NEXT: s_mov_b64 s[6:7], 0x7f ; GFX6-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] ; GFX6-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] ; GFX6-NEXT: s_sub_i32 s5, s8, 64 @@ -5379,8 +5374,7 @@ define amdgpu_ps <4 x float> @v_fshl_i128_svs(i128 inreg %lhs, i128 %rhs, i128 i ; ; GFX8-LABEL: v_fshl_i128_svs: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_movk_i32 s6, 0x7f -; GFX8-NEXT: s_mov_b32 s7, 0 +; GFX8-NEXT: s_mov_b64 s[6:7], 0x7f ; GFX8-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] ; GFX8-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] ; GFX8-NEXT: s_sub_i32 s5, s8, 64 @@ -5437,8 +5431,7 @@ define amdgpu_ps <4 x float> @v_fshl_i128_svs(i128 inreg %lhs, i128 %rhs, i128 i ; ; GFX9-LABEL: v_fshl_i128_svs: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_movk_i32 s6, 0x7f -; GFX9-NEXT: s_mov_b32 s7, 0 +; GFX9-NEXT: s_mov_b64 s[6:7], 0x7f ; GFX9-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] ; GFX9-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] ; GFX9-NEXT: s_sub_i32 s5, s8, 64 @@ -5495,8 +5488,7 @@ define amdgpu_ps <4 x float> @v_fshl_i128_svs(i128 inreg %lhs, i128 %rhs, i128 i ; ; GFX10-LABEL: v_fshl_i128_svs: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_movk_i32 s6, 0x7f -; GFX10-NEXT: s_mov_b32 s7, 0 +; GFX10-NEXT: s_mov_b64 s[6:7], 0x7f ; GFX10-NEXT: v_lshrrev_b64 v[0:1], 1, v[0:1] ; GFX10-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] ; GFX10-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] @@ -5556,8 +5548,7 @@ define amdgpu_ps <4 x float> @v_fshl_i128_svs(i128 inreg %lhs, i128 %rhs, i128 i define amdgpu_ps <4 x float> @v_fshl_i128_vss(i128 %lhs, i128 inreg %rhs, i128 inreg %amt) { ; GFX6-LABEL: v_fshl_i128_vss: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_movk_i32 s6, 0x7f -; GFX6-NEXT: s_mov_b32 s7, 0 +; GFX6-NEXT: s_mov_b64 s[6:7], 0x7f ; GFX6-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] ; GFX6-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] ; GFX6-NEXT: s_sub_i32 s6, 64, s8 @@ -5612,8 +5603,7 @@ define amdgpu_ps <4 x float> @v_fshl_i128_vss(i128 %lhs, i128 inreg %rhs, i128 i ; ; GFX8-LABEL: v_fshl_i128_vss: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_movk_i32 s6, 0x7f -; GFX8-NEXT: s_mov_b32 s7, 0 +; GFX8-NEXT: s_mov_b64 s[6:7], 0x7f ; GFX8-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] ; GFX8-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] ; GFX8-NEXT: s_sub_i32 s6, 64, s8 @@ -5668,8 +5658,7 @@ define amdgpu_ps <4 x float> @v_fshl_i128_vss(i128 %lhs, i128 inreg %rhs, i128 i ; ; GFX9-LABEL: v_fshl_i128_vss: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_movk_i32 s6, 0x7f -; GFX9-NEXT: s_mov_b32 s7, 0 +; GFX9-NEXT: s_mov_b64 s[6:7], 0x7f ; GFX9-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] ; GFX9-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] ; GFX9-NEXT: s_sub_i32 s6, 64, s8 @@ -5724,8 +5713,7 @@ define amdgpu_ps <4 x float> @v_fshl_i128_vss(i128 %lhs, i128 inreg %rhs, i128 i ; ; GFX10-LABEL: v_fshl_i128_vss: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_movk_i32 s6, 0x7f -; GFX10-NEXT: s_mov_b32 s7, 0 +; GFX10-NEXT: s_mov_b64 s[6:7], 0x7f ; GFX10-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] ; GFX10-NEXT: s_andn2_b64 s[6:7], s[6:7], s[4:5] ; GFX10-NEXT: s_sub_i32 s4, 64, s8 @@ -5902,8 +5890,7 @@ define i128 @v_fshl_i128_65(i128 %lhs, i128 %rhs) { define amdgpu_ps <2 x i128> @s_fshl_v2i128(<2 x i128> inreg %lhs, <2 x i128> inreg %rhs, <2 x i128> inreg %amt) { ; GFX6-LABEL: s_fshl_v2i128: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_movk_i32 s18, 0x7f -; GFX6-NEXT: s_mov_b32 s19, 0 +; GFX6-NEXT: s_mov_b64 s[18:19], 0x7f ; GFX6-NEXT: s_and_b64 s[22:23], s[16:17], s[18:19] ; GFX6-NEXT: s_andn2_b64 s[16:17], s[18:19], s[16:17] ; GFX6-NEXT: s_sub_i32 s17, s22, 64 @@ -5991,8 +5978,7 @@ define amdgpu_ps <2 x i128> @s_fshl_v2i128(<2 x i128> inreg %lhs, <2 x i128> inr ; ; GFX8-LABEL: s_fshl_v2i128: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_movk_i32 s18, 0x7f -; GFX8-NEXT: s_mov_b32 s19, 0 +; GFX8-NEXT: s_mov_b64 s[18:19], 0x7f ; GFX8-NEXT: s_and_b64 s[22:23], s[16:17], s[18:19] ; GFX8-NEXT: s_andn2_b64 s[16:17], s[18:19], s[16:17] ; GFX8-NEXT: s_sub_i32 s17, s22, 64 @@ -6080,8 +6066,7 @@ define amdgpu_ps <2 x i128> @s_fshl_v2i128(<2 x i128> inreg %lhs, <2 x i128> inr ; ; GFX9-LABEL: s_fshl_v2i128: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_movk_i32 s18, 0x7f -; GFX9-NEXT: s_mov_b32 s19, 0 +; GFX9-NEXT: s_mov_b64 s[18:19], 0x7f ; GFX9-NEXT: s_and_b64 s[22:23], s[16:17], s[18:19] ; GFX9-NEXT: s_andn2_b64 s[16:17], s[18:19], s[16:17] ; GFX9-NEXT: s_sub_i32 s17, s22, 64 @@ -6169,8 +6154,7 @@ define amdgpu_ps <2 x i128> @s_fshl_v2i128(<2 x i128> inreg %lhs, <2 x i128> inr ; ; GFX10-LABEL: s_fshl_v2i128: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_movk_i32 s18, 0x7f -; GFX10-NEXT: s_mov_b32 s19, 0 +; GFX10-NEXT: s_mov_b64 s[18:19], 0x7f ; GFX10-NEXT: s_and_b64 s[22:23], s[16:17], s[18:19] ; GFX10-NEXT: s_andn2_b64 s[16:17], s[18:19], s[16:17] ; GFX10-NEXT: s_sub_i32 s17, s22, 64 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll index b8a83aac36044..743fe18c5da97 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll @@ -4840,8 +4840,7 @@ define <2 x i64> @v_fshr_v2i64(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %amt) { define amdgpu_ps i128 @s_fshr_i128(i128 inreg %lhs, i128 inreg %rhs, i128 inreg %amt) { ; GFX6-LABEL: s_fshr_i128: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_movk_i32 s10, 0x7f -; GFX6-NEXT: s_mov_b32 s11, 0 +; GFX6-NEXT: s_mov_b64 s[10:11], 0x7f ; GFX6-NEXT: s_and_b64 s[12:13], s[8:9], s[10:11] ; GFX6-NEXT: s_andn2_b64 s[8:9], s[10:11], s[8:9] ; GFX6-NEXT: s_sub_i32 s9, 64, 1 @@ -4888,8 +4887,7 @@ define amdgpu_ps i128 @s_fshr_i128(i128 inreg %lhs, i128 inreg %rhs, i128 inreg ; ; GFX8-LABEL: s_fshr_i128: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_movk_i32 s10, 0x7f -; GFX8-NEXT: s_mov_b32 s11, 0 +; GFX8-NEXT: s_mov_b64 s[10:11], 0x7f ; GFX8-NEXT: s_and_b64 s[12:13], s[8:9], s[10:11] ; GFX8-NEXT: s_andn2_b64 s[8:9], s[10:11], s[8:9] ; GFX8-NEXT: s_sub_i32 s9, 64, 1 @@ -4936,8 +4934,7 @@ define amdgpu_ps i128 @s_fshr_i128(i128 inreg %lhs, i128 inreg %rhs, i128 inreg ; ; GFX9-LABEL: s_fshr_i128: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_movk_i32 s10, 0x7f -; GFX9-NEXT: s_mov_b32 s11, 0 +; GFX9-NEXT: s_mov_b64 s[10:11], 0x7f ; GFX9-NEXT: s_and_b64 s[12:13], s[8:9], s[10:11] ; GFX9-NEXT: s_andn2_b64 s[8:9], s[10:11], s[8:9] ; GFX9-NEXT: s_sub_i32 s9, 64, 1 @@ -4984,8 +4981,7 @@ define amdgpu_ps i128 @s_fshr_i128(i128 inreg %lhs, i128 inreg %rhs, i128 inreg ; ; GFX10-LABEL: s_fshr_i128: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_movk_i32 s10, 0x7f -; GFX10-NEXT: s_mov_b32 s11, 0 +; GFX10-NEXT: s_mov_b64 s[10:11], 0x7f ; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 ; GFX10-NEXT: s_and_b64 s[12:13], s[8:9], s[10:11] ; GFX10-NEXT: s_sub_i32 s13, 64, 1 @@ -5458,8 +5454,7 @@ define amdgpu_ps <4 x float> @v_fshr_i128_ssv(i128 inreg %lhs, i128 inreg %rhs, define amdgpu_ps <4 x float> @v_fshr_i128_svs(i128 inreg %lhs, i128 %rhs, i128 inreg %amt) { ; GFX6-LABEL: v_fshr_i128_svs: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_movk_i32 s6, 0x7f -; GFX6-NEXT: s_mov_b32 s7, 0 +; GFX6-NEXT: s_mov_b64 s[6:7], 0x7f ; GFX6-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] ; GFX6-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] ; GFX6-NEXT: s_sub_i32 s5, 64, 1 @@ -5515,8 +5510,7 @@ define amdgpu_ps <4 x float> @v_fshr_i128_svs(i128 inreg %lhs, i128 %rhs, i128 i ; ; GFX8-LABEL: v_fshr_i128_svs: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_movk_i32 s6, 0x7f -; GFX8-NEXT: s_mov_b32 s7, 0 +; GFX8-NEXT: s_mov_b64 s[6:7], 0x7f ; GFX8-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] ; GFX8-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] ; GFX8-NEXT: s_sub_i32 s5, 64, 1 @@ -5572,8 +5566,7 @@ define amdgpu_ps <4 x float> @v_fshr_i128_svs(i128 inreg %lhs, i128 %rhs, i128 i ; ; GFX9-LABEL: v_fshr_i128_svs: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_movk_i32 s6, 0x7f -; GFX9-NEXT: s_mov_b32 s7, 0 +; GFX9-NEXT: s_mov_b64 s[6:7], 0x7f ; GFX9-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] ; GFX9-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] ; GFX9-NEXT: s_sub_i32 s5, 64, 1 @@ -5629,8 +5622,7 @@ define amdgpu_ps <4 x float> @v_fshr_i128_svs(i128 inreg %lhs, i128 %rhs, i128 i ; ; GFX10-LABEL: v_fshr_i128_svs: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_movk_i32 s6, 0x7f -; GFX10-NEXT: s_mov_b32 s7, 0 +; GFX10-NEXT: s_mov_b64 s[6:7], 0x7f ; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 ; GFX10-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] ; GFX10-NEXT: s_sub_i32 s9, 64, 1 @@ -5689,8 +5681,7 @@ define amdgpu_ps <4 x float> @v_fshr_i128_svs(i128 inreg %lhs, i128 %rhs, i128 i define amdgpu_ps <4 x float> @v_fshr_i128_vss(i128 %lhs, i128 inreg %rhs, i128 inreg %amt) { ; GFX6-LABEL: v_fshr_i128_vss: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_movk_i32 s6, 0x7f -; GFX6-NEXT: s_mov_b32 s7, 0 +; GFX6-NEXT: s_mov_b64 s[6:7], 0x7f ; GFX6-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] ; GFX6-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] ; GFX6-NEXT: s_sub_i32 s5, 64, 1 @@ -5746,8 +5737,7 @@ define amdgpu_ps <4 x float> @v_fshr_i128_vss(i128 %lhs, i128 inreg %rhs, i128 i ; ; GFX8-LABEL: v_fshr_i128_vss: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_movk_i32 s6, 0x7f -; GFX8-NEXT: s_mov_b32 s7, 0 +; GFX8-NEXT: s_mov_b64 s[6:7], 0x7f ; GFX8-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] ; GFX8-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] ; GFX8-NEXT: s_sub_i32 s5, 64, 1 @@ -5803,8 +5793,7 @@ define amdgpu_ps <4 x float> @v_fshr_i128_vss(i128 %lhs, i128 inreg %rhs, i128 i ; ; GFX9-LABEL: v_fshr_i128_vss: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_movk_i32 s6, 0x7f -; GFX9-NEXT: s_mov_b32 s7, 0 +; GFX9-NEXT: s_mov_b64 s[6:7], 0x7f ; GFX9-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] ; GFX9-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] ; GFX9-NEXT: s_sub_i32 s5, 64, 1 @@ -5863,19 +5852,18 @@ define amdgpu_ps <4 x float> @v_fshr_i128_vss(i128 %lhs, i128 inreg %rhs, i128 i ; GFX10-NEXT: s_sub_i32 s6, 64, 1 ; GFX10-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] ; GFX10-NEXT: v_lshrrev_b64 v[4:5], s6, v[0:1] -; GFX10-NEXT: s_movk_i32 s6, 0x7f -; GFX10-NEXT: s_mov_b32 s7, 0 +; GFX10-NEXT: s_mov_b64 s[6:7], 0x7f ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] ; GFX10-NEXT: s_andn2_b64 s[8:9], s[6:7], s[4:5] ; GFX10-NEXT: s_and_b64 s[6:7], s[4:5], s[6:7] +; GFX10-NEXT: s_sub_i32 s4, 64, s8 ; GFX10-NEXT: v_or_b32_e32 v2, v4, v2 ; GFX10-NEXT: v_or_b32_e32 v3, v5, v3 -; GFX10-NEXT: s_sub_i32 s4, 64, s8 ; GFX10-NEXT: s_sub_i32 s5, s8, 64 ; GFX10-NEXT: s_cmp_lt_u32 s8, 64 ; GFX10-NEXT: v_lshrrev_b64 v[4:5], s4, v[0:1] -; GFX10-NEXT: v_lshlrev_b64 v[6:7], s8, v[2:3] ; GFX10-NEXT: s_cselect_b32 vcc_lo, 1, 0 +; GFX10-NEXT: v_lshlrev_b64 v[6:7], s8, v[2:3] ; GFX10-NEXT: s_cmp_eq_u32 s8, 0 ; GFX10-NEXT: v_lshlrev_b64 v[8:9], s8, v[0:1] ; GFX10-NEXT: s_cselect_b32 s7, 1, 0 @@ -6044,8 +6032,7 @@ define i128 @v_fshr_i128_65(i128 %lhs, i128 %rhs) { define amdgpu_ps <2 x i128> @s_fshr_v2i128(<2 x i128> inreg %lhs, <2 x i128> inreg %rhs, <2 x i128> inreg %amt) { ; GFX6-LABEL: s_fshr_v2i128: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_movk_i32 s18, 0x7f -; GFX6-NEXT: s_mov_b32 s19, 0 +; GFX6-NEXT: s_mov_b64 s[18:19], 0x7f ; GFX6-NEXT: s_sub_i32 s28, 64, 1 ; GFX6-NEXT: s_and_b64 s[22:23], s[16:17], s[18:19] ; GFX6-NEXT: s_andn2_b64 s[16:17], s[18:19], s[16:17] @@ -6133,8 +6120,7 @@ define amdgpu_ps <2 x i128> @s_fshr_v2i128(<2 x i128> inreg %lhs, <2 x i128> inr ; ; GFX8-LABEL: s_fshr_v2i128: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_movk_i32 s18, 0x7f -; GFX8-NEXT: s_mov_b32 s19, 0 +; GFX8-NEXT: s_mov_b64 s[18:19], 0x7f ; GFX8-NEXT: s_sub_i32 s28, 64, 1 ; GFX8-NEXT: s_and_b64 s[22:23], s[16:17], s[18:19] ; GFX8-NEXT: s_andn2_b64 s[16:17], s[18:19], s[16:17] @@ -6222,8 +6208,7 @@ define amdgpu_ps <2 x i128> @s_fshr_v2i128(<2 x i128> inreg %lhs, <2 x i128> inr ; ; GFX9-LABEL: s_fshr_v2i128: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_movk_i32 s18, 0x7f -; GFX9-NEXT: s_mov_b32 s19, 0 +; GFX9-NEXT: s_mov_b64 s[18:19], 0x7f ; GFX9-NEXT: s_sub_i32 s28, 64, 1 ; GFX9-NEXT: s_and_b64 s[22:23], s[16:17], s[18:19] ; GFX9-NEXT: s_andn2_b64 s[16:17], s[18:19], s[16:17] @@ -6311,13 +6296,12 @@ define amdgpu_ps <2 x i128> @s_fshr_v2i128(<2 x i128> inreg %lhs, <2 x i128> inr ; ; GFX10-LABEL: s_fshr_v2i128: ; GFX10: ; %bb.0: +; GFX10-NEXT: s_mov_b64 s[18:19], 0x7f ; GFX10-NEXT: s_sub_i32 s28, 64, 1 -; GFX10-NEXT: s_movk_i32 s18, 0x7f -; GFX10-NEXT: s_mov_b32 s19, 0 -; GFX10-NEXT: s_lshr_b64 s[24:25], s[0:1], s28 -; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 ; GFX10-NEXT: s_and_b64 s[22:23], s[16:17], s[18:19] ; GFX10-NEXT: s_andn2_b64 s[16:17], s[18:19], s[16:17] +; GFX10-NEXT: s_lshr_b64 s[24:25], s[0:1], s28 +; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 ; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 ; GFX10-NEXT: s_or_b64 s[2:3], s[24:25], s[2:3] ; GFX10-NEXT: s_sub_i32 s23, s16, 64 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.csub.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.csub.ll index 4754bd208e52a..1fa75feb9d83b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.csub.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.csub.ll @@ -19,8 +19,7 @@ define i32 @global_atomic_csub_offset(i32 addrspace(1)* %ptr, i32 %data) { ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: s_waitcnt_vscnt null, 0x0 -; GCN-NEXT: s_movk_i32 s4, 0x1000 -; GCN-NEXT: s_mov_b32 s5, 0 +; GCN-NEXT: s_mov_b64 s[4:5], 0x1000 ; GCN-NEXT: v_mov_b32_e32 v3, s4 ; GCN-NEXT: v_mov_b32_e32 v4, s5 ; GCN-NEXT: v_add_co_u32 v0, vcc_lo, v0, v3 @@ -50,8 +49,7 @@ define void @global_atomic_csub_offset_nortn(i32 addrspace(1)* %ptr, i32 %data) ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: s_waitcnt_vscnt null, 0x0 -; GCN-NEXT: s_movk_i32 s4, 0x1000 -; GCN-NEXT: s_mov_b32 s5, 0 +; GCN-NEXT: s_mov_b64 s[4:5], 0x1000 ; GCN-NEXT: v_mov_b32_e32 v3, s4 ; GCN-NEXT: v_mov_b32_e32 v4, s5 ; GCN-NEXT: v_add_co_u32 v0, vcc_lo, v0, v3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll index b5332e25dffd0..a0f02fad31b95 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll @@ -336,8 +336,7 @@ define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset_offset256(i32 addrspace( define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset256_offset(i32 addrspace(1)* %ptr, i32 inreg %soffset) { ; GFX6-LABEL: mubuf_store_vgpr_ptr_sgpr_offset256_offset: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_movk_i32 s4, 0x400 -; GFX6-NEXT: s_mov_b32 s5, 0 +; GFX6-NEXT: s_mov_b64 s[4:5], 0x400 ; GFX6-NEXT: v_mov_b32_e32 v2, s4 ; GFX6-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x200000 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2 @@ -352,8 +351,7 @@ define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset256_offset(i32 addrspace( ; ; GFX7-LABEL: mubuf_store_vgpr_ptr_sgpr_offset256_offset: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_movk_i32 s4, 0x400 -; GFX7-NEXT: s_mov_b32 s5, 0 +; GFX7-NEXT: s_mov_b64 s[4:5], 0x400 ; GFX7-NEXT: v_mov_b32_e32 v2, s4 ; GFX7-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x200000 ; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2 @@ -792,8 +790,7 @@ define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset_offset256(float addrspac define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset256_offset(float addrspace(1)* %ptr, i32 inreg %soffset) { ; GFX6-LABEL: mubuf_load_vgpr_ptr_sgpr_offset256_offset: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_movk_i32 s4, 0x400 -; GFX6-NEXT: s_mov_b32 s5, 0 +; GFX6-NEXT: s_mov_b64 s[4:5], 0x400 ; GFX6-NEXT: v_mov_b32_e32 v2, s4 ; GFX6-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x200000 ; GFX6-NEXT: v_mov_b32_e32 v3, s5 @@ -808,8 +805,7 @@ define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset256_offset(float addrspac ; ; GFX7-LABEL: mubuf_load_vgpr_ptr_sgpr_offset256_offset: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_movk_i32 s4, 0x400 -; GFX7-NEXT: s_mov_b32 s5, 0 +; GFX7-NEXT: s_mov_b64 s[4:5], 0x400 ; GFX7-NEXT: v_mov_b32_e32 v2, s4 ; GFX7-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x200000 ; GFX7-NEXT: v_mov_b32_e32 v3, s5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/roundeven.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/roundeven.ll index 6ba213c313829..472b315bc626d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/roundeven.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/roundeven.ll @@ -568,8 +568,8 @@ define double @v_roundeven_f64_fneg(double %x) { ; GFX6-NEXT: v_mov_b32_e32 v3, 0 ; GFX6-NEXT: v_or_b32_e32 v4, 0x43300000, v4 ; GFX6-NEXT: v_add_f64 v[5:6], -v[0:1], v[3:4] -; GFX6-NEXT: v_mov_b32_e32 v1, v0 ; GFX6-NEXT: s_mov_b32 s4, -1 +; GFX6-NEXT: v_mov_b32_e32 v1, v0 ; GFX6-NEXT: s_mov_b32 s5, 0x432fffff ; GFX6-NEXT: v_add_f64 v[3:4], v[5:6], -v[3:4] ; GFX6-NEXT: v_cmp_gt_f64_e64 vcc, |v[1:2]|, s[4:5] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll index a899655cfd96e..d82d0ce5637dc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll @@ -2509,8 +2509,7 @@ define i64 @v_sdiv_i64_pow2_shl_denom(i64 %x, i64 %y) { ; CHECK-LABEL: v_sdiv_i64_pow2_shl_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_movk_i32 s4, 0x1000 -; CHECK-NEXT: s_mov_b32 s5, 0 +; CHECK-NEXT: s_mov_b64 s[4:5], 0x1000 ; CHECK-NEXT: v_lshl_b64 v[4:5], s[4:5], v2 ; CHECK-NEXT: v_mov_b32_e32 v2, 0 ; CHECK-NEXT: v_or_b32_e32 v3, v1, v5 @@ -2703,8 +2702,7 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; GISEL-LABEL: v_sdiv_v2i64_pow2_shl_denom: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_movk_i32 s6, 0x1000 -; GISEL-NEXT: s_mov_b32 s7, 0 +; GISEL-NEXT: s_mov_b64 s[6:7], 0x1000 ; GISEL-NEXT: v_lshl_b64 v[4:5], s[6:7], v4 ; GISEL-NEXT: v_ashrrev_i32_e32 v10, 31, v1 ; GISEL-NEXT: v_ashrrev_i32_e32 v7, 31, v5 @@ -2996,8 +2994,7 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-LABEL: v_sdiv_v2i64_pow2_shl_denom: ; CGP: ; %bb.0: ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CGP-NEXT: s_movk_i32 s4, 0x1000 -; CGP-NEXT: s_mov_b32 s5, 0 +; CGP-NEXT: s_mov_b64 s[4:5], 0x1000 ; CGP-NEXT: v_lshl_b64 v[10:11], s[4:5], v4 ; CGP-NEXT: v_mov_b32_e32 v7, v1 ; CGP-NEXT: v_mov_b32_e32 v5, v0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll index 2ae38a64fe34a..1b7184742e5dc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll @@ -2473,8 +2473,7 @@ define i64 @v_srem_i64_pow2_shl_denom(i64 %x, i64 %y) { ; CHECK-LABEL: v_srem_i64_pow2_shl_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_movk_i32 s4, 0x1000 -; CHECK-NEXT: s_mov_b32 s5, 0 +; CHECK-NEXT: s_mov_b64 s[4:5], 0x1000 ; CHECK-NEXT: v_lshl_b64 v[4:5], s[4:5], v2 ; CHECK-NEXT: v_mov_b32_e32 v2, 0 ; CHECK-NEXT: v_or_b32_e32 v3, v1, v5 @@ -2663,8 +2662,7 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; GISEL-LABEL: v_srem_v2i64_pow2_shl_denom: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_movk_i32 s6, 0x1000 -; GISEL-NEXT: s_mov_b32 s7, 0 +; GISEL-NEXT: s_mov_b64 s[6:7], 0x1000 ; GISEL-NEXT: v_lshl_b64 v[4:5], s[6:7], v4 ; GISEL-NEXT: v_ashrrev_i32_e32 v9, 31, v1 ; GISEL-NEXT: v_ashrrev_i32_e32 v7, 31, v5 @@ -2952,8 +2950,7 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-LABEL: v_srem_v2i64_pow2_shl_denom: ; CGP: ; %bb.0: ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CGP-NEXT: s_movk_i32 s4, 0x1000 -; CGP-NEXT: s_mov_b32 s5, 0 +; CGP-NEXT: s_mov_b64 s[4:5], 0x1000 ; CGP-NEXT: v_lshl_b64 v[10:11], s[4:5], v4 ; CGP-NEXT: v_mov_b32_e32 v7, v1 ; CGP-NEXT: v_mov_b32_e32 v5, v0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll index 4ee838f942b39..82f35daae56c6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll @@ -2291,8 +2291,7 @@ define i64 @v_udiv_i64_pow2_shl_denom(i64 %x, i64 %y) { ; CHECK-LABEL: v_udiv_i64_pow2_shl_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_movk_i32 s4, 0x1000 -; CHECK-NEXT: s_mov_b32 s5, 0 +; CHECK-NEXT: s_mov_b64 s[4:5], 0x1000 ; CHECK-NEXT: v_lshl_b64 v[4:5], s[4:5], v2 ; CHECK-NEXT: v_or_b32_e32 v3, v1, v5 ; CHECK-NEXT: v_mov_b32_e32 v2, 0 @@ -2470,8 +2469,7 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; GISEL-LABEL: v_udiv_v2i64_pow2_shl_denom: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_movk_i32 s4, 0x1000 -; GISEL-NEXT: s_mov_b32 s5, 0 +; GISEL-NEXT: s_mov_b64 s[4:5], 0x1000 ; GISEL-NEXT: v_lshl_b64 v[4:5], s[4:5], v4 ; GISEL-NEXT: v_lshl_b64 v[6:7], s[4:5], v6 ; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v4 @@ -2735,8 +2733,7 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CGP-NEXT: v_mov_b32_e32 v5, v0 ; CGP-NEXT: v_mov_b32_e32 v7, v1 -; CGP-NEXT: s_movk_i32 s4, 0x1000 -; CGP-NEXT: s_mov_b32 s5, 0 +; CGP-NEXT: s_mov_b64 s[4:5], 0x1000 ; CGP-NEXT: v_lshl_b64 v[10:11], s[4:5], v4 ; CGP-NEXT: v_lshl_b64 v[8:9], s[4:5], v6 ; CGP-NEXT: v_or_b32_e32 v1, v7, v11 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll index 580dc2f4b81d4..69cb3b956cc25 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll @@ -1651,8 +1651,7 @@ define i64 @v_urem_i64_pow2_shl_denom(i64 %x, i64 %y) { ; CHECK-LABEL: v_urem_i64_pow2_shl_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_movk_i32 s4, 0x1000 -; CHECK-NEXT: s_mov_b32 s5, 0 +; CHECK-NEXT: s_mov_b64 s[4:5], 0x1000 ; CHECK-NEXT: v_lshl_b64 v[4:5], s[4:5], v2 ; CHECK-NEXT: v_or_b32_e32 v3, v1, v5 ; CHECK-NEXT: v_mov_b32_e32 v2, 0 @@ -1827,8 +1826,7 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; GISEL-LABEL: v_urem_v2i64_pow2_shl_denom: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_movk_i32 s4, 0x1000 -; GISEL-NEXT: s_mov_b32 s5, 0 +; GISEL-NEXT: s_mov_b64 s[4:5], 0x1000 ; GISEL-NEXT: v_lshl_b64 v[4:5], s[4:5], v4 ; GISEL-NEXT: v_lshl_b64 v[6:7], s[4:5], v6 ; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v4 @@ -2090,8 +2088,7 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CGP-NEXT: v_mov_b32_e32 v5, v0 ; CGP-NEXT: v_mov_b32_e32 v7, v1 -; CGP-NEXT: s_movk_i32 s4, 0x1000 -; CGP-NEXT: s_mov_b32 s5, 0 +; CGP-NEXT: s_mov_b64 s[4:5], 0x1000 ; CGP-NEXT: v_lshl_b64 v[10:11], s[4:5], v4 ; CGP-NEXT: v_lshl_b64 v[8:9], s[4:5], v6 ; CGP-NEXT: v_or_b32_e32 v1, v7, v11 diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll index 2f752c2ceff15..c7a8d01fba838 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll @@ -67,6 +67,7 @@ define amdgpu_kernel void @udiv_i32(i32 addrspace(1)* %out, i32 %x, i32 %y) { ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: udiv_i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c @@ -157,6 +158,7 @@ define amdgpu_kernel void @urem_i32(i32 addrspace(1)* %out, i32 %x, i32 %y) { ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: urem_i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c @@ -268,6 +270,7 @@ define amdgpu_kernel void @sdiv_i32(i32 addrspace(1)* %out, i32 %x, i32 %y) { ; GFX6-NEXT: v_subrev_i32_e32 v0, vcc, s2, v0 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: sdiv_i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c @@ -383,6 +386,7 @@ define amdgpu_kernel void @srem_i32(i32 addrspace(1)* %out, i32 %x, i32 %y) { ; GFX6-NEXT: v_subrev_i32_e32 v0, vcc, s4, v0 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: srem_i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c @@ -464,6 +468,7 @@ define amdgpu_kernel void @udiv_i16(i16 addrspace(1)* %out, i16 %x, i16 %y) { ; GFX6-NEXT: v_addc_u32_e32 v0, vcc, 0, v3, vcc ; GFX6-NEXT: buffer_store_short v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: udiv_i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dword s2, s[0:1], 0x2c @@ -534,6 +539,7 @@ define amdgpu_kernel void @urem_i16(i16 addrspace(1)* %out, i16 %x, i16 %y) { ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s4, v0 ; GFX6-NEXT: buffer_store_short v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: urem_i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dword s2, s[0:1], 0x2c @@ -612,6 +618,7 @@ define amdgpu_kernel void @sdiv_i16(i16 addrspace(1)* %out, i16 %x, i16 %y) { ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GFX6-NEXT: buffer_store_short v0, off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: sdiv_i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -696,6 +703,7 @@ define amdgpu_kernel void @srem_i16(i16 addrspace(1)* %out, i16 %x, i16 %y) { ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s4, v0 ; GFX6-NEXT: buffer_store_short v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: srem_i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c @@ -768,6 +776,7 @@ define amdgpu_kernel void @udiv_i8(i8 addrspace(1)* %out, i8 %x, i8 %y) { ; GFX6-NEXT: v_addc_u32_e32 v0, vcc, 0, v3, vcc ; GFX6-NEXT: buffer_store_byte v0, off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: udiv_i8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dword s2, s[0:1], 0x2c @@ -835,6 +844,7 @@ define amdgpu_kernel void @urem_i8(i8 addrspace(1)* %out, i8 %x, i8 %y) { ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s4, v0 ; GFX6-NEXT: buffer_store_byte v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: urem_i8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dword s2, s[0:1], 0x2c @@ -912,6 +922,7 @@ define amdgpu_kernel void @sdiv_i8(i8 addrspace(1)* %out, i8 %x, i8 %y) { ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GFX6-NEXT: buffer_store_byte v0, off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: sdiv_i8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -997,6 +1008,7 @@ define amdgpu_kernel void @srem_i8(i8 addrspace(1)* %out, i8 %x, i8 %y) { ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s0, v0 ; GFX6-NEXT: buffer_store_byte v0, off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: srem_i8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c @@ -1249,6 +1261,7 @@ define amdgpu_kernel void @udiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %x ; GFX6-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[12:15], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: udiv_v4i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x34 @@ -1542,6 +1555,7 @@ define amdgpu_kernel void @urem_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %x ; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: urem_v4i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x34 @@ -1915,6 +1929,7 @@ define amdgpu_kernel void @sdiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %x ; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, s2, v3 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: sdiv_v4i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x34 @@ -2308,6 +2323,7 @@ define amdgpu_kernel void @srem_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %x ; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, s5, v3 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: srem_v4i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x34 @@ -2567,6 +2583,7 @@ define amdgpu_kernel void @udiv_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: udiv_v4i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -2791,6 +2808,7 @@ define amdgpu_kernel void @urem_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: urem_v4i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -3043,6 +3061,7 @@ define amdgpu_kernel void @sdiv_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: sdiv_v4i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -3322,6 +3341,7 @@ define amdgpu_kernel void @srem_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: srem_v4i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -3455,6 +3475,7 @@ define amdgpu_kernel void @udiv_i3(i3 addrspace(1)* %out, i3 %x, i3 %y) { ; GFX6-NEXT: v_and_b32_e32 v0, 7, v0 ; GFX6-NEXT: buffer_store_byte v0, off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: udiv_i3: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -3528,6 +3549,7 @@ define amdgpu_kernel void @urem_i3(i3 addrspace(1)* %out, i3 %x, i3 %y) { ; GFX6-NEXT: v_and_b32_e32 v0, 7, v0 ; GFX6-NEXT: buffer_store_byte v0, off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: urem_i3: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dword s2, s[0:1], 0x2c @@ -3609,6 +3631,7 @@ define amdgpu_kernel void @sdiv_i3(i3 addrspace(1)* %out, i3 %x, i3 %y) { ; GFX6-NEXT: v_and_b32_e32 v0, 7, v0 ; GFX6-NEXT: buffer_store_byte v0, off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: sdiv_i3: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -3696,6 +3719,7 @@ define amdgpu_kernel void @srem_i3(i3 addrspace(1)* %out, i3 %x, i3 %y) { ; GFX6-NEXT: v_and_b32_e32 v0, 7, v0 ; GFX6-NEXT: buffer_store_byte v0, off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: srem_i3: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c @@ -3843,6 +3867,7 @@ define amdgpu_kernel void @udiv_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x ; GFX6-NEXT: buffer_store_short v2, off, s[4:7], 0 offset:4 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: udiv_v3i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -4023,6 +4048,7 @@ define amdgpu_kernel void @urem_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x ; GFX6-NEXT: buffer_store_short v2, off, s[4:7], 0 offset:4 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: urem_v3i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -4219,6 +4245,7 @@ define amdgpu_kernel void @sdiv_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x ; GFX6-NEXT: buffer_store_short v2, off, s[4:7], 0 offset:4 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: sdiv_v3i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -4438,6 +4465,7 @@ define amdgpu_kernel void @srem_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x ; GFX6-NEXT: buffer_store_short v2, off, s[4:7], 0 offset:4 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: srem_v3i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 @@ -4631,6 +4659,7 @@ define amdgpu_kernel void @udiv_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x ; GFX6-NEXT: v_and_b32_e32 v0, 0x1fff, v1 ; GFX6-NEXT: buffer_store_short v0, off, s[4:7], 0 offset:4 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: udiv_v3i15: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -4827,6 +4856,7 @@ define amdgpu_kernel void @urem_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x ; GFX6-NEXT: v_and_b32_e32 v0, 0x1fff, v1 ; GFX6-NEXT: buffer_store_short v0, off, s[4:7], 0 offset:4 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: urem_v3i15: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -5043,6 +5073,7 @@ define amdgpu_kernel void @sdiv_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x ; GFX6-NEXT: v_and_b32_e32 v0, 0x1fff, v1 ; GFX6-NEXT: buffer_store_short v0, off, s[4:7], 0 offset:4 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: sdiv_v3i15: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -5285,6 +5316,7 @@ define amdgpu_kernel void @srem_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x ; GFX6-NEXT: v_and_b32_e32 v0, 0x1fff, v1 ; GFX6-NEXT: buffer_store_short v0, off, s[4:7], 0 offset:4 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: srem_v3i15: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -5394,6 +5426,7 @@ define amdgpu_kernel void @udiv_i32_oddk_denom(i32 addrspace(1)* %out, i32 %x) { ; GFX6-NEXT: v_lshrrev_b32_e32 v0, 20, v0 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: udiv_i32_oddk_denom: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -5430,6 +5463,7 @@ define amdgpu_kernel void @udiv_i32_pow2k_denom(i32 addrspace(1)* %out, i32 %x) ; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: udiv_i32_pow2k_denom: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -5464,6 +5498,7 @@ define amdgpu_kernel void @udiv_i32_pow2_shl_denom(i32 addrspace(1)* %out, i32 % ; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: udiv_i32_pow2_shl_denom: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -5505,6 +5540,7 @@ define amdgpu_kernel void @udiv_v2i32_pow2k_denom(<2 x i32> addrspace(1)* %out, ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: udiv_v2i32_pow2k_denom: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -5550,6 +5586,7 @@ define amdgpu_kernel void @udiv_v2i32_mixed_pow2k_denom(<2 x i32> addrspace(1)* ; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: udiv_v2i32_mixed_pow2k_denom: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -5694,6 +5731,7 @@ define amdgpu_kernel void @udiv_v2i32_pow2_shl_denom(<2 x i32> addrspace(1)* %ou ; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: udiv_v2i32_pow2_shl_denom: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 @@ -5776,6 +5814,7 @@ define amdgpu_kernel void @urem_i32_oddk_denom(i32 addrspace(1)* %out, i32 %x) { ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s4, v0 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: urem_i32_oddk_denom: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -5814,6 +5853,7 @@ define amdgpu_kernel void @urem_i32_pow2k_denom(i32 addrspace(1)* %out, i32 %x) ; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: urem_i32_pow2k_denom: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -5849,6 +5889,7 @@ define amdgpu_kernel void @urem_i32_pow2_shl_denom(i32 addrspace(1)* %out, i32 % ; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: urem_i32_pow2_shl_denom: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -5892,6 +5933,7 @@ define amdgpu_kernel void @urem_v2i32_pow2k_denom(<2 x i32> addrspace(1)* %out, ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: urem_v2i32_pow2k_denom: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -6025,6 +6067,7 @@ define amdgpu_kernel void @urem_v2i32_pow2_shl_denom(<2 x i32> addrspace(1)* %ou ; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: urem_v2i32_pow2_shl_denom: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 @@ -6100,6 +6143,7 @@ define amdgpu_kernel void @sdiv_i32_oddk_denom(i32 addrspace(1)* %out, i32 %x) { ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v1, v0 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: sdiv_i32_oddk_denom: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -6139,6 +6183,7 @@ define amdgpu_kernel void @sdiv_i32_pow2k_denom(i32 addrspace(1)* %out, i32 %x) ; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: sdiv_i32_pow2k_denom: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -6202,6 +6247,7 @@ define amdgpu_kernel void @sdiv_i32_pow2_shl_denom(i32 addrspace(1)* %out, i32 % ; GFX6-NEXT: v_subrev_i32_e32 v0, vcc, s4, v0 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: sdiv_i32_pow2_shl_denom: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c @@ -6275,6 +6321,7 @@ define amdgpu_kernel void @sdiv_v2i32_pow2k_denom(<2 x i32> addrspace(1)* %out, ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: sdiv_v2i32_pow2k_denom: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -6329,6 +6376,7 @@ define amdgpu_kernel void @ssdiv_v2i32_mixed_pow2k_denom(<2 x i32> addrspace(1)* ; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: ssdiv_v2i32_mixed_pow2k_denom: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -6511,6 +6559,7 @@ define amdgpu_kernel void @sdiv_v2i32_pow2_shl_denom(<2 x i32> addrspace(1)* %ou ; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, s2, v1 ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: sdiv_v2i32_pow2_shl_denom: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 @@ -6610,6 +6659,7 @@ define amdgpu_kernel void @srem_i32_oddk_denom(i32 addrspace(1)* %out, i32 %x) { ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s4, v0 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: srem_i32_oddk_denom: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -6652,6 +6702,7 @@ define amdgpu_kernel void @srem_i32_pow2k_denom(i32 addrspace(1)* %out, i32 %x) ; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: srem_i32_pow2k_denom: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -6713,6 +6764,7 @@ define amdgpu_kernel void @srem_i32_pow2_shl_denom(i32 addrspace(1)* %out, i32 % ; GFX6-NEXT: v_subrev_i32_e32 v0, vcc, s4, v0 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: srem_i32_pow2_shl_denom: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c @@ -6787,6 +6839,7 @@ define amdgpu_kernel void @srem_v2i32_pow2k_denom(<2 x i32> addrspace(1)* %out, ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: srem_v2i32_pow2k_denom: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -6960,6 +7013,7 @@ define amdgpu_kernel void @srem_v2i32_pow2_shl_denom(<2 x i32> addrspace(1)* %ou ; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, s0, v1 ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: srem_v2i32_pow2_shl_denom: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -7159,6 +7213,7 @@ define amdgpu_kernel void @udiv_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: udiv_i64_oddk_denom: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b32_e32 v0, 0x4f176a73 @@ -7303,6 +7358,7 @@ define amdgpu_kernel void @udiv_i64_pow2k_denom(i64 addrspace(1)* %out, i64 %x) ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: udiv_i64_pow2k_denom: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -7340,6 +7396,7 @@ define amdgpu_kernel void @udiv_i64_pow2_shl_denom(i64 addrspace(1)* %out, i64 % ; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: udiv_i64_pow2_shl_denom: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 @@ -7384,6 +7441,7 @@ define amdgpu_kernel void @udiv_v2i64_pow2k_denom(<2 x i64> addrspace(1)* %out, ; GFX6-NEXT: v_mov_b32_e32 v3, s3 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: udiv_v2i64_pow2k_denom: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -7525,6 +7583,7 @@ define amdgpu_kernel void @udiv_v2i64_mixed_pow2k_denom(<2 x i64> addrspace(1)* ; GFX6-NEXT: v_mov_b32_e32 v1, s3 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: udiv_v2i64_mixed_pow2k_denom: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b32_e32 v0, 0x4f800000 @@ -7672,6 +7731,7 @@ define amdgpu_kernel void @udiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX6-NEXT: v_mov_b32_e32 v3, s3 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: udiv_v2i64_pow2_shl_denom: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -7824,6 +7884,7 @@ define amdgpu_kernel void @urem_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: urem_i64_oddk_denom: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b32_e32 v0, 0x4f1761f8 @@ -7967,6 +8028,7 @@ define amdgpu_kernel void @urem_i64_pow2k_denom(i64 addrspace(1)* %out, i64 %x) ; GFX6-NEXT: v_mov_b32_e32 v0, s4 ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: urem_i64_pow2k_denom: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -7997,8 +8059,7 @@ define amdgpu_kernel void @urem_i64_pow2_shl_denom(i64 addrspace(1)* %out, i64 % ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_mov_b32 s0, s4 ; GFX6-NEXT: s_mov_b32 s1, s5 -; GFX6-NEXT: s_mov_b32 s5, 0 -; GFX6-NEXT: s_movk_i32 s4, 0x1000 +; GFX6-NEXT: s_mov_b64 s[4:5], 0x1000 ; GFX6-NEXT: s_lshl_b64 s[4:5], s[4:5], s8 ; GFX6-NEXT: s_add_u32 s4, s4, -1 ; GFX6-NEXT: s_addc_u32 s5, s5, -1 @@ -8007,12 +8068,12 @@ define amdgpu_kernel void @urem_i64_pow2_shl_denom(i64 addrspace(1)* %out, i64 % ; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: urem_i64_pow2_shl_denom: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-NEXT: s_load_dword s2, s[0:1], 0x34 -; GFX9-NEXT: s_mov_b32 s1, 0 -; GFX9-NEXT: s_movk_i32 s0, 0x1000 +; GFX9-NEXT: s_mov_b64 s[0:1], 0x1000 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 @@ -8056,6 +8117,7 @@ define amdgpu_kernel void @urem_v2i64_pow2k_denom(<2 x i64> addrspace(1)* %out, ; GFX6-NEXT: v_mov_b32_e32 v3, v1 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: urem_v2i64_pow2k_denom: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -8094,8 +8156,7 @@ define amdgpu_kernel void @urem_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 ; GFX6-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0xd ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x11 -; GFX6-NEXT: s_mov_b32 s13, 0 -; GFX6-NEXT: s_movk_i32 s12, 0x1000 +; GFX6-NEXT: s_mov_b64 s[12:13], 0x1000 ; GFX6-NEXT: s_mov_b32 s7, 0xf000 ; GFX6-NEXT: s_mov_b32 s6, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) @@ -8113,13 +8174,13 @@ define amdgpu_kernel void @urem_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX6-NEXT: v_mov_b32_e32 v3, s3 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: urem_v2i64_pow2_shl_denom: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 ; GFX9-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x44 -; GFX9-NEXT: s_mov_b32 s1, 0 -; GFX9-NEXT: s_movk_i32 s0, 0x1000 +; GFX9-NEXT: s_mov_b64 s[0:1], 0x1000 ; GFX9-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_lshl_b64 s[10:11], s[0:1], s10 @@ -8267,6 +8328,7 @@ define amdgpu_kernel void @sdiv_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GFX6-NEXT: v_subb_u32_e32 v1, vcc, v1, v2, vcc ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: sdiv_i64_oddk_denom: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b32_e32 v0, 0x4f800000 @@ -8410,6 +8472,7 @@ define amdgpu_kernel void @sdiv_i64_pow2k_denom(i64 addrspace(1)* %out, i64 %x) ; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: sdiv_i64_pow2k_denom: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -8439,10 +8502,10 @@ define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(i64 addrspace(1)* %out, i64 % ; GFX6-LABEL: sdiv_i64_pow2_shl_denom: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dword s4, s[0:1], 0xd -; GFX6-NEXT: s_mov_b32 s3, 0 -; GFX6-NEXT: s_movk_i32 s2, 0x1000 +; GFX6-NEXT: s_mov_b64 s[2:3], 0x1000 ; GFX6-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9 ; GFX6-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-NEXT: s_mov_b32 s6, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], s4 ; GFX6-NEXT: s_ashr_i32 s12, s3, 31 @@ -8458,7 +8521,6 @@ define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(i64 addrspace(1)* %out, i64 % ; GFX6-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 ; GFX6-NEXT: v_rcp_f32_e32 v0, v0 ; GFX6-NEXT: s_mov_b32 s15, s14 -; GFX6-NEXT: s_mov_b32 s6, -1 ; GFX6-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GFX6-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GFX6-NEXT: v_trunc_f32_e32 v1, v1 @@ -8576,11 +8638,11 @@ define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(i64 addrspace(1)* %out, i64 % ; GFX6-NEXT: v_subb_u32_e32 v1, vcc, v1, v2, vcc ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: sdiv_i64_pow2_shl_denom: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dword s4, s[0:1], 0x34 -; GFX9-NEXT: s_mov_b32 s3, 0 -; GFX9-NEXT: s_movk_i32 s2, 0x1000 +; GFX9-NEXT: s_mov_b64 s[2:3], 0x1000 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], s4 @@ -8753,6 +8815,7 @@ define amdgpu_kernel void @sdiv_v2i64_pow2k_denom(<2 x i64> addrspace(1)* %out, ; GFX6-NEXT: v_mov_b32_e32 v3, s3 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: sdiv_v2i64_pow2k_denom: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -8917,6 +8980,7 @@ define amdgpu_kernel void @ssdiv_v2i64_mixed_pow2k_denom(<2 x i64> addrspace(1)* ; GFX6-NEXT: v_mov_b32_e32 v1, s3 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: ssdiv_v2i64_mixed_pow2k_denom: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b32_e32 v0, 0x457ff000 @@ -9064,10 +9128,10 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX6-LABEL: sdiv_v2i64_pow2_shl_denom: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x11 -; GFX6-NEXT: s_mov_b32 s3, 0 -; GFX6-NEXT: s_movk_i32 s2, 0x1000 +; GFX6-NEXT: s_mov_b64 s[2:3], 0x1000 ; GFX6-NEXT: s_mov_b32 s18, 0x4f800000 ; GFX6-NEXT: s_mov_b32 s19, 0x5f7ffffc +; GFX6-NEXT: s_mov_b32 s20, 0x2f800000 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_lshl_b64 s[12:13], s[2:3], s6 ; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], s4 @@ -9078,12 +9142,11 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX6-NEXT: s_xor_b64 s[14:15], s[2:3], s[16:17] ; GFX6-NEXT: v_cvt_f32_u32_e32 v0, s14 ; GFX6-NEXT: v_cvt_f32_u32_e32 v1, s15 -; GFX6-NEXT: s_mov_b32 s20, 0x2f800000 ; GFX6-NEXT: s_mov_b32 s21, 0xcf800000 ; GFX6-NEXT: s_sub_u32 s6, 0, s14 +; GFX6-NEXT: s_subb_u32 s7, 0, s15 ; GFX6-NEXT: v_mac_f32_e32 v0, s18, v1 ; GFX6-NEXT: v_rcp_f32_e32 v0, v0 -; GFX6-NEXT: s_subb_u32 s7, 0, s15 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 ; GFX6-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0xd ; GFX6-NEXT: v_mul_f32_e32 v0, s19, v0 @@ -9330,13 +9393,14 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX6-NEXT: v_subb_u32_e32 v3, vcc, v3, v4, vcc ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: sdiv_v2i64_pow2_shl_denom: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x44 -; GFX9-NEXT: s_mov_b32 s3, 0 -; GFX9-NEXT: s_movk_i32 s2, 0x1000 +; GFX9-NEXT: s_mov_b64 s[2:3], 0x1000 ; GFX9-NEXT: s_mov_b32 s18, 0x4f800000 ; GFX9-NEXT: s_mov_b32 s19, 0x5f7ffffc +; GFX9-NEXT: s_mov_b32 s20, 0x2f800000 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_lshl_b64 s[8:9], s[2:3], s6 ; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], s4 @@ -9347,12 +9411,11 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX9-NEXT: s_xor_b64 s[10:11], s[2:3], s[12:13] ; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s10 ; GFX9-NEXT: v_cvt_f32_u32_e32 v1, s11 -; GFX9-NEXT: s_mov_b32 s20, 0x2f800000 ; GFX9-NEXT: s_mov_b32 s21, 0xcf800000 ; GFX9-NEXT: s_sub_u32 s14, 0, s10 +; GFX9-NEXT: s_subb_u32 s4, 0, s11 ; GFX9-NEXT: v_mac_f32_e32 v0, s18, v1 ; GFX9-NEXT: v_rcp_f32_e32 v0, v0 -; GFX9-NEXT: s_subb_u32 s4, 0, s11 ; GFX9-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-NEXT: v_mul_f32_e32 v0, s19, v0 ; GFX9-NEXT: v_mul_f32_e32 v1, s20, v0 @@ -9727,6 +9790,7 @@ define amdgpu_kernel void @srem_i64_oddk_denom(i64 addrspace(1)* %out, i64 %x) { ; GFX6-NEXT: v_subb_u32_e32 v1, vcc, v1, v2, vcc ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: srem_i64_oddk_denom: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b32_e32 v0, 0x4f800000 @@ -9870,6 +9934,7 @@ define amdgpu_kernel void @srem_i64_pow2k_denom(i64 addrspace(1)* %out, i64 %x) ; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: srem_i64_pow2k_denom: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -9901,10 +9966,10 @@ define amdgpu_kernel void @srem_i64_pow2_shl_denom(i64 addrspace(1)* %out, i64 % ; GFX6-LABEL: srem_i64_pow2_shl_denom: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dword s4, s[0:1], 0xd -; GFX6-NEXT: s_mov_b32 s3, 0 -; GFX6-NEXT: s_movk_i32 s2, 0x1000 +; GFX6-NEXT: s_mov_b64 s[2:3], 0x1000 ; GFX6-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9 ; GFX6-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-NEXT: s_mov_b32 s6, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], s4 ; GFX6-NEXT: s_ashr_i32 s4, s3, 31 @@ -9920,15 +9985,14 @@ define amdgpu_kernel void @srem_i64_pow2_shl_denom(i64 addrspace(1)* %out, i64 % ; GFX6-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 ; GFX6-NEXT: v_rcp_f32_e32 v0, v0 ; GFX6-NEXT: s_mov_b32 s15, s14 -; GFX6-NEXT: s_mov_b32 s6, -1 ; GFX6-NEXT: s_mov_b32 s4, s8 +; GFX6-NEXT: s_mov_b32 s5, s9 ; GFX6-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GFX6-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GFX6-NEXT: v_trunc_f32_e32 v1, v1 ; GFX6-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 ; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1 -; GFX6-NEXT: s_mov_b32 s5, s9 ; GFX6-NEXT: v_mul_hi_u32 v3, s2, v0 ; GFX6-NEXT: v_mul_lo_u32 v2, s2, v1 ; GFX6-NEXT: v_mul_lo_u32 v5, s3, v0 @@ -10036,11 +10100,11 @@ define amdgpu_kernel void @srem_i64_pow2_shl_denom(i64 addrspace(1)* %out, i64 % ; GFX6-NEXT: v_subb_u32_e32 v1, vcc, v1, v2, vcc ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: srem_i64_pow2_shl_denom: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dword s4, s[0:1], 0x34 -; GFX9-NEXT: s_mov_b32 s3, 0 -; GFX9-NEXT: s_movk_i32 s2, 0x1000 +; GFX9-NEXT: s_mov_b64 s[2:3], 0x1000 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], s4 @@ -10216,6 +10280,7 @@ define amdgpu_kernel void @srem_v2i64_pow2k_denom(<2 x i64> addrspace(1)* %out, ; GFX6-NEXT: v_mov_b32_e32 v3, s3 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: srem_v2i64_pow2k_denom: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -10265,10 +10330,10 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX6-LABEL: srem_v2i64_pow2_shl_denom: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x11 -; GFX6-NEXT: s_mov_b32 s3, 0 -; GFX6-NEXT: s_movk_i32 s2, 0x1000 +; GFX6-NEXT: s_mov_b64 s[2:3], 0x1000 ; GFX6-NEXT: s_mov_b32 s18, 0x4f800000 ; GFX6-NEXT: s_mov_b32 s19, 0x5f7ffffc +; GFX6-NEXT: s_mov_b32 s20, 0x2f800000 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_lshl_b64 s[14:15], s[2:3], s6 ; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], s4 @@ -10279,12 +10344,11 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX6-NEXT: s_xor_b64 s[16:17], s[2:3], s[4:5] ; GFX6-NEXT: v_cvt_f32_u32_e32 v0, s16 ; GFX6-NEXT: v_cvt_f32_u32_e32 v1, s17 -; GFX6-NEXT: s_mov_b32 s20, 0x2f800000 ; GFX6-NEXT: s_mov_b32 s21, 0xcf800000 ; GFX6-NEXT: s_sub_u32 s6, 0, s16 +; GFX6-NEXT: s_subb_u32 s7, 0, s17 ; GFX6-NEXT: v_mac_f32_e32 v0, s18, v1 ; GFX6-NEXT: v_rcp_f32_e32 v0, v0 -; GFX6-NEXT: s_subb_u32 s7, 0, s17 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 ; GFX6-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0xd ; GFX6-NEXT: v_mul_f32_e32 v0, s19, v0 @@ -10527,13 +10591,14 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX6-NEXT: v_subb_u32_e32 v3, vcc, v3, v4, vcc ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; GFX6-NEXT: s_endpgm +; ; GFX9-LABEL: srem_v2i64_pow2_shl_denom: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x44 -; GFX9-NEXT: s_mov_b32 s3, 0 -; GFX9-NEXT: s_movk_i32 s2, 0x1000 +; GFX9-NEXT: s_mov_b64 s[2:3], 0x1000 ; GFX9-NEXT: s_mov_b32 s16, 0x4f800000 ; GFX9-NEXT: s_mov_b32 s17, 0x5f7ffffc +; GFX9-NEXT: s_mov_b32 s18, 0x2f800000 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_lshl_b64 s[12:13], s[2:3], s6 ; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], s4 @@ -10544,12 +10609,11 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou ; GFX9-NEXT: s_xor_b64 s[14:15], s[2:3], s[4:5] ; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s14 ; GFX9-NEXT: v_cvt_f32_u32_e32 v1, s15 -; GFX9-NEXT: s_mov_b32 s18, 0x2f800000 ; GFX9-NEXT: s_mov_b32 s19, 0xcf800000 ; GFX9-NEXT: s_sub_u32 s4, 0, s14 +; GFX9-NEXT: s_subb_u32 s5, 0, s15 ; GFX9-NEXT: v_mac_f32_e32 v0, s16, v1 ; GFX9-NEXT: v_rcp_f32_e32 v0, v0 -; GFX9-NEXT: s_subb_u32 s5, 0, s15 ; GFX9-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x34 ; GFX9-NEXT: v_mul_f32_e32 v0, s17, v0 diff --git a/llvm/test/CodeGen/AMDGPU/combine-sreg64-inits.mir b/llvm/test/CodeGen/AMDGPU/combine-sreg64-inits.mir new file mode 100644 index 0000000000000..5f49f6d4ea8fc --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/combine-sreg64-inits.mir @@ -0,0 +1,98 @@ +# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass=liveintervals,amdgpu-pre-ra-optimizations %s -o - | FileCheck -check-prefix=GCN %s + +--- +# GCN-LABEL: name: combine_sreg64_inits +# GCN: %0:sgpr_64 = S_MOV_B64_IMM_PSEUDO 8589934593 +# GCN: S_NOP 0 +name: combine_sreg64_inits +tracksRegLiveness: true +body: | + bb.0: + undef %0.sub0:sgpr_64 = S_MOV_B32 1 + S_NOP 0 + %0.sub1:sgpr_64 = S_MOV_B32 2 +... +--- +# GCN-LABEL: name: combine_sreg64_inits_swap +# GCN: %0:sgpr_64 = S_MOV_B64_IMM_PSEUDO 8589934593 +# GCN: S_NOP 0 +name: combine_sreg64_inits_swap +tracksRegLiveness: true +body: | + bb.0: + undef %0.sub1:sgpr_64 = S_MOV_B32 2 + S_NOP 0 + %0.sub0:sgpr_64 = S_MOV_B32 1 +... +--- +# GCN-LABEL: name: sreg64_inits_different_blocks +# GCN: undef %0.sub0:sgpr_64 = S_MOV_B32 1 +# GCN: %0.sub1:sgpr_64 = S_MOV_B32 2 +name: sreg64_inits_different_blocks +tracksRegLiveness: true +body: | + bb.0: + undef %0.sub0:sgpr_64 = S_MOV_B32 1 + + bb.1: + %0.sub1:sgpr_64 = S_MOV_B32 2 +... +--- +# GCN-LABEL: name: sreg64_inits_two_defs_sub1 +# GCN: undef %0.sub0:sgpr_64 = S_MOV_B32 1 +# GCN: %0.sub1:sgpr_64 = S_MOV_B32 2 +# GCN: %0.sub1:sgpr_64 = S_MOV_B32 3 +name: sreg64_inits_two_defs_sub1 +tracksRegLiveness: true +body: | + bb.0: + undef %0.sub0:sgpr_64 = S_MOV_B32 1 + %0.sub1:sgpr_64 = S_MOV_B32 2 + %0.sub1:sgpr_64 = S_MOV_B32 3 +... +--- +# GCN-LABEL: name: sreg64_inits_two_defs_sub0 +# GCN: undef %0.sub0:sgpr_64 = S_MOV_B32 1 +# GCN: %0.sub1:sgpr_64 = S_MOV_B32 2 +# GCN: %0.sub0:sgpr_64 = S_MOV_B32 3 +name: sreg64_inits_two_defs_sub0 +tracksRegLiveness: true +body: | + bb.0: + undef %0.sub0:sgpr_64 = S_MOV_B32 1 + %0.sub1:sgpr_64 = S_MOV_B32 2 + %0.sub0:sgpr_64 = S_MOV_B32 3 +... +--- +# GCN-LABEL: name: sreg64_inits_full_def +# GCN: undef %1.sub0:sgpr_64 = S_MOV_B32 1 +# GCN: %0:sgpr_64 = S_MOV_B64 3 +name: sreg64_inits_full_def +tracksRegLiveness: true +body: | + bb.0: + undef %0.sub0:sgpr_64 = S_MOV_B32 1 + %0:sgpr_64 = S_MOV_B64 3 +... +--- +# GCN-LABEL: name: sreg64_inits_imp_use +# GCN: %0.sub0:sgpr_64 = S_MOV_B32 1, implicit $m0 +# GCN: %0.sub1:sgpr_64 = S_MOV_B32 2 +name: sreg64_inits_imp_use +tracksRegLiveness: true +body: | + bb.0: + undef %0.sub0:sgpr_64 = S_MOV_B32 1, implicit $m0 + %0.sub1:sgpr_64 = S_MOV_B32 2 +... +--- +# GCN-LABEL: name: sreg64_inits_imp_def +# GCN: %0.sub0:sgpr_64 = S_MOV_B32 1, implicit-def $scc +# GCN: %0.sub1:sgpr_64 = S_MOV_B32 2 +name: sreg64_inits_imp_def +tracksRegLiveness: true +body: | + bb.0: + undef %0.sub0:sgpr_64 = S_MOV_B32 1, implicit-def $scc + %0.sub1:sgpr_64 = S_MOV_B32 2 +... diff --git a/llvm/test/CodeGen/AMDGPU/ds_write2.ll b/llvm/test/CodeGen/AMDGPU/ds_write2.ll index 0630e1043575b..7fceb602a0ba9 100644 --- a/llvm/test/CodeGen/AMDGPU/ds_write2.ll +++ b/llvm/test/CodeGen/AMDGPU/ds_write2.ll @@ -844,8 +844,7 @@ define amdgpu_kernel void @store_misaligned64_constant_offsets() { define amdgpu_kernel void @store_misaligned64_constant_large_offsets() { ; CI-LABEL: store_misaligned64_constant_large_offsets: ; CI: ; %bb.0: -; CI-NEXT: s_movk_i32 s0, 0x7b -; CI-NEXT: s_mov_b32 s1, 0 +; CI-NEXT: s_mov_b64 s[0:1], 0x7b ; CI-NEXT: v_mov_b32_e32 v0, s0 ; CI-NEXT: v_mov_b32_e32 v2, 0 ; CI-NEXT: v_mov_b32_e32 v1, s1 @@ -856,8 +855,7 @@ define amdgpu_kernel void @store_misaligned64_constant_large_offsets() { ; ; GFX9-LABEL: store_misaligned64_constant_large_offsets: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_movk_i32 s0, 0x7b -; GFX9-NEXT: s_mov_b32 s1, 0 +; GFX9-NEXT: s_mov_b64 s[0:1], 0x7b ; GFX9-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s1 diff --git a/llvm/test/CodeGen/AMDGPU/inline-asm.ll b/llvm/test/CodeGen/AMDGPU/inline-asm.ll index 4aa5d518e11b0..98ee7d47f2f8e 100644 --- a/llvm/test/CodeGen/AMDGPU/inline-asm.ll +++ b/llvm/test/CodeGen/AMDGPU/inline-asm.ll @@ -205,8 +205,7 @@ entry: ; FIXME: Should not have intermediate sgprs ; CHECK-LABEL: {{^}}i64_imm_input_phys_vgpr: -; CHECK-DAG: s_mov_b32 s1, 0 -; CHECK-DAG: s_mov_b32 s0, 0x1e240 +; CHECK: s_mov_b64 s[0:1], 0x1e240 ; CHECK: v_mov_b32_e32 v0, s0 ; CHECK: v_mov_b32_e32 v1, s1 ; CHECK: use v[0:1] diff --git a/llvm/test/CodeGen/AMDGPU/inline-constraints.ll b/llvm/test/CodeGen/AMDGPU/inline-constraints.ll index dc1d442959616..06fa8ee65ddc6 100644 --- a/llvm/test/CodeGen/AMDGPU/inline-constraints.ll +++ b/llvm/test/CodeGen/AMDGPU/inline-constraints.ll @@ -59,20 +59,17 @@ define amdgpu_kernel void @inline_sreg_constraint_imm_f32() { ret void } -; FIXME: Should be able to use s_mov_b64 ; GCN-LABEL: {{^}}inline_sreg_constraint_imm_i64: -; GCN-DAG: s_mov_b32 s[[REG_LO:[0-9]+]], -4{{$}} -; GCN-DAG: s_mov_b32 s[[REG_HI:[0-9]+]], -1{{$}} -; GCN: ; use s{{\[}}[[REG_LO]]:[[REG_HI]]{{\]}} +; GCN: s_mov_b64 [[REG:s\[[0-9:]+\]]], -4{{$}} +; GCN: ; use [[REG]] define amdgpu_kernel void @inline_sreg_constraint_imm_i64() { tail call void asm sideeffect "; use $0", "s"(i64 -4) ret void } ; GCN-LABEL: {{^}}inline_sreg_constraint_imm_f64: -; GCN-DAG: s_mov_b32 s[[REG_LO:[0-9]+]], 0{{$}} -; GCN-DAG: s_mov_b32 s[[REG_HI:[0-9]+]], 0x3ff00000{{$}} -; GCN: ; use s{{\[}}[[REG_LO]]:[[REG_HI]]{{\]}} +; GCN: s_mov_b64 [[REG:s\[[0-9:]+\]]], 1.0{{$}} +; GCN: ; use [[REG]] define amdgpu_kernel void @inline_sreg_constraint_imm_f64() { tail call void asm sideeffect "; use $0", "s"(double 1.0) ret void diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll index 558923f5cc050..30a1b31f11021 100644 --- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll +++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll @@ -900,12 +900,11 @@ define amdgpu_kernel void @dynamic_insertelement_v3i16(<3 x i16> addrspace(1)* % ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; SI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x2 ; SI-NEXT: s_load_dword s4, s[4:5], 0x4 -; SI-NEXT: s_mov_b32 s5, 0 ; SI-NEXT: s_mov_b32 s3, 0x100f000 ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: s_lshl_b32 s8, s4, 4 -; SI-NEXT: s_mov_b32 s4, 0xffff +; SI-NEXT: s_mov_b64 s[4:5], 0xffff ; SI-NEXT: s_lshl_b64 s[4:5], s[4:5], s8 ; SI-NEXT: s_mov_b32 s8, 0x50005 ; SI-NEXT: s_and_b32 s9, s5, s8 @@ -923,12 +922,11 @@ define amdgpu_kernel void @dynamic_insertelement_v3i16(<3 x i16> addrspace(1)* % ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; VI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x8 ; VI-NEXT: s_load_dword s4, s[4:5], 0x10 -; VI-NEXT: s_mov_b32 s5, 0 ; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: s_lshl_b32 s8, s4, 4 -; VI-NEXT: s_mov_b32 s4, 0xffff +; VI-NEXT: s_mov_b64 s[4:5], 0xffff ; VI-NEXT: s_lshl_b64 s[4:5], s[4:5], s8 ; VI-NEXT: s_mov_b32 s8, 0x50005 ; VI-NEXT: s_mov_b32 s9, s8 @@ -1075,14 +1073,13 @@ define amdgpu_kernel void @s_dynamic_insertelement_v8i8(<8 x i8> addrspace(1)* % ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x0 ; SI-NEXT: s_load_dword s6, s[4:5], 0x4 -; SI-NEXT: s_mov_b32 s7, 0 ; SI-NEXT: s_mov_b32 s3, 0x100f000 ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: s_load_dwordx2 s[4:5], s[10:11], 0x0 ; SI-NEXT: s_mov_b32 s0, s8 ; SI-NEXT: s_lshl_b32 s8, s6, 3 -; SI-NEXT: s_mov_b32 s6, 0xffff +; SI-NEXT: s_mov_b64 s[6:7], 0xffff ; SI-NEXT: s_lshl_b64 s[6:7], s[6:7], s8 ; SI-NEXT: s_mov_b32 s8, 0x5050505 ; SI-NEXT: s_mov_b32 s1, s9 @@ -1100,14 +1097,13 @@ define amdgpu_kernel void @s_dynamic_insertelement_v8i8(<8 x i8> addrspace(1)* % ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x0 ; VI-NEXT: s_load_dword s6, s[4:5], 0x10 -; VI-NEXT: s_mov_b32 s7, 0 ; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: s_load_dwordx2 s[4:5], s[10:11], 0x0 ; VI-NEXT: s_mov_b32 s0, s8 ; VI-NEXT: s_lshl_b32 s8, s6, 3 -; VI-NEXT: s_mov_b32 s6, 0xffff +; VI-NEXT: s_mov_b64 s[6:7], 0xffff ; VI-NEXT: s_lshl_b64 s[6:7], s[6:7], s8 ; VI-NEXT: s_mov_b32 s8, 0x5050505 ; VI-NEXT: s_mov_b32 s1, s9 diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll index 3601886edc46f..3b545d2d5faa1 100644 --- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll +++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll @@ -1584,8 +1584,7 @@ define amdgpu_kernel void @v_insertelement_v4i16_dynamic_vgpr(<4 x i16> addrspac ; GFX9-NEXT: v_lshlrev_b32_e32 v4, 3, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx2 v[0:1], v4, s[2:3] -; GFX9-NEXT: s_mov_b32 s3, 0 -; GFX9-NEXT: s_mov_b32 s2, 0xffff +; GFX9-NEXT: s_mov_b64 s[2:3], 0xffff ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 4, v2 ; GFX9-NEXT: v_lshlrev_b64 v[2:3], v2, s[2:3] ; GFX9-NEXT: s_pack_ll_b32_b16 s2, s6, s6 @@ -1607,9 +1606,8 @@ define amdgpu_kernel void @v_insertelement_v4i16_dynamic_vgpr(<4 x i16> addrspac ; VI-NEXT: flat_load_dword v4, v[0:1] glc ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: flat_load_dwordx2 v[0:1], v[0:1] -; VI-NEXT: s_mov_b32 s2, 0xffff +; VI-NEXT: s_mov_b64 s[2:3], 0xffff ; VI-NEXT: v_mov_b32_e32 v3, s1 -; VI-NEXT: s_mov_b32 s3, 0 ; VI-NEXT: s_and_b32 s1, s4, s2 ; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2 ; VI-NEXT: s_lshl_b32 s0, s1, 16 @@ -1635,8 +1633,7 @@ define amdgpu_kernel void @v_insertelement_v4i16_dynamic_vgpr(<4 x i16> addrspac ; CI-NEXT: flat_load_dword v4, v[0:1] glc ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: flat_load_dwordx2 v[0:1], v[0:1] -; CI-NEXT: s_mov_b32 s3, 0 -; CI-NEXT: s_mov_b32 s2, 0xffff +; CI-NEXT: s_mov_b64 s[2:3], 0xffff ; CI-NEXT: v_mov_b32_e32 v3, s1 ; CI-NEXT: s_lshl_b32 s1, s4, 16 ; CI-NEXT: s_and_b32 s4, s4, s2 @@ -1672,8 +1669,7 @@ define amdgpu_kernel void @v_insertelement_v4f16_dynamic_sgpr(<4 x half> addrspa ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx2 v[0:1], v2, s[2:3] ; GFX9-NEXT: s_pack_ll_b32_b16 s5, s6, s6 -; GFX9-NEXT: s_mov_b32 s3, 0 -; GFX9-NEXT: s_mov_b32 s2, 0xffff +; GFX9-NEXT: s_mov_b64 s[2:3], 0xffff ; GFX9-NEXT: s_lshl_b32 s4, s7, 4 ; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], s4 ; GFX9-NEXT: v_mov_b32_e32 v3, s5 @@ -1694,9 +1690,8 @@ define amdgpu_kernel void @v_insertelement_v4f16_dynamic_sgpr(<4 x half> addrspa ; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v2 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: flat_load_dwordx2 v[0:1], v[0:1] -; VI-NEXT: s_mov_b32 s2, 0xffff +; VI-NEXT: s_mov_b64 s[2:3], 0xffff ; VI-NEXT: v_mov_b32_e32 v3, s1 -; VI-NEXT: s_mov_b32 s3, 0 ; VI-NEXT: s_lshl_b32 s1, s5, 4 ; VI-NEXT: s_and_b32 s4, s4, s2 ; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2 @@ -1722,10 +1717,9 @@ define amdgpu_kernel void @v_insertelement_v4f16_dynamic_sgpr(<4 x half> addrspa ; CI-NEXT: v_add_i32_e32 v0, vcc, s2, v2 ; CI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; CI-NEXT: flat_load_dwordx2 v[0:1], v[0:1] -; CI-NEXT: s_mov_b32 s2, 0xffff +; CI-NEXT: s_mov_b64 s[2:3], 0xffff ; CI-NEXT: v_mov_b32_e32 v3, s1 ; CI-NEXT: s_and_b32 s6, s4, s2 -; CI-NEXT: s_mov_b32 s3, 0 ; CI-NEXT: s_lshl_b32 s1, s5, 4 ; CI-NEXT: s_lshl_b32 s4, s4, 16 ; CI-NEXT: v_add_i32_e32 v2, vcc, s0, v2 diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll index 49ffd8d9fce27..1f1609c8b68db 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -7,7 +7,7 @@ ; RUN: llc -O1 -mtriple=amdgcn--amdhsa -disable-verify -debug-pass=Structure < %s 2>&1 \ ; RUN: | grep -v 'Verify generated machine code' | FileCheck -check-prefix=GCN-O1 %s ; RUN: llc -O1 -mtriple=amdgcn--amdhsa -disable-verify -amdgpu-scalar-ir-passes -amdgpu-sdwa-peephole \ -; RUN: -amdgpu-load-store-vectorizer -debug-pass=Structure < %s 2>&1 \ +; RUN: -amdgpu-load-store-vectorizer -amdgpu-enable-pre-ra-optimizations -debug-pass=Structure < %s 2>&1 \ ; RUN: | grep -v 'Verify generated machine code' | FileCheck -check-prefix=GCN-O1-OPTS %s ; RUN: llc -O2 -mtriple=amdgcn--amdhsa -disable-verify -debug-pass=Structure < %s 2>&1 \ ; RUN: | grep -v 'Verify generated machine code' | FileCheck -check-prefix=GCN-O2 %s @@ -619,6 +619,7 @@ ; GCN-O1-OPTS-NEXT: Machine Natural Loop Construction ; GCN-O1-OPTS-NEXT: Simple Register Coalescing ; GCN-O1-OPTS-NEXT: Rename Disconnected Subregister Components +; GCN-O1-OPTS-NEXT: AMDGPU Pre-RA optimizations ; GCN-O1-OPTS-NEXT: Machine Instruction Scheduler ; GCN-O1-OPTS-NEXT: MachinePostDominator Tree Construction ; GCN-O1-OPTS-NEXT: SI Whole Quad Mode @@ -899,6 +900,7 @@ ; GCN-O2-NEXT: Machine Natural Loop Construction ; GCN-O2-NEXT: Simple Register Coalescing ; GCN-O2-NEXT: Rename Disconnected Subregister Components +; GCN-O2-NEXT: AMDGPU Pre-RA optimizations ; GCN-O2-NEXT: Machine Instruction Scheduler ; GCN-O2-NEXT: MachinePostDominator Tree Construction ; GCN-O2-NEXT: SI Whole Quad Mode @@ -1193,6 +1195,7 @@ ; GCN-O3-NEXT: Machine Natural Loop Construction ; GCN-O3-NEXT: Simple Register Coalescing ; GCN-O3-NEXT: Rename Disconnected Subregister Components +; GCN-O3-NEXT: AMDGPU Pre-RA optimizations ; GCN-O3-NEXT: Machine Instruction Scheduler ; GCN-O3-NEXT: MachinePostDominator Tree Construction ; GCN-O3-NEXT: SI Whole Quad Mode diff --git a/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll index f0fa9382d00cc..62c9ab28bf7da 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll @@ -77,9 +77,9 @@ define amdgpu_kernel void @v_round_f64(double addrspace(1)* %out, double addrspa ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: s_mov_b64 s[0:1], s[6:7] ; SI-NEXT: buffer_load_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64 +; SI-NEXT: s_mov_b32 s0, -1 ; SI-NEXT: s_movk_i32 s7, 0xfc01 ; SI-NEXT: s_mov_b32 s1, 0xfffff -; SI-NEXT: s_mov_b32 s0, -1 ; SI-NEXT: s_brev_b32 s6, -2 ; SI-NEXT: v_mov_b32_e32 v8, 0x3ff00000 ; SI-NEXT: s_waitcnt vmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/packed-fp32.ll b/llvm/test/CodeGen/AMDGPU/packed-fp32.ll index 14d2feccea61d..91f676506a836 100644 --- a/llvm/test/CodeGen/AMDGPU/packed-fp32.ll +++ b/llvm/test/CodeGen/AMDGPU/packed-fp32.ll @@ -92,9 +92,8 @@ define amdgpu_kernel void @fadd_v2_v_lit_splat(<2 x float> addrspace(1)* %a) { ; GCN-LABEL: {{^}}fadd_v2_v_lit_hi0: ; GFX900-DAG: v_add_f32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}} ; GFX900-DAG: v_add_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}} -; GFX90A-DAG: s_mov_b32 s[[HI:[0-9]+]], 0 -; GFX90A-DAG: s_mov_b32 s[[LO:[0-9]+]], 1.0 -; GFX90A: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s{{\[}}[[LO]]:[[HI]]]{{$}} +; GFX90A-DAG: s_mov_b64 [[K:s\[[0-9:]+\]]], 0x3f800000 +; GFX90A: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], [[K]] define amdgpu_kernel void @fadd_v2_v_lit_hi0(<2 x float> addrspace(1)* %a) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %a, i32 %id diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-to-vector.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-to-vector.ll index ca41899b055e1..63beb537fd4f8 100644 --- a/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-to-vector.ll +++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-to-vector.ll @@ -103,8 +103,7 @@ entry: ; OPT-LABEL: define amdgpu_kernel void @half4_alloca_load4 ; GCN-NOT: buffer_ -; GCN-DAG: s_mov_b32 s[[SH:[0-9]+]], 0 -; GCN-DAG: s_mov_b32 s[[SL:[0-9]+]], 0xffff +; GCN: s_mov_b64 s[{{[0-9:]+}}], 0xffff ; OPT: %gep = getelementptr inbounds <4 x half>, <4 x half> addrspace(5)* %alloca, i32 0, i32 %sel2 ; OPT: %0 = load <4 x half>, <4 x half> addrspace(5)* %alloca @@ -163,8 +162,7 @@ entry: ; OPT-LABEL: define amdgpu_kernel void @short4_alloca_load4 ; GCN-NOT: buffer_ -; GCN-DAG: s_mov_b32 s[[SH:[0-9]+]], 0 -; GCN-DAG: s_mov_b32 s[[SL:[0-9]+]], 0xffff +; GCN: s_mov_b64 s[{{[0-9:]+}}], 0xffff ; OPT: %gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(5)* %alloca, i32 0, i32 %sel2 ; OPT: %0 = load <4 x i16>, <4 x i16> addrspace(5)* %alloca diff --git a/llvm/test/CodeGen/AMDGPU/remat-fp64-constants.ll b/llvm/test/CodeGen/AMDGPU/remat-fp64-constants.ll new file mode 100644 index 0000000000000..c7961b195542b --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/remat-fp64-constants.ll @@ -0,0 +1,45 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs --stress-regalloc=10 < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs --stress-regalloc=10 < %s | FileCheck -check-prefix=GCN %s + +; GCN-LABEL: {{^}}test_remat_sgpr: +; GCN-NOT: v_writelane_b32 +; GCN: {{^}}[[LOOP:BB[0-9_]+]]: +; GCN-COUNT-6: s_mov_b32 s{{[0-9]+}}, 0x +; GCN-NOT: v_writelane_b32 +; GCN: s_cbranch_{{[^ ]+}} [[LOOP]] +; GCN: .sgpr_spill_count: 0 +define amdgpu_kernel void @test_remat_sgpr(double addrspace(1)* %arg, double addrspace(1)* %arg1) { +bb: + %i = tail call i32 @llvm.amdgcn.workitem.id.x() + br label %bb3 + +bb2: ; preds = %bb3 + ret void + +bb3: ; preds = %bb3, %bb + %i4 = phi i32 [ 0, %bb ], [ %i22, %bb3 ] + %i5 = add nuw nsw i32 %i4, %i + %i6 = zext i32 %i5 to i64 + %i7 = getelementptr inbounds double, double addrspace(1)* %arg, i64 %i6 + %i8 = load double, double addrspace(1)* %i7, align 8 + %i9 = fadd double %i8, 0x3EFC01997CC9E6B0 + %i10 = tail call double @llvm.fma.f64(double %i8, double %i9, double 0x3FBE25E43ABE935A) + %i11 = tail call double @llvm.fma.f64(double %i10, double %i9, double 0x3FC110EF47E6C9C2) + %i12 = tail call double @llvm.fma.f64(double %i11, double %i9, double 0x3FC3B13BCFA74449) + %i13 = tail call double @llvm.fma.f64(double %i12, double %i9, double 0x3FC745D171BF3C30) + %i14 = tail call double @llvm.fma.f64(double %i13, double %i9, double 0x3FCC71C71C7792CE) + %i15 = tail call double @llvm.fma.f64(double %i14, double %i9, double 0x3FD24924924920DA) + %i16 = tail call double @llvm.fma.f64(double %i15, double %i9, double 0x3FD999999999999C) + %i17 = tail call double @llvm.fma.f64(double %i16, double %i9, double 0x3FD899999999899C) + %i18 = tail call double @llvm.fma.f64(double %i17, double %i9, double 0x3FD799999999799C) + %i19 = tail call double @llvm.fma.f64(double %i18, double %i9, double 0x3FD699999999699C) + %i20 = tail call double @llvm.fma.f64(double %i19, double %i9, double 0x3FD599999999599C) + %i21 = getelementptr inbounds double, double addrspace(1)* %arg1, i64 %i6 + store double %i19, double addrspace(1)* %i21, align 8 + %i22 = add nuw nsw i32 %i4, 1 + %i23 = icmp eq i32 %i22, 1024 + br i1 %i23, label %bb2, label %bb3 +} + +declare double @llvm.fma.f64(double, double, double) +declare i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/CodeGen/AMDGPU/sdiv64.ll b/llvm/test/CodeGen/AMDGPU/sdiv64.ll index 607ca5727eb0d..50b1ceac35ec2 100644 --- a/llvm/test/CodeGen/AMDGPU/sdiv64.ll +++ b/llvm/test/CodeGen/AMDGPU/sdiv64.ll @@ -1698,7 +1698,7 @@ define i64 @v_test_sdiv_pow2_k_num_i64(i64 %x) { ; GCN-IR-NEXT: v_addc_u32_e64 v5, s[6:7], 0, -1, vcc ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[0:1] ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, 63, v[4:5] -; GCN-IR-NEXT: s_mov_b32 s8, 0x8000 +; GCN-IR-NEXT: s_mov_b64 s[8:9], 0x8000 ; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], vcc ; GCN-IR-NEXT: v_mov_b32_e32 v6, s8 ; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 63, v[4:5] @@ -1706,7 +1706,6 @@ define i64 @v_test_sdiv_pow2_k_num_i64(i64 %x) { ; GCN-IR-NEXT: v_cndmask_b32_e64 v6, v6, 0, s[4:5] ; GCN-IR-NEXT: s_xor_b64 s[4:5], s[4:5], -1 ; GCN-IR-NEXT: v_mov_b32_e32 v3, v2 -; GCN-IR-NEXT: s_mov_b32 s9, 0 ; GCN-IR-NEXT: v_mov_b32_e32 v7, v11 ; GCN-IR-NEXT: s_and_b64 s[4:5], s[4:5], vcc ; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] @@ -1724,8 +1723,7 @@ define i64 @v_test_sdiv_pow2_k_num_i64(i64 %x) { ; GCN-IR-NEXT: s_xor_b64 s[10:11], exec, s[4:5] ; GCN-IR-NEXT: s_cbranch_execz BB12_5 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader -; GCN-IR-NEXT: s_mov_b32 s5, 0 -; GCN-IR-NEXT: s_mov_b32 s4, 0x8000 +; GCN-IR-NEXT: s_mov_b64 s[4:5], 0x8000 ; GCN-IR-NEXT: v_lshr_b64 v[12:13], s[4:5], v8 ; GCN-IR-NEXT: v_add_i32_e32 v8, vcc, -1, v0 ; GCN-IR-NEXT: v_addc_u32_e32 v9, vcc, -1, v1, vcc diff --git a/llvm/test/CodeGen/AMDGPU/shift-i128.ll b/llvm/test/CodeGen/AMDGPU/shift-i128.ll index f2077aa2a1ad0..acadd3b5da444 100644 --- a/llvm/test/CodeGen/AMDGPU/shift-i128.ll +++ b/llvm/test/CodeGen/AMDGPU/shift-i128.ll @@ -146,8 +146,7 @@ define i128 @v_lshr_i128_kv(i128 %rhs) { ; GCN-LABEL: v_lshr_i128_kv: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_movk_i32 s4, 0x41 -; GCN-NEXT: s_mov_b32 s5, 0 +; GCN-NEXT: s_mov_b64 s[4:5], 0x41 ; GCN-NEXT: v_lshr_b64 v[1:2], s[4:5], v0 ; GCN-NEXT: v_cmp_gt_u32_e32 vcc, 64, v0 ; GCN-NEXT: v_mov_b32_e32 v3, s4 diff --git a/llvm/test/CodeGen/AMDGPU/shl.ll b/llvm/test/CodeGen/AMDGPU/shl.ll index 58f73310967bb..b62a21811426e 100644 --- a/llvm/test/CodeGen/AMDGPU/shl.ll +++ b/llvm/test/CodeGen/AMDGPU/shl.ll @@ -1248,8 +1248,8 @@ define amdgpu_kernel void @v_shl_constant_i64(i64 addrspace(1)* %out, i64 addrsp ; SI-NEXT: s_mov_b32 s8, s6 ; SI-NEXT: s_mov_b32 s9, s7 ; SI-NEXT: buffer_load_dword v0, off, s[8:11], 0 -; SI-NEXT: s_movk_i32 s7, 0x11e ; SI-NEXT: s_mov_b32 s6, 0xab19b207 +; SI-NEXT: s_movk_i32 s7, 0x11e ; SI-NEXT: s_mov_b32 s0, s4 ; SI-NEXT: s_mov_b32 s1, s5 ; SI-NEXT: s_waitcnt vmcnt(0) @@ -1266,8 +1266,8 @@ define amdgpu_kernel void @v_shl_constant_i64(i64 addrspace(1)* %out, i64 addrsp ; VI-NEXT: s_load_dword s2, s[2:3], 0x0 ; VI-NEXT: s_mov_b32 s4, s0 ; VI-NEXT: s_mov_b32 s5, s1 -; VI-NEXT: s_movk_i32 s1, 0x11e ; VI-NEXT: s_mov_b32 s0, 0xab19b207 +; VI-NEXT: s_movk_i32 s1, 0x11e ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 ; VI-NEXT: v_mov_b32_e32 v0, s0 @@ -1319,8 +1319,7 @@ define amdgpu_kernel void @v_shl_i64_32_bit_constant(i64 addrspace(1)* %out, i64 ; SI-NEXT: s_mov_b32 s8, s6 ; SI-NEXT: s_mov_b32 s9, s7 ; SI-NEXT: buffer_load_dword v0, off, s[8:11], 0 -; SI-NEXT: s_mov_b32 s7, 0 -; SI-NEXT: s_mov_b32 s6, 0x12d687 +; SI-NEXT: s_mov_b64 s[6:7], 0x12d687 ; SI-NEXT: s_mov_b32 s0, s4 ; SI-NEXT: s_mov_b32 s1, s5 ; SI-NEXT: s_waitcnt vmcnt(0) @@ -1337,8 +1336,7 @@ define amdgpu_kernel void @v_shl_i64_32_bit_constant(i64 addrspace(1)* %out, i64 ; VI-NEXT: s_load_dword s2, s[2:3], 0x0 ; VI-NEXT: s_mov_b32 s4, s0 ; VI-NEXT: s_mov_b32 s5, s1 -; VI-NEXT: s_mov_b32 s1, 0 -; VI-NEXT: s_mov_b32 s0, 0x12d687 +; VI-NEXT: s_mov_b64 s[0:1], 0x12d687 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 ; VI-NEXT: v_mov_b32_e32 v0, s0 @@ -1927,8 +1925,7 @@ define amdgpu_kernel void @s_shl_inline_imm_f32_4_0_i64(i64 addrspace(1)* %out, ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 ; SI-NEXT: s_load_dword s2, s[0:1], 0xd -; SI-NEXT: s_mov_b32 s1, 0 -; SI-NEXT: s_mov_b32 s0, 4.0 +; SI-NEXT: s_mov_b64 s[0:1], 0x40800000 ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) @@ -1942,8 +1939,7 @@ define amdgpu_kernel void @s_shl_inline_imm_f32_4_0_i64(i64 addrspace(1)* %out, ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 ; VI-NEXT: s_load_dword s2, s[0:1], 0x34 -; VI-NEXT: s_mov_b32 s1, 0 -; VI-NEXT: s_mov_b32 s0, 4.0 +; VI-NEXT: s_mov_b64 s[0:1], 0x40800000 ; VI-NEXT: s_mov_b32 s7, 0xf000 ; VI-NEXT: s_mov_b32 s6, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) @@ -2039,8 +2035,8 @@ define amdgpu_kernel void @s_shl_inline_high_imm_f32_4_0_i64(i64 addrspace(1)* % ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 ; SI-NEXT: s_load_dword s2, s[0:1], 0xd -; SI-NEXT: s_mov_b32 s1, 4.0 ; SI-NEXT: s_mov_b32 s0, 0 +; SI-NEXT: s_mov_b32 s1, 4.0 ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) @@ -2054,8 +2050,8 @@ define amdgpu_kernel void @s_shl_inline_high_imm_f32_4_0_i64(i64 addrspace(1)* % ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 ; VI-NEXT: s_load_dword s2, s[0:1], 0x34 -; VI-NEXT: s_mov_b32 s1, 4.0 ; VI-NEXT: s_mov_b32 s0, 0 +; VI-NEXT: s_mov_b32 s1, 4.0 ; VI-NEXT: s_mov_b32 s7, 0xf000 ; VI-NEXT: s_mov_b32 s6, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) @@ -2090,8 +2086,8 @@ define amdgpu_kernel void @s_shl_inline_high_imm_f32_neg_4_0_i64(i64 addrspace(1 ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 ; SI-NEXT: s_load_dword s2, s[0:1], 0xd -; SI-NEXT: s_mov_b32 s1, -4.0 ; SI-NEXT: s_mov_b32 s0, 0 +; SI-NEXT: s_mov_b32 s1, -4.0 ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) @@ -2105,8 +2101,8 @@ define amdgpu_kernel void @s_shl_inline_high_imm_f32_neg_4_0_i64(i64 addrspace(1 ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 ; VI-NEXT: s_load_dword s2, s[0:1], 0x34 -; VI-NEXT: s_mov_b32 s1, -4.0 ; VI-NEXT: s_mov_b32 s0, 0 +; VI-NEXT: s_mov_b32 s1, -4.0 ; VI-NEXT: s_mov_b32 s7, 0xf000 ; VI-NEXT: s_mov_b32 s6, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/sopk-compares.ll b/llvm/test/CodeGen/AMDGPU/sopk-compares.ll index 61993fd9ff26e..39d9ac33278e2 100644 --- a/llvm/test/CodeGen/AMDGPU/sopk-compares.ll +++ b/llvm/test/CodeGen/AMDGPU/sopk-compares.ll @@ -589,13 +589,13 @@ endif: ; GCN-LABEL: {{^}}br_scc_eq_i64_simm16: ; VI-DAG: s_movk_i32 s[[K_LO:[0-9]+]], 0x4d2 -; VI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0 +; VI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 1 ; VI: s_cmp_eq_u64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}} ; SI: v_cmp_eq_u64_e32 define amdgpu_kernel void @br_scc_eq_i64_simm16(i64 %cond, i32 addrspace(1)* %out) #0 { entry: - %cmp0 = icmp eq i64 %cond, 1234 + %cmp0 = icmp eq i64 %cond, 4294968530 br i1 %cmp0, label %endif, label %if if: @@ -627,13 +627,13 @@ endif: ; GCN-LABEL: {{^}}br_scc_ne_i64_simm16: ; VI-DAG: s_movk_i32 s[[K_LO:[0-9]+]], 0x4d2 -; VI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0 +; VI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 1 ; VI: s_cmp_lg_u64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}} ; SI: v_cmp_ne_u64_e32 define amdgpu_kernel void @br_scc_ne_i64_simm16(i64 %cond, i32 addrspace(1)* %out) #0 { entry: - %cmp0 = icmp ne i64 %cond, 1234 + %cmp0 = icmp ne i64 %cond, 4294968530 br i1 %cmp0, label %endif, label %if if: diff --git a/llvm/test/CodeGen/AMDGPU/srem64.ll b/llvm/test/CodeGen/AMDGPU/srem64.ll index 74177c4394317..9b0c2abaedf09 100644 --- a/llvm/test/CodeGen/AMDGPU/srem64.ll +++ b/llvm/test/CodeGen/AMDGPU/srem64.ll @@ -1876,14 +1876,13 @@ define i64 @v_test_srem_pow2_k_num_i64(i64 %x) { ; GCN-IR-NEXT: v_addc_u32_e64 v3, s[6:7], 0, -1, vcc ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[0:1] ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, 63, v[2:3] -; GCN-IR-NEXT: s_mov_b32 s8, 0x8000 +; GCN-IR-NEXT: s_mov_b64 s[8:9], 0x8000 ; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], vcc ; GCN-IR-NEXT: v_mov_b32_e32 v4, s8 ; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 63, v[2:3] ; GCN-IR-NEXT: v_mov_b32_e32 v9, 0 ; GCN-IR-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[4:5] ; GCN-IR-NEXT: s_xor_b64 s[4:5], s[4:5], -1 -; GCN-IR-NEXT: s_mov_b32 s9, 0 ; GCN-IR-NEXT: v_mov_b32_e32 v5, v9 ; GCN-IR-NEXT: s_and_b64 s[4:5], s[4:5], vcc ; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] @@ -1901,8 +1900,7 @@ define i64 @v_test_srem_pow2_k_num_i64(i64 %x) { ; GCN-IR-NEXT: s_xor_b64 s[10:11], exec, s[4:5] ; GCN-IR-NEXT: s_cbranch_execz BB12_5 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader -; GCN-IR-NEXT: s_mov_b32 s5, 0 -; GCN-IR-NEXT: s_mov_b32 s4, 0x8000 +; GCN-IR-NEXT: s_mov_b64 s[4:5], 0x8000 ; GCN-IR-NEXT: v_lshr_b64 v[10:11], s[4:5], v6 ; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, -1, v0 ; GCN-IR-NEXT: v_addc_u32_e32 v7, vcc, -1, v1, vcc diff --git a/llvm/test/CodeGen/AMDGPU/udiv64.ll b/llvm/test/CodeGen/AMDGPU/udiv64.ll index bc38031288e7e..f4d7bdf601024 100644 --- a/llvm/test/CodeGen/AMDGPU/udiv64.ll +++ b/llvm/test/CodeGen/AMDGPU/udiv64.ll @@ -1249,14 +1249,13 @@ define i64 @v_test_udiv_pow2_k_num_i64(i64 %x) { ; GCN-IR-NEXT: v_addc_u32_e64 v5, s[6:7], 0, -1, vcc ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[0:1] ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, 63, v[4:5] -; GCN-IR-NEXT: s_mov_b32 s8, 0x8000 +; GCN-IR-NEXT: s_mov_b64 s[8:9], 0x8000 ; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], vcc ; GCN-IR-NEXT: v_mov_b32_e32 v2, s8 ; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 63, v[4:5] ; GCN-IR-NEXT: v_mov_b32_e32 v9, 0 ; GCN-IR-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5] ; GCN-IR-NEXT: s_xor_b64 s[4:5], s[4:5], -1 -; GCN-IR-NEXT: s_mov_b32 s9, 0 ; GCN-IR-NEXT: v_mov_b32_e32 v3, v9 ; GCN-IR-NEXT: s_and_b64 s[4:5], s[4:5], vcc ; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] @@ -1274,8 +1273,7 @@ define i64 @v_test_udiv_pow2_k_num_i64(i64 %x) { ; GCN-IR-NEXT: s_xor_b64 s[10:11], exec, s[4:5] ; GCN-IR-NEXT: s_cbranch_execz BB9_5 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader -; GCN-IR-NEXT: s_mov_b32 s5, 0 -; GCN-IR-NEXT: s_mov_b32 s4, 0x8000 +; GCN-IR-NEXT: s_mov_b64 s[4:5], 0x8000 ; GCN-IR-NEXT: v_lshr_b64 v[10:11], s[4:5], v6 ; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, -1, v0 ; GCN-IR-NEXT: v_addc_u32_e32 v7, vcc, -1, v1, vcc diff --git a/llvm/test/CodeGen/AMDGPU/urem64.ll b/llvm/test/CodeGen/AMDGPU/urem64.ll index d9c95cdc63e6f..a5b2cc0557680 100644 --- a/llvm/test/CodeGen/AMDGPU/urem64.ll +++ b/llvm/test/CodeGen/AMDGPU/urem64.ll @@ -1269,14 +1269,13 @@ define i64 @v_test_urem_pow2_k_num_i64(i64 %x) { ; GCN-IR-NEXT: v_addc_u32_e64 v3, s[6:7], 0, -1, vcc ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[0:1] ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, 63, v[2:3] -; GCN-IR-NEXT: s_mov_b32 s8, 0x8000 +; GCN-IR-NEXT: s_mov_b64 s[8:9], 0x8000 ; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], vcc ; GCN-IR-NEXT: v_mov_b32_e32 v4, s8 ; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 63, v[2:3] ; GCN-IR-NEXT: v_mov_b32_e32 v9, 0 ; GCN-IR-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[4:5] ; GCN-IR-NEXT: s_xor_b64 s[4:5], s[4:5], -1 -; GCN-IR-NEXT: s_mov_b32 s9, 0 ; GCN-IR-NEXT: v_mov_b32_e32 v5, v9 ; GCN-IR-NEXT: s_and_b64 s[4:5], s[4:5], vcc ; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] @@ -1294,8 +1293,7 @@ define i64 @v_test_urem_pow2_k_num_i64(i64 %x) { ; GCN-IR-NEXT: s_xor_b64 s[10:11], exec, s[4:5] ; GCN-IR-NEXT: s_cbranch_execz BB8_5 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader -; GCN-IR-NEXT: s_mov_b32 s5, 0 -; GCN-IR-NEXT: s_mov_b32 s4, 0x8000 +; GCN-IR-NEXT: s_mov_b64 s[4:5], 0x8000 ; GCN-IR-NEXT: v_lshr_b64 v[10:11], s[4:5], v6 ; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, -1, v0 ; GCN-IR-NEXT: v_addc_u32_e32 v7, vcc, -1, v1, vcc diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll index ebc041501be0d..9742041a9fc8b 100644 --- a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll +++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll @@ -807,8 +807,8 @@ define amdgpu_gfx void @strict_wwm_amdgpu_cs_main(<4 x i32> inreg %desc, i32 %in ; GFX9-O3-NEXT: v_lshlrev_b32_e32 v0, 5, v0 ; GFX9-O3-NEXT: buffer_load_dwordx4 v[1:4], v0, s[4:7], 0 offen ; GFX9-O3-NEXT: buffer_load_dwordx2 v[5:6], v0, s[4:7], 0 offen offset:16 -; GFX9-O3-NEXT: s_brev_b32 s9, -2 ; GFX9-O3-NEXT: s_mov_b32 s8, -1 +; GFX9-O3-NEXT: s_brev_b32 s9, -2 ; GFX9-O3-NEXT: s_not_b64 exec, exec ; GFX9-O3-NEXT: s_waitcnt vmcnt(1) ; GFX9-O3-NEXT: v_mov_b32_e32 v1, s8 From f0693bc0ae47e4f7237e4e8d17ee96481c370e0a Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Wed, 30 Jun 2021 11:47:30 -0700 Subject: [PATCH 329/619] autogen two tests for ease of update --- .../test/Transforms/LoopReroll/nonconst_lb.ll | 100 +-- .../runtime-loop-multiexit-dom-verify.ll | 573 +++++++++++++++--- 2 files changed, 556 insertions(+), 117 deletions(-) diff --git a/llvm/test/Transforms/LoopReroll/nonconst_lb.ll b/llvm/test/Transforms/LoopReroll/nonconst_lb.ll index aa5c456362676..200a37b8eed52 100644 --- a/llvm/test/Transforms/LoopReroll/nonconst_lb.ll +++ b/llvm/test/Transforms/LoopReroll/nonconst_lb.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -loop-reroll -S | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" target triple = "thumbv7-none-linux" @@ -11,6 +12,33 @@ target triple = "thumbv7-none-linux" ; } ;} define void @foo(i32* nocapture %A, i32* nocapture readonly %B, i32 %m, i32 %n) { +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP34:%.*]] = icmp slt i32 [[M:%.*]], [[N:%.*]] +; CHECK-NEXT: br i1 [[CMP34]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[M]] +; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i32 [[TMP3]], 3 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVAR:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[M]], [[INDVAR]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[TMP6]], 2 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP5]] +; CHECK-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVAR]], [[TMP4]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]] +; CHECK: for.end.loopexit: +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; entry: %cmp34 = icmp slt i32 %m, %n br i1 %cmp34, label %for.body, label %for.end @@ -47,26 +75,6 @@ for.body: ; preds = %entry, %for.body for.end: ; preds = %for.body, %entry ret void } -; CHECK-LABEL: @foo -; CHECK: for.body.preheader: -; CHECK: %0 = add i32 %n, -1 -; CHECK: %1 = sub i32 %0, %m -; CHECK: %2 = lshr i32 %1, 2 -; CHECK: %3 = shl nuw i32 %2, 2 -; CHECK: %4 = add nuw nsw i32 %3, 3 -; CHECK: br label %for.body - -; CHECK: for.body: -; CHECK: %indvar = phi i32 [ 0, %for.body.preheader ], [ %indvar.next, %for.body ] -; CHECK: %5 = add i32 %m, %indvar -; CHECK: %arrayidx = getelementptr inbounds i32, i32* %B, i32 %5 -; CHECK: %6 = load i32, i32* %arrayidx, align 4 -; CHECK: %mul = shl nsw i32 %6, 2 -; CHECK: %arrayidx2 = getelementptr inbounds i32, i32* %A, i32 %5 -; CHECK: store i32 %mul, i32* %arrayidx2, align 4 -; CHECK: %indvar.next = add i32 %indvar, 1 -; CHECK: %exitcond = icmp eq i32 %indvar, %4 -; CHECK: br i1 %exitcond, label %for.end.loopexit, label %for.body ;void daxpy_ur(int n,float da,float *dx,float *dy) ; { @@ -80,6 +88,36 @@ for.end: ; preds = %for.body, %entry ; } ; } define void @daxpy_ur(i32 %n, float %da, float* nocapture readonly %dx, float* nocapture %dy) { +; CHECK-LABEL: @daxpy_ur( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[REM:%.*]] = srem i32 [[N:%.*]], 4 +; CHECK-NEXT: [[CMP55:%.*]] = icmp slt i32 [[REM]], [[N]] +; CHECK-NEXT: br i1 [[CMP55]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[REM]] +; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i32 [[TMP3]], 3 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVAR:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[REM]], [[INDVAR]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[DY:%.*]], i32 [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = load float, float* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, float* [[DX:%.*]], i32 [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX1]], align 4 +; CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP7]], [[DA:%.*]] +; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP6]], [[MUL]] +; CHECK-NEXT: store float [[ADD]], float* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVAR]], [[TMP4]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]] +; CHECK: for.end.loopexit: +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; entry: %rem = srem i32 %n, 4 %cmp55 = icmp slt i32 %rem, %n @@ -126,25 +164,3 @@ for.end: ; preds = %for.body, %entry ret void } -; CHECK-LABEL: @daxpy_ur -; CHECK: for.body.preheader: -; CHECK: %0 = add i32 %n, -1 -; CHECK: %1 = sub i32 %0, %rem -; CHECK: %2 = lshr i32 %1, 2 -; CHECK: %3 = shl nuw i32 %2, 2 -; CHECK: %4 = add nuw nsw i32 %3, 3 -; CHECK: br label %for.body - -; CHECK: for.body: -; CHECK: %indvar = phi i32 [ 0, %for.body.preheader ], [ %indvar.next, %for.body ] -; CHECK: %5 = add i32 %rem, %indvar -; CHECK: %arrayidx = getelementptr inbounds float, float* %dy, i32 %5 -; CHECK: %6 = load float, float* %arrayidx, align 4 -; CHECK: %arrayidx1 = getelementptr inbounds float, float* %dx, i32 %5 -; CHECK: %7 = load float, float* %arrayidx1, align 4 -; CHECK: %mul = fmul float %7, %da -; CHECK: %add = fadd float %6, %mul -; CHECK: store float %add, float* %arrayidx, align 4 -; CHECK: %indvar.next = add i32 %indvar, 1 -; CHECK: %exitcond = icmp eq i32 %indvar, %4 -; CHECK: br i1 %exitcond, label %for.end.loopexit, label %for.body diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll index 5b8e5ef7fd1f4..9110650a93342 100644 --- a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll +++ b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=false -unroll-runtime-multi-exit=true -unroll-count=4 -verify-dom-info -S | FileCheck %s ; REQUIRES: asserts @@ -8,16 +9,55 @@ ; mergedexit block has edges from loop exit blocks. define i64 @test1() { -; CHECK-LABEL: test1( -; CHECK-LABEL: headerexit: -; CHECK-NEXT: %addphi = phi i64 [ %add.iv, %header ], [ %add.iv.1, %header.1 ], [ %add.iv.2, %header.2 ], [ %add.iv.3, %header.3 ] -; CHECK-NEXT: br label %mergedexit -; CHECK-LABEL: latchexit: -; CHECK-NEXT: %shftphi = phi i64 [ %shft, %latch ], [ %shft.1, %latch.1 ], [ %shft.2, %latch.2 ], [ %shft.3, %latch.3 ] -; CHECK-NEXT: br label %mergedexit -; CHECK-LABEL: mergedexit: -; CHECK-NEXT: %retval = phi i64 [ %addphi, %headerexit ], [ %shftphi, %latchexit ] -; CHECK-NEXT: ret i64 %retval +; CHECK-LABEL: @test1( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[PREHEADER:%.*]] +; CHECK: preheader: +; CHECK-NEXT: [[TRIP:%.*]] = zext i32 undef to i64 +; CHECK-NEXT: br label [[HEADER:%.*]] +; CHECK: header: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 2, [[PREHEADER]] ], [ [[ADD_IV_3:%.*]], [[LATCH_3:%.*]] ] +; CHECK-NEXT: [[ADD_IV:%.*]] = add nuw nsw i64 [[IV]], 2 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i64 [[ADD_IV]], [[TRIP]] +; CHECK-NEXT: br i1 [[CMP1]], label [[LATCH:%.*]], label [[HEADEREXIT:%.*]] +; CHECK: latch: +; CHECK-NEXT: [[SHFT:%.*]] = ashr i64 [[ADD_IV]], 1 +; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i64 [[SHFT]], [[TRIP]] +; CHECK-NEXT: br i1 [[CMP2]], label [[HEADER_1:%.*]], label [[LATCHEXIT:%.*]] +; CHECK: headerexit: +; CHECK-NEXT: [[ADDPHI:%.*]] = phi i64 [ [[ADD_IV]], [[HEADER]] ], [ [[ADD_IV_1:%.*]], [[HEADER_1]] ], [ [[ADD_IV_2:%.*]], [[HEADER_2:%.*]] ], [ [[ADD_IV_3]], [[HEADER_3:%.*]] ] +; CHECK-NEXT: br label [[MERGEDEXIT:%.*]] +; CHECK: latchexit: +; CHECK-NEXT: [[SHFTPHI:%.*]] = phi i64 [ [[SHFT]], [[LATCH]] ], [ [[SHFT_1:%.*]], [[LATCH_1:%.*]] ], [ [[SHFT_2:%.*]], [[LATCH_2:%.*]] ], [ [[SHFT_3:%.*]], [[LATCH_3]] ] +; CHECK-NEXT: br label [[MERGEDEXIT]] +; CHECK: mergedexit: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[ADDPHI]], [[HEADEREXIT]] ], [ [[SHFTPHI]], [[LATCHEXIT]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; CHECK: header.1: +; CHECK-NEXT: [[ADD_IV_1]] = add nuw nsw i64 [[ADD_IV]], 2 +; CHECK-NEXT: [[CMP1_1:%.*]] = icmp ult i64 [[ADD_IV_1]], [[TRIP]] +; CHECK-NEXT: br i1 [[CMP1_1]], label [[LATCH_1]], label [[HEADEREXIT]] +; CHECK: latch.1: +; CHECK-NEXT: [[SHFT_1]] = ashr i64 [[ADD_IV_1]], 1 +; CHECK-NEXT: [[CMP2_1:%.*]] = icmp ult i64 [[SHFT_1]], [[TRIP]] +; CHECK-NEXT: br i1 [[CMP2_1]], label [[HEADER_2]], label [[LATCHEXIT]] +; CHECK: header.2: +; CHECK-NEXT: [[ADD_IV_2]] = add nuw nsw i64 [[ADD_IV_1]], 2 +; CHECK-NEXT: [[CMP1_2:%.*]] = icmp ult i64 [[ADD_IV_2]], [[TRIP]] +; CHECK-NEXT: br i1 [[CMP1_2]], label [[LATCH_2]], label [[HEADEREXIT]] +; CHECK: latch.2: +; CHECK-NEXT: [[SHFT_2]] = ashr i64 [[ADD_IV_2]], 1 +; CHECK-NEXT: [[CMP2_2:%.*]] = icmp ult i64 [[SHFT_2]], [[TRIP]] +; CHECK-NEXT: br i1 [[CMP2_2]], label [[HEADER_3]], label [[LATCHEXIT]] +; CHECK: header.3: +; CHECK-NEXT: [[ADD_IV_3]] = add nuw nsw i64 [[ADD_IV_2]], 2 +; CHECK-NEXT: [[CMP1_3:%.*]] = icmp ult i64 [[ADD_IV_3]], [[TRIP]] +; CHECK-NEXT: br i1 [[CMP1_3]], label [[LATCH_3]], label [[HEADEREXIT]] +; CHECK: latch.3: +; CHECK-NEXT: [[SHFT_3]] = ashr i64 [[ADD_IV_3]], 1 +; CHECK-NEXT: [[CMP2_3:%.*]] = icmp ult i64 [[SHFT_3]], [[TRIP]] +; CHECK-NEXT: br i1 [[CMP2_3]], label [[HEADER]], label [[LATCHEXIT]], !llvm.loop [[LOOP0:![0-9]+]] +; entry: br label %preheader @@ -41,7 +81,7 @@ headerexit: ; preds = %header br label %mergedexit latchexit: ; preds = %latch - %shftphi = phi i64 [ %shft, %latch ] + %shftphi = phi i64 [ %shft, %latch ] br label %mergedexit mergedexit: ; preds = %latchexit, %headerexit @@ -51,12 +91,52 @@ mergedexit: ; preds = %latchexit, % ; mergedexit has edges from loop exit blocks and a block outside the loop. define void @test2(i1 %cond, i32 %n) { -; CHECK-LABEL: header.1: -; CHECK-NEXT: %add.iv.1 = add nuw nsw i64 %add.iv, 2 -; CHECK: br i1 %cmp1.1, label %latch.1, label %headerexit -; CHECK-LABEL: latch.3: -; CHECK: %cmp2.3 = icmp ult i64 %shft.3, %trip -; CHECK-NEXT: br i1 %cmp2.3, label %header, label %latchexit, !llvm.loop +; CHECK-LABEL: @test2( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[COND:%.*]], label [[PREHEADER:%.*]], label [[MERGEDEXIT:%.*]] +; CHECK: preheader: +; CHECK-NEXT: [[TRIP:%.*]] = zext i32 [[N:%.*]] to i64 +; CHECK-NEXT: br label [[HEADER:%.*]] +; CHECK: header: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 2, [[PREHEADER]] ], [ [[ADD_IV_3:%.*]], [[LATCH_3:%.*]] ] +; CHECK-NEXT: [[ADD_IV:%.*]] = add nuw nsw i64 [[IV]], 2 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i64 [[ADD_IV]], [[TRIP]] +; CHECK-NEXT: br i1 [[CMP1]], label [[LATCH:%.*]], label [[HEADEREXIT:%.*]] +; CHECK: latch: +; CHECK-NEXT: [[SHFT:%.*]] = ashr i64 [[ADD_IV]], 1 +; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i64 [[SHFT]], [[TRIP]] +; CHECK-NEXT: br i1 [[CMP2]], label [[HEADER_1:%.*]], label [[LATCHEXIT:%.*]] +; CHECK: headerexit: +; CHECK-NEXT: br label [[MERGEDEXIT]] +; CHECK: latchexit: +; CHECK-NEXT: br label [[MERGEDEXIT]] +; CHECK: mergedexit: +; CHECK-NEXT: ret void +; CHECK: header.1: +; CHECK-NEXT: [[ADD_IV_1:%.*]] = add nuw nsw i64 [[ADD_IV]], 2 +; CHECK-NEXT: [[CMP1_1:%.*]] = icmp ult i64 [[ADD_IV_1]], [[TRIP]] +; CHECK-NEXT: br i1 [[CMP1_1]], label [[LATCH_1:%.*]], label [[HEADEREXIT]] +; CHECK: latch.1: +; CHECK-NEXT: [[SHFT_1:%.*]] = ashr i64 [[ADD_IV_1]], 1 +; CHECK-NEXT: [[CMP2_1:%.*]] = icmp ult i64 [[SHFT_1]], [[TRIP]] +; CHECK-NEXT: br i1 [[CMP2_1]], label [[HEADER_2:%.*]], label [[LATCHEXIT]] +; CHECK: header.2: +; CHECK-NEXT: [[ADD_IV_2:%.*]] = add nuw nsw i64 [[ADD_IV_1]], 2 +; CHECK-NEXT: [[CMP1_2:%.*]] = icmp ult i64 [[ADD_IV_2]], [[TRIP]] +; CHECK-NEXT: br i1 [[CMP1_2]], label [[LATCH_2:%.*]], label [[HEADEREXIT]] +; CHECK: latch.2: +; CHECK-NEXT: [[SHFT_2:%.*]] = ashr i64 [[ADD_IV_2]], 1 +; CHECK-NEXT: [[CMP2_2:%.*]] = icmp ult i64 [[SHFT_2]], [[TRIP]] +; CHECK-NEXT: br i1 [[CMP2_2]], label [[HEADER_3:%.*]], label [[LATCHEXIT]] +; CHECK: header.3: +; CHECK-NEXT: [[ADD_IV_3]] = add nuw nsw i64 [[ADD_IV_2]], 2 +; CHECK-NEXT: [[CMP1_3:%.*]] = icmp ult i64 [[ADD_IV_3]], [[TRIP]] +; CHECK-NEXT: br i1 [[CMP1_3]], label [[LATCH_3]], label [[HEADEREXIT]] +; CHECK: latch.3: +; CHECK-NEXT: [[SHFT_3:%.*]] = ashr i64 [[ADD_IV_3]], 1 +; CHECK-NEXT: [[CMP2_3:%.*]] = icmp ult i64 [[SHFT_3]], [[TRIP]] +; CHECK-NEXT: br i1 [[CMP2_3]], label [[HEADER]], label [[LATCHEXIT]], !llvm.loop [[LOOP2:![0-9]+]] +; entry: br i1 %cond, label %preheader, label %mergedexit @@ -88,14 +168,53 @@ mergedexit: ; preds = %latchexit, % ; exitsucc is from loop exit block only. define i64 @test3(i32 %n) { -; CHECK-LABEL: test3( -; CHECK-LABEL: headerexit: -; CHECK-NEXT: br label %exitsucc -; CHECK-LABEL: latchexit: -; CHECK-NEXT: %shftphi = phi i64 [ %shft, %latch ], [ %shft.1, %latch.1 ], [ %shft.2, %latch.2 ], [ %shft.3, %latch.3 ] -; CHECK-NEXT: ret i64 %shftphi -; CHECK-LABEL: exitsucc: -; CHECK-NEXT: ret i64 96 +; CHECK-LABEL: @test3( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[PREHEADER:%.*]] +; CHECK: preheader: +; CHECK-NEXT: [[TRIP:%.*]] = zext i32 [[N:%.*]] to i64 +; CHECK-NEXT: br label [[HEADER:%.*]] +; CHECK: header: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 2, [[PREHEADER]] ], [ [[ADD_IV_3:%.*]], [[LATCH_3:%.*]] ] +; CHECK-NEXT: [[ADD_IV:%.*]] = add nuw nsw i64 [[IV]], 2 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i64 [[ADD_IV]], [[TRIP]] +; CHECK-NEXT: br i1 [[CMP1]], label [[LATCH:%.*]], label [[HEADEREXIT:%.*]] +; CHECK: latch: +; CHECK-NEXT: [[SHFT:%.*]] = ashr i64 [[ADD_IV]], 1 +; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i64 [[SHFT]], [[TRIP]] +; CHECK-NEXT: br i1 [[CMP2]], label [[HEADER_1:%.*]], label [[LATCHEXIT:%.*]] +; CHECK: headerexit: +; CHECK-NEXT: br label [[EXITSUCC:%.*]] +; CHECK: latchexit: +; CHECK-NEXT: [[SHFTPHI:%.*]] = phi i64 [ [[SHFT]], [[LATCH]] ], [ [[SHFT_1:%.*]], [[LATCH_1:%.*]] ], [ [[SHFT_2:%.*]], [[LATCH_2:%.*]] ], [ [[SHFT_3:%.*]], [[LATCH_3]] ] +; CHECK-NEXT: ret i64 [[SHFTPHI]] +; CHECK: exitsucc: +; CHECK-NEXT: ret i64 96 +; CHECK: header.1: +; CHECK-NEXT: [[ADD_IV_1:%.*]] = add nuw nsw i64 [[ADD_IV]], 2 +; CHECK-NEXT: [[CMP1_1:%.*]] = icmp ult i64 [[ADD_IV_1]], [[TRIP]] +; CHECK-NEXT: br i1 [[CMP1_1]], label [[LATCH_1]], label [[HEADEREXIT]] +; CHECK: latch.1: +; CHECK-NEXT: [[SHFT_1]] = ashr i64 [[ADD_IV_1]], 1 +; CHECK-NEXT: [[CMP2_1:%.*]] = icmp ult i64 [[SHFT_1]], [[TRIP]] +; CHECK-NEXT: br i1 [[CMP2_1]], label [[HEADER_2:%.*]], label [[LATCHEXIT]] +; CHECK: header.2: +; CHECK-NEXT: [[ADD_IV_2:%.*]] = add nuw nsw i64 [[ADD_IV_1]], 2 +; CHECK-NEXT: [[CMP1_2:%.*]] = icmp ult i64 [[ADD_IV_2]], [[TRIP]] +; CHECK-NEXT: br i1 [[CMP1_2]], label [[LATCH_2]], label [[HEADEREXIT]] +; CHECK: latch.2: +; CHECK-NEXT: [[SHFT_2]] = ashr i64 [[ADD_IV_2]], 1 +; CHECK-NEXT: [[CMP2_2:%.*]] = icmp ult i64 [[SHFT_2]], [[TRIP]] +; CHECK-NEXT: br i1 [[CMP2_2]], label [[HEADER_3:%.*]], label [[LATCHEXIT]] +; CHECK: header.3: +; CHECK-NEXT: [[ADD_IV_3]] = add nuw nsw i64 [[ADD_IV_2]], 2 +; CHECK-NEXT: [[CMP1_3:%.*]] = icmp ult i64 [[ADD_IV_3]], [[TRIP]] +; CHECK-NEXT: br i1 [[CMP1_3]], label [[LATCH_3]], label [[HEADEREXIT]] +; CHECK: latch.3: +; CHECK-NEXT: [[SHFT_3]] = ashr i64 [[ADD_IV_3]], 1 +; CHECK-NEXT: [[CMP2_3:%.*]] = icmp ult i64 [[SHFT_3]], [[TRIP]] +; CHECK-NEXT: br i1 [[CMP2_3]], label [[HEADER]], label [[LATCHEXIT]], !llvm.loop [[LOOP3:![0-9]+]] +; entry: br label %preheader @@ -127,22 +246,95 @@ exitsucc: ; preds = %headerexit ; exit block (%default) has an exiting block and another exit block as predecessors. define void @test4(i16 %c3) { -; CHECK-LABEL: test4 - -; CHECK-LABEL: exiting.prol: -; CHECK-NEXT: switch i16 %c3, label %default.loopexit.loopexit1 [ - -; CHECK-LABEL: exiting: -; CHECK-NEXT: switch i16 %c3, label %default.loopexit.loopexit [ - -; CHECK-LABEL: default.loopexit.loopexit: -; CHECK-NEXT: br label %default.loopexit - -; CHECK-LABEL: default.loopexit.loopexit1: -; CHECK-NEXT: br label %default.loopexit - -; CHECK-LABEL: default.loopexit: -; CHECK-NEXT: br label %default +; CHECK-LABEL: @test4( +; CHECK-NEXT: preheader: +; CHECK-NEXT: [[C1:%.*]] = zext i32 undef to i64 +; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[C1]], i64 1) +; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[UMAX]], -1 +; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[UMAX]], 3 +; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0 +; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[HEADER_PROL_PREHEADER:%.*]], label [[HEADER_PROL_LOOPEXIT:%.*]] +; CHECK: header.prol.preheader: +; CHECK-NEXT: br label [[HEADER_PROL:%.*]] +; CHECK: header.prol: +; CHECK-NEXT: [[INDVARS_IV_PROL:%.*]] = phi i64 [ 0, [[HEADER_PROL_PREHEADER]] ], [ [[INDVARS_IV_NEXT_PROL:%.*]], [[LATCH_PROL:%.*]] ] +; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ [[XTRAITER]], [[HEADER_PROL_PREHEADER]] ], [ [[PROL_ITER_SUB:%.*]], [[LATCH_PROL]] ] +; CHECK-NEXT: br label [[EXITING_PROL:%.*]] +; CHECK: exiting.prol: +; CHECK-NEXT: switch i16 [[C3:%.*]], label [[DEFAULT_LOOPEXIT_LOOPEXIT1:%.*]] [ +; CHECK-NEXT: i16 45, label [[OTHEREXIT_LOOPEXIT2:%.*]] +; CHECK-NEXT: i16 95, label [[LATCH_PROL]] +; CHECK-NEXT: ] +; CHECK: latch.prol: +; CHECK-NEXT: [[INDVARS_IV_NEXT_PROL]] = add nuw nsw i64 [[INDVARS_IV_PROL]], 1 +; CHECK-NEXT: [[C2_PROL:%.*]] = icmp ult i64 [[INDVARS_IV_NEXT_PROL]], [[C1]] +; CHECK-NEXT: [[PROL_ITER_SUB]] = sub i64 [[PROL_ITER]], 1 +; CHECK-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_SUB]], 0 +; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label [[HEADER_PROL]], label [[HEADER_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: header.prol.loopexit.unr-lcssa: +; CHECK-NEXT: [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL]], [[LATCH_PROL]] ] +; CHECK-NEXT: br label [[HEADER_PROL_LOOPEXIT]] +; CHECK: header.prol.loopexit: +; CHECK-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, [[PREHEADER:%.*]] ], [ [[INDVARS_IV_UNR_PH]], [[HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 3 +; CHECK-NEXT: br i1 [[TMP1]], label [[LATCHEXIT:%.*]], label [[PREHEADER_NEW:%.*]] +; CHECK: preheader.new: +; CHECK-NEXT: br label [[HEADER:%.*]] +; CHECK: header: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_UNR]], [[PREHEADER_NEW]] ], [ [[INDVARS_IV_NEXT_3:%.*]], [[LATCH_3:%.*]] ] +; CHECK-NEXT: br label [[EXITING:%.*]] +; CHECK: exiting: +; CHECK-NEXT: switch i16 [[C3]], label [[DEFAULT_LOOPEXIT_LOOPEXIT:%.*]] [ +; CHECK-NEXT: i16 45, label [[OTHEREXIT_LOOPEXIT:%.*]] +; CHECK-NEXT: i16 95, label [[LATCH:%.*]] +; CHECK-NEXT: ] +; CHECK: latch: +; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: br label [[EXITING_1:%.*]] +; CHECK: latchexit.unr-lcssa: +; CHECK-NEXT: br label [[LATCHEXIT]] +; CHECK: latchexit: +; CHECK-NEXT: ret void +; CHECK: default.loopexit.loopexit: +; CHECK-NEXT: br label [[DEFAULT_LOOPEXIT:%.*]] +; CHECK: default.loopexit.loopexit1: +; CHECK-NEXT: br label [[DEFAULT_LOOPEXIT]] +; CHECK: default.loopexit: +; CHECK-NEXT: br label [[DEFAULT:%.*]] +; CHECK: default: +; CHECK-NEXT: ret void +; CHECK: otherexit.loopexit: +; CHECK-NEXT: br label [[OTHEREXIT:%.*]] +; CHECK: otherexit.loopexit2: +; CHECK-NEXT: br label [[OTHEREXIT]] +; CHECK: otherexit: +; CHECK-NEXT: br label [[DEFAULT]] +; CHECK: exiting.1: +; CHECK-NEXT: switch i16 [[C3]], label [[DEFAULT_LOOPEXIT_LOOPEXIT]] [ +; CHECK-NEXT: i16 45, label [[OTHEREXIT_LOOPEXIT]] +; CHECK-NEXT: i16 95, label [[LATCH_1:%.*]] +; CHECK-NEXT: ] +; CHECK: latch.1: +; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT]], 1 +; CHECK-NEXT: br label [[EXITING_2:%.*]] +; CHECK: exiting.2: +; CHECK-NEXT: switch i16 [[C3]], label [[DEFAULT_LOOPEXIT_LOOPEXIT]] [ +; CHECK-NEXT: i16 45, label [[OTHEREXIT_LOOPEXIT]] +; CHECK-NEXT: i16 95, label [[LATCH_2:%.*]] +; CHECK-NEXT: ] +; CHECK: latch.2: +; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_1]], 1 +; CHECK-NEXT: br label [[EXITING_3:%.*]] +; CHECK: exiting.3: +; CHECK-NEXT: switch i16 [[C3]], label [[DEFAULT_LOOPEXIT_LOOPEXIT]] [ +; CHECK-NEXT: i16 45, label [[OTHEREXIT_LOOPEXIT]] +; CHECK-NEXT: i16 95, label [[LATCH_3]] +; CHECK-NEXT: ] +; CHECK: latch.3: +; CHECK-NEXT: [[INDVARS_IV_NEXT_3]] = add nuw nsw i64 [[INDVARS_IV_NEXT_2]], 1 +; CHECK-NEXT: [[C2_3:%.*]] = icmp ult i64 [[INDVARS_IV_NEXT_3]], [[C1]] +; CHECK-NEXT: br i1 [[C2_3]], label [[HEADER]], label [[LATCHEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP5:![0-9]+]] +; preheader: %c1 = zext i32 undef to i64 br label %header @@ -153,8 +345,8 @@ header: ; preds = %latch, %preheader exiting: ; preds = %header switch i16 %c3, label %default [ - i16 45, label %otherexit - i16 95, label %latch + i16 45, label %otherexit + i16 95, label %latch ] latch: ; preds = %exiting @@ -175,17 +367,205 @@ otherexit: ; preds = %exiting ; exit block (%exitB) has an exiting block and another exit block as predecessors. ; exiting block comes from inner loop. define void @test5(i1 %c) { -; CHECK-LABEL: test5 -; CHECK-LABEL: bb1: -; CHECK-NEXT: br i1 false, label %outerH.prol.preheader, label %outerH.prol.loopexit - -; CHECK-LABEL: outerH.prol.preheader: -; CHECK-NEXT: br label %outerH.prol - -; CHECK-LABEL: outerH.prol: -; CHECK-NEXT: %tmp4.prol = phi i32 [ %tmp6.prol, %outerLatch.prol ], [ undef, %outerH.prol.preheader ] -; CHECK-NEXT: %prol.iter = phi i32 [ 0, %outerH.prol.preheader ], [ %prol.iter.sub, %outerLatch.prol ] -; CHECK-NEXT: br label %innerH.prol +; CHECK-LABEL: @test5( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = icmp sgt i32 undef, 79 +; CHECK-NEXT: br i1 [[TMP]], label [[OUTERLATCHEXIT:%.*]], label [[BB1:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br i1 false, label [[OUTERH_PROL_PREHEADER:%.*]], label [[OUTERH_PROL_LOOPEXIT:%.*]] +; CHECK: outerH.prol.preheader: +; CHECK-NEXT: br label [[OUTERH_PROL:%.*]] +; CHECK: outerH.prol: +; CHECK-NEXT: [[TMP4_PROL:%.*]] = phi i32 [ [[TMP6_PROL:%.*]], [[OUTERLATCH_PROL:%.*]] ], [ undef, [[OUTERH_PROL_PREHEADER]] ] +; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i32 [ 0, [[OUTERH_PROL_PREHEADER]] ], [ [[PROL_ITER_SUB:%.*]], [[OUTERLATCH_PROL]] ] +; CHECK-NEXT: br label [[INNERH_PROL:%.*]] +; CHECK: innerH.prol: +; CHECK-NEXT: br i1 [[C:%.*]], label [[INNEREXITING_PROL:%.*]], label [[OTHEREXITB_LOOPEXIT1:%.*]] +; CHECK: innerexiting.prol: +; CHECK-NEXT: br i1 [[C]], label [[INNERLATCH_PROL:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT2:%.*]] +; CHECK: innerLatch.prol: +; CHECK-NEXT: br i1 false, label [[INNERH_1_PROL:%.*]], label [[OUTERLATCH_PROL]] +; CHECK: innerH.1.prol: +; CHECK-NEXT: br i1 [[C]], label [[INNEREXITING_1_PROL:%.*]], label [[OTHEREXITB_LOOPEXIT1]] +; CHECK: innerexiting.1.prol: +; CHECK-NEXT: br i1 [[C]], label [[INNERLATCH_1_PROL:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT2]] +; CHECK: innerLatch.1.prol: +; CHECK-NEXT: br i1 false, label [[INNERH_2_PROL:%.*]], label [[OUTERLATCH_PROL]] +; CHECK: innerH.2.prol: +; CHECK-NEXT: br i1 [[C]], label [[INNEREXITING_2_PROL:%.*]], label [[OTHEREXITB_LOOPEXIT1]] +; CHECK: innerexiting.2.prol: +; CHECK-NEXT: br i1 [[C]], label [[INNERLATCH_2_PROL:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT2]] +; CHECK: innerLatch.2.prol: +; CHECK-NEXT: br i1 false, label [[INNERH_3_PROL:%.*]], label [[OUTERLATCH_PROL]] +; CHECK: innerH.3.prol: +; CHECK-NEXT: br i1 [[C]], label [[INNEREXITING_3_PROL:%.*]], label [[OTHEREXITB_LOOPEXIT1]] +; CHECK: innerexiting.3.prol: +; CHECK-NEXT: br i1 [[C]], label [[INNERLATCH_3_PROL:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT2]] +; CHECK: innerLatch.3.prol: +; CHECK-NEXT: br i1 false, label [[INNERH_PROL]], label [[OUTERLATCH_PROL]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: outerLatch.prol: +; CHECK-NEXT: [[TMP6_PROL]] = add i32 [[TMP4_PROL]], 1 +; CHECK-NEXT: [[TMP7_PROL:%.*]] = icmp sgt i32 [[TMP6_PROL]], 79 +; CHECK-NEXT: [[PROL_ITER_SUB]] = sub i32 [[PROL_ITER]], 1 +; CHECK-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i32 [[PROL_ITER_SUB]], 0 +; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label [[OUTERH_PROL]], label [[OUTERH_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK: outerH.prol.loopexit.unr-lcssa: +; CHECK-NEXT: [[TMP4_UNR_PH:%.*]] = phi i32 [ [[TMP6_PROL]], [[OUTERLATCH_PROL]] ] +; CHECK-NEXT: br label [[OUTERH_PROL_LOOPEXIT]] +; CHECK: outerH.prol.loopexit: +; CHECK-NEXT: [[TMP4_UNR:%.*]] = phi i32 [ undef, [[BB1]] ], [ [[TMP4_UNR_PH]], [[OUTERH_PROL_LOOPEXIT_UNR_LCSSA]] ] +; CHECK-NEXT: br i1 false, label [[OUTERLATCHEXIT_LOOPEXIT:%.*]], label [[BB1_NEW:%.*]] +; CHECK: bb1.new: +; CHECK-NEXT: br label [[OUTERH:%.*]] +; CHECK: outerH: +; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[TMP4_UNR]], [[BB1_NEW]] ], [ [[TMP6_3:%.*]], [[OUTERLATCH_3:%.*]] ] +; CHECK-NEXT: br label [[INNERH:%.*]] +; CHECK: innerH: +; CHECK-NEXT: br i1 [[C]], label [[INNEREXITING:%.*]], label [[OTHEREXITB_LOOPEXIT_LOOPEXIT:%.*]] +; CHECK: innerexiting: +; CHECK-NEXT: br i1 [[C]], label [[INNERLATCH:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT:%.*]] +; CHECK: innerLatch: +; CHECK-NEXT: br i1 false, label [[INNERH_1:%.*]], label [[OUTERLATCH:%.*]] +; CHECK: outerLatch: +; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP4]], 1 +; CHECK-NEXT: br label [[INNERH_13:%.*]] +; CHECK: outerLatchExit.loopexit.unr-lcssa: +; CHECK-NEXT: br label [[OUTERLATCHEXIT_LOOPEXIT]] +; CHECK: outerLatchExit.loopexit: +; CHECK-NEXT: br label [[OUTERLATCHEXIT]] +; CHECK: outerLatchExit: +; CHECK-NEXT: ret void +; CHECK: exitB.loopexit.loopexit.loopexit: +; CHECK-NEXT: br label [[EXITB_LOOPEXIT_LOOPEXIT:%.*]] +; CHECK: exitB.loopexit.loopexit.loopexit13: +; CHECK-NEXT: br label [[EXITB_LOOPEXIT_LOOPEXIT]] +; CHECK: exitB.loopexit.loopexit.loopexit15: +; CHECK-NEXT: br label [[EXITB_LOOPEXIT_LOOPEXIT]] +; CHECK: exitB.loopexit.loopexit.loopexit17: +; CHECK-NEXT: br label [[EXITB_LOOPEXIT_LOOPEXIT]] +; CHECK: exitB.loopexit.loopexit: +; CHECK-NEXT: br label [[EXITB_LOOPEXIT:%.*]] +; CHECK: exitB.loopexit.loopexit2: +; CHECK-NEXT: br label [[EXITB_LOOPEXIT]] +; CHECK: exitB.loopexit: +; CHECK-NEXT: br label [[EXITB:%.*]] +; CHECK: exitB: +; CHECK-NEXT: ret void +; CHECK: otherexitB.loopexit.loopexit: +; CHECK-NEXT: br label [[OTHEREXITB_LOOPEXIT:%.*]] +; CHECK: otherexitB.loopexit.loopexit12: +; CHECK-NEXT: br label [[OTHEREXITB_LOOPEXIT]] +; CHECK: otherexitB.loopexit.loopexit14: +; CHECK-NEXT: br label [[OTHEREXITB_LOOPEXIT]] +; CHECK: otherexitB.loopexit.loopexit16: +; CHECK-NEXT: br label [[OTHEREXITB_LOOPEXIT]] +; CHECK: otherexitB.loopexit: +; CHECK-NEXT: br label [[OTHEREXITB:%.*]] +; CHECK: otherexitB.loopexit1: +; CHECK-NEXT: br label [[OTHEREXITB]] +; CHECK: otherexitB: +; CHECK-NEXT: br label [[EXITB]] +; CHECK: innerH.1: +; CHECK-NEXT: br i1 [[C]], label [[INNEREXITING_1:%.*]], label [[OTHEREXITB_LOOPEXIT_LOOPEXIT]] +; CHECK: innerexiting.1: +; CHECK-NEXT: br i1 [[C]], label [[INNERLATCH_1:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT]] +; CHECK: innerLatch.1: +; CHECK-NEXT: br i1 false, label [[INNERH_2:%.*]], label [[OUTERLATCH]] +; CHECK: innerH.2: +; CHECK-NEXT: br i1 [[C]], label [[INNEREXITING_2:%.*]], label [[OTHEREXITB_LOOPEXIT_LOOPEXIT]] +; CHECK: innerexiting.2: +; CHECK-NEXT: br i1 [[C]], label [[INNERLATCH_2:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT]] +; CHECK: innerLatch.2: +; CHECK-NEXT: br i1 false, label [[INNERH_3:%.*]], label [[OUTERLATCH]] +; CHECK: innerH.3: +; CHECK-NEXT: br i1 [[C]], label [[INNEREXITING_3:%.*]], label [[OTHEREXITB_LOOPEXIT_LOOPEXIT]] +; CHECK: innerexiting.3: +; CHECK-NEXT: br i1 [[C]], label [[INNERLATCH_3:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT]] +; CHECK: innerLatch.3: +; CHECK-NEXT: br i1 false, label [[INNERH]], label [[OUTERLATCH]], !llvm.loop [[LOOP6]] +; CHECK: innerH.13: +; CHECK-NEXT: br i1 [[C]], label [[INNEREXITING_14:%.*]], label [[OTHEREXITB_LOOPEXIT_LOOPEXIT12:%.*]] +; CHECK: innerexiting.14: +; CHECK-NEXT: br i1 [[C]], label [[INNERLATCH_15:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT13:%.*]] +; CHECK: innerLatch.15: +; CHECK-NEXT: br i1 false, label [[INNERH_1_1:%.*]], label [[OUTERLATCH_1:%.*]] +; CHECK: innerH.1.1: +; CHECK-NEXT: br i1 [[C]], label [[INNEREXITING_1_1:%.*]], label [[OTHEREXITB_LOOPEXIT_LOOPEXIT12]] +; CHECK: innerexiting.1.1: +; CHECK-NEXT: br i1 [[C]], label [[INNERLATCH_1_1:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT13]] +; CHECK: innerLatch.1.1: +; CHECK-NEXT: br i1 false, label [[INNERH_2_1:%.*]], label [[OUTERLATCH_1]] +; CHECK: innerH.2.1: +; CHECK-NEXT: br i1 [[C]], label [[INNEREXITING_2_1:%.*]], label [[OTHEREXITB_LOOPEXIT_LOOPEXIT12]] +; CHECK: innerexiting.2.1: +; CHECK-NEXT: br i1 [[C]], label [[INNERLATCH_2_1:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT13]] +; CHECK: innerLatch.2.1: +; CHECK-NEXT: br i1 false, label [[INNERH_3_1:%.*]], label [[OUTERLATCH_1]] +; CHECK: innerH.3.1: +; CHECK-NEXT: br i1 [[C]], label [[INNEREXITING_3_1:%.*]], label [[OTHEREXITB_LOOPEXIT_LOOPEXIT12]] +; CHECK: innerexiting.3.1: +; CHECK-NEXT: br i1 [[C]], label [[INNERLATCH_3_1:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT13]] +; CHECK: innerLatch.3.1: +; CHECK-NEXT: br i1 false, label [[INNERH_13]], label [[OUTERLATCH_1]], !llvm.loop [[LOOP6]] +; CHECK: outerLatch.1: +; CHECK-NEXT: [[TMP6_1:%.*]] = add i32 [[TMP6]], 1 +; CHECK-NEXT: br label [[INNERH_26:%.*]] +; CHECK: innerH.26: +; CHECK-NEXT: br i1 [[C]], label [[INNEREXITING_27:%.*]], label [[OTHEREXITB_LOOPEXIT_LOOPEXIT14:%.*]] +; CHECK: innerexiting.27: +; CHECK-NEXT: br i1 [[C]], label [[INNERLATCH_28:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT15:%.*]] +; CHECK: innerLatch.28: +; CHECK-NEXT: br i1 false, label [[INNERH_1_2:%.*]], label [[OUTERLATCH_2:%.*]] +; CHECK: innerH.1.2: +; CHECK-NEXT: br i1 [[C]], label [[INNEREXITING_1_2:%.*]], label [[OTHEREXITB_LOOPEXIT_LOOPEXIT14]] +; CHECK: innerexiting.1.2: +; CHECK-NEXT: br i1 [[C]], label [[INNERLATCH_1_2:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT15]] +; CHECK: innerLatch.1.2: +; CHECK-NEXT: br i1 false, label [[INNERH_2_2:%.*]], label [[OUTERLATCH_2]] +; CHECK: innerH.2.2: +; CHECK-NEXT: br i1 [[C]], label [[INNEREXITING_2_2:%.*]], label [[OTHEREXITB_LOOPEXIT_LOOPEXIT14]] +; CHECK: innerexiting.2.2: +; CHECK-NEXT: br i1 [[C]], label [[INNERLATCH_2_2:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT15]] +; CHECK: innerLatch.2.2: +; CHECK-NEXT: br i1 false, label [[INNERH_3_2:%.*]], label [[OUTERLATCH_2]] +; CHECK: innerH.3.2: +; CHECK-NEXT: br i1 [[C]], label [[INNEREXITING_3_2:%.*]], label [[OTHEREXITB_LOOPEXIT_LOOPEXIT14]] +; CHECK: innerexiting.3.2: +; CHECK-NEXT: br i1 [[C]], label [[INNERLATCH_3_2:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT15]] +; CHECK: innerLatch.3.2: +; CHECK-NEXT: br i1 false, label [[INNERH_26]], label [[OUTERLATCH_2]], !llvm.loop [[LOOP6]] +; CHECK: outerLatch.2: +; CHECK-NEXT: [[TMP6_2:%.*]] = add i32 [[TMP6_1]], 1 +; CHECK-NEXT: br label [[INNERH_39:%.*]] +; CHECK: innerH.39: +; CHECK-NEXT: br i1 [[C]], label [[INNEREXITING_310:%.*]], label [[OTHEREXITB_LOOPEXIT_LOOPEXIT16:%.*]] +; CHECK: innerexiting.310: +; CHECK-NEXT: br i1 [[C]], label [[INNERLATCH_311:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT17:%.*]] +; CHECK: innerLatch.311: +; CHECK-NEXT: br i1 false, label [[INNERH_1_3:%.*]], label [[OUTERLATCH_3]] +; CHECK: innerH.1.3: +; CHECK-NEXT: br i1 [[C]], label [[INNEREXITING_1_3:%.*]], label [[OTHEREXITB_LOOPEXIT_LOOPEXIT16]] +; CHECK: innerexiting.1.3: +; CHECK-NEXT: br i1 [[C]], label [[INNERLATCH_1_3:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT17]] +; CHECK: innerLatch.1.3: +; CHECK-NEXT: br i1 false, label [[INNERH_2_3:%.*]], label [[OUTERLATCH_3]] +; CHECK: innerH.2.3: +; CHECK-NEXT: br i1 [[C]], label [[INNEREXITING_2_3:%.*]], label [[OTHEREXITB_LOOPEXIT_LOOPEXIT16]] +; CHECK: innerexiting.2.3: +; CHECK-NEXT: br i1 [[C]], label [[INNERLATCH_2_3:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT17]] +; CHECK: innerLatch.2.3: +; CHECK-NEXT: br i1 false, label [[INNERH_3_3:%.*]], label [[OUTERLATCH_3]] +; CHECK: innerH.3.3: +; CHECK-NEXT: br i1 [[C]], label [[INNEREXITING_3_3:%.*]], label [[OTHEREXITB_LOOPEXIT_LOOPEXIT16]] +; CHECK: innerexiting.3.3: +; CHECK-NEXT: br i1 [[C]], label [[INNERLATCH_3_3:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT17]] +; CHECK: innerLatch.3.3: +; CHECK-NEXT: br i1 false, label [[INNERH_39]], label [[OUTERLATCH_3]], !llvm.loop [[LOOP6]] +; CHECK: outerLatch.3: +; CHECK-NEXT: [[TMP6_3]] = add i32 [[TMP6_2]], 1 +; CHECK-NEXT: [[TMP7_3:%.*]] = icmp sgt i32 [[TMP6_3]], 79 +; CHECK-NEXT: br i1 [[TMP7_3]], label [[OUTERLATCHEXIT_LOOPEXIT_UNR_LCSSA:%.*]], label [[OUTERH]], !llvm.loop [[LOOP8:![0-9]+]] +; bb: %tmp = icmp sgt i32 undef, 79 br i1 %tmp, label %outerLatchExit, label %bb1 @@ -226,27 +606,70 @@ otherexitB: ; preds = %innerH ; Blocks reachable from exits (not_zero44) have the IDom as the block within the loop (Header). ; Update the IDom to the preheader. define void @test6(i1 %c) { -; CHECK-LABEL: test6 -; CHECK-LABEL: header.prol.preheader: -; CHECK-NEXT: br label %header.prol - -; CHECK-LABEL: header.prol: -; CHECK-NEXT: %indvars.iv.prol = phi i64 [ undef, %header.prol.preheader ], [ %indvars.iv.next.prol, %latch.prol ] -; CHECK-NEXT: %prol.iter = phi i64 [ %xtraiter, %header.prol.preheader ], [ %prol.iter.sub, %latch.prol ] - -; CHECK-NEXT: br i1 %c, label %latch.prol, label %otherexit.loopexit1 - -; CHECK-LABEL: header.prol.loopexit.unr-lcssa: -; CHECK-NEXT: %indvars.iv.unr.ph = phi i64 [ %indvars.iv.next.prol, %latch.prol ] -; CHECK-NEXT: br label %header.prol.loopexit - -; CHECK-LABEL: header.prol.loopexit: -; CHECK-NEXT: %indvars.iv.unr = phi i64 [ undef, %entry ], [ %indvars.iv.unr.ph, %header.prol.loopexit.unr-lcssa ] -; CHECK-NEXT: %5 = icmp ult i64 %2, 3 -; CHECK-NEXT: br i1 %5, label %latchexit, label %entry.new - -; CHECK-LABEL: entry.new: -; CHECK-NEXT: br label %header +; CHECK-LABEL: @test6( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 undef, i64 616) +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[SMAX]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], undef +; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = add nuw i64 [[TMP2]], 1 +; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP3]], 3 +; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0 +; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[HEADER_PROL_PREHEADER:%.*]], label [[HEADER_PROL_LOOPEXIT:%.*]] +; CHECK: header.prol.preheader: +; CHECK-NEXT: br label [[HEADER_PROL:%.*]] +; CHECK: header.prol: +; CHECK-NEXT: [[INDVARS_IV_PROL:%.*]] = phi i64 [ undef, [[HEADER_PROL_PREHEADER]] ], [ [[INDVARS_IV_NEXT_PROL:%.*]], [[LATCH_PROL:%.*]] ] +; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ [[XTRAITER]], [[HEADER_PROL_PREHEADER]] ], [ [[PROL_ITER_SUB:%.*]], [[LATCH_PROL]] ] +; CHECK-NEXT: br i1 [[C:%.*]], label [[LATCH_PROL]], label [[OTHEREXIT_LOOPEXIT1:%.*]] +; CHECK: latch.prol: +; CHECK-NEXT: [[INDVARS_IV_NEXT_PROL]] = add nsw i64 [[INDVARS_IV_PROL]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT_PROL]], 616 +; CHECK-NEXT: [[PROL_ITER_SUB]] = sub i64 [[PROL_ITER]], 1 +; CHECK-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_SUB]], 0 +; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label [[HEADER_PROL]], label [[HEADER_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK: header.prol.loopexit.unr-lcssa: +; CHECK-NEXT: [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL]], [[LATCH_PROL]] ] +; CHECK-NEXT: br label [[HEADER_PROL_LOOPEXIT]] +; CHECK: header.prol.loopexit: +; CHECK-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ undef, [[ENTRY:%.*]] ], [ [[INDVARS_IV_UNR_PH]], [[HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP2]], 3 +; CHECK-NEXT: br i1 [[TMP5]], label [[LATCHEXIT:%.*]], label [[ENTRY_NEW:%.*]] +; CHECK: entry.new: +; CHECK-NEXT: br label [[HEADER:%.*]] +; CHECK: header: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_UNR]], [[ENTRY_NEW]] ], [ [[INDVARS_IV_NEXT_3:%.*]], [[LATCH_3:%.*]] ] +; CHECK-NEXT: br i1 [[C]], label [[LATCH:%.*]], label [[OTHEREXIT_LOOPEXIT:%.*]] +; CHECK: latch: +; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nsw i64 [[INDVARS_IV]], 2 +; CHECK-NEXT: br i1 [[C]], label [[LATCH_1:%.*]], label [[OTHEREXIT_LOOPEXIT]] +; CHECK: latchexit.unr-lcssa: +; CHECK-NEXT: br label [[LATCHEXIT]] +; CHECK: latchexit: +; CHECK-NEXT: br label [[LATCHEXITSUCC:%.*]] +; CHECK: otherexit.loopexit: +; CHECK-NEXT: br label [[OTHEREXIT:%.*]] +; CHECK: otherexit.loopexit1: +; CHECK-NEXT: br label [[OTHEREXIT]] +; CHECK: otherexit: +; CHECK-NEXT: br label [[OTHEREXITSUCC:%.*]] +; CHECK: otherexitsucc: +; CHECK-NEXT: br label [[NOT_ZERO44:%.*]] +; CHECK: not_zero44: +; CHECK-NEXT: unreachable +; CHECK: latchexitsucc: +; CHECK-NEXT: br label [[NOT_ZERO44]] +; CHECK: latch.1: +; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add nsw i64 [[INDVARS_IV_NEXT]], 2 +; CHECK-NEXT: br i1 [[C]], label [[LATCH_2:%.*]], label [[OTHEREXIT_LOOPEXIT]] +; CHECK: latch.2: +; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nsw i64 [[INDVARS_IV_NEXT_1]], 2 +; CHECK-NEXT: br i1 [[C]], label [[LATCH_3]], label [[OTHEREXIT_LOOPEXIT]] +; CHECK: latch.3: +; CHECK-NEXT: [[INDVARS_IV_NEXT_3]] = add nsw i64 [[INDVARS_IV_NEXT_2]], 2 +; CHECK-NEXT: [[TMP6:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT_3]], 616 +; CHECK-NEXT: br i1 [[TMP6]], label [[HEADER]], label [[LATCHEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP10:![0-9]+]] +; entry: br label %header From 51c3e3f80c7e6c1e8bf704e75bab999f2a8661e9 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Wed, 30 Jun 2021 14:49:00 -0400 Subject: [PATCH 330/619] [gn build] (manually) port f617ab104451 (DoublerPlugin) --- .../secondary/llvm/unittests/Passes/BUILD.gn | 35 +++++++++++-------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/llvm/utils/gn/secondary/llvm/unittests/Passes/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/Passes/BUILD.gn index fd0e19a50ab72..ce7acf1344c4c 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/Passes/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/Passes/BUILD.gn @@ -3,23 +3,25 @@ import("//llvm/utils/unittest/unittest.gni") # Keyed off LLVM_ENABLE_PLUGINS in the CMake build, which is usually false # on Windows and true elsewhere. if (host_os != "win") { - loadable_module("TestPlugin") { - # Put plugin next to the unit test executable. - output_dir = target_out_dir + foreach(plugin, ["TestPlugin", "DoublerPlugin"]) { + loadable_module(plugin) { + # Put plugin next to the unit test executable. + output_dir = target_out_dir - sources = [ "TestPlugin.cpp" ] + sources = [ "$plugin.cpp" ] - deps = [ - # TestPlugin doesn't want to link in any LLVM code, it just needs its - # headers. - "//llvm/include/llvm/IR:public_tablegen", - ] + deps = [ + # TestPlugin doesn't want to link in any LLVM code, it just needs its + # headers. + "//llvm/include/llvm/IR:public_tablegen", + ] - if (host_os != "mac" && host_os != "win") { - # The GN build currently doesn't globally pass -fPIC, but that's - # needed for building .so files on ELF. Just pass it manually - # for loadable_modules for now. - cflags = [ "-fPIC" ] + if (host_os != "mac" && host_os != "win") { + # The GN build currently doesn't globally pass -fPIC, but that's + # needed for building .so files on ELF. Just pass it manually + # for loadable_modules for now. + cflags = [ "-fPIC" ] + } } } } @@ -38,7 +40,10 @@ unittest("PluginsTests") { # Otherwise, reconfiguring with plugins disabled will leave behind a stale # executable. if (host_os != "win") { - deps += [ ":TestPlugin" ] + deps += [ + ":DoublerPlugin", + ":TestPlugin", + ] defines = [ "LLVM_ENABLE_PLUGINS" ] } From ec74192f52472c1ce928f8ca3cfeed8038a68066 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Wed, 30 Jun 2021 18:49:16 +0000 Subject: [PATCH 331/619] [gn build] Port 381ded345bdd --- llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn index f82497e60e832..e860c9465c66d 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn @@ -178,6 +178,7 @@ static_library("LLVMAMDGPUCodeGen") { "GCNIterativeScheduler.cpp", "GCNMinRegStrategy.cpp", "GCNNSAReassign.cpp", + "GCNPreRAOptimizations.cpp", "GCNRegPressure.cpp", "GCNSchedStrategy.cpp", "R600AsmPrinter.cpp", From f4ea6531e677b1a3c107d7009a7e2f195c8fa915 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 30 Jun 2021 18:00:36 +0100 Subject: [PATCH 332/619] [BasicAA] Add test for incorrectly inferring noalias due to scale sign. This patch adds a test where we currently incorrectly determine noalias, because the sign of Scale is adjusted after 91fa3565da16. --- llvm/test/Analysis/BasicAA/gep-modulo.ll | 29 ++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/llvm/test/Analysis/BasicAA/gep-modulo.ll b/llvm/test/Analysis/BasicAA/gep-modulo.ll index e009ce498b06b..cf75ba5101b1f 100644 --- a/llvm/test/Analysis/BasicAA/gep-modulo.ll +++ b/llvm/test/Analysis/BasicAA/gep-modulo.ll @@ -273,3 +273,32 @@ define void @may_overflow_pointer_diff([16 x i8]* %ptr, i64 %idx) { ret void } + +; %gep.1 and %gep.idx may alias, e.g. if %idx.1 = 8 and %idx.2 == 2. %gep.idx is then +; (((18446744073709551614 * 8) % 2^64 + 6 * 2) % 2^64 + 10) % 2^64 == 6. +define void @may_overflow_mul_scale_neg([200 x [ 6 x i8]]* %ptr, i64 %idx.1,i64 %idx.2) { +; CHECK-LABEL: Function: may_overflow_mul_scale_neg: 4 pointers, 2 call sites +; CHECK-NEXT: MustAlias: [200 x [6 x i8]]* %ptr, i8* %bc +; CHECK-NEXT: PartialAlias (off 6): [200 x [6 x i8]]* %ptr, i8* %gep.1 +; CHECK-NEXT: NoAlias: i8* %bc, i8* %gep.1 +; CHECK-NEXT: MayAlias: [200 x [6 x i8]]* %ptr, i8* %gep.idx +; CHECK-NEXT: NoAlias: i8* %bc, i8* %gep.idx +; CHECK-NEXT: NoAlias: i8* %gep.1, i8* %gep.idx +; + %idx.1.pos = icmp sge i64 %idx.1, 0 + call void @llvm.assume(i1 %idx.1.pos) + %idx.2.pos = icmp sge i64 %idx.2, 0 + call void @llvm.assume(i1 %idx.2.pos) + + %bc = bitcast [ 200 x [ 6 x i8 ] ]* %ptr to i8* + %gep.1 = getelementptr i8, i8* %bc, i64 6 + store i8 1, i8* %gep.1, align 1 + + %mul.0 = mul i64 %idx.1, -2 + %add = add i64 %mul.0, 10 + %gep.idx = getelementptr [ 200 x [ 6 x i8 ] ], [ 200 x [ 6 x i8 ] ]* %ptr, i64 0, i64 %idx.2, i64 %add + store i8 0, i8* %gep.idx, align 1 + ret void +} + +declare void @llvm.assume(i1) From e6d22d0174e09fa01342d9ed1dca47bc1eb58303 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 30 Jun 2021 19:41:57 +0100 Subject: [PATCH 333/619] [BasicAA] Use separate scale variable for GCD. Use separate variable for adjusted scale used for GCD computations. This fixes an issue where we incorrectly determined that all indices are non-negative and returned noalias because of that. Follow up to 91fa3565da16. --- llvm/lib/Analysis/BasicAliasAnalysis.cpp | 9 +++++---- llvm/test/Analysis/BasicAA/gep-modulo.ll | 4 ++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp index da489b8d457fb..d79df622ee501 100644 --- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -1148,14 +1148,15 @@ AliasResult BasicAAResult::aliasGEP( bool AllNonPositive = DecompGEP1.Offset.isNonPositive(); for (unsigned i = 0, e = DecompGEP1.VarIndices.size(); i != e; ++i) { APInt Scale = DecompGEP1.VarIndices[i].Scale; + APInt ScaleForGDC = DecompGEP1.VarIndices[i].Scale; if (!DecompGEP1.VarIndices[i].IsNSW) - Scale = APInt::getOneBitSet(Scale.getBitWidth(), - Scale.countTrailingZeros()); + ScaleForGDC = APInt::getOneBitSet(Scale.getBitWidth(), + Scale.countTrailingZeros()); if (i == 0) - GCD = Scale.abs(); + GCD = ScaleForGDC.abs(); else - GCD = APIntOps::GreatestCommonDivisor(GCD, Scale.abs()); + GCD = APIntOps::GreatestCommonDivisor(GCD, ScaleForGDC.abs()); if (AllNonNegative || AllNonPositive) { // If the Value could change between cycles, then any reasoning about diff --git a/llvm/test/Analysis/BasicAA/gep-modulo.ll b/llvm/test/Analysis/BasicAA/gep-modulo.ll index cf75ba5101b1f..22b4310d088e7 100644 --- a/llvm/test/Analysis/BasicAA/gep-modulo.ll +++ b/llvm/test/Analysis/BasicAA/gep-modulo.ll @@ -282,8 +282,8 @@ define void @may_overflow_mul_scale_neg([200 x [ 6 x i8]]* %ptr, i64 %idx.1,i64 ; CHECK-NEXT: PartialAlias (off 6): [200 x [6 x i8]]* %ptr, i8* %gep.1 ; CHECK-NEXT: NoAlias: i8* %bc, i8* %gep.1 ; CHECK-NEXT: MayAlias: [200 x [6 x i8]]* %ptr, i8* %gep.idx -; CHECK-NEXT: NoAlias: i8* %bc, i8* %gep.idx -; CHECK-NEXT: NoAlias: i8* %gep.1, i8* %gep.idx +; CHECK-NEXT: MayAlias: i8* %bc, i8* %gep.idx +; CHECK-NEXT: MayAlias: i8* %gep.1, i8* %gep.idx ; %idx.1.pos = icmp sge i64 %idx.1, 0 call void @llvm.assume(i1 %idx.1.pos) From 2eb7bbbe65b6374e6137772f1c2c46e6daa5c33d Mon Sep 17 00:00:00 2001 From: Suraj Sudhir Date: Wed, 30 Jun 2021 12:22:38 -0700 Subject: [PATCH 334/619] [mlir][tosa] Use 3D tensors in tosa.matmul Signed-off-by: Suraj Sudhir Reviewed By: rsuderman Differential Revision: https://reviews.llvm.org/D105213 --- mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td | 6 +++--- mlir/include/mlir/Dialect/Tosa/IR/TosaTypesBase.td | 2 -- mlir/test/Dialect/Tosa/ops.mlir | 6 +++--- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td index 639934b1acb48..3a1f9d26be118 100644 --- a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td +++ b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td @@ -208,13 +208,13 @@ def Tosa_MatMulOp : Tosa_Op<"matmul", [NoSideEffect]> { }]; let arguments = (ins - Tosa_Tensor2Dto3D:$a, - Tosa_Tensor2Dto3D:$b, + Tosa_Tensor3D:$a, + Tosa_Tensor3D:$b, OptionalAttr:$quantization_info ); let results = (outs - Tosa_Tensor2Dto3D:$c + Tosa_Tensor3D:$c ); let builders = [Tosa_MatMulOpQuantInfoBuilder]; diff --git a/mlir/include/mlir/Dialect/Tosa/IR/TosaTypesBase.td b/mlir/include/mlir/Dialect/Tosa/IR/TosaTypesBase.td index 5969d98408a9e..08324a15a07b5 100644 --- a/mlir/include/mlir/Dialect/Tosa/IR/TosaTypesBase.td +++ b/mlir/include/mlir/Dialect/Tosa/IR/TosaTypesBase.td @@ -124,8 +124,6 @@ def Tosa_Tensor5D : TensorRankOf<[Tosa_AnyNumber], [5]>; def Tosa_Tensor1Dto4D : TensorRankOf<[Tosa_AnyNumber], [1,2,3,4]>; def Tosa_Tensor1Dto6D : TensorRankOf<[Tosa_AnyNumber], [1,2,3,4,5,6]>; -def Tosa_Tensor2Dto3D : TensorRankOf<[Tosa_AnyNumber], [2,3]>; - def Tosa_TensorUpto4D : TensorRankOf<[Tosa_AnyNumber], [0,1,2,3,4]>; def Tosa_Int32TensorUpto4D : TensorRankOf<[Tosa_Int32], [0,1,2,3,4]>; diff --git a/mlir/test/Dialect/Tosa/ops.mlir b/mlir/test/Dialect/Tosa/ops.mlir index 6ef301081bb56..ec169d0e16ebf 100644 --- a/mlir/test/Dialect/Tosa/ops.mlir +++ b/mlir/test/Dialect/Tosa/ops.mlir @@ -39,9 +39,9 @@ func @test_fully_connected(%arg0: tensor<14x19xf32>, %arg1: tensor<19x28xf32>, % // ----- // CHECK-LABEL: test_matmul -func @test_matmul(%arg0: tensor<14x19xf32>, %arg1: tensor<19x28xf32>) -> tensor<14x28xf32> { - %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<14x19xf32>, tensor<19x28xf32>) -> tensor<14x28xf32> - return %0 : tensor<14x28xf32> +func @test_matmul(%arg0: tensor<1x14x19xf32>, %arg1: tensor<1x19x28xf32>) -> tensor<1x14x28xf32> { + %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x14x19xf32>, tensor<1x19x28xf32>) -> tensor<1x14x28xf32> + return %0 : tensor<1x14x28xf32> } // ----- From a64287247633da36b58d65e4c5e248f2b4c6fe61 Mon Sep 17 00:00:00 2001 From: Jon Roelofs Date: Tue, 29 Jun 2021 17:56:18 -0700 Subject: [PATCH 335/619] [GISel] Support llvm.memcpy.inline Differential revision: https://reviews.llvm.org/D105072 --- llvm/docs/GlobalISel/GenericOpcode.rst | 30 +++++ .../llvm/CodeGen/GlobalISel/CombinerHelper.h | 17 ++- llvm/include/llvm/Support/TargetOpcodes.def | 3 + llvm/include/llvm/Target/GenericOpcodes.td | 8 ++ .../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 78 +++++++++-- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 15 ++- llvm/lib/CodeGen/MachineVerifier.cpp | 9 ++ .../GISel/AArch64O0PreLegalizerCombiner.cpp | 2 + .../GISel/AArch64PreLegalizerCombiner.cpp | 2 + .../AMDGPU/AMDGPUPreLegalizerCombiner.cpp | 2 + .../Target/Mips/MipsPreLegalizerCombiner.cpp | 2 + .../GlobalISel/inline-memcpy-forced.mir | 86 ++++++++++++ .../GlobalISel/irtranslator-memcpy-inline.ll | 125 ++++++++++++++++++ .../GlobalISel/legalizer-info-validation.mir | 3 + .../prelegalizer-combiner-memcpy-inline.mir | 81 ++++++++++++ .../inline-memcpy.mir | 60 +++++++++ llvm/test/MachineVerifier/test_g_bzero.mir | 4 +- llvm/test/MachineVerifier/test_g_memcpy.mir | 5 + .../MachineVerifier/test_g_memcpy_inline.mir | 49 +++++++ llvm/test/MachineVerifier/test_g_memmove.mir | 55 ++++++++ 20 files changed, 613 insertions(+), 23 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy-forced.mir create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-memcpy-inline.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-memcpy-inline.mir create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/mips-prelegalizer-combiner/inline-memcpy.mir create mode 100644 llvm/test/MachineVerifier/test_g_memcpy_inline.mir create mode 100644 llvm/test/MachineVerifier/test_g_memmove.mir diff --git a/llvm/docs/GlobalISel/GenericOpcode.rst b/llvm/docs/GlobalISel/GenericOpcode.rst index 5fd79d0c672f2..dfd57bc4e655b 100644 --- a/llvm/docs/GlobalISel/GenericOpcode.rst +++ b/llvm/docs/GlobalISel/GenericOpcode.rst @@ -715,6 +715,36 @@ G_FENCE I couldn't find any documentation on this at the time of writing. +G_MEMCPY +^^^^^^^^ + +Generic memcpy. Expects two MachineMemOperands covering the store and load +respectively, in addition to explicit operands. + +G_MEMCPY_INLINE +^^^^^^^^^^^^^^^ + +Generic inlined memcpy. Like G_MEMCPY, but it is guaranteed that this version +will not be lowered as a call to an external function. Currently the size +operand is required to evaluate as a constant (not an immediate), though that is +expected to change when llvm.memcpy.inline is taught to support dynamic sizes. + +G_MEMMOVE +^^^^^^^^^ + +Generic memmove. Similar to G_MEMCPY, but the source and destination memory +ranges are allowed to overlap. + +G_MEMSET +^^^^^^^^ + +Generic memset. Expects a MachineMemOperand in addition to explicit operands. + +G_BZERO +^^^^^^^ + +Generic bzero. Expects a MachineMemOperand in addition to explicit operands. + Control Flow ------------ diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index 6acf6ccb616e3..2c8b6b14a67ec 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -532,16 +532,25 @@ class CombinerHelper { /// combine functions. Returns true if changed. bool tryCombine(MachineInstr &MI); + /// Emit loads and stores that perform the given memcpy. + /// Assumes \p MI is a G_MEMCPY_INLINE + /// TODO: implement dynamically sized inline memcpy, + /// and rename: s/bool tryEmit/void emit/ + bool tryEmitMemcpyInline(MachineInstr &MI); + private: // Memcpy family optimization helpers. + bool tryEmitMemcpyInline(MachineInstr &MI, Register Dst, Register Src, + uint64_t KnownLen, Align DstAlign, Align SrcAlign, + bool IsVolatile); bool optimizeMemcpy(MachineInstr &MI, Register Dst, Register Src, - unsigned KnownLen, Align DstAlign, Align SrcAlign, - bool IsVolatile); + uint64_t KnownLen, uint64_t Limit, Align DstAlign, + Align SrcAlign, bool IsVolatile); bool optimizeMemmove(MachineInstr &MI, Register Dst, Register Src, - unsigned KnownLen, Align DstAlign, Align SrcAlign, + uint64_t KnownLen, Align DstAlign, Align SrcAlign, bool IsVolatile); bool optimizeMemset(MachineInstr &MI, Register Dst, Register Val, - unsigned KnownLen, Align DstAlign, bool IsVolatile); + uint64_t KnownLen, Align DstAlign, bool IsVolatile); /// Given a non-indexed load or store instruction \p MI, find an offset that /// can be usefully and legally folded into it as a post-indexing operation. diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def index 154329f8a9796..fbe0d1a55bfca 100644 --- a/llvm/include/llvm/Support/TargetOpcodes.def +++ b/llvm/include/llvm/Support/TargetOpcodes.def @@ -739,6 +739,9 @@ HANDLE_TARGET_OPCODE(G_WRITE_REGISTER) /// llvm.memcpy intrinsic HANDLE_TARGET_OPCODE(G_MEMCPY) +/// llvm.memcpy.inline intrinsic +HANDLE_TARGET_OPCODE(G_MEMCPY_INLINE) + /// llvm.memmove intrinsic HANDLE_TARGET_OPCODE(G_MEMMOVE) diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td index 113799611e738..e3e1d5fc3c655 100644 --- a/llvm/include/llvm/Target/GenericOpcodes.td +++ b/llvm/include/llvm/Target/GenericOpcodes.td @@ -1353,6 +1353,14 @@ def G_MEMCPY : GenericInstruction { let mayStore = true; } +def G_MEMCPY_INLINE : GenericInstruction { + let OutOperandList = (outs); + let InOperandList = (ins ptype0:$dst_addr, ptype1:$src_addr, type2:$size); + let hasSideEffects = false; + let mayLoad = true; + let mayStore = true; +} + def G_MEMMOVE : GenericInstruction { let OutOperandList = (outs); let InOperandList = (ins ptype0:$dst_addr, ptype1:$src_addr, type2:$size, untyped_imm_0:$tailcall); diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 0520fbee2692c..89b127c3df5f0 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -1218,7 +1218,7 @@ static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) { } bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst, - Register Val, unsigned KnownLen, + Register Val, uint64_t KnownLen, Align Alignment, bool IsVolatile) { auto &MF = *MI.getParent()->getParent(); const auto &TLI = *MF.getSubtarget().getTargetLowering(); @@ -1330,10 +1330,51 @@ bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst, return true; } +bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE); + + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + Register Len = MI.getOperand(2).getReg(); + + const auto *MMOIt = MI.memoperands_begin(); + const MachineMemOperand *MemOp = *MMOIt; + bool IsVolatile = MemOp->isVolatile(); + + // See if this is a constant length copy + auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI); + // FIXME: support dynamically sized G_MEMCPY_INLINE + assert(LenVRegAndVal.hasValue() && + "inline memcpy with dynamic size is not yet supported"); + uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue(); + if (KnownLen == 0) { + MI.eraseFromParent(); + return true; + } + + const auto &DstMMO = **MI.memoperands_begin(); + const auto &SrcMMO = **std::next(MI.memoperands_begin()); + Align DstAlign = DstMMO.getBaseAlign(); + Align SrcAlign = SrcMMO.getBaseAlign(); + + return tryEmitMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, + IsVolatile); +} + +bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI, Register Dst, + Register Src, uint64_t KnownLen, + Align DstAlign, Align SrcAlign, + bool IsVolatile) { + assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE); + return optimizeMemcpy(MI, Dst, Src, KnownLen, + std::numeric_limits::max(), DstAlign, + SrcAlign, IsVolatile); +} + bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst, - Register Src, unsigned KnownLen, - Align DstAlign, Align SrcAlign, - bool IsVolatile) { + Register Src, uint64_t KnownLen, + uint64_t Limit, Align DstAlign, + Align SrcAlign, bool IsVolatile) { auto &MF = *MI.getParent()->getParent(); const auto &TLI = *MF.getSubtarget().getTargetLowering(); auto &DL = MF.getDataLayout(); @@ -1343,7 +1384,6 @@ bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst, bool DstAlignCanChange = false; MachineFrameInfo &MFI = MF.getFrameInfo(); - bool OptSize = shouldLowerMemFuncForSize(MF); Align Alignment = commonAlignment(DstAlign, SrcAlign); MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI); @@ -1354,7 +1394,6 @@ bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst, // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining // if the memcpy is in a tail call position. - unsigned Limit = TLI.getMaxStoresPerMemcpy(OptSize); std::vector MemOps; const auto &DstMMO = **MI.memoperands_begin(); @@ -1437,7 +1476,7 @@ bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst, } bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst, - Register Src, unsigned KnownLen, + Register Src, uint64_t KnownLen, Align DstAlign, Align SrcAlign, bool IsVolatile) { auto &MF = *MI.getParent()->getParent(); @@ -1550,10 +1589,6 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) { auto MMOIt = MI.memoperands_begin(); const MachineMemOperand *MemOp = *MMOIt; - bool IsVolatile = MemOp->isVolatile(); - // Don't try to optimize volatile. - if (IsVolatile) - return false; Align DstAlign = MemOp->getBaseAlign(); Align SrcAlign; @@ -1571,18 +1606,33 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) { auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI); if (!LenVRegAndVal) return false; // Leave it to the legalizer to lower it to a libcall. - unsigned KnownLen = LenVRegAndVal->Value.getZExtValue(); + uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue(); if (KnownLen == 0) { MI.eraseFromParent(); return true; } + bool IsVolatile = MemOp->isVolatile(); + if (Opc == TargetOpcode::G_MEMCPY_INLINE) + return tryEmitMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, + IsVolatile); + + // Don't try to optimize volatile. + if (IsVolatile) + return false; + if (MaxLen && KnownLen > MaxLen) return false; - if (Opc == TargetOpcode::G_MEMCPY) - return optimizeMemcpy(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile); + if (Opc == TargetOpcode::G_MEMCPY) { + auto &MF = *MI.getParent()->getParent(); + const auto &TLI = *MF.getSubtarget().getTargetLowering(); + bool OptSize = shouldLowerMemFuncForSize(MF); + uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize); + return optimizeMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign, + IsVolatile); + } if (Opc == TargetOpcode::G_MEMMOVE) return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile); if (Opc == TargetOpcode::G_MEMSET) diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index a3a8f10c011df..e791232e74f3d 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -1589,6 +1589,9 @@ bool IRTranslator::translateMemFunc(const CallInst &CI, if (auto *MCI = dyn_cast(&CI)) { DstAlign = MCI->getDestAlign().valueOrOne(); SrcAlign = MCI->getSourceAlign().valueOrOne(); + } else if (auto *MCI = dyn_cast(&CI)) { + DstAlign = MCI->getDestAlign().valueOrOne(); + SrcAlign = MCI->getSourceAlign().valueOrOne(); } else if (auto *MMI = dyn_cast(&CI)) { DstAlign = MMI->getDestAlign().valueOrOne(); SrcAlign = MMI->getSourceAlign().valueOrOne(); @@ -1597,10 +1600,12 @@ bool IRTranslator::translateMemFunc(const CallInst &CI, DstAlign = MSI->getDestAlign().valueOrOne(); } - // We need to propagate the tail call flag from the IR inst as an argument. - // Otherwise, we have to pessimize and assume later that we cannot tail call - // any memory intrinsics. - ICall.addImm(CI.isTailCall() ? 1 : 0); + if (Opcode != TargetOpcode::G_MEMCPY_INLINE) { + // We need to propagate the tail call flag from the IR inst as an argument. + // Otherwise, we have to pessimize and assume later that we cannot tail call + // any memory intrinsics. + ICall.addImm(CI.isTailCall() ? 1 : 0); + } // Create mem operands to store the alignment and volatile info. auto VolFlag = IsVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone; @@ -2033,6 +2038,8 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, getOrCreateVReg(*CI.getArgOperand(0)), MachineInstr::copyFlagsFromInstruction(CI)); return true; + case Intrinsic::memcpy_inline: + return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY_INLINE); case Intrinsic::memcpy: return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY); case Intrinsic::memmove: diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index cb098acb960b5..6b8c9f460567f 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -1477,6 +1477,7 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { } break; } + case TargetOpcode::G_MEMCPY_INLINE: case TargetOpcode::G_MEMCPY: case TargetOpcode::G_MEMMOVE: { ArrayRef MMOs = MI->memoperands(); @@ -1507,6 +1508,10 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { if (SrcPtrTy.getAddressSpace() != MMOs[1]->getAddrSpace()) report("inconsistent load address space", MI); + if (Opc != TargetOpcode::G_MEMCPY_INLINE) + if (!MI->getOperand(3).isImm() || (MI->getOperand(3).getImm() & ~1LL)) + report("'tail' flag (operand 3) must be an immediate 0 or 1", MI); + break; } case TargetOpcode::G_BZERO: @@ -1532,6 +1537,10 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { if (DstPtrTy.getAddressSpace() != MMOs[0]->getAddrSpace()) report("inconsistent " + Twine(Name, " address space"), MI); + if (!MI->getOperand(MI->getNumOperands() - 1).isImm() || + (MI->getOperand(MI->getNumOperands() - 1).getImm() & ~1LL)) + report("'tail' flag (last operand) must be an immediate 0 or 1", MI); + break; } case TargetOpcode::G_VECREDUCE_SEQ_FADD: diff --git a/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp index ad92f84d41417..04bc91318da85 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp @@ -85,6 +85,8 @@ bool AArch64O0PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer, return Helper.tryCombineConcatVectors(MI); case TargetOpcode::G_SHUFFLE_VECTOR: return Helper.tryCombineShuffleVector(MI); + case TargetOpcode::G_MEMCPY_INLINE: + return Helper.tryEmitMemcpyInline(MI); case TargetOpcode::G_MEMCPY: case TargetOpcode::G_MEMMOVE: case TargetOpcode::G_MEMSET: { diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp index 0e4695fd74c27..9efbcbb0065ba 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp @@ -272,6 +272,8 @@ bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer, return Helper.tryCombineConcatVectors(MI); case TargetOpcode::G_SHUFFLE_VECTOR: return Helper.tryCombineShuffleVector(MI); + case TargetOpcode::G_MEMCPY_INLINE: + return Helper.tryEmitMemcpyInline(MI); case TargetOpcode::G_MEMCPY: case TargetOpcode::G_MEMMOVE: case TargetOpcode::G_MEMSET: { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp index ba08af2ecfcbd..13f09ab8f1649 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp @@ -205,6 +205,8 @@ bool AMDGPUPreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer, return true; switch (MI.getOpcode()) { + case TargetOpcode::G_MEMCPY_INLINE: + return Helper.tryEmitMemcpyInline(MI); case TargetOpcode::G_CONCAT_VECTORS: return Helper.tryCombineConcatVectors(MI); case TargetOpcode::G_SHUFFLE_VECTOR: diff --git a/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp b/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp index 310e54b0ea8d5..2ad9ffe4eb77d 100644 --- a/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp +++ b/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp @@ -42,6 +42,8 @@ bool MipsPreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer, switch (MI.getOpcode()) { default: return false; + case TargetOpcode::G_MEMCPY_INLINE: + return Helper.tryEmitMemcpyInline(MI); case TargetOpcode::G_LOAD: case TargetOpcode::G_SEXTLOAD: case TargetOpcode::G_ZEXTLOAD: { diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy-forced.mir b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy-forced.mir new file mode 100644 index 0000000000000..73766ff82d2c8 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy-forced.mir @@ -0,0 +1,86 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=aarch64 -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s +--- | + target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + target triple = "arm64-apple-darwin" + + declare void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1 immarg) #0 + + define void @test_memcpy_inline(i32* nocapture %dst, i32* nocapture readonly %src) local_unnamed_addr { + entry: + %0 = bitcast i32* %dst to i8* + %1 = bitcast i32* %src to i8* + tail call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 143, i1 false) + ret void + } + + attributes #0 = { argmemonly nounwind } + +... +--- +name: test_memcpy_inline +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +machineFunctionInfo: {} +body: | + bb.1.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: test_memcpy_inline + ; CHECK: liveins: $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.1, align 4) + ; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store 16 into %ir.0, align 4) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p0) :: (load 16 from %ir.1 + 16, align 4) + ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: G_STORE [[LOAD1]](s128), [[PTR_ADD1]](p0) :: (store 16 into %ir.0 + 16, align 4) + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 + ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD2]](p0) :: (load 16 from %ir.1 + 32, align 4) + ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK: G_STORE [[LOAD2]](s128), [[PTR_ADD3]](p0) :: (store 16 into %ir.0 + 32, align 4) + ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 + ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64) + ; CHECK: [[LOAD3:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD4]](p0) :: (load 16 from %ir.1 + 48, align 4) + ; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK: G_STORE [[LOAD3]](s128), [[PTR_ADD5]](p0) :: (store 16 into %ir.0 + 48, align 4) + ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 + ; CHECK: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C3]](s64) + ; CHECK: [[LOAD4:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD6]](p0) :: (load 16 from %ir.1 + 64, align 4) + ; CHECK: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; CHECK: G_STORE [[LOAD4]](s128), [[PTR_ADD7]](p0) :: (store 16 into %ir.0 + 64, align 4) + ; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 80 + ; CHECK: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C4]](s64) + ; CHECK: [[LOAD5:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD8]](p0) :: (load 16 from %ir.1 + 80, align 4) + ; CHECK: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; CHECK: G_STORE [[LOAD5]](s128), [[PTR_ADD9]](p0) :: (store 16 into %ir.0 + 80, align 4) + ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 96 + ; CHECK: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C5]](s64) + ; CHECK: [[LOAD6:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD10]](p0) :: (load 16 from %ir.1 + 96, align 4) + ; CHECK: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) + ; CHECK: G_STORE [[LOAD6]](s128), [[PTR_ADD11]](p0) :: (store 16 into %ir.0 + 96, align 4) + ; CHECK: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 112 + ; CHECK: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C6]](s64) + ; CHECK: [[LOAD7:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD12]](p0) :: (load 16 from %ir.1 + 112, align 4) + ; CHECK: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) + ; CHECK: G_STORE [[LOAD7]](s128), [[PTR_ADD13]](p0) :: (store 16 into %ir.0 + 112, align 4) + ; CHECK: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 127 + ; CHECK: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C7]](s64) + ; CHECK: [[LOAD8:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD14]](p0) :: (load 16 from %ir.1 + 127, align 1, basealign 4) + ; CHECK: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) + ; CHECK: G_STORE [[LOAD8]](s128), [[PTR_ADD15]](p0) :: (store 16 into %ir.0 + 127, align 1, basealign 4) + ; CHECK: RET_ReallyLR + %0:_(p0) = COPY $x0 + %1:_(p0) = COPY $x1 + %2:_(s64) = G_CONSTANT i64 143 + G_MEMCPY_INLINE %0(p0), %1(p0), %2(s64) :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + RET_ReallyLR + +... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-memcpy-inline.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-memcpy-inline.ll new file mode 100644 index 0000000000000..76e2b574c6c6e --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-memcpy-inline.ll @@ -0,0 +1,125 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -mtriple=aarch64-unknown-unknown -global-isel -global-isel-abort=1 -verify-machineinstrs -stop-after=irtranslator %s -o - | FileCheck %s + +define void @copy(i8* %dst, i8* %src) { + ; CHECK-LABEL: name: copy + ; CHECK: bb.1.entry: + ; CHECK: liveins: $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32) + ; CHECK: G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64), 0 :: (store 1 into %ir.dst), (load 1 from %ir.src) + ; CHECK: RET_ReallyLR +entry: + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 4, i1 false) + ret void +} + +define void @inline_copy(i8* %dst, i8* %src) { + ; CHECK-LABEL: name: inline_copy + ; CHECK: bb.1.entry: + ; CHECK: liveins: $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32) + ; CHECK: G_MEMCPY_INLINE [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64) :: (store 1 into %ir.dst), (load 1 from %ir.src) + ; CHECK: RET_ReallyLR +entry: + call void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 4, i1 false) + ret void +} + +define void @copy_volatile(i8* %dst, i8* %src) { + ; CHECK-LABEL: name: copy_volatile + ; CHECK: bb.1.entry: + ; CHECK: liveins: $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32) + ; CHECK: G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64), 0 :: (volatile store 1 into %ir.dst), (volatile load 1 from %ir.src) + ; CHECK: RET_ReallyLR +entry: + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 4, i1 true) + ret void +} + +define void @inline_copy_volatile(i8* %dst, i8* %src) { + ; CHECK-LABEL: name: inline_copy_volatile + ; CHECK: bb.1.entry: + ; CHECK: liveins: $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32) + ; CHECK: G_MEMCPY_INLINE [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64) :: (volatile store 1 into %ir.dst), (volatile load 1 from %ir.src) + ; CHECK: RET_ReallyLR +entry: + call void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 4, i1 true) + ret void +} + +define void @tail_copy(i8* %dst, i8* %src) { + ; CHECK-LABEL: name: tail_copy + ; CHECK: bb.1.entry: + ; CHECK: liveins: $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32) + ; CHECK: G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64), 1 :: (store 1 into %ir.dst), (load 1 from %ir.src) + ; CHECK: RET_ReallyLR +entry: + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 4, i1 false) + ret void +} + +define void @tail_inline_copy(i8* %dst, i8* %src) { + ; CHECK-LABEL: name: tail_inline_copy + ; CHECK: bb.1.entry: + ; CHECK: liveins: $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32) + ; CHECK: G_MEMCPY_INLINE [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64) :: (store 1 into %ir.dst), (load 1 from %ir.src) + ; CHECK: RET_ReallyLR +entry: + tail call void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 4, i1 false) + ret void +} + +define void @tail_copy_volatile(i8* %dst, i8* %src) { + ; CHECK-LABEL: name: tail_copy_volatile + ; CHECK: bb.1.entry: + ; CHECK: liveins: $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32) + ; CHECK: G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64), 1 :: (volatile store 1 into %ir.dst), (volatile load 1 from %ir.src) + ; CHECK: RET_ReallyLR +entry: + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 4, i1 true) + ret void +} + +define void @tail_inline_copy_volatile(i8* %dst, i8* %src) { + ; CHECK-LABEL: name: tail_inline_copy_volatile + ; CHECK: bb.1.entry: + ; CHECK: liveins: $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32) + ; CHECK: G_MEMCPY_INLINE [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64) :: (volatile store 1 into %ir.dst), (volatile load 1 from %ir.src) + ; CHECK: RET_ReallyLR +entry: + tail call void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 4, i1 true) + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1) nounwind +declare void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1) nounwind diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir index f50e844ff3b68..bb18a95beb9f5 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -629,6 +629,9 @@ # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: G_MEMCPY_INLINE (opcode 219): 3 type indices, 0 imm indices +# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined # DEBUG-NEXT: G_MEMMOVE (opcode {{[0-9]+}}): 3 type indices, 1 imm index # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-memcpy-inline.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-memcpy-inline.mir new file mode 100644 index 0000000000000..678d40d6d7652 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-memcpy-inline.mir @@ -0,0 +1,81 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=amdgpu-prelegalizer-combiner -verify-machineinstrs -o - %s | FileCheck %s +--- | + target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" + target triple = "amdgcn-amd-amdhsa" + + declare void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64 immarg, i1 immarg) #0 + + define void @test_memcpy_inline(i32* nocapture %dst, i32* nocapture readonly %src) local_unnamed_addr #1 { + entry: + %0 = bitcast i32* %dst to i8* + %1 = bitcast i32* %src to i8* + tail call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 13, i1 false) + ret void + } + + attributes #0 = { argmemonly nofree nounwind willreturn "target-cpu"="gfx900" } + attributes #1 = { "target-cpu"="gfx900" } + +... +--- +name: test_memcpy_inline +alignment: 1 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: sgpr_64 } + - { id: 3, class: _ } + - { id: 4, class: _ } + - { id: 5, class: _ } + - { id: 6, class: _ } + - { id: 7, class: _ } + - { id: 8, class: ccr_sgpr_64 } +liveins: + - { reg: '$sgpr30_sgpr31', virtual-reg: '%2' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: + maxKernArgAlign: 1 + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + occupancy: 10 +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + + ; CHECK-LABEL: name: test_memcpy_inline + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[MV1]](p0) :: (load 8 from %ir.1, align 4) + ; CHECK: G_STORE [[LOAD]](s64), [[MV]](p0) :: (store 8 into %ir.0, align 4) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[MV1]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load 8 from %ir.1 + 5, align 1, basealign 4) + ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[MV]], [[C]](s64) + ; CHECK: G_STORE [[LOAD1]](s64), [[PTR_ADD1]](p0) :: (store 8 into %ir.0 + 5, align 1, basealign 4) + ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK: S_SETPC_B64_return [[COPY5]] + %3:_(s32) = COPY $vgpr0 + %4:_(s32) = COPY $vgpr1 + %0:_(p0) = G_MERGE_VALUES %3(s32), %4(s32) + %5:_(s32) = COPY $vgpr2 + %6:_(s32) = COPY $vgpr3 + %1:_(p0) = G_MERGE_VALUES %5(s32), %6(s32) + %2:sgpr_64 = COPY $sgpr30_sgpr31 + %7:_(s64) = G_CONSTANT i64 13 + G_MEMCPY_INLINE %0(p0), %1(p0), %7(s64) :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + %8:ccr_sgpr_64 = COPY %2 + S_SETPC_B64_return %8 + +... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/mips-prelegalizer-combiner/inline-memcpy.mir b/llvm/test/CodeGen/Mips/GlobalISel/mips-prelegalizer-combiner/inline-memcpy.mir new file mode 100644 index 0000000000000..8fc9ce00a977c --- /dev/null +++ b/llvm/test/CodeGen/Mips/GlobalISel/mips-prelegalizer-combiner/inline-memcpy.mir @@ -0,0 +1,60 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -O0 -mtriple=mipsel-linux-gnu -run-pass=mips-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=MIPS32 +--- | + ; ModuleID = '../llvm/test/CodeGen/Mips/GlobalISel/mips-prelegalizer-combiner/inline-memcpy.ll' + source_filename = "../llvm/test/CodeGen/Mips/GlobalISel/mips-prelegalizer-combiner/inline-memcpy.ll" + target datalayout = "e-m:m-p:32:32-i8:8:32-i16:16:32-i64:64-n32-S64" + target triple = "mipsel-pc-linux-gnu" + + declare void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64 immarg, i1 immarg) #0 + + define void @test_memcpy_inline(i32* nocapture %dst, i32* nocapture readonly %src) local_unnamed_addr { + entry: + %0 = bitcast i32* %dst to i8* + %1 = bitcast i32* %src to i8* + tail call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 2, i1 false) + ret void + } + + attributes #0 = { argmemonly nofree nounwind willreturn } + +... +--- +name: test_memcpy_inline +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: _ } +liveins: + - { reg: '$a0' } + - { reg: '$a1' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.1.entry: + liveins: $a0, $a1 + + ; MIPS32-LABEL: name: test_memcpy_inline + ; MIPS32: liveins: $a0, $a1 + ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 + ; MIPS32: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 + ; MIPS32: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY1]](p0) :: (load 1 from %ir.1, align 4) + ; MIPS32: G_STORE [[LOAD]](s8), [[COPY]](p0) :: (store 1 into %ir.0, align 4) + ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s32) + ; MIPS32: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from %ir.1 + 1, basealign 4) + ; MIPS32: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) + ; MIPS32: G_STORE [[LOAD1]](s8), [[PTR_ADD1]](p0) :: (store 1 into %ir.0 + 1, basealign 4) + ; MIPS32: RetRA + %0:_(p0) = COPY $a0 + %1:_(p0) = COPY $a1 + %2:_(s64) = G_CONSTANT i64 2 + %3:_(s32) = G_TRUNC %2(s64) + G_MEMCPY_INLINE %0(p0), %1(p0), %3(s32) :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + RetRA + +... diff --git a/llvm/test/MachineVerifier/test_g_bzero.mir b/llvm/test/MachineVerifier/test_g_bzero.mir index 6e0212115aa6a..c1eb32965754a 100644 --- a/llvm/test/MachineVerifier/test_g_bzero.mir +++ b/llvm/test/MachineVerifier/test_g_bzero.mir @@ -27,7 +27,9 @@ body: | ; CHECK: *** Bad machine code: inconsistent bzero address space *** G_BZERO %ptr, %cst2, 0 :: (store 4, addrspace 1) - ; CHECK: *** Bad machine code: bzero operand must be a pointer *** + ; CHECK: *** Bad machine code: bzero operand must be a pointer *** G_BZERO %cst1, %cst2, 0 :: (store 4) + ; CHECK: *** Bad machine code: 'tail' flag (last operand) must be an immediate 0 or 1 *** + G_BZERO %ptr, %cst2, 2 :: (store 4) ... diff --git a/llvm/test/MachineVerifier/test_g_memcpy.mir b/llvm/test/MachineVerifier/test_g_memcpy.mir index 6b1584a4bbfd1..54733ddb74b73 100644 --- a/llvm/test/MachineVerifier/test_g_memcpy.mir +++ b/llvm/test/MachineVerifier/test_g_memcpy.mir @@ -47,4 +47,9 @@ body: | ; CHECK: *** Bad machine code: memory instruction operand must be a pointer *** G_MEMCPY %0, %2, %2, 0 :: (store 4), (load 4) + ; CHECK: *** Bad machine code: 'tail' flag (operand 3) must be an immediate 0 or 1 *** + G_MEMCPY %0, %0, %2, %0 :: (store 4), (load 4) + + ; CHECK: *** Bad machine code: 'tail' flag (operand 3) must be an immediate 0 or 1 *** + G_MEMCPY %0, %0, %2, 2 :: (store 4), (load 4) ... diff --git a/llvm/test/MachineVerifier/test_g_memcpy_inline.mir b/llvm/test/MachineVerifier/test_g_memcpy_inline.mir new file mode 100644 index 0000000000000..60526fa29f2e9 --- /dev/null +++ b/llvm/test/MachineVerifier/test_g_memcpy_inline.mir @@ -0,0 +1,49 @@ +#RUN: not --crash llc -o - -march=arm64 -run-pass=none -verify-machineinstrs %s 2>&1 | FileCheck %s +# REQUIRES: aarch64-registered-target +--- +name: test_memcpy_inline +legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +liveins: +body: | + bb.0: + + %0:_(p0) = G_CONSTANT i64 0 + %1:_(p0) = G_CONSTANT i64 4 + %2:_(s64) = G_CONSTANT i64 4 + + ; CHECK: *** Bad machine code: memcpy/memmove must have 2 memory operands *** + G_MEMCPY_INLINE %0, %1, %2 + + ; CHECK: *** Bad machine code: memcpy/memmove must have 2 memory operands *** + G_MEMCPY_INLINE %0, %1, %2 :: (load 4) + + ; CHECK: *** Bad machine code: memcpy/memmove must have 2 memory operands *** + G_MEMCPY_INLINE %0, %1, %2 :: (store 4) + + ; CHECK: *** Bad machine code: wrong memory operand types *** + G_MEMCPY_INLINE %0, %1, %2 :: (load 4), (store 4) + + ; CHECK: *** Bad machine code: inconsistent memory operand sizes *** + G_MEMCPY_INLINE %0, %1, %2 :: (store 8), (load 4) + + ; CHECK: *** Bad machine code: inconsistent memory operand sizes *** + G_MEMCPY_INLINE %0, %1, %2 :: (store unknown-size), (load 4) + + ; CHECK: *** Bad machine code: inconsistent memory operand sizes *** + G_MEMCPY_INLINE %0, %1, %2 :: (store 8), (load unknown-size) + + ; CHECK: *** Bad machine code: inconsistent store address space *** + G_MEMCPY_INLINE %0, %1, %2 :: (store 4, addrspace 1), (load 4) + + ; CHECK: *** Bad machine code: inconsistent load address space *** + G_MEMCPY_INLINE %0, %1, %2 :: (store 4), (load 4, addrspace 1) + + ; CHECK: *** Bad machine code: memory instruction operand must be a pointer *** + G_MEMCPY_INLINE %2, %0, %2 :: (store 4), (load 4) + + ; CHECK: *** Bad machine code: memory instruction operand must be a pointer *** + G_MEMCPY_INLINE %0, %2, %2 :: (store 4), (load 4) +... diff --git a/llvm/test/MachineVerifier/test_g_memmove.mir b/llvm/test/MachineVerifier/test_g_memmove.mir new file mode 100644 index 0000000000000..209b8972bd6ee --- /dev/null +++ b/llvm/test/MachineVerifier/test_g_memmove.mir @@ -0,0 +1,55 @@ +#RUN: not --crash llc -o - -march=arm64 -run-pass=none -verify-machineinstrs %s 2>&1 | FileCheck %s +# REQUIRES: aarch64-registered-target +--- +name: test_memmove +legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +liveins: +body: | + bb.0: + + %0:_(p0) = G_CONSTANT i64 0 + %1:_(p0) = G_CONSTANT i64 4 + %2:_(s64) = G_CONSTANT i64 4 + + ; CHECK: *** Bad machine code: memcpy/memmove must have 2 memory operands *** + G_MEMMOVE %0, %1, %2, 0 + + ; CHECK: *** Bad machine code: memcpy/memmove must have 2 memory operands *** + G_MEMMOVE %0, %1, %2, 0 :: (load 4) + + ; CHECK: *** Bad machine code: memcpy/memmove must have 2 memory operands *** + G_MEMMOVE %0, %1, %2, 0 :: (store 4) + + ; CHECK: *** Bad machine code: wrong memory operand types *** + G_MEMMOVE %0, %1, %2, 0 :: (load 4), (store 4) + + ; CHECK: *** Bad machine code: inconsistent memory operand sizes *** + G_MEMMOVE %0, %1, %2, 0 :: (store 8), (load 4) + + ; CHECK: *** Bad machine code: inconsistent memory operand sizes *** + G_MEMMOVE %0, %1, %2, 0 :: (store unknown-size), (load 4) + + ; CHECK: *** Bad machine code: inconsistent memory operand sizes *** + G_MEMMOVE %0, %1, %2, 0 :: (store 8), (load unknown-size) + + ; CHECK: *** Bad machine code: inconsistent store address space *** + G_MEMMOVE %0, %1, %2, 0 :: (store 4, addrspace 1), (load 4) + + ; CHECK: *** Bad machine code: inconsistent load address space *** + G_MEMMOVE %0, %1, %2, 0 :: (store 4), (load 4, addrspace 1) + + ; CHECK: *** Bad machine code: memory instruction operand must be a pointer *** + G_MEMMOVE %2, %0, %2, 0 :: (store 4), (load 4) + + ; CHECK: *** Bad machine code: memory instruction operand must be a pointer *** + G_MEMMOVE %0, %2, %2, 0 :: (store 4), (load 4) + + ; CHECK: *** Bad machine code: 'tail' flag (operand 3) must be an immediate 0 or 1 *** + G_MEMMOVE %0, %0, %2, %0 :: (store 4), (load 4) + + ; CHECK: *** Bad machine code: 'tail' flag (operand 3) must be an immediate 0 or 1 *** + G_MEMMOVE %0, %0, %2, 2 :: (store 4), (load 4) +... From 24a36ce58b773332dbb28df87270251686c9430d Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Wed, 30 Jun 2021 15:39:43 -0400 Subject: [PATCH 336/619] [OpenMP][Offloading] Replace all calls to `isSPMDMode` with `__kmpc_is_spmd_exec_mode` In our ongoing work, we are using `AbstractAttributor` to deduct execution model of device functions, and potententially remove unnecessary function calls to `__kmpc_is_spmd_exec_mode`. In current device runtime, we have mixed use of `isSPMDMode` and `__kmpc_is_spmd_exec_mode`, but in fact in `__kmpc_is_spmd_exec_mode` it simply calls `isSPMDMode`. Since all functions starting with `__kmpc` is C function, which doesn't have things like name mangling. It is more optimization friendly. In this patch, we simply replaced all calls to `isSPMDMode` with `__kmpc_is_spmd_exec_mode` to pave the way for the optimization. Reviewed By: JonChesterfield Differential Revision: https://reviews.llvm.org/D105211 --- .../deviceRTLs/common/src/data_sharing.cu | 2 +- .../deviceRTLs/common/src/libcall.cu | 22 +++++++++---------- .../deviceRTLs/common/src/loop.cu | 2 +- .../deviceRTLs/common/src/omptarget.cu | 5 ++--- .../deviceRTLs/common/src/support.cu | 6 ++--- .../libomptarget/deviceRTLs/common/support.h | 1 - 6 files changed, 17 insertions(+), 21 deletions(-) diff --git a/openmp/libomptarget/deviceRTLs/common/src/data_sharing.cu b/openmp/libomptarget/deviceRTLs/common/src/data_sharing.cu index 367fc608c2994..3b95ca88aad27 100644 --- a/openmp/libomptarget/deviceRTLs/common/src/data_sharing.cu +++ b/openmp/libomptarget/deviceRTLs/common/src/data_sharing.cu @@ -66,7 +66,7 @@ static void *__kmpc_alloc_for_warp(AllocTy Alloc, unsigned Bytes, EXTERN void *__kmpc_alloc_shared(size_t Bytes) { Bytes = Bytes + (Bytes % MinBytes); - if (IsMasterThread(isSPMDMode())) { + if (IsMasterThread(__kmpc_is_spmd_exec_mode())) { // Main thread alone, use shared memory if space is available. if (MainSharedStack.Usage[0] + Bytes <= MainSharedStack.MaxSize) { void *Ptr = &MainSharedStack.Data[MainSharedStack.Usage[0]]; diff --git a/openmp/libomptarget/deviceRTLs/common/src/libcall.cu b/openmp/libomptarget/deviceRTLs/common/src/libcall.cu index c1cfe3e7d6d72..ae0590284d06d 100644 --- a/openmp/libomptarget/deviceRTLs/common/src/libcall.cu +++ b/openmp/libomptarget/deviceRTLs/common/src/libcall.cu @@ -29,7 +29,7 @@ EXTERN double omp_get_wtime(void) { EXTERN void omp_set_num_threads(int num) { // Ignore it for SPMD mode. - if (isSPMDMode()) + if (__kmpc_is_spmd_exec_mode()) return; ASSERT0(LT_FUSSY, isRuntimeInitialized(), "Expected initialized runtime."); PRINT(LD_IO, "call omp_set_num_threads(num %d)\n", num); @@ -41,7 +41,7 @@ EXTERN void omp_set_num_threads(int num) { } EXTERN int omp_get_num_threads(void) { - int rc = GetNumberOfOmpThreads(isSPMDMode()); + int rc = GetNumberOfOmpThreads(__kmpc_is_spmd_exec_mode()); PRINT(LD_IO, "call omp_get_num_threads() return %d\n", rc); return rc; } @@ -60,7 +60,7 @@ EXTERN int omp_get_max_threads(void) { } EXTERN int omp_get_thread_limit(void) { - if (isSPMDMode()) + if (__kmpc_is_spmd_exec_mode()) return GetNumberOfThreadsInBlock(); int rc = threadLimit; PRINT(LD_IO, "call omp_get_thread_limit() return %d\n", rc); @@ -68,7 +68,7 @@ EXTERN int omp_get_thread_limit(void) { } EXTERN int omp_get_thread_num() { - bool isSPMDExecutionMode = isSPMDMode(); + bool isSPMDExecutionMode = __kmpc_is_spmd_exec_mode(); int tid = GetLogicalThreadIdInBlock(isSPMDExecutionMode); int rc = GetOmpThreadId(tid, isSPMDExecutionMode); PRINT(LD_IO, "call omp_get_thread_num() returns %d\n", rc); @@ -76,7 +76,7 @@ EXTERN int omp_get_thread_num() { } EXTERN int omp_get_num_procs(void) { - int rc = GetNumberOfProcsInDevice(isSPMDMode()); + int rc = GetNumberOfProcsInDevice(__kmpc_is_spmd_exec_mode()); PRINT(LD_IO, "call omp_get_num_procs() returns %d\n", rc); return rc; } @@ -143,7 +143,7 @@ EXTERN int omp_get_active_level(void) { } EXTERN int omp_get_ancestor_thread_num(int level) { - if (isSPMDMode()) + if (__kmpc_is_spmd_exec_mode()) return level == 1 ? GetThreadIdInBlock() : 0; int rc = -1; // If level is 0 or all parallel regions are not active - return 0. @@ -197,7 +197,7 @@ EXTERN int omp_get_ancestor_thread_num(int level) { } EXTERN int omp_get_team_size(int level) { - if (isSPMDMode()) + if (__kmpc_is_spmd_exec_mode()) return level == 1 ? GetNumberOfThreadsInBlock() : 1; int rc = -1; unsigned parLevel = parallelLevel[GetWarpId()]; @@ -217,13 +217,13 @@ EXTERN int omp_get_team_size(int level) { EXTERN void omp_get_schedule(omp_sched_t *kind, int *modifier) { if (isRuntimeUninitialized()) { - ASSERT0(LT_FUSSY, isSPMDMode(), + ASSERT0(LT_FUSSY, __kmpc_is_spmd_exec_mode(), "Expected SPMD mode only with uninitialized runtime."); *kind = omp_sched_static; *modifier = 1; } else { omptarget_nvptx_TaskDescr *currTaskDescr = - getMyTopTaskDescriptor(isSPMDMode()); + getMyTopTaskDescriptor(__kmpc_is_spmd_exec_mode()); *kind = currTaskDescr->GetRuntimeSched(); *modifier = currTaskDescr->RuntimeChunkSize(); } @@ -235,13 +235,13 @@ EXTERN void omp_set_schedule(omp_sched_t kind, int modifier) { PRINT(LD_IO, "call omp_set_schedule(sched %d, modif %d)\n", (int)kind, modifier); if (isRuntimeUninitialized()) { - ASSERT0(LT_FUSSY, isSPMDMode(), + ASSERT0(LT_FUSSY, __kmpc_is_spmd_exec_mode(), "Expected SPMD mode only with uninitialized runtime."); return; } if (kind >= omp_sched_static && kind < omp_sched_auto) { omptarget_nvptx_TaskDescr *currTaskDescr = - getMyTopTaskDescriptor(isSPMDMode()); + getMyTopTaskDescriptor(__kmpc_is_spmd_exec_mode()); currTaskDescr->SetRuntimeSched(kind); currTaskDescr->RuntimeChunkSize() = modifier; PRINT(LD_IOD, "omp_set_schedule did set sched %d & modif %" PRIu64 "\n", diff --git a/openmp/libomptarget/deviceRTLs/common/src/loop.cu b/openmp/libomptarget/deviceRTLs/common/src/loop.cu index b1fce8e3d0c62..04447be28db11 100644 --- a/openmp/libomptarget/deviceRTLs/common/src/loop.cu +++ b/openmp/libomptarget/deviceRTLs/common/src/loop.cu @@ -508,7 +508,7 @@ public: PRINT(LD_LOOP, "Got sched: active %d, total %d: lb %lld, ub %lld, stride = %lld, " "last %d\n", - (int)GetNumberOfOmpThreads(isSPMDMode()), + (int)GetNumberOfOmpThreads(__kmpc_is_spmd_exec_mode()), (int)GetNumberOfWorkersInTeam(), (long long)*plower, (long long)*pupper, (long long)*pstride, (int)*plast); return DISPATCH_NOTFINISHED; diff --git a/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu b/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu index 1c53131d17144..c117c7e00bf28 100644 --- a/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu +++ b/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu @@ -65,7 +65,7 @@ EXTERN void __kmpc_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime) { nThreads = GetNumberOfThreadsInBlock(); threadLimit = ThreadLimit; - if (!isSPMDMode()) + if (!__kmpc_is_spmd_exec_mode()) omptarget_nvptx_globalArgs.Init(); __kmpc_data_sharing_init_stack(); @@ -162,8 +162,7 @@ EXTERN void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime) { // Return true if the current target region is executed in SPMD mode. EXTERN int8_t __kmpc_is_spmd_exec_mode() { - PRINT0(LD_IO | LD_PAR, "call to __kmpc_is_spmd_exec_mode\n"); - return isSPMDMode(); + return (execution_param & ModeMask) == Spmd; } #pragma omp end declare target diff --git a/openmp/libomptarget/deviceRTLs/common/src/support.cu b/openmp/libomptarget/deviceRTLs/common/src/support.cu index df7ad675572e3..0977e2867e6e1 100644 --- a/openmp/libomptarget/deviceRTLs/common/src/support.cu +++ b/openmp/libomptarget/deviceRTLs/common/src/support.cu @@ -26,8 +26,6 @@ void setExecutionParameters(ExecutionMode EMode, RuntimeMode RMode) { bool isGenericMode() { return (execution_param & ModeMask) == Generic; } -bool isSPMDMode() { return (execution_param & ModeMask) == Spmd; } - bool isRuntimeUninitialized() { return (execution_param & RuntimeMask) == RuntimeUninitialized; } @@ -42,7 +40,7 @@ bool isRuntimeInitialized() { bool checkSPMDMode(kmp_Ident *loc) { if (!loc) - return isSPMDMode(); + return __kmpc_is_spmd_exec_mode(); // If SPMD is true then we are not in the UNDEFINED state so // we can return immediately. @@ -55,7 +53,7 @@ bool checkSPMDMode(kmp_Ident *loc) { return false; // We are in underfined state. - return isSPMDMode(); + return __kmpc_is_spmd_exec_mode(); } bool checkGenericMode(kmp_Ident *loc) { return !checkSPMDMode(loc); } diff --git a/openmp/libomptarget/deviceRTLs/common/support.h b/openmp/libomptarget/deviceRTLs/common/support.h index af20362fbf871..4e9ce39b30403 100644 --- a/openmp/libomptarget/deviceRTLs/common/support.h +++ b/openmp/libomptarget/deviceRTLs/common/support.h @@ -33,7 +33,6 @@ enum RuntimeMode { void setExecutionParameters(ExecutionMode EMode, RuntimeMode RMode); bool isGenericMode(); -bool isSPMDMode(); bool isRuntimeUninitialized(); bool isRuntimeInitialized(); From 69e0f790e03221b65fbf9f9fb340a7bb916bb12c Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Wed, 30 Jun 2021 21:39:34 +0200 Subject: [PATCH 337/619] [Orc] Fix name of LLVMOrcIRTransformLayerSetTransform In https://reviews.llvm.org/D103855 we added access to IRTransformLayer, but I just noticed that the function name is following the wrong pattern. Differential Revision: https://reviews.llvm.org/D104840 --- .../OrcV2CBindingsIRTransforms/OrcV2CBindingsIRTransforms.c | 2 +- llvm/include/llvm-c/Orc.h | 4 ++-- llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/examples/OrcV2Examples/OrcV2CBindingsIRTransforms/OrcV2CBindingsIRTransforms.c b/llvm/examples/OrcV2Examples/OrcV2CBindingsIRTransforms/OrcV2CBindingsIRTransforms.c index f549f1c462b9c..6335327b6baf7 100644 --- a/llvm/examples/OrcV2Examples/OrcV2CBindingsIRTransforms/OrcV2CBindingsIRTransforms.c +++ b/llvm/examples/OrcV2Examples/OrcV2CBindingsIRTransforms/OrcV2CBindingsIRTransforms.c @@ -91,7 +91,7 @@ int main(int argc, char *argv[]) { // Use TransformLayer to set IR transform. { LLVMOrcIRTransformLayerRef TL = LLVMOrcLLJITGetIRTransformLayer(J); - LLVMOrcLLJITIRTransformLayerSetTransform(TL, *transform, NULL); + LLVMOrcIRTransformLayerSetTransform(TL, *transform, NULL); } // Create our demo module. diff --git a/llvm/include/llvm-c/Orc.h b/llvm/include/llvm-c/Orc.h index 9656c00075ec7..75af7320ce253 100644 --- a/llvm/include/llvm-c/Orc.h +++ b/llvm/include/llvm-c/Orc.h @@ -338,7 +338,7 @@ typedef struct LLVMOrcOpaqueIRTransformLayer *LLVMOrcIRTransformLayerRef; * management. * * Clients can call LLVMOrcLLJITGetIRTransformLayer to obtain the transform - * layer of a LLJIT instance, and use LLVMOrcLLJITIRTransformLayerSetTransform + * layer of a LLJIT instance, and use LLVMOrcIRTransformLayerSetTransform * to set the function. This can be used to override the default transform * layer. */ @@ -783,7 +783,7 @@ void LLVMOrcDisposeObjectLayer(LLVMOrcObjectLayerRef ObjLayer); * Set the transform function of the provided transform layer, passing through a * pointer to user provided context. */ -void LLVMOrcLLJITIRTransformLayerSetTransform( +void LLVMOrcIRTransformLayerSetTransform( LLVMOrcIRTransformLayerRef IRTransformLayer, LLVMOrcIRTransformLayerTransformFunction TransformFunction, void *Ctx); diff --git a/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp b/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp index c6c0152cc9e45..ed45ec6f836be 100644 --- a/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp +++ b/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp @@ -526,7 +526,7 @@ void LLVMOrcDisposeObjectLayer(LLVMOrcObjectLayerRef ObjLayer) { delete unwrap(ObjLayer); } -void LLVMOrcLLJITIRTransformLayerSetTransform( +void LLVMOrcIRTransformLayerSetTransform( LLVMOrcIRTransformLayerRef IRTransformLayer, LLVMOrcIRTransformLayerTransformFunction TransformFunction, void *Ctx) { unwrap(IRTransformLayer) From 9762f12c6c60d7306040dd1717b708763cc803a4 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Wed, 30 Jun 2021 21:44:54 +0200 Subject: [PATCH 338/619] [Orc] Run the examples as part of the tests Enable the Orc C-Bindings for testing. Reviewed By: lhames Differential Revision: https://reviews.llvm.org/D104637 --- llvm/test/CMakeLists.txt | 4 ++++ .../OrcV2Examples/orcv2-cbindings-add-object-file.test | 3 +++ .../Examples/OrcV2Examples/orcv2-cbindings-basic-usage.test | 3 +++ .../orcv2-cbindings-reflect-process-symbols.test | 3 +++ .../OrcV2Examples/orcv2-cbindings-removable-code.test | 3 +++ llvm/test/lit.cfg.py | 6 +++++- 6 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Examples/OrcV2Examples/orcv2-cbindings-add-object-file.test create mode 100644 llvm/test/Examples/OrcV2Examples/orcv2-cbindings-basic-usage.test create mode 100644 llvm/test/Examples/OrcV2Examples/orcv2-cbindings-reflect-process-symbols.test create mode 100644 llvm/test/Examples/OrcV2Examples/orcv2-cbindings-removable-code.test diff --git a/llvm/test/CMakeLists.txt b/llvm/test/CMakeLists.txt index 84be4dd3b3292..7fd52d207df46 100644 --- a/llvm/test/CMakeLists.txt +++ b/llvm/test/CMakeLists.txt @@ -162,6 +162,10 @@ if(LLVM_BUILD_EXAMPLES) Kaleidoscope-Ch6 Kaleidoscope-Ch7 LLJITWithThinLTOSummaries + OrcV2CBindingsBasicUsage + OrcV2CBindingsAddObjectFile + OrcV2CBindingsRemovableCode + OrcV2CBindingsReflectProcessSymbols ) if(CMAKE_HOST_UNIX) list(APPEND LLVM_TEST_DEPENDS diff --git a/llvm/test/Examples/OrcV2Examples/orcv2-cbindings-add-object-file.test b/llvm/test/Examples/OrcV2Examples/orcv2-cbindings-add-object-file.test new file mode 100644 index 0000000000000..95cb1109e209c --- /dev/null +++ b/llvm/test/Examples/OrcV2Examples/orcv2-cbindings-add-object-file.test @@ -0,0 +1,3 @@ +# RUN: OrcV2CBindingsAddObjectFile 2>&1 | FileCheck %s + +# CHECK: 1 + 2 = 3 diff --git a/llvm/test/Examples/OrcV2Examples/orcv2-cbindings-basic-usage.test b/llvm/test/Examples/OrcV2Examples/orcv2-cbindings-basic-usage.test new file mode 100644 index 0000000000000..3e7ce3f3b724d --- /dev/null +++ b/llvm/test/Examples/OrcV2Examples/orcv2-cbindings-basic-usage.test @@ -0,0 +1,3 @@ +# RUN: OrcV2CBindingsBasicUsage 2>&1 | FileCheck %s + +# CHECK: 1 + 2 = 3 diff --git a/llvm/test/Examples/OrcV2Examples/orcv2-cbindings-reflect-process-symbols.test b/llvm/test/Examples/OrcV2Examples/orcv2-cbindings-reflect-process-symbols.test new file mode 100644 index 0000000000000..f52e0325fc80b --- /dev/null +++ b/llvm/test/Examples/OrcV2Examples/orcv2-cbindings-reflect-process-symbols.test @@ -0,0 +1,3 @@ +# RUN: OrcV2CBindingsReflectProcessSymbols 2>&1 | FileCheck %s + +# CHECK: 3 * 4 + 5 = 17 diff --git a/llvm/test/Examples/OrcV2Examples/orcv2-cbindings-removable-code.test b/llvm/test/Examples/OrcV2Examples/orcv2-cbindings-removable-code.test new file mode 100644 index 0000000000000..fbbb380c9084c --- /dev/null +++ b/llvm/test/Examples/OrcV2Examples/orcv2-cbindings-removable-code.test @@ -0,0 +1,3 @@ +# RUN: OrcV2CBindingsRemovableCode 2>&1 | FileCheck %s + +# CHECK: 1 + 2 = 3 diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py index b5b8ee9f4d265..e8de805eae715 100644 --- a/llvm/test/lit.cfg.py +++ b/llvm/test/lit.cfg.py @@ -182,7 +182,11 @@ def get_asan_rtlib(): ToolSubst('Kaleidoscope-Ch7', unresolved='ignore'), ToolSubst('Kaleidoscope-Ch8', unresolved='ignore'), ToolSubst('LLJITWithThinLTOSummaries', unresolved='ignore'), - ToolSubst('LLJITWithRemoteDebugging', unresolved='ignore')]) + ToolSubst('LLJITWithRemoteDebugging', unresolved='ignore'), + ToolSubst('OrcV2CBindingsBasicUsage', unresolved='ignore'), + ToolSubst('OrcV2CBindingsAddObjectFile', unresolved='ignore'), + ToolSubst('OrcV2CBindingsRemovableCode', unresolved='ignore'), + ToolSubst('OrcV2CBindingsReflectProcessSymbols', unresolved='ignore')]) llvm_config.add_tool_substitutions(tools, config.llvm_tools_dir) From bf6770f9bd606643c8ea8a7ab03da5da5960e98e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Tue, 29 Jun 2021 23:49:31 +0300 Subject: [PATCH 339/619] [CMake] Don't use -Bsymbolic-functions for MinGW targets This is an ELF specific option which isn't supported for Windows/MinGW targets, even if the MinGW linker otherwise uses an ld.bfd like linker interface. Differential Revision: https://reviews.llvm.org/D105148 --- clang/tools/clang-shlib/CMakeLists.txt | 2 +- llvm/tools/llvm-shlib/CMakeLists.txt | 12 +++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/clang/tools/clang-shlib/CMakeLists.txt b/clang/tools/clang-shlib/CMakeLists.txt index d08cf89383282..9c1f8ea452b36 100644 --- a/clang/tools/clang-shlib/CMakeLists.txt +++ b/clang/tools/clang-shlib/CMakeLists.txt @@ -50,6 +50,6 @@ add_clang_library(clang-cpp ${_DEPS}) # Optimize function calls for default visibility definitions to avoid PLT and # reduce dynamic relocations. -if (NOT APPLE) +if (NOT APPLE AND NOT MINGW) target_link_options(clang-cpp PRIVATE LINKER:-Bsymbolic-functions) endif() diff --git a/llvm/tools/llvm-shlib/CMakeLists.txt b/llvm/tools/llvm-shlib/CMakeLists.txt index 03e1383ec8b4f..76b9a25cbbcdc 100644 --- a/llvm/tools/llvm-shlib/CMakeLists.txt +++ b/llvm/tools/llvm-shlib/CMakeLists.txt @@ -50,11 +50,13 @@ if(LLVM_BUILD_LLVM_DYLIB) # Solaris ld does not accept global: *; so there is no way to version *all* global symbols set(LIB_NAMES -Wl,--version-script,${LLVM_LIBRARY_DIR}/tools/llvm-shlib/simple_version_script.map ${LIB_NAMES}) endif() - # Optimize function calls for default visibility definitions to avoid PLT and - # reduce dynamic relocations. - # Note: for -fno-pic default, the address of a function may be different from - # inside and outside libLLVM.so. - target_link_options(LLVM PRIVATE LINKER:-Bsymbolic-functions) + if (NOT MINGW) + # Optimize function calls for default visibility definitions to avoid PLT and + # reduce dynamic relocations. + # Note: for -fno-pic default, the address of a function may be different from + # inside and outside libLLVM.so. + target_link_options(LLVM PRIVATE LINKER:-Bsymbolic-functions) + endif() elseif("${CMAKE_SYSTEM_NAME}" STREQUAL "Darwin") set(LIB_NAMES -Wl,-all_load ${LIB_NAMES}) endif() From 578a4cfe19121df5e5e02bf03482233e3bc56a0d Mon Sep 17 00:00:00 2001 From: Siva Chandra Date: Wed, 30 Jun 2021 13:45:38 -0700 Subject: [PATCH 340/619] [libc][NFC] Clear all exceptions in exception_flags_test before raising another. This is because, raising some exceptions can raise other ones. For example, raising FE_OVERFLOW can raise FE_INEXACT. So, we need to clear all exceptions if we want a clean slate. --- libc/test/src/fenv/exception_flags_test.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/libc/test/src/fenv/exception_flags_test.cpp b/libc/test/src/fenv/exception_flags_test.cpp index bb3ddaa7148ac..855ba7c9248d2 100644 --- a/libc/test/src/fenv/exception_flags_test.cpp +++ b/libc/test/src/fenv/exception_flags_test.cpp @@ -18,6 +18,7 @@ TEST(LlvmLibcFenvTest, GetExceptFlagAndSetExceptFlag) { // We will disable all exceptions to prevent invocation of the exception // handler. __llvm_libc::fputil::disableExcept(FE_ALL_EXCEPT); + __llvm_libc::fputil::clearExcept(FE_ALL_EXCEPT); int excepts[] = {FE_DIVBYZERO, FE_INVALID, FE_INEXACT, FE_OVERFLOW, FE_UNDERFLOW}; @@ -39,8 +40,9 @@ TEST(LlvmLibcFenvTest, GetExceptFlagAndSetExceptFlag) { ASSERT_EQ(__llvm_libc::fesetexceptflag(&eflags, FE_ALL_EXCEPT), 0); ASSERT_NE(__llvm_libc::fputil::testExcept(FE_ALL_EXCEPT) & e, 0); - // Cleanup - __llvm_libc::fputil::clearExcept(e); + // Cleanup. We clear all excepts as raising excepts like FE_OVERFLOW + // can also raise FE_INEXACT. + __llvm_libc::fputil::clearExcept(FE_ALL_EXCEPT); } // Next, we will raise one exception and save the flags. From fae05692a36f9ebbd201d93c2a6b0f927564d7e6 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 19 May 2021 22:25:51 -0400 Subject: [PATCH 341/619] CodeGen: Print/parse LLTs in MachineMemOperands This will currently accept the old number of bytes syntax, and convert it to a scalar. This should be removed in the near future (I think I converted all of the tests already, but likely missed a few). Not sure what the exact syntax and policy should be. We can continue printing the number of bytes for non-generic instructions to avoid test churn and only allow non-scalar types for generic instructions. This will currently print the LLT in parentheses, but accept parsing the existing integers and implicitly converting to scalar. The parentheses are a bit ugly, but the parser logic seems unable to deal without either parentheses or some keyword to indicate the start of a type. --- llvm/lib/CodeGen/MIRParser/MIParser.cpp | 31 +- llvm/lib/CodeGen/MachineOperand.cpp | 6 +- .../GlobalISel/arm64-callingconv-ios.ll | 18 +- .../AArch64/GlobalISel/arm64-callingconv.ll | 22 +- .../AArch64/GlobalISel/arm64-fallback.ll | 2 +- .../GlobalISel/arm64-irtranslator-gep.ll | 8 +- .../arm64-irtranslator-stackprotect.ll | 4 +- .../GlobalISel/arm64-irtranslator-switch.ll | 4 +- .../AArch64/GlobalISel/arm64-irtranslator.ll | 194 +- .../GlobalISel/arm64-regbankselect.mir | 26 +- .../GlobalISel/call-lowering-signext.ll | 12 +- .../GlobalISel/call-lowering-zeroext.ll | 22 +- .../AArch64/GlobalISel/call-translator-cse.ll | 8 +- .../AArch64/GlobalISel/call-translator-ios.ll | 20 +- .../call-translator-tail-call-sret.ll | 6 +- .../GlobalISel/call-translator-tail-call.ll | 20 +- .../AArch64/GlobalISel/call-translator.ll | 50 +- .../GlobalISel/combine-ext-debugloc.mir | 14 +- .../AArch64/GlobalISel/combine-fconstant.mir | 8 +- .../combine-sext-trunc-sextload.mir | 12 +- .../combine-shift-immed-mismatch-crash.mir | 4 +- .../AArch64/GlobalISel/constant-dbg-loc.ll | 6 +- .../GlobalISel/constant-mir-debugify.mir | 4 +- .../AArch64/GlobalISel/contract-store.mir | 32 +- .../debug-loc-legalize-tail-call.mir | 8 +- .../GlobalISel/fold-global-offsets.mir | 16 +- .../fp128-legalize-crash-pr35690.mir | 14 +- .../AArch64/GlobalISel/fp16-copy-gpr.mir | 4 +- .../GlobalISel/inline-memcpy-forced.mir | 38 +- .../AArch64/GlobalISel/inline-memcpy.mir | 90 +- .../AArch64/GlobalISel/inline-memmove.mir | 60 +- .../AArch64/GlobalISel/inline-memset.mir | 50 +- .../GlobalISel/inline-small-memcpy.mir | 18 +- .../GlobalISel/irtranslator-arguments.ll | 6 +- .../irtranslator-atomic-metadata.ll | 12 +- .../GlobalISel/irtranslator-exceptions.ll | 2 +- .../GlobalISel/irtranslator-load-metadata.ll | 12 +- .../GlobalISel/irtranslator-localescape.ll | 10 +- .../irtranslator-max-address-space.ll | 6 +- .../GlobalISel/irtranslator-memcpy-inline.ll | 16 +- .../irtranslator-stack-evt-bug47619.ll | 2 +- .../GlobalISel/irtranslator-stack-objects.ll | 6 +- .../irtranslator-stackprotect-check.ll | 10 +- .../GlobalISel/irtranslator-store-metadata.ll | 8 +- .../GlobalISel/irtranslator-switch-bittest.ll | 2 +- .../AArch64/GlobalISel/irtranslator-tbaa.ll | 4 +- .../GlobalISel/labels-are-not-dead.mir | 8 +- .../AArch64/GlobalISel/legalize-atomicrmw.mir | 16 +- .../GlobalISel/legalize-blockaddress.mir | 4 +- .../AArch64/GlobalISel/legalize-bzero.mir | 4 +- .../GlobalISel/legalize-cmpxchg-128.mir | 12 +- .../legalize-cmpxchg-with-success.mir | 8 +- .../AArch64/GlobalISel/legalize-cmpxchg.mir | 16 +- .../AArch64/GlobalISel/legalize-exceptions.ll | 4 +- .../AArch64/GlobalISel/legalize-extload.mir | 28 +- .../legalize-extract-vector-elt.mir | 6 +- .../AArch64/GlobalISel/legalize-fpext.mir | 6 +- .../AArch64/GlobalISel/legalize-fptrunc.mir | 6 +- .../legalize-load-store-fewerElts.mir | 16 +- ...lize-load-store-vector-of-ptr-debugloc.mir | 2 +- .../legalize-load-store-vector-of-ptr.mir | 12 +- .../GlobalISel/legalize-load-store.mir | 198 +- .../GlobalISel/legalize-load-trunc.mir | 4 +- .../GlobalISel/legalize-memlib-debug-loc.mir | 2 +- .../AArch64/GlobalISel/legalize-mul.mir | 20 +- .../legalize-non-pow2-load-store.mir | 12 +- .../legalize-phi-insertpt-decrement.mir | 26 +- .../AArch64/GlobalISel/legalize-phi.mir | 8 +- .../GlobalISel/legalize-reduce-add.mir | 20 +- .../AArch64/GlobalISel/legalize-s128-div.mir | 24 +- .../GlobalISel/legalize-sext-zext-128.mir | 24 +- .../AArch64/GlobalISel/legalize-sextload.mir | 4 +- .../GlobalISel/legalize-shuffle-vector.mir | 12 +- .../AArch64/GlobalISel/legalize-vaarg.mir | 12 +- .../AArch64/GlobalISel/legalize-zextload.mir | 4 +- .../AArch64/GlobalISel/legalizer-combiner.mir | 8 +- .../GlobalISel/load-addressing-modes.mir | 92 +- .../GlobalISel/load-wro-addressing-modes.mir | 56 +- .../AArch64/GlobalISel/localizer-arm64-tti.ll | 18 +- .../CodeGen/AArch64/GlobalISel/localizer.mir | 44 +- .../GlobalISel/non-pow-2-extload-combine.mir | 4 +- .../GlobalISel/opt-overlapping-and.mir | 4 +- ...legalizer-combiner-redundant-sextinreg.mir | 4 +- .../postlegalizer-combiner-store-undef.mir | 2 +- .../postlegalizer-lowering-truncstore.mir | 8 +- .../postlegalizercombiner-extending-loads.mir | 8 +- .../postselectopt-constrain-new-regop.mir | 8 +- ...galizer-combiner-divrem-insertpt-crash.mir | 4 +- ...combiner-icmp-to-true-false-known-bits.mir | 42 +- ...galizer-combiner-load-or-pattern-align.mir | 18 +- .../prelegalizer-combiner-load-or-pattern.mir | 356 +- .../GlobalISel/prelegalizercombiner-bzero.mir | 30 +- ...ercombiner-extending-loads-cornercases.mir | 32 +- ...relegalizercombiner-extending-loads-s1.mir | 4 +- .../prelegalizercombiner-extending-loads.mir | 80 +- ...legalizercombiner-icmp-redundant-trunc.mir | 16 +- ...galizercombiner-not-really-equiv-insts.mir | 34 +- .../prelegalizercombiner-prop-extends-phi.mir | 12 +- ...alizercombiner-sextload-from-sextinreg.mir | 24 +- .../GlobalISel/preselect-process-phis.mir | 24 +- .../GlobalISel/regbank-assert-sext.mir | 4 +- .../GlobalISel/regbank-assert-zext.mir | 4 +- .../AArch64/GlobalISel/regbank-ceil.mir | 4 +- .../AArch64/GlobalISel/regbank-fp-use-def.mir | 24 +- .../AArch64/GlobalISel/regbank-intrinsic.mir | 8 +- .../GlobalISel/regbankselect-build-vector.mir | 8 +- .../GlobalISel/regbankselect-default.mir | 8 +- .../AArch64/GlobalISel/ret-vec-promote.ll | 2 +- .../GlobalISel/select-arith-extended-reg.mir | 16 +- .../GlobalISel/select-atomic-load-store.mir | 4 +- .../AArch64/GlobalISel/select-atomicrmw.mir | 44 +- .../GlobalISel/select-blockaddress.mir | 6 +- .../GlobalISel/select-brcond-of-binop.mir | 8 +- .../CodeGen/AArch64/GlobalISel/select-cbz.mir | 12 +- .../AArch64/GlobalISel/select-cmpxchg.mir | 8 +- .../AArch64/GlobalISel/select-const-pool.mir | 6 +- .../GlobalISel/select-const-vector.mir | 20 +- .../AArch64/GlobalISel/select-extload.mir | 8 +- .../GlobalISel/select-fmul-indexed.mir | 4 +- .../GlobalISel/select-gv-cmodel-large.mir | 12 +- .../GlobalISel/select-gv-cmodel-tiny.mir | 12 +- .../select-jump-table-brjt-constrain.mir | 4 +- .../GlobalISel/select-ldaxr-intrin.mir | 16 +- .../AArch64/GlobalISel/select-ldxr-intrin.mir | 16 +- .../select-load-store-vector-of-ptr.mir | 8 +- .../AArch64/GlobalISel/select-load.mir | 100 +- .../AArch64/GlobalISel/select-reduce-add.mir | 20 +- .../select-redundant-zext-of-load.mir | 8 +- .../AArch64/GlobalISel/select-sextload.mir | 8 +- .../GlobalISel/select-stlxr-intrin.mir | 16 +- .../AArch64/GlobalISel/select-store.mir | 140 +- .../CodeGen/AArch64/GlobalISel/select-stx.mir | 16 +- .../select-with-no-legality-check.mir | 72 +- .../GlobalISel/select-zext-as-copy.mir | 4 +- .../AArch64/GlobalISel/select-zextload.mir | 28 +- .../GlobalISel/sext-inreg-ldrow-16b.mir | 16 +- .../GlobalISel/store-addressing-modes.mir | 36 +- .../GlobalISel/store-wro-addressing-modes.mir | 12 +- .../AArch64/GlobalISel/subreg-copy.mir | 8 +- .../GlobalISel/translate-constant-dag.ll | 32 +- .../GlobalISel/varargs-ios-translator.ll | 2 +- .../CodeGen/AArch64/GlobalISel/vastart.ll | 4 +- .../GlobalISel/widen-narrow-tbz-tbnz.mir | 4 +- .../xro-addressing-mode-constant.mir | 32 +- .../AArch64/aarch64-ldst-modified-baseReg.mir | 50 +- .../aarch64-ldst-no-premature-sp-pop.mir | 6 +- .../aarch64-ldst-subsuperReg-no-ldp.mir | 8 +- .../AArch64/aarch64-mov-debug-locs.mir | 14 +- .../CodeGen/AArch64/aarch64-vector-pcs.mir | 20 +- .../AArch64/arm64-misched-memdep-bug.ll | 6 +- .../AArch64/branch-folder-merge-mmos.ll | 2 +- .../AArch64/branch-relax-block-size.mir | 10 +- .../AArch64/branch-target-enforcement.mir | 30 +- llvm/test/CodeGen/AArch64/cfi_restore.mir | 8 +- .../CodeGen/AArch64/cluster-frame-index.mir | 8 +- .../AArch64/debug-info-sve-dbg-declare.mir | 6 +- .../dont-shrink-wrap-stack-mayloadorstore.mir | 16 +- .../AArch64/early-ifcvt-regclass-mismatch.mir | 2 +- llvm/test/CodeGen/AArch64/elim-dead-mi.mir | 2 +- llvm/test/CodeGen/AArch64/falkor-hwpf-fix.mir | 38 +- llvm/test/CodeGen/AArch64/irg-nomem.mir | 4 +- .../CodeGen/AArch64/jti-correct-datatype.mir | 8 +- .../CodeGen/AArch64/jump-table-duplicate.mir | 8 +- .../test/CodeGen/AArch64/ldrpre-ldr-merge.mir | 296 +- llvm/test/CodeGen/AArch64/ldst-miflags.mir | 20 +- .../AArch64/ldst-nopreidx-sp-redzone.mir | 56 +- llvm/test/CodeGen/AArch64/ldst-opt-aa.mir | 8 +- .../AArch64/ldst-opt-non-imm-offset.mir | 4 +- .../CodeGen/AArch64/ldst-opt-zr-clobber.mir | 4 +- llvm/test/CodeGen/AArch64/ldst-opt.mir | 44 +- .../AArch64/loh-use-between-adrp-add.mir | 4 +- llvm/test/CodeGen/AArch64/loop-sink-limit.mir | 6 +- llvm/test/CodeGen/AArch64/loop-sink.mir | 52 +- .../CodeGen/AArch64/machine-outliner-bti.mir | 6 +- .../AArch64/machine-outliner-iterative-2.mir | 8 +- .../AArch64/machine-outliner-iterative.mir | 12 +- .../machine-outliner-retaddr-sign-sp-mod.mir | 56 +- .../CodeGen/AArch64/machine-scheduler.mir | 6 +- llvm/test/CodeGen/AArch64/memcpy-scoped-aa.ll | 20 +- .../AArch64/mlicm-stack-write-check.mir | 2 +- .../AArch64/multi-vector-store-size.ll | 18 +- .../CodeGen/AArch64/post-ra-machine-sink.mir | 4 +- .../CodeGen/AArch64/reg-scavenge-frame.mir | 8 +- llvm/test/CodeGen/AArch64/seqpairspill.mir | 8 +- llvm/test/CodeGen/AArch64/settag-merge.mir | 22 +- .../AArch64/speculation-hardening-sls-blr.mir | 8 +- llvm/test/CodeGen/AArch64/spill-fold.mir | 10 +- llvm/test/CodeGen/AArch64/spill-undef.mir | 6 +- .../CodeGen/AArch64/stack-guard-reassign.mir | 8 +- .../AArch64/stp-opt-with-renaming-debug.mir | 28 +- .../AArch64/stp-opt-with-renaming-ld3.mir | 24 +- .../stp-opt-with-renaming-reserved-regs.mir | 56 +- .../CodeGen/AArch64/stp-opt-with-renaming.mir | 226 +- .../test/CodeGen/AArch64/strpre-str-merge.mir | 136 +- .../CodeGen/AArch64/taildup-inst-dup-loc.mir | 4 +- .../AArch64/unwind-preserved-from-mir.mir | 62 +- .../CodeGen/AArch64/wineh-frame-scavenge.mir | 6 +- llvm/test/CodeGen/AArch64/wineh-frame4.mir | 6 +- llvm/test/CodeGen/AArch64/wineh-frame6.mir | 18 +- llvm/test/CodeGen/AArch64/wineh-frame7.mir | 28 +- llvm/test/CodeGen/AArch64/wineh-frame8.mir | 8 +- llvm/test/CodeGen/AArch64/wineh1.mir | 24 +- llvm/test/CodeGen/AArch64/wineh2.mir | 36 +- llvm/test/CodeGen/AArch64/wineh3.mir | 28 +- llvm/test/CodeGen/AArch64/wineh4.mir | 42 +- llvm/test/CodeGen/AArch64/wineh5.mir | 36 +- llvm/test/CodeGen/AArch64/wineh6.mir | 6 +- llvm/test/CodeGen/AArch64/wineh7.mir | 12 +- llvm/test/CodeGen/AArch64/wineh8.mir | 40 +- .../test/CodeGen/AArch64/wineh_shrinkwrap.mir | 2 +- ...lee-save-size-after-livedebugvariables.mir | 4 +- .../CodeGen/AArch64/zext-reg-coalesce.mir | 4 +- .../GlobalISel/artifact-combiner-zext.mir | 8 +- .../AMDGPU/GlobalISel/combine-sext-inreg.mir | 72 +- .../AMDGPU/GlobalISel/function-returns.ll | 204 +- .../CodeGen/AMDGPU/GlobalISel/global-value.ll | 8 +- .../GlobalISel/image_ls_mipmap_zero.a16.ll | 48 +- ...inst-select-amdgpu-atomic-cmpxchg-flat.mir | 56 +- ...st-select-amdgpu-atomic-cmpxchg-global.mir | 126 +- .../inst-select-atomic-cmpxchg-local.mir | 32 +- .../inst-select-atomic-cmpxchg-region.mir | 32 +- .../inst-select-atomicrmw-add-flat.mir | 112 +- .../inst-select-atomicrmw-add-global.mir | 140 +- .../inst-select-atomicrmw-fadd-local.mir | 24 +- .../inst-select-atomicrmw-fadd-region.mir | 24 +- .../inst-select-atomicrmw-xchg-local.mir | 16 +- .../inst-select-atomicrmw-xchg-region.mir | 16 +- .../AMDGPU/GlobalISel/inst-select-copy.mir | 24 +- .../GlobalISel/inst-select-fmaxnum-ieee.mir | 24 +- .../AMDGPU/GlobalISel/inst-select-fmaxnum.mir | 24 +- .../GlobalISel/inst-select-fminnum-ieee.mir | 24 +- .../AMDGPU/GlobalISel/inst-select-fminnum.mir | 24 +- .../AMDGPU/GlobalISel/inst-select-fmul.mir | 52 +- .../AMDGPU/GlobalISel/inst-select-fptoui.mir | 12 +- .../GlobalISel/inst-select-fract.f64.mir | 25 +- .../AMDGPU/GlobalISel/inst-select-icmp.mir | 56 +- .../GlobalISel/inst-select-implicit-def.mir | 12 +- .../GlobalISel/inst-select-inttoptr.mir | 4 +- .../inst-select-load-atomic-flat.mir | 80 +- .../inst-select-load-atomic-global.mir | 132 +- .../inst-select-load-atomic-local.mir | 72 +- .../GlobalISel/inst-select-load-constant.mir | 290 +- .../GlobalISel/inst-select-load-flat.mir | 310 +- .../inst-select-load-global-saddr.mir | 108 +- .../GlobalISel/inst-select-load-global.mir | 392 +- .../inst-select-load-global.s96.mir | 36 +- .../GlobalISel/inst-select-load-local-128.mir | 48 +- .../GlobalISel/inst-select-load-local.mir | 404 +- .../GlobalISel/inst-select-load-private.mir | 168 +- .../GlobalISel/inst-select-load-smrd.mir | 38 +- .../AMDGPU/GlobalISel/inst-select-sitofp.mir | 12 +- .../inst-select-store-atomic-flat.mir | 60 +- .../inst-select-store-atomic-local.mir | 80 +- .../GlobalISel/inst-select-store-flat.mir | 210 +- .../GlobalISel/inst-select-store-global.mir | 252 +- .../inst-select-store-global.s96.mir | 36 +- .../GlobalISel/inst-select-store-local.mir | 310 +- .../GlobalISel/inst-select-store-private.mir | 108 +- .../GlobalISel/irtranslator-amdgpu_kernel.ll | 692 +-- .../GlobalISel/irtranslator-amdgpu_vs.ll | 4 +- .../GlobalISel/irtranslator-atomicrmw.ll | 6 +- .../irtranslator-call-implicit-args.ll | 8 +- .../GlobalISel/irtranslator-call-non-fixed.ll | 12 +- .../irtranslator-call-return-values.ll | 136 +- .../GlobalISel/irtranslator-call-sret.ll | 23 +- .../AMDGPU/GlobalISel/irtranslator-call.ll | 232 +- .../GlobalISel/irtranslator-constantexpr.ll | 4 +- ...translator-fixed-function-abi-vgpr-args.ll | 6 +- .../GlobalISel/irtranslator-function-args.ll | 540 +- .../GlobalISel/irtranslator-indirect-call.ll | 2 +- .../GlobalISel/irtranslator-inline-asm.ll | 6 +- .../irtranslator-memory-intrinsics.ll | 30 +- .../GlobalISel/irtranslator-sibling-call.ll | 259 +- .../irtranslator-struct-return-intrinsics.ll | 4 +- .../GlobalISel/legalize-addrspacecast.mir | 8 +- .../legalize-atomic-cmpxchg-with-success.mir | 20 +- .../GlobalISel/legalize-atomic-cmpxchg.mir | 24 +- .../GlobalISel/legalize-atomicrmw-add.mir | 16 +- .../GlobalISel/legalize-atomicrmw-and.mir | 16 +- .../legalize-atomicrmw-fadd-global.mir | 6 +- .../legalize-atomicrmw-fadd-local.mir | 6 +- .../GlobalISel/legalize-atomicrmw-max.mir | 16 +- .../GlobalISel/legalize-atomicrmw-min.mir | 16 +- .../GlobalISel/legalize-atomicrmw-nand.mir | 6 +- .../GlobalISel/legalize-atomicrmw-or.mir | 16 +- .../GlobalISel/legalize-atomicrmw-sub.mir | 16 +- .../GlobalISel/legalize-atomicrmw-umax.mir | 16 +- .../GlobalISel/legalize-atomicrmw-umin.mir | 16 +- .../legalize-atomicrmw-xchg-flat.mir | 10 +- .../GlobalISel/legalize-atomicrmw-xchg.mir | 16 +- .../GlobalISel/legalize-atomicrmw-xor.mir | 16 +- .../legalize-extract-vector-elt.mir | 302 +- .../AMDGPU/GlobalISel/legalize-fcmp.mir | 8 +- .../AMDGPU/GlobalISel/legalize-icmp.mir | 8 +- .../GlobalISel/legalize-implicit-def.mir | 8 +- .../GlobalISel/legalize-insert-vector-elt.mir | 416 +- ...galize-llvm.amdgcn.image.atomic.dim.a16.ll | 96 +- .../legalize-llvm.amdgcn.image.dim.a16.ll | 184 +- .../legalize-llvm.amdgcn.image.load.2d.d16.ll | 152 +- .../legalize-llvm.amdgcn.image.load.2d.ll | 76 +- ...lize-llvm.amdgcn.image.load.2darraymsaa.ll | 12 +- .../legalize-llvm.amdgcn.image.load.3d.ll | 12 +- .../legalize-llvm.amdgcn.image.sample.a16.ll | 188 +- .../legalize-llvm.amdgcn.image.sample.g16.ll | 38 +- ...legalize-llvm.amdgcn.image.store.2d.d16.ll | 32 +- .../legalize-llvm.amdgcn.s.buffer.load.mir | 16 +- .../legalize-load-constant-32bit.mir | 14 +- .../GlobalISel/legalize-load-constant.mir | 2623 ++++---- .../AMDGPU/GlobalISel/legalize-load-flat.mir | 2027 ++++--- .../GlobalISel/legalize-load-global.mir | 4350 ++++++------- .../AMDGPU/GlobalISel/legalize-load-local.mir | 5378 +++++++++-------- .../legalize-load-memory-metadata.mir | 16 +- .../GlobalISel/legalize-load-private.mir | 3806 ++++++------ .../legalize-sextload-constant-32bit.mir | 24 +- .../GlobalISel/legalize-sextload-flat.mir | 36 +- .../GlobalISel/legalize-sextload-global.mir | 126 +- .../GlobalISel/legalize-sextload-local.mir | 24 +- .../GlobalISel/legalize-sextload-private.mir | 24 +- .../GlobalISel/legalize-store-global.mir | 3326 +++++----- .../AMDGPU/GlobalISel/legalize-store.mir | 279 +- .../legalize-zextload-constant-32bit.mir | 24 +- .../GlobalISel/legalize-zextload-flat.mir | 36 +- .../GlobalISel/legalize-zextload-global.mir | 96 +- .../GlobalISel/legalize-zextload-local.mir | 24 +- .../GlobalISel/legalize-zextload-private.mir | 24 +- .../AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll | 36 +- .../llvm.amdgcn.ds.gws.sema.release.all.ll | 2 +- .../llvm.amdgcn.image.atomic.dim.mir | 32 +- .../llvm.amdgcn.raw.buffer.atomic.add.ll | 16 +- .../llvm.amdgcn.raw.buffer.atomic.cmpswap.ll | 10 +- .../llvm.amdgcn.raw.buffer.atomic.fadd.ll | 40 +- .../llvm.amdgcn.raw.buffer.load.format.f16.ll | 20 +- .../llvm.amdgcn.raw.buffer.load.format.ll | 12 +- .../GlobalISel/llvm.amdgcn.raw.buffer.load.ll | 66 +- ...llvm.amdgcn.raw.buffer.store.format.f16.ll | 44 +- ...llvm.amdgcn.raw.buffer.store.format.f32.ll | 24 +- .../llvm.amdgcn.raw.buffer.store.ll | 66 +- .../llvm.amdgcn.raw.tbuffer.load.f16.ll | 32 +- .../llvm.amdgcn.raw.tbuffer.load.ll | 18 +- .../llvm.amdgcn.raw.tbuffer.store.f16.ll | 40 +- .../llvm.amdgcn.raw.tbuffer.store.i8.ll | 16 +- .../llvm.amdgcn.raw.tbuffer.store.ll | 50 +- .../GlobalISel/llvm.amdgcn.s.buffer.load.ll | 450 +- .../llvm.amdgcn.struct.buffer.atomic.add.ll | 14 +- ...lvm.amdgcn.struct.buffer.atomic.cmpswap.ll | 10 +- .../llvm.amdgcn.struct.buffer.atomic.fadd.ll | 40 +- ...vm.amdgcn.struct.buffer.load.format.f16.ll | 24 +- .../llvm.amdgcn.struct.buffer.load.format.ll | 14 +- .../llvm.amdgcn.struct.buffer.load.ll | 32 +- ...m.amdgcn.struct.buffer.store.format.f16.ll | 20 +- ...m.amdgcn.struct.buffer.store.format.f32.ll | 12 +- .../llvm.amdgcn.struct.buffer.store.ll | 20 +- .../llvm.amdgcn.struct.tbuffer.load.f16.ll | 26 +- .../llvm.amdgcn.struct.tbuffer.load.ll | 14 +- .../AMDGPU/GlobalISel/no-legalize-atomic.mir | 16 +- .../postlegalizer-combiner-divrem.mir | 72 +- .../GlobalISel/postlegalizercombiner-and.mir | 52 +- .../prelegalizer-combiner-divrem.mir | 176 +- .../prelegalizer-combiner-memcpy-inline.mir | 10 +- .../regbankselect-amdgcn-s-buffer-load.mir | 6 +- .../regbankselect-amdgcn.image.load.1d.ll | 32 +- .../regbankselect-amdgcn.image.sample.1d.ll | 40 +- .../regbankselect-amdgcn.raw.buffer.load.ll | 10 +- .../regbankselect-amdgcn.s.buffer.load.ll | 420 +- .../regbankselect-amdgcn.s.buffer.load.mir | 8 +- ...regbankselect-amdgcn.struct.buffer.load.ll | 10 +- ...egbankselect-amdgcn.struct.buffer.store.ll | 10 +- .../regbankselect-atomic-cmpxchg.mir | 12 +- .../regbankselect-atomicrmw-add.mir | 12 +- .../regbankselect-atomicrmw-and.mir | 12 +- .../regbankselect-atomicrmw-fadd.mir | 4 +- .../regbankselect-atomicrmw-max.mir | 12 +- .../regbankselect-atomicrmw-min.mir | 12 +- .../GlobalISel/regbankselect-atomicrmw-or.mir | 12 +- .../regbankselect-atomicrmw-sub.mir | 12 +- .../regbankselect-atomicrmw-umax.mir | 12 +- .../regbankselect-atomicrmw-umin.mir | 12 +- .../regbankselect-atomicrmw-xchg.mir | 12 +- .../regbankselect-atomicrmw-xor.mir | 12 +- .../GlobalISel/regbankselect-constant.mir | 4 +- .../regbankselect-insert-vector-elt.mir | 4 +- .../AMDGPU/GlobalISel/regbankselect-load.mir | 229 +- .../GlobalISel/regbankselect-sextload.mir | 25 +- ...gbankselect-split-scalar-load-metadata.mir | 12 +- .../regbankselect-uniform-load-noclobber.mir | 64 +- .../regbankselect-waterfall-agpr.mir | 8 +- .../regbankselect-widen-scalar-loads.mir | 152 +- .../GlobalISel/regbankselect-zextload.mir | 24 +- .../AMDGPU/GlobalISel/regbankselect.mir | 36 +- .../AMDGPU/SRSRC-GIT-clobber-check.mir | 4 +- .../AMDGPU/alloc-aligned-tuples-gfx908.mir | 4 +- .../AMDGPU/alloc-aligned-tuples-gfx90a.mir | 4 +- .../AMDGPU/amdgcn-load-offset-from-reg.ll | 2 +- .../AMDGPU/branch-relaxation-debug-info.mir | 8 +- .../AMDGPU/buffer-intrinsics-mmo-offsets.ll | 202 +- .../AMDGPU/clamp-omod-special-case.mir | 24 +- .../AMDGPU/cluster-flat-loads-postra.mir | 8 +- .../CodeGen/AMDGPU/cluster-flat-loads.mir | 4 +- .../coalescer-extend-pruned-subrange.mir | 4 +- ...scer-subranges-another-copymi-not-live.mir | 6 +- ...oalescer-subranges-another-prune-error.mir | 4 +- .../CodeGen/AMDGPU/coalescer-subreg-join.mir | 4 +- .../coalescer-with-subregs-bad-identical.mir | 6 +- .../CodeGen/AMDGPU/collapse-endcf-broken.mir | 4 +- llvm/test/CodeGen/AMDGPU/collapse-endcf2.mir | 16 +- .../AMDGPU/constant-fold-imm-immreg.mir | 2 +- .../AMDGPU/couldnt-join-subrange-3.mir | 4 +- .../AMDGPU/dbg-value-ends-sched-region.mir | 12 +- llvm/test/CodeGen/AMDGPU/endpgm-dce.mir | 10 +- llvm/test/CodeGen/AMDGPU/extload-align.ll | 2 +- .../AMDGPU/extract_subvector_vec4_vec3.ll | 4 +- .../test/CodeGen/AMDGPU/fast-ra-kills-vcc.mir | 4 +- .../fastregalloc-self-loop-heuristic.mir | 26 +- .../AMDGPU/flat-error-unsupported-gpu-hsa.ll | 2 +- .../CodeGen/AMDGPU/flat-load-clustering.mir | 12 +- .../CodeGen/AMDGPU/flat-scratch-fold-fi.mir | 20 +- llvm/test/CodeGen/AMDGPU/fold-imm-f16-f32.mir | 78 +- .../AMDGPU/fold-operands-remove-m0-redef.mir | 108 +- .../AMDGPU/fp-atomic-to-s_denormmode.mir | 64 +- .../AMDGPU/frame-lowering-fp-adjusted.mir | 2 +- llvm/test/CodeGen/AMDGPU/hard-clauses.mir | 20 +- .../i1_copy_phi_with_phi_incoming_value.mir | 4 +- .../AMDGPU/indirect-addressing-term.ll | 32 +- .../CodeGen/AMDGPU/insert-waitcnts-exp.mir | 8 +- .../CodeGen/AMDGPU/invert-br-undef-vcc.mir | 8 +- .../AMDGPU/legalize-fp-load-invariant.ll | 2 +- .../AMDGPU/limit-soft-clause-reg-pressure.mir | 46 +- .../CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll | 2 +- ...llvm.amdgcn.ds.gws.barrier-fastregalloc.ll | 2 +- .../AMDGPU/llvm.amdgcn.ds.gws.barrier.ll | 2 +- .../llvm.amdgcn.ds.gws.sema.release.all.ll | 2 +- .../CodeGen/AMDGPU/load-store-opt-scc.mir | 36 +- .../lower-control-flow-other-terminators.mir | 4 +- .../AMDGPU/mcp-overlap-after-propagation.mir | 4 +- llvm/test/CodeGen/AMDGPU/memcpy-scoped-aa.ll | 14 +- .../memory-legalizer-atomic-insert-end.mir | 8 +- .../memory-legalizer-invalid-addrspace.mir | 10 +- .../CodeGen/AMDGPU/memory-legalizer-local.mir | 316 +- ...egalizer-multiple-mem-operands-atomics.mir | 16 +- ...er-multiple-mem-operands-nontemporal-1.mir | 16 +- ...er-multiple-mem-operands-nontemporal-2.mir | 16 +- .../AMDGPU/memory-legalizer-region.mir | 316 +- llvm/test/CodeGen/AMDGPU/memory_clause.mir | 14 +- .../CodeGen/AMDGPU/merge-image-load-gfx10.mir | 248 +- llvm/test/CodeGen/AMDGPU/merge-image-load.mir | 238 +- .../AMDGPU/merge-image-sample-gfx10.mir | 520 +- .../CodeGen/AMDGPU/merge-image-sample.mir | 520 +- .../CodeGen/AMDGPU/merge-load-store-agpr.mir | 32 +- .../AMDGPU/merge-load-store-physreg.mir | 8 +- .../CodeGen/AMDGPU/merge-load-store-vreg.mir | 24 +- llvm/test/CodeGen/AMDGPU/merge-load-store.mir | 76 +- .../AMDGPU/merge-out-of-order-ldst.mir | 14 +- llvm/test/CodeGen/AMDGPU/merge-tbuffer.mir | 776 +-- llvm/test/CodeGen/AMDGPU/nsa-reassign.mir | 32 +- llvm/test/CodeGen/AMDGPU/nsa-vmem-hazard.mir | 10 +- .../optimize-exec-mask-pre-ra-loop-phi.mir | 16 +- .../AMDGPU/pei-build-spill-partial-agpr.mir | 156 +- llvm/test/CodeGen/AMDGPU/pei-build-spill.mir | 1876 +++--- .../AMDGPU/pei-reg-scavenger-position.mir | 8 +- .../AMDGPU/pei-scavenge-sgpr-carry-out.mir | 4 +- .../CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir | 8 +- .../test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir | 4 +- .../AMDGPU/pei-scavenge-vgpr-spill.mir | 20 +- .../CodeGen/AMDGPU/phi-elimination-end-cf.mir | 2 +- .../CodeGen/AMDGPU/postra-bundle-memops.mir | 16 +- .../AMDGPU/regcoal-subrange-join-seg.mir | 4 +- llvm/test/CodeGen/AMDGPU/regcoalesce-dbg.mir | 6 +- ...scing-remove-partial-redundancy-assert.mir | 6 +- ...ssert-dead-def-subreg-use-other-subreg.mir | 4 +- .../CodeGen/AMDGPU/sched-crash-dbg-value.mir | 14 +- ...dleMoveUp-subreg-def-across-subreg-def.mir | 48 +- .../AMDGPU/schedule-barrier-fpmode.mir | 32 +- .../CodeGen/AMDGPU/schedule-regpressure.mir | 2 +- .../AMDGPU/scheduler-handle-move-bundle.mir | 16 +- llvm/test/CodeGen/AMDGPU/sdwa-gfx9.mir | 8 +- llvm/test/CodeGen/AMDGPU/sdwa-ops.mir | 30 +- .../AMDGPU/sdwa-peephole-instr-gfx10.mir | 8 +- .../CodeGen/AMDGPU/sdwa-peephole-instr.mir | 12 +- llvm/test/CodeGen/AMDGPU/sdwa-preserve.mir | 18 +- llvm/test/CodeGen/AMDGPU/sdwa-scalar-ops.mir | 20 +- llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir | 4 +- .../AMDGPU/sgpr-spill-partially-undef.mir | 4 +- .../AMDGPU/sgpr-spill-wrong-stack-id.mir | 34 +- llvm/test/CodeGen/AMDGPU/sgpr-spill.mir | 528 +- .../CodeGen/AMDGPU/si-fix-sgpr-copies.mir | 4 +- .../AMDGPU/skip-branch-taildup-ret.mir | 12 +- .../AMDGPU/smem-no-clause-coalesced.mir | 4 +- llvm/test/CodeGen/AMDGPU/smrd-fold-offset.mir | 2 +- .../CodeGen/AMDGPU/soft-clause-dbg-value.mir | 20 +- .../AMDGPU/spill-agpr-partially-undef.mir | 18 +- llvm/test/CodeGen/AMDGPU/spill-agpr.mir | 152 +- .../AMDGPU/spill-empty-live-interval.mir | 4 +- .../AMDGPU/spill-reg-tuple-super-reg-use.mir | 32 +- .../CodeGen/AMDGPU/spill-special-sgpr.mir | 36 +- llvm/test/CodeGen/AMDGPU/spill192.mir | 12 +- llvm/test/CodeGen/AMDGPU/spill224.mir | 12 +- .../CodeGen/AMDGPU/splitkit-copy-bundle.mir | 56 +- .../AMDGPU/splitkit-copy-live-lanes.mir | 284 +- .../AMDGPU/splitkit-getsubrangeformask.ll | 176 +- .../AMDGPU/splitkit-nolivesubranges.mir | 4 +- .../stack-slot-color-sgpr-vgpr-spills.mir | 4 +- .../AMDGPU/subreg-split-live-in-error.mir | 8 +- ...ubreg-undef-def-with-other-subreg-defs.mir | 8 +- llvm/test/CodeGen/AMDGPU/swdev282079.mir | 12 +- llvm/test/CodeGen/AMDGPU/syncscopes.ll | 6 +- .../transform-block-with-return-to-epilog.ll | 4 +- .../unallocatable-bundle-regression.mir | 96 +- .../AMDGPU/undefined-physreg-sgpr-spill.mir | 14 +- .../AMDGPU/unexpected-reg-unit-state.mir | 4 +- .../CodeGen/AMDGPU/unsupported-image-a16.ll | 2 +- .../CodeGen/AMDGPU/unsupported-image-g16.ll | 2 +- .../CodeGen/AMDGPU/verify-ds-gws-align.mir | 28 +- .../vgpr-spill-dead-frame-in-dbg-value.mir | 4 +- llvm/test/CodeGen/AMDGPU/vgpr-spill.mir | 42 +- .../virtregrewrite-undef-identity-copy.mir | 8 +- .../CodeGen/AMDGPU/vmem-to-salu-hazard.mir | 2 +- llvm/test/CodeGen/AMDGPU/waitcnt-agpr.mir | 20 +- .../CodeGen/AMDGPU/waitcnt-back-edge-loop.mir | 8 +- .../CodeGen/AMDGPU/waitcnt-no-redundant.mir | 4 +- llvm/test/CodeGen/AMDGPU/waitcnt-vmem-waw.mir | 16 +- llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.mir | 4 +- llvm/test/CodeGen/AMDGPU/waitcnt.mir | 16 +- llvm/test/CodeGen/AMDGPU/wqm.mir | 6 +- .../ARM/GlobalISel/arm-instruction-select.mir | 64 +- .../ARM/GlobalISel/arm-irtranslator.ll | 40 +- .../ARM/GlobalISel/arm-legalize-binops.mir | 48 +- .../ARM/GlobalISel/arm-legalize-cmp.mir | 8 +- .../ARM/GlobalISel/arm-legalize-consts.mir | 8 +- .../ARM/GlobalISel/arm-legalize-exts.mir | 22 +- .../GlobalISel/arm-legalize-load-store.mir | 70 +- .../ARM/GlobalISel/arm-legalize-vfp4.mir | 4 +- .../CodeGen/ARM/GlobalISel/arm-legalizer.mir | 4 +- .../ARM/GlobalISel/arm-param-lowering.ll | 54 +- .../ARM/GlobalISel/arm-regbankselect.mir | 32 +- .../ARM/GlobalISel/arm-select-globals-pic.mir | 28 +- .../arm-select-globals-ropi-rwpi.mir | 28 +- .../GlobalISel/arm-select-globals-static.mir | 8 +- .../irtranslator-varargs-lowering.ll | 8 +- .../ARM/GlobalISel/select-fp-const.mir | 16 +- .../test/CodeGen/ARM/GlobalISel/select-fp.mir | 10 +- .../ARM/GlobalISel/thumb-select-exts.mir | 36 +- .../GlobalISel/thumb-select-globals-pic.mir | 28 +- .../thumb-select-globals-ropi-rwpi.mir | 28 +- .../thumb-select-globals-static.mir | 12 +- .../GlobalISel/thumb-select-load-store.mir | 40 +- llvm/test/CodeGen/ARM/cmp2-peephole-thumb.mir | 8 +- llvm/test/CodeGen/ARM/cmpxchg.mir | 2 +- .../CodeGen/ARM/cmse-clear-float-bigend.mir | 16 +- .../CodeGen/ARM/cmse-vlldm-no-reorder.mir | 4 +- llvm/test/CodeGen/ARM/codesize-ifcvt.mir | 18 +- .../CodeGen/ARM/const-load-align-thumb.mir | 6 +- .../ARM/constant-island-SOImm-limit16.mir | 4 +- .../CodeGen/ARM/constant-island-movwt.mir | 108 +- .../CodeGen/ARM/constant-islands-split-IT.mir | 16 +- llvm/test/CodeGen/ARM/fold-sext-sextload.ll | 2 +- llvm/test/CodeGen/ARM/fold-zext-zextload.ll | 2 +- llvm/test/CodeGen/ARM/fp16-litpool-arm.mir | 16 +- llvm/test/CodeGen/ARM/fp16-litpool-thumb.mir | 14 +- llvm/test/CodeGen/ARM/fp16-litpool2-arm.mir | 4 +- llvm/test/CodeGen/ARM/fp16-litpool3-arm.mir | 4 +- llvm/test/CodeGen/ARM/fpoffset_overflow.mir | 12 +- llvm/test/CodeGen/ARM/ldrd-memoper.ll | 2 +- .../ARM/load_store_opt_clobber_cpsr.mir | 16 +- .../CodeGen/ARM/load_store_opt_reg_limit.mir | 66 +- .../CodeGen/ARM/machine-sink-multidef.mir | 8 +- .../CodeGen/ARM/misched-int-basic-thumb2.mir | 10 +- .../nonreserved-callframe-with-basereg.mir | 4 +- llvm/test/CodeGen/ARM/pei-swiftself.mir | 2 +- .../CodeGen/ARM/pr42638-VMOVRRDCombine.ll | 6 +- llvm/test/CodeGen/ARM/prera-ldst-aliasing.mir | 8 +- llvm/test/CodeGen/ARM/prera-ldst-insertpt.mir | 18 +- .../ARM/register-scavenger-exceptions.mir | 4 +- llvm/test/CodeGen/ARM/single-issue-r52.mir | 4 +- llvm/test/CodeGen/ARM/store-prepostinc.mir | 80 +- .../test/CodeGen/ARM/v6-jumptable-clobber.mir | 8 +- llvm/test/CodeGen/ARM/vldm-liveness.mir | 14 +- llvm/test/CodeGen/ARM/vldmia-sched.mir | 4 +- .../check-line-and-variables-x.mir | 2 +- .../MIRDebugify/check-line-and-variables.mir | 18 +- llvm/test/CodeGen/Hexagon/addrmode-immop.mir | 2 +- .../CodeGen/Hexagon/bank-conflict-load.mir | 6 +- llvm/test/CodeGen/Hexagon/bank-conflict.mir | 14 +- .../CodeGen/Hexagon/cext-opt-stack-no-rr.mir | 8 +- .../Hexagon/early-if-conv-lifetime.mir | 2 +- .../CodeGen/Hexagon/early-if-predicator.mir | 2 +- .../CodeGen/Hexagon/ifcvt-diamond-ret.mir | 4 +- .../Hexagon/livephysregs-regmask-clobber.mir | 8 +- .../CodeGen/Hexagon/packetize-dccleana.mir | 2 +- .../Hexagon/packetize-load-store-aliasing.mir | 8 +- .../CodeGen/Hexagon/packetize-nvstore.mir | 4 +- .../Hexagon/packetize-update-offset.mir | 4 +- .../Hexagon/pipeliner/swp-phi-start.mir | 38 +- .../CodeGen/Hexagon/post-inc-aa-metadata.ll | 2 +- .../CodeGen/Hexagon/postinc-baseoffset.mir | 4 +- .../CodeGen/Hexagon/regalloc-bad-undef.mir | 2 +- .../test/CodeGen/Hexagon/swp-carried-dep1.mir | 30 +- .../test/CodeGen/Hexagon/swp-carried-dep2.mir | 20 +- llvm/test/CodeGen/Lanai/peephole-compare.mir | 4 +- .../MIR/AArch64/addrspace-memoperands.mir | 20 +- .../MIR/AArch64/atomic-memoperands.mir | 24 +- .../CodeGen/MIR/AArch64/base-memoperands.mir | 36 +- .../CodeGen/MIR/AArch64/machine-metadata.mir | 48 +- .../MIR/AArch64/mirCanonCopyCopyProp.mir | 4 +- .../MIR/AArch64/mirCanonIdempotent.mir | 40 +- llvm/test/CodeGen/MIR/AArch64/mirnamer.mir | 16 +- .../MIR/AArch64/stack-object-local-offset.mir | 6 +- llvm/test/CodeGen/MIR/AArch64/swp.mir | 4 +- .../MIR/AArch64/target-memoperands.mir | 16 +- .../MIR/AMDGPU/custom-pseudo-source-values.ll | 6 +- .../CodeGen/MIR/AMDGPU/load-store-opt-dlc.mir | 36 +- .../CodeGen/MIR/AMDGPU/machine-metadata.mir | 24 +- .../MIR/AMDGPU/mircanon-memoperands.mir | 12 +- .../MIR/AMDGPU/parse-order-reserved-regs.mir | 4 +- llvm/test/CodeGen/MIR/AMDGPU/syncscopes.mir | 24 +- .../test/CodeGen/MIR/ARM/thumb2-sub-sp-t3.mir | 8 +- .../MIR/Hexagon/bundled-call-site-info.mir | 2 +- ...lobal-value-or-symbol-after-call-entry.mir | 4 +- .../test/CodeGen/MIR/Mips/memory-operands.mir | 12 +- .../CodeGen/MIR/Mips/setRegClassOrRegBank.ll | 4 +- .../CodeGen/MIR/Mips/setRegClassOrRegBank.mir | 8 +- .../PowerPC/unordered-implicit-registers.mir | 2 +- .../MIR/X86/branch-folder-with-label.mir | 8 +- llvm/test/CodeGen/MIR/X86/diexpr-win32.mir | 20 +- .../MIR/X86/duplicate-memory-operand-flag.mir | 4 +- .../X86/expected-align-in-memory-operand.mir | 10 +- ...lignment-after-align-in-memory-operand.mir | 10 +- .../expected-comma-after-memory-operand.mir | 4 +- ...expected-metadata-node-in-stack-object.mir | 2 +- ...pected-pointer-value-in-memory-operand.mir | 4 +- ...xpected-positive-alignment-after-align.mir | 10 +- .../X86/expected-power-of-2-after-align.mir | 4 +- ...d-size-integer-after-memory-operation2.mir | 2 +- .../X86/expected-value-in-memory-operand.mir | 4 +- .../MIR/X86/fixed-stack-memory-operands.mir | 6 +- .../MIR/X86/frame-info-stack-references.mir | 2 +- .../test/CodeGen/MIR/X86/machine-metadata.mir | 72 +- llvm/test/CodeGen/MIR/X86/memory-operands.mir | 156 +- .../CodeGen/MIR/X86/mir-canon-hash-bb.mir | 10 +- llvm/test/CodeGen/MIR/X86/roundtrip.mir | 4 +- .../X86/undefined-value-in-memory-operand.mir | 4 +- .../MIR/X86/unknown-metadata-keyword.mir | 6 +- .../GlobalISel/instruction-select/add_vec.mir | 48 +- .../instruction-select/fabs_vec.mir | 16 +- .../GlobalISel/instruction-select/fence.mir | 4 +- ...oating_point_vec_arithmetic_operations.mir | 96 +- .../instruction-select/fsqrt_vec.mir | 16 +- .../instruction-select/gloal_address_pic.mir | 16 +- .../jump_table_and_brjt.mir | 12 +- .../GlobalISel/instruction-select/load.mir | 18 +- .../instruction-select/load_4_unaligned.mir | 14 +- .../load_4_unaligned_r6.mir | 12 +- .../instruction-select/load_store_fold.mir | 32 +- .../instruction-select/load_store_vec.mir | 48 +- .../GlobalISel/instruction-select/mul.mir | 8 +- .../GlobalISel/instruction-select/mul_vec.mir | 48 +- .../GlobalISel/instruction-select/phi.mir | 18 +- .../instruction-select/pointers.mir | 12 +- .../instruction-select/rem_and_div_vec.mir | 192 +- .../GlobalISel/instruction-select/select.mir | 6 +- .../instruction-select/stack_args.mir | 8 +- .../GlobalISel/instruction-select/store.mir | 18 +- .../instruction-select/store_4_unaligned.mir | 14 +- .../store_4_unaligned_r6.mir | 12 +- .../GlobalISel/instruction-select/sub_vec.mir | 48 +- .../truncStore_and_aExtLoad.mir | 24 +- .../GlobalISel/instruction-select/var_arg.mir | 46 +- .../zextLoad_and_sextLoad.mir | 16 +- .../irtranslator/aggregate_struct_return.ll | 36 +- .../Mips/GlobalISel/irtranslator/call.ll | 4 +- .../GlobalISel/irtranslator/extend_args.ll | 36 +- .../Mips/GlobalISel/irtranslator/pointers.ll | 6 +- .../GlobalISel/irtranslator/split_args.ll | 14 +- .../GlobalISel/irtranslator/sret_pointer.ll | 6 +- .../GlobalISel/irtranslator/stack_args.ll | 6 +- .../Mips/GlobalISel/irtranslator/var_arg.ll | 20 +- .../CodeGen/Mips/GlobalISel/legalizer/add.mir | 24 +- .../Mips/GlobalISel/legalizer/add_vec.mir | 48 +- .../GlobalISel/legalizer/add_vec_builtin.mir | 80 +- .../GlobalISel/legalizer/dyn_stackalloc.mir | 10 +- .../Mips/GlobalISel/legalizer/fabs_vec.mir | 16 +- .../GlobalISel/legalizer/fabs_vec_builtin.mir | 16 +- .../Mips/GlobalISel/legalizer/fence.mir | 4 +- ...oating_point_vec_arithmetic_operations.mir | 96 +- ...oint_vec_arithmetic_operations_builtin.mir | 96 +- .../Mips/GlobalISel/legalizer/fsqrt_vec.mir | 16 +- .../legalizer/fsqrt_vec_builtin.mir | 16 +- .../Mips/GlobalISel/legalizer/load.mir | 16 +- .../GlobalISel/legalizer/load_4_unaligned.mir | 48 +- .../GlobalISel/legalizer/load_store_vec.mir | 48 +- .../CodeGen/Mips/GlobalISel/legalizer/mul.mir | 24 +- .../Mips/GlobalISel/legalizer/mul_vec.mir | 48 +- .../GlobalISel/legalizer/mul_vec_builtin.mir | 48 +- .../CodeGen/Mips/GlobalISel/legalizer/phi.mir | 36 +- .../Mips/GlobalISel/legalizer/pointers.mir | 12 +- .../GlobalISel/legalizer/rem_and_div_vec.mir | 192 +- .../legalizer/rem_and_div_vec_builtin.mir | 192 +- .../Mips/GlobalISel/legalizer/select.mir | 12 +- .../Mips/GlobalISel/legalizer/stack_args.mir | 8 +- .../Mips/GlobalISel/legalizer/store.mir | 16 +- .../legalizer/store_4_unaligned.mir | 48 +- ...tore_split_because_of_memsize_or_align.mir | 272 +- .../CodeGen/Mips/GlobalISel/legalizer/sub.mir | 16 +- .../Mips/GlobalISel/legalizer/sub_vec.mir | 48 +- .../GlobalISel/legalizer/sub_vec_builtin.mir | 80 +- .../legalizer/truncStore_and_aExtLoad.mir | 40 +- .../Mips/GlobalISel/legalizer/var_arg.mir | 46 +- .../legalizer/zextLoad_and_sextLoad.mir | 40 +- .../inline-memcpy.mir | 10 +- .../truncStore_and_aExtLoad.mir | 8 +- .../zextLoad_and_sextLoad.mir | 40 +- .../TypeInfoforMF_skipCopies.mir | 16 +- .../Mips/GlobalISel/regbankselect/add_vec.mir | 48 +- .../GlobalISel/regbankselect/fabs_vec.mir | 16 +- .../Mips/GlobalISel/regbankselect/fence.mir | 4 +- ...oating_point_vec_arithmetic_operations.mir | 96 +- .../GlobalISel/regbankselect/fsqrt_vec.mir | 16 +- .../Mips/GlobalISel/regbankselect/load.mir | 36 +- .../regbankselect/load_4_unaligned.mir | 18 +- .../regbankselect/load_store_vec.mir | 48 +- .../long_ambiguous_chain_s32.mir | 228 +- .../long_ambiguous_chain_s64.mir | 228 +- .../Mips/GlobalISel/regbankselect/mul.mir | 8 +- .../Mips/GlobalISel/regbankselect/mul_vec.mir | 48 +- .../Mips/GlobalISel/regbankselect/phi.mir | 36 +- .../GlobalISel/regbankselect/pointers.mir | 12 +- .../regbankselect/rem_and_div_vec.mir | 192 +- .../Mips/GlobalISel/regbankselect/select.mir | 36 +- .../GlobalISel/regbankselect/stack_args.mir | 8 +- .../Mips/GlobalISel/regbankselect/store.mir | 20 +- .../regbankselect/store_4_unaligned.mir | 18 +- .../Mips/GlobalISel/regbankselect/sub_vec.mir | 48 +- .../regbankselect/test_TypeInfoforMF.mir | 40 +- .../regbankselect/truncStore_and_aExtLoad.mir | 24 +- .../Mips/GlobalISel/regbankselect/var_arg.mir | 48 +- .../regbankselect/zextLoad_and_sextLoad.mir | 24 +- llvm/test/CodeGen/Mips/address-selection.ll | 4 +- .../compact-branch-implicit-def.mir | 26 +- .../Mips/compactbranches/empty-block.mir | 4 +- llvm/test/CodeGen/Mips/copy-fp64.ll | 4 +- .../Mips/delay-slot-filler-bundled-insts.mir | 8 +- llvm/test/CodeGen/Mips/micromips-eva.mir | 24 +- .../Mips/micromips-short-delay-slot.mir | 4 +- .../micromips-lwp-swp.mir | 56 +- .../micromips-movep.mir | 16 +- .../micromips-no-lwp-swp.mir | 32 +- .../mirparser/target-flags-pic-mxgot-tls.mir | 32 +- .../Mips/mirparser/target-flags-pic-o32.mir | 10 +- .../Mips/mirparser/target-flags-pic.mir | 10 +- .../mirparser/target-flags-static-tls.mir | 14 +- .../test/CodeGen/Mips/msa/emergency-spill.mir | 146 +- .../CodeGen/Mips/unaligned-memops-mapping.mir | 16 +- llvm/test/CodeGen/Mips/unaligned-memops.ll | 16 +- .../CodeGen/PowerPC/2013-07-01-PHIElimBug.mir | 8 +- ...HoistingDueToBlockHotnessNoProfileData.mir | 14 +- ...leHoistingDueToBlockHotnessProfileData.mir | 14 +- .../NoCRFieldRedefWhenSpillingCRBIT.mir | 10 +- llvm/test/CodeGen/PowerPC/aantidep-def-ec.mir | 14 +- .../CodeGen/PowerPC/addisdtprelha-nonr3.mir | 8 +- llvm/test/CodeGen/PowerPC/aix-cc-abi.ll | 738 +-- llvm/test/CodeGen/PowerPC/aix-cc-byval-mem.ll | 62 +- .../CodeGen/PowerPC/aix-cc-byval-split.ll | 32 +- llvm/test/CodeGen/PowerPC/aix-cc-byval.ll | 266 +- .../CodeGen/PowerPC/aix-csr-vector-extabi.ll | 96 +- llvm/test/CodeGen/PowerPC/aix-csr-vector.ll | 48 +- llvm/test/CodeGen/PowerPC/aix-csr.ll | 84 +- .../CodeGen/PowerPC/aix-dfltabi-rsrvd-reg.ll | 18 +- .../test/CodeGen/PowerPC/aix-indirect-call.ll | 30 +- .../PowerPC/aix-lower-block-address.ll | 8 +- .../PowerPC/aix-lower-constant-pool-index.ll | 16 +- .../CodeGen/PowerPC/aix-lower-jump-table.ll | 16 +- .../CodeGen/PowerPC/aix-vec-arg-spills-mir.ll | 54 +- .../PowerPC/aix-vector-vararg-caller.ll | 132 +- .../PowerPC/aix-vector-vararg-fixed-caller.ll | 46 +- .../CodeGen/PowerPC/aix-xcoff-reloc-symb.mir | 2 +- .../CodeGen/PowerPC/aix32-cc-abi-vaarg.ll | 64 +- llvm/test/CodeGen/PowerPC/aix32-crsave.mir | 8 +- .../aix32-vector-vararg-callee-split.ll | 6 +- .../PowerPC/aix32-vector-vararg-callee.ll | 18 +- .../aix32-vector-vararg-caller-split.ll | 18 +- .../aix32-vector-vararg-fixed-callee.ll | 4 +- .../CodeGen/PowerPC/aix64-cc-abi-vaarg.ll | 60 +- llvm/test/CodeGen/PowerPC/aix64-cc-byval.ll | 32 +- .../PowerPC/aix64-vector-vararg-callee.ll | 18 +- .../aix64-vector-vararg-fixed-callee.ll | 10 +- .../CodeGen/PowerPC/block-placement-1.mir | 8 +- llvm/test/CodeGen/PowerPC/block-placement.mir | 80 +- llvm/test/CodeGen/PowerPC/botheightreduce.mir | 36 +- llvm/test/CodeGen/PowerPC/byval-agg-info.ll | 4 +- ...rt-rr-to-ri-instrs-R0-special-handling.mir | 6 +- .../PowerPC/convert-rr-to-ri-instrs.mir | 1586 ++--- llvm/test/CodeGen/PowerPC/kernel-fp-round.ll | 6 +- llvm/test/CodeGen/PowerPC/ldst-16-byte.mir | 72 +- llvm/test/CodeGen/PowerPC/ldst-align.ll | 6 +- llvm/test/CodeGen/PowerPC/licm-tocReg.ll | 24 +- llvm/test/CodeGen/PowerPC/livephysregs.mir | 8 +- llvm/test/CodeGen/PowerPC/livevars-crash1.mir | 20 +- llvm/test/CodeGen/PowerPC/livevars-crash2.mir | 20 +- .../CodeGen/PowerPC/lower-globaladdr32-aix.ll | 16 +- .../CodeGen/PowerPC/lower-globaladdr64-aix.ll | 16 +- .../PowerPC/no-rlwimi-trivial-commute.mir | 8 +- llvm/test/CodeGen/PowerPC/nofpexcept.ll | 14 +- .../test/CodeGen/PowerPC/peephole-phi-acc.mir | 16 +- llvm/test/CodeGen/PowerPC/phi-eliminate.mir | 4 +- llvm/test/CodeGen/PowerPC/ppc64-crsave.mir | 8 +- .../PowerPC/remove-copy-crunsetcrbit.mir | 12 +- .../remove-redundant-li-skip-imp-kill.mir | 20 +- llvm/test/CodeGen/PowerPC/scavenging.mir | 4 +- .../CodeGen/PowerPC/schedule-addi-load.mir | 16 +- llvm/test/CodeGen/PowerPC/setcr_bc.mir | 4 +- llvm/test/CodeGen/PowerPC/setcr_bc2.mir | 4 +- llvm/test/CodeGen/PowerPC/setcr_bc3.mir | 4 +- .../PowerPC/sink-down-more-instructions-1.mir | 24 +- ...own-more-instructions-regpressure-high.mir | 252 +- .../CodeGen/PowerPC/stack-coloring-vararg.mir | 44 +- .../CodeGen/PowerPC/tls_get_addr_fence1.mir | 2 +- .../CodeGen/PowerPC/tls_get_addr_fence2.mir | 2 +- llvm/test/CodeGen/PowerPC/toc-data.ll | 8 +- .../CodeGen/PowerPC/topdepthreduce-postra.mir | 8 +- .../CodeGen/PowerPC/two-address-crash.mir | 4 +- .../test/CodeGen/PowerPC/unal-vec-negarith.ll | 4 +- llvm/test/CodeGen/RISCV/copy-frameindex.mir | 4 +- llvm/test/CodeGen/RISCV/disjoint.ll | 4 +- .../RISCV/rvv/addi-scalable-offset.mir | 8 +- .../test/CodeGen/RISCV/rvv/emergency-slot.mir | 64 +- .../test/CodeGen/RISCV/rvv/frameindex-addr.ll | 2 +- .../RISCV/rvv/tail-agnostic-impdef-copy.mir | 4 +- .../test/CodeGen/RISCV/rvv/vsetvli-insert.mir | 16 +- .../RISCV/rvv/wrong-stack-slot-rv32.mir | 2 +- .../RISCV/rvv/wrong-stack-slot-rv64.mir | 2 +- llvm/test/CodeGen/RISCV/vector-abi.ll | 16 +- llvm/test/CodeGen/SPARC/fp128-split.ll | 16 +- .../SystemZ/RAbasic-invalid-LR-update.mir | 4 +- .../SystemZ/branch-folder-hoist-livein.mir | 10 +- .../SystemZ/clear-liverange-spillreg.mir | 230 +- .../SystemZ/combine_loads_from_build_pair.ll | 8 +- llvm/test/CodeGen/SystemZ/cond-move-05.mir | 2 +- llvm/test/CodeGen/SystemZ/cond-move-08.mir | 6 +- .../SystemZ/cond-move-regalloc-hints-02.mir | 2 +- .../SystemZ/cond-move-regalloc-hints.mir | 10 +- llvm/test/CodeGen/SystemZ/debuginstr-01.mir | 2 +- .../test/CodeGen/SystemZ/foldmemop-imm-02.mir | 8 +- llvm/test/CodeGen/SystemZ/foldmemop-msc.mir | 8 +- .../CodeGen/SystemZ/foldmemop-vec-binops.mir | 48 +- .../test/CodeGen/SystemZ/foldmemop-vec-cc.mir | 2 +- .../CodeGen/SystemZ/foldmemop-vec-cmp.mir | 26 +- .../CodeGen/SystemZ/foldmemop-vec-fusedfp.mir | 32 +- llvm/test/CodeGen/SystemZ/fp-cmp-07.mir | 2 +- llvm/test/CodeGen/SystemZ/fp-conv-17.mir | 102 +- llvm/test/CodeGen/SystemZ/frame-26.mir | 70 +- llvm/test/CodeGen/SystemZ/int-cmp-56.mir | 16 +- llvm/test/CodeGen/SystemZ/int-cmp-59.ll | 2 +- llvm/test/CodeGen/SystemZ/isel-debug.ll | 2 +- .../SystemZ/load-and-test-RA-hints.mir | 6 +- llvm/test/CodeGen/SystemZ/multiselect-02.mir | 4 +- .../SystemZ/postra-sched-expandedops.mir | 14 +- .../CodeGen/SystemZ/regalloc-GR128-02.mir | 4 +- ...gcoal-undef-lane-4-rm-cp-commuting-def.mir | 4 +- .../CodeGen/SystemZ/subregliveness-06.mir | 58 +- .../CodeGen/SystemZ/subregliveness-07.mir | 4 +- llvm/test/CodeGen/Thumb/PR36658.mir | 6 +- .../CodeGen/Thumb/machine-cse-deadreg.mir | 8 +- llvm/test/CodeGen/Thumb/tbb-reuse.mir | 84 +- .../Thumb2/LowOverheadLoops/add_reduce.mir | 24 +- .../begin-vpt-without-inst.mir | 8 +- .../biquad-cascade-default.mir | 104 +- .../biquad-cascade-optsize-strd-lr.mir | 92 +- .../biquad-cascade-optsize.mir | 104 +- .../Thumb2/LowOverheadLoops/cond-mov.mir | 8 +- .../count_dominates_start.mir | 8 +- .../LowOverheadLoops/ctlz-non-zeros.mir | 48 +- .../Thumb2/LowOverheadLoops/disjoint-vcmp.mir | 28 +- .../LowOverheadLoops/dont-ignore-vctp.mir | 4 +- .../dont-remove-loop-update.mir | 12 +- .../Thumb2/LowOverheadLoops/emptyblock.mir | 78 +- .../LowOverheadLoops/end-positive-offset.mir | 92 +- .../LowOverheadLoops/extract-element.mir | 8 +- .../LowOverheadLoops/incorrect-sub-16.mir | 12 +- .../LowOverheadLoops/incorrect-sub-32.mir | 12 +- .../LowOverheadLoops/incorrect-sub-8.mir | 12 +- .../LowOverheadLoops/inloop-vpnot-1.mir | 28 +- .../LowOverheadLoops/inloop-vpnot-2.mir | 28 +- .../LowOverheadLoops/inloop-vpnot-3.mir | 28 +- .../LowOverheadLoops/inloop-vpsel-1.mir | 20 +- .../LowOverheadLoops/inloop-vpsel-2.mir | 20 +- .../LowOverheadLoops/invariant-qreg.mir | 28 +- .../LowOverheadLoops/it-block-chain-store.mir | 24 +- .../LowOverheadLoops/it-block-chain.mir | 4 +- .../LowOverheadLoops/it-block-itercount.mir | 4 +- .../LowOverheadLoops/it-block-random.mir | 4 +- .../iv-two-vcmp-reordered.mir | 12 +- .../Thumb2/LowOverheadLoops/iv-two-vcmp.mir | 12 +- .../Thumb2/LowOverheadLoops/iv-vcmp.mir | 12 +- .../LowOverheadLoops/livereg-no-loop-def.mir | 16 +- .../LowOverheadLoops/loop-dec-copy-chain.mir | 112 +- .../loop-dec-copy-prev-iteration.mir | 112 +- .../LowOverheadLoops/loop-dec-liveout.mir | 112 +- .../lstp-insertion-position.mir | 24 +- .../Thumb2/LowOverheadLoops/massive.mir | 12 +- .../Thumb2/LowOverheadLoops/matrix-debug.mir | 16 +- .../Thumb2/LowOverheadLoops/matrix.mir | 24 +- .../Thumb2/LowOverheadLoops/mov-after-dls.mir | 8 +- .../LowOverheadLoops/mov-after-dlstp.mir | 12 +- .../LowOverheadLoops/mov-lr-terminator.mir | 12 +- .../move-def-before-start.mir | 12 +- .../LowOverheadLoops/move-start-after-def.mir | 12 +- .../LowOverheadLoops/multiblock-massive.mir | 16 +- .../LowOverheadLoops/multiple-do-loops.mir | 100 +- .../Thumb2/LowOverheadLoops/no-dec-cbnz.mir | 48 +- .../LowOverheadLoops/no-dec-reorder.mir | 32 +- .../Thumb2/LowOverheadLoops/no-dec.mir | 48 +- .../LowOverheadLoops/no-vpsel-liveout.mir | 8 +- .../LowOverheadLoops/non-masked-load.mir | 8 +- .../LowOverheadLoops/non-masked-store.mir | 12 +- .../LowOverheadLoops/out-of-range-cbz.mir | 64 +- .../LowOverheadLoops/predicated-invariant.mir | 8 +- .../LowOverheadLoops/predicated-liveout.mir | 8 +- .../reductions-vpt-liveout.mir | 48 +- .../LowOverheadLoops/remove-elem-moves.mir | 16 +- .../LowOverheadLoops/revert-non-header.mir | 8 +- .../LowOverheadLoops/revert-non-loop.mir | 4 +- .../Thumb2/LowOverheadLoops/revert-while.mir | 8 +- .../LowOverheadLoops/revertcallearly.mir | 8 +- .../LowOverheadLoops/safe-def-no-mov.mir | 8 +- .../LowOverheadLoops/safe-retaining.mir | 32 +- .../Thumb2/LowOverheadLoops/size-limit.mir | 12 +- .../Thumb2/LowOverheadLoops/skip-debug.mir | 12 +- .../LowOverheadLoops/skip-vpt-debug.mir | 12 +- .../Thumb2/LowOverheadLoops/switch.mir | 2 +- .../LowOverheadLoops/unpredicated-max.mir | 8 +- .../LowOverheadLoops/unrolled-and-vector.mir | 96 +- .../LowOverheadLoops/unsafe-cpsr-loop-def.mir | 8 +- .../LowOverheadLoops/unsafe-cpsr-loop-use.mir | 8 +- .../LowOverheadLoops/unsafe-retaining.mir | 32 +- .../LowOverheadLoops/unsafe-use-after.mir | 8 +- .../CodeGen/Thumb2/LowOverheadLoops/vaddv.mir | 164 +- .../vctp-add-operand-liveout.mir | 8 +- .../Thumb2/LowOverheadLoops/vctp-in-vpt-2.mir | 20 +- .../Thumb2/LowOverheadLoops/vctp-in-vpt.mir | 48 +- .../Thumb2/LowOverheadLoops/vctp-subi3.mir | 12 +- .../Thumb2/LowOverheadLoops/vctp-subri.mir | 12 +- .../Thumb2/LowOverheadLoops/vctp-subri12.mir | 12 +- .../Thumb2/LowOverheadLoops/vctp16-reduce.mir | 12 +- .../LowOverheadLoops/vmaxmin_vpred_r.mir | 24 +- .../LowOverheadLoops/vmldava_in_vpt.mir | 20 +- .../Thumb2/LowOverheadLoops/vpt-blocks.mir | 28 +- .../while-negative-offset.mir | 48 +- .../CodeGen/Thumb2/LowOverheadLoops/while.mir | 8 +- .../LowOverheadLoops/wls-search-killed.mir | 12 +- .../LowOverheadLoops/wls-search-pred.mir | 8 +- .../CodeGen/Thumb2/LowOverheadLoops/wlstp.mir | 32 +- .../wrong-liveout-lsr-shift.mir | 12 +- .../wrong-vctp-opcode-liveout.mir | 8 +- .../wrong-vctp-operand-liveout.mir | 8 +- llvm/test/CodeGen/Thumb2/block-placement.mir | 4 +- .../CodeGen/Thumb2/constant-islands-cbz.mir | 32 +- .../Thumb2/constant-islands-cbzundef.mir | 8 +- .../CodeGen/Thumb2/constant-islands-ldrsb.mir | 8 +- llvm/test/CodeGen/Thumb2/fp16-stacksplot.mir | 8 +- .../Thumb2/frame-index-addrmode-t2i8s4.mir | 8 +- llvm/test/CodeGen/Thumb2/high-reg-spill.mir | 4 +- .../CodeGen/Thumb2/ifcvt-dead-predicate.mir | 4 +- llvm/test/CodeGen/Thumb2/m4-sched-ldr.mir | 4 +- .../CodeGen/Thumb2/mve-gatherscatter-mmo.ll | 24 +- .../CodeGen/Thumb2/mve-postinc-distribute.mir | 464 +- llvm/test/CodeGen/Thumb2/mve-stacksplot.mir | 16 +- .../Thumb2/mve-vpt-block-fold-vcmp.mir | 36 +- .../CodeGen/Thumb2/postinc-distribute.mir | 56 +- llvm/test/CodeGen/Thumb2/scavenge-lr.mir | 82 +- llvm/test/CodeGen/Thumb2/store-prepostinc.mir | 128 +- llvm/test/CodeGen/Thumb2/t2-teq-reduce.mir | 48 +- llvm/test/CodeGen/Thumb2/tbb-removeadd.mir | 4 +- .../GlobalISel/irtranslator-callingconv.ll | 202 +- .../GlobalISel/legalize-memop-scalar-32.mir | 56 +- .../GlobalISel/legalize-memop-scalar-64.mir | 48 +- .../X86/GlobalISel/legalize-mul-scalar.mir | 4 +- .../X86/GlobalISel/legalize-or-scalar.mir | 4 +- .../X86/GlobalISel/legalize-ptr-add.mir | 16 +- .../CodeGen/X86/GlobalISel/legalize-sub.mir | 4 +- .../CodeGen/X86/GlobalISel/legalize-trunc.mir | 18 +- .../CodeGen/X86/GlobalISel/legalize-undef.mir | 32 +- .../X86/GlobalISel/legalize-xor-scalar.mir | 2 +- .../X86/GlobalISel/regbankselect-AVX2.mir | 4 +- .../X86/GlobalISel/regbankselect-AVX512.mir | 4 +- .../X86/GlobalISel/regbankselect-X86_64.mir | 66 +- .../CodeGen/X86/GlobalISel/select-GV-32.mir | 12 +- .../CodeGen/X86/GlobalISel/select-GV-64.mir | 12 +- .../X86/GlobalISel/select-constant.mir | 4 +- .../X86/GlobalISel/select-fconstant.mir | 4 +- .../select-memop-scalar-unordered.mir | 200 +- .../GlobalISel/select-memop-scalar-x32.mir | 80 +- .../X86/GlobalISel/select-memop-scalar.mir | 200 +- .../X86/GlobalISel/select-memop-v128.mir | 40 +- .../X86/GlobalISel/select-memop-v256.mir | 32 +- .../X86/GlobalISel/select-memop-v512.mir | 16 +- .../X86/GlobalISel/x86-legalize-inttoptr.mir | 4 +- .../X86/GlobalISel/x86-legalize-ptrtoint.mir | 16 +- .../X86/GlobalISel/x86-legalize-srem.mir | 24 +- .../X86/GlobalISel/x86-legalize-urem.mir | 24 +- .../X86/GlobalISel/x86-select-inttoptr.mir | 4 +- .../X86/GlobalISel/x86-select-ptrtoint.mir | 16 +- .../X86/GlobalISel/x86-select-srem.mir | 24 +- .../X86/GlobalISel/x86-select-udiv.mir | 24 +- .../X86/GlobalISel/x86-select-urem.mir | 24 +- .../x86_64-irtranslator-struct-return.ll | 58 +- .../X86/StackColoring-use-between-allocas.mir | 16 +- llvm/test/CodeGen/X86/adx-commute.mir | 16 +- .../CodeGen/X86/avoid-sfb-g-no-change.mir | 12 +- .../CodeGen/X86/avoid-sfb-g-no-change2.mir | 6 +- .../CodeGen/X86/avoid-sfb-g-no-change3.mir | 6 +- .../test/CodeGen/X86/avoid-sfb-kill-flags.mir | 24 +- llvm/test/CodeGen/X86/avoid-sfb-offset.mir | 66 +- llvm/test/CodeGen/X86/avx512f-256-set0.mir | 4 +- llvm/test/CodeGen/X86/bad-tls-fold.mir | 12 +- .../X86/basic-block-sections-mir-parse.mir | 10 +- llvm/test/CodeGen/X86/block-placement.mir | 4 +- llvm/test/CodeGen/X86/bug47278.mir | 12 +- llvm/test/CodeGen/X86/cf-opt-memops.mir | 12 +- .../X86/conditional-tailcall-samedest.mir | 56 +- .../CodeGen/X86/copy-eflags-liveinlists.mir | 12 +- llvm/test/CodeGen/X86/domain-reassignment.mir | 4 +- .../CodeGen/X86/extend-set-cc-uses-dbg.ll | 4 +- .../fast-regalloc-live-out-debug-values.mir | 42 +- llvm/test/CodeGen/X86/fixup-bw-inst.mir | 4 +- llvm/test/CodeGen/X86/fold-sext-trunc.ll | 2 +- llvm/test/CodeGen/X86/fp-intrinsics-flags.ll | 64 +- .../test/CodeGen/X86/implicit-null-checks.mir | 50 +- .../isel-postprocessing-test-fold-memop.ll | 2 +- llvm/test/CodeGen/X86/lea-opt-with-debug.mir | 12 +- llvm/test/CodeGen/X86/limit-split-cost.mir | 4 +- .../CodeGen/X86/lvi-hardening-gadget-graph.ll | 30 +- llvm/test/CodeGen/X86/machine-cp-mask-reg.mir | 4 +- llvm/test/CodeGen/X86/memcpy-scoped-aa.ll | 36 +- .../X86/merge-store-partially-alias-loads.ll | 8 +- llvm/test/CodeGen/X86/movtopush.mir | 26 +- .../CodeGen/X86/non-value-mem-operand.mir | 48 +- llvm/test/CodeGen/X86/opt_phis2.mir | 2 +- .../test/CodeGen/X86/peephole-fold-testrr.mir | 8 +- llvm/test/CodeGen/X86/peephole-recurrence.mir | 2 +- .../CodeGen/X86/post-ra-sched-with-debug.mir | 22 +- llvm/test/CodeGen/X86/pr27681.mir | 6 +- llvm/test/CodeGen/X86/pr30821.mir | 144 +- llvm/test/CodeGen/X86/pr46827.ll | 2 +- llvm/test/CodeGen/X86/pr48064.mir | 38 +- llvm/test/CodeGen/X86/pre-coalesce.mir | 10 +- .../CodeGen/X86/prologepilog_deref_size.mir | 2 +- .../regalloc-fast-missing-live-out-spill.mir | 14 +- .../CodeGen/X86/shrink_wrap_dbg_value.mir | 4 +- llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll | 20 +- llvm/test/CodeGen/X86/stack-folding-adx.mir | 80 +- llvm/test/CodeGen/X86/stack-folding-bmi2.mir | 16 +- .../X86/stack-folding-fp-nofpexcept.mir | 4 +- .../CodeGen/X86/stack-protector-weight.ll | 12 +- .../CodeGen/X86/statepoint-fixup-call.mir | 6 +- .../X86/statepoint-fixup-copy-prop-neg.mir | 14 +- .../CodeGen/X86/statepoint-fixup-invoke.mir | 14 +- .../X86/statepoint-fixup-shared-ehpad.mir | 24 +- .../X86/statepoint-fixup-undef-def.mir | 24 +- .../CodeGen/X86/statepoint-fixup-undef.mir | 24 +- .../X86/statepoint-invoke-ra-enter-at-end.mir | 42 +- .../X86/statepoint-invoke-ra-hoist-copies.mir | 18 +- .../statepoint-invoke-ra-inline-spiller.mir | 36 +- ...tatepoint-invoke-ra-remove-back-copies.mir | 20 +- .../test/CodeGen/X86/statepoint-invoke-ra.mir | 98 +- llvm/test/CodeGen/X86/statepoint-ra.ll | 62 +- .../CodeGen/X86/statepoint-vreg-details.ll | 24 +- .../CodeGen/X86/statepoint-vreg-folding.mir | 62 +- .../CodeGen/X86/statepoint-vreg-invoke.ll | 36 +- .../statepoint-vreg-unlimited-tied-opnds.ll | 170 +- llvm/test/CodeGen/X86/statepoint-vreg.mir | 4 +- llvm/test/CodeGen/X86/tail-dup-asm-goto.ll | 4 +- .../test/CodeGen/X86/tail-merge-after-mbp.mir | 20 +- .../test/CodeGen/X86/taildup-callsiteinfo.mir | 2 +- .../CodeGen/X86/topdepthreduce-postra.mir | 4 +- llvm/test/CodeGen/X86/unfoldMemoryOperand.mir | 8 +- llvm/test/CodeGen/X86/vecloadextract.ll | 16 +- .../vector-constrained-fp-intrinsics-flags.ll | 16 +- llvm/test/CodeGen/X86/vmaskmov-offset.ll | 20 +- .../CodeGen/X86/win64-eh-empty-block-2.mir | 8 +- llvm/test/CodeGen/X86/x87-reg-usage.mir | 80 +- .../DebugInfo/AArch64/asan-stack-vars.mir | 318 +- .../compiler-gen-bbs-livedebugvalues.mir | 40 +- llvm/test/DebugInfo/ARM/cfi-eof-prologue.mir | 56 +- .../ARM/machine-cp-updates-dbg-reg.mir | 4 +- .../test/DebugInfo/MIR/AArch64/clobber-sp.mir | 10 +- .../MIR/AArch64/dbgcall-site-expr-chain.mir | 10 +- .../AArch64/dbgcall-site-expr-entry-value.mir | 4 +- ...bgcall-site-indirect-param-with-offset.mir | 8 +- .../AArch64/dbgcall-site-indirect-param.mir | 8 +- .../AArch64/dbgcall-site-interpret-movzxi.mir | 4 +- .../AArch64/dbgcall-site-interpretation.mir | 16 +- .../MIR/AArch64/dbgcall-site-orr-moves.mir | 18 +- .../MIR/AArch64/implicit-def-dead-scope.mir | 82 +- .../MIR/ARM/dbgcall-site-interpretation.mir | 8 +- .../MIR/ARM/if-coverter-call-site-info.mir | 4 +- .../MIR/Hexagon/bundled-call-pr44001.mir | 2 +- ...dbgcall-site-instr-before-bundled-call.mir | 44 +- ...live-debug-values-bundled-entry-values.mir | 4 +- ...all-site-info-update-delay-slot-filler.mir | 6 +- .../MIR/Mips/dbg-call-site-copy-sub-reg.mir | 4 +- ...l-site-delay-slot-interpretation-64bit.mir | 6 +- ...bg-call-site-delay-slot-interpretation.mir | 8 +- .../Mips/dbg-call-site-param-addiu-64bit.mir | 8 +- .../MIR/Mips/dbg-call-site-param-addiu.mir | 8 +- .../DebugInfo/MIR/Mips/last-inst-bundled.mir | 10 +- .../MIR/Mips/live-debug-values-reg-copy.mir | 30 +- .../DebugInfo/MIR/X86/clobbered-fragments.mir | 8 +- ...bg-call-site-spilled-arg-multiple-defs.mir | 6 +- .../MIR/X86/dbg-call-site-spilled-arg.mir | 4 +- .../MIR/X86/dbg-stack-value-range.mir | 4 +- .../MIR/X86/dbgcall-site-interpretation.mir | 6 +- .../X86/dbgcall-site-lea-interpretation.mir | 10 +- .../MIR/X86/dbgcall-site-two-fwd-reg-defs.mir | 8 +- .../MIR/X86/debug-call-site-param.mir | 2 +- .../MIR/X86/debug-entry-value-operation.mir | 2 +- llvm/test/DebugInfo/MIR/X86/debug-loc-0.mir | 2 +- .../X86/dvl-livedebugvalues-spillrestore.mir | 4 +- llvm/test/DebugInfo/MIR/X86/empty-inline.mir | 10 +- .../DebugInfo/MIR/X86/kill-after-spill.mir | 20 +- .../MIR/X86/live-debug-values-cutoffs.mir | 4 +- .../X86/live-debug-values-entry-transfer.mir | 2 +- .../MIR/X86/live-debug-values-reg-copy.mir | 6 +- .../X86/live-debug-values-restore-collide.mir | 4 +- .../MIR/X86/live-debug-values-restore.mir | 38 +- .../MIR/X86/live-debug-values-spill.mir | 40 +- .../X86/live-debug-values-stack-clobber.mir | 18 +- .../DebugInfo/MIR/X86/live-debug-values.mir | 4 +- .../live-debug-vars-unused-arg-debugonly.mir | 4 +- .../MIR/X86/live-debug-vars-unused-arg.mir | 4 +- .../MIR/X86/livedebugvalues_load_in_loop.mir | 2 +- .../X86/livedebugvars-crossbb-interval.mir | 2 +- .../MIR/X86/mlicm-hoist-post-regalloc.mir | 8 +- .../MIR/X86/mlicm-hoist-pre-regalloc.mir | 8 +- .../X86/multiple-param-dbg-value-entry.mir | 2 +- .../MIR/X86/prolog-epilog-indirection.mir | 6 +- .../X86/regcoalescing-clears-dead-dbgvals.mir | 4 +- .../DebugInfo/MIR/X86/sink-leaves-undef.mir | 2 +- llvm/test/DebugInfo/X86/debug-loc-asan.mir | 82 +- llvm/test/DebugInfo/X86/debug-loc-offset.mir | 26 +- llvm/test/DebugInfo/X86/dw_op_minus.mir | 16 +- .../DebugInfo/X86/live-debug-vars-dse.mir | 6 +- .../X86/live-debug-vars-intervals.mir | 12 +- .../X86/location-range-inlined-xblock.mir | 4 +- llvm/test/DebugInfo/X86/location-range.mir | 8 +- llvm/test/DebugInfo/X86/pr19307.mir | 22 +- .../X86/single-location-inlined-param.mir | 2 +- .../X86/single-location-interrupted-scope.mir | 8 +- llvm/test/DebugInfo/X86/single-location.mir | 2 +- .../generic-vreg-undef-use.mir | 2 +- llvm/test/MachineVerifier/test_g_bzero.mir | 10 +- llvm/test/MachineVerifier/test_g_load.mir | 4 +- llvm/test/MachineVerifier/test_g_memcpy.mir | 20 +- llvm/test/MachineVerifier/test_g_memset.mir | 8 +- llvm/test/MachineVerifier/test_g_sextload.mir | 6 +- llvm/test/MachineVerifier/test_g_store.mir | 4 +- llvm/test/MachineVerifier/test_g_zextload.mir | 6 +- llvm/test/MachineVerifier/verify-regops.mir | 8 +- .../CodeGen/GlobalISel/KnownBitsTest.cpp | 54 +- .../GlobalISel/KnownBitsVectorTest.cpp | 48 +- .../GlobalISel/LegalizerHelperTest.cpp | 2 +- .../CodeGen/GlobalISel/LegalizerTest.cpp | 16 +- .../GlobalISel/MachineIRBuilderTest.cpp | 4 +- llvm/unittests/MIR/MachineMetadata.cpp | 18 +- 1162 files changed, 33260 insertions(+), 32557 deletions(-) diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp index 1368663e93b31..fb3c34a9bc493 100644 --- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -3220,18 +3220,34 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) { if (parseOptionalAtomicOrdering(FailureOrder)) return true; + LLT MemoryType; if (Token.isNot(MIToken::IntegerLiteral) && - Token.isNot(MIToken::kw_unknown_size)) - return error("expected the size integer literal or 'unknown-size' after " + Token.isNot(MIToken::kw_unknown_size) && + Token.isNot(MIToken::lparen)) + return error("expected memory LLT, the size integer literal or 'unknown-size' after " "memory operation"); - uint64_t Size; + + uint64_t Size = MemoryLocation::UnknownSize; if (Token.is(MIToken::IntegerLiteral)) { if (getUint64(Size)) return true; + + // Convert from bytes to bits for storage. + MemoryType = LLT::scalar(8 * Size); + lex(); } else if (Token.is(MIToken::kw_unknown_size)) { Size = MemoryLocation::UnknownSize; + lex(); + } else { + if (expectAndConsume(MIToken::lparen)) + return true; + if (parseLowLevelType(Token.location(), MemoryType)) + return true; + if (expectAndConsume(MIToken::rparen)) + return true; + + Size = MemoryType.getSizeInBytes(); } - lex(); MachinePointerInfo Ptr = MachinePointerInfo(); if (Token.is(MIToken::Identifier)) { @@ -3247,7 +3263,8 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) { if (parseMachinePointerInfo(Ptr)) return true; } - unsigned BaseAlignment = (Size != MemoryLocation::UnknownSize ? Size : 1); + unsigned BaseAlignment = + (Size != MemoryLocation::UnknownSize ? PowerOf2Ceil(Size) : 1); AAMDNodes AAInfo; MDNode *Range = nullptr; while (consumeIfPresent(MIToken::comma)) { @@ -3294,8 +3311,8 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) { } if (expectAndConsume(MIToken::rparen)) return true; - Dest = MF.getMachineMemOperand(Ptr, Flags, Size, Align(BaseAlignment), AAInfo, - Range, SSID, Order, FailureOrder); + Dest = MF.getMachineMemOperand(Ptr, Flags, MemoryType, Align(BaseAlignment), + AAInfo, Range, SSID, Order, FailureOrder); return false; } diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp index b020c42375769..db973bda5e555 100644 --- a/llvm/lib/CodeGen/MachineOperand.cpp +++ b/llvm/lib/CodeGen/MachineOperand.cpp @@ -1121,10 +1121,10 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, if (getFailureOrdering() != AtomicOrdering::NotAtomic) OS << toIRString(getFailureOrdering()) << ' '; - if (getSize() == MemoryLocation::UnknownSize) - OS << "unknown-size"; + if (getMemoryType().isValid()) + OS << '(' << getMemoryType() << ')'; else - OS << getSize(); + OS << "unknown-size"; if (const Value *Val = getValue()) { OS << ((isLoad() && isStore()) ? " on " : isLoad() ? " from " : " into "); diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-callingconv-ios.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-callingconv-ios.ll index 558566141ae85..0340ab94357e9 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-callingconv-ios.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-callingconv-ios.ll @@ -25,22 +25,22 @@ define void @test_varargs() { ; CHECK: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64) ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[ANYEXT]](s32) - ; CHECK: G_STORE [[ANYEXT1]](s64), [[PTR_ADD]](p0) :: (store 8 into stack, align 1) + ; CHECK: G_STORE [[ANYEXT1]](s64), [[PTR_ADD]](p0) :: (store (s64) into stack, align 1) ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[C4]](s16) ; CHECK: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64) ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[ANYEXT2]](s32) - ; CHECK: G_STORE [[ANYEXT3]](s64), [[PTR_ADD1]](p0) :: (store 8 into stack + 8, align 1) + ; CHECK: G_STORE [[ANYEXT3]](s64), [[PTR_ADD1]](p0) :: (store (s64) into stack + 8, align 1) ; CHECK: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C10]](s64) ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[C5]](s32) - ; CHECK: G_STORE [[ANYEXT4]](s64), [[PTR_ADD2]](p0) :: (store 8 into stack + 16, align 1) + ; CHECK: G_STORE [[ANYEXT4]](s64), [[PTR_ADD2]](p0) :: (store (s64) into stack + 16, align 1) ; CHECK: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s64) - ; CHECK: G_STORE [[C6]](s32), [[PTR_ADD3]](p0) :: (store 4 into stack + 24, align 1) + ; CHECK: G_STORE [[C6]](s32), [[PTR_ADD3]](p0) :: (store (s32) into stack + 24, align 1) ; CHECK: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C12]](s64) - ; CHECK: G_STORE [[C7]](s64), [[PTR_ADD4]](p0) :: (store 8 into stack + 32, align 1) + ; CHECK: G_STORE [[C7]](s64), [[PTR_ADD4]](p0) :: (store (s64) into stack + 32, align 1) ; CHECK: BL @varargs, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit $d0, implicit $x1 ; CHECK: ADJCALLSTACKUP 40, 0, implicit-def $sp, implicit $sp ; CHECK: RET_ReallyLR @@ -79,16 +79,16 @@ define i32 @i8i16caller() nounwind readnone { ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $sp ; CHECK: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C12]](s64) - ; CHECK: G_STORE [[C8]](s8), [[PTR_ADD]](p0) :: (store 1 into stack) + ; CHECK: G_STORE [[C8]](s8), [[PTR_ADD]](p0) :: (store (s8) into stack) ; CHECK: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C13]](s64) - ; CHECK: G_STORE [[C9]](s16), [[PTR_ADD1]](p0) :: (store 2 into stack + 2, align 1) + ; CHECK: G_STORE [[C9]](s16), [[PTR_ADD1]](p0) :: (store (s16) into stack + 2, align 1) ; CHECK: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C14]](s64) - ; CHECK: G_STORE [[C10]](s8), [[PTR_ADD2]](p0) :: (store 1 into stack + 4) + ; CHECK: G_STORE [[C10]](s8), [[PTR_ADD2]](p0) :: (store (s8) into stack + 4) ; CHECK: [[C15:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C15]](s64) - ; CHECK: G_STORE [[C11]](s8), [[PTR_ADD3]](p0) :: (store 1 into stack + 5) + ; CHECK: G_STORE [[C11]](s8), [[PTR_ADD3]](p0) :: (store (s8) into stack + 5) ; CHECK: BL @i8i16callee, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $w3, implicit $w4, implicit $x5, implicit $x6, implicit $x7, implicit-def $x0 ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK: ADJCALLSTACKUP 6, 0, implicit-def $sp, implicit $sp diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-callingconv.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-callingconv.ll index efbf54392b8b8..4b0cf5a8b8ba8 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-callingconv.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-callingconv.ll @@ -121,7 +121,7 @@ define void @test_stack_ext_needed() { ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $sp ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK: G_STORE [[C]](s8), [[PTR_ADD]](p0) :: (store 1 into stack) + ; CHECK: G_STORE [[C]](s8), [[PTR_ADD]](p0) :: (store (s8) into stack) ; CHECK: BL @stack_ext_needed, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7 ; CHECK: ADJCALLSTACKUP 8, 0, implicit-def $sp, implicit $sp ; CHECK: RET_ReallyLR @@ -141,7 +141,7 @@ define void @callee_s128(i128 %a, i128 %b, i128 *%ptr) { ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 ; CHECK: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY2]](s64), [[COPY3]](s64) ; CHECK: [[COPY4:%[0-9]+]]:_(p0) = COPY $x4 - ; CHECK: G_STORE [[MV1]](s128), [[COPY4]](p0) :: (store 16 into %ir.ptr) + ; CHECK: G_STORE [[MV1]](s128), [[COPY4]](p0) :: (store (s128) into %ir.ptr) ; CHECK: RET_ReallyLR store i128 %b, i128 *%ptr ret void @@ -153,7 +153,7 @@ define void @caller_s128(i128 *%ptr) { ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.ptr) + ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p0) :: (load (s128) from %ir.ptr) ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](s128) ; CHECK: $x0 = COPY [[UV]](s64) @@ -202,16 +202,16 @@ define i32 @i8i16caller() nounwind readnone { ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $sp ; CHECK: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C12]](s64) - ; CHECK: G_STORE [[C8]](s8), [[PTR_ADD]](p0) :: (store 1 into stack) + ; CHECK: G_STORE [[C8]](s8), [[PTR_ADD]](p0) :: (store (s8) into stack) ; CHECK: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C13]](s64) - ; CHECK: G_STORE [[C9]](s16), [[PTR_ADD1]](p0) :: (store 2 into stack + 8, align 1) + ; CHECK: G_STORE [[C9]](s16), [[PTR_ADD1]](p0) :: (store (s16) into stack + 8, align 1) ; CHECK: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C14]](s64) - ; CHECK: G_STORE [[C10]](s8), [[PTR_ADD2]](p0) :: (store 1 into stack + 16) + ; CHECK: G_STORE [[C10]](s8), [[PTR_ADD2]](p0) :: (store (s8) into stack + 16) ; CHECK: [[C15:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C15]](s64) - ; CHECK: G_STORE [[C11]](s8), [[PTR_ADD3]](p0) :: (store 1 into stack + 24) + ; CHECK: G_STORE [[C11]](s8), [[PTR_ADD3]](p0) :: (store (s8) into stack + 24) ; CHECK: BL @i8i16callee, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $w3, implicit $w4, implicit $x5, implicit $x6, implicit $x7, implicit-def $x0 ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK: ADJCALLSTACKUP 32, 0, implicit-def $sp, implicit $sp @@ -230,7 +230,7 @@ define void @arg_v2i64(<2 x i64> %arg) { ; CHECK: liveins: $q0 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 ; CHECK: [[DEF:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[COPY]](<2 x s64>), [[DEF]](p0) :: (store 16 into `<2 x i64>* undef`) + ; CHECK: G_STORE [[COPY]](<2 x s64>), [[DEF]](p0) :: (store (<2 x s64>) into `<2 x i64>* undef`) ; CHECK: RET_ReallyLR store <2 x i64> %arg, <2 x i64>* undef ret void @@ -246,7 +246,7 @@ define void @arg_v8i64(<8 x i64> %arg) { ; CHECK: [[COPY3:%[0-9]+]]:_(<2 x s64>) = COPY $q3 ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s64>) = G_CONCAT_VECTORS [[COPY]](<2 x s64>), [[COPY1]](<2 x s64>), [[COPY2]](<2 x s64>), [[COPY3]](<2 x s64>) ; CHECK: [[DEF:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[CONCAT_VECTORS]](<8 x s64>), [[DEF]](p0) :: (store 64 into `<8 x i64>* undef`) + ; CHECK: G_STORE [[CONCAT_VECTORS]](<8 x s64>), [[DEF]](p0) :: (store (<8 x s64>) into `<8 x i64>* undef`) ; CHECK: RET_ReallyLR store <8 x i64> %arg, <8 x i64>* undef ret void @@ -259,7 +259,7 @@ define void @arg_v4f32(<4 x float> %arg) { ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 ; CHECK: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>) ; CHECK: [[DEF:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BITCAST]](<4 x s32>), [[DEF]](p0) :: (store 16 into `<4 x float>* undef`) + ; CHECK: G_STORE [[BITCAST]](<4 x s32>), [[DEF]](p0) :: (store (<4 x s32>) into `<4 x float>* undef`) ; CHECK: RET_ReallyLR store <4 x float> %arg, <4 x float>* undef ret void @@ -279,7 +279,7 @@ define void @ret_arg_v16f32(<16 x float> %arg) { ; CHECK: [[BITCAST3:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY3]](<2 x s64>) ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[BITCAST]](<4 x s32>), [[BITCAST1]](<4 x s32>), [[BITCAST2]](<4 x s32>), [[BITCAST3]](<4 x s32>) ; CHECK: [[DEF:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[CONCAT_VECTORS]](<16 x s32>), [[DEF]](p0) :: (store 64 into `<16 x float>* undef`) + ; CHECK: G_STORE [[CONCAT_VECTORS]](<16 x s32>), [[DEF]](p0) :: (store (<16 x s32>) into `<16 x float>* undef`) ; CHECK: RET_ReallyLR store <16 x float> %arg, <16 x float>* undef ret void diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll index 0cc39df24752a..f4ab08a50f5f8 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll @@ -32,7 +32,7 @@ define i128 @ABIi128(i128 %arg1) { ret i128 %res } -; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to legalize instruction: G_STORE %3:_(<3 x s32>), %4:_(p0) :: (store 12 into %ir.addr + 16, align 16, basealign 32) (in function: odd_vector) +; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to legalize instruction: G_STORE %3:_(<3 x s32>), %4:_(p0) :: (store (s96) into %ir.addr + 16, align 16, basealign 32) (in function: odd_vector) ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for odd_vector ; FALLBACK-WITH-REPORT-OUT-LABEL: odd_vector: define void @odd_vector(<7 x i32>* %addr) { diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-gep.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-gep.ll index 80629f2f9b1a0..d4d06c52d33d5 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-gep.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-gep.ll @@ -15,12 +15,12 @@ define i32 @cse_gep([4 x i32]* %ptr, i32 %idx) { ; O0: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]] ; O0: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[MUL]](s64) ; O0: [[COPY2:%[0-9]+]]:_(p0) = COPY [[PTR_ADD]](p0) - ; O0: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY2]](p0) :: (load 4 from %ir.gep1) + ; O0: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY2]](p0) :: (load (s32) from %ir.gep1) ; O0: [[MUL1:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]] ; O0: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[MUL1]](s64) ; O0: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; O0: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; O0: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 4 from %ir.gep2) + ; O0: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from %ir.gep2) ; O0: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD1]], [[LOAD1]] ; O0: $w0 = COPY [[ADD]](s32) ; O0: RET_ReallyLR implicit $w0 @@ -34,10 +34,10 @@ define i32 @cse_gep([4 x i32]* %ptr, i32 %idx) { ; O3: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]] ; O3: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[MUL]](s64) ; O3: [[COPY2:%[0-9]+]]:_(p0) = COPY [[PTR_ADD]](p0) - ; O3: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY2]](p0) :: (load 4 from %ir.gep1) + ; O3: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY2]](p0) :: (load (s32) from %ir.gep1) ; O3: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; O3: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C1]](s64) - ; O3: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 4 from %ir.gep2) + ; O3: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from %ir.gep2) ; O3: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD1]], [[LOAD1]] ; O3: $w0 = COPY [[ADD]](s32) ; O3: RET_ReallyLR implicit $w0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-stackprotect.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-stackprotect.ll index ad01264f1a43f..6708b8c7ecb09 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-stackprotect.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-stackprotect.ll @@ -11,8 +11,8 @@ ; CHECK-NOT: id: 1 ; CHECK: [[GUARD_SLOT:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.StackGuardSlot -; CHECK: [[GUARD:%[0-9]+]]:gpr64sp(p0) = LOAD_STACK_GUARD :: (dereferenceable invariant load 8 from @__stack_chk_guard) -; CHECK: G_STORE [[GUARD]](p0), [[GUARD_SLOT]](p0) :: (volatile store 8 into %stack.0.StackGuardSlot) +; CHECK: [[GUARD:%[0-9]+]]:gpr64sp(p0) = LOAD_STACK_GUARD :: (dereferenceable invariant load (p0) from @__stack_chk_guard) +; CHECK: G_STORE [[GUARD]](p0), [[GUARD_SLOT]](p0) :: (volatile store (p0) into %stack.0.StackGuardSlot) declare void @llvm.stackprotector(i8*, i8**) define void @test_stack_guard_remat2() { %StackGuardSlot = alloca i8* diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-switch.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-switch.ll index 46ef3bf363906..adf7c3f2b026e 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-switch.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-switch.ll @@ -784,7 +784,7 @@ define void @jt_multiple_jump_tables(%1* %arg, i32 %arg1, i32* %arg2) { ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[GV]], [[MUL]](s64) ; CHECK: [[C112:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C112]](s64) - ; CHECK: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[PTR_ADD1]](p0) :: (load 8 from %ir.tmp59) + ; CHECK: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[PTR_ADD1]](p0) :: (load (p0) from %ir.tmp59) ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp ; CHECK: $x0 = COPY [[COPY]](p0) ; CHECK: $x1 = COPY [[LOAD]](p0) @@ -1106,7 +1106,7 @@ define void @jt_2_tables_phi_edge_from_second() { ; CHECK: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 ; CHECK: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p0) :: (load 4 from `i32* undef`, align 8) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p0) :: (load (s32) from `i32* undef`, align 8) ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[LOAD]](s32), [[C]] ; CHECK: G_BRCOND [[ICMP]](s1), %bb.6 ; CHECK: G_BR %bb.19 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll index f95ae9f6ac2fe..b05aa3f16e6b3 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll @@ -346,12 +346,12 @@ define void @trunc(i64 %a) { ; CHECK-LABEL: name: load ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[ADDR42:%[0-9]+]]:_(p42) = COPY $x1 -; CHECK: [[VAL1:%[0-9]+]]:_(s64) = G_LOAD [[ADDR]](p0) :: (load 8 from %ir.addr, align 16) -; CHECK: [[VAL2:%[0-9]+]]:_(s64) = G_LOAD [[ADDR42]](p42) :: (load 8 from %ir.addr42, addrspace 42) +; CHECK: [[VAL1:%[0-9]+]]:_(s64) = G_LOAD [[ADDR]](p0) :: (load (s64) from %ir.addr, align 16) +; CHECK: [[VAL2:%[0-9]+]]:_(s64) = G_LOAD [[ADDR42]](p42) :: (load (s64) from %ir.addr42, addrspace 42) ; CHECK: [[SUM2:%.*]]:_(s64) = G_ADD [[VAL1]], [[VAL2]] -; CHECK: [[VAL3:%[0-9]+]]:_(s64) = G_LOAD [[ADDR]](p0) :: (volatile load 8 from %ir.addr) +; CHECK: [[VAL3:%[0-9]+]]:_(s64) = G_LOAD [[ADDR]](p0) :: (volatile load (s64) from %ir.addr) ; CHECK: [[SUM3:%[0-9]+]]:_(s64) = G_ADD [[SUM2]], [[VAL3]] -; CHECK: [[VAL4:%[0-9]+]]:_(s64) = G_LOAD [[ADDR]](p0) :: (load 8 from %ir.addr, !range !0) +; CHECK: [[VAL4:%[0-9]+]]:_(s64) = G_LOAD [[ADDR]](p0) :: (load (s64) from %ir.addr, !range !0) ; CHECK: [[SUM4:%[0-9]+]]:_(s64) = G_ADD [[SUM3]], [[VAL4]] ; CHECK: $x0 = COPY [[SUM4]] ; CHECK: RET_ReallyLR implicit $x0 @@ -374,9 +374,9 @@ define i64 @load(i64* %addr, i64 addrspace(42)* %addr42) { ; CHECK: [[ADDR42:%[0-9]+]]:_(p42) = COPY $x1 ; CHECK: [[VAL1:%[0-9]+]]:_(s64) = COPY $x2 ; CHECK: [[VAL2:%[0-9]+]]:_(s64) = COPY $x3 -; CHECK: G_STORE [[VAL1]](s64), [[ADDR]](p0) :: (store 8 into %ir.addr, align 16) -; CHECK: G_STORE [[VAL2]](s64), [[ADDR42]](p42) :: (store 8 into %ir.addr42, addrspace 42) -; CHECK: G_STORE [[VAL1]](s64), [[ADDR]](p0) :: (volatile store 8 into %ir.addr) +; CHECK: G_STORE [[VAL1]](s64), [[ADDR]](p0) :: (store (s64) into %ir.addr, align 16) +; CHECK: G_STORE [[VAL2]](s64), [[ADDR42]](p42) :: (store (s64) into %ir.addr42, addrspace 42) +; CHECK: G_STORE [[VAL1]](s64), [[ADDR]](p0) :: (volatile store (s64) into %ir.addr) ; CHECK: RET_ReallyLR define void @store(i64* %addr, i64 addrspace(42)* %addr42, i64 %val1, i64 %val2) { store i64 %val1, i64* %addr, align 16 @@ -602,13 +602,13 @@ define i8* @test_constant_null() { ; CHECK-LABEL: name: test_struct_memops ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 -; CHECK: [[VAL1:%[0-9]+]]:_(s8) = G_LOAD %0(p0) :: (load 1 from %ir.addr, align 4) +; CHECK: [[VAL1:%[0-9]+]]:_(s8) = G_LOAD %0(p0) :: (load (s8) from %ir.addr, align 4) ; CHECK: [[CST1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD [[ADDR]], [[CST1]](s64) -; CHECK: [[VAL2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p0) :: (load 4 from %ir.addr + 4) -; CHECK: G_STORE [[VAL1]](s8), [[ADDR]](p0) :: (store 1 into %ir.addr, align 4) +; CHECK: [[VAL2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p0) :: (load (s32) from %ir.addr + 4) +; CHECK: G_STORE [[VAL1]](s8), [[ADDR]](p0) :: (store (s8) into %ir.addr, align 4) ; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_PTR_ADD [[ADDR]], [[CST1]](s64) -; CHECK: G_STORE [[VAL2]](s32), [[GEP2]](p0) :: (store 4 into %ir.addr + 4) +; CHECK: G_STORE [[VAL2]](s32), [[GEP2]](p0) :: (store (s32) into %ir.addr + 4) define void @test_struct_memops({ i8, i32 }* %addr) { %val = load { i8, i32 }, { i8, i32 }* %addr store { i8, i32 } %val, { i8, i32 }* %addr @@ -617,8 +617,8 @@ define void @test_struct_memops({ i8, i32 }* %addr) { ; CHECK-LABEL: name: test_i1_memops ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 -; CHECK: [[VAL:%[0-9]+]]:_(s1) = G_LOAD [[ADDR]](p0) :: (load 1 from %ir.addr) -; CHECK: G_STORE [[VAL]](s1), [[ADDR]](p0) :: (store 1 into %ir.addr) +; CHECK: [[VAL:%[0-9]+]]:_(s1) = G_LOAD [[ADDR]](p0) :: (load (s1) from %ir.addr) +; CHECK: G_STORE [[VAL]](s1), [[ADDR]](p0) :: (store (s1) into %ir.addr) define void @test_i1_memops(i1* %addr) { %val = load i1, i1* %addr store i1 %val, i1* %addr @@ -709,10 +709,10 @@ define float @test_frem(float %arg1, float %arg2) { ; CHECK: [[RHS:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x2 ; CHECK: [[VAL:%[0-9]+]]:_(s32), [[OVERFLOW:%[0-9]+]]:_(s1) = G_SADDO [[LHS]], [[RHS]] -; CHECK: G_STORE [[VAL]](s32), [[ADDR]](p0) :: (store 4 into %ir.addr) +; CHECK: G_STORE [[VAL]](s32), [[ADDR]](p0) :: (store (s32) into %ir.addr) ; CHECK: [[CST:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[ADDR]], [[CST]](s64) -; CHECK: G_STORE [[OVERFLOW]](s1), [[GEP]](p0) :: (store 1 into %ir.addr + 4, align 4) +; CHECK: G_STORE [[OVERFLOW]](s1), [[GEP]](p0) :: (store (s1) into %ir.addr + 4, align 4) declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) define void @test_sadd_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) { %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %lhs, i32 %rhs) @@ -725,10 +725,10 @@ define void @test_sadd_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) { ; CHECK: [[RHS:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x2 ; CHECK: [[VAL:%[0-9]+]]:_(s32), [[OVERFLOW:%[0-9]+]]:_(s1) = G_UADDO [[LHS]], [[RHS]] -; CHECK: G_STORE [[VAL]](s32), [[ADDR]](p0) :: (store 4 into %ir.addr) +; CHECK: G_STORE [[VAL]](s32), [[ADDR]](p0) :: (store (s32) into %ir.addr) ; CHECK: [[CST:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[ADDR]], [[CST]](s64) -; CHECK: G_STORE [[OVERFLOW]](s1), [[GEP]](p0) :: (store 1 into %ir.addr + 4, align 4) +; CHECK: G_STORE [[OVERFLOW]](s1), [[GEP]](p0) :: (store (s1) into %ir.addr + 4, align 4) declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) define void @test_uadd_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) { %res = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %lhs, i32 %rhs) @@ -741,10 +741,10 @@ define void @test_uadd_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) { ; CHECK: [[RHS:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x2 ; CHECK: [[VAL:%[0-9]+]]:_(s32), [[OVERFLOW:%[0-9]+]]:_(s1) = G_SSUBO [[LHS]], [[RHS]] -; CHECK: G_STORE [[VAL]](s32), [[ADDR]](p0) :: (store 4 into %ir.subr) +; CHECK: G_STORE [[VAL]](s32), [[ADDR]](p0) :: (store (s32) into %ir.subr) ; CHECK: [[CST:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[ADDR]], [[CST]](s64) -; CHECK: G_STORE [[OVERFLOW]](s1), [[GEP]](p0) :: (store 1 into %ir.subr + 4, align 4) +; CHECK: G_STORE [[OVERFLOW]](s1), [[GEP]](p0) :: (store (s1) into %ir.subr + 4, align 4) declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32) define void @test_ssub_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %subr) { %res = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %lhs, i32 %rhs) @@ -757,10 +757,10 @@ define void @test_ssub_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %subr) { ; CHECK: [[RHS:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x2 ; CHECK: [[VAL:%[0-9]+]]:_(s32), [[OVERFLOW:%[0-9]+]]:_(s1) = G_USUBO [[LHS]], [[RHS]] -; CHECK: G_STORE [[VAL]](s32), [[ADDR]](p0) :: (store 4 into %ir.subr) +; CHECK: G_STORE [[VAL]](s32), [[ADDR]](p0) :: (store (s32) into %ir.subr) ; CHECK: [[CST:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[ADDR]], [[CST]](s64) -; CHECK: G_STORE [[OVERFLOW]](s1), [[GEP]](p0) :: (store 1 into %ir.subr + 4, align 4) +; CHECK: G_STORE [[OVERFLOW]](s1), [[GEP]](p0) :: (store (s1) into %ir.subr + 4, align 4) declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) define void @test_usub_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %subr) { %res = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %lhs, i32 %rhs) @@ -773,10 +773,10 @@ define void @test_usub_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %subr) { ; CHECK: [[RHS:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x2 ; CHECK: [[VAL:%[0-9]+]]:_(s32), [[OVERFLOW:%[0-9]+]]:_(s1) = G_SMULO [[LHS]], [[RHS]] -; CHECK: G_STORE [[VAL]](s32), [[ADDR]](p0) :: (store 4 into %ir.addr) +; CHECK: G_STORE [[VAL]](s32), [[ADDR]](p0) :: (store (s32) into %ir.addr) ; CHECK: [[CST:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[ADDR]], [[CST]](s64) -; CHECK: G_STORE [[OVERFLOW]](s1), [[GEP]](p0) :: (store 1 into %ir.addr + 4, align 4) +; CHECK: G_STORE [[OVERFLOW]](s1), [[GEP]](p0) :: (store (s1) into %ir.addr + 4, align 4) declare { i32, i1 } @llvm.smul.with.overflow.i32(i32, i32) define void @test_smul_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) { %res = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %lhs, i32 %rhs) @@ -789,10 +789,10 @@ define void @test_smul_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) { ; CHECK: [[RHS:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x2 ; CHECK: [[VAL:%[0-9]+]]:_(s32), [[OVERFLOW:%[0-9]+]]:_(s1) = G_UMULO [[LHS]], [[RHS]] -; CHECK: G_STORE [[VAL]](s32), [[ADDR]](p0) :: (store 4 into %ir.addr) +; CHECK: G_STORE [[VAL]](s32), [[ADDR]](p0) :: (store (s32) into %ir.addr) ; CHECK: [[CST:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[ADDR]], [[CST]](s64) -; CHECK: G_STORE [[OVERFLOW]](s1), [[GEP]](p0) :: (store 1 into %ir.addr + 4, align 4) +; CHECK: G_STORE [[OVERFLOW]](s1), [[GEP]](p0) :: (store (s1) into %ir.addr + 4, align 4) declare { i32, i1 } @llvm.umul.with.overflow.i32(i32, i32) define void @test_umul_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) { %res = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %lhs, i32 %rhs) @@ -802,16 +802,16 @@ define void @test_umul_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) { ; CHECK-LABEL: name: test_extractvalue ; CHECK: %0:_(p0) = COPY $x0 -; CHECK: [[LD1:%[0-9]+]]:_(s8) = G_LOAD %0(p0) :: (load 1 from %ir.addr, align 4) +; CHECK: [[LD1:%[0-9]+]]:_(s8) = G_LOAD %0(p0) :: (load (s8) from %ir.addr, align 4) ; CHECK: [[CST1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD %0, [[CST1]](s64) -; CHECK: [[LD2:%[0-9]+]]:_(s8) = G_LOAD [[GEP1]](p0) :: (load 1 from %ir.addr + 4, align 4) +; CHECK: [[LD2:%[0-9]+]]:_(s8) = G_LOAD [[GEP1]](p0) :: (load (s8) from %ir.addr + 4, align 4) ; CHECK: [[CST2:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_PTR_ADD %0, [[CST2]](s64) -; CHECK: [[LD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 4 from %ir.addr + 8) +; CHECK: [[LD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load (s32) from %ir.addr + 8) ; CHECK: [[CST3:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_PTR_ADD %0, [[CST3]](s64) -; CHECK: [[LD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p0) :: (load 4 from %ir.addr + 12) +; CHECK: [[LD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p0) :: (load (s32) from %ir.addr + 12) ; CHECK: $w0 = COPY [[LD3]](s32) %struct.nested = type {i8, { i8, i32 }, i32} define i32 @test_extractvalue(%struct.nested* %addr) { @@ -823,19 +823,19 @@ define i32 @test_extractvalue(%struct.nested* %addr) { ; CHECK-LABEL: name: test_extractvalue_agg ; CHECK: %0:_(p0) = COPY $x0 ; CHECK: %1:_(p0) = COPY $x1 -; CHECK: [[LD1:%[0-9]+]]:_(s8) = G_LOAD %0(p0) :: (load 1 from %ir.addr, align 4) +; CHECK: [[LD1:%[0-9]+]]:_(s8) = G_LOAD %0(p0) :: (load (s8) from %ir.addr, align 4) ; CHECK: [[CST1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD %0, [[CST1]](s64) -; CHECK: [[LD2:%[0-9]+]]:_(s8) = G_LOAD [[GEP1]](p0) :: (load 1 from %ir.addr + 4, align 4) +; CHECK: [[LD2:%[0-9]+]]:_(s8) = G_LOAD [[GEP1]](p0) :: (load (s8) from %ir.addr + 4, align 4) ; CHECK: [[CST2:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_PTR_ADD %0, [[CST2]](s64) -; CHECK: [[LD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 4 from %ir.addr + 8) +; CHECK: [[LD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load (s32) from %ir.addr + 8) ; CHECK: [[CST3:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_PTR_ADD %0, [[CST3]](s64) -; CHECK: [[LD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p0) :: (load 4 from %ir.addr + 12) -; CHECK: G_STORE [[LD2]](s8), %1(p0) :: (store 1 into %ir.addr2, align 4) +; CHECK: [[LD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p0) :: (load (s32) from %ir.addr + 12) +; CHECK: G_STORE [[LD2]](s8), %1(p0) :: (store (s8) into %ir.addr2, align 4) ; CHECK: [[GEP4:%[0-9]+]]:_(p0) = G_PTR_ADD %1, [[CST1]](s64) -; CHECK: G_STORE [[LD3]](s32), [[GEP4]](p0) :: (store 4 into %ir.addr2 + 4) +; CHECK: G_STORE [[LD3]](s32), [[GEP4]](p0) :: (store (s32) into %ir.addr2 + 4) define void @test_extractvalue_agg(%struct.nested* %addr, {i8, i32}* %addr2) { %struct = load %struct.nested, %struct.nested* %addr %res = extractvalue %struct.nested %struct, 1 @@ -857,23 +857,23 @@ define void @test_trivial_extract_ptr([1 x i8*] %s, i8 %val) { ; CHECK-LABEL: name: test_insertvalue ; CHECK: %0:_(p0) = COPY $x0 ; CHECK: %1:_(s32) = COPY $w1 -; CHECK: [[LD1:%[0-9]+]]:_(s8) = G_LOAD %0(p0) :: (load 1 from %ir.addr, align 4) +; CHECK: [[LD1:%[0-9]+]]:_(s8) = G_LOAD %0(p0) :: (load (s8) from %ir.addr, align 4) ; CHECK: [[CST1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD %0, [[CST1]](s64) -; CHECK: [[LD2:%[0-9]+]]:_(s8) = G_LOAD [[GEP1]](p0) :: (load 1 from %ir.addr + 4, align 4) +; CHECK: [[LD2:%[0-9]+]]:_(s8) = G_LOAD [[GEP1]](p0) :: (load (s8) from %ir.addr + 4, align 4) ; CHECK: [[CST2:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_PTR_ADD %0, [[CST2]](s64) -; CHECK: [[LD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 4 from %ir.addr + 8) +; CHECK: [[LD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load (s32) from %ir.addr + 8) ; CHECK: [[CST3:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_PTR_ADD %0, [[CST3]](s64) -; CHECK: [[LD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p0) :: (load 4 from %ir.addr + 12) -; CHECK: G_STORE [[LD1]](s8), %0(p0) :: (store 1 into %ir.addr, align 4) +; CHECK: [[LD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p0) :: (load (s32) from %ir.addr + 12) +; CHECK: G_STORE [[LD1]](s8), %0(p0) :: (store (s8) into %ir.addr, align 4) ; CHECK: [[GEP4:%[0-9]+]]:_(p0) = G_PTR_ADD %0, [[CST1]](s64) -; CHECK: G_STORE [[LD2]](s8), [[GEP4]](p0) :: (store 1 into %ir.addr + 4, align 4) +; CHECK: G_STORE [[LD2]](s8), [[GEP4]](p0) :: (store (s8) into %ir.addr + 4, align 4) ; CHECK: [[GEP5:%[0-9]+]]:_(p0) = G_PTR_ADD %0, [[CST2]](s64) -; CHECK: G_STORE %1(s32), [[GEP5]](p0) :: (store 4 into %ir.addr + 8) +; CHECK: G_STORE %1(s32), [[GEP5]](p0) :: (store (s32) into %ir.addr + 8) ; CHECK: [[GEP6:%[0-9]+]]:_(p0) = G_PTR_ADD %0, [[CST3]](s64) -; CHECK: G_STORE [[LD4]](s32), [[GEP6]](p0) :: (store 4 into %ir.addr + 12) +; CHECK: G_STORE [[LD4]](s32), [[GEP6]](p0) :: (store (s32) into %ir.addr + 12) define void @test_insertvalue(%struct.nested* %addr, i32 %val) { %struct = load %struct.nested, %struct.nested* %addr %newstruct = insertvalue %struct.nested %struct, i32 %val, 1, 1 @@ -902,26 +902,26 @@ define [1 x i8*] @test_trivial_insert_ptr([1 x i8*] %s, i8* %val) { ; CHECK-LABEL: name: test_insertvalue_agg ; CHECK: %0:_(p0) = COPY $x0 ; CHECK: %1:_(p0) = COPY $x1 -; CHECK: [[LD1:%[0-9]+]]:_(s8) = G_LOAD %1(p0) :: (load 1 from %ir.addr2, align 4) +; CHECK: [[LD1:%[0-9]+]]:_(s8) = G_LOAD %1(p0) :: (load (s8) from %ir.addr2, align 4) ; CHECK: [[CST1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD %1, [[CST1]](s64) -; CHECK: [[LD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p0) :: (load 4 from %ir.addr2 + 4) -; CHECK: [[LD3:%[0-9]+]]:_(s8) = G_LOAD %0(p0) :: (load 1 from %ir.addr, align 4) +; CHECK: [[LD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p0) :: (load (s32) from %ir.addr2 + 4) +; CHECK: [[LD3:%[0-9]+]]:_(s8) = G_LOAD %0(p0) :: (load (s8) from %ir.addr, align 4) ; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_PTR_ADD %0, [[CST1]](s64) -; CHECK: [[LD4:%[0-9]+]]:_(s8) = G_LOAD [[GEP2]](p0) :: (load 1 from %ir.addr + 4, align 4) +; CHECK: [[LD4:%[0-9]+]]:_(s8) = G_LOAD [[GEP2]](p0) :: (load (s8) from %ir.addr + 4, align 4) ; CHECK: [[CST3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_PTR_ADD %0, [[CST3]](s64) -; CHECK: [[LD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p0) :: (load 4 from %ir.addr + 8) +; CHECK: [[LD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p0) :: (load (s32) from %ir.addr + 8) ; CHECK: [[CST4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; CHECK: [[GEP4:%[0-9]+]]:_(p0) = G_PTR_ADD %0, [[CST4]](s64) -; CHECK: [[LD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p0) :: (load 4 from %ir.addr + 12) -; CHECK: G_STORE [[LD3]](s8), %0(p0) :: (store 1 into %ir.addr, align 4) +; CHECK: [[LD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p0) :: (load (s32) from %ir.addr + 12) +; CHECK: G_STORE [[LD3]](s8), %0(p0) :: (store (s8) into %ir.addr, align 4) ; CHECK: [[GEP5:%[0-9]+]]:_(p0) = G_PTR_ADD %0, [[CST1]](s64) -; CHECK: G_STORE [[LD1]](s8), [[GEP5]](p0) :: (store 1 into %ir.addr + 4, align 4) +; CHECK: G_STORE [[LD1]](s8), [[GEP5]](p0) :: (store (s8) into %ir.addr + 4, align 4) ; CHECK: [[GEP6:%[0-9]+]]:_(p0) = G_PTR_ADD %0, [[CST3]](s64) -; CHECK: G_STORE [[LD2]](s32), [[GEP6]](p0) :: (store 4 into %ir.addr + 8) +; CHECK: G_STORE [[LD2]](s32), [[GEP6]](p0) :: (store (s32) into %ir.addr + 8) ; CHECK: [[GEP7:%[0-9]+]]:_(p0) = G_PTR_ADD %0, [[CST4]](s64) -; CHECK: G_STORE [[LD6]](s32), [[GEP7]](p0) :: (store 4 into %ir.addr + 12) +; CHECK: G_STORE [[LD6]](s32), [[GEP7]](p0) :: (store (s32) into %ir.addr + 12) define void @test_insertvalue_agg(%struct.nested* %addr, {i8, i32}* %addr2) { %smallstruct = load {i8, i32}, {i8, i32}* %addr2 %struct = load %struct.nested, %struct.nested* %addr @@ -1138,7 +1138,7 @@ define void @test_memcpy(i8* %dst, i8* %src, i64 %size) { ; CHECK: [[DST:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[SRC:%[0-9]+]]:_(p0) = COPY $x1 ; CHECK: [[SIZE:%[0-9]+]]:_(s64) = COPY $x2 -; CHECK: G_MEMCPY [[DST]](p0), [[SRC]](p0), [[SIZE]](s64), 0 :: (store 1 into %ir.dst), (load 1 from %ir.src) +; CHECK: G_MEMCPY [[DST]](p0), [[SRC]](p0), [[SIZE]](s64), 0 :: (store (s8) into %ir.dst), (load (s8) from %ir.src) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %size, i1 0) ret void } @@ -1148,7 +1148,7 @@ define void @test_memcpy_tail(i8* %dst, i8* %src, i64 %size) { ; CHECK: [[DST:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[SRC:%[0-9]+]]:_(p0) = COPY $x1 ; CHECK: [[SIZE:%[0-9]+]]:_(s64) = COPY $x2 -; CHECK: G_MEMCPY [[DST]](p0), [[SRC]](p0), [[SIZE]](s64), 1 :: (store 1 into %ir.dst), (load 1 from %ir.src) +; CHECK: G_MEMCPY [[DST]](p0), [[SRC]](p0), [[SIZE]](s64), 1 :: (store (s8) into %ir.dst), (load (s8) from %ir.src) tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %size, i1 0) ret void } @@ -1159,7 +1159,7 @@ define void @test_memcpy_nonzero_as(i8 addrspace(1)* %dst, i8 addrspace(1) * %sr ; CHECK: [[DST:%[0-9]+]]:_(p1) = COPY $x0 ; CHECK: [[SRC:%[0-9]+]]:_(p1) = COPY $x1 ; CHECK: [[SIZE:%[0-9]+]]:_(s64) = COPY $x2 -; CHECK: G_MEMCPY [[DST]](p1), [[SRC]](p1), [[SIZE]](s64), 0 :: (store 1 into %ir.dst, addrspace 1), (load 1 from %ir.src, addrspace 1) +; CHECK: G_MEMCPY [[DST]](p1), [[SRC]](p1), [[SIZE]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 1) call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %size, i1 0) ret void } @@ -1170,7 +1170,7 @@ define void @test_memmove(i8* %dst, i8* %src, i64 %size) { ; CHECK: [[DST:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[SRC:%[0-9]+]]:_(p0) = COPY $x1 ; CHECK: [[SIZE:%[0-9]+]]:_(s64) = COPY $x2 -; CHECK: G_MEMMOVE [[DST]](p0), [[SRC]](p0), [[SIZE]](s64), 0 :: (store 1 into %ir.dst), (load 1 from %ir.src) +; CHECK: G_MEMMOVE [[DST]](p0), [[SRC]](p0), [[SIZE]](s64), 0 :: (store (s8) into %ir.dst), (load (s8) from %ir.src) call void @llvm.memmove.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %size, i1 0) ret void } @@ -1182,7 +1182,7 @@ define void @test_memset(i8* %dst, i8 %val, i64 %size) { ; CHECK: [[SRC_C:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK: [[SRC:%[0-9]+]]:_(s8) = G_TRUNC [[SRC_C]] ; CHECK: [[SIZE:%[0-9]+]]:_(s64) = COPY $x2 -; CHECK: G_MEMSET [[DST]](p0), [[SRC]](s8), [[SIZE]](s64), 0 :: (store 1 into %ir.dst) +; CHECK: G_MEMSET [[DST]](p0), [[SRC]](s8), [[SIZE]](s64), 0 :: (store (s8) into %ir.dst) call void @llvm.memset.p0i8.i64(i8* %dst, i8 %val, i64 %size, i1 0) ret void } @@ -1464,12 +1464,12 @@ define void @test_lifetime_intrin() { define void @test_load_store_atomics(i8* %addr) { ; CHECK-LABEL: name: test_load_store_atomics ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 -; CHECK: [[V0:%[0-9]+]]:_(s8) = G_LOAD [[ADDR]](p0) :: (load unordered 1 from %ir.addr) -; CHECK: G_STORE [[V0]](s8), [[ADDR]](p0) :: (store monotonic 1 into %ir.addr) -; CHECK: [[V1:%[0-9]+]]:_(s8) = G_LOAD [[ADDR]](p0) :: (load acquire 1 from %ir.addr) -; CHECK: G_STORE [[V1]](s8), [[ADDR]](p0) :: (store release 1 into %ir.addr) -; CHECK: [[V2:%[0-9]+]]:_(s8) = G_LOAD [[ADDR]](p0) :: (load syncscope("singlethread") seq_cst 1 from %ir.addr) -; CHECK: G_STORE [[V2]](s8), [[ADDR]](p0) :: (store syncscope("singlethread") monotonic 1 into %ir.addr) +; CHECK: [[V0:%[0-9]+]]:_(s8) = G_LOAD [[ADDR]](p0) :: (load unordered (s8) from %ir.addr) +; CHECK: G_STORE [[V0]](s8), [[ADDR]](p0) :: (store monotonic (s8) into %ir.addr) +; CHECK: [[V1:%[0-9]+]]:_(s8) = G_LOAD [[ADDR]](p0) :: (load acquire (s8) from %ir.addr) +; CHECK: G_STORE [[V1]](s8), [[ADDR]](p0) :: (store release (s8) into %ir.addr) +; CHECK: [[V2:%[0-9]+]]:_(s8) = G_LOAD [[ADDR]](p0) :: (load syncscope("singlethread") seq_cst (s8) from %ir.addr) +; CHECK: G_STORE [[V2]](s8), [[ADDR]](p0) :: (store syncscope("singlethread") monotonic (s8) into %ir.addr) %v0 = load atomic i8, i8* %addr unordered, align 1 store atomic i8 %v0, i8* %addr monotonic, align 1 @@ -1778,7 +1778,7 @@ define <4 x half> @test_constant_vector() { define i32 @test_target_mem_intrinsic(i32* %addr) { ; CHECK-LABEL: name: test_target_mem_intrinsic ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 -; CHECK: [[VAL:%[0-9]+]]:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.ldxr), [[ADDR]](p0) :: (volatile load 4 from %ir.addr) +; CHECK: [[VAL:%[0-9]+]]:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.ldxr), [[ADDR]](p0) :: (volatile load (s32) from %ir.addr) ; CHECK: G_TRUNC [[VAL]](s64) %val = call i64 @llvm.aarch64.ldxr.p0i32(i32* %addr) %trunc = trunc i64 %val to i32 @@ -1850,33 +1850,33 @@ define void @test_phi_diamond({ i8, i16, i32 }* %a.ptr, { i8, i16, i32 }* %b.ptr ; CHECK: G_BRCOND [[TRUNC]](s1), %bb.2 ; CHECK: G_BR %bb.3 -; CHECK: [[LD1:%[0-9]+]]:_(s8) = G_LOAD [[ARG1]](p0) :: (load 1 from %ir.a.ptr, align 4) +; CHECK: [[LD1:%[0-9]+]]:_(s8) = G_LOAD [[ARG1]](p0) :: (load (s8) from %ir.a.ptr, align 4) ; CHECK: [[CST1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD [[ARG1]], [[CST1]](s64) -; CHECK: [[LD2:%[0-9]+]]:_(s16) = G_LOAD [[GEP1]](p0) :: (load 2 from %ir.a.ptr + 2) +; CHECK: [[LD2:%[0-9]+]]:_(s16) = G_LOAD [[GEP1]](p0) :: (load (s16) from %ir.a.ptr + 2) ; CHECK: [[CST2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_PTR_ADD [[ARG1]], [[CST2]](s64) -; CHECK: [[LD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 4 from %ir.a.ptr + 4) +; CHECK: [[LD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load (s32) from %ir.a.ptr + 4) ; CHECK: G_BR %bb.4 -; CHECK: [[LD4:%[0-9]+]]:_(s8) = G_LOAD [[ARG2]](p0) :: (load 1 from %ir.b.ptr, align 4) +; CHECK: [[LD4:%[0-9]+]]:_(s8) = G_LOAD [[ARG2]](p0) :: (load (s8) from %ir.b.ptr, align 4) ; CHECK: [[CST3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_PTR_ADD [[ARG2]], [[CST3]](s64) -; CHECK: [[LD5:%[0-9]+]]:_(s16) = G_LOAD [[GEP3]](p0) :: (load 2 from %ir.b.ptr + 2) +; CHECK: [[LD5:%[0-9]+]]:_(s16) = G_LOAD [[GEP3]](p0) :: (load (s16) from %ir.b.ptr + 2) ; CHECK: [[CST4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK: [[GEP4:%[0-9]+]]:_(p0) = G_PTR_ADD [[ARG2]], [[CST4]](s64) -; CHECK: [[LD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p0) :: (load 4 from %ir.b.ptr + 4) +; CHECK: [[LD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p0) :: (load (s32) from %ir.b.ptr + 4) ; CHECK: [[PN1:%[0-9]+]]:_(s8) = G_PHI [[LD1]](s8), %bb.2, [[LD4]](s8), %bb.3 ; CHECK: [[PN2:%[0-9]+]]:_(s16) = G_PHI [[LD2]](s16), %bb.2, [[LD5]](s16), %bb.3 ; CHECK: [[PN3:%[0-9]+]]:_(s32) = G_PHI [[LD3]](s32), %bb.2, [[LD6]](s32), %bb.3 -; CHECK: G_STORE [[PN1]](s8), [[ARG4]](p0) :: (store 1 into %ir.dst, align 4) +; CHECK: G_STORE [[PN1]](s8), [[ARG4]](p0) :: (store (s8) into %ir.dst, align 4) ; CHECK: [[CST5:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CHECK: [[GEP5:%[0-9]+]]:_(p0) = G_PTR_ADD [[ARG4]], [[CST5]](s64) -; CHECK: G_STORE [[PN2]](s16), [[GEP5]](p0) :: (store 2 into %ir.dst + 2) +; CHECK: G_STORE [[PN2]](s16), [[GEP5]](p0) :: (store (s16) into %ir.dst + 2) ; CHECK: [[CST6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK: [[GEP6:%[0-9]+]]:_(p0) = G_PTR_ADD [[ARG4]], [[CST6]](s64) -; CHECK: G_STORE [[PN3]](s32), [[GEP6]](p0) :: (store 4 into %ir.dst + 4) +; CHECK: G_STORE [[PN3]](s32), [[GEP6]](p0) :: (store (s32) into %ir.dst + 4) ; CHECK: RET_ReallyLR entry: @@ -1909,25 +1909,25 @@ define void @test_nested_aggregate_const(%agg.nested *%ptr) { ; CHECK: [[CST4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; CHECK: [[CST5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK: [[CST6:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 -; CHECK: G_STORE [[CST1]](s32), [[BASE]](p0) :: (store 4 into %ir.ptr, align 8) +; CHECK: G_STORE [[CST1]](s32), [[BASE]](p0) :: (store (s32) into %ir.ptr, align 8) ; CHECK: [[CST7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD [[BASE]], [[CST7]](s64) -; CHECK: G_STORE [[CST1]](s32), [[GEP1]](p0) :: (store 4 into %ir.ptr + 4) +; CHECK: G_STORE [[CST1]](s32), [[GEP1]](p0) :: (store (s32) into %ir.ptr + 4) ; CHECK: [[CST8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_PTR_ADD [[BASE]], [[CST8]](s64) -; CHECK: G_STORE [[CST2]](s16), [[GEP2]](p0) :: (store 2 into %ir.ptr + 8, align 8) +; CHECK: G_STORE [[CST2]](s16), [[GEP2]](p0) :: (store (s16) into %ir.ptr + 8, align 8) ; CHECK: [[CST9:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 ; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_PTR_ADD [[BASE]], [[CST9]](s64) -; CHECK: G_STORE [[CST3]](s8), [[GEP3]](p0) :: (store 1 into %ir.ptr + 10, align 2) +; CHECK: G_STORE [[CST3]](s8), [[GEP3]](p0) :: (store (s8) into %ir.ptr + 10, align 2) ; CHECK: [[CST10:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK: [[GEP4:%[0-9]+]]:_(p0) = G_PTR_ADD [[BASE]], [[CST10]](s64) -; CHECK: G_STORE [[CST4]](s64), [[GEP4]](p0) :: (store 8 into %ir.ptr + 16) +; CHECK: G_STORE [[CST4]](s64), [[GEP4]](p0) :: (store (s64) into %ir.ptr + 16) ; CHECK: [[CST11:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 ; CHECK: [[GEP5:%[0-9]+]]:_(p0) = G_PTR_ADD [[BASE]], [[CST11]](s64) -; CHECK: G_STORE [[CST5]](s64), [[GEP5]](p0) :: (store 8 into %ir.ptr + 24) +; CHECK: G_STORE [[CST5]](s64), [[GEP5]](p0) :: (store (s64) into %ir.ptr + 24) ; CHECK: [[CST12:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CHECK: [[GEP6:%[0-9]+]]:_(p0) = G_PTR_ADD [[BASE]], [[CST12]](s64) -; CHECK: G_STORE [[CST6]](s32), [[GEP6]](p0) :: (store 4 into %ir.ptr + 32, align 8) +; CHECK: G_STORE [[CST6]](s32), [[GEP6]](p0) :: (store (s32) into %ir.ptr + 32, align 8) store %agg.nested { i32 1, i32 1, %agg.inner { i16 2, i8 3, %agg.inner.inner {i64 5, i64 8} }, i32 13}, %agg.nested *%ptr ret void } @@ -1954,7 +1954,7 @@ define i32 @test_atomic_cmpxchg_1(i32* %addr) { ; CHECK-NEXT: [[NEWVAL:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK: bb.2.repeat: ; CHECK-NEXT: successors: %bb.3({{[^)]+}}), %bb.2({{[^)]+}}) -; CHECK: [[OLDVALRES:%[0-9]+]]:_(s32), [[SUCCESS:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[ADDR]](p0), [[OLDVAL]], [[NEWVAL]] :: (load store monotonic monotonic 4 on %ir.addr) +; CHECK: [[OLDVALRES:%[0-9]+]]:_(s32), [[SUCCESS:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[ADDR]](p0), [[OLDVAL]], [[NEWVAL]] :: (load store monotonic monotonic (s32) on %ir.addr) ; CHECK-NEXT: G_BRCOND [[SUCCESS]](s1), %bb.3 ; CHECK-NEXT: G_BR %bb.2 ; CHECK: bb.3.done: @@ -1980,7 +1980,7 @@ define i32 @test_weak_atomic_cmpxchg_1(i32* %addr) { ; CHECK-NEXT: [[NEWVAL:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK: bb.2.repeat: ; CHECK-NEXT: successors: %bb.3({{[^)]+}}), %bb.2({{[^)]+}}) -; CHECK: [[OLDVALRES:%[0-9]+]]:_(s32), [[SUCCESS:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[ADDR]](p0), [[OLDVAL]], [[NEWVAL]] :: (load store monotonic monotonic 4 on %ir.addr) +; CHECK: [[OLDVALRES:%[0-9]+]]:_(s32), [[SUCCESS:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[ADDR]](p0), [[OLDVAL]], [[NEWVAL]] :: (load store monotonic monotonic (s32) on %ir.addr) ; CHECK-NEXT: G_BRCOND [[SUCCESS]](s1), %bb.3 ; CHECK-NEXT: G_BR %bb.2 ; CHECK: bb.3.done: @@ -2006,7 +2006,7 @@ define i16 @test_atomic_cmpxchg_2(i16* %addr) { ; CHECK-NEXT: [[NEWVAL:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 ; CHECK: bb.2.repeat: ; CHECK-NEXT: successors: %bb.3({{[^)]+}}), %bb.2({{[^)]+}}) -; CHECK: [[OLDVALRES:%[0-9]+]]:_(s16), [[SUCCESS:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[ADDR]](p0), [[OLDVAL]], [[NEWVAL]] :: (load store seq_cst seq_cst 2 on %ir.addr) +; CHECK: [[OLDVALRES:%[0-9]+]]:_(s16), [[SUCCESS:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[ADDR]](p0), [[OLDVAL]], [[NEWVAL]] :: (load store seq_cst seq_cst (s16) on %ir.addr) ; CHECK-NEXT: G_BRCOND [[SUCCESS]](s1), %bb.3 ; CHECK-NEXT: G_BR %bb.2 ; CHECK: bb.3.done: @@ -2032,7 +2032,7 @@ define i64 @test_atomic_cmpxchg_3(i64* %addr) { ; CHECK-NEXT: [[NEWVAL:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK: bb.2.repeat: ; CHECK-NEXT: successors: %bb.3({{[^)]+}}), %bb.2({{[^)]+}}) -; CHECK: [[OLDVALRES:%[0-9]+]]:_(s64), [[SUCCESS:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[ADDR]](p0), [[OLDVAL]], [[NEWVAL]] :: (load store seq_cst acquire 8 on %ir.addr) +; CHECK: [[OLDVALRES:%[0-9]+]]:_(s64), [[SUCCESS:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[ADDR]](p0), [[OLDVAL]], [[NEWVAL]] :: (load store seq_cst acquire (s64) on %ir.addr) ; CHECK-NEXT: G_BRCOND [[SUCCESS]](s1), %bb.3 ; CHECK-NEXT: G_BR %bb.2 ; CHECK: bb.3.done: @@ -2055,7 +2055,7 @@ define i32 @test_atomicrmw_xchg(i256* %addr) { ; CHECK-NEXT: liveins: $x0 ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK-NEXT: [[VAL:%[0-9]+]]:_(s256) = G_CONSTANT i256 1 -; CHECK-NEXT: [[OLDVALRES:%[0-9]+]]:_(s256) = G_ATOMICRMW_XCHG [[ADDR]](p0), [[VAL]] :: (load store monotonic 32 on %ir.addr) +; CHECK-NEXT: [[OLDVALRES:%[0-9]+]]:_(s256) = G_ATOMICRMW_XCHG [[ADDR]](p0), [[VAL]] :: (load store monotonic (s256) on %ir.addr) ; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_TRUNC [[OLDVALRES]] %oldval = atomicrmw xchg i256* %addr, i256 1 monotonic ; FIXME: We currently can't lower 'ret i256' and it's not the purpose of this @@ -2072,7 +2072,7 @@ define i32 @test_atomicrmw_add(i256* %addr) { ; CHECK-NEXT: liveins: $x0 ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK-NEXT: [[VAL:%[0-9]+]]:_(s256) = G_CONSTANT i256 1 -; CHECK-NEXT: [[OLDVALRES:%[0-9]+]]:_(s256) = G_ATOMICRMW_ADD [[ADDR]](p0), [[VAL]] :: (load store acquire 32 on %ir.addr) +; CHECK-NEXT: [[OLDVALRES:%[0-9]+]]:_(s256) = G_ATOMICRMW_ADD [[ADDR]](p0), [[VAL]] :: (load store acquire (s256) on %ir.addr) ; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_TRUNC [[OLDVALRES]] %oldval = atomicrmw add i256* %addr, i256 1 acquire ; FIXME: We currently can't lower 'ret i256' and it's not the purpose of this @@ -2089,7 +2089,7 @@ define i32 @test_atomicrmw_sub(i256* %addr) { ; CHECK-NEXT: liveins: $x0 ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK-NEXT: [[VAL:%[0-9]+]]:_(s256) = G_CONSTANT i256 1 -; CHECK-NEXT: [[OLDVALRES:%[0-9]+]]:_(s256) = G_ATOMICRMW_SUB [[ADDR]](p0), [[VAL]] :: (load store release 32 on %ir.addr) +; CHECK-NEXT: [[OLDVALRES:%[0-9]+]]:_(s256) = G_ATOMICRMW_SUB [[ADDR]](p0), [[VAL]] :: (load store release (s256) on %ir.addr) ; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_TRUNC [[OLDVALRES]] %oldval = atomicrmw sub i256* %addr, i256 1 release ; FIXME: We currently can't lower 'ret i256' and it's not the purpose of this @@ -2106,7 +2106,7 @@ define i32 @test_atomicrmw_and(i256* %addr) { ; CHECK-NEXT: liveins: $x0 ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK-NEXT: [[VAL:%[0-9]+]]:_(s256) = G_CONSTANT i256 1 -; CHECK-NEXT: [[OLDVALRES:%[0-9]+]]:_(s256) = G_ATOMICRMW_AND [[ADDR]](p0), [[VAL]] :: (load store acq_rel 32 on %ir.addr) +; CHECK-NEXT: [[OLDVALRES:%[0-9]+]]:_(s256) = G_ATOMICRMW_AND [[ADDR]](p0), [[VAL]] :: (load store acq_rel (s256) on %ir.addr) ; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_TRUNC [[OLDVALRES]] %oldval = atomicrmw and i256* %addr, i256 1 acq_rel ; FIXME: We currently can't lower 'ret i256' and it's not the purpose of this @@ -2123,7 +2123,7 @@ define i32 @test_atomicrmw_nand(i256* %addr) { ; CHECK-NEXT: liveins: $x0 ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK-NEXT: [[VAL:%[0-9]+]]:_(s256) = G_CONSTANT i256 1 -; CHECK-NEXT: [[OLDVALRES:%[0-9]+]]:_(s256) = G_ATOMICRMW_NAND [[ADDR]](p0), [[VAL]] :: (load store seq_cst 32 on %ir.addr) +; CHECK-NEXT: [[OLDVALRES:%[0-9]+]]:_(s256) = G_ATOMICRMW_NAND [[ADDR]](p0), [[VAL]] :: (load store seq_cst (s256) on %ir.addr) ; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_TRUNC [[OLDVALRES]] %oldval = atomicrmw nand i256* %addr, i256 1 seq_cst ; FIXME: We currently can't lower 'ret i256' and it's not the purpose of this @@ -2140,7 +2140,7 @@ define i32 @test_atomicrmw_or(i256* %addr) { ; CHECK-NEXT: liveins: $x0 ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK-NEXT: [[VAL:%[0-9]+]]:_(s256) = G_CONSTANT i256 1 -; CHECK-NEXT: [[OLDVALRES:%[0-9]+]]:_(s256) = G_ATOMICRMW_OR [[ADDR]](p0), [[VAL]] :: (load store seq_cst 32 on %ir.addr) +; CHECK-NEXT: [[OLDVALRES:%[0-9]+]]:_(s256) = G_ATOMICRMW_OR [[ADDR]](p0), [[VAL]] :: (load store seq_cst (s256) on %ir.addr) ; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_TRUNC [[OLDVALRES]] %oldval = atomicrmw or i256* %addr, i256 1 seq_cst ; FIXME: We currently can't lower 'ret i256' and it's not the purpose of this @@ -2157,7 +2157,7 @@ define i32 @test_atomicrmw_xor(i256* %addr) { ; CHECK-NEXT: liveins: $x0 ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK-NEXT: [[VAL:%[0-9]+]]:_(s256) = G_CONSTANT i256 1 -; CHECK-NEXT: [[OLDVALRES:%[0-9]+]]:_(s256) = G_ATOMICRMW_XOR [[ADDR]](p0), [[VAL]] :: (load store seq_cst 32 on %ir.addr) +; CHECK-NEXT: [[OLDVALRES:%[0-9]+]]:_(s256) = G_ATOMICRMW_XOR [[ADDR]](p0), [[VAL]] :: (load store seq_cst (s256) on %ir.addr) ; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_TRUNC [[OLDVALRES]] %oldval = atomicrmw xor i256* %addr, i256 1 seq_cst ; FIXME: We currently can't lower 'ret i256' and it's not the purpose of this @@ -2174,7 +2174,7 @@ define i32 @test_atomicrmw_min(i256* %addr) { ; CHECK-NEXT: liveins: $x0 ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK-NEXT: [[VAL:%[0-9]+]]:_(s256) = G_CONSTANT i256 1 -; CHECK-NEXT: [[OLDVALRES:%[0-9]+]]:_(s256) = G_ATOMICRMW_MIN [[ADDR]](p0), [[VAL]] :: (load store seq_cst 32 on %ir.addr) +; CHECK-NEXT: [[OLDVALRES:%[0-9]+]]:_(s256) = G_ATOMICRMW_MIN [[ADDR]](p0), [[VAL]] :: (load store seq_cst (s256) on %ir.addr) ; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_TRUNC [[OLDVALRES]] %oldval = atomicrmw min i256* %addr, i256 1 seq_cst ; FIXME: We currently can't lower 'ret i256' and it's not the purpose of this @@ -2191,7 +2191,7 @@ define i32 @test_atomicrmw_max(i256* %addr) { ; CHECK-NEXT: liveins: $x0 ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK-NEXT: [[VAL:%[0-9]+]]:_(s256) = G_CONSTANT i256 1 -; CHECK-NEXT: [[OLDVALRES:%[0-9]+]]:_(s256) = G_ATOMICRMW_MAX [[ADDR]](p0), [[VAL]] :: (load store seq_cst 32 on %ir.addr) +; CHECK-NEXT: [[OLDVALRES:%[0-9]+]]:_(s256) = G_ATOMICRMW_MAX [[ADDR]](p0), [[VAL]] :: (load store seq_cst (s256) on %ir.addr) ; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_TRUNC [[OLDVALRES]] %oldval = atomicrmw max i256* %addr, i256 1 seq_cst ; FIXME: We currently can't lower 'ret i256' and it's not the purpose of this @@ -2208,7 +2208,7 @@ define i32 @test_atomicrmw_umin(i256* %addr) { ; CHECK-NEXT: liveins: $x0 ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK-NEXT: [[VAL:%[0-9]+]]:_(s256) = G_CONSTANT i256 1 -; CHECK-NEXT: [[OLDVALRES:%[0-9]+]]:_(s256) = G_ATOMICRMW_UMIN [[ADDR]](p0), [[VAL]] :: (load store seq_cst 32 on %ir.addr) +; CHECK-NEXT: [[OLDVALRES:%[0-9]+]]:_(s256) = G_ATOMICRMW_UMIN [[ADDR]](p0), [[VAL]] :: (load store seq_cst (s256) on %ir.addr) ; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_TRUNC [[OLDVALRES]] %oldval = atomicrmw umin i256* %addr, i256 1 seq_cst ; FIXME: We currently can't lower 'ret i256' and it's not the purpose of this @@ -2225,7 +2225,7 @@ define i32 @test_atomicrmw_umax(i256* %addr) { ; CHECK-NEXT: liveins: $x0 ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK-NEXT: [[VAL:%[0-9]+]]:_(s256) = G_CONSTANT i256 1 -; CHECK-NEXT: [[OLDVALRES:%[0-9]+]]:_(s256) = G_ATOMICRMW_UMAX [[ADDR]](p0), [[VAL]] :: (load store seq_cst 32 on %ir.addr) +; CHECK-NEXT: [[OLDVALRES:%[0-9]+]]:_(s256) = G_ATOMICRMW_UMAX [[ADDR]](p0), [[VAL]] :: (load store seq_cst (s256) on %ir.addr) ; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_TRUNC [[OLDVALRES]] %oldval = atomicrmw umax i256* %addr, i256 1 seq_cst ; FIXME: We currently can't lower 'ret i256' and it's not the purpose of this @@ -2342,7 +2342,7 @@ define float @test_nearbyint_f32(float %x) { } ; CHECK-LABEL: name: test_llvm.aarch64.neon.ld3.v4i32.p0i32 -; CHECK: %1:_(<4 x s32>), %2:_(<4 x s32>), %3:_(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.ld3), %0(p0) :: (load 48 from %ir.ptr, align 64) +; CHECK: %1:_(<4 x s32>), %2:_(<4 x s32>), %3:_(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.ld3), %0(p0) :: (load (s384) from %ir.ptr, align 64) define void @test_llvm.aarch64.neon.ld3.v4i32.p0i32(i32* %ptr) { %arst = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0i32(i32* %ptr) ret void diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir index 37d00dfb31747..7092bae1cbb19 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir @@ -233,7 +233,7 @@ body: | successors: %bb.2.end, %bb.1.then liveins: $x0, $x1, $w2 - %0 = LDRWui killed $x0, 0 :: (load 4 from %ir.src) + %0 = LDRWui killed $x0, 0 :: (load (s32) from %ir.src) %5(s32) = COPY %0 %1(p0) = COPY $x1 %2 = COPY $w2 @@ -245,7 +245,7 @@ body: | bb.2.end: %4(s32) = PHI %0, %bb.0.entry, %3, %bb.1.then - G_STORE killed %4, killed %1 :: (store 4 into %ir.dst) + G_STORE killed %4, killed %1 :: (store (s32) into %ir.dst) RET_ReallyLR ... @@ -692,10 +692,10 @@ registers: # CHECK: %0:gpr(s64) = COPY $x0 # CHECK-NEXT: %1:gpr(p0) = COPY $x1 # FAST-NEXT: %2:fpr(<2 x s32>) = G_BITCAST %0(s64) -# FAST-NEXT: %3:fpr(<2 x s32>) = G_LOAD %1(p0) :: (load 8 from %ir.addr) +# FAST-NEXT: %3:fpr(<2 x s32>) = G_LOAD %1(p0) :: (load (<2 x s32>) from %ir.addr) # FAST-NEXT: %4:fpr(<2 x s32>) = G_OR %2, %3 # GREEDY-NEXT: %2:gpr(<2 x s32>) = G_BITCAST %0(s64) -# GREEDY-NEXT: %3:gpr(<2 x s32>) = G_LOAD %1(p0) :: (load 8 from %ir.addr) +# GREEDY-NEXT: %3:gpr(<2 x s32>) = G_LOAD %1(p0) :: (load (<2 x s32>) from %ir.addr) # GREEDY-NEXT: %4:gpr(<2 x s32>) = G_OR %2, %3 # CHECK-NEXT: %5:gpr(s64) = G_BITCAST %4(<2 x s32>) # CHECK-NEXT: $x0 = COPY %5(s64) @@ -707,7 +707,7 @@ body: | %0(s64) = COPY $x0 %1(p0) = COPY $x1 %2(<2 x s32>) = G_BITCAST %0(s64) - %3(<2 x s32>) = G_LOAD %1(p0) :: (load 8 from %ir.addr) + %3(<2 x s32>) = G_LOAD %1(p0) :: (load (<2 x s32>) from %ir.addr) %4(<2 x s32>) = G_OR %2, %3 %5(s64) = G_BITCAST %4(<2 x s32>) $x0 = COPY %5(s64) @@ -737,7 +737,7 @@ registers: # No repairing should be necessary for both modes. # CHECK: %0:gpr(s64) = COPY $x0 # CHECK-NEXT: %1:gpr(p0) = COPY $x1 -# CHECK-NEXT: %2:fpr(s64) = G_LOAD %1(p0) :: (load 8 from %ir.addr) +# CHECK-NEXT: %2:fpr(s64) = G_LOAD %1(p0) :: (load (s64) from %ir.addr) # %0 has been mapped to GPR, we need to repair to match FPR. # CHECK-NEXT: %4:fpr(s64) = COPY %0 # CHECK-NEXT: %3:fpr(s64) = G_FADD %4, %2 @@ -750,7 +750,7 @@ body: | %0(s64) = COPY $x0 %1(p0) = COPY $x1 - %2(s64) = G_LOAD %1(p0) :: (load 8 from %ir.addr) + %2(s64) = G_LOAD %1(p0) :: (load (s64) from %ir.addr) %3(s64) = G_FADD %0, %2 $x0 = COPY %3(s64) RET_ReallyLR implicit $x0 @@ -781,7 +781,7 @@ registers: # CHECK-NEXT: %3:fpr(s64) = COPY %0 # CHECK-NEXT: %4:fpr(s64) = COPY %0 # CHECK-NEXT: %2:fpr(s64) = G_FADD %3, %4 -# CHECK-NEXT: G_STORE %2(s64), %1(p0) :: (store 8 into %ir.addr) +# CHECK-NEXT: G_STORE %2(s64), %1(p0) :: (store (s64) into %ir.addr) # CHECK-NEXT: RET_ReallyLR body: | @@ -791,7 +791,7 @@ body: | %0(s64) = COPY $x0 %1(p0) = COPY $x1 %2(s64) = G_FADD %0, %0 - G_STORE %2(s64), %1(p0) :: (store 8 into %ir.addr) + G_STORE %2(s64), %1(p0) :: (store (s64) into %ir.addr) RET_ReallyLR ... @@ -931,11 +931,11 @@ body: | # If we didn't look through the copy for %0, the default mapping # would have been on GPR and we would have to insert a copy to move # the value away from FPR (h0). -# CHECK-NEXT: G_STORE %0(s16), %1(p0) :: (store 2 into %ir.p.addr) +# CHECK-NEXT: G_STORE %0(s16), %1(p0) :: (store (s16) into %ir.p.addr) # If we didn't look through the copy for %2, the default mapping # would have been on GPR and we would have to insert a copy to move # the value to FPR (h0). -# CHECK-NEXT: %2:fpr(s16) = G_LOAD %1(p0) :: (load 2 from %ir.p.addr) +# CHECK-NEXT: %2:fpr(s16) = G_LOAD %1(p0) :: (load (s16) from %ir.p.addr) # CHECK-NEXT: $h0 = COPY %2(s16) name: passFp16ViaAllocas alignment: 4 @@ -955,8 +955,8 @@ body: | %0(s16) = COPY $h0 %1(p0) = G_FRAME_INDEX %stack.0.p.addr - G_STORE %0(s16), %1(p0) :: (store 2 into %ir.p.addr) - %2(s16) = G_LOAD %1(p0) :: (load 2 from %ir.p.addr) + G_STORE %0(s16), %1(p0) :: (store (s16) into %ir.p.addr) + %2(s16) = G_LOAD %1(p0) :: (load (s16) from %ir.p.addr) $h0 = COPY %2(s16) RET_ReallyLR implicit $h0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-signext.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-signext.ll index 689a3bba9434f..a0a661c9eeef4 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-signext.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-signext.ll @@ -53,9 +53,9 @@ define i32 @signext_param_stack(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, ; CHECK: [[COPY6:%[0-9]+]]:_(s64) = COPY $x6 ; CHECK: [[COPY7:%[0-9]+]]:_(s64) = COPY $x7 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 8 from %fixed-stack.1, align 16) + ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s64) from %fixed-stack.1, align 16) ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load 1 from %fixed-stack.0, align 8) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s8) from %fixed-stack.0, align 8) ; CHECK: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[LOAD1]], 1 ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_SEXT]](s32) ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s1) @@ -80,9 +80,9 @@ define i32 @dont_need_assert_zext_stack(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, ; CHECK: [[COPY6:%[0-9]+]]:_(s64) = COPY $x6 ; CHECK: [[COPY7:%[0-9]+]]:_(s64) = COPY $x7 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 8 from %fixed-stack.1, align 16) + ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s64) from %fixed-stack.1, align 16) ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load 4 from %fixed-stack.0, align 8) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s32) from %fixed-stack.0, align 8) ; CHECK: $w0 = COPY [[LOAD1]](s32) ; CHECK: RET_ReallyLR implicit $w0 i64 %f, i64 %g, i64 %h, i64 %i, @@ -104,9 +104,9 @@ define i8 @s8_assert_zext_stack(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, ; CHECK: [[COPY6:%[0-9]+]]:_(s64) = COPY $x6 ; CHECK: [[COPY7:%[0-9]+]]:_(s64) = COPY $x7 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 8 from %fixed-stack.1, align 16) + ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s64) from %fixed-stack.1, align 16) ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load 1 from %fixed-stack.0, align 8) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s8) from %fixed-stack.0, align 8) ; CHECK: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[LOAD1]], 8 ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_SEXT]](s32) ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s8) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-zeroext.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-zeroext.ll index 47d944ab43038..07ae6126e114c 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-zeroext.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-zeroext.ll @@ -41,7 +41,6 @@ define i32 @zeroext_param_i32(i32 zeroext %x) { ; Zeroext param is passed on the stack. We should still get a G_ASSERT_ZEXT. define i32 @zeroext_param_stack(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, - i64 %g, i64 %h, i64 %i, i1 zeroext %j) { ; CHECK-LABEL: name: zeroext_param_stack ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7 @@ -54,22 +53,21 @@ define i32 @zeroext_param_stack(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, ; CHECK: [[COPY6:%[0-9]+]]:_(s64) = COPY $x6 ; CHECK: [[COPY7:%[0-9]+]]:_(s64) = COPY $x7 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 8 from %fixed-stack.1, align 16) + ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s64) from %fixed-stack.1, align 16) ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load 1 from %fixed-stack.0, align 8) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s8) from %fixed-stack.0, align 8) ; CHECK: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[LOAD1]], 1 ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s32) ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s1) ; CHECK: $w0 = COPY [[ZEXT]](s32) ; CHECK: RET_ReallyLR implicit $w0 + i64 %g, i64 %h, i64 %i, i1 zeroext %j) { %v = zext i1 %j to i32 ret i32 %v } ; The zeroext parameter is a s32, so there's no extension required. define i32 @dont_need_assert_zext_stack(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, - i64 %f, i64 %g, i64 %h, i64 %i, - i32 zeroext %j) { ; CHECK-LABEL: name: dont_need_assert_zext_stack ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7 @@ -82,18 +80,18 @@ define i32 @dont_need_assert_zext_stack(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, ; CHECK: [[COPY6:%[0-9]+]]:_(s64) = COPY $x6 ; CHECK: [[COPY7:%[0-9]+]]:_(s64) = COPY $x7 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 8 from %fixed-stack.1, align 16) + ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s64) from %fixed-stack.1, align 16) ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load 4 from %fixed-stack.0, align 8) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s32) from %fixed-stack.0, align 8) ; CHECK: $w0 = COPY [[LOAD1]](s32) ; CHECK: RET_ReallyLR implicit $w0 + i64 %f, i64 %g, i64 %h, i64 %i, + i32 zeroext %j) { ret i32 %j } ; s8 requires extension to s32, so we should get a G_ASSERT_ZEXT here. define i8 @s8_assert_zext_stack(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, - i64 %f, i64 %g, i64 %h, i64 %i, - i8 zeroext %j) { ; CHECK-LABEL: name: s8_assert_zext_stack ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7 @@ -106,13 +104,15 @@ define i8 @s8_assert_zext_stack(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, ; CHECK: [[COPY6:%[0-9]+]]:_(s64) = COPY $x6 ; CHECK: [[COPY7:%[0-9]+]]:_(s64) = COPY $x7 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 8 from %fixed-stack.1, align 16) + ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s64) from %fixed-stack.1, align 16) ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load 1 from %fixed-stack.0, align 8) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s8) from %fixed-stack.0, align 8) ; CHECK: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[LOAD1]], 8 ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_ZEXT]](s32) ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s8) ; CHECK: $w0 = COPY [[ANYEXT]](s32) ; CHECK: RET_ReallyLR implicit $w0 + i64 %f, i64 %g, i64 %h, i64 %i, + i8 zeroext %j) { ret i8 %j } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-cse.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-cse.ll index ae2690346094e..aebf7f27f3183 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-cse.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-cse.ll @@ -2,17 +2,17 @@ ; CHECK-LABEL: name: test_split_struct ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 -; CHECK: [[LO:%[0-9]+]]:_(s64) = G_LOAD %0(p0) :: (load 8 from %ir.ptr) +; CHECK: [[LO:%[0-9]+]]:_(s64) = G_LOAD %0(p0) :: (load (s64) from %ir.ptr) ; CHECK: [[CST:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[ADDR]], [[CST]](s64) -; CHECK: [[HI:%[0-9]+]]:_(s64) = G_LOAD [[GEP]](p0) :: (load 8 from %ir.ptr + 8) +; CHECK: [[HI:%[0-9]+]]:_(s64) = G_LOAD [[GEP]](p0) :: (load (s64) from %ir.ptr + 8) ; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY $sp ; CHECK: [[CST2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP]], [[CST2]](s64) -; CHECK: G_STORE [[LO]](s64), [[GEP2]](p0) :: (store 8 into stack, align 1) +; CHECK: G_STORE [[LO]](s64), [[GEP2]](p0) :: (store (s64) into stack, align 1) ; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP]], [[CST]](s64) -; CHECK: G_STORE [[HI]](s64), [[GEP3]](p0) :: (store 8 into stack + 8, align 1) +; CHECK: G_STORE [[HI]](s64), [[GEP3]](p0) :: (store (s64) into stack + 8, align 1) define void @test_split_struct([2 x i64]* %ptr) { %struct = load [2 x i64], [2 x i64]* %ptr call void @take_split_struct([2 x i64]* null, i64 1, i64 2, i64 3, diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll index c42f7e5ed580f..067d58273b96f 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll @@ -6,11 +6,11 @@ ; CHECK-DAG: - { id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 1, ; CHECK-DAG: - { id: [[STACK8:[0-9]+]], type: default, offset: 1, size: 1, ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 -; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 1 from %fixed-stack.1, align 16) +; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s8) from %fixed-stack.1, align 16) ; CHECK: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[LOAD]], 8 ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_SEXT]](s32) ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 -; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load 1 from %fixed-stack.0) +; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s8) from %fixed-stack.0) ; CHECK: [[ASSERT_SEXT1:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[LOAD1]], 8 ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_SEXT1]](s32) ; CHECK: [[ADD:%[0-9]+]]:_(s8) = G_ADD [[TRUNC]], [[TRUNC1]] @@ -28,10 +28,10 @@ define signext i8 @test_stack_slots([8 x i64], i8 signext %lhs, i8 signext %rhs) ; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY $sp ; CHECK: [[C42_OFFS:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK: [[C42_LOC:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP]], [[C42_OFFS]](s64) -; CHECK: G_STORE [[C42]](s8), [[C42_LOC]](p0) :: (store 1 into stack) +; CHECK: G_STORE [[C42]](s8), [[C42_LOC]](p0) :: (store (s8) into stack) ; CHECK: [[C12_OFFS:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK: [[C12_LOC:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP]], [[C12_OFFS]](s64) -; CHECK: G_STORE [[C12]](s8), [[C12_LOC]](p0) :: (store 1 into stack + 1) +; CHECK: G_STORE [[C12]](s8), [[C12_LOC]](p0) :: (store (s8) into stack + 1) ; CHECK: BL @test_stack_slots define void @test_call_stack() { call signext i8 @test_stack_slots([8 x i64] undef, i8 signext 42, i8 signext 12) @@ -59,18 +59,18 @@ define void @take_128bit_struct([2 x i64]* %ptr, [2 x i64] %in) { } ; CHECK-LABEL: name: test_split_struct -; CHECK: [[LD1:%[0-9]+]]:_(s64) = G_LOAD %0(p0) :: (load 8 from %ir.ptr) +; CHECK: [[LD1:%[0-9]+]]:_(s64) = G_LOAD %0(p0) :: (load (s64) from %ir.ptr) ; CHECK: [[CST:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD %0, [[CST]](s64) -; CHECK: [[LD2:%[0-9]+]]:_(s64) = G_LOAD %3(p0) :: (load 8 from %ir.ptr + 8) +; CHECK: [[LD2:%[0-9]+]]:_(s64) = G_LOAD %3(p0) :: (load (s64) from %ir.ptr + 8) ; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY $sp ; CHECK: [[OFF:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP]], [[OFF]](s64) -; CHECK: G_STORE [[LD1]](s64), [[ADDR]](p0) :: (store 8 into stack, align 1) +; CHECK: G_STORE [[LD1]](s64), [[ADDR]](p0) :: (store (s64) into stack, align 1) ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP]], [[CST]] -; CHECK: G_STORE [[LD2]](s64), [[ADDR]](p0) :: (store 8 into stack + 8, align 1) +; CHECK: G_STORE [[LD2]](s64), [[ADDR]](p0) :: (store (s64) into stack + 8, align 1) define void @test_split_struct([2 x i64]* %ptr) { %struct = load [2 x i64], [2 x i64]* %ptr call void @take_split_struct([2 x i64]* null, i64 1, i64 2, i64 3, @@ -85,10 +85,10 @@ define void @test_split_struct([2 x i64]* %ptr) { ; CHECK-DAG: - { id: [[HI_FRAME:[0-9]+]], type: default, offset: 8, size: 8 ; CHECK: [[LOPTR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[LO_FRAME]] -; CHECK: [[LO:%[0-9]+]]:_(s64) = G_LOAD [[LOPTR]](p0) :: (invariant load 8 from %fixed-stack.[[LO_FRAME]], align 16) +; CHECK: [[LO:%[0-9]+]]:_(s64) = G_LOAD [[LOPTR]](p0) :: (invariant load (s64) from %fixed-stack.[[LO_FRAME]], align 16) ; CHECK: [[HIPTR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[HI_FRAME]] -; CHECK: [[HI:%[0-9]+]]:_(s64) = G_LOAD [[HIPTR]](p0) :: (invariant load 8 from %fixed-stack.[[HI_FRAME]]) +; CHECK: [[HI:%[0-9]+]]:_(s64) = G_LOAD [[HIPTR]](p0) :: (invariant load (s64) from %fixed-stack.[[HI_FRAME]]) define void @take_split_struct([2 x i64]* %ptr, i64, i64, i64, i64, i64, i64, [2 x i64] %in) { diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call-sret.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call-sret.ll index 5a1ce24bf025a..cb8c31be256e4 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call-sret.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call-sret.ll @@ -51,8 +51,8 @@ define void @dont_tail_call_explicit_sret_alloca_dummyusers(i64* %ptr) { ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.l - ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 8 from %ir.ptr) - ; CHECK: G_STORE [[LOAD]](s64), [[FRAME_INDEX]](p0) :: (store 8 into %ir.l) + ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64) from %ir.ptr) + ; CHECK: G_STORE [[LOAD]](s64), [[FRAME_INDEX]](p0) :: (store (s64) into %ir.l) ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp ; CHECK: $x8 = COPY [[FRAME_INDEX]](p0) ; CHECK: BL @test_explicit_sret, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x8 @@ -90,7 +90,7 @@ define i64 @dont_tail_call_sret_alloca_returned() { ; CHECK: $x8 = COPY [[FRAME_INDEX]](p0) ; CHECK: BL @test_explicit_sret, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x8 ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp - ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load 8 from %ir.l) + ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s64) from %ir.l) ; CHECK: $x0 = COPY [[LOAD]](s64) ; CHECK: RET_ReallyLR implicit $x0 %l = alloca i64, align 8 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll index 39c8ccd39f2f9..30876eb332659 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll @@ -66,7 +66,7 @@ define void @test_outgoing_stack_args([8 x <2 x double>], <4 x half> %arg) { ; DARWIN: [[COPY6:%[0-9]+]]:_(<2 x s64>) = COPY $q6 ; DARWIN: [[COPY7:%[0-9]+]]:_(<2 x s64>) = COPY $q7 ; DARWIN: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; DARWIN: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 8 from %fixed-stack.0, align 16) + ; DARWIN: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s64) from %fixed-stack.0, align 16) ; DARWIN: $d0 = COPY [[LOAD]](<4 x s16>) ; DARWIN: TCRETURNdi @outgoing_stack_args_fn, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $d0 ; WINDOWS-LABEL: name: test_outgoing_stack_args @@ -81,7 +81,7 @@ define void @test_outgoing_stack_args([8 x <2 x double>], <4 x half> %arg) { ; WINDOWS: [[COPY6:%[0-9]+]]:_(<2 x s64>) = COPY $q6 ; WINDOWS: [[COPY7:%[0-9]+]]:_(<2 x s64>) = COPY $q7 ; WINDOWS: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; WINDOWS: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 8 from %fixed-stack.0, align 16) + ; WINDOWS: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s64) from %fixed-stack.0, align 16) ; WINDOWS: $d0 = COPY [[LOAD]](<4 x s16>) ; WINDOWS: TCRETURNdi @outgoing_stack_args_fn, 0, csr_aarch64_aapcs, implicit $sp, implicit $d0 tail call void @outgoing_stack_args_fn(<4 x half> %arg) @@ -109,10 +109,10 @@ define i32 @test_too_big_stack() { ; DARWIN: [[COPY:%[0-9]+]]:_(p0) = COPY $sp ; DARWIN: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; DARWIN: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; DARWIN: G_STORE [[C]](s8), [[PTR_ADD]](p0) :: (store 1 into stack) + ; DARWIN: G_STORE [[C]](s8), [[PTR_ADD]](p0) :: (store (s8) into stack) ; DARWIN: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; DARWIN: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; DARWIN: G_STORE [[C1]](s16), [[PTR_ADD1]](p0) :: (store 2 into stack + 2, align 1) + ; DARWIN: G_STORE [[C1]](s16), [[PTR_ADD1]](p0) :: (store (s16) into stack + 2, align 1) ; DARWIN: BL @too_big_stack, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit-def $w0 ; DARWIN: [[COPY1:%[0-9]+]]:_(s32) = COPY $w0 ; DARWIN: ADJCALLSTACKUP 4, 0, implicit-def $sp, implicit $sp @@ -135,10 +135,10 @@ define i32 @test_too_big_stack() { ; WINDOWS: [[COPY:%[0-9]+]]:_(p0) = COPY $sp ; WINDOWS: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; WINDOWS: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; WINDOWS: G_STORE [[C]](s8), [[PTR_ADD]](p0) :: (store 1 into stack) + ; WINDOWS: G_STORE [[C]](s8), [[PTR_ADD]](p0) :: (store (s8) into stack) ; WINDOWS: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; WINDOWS: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; WINDOWS: G_STORE [[C1]](s16), [[PTR_ADD1]](p0) :: (store 2 into stack + 8, align 1) + ; WINDOWS: G_STORE [[C1]](s16), [[PTR_ADD1]](p0) :: (store (s16) into stack + 8, align 1) ; WINDOWS: BL @too_big_stack, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit-def $w0 ; WINDOWS: [[COPY1:%[0-9]+]]:_(s32) = COPY $w0 ; WINDOWS: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp @@ -206,7 +206,7 @@ define void @test_varargs_2() { ; DARWIN: [[COPY:%[0-9]+]]:_(p0) = COPY $sp ; DARWIN: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; DARWIN: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; DARWIN: G_STORE [[C3]](s64), [[PTR_ADD]](p0) :: (store 8 into stack, align 1) + ; DARWIN: G_STORE [[C3]](s64), [[PTR_ADD]](p0) :: (store (s64) into stack, align 1) ; DARWIN: BL @varargs, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit $d0, implicit $x1 ; DARWIN: ADJCALLSTACKUP 8, 0, implicit-def $sp, implicit $sp ; DARWIN: RET_ReallyLR @@ -242,7 +242,7 @@ define void @test_varargs_3([8 x <2 x double>], <4 x half> %arg) { ; DARWIN: [[COPY6:%[0-9]+]]:_(<2 x s64>) = COPY $q6 ; DARWIN: [[COPY7:%[0-9]+]]:_(<2 x s64>) = COPY $q7 ; DARWIN: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; DARWIN: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 8 from %fixed-stack.0, align 16) + ; DARWIN: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s64) from %fixed-stack.0, align 16) ; DARWIN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; DARWIN: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 ; DARWIN: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 @@ -254,7 +254,7 @@ define void @test_varargs_3([8 x <2 x double>], <4 x half> %arg) { ; DARWIN: [[COPY8:%[0-9]+]]:_(p0) = COPY $sp ; DARWIN: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; DARWIN: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY8]], [[C4]](s64) - ; DARWIN: G_STORE [[C3]](s64), [[PTR_ADD]](p0) :: (store 8 into stack, align 1) + ; DARWIN: G_STORE [[C3]](s64), [[PTR_ADD]](p0) :: (store (s64) into stack, align 1) ; DARWIN: BL @varargs, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit $d0, implicit $x1 ; DARWIN: ADJCALLSTACKUP 8, 0, implicit-def $sp, implicit $sp ; DARWIN: RET_ReallyLR @@ -270,7 +270,7 @@ define void @test_varargs_3([8 x <2 x double>], <4 x half> %arg) { ; WINDOWS: [[COPY6:%[0-9]+]]:_(<2 x s64>) = COPY $q6 ; WINDOWS: [[COPY7:%[0-9]+]]:_(<2 x s64>) = COPY $q7 ; WINDOWS: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; WINDOWS: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 8 from %fixed-stack.0, align 16) + ; WINDOWS: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s64) from %fixed-stack.0, align 16) ; WINDOWS: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; WINDOWS: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 ; WINDOWS: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator.ll index 3a52f6e43cfc9..f05e3be2524b6 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator.ll @@ -65,13 +65,13 @@ define void @test_multiple_args(i64 %in) { ; CHECK: [[I8:%[0-9]+]]:_(s8) = G_TRUNC [[I8_C]] ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x2 -; CHECK: G_STORE [[DBL]](s64), [[ADDR]](p0) :: (store 8 into %ir.addr) +; CHECK: G_STORE [[DBL]](s64), [[ADDR]](p0) :: (store (s64) into %ir.addr) ; CHECK: [[CST1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD [[ADDR]], [[CST1]](s64) -; CHECK: G_STORE [[I64]](s64), [[GEP1]](p0) :: (store 8 into %ir.addr + 8) +; CHECK: G_STORE [[I64]](s64), [[GEP1]](p0) :: (store (s64) into %ir.addr + 8) ; CHECK: [[CST2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_PTR_ADD [[ADDR]], [[CST2]](s64) -; CHECK: G_STORE [[I8]](s8), [[GEP2]](p0) :: (store 1 into %ir.addr + 16, align 8) +; CHECK: G_STORE [[I8]](s8), [[GEP2]](p0) :: (store (s8) into %ir.addr + 16, align 8) ; CHECK: RET_ReallyLR define void @test_struct_formal({double, i64, i8} %in, {double, i64, i8}* %addr) { store {double, i64, i8} %in, {double, i64, i8}* %addr @@ -82,13 +82,13 @@ define void @test_struct_formal({double, i64, i8} %in, {double, i64, i8}* %addr) ; CHECK-LABEL: name: test_struct_return ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 -; CHECK: [[LD1:%[0-9]+]]:_(s64) = G_LOAD [[ADDR]](p0) :: (load 8 from %ir.addr) +; CHECK: [[LD1:%[0-9]+]]:_(s64) = G_LOAD [[ADDR]](p0) :: (load (s64) from %ir.addr) ; CHECK: [[CST1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD [[ADDR]], [[CST1]](s64) -; CHECK: [[LD2:%[0-9]+]]:_(s64) = G_LOAD [[GEP1]](p0) :: (load 8 from %ir.addr + 8) +; CHECK: [[LD2:%[0-9]+]]:_(s64) = G_LOAD [[GEP1]](p0) :: (load (s64) from %ir.addr + 8) ; CHECK: [[CST2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_PTR_ADD [[ADDR]], [[CST2]](s64) -; CHECK: [[LD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 4 from %ir.addr + 16, align 8) +; CHECK: [[LD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load (s32) from %ir.addr + 16, align 8) ; CHECK: $d0 = COPY [[LD1]](s64) ; CHECK: $x0 = COPY [[LD2]](s64) @@ -101,16 +101,16 @@ define {double, i64, i32} @test_struct_return({double, i64, i32}* %addr) { ; CHECK-LABEL: name: test_arr_call ; CHECK: %0:_(p0) = COPY $x0 -; CHECK: [[LD1:%[0-9]+]]:_(s64) = G_LOAD %0(p0) :: (load 8 from %ir.addr) +; CHECK: [[LD1:%[0-9]+]]:_(s64) = G_LOAD %0(p0) :: (load (s64) from %ir.addr) ; CHECK: [[CST1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD %0, [[CST1]](s64) -; CHECK: [[LD2:%[0-9]+]]:_(s64) = G_LOAD [[GEP1]](p0) :: (load 8 from %ir.addr + 8) +; CHECK: [[LD2:%[0-9]+]]:_(s64) = G_LOAD [[GEP1]](p0) :: (load (s64) from %ir.addr + 8) ; CHECK: [[CST2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_PTR_ADD %0, [[CST2]](s64) -; CHECK: [[LD3:%[0-9]+]]:_(s64) = G_LOAD [[GEP2]](p0) :: (load 8 from %ir.addr + 16) +; CHECK: [[LD3:%[0-9]+]]:_(s64) = G_LOAD [[GEP2]](p0) :: (load (s64) from %ir.addr + 16) ; CHECK: [[CST3:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 ; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_PTR_ADD %0, [[CST3]](s64) -; CHECK: [[LD4:%[0-9]+]]:_(s64) = G_LOAD [[GEP3]](p0) :: (load 8 from %ir.addr + 24) +; CHECK: [[LD4:%[0-9]+]]:_(s64) = G_LOAD [[GEP3]](p0) :: (load (s64) from %ir.addr + 24) ; CHECK: $x0 = COPY [[LD1]](s64) ; CHECK: $x1 = COPY [[LD2]](s64) @@ -155,7 +155,7 @@ define void @test_abi_exts_call(i8* %addr) { ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 -; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0) :: (load 1 from %ir.addr) +; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0) :: (load (s8) from %ir.addr) ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s8) ; CHECK: $w0 = COPY [[ZEXT]](s32) @@ -173,7 +173,7 @@ define void @test_zext_in_callee(i8* %addr) { ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 -; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0) :: (load 1 from %ir.addr) +; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0) :: (load (s8) from %ir.addr) ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s8) ; CHECK: $w0 = COPY [[SEXT]](s32) @@ -213,11 +213,11 @@ define zeroext i8 @test_abi_zext_ret(i8* %addr) { ; CHECK-DAG: - { id: [[STACK8:[0-9]+]], type: default, offset: 8, size: 8, ; CHECK-DAG: - { id: [[STACK16:[0-9]+]], type: default, offset: 16, size: 8, ; CHECK: [[LHS_ADDR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]] -; CHECK: [[LHS:%[0-9]+]]:_(s64) = G_LOAD [[LHS_ADDR]](p0) :: (invariant load 8 from %fixed-stack.[[STACK0]], align 16) +; CHECK: [[LHS:%[0-9]+]]:_(s64) = G_LOAD [[LHS_ADDR]](p0) :: (invariant load (s64) from %fixed-stack.[[STACK0]], align 16) ; CHECK: [[RHS_ADDR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK8]] -; CHECK: [[RHS:%[0-9]+]]:_(s64) = G_LOAD [[RHS_ADDR]](p0) :: (invariant load 8 from %fixed-stack.[[STACK8]]) +; CHECK: [[RHS:%[0-9]+]]:_(s64) = G_LOAD [[RHS_ADDR]](p0) :: (invariant load (s64) from %fixed-stack.[[STACK8]]) ; CHECK: [[ADDR_ADDR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK16]] -; CHECK: [[ADDR:%[0-9]+]]:_(p0) = G_LOAD [[ADDR_ADDR]](p0) :: (invariant load 8 from %fixed-stack.[[STACK16]], align 16) +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = G_LOAD [[ADDR_ADDR]](p0) :: (invariant load (s64) from %fixed-stack.[[STACK16]], align 16) ; CHECK: [[SUM:%[0-9]+]]:_(s64) = G_ADD [[LHS]], [[RHS]] ; CHECK: G_STORE [[SUM]](s64), [[ADDR]](p0) define void @test_stack_slots([8 x i64], i64 %lhs, i64 %rhs, i64* %addr) { @@ -234,13 +234,13 @@ define void @test_stack_slots([8 x i64], i64 %lhs, i64 %rhs, i64* %addr) { ; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY $sp ; CHECK: [[C42_OFFS:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK: [[C42_LOC:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP]], [[C42_OFFS]](s64) -; CHECK: G_STORE [[C42]](s64), [[C42_LOC]](p0) :: (store 8 into stack, align 1) +; CHECK: G_STORE [[C42]](s64), [[C42_LOC]](p0) :: (store (s64) into stack, align 1) ; CHECK: [[C12_OFFS:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK: [[C12_LOC:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP]], [[C12_OFFS]](s64) -; CHECK: G_STORE [[C12]](s64), [[C12_LOC]](p0) :: (store 8 into stack + 8, align 1) +; CHECK: G_STORE [[C12]](s64), [[C12_LOC]](p0) :: (store (s64) into stack + 8, align 1) ; CHECK: [[PTR_OFFS:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK: [[PTR_LOC:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP]], [[PTR_OFFS]](s64) -; CHECK: G_STORE [[PTR]](p0), [[PTR_LOC]](p0) :: (store 8 into stack + 16, align 1) +; CHECK: G_STORE [[PTR]](p0), [[PTR_LOC]](p0) :: (store (s64) into stack + 16, align 1) ; CHECK: BL @test_stack_slots ; CHECK: ADJCALLSTACKUP 24, 0, implicit-def $sp, implicit $sp define void @test_call_stack() { @@ -253,7 +253,7 @@ define void @test_call_stack() { ; CHECK-NEXT: - { id: [[SLOT:[0-9]+]], type: default, offset: 0, size: 1, alignment: 16, stack-id: default, ; CHECK-NEXT: isImmutable: true, ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[SLOT]] -; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[SLOT]], align 16) +; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[ADDR]](p0) :: (invariant load (s8) from %fixed-stack.[[SLOT]], align 16) ; CHECK-NEXT: {{%[0-9]+}}:_(s1) = G_TRUNC [[LOAD]] define void @test_mem_i1([8 x i64], i1 %in) { ret void @@ -281,17 +281,17 @@ define void @take_128bit_struct([2 x i64]* %ptr, [2 x i64] %in) { ; CHECK-LABEL: name: test_split_struct ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0 -; CHECK: [[LO:%[0-9]+]]:_(s64) = G_LOAD %0(p0) :: (load 8 from %ir.ptr) +; CHECK: [[LO:%[0-9]+]]:_(s64) = G_LOAD %0(p0) :: (load (s64) from %ir.ptr) ; CHECK: [[CST:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[ADDR]], [[CST]](s64) -; CHECK: [[HI:%[0-9]+]]:_(s64) = G_LOAD [[GEP]](p0) :: (load 8 from %ir.ptr + 8) +; CHECK: [[HI:%[0-9]+]]:_(s64) = G_LOAD [[GEP]](p0) :: (load (s64) from %ir.ptr + 8) ; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY $sp ; CHECK: [[CST2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP]], [[CST2]](s64) -; CHECK: G_STORE [[LO]](s64), [[GEP2]](p0) :: (store 8 into stack, align 1) +; CHECK: G_STORE [[LO]](s64), [[GEP2]](p0) :: (store (s64) into stack, align 1) ; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP]], [[CST]](s64) -; CHECK: G_STORE [[HI]](s64), [[GEP3]](p0) :: (store 8 into stack + 8, align 1) +; CHECK: G_STORE [[HI]](s64), [[GEP3]](p0) :: (store (s64) into stack + 8, align 1) define void @test_split_struct([2 x i64]* %ptr) { %struct = load [2 x i64], [2 x i64]* %ptr call void @take_split_struct([2 x i64]* null, i64 1, i64 2, i64 3, @@ -306,10 +306,10 @@ define void @test_split_struct([2 x i64]* %ptr) { ; CHECK-DAG: - { id: [[HI_FRAME:[0-9]+]], type: default, offset: 8, size: 8 ; CHECK: [[LOPTR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[LO_FRAME]] -; CHECK: [[LO:%[0-9]+]]:_(s64) = G_LOAD [[LOPTR]](p0) :: (invariant load 8 from %fixed-stack.[[LO_FRAME]], align 16) +; CHECK: [[LO:%[0-9]+]]:_(s64) = G_LOAD [[LOPTR]](p0) :: (invariant load (s64) from %fixed-stack.[[LO_FRAME]], align 16) ; CHECK: [[HIPTR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[HI_FRAME]] -; CHECK: [[HI:%[0-9]+]]:_(s64) = G_LOAD [[HIPTR]](p0) :: (invariant load 8 from %fixed-stack.[[HI_FRAME]]) +; CHECK: [[HI:%[0-9]+]]:_(s64) = G_LOAD [[HIPTR]](p0) :: (invariant load (s64) from %fixed-stack.[[HI_FRAME]]) define void @take_split_struct([2 x i64]* %ptr, i64, i64, i64, i64, i64, i64, [2 x i64] %in) { diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-ext-debugloc.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-ext-debugloc.mir index 7e0e6f03443bd..02d27e556fd58 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-ext-debugloc.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-ext-debugloc.mir @@ -9,7 +9,7 @@ target triple = "arm64-apple-ios13.0.0" @.str = external dso_local unnamed_addr constant [4 x i8], align 1 - + define void @main() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg !4 { entry: %tobool = trunc i8 undef to i1 @@ -19,13 +19,13 @@ call void (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 %0) ret void } - + declare void @printf(i8*, ...) declare i32 @__gxx_personality_v0(...) - + !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!2, !3} - + !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 10.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: LineTablesOnly, nameTableKind: None, sysroot: "/pata/tino", sdk: "iPhoneOS13.0.sdk") !1 = !DIFile(filename: "/pata/tino/main.cpp", directory: "/pata/tino") !2 = !{i32 2, !"Debug Info Version", i32 3} @@ -64,15 +64,15 @@ body: | %6:_(s64) = G_CONSTANT i64 0 %7:_(p0) = G_PTR_ADD %5, %6(s64), debug-location !8 %8:_(s64) = G_ANYEXT %2(s32), debug-location !8 - G_STORE %8(s64), %7(p0), debug-location !8 :: (store 8 into stack, align 1) + G_STORE %8(s64), %7(p0), debug-location !8 :: (store (s64) into stack, align 1) BL @printf, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, debug-location !8 ADJCALLSTACKUP 8, 0, implicit-def $sp, implicit $sp, debug-location !8 - %13:_(s64) = G_LOAD %10(p0), debug-location !9 :: (load 4 from `i32* undef`) + %13:_(s64) = G_LOAD %10(p0), debug-location !9 :: (load (s32) from `i32* undef`) ADJCALLSTACKDOWN 8, 0, implicit-def $sp, implicit $sp $x0 = COPY %4(p0) %11:_(p0) = COPY $sp %12:_(p0) = G_PTR_ADD %11, %6(s64) - G_STORE %13(s64), %12(p0) :: (store 8 into stack, align 1) + G_STORE %13(s64), %12(p0) :: (store (s64) into stack, align 1) BL @printf, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0 ADJCALLSTACKUP 8, 0, implicit-def $sp, implicit $sp RET_ReallyLR diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-fconstant.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-fconstant.mir index 6784f75c0df58..3ef70df291232 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-fconstant.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-fconstant.mir @@ -20,13 +20,13 @@ body: | ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1028443341 ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 524 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK: G_STORE [[C]](s32), [[PTR_ADD]](p0) :: (store 4) + ; CHECK: G_STORE [[C]](s32), [[PTR_ADD]](p0) :: (store (s32)) ; CHECK: RET_ReallyLR %0:_(p0) = COPY $x0 %3:_(s32) = G_FCONSTANT float 0x3FA99999A0000000 %1:_(s64) = G_CONSTANT i64 524 %2:_(p0) = G_PTR_ADD %0, %1(s64) - G_STORE %3(s32), %2(p0) :: (store 4) + G_STORE %3(s32), %2(p0) :: (store (s32)) RET_ReallyLR ... --- @@ -43,11 +43,11 @@ body: | ; CHECK: liveins: $x0 ; CHECK: %ptr:_(p0) = COPY $x0 ; CHECK: %c:_(s64) = G_CONSTANT i64 0 - ; CHECK: G_STORE %c(s64), %ptr(p0) :: (store 8) + ; CHECK: G_STORE %c(s64), %ptr(p0) :: (store (s64)) ; CHECK: RET_ReallyLR %ptr:_(p0) = COPY $x0 %c:_(s64) = G_FCONSTANT double 0.0 - G_STORE %c(s64), %ptr(p0) :: (store 8) + G_STORE %c(s64), %ptr(p0) :: (store (s64)) RET_ReallyLR ... --- diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-sext-trunc-sextload.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-sext-trunc-sextload.mir index 523a1cc7ce852..58856573f83cb 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-sext-trunc-sextload.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-sext-trunc-sextload.mir @@ -11,11 +11,11 @@ body: | ; CHECK-LABEL: name: test_combine_sext_trunc_of_sextload ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s64) = G_SEXTLOAD [[COPY]](p0) :: (load 2) + ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s64) = G_SEXTLOAD [[COPY]](p0) :: (load (s16)) ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[SEXTLOAD]](s64) ; CHECK: $w0 = COPY [[TRUNC]](s32) %0:_(p0) = COPY $x0 - %1:_(s64) = G_SEXTLOAD %0:_(p0) :: (load 2) + %1:_(s64) = G_SEXTLOAD %0:_(p0) :: (load (s16)) %2:_(s32) = G_TRUNC %1:_(s64) %3:_(s32) = G_SEXT_INREG %2:_(s32), 16 $w0 = COPY %3(s32) @@ -30,10 +30,10 @@ body: | ; CHECK-LABEL: name: test_combine_sext_of_sextload ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load 2) + ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s16)) ; CHECK: $w0 = COPY [[SEXTLOAD]](s32) %0:_(p0) = COPY $x0 - %1:_(s32) = G_SEXTLOAD %0:_(p0) :: (load 2) + %1:_(s32) = G_SEXTLOAD %0:_(p0) :: (load (s16)) %2:_(s32) = COPY %1:_(s32) %3:_(s32) = G_SEXT_INREG %2:_(s32), 16 $w0 = COPY %3(s32) @@ -49,10 +49,10 @@ body: | ; CHECK-LABEL: name: test_combine_sext_of_sextload_not_matching ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load 2) + ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s16)) ; CHECK: $w0 = COPY [[SEXTLOAD]](s32) %0:_(p0) = COPY $x0 - %1:_(s32) = G_SEXTLOAD %0:_(p0) :: (load 2) + %1:_(s32) = G_SEXTLOAD %0:_(p0) :: (load (s16)) %2:_(s32) = COPY %1:_(s32) %3:_(s32) = G_SEXT_INREG %2:_(s32), 24 $w0 = COPY %3(s32) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-shift-immed-mismatch-crash.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-shift-immed-mismatch-crash.mir index 0cfd974892c43..e55eb19518a97 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-shift-immed-mismatch-crash.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-shift-immed-mismatch-crash.mir @@ -20,7 +20,7 @@ body: | ; CHECK: bb.1: ; CHECK: successors: ; CHECK: bb.2: - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF1]](p0) :: (load 4 from `i32* undef`, align 8) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF1]](p0) :: (load (s32) from `i32* undef`, align 8) ; CHECK: [[MUL:%[0-9]+]]:_(s32) = nsw G_MUL [[C]], [[LOAD]] ; CHECK: [[MUL1:%[0-9]+]]:_(s32) = nsw G_MUL [[MUL]], [[C1]] ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 @@ -45,7 +45,7 @@ body: | bb.3: - %2:_(s32) = G_LOAD %3(p0) :: (load 4 from `i32* undef`, align 8) + %2:_(s32) = G_LOAD %3(p0) :: (load (s32) from `i32* undef`, align 8) %5:_(s32) = nsw G_MUL %4, %2 %7:_(s32) = nsw G_MUL %5, %6 %9:_(s32) = nsw G_MUL %7, %8 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/constant-dbg-loc.ll b/llvm/test/CodeGen/AArch64/GlobalISel/constant-dbg-loc.ll index ae4b39427dcc7..0642e8bf83aca 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/constant-dbg-loc.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/constant-dbg-loc.ll @@ -17,14 +17,14 @@ define i32 @main() #0 !dbg !14 { ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CHECK: [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var2, debug-location !DILocation(line: 0, scope: !22) ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.retval - ; CHECK: G_STORE [[C]](s32), [[FRAME_INDEX]](p0) :: (store 4 into %ir.retval) - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV]](p0), debug-location !17 :: (dereferenceable load 4 from @var1) + ; CHECK: G_STORE [[C]](s32), [[FRAME_INDEX]](p0) :: (store (s32) into %ir.retval) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV]](p0), debug-location !17 :: (dereferenceable load (s32) from @var1) ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[LOAD]](s32), [[C1]], debug-location !19 ; CHECK: G_BRCOND [[ICMP]](s1), %bb.2, debug-location !20 ; CHECK: G_BR %bb.3, debug-location !20 ; CHECK: bb.2.if.then: ; CHECK: successors: %bb.3(0x80000000) - ; CHECK: G_STORE [[C2]](s32), [[GV1]](p0), debug-location !21 :: (store 4 into @var2) + ; CHECK: G_STORE [[C2]](s32), [[GV1]](p0), debug-location !21 :: (store (s32) into @var2) ; CHECK: bb.3.if.end: ; CHECK: $w0 = COPY [[C]](s32), debug-location !24 ; CHECK: RET_ReallyLR implicit $w0, debug-location !24 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/constant-mir-debugify.mir b/llvm/test/CodeGen/AArch64/GlobalISel/constant-mir-debugify.mir index 1ba8b0043b923..60d9b37904971 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/constant-mir-debugify.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/constant-mir-debugify.mir @@ -21,13 +21,13 @@ body: | ; CHECK: DBG_VALUE [[C1]](s64), $noreg, !9, !DIExpression(), debug-location !DILocation(line: 3, column: 1, scope: !6) ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64), debug-location !DILocation(line: 4, column: 1, scope: !6) ; CHECK: DBG_VALUE [[PTR_ADD]](p0), $noreg, !9, !DIExpression(), debug-location !DILocation(line: 4, column: 1, scope: !6) - ; CHECK: G_STORE [[C]](s32), [[PTR_ADD]](p0), debug-location !DILocation(line: 5, column: 1, scope: !6) :: (store 4) + ; CHECK: G_STORE [[C]](s32), [[PTR_ADD]](p0), debug-location !DILocation(line: 5, column: 1, scope: !6) :: (store (s32)) ; CHECK: DBG_VALUE 0, $noreg, !9, !DIExpression(), debug-location !DILocation(line: 5, column: 1, scope: !6) ; CHECK: RET_ReallyLR debug-location !DILocation(line: 6, column: 1, scope: !6) %0:_(p0) = COPY $x0 %3:_(s32) = G_FCONSTANT float 0x3FA99999A0000000 %1:_(s64) = G_CONSTANT i64 524 %2:_(p0) = G_PTR_ADD %0, %1(s64) - G_STORE %3(s32), %2(p0) :: (store 4) + G_STORE %3(s32), %2(p0) :: (store (s32)) RET_ReallyLR ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/contract-store.mir b/llvm/test/CodeGen/AArch64/GlobalISel/contract-store.mir index 892758877bc79..61a70dd782651 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/contract-store.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/contract-store.mir @@ -20,11 +20,11 @@ body: | ; CHECK-LABEL: name: contract_s64_gpr ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 - ; CHECK: STRXui [[COPY1]], [[COPY]], 0 :: (store 8 into %ir.addr) + ; CHECK: STRXui [[COPY1]], [[COPY]], 0 :: (store (s64) into %ir.addr) %0:gpr(p0) = COPY $x0 %1:gpr(s64) = COPY $x1 %2:fpr(s64) = COPY %1 - G_STORE %2:fpr(s64), %0 :: (store 8 into %ir.addr) + G_STORE %2:fpr(s64), %0 :: (store (s64) into %ir.addr) ... --- name: contract_s32_gpr @@ -36,11 +36,11 @@ body: | ; CHECK-LABEL: name: contract_s32_gpr ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 - ; CHECK: STRWui [[COPY1]], [[COPY]], 0 :: (store 4 into %ir.addr) + ; CHECK: STRWui [[COPY1]], [[COPY]], 0 :: (store (s32) into %ir.addr) %0:gpr(p0) = COPY $x0 %1:gpr(s32) = COPY $w1 %2:fpr(s32) = COPY %1 - G_STORE %2:fpr(s32), %0 :: (store 4 into %ir.addr) + G_STORE %2:fpr(s32), %0 :: (store (s32) into %ir.addr) ... --- name: contract_s64_fpr @@ -52,11 +52,11 @@ body: | ; CHECK-LABEL: name: contract_s64_fpr ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 - ; CHECK: STRDui [[COPY1]], [[COPY]], 0 :: (store 8 into %ir.addr) + ; CHECK: STRDui [[COPY1]], [[COPY]], 0 :: (store (s64) into %ir.addr) %0:gpr(p0) = COPY $x0 %1:fpr(s64) = COPY $d1 %2:gpr(s64) = COPY %1 - G_STORE %2:gpr(s64), %0 :: (store 8 into %ir.addr) + G_STORE %2:gpr(s64), %0 :: (store (s64) into %ir.addr) ... --- name: contract_s32_fpr @@ -68,11 +68,11 @@ body: | ; CHECK-LABEL: name: contract_s32_fpr ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1 - ; CHECK: STRSui [[COPY1]], [[COPY]], 0 :: (store 4 into %ir.addr) + ; CHECK: STRSui [[COPY1]], [[COPY]], 0 :: (store (s32) into %ir.addr) %0:gpr(p0) = COPY $x0 %1:fpr(s32) = COPY $s1 %2:gpr(s32) = COPY %1 - G_STORE %2:gpr(s32), %0 :: (store 4 into %ir.addr) + G_STORE %2:gpr(s32), %0 :: (store (s32) into %ir.addr) ... --- name: contract_s16_fpr @@ -84,11 +84,11 @@ body: | ; CHECK-LABEL: name: contract_s16_fpr ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:fpr16 = COPY $h1 - ; CHECK: STRHui [[COPY1]], [[COPY]], 0 :: (store 2 into %ir.addr) + ; CHECK: STRHui [[COPY1]], [[COPY]], 0 :: (store (s16) into %ir.addr) %0:gpr(p0) = COPY $x0 %1:fpr(s16) = COPY $h1 %2:gpr(s16) = COPY %1 - G_STORE %2:gpr(s16), %0 :: (store 2 into %ir.addr) + G_STORE %2:gpr(s16), %0 :: (store (s16) into %ir.addr) ... --- name: contract_g_unmerge_values_first @@ -101,13 +101,13 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[LOAD:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY [[LOAD]].dsub - ; CHECK: STRDui [[COPY1]], [[COPY]], 0 :: (store 8 into %ir.addr) + ; CHECK: STRDui [[COPY1]], [[COPY]], 0 :: (store (s64) into %ir.addr) %0:gpr(p0) = COPY $x0 - %1:fpr(<2 x s64>) = G_LOAD %0:gpr(p0) :: (dereferenceable load 16 from %ir.addr) + %1:fpr(<2 x s64>) = G_LOAD %0:gpr(p0) :: (dereferenceable load (<2 x s64>) from %ir.addr) %2:fpr(s64), %3:fpr(s64) = G_UNMERGE_VALUES %1:fpr(<2 x s64>) %4:gpr(s64) = COPY %2 %5:gpr(s64) = COPY %3 - G_STORE %4:gpr(s64), %0 :: (store 8 into %ir.addr) + G_STORE %4:gpr(s64), %0 :: (store (s64) into %ir.addr) ... --- name: contract_g_unmerge_values_second @@ -120,10 +120,10 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[LOAD:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = CPYi64 [[LOAD]], 1 - ; CHECK: STRDui [[COPY1]], [[COPY]], 0 :: (store 8 into %ir.addr) + ; CHECK: STRDui [[COPY1]], [[COPY]], 0 :: (store (s64) into %ir.addr) %0:gpr(p0) = COPY $x0 - %1:fpr(<2 x s64>) = G_LOAD %0:gpr(p0) :: (dereferenceable load 16 from %ir.addr) + %1:fpr(<2 x s64>) = G_LOAD %0:gpr(p0) :: (dereferenceable load (<2 x s64>) from %ir.addr) %2:fpr(s64), %3:fpr(s64) = G_UNMERGE_VALUES %1:fpr(<2 x s64>) %4:gpr(s64) = COPY %2 %5:gpr(s64) = COPY %3 - G_STORE %5:gpr(s64), %0 :: (store 8 into %ir.addr) + G_STORE %5:gpr(s64), %0 :: (store (s64) into %ir.addr) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/debug-loc-legalize-tail-call.mir b/llvm/test/CodeGen/AArch64/GlobalISel/debug-loc-legalize-tail-call.mir index bfc69d5496ce9..ab93d897a332d 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/debug-loc-legalize-tail-call.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/debug-loc-legalize-tail-call.mir @@ -5,13 +5,13 @@ # CHECK-NOT: remark: file.ll:[[#]]:[[#]]: lost [[#]] debug locations during pass ---- | +--- | define void @snork() !dbg !6 { unreachable } - + !llvm.module.flags = !{!0} !llvm.dbg.cu = !{!1} !llvm.debugify = !{!4, !5} - + !0 = !{i32 2, !"Debug Info Version", i32 3} !1 = distinct !DICompileUnit(language: DW_LANG_C, file: !2, producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !3) !2 = !DIFile(filename: "file.ll", directory: "/") @@ -36,7 +36,7 @@ body: | %0:_(p0) = G_IMPLICIT_DEF debug-location !DILocation(line: 0, scope: !6) %1:_(s8) = G_CONSTANT i8 0 %2:_(s64) = G_IMPLICIT_DEF debug-location !DILocation(line: 0, scope: !6) - G_MEMSET %0(p0), %1(s8), %2(s64), 1, debug-location !11 :: (store 1) + G_MEMSET %0(p0), %1(s8), %2(s64), 1, debug-location !11 :: (store (s8)) DBG_VALUE 0, 0, !9, !DIExpression(), debug-location !12 RET_ReallyLR debug-location !12 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/fold-global-offsets.mir b/llvm/test/CodeGen/AArch64/GlobalISel/fold-global-offsets.mir index 8d659ac72a0b7..b1ff56d25a6aa 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/fold-global-offsets.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/fold-global-offsets.mir @@ -85,8 +85,8 @@ body: | ; CHECK: %val1:_(s64) = COPY $x0 ; CHECK: %val2:_(s64) = COPY $x1 ; CHECK: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @g + 1 - ; CHECK: G_STORE %val1(s64), [[GV]](p0) :: (store 8) - ; CHECK: G_STORE %val2(s64), [[GV]](p0) :: (store 8) + ; CHECK: G_STORE %val1(s64), [[GV]](p0) :: (store (s64)) + ; CHECK: G_STORE %val2(s64), [[GV]](p0) :: (store (s64)) ; CHECK: RET_ReallyLR implicit $x0 %val1:_(s64) = COPY $x0 %val2:_(s64) = COPY $x1 @@ -94,8 +94,8 @@ body: | %offset:_(s64) = G_CONSTANT i64 1 %ptr_add1:_(p0) = G_PTR_ADD %global, %offset(s64) %ptr_add2:_(p0) = G_PTR_ADD %global, %offset(s64) - G_STORE %val1:_(s64), %ptr_add1 :: (store 8) - G_STORE %val2:_(s64), %ptr_add2 :: (store 8) + G_STORE %val1:_(s64), %ptr_add1 :: (store (s64)) + G_STORE %val2:_(s64), %ptr_add2 :: (store (s64)) RET_ReallyLR implicit $x0 ... @@ -118,8 +118,8 @@ body: | ; CHECK: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @g + 2 ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK: %ptr_add2:_(p0) = G_PTR_ADD [[GV]], [[C]](s64) - ; CHECK: G_STORE %val1(s64), [[GV]](p0) :: (store 8) - ; CHECK: G_STORE %val2(s64), %ptr_add2(p0) :: (store 8) + ; CHECK: G_STORE %val1(s64), [[GV]](p0) :: (store (s64)) + ; CHECK: G_STORE %val2(s64), %ptr_add2(p0) :: (store (s64)) ; CHECK: RET_ReallyLR implicit $x0 %val1:_(s64) = COPY $x0 %val2:_(s64) = COPY $x1 @@ -128,8 +128,8 @@ body: | %offset2:_(s64) = G_CONSTANT i64 10 %ptr_add1:_(p0) = G_PTR_ADD %global, %offset1(s64) %ptr_add2:_(p0) = G_PTR_ADD %global, %offset2(s64) - G_STORE %val1:_(s64), %ptr_add1 :: (store 8) - G_STORE %val2:_(s64), %ptr_add2 :: (store 8) + G_STORE %val1:_(s64), %ptr_add1 :: (store (s64)) + G_STORE %val2:_(s64), %ptr_add2 :: (store (s64)) RET_ReallyLR implicit $x0 ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/fp128-legalize-crash-pr35690.mir b/llvm/test/CodeGen/AArch64/GlobalISel/fp128-legalize-crash-pr35690.mir index d7aef52744be5..8e65483583a00 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/fp128-legalize-crash-pr35690.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/fp128-legalize-crash-pr35690.mir @@ -2,7 +2,7 @@ --- | target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64" - + define fp128 @x(fp128 %a) { entry: %a.addr = alloca fp128, align 16 @@ -11,7 +11,7 @@ %sub = fsub fp128 0xL00000000000000008000000000000000, %0 ret fp128 %sub } - + ... --- name: x @@ -21,9 +21,9 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -fixedStack: -stack: - - { id: 0, name: a.addr, type: default, offset: 0, size: 16, alignment: 16, +fixedStack: +stack: + - { id: 0, name: a.addr, type: default, offset: 0, size: 16, alignment: 16, stack-id: default, callee-saved-register: '', callee-saved-restored: true, debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } @@ -36,8 +36,8 @@ body: | ; CHECK: ret %0:_(s128) = COPY $q0 %1:_(p0) = G_FRAME_INDEX %stack.0.a.addr - G_STORE %0(s128), %1(p0) :: (store 16 into %ir.a.addr) - %2:_(s128) = G_LOAD %1(p0) :: (load 16 from %ir.a.addr) + G_STORE %0(s128), %1(p0) :: (store (s128) into %ir.a.addr) + %2:_(s128) = G_LOAD %1(p0) :: (load (s128) from %ir.a.addr) %3:_(s128) = G_FNEG %2 $q0 = COPY %3(s128) RET_ReallyLR implicit $q0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/fp16-copy-gpr.mir b/llvm/test/CodeGen/AArch64/GlobalISel/fp16-copy-gpr.mir index 193b278b6b2fd..35549c5685066 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/fp16-copy-gpr.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/fp16-copy-gpr.mir @@ -55,7 +55,7 @@ body: | ; CHECK: [[COPY3:%[0-9]+]]:gpr32 = COPY [[SUBREG_TO_REG1]] ; CHECK: [[BFMWri1:%[0-9]+]]:gpr32 = BFMWri [[BFMWri]], [[COPY3]], 16, 15 ; CHECK: [[COPY4:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: STRWui [[BFMWri1]], [[COPY4]], 0 :: (store 4 into %ir.addr, align 2) + ; CHECK: STRWui [[BFMWri1]], [[COPY4]], 0 :: (store (s32) into %ir.addr, align 2) ; CHECK: RET_ReallyLR %1:fpr(s16) = COPY $h0 %2:fpr(s16) = COPY $h1 @@ -66,7 +66,7 @@ body: | %5:gpr(s32) = G_INSERT %4, %12(s16), 16 %0:gpr(s32) = COPY %5(s32) %6:gpr(p0) = COPY $x0 - G_STORE %0(s32), %6(p0) :: (store 4 into %ir.addr, align 2) + G_STORE %0(s32), %6(p0) :: (store (s32) into %ir.addr, align 2) RET_ReallyLR ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy-forced.mir b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy-forced.mir index 73766ff82d2c8..b57b5339a691c 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy-forced.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy-forced.mir @@ -34,53 +34,53 @@ body: | ; CHECK: liveins: $x0, $x1 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 - ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.1, align 4) - ; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store 16 into %ir.0, align 4) + ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load (s128) from %ir.1, align 4) + ; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store (s128) into %ir.0, align 4) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p0) :: (load 16 from %ir.1 + 16, align 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from %ir.1 + 16, align 4) ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: G_STORE [[LOAD1]](s128), [[PTR_ADD1]](p0) :: (store 16 into %ir.0 + 16, align 4) + ; CHECK: G_STORE [[LOAD1]](s128), [[PTR_ADD1]](p0) :: (store (s128) into %ir.0 + 16, align 4) ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD2]](p0) :: (load 16 from %ir.1 + 32, align 4) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD2]](p0) :: (load (s128) from %ir.1 + 32, align 4) ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK: G_STORE [[LOAD2]](s128), [[PTR_ADD3]](p0) :: (store 16 into %ir.0 + 32, align 4) + ; CHECK: G_STORE [[LOAD2]](s128), [[PTR_ADD3]](p0) :: (store (s128) into %ir.0 + 32, align 4) ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64) - ; CHECK: [[LOAD3:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD4]](p0) :: (load 16 from %ir.1 + 48, align 4) + ; CHECK: [[LOAD3:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD4]](p0) :: (load (s128) from %ir.1 + 48, align 4) ; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CHECK: G_STORE [[LOAD3]](s128), [[PTR_ADD5]](p0) :: (store 16 into %ir.0 + 48, align 4) + ; CHECK: G_STORE [[LOAD3]](s128), [[PTR_ADD5]](p0) :: (store (s128) into %ir.0 + 48, align 4) ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 ; CHECK: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C3]](s64) - ; CHECK: [[LOAD4:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD6]](p0) :: (load 16 from %ir.1 + 64, align 4) + ; CHECK: [[LOAD4:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD6]](p0) :: (load (s128) from %ir.1 + 64, align 4) ; CHECK: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CHECK: G_STORE [[LOAD4]](s128), [[PTR_ADD7]](p0) :: (store 16 into %ir.0 + 64, align 4) + ; CHECK: G_STORE [[LOAD4]](s128), [[PTR_ADD7]](p0) :: (store (s128) into %ir.0 + 64, align 4) ; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 80 ; CHECK: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C4]](s64) - ; CHECK: [[LOAD5:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD8]](p0) :: (load 16 from %ir.1 + 80, align 4) + ; CHECK: [[LOAD5:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD8]](p0) :: (load (s128) from %ir.1 + 80, align 4) ; CHECK: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CHECK: G_STORE [[LOAD5]](s128), [[PTR_ADD9]](p0) :: (store 16 into %ir.0 + 80, align 4) + ; CHECK: G_STORE [[LOAD5]](s128), [[PTR_ADD9]](p0) :: (store (s128) into %ir.0 + 80, align 4) ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 96 ; CHECK: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C5]](s64) - ; CHECK: [[LOAD6:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD10]](p0) :: (load 16 from %ir.1 + 96, align 4) + ; CHECK: [[LOAD6:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD10]](p0) :: (load (s128) from %ir.1 + 96, align 4) ; CHECK: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CHECK: G_STORE [[LOAD6]](s128), [[PTR_ADD11]](p0) :: (store 16 into %ir.0 + 96, align 4) + ; CHECK: G_STORE [[LOAD6]](s128), [[PTR_ADD11]](p0) :: (store (s128) into %ir.0 + 96, align 4) ; CHECK: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 112 ; CHECK: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C6]](s64) - ; CHECK: [[LOAD7:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD12]](p0) :: (load 16 from %ir.1 + 112, align 4) + ; CHECK: [[LOAD7:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD12]](p0) :: (load (s128) from %ir.1 + 112, align 4) ; CHECK: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CHECK: G_STORE [[LOAD7]](s128), [[PTR_ADD13]](p0) :: (store 16 into %ir.0 + 112, align 4) + ; CHECK: G_STORE [[LOAD7]](s128), [[PTR_ADD13]](p0) :: (store (s128) into %ir.0 + 112, align 4) ; CHECK: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 127 ; CHECK: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C7]](s64) - ; CHECK: [[LOAD8:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD14]](p0) :: (load 16 from %ir.1 + 127, align 1, basealign 4) + ; CHECK: [[LOAD8:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD14]](p0) :: (load (s128) from %ir.1 + 127, align 1, basealign 4) ; CHECK: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; CHECK: G_STORE [[LOAD8]](s128), [[PTR_ADD15]](p0) :: (store 16 into %ir.0 + 127, align 1, basealign 4) + ; CHECK: G_STORE [[LOAD8]](s128), [[PTR_ADD15]](p0) :: (store (s128) into %ir.0 + 127, align 1, basealign 4) ; CHECK: RET_ReallyLR %0:_(p0) = COPY $x0 %1:_(p0) = COPY $x1 %2:_(s64) = G_CONSTANT i64 143 - G_MEMCPY_INLINE %0(p0), %1(p0), %2(s64) :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + G_MEMCPY_INLINE %0(p0), %1(p0), %2(s64) :: (store (s8) into %ir.0, align 4), (load (s8) from %ir.1, align 4) RET_ReallyLR ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy.mir b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy.mir index eefe948e20778..4ffcbdfa7e7ec 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy.mir @@ -70,12 +70,12 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 - ; CHECK: G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[COPY2]](s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + ; CHECK: G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[COPY2]](s64), 1 :: (store (s8) into %ir.0, align 4), (load (s8) from %ir.1, align 4) ; CHECK: RET_ReallyLR %0:_(p0) = COPY $x0 %1:_(p0) = COPY $x1 %2:_(s64) = COPY $x2 - G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store (s8) into %ir.0, align 4), (load (s8) from %ir.1, align 4) RET_ReallyLR ... @@ -96,33 +96,33 @@ body: | ; CHECK: liveins: $x0, $x1 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 - ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.1, align 4) - ; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store 16 into %ir.0, align 4) + ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load (s128) from %ir.1, align 4) + ; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store (s128) into %ir.0, align 4) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[GEP]](p0) :: (load 16 from %ir.1 + 16, align 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[GEP]](p0) :: (load (s128) from %ir.1 + 16, align 4) ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: G_STORE [[LOAD1]](s128), [[GEP1]](p0) :: (store 16 into %ir.0 + 16, align 4) + ; CHECK: G_STORE [[LOAD1]](s128), [[GEP1]](p0) :: (store (s128) into %ir.0 + 16, align 4) ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[GEP2]](p0) :: (load 16 from %ir.1 + 32, align 4) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[GEP2]](p0) :: (load (s128) from %ir.1 + 32, align 4) ; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK: G_STORE [[LOAD2]](s128), [[GEP3]](p0) :: (store 16 into %ir.0 + 32, align 4) + ; CHECK: G_STORE [[LOAD2]](s128), [[GEP3]](p0) :: (store (s128) into %ir.0 + 32, align 4) ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 ; CHECK: [[GEP4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64) - ; CHECK: [[LOAD3:%[0-9]+]]:_(s128) = G_LOAD [[GEP4]](p0) :: (load 16 from %ir.1 + 48, align 4) + ; CHECK: [[LOAD3:%[0-9]+]]:_(s128) = G_LOAD [[GEP4]](p0) :: (load (s128) from %ir.1 + 48, align 4) ; CHECK: [[GEP5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CHECK: G_STORE [[LOAD3]](s128), [[GEP5]](p0) :: (store 16 into %ir.0 + 48, align 4) + ; CHECK: G_STORE [[LOAD3]](s128), [[GEP5]](p0) :: (store (s128) into %ir.0 + 48, align 4) ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 ; CHECK: [[GEP6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C3]](s64) - ; CHECK: [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[GEP6]](p0) :: (load 8 from %ir.1 + 64, align 4) + ; CHECK: [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[GEP6]](p0) :: (load (s64) from %ir.1 + 64, align 4) ; CHECK: [[GEP7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CHECK: G_STORE [[LOAD4]](s64), [[GEP7]](p0) :: (store 8 into %ir.0 + 64, align 4) + ; CHECK: G_STORE [[LOAD4]](s64), [[GEP7]](p0) :: (store (s64) into %ir.0 + 64, align 4) ; CHECK: RET_ReallyLR %0:_(p0) = COPY $x0 %1:_(p0) = COPY $x1 %2:_(s64) = G_CONSTANT i64 72 - G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store (s8) into %ir.0, align 4), (load (s8) from %ir.1, align 4) RET_ReallyLR ... @@ -143,33 +143,33 @@ body: | ; CHECK: liveins: $x0, $x1 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 - ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.1, align 4) - ; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store 16 into %ir.0, align 4) + ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load (s128) from %ir.1, align 4) + ; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store (s128) into %ir.0, align 4) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[GEP]](p0) :: (load 16 from %ir.1 + 16, align 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[GEP]](p0) :: (load (s128) from %ir.1 + 16, align 4) ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: G_STORE [[LOAD1]](s128), [[GEP1]](p0) :: (store 16 into %ir.0 + 16, align 4) + ; CHECK: G_STORE [[LOAD1]](s128), [[GEP1]](p0) :: (store (s128) into %ir.0 + 16, align 4) ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[GEP2]](p0) :: (load 16 from %ir.1 + 32, align 4) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[GEP2]](p0) :: (load (s128) from %ir.1 + 32, align 4) ; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK: G_STORE [[LOAD2]](s128), [[GEP3]](p0) :: (store 16 into %ir.0 + 32, align 4) + ; CHECK: G_STORE [[LOAD2]](s128), [[GEP3]](p0) :: (store (s128) into %ir.0 + 32, align 4) ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 ; CHECK: [[GEP4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64) - ; CHECK: [[LOAD3:%[0-9]+]]:_(s128) = G_LOAD [[GEP4]](p0) :: (load 16 from %ir.1 + 48, align 4) + ; CHECK: [[LOAD3:%[0-9]+]]:_(s128) = G_LOAD [[GEP4]](p0) :: (load (s128) from %ir.1 + 48, align 4) ; CHECK: [[GEP5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CHECK: G_STORE [[LOAD3]](s128), [[GEP5]](p0) :: (store 16 into %ir.0 + 48, align 4) + ; CHECK: G_STORE [[LOAD3]](s128), [[GEP5]](p0) :: (store (s128) into %ir.0 + 48, align 4) ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 ; CHECK: [[GEP6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C3]](s64) - ; CHECK: [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[GEP6]](p0) :: (load 8 from %ir.1 + 64, align 4) + ; CHECK: [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[GEP6]](p0) :: (load (s64) from %ir.1 + 64, align 4) ; CHECK: [[GEP7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CHECK: G_STORE [[LOAD4]](s64), [[GEP7]](p0) :: (store 8 into %ir.0 + 64, align 4) + ; CHECK: G_STORE [[LOAD4]](s64), [[GEP7]](p0) :: (store (s64) into %ir.0 + 64, align 4) ; CHECK: RET_ReallyLR %0:_(p0) = COPY $x0 %1:_(p0) = COPY $x1 %2:_(s64) = G_CONSTANT i64 72 - G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store (s8) into %ir.0, align 4), (load (s8) from %ir.1, align 4) RET_ReallyLR ... @@ -191,12 +191,12 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 72 - ; CHECK: G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[C]](s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + ; CHECK: G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[C]](s64), 1 :: (store (s8) into %ir.0, align 4), (load (s8) from %ir.1, align 4) ; CHECK: RET_ReallyLR %0:_(p0) = COPY $x0 %1:_(p0) = COPY $x1 %2:_(s64) = G_CONSTANT i64 72 - G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store (s8) into %ir.0, align 4), (load (s8) from %ir.1, align 4) RET_ReallyLR ... @@ -217,53 +217,53 @@ body: | ; CHECK: liveins: $x0, $x1 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 - ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.1, align 4) - ; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store 16 into %ir.0, align 4) + ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load (s128) from %ir.1, align 4) + ; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store (s128) into %ir.0, align 4) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[GEP]](p0) :: (load 16 from %ir.1 + 16, align 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[GEP]](p0) :: (load (s128) from %ir.1 + 16, align 4) ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: G_STORE [[LOAD1]](s128), [[GEP1]](p0) :: (store 16 into %ir.0 + 16, align 4) + ; CHECK: G_STORE [[LOAD1]](s128), [[GEP1]](p0) :: (store (s128) into %ir.0 + 16, align 4) ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[GEP2]](p0) :: (load 16 from %ir.1 + 32, align 4) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[GEP2]](p0) :: (load (s128) from %ir.1 + 32, align 4) ; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK: G_STORE [[LOAD2]](s128), [[GEP3]](p0) :: (store 16 into %ir.0 + 32, align 4) + ; CHECK: G_STORE [[LOAD2]](s128), [[GEP3]](p0) :: (store (s128) into %ir.0 + 32, align 4) ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 ; CHECK: [[GEP4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64) - ; CHECK: [[LOAD3:%[0-9]+]]:_(s128) = G_LOAD [[GEP4]](p0) :: (load 16 from %ir.1 + 48, align 4) + ; CHECK: [[LOAD3:%[0-9]+]]:_(s128) = G_LOAD [[GEP4]](p0) :: (load (s128) from %ir.1 + 48, align 4) ; CHECK: [[GEP5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CHECK: G_STORE [[LOAD3]](s128), [[GEP5]](p0) :: (store 16 into %ir.0 + 48, align 4) + ; CHECK: G_STORE [[LOAD3]](s128), [[GEP5]](p0) :: (store (s128) into %ir.0 + 48, align 4) ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 ; CHECK: [[GEP6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C3]](s64) - ; CHECK: [[LOAD4:%[0-9]+]]:_(s128) = G_LOAD [[GEP6]](p0) :: (load 16 from %ir.1 + 64, align 4) + ; CHECK: [[LOAD4:%[0-9]+]]:_(s128) = G_LOAD [[GEP6]](p0) :: (load (s128) from %ir.1 + 64, align 4) ; CHECK: [[GEP7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CHECK: G_STORE [[LOAD4]](s128), [[GEP7]](p0) :: (store 16 into %ir.0 + 64, align 4) + ; CHECK: G_STORE [[LOAD4]](s128), [[GEP7]](p0) :: (store (s128) into %ir.0 + 64, align 4) ; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 80 ; CHECK: [[GEP8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C4]](s64) - ; CHECK: [[LOAD5:%[0-9]+]]:_(s128) = G_LOAD [[GEP8]](p0) :: (load 16 from %ir.1 + 80, align 4) + ; CHECK: [[LOAD5:%[0-9]+]]:_(s128) = G_LOAD [[GEP8]](p0) :: (load (s128) from %ir.1 + 80, align 4) ; CHECK: [[GEP9:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CHECK: G_STORE [[LOAD5]](s128), [[GEP9]](p0) :: (store 16 into %ir.0 + 80, align 4) + ; CHECK: G_STORE [[LOAD5]](s128), [[GEP9]](p0) :: (store (s128) into %ir.0 + 80, align 4) ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 96 ; CHECK: [[GEP10:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C5]](s64) - ; CHECK: [[LOAD6:%[0-9]+]]:_(s128) = G_LOAD [[GEP10]](p0) :: (load 16 from %ir.1 + 96, align 4) + ; CHECK: [[LOAD6:%[0-9]+]]:_(s128) = G_LOAD [[GEP10]](p0) :: (load (s128) from %ir.1 + 96, align 4) ; CHECK: [[GEP11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CHECK: G_STORE [[LOAD6]](s128), [[GEP11]](p0) :: (store 16 into %ir.0 + 96, align 4) + ; CHECK: G_STORE [[LOAD6]](s128), [[GEP11]](p0) :: (store (s128) into %ir.0 + 96, align 4) ; CHECK: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 112 ; CHECK: [[GEP12:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C6]](s64) - ; CHECK: [[LOAD7:%[0-9]+]]:_(s128) = G_LOAD [[GEP12]](p0) :: (load 16 from %ir.1 + 112, align 4) + ; CHECK: [[LOAD7:%[0-9]+]]:_(s128) = G_LOAD [[GEP12]](p0) :: (load (s128) from %ir.1 + 112, align 4) ; CHECK: [[GEP13:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CHECK: G_STORE [[LOAD7]](s128), [[GEP13]](p0) :: (store 16 into %ir.0 + 112, align 4) + ; CHECK: G_STORE [[LOAD7]](s128), [[GEP13]](p0) :: (store (s128) into %ir.0 + 112, align 4) ; CHECK: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 127 ; CHECK: [[GEP14:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C7]](s64) - ; CHECK: [[LOAD8:%[0-9]+]]:_(s128) = G_LOAD [[GEP14]](p0) :: (load 16 from %ir.1 + 127, align 1, basealign 4) + ; CHECK: [[LOAD8:%[0-9]+]]:_(s128) = G_LOAD [[GEP14]](p0) :: (load (s128) from %ir.1 + 127, align 1, basealign 4) ; CHECK: [[GEP15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; CHECK: G_STORE [[LOAD8]](s128), [[GEP15]](p0) :: (store 16 into %ir.0 + 127, align 1, basealign 4) + ; CHECK: G_STORE [[LOAD8]](s128), [[GEP15]](p0) :: (store (s128) into %ir.0 + 127, align 1, basealign 4) ; CHECK: RET_ReallyLR %0:_(p0) = COPY $x0 %1:_(p0) = COPY $x1 %2:_(s64) = G_CONSTANT i64 143 - G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store (s8) into %ir.0, align 4), (load (s8) from %ir.1, align 4) RET_ReallyLR ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/inline-memmove.mir b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memmove.mir index c4444731fbc7e..1f931221b2a2c 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/inline-memmove.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memmove.mir @@ -55,12 +55,12 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 - ; CHECK: G_MEMMOVE [[COPY]](p0), [[COPY1]](p0), [[COPY2]](s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + ; CHECK: G_MEMMOVE [[COPY]](p0), [[COPY1]](p0), [[COPY2]](s64), 1 :: (store (s8) into %ir.0, align 4), (load (s8) from %ir.1, align 4) ; CHECK: RET_ReallyLR %0:_(p0) = COPY $x0 %1:_(p0) = COPY $x1 %2:_(s64) = COPY $x2 - G_MEMMOVE %0(p0), %1(p0), %2(s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + G_MEMMOVE %0(p0), %1(p0), %2(s64), 1 :: (store (s8) into %ir.0, align 4), (load (s8) from %ir.1, align 4) RET_ReallyLR ... @@ -76,25 +76,25 @@ body: | ; CHECK: liveins: $x0, $x1 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 - ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.1, align 4) + ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load (s128) from %ir.1, align 4) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[GEP]](p0) :: (load 16 from %ir.1 + 16, align 4) + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from %ir.1 + 16, align 4) ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[GEP1]](p0) :: (load 16 from %ir.1 + 32, align 4) - ; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store 16 into %ir.0, align 4) + ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD1]](p0) :: (load (s128) from %ir.1 + 32, align 4) + ; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store (s128) into %ir.0, align 4) ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CHECK: G_STORE [[LOAD1]](s128), [[GEP2]](p0) :: (store 16 into %ir.0 + 16, align 4) + ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK: G_STORE [[LOAD1]](s128), [[PTR_ADD2]](p0) :: (store (s128) into %ir.0 + 16, align 4) ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CHECK: G_STORE [[LOAD2]](s128), [[GEP3]](p0) :: (store 16 into %ir.0 + 32, align 4) + ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; CHECK: G_STORE [[LOAD2]](s128), [[PTR_ADD3]](p0) :: (store (s128) into %ir.0 + 32, align 4) ; CHECK: RET_ReallyLR %0:_(p0) = COPY $x0 %1:_(p0) = COPY $x1 %2:_(s64) = G_CONSTANT i64 48 - G_MEMMOVE %0(p0), %1(p0), %2(s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + G_MEMMOVE %0(p0), %1(p0), %2(s64), 1 :: (store (s8) into %ir.0, align 4), (load (s8) from %ir.1, align 4) RET_ReallyLR ... @@ -111,12 +111,12 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 96 - ; CHECK: G_MEMMOVE [[COPY]](p0), [[COPY1]](p0), [[C]](s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + ; CHECK: G_MEMMOVE [[COPY]](p0), [[COPY1]](p0), [[C]](s64), 1 :: (store (s8) into %ir.0, align 4), (load (s8) from %ir.1, align 4) ; CHECK: RET_ReallyLR %0:_(p0) = COPY $x0 %1:_(p0) = COPY $x1 %2:_(s64) = G_CONSTANT i64 96 - G_MEMMOVE %0(p0), %1(p0), %2(s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + G_MEMMOVE %0(p0), %1(p0), %2(s64), 1 :: (store (s8) into %ir.0, align 4), (load (s8) from %ir.1, align 4) RET_ReallyLR ... @@ -132,31 +132,31 @@ body: | ; CHECK: liveins: $x0, $x1 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 - ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.1, align 4) + ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load (s128) from %ir.1, align 4) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[GEP]](p0) :: (load 16 from %ir.1 + 16, align 4) + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from %ir.1 + 16, align 4) ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[GEP1]](p0) :: (load 16 from %ir.1 + 32, align 4) + ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD1]](p0) :: (load (s128) from %ir.1 + 32, align 4) ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64) - ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load 4 from %ir.1 + 48) - ; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store 16 into %ir.0, align 4) + ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64) + ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from %ir.1 + 48) + ; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store (s128) into %ir.0, align 4) ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CHECK: G_STORE [[LOAD1]](s128), [[GEP3]](p0) :: (store 16 into %ir.0 + 16, align 4) + ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; CHECK: G_STORE [[LOAD1]](s128), [[PTR_ADD3]](p0) :: (store (s128) into %ir.0 + 16, align 4) ; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK: [[GEP4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CHECK: G_STORE [[LOAD2]](s128), [[GEP4]](p0) :: (store 16 into %ir.0 + 32, align 4) + ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; CHECK: G_STORE [[LOAD2]](s128), [[PTR_ADD4]](p0) :: (store (s128) into %ir.0 + 32, align 4) ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; CHECK: [[GEP5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CHECK: G_STORE [[LOAD3]](s32), [[GEP5]](p0) :: (store 4 into %ir.0 + 48) + ; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) + ; CHECK: G_STORE [[LOAD3]](s32), [[PTR_ADD5]](p0) :: (store (s32) into %ir.0 + 48) ; CHECK: RET_ReallyLR %0:_(p0) = COPY $x0 %1:_(p0) = COPY $x1 %2:_(s64) = G_CONSTANT i64 52 - G_MEMMOVE %0(p0), %1(p0), %2(s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + G_MEMMOVE %0(p0), %1(p0), %2(s64), 1 :: (store (s8) into %ir.0, align 4), (load (s8) from %ir.1, align 4) RET_ReallyLR ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/inline-memset.mir b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memset.mir index ac5e9f7317e7f..1e80895d91aea 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/inline-memset.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memset.mir @@ -69,14 +69,14 @@ body: | ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2 ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY2]](s32) - ; CHECK: G_MEMSET [[COPY]](p0), [[TRUNC]](s8), [[ZEXT]](s64), 1 :: (store 1 into %ir.dst) + ; CHECK: G_MEMSET [[COPY]](p0), [[TRUNC]](s8), [[ZEXT]](s64), 1 :: (store (s8) into %ir.dst) ; CHECK: RET_ReallyLR %0:_(p0) = COPY $x0 %1:_(s32) = COPY $w1 %2:_(s32) = COPY $w2 %3:_(s8) = G_TRUNC %1(s32) %4:_(s64) = G_ZEXT %2(s32) - G_MEMSET %0(p0), %3(s8), %4(s64), 1 :: (store 1 into %ir.dst) + G_MEMSET %0(p0), %3(s8), %4(s64), 1 :: (store (s8) into %ir.dst) RET_ReallyLR ... @@ -96,16 +96,16 @@ body: | ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s8) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 72340172838076673 ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[ZEXT]], [[C]] - ; CHECK: G_STORE [[MUL]](s64), [[COPY]](p0) :: (store 8 into %ir.dst, align 1) + ; CHECK: G_STORE [[MUL]](s64), [[COPY]](p0) :: (store (s64) into %ir.dst, align 1) ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK: G_STORE [[MUL]](s64), [[PTR_ADD]](p0) :: (store 8 into %ir.dst + 8, align 1) + ; CHECK: G_STORE [[MUL]](s64), [[PTR_ADD]](p0) :: (store (s64) into %ir.dst + 8, align 1) ; CHECK: RET_ReallyLR %0:_(p0) = COPY $x0 %1:_(s32) = COPY $w1 %3:_(s64) = G_CONSTANT i64 16 %2:_(s8) = G_TRUNC %1(s32) - G_MEMSET %0(p0), %2(s8), %3(s64), 1 :: (store 1 into %ir.dst) + G_MEMSET %0(p0), %2(s8), %3(s64), 1 :: (store (s8) into %ir.dst) RET_ReallyLR ... @@ -122,22 +122,22 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[COPY]](p0) :: (store 16 into %ir.dst, align 1) + ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[COPY]](p0) :: (store (s128) into %ir.dst, align 1) ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[PTR_ADD]](p0) :: (store 16 into %ir.dst + 16, align 1) + ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[PTR_ADD]](p0) :: (store (s128) into %ir.dst + 16, align 1) ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[PTR_ADD1]](p0) :: (store 16 into %ir.dst + 32, align 1) + ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[PTR_ADD1]](p0) :: (store (s128) into %ir.dst + 32, align 1) ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[PTR_ADD2]](p0) :: (store 16 into %ir.dst + 48, align 1) + ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[PTR_ADD2]](p0) :: (store (s128) into %ir.dst + 48, align 1) ; CHECK: RET_ReallyLR %0:_(p0) = COPY $x0 %1:_(s32) = G_CONSTANT i32 0 %3:_(s64) = G_CONSTANT i64 64 %2:_(s8) = G_TRUNC %1(s32) - G_MEMSET %0(p0), %2(s8), %3(s64), 1 :: (store 1 into %ir.dst) + G_MEMSET %0(p0), %2(s8), %3(s64), 1 :: (store (s8) into %ir.dst) RET_ReallyLR ... @@ -154,15 +154,15 @@ body: | ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4629771061636907072 - ; CHECK: G_STORE [[C]](s64), [[COPY]](p0) :: (store 8 into %ir.dst, align 1) + ; CHECK: G_STORE [[C]](s64), [[COPY]](p0) :: (store (s64) into %ir.dst, align 1) ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK: G_STORE [[C]](s64), [[PTR_ADD]](p0) :: (store 8 into %ir.dst + 8, align 1) + ; CHECK: G_STORE [[C]](s64), [[PTR_ADD]](p0) :: (store (s64) into %ir.dst + 8, align 1) ; CHECK: RET_ReallyLR %0:_(p0) = COPY $x0 %1:_(s8) = G_CONSTANT i8 64 %2:_(s64) = G_CONSTANT i64 16 - G_MEMSET %0(p0), %1(s8), %2(s64), 1 :: (store 1 into %ir.dst) + G_MEMSET %0(p0), %1(s8), %2(s64), 1 :: (store (s8) into %ir.dst) RET_ReallyLR ... @@ -183,22 +183,22 @@ body: | ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 72340172838076673 ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[ZEXT]], [[C]] ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MUL]](s64), [[MUL]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[COPY]](p0) :: (store 16 into %ir.dst, align 1) + ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[COPY]](p0) :: (store (s128) into %ir.dst, align 1) ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[PTR_ADD]](p0) :: (store 16 into %ir.dst + 16, align 1) + ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[PTR_ADD]](p0) :: (store (s128) into %ir.dst + 16, align 1) ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[PTR_ADD1]](p0) :: (store 16 into %ir.dst + 32, align 1) + ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[PTR_ADD1]](p0) :: (store (s128) into %ir.dst + 32, align 1) ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[PTR_ADD2]](p0) :: (store 16 into %ir.dst + 44, align 1) + ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[PTR_ADD2]](p0) :: (store (s128) into %ir.dst + 44, align 1) ; CHECK: RET_ReallyLR %0:_(p0) = COPY $x0 %1:_(s32) = COPY $w1 %3:_(s64) = G_CONSTANT i64 60 %2:_(s8) = G_TRUNC %1(s32) - G_MEMSET %0(p0), %2(s8), %3(s64), 1 :: (store 1 into %ir.dst) + G_MEMSET %0(p0), %2(s8), %3(s64), 1 :: (store (s8) into %ir.dst) RET_ReallyLR ... @@ -214,19 +214,19 @@ body: | ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4629771061636907072 - ; CHECK: G_STORE [[C]](s64), [[COPY]](p0) :: (store 8 into %ir.dst, align 1) + ; CHECK: G_STORE [[C]](s64), [[COPY]](p0) :: (store (s64) into %ir.dst, align 1) ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK: G_STORE [[C]](s64), [[PTR_ADD]](p0) :: (store 8 into %ir.dst + 8, align 1) + ; CHECK: G_STORE [[C]](s64), [[PTR_ADD]](p0) :: (store (s64) into %ir.dst + 8, align 1) ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s64) ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CHECK: G_STORE [[TRUNC]](s16), [[PTR_ADD1]](p0) :: (store 2 into %ir.dst + 16, align 1) + ; CHECK: G_STORE [[TRUNC]](s16), [[PTR_ADD1]](p0) :: (store (s16) into %ir.dst + 16, align 1) ; CHECK: RET_ReallyLR %0:_(p0) = COPY $x0 %1:_(s8) = G_CONSTANT i8 64 %2:_(s64) = G_CONSTANT i64 18 - G_MEMSET %0(p0), %1(s8), %2(s64), 1 :: (store 1 into %ir.dst) + G_MEMSET %0(p0), %1(s8), %2(s64), 1 :: (store (s8) into %ir.dst) RET_ReallyLR ... @@ -245,14 +245,14 @@ body: | ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s8) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 72340172838076673 ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[ZEXT]], [[C]] - ; CHECK: G_STORE [[MUL]](s64), [[COPY]](p0) :: (store 8 into %ir.dst, align 1) + ; CHECK: G_STORE [[MUL]](s64), [[COPY]](p0) :: (store (s64) into %ir.dst, align 1) ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK: G_STORE [[MUL]](s64), [[PTR_ADD]](p0) :: (store 8 into %ir.dst + 8, align 1) + ; CHECK: G_STORE [[MUL]](s64), [[PTR_ADD]](p0) :: (store (s64) into %ir.dst + 8, align 1) ; CHECK: RET_ReallyLR %0:_(p0) = COPY $x0 %1:_(s32) = COPY $w1 %3:_(s64) = G_CONSTANT i64 16 %2:_(s8) = G_TRUNC %1(s32) - G_MEMSET %0(p0), %2(s8), %3(s64), 1 :: (store 1 into %ir.dst) + G_MEMSET %0(p0), %2(s8), %3(s64), 1 :: (store (s8) into %ir.dst) RET_ReallyLR diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/inline-small-memcpy.mir b/llvm/test/CodeGen/AArch64/GlobalISel/inline-small-memcpy.mir index cf74772a125ec..c62210fbfe2d3 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/inline-small-memcpy.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/inline-small-memcpy.mir @@ -42,18 +42,18 @@ body: | ; CHECK: liveins: $x0, $x1 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 - ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.1, align 4) - ; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store 16 into %ir.0, align 4) + ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load (s128) from %ir.1, align 4) + ; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store (s128) into %ir.0, align 4) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[GEP]](p0) :: (load 16 from %ir.1 + 16, align 4) - ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: G_STORE [[LOAD1]](s128), [[GEP1]](p0) :: (store 16 into %ir.0 + 16, align 4) + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from %ir.1 + 16, align 4) + ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: G_STORE [[LOAD1]](s128), [[PTR_ADD1]](p0) :: (store (s128) into %ir.0 + 16, align 4) ; CHECK: RET_ReallyLR %0:_(p0) = COPY $x0 %1:_(p0) = COPY $x1 %2:_(s64) = G_CONSTANT i64 32 - G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store (s8) into %ir.0, align 4), (load (s8) from %ir.1, align 4) RET_ReallyLR ... @@ -75,12 +75,12 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; CHECK: G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[C]](s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + ; CHECK: G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[C]](s64), 1 :: (store (s8) into %ir.0, align 4), (load (s8) from %ir.1, align 4) ; CHECK: RET_ReallyLR %0:_(p0) = COPY $x0 %1:_(p0) = COPY $x1 %2:_(s64) = G_CONSTANT i64 36 - G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store (s8) into %ir.0, align 4), (load (s8) from %ir.1, align 4) RET_ReallyLR ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-arguments.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-arguments.ll index b1493cfd80d10..5076b3c9d5e9a 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-arguments.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-arguments.ll @@ -18,12 +18,12 @@ declare void @use_s128(i128 %a, i128 %b) ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $w5 ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $w6 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2 -; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 8 from %fixed-stack.2, align 16) +; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s64) from %fixed-stack.2, align 16) ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 -; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load 8 from %fixed-stack.1) +; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s64) from %fixed-stack.1) ; CHECK: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[LOAD]](s64), [[LOAD1]](s64) ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 -; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p0) :: (invariant load 4 from %fixed-stack.0, align 16) +; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p0) :: (invariant load (s32) from %fixed-stack.0, align 16) ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[MV]](s128) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-atomic-metadata.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-atomic-metadata.ll index e1c59711c5af8..826291973648a 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-atomic-metadata.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-atomic-metadata.ll @@ -7,7 +7,7 @@ define i32 @atomicrmw_volatile(i32* %ptr) { ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[ATOMICRMW_ADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_ADD [[COPY]](p0), [[C]] :: (volatile load store monotonic 4 on %ir.ptr) + ; CHECK: [[ATOMICRMW_ADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_ADD [[COPY]](p0), [[C]] :: (volatile load store monotonic (s32) on %ir.ptr) ; CHECK: $w0 = COPY [[ATOMICRMW_ADD]](s32) ; CHECK: RET_ReallyLR implicit $w0 %oldval = atomicrmw volatile add i32* %ptr, i32 1 monotonic @@ -20,7 +20,7 @@ define i32 @atomicrmw_falkor(i32* %ptr) { ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[ATOMICRMW_ADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_ADD [[COPY]](p0), [[C]] :: ("aarch64-strided-access" load store monotonic 4 on %ir.ptr) + ; CHECK: [[ATOMICRMW_ADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_ADD [[COPY]](p0), [[C]] :: ("aarch64-strided-access" load store monotonic (s32) on %ir.ptr) ; CHECK: $w0 = COPY [[ATOMICRMW_ADD]](s32) ; CHECK: RET_ReallyLR implicit $w0 %oldval = atomicrmw add i32* %ptr, i32 1 monotonic, !falkor.strided.access !0 @@ -33,7 +33,7 @@ define i32 @atomicrmw_volatile_falkor(i32* %ptr) { ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[ATOMICRMW_ADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_ADD [[COPY]](p0), [[C]] :: (volatile "aarch64-strided-access" load store monotonic 4 on %ir.ptr) + ; CHECK: [[ATOMICRMW_ADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_ADD [[COPY]](p0), [[C]] :: (volatile "aarch64-strided-access" load store monotonic (s32) on %ir.ptr) ; CHECK: $w0 = COPY [[ATOMICRMW_ADD]](s32) ; CHECK: RET_ReallyLR implicit $w0 %oldval = atomicrmw volatile add i32* %ptr, i32 1 monotonic, !falkor.strided.access !0 @@ -47,7 +47,7 @@ define i32 @cmpxchg_volatile(i32* %addr) { ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[ATOMIC_CMPXCHG_WITH_SUCCESS:%[0-9]+]]:_(s32), [[ATOMIC_CMPXCHG_WITH_SUCCESS1:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[COPY]](p0), [[C]], [[C1]] :: (volatile load store monotonic monotonic 4 on %ir.addr) + ; CHECK: [[ATOMIC_CMPXCHG_WITH_SUCCESS:%[0-9]+]]:_(s32), [[ATOMIC_CMPXCHG_WITH_SUCCESS1:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[COPY]](p0), [[C]], [[C1]] :: (volatile load store monotonic monotonic (s32) on %ir.addr) ; CHECK: $w0 = COPY [[ATOMIC_CMPXCHG_WITH_SUCCESS]](s32) ; CHECK: RET_ReallyLR implicit $w0 %val_success = cmpxchg volatile i32* %addr, i32 0, i32 1 monotonic monotonic @@ -62,7 +62,7 @@ define i32 @cmpxchg_falkor(i32* %addr) { ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[ATOMIC_CMPXCHG_WITH_SUCCESS:%[0-9]+]]:_(s32), [[ATOMIC_CMPXCHG_WITH_SUCCESS1:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[COPY]](p0), [[C]], [[C1]] :: ("aarch64-strided-access" load store monotonic monotonic 4 on %ir.addr) + ; CHECK: [[ATOMIC_CMPXCHG_WITH_SUCCESS:%[0-9]+]]:_(s32), [[ATOMIC_CMPXCHG_WITH_SUCCESS1:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[COPY]](p0), [[C]], [[C1]] :: ("aarch64-strided-access" load store monotonic monotonic (s32) on %ir.addr) ; CHECK: $w0 = COPY [[ATOMIC_CMPXCHG_WITH_SUCCESS]](s32) ; CHECK: RET_ReallyLR implicit $w0 %val_success = cmpxchg i32* %addr, i32 0, i32 1 monotonic monotonic, !falkor.strided.access !0 @@ -77,7 +77,7 @@ define i32 @cmpxchg_volatile_falkor(i32* %addr) { ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[ATOMIC_CMPXCHG_WITH_SUCCESS:%[0-9]+]]:_(s32), [[ATOMIC_CMPXCHG_WITH_SUCCESS1:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[COPY]](p0), [[C]], [[C1]] :: (volatile "aarch64-strided-access" load store monotonic monotonic 4 on %ir.addr) + ; CHECK: [[ATOMIC_CMPXCHG_WITH_SUCCESS:%[0-9]+]]:_(s32), [[ATOMIC_CMPXCHG_WITH_SUCCESS1:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[COPY]](p0), [[C]], [[C1]] :: (volatile "aarch64-strided-access" load store monotonic monotonic (s32) on %ir.addr) ; CHECK: $w0 = COPY [[ATOMIC_CMPXCHG_WITH_SUCCESS]](s32) ; CHECK: RET_ReallyLR implicit $w0 %val_success = cmpxchg volatile i32* %addr, i32 0, i32 1 monotonic monotonic, !falkor.strided.access !0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll index c3c97a22b2c2c..2a7c3bc36a8f0 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll @@ -101,7 +101,7 @@ continue: ; CHECK: [[BAD]].{{[a-z]+}} (landing-pad): ; CHECK: [[PHI_ELEVEN:%[0-9]+]]:_(s32) = G_PHI [[ELEVEN]](s32), %bb.1 ; CHECK: EH_LABEL -; CHECK: G_STORE [[PHI_ELEVEN]](s32), {{%[0-9]+}}(p0) :: (store 4 into @global_var) +; CHECK: G_STORE [[PHI_ELEVEN]](s32), {{%[0-9]+}}(p0) :: (store (s32) into @global_var) ; CHECK: [[GOOD]].{{[a-z]+}}: ; CHECK: [[SEL:%[0-9]+]]:_(s32) = G_PHI diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-load-metadata.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-load-metadata.ll index 5511cf08686e0..9ec8c6b737cf2 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-load-metadata.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-load-metadata.ll @@ -6,7 +6,7 @@ define i32 @load_invariant(i32* %ptr) { ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (invariant load 4 from %ir.ptr) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (invariant load (s32) from %ir.ptr) ; CHECK: $w0 = COPY [[LOAD]](s32) ; CHECK: RET_ReallyLR implicit $w0 %load = load i32, i32* %ptr, align 4, !invariant.load !0 @@ -18,7 +18,7 @@ define i32 @load_volatile_invariant(i32* %ptr) { ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (volatile invariant load 4 from %ir.ptr) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (volatile invariant load (s32) from %ir.ptr) ; CHECK: $w0 = COPY [[LOAD]](s32) ; CHECK: RET_ReallyLR implicit $w0 %load = load volatile i32, i32* %ptr, align 4, !invariant.load !0 @@ -30,7 +30,7 @@ define i32 @load_dereferenceable(i32* dereferenceable(4) %ptr) { ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (dereferenceable load 4 from %ir.ptr) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (dereferenceable load (s32) from %ir.ptr) ; CHECK: $w0 = COPY [[LOAD]](s32) ; CHECK: RET_ReallyLR implicit $w0 %load = load i32, i32* %ptr, align 4 @@ -42,7 +42,7 @@ define i32 @load_dereferenceable_invariant(i32* dereferenceable(4) %ptr) { ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (dereferenceable invariant load 4 from %ir.ptr) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (dereferenceable invariant load (s32) from %ir.ptr) ; CHECK: $w0 = COPY [[LOAD]](s32) ; CHECK: RET_ReallyLR implicit $w0 %load = load i32, i32* %ptr, align 4, !invariant.load !0 @@ -54,7 +54,7 @@ define i32 @load_nontemporal(i32* %ptr) { ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (non-temporal load 4 from %ir.ptr) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (non-temporal load (s32) from %ir.ptr) ; CHECK: $w0 = COPY [[LOAD]](s32) ; CHECK: RET_ReallyLR implicit $w0 %load = load i32, i32* %ptr, align 4, !nontemporal !0 @@ -66,7 +66,7 @@ define i32 @load_falkor_strided_access(i32* %ptr) { ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: ("aarch64-strided-access" load 4 from %ir.ptr) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: ("aarch64-strided-access" load (s32) from %ir.ptr) ; CHECK: $w0 = COPY [[LOAD]](s32) ; CHECK: RET_ReallyLR implicit $w0 %load = load i32, i32* %ptr, align 4, !falkor.strided.access !0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-localescape.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-localescape.ll index 60eaea4e287bf..1a6fdcb47e304 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-localescape.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-localescape.ll @@ -10,8 +10,8 @@ define void @local_escape() { ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.a ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.b - ; CHECK: G_STORE [[C]](s32), [[FRAME_INDEX]](p0) :: (store 4 into %ir.a) - ; CHECK: G_STORE [[C1]](s32), [[FRAME_INDEX1]](p0) :: (store 4 into %ir.b) + ; CHECK: G_STORE [[C]](s32), [[FRAME_INDEX]](p0) :: (store (s32) into %ir.a) + ; CHECK: G_STORE [[C1]](s32), [[FRAME_INDEX1]](p0) :: (store (s32) into %ir.b) ; CHECK: RET_ReallyLR %a = alloca i32 %b = alloca i32, i32 2 @@ -31,8 +31,8 @@ define void @local_escape_insert_point() { ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.a ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.b - ; CHECK: G_STORE [[C]](s32), [[FRAME_INDEX]](p0) :: (store 4 into %ir.a) - ; CHECK: G_STORE [[C1]](s32), [[FRAME_INDEX1]](p0) :: (store 4 into %ir.b) + ; CHECK: G_STORE [[C]](s32), [[FRAME_INDEX]](p0) :: (store (s32) into %ir.a) + ; CHECK: G_STORE [[C1]](s32), [[FRAME_INDEX1]](p0) :: (store (s32) into %ir.b) ; CHECK: RET_ReallyLR %a = alloca i32 %b = alloca i32, i32 2 @@ -51,7 +51,7 @@ define void @local_escape_strip_ptr_cast() { ; CHECK: LOCAL_ESCAPE , %stack.0.a ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.a - ; CHECK: G_STORE [[C]](s32), [[FRAME_INDEX]](p0) :: (store 4 into %ir.cast) + ; CHECK: G_STORE [[C]](s32), [[FRAME_INDEX]](p0) :: (store (s32) into %ir.cast) ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp ; CHECK: $x0 = COPY [[FRAME_INDEX]](p0) ; CHECK: BL @foo, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-max-address-space.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-max-address-space.ll index 5bbc0ede90c8f..82bb27a34048b 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-max-address-space.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-max-address-space.ll @@ -2,7 +2,7 @@ ; CHECK-LABEL: name: store_max_address_space ; CHECK: %0:_(p16777215) = COPY $x0 -; CHECK: G_STORE %1(s32), %0(p16777215) :: (store 4 into %ir.ptr, addrspace 16777215) +; CHECK: G_STORE %1(s32), %0(p16777215) :: (store (s32) into %ir.ptr, addrspace 16777215) define void @store_max_address_space(i32 addrspace(16777215)* %ptr) { store i32 0, i32 addrspace(16777215)* %ptr ret void @@ -11,7 +11,7 @@ define void @store_max_address_space(i32 addrspace(16777215)* %ptr) { ; CHECK-LABEL: name: store_max_address_space_vector ; CHECK: %0:_(<2 x p16777215>) = COPY $q0 ; CHECK: %1:_(p16777215) = G_EXTRACT_VECTOR_ELT %0(<2 x p16777215>), %2(s64) -; CHECK: %1(p16777215) :: (store 4 into %ir.elt0, addrspace 16777215) +; CHECK: %1(p16777215) :: (store (s32) into %ir.elt0, addrspace 16777215) define void @store_max_address_space_vector(<2 x i32 addrspace(16777215)*> %vptr) { %elt0 = extractelement <2 x i32 addrspace(16777215)*> %vptr, i32 0 store i32 0, i32 addrspace(16777215)* %elt0 @@ -19,7 +19,7 @@ define void @store_max_address_space_vector(<2 x i32 addrspace(16777215)*> %vptr } ; CHECK-LABEL: name: max_address_space_vector_max_num_elts -; CHECK: %0:_(<65535 x p16777215>) = G_LOAD %1(p0) :: (volatile load 524280 from `<65535 x i32 addrspace(16777215)*>* undef`, align 524288) +; CHECK: %0:_(<65535 x p16777215>) = G_LOAD %1(p0) :: (volatile load (<65535 x p16777215>) from `<65535 x i32 addrspace(16777215)*>* undef`, align 524288) define void @max_address_space_vector_max_num_elts() { %load = load volatile <65535 x i32 addrspace(16777215)*>, <65535 x i32 addrspace(16777215)*>* undef ret void diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-memcpy-inline.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-memcpy-inline.ll index 76e2b574c6c6e..12a8f819661c8 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-memcpy-inline.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-memcpy-inline.ll @@ -9,7 +9,7 @@ define void @copy(i8* %dst, i8* %src) { ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32) - ; CHECK: G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64), 0 :: (store 1 into %ir.dst), (load 1 from %ir.src) + ; CHECK: G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64), 0 :: (store (s8) into %ir.dst), (load (s8) from %ir.src) ; CHECK: RET_ReallyLR entry: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 4, i1 false) @@ -24,7 +24,7 @@ define void @inline_copy(i8* %dst, i8* %src) { ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32) - ; CHECK: G_MEMCPY_INLINE [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64) :: (store 1 into %ir.dst), (load 1 from %ir.src) + ; CHECK: G_MEMCPY_INLINE [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64) :: (store (s8) into %ir.dst), (load (s8) from %ir.src) ; CHECK: RET_ReallyLR entry: call void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 4, i1 false) @@ -39,7 +39,7 @@ define void @copy_volatile(i8* %dst, i8* %src) { ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32) - ; CHECK: G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64), 0 :: (volatile store 1 into %ir.dst), (volatile load 1 from %ir.src) + ; CHECK: G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64), 0 :: (volatile store (s8) into %ir.dst), (volatile load (s8) from %ir.src) ; CHECK: RET_ReallyLR entry: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 4, i1 true) @@ -54,7 +54,7 @@ define void @inline_copy_volatile(i8* %dst, i8* %src) { ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32) - ; CHECK: G_MEMCPY_INLINE [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64) :: (volatile store 1 into %ir.dst), (volatile load 1 from %ir.src) + ; CHECK: G_MEMCPY_INLINE [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64) :: (volatile store (s8) into %ir.dst), (volatile load (s8) from %ir.src) ; CHECK: RET_ReallyLR entry: call void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 4, i1 true) @@ -69,7 +69,7 @@ define void @tail_copy(i8* %dst, i8* %src) { ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32) - ; CHECK: G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64), 1 :: (store 1 into %ir.dst), (load 1 from %ir.src) + ; CHECK: G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64), 1 :: (store (s8) into %ir.dst), (load (s8) from %ir.src) ; CHECK: RET_ReallyLR entry: tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 4, i1 false) @@ -84,7 +84,7 @@ define void @tail_inline_copy(i8* %dst, i8* %src) { ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32) - ; CHECK: G_MEMCPY_INLINE [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64) :: (store 1 into %ir.dst), (load 1 from %ir.src) + ; CHECK: G_MEMCPY_INLINE [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64) :: (store (s8) into %ir.dst), (load (s8) from %ir.src) ; CHECK: RET_ReallyLR entry: tail call void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 4, i1 false) @@ -99,7 +99,7 @@ define void @tail_copy_volatile(i8* %dst, i8* %src) { ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32) - ; CHECK: G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64), 1 :: (volatile store 1 into %ir.dst), (volatile load 1 from %ir.src) + ; CHECK: G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64), 1 :: (volatile store (s8) into %ir.dst), (volatile load (s8) from %ir.src) ; CHECK: RET_ReallyLR entry: tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 4, i1 true) @@ -114,7 +114,7 @@ define void @tail_inline_copy_volatile(i8* %dst, i8* %src) { ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32) - ; CHECK: G_MEMCPY_INLINE [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64) :: (volatile store 1 into %ir.dst), (volatile load 1 from %ir.src) + ; CHECK: G_MEMCPY_INLINE [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64) :: (volatile store (s8) into %ir.dst), (volatile load (s8) from %ir.src) ; CHECK: RET_ReallyLR entry: tail call void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 4, i1 true) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-stack-evt-bug47619.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-stack-evt-bug47619.ll index 6090f76814d1c..92ef424a9283e 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-stack-evt-bug47619.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-stack-evt-bug47619.ll @@ -17,7 +17,7 @@ define i3 @bug47619(i64 %arg, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %a ; CHECK: [[COPY6:%[0-9]+]]:_(s64) = COPY $x6 ; CHECK: [[COPY7:%[0-9]+]]:_(s64) = COPY $x7 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 4 from %fixed-stack.0, align 16) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s32) from %fixed-stack.0, align 16) ; CHECK: [[TRUNC:%[0-9]+]]:_(s3) = G_TRUNC [[LOAD]](s32) ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s3) ; CHECK: $w0 = COPY [[ANYEXT]](s32) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-stack-objects.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-stack-objects.ll index 00eaeb9ae7a34..ff0843175b5ed 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-stack-objects.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-stack-objects.ll @@ -13,12 +13,12 @@ define void @stack_passed_i64(i64 %arg, i64 %arg1, i64 %arg2, i64 %arg3, i64 %ar i64 %arg7, i64 %arg8, i64* byval(i64) %arg9) { ; CHECK: bb.1 (%ir-block.0): ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 8 from %fixed-stack.1, align 16) + ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s64) from %fixed-stack.1, align 16) ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 ; CHECK: [[COPY8:%[0-9]+]]:_(p0) = COPY [[FRAME_INDEX1]](p0) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[COPY8]](p0) :: (dereferenceable load 8 from %ir.arg9) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[COPY8]](p0) :: (dereferenceable load (s64) from %ir.arg9) ; CHECK: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[LOAD1]], [[LOAD]] - ; CHECK: G_STORE [[ADD]](s64), [[COPY8]](p0) :: (volatile store 8 into %ir.arg9) + ; CHECK: G_STORE [[ADD]](s64), [[COPY8]](p0) :: (volatile store (s64) into %ir.arg9) ; CHECK: RET_ReallyLR %load = load i64, i64* %arg9 %add = add i64 %load, %arg8 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-stackprotect-check.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-stackprotect-check.ll index 3fc5a528bd613..144b7b228059f 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-stackprotect-check.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-stackprotect-check.ll @@ -27,12 +27,12 @@ define void @foo() ssp { ; CHECK-MIR: bb.1.entry: ; CHECK-MIR: %0:_(p0) = G_FRAME_INDEX %stack.0.StackGuardSlot -; CHECK-MIR-NEXT: %1:gpr64sp(p0) = LOAD_STACK_GUARD :: (dereferenceable invariant load 8 from @__stack_chk_guard) -; CHECK-MIR-NEXT: %2:gpr64sp(p0) = LOAD_STACK_GUARD :: (dereferenceable invariant load 8 from @__stack_chk_guard) -; CHECK-MIR-NEXT: G_STORE %2(p0), %0(p0) :: (volatile store 8 into %stack.0.StackGuardSlot) +; CHECK-MIR-NEXT: %1:gpr64sp(p0) = LOAD_STACK_GUARD :: (dereferenceable invariant load (p0) from @__stack_chk_guard) +; CHECK-MIR-NEXT: %2:gpr64sp(p0) = LOAD_STACK_GUARD :: (dereferenceable invariant load (p0) from @__stack_chk_guard) +; CHECK-MIR-NEXT: G_STORE %2(p0), %0(p0) :: (volatile store (p0) into %stack.0.StackGuardSlot) ; CHECK-MIR-NEXT: %3:_(p0) = G_FRAME_INDEX %stack.1.buf -; CHECK-MIR-NEXT: %4:gpr64sp(p0) = LOAD_STACK_GUARD :: (dereferenceable invariant load 8 from @__stack_chk_guard) -; CHECK-MIR-NEXT: %5:_(p0) = G_LOAD %0(p0) :: (volatile dereferenceable load 8 from %ir.StackGuardSlot) +; CHECK-MIR-NEXT: %4:gpr64sp(p0) = LOAD_STACK_GUARD :: (dereferenceable invariant load (p0) from @__stack_chk_guard) +; CHECK-MIR-NEXT: %5:_(p0) = G_LOAD %0(p0) :: (volatile dereferenceable load (p0) from %ir.StackGuardSlot) ; CHECK-MIR-NEXT: %6:_(s1) = G_ICMP intpred(eq), %4(p0), %5 ; CHECK-MIR-NEXT: G_BRCOND %6(s1), %bb.2 ; CHECK-MIR-NEXT: G_BR %bb.3 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-store-metadata.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-store-metadata.ll index fcabd5bb40d54..10e394dda64b4 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-store-metadata.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-store-metadata.ll @@ -7,7 +7,7 @@ define void @store_nontemporal(i32* dereferenceable(4) %ptr) { ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: G_STORE [[C]](s32), [[COPY]](p0) :: (non-temporal store 4 into %ir.ptr) + ; CHECK: G_STORE [[C]](s32), [[COPY]](p0) :: (non-temporal store (s32) into %ir.ptr) ; CHECK: RET_ReallyLR store i32 0, i32* %ptr, align 4, !nontemporal !0 ret void @@ -19,7 +19,7 @@ define void @store_dereferenceable(i32* dereferenceable(4) %ptr) { ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: G_STORE [[C]](s32), [[COPY]](p0) :: (store 4 into %ir.ptr) + ; CHECK: G_STORE [[C]](s32), [[COPY]](p0) :: (store (s32) into %ir.ptr) ; CHECK: RET_ReallyLR store i32 0, i32* %ptr, align 4 ret void @@ -31,7 +31,7 @@ define void @store_volatile_dereferenceable(i32* dereferenceable(4) %ptr) { ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: G_STORE [[C]](s32), [[COPY]](p0) :: (volatile store 4 into %ir.ptr) + ; CHECK: G_STORE [[C]](s32), [[COPY]](p0) :: (volatile store (s32) into %ir.ptr) ; CHECK: RET_ReallyLR store volatile i32 0, i32* %ptr, align 4 ret void @@ -43,7 +43,7 @@ define void @store_falkor_strided_access(i32* %ptr) { ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: G_STORE [[C]](s32), [[COPY]](p0) :: ("aarch64-strided-access" store 4 into %ir.ptr) + ; CHECK: G_STORE [[C]](s32), [[COPY]](p0) :: ("aarch64-strided-access" store (s32) into %ir.ptr) ; CHECK: RET_ReallyLR store i32 0, i32* %ptr, align 4, !falkor.strided.access !0 ret void diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-switch-bittest.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-switch-bittest.ll index 7fbb08d4bc6ab..90f4471e99169 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-switch-bittest.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-switch-bittest.ll @@ -202,7 +202,7 @@ define void @bit_test_block_incomplete_phi() { ; CHECK: successors: ; CHECK: bb.3.if.end: ; CHECK: successors: %bb.4(0x80000000) - ; CHECK: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[DEF1]](p0) :: (load 8 from `i8** undef`) + ; CHECK: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[DEF1]](p0) :: (load (p0) from `i8** undef`) ; CHECK: bb.4.return: ; CHECK: [[PHI:%[0-9]+]]:_(s1) = G_PHI [[C]](s1), %bb.3, [[C1]](s1), %bb.5 ; CHECK: RET_ReallyLR diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-tbaa.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-tbaa.ll index 33417d8a1fc66..2f43410492163 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-tbaa.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-tbaa.ll @@ -8,8 +8,8 @@ bb: %tmp5 = getelementptr i16, i16* null, i64 2 %tmp6 = load i16, i16* %tmp1, align 2, !tbaa !0 store i16 %tmp6, i16* %tmp5, align 2, !tbaa !0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD %{{[0-9]+}}(p0) :: (load 2 from %ir.tmp1, !tbaa !0) - ; CHECK: G_STORE [[LOAD]](s16), %{{[0-9]+}}(p0) :: (store 2 into %ir.tmp5, !tbaa !0) + ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD %{{[0-9]+}}(p0) :: (load (s16) from %ir.tmp1, !tbaa !0) + ; CHECK: G_STORE [[LOAD]](s16), %{{[0-9]+}}(p0) :: (store (s16) into %ir.tmp5, !tbaa !0) ret void } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/labels-are-not-dead.mir b/llvm/test/CodeGen/AArch64/GlobalISel/labels-are-not-dead.mir index ae7c7d3d6c6dd..1897907ad465e 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/labels-are-not-dead.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/labels-are-not-dead.mir @@ -18,8 +18,8 @@ body: | ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0 ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1 - ; CHECK: G_STORE [[C]](s32), [[FRAME_INDEX]](p0) :: (store 4) - ; CHECK: G_STORE [[C1]](s32), [[FRAME_INDEX1]](p0) :: (store 4) + ; CHECK: G_STORE [[C]](s32), [[FRAME_INDEX]](p0) :: (store (s32)) + ; CHECK: G_STORE [[C1]](s32), [[FRAME_INDEX1]](p0) :: (store (s32)) ; CHECK: RET_ReallyLR LOCAL_ESCAPE , %stack.0 LOCAL_ESCAPE , %stack.1 @@ -27,8 +27,8 @@ body: | %3:_(s32) = G_CONSTANT i32 13 %0:_(p0) = G_FRAME_INDEX %stack.0 %1:_(p0) = G_FRAME_INDEX %stack.1 - G_STORE %2(s32), %0(p0) :: (store 4) - G_STORE %3(s32), %1(p0) :: (store 4) + G_STORE %2(s32), %0(p0) :: (store (s32)) + G_STORE %3(s32), %1(p0) :: (store (s32)) RET_ReallyLR ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-atomicrmw.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-atomicrmw.mir index 30897eda81532..ecb7a2bae1625 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-atomicrmw.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-atomicrmw.mir @@ -19,12 +19,12 @@ body: | ; CHECK-LABEL: name: cmpxchg_i8 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1 - ; CHECK: [[ATOMICRMW_ADD:%[0-9]+]]:_(s8) = G_ATOMICRMW_ADD [[COPY]](p0), [[C]] :: (load store monotonic 1 on %ir.addr) + ; CHECK: [[ATOMICRMW_ADD:%[0-9]+]]:_(s8) = G_ATOMICRMW_ADD [[COPY]](p0), [[C]] :: (load store monotonic (s8) on %ir.addr) ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ATOMICRMW_ADD]](s8) ; CHECK: $w0 = COPY [[ANYEXT]](s32) %0:_(p0) = COPY $x0 %1:_(s8) = G_CONSTANT i8 1 - %2:_(s8) = G_ATOMICRMW_ADD %0, %1 :: (load store monotonic 1 on %ir.addr) + %2:_(s8) = G_ATOMICRMW_ADD %0, %1 :: (load store monotonic (s8) on %ir.addr) %3:_(s32) = G_ANYEXT %2 $w0 = COPY %3(s32) ... @@ -38,12 +38,12 @@ body: | ; CHECK-LABEL: name: cmpxchg_i16 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; CHECK: [[ATOMICRMW_ADD:%[0-9]+]]:_(s16) = G_ATOMICRMW_ADD [[COPY]](p0), [[C]] :: (load store monotonic 2 on %ir.addr) + ; CHECK: [[ATOMICRMW_ADD:%[0-9]+]]:_(s16) = G_ATOMICRMW_ADD [[COPY]](p0), [[C]] :: (load store monotonic (s16) on %ir.addr) ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ATOMICRMW_ADD]](s16) ; CHECK: $w0 = COPY [[ANYEXT]](s32) %0:_(p0) = COPY $x0 %1:_(s16) = G_CONSTANT i16 1 - %2:_(s16) = G_ATOMICRMW_ADD %0, %1 :: (load store monotonic 2 on %ir.addr) + %2:_(s16) = G_ATOMICRMW_ADD %0, %1 :: (load store monotonic (s16) on %ir.addr) %3:_(s32) = G_ANYEXT %2 $w0 = COPY %3(s32) ... @@ -57,11 +57,11 @@ body: | ; CHECK-LABEL: name: cmpxchg_i32 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[ATOMICRMW_ADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_ADD [[COPY]](p0), [[C]] :: (load store monotonic 4 on %ir.addr) + ; CHECK: [[ATOMICRMW_ADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_ADD [[COPY]](p0), [[C]] :: (load store monotonic (s32) on %ir.addr) ; CHECK: $w0 = COPY [[ATOMICRMW_ADD]](s32) %0:_(p0) = COPY $x0 %1:_(s32) = G_CONSTANT i32 1 - %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store monotonic 4 on %ir.addr) + %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store monotonic (s32) on %ir.addr) $w0 = COPY %2(s32) ... @@ -74,10 +74,10 @@ body: | ; CHECK-LABEL: name: cmpxchg_i64 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK: [[ATOMICRMW_ADD:%[0-9]+]]:_(s64) = G_ATOMICRMW_ADD [[COPY]](p0), [[C]] :: (load store monotonic 8 on %ir.addr) + ; CHECK: [[ATOMICRMW_ADD:%[0-9]+]]:_(s64) = G_ATOMICRMW_ADD [[COPY]](p0), [[C]] :: (load store monotonic (s64) on %ir.addr) ; CHECK: $x0 = COPY [[ATOMICRMW_ADD]](s64) %0:_(p0) = COPY $x0 %1:_(s64) = G_CONSTANT i64 1 - %2:_(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store monotonic 8 on %ir.addr) + %2:_(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store monotonic (s64) on %ir.addr) $x0 = COPY %2(s64) ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-blockaddress.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-blockaddress.mir index 7f14a42fea84a..f332237897793 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-blockaddress.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-blockaddress.mir @@ -28,14 +28,14 @@ body: | ; CHECK: [[BLOCK_ADDR:%[0-9]+]]:_(p0) = G_BLOCK_ADDR blockaddress(@test_blockaddress, %ir-block.block) ; CHECK: [[ADRP:%[0-9]+]]:gpr64(p0) = ADRP target-flags(aarch64-page) @addr ; CHECK: [[ADD_LOW:%[0-9]+]]:_(p0) = G_ADD_LOW [[ADRP]](p0), target-flags(aarch64-pageoff, aarch64-nc) @addr - ; CHECK: G_STORE [[BLOCK_ADDR]](p0), [[ADD_LOW]](p0) :: (store 8 into @addr) + ; CHECK: G_STORE [[BLOCK_ADDR]](p0), [[ADD_LOW]](p0) :: (store (p0) into @addr) ; CHECK: G_BRINDIRECT [[BLOCK_ADDR]](p0) ; CHECK: bb.1.block (address-taken): ; CHECK: RET_ReallyLR bb.1 (%ir-block.0): %0:_(p0) = G_BLOCK_ADDR blockaddress(@test_blockaddress, %ir-block.block) %1:_(p0) = G_GLOBAL_VALUE @addr - G_STORE %0(p0), %1(p0) :: (store 8 into @addr) + G_STORE %0(p0), %1(p0) :: (store (p0) into @addr) G_BRINDIRECT %0(p0) bb.2.block (address-taken): diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bzero.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bzero.mir index 9e4908396d9d2..e7a5a94541ff0 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bzero.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bzero.mir @@ -22,7 +22,7 @@ body: | ; CHECK: RET_ReallyLR %ptr:_(p0) = COPY $x0 %width:_(s64) = COPY $x1 - G_BZERO %ptr(p0), %width(s64), 0 :: (store 4) + G_BZERO %ptr(p0), %width(s64), 0 :: (store (s32)) RET_ReallyLR ... @@ -41,5 +41,5 @@ body: | ; CHECK: TCRETURNdi &bzero, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $x0, implicit $x1 %ptr:_(p0) = COPY $x0 %width:_(s64) = COPY $x1 - G_BZERO %ptr(p0), %width(s64), 1 :: (store 4) + G_BZERO %ptr(p0), %width(s64), 1 :: (store (s32)) RET_ReallyLR diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmpxchg-128.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmpxchg-128.mir index 74975638789e1..5c03edd472f4d 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmpxchg-128.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmpxchg-128.mir @@ -39,10 +39,10 @@ body: | ; CHECK-NOLSE: [[COPY6:%[0-9]+]]:gpr64(s64) = COPY [[COPY2]](s64) ; CHECK-NOLSE: [[COPY7:%[0-9]+]]:gpr64(s64) = COPY [[COPY3]](s64) ; CHECK-NOLSE: [[COPY8:%[0-9]+]]:gpr64(s64) = COPY [[COPY4]](s64) - ; CHECK-NOLSE: early-clobber %13:gpr64(s64), early-clobber %14:gpr64(s64), early-clobber %16:gpr32 = CMP_SWAP_128 [[COPY]](p0), [[COPY5]](s64), [[COPY6]](s64), [[COPY7]](s64), [[COPY8]](s64) :: (load store acquire acquire 16) + ; CHECK-NOLSE: early-clobber %13:gpr64(s64), early-clobber %14:gpr64(s64), early-clobber %16:gpr32 = CMP_SWAP_128 [[COPY]](p0), [[COPY5]](s64), [[COPY6]](s64), [[COPY7]](s64), [[COPY8]](s64) :: (load store acquire acquire (s128)) ; CHECK-NOLSE: [[COPY9:%[0-9]+]]:gpr64 = COPY %16 ; CHECK-NOLSE: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES %13(s64), %14(s64) - ; CHECK-NOLSE: G_STORE [[MV]](s128), [[COPY]](p0) :: (store 16) + ; CHECK-NOLSE: G_STORE [[MV]](s128), [[COPY]](p0) :: (store (s128)) ; CHECK-NOLSE: RET_ReallyLR ; CHECK-LSE-LABEL: name: compare_swap_128 ; CHECK-LSE: liveins: $x0_x1, $x1, $x0, $x1, $x2, $x3, $x4 @@ -53,11 +53,11 @@ body: | ; CHECK-LSE: [[COPY4:%[0-9]+]]:_(s64) = COPY $x4 ; CHECK-LSE: [[REG_SEQUENCE:%[0-9]+]]:xseqpairsclass(s128) = REG_SEQUENCE [[COPY1]](s64), %subreg.sube64, [[COPY2]](s64), %subreg.subo64 ; CHECK-LSE: [[REG_SEQUENCE1:%[0-9]+]]:xseqpairsclass(s128) = REG_SEQUENCE [[COPY3]](s64), %subreg.sube64, [[COPY4]](s64), %subreg.subo64 - ; CHECK-LSE: [[CASPAX:%[0-9]+]]:xseqpairsclass(s128) = CASPAX [[REG_SEQUENCE]](s128), [[REG_SEQUENCE1]](s128), [[COPY]](p0) :: (load store acquire acquire 16) + ; CHECK-LSE: [[CASPAX:%[0-9]+]]:xseqpairsclass(s128) = CASPAX [[REG_SEQUENCE]](s128), [[REG_SEQUENCE1]](s128), [[COPY]](p0) :: (load store acquire acquire (s128)) ; CHECK-LSE: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[CASPAX]](s128), 0 ; CHECK-LSE: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[CASPAX]](s128), 64 ; CHECK-LSE: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[EXTRACT]](s64), [[EXTRACT1]](s64) - ; CHECK-LSE: G_STORE [[MV]](s128), [[COPY]](p0) :: (store 16) + ; CHECK-LSE: G_STORE [[MV]](s128), [[COPY]](p0) :: (store (s128)) ; CHECK-LSE: RET_ReallyLR %0:_(p0) = COPY $x0 %3:_(s64) = COPY $x1 @@ -66,8 +66,8 @@ body: | %5:_(s64) = COPY $x3 %6:_(s64) = COPY $x4 %2:_(s128) = G_MERGE_VALUES %5(s64), %6(s64) - %7:_(s128), %8:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS %0(p0), %1, %2 :: (load store acquire acquire 16) - G_STORE %7(s128), %0(p0) :: (store 16) + %7:_(s128), %8:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS %0(p0), %1, %2 :: (load store acquire acquire (s128)) + G_STORE %7(s128), %0(p0) :: (store (s128)) RET_ReallyLR ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmpxchg-with-success.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmpxchg-with-success.mir index 2c77885115901..8fdb39a89daa6 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmpxchg-with-success.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmpxchg-with-success.mir @@ -19,7 +19,7 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[CMP:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[CST:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[RES:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[CMP]], [[CST]] :: (load store monotonic 8 on %ir.addr) + ; CHECK: [[RES:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[CMP]], [[CST]] :: (load store monotonic (s64) on %ir.addr) ; CHECK: [[SRES:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[RES]](s32), [[CMP]] ; CHECK: [[SRES32:%[0-9]+]]:_(s32) = COPY [[SRES]] ; CHECK: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[RES]], [[SRES32]] @@ -27,7 +27,7 @@ body: | %0:_(p0) = COPY $x0 %1:_(s32) = G_CONSTANT i32 0 %2:_(s32) = G_CONSTANT i32 1 - %3:_(s32), %4:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS %0, %1, %2 :: (load store monotonic 8 on %ir.addr) + %3:_(s32), %4:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS %0, %1, %2 :: (load store monotonic (s64) on %ir.addr) %5:_(s32) = G_ANYEXT %4 %6:_(s32) = G_MUL %3, %5 $w0 = COPY %6(s32) @@ -44,7 +44,7 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[CMP:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK: [[CST:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK: [[RES:%[0-9]+]]:_(s64) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[CMP]], [[CST]] :: (load store monotonic 8 on %ir.addr) + ; CHECK: [[RES:%[0-9]+]]:_(s64) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[CMP]], [[CST]] :: (load store monotonic (s64) on %ir.addr) ; CHECK: [[SRES:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[RES]](s64), [[CMP]] ; CHECK: [[SRES64:%[0-9]+]]:_(s64) = G_ANYEXT [[SRES]] ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[RES]], [[SRES64]] @@ -52,7 +52,7 @@ body: | %0:_(p0) = COPY $x0 %1:_(s64) = G_CONSTANT i64 0 %2:_(s64) = G_CONSTANT i64 1 - %3:_(s64), %4:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS %0, %1, %2 :: (load store monotonic 8 on %ir.addr) + %3:_(s64), %4:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS %0, %1, %2 :: (load store monotonic (s64) on %ir.addr) %5:_(s64) = G_ANYEXT %4 %6:_(s64) = G_MUL %3, %5 $x0 = COPY %6(s64) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmpxchg.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmpxchg.mir index d7e2af1c6c955..a884901fd4764 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmpxchg.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmpxchg.mir @@ -20,13 +20,13 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0 ; CHECK: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 1 - ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s8) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic 1 on %ir.addr) + ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s8) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic (s8) on %ir.addr) ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ATOMIC_CMPXCHG]](s8) ; CHECK: $w0 = COPY [[ANYEXT]](s32) %0:_(p0) = COPY $x0 %1:_(s8) = G_CONSTANT i8 0 %2:_(s8) = G_CONSTANT i8 1 - %3:_(s8) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic 1 on %ir.addr) + %3:_(s8) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic (s8) on %ir.addr) %4:_(s32) = G_ANYEXT %3 $w0 = COPY %4(s32) ... @@ -41,13 +41,13 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s16) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic 2 on %ir.addr) + ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s16) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic (s16) on %ir.addr) ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ATOMIC_CMPXCHG]](s16) ; CHECK: $w0 = COPY [[ANYEXT]](s32) %0:_(p0) = COPY $x0 %1:_(s16) = G_CONSTANT i16 0 %2:_(s16) = G_CONSTANT i16 1 - %3:_(s16) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic 2 on %ir.addr) + %3:_(s16) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic (s16) on %ir.addr) %4:_(s32) = G_ANYEXT %3 $w0 = COPY %4(s32) ... @@ -62,12 +62,12 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic 4 on %ir.addr) + ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic (s32) on %ir.addr) ; CHECK: $w0 = COPY [[ATOMIC_CMPXCHG]](s32) %0:_(p0) = COPY $x0 %1:_(s32) = G_CONSTANT i32 0 %2:_(s32) = G_CONSTANT i32 1 - %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic 4 on %ir.addr) + %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic (s32) on %ir.addr) $w0 = COPY %3(s32) ... @@ -81,11 +81,11 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s64) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic 8 on %ir.addr) + ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s64) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic (s64) on %ir.addr) ; CHECK: $x0 = COPY [[ATOMIC_CMPXCHG]](s64) %0:_(p0) = COPY $x0 %1:_(s64) = G_CONSTANT i64 0 %2:_(s64) = G_CONSTANT i64 1 - %3:_(s64) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic 8 on %ir.addr) + %3:_(s64) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic (s64) on %ir.addr) $x0 = COPY %3(s64) ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-exceptions.ll b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-exceptions.ll index 23797a4878cfe..208cf683197f4 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-exceptions.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-exceptions.ll @@ -18,8 +18,8 @@ declare void @_Unwind_Resume(i8*) ; CHECK: [[PTR:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[SEL_PTR:%[0-9]+]]:_(p0) = COPY $x1 ; CHECK: [[SEL_PTR_INT:%[0-9]+]]:_(s32) = G_PTRTOINT [[SEL_PTR]](p0) -; CHECK: G_STORE [[PTR]](p0), %0(p0) :: (store 8 into %ir.exn.slot) -; CHECK: G_STORE [[SEL_PTR_INT]](s32), %1(p0) :: (store 4 into %ir.ehselector.slot) +; CHECK: G_STORE [[PTR]](p0), %0(p0) :: (store (p0) into %ir.exn.slot) +; CHECK: G_STORE [[SEL_PTR_INT]](s32), %1(p0) :: (store (s32) into %ir.ehselector.slot) define void @bar() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { %exn.slot = alloca i8* diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extload.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extload.mir index 22873b7ff9679..a3fabb573d111 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extload.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extload.mir @@ -53,10 +53,10 @@ body: | liveins: $x0 ; CHECK-LABEL: name: test_extload ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; CHECK: $w0 = COPY [[LOAD]](s32) %0:_(p0) = COPY $x0 - %1:_(s32) = G_LOAD %0 :: (load 1) + %1:_(s32) = G_LOAD %0 :: (load (s8)) $w0 = COPY %1 ... --- @@ -67,11 +67,11 @@ body: | ; CHECK-LABEL: name: sext_i32_i64 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s64) = G_SEXTLOAD [[COPY]](p0) :: (load 4 from %ir.ptr) + ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s64) = G_SEXTLOAD [[COPY]](p0) :: (load (s32) from %ir.ptr) ; CHECK: $x0 = COPY [[SEXTLOAD]](s64) ; CHECK: RET_ReallyLR implicit $x0 %0:_(p0) = COPY $x0 - %2:_(s64) = G_SEXTLOAD %0(p0) :: (load 4 from %ir.ptr) + %2:_(s64) = G_SEXTLOAD %0(p0) :: (load (s32) from %ir.ptr) $x0 = COPY %2(s64) RET_ReallyLR implicit $x0 @@ -84,11 +84,11 @@ body: | ; CHECK-LABEL: name: sext_i16_i64 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s64) = G_SEXTLOAD [[COPY]](p0) :: (load 2 from %ir.ptr) + ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s64) = G_SEXTLOAD [[COPY]](p0) :: (load (s16) from %ir.ptr) ; CHECK: $x0 = COPY [[SEXTLOAD]](s64) ; CHECK: RET_ReallyLR implicit $x0 %0:_(p0) = COPY $x0 - %2:_(s64) = G_SEXTLOAD %0(p0) :: (load 2 from %ir.ptr) + %2:_(s64) = G_SEXTLOAD %0(p0) :: (load (s16) from %ir.ptr) $x0 = COPY %2(s64) RET_ReallyLR implicit $x0 @@ -101,11 +101,11 @@ body: | ; CHECK-LABEL: name: sext_i8_i64 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s64) = G_SEXTLOAD [[COPY]](p0) :: (load 1 from %ir.ptr) + ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s64) = G_SEXTLOAD [[COPY]](p0) :: (load (s8) from %ir.ptr) ; CHECK: $x0 = COPY [[SEXTLOAD]](s64) ; CHECK: RET_ReallyLR implicit $x0 %0:_(p0) = COPY $x0 - %2:_(s64) = G_SEXTLOAD %0(p0) :: (load 1 from %ir.ptr) + %2:_(s64) = G_SEXTLOAD %0(p0) :: (load (s8) from %ir.ptr) $x0 = COPY %2(s64) RET_ReallyLR implicit $x0 @@ -118,11 +118,11 @@ body: | ; CHECK-LABEL: name: zext_i32_i64 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s64) = G_ZEXTLOAD [[COPY]](p0) :: (load 4 from %ir.ptr) + ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s64) = G_ZEXTLOAD [[COPY]](p0) :: (load (s32) from %ir.ptr) ; CHECK: $x0 = COPY [[ZEXTLOAD]](s64) ; CHECK: RET_ReallyLR implicit $x0 %0:_(p0) = COPY $x0 - %2:_(s64) = G_ZEXTLOAD %0(p0) :: (load 4 from %ir.ptr) + %2:_(s64) = G_ZEXTLOAD %0(p0) :: (load (s32) from %ir.ptr) $x0 = COPY %2(s64) RET_ReallyLR implicit $x0 @@ -135,11 +135,11 @@ body: | ; CHECK-LABEL: name: zext_i16_i64 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s64) = G_ZEXTLOAD [[COPY]](p0) :: (load 2 from %ir.ptr) + ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s64) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16) from %ir.ptr) ; CHECK: $x0 = COPY [[ZEXTLOAD]](s64) ; CHECK: RET_ReallyLR implicit $x0 %0:_(p0) = COPY $x0 - %2:_(s64) = G_ZEXTLOAD %0(p0) :: (load 2 from %ir.ptr) + %2:_(s64) = G_ZEXTLOAD %0(p0) :: (load (s16) from %ir.ptr) $x0 = COPY %2(s64) RET_ReallyLR implicit $x0 @@ -152,11 +152,11 @@ body: | ; CHECK-LABEL: name: zext_i8_i64 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s64) = G_ZEXTLOAD [[COPY]](p0) :: (load 1 from %ir.ptr) + ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s64) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8) from %ir.ptr) ; CHECK: $x0 = COPY [[ZEXTLOAD]](s64) ; CHECK: RET_ReallyLR implicit $x0 %0:_(p0) = COPY $x0 - %2:_(s64) = G_ZEXTLOAD %0(p0) :: (load 1 from %ir.ptr) + %2:_(s64) = G_ZEXTLOAD %0(p0) :: (load (s8) from %ir.ptr) $x0 = COPY %2(s64) RET_ReallyLR implicit $x0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extract-vector-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extract-vector-elt.mir index 1c8c45b691190..b213995a09e89 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extract-vector-elt.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extract-vector-elt.mir @@ -179,16 +179,16 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1 ; CHECK: %idx:_(s64) = COPY $x0 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0 - ; CHECK: G_STORE [[COPY]](<2 x s64>), [[FRAME_INDEX]](p0) :: (store 16 into %stack.0, align 32) + ; CHECK: G_STORE [[COPY]](<2 x s64>), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0, align 32) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C]](s64) - ; CHECK: G_STORE [[COPY1]](<2 x s64>), [[PTR_ADD]](p0) :: (store 16 into %stack.0 + 16, basealign 32) + ; CHECK: G_STORE [[COPY1]](<2 x s64>), [[PTR_ADD]](p0) :: (store (s128) into %stack.0 + 16, basealign 32) ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND %idx, [[C1]] ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND]], [[C2]] ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[MUL]](s64) - ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p0) :: (load 8) + ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p0) :: (load (s64)) ; CHECK: $x0 = COPY [[LOAD]](s64) ; CHECK: RET_ReallyLR %0:_(<2 x s64>) = COPY $q0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fpext.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fpext.mir index 5fcc443d71759..cfefa069241ba 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fpext.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fpext.mir @@ -19,15 +19,15 @@ body: | ; CHECK: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) ; CHECK: [[FPEXT:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV]](<2 x s32>) ; CHECK: [[FPEXT1:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV1]](<2 x s32>) - ; CHECK: G_STORE [[FPEXT]](<2 x s64>), [[COPY1]](p0) :: (store 16, align 32) + ; CHECK: G_STORE [[FPEXT]](<2 x s64>), [[COPY1]](p0) :: (store (s128), align 32) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64) - ; CHECK: G_STORE [[FPEXT1]](<2 x s64>), [[PTR_ADD]](p0) :: (store 16 into unknown-address + 16) + ; CHECK: G_STORE [[FPEXT1]](<2 x s64>), [[PTR_ADD]](p0) :: (store (s128) into unknown-address + 16) ; CHECK: RET_ReallyLR %0:_(<4 x s32>) = COPY $q0 %1:_(p0) = COPY $x0 %2:_(<4 x s64>) = G_FPEXT %0(<4 x s32>) - G_STORE %2(<4 x s64>), %1(p0) :: (store 32) + G_STORE %2(<4 x s64>), %1(p0) :: (store (<4 x s64>)) RET_ReallyLR ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fptrunc.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fptrunc.mir index 4e257c6efaeb8..02cf7db0a144d 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fptrunc.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fptrunc.mir @@ -117,10 +117,10 @@ body: | ; CHECK: [[COPY5:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[FPTRUNC]](<2 x s32>), [[FPTRUNC1]](<2 x s32>) ; CHECK: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[FPTRUNC2]](<2 x s32>), [[FPTRUNC3]](<2 x s32>) - ; CHECK: G_STORE [[CONCAT_VECTORS]](<4 x s32>), [[COPY5]](p0) :: (store 16, align 32) + ; CHECK: G_STORE [[CONCAT_VECTORS]](<4 x s32>), [[COPY5]](p0) :: (store (s128), align 32) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY5]], [[C]](s64) - ; CHECK: G_STORE [[CONCAT_VECTORS1]](<4 x s32>), [[PTR_ADD]](p0) :: (store 16 into unknown-address + 16) + ; CHECK: G_STORE [[CONCAT_VECTORS1]](<4 x s32>), [[PTR_ADD]](p0) :: (store (s128) into unknown-address + 16) ; CHECK: RET_ReallyLR %2:_(<2 x s64>) = COPY $q0 %3:_(<2 x s64>) = COPY $q1 @@ -130,6 +130,6 @@ body: | %1:_(p0) = COPY $x0 %6:_(<8 x s32>) = G_FPTRUNC %0(<8 x s64>) %7:_(p0) = COPY $x0 - G_STORE %6(<8 x s32>), %7(p0) :: (store 32) + G_STORE %6(<8 x s32>), %7(p0) :: (store (<8 x s32>)) RET_ReallyLR ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store-fewerElts.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store-fewerElts.mir index 76b3f346f11c4..e450b8c6b6cb6 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store-fewerElts.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store-fewerElts.mir @@ -12,12 +12,12 @@ body: | ; CHECK: liveins: $x0, $x1 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 - ; CHECK: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16) - ; CHECK: G_STORE [[LOAD]](<4 x s32>), [[COPY1]](p0) :: (store 16) + ; CHECK: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>)) + ; CHECK: G_STORE [[LOAD]](<4 x s32>), [[COPY1]](p0) :: (store (<4 x s32>)) %0:_(p0) = COPY $x0 %1:_(p0) = COPY $x1 - %2:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16) - G_STORE %2(<4 x s32>), %1(p0) :: (store 16) + %2:_(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>)) + G_STORE %2(<4 x s32>), %1(p0) :: (store (<4 x s32>)) ... --- @@ -32,11 +32,11 @@ body: | ; CHECK: liveins: $x0, $x1 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16) - ; CHECK: G_STORE [[LOAD]](<2 x s64>), [[COPY1]](p0) :: (store 16) + ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>)) + ; CHECK: G_STORE [[LOAD]](<2 x s64>), [[COPY1]](p0) :: (store (<2 x s64>)) %0:_(p0) = COPY $x0 %1:_(p0) = COPY $x1 - %2:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16) - G_STORE %2(<2 x s64>), %1(p0) :: (store 16) + %2:_(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>)) + G_STORE %2(<2 x s64>), %1(p0) :: (store (<2 x s64>)) ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store-vector-of-ptr-debugloc.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store-vector-of-ptr-debugloc.mir index 4ab9a6c3ab06a..e8b4de0518519 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store-vector-of-ptr-debugloc.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store-vector-of-ptr-debugloc.mir @@ -46,7 +46,7 @@ body: | ; CHECK: RET_ReallyLR debug-location !DILocation(line: 5, column: 1 %0:_(<2 x p0>) = COPY $q0 %1:_(p0) = COPY $x0 - G_STORE %0(<2 x p0>), %1(p0), debug-location !11 :: (store 16) + G_STORE %0(<2 x p0>), %1(p0), debug-location !11 :: (store (<2 x p0>)) RET_ReallyLR debug-location !12 ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store-vector-of-ptr.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store-vector-of-ptr.mir index 5b32fd51f58c6..9e690f50e247d 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store-vector-of-ptr.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store-vector-of-ptr.mir @@ -32,11 +32,11 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(<2 x p0>) = COPY $q0 ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s64>) = G_BITCAST [[COPY]](<2 x p0>) - ; CHECK: G_STORE [[BITCAST]](<2 x s64>), [[COPY1]](p0) :: (store 16 into %ir.ptr) + ; CHECK: G_STORE [[BITCAST]](<2 x s64>), [[COPY1]](p0) :: (store (<2 x p0>) into %ir.ptr) ; CHECK: RET_ReallyLR %0:_(<2 x p0>) = COPY $q0 %1:_(p0) = COPY $x0 - G_STORE %0(<2 x p0>), %1(p0) :: (store 16 into %ir.ptr) + G_STORE %0(<2 x p0>), %1(p0) :: (store (<2 x p0>) into %ir.ptr) RET_ReallyLR ... @@ -52,12 +52,12 @@ body: | ; CHECK-LABEL: name: load_v2p0 ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.ptr) + ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x p0>) from %ir.ptr) ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x p0>) = G_BITCAST [[LOAD]](<2 x s64>) ; CHECK: $q0 = COPY [[BITCAST]](<2 x p0>) ; CHECK: RET_ReallyLR implicit $q0 %0:_(p0) = COPY $x0 - %1:_(<2 x p0>) = G_LOAD %0(p0) :: (load 16 from %ir.ptr) + %1:_(<2 x p0>) = G_LOAD %0(p0) :: (load (<2 x p0>) from %ir.ptr) $q0 = COPY %1(<2 x p0>) RET_ReallyLR implicit $q0 @@ -76,11 +76,11 @@ body: | ; CHECK-LABEL: name: load_v2p1 ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.ptr) + ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[COPY]](p0) :: (load (<2 x p1>) from %ir.ptr) ; CHECK: $q0 = COPY [[LOAD]](<2 x p1>) ; CHECK: RET_ReallyLR implicit $q0 %0:_(p0) = COPY $x0 - %1:_(<2 x p1>) = G_LOAD %0(p0) :: (load 16 from %ir.ptr) + %1:_(<2 x p1>) = G_LOAD %0(p0) :: (load (<2 x p1>) from %ir.ptr) $q0 = COPY %1(<2 x p1>) RET_ReallyLR implicit $q0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir index 634bcb79bc39d..6c4266fc43a50 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir @@ -8,48 +8,48 @@ body: | ; CHECK-LABEL: name: test_load ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0) :: (load 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0) :: (load (s1)) ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s8) ; CHECK: $w0 = COPY [[ANYEXT]](s32) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0) :: (load 1) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0) :: (load (s8)) ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD1]](s8) ; CHECK: $w0 = COPY [[ANYEXT1]](s32) - ; CHECK: [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p0) :: (load 2) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p0) :: (load (s16)) ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD2]](s16) ; CHECK: $w0 = COPY [[ANYEXT2]](s32) ; CHECK: $w0 = COPY [[ANYEXT1]](s32) - ; CHECK: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 8) + ; CHECK: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64)) ; CHECK: $x0 = COPY [[LOAD3]](s64) - ; CHECK: [[LOAD4:%[0-9]+]]:_(p0) = G_LOAD [[COPY]](p0) :: (load 8) + ; CHECK: [[LOAD4:%[0-9]+]]:_(p0) = G_LOAD [[COPY]](p0) :: (load (p0)) ; CHECK: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[LOAD4]](p0) ; CHECK: $x0 = COPY [[PTRTOINT]](s64) - ; CHECK: [[LOAD5:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load 8) + ; CHECK: [[LOAD5:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>)) ; CHECK: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[LOAD5]](<2 x s32>) ; CHECK: $x0 = COPY [[BITCAST]](s64) - ; CHECK: [[LOAD6:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p0) :: (load 16) + ; CHECK: [[LOAD6:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p0) :: (load (s128)) ; CHECK: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[LOAD6]](s128) ; CHECK: $x0 = COPY [[TRUNC]](s64) %0:_(p0) = COPY $x0 - %1:_(s1) = G_LOAD %0(p0) :: (load 1) + %1:_(s1) = G_LOAD %0(p0) :: (load (s1)) %2:_(s32) = G_ANYEXT %1(s1) $w0 = COPY %2(s32) - %3:_(s8) = G_LOAD %0(p0) :: (load 1) + %3:_(s8) = G_LOAD %0(p0) :: (load (s8)) %4:_(s32) = G_ANYEXT %3(s8) $w0 = COPY %4(s32) - %5:_(s16) = G_LOAD %0(p0) :: (load 2) + %5:_(s16) = G_LOAD %0(p0) :: (load (s16)) %6:_(s32) = G_ANYEXT %5(s16) $w0 = COPY %6(s32) - %7:_(s32) = G_LOAD %0(p0) :: (load 4) + %7:_(s32) = G_LOAD %0(p0) :: (load (s32)) $w0 = COPY %4(s32) - %8:_(s64) = G_LOAD %0(p0) :: (load 8) + %8:_(s64) = G_LOAD %0(p0) :: (load (s64)) $x0 = COPY %8(s64) - %9:_(p0) = G_LOAD %0(p0) :: (load 8) + %9:_(p0) = G_LOAD %0(p0) :: (load (p0)) %10:_(s64) = G_PTRTOINT %9(p0) $x0 = COPY %10(s64) - %11:_(<2 x s32>) = G_LOAD %0(p0) :: (load 8) + %11:_(<2 x s32>) = G_LOAD %0(p0) :: (load (<2 x s32>)) %12:_(s64) = G_BITCAST %11(<2 x s32>) $x0 = COPY %12(s64) - %13:_(s128) = G_LOAD %0(p0) :: (load 16) + %13:_(s128) = G_LOAD %0(p0) :: (load (s128)) %14:_(s64) = G_TRUNC %13(s128) $x0 = COPY %14(s64) ... @@ -67,33 +67,33 @@ body: | ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[AND]](s32) - ; CHECK: G_STORE [[TRUNC]](s8), [[COPY]](p0) :: (store 1) + ; CHECK: G_STORE [[TRUNC]](s8), [[COPY]](p0) :: (store (s1)) ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) - ; CHECK: G_STORE [[TRUNC1]](s8), [[COPY]](p0) :: (store 1) + ; CHECK: G_STORE [[TRUNC1]](s8), [[COPY]](p0) :: (store (s8)) ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: G_STORE [[TRUNC2]](s16), [[COPY]](p0) :: (store 2) - ; CHECK: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store 4) + ; CHECK: G_STORE [[TRUNC2]](s16), [[COPY]](p0) :: (store (s16)) + ; CHECK: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store (s32)) ; CHECK: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY]](p0) - ; CHECK: G_STORE [[PTRTOINT]](s64), [[COPY]](p0) :: (store 8) - ; CHECK: G_STORE [[COPY]](p0), [[COPY]](p0) :: (store 8) + ; CHECK: G_STORE [[PTRTOINT]](s64), [[COPY]](p0) :: (store (s64)) + ; CHECK: G_STORE [[COPY]](p0), [[COPY]](p0) :: (store (p0)) ; CHECK: [[PTRTOINT1:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY]](p0) ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[PTRTOINT1]](s64), [[PTRTOINT1]](s64) - ; CHECK: G_STORE [[MV]](s128), [[COPY]](p0) :: (store 16) + ; CHECK: G_STORE [[MV]](s128), [[COPY]](p0) :: (store (s128)) %0:_(p0) = COPY $x0 %1:_(s32) = COPY $w1 %2:_(s1) = G_TRUNC %1(s32) - G_STORE %2(s1), %0(p0) :: (store 1) + G_STORE %2(s1), %0(p0) :: (store (s1)) %3:_(s8) = G_TRUNC %1(s32) - G_STORE %3(s8), %0(p0) :: (store 1) + G_STORE %3(s8), %0(p0) :: (store (s8)) %4:_(s16) = G_TRUNC %1(s32) - G_STORE %4(s16), %0(p0) :: (store 2) - G_STORE %1(s32), %0(p0) :: (store 4) + G_STORE %4(s16), %0(p0) :: (store (s16)) + G_STORE %1(s32), %0(p0) :: (store (s32)) %5:_(s64) = G_PTRTOINT %0(p0) - G_STORE %5(s64), %0(p0) :: (store 8) - G_STORE %0(p0), %0(p0) :: (store 8) + G_STORE %5(s64), %0(p0) :: (store (s64)) + G_STORE %0(p0), %0(p0) :: (store (p0)) %6:_(s64) = G_PTRTOINT %0(p0) %7:_(s128) = G_MERGE_VALUES %6(s64), %6 - G_STORE %7(s128), %0(p0) :: (store 16) + G_STORE %7(s128), %0(p0) :: (store (s128)) ... --- name: store_4xi16 @@ -108,11 +108,11 @@ body: | ; CHECK: liveins: $d0, $x0 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0 ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: G_STORE [[COPY]](<4 x s16>), [[COPY1]](p0) :: (store 8) + ; CHECK: G_STORE [[COPY]](<4 x s16>), [[COPY1]](p0) :: (store (<4 x s16>)) ; CHECK: RET_ReallyLR %0:_(<4 x s16>) = COPY $d0 %1:_(p0) = COPY $x0 - G_STORE %0(<4 x s16>), %1(p0) :: (store 8) + G_STORE %0(<4 x s16>), %1(p0) :: (store (<4 x s16>)) RET_ReallyLR ... @@ -129,11 +129,11 @@ body: | ; CHECK: liveins: $q0, $x0 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: G_STORE [[COPY]](<4 x s32>), [[COPY1]](p0) :: (store 16) + ; CHECK: G_STORE [[COPY]](<4 x s32>), [[COPY1]](p0) :: (store (<4 x s32>)) ; CHECK: RET_ReallyLR %0:_(<4 x s32>) = COPY $q0 %1:_(p0) = COPY $x0 - G_STORE %0(<4 x s32>), %1(p0) :: (store 16) + G_STORE %0(<4 x s32>), %1(p0) :: (store (<4 x s32>)) RET_ReallyLR ... @@ -150,11 +150,11 @@ body: | ; CHECK: liveins: $q0, $x0 ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0 ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: G_STORE [[COPY]](<8 x s16>), [[COPY1]](p0) :: (store 16) + ; CHECK: G_STORE [[COPY]](<8 x s16>), [[COPY1]](p0) :: (store (<8 x s16>)) ; CHECK: RET_ReallyLR %0:_(<8 x s16>) = COPY $q0 %1:_(p0) = COPY $x0 - G_STORE %0(<8 x s16>), %1(p0) :: (store 16) + G_STORE %0(<8 x s16>), %1(p0) :: (store (<8 x s16>)) RET_ReallyLR ... @@ -171,11 +171,11 @@ body: | ; CHECK: liveins: $q0, $x0 ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s8>) = COPY $q0 ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: G_STORE [[COPY]](<16 x s8>), [[COPY1]](p0) :: (store 16) + ; CHECK: G_STORE [[COPY]](<16 x s8>), [[COPY1]](p0) :: (store (<16 x s8>)) ; CHECK: RET_ReallyLR %0:_(<16 x s8>) = COPY $q0 %1:_(p0) = COPY $x0 - G_STORE %0(<16 x s8>), %1(p0) :: (store 16) + G_STORE %0(<16 x s8>), %1(p0) :: (store (<16 x s8>)) RET_ReallyLR ... @@ -191,11 +191,11 @@ body: | ; CHECK-LABEL: name: load_4xi16 ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load 8) + ; CHECK: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>)) ; CHECK: $d0 = COPY [[LOAD]](<4 x s16>) ; CHECK: RET_ReallyLR implicit $d0 %0:_(p0) = COPY $x0 - %1:_(<4 x s16>) = G_LOAD %0(p0) :: (load 8) + %1:_(<4 x s16>) = G_LOAD %0(p0) :: (load (<4 x s16>)) $d0 = COPY %1(<4 x s16>) RET_ReallyLR implicit $d0 @@ -212,11 +212,11 @@ body: | ; CHECK-LABEL: name: load_4xi32 ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16) + ; CHECK: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>)) ; CHECK: $q0 = COPY [[LOAD]](<4 x s32>) ; CHECK: RET_ReallyLR implicit $q0 %0:_(p0) = COPY $x0 - %1:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16) + %1:_(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>)) $q0 = COPY %1(<4 x s32>) RET_ReallyLR implicit $q0 @@ -233,11 +233,11 @@ body: | ; CHECK-LABEL: name: load_8xi16 ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load 16) + ; CHECK: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load (<8 x s16>)) ; CHECK: $q0 = COPY [[LOAD]](<8 x s16>) ; CHECK: RET_ReallyLR implicit $q0 %0:_(p0) = COPY $x0 - %1:_(<8 x s16>) = G_LOAD %0(p0) :: (load 16) + %1:_(<8 x s16>) = G_LOAD %0(p0) :: (load (<8 x s16>)) $q0 = COPY %1(<8 x s16>) RET_ReallyLR implicit $q0 @@ -254,11 +254,11 @@ body: | ; CHECK-LABEL: name: load_16xi8 ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16) + ; CHECK: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load (<16 x s8>)) ; CHECK: $q0 = COPY [[LOAD]](<16 x s8>) ; CHECK: RET_ReallyLR implicit $q0 %0:_(p0) = COPY $x0 - %1:_(<16 x s8>) = G_LOAD %0(p0) :: (load 16) + %1:_(<16 x s8>) = G_LOAD %0(p0) :: (load (<16 x s8>)) $q0 = COPY %1(<16 x s8>) RET_ReallyLR implicit $q0 @@ -274,11 +274,11 @@ body: | ; CHECK-LABEL: name: load_8xi8 ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[LOAD:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[COPY]](p0) :: (load 8) + ; CHECK: [[LOAD:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[COPY]](p0) :: (load (<8 x s8>)) ; CHECK: $d0 = COPY [[LOAD]](<8 x s8>) ; CHECK: RET_ReallyLR implicit $d0 %0:_(p0) = COPY $x0 - %1:_(<8 x s8>) = G_LOAD %0(p0) :: (load 8) + %1:_(<8 x s8>) = G_LOAD %0(p0) :: (load (<8 x s8>)) $d0 = COPY %1(<8 x s8>) RET_ReallyLR implicit $d0 @@ -295,11 +295,11 @@ body: | ; CHECK: liveins: $x0, $d0 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:_(<8 x s8>) = COPY $d0 - ; CHECK: G_STORE [[COPY1]](<8 x s8>), [[COPY]](p0) :: (store 8) + ; CHECK: G_STORE [[COPY1]](<8 x s8>), [[COPY]](p0) :: (store (<8 x s8>)) ; CHECK: RET_ReallyLR %0:_(p0) = COPY $x0 %1:_(<8 x s8>) = COPY $d0 - G_STORE %1(<8 x s8>), %0(p0) :: (store 8) + G_STORE %1(<8 x s8>), %0(p0) :: (store (<8 x s8>)) RET_ReallyLR ... --- @@ -316,14 +316,14 @@ body: | ; CHECK: %ptr:_(p0) = COPY $x0 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8) ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8) - ; CHECK: G_STORE [[BUILD_VECTOR]](<16 x s8>), %ptr(p0) :: (store 16, align 32) + ; CHECK: G_STORE [[BUILD_VECTOR]](<16 x s8>), %ptr(p0) :: (store (s128), align 32) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR1]](<16 x s8>), [[PTR_ADD]](p0) :: (store 16 into unknown-address + 16) + ; CHECK: G_STORE [[BUILD_VECTOR1]](<16 x s8>), [[PTR_ADD]](p0) :: (store (s128) into unknown-address + 16) ; CHECK: RET_ReallyLR %val:_(<32 x s8>) = G_IMPLICIT_DEF %ptr:_(p0) = COPY $x0 - G_STORE %val(<32 x s8>), %ptr(p0) :: (store 32) + G_STORE %val(<32 x s8>), %ptr(p0) :: (store (<32 x s8>)) RET_ReallyLR ... --- @@ -340,14 +340,14 @@ body: | ; CHECK: %ptr:_(p0) = COPY $x0 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16) ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16) - ; CHECK: G_STORE [[BUILD_VECTOR]](<8 x s16>), %ptr(p0) :: (store 16, align 32) + ; CHECK: G_STORE [[BUILD_VECTOR]](<8 x s16>), %ptr(p0) :: (store (s128), align 32) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR1]](<8 x s16>), [[PTR_ADD]](p0) :: (store 16 into unknown-address + 16) + ; CHECK: G_STORE [[BUILD_VECTOR1]](<8 x s16>), [[PTR_ADD]](p0) :: (store (s128) into unknown-address + 16) ; CHECK: RET_ReallyLR %val:_(<16 x s16>) = G_IMPLICIT_DEF %ptr:_(p0) = COPY $x0 - G_STORE %val(<16 x s16>), %ptr(p0) :: (store 32) + G_STORE %val(<16 x s16>), %ptr(p0) :: (store (<16 x s16>)) RET_ReallyLR ... --- @@ -364,14 +364,14 @@ body: | ; CHECK: %ptr:_(p0) = COPY $x0 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32) ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32) - ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s32>), %ptr(p0) :: (store 16, align 32) + ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s32>), %ptr(p0) :: (store (s128), align 32) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p0) :: (store 16 into unknown-address + 16) + ; CHECK: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p0) :: (store (s128) into unknown-address + 16) ; CHECK: RET_ReallyLR %val:_(<8 x s32>) = G_IMPLICIT_DEF %ptr:_(p0) = COPY $x0 - G_STORE %val(<8 x s32>), %ptr(p0) :: (store 32) + G_STORE %val(<8 x s32>), %ptr(p0) :: (store (<8 x s32>)) RET_ReallyLR ... --- @@ -386,14 +386,14 @@ body: | ; CHECK: liveins: $x0 ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF ; CHECK: %ptr:_(p0) = COPY $x0 - ; CHECK: G_STORE [[DEF]](<2 x s64>), %ptr(p0) :: (store 16, align 32) + ; CHECK: G_STORE [[DEF]](<2 x s64>), %ptr(p0) :: (store (s128), align 32) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64) - ; CHECK: G_STORE [[DEF]](<2 x s64>), [[PTR_ADD]](p0) :: (store 16 into unknown-address + 16) + ; CHECK: G_STORE [[DEF]](<2 x s64>), [[PTR_ADD]](p0) :: (store (s128) into unknown-address + 16) ; CHECK: RET_ReallyLR %val:_(<4 x s64>) = G_IMPLICIT_DEF %ptr:_(p0) = COPY $x0 - G_STORE %val(<4 x s64>), %ptr(p0) :: (store 32) + G_STORE %val(<4 x s64>), %ptr(p0) :: (store (<4 x s64>)) RET_ReallyLR ... --- @@ -407,17 +407,17 @@ body: | ; CHECK-LABEL: name: load_32xs8 ; CHECK: liveins: $x0 ; CHECK: %ptr:_(p0) = COPY $x0 - ; CHECK: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD %ptr(p0) :: (load 16, align 32) + ; CHECK: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD %ptr(p0) :: (load (s128), align 32) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 from unknown-address + 16) - ; CHECK: G_STORE [[LOAD]](<16 x s8>), %ptr(p0) :: (store 16, align 32) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from unknown-address + 16) + ; CHECK: G_STORE [[LOAD]](<16 x s8>), %ptr(p0) :: (store (s128), align 32) ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64) - ; CHECK: G_STORE [[LOAD1]](<16 x s8>), [[PTR_ADD1]](p0) :: (store 16 into unknown-address + 16) + ; CHECK: G_STORE [[LOAD1]](<16 x s8>), [[PTR_ADD1]](p0) :: (store (s128) into unknown-address + 16) ; CHECK: RET_ReallyLR %ptr:_(p0) = COPY $x0 - %val:_(<32 x s8>) = G_LOAD %ptr(p0) :: (load 32) - G_STORE %val(<32 x s8>), %ptr(p0) :: (store 32) + %val:_(<32 x s8>) = G_LOAD %ptr(p0) :: (load (<32 x s8>)) + G_STORE %val(<32 x s8>), %ptr(p0) :: (store (<32 x s8>)) RET_ReallyLR ... --- @@ -431,17 +431,17 @@ body: | ; CHECK-LABEL: name: load_16xs16 ; CHECK: liveins: $x0 ; CHECK: %ptr:_(p0) = COPY $x0 - ; CHECK: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD %ptr(p0) :: (load 16, align 32) + ; CHECK: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD %ptr(p0) :: (load (s128), align 32) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 from unknown-address + 16) - ; CHECK: G_STORE [[LOAD]](<8 x s16>), %ptr(p0) :: (store 16, align 32) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from unknown-address + 16) + ; CHECK: G_STORE [[LOAD]](<8 x s16>), %ptr(p0) :: (store (s128), align 32) ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64) - ; CHECK: G_STORE [[LOAD1]](<8 x s16>), [[PTR_ADD1]](p0) :: (store 16 into unknown-address + 16) + ; CHECK: G_STORE [[LOAD1]](<8 x s16>), [[PTR_ADD1]](p0) :: (store (s128) into unknown-address + 16) ; CHECK: RET_ReallyLR %ptr:_(p0) = COPY $x0 - %val:_(<16 x s16>) = G_LOAD %ptr(p0) :: (load 32) - G_STORE %val(<16 x s16>), %ptr(p0) :: (store 32) + %val:_(<16 x s16>) = G_LOAD %ptr(p0) :: (load (<16 x s16>)) + G_STORE %val(<16 x s16>), %ptr(p0) :: (store (<16 x s16>)) RET_ReallyLR ... --- @@ -455,17 +455,17 @@ body: | ; CHECK-LABEL: name: load_8xs32 ; CHECK: liveins: $x0 ; CHECK: %ptr:_(p0) = COPY $x0 - ; CHECK: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD %ptr(p0) :: (load 16, align 32) + ; CHECK: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD %ptr(p0) :: (load (s128), align 32) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 from unknown-address + 16) - ; CHECK: G_STORE [[LOAD]](<4 x s32>), %ptr(p0) :: (store 16, align 32) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from unknown-address + 16) + ; CHECK: G_STORE [[LOAD]](<4 x s32>), %ptr(p0) :: (store (s128), align 32) ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64) - ; CHECK: G_STORE [[LOAD1]](<4 x s32>), [[PTR_ADD1]](p0) :: (store 16 into unknown-address + 16) + ; CHECK: G_STORE [[LOAD1]](<4 x s32>), [[PTR_ADD1]](p0) :: (store (s128) into unknown-address + 16) ; CHECK: RET_ReallyLR %ptr:_(p0) = COPY $x0 - %val:_(<8 x s32>) = G_LOAD %ptr(p0) :: (load 32) - G_STORE %val(<8 x s32>), %ptr(p0) :: (store 32) + %val:_(<8 x s32>) = G_LOAD %ptr(p0) :: (load (<8 x s32>)) + G_STORE %val(<8 x s32>), %ptr(p0) :: (store (<8 x s32>)) RET_ReallyLR ... --- @@ -479,17 +479,17 @@ body: | ; CHECK-LABEL: name: load_4xs64 ; CHECK: liveins: $x0 ; CHECK: %ptr:_(p0) = COPY $x0 - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD %ptr(p0) :: (load 16, align 32) + ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD %ptr(p0) :: (load (s128), align 32) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 from unknown-address + 16) - ; CHECK: G_STORE [[LOAD]](<2 x s64>), %ptr(p0) :: (store 16, align 32) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from unknown-address + 16) + ; CHECK: G_STORE [[LOAD]](<2 x s64>), %ptr(p0) :: (store (s128), align 32) ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64) - ; CHECK: G_STORE [[LOAD1]](<2 x s64>), [[PTR_ADD1]](p0) :: (store 16 into unknown-address + 16) + ; CHECK: G_STORE [[LOAD1]](<2 x s64>), [[PTR_ADD1]](p0) :: (store (s128) into unknown-address + 16) ; CHECK: RET_ReallyLR %ptr:_(p0) = COPY $x0 - %val:_(<4 x s64>) = G_LOAD %ptr(p0) :: (load 32) - G_STORE %val(<4 x s64>), %ptr(p0) :: (store 32) + %val:_(<4 x s64>) = G_LOAD %ptr(p0) :: (load (<4 x s64>)) + G_STORE %val(<4 x s64>), %ptr(p0) :: (store (<4 x s64>)) RET_ReallyLR ... --- @@ -502,20 +502,20 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK: %val64:_(s64) = COPY $x2 - ; CHECK: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store 1) - ; CHECK: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store 2) - ; CHECK: G_STORE %val64(s64), [[COPY]](p0) :: (store 1) - ; CHECK: G_STORE %val64(s64), [[COPY]](p0) :: (store 2) - ; CHECK: G_STORE %val64(s64), [[COPY]](p0) :: (store 4) + ; CHECK: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store (s8)) + ; CHECK: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store (s16)) + ; CHECK: G_STORE %val64(s64), [[COPY]](p0) :: (store (s8)) + ; CHECK: G_STORE %val64(s64), [[COPY]](p0) :: (store (s16)) + ; CHECK: G_STORE %val64(s64), [[COPY]](p0) :: (store (s32)) %0:_(p0) = COPY $x0 %1:_(s32) = COPY $w1 %2:_(s8) = G_TRUNC %1(s32) %val64:_(s64) = COPY $x2 - G_STORE %1(s32), %0(p0) :: (store 1) - G_STORE %1(s32), %0(p0) :: (store 2) - G_STORE %val64(s64), %0(p0) :: (store 1) - G_STORE %val64(s64), %0(p0) :: (store 2) - G_STORE %val64(s64), %0(p0) :: (store 4) + G_STORE %1(s32), %0(p0) :: (store (s8)) + G_STORE %1(s32), %0(p0) :: (store (s16)) + G_STORE %val64(s64), %0(p0) :: (store (s8)) + G_STORE %val64(s64), %0(p0) :: (store (s16)) + G_STORE %val64(s64), %0(p0) :: (store (s32)) ... --- name: store_6xs64 @@ -527,16 +527,16 @@ body: | ; CHECK: liveins: $x0 ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF ; CHECK: %ptr:_(p0) = COPY $x0 - ; CHECK: G_STORE [[DEF]](<2 x s64>), %ptr(p0) :: (store 16) + ; CHECK: G_STORE [[DEF]](<2 x s64>), %ptr(p0) :: (store (s128)) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64) - ; CHECK: G_STORE [[DEF]](<2 x s64>), [[PTR_ADD]](p0) :: (store 16 into unknown-address + 16) + ; CHECK: G_STORE [[DEF]](<2 x s64>), [[PTR_ADD]](p0) :: (store (s128) into unknown-address + 16) ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C1]](s64) - ; CHECK: G_STORE [[DEF]](<2 x s64>), [[PTR_ADD1]](p0) :: (store 16 into unknown-address + 32) + ; CHECK: G_STORE [[DEF]](<2 x s64>), [[PTR_ADD1]](p0) :: (store (s128) into unknown-address + 32) ; CHECK: RET_ReallyLR %val:_(<6 x s64>) = G_IMPLICIT_DEF %ptr:_(p0) = COPY $x0 - G_STORE %val(<6 x s64>), %ptr(p0) :: (store 48, align 16) + G_STORE %val(<6 x s64>), %ptr(p0) :: (store (<6 x s64>), align 16) RET_ReallyLR ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-trunc.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-trunc.mir index c41f9b6b1ac2e..8589be6f4d884 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-trunc.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-trunc.mir @@ -11,11 +11,11 @@ body: | ; CHECK-LABEL: name: test_load_trunc ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX]](p0) :: (load 2) + ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s10)) ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[LOAD]](s16) ; CHECK: RET_ReallyLR implicit [[TRUNC]](s1) %0:_(p0) = G_FRAME_INDEX %stack.0 - %1:_(s10) = G_LOAD %0(p0) :: (load 2) + %1:_(s10) = G_LOAD %0(p0) :: (load (s10)) %2:_(s1) = G_TRUNC %1(s10) RET_ReallyLR implicit %2(s1) ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-memlib-debug-loc.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-memlib-debug-loc.mir index e26b037cbefe0..9e26abad0bee5 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-memlib-debug-loc.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-memlib-debug-loc.mir @@ -54,7 +54,7 @@ body: | %2:_(s32) = COPY $w2 %3:_(s64) = G_ZEXT %2(s32), debug-location !11 %4:_(s8) = G_TRUNC %1(s32), debug-location !11 - G_MEMSET %0(p0), %4(s8), %3(s64), 0, debug-location !11 :: (store 1 into %ir.ptr) + G_MEMSET %0(p0), %4(s8), %3(s64), 0, debug-location !11 :: (store (s8) into %ir.ptr) RET_ReallyLR debug-location !12 ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir index 20af216aaeb5e..c9c0c9f9fe4cd 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir @@ -120,15 +120,15 @@ body: | ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0 ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1 ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.3 - ; CHECK: G_STORE [[COPY2]](s64), [[FRAME_INDEX]](p0) :: (store 8) - ; CHECK: G_STORE [[COPY1]](s64), [[FRAME_INDEX1]](p0) :: (store 8) - ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load 8) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX1]](p0) :: (dereferenceable load 8) + ; CHECK: G_STORE [[COPY2]](s64), [[FRAME_INDEX]](p0) :: (store (s64)) + ; CHECK: G_STORE [[COPY1]](s64), [[FRAME_INDEX1]](p0) :: (store (s64)) + ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s64)) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX1]](p0) :: (dereferenceable load (s64)) ; CHECK: [[UMULH:%[0-9]+]]:_(s64) = G_UMULH [[LOAD]], [[LOAD1]] ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[LOAD]], [[LOAD1]] ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UMULH]](s64), [[C]] - ; CHECK: G_STORE [[C]](s64), [[FRAME_INDEX2]](p0) :: (store 8, align 1) + ; CHECK: G_STORE [[C]](s64), [[FRAME_INDEX2]](p0) :: (store (s64), align 1) ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s32) ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C1]] @@ -142,13 +142,13 @@ body: | %3:_(p0) = G_FRAME_INDEX %stack.0 %4:_(p0) = G_FRAME_INDEX %stack.1 %6:_(p0) = G_FRAME_INDEX %stack.3 - G_STORE %2(s64), %3(p0) :: (store 8) - G_STORE %1(s64), %4(p0) :: (store 8) - %7:_(s64) = G_LOAD %3(p0) :: (dereferenceable load 8) - %8:_(s64) = G_LOAD %4(p0) :: (dereferenceable load 8) + G_STORE %2(s64), %3(p0) :: (store (s64)) + G_STORE %1(s64), %4(p0) :: (store (s64)) + %7:_(s64) = G_LOAD %3(p0) :: (dereferenceable load (s64)) + %8:_(s64) = G_LOAD %4(p0) :: (dereferenceable load (s64)) %9:_(s64), %10:_(s1) = G_UMULO %7, %8 %31:_(s64) = G_CONSTANT i64 0 - G_STORE %31(s64), %6(p0) :: (store 8, align 1) + G_STORE %31(s64), %6(p0) :: (store (s64), align 1) %16:_(s64) = G_ZEXT %10(s1) $x0 = COPY %9(s64) $x1 = COPY %16(s64) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-non-pow2-load-store.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-non-pow2-load-store.mir index 79aa4b2b8dd42..a334322d6b69f 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-non-pow2-load-store.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-non-pow2-load-store.mir @@ -24,25 +24,25 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load 2 from %ir.ptr, align 4) + ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16) from %ir.ptr, align 4) ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from %ir.ptr + 2, align 2, basealign 4) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from %ir.ptr + 2, align 2, basealign 4) ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C2]](s64) ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR]](s32) ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s64) ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64) - ; CHECK: G_STORE [[COPY2]](s32), [[COPY1]](p0) :: (store 2 into %ir.ptr2, align 4) - ; CHECK: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p0) :: (store 1 into %ir.ptr2 + 2, align 2, basealign 4) + ; CHECK: G_STORE [[COPY2]](s32), [[COPY1]](p0) :: (store (s16) into %ir.ptr2, align 4) + ; CHECK: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p0) :: (store (s8) into %ir.ptr2 + 2, align 2, basealign 4) ; CHECK: $w0 = COPY [[C]](s32) ; CHECK: RET_ReallyLR implicit $w0 %0:_(p0) = COPY $x0 %1:_(p0) = COPY $x1 %3:_(s32) = G_CONSTANT i32 0 - %2:_(s24) = G_LOAD %0(p0) :: (load 3 from %ir.ptr, align 4) - G_STORE %2(s24), %1(p0) :: (store 3 into %ir.ptr2, align 4) + %2:_(s24) = G_LOAD %0(p0) :: (load (s24) from %ir.ptr, align 4) + G_STORE %2(s24), %1(p0) :: (store (s24) into %ir.ptr2, align 4) $w0 = COPY %3(s32) RET_ReallyLR implicit $w0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi-insertpt-decrement.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi-insertpt-decrement.mir index e6d40306eee6b..508346ca6d5af 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi-insertpt-decrement.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi-insertpt-decrement.mir @@ -43,28 +43,30 @@ body: | ; CHECK: successors: %bb.1(0x80000000) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[C]](s64) + ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 ; CHECK: [[DEF:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:_(s1) = G_IMPLICIT_DEF + ; CHECK: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; CHECK: bb.1.bb1: ; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000) ; CHECK: [[PHI:%[0-9]+]]:_(p0) = G_PHI %6(p0), %bb.2, [[DEF]](p0), %bb.0 - ; CHECK: [[PHI1:%[0-9]+]]:_(s16) = G_PHI %20(s16), %bb.2, [[DEF1]](s16), %bb.0 + ; CHECK: [[PHI1:%[0-9]+]]:_(s16) = G_PHI %20(s16), %bb.2, [[DEF2]](s16), %bb.0 ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[PHI1]](s16) ; CHECK: G_BRCOND [[TRUNC]](s1), %bb.3 ; CHECK: bb.2.bb3: ; CHECK: successors: %bb.3(0x40000000), %bb.1(0x40000000) - ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[INTTOPTR]], [[C1]](s64) - ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[PHI]](p0) :: (load 2 from %ir.lsr.iv) + ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[INTTOPTR]], [[C2]](s64) + ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[PHI]](p0) :: (load (s16) from %ir.lsr.iv) ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s16) - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ZEXT]](s32), [[C2]] + ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ZEXT]](s32), [[C3]] ; CHECK: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[GEP]](p0) :: (load 2 from %ir.tmp5) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from %ir.tmp5) ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16) - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C3]](s32) ; CHECK: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ZEXT1]](s32), [[COPY]] - ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PHI]], [[C1]](s64) + ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PHI]], [[C2]](s64) ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[ICMP1]](s32) ; CHECK: G_BRCOND [[TRUNC1]](s1), %bb.3 ; CHECK: G_BR %bb.1 @@ -85,9 +87,9 @@ body: | bb.3.bb3: %4:_(s64) = G_CONSTANT i64 4 %5:_(p0) = G_PTR_ADD %2, %4(s64) - %6:_(s16) = G_LOAD %0(p0) :: (load 2 from %ir.lsr.iv) + %6:_(s16) = G_LOAD %0(p0) :: (load (s16) from %ir.lsr.iv) %8:_(s1) = G_ICMP intpred(eq), %6(s16), %7 - %9:_(s16) = G_LOAD %5(p0) :: (load 2 from %ir.tmp5) + %9:_(s16) = G_LOAD %5(p0) :: (load (s16) from %ir.tmp5) %10:_(s1) = G_ICMP intpred(eq), %9(s16), %7 %11:_(p0) = G_PTR_ADD %0, %4(s64) G_BRCOND %8(s1), %bb.4 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi.mir index b9fbd17c07dae..78b1be9190246 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi.mir @@ -577,8 +577,8 @@ body: | ; CHECK: [[PHI1:%[0-9]+]]:_(s64) = G_PHI [[DEF]](s64), %bb.0 ; CHECK: [[PHI2:%[0-9]+]]:_(s64) = G_PHI [[COPY]](s64), %bb.0 ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[PHI]](s64), [[PHI1]](s64) - ; CHECK: G_STORE [[MV]](s128), [[COPY1]](p0) :: (store 16) - ; CHECK: G_STORE [[PHI2]](s64), [[COPY1]](p0) :: (store 8) + ; CHECK: G_STORE [[MV]](s128), [[COPY1]](p0) :: (store (s128)) + ; CHECK: G_STORE [[PHI2]](s64), [[COPY1]](p0) :: (store (s64)) ; CHECK: RET_ReallyLR ; Check that the G_MERGE here gets inserted after all the PHIs. bb.0: @@ -593,8 +593,8 @@ body: | bb.1: %3:_(s128) = G_PHI %2(s128), %bb.0 %4:_(s64) = G_PHI %0(s64), %bb.0 - G_STORE %3(s128), %1(p0) :: (store 16) - G_STORE %4(s64), %1(p0) :: (store 8) + G_STORE %3(s128), %1(p0) :: (store (s128)) + G_STORE %4(s64), %1(p0) :: (store (s64)) RET_ReallyLR ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-reduce-add.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-reduce-add.mir index eba3a38658540..7dce3437871f7 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-reduce-add.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-reduce-add.mir @@ -10,13 +10,13 @@ body: | ; CHECK-LABEL: name: add_v16s8 ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16) + ; CHECK: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load (<16 x s8>)) ; CHECK: [[VECREDUCE_ADD:%[0-9]+]]:_(s8) = G_VECREDUCE_ADD [[LOAD]](<16 x s8>) ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[VECREDUCE_ADD]](s8) ; CHECK: $w0 = COPY [[ANYEXT]](s32) ; CHECK: RET_ReallyLR implicit $w0 %0:_(p0) = COPY $x0 - %1:_(<16 x s8>) = G_LOAD %0(p0) :: (load 16) + %1:_(<16 x s8>) = G_LOAD %0(p0) :: (load (<16 x s8>)) %2:_(s8) = G_VECREDUCE_ADD %1(<16 x s8>) %3:_(s32) = G_ANYEXT %2(s8) $w0 = COPY %3(s32) @@ -33,13 +33,13 @@ body: | ; CHECK-LABEL: name: add_v8s16 ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load 16) + ; CHECK: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load (<8 x s16>)) ; CHECK: [[VECREDUCE_ADD:%[0-9]+]]:_(s16) = G_VECREDUCE_ADD [[LOAD]](<8 x s16>) ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[VECREDUCE_ADD]](s16) ; CHECK: $w0 = COPY [[ANYEXT]](s32) ; CHECK: RET_ReallyLR implicit $w0 %0:_(p0) = COPY $x0 - %1:_(<8 x s16>) = G_LOAD %0(p0) :: (load 16) + %1:_(<8 x s16>) = G_LOAD %0(p0) :: (load (<8 x s16>)) %2:_(s16) = G_VECREDUCE_ADD %1(<8 x s16>) %3:_(s32) = G_ANYEXT %2(s16) $w0 = COPY %3(s32) @@ -56,12 +56,12 @@ body: | ; CHECK-LABEL: name: add_v4s32 ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16) + ; CHECK: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>)) ; CHECK: [[VECREDUCE_ADD:%[0-9]+]]:_(s32) = G_VECREDUCE_ADD [[LOAD]](<4 x s32>) ; CHECK: $w0 = COPY [[VECREDUCE_ADD]](s32) ; CHECK: RET_ReallyLR implicit $w0 %0:_(p0) = COPY $x0 - %1:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16) + %1:_(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>)) %2:_(s32) = G_VECREDUCE_ADD %1(<4 x s32>) $w0 = COPY %2(s32) RET_ReallyLR implicit $w0 @@ -77,12 +77,12 @@ body: | ; CHECK-LABEL: name: add_v2s64 ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16) + ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>)) ; CHECK: [[VECREDUCE_ADD:%[0-9]+]]:_(s64) = G_VECREDUCE_ADD [[LOAD]](<2 x s64>) ; CHECK: $x0 = COPY [[VECREDUCE_ADD]](s64) ; CHECK: RET_ReallyLR implicit $x0 %0:_(p0) = COPY $x0 - %1:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16) + %1:_(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>)) %2:_(s64) = G_VECREDUCE_ADD %1(<2 x s64>) $x0 = COPY %2(s64) RET_ReallyLR implicit $x0 @@ -98,12 +98,12 @@ body: | ; CHECK-LABEL: name: add_v2s32 ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load 8) + ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>)) ; CHECK: [[VECREDUCE_ADD:%[0-9]+]]:_(s32) = G_VECREDUCE_ADD [[LOAD]](<2 x s32>) ; CHECK: $w0 = COPY [[VECREDUCE_ADD]](s32) ; CHECK: RET_ReallyLR implicit $w0 %0:_(p0) = COPY $x0 - %1:_(<2 x s32>) = G_LOAD %0(p0) :: (load 8) + %1:_(<2 x s32>) = G_LOAD %0(p0) :: (load (<2 x s32>)) %2:_(s32) = G_VECREDUCE_ADD %1(<2 x s32>) $w0 = COPY %2(s32) RET_ReallyLR implicit $w0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-s128-div.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-s128-div.mir index d2764ff1c53ab..810c4d5f6c360 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-s128-div.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-s128-div.mir @@ -25,8 +25,8 @@ body: | ; CHECK: liveins: $x0, $x1 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 - ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.v1ptr) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.v2ptr) + ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p0) :: (load (s128) from %ir.v1ptr) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load (s128) from %ir.v2ptr) ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](s128) ; CHECK: $x0 = COPY [[UV]](s64) @@ -39,14 +39,14 @@ body: | ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY $x1 ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY2]](s64), [[COPY3]](s64) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp - ; CHECK: G_STORE [[MV]](s128), [[COPY]](p0) :: (store 16 into %ir.v1ptr) + ; CHECK: G_STORE [[MV]](s128), [[COPY]](p0) :: (store (s128) into %ir.v1ptr) ; CHECK: RET_ReallyLR %0:_(p0) = COPY $x0 %1:_(p0) = COPY $x1 - %2:_(s128) = G_LOAD %0(p0) :: (load 16 from %ir.v1ptr) - %3:_(s128) = G_LOAD %1(p0) :: (load 16 from %ir.v2ptr) + %2:_(s128) = G_LOAD %0(p0) :: (load (s128) from %ir.v1ptr) + %3:_(s128) = G_LOAD %1(p0) :: (load (s128) from %ir.v2ptr) %4:_(s128) = G_UDIV %2, %3 - G_STORE %4(s128), %0(p0) :: (store 16 into %ir.v1ptr) + G_STORE %4(s128), %0(p0) :: (store (s128) into %ir.v1ptr) RET_ReallyLR ... @@ -66,8 +66,8 @@ body: | ; CHECK: liveins: $x0, $x1 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 - ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.v1ptr) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.v2ptr) + ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p0) :: (load (s128) from %ir.v1ptr) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load (s128) from %ir.v2ptr) ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](s128) ; CHECK: $x0 = COPY [[UV]](s64) @@ -80,14 +80,14 @@ body: | ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY $x1 ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY2]](s64), [[COPY3]](s64) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp - ; CHECK: G_STORE [[MV]](s128), [[COPY]](p0) :: (store 16 into %ir.v1ptr) + ; CHECK: G_STORE [[MV]](s128), [[COPY]](p0) :: (store (s128) into %ir.v1ptr) ; CHECK: RET_ReallyLR %0:_(p0) = COPY $x0 %1:_(p0) = COPY $x1 - %2:_(s128) = G_LOAD %0(p0) :: (load 16 from %ir.v1ptr) - %3:_(s128) = G_LOAD %1(p0) :: (load 16 from %ir.v2ptr) + %2:_(s128) = G_LOAD %0(p0) :: (load (s128) from %ir.v1ptr) + %3:_(s128) = G_LOAD %1(p0) :: (load (s128) from %ir.v2ptr) %4:_(s128) = G_SDIV %2, %3 - G_STORE %4(s128), %0(p0) :: (store 16 into %ir.v1ptr) + G_STORE %4(s128), %0(p0) :: (store (s128) into %ir.v1ptr) RET_ReallyLR ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-sext-zext-128.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-sext-zext-128.mir index cb333bbfbb03b..2dbbcdb76da5c 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-sext-zext-128.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-sext-zext-128.mir @@ -14,12 +14,12 @@ body: | ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 ; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s64) ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY]](s64), [[ASHR]](s64) - ; CHECK: G_STORE [[MV]](s128), [[COPY1]](p0) :: (store 16) + ; CHECK: G_STORE [[MV]](s128), [[COPY1]](p0) :: (store (s128)) ; CHECK: RET_ReallyLR %0:_(s64) = COPY $x0 %1:_(p0) = COPY $x1 %2:_(s128) = G_SEXT %0(s64) - G_STORE %2(s128), %1(p0) :: (store 16) + G_STORE %2(s128), %1(p0) :: (store (s128)) RET_ReallyLR ... @@ -36,12 +36,12 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY]](s64), [[C]](s64) - ; CHECK: G_STORE [[MV]](s128), [[COPY1]](p0) :: (store 16) + ; CHECK: G_STORE [[MV]](s128), [[COPY1]](p0) :: (store (s128)) ; CHECK: RET_ReallyLR %0:_(s64) = COPY $x0 %1:_(p0) = COPY $x1 %2:_(s128) = G_ZEXT %0(s64) - G_STORE %2(s128), %1(p0) :: (store 16) + G_STORE %2(s128), %1(p0) :: (store (s128)) RET_ReallyLR ... @@ -60,12 +60,12 @@ body: | ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[C]](s32) ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64) - ; CHECK: G_STORE [[MV1]](s128), [[COPY1]](p0) :: (store 16) + ; CHECK: G_STORE [[MV1]](s128), [[COPY1]](p0) :: (store (s128)) ; CHECK: RET_ReallyLR %0:_(s32) = COPY $w0 %1:_(p0) = COPY $x1 %2:_(s128) = G_ZEXT %0(s32) - G_STORE %2(s128), %1(p0) :: (store 16) + G_STORE %2(s128), %1(p0) :: (store (s128)) RET_ReallyLR ... @@ -81,17 +81,17 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK: G_STORE [[COPY]](s64), [[COPY1]](p0) :: (store 8) - ; CHECK: G_STORE [[C]](s64), [[COPY1]](p0) :: (store 8) - ; CHECK: G_STORE [[C]](s64), [[COPY1]](p0) :: (store 8) + ; CHECK: G_STORE [[COPY]](s64), [[COPY1]](p0) :: (store (s64)) + ; CHECK: G_STORE [[C]](s64), [[COPY1]](p0) :: (store (s64)) + ; CHECK: G_STORE [[C]](s64), [[COPY1]](p0) :: (store (s64)) ; CHECK: RET_ReallyLR %0:_(s64) = COPY $x0 %1:_(p0) = COPY $x1 %2:_(s192) = G_ZEXT %0(s64) %3:_(s64), %4:_(s64), %5:_(s64) = G_UNMERGE_VALUES %2(s192) - G_STORE %3, %1(p0) :: (store 8) - G_STORE %4, %1(p0) :: (store 8) - G_STORE %5, %1(p0) :: (store 8) + G_STORE %3, %1(p0) :: (store (s64)) + G_STORE %4, %1(p0) :: (store (s64)) + G_STORE %5, %1(p0) :: (store (s64)) RET_ReallyLR ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-sextload.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-sextload.mir index 7f568f8d6175d..4d36192adf9d9 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-sextload.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-sextload.mir @@ -7,9 +7,9 @@ body: | liveins: $x0 ; CHECK-LABEL: name: test_sextload ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load 1) + ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s8)) ; CHECK: $w0 = COPY [[SEXTLOAD]](s32) %0:_(p0) = COPY $x0 - %1:_(s32) = G_SEXTLOAD %0 :: (load 1) + %1:_(s32) = G_SEXTLOAD %0 :: (load (s8)) $w0 = COPY %1 ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir index 8b87a903af27a..c07bbf275369f 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir @@ -153,10 +153,10 @@ body: | ; CHECK: [[COPY4:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[COPY1]](<2 x s64>), [[COPY2]], shufflemask(1, 2) ; CHECK: [[SHUF1:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[COPY3]](<2 x s64>), [[COPY]], shufflemask(1, 2) - ; CHECK: G_STORE [[SHUF]](<2 x s64>), [[COPY4]](p0) :: (store 16, align 32) + ; CHECK: G_STORE [[SHUF]](<2 x s64>), [[COPY4]](p0) :: (store (s128), align 32) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C]](s64) - ; CHECK: G_STORE [[SHUF1]](<2 x s64>), [[PTR_ADD]](p0) :: (store 16 into unknown-address + 16) + ; CHECK: G_STORE [[SHUF1]](<2 x s64>), [[PTR_ADD]](p0) :: (store (s128) into unknown-address + 16) ; CHECK: RET_ReallyLR %3:_(<2 x s64>) = COPY $q0 %4:_(<2 x s64>) = COPY $q1 @@ -166,7 +166,7 @@ body: | %1:_(<4 x s64>) = G_CONCAT_VECTORS %5(<2 x s64>), %6(<2 x s64>) %2:_(p0) = COPY $x0 %7:_(<4 x s64>) = G_SHUFFLE_VECTOR %0(<4 x s64>), %1, shufflemask(3, 4, 7, 0) - G_STORE %7(<4 x s64>), %2(p0) :: (store 32) + G_STORE %7(<4 x s64>), %2(p0) :: (store (<4 x s64>)) RET_ReallyLR ... @@ -195,10 +195,10 @@ body: | ; CHECK: [[EVEC3:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY3]](<4 x s32>), [[C3]](s64) ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[EVEC]](s32), [[EVEC1]](s32), [[EVEC2]](s32), [[EVEC3]](s32) ; CHECK: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY1]](<4 x s32>), [[COPY]], shufflemask(2, 6, 5, 3) - ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](p0) :: (store 16, align 32) + ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](p0) :: (store (s128), align 32) ; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C4]](s64) - ; CHECK: G_STORE [[SHUF]](<4 x s32>), [[PTR_ADD]](p0) :: (store 16 into unknown-address + 16) + ; CHECK: G_STORE [[SHUF]](<4 x s32>), [[PTR_ADD]](p0) :: (store (s128) into unknown-address + 16) ; CHECK: RET_ReallyLR %3:_(<4 x s32>) = COPY $q0 %4:_(<4 x s32>) = COPY $q1 @@ -208,7 +208,7 @@ body: | %1:_(<8 x s32>) = G_CONCAT_VECTORS %5(<4 x s32>), %6(<4 x s32>) %2:_(p0) = COPY $x0 %7:_(<8 x s32>) = G_SHUFFLE_VECTOR %0(<8 x s32>), %1, shufflemask(0, 5, 10, 15, 6, 2, 1, 7) - G_STORE %7(<8 x s32>), %2(p0) :: (store 32) + G_STORE %7(<8 x s32>), %2(p0) :: (store (<8 x s32>)) RET_ReallyLR ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-vaarg.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-vaarg.mir index 12be076e14cb1..32abeedf2057f 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-vaarg.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-vaarg.mir @@ -33,20 +33,20 @@ body: | bb.0: ; CHECK-LABEL: name: test_vaarg ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[COPY]](p0), debug-location !DILocation(line: 4, column: 3, scope: {{.*}}) :: (load 8) + ; CHECK: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[COPY]](p0), debug-location !DILocation(line: 4, column: 3, scope: {{.*}}) :: (load (p0)) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[LOAD]], [[C]](s64), debug-location !DILocation(line: 4, column: 3, scope: {{.*}}) - ; CHECK: G_STORE [[PTR_ADD]](p0), [[COPY]](p0), debug-location !DILocation(line: 4, column: 3, scope: {{.*}}) :: (store 8) - ; CHECK: [[LOAD1:%[0-9]+]]:_(p0) = G_LOAD [[COPY]](p0), debug-location !DILocation(line: 5, column: 1, scope: {{.*}}) :: (load 8) + ; CHECK: G_STORE [[PTR_ADD]](p0), [[COPY]](p0), debug-location !DILocation(line: 4, column: 3, scope: {{.*}}) :: (store (p0)) + ; CHECK: [[LOAD1:%[0-9]+]]:_(p0) = G_LOAD [[COPY]](p0), debug-location !DILocation(line: 5, column: 1, scope: {{.*}}) :: (load (p0)) ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[LOAD1]], [[C]](s64), debug-location !DILocation(line: 5, column: 1, scope: {{.*}}) - ; CHECK: G_STORE [[PTR_ADD1]](p0), [[COPY]](p0), debug-location !DILocation(line: 5, column: 1, scope: {{.*}}) :: (store 8) - ; CHECK: [[LOAD2:%[0-9]+]]:_(p0) = G_LOAD [[COPY]](p0), debug-location !DILocation(line: 4, column: 3, scope: {{.*}}) :: (load 8) + ; CHECK: G_STORE [[PTR_ADD1]](p0), [[COPY]](p0), debug-location !DILocation(line: 5, column: 1, scope: {{.*}}) :: (store (p0)) + ; CHECK: [[LOAD2:%[0-9]+]]:_(p0) = G_LOAD [[COPY]](p0), debug-location !DILocation(line: 4, column: 3, scope: {{.*}}) :: (load (p0)) ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 15 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[LOAD2]], [[C1]](s64), debug-location !DILocation(line: 4, column: 3, scope: {{.*}}) ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -16 ; CHECK: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[PTR_ADD2]], [[C2]](s64), debug-location !DILocation(line: 4, column: 3, scope: {{.*}}) ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTRMASK]], [[C]](s64), debug-location !DILocation(line: 4, column: 3, scope: {{.*}}) - ; CHECK: G_STORE [[PTR_ADD3]](p0), [[COPY]](p0), debug-location !DILocation(line: 4, column: 3, scope: {{.*}}) :: (store 8) + ; CHECK: G_STORE [[PTR_ADD3]](p0), [[COPY]](p0), debug-location !DILocation(line: 4, column: 3, scope: {{.*}}) :: (store (p0)) %0:_(p0) = COPY $x0 %1:_(s8) = G_VAARG %0(p0), 1, debug-location !11 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-zextload.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-zextload.mir index ad3603d1d136e..03112e4a3d72e 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-zextload.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-zextload.mir @@ -7,9 +7,9 @@ body: | liveins: $x0 ; CHECK-LABEL: name: test_zextload ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load 1) + ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) ; CHECK: $w0 = COPY [[ZEXTLOAD]](s32) %0:_(p0) = COPY $x0 - %1:_(s32) = G_ZEXTLOAD %0 :: (load 1) + %1:_(s32) = G_ZEXTLOAD %0 :: (load (s8)) $w0 = COPY %1 ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-combiner.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-combiner.mir index 285f45896af07..2a08de43cd2e1 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-combiner.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-combiner.mir @@ -100,9 +100,9 @@ body: | ; CHECK-LABEL: name: unmerge_merge_combine ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 8) + ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64)) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p0) :: (load 16) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p0) :: (load (s128)) ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD1]](s128) ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[LOAD]], [[UV]] ; CHECK: [[MUL1:%[0-9]+]]:_(s64) = G_MUL [[C]], [[UV]] @@ -114,8 +114,8 @@ body: | ; CHECK: $q0 = COPY [[MV]](s128) ; CHECK: RET_ReallyLR %0:_(p0) = COPY $x0 - %1:_(s128) = G_ZEXTLOAD %0:_(p0) :: (load 8) - %2:_(s128) = G_LOAD %0:_(p0) :: (load 16) + %1:_(s128) = G_ZEXTLOAD %0:_(p0) :: (load (s64)) + %2:_(s128) = G_LOAD %0:_(p0) :: (load (s128)) %3:_(s128) = G_MUL %1:_, %2:_ $q0 = COPY %3 RET_ReallyLR diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir b/llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir index 91b681b9b9bb7..03002a33650c2 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir @@ -42,13 +42,13 @@ body: | ; CHECK: liveins: $x0, $x1 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 - ; CHECK: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX [[COPY]], [[COPY1]], 0, 0 :: (load 8 from %ir.addr) + ; CHECK: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX [[COPY]], [[COPY1]], 0, 0 :: (load (s64) from %ir.addr) ; CHECK: $x0 = COPY [[LDRXroX]] ; CHECK: RET_ReallyLR implicit $x0 %0:gpr(p0) = COPY $x0 %1:gpr(s64) = COPY $x1 %2:gpr(p0) = G_PTR_ADD %0, %1 - %4:gpr(s64) = G_LOAD %2(p0) :: (load 8 from %ir.addr) + %4:gpr(s64) = G_LOAD %2(p0) :: (load (s64) from %ir.addr) $x0 = COPY %4(s64) RET_ReallyLR implicit $x0 ... @@ -67,13 +67,13 @@ body: | ; CHECK: liveins: $d0, $x1 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $d0 ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 - ; CHECK: [[LDRDroX:%[0-9]+]]:fpr64 = LDRDroX [[COPY]], [[COPY1]], 0, 0 :: (load 8 from %ir.addr) + ; CHECK: [[LDRDroX:%[0-9]+]]:fpr64 = LDRDroX [[COPY]], [[COPY1]], 0, 0 :: (load (s64) from %ir.addr) ; CHECK: $d0 = COPY [[LDRDroX]] ; CHECK: RET_ReallyLR implicit $d0 %0:gpr(p0) = COPY $d0 %1:gpr(s64) = COPY $x1 %2:gpr(p0) = G_PTR_ADD %0, %1 - %4:fpr(s64) = G_LOAD %2(p0) :: (load 8 from %ir.addr) + %4:fpr(s64) = G_LOAD %2(p0) :: (load (s64) from %ir.addr) $d0 = COPY %4(s64) RET_ReallyLR implicit $d0 ... @@ -94,7 +94,7 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 ; CHECK: [[ADDXrr:%[0-9]+]]:gpr64common = ADDXrr [[COPY]], [[COPY1]] - ; CHECK: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[ADDXrr]], 0 :: (load 8 from %ir.addr) + ; CHECK: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[ADDXrr]], 0 :: (load (s64) from %ir.addr) ; CHECK: [[COPY2:%[0-9]+]]:gpr64 = COPY [[ADDXrr]] ; CHECK: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[COPY2]], [[LDRXui]] ; CHECK: $x0 = COPY [[ADDXrr1]] @@ -102,7 +102,7 @@ body: | %0:gpr(p0) = COPY $x0 %1:gpr(s64) = COPY $x1 %2:gpr(p0) = G_PTR_ADD %0, %1 - %4:gpr(s64) = G_LOAD %2(p0) :: (load 8 from %ir.addr) + %4:gpr(s64) = G_LOAD %2(p0) :: (load (s64) from %ir.addr) %5:gpr(s64) = G_PTRTOINT %2 %6:gpr(s64) = G_ADD %5, %4 $x0 = COPY %6(s64) @@ -123,7 +123,7 @@ body: | ; CHECK: liveins: $x0, $x1, $x2 ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1 - ; CHECK: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX [[COPY1]], [[COPY]], 0, 1 :: (load 8 from %ir.addr) + ; CHECK: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX [[COPY1]], [[COPY]], 0, 1 :: (load (s64) from %ir.addr) ; CHECK: $x2 = COPY [[LDRXroX]] ; CHECK: RET_ReallyLR implicit $x2 %0:gpr(s64) = COPY $x0 @@ -131,7 +131,7 @@ body: | %2:gpr(s64) = G_SHL %0, %1(s64) %3:gpr(p0) = COPY $x1 %4:gpr(p0) = G_PTR_ADD %3, %2 - %5:gpr(s64) = G_LOAD %4(p0) :: (load 8 from %ir.addr) + %5:gpr(s64) = G_LOAD %4(p0) :: (load (s64) from %ir.addr) $x2 = COPY %5(s64) RET_ReallyLR implicit $x2 @@ -150,7 +150,7 @@ body: | ; CHECK: liveins: $x0, $x1, $d2 ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1 - ; CHECK: [[LDRDroX:%[0-9]+]]:fpr64 = LDRDroX [[COPY1]], [[COPY]], 0, 1 :: (load 8 from %ir.addr) + ; CHECK: [[LDRDroX:%[0-9]+]]:fpr64 = LDRDroX [[COPY1]], [[COPY]], 0, 1 :: (load (s64) from %ir.addr) ; CHECK: $d2 = COPY [[LDRDroX]] ; CHECK: RET_ReallyLR implicit $d2 %0:gpr(s64) = COPY $x0 @@ -158,7 +158,7 @@ body: | %2:gpr(s64) = G_SHL %0, %1(s64) %3:gpr(p0) = COPY $x1 %4:gpr(p0) = G_PTR_ADD %3, %2 - %5:fpr(s64) = G_LOAD %4(p0) :: (load 8 from %ir.addr) + %5:fpr(s64) = G_LOAD %4(p0) :: (load (s64) from %ir.addr) $d2 = COPY %5(s64) RET_ReallyLR implicit $d2 @@ -177,7 +177,7 @@ body: | ; CHECK: liveins: $x0, $x1, $x2 ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1 - ; CHECK: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX [[COPY1]], [[COPY]], 0, 1 :: (load 8 from %ir.addr) + ; CHECK: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX [[COPY1]], [[COPY]], 0, 1 :: (load (s64) from %ir.addr) ; CHECK: $x2 = COPY [[LDRXroX]] ; CHECK: RET_ReallyLR implicit $x2 %0:gpr(s64) = COPY $x0 @@ -185,7 +185,7 @@ body: | %2:gpr(s64) = G_MUL %0, %1(s64) %3:gpr(p0) = COPY $x1 %4:gpr(p0) = G_PTR_ADD %3, %2 - %5:gpr(s64) = G_LOAD %4(p0) :: (load 8 from %ir.addr) + %5:gpr(s64) = G_LOAD %4(p0) :: (load (s64) from %ir.addr) $x2 = COPY %5(s64) RET_ReallyLR implicit $x2 @@ -204,7 +204,7 @@ body: | ; CHECK: liveins: $x0, $x1, $d2 ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1 - ; CHECK: [[LDRDroX:%[0-9]+]]:fpr64 = LDRDroX [[COPY1]], [[COPY]], 0, 1 :: (load 8 from %ir.addr) + ; CHECK: [[LDRDroX:%[0-9]+]]:fpr64 = LDRDroX [[COPY1]], [[COPY]], 0, 1 :: (load (s64) from %ir.addr) ; CHECK: $d2 = COPY [[LDRDroX]] ; CHECK: RET_ReallyLR implicit $d2 %0:gpr(s64) = COPY $x0 @@ -212,7 +212,7 @@ body: | %2:gpr(s64) = G_MUL %0, %1(s64) %3:gpr(p0) = COPY $x1 %4:gpr(p0) = G_PTR_ADD %3, %2 - %5:fpr(s64) = G_LOAD %4(p0) :: (load 8 from %ir.addr) + %5:fpr(s64) = G_LOAD %4(p0) :: (load (s64) from %ir.addr) $d2 = COPY %5(s64) RET_ReallyLR implicit $d2 @@ -231,7 +231,7 @@ body: | ; CHECK: liveins: $x0, $x1, $x2 ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1 - ; CHECK: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX [[COPY1]], [[COPY]], 0, 1 :: (load 8 from %ir.addr) + ; CHECK: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX [[COPY1]], [[COPY]], 0, 1 :: (load (s64) from %ir.addr) ; CHECK: $x2 = COPY [[LDRXroX]] ; CHECK: RET_ReallyLR implicit $x2 %0:gpr(s64) = COPY $x0 @@ -239,7 +239,7 @@ body: | %2:gpr(s64) = G_MUL %1, %0(s64) %3:gpr(p0) = COPY $x1 %4:gpr(p0) = G_PTR_ADD %3, %2 - %5:gpr(s64) = G_LOAD %4(p0) :: (load 8 from %ir.addr) + %5:gpr(s64) = G_LOAD %4(p0) :: (load (s64) from %ir.addr) $x2 = COPY %5(s64) RET_ReallyLR implicit $x2 @@ -258,7 +258,7 @@ body: | ; CHECK: liveins: $x0, $x1, $d2 ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1 - ; CHECK: [[LDRDroX:%[0-9]+]]:fpr64 = LDRDroX [[COPY1]], [[COPY]], 0, 1 :: (load 8 from %ir.addr) + ; CHECK: [[LDRDroX:%[0-9]+]]:fpr64 = LDRDroX [[COPY1]], [[COPY]], 0, 1 :: (load (s64) from %ir.addr) ; CHECK: $d2 = COPY [[LDRDroX]] ; CHECK: RET_ReallyLR implicit $d2 %0:gpr(s64) = COPY $x0 @@ -266,7 +266,7 @@ body: | %2:gpr(s64) = G_MUL %1, %0(s64) %3:gpr(p0) = COPY $x1 %4:gpr(p0) = G_PTR_ADD %3, %2 - %5:fpr(s64) = G_LOAD %4(p0) :: (load 8 from %ir.addr) + %5:fpr(s64) = G_LOAD %4(p0) :: (load (s64) from %ir.addr) $d2 = COPY %5(s64) RET_ReallyLR implicit $d2 @@ -290,7 +290,7 @@ body: | ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32 ; CHECK: [[MADDXrrr:%[0-9]+]]:gpr64 = MADDXrrr [[SUBREG_TO_REG]], [[COPY]], $xzr ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1 - ; CHECK: [[LDRDroX:%[0-9]+]]:fpr64 = LDRDroX [[COPY1]], [[MADDXrrr]], 0, 0 :: (load 8 from %ir.addr) + ; CHECK: [[LDRDroX:%[0-9]+]]:fpr64 = LDRDroX [[COPY1]], [[MADDXrrr]], 0, 0 :: (load (s64) from %ir.addr) ; CHECK: $d2 = COPY [[LDRDroX]] ; CHECK: RET_ReallyLR implicit $d2 %0:gpr(s64) = COPY $x0 @@ -298,7 +298,7 @@ body: | %2:gpr(s64) = G_MUL %1, %0(s64) %3:gpr(p0) = COPY $x1 %4:gpr(p0) = G_PTR_ADD %3, %2 - %5:fpr(s64) = G_LOAD %4(p0) :: (load 8 from %ir.addr) + %5:fpr(s64) = G_LOAD %4(p0) :: (load (s64) from %ir.addr) $d2 = COPY %5(s64) RET_ReallyLR implicit $d2 @@ -322,7 +322,7 @@ body: | ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32 ; CHECK: [[MADDXrrr:%[0-9]+]]:gpr64 = MADDXrrr [[SUBREG_TO_REG]], [[COPY]], $xzr ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1 - ; CHECK: [[LDRDroX:%[0-9]+]]:fpr64 = LDRDroX [[COPY1]], [[MADDXrrr]], 0, 0 :: (load 8 from %ir.addr) + ; CHECK: [[LDRDroX:%[0-9]+]]:fpr64 = LDRDroX [[COPY1]], [[MADDXrrr]], 0, 0 :: (load (s64) from %ir.addr) ; CHECK: $d2 = COPY [[LDRDroX]] ; CHECK: RET_ReallyLR implicit $d2 %0:gpr(s64) = COPY $x0 @@ -330,7 +330,7 @@ body: | %2:gpr(s64) = G_MUL %1, %0(s64) %3:gpr(p0) = COPY $x1 %4:gpr(p0) = G_PTR_ADD %3, %2 - %5:fpr(s64) = G_LOAD %4(p0) :: (load 8 from %ir.addr) + %5:fpr(s64) = G_LOAD %4(p0) :: (load (s64) from %ir.addr) $d2 = COPY %5(s64) RET_ReallyLR implicit $d2 @@ -352,7 +352,7 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 ; CHECK: [[UBFMXri:%[0-9]+]]:gpr64common = UBFMXri [[COPY]], 61, 60 ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1 - ; CHECK: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX [[COPY1]], [[UBFMXri]], 0, 0 :: (load 8 from %ir.addr) + ; CHECK: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX [[COPY1]], [[UBFMXri]], 0, 0 :: (load (s64) from %ir.addr) ; CHECK: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[UBFMXri]], 3, 0 ; CHECK: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[LDRXroX]], [[ADDXri]] ; CHECK: $x2 = COPY [[ADDXrr]] @@ -362,7 +362,7 @@ body: | %2:gpr(s64) = G_SHL %0, %1(s64) %3:gpr(p0) = COPY $x1 %4:gpr(p0) = G_PTR_ADD %3, %2 - %5:gpr(s64) = G_LOAD %4(p0) :: (load 8 from %ir.addr) + %5:gpr(s64) = G_LOAD %4(p0) :: (load (s64) from %ir.addr) %6:gpr(s64) = G_ADD %2, %1 %7:gpr(s64) = G_ADD %5, %6 $x2 = COPY %7(s64) @@ -387,7 +387,7 @@ body: | ; CHECK: [[UBFMXri:%[0-9]+]]:gpr64common = UBFMXri [[COPY]], 61, 60 ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 ; CHECK: [[ADDXrr:%[0-9]+]]:gpr64common = ADDXrr [[COPY1]], [[UBFMXri]] - ; CHECK: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[ADDXrr]], 0 :: (load 8 from %ir.addr) + ; CHECK: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[ADDXrr]], 0 :: (load (s64) from %ir.addr) ; CHECK: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[UBFMXri]], 3, 0 ; CHECK: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[LDRXui]], [[ADDXri]] ; CHECK: [[COPY2:%[0-9]+]]:gpr64 = COPY [[ADDXrr]] @@ -399,7 +399,7 @@ body: | %2:gpr(s64) = G_SHL %0, %1(s64) %3:gpr(p0) = COPY $x1 %4:gpr(p0) = G_PTR_ADD %3, %2 - %5:gpr(s64) = G_LOAD %4(p0) :: (load 8 from %ir.addr) + %5:gpr(s64) = G_LOAD %4(p0) :: (load (s64) from %ir.addr) %6:gpr(s64) = G_ADD %2, %1 %7:gpr(s64) = G_ADD %5, %6 %8:gpr(s64) = G_PTRTOINT %4 @@ -424,8 +424,8 @@ body: | ; CHECK: liveins: $x0, $x1, $x2 ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1 - ; CHECK: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX [[COPY1]], [[COPY]], 0, 1 :: (load 8 from %ir.addr) - ; CHECK: [[LDRXroX1:%[0-9]+]]:gpr64 = LDRXroX [[COPY1]], [[COPY]], 0, 1 :: (load 8 from %ir.addr) + ; CHECK: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX [[COPY1]], [[COPY]], 0, 1 :: (load (s64) from %ir.addr) + ; CHECK: [[LDRXroX1:%[0-9]+]]:gpr64 = LDRXroX [[COPY1]], [[COPY]], 0, 1 :: (load (s64) from %ir.addr) ; CHECK: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[LDRXroX]], [[LDRXroX1]] ; CHECK: $x2 = COPY [[ADDXrr]] ; CHECK: RET_ReallyLR implicit $x2 @@ -434,8 +434,8 @@ body: | %2:gpr(s64) = G_SHL %0, %1(s64) %3:gpr(p0) = COPY $x1 %4:gpr(p0) = G_PTR_ADD %3, %2 - %5:gpr(s64) = G_LOAD %4(p0) :: (load 8 from %ir.addr) - %6:gpr(s64) = G_LOAD %4(p0) :: (load 8 from %ir.addr) + %5:gpr(s64) = G_LOAD %4(p0) :: (load (s64) from %ir.addr) + %6:gpr(s64) = G_LOAD %4(p0) :: (load (s64) from %ir.addr) %7:gpr(s64) = G_ADD %5, %6 $x2 = COPY %7(s64) RET_ReallyLR implicit $x2 @@ -458,8 +458,8 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 ; CHECK: [[ADDXrs:%[0-9]+]]:gpr64common = ADDXrs [[COPY1]], [[COPY]], 3 - ; CHECK: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[ADDXrs]], 0 :: (load 8 from %ir.addr) - ; CHECK: [[LDRXui1:%[0-9]+]]:gpr64 = LDRXui [[ADDXrs]], 0 :: (load 8 from %ir.addr) + ; CHECK: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[ADDXrs]], 0 :: (load (s64) from %ir.addr) + ; CHECK: [[LDRXui1:%[0-9]+]]:gpr64 = LDRXui [[ADDXrs]], 0 :: (load (s64) from %ir.addr) ; CHECK: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[LDRXui]], [[LDRXui1]] ; CHECK: $x2 = COPY [[ADDXrr]] ; CHECK: RET_ReallyLR implicit $x2 @@ -468,8 +468,8 @@ body: | %2:gpr(s64) = G_SHL %0, %1(s64) %3:gpr(p0) = COPY $x1 %4:gpr(p0) = G_PTR_ADD %3, %2 - %5:gpr(s64) = G_LOAD %4(p0) :: (load 8 from %ir.addr) - %6:gpr(s64) = G_LOAD %4(p0) :: (load 8 from %ir.addr) + %5:gpr(s64) = G_LOAD %4(p0) :: (load (s64) from %ir.addr) + %6:gpr(s64) = G_LOAD %4(p0) :: (load (s64) from %ir.addr) %7:gpr(s64) = G_ADD %5, %6 $x2 = COPY %7(s64) RET_ReallyLR implicit $x2 @@ -494,7 +494,7 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1 ; CHECK: [[COPY2:%[0-9]+]]:gpr64 = COPY [[COPY1]] ; CHECK: [[ADDXrs:%[0-9]+]]:gpr64 = ADDXrs [[COPY2]], [[COPY]], 3 - ; CHECK: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX [[COPY1]], [[COPY]], 0, 1 :: (load 8 from %ir.addr) + ; CHECK: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX [[COPY1]], [[COPY]], 0, 1 :: (load (s64) from %ir.addr) ; CHECK: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[UBFMXri]], 3, 0 ; CHECK: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[LDRXroX]], [[ADDXri]] ; CHECK: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrs]], [[ADDXrr]] @@ -505,7 +505,7 @@ body: | %2:gpr(s64) = G_SHL %0, %1(s64) %3:gpr(p0) = COPY $x1 %4:gpr(p0) = G_PTR_ADD %3, %2 - %5:gpr(s64) = G_LOAD %4(p0) :: (load 8 from %ir.addr) + %5:gpr(s64) = G_LOAD %4(p0) :: (load (s64) from %ir.addr) %6:gpr(s64) = G_ADD %2, %1 %7:gpr(s64) = G_ADD %5, %6 %8:gpr(s64) = G_PTRTOINT %4 @@ -527,13 +527,13 @@ body: | ; CHECK: liveins: $x0, $x1 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 - ; CHECK: [[LDRWroX:%[0-9]+]]:gpr32 = LDRWroX [[COPY]], [[COPY1]], 0, 0 :: (load 4 from %ir.addr) + ; CHECK: [[LDRWroX:%[0-9]+]]:gpr32 = LDRWroX [[COPY]], [[COPY1]], 0, 0 :: (load (s32) from %ir.addr) ; CHECK: $w2 = COPY [[LDRWroX]] ; CHECK: RET_ReallyLR implicit $w2 %0:gpr(p0) = COPY $x0 %1:gpr(s64) = COPY $x1 %2:gpr(p0) = G_PTR_ADD %0, %1 - %4:gpr(s32) = G_LOAD %2(p0) :: (load 4 from %ir.addr) + %4:gpr(s32) = G_LOAD %2(p0) :: (load (s32) from %ir.addr) $w2 = COPY %4(s32) RET_ReallyLR implicit $w2 ... @@ -551,13 +551,13 @@ body: | ; CHECK: liveins: $d0, $x1 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $d0 ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 - ; CHECK: [[LDRSroX:%[0-9]+]]:fpr32 = LDRSroX [[COPY]], [[COPY1]], 0, 0 :: (load 4 from %ir.addr) + ; CHECK: [[LDRSroX:%[0-9]+]]:fpr32 = LDRSroX [[COPY]], [[COPY1]], 0, 0 :: (load (s32) from %ir.addr) ; CHECK: $s2 = COPY [[LDRSroX]] ; CHECK: RET_ReallyLR implicit $h2 %0:gpr(p0) = COPY $d0 %1:gpr(s64) = COPY $x1 %2:gpr(p0) = G_PTR_ADD %0, %1 - %4:fpr(s32) = G_LOAD %2(p0) :: (load 4 from %ir.addr) + %4:fpr(s32) = G_LOAD %2(p0) :: (load (s32) from %ir.addr) $s2 = COPY %4(s32) RET_ReallyLR implicit $h2 ... @@ -575,13 +575,13 @@ body: | ; CHECK: liveins: $x0, $x1 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 - ; CHECK: [[LDRHroX:%[0-9]+]]:fpr16 = LDRHroX [[COPY]], [[COPY1]], 0, 0 :: (load 2 from %ir.addr) + ; CHECK: [[LDRHroX:%[0-9]+]]:fpr16 = LDRHroX [[COPY]], [[COPY1]], 0, 0 :: (load (s16) from %ir.addr) ; CHECK: $h2 = COPY [[LDRHroX]] ; CHECK: RET_ReallyLR implicit $h2 %0:gpr(p0) = COPY $x0 %1:gpr(s64) = COPY $x1 %2:gpr(p0) = G_PTR_ADD %0, %1 - %4:fpr(s16) = G_LOAD %2(p0) :: (load 2 from %ir.addr) + %4:fpr(s16) = G_LOAD %2(p0) :: (load (s16) from %ir.addr) $h2 = COPY %4(s16) RET_ReallyLR implicit $h2 ... @@ -599,13 +599,13 @@ body: | ; CHECK: liveins: $x0, $x1 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 - ; CHECK: [[LDRBBroX:%[0-9]+]]:gpr32 = LDRBBroX [[COPY]], [[COPY1]], 0, 0 :: (load 1 from %ir.addr) + ; CHECK: [[LDRBBroX:%[0-9]+]]:gpr32 = LDRBBroX [[COPY]], [[COPY1]], 0, 0 :: (load (s8) from %ir.addr) ; CHECK: $w2 = COPY [[LDRBBroX]] ; CHECK: RET_ReallyLR implicit $w2 %0:gpr(p0) = COPY $x0 %1:gpr(s64) = COPY $x1 %2:gpr(p0) = G_PTR_ADD %0, %1 - %4:gpr(s32) = G_LOAD %2(p0) :: (load 1 from %ir.addr) + %4:gpr(s32) = G_LOAD %2(p0) :: (load (s8) from %ir.addr) $w2 = COPY %4(s32) RET_ReallyLR implicit $w2 ... @@ -623,12 +623,12 @@ body: | ; CHECK: liveins: $d0, $x1 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $d0 ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 - ; CHECK: [[LDRQroX:%[0-9]+]]:fpr128 = LDRQroX [[COPY]], [[COPY1]], 0, 0 :: (load 16 from %ir.addr) + ; CHECK: [[LDRQroX:%[0-9]+]]:fpr128 = LDRQroX [[COPY]], [[COPY1]], 0, 0 :: (load (<2 x s64>) from %ir.addr) ; CHECK: $q0 = COPY [[LDRQroX]] ; CHECK: RET_ReallyLR implicit $q0 %0:gpr(p0) = COPY $d0 %1:gpr(s64) = COPY $x1 %2:gpr(p0) = G_PTR_ADD %0, %1 - %4:fpr(<2 x s64>) = G_LOAD %2(p0) :: (load 16 from %ir.addr) + %4:fpr(<2 x s64>) = G_LOAD %2(p0) :: (load (<2 x s64>) from %ir.addr) $q0 = COPY %4(<2 x s64>) RET_ReallyLR implicit $q0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/load-wro-addressing-modes.mir b/llvm/test/CodeGen/AArch64/GlobalISel/load-wro-addressing-modes.mir index 8efd7648eed93..6a4b1248e7a4c 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/load-wro-addressing-modes.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/load-wro-addressing-modes.mir @@ -22,7 +22,7 @@ body: | ; CHECK: liveins: $w1, $x0 ; CHECK: %base:gpr64sp = COPY $x0 ; CHECK: %foo:gpr32 = COPY $w1 - ; CHECK: %load:gpr32 = LDRWroW %base, %foo, 1, 1 :: (load 4) + ; CHECK: %load:gpr32 = LDRWroW %base, %foo, 1, 1 :: (load (s32)) ; CHECK: $w0 = COPY %load ; CHECK: RET_ReallyLR implicit $w0 %base:gpr(p0) = COPY $x0 @@ -31,7 +31,7 @@ body: | %c:gpr(s64) = G_CONSTANT i64 2 %offset:gpr(s64) = G_SHL %ext, %c %ptr:gpr(p0) = G_PTR_ADD %base, %offset(s64) - %load:gpr(s32) = G_LOAD %ptr(p0) :: (load 4) + %load:gpr(s32) = G_LOAD %ptr(p0) :: (load (s32)) $w0 = COPY %load(s32) RET_ReallyLR implicit $w0 ... @@ -56,7 +56,7 @@ body: | ; CHECK: liveins: $w1, $x0 ; CHECK: %base:gpr64sp = COPY $x0 ; CHECK: %foo:gpr32 = COPY $w1 - ; CHECK: %load:gpr32 = LDRWroW %base, %foo, 0, 1 :: (load 4) + ; CHECK: %load:gpr32 = LDRWroW %base, %foo, 0, 1 :: (load (s32)) ; CHECK: $w0 = COPY %load ; CHECK: RET_ReallyLR implicit $w0 %base:gpr(p0) = COPY $x0 @@ -65,7 +65,7 @@ body: | %c:gpr(s64) = G_CONSTANT i64 2 %offset:gpr(s64) = G_SHL %ext, %c %ptr:gpr(p0) = G_PTR_ADD %base, %offset(s64) - %load:gpr(s32) = G_LOAD %ptr(p0) :: (load 4) + %load:gpr(s32) = G_LOAD %ptr(p0) :: (load (s32)) $w0 = COPY %load(s32) RET_ReallyLR implicit $w0 ... @@ -90,7 +90,7 @@ body: | ; CHECK: liveins: $w1, $x0 ; CHECK: %base:gpr64sp = COPY $x0 ; CHECK: %foo:gpr32 = COPY $w1 - ; CHECK: %load:gpr32 = LDRWroW %base, %foo, 0, 1 :: (load 4) + ; CHECK: %load:gpr32 = LDRWroW %base, %foo, 0, 1 :: (load (s32)) ; CHECK: $w0 = COPY %load ; CHECK: RET_ReallyLR implicit $w0 %base:gpr(p0) = COPY $x0 @@ -99,7 +99,7 @@ body: | %c:gpr(s64) = G_CONSTANT i64 2 %offset:gpr(s64) = G_SHL %ext, %c %ptr:gpr(p0) = G_PTR_ADD %base, %offset(s64) - %load:gpr(s32) = G_LOAD %ptr(p0) :: (load 4) + %load:gpr(s32) = G_LOAD %ptr(p0) :: (load (s32)) $w0 = COPY %load(s32) RET_ReallyLR implicit $w0 ... @@ -122,7 +122,7 @@ body: | ; CHECK: liveins: $w1, $x0 ; CHECK: %base:gpr64sp = COPY $x0 ; CHECK: %foo:gpr32 = COPY $w1 - ; CHECK: %load:gpr32 = LDRWroW %base, %foo, 1, 1 :: (load 4) + ; CHECK: %load:gpr32 = LDRWroW %base, %foo, 1, 1 :: (load (s32)) ; CHECK: $w0 = COPY %load ; CHECK: RET_ReallyLR implicit $w0 %base:gpr(p0) = COPY $x0 @@ -131,7 +131,7 @@ body: | %c:gpr(s64) = G_CONSTANT i64 4 %offset:gpr(s64) = G_MUL %c, %ext %ptr:gpr(p0) = G_PTR_ADD %base, %offset(s64) - %load:gpr(s32) = G_LOAD %ptr(p0) :: (load 4) + %load:gpr(s32) = G_LOAD %ptr(p0) :: (load (s32)) $w0 = COPY %load(s32) RET_ReallyLR implicit $w0 ... @@ -154,7 +154,7 @@ body: | ; CHECK: liveins: $w1, $x0 ; CHECK: %base:gpr64sp = COPY $x0 ; CHECK: %foo:gpr32 = COPY $w1 - ; CHECK: %load:gpr32 = LDRWroW %base, %foo, 0, 1 :: (load 4) + ; CHECK: %load:gpr32 = LDRWroW %base, %foo, 0, 1 :: (load (s32)) ; CHECK: $w0 = COPY %load ; CHECK: RET_ReallyLR implicit $w0 %base:gpr(p0) = COPY $x0 @@ -163,7 +163,7 @@ body: | %c:gpr(s64) = G_CONSTANT i64 4 %offset:gpr(s64) = G_MUL %c, %ext %ptr:gpr(p0) = G_PTR_ADD %base, %offset(s64) - %load:gpr(s32) = G_LOAD %ptr(p0) :: (load 4) + %load:gpr(s32) = G_LOAD %ptr(p0) :: (load (s32)) $w0 = COPY %load(s32) RET_ReallyLR implicit $w0 ... @@ -186,7 +186,7 @@ body: | ; CHECK: liveins: $w1, $x0 ; CHECK: %base:gpr64sp = COPY $x0 ; CHECK: %foo:gpr32 = COPY $w1 - ; CHECK: %load:gpr32 = LDRWroW %base, %foo, 0, 1 :: (load 4) + ; CHECK: %load:gpr32 = LDRWroW %base, %foo, 0, 1 :: (load (s32)) ; CHECK: $w0 = COPY %load ; CHECK: RET_ReallyLR implicit $w0 %base:gpr(p0) = COPY $x0 @@ -195,7 +195,7 @@ body: | %c:gpr(s64) = G_CONSTANT i64 4 %offset:gpr(s64) = G_MUL %c, %ext %ptr:gpr(p0) = G_PTR_ADD %base, %offset(s64) - %load:gpr(s32) = G_LOAD %ptr(p0) :: (load 4) + %load:gpr(s32) = G_LOAD %ptr(p0) :: (load (s32)) $w0 = COPY %load(s32) RET_ReallyLR implicit $w0 ... @@ -218,7 +218,7 @@ body: | ; CHECK: liveins: $w1, $x0, $d0 ; CHECK: %base:gpr64sp = COPY $x0 ; CHECK: %foo:gpr32 = COPY $w1 - ; CHECK: %load:fpr64 = LDRDroW %base, %foo, 1, 1 :: (load 8) + ; CHECK: %load:fpr64 = LDRDroW %base, %foo, 1, 1 :: (load (<2 x s32>)) ; CHECK: $x0 = COPY %load ; CHECK: RET_ReallyLR implicit $x0 %base:gpr(p0) = COPY $x0 @@ -227,7 +227,7 @@ body: | %c:gpr(s64) = G_CONSTANT i64 8 %offset:gpr(s64) = G_MUL %c, %ext %ptr:gpr(p0) = G_PTR_ADD %base, %offset(s64) - %load:fpr(<2 x s32>) = G_LOAD %ptr(p0) :: (load 8) + %load:fpr(<2 x s32>) = G_LOAD %ptr(p0) :: (load (<2 x s32>)) $x0 = COPY %load(<2 x s32>) RET_ReallyLR implicit $x0 ... @@ -250,7 +250,7 @@ body: | ; CHECK: liveins: $w1, $x0, $d0 ; CHECK: %base:gpr64sp = COPY $x0 ; CHECK: %foo:gpr32 = COPY $w1 - ; CHECK: %load:gpr64 = LDRXroW %base, %foo, 1, 1 :: (load 8) + ; CHECK: %load:gpr64 = LDRXroW %base, %foo, 1, 1 :: (load (s64)) ; CHECK: $x0 = COPY %load ; CHECK: RET_ReallyLR implicit $x0 %base:gpr(p0) = COPY $x0 @@ -259,7 +259,7 @@ body: | %c:gpr(s64) = G_CONSTANT i64 8 %offset:gpr(s64) = G_MUL %c, %ext %ptr:gpr(p0) = G_PTR_ADD %base, %offset(s64) - %load:gpr(s64) = G_LOAD %ptr(p0) :: (load 8) + %load:gpr(s64) = G_LOAD %ptr(p0) :: (load (s64)) $x0 = COPY %load(s64) RET_ReallyLR implicit $x0 ... @@ -283,14 +283,14 @@ body: | ; CHECK: liveins: $x0, $w0, $w1 ; CHECK: %val:gpr32 = COPY $w1 ; CHECK: %base:gpr64sp = COPY $x0 - ; CHECK: %load:gpr32 = LDRBBroW %base, %val, 1, 0 :: (load 1) + ; CHECK: %load:gpr32 = LDRBBroW %base, %val, 1, 0 :: (load (s8)) ; CHECK: $w0 = COPY %load ; CHECK: RET_ReallyLR implicit $w0 %val:gpr(s32) = COPY $w1 %base:gpr(p0) = COPY $x0 %ext:gpr(s64) = G_SEXT %val(s32) %ptr:gpr(p0) = G_PTR_ADD %base, %ext(s64) - %load:gpr(s32) = G_LOAD %ptr(p0) :: (load 1) + %load:gpr(s32) = G_LOAD %ptr(p0) :: (load (s8)) $w0 = COPY %load(s32) RET_ReallyLR implicit $w0 ... @@ -313,7 +313,7 @@ body: | ; CHECK: liveins: $w1, $x0 ; CHECK: %base:gpr64sp = COPY $x0 ; CHECK: %foo:gpr32 = COPY $w1 - ; CHECK: %load:fpr16 = LDRHroW %base, %foo, 1, 1 :: (load 2) + ; CHECK: %load:fpr16 = LDRHroW %base, %foo, 1, 1 :: (load (s16)) ; CHECK: $h0 = COPY %load ; CHECK: RET_ReallyLR implicit $h0 %base:gpr(p0) = COPY $x0 @@ -322,7 +322,7 @@ body: | %c:gpr(s64) = G_CONSTANT i64 2 %offset:gpr(s64) = G_MUL %c, %ext %ptr:gpr(p0) = G_PTR_ADD %base, %offset(s64) - %load:fpr(s16) = G_LOAD %ptr(p0) :: (load 2) + %load:fpr(s16) = G_LOAD %ptr(p0) :: (load (s16)) $h0 = COPY %load(s16) RET_ReallyLR implicit $h0 ... @@ -346,7 +346,7 @@ body: | ; CHECK: %base:gpr64sp = COPY $x0 ; CHECK: %imp:gpr64 = IMPLICIT_DEF ; CHECK: %and:gpr64common = ANDXri %imp, 4103 - ; CHECK: %load:gpr64 = LDRXroX %base, %and, 0, 1 :: (load 8) + ; CHECK: %load:gpr64 = LDRXroX %base, %and, 0, 1 :: (load (s64)) ; CHECK: $x1 = COPY %load ; CHECK: RET_ReallyLR implicit $x1 %base:gpr(p0) = COPY $x0 @@ -356,7 +356,7 @@ body: | %c:gpr(s64) = G_CONSTANT i64 8 %mul:gpr(s64) = G_MUL %c, %and %ptr:gpr(p0) = G_PTR_ADD %base, %mul(s64) - %load:gpr(s64) = G_LOAD %ptr(p0) :: (load 8) + %load:gpr(s64) = G_LOAD %ptr(p0) :: (load (s64)) $x1 = COPY %load(s64) RET_ReallyLR implicit $x1 ... @@ -380,7 +380,7 @@ body: | ; CHECK: %base:gpr64sp = COPY $x0 ; CHECK: %imp:gpr64 = IMPLICIT_DEF ; CHECK: %and:gpr64common = ANDXri %imp, 4111 - ; CHECK: %load:gpr64 = LDRXroX %base, %and, 0, 1 :: (load 8) + ; CHECK: %load:gpr64 = LDRXroX %base, %and, 0, 1 :: (load (s64)) ; CHECK: $x1 = COPY %load ; CHECK: RET_ReallyLR implicit $x1 %base:gpr(p0) = COPY $x0 @@ -390,7 +390,7 @@ body: | %c:gpr(s64) = G_CONSTANT i64 8 %mul:gpr(s64) = G_MUL %c, %and %ptr:gpr(p0) = G_PTR_ADD %base, %mul(s64) - %load:gpr(s64) = G_LOAD %ptr(p0) :: (load 8) + %load:gpr(s64) = G_LOAD %ptr(p0) :: (load (s64)) $x1 = COPY %load(s64) RET_ReallyLR implicit $x1 ... @@ -414,7 +414,7 @@ body: | ; CHECK: %imp:gpr64 = IMPLICIT_DEF ; CHECK: [[COPY:%[0-9]+]]:gpr32all = COPY %imp.sub_32 ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]] - ; CHECK: %load:gpr64 = LDRXroW %base, [[COPY1]], 0, 1 :: (load 8) + ; CHECK: %load:gpr64 = LDRXroW %base, [[COPY1]], 0, 1 :: (load (s64)) ; CHECK: $x1 = COPY %load ; CHECK: RET_ReallyLR implicit $x1 %base:gpr(p0) = COPY $x0 @@ -424,7 +424,7 @@ body: | %c:gpr(s64) = G_CONSTANT i64 8 %mul:gpr(s64) = G_MUL %c, %and %ptr:gpr(p0) = G_PTR_ADD %base, %mul(s64) - %load:gpr(s64) = G_LOAD %ptr(p0) :: (load 8) + %load:gpr(s64) = G_LOAD %ptr(p0) :: (load (s64)) $x1 = COPY %load(s64) RET_ReallyLR implicit $x1 ... @@ -448,7 +448,7 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1 ; CHECK: [[ANDWri:%[0-9]+]]:gpr32common = ANDWri [[COPY]], 7 - ; CHECK: [[LDRWroW:%[0-9]+]]:gpr32 = LDRWroW [[COPY1]], [[ANDWri]], 0, 1 :: (load 4) + ; CHECK: [[LDRWroW:%[0-9]+]]:gpr32 = LDRWroW [[COPY1]], [[ANDWri]], 0, 1 :: (load (s32)) ; CHECK: $w0 = COPY [[LDRWroW]] ; CHECK: RET_ReallyLR implicit $w0 %0:gpr(s32) = COPY $w0 @@ -459,7 +459,7 @@ body: | %12:gpr(s32) = G_SHL %3, %13(s64) %6:gpr(s64) = G_ZEXT %12(s32) %7:gpr(p0) = G_PTR_ADD %1, %6(s64) - %9:gpr(s32) = G_LOAD %7(p0) :: (load 4) + %9:gpr(s32) = G_LOAD %7(p0) :: (load (s32)) $w0 = COPY %9(s32) RET_ReallyLR implicit $w0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll b/llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll index 0210a9a89ca4e..2daaf89d1225d 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll @@ -24,21 +24,22 @@ define i32 @foo() { ; CHECK: [[C1:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 3 ; CHECK: [[GV2:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var3 ; CHECK: [[C2:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[LOAD:%[0-9]+]]:gpr(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load 4 from @var1) + ; CHECK: [[LOAD:%[0-9]+]]:gpr(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load (s32) from @var1) ; CHECK: [[C3:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 1 ; CHECK: [[ICMP:%[0-9]+]]:gpr(s32) = G_ICMP intpred(ne), [[LOAD]](s32), [[C3]] ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[ICMP]](s32) ; CHECK: G_BRCOND [[TRUNC]](s1), %bb.3 + ; CHECK: G_BR %bb.2 ; CHECK: bb.2.if.then: ; CHECK: successors: %bb.3(0x80000000) ; CHECK: [[GV3:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var2 ; CHECK: [[C4:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 2 - ; CHECK: G_STORE [[C4]](s32), [[GV3]](p0) :: (store 4 into @var2) + ; CHECK: G_STORE [[C4]](s32), [[GV3]](p0) :: (store (s32) into @var2) ; CHECK: [[C5:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 3 - ; CHECK: G_STORE [[C5]](s32), [[GV]](p0) :: (store 4 into @var1) + ; CHECK: G_STORE [[C5]](s32), [[GV]](p0) :: (store (s32) into @var1) ; CHECK: [[GV4:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var3 - ; CHECK: G_STORE [[C4]](s32), [[GV4]](p0) :: (store 4 into @var3) - ; CHECK: G_STORE [[C5]](s32), [[GV]](p0) :: (store 4 into @var1) + ; CHECK: G_STORE [[C4]](s32), [[GV4]](p0) :: (store (s32) into @var3) + ; CHECK: G_STORE [[C5]](s32), [[GV]](p0) :: (store (s32) into @var1) ; CHECK: bb.3.if.end: ; CHECK: [[C6:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0 ; CHECK: $w0 = COPY [[C6]](s32) @@ -74,16 +75,17 @@ define i32 @darwin_tls() { ; CHECK: [[GV1:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var2 ; CHECK: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0 ; CHECK: [[GV2:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var1 - ; CHECK: [[LOAD:%[0-9]+]]:gpr(s32) = G_LOAD [[GV2]](p0) :: (dereferenceable load 4 from @var1) + ; CHECK: [[LOAD:%[0-9]+]]:gpr(s32) = G_LOAD [[GV2]](p0) :: (dereferenceable load (s32) from @var1) ; CHECK: [[C1:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 1 ; CHECK: [[ICMP:%[0-9]+]]:gpr(s32) = G_ICMP intpred(ne), [[LOAD]](s32), [[C1]] ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[ICMP]](s32) ; CHECK: G_BRCOND [[TRUNC]](s1), %bb.3 + ; CHECK: G_BR %bb.2 ; CHECK: bb.2.if.then: ; CHECK: successors: %bb.3(0x80000000) - ; CHECK: [[LOAD1:%[0-9]+]]:gpr(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load 4 from @tls_gv) + ; CHECK: [[LOAD1:%[0-9]+]]:gpr(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load (s32) from @tls_gv) ; CHECK: [[GV3:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var2 - ; CHECK: G_STORE [[LOAD1]](s32), [[GV3]](p0) :: (store 4 into @var2) + ; CHECK: G_STORE [[LOAD1]](s32), [[GV3]](p0) :: (store (s32) into @var2) ; CHECK: bb.3.if.end: ; CHECK: [[C2:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0 ; CHECK: $w0 = COPY [[C2]](s32) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/localizer.mir b/llvm/test/CodeGen/AArch64/GlobalISel/localizer.mir index 412ece2ca4f81..90580c847f290 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/localizer.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/localizer.mir @@ -358,7 +358,7 @@ body: | ; CHECK: [[GV1:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var3 ; CHECK: [[C2:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0 ; CHECK: [[GV2:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var1 - ; CHECK: [[LOAD:%[0-9]+]]:gpr(s32) = G_LOAD [[GV2]](p0) :: (load 4 from @var1) + ; CHECK: [[LOAD:%[0-9]+]]:gpr(s32) = G_LOAD [[GV2]](p0) :: (load (s32) from @var1) ; CHECK: [[C3:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 1 ; CHECK: [[ICMP:%[0-9]+]]:gpr(s32) = G_ICMP intpred(eq), [[LOAD]](s32), [[C3]] ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[ICMP]](s32) @@ -368,13 +368,13 @@ body: | ; CHECK: successors: %bb.2(0x80000000) ; CHECK: [[GV3:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var2 ; CHECK: [[C4:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 2 - ; CHECK: G_STORE [[C4]](s32), [[GV3]](p0) :: (store 4 into @var2) + ; CHECK: G_STORE [[C4]](s32), [[GV3]](p0) :: (store (s32) into @var2) ; CHECK: [[C5:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 3 ; CHECK: [[GV4:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var1 - ; CHECK: G_STORE [[C5]](s32), [[GV4]](p0) :: (store 4 into @var1) + ; CHECK: G_STORE [[C5]](s32), [[GV4]](p0) :: (store (s32) into @var1) ; CHECK: [[GV5:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var3 - ; CHECK: G_STORE [[C4]](s32), [[GV5]](p0) :: (store 4 into @var3) - ; CHECK: G_STORE [[C5]](s32), [[GV4]](p0) :: (store 4 into @var1) + ; CHECK: G_STORE [[C4]](s32), [[GV5]](p0) :: (store (s32) into @var3) + ; CHECK: G_STORE [[C5]](s32), [[GV4]](p0) :: (store (s32) into @var1) ; CHECK: bb.2.if.end: ; CHECK: [[C6:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0 ; CHECK: $w0 = COPY [[C6]](s32) @@ -390,17 +390,17 @@ body: | %6:gpr(s32) = G_CONSTANT i32 3 %7:gpr(p0) = G_GLOBAL_VALUE @var3 %8:gpr(s32) = G_CONSTANT i32 0 - %0:gpr(s32) = G_LOAD %1(p0) :: (load 4 from @var1) + %0:gpr(s32) = G_LOAD %1(p0) :: (load (s32) from @var1) %9:gpr(s32) = G_ICMP intpred(eq), %0(s32), %2 %3:gpr(s1) = G_TRUNC %9(s32) G_BRCOND %3(s1), %bb.2 G_BR %bb.3 bb.2.if.then: - G_STORE %4(s32), %5(p0) :: (store 4 into @var2) - G_STORE %6(s32), %1(p0) :: (store 4 into @var1) - G_STORE %4(s32), %7(p0) :: (store 4 into @var3) - G_STORE %6(s32), %1(p0) :: (store 4 into @var1) + G_STORE %4(s32), %5(p0) :: (store (s32) into @var2) + G_STORE %6(s32), %1(p0) :: (store (s32) into @var1) + G_STORE %4(s32), %7(p0) :: (store (s32) into @var3) + G_STORE %6(s32), %1(p0) :: (store (s32) into @var1) bb.3.if.end: $w0 = COPY %8(s32) @@ -425,7 +425,7 @@ body: | ; CHECK: [[ADRP2:%[0-9]+]]:gpr64(p0) = ADRP target-flags(aarch64-page) @var3 ; CHECK: %addlow3:gpr(p0) = G_ADD_LOW [[ADRP2]](p0), target-flags(aarch64-pageoff, aarch64-nc) @var3 ; CHECK: [[C2:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[LOAD:%[0-9]+]]:gpr(s32) = G_LOAD [[ADRP]](p0) :: (load 4 from @var1) + ; CHECK: [[LOAD:%[0-9]+]]:gpr(s32) = G_LOAD [[ADRP]](p0) :: (load (s32) from @var1) ; CHECK: [[C3:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 1 ; CHECK: [[ICMP:%[0-9]+]]:gpr(s32) = G_ICMP intpred(eq), [[LOAD]](s32), [[C3]] ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[ICMP]](s32) @@ -436,15 +436,15 @@ body: | ; CHECK: [[ADRP3:%[0-9]+]]:gpr64(p0) = ADRP target-flags(aarch64-page) @var2 ; CHECK: [[ADD_LOW:%[0-9]+]]:gpr(p0) = G_ADD_LOW [[ADRP3]](p0), target-flags(aarch64-pageoff, aarch64-nc) @var2 ; CHECK: [[C4:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 2 - ; CHECK: G_STORE [[C4]](s32), [[ADD_LOW]](p0) :: (store 4 into @var2) + ; CHECK: G_STORE [[C4]](s32), [[ADD_LOW]](p0) :: (store (s32) into @var2) ; CHECK: [[C5:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 3 ; CHECK: [[ADRP4:%[0-9]+]]:gpr64(p0) = ADRP target-flags(aarch64-page) @var1 ; CHECK: [[ADD_LOW1:%[0-9]+]]:gpr(p0) = G_ADD_LOW [[ADRP4]](p0), target-flags(aarch64-pageoff, aarch64-nc) @var1 - ; CHECK: G_STORE [[C5]](s32), [[ADD_LOW1]](p0) :: (store 4 into @var1) + ; CHECK: G_STORE [[C5]](s32), [[ADD_LOW1]](p0) :: (store (s32) into @var1) ; CHECK: [[ADRP5:%[0-9]+]]:gpr64(p0) = ADRP target-flags(aarch64-page) @var3 ; CHECK: [[ADD_LOW2:%[0-9]+]]:gpr(p0) = G_ADD_LOW [[ADRP5]](p0), target-flags(aarch64-pageoff, aarch64-nc) @var3 - ; CHECK: G_STORE [[C4]](s32), [[ADD_LOW2]](p0) :: (store 4 into @var3) - ; CHECK: G_STORE [[C5]](s32), [[ADD_LOW1]](p0) :: (store 4 into @var1) + ; CHECK: G_STORE [[C4]](s32), [[ADD_LOW2]](p0) :: (store (s32) into @var3) + ; CHECK: G_STORE [[C5]](s32), [[ADD_LOW1]](p0) :: (store (s32) into @var1) ; CHECK: bb.2.if.end: ; CHECK: [[C6:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0 ; CHECK: $w0 = COPY [[C6]](s32) @@ -462,17 +462,17 @@ body: | %7:gpr64(p0) = ADRP target-flags(aarch64-page) @var3 %addlow3:gpr(p0) = G_ADD_LOW %7(p0), target-flags(aarch64-pageoff, aarch64-nc) @var3 %8:gpr(s32) = G_CONSTANT i32 0 - %0:gpr(s32) = G_LOAD %1(p0) :: (load 4 from @var1) + %0:gpr(s32) = G_LOAD %1(p0) :: (load (s32) from @var1) %9:gpr(s32) = G_ICMP intpred(eq), %0(s32), %2 %3:gpr(s1) = G_TRUNC %9(s32) G_BRCOND %3(s1), %bb.2 G_BR %bb.3 bb.2.if.then: - G_STORE %4(s32), %addlow2(p0) :: (store 4 into @var2) - G_STORE %6(s32), %addlow1(p0) :: (store 4 into @var1) - G_STORE %4(s32), %addlow3(p0) :: (store 4 into @var3) - G_STORE %6(s32), %addlow1(p0) :: (store 4 into @var1) + G_STORE %4(s32), %addlow2(p0) :: (store (s32) into @var2) + G_STORE %6(s32), %addlow1(p0) :: (store (s32) into @var1) + G_STORE %4(s32), %addlow3(p0) :: (store (s32) into @var3) + G_STORE %6(s32), %addlow1(p0) :: (store (s32) into @var1) bb.3.if.end: $w0 = COPY %8(s32) @@ -503,7 +503,7 @@ body: | ; CHECK: G_BR %bb.2 ; CHECK: bb.1: ; CHECK: [[ADD:%[0-9]+]]:gpr(s32) = G_ADD [[COPY]], [[COPY]] - ; CHECK: G_STORE [[ADD]](s32), [[COPY1]](p0) :: (store 4) + ; CHECK: G_STORE [[ADD]](s32), [[COPY1]](p0) :: (store (s32) ; CHECK: [[C3:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 128 ; CHECK: [[INTTOPTR2:%[0-9]+]]:gpr(p0) = G_INTTOPTR [[C3]](s64) ; CHECK: $x0 = COPY [[INTTOPTR2]](p0) @@ -530,7 +530,7 @@ body: | bb.2: %8:gpr(s32) = G_ADD %0, %0 - G_STORE %8(s32), %1(p0) :: (store 4) + G_STORE %8(s32), %1(p0) :: (store (s32)) $x0 = COPY %3(p0) RET_ReallyLR implicit $x0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/non-pow-2-extload-combine.mir b/llvm/test/CodeGen/AArch64/GlobalISel/non-pow-2-extload-combine.mir index 12d346a7d0652..7d2d3204c2c09 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/non-pow-2-extload-combine.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/non-pow-2-extload-combine.mir @@ -23,13 +23,13 @@ body: | ; CHECK-LABEL: name: ld_zext_i24 ; CHECK: liveins: $x0, $x1 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[COPY]](p0) :: (load 3 from %ir.ptr, align 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[COPY]](p0) :: (load (s24) from %ir.ptr, align 1) ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s24) ; CHECK: $w0 = COPY [[ZEXT]](s32) ; CHECK: RET_ReallyLR implicit $w0 %0:_(p0) = COPY $x0 %1:_(p0) = COPY $x1 - %2:_(s24) = G_LOAD %0(p0) :: (load 3 from %ir.ptr, align 1) + %2:_(s24) = G_LOAD %0(p0) :: (load (s24) from %ir.ptr, align 1) %3:_(s32) = G_ZEXT %2(s24) $w0 = COPY %3(s32) RET_ReallyLR implicit $w0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/opt-overlapping-and.mir b/llvm/test/CodeGen/AArch64/GlobalISel/opt-overlapping-and.mir index ab8f3f11e0e60..cedd6f0fac69b 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/opt-overlapping-and.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/opt-overlapping-and.mir @@ -105,7 +105,7 @@ body: | ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] - ; CHECK: G_STORE [[AND]](s32), [[COPY1]](p0) :: (store 4) + ; CHECK: G_STORE [[AND]](s32), [[COPY1]](p0) :: (store (s32)) ; CHECK: $w0 = COPY [[AND1]](s32) ; CHECK: RET_ReallyLR implicit $w0 %0:_(s32) = COPY $w0 @@ -114,7 +114,7 @@ body: | %4:_(s32) = G_CONSTANT i32 -128 %3:_(s32) = G_AND %0, %2 %5:_(s32) = G_AND %3, %4 - G_STORE %3(s32), %1(p0) :: (store 4) + G_STORE %3(s32), %1(p0) :: (store (s32)) $w0 = COPY %5(s32) RET_ReallyLR implicit $w0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-redundant-sextinreg.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-redundant-sextinreg.mir index 11c6cedf8344b..8e9642ab3e3b2 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-redundant-sextinreg.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-redundant-sextinreg.mir @@ -13,10 +13,10 @@ body: | ; CHECK-LABEL: name: sextload ; CHECK: liveins: $x0 ; CHECK: %x0:_(p0) = COPY $x0 - ; CHECK: %sextload:_(s32) = G_SEXTLOAD %x0(p0) :: (load 2) + ; CHECK: %sextload:_(s32) = G_SEXTLOAD %x0(p0) :: (load (s16)) ; CHECK: $w0 = COPY %sextload(s32) %x0:_(p0) = COPY $x0 - %sextload:_(s32) = G_SEXTLOAD %x0:_(p0) :: (load 2) + %sextload:_(s32) = G_SEXTLOAD %x0:_(p0) :: (load (s16)) %sext_inreg:_(s32) = G_SEXT_INREG %sextload:_(s32), 24 $w0 = COPY %sext_inreg(s32) ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-store-undef.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-store-undef.mir index 0c3a8deaf2507..27e9b5c1f4523 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-store-undef.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-store-undef.mir @@ -19,7 +19,7 @@ body: | ; CHECK: RET_ReallyLR %0:_(p0) = COPY $x0 %1:_(s32) = G_IMPLICIT_DEF - G_STORE %1(s32), %0(p0) :: (store 4) + G_STORE %1(s32), %0(p0) :: (store (s32)) RET_ReallyLR ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-truncstore.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-truncstore.mir index 636e30613705d..d302e7f68c581 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-truncstore.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-truncstore.mir @@ -10,11 +10,11 @@ body: | ; CHECK-LABEL: name: truncstore_s8 ; CHECK: %ptr:_(p0) = COPY $x0 ; CHECK: %val:_(s32) = COPY $w1 - ; CHECK: G_STORE %val(s32), %ptr(p0) :: (store 1) + ; CHECK: G_STORE %val(s32), %ptr(p0) :: (store (s8)) %ptr:_(p0) = COPY $x0 %val:_(s32) = COPY $w1 %trunc:_(s8) = G_TRUNC %val - G_STORE %trunc(s8), %ptr(p0) :: (store 1) + G_STORE %trunc(s8), %ptr(p0) :: (store (s8)) ... --- name: truncstore_vector @@ -26,9 +26,9 @@ body: | ; CHECK: %ptr:_(p0) = COPY $x0 ; CHECK: %val:_(<4 x s32>) = COPY $q0 ; CHECK: %trunc:_(<4 x s8>) = G_TRUNC %val(<4 x s32>) - ; CHECK: G_STORE %trunc(<4 x s8>), %ptr(p0) :: (store 4) + ; CHECK: G_STORE %trunc(<4 x s8>), %ptr(p0) :: (store (<4 x s8>)) %ptr:_(p0) = COPY $x0 %val:_(<4 x s32>) = COPY $q0 %trunc:_(<4 x s8>) = G_TRUNC %val - G_STORE %trunc(<4 x s8>), %ptr(p0) :: (store 4) + G_STORE %trunc(<4 x s8>), %ptr(p0) :: (store (<4 x s8>)) ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizercombiner-extending-loads.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizercombiner-extending-loads.mir index 5ed7661f98f18..db576419a7647 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizercombiner-extending-loads.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizercombiner-extending-loads.mir @@ -22,10 +22,10 @@ body: | liveins: $x0 ; CHECK-LABEL: name: test_zeroext ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load 1 from %ir.addr) + ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8) from %ir.addr) ; CHECK: $w0 = COPY [[ZEXTLOAD]](s32) %0:_(p0) = COPY $x0 - %1:_(s8) = G_LOAD %0 :: (load 1 from %ir.addr) + %1:_(s8) = G_LOAD %0 :: (load (s8) from %ir.addr) %2:_(s32) = G_ZEXT %1 $w0 = COPY %2 ... @@ -40,11 +40,11 @@ body: | ; because an anyexting load like s64 = G_LOAD %p (load 4) isn't legal. ; CHECK-LABEL: name: test_no_anyext ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4 from %ir.addr) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32) from %ir.addr) ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CHECK: $x0 = COPY [[ANYEXT]](s64) %0:_(p0) = COPY $x0 - %1:_(s32) = G_LOAD %0 :: (load 4 from %ir.addr) + %1:_(s32) = G_LOAD %0 :: (load (s32) from %ir.addr) %2:_(s64) = G_ANYEXT %1 $x0 = COPY %2 ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-constrain-new-regop.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-constrain-new-regop.mir index cc4ebca0726b9..dded9241478a3 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-constrain-new-regop.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-constrain-new-regop.mir @@ -39,12 +39,12 @@ body: | ; CHECK: bb.1: ; CHECK: successors: %bb.2(0x80000000) ; CHECK: [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri [[SUBREG_TO_REG]], 60, 59 - ; CHECK: [[LDRSroX:%[0-9]+]]:fpr32 = LDRSroX [[COPY]], [[UBFMXri]], 0, 0 :: (load 4) + ; CHECK: [[LDRSroX:%[0-9]+]]:fpr32 = LDRSroX [[COPY]], [[UBFMXri]], 0, 0 :: (load (s32)) ; CHECK: [[COPY6:%[0-9]+]]:fpr32 = COPY [[DEF]] ; CHECK: [[FMULSrr:%[0-9]+]]:fpr32 = FMULSrr [[COPY6]], [[LDRSroX]] ; CHECK: [[COPY7:%[0-9]+]]:fpr32 = COPY [[DEF]] ; CHECK: [[FADDSrr:%[0-9]+]]:fpr32 = FADDSrr [[FMULSrr]], [[COPY7]] - ; CHECK: STRSui [[FADDSrr]], [[COPY2]], 0 :: (store 4) + ; CHECK: STRSui [[FADDSrr]], [[COPY2]], 0 :: (store (s32)) ; CHECK: bb.2: ; CHECK: RET_ReallyLR bb.1: @@ -70,12 +70,12 @@ body: | bb.2: %12:gpr64 = UBFMXri %8, 60, 59 - %15:fpr32 = LDRSroX %0, %12, 0, 0 :: (load 4) + %15:fpr32 = LDRSroX %0, %12, 0, 0 :: (load (s32)) %31:fpr32 = COPY %3 %16:fpr32 = FMULSrr %31, %15 %32:fpr32 = COPY %3 %17:fpr32 = FADDSrr %16, %32 - STRSui %17, %2, 0 :: (store 4) + STRSui %17, %2, 0 :: (store (s32)) bb.3: RET_ReallyLR diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-divrem-insertpt-crash.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-divrem-insertpt-crash.mir index 352f6f1e5750d..1e77db17fc228 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-divrem-insertpt-crash.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-divrem-insertpt-crash.mir @@ -30,7 +30,7 @@ body: | ; CHECK: [[UDIV:%[0-9]+]]:_(s64) = G_UDIV [[FREEZE]], [[C]] ; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]], [[C1]](s64) ; CHECK: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[SHL]], [[UDIV]] - ; CHECK: G_STORE [[ADD]](s64), [[COPY]](p0) :: (store 8) + ; CHECK: G_STORE [[ADD]](s64), [[COPY]](p0) :: (store (s64)) ; CHECK: bb.2: bb.1: liveins: $x0 @@ -58,7 +58,7 @@ body: | %16:_(s64) = G_UDIV %15, %4 %17:_(s64) = G_SHL %7, %12(s64) %18:_(s64) = G_ADD %17, %16 - G_STORE %18(s64), %0(p0) :: (store 8) + G_STORE %18(s64), %0(p0) :: (store (s64)) bb.3: diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-icmp-to-true-false-known-bits.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-icmp-to-true-false-known-bits.mir index 06708ec6c76ff..ff543b92949c9 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-icmp-to-true-false-known-bits.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-icmp-to-true-false-known-bits.mir @@ -93,7 +93,7 @@ body: | ; CHECK: RET_ReallyLR implicit $w0 %ptr:_(p0) = COPY $x0 %cst:_(s32) = G_CONSTANT i32 2 - %load_eq_1:_(s32) = G_LOAD %ptr(p0) :: (load 4, !range !0) + %load_eq_1:_(s32) = G_LOAD %ptr(p0) :: (load (s32), !range !0) %cmp:_(s1) = G_ICMP intpred(sge), %cst, %load_eq_1(s32) %cmp_ext:_(s32) = G_ZEXT %cmp(s1) $w0 = COPY %cmp_ext(s32) @@ -115,7 +115,7 @@ body: | ; CHECK: RET_ReallyLR implicit $w0 %ptr:_(p0) = COPY $x0 %cst:_(s32) = G_CONSTANT i32 3 - %load_eq_1:_(s32) = G_LOAD %ptr(p0) :: (load 4, !range !0) + %load_eq_1:_(s32) = G_LOAD %ptr(p0) :: (load (s32), !range !0) %cmp:_(s1) = G_ICMP intpred(sgt), %cst, %load_eq_1(s32) %cmp_ext:_(s32) = G_ZEXT %cmp(s1) $w0 = COPY %cmp_ext(s32) @@ -137,7 +137,7 @@ body: | ; CHECK: RET_ReallyLR implicit $w0 %ptr:_(p0) = COPY $x0 %cst:_(s32) = G_CONSTANT i32 1 - %load_eq_1:_(s32) = G_LOAD %ptr(p0) :: (load 4, !range !0) + %load_eq_1:_(s32) = G_LOAD %ptr(p0) :: (load (s32), !range !0) %cmp:_(s1) = G_ICMP intpred(sle), %cst, %load_eq_1(s32) %cmp_ext:_(s32) = G_ZEXT %cmp(s1) $w0 = COPY %cmp_ext(s32) @@ -160,7 +160,7 @@ body: | ; CHECK: RET_ReallyLR implicit $w0 %ptr:_(p0) = COPY $x0 %cst:_(s32) = G_CONSTANT i32 -1 - %load_eq_1:_(s32) = G_LOAD %ptr(p0) :: (load 4, !range !0) + %load_eq_1:_(s32) = G_LOAD %ptr(p0) :: (load (s32), !range !0) %cmp:_(s1) = G_ICMP intpred(slt), %cst, %load_eq_1(s32) %cmp_ext:_(s32) = G_ZEXT %cmp(s1) $w0 = COPY %cmp_ext(s32) @@ -182,7 +182,7 @@ body: | ; CHECK: RET_ReallyLR implicit $w0 %ptr:_(p0) = COPY $x0 %cst:_(s32) = G_CONSTANT i32 2 - %load_eq_1:_(s32) = G_LOAD %ptr(p0) :: (load 4, !range !0) + %load_eq_1:_(s32) = G_LOAD %ptr(p0) :: (load (s32), !range !0) %cmp:_(s1) = G_ICMP intpred(uge), %cst, %load_eq_1(s32) %cmp_ext:_(s32) = G_ZEXT %cmp(s1) $w0 = COPY %cmp_ext(s32) @@ -204,7 +204,7 @@ body: | ; CHECK: RET_ReallyLR implicit $w0 %ptr:_(p0) = COPY $x0 %cst:_(s32) = G_CONSTANT i32 -1 - %load_eq_1:_(s32) = G_LOAD %ptr(p0) :: (load 4, !range !0) + %load_eq_1:_(s32) = G_LOAD %ptr(p0) :: (load (s32), !range !0) %cmp:_(s1) = G_ICMP intpred(ugt), %cst, %load_eq_1(s32) %cmp_ext:_(s32) = G_ZEXT %cmp(s1) $w0 = COPY %cmp_ext(s32) @@ -226,7 +226,7 @@ body: | ; CHECK: RET_ReallyLR implicit $w0 %ptr:_(p0) = COPY $x0 %cst:_(s32) = G_CONSTANT i32 1 - %load_eq_1:_(s32) = G_LOAD %ptr(p0) :: (load 4, !range !0) + %load_eq_1:_(s32) = G_LOAD %ptr(p0) :: (load (s32), !range !0) %cmp:_(s1) = G_ICMP intpred(ule), %cst, %load_eq_1(s32) %cmp_ext:_(s32) = G_ZEXT %cmp(s1) $w0 = COPY %cmp_ext(s32) @@ -248,7 +248,7 @@ body: | ; CHECK: RET_ReallyLR implicit $w0 %ptr:_(p0) = COPY $x0 %cst:_(s32) = G_CONSTANT i32 0 - %load_eq_1:_(s32) = G_LOAD %ptr(p0) :: (load 4, !range !0) + %load_eq_1:_(s32) = G_LOAD %ptr(p0) :: (load (s32), !range !0) %cmp:_(s1) = G_ICMP intpred(ule), %cst, %load_eq_1(s32) %cmp_ext:_(s32) = G_ZEXT %cmp(s1) $w0 = COPY %cmp_ext(s32) @@ -270,7 +270,7 @@ body: | ; CHECK: RET_ReallyLR implicit $w0 %ptr:_(p0) = COPY $x0 %cst:_(s32) = G_CONSTANT i32 0 - %load_eq_1:_(s32) = G_LOAD %ptr(p0) :: (load 4, !range !0) + %load_eq_1:_(s32) = G_LOAD %ptr(p0) :: (load (s32), !range !0) %cmp:_(s1) = G_ICMP intpred(eq), %load_eq_1(s32), %cst %cmp_ext:_(s32) = G_ZEXT %cmp(s1) $w0 = COPY %cmp_ext(s32) @@ -314,7 +314,7 @@ body: | ; CHECK: RET_ReallyLR implicit $w0 %ptr:_(p0) = COPY $x0 %cst:_(s32) = G_CONSTANT i32 -1 - %load_eq_1:_(s32) = G_LOAD %ptr(p0) :: (load 4, !range !0) + %load_eq_1:_(s32) = G_LOAD %ptr(p0) :: (load (s32), !range !0) %cmp:_(s1) = G_ICMP intpred(sge), %cst, %load_eq_1(s32) %cmp_ext:_(s32) = G_ZEXT %cmp(s1) $w0 = COPY %cmp_ext(s32) @@ -336,7 +336,7 @@ body: | ; CHECK: RET_ReallyLR implicit $w0 %ptr:_(p0) = COPY $x0 %cst:_(s32) = G_CONSTANT i32 1 - %load_eq_1:_(s32) = G_LOAD %ptr(p0) :: (load 4, !range !0) + %load_eq_1:_(s32) = G_LOAD %ptr(p0) :: (load (s32), !range !0) %cmp:_(s1) = G_ICMP intpred(sgt), %cst, %load_eq_1(s32) %cmp_ext:_(s32) = G_ZEXT %cmp(s1) $w0 = COPY %cmp_ext(s32) @@ -358,7 +358,7 @@ body: | ; CHECK: RET_ReallyLR implicit $w0 %ptr:_(p0) = COPY $x0 %cst:_(s32) = G_CONSTANT i32 3 - %load_eq_1:_(s32) = G_LOAD %ptr(p0) :: (load 4, !range !0) + %load_eq_1:_(s32) = G_LOAD %ptr(p0) :: (load (s32), !range !0) %cmp:_(s1) = G_ICMP intpred(sle), %cst, %load_eq_1(s32) %cmp_ext:_(s32) = G_ZEXT %cmp(s1) $w0 = COPY %cmp_ext(s32) @@ -381,7 +381,7 @@ body: | ; CHECK: RET_ReallyLR implicit $w0 %ptr:_(p0) = COPY $x0 %cst:_(s32) = G_CONSTANT i32 2 - %load_eq_1:_(s32) = G_LOAD %ptr(p0) :: (load 4, !range !0) + %load_eq_1:_(s32) = G_LOAD %ptr(p0) :: (load (s32), !range !0) %cmp:_(s1) = G_ICMP intpred(slt), %cst, %load_eq_1(s32) %cmp_ext:_(s32) = G_ZEXT %cmp(s1) $w0 = COPY %cmp_ext(s32) @@ -403,7 +403,7 @@ body: | ; CHECK: RET_ReallyLR implicit $w0 %ptr:_(p0) = COPY $x0 %cst:_(s32) = G_CONSTANT i32 0 - %load_eq_1:_(s32) = G_LOAD %ptr(p0) :: (load 4, !range !0) + %load_eq_1:_(s32) = G_LOAD %ptr(p0) :: (load (s32), !range !0) %cmp:_(s1) = G_ICMP intpred(uge), %cst, %load_eq_1(s32) %cmp_ext:_(s32) = G_ZEXT %cmp(s1) $w0 = COPY %cmp_ext(s32) @@ -425,7 +425,7 @@ body: | ; CHECK: RET_ReallyLR implicit $w0 %ptr:_(p0) = COPY $x0 %cst:_(s32) = G_CONSTANT i32 1 - %load_eq_1:_(s32) = G_LOAD %ptr(p0) :: (load 4, !range !0) + %load_eq_1:_(s32) = G_LOAD %ptr(p0) :: (load (s32), !range !0) %cmp:_(s1) = G_ICMP intpred(ugt), %cst, %load_eq_1(s32) %cmp_ext:_(s32) = G_ZEXT %cmp(s1) $w0 = COPY %cmp_ext(s32) @@ -447,7 +447,7 @@ body: | ; CHECK: RET_ReallyLR implicit $w0 %ptr:_(p0) = COPY $x0 %cst:_(s32) = G_CONSTANT i32 -1 - %load_eq_1:_(s32) = G_LOAD %ptr(p0) :: (load 4, !range !0) + %load_eq_1:_(s32) = G_LOAD %ptr(p0) :: (load (s32), !range !0) %cmp:_(s1) = G_ICMP intpred(ule), %cst, %load_eq_1(s32) %cmp_ext:_(s32) = G_ZEXT %cmp(s1) $w0 = COPY %cmp_ext(s32) @@ -469,7 +469,7 @@ body: | ; CHECK: RET_ReallyLR implicit $w0 %ptr:_(p0) = COPY $x0 %cst:_(s32) = G_CONSTANT i32 2 - %load_eq_1:_(s32) = G_LOAD %ptr(p0) :: (load 4, !range !0) + %load_eq_1:_(s32) = G_LOAD %ptr(p0) :: (load (s32), !range !0) %cmp:_(s1) = G_ICMP intpred(ule), %cst, %load_eq_1(s32) %cmp_ext:_(s32) = G_ZEXT %cmp(s1) $w0 = COPY %cmp_ext(s32) @@ -487,14 +487,14 @@ body: | ; CHECK: liveins: $x0 ; CHECK: %ptr:_(p0) = COPY $x0 ; CHECK: %cst:_(s32) = G_CONSTANT i32 1 - ; CHECK: %load_between_1_2:_(s32) = G_LOAD %ptr(p0) :: (load 4, + ; CHECK: %load_between_1_2:_(s32) = G_LOAD %ptr(p0) :: (load (s32), ; CHECK: %cmp:_(s1) = G_ICMP intpred(eq), %load_between_1_2(s32), %cst ; CHECK: %cmp_ext:_(s32) = G_ZEXT %cmp(s1) ; CHECK: $w0 = COPY %cmp_ext(s32) ; CHECK: RET_ReallyLR implicit $w0 %ptr:_(p0) = COPY $x0 %cst:_(s32) = G_CONSTANT i32 1 - %load_between_1_2:_(s32) = G_LOAD %ptr(p0) :: (load 4, !range !1) + %load_between_1_2:_(s32) = G_LOAD %ptr(p0) :: (load (s32), !range !1) %cmp:_(s1) = G_ICMP intpred(eq), %load_between_1_2(s32), %cst %cmp_ext:_(s32) = G_ZEXT %cmp(s1) $w0 = COPY %cmp_ext(s32) @@ -512,14 +512,14 @@ body: | ; CHECK: liveins: $x0 ; CHECK: %ptr:_(p0) = COPY $x0 ; CHECK: %cst:_(s32) = G_CONSTANT i32 1 - ; CHECK: %load_between_1_2:_(s32) = G_LOAD %ptr(p0) :: (load 4, + ; CHECK: %load_between_1_2:_(s32) = G_LOAD %ptr(p0) :: (load (s32), ; CHECK: %cmp:_(s1) = G_ICMP intpred(ne), %load_between_1_2(s32), %cst ; CHECK: %cmp_ext:_(s32) = G_ZEXT %cmp(s1) ; CHECK: $w0 = COPY %cmp_ext(s32) ; CHECK: RET_ReallyLR implicit $w0 %ptr:_(p0) = COPY $x0 %cst:_(s32) = G_CONSTANT i32 1 - %load_between_1_2:_(s32) = G_LOAD %ptr(p0) :: (load 4, !range !1) + %load_between_1_2:_(s32) = G_LOAD %ptr(p0) :: (load (s32), !range !1) %cmp:_(s1) = G_ICMP intpred(ne), %load_between_1_2(s32), %cst %cmp_ext:_(s32) = G_ZEXT %cmp(s1) $w0 = COPY %cmp_ext(s32) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-load-or-pattern-align.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-load-or-pattern-align.mir index 993a6713aaaa5..bd5719ab3d28e 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-load-or-pattern-align.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-load-or-pattern-align.mir @@ -15,7 +15,7 @@ body: | ; NOT_STRICT-LABEL: name: misaligned ; NOT_STRICT: liveins: $x0, $x1 ; NOT_STRICT: %ptr:_(p0) = COPY $x1 - ; NOT_STRICT: %full_load:_(s32) = G_LOAD %ptr(p0) :: (load 4, align 2) + ; NOT_STRICT: %full_load:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 2) ; NOT_STRICT: $w1 = COPY %full_load(s32) ; NOT_STRICT: RET_ReallyLR implicit $w1 ; STRICT-LABEL: name: misaligned @@ -24,8 +24,8 @@ body: | ; STRICT: %cst_16:_(s32) = G_CONSTANT i32 16 ; STRICT: %ptr:_(p0) = COPY $x1 ; STRICT: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) - ; STRICT: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 2) - ; STRICT: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 2) + ; STRICT: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) + ; STRICT: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) ; STRICT: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32) ; STRICT: %full_load:_(s32) = G_OR %low_half, %high_half ; STRICT: $w1 = COPY %full_load(s32) @@ -36,8 +36,8 @@ body: | %ptr:_(p0) = COPY $x1 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) - %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 2, align 2) - %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 2, align 2) + %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16), align 2) + %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16), align 2) %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32) %full_load:_(s32) = G_OR %low_half, %high_half @@ -55,13 +55,13 @@ body: | ; NOT_STRICT-LABEL: name: aligned ; NOT_STRICT: liveins: $x0, $x1 ; NOT_STRICT: %ptr:_(p0) = COPY $x1 - ; NOT_STRICT: %full_load:_(s32) = G_LOAD %ptr(p0) :: (load 4) + ; NOT_STRICT: %full_load:_(s32) = G_LOAD %ptr(p0) :: (load (s32)) ; NOT_STRICT: $w1 = COPY %full_load(s32) ; NOT_STRICT: RET_ReallyLR implicit $w1 ; STRICT-LABEL: name: aligned ; STRICT: liveins: $x0, $x1 ; STRICT: %ptr:_(p0) = COPY $x1 - ; STRICT: %full_load:_(s32) = G_LOAD %ptr(p0) :: (load 4) + ; STRICT: %full_load:_(s32) = G_LOAD %ptr(p0) :: (load (s32)) ; STRICT: $w1 = COPY %full_load(s32) ; STRICT: RET_ReallyLR implicit $w1 %cst_1:_(s64) = G_CONSTANT i64 1 @@ -70,8 +70,8 @@ body: | %ptr:_(p0) = COPY $x1 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) - %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 2, align 4) - %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 2, align 4) + %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16), align 4) + %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16), align 4) %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32) %full_load:_(s32) = G_OR %low_half, %high_half diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-load-or-pattern.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-load-or-pattern.mir index bb1f5a0d36ac1..4b02fe205e5a0 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-load-or-pattern.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-load-or-pattern.mir @@ -28,13 +28,13 @@ body: | ; LITTLE-LABEL: name: s8_loads_to_s32_little_endian_pat ; LITTLE: liveins: $x0, $x1 ; LITTLE: %ptr:_(p0) = COPY $x1 - ; LITTLE: %full_load:_(s32) = G_LOAD %ptr(p0) :: (load 4, align 1) + ; LITTLE: %full_load:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 1) ; LITTLE: $w1 = COPY %full_load(s32) ; LITTLE: RET_ReallyLR implicit $w1 ; BIG-LABEL: name: s8_loads_to_s32_little_endian_pat ; BIG: liveins: $x0, $x1 ; BIG: %ptr:_(p0) = COPY $x1 - ; BIG: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr(p0) :: (load 4, align 1) + ; BIG: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 1) ; BIG: %full_load:_(s32) = G_BSWAP [[LOAD]] ; BIG: $w1 = COPY %full_load(s32) ; BIG: RET_ReallyLR implicit $w1 @@ -51,11 +51,11 @@ body: | %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32) %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32) - %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 1) + %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8)) - %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 1) - %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load 1) - %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load 1) + %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8)) + %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8)) + %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s8)) %byte1:_(s32) = nuw G_SHL %elt1, %cst_8(s32) %byte2:_(s32) = nuw G_SHL %elt2, %cst_16(s32) @@ -94,14 +94,14 @@ body: | ; LITTLE-LABEL: name: s8_loads_to_s32_big_endian_pat ; LITTLE: liveins: $x0, $x1 ; LITTLE: %ptr:_(p0) = COPY $x1 - ; LITTLE: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr(p0) :: (load 4, align 1) + ; LITTLE: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 1) ; LITTLE: %full_load:_(s32) = G_BSWAP [[LOAD]] ; LITTLE: $w1 = COPY %full_load(s32) ; LITTLE: RET_ReallyLR implicit $w1 ; BIG-LABEL: name: s8_loads_to_s32_big_endian_pat ; BIG: liveins: $x0, $x1 ; BIG: %ptr:_(p0) = COPY $x1 - ; BIG: %full_load:_(s32) = G_LOAD %ptr(p0) :: (load 4, align 1) + ; BIG: %full_load:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 1) ; BIG: $w1 = COPY %full_load(s32) ; BIG: RET_ReallyLR implicit $w1 %cst_1:_(s32) = G_CONSTANT i32 1 @@ -117,14 +117,14 @@ body: | %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32) %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32) - %elt0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 1) - %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 1) - %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load 1) + %elt0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8)) + %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8)) + %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8)) %byte0:_(s32) = nuw G_SHL %elt0, %cst_24(s32) %byte1:_(s32) = nuw G_SHL %elt1, %cst_16(s32) %byte2:_(s32) = nuw G_SHL %elt2, %cst_8(s32) - %byte3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load 1) + %byte3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s8)) %or1:_(s32) = G_OR %byte0, %byte1 %or2:_(s32) = G_OR %byte2, %byte3 @@ -152,13 +152,13 @@ body: | ; LITTLE-LABEL: name: different_or_pattern ; LITTLE: liveins: $x0, $x1 ; LITTLE: %ptr:_(p0) = COPY $x1 - ; LITTLE: %full_load:_(s32) = G_LOAD %ptr(p0) :: (load 4, align 1) + ; LITTLE: %full_load:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 1) ; LITTLE: $w1 = COPY %full_load(s32) ; LITTLE: RET_ReallyLR implicit $w1 ; BIG-LABEL: name: different_or_pattern ; BIG: liveins: $x0, $x1 ; BIG: %ptr:_(p0) = COPY $x1 - ; BIG: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr(p0) :: (load 4, align 1) + ; BIG: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 1) ; BIG: %full_load:_(s32) = G_BSWAP [[LOAD]] ; BIG: $w1 = COPY %full_load(s32) ; BIG: RET_ReallyLR implicit $w1 @@ -175,11 +175,11 @@ body: | %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32) %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32) - %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 1) + %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8)) - %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 1) - %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load 1) - %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load 1) + %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8)) + %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8)) + %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s8)) %byte1:_(s32) = nuw G_SHL %elt1, %cst_8(s32) %byte2:_(s32) = nuw G_SHL %elt2, %cst_16(s32) @@ -219,13 +219,13 @@ body: | ; LITTLE-LABEL: name: s16_loads_to_s32_little_endian_pat ; LITTLE: liveins: $x0, $x1 ; LITTLE: %ptr:_(p0) = COPY $x1 - ; LITTLE: %full_load:_(s32) = G_LOAD %ptr(p0) :: (load 4, align 2) + ; LITTLE: %full_load:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 2) ; LITTLE: $w1 = COPY %full_load(s32) ; LITTLE: RET_ReallyLR implicit $w1 ; BIG-LABEL: name: s16_loads_to_s32_little_endian_pat ; BIG: liveins: $x0, $x1 ; BIG: %ptr:_(p0) = COPY $x1 - ; BIG: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr(p0) :: (load 4, align 2) + ; BIG: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 2) ; BIG: %full_load:_(s32) = G_BSWAP [[LOAD]] ; BIG: $w1 = COPY %full_load(s32) ; BIG: RET_ReallyLR implicit $w1 @@ -235,8 +235,8 @@ body: | %ptr:_(p0) = COPY $x1 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) - %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 2) - %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 2) + %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) + %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32) %full_load:_(s32) = G_OR %low_half, %high_half @@ -260,14 +260,14 @@ body: | ; LITTLE-LABEL: name: s16_loads_to_s32_big_endian_pat ; LITTLE: liveins: $x0, $x1 ; LITTLE: %ptr:_(p0) = COPY $x1 - ; LITTLE: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr(p0) :: (load 4, align 2) + ; LITTLE: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 2) ; LITTLE: %full_load:_(s32) = G_BSWAP [[LOAD]] ; LITTLE: $w1 = COPY %full_load(s32) ; LITTLE: RET_ReallyLR implicit $w1 ; BIG-LABEL: name: s16_loads_to_s32_big_endian_pat ; BIG: liveins: $x0, $x1 ; BIG: %ptr:_(p0) = COPY $x1 - ; BIG: %full_load:_(s32) = G_LOAD %ptr(p0) :: (load 4, align 2) + ; BIG: %full_load:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 2) ; BIG: $w1 = COPY %full_load(s32) ; BIG: RET_ReallyLR implicit $w1 %cst_1:_(s64) = G_CONSTANT i64 1 @@ -276,9 +276,9 @@ body: | %ptr:_(p0) = COPY $x1 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) - %elt0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 2) + %elt0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) %high_half:_(s32) = nuw G_SHL %elt0, %cst_16(s32) - %low_half:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 2) + %low_half:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) %full_load:_(s32) = G_OR %low_half, %high_half $w1 = COPY %full_load(s32) @@ -301,13 +301,13 @@ body: | ; LITTLE-LABEL: name: s16_loads_to_s64_little_endian_pat ; LITTLE: liveins: $x0, $x1 ; LITTLE: %ptr:_(p0) = COPY $x1 - ; LITTLE: %full_load:_(s64) = G_LOAD %ptr(p0) :: (load 8, align 2) + ; LITTLE: %full_load:_(s64) = G_LOAD %ptr(p0) :: (load (s64), align 2) ; LITTLE: $x1 = COPY %full_load(s64) ; LITTLE: RET_ReallyLR implicit $x1 ; BIG-LABEL: name: s16_loads_to_s64_little_endian_pat ; BIG: liveins: $x0, $x1 ; BIG: %ptr:_(p0) = COPY $x1 - ; BIG: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD %ptr(p0) :: (load 8, align 2) + ; BIG: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD %ptr(p0) :: (load (s64), align 2) ; BIG: %full_load:_(s64) = G_BSWAP [[LOAD]] ; BIG: $x1 = COPY %full_load(s64) ; BIG: RET_ReallyLR implicit $x1 @@ -324,11 +324,11 @@ body: | %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64) %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64) - %byte0_byte1:_(s64) = G_ZEXTLOAD %ptr(p0) :: (load 2) + %byte0_byte1:_(s64) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) - %elt1:_(s64) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 2) - %elt2:_(s64) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load 2) - %elt3:_(s64) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load 2) + %elt1:_(s64) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) + %elt2:_(s64) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s16)) + %elt3:_(s64) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s16)) %byte2_byte3:_(s64) = nuw G_SHL %elt1, %cst_16(s64) %byte4_byte5:_(s64) = nuw G_SHL %elt2, %cst_32(s64) @@ -358,14 +358,14 @@ body: | ; LITTLE-LABEL: name: s16_loads_to_s64_big_endian_pat ; LITTLE: liveins: $x0, $x1 ; LITTLE: %ptr:_(p0) = COPY $x1 - ; LITTLE: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD %ptr(p0) :: (load 8, align 2) + ; LITTLE: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD %ptr(p0) :: (load (s64), align 2) ; LITTLE: %full_load:_(s64) = G_BSWAP [[LOAD]] ; LITTLE: $x1 = COPY %full_load(s64) ; LITTLE: RET_ReallyLR implicit $x1 ; BIG-LABEL: name: s16_loads_to_s64_big_endian_pat ; BIG: liveins: $x0, $x1 ; BIG: %ptr:_(p0) = COPY $x1 - ; BIG: %full_load:_(s64) = G_LOAD %ptr(p0) :: (load 8, align 2) + ; BIG: %full_load:_(s64) = G_LOAD %ptr(p0) :: (load (s64), align 2) ; BIG: $x1 = COPY %full_load(s64) ; BIG: RET_ReallyLR implicit $x1 %cst_1:_(s64) = G_CONSTANT i64 1 @@ -381,14 +381,14 @@ body: | %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64) %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64) - %elt0:_(s64) = G_ZEXTLOAD %ptr(p0) :: (load 2) - %elt1:_(s64) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 2) - %elt2:_(s64) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load 2) + %elt0:_(s64) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) + %elt1:_(s64) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) + %elt2:_(s64) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s16)) %byte0_byte1:_(s64) = nuw G_SHL %elt0, %cst_48(s64) %byte2_byte3:_(s64) = nuw G_SHL %elt1, %cst_32(s64) %byte4_byte5:_(s64) = nuw G_SHL %elt2, %cst_16(s64) - %byte6_byte7:_(s64) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load 2) + %byte6_byte7:_(s64) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s16)) %or1:_(s64) = G_OR %byte0_byte1, %byte2_byte3 %or2:_(s64) = G_OR %byte4_byte5, %byte6_byte7 @@ -417,7 +417,7 @@ body: | ; LITTLE: %cst_1:_(s32) = G_CONSTANT i32 1 ; LITTLE: %ptr:_(p0) = COPY $x0 ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32) - ; LITTLE: %full_load:_(s32) = G_LOAD %ptr_elt_1(p0) :: (load 4, align 1) + ; LITTLE: %full_load:_(s32) = G_LOAD %ptr_elt_1(p0) :: (load (s32), align 1) ; LITTLE: $w1 = COPY %full_load(s32) ; LITTLE: RET_ReallyLR implicit $w1 ; BIG-LABEL: name: nonzero_start_idx_positive_little_endian_pat @@ -425,7 +425,7 @@ body: | ; BIG: %cst_1:_(s32) = G_CONSTANT i32 1 ; BIG: %ptr:_(p0) = COPY $x0 ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32) - ; BIG: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr_elt_1(p0) :: (load 4, align 1) + ; BIG: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr_elt_1(p0) :: (load (s32), align 1) ; BIG: %full_load:_(s32) = G_BSWAP [[LOAD]] ; BIG: $w1 = COPY %full_load(s32) ; BIG: RET_ReallyLR implicit $w1 @@ -444,11 +444,11 @@ body: | %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32) %ptr_elt_4:_(p0) = G_PTR_ADD %ptr, %cst_4(s32) - %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load 1) - %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load 1) - %elt4:_(s32) = G_ZEXTLOAD %ptr_elt_4(p0) :: (load 1) + %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8)) + %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s8)) + %elt4:_(s32) = G_ZEXTLOAD %ptr_elt_4(p0) :: (load (s8)) - %byte0:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 1) + %byte0:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8)) %byte1:_(s32) = nuw G_SHL %elt2, %cst_8(s32) %byte2:_(s32) = nuw G_SHL %elt3, %cst_16(s32) %byte3:_(s32) = nuw G_SHL %elt4, %cst_24(s32) @@ -479,7 +479,7 @@ body: | ; LITTLE: %cst_1:_(s32) = G_CONSTANT i32 1 ; LITTLE: %ptr:_(p0) = COPY $x0 ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32) - ; LITTLE: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr_elt_1(p0) :: (load 4, align 1) + ; LITTLE: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr_elt_1(p0) :: (load (s32), align 1) ; LITTLE: %full_load:_(s32) = G_BSWAP [[LOAD]] ; LITTLE: $w1 = COPY %full_load(s32) ; LITTLE: RET_ReallyLR implicit $w1 @@ -488,7 +488,7 @@ body: | ; BIG: %cst_1:_(s32) = G_CONSTANT i32 1 ; BIG: %ptr:_(p0) = COPY $x0 ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32) - ; BIG: %full_load:_(s32) = G_LOAD %ptr_elt_1(p0) :: (load 4, align 1) + ; BIG: %full_load:_(s32) = G_LOAD %ptr_elt_1(p0) :: (load (s32), align 1) ; BIG: $w1 = COPY %full_load(s32) ; BIG: RET_ReallyLR implicit $w1 %cst_1:_(s32) = G_CONSTANT i32 1 @@ -506,11 +506,11 @@ body: | %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32) %ptr_elt_4:_(p0) = G_PTR_ADD %ptr, %cst_4(s32) - %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 1) - %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load 1) - %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load 1) + %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8)) + %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8)) + %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s8)) - %byte0:_(s32) = G_ZEXTLOAD %ptr_elt_4(p0) :: (load 1) + %byte0:_(s32) = G_ZEXTLOAD %ptr_elt_4(p0) :: (load (s8)) %byte1:_(s32) = nuw G_SHL %elt3, %cst_8(s32) %byte2:_(s32) = nuw G_SHL %elt2, %cst_16(s32) %byte3:_(s32) = nuw G_SHL %elt1, %cst_24(s32) @@ -541,7 +541,7 @@ body: | ; LITTLE: %cst_neg_3:_(s32) = G_CONSTANT i32 -3 ; LITTLE: %ptr:_(p0) = COPY $x0 ; LITTLE: %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s32) - ; LITTLE: %full_load:_(s32) = G_LOAD %ptr_elt_neg_3(p0) :: (load 4, align 1) + ; LITTLE: %full_load:_(s32) = G_LOAD %ptr_elt_neg_3(p0) :: (load (s32), align 1) ; LITTLE: $w1 = COPY %full_load(s32) ; LITTLE: RET_ReallyLR implicit $w1 ; BIG-LABEL: name: nonzero_start_idx_negative_little_endian_pat @@ -549,7 +549,7 @@ body: | ; BIG: %cst_neg_3:_(s32) = G_CONSTANT i32 -3 ; BIG: %ptr:_(p0) = COPY $x0 ; BIG: %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s32) - ; BIG: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr_elt_neg_3(p0) :: (load 4, align 1) + ; BIG: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr_elt_neg_3(p0) :: (load (s32), align 1) ; BIG: %full_load:_(s32) = G_BSWAP [[LOAD]] ; BIG: $w1 = COPY %full_load(s32) ; BIG: RET_ReallyLR implicit $w1 @@ -566,11 +566,11 @@ body: | %ptr_elt_neg_2:_(p0) = G_PTR_ADD %ptr, %cst_neg_2(s32) %ptr_elt_neg_1:_(p0) = G_PTR_ADD %ptr, %cst_neg_1(s32) - %elt_neg_2:_(s32) = G_ZEXTLOAD %ptr_elt_neg_2(p0) :: (load 1) - %elt_neg_1:_(s32) = G_ZEXTLOAD %ptr_elt_neg_1(p0) :: (load 1) - %elt_0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 1) + %elt_neg_2:_(s32) = G_ZEXTLOAD %ptr_elt_neg_2(p0) :: (load (s8)) + %elt_neg_1:_(s32) = G_ZEXTLOAD %ptr_elt_neg_1(p0) :: (load (s8)) + %elt_0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8)) - %byte0:_(s32) = G_ZEXTLOAD %ptr_elt_neg_3(p0) :: (load 1) + %byte0:_(s32) = G_ZEXTLOAD %ptr_elt_neg_3(p0) :: (load (s8)) %byte1:_(s32) = nuw G_SHL %elt_neg_2, %cst_8(s32) %byte2:_(s32) = nuw G_SHL %elt_neg_1, %cst_16(s32) %byte3:_(s32) = nuw G_SHL %elt_0, %cst_24(s32) @@ -601,7 +601,7 @@ body: | ; LITTLE: %cst_neg_3:_(s32) = G_CONSTANT i32 -3 ; LITTLE: %ptr:_(p0) = COPY $x0 ; LITTLE: %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s32) - ; LITTLE: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr_elt_neg_3(p0) :: (load 4, align 1) + ; LITTLE: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr_elt_neg_3(p0) :: (load (s32), align 1) ; LITTLE: %full_load:_(s32) = G_BSWAP [[LOAD]] ; LITTLE: $w1 = COPY %full_load(s32) ; LITTLE: RET_ReallyLR implicit $w1 @@ -610,7 +610,7 @@ body: | ; BIG: %cst_neg_3:_(s32) = G_CONSTANT i32 -3 ; BIG: %ptr:_(p0) = COPY $x0 ; BIG: %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s32) - ; BIG: %full_load:_(s32) = G_LOAD %ptr_elt_neg_3(p0) :: (load 4, align 1) + ; BIG: %full_load:_(s32) = G_LOAD %ptr_elt_neg_3(p0) :: (load (s32), align 1) ; BIG: $w1 = COPY %full_load(s32) ; BIG: RET_ReallyLR implicit $w1 %cst_neg_1:_(s32) = G_CONSTANT i32 -1 @@ -626,12 +626,12 @@ body: | %ptr_elt_neg_2:_(p0) = G_PTR_ADD %ptr, %cst_neg_2(s32) %ptr_elt_neg_1:_(p0) = G_PTR_ADD %ptr, %cst_neg_1(s32) - %elt_neg_3:_(s32) = G_ZEXTLOAD %ptr_elt_neg_3(p0) :: (load 1) - %elt_neg_2:_(s32) = G_ZEXTLOAD %ptr_elt_neg_2(p0) :: (load 1) - %elt_neg_1:_(s32) = G_ZEXTLOAD %ptr_elt_neg_1(p0) :: (load 1) - %elt_0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 1) + %elt_neg_3:_(s32) = G_ZEXTLOAD %ptr_elt_neg_3(p0) :: (load (s8)) + %elt_neg_2:_(s32) = G_ZEXTLOAD %ptr_elt_neg_2(p0) :: (load (s8)) + %elt_neg_1:_(s32) = G_ZEXTLOAD %ptr_elt_neg_1(p0) :: (load (s8)) + %elt_0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8)) - %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 1) + %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8)) %byte1:_(s32) = nuw G_SHL %elt_neg_1, %cst_8(s32) %byte2:_(s32) = nuw G_SHL %elt_neg_2, %cst_16(s32) %byte3:_(s32) = nuw G_SHL %elt_neg_3, %cst_24(s32) @@ -659,8 +659,8 @@ body: | ; LITTLE: %cst_16:_(s32) = G_CONSTANT i32 16 ; LITTLE: %ptr:_(p0) = COPY $x1 ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) - ; LITTLE: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 2) - ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (volatile load 2) + ; LITTLE: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) + ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (volatile load (s16)) ; LITTLE: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32) ; LITTLE: %full_load:_(s32) = G_OR %low_half, %high_half ; LITTLE: $w1 = COPY %full_load(s32) @@ -671,8 +671,8 @@ body: | ; BIG: %cst_16:_(s32) = G_CONSTANT i32 16 ; BIG: %ptr:_(p0) = COPY $x1 ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) - ; BIG: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 2) - ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (volatile load 2) + ; BIG: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) + ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (volatile load (s16)) ; BIG: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32) ; BIG: %full_load:_(s32) = G_OR %low_half, %high_half ; BIG: $w1 = COPY %full_load(s32) @@ -683,8 +683,8 @@ body: | %ptr:_(p0) = COPY $x1 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) - %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 2) - %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (volatile load 2) + %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) + %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (volatile load (s16)) %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32) %full_load:_(s32) = G_OR %low_half, %high_half @@ -707,8 +707,8 @@ body: | ; LITTLE: %cst_16:_(s32) = G_CONSTANT i32 16 ; LITTLE: %ptr:_(p0) = COPY $x1 ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) - ; LITTLE: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 2) - ; LITTLE: %wrong_size_load:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 1) + ; LITTLE: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) + ; LITTLE: %wrong_size_load:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8)) ; LITTLE: %high_half:_(s32) = nuw G_SHL %wrong_size_load, %cst_16(s32) ; LITTLE: %full_load:_(s32) = G_OR %low_half, %high_half ; LITTLE: $w1 = COPY %full_load(s32) @@ -719,8 +719,8 @@ body: | ; BIG: %cst_16:_(s32) = G_CONSTANT i32 16 ; BIG: %ptr:_(p0) = COPY $x1 ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) - ; BIG: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 2) - ; BIG: %wrong_size_load:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 1) + ; BIG: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) + ; BIG: %wrong_size_load:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8)) ; BIG: %high_half:_(s32) = nuw G_SHL %wrong_size_load, %cst_16(s32) ; BIG: %full_load:_(s32) = G_OR %low_half, %high_half ; BIG: $w1 = COPY %full_load(s32) @@ -731,8 +731,8 @@ body: | %ptr:_(p0) = COPY $x1 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) - %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 2) - %wrong_size_load:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 1) + %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) + %wrong_size_load:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8)) %high_half:_(s32) = nuw G_SHL %wrong_size_load, %cst_16(s32) %full_load:_(s32) = G_OR %low_half, %high_half @@ -758,8 +758,8 @@ body: | ; LITTLE: %cst_24:_(s32) = G_CONSTANT i32 24 ; LITTLE: %ptr:_(p0) = COPY $x1 ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) - ; LITTLE: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 2) - ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 2) + ; LITTLE: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) + ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) ; LITTLE: %high_half:_(s32) = nuw G_SHL %elt1, %cst_24(s32) ; LITTLE: %full_load:_(s32) = G_OR %low_half, %high_half ; LITTLE: $w1 = COPY %full_load(s32) @@ -770,8 +770,8 @@ body: | ; BIG: %cst_24:_(s32) = G_CONSTANT i32 24 ; BIG: %ptr:_(p0) = COPY $x1 ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) - ; BIG: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 2) - ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 2) + ; BIG: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) + ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) ; BIG: %high_half:_(s32) = nuw G_SHL %elt1, %cst_24(s32) ; BIG: %full_load:_(s32) = G_OR %low_half, %high_half ; BIG: $w1 = COPY %full_load(s32) @@ -782,8 +782,8 @@ body: | %ptr:_(p0) = COPY $x1 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) - %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 2) - %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 2) + %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) + %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) %high_half:_(s32) = nuw G_SHL %elt1, %cst_24(s32) %full_load:_(s32) = G_OR %low_half, %high_half @@ -809,8 +809,8 @@ body: | ; LITTLE: %cst_8:_(s32) = G_CONSTANT i32 8 ; LITTLE: %ptr:_(p0) = COPY $x1 ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) - ; LITTLE: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 2) - ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 2) + ; LITTLE: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) + ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) ; LITTLE: %high_half:_(s32) = nuw G_SHL %elt1, %cst_8(s32) ; LITTLE: %full_load:_(s32) = G_OR %low_half, %high_half ; LITTLE: $w1 = COPY %full_load(s32) @@ -821,8 +821,8 @@ body: | ; BIG: %cst_8:_(s32) = G_CONSTANT i32 8 ; BIG: %ptr:_(p0) = COPY $x1 ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) - ; BIG: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 2) - ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 2) + ; BIG: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) + ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) ; BIG: %high_half:_(s32) = nuw G_SHL %elt1, %cst_8(s32) ; BIG: %full_load:_(s32) = G_OR %low_half, %high_half ; BIG: $w1 = COPY %full_load(s32) @@ -833,8 +833,8 @@ body: | %ptr:_(p0) = COPY $x1 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) - %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 2) - %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 2) + %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) + %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) %high_half:_(s32) = nuw G_SHL %elt1, %cst_8(s32) %full_load:_(s32) = G_OR %low_half, %high_half @@ -863,9 +863,9 @@ body: | ; LITTLE: %ptr:_(p0) = COPY $x1 ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) ; LITTLE: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64) - ; LITTLE: %byte0_byte1:_(s64) = G_ZEXTLOAD %ptr(p0) :: (load 2) - ; LITTLE: %elt1:_(s64) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 2) - ; LITTLE: %elt3:_(s64) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load 2) + ; LITTLE: %byte0_byte1:_(s64) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) + ; LITTLE: %elt1:_(s64) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) + ; LITTLE: %elt3:_(s64) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s16)) ; LITTLE: %byte2_byte3:_(s64) = nuw G_SHL %elt1, %cst_16(s64) ; LITTLE: %byte6_byte7:_(s64) = nuw G_SHL %elt3, %cst_48(s64) ; LITTLE: %or1:_(s64) = G_OR %byte0_byte1, %byte2_byte3 @@ -881,9 +881,9 @@ body: | ; BIG: %ptr:_(p0) = COPY $x1 ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) ; BIG: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64) - ; BIG: %byte0_byte1:_(s64) = G_ZEXTLOAD %ptr(p0) :: (load 2) - ; BIG: %elt1:_(s64) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 2) - ; BIG: %elt3:_(s64) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load 2) + ; BIG: %byte0_byte1:_(s64) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) + ; BIG: %elt1:_(s64) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) + ; BIG: %elt3:_(s64) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s16)) ; BIG: %byte2_byte3:_(s64) = nuw G_SHL %elt1, %cst_16(s64) ; BIG: %byte6_byte7:_(s64) = nuw G_SHL %elt3, %cst_48(s64) ; BIG: %or1:_(s64) = G_OR %byte0_byte1, %byte2_byte3 @@ -900,10 +900,10 @@ body: | %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64) - %byte0_byte1:_(s64) = G_ZEXTLOAD %ptr(p0) :: (load 2) + %byte0_byte1:_(s64) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) - %elt1:_(s64) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 2) - %elt3:_(s64) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load 2) + %elt1:_(s64) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) + %elt3:_(s64) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s16)) %byte2_byte3:_(s64) = nuw G_SHL %elt1, %cst_16(s64) %byte6_byte7:_(s64) = nuw G_SHL %elt3, %cst_48(s64) @@ -930,8 +930,8 @@ body: | ; LITTLE: %cst_16:_(s32) = G_CONSTANT i32 16 ; LITTLE: %ptr:_(p0) = COPY $x1 ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) - ; LITTLE: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 2) - ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 2, addrspace 1) + ; LITTLE: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) + ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16), addrspace 1) ; LITTLE: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32) ; LITTLE: %full_load:_(s32) = G_OR %low_half, %high_half ; LITTLE: $w1 = COPY %full_load(s32) @@ -942,8 +942,8 @@ body: | ; BIG: %cst_16:_(s32) = G_CONSTANT i32 16 ; BIG: %ptr:_(p0) = COPY $x1 ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) - ; BIG: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 2) - ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 2, addrspace 1) + ; BIG: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) + ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16), addrspace 1) ; BIG: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32) ; BIG: %full_load:_(s32) = G_OR %low_half, %high_half ; BIG: $w1 = COPY %full_load(s32) @@ -954,8 +954,8 @@ body: | %ptr:_(p0) = COPY $x1 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) - %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 2, addrspace 0) - %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 2, addrspace 1) + %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16), addrspace 0) + %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16), addrspace 1) %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32) %full_load:_(s32) = G_OR %low_half, %high_half @@ -986,10 +986,10 @@ body: | ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32) ; LITTLE: %uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s32) ; LITTLE: %also_uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s32) - ; LITTLE: %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 1) - ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 1) - ; LITTLE: %elt2:_(s32) = G_ZEXTLOAD %uses_idx_2(p0) :: (load 1) - ; LITTLE: %elt3:_(s32) = G_ZEXTLOAD %also_uses_idx_2(p0) :: (load 1) + ; LITTLE: %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8)) + ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8)) + ; LITTLE: %elt2:_(s32) = G_ZEXTLOAD %uses_idx_2(p0) :: (load (s8)) + ; LITTLE: %elt3:_(s32) = G_ZEXTLOAD %also_uses_idx_2(p0) :: (load (s8)) ; LITTLE: %byte1:_(s32) = nuw G_SHL %elt1, %cst_8(s32) ; LITTLE: %byte2:_(s32) = nuw G_SHL %elt2, %cst_16(s32) ; LITTLE: %byte3:_(s32) = nuw G_SHL %elt3, %cst_24(s32) @@ -1009,10 +1009,10 @@ body: | ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32) ; BIG: %uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s32) ; BIG: %also_uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s32) - ; BIG: %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 1) - ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 1) - ; BIG: %elt2:_(s32) = G_ZEXTLOAD %uses_idx_2(p0) :: (load 1) - ; BIG: %elt3:_(s32) = G_ZEXTLOAD %also_uses_idx_2(p0) :: (load 1) + ; BIG: %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8)) + ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8)) + ; BIG: %elt2:_(s32) = G_ZEXTLOAD %uses_idx_2(p0) :: (load (s8)) + ; BIG: %elt3:_(s32) = G_ZEXTLOAD %also_uses_idx_2(p0) :: (load (s8)) ; BIG: %byte1:_(s32) = nuw G_SHL %elt1, %cst_8(s32) ; BIG: %byte2:_(s32) = nuw G_SHL %elt2, %cst_16(s32) ; BIG: %byte3:_(s32) = nuw G_SHL %elt3, %cst_24(s32) @@ -1033,11 +1033,11 @@ body: | %uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s32) %also_uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s32) - %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 1) + %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8)) - %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 1) - %elt2:_(s32) = G_ZEXTLOAD %uses_idx_2(p0) :: (load 1) - %elt3:_(s32) = G_ZEXTLOAD %also_uses_idx_2(p0) :: (load 1) + %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8)) + %elt2:_(s32) = G_ZEXTLOAD %uses_idx_2(p0) :: (load (s8)) + %elt3:_(s32) = G_ZEXTLOAD %also_uses_idx_2(p0) :: (load (s8)) %byte1:_(s32) = nuw G_SHL %elt1, %cst_8(s32) %byte2:_(s32) = nuw G_SHL %elt2, %cst_16(s32) @@ -1073,10 +1073,10 @@ body: | ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32) ; LITTLE: %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32) ; LITTLE: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32) - ; LITTLE: %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 1) - ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 1) - ; LITTLE: %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load 1) - ; LITTLE: %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load 1) + ; LITTLE: %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8)) + ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8)) + ; LITTLE: %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8)) + ; LITTLE: %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s8)) ; LITTLE: %byte1:_(s32) = nuw G_SHL %elt1, %cst_8(s32) ; LITTLE: %duplicate_shl_1:_(s32) = nuw G_SHL %elt2, %duplicate_shl_cst(s32) ; LITTLE: %duplicate_shl_2:_(s32) = nuw G_SHL %elt3, %duplicate_shl_cst(s32) @@ -1096,10 +1096,10 @@ body: | ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32) ; BIG: %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32) ; BIG: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32) - ; BIG: %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 1) - ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 1) - ; BIG: %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load 1) - ; BIG: %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load 1) + ; BIG: %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8)) + ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8)) + ; BIG: %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8)) + ; BIG: %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s8)) ; BIG: %byte1:_(s32) = nuw G_SHL %elt1, %cst_8(s32) ; BIG: %duplicate_shl_1:_(s32) = nuw G_SHL %elt2, %duplicate_shl_cst(s32) ; BIG: %duplicate_shl_2:_(s32) = nuw G_SHL %elt3, %duplicate_shl_cst(s32) @@ -1120,11 +1120,11 @@ body: | %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32) %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32) - %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 1) + %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8)) - %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 1) - %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load 1) - %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load 1) + %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8)) + %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8)) + %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s8)) %byte1:_(s32) = nuw G_SHL %elt1, %cst_8(s32) %duplicate_shl_1:_(s32) = nuw G_SHL %elt2, %duplicate_shl_cst(s32) @@ -1163,10 +1163,10 @@ body: | ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32) ; LITTLE: %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32) ; LITTLE: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32) - ; LITTLE: %lowest_idx_load:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 1) - ; LITTLE: %byte0:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 1) - ; LITTLE: %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load 1) - ; LITTLE: %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load 1) + ; LITTLE: %lowest_idx_load:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8)) + ; LITTLE: %byte0:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8)) + ; LITTLE: %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8)) + ; LITTLE: %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s8)) ; LITTLE: %byte1:_(s32) = nuw G_SHL %lowest_idx_load, %cst_8(s32) ; LITTLE: %byte2:_(s32) = nuw G_SHL %elt2, %cst_16(s32) ; LITTLE: %byte3:_(s32) = nuw G_SHL %elt3, %cst_24(s32) @@ -1187,10 +1187,10 @@ body: | ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32) ; BIG: %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32) ; BIG: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32) - ; BIG: %lowest_idx_load:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 1) - ; BIG: %byte0:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 1) - ; BIG: %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load 1) - ; BIG: %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load 1) + ; BIG: %lowest_idx_load:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8)) + ; BIG: %byte0:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8)) + ; BIG: %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8)) + ; BIG: %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s8)) ; BIG: %byte1:_(s32) = nuw G_SHL %lowest_idx_load, %cst_8(s32) ; BIG: %byte2:_(s32) = nuw G_SHL %elt2, %cst_16(s32) ; BIG: %byte3:_(s32) = nuw G_SHL %elt3, %cst_24(s32) @@ -1213,10 +1213,10 @@ body: | %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32) ; This load is index 0 - %lowest_idx_load:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 1) - %byte0:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 1) - %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load 1) - %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load 1) + %lowest_idx_load:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8)) + %byte0:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8)) + %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8)) + %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s8)) ; ... But it ends up being shifted, so we shouldn't combine. %byte1:_(s32) = nuw G_SHL %lowest_idx_load, %cst_8(s32) @@ -1247,8 +1247,8 @@ body: | ; LITTLE: %cst_16:_(s32) = G_CONSTANT i32 16 ; LITTLE: %ptr:_(p0) = COPY $x1 ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) - ; LITTLE: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 2) - ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 2) + ; LITTLE: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) + ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) ; LITTLE: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32) ; LITTLE: %full_load:_(s32) = G_OR %low_half, %high_half ; LITTLE: %extra_use:_(s32) = G_AND %full_load, %low_half @@ -1260,8 +1260,8 @@ body: | ; BIG: %cst_16:_(s32) = G_CONSTANT i32 16 ; BIG: %ptr:_(p0) = COPY $x1 ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) - ; BIG: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 2) - ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 2) + ; BIG: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) + ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) ; BIG: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32) ; BIG: %full_load:_(s32) = G_OR %low_half, %high_half ; BIG: %extra_use:_(s32) = G_AND %full_load, %low_half @@ -1273,8 +1273,8 @@ body: | %ptr:_(p0) = COPY $x1 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) - %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 2) - %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 2) + %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) + %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32) %full_load:_(s32) = G_OR %low_half, %high_half @@ -1299,8 +1299,8 @@ body: | ; LITTLE: %cst_16:_(s32) = G_CONSTANT i32 16 ; LITTLE: %ptr:_(p0) = COPY $x1 ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) - ; LITTLE: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 2) - ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 2) + ; LITTLE: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) + ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) ; LITTLE: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32) ; LITTLE: %full_load:_(s32) = G_OR %low_half, %high_half ; LITTLE: %extra_use:_(s32) = G_AND %full_load, %high_half @@ -1312,8 +1312,8 @@ body: | ; BIG: %cst_16:_(s32) = G_CONSTANT i32 16 ; BIG: %ptr:_(p0) = COPY $x1 ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) - ; BIG: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 2) - ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 2) + ; BIG: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) + ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) ; BIG: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32) ; BIG: %full_load:_(s32) = G_OR %low_half, %high_half ; BIG: %extra_use:_(s32) = G_AND %full_load, %high_half @@ -1325,8 +1325,8 @@ body: | %ptr:_(p0) = COPY $x1 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) - %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 2) - %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 2) + %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) + %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32) %full_load:_(s32) = G_OR %low_half, %high_half @@ -1349,11 +1349,11 @@ body: | ; LITTLE: %cst_16:_(s32) = G_CONSTANT i32 16 ; LITTLE: %ptr:_(p0) = COPY $x1 ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) - ; LITTLE: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 2) + ; LITTLE: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) ; LITTLE: %other_ptr:_(p0) = COPY $x1 ; LITTLE: %some_val:_(s32) = G_CONSTANT i32 12 - ; LITTLE: G_STORE %some_val(s32), %other_ptr(p0) :: (store 2) - ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 2) + ; LITTLE: G_STORE %some_val(s32), %other_ptr(p0) :: (store (s16)) + ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) ; LITTLE: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32) ; LITTLE: %full_load:_(s32) = G_OR %low_half, %high_half ; LITTLE: $w1 = COPY %full_load(s32) @@ -1364,11 +1364,11 @@ body: | ; BIG: %cst_16:_(s32) = G_CONSTANT i32 16 ; BIG: %ptr:_(p0) = COPY $x1 ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) - ; BIG: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 2) + ; BIG: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) ; BIG: %other_ptr:_(p0) = COPY $x1 ; BIG: %some_val:_(s32) = G_CONSTANT i32 12 - ; BIG: G_STORE %some_val(s32), %other_ptr(p0) :: (store 2) - ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 2) + ; BIG: G_STORE %some_val(s32), %other_ptr(p0) :: (store (s16)) + ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) ; BIG: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32) ; BIG: %full_load:_(s32) = G_OR %low_half, %high_half ; BIG: $w1 = COPY %full_load(s32) @@ -1378,14 +1378,14 @@ body: | %ptr:_(p0) = COPY $x1 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) - %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 2) + %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) ; Memory could be modified here, so don't combine! %other_ptr:_(p0) = COPY $x1 %some_val:_(s32) = G_CONSTANT i32 12 - G_STORE %some_val, %other_ptr :: (store 2) + G_STORE %some_val, %other_ptr :: (store (s16)) - %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 2) + %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32) %full_load:_(s32) = G_OR %low_half, %high_half @@ -1408,16 +1408,16 @@ body: | ; LITTLE: %cst_16:_(s32) = G_CONSTANT i32 16 ; LITTLE: %ptr:_(p0) = COPY $x1 ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) - ; LITTLE: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 2) + ; LITTLE: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) ; LITTLE: bb.1: ; LITTLE: successors: %bb.2(0x80000000) ; LITTLE: liveins: $x0, $x1 ; LITTLE: %other_ptr:_(p0) = COPY $x1 ; LITTLE: %some_val:_(s32) = G_CONSTANT i32 12 - ; LITTLE: G_STORE %some_val(s32), %other_ptr(p0) :: (store 2) + ; LITTLE: G_STORE %some_val(s32), %other_ptr(p0) :: (store (s16)) ; LITTLE: bb.2: ; LITTLE: liveins: $x0, $x1 - ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 2) + ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) ; LITTLE: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32) ; LITTLE: %full_load:_(s32) = G_OR %low_half, %high_half ; LITTLE: $w1 = COPY %full_load(s32) @@ -1430,16 +1430,16 @@ body: | ; BIG: %cst_16:_(s32) = G_CONSTANT i32 16 ; BIG: %ptr:_(p0) = COPY $x1 ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) - ; BIG: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 2) + ; BIG: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) ; BIG: bb.1: ; BIG: successors: %bb.2(0x80000000) ; BIG: liveins: $x0, $x1 ; BIG: %other_ptr:_(p0) = COPY $x1 ; BIG: %some_val:_(s32) = G_CONSTANT i32 12 - ; BIG: G_STORE %some_val(s32), %other_ptr(p0) :: (store 2) + ; BIG: G_STORE %some_val(s32), %other_ptr(p0) :: (store (s16)) ; BIG: bb.2: ; BIG: liveins: $x0, $x1 - ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 2) + ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) ; BIG: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32) ; BIG: %full_load:_(s32) = G_OR %low_half, %high_half ; BIG: $w1 = COPY %full_load(s32) @@ -1455,7 +1455,7 @@ body: | %ptr:_(p0) = COPY $x1 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) - %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 2) + %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) bb.1: liveins: $x0, $x1 @@ -1463,11 +1463,11 @@ body: | ; Memory could be modified here, so don't combine! %other_ptr:_(p0) = COPY $x1 %some_val:_(s32) = G_CONSTANT i32 12 - G_STORE %some_val, %other_ptr :: (store 2) + G_STORE %some_val, %other_ptr :: (store (s16)) bb.2: liveins: $x0, $x1 - %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 2) + %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32) %full_load:_(s32) = G_OR %low_half, %high_half @@ -1489,10 +1489,10 @@ body: | ; LITTLE: %cst_16:_(s32) = G_CONSTANT i32 16 ; LITTLE: %ptr:_(p0) = COPY $x1 ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) - ; LITTLE: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 2) + ; LITTLE: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) ; LITTLE: bb.1: ; LITTLE: liveins: $x0, $x1 - ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 2) + ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) ; LITTLE: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32) ; LITTLE: %full_load:_(s32) = G_OR %low_half, %high_half ; LITTLE: $w1 = COPY %full_load(s32) @@ -1505,10 +1505,10 @@ body: | ; BIG: %cst_16:_(s32) = G_CONSTANT i32 16 ; BIG: %ptr:_(p0) = COPY $x1 ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) - ; BIG: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 2) + ; BIG: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) ; BIG: bb.1: ; BIG: liveins: $x0, $x1 - ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 2) + ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) ; BIG: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32) ; BIG: %full_load:_(s32) = G_OR %low_half, %high_half ; BIG: $w1 = COPY %full_load(s32) @@ -1523,11 +1523,11 @@ body: | %ptr:_(p0) = COPY $x1 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) - %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 2) + %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) bb.1: liveins: $x0, $x1 - %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 2) + %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32) %full_load:_(s32) = G_OR %low_half, %high_half @@ -1546,24 +1546,24 @@ body: | ; LITTLE-LABEL: name: load_first ; LITTLE: liveins: $x0, $x1 ; LITTLE: %ptr:_(p0) = COPY $x1 - ; LITTLE: %full_load:_(s32) = G_LOAD %ptr(p0) :: (load 4, align 2) + ; LITTLE: %full_load:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 2) ; LITTLE: $w1 = COPY %full_load(s32) ; LITTLE: RET_ReallyLR implicit $w1 ; BIG-LABEL: name: load_first ; BIG: liveins: $x0, $x1 ; BIG: %ptr:_(p0) = COPY $x1 - ; BIG: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr(p0) :: (load 4, align 2) + ; BIG: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 2) ; BIG: %full_load:_(s32) = G_BSWAP [[LOAD]] ; BIG: $w1 = COPY %full_load(s32) ; BIG: RET_ReallyLR implicit $w1 - %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load 2) + %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) %cst_1:_(s64) = G_CONSTANT i64 1 %cst_16:_(s32) = G_CONSTANT i32 16 %ptr:_(p0) = COPY $x1 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) - %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load 2) + %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32) %full_load:_(s32) = G_OR %low_half, %high_half diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-bzero.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-bzero.mir index 73fd9d17230ca..a70a5220b4e54 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-bzero.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-bzero.mir @@ -24,19 +24,19 @@ body: | ; DARWIN: liveins: $x0, $x1 ; DARWIN: %ptr:_(p0) = COPY $x0 ; DARWIN: %width:_(s64) = COPY $x1 - ; DARWIN: G_BZERO %ptr(p0), %width(s64), 0 :: (store 4) + ; DARWIN: G_BZERO %ptr(p0), %width(s64), 0 :: (store (s32)) ; DARWIN: RET_ReallyLR ; UNKNOWN-LABEL: name: bzero_unknown_width ; UNKNOWN: liveins: $x0, $x1 ; UNKNOWN: %ptr:_(p0) = COPY $x0 ; UNKNOWN: %zero:_(s8) = G_CONSTANT i8 0 ; UNKNOWN: %width:_(s64) = COPY $x1 - ; UNKNOWN: G_MEMSET %ptr(p0), %zero(s8), %width(s64), 0 :: (store 4) + ; UNKNOWN: G_MEMSET %ptr(p0), %zero(s8), %width(s64), 0 :: (store (s32)) ; UNKNOWN: RET_ReallyLR %ptr:_(p0) = COPY $x0 %zero:_(s8) = G_CONSTANT i8 0 %width:_(s64) = COPY $x1 - G_MEMSET %ptr(p0), %zero(s8), %width(s64), 0 :: (store 4) + G_MEMSET %ptr(p0), %zero(s8), %width(s64), 0 :: (store (s32)) RET_ReallyLR ... --- @@ -49,19 +49,19 @@ body: | ; DARWIN: liveins: $x0, $x1 ; DARWIN: %ptr:_(p0) = COPY $x0 ; DARWIN: %width:_(s64) = COPY $x1 - ; DARWIN: G_BZERO %ptr(p0), %width(s64), 1 :: (store 4) + ; DARWIN: G_BZERO %ptr(p0), %width(s64), 1 :: (store (s32)) ; DARWIN: RET_ReallyLR ; UNKNOWN-LABEL: name: bzero_tail_unknown_width ; UNKNOWN: liveins: $x0, $x1 ; UNKNOWN: %ptr:_(p0) = COPY $x0 ; UNKNOWN: %zero:_(s8) = G_CONSTANT i8 0 ; UNKNOWN: %width:_(s64) = COPY $x1 - ; UNKNOWN: G_MEMSET %ptr(p0), %zero(s8), %width(s64), 1 :: (store 4) + ; UNKNOWN: G_MEMSET %ptr(p0), %zero(s8), %width(s64), 1 :: (store (s32)) ; UNKNOWN: RET_ReallyLR %ptr:_(p0) = COPY $x0 %zero:_(s8) = G_CONSTANT i8 0 %width:_(s64) = COPY $x1 - G_MEMSET %ptr(p0), %zero(s8), %width(s64), 1 :: (store 4) + G_MEMSET %ptr(p0), %zero(s8), %width(s64), 1 :: (store (s32)) RET_ReallyLR ... --- @@ -76,19 +76,19 @@ body: | ; DARWIN: liveins: $x0, $x1 ; DARWIN: %ptr:_(p0) = COPY $x0 ; DARWIN: %width:_(s64) = G_CONSTANT i64 1024 - ; DARWIN: G_BZERO %ptr(p0), %width(s64), 0 :: (store 4) + ; DARWIN: G_BZERO %ptr(p0), %width(s64), 0 :: (store (s32)) ; DARWIN: RET_ReallyLR ; UNKNOWN-LABEL: name: bzero_constant_width ; UNKNOWN: liveins: $x0, $x1 ; UNKNOWN: %ptr:_(p0) = COPY $x0 ; UNKNOWN: %zero:_(s8) = G_CONSTANT i8 0 ; UNKNOWN: %width:_(s64) = G_CONSTANT i64 1024 - ; UNKNOWN: G_MEMSET %ptr(p0), %zero(s8), %width(s64), 0 :: (store 4) + ; UNKNOWN: G_MEMSET %ptr(p0), %zero(s8), %width(s64), 0 :: (store (s32)) ; UNKNOWN: RET_ReallyLR %ptr:_(p0) = COPY $x0 %zero:_(s8) = G_CONSTANT i8 0 %width:_(s64) = G_CONSTANT i64 1024 - G_MEMSET %ptr(p0), %zero(s8), %width(s64), 0 :: (store 4) + G_MEMSET %ptr(p0), %zero(s8), %width(s64), 0 :: (store (s32)) RET_ReallyLR ... --- @@ -103,19 +103,19 @@ body: | ; DARWIN: liveins: $x0, $x1 ; DARWIN: %ptr:_(p0) = COPY $x0 ; DARWIN: %width:_(s64) = G_CONSTANT i64 256 - ; DARWIN: G_BZERO %ptr(p0), %width(s64), 0 :: (store 4) + ; DARWIN: G_BZERO %ptr(p0), %width(s64), 0 :: (store (s32)) ; DARWIN: RET_ReallyLR ; UNKNOWN-LABEL: name: bzero_constant_width_minsize ; UNKNOWN: liveins: $x0, $x1 ; UNKNOWN: %ptr:_(p0) = COPY $x0 ; UNKNOWN: %zero:_(s8) = G_CONSTANT i8 0 ; UNKNOWN: %width:_(s64) = G_CONSTANT i64 256 - ; UNKNOWN: G_MEMSET %ptr(p0), %zero(s8), %width(s64), 0 :: (store 4) + ; UNKNOWN: G_MEMSET %ptr(p0), %zero(s8), %width(s64), 0 :: (store (s32)) ; UNKNOWN: RET_ReallyLR %ptr:_(p0) = COPY $x0 %zero:_(s8) = G_CONSTANT i8 0 %width:_(s64) = G_CONSTANT i64 256 - G_MEMSET %ptr(p0), %zero(s8), %width(s64), 0 :: (store 4) + G_MEMSET %ptr(p0), %zero(s8), %width(s64), 0 :: (store (s32)) RET_ReallyLR ... --- @@ -131,17 +131,17 @@ body: | ; DARWIN: %ptr:_(p0) = COPY $x0 ; DARWIN: %not_zero:_(s8) = G_CONSTANT i8 1 ; DARWIN: %width:_(s64) = G_CONSTANT i64 256 - ; DARWIN: G_MEMSET %ptr(p0), %not_zero(s8), %width(s64), 0 :: (store 4) + ; DARWIN: G_MEMSET %ptr(p0), %not_zero(s8), %width(s64), 0 :: (store (s32)) ; DARWIN: RET_ReallyLR ; UNKNOWN-LABEL: name: not_zero ; UNKNOWN: liveins: $x0, $x1 ; UNKNOWN: %ptr:_(p0) = COPY $x0 ; UNKNOWN: %not_zero:_(s8) = G_CONSTANT i8 1 ; UNKNOWN: %width:_(s64) = G_CONSTANT i64 256 - ; UNKNOWN: G_MEMSET %ptr(p0), %not_zero(s8), %width(s64), 0 :: (store 4) + ; UNKNOWN: G_MEMSET %ptr(p0), %not_zero(s8), %width(s64), 0 :: (store (s32)) ; UNKNOWN: RET_ReallyLR %ptr:_(p0) = COPY $x0 %not_zero:_(s8) = G_CONSTANT i8 1 %width:_(s64) = G_CONSTANT i64 256 - G_MEMSET %ptr(p0), %not_zero(s8), %width(s64), 0 :: (store 4) + G_MEMSET %ptr(p0), %not_zero(s8), %width(s64), 0 :: (store (s32)) RET_ReallyLR diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-cornercases.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-cornercases.mir index 63e80af680d6d..2a9291634fe34 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-cornercases.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-cornercases.mir @@ -75,7 +75,7 @@ body: | ; CHECK: [[T0:%[0-9]+]]:_(s32) = G_SEXTLOAD %0:_(p0) = COPY $x0 %1:_(s32) = COPY $w1 - %2:_(s8) = G_LOAD %0 :: (load 1 from %ir.addr) + %2:_(s8) = G_LOAD %0 :: (load (s8) from %ir.addr) %3:_(s32) = G_SEXT %2 %4:_(s32) = G_CONSTANT i32 1 %5:_(s1) = G_ICMP intpred(ne), %1:_(s32), %4:_ @@ -112,15 +112,15 @@ body: | # a test of the debug output and a test. # # CHECK-WORKLIST-LABEL: Generic MI Combiner for: multiple_copies -# CHECK-WORKLIST: Try combining [[IN0:%[0-9]+]]:_(s8) = G_LOAD [[IN1:%[0-9]+]]:_(p0){{.*}} :: (load 1 from %ir.addr) +# CHECK-WORKLIST: Try combining [[IN0:%[0-9]+]]:_(s8) = G_LOAD [[IN1:%[0-9]+]]:_(p0){{.*}} :: (load (s8) from %ir.addr) # CHECK-WORKLIST: Preferred use is: [[IN2:%[0-9]+]]:_(s32) = G_SEXT [[IN0]]:_(s8) -# CHECK-WORKLIST-DAG: Changing: [[IN0]]:_(s8) = G_LOAD [[IN1]]:_(p0){{.*}} :: (load 1 from %ir.addr) +# CHECK-WORKLIST-DAG: Changing: [[IN0]]:_(s8) = G_LOAD [[IN1]]:_(p0){{.*}} :: (load (s8) from %ir.addr) # CHECK-WORKLIST-DAG: Changing: [[IN3:%[0-9]+]]:_(s8) = G_ADD [[IN0]]:_, [[IN4:%[0-9]+]]:_ # CHECK-WORKLIST-DAG: Changed: [[IN3]]:_(s8) = G_ADD [[NEW1:%[0-9]+]]:_, [[IN4]]:_ # CHECK-WORKLIST-DAG: Changing: [[IN5:%[0-9]+]]:_(s8) = G_SUB [[IN0]]:_, [[IN6:%[0-9]+]]:_ # CHECK-WORKLIST-DAG: Changed: [[IN5]]:_(s8) = G_SUB [[NEW2:%[0-9]+]]:_, [[IN6]]:_ # CHECK-WORKLIST-DAG: Erasing: [[IN2]]:_(s32) = G_SEXT [[IN0]]:_(s8) -# CHECK-WORKLIST-DAG: Changed: [[IN2]]:_(s32) = G_SEXTLOAD [[IN1]]:_(p0){{.*}} :: (load 1 from %ir.addr) +# CHECK-WORKLIST-DAG: Changed: [[IN2]]:_(s32) = G_SEXTLOAD [[IN1]]:_(p0){{.*}} :: (load (s8) from %ir.addr) # CHECK-WORKLIST-DAG: Created: [[NEW1]]:_(s8) = G_TRUNC [[IN2]]:_(s32) # CHECK-WORKLIST-DAG: Created: [[NEW2]]:_(s8) = G_TRUNC [[IN2]]:_(s32) # CHECK-WORKLIST: Try combining @@ -140,7 +140,7 @@ body: | ; CHECK: [[T0:%[0-9]+]]:_(s32) = G_SEXTLOAD %0:_(p0) = COPY $x0 %1:_(s32) = COPY $w1 - %2:_(s8) = G_LOAD %0 :: (load 1 from %ir.addr) + %2:_(s8) = G_LOAD %0 :: (load (s8) from %ir.addr) ; CHECK: [[T4:%[0-9]+]]:_(s8) = G_TRUNC [[T0]](s32) %3:_(s32) = G_SEXT %2 %4:_(s32) = G_CONSTANT i32 1 @@ -178,7 +178,7 @@ body: | ; CHECK: [[T0:%[0-9]+]]:_(s32) = G_SEXTLOAD %0:_(p0) = COPY $x0 %1:_(s32) = COPY $w1 - %2:_(s8) = G_LOAD %0 :: (load 1 from %ir.addr) + %2:_(s8) = G_LOAD %0 :: (load (s8) from %ir.addr) %3:_(s32) = G_CONSTANT i32 1 %4:_(s1) = G_ICMP intpred(ne), %1:_(s32), %3:_ G_BRCOND %4:_(s1), %bb.1 @@ -211,9 +211,9 @@ body: | $w0 = COPY %10 $w1 = COPY %11 # CHECK-WORKLIST-LABEL: Generic MI Combiner for: sink_to_phi_nondominating -# CHECK-WORKLIST: Try combining [[IN0:%[0-9]+]]:_(s8) = G_LOAD [[IN1:%[0-9]+]]:_(p0){{.*}} :: (load 1 from %ir.addr) +# CHECK-WORKLIST: Try combining [[IN0:%[0-9]+]]:_(s8) = G_LOAD [[IN1:%[0-9]+]]:_(p0){{.*}} :: (load (s8) from %ir.addr) # CHECK-WORKLIST: Preferred use is: [[IN2:%[0-9]+]]:_(s32) = G_SEXT [[IN0]]:_(s8) -# CHECK-WORKLIST-DAG: Changing: [[IN0]]:_(s8) = G_LOAD [[IN1]]:_(p0){{.*}} :: (load 1 from %ir.addr) +# CHECK-WORKLIST-DAG: Changing: [[IN0]]:_(s8) = G_LOAD [[IN1]]:_(p0){{.*}} :: (load (s8) from %ir.addr) # CHECK-WORKLIST-DAG: Creating: G_TRUNC # CHECK-WORKLIST-DAG: Changing: [[IN3:%[0-9]+]]:_(s8) = G_ADD [[IN0]]:_, [[IN4:%[0-9]+]]:_ # CHECK-WORKLIST-DAG: Changed: [[IN3]]:_(s8) = G_ADD [[OUT1:%[0-9]+]]:_, [[IN4]]:_ @@ -221,7 +221,7 @@ body: | # CHECK-WORKLIST-DAG: Changing: [[IN5:%[0-9]+]]:_(s8) = G_SUB [[IN0]]:_, [[IN6:%[0-9]+]]:_ # CHECK-WORKLIST-DAG: Changed: [[IN5]]:_(s8) = G_SUB [[OUT2:%[0-9]+]]:_, [[IN6]]:_ # CHECK-WORKLIST-DAG: Erasing: [[IN2]]:_(s32) = G_SEXT [[IN0]]:_(s8) -# CHECK-WORKLIST-DAG: Changed: [[IN2]]:_(s32) = G_SEXTLOAD [[IN1]]:_(p0){{.*}} :: (load 1 from %ir.addr) +# CHECK-WORKLIST-DAG: Changed: [[IN2]]:_(s32) = G_SEXTLOAD [[IN1]]:_(p0){{.*}} :: (load (s8) from %ir.addr) # CHECK-WORKLIST-DAG: Created: [[OUT1]]:_(s8) = G_TRUNC [[IN2]]:_(s32) # CHECK-WORKLIST-DAG: Created: [[OUT2]]:_(s8) = G_TRUNC [[IN2]]:_(s32) # CHECK-WORKLIST: Try combining @@ -238,7 +238,7 @@ body: | ; CHECK: [[T0:%[0-9]+]]:_(s32) = G_SEXTLOAD %0:_(p0) = COPY $x0 %1:_(s32) = COPY $w1 - %2:_(s8) = G_LOAD %0 :: (load 1 from %ir.addr) + %2:_(s8) = G_LOAD %0 :: (load (s8) from %ir.addr) %3:_(s32) = G_SEXT %2 %4:_(s32) = G_CONSTANT i32 1 %5:_(s1) = G_ICMP intpred(ne), %1:_(s32), %4:_ @@ -287,10 +287,10 @@ body: | bb.0.entry: liveins: $x0, $w1 %0:_(p0) = COPY $x0 - %1:_(s8) = G_LOAD %0 :: (load 1 from %ir.addr) - ; CHECK: %1:_(s8) = G_LOAD %0(p0) :: (load 1 from %ir.addr) - G_STORE %1(s8), %0(p0) :: (store 1 into %ir.addr) - ; CHECK: G_STORE %1(s8), %0(p0) :: (store 1 into %ir.addr) + %1:_(s8) = G_LOAD %0 :: (load (s8) from %ir.addr) + ; CHECK: %1:_(s8) = G_LOAD %0(p0) :: (load (s8) from %ir.addr) + G_STORE %1(s8), %0(p0) :: (store (s8) into %ir.addr) + ; CHECK: G_STORE %1(s8), %0(p0) :: (store (s8) into %ir.addr) ... --- @@ -302,8 +302,8 @@ body: | bb.0.entry: liveins: $x0, $w1 %0:_(p0) = COPY $x0 - %1:_(s8) = G_LOAD %0 :: (load 1 from %ir.addr) - ; CHECK: %1:_(s8) = G_LOAD %0(p0) :: (load 1 from %ir.addr) + %1:_(s8) = G_LOAD %0 :: (load (s8) from %ir.addr) + ; CHECK: %1:_(s8) = G_LOAD %0(p0) :: (load (s8) from %ir.addr) G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.hint), %1(s8) ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.hint), %1(s8) ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-s1.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-s1.mir index 48f520b154ea1..b8da4288c784e 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-s1.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads-s1.mir @@ -24,12 +24,12 @@ body: | ; CHECK-LABEL: name: test ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[COPY]](p0) :: (load 1 from %ir.ptr) + ; CHECK: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[COPY]](p0) :: (load (s1) from %ir.ptr) ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s1) ; CHECK: $w0 = COPY [[ZEXT]](s32) ; CHECK: RET_ReallyLR implicit $w0 %0:_(p0) = COPY $x0 - %1:_(s1) = G_LOAD %0(p0) :: (load 1 from %ir.ptr) + %1:_(s1) = G_LOAD %0(p0) :: (load (s1) from %ir.ptr) %2:_(s8) = G_ZEXT %1(s1) %3:_(s32) = G_ANYEXT %2(s8) $w0 = COPY %3(s32) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads.mir index f89a899834191..627585e65ddfa 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads.mir @@ -7,10 +7,10 @@ body: | liveins: $x0 ; CHECK-LABEL: name: test_anyext ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; CHECK: $w0 = COPY [[LOAD]](s32) %0:_(p0) = COPY $x0 - %1:_(s8) = G_LOAD %0 :: (load 1) + %1:_(s8) = G_LOAD %0 :: (load (s8)) %2:_(s32) = G_ANYEXT %1 $w0 = COPY %2 ... @@ -22,10 +22,10 @@ body: | liveins: $x0 ; CHECK-LABEL: name: test_anyext_with_copy ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; CHECK: $w0 = COPY [[LOAD]](s32) %0:_(p0) = COPY $x0 - %1:_(s8) = G_LOAD %0 :: (load 1) + %1:_(s8) = G_LOAD %0 :: (load (s8)) %2:_(s8) = COPY %1 %3:_(s32) = G_ANYEXT %1 $w0 = COPY %3 @@ -38,10 +38,10 @@ body: | liveins: $x0 ; CHECK-LABEL: name: test_signext ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load 1) + ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s8)) ; CHECK: $w0 = COPY [[SEXTLOAD]](s32) %0:_(p0) = COPY $x0 - %1:_(s8) = G_LOAD %0 :: (load 1) + %1:_(s8) = G_LOAD %0 :: (load (s8)) %2:_(s32) = G_SEXT %1 $w0 = COPY %2 ... @@ -53,10 +53,10 @@ body: | liveins: $x0 ; CHECK-LABEL: name: test_zeroext ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load 1) + ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) ; CHECK: $w0 = COPY [[ZEXTLOAD]](s32) %0:_(p0) = COPY $x0 - %1:_(s8) = G_LOAD %0 :: (load 1) + %1:_(s8) = G_LOAD %0 :: (load (s8)) %2:_(s32) = G_ZEXT %1 $w0 = COPY %2 ... @@ -68,11 +68,11 @@ body: | liveins: $x0 ; CHECK-LABEL: name: test_2anyext ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; CHECK: $w0 = COPY [[LOAD]](s32) ; CHECK: $w1 = COPY [[LOAD]](s32) %0:_(p0) = COPY $x0 - %1:_(s8) = G_LOAD %0 :: (load 1) + %1:_(s8) = G_LOAD %0 :: (load (s8)) %2:_(s32) = G_ANYEXT %1 %3:_(s32) = G_ANYEXT %1 $w0 = COPY %2 @@ -86,11 +86,11 @@ body: | liveins: $x0 ; CHECK-LABEL: name: test_1anyext_1signext ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load 1) + ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s8)) ; CHECK: $w0 = COPY [[SEXTLOAD]](s32) ; CHECK: $w1 = COPY [[SEXTLOAD]](s32) %0:_(p0) = COPY $x0 - %1:_(s8) = G_LOAD %0 :: (load 1) + %1:_(s8) = G_LOAD %0 :: (load (s8)) %2:_(s32) = G_ANYEXT %1 %3:_(s32) = G_SEXT %1 $w0 = COPY %2 @@ -104,7 +104,7 @@ body: | liveins: $x0 ; CHECK-LABEL: name: test_1xor_1signext ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load 1) + ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s8)) ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[SEXTLOAD]](s32) ; CHECK: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 -1 ; CHECK: [[XOR:%[0-9]+]]:_(s8) = G_XOR [[TRUNC]], [[C]] @@ -112,7 +112,7 @@ body: | ; CHECK: $w0 = COPY [[ANYEXT]](s32) ; CHECK: $w1 = COPY [[SEXTLOAD]](s32) %0:_(p0) = COPY $x0 - %1:_(s8) = G_LOAD %0 :: (load 1) + %1:_(s8) = G_LOAD %0 :: (load (s8)) %2:_(s8) = G_CONSTANT i8 -1 %3:_(s8) = G_XOR %1, %2 %5:_(s32) = G_ANYEXT %3 @@ -128,11 +128,11 @@ body: | liveins: $x0 ; CHECK-LABEL: name: test_1anyext_1zeroext ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load 1) + ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) ; CHECK: $w0 = COPY [[ZEXTLOAD]](s32) ; CHECK: $w1 = COPY [[ZEXTLOAD]](s32) %0:_(p0) = COPY $x0 - %1:_(s8) = G_LOAD %0 :: (load 1) + %1:_(s8) = G_LOAD %0 :: (load (s8)) %2:_(s32) = G_ANYEXT %1 %3:_(s32) = G_ZEXT %1 $w0 = COPY %2 @@ -146,13 +146,13 @@ body: | liveins: $x0 ; CHECK-LABEL: name: test_1signext_1zeroext ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load 1) + ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s8)) ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[SEXTLOAD]](s32) ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s8) ; CHECK: $w0 = COPY [[ZEXT]](s32) ; CHECK: $w1 = COPY [[SEXTLOAD]](s32) %0:_(p0) = COPY $x0 - %1:_(s8) = G_LOAD %0 :: (load 1) + %1:_(s8) = G_LOAD %0 :: (load (s8)) %2:_(s32) = G_ZEXT %1 %3:_(s32) = G_SEXT %1 $w0 = COPY %2 @@ -166,12 +166,12 @@ body: | liveins: $x0 ; CHECK-LABEL: name: test_1anyext64_1signext32 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load 1) + ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s8)) ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[SEXTLOAD]](s32) ; CHECK: $x0 = COPY [[ANYEXT]](s64) ; CHECK: $w1 = COPY [[SEXTLOAD]](s32) %0:_(p0) = COPY $x0 - %1:_(s8) = G_LOAD %0 :: (load 1) + %1:_(s8) = G_LOAD %0 :: (load (s8)) %2:_(s64) = G_ANYEXT %1 %3:_(s32) = G_SEXT %1 $x0 = COPY %2 @@ -185,13 +185,13 @@ body: | liveins: $x0 ; CHECK-LABEL: name: test_1anyext32_1signext64 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s64) = G_SEXTLOAD [[COPY]](p0) :: (load 1) + ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s64) = G_SEXTLOAD [[COPY]](p0) :: (load (s8)) ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[SEXTLOAD]](s64) ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s8) ; CHECK: $w0 = COPY [[ANYEXT]](s32) ; CHECK: $x1 = COPY [[SEXTLOAD]](s64) %0:_(p0) = COPY $x0 - %1:_(s8) = G_LOAD %0 :: (load 1) + %1:_(s8) = G_LOAD %0 :: (load (s8)) %2:_(s32) = G_ANYEXT %1 %3:_(s64) = G_SEXT %1 $w0 = COPY %2 @@ -205,7 +205,7 @@ body: | liveins: $x0 ; CHECK-LABEL: name: test_2anyext32_1signext64 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s64) = G_SEXTLOAD [[COPY]](p0) :: (load 1) + ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s64) = G_SEXTLOAD [[COPY]](p0) :: (load (s8)) ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[SEXTLOAD]](s64) ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s8) ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s8) @@ -213,7 +213,7 @@ body: | ; CHECK: $x1 = COPY [[SEXTLOAD]](s64) ; CHECK: $w2 = COPY [[ANYEXT1]](s32) %0:_(p0) = COPY $x0 - %1:_(s8) = G_LOAD %0 :: (load 1) + %1:_(s8) = G_LOAD %0 :: (load (s8)) %2:_(s32) = G_ANYEXT %1 %3:_(s64) = G_SEXT %1 %4:_(s32) = G_ANYEXT %1 @@ -229,14 +229,14 @@ body: | ; CHECK: bb.0: ; CHECK: successors: %bb.1(0x80000000) ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; CHECK: G_BR %bb.1 ; CHECK: bb.1: ; CHECK: $w0 = COPY [[LOAD]](s32) bb.0: liveins: $x0 %0:_(p0) = COPY $x0 - %1:_(s8) = G_LOAD %0 :: (load 1) + %1:_(s8) = G_LOAD %0 :: (load (s8)) G_BR %bb.1 bb.1: %2:_(s32) = G_ANYEXT %1 @@ -250,14 +250,14 @@ body: | ; CHECK: bb.0: ; CHECK: successors: %bb.1(0x80000000) ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load 1) + ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s8)) ; CHECK: G_BR %bb.1 ; CHECK: bb.1: ; CHECK: $w0 = COPY [[SEXTLOAD]](s32) bb.0: liveins: $x0 %0:_(p0) = COPY $x0 - %1:_(s8) = G_LOAD %0 :: (load 1) + %1:_(s8) = G_LOAD %0 :: (load (s8)) G_BR %bb.1 bb.1: %2:_(s32) = G_SEXT %1 @@ -271,14 +271,14 @@ body: | ; CHECK: bb.0: ; CHECK: successors: %bb.1(0x80000000) ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load 1) + ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) ; CHECK: G_BR %bb.1 ; CHECK: bb.1: ; CHECK: $w0 = COPY [[ZEXTLOAD]](s32) bb.0: liveins: $x0 %0:_(p0) = COPY $x0 - %1:_(s8) = G_LOAD %0 :: (load 1) + %1:_(s8) = G_LOAD %0 :: (load (s8)) G_BR %bb.1 bb.1: %2:_(s32) = G_ZEXT %1 @@ -292,7 +292,7 @@ body: | ; CHECK: bb.0: ; CHECK: successors: %bb.1(0x80000000) ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; CHECK: G_BR %bb.1 ; CHECK: bb.1: ; CHECK: $w0 = COPY [[LOAD]](s32) @@ -300,7 +300,7 @@ body: | bb.0: liveins: $x0 %0:_(p0) = COPY $x0 - %1:_(s8) = G_LOAD %0 :: (load 1) + %1:_(s8) = G_LOAD %0 :: (load (s8)) %2:_(s32) = G_ANYEXT %1 G_BR %bb.1 bb.1: @@ -316,7 +316,7 @@ body: | ; CHECK: bb.0: ; CHECK: successors: %bb.1(0x80000000) ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load 1) + ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s8)) ; CHECK: G_BR %bb.1 ; CHECK: bb.1: ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[SEXTLOAD]](s32) @@ -325,7 +325,7 @@ body: | bb.0: liveins: $x0 %0:_(p0) = COPY $x0 - %1:_(s8) = G_LOAD %0 :: (load 1) + %1:_(s8) = G_LOAD %0 :: (load (s8)) G_BR %bb.1 bb.1: %2:_(s64) = G_ANYEXT %1 @@ -341,7 +341,7 @@ body: | ; CHECK: bb.0: ; CHECK: successors: %bb.1(0x80000000) ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s64) = G_SEXTLOAD [[COPY]](p0) :: (load 1) + ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s64) = G_SEXTLOAD [[COPY]](p0) :: (load (s8)) ; CHECK: G_BR %bb.1 ; CHECK: bb.1: ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[SEXTLOAD]](s64) @@ -351,7 +351,7 @@ body: | bb.0: liveins: $x0 %0:_(p0) = COPY $x0 - %1:_(s8) = G_LOAD %0 :: (load 1) + %1:_(s8) = G_LOAD %0 :: (load (s8)) G_BR %bb.1 bb.1: %2:_(s32) = G_ANYEXT %1 @@ -367,7 +367,7 @@ body: | ; CHECK: bb.0: ; CHECK: successors: %bb.1(0x80000000) ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s64) = G_SEXTLOAD [[COPY]](p0) :: (load 1) + ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s64) = G_SEXTLOAD [[COPY]](p0) :: (load (s8)) ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[SEXTLOAD]](s64) ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s8) ; CHECK: G_BR %bb.1 @@ -380,7 +380,7 @@ body: | bb.0: liveins: $x0 %0:_(p0) = COPY $x0 - %1:_(s8) = G_LOAD %0 :: (load 1) + %1:_(s8) = G_LOAD %0 :: (load (s8)) %4:_(s32) = G_ANYEXT %1 G_BR %bb.1 bb.1: @@ -403,12 +403,12 @@ body: | ; CHECK-LABEL: name: test_atomic ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p0) :: (load unordered 2) + ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p0) :: (load unordered (s16)) ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s16) ; CHECK: $w0 = COPY [[ZEXT]](s32) ; CHECK: RET_ReallyLR implicit $w0 %0:_(p0) = COPY $x0 - %1:_(s16) = G_LOAD %0(p0) :: (load unordered 2) + %1:_(s16) = G_LOAD %0(p0) :: (load unordered (s16)) %2:_(s32) = G_ZEXT %1(s16) $w0 = COPY %2(s32) RET_ReallyLR implicit $w0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-icmp-redundant-trunc.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-icmp-redundant-trunc.mir index f89d8e31fa485..67a30ba2093c9 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-icmp-redundant-trunc.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-icmp-redundant-trunc.mir @@ -12,14 +12,14 @@ body: | ; CHECK-LABEL: name: icmp_trunc_sextload ; CHECK: liveins: $x0 ; CHECK: %v:_(p0) = COPY $x0 - ; CHECK: %load:_(s64) = G_SEXTLOAD %v(p0) :: (load 4) + ; CHECK: %load:_(s64) = G_SEXTLOAD %v(p0) :: (load (s32)) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK: %cmp:_(s1) = G_ICMP intpred(ne), %load(s64), [[C]] ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %cmp(s1) ; CHECK: $w0 = COPY [[ANYEXT]](s32) ; CHECK: RET_ReallyLR implicit $w0 %v:_(p0) = COPY $x0 - %load:_(s64) = G_SEXTLOAD %v:_(p0) :: (load 4) + %load:_(s64) = G_SEXTLOAD %v:_(p0) :: (load (s32)) %trunc:_(s32) = G_TRUNC %load(s64) %zero:_(s32) = G_CONSTANT i32 0 %cmp:_(s1) = G_ICMP intpred(ne), %trunc(s32), %zero @@ -37,14 +37,14 @@ body: | ; CHECK-LABEL: name: icmp_trunc_sextload_eq ; CHECK: liveins: $x0 ; CHECK: %v:_(p0) = COPY $x0 - ; CHECK: %load:_(s64) = G_SEXTLOAD %v(p0) :: (load 4) + ; CHECK: %load:_(s64) = G_SEXTLOAD %v(p0) :: (load (s32)) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK: %cmp:_(s1) = G_ICMP intpred(eq), %load(s64), [[C]] ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %cmp(s1) ; CHECK: $w0 = COPY [[ANYEXT]](s32) ; CHECK: RET_ReallyLR implicit $w0 %v:_(p0) = COPY $x0 - %load:_(s64) = G_SEXTLOAD %v:_(p0) :: (load 4) + %load:_(s64) = G_SEXTLOAD %v:_(p0) :: (load (s32)) %trunc:_(s32) = G_TRUNC %load(s64) %zero:_(s32) = G_CONSTANT i32 0 %cmp:_(s1) = G_ICMP intpred(eq), %trunc(s32), %zero @@ -62,7 +62,7 @@ body: | ; CHECK-LABEL: name: icmp_trunc_sextload_wrongpred ; CHECK: liveins: $x0 ; CHECK: %v:_(p0) = COPY $x0 - ; CHECK: %load:_(s64) = G_SEXTLOAD %v(p0) :: (load 4) + ; CHECK: %load:_(s64) = G_SEXTLOAD %v(p0) :: (load (s32)) ; CHECK: %trunc:_(s32) = G_TRUNC %load(s64) ; CHECK: %zero:_(s32) = G_CONSTANT i32 0 ; CHECK: %cmp:_(s1) = G_ICMP intpred(slt), %trunc(s32), %zero @@ -70,7 +70,7 @@ body: | ; CHECK: $w0 = COPY [[ANYEXT]](s32) ; CHECK: RET_ReallyLR implicit $w0 %v:_(p0) = COPY $x0 - %load:_(s64) = G_SEXTLOAD %v:_(p0) :: (load 4) + %load:_(s64) = G_SEXTLOAD %v:_(p0) :: (load (s32)) %trunc:_(s32) = G_TRUNC %load(s64) %zero:_(s32) = G_CONSTANT i32 0 %cmp:_(s1) = G_ICMP intpred(slt), %trunc(s32), %zero @@ -88,7 +88,7 @@ body: | ; CHECK-LABEL: name: icmp_trunc_sextload_extend_mismatch ; CHECK: liveins: $x0 ; CHECK: %v:_(p0) = COPY $x0 - ; CHECK: %load:_(s64) = G_SEXTLOAD %v(p0) :: (load 4) + ; CHECK: %load:_(s64) = G_SEXTLOAD %v(p0) :: (load (s32)) ; CHECK: %trunc:_(s16) = G_TRUNC %load(s64) ; CHECK: %zero:_(s16) = G_CONSTANT i16 0 ; CHECK: %cmp:_(s1) = G_ICMP intpred(ne), %trunc(s16), %zero @@ -96,7 +96,7 @@ body: | ; CHECK: $w0 = COPY [[ANYEXT]](s32) ; CHECK: RET_ReallyLR implicit $w0 %v:_(p0) = COPY $x0 - %load:_(s64) = G_SEXTLOAD %v:_(p0) :: (load 4) + %load:_(s64) = G_SEXTLOAD %v:_(p0) :: (load (s32)) %trunc:_(s16) = G_TRUNC %load(s64) %zero:_(s16) = G_CONSTANT i16 0 %cmp:_(s1) = G_ICMP intpred(ne), %trunc(s16), %zero diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-not-really-equiv-insts.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-not-really-equiv-insts.mir index e387c5e58d6fb..b8187de6157bb 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-not-really-equiv-insts.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-not-really-equiv-insts.mir @@ -22,16 +22,16 @@ body: | ; CHECK-LABEL: name: not_necessarily_equiv_loads ; CHECK: %ptr:_(p0) = G_GLOBAL_VALUE @g - ; CHECK: %load1:_(s32) = G_LOAD %ptr(p0) :: (load 4 from @g) - ; CHECK: %load2:_(s32) = G_LOAD %ptr(p0) :: (load 4 from @g) + ; CHECK: %load1:_(s32) = G_LOAD %ptr(p0) :: (load (s32) from @g) + ; CHECK: %load2:_(s32) = G_LOAD %ptr(p0) :: (load (s32) from @g) ; CHECK: %or:_(s32) = G_OR %load2, %load1 - ; CHECK: G_STORE %or(s32), %ptr(p0) :: (store 4 into @g) + ; CHECK: G_STORE %or(s32), %ptr(p0) :: (store (s32) into @g) ; CHECK: RET_ReallyLR %ptr:_(p0) = G_GLOBAL_VALUE @g - %load1:_(s32) = G_LOAD %ptr(p0) :: (load 4 from @g) - %load2:_(s32) = G_LOAD %ptr(p0) :: (load 4 from @g) + %load1:_(s32) = G_LOAD %ptr(p0) :: (load (s32) from @g) + %load2:_(s32) = G_LOAD %ptr(p0) :: (load (s32) from @g) %or:_(s32) = G_OR %load2, %load1 - G_STORE %or(s32), %ptr(p0) :: (store 4 into @g) + G_STORE %or(s32), %ptr(p0) :: (store (s32) into @g) RET_ReallyLR ... @@ -46,14 +46,14 @@ body: | ; CHECK-LABEL: name: invariant_loads ; CHECK: %ptr:_(p0) = G_GLOBAL_VALUE @g - ; CHECK: %load2:_(s32) = G_LOAD %ptr(p0) :: (dereferenceable invariant load 4 from @g) - ; CHECK: G_STORE %load2(s32), %ptr(p0) :: (store 4 into @g) + ; CHECK: %load2:_(s32) = G_LOAD %ptr(p0) :: (dereferenceable invariant load (s32) from @g) + ; CHECK: G_STORE %load2(s32), %ptr(p0) :: (store (s32) into @g) ; CHECK: RET_ReallyLR %ptr:_(p0) = G_GLOBAL_VALUE @g - %load1:_(s32) = G_LOAD %ptr(p0) :: (dereferenceable invariant load 4 from @g) - %load2:_(s32) = G_LOAD %ptr(p0) :: (dereferenceable invariant load 4 from @g) + %load1:_(s32) = G_LOAD %ptr(p0) :: (dereferenceable invariant load (s32) from @g) + %load2:_(s32) = G_LOAD %ptr(p0) :: (dereferenceable invariant load (s32) from @g) %or:_(s32) = G_OR %load2, %load1 - G_STORE %or(s32), %ptr(p0) :: (store 4 into @g) + G_STORE %or(s32), %ptr(p0) :: (store (s32) into @g) RET_ReallyLR ... @@ -68,15 +68,15 @@ body: | ; CHECK-LABEL: name: both_have_to_be_invariant ; CHECK: %ptr:_(p0) = G_GLOBAL_VALUE @g - ; CHECK: %load1:_(s32) = G_LOAD %ptr(p0) :: (dereferenceable invariant load 4 from @g) - ; CHECK: %load2:_(s32) = G_LOAD %ptr(p0) :: (dereferenceable load 4 from @g) + ; CHECK: %load1:_(s32) = G_LOAD %ptr(p0) :: (dereferenceable invariant load (s32) from @g) + ; CHECK: %load2:_(s32) = G_LOAD %ptr(p0) :: (dereferenceable load (s32) from @g) ; CHECK: %or:_(s32) = G_OR %load2, %load1 - ; CHECK: G_STORE %or(s32), %ptr(p0) :: (store 4 into @g) + ; CHECK: G_STORE %or(s32), %ptr(p0) :: (store (s32) into @g) ; CHECK: RET_ReallyLR %ptr:_(p0) = G_GLOBAL_VALUE @g - %load1:_(s32) = G_LOAD %ptr(p0) :: (dereferenceable invariant load 4 from @g) - %load2:_(s32) = G_LOAD %ptr(p0) :: (dereferenceable load 4 from @g) + %load1:_(s32) = G_LOAD %ptr(p0) :: (dereferenceable invariant load (s32) from @g) + %load2:_(s32) = G_LOAD %ptr(p0) :: (dereferenceable load (s32) from @g) %or:_(s32) = G_OR %load2, %load1 - G_STORE %or(s32), %ptr(p0) :: (store 4 into @g) + G_STORE %or(s32), %ptr(p0) :: (store (s32) into @g) RET_ReallyLR ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-prop-extends-phi.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-prop-extends-phi.mir index 1653d9157732f..4719bb320d4cd 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-prop-extends-phi.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-prop-extends-phi.mir @@ -122,15 +122,15 @@ body: | ; CHECK: G_BR %bb.1 ; CHECK: bb.1: ; CHECK: successors: %bb.3(0x80000000) - ; CHECK: %ld1:_(<4 x s32>) = G_LOAD %ptr(p0) :: (load 16) + ; CHECK: %ld1:_(<4 x s32>) = G_LOAD %ptr(p0) :: (load (<4 x s32>)) ; CHECK: G_BR %bb.3 ; CHECK: bb.2: ; CHECK: successors: %bb.3(0x80000000) - ; CHECK: %ld2:_(<4 x s32>) = G_LOAD %ptr(p0) :: (load 16) + ; CHECK: %ld2:_(<4 x s32>) = G_LOAD %ptr(p0) :: (load (<4 x s32>)) ; CHECK: bb.3: ; CHECK: %phi:_(<4 x s32>) = G_PHI %ld1(<4 x s32>), %bb.1, %ld2(<4 x s32>), %bb.2 ; CHECK: %ext:_(<4 x s64>) = G_SEXT %phi(<4 x s32>) - ; CHECK: G_STORE %ext(<4 x s64>), %ptr(p0) :: (store 16) + ; CHECK: G_STORE %ext(<4 x s64>), %ptr(p0) :: (store (<4 x s64>)) ; CHECK: RET_ReallyLR bb.1.entry: liveins: $x0, $q0, $q1 @@ -143,16 +143,16 @@ body: | G_BR %bb.3 bb.2: - %ld1:_(<4 x s32>) = G_LOAD %ptr(p0) :: (load 16) + %ld1:_(<4 x s32>) = G_LOAD %ptr(p0) :: (load (<4 x s32>)) G_BR %bb.4 bb.3: - %ld2:_(<4 x s32>) = G_LOAD %ptr(p0) :: (load 16) + %ld2:_(<4 x s32>) = G_LOAD %ptr(p0) :: (load (<4 x s32>)) bb.4: %phi:_(<4 x s32>) = G_PHI %ld1(<4 x s32>), %bb.2, %ld2(<4 x s32>), %bb.3 %ext:_(<4 x s64>) = G_SEXT %phi - G_STORE %ext(<4 x s64>), %ptr(p0) :: (store 16) + G_STORE %ext(<4 x s64>), %ptr(p0) :: (store (<4 x s64>)) RET_ReallyLR ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-sextload-from-sextinreg.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-sextload-from-sextinreg.mir index 1b8dd93d48f11..b9a4a5fb580ee 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-sextload-from-sextinreg.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-sextload-from-sextinreg.mir @@ -13,12 +13,12 @@ body: | ; CHECK-LABEL: name: sextload_from_inreg ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s16) = G_SEXTLOAD [[COPY]](p0) :: (load 1, align 2) + ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s16) = G_SEXTLOAD [[COPY]](p0) :: (load (s8), align 2) ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SEXTLOAD]](s16) ; CHECK: $w0 = COPY [[ANYEXT]](s32) ; CHECK: RET_ReallyLR implicit $w0 %0:_(p0) = COPY $x0 - %1:_(s16) = G_LOAD %0(p0) :: (load 2) + %1:_(s16) = G_LOAD %0(p0) :: (load (s16)) %2:_(s16) = G_SEXT_INREG %1, 8 %3:_(s32) = G_ANYEXT %2(s16) $w0 = COPY %3(s32) @@ -39,14 +39,14 @@ body: | ; CHECK-LABEL: name: sextload_from_inreg_across_store ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s16) = G_SEXTLOAD [[COPY]](p0) :: (load 1, align 2) - ; CHECK: G_STORE [[COPY]](p0), [[COPY]](p0) :: (store 8) + ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s16) = G_SEXTLOAD [[COPY]](p0) :: (load (s8), align 2) + ; CHECK: G_STORE [[COPY]](p0), [[COPY]](p0) :: (store (p0)) ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SEXTLOAD]](s16) ; CHECK: $w0 = COPY [[ANYEXT]](s32) ; CHECK: RET_ReallyLR implicit $w0 %0:_(p0) = COPY $x0 - %1:_(s16) = G_LOAD %0(p0) :: (load 2) - G_STORE %0(p0), %0(p0) :: (store 8) + %1:_(s16) = G_LOAD %0(p0) :: (load (s16)) + G_STORE %0(p0), %0(p0) :: (store (p0)) %2:_(s16) = G_SEXT_INREG %1, 8 %3:_(s32) = G_ANYEXT %2(s16) $w0 = COPY %3(s32) @@ -66,12 +66,12 @@ body: | ; CHECK-LABEL: name: non_pow_2_inreg ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 24 ; CHECK: $w0 = COPY [[SEXT_INREG]](s32) ; CHECK: RET_ReallyLR implicit $w0 %0:_(p0) = COPY $x0 - %1:_(s32) = G_LOAD %0(p0) :: (load 4) + %1:_(s32) = G_LOAD %0(p0) :: (load (s32)) %2:_(s32) = G_SEXT_INREG %1, 24 $w0 = COPY %2(s32) RET_ReallyLR implicit $w0 @@ -90,13 +90,13 @@ body: | ; CHECK-LABEL: name: atomic ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p0) :: (load acquire 2) + ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p0) :: (load acquire (s16)) ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s16) = G_SEXT_INREG [[LOAD]], 8 ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SEXT_INREG]](s16) ; CHECK: $w0 = COPY [[ANYEXT]](s32) ; CHECK: RET_ReallyLR implicit $w0 %0:_(p0) = COPY $x0 - %1:_(s16) = G_LOAD %0(p0) :: (load acquire 2) + %1:_(s16) = G_LOAD %0(p0) :: (load acquire (s16)) %2:_(s16) = G_SEXT_INREG %1, 8 %3:_(s32) = G_ANYEXT %2(s16) $w0 = COPY %3(s32) @@ -116,13 +116,13 @@ body: | ; CHECK-LABEL: name: volatile ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p0) :: (volatile load 2) + ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p0) :: (volatile load (s16)) ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s16) = G_SEXT_INREG [[LOAD]], 8 ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SEXT_INREG]](s16) ; CHECK: $w0 = COPY [[ANYEXT]](s32) ; CHECK: RET_ReallyLR implicit $w0 %0:_(p0) = COPY $x0 - %1:_(s16) = G_LOAD %0(p0) :: (volatile load 2) + %1:_(s16) = G_LOAD %0(p0) :: (volatile load (s16)) %2:_(s16) = G_SEXT_INREG %1, 8 %3:_(s32) = G_ANYEXT %2(s16) $w0 = COPY %3(s32) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/preselect-process-phis.mir b/llvm/test/CodeGen/AArch64/GlobalISel/preselect-process-phis.mir index 01d55def986be..fff083a17dada 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/preselect-process-phis.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/preselect-process-phis.mir @@ -29,7 +29,7 @@ body: | ; CHECK: [[FCVTHSr:%[0-9]+]]:fpr16 = FCVTHSr [[COPY]] ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, [[FCVTHSr]], %subreg.hsub ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[SUBREG_TO_REG]] - ; CHECK: STRHHui [[PHI]], [[DEF1]], 0 :: (store 2 into `half* undef`) + ; CHECK: STRHHui [[PHI]], [[DEF1]], 0 :: (store (s16) into `half* undef`) ; CHECK: B %bb.2 bb.0: successors: %bb.1(0x80000000) @@ -50,7 +50,7 @@ body: | %3:gpr(s16) = G_PHI %1(s16), %bb.1, %5(s16), %bb.2 %5:fpr(s16) = G_FPTRUNC %8(s32) - G_STORE %3(s16), %4(p0) :: (store 2 into `half* undef`) + G_STORE %3(s16), %4(p0) :: (store (s16) into `half* undef`) G_BR %bb.2 ... @@ -83,7 +83,7 @@ body: | ; CHECK: successors: %bb.2(0x80000000) ; CHECK: [[PHI:%[0-9]+]]:fpr16 = PHI %7, %bb.2, [[COPY2]], %bb.1 ; CHECK: [[FCVTHSr:%[0-9]+]]:fpr16 = FCVTHSr [[COPY]] - ; CHECK: STRHui [[PHI]], [[DEF1]], 0 :: (store 2 into `half* undef`) + ; CHECK: STRHui [[PHI]], [[DEF1]], 0 :: (store (s16) into `half* undef`) ; CHECK: B %bb.2 bb.0: successors: %bb.1(0x80000000) @@ -104,7 +104,7 @@ body: | %3:fpr(s16) = G_PHI %5(s16), %bb.2, %1(s16), %bb.1 %5:fpr(s16) = G_FPTRUNC %8(s32) - G_STORE %3(s16), %4(p0) :: (store 2 into `half* undef`) + G_STORE %3(s16), %4(p0) :: (store (s16) into `half* undef`) G_BR %bb.2 ... @@ -154,10 +154,10 @@ body: | ; CHECK: %use_gp_phi1:gpr32 = PHI %gpr_1, %bb.0, %gp_phi1, %bb.4 ; CHECK: %use_gp_phi2:gpr32 = PHI %gpr_1, %bb.0, %gp_phi2, %bb.4 ; CHECK: %use_gp_phi3:gpr32 = PHI %gpr_1, %bb.0, %gp_phi3, %bb.4 - ; CHECK: STRHHui %use_fp_phi, %ptr, 0 :: (store 2) - ; CHECK: STRHHui %use_gp_phi1, %ptr, 0 :: (store 2) - ; CHECK: STRHHui %use_gp_phi2, %ptr, 0 :: (store 2) - ; CHECK: STRHHui %use_gp_phi3, %ptr, 0 :: (store 2) + ; CHECK: STRHHui %use_fp_phi, %ptr, 0 :: (store (s16)) + ; CHECK: STRHHui %use_gp_phi1, %ptr, 0 :: (store (s16)) + ; CHECK: STRHHui %use_gp_phi2, %ptr, 0 :: (store (s16)) + ; CHECK: STRHHui %use_gp_phi3, %ptr, 0 :: (store (s16)) ; CHECK: RET_ReallyLR bb.1: successors: %bb.2, %bb.6 @@ -187,9 +187,9 @@ body: | %use_gp_phi1:gpr(s16) = G_PHI %gpr_1(s16), %bb.1, %gp_phi1(s16), %bb.5 %use_gp_phi2:gpr(s16) = G_PHI %gpr_1(s16), %bb.1, %gp_phi2(s16), %bb.5 %use_gp_phi3:gpr(s16) = G_PHI %gpr_1(s16), %bb.1, %gp_phi3(s16), %bb.5 - G_STORE %use_fp_phi(s16), %ptr(p0) :: (store 2) - G_STORE %use_gp_phi1(s16), %ptr(p0) :: (store 2) - G_STORE %use_gp_phi2(s16), %ptr(p0) :: (store 2) - G_STORE %use_gp_phi3(s16), %ptr(p0) :: (store 2) + G_STORE %use_fp_phi(s16), %ptr(p0) :: (store (s16)) + G_STORE %use_gp_phi1(s16), %ptr(p0) :: (store (s16)) + G_STORE %use_gp_phi2(s16), %ptr(p0) :: (store (s16)) + G_STORE %use_gp_phi3(s16), %ptr(p0) :: (store (s16)) RET_ReallyLR ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-assert-sext.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-assert-sext.mir index ce4b98334ed58..c7d01ff5a460e 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-assert-sext.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-assert-sext.mir @@ -135,12 +135,12 @@ body: | ; CHECK: %ptr:gpr(p0) = COPY $x0 ; CHECK: %copy:fpr(s32) = COPY $s0 ; CHECK: %copy_assert_sext:fpr(s32) = G_ASSERT_SEXT %copy, 16 - ; CHECK: G_STORE %copy_assert_sext(s32), %ptr(p0) :: (store 4) + ; CHECK: G_STORE %copy_assert_sext(s32), %ptr(p0) :: (store (s32)) ; CHECK: RET_ReallyLR %ptr:_(p0) = COPY $x0 %copy:_(s32) = COPY $s0 %copy_assert_sext:_(s32) = G_ASSERT_SEXT %copy(s32), 16 - G_STORE %copy_assert_sext(s32), %ptr(p0) :: (store 4) + G_STORE %copy_assert_sext(s32), %ptr(p0) :: (store (s32)) RET_ReallyLR ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-assert-zext.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-assert-zext.mir index d0b8e65d1fe42..592bd1f8d4a9f 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-assert-zext.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-assert-zext.mir @@ -135,12 +135,12 @@ body: | ; CHECK: %ptr:gpr(p0) = COPY $x0 ; CHECK: %copy:fpr(s32) = COPY $s0 ; CHECK: %copy_assert_zext:fpr(s32) = G_ASSERT_ZEXT %copy, 16 - ; CHECK: G_STORE %copy_assert_zext(s32), %ptr(p0) :: (store 4) + ; CHECK: G_STORE %copy_assert_zext(s32), %ptr(p0) :: (store (s32)) ; CHECK: RET_ReallyLR %ptr:_(p0) = COPY $x0 %copy:_(s32) = COPY $s0 %copy_assert_zext:_(s32) = G_ASSERT_ZEXT %copy(s32), 16 - G_STORE %copy_assert_zext(s32), %ptr(p0) :: (store 4) + G_STORE %copy_assert_zext(s32), %ptr(p0) :: (store (s32)) RET_ReallyLR ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-ceil.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-ceil.mir index bab5884876359..e538aab695c1c 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-ceil.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-ceil.mir @@ -13,12 +13,12 @@ body: | ; CHECK-LABEL: name: load_gets_fpr ; CHECK: liveins: $x0 ; CHECK: %ptr:gpr(p0) = COPY $x0 - ; CHECK: %load:fpr(s32) = G_LOAD %ptr(p0) :: (load 4) + ; CHECK: %load:fpr(s32) = G_LOAD %ptr(p0) :: (load (s32)) ; CHECK: %fceil:fpr(s32) = G_FCEIL %load ; CHECK: $s0 = COPY %fceil(s32) ; CHECK: RET_ReallyLR implicit $s0 %ptr:_(p0) = COPY $x0 - %load:_(s32) = G_LOAD %ptr(p0) :: (load 4) + %load:_(s32) = G_LOAD %ptr(p0) :: (load (s32)) %fceil:_(s32) = G_FCEIL %load $s0 = COPY %fceil:_(s32) RET_ReallyLR implicit $s0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir index 46177b4f1b1fb..a9aa47af2a341 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir @@ -24,13 +24,13 @@ body: | ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:gpr(p0) = COPY $x0 ; CHECK: [[C:%[0-9]+]]:fpr(s32) = G_FCONSTANT float 2.000000e+00 - ; CHECK: [[LOAD:%[0-9]+]]:fpr(s32) = G_LOAD [[COPY]](p0) :: (load 4) + ; CHECK: [[LOAD:%[0-9]+]]:fpr(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) ; CHECK: [[FCMP:%[0-9]+]]:gpr(s32) = G_FCMP floatpred(uno), [[C]](s32), [[LOAD]] ; CHECK: $w0 = COPY [[FCMP]](s32) ; CHECK: RET_ReallyLR implicit $w0 %0:_(p0) = COPY $x0 %1:_(s32) = G_FCONSTANT float 2.0 - %2:_(s32) = G_LOAD %0 :: (load 4) + %2:_(s32) = G_LOAD %0 :: (load (s32)) %3:_(s32) = G_FCMP floatpred(uno), %1, %2 $w0 = COPY %3(s32) RET_ReallyLR implicit $w0 @@ -70,11 +70,11 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:gpr(p0) = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:gpr(s32) = COPY $w1 ; CHECK: [[SITOFP:%[0-9]+]]:fpr(s32) = G_SITOFP [[COPY1]](s32) - ; CHECK: G_STORE [[SITOFP]](s32), [[COPY]](p0) :: (store 4) + ; CHECK: G_STORE [[SITOFP]](s32), [[COPY]](p0) :: (store (s32)) %0:_(p0) = COPY $x0 %1:_(s32) = COPY $w1 %2:_(s32) = G_SITOFP %1 - G_STORE %2, %0 :: (store 4) + G_STORE %2, %0 :: (store (s32)) ... --- @@ -120,7 +120,7 @@ body: | ; CHECK: G_BR %bb.2 ; CHECK: bb.1: ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: %load:fpr(s32) = G_LOAD %ptr(p0) :: (load 4) + ; CHECK: %load:fpr(s32) = G_LOAD %ptr(p0) :: (load (s32)) ; CHECK: G_BR %bb.2 ; CHECK: bb.2: ; CHECK: %phi:fpr(s32) = G_PHI %fpr_copy(s32), %bb.0, %load(s32), %bb.1 @@ -137,7 +137,7 @@ body: | G_BR %bb.2 bb.1: successors: %bb.2 - %load:_(s32) = G_LOAD %ptr(p0) :: (load 4) + %load:_(s32) = G_LOAD %ptr(p0) :: (load (s32)) G_BR %bb.2 bb.2: %phi:_(s32) = G_PHI %fpr_copy(s32), %bb.0, %load(s32), %bb.1 @@ -162,7 +162,7 @@ body: | ; CHECK: G_BR %bb.2 ; CHECK: bb.1: ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: %load:gpr(s32) = G_LOAD %ptr(p0) :: (load 4) + ; CHECK: %load:gpr(s32) = G_LOAD %ptr(p0) :: (load (s32)) ; CHECK: G_BR %bb.2 ; CHECK: bb.2: ; CHECK: %phi:gpr(s32) = G_PHI %gpr_copy(s32), %bb.0, %load(s32), %bb.1 @@ -179,7 +179,7 @@ body: | G_BR %bb.2 bb.1: successors: %bb.2 - %load:_(s32) = G_LOAD %ptr(p0) :: (load 4) + %load:_(s32) = G_LOAD %ptr(p0) :: (load (s32)) G_BR %bb.2 bb.2: %phi:_(s32) = G_PHI %gpr_copy(s32), %bb.0, %load(s32), %bb.1 @@ -375,12 +375,12 @@ body: | ; CHECK-LABEL: name: load_used_by_sitofp ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:gpr(p0) = COPY $x0 - ; CHECK: [[LOAD:%[0-9]+]]:fpr(s32) = G_LOAD [[COPY]](p0) :: (load 4) + ; CHECK: [[LOAD:%[0-9]+]]:fpr(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) ; CHECK: [[SITOFP:%[0-9]+]]:fpr(s32) = G_SITOFP [[LOAD]](s32) ; CHECK: $s0 = COPY [[SITOFP]](s32) ; CHECK: RET_ReallyLR implicit $s0 %0:_(p0) = COPY $x0 - %1:_(s32) = G_LOAD %0 :: (load 4) + %1:_(s32) = G_LOAD %0 :: (load (s32)) %2:_(s32) = G_SITOFP %1:_(s32) $s0 = COPY %2(s32) RET_ReallyLR implicit $s0 @@ -395,12 +395,12 @@ body: | ; CHECK-LABEL: name: load_used_by_uitofp ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:gpr(p0) = COPY $x0 - ; CHECK: [[LOAD:%[0-9]+]]:fpr(s32) = G_LOAD [[COPY]](p0) :: (load 4) + ; CHECK: [[LOAD:%[0-9]+]]:fpr(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) ; CHECK: [[UITOFP:%[0-9]+]]:fpr(s32) = G_UITOFP [[LOAD]](s32) ; CHECK: $s0 = COPY [[UITOFP]](s32) ; CHECK: RET_ReallyLR implicit $s0 %0:_(p0) = COPY $x0 - %1:_(s32) = G_LOAD %0 :: (load 4) + %1:_(s32) = G_LOAD %0 :: (load (s32)) %2:_(s32) = G_UITOFP %1:_(s32) $s0 = COPY %2(s32) RET_ReallyLR implicit $s0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-intrinsic.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-intrinsic.mir index 7e89c9917b506..82cfb351baf4f 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-intrinsic.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-intrinsic.mir @@ -38,12 +38,12 @@ body: | ; CHECK-LABEL: name: uaddlv_fpr_load ; CHECK: liveins: $x0 ; CHECK: %ptr:gpr(p0) = COPY $x0 - ; CHECK: %load:fpr(<2 x s32>) = G_LOAD %ptr(p0) :: (load 8) + ; CHECK: %load:fpr(<2 x s32>) = G_LOAD %ptr(p0) :: (load (<2 x s32>)) ; CHECK: %intrin:fpr(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %load(<2 x s32>) ; CHECK: $w0 = COPY %intrin(s32) ; CHECK: RET_ReallyLR implicit $w0 %ptr:_(p0) = COPY $x0 - %load:_(<2 x s32>) = G_LOAD %ptr :: (load 8) + %load:_(<2 x s32>) = G_LOAD %ptr :: (load (<2 x s32>)) %intrin:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %load(<2 x s32>) $w0 = COPY %intrin(s32) RET_ReallyLR implicit $w0 @@ -63,8 +63,8 @@ body: | ; CHECK: %ptr:gpr(p0) = COPY $x0 ; CHECK: [[COPY:%[0-9]+]]:fpr(<2 x s32>) = COPY %copy(<2 x s32>) ; CHECK: %intrin:fpr(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), [[COPY]](<2 x s32>) - ; CHECK: G_STORE %intrin(s32), %ptr(p0) :: (store 4) + ; CHECK: G_STORE %intrin(s32), %ptr(p0) :: (store (s32)) %copy:_(<2 x s32>) = COPY $x0 %ptr:_(p0) = COPY $x0 %intrin:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %copy(<2 x s32>) - G_STORE %intrin, %ptr :: (store 4) + G_STORE %intrin, %ptr :: (store (s32)) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbankselect-build-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbankselect-build-vector.mir index 6a9b309fdc004..eade999c3d524 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/regbankselect-build-vector.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbankselect-build-vector.mir @@ -92,8 +92,8 @@ body: | ; CHECK: [[PTR_ADD:%[0-9]+]]:gpr(p0) = G_PTR_ADD [[COPY]], [[C]](s64) ; CHECK: [[C1:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 344 ; CHECK: [[PTR_ADD1:%[0-9]+]]:gpr(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK: [[LOAD:%[0-9]+]]:fpr(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 4) - ; CHECK: [[LOAD1:%[0-9]+]]:fpr(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 4) + ; CHECK: [[LOAD:%[0-9]+]]:fpr(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32)) + ; CHECK: [[LOAD1:%[0-9]+]]:fpr(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32)) ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:fpr(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; CHECK: $d0 = COPY [[BUILD_VECTOR]](<2 x s32>) ; CHECK: RET_ReallyLR implicit $d0 @@ -102,8 +102,8 @@ body: | %5:_(p0) = G_PTR_ADD %0, %4(s64) %6:_(s64) = G_CONSTANT i64 344 %7:_(p0) = G_PTR_ADD %0, %6(s64) - %15:_(s32) = G_LOAD %5(p0) :: (load 4) - %20:_(s32) = G_LOAD %7(p0) :: (load 4) + %15:_(s32) = G_LOAD %5(p0) :: (load (s32)) + %20:_(s32) = G_LOAD %7(p0) :: (load (s32)) %21:_(<2 x s32>) = G_BUILD_VECTOR %15(s32), %20(s32) $d0 = COPY %21(<2 x s32>) RET_ReallyLR implicit $d0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbankselect-default.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbankselect-default.mir index 09884c75409e9..2ecde8b582b5e 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/regbankselect-default.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbankselect-default.mir @@ -550,9 +550,9 @@ body: | liveins: $x0 ; CHECK-LABEL: name: test_load_s32_p0 ; CHECK: [[COPY:%[0-9]+]]:gpr(p0) = COPY $x0 - ; CHECK: [[LOAD:%[0-9]+]]:gpr(s32) = G_LOAD [[COPY]](p0) :: (load 4) + ; CHECK: [[LOAD:%[0-9]+]]:gpr(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) %0(p0) = COPY $x0 - %1(s32) = G_LOAD %0 :: (load 4) + %1(s32) = G_LOAD %0 :: (load (s32)) ... --- @@ -567,10 +567,10 @@ body: | ; CHECK-LABEL: name: test_store_s32_p0 ; CHECK: [[COPY:%[0-9]+]]:gpr(p0) = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:gpr(s32) = COPY $w1 - ; CHECK: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store 4) + ; CHECK: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store (s32)) %0(p0) = COPY $x0 %1(s32) = COPY $w1 - G_STORE %1, %0 :: (store 4) + G_STORE %1, %0 :: (store (s32)) ... --- diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/ret-vec-promote.ll b/llvm/test/CodeGen/AArch64/GlobalISel/ret-vec-promote.ll index 2d39203920b06..c83d3b34298b7 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/ret-vec-promote.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/ret-vec-promote.ll @@ -7,7 +7,7 @@ define <4 x i1> @ret_v4i1(<4 x i1> *%v) { ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[LOAD:%[0-9]+]]:_(<4 x s1>) = G_LOAD [[COPY]](p0) :: (load 1 from %ir.v, align 4) + ; CHECK: [[LOAD:%[0-9]+]]:_(<4 x s1>) = G_LOAD [[COPY]](p0) :: (load (<4 x s1>) from %ir.v, align 4) ; CHECK: [[ANYEXT:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[LOAD]](<4 x s1>) ; CHECK: $d0 = COPY [[ANYEXT]](<4 x s16>) ; CHECK: RET_ReallyLR implicit $d0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-arith-extended-reg.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-arith-extended-reg.mir index b8737ba1255da..80511ea18efc9 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-arith-extended-reg.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-arith-extended-reg.mir @@ -655,7 +655,7 @@ body: | ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[COPY1]], %subreg.sub_32 ; CHECK: %zext:gpr64 = UBFMXri [[SUBREG_TO_REG]], 0, 15 ; CHECK: [[COPY3:%[0-9]+]]:gpr64 = COPY [[COPY2]] - ; CHECK: STRXroX [[COPY3]], [[COPY]], %zext, 0, 1 :: (store 8) + ; CHECK: STRXroX [[COPY3]], [[COPY]], %zext, 0, 1 :: (store (p0)) ; CHECK: RET_ReallyLR %0:gpr(p0) = COPY $x0 %1:gpr(s32) = COPY $w1 @@ -665,7 +665,7 @@ body: | %cst:gpr(s64) = G_CONSTANT i64 3 %shl:gpr(s64) = G_SHL %zext, %cst(s64) %gep:gpr(p0) = G_PTR_ADD %0, %shl(s64) - G_STORE %2(p0), %gep(p0) :: (store 8) + G_STORE %2(p0), %gep(p0) :: (store (p0)) RET_ReallyLR ... --- @@ -690,7 +690,7 @@ body: | ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[COPY1]], %subreg.sub_32 ; CHECK: %zext:gpr64 = UBFMXri [[SUBREG_TO_REG]], 0, 7 ; CHECK: [[COPY3:%[0-9]+]]:gpr64 = COPY [[COPY2]] - ; CHECK: STRXroX [[COPY3]], [[COPY]], %zext, 0, 1 :: (store 8) + ; CHECK: STRXroX [[COPY3]], [[COPY]], %zext, 0, 1 :: (store (p0)) ; CHECK: RET_ReallyLR %0:gpr(p0) = COPY $x0 %1:gpr(s32) = COPY $w1 @@ -700,7 +700,7 @@ body: | %cst:gpr(s64) = G_CONSTANT i64 3 %shl:gpr(s64) = G_SHL %zext, %cst(s64) %gep:gpr(p0) = G_PTR_ADD %0, %shl(s64) - G_STORE %2(p0), %gep(p0) :: (store 8) + G_STORE %2(p0), %gep(p0) :: (store (p0)) RET_ReallyLR ... --- @@ -725,7 +725,7 @@ body: | ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[COPY1]], %subreg.sub_32 ; CHECK: %zext:gpr64 = SBFMXri [[SUBREG_TO_REG]], 0, 15 ; CHECK: [[COPY3:%[0-9]+]]:gpr64 = COPY [[COPY2]] - ; CHECK: STRXroX [[COPY3]], [[COPY]], %zext, 0, 1 :: (store 8) + ; CHECK: STRXroX [[COPY3]], [[COPY]], %zext, 0, 1 :: (store (p0)) ; CHECK: RET_ReallyLR %0:gpr(p0) = COPY $x0 %1:gpr(s32) = COPY $w1 @@ -735,7 +735,7 @@ body: | %cst:gpr(s64) = G_CONSTANT i64 3 %shl:gpr(s64) = G_SHL %zext, %cst(s64) %gep:gpr(p0) = G_PTR_ADD %0, %shl(s64) - G_STORE %2(p0), %gep(p0) :: (store 8) + G_STORE %2(p0), %gep(p0) :: (store (p0)) RET_ReallyLR ... --- @@ -760,7 +760,7 @@ body: | ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[COPY1]], %subreg.sub_32 ; CHECK: %zext:gpr64 = SBFMXri [[SUBREG_TO_REG]], 0, 7 ; CHECK: [[COPY3:%[0-9]+]]:gpr64 = COPY [[COPY2]] - ; CHECK: STRXroX [[COPY3]], [[COPY]], %zext, 0, 1 :: (store 8) + ; CHECK: STRXroX [[COPY3]], [[COPY]], %zext, 0, 1 :: (store (p0)) ; CHECK: RET_ReallyLR %0:gpr(p0) = COPY $x0 %1:gpr(s32) = COPY $w1 @@ -770,6 +770,6 @@ body: | %cst:gpr(s64) = G_CONSTANT i64 3 %shl:gpr(s64) = G_SHL %zext, %cst(s64) %gep:gpr(p0) = G_PTR_ADD %0, %shl(s64) - G_STORE %2(p0), %gep(p0) :: (store 8) + G_STORE %2(p0), %gep(p0) :: (store (p0)) RET_ReallyLR ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-atomic-load-store.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-atomic-load-store.mir index c3b63dba88f62..46e062c317650 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-atomic-load-store.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-atomic-load-store.mir @@ -26,11 +26,11 @@ body: | ; CHECK-LABEL: name: load_acq_i8 ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDARB:%[0-9]+]]:gpr32 = LDARB [[COPY]] :: (load acquire 1 from %ir.ptr, align 8) + ; CHECK: [[LDARB:%[0-9]+]]:gpr32 = LDARB [[COPY]] :: (load acquire (s8) from %ir.ptr, align 8) ; CHECK: $w0 = COPY [[LDARB]] ; CHECK: RET_ReallyLR implicit $w0 %0:gpr(p0) = COPY $x0 - %2:gpr(s32) = G_LOAD %0(p0) :: (load acquire 1 from %ir.ptr, align 8) + %2:gpr(s32) = G_LOAD %0(p0) :: (load acquire (s8) from %ir.ptr, align 8) $w0 = COPY %2(s32) RET_ReallyLR implicit $w0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-atomicrmw.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-atomicrmw.mir index 7a670d9c52c94..c08334654be92 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-atomicrmw.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-atomicrmw.mir @@ -31,11 +31,11 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1 ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32 - ; CHECK: [[SWPX:%[0-9]+]]:gpr64 = SWPX [[SUBREG_TO_REG]], [[COPY]] :: (load store monotonic 8 on %ir.addr) + ; CHECK: [[SWPX:%[0-9]+]]:gpr64 = SWPX [[SUBREG_TO_REG]], [[COPY]] :: (load store monotonic (s64) on %ir.addr) ; CHECK: $x0 = COPY [[SWPX]] %0:gpr(p0) = COPY $x0 %1:gpr(s64) = G_CONSTANT i64 1 - %2:gpr(s64) = G_ATOMICRMW_XCHG %0, %1 :: (load store monotonic 8 on %ir.addr) + %2:gpr(s64) = G_ATOMICRMW_XCHG %0, %1 :: (load store monotonic (s64) on %ir.addr) $x0 = COPY %2(s64) ... --- @@ -51,11 +51,11 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1 ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32 - ; CHECK: [[LDADDX:%[0-9]+]]:gpr64 = LDADDX [[SUBREG_TO_REG]], [[COPY]] :: (load store monotonic 8 on %ir.addr) + ; CHECK: [[LDADDX:%[0-9]+]]:gpr64 = LDADDX [[SUBREG_TO_REG]], [[COPY]] :: (load store monotonic (s64) on %ir.addr) ; CHECK: $x0 = COPY [[LDADDX]] %0:gpr(p0) = COPY $x0 %1:gpr(s64) = G_CONSTANT i64 1 - %2:gpr(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store monotonic 8 on %ir.addr) + %2:gpr(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store monotonic (s64) on %ir.addr) $x0 = COPY %2(s64) ... --- @@ -70,11 +70,11 @@ body: | ; CHECK-LABEL: name: atomicrmw_add_i32 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1 - ; CHECK: [[LDADDALW:%[0-9]+]]:gpr32 = LDADDALW [[MOVi32imm]], [[COPY]] :: (load store seq_cst 4 on %ir.addr) + ; CHECK: [[LDADDALW:%[0-9]+]]:gpr32 = LDADDALW [[MOVi32imm]], [[COPY]] :: (load store seq_cst (s32) on %ir.addr) ; CHECK: $w0 = COPY [[LDADDALW]] %0:gpr(p0) = COPY $x0 %1:gpr(s32) = G_CONSTANT i32 1 - %2:gpr(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst 4 on %ir.addr) + %2:gpr(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32) on %ir.addr) $w0 = COPY %2(s32) ... @@ -90,11 +90,11 @@ body: | ; CHECK-LABEL: name: atomicrmw_sub_i32 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1 - ; CHECK: [[LDADDALW:%[0-9]+]]:gpr32 = LDADDALW [[MOVi32imm]], [[COPY]] :: (load store seq_cst 4 on %ir.addr) + ; CHECK: [[LDADDALW:%[0-9]+]]:gpr32 = LDADDALW [[MOVi32imm]], [[COPY]] :: (load store seq_cst (s32) on %ir.addr) ; CHECK: $w0 = COPY [[LDADDALW]] %0:gpr(p0) = COPY $x0 %1:gpr(s32) = G_CONSTANT i32 1 - %2:gpr(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst 4 on %ir.addr) + %2:gpr(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32) on %ir.addr) $w0 = COPY %2(s32) ... @@ -111,11 +111,11 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1 ; CHECK: [[ORNWrr:%[0-9]+]]:gpr32 = ORNWrr $wzr, [[MOVi32imm]] - ; CHECK: [[LDCLRAW:%[0-9]+]]:gpr32 = LDCLRAW [[ORNWrr]], [[COPY]] :: (load store acquire 4 on %ir.addr) + ; CHECK: [[LDCLRAW:%[0-9]+]]:gpr32 = LDCLRAW [[ORNWrr]], [[COPY]] :: (load store acquire (s32) on %ir.addr) ; CHECK: $w0 = COPY [[LDCLRAW]] %0:gpr(p0) = COPY $x0 %1:gpr(s32) = G_CONSTANT i32 1 - %2:gpr(s32) = G_ATOMICRMW_AND %0, %1 :: (load store acquire 4 on %ir.addr) + %2:gpr(s32) = G_ATOMICRMW_AND %0, %1 :: (load store acquire (s32) on %ir.addr) $w0 = COPY %2(s32) ... @@ -131,11 +131,11 @@ body: | ; CHECK-LABEL: name: atomicrmw_or_i32 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1 - ; CHECK: [[LDSETLW:%[0-9]+]]:gpr32 = LDSETLW [[MOVi32imm]], [[COPY]] :: (load store release 4 on %ir.addr) + ; CHECK: [[LDSETLW:%[0-9]+]]:gpr32 = LDSETLW [[MOVi32imm]], [[COPY]] :: (load store release (s32) on %ir.addr) ; CHECK: $w0 = COPY [[LDSETLW]] %0:gpr(p0) = COPY $x0 %1:gpr(s32) = G_CONSTANT i32 1 - %2:gpr(s32) = G_ATOMICRMW_OR %0, %1 :: (load store release 4 on %ir.addr) + %2:gpr(s32) = G_ATOMICRMW_OR %0, %1 :: (load store release (s32) on %ir.addr) $w0 = COPY %2(s32) ... @@ -151,11 +151,11 @@ body: | ; CHECK-LABEL: name: atomicrmw_xor_i32 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1 - ; CHECK: [[LDEORALW:%[0-9]+]]:gpr32 = LDEORALW [[MOVi32imm]], [[COPY]] :: (load store acq_rel 4 on %ir.addr) + ; CHECK: [[LDEORALW:%[0-9]+]]:gpr32 = LDEORALW [[MOVi32imm]], [[COPY]] :: (load store acq_rel (s32) on %ir.addr) ; CHECK: $w0 = COPY [[LDEORALW]] %0:gpr(p0) = COPY $x0 %1:gpr(s32) = G_CONSTANT i32 1 - %2:gpr(s32) = G_ATOMICRMW_XOR %0, %1 :: (load store acq_rel 4 on %ir.addr) + %2:gpr(s32) = G_ATOMICRMW_XOR %0, %1 :: (load store acq_rel (s32) on %ir.addr) $w0 = COPY %2(s32) ... @@ -171,11 +171,11 @@ body: | ; CHECK-LABEL: name: atomicrmw_min_i32 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1 - ; CHECK: [[LDSMINALW:%[0-9]+]]:gpr32 = LDSMINALW [[MOVi32imm]], [[COPY]] :: (load store acq_rel 4 on %ir.addr) + ; CHECK: [[LDSMINALW:%[0-9]+]]:gpr32 = LDSMINALW [[MOVi32imm]], [[COPY]] :: (load store acq_rel (s32) on %ir.addr) ; CHECK: $w0 = COPY [[LDSMINALW]] %0:gpr(p0) = COPY $x0 %1:gpr(s32) = G_CONSTANT i32 1 - %2:gpr(s32) = G_ATOMICRMW_MIN %0, %1 :: (load store acq_rel 4 on %ir.addr) + %2:gpr(s32) = G_ATOMICRMW_MIN %0, %1 :: (load store acq_rel (s32) on %ir.addr) $w0 = COPY %2(s32) ... @@ -191,11 +191,11 @@ body: | ; CHECK-LABEL: name: atomicrmw_max_i32 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1 - ; CHECK: [[LDSMAXALW:%[0-9]+]]:gpr32 = LDSMAXALW [[MOVi32imm]], [[COPY]] :: (load store acq_rel 4 on %ir.addr) + ; CHECK: [[LDSMAXALW:%[0-9]+]]:gpr32 = LDSMAXALW [[MOVi32imm]], [[COPY]] :: (load store acq_rel (s32) on %ir.addr) ; CHECK: $w0 = COPY [[LDSMAXALW]] %0:gpr(p0) = COPY $x0 %1:gpr(s32) = G_CONSTANT i32 1 - %2:gpr(s32) = G_ATOMICRMW_MAX %0, %1 :: (load store acq_rel 4 on %ir.addr) + %2:gpr(s32) = G_ATOMICRMW_MAX %0, %1 :: (load store acq_rel (s32) on %ir.addr) $w0 = COPY %2(s32) ... @@ -211,11 +211,11 @@ body: | ; CHECK-LABEL: name: atomicrmw_umin_i32 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1 - ; CHECK: [[LDUMINALW:%[0-9]+]]:gpr32 = LDUMINALW [[MOVi32imm]], [[COPY]] :: (load store acq_rel 4 on %ir.addr) + ; CHECK: [[LDUMINALW:%[0-9]+]]:gpr32 = LDUMINALW [[MOVi32imm]], [[COPY]] :: (load store acq_rel (s32) on %ir.addr) ; CHECK: $w0 = COPY [[LDUMINALW]] %0:gpr(p0) = COPY $x0 %1:gpr(s32) = G_CONSTANT i32 1 - %2:gpr(s32) = G_ATOMICRMW_UMIN %0, %1 :: (load store acq_rel 4 on %ir.addr) + %2:gpr(s32) = G_ATOMICRMW_UMIN %0, %1 :: (load store acq_rel (s32) on %ir.addr) $w0 = COPY %2(s32) ... @@ -231,10 +231,10 @@ body: | ; CHECK-LABEL: name: atomicrmw_umax_i32 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1 - ; CHECK: [[LDUMAXALW:%[0-9]+]]:gpr32 = LDUMAXALW [[MOVi32imm]], [[COPY]] :: (load store acq_rel 4 on %ir.addr) + ; CHECK: [[LDUMAXALW:%[0-9]+]]:gpr32 = LDUMAXALW [[MOVi32imm]], [[COPY]] :: (load store acq_rel (s32) on %ir.addr) ; CHECK: $w0 = COPY [[LDUMAXALW]] %0:gpr(p0) = COPY $x0 %1:gpr(s32) = G_CONSTANT i32 1 - %2:gpr(s32) = G_ATOMICRMW_UMAX %0, %1 :: (load store acq_rel 4 on %ir.addr) + %2:gpr(s32) = G_ATOMICRMW_UMAX %0, %1 :: (load store acq_rel (s32) on %ir.addr) $w0 = COPY %2(s32) ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-blockaddress.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-blockaddress.mir index 45012f23de620..1f63da4d12c59 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-blockaddress.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-blockaddress.mir @@ -32,7 +32,7 @@ body: | ; CHECK: bb.0 (%ir-block.0): ; CHECK: [[MOVaddrBA:%[0-9]+]]:gpr64 = MOVaddrBA target-flags(aarch64-page) blockaddress(@test_blockaddress, %ir-block.block), target-flags(aarch64-pageoff, aarch64-nc) blockaddress(@test_blockaddress, %ir-block.block) ; CHECK: [[MOVaddr:%[0-9]+]]:gpr64common = MOVaddr target-flags(aarch64-page) @addr, target-flags(aarch64-pageoff, aarch64-nc) @addr - ; CHECK: STRXui [[MOVaddrBA]], [[MOVaddr]], 0 :: (store 8 into @addr) + ; CHECK: STRXui [[MOVaddrBA]], [[MOVaddr]], 0 :: (store (p0) into @addr) ; CHECK: BR [[MOVaddrBA]] ; CHECK: bb.1.block (address-taken): ; CHECK: RET_ReallyLR @@ -46,14 +46,14 @@ body: | ; LARGE: [[MOVKXi3:%[0-9]+]]:gpr64 = MOVKXi [[MOVZXi1]], target-flags(aarch64-g1, aarch64-nc) @addr, 16 ; LARGE: [[MOVKXi4:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi3]], target-flags(aarch64-g2, aarch64-nc) @addr, 32 ; LARGE: [[MOVKXi5:%[0-9]+]]:gpr64common = MOVKXi [[MOVKXi4]], target-flags(aarch64-g3) @addr, 48 - ; LARGE: STRXui [[MOVKXi2]], [[MOVKXi5]], 0 :: (store 8 into @addr) + ; LARGE: STRXui [[MOVKXi2]], [[MOVKXi5]], 0 :: (store (p0) into @addr) ; LARGE: BR [[MOVKXi2]] ; LARGE: bb.1.block (address-taken): ; LARGE: RET_ReallyLR bb.1 (%ir-block.0): %0:gpr(p0) = G_BLOCK_ADDR blockaddress(@test_blockaddress, %ir-block.block) %1:gpr(p0) = G_GLOBAL_VALUE @addr - G_STORE %0(p0), %1(p0) :: (store 8 into @addr) + G_STORE %0(p0), %1(p0) :: (store (p0) into @addr) G_BRINDIRECT %0(p0) bb.2.block (address-taken): diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-brcond-of-binop.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-brcond-of-binop.mir index 9d480b8e96e02..d8e4a549148fd 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-brcond-of-binop.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-brcond-of-binop.mir @@ -11,7 +11,7 @@ body: | ; CHECK: bb.0: ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 0 :: (load 1) + ; CHECK: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 0 :: (load (s8)) ; CHECK: TBZW [[LDRBBui]], 0, %bb.2 ; CHECK: bb.1: ; CHECK: RET_ReallyLR @@ -22,7 +22,7 @@ body: | liveins: $x0 %0:gpr(p0) = COPY $x0 - %8:gpr(s8) = G_LOAD %0(p0) :: (load 1) + %8:gpr(s8) = G_LOAD %0(p0) :: (load (s8)) %4:gpr(s32) = G_ANYEXT %8(s8) %5:gpr(s32) = G_CONSTANT i32 1 %6:gpr(s32) = G_XOR %4, %5 @@ -47,7 +47,7 @@ body: | ; CHECK: bb.0: ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 0 :: (load 1) + ; CHECK: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 0 :: (load (s8)) ; CHECK: TBZW [[LDRBBui]], 0, %bb.2 ; CHECK: bb.1: ; CHECK: RET_ReallyLR @@ -61,7 +61,7 @@ body: | liveins: $x0 %0:gpr(p0) = COPY $x0 - %8:gpr(s8) = G_LOAD %0(p0) :: (load 1) + %8:gpr(s8) = G_LOAD %0(p0) :: (load (s8)) %4:gpr(s64) = G_ANYEXT %8(s8) %5:gpr(s64) = G_CONSTANT i64 1 %6:gpr(s64) = G_XOR %4, %5 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-cbz.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-cbz.mir index 2cb6e5a253c20..f7b78b1e0f094 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-cbz.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-cbz.mir @@ -123,7 +123,7 @@ body: | ; CHECK: CBZX [[COPY]], %bb.2 ; CHECK: bb.1: ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: STRXui $xzr, [[COPY]], 0 :: (store 8) + ; CHECK: STRXui $xzr, [[COPY]], 0 :: (store (s64)) ; CHECK: bb.2: ; CHECK: RET_ReallyLR bb.1: @@ -139,7 +139,7 @@ body: | bb.2: %5:gpr(s64) = G_CONSTANT i64 0 - G_STORE %5(s64), %0(p0) :: (store 8) + G_STORE %5(s64), %0(p0) :: (store (s64)) bb.3: RET_ReallyLR @@ -157,12 +157,12 @@ body: | ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRXui:%[0-9]+]]:gpr64common = LDRXui [[COPY]], 0 :: (load 8) + ; CHECK: [[LDRXui:%[0-9]+]]:gpr64common = LDRXui [[COPY]], 0 :: (load (s64)) ; CHECK: [[SUBSXri:%[0-9]+]]:gpr64 = SUBSXri [[LDRXui]], 42, 0, implicit-def $nzcv ; CHECK: Bcc 0, %bb.2, implicit $nzcv ; CHECK: bb.1: ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: STRXui $xzr, [[COPY]], 0 :: (store 8) + ; CHECK: STRXui $xzr, [[COPY]], 0 :: (store (s64)) ; CHECK: bb.2: ; CHECK: RET_ReallyLR bb.1: @@ -172,14 +172,14 @@ body: | %0:gpr(p0) = COPY $x0 %2:gpr(s64) = G_CONSTANT i64 42 %4:gpr(s64) = G_CONSTANT i64 0 - %1:gpr(s64) = G_LOAD %0(p0) :: (load 8) + %1:gpr(s64) = G_LOAD %0(p0) :: (load (s64)) %5:gpr(s32) = G_ICMP intpred(eq), %1(s64), %2 %3:gpr(s1) = G_TRUNC %5(s32) G_BRCOND %3(s1), %bb.3 bb.2: %6:gpr(s64) = G_CONSTANT i64 0 - G_STORE %6(s64), %0(p0) :: (store 8) + G_STORE %6(s64), %0(p0) :: (store (s64)) bb.3: RET_ReallyLR diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-cmpxchg.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-cmpxchg.mir index 574364a0ead76..dd2b15a6a560d 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-cmpxchg.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-cmpxchg.mir @@ -21,12 +21,12 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $wzr ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1 - ; CHECK: [[CASW:%[0-9]+]]:gpr32 = CASW [[COPY1]], [[MOVi32imm]], [[COPY]] :: (load store monotonic 4 on %ir.addr) + ; CHECK: [[CASW:%[0-9]+]]:gpr32 = CASW [[COPY1]], [[MOVi32imm]], [[COPY]] :: (load store monotonic (s32) on %ir.addr) ; CHECK: $w0 = COPY [[CASW]] %0:gpr(p0) = COPY $x0 %1:gpr(s32) = G_CONSTANT i32 0 %2:gpr(s32) = G_CONSTANT i32 1 - %3:gpr(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic 4 on %ir.addr) + %3:gpr(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic (s32) on %ir.addr) $w0 = COPY %3(s32) ... @@ -44,11 +44,11 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $xzr ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1 ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32 - ; CHECK: [[CASX:%[0-9]+]]:gpr64 = CASX [[COPY1]], [[SUBREG_TO_REG]], [[COPY]] :: (load store monotonic 8 on %ir.addr) + ; CHECK: [[CASX:%[0-9]+]]:gpr64 = CASX [[COPY1]], [[SUBREG_TO_REG]], [[COPY]] :: (load store monotonic (s64) on %ir.addr) ; CHECK: $x0 = COPY [[CASX]] %0:gpr(p0) = COPY $x0 %1:gpr(s64) = G_CONSTANT i64 0 %2:gpr(s64) = G_CONSTANT i64 1 - %3:gpr(s64) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic 8 on %ir.addr) + %3:gpr(s64) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic (s64) on %ir.addr) $x0 = COPY %3(s64) ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-const-pool.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-const-pool.mir index 0e4e364e55714..041b76c54a5f8 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-const-pool.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-const-pool.mir @@ -18,14 +18,14 @@ body: | ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0 - ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0 :: (load 16 from constant-pool) - ; CHECK: STRQui [[LDRQui]], [[COPY]], 0 :: (store 16) + ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0 :: (load (s128) from constant-pool) + ; CHECK: STRQui [[LDRQui]], [[COPY]], 0 :: (store (<2 x s64>)) ; CHECK: RET_ReallyLR %0:gpr(p0) = COPY $x0 %3:fpr(s64) = G_FCONSTANT double 5.000000e-01 %2:fpr(s64) = G_FCONSTANT double 1.600000e+01 %1:fpr(<2 x s64>) = G_BUILD_VECTOR %2(s64), %3(s64) - G_STORE %1(<2 x s64>), %0(p0) :: (store 16) + G_STORE %1(<2 x s64>), %0(p0) :: (store (<2 x s64>)) RET_ReallyLR ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-const-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-const-vector.mir index 6a34bea17a422..038c214a268d8 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-const-vector.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-const-vector.mir @@ -20,13 +20,13 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0 ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0 - ; CHECK: STRQui [[LDRQui]], [[COPY]], 0 :: (store 16) + ; CHECK: STRQui [[LDRQui]], [[COPY]], 0 :: (store (<2 x s64>)) ; CHECK: RET_ReallyLR %0:gpr(p0) = COPY $x0 %3:fpr(s64) = G_FCONSTANT double 5.000000e-01 %2:fpr(s64) = G_FCONSTANT double 1.600000e+01 %1:fpr(<2 x s64>) = G_BUILD_VECTOR %2(s64), %3(s64) - G_STORE %1(<2 x s64>), %0(p0) :: (store 16) + G_STORE %1(<2 x s64>), %0(p0) :: (store (<2 x s64>)) RET_ReallyLR ... @@ -49,13 +49,13 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0 ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0 - ; CHECK: STRQui [[LDRQui]], [[COPY]], 0 :: (store 16) + ; CHECK: STRQui [[LDRQui]], [[COPY]], 0 :: (store (<4 x s32>)) ; CHECK: RET_ReallyLR %0:gpr(p0) = COPY $x0 %3:fpr(s32) = G_FCONSTANT float 5.000000e-01 %2:fpr(s32) = G_FCONSTANT float 1.600000e+01 %1:fpr(<4 x s32>) = G_BUILD_VECTOR %2(s32), %3(s32), %3(s32), %3(s32) - G_STORE %1(<4 x s32>), %0(p0) :: (store 16) + G_STORE %1(<4 x s32>), %0(p0) :: (store (<4 x s32>)) RET_ReallyLR ... @@ -78,13 +78,13 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0 ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0 - ; CHECK: STRQui [[LDRQui]], [[COPY]], 0 :: (store 16) + ; CHECK: STRQui [[LDRQui]], [[COPY]], 0 :: (store (<2 x s64>)) ; CHECK: RET_ReallyLR %0:gpr(p0) = COPY $x0 %3:gpr(s64) = G_CONSTANT i64 67839 %2:gpr(s64) = G_CONSTANT i64 12375 %1:fpr(<2 x s64>) = G_BUILD_VECTOR %2(s64), %3(s64) - G_STORE %1(<2 x s64>), %0(p0) :: (store 16) + G_STORE %1(<2 x s64>), %0(p0) :: (store (<2 x s64>)) RET_ReallyLR ... @@ -107,13 +107,13 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0 ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0 - ; CHECK: STRQui [[LDRQui]], [[COPY]], 0 :: (store 16) + ; CHECK: STRQui [[LDRQui]], [[COPY]], 0 :: (store (<4 x s32>)) ; CHECK: RET_ReallyLR %0:gpr(p0) = COPY $x0 %3:gpr(s32) = G_CONSTANT i32 67839 %2:gpr(s32) = G_CONSTANT i32 12375 %1:fpr(<4 x s32>) = G_BUILD_VECTOR %2(s32), %3(s32), %2(s32), %3(s32) - G_STORE %1(<4 x s32>), %0(p0) :: (store 16) + G_STORE %1(<4 x s32>), %0(p0) :: (store (<4 x s32>)) RET_ReallyLR ... @@ -137,13 +137,13 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0 ; CHECK: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0 - ; CHECK: STRDui [[LDRDui]], [[COPY]], 0 :: (store 8) + ; CHECK: STRDui [[LDRDui]], [[COPY]], 0 :: (store (<2 x s32>)) ; CHECK: RET_ReallyLR %0:gpr(p0) = COPY $x0 %3:gpr(s32) = G_CONSTANT i32 67839 %2:gpr(s32) = G_CONSTANT i32 12375 %1:fpr(<2 x s32>) = G_BUILD_VECTOR %2(s32), %3(s32) - G_STORE %1(<2 x s32>), %0(p0) :: (store 8) + G_STORE %1(<2 x s32>), %0(p0) :: (store (<2 x s32>)) RET_ReallyLR ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-extload.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-extload.mir index e3b8f7fb40bc1..4ebeb2a773f30 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-extload.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-extload.mir @@ -20,10 +20,10 @@ body: | ; CHECK-LABEL: name: aextload_s32_from_s16 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[T0:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 0 :: (load 2 from %ir.addr) + ; CHECK: [[T0:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 0 :: (load (s16) from %ir.addr) ; CHECK: $w0 = COPY [[T0]] %0:gpr(p0) = COPY $x0 - %1:gpr(s32) = G_LOAD %0 :: (load 2 from %ir.addr) + %1:gpr(s32) = G_LOAD %0 :: (load (s16) from %ir.addr) $w0 = COPY %1(s32) ... @@ -38,11 +38,11 @@ body: | ; CHECK-LABEL: name: aextload_s32_from_s16 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[T0:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 0 :: (load 2 from %ir.addr) + ; CHECK: [[T0:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 0 :: (load (s16) from %ir.addr) ; CHECK: [[T1:%[0-9]+]]:gpr32all = COPY [[T0]] ; CHECK: $w0 = COPY [[T1]] %0:gpr(p0) = COPY $x0 - %1:gpr(s16) = G_LOAD %0 :: (load 2 from %ir.addr) + %1:gpr(s16) = G_LOAD %0 :: (load (s16) from %ir.addr) %2:gpr(s32) = G_ANYEXT %1 $w0 = COPY %2(s32) ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-fmul-indexed.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-fmul-indexed.mir index e528adad6a75c..3d0beffe26581 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-fmul-indexed.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-fmul-indexed.mir @@ -20,7 +20,7 @@ body: | ; CHECK: liveins: $d0, $x0 ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[COPY1]], 0 :: (load 8, align 4) + ; CHECK: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[COPY1]], 0 :: (load (<2 x s32>), align 4) ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[LDRDui]], %subreg.dsub ; CHECK: [[FMULv2i32_indexed:%[0-9]+]]:fpr64 = FMULv2i32_indexed [[COPY]], [[INSERT_SUBREG]], 0 @@ -28,7 +28,7 @@ body: | ; CHECK: RET_ReallyLR implicit $d0 %0:fpr(<2 x s32>) = COPY $d0 %1:gpr(p0) = COPY $x0 - %2:fpr(<2 x s32>) = G_LOAD %1(p0) :: (load 8, align 4) + %2:fpr(<2 x s32>) = G_LOAD %1(p0) :: (load (<2 x s32>), align 4) %9:fpr(<2 x s32>) = G_IMPLICIT_DEF %10:fpr(<4 x s32>) = G_CONCAT_VECTORS %2(<2 x s32>), %9(<2 x s32>) %8:gpr(s64) = G_CONSTANT i64 0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-gv-cmodel-large.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-gv-cmodel-large.mir index 9eede217d9574..d503074f9f693 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-gv-cmodel-large.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-gv-cmodel-large.mir @@ -40,9 +40,9 @@ body: | ; CHECK: [[MOVKXi4:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi3]], target-flags(aarch64-g2, aarch64-nc) @foo2, 32 ; CHECK: [[MOVKXi5:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi4]], target-flags(aarch64-g3) @foo2, 48 ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY [[MOVKXi5]] - ; CHECK: STRWui $wzr, %stack.0.retval, 0 :: (store 4 into %ir.retval) - ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 0 :: (load 4 from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo1, i64 0, i64 0)`) - ; CHECK: [[LDRWui1:%[0-9]+]]:gpr32 = LDRWui [[COPY1]], 0 :: (load 4 from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo2, i64 0, i64 0)`) + ; CHECK: STRWui $wzr, %stack.0.retval, 0 :: (store (s32) into %ir.retval) + ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 0 :: (load (s32) from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo1, i64 0, i64 0)`) + ; CHECK: [[LDRWui1:%[0-9]+]]:gpr32 = LDRWui [[COPY1]], 0 :: (load (s32) from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo2, i64 0, i64 0)`) ; CHECK: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[LDRWui]], [[LDRWui1]] ; CHECK: $w0 = COPY [[ADDWrr]] ; CHECK: RET_ReallyLR implicit $w0 @@ -52,9 +52,9 @@ body: | %7:gpr(p0) = G_GLOBAL_VALUE @foo2 %6:gpr(p0) = COPY %7(p0) %0:gpr(p0) = G_FRAME_INDEX %stack.0.retval - G_STORE %1(s32), %0(p0) :: (store 4 into %ir.retval) - %2:gpr(s32) = G_LOAD %3(p0) :: (load 4 from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo1, i64 0, i64 0)`) - %5:gpr(s32) = G_LOAD %6(p0) :: (load 4 from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo2, i64 0, i64 0)`) + G_STORE %1(s32), %0(p0) :: (store (s32) into %ir.retval) + %2:gpr(s32) = G_LOAD %3(p0) :: (load (s32) from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo1, i64 0, i64 0)`) + %5:gpr(s32) = G_LOAD %6(p0) :: (load (s32) from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo2, i64 0, i64 0)`) %8:gpr(s32) = G_ADD %2, %5 $w0 = COPY %8(s32) RET_ReallyLR implicit $w0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-gv-cmodel-tiny.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-gv-cmodel-tiny.mir index 92d044dbdb6b6..64aee2d0afbf4 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-gv-cmodel-tiny.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-gv-cmodel-tiny.mir @@ -34,9 +34,9 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY [[ADR]] ; CHECK: [[ADR1:%[0-9]+]]:gpr64 = ADR @foo2 ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY [[ADR1]] - ; CHECK: STRWui $wzr, %stack.0.retval, 0 :: (store 4 into %ir.retval) - ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 0 :: (load 4 from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo1, i64 0, i64 0)`) - ; CHECK: [[LDRWui1:%[0-9]+]]:gpr32 = LDRWui [[COPY1]], 0 :: (load 4 from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo2, i64 0, i64 0)`) + ; CHECK: STRWui $wzr, %stack.0.retval, 0 :: (store (s32) into %ir.retval) + ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 0 :: (load (s32) from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo1, i64 0, i64 0)`) + ; CHECK: [[LDRWui1:%[0-9]+]]:gpr32 = LDRWui [[COPY1]], 0 :: (load (s32) from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo2, i64 0, i64 0)`) ; CHECK: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[LDRWui]], [[LDRWui1]] ; CHECK: $w0 = COPY [[ADDWrr]] ; CHECK: RET_ReallyLR implicit $w0 @@ -46,9 +46,9 @@ body: | %7:gpr(p0) = G_GLOBAL_VALUE @foo2 %6:gpr(p0) = COPY %7(p0) %0:gpr(p0) = G_FRAME_INDEX %stack.0.retval - G_STORE %1(s32), %0(p0) :: (store 4 into %ir.retval) - %2:gpr(s32) = G_LOAD %3(p0) :: (load 4 from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo1, i64 0, i64 0)`) - %5:gpr(s32) = G_LOAD %6(p0) :: (load 4 from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo2, i64 0, i64 0)`) + G_STORE %1(s32), %0(p0) :: (store (s32) into %ir.retval) + %2:gpr(s32) = G_LOAD %3(p0) :: (load (s32) from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo1, i64 0, i64 0)`) + %5:gpr(s32) = G_LOAD %6(p0) :: (load (s32) from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo2, i64 0, i64 0)`) %8:gpr(s32) = G_ADD %2, %5 $w0 = COPY %8(s32) RET_ReallyLR implicit $w0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt-constrain.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt-constrain.mir index 440a03173c83c..affaef15856e7 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt-constrain.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt-constrain.mir @@ -24,7 +24,7 @@ body: | ; CHECK: bb.0: ; CHECK: successors: %bb.3(0x40000000), %bb.1(0x40000000) ; CHECK: [[DEF:%[0-9]+]]:gpr64common = IMPLICIT_DEF - ; CHECK: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[DEF]], 0 :: (load 1) + ; CHECK: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[DEF]], 0 :: (load (s8)) ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64common = SUBREG_TO_REG 0, [[LDRBBui]], %subreg.sub_32 ; CHECK: [[SUBSXri:%[0-9]+]]:gpr64 = SUBSXri [[SUBREG_TO_REG]], 8, 0, implicit-def $nzcv ; CHECK: Bcc 8, %bb.3, implicit $nzcv @@ -40,7 +40,7 @@ body: | ; CHECK: RET_ReallyLR bb.1: %1:gpr(p0) = G_IMPLICIT_DEF - %5:gpr(s64) = G_ZEXTLOAD %1(p0) :: (load 1) + %5:gpr(s64) = G_ZEXTLOAD %1(p0) :: (load (s8)) %7:gpr(s64) = G_CONSTANT i64 8 %16:gpr(s32) = G_ICMP intpred(ugt), %5(s64), %7 %8:gpr(s1) = G_TRUNC %16(s32) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-ldaxr-intrin.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-ldaxr-intrin.mir index e6550647524e2..9dbbc93ba5cfa 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-ldaxr-intrin.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-ldaxr-intrin.mir @@ -20,12 +20,12 @@ body: | ; CHECK-LABEL: name: test_load_acquire_i8 ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDAXRB:%[0-9]+]]:gpr32 = LDAXRB [[COPY]] :: (volatile load 1 from %ir.addr) + ; CHECK: [[LDAXRB:%[0-9]+]]:gpr32 = LDAXRB [[COPY]] :: (volatile load (s8) from %ir.addr) ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64all = SUBREG_TO_REG 0, [[LDAXRB]], %subreg.sub_32 ; CHECK: $x1 = COPY [[SUBREG_TO_REG]] ; CHECK: RET_ReallyLR implicit $x1 %0:gpr(p0) = COPY $x0 - %1:gpr(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.ldaxr), %0(p0) :: (volatile load 1 from %ir.addr) + %1:gpr(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.ldaxr), %0(p0) :: (volatile load (s8) from %ir.addr) $x1 = COPY %1 RET_ReallyLR implicit $x1 @@ -42,12 +42,12 @@ body: | ; CHECK-LABEL: name: test_load_acquire_i16 ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDAXRH:%[0-9]+]]:gpr32 = LDAXRH [[COPY]] :: (volatile load 2 from %ir.addr) + ; CHECK: [[LDAXRH:%[0-9]+]]:gpr32 = LDAXRH [[COPY]] :: (volatile load (s16) from %ir.addr) ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64all = SUBREG_TO_REG 0, [[LDAXRH]], %subreg.sub_32 ; CHECK: $x1 = COPY [[SUBREG_TO_REG]] ; CHECK: RET_ReallyLR implicit $x1 %0:gpr(p0) = COPY $x0 - %1:gpr(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.ldaxr), %0(p0) :: (volatile load 2 from %ir.addr) + %1:gpr(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.ldaxr), %0(p0) :: (volatile load (s16) from %ir.addr) $x1 = COPY %1 RET_ReallyLR implicit $x1 @@ -64,12 +64,12 @@ body: | ; CHECK-LABEL: name: test_load_acquire_i32 ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDAXRW:%[0-9]+]]:gpr32 = LDAXRW [[COPY]] :: (volatile load 4 from %ir.addr) + ; CHECK: [[LDAXRW:%[0-9]+]]:gpr32 = LDAXRW [[COPY]] :: (volatile load (s32) from %ir.addr) ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64all = SUBREG_TO_REG 0, [[LDAXRW]], %subreg.sub_32 ; CHECK: $x1 = COPY [[SUBREG_TO_REG]] ; CHECK: RET_ReallyLR implicit $x1 %0:gpr(p0) = COPY $x0 - %1:gpr(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.ldaxr), %0(p0) :: (volatile load 4 from %ir.addr) + %1:gpr(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.ldaxr), %0(p0) :: (volatile load (s32) from %ir.addr) $x1 = COPY %1 RET_ReallyLR implicit $x1 @@ -85,10 +85,10 @@ body: | liveins: $x0 ; CHECK-LABEL: name: test_load_acquire_i64 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDAXRX:%[0-9]+]]:gpr64 = LDAXRX [[COPY]] :: (volatile load 8 from %ir.addr) + ; CHECK: [[LDAXRX:%[0-9]+]]:gpr64 = LDAXRX [[COPY]] :: (volatile load (s64) from %ir.addr) ; CHECK: $x1 = COPY [[LDAXRX]] ; CHECK: RET_ReallyLR implicit $x1 %0:gpr(p0) = COPY $x0 - %1:gpr(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.ldaxr), %0(p0) :: (volatile load 8 from %ir.addr) + %1:gpr(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.ldaxr), %0(p0) :: (volatile load (s64) from %ir.addr) $x1 = COPY %1 RET_ReallyLR implicit $x1 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-ldxr-intrin.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-ldxr-intrin.mir index 13deedddc8445..8e4e07cae8ab8 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-ldxr-intrin.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-ldxr-intrin.mir @@ -19,12 +19,12 @@ body: | ; CHECK-LABEL: name: test_load_i8 ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDXRB:%[0-9]+]]:gpr32 = LDXRB [[COPY]] :: (volatile load 1 from %ir.addr) + ; CHECK: [[LDXRB:%[0-9]+]]:gpr32 = LDXRB [[COPY]] :: (volatile load (s8) from %ir.addr) ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64all = SUBREG_TO_REG 0, [[LDXRB]], %subreg.sub_32 ; CHECK: $x1 = COPY [[SUBREG_TO_REG]] ; CHECK: RET_ReallyLR implicit $x1 %0:gpr(p0) = COPY $x0 - %1:gpr(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.ldxr), %0(p0) :: (volatile load 1 from %ir.addr) + %1:gpr(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.ldxr), %0(p0) :: (volatile load (s8) from %ir.addr) $x1 = COPY %1(s64) RET_ReallyLR implicit $x1 @@ -41,12 +41,12 @@ body: | ; CHECK-LABEL: name: test_load_i16 ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDXRH:%[0-9]+]]:gpr32 = LDXRH [[COPY]] :: (volatile load 2 from %ir.addr) + ; CHECK: [[LDXRH:%[0-9]+]]:gpr32 = LDXRH [[COPY]] :: (volatile load (s16) from %ir.addr) ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64all = SUBREG_TO_REG 0, [[LDXRH]], %subreg.sub_32 ; CHECK: $x1 = COPY [[SUBREG_TO_REG]] ; CHECK: RET_ReallyLR implicit $x1 %0:gpr(p0) = COPY $x0 - %1:gpr(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.ldxr), %0(p0) :: (volatile load 2 from %ir.addr) + %1:gpr(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.ldxr), %0(p0) :: (volatile load (s16) from %ir.addr) $x1 = COPY %1(s64) RET_ReallyLR implicit $x1 @@ -63,12 +63,12 @@ body: | ; CHECK-LABEL: name: test_load_i32 ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDXRW:%[0-9]+]]:gpr32 = LDXRW [[COPY]] :: (volatile load 4 from %ir.addr) + ; CHECK: [[LDXRW:%[0-9]+]]:gpr32 = LDXRW [[COPY]] :: (volatile load (s32) from %ir.addr) ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64all = SUBREG_TO_REG 0, [[LDXRW]], %subreg.sub_32 ; CHECK: $x1 = COPY [[SUBREG_TO_REG]] ; CHECK: RET_ReallyLR implicit $x1 %0:gpr(p0) = COPY $x0 - %1:gpr(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.ldxr), %0(p0) :: (volatile load 4 from %ir.addr) + %1:gpr(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.ldxr), %0(p0) :: (volatile load (s32) from %ir.addr) $x1 = COPY %1(s64) RET_ReallyLR implicit $x1 @@ -86,10 +86,10 @@ body: | ; CHECK-LABEL: name: test_load_i64 ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDXRX:%[0-9]+]]:gpr64 = LDXRX [[COPY]] :: (volatile load 8 from %ir.addr) + ; CHECK: [[LDXRX:%[0-9]+]]:gpr64 = LDXRX [[COPY]] :: (volatile load (s64) from %ir.addr) ; CHECK: $x1 = COPY [[LDXRX]] ; CHECK: RET_ReallyLR implicit $x1 %0:gpr(p0) = COPY $x0 - %1:gpr(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.ldxr), %0(p0) :: (volatile load 8 from %ir.addr) + %1:gpr(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.ldxr), %0(p0) :: (volatile load (s64) from %ir.addr) $x1 = COPY %1(s64) RET_ReallyLR implicit $x1 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-load-store-vector-of-ptr.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-load-store-vector-of-ptr.mir index d46102a6ac825..e2659d379cd55 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-load-store-vector-of-ptr.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-load-store-vector-of-ptr.mir @@ -34,12 +34,12 @@ body: | ; CHECK: liveins: $q0, $x0 ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: STRQui [[COPY]], [[COPY1]], 0 :: (store 16 into %ir.ptr) + ; CHECK: STRQui [[COPY]], [[COPY1]], 0 :: (store (<2 x s64>) into %ir.ptr) ; CHECK: RET_ReallyLR %0:fpr(<2 x p0>) = COPY $q0 %1:gpr(p0) = COPY $x0 %2:fpr(<2 x s64>) = G_BITCAST %0(<2 x p0>) - G_STORE %2(<2 x s64>), %1(p0) :: (store 16 into %ir.ptr) + G_STORE %2(<2 x s64>), %1(p0) :: (store (<2 x s64>) into %ir.ptr) RET_ReallyLR ... @@ -61,11 +61,11 @@ body: | ; CHECK-LABEL: name: load_v2p0 ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (load 16 from %ir.ptr) + ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (load (<2 x s64>) from %ir.ptr) ; CHECK: $q0 = COPY [[LDRQui]] ; CHECK: RET_ReallyLR implicit $q0 %0:gpr(p0) = COPY $x0 - %2:fpr(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.ptr) + %2:fpr(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.ptr) %1:fpr(<2 x p0>) = G_BITCAST %2(<2 x s64>) $q0 = COPY %1(<2 x p0>) RET_ReallyLR implicit $q0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-load.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-load.mir index b6a329b04d07d..4339005ea7f99 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-load.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-load.mir @@ -56,10 +56,10 @@ body: | ; CHECK-LABEL: name: load_s64_gpr ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[COPY]], 0 :: (load 8 from %ir.addr) + ; CHECK: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[COPY]], 0 :: (load (s64) from %ir.addr) ; CHECK: $x0 = COPY [[LDRXui]] %0(p0) = COPY $x0 - %1(s64) = G_LOAD %0 :: (load 8 from %ir.addr) + %1(s64) = G_LOAD %0 :: (load (s64) from %ir.addr) $x0 = COPY %1(s64) ... @@ -78,10 +78,10 @@ body: | ; CHECK-LABEL: name: load_s32_gpr ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 0 :: (load 4 from %ir.addr) + ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 0 :: (load (s32) from %ir.addr) ; CHECK: $w0 = COPY [[LDRWui]] %0(p0) = COPY $x0 - %1(s32) = G_LOAD %0 :: (load 4 from %ir.addr) + %1(s32) = G_LOAD %0 :: (load (s32) from %ir.addr) $w0 = COPY %1(s32) ... @@ -96,10 +96,10 @@ body: | ; CHECK-LABEL: name: load_s16_gpr_anyext ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 0 :: (load 2 from %ir.addr) + ; CHECK: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 0 :: (load (s16) from %ir.addr) ; CHECK: $w0 = COPY [[LDRHHui]] %0:gpr(p0) = COPY $x0 - %1:gpr(s32) = G_LOAD %0 :: (load 2 from %ir.addr) + %1:gpr(s32) = G_LOAD %0 :: (load (s16) from %ir.addr) $w0 = COPY %1(s32) ... @@ -118,11 +118,11 @@ body: | ; CHECK-LABEL: name: load_s16_gpr ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 0 :: (load 2 from %ir.addr) + ; CHECK: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 0 :: (load (s16) from %ir.addr) ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[LDRHHui]] ; CHECK: $w0 = COPY [[COPY1]] %0(p0) = COPY $x0 - %1(s16) = G_LOAD %0 :: (load 2 from %ir.addr) + %1(s16) = G_LOAD %0 :: (load (s16) from %ir.addr) %2:gpr(s32) = G_ANYEXT %1 $w0 = COPY %2(s32) ... @@ -138,10 +138,10 @@ body: | ; CHECK-LABEL: name: load_s8_gpr_anyext ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 0 :: (load 1 from %ir.addr) + ; CHECK: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 0 :: (load (s8) from %ir.addr) ; CHECK: $w0 = COPY [[LDRBBui]] %0:gpr(p0) = COPY $x0 - %1:gpr(s32) = G_LOAD %0 :: (load 1 from %ir.addr) + %1:gpr(s32) = G_LOAD %0 :: (load (s8) from %ir.addr) $w0 = COPY %1(s32) ... @@ -160,11 +160,11 @@ body: | ; CHECK-LABEL: name: load_s8_gpr ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 0 :: (load 1 from %ir.addr) + ; CHECK: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 0 :: (load (s8) from %ir.addr) ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[LDRBBui]] ; CHECK: $w0 = COPY [[COPY1]] %0(p0) = COPY $x0 - %1(s8) = G_LOAD %0 :: (load 1 from %ir.addr) + %1(s8) = G_LOAD %0 :: (load (s8) from %ir.addr) %2:gpr(s32) = G_ANYEXT %1 $w0 = COPY %2(s32) ... @@ -186,10 +186,10 @@ body: | liveins: $x0 ; CHECK-LABEL: name: load_fi_s64_gpr - ; CHECK: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui %stack.0.ptr0, 0 :: (load 8) + ; CHECK: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui %stack.0.ptr0, 0 :: (load (s64)) ; CHECK: $x0 = COPY [[LDRXui]] %0(p0) = G_FRAME_INDEX %stack.0.ptr0 - %1(s64) = G_LOAD %0 :: (load 8) + %1(s64) = G_LOAD %0 :: (load (s64)) $x0 = COPY %1(s64) ... @@ -210,12 +210,12 @@ body: | ; CHECK-LABEL: name: load_gep_128_s64_gpr ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[COPY]], 16 :: (load 8 from %ir.addr) + ; CHECK: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[COPY]], 16 :: (load (s64) from %ir.addr) ; CHECK: $x0 = COPY [[LDRXui]] %0(p0) = COPY $x0 %1(s64) = G_CONSTANT i64 128 %2(p0) = G_PTR_ADD %0, %1 - %3(s64) = G_LOAD %2 :: (load 8 from %ir.addr) + %3(s64) = G_LOAD %2 :: (load (s64) from %ir.addr) $x0 = COPY %3 ... @@ -236,12 +236,12 @@ body: | ; CHECK-LABEL: name: load_gep_512_s32_gpr ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 128 :: (load 4 from %ir.addr) + ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 128 :: (load (s32) from %ir.addr) ; CHECK: $w0 = COPY [[LDRWui]] %0(p0) = COPY $x0 %1(s64) = G_CONSTANT i64 512 %2(p0) = G_PTR_ADD %0, %1 - %3(s32) = G_LOAD %2 :: (load 4 from %ir.addr) + %3(s32) = G_LOAD %2 :: (load (s32) from %ir.addr) $w0 = COPY %3 ... @@ -262,13 +262,13 @@ body: | ; CHECK-LABEL: name: load_gep_64_s16_gpr ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 32 :: (load 2 from %ir.addr) + ; CHECK: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 32 :: (load (s16) from %ir.addr) ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[LDRHHui]] ; CHECK: $w0 = COPY [[COPY1]] %0(p0) = COPY $x0 %1(s64) = G_CONSTANT i64 64 %2(p0) = G_PTR_ADD %0, %1 - %3(s16) = G_LOAD %2 :: (load 2 from %ir.addr) + %3(s16) = G_LOAD %2 :: (load (s16) from %ir.addr) %4:gpr(s32) = G_ANYEXT %3 $w0 = COPY %4 ... @@ -290,13 +290,13 @@ body: | ; CHECK-LABEL: name: load_gep_1_s8_gpr ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 1 :: (load 1 from %ir.addr) + ; CHECK: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 1 :: (load (s8) from %ir.addr) ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[LDRBBui]] ; CHECK: $w0 = COPY [[COPY1]] %0(p0) = COPY $x0 %1(s64) = G_CONSTANT i64 1 %2(p0) = G_PTR_ADD %0, %1 - %3(s8) = G_LOAD %2 :: (load 1 from %ir.addr) + %3(s8) = G_LOAD %2 :: (load (s8) from %ir.addr) %4:gpr(s32) = G_ANYEXT %3 $w0 = COPY %4 ... @@ -316,10 +316,10 @@ body: | ; CHECK-LABEL: name: load_s64_fpr ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[COPY]], 0 :: (load 8 from %ir.addr) + ; CHECK: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[COPY]], 0 :: (load (s64) from %ir.addr) ; CHECK: $d0 = COPY [[LDRDui]] %0(p0) = COPY $x0 - %1(s64) = G_LOAD %0 :: (load 8 from %ir.addr) + %1(s64) = G_LOAD %0 :: (load (s64) from %ir.addr) $d0 = COPY %1(s64) ... @@ -338,10 +338,10 @@ body: | ; CHECK-LABEL: name: load_s32_fpr ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRSui:%[0-9]+]]:fpr32 = LDRSui [[COPY]], 0 :: (load 4 from %ir.addr) + ; CHECK: [[LDRSui:%[0-9]+]]:fpr32 = LDRSui [[COPY]], 0 :: (load (s32) from %ir.addr) ; CHECK: $s0 = COPY [[LDRSui]] %0(p0) = COPY $x0 - %1(s32) = G_LOAD %0 :: (load 4 from %ir.addr) + %1(s32) = G_LOAD %0 :: (load (s32) from %ir.addr) $s0 = COPY %1(s32) ... @@ -360,10 +360,10 @@ body: | ; CHECK-LABEL: name: load_s16_fpr ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRHui:%[0-9]+]]:fpr16 = LDRHui [[COPY]], 0 :: (load 2 from %ir.addr) + ; CHECK: [[LDRHui:%[0-9]+]]:fpr16 = LDRHui [[COPY]], 0 :: (load (s16) from %ir.addr) ; CHECK: $h0 = COPY [[LDRHui]] %0(p0) = COPY $x0 - %1(s16) = G_LOAD %0 :: (load 2 from %ir.addr) + %1(s16) = G_LOAD %0 :: (load (s16) from %ir.addr) $h0 = COPY %1(s16) ... @@ -382,10 +382,10 @@ body: | ; CHECK-LABEL: name: load_s8_fpr ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRBui:%[0-9]+]]:fpr8 = LDRBui [[COPY]], 0 :: (load 1 from %ir.addr) + ; CHECK: [[LDRBui:%[0-9]+]]:fpr8 = LDRBui [[COPY]], 0 :: (load (s8) from %ir.addr) ; CHECK: $b0 = COPY [[LDRBui]] %0(p0) = COPY $x0 - %1(s8) = G_LOAD %0 :: (load 1 from %ir.addr) + %1(s8) = G_LOAD %0 :: (load (s8) from %ir.addr) $b0 = COPY %1(s8) ... @@ -406,12 +406,12 @@ body: | ; CHECK-LABEL: name: load_gep_8_s64_fpr ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[COPY]], 1 :: (load 8 from %ir.addr) + ; CHECK: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[COPY]], 1 :: (load (s64) from %ir.addr) ; CHECK: $d0 = COPY [[LDRDui]] %0(p0) = COPY $x0 %1(s64) = G_CONSTANT i64 8 %2(p0) = G_PTR_ADD %0, %1 - %3(s64) = G_LOAD %2 :: (load 8 from %ir.addr) + %3(s64) = G_LOAD %2 :: (load (s64) from %ir.addr) $d0 = COPY %3 ... @@ -432,12 +432,12 @@ body: | ; CHECK-LABEL: name: load_gep_16_s32_fpr ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRSui:%[0-9]+]]:fpr32 = LDRSui [[COPY]], 4 :: (load 4 from %ir.addr) + ; CHECK: [[LDRSui:%[0-9]+]]:fpr32 = LDRSui [[COPY]], 4 :: (load (s32) from %ir.addr) ; CHECK: $s0 = COPY [[LDRSui]] %0(p0) = COPY $x0 %1(s64) = G_CONSTANT i64 16 %2(p0) = G_PTR_ADD %0, %1 - %3(s32) = G_LOAD %2 :: (load 4 from %ir.addr) + %3(s32) = G_LOAD %2 :: (load (s32) from %ir.addr) $s0 = COPY %3 ... @@ -458,12 +458,12 @@ body: | ; CHECK-LABEL: name: load_gep_64_s16_fpr ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRHui:%[0-9]+]]:fpr16 = LDRHui [[COPY]], 32 :: (load 2 from %ir.addr) + ; CHECK: [[LDRHui:%[0-9]+]]:fpr16 = LDRHui [[COPY]], 32 :: (load (s16) from %ir.addr) ; CHECK: $h0 = COPY [[LDRHui]] %0(p0) = COPY $x0 %1(s64) = G_CONSTANT i64 64 %2(p0) = G_PTR_ADD %0, %1 - %3(s16) = G_LOAD %2 :: (load 2 from %ir.addr) + %3(s16) = G_LOAD %2 :: (load (s16) from %ir.addr) $h0 = COPY %3 ... @@ -484,12 +484,12 @@ body: | ; CHECK-LABEL: name: load_gep_32_s8_fpr ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRBui:%[0-9]+]]:fpr8 = LDRBui [[COPY]], 32 :: (load 1 from %ir.addr) + ; CHECK: [[LDRBui:%[0-9]+]]:fpr8 = LDRBui [[COPY]], 32 :: (load (s8) from %ir.addr) ; CHECK: $b0 = COPY [[LDRBui]] %0(p0) = COPY $x0 %1(s64) = G_CONSTANT i64 32 %2(p0) = G_PTR_ADD %0, %1 - %3(s8) = G_LOAD %2 :: (load 1 from %ir.addr) + %3(s8) = G_LOAD %2 :: (load (s8) from %ir.addr) $b0 = COPY %3 ... --- @@ -507,10 +507,10 @@ body: | ; CHECK-LABEL: name: load_v2s32 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[COPY]], 0 :: (load 8 from %ir.addr) + ; CHECK: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[COPY]], 0 :: (load (<2 x s32>) from %ir.addr) ; CHECK: $d0 = COPY [[LDRDui]] %0(p0) = COPY $x0 - %1(<2 x s32>) = G_LOAD %0 :: (load 8 from %ir.addr) + %1(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>) from %ir.addr) $d0 = COPY %1(<2 x s32>) ... --- @@ -528,10 +528,10 @@ body: | ; CHECK-LABEL: name: load_v2s64 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (load 16 from %ir.addr) + ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (load (<2 x s64>) from %ir.addr) ; CHECK: $q0 = COPY [[LDRQui]] %0(p0) = COPY $x0 - %1(<2 x s64>) = G_LOAD %0 :: (load 16 from %ir.addr) + %1(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>) from %ir.addr) $q0 = COPY %1(<2 x s64>) ... --- @@ -551,11 +551,11 @@ body: | ; CHECK-LABEL: name: load_4xi16 ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[COPY]], 0 :: (load 8 from %ir.ptr) + ; CHECK: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[COPY]], 0 :: (load (<4 x s16>) from %ir.ptr) ; CHECK: $d0 = COPY [[LDRDui]] ; CHECK: RET_ReallyLR implicit $d0 %0:gpr(p0) = COPY $x0 - %1:fpr(<4 x s16>) = G_LOAD %0(p0) :: (load 8 from %ir.ptr) + %1:fpr(<4 x s16>) = G_LOAD %0(p0) :: (load (<4 x s16>) from %ir.ptr) $d0 = COPY %1(<4 x s16>) RET_ReallyLR implicit $d0 @@ -577,11 +577,11 @@ body: | ; CHECK-LABEL: name: load_4xi32 ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (load 16 from %ir.ptr) + ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (load (<4 x s32>) from %ir.ptr) ; CHECK: $q0 = COPY [[LDRQui]] ; CHECK: RET_ReallyLR implicit $q0 %0:gpr(p0) = COPY $x0 - %1:fpr(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.ptr) + %1:fpr(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.ptr) $q0 = COPY %1(<4 x s32>) RET_ReallyLR implicit $q0 @@ -603,11 +603,11 @@ body: | ; CHECK-LABEL: name: load_8xi16 ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (load 16 from %ir.ptr) + ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (load (<8 x s16>) from %ir.ptr) ; CHECK: $q0 = COPY [[LDRQui]] ; CHECK: RET_ReallyLR implicit $q0 %0:gpr(p0) = COPY $x0 - %1:fpr(<8 x s16>) = G_LOAD %0(p0) :: (load 16 from %ir.ptr) + %1:fpr(<8 x s16>) = G_LOAD %0(p0) :: (load (<8 x s16>) from %ir.ptr) $q0 = COPY %1(<8 x s16>) RET_ReallyLR implicit $q0 @@ -629,11 +629,11 @@ body: | ; CHECK-LABEL: name: load_16xi8 ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (load 16 from %ir.ptr) + ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (load (<16 x s8>) from %ir.ptr) ; CHECK: $q0 = COPY [[LDRQui]] ; CHECK: RET_ReallyLR implicit $q0 %0:gpr(p0) = COPY $x0 - %1:fpr(<16 x s8>) = G_LOAD %0(p0) :: (load 16 from %ir.ptr) + %1:fpr(<16 x s8>) = G_LOAD %0(p0) :: (load (<16 x s8>) from %ir.ptr) $q0 = COPY %1(<16 x s8>) RET_ReallyLR implicit $q0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-reduce-add.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-reduce-add.mir index afd5aa7dd9e3f..5496e7ed8d7b0 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-reduce-add.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-reduce-add.mir @@ -15,14 +15,14 @@ body: | ; CHECK-LABEL: name: add_B ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (load 16) + ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (load (<16 x s8>)) ; CHECK: [[ADDVv16i8v:%[0-9]+]]:fpr8 = ADDVv16i8v [[LDRQui]] ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, [[ADDVv16i8v]], %subreg.bsub ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[SUBREG_TO_REG]] ; CHECK: $w0 = COPY [[COPY1]] ; CHECK: RET_ReallyLR implicit $w0 %0:gpr(p0) = COPY $x0 - %1:fpr(<16 x s8>) = G_LOAD %0(p0) :: (load 16) + %1:fpr(<16 x s8>) = G_LOAD %0(p0) :: (load (<16 x s8>)) %2:fpr(s8) = G_VECREDUCE_ADD %1(<16 x s8>) %4:gpr(s8) = COPY %2(s8) %3:gpr(s32) = G_ANYEXT %4(s8) @@ -45,14 +45,14 @@ body: | ; CHECK-LABEL: name: add_H ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (load 16) + ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (load (<8 x s16>)) ; CHECK: [[ADDVv8i16v:%[0-9]+]]:fpr16 = ADDVv8i16v [[LDRQui]] ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, [[ADDVv8i16v]], %subreg.hsub ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[SUBREG_TO_REG]] ; CHECK: $w0 = COPY [[COPY1]] ; CHECK: RET_ReallyLR implicit $w0 %0:gpr(p0) = COPY $x0 - %1:fpr(<8 x s16>) = G_LOAD %0(p0) :: (load 16) + %1:fpr(<8 x s16>) = G_LOAD %0(p0) :: (load (<8 x s16>)) %2:fpr(s16) = G_VECREDUCE_ADD %1(<8 x s16>) %4:gpr(s16) = COPY %2(s16) %3:gpr(s32) = G_ANYEXT %4(s16) @@ -75,12 +75,12 @@ body: | ; CHECK-LABEL: name: add_S ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (load 16) + ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (load (<4 x s32>)) ; CHECK: [[ADDVv4i32v:%[0-9]+]]:fpr32 = ADDVv4i32v [[LDRQui]] ; CHECK: $w0 = COPY [[ADDVv4i32v]] ; CHECK: RET_ReallyLR implicit $w0 %0:gpr(p0) = COPY $x0 - %1:fpr(<4 x s32>) = G_LOAD %0(p0) :: (load 16) + %1:fpr(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>)) %2:fpr(s32) = G_VECREDUCE_ADD %1(<4 x s32>) $w0 = COPY %2(s32) RET_ReallyLR implicit $w0 @@ -101,13 +101,13 @@ body: | ; CHECK-LABEL: name: add_S_v2i32 ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[COPY]], 0 :: (load 8) + ; CHECK: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[COPY]], 0 :: (load (<2 x s32>)) ; CHECK: [[ADDPv2i32_:%[0-9]+]]:fpr64 = ADDPv2i32 [[LDRDui]], [[LDRDui]] ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY [[ADDPv2i32_]].ssub ; CHECK: $w0 = COPY [[COPY1]] ; CHECK: RET_ReallyLR implicit $w0 %0:gpr(p0) = COPY $x0 - %1:fpr(<2 x s32>) = G_LOAD %0(p0) :: (load 8) + %1:fpr(<2 x s32>) = G_LOAD %0(p0) :: (load (<2 x s32>)) %2:fpr(s32) = G_VECREDUCE_ADD %1(<2 x s32>) $w0 = COPY %2(s32) RET_ReallyLR implicit $w0 @@ -128,12 +128,12 @@ body: | ; CHECK-LABEL: name: add_D ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (load 16) + ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (load (<2 x s64>)) ; CHECK: [[ADDPv2i64p:%[0-9]+]]:fpr64 = ADDPv2i64p [[LDRQui]] ; CHECK: $x0 = COPY [[ADDPv2i64p]] ; CHECK: RET_ReallyLR implicit $x0 %0:gpr(p0) = COPY $x0 - %1:fpr(<2 x s64>) = G_LOAD %0(p0) :: (load 16) + %1:fpr(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>)) %2:fpr(s64) = G_VECREDUCE_ADD %1(<2 x s64>) $x0 = COPY %2(s64) RET_ReallyLR implicit $x0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-redundant-zext-of-load.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-redundant-zext-of-load.mir index 46b1ac4221c21..135799d837296 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-redundant-zext-of-load.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-redundant-zext-of-load.mir @@ -12,12 +12,12 @@ body: | ; CHECK-LABEL: name: redundant_zext_8 ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 0 :: (load 1) + ; CHECK: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 0 :: (load (s8)) ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[LDRBBui]] ; CHECK: $w0 = COPY [[COPY1]] ; CHECK: RET_ReallyLR implicit $w0 %1:gpr(p0) = COPY $x0 - %2:gpr(s8) = G_LOAD %1(p0) :: (load 1) + %2:gpr(s8) = G_LOAD %1(p0) :: (load (s8)) %3:gpr(s32) = G_ZEXT %2(s8) $w0 = COPY %3(s32) RET_ReallyLR implicit $w0 @@ -35,12 +35,12 @@ body: | ; CHECK-LABEL: name: redundant_zext_16 ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 0 :: (load 2) + ; CHECK: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 0 :: (load (s16)) ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[LDRHHui]] ; CHECK: $w0 = COPY [[COPY1]] ; CHECK: RET_ReallyLR implicit $w0 %1:gpr(p0) = COPY $x0 - %2:gpr(s16) = G_LOAD %1(p0) :: (load 2) + %2:gpr(s16) = G_LOAD %1(p0) :: (load (s16)) %3:gpr(s32) = G_ZEXT %2(s16) $w0 = COPY %3(s32) RET_ReallyLR implicit $w0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-sextload.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-sextload.mir index 0e2c3077bfc98..6ce316cec5c3f 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-sextload.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-sextload.mir @@ -19,10 +19,10 @@ body: | ; CHECK-LABEL: name: sextload_s32_from_s16 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[T0:%[0-9]+]]:gpr32 = LDRSHWui [[COPY]], 0 :: (load 2 from %ir.addr) + ; CHECK: [[T0:%[0-9]+]]:gpr32 = LDRSHWui [[COPY]], 0 :: (load (s16) from %ir.addr) ; CHECK: $w0 = COPY [[T0]] %0:gpr(p0) = COPY $x0 - %1:gpr(s32) = G_SEXTLOAD %0 :: (load 2 from %ir.addr) + %1:gpr(s32) = G_SEXTLOAD %0 :: (load (s16) from %ir.addr) $w0 = COPY %1(s32) ... @@ -37,11 +37,11 @@ body: | ; CHECK-LABEL: name: sextload_s32_from_s16_not_combined ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[T0:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 0 :: (load 2 from %ir.addr) + ; CHECK: [[T0:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 0 :: (load (s16) from %ir.addr) ; CHECK: [[T1:%[0-9]+]]:gpr32 = SBFMWri [[T0]], 0, 15 ; CHECK: $w0 = COPY [[T1]] %0:gpr(p0) = COPY $x0 - %1:gpr(s16) = G_LOAD %0 :: (load 2 from %ir.addr) + %1:gpr(s16) = G_LOAD %0 :: (load (s16) from %ir.addr) %2:gpr(s32) = G_SEXT %1 $w0 = COPY %2(s32) ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-stlxr-intrin.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-stlxr-intrin.mir index c7644ba5d62c4..6b899dc4c84ab 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-stlxr-intrin.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-stlxr-intrin.mir @@ -27,12 +27,12 @@ body: | ; CHECK: liveins: $w0, $x1, $x2 ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x1 ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x2 - ; CHECK: early-clobber %2:gpr32 = STLXRX [[COPY]], [[COPY1]] :: (volatile store 8 into %ir.addr) + ; CHECK: early-clobber %2:gpr32 = STLXRX [[COPY]], [[COPY1]] :: (volatile store (s64) into %ir.addr) ; CHECK: $w0 = COPY %2 ; CHECK: RET_ReallyLR implicit $w0 %1:gpr(s64) = COPY $x1 %2:gpr(p0) = COPY $x2 - %3:gpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.stlxr), %1(s64), %2(p0) :: (volatile store 8 into %ir.addr) + %3:gpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.stlxr), %1(s64), %2(p0) :: (volatile store (s64) into %ir.addr) $w0 = COPY %3(s32) RET_ReallyLR implicit $w0 @@ -50,13 +50,13 @@ body: | ; CHECK: liveins: $w0, $w1, $x2 ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w1 ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x2 - ; CHECK: early-clobber %3:gpr32 = STLXRW [[COPY]], [[COPY1]] :: (volatile store 4 into %ir.addr) + ; CHECK: early-clobber %3:gpr32 = STLXRW [[COPY]], [[COPY1]] :: (volatile store (s32) into %ir.addr) ; CHECK: $w0 = COPY %3 ; CHECK: RET_ReallyLR implicit $w0 %1:gpr(s32) = COPY $w1 %2:gpr(p0) = COPY $x2 %3:gpr(s64) = G_ZEXT %1(s32) - %4:gpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.stlxr), %3(s64), %2(p0) :: (volatile store 4 into %ir.addr) + %4:gpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.stlxr), %3(s64), %2(p0) :: (volatile store (s32) into %ir.addr) $w0 = COPY %4(s32) RET_ReallyLR implicit $w0 @@ -78,7 +78,7 @@ body: | ; CHECK: [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:gpr64 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.sub_32 ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[INSERT_SUBREG]].sub_32 - ; CHECK: early-clobber %5:gpr32 = STLXRB [[COPY2]], [[COPY1]] :: (volatile store 1 into %ir.addr) + ; CHECK: early-clobber %5:gpr32 = STLXRB [[COPY2]], [[COPY1]] :: (volatile store (s8) into %ir.addr) ; CHECK: $w0 = COPY %5 ; CHECK: RET_ReallyLR implicit $w0 %3:gpr(s32) = COPY $w1 @@ -86,7 +86,7 @@ body: | %6:gpr(s64) = G_CONSTANT i64 255 %7:gpr(s64) = G_ANYEXT %3(s32) %4:gpr(s64) = G_AND %7, %6 - %5:gpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.stlxr), %4(s64), %2(p0) :: (volatile store 1 into %ir.addr) + %5:gpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.stlxr), %4(s64), %2(p0) :: (volatile store (s8) into %ir.addr) $w0 = COPY %5(s32) RET_ReallyLR implicit $w0 @@ -108,7 +108,7 @@ body: | ; CHECK: [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:gpr64 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.sub_32 ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[INSERT_SUBREG]].sub_32 - ; CHECK: early-clobber %5:gpr32 = STLXRH [[COPY2]], [[COPY1]] :: (volatile store 2 into %ir.addr) + ; CHECK: early-clobber %5:gpr32 = STLXRH [[COPY2]], [[COPY1]] :: (volatile store (s16) into %ir.addr) ; CHECK: $w0 = COPY %5 ; CHECK: RET_ReallyLR implicit $w0 %3:gpr(s32) = COPY $w1 @@ -116,6 +116,6 @@ body: | %6:gpr(s64) = G_CONSTANT i64 65535 %7:gpr(s64) = G_ANYEXT %3(s32) %4:gpr(s64) = G_AND %7, %6 - %5:gpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.stlxr), %4(s64), %2(p0) :: (volatile store 2 into %ir.addr) + %5:gpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.stlxr), %4(s64), %2(p0) :: (volatile store (s16) into %ir.addr) $w0 = COPY %5(s32) RET_ReallyLR implicit $w0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-store.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-store.mir index a414bf2c55d12..38bfa9eee8a64 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-store.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-store.mir @@ -63,10 +63,10 @@ body: | ; CHECK-LABEL: name: store_s64_gpr ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 - ; CHECK: STRXui [[COPY1]], [[COPY]], 0 :: (store 8 into %ir.addr) + ; CHECK: STRXui [[COPY1]], [[COPY]], 0 :: (store (s64) into %ir.addr) %0(p0) = COPY $x0 %1(s64) = COPY $x1 - G_STORE %1, %0 :: (store 8 into %ir.addr) + G_STORE %1, %0 :: (store (s64) into %ir.addr) ... @@ -86,10 +86,10 @@ body: | ; CHECK-LABEL: name: store_s32_gpr ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 - ; CHECK: STRWui [[COPY1]], [[COPY]], 0 :: (store 4 into %ir.addr) + ; CHECK: STRWui [[COPY1]], [[COPY]], 0 :: (store (s32) into %ir.addr) %0(p0) = COPY $x0 %1(s32) = COPY $w1 - G_STORE %1, %0 :: (store 4 into %ir.addr) + G_STORE %1, %0 :: (store (s32) into %ir.addr) ... @@ -109,11 +109,11 @@ body: | ; CHECK-LABEL: name: store_s16_gpr ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 - ; CHECK: STRHHui [[COPY1]], [[COPY]], 0 :: (store 2 into %ir.addr) + ; CHECK: STRHHui [[COPY1]], [[COPY]], 0 :: (store (s16) into %ir.addr) %0(p0) = COPY $x0 %2:gpr(s32) = COPY $w1 %1(s16) = G_TRUNC %2 - G_STORE %1, %0 :: (store 2 into %ir.addr) + G_STORE %1, %0 :: (store (s16) into %ir.addr) ... @@ -133,11 +133,11 @@ body: | ; CHECK-LABEL: name: store_s8_gpr ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 - ; CHECK: STRBBui [[COPY1]], [[COPY]], 0 :: (store 1 into %ir.addr) + ; CHECK: STRBBui [[COPY1]], [[COPY]], 0 :: (store (s8) into %ir.addr) %0(p0) = COPY $x0 %2:gpr(s32) = COPY $w1 %1(s8) = G_TRUNC %2 - G_STORE %1, %0 :: (store 1 into %ir.addr) + G_STORE %1, %0 :: (store (s8) into %ir.addr) ... @@ -156,10 +156,10 @@ body: | ; CHECK-LABEL: name: store_zero_s64_gpr ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: STRXui $xzr, [[COPY]], 0 :: (store 8 into %ir.addr) + ; CHECK: STRXui $xzr, [[COPY]], 0 :: (store (s64) into %ir.addr) %0(p0) = COPY $x0 %1(s64) = G_CONSTANT i64 0 - G_STORE %1, %0 :: (store 8 into %ir.addr) + G_STORE %1, %0 :: (store (s64) into %ir.addr) ... @@ -178,10 +178,10 @@ body: | ; CHECK-LABEL: name: store_zero_s32_gpr ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: STRWui $wzr, [[COPY]], 0 :: (store 4 into %ir.addr) + ; CHECK: STRWui $wzr, [[COPY]], 0 :: (store (s32) into %ir.addr) %0(p0) = COPY $x0 %1(s32) = G_CONSTANT i32 0 - G_STORE %1, %0 :: (store 4 into %ir.addr) + G_STORE %1, %0 :: (store (s32) into %ir.addr) ... @@ -194,10 +194,10 @@ body: | liveins: $x0 ; CHECK-LABEL: name: store_zero_s16 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: STRHHui $wzr, [[COPY]], 0 :: (store 2) + ; CHECK: STRHHui $wzr, [[COPY]], 0 :: (store (s16)) %0:gpr(p0) = COPY $x0 %1:gpr(s16) = G_CONSTANT i16 0 - G_STORE %1(s16), %0(p0) :: (store 2) + G_STORE %1(s16), %0(p0) :: (store (s16)) ... @@ -210,10 +210,10 @@ body: | liveins: $x0 ; CHECK-LABEL: name: store_zero_s8 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: STRBBui $wzr, [[COPY]], 0 :: (store 1) + ; CHECK: STRBBui $wzr, [[COPY]], 0 :: (store (s8)) %0:gpr(p0) = COPY $x0 %1:gpr(s8) = G_CONSTANT i8 0 - G_STORE %1(s8), %0(p0) :: (store 1) + G_STORE %1(s8), %0(p0) :: (store (s8)) ... --- @@ -225,11 +225,11 @@ body: | liveins: $x0 ; CHECK-LABEL: name: store_zero_look_through_cst ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: STRXui $xzr, [[COPY]], 0 :: (store 8 into %ir.addr) + ; CHECK: STRXui $xzr, [[COPY]], 0 :: (store (s64) into %ir.addr) %0:gpr(p0) = COPY $x0 %1:gpr(s32) = G_CONSTANT i32 0 %2:gpr(s64) = G_ZEXT %1 - G_STORE %2, %0 :: (store 8 into %ir.addr) + G_STORE %2, %0 :: (store (s64) into %ir.addr) ... --- @@ -251,10 +251,10 @@ body: | ; CHECK-LABEL: name: store_fi_s64_gpr ; CHECK: [[COPY:%[0-9]+]]:gpr64all = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY [[COPY]] - ; CHECK: STRXui [[COPY1]], %stack.0.ptr0, 0 :: (store 8) + ; CHECK: STRXui [[COPY1]], %stack.0.ptr0, 0 :: (store (p0)) %0(p0) = COPY $x0 %1(p0) = G_FRAME_INDEX %stack.0.ptr0 - G_STORE %0, %1 :: (store 8) + G_STORE %0, %1 :: (store (p0)) ... --- @@ -275,12 +275,12 @@ body: | ; CHECK-LABEL: name: store_gep_128_s64_gpr ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 - ; CHECK: STRXui [[COPY1]], [[COPY]], 16 :: (store 8 into %ir.addr) + ; CHECK: STRXui [[COPY1]], [[COPY]], 16 :: (store (s64) into %ir.addr) %0(p0) = COPY $x0 %1(s64) = COPY $x1 %2(s64) = G_CONSTANT i64 128 %3(p0) = G_PTR_ADD %0, %2 - G_STORE %1, %3 :: (store 8 into %ir.addr) + G_STORE %1, %3 :: (store (s64) into %ir.addr) ... --- @@ -301,12 +301,12 @@ body: | ; CHECK-LABEL: name: store_gep_512_s32_gpr ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 - ; CHECK: STRWui [[COPY1]], [[COPY]], 128 :: (store 4 into %ir.addr) + ; CHECK: STRWui [[COPY1]], [[COPY]], 128 :: (store (s32) into %ir.addr) %0(p0) = COPY $x0 %1(s32) = COPY $w1 %2(s64) = G_CONSTANT i64 512 %3(p0) = G_PTR_ADD %0, %2 - G_STORE %1, %3 :: (store 4 into %ir.addr) + G_STORE %1, %3 :: (store (s32) into %ir.addr) ... --- @@ -327,13 +327,13 @@ body: | ; CHECK-LABEL: name: store_gep_64_s16_gpr ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 - ; CHECK: STRHHui [[COPY1]], [[COPY]], 32 :: (store 2 into %ir.addr) + ; CHECK: STRHHui [[COPY1]], [[COPY]], 32 :: (store (s16) into %ir.addr) %0(p0) = COPY $x0 %4:gpr(s32) = COPY $w1 %1(s16) = G_TRUNC %4 %2(s64) = G_CONSTANT i64 64 %3(p0) = G_PTR_ADD %0, %2 - G_STORE %1, %3 :: (store 2 into %ir.addr) + G_STORE %1, %3 :: (store (s16) into %ir.addr) ... --- @@ -354,13 +354,13 @@ body: | ; CHECK-LABEL: name: store_gep_1_s8_gpr ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 - ; CHECK: STRBBui [[COPY1]], [[COPY]], 1 :: (store 1 into %ir.addr) + ; CHECK: STRBBui [[COPY1]], [[COPY]], 1 :: (store (s8) into %ir.addr) %0(p0) = COPY $x0 %4:gpr(s32) = COPY $w1 %1(s8) = G_TRUNC %4 %2(s64) = G_CONSTANT i64 1 %3(p0) = G_PTR_ADD %0, %2 - G_STORE %1, %3 :: (store 1 into %ir.addr) + G_STORE %1, %3 :: (store (s8) into %ir.addr) ... --- @@ -379,10 +379,10 @@ body: | ; CHECK-LABEL: name: store_s64_fpr ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 - ; CHECK: STRDui [[COPY1]], [[COPY]], 0 :: (store 8 into %ir.addr) + ; CHECK: STRDui [[COPY1]], [[COPY]], 0 :: (store (s64) into %ir.addr) %0(p0) = COPY $x0 %1(s64) = COPY $d1 - G_STORE %1, %0 :: (store 8 into %ir.addr) + G_STORE %1, %0 :: (store (s64) into %ir.addr) ... @@ -402,10 +402,10 @@ body: | ; CHECK-LABEL: name: store_s32_fpr ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1 - ; CHECK: STRSui [[COPY1]], [[COPY]], 0 :: (store 4 into %ir.addr) + ; CHECK: STRSui [[COPY1]], [[COPY]], 0 :: (store (s32) into %ir.addr) %0(p0) = COPY $x0 %1(s32) = COPY $s1 - G_STORE %1, %0 :: (store 4 into %ir.addr) + G_STORE %1, %0 :: (store (s32) into %ir.addr) ... @@ -427,12 +427,12 @@ body: | ; CHECK-LABEL: name: store_gep_8_s64_fpr ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 - ; CHECK: STRDui [[COPY1]], [[COPY]], 1 :: (store 8 into %ir.addr) + ; CHECK: STRDui [[COPY1]], [[COPY]], 1 :: (store (s64) into %ir.addr) %0(p0) = COPY $x0 %1(s64) = COPY $d1 %2(s64) = G_CONSTANT i64 8 %3(p0) = G_PTR_ADD %0, %2 - G_STORE %1, %3 :: (store 8 into %ir.addr) + G_STORE %1, %3 :: (store (s64) into %ir.addr) ... --- @@ -453,12 +453,12 @@ body: | ; CHECK-LABEL: name: store_gep_8_s32_fpr ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1 - ; CHECK: STRSui [[COPY1]], [[COPY]], 2 :: (store 4 into %ir.addr) + ; CHECK: STRSui [[COPY1]], [[COPY]], 2 :: (store (s32) into %ir.addr) %0(p0) = COPY $x0 %1(s32) = COPY $s1 %2(s64) = G_CONSTANT i64 8 %3(p0) = G_PTR_ADD %0, %2 - G_STORE %1, %3 :: (store 4 into %ir.addr) + G_STORE %1, %3 :: (store (s32) into %ir.addr) ... --- name: store_v2s32 @@ -476,10 +476,10 @@ body: | ; CHECK-LABEL: name: store_v2s32 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 - ; CHECK: STRDui [[COPY1]], [[COPY]], 0 :: (store 8 into %ir.addr) + ; CHECK: STRDui [[COPY1]], [[COPY]], 0 :: (store (<2 x s32>) into %ir.addr) %0(p0) = COPY $x0 %1(<2 x s32>) = COPY $d1 - G_STORE %1, %0 :: (store 8 into %ir.addr) + G_STORE %1, %0 :: (store (<2 x s32>) into %ir.addr) ... --- @@ -497,10 +497,10 @@ body: | ; CHECK-LABEL: name: store_v2s64 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 - ; CHECK: STRQui [[COPY1]], [[COPY]], 0 :: (store 16 into %ir.addr, align 8) + ; CHECK: STRQui [[COPY1]], [[COPY]], 0 :: (store (<2 x s64>) into %ir.addr, align 8) %0(p0) = COPY $x0 %1(<2 x s64>) = COPY $q1 - G_STORE %1, %0 :: (store 16 into %ir.addr, align 8) + G_STORE %1, %0 :: (store (<2 x s64>) into %ir.addr, align 8) ... --- @@ -521,11 +521,11 @@ body: | ; CHECK: liveins: $d0, $x0 ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: STRDui [[COPY]], [[COPY1]], 0 :: (store 8 into %ir.ptr) + ; CHECK: STRDui [[COPY]], [[COPY1]], 0 :: (store (<4 x s16>) into %ir.ptr) ; CHECK: RET_ReallyLR %0:fpr(<4 x s16>) = COPY $d0 %1:gpr(p0) = COPY $x0 - G_STORE %0(<4 x s16>), %1(p0) :: (store 8 into %ir.ptr) + G_STORE %0(<4 x s16>), %1(p0) :: (store (<4 x s16>) into %ir.ptr) RET_ReallyLR ... @@ -547,11 +547,11 @@ body: | ; CHECK: liveins: $q0, $x0 ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: STRQui [[COPY]], [[COPY1]], 0 :: (store 16 into %ir.ptr) + ; CHECK: STRQui [[COPY]], [[COPY1]], 0 :: (store (<4 x s32>) into %ir.ptr) ; CHECK: RET_ReallyLR %0:fpr(<4 x s32>) = COPY $q0 %1:gpr(p0) = COPY $x0 - G_STORE %0(<4 x s32>), %1(p0) :: (store 16 into %ir.ptr) + G_STORE %0(<4 x s32>), %1(p0) :: (store (<4 x s32>) into %ir.ptr) RET_ReallyLR ... @@ -573,11 +573,11 @@ body: | ; CHECK: liveins: $q0, $x0 ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: STRQui [[COPY]], [[COPY1]], 0 :: (store 16 into %ir.ptr) + ; CHECK: STRQui [[COPY]], [[COPY1]], 0 :: (store (<8 x s16>) into %ir.ptr) ; CHECK: RET_ReallyLR %0:fpr(<8 x s16>) = COPY $q0 %1:gpr(p0) = COPY $x0 - G_STORE %0(<8 x s16>), %1(p0) :: (store 16 into %ir.ptr) + G_STORE %0(<8 x s16>), %1(p0) :: (store (<8 x s16>) into %ir.ptr) RET_ReallyLR ... @@ -599,11 +599,11 @@ body: | ; CHECK: liveins: $q0, $x0 ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: STRQui [[COPY]], [[COPY1]], 0 :: (store 16 into %ir.ptr) + ; CHECK: STRQui [[COPY]], [[COPY1]], 0 :: (store (<16 x s8>) into %ir.ptr) ; CHECK: RET_ReallyLR %0:fpr(<16 x s8>) = COPY $q0 %1:gpr(p0) = COPY $x0 - G_STORE %0(<16 x s8>), %1(p0) :: (store 16 into %ir.ptr) + G_STORE %0(<16 x s8>), %1(p0) :: (store (<16 x s8>) into %ir.ptr) RET_ReallyLR ... @@ -620,11 +620,11 @@ body: | ; CHECK: %copy:gpr64all = COPY $x0 ; CHECK: %adrp:gpr64common = ADRP target-flags(aarch64-page) @x ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %copy - ; CHECK: STRXui [[COPY]], %adrp, target-flags(aarch64-pageoff, aarch64-nc) @x :: (store 8 into @x) + ; CHECK: STRXui [[COPY]], %adrp, target-flags(aarch64-pageoff, aarch64-nc) @x :: (store (p0) into @x) %copy:gpr(p0) = COPY $x0 %adrp:gpr64(p0) = ADRP target-flags(aarch64-page) @x %add_low:gpr(p0) = G_ADD_LOW %adrp(p0), target-flags(aarch64-pageoff, aarch64-nc) @x - G_STORE %copy(p0), %add_low(p0) :: (store 8 into @x) + G_STORE %copy(p0), %add_low(p0) :: (store (p0) into @x) ... --- @@ -640,11 +640,11 @@ body: | ; CHECK: %copy:gpr64all = COPY $x0 ; CHECK: %adrp:gpr64common = ADRP target-flags(aarch64-page) @x + 8 ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %copy - ; CHECK: STRXui [[COPY]], %adrp, target-flags(aarch64-pageoff, aarch64-nc) @x + 8 :: (store 8 into @x) + ; CHECK: STRXui [[COPY]], %adrp, target-flags(aarch64-pageoff, aarch64-nc) @x + 8 :: (store (p0) into @x) %copy:gpr(p0) = COPY $x0 %adrp:gpr64(p0) = ADRP target-flags(aarch64-page) @x + 8 %add_low:gpr(p0) = G_ADD_LOW %adrp(p0), target-flags(aarch64-pageoff, aarch64-nc) @x + 8 - G_STORE %copy(p0), %add_low(p0) :: (store 8 into @x) + G_STORE %copy(p0), %add_low(p0) :: (store (p0) into @x) ... --- @@ -660,11 +660,11 @@ body: | ; CHECK: %copy:gpr64all = COPY $x0 ; CHECK: %add_low:gpr64common = MOVaddr target-flags(aarch64-page) @x + 3, target-flags(aarch64-pageoff, aarch64-nc) @x + 3 ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %copy - ; CHECK: STRXui [[COPY]], %add_low, 0 :: (store 8 into @x) + ; CHECK: STRXui [[COPY]], %add_low, 0 :: (store (p0) into @x) %copy:gpr(p0) = COPY $x0 %adrp:gpr64(p0) = ADRP target-flags(aarch64-page) @x + 3 %add_low:gpr(p0) = G_ADD_LOW %adrp(p0), target-flags(aarch64-pageoff, aarch64-nc) @x + 3 - G_STORE %copy(p0), %add_low(p0) :: (store 8 into @x) + G_STORE %copy(p0), %add_low(p0) :: (store (p0) into @x) ... --- @@ -679,36 +679,36 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: %val32:gpr32 = COPY $w1 ; CHECK: %val64:gpr64 = COPY $x2 - ; CHECK: STRBBui %val32, [[COPY]], 0 :: (store 1) - ; CHECK: STRBBui %val32, [[COPY]], 43 :: (store 1) - ; CHECK: STRHHui %val32, [[COPY]], 0 :: (store 2) - ; CHECK: STURHHi %val32, [[COPY]], 43 :: (store 2) + ; CHECK: STRBBui %val32, [[COPY]], 0 :: (store (s8)) + ; CHECK: STRBBui %val32, [[COPY]], 43 :: (store (s8)) + ; CHECK: STRHHui %val32, [[COPY]], 0 :: (store (s16)) + ; CHECK: STURHHi %val32, [[COPY]], 43 :: (store (s16)) ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY %val64.sub_32 - ; CHECK: STRHHui [[COPY1]], [[COPY]], 0 :: (store 2) + ; CHECK: STRHHui [[COPY1]], [[COPY]], 0 :: (store (s16)) ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY %val64.sub_32 - ; CHECK: STURHHi [[COPY2]], [[COPY]], 43 :: (store 2) + ; CHECK: STURHHi [[COPY2]], [[COPY]], 43 :: (store (s16)) ; CHECK: [[COPY3:%[0-9]+]]:gpr32 = COPY %val64.sub_32 - ; CHECK: STRWui [[COPY3]], [[COPY]], 0 :: (store 4) + ; CHECK: STRWui [[COPY3]], [[COPY]], 0 :: (store (s32)) ; CHECK: [[COPY4:%[0-9]+]]:gpr32 = COPY %val64.sub_32 - ; CHECK: STURWi [[COPY4]], [[COPY]], 43 :: (store 4) + ; CHECK: STURWi [[COPY4]], [[COPY]], 43 :: (store (s32)) %0:gpr(p0) = COPY $x0 %val32:gpr(s32) = COPY $w1 %val64:gpr(s64) = COPY $x2 - G_STORE %val32, %0 :: (store 1) + G_STORE %val32, %0 :: (store (s8)) ; unscaled offset: %cst:gpr(s64) = G_CONSTANT i64 43 %newptr:gpr(p0) = G_PTR_ADD %0, %cst - G_STORE %val32, %newptr :: (store 1) + G_STORE %val32, %newptr :: (store (s8)) - G_STORE %val32, %0 :: (store 2) + G_STORE %val32, %0 :: (store (s16)) ; unscaled offset: - G_STORE %val32, %newptr :: (store 2) + G_STORE %val32, %newptr :: (store (s16)) - G_STORE %val64, %0 :: (store 2) + G_STORE %val64, %0 :: (store (s16)) ; unscaled offset: - G_STORE %val64, %newptr :: (store 2) + G_STORE %val64, %newptr :: (store (s16)) - G_STORE %val64, %0 :: (store 4) + G_STORE %val64, %0 :: (store (s32)) ; unscaled offset: - G_STORE %val64, %newptr :: (store 4) + G_STORE %val64, %newptr :: (store (s32)) ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-stx.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-stx.mir index 5e6655106231d..ebbf69e51ee81 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-stx.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-stx.mir @@ -25,7 +25,7 @@ body: | ; CHECK: [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:gpr64 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.sub_32 ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[INSERT_SUBREG]].sub_32 - ; CHECK: early-clobber %5:gpr32 = STXRB [[COPY2]], [[COPY1]] :: (volatile store 1 into %ir.addr) + ; CHECK: early-clobber %5:gpr32 = STXRB [[COPY2]], [[COPY1]] :: (volatile store (s8) into %ir.addr) ; CHECK: $w0 = COPY %5 ; CHECK: RET_ReallyLR implicit $w0 %3:gpr(s32) = COPY $w1 @@ -33,7 +33,7 @@ body: | %6:gpr(s64) = G_CONSTANT i64 255 %7:gpr(s64) = G_ANYEXT %3(s32) %4:gpr(s64) = G_AND %7, %6 - %5:gpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.stxr), %4(s64), %2(p0) :: (volatile store 1 into %ir.addr) + %5:gpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.stxr), %4(s64), %2(p0) :: (volatile store (s8) into %ir.addr) $w0 = COPY %5(s32) RET_ReallyLR implicit $w0 @@ -56,7 +56,7 @@ body: | ; CHECK: [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:gpr64 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.sub_32 ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[INSERT_SUBREG]].sub_32 - ; CHECK: early-clobber %5:gpr32 = STXRH [[COPY2]], [[COPY1]] :: (volatile store 2 into %ir.addr) + ; CHECK: early-clobber %5:gpr32 = STXRH [[COPY2]], [[COPY1]] :: (volatile store (s16) into %ir.addr) ; CHECK: $w0 = COPY %5 ; CHECK: RET_ReallyLR implicit $w0 %3:gpr(s32) = COPY $w1 @@ -64,7 +64,7 @@ body: | %6:gpr(s64) = G_CONSTANT i64 65535 %7:gpr(s64) = G_ANYEXT %3(s32) %4:gpr(s64) = G_AND %7, %6 - %5:gpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.stxr), %4(s64), %2(p0) :: (volatile store 2 into %ir.addr) + %5:gpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.stxr), %4(s64), %2(p0) :: (volatile store (s16) into %ir.addr) $w0 = COPY %5(s32) RET_ReallyLR implicit $w0 @@ -84,13 +84,13 @@ body: | ; CHECK: liveins: $w0, $w1, $x2 ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w1 ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x2 - ; CHECK: early-clobber %3:gpr32 = STXRW [[COPY]], [[COPY1]] :: (volatile store 4 into %ir.addr) + ; CHECK: early-clobber %3:gpr32 = STXRW [[COPY]], [[COPY1]] :: (volatile store (s32) into %ir.addr) ; CHECK: $w0 = COPY %3 ; CHECK: RET_ReallyLR implicit $w0 %1:gpr(s32) = COPY $w1 %2:gpr(p0) = COPY $x2 %3:gpr(s64) = G_ZEXT %1(s32) - %4:gpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.stxr), %3(s64), %2(p0) :: (volatile store 4 into %ir.addr) + %4:gpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.stxr), %3(s64), %2(p0) :: (volatile store (s32) into %ir.addr) $w0 = COPY %4(s32) RET_ReallyLR implicit $w0 @@ -110,12 +110,12 @@ body: | ; CHECK: liveins: $w0, $x1, $x2 ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x1 ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x2 - ; CHECK: early-clobber %2:gpr32 = STXRX [[COPY]], [[COPY1]] :: (volatile store 8 into %ir.addr) + ; CHECK: early-clobber %2:gpr32 = STXRX [[COPY]], [[COPY1]] :: (volatile store (s64) into %ir.addr) ; CHECK: $w0 = COPY %2 ; CHECK: RET_ReallyLR implicit $w0 %1:gpr(s64) = COPY $x1 %2:gpr(p0) = COPY $x2 - %3:gpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.stxr), %1(s64), %2(p0) :: (volatile store 8 into %ir.addr) + %3:gpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.stxr), %1(s64), %2(p0) :: (volatile store (s64) into %ir.addr) $w0 = COPY %3(s32) RET_ReallyLR implicit $w0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-with-no-legality-check.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-with-no-legality-check.mir index 0b8f1ed948e64..e0983dbfdd14c 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-with-no-legality-check.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-with-no-legality-check.mir @@ -21,10 +21,10 @@ body: | ; CHECK: liveins: $d0 ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY [[COPY]] - ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 0 :: (load 16) + ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 0 :: (load (s128)) ; CHECK: $noreg = PATCHABLE_RET [[LDRQui]] %1:fpr(p0) = COPY $d0 - %0:fpr(s128) = G_LOAD %1(p0) :: (load 16) + %0:fpr(s128) = G_LOAD %1(p0) :: (load (s128)) $noreg = PATCHABLE_RET %0(s128) ... @@ -49,11 +49,11 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d1 ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d0 ; CHECK: [[COPY2:%[0-9]+]]:gpr64sp = COPY [[COPY]] - ; CHECK: STRDui [[COPY1]], [[COPY2]], 0 :: (store 8) + ; CHECK: STRDui [[COPY1]], [[COPY2]], 0 :: (store (<8 x s8>)) ; CHECK: $noreg = PATCHABLE_RET %1:fpr(p0) = COPY $d1 %0:fpr(<8 x s8>) = COPY $d0 - G_STORE %0(<8 x s8>), %1(p0) :: (store 8) + G_STORE %0(<8 x s8>), %1(p0) :: (store (<8 x s8>)) $noreg = PATCHABLE_RET ... @@ -78,11 +78,11 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d1 ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d0 ; CHECK: [[COPY2:%[0-9]+]]:gpr64sp = COPY [[COPY]] - ; CHECK: STRDui [[COPY1]], [[COPY2]], 0 :: (store 8) + ; CHECK: STRDui [[COPY1]], [[COPY2]], 0 :: (store (<4 x s16>)) ; CHECK: $noreg = PATCHABLE_RET %1:fpr(p0) = COPY $d1 %0:fpr(<4 x s16>) = COPY $d0 - G_STORE %0(<4 x s16>), %1(p0) :: (store 8) + G_STORE %0(<4 x s16>), %1(p0) :: (store (<4 x s16>)) $noreg = PATCHABLE_RET ... @@ -107,11 +107,11 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0 ; CHECK: [[COPY2:%[0-9]+]]:gpr64sp = COPY [[COPY]] - ; CHECK: STRQui [[COPY1]], [[COPY2]], 0 :: (store 16) + ; CHECK: STRQui [[COPY1]], [[COPY2]], 0 :: (store (<4 x s32>)) ; CHECK: $noreg = PATCHABLE_RET %1:fpr(p0) = COPY $d0 %0:fpr(<4 x s32>) = COPY $q0 - G_STORE %0(<4 x s32>), %1(p0) :: (store 16) + G_STORE %0(<4 x s32>), %1(p0) :: (store (<4 x s32>)) $noreg = PATCHABLE_RET ... @@ -136,11 +136,11 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0 ; CHECK: [[COPY2:%[0-9]+]]:gpr64sp = COPY [[COPY]] - ; CHECK: STRQui [[COPY1]], [[COPY2]], 0 :: (store 16) + ; CHECK: STRQui [[COPY1]], [[COPY2]], 0 :: (store (<2 x s64>)) ; CHECK: $noreg = PATCHABLE_RET %1:fpr(p0) = COPY $d0 %0:fpr(<2 x s64>) = COPY $q0 - G_STORE %0(<2 x s64>), %1(p0) :: (store 16) + G_STORE %0(<2 x s64>), %1(p0) :: (store (<2 x s64>)) $noreg = PATCHABLE_RET ... @@ -165,11 +165,11 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0 ; CHECK: [[COPY2:%[0-9]+]]:gpr64sp = COPY [[COPY]] - ; CHECK: STRQui [[COPY1]], [[COPY2]], 0 :: (store 16) + ; CHECK: STRQui [[COPY1]], [[COPY2]], 0 :: (store (<16 x s8>)) ; CHECK: $noreg = PATCHABLE_RET %1:fpr(p0) = COPY $d0 %0:fpr(<16 x s8>) = COPY $q0 - G_STORE %0(<16 x s8>), %1(p0) :: (store 16) + G_STORE %0(<16 x s8>), %1(p0) :: (store (<16 x s8>)) $noreg = PATCHABLE_RET ... @@ -194,11 +194,11 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0 ; CHECK: [[COPY2:%[0-9]+]]:gpr64sp = COPY [[COPY]] - ; CHECK: STRQui [[COPY1]], [[COPY2]], 0 :: (store 16) + ; CHECK: STRQui [[COPY1]], [[COPY2]], 0 :: (store (<8 x s16>)) ; CHECK: $noreg = PATCHABLE_RET %1:fpr(p0) = COPY $d0 %0:fpr(<8 x s16>) = COPY $q0 - G_STORE %0(<8 x s16>), %1(p0) :: (store 16) + G_STORE %0(<8 x s16>), %1(p0) :: (store (<8 x s16>)) $noreg = PATCHABLE_RET ... @@ -223,11 +223,11 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q0 ; CHECK: [[COPY2:%[0-9]+]]:gpr64sp = COPY [[COPY]] - ; CHECK: STRQui [[COPY1]], [[COPY2]], 0 :: (store 16) + ; CHECK: STRQui [[COPY1]], [[COPY2]], 0 :: (store (s128)) ; CHECK: $noreg = PATCHABLE_RET %1:fpr(p0) = COPY $d0 %0:fpr(s128) = COPY $q0 - G_STORE %0(s128), %1(p0) :: (store 16) + G_STORE %0(s128), %1(p0) :: (store (s128)) $noreg = PATCHABLE_RET ... @@ -251,10 +251,10 @@ body: | ; CHECK-LABEL: name: test_rule92_id2150_at_idx7770 ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 0 :: (load 1) + ; CHECK: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 0 :: (load (s8)) ; CHECK: $noreg = PATCHABLE_RET [[LDRBBui]] %2:gpr(p0) = COPY $x0 - %0:gpr(s32) = G_LOAD %2(p0) :: (load 1) + %0:gpr(s32) = G_LOAD %2(p0) :: (load (s8)) $noreg = PATCHABLE_RET %0(s32) ... @@ -278,12 +278,12 @@ body: | ; CHECK-LABEL: name: test_rule96_id2146_at_idx8070 ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRBui:%[0-9]+]]:fpr8 = LDRBui [[COPY]], 0 :: (load 1) + ; CHECK: [[LDRBui:%[0-9]+]]:fpr8 = LDRBui [[COPY]], 0 :: (load (s1)) ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[LDRBui]] ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 0, 0 ; CHECK: $noreg = PATCHABLE_RET [[UBFMWri]] %2:gpr(p0) = COPY $x0 - %0:fpr(s1) = G_LOAD %2(p0) :: (load 1) + %0:fpr(s1) = G_LOAD %2(p0) :: (load (s1)) %1:gpr(s32) = G_ZEXT %0(s1) $noreg = PATCHABLE_RET %1(s32) @@ -307,10 +307,10 @@ body: | ; CHECK: liveins: $d0 ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY [[COPY]] - ; CHECK: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[COPY1]], 0 :: (load 8) + ; CHECK: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[COPY1]], 0 :: (load (<8 x s8>)) ; CHECK: $noreg = PATCHABLE_RET [[LDRDui]] %1:fpr(p0) = COPY $d0 - %0:fpr(<8 x s8>) = G_LOAD %1(p0) :: (load 8) + %0:fpr(<8 x s8>) = G_LOAD %1(p0) :: (load (<8 x s8>)) $noreg = PATCHABLE_RET %0(<8 x s8>) ... @@ -333,10 +333,10 @@ body: | ; CHECK: liveins: $d0 ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY [[COPY]] - ; CHECK: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[COPY1]], 0 :: (load 8) + ; CHECK: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[COPY1]], 0 :: (load (<4 x s16>)) ; CHECK: $noreg = PATCHABLE_RET [[LDRDui]] %1:fpr(p0) = COPY $d0 - %0:fpr(<4 x s16>) = G_LOAD %1(p0) :: (load 8) + %0:fpr(<4 x s16>) = G_LOAD %1(p0) :: (load (<4 x s16>)) $noreg = PATCHABLE_RET %0(<4 x s16>) ... @@ -359,10 +359,10 @@ body: | ; CHECK: liveins: $d0 ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY [[COPY]] - ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 0 :: (load 16) + ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 0 :: (load (<4 x s32>)) ; CHECK: $noreg = PATCHABLE_RET [[LDRQui]] %1:fpr(p0) = COPY $d0 - %0:fpr(<4 x s32>) = G_LOAD %1(p0) :: (load 16) + %0:fpr(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>)) $noreg = PATCHABLE_RET %0(<4 x s32>) ... @@ -385,10 +385,10 @@ body: | ; CHECK: liveins: $d0 ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY [[COPY]] - ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 0 :: (load 16) + ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 0 :: (load (<2 x s64>)) ; CHECK: $noreg = PATCHABLE_RET [[LDRQui]] %1:fpr(p0) = COPY $d0 - %0:fpr(<2 x s64>) = G_LOAD %1(p0) :: (load 16) + %0:fpr(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>)) $noreg = PATCHABLE_RET %0(<2 x s64>) ... @@ -411,10 +411,10 @@ body: | ; CHECK: liveins: $d0 ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY [[COPY]] - ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 0 :: (load 16) + ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 0 :: (load (<16 x s8>)) ; CHECK: $noreg = PATCHABLE_RET [[LDRQui]] %1:fpr(p0) = COPY $d0 - %0:fpr(<16 x s8>) = G_LOAD %1(p0) :: (load 16) + %0:fpr(<16 x s8>) = G_LOAD %1(p0) :: (load (<16 x s8>)) $noreg = PATCHABLE_RET %0(<16 x s8>) ... @@ -437,10 +437,10 @@ body: | ; CHECK: liveins: $d0 ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY [[COPY]] - ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 0 :: (load 16) + ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 0 :: (load (<8 x s16>)) ; CHECK: $noreg = PATCHABLE_RET [[LDRQui]] %1:fpr(p0) = COPY $d0 - %0:fpr(<8 x s16>) = G_LOAD %1(p0) :: (load 16) + %0:fpr(<8 x s16>) = G_LOAD %1(p0) :: (load (<8 x s16>)) $noreg = PATCHABLE_RET %0(<8 x s16>) ... @@ -2551,11 +2551,11 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x1 ; CHECK: [[COPY1:%[0-9]+]]:gpr64all = COPY $x0 ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY [[COPY1]] - ; CHECK: ST1Onev8b [[COPY2]], [[COPY]] :: (store 8) + ; CHECK: ST1Onev8b [[COPY2]], [[COPY]] :: (store (<8 x s8>)) ; CHECK: $noreg = PATCHABLE_RET %1:gpr(p0) = COPY $x1 %0:gpr(<8 x s8>) = COPY $x0 - G_STORE %0(<8 x s8>), %1(p0) :: (store 8) + G_STORE %0(<8 x s8>), %1(p0) :: (store (<8 x s8>)) $noreg = PATCHABLE_RET ... @@ -2580,11 +2580,11 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x1 ; CHECK: [[COPY1:%[0-9]+]]:gpr64all = COPY $x0 ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY [[COPY1]] - ; CHECK: ST1Onev4h [[COPY2]], [[COPY]] :: (store 8) + ; CHECK: ST1Onev4h [[COPY2]], [[COPY]] :: (store (<4 x s16>)) ; CHECK: $noreg = PATCHABLE_RET %1:gpr(p0) = COPY $x1 %0:gpr(<4 x s16>) = COPY $x0 - G_STORE %0(<4 x s16>), %1(p0) :: (store 8) + G_STORE %0(<4 x s16>), %1(p0) :: (store (<4 x s16>)) $noreg = PATCHABLE_RET ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-zext-as-copy.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-zext-as-copy.mir index 53d0147f57fe6..60af59672a4ca 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-zext-as-copy.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-zext-as-copy.mir @@ -21,13 +21,13 @@ body: | ; CHECK-LABEL: name: zext_of_load_copy ; CHECK: [[DEF:%[0-9]+]]:gpr64common = IMPLICIT_DEF - ; CHECK: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[DEF]], 0 :: (load 1) + ; CHECK: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[DEF]], 0 :: (load (s8)) ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64all = SUBREG_TO_REG 0, [[LDRBBui]], %subreg.sub_32 ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY [[SUBREG_TO_REG]] ; CHECK: [[ANDXri:%[0-9]+]]:gpr64sp = ANDXri [[COPY]], 4096 ; CHECK: $x0 = COPY [[ANDXri]] %3:gpr(p0) = G_IMPLICIT_DEF - %2:gpr(s8) = G_LOAD %3(p0) :: (load 1) + %2:gpr(s8) = G_LOAD %3(p0) :: (load (s8)) %4:gpr(s64) = G_ZEXT %2(s8) %5:gpr(s64) = G_CONSTANT i64 1 %6:gpr(s64) = G_AND %4, %5 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-zextload.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-zextload.mir index f4e52ae47ff1c..ea44159bdb689 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-zextload.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-zextload.mir @@ -50,10 +50,10 @@ body: | ; CHECK-LABEL: name: zextload_s32_from_s16 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 0 :: (load 2 from %ir.addr) + ; CHECK: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 0 :: (load (s16) from %ir.addr) ; CHECK: $w0 = COPY [[LDRHHui]] %0:gpr(p0) = COPY $x0 - %1:gpr(s32) = G_ZEXTLOAD %0 :: (load 2 from %ir.addr) + %1:gpr(s32) = G_ZEXTLOAD %0 :: (load (s16) from %ir.addr) $w0 = COPY %1(s32) ... --- @@ -67,11 +67,11 @@ body: | ; CHECK-LABEL: name: zextload_s32_from_s16_not_combined ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 0 :: (load 2 from %ir.addr) + ; CHECK: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 0 :: (load (s16) from %ir.addr) ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[LDRHHui]] ; CHECK: $w0 = COPY [[COPY1]] %0:gpr(p0) = COPY $x0 - %1:gpr(s16) = G_LOAD %0 :: (load 2 from %ir.addr) + %1:gpr(s16) = G_LOAD %0 :: (load (s16) from %ir.addr) %2:gpr(s32) = G_ZEXT %1 $w0 = COPY %2(s32) ... @@ -85,12 +85,12 @@ body: | ; CHECK-LABEL: name: i32_to_i64 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 0 :: (load 4 from %ir.ptr) + ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 0 :: (load (s32) from %ir.ptr) ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64all = SUBREG_TO_REG 0, [[LDRWui]], %subreg.sub_32 ; CHECK: $x0 = COPY [[SUBREG_TO_REG]] ; CHECK: RET_ReallyLR implicit $x0 %0:gpr(p0) = COPY $x0 - %2:gpr(s64) = G_ZEXTLOAD %0(p0) :: (load 4 from %ir.ptr) + %2:gpr(s64) = G_ZEXTLOAD %0(p0) :: (load (s32) from %ir.ptr) $x0 = COPY %2(s64) RET_ReallyLR implicit $x0 @@ -105,12 +105,12 @@ body: | ; CHECK-LABEL: name: i16_to_i64 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 0 :: (load 2 from %ir.ptr) + ; CHECK: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 0 :: (load (s16) from %ir.ptr) ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64all = SUBREG_TO_REG 0, [[LDRHHui]], %subreg.sub_32 ; CHECK: $x0 = COPY [[SUBREG_TO_REG]] ; CHECK: RET_ReallyLR implicit $x0 %0:gpr(p0) = COPY $x0 - %2:gpr(s64) = G_ZEXTLOAD %0(p0) :: (load 2 from %ir.ptr) + %2:gpr(s64) = G_ZEXTLOAD %0(p0) :: (load (s16) from %ir.ptr) $x0 = COPY %2(s64) RET_ReallyLR implicit $x0 @@ -125,12 +125,12 @@ body: | ; CHECK-LABEL: name: i8_to_i64 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 0 :: (load 1 from %ir.ptr) + ; CHECK: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 0 :: (load (s8) from %ir.ptr) ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64all = SUBREG_TO_REG 0, [[LDRBBui]], %subreg.sub_32 ; CHECK: $x0 = COPY [[SUBREG_TO_REG]] ; CHECK: RET_ReallyLR implicit $x0 %0:gpr(p0) = COPY $x0 - %2:gpr(s64) = G_ZEXTLOAD %0(p0) :: (load 1 from %ir.ptr) + %2:gpr(s64) = G_ZEXTLOAD %0(p0) :: (load (s8) from %ir.ptr) $x0 = COPY %2(s64) RET_ReallyLR implicit $x0 @@ -145,11 +145,11 @@ body: | ; CHECK-LABEL: name: i8_to_i32 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 0 :: (load 1 from %ir.ptr) + ; CHECK: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 0 :: (load (s8) from %ir.ptr) ; CHECK: $w0 = COPY [[LDRBBui]] ; CHECK: RET_ReallyLR implicit $w0 %0:gpr(p0) = COPY $x0 - %2:gpr(s32) = G_ZEXTLOAD %0(p0) :: (load 1 from %ir.ptr) + %2:gpr(s32) = G_ZEXTLOAD %0(p0) :: (load (s8) from %ir.ptr) $w0 = COPY %2(s32) RET_ReallyLR implicit $w0 @@ -164,11 +164,11 @@ body: | ; CHECK-LABEL: name: i16_to_i32 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 0 :: (load 2 from %ir.ptr) + ; CHECK: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 0 :: (load (s16) from %ir.ptr) ; CHECK: $w0 = COPY [[LDRHHui]] ; CHECK: RET_ReallyLR implicit $w0 %0:gpr(p0) = COPY $x0 - %2:gpr(s32) = G_ZEXTLOAD %0(p0) :: (load 2 from %ir.ptr) + %2:gpr(s32) = G_ZEXTLOAD %0(p0) :: (load (s16) from %ir.ptr) $w0 = COPY %2(s32) RET_ReallyLR implicit $w0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/sext-inreg-ldrow-16b.mir b/llvm/test/CodeGen/AArch64/GlobalISel/sext-inreg-ldrow-16b.mir index e7a5d9e4c19a5..66a74388f3ff9 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/sext-inreg-ldrow-16b.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/sext-inreg-ldrow-16b.mir @@ -61,24 +61,24 @@ body: | ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:gpr64all = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY [[COPY]] - ; CHECK: STRXui [[COPY1]], %stack.0.ptr.addr, 0 :: (store 8 into %ir.ptr.addr) - ; CHECK: [[LDRXui:%[0-9]+]]:gpr64common = LDRXui %stack.0.ptr.addr, 0 :: (dereferenceable load 8 from %ir.ptr.addr) + ; CHECK: STRXui [[COPY1]], %stack.0.ptr.addr, 0 :: (store (p0) into %ir.ptr.addr) + ; CHECK: [[LDRXui:%[0-9]+]]:gpr64common = LDRXui %stack.0.ptr.addr, 0 :: (dereferenceable load (p0) from %ir.ptr.addr) ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) @x - ; CHECK: [[LDRWui:%[0-9]+]]:gpr32common = LDRWui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) @x :: (dereferenceable load 4 from @x) + ; CHECK: [[LDRWui:%[0-9]+]]:gpr32common = LDRWui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) @x :: (dereferenceable load (s32) from @x) ; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = nsw SUBSWri [[LDRWui]], 8, 12, implicit-def $nzcv ; CHECK: [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:gpr64 = INSERT_SUBREG [[DEF]], [[SUBSWri]], %subreg.sub_32 ; CHECK: [[SBFMXri:%[0-9]+]]:gpr64 = SBFMXri [[INSERT_SUBREG]], 0, 15 - ; CHECK: [[LDRWroX:%[0-9]+]]:gpr32 = LDRWroX [[LDRXui]], [[SBFMXri]], 0, 1 :: (load 4 from %ir.arrayidx) + ; CHECK: [[LDRWroX:%[0-9]+]]:gpr32 = LDRWroX [[LDRXui]], [[SBFMXri]], 0, 1 :: (load (s32) from %ir.arrayidx) ; CHECK: $w0 = COPY [[LDRWroX]] ; CHECK: RET_ReallyLR implicit $w0 %0:gpr(p0) = COPY $x0 %1:gpr(p0) = G_FRAME_INDEX %stack.0.ptr.addr - G_STORE %0(p0), %1(p0) :: (store 8 into %ir.ptr.addr) - %2:gpr(p0) = G_LOAD %1(p0) :: (dereferenceable load 8 from %ir.ptr.addr) + G_STORE %0(p0), %1(p0) :: (store (p0) into %ir.ptr.addr) + %2:gpr(p0) = G_LOAD %1(p0) :: (dereferenceable load (p0) from %ir.ptr.addr) %15:gpr64(p0) = ADRP target-flags(aarch64-page) @x %4:gpr(p0) = G_ADD_LOW %15(p0), target-flags(aarch64-pageoff, aarch64-nc) @x - %3:gpr(s32) = G_LOAD %4(p0) :: (dereferenceable load 4 from @x) + %3:gpr(s32) = G_LOAD %4(p0) :: (dereferenceable load (s32) from @x) %5:gpr(s32) = G_CONSTANT i32 32768 %6:gpr(s32) = nsw G_SUB %3, %5 %16:gpr(s64) = G_ANYEXT %6(s32) @@ -86,7 +86,7 @@ body: | %14:gpr(s64) = G_CONSTANT i64 2 %10:gpr(s64) = G_SHL %8, %14(s64) %11:gpr(p0) = G_PTR_ADD %2, %10(s64) - %13:gpr(s32) = G_LOAD %11(p0) :: (load 4 from %ir.arrayidx) + %13:gpr(s32) = G_LOAD %11(p0) :: (load (s32) from %ir.arrayidx) $w0 = COPY %13(s32) RET_ReallyLR implicit $w0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/store-addressing-modes.mir b/llvm/test/CodeGen/AArch64/GlobalISel/store-addressing-modes.mir index a13c860621848..ff5143a737ee8 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/store-addressing-modes.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/store-addressing-modes.mir @@ -28,12 +28,12 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 ; CHECK: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2 - ; CHECK: STRXroX [[COPY2]], [[COPY]], [[COPY1]], 0, 0 :: (store 8 into %ir.addr) + ; CHECK: STRXroX [[COPY2]], [[COPY]], [[COPY1]], 0, 0 :: (store (s64) into %ir.addr) %0:gpr(p0) = COPY $x0 %1:gpr(s64) = COPY $x1 %ptr:gpr(p0) = G_PTR_ADD %0, %1 %3:gpr(s64) = COPY $x2 - G_STORE %3, %ptr :: (store 8 into %ir.addr) + G_STORE %3, %ptr :: (store (s64) into %ir.addr) ... --- name: strxrox_p0 @@ -51,12 +51,12 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 ; CHECK: [[COPY2:%[0-9]+]]:gpr64all = COPY $x2 ; CHECK: [[COPY3:%[0-9]+]]:gpr64 = COPY [[COPY2]] - ; CHECK: STRXroX [[COPY3]], [[COPY]], [[COPY1]], 0, 0 :: (store 8 into %ir.addr) + ; CHECK: STRXroX [[COPY3]], [[COPY]], [[COPY1]], 0, 0 :: (store (p0) into %ir.addr) %0:gpr(p0) = COPY $x0 %1:gpr(s64) = COPY $x1 %ptr:gpr(p0) = G_PTR_ADD %0, %1 %3:gpr(p0) = COPY $x2 - G_STORE %3, %ptr :: (store 8 into %ir.addr) + G_STORE %3, %ptr :: (store (p0) into %ir.addr) ... --- name: strdrox @@ -73,12 +73,12 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY $d2 - ; CHECK: STRDroX [[COPY2]], [[COPY]], [[COPY1]], 0, 0 :: (store 8 into %ir.addr) + ; CHECK: STRDroX [[COPY2]], [[COPY]], [[COPY1]], 0, 0 :: (store (s64) into %ir.addr) %0:gpr(p0) = COPY $x0 %1:gpr(s64) = COPY $x1 %ptr:gpr(p0) = G_PTR_ADD %0, %1 %3:fpr(s64) = COPY $d2 - G_STORE %3, %ptr :: (store 8 into %ir.addr) + G_STORE %3, %ptr :: (store (s64) into %ir.addr) ... --- name: strwrox @@ -95,12 +95,12 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY $w2 - ; CHECK: STRWroX [[COPY2]], [[COPY]], [[COPY1]], 0, 0 :: (store 4 into %ir.addr) + ; CHECK: STRWroX [[COPY2]], [[COPY]], [[COPY1]], 0, 0 :: (store (s32) into %ir.addr) %0:gpr(p0) = COPY $x0 %1:gpr(s64) = COPY $x1 %ptr:gpr(p0) = G_PTR_ADD %0, %1 %3:gpr(s32) = COPY $w2 - G_STORE %3, %ptr :: (store 4 into %ir.addr) + G_STORE %3, %ptr :: (store (s32) into %ir.addr) ... --- name: strsrox @@ -117,12 +117,12 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY $s2 - ; CHECK: STRSroX [[COPY2]], [[COPY]], [[COPY1]], 0, 0 :: (store 4 into %ir.addr) + ; CHECK: STRSroX [[COPY2]], [[COPY]], [[COPY1]], 0, 0 :: (store (s32) into %ir.addr) %0:gpr(p0) = COPY $x0 %1:gpr(s64) = COPY $x1 %ptr:gpr(p0) = G_PTR_ADD %0, %1 %3:fpr(s32) = COPY $s2 - G_STORE %3, %ptr :: (store 4 into %ir.addr) + G_STORE %3, %ptr :: (store (s32) into %ir.addr) ... --- name: strhrox @@ -139,12 +139,12 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 ; CHECK: [[COPY2:%[0-9]+]]:fpr16 = COPY $h0 - ; CHECK: STRHroX [[COPY2]], [[COPY]], [[COPY1]], 0, 0 :: (store 2 into %ir.addr) + ; CHECK: STRHroX [[COPY2]], [[COPY]], [[COPY1]], 0, 0 :: (store (s16) into %ir.addr) %0:gpr(p0) = COPY $x0 %1:gpr(s64) = COPY $x1 %ptr:gpr(p0) = G_PTR_ADD %0, %1 %3:fpr(s16) = COPY $h0 - G_STORE %3, %ptr :: (store 2 into %ir.addr) + G_STORE %3, %ptr :: (store (s16) into %ir.addr) ... --- name: strqrox @@ -161,12 +161,12 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 ; CHECK: [[COPY2:%[0-9]+]]:fpr128 = COPY $q2 - ; CHECK: STRQroX [[COPY2]], [[COPY]], [[COPY1]], 0, 0 :: (store 16 into %ir.addr) + ; CHECK: STRQroX [[COPY2]], [[COPY]], [[COPY1]], 0, 0 :: (store (<2 x s64>) into %ir.addr) %0:gpr(p0) = COPY $x0 %1:gpr(s64) = COPY $x1 %ptr:gpr(p0) = G_PTR_ADD %0, %1 %2:fpr(<2 x s64>) = COPY $q2 - G_STORE %2, %ptr :: (store 16 into %ir.addr) + G_STORE %2, %ptr :: (store (<2 x s64>) into %ir.addr) ... --- name: shl @@ -183,14 +183,14 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1 ; CHECK: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2 - ; CHECK: STRXroX [[COPY2]], [[COPY1]], [[COPY]], 0, 1 :: (store 8 into %ir.addr) + ; CHECK: STRXroX [[COPY2]], [[COPY1]], [[COPY]], 0, 1 :: (store (s64) into %ir.addr) %0:gpr(s64) = COPY $x0 %1:gpr(s64) = G_CONSTANT i64 3 %2:gpr(s64) = G_SHL %0, %1(s64) %3:gpr(p0) = COPY $x1 %ptr:gpr(p0) = G_PTR_ADD %3, %2 %4:gpr(s64) = COPY $x2 - G_STORE %4, %ptr :: (store 8 into %ir.addr) + G_STORE %4, %ptr :: (store (s64) into %ir.addr) ... --- name: shl_p0 @@ -208,11 +208,11 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1 ; CHECK: [[COPY2:%[0-9]+]]:gpr64all = COPY $x2 ; CHECK: [[COPY3:%[0-9]+]]:gpr64 = COPY [[COPY2]] - ; CHECK: STRXroX [[COPY3]], [[COPY1]], [[COPY]], 0, 1 :: (store 8 into %ir.addr) + ; CHECK: STRXroX [[COPY3]], [[COPY1]], [[COPY]], 0, 1 :: (store (s64) into %ir.addr) %0:gpr(s64) = COPY $x0 %1:gpr(s64) = G_CONSTANT i64 3 %2:gpr(s64) = G_SHL %0, %1(s64) %3:gpr(p0) = COPY $x1 %ptr:gpr(p0) = G_PTR_ADD %3, %2 %4:gpr(p0) = COPY $x2 - G_STORE %4, %ptr :: (store 8 into %ir.addr) + G_STORE %4, %ptr :: (store (s64) into %ir.addr) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/store-wro-addressing-modes.mir b/llvm/test/CodeGen/AArch64/GlobalISel/store-wro-addressing-modes.mir index 915f60f1dcec5..43004835007a4 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/store-wro-addressing-modes.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/store-wro-addressing-modes.mir @@ -15,7 +15,7 @@ body: | ; CHECK: %base:gpr64sp = COPY $x0 ; CHECK: %foo:gpr32 = COPY $w1 ; CHECK: %dst:gpr32 = COPY $w2 - ; CHECK: STRWroW %dst, %base, %foo, 1, 1 :: (store 4) + ; CHECK: STRWroW %dst, %base, %foo, 1, 1 :: (store (s32)) %base:gpr(p0) = COPY $x0 %foo:gpr(s32) = COPY $w1 %ext:gpr(s64) = G_SEXT %foo(s32) @@ -23,7 +23,7 @@ body: | %offset:gpr(s64) = G_SHL %ext, %c %ptr:gpr(p0) = G_PTR_ADD %base, %offset(s64) %dst:gpr(s32) = COPY $w2 - G_STORE %dst, %ptr :: (store 4) + G_STORE %dst, %ptr :: (store (s32)) ... --- name: strxrow @@ -40,7 +40,7 @@ body: | ; CHECK: %base:gpr64sp = COPY $x0 ; CHECK: %foo:gpr32 = COPY $w1 ; CHECK: %dst:gpr64 = COPY $x2 - ; CHECK: STRXroW %dst, %base, %foo, 1, 1 :: (store 8) + ; CHECK: STRXroW %dst, %base, %foo, 1, 1 :: (store (s64)) %base:gpr(p0) = COPY $x0 %foo:gpr(s32) = COPY $w1 %ext:gpr(s64) = G_SEXT %foo(s32) @@ -48,7 +48,7 @@ body: | %offset:gpr(s64) = G_SHL %ext, %c %ptr:gpr(p0) = G_PTR_ADD %base, %offset(s64) %dst:gpr(s64) = COPY $x2 - G_STORE %dst, %ptr :: (store 8) + G_STORE %dst, %ptr :: (store (s64)) ... --- name: strxrow_p0 @@ -66,7 +66,7 @@ body: | ; CHECK: %foo:gpr32 = COPY $w1 ; CHECK: %dst:gpr64all = COPY $x2 ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %dst - ; CHECK: STRXroW [[COPY]], %base, %foo, 1, 1 :: (store 8) + ; CHECK: STRXroW [[COPY]], %base, %foo, 1, 1 :: (store (p0)) %base:gpr(p0) = COPY $x0 %foo:gpr(s32) = COPY $w1 %ext:gpr(s64) = G_SEXT %foo(s32) @@ -74,5 +74,5 @@ body: | %offset:gpr(s64) = G_SHL %ext, %c %ptr:gpr(p0) = G_PTR_ADD %base, %offset(s64) %dst:gpr(p0) = COPY $x2 - G_STORE %dst, %ptr :: (store 8) + G_STORE %dst, %ptr :: (store (p0)) ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/subreg-copy.mir b/llvm/test/CodeGen/AArch64/GlobalISel/subreg-copy.mir index d5902d70842be..3c99623d57e51 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/subreg-copy.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/subreg-copy.mir @@ -12,7 +12,7 @@ body: | ; CHECK: successors: %bb.1(0x80000000) ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (load 16) + ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (load (s128)) ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[LDRQui]].ssub ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]] ; CHECK: TBNZW [[COPY2]], 0, %bb.1 @@ -23,7 +23,7 @@ body: | %1:gpr(p0) = COPY $x0 %3:gpr(s64) = G_CONSTANT i64 1 %5:gpr(s64) = G_CONSTANT i64 0 - %0:fpr(s128) = G_LOAD %1:gpr(p0) :: (load 16) + %0:fpr(s128) = G_LOAD %1:gpr(p0) :: (load (s128)) %2:fpr(s64) = G_TRUNC %0:fpr(s128) %8:gpr(s64) = COPY %2:fpr(s64) %4:gpr(s64) = G_AND %8:gpr, %3:gpr @@ -46,7 +46,7 @@ body: | ; CHECK: successors: %bb.1(0x80000000) ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (load 16) + ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (load (s128)) ; CHECK: [[COPY1:%[0-9]+]]:gpr64all = COPY [[LDRQui]].dsub ; CHECK: [[COPY2:%[0-9]+]]:gpr64 = COPY [[COPY1]] ; CHECK: TBNZX [[COPY2]], 33, %bb.1 @@ -57,7 +57,7 @@ body: | %1:gpr(p0) = COPY $x0 %3:gpr(s64) = G_CONSTANT i64 8589934592 %5:gpr(s64) = G_CONSTANT i64 0 - %0:fpr(s128) = G_LOAD %1:gpr(p0) :: (load 16) + %0:fpr(s128) = G_LOAD %1:gpr(p0) :: (load (s128)) %2:fpr(s64) = G_TRUNC %0:fpr(s128) %8:gpr(s64) = COPY %2:fpr(s64) %4:gpr(s64) = G_AND %8:gpr, %3:gpr diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/translate-constant-dag.ll b/llvm/test/CodeGen/AArch64/GlobalISel/translate-constant-dag.ll index 41c0c10c50a0f..1f91d8d06e8de 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/translate-constant-dag.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/translate-constant-dag.ll @@ -9,24 +9,24 @@ define void @test_const(%dag* %dst) { ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 10 - ; CHECK: STRBBui [[MOVi32imm]], [[COPY]], 0 :: (store 1 into %ir.dst) + ; CHECK: STRBBui [[MOVi32imm]], [[COPY]], 0 :: (store (s8) into %ir.dst) ; CHECK: [[MOVi32imm1:%[0-9]+]]:gpr32 = MOVi32imm 20 - ; CHECK: STRBBui [[MOVi32imm1]], [[COPY]], 1 :: (store 1 into %ir.dst + 1) - ; CHECK: STRBBui [[MOVi32imm]], [[COPY]], 2 :: (store 1 into %ir.dst + 2) - ; CHECK: STRBBui [[MOVi32imm1]], [[COPY]], 3 :: (store 1 into %ir.dst + 3) + ; CHECK: STRBBui [[MOVi32imm1]], [[COPY]], 1 :: (store (s8) into %ir.dst + 1) + ; CHECK: STRBBui [[MOVi32imm]], [[COPY]], 2 :: (store (s8) into %ir.dst + 2) + ; CHECK: STRBBui [[MOVi32imm1]], [[COPY]], 3 :: (store (s8) into %ir.dst + 3) ; CHECK: [[MOVi32imm2:%[0-9]+]]:gpr32 = MOVi32imm 50 - ; CHECK: STRBBui [[MOVi32imm2]], [[COPY]], 4 :: (store 1 into %ir.dst + 4) - ; CHECK: STRBBui [[MOVi32imm]], [[COPY]], 5 :: (store 1 into %ir.dst + 5) - ; CHECK: STRBBui [[MOVi32imm1]], [[COPY]], 6 :: (store 1 into %ir.dst + 6) - ; CHECK: STRBBui [[MOVi32imm1]], [[COPY]], 7 :: (store 1 into %ir.dst + 7) - ; CHECK: STRBBui [[MOVi32imm]], [[COPY]], 0 :: (store 1 into %ir.dst) - ; CHECK: STRBBui [[MOVi32imm1]], [[COPY]], 1 :: (store 1 into %ir.dst + 1) - ; CHECK: STRBBui [[MOVi32imm]], [[COPY]], 2 :: (store 1 into %ir.dst + 2) - ; CHECK: STRBBui [[MOVi32imm1]], [[COPY]], 3 :: (store 1 into %ir.dst + 3) - ; CHECK: STRBBui [[MOVi32imm1]], [[COPY]], 4 :: (store 1 into %ir.dst + 4) - ; CHECK: STRBBui [[MOVi32imm]], [[COPY]], 5 :: (store 1 into %ir.dst + 5) - ; CHECK: STRBBui [[MOVi32imm1]], [[COPY]], 6 :: (store 1 into %ir.dst + 6) - ; CHECK: STRBBui [[MOVi32imm1]], [[COPY]], 7 :: (store 1 into %ir.dst + 7) + ; CHECK: STRBBui [[MOVi32imm2]], [[COPY]], 4 :: (store (s8) into %ir.dst + 4) + ; CHECK: STRBBui [[MOVi32imm]], [[COPY]], 5 :: (store (s8) into %ir.dst + 5) + ; CHECK: STRBBui [[MOVi32imm1]], [[COPY]], 6 :: (store (s8) into %ir.dst + 6) + ; CHECK: STRBBui [[MOVi32imm1]], [[COPY]], 7 :: (store (s8) into %ir.dst + 7) + ; CHECK: STRBBui [[MOVi32imm]], [[COPY]], 0 :: (store (s8) into %ir.dst) + ; CHECK: STRBBui [[MOVi32imm1]], [[COPY]], 1 :: (store (s8) into %ir.dst + 1) + ; CHECK: STRBBui [[MOVi32imm]], [[COPY]], 2 :: (store (s8) into %ir.dst + 2) + ; CHECK: STRBBui [[MOVi32imm1]], [[COPY]], 3 :: (store (s8) into %ir.dst + 3) + ; CHECK: STRBBui [[MOVi32imm1]], [[COPY]], 4 :: (store (s8) into %ir.dst + 4) + ; CHECK: STRBBui [[MOVi32imm]], [[COPY]], 5 :: (store (s8) into %ir.dst + 5) + ; CHECK: STRBBui [[MOVi32imm1]], [[COPY]], 6 :: (store (s8) into %ir.dst + 6) + ; CHECK: STRBBui [[MOVi32imm1]], [[COPY]], 7 :: (store (s8) into %ir.dst + 7) ; CHECK: RET_ReallyLR entry: %updated = insertvalue diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/varargs-ios-translator.ll b/llvm/test/CodeGen/AArch64/GlobalISel/varargs-ios-translator.ll index 5f861f4ed0eeb..86055f0690f65 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/varargs-ios-translator.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/varargs-ios-translator.ll @@ -8,7 +8,7 @@ define void @test_varargs_sentinel(i8* %list, i64, i64, i64, i64, i64, i64, i64, ; CHECK: body: ; CHECK: [[LIST:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[VARARGS_AREA:%[0-9]+]]:gpr64common = ADDXri %fixed-stack.[[VARARGS_SLOT]], 0, 0 -; CHECK: STRXui [[VARARGS_AREA]], [[LIST]], 0 :: (store 8 into %ir.list, align 1) +; CHECK: STRXui [[VARARGS_AREA]], [[LIST]], 0 :: (store (s64) into %ir.list, align 1) call void @llvm.va_start(i8* %list) ret void } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/vastart.ll b/llvm/test/CodeGen/AArch64/GlobalISel/vastart.ll index 32d1ac7194901..f83554339542f 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/vastart.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/vastart.ll @@ -6,8 +6,8 @@ declare void @llvm.va_start(i8*) define void @test_va_start(i8* %list) { ; CHECK-LABEL: name: test_va_start ; CHECK: [[LIST:%[0-9]+]]:_(p0) = COPY $x0 -; CHECK-IOS: G_VASTART [[LIST]](p0) :: (store 8 into %ir.list, align 1) -; CHECK-LINUX: G_VASTART [[LIST]](p0) :: (store 32 into %ir.list, align 1) +; CHECK-IOS: G_VASTART [[LIST]](p0) :: (store (s64) into %ir.list, align 1) +; CHECK-LINUX: G_VASTART [[LIST]](p0) :: (store (s256) into %ir.list, align 1) call void @llvm.va_start(i8* %list) ret void } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/widen-narrow-tbz-tbnz.mir b/llvm/test/CodeGen/AArch64/GlobalISel/widen-narrow-tbz-tbnz.mir index 16c67bb66f973..dcd80c403e451 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/widen-narrow-tbz-tbnz.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/widen-narrow-tbz-tbnz.mir @@ -76,7 +76,7 @@ body: | ; CHECK: bb.0: ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000) ; CHECK: %glob:gpr64common = MOVaddr target-flags(aarch64-page) @glob, target-flags(aarch64-pageoff, aarch64-nc) @glob - ; CHECK: %load:gpr32 = LDRBBui %glob, 0 :: (dereferenceable load 1 from @glob, align 4) + ; CHECK: %load:gpr32 = LDRBBui %glob, 0 :: (dereferenceable load (s8) from @glob, align 4) ; CHECK: TBNZW %load, 0, %bb.1 ; CHECK: B %bb.0 ; CHECK: bb.1: @@ -84,7 +84,7 @@ body: | bb.0: successors: %bb.0, %bb.1 %glob:gpr(p0) = G_GLOBAL_VALUE @glob - %load:gpr(s8) = G_LOAD %glob(p0) :: (dereferenceable load 1 from @glob, align 4) + %load:gpr(s8) = G_LOAD %glob(p0) :: (dereferenceable load (s8) from @glob, align 4) %trunc:gpr(s1) = G_TRUNC %load(s8) ; Look through G_TRUNC to get the load. The load is into a s8, which will diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/xro-addressing-mode-constant.mir b/llvm/test/CodeGen/AArch64/GlobalISel/xro-addressing-mode-constant.mir index 7b2dae2f7e505..bd9dc51908ad0 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/xro-addressing-mode-constant.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/xro-addressing-mode-constant.mir @@ -24,12 +24,12 @@ body: | ; CHECK: liveins: $x0 ; CHECK: %copy:gpr64sp = COPY $x0 ; CHECK: %cst:gpr64 = MOVi64imm 4580179968 - ; CHECK: %load:gpr64 = LDRXroX %copy, %cst, 0, 0 :: (volatile load 8) + ; CHECK: %load:gpr64 = LDRXroX %copy, %cst, 0, 0 :: (volatile load (s64)) ; CHECK: RET_ReallyLR %copy:gpr(p0) = COPY $x0 %cst:gpr(s64) = G_CONSTANT i64 4580179968 %addr:gpr(p0) = G_PTR_ADD %copy, %cst(s64) - %load:gpr(s64) = G_LOAD %addr(p0) :: (volatile load 8) + %load:gpr(s64) = G_LOAD %addr(p0) :: (volatile load (s64)) RET_ReallyLR ... @@ -54,12 +54,12 @@ body: | ; CHECK: %copy:gpr64sp = COPY $x0 ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 61440 ; CHECK: %cst:gpr64 = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32 - ; CHECK: %load:gpr64 = LDRXroX %copy, %cst, 0, 0 :: (volatile load 8) + ; CHECK: %load:gpr64 = LDRXroX %copy, %cst, 0, 0 :: (volatile load (s64)) ; CHECK: RET_ReallyLR %copy:gpr(p0) = COPY $x0 %cst:gpr(s64) = G_CONSTANT i64 61440 %addr:gpr(p0) = G_PTR_ADD %copy, %cst(s64) - %load:gpr(s64) = G_LOAD %addr(p0) :: (volatile load 8) + %load:gpr(s64) = G_LOAD %addr(p0) :: (volatile load (s64)) RET_ReallyLR ... @@ -78,12 +78,12 @@ body: | ; CHECK: liveins: $x0 ; CHECK: %copy:gpr64sp = COPY $x0 ; CHECK: %cst:gpr64 = MOVi64imm -61440 - ; CHECK: %load:gpr64 = LDRXroX %copy, %cst, 0, 0 :: (volatile load 8) + ; CHECK: %load:gpr64 = LDRXroX %copy, %cst, 0, 0 :: (volatile load (s64)) ; CHECK: RET_ReallyLR %copy:gpr(p0) = COPY $x0 %cst:gpr(s64) = G_CONSTANT i64 -61440 %addr:gpr(p0) = G_PTR_ADD %copy, %cst(s64) - %load:gpr(s64) = G_LOAD %addr(p0) :: (volatile load 8) + %load:gpr(s64) = G_LOAD %addr(p0) :: (volatile load (s64)) RET_ReallyLR ... @@ -101,12 +101,12 @@ body: | ; CHECK-LABEL: name: dont_use_xro_selectable_imm ; CHECK: liveins: $x0 ; CHECK: %copy:gpr64sp = COPY $x0 - ; CHECK: %load:gpr64 = LDRXui %copy, 2 :: (volatile load 8) + ; CHECK: %load:gpr64 = LDRXui %copy, 2 :: (volatile load (s64)) ; CHECK: RET_ReallyLR %copy:gpr(p0) = COPY $x0 %cst:gpr(s64) = G_CONSTANT i64 16 %addr:gpr(p0) = G_PTR_ADD %copy, %cst(s64) - %load:gpr(s64) = G_LOAD %addr(p0) :: (volatile load 8) + %load:gpr(s64) = G_LOAD %addr(p0) :: (volatile load (s64)) RET_ReallyLR ... @@ -124,12 +124,12 @@ body: | ; CHECK-LABEL: name: dont_use_xro_selectable_negative_imm ; CHECK: liveins: $x0 ; CHECK: %copy:gpr64sp = COPY $x0 - ; CHECK: %load:gpr64 = LDURXi %copy, -16 :: (volatile load 8) + ; CHECK: %load:gpr64 = LDURXi %copy, -16 :: (volatile load (s64)) ; CHECK: RET_ReallyLR %copy:gpr(p0) = COPY $x0 %cst:gpr(s64) = G_CONSTANT i64 -16 %addr:gpr(p0) = G_PTR_ADD %copy, %cst(s64) - %load:gpr(s64) = G_LOAD %addr(p0) :: (volatile load 8) + %load:gpr(s64) = G_LOAD %addr(p0) :: (volatile load (s64)) RET_ReallyLR ... @@ -147,12 +147,12 @@ body: | ; CHECK-LABEL: name: dont_use_xro_zero ; CHECK: liveins: $x0 ; CHECK: %copy:gpr64sp = COPY $x0 - ; CHECK: %load:gpr64 = LDRXui %copy, 0 :: (volatile load 8) + ; CHECK: %load:gpr64 = LDRXui %copy, 0 :: (volatile load (s64)) ; CHECK: RET_ReallyLR %copy:gpr(p0) = COPY $x0 %cst:gpr(s64) = G_CONSTANT i64 0 %addr:gpr(p0) = G_PTR_ADD %copy, %cst(s64) - %load:gpr(s64) = G_LOAD %addr(p0) :: (volatile load 8) + %load:gpr(s64) = G_LOAD %addr(p0) :: (volatile load (s64)) RET_ReallyLR ... @@ -171,12 +171,12 @@ body: | ; CHECK-LABEL: name: dont_use_xro_in_range ; CHECK: liveins: $x0 ; CHECK: %copy:gpr64sp = COPY $x0 - ; CHECK: %load:gpr64 = LDURXi %copy, 17 :: (volatile load 8) + ; CHECK: %load:gpr64 = LDURXi %copy, 17 :: (volatile load (s64)) ; CHECK: RET_ReallyLR %copy:gpr(p0) = COPY $x0 %cst:gpr(s64) = G_CONSTANT i64 17 %addr:gpr(p0) = G_PTR_ADD %copy, %cst(s64) - %load:gpr(s64) = G_LOAD %addr(p0) :: (volatile load 8) + %load:gpr(s64) = G_LOAD %addr(p0) :: (volatile load (s64)) RET_ReallyLR ... @@ -202,10 +202,10 @@ body: | ; CHECK: %copy:gpr64 = COPY $x0 ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY %copy ; CHECK: %addr:gpr64sp = ADDXri [[COPY]], 273, 12 - ; CHECK: %load:gpr64 = LDRXui %addr, 0 :: (volatile load 8) + ; CHECK: %load:gpr64 = LDRXui %addr, 0 :: (volatile load (s64)) ; CHECK: RET_ReallyLR %copy:gpr(p0) = COPY $x0 %cst:gpr(s64) = G_CONSTANT i64 1118208 %addr:gpr(p0) = G_PTR_ADD %copy, %cst(s64) - %load:gpr(s64) = G_LOAD %addr(p0) :: (volatile load 8) + %load:gpr(s64) = G_LOAD %addr(p0) :: (volatile load (s64)) RET_ReallyLR diff --git a/llvm/test/CodeGen/AArch64/aarch64-ldst-modified-baseReg.mir b/llvm/test/CodeGen/AArch64/aarch64-ldst-modified-baseReg.mir index 54e5f394bf47e..74fca757fb453 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-ldst-modified-baseReg.mir +++ b/llvm/test/CodeGen/AArch64/aarch64-ldst-modified-baseReg.mir @@ -13,9 +13,9 @@ # # CHECK-LABEL: name: ldr-modified-baseReg-no-ldp1 # CHECK-NOT: LDP -# CHECK: $x9 = LDRXui $x10, 1 :: (load 8) -# CHECK: $x10 = LDURXi $x8, 1 :: (load 8) -# CHECK: $x10 = LDRXui $x10, 0 :: (load 8) +# CHECK: $x9 = LDRXui $x10, 1 :: (load (s64)) +# CHECK: $x10 = LDURXi $x8, 1 :: (load (s64)) +# CHECK: $x10 = LDRXui $x10, 0 :: (load (s64)) # CHECK: RET --- name: ldr-modified-baseReg-no-ldp1 @@ -24,17 +24,17 @@ body: | bb.0: liveins: $x8, $x10 - $x9 = LDRXui $x10, 1 :: (load 8) - $x10 = LDURXi $x8, 1 :: (load 8) - $x10 = LDRXui $x10, 0 :: (load 8) + $x9 = LDRXui $x10, 1 :: (load (s64)) + $x10 = LDURXi $x8, 1 :: (load (s64)) + $x10 = LDRXui $x10, 0 :: (load (s64)) RET undef $lr, implicit undef $w0 ... # CHECK-LABEL: name: str-modified-baseReg-no-stp1 # CHECK-NOT: STP -# CHECK: STRXui $x9, $x10, 1 :: (store 8) -# CHECK: $x10 = LDRXui $x8, 0 :: (load 8) -# CHECK: STRXui $x10, $x10, 0 :: (store 8) +# CHECK: STRXui $x9, $x10, 1 :: (store (s64)) +# CHECK: $x10 = LDRXui $x8, 0 :: (load (s64)) +# CHECK: STRXui $x10, $x10, 0 :: (store (s64)) # CHECK: RET --- name: str-modified-baseReg-no-stp1 @@ -43,17 +43,17 @@ body: | bb.0: liveins: $x9, $x8, $x10 - STRXui $x9, $x10, 1 :: (store 8) - $x10 = LDRXui $x8, 0 :: (load 8) - STRXui $x10, $x10, 0 :: (store 8) + STRXui $x9, $x10, 1 :: (store (s64)) + $x10 = LDRXui $x8, 0 :: (load (s64)) + STRXui $x10, $x10, 0 :: (store (s64)) RET undef $lr, implicit undef $w0 ... # CHECK-LABEL: name: ldr-modified-baseReg-no-ldp2 # CHECK-NOT: LDP -# CHECK: $x9 = LDRXui $x10, 1 :: (load 8) +# CHECK: $x9 = LDRXui $x10, 1 :: (load (s64)) # CHECK: $x10 = MOVi64imm 13 -# CHECK: $x11 = LDRXui $x10, 0 :: (load 8) +# CHECK: $x11 = LDRXui $x10, 0 :: (load (s64)) # CHECK: RET --- name: ldr-modified-baseReg-no-ldp2 @@ -62,17 +62,17 @@ body: | bb.0: liveins: $x8, $x10 - $x9 = LDRXui $x10, 1 :: (load 8) + $x9 = LDRXui $x10, 1 :: (load (s64)) $x10 = MOVi64imm 13 - $x11 = LDRXui $x10, 0 :: (load 8) + $x11 = LDRXui $x10, 0 :: (load (s64)) RET undef $lr, implicit undef $w0 ... # CHECK-LABEL: name: ldr-modified-baseReg-no-ldp3 # CHECK-NOT: LDP -# CHECK: $x9 = LDRXui $x10, 1 :: (load 8) +# CHECK: $x9 = LDRXui $x10, 1 :: (load (s64)) # CHECK: $x10 = ADDXri $x8, $x11, 0 -# CHECK: $x12 = LDRXui $x10, 0 :: (load 8) +# CHECK: $x12 = LDRXui $x10, 0 :: (load (s64)) # CHECK: RET --- name: ldr-modified-baseReg-no-ldp3 @@ -81,15 +81,15 @@ body: | bb.0: liveins: $x8, $x10, $x11 - $x9 = LDRXui $x10, 1 :: (load 8) + $x9 = LDRXui $x10, 1 :: (load (s64)) $x10 = ADDXri $x8, $x11, 0 - $x12 = LDRXui $x10, 0 :: (load 8) + $x12 = LDRXui $x10, 0 :: (load (s64)) RET undef $lr, implicit undef $w0 ... # CHECK-LABEL: name: ldr-modified-baseAddr-convert-to-ldp -# CHECK: $x12, $x9 = LDPXi $x10, 0 :: (load 8) -# CHECK: STRXui $x11, $x10, 1 :: (store 8) +# CHECK: $x12, $x9 = LDPXi $x10, 0 :: (load (s64)) +# CHECK: STRXui $x11, $x10, 1 :: (store (s64)) # CHECK: RET --- name: ldr-modified-baseAddr-convert-to-ldp @@ -98,8 +98,8 @@ body: | bb.0: liveins: $x8, $x10, $x11 - $x9 = LDRXui $x10, 1 :: (load 8) - STRXui $x11, $x10, 1 :: (store 8) - $x12 = LDRXui $x10, 0 :: (load 8) + $x9 = LDRXui $x10, 1 :: (load (s64)) + STRXui $x11, $x10, 1 :: (store (s64)) + $x12 = LDRXui $x10, 0 :: (load (s64)) RET undef $lr, implicit undef $w0 ... diff --git a/llvm/test/CodeGen/AArch64/aarch64-ldst-no-premature-sp-pop.mir b/llvm/test/CodeGen/AArch64/aarch64-ldst-no-premature-sp-pop.mir index 44b39ae8e6265..6db7f2f2534f8 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-ldst-no-premature-sp-pop.mir +++ b/llvm/test/CodeGen/AArch64/aarch64-ldst-no-premature-sp-pop.mir @@ -76,9 +76,9 @@ body: | $sp = frame-setup SUBXri $sp, 16, 0 $x8 = ADDXri $sp, 0, 0 - STRXui $xzr, $sp, 1 :: (store 8 into %ir.3 + 8) - STRXui $xzr, $sp, 0 :: (store 8 into %ir.3) - renamable $w0 = LDRWroW killed renamable $x8, killed renamable $w0, 1, 1 :: (load 4 from %ir.5, !tbaa !2) + STRXui $xzr, $sp, 1 :: (store (s64) into %ir.3 + 8) + STRXui $xzr, $sp, 0 :: (store (s64) into %ir.3) + renamable $w0 = LDRWroW killed renamable $x8, killed renamable $w0, 1, 1 :: (load (s32) from %ir.5, !tbaa !2) $sp = frame-destroy ADDXri $sp, 16, 0 RET undef $lr, implicit $w0 diff --git a/llvm/test/CodeGen/AArch64/aarch64-ldst-subsuperReg-no-ldp.mir b/llvm/test/CodeGen/AArch64/aarch64-ldst-subsuperReg-no-ldp.mir index 96eb270b06069..cf53a13c51218 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-ldst-subsuperReg-no-ldp.mir +++ b/llvm/test/CodeGen/AArch64/aarch64-ldst-subsuperReg-no-ldp.mir @@ -19,8 +19,8 @@ tracksRegLiveness: true body: | bb.0: liveins: $x9 - $x10 = LDRSWui $x9, 0 :: (load 4) - $w10 = LDRWui $x9, 1 :: (load 4) + $x10 = LDRSWui $x9, 0 :: (load (s32)) + $w10 = LDRWui $x9, 1 :: (load (s32)) RET undef $lr, implicit undef $w0 ... # CHECK-NOT: LDP @@ -33,7 +33,7 @@ tracksRegLiveness: true body: | bb.0: liveins: $x9 - $w10 = LDRWui $x9, 0 :: (load 4) - $x10 = LDRSWui $x9, 1 :: (load 4) + $w10 = LDRWui $x9, 0 :: (load (s32)) + $x10 = LDRSWui $x9, 1 :: (load (s32)) RET undef $lr, implicit undef $w0 ... diff --git a/llvm/test/CodeGen/AArch64/aarch64-mov-debug-locs.mir b/llvm/test/CodeGen/AArch64/aarch64-mov-debug-locs.mir index 9d7f2ae260475..6e444373047e4 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-mov-debug-locs.mir +++ b/llvm/test/CodeGen/AArch64/aarch64-mov-debug-locs.mir @@ -189,9 +189,9 @@ body: | DBG_VALUE $w0, $noreg, !15, !DIExpression(), debug-location !19 DBG_VALUE $w0, $noreg, !15, !DIExpression(), debug-location !19 DBG_VALUE $x1, $noreg, !16, !DIExpression(), debug-location !19 - early-clobber $sp = frame-setup STPXpre killed $x22, killed $x21, $sp, -6 :: (store 8 into %stack.5), (store 8 into %stack.4) - frame-setup STPXi killed $x20, killed $x19, $sp, 2 :: (store 8 into %stack.3), (store 8 into %stack.2) - frame-setup STPXi $fp, killed $lr, $sp, 4 :: (store 8 into %stack.1), (store 8 into %stack.0) + early-clobber $sp = frame-setup STPXpre killed $x22, killed $x21, $sp, -6 :: (store (s64) into %stack.5), (store (s64) into %stack.4) + frame-setup STPXi killed $x20, killed $x19, $sp, 2 :: (store (s64) into %stack.3), (store (s64) into %stack.2) + frame-setup STPXi $fp, killed $lr, $sp, 4 :: (store (s64) into %stack.1), (store (s64) into %stack.0) $fp = frame-setup ADDXri $sp, 32, 0 frame-setup CFI_INSTRUCTION def_cfa $w29, 16 frame-setup CFI_INSTRUCTION offset $w30, -8, debug-location !23 @@ -220,7 +220,7 @@ body: | liveins: $x19, $x20, $x21, $x22 DBG_VALUE $x20, $noreg, !17, !DIExpression(), debug-location !20 - renamable $x2 = LDRXroX renamable $x19, renamable $x20, 0, 1, debug-location !25 :: (load 8 from %ir.scevgep, !tbaa !28) + renamable $x2 = LDRXroX renamable $x19, renamable $x20, 0, 1, debug-location !25 :: (load (s64) from %ir.scevgep, !tbaa !28) $x0 = ORRXrs $xzr, $x21, 0, debug-location !32 $w1 = ORRWrs $wzr, $w20, 0, debug-location !32 BL @printf, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $x0, implicit killed $w1, implicit killed $x2, implicit-def $sp, implicit-def dead $w0, debug-location !32 @@ -230,10 +230,10 @@ body: | Bcc 1, %bb.2, implicit killed $nzcv, debug-location !23 bb.3.for.cond.cleanup: - $fp, $lr = frame-destroy LDPXi $sp, 4, debug-location !27 :: (load 8 from %stack.1), (load 8 from %stack.0) - $x20, $x19 = frame-destroy LDPXi $sp, 2, debug-location !27 :: (load 8 from %stack.3), (load 8 from %stack.2) + $fp, $lr = frame-destroy LDPXi $sp, 4, debug-location !27 :: (load (s64) from %stack.1), (load (s64) from %stack.0) + $x20, $x19 = frame-destroy LDPXi $sp, 2, debug-location !27 :: (load (s64) from %stack.3), (load (s64) from %stack.2) $w0 = ORRWrs $wzr, $wzr, 0, debug-location !27 - early-clobber $sp, $x22, $x21 = frame-destroy LDPXpost $sp, 6, debug-location !27 :: (load 8 from %stack.5), (load 8 from %stack.4) + early-clobber $sp, $x22, $x21 = frame-destroy LDPXpost $sp, 6, debug-location !27 :: (load (s64) from %stack.5), (load (s64) from %stack.4) RET undef $lr, implicit killed $w0, debug-location !27 ... diff --git a/llvm/test/CodeGen/AArch64/aarch64-vector-pcs.mir b/llvm/test/CodeGen/AArch64/aarch64-vector-pcs.mir index 10d311253c6b2..15b8e759dec42 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-vector-pcs.mir +++ b/llvm/test/CodeGen/AArch64/aarch64-vector-pcs.mir @@ -56,10 +56,10 @@ body: | ; +-------------------+ <- SP -48 ; CHECK-LABEL: test_q10_q11_x19{{[[:space:]]}} - ; CHECK-DAG: $sp = frame-setup STPQpre killed $q11, killed $q10, $sp, -3 :: (store 16 into %stack.[[Q11:[0-9]+]]), (store 16 into %stack.[[Q10:[0-9]+]]) + ; CHECK-DAG: $sp = frame-setup STPQpre killed $q11, killed $q10, $sp, -3 :: (store (s128) into %stack.[[Q11:[0-9]+]]), (store (s128) into %stack.[[Q10:[0-9]+]]) ; CHECK-DAG: - { id: [[Q11]], {{.*}}, offset: -48, size: 16, alignment: 16 ; CHECK-DAG: - { id: [[Q10]], {{.*}}, offset: -32, size: 16, alignment: 16 - ; CHECK-DAG: frame-setup STRXui killed $x19, $sp, 4 :: (store 8 into %stack.[[X19:[0-9]+]]) + ; CHECK-DAG: frame-setup STRXui killed $x19, $sp, 4 :: (store (s64) into %stack.[[X19:[0-9]+]]) ; CHECK-DAG: - { id: [[X19]], {{.*}}, offset: -16, size: 8, alignment: 16 ... @@ -81,8 +81,8 @@ body: | ; +-------------------+ <- SP -48 ; CHECK-LABEL: test_q10_q11_x19_x20{{[[:space:]]}} - ; CHECK-DAG: $sp = frame-setup STPQpre killed $q11, killed $q10, $sp, -3 :: (store 16 into %stack.[[Q11:[0-9]+]]), (store 16 into %stack.[[Q10:[0-9]+]]) - ; CHECK-DAG: frame-setup STPXi killed $x20, killed $x19, $sp, 4 :: (store 8 into %stack.[[X20:[0-9]+]]), (store 8 into %stack.[[X19:[0-9]+]]) + ; CHECK-DAG: $sp = frame-setup STPQpre killed $q11, killed $q10, $sp, -3 :: (store (s128) into %stack.[[Q11:[0-9]+]]), (store (s128) into %stack.[[Q10:[0-9]+]]) + ; CHECK-DAG: frame-setup STPXi killed $x20, killed $x19, $sp, 4 :: (store (s64) into %stack.[[X20:[0-9]+]]), (store (s64) into %stack.[[X19:[0-9]+]]) ; CHECK-DAG: - { id: [[Q11]], {{.*}}, offset: -48, size: 16, alignment: 16 ; CHECK-DAG: - { id: [[Q10]], {{.*}}, offset: -32, size: 16, alignment: 16 ; CHECK-DAG: - { id: [[X20]], {{.*}}, offset: -16, size: 8, alignment: 8 @@ -112,8 +112,8 @@ body: | ; +-------------------+ <- SP -64 ; CHECK-LABEL: test_q10_q11_x19_x20_x21 - ; CHECK-DAG: $sp = frame-setup STPQpre killed $q11, killed $q10, $sp, -4 :: (store 16 into %stack.[[Q11:[0-9]+]]), (store 16 into %stack.[[Q10:[0-9]+]]) - ; CHECK-DAG: frame-setup STRXui killed $x21, $sp, 4 :: (store 8 into %stack.[[X21:[0-9]+]]) + ; CHECK-DAG: $sp = frame-setup STPQpre killed $q11, killed $q10, $sp, -4 :: (store (s128) into %stack.[[Q11:[0-9]+]]), (store (s128) into %stack.[[Q10:[0-9]+]]) + ; CHECK-DAG: frame-setup STRXui killed $x21, $sp, 4 :: (store (s64) into %stack.[[X21:[0-9]+]]) ; CHECK-DAG: frame-setup STPXi killed $x20, killed $x19, $sp, 6 ; CHECK-DAG: - { id: [[Q11]], {{.*}}, offset: -64, size: 16, alignment: 16 ; CHECK-DAG: - { id: [[Q10]], {{.*}}, offset: -48, size: 16, alignment: 16 @@ -168,7 +168,7 @@ body: | ; CHECK-LABEL: test_q8_to_q23_x19_to_x30 ; CHECK: $sp = frame-setup SUBXri $sp, 368, 0 - ; CHECK-NEXT: frame-setup STPQi killed $q23, killed $q22, $sp, 1 :: (store 16 into %stack.{{[0-9]+}}), (store 16 into %stack.{{[0-9]+}}) + ; CHECK-NEXT: frame-setup STPQi killed $q23, killed $q22, $sp, 1 :: (store (s128) into %stack.{{[0-9]+}}), (store (s128) into %stack.{{[0-9]+}}) ; CHECK-NEXT: frame-setup STPQi killed $q21, killed $q20, $sp, 3 ; CHECK-NEXT: frame-setup STPQi killed $q19, killed $q18, $sp, 5 ; CHECK-NEXT: frame-setup STPQi killed $q17, killed $q16, $sp, 7 @@ -176,7 +176,7 @@ body: | ; CHECK-NEXT: frame-setup STPQi killed $q13, killed $q12, $sp, 11 ; CHECK-NEXT: frame-setup STPQi killed $q11, killed $q10, $sp, 13 ; CHECK-NEXT: frame-setup STPQi killed $q9, killed $q8, $sp, 15 - ; CHECK-NEXT: frame-setup STPXi killed $fp, killed $lr, $sp, 34 :: (store 8 into %stack.{{[0-9]+}}), (store 8 into %stack.{{[0-9]+}}) + ; CHECK-NEXT: frame-setup STPXi killed $fp, killed $lr, $sp, 34 :: (store (s64) into %stack.{{[0-9]+}}), (store (s64) into %stack.{{[0-9]+}}) ; CHECK-NEXT: frame-setup STPXi killed $x28, killed $x27, $sp, 36 ; CHECK-NEXT: frame-setup STPXi killed $x26, killed $x25, $sp, 38 ; CHECK-NEXT: frame-setup STPXi killed $x24, killed $x23, $sp, 40 @@ -234,7 +234,7 @@ body: | ; +-------------------+ <- SP -400 ; CHECK-LABEL: test_q8_to_q23_x19_to_x30_preinc - ; CHECK: $sp = frame-setup STPQpre killed $q23, killed $q22, $sp, -22 :: (store 16 into %stack.{{[0-9]+}}), (store 16 into %stack.{{[0-9]+}}) + ; CHECK: $sp = frame-setup STPQpre killed $q23, killed $q22, $sp, -22 :: (store (s128) into %stack.{{[0-9]+}}), (store (s128) into %stack.{{[0-9]+}}) ; CHECK-NEXT: frame-setup STPQi killed $q21, killed $q20, $sp, 2 ; CHECK-NEXT: frame-setup STPQi killed $q19, killed $q18, $sp, 4 ; CHECK-NEXT: frame-setup STPQi killed $q17, killed $q16, $sp, 6 @@ -242,7 +242,7 @@ body: | ; CHECK-NEXT: frame-setup STPQi killed $q13, killed $q12, $sp, 10 ; CHECK-NEXT: frame-setup STPQi killed $q11, killed $q10, $sp, 12 ; CHECK-NEXT: frame-setup STPQi killed $q9, killed $q8, $sp, 14 - ; CHECK-NEXT: frame-setup STPXi killed $fp, killed $lr, $sp, 32 :: (store 8 into %stack.{{[0-9]+}}), (store 8 into %stack.{{[0-9]+}}) + ; CHECK-NEXT: frame-setup STPXi killed $fp, killed $lr, $sp, 32 :: (store (s64) into %stack.{{[0-9]+}}), (store (s64) into %stack.{{[0-9]+}}) ; CHECK-NEXT: frame-setup STPXi killed $x28, killed $x27, $sp, 34 ; CHECK-NEXT: frame-setup STPXi killed $x26, killed $x25, $sp, 36 ; CHECK-NEXT: frame-setup STPXi killed $x24, killed $x23, $sp, 38 diff --git a/llvm/test/CodeGen/AArch64/arm64-misched-memdep-bug.ll b/llvm/test/CodeGen/AArch64/arm64-misched-memdep-bug.ll index 6d81d9acd8610..d53c9f270679b 100644 --- a/llvm/test/CodeGen/AArch64/arm64-misched-memdep-bug.ll +++ b/llvm/test/CodeGen/AArch64/arm64-misched-memdep-bug.ll @@ -5,14 +5,14 @@ ; ; CHECK: ********** MI Scheduling ********** ; CHECK: misched_bug:%bb.0 entry -; CHECK: SU(2): %2:gpr32 = LDRWui %0:gpr64common, 1 :: (load 4 from %ir.ptr1_plus1) +; CHECK: SU(2): %2:gpr32 = LDRWui %0:gpr64common, 1 :: (load (s32) from %ir.ptr1_plus1) ; CHECK: Successors: ; CHECK-NEXT: SU(5): Data Latency=4 Reg=%2 ; CHECK-NEXT: SU(4): Ord Latency=0 -; CHECK: SU(3): STRWui $wzr, %0:gpr64common, 0 :: (store 4 into %ir.ptr1) +; CHECK: SU(3): STRWui $wzr, %0:gpr64common, 0 :: (store (s32) into %ir.ptr1) ; CHECK: Successors: ; CHECK: SU(4): Ord Latency=0 -; CHECK: SU(4): STRWui $wzr, %1:gpr64common, 0 :: (store 4 into %ir.ptr2) +; CHECK: SU(4): STRWui $wzr, %1:gpr64common, 0 :: (store (s32) into %ir.ptr2) ; CHECK: SU(5): $w0 = COPY %2 ; CHECK: ** ScheduleDAGMI::schedule picking next node define i32 @misched_bug(i32* %ptr1, i32* %ptr2) { diff --git a/llvm/test/CodeGen/AArch64/branch-folder-merge-mmos.ll b/llvm/test/CodeGen/AArch64/branch-folder-merge-mmos.ll index 3ecb1d49ee1c7..d39260c68d79c 100644 --- a/llvm/test/CodeGen/AArch64/branch-folder-merge-mmos.ll +++ b/llvm/test/CodeGen/AArch64/branch-folder-merge-mmos.ll @@ -3,7 +3,7 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" ; Function Attrs: norecurse nounwind define void @foo(i32 %a, i32 %b, float* nocapture %foo_arr) #0 { -; CHECK: (load 4 from %ir.arrayidx1.{{i[1-2]}}) +; CHECK: (load (s32) from %ir.arrayidx1.{{i[1-2]}}) entry: %cmp = icmp sgt i32 %a, 0 br i1 %cmp, label %if.then, label %if.end diff --git a/llvm/test/CodeGen/AArch64/branch-relax-block-size.mir b/llvm/test/CodeGen/AArch64/branch-relax-block-size.mir index fb70fda38b3da..ec6900cc970c7 100644 --- a/llvm/test/CodeGen/AArch64/branch-relax-block-size.mir +++ b/llvm/test/CodeGen/AArch64/branch-relax-block-size.mir @@ -54,8 +54,8 @@ body: | successors: %bb.2(0x30000000), %bb.1(0x50000000) liveins: $x0, $x19, $lr - early-clobber $sp = frame-setup STRXpre killed $x19, $sp, -32 :: (store 8 into %stack.2) - frame-setup STPXi $fp, killed $lr, $sp, 2 :: (store 8 into %stack.1), (store 8 into %stack.0) + early-clobber $sp = frame-setup STRXpre killed $x19, $sp, -32 :: (store (s64) into %stack.2) + frame-setup STPXi $fp, killed $lr, $sp, 2 :: (store (s64) into %stack.1), (store (s64) into %stack.0) $fp = frame-setup ADDXri $sp, 16, 0 frame-setup CFI_INSTRUCTION def_cfa $w29, 16 frame-setup CFI_INSTRUCTION offset $w30, -8 @@ -68,13 +68,13 @@ body: | bb.1.if.then: liveins: $x19 - renamable $w0 = LDRWui killed renamable $x19, 0 :: (load 4 from %ir.a) + renamable $w0 = LDRWui killed renamable $x19, 0 :: (load (s32) from %ir.a) bb.2.return: liveins: $w0 - $fp, $lr = frame-destroy LDPXi $sp, 2 :: (load 8 from %stack.1), (load 8 from %stack.0) - early-clobber $sp, $x19 = frame-destroy LDRXpost $sp, 32 :: (load 8 from %stack.2) + $fp, $lr = frame-destroy LDPXi $sp, 2 :: (load (s64) from %stack.1), (load (s64) from %stack.0) + early-clobber $sp, $x19 = frame-destroy LDRXpost $sp, 32 :: (load (s64) from %stack.2) RET undef $lr, implicit killed $w0 ... diff --git a/llvm/test/CodeGen/AArch64/branch-target-enforcement.mir b/llvm/test/CodeGen/AArch64/branch-target-enforcement.mir index b8cb7750ff2c8..946005a90e74a 100644 --- a/llvm/test/CodeGen/AArch64/branch-target-enforcement.mir +++ b/llvm/test/CodeGen/AArch64/branch-target-enforcement.mir @@ -147,10 +147,10 @@ body: | ; CHECK-NOT: HINT ; CHECK: RETAA frame-setup PACIASP implicit-def $lr, implicit killed $lr, implicit $sp - early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store 8 into %stack.0) + early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store (s64) into %stack.0) INLINEASM &"", 1, 12, implicit-def dead early-clobber $lr $w0 = ORRWrs $wzr, $wzr, 0 - early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load 8 from %stack.0) + early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0) RETAA implicit $sp, implicit $lr, implicit killed $w0 --- @@ -171,10 +171,10 @@ body: | ; CHECK-NOT: HINT ; CHECK: RETAB frame-setup PACIBSP implicit-def $lr, implicit killed $lr, implicit $sp - early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store 8 into %stack.0) + early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store (s64) into %stack.0) INLINEASM &"", 1, 12, implicit-def dead early-clobber $lr $w0 = ORRWrs $wzr, $wzr, 0 - early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load 8 from %stack.0) + early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0) RETAB implicit $sp, implicit $lr, implicit killed $w0 --- @@ -206,7 +206,7 @@ body: | $x9 = ADRP target-flags(aarch64-page) %jump-table.0 renamable $x9 = ADDXri killed $x9, target-flags(aarch64-pageoff, aarch64-nc) %jump-table.0, 0 - renamable $x8 = LDRXroX killed renamable $x9, killed renamable $x8, 0, 1 :: (load 8 from jump-table) + renamable $x8 = LDRXroX killed renamable $x9, killed renamable $x8, 0, 1 :: (load (s64) from jump-table) BR killed renamable $x8 bb.2.sw.bb: @@ -267,7 +267,7 @@ body: | successors: %bb.1(0x40000000), %bb.2(0x40000000) renamable $x8 = ADRP target-flags(aarch64-page) @label_address.addr - renamable $x9 = LDRXui renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @label_address.addr :: (dereferenceable load 8 from @label_address.addr) + renamable $x9 = LDRXui renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @label_address.addr :: (dereferenceable load (s64) from @label_address.addr) BR killed renamable $x9 bb.1.return (address-taken): @@ -278,7 +278,7 @@ body: | $x9 = ADRP target-flags(aarch64-page) blockaddress(@label_address, %ir-block.lab2) renamable $w0 = ORRWri $wzr, 0 renamable $x9 = ADDXri killed $x9, target-flags(aarch64-pageoff, aarch64-nc) blockaddress(@label_address, %ir-block.lab2), 0 - STRXui killed renamable $x9, killed renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @label_address.addr :: (store 8 into @label_address.addr) + STRXui killed renamable $x9, killed renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @label_address.addr :: (store (s64) into @label_address.addr) RET undef $lr, implicit killed $w0 bb.2.lab2 (address-taken): @@ -289,7 +289,7 @@ body: | $x9 = ADRP target-flags(aarch64-page) blockaddress(@label_address, %ir-block.return) renamable $w0 = ORRWri $wzr, 1984 renamable $x9 = ADDXri killed $x9, target-flags(aarch64-pageoff, aarch64-nc) blockaddress(@label_address, %ir-block.return), 0 - STRXui killed renamable $x9, killed renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @label_address.addr :: (store 8 into @label_address.addr) + STRXui killed renamable $x9, killed renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @label_address.addr :: (store (s64) into @label_address.addr) RET undef $lr, implicit killed $w0 --- @@ -310,7 +310,7 @@ body: | successors: %bb.1(0x40000000), %bb.2(0x40000000) renamable $x8 = ADRP target-flags(aarch64-page) @label_address.addr - renamable $x9 = LDRXui renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @label_address.addr :: (dereferenceable load 8 from @label_address.addr) + renamable $x9 = LDRXui renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @label_address.addr :: (dereferenceable load (s64) from @label_address.addr) BR killed renamable $x9 bb.1.return (address-taken): @@ -319,13 +319,13 @@ body: | liveins: $x8 frame-setup PACIASP implicit-def $lr, implicit killed $lr, implicit $sp frame-setup CFI_INSTRUCTION negate_ra_sign_state - early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store 8 into %stack.0) + early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store (s64) into %stack.0) INLINEASM &"", 1, 12, implicit-def dead early-clobber $lr $x9 = ADRP target-flags(aarch64-page) blockaddress(@label_address, %ir-block.entry) renamable $w0 = ORRWri $wzr, 0 renamable $x9 = ADDXri killed $x9, target-flags(aarch64-pageoff, aarch64-nc) blockaddress(@label_address, %ir-block.entry), 0 - STRXui killed renamable $x9, killed renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @label_address.addr :: (store 8 into @label_address.addr) - early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load 8 from %stack.0) + STRXui killed renamable $x9, killed renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @label_address.addr :: (store (s64) into @label_address.addr) + early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0) RETAA implicit $sp, implicit $lr, implicit killed $w0 bb.2.lab2: @@ -336,7 +336,7 @@ body: | $x9 = ADRP target-flags(aarch64-page) blockaddress(@label_address, %ir-block.return) renamable $w0 = ORRWri $wzr, 1984 renamable $x9 = ADDXri killed $x9, target-flags(aarch64-pageoff, aarch64-nc) blockaddress(@label_address, %ir-block.return), 0 - STRXui killed renamable $x9, killed renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @label_address.addr :: (store 8 into @label_address.addr) + STRXui killed renamable $x9, killed renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @label_address.addr :: (store (s64) into @label_address.addr) RET undef $lr, implicit killed $w0 --- # When PACIASP is the first real instruction in the functions then BTI should not be inserted. @@ -356,10 +356,10 @@ body: | ; CHECK: RETAA frame-setup PACIASP implicit-def $lr, implicit killed $lr, implicit $sp frame-setup CFI_INSTRUCTION negate_ra_sign_state - early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store 8 into %stack.0) + early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store (s64) into %stack.0) INLINEASM &"", 1, 12, implicit-def dead early-clobber $lr $w0 = ORRWrs $wzr, $wzr, 0 - early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load 8 from %stack.0) + early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0) RETAA implicit $sp, implicit $lr, implicit killed $w0 ... diff --git a/llvm/test/CodeGen/AArch64/cfi_restore.mir b/llvm/test/CodeGen/AArch64/cfi_restore.mir index dc244e94f097f..a68cdeb492804 100644 --- a/llvm/test/CodeGen/AArch64/cfi_restore.mir +++ b/llvm/test/CodeGen/AArch64/cfi_restore.mir @@ -17,18 +17,18 @@ body: | liveins: $fp, $lr $sp = frame-setup SUBXri $sp, 16, 0 - frame-setup STRXui killed $fp, $sp, 0 :: (store 8 into %stack.1) + frame-setup STRXui killed $fp, $sp, 0 :: (store (s64) into %stack.1) frame-setup CFI_INSTRUCTION offset $w29, -16 ; CHECK: .cfi_offset w29, -16 - frame-setup STRXui killed $lr, $sp, 1 :: (store 8 into %stack.0) + frame-setup STRXui killed $lr, $sp, 1 :: (store (s64) into %stack.0) frame-setup CFI_INSTRUCTION offset $w30, -8 ; CHECK: .cfi_offset w30, -8 $fp = frame-setup ADDXri $sp, 0, 0 frame-setup CFI_INSTRUCTION def_cfa $w29, 16 - $lr = LDRXui $sp, 1 :: (load 8 from %stack.0) + $lr = LDRXui $sp, 1 :: (load (s64) from %stack.0) CFI_INSTRUCTION restore $w30 ; CHECK: .cfi_restore w30 - $fp = LDRXui $sp, 0 :: (load 8 from %stack.1) + $fp = LDRXui $sp, 0 :: (load (s64) from %stack.1) CFI_INSTRUCTION restore $w29 ; CHECK: .cfi_restore w29 $sp = ADDXri $sp, 16, 0 diff --git a/llvm/test/CodeGen/AArch64/cluster-frame-index.mir b/llvm/test/CodeGen/AArch64/cluster-frame-index.mir index 74582b386968b..37ab9418f4dbd 100644 --- a/llvm/test/CodeGen/AArch64/cluster-frame-index.mir +++ b/llvm/test/CodeGen/AArch64/cluster-frame-index.mir @@ -12,9 +12,9 @@ body: | %0:gpr32 = COPY $w0 %1:gpr32 = COPY $w1 undef %3.sub_32:gpr64 = ORRWrs $wzr, %0, 0 - STRXui %3, %stack.0, 0 :: (store 8) + STRXui %3, %stack.0, 0 :: (store (s64)) undef %5.sub_32:gpr64 = ORRWrs $wzr, %1, 0 - STRXui %5, %stack.0, 1 :: (store 8) + STRXui %5, %stack.0, 1 :: (store (s64)) RET_ReallyLR ; CHECK: COPY @@ -38,9 +38,9 @@ body: | %0:gpr32 = COPY $w0 %1:gpr32 = COPY $w1 undef %3.sub_32:gpr64 = ORRWrs $wzr, %0, 0 - STRXui %3, %fixed-stack.0, 0 :: (store 8) + STRXui %3, %fixed-stack.0, 0 :: (store (s64)) undef %5.sub_32:gpr64 = ORRWrs $wzr, %1, 0 - STRXui %5, %fixed-stack.0, 1 :: (store 8) + STRXui %5, %fixed-stack.0, 1 :: (store (s64)) RET_ReallyLR ; CHECK: COPY diff --git a/llvm/test/CodeGen/AArch64/debug-info-sve-dbg-declare.mir b/llvm/test/CodeGen/AArch64/debug-info-sve-dbg-declare.mir index 32b8cd0920243..81b3c6d9092c5 100644 --- a/llvm/test/CodeGen/AArch64/debug-info-sve-dbg-declare.mir +++ b/llvm/test/CodeGen/AArch64/debug-info-sve-dbg-declare.mir @@ -198,9 +198,9 @@ body: | ST1W_IMM killed renamable $z1, renamable $p0, %stack.1.z1.addr, 0 :: (store unknown-size into %ir.z1.addr, align 16) STR_PXI killed renamable $p2, %stack.2.p0.addr, 0 :: (store unknown-size into %ir.p0.addr, align 2) STR_PXI killed renamable $p1, %stack.3.p1.addr, 0 :: (store unknown-size into %ir.p1.addr, align 2) - STRWui killed renamable $w0, %stack.4.w0.addr, 0 :: (store 4 into %ir.w0.addr) - renamable $w8 = LDRWui %stack.4.w0.addr, 0, debug-location !41 :: (dereferenceable load 4 from %ir.w0.addr) - STRWui killed renamable $w8, %stack.5.local_gpr0, 0, debug-location !40 :: (store 4 into %ir.local_gpr0) + STRWui killed renamable $w0, %stack.4.w0.addr, 0 :: (store (s32) into %ir.w0.addr) + renamable $w8 = LDRWui %stack.4.w0.addr, 0, debug-location !41 :: (dereferenceable load (s32) from %ir.w0.addr) + STRWui killed renamable $w8, %stack.5.local_gpr0, 0, debug-location !40 :: (store (s32) into %ir.local_gpr0) renamable $z0 = LD1W_IMM renamable $p0, %stack.0.z0.addr, 0, debug-location !44 :: (load unknown-size from %ir.z0.addr, align 16) ST1W_IMM killed renamable $z0, renamable $p0, %stack.6.localv0, 0, debug-location !43 :: (store unknown-size into %ir.localv0, align 16) renamable $z0 = LD1W_IMM renamable $p0, %stack.1.z1.addr, 0, debug-location !47 :: (load unknown-size from %ir.z1.addr, align 16) diff --git a/llvm/test/CodeGen/AArch64/dont-shrink-wrap-stack-mayloadorstore.mir b/llvm/test/CodeGen/AArch64/dont-shrink-wrap-stack-mayloadorstore.mir index 0755e3b4c3b24..bc60b7b571197 100644 --- a/llvm/test/CodeGen/AArch64/dont-shrink-wrap-stack-mayloadorstore.mir +++ b/llvm/test/CodeGen/AArch64/dont-shrink-wrap-stack-mayloadorstore.mir @@ -138,7 +138,7 @@ body: | bb.1.if.end: liveins: $w0 - STRWui killed renamable $w0, %stack.0.rstack, 0 :: (volatile store 4 into %ir.arrayidx1) + STRWui killed renamable $w0, %stack.0.rstack, 0 :: (volatile store (s32) into %ir.arrayidx1) renamable $w9 = MOVi32imm 1 renamable $x8 = ADDXri %stack.0.rstack, 0, 0 @@ -147,7 +147,7 @@ body: | liveins: $w9, $x8 renamable $w10 = SUBWri renamable $w9, 1, 0, implicit-def $x10 - renamable $w11 = LDRWroW renamable $x8, renamable $w10, 0, 1 :: (volatile load 4 from %ir.arrayidx2) + renamable $w11 = LDRWroW renamable $x8, renamable $w10, 0, 1 :: (volatile load (s32) from %ir.arrayidx2) CBNZW renamable $w11, %bb.4 bb.3: @@ -159,7 +159,7 @@ body: | bb.4.if.then4: liveins: $w9, $w11, $x8, $x10 - STRWroX killed renamable $w11, renamable $x8, killed renamable $x10, 0, 1 :: (volatile store 4 into %ir.1) + STRWroX killed renamable $w11, renamable $x8, killed renamable $x10, 0, 1 :: (volatile store (s32) into %ir.1) bb.5.if.end7: successors: %bb.6(0x04000000), %bb.2(0x7c000000) @@ -204,19 +204,19 @@ body: | renamable $w9 = MOVi32imm 67305985 renamable $w8 = ORRWrs $wzr, renamable $w1, 0, implicit-def $x8 - STRWui killed renamable $w9, %stack.0.tmp, 0 :: (volatile store 4 into %ir.tmp41) + STRWui killed renamable $w9, %stack.0.tmp, 0 :: (volatile store (s32) into %ir.tmp41) renamable $x9 = ADDXri %stack.0.tmp, 0, 0 - renamable $w10 = LDRBBroX renamable $x9, renamable $x8, 0, 0 :: (volatile load 1 from %ir.tmp6) + renamable $w10 = LDRBBroX renamable $x9, renamable $x8, 0, 0 :: (volatile load (s8) from %ir.tmp6) dead $wzr = SUBSWri killed renamable $w1, 2, 0, implicit-def $nzcv - STRWui killed renamable $w10, renamable $x0, 0 :: (store 4 into %ir.tmp92) + STRWui killed renamable $w10, renamable $x0, 0 :: (store (s32) into %ir.tmp92) Bcc 8, %bb.4, implicit killed $nzcv B %bb.3 bb.3.bb11: liveins: $x0, $x8, $x9 - renamable $w8 = LDRBBroX killed renamable $x9, killed renamable $x8, 0, 0 :: (volatile load 1 from %ir.sunkaddr) - STRWui killed renamable $w8, killed renamable $x0, 1 :: (store 4 into %ir.tmp14) + renamable $w8 = LDRBBroX killed renamable $x9, killed renamable $x8, 0, 0 :: (volatile load (s8) from %ir.sunkaddr) + STRWui killed renamable $w8, killed renamable $x0, 1 :: (store (s32) into %ir.tmp14) bb.4.bb15: renamable $w0 = COPY $wzr diff --git a/llvm/test/CodeGen/AArch64/early-ifcvt-regclass-mismatch.mir b/llvm/test/CodeGen/AArch64/early-ifcvt-regclass-mismatch.mir index 436baae1a2b29..619d8fe0b76eb 100644 --- a/llvm/test/CodeGen/AArch64/early-ifcvt-regclass-mismatch.mir +++ b/llvm/test/CodeGen/AArch64/early-ifcvt-regclass-mismatch.mir @@ -128,7 +128,7 @@ body: | bb.2.if.then139.i: successors: %bb.5(0x80000000) - %7:gpr64 = LDRXui %8, 0 :: (load 8 from `double* undef`) + %7:gpr64 = LDRXui %8, 0 :: (load (s64) from `double* undef`) B %bb.5 bb.3.if.else142.i: diff --git a/llvm/test/CodeGen/AArch64/elim-dead-mi.mir b/llvm/test/CodeGen/AArch64/elim-dead-mi.mir index 1bfd5b44d9c5c..bfbd9a529a065 100644 --- a/llvm/test/CodeGen/AArch64/elim-dead-mi.mir +++ b/llvm/test/CodeGen/AArch64/elim-dead-mi.mir @@ -39,7 +39,7 @@ body: | bb.2: successors: %bb.1(0x80000000) %3:gpr64common = ADDXrr %6, %2 - %4:gpr32 = LDRBBui killed %3, 1 :: (load 1 from %ir.scevgep) + %4:gpr32 = LDRBBui killed %3, 1 :: (load (s8) from %ir.scevgep) %5:gpr32all = COPY %4 B %bb.1 diff --git a/llvm/test/CodeGen/AArch64/falkor-hwpf-fix.mir b/llvm/test/CodeGen/AArch64/falkor-hwpf-fix.mir index 4f144e1ef8bff..3f72b87ee7475 100644 --- a/llvm/test/CodeGen/AArch64/falkor-hwpf-fix.mir +++ b/llvm/test/CodeGen/AArch64/falkor-hwpf-fix.mir @@ -12,7 +12,7 @@ body: | bb.0: liveins: $w0, $x1 - $w2 = LDRWui $x1, 0 :: ("aarch64-strided-access" load 4) + $w2 = LDRWui $x1, 0 :: ("aarch64-strided-access" load (s32)) $w2 = LDRWui $x1, 1 $w0 = SUBWri $w0, 1, 0 @@ -33,7 +33,7 @@ body: | bb.0: liveins: $w0, $x1, $q2 - $q2 = LD1i64 $q2, 0, $x1 :: ("aarch64-strided-access" load 4) + $q2 = LD1i64 $q2, 0, $x1 :: ("aarch64-strided-access" load (s32)) $w2 = LDRWui $x1, 0 $w0 = SUBWri $w0, 1, 0 @@ -54,7 +54,7 @@ body: | bb.0: liveins: $w0, $x1, $q2 - $q2 = LD1i8 $q2, 0, $x1 :: ("aarch64-strided-access" load 4) + $q2 = LD1i8 $q2, 0, $x1 :: ("aarch64-strided-access" load (s32)) $w0 = LDRWui $x1, 0 $w0 = SUBWri $w0, 1, 0 @@ -75,7 +75,7 @@ body: | bb.0: liveins: $w0, $x1 - $d2 = LD1Onev1d $x1 :: ("aarch64-strided-access" load 4) + $d2 = LD1Onev1d $x1 :: ("aarch64-strided-access" load (s32)) $w2 = LDRWui $x1, 0 $w0 = SUBWri $w0, 1, 0 @@ -96,7 +96,7 @@ body: | bb.0: liveins: $w0, $x1 - $d2_d3 = LD1Twov1d $x1 :: ("aarch64-strided-access" load 4) + $d2_d3 = LD1Twov1d $x1 :: ("aarch64-strided-access" load (s32)) $w0 = LDRWui $x1, 0 $w0 = SUBWri $w0, 1, 0 @@ -117,7 +117,7 @@ body: | bb.0: liveins: $w0, $x1 - $q2, $q3 = LDPQi $x1, 3 :: ("aarch64-strided-access" load 4) + $q2, $q3 = LDPQi $x1, 3 :: ("aarch64-strided-access" load (s32)) $w0 = LDRWui $x1, 3 $w0 = SUBWri $w0, 1, 0 @@ -138,7 +138,7 @@ body: | bb.0: liveins: $w0, $x1 - $x2, $x3 = LDPXi $x1, 3 :: ("aarch64-strided-access" load 4) + $x2, $x3 = LDPXi $x1, 3 :: ("aarch64-strided-access" load (s32)) $w2 = LDRWui $x1, 2 $w0 = SUBWri $w0, 1, 0 @@ -163,7 +163,7 @@ body: | bb.0: liveins: $w0, $x1 - $x1, $w2 = LDRWpost $x1, 0 :: ("aarch64-strided-access" load 4) + $x1, $w2 = LDRWpost $x1, 0 :: ("aarch64-strided-access" load (s32)) $w2 = LDRWui $x1, 1 $w0 = SUBWri $w0, 1, 0 @@ -185,7 +185,7 @@ body: | bb.0: liveins: $w0, $x1, $q2 - $x1, $q2 = LD1i64_POST $q2, 0, $x1, $x1 :: ("aarch64-strided-access" load 4) + $x1, $q2 = LD1i64_POST $q2, 0, $x1, $x1 :: ("aarch64-strided-access" load (s32)) $w2 = LDRWui $x1, 132 $w0 = SUBWri $w0, 1, 0 @@ -207,7 +207,7 @@ body: | bb.0: liveins: $w0, $x1, $q2 - $x1, $q2 = LD1i8_POST $q2, 0, $x1, $x1 :: ("aarch64-strided-access" load 4) + $x1, $q2 = LD1i8_POST $q2, 0, $x1, $x1 :: ("aarch64-strided-access" load (s32)) $w0 = LDRWui $x1, 132 $w0 = SUBWri $w0, 1, 0 @@ -229,7 +229,7 @@ body: | bb.0: liveins: $w0, $x1, $q2 - $x1, $d2 = LD1Rv1d_POST $x1, $xzr :: ("aarch64-strided-access" load 4) + $x1, $d2 = LD1Rv1d_POST $x1, $xzr :: ("aarch64-strided-access" load (s32)) $w2 = LDRWui $x1, 252 $w0 = SUBWri $w0, 1, 0 @@ -251,7 +251,7 @@ body: | bb.0: liveins: $w0, $x1, $x17, $q2 - $x1, $d2_d3_d4 = LD3Threev2s_POST $x1, $x0 :: ("aarch64-strided-access" load 4) + $x1, $d2_d3_d4 = LD3Threev2s_POST $x1, $x0 :: ("aarch64-strided-access" load (s32)) $w0 = LDRWroX $x17, $x0, 0, 0 $w0 = SUBWri $w0, 1, 0 @@ -273,7 +273,7 @@ body: | bb.0: liveins: $w0, $x1, $x17, $q2 - $x1, $d2, $d3 = LDPDpost $x1, 3 :: ("aarch64-strided-access" load 4) + $x1, $d2, $d3 = LDPDpost $x1, 3 :: ("aarch64-strided-access" load (s32)) $w16 = LDRWui $x17, 2 $w0 = SUBWri $w0, 1, 0 @@ -295,7 +295,7 @@ body: | bb.0: liveins: $w0, $x1, $x17, $q2 - $x1, $x2, $x3 = LDPXpost $x1, 3 :: ("aarch64-strided-access" load 4) + $x1, $x2, $x3 = LDPXpost $x1, 3 :: ("aarch64-strided-access" load (s32)) $w18 = LDRWui $x17, 2 $w0 = SUBWri $w0, 1, 0 @@ -319,9 +319,9 @@ body: | bb.0: liveins: $w0, $x1, $x17 - $w1 = LDARW $x1 :: ("aarch64-strided-access" load 4) - $w1 = LDRWui $x1, 0 :: ("aarch64-strided-access" load 4) - $w17 = LDRWui $x17, 0 :: ("aarch64-strided-access" load 4) + $w1 = LDARW $x1 :: ("aarch64-strided-access" load (s32)) + $w1 = LDRWui $x1, 0 :: ("aarch64-strided-access" load (s32)) + $w17 = LDRWui $x17, 0 :: ("aarch64-strided-access" load (s32)) $w0 = SUBWri $w0, 1, 0 $wzr = SUBSWri $w0, 0, 0, implicit-def $nzcv @@ -343,7 +343,7 @@ body: | bb.0: liveins: $w0, $x15 - $w1 = LDRWui $x15, 0 :: ("aarch64-strided-access" load 4) + $w1 = LDRWui $x15, 0 :: ("aarch64-strided-access" load (s32)) $w17 = LDRWui $sp, 0 $w0 = SUBWri $w0, 1, 0 @@ -366,7 +366,7 @@ body: | bb.0: liveins: $w0, $x1, $x2, $x17, $x18 - $w10 = LDRWroX $x1, $x2, 0, 0 :: ("aarch64-strided-access" load 4) + $w10 = LDRWroX $x1, $x2, 0, 0 :: ("aarch64-strided-access" load (s32)) $x2 = ORRXrs $xzr, $x10, 0 $w26 = LDRWroX $x1, $x2, 0, 0 diff --git a/llvm/test/CodeGen/AArch64/irg-nomem.mir b/llvm/test/CodeGen/AArch64/irg-nomem.mir index d023fd6405bcb..d428f16011a70 100644 --- a/llvm/test/CodeGen/AArch64/irg-nomem.mir +++ b/llvm/test/CodeGen/AArch64/irg-nomem.mir @@ -64,10 +64,10 @@ body: | ; Check that stores are merged across IRG. ; CHECK: STPXi renamable $x9, renamable $x9, renamable $x0, 0 - STRXui renamable $x9, renamable $x0, 0 :: (store 8 into %ir.x) + STRXui renamable $x9, renamable $x0, 0 :: (store (s64) into %ir.x) dead renamable $x10 = IRG renamable $x8, $xzr dead renamable $x8 = IRG $sp, $xzr - STRXui killed renamable $x9, killed renamable $x0, 1 :: (store 8 into %ir.arrayidx1) + STRXui killed renamable $x9, killed renamable $x0, 1 :: (store (s64) into %ir.arrayidx1) RET undef $lr ... diff --git a/llvm/test/CodeGen/AArch64/jti-correct-datatype.mir b/llvm/test/CodeGen/AArch64/jti-correct-datatype.mir index aeb815c99d00c..dcfa829750d1b 100644 --- a/llvm/test/CodeGen/AArch64/jti-correct-datatype.mir +++ b/llvm/test/CodeGen/AArch64/jti-correct-datatype.mir @@ -46,7 +46,7 @@ body: | liveins: $x1, $x2, $x3, $x4 renamable $x8 = ADRP target-flags(aarch64-page) @reps - renamable $w9 = LDRWui renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @reps :: (volatile dereferenceable load 4 from @reps, !tbaa !2) + renamable $w9 = LDRWui renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @reps :: (volatile dereferenceable load (s32) from @reps, !tbaa !2) dead $wzr = SUBSWri killed renamable $w9, 1, 0, implicit-def $nzcv Bcc 11, %bb.6, implicit $nzcv @@ -55,7 +55,7 @@ body: | liveins: $x1, $x2, $x3, $x4 renamable $x8 = ADRP target-flags(aarch64-page) @reps - renamable $w9 = LDRWui renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @reps :: (volatile dereferenceable load 4 from @reps, !tbaa !2) + renamable $w9 = LDRWui renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @reps :: (volatile dereferenceable load (s32) from @reps, !tbaa !2) dead $wzr = SUBSWri killed renamable $w9, 1, 0, implicit-def $nzcv Bcc 11, %bb.6, implicit $nzcv @@ -64,7 +64,7 @@ body: | liveins: $x1, $x2, $x3, $x4 renamable $x8 = ADRP target-flags(aarch64-page) @reps - renamable $w9 = LDRWui renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @reps :: (volatile dereferenceable load 4 from @reps, !tbaa !2) + renamable $w9 = LDRWui renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @reps :: (volatile dereferenceable load (s32) from @reps, !tbaa !2) dead $wzr = SUBSWri killed renamable $w9, 1, 0, implicit-def $nzcv Bcc 11, %bb.6, implicit $nzcv @@ -73,7 +73,7 @@ body: | liveins: $x1, $x2, $x3, $x4 renamable $x8 = ADRP target-flags(aarch64-page) @reps - renamable $w9 = LDRWui renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @reps :: (volatile dereferenceable load 4 from @reps, !tbaa !2) + renamable $w9 = LDRWui renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @reps :: (volatile dereferenceable load (s32) from @reps, !tbaa !2) dead $wzr = SUBSWri killed renamable $w9, 1, 0, implicit-def $nzcv Bcc 11, %bb.6, implicit $nzcv diff --git a/llvm/test/CodeGen/AArch64/jump-table-duplicate.mir b/llvm/test/CodeGen/AArch64/jump-table-duplicate.mir index 0e4148e0163f5..0963ecbb12311 100644 --- a/llvm/test/CodeGen/AArch64/jump-table-duplicate.mir +++ b/llvm/test/CodeGen/AArch64/jump-table-duplicate.mir @@ -121,8 +121,8 @@ body: | successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $w0, $w1, $x19, $x20, $lr - early-clobber $sp = frame-setup STPXpre killed $x20, killed $x19, $sp, -4 :: (store 8 into %stack.3), (store 8 into %stack.2) - frame-setup STPXi killed $fp, killed $lr, $sp, 2 :: (store 8 into %stack.1), (store 8 into %stack.0) + early-clobber $sp = frame-setup STPXpre killed $x20, killed $x19, $sp, -4 :: (store (s64) into %stack.3), (store (s64) into %stack.2) + frame-setup STPXi killed $fp, killed $lr, $sp, 2 :: (store (s64) into %stack.1), (store (s64) into %stack.0) frame-setup CFI_INSTRUCTION def_cfa_offset 32 frame-setup CFI_INSTRUCTION offset $w30, -8 frame-setup CFI_INSTRUCTION offset $w29, -16 @@ -181,8 +181,8 @@ body: | bb.9.lbl1: liveins: $w0 - $fp, $lr = frame-destroy LDPXi $sp, 2 :: (load 8 from %stack.1), (load 8 from %stack.0) - early-clobber $sp, $x20, $x19 = frame-destroy LDPXpost $sp, 4 :: (load 8 from %stack.3), (load 8 from %stack.2) + $fp, $lr = frame-destroy LDPXi $sp, 2 :: (load (s64) from %stack.1), (load (s64) from %stack.0) + early-clobber $sp, $x20, $x19 = frame-destroy LDPXpost $sp, 4 :: (load (s64) from %stack.3), (load (s64) from %stack.2) RET_ReallyLR implicit $w0 ... diff --git a/llvm/test/CodeGen/AArch64/ldrpre-ldr-merge.mir b/llvm/test/CodeGen/AArch64/ldrpre-ldr-merge.mir index 683a92fae457c..0ecea1f8ae9ba 100644 --- a/llvm/test/CodeGen/AArch64/ldrpre-ldr-merge.mir +++ b/llvm/test/CodeGen/AArch64/ldrpre-ldr-merge.mir @@ -14,13 +14,13 @@ body: | liveins: $w0, $w1, $x1 ; CHECK-LABEL: name: 1-ldrwpre-ldrwui-merge ; CHECK: liveins: $w0, $w1, $x1 - ; CHECK: early-clobber $x1, renamable $w0, renamable $w1 = LDPWpre renamable $x1, 5 :: (load 4) - ; CHECK: STPWi renamable $w0, renamable $w1, renamable $x1, 0 :: (store 4) + ; CHECK: early-clobber $x1, renamable $w0, renamable $w1 = LDPWpre renamable $x1, 5 :: (load (s32)) + ; CHECK: STPWi renamable $w0, renamable $w1, renamable $x1, 0 :: (store (s32)) ; CHECK: RET undef $lr - early-clobber renamable $x1, renamable $w0 = LDRWpre killed renamable $x1, 20 :: (load 4) - renamable $w1 = LDRWui renamable $x1, 1 :: (load 4) - STRWui killed renamable $w0, renamable $x1, 0 :: (store 4) - STRWui killed renamable $w1, renamable $x1, 1 :: (store 4) + early-clobber renamable $x1, renamable $w0 = LDRWpre killed renamable $x1, 20 :: (load (s32)) + renamable $w1 = LDRWui renamable $x1, 1 :: (load (s32)) + STRWui killed renamable $w0, renamable $x1, 0 :: (store (s32)) + STRWui killed renamable $w1, renamable $x1, 1 :: (store (s32)) RET undef $lr ... @@ -39,13 +39,13 @@ body: | liveins: $x2, $x3, $x1 ; CHECK-LABEL: name: 2-ldrxpre-ldrxui-merge ; CHECK: liveins: $x1, $x2, $x3 - ; CHECK: early-clobber $x1, renamable $x2, renamable $x3 = LDPXpre renamable $x1, 3 :: (load 8) - ; CHECK: STPXi renamable $x2, renamable $x3, renamable $x1, 0 :: (store 8) + ; CHECK: early-clobber $x1, renamable $x2, renamable $x3 = LDPXpre renamable $x1, 3 :: (load (s64)) + ; CHECK: STPXi renamable $x2, renamable $x3, renamable $x1, 0 :: (store (s64)) ; CHECK: RET undef $lr - early-clobber renamable $x1, renamable $x2 = LDRXpre killed renamable $x1, 24 :: (load 8) - renamable $x3 = LDRXui renamable $x1, 1 :: (load 8) - STRXui killed renamable $x2, renamable $x1, 0 :: (store 8) - STRXui killed renamable $x3, renamable $x1, 1 :: (store 8) + early-clobber renamable $x1, renamable $x2 = LDRXpre killed renamable $x1, 24 :: (load (s64)) + renamable $x3 = LDRXui renamable $x1, 1 :: (load (s64)) + STRXui killed renamable $x2, renamable $x1, 0 :: (store (s64)) + STRXui killed renamable $x3, renamable $x1, 1 :: (store (s64)) RET undef $lr ... @@ -64,14 +64,14 @@ body: | liveins: $s0, $s1, $x1 ; CHECK-LABEL: name: 3-ldrspre-ldrsui-merge ; CHECK: liveins: $s0, $s1, $x1 - ; CHECK: early-clobber $x1, renamable $s0, renamable $s1 = LDPSpre renamable $x1, 3 :: (load 4) - ; CHECK: STRSui renamable $s0, renamable $x1, 0 :: (store 4) - ; CHECK: STRSui renamable $s1, renamable $x1, 1 :: ("aarch64-suppress-pair" store 4) + ; CHECK: early-clobber $x1, renamable $s0, renamable $s1 = LDPSpre renamable $x1, 3 :: (load (s32)) + ; CHECK: STRSui renamable $s0, renamable $x1, 0 :: (store (s32)) + ; CHECK: STRSui renamable $s1, renamable $x1, 1 :: ("aarch64-suppress-pair" store (s32)) ; CHECK: RET undef $lr - early-clobber renamable $x1, renamable $s0 = LDRSpre killed renamable $x1, 12 :: (load 4) - renamable $s1 = LDRSui renamable $x1, 1 :: (load 4) - STRSui killed renamable $s0, renamable $x1, 0 :: (store 4) - STRSui killed renamable $s1, renamable $x1, 1 :: (store 4) + early-clobber renamable $x1, renamable $s0 = LDRSpre killed renamable $x1, 12 :: (load (s32)) + renamable $s1 = LDRSui renamable $x1, 1 :: (load (s32)) + STRSui killed renamable $s0, renamable $x1, 0 :: (store (s32)) + STRSui killed renamable $s1, renamable $x1, 1 :: (store (s32)) RET undef $lr ... @@ -90,14 +90,14 @@ body: | liveins: $d0, $d1, $x1 ; CHECK-LABEL: name: 4-ldrqdre-ldrdui-merge ; CHECK: liveins: $d0, $d1, $x1 - ; CHECK: early-clobber $x1, renamable $d0, renamable $d1 = LDPDpre renamable $x1, 16 :: (load 8) - ; CHECK: STRDui renamable $d0, renamable $x1, 0 :: (store 8) - ; CHECK: STRDui renamable $d1, renamable $x1, 1 :: ("aarch64-suppress-pair" store 8) + ; CHECK: early-clobber $x1, renamable $d0, renamable $d1 = LDPDpre renamable $x1, 16 :: (load (s64)) + ; CHECK: STRDui renamable $d0, renamable $x1, 0 :: (store (s64)) + ; CHECK: STRDui renamable $d1, renamable $x1, 1 :: ("aarch64-suppress-pair" store (s64)) ; CHECK: RET undef $lr - early-clobber renamable $x1, renamable $d0 = LDRDpre killed renamable $x1, 128 :: (load 8) - renamable $d1 = LDRDui renamable $x1, 1 :: (load 8) - STRDui killed renamable $d0, renamable $x1, 0 :: (store 8) - STRDui killed renamable $d1, renamable $x1, 1 :: (store 8) + early-clobber renamable $x1, renamable $d0 = LDRDpre killed renamable $x1, 128 :: (load (s64)) + renamable $d1 = LDRDui renamable $x1, 1 :: (load (s64)) + STRDui killed renamable $d0, renamable $x1, 0 :: (store (s64)) + STRDui killed renamable $d1, renamable $x1, 1 :: (store (s64)) RET undef $lr ... @@ -120,13 +120,13 @@ body: | liveins: $q0, $q1, $x1 ; CHECK-LABEL: name: 5-ldrqpre-ldrqui-merge ; CHECK: liveins: $q0, $q1, $x1 - ; CHECK: early-clobber $x1, renamable $q0, renamable $q1 = LDPQpre renamable $x1, 3 :: (load 16) - ; CHECK: STPQi renamable $q0, renamable $q1, renamable $x1, 0 :: (store 16) + ; CHECK: early-clobber $x1, renamable $q0, renamable $q1 = LDPQpre renamable $x1, 3 :: (load (s128)) + ; CHECK: STPQi renamable $q0, renamable $q1, renamable $x1, 0 :: (store (s128)) ; CHECK: RET undef $lr - early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, 48 :: (load 16) - renamable $q1 = LDRQui renamable $x1, 1 :: (load 16) - STRQui killed renamable $q0, renamable $x1, 0 :: (store 16) - STRQui killed renamable $q1, renamable $x1, 1 :: (store 16) + early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, 48 :: (load (s128)) + renamable $q1 = LDRQui renamable $x1, 1 :: (load (s128)) + STRQui killed renamable $q0, renamable $x1, 0 :: (store (s128)) + STRQui killed renamable $q1, renamable $x1, 1 :: (store (s128)) RET undef $lr ... @@ -149,14 +149,14 @@ body: | liveins: $q0, $q1, $x1 ; CHECK-LABEL: name: 6-ldrqui-ldrqpre-no-merge ; CHECK: liveins: $q0, $q1, $x1 - ; CHECK: renamable $q1 = LDRQui renamable $x1, 1 :: (load 16) - ; CHECK: early-clobber renamable $x1, renamable $q0 = LDRQpre renamable $x1, 48, implicit $w1 :: (load 16) - ; CHECK: STPQi renamable $q0, renamable $q1, renamable $x1, 0 :: (store 16) + ; CHECK: renamable $q1 = LDRQui renamable $x1, 1 :: (load (s128)) + ; CHECK: early-clobber renamable $x1, renamable $q0 = LDRQpre renamable $x1, 48, implicit $w1 :: (load (s128)) + ; CHECK: STPQi renamable $q0, renamable $q1, renamable $x1, 0 :: (store (s128)) ; CHECK: RET undef $lr - renamable $q1 = LDRQui renamable $x1, 1 :: (load 16) - early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, 48 :: (load 16) - STRQui killed renamable $q0, renamable $x1, 0 :: (store 16) - STRQui killed renamable $q1, renamable $x1, 1 :: (store 16) + renamable $q1 = LDRQui renamable $x1, 1 :: (load (s128)) + early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, 48 :: (load (s128)) + STRQui killed renamable $q0, renamable $x1, 0 :: (store (s128)) + STRQui killed renamable $q1, renamable $x1, 1 :: (store (s128)) RET undef $lr ... @@ -179,13 +179,13 @@ body: | liveins: $q0, $q1, $x1 ; CHECK-LABEL: name: 7-ldrqpre-ldrqui-max-offset-merge ; CHECK: liveins: $q0, $q1, $x1 - ; CHECK: early-clobber $x1, renamable $q0, renamable $q1 = LDPQpre renamable $x1, 15 :: (load 16) - ; CHECK: STPQi renamable $q0, renamable $q1, renamable $x1, 0 :: (store 16) + ; CHECK: early-clobber $x1, renamable $q0, renamable $q1 = LDPQpre renamable $x1, 15 :: (load (s128)) + ; CHECK: STPQi renamable $q0, renamable $q1, renamable $x1, 0 :: (store (s128)) ; CHECK: RET undef $lr - early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, 240 :: (load 16) - renamable $q1 = LDRQui renamable $x1, 1 :: (load 16) - STRQui killed renamable $q0, renamable $x1, 0 :: (store 16) - STRQui killed renamable $q1, renamable $x1, 1 :: (store 16) + early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, 240 :: (load (s128)) + renamable $q1 = LDRQui renamable $x1, 1 :: (load (s128)) + STRQui killed renamable $q0, renamable $x1, 0 :: (store (s128)) + STRQui killed renamable $q1, renamable $x1, 1 :: (store (s128)) RET undef $lr ... @@ -208,13 +208,13 @@ body: | liveins: $q0, $q1, $x1 ; CHECK-LABEL: name: 8-ldrqpre-ldrqui-min-offset-merge ; CHECK: liveins: $q0, $q1, $x1 - ; CHECK: early-clobber $x1, renamable $q0, renamable $q1 = LDPQpre renamable $x1, -16 :: (load 16) - ; CHECK: STPQi renamable $q0, renamable $q1, renamable $x1, 0 :: (store 16) + ; CHECK: early-clobber $x1, renamable $q0, renamable $q1 = LDPQpre renamable $x1, -16 :: (load (s128)) + ; CHECK: STPQi renamable $q0, renamable $q1, renamable $x1, 0 :: (store (s128)) ; CHECK: RET undef $lr - early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, -256 :: (load 16) - renamable $q1 = LDRQui renamable $x1, 1 :: (load 16) - STRQui killed renamable $q0, renamable $x1, 0 :: (store 16) - STRQui killed renamable $q1, renamable $x1, 1 :: (store 16) + early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, -256 :: (load (s128)) + renamable $q1 = LDRQui renamable $x1, 1 :: (load (s128)) + STRQui killed renamable $q0, renamable $x1, 0 :: (store (s128)) + STRQui killed renamable $q1, renamable $x1, 1 :: (store (s128)) RET undef $lr ... @@ -238,16 +238,16 @@ body: | liveins: $s0, $s1, $x0, $x1 ; CHECK-LABEL: name: 9-ldrspre-ldrsui-mod-base-reg-no-merge ; CHECK: liveins: $s0, $s1, $x0, $x1 - ; CHECK: dead early-clobber renamable $x1, renamable $s0 = LDRSpre renamable $x1, 12, implicit $w1 :: (load 4) - ; CHECK: renamable $x1 = LDRXui renamable $x0, 1 :: (load 8) - ; CHECK: renamable $s1 = LDRSui renamable $x1, 1 :: (load 4) - ; CHECK: STPSi renamable $s0, renamable $s1, renamable $x1, 0 :: (store 4) + ; CHECK: dead early-clobber renamable $x1, renamable $s0 = LDRSpre renamable $x1, 12, implicit $w1 :: (load (s32)) + ; CHECK: renamable $x1 = LDRXui renamable $x0, 1 :: (load (s64)) + ; CHECK: renamable $s1 = LDRSui renamable $x1, 1 :: (load (s32)) + ; CHECK: STPSi renamable $s0, renamable $s1, renamable $x1, 0 :: (store (s32)) ; CHECK: RET undef $lr - early-clobber renamable $x1, renamable $s0 = LDRSpre killed renamable $x1, 12 :: (load 4) - renamable $x1 = LDRXui renamable $x0, 1 :: (load 8) - renamable $s1 = LDRSui renamable $x1, 1 :: (load 4) - STRSui killed renamable $s0, renamable $x1, 0 :: (store 4) - STRSui killed renamable $s1, renamable $x1, 1 :: (store 4) + early-clobber renamable $x1, renamable $s0 = LDRSpre killed renamable $x1, 12 :: (load (s32)) + renamable $x1 = LDRXui renamable $x0, 1 :: (load (s64)) + renamable $s1 = LDRSui renamable $x1, 1 :: (load (s32)) + STRSui killed renamable $s0, renamable $x1, 0 :: (store (s32)) + STRSui killed renamable $s1, renamable $x1, 1 :: (store (s32)) RET undef $lr ... @@ -271,19 +271,19 @@ body: | liveins: $s0, $s1, $x0, $x1 ; CHECK-LABEL: name: 10-ldrspre-ldrsui-used-base-reg-no-merge ; CHECK: liveins: $s0, $s1, $x0, $x1 - ; CHECK: early-clobber renamable $x1, renamable $s0 = LDRSpre renamable $x1, 12, implicit $w1 :: (load 4) - ; CHECK: renamable $x0 = LDRXui renamable $x1, 1 :: (load 8) - ; CHECK: STRXui renamable $x0, renamable $x0, 1 :: (store 8) - ; CHECK: renamable $s1 = LDRSui renamable $x1, 1 :: (load 4) - ; CHECK: STRSui renamable $s0, renamable $x1, 0 :: (store 4) - ; CHECK: STRSui renamable $s1, renamable $x1, 1 :: ("aarch64-suppress-pair" store 4) + ; CHECK: early-clobber renamable $x1, renamable $s0 = LDRSpre renamable $x1, 12, implicit $w1 :: (load (s32)) + ; CHECK: renamable $x0 = LDRXui renamable $x1, 1 :: (load (s64)) + ; CHECK: STRXui renamable $x0, renamable $x0, 1 :: (store (s64)) + ; CHECK: renamable $s1 = LDRSui renamable $x1, 1 :: (load (s32)) + ; CHECK: STRSui renamable $s0, renamable $x1, 0 :: (store (s32)) + ; CHECK: STRSui renamable $s1, renamable $x1, 1 :: ("aarch64-suppress-pair" store (s32)) ; CHECK: RET undef $lr - early-clobber renamable $x1, renamable $s0 = LDRSpre killed renamable $x1, 12 :: (load 4) - renamable $x0 = LDRXui renamable $x1, 1 :: (load 8) - STRXui killed renamable $x0, renamable $x0, 1 :: (store 8) - renamable $s1 = LDRSui renamable $x1, 1 :: (load 4) - STRSui killed renamable $s0, renamable $x1, 0 :: (store 4) - STRSui killed renamable $s1, renamable $x1, 1 :: (store 4) + early-clobber renamable $x1, renamable $s0 = LDRSpre killed renamable $x1, 12 :: (load (s32)) + renamable $x0 = LDRXui renamable $x1, 1 :: (load (s64)) + STRXui killed renamable $x0, renamable $x0, 1 :: (store (s64)) + renamable $s1 = LDRSui renamable $x1, 1 :: (load (s32)) + STRSui killed renamable $s0, renamable $x1, 0 :: (store (s32)) + STRSui killed renamable $s1, renamable $x1, 1 :: (store (s32)) RET undef $lr ... @@ -306,22 +306,22 @@ body: | liveins: $q0, $q1, $x1 ; CHECK-LABEL: name: 11-ldrqpre-ldrqpre-no-merge ; CHECK: liveins: $q0, $q1, $x1 - ; CHECK: early-clobber renamable $x1, dead renamable $q0 = LDRQpre renamable $x1, 48, implicit $w1 :: (load 16) - ; CHECK: early-clobber renamable $x1, dead renamable $q1 = LDRQpre renamable $x1, 1, implicit $w1 :: (load 16) - ; CHECK: early-clobber renamable $x1, dead renamable $q0 = LDRQpre renamable $x1, 16, implicit $w1 :: (load 16) - ; CHECK: early-clobber renamable $x1, dead renamable $q1 = LDRQpre renamable $x1, 12, implicit $w1 :: (load 16) - ; CHECK: early-clobber renamable $x1, renamable $q0 = LDRQpre renamable $x1, 16, implicit $w1 :: (load 16) - ; CHECK: early-clobber renamable $x1, renamable $q1 = LDRQpre renamable $x1, 16, implicit $w1 :: (load 16) - ; CHECK: STPQi renamable $q0, renamable $q1, renamable $x1, 0 :: (store 16) + ; CHECK: early-clobber renamable $x1, dead renamable $q0 = LDRQpre renamable $x1, 48, implicit $w1 :: (load (s128)) + ; CHECK: early-clobber renamable $x1, dead renamable $q1 = LDRQpre renamable $x1, 1, implicit $w1 :: (load (s128)) + ; CHECK: early-clobber renamable $x1, dead renamable $q0 = LDRQpre renamable $x1, 16, implicit $w1 :: (load (s128)) + ; CHECK: early-clobber renamable $x1, dead renamable $q1 = LDRQpre renamable $x1, 12, implicit $w1 :: (load (s128)) + ; CHECK: early-clobber renamable $x1, renamable $q0 = LDRQpre renamable $x1, 16, implicit $w1 :: (load (s128)) + ; CHECK: early-clobber renamable $x1, renamable $q1 = LDRQpre renamable $x1, 16, implicit $w1 :: (load (s128)) + ; CHECK: STPQi renamable $q0, renamable $q1, renamable $x1, 0 :: (store (s128)) ; CHECK: RET undef $lr - early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, 48 :: (load 16) - early-clobber renamable $x1, renamable $q1 = LDRQpre killed renamable $x1, 1 :: (load 16) - early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, 16 :: (load 16) - early-clobber renamable $x1, renamable $q1 = LDRQpre killed renamable $x1, 12 :: (load 16) - early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, 16 :: (load 16) - early-clobber renamable $x1, renamable $q1 = LDRQpre killed renamable $x1, 16 :: (load 16) - STRQui killed renamable $q0, renamable $x1, 0 :: (store 16) - STRQui killed renamable $q1, renamable $x1, 1 :: (store 16) + early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, 48 :: (load (s128)) + early-clobber renamable $x1, renamable $q1 = LDRQpre killed renamable $x1, 1 :: (load (s128)) + early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, 16 :: (load (s128)) + early-clobber renamable $x1, renamable $q1 = LDRQpre killed renamable $x1, 12 :: (load (s128)) + early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, 16 :: (load (s128)) + early-clobber renamable $x1, renamable $q1 = LDRQpre killed renamable $x1, 16 :: (load (s128)) + STRQui killed renamable $q0, renamable $x1, 0 :: (store (s128)) + STRQui killed renamable $q1, renamable $x1, 1 :: (store (s128)) RET undef $lr ... @@ -344,15 +344,15 @@ body: | ; CHECK-LABEL: name: 12-ldrspre-ldrsui-no-merge ; CHECK: liveins: $s0, $s1, $x1 - ; CHECK: early-clobber renamable $x1, renamable $s0 = LDRSpre renamable $x1, 12, implicit $w1 :: (load 4) - ; CHECK: renamable $s1 = LDRSui renamable $x1, 2 :: (load 4) - ; CHECK: STRSui renamable $s0, renamable $x1, 0 :: (store 4) - ; CHECK: STRSui renamable $s1, renamable $x1, 1 :: ("aarch64-suppress-pair" store 4) + ; CHECK: early-clobber renamable $x1, renamable $s0 = LDRSpre renamable $x1, 12, implicit $w1 :: (load (s32)) + ; CHECK: renamable $s1 = LDRSui renamable $x1, 2 :: (load (s32)) + ; CHECK: STRSui renamable $s0, renamable $x1, 0 :: (store (s32)) + ; CHECK: STRSui renamable $s1, renamable $x1, 1 :: ("aarch64-suppress-pair" store (s32)) ; CHECK: RET undef $lr - early-clobber renamable $x1, renamable $s0 = LDRSpre killed renamable $x1, 12 :: (load 4) - renamable $s1 = LDRSui renamable $x1, 2 :: (load 4) - STRSui killed renamable $s0, renamable $x1, 0 :: (store 4) - STRSui killed renamable $s1, renamable $x1, 1 :: (store 4) + early-clobber renamable $x1, renamable $s0 = LDRSpre killed renamable $x1, 12 :: (load (s32)) + renamable $s1 = LDRSui renamable $x1, 2 :: (load (s32)) + STRSui killed renamable $s0, renamable $x1, 0 :: (store (s32)) + STRSui killed renamable $s1, renamable $x1, 1 :: (store (s32)) RET undef $lr ... @@ -375,15 +375,15 @@ body: | liveins: $q0, $d1, $x1 ; CHECK-LABEL: name: 13-ldrqpre-ldrdui-no-merge ; CHECK: liveins: $d1, $q0, $x1 - ; CHECK: early-clobber renamable $x1, renamable $q0 = LDRQpre renamable $x1, 32, implicit $w1 :: (load 16) - ; CHECK: renamable $d1 = LDRDui renamable $x1, 1 :: (load 8) - ; CHECK: STRQui renamable $q0, renamable $x1, 0 :: (store 16) - ; CHECK: STRDui renamable $d1, renamable $x1, 1 :: (store 8) + ; CHECK: early-clobber renamable $x1, renamable $q0 = LDRQpre renamable $x1, 32, implicit $w1 :: (load (s128)) + ; CHECK: renamable $d1 = LDRDui renamable $x1, 1 :: (load (s64)) + ; CHECK: STRQui renamable $q0, renamable $x1, 0 :: (store (s128)) + ; CHECK: STRDui renamable $d1, renamable $x1, 1 :: (store (s64)) ; CHECK: RET undef $lr - early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, 32 :: (load 16) - renamable $d1 = LDRDui renamable $x1, 1 :: (load 8) - STRQui killed renamable $q0, renamable $x1, 0 :: (store 16) - STRDui killed renamable $d1, renamable $x1, 1 :: (store 8) + early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, 32 :: (load (s128)) + renamable $d1 = LDRDui renamable $x1, 1 :: (load (s64)) + STRQui killed renamable $q0, renamable $x1, 0 :: (store (s128)) + STRDui killed renamable $d1, renamable $x1, 1 :: (store (s64)) RET undef $lr ... @@ -406,11 +406,11 @@ body: | liveins: $q0, $q1, $x1 ; CHECK-LABEL: name: 14-ldrqpre-strqui-no-merge ; CHECK: liveins: $q0, $q1, $x1 - ; CHECK: early-clobber renamable $x1, renamable $q0 = LDRQpre renamable $x1, 32, implicit $w1 :: (load 16) - ; CHECK: STRQui renamable $q0, renamable $x1, 0 :: (store 16) + ; CHECK: early-clobber renamable $x1, renamable $q0 = LDRQpre renamable $x1, 32, implicit $w1 :: (load (s128)) + ; CHECK: STRQui renamable $q0, renamable $x1, 0 :: (store (s128)) ; CHECK: RET undef $lr - early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, 32 :: (load 16) - STRQui killed renamable $q0, renamable $x1, 0 :: (store 16) + early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, 32 :: (load (s128)) + STRQui killed renamable $q0, renamable $x1, 0 :: (store (s128)) RET undef $lr ... @@ -433,13 +433,13 @@ body: | liveins: $q0, $x1 ; CHECK-LABEL: name: 15-ldrqpre-ldrqui-same-dst-reg-no-merge ; CHECK: liveins: $q0, $q1, $x1 - ; CHECK: early-clobber renamable $x1, dead renamable $q0 = LDRQpre renamable $x1, 32, implicit $w1 :: (load 16) - ; CHECK: renamable $q0 = LDRQui renamable $x1, 1 :: (load 16) - ; CHECK: STRQui renamable $q0, renamable $x1, 0 :: (store 16) + ; CHECK: early-clobber renamable $x1, dead renamable $q0 = LDRQpre renamable $x1, 32, implicit $w1 :: (load (s128)) + ; CHECK: renamable $q0 = LDRQui renamable $x1, 1 :: (load (s128)) + ; CHECK: STRQui renamable $q0, renamable $x1, 0 :: (store (s128)) ; CHECK: RET undef $lr - early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, 32 :: (load 16) - renamable $q0 = LDRQui renamable $x1, 1 :: (load 16) - STRQui killed renamable $q0, renamable $x1, 0 :: (store 16) + early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, 32 :: (load (s128)) + renamable $q0 = LDRQui renamable $x1, 1 :: (load (s128)) + STRQui killed renamable $q0, renamable $x1, 0 :: (store (s128)) RET undef $lr ... @@ -463,14 +463,14 @@ body: | liveins: $q0, $q1, $x1, $x2 ; CHECK-LABEL: name: 16-ldrqpre-ldrqui-diff-base-reg-no-merge ; CHECK: liveins: $q0, $q1, $x1, $x2 - ; CHECK: early-clobber renamable $x1, renamable $q0 = LDRQpre renamable $x1, 32, implicit $w1 :: (load 16) - ; CHECK: renamable $q1 = LDRQui renamable $x2, 1 :: (load 16) - ; CHECK: STPQi renamable $q0, renamable $q1, renamable $x1, 0 :: (store 16) + ; CHECK: early-clobber renamable $x1, renamable $q0 = LDRQpre renamable $x1, 32, implicit $w1 :: (load (s128)) + ; CHECK: renamable $q1 = LDRQui renamable $x2, 1 :: (load (s128)) + ; CHECK: STPQi renamable $q0, renamable $q1, renamable $x1, 0 :: (store (s128)) ; CHECK: RET undef $lr - early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, 32 :: (load 16) - renamable $q1 = LDRQui renamable $x2, 1 :: (load 16) - STRQui killed renamable $q0, renamable $x1, 0 :: (store 16) - STRQui killed renamable $q1, renamable $x1, 1 :: (store 16) + early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, 32 :: (load (s128)) + renamable $q1 = LDRQui renamable $x2, 1 :: (load (s128)) + STRQui killed renamable $q0, renamable $x1, 0 :: (store (s128)) + STRQui killed renamable $q1, renamable $x1, 1 :: (store (s128)) RET undef $lr ... @@ -493,13 +493,13 @@ body: | liveins: $q0, $q1, $x1 ; CHECK-LABEL: name: 17-ldrqpre-ldurqi-merge ; CHECK: liveins: $q0, $q1, $x1 - ; CHECK: early-clobber $x1, renamable $q0, renamable $q1 = LDPQpre renamable $x1, 2 :: (load 16) - ; CHECK: STPQi renamable $q0, renamable $q1, renamable $x1, 0 :: (store 16) + ; CHECK: early-clobber $x1, renamable $q0, renamable $q1 = LDPQpre renamable $x1, 2 :: (load (s128)) + ; CHECK: STPQi renamable $q0, renamable $q1, renamable $x1, 0 :: (store (s128)) ; CHECK: RET undef $lr - early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, 32 :: (load 16) - renamable $q1 = LDURQi renamable $x1, 16 :: (load 16) - STRQui killed renamable $q0, renamable $x1, 0 :: (store 16) - STRQui killed renamable $q1, renamable $x1, 1 :: (store 16) + early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, 32 :: (load (s128)) + renamable $q1 = LDURQi renamable $x1, 16 :: (load (s128)) + STRQui killed renamable $q0, renamable $x1, 0 :: (store (s128)) + STRQui killed renamable $q1, renamable $x1, 1 :: (store (s128)) RET undef $lr ... @@ -522,14 +522,14 @@ body: | liveins: $q0, $q1, $x1 ; CHECK-LABEL: name: 18-ldrqpre-ldurqi-no-merge ; CHECK: liveins: $q0, $q1, $x1 - ; CHECK: early-clobber renamable $x1, renamable $q0 = LDRQpre renamable $x1, 32, implicit $w1 :: (load 16) - ; CHECK: renamable $q1 = LDURQi renamable $x1, 1 :: (load 16) - ; CHECK: STPQi renamable $q0, renamable $q1, renamable $x1, 0 :: (store 16) + ; CHECK: early-clobber renamable $x1, renamable $q0 = LDRQpre renamable $x1, 32, implicit $w1 :: (load (s128)) + ; CHECK: renamable $q1 = LDURQi renamable $x1, 1 :: (load (s128)) + ; CHECK: STPQi renamable $q0, renamable $q1, renamable $x1, 0 :: (store (s128)) ; CHECK: RET undef $lr - early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, 32 :: (load 16) - renamable $q1 = LDURQi renamable $x1, 1 :: (load 16) - STRQui killed renamable $q0, renamable $x1, 0 :: (store 16) - STRQui killed renamable $q1, renamable $x1, 1 :: (store 16) + early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, 32 :: (load (s128)) + renamable $q1 = LDURQi renamable $x1, 1 :: (load (s128)) + STRQui killed renamable $q0, renamable $x1, 0 :: (store (s128)) + STRQui killed renamable $q1, renamable $x1, 1 :: (store (s128)) RET undef $lr ... @@ -548,14 +548,14 @@ body: | liveins: $s0, $s1, $x1 ; CHECK-LABEL: name: 19-ldrspre-ldrsui-max-merge ; CHECK: liveins: $s0, $s1, $x1 - ; CHECK: early-clobber $x1, renamable $s0, renamable $s1 = LDPSpre renamable $x1, 63 :: (load 4) - ; CHECK: STRSui renamable $s0, renamable $x1, 0 :: (store 4) - ; CHECK: STRSui renamable $s1, renamable $x1, 1 :: ("aarch64-suppress-pair" store 4) + ; CHECK: early-clobber $x1, renamable $s0, renamable $s1 = LDPSpre renamable $x1, 63 :: (load (s32)) + ; CHECK: STRSui renamable $s0, renamable $x1, 0 :: (store (s32)) + ; CHECK: STRSui renamable $s1, renamable $x1, 1 :: ("aarch64-suppress-pair" store (s32)) ; CHECK: RET undef $lr - early-clobber renamable $x1, renamable $s0 = LDRSpre killed renamable $x1, 252 :: (load 4) - renamable $s1 = LDRSui renamable $x1, 1 :: (load 4) - STRSui killed renamable $s0, renamable $x1, 0 :: (store 4) - STRSui killed renamable $s1, renamable $x1, 1 :: (store 4) + early-clobber renamable $x1, renamable $s0 = LDRSpre killed renamable $x1, 252 :: (load (s32)) + renamable $s1 = LDRSui renamable $x1, 1 :: (load (s32)) + STRSui killed renamable $s0, renamable $x1, 0 :: (store (s32)) + STRSui killed renamable $s1, renamable $x1, 1 :: (store (s32)) RET undef $lr ... @@ -574,14 +574,14 @@ body: | liveins: $s0, $s1, $x1 ; CHECK-LABEL: name: 20-ldrspre-ldrsui-unaligned-no-merge ; CHECK: liveins: $s0, $s1, $x1 - ; CHECK: early-clobber renamable $x1, renamable $s0 = LDRSpre renamable $x1, 251, implicit $w1 :: (load 4) - ; CHECK: renamable $s1 = LDRSui renamable $x1, 1 :: (load 4) - ; CHECK: STRSui renamable $s0, renamable $x1, 0 :: (store 4) - ; CHECK: STRSui renamable $s1, renamable $x1, 1 :: ("aarch64-suppress-pair" store 4) + ; CHECK: early-clobber renamable $x1, renamable $s0 = LDRSpre renamable $x1, 251, implicit $w1 :: (load (s32)) + ; CHECK: renamable $s1 = LDRSui renamable $x1, 1 :: (load (s32)) + ; CHECK: STRSui renamable $s0, renamable $x1, 0 :: (store (s32)) + ; CHECK: STRSui renamable $s1, renamable $x1, 1 :: ("aarch64-suppress-pair" store (s32)) ; CHECK: RET undef $lr - early-clobber renamable $x1, renamable $s0 = LDRSpre killed renamable $x1, 251 :: (load 4) - renamable $s1 = LDRSui renamable $x1, 1 :: (load 4) - STRSui killed renamable $s0, renamable $x1, 0 :: (store 4) - STRSui killed renamable $s1, renamable $x1, 1 :: (store 4) + early-clobber renamable $x1, renamable $s0 = LDRSpre killed renamable $x1, 251 :: (load (s32)) + renamable $s1 = LDRSui renamable $x1, 1 :: (load (s32)) + STRSui killed renamable $s0, renamable $x1, 0 :: (store (s32)) + STRSui killed renamable $s1, renamable $x1, 1 :: (store (s32)) RET undef $lr ... diff --git a/llvm/test/CodeGen/AArch64/ldst-miflags.mir b/llvm/test/CodeGen/AArch64/ldst-miflags.mir index 73ca0cae9f47b..89c2c82ca12be 100644 --- a/llvm/test/CodeGen/AArch64/ldst-miflags.mir +++ b/llvm/test/CodeGen/AArch64/ldst-miflags.mir @@ -6,8 +6,8 @@ name: case11 # CHECK-LABEL: name: case11 body: | bb.0: - frame-setup STRWui $w1, $x0, 1 :: (store 4) - $w1 = frame-destroy LDRWui $x0, 1 :: (load 4) + frame-setup STRWui $w1, $x0, 1 :: (store (s32)) + $w1 = frame-destroy LDRWui $x0, 1 :: (load (s32)) ; CHECK: frame-setup STRWui ; CHECK-NOT: frame-setup @@ -22,8 +22,8 @@ name: case12 # CHECK-LABEL: name: case12 body: | bb.0: - frame-setup STRWui $w1, $x0, 1 :: (store 4) - $w2 = frame-destroy LDRHHui $x0, 2 :: (load 2) + frame-setup STRWui $w1, $x0, 1 :: (store (s32)) + $w2 = frame-destroy LDRHHui $x0, 2 :: (load (s16)) ; CHECK: frame-setup STRWui ; CHECK-NOT: frame-setup @@ -38,8 +38,8 @@ name: case13 # CHECK-LABEL: name: case13 body: | bb.0: - frame-setup STRWui $w1, $x0, 1 :: (store 4) - $w2 = frame-destroy LDRHHui $x0, 3 :: (load 2) + frame-setup STRWui $w1, $x0, 1 :: (store (s32)) + $w2 = frame-destroy LDRHHui $x0, 3 :: (load (s16)) ; CHECK: frame-setup STRWui ; CHECK-NOT: frame-setup @@ -54,8 +54,8 @@ name: case2 # CHECK-LABEL: name: case2 body: | bb.0: - frame-setup STRHHui $wzr, $x0, 0 :: (store 4) - frame-destroy STRHHui $wzr, $x0, 1 :: (store 4) + frame-setup STRHHui $wzr, $x0, 0 :: (store (s32)) + frame-destroy STRHHui $wzr, $x0, 1 :: (store (s32)) ; CHECK: frame-setup frame-destroy STRWui RET_ReallyLR @@ -67,8 +67,8 @@ name: case3 body: | bb.0: - $x0 = frame-setup LDRXui $x2, 0 :: (load 8) - $x1 = frame-destroy LDRXui $x2, 1 :: (load 8) + $x0 = frame-setup LDRXui $x2, 0 :: (load (s64)) + $x1 = frame-destroy LDRXui $x2, 1 :: (load (s64)) ; CHECK: frame-setup frame-destroy LDPXi RET_ReallyLR diff --git a/llvm/test/CodeGen/AArch64/ldst-nopreidx-sp-redzone.mir b/llvm/test/CodeGen/AArch64/ldst-nopreidx-sp-redzone.mir index 8c66937fe8e02..bf2043d07f9e4 100644 --- a/llvm/test/CodeGen/AArch64/ldst-nopreidx-sp-redzone.mir +++ b/llvm/test/CodeGen/AArch64/ldst-nopreidx-sp-redzone.mir @@ -152,8 +152,8 @@ body: | bb.0.bb: successors: %bb.1(0x7ffff800), %bb.2(0x00000800) liveins: $x27, $x28, $lr - early-clobber $sp = frame-setup STPXpre killed $x28, killed $x27, $sp, -4 :: (store 8 into %stack.9), (store 8 into %stack.8) - frame-setup STPXi killed $fp, killed $lr, $sp, 2 :: (store 8 into %stack.7), (store 8 into %stack.6) + early-clobber $sp = frame-setup STPXpre killed $x28, killed $x27, $sp, -4 :: (store (s64) into %stack.9), (store (s64) into %stack.8) + frame-setup STPXi killed $fp, killed $lr, $sp, 2 :: (store (s64) into %stack.7), (store (s64) into %stack.6) $fp = frame-setup ADDXri $sp, 16, 0 $sp = frame-setup SUBXri $sp, 80, 0 frame-setup CFI_INSTRUCTION def_cfa $w29, 16 @@ -163,21 +163,21 @@ body: | frame-setup CFI_INSTRUCTION offset $w28, -32 $x8 = ADRP target-flags(aarch64-page, aarch64-got) @__stack_chk_guard $x8 = LDRXui killed $x8, target-flags(aarch64-pageoff, aarch64-got, aarch64-nc) @__stack_chk_guard - $x8 = LDRXui killed $x8, 0 :: (dereferenceable invariant load 8 from @__stack_chk_guard) - STURXi killed renamable $x8, $fp, -24 :: (volatile store 8 into %stack.0.StackGuardSlot) - STRXui $xzr, $sp, 1 :: (store 8 into %ir.tmp3) - STRXui $xzr, $sp, 0 :: (store 8 into %ir.tmp4) - renamable $x8 = LDURXi $fp, -24 :: (volatile load 8 from %stack.0.StackGuardSlot) + $x8 = LDRXui killed $x8, 0 :: (dereferenceable invariant load (s64) from @__stack_chk_guard) + STURXi killed renamable $x8, $fp, -24 :: (volatile store (s64) into %stack.0.StackGuardSlot) + STRXui $xzr, $sp, 1 :: (store (s64) into %ir.tmp3) + STRXui $xzr, $sp, 0 :: (store (s64) into %ir.tmp4) + renamable $x8 = LDURXi $fp, -24 :: (volatile load (s64) from %stack.0.StackGuardSlot) $x9 = ADRP target-flags(aarch64-page, aarch64-got) @__stack_chk_guard $x9 = LDRXui killed $x9, target-flags(aarch64-pageoff, aarch64-got, aarch64-nc) @__stack_chk_guard - $x9 = LDRXui killed $x9, 0 :: (dereferenceable invariant load 8 from @__stack_chk_guard) + $x9 = LDRXui killed $x9, 0 :: (dereferenceable invariant load (s64) from @__stack_chk_guard) $xzr = SUBSXrs killed renamable $x9, killed renamable $x8, 0, implicit-def $nzcv, implicit-def $nzcv Bcc 1, %bb.2, implicit $nzcv bb.1.bb: $sp = frame-destroy ADDXri $sp, 480, 0 - $fp, $lr = frame-destroy LDPXi $sp, 2 :: (load 8 from %stack.7), (load 8 from %stack.6) - early-clobber $sp, $x28, $x27 = frame-destroy LDPXpost $sp, 4 :: (load 8 from %stack.9), (load 8 from %stack.8) + $fp, $lr = frame-destroy LDPXi $sp, 2 :: (load (s64) from %stack.7), (load (s64) from %stack.6) + early-clobber $sp, $x28, $x27 = frame-destroy LDPXpost $sp, 4 :: (load (s64) from %stack.9), (load (s64) from %stack.8) RET undef $lr bb.2.bb: @@ -263,8 +263,8 @@ body: | bb.0.bb: successors: %bb.1(0x7ffff800), %bb.2(0x00000800) liveins: $x27, $x28, $lr - early-clobber $sp = frame-setup STPXpre killed $x28, killed $x27, $sp, -4 :: (store 8 into %stack.9), (store 8 into %stack.8) - frame-setup STPXi killed $fp, killed $lr, $sp, 2 :: (store 8 into %stack.7), (store 8 into %stack.6) + early-clobber $sp = frame-setup STPXpre killed $x28, killed $x27, $sp, -4 :: (store (s64) into %stack.9), (store (s64) into %stack.8) + frame-setup STPXi killed $fp, killed $lr, $sp, 2 :: (store (s64) into %stack.7), (store (s64) into %stack.6) $fp = frame-setup ADDXri $sp, 16, 0 $sp = frame-setup SUBXri $sp, 480, 0 frame-setup CFI_INSTRUCTION def_cfa $w29, 16 @@ -274,21 +274,21 @@ body: | frame-setup CFI_INSTRUCTION offset $w28, -32 $x8 = ADRP target-flags(aarch64-page, aarch64-got) @__stack_chk_guard $x8 = LDRXui killed $x8, target-flags(aarch64-pageoff, aarch64-got, aarch64-nc) @__stack_chk_guard - $x8 = LDRXui killed $x8, 0 :: (dereferenceable invariant load 8 from @__stack_chk_guard) - STURXi killed renamable $x8, $fp, -24 :: (volatile store 8 into %stack.0.StackGuardSlot) - STRXui $xzr, $sp, 1 :: (store 8 into %ir.tmp3) - STRXui $xzr, $sp, 0 :: (store 8 into %ir.tmp4) - renamable $x8 = LDURXi $fp, -24 :: (volatile load 8 from %stack.0.StackGuardSlot) + $x8 = LDRXui killed $x8, 0 :: (dereferenceable invariant load (s64) from @__stack_chk_guard) + STURXi killed renamable $x8, $fp, -24 :: (volatile store (s64) into %stack.0.StackGuardSlot) + STRXui $xzr, $sp, 1 :: (store (s64) into %ir.tmp3) + STRXui $xzr, $sp, 0 :: (store (s64) into %ir.tmp4) + renamable $x8 = LDURXi $fp, -24 :: (volatile load (s64) from %stack.0.StackGuardSlot) $x9 = ADRP target-flags(aarch64-page, aarch64-got) @__stack_chk_guard $x9 = LDRXui killed $x9, target-flags(aarch64-pageoff, aarch64-got, aarch64-nc) @__stack_chk_guard - $x9 = LDRXui killed $x9, 0 :: (dereferenceable invariant load 8 from @__stack_chk_guard) + $x9 = LDRXui killed $x9, 0 :: (dereferenceable invariant load (s64) from @__stack_chk_guard) $xzr = SUBSXrs killed renamable $x9, killed renamable $x8, 0, implicit-def $nzcv, implicit-def $nzcv Bcc 1, %bb.2, implicit $nzcv bb.1.bb: $sp = frame-destroy ADDXri $sp, 480, 0 - $fp, $lr = frame-destroy LDPXi $sp, 2 :: (load 8 from %stack.7), (load 8 from %stack.6) - early-clobber $sp, $x28, $x27 = frame-destroy LDPXpost $sp, 4 :: (load 8 from %stack.9), (load 8 from %stack.8) + $fp, $lr = frame-destroy LDPXi $sp, 2 :: (load (s64) from %stack.7), (load (s64) from %stack.6) + early-clobber $sp, $x28, $x27 = frame-destroy LDPXpost $sp, 4 :: (load (s64) from %stack.9), (load (s64) from %stack.8) RET undef $lr bb.2.bb: @@ -376,8 +376,8 @@ body: | bb.0.bb: successors: %bb.1(0x7ffff800), %bb.2(0x00000800) liveins: $x27, $x28, $lr - early-clobber $sp = frame-setup STPXpre killed $x28, killed $x27, $sp, -4 :: (store 8 into %stack.9), (store 8 into %stack.8) - frame-setup STPXi killed $fp, killed $lr, $sp, 2 :: (store 8 into %stack.7), (store 8 into %stack.6) + early-clobber $sp = frame-setup STPXpre killed $x28, killed $x27, $sp, -4 :: (store (s64) into %stack.9), (store (s64) into %stack.8) + frame-setup STPXi killed $fp, killed $lr, $sp, 2 :: (store (s64) into %stack.7), (store (s64) into %stack.6) $fp = frame-setup ADDXri $sp, 16, 0 $sp = frame-setup SUBXri $sp, 480, 0 frame-setup CFI_INSTRUCTION def_cfa $w29, 16 @@ -386,19 +386,19 @@ body: | frame-setup CFI_INSTRUCTION offset $w27, -24 frame-setup CFI_INSTRUCTION offset $w28, -32 $x8 = ADRP target-flags(aarch64-page, aarch64-got) @__stack_chk_guard - STRXui $xzr, $sp, 1 :: (store 8 into %ir.tmp3) - STRXui $xzr, $sp, 0 :: (store 8 into %ir.tmp4) - renamable $x8 = LDURXi $fp, -24 :: (volatile load 8 from %stack.0.StackGuardSlot) + STRXui $xzr, $sp, 1 :: (store (s64) into %ir.tmp3) + STRXui $xzr, $sp, 0 :: (store (s64) into %ir.tmp4) + renamable $x8 = LDURXi $fp, -24 :: (volatile load (s64) from %stack.0.StackGuardSlot) $x9 = ADRP target-flags(aarch64-page, aarch64-got) @__stack_chk_guard $x9 = LDRXui killed $x9, target-flags(aarch64-pageoff, aarch64-got, aarch64-nc) @__stack_chk_guard - $x9 = LDRXui killed $x9, 0 :: (dereferenceable invariant load 8 from @__stack_chk_guard) + $x9 = LDRXui killed $x9, 0 :: (dereferenceable invariant load (s64) from @__stack_chk_guard) $xzr = SUBSXrs killed renamable $x9, killed renamable $x8, 0, implicit-def $nzcv, implicit-def $nzcv Bcc 1, %bb.2, implicit $nzcv bb.1.bb: $sp = frame-destroy ADDXri $sp, 480, 0 - $fp, $lr = frame-destroy LDPXi $sp, 2 :: (load 8 from %stack.7), (load 8 from %stack.6) - early-clobber $sp, $x28, $x27 = frame-destroy LDPXpost $sp, 4 :: (load 8 from %stack.9), (load 8 from %stack.8) + $fp, $lr = frame-destroy LDPXi $sp, 2 :: (load (s64) from %stack.7), (load (s64) from %stack.6) + early-clobber $sp, $x28, $x27 = frame-destroy LDPXpost $sp, 4 :: (load (s64) from %stack.9), (load (s64) from %stack.8) RET undef $lr bb.2.bb: diff --git a/llvm/test/CodeGen/AArch64/ldst-opt-aa.mir b/llvm/test/CodeGen/AArch64/ldst-opt-aa.mir index a7a47278a4e9e..9f2680aca970e 100644 --- a/llvm/test/CodeGen/AArch64/ldst-opt-aa.mir +++ b/llvm/test/CodeGen/AArch64/ldst-opt-aa.mir @@ -22,9 +22,9 @@ body: | bb.0.entry: liveins: $x0, $x1 - $w8 = LDRWui $x1, 0 :: (load 4 from %ir.y) - STRWui killed $w8, $x0, 0 :: (store 4 into %ir.x) - $w9 = LDRWui killed $x1, 1 :: (load 4 from %ir.arrayidx2) - STRWui killed $w9, killed $x0, 1 :: (store 4 into %ir.arrayidx3) + $w8 = LDRWui $x1, 0 :: (load (s32) from %ir.y) + STRWui killed $w8, $x0, 0 :: (store (s32) into %ir.x) + $w9 = LDRWui killed $x1, 1 :: (load (s32) from %ir.arrayidx2) + STRWui killed $w9, killed $x0, 1 :: (store (s32) into %ir.arrayidx3) RET undef $lr diff --git a/llvm/test/CodeGen/AArch64/ldst-opt-non-imm-offset.mir b/llvm/test/CodeGen/AArch64/ldst-opt-non-imm-offset.mir index 86dff69c4dfa2..a39f001f44b21 100644 --- a/llvm/test/CodeGen/AArch64/ldst-opt-non-imm-offset.mir +++ b/llvm/test/CodeGen/AArch64/ldst-opt-non-imm-offset.mir @@ -20,8 +20,8 @@ tracksRegLiveness: true body: | bb.0.entry: renamable $x8 = ADRP target-flags(aarch64-page) @g - STRWui $wzr, killed renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @g :: (store 4 into @g) - renamable $w0 = LDRWui undef renamable $x8, 0 :: (load 4 from `i32* undef`) + STRWui $wzr, killed renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @g :: (store (s32) into @g) + renamable $w0 = LDRWui undef renamable $x8, 0 :: (load (s32) from `i32* undef`) RET_ReallyLR implicit $w0 ... diff --git a/llvm/test/CodeGen/AArch64/ldst-opt-zr-clobber.mir b/llvm/test/CodeGen/AArch64/ldst-opt-zr-clobber.mir index f42711cbc8fca..dfd5b0da7a683 100644 --- a/llvm/test/CodeGen/AArch64/ldst-opt-zr-clobber.mir +++ b/llvm/test/CodeGen/AArch64/ldst-opt-zr-clobber.mir @@ -15,10 +15,10 @@ name: no-clobber-zr body: | bb.0: liveins: $x0, $x1 - STRXui $xzr, $x0, 0 :: (store 8 into %ir.p) + STRXui $xzr, $x0, 0 :: (store (s64) into %ir.p) dead $xzr = SUBSXri killed $x1, 0, 0, implicit-def $nzcv $w8 = CSINCWr $wzr, $wzr, 1, implicit killed $nzcv - STRXui $xzr, killed $x0, 1 :: (store 8 into %ir.p) + STRXui $xzr, killed $x0, 1 :: (store (s64) into %ir.p) $w0 = ORRWrs $wzr, killed $w8, 0 RET $lr, implicit $w0 ... diff --git a/llvm/test/CodeGen/AArch64/ldst-opt.mir b/llvm/test/CodeGen/AArch64/ldst-opt.mir index 0d583d06434a3..aebd36eb9b736 100644 --- a/llvm/test/CodeGen/AArch64/ldst-opt.mir +++ b/llvm/test/CodeGen/AArch64/ldst-opt.mir @@ -6,7 +6,7 @@ body: | bb.0: liveins: $w1, $x0, $lr - STRWui killed $w1, $x0, 0 :: (store 4) + STRWui killed $w1, $x0, 0 :: (store (s32)) CFI_INSTRUCTION 0 CFI_INSTRUCTION 0 CFI_INSTRUCTION 0 @@ -27,7 +27,7 @@ body: | CFI_INSTRUCTION 0 CFI_INSTRUCTION 0 CFI_INSTRUCTION 0 - $w0 = LDRHHui killed $x0, 1 :: (load 2) + $w0 = LDRHHui killed $x0, 1 :: (load (s16)) RET $lr, implicit $w0 ... @@ -42,7 +42,7 @@ body: | bb.0: liveins: $w1, $x0, $lr - STRWui $w1, $x0, 0 :: (store 4) + STRWui $w1, $x0, 0 :: (store (s32)) CFI_INSTRUCTION 0 CFI_INSTRUCTION 0 CFI_INSTRUCTION 0 @@ -63,7 +63,7 @@ body: | CFI_INSTRUCTION 0 CFI_INSTRUCTION 0 CFI_INSTRUCTION 0 - STRWui killed $w1, killed $x0, 1 :: (store 4) + STRWui killed $w1, killed $x0, 1 :: (store (s32)) RET $lr ... @@ -76,17 +76,17 @@ body: | bb.0: liveins: $w1, $x0, $lr - STRWui $w1, $x0, 0 :: (store 4) + STRWui $w1, $x0, 0 :: (store (s32)) $w2 = COPY $w1 $x3 = COPY $x0 - STRWui killed $w1, killed $x0, 1 :: (store 4) + STRWui killed $w1, killed $x0, 1 :: (store (s32)) RET $lr ... # When merging a lower store with an upper one, we must clear kill flags on # the lower store. # CHECK-LABEL: store-pair-clearkill0 -# CHECK-NOT: STPWi $w1, killed $w1, $x0, 0 :: (store 4) -# CHECK: STPWi $w1, $w1, $x0, 0 :: (store 4) +# CHECK-NOT: STPWi $w1, killed $w1, $x0, 0 :: (store (s32)) +# CHECK: STPWi $w1, $w1, $x0, 0 :: (store (s32)) # CHECK: $w2 = COPY $w1 # CHECK: RET $lr --- @@ -98,15 +98,15 @@ body: | $w1 = MOVi32imm 13 $w2 = MOVi32imm 7 - STRWui $w1, $x0, 1 :: (store 4) + STRWui $w1, $x0, 1 :: (store (s32)) $w2 = COPY killed $w1 - STRWui killed $w2, $x0, 0 :: (store 4) + STRWui killed $w2, $x0, 0 :: (store (s32)) $w1 = MOVi32imm 42 $w2 = MOVi32imm 7 - STRWui $w1, $x0, 0 :: (store 4) + STRWui $w1, $x0, 0 :: (store (s32)) $w2 = COPY killed $w1 - STRWui killed $w2, killed $x0, 1 :: (store 4) + STRWui killed $w2, killed $x0, 1 :: (store (s32)) RET $lr ... @@ -131,15 +131,15 @@ body: | bb.0: liveins: $w1 - STRWui $w1, $sp, 0 :: (store 4) + STRWui $w1, $sp, 0 :: (store (s32)) $wzr = COPY killed $w1 ; killing use of $w1 - $w11 = LDRWui $sp, 0 :: (load 4) + $w11 = LDRWui $sp, 0 :: (load (s32)) HINT 0, implicit $w11 ; some use of $w11 ... # When replaceing the load of a store-load pair with a copy the kill flags # along the way need to be cleared. # CHECK-LABEL: name: store-load-clearkill -# CHECK: STRWui $w1, $sp, 0 :: (store 4) +# CHECK: STRWui $w1, $sp, 0 :: (store (s32)) # CHECK-NOT: COPY killed $w1 # CHECK: $wzr = COPY $w1 # CHECK: $w11 = ORRWrs $wzr, $w1, 0 @@ -151,10 +151,10 @@ body: | bb.0: liveins: $x0, $x2, $lr - STRWui undef $w1, $x0, 0 :: (store 4) - $w0 = LDRBBui $x0, 1 :: (load 2) - STRHHui undef $w3, $x2, 0 :: (store 4) - $w1 = LDRBBui $x2, 0 :: (load 4) + STRWui undef $w1, $x0, 0 :: (store (s32)) + $w0 = LDRBBui $x0, 1 :: (load (s16)) + STRHHui undef $w3, $x2, 0 :: (store (s32)) + $w1 = LDRBBui $x2, 0 :: (load (s32)) RET $lr, implicit $w0 ... # CHECK-LABEL: name: promote-load-from-store-undef @@ -169,9 +169,9 @@ body: | bb.0: liveins: $x0, $lr - STRXui $x0, $sp, 0 :: (store 8) - STRXui killed $x0, $sp, 2 :: (store 8) - $x0 = LDRXui $sp, 0 :: (load 8) + STRXui $x0, $sp, 0 :: (store (s64)) + STRXui killed $x0, $sp, 2 :: (store (s64)) + $x0 = LDRXui $sp, 0 :: (load (s64)) BL &bar, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit-def $sp RET $lr ... diff --git a/llvm/test/CodeGen/AArch64/loh-use-between-adrp-add.mir b/llvm/test/CodeGen/AArch64/loh-use-between-adrp-add.mir index b04811bb0d089..0921636343636 100644 --- a/llvm/test/CodeGen/AArch64/loh-use-between-adrp-add.mir +++ b/llvm/test/CodeGen/AArch64/loh-use-between-adrp-add.mir @@ -30,7 +30,7 @@ body: | bb.0: liveins: $x11, $x12 renamable $x15 = ADRP target-flags(aarch64-page) @rrdpb - STRXui renamable $x12, killed renamable $x11, 1 :: (store 8) + STRXui renamable $x12, killed renamable $x11, 1 :: (store (s64)) renamable $x11 = ADDXri killed renamable $x15, target-flags(aarch64-pageoff, aarch64-nc) @rrdpb, 0 STRXui renamable $x11, killed renamable $x11, 0 RET undef $lr @@ -47,7 +47,7 @@ liveins: body: | bb.0: liveins: $x11, $x12 - STRXui renamable $x12, killed renamable $x11, 1 :: (store 8) + STRXui renamable $x12, killed renamable $x11, 1 :: (store (s64)) renamable $x15 = ADRP target-flags(aarch64-page) @rrdpb renamable $x11 = ADDXri killed renamable $x15, target-flags(aarch64-pageoff, aarch64-nc) @rrdpb, 0 STRXui renamable $x11, killed renamable $x11, 0 diff --git a/llvm/test/CodeGen/AArch64/loop-sink-limit.mir b/llvm/test/CodeGen/AArch64/loop-sink-limit.mir index 2d85f023f0e45..bdb99f296fcab 100644 --- a/llvm/test/CodeGen/AArch64/loop-sink-limit.mir +++ b/llvm/test/CodeGen/AArch64/loop-sink-limit.mir @@ -99,7 +99,7 @@ body: | ; SINK1: bb.1.for.body.preheader: ; SINK1: successors: %bb.3(0x80000000) ; SINK1: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) @A - ; SINK1: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui killed [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) @A :: (dereferenceable load 4 from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0)`) + ; SINK1: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui killed [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) @A :: (dereferenceable load (s32) from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0)`) ; SINK1: B %bb.3 ; SINK1: bb.2.for.cond.cleanup: ; SINK1: [[PHI:%[0-9]+]]:gpr32all = PHI [[COPY]], %bb.0, %4, %bb.3 @@ -126,7 +126,7 @@ body: | ; SINK2: bb.1.for.body.preheader: ; SINK2: successors: %bb.3(0x80000000) ; SINK2: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) @A - ; SINK2: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui killed [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) @A :: (dereferenceable load 4 from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0)`) + ; SINK2: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui killed [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) @A :: (dereferenceable load (s32) from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0)`) ; SINK2: B %bb.3 ; SINK2: bb.2.for.cond.cleanup: ; SINK2: [[PHI:%[0-9]+]]:gpr32all = PHI [[COPY]], %bb.0, %4, %bb.3 @@ -155,7 +155,7 @@ body: | successors: %bb.3(0x80000000) %8:gpr64common = ADRP target-flags(aarch64-page) @A - %9:gpr32 = LDRWui killed %8, target-flags(aarch64-pageoff, aarch64-nc) @A :: (dereferenceable load 4 from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0)`) + %9:gpr32 = LDRWui killed %8, target-flags(aarch64-pageoff, aarch64-nc) @A :: (dereferenceable load (s32) from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0)`) B %bb.3 bb.2.for.cond.cleanup: diff --git a/llvm/test/CodeGen/AArch64/loop-sink.mir b/llvm/test/CodeGen/AArch64/loop-sink.mir index 8a4e70aae1e12..091183bf492c3 100644 --- a/llvm/test/CodeGen/AArch64/loop-sink.mir +++ b/llvm/test/CodeGen/AArch64/loop-sink.mir @@ -342,7 +342,7 @@ body: | ; CHECK: bb.1..backedge: ; CHECK: successors: %bb.9(0x09249249), %bb.2(0x76db6db7) ; CHECK: [[PHI:%[0-9]+]]:gpr64sp = PHI [[COPY7]], %bb.0, %7, %bb.9 - ; CHECK: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[PHI]], 0 :: (load 1 from %ir.lsr.iv) + ; CHECK: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[PHI]], 0 :: (load (s8) from %ir.lsr.iv) ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, killed [[LDRBBui]], %subreg.sub_32 ; CHECK: [[COPY8:%[0-9]+]]:gpr32sp = COPY [[SUBREG_TO_REG]].sub_32 ; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri killed [[COPY8]], 50, 0, implicit-def $nzcv @@ -421,7 +421,7 @@ body: | successors: %bb.8(0x09249249), %bb.9(0x76db6db7) %6:gpr64sp = PHI %5, %bb.0, %7, %bb.8 - %17:gpr32 = LDRBBui %6, 0 :: (load 1 from %ir.lsr.iv) + %17:gpr32 = LDRBBui %6, 0 :: (load (s8) from %ir.lsr.iv) %16:gpr64 = SUBREG_TO_REG 0, killed %17, %subreg.sub_32 %18:gpr32sp = COPY %16.sub_32 %19:gpr32 = SUBSWri killed %18, 50, 0, implicit-def $nzcv @@ -557,7 +557,7 @@ body: | ; CHECK: bb.1.for.body.preheader: ; CHECK: successors: %bb.3(0x80000000) ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) @A - ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui killed [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) @A :: (dereferenceable load 4 from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0)`) + ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui killed [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) @A :: (dereferenceable load (s32) from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0)`) ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp ; CHECK: $w0 = COPY [[COPY]] ; CHECK: BL @use, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0 @@ -590,7 +590,7 @@ body: | successors: %bb.3(0x80000000) %8:gpr64common = ADRP target-flags(aarch64-page) @A - %9:gpr32 = LDRWui killed %8, target-flags(aarch64-pageoff, aarch64-nc) @A :: (dereferenceable load 4 from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0)`) + %9:gpr32 = LDRWui killed %8, target-flags(aarch64-pageoff, aarch64-nc) @A :: (dereferenceable load (s32) from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0)`) ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp $w0 = COPY %6 BL @use, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0 @@ -678,7 +678,7 @@ body: | ; CHECK: bb.1.for.body.preheader: ; CHECK: successors: %bb.3(0x80000000) ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) @A - ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui killed [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) @A :: (dereferenceable load 4 from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0)`) + ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui killed [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) @A :: (dereferenceable load (s32) from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0)`) ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp ; CHECK: $w0 = COPY [[LDRWui]] ; CHECK: BL @use, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0 @@ -711,7 +711,7 @@ body: | successors: %bb.3(0x80000000) %8:gpr64common = ADRP target-flags(aarch64-page) @A - %9:gpr32 = LDRWui killed %8, target-flags(aarch64-pageoff, aarch64-nc) @A :: (dereferenceable load 4 from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0)`) + %9:gpr32 = LDRWui killed %8, target-flags(aarch64-pageoff, aarch64-nc) @A :: (dereferenceable load (s32) from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0)`) ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp $w0 = COPY %9 BL @use, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0 @@ -806,7 +806,7 @@ body: | ; CHECK: bb.1.for.body.preheader: ; CHECK: successors: %bb.3(0x80000000) ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) @A - ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui killed [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) @A :: (dereferenceable load 4 from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0)`) + ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui killed [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) @A :: (dereferenceable load (s32) from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0)`) ; CHECK: [[COPY3:%[0-9]+]]:gpr32all = COPY [[LDRWui]] ; CHECK: B %bb.3 ; CHECK: bb.2.for.cond.cleanup: @@ -844,7 +844,7 @@ body: | successors: %bb.3(0x80000000) %11:gpr64common = ADRP target-flags(aarch64-page) @A - %12:gpr32 = LDRWui killed %11, target-flags(aarch64-pageoff, aarch64-nc) @A :: (dereferenceable load 4 from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0)`) + %12:gpr32 = LDRWui killed %11, target-flags(aarch64-pageoff, aarch64-nc) @A :: (dereferenceable load (s32) from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0)`) %0:gpr32all = COPY %12 B %bb.3 @@ -932,7 +932,7 @@ body: | ; CHECK: bb.1.for.body.preheader: ; CHECK: successors: %bb.3(0x80000000) ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) @A - ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui killed [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) @A :: (dereferenceable load 4 from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0)`) + ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui killed [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) @A :: (dereferenceable load (s32) from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0)`) ; CHECK: B %bb.3 ; CHECK: bb.2.for.cond.cleanup: ; CHECK: [[PHI:%[0-9]+]]:gpr32all = PHI [[COPY]], %bb.0, %4, %bb.3 @@ -961,7 +961,7 @@ body: | successors: %bb.3(0x80000000) %8:gpr64common = ADRP target-flags(aarch64-page) @A - %9:gpr32 = LDRWui killed %8, target-flags(aarch64-pageoff, aarch64-nc) @A :: (dereferenceable load 4 from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0)`) + %9:gpr32 = LDRWui killed %8, target-flags(aarch64-pageoff, aarch64-nc) @A :: (dereferenceable load (s32) from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0)`) B %bb.3 bb.2.for.cond.cleanup: @@ -1053,13 +1053,13 @@ body: | ; CHECK: B %bb.1 ; CHECK: bb.1.for.body.preheader: ; CHECK: successors: %bb.3(0x80000000) - ; CHECK: [[LDRWui:%[0-9]+]]:gpr32common = LDRWui [[COPY2]], 0 :: (load 4 from %ir.read, !tbaa !0) + ; CHECK: [[LDRWui:%[0-9]+]]:gpr32common = LDRWui [[COPY2]], 0 :: (load (s32) from %ir.read, !tbaa !0) ; CHECK: [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[LDRWui]], 42, 0 ; CHECK: [[COPY3:%[0-9]+]]:gpr32all = COPY [[ADDWri]] ; CHECK: B %bb.3 ; CHECK: bb.2.for.cond.cleanup: ; CHECK: [[PHI:%[0-9]+]]:gpr32 = PHI [[COPY]], %bb.0, %6, %bb.3 - ; CHECK: STRWui [[PHI]], [[COPY1]], 0 :: (store 4 into %ir.write, !tbaa !0) + ; CHECK: STRWui [[PHI]], [[COPY1]], 0 :: (store (s32) into %ir.write, !tbaa !0) ; CHECK: RET_ReallyLR ; CHECK: bb.3.for.body: ; CHECK: successors: %bb.2(0x04000000), %bb.3(0x7c000000) @@ -1081,7 +1081,7 @@ body: | %11:gpr32common = COPY $w2 %10:gpr64common = COPY $x1 %9:gpr64common = COPY $x0 - %12:gpr32common = LDRWui %9, 0 :: (load 4 from %ir.read, !tbaa !6) + %12:gpr32common = LDRWui %9, 0 :: (load (s32) from %ir.read, !tbaa !6) %13:gpr32 = SUBSWri %11, 1, 0, implicit-def $nzcv Bcc 11, %bb.2, implicit $nzcv B %bb.1 @@ -1095,7 +1095,7 @@ body: | bb.2.for.cond.cleanup: %2:gpr32 = PHI %11, %bb.0, %6, %bb.3 - STRWui %2, %10, 0 :: (store 4 into %ir.write, !tbaa !6) + STRWui %2, %10, 0 :: (store (s32) into %ir.write, !tbaa !6) RET_ReallyLR bb.3.for.body: @@ -1189,15 +1189,15 @@ body: | ; CHECK: B %bb.1 ; CHECK: bb.1.for.body.preheader: ; CHECK: successors: %bb.3(0x80000000) - ; CHECK: [[LDRWui:%[0-9]+]]:gpr32common = LDRWui [[COPY3]], 0 :: (load 4 from %ir.read, !tbaa !0) + ; CHECK: [[LDRWui:%[0-9]+]]:gpr32common = LDRWui [[COPY3]], 0 :: (load (s32) from %ir.read, !tbaa !0) ; CHECK: [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[LDRWui]], 42, 0 ; CHECK: [[COPY4:%[0-9]+]]:gpr32all = COPY [[ADDWri]] ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 43 - ; CHECK: STRWui killed [[MOVi32imm]], [[COPY1]], 0 :: (store 4 into %ir.store, !tbaa !0) + ; CHECK: STRWui killed [[MOVi32imm]], [[COPY1]], 0 :: (store (s32) into %ir.store, !tbaa !0) ; CHECK: B %bb.3 ; CHECK: bb.2.for.cond.cleanup: ; CHECK: [[PHI:%[0-9]+]]:gpr32 = PHI [[COPY]], %bb.0, %6, %bb.3 - ; CHECK: STRWui [[PHI]], [[COPY2]], 0 :: (store 4 into %ir.write, !tbaa !0) + ; CHECK: STRWui [[PHI]], [[COPY2]], 0 :: (store (s32) into %ir.write, !tbaa !0) ; CHECK: RET_ReallyLR ; CHECK: bb.3.for.body: ; CHECK: successors: %bb.2(0x04000000), %bb.3(0x7c000000) @@ -1220,7 +1220,7 @@ body: | %11:gpr64common = COPY $x2 %10:gpr64common = COPY $x1 %9:gpr64common = COPY $x0 - %13:gpr32common = LDRWui %9, 0 :: (load 4 from %ir.read, !tbaa !6) + %13:gpr32common = LDRWui %9, 0 :: (load (s32) from %ir.read, !tbaa !6) %15:gpr32 = SUBSWri %12, 1, 0, implicit-def $nzcv Bcc 11, %bb.2, implicit $nzcv B %bb.1 @@ -1231,12 +1231,12 @@ body: | %16:gpr32sp = ADDWri %13, 42, 0 %1:gpr32all = COPY %16 %14:gpr32 = MOVi32imm 43 - STRWui killed %14, %11, 0 :: (store 4 into %ir.store, !tbaa !6) + STRWui killed %14, %11, 0 :: (store (s32) into %ir.store, !tbaa !6) B %bb.3 bb.2.for.cond.cleanup: %2:gpr32 = PHI %12, %bb.0, %6, %bb.3 - STRWui %2, %10, 0 :: (store 4 into %ir.write, !tbaa !6) + STRWui %2, %10, 0 :: (store (s32) into %ir.write, !tbaa !6) RET_ReallyLR bb.3.for.body: @@ -1330,15 +1330,15 @@ body: | ; CHECK: B %bb.1 ; CHECK: bb.1.for.body.preheader: ; CHECK: successors: %bb.3(0x80000000) - ; CHECK: [[LDRWui:%[0-9]+]]:gpr32common = LDRWui [[COPY3]], 0 :: (load 4 from %ir.read, !tbaa !0) + ; CHECK: [[LDRWui:%[0-9]+]]:gpr32common = LDRWui [[COPY3]], 0 :: (load (s32) from %ir.read, !tbaa !0) ; CHECK: [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[LDRWui]], 42, 0 ; CHECK: [[COPY4:%[0-9]+]]:gpr32all = COPY [[ADDWri]] ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 43 - ; CHECK: STRWui killed [[MOVi32imm]], [[COPY3]], 0 :: (store 4 into %ir.read, !tbaa !0) + ; CHECK: STRWui killed [[MOVi32imm]], [[COPY3]], 0 :: (store (s32) into %ir.read, !tbaa !0) ; CHECK: B %bb.3 ; CHECK: bb.2.for.cond.cleanup: ; CHECK: [[PHI:%[0-9]+]]:gpr32 = PHI [[COPY]], %bb.0, %6, %bb.3 - ; CHECK: STRWui [[PHI]], [[COPY2]], 0 :: (store 4 into %ir.write, !tbaa !0) + ; CHECK: STRWui [[PHI]], [[COPY2]], 0 :: (store (s32) into %ir.write, !tbaa !0) ; CHECK: RET_ReallyLR ; CHECK: bb.3.for.body: ; CHECK: successors: %bb.2(0x04000000), %bb.3(0x7c000000) @@ -1361,7 +1361,7 @@ body: | %11:gpr64common = COPY $x2 %10:gpr64common = COPY $x1 %9:gpr64common = COPY $x0 - %13:gpr32common = LDRWui %9, 0 :: (load 4 from %ir.read, !tbaa !6) + %13:gpr32common = LDRWui %9, 0 :: (load (s32) from %ir.read, !tbaa !6) %15:gpr32 = SUBSWri %12, 1, 0, implicit-def $nzcv Bcc 11, %bb.2, implicit $nzcv B %bb.1 @@ -1372,12 +1372,12 @@ body: | %16:gpr32sp = ADDWri %13, 42, 0 %1:gpr32all = COPY %16 %14:gpr32 = MOVi32imm 43 - STRWui killed %14, %9, 0 :: (store 4 into %ir.read, !tbaa !6) + STRWui killed %14, %9, 0 :: (store (s32) into %ir.read, !tbaa !6) B %bb.3 bb.2.for.cond.cleanup: %2:gpr32 = PHI %12, %bb.0, %6, %bb.3 - STRWui %2, %10, 0 :: (store 4 into %ir.write, !tbaa !6) + STRWui %2, %10, 0 :: (store (s32) into %ir.write, !tbaa !6) RET_ReallyLR bb.3.for.body: diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-bti.mir b/llvm/test/CodeGen/AArch64/machine-outliner-bti.mir index 885c326fd91ea..3a6cd273eac57 100644 --- a/llvm/test/CodeGen/AArch64/machine-outliner-bti.mir +++ b/llvm/test/CodeGen/AArch64/machine-outliner-bti.mir @@ -31,13 +31,13 @@ body: | HINT 34 - STRWui renamable $w21, renamable $x20, target-flags(aarch64-pageoff, aarch64-nc) @g :: (store 4 into @g) + STRWui renamable $w21, renamable $x20, target-flags(aarch64-pageoff, aarch64-nc) @g :: (store (s32) into @g) BLR renamable $x19, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp - STRWui renamable $w21, renamable $x20, target-flags(aarch64-pageoff, aarch64-nc) @g :: (store 4 into @g) + STRWui renamable $w21, renamable $x20, target-flags(aarch64-pageoff, aarch64-nc) @g :: (store (s32) into @g) BLR renamable $x19, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp - STRWui killed renamable $w21, killed renamable $x20, target-flags(aarch64-pageoff, aarch64-nc) @g :: (store 4 into @g) + STRWui killed renamable $w21, killed renamable $x20, target-flags(aarch64-pageoff, aarch64-nc) @g :: (store (s32) into @g) BLR killed renamable $x19, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp TCRETURNdi @foo, 0, csr_aarch64_aapcs, implicit $sp diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-iterative-2.mir b/llvm/test/CodeGen/AArch64/machine-outliner-iterative-2.mir index 33b0dfe783a89..37b4ca81c0966 100644 --- a/llvm/test/CodeGen/AArch64/machine-outliner-iterative-2.mir +++ b/llvm/test/CodeGen/AArch64/machine-outliner-iterative-2.mir @@ -42,7 +42,7 @@ body: | bb.0: liveins: $w0, $lr, $x19 - early-clobber $sp = frame-setup STPXpre killed $x19, killed $lr, $sp, -2 :: (store 8), (store 8) + early-clobber $sp = frame-setup STPXpre killed $x19, killed $lr, $sp, -2 :: (store (s64)), (store (s64)) $w19 = ORRWrs $wzr, killed $w0, 0 $w0 = ORRWri $wzr, 0 $w1 = ORRWri $wzr, 1 @@ -58,7 +58,7 @@ body: | $w0 = ORRWri $wzr, 0 $w1 = ORRWri $wzr, 1 BL @z1, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit killed $w1, implicit-def $sp - early-clobber $sp, $x19, $lr = frame-destroy LDPXpost $sp, 2 :: (load 8), (load 8) + early-clobber $sp, $x19, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64)), (load (s64)) RET undef $lr ... @@ -88,7 +88,7 @@ body: | bb.0: liveins: $w0, $lr, $x19 - early-clobber $sp = frame-setup STPXpre killed $x19, killed $lr, $sp, -2 :: (store 8), (store 8) + early-clobber $sp = frame-setup STPXpre killed $x19, killed $lr, $sp, -2 :: (store (s64)), (store (s64)) $w19 = ORRWrs $wzr, killed $w0, 0 $w0 = ORRWri $wzr, 0 $w1 = ORRWri $wzr, 1 @@ -104,7 +104,7 @@ body: | $w0 = ORRWri $wzr, 0 $w1 = ORRWri $wzr, 1 BL @z1, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit killed $w1, implicit-def $sp - early-clobber $sp, $x19, $lr = frame-destroy LDPXpost $sp, 2 :: (load 8), (load 8) + early-clobber $sp, $x19, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64)), (load (s64)) RET undef $lr ... diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-iterative.mir b/llvm/test/CodeGen/AArch64/machine-outliner-iterative.mir index c692929d7b882..b7fbdc09c1dd1 100644 --- a/llvm/test/CodeGen/AArch64/machine-outliner-iterative.mir +++ b/llvm/test/CodeGen/AArch64/machine-outliner-iterative.mir @@ -64,7 +64,7 @@ body: | bb.0: liveins: $x0, $x19, $lr - early-clobber $sp = frame-setup STPXpre killed $lr, killed $x19, $sp, -2 :: (store 8 into %stack.1), (store 8 into %stack.0) + early-clobber $sp = frame-setup STPXpre killed $lr, killed $x19, $sp, -2 :: (store (s64) into %stack.1), (store (s64) into %stack.0) frame-setup CFI_INSTRUCTION def_cfa_offset 16 frame-setup CFI_INSTRUCTION offset $w19, -8 frame-setup CFI_INSTRUCTION offset $w30, -16 @@ -76,7 +76,7 @@ body: | BL @foo, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit killed $x1, implicit killed $x2, implicit killed $x3, implicit-def $sp $x0 = COPY killed renamable $x19 BL @widget, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp - early-clobber $sp, $lr, $x19 = frame-destroy LDPXpost $sp, 2 :: (load 8 from %stack.1), (load 8 from %stack.0) + early-clobber $sp, $lr, $x19 = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.1), (load (s64) from %stack.0) RET_ReallyLR ... @@ -90,7 +90,7 @@ body: | bb.0: liveins: $x0, $x19, $lr - early-clobber $sp = frame-setup STPXpre killed $lr, killed $x19, $sp, -2 :: (store 8 into %stack.1), (store 8 into %stack.0) + early-clobber $sp = frame-setup STPXpre killed $lr, killed $x19, $sp, -2 :: (store (s64) into %stack.1), (store (s64) into %stack.0) frame-setup CFI_INSTRUCTION def_cfa_offset 16 frame-setup CFI_INSTRUCTION offset $w19, -8 frame-setup CFI_INSTRUCTION offset $w30, -16 @@ -102,7 +102,7 @@ body: | BL @foo, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit $x1, implicit killed $x2, implicit killed $x3, implicit-def $sp $x0 = COPY killed renamable $x19 BL @widget, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp - early-clobber $sp, $lr, $x19 = frame-destroy LDPXpost $sp, 2 :: (load 8 from %stack.1), (load 8 from %stack.0) + early-clobber $sp, $lr, $x19 = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.1), (load (s64) from %stack.0) RET_ReallyLR ... @@ -116,7 +116,7 @@ body: | bb.0: liveins: $x0, $x19, $lr - early-clobber $sp = frame-setup STPXpre killed $lr, killed $x19, $sp, -2 :: (store 8 into %stack.1), (store 8 into %stack.0) + early-clobber $sp = frame-setup STPXpre killed $lr, killed $x19, $sp, -2 :: (store (s64) into %stack.1), (store (s64) into %stack.0) frame-setup CFI_INSTRUCTION def_cfa_offset 16 frame-setup CFI_INSTRUCTION offset $w19, -8 frame-setup CFI_INSTRUCTION offset $w30, -16 @@ -128,7 +128,7 @@ body: | BL @foo, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit killed $x1, implicit killed $x2, implicit killed $x3, implicit-def $sp $x0 = COPY killed renamable $x19 BL @widget, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp - early-clobber $sp, $lr, $x19 = frame-destroy LDPXpost $sp, 2 :: (load 8 from %stack.1), (load 8 from %stack.0) + early-clobber $sp, $lr, $x19 = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.1), (load (s64) from %stack.0) RET_ReallyLR ... diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-sp-mod.mir b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-sp-mod.mir index 0b86499461bbb..d20053b40991b 100644 --- a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-sp-mod.mir +++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-sp-mod.mir @@ -72,13 +72,13 @@ body: | $sp = frame-setup SUBXri $sp, 16, 0 renamable $x8 = ADRP target-flags(aarch64-page) @v $x9 = ADDXri $sp, 12, 0 - STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) - STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) - STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) - STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) - STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) - STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) - STRXui killed renamable $x9, killed renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) + STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store (s64) into @v) + STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store (s64) into @v) + STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store (s64) into @v) + STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store (s64) into @v) + STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store (s64) into @v) + STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store (s64) into @v) + STRXui killed renamable $x9, killed renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store (s64) into @v) $sp = frame-destroy ADDXri $sp, 16, 0 frame-destroy AUTIASP implicit-def $lr, implicit killed $lr, implicit $sp RET undef $lr @@ -105,13 +105,13 @@ body: | $sp = frame-setup SUBXri $sp, 16, 0 renamable $x8 = ADRP target-flags(aarch64-page) @v $x9 = ADDXri $sp, 12, 0 - STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) - STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) - STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) - STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) - STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) - STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) - STRXui killed renamable $x9, killed renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) + STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store (s64) into @v) + STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store (s64) into @v) + STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store (s64) into @v) + STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store (s64) into @v) + STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store (s64) into @v) + STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store (s64) into @v) + STRXui killed renamable $x9, killed renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store (s64) into @v) $sp = frame-destroy ADDXri $sp, 16, 0 frame-destroy AUTIASP implicit-def $lr, implicit killed $lr, implicit $sp RET undef $lr @@ -138,13 +138,13 @@ body: | $sp = frame-setup SUBXri $sp, 16, 0 renamable $x8 = ADRP target-flags(aarch64-page) @v $x9 = ADDXri $sp, 12, 0 - STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) - STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) - STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) - STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) - STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) - STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) - STRXui killed renamable $x9, killed renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) + STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store (s64) into @v) + STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store (s64) into @v) + STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store (s64) into @v) + STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store (s64) into @v) + STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store (s64) into @v) + STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store (s64) into @v) + STRXui killed renamable $x9, killed renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store (s64) into @v) $sp = frame-destroy ADDXri $sp, 12, 0 frame-destroy AUTIASP implicit-def $lr, implicit killed $lr, implicit $sp RET undef $lr @@ -161,13 +161,13 @@ body: | $sp = frame-setup SUBXri $sp, 16, 0 renamable $x8 = ADRP target-flags(aarch64-page) @v $x9 = ADDXri $sp, 12, 0 - STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) - STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) - STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) - STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) - STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) - STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) - STRXui killed renamable $x9, killed renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store 8 into @v) + STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store (s64) into @v) + STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store (s64) into @v) + STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store (s64) into @v) + STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store (s64) into @v) + STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store (s64) into @v) + STRXui renamable $x9, renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store (s64) into @v) + STRXui killed renamable $x9, killed renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @v :: (volatile store (s64) into @v) $sp = frame-destroy ADDXri $sp, 12, 0 frame-destroy AUTIASP implicit-def $lr, implicit killed $lr, implicit $sp RET undef $lr diff --git a/llvm/test/CodeGen/AArch64/machine-scheduler.mir b/llvm/test/CodeGen/AArch64/machine-scheduler.mir index b66a2eff09103..09f30337fdd26 100644 --- a/llvm/test/CodeGen/AArch64/machine-scheduler.mir +++ b/llvm/test/CodeGen/AArch64/machine-scheduler.mir @@ -26,9 +26,9 @@ tracksRegLiveness: true body: | bb.0.entry: liveins: $w1, $x0 - $w8 = LDRWui $x0, 1, implicit-def $x8 :: (load 4 from %ir.0) - STRWui killed $w1, $x0, 2 :: (store 4 into %ir.arrayidx1) - $w9 = LDRWui killed $x0, 0, implicit-def $x9 :: (load 4 from %ir.arrayidx19, align 8) + $w8 = LDRWui $x0, 1, implicit-def $x8 :: (load (s32) from %ir.0) + STRWui killed $w1, $x0, 2 :: (store (s32) into %ir.arrayidx1) + $w9 = LDRWui killed $x0, 0, implicit-def $x9 :: (load (s32) from %ir.arrayidx19, align 8) $x0 = ADDXrr killed $x9, killed $x8 RET_ReallyLR implicit $x0 ... diff --git a/llvm/test/CodeGen/AArch64/memcpy-scoped-aa.ll b/llvm/test/CodeGen/AArch64/memcpy-scoped-aa.ll index 85bba57b39a47..22d5ba7c0fcd4 100644 --- a/llvm/test/CodeGen/AArch64/memcpy-scoped-aa.ll +++ b/llvm/test/CodeGen/AArch64/memcpy-scoped-aa.ll @@ -8,8 +8,8 @@ ; MIR-DAG: ![[SET1:[0-9]+]] = !{![[SCOPE1]]} ; MIR-LABEL: name: test_memcpy -; MIR: %2:fpr128 = LDRQui %0, 1 :: (load 16 from %ir.p1, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]]) -; MIR-NEXT: STRQui killed %2, %0, 0 :: (store 16 into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]]) +; MIR: %2:fpr128 = LDRQui %0, 1 :: (load (s128) from %ir.p1, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]]) +; MIR-NEXT: STRQui killed %2, %0, 0 :: (store (s128) into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]]) define i32 @test_memcpy(i32* nocapture %p, i32* nocapture readonly %q) { ; CHECK-LABEL: test_memcpy: ; CHECK-DAG: ldp [[Q0:w[0-9]+]], [[Q1:w[0-9]+]], [x1] @@ -30,8 +30,8 @@ define i32 @test_memcpy(i32* nocapture %p, i32* nocapture readonly %q) { } ; MIR-LABEL: name: test_memcpy_inline -; MIR: %2:fpr128 = LDRQui %0, 1 :: (load 16 from %ir.p1, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]]) -; MIR-NEXT: STRQui killed %2, %0, 0 :: (store 16 into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]]) +; MIR: %2:fpr128 = LDRQui %0, 1 :: (load (s128) from %ir.p1, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]]) +; MIR-NEXT: STRQui killed %2, %0, 0 :: (store (s128) into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]]) define i32 @test_memcpy_inline(i32* nocapture %p, i32* nocapture readonly %q) { ; CHECK-LABEL: test_memcpy_inline: ; CHECK-DAG: ldp [[Q0:w[0-9]+]], [[Q1:w[0-9]+]], [x1] @@ -52,8 +52,8 @@ define i32 @test_memcpy_inline(i32* nocapture %p, i32* nocapture readonly %q) { } ; MIR-LABEL: name: test_memmove -; MIR: %2:fpr128 = LDRQui %0, 1 :: (load 16 from %ir.p1, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]]) -; MIR-NEXT: STRQui killed %2, %0, 0 :: (store 16 into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]]) +; MIR: %2:fpr128 = LDRQui %0, 1 :: (load (s128) from %ir.p1, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]]) +; MIR-NEXT: STRQui killed %2, %0, 0 :: (store (s128) into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]]) define i32 @test_memmove(i32* nocapture %p, i32* nocapture readonly %q) { ; CHECK-LABEL: test_memmove: ; CHECK-DAG: ldp [[Q0:w[0-9]+]], [[Q1:w[0-9]+]], [x1] @@ -75,8 +75,8 @@ define i32 @test_memmove(i32* nocapture %p, i32* nocapture readonly %q) { ; MIR-LABEL: name: test_memset ; MIR: %2:gpr64 = MOVi64imm -6148914691236517206 -; MIR-NEXT: STRXui %2, %0, 1 :: (store 8 into %ir.p0 + 8, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]]) -; MIR-NEXT: STRXui %2, %0, 0 :: (store 8 into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]]) +; MIR-NEXT: STRXui %2, %0, 1 :: (store (s64) into %ir.p0 + 8, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]]) +; MIR-NEXT: STRXui %2, %0, 0 :: (store (s64) into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]]) define i32 @test_memset(i32* nocapture %p, i32* nocapture readonly %q) { ; CHECK-LABEL: test_memset: ; CHECK-DAG: ldp [[Q0:w[0-9]+]], [[Q1:w[0-9]+]], [x1] @@ -95,8 +95,8 @@ define i32 @test_memset(i32* nocapture %p, i32* nocapture readonly %q) { } ; MIR-LABEL: name: test_mempcpy -; MIR: %2:fpr128 = LDRQui %0, 1 :: (load 16 from %ir.p1, align 1, !alias.scope ![[SET0]], !noalias ![[SET1]]) -; MIR-NEXT: STRQui killed %2, %0, 0 :: (store 16 into %ir.p0, align 1, !alias.scope ![[SET0]], !noalias ![[SET1]]) +; MIR: %2:fpr128 = LDRQui %0, 1 :: (load (s128) from %ir.p1, align 1, !alias.scope ![[SET0]], !noalias ![[SET1]]) +; MIR-NEXT: STRQui killed %2, %0, 0 :: (store (s128) into %ir.p0, align 1, !alias.scope ![[SET0]], !noalias ![[SET1]]) define i32 @test_mempcpy(i32* nocapture %p, i32* nocapture readonly %q) { ; CHECK-LABEL: test_mempcpy: ; CHECK-DAG: ldp [[Q0:w[0-9]+]], [[Q1:w[0-9]+]], [x1] diff --git a/llvm/test/CodeGen/AArch64/mlicm-stack-write-check.mir b/llvm/test/CodeGen/AArch64/mlicm-stack-write-check.mir index c5d1a8f81fcfa..276bd9fb269dc 100644 --- a/llvm/test/CodeGen/AArch64/mlicm-stack-write-check.mir +++ b/llvm/test/CodeGen/AArch64/mlicm-stack-write-check.mir @@ -19,7 +19,7 @@ body: | ; CHECK-NOT: $x2 = LDRXui %stack.0, 0 liveins: $x0 DBG_VALUE %stack.0, 0 - $x2 = LDRXui %stack.0, 0 :: (load 8 from %stack.0) + $x2 = LDRXui %stack.0, 0 :: (load (s64) from %stack.0) $x0 = ADDXrr $x0, $x2 $xzr = SUBSXri $x0, 1, 0, implicit-def $nzcv Bcc 11, %bb.1, implicit $nzcv diff --git a/llvm/test/CodeGen/AArch64/multi-vector-store-size.ll b/llvm/test/CodeGen/AArch64/multi-vector-store-size.ll index 8764eb447a300..ddc79400cee8a 100644 --- a/llvm/test/CodeGen/AArch64/multi-vector-store-size.ll +++ b/llvm/test/CodeGen/AArch64/multi-vector-store-size.ll @@ -26,11 +26,11 @@ define void @addstx(float* %res, <4 x float>* %a, <4 x float>* %b, <4 x float>* ; The sizes below are conservative. AArch64TargetLowering ; conservatively assumes the entire vector is stored. tail call void @llvm.aarch64.neon.st2.v4f32.p0f32(<4 x float> %ar, <4 x float> %br, float* %res) -; CHECK: ST2Twov4s {{.*}} :: (store 32 {{.*}}) +; CHECK: ST2Twov4s {{.*}} :: (store (s256) {{.*}}) tail call void @llvm.aarch64.neon.st3.v4f32.p0f32(<4 x float> %ar, <4 x float> %br, <4 x float> %cr, float* %res) -; CHECK: ST3Threev4s {{.*}} :: (store 48 {{.*}}) +; CHECK: ST3Threev4s {{.*}} :: (store (s384) {{.*}}) tail call void @llvm.aarch64.neon.st4.v4f32.p0f32(<4 x float> %ar, <4 x float> %br, <4 x float> %cr, <4 x float> %dr, float* %res) -; CHECK: ST4Fourv4s {{.*}} :: (store 64 {{.*}}) +; CHECK: ST4Fourv4s {{.*}} :: (store (s512) {{.*}}) ret void } @@ -49,11 +49,11 @@ define void @addst1x(float* %res, <4 x float>* %a, <4 x float>* %b, <4 x float> ; The sizes below are conservative. AArch64TargetLowering ; conservatively assumes the entire vector is stored. tail call void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float> %ar, <4 x float> %br, float* %res) -; CHECK: ST1Twov4s {{.*}} :: (store 32 {{.*}}) +; CHECK: ST1Twov4s {{.*}} :: (store (s256) {{.*}}) tail call void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float> %ar, <4 x float> %br, <4 x float> %cr, float* %res) -; CHECK: ST1Threev4s {{.*}} :: (store 48 {{.*}}) +; CHECK: ST1Threev4s {{.*}} :: (store (s384) {{.*}}) tail call void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float> %ar, <4 x float> %br, <4 x float> %cr, <4 x float> %dr, float* %res) -; CHECK: ST1Fourv4s {{.*}} :: (store 64 {{.*}}) +; CHECK: ST1Fourv4s {{.*}} :: (store (s512) {{.*}}) ret void } @@ -72,11 +72,11 @@ define void @addstxlane(float* %res, <4 x float>* %a, <4 x float>* %b, <4 x flo ; The sizes below are conservative. AArch64TargetLowering ; conservatively assumes the entire vector is stored. tail call void @llvm.aarch64.neon.st2lane.v4f32.p0f32(<4 x float> %ar, <4 x float> %br, i64 1, float* %res) -; CHECK: ST2i32 {{.*}} :: (store 32 {{.*}}) +; CHECK: ST2i32 {{.*}} :: (store (s256) {{.*}}) tail call void @llvm.aarch64.neon.st3lane.v4f32.p0f32(<4 x float> %ar, <4 x float> %br, <4 x float> %cr, i64 1, float* %res) -; CHECK: ST3i32 {{.*}} :: (store 48 {{.*}}) +; CHECK: ST3i32 {{.*}} :: (store (s384) {{.*}}) tail call void @llvm.aarch64.neon.st4lane.v4f32.p0f32(<4 x float> %ar, <4 x float> %br, <4 x float> %cr, <4 x float> %dr, i64 1, float* %res) -; CHECK: ST4i32 {{.*}} :: (store 64 {{.*}}) +; CHECK: ST4i32 {{.*}} :: (store (s512) {{.*}}) ret void } diff --git a/llvm/test/CodeGen/AArch64/post-ra-machine-sink.mir b/llvm/test/CodeGen/AArch64/post-ra-machine-sink.mir index 5c9f6cce9affd..a0eb3c1979391 100644 --- a/llvm/test/CodeGen/AArch64/post-ra-machine-sink.mir +++ b/llvm/test/CodeGen/AArch64/post-ra-machine-sink.mir @@ -252,7 +252,7 @@ body: | liveins: $w0, $w1 $w1 = SUBSWri $w1, 1, 0, implicit-def $nzcv renamable $w19 = COPY $w0 - $w0 = LDRWui $sp, 0 :: (load 4) + $w0 = LDRWui $sp, 0 :: (load (s32)) Bcc 11, %bb.2, implicit $nzcv B %bb.1 @@ -279,7 +279,7 @@ body: | liveins: $w0, $w1 $w1 = SUBSWri $w1, 1, 0, implicit-def $nzcv renamable $w19 = COPY $w0 - STRWui $w1, $x19, 0 :: (store 4) + STRWui $w1, $x19, 0 :: (store (s32)) Bcc 11, %bb.2, implicit $nzcv B %bb.1 diff --git a/llvm/test/CodeGen/AArch64/reg-scavenge-frame.mir b/llvm/test/CodeGen/AArch64/reg-scavenge-frame.mir index dad83ac97f46a..3db69cfb21593 100644 --- a/llvm/test/CodeGen/AArch64/reg-scavenge-frame.mir +++ b/llvm/test/CodeGen/AArch64/reg-scavenge-frame.mir @@ -44,11 +44,11 @@ body: | $x28 = COPY $xzr $fp = COPY $xzr $lr = COPY $xzr - ST1Fourv1d killed $d16_d17_d18_d19, %stack.0 :: (store 32 into %stack.0, align 8) - ; CHECK: STRXui killed $[[SCAVREG:x[0-9]+|fp|lr]], $sp, [[SPOFFSET:[0-9]+]] :: (store 8 into %stack.1) + ST1Fourv1d killed $d16_d17_d18_d19, %stack.0 :: (store (s256) into %stack.0, align 8) + ; CHECK: STRXui killed $[[SCAVREG:x[0-9]+|fp|lr]], $sp, [[SPOFFSET:[0-9]+]] :: (store (s64) into %stack.1) ; CHECK-NEXT: $[[SCAVREG]] = ADDXri $sp, {{[0-9]+}}, 0 - ; CHECK-NEXT: ST1Fourv1d killed $d16_d17_d18_d19, killed $[[SCAVREG]] :: (store 32 into %stack.0, align 8) - ; CHECK-NEXT: $[[SCAVREG]] = LDRXui $sp, [[SPOFFSET]] :: (load 8 from %stack.1) + ; CHECK-NEXT: ST1Fourv1d killed $d16_d17_d18_d19, killed $[[SCAVREG]] :: (store (s256) into %stack.0, align 8) + ; CHECK-NEXT: $[[SCAVREG]] = LDRXui $sp, [[SPOFFSET]] :: (load (s64) from %stack.1) HINT 0, implicit $x0 HINT 0, implicit $x1 diff --git a/llvm/test/CodeGen/AArch64/seqpairspill.mir b/llvm/test/CodeGen/AArch64/seqpairspill.mir index 12748378e6784..0e6c94c44712c 100644 --- a/llvm/test/CodeGen/AArch64/seqpairspill.mir +++ b/llvm/test/CodeGen/AArch64/seqpairspill.mir @@ -7,9 +7,9 @@ body: | bb.0: ; Check the spill/reload sequence for the %0 register ; CHECK: renamable $[[REG0:[a-z0-9]+]]_[[REG1:[a-z0-9]+]] = CASPALX - ; CHECK-NEXT: STPXi renamable $[[REG0]], renamable $[[REG1]], %stack.0, 0, implicit killed $[[REG0]]_[[REG1]] :: (store 16 into %stack.0, align 8) + ; CHECK-NEXT: STPXi renamable $[[REG0]], renamable $[[REG1]], %stack.0, 0, implicit killed $[[REG0]]_[[REG1]] :: (store (s128) into %stack.0, align 8) ; CHECK: INLINEASM - ; CHECK: renamable $[[REG2:[a-z0-9]+]], renamable $[[REG3:[a-z0-9]+]] = LDPXi %stack.0, 0, implicit-def $[[REG2]]_[[REG3]] :: (load 16 from %stack.0, align 8) + ; CHECK: renamable $[[REG2:[a-z0-9]+]], renamable $[[REG3:[a-z0-9]+]] = LDPXi %stack.0, 0, implicit-def $[[REG2]]_[[REG3]] :: (load (s128) from %stack.0, align 8) ; CHECK-NEXT: $xzr = COPY renamable $[[REG2]] ; CHECK-NEXT: $xzr = COPY renamable $[[REG3]] %0 : xseqpairsclass = IMPLICIT_DEF @@ -27,9 +27,9 @@ body: | bb.0: ; Check the spill/reload sequence for the %0 register ; CHECK: $[[REG0:[a-z0-9]+]]_[[REG1:[a-z0-9]+]] = CASPALW - ; CHECK-NEXT: STPWi renamable $[[REG0]], renamable $[[REG1]], %stack.0, 0, implicit killed $[[REG0]]_[[REG1]] :: (store 8 into %stack.0, align 4) + ; CHECK-NEXT: STPWi renamable $[[REG0]], renamable $[[REG1]], %stack.0, 0, implicit killed $[[REG0]]_[[REG1]] :: (store (s64) into %stack.0, align 4) ; CHECK: INLINEASM - ; CHECK: renamable $[[REG2:[a-z0-9]+]], renamable $[[REG3:[a-z0-9]+]] = LDPWi %stack.0, 0, implicit-def $[[REG2]]_[[REG3]] :: (load 8 from %stack.0, align 4) + ; CHECK: renamable $[[REG2:[a-z0-9]+]], renamable $[[REG3:[a-z0-9]+]] = LDPWi %stack.0, 0, implicit-def $[[REG2]]_[[REG3]] :: (load (s64) from %stack.0, align 4) ; CHECK-NEXT: $xzr = COPY renamable $[[REG2]] ; CHECK-NEXT: $xzr = COPY renamable $[[REG3]] %0 : wseqpairsclass = IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AArch64/settag-merge.mir b/llvm/test/CodeGen/AArch64/settag-merge.mir index dc2a00c7d3d37..991e0a23e319d 100644 --- a/llvm/test/CodeGen/AArch64/settag-merge.mir +++ b/llvm/test/CodeGen/AArch64/settag-merge.mir @@ -44,11 +44,11 @@ stack: - { id: 3, name: d, size: 16, alignment: 16 } body: | bb.0.entry: - STGOffset $sp, %stack.0.a, 0 :: (store 16 into %ir.a) - STGOffset $sp, %stack.1.b, 0 :: (store 16 into %ir.b) - STGOffset $sp, %stack.2.c, 0 :: (store 16 into %ir.c) + STGOffset $sp, %stack.0.a, 0 :: (store (s128) into %ir.a) + STGOffset $sp, %stack.1.b, 0 :: (store (s128) into %ir.b) + STGOffset $sp, %stack.2.c, 0 :: (store (s128) into %ir.c) $w0 = COPY $wzr - STGOffset $sp, %stack.3.d, 0 :: (store 16 into %ir.d) + STGOffset $sp, %stack.3.d, 0 :: (store (s128) into %ir.d) RET_ReallyLR implicit killed $w0 ... @@ -61,7 +61,7 @@ body: | # CHECK: ST2GOffset $sp, $sp, 6 # CHECK: STGOffset $sp, $sp, 8 # CHECK: STRBBui -# CHECK: ST2GOffset $sp, $sp, 0 +# CHECK: ST2GOffset $sp, $sp, 0 # CHECK: RET_ReallyLR name: stg16_store_128 @@ -71,13 +71,13 @@ stack: - { id: 1, name: b, size: 128, alignment: 16 } body: | bb.0.entry: - STGOffset $sp, %stack.0.a, 0 :: (store 16 into %ir.a) + STGOffset $sp, %stack.0.a, 0 :: (store (s128) into %ir.a) renamable $w8 = MOVi32imm 42 - ST2GOffset $sp, %stack.1.b, 6 :: (store 32 into %ir.b + 96, align 16) - ST2GOffset $sp, %stack.1.b, 4 :: (store 32 into %ir.b + 64, align 16) - ST2GOffset $sp, %stack.1.b, 2 :: (store 32 into %ir.b + 32, align 16) - STRBBui killed renamable $w8, %stack.0.a, 0 :: (store 1 into %ir.a, align 16) - ST2GOffset $sp, %stack.1.b, 0 :: (store 32 into %ir.b, align 16) + ST2GOffset $sp, %stack.1.b, 6 :: (store (s256) into %ir.b + 96, align 16) + ST2GOffset $sp, %stack.1.b, 4 :: (store (s256) into %ir.b + 64, align 16) + ST2GOffset $sp, %stack.1.b, 2 :: (store (s256) into %ir.b + 32, align 16) + STRBBui killed renamable $w8, %stack.0.a, 0 :: (store (s8) into %ir.a, align 16) + ST2GOffset $sp, %stack.1.b, 0 :: (store (s256) into %ir.b, align 16) RET_ReallyLR ... diff --git a/llvm/test/CodeGen/AArch64/speculation-hardening-sls-blr.mir b/llvm/test/CodeGen/AArch64/speculation-hardening-sls-blr.mir index cbcc546c89206..81f95348f511e 100644 --- a/llvm/test/CodeGen/AArch64/speculation-hardening-sls-blr.mir +++ b/llvm/test/CodeGen/AArch64/speculation-hardening-sls-blr.mir @@ -32,16 +32,16 @@ body: | bb.0.entry: liveins: $lr - early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 ; :: (store 8 into %stack.0) + early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 ; :: (store (s64) into %stack.0) frame-setup CFI_INSTRUCTION def_cfa_offset 16 frame-setup CFI_INSTRUCTION offset $w30, -16 renamable $x8 = ADRP target-flags(aarch64-page) @a - renamable $x8 = LDRXui killed renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @a :: (dereferenceable load 8 from `i32 ()** bitcast (i32 (...)** @a to i32 ()**)`) + renamable $x8 = LDRXui killed renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @a :: (dereferenceable load (s64) from `i32 ()** bitcast (i32 (...)** @a to i32 ()**)`) BLRNoIP killed renamable $x8, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0 ; CHECK: BL , csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0, implicit killed $x8 renamable $x8 = ADRP target-flags(aarch64-page) @b - STRWui killed renamable $w0, killed renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @b :: (store 4 into @b) - early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 ; :: (load 8 from %stack.0) + STRWui killed renamable $w0, killed renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @b :: (store (s32) into @b) + early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 ; :: (load (s64) from %stack.0) RET undef $lr diff --git a/llvm/test/CodeGen/AArch64/spill-fold.mir b/llvm/test/CodeGen/AArch64/spill-fold.mir index 6cfd48529f9fc..624fcc449b6cc 100644 --- a/llvm/test/CodeGen/AArch64/spill-fold.mir +++ b/llvm/test/CodeGen/AArch64/spill-fold.mir @@ -14,7 +14,7 @@ registers: - { id: 0, class: gpr64 } body: | bb.0: - ; CHECK: STRXui $xzr, %stack.0, 0 :: (store 8 into %stack.0) + ; CHECK: STRXui $xzr, %stack.0, 0 :: (store (s64) into %stack.0) undef %0.sub_32 = COPY $wzr INLINEASM &nop, 1, 12, implicit-def dead $x0, 12, implicit-def dead $x1, 12, implicit-def dead $x2, 12, implicit-def dead $x3, 12, implicit-def dead $x4, 12, implicit-def dead $x5, 12, implicit-def dead $x6, 12, implicit-def dead $x7, 12, implicit-def dead $x8, 12, implicit-def dead $x9, 12, implicit-def dead $x10, 12, implicit-def dead $x11, 12, implicit-def dead $x12, 12, implicit-def dead $x13, 12, implicit-def dead $x14, 12, implicit-def dead $x15, 12, implicit-def dead $x16, 12, implicit-def dead $x17, 12, implicit-def dead $x18, 12, implicit-def dead $x19, 12, implicit-def dead $x20, 12, implicit-def dead $x21, 12, implicit-def dead $x22, 12, implicit-def dead $x23, 12, implicit-def dead $x24, 12, implicit-def dead $x25, 12, implicit-def dead $x26, 12, implicit-def dead $x27, 12, implicit-def dead $x28, 12, implicit-def dead $fp, 12, implicit-def dead $lr, 12, implicit-def $sp $x0 = COPY %0 @@ -28,7 +28,7 @@ registers: - { id: 0, class: gpr64sp } body: | bb.0: - ; CHECK: STRXui $xzr, %stack.0, 0 :: (store 8 into %stack.0) + ; CHECK: STRXui $xzr, %stack.0, 0 :: (store (s64) into %stack.0) undef %0.sub_32 = COPY $wzr INLINEASM &nop, 1, 12, implicit-def dead $x0, 12, implicit-def dead $x1, 12, implicit-def dead $x2, 12, implicit-def dead $x3, 12, implicit-def dead $x4, 12, implicit-def dead $x5, 12, implicit-def dead $x6, 12, implicit-def dead $x7, 12, implicit-def dead $x8, 12, implicit-def dead $x9, 12, implicit-def dead $x10, 12, implicit-def dead $x11, 12, implicit-def dead $x12, 12, implicit-def dead $x13, 12, implicit-def dead $x14, 12, implicit-def dead $x15, 12, implicit-def dead $x16, 12, implicit-def dead $x17, 12, implicit-def dead $x18, 12, implicit-def dead $x19, 12, implicit-def dead $x20, 12, implicit-def dead $x21, 12, implicit-def dead $x22, 12, implicit-def dead $x23, 12, implicit-def dead $x24, 12, implicit-def dead $x25, 12, implicit-def dead $x26, 12, implicit-def dead $x27, 12, implicit-def dead $x28, 12, implicit-def dead $fp, 12, implicit-def dead $lr, 12, implicit-def $sp $x0 = ADDXri %0, 1, 0 @@ -42,7 +42,7 @@ registers: - { id: 0, class: fpr64 } body: | bb.0: - ; CHECK: STRXui $xzr, %stack.0, 0 :: (store 8 into %stack.0) + ; CHECK: STRXui $xzr, %stack.0, 0 :: (store (s64) into %stack.0) undef %0.ssub = COPY $wzr INLINEASM &nop, 1, 12, implicit-def dead $d0, 12, implicit-def dead $d1, 12, implicit-def dead $d2, 12, implicit-def dead $d3, 12, implicit-def dead $d4, 12, implicit-def dead $d5, 12, implicit-def dead $d6, 12, implicit-def dead $d7, 12, implicit-def dead $d8, 12, implicit-def dead $d9, 12, implicit-def dead $d10, 12, implicit-def dead $d11, 12, implicit-def dead $d12, 12, implicit-def dead $d13, 12, implicit-def dead $d14, 12, implicit-def dead $d15, 12, implicit-def dead $d16, 12, implicit-def dead $d17, 12, implicit-def dead $d18, 12, implicit-def dead $d19, 12, implicit-def dead $d20, 12, implicit-def dead $d21, 12, implicit-def dead $d22, 12, implicit-def dead $d23, 12, implicit-def dead $d24, 12, implicit-def dead $d25, 12, implicit-def dead $d26, 12, implicit-def dead $d27, 12, implicit-def dead $d28, 12, implicit-def dead $d29, 12, implicit-def dead $d30, 12, implicit-def $d31 $x0 = COPY %0 @@ -59,7 +59,7 @@ body: | bb.0: %0 = COPY $wzr INLINEASM &nop, 1, 12, implicit-def dead $x0, 12, implicit-def dead $x1, 12, implicit-def dead $x2, 12, implicit-def dead $x3, 12, implicit-def dead $x4, 12, implicit-def dead $x5, 12, implicit-def dead $x6, 12, implicit-def dead $x7, 12, implicit-def dead $x8, 12, implicit-def dead $x9, 12, implicit-def dead $x10, 12, implicit-def dead $x11, 12, implicit-def dead $x12, 12, implicit-def dead $x13, 12, implicit-def dead $x14, 12, implicit-def dead $x15, 12, implicit-def dead $x16, 12, implicit-def dead $x17, 12, implicit-def dead $x18, 12, implicit-def dead $x19, 12, implicit-def dead $x20, 12, implicit-def dead $x21, 12, implicit-def dead $x22, 12, implicit-def dead $x23, 12, implicit-def dead $x24, 12, implicit-def dead $x25, 12, implicit-def dead $x26, 12, implicit-def dead $x27, 12, implicit-def dead $x28, 12, implicit-def dead $fp, 12, implicit-def dead $lr, 12, implicit-def $sp - ; CHECK: undef %1.sub_32:gpr64 = LDRWui %stack.0, 0 :: (load 4 from %stack.0) + ; CHECK: undef %1.sub_32:gpr64 = LDRWui %stack.0, 0 :: (load (s32) from %stack.0) undef %1.sub_32 = COPY %0 $x0 = COPY %1 RET_ReallyLR implicit $x0 @@ -75,7 +75,7 @@ body: | bb.0: %0 = COPY $wzr INLINEASM &nop, 1, 12, implicit-def dead $x0, 12, implicit-def dead $x1, 12, implicit-def dead $x2, 12, implicit-def dead $x3, 12, implicit-def dead $x4, 12, implicit-def dead $x5, 12, implicit-def dead $x6, 12, implicit-def dead $x7, 12, implicit-def dead $x8, 12, implicit-def dead $x9, 12, implicit-def dead $x10, 12, implicit-def dead $x11, 12, implicit-def dead $x12, 12, implicit-def dead $x13, 12, implicit-def dead $x14, 12, implicit-def dead $x15, 12, implicit-def dead $x16, 12, implicit-def dead $x17, 12, implicit-def dead $x18, 12, implicit-def dead $x19, 12, implicit-def dead $x20, 12, implicit-def dead $x21, 12, implicit-def dead $x22, 12, implicit-def dead $x23, 12, implicit-def dead $x24, 12, implicit-def dead $x25, 12, implicit-def dead $x26, 12, implicit-def dead $x27, 12, implicit-def dead $x28, 12, implicit-def dead $fp, 12, implicit-def dead $lr, 12, implicit-def $sp - ; CHECK: undef %1.ssub:fpr64 = LDRSui %stack.0, 0 :: (load 4 from %stack.0) + ; CHECK: undef %1.ssub:fpr64 = LDRSui %stack.0, 0 :: (load (s32) from %stack.0) undef %1.ssub = COPY %0 $d0 = COPY %1 RET_ReallyLR implicit $d0 diff --git a/llvm/test/CodeGen/AArch64/spill-undef.mir b/llvm/test/CodeGen/AArch64/spill-undef.mir index 86cf83df54b68..bca5e9df3f4d2 100644 --- a/llvm/test/CodeGen/AArch64/spill-undef.mir +++ b/llvm/test/CodeGen/AArch64/spill-undef.mir @@ -53,15 +53,15 @@ body: | bb.1: %4 = ADRP target-flags(aarch64-page) @g - %8 = LDRWui %4, target-flags(aarch64-pageoff, aarch64-nc) @g :: (volatile dereferenceable load 4 from @g) + %8 = LDRWui %4, target-flags(aarch64-pageoff, aarch64-nc) @g :: (volatile dereferenceable load (s32) from @g) INLINEASM &nop, 1, 12, implicit-def dead early-clobber $x0, 12, implicit-def dead early-clobber $x1, 12, implicit-def dead early-clobber $x2, 12, implicit-def dead early-clobber $x3, 12, implicit-def dead early-clobber $x4, 12, implicit-def dead early-clobber $x5, 12, implicit-def dead early-clobber $x6, 12, implicit-def dead early-clobber $x7, 12, implicit-def dead early-clobber $x8, 12, implicit-def dead early-clobber $x9, 12, implicit-def dead early-clobber $x10, 12, implicit-def dead early-clobber $x11, 12, implicit-def dead early-clobber $x12, 12, implicit-def dead early-clobber $x13, 12, implicit-def dead early-clobber $x14, 12, implicit-def dead early-clobber $x15, 12, implicit-def dead early-clobber $x16, 12, implicit-def dead early-clobber $x17, 12, implicit-def dead early-clobber $x18, 12, implicit-def dead early-clobber $x19, 12, implicit-def dead early-clobber $x20, 12, implicit-def dead early-clobber $x21, 12, implicit-def dead early-clobber $x22, 12, implicit-def dead early-clobber $x23, 12, implicit-def dead early-clobber $x24, 12, implicit-def dead early-clobber $x25, 12, implicit-def dead early-clobber $x26, 12, implicit-def dead early-clobber $x27, 12, implicit-def dead early-clobber $x28, 12, implicit-def dead early-clobber $fp, 12, implicit-def dead early-clobber $lr bb.2: INLINEASM &nop, 1, 12, implicit-def dead early-clobber $x0, 12, implicit-def dead early-clobber $x1, 12, implicit-def dead early-clobber $x2, 12, implicit-def dead early-clobber $x3, 12, implicit-def dead early-clobber $x4, 12, implicit-def dead early-clobber $x5, 12, implicit-def dead early-clobber $x6, 12, implicit-def dead early-clobber $x7, 12, implicit-def dead early-clobber $x8, 12, implicit-def dead early-clobber $x9, 12, implicit-def dead early-clobber $x10, 12, implicit-def dead early-clobber $x11, 12, implicit-def dead early-clobber $x12, 12, implicit-def dead early-clobber $x13, 12, implicit-def dead early-clobber $x14, 12, implicit-def dead early-clobber $x15, 12, implicit-def dead early-clobber $x16, 12, implicit-def dead early-clobber $x17, 12, implicit-def dead early-clobber $x18, 12, implicit-def dead early-clobber $x19, 12, implicit-def dead early-clobber $x20, 12, implicit-def dead early-clobber $x21, 12, implicit-def dead early-clobber $x22, 12, implicit-def dead early-clobber $x23, 12, implicit-def dead early-clobber $x24, 12, implicit-def dead early-clobber $x25, 12, implicit-def dead early-clobber $x26, 12, implicit-def dead early-clobber $x27, 12, implicit-def dead early-clobber $x28, 12, implicit-def dead early-clobber $fp, 12, implicit-def dead early-clobber $lr %6 = ADRP target-flags(aarch64-page) @g $w0 = MOVi32imm 42 - STRWui %8, %6, target-flags(aarch64-pageoff, aarch64-nc) @g :: (volatile store 4 into @g) - STRXui %9, %6, target-flags(aarch64-pageoff, aarch64-nc) @g :: (volatile store 8 into @g) + STRWui %8, %6, target-flags(aarch64-pageoff, aarch64-nc) @g :: (volatile store (s32) into @g) + STRXui %9, %6, target-flags(aarch64-pageoff, aarch64-nc) @g :: (volatile store (s64) into @g) RET_ReallyLR implicit killed $w0 ... diff --git a/llvm/test/CodeGen/AArch64/stack-guard-reassign.mir b/llvm/test/CodeGen/AArch64/stack-guard-reassign.mir index c0dfcbce1d88a..fabaed184b85a 100644 --- a/llvm/test/CodeGen/AArch64/stack-guard-reassign.mir +++ b/llvm/test/CodeGen/AArch64/stack-guard-reassign.mir @@ -25,10 +25,10 @@ stack: - { id: 2, size: 4, alignment: 4, stack-id: default } body: | bb.0: - %25:gpr64common = LOAD_STACK_GUARD :: (dereferenceable invariant load 8 from @__stack_chk_guard) - STRXui killed %25, %stack.0.StackGuardSlot, 0 :: (volatile store 8 into %stack.0.StackGuardSlot) - %28:gpr64 = LDRXui %stack.0.StackGuardSlot, 0 :: (volatile load 8 from %stack.0.StackGuardSlot) - %29:gpr64common = LOAD_STACK_GUARD :: (dereferenceable invariant load 8 from @__stack_chk_guard) + %25:gpr64common = LOAD_STACK_GUARD :: (dereferenceable invariant load (s64) from @__stack_chk_guard) + STRXui killed %25, %stack.0.StackGuardSlot, 0 :: (volatile store (s64) into %stack.0.StackGuardSlot) + %28:gpr64 = LDRXui %stack.0.StackGuardSlot, 0 :: (volatile load (s64) from %stack.0.StackGuardSlot) + %29:gpr64common = LOAD_STACK_GUARD :: (dereferenceable invariant load (s64) from @__stack_chk_guard) RET_ReallyLR implicit undef $w0, implicit killed %28, implicit killed %29 ... diff --git a/llvm/test/CodeGen/AArch64/stp-opt-with-renaming-debug.mir b/llvm/test/CodeGen/AArch64/stp-opt-with-renaming-debug.mir index e89c778c53669..2c24383e6baa3 100644 --- a/llvm/test/CodeGen/AArch64/stp-opt-with-renaming-debug.mir +++ b/llvm/test/CodeGen/AArch64/stp-opt-with-renaming-debug.mir @@ -20,12 +20,12 @@ # CHECK-LABEL: name: test_dbg_value1 # CHECK: bb.0: # CHECK-NEXT: liveins: $x0, $x1 -# CHECK: $x10, renamable $x8 = LDPXi renamable $x0, 0 :: (load 8) -# CHECK-NEXT: renamable $x9 = LDRXui renamable $x0, 1 :: (load 8) -# CHECK-NEXT: STRXui renamable $x9, renamable $x0, 100 :: (store 8, align 4) +# CHECK: $x10, renamable $x8 = LDPXi renamable $x0, 0 :: (load (s64)) +# CHECK-NEXT: renamable $x9 = LDRXui renamable $x0, 1 :: (load (s64)) +# CHECK-NEXT: STRXui renamable $x9, renamable $x0, 100 :: (store (s64), align 4) # CHECK-NEXT: DBG_VALUE $x9, $noreg # CHECK-NEXT: renamable $x8 = ADDXrr $x8, $x8 -# CHECK-NEXT: STPXi renamable $x8, killed $x10, renamable $x0, 10 :: (store 8, align 4) +# CHECK-NEXT: STPXi renamable $x8, killed $x10, renamable $x0, 10 :: (store (s64), align 4) # CHECK-NEXT: RET undef $lr name: test_dbg_value1 alignment: 4 @@ -41,13 +41,13 @@ machineFunctionInfo: {} body: | bb.0: liveins: $x0, $x1 - renamable $x9, renamable $x8 = LDPXi renamable $x0, 0 :: (load 8) - STRXui renamable killed $x9, renamable $x0, 11 :: (store 8, align 4) - renamable $x9 = LDRXui renamable $x0, 1 :: (load 8) - STRXui renamable $x9, renamable $x0, 100 :: (store 8, align 4) + renamable $x9, renamable $x8 = LDPXi renamable $x0, 0 :: (load (s64)) + STRXui renamable killed $x9, renamable $x0, 11 :: (store (s64), align 4) + renamable $x9 = LDRXui renamable $x0, 1 :: (load (s64)) + STRXui renamable $x9, renamable $x0, 100 :: (store (s64), align 4) DBG_VALUE $x9, $noreg, !7, !DIExpression(DW_OP_plus_uconst, 32), debug-location !9 renamable $x8 = ADDXrr $x8, $x8 - STRXui renamable $x8, renamable $x0, 10 :: (store 8, align 4) + STRXui renamable $x8, renamable $x0, 10 :: (store (s64), align 4) RET undef $lr ... @@ -59,9 +59,9 @@ body: | # CHECK: $x8 = ORRXrs $xzr, $x0, 0 # CHECK-NEXT: renamable $x0 = nuw ADDXri $x0, 8, 0 # CHECK-NEXT: DBG_VALUE $x0, $noreg, -# CHECK-NEXT: STRXui killed renamable $x8, renamable $x19, 2 :: (store 8) +# CHECK-NEXT: STRXui killed renamable $x8, renamable $x19, 2 :: (store (s64)) # CHECK-NEXT: $x8 = ADDXrs renamable $x0, killed renamable $x20, 0 -# CHECK-NEXT: STPXi $xzr, renamable $x8, renamable $x19, 0 :: (store 8) +# CHECK-NEXT: STPXi $xzr, renamable $x8, renamable $x19, 0 :: (store (s64)) # CHECK-NEXT: RET undef $lr, implicit $x0 name: test_dbg_value2 alignment: 4 @@ -80,9 +80,9 @@ body: | $x8 = ORRXrs $xzr, $x0, 0 renamable $x0 = nuw ADDXri $x0, 8, 0 DBG_VALUE $x0, $noreg, !7, !DIExpression(), debug-location !9 - STRXui killed renamable $x8, renamable $x19, 2 :: (store 8) + STRXui killed renamable $x8, renamable $x19, 2 :: (store (s64)) $x8 = ADDXrs renamable $x0, killed renamable $x20, 0 - STRXui $xzr, renamable $x19, 0 :: (store 8) - STRXui killed renamable $x8, killed renamable $x19, 1 :: (store 8) + STRXui $xzr, renamable $x19, 0 :: (store (s64)) + STRXui killed renamable $x8, killed renamable $x19, 1 :: (store (s64)) RET undef $lr, implicit $x0 ... diff --git a/llvm/test/CodeGen/AArch64/stp-opt-with-renaming-ld3.mir b/llvm/test/CodeGen/AArch64/stp-opt-with-renaming-ld3.mir index c20bad2db0cbf..ec4437f1a443c 100644 --- a/llvm/test/CodeGen/AArch64/stp-opt-with-renaming-ld3.mir +++ b/llvm/test/CodeGen/AArch64/stp-opt-with-renaming-ld3.mir @@ -11,11 +11,11 @@ # CHECK-LABEL: name: test_ld3 # CHECK: bb.0.entry: # CHECK: renamable $x0, renamable $d0_d1_d2 = LD3Threev8b_POST killed renamable $x0, $xzr -# CHECK-NEXT: STPDi renamable $d0, renamable $d1, $fp, -6 :: (store 8) -# CHECK-NEXT: STURDi renamable $d2, $fp, -32, implicit killed $d0_d1_d2 :: (store 8 into %ir.s1) -# CHECK-NEXT: renamable $d0_d1_d2 = LD3Threev8b killed renamable $x0 :: (load 24 from %ir.a1, align 32) -# CHECK-NEXT: STPDi renamable $d0, renamable $d1, $fp, -3 :: (store 8) -# CHECK-NEXT: STURDi renamable $d2, $fp, -8, implicit killed $d0_d1_d2 :: (store 8) +# CHECK-NEXT: STPDi renamable $d0, renamable $d1, $fp, -6 :: (store (s64)) +# CHECK-NEXT: STURDi renamable $d2, $fp, -32, implicit killed $d0_d1_d2 :: (store (s64) into %ir.s1) +# CHECK-NEXT: renamable $d0_d1_d2 = LD3Threev8b killed renamable $x0 :: (load (s192) from %ir.a1, align 32) +# CHECK-NEXT: STPDi renamable $d0, renamable $d1, $fp, -3 :: (store (s64)) +# CHECK-NEXT: STURDi renamable $d2, $fp, -8, implicit killed $d0_d1_d2 :: (store (s64)) # CHECK-NEXT: RET undef $lr # name: test_ld3 @@ -28,13 +28,13 @@ body: | liveins: $x0, $x1, $lr, $fp renamable $x0, renamable $d0_d1_d2 = LD3Threev8b_POST killed renamable $x0, $xzr - STURDi renamable $d0, $fp, -48 :: (store 8) - STURDi renamable $d1, $fp, -40 :: (store 8) - STURDi renamable $d2, $fp, -32, implicit killed $d0_d1_d2 :: (store 8 into %ir.s1) - renamable $d0_d1_d2 = LD3Threev8b killed renamable $x0 :: (load 24 from %ir.a1, align 32) - STURDi renamable $d0, $fp, -24 :: (store 8) - STURDi renamable $d1, $fp, -16 :: (store 8) - STURDi renamable $d2, $fp, -8, implicit killed $d0_d1_d2 :: (store 8) + STURDi renamable $d0, $fp, -48 :: (store (s64)) + STURDi renamable $d1, $fp, -40 :: (store (s64)) + STURDi renamable $d2, $fp, -32, implicit killed $d0_d1_d2 :: (store (s64) into %ir.s1) + renamable $d0_d1_d2 = LD3Threev8b killed renamable $x0 :: (load (s192) from %ir.a1, align 32) + STURDi renamable $d0, $fp, -24 :: (store (s64)) + STURDi renamable $d1, $fp, -16 :: (store (s64)) + STURDi renamable $d2, $fp, -8, implicit killed $d0_d1_d2 :: (store (s64)) RET undef $lr ... diff --git a/llvm/test/CodeGen/AArch64/stp-opt-with-renaming-reserved-regs.mir b/llvm/test/CodeGen/AArch64/stp-opt-with-renaming-reserved-regs.mir index 38e770eeb404d..f1491acc971f1 100644 --- a/llvm/test/CodeGen/AArch64/stp-opt-with-renaming-reserved-regs.mir +++ b/llvm/test/CodeGen/AArch64/stp-opt-with-renaming-reserved-regs.mir @@ -13,13 +13,13 @@ # CHECK-LABEL: name: test1 # CHECK: bb.0: # CHECK-NEXT: liveins: $x0, $x1 -# PRESERVED: $x12, renamable $x8 = LDPXi renamable $x0, 0 :: (load 8) -# NOPRES: $x10, renamable $x8 = LDPXi renamable $x0, 0 :: (load 8) -# CHECK-NEXT: renamable $x9 = LDRXui renamable $x0, 1 :: (load 8) -# CHECK-NEXT: STRXui renamable $x9, renamable $x0, 100 :: (store 8, align 4) +# PRESERVED: $x12, renamable $x8 = LDPXi renamable $x0, 0 :: (load (s64)) +# NOPRES: $x10, renamable $x8 = LDPXi renamable $x0, 0 :: (load (s64)) +# CHECK-NEXT: renamable $x9 = LDRXui renamable $x0, 1 :: (load (s64)) +# CHECK-NEXT: STRXui renamable $x9, renamable $x0, 100 :: (store (s64), align 4) # CHECK-NEXT: renamable $x8 = ADDXrr $x8, $x8 -# PRESERVED-NEXT: STPXi renamable $x8, killed $x12, renamable $x0, 10 :: (store 8, align 4) -# NOPRES-NEXT: STPXi renamable $x8, killed $x10, renamable $x0, 10 :: (store 8, align 4) +# PRESERVED-NEXT: STPXi renamable $x8, killed $x12, renamable $x0, 10 :: (store (s64), align 4) +# NOPRES-NEXT: STPXi renamable $x8, killed $x10, renamable $x0, 10 :: (store (s64), align 4) # CHECK-NEXT: RET undef $lr name: test1 @@ -36,28 +36,28 @@ machineFunctionInfo: {} body: | bb.0: liveins: $x0, $x1 - renamable $x9, renamable $x8 = LDPXi renamable $x0, 0 :: (load 8) - STRXui renamable killed $x9, renamable $x0, 11 :: (store 8, align 4) - renamable $x9 = LDRXui renamable $x0, 1 :: (load 8) - STRXui renamable $x9, renamable $x0, 100 :: (store 8, align 4) + renamable $x9, renamable $x8 = LDPXi renamable $x0, 0 :: (load (s64)) + STRXui renamable killed $x9, renamable $x0, 11 :: (store (s64), align 4) + renamable $x9 = LDRXui renamable $x0, 1 :: (load (s64)) + STRXui renamable $x9, renamable $x0, 100 :: (store (s64), align 4) renamable $x8 = ADDXrr $x8, $x8 - STRXui renamable $x8, renamable $x0, 10 :: (store 8, align 4) + STRXui renamable $x8, renamable $x0, 10 :: (store (s64), align 4) RET undef $lr ... # CHECK-LABEL: name: test2 # CHECK: bb.0: # CHECK-NEXT: liveins: $x0, $x1, $x10, $x11, $x12, $x13 -# CHECK: renamable $w19 = LDRWui renamable $x0, 0 :: (load 8) -# PRESERVED-NEXT: $x18, renamable $x8 = LDPXi renamable $x0, 1 :: (load 8) -# NOPRES-NEXT: $x15, renamable $x8 = LDPXi renamable $x0, 1 :: (load 8) -# CHECK-NEXT: renamable $x9 = LDRXui renamable $x0, 3 :: (load 8) -# CHECK-NEXT: renamable $x14 = LDRXui renamable $x0, 5 :: (load 8) -# PRESERVED-NEXT: STPXi renamable $x9, killed $x18, renamable $x0, 10 :: (store 8, align 4) -# NOPRES-NEXT: STPXi renamable $x9, killed $x15, renamable $x0, 10 :: (store 8, align 4) -# CHECK-NEXT: STRXui killed renamable $x14, renamable $x0, 200 :: (store 8, align 4) +# CHECK: renamable $w19 = LDRWui renamable $x0, 0 :: (load (s64)) +# PRESERVED-NEXT: $x18, renamable $x8 = LDPXi renamable $x0, 1 :: (load (s64)) +# NOPRES-NEXT: $x15, renamable $x8 = LDPXi renamable $x0, 1 :: (load (s64)) +# CHECK-NEXT: renamable $x9 = LDRXui renamable $x0, 3 :: (load (s64)) +# CHECK-NEXT: renamable $x14 = LDRXui renamable $x0, 5 :: (load (s64)) +# PRESERVED-NEXT: STPXi renamable $x9, killed $x18, renamable $x0, 10 :: (store (s64), align 4) +# NOPRES-NEXT: STPXi renamable $x9, killed $x15, renamable $x0, 10 :: (store (s64), align 4) +# CHECK-NEXT: STRXui killed renamable $x14, renamable $x0, 200 :: (store (s64), align 4) # CHECK-NEXT: renamable $w8 = ADDWrr $w19, $w19 -# CHECK-NEXT: STRWui renamable $w8, renamable $x0, 100 :: (store 8, align 4) +# CHECK-NEXT: STRWui renamable $w8, renamable $x0, 100 :: (store (s64), align 4) # CHECK-NEXT: RET undef $lr # name: test2 @@ -74,15 +74,15 @@ machineFunctionInfo: {} body: | bb.0: liveins: $x0, $x1, $x10, $x11, $x12, $x13 - renamable $w19 = LDRWui renamable $x0, 0 :: (load 8) - renamable $x9, renamable $x8 = LDPXi renamable $x0, 1 :: (load 8) - STRXui renamable killed $x9, renamable $x0, 11 :: (store 8, align 4) - renamable $x9 = LDRXui renamable $x0, 3 :: (load 8) - renamable $x14 = LDRXui renamable $x0, 5 :: (load 8) - STRXui renamable $x9, renamable $x0, 10 :: (store 8, align 4) - STRXui renamable killed $x14, renamable $x0, 200 :: (store 8, align 4) + renamable $w19 = LDRWui renamable $x0, 0 :: (load (s64)) + renamable $x9, renamable $x8 = LDPXi renamable $x0, 1 :: (load (s64)) + STRXui renamable killed $x9, renamable $x0, 11 :: (store (s64), align 4) + renamable $x9 = LDRXui renamable $x0, 3 :: (load (s64)) + renamable $x14 = LDRXui renamable $x0, 5 :: (load (s64)) + STRXui renamable $x9, renamable $x0, 10 :: (store (s64), align 4) + STRXui renamable killed $x14, renamable $x0, 200 :: (store (s64), align 4) renamable $w8 = ADDWrr $w19, $w19 - STRWui renamable $w8, renamable $x0, 100 :: (store 8, align 4) + STRWui renamable $w8, renamable $x0, 100 :: (store (s64), align 4) RET undef $lr ... diff --git a/llvm/test/CodeGen/AArch64/stp-opt-with-renaming.mir b/llvm/test/CodeGen/AArch64/stp-opt-with-renaming.mir index 21d22dc585f62..ad0fb80f1c0d8 100644 --- a/llvm/test/CodeGen/AArch64/stp-opt-with-renaming.mir +++ b/llvm/test/CodeGen/AArch64/stp-opt-with-renaming.mir @@ -10,11 +10,11 @@ # CHECK-LABEL: name: test1 # CHECK: bb.0: # CHECK-NEXT: liveins: $x0, $x1 -# CHECK: $x10, renamable $x8 = LDPXi renamable $x0, 0 :: (load 8) -# CHECK-NEXT: renamable $x9 = LDRXui renamable $x0, 1 :: (load 8) -# CHECK-NEXT: STRXui renamable $x9, renamable $x0, 100 :: (store 8, align 4) +# CHECK: $x10, renamable $x8 = LDPXi renamable $x0, 0 :: (load (s64)) +# CHECK-NEXT: renamable $x9 = LDRXui renamable $x0, 1 :: (load (s64)) +# CHECK-NEXT: STRXui renamable $x9, renamable $x0, 100 :: (store (s64), align 4) # CHECK-NEXT: renamable $x8 = ADDXrr $x8, $x8 -# CHECK-NEXT: STPXi renamable $x8, killed $x10, renamable $x0, 10 :: (store 8, align 4) +# CHECK-NEXT: STPXi renamable $x8, killed $x10, renamable $x0, 10 :: (store (s64), align 4) # CHECK-NEXT: RET undef $lr name: test1 @@ -31,12 +31,12 @@ machineFunctionInfo: {} body: | bb.0: liveins: $x0, $x1 - renamable $x9, renamable $x8 = LDPXi renamable $x0, 0 :: (load 8) - STRXui renamable killed $x9, renamable $x0, 11 :: (store 8, align 4) - renamable $x9 = LDRXui renamable $x0, 1 :: (load 8) - STRXui renamable $x9, renamable $x0, 100 :: (store 8, align 4) + renamable $x9, renamable $x8 = LDPXi renamable $x0, 0 :: (load (s64)) + STRXui renamable killed $x9, renamable $x0, 11 :: (store (s64), align 4) + renamable $x9 = LDRXui renamable $x0, 1 :: (load (s64)) + STRXui renamable $x9, renamable $x0, 100 :: (store (s64), align 4) renamable $x8 = ADDXrr $x8, $x8 - STRXui renamable $x8, renamable $x0, 10 :: (store 8, align 4) + STRXui renamable $x8, renamable $x0, 10 :: (store (s64), align 4) RET undef $lr ... @@ -45,11 +45,11 @@ body: | # CHECK-LABEL: bb.0: # CHECK-NEXT: liveins: $x0, $x9, $x1 -# CHECK: $x10, renamable $x8 = LDPXi renamable $x9, 0 :: (load 8) -# CHECK-NEXT: renamable $x9 = LDRXui renamable $x0, 2 :: (load 8) -# CHECK-NEXT: STRXui renamable $x9, renamable $x0, 100 :: (store 8, align 4) +# CHECK: $x10, renamable $x8 = LDPXi renamable $x9, 0 :: (load (s64)) +# CHECK-NEXT: renamable $x9 = LDRXui renamable $x0, 2 :: (load (s64)) +# CHECK-NEXT: STRXui renamable $x9, renamable $x0, 100 :: (store (s64), align 4) # CHECK-NEXT: renamable $x8 = ADDXrr $x8, $x8 -# CHECK-NEXT: STPXi renamable $x8, killed $x10, renamable $x0, 10 :: (store 8, align 4) +# CHECK-NEXT: STPXi renamable $x8, killed $x10, renamable $x0, 10 :: (store (s64), align 4) # CHECK-NEXT: RET undef $lr name: test2 @@ -66,12 +66,12 @@ machineFunctionInfo: {} body: | bb.0: liveins: $x0, $x9, $x1 - renamable $x9, renamable $x8 = LDPXi renamable $x9, 0 :: (load 8) - STRXui renamable killed $x9, renamable $x0, 11 :: (store 8, align 4) - renamable $x9 = LDRXui renamable $x0, 2 :: (load 8) - STRXui renamable $x9, renamable $x0, 100 :: (store 8, align 4) + renamable $x9, renamable $x8 = LDPXi renamable $x9, 0 :: (load (s64)) + STRXui renamable killed $x9, renamable $x0, 11 :: (store (s64), align 4) + renamable $x9 = LDRXui renamable $x0, 2 :: (load (s64)) + STRXui renamable $x9, renamable $x0, 100 :: (store (s64), align 4) renamable $x8 = ADDXrr $x8, $x8 - STRXui renamable $x8, renamable $x0, 10 :: (store 8, align 4) + STRXui renamable $x8, renamable $x0, 10 :: (store (s64), align 4) RET undef $lr ... @@ -83,9 +83,9 @@ body: | # CHECK: renamable $x8 = MRS 58880 # CHECK-NEXT: renamable $x8 = MOVZXi 15309, 0 # CHECK-NEXT: renamable $x8 = MOVKXi renamable $x8, 26239, 16 -# CHECK-NEXT: STRXui renamable $x8, renamable $x0, 0, implicit killed $x8 :: (store 8) +# CHECK-NEXT: STRXui renamable $x8, renamable $x0, 0, implicit killed $x8 :: (store (s64)) # CHECK-NEXT: renamable $x8 = MRS 55840 -# CHECK-NEXT: STRXui killed renamable $x8, killed renamable $x0, 1, implicit killed $x8 :: (store 8) +# CHECK-NEXT: STRXui killed renamable $x8, killed renamable $x0, 1, implicit killed $x8 :: (store (s64)) # CHECK-NEXT: RET undef $lr # name: test3 @@ -103,9 +103,9 @@ body: | renamable $x8 = MRS 58880, implicit-def $nzcv renamable $x8 = MOVZXi 15309, 0 renamable $x8 = MOVKXi renamable $x8, 26239, 16 - STRXui renamable $x8, renamable $x0, 0, implicit killed $x8 :: (store 8) + STRXui renamable $x8, renamable $x0, 0, implicit killed $x8 :: (store (s64)) renamable $x8 = MRS 55840, implicit-def $nzcv - STRXui killed renamable $x8, renamable killed $x0, 1, implicit killed $x8 :: (store 8) + STRXui killed renamable $x8, renamable killed $x0, 1, implicit killed $x8 :: (store (s64)) RET undef $lr ... @@ -116,7 +116,7 @@ body: | # CHECK: $x9 = MRS 58880 # CHECK-NEXT: renamable $x8 = MRS 55840 -# CHECK-NEXT: STPXi $x9, killed renamable $x8, killed renamable $x0, 0 :: (store 4) +# CHECK-NEXT: STPXi $x9, killed renamable $x8, killed renamable $x0, 0 :: (store (s32)) # CHECK-NEXT: RET undef $lr name: test4 @@ -135,9 +135,9 @@ body: | liveins: $x0, $x1 renamable $x8 = MRS 58880, implicit-def $nzcv - STRXui renamable $x8, renamable $x0, 0, implicit killed $x8 :: (store 4) + STRXui renamable $x8, renamable $x0, 0, implicit killed $x8 :: (store (s32)) renamable $x8 = MRS 55840, implicit-def $nzcv - STRXui killed renamable $x8, renamable killed $x0, 1, implicit killed $x8 :: (store 4) + STRXui killed renamable $x8, renamable killed $x0, 1, implicit killed $x8 :: (store (s32)) RET undef $lr ... @@ -148,7 +148,7 @@ body: | # CHECK: $x9 = MRS 58880 # CHECK-NEXT: renamable $x8 = MRS 55840 -# CHECK-NEXT: STPWi $w9, killed renamable $w8, killed renamable $x0, 0 :: (store 4) +# CHECK-NEXT: STPWi $w9, killed renamable $w8, killed renamable $x0, 0 :: (store (s32)) # CHECK-NEXT: RET undef $lr name: test5 @@ -167,9 +167,9 @@ body: | liveins: $x0, $x1 renamable $x8 = MRS 58880, implicit-def $nzcv - STRWui renamable $w8, renamable $x0, 0, implicit killed $x8 :: (store 4) + STRWui renamable $w8, renamable $x0, 0, implicit killed $x8 :: (store (s32)) renamable $x8 = MRS 55840, implicit-def $nzcv - STRWui killed renamable $w8, renamable killed $x0, 1, implicit killed $x8 :: (store 4) + STRWui killed renamable $w8, renamable killed $x0, 1, implicit killed $x8 :: (store (s32)) RET undef $lr ... @@ -178,11 +178,11 @@ body: | # CHECK-LABEL: bb.0: # CHECK: liveins: $x0, $x1, $q3 -# CHECK: renamable $q9 = LDRQui $x0, 0 :: (load 16) +# CHECK: renamable $q9 = LDRQui $x0, 0 :: (load (s128)) # CHECK-NEXT: renamable $q9 = XTNv8i16 renamable $q9, killed renamable $q3 -# CHECK-NEXT: STRQui renamable $q9, renamable $x0, 11 :: (store 16, align 4) +# CHECK-NEXT: STRQui renamable $q9, renamable $x0, 11 :: (store (s128), align 4) # CHECK-NEXT: renamable $q9 = FADDv2f64 renamable $q9, renamable $q9 -# CHECK-NEXT: STRQui renamable $q9, renamable $x0, 10 :: (store 16, align 4) +# CHECK-NEXT: STRQui renamable $q9, renamable $x0, 10 :: (store (s128), align 4) # CHECK-NEXT: RET undef $lr # XTN has a tied use-def. @@ -201,11 +201,11 @@ machineFunctionInfo: {} body: | bb.0: liveins: $x0, $x1, $q3 - renamable $q9 = LDRQui $x0, 0 :: (load 16) + renamable $q9 = LDRQui $x0, 0 :: (load (s128)) renamable $q9 = XTNv8i16 renamable $q9, killed renamable $q3 - STRQui renamable $q9, renamable $x0, 11 :: (store 16, align 4) + STRQui renamable $q9, renamable $x0, 11 :: (store (s128), align 4) renamable $q9 = FADDv2f64 renamable $q9, renamable $q9 - STRQui renamable $q9, renamable $x0, 10 :: (store 16, align 4) + STRQui renamable $q9, renamable $x0, 10 :: (store (s128), align 4) RET undef $lr ... @@ -216,10 +216,10 @@ body: | # CHECK-NEXT: liveins: $x0, $x1 # CHECK: $sp = frame-setup SUBXri $sp, 64, 0 -# CHECK-NEXT: renamable $x9 = frame-setup LDRXui renamable $x0, 0 :: (load 8) -# CHECK-NEXT: STRXui renamable $x9, $x0, 10 :: (store 8, align 4) -# CHECK-NEXT: renamable $x9 = LDRXui renamable $x0, 1 :: (load 8) -# CHECK-NEXT: STRXui renamable $x9, $x0, 11 :: (store 8, align 4) +# CHECK-NEXT: renamable $x9 = frame-setup LDRXui renamable $x0, 0 :: (load (s64)) +# CHECK-NEXT: STRXui renamable $x9, $x0, 10 :: (store (s64), align 4) +# CHECK-NEXT: renamable $x9 = LDRXui renamable $x0, 1 :: (load (s64)) +# CHECK-NEXT: STRXui renamable $x9, $x0, 11 :: (store (s64), align 4) # CHECK-NEXT: RET undef $lr # name: test7 @@ -243,10 +243,10 @@ body: | bb.0: liveins: $x0, $x1 $sp = frame-setup SUBXri $sp, 64, 0 - renamable $x9 = frame-setup LDRXui renamable $x0, 0 :: (load 8) - STRXui renamable $x9, $x0, 10 :: (store 8, align 4) - renamable $x9 = LDRXui renamable $x0, 1 :: (load 8) - STRXui renamable $x9, $x0, 11 :: (store 8, align 4) + renamable $x9 = frame-setup LDRXui renamable $x0, 0 :: (load (s64)) + STRXui renamable $x9, $x0, 10 :: (store (s64), align 4) + renamable $x9 = LDRXui renamable $x0, 1 :: (load (s64)) + STRXui renamable $x9, $x0, 11 :: (store (s64), align 4) RET undef $lr ... --- @@ -257,7 +257,7 @@ body: | # CHECK: renamable $x8 = MRS 58880 # CHECK-NEXT: $w9 = ORRWrs $wzr, killed renamable $w8, 0, implicit-def $x9 # CHECK-NEXT: renamable $x8 = MRS 55840 -# CHECK-NEXT: STPWi $w9, killed renamable $w8, killed renamable $x0, 0 :: (store 4) +# CHECK-NEXT: STPWi $w9, killed renamable $w8, killed renamable $x0, 0 :: (store (s32)) # CHECK-NEXT: RET undef $lr name: test8 @@ -277,9 +277,9 @@ body: | renamable $x8 = MRS 58880, implicit-def $nzcv renamable $w8 = ORRWrs $wzr, killed renamable $w8, 0, implicit-def $x8 - STRWui renamable $w8, renamable $x0, 0, implicit killed $x8 :: (store 4) + STRWui renamable $w8, renamable $x0, 0, implicit killed $x8 :: (store (s32)) renamable $x8 = MRS 55840, implicit-def $nzcv - STRWui killed renamable $w8, renamable killed $x0, 1, implicit killed $x8 :: (store 4) + STRWui killed renamable $w8, renamable killed $x0, 1, implicit killed $x8 :: (store (s32)) RET undef $lr ... @@ -291,10 +291,10 @@ body: | # CHECK-LABEL: bb.0: # CHECK: liveins: $x0, $x1, $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7 -# CHECK: renamable $q9 = LDRQui $x0, 0 :: (load 16) -# CHECK-NEXT: STRQui killed renamable $q9, renamable $x0, 10 :: (store 16, align 4) -# CHECK: renamable $q9 = LDRQui $x0, 1 :: (load 16) -# CHECK-NEXT: STRQui renamable $q9, renamable $x0, 11 :: (store 16, align 4) +# CHECK: renamable $q9 = LDRQui $x0, 0 :: (load (s128)) +# CHECK-NEXT: STRQui killed renamable $q9, renamable $x0, 10 :: (store (s128), align 4) +# CHECK: renamable $q9 = LDRQui $x0, 1 :: (load (s128)) +# CHECK-NEXT: STRQui renamable $q9, renamable $x0, 11 :: (store (s128), align 4) # CHECK-NEXT: RET undef $lr name: test9 @@ -312,10 +312,10 @@ machineFunctionInfo: {} body: | bb.0: liveins: $x0, $x1, $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7 - renamable $q9 = LDRQui $x0, 0 :: (load 16) - STRQui renamable killed $q9, renamable $x0, 10 :: (store 16, align 4) - renamable $q9 = LDRQui $x0, 1 :: (load 16) - STRQui renamable $q9, renamable $x0, 11 :: (store 16, align 4) + renamable $q9 = LDRQui $x0, 0 :: (load (s128)) + STRQui renamable killed $q9, renamable $x0, 10 :: (store (s128), align 4) + renamable $q9 = LDRQui $x0, 1 :: (load (s128)) + STRQui renamable $q9, renamable $x0, 11 :: (store (s128), align 4) RET undef $lr ... @@ -326,10 +326,10 @@ body: | # CHECK: liveins: $x0, $x1, $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7 # CHECK: renamable $q7 = FADDv2f64 renamable $q7, renamable $q7 -# CHECK-NEXT: STRQui killed renamable $q7, renamable $x0, 100 :: (store 16, align 4) -# CHECK-NEXT: $q7 = LDRQui $x0, 0 :: (load 16) -# CHECK-NEXT: renamable $q9 = LDRQui $x0, 1 :: (load 16) -# CHECK-NEXT: STPQi killed renamable $q9, killed $q7, renamable $x0, 10 :: (store 16, align 4) +# CHECK-NEXT: STRQui killed renamable $q7, renamable $x0, 100 :: (store (s128), align 4) +# CHECK-NEXT: $q7 = LDRQui $x0, 0 :: (load (s128)) +# CHECK-NEXT: renamable $q9 = LDRQui $x0, 1 :: (load (s128)) +# CHECK-NEXT: STPQi killed renamable $q9, killed $q7, renamable $x0, 10 :: (store (s128), align 4) # CHECK-NEXT: RET undef $lr name: test10 @@ -348,11 +348,11 @@ body: | bb.0: liveins: $x0, $x1, $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7 renamable $q7 = FADDv2f64 renamable $q7, renamable $q7 - STRQui renamable killed $q7, renamable $x0, 100 :: (store 16, align 4) - renamable $q9 = LDRQui $x0, 0 :: (load 16) - STRQui renamable killed $q9, renamable $x0, 11 :: (store 16, align 4) - renamable $q9 = LDRQui $x0, 1 :: (load 16) - STRQui renamable killed $q9, renamable $x0, 10 :: (store 16, align 4) + STRQui renamable killed $q7, renamable $x0, 100 :: (store (s128), align 4) + renamable $q9 = LDRQui $x0, 0 :: (load (s128)) + STRQui renamable killed $q9, renamable $x0, 11 :: (store (s128), align 4) + renamable $q9 = LDRQui $x0, 1 :: (load (s128)) + STRQui renamable killed $q9, renamable $x0, 10 :: (store (s128), align 4) RET undef $lr ... @@ -363,14 +363,14 @@ body: | # CHECK: bb.0: # CHECK-NEXT: liveins: $x0, $x1, $x11, $x12, $x13 -# CHECK: renamable $w10 = LDRWui renamable $x0, 0 :: (load 8) -# CHECK-NEXT: $x15, renamable $x8 = LDPXi renamable $x0, 1 :: (load 8) -# CHECK-NEXT: renamable $x9 = LDRXui renamable $x0, 3 :: (load 8) -# CHECK-NEXT: renamable $x14 = LDRXui renamable $x0, 5 :: (load 8) -# CHECK-NEXT: STPXi renamable $x9, killed $x15, renamable $x0, 10 :: (store 8, align 4) -# CHECK-NEXT: STRXui killed renamable $x14, renamable $x0, 200 :: (store 8, align 4) +# CHECK: renamable $w10 = LDRWui renamable $x0, 0 :: (load (s64)) +# CHECK-NEXT: $x15, renamable $x8 = LDPXi renamable $x0, 1 :: (load (s64)) +# CHECK-NEXT: renamable $x9 = LDRXui renamable $x0, 3 :: (load (s64)) +# CHECK-NEXT: renamable $x14 = LDRXui renamable $x0, 5 :: (load (s64)) +# CHECK-NEXT: STPXi renamable $x9, killed $x15, renamable $x0, 10 :: (store (s64), align 4) +# CHECK-NEXT: STRXui killed renamable $x14, renamable $x0, 200 :: (store (s64), align 4) # CHECK-NEXT: renamable $w8 = ADDWrr $w10, $w10 -# CHECK-NEXT: STRWui renamable $w8, renamable $x0, 100 :: (store 8, align 4) +# CHECK-NEXT: STRWui renamable $w8, renamable $x0, 100 :: (store (s64), align 4) # CHECK-NEXT: RET undef $lr # name: test11 @@ -387,15 +387,15 @@ machineFunctionInfo: {} body: | bb.0: liveins: $x0, $x1, $x11, $x12, $x13 - renamable $w10 = LDRWui renamable $x0, 0 :: (load 8) - renamable $x9, renamable $x8 = LDPXi renamable $x0, 1 :: (load 8) - STRXui renamable killed $x9, renamable $x0, 11 :: (store 8, align 4) - renamable $x9 = LDRXui renamable $x0, 3 :: (load 8) - renamable $x14 = LDRXui renamable $x0, 5 :: (load 8) - STRXui renamable $x9, renamable $x0, 10 :: (store 8, align 4) - STRXui renamable killed $x14, renamable $x0, 200 :: (store 8, align 4) + renamable $w10 = LDRWui renamable $x0, 0 :: (load (s64)) + renamable $x9, renamable $x8 = LDPXi renamable $x0, 1 :: (load (s64)) + STRXui renamable killed $x9, renamable $x0, 11 :: (store (s64), align 4) + renamable $x9 = LDRXui renamable $x0, 3 :: (load (s64)) + renamable $x14 = LDRXui renamable $x0, 5 :: (load (s64)) + STRXui renamable $x9, renamable $x0, 10 :: (store (s64), align 4) + STRXui renamable killed $x14, renamable $x0, 200 :: (store (s64), align 4) renamable $w8 = ADDWrr $w10, $w10 - STRWui renamable $w8, renamable $x0, 100 :: (store 8, align 4) + STRWui renamable $w8, renamable $x0, 100 :: (store (s64), align 4) RET undef $lr ... @@ -406,12 +406,12 @@ body: | # CHECK: bb.0: # CHECK-NEXT: liveins: $x0, $x1 # -# CHECK: renamable $x10 = LDRXui renamable $x0, 0 :: (load 8) -# CHECK-NEXT: $x11, renamable $x8 = LDPXi renamable $x0, 3 :: (load 8) -# CHECK-NEXT: renamable $x9 = LDRXui renamable $x0, 2 :: (load 8) +# CHECK: renamable $x10 = LDRXui renamable $x0, 0 :: (load (s64)) +# CHECK-NEXT: $x11, renamable $x8 = LDPXi renamable $x0, 3 :: (load (s64)) +# CHECK-NEXT: renamable $x9 = LDRXui renamable $x0, 2 :: (load (s64)) # CHECK-NEXT: renamable $x8 = ADDXrr $x8, $x8 -# CHECK-NEXT: STPXi renamable $x8, killed $x11, renamable $x0, 10 :: (store 8, align 4) -# CHECK-NEXT: STPXi killed renamable $x10, renamable $x9, renamable $x0, 20 :: (store 8, align 4) +# CHECK-NEXT: STPXi renamable $x8, killed $x11, renamable $x0, 10 :: (store (s64), align 4) +# CHECK-NEXT: STPXi killed renamable $x10, renamable $x9, renamable $x0, 20 :: (store (s64), align 4) # CHECK-NEXT: RET undef $lr name: test12 @@ -428,14 +428,14 @@ machineFunctionInfo: {} body: | bb.0: liveins: $x0, $x1 - renamable $x10 = LDRXui renamable $x0, 0 :: (load 8) - STRXui renamable killed $x10, renamable $x0, 20 :: (store 8, align 4) - renamable $x9, renamable $x8 = LDPXi renamable $x0, 3 :: (load 8) - STRXui renamable killed $x9, renamable $x0, 11 :: (store 8, align 4) - renamable $x9 = LDRXui renamable $x0, 2 :: (load 8) + renamable $x10 = LDRXui renamable $x0, 0 :: (load (s64)) + STRXui renamable killed $x10, renamable $x0, 20 :: (store (s64), align 4) + renamable $x9, renamable $x8 = LDPXi renamable $x0, 3 :: (load (s64)) + STRXui renamable killed $x9, renamable $x0, 11 :: (store (s64), align 4) + renamable $x9 = LDRXui renamable $x0, 2 :: (load (s64)) renamable $x8 = ADDXrr $x8, $x8 - STRXui renamable $x8, renamable $x0, 10 :: (store 8, align 4) - STRXui renamable $x9, renamable $x0, 21 :: (store 8, align 4) + STRXui renamable $x8, renamable $x0, 10 :: (store (s64), align 4) + STRXui renamable $x9, renamable $x0, 21 :: (store (s64), align 4) RET undef $lr ... @@ -445,11 +445,11 @@ body: | # CHECK-LABEL: name: test13 # CHECK: bb.0: # CHECK-NEXT: liveins: $x0, $x1, $x10, $x11, $x12, $x13 -# CHECK: $x15, renamable $x8 = LDPXi renamable $x0, 0 :: (load 8) -# CHECK-NEXT: renamable $x14 = LDRXui renamable $x0, 4 :: (load 8) -# CHECK-NEXT: STRXui killed renamable $x14, renamable $x0, 100 :: (store 8, align 4) -# CHECK-NEXT: renamable $x9 = LDRXui renamable $x0, 2 :: (load 8) -# CHECK-NEXT: STPXi renamable $x9, killed $x15, renamable $x0, 10 :: (store 8, align 4) +# CHECK: $x15, renamable $x8 = LDPXi renamable $x0, 0 :: (load (s64)) +# CHECK-NEXT: renamable $x14 = LDRXui renamable $x0, 4 :: (load (s64)) +# CHECK-NEXT: STRXui killed renamable $x14, renamable $x0, 100 :: (store (s64), align 4) +# CHECK-NEXT: renamable $x9 = LDRXui renamable $x0, 2 :: (load (s64)) +# CHECK-NEXT: STPXi renamable $x9, killed $x15, renamable $x0, 10 :: (store (s64), align 4) # CHECK-NEXT: RET undef $lr # name: test13 @@ -466,12 +466,12 @@ machineFunctionInfo: {} body: | bb.0: liveins: $x0, $x1, $x10, $x11, $x12, $x13 - renamable $x9, renamable $x8 = LDPXi renamable $x0, 0 :: (load 8) - renamable $x14 = LDRXui renamable $x0, 4 :: (load 8) - STRXui renamable killed $x14, renamable $x0, 100 :: (store 8, align 4) - STRXui renamable killed $x9, renamable $x0, 11 :: (store 8, align 4) - renamable $x9 = LDRXui renamable $x0, 2 :: (load 8) - STRXui renamable $x9, renamable $x0, 10 :: (store 8) + renamable $x9, renamable $x8 = LDPXi renamable $x0, 0 :: (load (s64)) + renamable $x14 = LDRXui renamable $x0, 4 :: (load (s64)) + STRXui renamable killed $x14, renamable $x0, 100 :: (store (s64), align 4) + STRXui renamable killed $x9, renamable $x0, 11 :: (store (s64), align 4) + renamable $x9 = LDRXui renamable $x0, 2 :: (load (s64)) + STRXui renamable $x9, renamable $x0, 10 :: (store (s64)) RET undef $lr ... @@ -482,9 +482,9 @@ body: | # CHECK: bb.0: # CHECK-NEXT: liveins: $w8, $fp, $w25 # CHECK: renamable $w8 = KILL killed renamable $w8, implicit-def $x8 -# CHECK-NEXT: STURXi killed renamable $x8, $fp, -40 :: (store 8) +# CHECK-NEXT: STURXi killed renamable $x8, $fp, -40 :: (store (s64)) # CHECK-NEXT: $w8 = ORRWrs $wzr, killed $w25, 0, implicit-def $x8 -# CHECK-NEXT: STURXi killed renamable $x8, $fp, -32 :: (store 8) +# CHECK-NEXT: STURXi killed renamable $x8, $fp, -32 :: (store (s64)) # CHECK-NEXT: RET undef $lr # name: test14_pseudo @@ -503,9 +503,9 @@ body: | liveins: $w8, $fp, $w25 renamable $w8 = KILL killed renamable $w8, implicit-def $x8 - STURXi killed renamable $x8, $fp, -40 :: (store 8) + STURXi killed renamable $x8, $fp, -40 :: (store (s64)) $w8 = ORRWrs $wzr, killed $w25, 0, implicit-def $x8 - STURXi killed renamable $x8, $fp, -32 :: (store 8) + STURXi killed renamable $x8, $fp, -32 :: (store (s64)) RET undef $lr ... --- @@ -514,11 +514,11 @@ body: | # CHECK-LABEL: name: test15_undef_op # CHECK: bb.0: # CHECK-NEXT: liveins: $x0, $x1, $x8 -# CHECK: undef renamable $x10, $x11 = LDPXi renamable $x0, 0 :: (load 8) -# CHECK-NEXT: renamable $x9 = LDRXui renamable $x0, 1 :: (load 8) -# CHECK-NEXT: STRXui renamable $x9, renamable $x0, 100 :: (store 8, align 4) +# CHECK: undef renamable $x10, $x11 = LDPXi renamable $x0, 0 :: (load (s64)) +# CHECK-NEXT: renamable $x9 = LDRXui renamable $x0, 1 :: (load (s64)) +# CHECK-NEXT: STRXui renamable $x9, renamable $x0, 100 :: (store (s64), align 4) # CHECK-NEXT: renamable $x10 = ADDXrr $x10, $x10 -# CHECK-NEXT: STPXi renamable $x10, killed $x11, renamable $x0, 10 :: (store 8, align 4) +# CHECK-NEXT: STPXi renamable $x10, killed $x11, renamable $x0, 10 :: (store (s64), align 4) # CHECK-NEXT: RET undef $lr # name: test15_undef_op @@ -535,12 +535,12 @@ machineFunctionInfo: {} body: | bb.0: liveins: $x0, $x1, $x8 - renamable undef $x10, renamable $x9 = LDPXi renamable $x0, 0 :: (load 8) - STRXui renamable killed $x9, renamable $x0, 11 :: (store 8, align 4) - renamable $x9 = LDRXui renamable $x0, 1 :: (load 8) - STRXui renamable $x9, renamable $x0, 100 :: (store 8, align 4) + renamable undef $x10, renamable $x9 = LDPXi renamable $x0, 0 :: (load (s64)) + STRXui renamable killed $x9, renamable $x0, 11 :: (store (s64), align 4) + renamable $x9 = LDRXui renamable $x0, 1 :: (load (s64)) + STRXui renamable $x9, renamable $x0, 100 :: (store (s64), align 4) renamable $x10 = ADDXrr $x10, $x10 - STRXui renamable $x10, renamable $x0, 10 :: (store 8, align 4) + STRXui renamable $x10, renamable $x0, 10 :: (store (s64), align 4) RET undef $lr ... diff --git a/llvm/test/CodeGen/AArch64/strpre-str-merge.mir b/llvm/test/CodeGen/AArch64/strpre-str-merge.mir index d7f3e177cf34d..017b5f3cd2a2b 100644 --- a/llvm/test/CodeGen/AArch64/strpre-str-merge.mir +++ b/llvm/test/CodeGen/AArch64/strpre-str-merge.mir @@ -18,10 +18,10 @@ body: | liveins: $w1, $w2, $x0 ; CHECK-LABEL: name: 1-strwpre-strwui-merge ; CHECK: liveins: $w1, $w2, $x0 - ; CHECK: early-clobber $x0 = STPWpre renamable $w1, renamable $w2, renamable $x0, 5 :: (store 4) + ; CHECK: early-clobber $x0 = STPWpre renamable $w1, renamable $w2, renamable $x0, 5 :: (store (s32)) ; CHECK: RET undef $lr, implicit $x0 - early-clobber renamable $x0 = STRWpre killed renamable $w1, killed renamable $x0, 20 :: (store 4) - STRWui killed renamable $w2, renamable $x0, 1 :: (store 4) + early-clobber renamable $x0 = STRWpre killed renamable $w1, killed renamable $x0, 20 :: (store (s32)) + STRWui killed renamable $w2, renamable $x0, 1 :: (store (s32)) RET undef $lr, implicit $x0 ... @@ -46,10 +46,10 @@ body: | ; CHECK-LABEL: name: 2-strxpre-strxui-merge ; CHECK: liveins: $x0, $x1, $x2 - ; CHECK: early-clobber $x0 = STPXpre renamable $x1, renamable $x2, renamable $x0, 3 :: (store 8) + ; CHECK: early-clobber $x0 = STPXpre renamable $x1, renamable $x2, renamable $x0, 3 :: (store (s64)) ; CHECK: RET undef $lr, implicit $x0 - early-clobber renamable $x0 = STRXpre killed renamable $x1, killed renamable $x0, 24 :: (store 8) - STRXui killed renamable $x2, renamable $x0, 1 :: (store 8) + early-clobber renamable $x0 = STRXpre killed renamable $x1, killed renamable $x0, 24 :: (store (s64)) + STRXui killed renamable $x2, renamable $x0, 1 :: (store (s64)) RET undef $lr, implicit $x0 ... @@ -73,10 +73,10 @@ body: | liveins: $s0, $s1, $x0 ; CHECK-LABEL: name: 3-strspre-strsui-merge ; CHECK: liveins: $s0, $s1, $x0 - ; CHECK: early-clobber $x0 = STPSpre renamable $s0, renamable $s1, renamable $x0, 3 :: (store 4) + ; CHECK: early-clobber $x0 = STPSpre renamable $s0, renamable $s1, renamable $x0, 3 :: (store (s32)) ; CHECK: RET undef $lr, implicit $x0 - early-clobber renamable $x0 = STRSpre killed renamable $s0, killed renamable $x0, 12 :: (store 4) - STRSui killed renamable $s1, renamable $x0, 1 :: (store 4) + early-clobber renamable $x0 = STRSpre killed renamable $s0, killed renamable $x0, 12 :: (store (s32)) + STRSui killed renamable $s1, renamable $x0, 1 :: (store (s32)) RET undef $lr, implicit $x0 ... @@ -100,10 +100,10 @@ body: | ; CHECK-LABEL: name: 4-strdpre-strdui-merge ; CHECK: liveins: $d0, $d1, $x0 - ; CHECK: early-clobber $x0 = STPDpre renamable $d0, renamable $d1, renamable $x0, 16 :: (store 8) + ; CHECK: early-clobber $x0 = STPDpre renamable $d0, renamable $d1, renamable $x0, 16 :: (store (s64)) ; CHECK: RET undef $lr, implicit $x0 - early-clobber renamable $x0 = STRDpre killed renamable $d0, killed renamable $x0, 128 :: (store 8) - STRDui killed renamable $d1, renamable $x0, 1 :: (store 8) + early-clobber renamable $x0 = STRDpre killed renamable $d0, killed renamable $x0, 128 :: (store (s64)) + STRDui killed renamable $d1, renamable $x0, 1 :: (store (s64)) RET undef $lr, implicit $x0 ... @@ -128,10 +128,10 @@ body: | ; CHECK-LABEL: name: 5-strqpre-strqui-merge ; CHECK: liveins: $q0, $q1, $x0 - ; CHECK: early-clobber $x0 = STPQpre renamable $q0, renamable $q1, renamable $x0, 3 :: (store 16) + ; CHECK: early-clobber $x0 = STPQpre renamable $q0, renamable $q1, renamable $x0, 3 :: (store (s128)) ; CHECK: RET undef $lr, implicit $x0 - early-clobber renamable $x0 = STRQpre killed renamable $q0, killed renamable $x0, 48 :: (store 16) - STRQui killed renamable $q1, renamable $x0, 1 :: (store 16) + early-clobber renamable $x0 = STRQpre killed renamable $q0, killed renamable $x0, 48 :: (store (s128)) + STRQui killed renamable $q1, renamable $x0, 1 :: (store (s128)) RET undef $lr, implicit $x0 ... @@ -155,11 +155,11 @@ body: | liveins: $q0, $q1, $x0 ; CHECK-LABEL: name: 6-strqui-strqpre-no-merge ; CHECK: liveins: $q0, $q1, $x0 - ; CHECK: STRQui renamable $q1, renamable $x0, 1 :: (store 16) - ; CHECK: early-clobber renamable $x0 = STRQpre renamable $q0, renamable $x0, 48, implicit $w0 :: (store 16) + ; CHECK: STRQui renamable $q1, renamable $x0, 1 :: (store (s128)) + ; CHECK: early-clobber renamable $x0 = STRQpre renamable $q0, renamable $x0, 48, implicit $w0 :: (store (s128)) ; CHECK: RET undef $lr, implicit $x0 - STRQui killed renamable $q1, renamable $x0, 1 :: (store 16) - early-clobber renamable $x0 = STRQpre killed renamable $q0, killed renamable $x0, 48 :: (store 16) + STRQui killed renamable $q1, renamable $x0, 1 :: (store (s128)) + early-clobber renamable $x0 = STRQpre killed renamable $q0, killed renamable $x0, 48 :: (store (s128)) RET undef $lr, implicit $x0 ... @@ -182,10 +182,10 @@ body: | liveins: $s0, $s1, $x0 ; CHECK-LABEL: name: 7-strspre-strsui-max-offset-merge ; CHECK: liveins: $s0, $s1, $x0 - ; CHECK: early-clobber $x0 = STPSpre renamable $s0, renamable $s1, renamable $x0, 63 :: (store 4) + ; CHECK: early-clobber $x0 = STPSpre renamable $s0, renamable $s1, renamable $x0, 63 :: (store (s32)) ; CHECK: RET undef $lr, implicit $x0 - early-clobber renamable $x0 = STRSpre killed renamable $s0, killed renamable $x0, 252 :: (store 4) - STRSui killed renamable $s1, renamable $x0, 1 :: (store 4) + early-clobber renamable $x0 = STRSpre killed renamable $s0, killed renamable $x0, 252 :: (store (s32)) + STRSui killed renamable $s1, renamable $x0, 1 :: (store (s32)) RET undef $lr, implicit $x0 ... @@ -208,10 +208,10 @@ body: | liveins: $s0, $s1, $x0 ; CHECK-LABEL: name: 8-strspre-strsui-min-offset-merge ; CHECK: liveins: $s0, $s1, $x0 - ; CHECK: early-clobber $x0 = STPSpre renamable $s0, renamable $s1, renamable $x0, -64 :: (store 4) + ; CHECK: early-clobber $x0 = STPSpre renamable $s0, renamable $s1, renamable $x0, -64 :: (store (s32)) ; CHECK: RET undef $lr, implicit $x0 - early-clobber renamable $x0 = STRSpre killed renamable $s0, killed renamable $x0, -256 :: (store 4) - STRSui killed renamable $s1, renamable $x0, 1 :: (store 4) + early-clobber renamable $x0 = STRSpre killed renamable $s0, killed renamable $x0, -256 :: (store (s32)) + STRSui killed renamable $s1, renamable $x0, 1 :: (store (s32)) RET undef $lr, implicit $x0 ... @@ -235,13 +235,13 @@ body: | liveins: $s0, $s1, $x0, $x1 ; CHECK-LABEL: name: 9-strspre-strsui-mod-base-reg-no-merge ; CHECK: liveins: $s0, $s1, $x0, $x1 - ; CHECK: dead early-clobber renamable $x0 = STRSpre renamable $s0, renamable $x0, 12, implicit $w0 :: (store 4) - ; CHECK: renamable $x0 = LDRXui renamable $x1, 1 :: (load 8) - ; CHECK: STRSui renamable $s1, renamable $x0, 1 :: (store 4) + ; CHECK: dead early-clobber renamable $x0 = STRSpre renamable $s0, renamable $x0, 12, implicit $w0 :: (store (s32)) + ; CHECK: renamable $x0 = LDRXui renamable $x1, 1 :: (load (s64)) + ; CHECK: STRSui renamable $s1, renamable $x0, 1 :: (store (s32)) ; CHECK: RET undef $lr, implicit $x0 - early-clobber renamable $x0 = STRSpre killed renamable $s0, killed renamable $x0, 12 :: (store 4) - renamable $x0 = LDRXui renamable $x1, 1 :: (load 8) - STRSui killed renamable $s1, renamable $x0, 1 :: (store 4) + early-clobber renamable $x0 = STRSpre killed renamable $s0, killed renamable $x0, 12 :: (store (s32)) + renamable $x0 = LDRXui renamable $x1, 1 :: (load (s64)) + STRSui killed renamable $s1, renamable $x0, 1 :: (store (s32)) RET undef $lr, implicit $x0 ... @@ -265,15 +265,15 @@ body: | liveins: $s0, $s1, $x0, $x1 ; CHECK-LABEL: name: 10-strspre-strsui-used-base-reg-no-merge ; CHECK: liveins: $s0, $s1, $x0, $x1 - ; CHECK: early-clobber renamable $x0 = STRSpre renamable $s0, renamable $x0, 12, implicit $w0 :: (store 4) - ; CHECK: STRXui renamable $x1, renamable $x1, 1 :: (store 4) - ; CHECK: STRSui renamable $s1, renamable $x0, 1 :: (store 4) + ; CHECK: early-clobber renamable $x0 = STRSpre renamable $s0, renamable $x0, 12, implicit $w0 :: (store (s32)) + ; CHECK: STRXui renamable $x1, renamable $x1, 1 :: (store (s32)) + ; CHECK: STRSui renamable $s1, renamable $x0, 1 :: (store (s32)) ; CHECK: RET undef $lr, implicit $x0 - early-clobber renamable $x0 = STRSpre killed renamable $s0, killed renamable $x0, 12 :: (store 4) + early-clobber renamable $x0 = STRSpre killed renamable $s0, killed renamable $x0, 12 :: (store (s32)) - STRXui killed renamable $x1, renamable $x1, 1 :: (store 4) + STRXui killed renamable $x1, renamable $x1, 1 :: (store (s32)) - STRSui killed renamable $s1, renamable $x0, 1 :: (store 4) + STRSui killed renamable $s1, renamable $x0, 1 :: (store (s32)) RET undef $lr, implicit $x0 ... @@ -296,19 +296,19 @@ body: | liveins: $s0, $s1, $x0 ; CHECK-LABEL: name: 11-strspre-strspre-no-merge ; CHECK: liveins: $s0, $s1, $x0 - ; CHECK: early-clobber renamable $x0 = STRSpre renamable $s0, renamable $x0, 12, implicit $w0 :: (store 4) - ; CHECK: early-clobber renamable $x0 = STRSpre renamable $s1, renamable $x0, 16, implicit $w0 :: (store 4) - ; CHECK: early-clobber renamable $x0 = STRSpre renamable $s0, renamable $x0, 4, implicit $w0 :: (store 4) - ; CHECK: early-clobber renamable $x0 = STRSpre renamable $s1, renamable $x0, 12, implicit $w0 :: (store 4) - ; CHECK: early-clobber renamable $x0 = STRSpre renamable $s0, renamable $x0, 4, implicit $w0 :: (store 4) - ; CHECK: early-clobber renamable $x0 = STRSpre renamable $s1, renamable $x0, 4, implicit $w0 :: (store 4) + ; CHECK: early-clobber renamable $x0 = STRSpre renamable $s0, renamable $x0, 12, implicit $w0 :: (store (s32)) + ; CHECK: early-clobber renamable $x0 = STRSpre renamable $s1, renamable $x0, 16, implicit $w0 :: (store (s32)) + ; CHECK: early-clobber renamable $x0 = STRSpre renamable $s0, renamable $x0, 4, implicit $w0 :: (store (s32)) + ; CHECK: early-clobber renamable $x0 = STRSpre renamable $s1, renamable $x0, 12, implicit $w0 :: (store (s32)) + ; CHECK: early-clobber renamable $x0 = STRSpre renamable $s0, renamable $x0, 4, implicit $w0 :: (store (s32)) + ; CHECK: early-clobber renamable $x0 = STRSpre renamable $s1, renamable $x0, 4, implicit $w0 :: (store (s32)) ; CHECK: RET undef $lr, implicit $x0 - early-clobber renamable $x0 = STRSpre renamable $s0, killed renamable $x0, 12 :: (store 4) - early-clobber renamable $x0 = STRSpre renamable $s1, killed renamable $x0, 16 :: (store 4) - early-clobber renamable $x0 = STRSpre renamable $s0, killed renamable $x0, 4 :: (store 4) - early-clobber renamable $x0 = STRSpre renamable $s1, killed renamable $x0, 12 :: (store 4) - early-clobber renamable $x0 = STRSpre renamable $s0, killed renamable $x0, 4 :: (store 4) - early-clobber renamable $x0 = STRSpre renamable $s1, killed renamable $x0, 4 :: (store 4) + early-clobber renamable $x0 = STRSpre renamable $s0, killed renamable $x0, 12 :: (store (s32)) + early-clobber renamable $x0 = STRSpre renamable $s1, killed renamable $x0, 16 :: (store (s32)) + early-clobber renamable $x0 = STRSpre renamable $s0, killed renamable $x0, 4 :: (store (s32)) + early-clobber renamable $x0 = STRSpre renamable $s1, killed renamable $x0, 12 :: (store (s32)) + early-clobber renamable $x0 = STRSpre renamable $s0, killed renamable $x0, 4 :: (store (s32)) + early-clobber renamable $x0 = STRSpre renamable $s1, killed renamable $x0, 4 :: (store (s32)) RET undef $lr, implicit $x0 ... @@ -335,11 +335,11 @@ body: | liveins: $s0, $s1, $x0 ; CHECK-LABEL: name: 12-strspre-strsui-no-merge ; CHECK: liveins: $s0, $s1, $x0 - ; CHECK: early-clobber renamable $x0 = STRSpre renamable $s0, renamable $x0, 12, implicit $w0 :: (store 4) - ; CHECK: STRSui renamable $s1, renamable $x0, 2 :: (store 4) + ; CHECK: early-clobber renamable $x0 = STRSpre renamable $s0, renamable $x0, 12, implicit $w0 :: (store (s32)) + ; CHECK: STRSui renamable $s1, renamable $x0, 2 :: (store (s32)) ; CHECK: RET undef $lr, implicit $x0 - early-clobber renamable $x0 = STRSpre killed renamable $s0, killed renamable $x0, 12 :: (store 4) - STRSui killed renamable $s1, renamable $x0, 2 :: (store 4) + early-clobber renamable $x0 = STRSpre killed renamable $s0, killed renamable $x0, 12 :: (store (s32)) + STRSui killed renamable $s1, renamable $x0, 2 :: (store (s32)) RET undef $lr, implicit $x0 ... @@ -363,10 +363,10 @@ body: | ; CHECK-LABEL: name: 13-strqpre-sturqi-merge ; CHECK: liveins: $q0, $q1, $x0 - ; CHECK: early-clobber $x0 = STPQpre renamable $q0, renamable $q1, renamable $x0, 3 :: (store 16) + ; CHECK: early-clobber $x0 = STPQpre renamable $q0, renamable $q1, renamable $x0, 3 :: (store (s128)) ; CHECK: RET undef $lr, implicit $x0 - early-clobber renamable $x0 = STRQpre killed renamable $q0, killed renamable $x0, 48 :: (store 16) - STURQi killed renamable $q1, renamable $x0, 16 :: (store 16) + early-clobber renamable $x0 = STRQpre killed renamable $q0, killed renamable $x0, 48 :: (store (s128)) + STURQi killed renamable $q1, renamable $x0, 16 :: (store (s128)) RET undef $lr, implicit $x0 ... @@ -390,11 +390,11 @@ body: | liveins: $q0, $q1, $x0 ; CHECK-LABEL: name: 14-strqpre-sturqi-no-merge ; CHECK: liveins: $q0, $q1, $x0 - ; CHECK: early-clobber renamable $x0 = STRQpre renamable $q0, renamable $x0, 48, implicit $w0 :: (store 16) - ; CHECK: STURQi renamable $q1, renamable $x0, 1 :: (store 16) + ; CHECK: early-clobber renamable $x0 = STRQpre renamable $q0, renamable $x0, 48, implicit $w0 :: (store (s128)) + ; CHECK: STURQi renamable $q1, renamable $x0, 1 :: (store (s128)) ; CHECK: RET undef $lr, implicit $x0 - early-clobber renamable $x0 = STRQpre killed renamable $q0, killed renamable $x0, 48 :: (store 16) - STURQi killed renamable $q1, renamable $x0, 1 :: (store 16) + early-clobber renamable $x0 = STRQpre killed renamable $q0, killed renamable $x0, 48 :: (store (s128)) + STURQi killed renamable $q1, renamable $x0, 1 :: (store (s128)) RET undef $lr, implicit $x0 ... @@ -417,11 +417,11 @@ body: | liveins: $s0, $s1, $x0 ; CHECK-LABEL: name: 15-strspre-strsui-unaligned-no-merge ; CHECK: liveins: $s0, $s1, $x0 - ; CHECK: early-clobber renamable $x0 = STRSpre renamable $s0, renamable $x0, 251, implicit $w0 :: (store 4) - ; CHECK: STRSui renamable $s1, renamable $x0, 1 :: (store 4) + ; CHECK: early-clobber renamable $x0 = STRSpre renamable $s0, renamable $x0, 251, implicit $w0 :: (store (s32)) + ; CHECK: STRSui renamable $s1, renamable $x0, 1 :: (store (s32)) ; CHECK: RET undef $lr, implicit $x0 - early-clobber renamable $x0 = STRSpre killed renamable $s0, killed renamable $x0, 251 :: (store 4) - STRSui killed renamable $s1, renamable $x0, 1 :: (store 4) + early-clobber renamable $x0 = STRSpre killed renamable $s0, killed renamable $x0, 251 :: (store (s32)) + STRSui killed renamable $s1, renamable $x0, 1 :: (store (s32)) RET undef $lr, implicit $x0 ... @@ -443,11 +443,11 @@ body: | liveins: $x0, $x1, $x2 ; CHECK-LABEL: name: 16-strxpre-strxui-same-reg-no-merge ; CHECK: liveins: $x0, $x1, $x2 - ; CHECK: early-clobber renamable $x0 = STRXpre renamable $x1, renamable $x0, 24, implicit $w0 :: (store 8) - ; CHECK: STRXui renamable $x0, renamable $x0, 1 :: (store 8) + ; CHECK: early-clobber renamable $x0 = STRXpre renamable $x1, renamable $x0, 24, implicit $w0 :: (store (s64)) + ; CHECK: STRXui renamable $x0, renamable $x0, 1 :: (store (s64)) ; CHECK: RET undef $lr, implicit $x0 - early-clobber renamable $x0 = STRXpre killed renamable $x1, killed renamable $x0, 24 :: (store 8) - STRXui renamable $x0, renamable $x0, 1 :: (store 8) + early-clobber renamable $x0 = STRXpre killed renamable $x1, killed renamable $x0, 24 :: (store (s64)) + STRXui renamable $x0, renamable $x0, 1 :: (store (s64)) RET undef $lr, implicit $x0 ... diff --git a/llvm/test/CodeGen/AArch64/taildup-inst-dup-loc.mir b/llvm/test/CodeGen/AArch64/taildup-inst-dup-loc.mir index 5954801d9926d..275c9a2b96d4b 100644 --- a/llvm/test/CodeGen/AArch64/taildup-inst-dup-loc.mir +++ b/llvm/test/CodeGen/AArch64/taildup-inst-dup-loc.mir @@ -20,7 +20,7 @@ body: | ; CHECK: B %bb.2 ; CHECK: bb.1: ; CHECK: successors: %bb.9(0x80000000) - ; CHECK: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[DEF3]], 0 :: (load 8 from `i64* undef`) + ; CHECK: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[DEF3]], 0 :: (load (s64) from `i64* undef`) ; CHECK: B %bb.9 ; CHECK: bb.2: ; CHECK: successors: %bb.3(0x40000000), %bb.4(0x40000000) @@ -71,7 +71,7 @@ body: | bb.2: successors: %bb.8(0x80000000) - %8:gpr64 = LDRXui %9, 0 :: (load 8 from `i64* undef`) + %8:gpr64 = LDRXui %9, 0 :: (load (s64) from `i64* undef`) B %bb.8 bb.3: diff --git a/llvm/test/CodeGen/AArch64/unwind-preserved-from-mir.mir b/llvm/test/CodeGen/AArch64/unwind-preserved-from-mir.mir index 7642c826acff5..276ba08834a0a 100644 --- a/llvm/test/CodeGen/AArch64/unwind-preserved-from-mir.mir +++ b/llvm/test/CodeGen/AArch64/unwind-preserved-from-mir.mir @@ -53,15 +53,15 @@ body: | ; CHECK: successors: %bb.1, %bb.2 ; CHECK: liveins: $q0, $q22, $q23, $q20, $q21, $q18, $q19, $q16, $q17, $q14, $q15, $q12, $q13, $q10, $q11, $q8, $q9, $lr, $fp ; CHECK: $sp = frame-setup SUBXri $sp, 304, 0 - ; CHECK: frame-setup STPQi killed $q23, killed $q22, $sp, 2 :: (store 16 into %stack.19), (store 16 into %stack.18) - ; CHECK: frame-setup STPQi killed $q21, killed $q20, $sp, 4 :: (store 16 into %stack.17), (store 16 into %stack.16) - ; CHECK: frame-setup STPQi killed $q19, killed $q18, $sp, 6 :: (store 16 into %stack.15), (store 16 into %stack.14) - ; CHECK: frame-setup STPQi killed $q17, killed $q16, $sp, 8 :: (store 16 into %stack.13), (store 16 into %stack.12) - ; CHECK: frame-setup STPQi killed $q15, killed $q14, $sp, 10 :: (store 16 into %stack.11), (store 16 into %stack.10) - ; CHECK: frame-setup STPQi killed $q13, killed $q12, $sp, 12 :: (store 16 into %stack.9), (store 16 into %stack.8) - ; CHECK: frame-setup STPQi killed $q11, killed $q10, $sp, 14 :: (store 16 into %stack.7), (store 16 into %stack.6) - ; CHECK: frame-setup STPQi killed $q9, killed $q8, $sp, 16 :: (store 16 into %stack.5), (store 16 into %stack.4) - ; CHECK: frame-setup STPXi killed $fp, killed $lr, $sp, 36 :: (store 8 into %stack.3), (store 8 into %stack.2) + ; CHECK: frame-setup STPQi killed $q23, killed $q22, $sp, 2 :: (store (s128) into %stack.19), (store (s128) into %stack.18) + ; CHECK: frame-setup STPQi killed $q21, killed $q20, $sp, 4 :: (store (s128) into %stack.17), (store (s128) into %stack.16) + ; CHECK: frame-setup STPQi killed $q19, killed $q18, $sp, 6 :: (store (s128) into %stack.15), (store (s128) into %stack.14) + ; CHECK: frame-setup STPQi killed $q17, killed $q16, $sp, 8 :: (store (s128) into %stack.13), (store (s128) into %stack.12) + ; CHECK: frame-setup STPQi killed $q15, killed $q14, $sp, 10 :: (store (s128) into %stack.11), (store (s128) into %stack.10) + ; CHECK: frame-setup STPQi killed $q13, killed $q12, $sp, 12 :: (store (s128) into %stack.9), (store (s128) into %stack.8) + ; CHECK: frame-setup STPQi killed $q11, killed $q10, $sp, 14 :: (store (s128) into %stack.7), (store (s128) into %stack.6) + ; CHECK: frame-setup STPQi killed $q9, killed $q8, $sp, 16 :: (store (s128) into %stack.5), (store (s128) into %stack.4) + ; CHECK: frame-setup STPXi killed $fp, killed $lr, $sp, 36 :: (store (s64) into %stack.3), (store (s64) into %stack.2) ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 304 ; CHECK: frame-setup CFI_INSTRUCTION offset $w30, -8 ; CHECK: frame-setup CFI_INSTRUCTION offset $w29, -16 @@ -81,38 +81,38 @@ body: | ; CHECK: frame-setup CFI_INSTRUCTION offset $b21, -240 ; CHECK: frame-setup CFI_INSTRUCTION offset $b22, -256 ; CHECK: frame-setup CFI_INSTRUCTION offset $b23, -272 - ; CHECK: STRQui $q0, $sp, 0 :: (store 16 into %stack.1) + ; CHECK: STRQui $q0, $sp, 0 :: (store (s128) into %stack.1) ; CHECK: EH_LABEL ; CHECK: BL @may_throw_neon, csr_aarch64_aavpcs, implicit-def dead $lr, implicit $sp, implicit killed $q0, implicit-def $q0 - ; CHECK: STRQui killed $q0, $sp, 1 :: (store 16 into %stack.0) + ; CHECK: STRQui killed $q0, $sp, 1 :: (store (s128) into %stack.0) ; CHECK: EH_LABEL ; CHECK: B %bb.1 ; CHECK: bb.1..Lcontinue: - ; CHECK: $q0 = LDRQui $sp, 1 :: (load 16 from %stack.0) - ; CHECK: $fp, $lr = frame-destroy LDPXi $sp, 36 :: (load 8 from %stack.3), (load 8 from %stack.2) - ; CHECK: $q9, $q8 = frame-destroy LDPQi $sp, 16 :: (load 16 from %stack.5), (load 16 from %stack.4) - ; CHECK: $q11, $q10 = frame-destroy LDPQi $sp, 14 :: (load 16 from %stack.7), (load 16 from %stack.6) - ; CHECK: $q13, $q12 = frame-destroy LDPQi $sp, 12 :: (load 16 from %stack.9), (load 16 from %stack.8) - ; CHECK: $q15, $q14 = frame-destroy LDPQi $sp, 10 :: (load 16 from %stack.11), (load 16 from %stack.10) - ; CHECK: $q17, $q16 = frame-destroy LDPQi $sp, 8 :: (load 16 from %stack.13), (load 16 from %stack.12) - ; CHECK: $q19, $q18 = frame-destroy LDPQi $sp, 6 :: (load 16 from %stack.15), (load 16 from %stack.14) - ; CHECK: $q21, $q20 = frame-destroy LDPQi $sp, 4 :: (load 16 from %stack.17), (load 16 from %stack.16) - ; CHECK: $q23, $q22 = frame-destroy LDPQi $sp, 2 :: (load 16 from %stack.19), (load 16 from %stack.18) + ; CHECK: $q0 = LDRQui $sp, 1 :: (load (s128) from %stack.0) + ; CHECK: $fp, $lr = frame-destroy LDPXi $sp, 36 :: (load (s64) from %stack.3), (load (s64) from %stack.2) + ; CHECK: $q9, $q8 = frame-destroy LDPQi $sp, 16 :: (load (s128) from %stack.5), (load (s128) from %stack.4) + ; CHECK: $q11, $q10 = frame-destroy LDPQi $sp, 14 :: (load (s128) from %stack.7), (load (s128) from %stack.6) + ; CHECK: $q13, $q12 = frame-destroy LDPQi $sp, 12 :: (load (s128) from %stack.9), (load (s128) from %stack.8) + ; CHECK: $q15, $q14 = frame-destroy LDPQi $sp, 10 :: (load (s128) from %stack.11), (load (s128) from %stack.10) + ; CHECK: $q17, $q16 = frame-destroy LDPQi $sp, 8 :: (load (s128) from %stack.13), (load (s128) from %stack.12) + ; CHECK: $q19, $q18 = frame-destroy LDPQi $sp, 6 :: (load (s128) from %stack.15), (load (s128) from %stack.14) + ; CHECK: $q21, $q20 = frame-destroy LDPQi $sp, 4 :: (load (s128) from %stack.17), (load (s128) from %stack.16) + ; CHECK: $q23, $q22 = frame-destroy LDPQi $sp, 2 :: (load (s128) from %stack.19), (load (s128) from %stack.18) ; CHECK: $sp = frame-destroy ADDXri $sp, 304, 0 ; CHECK: RET_ReallyLR implicit killed $q0 ; CHECK: bb.2..Lunwind (landing-pad): ; CHECK: liveins: $x0, $x1 ; CHECK: EH_LABEL - ; CHECK: $q0 = LDRQui $sp, 0 :: (load 16 from %stack.1) - ; CHECK: $fp, $lr = frame-destroy LDPXi $sp, 36 :: (load 8 from %stack.3), (load 8 from %stack.2) - ; CHECK: $q9, $q8 = frame-destroy LDPQi $sp, 16 :: (load 16 from %stack.5), (load 16 from %stack.4) - ; CHECK: $q11, $q10 = frame-destroy LDPQi $sp, 14 :: (load 16 from %stack.7), (load 16 from %stack.6) - ; CHECK: $q13, $q12 = frame-destroy LDPQi $sp, 12 :: (load 16 from %stack.9), (load 16 from %stack.8) - ; CHECK: $q15, $q14 = frame-destroy LDPQi $sp, 10 :: (load 16 from %stack.11), (load 16 from %stack.10) - ; CHECK: $q17, $q16 = frame-destroy LDPQi $sp, 8 :: (load 16 from %stack.13), (load 16 from %stack.12) - ; CHECK: $q19, $q18 = frame-destroy LDPQi $sp, 6 :: (load 16 from %stack.15), (load 16 from %stack.14) - ; CHECK: $q21, $q20 = frame-destroy LDPQi $sp, 4 :: (load 16 from %stack.17), (load 16 from %stack.16) - ; CHECK: $q23, $q22 = frame-destroy LDPQi $sp, 2 :: (load 16 from %stack.19), (load 16 from %stack.18) + ; CHECK: $q0 = LDRQui $sp, 0 :: (load (s128) from %stack.1) + ; CHECK: $fp, $lr = frame-destroy LDPXi $sp, 36 :: (load (s64) from %stack.3), (load (s64) from %stack.2) + ; CHECK: $q9, $q8 = frame-destroy LDPQi $sp, 16 :: (load (s128) from %stack.5), (load (s128) from %stack.4) + ; CHECK: $q11, $q10 = frame-destroy LDPQi $sp, 14 :: (load (s128) from %stack.7), (load (s128) from %stack.6) + ; CHECK: $q13, $q12 = frame-destroy LDPQi $sp, 12 :: (load (s128) from %stack.9), (load (s128) from %stack.8) + ; CHECK: $q15, $q14 = frame-destroy LDPQi $sp, 10 :: (load (s128) from %stack.11), (load (s128) from %stack.10) + ; CHECK: $q17, $q16 = frame-destroy LDPQi $sp, 8 :: (load (s128) from %stack.13), (load (s128) from %stack.12) + ; CHECK: $q19, $q18 = frame-destroy LDPQi $sp, 6 :: (load (s128) from %stack.15), (load (s128) from %stack.14) + ; CHECK: $q21, $q20 = frame-destroy LDPQi $sp, 4 :: (load (s128) from %stack.17), (load (s128) from %stack.16) + ; CHECK: $q23, $q22 = frame-destroy LDPQi $sp, 2 :: (load (s128) from %stack.19), (load (s128) from %stack.18) ; CHECK: $sp = frame-destroy ADDXri $sp, 304, 0 ; CHECK: RET_ReallyLR implicit killed $q0 bb.0 (%ir-block.0): diff --git a/llvm/test/CodeGen/AArch64/wineh-frame-scavenge.mir b/llvm/test/CodeGen/AArch64/wineh-frame-scavenge.mir index 299482713fe79..09799ee266491 100644 --- a/llvm/test/CodeGen/AArch64/wineh-frame-scavenge.mir +++ b/llvm/test/CodeGen/AArch64/wineh-frame-scavenge.mir @@ -22,11 +22,11 @@ # CHECK-NEXT: stack-id: default, callee-saved-register: '$x19', callee-saved-restored: true, # CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -# CHECK: early-clobber $sp = frame-setup STPXpre killed $x19, killed $x20, $sp, -6 :: (store 8 into %stack.4), (store 8 into %stack.5) +# CHECK: early-clobber $sp = frame-setup STPXpre killed $x19, killed $x20, $sp, -6 :: (store (s64) into %stack.4), (store (s64) into %stack.5) # CHECK-NEXT: frame-setup SEH_SaveRegP_X 19, 20, -48 -# CHECK-NEXT: frame-setup STPXi killed $x21, killed $x22, $sp, 2 :: (store 8 into %stack.2), (store 8 into %stack.3) +# CHECK-NEXT: frame-setup STPXi killed $x21, killed $x22, $sp, 2 :: (store (s64) into %stack.2), (store (s64) into %stack.3) # CHECK-NEXT: frame-setup SEH_SaveRegP 21, 22, 16 -# CHECK-NEXT: frame-setup STRXui killed $x23, $sp, 4 :: (store 8 into %stack.1) +# CHECK-NEXT: frame-setup STRXui killed $x23, $sp, 4 :: (store (s64) into %stack.1) # CHECK-NEXT: frame-setup SEH_SaveReg 23, 32 # CHECK-NEXT: frame-setup SEH_PrologEnd diff --git a/llvm/test/CodeGen/AArch64/wineh-frame4.mir b/llvm/test/CodeGen/AArch64/wineh-frame4.mir index e9135f1bcf599..9fa3b5f7ee7c3 100644 --- a/llvm/test/CodeGen/AArch64/wineh-frame4.mir +++ b/llvm/test/CodeGen/AArch64/wineh-frame4.mir @@ -4,13 +4,13 @@ # CHECK: early-clobber $sp = frame-setup STRDpre killed $d8, $sp, -16 # CHECK-NEXT: frame-setup SEH_SaveFReg_X 8, -16 -# CHECK-NEXT: frame-setup STRDui killed $d10, $sp, 1 :: (store 8 into %stack.0) +# CHECK-NEXT: frame-setup STRDui killed $d10, $sp, 1 :: (store (s64) into %stack.0) # CHECK-NEXT: frame-setup SEH_SaveFReg 10, 8 # CHECK-NEXT: frame-setup SEH_PrologEnd # CHECK: frame-destroy SEH_EpilogStart -# CHECK-NEXT: $d10 = frame-destroy LDRDui $sp, 1 :: (load 8 from %stack.0) +# CHECK-NEXT: $d10 = frame-destroy LDRDui $sp, 1 :: (load (s64) from %stack.0) # CHECK-NEXT: frame-destroy SEH_SaveFReg 10, 8 -# CHECK-NEXT: early-clobber $sp, $d8 = frame-destroy LDRDpost $sp, 16 :: (load 8 from %stack.1) +# CHECK-NEXT: early-clobber $sp, $d8 = frame-destroy LDRDpost $sp, 16 :: (load (s64) from %stack.1) # CHECK-NEXT: frame-destroy SEH_SaveFReg_X 8, -16 # CHECK-NEXT: frame-destroy SEH_EpilogEnd # CHECK-NEXT: RET_ReallyLR implicit $x0 diff --git a/llvm/test/CodeGen/AArch64/wineh-frame6.mir b/llvm/test/CodeGen/AArch64/wineh-frame6.mir index 4f8f130549eaa..535d805d03084 100644 --- a/llvm/test/CodeGen/AArch64/wineh-frame6.mir +++ b/llvm/test/CodeGen/AArch64/wineh-frame6.mir @@ -122,24 +122,24 @@ body: | bb.0.entry: liveins: $w0, $w1, $w2, $w3 - STRWui killed renamable $w3, %stack.0.c.addr, 0 :: (store 4 into %ir.c.addr) - STRWui killed renamable $w2, %stack.1.b.addr, 0 :: (store 4 into %ir.b.addr) - STRWui killed renamable $w1, %stack.2.idx.addr, 0 :: (store 4 into %ir.idx.addr) - STRWui killed renamable $w0, %stack.3.n.addr, 0 :: (store 4 into %ir.n.addr) - renamable $x8 = LDRSWui %stack.3.n.addr, 0 :: (dereferenceable load 4 from %ir.n.addr) + STRWui killed renamable $w3, %stack.0.c.addr, 0 :: (store (s32) into %ir.c.addr) + STRWui killed renamable $w2, %stack.1.b.addr, 0 :: (store (s32) into %ir.b.addr) + STRWui killed renamable $w1, %stack.2.idx.addr, 0 :: (store (s32) into %ir.idx.addr) + STRWui killed renamable $w0, %stack.3.n.addr, 0 :: (store (s32) into %ir.n.addr) + renamable $x8 = LDRSWui %stack.3.n.addr, 0 :: (dereferenceable load (s32) from %ir.n.addr) ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp renamable $x8 = nuw ADDXri killed renamable $x8, 15, 0 renamable $x8 = UBFMXri killed renamable $x8, 4, 63 $x15 = COPY renamable $x8 - STRXui killed $x8, %stack.6, 0 :: (store 8 into %stack.6) + STRXui killed $x8, %stack.6, 0 :: (store (s64) into %stack.6) BL &__chkstk, csr_aarch64_stackprobe_windows, implicit-def dead $lr, implicit $sp, implicit killed $x15 renamable $x8 = COPY $sp - $x15 = LDRXui %stack.6, 0 :: (load 8 from %stack.6) + $x15 = LDRXui %stack.6, 0 :: (load (s64) from %stack.6) renamable $x8 = SUBSXrs killed renamable $x8, killed renamable $x15, 4, implicit-def dead $nzcv $sp = COPY renamable $x8 ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp - STRXui killed renamable $x8, %stack.4.a, 0 :: (store 8 into %ir.a) - renamable $x0 = LDRXui %stack.4.a, 0 :: (dereferenceable load 8 from %ir.a) + STRXui killed renamable $x8, %stack.4.a, 0 :: (store (s64) into %ir.a) + renamable $x0 = LDRXui %stack.4.a, 0 :: (dereferenceable load (s64) from %ir.a) ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp BL @"?init@@YAXPEAH@Z", csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $x0, implicit-def $sp ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp diff --git a/llvm/test/CodeGen/AArch64/wineh-frame7.mir b/llvm/test/CodeGen/AArch64/wineh-frame7.mir index b52cec2529a42..e0c6759f5851c 100644 --- a/llvm/test/CodeGen/AArch64/wineh-frame7.mir +++ b/llvm/test/CodeGen/AArch64/wineh-frame7.mir @@ -139,18 +139,18 @@ body: | renamable $x8 = ADDXri %stack.1.i.addr, 0, 0 renamable $w9 = MOVi32imm 2 - STRWui killed renamable $w0, renamable $x8, 0 :: (store 4 into %ir.i.addr) - renamable $w0 = LDRWui renamable $x8, 0 :: (load 4 from %ir.i.addr) + STRWui killed renamable $w0, renamable $x8, 0 :: (store (s32) into %ir.i.addr) + renamable $w0 = LDRWui renamable $x8, 0 :: (load (s32) from %ir.i.addr) renamable $w0 = ADDWri killed renamable $w0, 2, 0 - STRWui killed renamable $w0, %stack.3.a, 0 :: (store 4 into %ir.a) + STRWui killed renamable $w0, %stack.3.a, 0 :: (store (s32) into %ir.a) ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp - STRXui killed $x8, %stack.5, 0 :: (store 8 into %stack.5) - STRWui killed $w9, %stack.6, 0 :: (store 4 into %stack.6) + STRXui killed $x8, %stack.5, 0 :: (store (s64) into %stack.5) + STRWui killed $w9, %stack.6, 0 :: (store (s32) into %stack.6) BL @"?func2@@YAHXZ", csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit-def $w0 ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp - $x8 = LDRXui %stack.5, 0 :: (load 8 from %stack.5) - renamable $w9 = LDRWui killed renamable $x8, 0 :: (load 4 from %ir.i.addr) - $w10 = LDRWui %stack.6, 0 :: (load 4 from %stack.6) + $x8 = LDRXui %stack.5, 0 :: (load (s64) from %stack.5) + renamable $w9 = LDRWui killed renamable $x8, 0 :: (load (s32) from %ir.i.addr) + $w10 = LDRWui %stack.6, 0 :: (load (s32) from %stack.6) $wzr = SUBSWrr killed renamable $w9, killed renamable $w10, implicit-def $nzcv renamable $w9 = CSINCWr $wzr, $wzr, 13, implicit $nzcv TBNZW killed renamable $w9, 0, %bb.2 @@ -162,8 +162,8 @@ body: | ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp BL @"?func2@@YAHXZ", csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit-def $w0 ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp - $x8 = LDRXui %stack.5, 0 :: (load 8 from %stack.5) - STRWui killed renamable $w0, killed renamable $x8, 1 :: (store 4 into %ir.retval) + $x8 = LDRXui %stack.5, 0 :: (load (s64) from %stack.5) + STRWui killed renamable $w0, killed renamable $x8, 1 :: (store (s32) into %ir.retval) B %bb.4 bb.3.if.else: @@ -174,12 +174,12 @@ body: | $x0 = COPY killed renamable $x8 BL @"?func3@@YAHPEAH@Z", csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit killed $x0, implicit-def $w0 ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp - $x8 = LDRXui %stack.5, 0 :: (load 8 from %stack.5) - STRWui killed renamable $w0, killed renamable $x8, 1 :: (store 4 into %ir.retval) + $x8 = LDRXui %stack.5, 0 :: (load (s64) from %stack.5) + STRWui killed renamable $w0, killed renamable $x8, 1 :: (store (s32) into %ir.retval) bb.4.return: - $x8 = LDRXui %stack.5, 0 :: (load 8 from %stack.5) - renamable $w0 = LDRWui killed renamable $x8, 1 :: (load 4 from %ir.retval) + $x8 = LDRXui %stack.5, 0 :: (load (s64) from %stack.5) + renamable $w0 = LDRWui killed renamable $x8, 1 :: (load (s32) from %ir.retval) RET_ReallyLR implicit killed $w0 ... diff --git a/llvm/test/CodeGen/AArch64/wineh-frame8.mir b/llvm/test/CodeGen/AArch64/wineh-frame8.mir index 3c3befaeeb098..86a4ceff683b2 100644 --- a/llvm/test/CodeGen/AArch64/wineh-frame8.mir +++ b/llvm/test/CodeGen/AArch64/wineh-frame8.mir @@ -78,10 +78,10 @@ body: | liveins: $w0 renamable $w8 = MOVi32imm 2 - STRWui killed renamable $w0, %stack.0.a.addr, 0 :: (store 4 into %ir.a.addr) - STRWui killed renamable $w8, %stack.1.b, 0 :: (store 4 into %ir.b) - renamable $w8 = LDRWui %stack.1.b, 0 :: (load 4 from %ir.b) - renamable $w0 = LDRWui %stack.0.a.addr, 0 :: (load 4 from %ir.a.addr) + STRWui killed renamable $w0, %stack.0.a.addr, 0 :: (store (s32) into %ir.a.addr) + STRWui killed renamable $w8, %stack.1.b, 0 :: (store (s32) into %ir.b) + renamable $w8 = LDRWui %stack.1.b, 0 :: (load (s32) from %ir.b) + renamable $w0 = LDRWui %stack.0.a.addr, 0 :: (load (s32) from %ir.a.addr) renamable $w0 = nsw ADDWrr killed renamable $w8, killed renamable $w0 RET_ReallyLR implicit killed $w0 diff --git a/llvm/test/CodeGen/AArch64/wineh1.mir b/llvm/test/CodeGen/AArch64/wineh1.mir index d82e4bce7d194..5aec175181d3f 100644 --- a/llvm/test/CodeGen/AArch64/wineh1.mir +++ b/llvm/test/CodeGen/AArch64/wineh1.mir @@ -112,17 +112,17 @@ stack: body: | bb.0.entry: liveins: $x0, $x1, $x27, $x28, $x25, $x26, $x23, $x24, $x21, $x22, $x19, $x20 - early-clobber $sp = frame-setup STPXpre killed $x27, killed $x28, $sp, -10 :: (store 8 into %stack.8), (store 8 into %stack.9) + early-clobber $sp = frame-setup STPXpre killed $x27, killed $x28, $sp, -10 :: (store (s64) into %stack.8), (store (s64) into %stack.9) frame-setup SEH_SaveRegP_X 27, 28, -80 - frame-setup STPXi killed $x25, killed $x26, $sp, 2 :: (store 8 into %stack.6), (store 8 into %stack.7) + frame-setup STPXi killed $x25, killed $x26, $sp, 2 :: (store (s64) into %stack.6), (store (s64) into %stack.7) frame-setup SEH_SaveRegP 25, 26, 16 - frame-setup STPXi killed $x23, killed $x24, $sp, 4 :: (store 8 into %stack.4), (store 8 into %stack.5) + frame-setup STPXi killed $x23, killed $x24, $sp, 4 :: (store (s64) into %stack.4), (store (s64) into %stack.5) frame-setup SEH_SaveRegP 23, 24, 32 - frame-setup STRXui killed $x21, $sp, 6 :: (store 8 into %stack.2) + frame-setup STRXui killed $x21, $sp, 6 :: (store (s64) into %stack.2) frame-setup SEH_SaveReg 21, 48 - frame-setup STRXui killed $x22, $sp, 7 :: (store 8 into %stack.3) + frame-setup STRXui killed $x22, $sp, 7 :: (store (s64) into %stack.3) frame-setup SEH_SaveReg 22, 56 - frame-setup STPXi killed $x19, killed $x20, $sp, 8 :: (store 8 into %stack.0), (store 8 into %stack.1) + frame-setup STPXi killed $x19, killed $x20, $sp, 8 :: (store (s64) into %stack.0), (store (s64) into %stack.1) frame-setup SEH_SaveRegP 19, 20, 64 frame-setup SEH_PrologEnd $x19 = ADDXrr $x0, killed $x1 @@ -136,19 +136,19 @@ body: | $x27 = ADDXrr $x26, killed $x25 $x28 = ADDXrr $x27, killed $x26 frame-destroy SEH_EpilogStart - $x19, $x20 = frame-destroy LDPXi $sp, 8 :: (load 8 from %stack.0), (load 8 from %stack.1) + $x19, $x20 = frame-destroy LDPXi $sp, 8 :: (load (s64) from %stack.0), (load (s64) from %stack.1) frame-destroy SEH_SaveRegP 19, 20, 64 - $x21 = frame-destroy LDRXui $sp, 6 :: (load 8 from %stack.2) + $x21 = frame-destroy LDRXui $sp, 6 :: (load (s64) from %stack.2) frame-destroy SEH_SaveReg 21, 48 $x0 = COPY $x28 frame-destroy SEH_Nop - $x21 = frame-destroy LDRXui $sp, 6 :: (load 8 from %stack.2) + $x21 = frame-destroy LDRXui $sp, 6 :: (load (s64) from %stack.2) frame-destroy SEH_SaveReg 22, 56 - $x23, $x24 = frame-destroy LDPXi $sp, 4 :: (load 8 from %stack.4), (load 8 from %stack.5) + $x23, $x24 = frame-destroy LDPXi $sp, 4 :: (load (s64) from %stack.4), (load (s64) from %stack.5) frame-destroy SEH_SaveRegP 23, 24, 32 - $x25, $x26 = frame-destroy LDPXi $sp, 2 :: (load 8 from %stack.6), (load 8 from %stack.7) + $x25, $x26 = frame-destroy LDPXi $sp, 2 :: (load (s64) from %stack.6), (load (s64) from %stack.7) frame-destroy SEH_SaveRegP 25, 26, 16 - early-clobber $sp, $x27, $x28 = frame-destroy LDPXpost $sp, 10 :: (load 8 from %stack.8), (load 8 from %stack.9) + early-clobber $sp, $x27, $x28 = frame-destroy LDPXpost $sp, 10 :: (load (s64) from %stack.8), (load (s64) from %stack.9) frame-destroy SEH_SaveRegP_X 27, 28, -80 frame-destroy SEH_EpilogEnd RET_ReallyLR implicit $x0 diff --git a/llvm/test/CodeGen/AArch64/wineh2.mir b/llvm/test/CodeGen/AArch64/wineh2.mir index 79d022df26923..81c73460dd9ef 100644 --- a/llvm/test/CodeGen/AArch64/wineh2.mir +++ b/llvm/test/CodeGen/AArch64/wineh2.mir @@ -124,23 +124,23 @@ body: | bb.0.entry: liveins: $x0, $x1, $d0, $d1, $d8, $d9, $d10, $d11, $d12, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28 - early-clobber $sp = frame-setup STRDpre killed $d12, $sp, -128 :: (store 8 into %stack.14) + early-clobber $sp = frame-setup STRDpre killed $d12, $sp, -128 :: (store (s64) into %stack.14) frame-setup SEH_SaveFReg_X 12, -128 - frame-setup STPDi killed $d10, killed $d11, $sp, 2 :: (store 8 into %stack.12), (store 8 into %stack.13) + frame-setup STPDi killed $d10, killed $d11, $sp, 2 :: (store (s64) into %stack.12), (store (s64) into %stack.13) frame-setup SEH_SaveFRegP 10, 11, 16 - frame-setup STRDui killed $d8, $sp, 4 :: (store 8 into %stack.10) + frame-setup STRDui killed $d8, $sp, 4 :: (store (s64) into %stack.10) frame-setup SEH_SaveFReg 8, 32 - frame-setup STRDui killed $d9, $sp, 5 :: (store 8 into %stack.11) + frame-setup STRDui killed $d9, $sp, 5 :: (store (s64) into %stack.11) frame-setup SEH_SaveFReg 9, 40 - frame-setup STPXi killed $x27, killed $x28, $sp, 6 :: (store 8 into %stack.8), (store 8 into %stack.9) + frame-setup STPXi killed $x27, killed $x28, $sp, 6 :: (store (s64) into %stack.8), (store (s64) into %stack.9) frame-setup SEH_SaveRegP 27, 28, 48 - frame-setup STPXi killed $x25, killed $x26, $sp, 8 :: (store 8 into %stack.6), (store 8 into %stack.7) + frame-setup STPXi killed $x25, killed $x26, $sp, 8 :: (store (s64) into %stack.6), (store (s64) into %stack.7) frame-setup SEH_SaveRegP 25, 26, 64 - frame-setup STPXi killed $x23, killed $x24, $sp, 10 :: (store 8 into %stack.4), (store 8 into %stack.5) + frame-setup STPXi killed $x23, killed $x24, $sp, 10 :: (store (s64) into %stack.4), (store (s64) into %stack.5) frame-setup SEH_SaveRegP 23, 24, 80 - frame-setup STPXi killed $x21, killed $x22, $sp, 12 :: (store 8 into %stack.2), (store 8 into %stack.3) + frame-setup STPXi killed $x21, killed $x22, $sp, 12 :: (store (s64) into %stack.2), (store (s64) into %stack.3) frame-setup SEH_SaveRegP 21, 22, 96 - frame-setup STPXi killed $x19, killed $x20, $sp, 14 :: (store 8 into %stack.0), (store 8 into %stack.1) + frame-setup STPXi killed $x19, killed $x20, $sp, 14 :: (store (s64) into %stack.0), (store (s64) into %stack.1) frame-setup SEH_SaveRegP 19, 20, 112 frame-setup SEH_PrologEnd $x19 = ADDXrr $x0, killed $x1 @@ -161,23 +161,23 @@ body: | $x0 = COPY $d12 $x0 = ADDXrr $x0, killed $x28 frame-destroy SEH_EpilogStart - $x19, $x20 = frame-destroy LDPXi $sp, 14 :: (load 8 from %stack.0), (load 8 from %stack.1) + $x19, $x20 = frame-destroy LDPXi $sp, 14 :: (load (s64) from %stack.0), (load (s64) from %stack.1) frame-destroy SEH_SaveRegP 19, 20, 112 - $x21, $x22 = frame-destroy LDPXi $sp, 12 :: (load 8 from %stack.2), (load 8 from %stack.3) + $x21, $x22 = frame-destroy LDPXi $sp, 12 :: (load (s64) from %stack.2), (load (s64) from %stack.3) frame-destroy SEH_SaveRegP 21, 22, 96 - $x23, $x24 = frame-destroy LDPXi $sp, 10 :: (load 8 from %stack.4), (load 8 from %stack.5) + $x23, $x24 = frame-destroy LDPXi $sp, 10 :: (load (s64) from %stack.4), (load (s64) from %stack.5) frame-destroy SEH_SaveRegP 23, 24, 80 - $x25, $x26 = frame-destroy LDPXi $sp, 8 :: (load 8 from %stack.6), (load 8 from %stack.7) + $x25, $x26 = frame-destroy LDPXi $sp, 8 :: (load (s64) from %stack.6), (load (s64) from %stack.7) frame-destroy SEH_SaveRegP 25, 26, 64 - $x27, $x28 = frame-destroy LDPXi $sp, 6 :: (load 8 from %stack.8), (load 8 from %stack.9) + $x27, $x28 = frame-destroy LDPXi $sp, 6 :: (load (s64) from %stack.8), (load (s64) from %stack.9) frame-destroy SEH_SaveRegP 27, 28, 48 - $d8 = frame-destroy LDRDui $sp, 4 :: (load 8 from %stack.10) + $d8 = frame-destroy LDRDui $sp, 4 :: (load (s64) from %stack.10) frame-destroy SEH_SaveFReg 8, 32 - $d9 = frame-destroy LDRDui $sp, 5 :: (load 8 from %stack.11) + $d9 = frame-destroy LDRDui $sp, 5 :: (load (s64) from %stack.11) frame-destroy SEH_SaveFReg 9, 40 - $d10, $d11 = frame-destroy LDPDi $sp, 2 :: (load 8 from %stack.12), (load 8 from %stack.13) + $d10, $d11 = frame-destroy LDPDi $sp, 2 :: (load (s64) from %stack.12), (load (s64) from %stack.13) frame-destroy SEH_SaveFRegP 10, 11, 16 - early-clobber $sp, $d12 = frame-destroy LDRDpost $sp, 128 :: (load 8 from %stack.14) + early-clobber $sp, $d12 = frame-destroy LDRDpost $sp, 128 :: (load (s64) from %stack.14) frame-destroy SEH_SaveFReg_X 12, -128 frame-destroy SEH_EpilogEnd RET_ReallyLR implicit $x0 diff --git a/llvm/test/CodeGen/AArch64/wineh3.mir b/llvm/test/CodeGen/AArch64/wineh3.mir index d1ffa4aedc085..bbda3415d1510 100644 --- a/llvm/test/CodeGen/AArch64/wineh3.mir +++ b/llvm/test/CodeGen/AArch64/wineh3.mir @@ -103,19 +103,19 @@ body: | bb.0.entry: liveins: $x0, $x1, $d0, $d1, $d10, $d11, $d8, $d9, $x27, $x28, $x25, $x26, $x23, $x24, $x21, $x22, $x19, $x20 - early-clobber $sp = frame-setup STPDpre killed $d10, killed $d11, $sp, -14 :: (store 8 into %stack.12), (store 8 into %stack.13) + early-clobber $sp = frame-setup STPDpre killed $d10, killed $d11, $sp, -14 :: (store (s64) into %stack.12), (store (s64) into %stack.13) frame-setup SEH_SaveFRegP_X 10, 11, -112 - frame-setup STPDi killed $d8, killed $d9, $sp, 2 :: (store 8 into %stack.10), (store 8 into %stack.11) + frame-setup STPDi killed $d8, killed $d9, $sp, 2 :: (store (s64) into %stack.10), (store (s64) into %stack.11) frame-setup SEH_SaveFRegP 8, 9, 16 - frame-setup STPXi killed $x27, killed $x28, $sp, 4 :: (store 8 into %stack.8), (store 8 into %stack.9) + frame-setup STPXi killed $x27, killed $x28, $sp, 4 :: (store (s64) into %stack.8), (store (s64) into %stack.9) frame-setup SEH_SaveRegP 27, 28, 32 - frame-setup STPXi killed $x25, killed $x26, $sp, 6 :: (store 8 into %stack.6), (store 8 into %stack.7) + frame-setup STPXi killed $x25, killed $x26, $sp, 6 :: (store (s64) into %stack.6), (store (s64) into %stack.7) frame-setup SEH_SaveRegP 25, 26, 48 - frame-setup STPXi killed $x23, killed $x24, $sp, 8 :: (store 8 into %stack.4), (store 8 into %stack.5) + frame-setup STPXi killed $x23, killed $x24, $sp, 8 :: (store (s64) into %stack.4), (store (s64) into %stack.5) frame-setup SEH_SaveRegP 23, 24, 64 - frame-setup STPXi killed $x21, killed $x22, $sp, 10 :: (store 8 into %stack.2), (store 8 into %stack.3) + frame-setup STPXi killed $x21, killed $x22, $sp, 10 :: (store (s64) into %stack.2), (store (s64) into %stack.3) frame-setup SEH_SaveRegP 21, 22, 80 - frame-setup STPXi killed $x19, killed $x20, $sp, 12 :: (store 8 into %stack.0), (store 8 into %stack.1) + frame-setup STPXi killed $x19, killed $x20, $sp, 12 :: (store (s64) into %stack.0), (store (s64) into %stack.1) frame-setup SEH_SaveRegP 19, 20, 96 frame-setup SEH_PrologEnd $x19 = ADDXrr $x0, killed $x1 @@ -135,19 +135,19 @@ body: | $x0 = COPY $d11 $x0 = ADDXrr $x0, killed $x28 frame-destroy SEH_EpilogStart - $x19, $x20 = frame-destroy LDPXi $sp, 12 :: (load 8 from %stack.0), (load 8 from %stack.1) + $x19, $x20 = frame-destroy LDPXi $sp, 12 :: (load (s64) from %stack.0), (load (s64) from %stack.1) frame-destroy SEH_SaveRegP 19, 20, 96 - $x21, $x22 = frame-destroy LDPXi $sp, 10 :: (load 8 from %stack.2), (load 8 from %stack.3) + $x21, $x22 = frame-destroy LDPXi $sp, 10 :: (load (s64) from %stack.2), (load (s64) from %stack.3) frame-destroy SEH_SaveRegP 21, 22, 80 - $x23, $x24 = frame-destroy LDPXi $sp, 8 :: (load 8 from %stack.4), (load 8 from %stack.5) + $x23, $x24 = frame-destroy LDPXi $sp, 8 :: (load (s64) from %stack.4), (load (s64) from %stack.5) frame-destroy SEH_SaveRegP 23, 24, 64 - $x25, $x26 = frame-destroy LDPXi $sp, 6 :: (load 8 from %stack.6), (load 8 from %stack.7) + $x25, $x26 = frame-destroy LDPXi $sp, 6 :: (load (s64) from %stack.6), (load (s64) from %stack.7) frame-destroy SEH_SaveRegP 25, 26, 48 - $x27, $x28 = frame-destroy LDPXi $sp, 4 :: (load 8 from %stack.8), (load 8 from %stack.9) + $x27, $x28 = frame-destroy LDPXi $sp, 4 :: (load (s64) from %stack.8), (load (s64) from %stack.9) frame-destroy SEH_SaveRegP 27, 28, 32 - $d8, $d9 = frame-destroy LDPDi $sp, 2 :: (load 8 from %stack.10), (load 8 from %stack.11) + $d8, $d9 = frame-destroy LDPDi $sp, 2 :: (load (s64) from %stack.10), (load (s64) from %stack.11) frame-destroy SEH_SaveFRegP 8, 9, 16 - early-clobber $sp, $d10, $d11 = frame-destroy LDPDpost $sp, 14 :: (load 8 from %stack.12), (load 8 from %stack.13) + early-clobber $sp, $d10, $d11 = frame-destroy LDPDpost $sp, 14 :: (load (s64) from %stack.12), (load (s64) from %stack.13) frame-destroy SEH_SaveFRegP_X 10, 11, -112 frame-destroy SEH_EpilogEnd RET_ReallyLR implicit $x0 diff --git a/llvm/test/CodeGen/AArch64/wineh4.mir b/llvm/test/CodeGen/AArch64/wineh4.mir index c007671a83c80..a281bb246a3a5 100644 --- a/llvm/test/CodeGen/AArch64/wineh4.mir +++ b/llvm/test/CodeGen/AArch64/wineh4.mir @@ -133,19 +133,19 @@ body: | successors: %bb.2(0x40000000), %bb.1(0x40000000) liveins: $x0, $x1, $d0, $d1, $d10, $d11, $d8, $d9, $x27, $x28, $x25, $x26, $x23, $x24, $x21, $x22, $x19, $x20 - early-clobber $sp = frame-setup STPDpre killed $d10, killed $d11, $sp, -14 :: (store 8 into %stack.12), (store 8 into %stack.13) + early-clobber $sp = frame-setup STPDpre killed $d10, killed $d11, $sp, -14 :: (store (s64) into %stack.12), (store (s64) into %stack.13) frame-setup SEH_SaveFRegP_X 10, 11, -112 - frame-setup STPDi killed $d8, killed $d9, $sp, 2 :: (store 8 into %stack.10), (store 8 into %stack.11) + frame-setup STPDi killed $d8, killed $d9, $sp, 2 :: (store (s64) into %stack.10), (store (s64) into %stack.11) frame-setup SEH_SaveFRegP 8, 9, 16 - frame-setup STPXi killed $x27, killed $x28, $sp, 4 :: (store 8 into %stack.8), (store 8 into %stack.9) + frame-setup STPXi killed $x27, killed $x28, $sp, 4 :: (store (s64) into %stack.8), (store (s64) into %stack.9) frame-setup SEH_SaveRegP 27, 28, 32 - frame-setup STPXi killed $x25, killed $x26, $sp, 6 :: (store 8 into %stack.6), (store 8 into %stack.7) + frame-setup STPXi killed $x25, killed $x26, $sp, 6 :: (store (s64) into %stack.6), (store (s64) into %stack.7) frame-setup SEH_SaveRegP 25, 26, 48 - frame-setup STPXi killed $x23, killed $x24, $sp, 8 :: (store 8 into %stack.4), (store 8 into %stack.5) + frame-setup STPXi killed $x23, killed $x24, $sp, 8 :: (store (s64) into %stack.4), (store (s64) into %stack.5) frame-setup SEH_SaveRegP 23, 24, 64 - frame-setup STPXi killed $x21, killed $x22, $sp, 10 :: (store 8 into %stack.2), (store 8 into %stack.3) + frame-setup STPXi killed $x21, killed $x22, $sp, 10 :: (store (s64) into %stack.2), (store (s64) into %stack.3) frame-setup SEH_SaveRegP 21, 22, 80 - frame-setup STPXi killed $x19, killed $x20, $sp, 12 :: (store 8 into %stack.0), (store 8 into %stack.1) + frame-setup STPXi killed $x19, killed $x20, $sp, 12 :: (store (s64) into %stack.0), (store (s64) into %stack.1) frame-setup SEH_SaveRegP 19, 20, 96 frame-setup SEH_PrologEnd frame-setup CFI_INSTRUCTION def_cfa_offset 112 @@ -185,19 +185,19 @@ body: | $x28 = ADDXrr $x27, killed $x26 $x0 = COPY $x28 frame-destroy SEH_EpilogStart - $x19, $x20 = frame-destroy LDPXi $sp, 12 :: (load 8 from %stack.0), (load 8 from %stack.1) + $x19, $x20 = frame-destroy LDPXi $sp, 12 :: (load (s64) from %stack.0), (load (s64) from %stack.1) frame-destroy SEH_SaveRegP 19, 20, 96 - $x21, $x22 = frame-destroy LDPXi $sp, 10 :: (load 8 from %stack.2), (load 8 from %stack.3) + $x21, $x22 = frame-destroy LDPXi $sp, 10 :: (load (s64) from %stack.2), (load (s64) from %stack.3) frame-destroy SEH_SaveRegP 21, 22, 80 - $x23, $x24 = frame-destroy LDPXi $sp, 8 :: (load 8 from %stack.4), (load 8 from %stack.5) + $x23, $x24 = frame-destroy LDPXi $sp, 8 :: (load (s64) from %stack.4), (load (s64) from %stack.5) frame-destroy SEH_SaveRegP 23, 24, 64 - $x25, $x26 = frame-destroy LDPXi $sp, 6 :: (load 8 from %stack.6), (load 8 from %stack.7) + $x25, $x26 = frame-destroy LDPXi $sp, 6 :: (load (s64) from %stack.6), (load (s64) from %stack.7) frame-destroy SEH_SaveRegP 25, 26, 48 - $x27, $x28 = frame-destroy LDPXi $sp, 4 :: (load 8 from %stack.8), (load 8 from %stack.9) + $x27, $x28 = frame-destroy LDPXi $sp, 4 :: (load (s64) from %stack.8), (load (s64) from %stack.9) frame-destroy SEH_SaveRegP 27, 28, 32 - $d8, $d9 = frame-destroy LDPDi $sp, 2 :: (load 8 from %stack.10), (load 8 from %stack.11) + $d8, $d9 = frame-destroy LDPDi $sp, 2 :: (load (s64) from %stack.10), (load (s64) from %stack.11) frame-destroy SEH_SaveFRegP 8, 9, 16 - early-clobber $sp, $d10, $d11 = frame-destroy LDPDpost $sp, 14 :: (load 8 from %stack.12), (load 8 from %stack.13) + early-clobber $sp, $d10, $d11 = frame-destroy LDPDpost $sp, 14 :: (load (s64) from %stack.12), (load (s64) from %stack.13) frame-destroy SEH_SaveFRegP_X 10, 11, -112 frame-destroy SEH_EpilogEnd RET_ReallyLR implicit $x0 @@ -208,19 +208,19 @@ body: | $x0 = COPY $d11 $x0 = ADDXrr $x0, $x0 frame-destroy SEH_EpilogStart - $x19, $x20 = frame-destroy LDPXi $sp, 12 :: (load 8 from %stack.0), (load 8 from %stack.1) + $x19, $x20 = frame-destroy LDPXi $sp, 12 :: (load (s64) from %stack.0), (load (s64) from %stack.1) frame-destroy SEH_SaveRegP 19, 20, 96 - $x21, $x22 = frame-destroy LDPXi $sp, 10 :: (load 8 from %stack.2), (load 8 from %stack.3) + $x21, $x22 = frame-destroy LDPXi $sp, 10 :: (load (s64) from %stack.2), (load (s64) from %stack.3) frame-destroy SEH_SaveRegP 21, 22, 80 - $x23, $x24 = frame-destroy LDPXi $sp, 8 :: (load 8 from %stack.4), (load 8 from %stack.5) + $x23, $x24 = frame-destroy LDPXi $sp, 8 :: (load (s64) from %stack.4), (load (s64) from %stack.5) frame-destroy SEH_SaveRegP 23, 24, 64 - $x25, $x26 = frame-destroy LDPXi $sp, 6 :: (load 8 from %stack.6), (load 8 from %stack.7) + $x25, $x26 = frame-destroy LDPXi $sp, 6 :: (load (s64) from %stack.6), (load (s64) from %stack.7) frame-destroy SEH_SaveRegP 25, 26, 48 - $x27, $x28 = frame-destroy LDPXi $sp, 4 :: (load 8 from %stack.8), (load 8 from %stack.9) + $x27, $x28 = frame-destroy LDPXi $sp, 4 :: (load (s64) from %stack.8), (load (s64) from %stack.9) frame-destroy SEH_SaveRegP 27, 28, 32 - $d8, $d9 = frame-destroy LDPDi $sp, 2 :: (load 8 from %stack.10), (load 8 from %stack.11) + $d8, $d9 = frame-destroy LDPDi $sp, 2 :: (load (s64) from %stack.10), (load (s64) from %stack.11) frame-destroy SEH_SaveFRegP 8, 9, 16 - early-clobber $sp, $d10, $d11 = frame-destroy LDPDpost $sp, 14 :: (load 8 from %stack.12), (load 8 from %stack.13) + early-clobber $sp, $d10, $d11 = frame-destroy LDPDpost $sp, 14 :: (load (s64) from %stack.12), (load (s64) from %stack.13) frame-destroy SEH_SaveFRegP_X 10, 11, -112 frame-destroy SEH_EpilogEnd RET_ReallyLR implicit $x0 diff --git a/llvm/test/CodeGen/AArch64/wineh5.mir b/llvm/test/CodeGen/AArch64/wineh5.mir index 6fe6b66fc2e54..46b127a05a935 100644 --- a/llvm/test/CodeGen/AArch64/wineh5.mir +++ b/llvm/test/CodeGen/AArch64/wineh5.mir @@ -160,9 +160,9 @@ body: | successors: %bb.2(0x40000000), %bb.3(0x40000000) liveins: $w0, $x28, $fp, $lr - early-clobber $sp = frame-setup STRXpre killed $x28, $sp, -32 :: (store 8 into %stack.9) + early-clobber $sp = frame-setup STRXpre killed $x28, $sp, -32 :: (store (s64) into %stack.9) frame-setup SEH_SaveReg_X 28, -256 - frame-setup STPXi killed $fp, killed $lr, $sp, 2 :: (store 8 into %stack.7), (store 8 into %stack.8) + frame-setup STPXi killed $fp, killed $lr, $sp, 2 :: (store (s64) into %stack.7), (store (s64) into %stack.8) frame-setup SEH_SaveFPLR 16 $x15 = frame-setup MOVi64imm 187081 frame-setup SEH_Nop @@ -174,16 +174,16 @@ body: | $x8 = ADDXri $sp, 730, 12 $x8 = ADDXri $x8, 3208, 0 renamable $w9 = MOVi32imm 2 - STRWui killed renamable $w0, renamable $x8, 0 :: (store 4 into %ir.i.addr) - renamable $w0 = LDRWui renamable $x8, 0 :: (load 4 from %ir.i.addr) + STRWui killed renamable $w0, renamable $x8, 0 :: (store (s32) into %ir.i.addr) + renamable $w0 = LDRWui renamable $x8, 0 :: (load (s32) from %ir.i.addr) renamable $w0 = ADDWri killed renamable $w0, 2, 0 - STRWui killed renamable $w0, $sp, 128 :: (store 4 into %ir.a) - STRXui killed $x8, $sp, 1 :: (store 8 into %stack.5) - STRWui killed $w9, $sp, 1 :: (store 4 into %stack.6) + STRWui killed renamable $w0, $sp, 128 :: (store (s32) into %ir.a) + STRXui killed $x8, $sp, 1 :: (store (s64) into %stack.5) + STRWui killed $w9, $sp, 1 :: (store (s32) into %stack.6) BL @"?func2@@YAHXZ", csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit-def $w0 - $x8 = LDRXui $sp, 1 :: (load 8 from %stack.5) - renamable $w9 = LDRWui killed renamable $x8, 0 :: (load 4 from %ir.i.addr) - $w10 = LDRWui $sp, 1 :: (load 4 from %stack.6) + $x8 = LDRXui $sp, 1 :: (load (s64) from %stack.5) + renamable $w9 = LDRWui killed renamable $x8, 0 :: (load (s32) from %ir.i.addr) + $w10 = LDRWui $sp, 1 :: (load (s32) from %stack.6) $wzr = SUBSWrr killed renamable $w9, killed renamable $w10, implicit-def $nzcv renamable $w9 = CSINCWr $wzr, $wzr, 13, implicit $nzcv TBNZW killed renamable $w9, 0, %bb.2 @@ -193,8 +193,8 @@ body: | successors: %bb.4(0x80000000) BL @"?func2@@YAHXZ", csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit-def $w0 - $x8 = LDRXui $sp, 1 :: (load 8 from %stack.5) - STRWui killed renamable $w0, killed renamable $x8, 1 :: (store 4 into %ir.retval) + $x8 = LDRXui $sp, 1 :: (load (s64) from %stack.5) + STRWui killed renamable $w0, killed renamable $x8, 1 :: (store (s32) into %ir.retval) B %bb.4 bb.3.if.else: @@ -203,20 +203,20 @@ body: | $x8 = ADDXri $sp, 20, 0 $x0 = COPY killed renamable $x8 BL @"?func3@@YAHPEAH@Z", csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit killed $x0, implicit-def $w0 - $x8 = LDRXui $sp, 1 :: (load 8 from %stack.5) - STRWui killed renamable $w0, killed renamable $x8, 1 :: (store 4 into %ir.retval) + $x8 = LDRXui $sp, 1 :: (load (s64) from %stack.5) + STRWui killed renamable $w0, killed renamable $x8, 1 :: (store (s32) into %ir.retval) bb.4.return: - $x8 = LDRXui $sp, 1 :: (load 8 from %stack.5) - renamable $w0 = LDRWui killed renamable $x8, 1 :: (load 4 from %ir.retval) + $x8 = LDRXui $sp, 1 :: (load (s64) from %stack.5) + renamable $w0 = LDRWui killed renamable $x8, 1 :: (load (s32) from %ir.retval) frame-destroy SEH_EpilogStart $sp = frame-destroy ADDXri $sp, 730, 12 frame-destroy SEH_StackAlloc 2990080 $sp = frame-destroy ADDXri $sp, 3216, 0 frame-destroy SEH_StackAlloc 3216 - $fp, $lr = frame-destroy LDPXi $sp, 2 :: (load 8 from %stack.7), (load 8 from %stack.8) + $fp, $lr = frame-destroy LDPXi $sp, 2 :: (load (s64) from %stack.7), (load (s64) from %stack.8) frame-destroy SEH_SaveFPLR 16 - early-clobber $sp, $x28 = frame-destroy LDRXpost $sp, 32 :: (load 8 from %stack.9) + early-clobber $sp, $x28 = frame-destroy LDRXpost $sp, 32 :: (load (s64) from %stack.9) frame-destroy SEH_SaveReg_X 28, -256 frame-destroy SEH_EpilogEnd RET_ReallyLR implicit killed $w0 diff --git a/llvm/test/CodeGen/AArch64/wineh6.mir b/llvm/test/CodeGen/AArch64/wineh6.mir index e7592bd711460..f3d5fc8921a57 100644 --- a/llvm/test/CodeGen/AArch64/wineh6.mir +++ b/llvm/test/CodeGen/AArch64/wineh6.mir @@ -96,7 +96,7 @@ body: | bb.0.entry: liveins: $w0, $w1, $w2, $w3, $lr - early-clobber $sp = frame-setup STPXpre killed $fp, killed $lr, $sp, -2 :: (store 8 into %stack.7), (store 8 into %stack.8) + early-clobber $sp = frame-setup STPXpre killed $fp, killed $lr, $sp, -2 :: (store (s64) into %stack.7), (store (s64) into %stack.8) frame-setup SEH_SaveFPLR_X -16 $fp = frame-setup ADDXri $sp, 0, 0 frame-setup SEH_SetFP @@ -111,10 +111,10 @@ body: | renamable $x8 = ADDXri killed renamable $x8, 15, 0 renamable $x8 = UBFMXri killed renamable $x8, 4, 63 $x15 = COPY renamable $x8 - STURXi killed $x8, $fp, -32 :: (store 8 into %stack.6) + STURXi killed $x8, $fp, -32 :: (store (s64) into %stack.6) BL &__chkstk, csr_aarch64_stackprobe_windows, implicit-def dead $lr, implicit $sp, implicit killed $x15 renamable $x8 = COPY $sp - $x15 = LDURXi $fp, -32 :: (load 8 from %stack.6) + $x15 = LDURXi $fp, -32 :: (load (s64) from %stack.6) renamable $x8 = SUBSXrs killed renamable $x8, killed renamable $x15, 4, implicit-def dead $nzcv $sp = COPY renamable $x8 STURXi killed renamable $x8, $fp, -24 diff --git a/llvm/test/CodeGen/AArch64/wineh7.mir b/llvm/test/CodeGen/AArch64/wineh7.mir index 6bf06d80861a4..ada4ebcf5057e 100644 --- a/llvm/test/CodeGen/AArch64/wineh7.mir +++ b/llvm/test/CodeGen/AArch64/wineh7.mir @@ -83,11 +83,11 @@ body: | bb.0.entry: liveins: $w0, $w1, $w2, $w3, $x21, $x22, $x19, $x20, $lr - early-clobber $sp = frame-setup STPXpre killed $x21, killed $x22, $sp, -6 :: (store 8 into %stack.5), (store 8 into %stack.6) + early-clobber $sp = frame-setup STPXpre killed $x21, killed $x22, $sp, -6 :: (store (s64) into %stack.5), (store (s64) into %stack.6) frame-setup SEH_SaveRegP_X 21, 22, -48 - frame-setup STPXi killed $x19, killed $x20, $sp, 2 :: (store 8 into %stack.3), (store 8 into %stack.4) + frame-setup STPXi killed $x19, killed $x20, $sp, 2 :: (store (s64) into %stack.3), (store (s64) into %stack.4) frame-setup SEH_SaveRegP 19, 20, 16 - frame-setup STPXi killed $fp, killed $lr, $sp, 4 :: (store 8 into %stack.1), (store 8 into %stack.2) + frame-setup STPXi killed $fp, killed $lr, $sp, 4 :: (store (s64) into %stack.1), (store (s64) into %stack.2) frame-setup SEH_SaveFPLR 32 $fp = frame-setup ADDXri $sp, 32, 0 frame-setup SEH_AddFP 32 @@ -109,11 +109,11 @@ body: | frame-destroy SEH_EpilogStart $sp = frame-destroy SUBXri $fp, 32, 0 frame-destroy SEH_AddFP 32 - $fp, $lr = frame-destroy LDPXi $sp, 4 :: (load 8 from %stack.1), (load 8 from %stack.2) + $fp, $lr = frame-destroy LDPXi $sp, 4 :: (load (s64) from %stack.1), (load (s64) from %stack.2) frame-destroy SEH_SaveFPLR 32 - $x19, $x20 = frame-destroy LDPXi $sp, 2 :: (load 8 from %stack.3), (load 8 from %stack.4) + $x19, $x20 = frame-destroy LDPXi $sp, 2 :: (load (s64) from %stack.3), (load (s64) from %stack.4) frame-destroy SEH_SaveRegP 19, 20, 16 - early-clobber $sp, $x21, $x22 = frame-destroy LDPXpost $sp, 6 :: (load 8 from %stack.5), (load 8 from %stack.6) + early-clobber $sp, $x21, $x22 = frame-destroy LDPXpost $sp, 6 :: (load (s64) from %stack.5), (load (s64) from %stack.6) frame-destroy SEH_SaveRegP_X 21, 22, -48 frame-destroy SEH_EpilogEnd RET_ReallyLR implicit $w0 diff --git a/llvm/test/CodeGen/AArch64/wineh8.mir b/llvm/test/CodeGen/AArch64/wineh8.mir index 9a37ed760ad88..38853e7d8d8b0 100644 --- a/llvm/test/CodeGen/AArch64/wineh8.mir +++ b/llvm/test/CodeGen/AArch64/wineh8.mir @@ -132,19 +132,19 @@ body: | successors: %bb.2(0x40000000), %bb.1(0x40000000) liveins: $x0, $x1, $d0, $d1, $d10, $d11, $d8, $d9, $x27, $x28, $x25, $x26, $x23, $x24, $x21, $x22, $x19, $x20 - early-clobber $sp = frame-setup STPDpre killed $d10, killed $d11, $sp, -14 :: (store 8 into %stack.12), (store 8 into %stack.13) + early-clobber $sp = frame-setup STPDpre killed $d10, killed $d11, $sp, -14 :: (store (s64) into %stack.12), (store (s64) into %stack.13) frame-setup SEH_SaveFRegP_X 10, 11, -112 - frame-setup STPDi killed $d8, killed $d9, $sp, 2 :: (store 8 into %stack.10), (store 8 into %stack.11) + frame-setup STPDi killed $d8, killed $d9, $sp, 2 :: (store (s64) into %stack.10), (store (s64) into %stack.11) frame-setup SEH_SaveFRegP 8, 9, 16 - frame-setup STPXi killed $x27, killed $x28, $sp, 4 :: (store 8 into %stack.8), (store 8 into %stack.9) + frame-setup STPXi killed $x27, killed $x28, $sp, 4 :: (store (s64) into %stack.8), (store (s64) into %stack.9) frame-setup SEH_SaveRegP 27, 28, 32 - frame-setup STPXi killed $x25, killed $x26, $sp, 6 :: (store 8 into %stack.6), (store 8 into %stack.7) + frame-setup STPXi killed $x25, killed $x26, $sp, 6 :: (store (s64) into %stack.6), (store (s64) into %stack.7) frame-setup SEH_SaveRegP 25, 26, 48 - frame-setup STPXi killed $x23, killed $x24, $sp, 8 :: (store 8 into %stack.4), (store 8 into %stack.5) + frame-setup STPXi killed $x23, killed $x24, $sp, 8 :: (store (s64) into %stack.4), (store (s64) into %stack.5) frame-setup SEH_SaveRegP 23, 24, 64 - frame-setup STPXi killed $x21, killed $x22, $sp, 10 :: (store 8 into %stack.2), (store 8 into %stack.3) + frame-setup STPXi killed $x21, killed $x22, $sp, 10 :: (store (s64) into %stack.2), (store (s64) into %stack.3) frame-setup SEH_SaveRegP 21, 22, 80 - frame-setup STPXi killed $x19, killed $x20, $sp, 12 :: (store 8 into %stack.0), (store 8 into %stack.1) + frame-setup STPXi killed $x19, killed $x20, $sp, 12 :: (store (s64) into %stack.0), (store (s64) into %stack.1) frame-setup SEH_SaveRegP 19, 20, 96 frame-setup SEH_PrologEnd frame-setup CFI_INSTRUCTION def_cfa_offset 112 @@ -184,19 +184,19 @@ body: | $x28 = ADDXrr $x27, killed $x26 $x0 = COPY $x28 frame-destroy SEH_EpilogStart - $x19, $x20 = frame-destroy LDPXi $sp, 12 :: (load 8 from %stack.0), (load 8 from %stack.1) + $x19, $x20 = frame-destroy LDPXi $sp, 12 :: (load (s64) from %stack.0), (load (s64) from %stack.1) frame-destroy SEH_SaveRegP 19, 20, 96 - $x21, $x22 = frame-destroy LDPXi $sp, 10 :: (load 8 from %stack.2), (load 8 from %stack.3) + $x21, $x22 = frame-destroy LDPXi $sp, 10 :: (load (s64) from %stack.2), (load (s64) from %stack.3) frame-destroy SEH_SaveRegP 21, 22, 80 - $x23, $x24 = frame-destroy LDPXi $sp, 8 :: (load 8 from %stack.4), (load 8 from %stack.5) + $x23, $x24 = frame-destroy LDPXi $sp, 8 :: (load (s64) from %stack.4), (load (s64) from %stack.5) frame-destroy SEH_SaveRegP 23, 24, 64 - $x25, $x26 = frame-destroy LDPXi $sp, 6 :: (load 8 from %stack.6), (load 8 from %stack.7) + $x25, $x26 = frame-destroy LDPXi $sp, 6 :: (load (s64) from %stack.6), (load (s64) from %stack.7) frame-destroy SEH_SaveRegP 25, 26, 48 - $x27, $x28 = frame-destroy LDPXi $sp, 4 :: (load 8 from %stack.8), (load 8 from %stack.9) + $x27, $x28 = frame-destroy LDPXi $sp, 4 :: (load (s64) from %stack.8), (load (s64) from %stack.9) frame-destroy SEH_SaveRegP 27, 28, 32 - $d8, $d9 = frame-destroy LDPDi $sp, 2 :: (load 8 from %stack.10), (load 8 from %stack.11) + $d8, $d9 = frame-destroy LDPDi $sp, 2 :: (load (s64) from %stack.10), (load (s64) from %stack.11) frame-destroy SEH_SaveFRegP 8, 9, 16 - early-clobber $sp, $d10, $d11 = frame-destroy LDPDpost $sp, 14 :: (load 8 from %stack.12), (load 8 from %stack.13) + early-clobber $sp, $d10, $d11 = frame-destroy LDPDpost $sp, 14 :: (load (s64) from %stack.12), (load (s64) from %stack.13) frame-destroy SEH_SaveFRegP_X 10, 11, -112 frame-destroy SEH_EpilogEnd RET_ReallyLR implicit $x0 @@ -207,17 +207,17 @@ body: | $x0 = COPY $d11 $x0 = ADDXrr $x0, $x0 frame-destroy SEH_EpilogStart - $x19, $x20 = frame-destroy LDPXi $sp, 12 :: (load 8 from %stack.0), (load 8 from %stack.1) + $x19, $x20 = frame-destroy LDPXi $sp, 12 :: (load (s64) from %stack.0), (load (s64) from %stack.1) frame-destroy SEH_SaveRegP 19, 20, 96 - $x21, $x22 = frame-destroy LDPXi $sp, 10 :: (load 8 from %stack.2), (load 8 from %stack.3) + $x21, $x22 = frame-destroy LDPXi $sp, 10 :: (load (s64) from %stack.2), (load (s64) from %stack.3) frame-destroy SEH_SaveRegP 21, 22, 80 - $x23, $x24 = frame-destroy LDPXi $sp, 8 :: (load 8 from %stack.4), (load 8 from %stack.5) + $x23, $x24 = frame-destroy LDPXi $sp, 8 :: (load (s64) from %stack.4), (load (s64) from %stack.5) frame-destroy SEH_SaveRegP 23, 24, 64 - $x25, $x26 = frame-destroy LDPXi $sp, 6 :: (load 8 from %stack.6), (load 8 from %stack.7) + $x25, $x26 = frame-destroy LDPXi $sp, 6 :: (load (s64) from %stack.6), (load (s64) from %stack.7) frame-destroy SEH_SaveRegP 25, 26, 48 - $d8, $d9 = frame-destroy LDPDi $sp, 2 :: (load 8 from %stack.10), (load 8 from %stack.11) + $d8, $d9 = frame-destroy LDPDi $sp, 2 :: (load (s64) from %stack.10), (load (s64) from %stack.11) frame-destroy SEH_SaveFRegP 8, 9, 16 - early-clobber $sp, $d10, $d11 = frame-destroy LDPDpost $sp, 14 :: (load 8 from %stack.12), (load 8 from %stack.13) + early-clobber $sp, $d10, $d11 = frame-destroy LDPDpost $sp, 14 :: (load (s64) from %stack.12), (load (s64) from %stack.13) frame-destroy SEH_SaveFRegP_X 10, 11, -112 frame-destroy SEH_EpilogEnd RET_ReallyLR implicit $x0 diff --git a/llvm/test/CodeGen/AArch64/wineh_shrinkwrap.mir b/llvm/test/CodeGen/AArch64/wineh_shrinkwrap.mir index 51bb8ff0d393f..aacadd38e90c5 100644 --- a/llvm/test/CodeGen/AArch64/wineh_shrinkwrap.mir +++ b/llvm/test/CodeGen/AArch64/wineh_shrinkwrap.mir @@ -134,7 +134,7 @@ body: | $x0 = ADDXri %stack.0.A, 0, 0 BL @"?init@@YAXPEAH@Z", csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp - renamable $w8 = LDRWui %stack.0.A, 100 :: (dereferenceable load 4 from %ir.arrayidx, !tbaa !2) + renamable $w8 = LDRWui %stack.0.A, 100 :: (dereferenceable load (s32) from %ir.arrayidx, !tbaa !2) renamable $w8 = ADDWrr killed renamable $w19, killed renamable $w8 renamable $w0 = ADDWri killed renamable $w8, 1, 0 diff --git a/llvm/test/CodeGen/AArch64/wrong-callee-save-size-after-livedebugvariables.mir b/llvm/test/CodeGen/AArch64/wrong-callee-save-size-after-livedebugvariables.mir index eb92edf2725ad..7f13f46adb24b 100644 --- a/llvm/test/CodeGen/AArch64/wrong-callee-save-size-after-livedebugvariables.mir +++ b/llvm/test/CodeGen/AArch64/wrong-callee-save-size-after-livedebugvariables.mir @@ -82,8 +82,8 @@ stack: body: | bb.1.entry: renamable $x2 = ADDXri %stack.2.obj3, 0, 0 - renamable $w0 = LDRBBui %stack.0.obj1, 0, debug-location !20 :: (load 1 from %ir.obj1) - renamable $w1 = LDRBBui %stack.1.obj2, 0, debug-location !21 :: (load 1 from %ir.obj2) + renamable $w0 = LDRBBui %stack.0.obj1, 0, debug-location !20 :: (load (s8) from %ir.obj1) + renamable $w1 = LDRBBui %stack.1.obj2, 0, debug-location !21 :: (load (s8) from %ir.obj2) ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp, debug-location !23 BL @bar, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit killed $w0, implicit killed $w1, implicit killed $x2, implicit-def $w0, debug-location !23 ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp, debug-location !23 diff --git a/llvm/test/CodeGen/AArch64/zext-reg-coalesce.mir b/llvm/test/CodeGen/AArch64/zext-reg-coalesce.mir index b31144b409fca..85f6cbafd6037 100644 --- a/llvm/test/CodeGen/AArch64/zext-reg-coalesce.mir +++ b/llvm/test/CodeGen/AArch64/zext-reg-coalesce.mir @@ -22,10 +22,10 @@ body: | %1:gpr32 = COPY $w0 %2:gpr64common = ADRP target-flags(aarch64-page) @c - %3:gpr64 = LDRSBXui %2, target-flags(aarch64-pageoff, aarch64-nc) @c :: (dereferenceable load 1 from @c, align 4) + %3:gpr64 = LDRSBXui %2, target-flags(aarch64-pageoff, aarch64-nc) @c :: (dereferenceable load (s8) from @c, align 4) %0:gpr32 = COPY %3.sub_32 ; CHECK: {{.*}}.sub_32:gpr64 = COPY {{.*}}.sub_32 - STRBBui %1, %2, target-flags(aarch64-pageoff, aarch64-nc) @c :: (store 1 into @c, align 4) + STRBBui %1, %2, target-flags(aarch64-pageoff, aarch64-nc) @c :: (store (s8) into @c, align 4) %8:gpr64all = SUBREG_TO_REG 0, %0, %subreg.sub_32 $x0 = COPY %8 ; CHECK: $x0 = COPY diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir index 993eec9c42a99..9f531307f2e96 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir @@ -170,14 +170,14 @@ body: | ; CHECK-LABEL: name: test_zext_s8_to_s32_of_sext_s8_to_s16 ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8 ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG]], [[C]] ; CHECK: $vgpr0 = COPY [[AND]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s8) = G_LOAD %0 :: (load 1, addrspace 1) + %1:_(s8) = G_LOAD %0 :: (load (s8), addrspace 1) %2:_(s16) = G_SEXT %1 %3:_(s32) = G_ZEXT %2 $vgpr0 = COPY %3 @@ -255,7 +255,7 @@ body: | ; CHECK-LABEL: name: test_zext_v2s8_to_v2s32_of_sext_v2s8_to_v2s16 ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (<2 x s8>), addrspace 1) ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -274,7 +274,7 @@ body: | ; CHECK: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR]] ; CHECK: $vgpr0_vgpr1 = COPY [[AND]](<2 x s32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, addrspace 1) + %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), addrspace 1) %2:_(<2 x s16>) = G_SEXT %1 %3:_(<2 x s32>) = G_ZEXT %2 $vgpr0_vgpr1 = COPY %3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-sext-inreg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-sext-inreg.mir index 619b761436512..163aa61870adb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-sext-inreg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-sext-inreg.mir @@ -11,11 +11,11 @@ body: | ; GCN-LABEL: name: sext_inreg_s32_7_sextload_from_1 ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GCN: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GCN: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GCN: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GCN: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SEXTLOAD]], 7 ; GCN: $vgpr0 = COPY [[SEXT_INREG]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_SEXTLOAD %0 :: (load 1, addrspace 1) + %1:_(s32) = G_SEXTLOAD %0 :: (load (s8), addrspace 1) %2:_(s32) = G_SEXT_INREG %1, 7 $vgpr0 = COPY %2 @@ -31,10 +31,10 @@ body: | ; GCN-LABEL: name: sext_inreg_s32_8_sextload_from_1 ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GCN: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GCN: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GCN: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GCN: $vgpr0 = COPY [[SEXTLOAD]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_SEXTLOAD %0 :: (load 1, addrspace 1) + %1:_(s32) = G_SEXTLOAD %0 :: (load (s8), addrspace 1) %2:_(s32) = G_SEXT_INREG %1, 8 $vgpr0 = COPY %2 @@ -50,10 +50,10 @@ body: | ; GCN-LABEL: name: sext_inreg_s32_9_sextload_from_1 ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GCN: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GCN: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GCN: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GCN: $vgpr0 = COPY [[SEXTLOAD]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_SEXTLOAD %0 :: (load 1, addrspace 1) + %1:_(s32) = G_SEXTLOAD %0 :: (load (s8), addrspace 1) %2:_(s32) = G_SEXT_INREG %1, 9 $vgpr0 = COPY %2 @@ -69,11 +69,11 @@ body: | ; GCN-LABEL: name: sext_inreg_s32_7_sext_from_s8 ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GCN: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GCN: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GCN: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GCN: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SEXTLOAD]], 7 ; GCN: $vgpr0 = COPY [[SEXT_INREG]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s8) = G_LOAD %0 :: (load 1, addrspace 1) + %1:_(s8) = G_LOAD %0 :: (load (s8), addrspace 1) %2:_(s32) = G_SEXT %1 %3:_(s32) = G_SEXT_INREG %2, 7 $vgpr0 = COPY %3 @@ -90,10 +90,10 @@ body: | ; GCN-LABEL: name: sext_inreg_s32_8_sext_from_s8 ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GCN: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GCN: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GCN: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GCN: $vgpr0 = COPY [[SEXTLOAD]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s8) = G_LOAD %0 :: (load 1, addrspace 1) + %1:_(s8) = G_LOAD %0 :: (load (s8), addrspace 1) %2:_(s32) = G_SEXT %1 %3:_(s32) = G_SEXT_INREG %2, 8 $vgpr0 = COPY %3 @@ -110,10 +110,10 @@ body: | ; GCN-LABEL: name: sext_inreg_s32_8_sext_from_s9 ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GCN: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GCN: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GCN: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GCN: $vgpr0 = COPY [[SEXTLOAD]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s8) = G_LOAD %0 :: (load 1, addrspace 1) + %1:_(s8) = G_LOAD %0 :: (load (s8), addrspace 1) %2:_(s32) = G_SEXT %1 %3:_(s32) = G_SEXT_INREG %2, 9 $vgpr0 = COPY %3 @@ -130,12 +130,12 @@ body: | ; GCN-LABEL: name: sext_inreg_v2s32_7_sext_from_v2s8 ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GCN: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GCN: [[LOAD:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GCN: [[LOAD:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[COPY]](p1) :: (load (<2 x s8>), addrspace 1) ; GCN: [[SEXT:%[0-9]+]]:_(<2 x s32>) = G_SEXT [[LOAD]](<2 x s8>) ; GCN: [[SEXT_INREG:%[0-9]+]]:_(<2 x s32>) = G_SEXT_INREG [[SEXT]], 7 ; GCN: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](<2 x s32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, addrspace 1) + %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), addrspace 1) %2:_(<2 x s32>) = G_SEXT %1 %3:_(<2 x s32>) = G_SEXT_INREG %2, 7 $vgpr0_vgpr1 = COPY %3 @@ -152,11 +152,11 @@ body: | ; GCN-LABEL: name: sext_inreg_v2s32_8_sext_from_v2s8 ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GCN: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GCN: [[LOAD:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GCN: [[LOAD:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[COPY]](p1) :: (load (<2 x s8>), addrspace 1) ; GCN: [[SEXT:%[0-9]+]]:_(<2 x s32>) = G_SEXT [[LOAD]](<2 x s8>) ; GCN: $vgpr0_vgpr1 = COPY [[SEXT]](<2 x s32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, addrspace 1) + %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), addrspace 1) %2:_(<2 x s32>) = G_SEXT %1 %3:_(<2 x s32>) = G_SEXT_INREG %2, 8 $vgpr0_vgpr1 = COPY %3 @@ -173,11 +173,11 @@ body: | ; GCN-LABEL: name: sext_inreg_v2s32_9_sext_from_v2s8 ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GCN: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GCN: [[LOAD:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GCN: [[LOAD:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[COPY]](p1) :: (load (<2 x s8>), addrspace 1) ; GCN: [[SEXT:%[0-9]+]]:_(<2 x s32>) = G_SEXT [[LOAD]](<2 x s8>) ; GCN: $vgpr0_vgpr1 = COPY [[SEXT]](<2 x s32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, addrspace 1) + %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), addrspace 1) %2:_(<2 x s32>) = G_SEXT %1 %3:_(<2 x s32>) = G_SEXT_INREG %2, 9 $vgpr0_vgpr1 = COPY %3 @@ -194,11 +194,11 @@ body: | ; GCN-LABEL: name: sext_inreg_s32_7_zextload_from_1 ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GCN: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GCN: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GCN: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GCN: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ZEXTLOAD]], 7 ; GCN: $vgpr0 = COPY [[SEXT_INREG]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_ZEXTLOAD %0 :: (load 1, addrspace 1) + %1:_(s32) = G_ZEXTLOAD %0 :: (load (s8), addrspace 1) %2:_(s32) = G_SEXT_INREG %1, 7 $vgpr0 = COPY %2 @@ -214,11 +214,11 @@ body: | ; GCN-LABEL: name: sext_inreg_s32_8_zextload_from_1 ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GCN: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GCN: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GCN: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GCN: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ZEXTLOAD]], 8 ; GCN: $vgpr0 = COPY [[SEXT_INREG]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_ZEXTLOAD %0 :: (load 1, addrspace 1) + %1:_(s32) = G_ZEXTLOAD %0 :: (load (s8), addrspace 1) %2:_(s32) = G_SEXT_INREG %1, 8 $vgpr0 = COPY %2 @@ -234,10 +234,10 @@ body: | ; GCN-LABEL: name: sext_inreg_s32_9_zextload_from_1 ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GCN: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GCN: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GCN: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GCN: $vgpr0 = COPY [[ZEXTLOAD]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_ZEXTLOAD %0 :: (load 1, addrspace 1) + %1:_(s32) = G_ZEXTLOAD %0 :: (load (s8), addrspace 1) %2:_(s32) = G_SEXT_INREG %1, 9 $vgpr0 = COPY %2 @@ -257,8 +257,8 @@ body: | ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GCN: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]] - ; GCN: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1) - ; GCN: [[SEXTLOAD1:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY1]](p1) :: (load 1, addrspace 1) + ; GCN: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) + ; GCN: [[SEXTLOAD1:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY1]](p1) :: (load (s8), addrspace 1) ; GCN: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SEXTLOAD]], [[SEXTLOAD1]] ; GCN: $vgpr0 = COPY [[SELECT]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 @@ -266,8 +266,8 @@ body: | %2:_(s32) = COPY $vgpr4 %3:_(s32) = COPY $vgpr5 %4:_(s1) = G_ICMP intpred(eq), %2, %3 - %5:_(s32) = G_SEXTLOAD %0 :: (load 1, addrspace 1) - %6:_(s32) = G_SEXTLOAD %1 :: (load 1, addrspace 1) + %5:_(s32) = G_SEXTLOAD %0 :: (load (s8), addrspace 1) + %6:_(s32) = G_SEXTLOAD %1 :: (load (s8), addrspace 1) %7:_(s32) = G_SELECT %4, %5, %6 %8:_(s32) = G_SEXT_INREG %7, 8 $vgpr0 = COPY %8 @@ -288,8 +288,8 @@ body: | ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GCN: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]] - ; GCN: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) - ; GCN: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY1]](p1) :: (load 1, addrspace 1) + ; GCN: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) + ; GCN: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY1]](p1) :: (load (s8), addrspace 1) ; GCN: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[LOAD]], [[SEXTLOAD]] ; GCN: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SELECT]], 8 ; GCN: $vgpr0 = COPY [[SEXT_INREG]](s32) @@ -298,8 +298,8 @@ body: | %2:_(s32) = COPY $vgpr4 %3:_(s32) = COPY $vgpr5 %4:_(s1) = G_ICMP intpred(eq), %2, %3 - %5:_(s32) = G_LOAD %0 :: (load 4, addrspace 1) - %6:_(s32) = G_SEXTLOAD %1 :: (load 1, addrspace 1) + %5:_(s32) = G_LOAD %0 :: (load (s32), addrspace 1) + %6:_(s32) = G_SEXTLOAD %1 :: (load (s8), addrspace 1) %7:_(s32) = G_SELECT %4, %5, %6 %8:_(s32) = G_SEXT_INREG %7, 8 $vgpr0 = COPY %8 @@ -320,8 +320,8 @@ body: | ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GCN: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]] - ; GCN: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1) - ; GCN: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p1) :: (load 4, addrspace 1) + ; GCN: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) + ; GCN: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p1) :: (load (s32), addrspace 1) ; GCN: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SEXTLOAD]], [[LOAD]] ; GCN: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SELECT]], 8 ; GCN: $vgpr0 = COPY [[SEXT_INREG]](s32) @@ -330,8 +330,8 @@ body: | %2:_(s32) = COPY $vgpr4 %3:_(s32) = COPY $vgpr5 %4:_(s1) = G_ICMP intpred(eq), %2, %3 - %5:_(s32) = G_SEXTLOAD %0 :: (load 1, addrspace 1) - %6:_(s32) = G_LOAD %1 :: (load 4, addrspace 1) + %5:_(s32) = G_SEXTLOAD %0 :: (load (s8), addrspace 1) + %6:_(s32) = G_LOAD %1 :: (load (s32), addrspace 1) %7:_(s32) = G_SELECT %4, %5, %6 %8:_(s32) = G_SEXT_INREG %7, 8 $vgpr0 = COPY %8 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll index e6571b9fbb909..dcd114663cd19 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll @@ -9,7 +9,7 @@ define i1 @i1_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load 1 from `i1 addrspace(1)* undef`, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `i1 addrspace(1)* undef`, addrspace 1) ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s1) ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] @@ -24,7 +24,7 @@ define zeroext i1 @i1_zeroext_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load 1 from `i1 addrspace(1)* undef`, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `i1 addrspace(1)* undef`, addrspace 1) ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s1) ; CHECK: $vgpr0 = COPY [[ZEXT]](s32) ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] @@ -39,7 +39,7 @@ define signext i1 @i1_signext_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load 1 from `i1 addrspace(1)* undef`, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `i1 addrspace(1)* undef`, addrspace 1) ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s1) ; CHECK: $vgpr0 = COPY [[SEXT]](s32) ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] @@ -54,7 +54,7 @@ define i7 @i7_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load 1 from `i7 addrspace(1)* undef`, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `i7 addrspace(1)* undef`, addrspace 1) ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s7) ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] @@ -69,7 +69,7 @@ define zeroext i7 @i7_zeroext_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load 1 from `i7 addrspace(1)* undef`, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `i7 addrspace(1)* undef`, addrspace 1) ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s7) ; CHECK: $vgpr0 = COPY [[ZEXT]](s32) ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] @@ -84,7 +84,7 @@ define signext i7 @i7_signext_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load 1 from `i7 addrspace(1)* undef`, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `i7 addrspace(1)* undef`, addrspace 1) ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s7) ; CHECK: $vgpr0 = COPY [[SEXT]](s32) ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] @@ -99,7 +99,7 @@ define i8 @i8_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load 1 from `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1) ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s8) ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] @@ -114,7 +114,7 @@ define zeroext i8 @i8_zeroext_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load 1 from `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1) ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s8) ; CHECK: $vgpr0 = COPY [[ZEXT]](s32) ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] @@ -129,7 +129,7 @@ define signext i8 @i8_signext_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load 1 from `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1) ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s8) ; CHECK: $vgpr0 = COPY [[SEXT]](s32) ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] @@ -144,7 +144,7 @@ define i16 @i16_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load 2 from `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `i16 addrspace(1)* undef`, addrspace 1) ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s16) ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] @@ -159,7 +159,7 @@ define zeroext i16 @i16_zeroext_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load 2 from `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `i16 addrspace(1)* undef`, addrspace 1) ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s16) ; CHECK: $vgpr0 = COPY [[ZEXT]](s32) ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] @@ -174,7 +174,7 @@ define signext i16 @i16_signext_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load 2 from `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `i16 addrspace(1)* undef`, addrspace 1) ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s16) ; CHECK: $vgpr0 = COPY [[SEXT]](s32) ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] @@ -189,7 +189,7 @@ define half @f16_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load 2 from `half addrspace(1)* undef`, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `half addrspace(1)* undef`, addrspace 1) ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s16) ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] @@ -204,7 +204,7 @@ define i24 @i24_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load 3 from `i24 addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `i24 addrspace(1)* undef`, align 4, addrspace 1) ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s24) ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] @@ -219,7 +219,7 @@ define zeroext i24 @i24_zeroext_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load 3 from `i24 addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `i24 addrspace(1)* undef`, align 4, addrspace 1) ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s24) ; CHECK: $vgpr0 = COPY [[ZEXT]](s32) ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] @@ -234,7 +234,7 @@ define signext i24 @i24_signext_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load 3 from `i24 addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `i24 addrspace(1)* undef`, align 4, addrspace 1) ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s24) ; CHECK: $vgpr0 = COPY [[SEXT]](s32) ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] @@ -249,7 +249,7 @@ define <2 x i24> @v2i24_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s24>) = G_LOAD [[DEF]](p1) :: (load 6 from `<2 x i24> addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s24>) = G_LOAD [[DEF]](p1) :: (load (<2 x s24>) from `<2 x i24> addrspace(1)* undef`, align 8, addrspace 1) ; CHECK: [[UV:%[0-9]+]]:_(s24), [[UV1:%[0-9]+]]:_(s24) = G_UNMERGE_VALUES [[LOAD]](<2 x s24>) ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s24) ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s24) @@ -267,7 +267,7 @@ define <3 x i24> @v3i24_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<3 x s24>) = G_LOAD [[DEF]](p1) :: (load 9 from `<3 x i24> addrspace(1)* undef`, align 16, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(<3 x s24>) = G_LOAD [[DEF]](p1) :: (load (<3 x s24>) from `<3 x i24> addrspace(1)* undef`, align 16, addrspace 1) ; CHECK: [[UV:%[0-9]+]]:_(s24), [[UV1:%[0-9]+]]:_(s24), [[UV2:%[0-9]+]]:_(s24) = G_UNMERGE_VALUES [[LOAD]](<3 x s24>) ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s24) ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s24) @@ -287,7 +287,7 @@ define i32 @i32_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (load 4 from `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (load (s32) from `i32 addrspace(1)* undef`, addrspace 1) ; CHECK: $vgpr0 = COPY [[LOAD]](s32) ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 @@ -301,7 +301,7 @@ define i48 @i48_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load 6 from `i48 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s48) ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64) ; CHECK: $vgpr0 = COPY [[UV]](s32) @@ -318,7 +318,7 @@ define signext i48 @i48_signext_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load 6 from `i48 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s48) ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s64) ; CHECK: $vgpr0 = COPY [[UV]](s32) @@ -335,7 +335,7 @@ define zeroext i48 @i48_zeroext_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load 6 from `i48 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s48) ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s64) ; CHECK: $vgpr0 = COPY [[UV]](s32) @@ -352,7 +352,7 @@ define i64 @i64_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[DEF]](p1) :: (load 8 from `i64 addrspace(1)* undef`, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[DEF]](p1) :: (load (s64) from `i64 addrspace(1)* undef`, addrspace 1) ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) @@ -368,7 +368,7 @@ define i65 @i65_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load 9 from `i65 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `i65 addrspace(1)* undef`, align 8, addrspace 1) ; CHECK: [[ANYEXT:%[0-9]+]]:_(s96) = G_ANYEXT [[LOAD]](s65) ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s96) ; CHECK: $vgpr0 = COPY [[UV]](s32) @@ -386,7 +386,7 @@ define signext i65 @i65_signext_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load 9 from `i65 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `i65 addrspace(1)* undef`, align 8, addrspace 1) ; CHECK: [[SEXT:%[0-9]+]]:_(s96) = G_SEXT [[LOAD]](s65) ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s96) ; CHECK: $vgpr0 = COPY [[UV]](s32) @@ -404,7 +404,7 @@ define zeroext i65 @i65_zeroext_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load 9 from `i65 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `i65 addrspace(1)* undef`, align 8, addrspace 1) ; CHECK: [[ZEXT:%[0-9]+]]:_(s96) = G_ZEXT [[LOAD]](s65) ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s96) ; CHECK: $vgpr0 = COPY [[UV]](s32) @@ -422,7 +422,7 @@ define float @f32_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (load 4 from `float addrspace(1)* undef`, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (load (s32) from `float addrspace(1)* undef`, addrspace 1) ; CHECK: $vgpr0 = COPY [[LOAD]](s32) ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 @@ -436,7 +436,7 @@ define double @f64_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[DEF]](p1) :: (load 8 from `double addrspace(1)* undef`, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[DEF]](p1) :: (load (s64) from `double addrspace(1)* undef`, addrspace 1) ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) @@ -452,7 +452,7 @@ define <2 x double> @v2f64_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[DEF]](p1) :: (load 16 from `<2 x double> addrspace(1)* undef`, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[DEF]](p1) :: (load (<2 x s64>) from `<2 x double> addrspace(1)* undef`, addrspace 1) ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) @@ -470,7 +470,7 @@ define <2 x i32> @v2i32_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[DEF]](p1) :: (load 8 from `<2 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[DEF]](p1) :: (load (<2 x s32>) from `<2 x i32> addrspace(1)* undef`, addrspace 1) ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) @@ -486,7 +486,7 @@ define <3 x i32> @v3i32_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[DEF]](p1) :: (load 12 from `<3 x i32> addrspace(1)* undef`, align 16, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[DEF]](p1) :: (load (<3 x s32>) from `<3 x i32> addrspace(1)* undef`, align 16, addrspace 1) ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<3 x s32>) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) @@ -503,7 +503,7 @@ define <4 x i32> @v4i32_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (load 16 from `<4 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (load (<4 x s32>) from `<4 x i32> addrspace(1)* undef`, addrspace 1) ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) @@ -521,7 +521,7 @@ define <5 x i32> @v5i32_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<5 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load 20 from `<5 x i32> addrspace(1)* undef`, align 32, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(<5 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<5 x s32>) from `<5 x i32> addrspace(1)* undef`, align 32, addrspace 1) ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<5 x s32>) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) @@ -540,8 +540,8 @@ define <8 x i32> @v8i32_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load 8 from `<8 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[LOAD]](p1) :: (load 32 from %ir.ptr, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<8 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s32>) from %ir.ptr, addrspace 1) ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s32>) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) @@ -564,8 +564,8 @@ define <16 x i32> @v16i32_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load 8 from `<16 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[LOAD]](p1) :: (load 64 from %ir.ptr, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s32>) from %ir.ptr, addrspace 1) ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) @@ -596,8 +596,8 @@ define <32 x i32> @v32i32_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load 8 from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load 128 from %ir.ptr, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr, addrspace 1) ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) @@ -644,7 +644,7 @@ define <2 x i64> @v2i64_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[DEF]](p1) :: (load 16 from `<2 x i64> addrspace(1)* undef`, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[DEF]](p1) :: (load (<2 x s64>) from `<2 x i64> addrspace(1)* undef`, addrspace 1) ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) @@ -662,8 +662,8 @@ define <3 x i64> @v3i64_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load 8 from `<3 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<3 x s64>) = G_LOAD [[LOAD]](p1) :: (load 24 from %ir.ptr, align 32, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<3 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<3 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<3 x s64>) from %ir.ptr, align 32, addrspace 1) ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s64>) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) @@ -684,8 +684,8 @@ define <4 x i64> @v4i64_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load 8 from `<4 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[LOAD]](p1) :: (load 32 from %ir.ptr, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<4 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<4 x s64>) from %ir.ptr, addrspace 1) ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<4 x s64>) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) @@ -708,8 +708,8 @@ define <5 x i64> @v5i64_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load 8 from `<5 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<5 x s64>) = G_LOAD [[LOAD]](p1) :: (load 40 from %ir.ptr, align 64, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<5 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<5 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<5 x s64>) from %ir.ptr, align 64, addrspace 1) ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<5 x s64>) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) @@ -734,8 +734,8 @@ define <8 x i64> @v8i64_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load 8 from `<8 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<8 x s64>) = G_LOAD [[LOAD]](p1) :: (load 64 from %ir.ptr, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<8 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<8 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s64>) from %ir.ptr, addrspace 1) ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s64>) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) @@ -766,8 +766,8 @@ define <16 x i64> @v16i64_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load 8 from `<16 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s64>) = G_LOAD [[LOAD]](p1) :: (load 128 from %ir.ptr, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s64>) from %ir.ptr, addrspace 1) ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s64>) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) @@ -814,7 +814,7 @@ define <2 x i16> @v2i16_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load 4 from `<2 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load (<2 x s16>) from `<2 x i16> addrspace(1)* undef`, addrspace 1) ; CHECK: $vgpr0 = COPY [[LOAD]](<2 x s16>) ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 @@ -828,7 +828,7 @@ define <2 x half> @v2f16_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load 4 from `<2 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load (<2 x s16>) from `<2 x half> addrspace(1)* undef`, addrspace 1) ; CHECK: $vgpr0 = COPY [[LOAD]](<2 x s16>) ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 @@ -842,7 +842,7 @@ define <3 x i16> @v3i16_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: (load 6 from `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: (load (<3 x s16>) from `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) ; CHECK: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[DEF1]](<3 x s16>) ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) @@ -860,7 +860,7 @@ define <4 x i16> @v4i16_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load 8 from `<4 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load (<4 x s16>) from `<4 x i16> addrspace(1)* undef`, addrspace 1) ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; CHECK: $vgpr0 = COPY [[UV]](<2 x s16>) ; CHECK: $vgpr1 = COPY [[UV1]](<2 x s16>) @@ -876,7 +876,7 @@ define <4 x half> @v4f16_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load 8 from `<4 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load (<4 x s16>) from `<4 x half> addrspace(1)* undef`, addrspace 1) ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; CHECK: $vgpr0 = COPY [[UV]](<2 x s16>) ; CHECK: $vgpr1 = COPY [[UV1]](<2 x s16>) @@ -892,8 +892,8 @@ define <5 x i16> @v5i16_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load 8 from `<5 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[LOAD]](p1) :: (load 10 from %ir.ptr, align 16, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<5 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<5 x s16>) from %ir.ptr, align 16, addrspace 1) ; CHECK: [[DEF1:%[0-9]+]]:_(<5 x s16>) = G_IMPLICIT_DEF ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD1]](<5 x s16>), [[DEF1]](<5 x s16>) ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>) @@ -913,8 +913,8 @@ define <8 x i16> @v8i16_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load 8 from `<8 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[LOAD]](p1) :: (load 16 from %ir.ptr, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<8 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s16>) from %ir.ptr, addrspace 1) ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD1]](<8 x s16>) ; CHECK: $vgpr0 = COPY [[UV]](<2 x s16>) ; CHECK: $vgpr1 = COPY [[UV1]](<2 x s16>) @@ -933,8 +933,8 @@ define <16 x i16> @v16i16_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load 8 from `<16 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[LOAD]](p1) :: (load 32 from %ir.ptr, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s16>) from %ir.ptr, addrspace 1) ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD1]](<16 x s16>) ; CHECK: $vgpr0 = COPY [[UV]](<2 x s16>) ; CHECK: $vgpr1 = COPY [[UV1]](<2 x s16>) @@ -957,8 +957,8 @@ define <16 x i8> @v16i8_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load 8 from `<16 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[LOAD]](p1) :: (load 16 from %ir.ptr, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s8>) from %ir.ptr, addrspace 1) ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<16 x s8>) ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) @@ -1021,7 +1021,7 @@ define <2 x i8> @v2i8_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[DEF]](p1) :: (load 2 from `<2 x i8> addrspace(1)* undef`, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[DEF]](p1) :: (load (<2 x s8>) from `<2 x i8> addrspace(1)* undef`, addrspace 1) ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD]](<2 x s8>) ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) @@ -1041,7 +1041,7 @@ define <3 x i8> @v3i8_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[DEF]](p1) :: (load 3 from `<3 x i8> addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[DEF]](p1) :: (load (<3 x s8>) from `<3 x i8> addrspace(1)* undef`, align 4, addrspace 1) ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD]](<3 x s8>) ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) @@ -1064,8 +1064,8 @@ define <4 x i8> @v4i8_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load 8 from `<4 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[LOAD]](p1) :: (load 4 from %ir.ptr, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<4 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<4 x s8>) from %ir.ptr, addrspace 1) ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<4 x s8>) ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) @@ -1092,10 +1092,10 @@ define {i8, i32} @struct_i8_i32_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load 1 from `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 4 from `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s8) ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) ; CHECK: $vgpr1 = COPY [[LOAD1]](s32) @@ -1113,12 +1113,12 @@ define void @void_func_sret_struct_i8_i32({ i8, i32 } addrspace(5)* sret({ i8, i ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[COPY2:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile load 1 from `i8 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY2]](p1) :: (volatile load 4 from `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile load (s8) from `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY2]](p1) :: (volatile load (s32) from `i32 addrspace(1)* undef`, addrspace 1) ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CHECK: G_STORE [[LOAD]](s8), [[COPY]](p5) :: (store 1 into %ir.gep01, addrspace 5) - ; CHECK: G_STORE [[LOAD1]](s32), [[PTR_ADD]](p5) :: (store 4 into %ir.gep1, addrspace 5) + ; CHECK: G_STORE [[LOAD]](s8), [[COPY]](p5) :: (store (s8) into %ir.gep01, addrspace 5) + ; CHECK: G_STORE [[LOAD1]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.gep1, addrspace 5) ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] ; CHECK: S_SETPC_B64_return [[COPY3]] %val0 = load volatile i8, i8 addrspace(1)* undef @@ -1141,9 +1141,9 @@ define <33 x i32> @v33i32_func_void() #0 { ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load 8 from `<33 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[LOAD]](p1) :: (load 132 from %ir.ptr, align 256, addrspace 1) - ; CHECK: G_STORE [[LOAD1]](<33 x s32>), [[COPY]](p5) :: (store 132, align 256, addrspace 5) + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<33 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<33 x s32>) from %ir.ptr, align 256, addrspace 1) + ; CHECK: G_STORE [[LOAD1]](<33 x s32>), [[COPY]](p5) :: (store (s1056), align 256, addrspace 5) ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] ; CHECK: S_SETPC_B64_return [[COPY2]] %ptr = load volatile <33 x i32> addrspace(1)*, <33 x i32> addrspace(1)* addrspace(4)* undef @@ -1166,8 +1166,8 @@ define <33 x i32> @v33i32_func_v33i32_i32(<33 x i32> addrspace(1)* %p, i32 %idx) ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]] ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[MUL]](s64) ; CHECK: [[COPY5:%[0-9]+]]:_(p1) = COPY [[PTR_ADD]](p1) - ; CHECK: [[LOAD:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[COPY5]](p1) :: (load 132 from %ir.gep, align 256, addrspace 1) - ; CHECK: G_STORE [[LOAD]](<33 x s32>), [[COPY]](p5) :: (store 132, align 256, addrspace 5) + ; CHECK: [[LOAD:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[COPY5]](p1) :: (load (<33 x s32>) from %ir.gep, align 256, addrspace 1) + ; CHECK: G_STORE [[LOAD]](<33 x s32>), [[COPY]](p5) :: (store (s1056), align 256, addrspace 5) ; CHECK: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] ; CHECK: S_SETPC_B64_return [[COPY6]] %gep = getelementptr inbounds <33 x i32>, <33 x i32> addrspace(1)* %p, i32 %idx @@ -1182,15 +1182,15 @@ define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 { ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load 8 from `{ <32 x i32>, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load 128 from %ir.ptr, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `{ <32 x i32>, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr, addrspace 1) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 4 from %ir.ptr + 128, align 128, addrspace 1) - ; CHECK: G_STORE [[LOAD1]](<32 x s32>), [[COPY]](p5) :: (store 128, addrspace 5) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from %ir.ptr + 128, align 128, addrspace 1) + ; CHECK: G_STORE [[LOAD1]](<32 x s32>), [[COPY]](p5) :: (store (s1024), addrspace 5) ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CHECK: G_STORE [[LOAD2]](s32), [[PTR_ADD1]](p5) :: (store 4, align 128, addrspace 5) + ; CHECK: G_STORE [[LOAD2]](s32), [[PTR_ADD1]](p5) :: (store (s32), align 128, addrspace 5) ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] ; CHECK: S_SETPC_B64_return [[COPY2]] %ptr = load volatile { <32 x i32>, i32 } addrspace(1)*, { <32 x i32>, i32 } addrspace(1)* addrspace(4)* undef @@ -1205,15 +1205,15 @@ define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 { ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load 8 from `{ i32, <32 x i32> } addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p1) :: (load 4 from %ir.ptr, align 128, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `{ i32, <32 x i32> } addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p1) :: (load (s32) from %ir.ptr, align 128, addrspace 1) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load 128 from %ir.ptr + 128, addrspace 1) - ; CHECK: G_STORE [[LOAD1]](s32), [[COPY]](p5) :: (store 4, align 128, addrspace 5) + ; CHECK: [[LOAD2:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<32 x s32>) from %ir.ptr + 128, addrspace 1) + ; CHECK: G_STORE [[LOAD1]](s32), [[COPY]](p5) :: (store (s32), align 128, addrspace 5) ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CHECK: G_STORE [[LOAD2]](<32 x s32>), [[PTR_ADD1]](p5) :: (store 128, addrspace 5) + ; CHECK: G_STORE [[LOAD2]](<32 x s32>), [[PTR_ADD1]](p5) :: (store (s1024), addrspace 5) ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] ; CHECK: S_SETPC_B64_return [[COPY2]] %ptr = load volatile { i32, <32 x i32> } addrspace(1)*, { i32, <32 x i32> } addrspace(1)* addrspace(4)* undef @@ -1233,10 +1233,10 @@ define { <3 x i32>, i32 } @v3i32_struct_func_void_wasted_reg() #0 { ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CHECK: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load 4 from `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load 4 from `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load 4 from `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load 4 from `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) ; CHECK: [[IVEC:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[DEF1]], [[LOAD]](s32), [[C]](s32) ; CHECK: [[IVEC1:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[LOAD1]](s32), [[C1]](s32) ; CHECK: [[IVEC2:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC1]], [[LOAD2]](s32), [[C2]](s32) @@ -1272,10 +1272,10 @@ define { <3 x float>, i32 } @v3f32_struct_func_void_wasted_reg() #0 { ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CHECK: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load 4 from `float addrspace(3)* undef`, addrspace 3) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load 4 from `float addrspace(3)* undef`, addrspace 3) - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load 4 from `float addrspace(3)* undef`, addrspace 3) - ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p3) :: (volatile load 4 from `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `float addrspace(3)* undef`, addrspace 3) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `float addrspace(3)* undef`, addrspace 3) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `float addrspace(3)* undef`, addrspace 3) + ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) ; CHECK: [[IVEC:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[DEF1]], [[LOAD]](s32), [[C]](s32) ; CHECK: [[IVEC1:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[LOAD1]](s32), [[C1]](s32) ; CHECK: [[IVEC2:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC1]], [[LOAD2]](s32), [[C2]](s32) @@ -1313,9 +1313,9 @@ define void @void_func_sret_max_known_zero_bits(i8 addrspace(5)* sret(i8) %arg0) ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32) ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C1]](s32) ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C2]](s32) - ; CHECK: G_STORE [[LSHR]](s32), [[DEF]](p3) :: (volatile store 4 into `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: G_STORE [[LSHR1]](s32), [[DEF]](p3) :: (volatile store 4 into `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: G_STORE [[LSHR2]](s32), [[DEF]](p3) :: (volatile store 4 into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK: G_STORE [[LSHR]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK: G_STORE [[LSHR1]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK: G_STORE [[LSHR2]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] ; CHECK: S_SETPC_B64_return [[COPY2]] %arg0.int = ptrtoint i8 addrspace(5)* %arg0 to i32 @@ -1336,7 +1336,7 @@ define i1022 @i1022_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load 128 from `i1022 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `i1022 addrspace(1)* undef`, align 8, addrspace 1) ; CHECK: [[ANYEXT:%[0-9]+]]:_(s1024) = G_ANYEXT [[LOAD]](s1022) ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s1024) ; CHECK: $vgpr0 = COPY [[UV]](s32) @@ -1383,7 +1383,7 @@ define signext i1022 @i1022_signext_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load 128 from `i1022 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `i1022 addrspace(1)* undef`, align 8, addrspace 1) ; CHECK: [[SEXT:%[0-9]+]]:_(s1024) = G_SEXT [[LOAD]](s1022) ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s1024) ; CHECK: $vgpr0 = COPY [[UV]](s32) @@ -1430,7 +1430,7 @@ define zeroext i1022 @i1022_zeroext_func_void() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load 128 from `i1022 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `i1022 addrspace(1)* undef`, align 8, addrspace 1) ; CHECK: [[ZEXT:%[0-9]+]]:_(s1024) = G_ZEXT [[LOAD]](s1022) ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s1024) ; CHECK: $vgpr0 = COPY [[UV]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll index d1e86b4139d03..177fbbd68d539 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll @@ -19,7 +19,7 @@ define i32 addrspace(4)* @external_constant_got() { ; GCN: liveins: $sgpr30_sgpr31 ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_constant + 4, target-flags(amdgpu-gotprel32-hi) @external_constant + 12, implicit-def $scc - ; GCN: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load 8 from got, addrspace 4) + ; GCN: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load (s64) from got, addrspace 4) ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p4) ; GCN: $vgpr0 = COPY [[UV]](s32) ; GCN: $vgpr1 = COPY [[UV1]](s32) @@ -34,7 +34,7 @@ define i32 addrspace(1)* @external_global_got() { ; GCN: liveins: $sgpr30_sgpr31 ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_global + 4, target-flags(amdgpu-gotprel32-hi) @external_global + 12, implicit-def $scc - ; GCN: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load 8 from got, addrspace 4) + ; GCN: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load (s64) from got, addrspace 4) ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p1) ; GCN: $vgpr0 = COPY [[UV]](s32) ; GCN: $vgpr1 = COPY [[UV1]](s32) @@ -49,7 +49,7 @@ define i32 addrspace(999)* @external_other_got() { ; GCN: liveins: $sgpr30_sgpr31 ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_other + 4, target-flags(amdgpu-gotprel32-hi) @external_other + 12, implicit-def $scc - ; GCN: [[LOAD:%[0-9]+]]:_(p999) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load 8 from got, addrspace 4) + ; GCN: [[LOAD:%[0-9]+]]:_(p999) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load (s64) from got, addrspace 4) ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p999) ; GCN: $vgpr0 = COPY [[UV]](s32) ; GCN: $vgpr1 = COPY [[UV1]](s32) @@ -106,7 +106,7 @@ define i32 addrspace(6)* @external_constant32_got() { ; GCN: liveins: $sgpr30_sgpr31 ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_constant32 + 4, target-flags(amdgpu-gotprel32-hi) @external_constant32 + 12, implicit-def $scc - ; GCN: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load 8 from got, addrspace 4) + ; GCN: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load (s64) from got, addrspace 4) ; GCN: [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[LOAD]](p4), 0 ; GCN: $vgpr0 = COPY [[EXTRACT]](p6) ; GCN: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/image_ls_mipmap_zero.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/image_ls_mipmap_zero.a16.ll index 2410fcb312690..8d1967fe78e9c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/image_ls_mipmap_zero.a16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/image_ls_mipmap_zero.a16.ll @@ -19,7 +19,7 @@ define amdgpu_ps <4 x float> @load_mip_1d(<8 x i32> inreg %rsrc, i16 %s) { ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY8]](s32) ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[DEF]](s32) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -42,7 +42,7 @@ define amdgpu_ps <4 x float> @load_mip_1d(<8 x i32> inreg %rsrc, i16 %s) { ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY8]](s32) ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[DEF]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -72,7 +72,7 @@ define amdgpu_ps <4 x float> @load_mip_2d(<8 x i32> inreg %rsrc, i16 %s, i16 %t) ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY8]](s32) ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -96,7 +96,7 @@ define amdgpu_ps <4 x float> @load_mip_2d(<8 x i32> inreg %rsrc, i16 %s, i16 %t) ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY8]](s32) ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -131,7 +131,7 @@ define amdgpu_ps <4 x float> @load_mip_3d(<8 x i32> inreg %rsrc, i16 %s, i16 %t, ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -160,7 +160,7 @@ define amdgpu_ps <4 x float> @load_mip_3d(<8 x i32> inreg %rsrc, i16 %s, i16 %t, ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[DEF]](s32) ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -190,7 +190,7 @@ define amdgpu_ps <4 x float> @load_mip_1darray(<8 x i32> inreg %rsrc, i16 %s, i1 ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY8]](s32) ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1darray), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1darray), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -214,7 +214,7 @@ define amdgpu_ps <4 x float> @load_mip_1darray(<8 x i32> inreg %rsrc, i16 %s, i1 ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY8]](s32) ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1darray), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1darray), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -249,7 +249,7 @@ define amdgpu_ps <4 x float> @load_mip_2darray(<8 x i32> inreg %rsrc, i16 %s, i1 ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -278,7 +278,7 @@ define amdgpu_ps <4 x float> @load_mip_2darray(<8 x i32> inreg %rsrc, i16 %s, i1 ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[DEF]](s32) ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -313,7 +313,7 @@ define amdgpu_ps <4 x float> @load_mip_cube(<8 x i32> inreg %rsrc, i16 %s, i16 % ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -342,7 +342,7 @@ define amdgpu_ps <4 x float> @load_mip_cube(<8 x i32> inreg %rsrc, i16 %s, i16 % ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[DEF]](s32) ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -376,7 +376,7 @@ define amdgpu_ps void @store_mip_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[DEF]](s32) - ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "ImageResource") + ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX9: S_ENDPGM 0 ; GFX10-LABEL: name: store_mip_1d ; GFX10: bb.1.main_body: @@ -399,7 +399,7 @@ define amdgpu_ps void @store_mip_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[DEF]](s32) - ; GFX10: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "ImageResource") + ; GFX10: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX10: S_ENDPGM 0 main_body: call void @llvm.amdgcn.image.store.mip.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 0, <8 x i32> %rsrc, i32 0, i32 0) @@ -429,7 +429,7 @@ define amdgpu_ps void @store_mip_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) - ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "ImageResource") + ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX9: S_ENDPGM 0 ; GFX10-LABEL: name: store_mip_2d ; GFX10: bb.1.main_body: @@ -453,7 +453,7 @@ define amdgpu_ps void @store_mip_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) - ; GFX10: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "ImageResource") + ; GFX10: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX10: S_ENDPGM 0 main_body: call void @llvm.amdgcn.image.store.mip.2d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 0, <8 x i32> %rsrc, i32 0, i32 0) @@ -488,7 +488,7 @@ define amdgpu_ps void @store_mip_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "ImageResource") + ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX9: S_ENDPGM 0 ; GFX10-LABEL: name: store_mip_3d ; GFX10: bb.1.main_body: @@ -517,7 +517,7 @@ define amdgpu_ps void @store_mip_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32) ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX10: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "ImageResource") + ; GFX10: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX10: S_ENDPGM 0 main_body: call void @llvm.amdgcn.image.store.mip.3d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %u, i16 0, <8 x i32> %rsrc, i32 0, i32 0) @@ -547,7 +547,7 @@ define amdgpu_ps void @store_mip_1darray(<8 x i32> inreg %rsrc, <4 x float> %vda ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) - ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "ImageResource") + ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX9: S_ENDPGM 0 ; GFX10-LABEL: name: store_mip_1darray ; GFX10: bb.1.main_body: @@ -571,7 +571,7 @@ define amdgpu_ps void @store_mip_1darray(<8 x i32> inreg %rsrc, <4 x float> %vda ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) - ; GFX10: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "ImageResource") + ; GFX10: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX10: S_ENDPGM 0 main_body: call void @llvm.amdgcn.image.store.mip.1darray.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 0, <8 x i32> %rsrc, i32 0, i32 0) @@ -606,7 +606,7 @@ define amdgpu_ps void @store_mip_2darray(<8 x i32> inreg %rsrc, <4 x float> %vda ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "ImageResource") + ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX9: S_ENDPGM 0 ; GFX10-LABEL: name: store_mip_2darray ; GFX10: bb.1.main_body: @@ -635,7 +635,7 @@ define amdgpu_ps void @store_mip_2darray(<8 x i32> inreg %rsrc, <4 x float> %vda ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32) ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX10: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "ImageResource") + ; GFX10: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX10: S_ENDPGM 0 main_body: call void @llvm.amdgcn.image.store.mip.2darray.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %u, i16 0, <8 x i32> %rsrc, i32 0, i32 0) @@ -670,7 +670,7 @@ define amdgpu_ps void @store_mip_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "ImageResource") + ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX9: S_ENDPGM 0 ; GFX10-LABEL: name: store_mip_cube ; GFX10: bb.1.main_body: @@ -699,7 +699,7 @@ define amdgpu_ps void @store_mip_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32) ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX10: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "ImageResource") + ; GFX10: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX10: S_ENDPGM 0 main_body: call void @llvm.amdgcn.image.store.mip.cube.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %u, i16 0, <8 x i32> %rsrc, i32 0, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir index d73e7e41dc867..157f5f9d53dd3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir @@ -19,7 +19,7 @@ body: | ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX7: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) + ; GFX7: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 @@ -27,7 +27,7 @@ body: | ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX9: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) + ; GFX9: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX9: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 @@ -35,13 +35,13 @@ body: | ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX10: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) + ; GFX10: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX10: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = COPY $vgpr3 %3:vgpr(<2 x s32>) = G_BUILD_VECTOR %1, %2 - %4:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %0, %3 :: (load store seq_cst 4, addrspace 0) + %4:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %0, %3 :: (load store seq_cst (s32), addrspace 0) $vgpr0 = COPY %4 ... @@ -71,7 +71,7 @@ body: | ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec ; GFX7: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 - ; GFX7: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) + ; GFX7: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gep4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 @@ -79,7 +79,7 @@ body: | ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX9: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) + ; GFX9: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX9: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gep4 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 @@ -97,7 +97,7 @@ body: | ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec ; GFX10: %12:vgpr_32, dead %14:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 - ; GFX10: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) + ; GFX10: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX10: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 @@ -105,7 +105,7 @@ body: | %3:vgpr(<2 x s32>) = G_BUILD_VECTOR %1, %2 %4:vgpr(s64) = G_CONSTANT i64 4 %5:vgpr(p0) = G_PTR_ADD %0, %4 - %6:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %5, %3 :: (load store seq_cst 4, addrspace 0) + %6:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %5, %3 :: (load store seq_cst (s32), addrspace 0) $vgpr0 = COPY %6 ... @@ -125,7 +125,7 @@ body: | ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX7: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX7: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8) + ; GFX7: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 @@ -133,7 +133,7 @@ body: | ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX9: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX9: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8) + ; GFX9: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 @@ -141,13 +141,13 @@ body: | ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX10: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX10: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8) + ; GFX10: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX10: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = COPY $vgpr4_vgpr5 %3:vgpr(<2 x s64>) = G_BUILD_VECTOR %1, %2 - %4:vgpr(s64) = G_AMDGPU_ATOMIC_CMPXCHG %0, %3 :: (load store seq_cst 8, addrspace 0) + %4:vgpr(s64) = G_AMDGPU_ATOMIC_CMPXCHG %0, %3 :: (load store seq_cst (s64), addrspace 0) $vgpr0_vgpr1 = COPY %4 ... @@ -177,7 +177,7 @@ body: | ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec ; GFX7: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 - ; GFX7: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8) + ; GFX7: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_gep4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 @@ -185,7 +185,7 @@ body: | ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX9: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX9: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8) + ; GFX9: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_gep4 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 @@ -203,7 +203,7 @@ body: | ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec ; GFX10: %12:vgpr_32, dead %14:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 - ; GFX10: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8) + ; GFX10: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX10: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 @@ -211,7 +211,7 @@ body: | %3:vgpr(<2 x s64>) = G_BUILD_VECTOR %1, %2 %4:vgpr(s64) = G_CONSTANT i64 4 %5:vgpr(p0) = G_PTR_ADD %0, %4 - %6:vgpr(s64) = G_AMDGPU_ATOMIC_CMPXCHG %5, %3 :: (load store seq_cst 8, addrspace 0) + %6:vgpr(s64) = G_AMDGPU_ATOMIC_CMPXCHG %5, %3 :: (load store seq_cst (s64), addrspace 0) $vgpr0_vgpr1 = COPY %6 ... @@ -241,7 +241,7 @@ body: | ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec ; GFX7: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 - ; GFX7: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) + ; GFX7: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gepm4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 @@ -259,7 +259,7 @@ body: | ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec ; GFX9: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 - ; GFX9: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) + ; GFX9: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX9: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gepm4 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 @@ -277,7 +277,7 @@ body: | ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec ; GFX10: %12:vgpr_32, dead %14:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 - ; GFX10: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) + ; GFX10: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX10: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 @@ -285,7 +285,7 @@ body: | %3:vgpr(<2 x s32>) = G_BUILD_VECTOR %1, %2 %4:vgpr(s64) = G_CONSTANT i64 -4 %5:vgpr(p0) = G_PTR_ADD %0, %4 - %6:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %5, %3 :: (load store seq_cst 4, addrspace 0) + %6:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %5, %3 :: (load store seq_cst (s32), addrspace 0) $vgpr0 = COPY %6 ... @@ -305,26 +305,26 @@ body: | ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX7: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) + ; GFX7: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX9: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) + ; GFX9: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX10: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) + ; GFX10: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = COPY $vgpr3 %3:vgpr(<2 x s32>) = G_BUILD_VECTOR %1, %2 - %4:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %0, %3 :: (load store seq_cst 4, addrspace 0) + %4:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %0, %3 :: (load store seq_cst (s32), addrspace 0) ... @@ -343,25 +343,25 @@ body: | ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX7: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX7: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8) + ; GFX7: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX9: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX9: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8) + ; GFX9: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX10: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX10: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8) + ; GFX10: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = COPY $vgpr4_vgpr5 %3:vgpr(<2 x s64>) = G_BUILD_VECTOR %1, %2 - %4:vgpr(s64) = G_AMDGPU_ATOMIC_CMPXCHG %0, %3 :: (load store seq_cst 8, addrspace 0) + %4:vgpr(s64) = G_AMDGPU_ATOMIC_CMPXCHG %0, %3 :: (load store seq_cst (s64), addrspace 0) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir index 3bcc0932fcb63..5361e77927cfe 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir @@ -26,7 +26,7 @@ body: | ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX6: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX6: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 ; GFX6: $vgpr0 = COPY [[COPY3]] ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global @@ -40,7 +40,7 @@ body: | ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX7: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX7: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 ; GFX7: $vgpr0 = COPY [[COPY3]] ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global @@ -49,7 +49,7 @@ body: | ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX7-FLAT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX7-FLAT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1) + ; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 @@ -57,7 +57,7 @@ body: | ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1) + ; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX8: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 @@ -65,7 +65,7 @@ body: | ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]] ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 @@ -73,13 +73,13 @@ body: | ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = COPY $vgpr3 %3:vgpr(<2 x s32>) = G_BUILD_VECTOR %1, %2 - %4:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %0, %3 :: (load store seq_cst 4, addrspace 1) + %4:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %0, %3 :: (load store seq_cst (s32), addrspace 1) $vgpr0 = COPY %4 ... @@ -104,7 +104,7 @@ body: | ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX6: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX6: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 ; GFX6: $vgpr0 = COPY [[COPY3]] ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4 @@ -118,7 +118,7 @@ body: | ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX7: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX7: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 ; GFX7: $vgpr0 = COPY [[COPY3]] ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4 @@ -137,7 +137,7 @@ body: | ; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec ; GFX7-FLAT: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 - ; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1) + ; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 @@ -155,7 +155,7 @@ body: | ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec ; GFX8: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 - ; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1) + ; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX8: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 @@ -163,7 +163,7 @@ body: | ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]] ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 @@ -171,7 +171,7 @@ body: | ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 @@ -179,7 +179,7 @@ body: | %3:vgpr(<2 x s32>) = G_BUILD_VECTOR %1, %2 %4:vgpr(s64) = G_CONSTANT i64 4 %5:vgpr(p1) = G_PTR_ADD %0, %4 - %6:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %5, %3 :: (load store seq_cst 4, addrspace 1) + %6:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %5, %3 :: (load store seq_cst (s32), addrspace 1) $vgpr0 = COPY %6 ... @@ -204,7 +204,7 @@ body: | ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX6: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1) + ; GFX6: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX6: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1 ; GFX6: $vgpr0_vgpr1 = COPY [[COPY3]] ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s64_global @@ -218,7 +218,7 @@ body: | ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX7: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1) + ; GFX7: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX7: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1 ; GFX7: $vgpr0_vgpr1 = COPY [[COPY3]] ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s64_global @@ -227,7 +227,7 @@ body: | ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX7-FLAT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX7-FLAT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1) + ; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) ; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s64_global ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 @@ -235,7 +235,7 @@ body: | ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX8: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX8: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1) + ; GFX8: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) ; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_global ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 @@ -243,7 +243,7 @@ body: | ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX9: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1) + ; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX9: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN]] ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_global ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 @@ -251,13 +251,13 @@ body: | ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX10: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1) + ; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX10: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = COPY $vgpr4_vgpr5 %3:vgpr(<2 x s64>) = G_BUILD_VECTOR %1, %2 - %4:vgpr(s64) = G_AMDGPU_ATOMIC_CMPXCHG %0, %3 :: (load store seq_cst 8, addrspace 1) + %4:vgpr(s64) = G_AMDGPU_ATOMIC_CMPXCHG %0, %3 :: (load store seq_cst (s64), addrspace 1) $vgpr0_vgpr1 = COPY %4 ... @@ -282,7 +282,7 @@ body: | ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX6: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst 8, addrspace 1) + ; GFX6: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX6: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1 ; GFX6: $vgpr0_vgpr1 = COPY [[COPY3]] ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4 @@ -296,7 +296,7 @@ body: | ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX7: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst 8, addrspace 1) + ; GFX7: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX7: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1 ; GFX7: $vgpr0_vgpr1 = COPY [[COPY3]] ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4 @@ -315,7 +315,7 @@ body: | ; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec ; GFX7-FLAT: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 - ; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1) + ; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) ; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 @@ -333,7 +333,7 @@ body: | ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec ; GFX8: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 - ; GFX8: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1) + ; GFX8: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) ; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]] ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 @@ -341,7 +341,7 @@ body: | ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX9: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec :: (load store seq_cst 8, addrspace 1) + ; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX9: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN]] ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 @@ -349,7 +349,7 @@ body: | ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX10: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec :: (load store seq_cst 8, addrspace 1) + ; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX10: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 @@ -357,7 +357,7 @@ body: | %3:vgpr(<2 x s64>) = G_BUILD_VECTOR %1, %2 %4:vgpr(s64) = G_CONSTANT i64 4 %5:vgpr(p1) = G_PTR_ADD %0, %4 - %6:vgpr(s64) = G_AMDGPU_ATOMIC_CMPXCHG %5, %3 :: (load store seq_cst 8, addrspace 1) + %6:vgpr(s64) = G_AMDGPU_ATOMIC_CMPXCHG %5, %3 :: (load store seq_cst (s64), addrspace 1) $vgpr0_vgpr1 = COPY %6 ... @@ -392,7 +392,7 @@ body: | ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE3]], %subreg.sub2_sub3 - ; GFX6: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE4]], 0, 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX6: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE4]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 ; GFX6: $vgpr0 = COPY [[COPY7]] ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4 @@ -416,7 +416,7 @@ body: | ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE3]], %subreg.sub2_sub3 - ; GFX7: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE4]], 0, 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX7: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE4]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 ; GFX7: $vgpr0 = COPY [[COPY7]] ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4 @@ -435,7 +435,7 @@ body: | ; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec ; GFX7-FLAT: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 - ; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1) + ; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 @@ -453,7 +453,7 @@ body: | ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec ; GFX8: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 - ; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1) + ; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX8: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 @@ -461,7 +461,7 @@ body: | ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], -4, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], -4, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]] ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 @@ -469,7 +469,7 @@ body: | ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], -4, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], -4, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 @@ -477,7 +477,7 @@ body: | %3:vgpr(<2 x s32>) = G_BUILD_VECTOR %1, %2 %4:vgpr(s64) = G_CONSTANT i64 -4 %5:vgpr(p1) = G_PTR_ADD %0, %4 - %6:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %5, %3 :: (load store seq_cst 4, addrspace 1) + %6:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %5, %3 :: (load store seq_cst (s32), addrspace 1) $vgpr0 = COPY %6 ... @@ -502,7 +502,7 @@ body: | ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX6: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX6: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 @@ -515,7 +515,7 @@ body: | ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX7: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX7: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 @@ -523,33 +523,33 @@ body: | ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX7-FLAT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX7-FLAT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1) + ; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1) + ; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = COPY $vgpr3 %3:vgpr(<2 x s32>) = G_BUILD_VECTOR %1, %2 - %4:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %0, %3 :: (load store seq_cst 4, addrspace 1) + %4:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %0, %3 :: (load store seq_cst (s32), addrspace 1) ... @@ -573,7 +573,7 @@ body: | ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX6: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1) + ; GFX6: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX6: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1 ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 @@ -586,7 +586,7 @@ body: | ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX7: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1) + ; GFX7: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX7: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1 ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 @@ -594,33 +594,33 @@ body: | ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX7-FLAT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX7-FLAT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1) + ; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX8: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX8: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1) + ; GFX8: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX9: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1) + ; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX10: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1) + ; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = COPY $vgpr4_vgpr5 %3:vgpr(<2 x s64>) = G_BUILD_VECTOR %1, %2 - %4:vgpr(s64) = G_AMDGPU_ATOMIC_CMPXCHG %0, %3 :: (load store seq_cst 8, addrspace 1) + %4:vgpr(s64) = G_AMDGPU_ATOMIC_CMPXCHG %0, %3 :: (load store seq_cst (s64), addrspace 1) ... @@ -643,7 +643,7 @@ body: | ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX6: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX6: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0 ; GFX6: $vgpr0 = COPY [[COPY3]] ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr @@ -656,7 +656,7 @@ body: | ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX7: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX7: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0 ; GFX7: $vgpr0 = COPY [[COPY3]] ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr @@ -666,7 +666,7 @@ body: | ; GFX7-FLAT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX7-FLAT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; GFX7-FLAT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[COPY]] - ; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY3]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1) + ; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY3]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr ; GFX8: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 @@ -675,7 +675,7 @@ body: | ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; GFX8: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[COPY]] - ; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY3]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1) + ; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY3]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX8: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr ; GFX9: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 @@ -684,7 +684,7 @@ body: | ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN]] ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr ; GFX10: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 @@ -693,13 +693,13 @@ body: | ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = COPY $vgpr3 %3:vgpr(<2 x s32>) = G_BUILD_VECTOR %1, %2 - %4:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %0, %3 :: (load store seq_cst 4, addrspace 1) + %4:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %0, %3 :: (load store seq_cst (s32), addrspace 1) $vgpr0 = COPY %4 ... @@ -723,7 +723,7 @@ body: | ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX6: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 4095, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX6: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 4095, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0 ; GFX6: $vgpr0 = COPY [[COPY3]] ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095 @@ -736,7 +736,7 @@ body: | ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX7: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 4095, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX7: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 4095, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0 ; GFX7: $vgpr0 = COPY [[COPY3]] ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095 @@ -756,7 +756,7 @@ body: | ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX7-FLAT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; GFX7-FLAT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] - ; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY7]], [[REG_SEQUENCE2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1) + ; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY7]], [[REG_SEQUENCE2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095 ; GFX8: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 @@ -775,7 +775,7 @@ body: | ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; GFX8: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] - ; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY7]], [[REG_SEQUENCE2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1) + ; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY7]], [[REG_SEQUENCE2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX8: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]] ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095 ; GFX9: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 @@ -784,7 +784,7 @@ body: | ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 4095, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 4095, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN]] ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095 ; GFX10: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3 @@ -793,7 +793,7 @@ body: | ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec - ; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 2047, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 2047, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:vgpr(s32) = COPY $vgpr2 @@ -801,7 +801,7 @@ body: | %3:sgpr(s64) = G_CONSTANT i64 4095 %4:sgpr(p1) = G_PTR_ADD %0, %3 %5:vgpr(<2 x s32>) = G_BUILD_VECTOR %1, %2 - %6:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %4, %5 :: (load store seq_cst 4, addrspace 1) + %6:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %4, %5 :: (load store seq_cst (s32), addrspace 1) $vgpr0 = COPY %6 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomic-cmpxchg-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomic-cmpxchg-local.mir index bf4db71346f31..40fbac94fd68f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomic-cmpxchg-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomic-cmpxchg-local.mir @@ -21,7 +21,7 @@ body: | ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst 4, addrspace 3) + ; GFX6: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) ; GFX6: $vgpr0 = COPY [[DS_CMPST_RTN_B32_]] ; GFX7-LABEL: name: atomic_cmpxchg_s32_local ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -29,19 +29,19 @@ body: | ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst 4, addrspace 3) + ; GFX7: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) ; GFX7: $vgpr0 = COPY [[DS_CMPST_RTN_B32_]] ; GFX9-LABEL: name: atomic_cmpxchg_s32_local ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9: [[DS_CMPST_RTN_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32_gfx9 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 3) + ; GFX9: [[DS_CMPST_RTN_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32_gfx9 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3) ; GFX9: $vgpr0 = COPY [[DS_CMPST_RTN_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst 4, addrspace 3) + %3:vgpr(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst (s32), addrspace 3) $vgpr0 = COPY %3 ... @@ -63,7 +63,7 @@ body: | ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec ; GFX6: %4:vgpr_32, dead %6:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 %4, [[COPY1]], [[COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst 4, addrspace 3) + ; GFX6: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 %4, [[COPY1]], [[COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) ; GFX6: $vgpr0 = COPY [[DS_CMPST_RTN_B32_]] ; GFX7-LABEL: name: atomic_cmpxchg_s32_local_gep4 ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -71,21 +71,21 @@ body: | ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[COPY]], [[COPY1]], [[COPY2]], 4, 0, implicit $m0, implicit $exec :: (load store seq_cst 4, addrspace 3) + ; GFX7: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[COPY]], [[COPY1]], [[COPY2]], 4, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) ; GFX7: $vgpr0 = COPY [[DS_CMPST_RTN_B32_]] ; GFX9-LABEL: name: atomic_cmpxchg_s32_local_gep4 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9: [[DS_CMPST_RTN_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32_gfx9 [[COPY]], [[COPY1]], [[COPY2]], 4, 0, implicit $exec :: (load store seq_cst 4, addrspace 3) + ; GFX9: [[DS_CMPST_RTN_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32_gfx9 [[COPY]], [[COPY1]], [[COPY2]], 4, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3) ; GFX9: $vgpr0 = COPY [[DS_CMPST_RTN_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 %3:vgpr(s32) = G_CONSTANT i32 4 %4:vgpr(p3) = G_PTR_ADD %0, %3 - %5:vgpr(s32) = G_ATOMIC_CMPXCHG %4, %1, %2 :: (load store seq_cst 4, addrspace 3) + %5:vgpr(s32) = G_ATOMIC_CMPXCHG %4, %1, %2 :: (load store seq_cst (s32), addrspace 3) $vgpr0 = COPY %5 ... @@ -105,7 +105,7 @@ body: | ; GFX6: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 ; GFX6: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst 8, addrspace 3) + ; GFX6: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 3) ; GFX6: $vgpr0_vgpr1 = COPY [[DS_CMPST_RTN_B64_]] ; GFX7-LABEL: name: atomic_cmpxchg_s64_local ; GFX7: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 @@ -113,19 +113,19 @@ body: | ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 ; GFX7: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst 8, addrspace 3) + ; GFX7: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 3) ; GFX7: $vgpr0_vgpr1 = COPY [[DS_CMPST_RTN_B64_]] ; GFX9-LABEL: name: atomic_cmpxchg_s64_local ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 ; GFX9: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GFX9: [[DS_CMPST_RTN_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64_gfx9 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $exec :: (load store seq_cst 8, addrspace 3) + ; GFX9: [[DS_CMPST_RTN_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64_gfx9 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 3) ; GFX9: $vgpr0_vgpr1 = COPY [[DS_CMPST_RTN_B64_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s64) = COPY $vgpr1_vgpr2 %2:vgpr(s64) = COPY $vgpr3_vgpr4 - %3:vgpr(s64) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst 8, addrspace 3) + %3:vgpr(s64) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst (s64), addrspace 3) $vgpr0_vgpr1 = COPY %3 ... @@ -145,7 +145,7 @@ body: | ; GFX6: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 ; GFX6: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst 8, addrspace 3) + ; GFX6: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 3) ; GFX6: $vgpr0_vgpr1 = COPY [[DS_CMPST_RTN_B64_]] ; GFX7-LABEL: name: atomic_cmpxchg_s64_local_gep4 ; GFX7: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 @@ -153,21 +153,21 @@ body: | ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 ; GFX7: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst 8, addrspace 3) + ; GFX7: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 3) ; GFX7: $vgpr0_vgpr1 = COPY [[DS_CMPST_RTN_B64_]] ; GFX9-LABEL: name: atomic_cmpxchg_s64_local_gep4 ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 ; GFX9: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GFX9: [[DS_CMPST_RTN_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64_gfx9 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $exec :: (load store seq_cst 8, addrspace 3) + ; GFX9: [[DS_CMPST_RTN_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64_gfx9 [[COPY]], [[COPY1]], [[COPY2]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 3) ; GFX9: $vgpr0_vgpr1 = COPY [[DS_CMPST_RTN_B64_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s64) = COPY $vgpr1_vgpr2 %2:vgpr(s64) = COPY $vgpr3_vgpr4 %3:vgpr(s32) = G_CONSTANT i32 4 %4:vgpr(p3) = G_PTR_ADD %0, %3 - %5:vgpr(s64) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst 8, addrspace 3) + %5:vgpr(s64) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst (s64), addrspace 3) $vgpr0_vgpr1 = COPY %5 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomic-cmpxchg-region.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomic-cmpxchg-region.mir index 834380d617912..2325732f8607c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomic-cmpxchg-region.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomic-cmpxchg-region.mir @@ -21,7 +21,7 @@ body: | ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst 4, addrspace 2) + ; GFX6: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) ; GFX6: $vgpr0 = COPY [[DS_CMPST_RTN_B32_]] ; GFX7-LABEL: name: atomic_cmpxchg_s32_region ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -29,19 +29,19 @@ body: | ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst 4, addrspace 2) + ; GFX7: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) ; GFX7: $vgpr0 = COPY [[DS_CMPST_RTN_B32_]] ; GFX9-LABEL: name: atomic_cmpxchg_s32_region ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst 4, addrspace 2) + ; GFX9: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) ; GFX9: $vgpr0 = COPY [[DS_CMPST_RTN_B32_]] %0:vgpr(p2) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst 4, addrspace 2) + %3:vgpr(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst (s32), addrspace 2) $vgpr0 = COPY %3 ... @@ -63,7 +63,7 @@ body: | ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec ; GFX6: %4:vgpr_32, dead %6:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 %4, [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst 4, addrspace 2) + ; GFX6: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 %4, [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) ; GFX6: $vgpr0 = COPY [[DS_CMPST_RTN_B32_]] ; GFX7-LABEL: name: atomic_cmpxchg_s32_region_gep4 ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -71,21 +71,21 @@ body: | ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[COPY]], [[COPY1]], [[COPY2]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst 4, addrspace 2) + ; GFX7: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[COPY]], [[COPY1]], [[COPY2]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) ; GFX7: $vgpr0 = COPY [[DS_CMPST_RTN_B32_]] ; GFX9-LABEL: name: atomic_cmpxchg_s32_region_gep4 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[COPY]], [[COPY1]], [[COPY2]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst 4, addrspace 2) + ; GFX9: [[DS_CMPST_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_CMPST_RTN_B32 [[COPY]], [[COPY1]], [[COPY2]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) ; GFX9: $vgpr0 = COPY [[DS_CMPST_RTN_B32_]] %0:vgpr(p2) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 %3:vgpr(s32) = G_CONSTANT i32 4 %4:vgpr(p2) = G_PTR_ADD %0, %3 - %5:vgpr(s32) = G_ATOMIC_CMPXCHG %4, %1, %2 :: (load store seq_cst 4, addrspace 2) + %5:vgpr(s32) = G_ATOMIC_CMPXCHG %4, %1, %2 :: (load store seq_cst (s32), addrspace 2) $vgpr0 = COPY %5 ... @@ -105,7 +105,7 @@ body: | ; GFX6: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 ; GFX6: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst 8, addrspace 2) + ; GFX6: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 2) ; GFX6: $vgpr0_vgpr1 = COPY [[DS_CMPST_RTN_B64_]] ; GFX7-LABEL: name: atomic_cmpxchg_s64_region ; GFX7: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 @@ -113,19 +113,19 @@ body: | ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 ; GFX7: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst 8, addrspace 2) + ; GFX7: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 2) ; GFX7: $vgpr0_vgpr1 = COPY [[DS_CMPST_RTN_B64_]] ; GFX9-LABEL: name: atomic_cmpxchg_s64_region ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 ; GFX9: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GFX9: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst 8, addrspace 2) + ; GFX9: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 2) ; GFX9: $vgpr0_vgpr1 = COPY [[DS_CMPST_RTN_B64_]] %0:vgpr(p2) = COPY $vgpr0 %1:vgpr(s64) = COPY $vgpr1_vgpr2 %2:vgpr(s64) = COPY $vgpr3_vgpr4 - %3:vgpr(s64) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst 8, addrspace 2) + %3:vgpr(s64) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst (s64), addrspace 2) $vgpr0_vgpr1 = COPY %3 ... @@ -145,7 +145,7 @@ body: | ; GFX6: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 ; GFX6: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst 8, addrspace 2) + ; GFX6: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 2) ; GFX6: $vgpr0_vgpr1 = COPY [[DS_CMPST_RTN_B64_]] ; GFX7-LABEL: name: atomic_cmpxchg_s64_region_gep4 ; GFX7: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 @@ -153,21 +153,21 @@ body: | ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 ; GFX7: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst 8, addrspace 2) + ; GFX7: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 2) ; GFX7: $vgpr0_vgpr1 = COPY [[DS_CMPST_RTN_B64_]] ; GFX9-LABEL: name: atomic_cmpxchg_s64_region_gep4 ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2, $vgpr3_vgpr4 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 ; GFX9: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GFX9: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst 8, addrspace 2) + ; GFX9: [[DS_CMPST_RTN_B64_:%[0-9]+]]:vreg_64 = DS_CMPST_RTN_B64 [[COPY]], [[COPY1]], [[COPY2]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s64), addrspace 2) ; GFX9: $vgpr0_vgpr1 = COPY [[DS_CMPST_RTN_B64_]] %0:vgpr(p2) = COPY $vgpr0 %1:vgpr(s64) = COPY $vgpr1_vgpr2 %2:vgpr(s64) = COPY $vgpr3_vgpr4 %3:vgpr(s32) = G_CONSTANT i32 4 %4:vgpr(p2) = G_PTR_ADD %0, %3 - %5:vgpr(s64) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst 8, addrspace 2) + %5:vgpr(s64) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst (s64), addrspace 2) $vgpr0_vgpr1 = COPY %5 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir index e60ad00707b0a..fd9527b199235 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir @@ -17,23 +17,23 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) + ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX9-LABEL: name: flat_atomicrmw_add_s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) + ; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX9: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX10-LABEL: name: flat_atomicrmw_add_s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) + ; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX10: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst 4, addrspace 0) + %2:vgpr(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 0) $vgpr0 = COPY %2 ... @@ -51,20 +51,20 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) + ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX9-LABEL: name: flat_atomicrmw_add_s32_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) + ; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX10-LABEL: name: flat_atomicrmw_add_s32_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) + ; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst 4, addrspace 0) + %2:vgpr(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 0) ... @@ -91,13 +91,13 @@ body: | ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) + ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset2047 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) + ; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX9: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset2047 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 @@ -113,13 +113,13 @@ body: | ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) + ; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX10: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 2047 %3:vgpr(p0) = G_PTR_ADD %0, %2 - %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst 4, addrspace 0) + %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 0) $vgpr0 = COPY %4 ... @@ -147,12 +147,12 @@ body: | ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) + ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset2047_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) + ; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset2047_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 @@ -167,12 +167,12 @@ body: | ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) + ; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 2047 %3:vgpr(p0) = G_PTR_ADD %0, %2 - %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst 4, addrspace 0) + %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 0) ... @@ -199,13 +199,13 @@ body: | ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) + ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset2048 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) + ; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX9: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset2048 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 @@ -221,13 +221,13 @@ body: | ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) + ; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX10: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 2048 %3:vgpr(p0) = G_PTR_ADD %0, %2 - %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst 4, addrspace 0) + %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 0) $vgpr0 = COPY %4 ... @@ -255,12 +255,12 @@ body: | ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) + ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset2048_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) + ; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset2048_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 @@ -275,12 +275,12 @@ body: | ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) + ; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 2048 %3:vgpr(p0) = G_PTR_ADD %0, %2 - %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst 4, addrspace 0) + %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 0) ... @@ -307,13 +307,13 @@ body: | ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) + ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset4095 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) + ; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX9: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset4095 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 @@ -329,13 +329,13 @@ body: | ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) + ; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX10: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 4095 %3:vgpr(p0) = G_PTR_ADD %0, %2 - %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst 4, addrspace 0) + %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 0) $vgpr0 = COPY %4 ... @@ -363,12 +363,12 @@ body: | ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) + ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset4095_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) + ; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset4095_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 @@ -383,12 +383,12 @@ body: | ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) + ; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 4095 %3:vgpr(p0) = G_PTR_ADD %0, %2 - %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst 4, addrspace 0) + %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 0) ... @@ -415,7 +415,7 @@ body: | ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) + ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset4097 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 @@ -431,7 +431,7 @@ body: | ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX9: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) + ; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX9: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset4097 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 @@ -447,13 +447,13 @@ body: | ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) + ; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX10: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 4097 %3:vgpr(p0) = G_PTR_ADD %0, %2 - %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst 4, addrspace 0) + %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 0) $vgpr0 = COPY %4 ... @@ -481,7 +481,7 @@ body: | ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) + ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset4097_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 @@ -496,7 +496,7 @@ body: | ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX9: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) + ; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset4097_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 @@ -511,12 +511,12 @@ body: | ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4) + ; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 4097 %3:vgpr(p0) = G_PTR_ADD %0, %2 - %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst 4, addrspace 0) + %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 0) ... @@ -533,23 +533,23 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8) + ; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]] ; GFX9-LABEL: name: flat_atomicrmw_add_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8) + ; GFX9: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]] ; GFX10-LABEL: name: flat_atomicrmw_add_s64 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8) + ; GFX10: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX10: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vgpr(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst 8, addrspace 0) + %2:vgpr(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s64), addrspace 0) $vgpr0_vgpr1 = COPY %2 ... @@ -567,20 +567,20 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8) + ; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX9-LABEL: name: flat_atomicrmw_add_s64_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8) + ; GFX9: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX10-LABEL: name: flat_atomicrmw_add_s64_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8) + ; GFX10: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vgpr(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst 8, addrspace 0) + %2:vgpr(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s64), addrspace 0) ... @@ -607,13 +607,13 @@ body: | ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8) + ; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]] ; GFX9-LABEL: name: flat_atomicrmw_add_s64_offset4095 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8) + ; GFX9: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]] ; GFX10-LABEL: name: flat_atomicrmw_add_s64_offset4095 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -629,13 +629,13 @@ body: | ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8) + ; GFX10: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX10: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = G_CONSTANT i64 4095 %3:vgpr(p0) = G_PTR_ADD %0, %2 - %4:vgpr(s64) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst 8, addrspace 0) + %4:vgpr(s64) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s64), addrspace 0) $vgpr0_vgpr1 = COPY %4 ... @@ -663,12 +663,12 @@ body: | ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8) + ; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX9-LABEL: name: flat_atomicrmw_add_s64_offset4095_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8) + ; GFX9: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX10-LABEL: name: flat_atomicrmw_add_s64_offset4095_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 @@ -683,11 +683,11 @@ body: | ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8) + ; GFX10: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = G_CONSTANT i64 4095 %3:vgpr(p0) = G_PTR_ADD %0, %2 - %4:vgpr(s64) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst 8, addrspace 0) + %4:vgpr(s64) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s64), addrspace 0) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-global.mir index 9f1bc538740ff..e96c4068ef8a6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-global.mir @@ -18,7 +18,7 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1) + ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX6-LABEL: name: global_atomicrmw_add_s32 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 @@ -29,23 +29,23 @@ body: | ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX6: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]] ; GFX9-LABEL: name: global_atomicrmw_add_s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] ; GFX10-LABEL: name: global_atomicrmw_add_s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst 4, addrspace 1) + %2:vgpr(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 1) $vgpr0 = COPY %2 ... @@ -63,7 +63,7 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1) + ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX6-LABEL: name: global_atomicrmw_add_s32_nortn ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 @@ -73,20 +73,20 @@ body: | ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX9-LABEL: name: global_atomicrmw_add_s32_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX10-LABEL: name: global_atomicrmw_add_s32_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst 4, addrspace 1) + %2:vgpr(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 1) ... @@ -113,7 +113,7 @@ body: | ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1) + ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX6-LABEL: name: global_atomicrmw_add_s32_offset2047 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 @@ -124,25 +124,25 @@ body: | ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX6: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]] ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset2047 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] ; GFX10-LABEL: name: global_atomicrmw_add_s32_offset2047 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 2047 %3:vgpr(p1) = G_PTR_ADD %0, %2 - %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst 4, addrspace 1) + %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 1) $vgpr0 = COPY %4 ... @@ -170,7 +170,7 @@ body: | ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1) + ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX6-LABEL: name: global_atomicrmw_add_s32_offset2047_nortn ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 @@ -180,22 +180,22 @@ body: | ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset2047_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX10-LABEL: name: global_atomicrmw_add_s32_offset2047_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 2047 %3:vgpr(p1) = G_PTR_ADD %0, %2 - %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst 4, addrspace 1) + %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 1) ... @@ -222,7 +222,7 @@ body: | ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1) + ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX6-LABEL: name: global_atomicrmw_add_s32_offset2048 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 @@ -233,13 +233,13 @@ body: | ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX6: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]] ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset2048 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] ; GFX10-LABEL: name: global_atomicrmw_add_s32_offset2048 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 @@ -255,13 +255,13 @@ body: | ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 2048 %3:vgpr(p1) = G_PTR_ADD %0, %2 - %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst 4, addrspace 1) + %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 1) $vgpr0 = COPY %4 ... @@ -289,7 +289,7 @@ body: | ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1) + ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX6-LABEL: name: global_atomicrmw_add_s32_offset2048_nortn ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 @@ -299,12 +299,12 @@ body: | ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset2048_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX10-LABEL: name: global_atomicrmw_add_s32_offset2048_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 @@ -319,12 +319,12 @@ body: | ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 2048 %3:vgpr(p1) = G_PTR_ADD %0, %2 - %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst 4, addrspace 1) + %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 1) ... @@ -351,7 +351,7 @@ body: | ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1) + ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX6-LABEL: name: global_atomicrmw_add_s32_offset4095 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 @@ -362,13 +362,13 @@ body: | ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX6: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]] ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset4095 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] ; GFX10-LABEL: name: global_atomicrmw_add_s32_offset4095 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 @@ -384,13 +384,13 @@ body: | ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 4095 %3:vgpr(p1) = G_PTR_ADD %0, %2 - %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst 4, addrspace 1) + %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 1) $vgpr0 = COPY %4 ... @@ -418,7 +418,7 @@ body: | ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1) + ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX6-LABEL: name: global_atomicrmw_add_s32_offset4095_nortn ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 @@ -428,12 +428,12 @@ body: | ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset4095_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX10-LABEL: name: global_atomicrmw_add_s32_offset4095_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 @@ -448,12 +448,12 @@ body: | ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 4095 %3:vgpr(p1) = G_PTR_ADD %0, %2 - %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst 4, addrspace 1) + %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 1) ... @@ -480,7 +480,7 @@ body: | ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1) + ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]] ; GFX6-LABEL: name: global_atomicrmw_add_s32_offset4097 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 @@ -492,7 +492,7 @@ body: | ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX6: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4097 - ; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX6: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]] ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset4097 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 @@ -508,7 +508,7 @@ body: | ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX9: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] ; GFX10-LABEL: name: global_atomicrmw_add_s32_offset4097 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 @@ -524,13 +524,13 @@ body: | ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 4097 %3:vgpr(p1) = G_PTR_ADD %0, %2 - %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst 4, addrspace 1) + %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 1) $vgpr0 = COPY %4 ... @@ -558,7 +558,7 @@ body: | ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1) + ; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX6-LABEL: name: global_atomicrmw_add_s32_offset4097_nortn ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 @@ -569,7 +569,7 @@ body: | ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX6: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4097 - ; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset4097_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 @@ -584,7 +584,7 @@ body: | ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX9: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX10-LABEL: name: global_atomicrmw_add_s32_offset4097_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 @@ -599,12 +599,12 @@ body: | ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1) + ; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 4097 %3:vgpr(p1) = G_PTR_ADD %0, %2 - %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst 4, addrspace 1) + %4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 1) ... @@ -621,7 +621,7 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1) + ; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) ; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]] ; GFX6-LABEL: name: global_atomicrmw_add_s64 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -632,23 +632,23 @@ body: | ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1) + ; GFX6: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX6: $vgpr0_vgpr1 = COPY [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN]] ; GFX9-LABEL: name: global_atomicrmw_add_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1) + ; GFX9: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX9: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]] ; GFX10-LABEL: name: global_atomicrmw_add_s64 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1) + ; GFX10: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX10: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vgpr(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst 8, addrspace 1) + %2:vgpr(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s64), addrspace 1) $vgpr0_vgpr1 = COPY %2 ... @@ -666,7 +666,7 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1) + ; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) ; GFX6-LABEL: name: global_atomicrmw_add_s64_nortn ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 @@ -676,20 +676,20 @@ body: | ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1) + ; GFX6: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX9-LABEL: name: global_atomicrmw_add_s64_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1) + ; GFX9: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX10-LABEL: name: global_atomicrmw_add_s64_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1) + ; GFX10: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 - %2:vgpr(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst 8, addrspace 1) + %2:vgpr(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s64), addrspace 1) ... @@ -716,7 +716,7 @@ body: | ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1) + ; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) ; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]] ; GFX6-LABEL: name: global_atomicrmw_add_s64_offset4095 ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -727,13 +727,13 @@ body: | ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 1, implicit $exec :: (load store seq_cst 8, addrspace 1) + ; GFX6: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX6: $vgpr0_vgpr1 = COPY [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN]] ; GFX9-LABEL: name: global_atomicrmw_add_s64_offset4095 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst 8, addrspace 1) + ; GFX9: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX9: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]] ; GFX10-LABEL: name: global_atomicrmw_add_s64_offset4095 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -749,13 +749,13 @@ body: | ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1) + ; GFX10: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX10: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = G_CONSTANT i64 4095 %3:vgpr(p1) = G_PTR_ADD %0, %2 - %4:vgpr(s64) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst 8, addrspace 1) + %4:vgpr(s64) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s64), addrspace 1) $vgpr0_vgpr1 = COPY %4 ... @@ -783,7 +783,7 @@ body: | ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1) + ; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) ; GFX6-LABEL: name: global_atomicrmw_add_s64_offset4095_nortn ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 @@ -793,12 +793,12 @@ body: | ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 1, implicit $exec :: (load store seq_cst 8, addrspace 1) + ; GFX6: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX9-LABEL: name: global_atomicrmw_add_s64_offset4095_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst 8, addrspace 1) + ; GFX9: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX10-LABEL: name: global_atomicrmw_add_s64_offset4095_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 @@ -813,11 +813,11 @@ body: | ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1) + ; GFX10: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = G_CONSTANT i64 4095 %3:vgpr(p1) = G_PTR_ADD %0, %2 - %4:vgpr(s64) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst 8, addrspace 1) + %4:vgpr(s64) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s64), addrspace 1) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-local.mir index 87bc4be1e1d95..27572461c3a84 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-local.mir @@ -21,24 +21,24 @@ body: | ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8: $m0 = S_MOV_B32 -1 - ; GFX8: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst 4, addrspace 3) + ; GFX8: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) ; GFX8: $vgpr0 = COPY [[DS_ADD_RTN_F32_]] ; GFX9-LABEL: name: atomicrmw_fadd_s32_local ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9: [[DS_ADD_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 3) + ; GFX9: [[DS_ADD_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3) ; GFX9: $vgpr0 = COPY [[DS_ADD_RTN_F32_gfx9_]] ; GFX6-LABEL: name: atomicrmw_fadd_s32_local ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr_32(s32) = G_ATOMICRMW_FADD [[COPY]](p3), [[COPY1]] :: (load store seq_cst 4, addrspace 3) + ; GFX6: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr_32(s32) = G_ATOMICRMW_FADD [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) ; GFX6: $vgpr0 = COPY [[ATOMICRMW_FADD]](s32) %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = G_ATOMICRMW_FADD %0(p3), %1 :: (load store seq_cst 4, addrspace 3) + %2:vgpr(s32) = G_ATOMICRMW_FADD %0(p3), %1 :: (load store seq_cst (s32), addrspace 3) $vgpr0 = COPY %2 ... @@ -57,21 +57,21 @@ body: | ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8: $m0 = S_MOV_B32 -1 - ; GFX8: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst 4, addrspace 3) + ; GFX8: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) ; GFX9-LABEL: name: atomicrmw_fadd_s32_local_noret ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9: [[DS_ADD_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 3) + ; GFX9: [[DS_ADD_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3) ; GFX6-LABEL: name: atomicrmw_fadd_s32_local_noret ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_FADD [[COPY]](p3), [[COPY1]] :: (load store seq_cst 4, addrspace 3) + ; GFX6: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_FADD [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = G_ATOMICRMW_FADD %0(p3), %1 :: (load store seq_cst 4, addrspace 3) + %2:vgpr(s32) = G_ATOMICRMW_FADD %0(p3), %1 :: (load store seq_cst (s32), addrspace 3) ... @@ -89,13 +89,13 @@ body: | ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8: $m0 = S_MOV_B32 -1 - ; GFX8: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 4, 0, implicit $m0, implicit $exec :: (load store seq_cst 4, addrspace 3) + ; GFX8: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 4, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) ; GFX8: $vgpr0 = COPY [[DS_ADD_RTN_F32_]] ; GFX9-LABEL: name: atomicrmw_fadd_s32_local_gep4 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9: [[DS_ADD_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32_gfx9 [[COPY]], [[COPY1]], 4, 0, implicit $exec :: (load store seq_cst 4, addrspace 3) + ; GFX9: [[DS_ADD_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32_gfx9 [[COPY]], [[COPY1]], 4, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3) ; GFX9: $vgpr0 = COPY [[DS_ADD_RTN_F32_gfx9_]] ; GFX6-LABEL: name: atomicrmw_fadd_s32_local_gep4 ; GFX6: liveins: $vgpr0, $vgpr1 @@ -104,13 +104,13 @@ body: | ; GFX6: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 4 ; GFX6: [[PTR_ADD:%[0-9]+]]:vgpr(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr_32(s32) = G_ATOMICRMW_FADD [[PTR_ADD]](p3), [[COPY1]] :: (load store seq_cst 4, addrspace 3) + ; GFX6: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr_32(s32) = G_ATOMICRMW_FADD [[PTR_ADD]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) ; GFX6: $vgpr0 = COPY [[ATOMICRMW_FADD]](s32) %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = G_CONSTANT i32 4 %3:vgpr(p3) = G_PTR_ADD %0, %2 - %4:vgpr(s32) = G_ATOMICRMW_FADD %3(p3), %1 :: (load store seq_cst 4, addrspace 3) + %4:vgpr(s32) = G_ATOMICRMW_FADD %3(p3), %1 :: (load store seq_cst (s32), addrspace 3) $vgpr0 = COPY %4 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-region.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-region.mir index 26619a1753eb5..cd1d3df7bfeac 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-region.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-region.mir @@ -21,24 +21,24 @@ body: | ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8: $m0 = S_MOV_B32 -1 - ; GFX8: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst 4, addrspace 2) + ; GFX8: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) ; GFX8: $vgpr0 = COPY [[DS_ADD_RTN_F32_]] ; GFX9-LABEL: name: atomicrmw_fadd_s32_region ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst 4, addrspace 2) + ; GFX9: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) ; GFX9: $vgpr0 = COPY [[DS_ADD_RTN_F32_]] ; GFX6-LABEL: name: atomicrmw_fadd_s32_region ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6: [[COPY:%[0-9]+]]:vgpr(p2) = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr_32(s32) = G_ATOMICRMW_FADD [[COPY]](p2), [[COPY1]] :: (load store seq_cst 4, addrspace 2) + ; GFX6: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr_32(s32) = G_ATOMICRMW_FADD [[COPY]](p2), [[COPY1]] :: (load store seq_cst (s32), addrspace 2) ; GFX6: $vgpr0 = COPY [[ATOMICRMW_FADD]](s32) %0:vgpr(p2) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = G_ATOMICRMW_FADD %0(p2), %1 :: (load store seq_cst 4, addrspace 2) + %2:vgpr(s32) = G_ATOMICRMW_FADD %0(p2), %1 :: (load store seq_cst (s32), addrspace 2) $vgpr0 = COPY %2 ... @@ -57,21 +57,21 @@ body: | ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8: $m0 = S_MOV_B32 -1 - ; GFX8: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst 4, addrspace 2) + ; GFX8: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) ; GFX9-LABEL: name: atomicrmw_fadd_s32_region_noret ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst 4, addrspace 2) + ; GFX9: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) ; GFX6-LABEL: name: atomicrmw_fadd_s32_region_noret ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6: [[COPY:%[0-9]+]]:vgpr(p2) = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_FADD [[COPY]](p2), [[COPY1]] :: (load store seq_cst 4, addrspace 2) + ; GFX6: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_FADD [[COPY]](p2), [[COPY1]] :: (load store seq_cst (s32), addrspace 2) %0:vgpr(p2) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = G_ATOMICRMW_FADD %0(p2), %1 :: (load store seq_cst 4, addrspace 2) + %2:vgpr(s32) = G_ATOMICRMW_FADD %0(p2), %1 :: (load store seq_cst (s32), addrspace 2) ... @@ -89,13 +89,13 @@ body: | ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8: $m0 = S_MOV_B32 -1 - ; GFX8: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst 4, addrspace 2) + ; GFX8: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) ; GFX8: $vgpr0 = COPY [[DS_ADD_RTN_F32_]] ; GFX9-LABEL: name: atomicrmw_fadd_s32_region_gep4 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst 4, addrspace 2) + ; GFX9: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) ; GFX9: $vgpr0 = COPY [[DS_ADD_RTN_F32_]] ; GFX6-LABEL: name: atomicrmw_fadd_s32_region_gep4 ; GFX6: liveins: $vgpr0, $vgpr1 @@ -104,13 +104,13 @@ body: | ; GFX6: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 4 ; GFX6: [[PTR_ADD:%[0-9]+]]:vgpr(p2) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr_32(s32) = G_ATOMICRMW_FADD [[PTR_ADD]](p2), [[COPY1]] :: (load store seq_cst 4, addrspace 2) + ; GFX6: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr_32(s32) = G_ATOMICRMW_FADD [[PTR_ADD]](p2), [[COPY1]] :: (load store seq_cst (s32), addrspace 2) ; GFX6: $vgpr0 = COPY [[ATOMICRMW_FADD]](s32) %0:vgpr(p2) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = G_CONSTANT i32 4 %3:vgpr(p2) = G_PTR_ADD %0, %2 - %4:vgpr(s32) = G_ATOMICRMW_FADD %3(p2), %1 :: (load store seq_cst 4, addrspace 2) + %4:vgpr(s32) = G_ATOMICRMW_FADD %3(p2), %1 :: (load store seq_cst (s32), addrspace 2) $vgpr0 = COPY %4 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-xchg-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-xchg-local.mir index 16e01429f68c4..ac4f32a2566f8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-xchg-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-xchg-local.mir @@ -20,24 +20,24 @@ body: | ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst 4, addrspace 3) + ; GFX6: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) ; GFX6: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_]] ; GFX7-LABEL: name: atomicrmw_xchg_s32_local ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst 4, addrspace 3) + ; GFX7: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) ; GFX7: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_]] ; GFX9-LABEL: name: atomicrmw_xchg_s32_local ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9: [[DS_WRXCHG_RTN_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 3) + ; GFX9: [[DS_WRXCHG_RTN_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3) ; GFX9: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = G_ATOMICRMW_XCHG %0(p3), %1 :: (load store seq_cst 4, addrspace 3) + %2:vgpr(s32) = G_ATOMICRMW_XCHG %0(p3), %1 :: (load store seq_cst (s32), addrspace 3) $vgpr0 = COPY %2 ... @@ -58,26 +58,26 @@ body: | ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec ; GFX6: %3:vgpr_32, dead %5:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 %3, [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst 4, addrspace 3) + ; GFX6: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 %3, [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) ; GFX6: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_]] ; GFX7-LABEL: name: atomicrmw_xchg_s32_local_gep4 ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 4, 0, implicit $m0, implicit $exec :: (load store seq_cst 4, addrspace 3) + ; GFX7: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 4, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) ; GFX7: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_]] ; GFX9-LABEL: name: atomicrmw_xchg_s32_local_gep4 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9: [[DS_WRXCHG_RTN_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32_gfx9 [[COPY]], [[COPY1]], 4, 0, implicit $exec :: (load store seq_cst 4, addrspace 3) + ; GFX9: [[DS_WRXCHG_RTN_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32_gfx9 [[COPY]], [[COPY1]], 4, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3) ; GFX9: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = G_CONSTANT i32 4 %3:vgpr(p3) = G_PTR_ADD %0, %2 - %4:vgpr(s32) = G_ATOMICRMW_XCHG %3(p3), %1 :: (load store seq_cst 4, addrspace 3) + %4:vgpr(s32) = G_ATOMICRMW_XCHG %3(p3), %1 :: (load store seq_cst (s32), addrspace 3) $vgpr0 = COPY %4 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-xchg-region.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-xchg-region.mir index 0552551f35105..13d42cf089ba5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-xchg-region.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-xchg-region.mir @@ -20,24 +20,24 @@ body: | ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst 4, addrspace 2) + ; GFX6: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) ; GFX6: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_]] ; GFX7-LABEL: name: atomicrmw_xchg_s32_region ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst 4, addrspace 2) + ; GFX7: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) ; GFX7: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_]] ; GFX9-LABEL: name: atomicrmw_xchg_s32_region ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst 4, addrspace 2) + ; GFX9: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) ; GFX9: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_]] %0:vgpr(p2) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 - %2:vgpr(s32) = G_ATOMICRMW_XCHG %0(p2), %1 :: (load store seq_cst 4, addrspace 2) + %2:vgpr(s32) = G_ATOMICRMW_XCHG %0(p2), %1 :: (load store seq_cst (s32), addrspace 2) $vgpr0 = COPY %2 ... @@ -58,26 +58,26 @@ body: | ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec ; GFX6: %3:vgpr_32, dead %5:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 %3, [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst 4, addrspace 2) + ; GFX6: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 %3, [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) ; GFX6: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_]] ; GFX7-LABEL: name: atomicrmw_xchg_s32_region_gep4 ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst 4, addrspace 2) + ; GFX7: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) ; GFX7: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_]] ; GFX9-LABEL: name: atomicrmw_xchg_s32_region_gep4 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst 4, addrspace 2) + ; GFX9: [[DS_WRXCHG_RTN_B32_:%[0-9]+]]:vgpr_32 = DS_WRXCHG_RTN_B32 [[COPY]], [[COPY1]], 4, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) ; GFX9: $vgpr0 = COPY [[DS_WRXCHG_RTN_B32_]] %0:vgpr(p2) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = G_CONSTANT i32 4 %3:vgpr(p2) = G_PTR_ADD %0, %2 - %4:vgpr(s32) = G_ATOMICRMW_XCHG %3(p2), %1 :: (load store seq_cst 4, addrspace 2) + %4:vgpr(s32) = G_ATOMICRMW_XCHG %3(p2), %1 :: (load store seq_cst (s32), addrspace 2) $vgpr0 = COPY %4 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir index c9b9c16ed4f8e..a30fb1b6bd1b8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir @@ -17,16 +17,16 @@ body: | ; WAVE64: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 ; WAVE64: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[COPY]] ; WAVE64: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; WAVE64: FLAT_STORE_DWORD [[COPY1]], [[DEF]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) + ; WAVE64: FLAT_STORE_DWORD [[COPY1]], [[DEF]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) ; WAVE32-LABEL: name: copy ; WAVE32: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 ; WAVE32: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; WAVE32: GLOBAL_STORE_DWORD_SADDR [[V_MOV_B32_e32_]], [[DEF]], [[COPY]], 0, 0, implicit $exec :: (store 4, addrspace 1) + ; WAVE32: GLOBAL_STORE_DWORD_SADDR [[V_MOV_B32_e32_]], [[DEF]], [[COPY]], 0, 0, implicit $exec :: (store (s32), addrspace 1) %0:sgpr(p1) = COPY $sgpr2_sgpr3 %1:vgpr(p1) = COPY %0 %2:vgpr(s32) = G_IMPLICIT_DEF - G_STORE %2, %1 :: (store 4, addrspace 1) + G_STORE %2, %1 :: (store (s32), addrspace 1) ... --- @@ -46,7 +46,7 @@ body: | ; WAVE64: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY3]], implicit-def $scc ; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE64: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec - ; WAVE64: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) + ; WAVE64: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) ; WAVE32-LABEL: name: copy_vcc_bank_sgpr_bank ; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 @@ -55,14 +55,14 @@ body: | ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY3]], implicit-def $scc ; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE32: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec - ; WAVE32: GLOBAL_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec :: (store 4, addrspace 1) + ; WAVE32: GLOBAL_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec :: (store (s32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = COPY $vgpr3 %3:sgpr(s1) = COPY $scc %4:vcc(s1) = COPY %3 %5:vgpr(s32) = G_SELECT %4, %1, %2 - G_STORE %5, %0 :: (store 4, addrspace 1) + G_STORE %5, %0 :: (store (s32), addrspace 1) ... --- @@ -85,7 +85,7 @@ body: | ; WAVE64: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY3]], implicit-def $scc ; WAVE64: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_1]], implicit $exec ; WAVE64: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_1]], implicit $exec - ; WAVE64: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) + ; WAVE64: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) ; WAVE32-LABEL: name: copy_vcc_bank_sgpr_bank_2_uses ; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 @@ -96,7 +96,7 @@ body: | ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY3]], implicit-def $scc ; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE32: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec - ; WAVE32: GLOBAL_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, implicit $exec :: (store 4, addrspace 1) + ; WAVE32: GLOBAL_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, implicit $exec :: (store (s32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = COPY $vgpr3 @@ -105,7 +105,7 @@ body: | %5:vgpr(s32) = G_SELECT %4, %1, %2 %6:vcc(s1) = COPY %3 %7:vgpr(s32) = G_SELECT %6, %1, %5 - G_STORE %7, %0 :: (store 4, addrspace 1) + G_STORE %7, %0 :: (store (s32), addrspace 1) ... --- @@ -124,20 +124,20 @@ body: | ; WAVE64: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; WAVE64: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY $scc ; WAVE64: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[COPY3]], implicit $exec - ; WAVE64: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) + ; WAVE64: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) ; WAVE32-LABEL: name: copy_vcc_bank_scc_physreg ; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; WAVE32: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; WAVE32: [[COPY3:%[0-9]+]]:sreg_32_xm0_xexec = COPY $scc ; WAVE32: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[COPY3]], implicit $exec - ; WAVE32: GLOBAL_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec :: (store 4, addrspace 1) + ; WAVE32: GLOBAL_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec :: (store (s32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = COPY $vgpr3 %3:vcc(s1) = COPY $scc %5:vgpr(s32) = G_SELECT %3, %1, %2 - G_STORE %5, %0 :: (store 4, addrspace 1) + G_STORE %5, %0 :: (store (s32), addrspace 1) ... --- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.mir index 8df94adbb36b9..6b4ab6d1201ec 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.mir @@ -24,9 +24,9 @@ body: | ; GFX7: %7:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX7: %8:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX7: %9:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) - ; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) - ; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) + ; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) ; GFX7: %10:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec ; GFX7: %11:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec ; GFX7: %12:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec @@ -49,9 +49,9 @@ body: | ; maxnum_ieee vv %6:vgpr(s32) = G_FMAXNUM_IEEE %1, %2 - G_STORE %4, %3 :: (store 4, addrspace 1) - G_STORE %5, %3 :: (store 4, addrspace 1) - G_STORE %6, %3 :: (store 4, addrspace 1) + G_STORE %4, %3 :: (store (s32), addrspace 1) + G_STORE %5, %3 :: (store (s32), addrspace 1) + G_STORE %6, %3 :: (store (s32), addrspace 1) ; 64-bit @@ -92,9 +92,9 @@ body: | ; GFX7: %7:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX7: %8:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX7: %9:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) - ; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) - ; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) + ; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) ; GFX7: %10:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec ; GFX7: %11:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec ; GFX7: %12:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec @@ -117,9 +117,9 @@ body: | ; maxnum_ieee vv %6:vgpr(s32) = G_FMAXNUM_IEEE %1, %2 - G_STORE %4, %3 :: (store 4, addrspace 1) - G_STORE %5, %3 :: (store 4, addrspace 1) - G_STORE %6, %3 :: (store 4, addrspace 1) + G_STORE %4, %3 :: (store (s32), addrspace 1) + G_STORE %5, %3 :: (store (s32), addrspace 1) + G_STORE %6, %3 :: (store (s32), addrspace 1) ; 64-bit diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.mir index c805deecb19e1..1af8bb0e78c28 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.mir @@ -25,9 +25,9 @@ body: | ; GFX7: %7:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX7: %8:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX7: %9:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) - ; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) - ; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) + ; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) ; GFX7: %10:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec ; GFX7: %11:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec ; GFX7: %12:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec @@ -50,9 +50,9 @@ body: | ; maxnum vv %6:vgpr(s32) = G_FMAXNUM %1, %2 - G_STORE %4, %3 :: (store 4, addrspace 1) - G_STORE %5, %3 :: (store 4, addrspace 1) - G_STORE %6, %3 :: (store 4, addrspace 1) + G_STORE %4, %3 :: (store (s32), addrspace 1) + G_STORE %5, %3 :: (store (s32), addrspace 1) + G_STORE %6, %3 :: (store (s32), addrspace 1) ; 64-bit @@ -91,9 +91,9 @@ body: | ; GFX7: %7:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX7: %8:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX7: %9:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) - ; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) - ; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) + ; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) ; GFX7: %10:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec ; GFX7: %11:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec ; GFX7: %12:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec @@ -116,9 +116,9 @@ body: | ; maxnum vv %6:vgpr(s32) = G_FMAXNUM %1, %2 - G_STORE %4, %3 :: (store 4, addrspace 1) - G_STORE %5, %3 :: (store 4, addrspace 1) - G_STORE %6, %3 :: (store 4, addrspace 1) + G_STORE %4, %3 :: (store (s32), addrspace 1) + G_STORE %5, %3 :: (store (s32), addrspace 1) + G_STORE %6, %3 :: (store (s32), addrspace 1) ; 64-bit diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.mir index caae913004102..89e887564ce15 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.mir @@ -24,9 +24,9 @@ body: | ; GFX7: %7:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX7: %8:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX7: %9:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) - ; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) - ; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) + ; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) ; GFX7: %10:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec ; GFX7: %11:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec ; GFX7: %12:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec @@ -49,9 +49,9 @@ body: | ; minnum_ieee vv %6:vgpr(s32) = G_FMINNUM_IEEE %1, %2 - G_STORE %4, %3 :: (store 4, addrspace 1) - G_STORE %5, %3 :: (store 4, addrspace 1) - G_STORE %6, %3 :: (store 4, addrspace 1) + G_STORE %4, %3 :: (store (s32), addrspace 1) + G_STORE %5, %3 :: (store (s32), addrspace 1) + G_STORE %6, %3 :: (store (s32), addrspace 1) ; 64-bit @@ -92,9 +92,9 @@ body: | ; GFX7: %7:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX7: %8:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX7: %9:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) - ; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) - ; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) + ; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) ; GFX7: %10:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec ; GFX7: %11:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec ; GFX7: %12:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec @@ -117,9 +117,9 @@ body: | ; minnum_ieee vv %6:vgpr(s32) = G_FMINNUM_IEEE %1, %2 - G_STORE %4, %3 :: (store 4, addrspace 1) - G_STORE %5, %3 :: (store 4, addrspace 1) - G_STORE %6, %3 :: (store 4, addrspace 1) + G_STORE %4, %3 :: (store (s32), addrspace 1) + G_STORE %5, %3 :: (store (s32), addrspace 1) + G_STORE %6, %3 :: (store (s32), addrspace 1) ; 64-bit diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.mir index c24ba7a6a4f87..89f82b6fb6ddc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.mir @@ -25,9 +25,9 @@ body: | ; GFX7: %7:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX7: %8:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX7: %9:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) - ; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) - ; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) + ; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) ; GFX7: %10:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec ; GFX7: %11:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec ; GFX7: %12:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec @@ -50,9 +50,9 @@ body: | ; minnum vv %6:vgpr(s32) = G_FMINNUM %1, %2 - G_STORE %4, %3 :: (store 4, addrspace 1) - G_STORE %5, %3 :: (store 4, addrspace 1) - G_STORE %6, %3 :: (store 4, addrspace 1) + G_STORE %4, %3 :: (store (s32), addrspace 1) + G_STORE %5, %3 :: (store (s32), addrspace 1) + G_STORE %6, %3 :: (store (s32), addrspace 1) ; 64-bit @@ -91,9 +91,9 @@ body: | ; GFX7: %7:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX7: %8:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX7: %9:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) - ; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) - ; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) + ; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) ; GFX7: %10:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec ; GFX7: %11:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec ; GFX7: %12:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec @@ -116,9 +116,9 @@ body: | ; minnum vv %6:vgpr(s32) = G_FMINNUM %1, %2 - G_STORE %4, %3 :: (store 4, addrspace 1) - G_STORE %5, %3 :: (store 4, addrspace 1) - G_STORE %6, %3 :: (store 4, addrspace 1) + G_STORE %4, %3 :: (store (s32), addrspace 1) + G_STORE %5, %3 :: (store (s32), addrspace 1) + G_STORE %6, %3 :: (store (s32), addrspace 1) ; 64-bit diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.mir index 950b00ff3ec8b..d94583c86d429 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.mir @@ -18,9 +18,9 @@ body: | ; GCN: %4:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GCN: %5:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GCN: %6:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GCN: FLAT_STORE_DWORD [[COPY3]], %4, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) - ; GCN: FLAT_STORE_DWORD [[COPY3]], %5, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) - ; GCN: FLAT_STORE_DWORD [[COPY3]], %6, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) + ; GCN: FLAT_STORE_DWORD [[COPY3]], %4, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GCN: FLAT_STORE_DWORD [[COPY3]], %5, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GCN: FLAT_STORE_DWORD [[COPY3]], %6, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(s32) = COPY $vgpr1 @@ -35,9 +35,9 @@ body: | ; fmul vv %6:vgpr(s32) = G_FMUL %1, %2 - G_STORE %4, %3 :: (store 4, addrspace 1) - G_STORE %5, %3 :: (store 4, addrspace 1) - G_STORE %6, %3 :: (store 4, addrspace 1) + G_STORE %4, %3 :: (store (s32), addrspace 1) + G_STORE %5, %3 :: (store (s32), addrspace 1) + G_STORE %6, %3 :: (store (s32), addrspace 1) ... --- @@ -133,16 +133,16 @@ body: | ; GCN: %13:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[COPY]], 3, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GCN: %14:vgpr_32 = nofpexcept V_MUL_F32_e64 3, [[COPY]], 3, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GCN: %15:vgpr_32 = nofpexcept V_MUL_F32_e64 3, [[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec - ; GCN: FLAT_STORE_DWORD [[COPY1]], %6, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) - ; GCN: FLAT_STORE_DWORD [[COPY1]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) - ; GCN: FLAT_STORE_DWORD [[COPY1]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) - ; GCN: FLAT_STORE_DWORD [[COPY1]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) - ; GCN: FLAT_STORE_DWORD [[COPY1]], %10, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) - ; GCN: FLAT_STORE_DWORD [[COPY1]], %11, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) - ; GCN: FLAT_STORE_DWORD [[COPY1]], %12, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) - ; GCN: FLAT_STORE_DWORD [[COPY1]], %13, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) - ; GCN: FLAT_STORE_DWORD [[COPY1]], %14, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) - ; GCN: FLAT_STORE_DWORD [[COPY1]], %15, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) + ; GCN: FLAT_STORE_DWORD [[COPY1]], %6, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GCN: FLAT_STORE_DWORD [[COPY1]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GCN: FLAT_STORE_DWORD [[COPY1]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GCN: FLAT_STORE_DWORD [[COPY1]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GCN: FLAT_STORE_DWORD [[COPY1]], %10, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GCN: FLAT_STORE_DWORD [[COPY1]], %11, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GCN: FLAT_STORE_DWORD [[COPY1]], %12, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GCN: FLAT_STORE_DWORD [[COPY1]], %13, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GCN: FLAT_STORE_DWORD [[COPY1]], %14, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GCN: FLAT_STORE_DWORD [[COPY1]], %15, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(p1) = COPY $vgpr2_vgpr3 @@ -184,15 +184,15 @@ body: | ; fneg fabs lhs, fneg rhs %15:vgpr(s32) = G_FMUL %5, %4 - G_STORE %6, %2 :: (store 4, addrspace 1) - G_STORE %7, %2 :: (store 4, addrspace 1) - G_STORE %8, %2 :: (store 4, addrspace 1) - G_STORE %9, %2 :: (store 4, addrspace 1) - G_STORE %10, %2 :: (store 4, addrspace 1) - G_STORE %11, %2 :: (store 4, addrspace 1) - G_STORE %12, %2 :: (store 4, addrspace 1) - G_STORE %13, %2 :: (store 4, addrspace 1) - G_STORE %14, %2 :: (store 4, addrspace 1) - G_STORE %15, %2 :: (store 4, addrspace 1) + G_STORE %6, %2 :: (store (s32), addrspace 1) + G_STORE %7, %2 :: (store (s32), addrspace 1) + G_STORE %8, %2 :: (store (s32), addrspace 1) + G_STORE %9, %2 :: (store (s32), addrspace 1) + G_STORE %10, %2 :: (store (s32), addrspace 1) + G_STORE %11, %2 :: (store (s32), addrspace 1) + G_STORE %12, %2 :: (store (s32), addrspace 1) + G_STORE %13, %2 :: (store (s32), addrspace 1) + G_STORE %14, %2 :: (store (s32), addrspace 1) + G_STORE %15, %2 :: (store (s32), addrspace 1) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir index 92ffb4f131f7d..aa6dbea977c93 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir @@ -18,16 +18,16 @@ body: | ; GCN: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 ; GCN: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GCN: %4:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN: FLAT_STORE_DWORD [[COPY2]], %3, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) - ; GCN: FLAT_STORE_DWORD [[COPY2]], %4, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) + ; GCN: FLAT_STORE_DWORD [[COPY2]], %3, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; GCN: FLAT_STORE_DWORD [[COPY2]], %4, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) ; VI-LABEL: name: fptoui ; VI: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; VI: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; VI: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 ; VI: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; VI: %4:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; VI: FLAT_STORE_DWORD [[COPY2]], %3, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) - ; VI: FLAT_STORE_DWORD [[COPY2]], %4, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) + ; VI: FLAT_STORE_DWORD [[COPY2]], %3, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; VI: FLAT_STORE_DWORD [[COPY2]], %4, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -40,8 +40,8 @@ body: | ; fptoui v %4:vgpr(s32) = G_FPTOUI %1 - G_STORE %3, %2 :: (store 4, addrspace 1) - G_STORE %4, %2 :: (store 4, addrspace 1) + G_STORE %3, %2 :: (store (s32), addrspace 1) + G_STORE %4, %2 :: (store (s32), addrspace 1) ... --- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir index fbb7c1778f305..4b6628472c8bb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir @@ -14,10 +14,10 @@ body: | ; CHECK-LABEL: name: fract_f64_neg ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 36, 0 :: (dereferenceable invariant load 16, align 4, addrspace 4) + ; CHECK: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 36, 0 :: (dereferenceable invariant load (<2 x s64>), align 4, addrspace 4) ; CHECK: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX4_IMM]].sub0_sub1 ; CHECK: [[COPY2:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX4_IMM]].sub2_sub3 - ; CHECK: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY2]], 0, 0 :: (load 8, addrspace 1) + ; CHECK: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY2]], 0, 0 :: (load (s64), addrspace 1) ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; CHECK: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 @@ -26,16 +26,16 @@ body: | ; CHECK: %12:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY3]], 1, [[COPY4]], 0, 0, implicit $mode, implicit $exec ; CHECK: %15:vreg_64 = nofpexcept V_FRACT_F64_e64 0, %12, 0, 0, implicit $mode, implicit $exec ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; CHECK: GLOBAL_STORE_DWORDX2_SADDR [[V_MOV_B32_e32_]], %15, [[COPY1]], 0, 0, implicit $exec :: (store 8, addrspace 1) + ; CHECK: GLOBAL_STORE_DWORDX2_SADDR [[V_MOV_B32_e32_]], %15, [[COPY1]], 0, 0, implicit $exec :: (store (s64), addrspace 1) ; CHECK: S_ENDPGM 0 %2:sgpr(p4) = COPY $sgpr0_sgpr1 %7:sgpr(s64) = G_CONSTANT i64 36 %8:sgpr(p4) = G_PTR_ADD %2, %7(s64) - %9:sgpr(<2 x s64>) = G_LOAD %8(p4) :: (dereferenceable invariant load 16, align 4, addrspace 4) + %9:sgpr(<2 x s64>) = G_LOAD %8(p4) :: (dereferenceable invariant load (<2 x s64>), align 4, addrspace 4) %10:sgpr(s64) = G_EXTRACT %9(<2 x s64>), 0 %13:sgpr(s64) = G_EXTRACT %9(<2 x s64>), 64 %15:sgpr(p1) = G_INTTOPTR %13(s64) - %18:sgpr(s64) = G_LOAD %15(p1) :: (load 8, addrspace 1) + %18:sgpr(s64) = G_LOAD %15(p1) :: (load (s64), addrspace 1) %19:sgpr(s64) = G_FCONSTANT double -0.000000e+00 %24:sgpr(s64) = G_FNEG %18 %25:vgpr(s64) = COPY %19(s64) @@ -46,7 +46,7 @@ body: | %22:vgpr(s64) = G_FADD %20, %23 %12:sgpr(p1) = G_INTTOPTR %10(s64) %27:vgpr(p1) = COPY %12(p1) - G_STORE %22(s64), %27(p1) :: (store 8, addrspace 1) + G_STORE %22(s64), %27(p1) :: (store (s64), addrspace 1) S_ENDPGM 0 ... @@ -63,10 +63,11 @@ body: | ; CHECK-LABEL: name: fract_f64_neg_abs ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 36, 0 :: (dereferenceable invariant load 16, align 4, addrspace 4) + ; CHECK: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = + ; S_LOAD_DWORDX4_IMM [[COPY]], 36, 0 :: (dereferenceable invariant load (<2 x s64>), align 4, addrspace 4) ; CHECK: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX4_IMM]].sub0_sub1 ; CHECK: [[COPY2:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX4_IMM]].sub2_sub3 - ; CHECK: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY2]], 0, 0 :: (load 8, addrspace 1) + ; CHECK: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY2]], 0, 0 :: (load (s64), addrspace 1) ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; CHECK: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 @@ -75,16 +76,16 @@ body: | ; CHECK: %13:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY3]], 3, [[COPY4]], 0, 0, implicit $mode, implicit $exec ; CHECK: %16:vreg_64 = nofpexcept V_FRACT_F64_e64 0, %13, 0, 0, implicit $mode, implicit $exec ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; CHECK: GLOBAL_STORE_DWORDX2_SADDR [[V_MOV_B32_e32_]], %16, [[COPY1]], 0, 0, implicit $exec :: (store 8, addrspace 1) + ; CHECK: GLOBAL_STORE_DWORDX2_SADDR [[V_MOV_B32_e32_]], %16, [[COPY1]], 0, 0, implicit $exec :: (store (s64), addrspace 1) ; CHECK: S_ENDPGM 0 %2:sgpr(p4) = COPY $sgpr0_sgpr1 %7:sgpr(s64) = G_CONSTANT i64 36 %8:sgpr(p4) = G_PTR_ADD %2, %7(s64) - %9:sgpr(<2 x s64>) = G_LOAD %8(p4) :: (dereferenceable invariant load 16, align 4, addrspace 4) + %9:sgpr(<2 x s64>) = G_LOAD %8(p4) :: (dereferenceable invariant load (<2 x s64>), align 4, addrspace 4) %10:sgpr(s64) = G_EXTRACT %9(<2 x s64>), 0 %13:sgpr(s64) = G_EXTRACT %9(<2 x s64>), 64 %15:sgpr(p1) = G_INTTOPTR %13(s64) - %18:sgpr(s64) = G_LOAD %15(p1) :: (load 8, addrspace 1) + %18:sgpr(s64) = G_LOAD %15(p1) :: (load (s64), addrspace 1) %19:sgpr(s64) = G_FABS %18 %20:sgpr(s64) = G_FCONSTANT double -0.000000e+00 %25:sgpr(s64) = G_FNEG %19 @@ -96,6 +97,6 @@ body: | %23:vgpr(s64) = G_FADD %21, %24 %12:sgpr(p1) = G_INTTOPTR %10(s64) %28:vgpr(p1) = COPY %12(p1) - G_STORE %23(s64), %28(p1) :: (store 8, addrspace 1) + G_STORE %23(s64), %28(p1) :: (store (s64), addrspace 1) S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.mir index e51a940be02bf..dca03642b8f25 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.mir @@ -43,9 +43,9 @@ body: | %11:sgpr(s32) = G_SELECT %9, %7, %8 %12:sgpr(s32) = G_SELECT %10, %3, %4 %13:vgpr(s32) = COPY %11 - G_STORE %13, %0 :: (volatile store 4, addrspace 1) + G_STORE %13, %0 :: (volatile store (s32), addrspace 1) %14:vgpr(s32) = COPY %12 - G_STORE %14, %0 :: (volatile store 4, addrspace 1) + G_STORE %14, %0 :: (volatile store (s32), addrspace 1) ... --- @@ -95,25 +95,25 @@ body: | %23:sgpr(s32) = G_SELECT %13, %3, %4 %24:sgpr(s32) = G_SELECT %14, %3, %4 %25:vgpr(s32) = COPY %15 - G_STORE %25, %0 :: (volatile store 4, addrspace 1) + G_STORE %25, %0 :: (volatile store (s32), addrspace 1) %26:vgpr(s32) = COPY %16 - G_STORE %26, %0 :: (volatile store 4, addrspace 1) + G_STORE %26, %0 :: (volatile store (s32), addrspace 1) %27:vgpr(s32) = COPY %17 - G_STORE %27, %0 :: (volatile store 4, addrspace 1) + G_STORE %27, %0 :: (volatile store (s32), addrspace 1) %28:vgpr(s32) = COPY %18 - G_STORE %28, %0 :: (volatile store 4, addrspace 1) + G_STORE %28, %0 :: (volatile store (s32), addrspace 1) %29:vgpr(s32) = COPY %19 - G_STORE %29, %0 :: (volatile store 4, addrspace 1) + G_STORE %29, %0 :: (volatile store (s32), addrspace 1) %30:vgpr(s32) = COPY %20 - G_STORE %30, %0 :: (volatile store 4, addrspace 1) + G_STORE %30, %0 :: (volatile store (s32), addrspace 1) %31:vgpr(s32) = COPY %21 - G_STORE %31, %0 :: (volatile store 4, addrspace 1) + G_STORE %31, %0 :: (volatile store (s32), addrspace 1) %32:vgpr(s32) = COPY %22 - G_STORE %32, %0 :: (volatile store 4, addrspace 1) + G_STORE %32, %0 :: (volatile store (s32), addrspace 1) %33:vgpr(s32) = COPY %23 - G_STORE %33, %0 :: (volatile store 4, addrspace 1) + G_STORE %33, %0 :: (volatile store (s32), addrspace 1) %34:vgpr(s32) = COPY %24 - G_STORE %34, %0 :: (volatile store 4, addrspace 1) + G_STORE %34, %0 :: (volatile store (s32), addrspace 1) ... --- @@ -152,8 +152,8 @@ body: | %10:vcc(s1) = G_ICMP intpred(ne), %5, %6 %11:vgpr(s32) = G_SELECT %9, %7, %8 %12:vgpr(s32) = G_SELECT %10, %3, %4 - G_STORE %11, %0 :: (volatile store 4, addrspace 1) - G_STORE %12, %0 :: (volatile store 4, addrspace 1) + G_STORE %11, %0 :: (volatile store (s32), addrspace 1) + G_STORE %12, %0 :: (volatile store (s32), addrspace 1) ... --- name: icmp_s32_valu @@ -201,16 +201,16 @@ body: | %22:vgpr(s32) = G_SELECT %12, %3, %4 %23:vgpr(s32) = G_SELECT %13, %3, %4 %24:vgpr(s32) = G_SELECT %14, %3, %4 - G_STORE %15, %0 :: (volatile store 4, addrspace 1) - G_STORE %16, %0 :: (volatile store 4, addrspace 1) - G_STORE %17, %0 :: (volatile store 4, addrspace 1) - G_STORE %18, %0 :: (volatile store 4, addrspace 1) - G_STORE %19, %0 :: (volatile store 4, addrspace 1) - G_STORE %20, %0 :: (volatile store 4, addrspace 1) - G_STORE %21, %0 :: (volatile store 4, addrspace 1) - G_STORE %22, %0 :: (volatile store 4, addrspace 1) - G_STORE %23, %0 :: (volatile store 4, addrspace 1) - G_STORE %24, %0 :: (volatile store 4, addrspace 1) + G_STORE %15, %0 :: (volatile store (s32), addrspace 1) + G_STORE %16, %0 :: (volatile store (s32), addrspace 1) + G_STORE %17, %0 :: (volatile store (s32), addrspace 1) + G_STORE %18, %0 :: (volatile store (s32), addrspace 1) + G_STORE %19, %0 :: (volatile store (s32), addrspace 1) + G_STORE %20, %0 :: (volatile store (s32), addrspace 1) + G_STORE %21, %0 :: (volatile store (s32), addrspace 1) + G_STORE %22, %0 :: (volatile store (s32), addrspace 1) + G_STORE %23, %0 :: (volatile store (s32), addrspace 1) + G_STORE %24, %0 :: (volatile store (s32), addrspace 1) ... --- @@ -234,7 +234,7 @@ body: | %4:vgpr(s32) = COPY $vgpr5 %5:vcc(s1) = G_ICMP intpred(ne), %1, %2 %6:vgpr(s32) = G_SELECT %5, %3, %4 - G_STORE %6, %0 :: (store 4, addrspace 1) + G_STORE %6, %0 :: (store (s32), addrspace 1) ... --- @@ -258,7 +258,7 @@ body: | %4:sgpr(s32) = COPY $sgpr0 %5:vcc(s1) = G_ICMP intpred(ne), %1, %4 %6:vgpr(s32) = G_SELECT %5, %2, %3 - G_STORE %6, %0 :: (store 4, addrspace 1) + G_STORE %6, %0 :: (store (s32), addrspace 1) ... --- @@ -282,7 +282,7 @@ body: | %4:sgpr(s32) = COPY $sgpr0 %5:vcc(s1) = G_ICMP intpred(ne), %4, %1 %6:vgpr(s32) = G_SELECT %5, %2, %3 - G_STORE %6, %0 :: (store 4, addrspace 1) + G_STORE %6, %0 :: (store (s32), addrspace 1) ... --- @@ -306,7 +306,7 @@ body: | %4:sgpr(s32) = COPY $sgpr0 %5:vcc(s1) = G_ICMP intpred(ne), %4, %1 %6:vgpr(s32) = G_SELECT %5, %2, %3 - G_STORE %6, %0 :: (store 4, addrspace 1) + G_STORE %6, %0 :: (store (s32), addrspace 1) ... --- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-implicit-def.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-implicit-def.mir index 5db4821336648..19fad4e2662d6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-implicit-def.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-implicit-def.mir @@ -99,10 +99,10 @@ body: | ; GCN-LABEL: name: implicit_def_p1_vgpr ; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec - ; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) + ; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) %0:vgpr(p1) = G_IMPLICIT_DEF %1:vgpr(s32) = G_CONSTANT i32 4 - G_STORE %1, %0 :: (store 4, addrspace 1) + G_STORE %1, %0 :: (store (s32), addrspace 1) ... --- @@ -117,10 +117,10 @@ body: | ; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec ; GCN: $m0 = S_MOV_B32 -1 - ; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) + ; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) %0:vgpr(p3) = G_IMPLICIT_DEF %1:vgpr(s32) = G_CONSTANT i32 4 - G_STORE %1, %0 :: (store 4, addrspace 1) + G_STORE %1, %0 :: (store (s32), addrspace 1) ... --- @@ -134,10 +134,10 @@ body: | ; GCN-LABEL: name: implicit_def_p4_vgpr ; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec - ; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) + ; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) %0:vgpr(p4) = G_IMPLICIT_DEF %1:vgpr(s32) = G_CONSTANT i32 4 - G_STORE %1, %0 :: (store 4, addrspace 1) + G_STORE %1, %0 :: (store (s32), addrspace 1) ... --- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-inttoptr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-inttoptr.mir index 6321080d6becd..81fdf2fe0e78a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-inttoptr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-inttoptr.mir @@ -20,8 +20,8 @@ body: | %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:vgpr(s64) = COPY $vgpr0_vgpr1 %2:sgpr(p4) = G_INTTOPTR %0 - %3:sgpr(s32) = G_LOAD %2 :: (load 4, addrspace 1) + %3:sgpr(s32) = G_LOAD %2 :: (load (s32), addrspace 1) %4:vgpr(p0) = G_INTTOPTR %1 %5:vgpr(s32) = COPY %3 - G_STORE %5, %4 :: (store 4, addrspace 1) + G_STORE %5, %4 :: (store (s32), addrspace 1) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir index fce8020a3c74a..cfb8ded8da2b7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir @@ -17,20 +17,20 @@ body: | ; GFX7-LABEL: name: load_atomic_flat_s32_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4) + ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; GFX9-LABEL: name: load_atomic_flat_s32_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4) + ; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; GFX10-LABEL: name: load_atomic_flat_s32_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4) + ; GFX10: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = G_LOAD %0 :: (load seq_cst 4, align 4, addrspace 0) + %1:vgpr(s32) = G_LOAD %0 :: (load seq_cst (s32), align 4, addrspace 0) $vgpr0 = COPY %1 ... @@ -49,20 +49,20 @@ body: | ; GFX7-LABEL: name: load_atomic_flat_v2s16_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 - ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst 4) + ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s16>)) ; GFX7: $vgpr0 = COPY [[LOAD]](<2 x s16>) ; GFX9-LABEL: name: load_atomic_flat_v2s16_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst 4) + ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s16>)) ; GFX9: $vgpr0 = COPY [[LOAD]](<2 x s16>) ; GFX10-LABEL: name: load_atomic_flat_v2s16_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 - ; GFX10: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst 4) + ; GFX10: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s16>)) ; GFX10: $vgpr0 = COPY [[LOAD]](<2 x s16>) %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load seq_cst 4, align 4, addrspace 0) + %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load seq_cst (<2 x s16>), align 4, addrspace 0) $vgpr0 = COPY %1 ... @@ -81,20 +81,20 @@ body: | ; GFX7-LABEL: name: load_atomic_flat_p3_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 - ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p0) :: (load seq_cst 4) + ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p0) :: (load seq_cst (p3)) ; GFX7: $vgpr0 = COPY [[LOAD]](p3) ; GFX9-LABEL: name: load_atomic_flat_p3_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p0) :: (load seq_cst 4) + ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p0) :: (load seq_cst (p3)) ; GFX9: $vgpr0 = COPY [[LOAD]](p3) ; GFX10-LABEL: name: load_atomic_flat_p3_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 - ; GFX10: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p0) :: (load seq_cst 4) + ; GFX10: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p0) :: (load seq_cst (p3)) ; GFX10: $vgpr0 = COPY [[LOAD]](p3) %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %1:vgpr(p3) = G_LOAD %0 :: (load seq_cst 4, align 4, addrspace 0) + %1:vgpr(p3) = G_LOAD %0 :: (load seq_cst (p3), align 4, addrspace 0) $vgpr0 = COPY %1 ... @@ -113,20 +113,20 @@ body: | ; GFX7-LABEL: name: load_atomic_flat_s64_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 8) + ; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64)) ; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; GFX9-LABEL: name: load_atomic_flat_s64_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 8) + ; GFX9: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64)) ; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; GFX10-LABEL: name: load_atomic_flat_s64_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 8) + ; GFX10: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64)) ; GFX10: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 0) + %1:vgpr(s64) = G_LOAD %0 :: (load seq_cst (s64), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 ... @@ -145,20 +145,20 @@ body: | ; GFX7-LABEL: name: load_atomic_flat_v2s32_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 - ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst 8) + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>)) ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; GFX9-LABEL: name: load_atomic_flat_v2s32_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst 8) + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>)) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; GFX10-LABEL: name: load_atomic_flat_v2s32_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 - ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst 8) + ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>)) ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 0) + %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load seq_cst (<2 x s32>), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 ... @@ -177,20 +177,20 @@ body: | ; GFX7-LABEL: name: load_atomic_flat_v4s16_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 - ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst 8) + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<4 x s16>)) ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) ; GFX9-LABEL: name: load_atomic_flat_v4s16_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst 8) + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<4 x s16>)) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) ; GFX10-LABEL: name: load_atomic_flat_v4s16_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 - ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst 8) + ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<4 x s16>)) ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 0) + %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load seq_cst (<4 x s16>), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 ... @@ -209,20 +209,20 @@ body: | ; GFX7-LABEL: name: load_atomic_flat_p1_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 - ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p0) :: (load seq_cst 8) + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p0) :: (load seq_cst (p1)) ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p1) ; GFX9-LABEL: name: load_atomic_flat_p1_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p0) :: (load seq_cst 8) + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p0) :: (load seq_cst (p1)) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p1) ; GFX10-LABEL: name: load_atomic_flat_p1_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 - ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p0) :: (load seq_cst 8) + ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p0) :: (load seq_cst (p1)) ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](p1) %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %1:vgpr(p1) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 0) + %1:vgpr(p1) = G_LOAD %0 :: (load seq_cst (p1), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 ... @@ -241,20 +241,20 @@ body: | ; GFX7-LABEL: name: load_atomic_flat_p0_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 - ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p0) :: (load seq_cst 8) + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p0) :: (load seq_cst (p0)) ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p0) ; GFX9-LABEL: name: load_atomic_flat_p0_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p0) :: (load seq_cst 8) + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p0) :: (load seq_cst (p0)) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p0) ; GFX10-LABEL: name: load_atomic_flat_p0_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 - ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p0) :: (load seq_cst 8) + ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p0) :: (load seq_cst (p0)) ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](p0) %0:vgpr(p0) = COPY $vgpr0_vgpr1 - %1:vgpr(p0) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 0) + %1:vgpr(p0) = G_LOAD %0 :: (load seq_cst (p0), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 ... @@ -283,7 +283,7 @@ body: | ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4) + ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; GFX9-LABEL: name: load_atomic_flat_s32_seq_cst_gep_m2048 ; GFX9: liveins: $vgpr0_vgpr1 @@ -298,7 +298,7 @@ body: | ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4) + ; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; GFX10-LABEL: name: load_atomic_flat_s32_seq_cst_gep_m2048 ; GFX10: liveins: $vgpr0_vgpr1 @@ -313,12 +313,12 @@ body: | ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX10: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4) + ; GFX10: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -2048 %2:vgpr(p0) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load seq_cst 4, align 4, addrspace 0) + %3:vgpr(s32) = G_LOAD %2 :: (load seq_cst (s32), align 4, addrspace 0) $vgpr0 = COPY %3 ... @@ -347,12 +347,12 @@ body: | ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4) + ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; GFX9-LABEL: name: load_atomic_flat_s32_seq_cst_gep_4095 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4) + ; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; GFX10-LABEL: name: load_atomic_flat_s32_seq_cst_gep_4095 ; GFX10: liveins: $vgpr0_vgpr1 @@ -367,12 +367,12 @@ body: | ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX10: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4) + ; GFX10: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32)) ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 4095 %2:vgpr(p0) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load seq_cst 4, align 4, addrspace 0) + %3:vgpr(s32) = G_LOAD %2 :: (load seq_cst (s32), align 4, addrspace 0) $vgpr0 = COPY %3 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir index fd61ca683012c..40328d32690a8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir @@ -24,7 +24,7 @@ body: | ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1) + ; GFX6: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]] ; GFX7-LABEL: name: load_atomic_global_s32_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 @@ -34,25 +34,25 @@ body: | ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1) + ; GFX7: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_atomic_global_s32_seq_cst ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4, addrspace 1) + ; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1) ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; GFX9-LABEL: name: load_atomic_global_s32_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1) + ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] ; GFX10-LABEL: name: load_atomic_global_s32_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1) + ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = G_LOAD %0 :: (load seq_cst 4, align 4, addrspace 1) + %1:vgpr(s32) = G_LOAD %0 :: (load seq_cst (s32), align 4, addrspace 1) $vgpr0 = COPY %1 ... @@ -71,30 +71,30 @@ body: | ; GFX6-LABEL: name: load_atomic_global_v2s16_seq_cst ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst 4, addrspace 1) + ; GFX6: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s16>), addrspace 1) ; GFX6: $vgpr0 = COPY [[LOAD]](<2 x s16>) ; GFX7-LABEL: name: load_atomic_global_v2s16_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst 4, addrspace 1) + ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s16>), addrspace 1) ; GFX7: $vgpr0 = COPY [[LOAD]](<2 x s16>) ; GFX7-FLAT-LABEL: name: load_atomic_global_v2s16_seq_cst ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7-FLAT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst 4, addrspace 1) + ; GFX7-FLAT: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s16>), addrspace 1) ; GFX7-FLAT: $vgpr0 = COPY [[LOAD]](<2 x s16>) ; GFX9-LABEL: name: load_atomic_global_v2s16_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst 4, addrspace 1) + ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s16>), addrspace 1) ; GFX9: $vgpr0 = COPY [[LOAD]](<2 x s16>) ; GFX10-LABEL: name: load_atomic_global_v2s16_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX10: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst 4, addrspace 1) + ; GFX10: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s16>), addrspace 1) ; GFX10: $vgpr0 = COPY [[LOAD]](<2 x s16>) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load seq_cst 4, align 4, addrspace 1) + %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load seq_cst (<2 x s16>), align 4, addrspace 1) $vgpr0 = COPY %1 ... @@ -113,30 +113,30 @@ body: | ; GFX6-LABEL: name: load_atomic_global_p3_seq_cst ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load seq_cst 4, addrspace 1) + ; GFX6: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load seq_cst (p3), addrspace 1) ; GFX6: $vgpr0 = COPY [[LOAD]](p3) ; GFX7-LABEL: name: load_atomic_global_p3_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load seq_cst 4, addrspace 1) + ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load seq_cst (p3), addrspace 1) ; GFX7: $vgpr0 = COPY [[LOAD]](p3) ; GFX7-FLAT-LABEL: name: load_atomic_global_p3_seq_cst ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7-FLAT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load seq_cst 4, addrspace 1) + ; GFX7-FLAT: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load seq_cst (p3), addrspace 1) ; GFX7-FLAT: $vgpr0 = COPY [[LOAD]](p3) ; GFX9-LABEL: name: load_atomic_global_p3_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load seq_cst 4, addrspace 1) + ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load seq_cst (p3), addrspace 1) ; GFX9: $vgpr0 = COPY [[LOAD]](p3) ; GFX10-LABEL: name: load_atomic_global_p3_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX10: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load seq_cst 4, addrspace 1) + ; GFX10: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load seq_cst (p3), addrspace 1) ; GFX10: $vgpr0 = COPY [[LOAD]](p3) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(p3) = G_LOAD %0 :: (load seq_cst 4, align 4, addrspace 1) + %1:vgpr(p3) = G_LOAD %0 :: (load seq_cst (p3), align 4, addrspace 1) $vgpr0 = COPY %1 ... @@ -160,7 +160,7 @@ body: | ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 8, addrspace 1) + ; GFX6: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) ; GFX6: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] ; GFX7-LABEL: name: load_atomic_global_s64_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 @@ -170,25 +170,25 @@ body: | ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 8, addrspace 1) + ; GFX7: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) ; GFX7: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_atomic_global_s64_seq_cst ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 8, addrspace 1) + ; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64), addrspace 1) ; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; GFX9-LABEL: name: load_atomic_global_s64_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load seq_cst 8, addrspace 1) + ; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) ; GFX9: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] ; GFX10-LABEL: name: load_atomic_global_s64_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load seq_cst 8, addrspace 1) + ; GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) ; GFX10: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 1) + %1:vgpr(s64) = G_LOAD %0 :: (load seq_cst (s64), align 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... @@ -207,30 +207,30 @@ body: | ; GFX6-LABEL: name: load_atomic_global_v2s32_seq_cst ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst 8, addrspace 1) + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s32>), addrspace 1) ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; GFX7-LABEL: name: load_atomic_global_v2s32_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst 8, addrspace 1) + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s32>), addrspace 1) ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; GFX7-FLAT-LABEL: name: load_atomic_global_v2s32_seq_cst ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7-FLAT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst 8, addrspace 1) + ; GFX7-FLAT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s32>), addrspace 1) ; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; GFX9-LABEL: name: load_atomic_global_v2s32_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst 8, addrspace 1) + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s32>), addrspace 1) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; GFX10-LABEL: name: load_atomic_global_v2s32_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst 8, addrspace 1) + ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s32>), addrspace 1) ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 1) + %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load seq_cst (<2 x s32>), align 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... @@ -249,30 +249,30 @@ body: | ; GFX6-LABEL: name: load_atomic_global_v4s16_seq_cst ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst 8, addrspace 1) + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<4 x s16>), addrspace 1) ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) ; GFX7-LABEL: name: load_atomic_global_v4s16_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst 8, addrspace 1) + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<4 x s16>), addrspace 1) ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) ; GFX7-FLAT-LABEL: name: load_atomic_global_v4s16_seq_cst ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7-FLAT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst 8, addrspace 1) + ; GFX7-FLAT: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<4 x s16>), addrspace 1) ; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) ; GFX9-LABEL: name: load_atomic_global_v4s16_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst 8, addrspace 1) + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<4 x s16>), addrspace 1) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) ; GFX10-LABEL: name: load_atomic_global_v4s16_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst 8, addrspace 1) + ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<4 x s16>), addrspace 1) ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 1) + %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load seq_cst (<4 x s16>), align 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... @@ -291,30 +291,30 @@ body: | ; GFX6-LABEL: name: load_atomic_global_p1_seq_cst ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load seq_cst 8, addrspace 1) + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load seq_cst (p1), addrspace 1) ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](p1) ; GFX7-LABEL: name: load_atomic_global_p1_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load seq_cst 8, addrspace 1) + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load seq_cst (p1), addrspace 1) ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p1) ; GFX7-FLAT-LABEL: name: load_atomic_global_p1_seq_cst ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7-FLAT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load seq_cst 8, addrspace 1) + ; GFX7-FLAT: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load seq_cst (p1), addrspace 1) ; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) ; GFX9-LABEL: name: load_atomic_global_p1_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load seq_cst 8, addrspace 1) + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load seq_cst (p1), addrspace 1) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p1) ; GFX10-LABEL: name: load_atomic_global_p1_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load seq_cst 8, addrspace 1) + ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load seq_cst (p1), addrspace 1) ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](p1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(p1) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 1) + %1:vgpr(p1) = G_LOAD %0 :: (load seq_cst (p1), align 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... @@ -333,30 +333,30 @@ body: | ; GFX6-LABEL: name: load_atomic_global_p0_seq_cst ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p1) :: (load seq_cst 8, addrspace 1) + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p1) :: (load seq_cst (p0), addrspace 1) ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](p0) ; GFX7-LABEL: name: load_atomic_global_p0_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p1) :: (load seq_cst 8, addrspace 1) + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p1) :: (load seq_cst (p0), addrspace 1) ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p0) ; GFX7-FLAT-LABEL: name: load_atomic_global_p0_seq_cst ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7-FLAT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p1) :: (load seq_cst 8, addrspace 1) + ; GFX7-FLAT: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p1) :: (load seq_cst (p0), addrspace 1) ; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[LOAD]](p0) ; GFX9-LABEL: name: load_atomic_global_p0_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p1) :: (load seq_cst 8, addrspace 1) + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p1) :: (load seq_cst (p0), addrspace 1) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p0) ; GFX10-LABEL: name: load_atomic_global_p0_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p1) :: (load seq_cst 8, addrspace 1) + ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p1) :: (load seq_cst (p0), addrspace 1) ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](p0) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(p0) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 1) + %1:vgpr(p0) = G_LOAD %0 :: (load seq_cst (p0), align 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... @@ -390,7 +390,7 @@ body: | ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 - ; GFX6: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1) + ; GFX6: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]] ; GFX7-LABEL: name: load_atomic_global_s32_seq_cst_gep_m2048 ; GFX7: liveins: $vgpr0_vgpr1 @@ -410,7 +410,7 @@ body: | ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 - ; GFX7: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1) + ; GFX7: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_atomic_global_s32_seq_cst_gep_m2048 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 @@ -425,22 +425,22 @@ body: | ; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4, addrspace 1) + ; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1) ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; GFX9-LABEL: name: load_atomic_global_s32_seq_cst_gep_m2048 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], -2048, 0, implicit $exec :: (load seq_cst 4, addrspace 1) + ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], -2048, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] ; GFX10-LABEL: name: load_atomic_global_s32_seq_cst_gep_m2048 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], -2048, 0, implicit $exec :: (load seq_cst 4, addrspace 1) + ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], -2048, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -2048 %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load seq_cst 4, align 4, addrspace 1) + %3:vgpr(s32) = G_LOAD %2 :: (load seq_cst (s32), align 4, addrspace 1) $vgpr0 = COPY %3 ... @@ -464,7 +464,7 @@ body: | ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1) + ; GFX6: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]] ; GFX7-LABEL: name: load_atomic_global_s32_seq_cst_gep_4095 ; GFX7: liveins: $vgpr0_vgpr1 @@ -474,7 +474,7 @@ body: | ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1) + ; GFX7: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_atomic_global_s32_seq_cst_gep_4095 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 @@ -489,12 +489,12 @@ body: | ; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4, addrspace 1) + ; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1) ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; GFX9-LABEL: name: load_atomic_global_s32_seq_cst_gep_4095 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 4095, 0, implicit $exec :: (load seq_cst 4, addrspace 1) + ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 4095, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] ; GFX10-LABEL: name: load_atomic_global_s32_seq_cst_gep_4095 ; GFX10: liveins: $vgpr0_vgpr1 @@ -509,12 +509,12 @@ body: | ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1) + ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 4095 %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load seq_cst 4, align 4, addrspace 1) + %3:vgpr(s32) = G_LOAD %2 :: (load seq_cst (s32), align 4, addrspace 1) $vgpr0 = COPY %3 ... @@ -548,7 +548,7 @@ body: | ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 - ; GFX6: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 8, addrspace 1) + ; GFX6: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) ; GFX6: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] ; GFX7-LABEL: name: load_atomic_global_s64_seq_cst_gep_m2048 ; GFX7: liveins: $vgpr0_vgpr1 @@ -568,7 +568,7 @@ body: | ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 - ; GFX7: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 8, addrspace 1) + ; GFX7: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) ; GFX7: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_atomic_global_s64_seq_cst_gep_m2048 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 @@ -583,22 +583,22 @@ body: | ; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 8, addrspace 1) + ; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s64), addrspace 1) ; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; GFX9-LABEL: name: load_atomic_global_s64_seq_cst_gep_m2048 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], -2048, 0, implicit $exec :: (load seq_cst 8, addrspace 1) + ; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], -2048, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) ; GFX9: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] ; GFX10-LABEL: name: load_atomic_global_s64_seq_cst_gep_m2048 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], -2048, 0, implicit $exec :: (load seq_cst 8, addrspace 1) + ; GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], -2048, 0, implicit $exec :: (load seq_cst (s64), addrspace 1) ; GFX10: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -2048 %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s64) = G_LOAD %2 :: (load seq_cst 8, align 8, addrspace 1) + %3:vgpr(s64) = G_LOAD %2 :: (load seq_cst (s64), align 8, addrspace 1) $vgpr0_vgpr1 = COPY %3 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-local.mir index d00bbcf14c9f1..7a423c045262e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-local.mir @@ -19,21 +19,21 @@ body: | ; GFX6: liveins: $vgpr0 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst 4, addrspace 3) + ; GFX6: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst (s32), addrspace 3) ; GFX6: $vgpr0 = COPY [[DS_READ_B32_]] ; GFX7-LABEL: name: load_atomic_local_s32_seq_cst ; GFX7: liveins: $vgpr0 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst 4, addrspace 3) + ; GFX7: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst (s32), addrspace 3) ; GFX7: $vgpr0 = COPY [[DS_READ_B32_]] ; GFX9-LABEL: name: load_atomic_local_s32_seq_cst ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load seq_cst 4, addrspace 3) + ; GFX9: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load seq_cst (s32), addrspace 3) ; GFX9: $vgpr0 = COPY [[DS_READ_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(s32) = G_LOAD %0 :: (load seq_cst 4, align 4, addrspace 3) + %1:vgpr(s32) = G_LOAD %0 :: (load seq_cst (s32), align 4, addrspace 3) $vgpr0 = COPY %1 ... @@ -53,21 +53,21 @@ body: | ; GFX6: liveins: $vgpr0 ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst 4, addrspace 3) + ; GFX6: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<2 x s16>), addrspace 3) ; GFX6: $vgpr0 = COPY [[LOAD]](<2 x s16>) ; GFX7-LABEL: name: load_atomic_local_v2s16_seq_cst ; GFX7: liveins: $vgpr0 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst 4, addrspace 3) + ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<2 x s16>), addrspace 3) ; GFX7: $vgpr0 = COPY [[LOAD]](<2 x s16>) ; GFX9-LABEL: name: load_atomic_local_v2s16_seq_cst ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst 4, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<2 x s16>), addrspace 3) ; GFX9: $vgpr0 = COPY [[LOAD]](<2 x s16>) %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load seq_cst 4, align 4, addrspace 3) + %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load seq_cst (<2 x s16>), align 4, addrspace 3) $vgpr0 = COPY %1 ... @@ -87,21 +87,21 @@ body: | ; GFX6: liveins: $vgpr0 ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load seq_cst 4, addrspace 3) + ; GFX6: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load seq_cst (p3), addrspace 3) ; GFX6: $vgpr0 = COPY [[LOAD]](p3) ; GFX7-LABEL: name: load_atomic_local_p3_seq_cst ; GFX7: liveins: $vgpr0 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load seq_cst 4, addrspace 3) + ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load seq_cst (p3), addrspace 3) ; GFX7: $vgpr0 = COPY [[LOAD]](p3) ; GFX9-LABEL: name: load_atomic_local_p3_seq_cst ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load seq_cst 4, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load seq_cst (p3), addrspace 3) ; GFX9: $vgpr0 = COPY [[LOAD]](p3) %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(p3) = G_LOAD %0 :: (load seq_cst 4, align 4, addrspace 3) + %1:vgpr(p3) = G_LOAD %0 :: (load seq_cst (p3), align 4, addrspace 3) $vgpr0 = COPY %1 ... @@ -121,21 +121,21 @@ body: | ; GFX6: liveins: $vgpr0 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst 8, addrspace 3) + ; GFX6: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst (s64), addrspace 3) ; GFX6: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] ; GFX7-LABEL: name: load_atomic_local_s64_seq_cst ; GFX7: liveins: $vgpr0 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst 8, addrspace 3) + ; GFX7: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load seq_cst (s64), addrspace 3) ; GFX7: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] ; GFX9-LABEL: name: load_atomic_local_s64_seq_cst ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load seq_cst 8, addrspace 3) + ; GFX9: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load seq_cst (s64), addrspace 3) ; GFX9: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(s64) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 3) + %1:vgpr(s64) = G_LOAD %0 :: (load seq_cst (s64), align 8, addrspace 3) $vgpr0_vgpr1 = COPY %1 ... @@ -155,21 +155,21 @@ body: | ; GFX6: liveins: $vgpr0 ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load seq_cst 8, addrspace 3) + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<2 x s32>), addrspace 3) ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; GFX7-LABEL: name: load_atomic_local_v2s32_seq_cst ; GFX7: liveins: $vgpr0 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load seq_cst 8, addrspace 3) + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<2 x s32>), addrspace 3) ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; GFX9-LABEL: name: load_atomic_local_v2s32_seq_cst ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load seq_cst 8, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<2 x s32>), addrspace 3) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 3) + %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load seq_cst (<2 x s32>), align 8, addrspace 3) $vgpr0_vgpr1 = COPY %1 ... @@ -189,21 +189,21 @@ body: | ; GFX6: liveins: $vgpr0 ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst 8, addrspace 3) + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<2 x s32>), addrspace 3) ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) ; GFX7-LABEL: name: load_atomic_local_v4s16_seq_cst ; GFX7: liveins: $vgpr0 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst 8, addrspace 3) + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<2 x s32>), addrspace 3) ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) ; GFX9-LABEL: name: load_atomic_local_v4s16_seq_cst ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst 8, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load seq_cst (<2 x s32>), addrspace 3) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 3) + %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load seq_cst (<2 x s32>), align 8, addrspace 3) $vgpr0_vgpr1 = COPY %1 ... @@ -223,21 +223,21 @@ body: | ; GFX6: liveins: $vgpr0 ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load seq_cst 8, addrspace 3) + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load seq_cst (p1), addrspace 3) ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](p1) ; GFX7-LABEL: name: load_atomic_local_p1_seq_cst ; GFX7: liveins: $vgpr0 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load seq_cst 8, addrspace 3) + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load seq_cst (p1), addrspace 3) ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p1) ; GFX9-LABEL: name: load_atomic_local_p1_seq_cst ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load seq_cst 8, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load seq_cst (p1), addrspace 3) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p1) %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(p1) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 3) + %1:vgpr(p1) = G_LOAD %0 :: (load seq_cst (p1), align 8, addrspace 3) $vgpr0_vgpr1 = COPY %1 ... @@ -257,21 +257,21 @@ body: | ; GFX6: liveins: $vgpr0 ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p3) :: (load seq_cst 8, addrspace 3) + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p3) :: (load seq_cst (p0), addrspace 3) ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](p0) ; GFX7-LABEL: name: load_atomic_local_p0_seq_cst ; GFX7: liveins: $vgpr0 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p3) :: (load seq_cst 8, addrspace 3) + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p3) :: (load seq_cst (p0), addrspace 3) ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p0) ; GFX9-LABEL: name: load_atomic_local_p0_seq_cst ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p3) :: (load seq_cst 8, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p0) = G_LOAD [[COPY]](p3) :: (load seq_cst (p0), addrspace 3) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p0) %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(p0) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 3) + %1:vgpr(p0) = G_LOAD %0 :: (load seq_cst (p0), align 8, addrspace 3) $vgpr0_vgpr1 = COPY %1 ... @@ -293,23 +293,23 @@ body: | ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 %2, 0, 0, implicit $m0, implicit $exec :: (load seq_cst 4, addrspace 3) + ; GFX6: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 %2, 0, 0, implicit $m0, implicit $exec :: (load seq_cst (s32), addrspace 3) ; GFX6: $vgpr0 = COPY [[DS_READ_B32_]] ; GFX7-LABEL: name: load_atomic_local_s32_seq_cst_gep_65535 ; GFX7: liveins: $vgpr0 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 65535, 0, implicit $m0, implicit $exec :: (load seq_cst 4, addrspace 3) + ; GFX7: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 65535, 0, implicit $m0, implicit $exec :: (load seq_cst (s32), addrspace 3) ; GFX7: $vgpr0 = COPY [[DS_READ_B32_]] ; GFX9-LABEL: name: load_atomic_local_s32_seq_cst_gep_65535 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 65535, 0, implicit $exec :: (load seq_cst 4, addrspace 3) + ; GFX9: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 65535, 0, implicit $exec :: (load seq_cst (s32), addrspace 3) ; GFX9: $vgpr0 = COPY [[DS_READ_B32_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 65535 %2:vgpr(p3) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load seq_cst 4, align 4, addrspace 3) + %3:vgpr(s32) = G_LOAD %2 :: (load seq_cst (s32), align 4, addrspace 3) $vgpr0 = COPY %3 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir index bfe865dd6cc7e..a02f29ff215e5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir @@ -19,25 +19,25 @@ body: | ; GFX6-LABEL: name: load_constant_s32_from_4 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4) + ; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (s32), addrspace 4) ; GFX6: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; GFX7-LABEL: name: load_constant_s32_from_4 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4) + ; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (s32), addrspace 4) ; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; GFX8-LABEL: name: load_constant_s32_from_4 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4) + ; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (s32), addrspace 4) ; GFX8: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; GFX10-LABEL: name: load_constant_s32_from_4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4) + ; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (s32), addrspace 4) ; GFX10: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 4) + %1:sgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 4) $sgpr0 = COPY %1 ... @@ -57,25 +57,25 @@ body: | ; GFX6-LABEL: name: load_constant_v2s16_from_4 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4) + ; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) ; GFX6: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; GFX7-LABEL: name: load_constant_v2s16_from_4 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4) + ; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) ; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; GFX8-LABEL: name: load_constant_v2s16_from_4 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4) + ; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) ; GFX8: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; GFX10-LABEL: name: load_constant_v2s16_from_4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4) + ; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) ; GFX10: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(<2 x s16>) = G_LOAD %0 :: (load 4, align 4, addrspace 4) + %1:sgpr(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 4) $sgpr0 = COPY %1 ... @@ -94,25 +94,25 @@ body: | ; GFX6-LABEL: name: load_constant_v2s32 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4) + ; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x s32>), addrspace 4) ; GFX6: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] ; GFX7-LABEL: name: load_constant_v2s32 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4) + ; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x s32>), addrspace 4) ; GFX7: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] ; GFX8-LABEL: name: load_constant_v2s32 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4) + ; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x s32>), addrspace 4) ; GFX8: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] ; GFX10-LABEL: name: load_constant_v2s32 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4) + ; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x s32>), addrspace 4) ; GFX10: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(<2 x s32>) = G_LOAD %0 :: (load 8, align 8, addrspace 4) + %1:sgpr(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 8, addrspace 4) $sgpr0_sgpr1 = COPY %1 ... @@ -130,25 +130,25 @@ body: | ; GFX6-LABEL: name: load_constant_v2s32_align4 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, align 4, addrspace 4) + ; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x s32>), align 4, addrspace 4) ; GFX6: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] ; GFX7-LABEL: name: load_constant_v2s32_align4 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, align 4, addrspace 4) + ; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x s32>), align 4, addrspace 4) ; GFX7: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] ; GFX8-LABEL: name: load_constant_v2s32_align4 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, align 4, addrspace 4) + ; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x s32>), align 4, addrspace 4) ; GFX8: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] ; GFX10-LABEL: name: load_constant_v2s32_align4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, align 4, addrspace 4) + ; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<2 x s32>), align 4, addrspace 4) ; GFX10: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(<2 x s32>) = G_LOAD %0 :: (load 8, align 4, addrspace 4) + %1:sgpr(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 4, addrspace 4) $sgpr0_sgpr1 = COPY %1 ... @@ -166,25 +166,25 @@ body: | ; GFX6-LABEL: name: load_constant_v4s16_align4 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, align 4, addrspace 4) + ; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x s16>), align 4, addrspace 4) ; GFX6: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] ; GFX7-LABEL: name: load_constant_v4s16_align4 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, align 4, addrspace 4) + ; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x s16>), align 4, addrspace 4) ; GFX7: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] ; GFX8-LABEL: name: load_constant_v4s16_align4 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, align 4, addrspace 4) + ; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x s16>), align 4, addrspace 4) ; GFX8: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] ; GFX10-LABEL: name: load_constant_v4s16_align4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, align 4, addrspace 4) + ; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x s16>), align 4, addrspace 4) ; GFX10: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(<4 x s16>) = G_LOAD %0 :: (load 8, align 4, addrspace 4) + %1:sgpr(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 4, addrspace 4) $sgpr0_sgpr1 = COPY %1 ... @@ -203,25 +203,25 @@ body: | ; GFX6-LABEL: name: load_constant_v4s32_align4 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load 16, align 4, addrspace 4) + ; GFX6: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<4 x s32>), align 4, addrspace 4) ; GFX6: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] ; GFX7-LABEL: name: load_constant_v4s32_align4 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load 16, align 4, addrspace 4) + ; GFX7: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<4 x s32>), align 4, addrspace 4) ; GFX7: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] ; GFX8-LABEL: name: load_constant_v4s32_align4 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load 16, align 4, addrspace 4) + ; GFX8: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<4 x s32>), align 4, addrspace 4) ; GFX8: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] ; GFX10-LABEL: name: load_constant_v4s32_align4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load 16, align 4, addrspace 4) + ; GFX10: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<4 x s32>), align 4, addrspace 4) ; GFX10: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(<4 x s32>) = G_LOAD %0 :: (load 16, align 4, addrspace 4) + %1:sgpr(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 4, addrspace 4) $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %1 ... @@ -240,25 +240,25 @@ body: | ; GFX6-LABEL: name: load_constant_s64 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4) + ; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (s64), addrspace 4) ; GFX6: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] ; GFX7-LABEL: name: load_constant_s64 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4) + ; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (s64), addrspace 4) ; GFX7: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] ; GFX8-LABEL: name: load_constant_s64 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4) + ; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (s64), addrspace 4) ; GFX8: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] ; GFX10-LABEL: name: load_constant_s64 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4) + ; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (s64), addrspace 4) ; GFX10: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = G_LOAD %0 :: (load 8, align 8, addrspace 4) + %1:sgpr(s64) = G_LOAD %0 :: (load (s64), align 8, addrspace 4) $sgpr0_sgpr1 = COPY %1 ... @@ -277,25 +277,25 @@ body: | ; GFX6-LABEL: name: load_constant_s64_align4 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, align 4, addrspace 4) + ; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (s64), align 4, addrspace 4) ; GFX6: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] ; GFX7-LABEL: name: load_constant_s64_align4 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, align 4, addrspace 4) + ; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (s64), align 4, addrspace 4) ; GFX7: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] ; GFX8-LABEL: name: load_constant_s64_align4 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, align 4, addrspace 4) + ; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (s64), align 4, addrspace 4) ; GFX8: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] ; GFX10-LABEL: name: load_constant_s64_align4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, align 4, addrspace 4) + ; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (s64), align 4, addrspace 4) ; GFX10: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(s64) = G_LOAD %0 :: (load 8, align 4, addrspace 4) + %1:sgpr(s64) = G_LOAD %0 :: (load (s64), align 4, addrspace 4) $sgpr0_sgpr1 = COPY %1 ... @@ -314,25 +314,25 @@ body: | ; GFX6-LABEL: name: load_constant_v2s64 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load 16, align 4, addrspace 4) + ; GFX6: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<2 x s64>), align 4, addrspace 4) ; GFX6: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] ; GFX7-LABEL: name: load_constant_v2s64 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load 16, align 4, addrspace 4) + ; GFX7: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<2 x s64>), align 4, addrspace 4) ; GFX7: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] ; GFX8-LABEL: name: load_constant_v2s64 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load 16, align 4, addrspace 4) + ; GFX8: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<2 x s64>), align 4, addrspace 4) ; GFX8: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] ; GFX10-LABEL: name: load_constant_v2s64 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load 16, align 4, addrspace 4) + ; GFX10: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<2 x s64>), align 4, addrspace 4) ; GFX10: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(<2 x s64>) = G_LOAD %0 :: (load 16, align 4, addrspace 4) + %1:sgpr(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 4, addrspace 4) $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %1 ... @@ -351,25 +351,25 @@ body: | ; GFX6-LABEL: name: load_constant_v2p1 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX6: [[LOAD:%[0-9]+]]:sgpr_128(<2 x p1>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) + ; GFX6: [[LOAD:%[0-9]+]]:sgpr_128(<2 x p1>) = G_LOAD [[COPY]](p4) :: (load (<2 x p1>), align 4, addrspace 4) ; GFX6: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](<2 x p1>) ; GFX7-LABEL: name: load_constant_v2p1 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX7: [[LOAD:%[0-9]+]]:sgpr_128(<2 x p1>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) + ; GFX7: [[LOAD:%[0-9]+]]:sgpr_128(<2 x p1>) = G_LOAD [[COPY]](p4) :: (load (<2 x p1>), align 4, addrspace 4) ; GFX7: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](<2 x p1>) ; GFX8-LABEL: name: load_constant_v2p1 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX8: [[LOAD:%[0-9]+]]:sgpr_128(<2 x p1>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) + ; GFX8: [[LOAD:%[0-9]+]]:sgpr_128(<2 x p1>) = G_LOAD [[COPY]](p4) :: (load (<2 x p1>), align 4, addrspace 4) ; GFX8: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](<2 x p1>) ; GFX10-LABEL: name: load_constant_v2p1 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX10: [[LOAD:%[0-9]+]]:sgpr_128(<2 x p1>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) + ; GFX10: [[LOAD:%[0-9]+]]:sgpr_128(<2 x p1>) = G_LOAD [[COPY]](p4) :: (load (<2 x p1>), align 4, addrspace 4) ; GFX10: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](<2 x p1>) %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(<2 x p1>) = G_LOAD %0 :: (load 16, align 4, addrspace 4) + %1:sgpr(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 4, addrspace 4) $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %1 ... @@ -388,25 +388,25 @@ body: | ; GFX6-LABEL: name: load_constant_s128_align4 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX6: [[LOAD:%[0-9]+]]:sgpr_128(s128) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) + ; GFX6: [[LOAD:%[0-9]+]]:sgpr_128(s128) = G_LOAD [[COPY]](p4) :: (load (s128), align 4, addrspace 4) ; GFX6: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](s128) ; GFX7-LABEL: name: load_constant_s128_align4 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX7: [[LOAD:%[0-9]+]]:sgpr_128(s128) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) + ; GFX7: [[LOAD:%[0-9]+]]:sgpr_128(s128) = G_LOAD [[COPY]](p4) :: (load (s128), align 4, addrspace 4) ; GFX7: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](s128) ; GFX8-LABEL: name: load_constant_s128_align4 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX8: [[LOAD:%[0-9]+]]:sgpr_128(s128) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) + ; GFX8: [[LOAD:%[0-9]+]]:sgpr_128(s128) = G_LOAD [[COPY]](p4) :: (load (s128), align 4, addrspace 4) ; GFX8: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](s128) ; GFX10-LABEL: name: load_constant_s128_align4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX10: [[LOAD:%[0-9]+]]:sgpr_128(s128) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) + ; GFX10: [[LOAD:%[0-9]+]]:sgpr_128(s128) = G_LOAD [[COPY]](p4) :: (load (s128), align 4, addrspace 4) ; GFX10: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](s128) %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(s128) = G_LOAD %0 :: (load 16, align 4, addrspace 4) + %1:sgpr(s128) = G_LOAD %0 :: (load (s128), align 4, addrspace 4) $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %1 ... @@ -425,32 +425,32 @@ body: | ; GFX6-LABEL: name: load_constant_p3_from_4 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4) + ; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (p3), addrspace 4) ; GFX6: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; GFX7-LABEL: name: load_constant_p3_from_4 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4) + ; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (p3), addrspace 4) ; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; GFX8-LABEL: name: load_constant_p3_from_4 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4) + ; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (p3), addrspace 4) ; GFX8: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; GFX10-LABEL: name: load_constant_p3_from_4 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4) + ; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (p3), addrspace 4) ; GFX10: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(p3) = G_LOAD %0 :: (load 4, align 4, addrspace 4) + %1:sgpr(p3) = G_LOAD %0 :: (load (p3), align 4, addrspace 4) $sgpr0 = COPY %1 ... --- -name: load_constant_p1_from_8 +name: load_constant_p4_from_8 legalized: true regBankSelected: true tracksRegLiveness: true @@ -459,28 +459,28 @@ body: | bb.0: liveins: $sgpr0_sgpr1 - ; GFX6-LABEL: name: load_constant_p1_from_8 + ; GFX6-LABEL: name: load_constant_p4_from_8 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4) + ; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (p4), addrspace 4) ; GFX6: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] - ; GFX7-LABEL: name: load_constant_p1_from_8 + ; GFX7-LABEL: name: load_constant_p4_from_8 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4) + ; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (p4), addrspace 4) ; GFX7: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] - ; GFX8-LABEL: name: load_constant_p1_from_8 + ; GFX8-LABEL: name: load_constant_p4_from_8 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4) + ; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (p4), addrspace 4) ; GFX8: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] - ; GFX10-LABEL: name: load_constant_p1_from_8 + ; GFX10-LABEL: name: load_constant_p4_from_8 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4) + ; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (p4), addrspace 4) ; GFX10: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(p4) = G_LOAD %0 :: (load 8, align 8, addrspace 4) + %1:sgpr(p4) = G_LOAD %0 :: (load (p4), align 8, addrspace 4) $sgpr0_sgpr1 = COPY %1 ... @@ -499,25 +499,25 @@ body: | ; GFX6-LABEL: name: load_constant_p999_from_8 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX6: [[LOAD:%[0-9]+]]:sreg_64(p999) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; GFX6: [[LOAD:%[0-9]+]]:sreg_64(p999) = G_LOAD [[COPY]](p4) :: (load (p999), addrspace 4) ; GFX6: $sgpr0_sgpr1 = COPY [[LOAD]](p999) ; GFX7-LABEL: name: load_constant_p999_from_8 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX7: [[LOAD:%[0-9]+]]:sreg_64(p999) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; GFX7: [[LOAD:%[0-9]+]]:sreg_64(p999) = G_LOAD [[COPY]](p4) :: (load (p999), addrspace 4) ; GFX7: $sgpr0_sgpr1 = COPY [[LOAD]](p999) ; GFX8-LABEL: name: load_constant_p999_from_8 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX8: [[LOAD:%[0-9]+]]:sreg_64(p999) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; GFX8: [[LOAD:%[0-9]+]]:sreg_64(p999) = G_LOAD [[COPY]](p4) :: (load (p999), addrspace 4) ; GFX8: $sgpr0_sgpr1 = COPY [[LOAD]](p999) ; GFX10-LABEL: name: load_constant_p999_from_8 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX10: [[LOAD:%[0-9]+]]:sreg_64(p999) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; GFX10: [[LOAD:%[0-9]+]]:sreg_64(p999) = G_LOAD [[COPY]](p4) :: (load (p999), addrspace 4) ; GFX10: $sgpr0_sgpr1 = COPY [[LOAD]](p999) %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(p999) = G_LOAD %0 :: (load 8, align 8, addrspace 4) + %1:sgpr(p999) = G_LOAD %0 :: (load (p999), align 8, addrspace 4) $sgpr0_sgpr1 = COPY %1 ... @@ -536,25 +536,25 @@ body: | ; GFX6-LABEL: name: load_constant_v2p3 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX6: [[LOAD:%[0-9]+]]:sreg_64(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; GFX6: [[LOAD:%[0-9]+]]:sreg_64(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), addrspace 4) ; GFX6: $sgpr0_sgpr1 = COPY [[LOAD]](<2 x p3>) ; GFX7-LABEL: name: load_constant_v2p3 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX7: [[LOAD:%[0-9]+]]:sreg_64(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; GFX7: [[LOAD:%[0-9]+]]:sreg_64(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), addrspace 4) ; GFX7: $sgpr0_sgpr1 = COPY [[LOAD]](<2 x p3>) ; GFX8-LABEL: name: load_constant_v2p3 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX8: [[LOAD:%[0-9]+]]:sreg_64(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; GFX8: [[LOAD:%[0-9]+]]:sreg_64(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), addrspace 4) ; GFX8: $sgpr0_sgpr1 = COPY [[LOAD]](<2 x p3>) ; GFX10-LABEL: name: load_constant_v2p3 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX10: [[LOAD:%[0-9]+]]:sreg_64(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; GFX10: [[LOAD:%[0-9]+]]:sreg_64(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), addrspace 4) ; GFX10: $sgpr0_sgpr1 = COPY [[LOAD]](<2 x p3>) %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(<2 x p3>) = G_LOAD %0 :: (load 8, align 8, addrspace 4) + %1:sgpr(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 4) $sgpr0_sgpr1 = COPY %1 ... @@ -573,25 +573,25 @@ body: | ; GFX6-LABEL: name: load_constant_v2s16 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4) + ; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) ; GFX6: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; GFX7-LABEL: name: load_constant_v2s16 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4) + ; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) ; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; GFX8-LABEL: name: load_constant_v2s16 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4) + ; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) ; GFX8: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; GFX10-LABEL: name: load_constant_v2s16 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4) + ; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (<2 x s16>), addrspace 4) ; GFX10: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(<2 x s16>) = G_LOAD %0 :: (load 4, align 4, addrspace 4) + %1:sgpr(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 4) $sgpr0 = COPY %1 ... @@ -610,25 +610,25 @@ body: | ; GFX6-LABEL: name: load_constant_v4s16 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4) + ; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x s16>), addrspace 4) ; GFX6: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] ; GFX7-LABEL: name: load_constant_v4s16 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4) + ; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x s16>), addrspace 4) ; GFX7: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] ; GFX8-LABEL: name: load_constant_v4s16 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4) + ; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x s16>), addrspace 4) ; GFX8: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] ; GFX10-LABEL: name: load_constant_v4s16 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4) + ; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load (<4 x s16>), addrspace 4) ; GFX10: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(<4 x s16>) = G_LOAD %0 :: (load 8, align 8, addrspace 4) + %1:sgpr(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 8, addrspace 4) $sgpr0_sgpr1 = COPY %1 ... @@ -647,25 +647,25 @@ body: | ; GFX6-LABEL: name: load_constant_v8s16 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX6: [[LOAD:%[0-9]+]]:sgpr_128(<8 x s16>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) + ; GFX6: [[LOAD:%[0-9]+]]:sgpr_128(<8 x s16>) = G_LOAD [[COPY]](p4) :: (load (<8 x s16>), align 4, addrspace 4) ; GFX6: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](<8 x s16>) ; GFX7-LABEL: name: load_constant_v8s16 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX7: [[LOAD:%[0-9]+]]:sgpr_128(<8 x s16>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) + ; GFX7: [[LOAD:%[0-9]+]]:sgpr_128(<8 x s16>) = G_LOAD [[COPY]](p4) :: (load (<8 x s16>), align 4, addrspace 4) ; GFX7: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](<8 x s16>) ; GFX8-LABEL: name: load_constant_v8s16 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX8: [[LOAD:%[0-9]+]]:sgpr_128(<8 x s16>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) + ; GFX8: [[LOAD:%[0-9]+]]:sgpr_128(<8 x s16>) = G_LOAD [[COPY]](p4) :: (load (<8 x s16>), align 4, addrspace 4) ; GFX8: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](<8 x s16>) ; GFX10-LABEL: name: load_constant_v8s16 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; GFX10: [[LOAD:%[0-9]+]]:sgpr_128(<8 x s16>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) + ; GFX10: [[LOAD:%[0-9]+]]:sgpr_128(<8 x s16>) = G_LOAD [[COPY]](p4) :: (load (<8 x s16>), align 4, addrspace 4) ; GFX10: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](<8 x s16>) %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(<8 x s16>) = G_LOAD %0 :: (load 16, align 4, addrspace 4) + %1:sgpr(<8 x s16>) = G_LOAD %0 :: (load (<8 x s16>), align 4, addrspace 4) $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %1 ... @@ -684,25 +684,25 @@ body: | ; GFX6-LABEL: name: load_constant_v8s32 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (load 32, align 4, addrspace 4) + ; GFX6: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (load (<8 x s32>), align 4, addrspace 4) ; GFX6: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[S_LOAD_DWORDX8_IMM]] ; GFX7-LABEL: name: load_constant_v8s32 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (load 32, align 4, addrspace 4) + ; GFX7: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (load (<8 x s32>), align 4, addrspace 4) ; GFX7: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[S_LOAD_DWORDX8_IMM]] ; GFX8-LABEL: name: load_constant_v8s32 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (load 32, align 4, addrspace 4) + ; GFX8: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (load (<8 x s32>), align 4, addrspace 4) ; GFX8: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[S_LOAD_DWORDX8_IMM]] ; GFX10-LABEL: name: load_constant_v8s32 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (load 32, align 4, addrspace 4) + ; GFX10: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (load (<8 x s32>), align 4, addrspace 4) ; GFX10: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[S_LOAD_DWORDX8_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(<8 x s32>) = G_LOAD %0 :: (load 32, align 4, addrspace 4) + %1:sgpr(<8 x s32>) = G_LOAD %0 :: (load (<8 x s32>), align 4, addrspace 4) $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY %1 ... @@ -721,25 +721,25 @@ body: | ; GFX6-LABEL: name: load_constant_v16s32 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load 64, align 4, addrspace 4) + ; GFX6: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load (<16 x s32>), align 4, addrspace 4) ; GFX6: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]] ; GFX7-LABEL: name: load_constant_v16s32 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load 64, align 4, addrspace 4) + ; GFX7: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load (<16 x s32>), align 4, addrspace 4) ; GFX7: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]] ; GFX8-LABEL: name: load_constant_v16s32 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load 64, align 4, addrspace 4) + ; GFX8: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load (<16 x s32>), align 4, addrspace 4) ; GFX8: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]] ; GFX10-LABEL: name: load_constant_v16s32 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load 64, align 4, addrspace 4) + ; GFX10: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load (<16 x s32>), align 4, addrspace 4) ; GFX10: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(<16 x s32>) = G_LOAD %0 :: (load 64, align 4, addrspace 4) + %1:sgpr(<16 x s32>) = G_LOAD %0 :: (load (<16 x s32>), align 4, addrspace 4) $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %1 ... @@ -758,25 +758,25 @@ body: | ; GFX6-LABEL: name: load_constant_v8s64 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load 64, align 4, addrspace 4) + ; GFX6: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load (<8 x s64>), align 4, addrspace 4) ; GFX6: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]] ; GFX7-LABEL: name: load_constant_v8s64 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load 64, align 4, addrspace 4) + ; GFX7: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load (<8 x s64>), align 4, addrspace 4) ; GFX7: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]] ; GFX8-LABEL: name: load_constant_v8s64 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load 64, align 4, addrspace 4) + ; GFX8: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load (<8 x s64>), align 4, addrspace 4) ; GFX8: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]] ; GFX10-LABEL: name: load_constant_v8s64 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load 64, align 4, addrspace 4) + ; GFX10: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load (<8 x s64>), align 4, addrspace 4) ; GFX10: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 - %1:sgpr(<8 x s64>) = G_LOAD %0 :: (load 64, align 4, addrspace 4) + %1:sgpr(<8 x s64>) = G_LOAD %0 :: (load (<8 x s64>), align 4, addrspace 4) $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %1 ... @@ -799,27 +799,27 @@ body: | ; GFX6-LABEL: name: load_constant_s32_from_4_gep_1020 ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 255, 0 :: (load 4, addrspace 4) + ; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 255, 0 :: (load (s32), addrspace 4) ; GFX6: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; GFX7-LABEL: name: load_constant_s32_from_4_gep_1020 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 255, 0 :: (load 4, addrspace 4) + ; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 255, 0 :: (load (s32), addrspace 4) ; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; GFX8-LABEL: name: load_constant_s32_from_4_gep_1020 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1020, 0 :: (load 4, addrspace 4) + ; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1020, 0 :: (load (s32), addrspace 4) ; GFX8: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; GFX10-LABEL: name: load_constant_s32_from_4_gep_1020 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1020, 0 :: (load 4, addrspace 4) + ; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1020, 0 :: (load (s32), addrspace 4) ; GFX10: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 1020 %2:sgpr(p4) = G_PTR_ADD %0, %1 - %3:sgpr(s32) = G_LOAD %2 :: (load 4, align 4, addrspace 4) + %3:sgpr(s32) = G_LOAD %2 :: (load (s32), align 4, addrspace 4) $sgpr0 = COPY %3 ... @@ -839,27 +839,27 @@ body: | ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 - ; GFX6: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load 4, addrspace 4) + ; GFX6: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) ; GFX6: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] ; GFX7-LABEL: name: load_constant_s32_from_4_gep_1024 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7: [[S_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM_ci [[COPY]], 256, 0 :: (load 4, addrspace 4) + ; GFX7: [[S_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM_ci [[COPY]], 256, 0 :: (load (s32), addrspace 4) ; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_IMM_ci]] ; GFX8-LABEL: name: load_constant_s32_from_4_gep_1024 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1024, 0 :: (load 4, addrspace 4) + ; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1024, 0 :: (load (s32), addrspace 4) ; GFX8: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; GFX10-LABEL: name: load_constant_s32_from_4_gep_1024 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1024, 0 :: (load 4, addrspace 4) + ; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1024, 0 :: (load (s32), addrspace 4) ; GFX10: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 1024 %2:sgpr(p4) = G_PTR_ADD %0, %1 - %3:sgpr(s32) = G_LOAD %2 :: (load 4, align 4, addrspace 4) + %3:sgpr(s32) = G_LOAD %2 :: (load (s32), align 4, addrspace 4) $sgpr0 = COPY %3 ... @@ -879,29 +879,29 @@ body: | ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048575 - ; GFX6: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load 4, addrspace 4) + ; GFX6: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) ; GFX6: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] ; GFX7-LABEL: name: load_constant_s32_from_4_gep_1048575 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048575 - ; GFX7: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load 4, addrspace 4) + ; GFX7: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) ; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] ; GFX8-LABEL: name: load_constant_s32_from_4_gep_1048575 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1048575, 0 :: (load 4, addrspace 4) + ; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1048575, 0 :: (load (s32), addrspace 4) ; GFX8: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; GFX10-LABEL: name: load_constant_s32_from_4_gep_1048575 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX10: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048575 - ; GFX10: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load 4, addrspace 4) + ; GFX10: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) ; GFX10: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 1048575 %2:sgpr(p4) = G_PTR_ADD %0, %1 - %3:sgpr(s32) = G_LOAD %2 :: (load 4, align 4, addrspace 4) + %3:sgpr(s32) = G_LOAD %2 :: (load (s32), align 4, addrspace 4) $sgpr0 = COPY %3 ... @@ -921,29 +921,29 @@ body: | ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576 - ; GFX6: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load 4, addrspace 4) + ; GFX6: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) ; GFX6: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] ; GFX7-LABEL: name: load_constant_s32_from_4_gep_1048576 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX7: [[S_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM_ci [[COPY]], 262144, 0 :: (load 4, addrspace 4) + ; GFX7: [[S_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM_ci [[COPY]], 262144, 0 :: (load (s32), addrspace 4) ; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_IMM_ci]] ; GFX8-LABEL: name: load_constant_s32_from_4_gep_1048576 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576 - ; GFX8: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load 4, addrspace 4) + ; GFX8: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) ; GFX8: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] ; GFX10-LABEL: name: load_constant_s32_from_4_gep_1048576 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX10: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576 - ; GFX10: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load 4, addrspace 4) + ; GFX10: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) ; GFX10: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 1048576 %2:sgpr(p4) = G_PTR_ADD %0, %1 - %3:sgpr(s32) = G_LOAD %2 :: (load 4, align 4, addrspace 4) + %3:sgpr(s32) = G_LOAD %2 :: (load (s32), align 4, addrspace 4) $sgpr0 = COPY %3 ... @@ -963,30 +963,30 @@ body: | ; GFX6: liveins: $sgpr0_sgpr1 ; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741823 - ; GFX6: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load 4, addrspace 4) + ; GFX6: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) ; GFX6: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] ; GFX7-LABEL: name: load_constant_s32_from_4_gep_1073741823 ; GFX7: liveins: $sgpr0_sgpr1 ; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741823 - ; GFX7: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load 4, addrspace 4) + ; GFX7: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) ; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] ; GFX8-LABEL: name: load_constant_s32_from_4_gep_1073741823 ; GFX8: liveins: $sgpr0_sgpr1 ; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741823 - ; GFX8: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load 4, addrspace 4) + ; GFX8: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) ; GFX8: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] ; GFX10-LABEL: name: load_constant_s32_from_4_gep_1073741823 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX10: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741823 - ; GFX10: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load 4, addrspace 4) + ; GFX10: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load (s32), addrspace 4) ; GFX10: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 1073741823 %2:sgpr(p4) = G_PTR_ADD %0, %1 - %3:sgpr(s32) = G_LOAD %2 :: (load 4, align 4, addrspace 4) + %3:sgpr(s32) = G_LOAD %2 :: (load (s32), align 4, addrspace 4) $sgpr0 = COPY %3 ... @@ -1013,7 +1013,7 @@ body: | ; GFX6: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc ; GFX6: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load 4, addrspace 4) + ; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load (s32), addrspace 4) ; GFX6: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; GFX7-LABEL: name: load_constant_s32_from_4_gep_negative_1 ; GFX7: liveins: $sgpr0_sgpr1 @@ -1026,7 +1026,7 @@ body: | ; GFX7: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc ; GFX7: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load 4, addrspace 4) + ; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load (s32), addrspace 4) ; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; GFX8-LABEL: name: load_constant_s32_from_4_gep_negative_1 ; GFX8: liveins: $sgpr0_sgpr1 @@ -1039,17 +1039,17 @@ body: | ; GFX8: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc ; GFX8: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load 4, addrspace 4) + ; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load (s32), addrspace 4) ; GFX8: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; GFX10-LABEL: name: load_constant_s32_from_4_gep_negative_1 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], -1, 0 :: (load 4, addrspace 4) + ; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], -1, 0 :: (load (s32), addrspace 4) ; GFX10: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 -1 %2:sgpr(p4) = G_PTR_ADD %0, %1 - %3:sgpr(s32) = G_LOAD %2 :: (load 4, align 4, addrspace 4) + %3:sgpr(s32) = G_LOAD %2 :: (load (s32), align 4, addrspace 4) $sgpr0 = COPY %3 ... @@ -1078,7 +1078,7 @@ body: | ; GFX6: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc ; GFX6: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE1]], 0, 0 :: (load 4, addrspace 4) + ; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE1]], 0, 0 :: (load (s32), addrspace 4) ; GFX6: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; GFX7-LABEL: name: load_constant_s32_from_4_gep_negative_524288 ; GFX7: liveins: $sgpr0_sgpr1 @@ -1093,7 +1093,7 @@ body: | ; GFX7: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc ; GFX7: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE1]], 0, 0 :: (load 4, addrspace 4) + ; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE1]], 0, 0 :: (load (s32), addrspace 4) ; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; GFX8-LABEL: name: load_constant_s32_from_4_gep_negative_524288 ; GFX8: liveins: $sgpr0_sgpr1 @@ -1108,17 +1108,17 @@ body: | ; GFX8: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc ; GFX8: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 - ; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE1]], 0, 0 :: (load 4, addrspace 4) + ; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE1]], 0, 0 :: (load (s32), addrspace 4) ; GFX8: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; GFX10-LABEL: name: load_constant_s32_from_4_gep_negative_524288 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], -524288, 0 :: (load 4, addrspace 4) + ; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], -524288, 0 :: (load (s32), addrspace 4) ; GFX10: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 -524288 %2:sgpr(p4) = G_PTR_ADD %0, %1 - %3:sgpr(s32) = G_LOAD %2 :: (load 4, align 4, addrspace 4) + %3:sgpr(s32) = G_LOAD %2 :: (load (s32), align 4, addrspace 4) $sgpr0 = COPY %3 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir index df6207e0a58bc..7214cd666202d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir @@ -19,25 +19,25 @@ body: | ; GFX7-LABEL: name: load_flat_s32_from_4 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; GFX8-LABEL: name: load_flat_s32_from_4 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + ; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; GFX9-LABEL: name: load_flat_s32_from_4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + ; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; GFX10-LABEL: name: load_flat_s32_from_4 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + ; GFX10: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 0) + %1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 0) $vgpr0 = COPY %1 ... @@ -56,25 +56,25 @@ body: | ; GFX7-LABEL: name: load_flat_s32_from_2 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 2) + ; GFX7: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16)) ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_USHORT]] ; GFX8-LABEL: name: load_flat_s32_from_2 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 2) + ; GFX8: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16)) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_USHORT]] ; GFX9-LABEL: name: load_flat_s32_from_2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 2) + ; GFX9: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16)) ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_USHORT]] ; GFX10-LABEL: name: load_flat_s32_from_2 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 2) + ; GFX10: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16)) ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_USHORT]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = G_LOAD %0 :: (load 2, align 2, addrspace 0) + %1:vgpr(s32) = G_LOAD %0 :: (load (s16), align 2, addrspace 0) $vgpr0 = COPY %1 ... @@ -93,25 +93,25 @@ body: | ; GFX7-LABEL: name: load_flat_s32_from_1 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_flat_s32_from_1 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX10-LABEL: name: load_flat_s32_from_1 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = G_LOAD %0 :: (load 1, align 1, addrspace 0) + %1:vgpr(s32) = G_LOAD %0 :: (load (s8), align 1, addrspace 0) $vgpr0 = COPY %1 ... @@ -129,22 +129,22 @@ body: | ; GFX7-LABEL: name: load_flat_v2s32 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8) + ; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>)) ; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; GFX8-LABEL: name: load_flat_v2s32 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8) + ; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>)) ; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; GFX9-LABEL: name: load_flat_v2s32 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8) + ; GFX9: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>)) ; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; GFX10-LABEL: name: load_flat_v2s32 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8) + ; GFX10: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>)) ; GFX10: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load 8, align 8, addrspace 0) + %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 ... @@ -163,25 +163,25 @@ body: | ; GFX7-LABEL: name: load_flat_v3s32 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4) + ; GFX7: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x s32>), align 4) ; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]] ; GFX8-LABEL: name: load_flat_v3s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4) + ; GFX8: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x s32>), align 4) ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]] ; GFX9-LABEL: name: load_flat_v3s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4) + ; GFX9: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x s32>), align 4) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]] ; GFX10-LABEL: name: load_flat_v3s32 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4) + ; GFX10: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x s32>), align 4) ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<3 x s32>) = G_LOAD %0 :: (load 12, align 4, addrspace 0) + %1:vgpr(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 4, addrspace 0) $vgpr0_vgpr1_vgpr2 = COPY %1 ... @@ -200,25 +200,25 @@ body: | ; GFX7-LABEL: name: load_flat_v4s32 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4) + ; GFX7: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4) ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] ; GFX8-LABEL: name: load_flat_v4s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4) + ; GFX8: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4) ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] ; GFX9-LABEL: name: load_flat_v4s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4) + ; GFX9: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] ; GFX10-LABEL: name: load_flat_v4s32 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4) + ; GFX10: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4) ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load 16, align 4, addrspace 0) + %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 4, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -237,25 +237,25 @@ body: | ; GFX7-LABEL: name: load_flat_s64 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8) + ; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64)) ; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; GFX8-LABEL: name: load_flat_s64 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8) + ; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64)) ; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; GFX9-LABEL: name: load_flat_s64 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8) + ; GFX9: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64)) ; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; GFX10-LABEL: name: load_flat_s64 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8) + ; GFX10: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64)) ; GFX10: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_LOAD %0 :: (load 8, align 8, addrspace 0) + %1:vgpr(s64) = G_LOAD %0 :: (load (s64), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 ... @@ -274,25 +274,25 @@ body: | ; GFX7-LABEL: name: load_flat_v2s64 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4) + ; GFX7: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4) ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] ; GFX8-LABEL: name: load_flat_v2s64 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4) + ; GFX8: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4) ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] ; GFX9-LABEL: name: load_flat_v2s64 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4) + ; GFX9: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] ; GFX10-LABEL: name: load_flat_v2s64 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4) + ; GFX10: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4) ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<2 x s64>) = G_LOAD %0 :: (load 16, align 4, addrspace 0) + %1:vgpr(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 4, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -311,25 +311,25 @@ body: | ; GFX7-LABEL: name: load_flat_v2p1 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load 16, align 4) + ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4) ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) ; GFX8-LABEL: name: load_flat_v2p1 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load 16, align 4) + ; GFX8: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4) ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) ; GFX9-LABEL: name: load_flat_v2p1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load 16, align 4) + ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) ; GFX10-LABEL: name: load_flat_v2p1 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX10: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load 16, align 4) + ; GFX10: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4) ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<2 x p1>) = G_LOAD %0 :: (load 16, align 4, addrspace 0) + %1:vgpr(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 4, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -348,25 +348,25 @@ body: | ; GFX7-LABEL: name: load_flat_s96 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load 12, align 4) + ; GFX7: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load (s96), align 4) ; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) ; GFX8-LABEL: name: load_flat_s96 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load 12, align 4) + ; GFX8: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load (s96), align 4) ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) ; GFX9-LABEL: name: load_flat_s96 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load 12, align 4) + ; GFX9: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load (s96), align 4) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) ; GFX10-LABEL: name: load_flat_s96 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX10: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load 12, align 4) + ; GFX10: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load (s96), align 4) ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s96) = G_LOAD %0 :: (load 12, align 4, addrspace 0) + %1:vgpr(s96) = G_LOAD %0 :: (load (s96), align 4, addrspace 0) $vgpr0_vgpr1_vgpr2 = COPY %1 ... @@ -385,25 +385,25 @@ body: | ; GFX7-LABEL: name: load_flat_s128 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load 16, align 4) + ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4) ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) ; GFX8-LABEL: name: load_flat_s128 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load 16, align 4) + ; GFX8: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4) ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) ; GFX9-LABEL: name: load_flat_s128 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load 16, align 4) + ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) ; GFX10-LABEL: name: load_flat_s128 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX10: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load 16, align 4) + ; GFX10: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4) ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s128) = G_LOAD %0 :: (load 16, align 4, addrspace 0) + %1:vgpr(s128) = G_LOAD %0 :: (load (s128), align 4, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -422,25 +422,25 @@ body: | ; GFX7-LABEL: name: load_flat_p3_from_4 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3)) ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; GFX8-LABEL: name: load_flat_p3_from_4 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + ; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3)) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; GFX9-LABEL: name: load_flat_p3_from_4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + ; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3)) ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; GFX10-LABEL: name: load_flat_p3_from_4 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + ; GFX10: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3)) ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(p3) = G_LOAD %0 :: (load 4, align 4, addrspace 0) + %1:vgpr(p3) = G_LOAD %0 :: (load (p3), align 4, addrspace 0) $vgpr0 = COPY %1 ... @@ -459,25 +459,25 @@ body: | ; GFX7-LABEL: name: load_flat_p1_from_8 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8) + ; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1)) ; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; GFX8-LABEL: name: load_flat_p1_from_8 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8) + ; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1)) ; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; GFX9-LABEL: name: load_flat_p1_from_8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8) + ; GFX9: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1)) ; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; GFX10-LABEL: name: load_flat_p1_from_8 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8) + ; GFX10: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1)) ; GFX10: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(p1) = G_LOAD %0 :: (load 8, align 8, addrspace 0) + %1:vgpr(p1) = G_LOAD %0 :: (load (p1), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 ... @@ -496,25 +496,25 @@ body: | ; GFX7-LABEL: name: load_flat_p999_from_8 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load 8) + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999)) ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p999) ; GFX8-LABEL: name: load_flat_p999_from_8 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load 8) + ; GFX8: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999)) ; GFX8: $vgpr0_vgpr1 = COPY [[LOAD]](p999) ; GFX9-LABEL: name: load_flat_p999_from_8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load 8) + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999)) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p999) ; GFX10-LABEL: name: load_flat_p999_from_8 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load 8) + ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999)) ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](p999) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(p999) = G_LOAD %0 :: (load 8, align 8, addrspace 0) + %1:vgpr(p999) = G_LOAD %0 :: (load (p999), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 ... @@ -532,22 +532,22 @@ body: | ; GFX7-LABEL: name: load_flat_v2p3 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load 8) + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>)) ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) ; GFX8-LABEL: name: load_flat_v2p3 ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load 8) + ; GFX8: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>)) ; GFX8: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) ; GFX9-LABEL: name: load_flat_v2p3 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load 8) + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>)) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) ; GFX10-LABEL: name: load_flat_v2p3 ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load 8) + ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>)) ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<2 x p3>) = G_LOAD %0 :: (load 8, align 8, addrspace 0) + %1:vgpr(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 ... @@ -566,25 +566,25 @@ body: | ; GFX7-LABEL: name: load_flat_v2s16 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>)) ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; GFX8-LABEL: name: load_flat_v2s16 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + ; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>)) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; GFX9-LABEL: name: load_flat_v2s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + ; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>)) ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; GFX10-LABEL: name: load_flat_v2s16 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + ; GFX10: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>)) ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load 4, align 4, addrspace 0) + %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 0) $vgpr0 = COPY %1 ... @@ -603,25 +603,25 @@ body: | ; GFX7-LABEL: name: load_flat_v4s16 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8) + ; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>)) ; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; GFX8-LABEL: name: load_flat_v4s16 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8) + ; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>)) ; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; GFX9-LABEL: name: load_flat_v4s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8) + ; GFX9: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>)) ; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; GFX10-LABEL: name: load_flat_v4s16 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8) + ; GFX10: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>)) ; GFX10: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load 8, align 8, addrspace 0) + %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 ... @@ -640,25 +640,25 @@ body: | ; GFX7-LABEL: name: load_flat_v6s16 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load 12, align 4) + ; GFX7: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4) ; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) ; GFX8-LABEL: name: load_flat_v6s16 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load 12, align 4) + ; GFX8: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4) ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) ; GFX9-LABEL: name: load_flat_v6s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load 12, align 4) + ; GFX9: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) ; GFX10-LABEL: name: load_flat_v6s16 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX10: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load 12, align 4) + ; GFX10: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4) ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<6 x s16>) = G_LOAD %0 :: (load 12, align 4, addrspace 0) + %1:vgpr(<6 x s16>) = G_LOAD %0 :: (load (<6 x s16>), align 4, addrspace 0) $vgpr0_vgpr1_vgpr2 = COPY %1 ... @@ -677,25 +677,25 @@ body: | ; GFX7-LABEL: name: load_flat_v8s16 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 16, align 4) + ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 4) ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>) ; GFX8-LABEL: name: load_flat_v8s16 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 16, align 4) + ; GFX8: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 4) ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>) ; GFX9-LABEL: name: load_flat_v8s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 16, align 4) + ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 4) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>) ; GFX10-LABEL: name: load_flat_v8s16 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX10: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 16, align 4) + ; GFX10: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 4) ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<8 x s16>) = G_LOAD %0 :: (load 16, align 4, addrspace 0) + %1:vgpr(<8 x s16>) = G_LOAD %0 :: (load (<8 x s16>), align 4, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -728,7 +728,7 @@ body: | ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_flat_s32_from_1_gep_2047 ; GFX8: liveins: $vgpr0_vgpr1 @@ -743,12 +743,12 @@ body: | ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1_gep_2047 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX10-LABEL: name: load_flat_s32_from_1_gep_2047 ; GFX10: liveins: $vgpr0_vgpr1 @@ -763,12 +763,12 @@ body: | ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 2047 %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 0) + %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0) $vgpr0 = COPY %3 ... @@ -797,7 +797,7 @@ body: | ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_flat_s32_from_1_gep_2048 ; GFX8: liveins: $vgpr0_vgpr1 @@ -812,12 +812,12 @@ body: | ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1_gep_2048 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX10-LABEL: name: load_flat_s32_from_1_gep_2048 ; GFX10: liveins: $vgpr0_vgpr1 @@ -832,12 +832,12 @@ body: | ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 2048 %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 0) + %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0) $vgpr0 = COPY %3 ... @@ -866,7 +866,7 @@ body: | ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m2047 ; GFX8: liveins: $vgpr0_vgpr1 @@ -881,7 +881,7 @@ body: | ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m2047 ; GFX9: liveins: $vgpr0_vgpr1 @@ -896,7 +896,7 @@ body: | ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m2047 ; GFX10: liveins: $vgpr0_vgpr1 @@ -911,12 +911,12 @@ body: | ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -2047 %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 0) + %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0) $vgpr0 = COPY %3 ... @@ -945,7 +945,7 @@ body: | ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m2048 ; GFX8: liveins: $vgpr0_vgpr1 @@ -960,7 +960,7 @@ body: | ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m2048 ; GFX9: liveins: $vgpr0_vgpr1 @@ -975,7 +975,7 @@ body: | ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m2048 ; GFX10: liveins: $vgpr0_vgpr1 @@ -990,12 +990,12 @@ body: | ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -2048 %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 0) + %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0) $vgpr0 = COPY %3 ... @@ -1024,7 +1024,7 @@ body: | ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_flat_s32_from_1_gep_4095 ; GFX8: liveins: $vgpr0_vgpr1 @@ -1039,12 +1039,12 @@ body: | ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1_gep_4095 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX10-LABEL: name: load_flat_s32_from_1_gep_4095 ; GFX10: liveins: $vgpr0_vgpr1 @@ -1059,12 +1059,12 @@ body: | ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 4095 %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 0) + %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0) $vgpr0 = COPY %3 ... @@ -1093,7 +1093,7 @@ body: | ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_flat_s32_from_1_gep_4096 ; GFX8: liveins: $vgpr0_vgpr1 @@ -1108,7 +1108,7 @@ body: | ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1_gep_4096 ; GFX9: liveins: $vgpr0_vgpr1 @@ -1123,7 +1123,7 @@ body: | ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX10-LABEL: name: load_flat_s32_from_1_gep_4096 ; GFX10: liveins: $vgpr0_vgpr1 @@ -1138,12 +1138,12 @@ body: | ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 4096 %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 0) + %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0) $vgpr0 = COPY %3 ... @@ -1172,7 +1172,7 @@ body: | ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m4095 ; GFX8: liveins: $vgpr0_vgpr1 @@ -1187,7 +1187,7 @@ body: | ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m4095 ; GFX9: liveins: $vgpr0_vgpr1 @@ -1202,7 +1202,7 @@ body: | ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m4095 ; GFX10: liveins: $vgpr0_vgpr1 @@ -1217,12 +1217,12 @@ body: | ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -4095 %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 0) + %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0) $vgpr0 = COPY %3 ... @@ -1251,7 +1251,7 @@ body: | ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m4096 ; GFX8: liveins: $vgpr0_vgpr1 @@ -1266,7 +1266,7 @@ body: | ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m4096 ; GFX9: liveins: $vgpr0_vgpr1 @@ -1281,7 +1281,7 @@ body: | ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m4096 ; GFX10: liveins: $vgpr0_vgpr1 @@ -1296,12 +1296,12 @@ body: | ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -4096 %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 0) + %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0) $vgpr0 = COPY %3 ... @@ -1330,7 +1330,7 @@ body: | ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_flat_s32_from_1_gep_8191 ; GFX8: liveins: $vgpr0_vgpr1 @@ -1345,7 +1345,7 @@ body: | ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1_gep_8191 ; GFX9: liveins: $vgpr0_vgpr1 @@ -1360,7 +1360,7 @@ body: | ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX10-LABEL: name: load_flat_s32_from_1_gep_8191 ; GFX10: liveins: $vgpr0_vgpr1 @@ -1375,12 +1375,12 @@ body: | ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 8191 %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 0) + %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0) $vgpr0 = COPY %3 ... @@ -1409,7 +1409,7 @@ body: | ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_flat_s32_from_1_gep_8192 ; GFX8: liveins: $vgpr0_vgpr1 @@ -1424,7 +1424,7 @@ body: | ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1_gep_8192 ; GFX9: liveins: $vgpr0_vgpr1 @@ -1439,7 +1439,7 @@ body: | ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX10-LABEL: name: load_flat_s32_from_1_gep_8192 ; GFX10: liveins: $vgpr0_vgpr1 @@ -1454,12 +1454,12 @@ body: | ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 8192 %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 0) + %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0) $vgpr0 = COPY %3 ... @@ -1488,7 +1488,7 @@ body: | ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m8191 ; GFX8: liveins: $vgpr0_vgpr1 @@ -1503,7 +1503,7 @@ body: | ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m8191 ; GFX9: liveins: $vgpr0_vgpr1 @@ -1518,7 +1518,7 @@ body: | ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m8191 ; GFX10: liveins: $vgpr0_vgpr1 @@ -1533,12 +1533,12 @@ body: | ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -8191 %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 0) + %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0) $vgpr0 = COPY %3 ... @@ -1567,7 +1567,7 @@ body: | ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m8192 ; GFX8: liveins: $vgpr0_vgpr1 @@ -1582,7 +1582,7 @@ body: | ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m8192 ; GFX9: liveins: $vgpr0_vgpr1 @@ -1597,7 +1597,7 @@ body: | ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m8192 ; GFX10: liveins: $vgpr0_vgpr1 @@ -1612,12 +1612,12 @@ body: | ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8)) ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -8192 %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 0) + %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 0) $vgpr0 = COPY %3 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir index 5841bdb5f7511..a766cb3588f16 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir @@ -16,17 +16,17 @@ body: | ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; GFX10-LABEL: name: load_global_s32_from_sgpr ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:vgpr(p1) = COPY %0 - %2:vgpr(s32) = G_LOAD %1 :: (load 4, align 4, addrspace 1) + %2:vgpr(s32) = G_LOAD %1 :: (load (s32), align 4, addrspace 1) $vgpr0 = COPY %2 ... @@ -47,20 +47,20 @@ body: | ; GFX9: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; GFX10-LABEL: name: load_global_s32_from_sgpr_zext_vgpr ; GFX10: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(p1) = COPY %0 %3:vgpr(s64) = G_ZEXT %1 %4:vgpr(p1) = G_PTR_ADD %2, %3 - %5:vgpr(s32) = G_LOAD %4 :: (load 4, align 4, addrspace 1) + %5:vgpr(s32) = G_LOAD %4 :: (load (s32), align 4, addrspace 1) $vgpr0 = COPY %5 ... @@ -81,13 +81,13 @@ body: | ; GFX9: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; GFX10-LABEL: name: load_global_s32_from_sgpr_merge_zext_vgpr ; GFX10: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:vgpr(s32) = COPY $vgpr0 @@ -95,7 +95,7 @@ body: | %zero:vgpr(s32) = G_CONSTANT i32 0 %3:vgpr(s64) = G_MERGE_VALUES %1, %zero %4:vgpr(p1) = G_PTR_ADD %2, %3 - %5:vgpr(s32) = G_LOAD %4 :: (load 4, align 4, addrspace 1) + %5:vgpr(s32) = G_LOAD %4 :: (load (s32), align 4, addrspace 1) $vgpr0 = COPY %5 ... @@ -125,7 +125,7 @@ body: | ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec ; GFX9: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 - ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] ; GFX10-LABEL: name: load_global_s32_from_sgpr_merge_not_0_vgpr ; GFX10: liveins: $sgpr0_sgpr1, $vgpr0 @@ -141,7 +141,7 @@ body: | ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec ; GFX10: %12:vgpr_32, dead %14:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 - ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:vgpr(s32) = COPY $vgpr0 @@ -149,7 +149,7 @@ body: | %notzero:vgpr(s32) = G_CONSTANT i32 1 %3:vgpr(s64) = G_MERGE_VALUES %1, %notzero %4:vgpr(p1) = G_PTR_ADD %2, %3 - %5:vgpr(s32) = G_LOAD %4 :: (load 4, align 4, addrspace 1) + %5:vgpr(s32) = G_LOAD %4 :: (load (s32), align 4, addrspace 1) $vgpr0 = COPY %5 ... @@ -169,7 +169,7 @@ body: | ; GFX9: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 4095, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 4095, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; GFX10-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset4095 ; GFX10: liveins: $sgpr0_sgpr1, $vgpr0 @@ -195,7 +195,7 @@ body: | ; GFX10: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY7]], [[COPY8]], 0, implicit $exec ; GFX10: %14:vgpr_32, dead %16:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY9]], [[COPY10]], killed [[V_ADD_CO_U32_e64_3]], 0, implicit $exec ; GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_2]], %subreg.sub0, %14, %subreg.sub1 - ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE2]], 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE2]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:vgpr(s32) = COPY $vgpr0 @@ -205,7 +205,7 @@ body: | %4:vgpr(p1) = G_PTR_ADD %2, %zext %5:vgpr(s64) = G_CONSTANT i64 4095 %6:vgpr(p1) = G_PTR_ADD %4, %5 - %7:vgpr(s32) = G_LOAD %6 :: (load 4, align 4, addrspace 1) + %7:vgpr(s32) = G_LOAD %6 :: (load (s32), align 4, addrspace 1) $vgpr0 = COPY %7 ... @@ -225,7 +225,7 @@ body: | ; GFX9: liveins: $sgpr0_sgpr1, $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], -4096, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], -4096, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; GFX10-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset_neg4096 ; GFX10: liveins: $sgpr0_sgpr1, $vgpr0 @@ -251,7 +251,7 @@ body: | ; GFX10: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY7]], [[COPY8]], 0, implicit $exec ; GFX10: %14:vgpr_32, dead %16:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY9]], [[COPY10]], killed [[V_ADD_CO_U32_e64_3]], 0, implicit $exec ; GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_2]], %subreg.sub0, %14, %subreg.sub1 - ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE2]], 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE2]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:vgpr(s32) = COPY $vgpr0 @@ -261,7 +261,7 @@ body: | %4:vgpr(p1) = G_PTR_ADD %2, %zext %5:vgpr(s64) = G_CONSTANT i64 -4096 %6:vgpr(p1) = G_PTR_ADD %4, %5 - %7:vgpr(s32) = G_LOAD %6 :: (load 4, align 4, addrspace 1) + %7:vgpr(s32) = G_LOAD %6 :: (load (s32), align 4, addrspace 1) $vgpr0 = COPY %7 ... @@ -279,19 +279,19 @@ body: | ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4096 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 4096 %2:sgpr(p1) = G_PTR_ADD %0, %1 %3:vgpr(p1) = COPY %2 - %4:vgpr(s32) = G_LOAD %3 :: (load 4, align 4, addrspace 1) + %4:vgpr(s32) = G_LOAD %3 :: (load (s32), align 4, addrspace 1) $vgpr0 = COPY %4 ... @@ -310,19 +310,19 @@ body: | ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4097 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 4097 %2:sgpr(p1) = G_PTR_ADD %0, %1 %3:vgpr(p1) = COPY %2 - %4:vgpr(s32) = G_LOAD %3 :: (load 4, align 4, addrspace 1) + %4:vgpr(s32) = G_LOAD %3 :: (load (s32), align 4, addrspace 1) $vgpr0 = COPY %4 ... @@ -351,7 +351,7 @@ body: | ; GFX9: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4097 ; GFX10: liveins: $sgpr0_sgpr1 @@ -367,13 +367,13 @@ body: | ; GFX10: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX10: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] - ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 -4097 %2:sgpr(p1) = G_PTR_ADD %0, %1 %3:vgpr(p1) = COPY %2 - %4:vgpr(s32) = G_LOAD %3 :: (load 4, align 4, addrspace 1) + %4:vgpr(s32) = G_LOAD %3 :: (load (s32), align 4, addrspace 1) $vgpr0 = COPY %4 ... @@ -392,19 +392,19 @@ body: | ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 2049, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 2049, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_2049 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec - ; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 2049 %2:sgpr(p1) = G_PTR_ADD %0, %1 %3:vgpr(p1) = COPY %2 - %4:vgpr(s32) = G_LOAD %3 :: (load 4, align 4, addrspace 1) + %4:vgpr(s32) = G_LOAD %3 :: (load (s32), align 4, addrspace 1) $vgpr0 = COPY %4 ... @@ -423,7 +423,7 @@ body: | ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], -2049, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], -2049, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_neg2049 ; GFX10: liveins: $sgpr0_sgpr1 @@ -439,13 +439,13 @@ body: | ; GFX10: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX10: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] - ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 -2049 %2:sgpr(p1) = G_PTR_ADD %0, %1 %3:vgpr(p1) = COPY %2 - %4:vgpr(s32) = G_LOAD %3 :: (load 4, align 4, addrspace 1) + %4:vgpr(s32) = G_LOAD %3 :: (load (s32), align 4, addrspace 1) $vgpr0 = COPY %4 ... @@ -463,19 +463,19 @@ body: | ; GFX9: liveins: $sgpr0_sgpr1 ; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec - ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 4095, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 4095, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4294967295 ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec - ; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 2047, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 2047, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 4294967295 %2:sgpr(p1) = G_PTR_ADD %0, %1 %3:vgpr(p1) = COPY %2 - %4:vgpr(s32) = G_LOAD %3 :: (load 4, align 4, addrspace 1) + %4:vgpr(s32) = G_LOAD %3 :: (load (s32), align 4, addrspace 1) $vgpr0 = COPY %4 ... @@ -503,7 +503,7 @@ body: | ; GFX9: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX9: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] - ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] ; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4294967296 ; GFX10: liveins: $sgpr0_sgpr1 @@ -519,13 +519,13 @@ body: | ; GFX10: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX10: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] - ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 4294967296 %2:sgpr(p1) = G_PTR_ADD %0, %1 %3:vgpr(p1) = COPY %2 - %4:vgpr(s32) = G_LOAD %3 :: (load 4, align 4, addrspace 1) + %4:vgpr(s32) = G_LOAD %3 :: (load (s32), align 4, addrspace 1) $vgpr0 = COPY %4 ... @@ -554,7 +554,7 @@ body: | ; GFX9: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] ; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4294971390 ; GFX10: liveins: $sgpr0_sgpr1 @@ -570,13 +570,13 @@ body: | ; GFX10: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX10: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] - ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 4294971390 %2:sgpr(p1) = G_PTR_ADD %0, %1 %3:vgpr(p1) = COPY %2 - %4:vgpr(s32) = G_LOAD %3 :: (load 4, align 4, addrspace 1) + %4:vgpr(s32) = G_LOAD %3 :: (load (s32), align 4, addrspace 1) $vgpr0 = COPY %4 ... @@ -605,7 +605,7 @@ body: | ; GFX9: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX9: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] - ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] ; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4294967295 ; GFX10: liveins: $sgpr0_sgpr1 @@ -621,13 +621,13 @@ body: | ; GFX10: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX10: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] - ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 -4294967295 %2:sgpr(p1) = G_PTR_ADD %0, %1 %3:vgpr(p1) = COPY %2 - %4:vgpr(s32) = G_LOAD %3 :: (load 4, align 4, addrspace 1) + %4:vgpr(s32) = G_LOAD %3 :: (load (s32), align 4, addrspace 1) $vgpr0 = COPY %4 ... @@ -655,7 +655,7 @@ body: | ; GFX9: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX9: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] - ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] ; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4294967296 ; GFX10: liveins: $sgpr0_sgpr1 @@ -671,13 +671,13 @@ body: | ; GFX10: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 ; GFX10: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] - ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] %0:sgpr(p1) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 -4294967296 %2:sgpr(p1) = G_PTR_ADD %0, %1 %3:vgpr(p1) = COPY %2 - %4:vgpr(s32) = G_LOAD %3 :: (load 4, align 4, addrspace 1) + %4:vgpr(s32) = G_LOAD %3 :: (load (s32), align 4, addrspace 1) $vgpr0 = COPY %4 ... @@ -693,16 +693,16 @@ body: | ; GFX9-LABEL: name: load_global_s32_from_copy_undef_sgpr ; GFX9: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY [[DEF]] - ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] ; GFX10-LABEL: name: load_global_s32_from_copy_undef_sgpr ; GFX10: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY [[DEF]] - ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] %0:sgpr(p1) = G_IMPLICIT_DEF %1:vgpr(p1) = COPY %0 - %2:vgpr(s32) = G_LOAD %1 :: (load 4, align 4, addrspace 1) + %2:vgpr(s32) = G_LOAD %1 :: (load (s32), align 4, addrspace 1) $vgpr0 = COPY %2 ... @@ -717,14 +717,14 @@ body: | bb.0: ; GFX9-LABEL: name: load_global_s32_from_undef_vgpr ; GFX9: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] ; GFX10-LABEL: name: load_global_s32_from_undef_vgpr ; GFX10: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] %0:vgpr(p1) = G_IMPLICIT_DEF - %1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 1) + %1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 1) $vgpr0 = COPY %1 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir index fc992c3284fc3..c5c60a47d23d0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir @@ -27,7 +27,7 @@ body: | ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX6: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]] ; GFX7-LABEL: name: load_global_s32_from_4 ; GFX7: liveins: $vgpr0_vgpr1 @@ -37,30 +37,30 @@ body: | ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX7: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_s32_from_4 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1) + ; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 1) ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; GFX8-LABEL: name: load_global_s32_from_4 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1) + ; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 1) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; GFX9-LABEL: name: load_global_s32_from_4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] ; GFX10-LABEL: name: load_global_s32_from_4 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 1) + %1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 1) $vgpr0 = COPY %1 ... @@ -84,7 +84,7 @@ body: | ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6: [[BUFFER_LOAD_USHORT_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 2, addrspace 1) + ; GFX6: [[BUFFER_LOAD_USHORT_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load (s16), addrspace 1) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_ADDR64_]] ; GFX7-LABEL: name: load_global_s32_from_2 ; GFX7: liveins: $vgpr0_vgpr1 @@ -94,30 +94,30 @@ body: | ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7: [[BUFFER_LOAD_USHORT_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 2, addrspace 1) + ; GFX7: [[BUFFER_LOAD_USHORT_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load (s16), addrspace 1) ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_s32_from_2 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 2, addrspace 1) + ; GFX7-FLAT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16), addrspace 1) ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_USHORT]] ; GFX8-LABEL: name: load_global_s32_from_2 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 2, addrspace 1) + ; GFX8: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s16), addrspace 1) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_USHORT]] ; GFX9-LABEL: name: load_global_s32_from_2 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, implicit $exec :: (load 2, addrspace 1) + ; GFX9: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_USHORT]] ; GFX10-LABEL: name: load_global_s32_from_2 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, implicit $exec :: (load 2, addrspace 1) + ; GFX10: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_USHORT]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = G_LOAD %0 :: (load 2, align 2, addrspace 1) + %1:vgpr(s32) = G_LOAD %0 :: (load (s16), align 2, addrspace 1) $vgpr0 = COPY %1 ... @@ -141,7 +141,7 @@ body: | ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-LABEL: name: load_global_s32_from_1 ; GFX7: liveins: $vgpr0_vgpr1 @@ -151,30 +151,30 @@ body: | ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_s32_from_1 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_global_s32_from_1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; GFX10-LABEL: name: load_global_s32_from_1 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = G_LOAD %0 :: (load 1, align 1, addrspace 1) + %1:vgpr(s32) = G_LOAD %0 :: (load (s8), align 1, addrspace 1) $vgpr0 = COPY %1 ... @@ -198,7 +198,7 @@ body: | ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1) + ; GFX6: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1) ; GFX6: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] ; GFX7-LABEL: name: load_global_v2s32 ; GFX7: liveins: $vgpr0_vgpr1 @@ -208,30 +208,30 @@ body: | ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1) + ; GFX7: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1) ; GFX7: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_v2s32 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1) + ; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>), addrspace 1) ; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; GFX8-LABEL: name: load_global_v2s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1) + ; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>), addrspace 1) ; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; GFX9-LABEL: name: load_global_v2s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load 8, addrspace 1) + ; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1) ; GFX9: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] ; GFX10-LABEL: name: load_global_v2s32 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load 8, addrspace 1) + ; GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1) ; GFX10: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load 8, align 8, addrspace 1) + %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... @@ -255,7 +255,7 @@ body: | ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 4, addrspace 1) + ; GFX6: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1) ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]] ; GFX7-LABEL: name: load_global_v4s32 ; GFX7: liveins: $vgpr0_vgpr1 @@ -265,30 +265,30 @@ body: | ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 4, addrspace 1) + ; GFX7: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1) ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_v4s32 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4, addrspace 1) + ; GFX7-FLAT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4, addrspace 1) ; GFX7-FLAT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] ; GFX8-LABEL: name: load_global_v4s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4, addrspace 1) + ; GFX8: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4, addrspace 1) ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] ; GFX9-LABEL: name: load_global_v4s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load 16, align 4, addrspace 1) + ; GFX9: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] ; GFX10-LABEL: name: load_global_v4s32 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load 16, align 4, addrspace 1) + ; GFX10: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1) ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load 16, align 4, addrspace 1) + %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 4, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -307,35 +307,35 @@ body: | ; GFX6-LABEL: name: load_global_s64 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1) ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](s64) ; GFX7-LABEL: name: load_global_s64 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1) + ; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64), addrspace 1) ; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; GFX7-FLAT-LABEL: name: load_global_s64 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1) + ; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64), addrspace 1) ; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; GFX8-LABEL: name: load_global_s64 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1) + ; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64), addrspace 1) ; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; GFX9-LABEL: name: load_global_s64 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load 8, addrspace 1) + ; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1) ; GFX9: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] ; GFX10-LABEL: name: load_global_s64 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load 8, addrspace 1) + ; GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1) ; GFX10: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s64) = G_LOAD %0 :: (load 8, align 8, addrspace 1) + %1:vgpr(s64) = G_LOAD %0 :: (load (s64), align 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... @@ -354,35 +354,35 @@ body: | ; GFX6-LABEL: name: load_global_v2s64 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX6: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 4, addrspace 1) ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) ; GFX7-LABEL: name: load_global_v2s64 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4, addrspace 1) + ; GFX7: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4, addrspace 1) ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] ; GFX7-FLAT-LABEL: name: load_global_v2s64 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4, addrspace 1) + ; GFX7-FLAT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4, addrspace 1) ; GFX7-FLAT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] ; GFX8-LABEL: name: load_global_v2s64 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4, addrspace 1) + ; GFX8: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s64>), align 4, addrspace 1) ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] ; GFX9-LABEL: name: load_global_v2s64 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load 16, align 4, addrspace 1) + ; GFX9: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] ; GFX10-LABEL: name: load_global_v2s64 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load 16, align 4, addrspace 1) + ; GFX10: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s64>), align 4, addrspace 1) ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<2 x s64>) = G_LOAD %0 :: (load 16, align 4, addrspace 1) + %1:vgpr(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 4, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -401,35 +401,35 @@ body: | ; GFX6-LABEL: name: load_global_v2p1 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX6: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4, addrspace 1) ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) ; GFX7-LABEL: name: load_global_v2p1 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4, addrspace 1) ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) ; GFX7-FLAT-LABEL: name: load_global_v2p1 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7-FLAT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX7-FLAT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4, addrspace 1) ; GFX7-FLAT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) ; GFX8-LABEL: name: load_global_v2p1 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX8: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4, addrspace 1) ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) ; GFX9-LABEL: name: load_global_v2p1 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4, addrspace 1) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) ; GFX10-LABEL: name: load_global_v2p1 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX10: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX10: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4, addrspace 1) ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<2 x p1>) = G_LOAD %0 :: (load 16, align 4, addrspace 1) + %1:vgpr(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 4, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -448,35 +448,35 @@ body: | ; GFX6-LABEL: name: load_global_s128 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX6: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1) ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) ; GFX7-LABEL: name: load_global_s128 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1) ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) ; GFX7-FLAT-LABEL: name: load_global_s128 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7-FLAT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX7-FLAT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1) ; GFX7-FLAT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) ; GFX8-LABEL: name: load_global_s128 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX8: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1) ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) ; GFX9-LABEL: name: load_global_s128 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) ; GFX10-LABEL: name: load_global_s128 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX10: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX10: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1) ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s128) = G_LOAD %0 :: (load 16, align 4, addrspace 1) + %1:vgpr(s128) = G_LOAD %0 :: (load (s128), align 4, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -495,35 +495,35 @@ body: | ; GFX6-LABEL: name: load_global_p3_from_4 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX6: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load (p3), addrspace 1) ; GFX6: $vgpr0 = COPY [[LOAD]](p3) ; GFX7-LABEL: name: load_global_p3_from_4 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1) + ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3), addrspace 1) ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; GFX7-FLAT-LABEL: name: load_global_p3_from_4 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1) + ; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3), addrspace 1) ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; GFX8-LABEL: name: load_global_p3_from_4 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1) + ; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p3), addrspace 1) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; GFX9-LABEL: name: load_global_p3_from_4 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (p3), addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] ; GFX10-LABEL: name: load_global_p3_from_4 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (p3), addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(p3) = G_LOAD %0 :: (load 4, align 4, addrspace 1) + %1:vgpr(p3) = G_LOAD %0 :: (load (p3), align 4, addrspace 1) $vgpr0 = COPY %1 ... @@ -542,35 +542,35 @@ body: | ; GFX6-LABEL: name: load_global_p1_from_8 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load (p1), addrspace 1) ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](p1) ; GFX7-LABEL: name: load_global_p1_from_8 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1) + ; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1), addrspace 1) ; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; GFX7-FLAT-LABEL: name: load_global_p1_from_8 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1) + ; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1), addrspace 1) ; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; GFX8-LABEL: name: load_global_p1_from_8 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1) + ; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (p1), addrspace 1) ; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; GFX9-LABEL: name: load_global_p1_from_8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load 8, addrspace 1) + ; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (p1), addrspace 1) ; GFX9: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] ; GFX10-LABEL: name: load_global_p1_from_8 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load 8, addrspace 1) + ; GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (p1), addrspace 1) ; GFX10: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(p1) = G_LOAD %0 :: (load 8, align 8, addrspace 1) + %1:vgpr(p1) = G_LOAD %0 :: (load (p1), align 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... @@ -589,35 +589,35 @@ body: | ; GFX6-LABEL: name: load_global_p999_from_8 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999), addrspace 1) ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](p999) ; GFX7-LABEL: name: load_global_p999_from_8 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999), addrspace 1) ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p999) ; GFX7-FLAT-LABEL: name: load_global_p999_from_8 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7-FLAT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX7-FLAT: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999), addrspace 1) ; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[LOAD]](p999) ; GFX8-LABEL: name: load_global_p999_from_8 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX8: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999), addrspace 1) ; GFX8: $vgpr0_vgpr1 = COPY [[LOAD]](p999) ; GFX9-LABEL: name: load_global_p999_from_8 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999), addrspace 1) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p999) ; GFX10-LABEL: name: load_global_p999_from_8 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load (p999), addrspace 1) ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](p999) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(p999) = G_LOAD %0 :: (load 8, align 8, addrspace 1) + %1:vgpr(p999) = G_LOAD %0 :: (load (p999), align 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... @@ -636,35 +636,35 @@ body: | ; GFX6-LABEL: name: load_global_v2p3 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1) ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) ; GFX7-LABEL: name: load_global_v2p3 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1) ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) ; GFX7-FLAT-LABEL: name: load_global_v2p3 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7-FLAT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX7-FLAT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1) ; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) ; GFX8-LABEL: name: load_global_v2p3 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX8: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1) ; GFX8: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) ; GFX9-LABEL: name: load_global_v2p3 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) ; GFX10-LABEL: name: load_global_v2p3 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1) ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<2 x p3>) = G_LOAD %0 :: (load 8, align 8, addrspace 1) + %1:vgpr(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... @@ -683,35 +683,35 @@ body: | ; GFX6-LABEL: name: load_global_v2s16 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX6: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) ; GFX6: $vgpr0 = COPY [[LOAD]](<2 x s16>) ; GFX7-LABEL: name: load_global_v2s16 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1) + ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>), addrspace 1) ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; GFX7-FLAT-LABEL: name: load_global_v2s16 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1) + ; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>), addrspace 1) ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; GFX8-LABEL: name: load_global_v2s16 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1) + ; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s16>), addrspace 1) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] ; GFX9-LABEL: name: load_global_v2s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] ; GFX10-LABEL: name: load_global_v2s16 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load 4, align 4, addrspace 1) + %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 1) $vgpr0 = COPY %1 ... @@ -730,35 +730,35 @@ body: | ; GFX6-LABEL: name: load_global_v4s16 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) ; GFX7-LABEL: name: load_global_v4s16 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1) + ; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>), addrspace 1) ; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; GFX7-FLAT-LABEL: name: load_global_v4s16 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1) + ; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>), addrspace 1) ; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; GFX8-LABEL: name: load_global_v4s16 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1) + ; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s16>), addrspace 1) ; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; GFX9-LABEL: name: load_global_v4s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load 8, addrspace 1) + ; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 1) ; GFX9: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] ; GFX10-LABEL: name: load_global_v4s16 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load 8, addrspace 1) + ; GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 1) ; GFX10: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load 8, align 8, addrspace 1) + %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... @@ -777,35 +777,35 @@ body: | ; GFX6-LABEL: name: load_global_v8s16 ; GFX6: liveins: $vgpr0_vgpr1 ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX6: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 4, addrspace 1) ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>) ; GFX7-LABEL: name: load_global_v8s16 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 4, addrspace 1) ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>) ; GFX7-FLAT-LABEL: name: load_global_v8s16 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7-FLAT: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX7-FLAT: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 4, addrspace 1) ; GFX7-FLAT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>) ; GFX8-LABEL: name: load_global_v8s16 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX8: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 4, addrspace 1) ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>) ; GFX9-LABEL: name: load_global_v8s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 4, addrspace 1) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>) ; GFX10-LABEL: name: load_global_v8s16 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX10: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX10: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 4, addrspace 1) ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<8 x s16>) = G_LOAD %0 :: (load 16, align 4, addrspace 1) + %1:vgpr(<8 x s16>) = G_LOAD %0 :: (load (<8 x s16>), align 4, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -833,7 +833,7 @@ body: | ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-LABEL: name: load_global_s32_from_1_gep_2047 ; GFX7: liveins: $vgpr0_vgpr1 @@ -843,7 +843,7 @@ body: | ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_2047 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 @@ -858,7 +858,7 @@ body: | ; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1_gep_2047 ; GFX8: liveins: $vgpr0_vgpr1 @@ -873,22 +873,22 @@ body: | ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_global_s32_from_1_gep_2047 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; GFX10-LABEL: name: load_global_s32_from_1_gep_2047 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 2047 %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 1) + %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1) $vgpr0 = COPY %3 ... @@ -912,7 +912,7 @@ body: | ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-LABEL: name: load_global_s32_from_1_gep_2048 ; GFX7: liveins: $vgpr0_vgpr1 @@ -922,7 +922,7 @@ body: | ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_2048 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 @@ -937,7 +937,7 @@ body: | ; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1_gep_2048 ; GFX8: liveins: $vgpr0_vgpr1 @@ -952,12 +952,12 @@ body: | ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_global_s32_from_1_gep_2048 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; GFX10-LABEL: name: load_global_s32_from_1_gep_2048 ; GFX10: liveins: $vgpr0_vgpr1 @@ -972,12 +972,12 @@ body: | ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 2048 %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 1) + %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1) $vgpr0 = COPY %3 ... @@ -1011,7 +1011,7 @@ body: | ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 - ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-LABEL: name: load_global_s32_from_1_gep_m2047 ; GFX7: liveins: $vgpr0_vgpr1 @@ -1031,7 +1031,7 @@ body: | ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 - ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m2047 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 @@ -1046,7 +1046,7 @@ body: | ; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1_gep_m2047 ; GFX8: liveins: $vgpr0_vgpr1 @@ -1061,22 +1061,22 @@ body: | ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_global_s32_from_1_gep_m2047 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; GFX10-LABEL: name: load_global_s32_from_1_gep_m2047 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -2047 %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 1) + %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1) $vgpr0 = COPY %3 ... @@ -1110,7 +1110,7 @@ body: | ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 - ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-LABEL: name: load_global_s32_from_1_gep_m2048 ; GFX7: liveins: $vgpr0_vgpr1 @@ -1130,7 +1130,7 @@ body: | ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 - ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m2048 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 @@ -1145,7 +1145,7 @@ body: | ; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1_gep_m2048 ; GFX8: liveins: $vgpr0_vgpr1 @@ -1160,22 +1160,22 @@ body: | ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_global_s32_from_1_gep_m2048 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; GFX10-LABEL: name: load_global_s32_from_1_gep_m2048 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -2048 %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 1) + %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1) $vgpr0 = COPY %3 ... @@ -1199,7 +1199,7 @@ body: | ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-LABEL: name: load_global_s32_from_1_gep_4095 ; GFX7: liveins: $vgpr0_vgpr1 @@ -1209,7 +1209,7 @@ body: | ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_4095 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 @@ -1224,7 +1224,7 @@ body: | ; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1_gep_4095 ; GFX8: liveins: $vgpr0_vgpr1 @@ -1239,12 +1239,12 @@ body: | ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_global_s32_from_1_gep_4095 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; GFX10-LABEL: name: load_global_s32_from_1_gep_4095 ; GFX10: liveins: $vgpr0_vgpr1 @@ -1259,12 +1259,12 @@ body: | ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 4095 %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 1) + %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1) $vgpr0 = COPY %3 ... @@ -1289,7 +1289,7 @@ body: | ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX6: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-LABEL: name: load_global_s32_from_1_gep_4096 ; GFX7: liveins: $vgpr0_vgpr1 @@ -1300,7 +1300,7 @@ body: | ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX7: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_4096 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 @@ -1315,7 +1315,7 @@ body: | ; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1_gep_4096 ; GFX8: liveins: $vgpr0_vgpr1 @@ -1330,7 +1330,7 @@ body: | ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_global_s32_from_1_gep_4096 ; GFX9: liveins: $vgpr0_vgpr1 @@ -1345,7 +1345,7 @@ body: | ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; GFX10-LABEL: name: load_global_s32_from_1_gep_4096 ; GFX10: liveins: $vgpr0_vgpr1 @@ -1360,12 +1360,12 @@ body: | ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 4096 %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 1) + %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1) $vgpr0 = COPY %3 ... @@ -1399,7 +1399,7 @@ body: | ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 - ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-LABEL: name: load_global_s32_from_1_gep_m4095 ; GFX7: liveins: $vgpr0_vgpr1 @@ -1419,7 +1419,7 @@ body: | ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 - ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m4095 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 @@ -1434,7 +1434,7 @@ body: | ; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1_gep_m4095 ; GFX8: liveins: $vgpr0_vgpr1 @@ -1449,12 +1449,12 @@ body: | ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_global_s32_from_1_gep_m4095 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4095, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4095, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; GFX10-LABEL: name: load_global_s32_from_1_gep_m4095 ; GFX10: liveins: $vgpr0_vgpr1 @@ -1469,12 +1469,12 @@ body: | ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -4095 %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 1) + %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1) $vgpr0 = COPY %3 ... @@ -1508,7 +1508,7 @@ body: | ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 - ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-LABEL: name: load_global_s32_from_1_gep_m4096 ; GFX7: liveins: $vgpr0_vgpr1 @@ -1528,7 +1528,7 @@ body: | ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 - ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m4096 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 @@ -1543,7 +1543,7 @@ body: | ; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1_gep_m4096 ; GFX8: liveins: $vgpr0_vgpr1 @@ -1558,12 +1558,12 @@ body: | ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_global_s32_from_1_gep_m4096 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4096, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4096, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; GFX10-LABEL: name: load_global_s32_from_1_gep_m4096 ; GFX10: liveins: $vgpr0_vgpr1 @@ -1578,12 +1578,12 @@ body: | ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -4096 %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 1) + %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1) $vgpr0 = COPY %3 ... @@ -1608,7 +1608,7 @@ body: | ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX6: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8191 - ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-LABEL: name: load_global_s32_from_1_gep_8191 ; GFX7: liveins: $vgpr0_vgpr1 @@ -1619,7 +1619,7 @@ body: | ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX7: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8191 - ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_8191 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 @@ -1634,7 +1634,7 @@ body: | ; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1_gep_8191 ; GFX8: liveins: $vgpr0_vgpr1 @@ -1649,7 +1649,7 @@ body: | ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_global_s32_from_1_gep_8191 ; GFX9: liveins: $vgpr0_vgpr1 @@ -1664,7 +1664,7 @@ body: | ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; GFX10-LABEL: name: load_global_s32_from_1_gep_8191 ; GFX10: liveins: $vgpr0_vgpr1 @@ -1679,12 +1679,12 @@ body: | ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 8191 %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 1) + %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1) $vgpr0 = COPY %3 ... @@ -1709,7 +1709,7 @@ body: | ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX6: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8192 - ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-LABEL: name: load_global_s32_from_1_gep_8192 ; GFX7: liveins: $vgpr0_vgpr1 @@ -1720,7 +1720,7 @@ body: | ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX7: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8192 - ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_8192 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 @@ -1735,7 +1735,7 @@ body: | ; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1_gep_8192 ; GFX8: liveins: $vgpr0_vgpr1 @@ -1750,7 +1750,7 @@ body: | ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_global_s32_from_1_gep_8192 ; GFX9: liveins: $vgpr0_vgpr1 @@ -1765,7 +1765,7 @@ body: | ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; GFX10-LABEL: name: load_global_s32_from_1_gep_8192 ; GFX10: liveins: $vgpr0_vgpr1 @@ -1780,12 +1780,12 @@ body: | ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 8192 %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 1) + %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1) $vgpr0 = COPY %3 ... @@ -1819,7 +1819,7 @@ body: | ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 - ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-LABEL: name: load_global_s32_from_1_gep_m8191 ; GFX7: liveins: $vgpr0_vgpr1 @@ -1839,7 +1839,7 @@ body: | ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 - ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m8191 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 @@ -1854,7 +1854,7 @@ body: | ; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1_gep_m8191 ; GFX8: liveins: $vgpr0_vgpr1 @@ -1869,7 +1869,7 @@ body: | ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_global_s32_from_1_gep_m8191 ; GFX9: liveins: $vgpr0_vgpr1 @@ -1884,7 +1884,7 @@ body: | ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; GFX10-LABEL: name: load_global_s32_from_1_gep_m8191 ; GFX10: liveins: $vgpr0_vgpr1 @@ -1899,12 +1899,12 @@ body: | ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -8191 %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 1) + %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1) $vgpr0 = COPY %3 ... @@ -1938,7 +1938,7 @@ body: | ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 - ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-LABEL: name: load_global_s32_from_1_gep_m8192 ; GFX7: liveins: $vgpr0_vgpr1 @@ -1958,7 +1958,7 @@ body: | ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3 - ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m8192 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 @@ -1973,7 +1973,7 @@ body: | ; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) ; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX8-LABEL: name: load_global_s32_from_1_gep_m8192 ; GFX8: liveins: $vgpr0_vgpr1 @@ -1988,7 +1988,7 @@ body: | ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s8), addrspace 1) ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] ; GFX9-LABEL: name: load_global_s32_from_1_gep_m8192 ; GFX9: liveins: $vgpr0_vgpr1 @@ -2003,7 +2003,7 @@ body: | ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] ; GFX10-LABEL: name: load_global_s32_from_1_gep_m8192 ; GFX10: liveins: $vgpr0_vgpr1 @@ -2018,12 +2018,12 @@ body: | ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load (s8), addrspace 1) ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -8192 %2:vgpr(p1) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 1) + %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 1) $vgpr0 = COPY %3 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.s96.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.s96.mir index 880961bd006be..97e47f7ad2bee 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.s96.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.s96.mir @@ -24,30 +24,30 @@ body: | ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7: [[BUFFER_LOAD_DWORDX3_ADDR64_:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 12, align 4, addrspace 1) + ; GFX7: [[BUFFER_LOAD_DWORDX3_ADDR64_:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load (<3 x s32>), align 4, addrspace 1) ; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[BUFFER_LOAD_DWORDX3_ADDR64_]] ; GFX7-FLAT-LABEL: name: load_global_v3s32 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX7-FLAT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4, addrspace 1) + ; GFX7-FLAT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x s32>), align 4, addrspace 1) ; GFX7-FLAT: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]] ; GFX8-LABEL: name: load_global_v3s32 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4, addrspace 1) + ; GFX8: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<3 x s32>), align 4, addrspace 1) ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]] ; GFX9-LABEL: name: load_global_v3s32 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9: [[GLOBAL_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = GLOBAL_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec :: (load 12, align 4, addrspace 1) + ; GFX9: [[GLOBAL_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = GLOBAL_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec :: (load (<3 x s32>), align 4, addrspace 1) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[GLOBAL_LOAD_DWORDX3_]] ; GFX10-LABEL: name: load_global_v3s32 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX10: [[GLOBAL_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = GLOBAL_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec :: (load 12, align 4, addrspace 1) + ; GFX10: [[GLOBAL_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = GLOBAL_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec :: (load (<3 x s32>), align 4, addrspace 1) ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[GLOBAL_LOAD_DWORDX3_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<3 x s32>) = G_LOAD %0 :: (load 12, align 4, addrspace 1) + %1:vgpr(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 4, addrspace 1) $vgpr0_vgpr1_vgpr2 = COPY %1 ... @@ -66,30 +66,30 @@ body: | ; GFX7-LABEL: name: load_global_s96 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) + ; GFX7: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load (s96), align 4, addrspace 1) ; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) ; GFX7-FLAT-LABEL: name: load_global_s96 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7-FLAT: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) + ; GFX7-FLAT: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load (s96), align 4, addrspace 1) ; GFX7-FLAT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) ; GFX8-LABEL: name: load_global_s96 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) + ; GFX8: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load (s96), align 4, addrspace 1) ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) ; GFX9-LABEL: name: load_global_s96 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) + ; GFX9: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load (s96), align 4, addrspace 1) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) ; GFX10-LABEL: name: load_global_s96 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX10: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) + ; GFX10: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load (s96), align 4, addrspace 1) ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s96) = G_LOAD %0 :: (load 12, align 4, addrspace 1) + %1:vgpr(s96) = G_LOAD %0 :: (load (s96), align 4, addrspace 1) $vgpr0_vgpr1_vgpr2 = COPY %1 ... @@ -108,30 +108,30 @@ body: | ; GFX7-LABEL: name: load_global_v6s16 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) + ; GFX7: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4, addrspace 1) ; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) ; GFX7-FLAT-LABEL: name: load_global_v6s16 ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7-FLAT: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) + ; GFX7-FLAT: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4, addrspace 1) ; GFX7-FLAT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) ; GFX8-LABEL: name: load_global_v6s16 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) + ; GFX8: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4, addrspace 1) ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) ; GFX9-LABEL: name: load_global_v6s16 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) + ; GFX9: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4, addrspace 1) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) ; GFX10-LABEL: name: load_global_v6s16 ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX10: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) + ; GFX10: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4, addrspace 1) ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(<6 x s16>) = G_LOAD %0 :: (load 12, align 4, addrspace 1) + %1:vgpr(<6 x s16>) = G_LOAD %0 :: (load (<6 x s16>), align 4, addrspace 1) $vgpr0_vgpr1_vgpr2 = COPY %1 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local-128.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local-128.mir index 6f3b2d4e514af..d8f3c8f23c38b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local-128.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local-128.mir @@ -18,15 +18,15 @@ body: | ; GFX7: liveins: $vgpr0 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[DS_READ_B128_:%[0-9]+]]:vreg_128 = DS_READ_B128 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 16, addrspace 3) + ; GFX7: [[DS_READ_B128_:%[0-9]+]]:vreg_128 = DS_READ_B128 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<4 x s32>), addrspace 3) ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ_B128_]] ; GFX9-LABEL: name: load_local_v4s32_align16 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128 = DS_READ_B128_gfx9 [[COPY]], 0, 0, implicit $exec :: (load 16, addrspace 3) + ; GFX9: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128 = DS_READ_B128_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), addrspace 3) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ_B128_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load 16, align 16, addrspace 3) + %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 16, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -46,15 +46,15 @@ body: | ; GFX7: liveins: $vgpr0 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[DS_READ2_B64_:%[0-9]+]]:vreg_128 = DS_READ2_B64 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load 16, align 8, addrspace 3) + ; GFX7: [[DS_READ2_B64_:%[0-9]+]]:vreg_128 = DS_READ2_B64 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_]] ; GFX9-LABEL: name: load_local_v4s32_align_8 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load 16, align 8, addrspace 3) + ; GFX9: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load 16, align 8, addrspace 3) + %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 8, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -74,17 +74,17 @@ body: | ; GFX7: liveins: $vgpr0 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[DS_READ2_B64_:%[0-9]+]]:vreg_128 = DS_READ2_B64 [[COPY]], 50, 51, 0, implicit $m0, implicit $exec :: (load 16, align 8, addrspace 3) + ; GFX7: [[DS_READ2_B64_:%[0-9]+]]:vreg_128 = DS_READ2_B64 [[COPY]], 50, 51, 0, implicit $m0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_]] ; GFX9-LABEL: name: load_local_v4s32_align_8_offset_160 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 50, 51, 0, implicit $exec :: (load 16, align 8, addrspace 3) + ; GFX9: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 50, 51, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 400 %2:vgpr(p3) = G_PTR_ADD %0, %1 - %3:vgpr(<4 x s32>) = G_LOAD %2 :: (load 16, align 8, addrspace 3) + %3:vgpr(<4 x s32>) = G_LOAD %2 :: (load (<4 x s32>), align 8, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 ... @@ -106,19 +106,19 @@ body: | ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4000, implicit $exec ; GFX7: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[DS_READ2_B64_:%[0-9]+]]:vreg_128 = DS_READ2_B64 %2, 0, 1, 0, implicit $m0, implicit $exec :: (load 16, align 8, addrspace 3) + ; GFX7: [[DS_READ2_B64_:%[0-9]+]]:vreg_128 = DS_READ2_B64 %2, 0, 1, 0, implicit $m0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_]] ; GFX9-LABEL: name: load_local_v4s32_align_8_offset_320 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4000, implicit $exec ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX9: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[V_ADD_U32_e64_]], 0, 1, 0, implicit $exec :: (load 16, align 8, addrspace 3) + ; GFX9: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[V_ADD_U32_e64_]], 0, 1, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 4000 %2:vgpr(p3) = G_PTR_ADD %0, %1 - %3:vgpr(<4 x s32>) = G_LOAD %2 :: (load 16, align 8, addrspace 3) + %3:vgpr(<4 x s32>) = G_LOAD %2 :: (load (<4 x s32>), align 8, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 ... @@ -138,15 +138,15 @@ body: | ; GFX7: liveins: $vgpr0 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[DS_READ2_B64_:%[0-9]+]]:vreg_128 = DS_READ2_B64 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load 16, align 8, addrspace 3) + ; GFX7: [[DS_READ2_B64_:%[0-9]+]]:vreg_128 = DS_READ2_B64 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (<2 x s64>), align 8, addrspace 3) ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_]] ; GFX9-LABEL: name: load_local_v2s64 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load 16, align 8, addrspace 3) + ; GFX9: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<2 x s64>), align 8, addrspace 3) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(<2 x s64>) = G_LOAD %0 :: (load 16, align 8, addrspace 3) + %1:vgpr(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 8, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -166,15 +166,15 @@ body: | ; GFX7: liveins: $vgpr0 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load 16, align 8, addrspace 3) + ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load (<2 x p1>), align 8, addrspace 3) ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) ; GFX9-LABEL: name: load_local_v2p1 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load 16, align 8, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load (<2 x p1>), align 8, addrspace 3) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(<2 x p1>) = G_LOAD %0 :: (load 16, align 8, addrspace 3) + %1:vgpr(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 8, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -194,15 +194,15 @@ body: | ; GFX7: liveins: $vgpr0 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p3) :: (load 16, align 8, addrspace 3) + ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p3) :: (load (s128), align 8, addrspace 3) ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) ; GFX9-LABEL: name: load_local_s128 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p3) :: (load 16, align 8, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p3) :: (load (s128), align 8, addrspace 3) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(s128) = G_LOAD %0 :: (load 16, align 8, addrspace 3) + %1:vgpr(s128) = G_LOAD %0 :: (load (s128), align 8, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -222,15 +222,15 @@ body: | ; GFX7: liveins: $vgpr0 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p3) :: (load 16, align 8, addrspace 3) + ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p3) :: (load (<8 x s16>), align 8, addrspace 3) ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>) ; GFX9-LABEL: name: load_local_v8s16 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p3) :: (load 16, align 8, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p3) :: (load (<8 x s16>), align 8, addrspace 3) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>) %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(<8 x s16>) = G_LOAD %0 :: (load 16, align 8, addrspace 3) + %1:vgpr(<8 x s16>) = G_LOAD %0 :: (load (<8 x s16>), align 8, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir index 45d74ad38e532..654cc2ba2d51d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir @@ -16,25 +16,25 @@ body: | bb.0: liveins: $vgpr0 + ; GFX6-LABEL: name: load_local_s32_from_4 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s32), addrspace 3) + ; GFX6: $vgpr0 = COPY [[DS_READ_B32_]] ; GFX7-LABEL: name: load_local_s32_from_4 ; GFX7: liveins: $vgpr0 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4, addrspace 3) + ; GFX7: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s32), addrspace 3) ; GFX7: $vgpr0 = COPY [[DS_READ_B32_]] ; GFX9-LABEL: name: load_local_s32_from_4 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load 4, addrspace 3) + ; GFX9: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s32), addrspace 3) ; GFX9: $vgpr0 = COPY [[DS_READ_B32_gfx9_]] - ; GFX6-LABEL: name: load_local_s32_from_4 - ; GFX6: liveins: $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4, addrspace 3) - ; GFX6: $vgpr0 = COPY [[DS_READ_B32_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 3) + %1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 3) $vgpr0 = COPY %1 ... @@ -50,25 +50,25 @@ body: | bb.0: liveins: $vgpr0 + ; GFX6-LABEL: name: load_local_s32_from_2 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s16), addrspace 3) + ; GFX6: $vgpr0 = COPY [[DS_READ_U16_]] ; GFX7-LABEL: name: load_local_s32_from_2 ; GFX7: liveins: $vgpr0 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 2, addrspace 3) + ; GFX7: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s16), addrspace 3) ; GFX7: $vgpr0 = COPY [[DS_READ_U16_]] ; GFX9-LABEL: name: load_local_s32_from_2 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[DS_READ_U16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[COPY]], 0, 0, implicit $exec :: (load 2, addrspace 3) + ; GFX9: [[DS_READ_U16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 3) ; GFX9: $vgpr0 = COPY [[DS_READ_U16_gfx9_]] - ; GFX6-LABEL: name: load_local_s32_from_2 - ; GFX6: liveins: $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 2, addrspace 3) - ; GFX6: $vgpr0 = COPY [[DS_READ_U16_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(s32) = G_LOAD %0 :: (load 2, align 2, addrspace 3) + %1:vgpr(s32) = G_LOAD %0 :: (load (s16), align 2, addrspace 3) $vgpr0 = COPY %1 ... @@ -87,25 +87,25 @@ body: | bb.0: liveins: $vgpr0 + ; GFX6-LABEL: name: load_local_s32_from_1 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX6: $vgpr0 = COPY [[DS_READ_U8_]] ; GFX7-LABEL: name: load_local_s32_from_1 ; GFX7: liveins: $vgpr0 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3) + ; GFX7: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) ; GFX7: $vgpr0 = COPY [[DS_READ_U8_]] ; GFX9-LABEL: name: load_local_s32_from_1 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[COPY]], 0, 0, implicit $exec :: (load 1, addrspace 3) + ; GFX9: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 3) ; GFX9: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] - ; GFX6-LABEL: name: load_local_s32_from_1 - ; GFX6: liveins: $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3) - ; GFX6: $vgpr0 = COPY [[DS_READ_U8_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(s32) = G_LOAD %0 :: (load 1, align 1, addrspace 3) + %1:vgpr(s32) = G_LOAD %0 :: (load (s8), align 1, addrspace 3) $vgpr0 = COPY %1 ... @@ -121,25 +121,25 @@ body: | bb.0: liveins: $vgpr0 + ; GFX6-LABEL: name: load_local_v2s32 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<2 x s32>), addrspace 3) + ; GFX6: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] ; GFX7-LABEL: name: load_local_v2s32 ; GFX7: liveins: $vgpr0 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 8, addrspace 3) + ; GFX7: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<2 x s32>), addrspace 3) ; GFX7: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] ; GFX9-LABEL: name: load_local_v2s32 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load 8, addrspace 3) + ; GFX9: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 3) ; GFX9: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]] - ; GFX6-LABEL: name: load_local_v2s32 - ; GFX6: liveins: $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 8, addrspace 3) - ; GFX6: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load 8, align 8, addrspace 3) + %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 8, addrspace 3) $vgpr0_vgpr1 = COPY %1 ... @@ -155,25 +155,25 @@ body: | bb.0: liveins: $vgpr0 + ; GFX6-LABEL: name: load_local_v2s32_align4 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) + ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; GFX7-LABEL: name: load_local_v2s32_align4 ; GFX7: liveins: $vgpr0 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load 8, align 4, addrspace 3) + ; GFX7: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (<2 x s32>), align 4, addrspace 3) ; GFX7: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_]] ; GFX9-LABEL: name: load_local_v2s32_align4 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load 8, align 4, addrspace 3) + ; GFX9: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<2 x s32>), align 4, addrspace 3) ; GFX9: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]] - ; GFX6-LABEL: name: load_local_v2s32_align4 - ; GFX6: liveins: $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 - ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) - ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load 8, align 4, addrspace 3) + %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 4, addrspace 3) $vgpr0_vgpr1 = COPY %1 ... @@ -189,25 +189,25 @@ body: | bb.0: liveins: $vgpr0 + ; GFX6-LABEL: name: load_local_s64 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s64), addrspace 3) + ; GFX6: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] ; GFX7-LABEL: name: load_local_s64 ; GFX7: liveins: $vgpr0 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 8, addrspace 3) + ; GFX7: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s64), addrspace 3) ; GFX7: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] ; GFX9-LABEL: name: load_local_s64 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load 8, addrspace 3) + ; GFX9: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 3) ; GFX9: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]] - ; GFX6-LABEL: name: load_local_s64 - ; GFX6: liveins: $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 8, addrspace 3) - ; GFX6: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(s64) = G_LOAD %0 :: (load 8, align 8, addrspace 3) + %1:vgpr(s64) = G_LOAD %0 :: (load (s64), align 8, addrspace 3) $vgpr0_vgpr1 = COPY %1 ... @@ -223,25 +223,25 @@ body: | bb.0: liveins: $vgpr0 + ; GFX6-LABEL: name: load_local_s64_align4 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) + ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](s64) ; GFX7-LABEL: name: load_local_s64_align4 ; GFX7: liveins: $vgpr0 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load 8, align 4, addrspace 3) + ; GFX7: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (s64), align 4, addrspace 3) ; GFX7: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_]] ; GFX9-LABEL: name: load_local_s64_align4 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load 8, align 4, addrspace 3) + ; GFX9: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (s64), align 4, addrspace 3) ; GFX9: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]] - ; GFX6-LABEL: name: load_local_s64_align4 - ; GFX6: liveins: $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 - ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) - ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](s64) %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(s64) = G_LOAD %0 :: (load 8, align 4, addrspace 3) + %1:vgpr(s64) = G_LOAD %0 :: (load (s64), align 4, addrspace 3) $vgpr0_vgpr1 = COPY %1 ... @@ -257,25 +257,25 @@ body: | bb.0: liveins: $vgpr0 + ; GFX6-LABEL: name: load_local_p3_from_4 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (p3), addrspace 3) + ; GFX6: $vgpr0 = COPY [[DS_READ_B32_]] ; GFX7-LABEL: name: load_local_p3_from_4 ; GFX7: liveins: $vgpr0 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4, addrspace 3) + ; GFX7: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (p3), addrspace 3) ; GFX7: $vgpr0 = COPY [[DS_READ_B32_]] ; GFX9-LABEL: name: load_local_p3_from_4 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load 4, addrspace 3) + ; GFX9: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (p3), addrspace 3) ; GFX9: $vgpr0 = COPY [[DS_READ_B32_gfx9_]] - ; GFX6-LABEL: name: load_local_p3_from_4 - ; GFX6: liveins: $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4, addrspace 3) - ; GFX6: $vgpr0 = COPY [[DS_READ_B32_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(p3) = G_LOAD %0 :: (load 4, align 4, addrspace 3) + %1:vgpr(p3) = G_LOAD %0 :: (load (p3), align 4, addrspace 3) $vgpr0 = COPY %1 ... @@ -291,25 +291,25 @@ body: | bb.0: liveins: $vgpr0 + ; GFX6-LABEL: name: load_local_p5_from_4 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (p5), addrspace 3) + ; GFX6: $vgpr0 = COPY [[DS_READ_B32_]] ; GFX7-LABEL: name: load_local_p5_from_4 ; GFX7: liveins: $vgpr0 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4, addrspace 3) + ; GFX7: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (p5), addrspace 3) ; GFX7: $vgpr0 = COPY [[DS_READ_B32_]] ; GFX9-LABEL: name: load_local_p5_from_4 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load 4, addrspace 3) + ; GFX9: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (p5), addrspace 3) ; GFX9: $vgpr0 = COPY [[DS_READ_B32_gfx9_]] - ; GFX6-LABEL: name: load_local_p5_from_4 - ; GFX6: liveins: $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4, addrspace 3) - ; GFX6: $vgpr0 = COPY [[DS_READ_B32_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(p3) = G_LOAD %0 :: (load 4, align 4, addrspace 3) + %1:vgpr(p5) = G_LOAD %0 :: (load (p5), align 4, addrspace 3) $vgpr0 = COPY %1 ... @@ -325,25 +325,25 @@ body: | bb.0: liveins: $vgpr0 + ; GFX6-LABEL: name: load_local_p1_align8 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (p1), addrspace 3) + ; GFX6: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] ; GFX7-LABEL: name: load_local_p1_align8 ; GFX7: liveins: $vgpr0 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 8, addrspace 3) + ; GFX7: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (p1), addrspace 3) ; GFX7: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] ; GFX9-LABEL: name: load_local_p1_align8 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load 8, addrspace 3) + ; GFX9: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (p1), addrspace 3) ; GFX9: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]] - ; GFX6-LABEL: name: load_local_p1_align8 - ; GFX6: liveins: $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 8, addrspace 3) - ; GFX6: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(p1) = G_LOAD %0 :: (load 8, align 8, addrspace 3) + %1:vgpr(p1) = G_LOAD %0 :: (load (p1), align 8, addrspace 3) $vgpr0_vgpr1 = COPY %1 ... @@ -359,25 +359,25 @@ body: | bb.0: liveins: $vgpr0 + ; GFX6-LABEL: name: load_local_p1_align4 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 4, addrspace 3) + ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](p1) ; GFX7-LABEL: name: load_local_p1_align4 ; GFX7: liveins: $vgpr0 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load 8, align 4, addrspace 3) + ; GFX7: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (p1), align 4, addrspace 3) ; GFX7: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_]] ; GFX9-LABEL: name: load_local_p1_align4 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load 8, align 4, addrspace 3) + ; GFX9: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (p1), align 4, addrspace 3) ; GFX9: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]] - ; GFX6-LABEL: name: load_local_p1_align4 - ; GFX6: liveins: $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 - ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) - ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](p1) %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(p1) = G_LOAD %0 :: (load 8, align 4, addrspace 3) + %1:vgpr(p1) = G_LOAD %0 :: (load (p1), align 4, addrspace 3) $vgpr0_vgpr1 = COPY %1 ... @@ -393,25 +393,25 @@ body: | bb.0: liveins: $vgpr0 + ; GFX6-LABEL: name: load_local_p999_from_8 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p3) :: (load (p999), addrspace 3) + ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](p999) ; GFX7-LABEL: name: load_local_p999_from_8 ; GFX7: liveins: $vgpr0 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p3) :: (load (p999), addrspace 3) ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p999) ; GFX9-LABEL: name: load_local_p999_from_8 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p3) :: (load (p999), addrspace 3) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p999) - ; GFX6-LABEL: name: load_local_p999_from_8 - ; GFX6: liveins: $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 - ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) - ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](p999) %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(p999) = G_LOAD %0 :: (load 8, align 8, addrspace 3) + %1:vgpr(p999) = G_LOAD %0 :: (load (p999), align 8, addrspace 3) $vgpr0_vgpr1 = COPY %1 ... @@ -427,25 +427,25 @@ body: | bb.0: liveins: $vgpr0 + ; GFX6-LABEL: name: load_local_v2p3 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3) + ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) ; GFX7-LABEL: name: load_local_v2p3 ; GFX7: liveins: $vgpr0 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3) ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) ; GFX9-LABEL: name: load_local_v2p3 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) - ; GFX6-LABEL: name: load_local_v2p3 - ; GFX6: liveins: $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 - ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) - ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(<2 x p3>) = G_LOAD %0 :: (load 8, align 8, addrspace 3) + %1:vgpr(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 3) $vgpr0_vgpr1 = COPY %1 ... @@ -461,25 +461,25 @@ body: | bb.0: liveins: $vgpr0 + ; GFX6-LABEL: name: load_local_v2s16 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<2 x s16>), addrspace 3) + ; GFX6: $vgpr0 = COPY [[DS_READ_B32_]] ; GFX7-LABEL: name: load_local_v2s16 ; GFX7: liveins: $vgpr0 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4, addrspace 3) + ; GFX7: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<2 x s16>), addrspace 3) ; GFX7: $vgpr0 = COPY [[DS_READ_B32_]] ; GFX9-LABEL: name: load_local_v2s16 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load 4, addrspace 3) + ; GFX9: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 3) ; GFX9: $vgpr0 = COPY [[DS_READ_B32_gfx9_]] - ; GFX6-LABEL: name: load_local_v2s16 - ; GFX6: liveins: $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4, addrspace 3) - ; GFX6: $vgpr0 = COPY [[DS_READ_B32_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load 4, align 4, addrspace 3) + %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 3) $vgpr0 = COPY %1 ... @@ -495,25 +495,25 @@ body: | bb.0: liveins: $vgpr0 + ; GFX6-LABEL: name: load_local_v4s16 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<4 x s16>), addrspace 3) + ; GFX6: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] ; GFX7-LABEL: name: load_local_v4s16 ; GFX7: liveins: $vgpr0 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 8, addrspace 3) + ; GFX7: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<4 x s16>), addrspace 3) ; GFX7: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] ; GFX9-LABEL: name: load_local_v4s16 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load 8, addrspace 3) + ; GFX9: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s16>), addrspace 3) ; GFX9: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]] - ; GFX6-LABEL: name: load_local_v4s16 - ; GFX6: liveins: $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 8, addrspace 3) - ; GFX6: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] %0:vgpr(p3) = COPY $vgpr0 - %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load 8, align 8, addrspace 3) + %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 8, addrspace 3) $vgpr0_vgpr1 = COPY %1 ... @@ -533,7 +533,7 @@ body: | # liveins: $vgpr0 # %0:vgpr(p3) = COPY $vgpr0 -# %1:vgpr(<6 x s16>) = G_LOAD %0 :: (load 12, align 4, addrspace 3) +# %1:vgpr(<6 x s16>) = G_LOAD %0 :: (load (<6 x s16>), align 4, addrspace 3) # $vgpr0_vgpr1_vgpr2 = COPY %1 # ... @@ -553,29 +553,29 @@ body: | bb.0: liveins: $vgpr0 + ; GFX6-LABEL: name: load_local_s32_from_1_gep_65535 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec + ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX6: $vgpr0 = COPY [[DS_READ_U8_]] ; GFX7-LABEL: name: load_local_s32_from_1_gep_65535 ; GFX7: liveins: $vgpr0 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 65535, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3) + ; GFX7: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 65535, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) ; GFX7: $vgpr0 = COPY [[DS_READ_U8_]] ; GFX9-LABEL: name: load_local_s32_from_1_gep_65535 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[COPY]], 65535, 0, implicit $exec :: (load 1, addrspace 3) + ; GFX9: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[COPY]], 65535, 0, implicit $exec :: (load (s8), addrspace 3) ; GFX9: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] - ; GFX6-LABEL: name: load_local_s32_from_1_gep_65535 - ; GFX6: liveins: $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec - ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3) - ; GFX6: $vgpr0 = COPY [[DS_READ_U8_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 65535 %2:vgpr(p3) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 3) + %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 3) $vgpr0 = COPY %3 ... @@ -591,36 +591,36 @@ body: | bb.0: liveins: $vgpr0 + ; GFX6-LABEL: name: load_local_s32_from_1_gep_65535_known_bits_base_address + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec + ; GFX6: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_AND_B32_e64_]], 65535, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX6: $vgpr0 = COPY [[DS_READ_U8_]] ; GFX7-LABEL: name: load_local_s32_from_1_gep_65535_known_bits_base_address ; GFX7: liveins: $vgpr0 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec ; GFX7: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_AND_B32_e64_]], 65535, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3) + ; GFX7: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_AND_B32_e64_]], 65535, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) ; GFX7: $vgpr0 = COPY [[DS_READ_U8_]] ; GFX9-LABEL: name: load_local_s32_from_1_gep_65535_known_bits_base_address ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec ; GFX9: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec - ; GFX9: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_AND_B32_e64_]], 65535, 0, implicit $exec :: (load 1, addrspace 3) + ; GFX9: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_AND_B32_e64_]], 65535, 0, implicit $exec :: (load (s8), addrspace 3) ; GFX9: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] - ; GFX6-LABEL: name: load_local_s32_from_1_gep_65535_known_bits_base_address - ; GFX6: liveins: $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec - ; GFX6: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec - ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_AND_B32_e64_]], 65535, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3) - ; GFX6: $vgpr0 = COPY [[DS_READ_U8_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 2147483647 %2:vgpr(s32) = G_AND %0, %1 %3:vgpr(p3) = G_INTTOPTR %2 %4:vgpr(s32) = G_CONSTANT i32 65535 %5:vgpr(p3) = G_PTR_ADD %3, %4 - %6:vgpr(s32) = G_LOAD %5 :: (load 1, align 1, addrspace 3) + %6:vgpr(s32) = G_LOAD %5 :: (load (s8), align 1, addrspace 3) $vgpr0 = COPY %6 ... @@ -636,33 +636,33 @@ body: | bb.0: liveins: $vgpr0 + ; GFX6-LABEL: name: load_local_s32_from_1_gep_65536 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65536, implicit $exec + ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX6: $vgpr0 = COPY [[DS_READ_U8_]] ; GFX7-LABEL: name: load_local_s32_from_1_gep_65536 ; GFX7: liveins: $vgpr0 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65536, implicit $exec ; GFX7: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3) + ; GFX7: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) ; GFX7: $vgpr0 = COPY [[DS_READ_U8_]] ; GFX9-LABEL: name: load_local_s32_from_1_gep_65536 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65536, implicit $exec ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX9: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_ADD_U32_e64_]], 0, 0, implicit $exec :: (load 1, addrspace 3) + ; GFX9: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_ADD_U32_e64_]], 0, 0, implicit $exec :: (load (s8), addrspace 3) ; GFX9: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] - ; GFX6-LABEL: name: load_local_s32_from_1_gep_65536 - ; GFX6: liveins: $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65536, implicit $exec - ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3) - ; GFX6: $vgpr0 = COPY [[DS_READ_U8_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 65536 %2:vgpr(p3) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 3) + %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 3) $vgpr0 = COPY %3 ... @@ -678,33 +678,33 @@ body: | bb.0: liveins: $vgpr0 + ; GFX6-LABEL: name: load_local_s32_from_1_gep_m1 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) + ; GFX6: $vgpr0 = COPY [[DS_READ_U8_]] ; GFX7-LABEL: name: load_local_s32_from_1_gep_m1 ; GFX7: liveins: $vgpr0 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX7: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3) + ; GFX7: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3) ; GFX7: $vgpr0 = COPY [[DS_READ_U8_]] ; GFX9-LABEL: name: load_local_s32_from_1_gep_m1 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX9: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_ADD_U32_e64_]], 0, 0, implicit $exec :: (load 1, addrspace 3) + ; GFX9: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_ADD_U32_e64_]], 0, 0, implicit $exec :: (load (s8), addrspace 3) ; GFX9: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] - ; GFX6-LABEL: name: load_local_s32_from_1_gep_m1 - ; GFX6: liveins: $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3) - ; GFX6: $vgpr0 = COPY [[DS_READ_U8_]] %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 -1 %2:vgpr(p3) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 3) + %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 3) $vgpr0 = COPY %3 ... @@ -720,29 +720,29 @@ body: | bb.0: liveins: $vgpr0_vgpr1 + ; GFX6-LABEL: name: load_local_s64_align4_from_1_gep_1016 + ; GFX6: liveins: $vgpr0_vgpr1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX6: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1016 + ; GFX6: [[PTR_ADD:%[0-9]+]]:vgpr(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64), align 4, addrspace 3) + ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](s64) ; GFX7-LABEL: name: load_local_s64_align4_from_1_gep_1016 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[COPY]], 254, 255, 0, implicit $m0, implicit $exec :: (load 8, align 4, addrspace 3) + ; GFX7: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[COPY]], 254, 255, 0, implicit $m0, implicit $exec :: (load (s64), align 4, addrspace 3) ; GFX7: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_]] ; GFX9-LABEL: name: load_local_s64_align4_from_1_gep_1016 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 254, 255, 0, implicit $exec :: (load 8, align 4, addrspace 3) + ; GFX9: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 254, 255, 0, implicit $exec :: (load (s64), align 4, addrspace 3) ; GFX9: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]] - ; GFX6-LABEL: name: load_local_s64_align4_from_1_gep_1016 - ; GFX6: liveins: $vgpr0_vgpr1 - ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 - ; GFX6: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1016 - ; GFX6: [[PTR_ADD:%[0-9]+]]:vgpr(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8, align 4, addrspace 3) - ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](s64) %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 1016 %2:vgpr(p3) = G_PTR_ADD %0, %1 - %3:vgpr(s64) = G_LOAD %2 :: (load 8, align 4, addrspace 3) + %3:vgpr(s64) = G_LOAD %2 :: (load (s64), align 4, addrspace 3) $vgpr0_vgpr1 = COPY %3 ... @@ -758,33 +758,33 @@ body: | bb.0: liveins: $vgpr0_vgpr1 + ; GFX6-LABEL: name: load_local_s64_align4_from_1_gep_1020 + ; GFX6: liveins: $vgpr0_vgpr1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX6: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1020 + ; GFX6: [[PTR_ADD:%[0-9]+]]:vgpr(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64), align 4, addrspace 3) + ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](s64) ; GFX7-LABEL: name: load_local_s64_align4_from_1_gep_1020 ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1020, implicit $exec ; GFX7: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 %2, 0, 1, 0, implicit $m0, implicit $exec :: (load 8, align 4, addrspace 3) + ; GFX7: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 %2, 0, 1, 0, implicit $m0, implicit $exec :: (load (s64), align 4, addrspace 3) ; GFX7: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_]] ; GFX9-LABEL: name: load_local_s64_align4_from_1_gep_1020 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1020, implicit $exec ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX9: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[V_ADD_U32_e64_]], 0, 1, 0, implicit $exec :: (load 8, align 4, addrspace 3) + ; GFX9: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[V_ADD_U32_e64_]], 0, 1, 0, implicit $exec :: (load (s64), align 4, addrspace 3) ; GFX9: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]] - ; GFX6-LABEL: name: load_local_s64_align4_from_1_gep_1020 - ; GFX6: liveins: $vgpr0_vgpr1 - ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 - ; GFX6: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1020 - ; GFX6: [[PTR_ADD:%[0-9]+]]:vgpr(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8, align 4, addrspace 3) - ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](s64) %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 1020 %2:vgpr(p3) = G_PTR_ADD %0, %1 - %3:vgpr(s64) = G_LOAD %2 :: (load 8, align 4, addrspace 3) + %3:vgpr(s64) = G_LOAD %2 :: (load (s64), align 4, addrspace 3) $vgpr0_vgpr1 = COPY %3 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir index 29500933890e8..2ac7eb044bba5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir @@ -20,15 +20,15 @@ body: | ; GFX6-LABEL: name: load_private_s32_from_4 ; GFX6: liveins: $vgpr0 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5) + ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_4 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5) + ; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] %0:vgpr(p5) = COPY $vgpr0 - %1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 5) + %1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 5) $vgpr0 = COPY %1 ... @@ -50,15 +50,15 @@ body: | ; GFX6-LABEL: name: load_private_s32_from_2 ; GFX6: liveins: $vgpr0 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 2, addrspace 5) + ; GFX6: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s16), addrspace 5) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_2 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 2, addrspace 5) + ; GFX9: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s16), addrspace 5) ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_OFFEN]] %0:vgpr(p5) = COPY $vgpr0 - %1:vgpr(s32) = G_LOAD %0 :: (load 2, align 2, addrspace 5) + %1:vgpr(s32) = G_LOAD %0 :: (load (s16), align 2, addrspace 5) $vgpr0 = COPY %1 ... @@ -80,15 +80,15 @@ body: | ; GFX6-LABEL: name: load_private_s32_from_1 ; GFX6: liveins: $vgpr0 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] %0:vgpr(p5) = COPY $vgpr0 - %1:vgpr(s32) = G_LOAD %0 :: (load 1, align 1, addrspace 5) + %1:vgpr(s32) = G_LOAD %0 :: (load (s8), align 1, addrspace 5) $vgpr0 = COPY %1 ... @@ -110,15 +110,15 @@ body: | ; GFX6-LABEL: name: load_private_p3_from_4 ; GFX6: liveins: $vgpr0 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5) + ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (p3), addrspace 5) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX9-LABEL: name: load_private_p3_from_4 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5) + ; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (p3), addrspace 5) ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] %0:vgpr(p5) = COPY $vgpr0 - %1:vgpr(p3) = G_LOAD %0 :: (load 4, align 4, addrspace 5) + %1:vgpr(p3) = G_LOAD %0 :: (load (p3), align 4, addrspace 5) $vgpr0 = COPY %1 ... @@ -140,15 +140,15 @@ body: | ; GFX6-LABEL: name: load_private_p5_from_4 ; GFX6: liveins: $vgpr0 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5) + ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (p5), addrspace 5) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX9-LABEL: name: load_private_p5_from_4 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5) + ; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (p5), addrspace 5) ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] %0:vgpr(p5) = COPY $vgpr0 - %1:vgpr(p5) = G_LOAD %0 :: (load 4, align 4, addrspace 5) + %1:vgpr(p5) = G_LOAD %0 :: (load (p5), align 4, addrspace 5) $vgpr0 = COPY %1 ... @@ -171,15 +171,15 @@ body: | ; GFX6-LABEL: name: load_private_v2s16 ; GFX6: liveins: $vgpr0 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5) + ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 5) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX9-LABEL: name: load_private_v2s16 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5) + ; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (<2 x s16>), addrspace 5) ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] %0:vgpr(p5) = COPY $vgpr0 - %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load 4, align 4, addrspace 5) + %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 5) $vgpr0 = COPY %1 ... @@ -207,17 +207,17 @@ body: | ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_2047 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2047, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2047, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 2047 %2:vgpr(p5) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 5) + %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 5) $vgpr0 = COPY %3 ... @@ -241,14 +241,14 @@ body: | ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec ; GFX6: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec - ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_AND_B32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2047, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_AND_B32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2047, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_2047_known_bits ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec ; GFX9: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec - ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_AND_B32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2047, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_AND_B32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2047, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 2147483647 @@ -256,7 +256,7 @@ body: | %3:vgpr(p5) = G_INTTOPTR %2 %4:vgpr(s32) = G_CONSTANT i32 2047 %5:vgpr(p5) = G_PTR_ADD %3, %4 - %6:vgpr(s32) = G_LOAD %5 :: (load 1, align 1, addrspace 5) + %6:vgpr(s32) = G_LOAD %5 :: (load (s8), align 1, addrspace 5) $vgpr0 = COPY %6 ... @@ -280,17 +280,17 @@ body: | ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_2048 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2048, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2048, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 2048 %2:vgpr(p5) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 5) + %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 5) $vgpr0 = COPY %3 ... @@ -314,19 +314,19 @@ body: | ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2047, implicit $exec ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_m2047 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2047, implicit $exec ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 -2047 %2:vgpr(p5) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 5) + %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 5) $vgpr0 = COPY %3 ... @@ -350,19 +350,19 @@ body: | ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2048, implicit $exec ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_m2048 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2048, implicit $exec ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 -2048 %2:vgpr(p5) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 5) + %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 5) $vgpr0 = COPY %3 ... @@ -386,17 +386,17 @@ body: | ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_4095 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 4095 %2:vgpr(p5) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 5) + %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 5) $vgpr0 = COPY %3 ... @@ -420,19 +420,19 @@ body: | ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_4096 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 4096 %2:vgpr(p5) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 5) + %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 5) $vgpr0 = COPY %3 ... @@ -456,19 +456,19 @@ body: | ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -4095, implicit $exec ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_m4095 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -4095, implicit $exec ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 -4095 %2:vgpr(p5) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 5) + %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 5) $vgpr0 = COPY %3 ... @@ -492,19 +492,19 @@ body: | ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -4096, implicit $exec ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_m4096 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -4096, implicit $exec ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 -4096 %2:vgpr(p5) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 5) + %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 5) $vgpr0 = COPY %3 ... @@ -528,19 +528,19 @@ body: | ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_8191 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 8191 %2:vgpr(p5) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 5) + %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 5) $vgpr0 = COPY %3 ... @@ -564,19 +564,19 @@ body: | ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_8192 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 8192 %2:vgpr(p5) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 5) + %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 5) $vgpr0 = COPY %3 ... @@ -600,19 +600,19 @@ body: | ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -8191, implicit $exec ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_m8191 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -8191, implicit $exec ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 -8191 %2:vgpr(p5) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 5) + %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 5) $vgpr0 = COPY %3 ... @@ -636,19 +636,19 @@ body: | ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -8192, implicit $exec ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_gep_m8192 ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -8192, implicit $exec ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(s32) = G_CONSTANT i32 -8192 %2:vgpr(p5) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 5) + %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 5) $vgpr0 = COPY %3 ... @@ -667,13 +667,13 @@ body: | bb.0: ; GFX6-LABEL: name: load_private_s32_from_4_constant_0 - ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5) + ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] ; GFX9-LABEL: name: load_private_s32_from_4_constant_0 - ; GFX9: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5) + ; GFX9: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] %0:vgpr(p5) = G_CONSTANT i32 0 - %1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 5) + %1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 5) $vgpr0 = COPY %1 ... @@ -692,13 +692,13 @@ body: | bb.0: ; GFX6-LABEL: name: load_private_s32_from_4_constant_sgpr_16 - ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 0, 0, 0, implicit $exec :: (load 4, addrspace 5) + ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] ; GFX9-LABEL: name: load_private_s32_from_4_constant_sgpr_16 - ; GFX9: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 0, 0, 0, implicit $exec :: (load 4, addrspace 5) + ; GFX9: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] %0:sgpr(p5) = G_CONSTANT i32 16 - %1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 5) + %1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 5) $vgpr0 = COPY %1 ... @@ -717,13 +717,13 @@ body: | bb.0: ; GFX6-LABEL: name: load_private_s32_from_1_constant_4095 - ; GFX6: [[BUFFER_LOAD_UBYTE_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX6: [[BUFFER_LOAD_UBYTE_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFSET]] ; GFX9-LABEL: name: load_private_s32_from_1_constant_4095 - ; GFX9: [[BUFFER_LOAD_UBYTE_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX9: [[BUFFER_LOAD_UBYTE_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFSET]] %0:vgpr(p5) = G_CONSTANT i32 4095 - %1:vgpr(s32) = G_LOAD %0 :: (load 1, align 1, addrspace 5) + %1:vgpr(s32) = G_LOAD %0 :: (load (s8), align 1, addrspace 5) $vgpr0 = COPY %1 ... @@ -743,14 +743,14 @@ body: | ; GFX6-LABEL: name: load_private_s32_from_1_constant_4096 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_constant_4096 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] %0:vgpr(p5) = G_CONSTANT i32 4096 - %1:vgpr(s32) = G_LOAD %0 :: (load 1, align 1, addrspace 5) + %1:vgpr(s32) = G_LOAD %0 :: (load (s8), align 1, addrspace 5) $vgpr0 = COPY %1 ... @@ -771,13 +771,13 @@ body: | bb.0: ; GFX6-LABEL: name: load_private_s32_from_fi - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5) + ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_fi - ; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5) + ; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] %0:vgpr(p5) = G_FRAME_INDEX %stack.0 - %1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 5) + %1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 5) $vgpr0 = COPY %1 ... @@ -797,15 +797,15 @@ body: | bb.0: ; GFX6-LABEL: name: load_private_s32_from_1_fi_offset_4095 - ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_fi_offset_4095 - ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] %0:vgpr(p5) = G_FRAME_INDEX %stack.0 %1:vgpr(s32) = G_CONSTANT i32 4095 %2:vgpr(p5) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 5) + %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 5) $vgpr0 = COPY %3 ... @@ -826,16 +826,16 @@ body: | bb.0: ; GFX6-LABEL: name: load_private_s32_from_1_fi_offset_sgpr_4095 - ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_fi_offset_sgpr_4095 - ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] %0:vgpr(p5) = G_FRAME_INDEX %stack.0 %1:sgpr(s32) = G_CONSTANT i32 4095 %2:vgpr(s32) = COPY %1 %3:vgpr(p5) = G_PTR_ADD %0, %2 - %4:vgpr(s32) = G_LOAD %3 :: (load 1, align 1, addrspace 5) + %4:vgpr(s32) = G_LOAD %3 :: (load (s8), align 1, addrspace 5) $vgpr0 = COPY %4 ... @@ -859,18 +859,18 @@ body: | ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, implicit $exec - ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_1_fi_offset_4096 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, implicit $exec - ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load (s8), addrspace 5) ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] %0:vgpr(p5) = G_FRAME_INDEX %stack.0 %1:vgpr(s32) = G_CONSTANT i32 4096 %2:vgpr(p5) = G_PTR_ADD %0, %1 - %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 5) + %3:vgpr(s32) = G_LOAD %2 :: (load (s8), align 1, addrspace 5) $vgpr0 = COPY %3 ... @@ -891,14 +891,14 @@ body: | ; GFX6-LABEL: name: load_private_s32_from_neg1 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, -1, 0, 0, 0, implicit $exec :: (load 4, addrspace 5) + ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, -1, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX9-LABEL: name: load_private_s32_from_neg1 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, -1, 0, 0, 0, implicit $exec :: (load 4, addrspace 5) + ; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, -1, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] %0:vgpr(p5) = G_CONSTANT i32 -1 - %1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 5) + %1:vgpr(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 5) $vgpr0 = COPY %1 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir index 2c41c3a858ab6..03a4c8a6fadce 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir @@ -103,56 +103,56 @@ body: | %1:sgpr(s64) = G_CONSTANT i64 4 %2:sgpr(p4) = G_PTR_ADD %0, %1 - %3:sgpr(s32) = G_LOAD %2 :: (load 4 from %ir.const0, addrspace 4) + %3:sgpr(s32) = G_LOAD %2 :: (load (s32) from %ir.const0, addrspace 4) $sgpr0 = COPY %3 %4:sgpr(s64) = G_CONSTANT i64 1020 %5:sgpr(p4) = G_PTR_ADD %0, %4 - %6:sgpr(s32) = G_LOAD %5 :: (load 4 from %ir.const0, addrspace 4) + %6:sgpr(s32) = G_LOAD %5 :: (load (s32) from %ir.const0, addrspace 4) $sgpr0 = COPY %6 %7:sgpr(s64) = G_CONSTANT i64 1024 %8:sgpr(p4) = G_PTR_ADD %0, %7 - %9:sgpr(s32) = G_LOAD %8 :: (load 4 from %ir.const0, addrspace 4) + %9:sgpr(s32) = G_LOAD %8 :: (load (s32) from %ir.const0, addrspace 4) $sgpr0 = COPY %9 %10:sgpr(s64) = G_CONSTANT i64 1048572 %11:sgpr(p4) = G_PTR_ADD %0, %10 - %12:sgpr(s32) = G_LOAD %11 :: (load 4 from %ir.const0, addrspace 4) + %12:sgpr(s32) = G_LOAD %11 :: (load (s32) from %ir.const0, addrspace 4) $sgpr0 = COPY %12 %13:sgpr(s64) = G_CONSTANT i64 1048576 %14:sgpr(p4) = G_PTR_ADD %0, %13 - %15:sgpr(s32) = G_LOAD %14 :: (load 4 from %ir.const0, addrspace 4) + %15:sgpr(s32) = G_LOAD %14 :: (load (s32) from %ir.const0, addrspace 4) $sgpr0 = COPY %15 %16:sgpr(s64) = G_CONSTANT i64 17179869180 %17:sgpr(p4) = G_PTR_ADD %0, %16 - %18:sgpr(s32) = G_LOAD %17 :: (load 4 from %ir.const0, addrspace 4) + %18:sgpr(s32) = G_LOAD %17 :: (load (s32) from %ir.const0, addrspace 4) $sgpr0 = COPY %18 %19:sgpr(s64) = G_CONSTANT i64 17179869184 %20:sgpr(p4) = G_PTR_ADD %0, %19 - %21:sgpr(s32) = G_LOAD %20 :: (load 4 from %ir.const0, addrspace 4) + %21:sgpr(s32) = G_LOAD %20 :: (load (s32) from %ir.const0, addrspace 4) $sgpr0 = COPY %21 %22:sgpr(s64) = G_CONSTANT i64 4294967292 %23:sgpr(p4) = G_PTR_ADD %0, %22 - %24:sgpr(s32) = G_LOAD %23 :: (load 4 from %ir.const0, addrspace 4) + %24:sgpr(s32) = G_LOAD %23 :: (load (s32) from %ir.const0, addrspace 4) $sgpr0 = COPY %24 %25:sgpr(s64) = G_CONSTANT i64 4294967296 %26:sgpr(p4) = G_PTR_ADD %0, %25 - %27:sgpr(s32) = G_LOAD %26 :: (load 4 from %ir.const0, addrspace 4) + %27:sgpr(s32) = G_LOAD %26 :: (load (s32) from %ir.const0, addrspace 4) $sgpr0 = COPY %27 - %28:sgpr(p0) = G_LOAD %0 :: (load 8 from %ir.const0, addrspace 4) + %28:sgpr(p0) = G_LOAD %0 :: (load (p0) from %ir.const0, addrspace 4) $sgpr0_sgpr1 = COPY %28 - %29:sgpr(p1) = G_LOAD %0 :: (load 8 from %ir.const0, addrspace 4) + %29:sgpr(p1) = G_LOAD %0 :: (load (p1) from %ir.const0, addrspace 4) $sgpr0_sgpr1 = COPY %29 - %30:sgpr(p4) = G_LOAD %0 :: (load 8 from %ir.const0, addrspace 4) + %30:sgpr(p4) = G_LOAD %0 :: (load (p4) from %ir.const0, addrspace 4) $sgpr0_sgpr1 = COPY %30 ... @@ -171,19 +171,19 @@ body: | ; CHECK: [[CONSTANT_PTR:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; CHECK: [[GLOBAL_PTR:%[0-9]+]]:sgpr(p1) = COPY $sgpr2_sgpr3 ; CHECK: s_load_dwordx8 [[CONSTANT_PTR]] - %2:sgpr(<8 x s32>) = G_LOAD %0 :: (load 32, addrspace 4) + %2:sgpr(<8 x s32>) = G_LOAD %0 :: (load (<8 x s32>), addrspace 4) $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY %2 ; CHECK: s_load_dwordx16 [[CONSTANT_PTR]] - %3:sgpr(<16 x s32>) = G_LOAD %0 :: (load 64, addrspace 4) + %3:sgpr(<16 x s32>) = G_LOAD %0 :: (load (<16 x s32>), addrspace 4) $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %3 ; CHECK: s_load_dwordx8 [[GLOBAL_PTR]] - %4:sgpr(<8 x s32>) = G_LOAD %1 :: (load 32, addrspace 1) + %4:sgpr(<8 x s32>) = G_LOAD %1 :: (load (<8 x s32>), addrspace 1) $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY %4 ; CHECK s_load_dwordx16 [[GLOBAL_PTR]] - %5:sgpr(<16 x s32>) = G_LOAD %1 :: (load 64, addrspace 1) + %5:sgpr(<16 x s32>) = G_LOAD %1 :: (load (<16 x s32>), addrspace 1) $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %5 ... @@ -192,8 +192,8 @@ body: | # GCN-LABEL: name: constant_address_positive{{$}} # GCN: %0:sreg_64 = S_MOV_B64 44 -# VI: %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 64, 0 :: (dereferenceable invariant load 4, addrspace 4) -# SICI: %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 16, 0 :: (dereferenceable invariant load 4, addrspace 4) +# VI: %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 64, 0 :: (dereferenceable invariant load (s32), addrspace 4) +# SICI: %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 16, 0 :: (dereferenceable invariant load (s32), addrspace 4) --- @@ -207,6 +207,6 @@ body: | %0:sgpr(p4) = G_CONSTANT i64 44 %1:sgpr(s64) = G_CONSTANT i64 64 %2:sgpr(p4) = G_PTR_ADD %0, %1 - %3:sgpr(s32) = G_LOAD %2 :: (dereferenceable invariant load 4, align 4, addrspace 4) + %3:sgpr(s32) = G_LOAD %2 :: (dereferenceable invariant load (s32), align 4, addrspace 4) S_ENDPGM 0, implicit %3 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir index b31579d729a99..6aa5e97e945ea 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir @@ -18,16 +18,16 @@ body: | ; WAVE64: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 ; WAVE64: [[V_CVT_F32_I32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec ; WAVE64: [[V_CVT_F32_I32_e64_1:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; WAVE64: FLAT_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) - ; WAVE64: FLAT_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) + ; WAVE64: FLAT_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; WAVE64: FLAT_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) ; WAVE32-LABEL: name: sitofp ; WAVE32: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 ; WAVE32: [[V_CVT_F32_I32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec ; WAVE32: [[V_CVT_F32_I32_e64_1:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; WAVE32: GLOBAL_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_]], 0, 0, implicit $exec :: (store 4, addrspace 1) - ; WAVE32: GLOBAL_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_1]], 0, 0, implicit $exec :: (store 4, addrspace 1) + ; WAVE32: GLOBAL_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_]], 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; WAVE32: GLOBAL_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_1]], 0, 0, implicit $exec :: (store (s32), addrspace 1) %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -40,8 +40,8 @@ body: | ; sitofp v %4:vgpr(s32) = G_SITOFP %1 - G_STORE %3, %2 :: (store 4, addrspace 1) - G_STORE %4, %2 :: (store 4, addrspace 1) + G_STORE %3, %2 :: (store (s32), addrspace 1) + G_STORE %4, %2 :: (store (s32), addrspace 1) ... --- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-flat.mir index dd620a447f147..0c55dd5333e9b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-flat.mir @@ -18,15 +18,15 @@ body: | ; GFX7: liveins: $vgpr0, $vgpr1_vgpr2 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 - ; GFX7: FLAT_STORE_DWORD [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst 4) + ; GFX7: FLAT_STORE_DWORD [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst (s32)) ; GFX9-LABEL: name: atomic_store_flat_s32_seq_cst ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 - ; GFX9: FLAT_STORE_DWORD [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst 4) + ; GFX9: FLAT_STORE_DWORD [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst (s32)) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p0) = COPY $vgpr1_vgpr2 - G_STORE %0, %1 :: (store seq_cst 4, align 4, addrspace 0) + G_STORE %0, %1 :: (store seq_cst (s32), align 4, addrspace 0) ... @@ -45,15 +45,15 @@ body: | ; GFX7: liveins: $vgpr0, $vgpr1_vgpr2 ; GFX7: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr1_vgpr2 - ; GFX7: G_STORE [[COPY]](<2 x s16>), [[COPY1]](p0) :: (store seq_cst 4) + ; GFX7: G_STORE [[COPY]](<2 x s16>), [[COPY1]](p0) :: (store seq_cst (<2 x s16>)) ; GFX9-LABEL: name: atomic_store_flat_v2s16_seq_cst ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr1_vgpr2 - ; GFX9: G_STORE [[COPY]](<2 x s16>), [[COPY1]](p0) :: (store seq_cst 4) + ; GFX9: G_STORE [[COPY]](<2 x s16>), [[COPY1]](p0) :: (store seq_cst (<2 x s16>)) %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(p0) = COPY $vgpr1_vgpr2 - G_STORE %0, %1 :: (store seq_cst 4, align 4, addrspace 0) + G_STORE %0, %1 :: (store seq_cst (<2 x s16>), align 4, addrspace 0) ... @@ -72,15 +72,15 @@ body: | ; GFX7: liveins: $vgpr0, $vgpr1_vgpr2 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr1_vgpr2 - ; GFX7: G_STORE [[COPY]](p3), [[COPY1]](p0) :: (store seq_cst 4) + ; GFX7: G_STORE [[COPY]](p3), [[COPY1]](p0) :: (store seq_cst (p3)) ; GFX9-LABEL: name: atomic_store_flat_p3_seq_cst ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr1_vgpr2 - ; GFX9: G_STORE [[COPY]](p3), [[COPY1]](p0) :: (store seq_cst 4) + ; GFX9: G_STORE [[COPY]](p3), [[COPY1]](p0) :: (store seq_cst (p3)) %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(p0) = COPY $vgpr1_vgpr2 - G_STORE %0, %1 :: (store seq_cst 4, align 4, addrspace 0) + G_STORE %0, %1 :: (store seq_cst (p3), align 4, addrspace 0) ... @@ -99,15 +99,15 @@ body: | ; GFX7: liveins: $vgpr0, $vgpr1_vgpr2 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0 ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr1_vgpr2 - ; GFX7: G_STORE [[COPY]](p5), [[COPY1]](p0) :: (store seq_cst 4) + ; GFX7: G_STORE [[COPY]](p5), [[COPY1]](p0) :: (store seq_cst (p5)) ; GFX9-LABEL: name: atomic_store_flat_p5_seq_cst ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr1_vgpr2 - ; GFX9: G_STORE [[COPY]](p5), [[COPY1]](p0) :: (store seq_cst 4) + ; GFX9: G_STORE [[COPY]](p5), [[COPY1]](p0) :: (store seq_cst (p5)) %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(p0) = COPY $vgpr1_vgpr2 - G_STORE %0, %1 :: (store seq_cst 4, align 4, addrspace 0) + G_STORE %0, %1 :: (store seq_cst (p5), align 4, addrspace 0) ... @@ -126,15 +126,15 @@ body: | ; GFX7: liveins: $vgpr0, $vgpr1_vgpr2 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p6) = COPY $vgpr0 ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr1_vgpr2 - ; GFX7: G_STORE [[COPY]](p6), [[COPY1]](p0) :: (store seq_cst 4) + ; GFX7: G_STORE [[COPY]](p6), [[COPY1]](p0) :: (store seq_cst (p6)) ; GFX9-LABEL: name: atomic_store_flat_p6_seq_cst ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p6) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr1_vgpr2 - ; GFX9: G_STORE [[COPY]](p6), [[COPY1]](p0) :: (store seq_cst 4) + ; GFX9: G_STORE [[COPY]](p6), [[COPY1]](p0) :: (store seq_cst (p6)) %0:vgpr(p6) = COPY $vgpr0 %1:vgpr(p0) = COPY $vgpr1_vgpr2 - G_STORE %0, %1 :: (store seq_cst 4, align 4, addrspace 0) + G_STORE %0, %1 :: (store seq_cst (p6), align 4, addrspace 0) ... @@ -153,15 +153,15 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7: FLAT_STORE_DWORDX2 [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst 8) + ; GFX7: FLAT_STORE_DWORDX2 [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst (s64)) ; GFX9-LABEL: name: atomic_store_flat_s64_seq_cst ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9: FLAT_STORE_DWORDX2 [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst 8) + ; GFX9: FLAT_STORE_DWORDX2 [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst (s64)) %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(p0) = COPY $vgpr2_vgpr3 - G_STORE %0, %1 :: (store seq_cst 8, align 8, addrspace 0) + G_STORE %0, %1 :: (store seq_cst (s64), align 8, addrspace 0) ... @@ -180,15 +180,15 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3 - ; GFX7: G_STORE [[COPY]](<2 x s32>), [[COPY1]](p0) :: (store seq_cst 8) + ; GFX7: G_STORE [[COPY]](<2 x s32>), [[COPY1]](p0) :: (store seq_cst (<2 x s32>)) ; GFX9-LABEL: name: atomic_store_flat_v2s32_seq_cst ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3 - ; GFX9: G_STORE [[COPY]](<2 x s32>), [[COPY1]](p0) :: (store seq_cst 8) + ; GFX9: G_STORE [[COPY]](<2 x s32>), [[COPY1]](p0) :: (store seq_cst (<2 x s32>)) %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 %1:vgpr(p0) = COPY $vgpr2_vgpr3 - G_STORE %0, %1 :: (store seq_cst 8, align 8, addrspace 0) + G_STORE %0, %1 :: (store seq_cst (<2 x s32>), align 8, addrspace 0) ... @@ -207,15 +207,15 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3 - ; GFX7: G_STORE [[COPY]](<4 x s16>), [[COPY1]](p0) :: (store seq_cst 8) + ; GFX7: G_STORE [[COPY]](<4 x s16>), [[COPY1]](p0) :: (store seq_cst (<4 x s16>)) ; GFX9-LABEL: name: atomic_store_flat_v4s16_seq_cst ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3 - ; GFX9: G_STORE [[COPY]](<4 x s16>), [[COPY1]](p0) :: (store seq_cst 8) + ; GFX9: G_STORE [[COPY]](<4 x s16>), [[COPY1]](p0) :: (store seq_cst (<4 x s16>)) %0:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 %1:vgpr(p0) = COPY $vgpr2_vgpr3 - G_STORE %0, %1 :: (store seq_cst 8, align 8, addrspace 0) + G_STORE %0, %1 :: (store seq_cst (<4 x s16>), align 8, addrspace 0) ... @@ -234,15 +234,15 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3 - ; GFX7: G_STORE [[COPY]](p0), [[COPY1]](p0) :: (store seq_cst 8) + ; GFX7: G_STORE [[COPY]](p0), [[COPY1]](p0) :: (store seq_cst (p0)) ; GFX9-LABEL: name: atomic_store_flat_p0_seq_cst ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3 - ; GFX9: G_STORE [[COPY]](p0), [[COPY1]](p0) :: (store seq_cst 8) + ; GFX9: G_STORE [[COPY]](p0), [[COPY1]](p0) :: (store seq_cst (p0)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(p0) = COPY $vgpr2_vgpr3 - G_STORE %0, %1 :: (store seq_cst 8, align 8, addrspace 0) + G_STORE %0, %1 :: (store seq_cst (p0), align 8, addrspace 0) ... --- @@ -260,14 +260,14 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3 - ; GFX7: G_STORE [[COPY]](p1), [[COPY1]](p0) :: (store seq_cst 8) + ; GFX7: G_STORE [[COPY]](p1), [[COPY1]](p0) :: (store seq_cst (p1)) ; GFX9-LABEL: name: atomic_store_flat_p1_seq_cst ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3 - ; GFX9: G_STORE [[COPY]](p1), [[COPY1]](p0) :: (store seq_cst 8) + ; GFX9: G_STORE [[COPY]](p1), [[COPY1]](p0) :: (store seq_cst (p1)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p0) = COPY $vgpr2_vgpr3 - G_STORE %0, %1 :: (store seq_cst 8, align 8, addrspace 0) + G_STORE %0, %1 :: (store seq_cst (p1), align 8, addrspace 0) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-local.mir index 6f3010f3faf80..9c301b2bcbb40 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-local.mir @@ -20,21 +20,21 @@ body: | ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store seq_cst 4, addrspace 3) + ; GFX6: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store seq_cst (s32), addrspace 3) ; GFX7-LABEL: name: atomic_store_local_s32_seq_cst ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store seq_cst 4, addrspace 3) + ; GFX7: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store seq_cst (s32), addrspace 3) ; GFX9-LABEL: name: atomic_store_local_s32_seq_cst ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9: DS_WRITE_B32_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store seq_cst 4, addrspace 3) + ; GFX9: DS_WRITE_B32_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store seq_cst (s32), addrspace 3) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p3) = COPY $vgpr1 - G_STORE %0, %1 :: (store seq_cst 4, align 4, addrspace 3) + G_STORE %0, %1 :: (store seq_cst (s32), align 4, addrspace 3) ... @@ -54,21 +54,21 @@ body: | ; GFX6: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr1 ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: G_STORE [[COPY]](<2 x s16>), [[COPY1]](p3) :: (store seq_cst 4, addrspace 3) + ; GFX6: G_STORE [[COPY]](<2 x s16>), [[COPY1]](p3) :: (store seq_cst (<2 x s16>), addrspace 3) ; GFX7-LABEL: name: atomic_store_local_v2s16_seq_cst ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr1 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: G_STORE [[COPY]](<2 x s16>), [[COPY1]](p3) :: (store seq_cst 4, addrspace 3) + ; GFX7: G_STORE [[COPY]](<2 x s16>), [[COPY1]](p3) :: (store seq_cst (<2 x s16>), addrspace 3) ; GFX9-LABEL: name: atomic_store_local_v2s16_seq_cst ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr1 - ; GFX9: G_STORE [[COPY]](<2 x s16>), [[COPY1]](p3) :: (store seq_cst 4, addrspace 3) + ; GFX9: G_STORE [[COPY]](<2 x s16>), [[COPY1]](p3) :: (store seq_cst (<2 x s16>), addrspace 3) %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(p3) = COPY $vgpr1 - G_STORE %0, %1 :: (store seq_cst 4, align 4, addrspace 3) + G_STORE %0, %1 :: (store seq_cst (<2 x s16>), align 4, addrspace 3) ... @@ -88,21 +88,21 @@ body: | ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr1 ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: G_STORE [[COPY]](p3), [[COPY1]](p3) :: (store seq_cst 4, addrspace 3) + ; GFX6: G_STORE [[COPY]](p3), [[COPY1]](p3) :: (store seq_cst (p3), addrspace 3) ; GFX7-LABEL: name: atomic_store_local_p3_seq_cst ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr1 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: G_STORE [[COPY]](p3), [[COPY1]](p3) :: (store seq_cst 4, addrspace 3) + ; GFX7: G_STORE [[COPY]](p3), [[COPY1]](p3) :: (store seq_cst (p3), addrspace 3) ; GFX9-LABEL: name: atomic_store_local_p3_seq_cst ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr1 - ; GFX9: G_STORE [[COPY]](p3), [[COPY1]](p3) :: (store seq_cst 4, addrspace 3) + ; GFX9: G_STORE [[COPY]](p3), [[COPY1]](p3) :: (store seq_cst (p3), addrspace 3) %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(p3) = COPY $vgpr1 - G_STORE %0, %1 :: (store seq_cst 4, align 4, addrspace 3) + G_STORE %0, %1 :: (store seq_cst (p3), align 4, addrspace 3) ... @@ -122,21 +122,21 @@ body: | ; GFX6: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr1 ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: G_STORE [[COPY]](p5), [[COPY1]](p3) :: (store seq_cst 4, addrspace 3) + ; GFX6: G_STORE [[COPY]](p5), [[COPY1]](p3) :: (store seq_cst (p5), addrspace 3) ; GFX7-LABEL: name: atomic_store_local_p5_seq_cst ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0 ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr1 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: G_STORE [[COPY]](p5), [[COPY1]](p3) :: (store seq_cst 4, addrspace 3) + ; GFX7: G_STORE [[COPY]](p5), [[COPY1]](p3) :: (store seq_cst (p5), addrspace 3) ; GFX9-LABEL: name: atomic_store_local_p5_seq_cst ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr1 - ; GFX9: G_STORE [[COPY]](p5), [[COPY1]](p3) :: (store seq_cst 4, addrspace 3) + ; GFX9: G_STORE [[COPY]](p5), [[COPY1]](p3) :: (store seq_cst (p5), addrspace 3) %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(p3) = COPY $vgpr1 - G_STORE %0, %1 :: (store seq_cst 4, align 4, addrspace 3) + G_STORE %0, %1 :: (store seq_cst (p5), align 4, addrspace 3) ... @@ -156,21 +156,21 @@ body: | ; GFX6: [[COPY:%[0-9]+]]:vgpr(p6) = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr1 ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: G_STORE [[COPY]](p6), [[COPY1]](p3) :: (store seq_cst 4, addrspace 3) + ; GFX6: G_STORE [[COPY]](p6), [[COPY1]](p3) :: (store seq_cst (p6), addrspace 3) ; GFX7-LABEL: name: atomic_store_local_p6_seq_cst ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p6) = COPY $vgpr0 ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr1 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: G_STORE [[COPY]](p6), [[COPY1]](p3) :: (store seq_cst 4, addrspace 3) + ; GFX7: G_STORE [[COPY]](p6), [[COPY1]](p3) :: (store seq_cst (p6), addrspace 3) ; GFX9-LABEL: name: atomic_store_local_p6_seq_cst ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p6) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr1 - ; GFX9: G_STORE [[COPY]](p6), [[COPY1]](p3) :: (store seq_cst 4, addrspace 3) + ; GFX9: G_STORE [[COPY]](p6), [[COPY1]](p3) :: (store seq_cst (p6), addrspace 3) %0:vgpr(p6) = COPY $vgpr0 %1:vgpr(p3) = COPY $vgpr1 - G_STORE %0, %1 :: (store seq_cst 4, align 4, addrspace 3) + G_STORE %0, %1 :: (store seq_cst (p6), align 4, addrspace 3) ... @@ -190,21 +190,21 @@ body: | ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store seq_cst 8, addrspace 3) + ; GFX6: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store seq_cst (s64), addrspace 3) ; GFX7-LABEL: name: atomic_store_local_s64_seq_cst ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store seq_cst 8, addrspace 3) + ; GFX7: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store seq_cst (s64), addrspace 3) ; GFX9-LABEL: name: atomic_store_local_s64_seq_cst ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store seq_cst 8, addrspace 3) + ; GFX9: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store seq_cst (s64), addrspace 3) %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 - G_STORE %0, %1 :: (store seq_cst 8, align 8, addrspace 3) + G_STORE %0, %1 :: (store seq_cst (s64), align 8, addrspace 3) ... @@ -224,21 +224,21 @@ body: | ; GFX6: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: G_STORE [[COPY]](<2 x s32>), [[COPY1]](p3) :: (store seq_cst 8, addrspace 3) + ; GFX6: G_STORE [[COPY]](<2 x s32>), [[COPY1]](p3) :: (store seq_cst (<2 x s32>), addrspace 3) ; GFX7-LABEL: name: atomic_store_local_v2s32_seq_cst ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: G_STORE [[COPY]](<2 x s32>), [[COPY1]](p3) :: (store seq_cst 8, addrspace 3) + ; GFX7: G_STORE [[COPY]](<2 x s32>), [[COPY1]](p3) :: (store seq_cst (<2 x s32>), addrspace 3) ; GFX9-LABEL: name: atomic_store_local_v2s32_seq_cst ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 - ; GFX9: G_STORE [[COPY]](<2 x s32>), [[COPY1]](p3) :: (store seq_cst 8, addrspace 3) + ; GFX9: G_STORE [[COPY]](<2 x s32>), [[COPY1]](p3) :: (store seq_cst (<2 x s32>), addrspace 3) %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 - G_STORE %0, %1 :: (store seq_cst 8, align 8, addrspace 3) + G_STORE %0, %1 :: (store seq_cst (<2 x s32>), align 8, addrspace 3) ... @@ -258,21 +258,21 @@ body: | ; GFX6: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: G_STORE [[COPY]](<4 x s16>), [[COPY1]](p3) :: (store seq_cst 8, addrspace 3) + ; GFX6: G_STORE [[COPY]](<4 x s16>), [[COPY1]](p3) :: (store seq_cst (<4 x s16>), addrspace 3) ; GFX7-LABEL: name: atomic_store_local_v4s16_seq_cst ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: G_STORE [[COPY]](<4 x s16>), [[COPY1]](p3) :: (store seq_cst 8, addrspace 3) + ; GFX7: G_STORE [[COPY]](<4 x s16>), [[COPY1]](p3) :: (store seq_cst (<4 x s16>), addrspace 3) ; GFX9-LABEL: name: atomic_store_local_v4s16_seq_cst ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 - ; GFX9: G_STORE [[COPY]](<4 x s16>), [[COPY1]](p3) :: (store seq_cst 8, addrspace 3) + ; GFX9: G_STORE [[COPY]](<4 x s16>), [[COPY1]](p3) :: (store seq_cst (<4 x s16>), addrspace 3) %0:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 - G_STORE %0, %1 :: (store seq_cst 8, align 8, addrspace 3) + G_STORE %0, %1 :: (store seq_cst (<4 x s16>), align 8, addrspace 3) ... @@ -292,21 +292,21 @@ body: | ; GFX6: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: G_STORE [[COPY]](p0), [[COPY1]](p3) :: (store seq_cst 8, addrspace 3) + ; GFX6: G_STORE [[COPY]](p0), [[COPY1]](p3) :: (store seq_cst (p0), addrspace 3) ; GFX7-LABEL: name: atomic_store_local_p0_seq_cst ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: G_STORE [[COPY]](p0), [[COPY1]](p3) :: (store seq_cst 8, addrspace 3) + ; GFX7: G_STORE [[COPY]](p0), [[COPY1]](p3) :: (store seq_cst (p0), addrspace 3) ; GFX9-LABEL: name: atomic_store_local_p0_seq_cst ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 - ; GFX9: G_STORE [[COPY]](p0), [[COPY1]](p3) :: (store seq_cst 8, addrspace 3) + ; GFX9: G_STORE [[COPY]](p0), [[COPY1]](p3) :: (store seq_cst (p0), addrspace 3) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 - G_STORE %0, %1 :: (store seq_cst 8, align 8, addrspace 3) + G_STORE %0, %1 :: (store seq_cst (p0), align 8, addrspace 3) ... --- @@ -325,20 +325,20 @@ body: | ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: G_STORE [[COPY]](p1), [[COPY1]](p3) :: (store seq_cst 8, addrspace 3) + ; GFX6: G_STORE [[COPY]](p1), [[COPY1]](p3) :: (store seq_cst (p1), addrspace 3) ; GFX7-LABEL: name: atomic_store_local_p1_seq_cst ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: G_STORE [[COPY]](p1), [[COPY1]](p3) :: (store seq_cst 8, addrspace 3) + ; GFX7: G_STORE [[COPY]](p1), [[COPY1]](p3) :: (store seq_cst (p1), addrspace 3) ; GFX9-LABEL: name: atomic_store_local_p1_seq_cst ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 - ; GFX9: G_STORE [[COPY]](p1), [[COPY1]](p3) :: (store seq_cst 8, addrspace 3) + ; GFX9: G_STORE [[COPY]](p1), [[COPY1]](p3) :: (store seq_cst (p1), addrspace 3) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 - G_STORE %0, %1 :: (store seq_cst 8, align 8, addrspace 3) + G_STORE %0, %1 :: (store seq_cst (p1), align 8, addrspace 3) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir index b4e7203e8ec31..9e8a79bf85ea3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir @@ -19,25 +19,25 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4) + ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) ; GFX8-LABEL: name: store_flat_s32_to_4 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4) + ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) ; GFX9-LABEL: name: store_flat_s32_to_4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4) + ; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) ; GFX10-LABEL: name: store_flat_s32_to_4 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4) + ; GFX10: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 - G_STORE %1, %0 :: (store 4, align 4, addrspace 0) + G_STORE %1, %0 :: (store (s32), align 4, addrspace 0) ... @@ -55,25 +55,25 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 2) + ; GFX7: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16)) ; GFX8-LABEL: name: store_flat_s32_to_2 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 2) + ; GFX8: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16)) ; GFX9-LABEL: name: store_flat_s32_to_2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 2) + ; GFX9: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16)) ; GFX10-LABEL: name: store_flat_s32_to_2 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 2) + ; GFX10: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 - G_STORE %1, %0 :: (store 2, align 2, addrspace 0) + G_STORE %1, %0 :: (store (s16), align 2, addrspace 0) ... @@ -91,25 +91,25 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 1) + ; GFX7: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8)) ; GFX8-LABEL: name: store_flat_s32_to_1 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 1) + ; GFX8: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8)) ; GFX9-LABEL: name: store_flat_s32_to_1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 1) + ; GFX9: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8)) ; GFX10-LABEL: name: store_flat_s32_to_1 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 1) + ; GFX10: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 - G_STORE %1, %0 :: (store 1, align 1, addrspace 0) + G_STORE %1, %0 :: (store (s8), align 1, addrspace 0) ... @@ -128,25 +128,25 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8) + ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) ; GFX8-LABEL: name: store_flat_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8) + ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) ; GFX9-LABEL: name: store_flat_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8) + ; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) ; GFX10-LABEL: name: store_flat_s64 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8) + ; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store 8, align 8, addrspace 0) + G_STORE %1, %0 :: (store (s64), align 8, addrspace 0) ... --- @@ -164,25 +164,25 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vgpr(s96) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX7: G_STORE [[COPY1]](s96), [[COPY]](p1) :: (store 12, align 16) + ; GFX7: G_STORE [[COPY1]](s96), [[COPY]](p1) :: (store (s96), align 16) ; GFX8-LABEL: name: store_flat_s96 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:vgpr(s96) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX8: G_STORE [[COPY1]](s96), [[COPY]](p1) :: (store 12, align 16) + ; GFX8: G_STORE [[COPY1]](s96), [[COPY]](p1) :: (store (s96), align 16) ; GFX9-LABEL: name: store_flat_s96 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s96) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX9: G_STORE [[COPY1]](s96), [[COPY]](p1) :: (store 12, align 16) + ; GFX9: G_STORE [[COPY1]](s96), [[COPY]](p1) :: (store (s96), align 16) ; GFX10-LABEL: name: store_flat_s96 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s96) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX10: G_STORE [[COPY1]](s96), [[COPY]](p1) :: (store 12, align 16) + ; GFX10: G_STORE [[COPY1]](s96), [[COPY]](p1) :: (store (s96), align 16) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s96) = COPY $vgpr2_vgpr3_vgpr4 - G_STORE %1, %0 :: (store 12, align 16, addrspace 0) + G_STORE %1, %0 :: (store (s96), align 16, addrspace 0) ... --- @@ -200,25 +200,25 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX7: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store 16) + ; GFX7: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store (s128)) ; GFX8-LABEL: name: store_flat_s128 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX8: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store 16) + ; GFX8: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store (s128)) ; GFX9-LABEL: name: store_flat_s128 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store 16) + ; GFX9: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store (s128)) ; GFX10-LABEL: name: store_flat_s128 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX10: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store 16) + ; GFX10: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store (s128)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store 16, align 16, addrspace 0) + G_STORE %1, %0 :: (store (s128), align 16, addrspace 0) ... @@ -237,25 +237,25 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8) + ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>)) ; GFX8-LABEL: name: store_flat_v2s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8) + ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>)) ; GFX9-LABEL: name: store_flat_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8) + ; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>)) ; GFX10-LABEL: name: store_flat_v2s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8) + ; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s32>) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store 8, align 8, addrspace 0) + G_STORE %1, %0 :: (store (<2 x s32>), align 8, addrspace 0) ... --- @@ -273,25 +273,25 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 - ; GFX7: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16) + ; GFX7: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>), align 16) ; GFX8-LABEL: name: store_flat_v3s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 - ; GFX8: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16) + ; GFX8: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>), align 16) ; GFX9-LABEL: name: store_flat_v3s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 - ; GFX9: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16) + ; GFX9: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>), align 16) ; GFX10-LABEL: name: store_flat_v3s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 - ; GFX10: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16) + ; GFX10: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>), align 16) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - G_STORE %1, %0 :: (store 12, align 16, addrspace 0) + G_STORE %1, %0 :: (store (<3 x s32>), align 16, addrspace 0) ... --- @@ -309,25 +309,25 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX7: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16) + ; GFX7: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>)) ; GFX8-LABEL: name: store_flat_v4s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16) + ; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>)) ; GFX9-LABEL: name: store_flat_v4s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16) + ; GFX9: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>)) ; GFX10-LABEL: name: store_flat_v4s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX10: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16) + ; GFX10: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store 16, align 16, addrspace 0) + G_STORE %1, %0 :: (store (<4 x s32>), align 16, addrspace 0) ... @@ -346,25 +346,25 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4) + ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>)) ; GFX8-LABEL: name: store_flat_v2s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4) + ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>)) ; GFX9-LABEL: name: store_flat_v2s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4) + ; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>)) ; GFX10-LABEL: name: store_flat_v2s16 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4) + ; GFX10: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s16>) = COPY $vgpr2 - G_STORE %1, %0 :: (store 4, align 4, addrspace 0) + G_STORE %1, %0 :: (store (<2 x s16>), align 4, addrspace 0) ... @@ -383,25 +383,25 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8) + ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>)) ; GFX8-LABEL: name: store_flat_v4s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8) + ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>)) ; GFX9-LABEL: name: store_flat_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8) + ; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>)) ; GFX10-LABEL: name: store_flat_v4s16 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8) + ; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store 8, align 8, addrspace 0) + G_STORE %1, %0 :: (store (<4 x s16>), align 8, addrspace 0) ... @@ -420,25 +420,25 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vgpr(<6 x s16>) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX7: G_STORE [[COPY1]](<6 x s16>), [[COPY]](p1) :: (store 12, align 16) + ; GFX7: G_STORE [[COPY1]](<6 x s16>), [[COPY]](p1) :: (store (<6 x s16>), align 16) ; GFX8-LABEL: name: store_flat_v6s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:vgpr(<6 x s16>) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX8: G_STORE [[COPY1]](<6 x s16>), [[COPY]](p1) :: (store 12, align 16) + ; GFX8: G_STORE [[COPY1]](<6 x s16>), [[COPY]](p1) :: (store (<6 x s16>), align 16) ; GFX9-LABEL: name: store_flat_v6s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr(<6 x s16>) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX9: G_STORE [[COPY1]](<6 x s16>), [[COPY]](p1) :: (store 12, align 16) + ; GFX9: G_STORE [[COPY1]](<6 x s16>), [[COPY]](p1) :: (store (<6 x s16>), align 16) ; GFX10-LABEL: name: store_flat_v6s16 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr(<6 x s16>) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX10: G_STORE [[COPY1]](<6 x s16>), [[COPY]](p1) :: (store 12, align 16) + ; GFX10: G_STORE [[COPY1]](<6 x s16>), [[COPY]](p1) :: (store (<6 x s16>), align 16) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<6 x s16>) = COPY $vgpr2_vgpr3_vgpr4 - G_STORE %1, %0 :: (store 12, align 16, addrspace 0) + G_STORE %1, %0 :: (store (<6 x s16>), align 16, addrspace 0) ... --- @@ -456,25 +456,25 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vgpr(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX7: G_STORE [[COPY1]](<8 x s16>), [[COPY]](p1) :: (store 16) + ; GFX7: G_STORE [[COPY1]](<8 x s16>), [[COPY]](p1) :: (store (<8 x s16>)) ; GFX8-LABEL: name: store_flat_v8s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:vgpr(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX8: G_STORE [[COPY1]](<8 x s16>), [[COPY]](p1) :: (store 16) + ; GFX8: G_STORE [[COPY1]](<8 x s16>), [[COPY]](p1) :: (store (<8 x s16>)) ; GFX9-LABEL: name: store_flat_v8s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9: G_STORE [[COPY1]](<8 x s16>), [[COPY]](p1) :: (store 16) + ; GFX9: G_STORE [[COPY1]](<8 x s16>), [[COPY]](p1) :: (store (<8 x s16>)) ; GFX10-LABEL: name: store_flat_v8s16 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX10: G_STORE [[COPY1]](<8 x s16>), [[COPY]](p1) :: (store 16) + ; GFX10: G_STORE [[COPY1]](<8 x s16>), [[COPY]](p1) :: (store (<8 x s16>)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store 16, align 16, addrspace 0) + G_STORE %1, %0 :: (store (<8 x s16>), align 16, addrspace 0) ... @@ -493,25 +493,25 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX7: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16) + ; GFX7: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>)) ; GFX8-LABEL: name: store_flat_v2s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16) + ; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>)) ; GFX9-LABEL: name: store_flat_v2s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16) + ; GFX9: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>)) ; GFX10-LABEL: name: store_flat_v2s64 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX10: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16) + ; GFX10: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store 16, align 16, addrspace 0) + G_STORE %1, %0 :: (store (<2 x s64>), align 16, addrspace 0) ... @@ -530,25 +530,25 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8) + ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p1)) ; GFX8-LABEL: name: store_flat_p1 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8) + ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p1)) ; GFX9-LABEL: name: store_flat_p1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8) + ; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p1)) ; GFX10-LABEL: name: store_flat_p1 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8) + ; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p1)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p1) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store 8, align 8, addrspace 0) + G_STORE %1, %0 :: (store (p1), align 8, addrspace 0) ... @@ -567,25 +567,25 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vgpr(<2 x p1>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX7: G_STORE [[COPY1]](<2 x p1>), [[COPY]](p1) :: (store 16) + ; GFX7: G_STORE [[COPY1]](<2 x p1>), [[COPY]](p1) :: (store (<2 x p1>)) ; GFX8-LABEL: name: store_flat_v2p1 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:vgpr(<2 x p1>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX8: G_STORE [[COPY1]](<2 x p1>), [[COPY]](p1) :: (store 16) + ; GFX8: G_STORE [[COPY1]](<2 x p1>), [[COPY]](p1) :: (store (<2 x p1>)) ; GFX9-LABEL: name: store_flat_v2p1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr(<2 x p1>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9: G_STORE [[COPY1]](<2 x p1>), [[COPY]](p1) :: (store 16) + ; GFX9: G_STORE [[COPY1]](<2 x p1>), [[COPY]](p1) :: (store (<2 x p1>)) ; GFX10-LABEL: name: store_flat_v2p1 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr(<2 x p1>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX10: G_STORE [[COPY1]](<2 x p1>), [[COPY]](p1) :: (store 16) + ; GFX10: G_STORE [[COPY1]](<2 x p1>), [[COPY]](p1) :: (store (<2 x p1>)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x p1>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store 16, align 16, addrspace 0) + G_STORE %1, %0 :: (store (<2 x p1>), align 16, addrspace 0) ... @@ -604,25 +604,25 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4) + ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3)) ; GFX8-LABEL: name: store_flat_p3 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4) + ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3)) ; GFX9-LABEL: name: store_flat_p3 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4) + ; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3)) ; GFX10-LABEL: name: store_flat_p3 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4) + ; GFX10: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 - G_STORE %1, %0 :: (store 4, align 4, addrspace 0) + G_STORE %1, %0 :: (store (p3), align 4, addrspace 0) ... @@ -641,25 +641,25 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vgpr(<2 x p3>) = COPY $vgpr2_vgpr3 - ; GFX7: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store 8) + ; GFX7: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>)) ; GFX8-LABEL: name: store_flat_v2p3 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:vgpr(<2 x p3>) = COPY $vgpr2_vgpr3 - ; GFX8: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store 8) + ; GFX8: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>)) ; GFX9-LABEL: name: store_flat_v2p3 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr(<2 x p3>) = COPY $vgpr2_vgpr3 - ; GFX9: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store 8) + ; GFX9: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>)) ; GFX10-LABEL: name: store_flat_v2p3 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr(<2 x p3>) = COPY $vgpr2_vgpr3 - ; GFX10: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store 8) + ; GFX10: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x p3>) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store 8, align 8, addrspace 0) + G_STORE %1, %0 :: (store (<2 x p3>), align 8, addrspace 0) ... --- @@ -677,25 +677,25 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4) + ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32)) ; GFX8-LABEL: name: store_atomic_flat_s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4) + ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32)) ; GFX9-LABEL: name: store_atomic_flat_s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4) + ; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32)) ; GFX10-LABEL: name: store_atomic_flat_s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4) + ; GFX10: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 - G_STORE %1, %0 :: (store monotonic 4, align 4, addrspace 0) + G_STORE %1, %0 :: (store monotonic (s32), align 4, addrspace 0) ... @@ -714,25 +714,25 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8) + ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64)) ; GFX8-LABEL: name: store_atomic_flat_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8) + ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64)) ; GFX9-LABEL: name: store_atomic_flat_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8) + ; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64)) ; GFX10-LABEL: name: store_atomic_flat_s64 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8) + ; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store monotonic 8, align 8, addrspace 0) + G_STORE %1, %0 :: (store monotonic (s64), align 8, addrspace 0) ... @@ -761,7 +761,7 @@ body: | ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX7: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4) + ; GFX7: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) ; GFX8-LABEL: name: store_flat_s32_gep_2047 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 @@ -776,12 +776,12 @@ body: | ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX8: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4) + ; GFX8: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) ; GFX9-LABEL: name: store_flat_s32_gep_2047 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, implicit $exec, implicit $flat_scr :: (store 4) + ; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, implicit $exec, implicit $flat_scr :: (store (s32)) ; GFX10-LABEL: name: store_flat_s32_gep_2047 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 @@ -796,11 +796,11 @@ body: | ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX10: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4) + ; GFX10: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 2047 %3:vgpr(p1) = G_PTR_ADD %0, %2 - G_STORE %1, %3 :: (store 4, align 4, addrspace 0) + G_STORE %1, %3 :: (store (s32), align 4, addrspace 0) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir index 55a73a957aabd..70cfa06afb771 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir @@ -26,7 +26,7 @@ body: | ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + ; GFX6: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 1) ; GFX7-LABEL: name: store_global_s32_to_4 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 @@ -36,30 +36,30 @@ body: | ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + ; GFX7: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 1) ; GFX7-FLAT-LABEL: name: store_global_s32_to_4 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-FLAT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) + ; GFX7-FLAT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) ; GFX8-LABEL: name: store_global_s32_to_4 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) + ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) ; GFX9-LABEL: name: store_global_s32_to_4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 4, addrspace 1) + ; GFX9: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (s32), addrspace 1) ; GFX10-LABEL: name: store_global_s32_to_4 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 4, addrspace 1) + ; GFX10: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (s32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 - G_STORE %1, %0 :: (store 4, align 4, addrspace 1) + G_STORE %1, %0 :: (store (s32), align 4, addrspace 1) ... @@ -82,7 +82,7 @@ body: | ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6: BUFFER_STORE_SHORT_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 1) + ; GFX6: BUFFER_STORE_SHORT_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store (s16), addrspace 1) ; GFX7-LABEL: name: store_global_s32_to_2 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 @@ -92,30 +92,30 @@ body: | ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7: BUFFER_STORE_SHORT_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 1) + ; GFX7: BUFFER_STORE_SHORT_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store (s16), addrspace 1) ; GFX7-FLAT-LABEL: name: store_global_s32_to_2 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-FLAT: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 2, addrspace 1) + ; GFX7-FLAT: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16), addrspace 1) ; GFX8-LABEL: name: store_global_s32_to_2 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 2, addrspace 1) + ; GFX8: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16), addrspace 1) ; GFX9-LABEL: name: store_global_s32_to_2 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9: GLOBAL_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 2, addrspace 1) + ; GFX9: GLOBAL_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (s16), addrspace 1) ; GFX10-LABEL: name: store_global_s32_to_2 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10: GLOBAL_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 2, addrspace 1) + ; GFX10: GLOBAL_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (s16), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 - G_STORE %1, %0 :: (store 2, align 2, addrspace 1) + G_STORE %1, %0 :: (store (s16), align 2, addrspace 1) ... @@ -138,7 +138,7 @@ body: | ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6: BUFFER_STORE_BYTE_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 1) + ; GFX6: BUFFER_STORE_BYTE_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 1) ; GFX7-LABEL: name: store_global_s32_to_1 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 @@ -148,30 +148,30 @@ body: | ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7: BUFFER_STORE_BYTE_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 1) + ; GFX7: BUFFER_STORE_BYTE_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 1) ; GFX7-FLAT-LABEL: name: store_global_s32_to_1 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-FLAT: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 1, addrspace 1) + ; GFX7-FLAT: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 1) ; GFX8-LABEL: name: store_global_s32_to_1 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 1, addrspace 1) + ; GFX8: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s8), addrspace 1) ; GFX9-LABEL: name: store_global_s32_to_1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9: GLOBAL_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 1, addrspace 1) + ; GFX9: GLOBAL_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (s8), addrspace 1) ; GFX10-LABEL: name: store_global_s32_to_1 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10: GLOBAL_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 1, addrspace 1) + ; GFX10: GLOBAL_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (s8), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 - G_STORE %1, %0 :: (store 1, align 1, addrspace 1) + G_STORE %1, %0 :: (store (s8), align 1, addrspace 1) ... @@ -190,35 +190,35 @@ body: | ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GFX6: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store 8, addrspace 1) + ; GFX6: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), addrspace 1) ; GFX7-LABEL: name: store_global_s64 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1) + ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 1) ; GFX7-FLAT-LABEL: name: store_global_s64 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-FLAT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1) + ; GFX7-FLAT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 1) ; GFX8-LABEL: name: store_global_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1) + ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64), addrspace 1) ; GFX9-LABEL: name: store_global_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 8, addrspace 1) + ; GFX9: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (s64), addrspace 1) ; GFX10-LABEL: name: store_global_s64 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 8, addrspace 1) + ; GFX10: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (s64), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store 8, align 8, addrspace 1) + G_STORE %1, %0 :: (store (s64), align 8, addrspace 1) ... --- @@ -236,35 +236,35 @@ body: | ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX6: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store 16, addrspace 1) + ; GFX6: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store (s128), addrspace 1) ; GFX7-LABEL: name: store_global_s128 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX7: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store 16, addrspace 1) + ; GFX7: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store (s128), addrspace 1) ; GFX7-FLAT-LABEL: name: store_global_s128 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX7-FLAT: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store 16, addrspace 1) + ; GFX7-FLAT: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store (s128), addrspace 1) ; GFX8-LABEL: name: store_global_s128 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX8: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store 16, addrspace 1) + ; GFX8: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store (s128), addrspace 1) ; GFX9-LABEL: name: store_global_s128 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store 16, addrspace 1) + ; GFX9: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store (s128), addrspace 1) ; GFX10-LABEL: name: store_global_s128 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX10: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store 16, addrspace 1) + ; GFX10: G_STORE [[COPY1]](s128), [[COPY]](p1) :: (store (s128), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store 16, align 16, addrspace 1) + G_STORE %1, %0 :: (store (s128), align 16, addrspace 1) ... @@ -288,7 +288,7 @@ body: | ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6: BUFFER_STORE_DWORDX2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1) + ; GFX6: BUFFER_STORE_DWORDX2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store (<2 x s32>), addrspace 1) ; GFX7-LABEL: name: store_global_v2s32 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 @@ -298,30 +298,30 @@ body: | ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7: BUFFER_STORE_DWORDX2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1) + ; GFX7: BUFFER_STORE_DWORDX2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store (<2 x s32>), addrspace 1) ; GFX7-FLAT-LABEL: name: store_global_v2s32 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-FLAT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1) + ; GFX7-FLAT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>), addrspace 1) ; GFX8-LABEL: name: store_global_v2s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1) + ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s32>), addrspace 1) ; GFX9-LABEL: name: store_global_v2s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 8, addrspace 1) + ; GFX9: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<2 x s32>), addrspace 1) ; GFX10-LABEL: name: store_global_v2s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 8, addrspace 1) + ; GFX10: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<2 x s32>), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s32>) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store 8, align 8, addrspace 1) + G_STORE %1, %0 :: (store (<2 x s32>), align 8, addrspace 1) ... --- @@ -344,7 +344,7 @@ body: | ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6: BUFFER_STORE_DWORDX4_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + ; GFX6: BUFFER_STORE_DWORDX4_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store (<4 x s32>), addrspace 1) ; GFX7-LABEL: name: store_global_v4s32 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 @@ -354,30 +354,30 @@ body: | ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7: BUFFER_STORE_DWORDX4_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + ; GFX7: BUFFER_STORE_DWORDX4_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store (<4 x s32>), addrspace 1) ; GFX7-FLAT-LABEL: name: store_global_v4s32 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX7-FLAT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16, addrspace 1) + ; GFX7-FLAT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>), addrspace 1) ; GFX8-LABEL: name: store_global_v4s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16, addrspace 1) + ; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s32>), addrspace 1) ; GFX9-LABEL: name: store_global_v4s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 16, addrspace 1) + ; GFX9: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<4 x s32>), addrspace 1) ; GFX10-LABEL: name: store_global_v4s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX10: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 16, addrspace 1) + ; GFX10: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<4 x s32>), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store 16, align 16, addrspace 1) + G_STORE %1, %0 :: (store (<4 x s32>), align 16, addrspace 1) ... @@ -396,35 +396,35 @@ body: | ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2 - ; GFX6: G_STORE [[COPY1]](<2 x s16>), [[COPY]](p1) :: (store 4, addrspace 1) + ; GFX6: G_STORE [[COPY1]](<2 x s16>), [[COPY]](p1) :: (store (<2 x s16>), addrspace 1) ; GFX7-LABEL: name: store_global_v2s16 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) + ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>), addrspace 1) ; GFX7-FLAT-LABEL: name: store_global_v2s16 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-FLAT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) + ; GFX7-FLAT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>), addrspace 1) ; GFX8-LABEL: name: store_global_v2s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) + ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s16>), addrspace 1) ; GFX9-LABEL: name: store_global_v2s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 4, addrspace 1) + ; GFX9: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 1) ; GFX10-LABEL: name: store_global_v2s16 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 4, addrspace 1) + ; GFX10: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s16>) = COPY $vgpr2 - G_STORE %1, %0 :: (store 4, align 4, addrspace 1) + G_STORE %1, %0 :: (store (<2 x s16>), align 4, addrspace 1) ... @@ -443,35 +443,35 @@ body: | ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX6: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store 8, addrspace 1) + ; GFX6: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), addrspace 1) ; GFX7-LABEL: name: store_global_v4s16 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1) + ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>), addrspace 1) ; GFX7-FLAT-LABEL: name: store_global_v4s16 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-FLAT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1) + ; GFX7-FLAT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>), addrspace 1) ; GFX8-LABEL: name: store_global_v4s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1) + ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<4 x s16>), addrspace 1) ; GFX9-LABEL: name: store_global_v4s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 8, addrspace 1) + ; GFX9: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<4 x s16>), addrspace 1) ; GFX10-LABEL: name: store_global_v4s16 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 8, addrspace 1) + ; GFX10: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<4 x s16>), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store 8, align 8, addrspace 1) + G_STORE %1, %0 :: (store (<4 x s16>), align 8, addrspace 1) ... @@ -490,35 +490,35 @@ body: | ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6: [[COPY1:%[0-9]+]]:vgpr(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX6: G_STORE [[COPY1]](<8 x s16>), [[COPY]](p1) :: (store 16, addrspace 1) + ; GFX6: G_STORE [[COPY1]](<8 x s16>), [[COPY]](p1) :: (store (<8 x s16>), addrspace 1) ; GFX7-LABEL: name: store_global_v8s16 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vgpr(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX7: G_STORE [[COPY1]](<8 x s16>), [[COPY]](p1) :: (store 16, addrspace 1) + ; GFX7: G_STORE [[COPY1]](<8 x s16>), [[COPY]](p1) :: (store (<8 x s16>), addrspace 1) ; GFX7-FLAT-LABEL: name: store_global_v8s16 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX7-FLAT: G_STORE [[COPY1]](<8 x s16>), [[COPY]](p1) :: (store 16, addrspace 1) + ; GFX7-FLAT: G_STORE [[COPY1]](<8 x s16>), [[COPY]](p1) :: (store (<8 x s16>), addrspace 1) ; GFX8-LABEL: name: store_global_v8s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:vgpr(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX8: G_STORE [[COPY1]](<8 x s16>), [[COPY]](p1) :: (store 16, addrspace 1) + ; GFX8: G_STORE [[COPY1]](<8 x s16>), [[COPY]](p1) :: (store (<8 x s16>), addrspace 1) ; GFX9-LABEL: name: store_global_v8s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9: G_STORE [[COPY1]](<8 x s16>), [[COPY]](p1) :: (store 16, addrspace 1) + ; GFX9: G_STORE [[COPY1]](<8 x s16>), [[COPY]](p1) :: (store (<8 x s16>), addrspace 1) ; GFX10-LABEL: name: store_global_v8s16 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX10: G_STORE [[COPY1]](<8 x s16>), [[COPY]](p1) :: (store 16, addrspace 1) + ; GFX10: G_STORE [[COPY1]](<8 x s16>), [[COPY]](p1) :: (store (<8 x s16>), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store 16, align 16, addrspace 1) + G_STORE %1, %0 :: (store (<8 x s16>), align 16, addrspace 1) ... @@ -537,35 +537,35 @@ body: | ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6: [[COPY1:%[0-9]+]]:vgpr(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX6: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store 16, addrspace 1) + ; GFX6: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), addrspace 1) ; GFX7-LABEL: name: store_global_v2s64 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX7: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16, addrspace 1) + ; GFX7: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>), addrspace 1) ; GFX7-FLAT-LABEL: name: store_global_v2s64 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX7-FLAT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16, addrspace 1) + ; GFX7-FLAT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>), addrspace 1) ; GFX8-LABEL: name: store_global_v2s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16, addrspace 1) + ; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<2 x s64>), addrspace 1) ; GFX9-LABEL: name: store_global_v2s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 16, addrspace 1) + ; GFX9: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<2 x s64>), addrspace 1) ; GFX10-LABEL: name: store_global_v2s64 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX10: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 16, addrspace 1) + ; GFX10: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<2 x s64>), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store 16, align 16, addrspace 1) + G_STORE %1, %0 :: (store (<2 x s64>), align 16, addrspace 1) ... @@ -584,35 +584,35 @@ body: | ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY $vgpr2_vgpr3 - ; GFX6: G_STORE [[COPY1]](p1), [[COPY]](p1) :: (store 8, addrspace 1) + ; GFX6: G_STORE [[COPY1]](p1), [[COPY]](p1) :: (store (p1), addrspace 1) ; GFX7-LABEL: name: store_global_p1 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1) + ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p1), addrspace 1) ; GFX7-FLAT-LABEL: name: store_global_p1 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-FLAT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1) + ; GFX7-FLAT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p1), addrspace 1) ; GFX8-LABEL: name: store_global_p1 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1) + ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p1), addrspace 1) ; GFX9-LABEL: name: store_global_p1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 8, addrspace 1) + ; GFX9: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (p1), addrspace 1) ; GFX10-LABEL: name: store_global_p1 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 8, addrspace 1) + ; GFX10: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (p1), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p1) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store 8, align 8, addrspace 1) + G_STORE %1, %0 :: (store (p1), align 8, addrspace 1) ... @@ -631,35 +631,35 @@ body: | ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6: [[COPY1:%[0-9]+]]:vgpr(<2 x p1>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX6: G_STORE [[COPY1]](<2 x p1>), [[COPY]](p1) :: (store 16, addrspace 1) + ; GFX6: G_STORE [[COPY1]](<2 x p1>), [[COPY]](p1) :: (store (<2 x p1>), addrspace 1) ; GFX7-LABEL: name: store_global_v2p1 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vgpr(<2 x p1>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX7: G_STORE [[COPY1]](<2 x p1>), [[COPY]](p1) :: (store 16, addrspace 1) + ; GFX7: G_STORE [[COPY1]](<2 x p1>), [[COPY]](p1) :: (store (<2 x p1>), addrspace 1) ; GFX7-FLAT-LABEL: name: store_global_v2p1 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr(<2 x p1>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX7-FLAT: G_STORE [[COPY1]](<2 x p1>), [[COPY]](p1) :: (store 16, addrspace 1) + ; GFX7-FLAT: G_STORE [[COPY1]](<2 x p1>), [[COPY]](p1) :: (store (<2 x p1>), addrspace 1) ; GFX8-LABEL: name: store_global_v2p1 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:vgpr(<2 x p1>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX8: G_STORE [[COPY1]](<2 x p1>), [[COPY]](p1) :: (store 16, addrspace 1) + ; GFX8: G_STORE [[COPY1]](<2 x p1>), [[COPY]](p1) :: (store (<2 x p1>), addrspace 1) ; GFX9-LABEL: name: store_global_v2p1 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr(<2 x p1>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9: G_STORE [[COPY1]](<2 x p1>), [[COPY]](p1) :: (store 16, addrspace 1) + ; GFX9: G_STORE [[COPY1]](<2 x p1>), [[COPY]](p1) :: (store (<2 x p1>), addrspace 1) ; GFX10-LABEL: name: store_global_v2p1 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr(<2 x p1>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX10: G_STORE [[COPY1]](<2 x p1>), [[COPY]](p1) :: (store 16, addrspace 1) + ; GFX10: G_STORE [[COPY1]](<2 x p1>), [[COPY]](p1) :: (store (<2 x p1>), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x p1>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store 16, align 16, addrspace 1) + G_STORE %1, %0 :: (store (<2 x p1>), align 16, addrspace 1) ... @@ -678,35 +678,35 @@ body: | ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 - ; GFX6: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store 4, addrspace 1) + ; GFX6: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store (p3), addrspace 1) ; GFX7-LABEL: name: store_global_p3 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) + ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3), addrspace 1) ; GFX7-FLAT-LABEL: name: store_global_p3 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-FLAT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) + ; GFX7-FLAT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3), addrspace 1) ; GFX8-LABEL: name: store_global_p3 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) + ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (p3), addrspace 1) ; GFX9-LABEL: name: store_global_p3 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 4, addrspace 1) + ; GFX9: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (p3), addrspace 1) ; GFX10-LABEL: name: store_global_p3 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 4, addrspace 1) + ; GFX10: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (p3), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 - G_STORE %1, %0 :: (store 4, align 4, addrspace 1) + G_STORE %1, %0 :: (store (p3), align 4, addrspace 1) ... @@ -725,35 +725,35 @@ body: | ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6: [[COPY1:%[0-9]+]]:vgpr(<2 x p3>) = COPY $vgpr2_vgpr3 - ; GFX6: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store 8, addrspace 1) + ; GFX6: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), addrspace 1) ; GFX7-LABEL: name: store_global_v2p3 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vgpr(<2 x p3>) = COPY $vgpr2_vgpr3 - ; GFX7: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store 8, addrspace 1) + ; GFX7: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), addrspace 1) ; GFX7-FLAT-LABEL: name: store_global_v2p3 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr(<2 x p3>) = COPY $vgpr2_vgpr3 - ; GFX7-FLAT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store 8, addrspace 1) + ; GFX7-FLAT: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), addrspace 1) ; GFX8-LABEL: name: store_global_v2p3 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:vgpr(<2 x p3>) = COPY $vgpr2_vgpr3 - ; GFX8: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store 8, addrspace 1) + ; GFX8: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), addrspace 1) ; GFX9-LABEL: name: store_global_v2p3 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr(<2 x p3>) = COPY $vgpr2_vgpr3 - ; GFX9: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store 8, addrspace 1) + ; GFX9: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), addrspace 1) ; GFX10-LABEL: name: store_global_v2p3 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr(<2 x p3>) = COPY $vgpr2_vgpr3 - ; GFX10: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store 8, addrspace 1) + ; GFX10: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x p3>) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store 8, align 8, addrspace 1) + G_STORE %1, %0 :: (store (<2 x p3>), align 8, addrspace 1) ... --- @@ -771,35 +771,35 @@ body: | ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GFX6: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store monotonic 4, addrspace 1) + ; GFX6: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store monotonic (s32), addrspace 1) ; GFX7-LABEL: name: store_atomic_global_s32 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4, addrspace 1) + ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32), addrspace 1) ; GFX7-FLAT-LABEL: name: store_atomic_global_s32 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-FLAT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4, addrspace 1) + ; GFX7-FLAT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32), addrspace 1) ; GFX8-LABEL: name: store_atomic_global_s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4, addrspace 1) + ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32), addrspace 1) ; GFX9-LABEL: name: store_atomic_global_s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store monotonic 4, addrspace 1) + ; GFX9: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store monotonic (s32), addrspace 1) ; GFX10-LABEL: name: store_atomic_global_s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store monotonic 4, addrspace 1) + ; GFX10: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store monotonic (s32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 - G_STORE %1, %0 :: (store monotonic 4, align 4, addrspace 1) + G_STORE %1, %0 :: (store monotonic (s32), align 4, addrspace 1) ... @@ -818,35 +818,35 @@ body: | ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 - ; GFX6: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store monotonic 8, addrspace 1) + ; GFX6: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store monotonic (s64), addrspace 1) ; GFX7-LABEL: name: store_atomic_global_s64 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8, addrspace 1) + ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64), addrspace 1) ; GFX7-FLAT-LABEL: name: store_atomic_global_s64 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-FLAT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8, addrspace 1) + ; GFX7-FLAT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64), addrspace 1) ; GFX8-LABEL: name: store_atomic_global_s64 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8, addrspace 1) + ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64), addrspace 1) ; GFX9-LABEL: name: store_atomic_global_s64 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store monotonic 8, addrspace 1) + ; GFX9: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store monotonic (s64), addrspace 1) ; GFX10-LABEL: name: store_atomic_global_s64 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store monotonic 8, addrspace 1) + ; GFX10: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store monotonic (s64), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store monotonic 8, align 8, addrspace 1) + G_STORE %1, %0 :: (store monotonic (s64), align 8, addrspace 1) ... @@ -870,7 +870,7 @@ body: | ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + ; GFX6: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, 0, implicit $exec :: (store (s32), addrspace 1) ; GFX7-LABEL: name: store_global_s32_gep_2047 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 @@ -880,7 +880,7 @@ body: | ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + ; GFX7: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, 0, implicit $exec :: (store (s32), addrspace 1) ; GFX7-FLAT-LABEL: name: store_global_s32_gep_2047 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 @@ -895,7 +895,7 @@ body: | ; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX7-FLAT: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) + ; GFX7-FLAT: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) ; GFX8-LABEL: name: store_global_s32_gep_2047 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 @@ -910,21 +910,21 @@ body: | ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1 - ; GFX8: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) + ; GFX8: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) ; GFX9-LABEL: name: store_global_s32_gep_2047 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, implicit $exec :: (store 4, addrspace 1) + ; GFX9: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, implicit $exec :: (store (s32), addrspace 1) ; GFX10-LABEL: name: store_global_s32_gep_2047 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, implicit $exec :: (store 4, addrspace 1) + ; GFX10: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, implicit $exec :: (store (s32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 2047 %3:vgpr(p1) = G_PTR_ADD %0, %2 - G_STORE %1, %3 :: (store 4, align 4, addrspace 1) + G_STORE %1, %3 :: (store (s32), align 4, addrspace 1) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.s96.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.s96.mir index 752c84dc0f4d1..c2f7cf301a656 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.s96.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.s96.mir @@ -27,30 +27,30 @@ body: | ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX7: BUFFER_STORE_DWORDX3_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store 12, align 16, addrspace 1) + ; GFX7: BUFFER_STORE_DWORDX3_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store (<3 x s32>), align 16, addrspace 1) ; GFX7-FLAT-LABEL: name: store_global_v3s32 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 - ; GFX7-FLAT: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16, addrspace 1) + ; GFX7-FLAT: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>), align 16, addrspace 1) ; GFX8-LABEL: name: store_global_v3s32 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 - ; GFX8: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16, addrspace 1) + ; GFX8: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<3 x s32>), align 16, addrspace 1) ; GFX9-LABEL: name: store_global_v3s32 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 - ; GFX9: GLOBAL_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 12, align 16, addrspace 1) + ; GFX9: GLOBAL_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<3 x s32>), align 16, addrspace 1) ; GFX10-LABEL: name: store_global_v3s32 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 - ; GFX10: GLOBAL_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 12, align 16, addrspace 1) + ; GFX10: GLOBAL_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<3 x s32>), align 16, addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - G_STORE %1, %0 :: (store 12, align 16, addrspace 1) + G_STORE %1, %0 :: (store (<3 x s32>), align 16, addrspace 1) ... @@ -74,30 +74,30 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vgpr(s96) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX7: G_STORE [[COPY1]](s96), [[COPY]](p1) :: (store 12, align 16, addrspace 1) + ; GFX7: G_STORE [[COPY1]](s96), [[COPY]](p1) :: (store (s96), align 16, addrspace 1) ; GFX7-FLAT-LABEL: name: store_global_s96 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr(s96) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX7-FLAT: G_STORE [[COPY1]](s96), [[COPY]](p1) :: (store 12, align 16, addrspace 1) + ; GFX7-FLAT: G_STORE [[COPY1]](s96), [[COPY]](p1) :: (store (s96), align 16, addrspace 1) ; GFX8-LABEL: name: store_global_s96 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:vgpr(s96) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX8: G_STORE [[COPY1]](s96), [[COPY]](p1) :: (store 12, align 16, addrspace 1) + ; GFX8: G_STORE [[COPY1]](s96), [[COPY]](p1) :: (store (s96), align 16, addrspace 1) ; GFX9-LABEL: name: store_global_s96 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s96) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX9: G_STORE [[COPY1]](s96), [[COPY]](p1) :: (store 12, align 16, addrspace 1) + ; GFX9: G_STORE [[COPY1]](s96), [[COPY]](p1) :: (store (s96), align 16, addrspace 1) ; GFX10-LABEL: name: store_global_s96 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s96) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX10: G_STORE [[COPY1]](s96), [[COPY]](p1) :: (store 12, align 16, addrspace 1) + ; GFX10: G_STORE [[COPY1]](s96), [[COPY]](p1) :: (store (s96), align 16, addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s96) = COPY $vgpr2_vgpr3_vgpr4 - G_STORE %1, %0 :: (store 12, align 16, addrspace 1) + G_STORE %1, %0 :: (store (s96), align 16, addrspace 1) ... @@ -121,29 +121,29 @@ body: | ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vgpr(<6 x s16>) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX7: G_STORE [[COPY1]](<6 x s16>), [[COPY]](p1) :: (store 12, align 16, addrspace 1) + ; GFX7: G_STORE [[COPY1]](<6 x s16>), [[COPY]](p1) :: (store (<6 x s16>), align 16, addrspace 1) ; GFX7-FLAT-LABEL: name: store_global_v6s16 ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX7-FLAT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr(<6 x s16>) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX7-FLAT: G_STORE [[COPY1]](<6 x s16>), [[COPY]](p1) :: (store 12, align 16, addrspace 1) + ; GFX7-FLAT: G_STORE [[COPY1]](<6 x s16>), [[COPY]](p1) :: (store (<6 x s16>), align 16, addrspace 1) ; GFX8-LABEL: name: store_global_v6s16 ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:vgpr(<6 x s16>) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX8: G_STORE [[COPY1]](<6 x s16>), [[COPY]](p1) :: (store 12, align 16, addrspace 1) + ; GFX8: G_STORE [[COPY1]](<6 x s16>), [[COPY]](p1) :: (store (<6 x s16>), align 16, addrspace 1) ; GFX9-LABEL: name: store_global_v6s16 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr(<6 x s16>) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX9: G_STORE [[COPY1]](<6 x s16>), [[COPY]](p1) :: (store 12, align 16, addrspace 1) + ; GFX9: G_STORE [[COPY1]](<6 x s16>), [[COPY]](p1) :: (store (<6 x s16>), align 16, addrspace 1) ; GFX10-LABEL: name: store_global_v6s16 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr(<6 x s16>) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX10: G_STORE [[COPY1]](<6 x s16>), [[COPY]](p1) :: (store 12, align 16, addrspace 1) + ; GFX10: G_STORE [[COPY1]](<6 x s16>), [[COPY]](p1) :: (store (<6 x s16>), align 16, addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<6 x s16>) = COPY $vgpr2_vgpr3_vgpr4 - G_STORE %1, %0 :: (store 12, align 16, addrspace 1) + G_STORE %1, %0 :: (store (<6 x s16>), align 16, addrspace 1) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-local.mir index f918818117363..03bdc6141242b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-local.mir @@ -19,26 +19,26 @@ body: | bb.0: liveins: $vgpr0, $vgpr1 + ; GFX6-LABEL: name: store_local_s32_to_4 + ; GFX6: liveins: $vgpr0, $vgpr1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s32), addrspace 3) ; GFX7-LABEL: name: store_local_s32_to_4 ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store 4, addrspace 3) + ; GFX7: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s32), addrspace 3) ; GFX9-LABEL: name: store_local_s32_to_4 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9: DS_WRITE_B32_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store 4, addrspace 3) - ; GFX6-LABEL: name: store_local_s32_to_4 - ; GFX6: liveins: $vgpr0, $vgpr1 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store 4, addrspace 3) + ; GFX9: DS_WRITE_B32_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (s32), addrspace 3) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p3) = COPY $vgpr1 - G_STORE %0, %1 :: (store 4, align 4, addrspace 3) + G_STORE %0, %1 :: (store (s32), align 4, addrspace 3) ... @@ -56,26 +56,26 @@ body: | bb.0: liveins: $vgpr0, $vgpr1 + ; GFX6-LABEL: name: store_local_s32_to_2 + ; GFX6: liveins: $vgpr0, $vgpr1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: DS_WRITE_B16 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s16), addrspace 3) ; GFX7-LABEL: name: store_local_s32_to_2 ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: DS_WRITE_B16 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store 2, addrspace 3) + ; GFX7: DS_WRITE_B16 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s16), addrspace 3) ; GFX9-LABEL: name: store_local_s32_to_2 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9: DS_WRITE_B16_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store 2, addrspace 3) - ; GFX6-LABEL: name: store_local_s32_to_2 - ; GFX6: liveins: $vgpr0, $vgpr1 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: DS_WRITE_B16 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store 2, addrspace 3) + ; GFX9: DS_WRITE_B16_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (s16), addrspace 3) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p3) = COPY $vgpr1 - G_STORE %0, %1 :: (store 2, align 2, addrspace 3) + G_STORE %0, %1 :: (store (s16), align 2, addrspace 3) ... @@ -93,26 +93,26 @@ body: | bb.0: liveins: $vgpr0, $vgpr1 + ; GFX6-LABEL: name: store_local_s32_to_1 + ; GFX6: liveins: $vgpr0, $vgpr1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: DS_WRITE_B8 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s8), addrspace 3) ; GFX7-LABEL: name: store_local_s32_to_1 ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: DS_WRITE_B8 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store 1, addrspace 3) + ; GFX7: DS_WRITE_B8 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s8), addrspace 3) ; GFX9-LABEL: name: store_local_s32_to_1 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9: DS_WRITE_B8_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store 1, addrspace 3) - ; GFX6-LABEL: name: store_local_s32_to_1 - ; GFX6: liveins: $vgpr0, $vgpr1 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: DS_WRITE_B8 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store 1, addrspace 3) + ; GFX9: DS_WRITE_B8_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (s8), addrspace 3) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p3) = COPY $vgpr1 - G_STORE %0, %1 :: (store 1, align 1, addrspace 3) + G_STORE %0, %1 :: (store (s8), align 1, addrspace 3) ... @@ -130,26 +130,26 @@ body: | bb.0: liveins: $vgpr0, $vgpr1 + ; GFX6-LABEL: name: store_local_v2s16 + ; GFX6: liveins: $vgpr0, $vgpr1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (<2 x s16>), addrspace 3) ; GFX7-LABEL: name: store_local_v2s16 ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store 4, addrspace 3) + ; GFX7: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (<2 x s16>), addrspace 3) ; GFX9-LABEL: name: store_local_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9: DS_WRITE_B32_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store 4, addrspace 3) - ; GFX6-LABEL: name: store_local_v2s16 - ; GFX6: liveins: $vgpr0, $vgpr1 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store 4, addrspace 3) + ; GFX9: DS_WRITE_B32_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 3) %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(p3) = COPY $vgpr1 - G_STORE %0, %1 :: (store 4, align 4, addrspace 3) + G_STORE %0, %1 :: (store (<2 x s16>), align 4, addrspace 3) ... @@ -167,26 +167,26 @@ body: | bb.0: liveins: $vgpr0, $vgpr1 + ; GFX6-LABEL: name: store_local_p3 + ; GFX6: liveins: $vgpr0, $vgpr1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (p3), addrspace 3) ; GFX7-LABEL: name: store_local_p3 ; GFX7: liveins: $vgpr0, $vgpr1 ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store 4, addrspace 3) + ; GFX7: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (p3), addrspace 3) ; GFX9-LABEL: name: store_local_p3 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9: DS_WRITE_B32_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store 4, addrspace 3) - ; GFX6-LABEL: name: store_local_p3 - ; GFX6: liveins: $vgpr0, $vgpr1 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: DS_WRITE_B32 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store 4, addrspace 3) + ; GFX9: DS_WRITE_B32_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (p3), addrspace 3) %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(p3) = COPY $vgpr1 - G_STORE %0, %1 :: (store 4, align 4, addrspace 3) + G_STORE %0, %1 :: (store (p3), align 4, addrspace 3) ... @@ -200,23 +200,23 @@ tracksRegLiveness: true body: | bb.0: + ; GFX6-LABEL: name: store_local_s32_to_1_constant_4095 + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec + ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: DS_WRITE_B8 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, 0, implicit $m0, implicit $exec :: (store (s8), addrspace 3) ; GFX7-LABEL: name: store_local_s32_to_1_constant_4095 ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: DS_WRITE_B8 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, 0, implicit $m0, implicit $exec :: (store 1, addrspace 3) + ; GFX7: DS_WRITE_B8 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, 0, implicit $m0, implicit $exec :: (store (s8), addrspace 3) ; GFX9-LABEL: name: store_local_s32_to_1_constant_4095 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX9: DS_WRITE_B8_gfx9 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, 0, implicit $exec :: (store 1, addrspace 3) - ; GFX6-LABEL: name: store_local_s32_to_1_constant_4095 - ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec - ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: DS_WRITE_B8 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, 0, implicit $m0, implicit $exec :: (store 1, addrspace 3) + ; GFX9: DS_WRITE_B8_gfx9 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, 0, implicit $exec :: (store (s8), addrspace 3) %0:vgpr(p3) = G_CONSTANT i32 4095 %1:vgpr(s32) = G_CONSTANT i32 0 - G_STORE %1, %0 :: (store 1, align 1, addrspace 3) + G_STORE %1, %0 :: (store (s8), align 1, addrspace 3) ... @@ -235,23 +235,23 @@ stack: body: | bb.0: + ; GFX6-LABEL: name: store_local_s32_to_1_constant_4096 + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec + ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: DS_WRITE_B8 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, 0, implicit $m0, implicit $exec :: (store (s8), addrspace 3) ; GFX7-LABEL: name: store_local_s32_to_1_constant_4096 ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: DS_WRITE_B8 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, 0, implicit $m0, implicit $exec :: (store 1, addrspace 3) + ; GFX7: DS_WRITE_B8 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, 0, implicit $m0, implicit $exec :: (store (s8), addrspace 3) ; GFX9-LABEL: name: store_local_s32_to_1_constant_4096 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX9: DS_WRITE_B8_gfx9 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, 0, implicit $exec :: (store 1, addrspace 3) - ; GFX6-LABEL: name: store_local_s32_to_1_constant_4096 - ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: DS_WRITE_B8 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, 0, implicit $m0, implicit $exec :: (store 1, addrspace 3) + ; GFX9: DS_WRITE_B8_gfx9 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, 0, implicit $exec :: (store (s8), addrspace 3) %0:vgpr(p3) = G_CONSTANT i32 4096 %1:vgpr(s32) = G_CONSTANT i32 0 - G_STORE %1, %0 :: (store 1, align 1, addrspace 3) + G_STORE %1, %0 :: (store (s8), align 1, addrspace 3) ... @@ -269,6 +269,12 @@ body: | bb.0: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX6-LABEL: name: store_local_s64_align4 + ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: G_STORE [[COPY]](s64), [[COPY1]](p3) :: (store (s64), align 4, addrspace 3) ; GFX7-LABEL: name: store_local_s64_align4 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 @@ -276,23 +282,17 @@ body: | ; GFX7: $m0 = S_MOV_B32 -1 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7: DS_WRITE2_B32 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $m0, implicit $exec :: (store 8, align 4, addrspace 3) + ; GFX7: DS_WRITE2_B32 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $m0, implicit $exec :: (store (s64), align 4, addrspace 3) ; GFX9-LABEL: name: store_local_s64_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store 8, align 4, addrspace 3) - ; GFX6-LABEL: name: store_local_s64_align4 - ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 - ; GFX6: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 - ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: G_STORE [[COPY]](s64), [[COPY1]](p3) :: (store 8, align 4, addrspace 3) + ; GFX9: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (s64), align 4, addrspace 3) %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 - G_STORE %0, %1 :: (store 8, align 4, addrspace 3) + G_STORE %0, %1 :: (store (s64), align 4, addrspace 3) ... @@ -310,6 +310,12 @@ body: | bb.0: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX6-LABEL: name: store_local_p1_align4 + ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: G_STORE [[COPY]](p1), [[COPY1]](p3) :: (store (p1), align 4, addrspace 3) ; GFX7-LABEL: name: store_local_p1_align4 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 @@ -317,23 +323,17 @@ body: | ; GFX7: $m0 = S_MOV_B32 -1 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7: DS_WRITE2_B32 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $m0, implicit $exec :: (store 8, align 4, addrspace 3) + ; GFX7: DS_WRITE2_B32 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $m0, implicit $exec :: (store (p1), align 4, addrspace 3) ; GFX9-LABEL: name: store_local_p1_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store 8, align 4, addrspace 3) - ; GFX6-LABEL: name: store_local_p1_align4 - ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 - ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 - ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: G_STORE [[COPY]](p1), [[COPY1]](p3) :: (store 8, align 4, addrspace 3) + ; GFX9: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (p1), align 4, addrspace 3) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 - G_STORE %0, %1 :: (store 8, align 4, addrspace 3) + G_STORE %0, %1 :: (store (p1), align 4, addrspace 3) ... @@ -351,6 +351,12 @@ body: | bb.0: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX6-LABEL: name: store_local_v2s32_align4 + ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: G_STORE [[COPY]](<2 x s32>), [[COPY1]](p3) :: (store (<2 x s32>), align 4, addrspace 3) ; GFX7-LABEL: name: store_local_v2s32_align4 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 @@ -358,23 +364,17 @@ body: | ; GFX7: $m0 = S_MOV_B32 -1 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7: DS_WRITE2_B32 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $m0, implicit $exec :: (store 8, align 4, addrspace 3) + ; GFX7: DS_WRITE2_B32 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $m0, implicit $exec :: (store (<2 x s32>), align 4, addrspace 3) ; GFX9-LABEL: name: store_local_v2s32_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store 8, align 4, addrspace 3) - ; GFX6-LABEL: name: store_local_v2s32_align4 - ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 - ; GFX6: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 - ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: G_STORE [[COPY]](<2 x s32>), [[COPY1]](p3) :: (store 8, align 4, addrspace 3) + ; GFX9: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (<2 x s32>), align 4, addrspace 3) %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 - G_STORE %0, %1 :: (store 8, align 4, addrspace 3) + G_STORE %0, %1 :: (store (<2 x s32>), align 4, addrspace 3) ... @@ -392,6 +392,12 @@ body: | bb.0: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX6-LABEL: name: store_local_v4s16_align4 + ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: G_STORE [[COPY]](<4 x s16>), [[COPY1]](p3) :: (store (<4 x s16>), align 4, addrspace 3) ; GFX7-LABEL: name: store_local_v4s16_align4 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 @@ -399,23 +405,17 @@ body: | ; GFX7: $m0 = S_MOV_B32 -1 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7: DS_WRITE2_B32 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $m0, implicit $exec :: (store 8, align 4, addrspace 3) + ; GFX7: DS_WRITE2_B32 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $m0, implicit $exec :: (store (<4 x s16>), align 4, addrspace 3) ; GFX9-LABEL: name: store_local_v4s16_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store 8, align 4, addrspace 3) - ; GFX6-LABEL: name: store_local_v4s16_align4 - ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 - ; GFX6: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 - ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: G_STORE [[COPY]](<4 x s16>), [[COPY1]](p3) :: (store 8, align 4, addrspace 3) + ; GFX9: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (<4 x s16>), align 4, addrspace 3) %0:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 - G_STORE %0, %1 :: (store 8, align 4, addrspace 3) + G_STORE %0, %1 :: (store (<4 x s16>), align 4, addrspace 3) ... @@ -433,26 +433,26 @@ body: | bb.0: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX6-LABEL: name: store_local_s64_align8 + ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s64), addrspace 3) ; GFX7-LABEL: name: store_local_s64_align8 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store 8, addrspace 3) + ; GFX7: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (s64), addrspace 3) ; GFX9-LABEL: name: store_local_s64_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store 8, addrspace 3) - ; GFX6-LABEL: name: store_local_s64_align8 - ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 - ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store 8, addrspace 3) + ; GFX9: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (s64), addrspace 3) %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 - G_STORE %0, %1 :: (store 8, align 8, addrspace 3) + G_STORE %0, %1 :: (store (s64), align 8, addrspace 3) ... @@ -470,26 +470,26 @@ body: | bb.0: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX6-LABEL: name: store_local_p1_align8 + ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (p1), addrspace 3) ; GFX7-LABEL: name: store_local_p1_align8 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store 8, addrspace 3) + ; GFX7: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (p1), addrspace 3) ; GFX9-LABEL: name: store_local_p1_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store 8, addrspace 3) - ; GFX6-LABEL: name: store_local_p1_align8 - ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 - ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store 8, addrspace 3) + ; GFX9: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (p1), addrspace 3) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 - G_STORE %0, %1 :: (store 8, align 8, addrspace 3) + G_STORE %0, %1 :: (store (p1), align 8, addrspace 3) ... @@ -507,26 +507,26 @@ body: | bb.0: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX6-LABEL: name: store_local_v2s32_align8 + ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (<2 x s32>), addrspace 3) ; GFX7-LABEL: name: store_local_v2s32_align8 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store 8, addrspace 3) + ; GFX7: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (<2 x s32>), addrspace 3) ; GFX9-LABEL: name: store_local_v2s32_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store 8, addrspace 3) - ; GFX6-LABEL: name: store_local_v2s32_align8 - ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 - ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store 8, addrspace 3) + ; GFX9: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (<2 x s32>), addrspace 3) %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 - G_STORE %0, %1 :: (store 8, align 8, addrspace 3) + G_STORE %0, %1 :: (store (<2 x s32>), align 8, addrspace 3) ... @@ -544,26 +544,26 @@ body: | bb.0: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX6-LABEL: name: store_local_v4s16_align8 + ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (<4 x s16>), addrspace 3) ; GFX7-LABEL: name: store_local_v4s16_align8 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX7: $m0 = S_MOV_B32 -1 - ; GFX7: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store 8, addrspace 3) + ; GFX7: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store (<4 x s16>), addrspace 3) ; GFX9-LABEL: name: store_local_v4s16_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store 8, addrspace 3) - ; GFX6-LABEL: name: store_local_v4s16_align8 - ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 - ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: DS_WRITE_B64 [[COPY1]], [[COPY]], 0, 0, implicit $m0, implicit $exec :: (store 8, addrspace 3) + ; GFX9: DS_WRITE_B64_gfx9 [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (<4 x s16>), addrspace 3) %0:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 - G_STORE %0, %1 :: (store 8, align 8, addrspace 3) + G_STORE %0, %1 :: (store (<4 x s16>), align 8, addrspace 3) ... @@ -581,6 +581,14 @@ body: | bb.0: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX6-LABEL: name: store_local_s64_align4_from_1_gep_1016 + ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 + ; GFX6: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1016 + ; GFX6: [[PTR_ADD:%[0-9]+]]:vgpr(p3) = G_PTR_ADD [[COPY1]], [[C]](s32) + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: G_STORE [[COPY]](s64), [[PTR_ADD]](p3) :: (store (s64), align 4, addrspace 3) ; GFX7-LABEL: name: store_local_s64_align4_from_1_gep_1016 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 @@ -588,27 +596,19 @@ body: | ; GFX7: $m0 = S_MOV_B32 -1 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7: DS_WRITE2_B32 [[COPY1]], [[COPY3]], [[COPY2]], 254, 255, 0, implicit $m0, implicit $exec :: (store 8, align 4, addrspace 3) + ; GFX7: DS_WRITE2_B32 [[COPY1]], [[COPY3]], [[COPY2]], 254, 255, 0, implicit $m0, implicit $exec :: (store (s64), align 4, addrspace 3) ; GFX9-LABEL: name: store_local_s64_align4_from_1_gep_1016 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 254, 255, 0, implicit $exec :: (store 8, align 4, addrspace 3) - ; GFX6-LABEL: name: store_local_s64_align4_from_1_gep_1016 - ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 - ; GFX6: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 - ; GFX6: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1016 - ; GFX6: [[PTR_ADD:%[0-9]+]]:vgpr(p3) = G_PTR_ADD [[COPY1]], [[C]](s32) - ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: G_STORE [[COPY]](s64), [[PTR_ADD]](p3) :: (store 8, align 4, addrspace 3) + ; GFX9: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 254, 255, 0, implicit $exec :: (store (s64), align 4, addrspace 3) %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 %2:vgpr(s32) = G_CONSTANT i32 1016 %3:vgpr(p3) = G_PTR_ADD %1, %2 - G_STORE %0, %3 :: (store 8, align 4, addrspace 3) + G_STORE %0, %3 :: (store (s64), align 4, addrspace 3) ... @@ -626,6 +626,14 @@ body: | bb.0: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX6-LABEL: name: store_local_s64_align4_from_1_gep_1020 + ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 + ; GFX6: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1020 + ; GFX6: [[PTR_ADD:%[0-9]+]]:vgpr(p3) = G_PTR_ADD [[COPY1]], [[C]](s32) + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: G_STORE [[COPY]](s64), [[PTR_ADD]](p3) :: (store (s64), align 4, addrspace 3) ; GFX7-LABEL: name: store_local_s64_align4_from_1_gep_1020 ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 @@ -635,7 +643,7 @@ body: | ; GFX7: $m0 = S_MOV_B32 -1 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX7: DS_WRITE2_B32 %3, [[COPY3]], [[COPY2]], 0, 1, 0, implicit $m0, implicit $exec :: (store 8, align 4, addrspace 3) + ; GFX7: DS_WRITE2_B32 %3, [[COPY3]], [[COPY2]], 0, 1, 0, implicit $m0, implicit $exec :: (store (s64), align 4, addrspace 3) ; GFX9-LABEL: name: store_local_s64_align4_from_1_gep_1020 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 @@ -644,19 +652,11 @@ body: | ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; GFX9: DS_WRITE2_B32_gfx9 [[V_ADD_U32_e64_]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store 8, align 4, addrspace 3) - ; GFX6-LABEL: name: store_local_s64_align4_from_1_gep_1020 - ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2 - ; GFX6: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2 - ; GFX6: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1020 - ; GFX6: [[PTR_ADD:%[0-9]+]]:vgpr(p3) = G_PTR_ADD [[COPY1]], [[C]](s32) - ; GFX6: $m0 = S_MOV_B32 -1 - ; GFX6: G_STORE [[COPY]](s64), [[PTR_ADD]](p3) :: (store 8, align 4, addrspace 3) + ; GFX9: DS_WRITE2_B32_gfx9 [[V_ADD_U32_e64_]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store (s64), align 4, addrspace 3) %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(p3) = COPY $vgpr2 %2:vgpr(s32) = G_CONSTANT i32 1020 %3:vgpr(p3) = G_PTR_ADD %1, %2 - G_STORE %0, %3 :: (store 8, align 4, addrspace 3) + G_STORE %0, %3 :: (store (s64), align 4, addrspace 3) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir index f7f32b49c1f48..6c0d70c7f53ac 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir @@ -22,15 +22,15 @@ body: | ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5) + ; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) ; GFX9-LABEL: name: function_store_private_s32_to_4 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5) + ; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 - G_STORE %0, %1 :: (store 4, align 4, addrspace 5) + G_STORE %0, %1 :: (store (s32), align 4, addrspace 5) ... @@ -53,15 +53,15 @@ body: | ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 5) + ; GFX6: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s16), addrspace 5) ; GFX9-LABEL: name: function_store_private_s32_to_2 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 5) + ; GFX9: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s16), addrspace 5) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 - G_STORE %0, %1 :: (store 2, align 2, addrspace 5) + G_STORE %0, %1 :: (store (s16), align 2, addrspace 5) ... @@ -84,15 +84,15 @@ body: | ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5) + ; GFX6: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5) ; GFX9-LABEL: name: function_store_private_s32_to_1 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5) + ; GFX9: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 - G_STORE %0, %1 :: (store 1, align 1, addrspace 5) + G_STORE %0, %1 :: (store (s8), align 1, addrspace 5) ... @@ -115,15 +115,15 @@ body: | ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5) + ; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 5) ; GFX9-LABEL: name: function_store_private_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5) + ; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (<2 x s16>), addrspace 5) %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 - G_STORE %0, %1 :: (store 4, align 4, addrspace 5) + G_STORE %0, %1 :: (store (<2 x s16>), align 4, addrspace 5) ... @@ -146,15 +146,15 @@ body: | ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5) + ; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (p3), addrspace 5) ; GFX9-LABEL: name: function_store_private_p3 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5) + ; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (p3), addrspace 5) %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 - G_STORE %0, %1 :: (store 4, align 4, addrspace 5) + G_STORE %0, %1 :: (store (p3), align 4, addrspace 5) ... @@ -177,15 +177,15 @@ body: | ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5) + ; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (p5), addrspace 5) ; GFX9-LABEL: name: function_store_private_p5 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5) + ; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (p5), addrspace 5) %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 - G_STORE %0, %1 :: (store 4, align 4, addrspace 5) + G_STORE %0, %1 :: (store (p5), align 4, addrspace 5) ... @@ -207,15 +207,15 @@ body: | ; GFX6-LABEL: name: function_store_private_s32_to_1_fi_offset_4095 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store 1, addrspace 5) + ; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5) ; GFX9-LABEL: name: function_store_private_s32_to_1_fi_offset_4095 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX9: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store 1, addrspace 5) + ; GFX9: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5) %0:vgpr(p5) = G_FRAME_INDEX %stack.0 %1:vgpr(s32) = G_CONSTANT i32 4095 %2:vgpr(p5) = G_PTR_ADD %0, %1 %3:vgpr(s32) = G_CONSTANT i32 0 - G_STORE %3, %2 :: (store 1, align 1, addrspace 5) + G_STORE %3, %2 :: (store (s8), align 1, addrspace 5) ... @@ -237,13 +237,13 @@ body: | ; GFX6-LABEL: name: function_store_private_s32_to_1_constant_4095 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX6: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store 1, addrspace 5) + ; GFX6: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5) ; GFX9-LABEL: name: function_store_private_s32_to_1_constant_4095 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX9: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store 1, addrspace 5) + ; GFX9: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5) %0:vgpr(p5) = G_CONSTANT i32 4095 %1:vgpr(s32) = G_CONSTANT i32 0 - G_STORE %1, %0 :: (store 1, align 1, addrspace 5) + G_STORE %1, %0 :: (store (s8), align 1, addrspace 5) ... @@ -266,14 +266,14 @@ body: | ; GFX6-LABEL: name: function_store_private_s32_to_1_constant_4096 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5) + ; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5) ; GFX9-LABEL: name: function_store_private_s32_to_1_constant_4096 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX9: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5) + ; GFX9: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5) %0:vgpr(p5) = G_CONSTANT i32 4096 %1:vgpr(s32) = G_CONSTANT i32 0 - G_STORE %1, %0 :: (store 1, align 1, addrspace 5) + G_STORE %1, %0 :: (store (s8), align 1, addrspace 5) ... @@ -295,15 +295,15 @@ body: | ; GFX6: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5) + ; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) ; GFX9-LABEL: name: kernel_store_private_s32_to_4 ; GFX9: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5) + ; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 - G_STORE %0, %1 :: (store 4, align 4, addrspace 5) + G_STORE %0, %1 :: (store (s32), align 4, addrspace 5) ... @@ -325,15 +325,15 @@ body: | ; GFX6: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 5) + ; GFX6: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s16), addrspace 5) ; GFX9-LABEL: name: kernel_store_private_s32_to_2 ; GFX9: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 5) + ; GFX9: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s16), addrspace 5) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 - G_STORE %0, %1 :: (store 2, align 2, addrspace 5) + G_STORE %0, %1 :: (store (s16), align 2, addrspace 5) ... @@ -355,15 +355,15 @@ body: | ; GFX6: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5) + ; GFX6: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5) ; GFX9-LABEL: name: kernel_store_private_s32_to_1 ; GFX9: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5) + ; GFX9: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 - G_STORE %0, %1 :: (store 1, align 1, addrspace 5) + G_STORE %0, %1 :: (store (s8), align 1, addrspace 5) ... @@ -385,15 +385,15 @@ body: | ; GFX6: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5) + ; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) ; GFX9-LABEL: name: kernel_store_private_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5) + ; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 - G_STORE %0, %1 :: (store 4, align 4, addrspace 5) + G_STORE %0, %1 :: (store (s32), align 4, addrspace 5) ... @@ -415,15 +415,15 @@ body: | ; GFX6: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5) + ; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (p3), addrspace 5) ; GFX9-LABEL: name: kernel_store_private_p3 ; GFX9: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5) + ; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (p3), addrspace 5) %0:vgpr(p3) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 - G_STORE %0, %1 :: (store 4, align 4, addrspace 5) + G_STORE %0, %1 :: (store (p3), align 4, addrspace 5) ... @@ -445,15 +445,15 @@ body: | ; GFX6: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5) + ; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (p5), addrspace 5) ; GFX9-LABEL: name: kernel_store_private_p5 ; GFX9: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5) + ; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (p5), addrspace 5) %0:vgpr(p5) = COPY $vgpr0 %1:vgpr(p5) = COPY $vgpr1 - G_STORE %0, %1 :: (store 4, align 4, addrspace 5) + G_STORE %0, %1 :: (store (p5), align 4, addrspace 5) ... @@ -476,16 +476,16 @@ body: | ; GFX6-LABEL: name: kernel_store_private_s32_to_1_fi_offset_4095 ; GFX6: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store 1, addrspace 5) + ; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5) ; GFX9-LABEL: name: kernel_store_private_s32_to_1_fi_offset_4095 ; GFX9: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX9: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store 1, addrspace 5) + ; GFX9: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5) %0:vgpr(p5) = G_FRAME_INDEX %stack.0 %1:vgpr(s32) = G_CONSTANT i32 4095 %2:vgpr(p5) = G_PTR_ADD %0, %1 %3:vgpr(s32) = G_CONSTANT i32 0 - G_STORE %3, %2 :: (store 1, align 1, addrspace 5) + G_STORE %3, %2 :: (store (s8), align 1, addrspace 5) ... @@ -508,14 +508,14 @@ body: | ; GFX6-LABEL: name: kernel_store_private_s32_to_1_constant_4095 ; GFX6: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX6: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store 1, addrspace 5) + ; GFX6: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5) ; GFX9-LABEL: name: kernel_store_private_s32_to_1_constant_4095 ; GFX9: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX9: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store 1, addrspace 5) + ; GFX9: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5) %0:vgpr(p5) = G_CONSTANT i32 4095 %1:vgpr(s32) = G_CONSTANT i32 0 - G_STORE %1, %0 :: (store 1, align 1, addrspace 5) + G_STORE %1, %0 :: (store (s8), align 1, addrspace 5) ... @@ -539,14 +539,14 @@ body: | ; GFX6: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5) + ; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5) ; GFX9-LABEL: name: kernel_store_private_s32_to_1_constant_4096 ; GFX9: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec - ; GFX9: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5) + ; GFX9: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s8), addrspace 5) %0:vgpr(p5) = G_CONSTANT i32 4096 %1:vgpr(s32) = G_CONSTANT i32 0 - G_STORE %1, %0 :: (store 1, align 1, addrspace 5) + G_STORE %1, %0 :: (store (s8), align 1, addrspace 5) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll index d74e3c4d97d65..398df2fc0fe56 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll @@ -9,12 +9,12 @@ define amdgpu_kernel void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) noun ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 8, addrspace 4) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 8, addrspace 4) ; HSA-VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8) - ; HSA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) + ; HSA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: i8_arg ; LEGACY-MESA-VI: bb.1 (%ir-block.0): @@ -22,12 +22,12 @@ define amdgpu_kernel void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) noun ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4) ; LEGACY-MESA-VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8) - ; LEGACY-MESA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) + ; LEGACY-MESA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 %ext = zext i8 %in to i32 store i32 %ext, i32 addrspace(1)* %out, align 4 @@ -41,12 +41,12 @@ define amdgpu_kernel void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zero ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 8, addrspace 4) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 8, addrspace 4) ; HSA-VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8) - ; HSA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) + ; HSA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: i8_zext_arg ; LEGACY-MESA-VI: bb.1 (%ir-block.0): @@ -54,12 +54,12 @@ define amdgpu_kernel void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zero ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4) ; LEGACY-MESA-VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8) - ; LEGACY-MESA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) + ; LEGACY-MESA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 %ext = zext i8 %in to i32 store i32 %ext, i32 addrspace(1)* %out, align 4 @@ -73,12 +73,12 @@ define amdgpu_kernel void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 sign ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 8, addrspace 4) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 8, addrspace 4) ; HSA-VI: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s8) - ; HSA-VI: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) + ; HSA-VI: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: i8_sext_arg ; LEGACY-MESA-VI: bb.1 (%ir-block.0): @@ -86,12 +86,12 @@ define amdgpu_kernel void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 sign ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4) ; LEGACY-MESA-VI: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s8) - ; LEGACY-MESA-VI: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) + ; LEGACY-MESA-VI: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 %ext = sext i8 %in to i32 store i32 %ext, i32 addrspace(1)* %out, align 4 @@ -105,12 +105,12 @@ define amdgpu_kernel void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) no ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 2, align 8, addrspace 4) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 8, addrspace 4) ; HSA-VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16) - ; HSA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) + ; HSA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: i16_arg ; LEGACY-MESA-VI: bb.1 (%ir-block.0): @@ -118,12 +118,12 @@ define amdgpu_kernel void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) no ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 2, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 4, addrspace 4) ; LEGACY-MESA-VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16) - ; LEGACY-MESA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) + ; LEGACY-MESA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 %ext = zext i16 %in to i32 store i32 %ext, i32 addrspace(1)* %out, align 4 @@ -137,12 +137,12 @@ define amdgpu_kernel void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 ze ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 2, align 8, addrspace 4) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 8, addrspace 4) ; HSA-VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16) - ; HSA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) + ; HSA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: i16_zext_arg ; LEGACY-MESA-VI: bb.1 (%ir-block.0): @@ -150,12 +150,12 @@ define amdgpu_kernel void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 ze ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 2, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 4, addrspace 4) ; LEGACY-MESA-VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16) - ; LEGACY-MESA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) + ; LEGACY-MESA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 %ext = zext i16 %in to i32 store i32 %ext, i32 addrspace(1)* %out, align 4 @@ -169,12 +169,12 @@ define amdgpu_kernel void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 si ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 2, align 8, addrspace 4) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 8, addrspace 4) ; HSA-VI: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s16) - ; HSA-VI: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) + ; HSA-VI: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: i16_sext_arg ; LEGACY-MESA-VI: bb.1 (%ir-block.0): @@ -182,12 +182,12 @@ define amdgpu_kernel void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 si ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 2, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 4, addrspace 4) ; LEGACY-MESA-VI: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s16) - ; LEGACY-MESA-VI: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) + ; LEGACY-MESA-VI: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 %ext = sext i16 %in to i32 store i32 %ext, i32 addrspace(1)* %out, align 4 @@ -201,11 +201,11 @@ define amdgpu_kernel void @i32_arg(i32 addrspace(1)* nocapture %out, i32 %in) no ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 4, align 8, addrspace 4) - ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4) + ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: i32_arg ; LEGACY-MESA-VI: bb.1.entry: @@ -213,11 +213,11 @@ define amdgpu_kernel void @i32_arg(i32 addrspace(1)* nocapture %out, i32 %in) no ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 4, addrspace 4) - ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), addrspace 4) + ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 entry: store i32 %in, i32 addrspace(1)* %out, align 4 @@ -231,11 +231,11 @@ define amdgpu_kernel void @f32_arg(float addrspace(1)* nocapture %out, float %in ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 4, align 8, addrspace 4) - ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4) + ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: f32_arg ; LEGACY-MESA-VI: bb.1.entry: @@ -243,11 +243,11 @@ define amdgpu_kernel void @f32_arg(float addrspace(1)* nocapture %out, float %in ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 4, addrspace 4) - ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), addrspace 4) + ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 entry: store float %in, float addrspace(1)* %out, align 4 @@ -261,11 +261,11 @@ define amdgpu_kernel void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) { ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 2, align 8, addrspace 4) - ; HSA-VI: G_STORE [[LOAD1]](<2 x s8>), [[LOAD]](p1) :: (store 2 into %ir.out, addrspace 1) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 8, addrspace 4) + ; HSA-VI: G_STORE [[LOAD1]](<2 x s8>), [[LOAD]](p1) :: (store (<2 x s8>) into %ir.out, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: v2i8_arg ; LEGACY-MESA-VI: bb.1.entry: @@ -273,11 +273,11 @@ define amdgpu_kernel void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) { ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 2, align 4, addrspace 4) - ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<2 x s8>), [[LOAD]](p1) :: (store 2 into %ir.out, addrspace 1) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 4, addrspace 4) + ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<2 x s8>), [[LOAD]](p1) :: (store (<2 x s8>) into %ir.out, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 entry: store <2 x i8> %in, <2 x i8> addrspace(1)* %out @@ -291,11 +291,11 @@ define amdgpu_kernel void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 4, align 8, addrspace 4) - ; HSA-VI: G_STORE [[LOAD1]](<2 x s16>), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4) + ; HSA-VI: G_STORE [[LOAD1]](<2 x s16>), [[LOAD]](p1) :: (store (<2 x s16>) into %ir.out, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: v2i16_arg ; LEGACY-MESA-VI: bb.1.entry: @@ -303,11 +303,11 @@ define amdgpu_kernel void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 4, addrspace 4) - ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<2 x s16>), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), addrspace 4) + ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<2 x s16>), [[LOAD]](p1) :: (store (<2 x s16>) into %ir.out, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 entry: store <2 x i16> %in, <2 x i16> addrspace(1)* %out @@ -321,11 +321,11 @@ define amdgpu_kernel void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 8, addrspace 4) - ; HSA-VI: G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store 8 into %ir.out, align 4, addrspace 1) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4) + ; HSA-VI: G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: v2i32_arg ; LEGACY-MESA-VI: bb.1.entry: @@ -333,11 +333,11 @@ define amdgpu_kernel void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) - ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store 8 into %ir.out, align 4, addrspace 1) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) + ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 entry: store <2 x i32> %in, <2 x i32> addrspace(1)* %out, align 4 @@ -351,11 +351,11 @@ define amdgpu_kernel void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, < ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 8, addrspace 4) - ; HSA-VI: G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store 8 into %ir.out, align 4, addrspace 1) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4) + ; HSA-VI: G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: v2f32_arg ; LEGACY-MESA-VI: bb.1.entry: @@ -363,11 +363,11 @@ define amdgpu_kernel void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, < ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) - ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store 8 into %ir.out, align 4, addrspace 1) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) + ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 entry: store <2 x float> %in, <2 x float> addrspace(1)* %out, align 4 @@ -381,11 +381,11 @@ define amdgpu_kernel void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 3, align 8, addrspace 4) - ; HSA-VI: G_STORE [[LOAD1]](<3 x s8>), [[LOAD]](p1) :: (store 3 into %ir.out, align 4, addrspace 1) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s24), align 8, addrspace 4) + ; HSA-VI: G_STORE [[LOAD1]](<3 x s8>), [[LOAD]](p1) :: (store (<3 x s8>) into %ir.out, align 4, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: v3i8_arg ; LEGACY-MESA-VI: bb.1.entry: @@ -393,11 +393,11 @@ define amdgpu_kernel void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 3, align 4, addrspace 4) - ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<3 x s8>), [[LOAD]](p1) :: (store 3 into %ir.out, align 4, addrspace 1) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s24), align 4, addrspace 4) + ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<3 x s8>), [[LOAD]](p1) :: (store (<3 x s8>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 entry: store <3 x i8> %in, <3 x i8> addrspace(1)* %out, align 4 @@ -411,11 +411,11 @@ define amdgpu_kernel void @v3i16_arg(<3 x i16> addrspace(1)* nocapture %out, <3 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 6, align 8, addrspace 4) - ; HSA-VI: G_STORE [[LOAD1]](<3 x s16>), [[LOAD]](p1) :: (store 6 into %ir.out, align 4, addrspace 1) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s48), align 8, addrspace 4) + ; HSA-VI: G_STORE [[LOAD1]](<3 x s16>), [[LOAD]](p1) :: (store (<3 x s16>) into %ir.out, align 4, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: v3i16_arg ; LEGACY-MESA-VI: bb.1.entry: @@ -423,11 +423,11 @@ define amdgpu_kernel void @v3i16_arg(<3 x i16> addrspace(1)* nocapture %out, <3 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 6, align 4, addrspace 4) - ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<3 x s16>), [[LOAD]](p1) :: (store 6 into %ir.out, align 4, addrspace 1) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s48), align 4, addrspace 4) + ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<3 x s16>), [[LOAD]](p1) :: (store (<3 x s16>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 entry: store <3 x i16> %in, <3 x i16> addrspace(1)* %out, align 4 @@ -441,11 +441,11 @@ define amdgpu_kernel void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 12, align 16, addrspace 4) - ; HSA-VI: G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store 12 into %ir.out, align 4, addrspace 1) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s96), align 16, addrspace 4) + ; HSA-VI: G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: v3i32_arg ; LEGACY-MESA-VI: bb.1.entry: @@ -453,11 +453,11 @@ define amdgpu_kernel void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 12, align 4, addrspace 4) - ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store 12 into %ir.out, align 4, addrspace 1) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s96), align 4, addrspace 4) + ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 entry: store <3 x i32> %in, <3 x i32> addrspace(1)* %out, align 4 @@ -471,11 +471,11 @@ define amdgpu_kernel void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, < ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 12, align 16, addrspace 4) - ; HSA-VI: G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store 12 into %ir.out, align 4, addrspace 1) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s96), align 16, addrspace 4) + ; HSA-VI: G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: v3f32_arg ; LEGACY-MESA-VI: bb.1.entry: @@ -483,11 +483,11 @@ define amdgpu_kernel void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, < ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 12, align 4, addrspace 4) - ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store 12 into %ir.out, align 4, addrspace 1) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s96), align 4, addrspace 4) + ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 entry: store <3 x float> %in, <3 x float> addrspace(1)* %out, align 4 @@ -501,11 +501,11 @@ define amdgpu_kernel void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) { ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 4, align 8, addrspace 4) - ; HSA-VI: G_STORE [[LOAD1]](<4 x s8>), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4) + ; HSA-VI: G_STORE [[LOAD1]](<4 x s8>), [[LOAD]](p1) :: (store (<4 x s8>) into %ir.out, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: v4i8_arg ; LEGACY-MESA-VI: bb.1.entry: @@ -513,11 +513,11 @@ define amdgpu_kernel void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) { ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 4, addrspace 4) - ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<4 x s8>), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), addrspace 4) + ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<4 x s8>), [[LOAD]](p1) :: (store (<4 x s8>) into %ir.out, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 entry: store <4 x i8> %in, <4 x i8> addrspace(1)* %out @@ -531,11 +531,11 @@ define amdgpu_kernel void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 8, addrspace 4) - ; HSA-VI: G_STORE [[LOAD1]](<4 x s16>), [[LOAD]](p1) :: (store 8 into %ir.out, addrspace 1) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4) + ; HSA-VI: G_STORE [[LOAD1]](<4 x s16>), [[LOAD]](p1) :: (store (<4 x s16>) into %ir.out, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: v4i16_arg ; LEGACY-MESA-VI: bb.1.entry: @@ -543,11 +543,11 @@ define amdgpu_kernel void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) - ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<4 x s16>), [[LOAD]](p1) :: (store 8 into %ir.out, addrspace 1) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) + ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<4 x s16>), [[LOAD]](p1) :: (store (<4 x s16>) into %ir.out, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 entry: store <4 x i16> %in, <4 x i16> addrspace(1)* %out @@ -561,11 +561,11 @@ define amdgpu_kernel void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 16, addrspace 4) - ; HSA-VI: G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store 16 into %ir.out, align 4, addrspace 1) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s128), addrspace 4) + ; HSA-VI: G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: v4i32_arg ; LEGACY-MESA-VI: bb.1.entry: @@ -573,11 +573,11 @@ define amdgpu_kernel void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 16, align 4, addrspace 4) - ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store 16 into %ir.out, align 4, addrspace 1) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s128), align 4, addrspace 4) + ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 entry: store <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4 @@ -591,11 +591,11 @@ define amdgpu_kernel void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, < ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 16, addrspace 4) - ; HSA-VI: G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store 16 into %ir.out, align 4, addrspace 1) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s128), addrspace 4) + ; HSA-VI: G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: v4f32_arg ; LEGACY-MESA-VI: bb.1.entry: @@ -603,11 +603,11 @@ define amdgpu_kernel void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, < ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 16, align 4, addrspace 4) - ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store 16 into %ir.out, align 4, addrspace 1) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s128), align 4, addrspace 4) + ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 entry: store <4 x float> %in, <4 x float> addrspace(1)* %out, align 4 @@ -621,11 +621,11 @@ define amdgpu_kernel void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) { ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 8, addrspace 4) - ; HSA-VI: G_STORE [[LOAD1]](<8 x s8>), [[LOAD]](p1) :: (store 8 into %ir.out, addrspace 1) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4) + ; HSA-VI: G_STORE [[LOAD1]](<8 x s8>), [[LOAD]](p1) :: (store (<8 x s8>) into %ir.out, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: v8i8_arg ; LEGACY-MESA-VI: bb.1.entry: @@ -633,11 +633,11 @@ define amdgpu_kernel void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) { ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) - ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<8 x s8>), [[LOAD]](p1) :: (store 8 into %ir.out, addrspace 1) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) + ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<8 x s8>), [[LOAD]](p1) :: (store (<8 x s8>) into %ir.out, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 entry: store <8 x i8> %in, <8 x i8> addrspace(1)* %out @@ -651,11 +651,11 @@ define amdgpu_kernel void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 16, addrspace 4) - ; HSA-VI: G_STORE [[LOAD1]](<8 x s16>), [[LOAD]](p1) :: (store 16 into %ir.out, addrspace 1) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s128), addrspace 4) + ; HSA-VI: G_STORE [[LOAD1]](<8 x s16>), [[LOAD]](p1) :: (store (<8 x s16>) into %ir.out, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: v8i16_arg ; LEGACY-MESA-VI: bb.1.entry: @@ -663,11 +663,11 @@ define amdgpu_kernel void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 16, align 4, addrspace 4) - ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<8 x s16>), [[LOAD]](p1) :: (store 16 into %ir.out, addrspace 1) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s128), align 4, addrspace 4) + ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<8 x s16>), [[LOAD]](p1) :: (store (<8 x s16>) into %ir.out, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 entry: store <8 x i16> %in, <8 x i16> addrspace(1)* %out @@ -681,11 +681,11 @@ define amdgpu_kernel void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 32, align 16, addrspace 4) - ; HSA-VI: G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store 32 into %ir.out, align 4, addrspace 1) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s256), align 16, addrspace 4) + ; HSA-VI: G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: v8i32_arg ; LEGACY-MESA-VI: bb.1.entry: @@ -693,11 +693,11 @@ define amdgpu_kernel void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 68 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 32, align 4, addrspace 4) - ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store 32 into %ir.out, align 4, addrspace 1) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s256), align 4, addrspace 4) + ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 entry: store <8 x i32> %in, <8 x i32> addrspace(1)* %out, align 4 @@ -711,11 +711,11 @@ define amdgpu_kernel void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, < ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 32, align 16, addrspace 4) - ; HSA-VI: G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store 32 into %ir.out, align 4, addrspace 1) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s256), align 16, addrspace 4) + ; HSA-VI: G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: v8f32_arg ; LEGACY-MESA-VI: bb.1.entry: @@ -723,11 +723,11 @@ define amdgpu_kernel void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, < ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 68 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 32, align 4, addrspace 4) - ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store 32 into %ir.out, align 4, addrspace 1) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s256), align 4, addrspace 4) + ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 entry: store <8 x float> %in, <8 x float> addrspace(1)* %out, align 4 @@ -741,11 +741,11 @@ define amdgpu_kernel void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 16, addrspace 4) - ; HSA-VI: G_STORE [[LOAD1]](<16 x s8>), [[LOAD]](p1) :: (store 16 into %ir.out, addrspace 1) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s128), addrspace 4) + ; HSA-VI: G_STORE [[LOAD1]](<16 x s8>), [[LOAD]](p1) :: (store (<16 x s8>) into %ir.out, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: v16i8_arg ; LEGACY-MESA-VI: bb.1.entry: @@ -753,11 +753,11 @@ define amdgpu_kernel void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 16, align 4, addrspace 4) - ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<16 x s8>), [[LOAD]](p1) :: (store 16 into %ir.out, addrspace 1) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s128), align 4, addrspace 4) + ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<16 x s8>), [[LOAD]](p1) :: (store (<16 x s8>) into %ir.out, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 entry: store <16 x i8> %in, <16 x i8> addrspace(1)* %out @@ -771,11 +771,11 @@ define amdgpu_kernel void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16> ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 32, align 16, addrspace 4) - ; HSA-VI: G_STORE [[LOAD1]](<16 x s16>), [[LOAD]](p1) :: (store 32 into %ir.out, addrspace 1) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s256), align 16, addrspace 4) + ; HSA-VI: G_STORE [[LOAD1]](<16 x s16>), [[LOAD]](p1) :: (store (<16 x s16>) into %ir.out, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: v16i16_arg ; LEGACY-MESA-VI: bb.1.entry: @@ -783,11 +783,11 @@ define amdgpu_kernel void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16> ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 68 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 32, align 4, addrspace 4) - ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<16 x s16>), [[LOAD]](p1) :: (store 32 into %ir.out, addrspace 1) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s256), align 4, addrspace 4) + ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<16 x s16>), [[LOAD]](p1) :: (store (<16 x s16>) into %ir.out, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 entry: store <16 x i16> %in, <16 x i16> addrspace(1)* %out @@ -801,11 +801,11 @@ define amdgpu_kernel void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, < ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 64, align 16, addrspace 4) - ; HSA-VI: G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store 64 into %ir.out, align 4, addrspace 1) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s512), align 16, addrspace 4) + ; HSA-VI: G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: v16i32_arg ; LEGACY-MESA-VI: bb.1.entry: @@ -813,11 +813,11 @@ define amdgpu_kernel void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, < ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 100 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 64, align 4, addrspace 4) - ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store 64 into %ir.out, align 4, addrspace 1) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s512), align 4, addrspace 4) + ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 entry: store <16 x i32> %in, <16 x i32> addrspace(1)* %out, align 4 @@ -831,11 +831,11 @@ define amdgpu_kernel void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 64, align 16, addrspace 4) - ; HSA-VI: G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store 64 into %ir.out, align 4, addrspace 1) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s512), align 16, addrspace 4) + ; HSA-VI: G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: v16f32_arg ; LEGACY-MESA-VI: bb.1.entry: @@ -843,11 +843,11 @@ define amdgpu_kernel void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 100 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 64, align 4, addrspace 4) - ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store 64 into %ir.out, align 4, addrspace 1) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s512), align 4, addrspace 4) + ; LEGACY-MESA-VI: G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 entry: store <16 x float> %in, <16 x float> addrspace(1)* %out, align 4 @@ -861,11 +861,11 @@ define amdgpu_kernel void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwi ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 8, addrspace 4) - ; HSA-VI: G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store 8 into %ir.out, addrspace 1) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4) + ; HSA-VI: G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: kernel_arg_i64 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): @@ -873,11 +873,11 @@ define amdgpu_kernel void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwi ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) - ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store 8 into %ir.out, addrspace 1) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) + ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 store i64 %a, i64 addrspace(1)* %out, align 8 ret void @@ -890,11 +890,11 @@ define amdgpu_kernel void @f64_kernel_arg(double addrspace(1)* %out, double %in ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 8, addrspace 4) - ; HSA-VI: G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store 8 into %ir.out, addrspace 1) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4) + ; HSA-VI: G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: f64_kernel_arg ; LEGACY-MESA-VI: bb.1.entry: @@ -902,11 +902,11 @@ define amdgpu_kernel void @f64_kernel_arg(double addrspace(1)* %out, double %in ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) - ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store 8 into %ir.out, addrspace 1) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) + ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 entry: store double %in, double addrspace(1)* %out @@ -920,11 +920,11 @@ define amdgpu_kernel void @i1_arg(i1 addrspace(1)* %out, i1 %x) nounwind { ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 8, addrspace 4) - ; HSA-VI: G_STORE [[LOAD1]](s1), [[LOAD]](p1) :: (store 1 into %ir.out, addrspace 1) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 8, addrspace 4) + ; HSA-VI: G_STORE [[LOAD1]](s1), [[LOAD]](p1) :: (store (s1) into %ir.out, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: i1_arg ; LEGACY-MESA-VI: bb.1 (%ir-block.0): @@ -932,11 +932,11 @@ define amdgpu_kernel void @i1_arg(i1 addrspace(1)* %out, i1 %x) nounwind { ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 4, addrspace 4) - ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s1), [[LOAD]](p1) :: (store 1 into %ir.out, addrspace 1) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4) + ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s1), [[LOAD]](p1) :: (store (s1) into %ir.out, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 store i1 %x, i1 addrspace(1)* %out, align 1 ret void @@ -949,12 +949,12 @@ define amdgpu_kernel void @i1_arg_zext_i32(i32 addrspace(1)* %out, i1 %x) nounwi ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 8, addrspace 4) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 8, addrspace 4) ; HSA-VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s1) - ; HSA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) + ; HSA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: i1_arg_zext_i32 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): @@ -962,12 +962,12 @@ define amdgpu_kernel void @i1_arg_zext_i32(i32 addrspace(1)* %out, i1 %x) nounwi ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4) ; LEGACY-MESA-VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s1) - ; LEGACY-MESA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) + ; LEGACY-MESA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 %ext = zext i1 %x to i32 store i32 %ext, i32 addrspace(1)* %out, align 4 @@ -981,12 +981,12 @@ define amdgpu_kernel void @i1_arg_zext_i64(i64 addrspace(1)* %out, i1 %x) nounwi ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 8, addrspace 4) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 8, addrspace 4) ; HSA-VI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD1]](s1) - ; HSA-VI: G_STORE [[ZEXT]](s64), [[LOAD]](p1) :: (store 8 into %ir.out, addrspace 1) + ; HSA-VI: G_STORE [[ZEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: i1_arg_zext_i64 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): @@ -994,12 +994,12 @@ define amdgpu_kernel void @i1_arg_zext_i64(i64 addrspace(1)* %out, i1 %x) nounwi ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4) ; LEGACY-MESA-VI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD1]](s1) - ; LEGACY-MESA-VI: G_STORE [[ZEXT]](s64), [[LOAD]](p1) :: (store 8 into %ir.out, addrspace 1) + ; LEGACY-MESA-VI: G_STORE [[ZEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 %ext = zext i1 %x to i64 store i64 %ext, i64 addrspace(1)* %out, align 8 @@ -1013,12 +1013,12 @@ define amdgpu_kernel void @i1_arg_sext_i32(i32 addrspace(1)* %out, i1 %x) nounwi ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 8, addrspace 4) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 8, addrspace 4) ; HSA-VI: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s1) - ; HSA-VI: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) + ; HSA-VI: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: i1_arg_sext_i32 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): @@ -1026,12 +1026,12 @@ define amdgpu_kernel void @i1_arg_sext_i32(i32 addrspace(1)* %out, i1 %x) nounwi ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4) ; LEGACY-MESA-VI: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s1) - ; LEGACY-MESA-VI: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) + ; LEGACY-MESA-VI: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 %ext = sext i1 %x to i32 store i32 %ext, i32addrspace(1)* %out, align 4 @@ -1045,12 +1045,12 @@ define amdgpu_kernel void @i1_arg_sext_i64(i64 addrspace(1)* %out, i1 %x) nounwi ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 8, addrspace 4) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 8, addrspace 4) ; HSA-VI: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD1]](s1) - ; HSA-VI: G_STORE [[SEXT]](s64), [[LOAD]](p1) :: (store 8 into %ir.out, addrspace 1) + ; HSA-VI: G_STORE [[SEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: i1_arg_sext_i64 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): @@ -1058,12 +1058,12 @@ define amdgpu_kernel void @i1_arg_sext_i64(i64 addrspace(1)* %out, i1 %x) nounwi ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4) ; LEGACY-MESA-VI: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD1]](s1) - ; LEGACY-MESA-VI: G_STORE [[SEXT]](s64), [[LOAD]](p1) :: (store 8 into %ir.out, addrspace 1) + ; LEGACY-MESA-VI: G_STORE [[SEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 %ext = sext i1 %x to i64 store i64 %ext, i64 addrspace(1)* %out, align 8 @@ -1079,9 +1079,9 @@ define amdgpu_kernel void @empty_struct_arg({} %arg0, i32 %arg1) nounwind { ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 4, align 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) ; HSA-VI: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; HSA-VI: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; HSA-VI: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: empty_struct_arg ; LEGACY-MESA-VI: bb.1 (%ir-block.0): @@ -1089,9 +1089,9 @@ define amdgpu_kernel void @empty_struct_arg({} %arg0, i32 %arg1) nounwind { ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4) ; LEGACY-MESA-VI: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; LEGACY-MESA-VI: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; LEGACY-MESA-VI: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 store i32 %arg1, i32 addrspace(1)* undef ret void @@ -1104,9 +1104,9 @@ define amdgpu_kernel void @empty_array_arg([0 x i8] %arg0, i32 %arg1) nounwind { ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 4, align 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) ; HSA-VI: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; HSA-VI: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; HSA-VI: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: empty_array_arg ; LEGACY-MESA-VI: bb.1 (%ir-block.0): @@ -1114,9 +1114,9 @@ define amdgpu_kernel void @empty_array_arg([0 x i8] %arg0, i32 %arg1) nounwind { ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4) ; LEGACY-MESA-VI: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; LEGACY-MESA-VI: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; LEGACY-MESA-VI: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 store i32 %arg1, i32 addrspace(1)* undef ret void @@ -1137,25 +1137,25 @@ define amdgpu_kernel void @struct_argument_alignment({i32, i64} %arg0, i8 %pad, ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s128), addrspace 4) ; HSA-VI: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](s128), 0 ; HSA-VI: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD]](s128), 64 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 16, addrspace 4) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 16, addrspace 4) ; HSA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 ; HSA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; HSA-VI: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load 16, align 8, addrspace 4) + ; HSA-VI: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s128), align 8, addrspace 4) ; HSA-VI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD2]](s128), 0 ; HSA-VI: [[EXTRACT3:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD2]](s128), 64 ; HSA-VI: [[C3:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 ; HSA-VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C3]](p1) ; HSA-VI: [[COPY2:%[0-9]+]]:_(p1) = COPY [[C3]](p1) - ; HSA-VI: G_STORE [[EXTRACT]](s32), [[C3]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1) - ; HSA-VI: G_STORE [[EXTRACT1]](s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1) - ; HSA-VI: G_STORE [[LOAD1]](s8), [[COPY2]](p1) :: (volatile store 1 into `i8 addrspace(1)* null`, addrspace 1) - ; HSA-VI: G_STORE [[EXTRACT2]](s32), [[C3]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1) - ; HSA-VI: G_STORE [[EXTRACT3]](s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1) + ; HSA-VI: G_STORE [[EXTRACT]](s32), [[C3]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1) + ; HSA-VI: G_STORE [[EXTRACT1]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1) + ; HSA-VI: G_STORE [[LOAD1]](s8), [[COPY2]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1) + ; HSA-VI: G_STORE [[EXTRACT2]](s32), [[C3]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1) + ; HSA-VI: G_STORE [[EXTRACT3]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: struct_argument_alignment ; LEGACY-MESA-VI: bb.1 (%ir-block.0): @@ -1163,25 +1163,25 @@ define amdgpu_kernel void @struct_argument_alignment({i32, i64} %arg0, i8 %pad, ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 16, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s128), align 4, addrspace 4) ; LEGACY-MESA-VI: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](s128), 0 ; LEGACY-MESA-VI: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD]](s128), 64 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4) ; LEGACY-MESA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 60 ; LEGACY-MESA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; LEGACY-MESA-VI: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load 16, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s128), align 4, addrspace 4) ; LEGACY-MESA-VI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD2]](s128), 0 ; LEGACY-MESA-VI: [[EXTRACT3:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD2]](s128), 64 ; LEGACY-MESA-VI: [[C3:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 ; LEGACY-MESA-VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C3]](p1) ; LEGACY-MESA-VI: [[COPY2:%[0-9]+]]:_(p1) = COPY [[C3]](p1) - ; LEGACY-MESA-VI: G_STORE [[EXTRACT]](s32), [[C3]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1) - ; LEGACY-MESA-VI: G_STORE [[EXTRACT1]](s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1) - ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s8), [[COPY2]](p1) :: (volatile store 1 into `i8 addrspace(1)* null`, addrspace 1) - ; LEGACY-MESA-VI: G_STORE [[EXTRACT2]](s32), [[C3]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1) - ; LEGACY-MESA-VI: G_STORE [[EXTRACT3]](s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1) + ; LEGACY-MESA-VI: G_STORE [[EXTRACT]](s32), [[C3]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1) + ; LEGACY-MESA-VI: G_STORE [[EXTRACT1]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1) + ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s8), [[COPY2]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1) + ; LEGACY-MESA-VI: G_STORE [[EXTRACT2]](s32), [[C3]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1) + ; LEGACY-MESA-VI: G_STORE [[EXTRACT3]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 %val0 = extractvalue {i32, i64} %arg0, 0 %val1 = extractvalue {i32, i64} %arg0, 1 @@ -1204,20 +1204,20 @@ define amdgpu_kernel void @packed_struct_argument_alignment(<{i32, i64}> %arg0, ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(s96) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 12, align 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(s96) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s96), align 16, addrspace 4) ; HSA-VI: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](s96), 0 ; HSA-VI: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD]](s96), 32 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 13 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s96) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 12, align 1, addrspace 4) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s96) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s96), align 1, addrspace 4) ; HSA-VI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD1]](s96), 0 ; HSA-VI: [[EXTRACT3:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD1]](s96), 32 ; HSA-VI: [[C2:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 ; HSA-VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C2]](p1) - ; HSA-VI: G_STORE [[EXTRACT]](s32), [[C2]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1) - ; HSA-VI: G_STORE [[EXTRACT1]](s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1) - ; HSA-VI: G_STORE [[EXTRACT2]](s32), [[C2]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1) - ; HSA-VI: G_STORE [[EXTRACT3]](s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1) + ; HSA-VI: G_STORE [[EXTRACT]](s32), [[C2]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1) + ; HSA-VI: G_STORE [[EXTRACT1]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1) + ; HSA-VI: G_STORE [[EXTRACT2]](s32), [[C2]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1) + ; HSA-VI: G_STORE [[EXTRACT3]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: packed_struct_argument_alignment ; LEGACY-MESA-VI: bb.1 (%ir-block.1): @@ -1225,20 +1225,20 @@ define amdgpu_kernel void @packed_struct_argument_alignment(<{i32, i64}> %arg0, ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(s96) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 12, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(s96) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s96), align 4, addrspace 4) ; LEGACY-MESA-VI: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](s96), 0 ; LEGACY-MESA-VI: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD]](s96), 32 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 49 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s96) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 12, align 1, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s96) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s96), align 1, addrspace 4) ; LEGACY-MESA-VI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD1]](s96), 0 ; LEGACY-MESA-VI: [[EXTRACT3:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD1]](s96), 32 ; LEGACY-MESA-VI: [[C2:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 ; LEGACY-MESA-VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C2]](p1) - ; LEGACY-MESA-VI: G_STORE [[EXTRACT]](s32), [[C2]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1) - ; LEGACY-MESA-VI: G_STORE [[EXTRACT1]](s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1) - ; LEGACY-MESA-VI: G_STORE [[EXTRACT2]](s32), [[C2]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1) - ; LEGACY-MESA-VI: G_STORE [[EXTRACT3]](s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1) + ; LEGACY-MESA-VI: G_STORE [[EXTRACT]](s32), [[C2]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1) + ; LEGACY-MESA-VI: G_STORE [[EXTRACT1]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1) + ; LEGACY-MESA-VI: G_STORE [[EXTRACT2]](s32), [[C2]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1) + ; LEGACY-MESA-VI: G_STORE [[EXTRACT3]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 %val0 = extractvalue <{i32, i64}> %arg0, 0 %val1 = extractvalue <{i32, i64}> %arg0, 1 @@ -1274,12 +1274,12 @@ define amdgpu_kernel void @byref_constant_i8_arg(i32 addrspace(1)* nocapture %ou ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 1 from %ir.in.byref, addrspace 4) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s8) from %ir.in.byref, addrspace 4) ; HSA-VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8) - ; HSA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) + ; HSA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: byref_constant_i8_arg ; LEGACY-MESA-VI: bb.1 (%ir-block.0): @@ -1287,12 +1287,12 @@ define amdgpu_kernel void @byref_constant_i8_arg(i32 addrspace(1)* nocapture %ou ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 1 from %ir.in.byref, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s8) from %ir.in.byref, addrspace 4) ; LEGACY-MESA-VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8) - ; LEGACY-MESA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) + ; LEGACY-MESA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 %in = load i8, i8 addrspace(4)* %in.byref %ext = zext i8 %in to i32 @@ -1307,12 +1307,12 @@ define amdgpu_kernel void @byref_constant_i16_arg(i32 addrspace(1)* nocapture %o ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 2 from %ir.in.byref, addrspace 4) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s16) from %ir.in.byref, addrspace 4) ; HSA-VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16) - ; HSA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) + ; HSA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: byref_constant_i16_arg ; LEGACY-MESA-VI: bb.1 (%ir-block.0): @@ -1320,12 +1320,12 @@ define amdgpu_kernel void @byref_constant_i16_arg(i32 addrspace(1)* nocapture %o ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 2 from %ir.in.byref, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s16) from %ir.in.byref, addrspace 4) ; LEGACY-MESA-VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16) - ; LEGACY-MESA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) + ; LEGACY-MESA-VI: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 %in = load i16, i16 addrspace(4)* %in.byref %ext = zext i16 %in to i32 @@ -1340,15 +1340,15 @@ define amdgpu_kernel void @byref_constant_i32_arg(i32 addrspace(1)* nocapture %o ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; HSA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; HSA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load 4, addrspace 4) - ; HSA-VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 4 from %ir.in.byref, addrspace 4) - ; HSA-VI: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1) - ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), addrspace 4) + ; HSA-VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4) + ; HSA-VI: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) + ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: byref_constant_i32_arg ; LEGACY-MESA-VI: bb.1 (%ir-block.0): @@ -1356,15 +1356,15 @@ define amdgpu_kernel void @byref_constant_i32_arg(i32 addrspace(1)* nocapture %o ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; LEGACY-MESA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 ; LEGACY-MESA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load 4, align 16, addrspace 4) - ; LEGACY-MESA-VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 4 from %ir.in.byref, addrspace 4) - ; LEGACY-MESA-VI: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1) - ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4) + ; LEGACY-MESA-VI: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) + ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 %in = load i32, i32 addrspace(4)* %in.byref store volatile i32 %in, i32 addrspace(1)* %out, align 4 @@ -1379,15 +1379,15 @@ define amdgpu_kernel void @byref_constant_v4i32_arg(<4 x i32> addrspace(1)* noca ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; HSA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; HSA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load 4, align 16, addrspace 4) - ; HSA-VI: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 16 from %ir.in.byref, addrspace 4) - ; HSA-VI: G_STORE [[LOAD2]](<4 x s32>), [[LOAD]](p1) :: (volatile store 16 into %ir.out, align 4, addrspace 1) - ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out.cast, addrspace 1) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) + ; HSA-VI: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (<4 x s32>) from %ir.in.byref, addrspace 4) + ; HSA-VI: G_STORE [[LOAD2]](<4 x s32>), [[LOAD]](p1) :: (volatile store (<4 x s32>) into %ir.out, align 4, addrspace 1) + ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out.cast, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: byref_constant_v4i32_arg ; LEGACY-MESA-VI: bb.1 (%ir-block.0): @@ -1395,15 +1395,15 @@ define amdgpu_kernel void @byref_constant_v4i32_arg(<4 x i32> addrspace(1)* noca ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; LEGACY-MESA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 68 ; LEGACY-MESA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load 4, addrspace 4) - ; LEGACY-MESA-VI: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 16 from %ir.in.byref, addrspace 4) - ; LEGACY-MESA-VI: G_STORE [[LOAD2]](<4 x s32>), [[LOAD]](p1) :: (volatile store 16 into %ir.out, align 4, addrspace 1) - ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out.cast, addrspace 1) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), addrspace 4) + ; LEGACY-MESA-VI: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (<4 x s32>) from %ir.in.byref, addrspace 4) + ; LEGACY-MESA-VI: G_STORE [[LOAD2]](<4 x s32>), [[LOAD]](p1) :: (volatile store (<4 x s32>) into %ir.out, align 4, addrspace 1) + ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out.cast, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 %in = load <4 x i32>, <4 x i32> addrspace(4)* %in.byref store volatile <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4 @@ -1419,15 +1419,15 @@ define amdgpu_kernel void @byref_align_constant_i32_arg(i32 addrspace(1)* nocapt ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; HSA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 260 ; HSA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load 4, addrspace 4) - ; HSA-VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 4 from %ir.in.byref, addrspace 4) - ; HSA-VI: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1) - ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), addrspace 4) + ; HSA-VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4) + ; HSA-VI: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) + ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: byref_align_constant_i32_arg ; LEGACY-MESA-VI: bb.1 (%ir-block.0): @@ -1435,15 +1435,15 @@ define amdgpu_kernel void @byref_align_constant_i32_arg(i32 addrspace(1)* nocapt ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 292 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; LEGACY-MESA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 296 ; LEGACY-MESA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load 4, align 8, addrspace 4) - ; LEGACY-MESA-VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 4 from %ir.in.byref, addrspace 4) - ; LEGACY-MESA-VI: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1) - ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4) + ; LEGACY-MESA-VI: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) + ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 %in = load i32, i32 addrspace(4)* %in.byref store volatile i32 %in, i32 addrspace(1)* %out, align 4 @@ -1458,15 +1458,15 @@ define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(i32 addrspace ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; HSA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 ; HSA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load 4, align 16, addrspace 4) - ; HSA-VI: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 64 from %ir.in.byref, addrspace 4) - ; HSA-VI: G_STORE [[LOAD2]](<16 x s32>), [[LOAD]](p1) :: (volatile store 64 into %ir.cast.out, align 4, addrspace 1) - ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) + ; HSA-VI: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (<16 x s32>) from %ir.in.byref, addrspace 4) + ; HSA-VI: G_STORE [[LOAD2]](<16 x s32>), [[LOAD]](p1) :: (volatile store (<16 x s32>) into %ir.cast.out, align 4, addrspace 1) + ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: byref_natural_align_constant_v16i32_arg ; LEGACY-MESA-VI: bb.1 (%ir-block.1): @@ -1474,15 +1474,15 @@ define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(i32 addrspace ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 100 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; LEGACY-MESA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 164 ; LEGACY-MESA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load 4, addrspace 4) - ; LEGACY-MESA-VI: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 64 from %ir.in.byref, addrspace 4) - ; LEGACY-MESA-VI: G_STORE [[LOAD2]](<16 x s32>), [[LOAD]](p1) :: (volatile store 64 into %ir.cast.out, align 4, addrspace 1) - ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), addrspace 4) + ; LEGACY-MESA-VI: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (<16 x s32>) from %ir.in.byref, addrspace 4) + ; LEGACY-MESA-VI: G_STORE [[LOAD2]](<16 x s32>), [[LOAD]](p1) :: (volatile store (<16 x s32>) into %ir.cast.out, align 4, addrspace 1) + ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 %in = load <16 x i32>, <16 x i32> addrspace(4)* %in.byref %cast.out = bitcast i32 addrspace(1)* %out to <16 x i32> addrspace(1)* @@ -1499,13 +1499,13 @@ define amdgpu_kernel void @byref_global_i32_arg(i32 addrspace(1)* nocapture %out ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; HSA-VI: [[ADDRSPACE_CAST:%[0-9]+]]:_(p1) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) ; HSA-VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[ADDRSPACE_CAST]](p1) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p1) :: (dereferenceable load 4 from %ir.1, addrspace 1) - ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p1) :: (dereferenceable load (s32) from %ir.1, addrspace 1) + ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: byref_global_i32_arg ; LEGACY-MESA-VI: bb.1 (%ir-block.0): @@ -1513,13 +1513,13 @@ define amdgpu_kernel void @byref_global_i32_arg(i32 addrspace(1)* nocapture %out ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; LEGACY-MESA-VI: [[ADDRSPACE_CAST:%[0-9]+]]:_(p1) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) ; LEGACY-MESA-VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[ADDRSPACE_CAST]](p1) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p1) :: (dereferenceable load 4 from %ir.1, addrspace 1) - ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p1) :: (dereferenceable load (s32) from %ir.1, addrspace 1) + ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 %in = load i32, i32 addrspace(1)* %in.byref store i32 %in, i32 addrspace(1)* %out, align 4 @@ -1533,12 +1533,12 @@ define amdgpu_kernel void @byref_flat_i32_arg(i32 addrspace(1)* nocapture %out, ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; HSA-VI: [[ADDRSPACE_CAST:%[0-9]+]]:_(p0) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p0) :: (dereferenceable load 4 from %ir.in.byref) - ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p0) :: (dereferenceable load (s32) from %ir.in.byref) + ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: byref_flat_i32_arg ; LEGACY-MESA-VI: bb.1 (%ir-block.0): @@ -1546,12 +1546,12 @@ define amdgpu_kernel void @byref_flat_i32_arg(i32 addrspace(1)* nocapture %out, ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; LEGACY-MESA-VI: [[ADDRSPACE_CAST:%[0-9]+]]:_(p0) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p0) :: (dereferenceable load 4 from %ir.in.byref) - ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p0) :: (dereferenceable load (s32) from %ir.in.byref) + ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 %in = load i32, i32* %in.byref store i32 %in, i32 addrspace(1)* %out, align 4 @@ -1565,12 +1565,12 @@ define amdgpu_kernel void @byref_constant_32bit_i32_arg(i32 addrspace(1)* nocapt ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; HSA-VI: [[ADDRSPACE_CAST:%[0-9]+]]:_(p6) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p6) :: (dereferenceable load 4 from %ir.in.byref, addrspace 6) - ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p6) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 6) + ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: byref_constant_32bit_i32_arg ; LEGACY-MESA-VI: bb.1 (%ir-block.0): @@ -1578,12 +1578,12 @@ define amdgpu_kernel void @byref_constant_32bit_i32_arg(i32 addrspace(1)* nocapt ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; LEGACY-MESA-VI: [[ADDRSPACE_CAST:%[0-9]+]]:_(p6) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p6) :: (dereferenceable load 4 from %ir.in.byref, addrspace 6) - ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p6) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 6) + ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 %in = load i32, i32 addrspace(6)* %in.byref store i32 %in, i32 addrspace(1)* %out, align 4 @@ -1597,12 +1597,12 @@ define amdgpu_kernel void @byref_unknown_as_i32_arg(i32 addrspace(1)* nocapture ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; HSA-VI: [[ADDRSPACE_CAST:%[0-9]+]]:_(p999) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p999) :: (dereferenceable load 4 from %ir.in.byref, addrspace 999) - ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p999) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 999) + ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: byref_unknown_as_i32_arg ; LEGACY-MESA-VI: bb.1 (%ir-block.0): @@ -1610,12 +1610,12 @@ define amdgpu_kernel void @byref_unknown_as_i32_arg(i32 addrspace(1)* nocapture ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; LEGACY-MESA-VI: [[ADDRSPACE_CAST:%[0-9]+]]:_(p999) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p999) :: (dereferenceable load 4 from %ir.in.byref, addrspace 999) - ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p999) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 999) + ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 %in = load i32, i32 addrspace(999)* %in.byref store i32 %in, i32 addrspace(1)* %out, align 4 @@ -1630,12 +1630,12 @@ define amdgpu_kernel void @byref_local_i32_arg(i32 addrspace(1)* nocapture %out, ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; HSA-VI: [[ADDRSPACE_CAST:%[0-9]+]]:_(p3) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p3) :: (dereferenceable load 4 from %ir.in.byref, addrspace 3) - ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p3) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 3) + ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: byref_local_i32_arg ; LEGACY-MESA-VI: bb.1 (%ir-block.0): @@ -1643,12 +1643,12 @@ define amdgpu_kernel void @byref_local_i32_arg(i32 addrspace(1)* nocapture %out, ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; LEGACY-MESA-VI: [[ADDRSPACE_CAST:%[0-9]+]]:_(p3) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p3) :: (dereferenceable load 4 from %ir.in.byref, addrspace 3) - ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p3) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 3) + ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 %in = load i32, i32 addrspace(3)* %in.byref store i32 %in, i32 addrspace(1)* %out, align 4 @@ -1662,19 +1662,19 @@ define amdgpu_kernel void @multi_byref_constant_i32_arg(i32 addrspace(1)* nocapt ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; HSA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; HSA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) ; HSA-VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; HSA-VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load 4, align 16, addrspace 4) - ; HSA-VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 4 from %ir.in0.byref, addrspace 4) - ; HSA-VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable load 4 from %ir.in1.byref, addrspace 4) - ; HSA-VI: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1) - ; HSA-VI: G_STORE [[LOAD3]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1) - ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) + ; HSA-VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in0.byref, addrspace 4) + ; HSA-VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable load (s32) from %ir.in1.byref, addrspace 4) + ; HSA-VI: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) + ; HSA-VI: G_STORE [[LOAD3]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) + ; HSA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: multi_byref_constant_i32_arg ; LEGACY-MESA-VI: bb.1 (%ir-block.0): @@ -1682,19 +1682,19 @@ define amdgpu_kernel void @multi_byref_constant_i32_arg(i32 addrspace(1)* nocapt ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; LEGACY-MESA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 ; LEGACY-MESA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) ; LEGACY-MESA-VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 ; LEGACY-MESA-VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load 4, addrspace 4) - ; LEGACY-MESA-VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 4 from %ir.in0.byref, addrspace 4) - ; LEGACY-MESA-VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable load 4 from %ir.in1.byref, addrspace 4) - ; LEGACY-MESA-VI: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1) - ; LEGACY-MESA-VI: G_STORE [[LOAD3]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1) - ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), addrspace 4) + ; LEGACY-MESA-VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in0.byref, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable load (s32) from %ir.in1.byref, addrspace 4) + ; LEGACY-MESA-VI: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) + ; LEGACY-MESA-VI: G_STORE [[LOAD3]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) + ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 %in0 = load i32, i32 addrspace(4)* %in0.byref %in1 = load i32, i32 addrspace(4)* %in1.byref @@ -1712,8 +1712,8 @@ define amdgpu_kernel void @byref_constant_i32_arg_offset0(i32 addrspace(4)* byre ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; HSA-VI: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; HSA-VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable load 4 from %ir.in.byref, addrspace 4) - ; HSA-VI: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4) + ; HSA-VI: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: byref_constant_i32_arg_offset0 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): @@ -1722,8 +1722,8 @@ define amdgpu_kernel void @byref_constant_i32_arg_offset0(i32 addrspace(4)* byre ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; LEGACY-MESA-VI: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable load 4 from %ir.in.byref, addrspace 4) - ; LEGACY-MESA-VI: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4) + ; LEGACY-MESA-VI: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 %in = load i32, i32 addrspace(4)* %in.byref store i32 %in, i32 addrspace(1)* undef, align 4 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll index 6a32581f7a9c5..f5be7ef76f4af 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll @@ -32,7 +32,7 @@ define amdgpu_vs void @test_ptr2_inreg(i32 addrspace(4)* inreg %arg0) { ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 ; CHECK: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (volatile load 4 from %ir.arg0, addrspace 4) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (volatile load (s32) from %ir.arg0, addrspace 4) ; CHECK: S_ENDPGM 0 %tmp0 = load volatile i32, i32 addrspace(4)* %arg0 ret void @@ -47,7 +47,7 @@ define amdgpu_vs void @test_sgpr_alignment0(float inreg %arg0, i32 addrspace(4)* ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 ; CHECK: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (volatile load 4 from %ir.arg1, addrspace 4) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (volatile load (s32) from %ir.arg1, addrspace 4) ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[COPY]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), 0, 0 ; CHECK: S_ENDPGM 0 %tmp0 = load volatile i32, i32 addrspace(4)* %arg1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll index 34c1678784074..35f88d71394da 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll @@ -8,7 +8,7 @@ define float @test_atomicrmw_fadd(float addrspace(3)* %addr) { ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; CHECK: [[ATOMICRMW_FADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_FADD [[COPY]](p3), [[C]] :: (load store seq_cst 4 on %ir.addr, addrspace 3) + ; CHECK: [[ATOMICRMW_FADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_FADD [[COPY]](p3), [[C]] :: (load store seq_cst (s32) on %ir.addr, addrspace 3) ; CHECK: $vgpr0 = COPY [[ATOMICRMW_FADD]](s32) ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 @@ -25,13 +25,13 @@ define float @test_atomicrmw_fsub(float addrspace(3)* %addr) { ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4 from %ir.addr, addrspace 3) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32) from %ir.addr, addrspace 3) ; CHECK: bb.2.atomicrmw.start: ; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000) ; CHECK: [[PHI:%[0-9]+]]:_(s64) = G_PHI %9(s64), %bb.2, [[C1]](s64), %bb.1 ; CHECK: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[LOAD]](s32), %bb.1, %7(s32), %bb.2 ; CHECK: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[PHI1]], [[C]] - ; CHECK: [[ATOMIC_CMPXCHG_WITH_SUCCESS:%[0-9]+]]:_(s32), [[ATOMIC_CMPXCHG_WITH_SUCCESS1:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[COPY]](p3), [[PHI1]], [[FSUB]] :: (load store seq_cst seq_cst 4 on %ir.2, addrspace 3) + ; CHECK: [[ATOMIC_CMPXCHG_WITH_SUCCESS:%[0-9]+]]:_(s32), [[ATOMIC_CMPXCHG_WITH_SUCCESS1:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[COPY]](p3), [[PHI1]], [[FSUB]] :: (load store seq_cst seq_cst (s32) on %ir.2, addrspace 3) ; CHECK: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[ATOMIC_CMPXCHG_WITH_SUCCESS1]](s1), [[PHI]](s64) ; CHECK: [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), [[INT]](s64) ; CHECK: G_BRCOND [[INT1]](s1), %bb.3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll index e96af5efd13a0..a1fb4dbb54429 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll @@ -266,7 +266,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32([17 x i8]) #0 { ; GFX900: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg ; GFX900: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX900: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32) - ; GFX900: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store 4 into stack, align 16, addrspace 5) + ; GFX900: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5) ; GFX900: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg ; GFX900: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) ; GFX900: $sgpr4_sgpr5 = COPY [[COPY10]](p4) @@ -351,7 +351,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32([17 x i8]) #0 { ; GFX908: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg ; GFX908: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX908: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32) - ; GFX908: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store 4 into stack, align 16, addrspace 5) + ; GFX908: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5) ; GFX908: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg ; GFX908: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) ; GFX908: $sgpr4_sgpr5 = COPY [[COPY10]](p4) @@ -480,7 +480,7 @@ define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 { ; GFX900: [[COPY34:%[0-9]+]]:_(p5) = COPY $sgpr32 ; GFX900: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX900: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY34]], [[C1]](s32) - ; GFX900: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store 4 into stack, align 16, addrspace 5) + ; GFX900: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5) ; GFX900: [[COPY35:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX900: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY35]](<4 x s32>) ; GFX900: $sgpr4_sgpr5 = COPY [[COPY26]](p4) @@ -605,7 +605,7 @@ define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 { ; GFX908: [[COPY34:%[0-9]+]]:_(p5) = COPY $sgpr32 ; GFX908: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX908: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY34]], [[C1]](s32) - ; GFX908: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store 4 into stack, align 16, addrspace 5) + ; GFX908: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5) ; GFX908: [[COPY35:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX908: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY35]](<4 x s32>) ; GFX908: $sgpr4_sgpr5 = COPY [[COPY26]](p4) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll index 6ba8db133437a..f4045ef5b5bcd 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll @@ -71,11 +71,11 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load 8 from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load 1 from %ir.ptr0, align 4, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load (s8) from %ir.ptr0, align 4, addrspace 1) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 4 from %ir.ptr0 + 4, addrspace 1) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from %ir.ptr0 + 4, addrspace 1) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32 ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD1]](s8) @@ -100,11 +100,11 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() # ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load 8 from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load 1 from %ir.ptr0, align 4, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load (s8) from %ir.ptr0, align 4, addrspace 1) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 4 from %ir.ptr0 + 4, addrspace 1) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from %ir.ptr0 + 4, addrspace 1) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32_inreg ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD1]](s8) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll index f6f76aa36e275..139f0c4c321fe 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll @@ -83,7 +83,7 @@ define amdgpu_kernel void @test_call_external_i32_func_i32_imm(i32 addrspace(1)* ; GCN: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GCN: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; GCN: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load 8 from %ir.out.kernarg.offset.cast, align 16, addrspace 4) + ; GCN: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p1) from %ir.out.kernarg.offset.cast, align 16, addrspace 4) ; GCN: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i32_func_i32 ; GCN: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -118,7 +118,7 @@ define amdgpu_kernel void @test_call_external_i32_func_i32_imm(i32 addrspace(1)* ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i32_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0 ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN: G_STORE [[COPY21]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out.load, addrspace 1) + ; GCN: G_STORE [[COPY21]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out.load, addrspace 1) ; GCN: S_ENDPGM 0 %val = call i32 @external_i32_func_i32(i32 42) store volatile i32 %val, i32 addrspace(1)* %out @@ -142,7 +142,7 @@ define amdgpu_gfx void @test_gfx_call_external_i32_func_i32_imm(i32 addrspace(1) ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_i32_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN: G_STORE [[COPY4]](s32), [[MV]](p1) :: (volatile store 4 into %ir.out, addrspace 1) + ; GCN: G_STORE [[COPY4]](s32), [[MV]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) ; GCN: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] ; GCN: S_SETPC_B64_return [[COPY5]] %val = call amdgpu_gfx i32 @external_gfx_i32_func_i32(i32 42) @@ -199,7 +199,7 @@ define amdgpu_kernel void @test_call_external_i1_func_void() #0 { ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY21]](s32) ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN: G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (volatile store 1 into `i1 addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (volatile store (s1) into `i1 addrspace(1)* undef`, addrspace 1) ; GCN: S_ENDPGM 0 %val = call i1 @external_i1_func_void() store volatile i1 %val, i1 addrspace(1)* undef @@ -220,7 +220,7 @@ define amdgpu_gfx void @test_gfx_call_external_i1_func_void() #0 { ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY2]](s32) ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN: G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (volatile store 1 into `i1 addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (volatile store (s1) into `i1 addrspace(1)* undef`, addrspace 1) ; GCN: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] ; GCN: S_SETPC_B64_return [[COPY3]] %val = call amdgpu_gfx i1 @external_gfx_i1_func_void() @@ -278,7 +278,7 @@ define amdgpu_kernel void @test_call_external_i1_zeroext_func_void() #0 { ; GCN: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY21]](s32) ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s1) - ; GCN: G_STORE [[ZEXT]](s32), [[DEF]](p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[ZEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; GCN: S_ENDPGM 0 %val = call i1 @external_i1_zeroext_func_void() %val.ext = zext i1 %val to i32 @@ -336,7 +336,7 @@ define amdgpu_kernel void @test_call_external_i1_signext_func_void() #0 { ; GCN: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY21]](s32) ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s1) - ; GCN: G_STORE [[SEXT]](s32), [[DEF]](p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[SEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; GCN: S_ENDPGM 0 %val = call i1 @external_i1_signext_func_void() %val.ext = sext i1 %val to i32 @@ -394,7 +394,7 @@ define amdgpu_kernel void @test_call_external_i8_func_void() #0 { ; GCN: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32) ; GCN: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (volatile store 1 into `i8 addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1) ; GCN: S_ENDPGM 0 %val = call i8 @external_i8_func_void() store volatile i8 %val, i8 addrspace(1)* undef @@ -416,7 +416,7 @@ define amdgpu_gfx void @test_gfx_call_external_i8_func_void() #0 { ; GCN: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) ; GCN: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (volatile store 1 into `i8 addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1) ; GCN: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] ; GCN: S_SETPC_B64_return [[COPY3]] %val = call amdgpu_gfx i8 @external_gfx_i8_func_void() @@ -475,7 +475,7 @@ define amdgpu_kernel void @test_call_external_i8_zeroext_func_void() #0 { ; GCN: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC1]](s8) - ; GCN: G_STORE [[ZEXT]](s32), [[DEF]](p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[ZEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; GCN: S_ENDPGM 0 %val = call i8 @external_i8_zeroext_func_void() %val.ext = zext i8 %val to i32 @@ -534,7 +534,7 @@ define amdgpu_kernel void @test_call_external_i8_signext_func_void() #0 { ; GCN: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s8) - ; GCN: G_STORE [[SEXT]](s32), [[DEF]](p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[SEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; GCN: S_ENDPGM 0 %val = call i8 @external_i8_signext_func_void() %val.ext = sext i8 %val to i32 @@ -591,7 +591,7 @@ define amdgpu_kernel void @test_call_external_i16_func_void() #0 { ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32) ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (volatile store 2 into `i16 addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (volatile store (s16) into `i16 addrspace(1)* undef`, addrspace 1) ; GCN: S_ENDPGM 0 %val = call i16 @external_i16_func_void() store volatile i16 %val, i16 addrspace(1)* undef @@ -648,7 +648,7 @@ define amdgpu_kernel void @test_call_external_i16_zeroext_func_void() #0 { ; GCN: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32) ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s16) - ; GCN: G_STORE [[ZEXT]](s32), [[DEF]](p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[ZEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; GCN: S_ENDPGM 0 %val = call i16 @external_i16_zeroext_func_void() %val.ext = zext i16 %val to i32 @@ -706,7 +706,7 @@ define amdgpu_kernel void @test_call_external_i16_signext_func_void() #0 { ; GCN: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32) ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16) - ; GCN: G_STORE [[SEXT]](s32), [[DEF]](p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[SEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; GCN: S_ENDPGM 0 %val = call i16 @external_i16_signext_func_void() %val.ext = sext i16 %val to i32 @@ -762,7 +762,7 @@ define amdgpu_kernel void @test_call_external_i32_func_void() #0 { ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0 ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN: G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; GCN: S_ENDPGM 0 %val = call i32 @external_i32_func_void() store volatile i32 %val, i32 addrspace(1)* undef @@ -782,7 +782,7 @@ define amdgpu_gfx void @test_gfx_call_external_i32_func_void() #0 { ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN: G_STORE [[COPY2]](s32), [[DEF]](p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[COPY2]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; GCN: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] ; GCN: S_SETPC_B64_return [[COPY3]] %val = call amdgpu_gfx i32 @external_gfx_i32_func_void() @@ -841,7 +841,7 @@ define amdgpu_kernel void @test_call_external_i48_func_void() #0 { ; GCN: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) ; GCN: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64) ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN: G_STORE [[TRUNC]](s48), [[DEF]](p1) :: (volatile store 6 into `i48 addrspace(1)* undef`, align 8, addrspace 1) + ; GCN: G_STORE [[TRUNC]](s48), [[DEF]](p1) :: (volatile store (s48) into `i48 addrspace(1)* undef`, align 8, addrspace 1) ; GCN: S_ENDPGM 0 %val = call i48 @external_i48_func_void() store volatile i48 %val, i48 addrspace(1)* undef @@ -900,7 +900,7 @@ define amdgpu_kernel void @test_call_external_i48_zeroext_func_void() #0 { ; GCN: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64) ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) - ; GCN: G_STORE [[ZEXT]](s64), [[DEF]](p1) :: (volatile store 8 into `i64 addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[ZEXT]](s64), [[DEF]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) ; GCN: S_ENDPGM 0 %val = call i48 @external_i48_zeroext_func_void() %ext = zext i48 %val to i64 @@ -960,7 +960,7 @@ define amdgpu_kernel void @test_call_external_i48_signext_func_void() #0 { ; GCN: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64) ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s48) - ; GCN: G_STORE [[SEXT]](s64), [[DEF]](p1) :: (volatile store 8 into `i64 addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[SEXT]](s64), [[DEF]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) ; GCN: S_ENDPGM 0 %val = call i48 @external_i48_signext_func_void() %ext = sext i48 %val to i64 @@ -1018,7 +1018,7 @@ define amdgpu_kernel void @test_call_external_i64_func_void() #0 { ; GCN: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN: G_STORE [[MV]](s64), [[DEF]](p1) :: (volatile store 8 into `i64 addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[MV]](s64), [[DEF]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) ; GCN: S_ENDPGM 0 %val = call i64 @external_i64_func_void() store volatile i64 %val, i64 addrspace(1)* undef @@ -1075,7 +1075,7 @@ define amdgpu_kernel void @test_call_external_p1_func_void() #0 { ; GCN: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN: G_STORE [[MV]](p1), [[DEF]](p1) :: (volatile store 8 into `i8 addrspace(1)* addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[MV]](p1), [[DEF]](p1) :: (volatile store (p1) into `i8 addrspace(1)* addrspace(1)* undef`, addrspace 1) ; GCN: S_ENDPGM 0 %val = call i8 addrspace(1)* @external_p1_func_void() store volatile i8 addrspace(1)* %val, i8 addrspace(1)* addrspace(1)* undef @@ -1136,7 +1136,7 @@ define amdgpu_kernel void @test_call_external_v2p1_func_void() #0 { ; GCN: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY23]](s32), [[COPY24]](s32) ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN: G_STORE [[BUILD_VECTOR]](<2 x p1>), [[DEF]](p1) :: (volatile store 16 into `<2 x i8 addrspace(1)*> addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[BUILD_VECTOR]](<2 x p1>), [[DEF]](p1) :: (volatile store (<2 x p1>) into `<2 x i8 addrspace(1)*> addrspace(1)* undef`, addrspace 1) ; GCN: S_ENDPGM 0 %val = call <2 x i8 addrspace(1)*> @external_v2p1_func_void() store volatile <2 x i8 addrspace(1)*> %val, <2 x i8 addrspace(1)*> addrspace(1)* undef @@ -1191,7 +1191,7 @@ define amdgpu_kernel void @test_call_external_p3_func_void() #0 { ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_p3_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0 ; GCN: [[COPY21:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN: G_STORE [[COPY21]](p3), [[DEF]](p3) :: (volatile store 4 into `i8 addrspace(3)* addrspace(3)* undef`, addrspace 3) + ; GCN: G_STORE [[COPY21]](p3), [[DEF]](p3) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(3)* undef`, addrspace 3) ; GCN: S_ENDPGM 0 %val = call i8 addrspace(3)* @external_p3_func_void() store volatile i8 addrspace(3)* %val, i8 addrspace(3)* addrspace(3)* undef @@ -1248,7 +1248,7 @@ define amdgpu_kernel void @test_call_external_v2p3_func_void() #0 { ; GCN: [[COPY22:%[0-9]+]]:_(p3) = COPY $vgpr1 ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY21]](p3), [[COPY22]](p3) ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN: G_STORE [[BUILD_VECTOR]](<2 x p3>), [[DEF]](p3) :: (volatile store 8 into `<2 x i8 addrspace(3)*> addrspace(3)* undef`, addrspace 3) + ; GCN: G_STORE [[BUILD_VECTOR]](<2 x p3>), [[DEF]](p3) :: (volatile store (<2 x p3>) into `<2 x i8 addrspace(3)*> addrspace(3)* undef`, addrspace 3) ; GCN: S_ENDPGM 0 %val = call <2 x i8 addrspace(3)*> @external_v2p3_func_void() store volatile <2 x i8 addrspace(3)*> %val, <2 x i8 addrspace(3)*> addrspace(3)* undef @@ -1304,7 +1304,7 @@ define amdgpu_kernel void @test_call_external_f16_func_void() #0 { ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32) ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (volatile store 2 into `half addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (volatile store (s16) into `half addrspace(1)* undef`, addrspace 1) ; GCN: S_ENDPGM 0 %val = call half @external_f16_func_void() store volatile half %val, half addrspace(1)* undef @@ -1359,7 +1359,7 @@ define amdgpu_kernel void @test_call_external_f32_func_void() #0 { ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_f32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0 ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN: G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store 4 into `float addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `float addrspace(1)* undef`, addrspace 1) ; GCN: S_ENDPGM 0 %val = call float @external_f32_func_void() store volatile float %val, float addrspace(1)* undef @@ -1416,7 +1416,7 @@ define amdgpu_kernel void @test_call_external_f64_func_void() #0 { ; GCN: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN: G_STORE [[MV]](s64), [[DEF]](p1) :: (volatile store 8 into `double addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[MV]](s64), [[DEF]](p1) :: (volatile store (s64) into `double addrspace(1)* undef`, addrspace 1) ; GCN: S_ENDPGM 0 %val = call double @external_f64_func_void() store volatile double %val, double addrspace(1)* undef @@ -1477,7 +1477,7 @@ define amdgpu_kernel void @test_call_external_v2f64_func_void() #0 { ; GCN: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY23]](s32), [[COPY24]](s32) ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (volatile store 16 into `<2 x double> addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (volatile store (<2 x s64>) into `<2 x double> addrspace(1)* undef`, addrspace 1) ; GCN: S_ENDPGM 0 %val = call <2 x double> @external_v2f64_func_void() store volatile <2 x double> %val, <2 x double> addrspace(1)* undef @@ -1534,7 +1534,7 @@ define amdgpu_kernel void @test_call_external_v2i32_func_void() #0 { ; GCN: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32) ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (volatile store 8 into `<2 x i32> addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (volatile store (<2 x s32>) into `<2 x i32> addrspace(1)* undef`, addrspace 1) ; GCN: S_ENDPGM 0 %val = call <2 x i32> @external_v2i32_func_void() store volatile <2 x i32> %val, <2 x i32> addrspace(1)* undef @@ -1592,7 +1592,7 @@ define amdgpu_kernel void @test_call_external_v3i32_func_void() #0 { ; GCN: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32) ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (volatile store 12 into `<3 x i32> addrspace(1)* undef`, align 8, addrspace 1) + ; GCN: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (volatile store (<3 x s32>) into `<3 x i32> addrspace(1)* undef`, align 8, addrspace 1) ; GCN: S_ENDPGM 0 %val = call <3 x i32> @external_v3i32_func_void() store volatile <3 x i32> %val, <3 x i32> addrspace(1)* undef, align 8 @@ -1651,7 +1651,7 @@ define amdgpu_kernel void @test_call_external_v4i32_func_void() #0 { ; GCN: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3 ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32) ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (volatile store 16 into `<4 x i32> addrspace(1)* undef`, align 8, addrspace 1) + ; GCN: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (volatile store (<4 x s32>) into `<4 x i32> addrspace(1)* undef`, align 8, addrspace 1) ; GCN: S_ENDPGM 0 %val = call <4 x i32> @external_v4i32_func_void() store volatile <4 x i32> %val, <4 x i32> addrspace(1)* undef, align 8 @@ -1711,7 +1711,7 @@ define amdgpu_kernel void @test_call_external_v5i32_func_void() #0 { ; GCN: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32) ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN: G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (volatile store 20 into `<5 x i32> addrspace(1)* undef`, align 8, addrspace 1) + ; GCN: G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (volatile store (<5 x s32>) into `<5 x i32> addrspace(1)* undef`, align 8, addrspace 1) ; GCN: S_ENDPGM 0 %val = call <5 x i32> @external_v5i32_func_void() store volatile <5 x i32> %val, <5 x i32> addrspace(1)* undef, align 8 @@ -1774,7 +1774,7 @@ define amdgpu_kernel void @test_call_external_v8i32_func_void() #0 { ; GCN: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr7 ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32) ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (volatile store 32 into `<8 x i32> addrspace(1)* undef`, align 8, addrspace 1) + ; GCN: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (volatile store (<8 x s32>) into `<8 x i32> addrspace(1)* undef`, align 8, addrspace 1) ; GCN: S_ENDPGM 0 %val = call <8 x i32> @external_v8i32_func_void() store volatile <8 x i32> %val, <8 x i32> addrspace(1)* undef, align 8 @@ -1845,7 +1845,7 @@ define amdgpu_kernel void @test_call_external_v16i32_func_void() #0 { ; GCN: [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr15 ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[COPY32]](s32), [[COPY33]](s32), [[COPY34]](s32), [[COPY35]](s32), [[COPY36]](s32) ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (volatile store 64 into `<16 x i32> addrspace(1)* undef`, align 8, addrspace 1) + ; GCN: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (volatile store (<16 x s32>) into `<16 x i32> addrspace(1)* undef`, align 8, addrspace 1) ; GCN: S_ENDPGM 0 %val = call <16 x i32> @external_v16i32_func_void() store volatile <16 x i32> %val, <16 x i32> addrspace(1)* undef, align 8 @@ -1932,7 +1932,7 @@ define amdgpu_kernel void @test_call_external_v32i32_func_void() #0 { ; GCN: [[COPY52:%[0-9]+]]:_(s32) = COPY $vgpr31 ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[COPY32]](s32), [[COPY33]](s32), [[COPY34]](s32), [[COPY35]](s32), [[COPY36]](s32), [[COPY37]](s32), [[COPY38]](s32), [[COPY39]](s32), [[COPY40]](s32), [[COPY41]](s32), [[COPY42]](s32), [[COPY43]](s32), [[COPY44]](s32), [[COPY45]](s32), [[COPY46]](s32), [[COPY47]](s32), [[COPY48]](s32), [[COPY49]](s32), [[COPY50]](s32), [[COPY51]](s32), [[COPY52]](s32) ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, align 8, addrspace 1) + ; GCN: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, align 8, addrspace 1) ; GCN: S_ENDPGM 0 %val = call <32 x i32> @external_v32i32_func_void() store volatile <32 x i32> %val, <32 x i32> addrspace(1)* undef, align 8 @@ -1987,7 +1987,7 @@ define amdgpu_kernel void @test_call_external_v2i16_func_void() #0 { ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v2i16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0 ; GCN: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN: G_STORE [[COPY21]](<2 x s16>), [[DEF]](p1) :: (volatile store 4 into `<2 x i16> addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[COPY21]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `<2 x i16> addrspace(1)* undef`, addrspace 1) ; GCN: S_ENDPGM 0 %val = call <2 x i16> @external_v2i16_func_void() store volatile <2 x i16> %val, <2 x i16> addrspace(1)* undef @@ -2046,7 +2046,7 @@ define amdgpu_kernel void @test_call_external_v3i16_func_void() #0 { ; GCN: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>), [[DEF1]](<2 x s16>) ; GCN: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN: G_STORE [[UV]](<3 x s16>), [[DEF]](p1) :: (volatile store 6 into `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) + ; GCN: G_STORE [[UV]](<3 x s16>), [[DEF]](p1) :: (volatile store (<3 x s16>) into `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) ; GCN: S_ENDPGM 0 %val = call <3 x i16> @external_v3i16_func_void() store volatile <3 x i16> %val, <3 x i16> addrspace(1)* undef @@ -2103,7 +2103,7 @@ define amdgpu_kernel void @test_call_external_v4i16_func_void() #0 { ; GCN: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GCN: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>) ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (volatile store 8 into `<4 x i16> addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (volatile store (<4 x s16>) into `<4 x i16> addrspace(1)* undef`, addrspace 1) ; GCN: S_ENDPGM 0 %val = call <4 x i16> @external_v4i16_func_void() store volatile <4 x i16> %val, <4 x i16> addrspace(1)* undef @@ -2158,7 +2158,7 @@ define amdgpu_kernel void @test_call_external_v2f16_func_void() #0 { ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v2f16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0 ; GCN: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN: G_STORE [[COPY21]](<2 x s16>), [[DEF]](p1) :: (volatile store 4 into `<2 x half> addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[COPY21]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `<2 x half> addrspace(1)* undef`, addrspace 1) ; GCN: S_ENDPGM 0 %val = call <2 x half> @external_v2f16_func_void() store volatile <2 x half> %val, <2 x half> addrspace(1)* undef @@ -2217,7 +2217,7 @@ define amdgpu_kernel void @test_call_external_v3f16_func_void() #0 { ; GCN: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>), [[DEF1]](<2 x s16>) ; GCN: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN: G_STORE [[UV]](<3 x s16>), [[DEF]](p1) :: (volatile store 6 into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1) + ; GCN: G_STORE [[UV]](<3 x s16>), [[DEF]](p1) :: (volatile store (<3 x s16>) into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1) ; GCN: S_ENDPGM 0 %val = call <3 x half> @external_v3f16_func_void() store volatile <3 x half> %val, <3 x half> addrspace(1)* undef @@ -2274,7 +2274,7 @@ define amdgpu_kernel void @test_call_external_v4f16_func_void() #0 { ; GCN: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GCN: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>) ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (volatile store 8 into `<4 x half> addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (volatile store (<4 x s16>) into `<4 x half> addrspace(1)* undef`, addrspace 1) ; GCN: S_ENDPGM 0 %val = call <4 x half> @external_v4f16_func_void() store volatile <4 x half> %val, <4 x half> addrspace(1)* undef @@ -2332,7 +2332,7 @@ define amdgpu_kernel void @test_call_external_v3f32_func_void() #0 { ; GCN: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32) ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (volatile store 12 into `<3 x float> addrspace(1)* undef`, align 16, addrspace 1) + ; GCN: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (volatile store (<3 x s32>) into `<3 x float> addrspace(1)* undef`, align 16, addrspace 1) ; GCN: S_ENDPGM 0 %val = call <3 x float> @external_v3f32_func_void() store volatile <3 x float> %val, <3 x float> addrspace(1)* undef @@ -2392,7 +2392,7 @@ define amdgpu_kernel void @test_call_external_v5f32_func_void() #0 { ; GCN: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32) ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN: G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (volatile store 20 into `<5 x float> addrspace(1)* undef`, align 32, addrspace 1) + ; GCN: G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (volatile store (<5 x s32>) into `<5 x float> addrspace(1)* undef`, align 32, addrspace 1) ; GCN: S_ENDPGM 0 %val = call <5 x float> @external_v5f32_func_void() store volatile <5 x float> %val, <5 x float> addrspace(1)* undef @@ -2452,8 +2452,8 @@ define amdgpu_kernel void @test_call_external_i32_i64_func_void() #0 { ; GCN: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GCN: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY23]](s32), [[COPY24]](s32) ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN: G_STORE [[COPY22]](s32), [[DEF]](p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) - ; GCN: G_STORE [[MV]](s64), [[COPY10]](p1) :: (volatile store 8 into `i64 addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[COPY22]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[MV]](s64), [[COPY10]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) ; GCN: S_ENDPGM 0 %val = call { i32, i64 } @external_i32_i64_func_void() %val.0 = extractvalue { i32, i64 } %val, 0 @@ -2480,8 +2480,8 @@ define amdgpu_gfx void @test_gfx_call_external_i32_i64_func_void() #0 { ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GCN: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN: G_STORE [[COPY3]](s32), [[DEF]](p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) - ; GCN: G_STORE [[MV]](s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[COPY3]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[MV]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) ; GCN: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] ; GCN: S_SETPC_B64_return [[COPY6]] %val = call amdgpu_gfx { i32, i64 } @external_gfx_i32_i64_func_void() @@ -2541,8 +2541,8 @@ define amdgpu_kernel void @test_call_external_a2i32_func_void() #0 { ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN: G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) - ; GCN: G_STORE [[COPY22]](s32), [[DEF]](p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[COPY22]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; GCN: S_ENDPGM 0 %val = call [2 x i32] @external_a2i32_func_void() %val.0 = extractvalue [2 x i32] %val, 0 @@ -2614,11 +2614,11 @@ define amdgpu_kernel void @test_call_external_a5i8_func_void() #0 { ; GCN: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY25]](s32) ; GCN: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC8]](s16) ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (volatile store 1 into `i8 addrspace(1)* undef`, addrspace 1) - ; GCN: G_STORE [[TRUNC3]](s8), [[DEF]](p1) :: (volatile store 1 into `i8 addrspace(1)* undef`, addrspace 1) - ; GCN: G_STORE [[TRUNC5]](s8), [[DEF]](p1) :: (volatile store 1 into `i8 addrspace(1)* undef`, addrspace 1) - ; GCN: G_STORE [[TRUNC7]](s8), [[DEF]](p1) :: (volatile store 1 into `i8 addrspace(1)* undef`, addrspace 1) - ; GCN: G_STORE [[TRUNC9]](s8), [[DEF]](p1) :: (volatile store 1 into `i8 addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[TRUNC3]](s8), [[DEF]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[TRUNC5]](s8), [[DEF]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[TRUNC7]](s8), [[DEF]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[TRUNC9]](s8), [[DEF]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1) ; GCN: S_ENDPGM 0 %val = call [5 x i8] @external_a5i8_func_void() %val.0 = extractvalue [5 x i8] %val, 0 @@ -2684,12 +2684,12 @@ define amdgpu_kernel void @test_call_external_v32i32_i32_func_void() #0 { ; GCN: $vgpr31 = COPY [[OR1]](s32) ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v32i32_i32_func_void, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load 128 from %stack.0, addrspace 5) + ; GCN: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (s1024) from %stack.0, addrspace 5) ; GCN: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 ; GCN: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C3]](s32) - ; GCN: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from %stack.0, align 128, addrspace 5) - ; GCN: G_STORE [[LOAD]](<32 x s32>), [[DEF]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, align 8, addrspace 1) - ; GCN: G_STORE [[LOAD1]](s32), [[COPY10]](p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from %stack.0, align 128, addrspace 5) + ; GCN: G_STORE [[LOAD]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, align 8, addrspace 1) + ; GCN: G_STORE [[LOAD1]](s32), [[COPY10]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; GCN: S_ENDPGM 0 %val = call { <32 x i32>, i32 } @external_v32i32_i32_func_void() %val0 = extractvalue { <32 x i32>, i32 } %val, 0 @@ -2749,12 +2749,12 @@ define amdgpu_kernel void @test_call_external_i32_v32i32_func_void() #0 { ; GCN: $vgpr31 = COPY [[OR1]](s32) ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i32_v32i32_func_void, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (load 4 from %stack.0, align 128, addrspace 5) + ; GCN: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (load (s32) from %stack.0, align 128, addrspace 5) ; GCN: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 ; GCN: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C3]](s32) - ; GCN: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[PTR_ADD1]](p5) :: (load 128 from %stack.0, addrspace 5) - ; GCN: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) - ; GCN: G_STORE [[LOAD1]](<32 x s32>), [[COPY10]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, align 8, addrspace 1) + ; GCN: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[PTR_ADD1]](p5) :: (load (s1024) from %stack.0, addrspace 5) + ; GCN: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[LOAD1]](<32 x s32>), [[COPY10]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, align 8, addrspace 1) ; GCN: S_ENDPGM 0 %val = call { i32, <32 x i32> } @external_i32_v32i32_func_void() %val0 = extractvalue { i32, <32 x i32> } %val, 0 @@ -2813,8 +2813,8 @@ define amdgpu_kernel void @test_call_external_v33i32_func_void() #0 { ; GCN: $vgpr31 = COPY [[OR1]](s32) ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v33i32_func_void, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN: [[LOAD:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load 132 from %stack.0, align 256, addrspace 5) - ; GCN: G_STORE [[LOAD]](<33 x s32>), [[DEF]](p1) :: (volatile store 132 into `<33 x i32> addrspace(1)* undef`, align 8, addrspace 1) + ; GCN: [[LOAD:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (s1056) from %stack.0, align 256, addrspace 5) + ; GCN: G_STORE [[LOAD]](<33 x s32>), [[DEF]](p1) :: (volatile store (<33 x s32>) into `<33 x i32> addrspace(1)* undef`, align 8, addrspace 1) ; GCN: S_ENDPGM 0 %val = call <33 x i32> @external_v33i32_func_void() store volatile <33 x i32> %val, <33 x i32> addrspace(1)* undef, align 8 @@ -2837,10 +2837,10 @@ define amdgpu_kernel void @test_call_external_v33i32_func_v33i32_i32(<33 x i32> ; GCN: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; GCN: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load 8 from %ir.p.kernarg.offset.cast, align 16, addrspace 4) + ; GCN: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p1) from %ir.p.kernarg.offset.cast, align 16, addrspace 4) ; GCN: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GCN: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[INT]], [[C]](s64) - ; GCN: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 4 from %ir.idx.kernarg.offset.cast, align 8, addrspace 4) + ; GCN: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32) from %ir.idx.kernarg.offset.cast, align 8, addrspace 4) ; GCN: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0 ; GCN: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v33i32_func_v33i32_i32 @@ -2879,8 +2879,8 @@ define amdgpu_kernel void @test_call_external_v33i32_func_v33i32_i32(<33 x i32> ; GCN: $vgpr31 = COPY [[OR1]](s32) ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v33i32_func_v33i32_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN: [[LOAD2:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load 132 from %stack.0, align 256, addrspace 5) - ; GCN: G_STORE [[LOAD2]](<33 x s32>), [[DEF]](p1) :: (volatile store 132 into `<33 x i32> addrspace(1)* undef`, align 8, addrspace 1) + ; GCN: [[LOAD2:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (s1056) from %stack.0, align 256, addrspace 5) + ; GCN: G_STORE [[LOAD2]](<33 x s32>), [[DEF]](p1) :: (volatile store (<33 x s32>) into `<33 x i32> addrspace(1)* undef`, align 8, addrspace 1) ; GCN: S_ENDPGM 0 %val = call <33 x i32> @external_v33i32_func_v33i32_i32(<33 x i32> addrspace(1)* %p, i32 %idx) store volatile <33 x i32> %val, <33 x i32> addrspace(1)* undef, align 8 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll index bbeb5c0f2fcfc..433b9c4945525 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll @@ -26,8 +26,8 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval ; GCN: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; GCN: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GCN: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C2]](s32) - ; GCN: G_STORE [[C]](s8), [[FRAME_INDEX]](p5) :: (store 1 into %ir.in.gep01, addrspace 5) - ; GCN: G_STORE [[C1]](s32), [[PTR_ADD]](p5) :: (store 4 into %ir.in.gep1, addrspace 5) + ; GCN: G_STORE [[C]](s8), [[FRAME_INDEX]](p5) :: (store (s8) into %ir.in.gep01, addrspace 5) + ; GCN: G_STORE [[C1]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.in.gep1, addrspace 5) ; GCN: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32 ; GCN: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -49,8 +49,13 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval ; GCN: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C5]](s32) ; GCN: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; GCN: $vgpr0 = COPY [[FRAME_INDEX1]](p5) - ; GCN: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) + ; GCN: [[COPY21:%[0-9]+]]:_(p5) = COPY $sp_reg + ; GCN: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GCN: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C6]](s32) + ; GCN: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GCN: G_MEMCPY [[PTR_ADD2]](p5), [[FRAME_INDEX]](p5), [[C7]](s32), 0 :: (dereferenceable store (s64) into stack, align 4, addrspace 5), (dereferenceable load (s64) from %ir.in.val, align 4, addrspace 5) + ; GCN: [[COPY22:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY22]](<4 x s32>) ; GCN: $sgpr4_sgpr5 = COPY [[COPY11]](p4) ; GCN: $sgpr6_sgpr7 = COPY [[COPY12]](p4) ; GCN: $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4) @@ -61,11 +66,11 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval ; GCN: $vgpr31 = COPY [[OR1]](s32) ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 ; GCN: ADJCALLSTACKDOWN 0, 8, implicit-def $scc - ; GCN: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C2]](s32) - ; GCN: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[FRAME_INDEX1]](p5) :: (dereferenceable load 1 from %ir.out.gep02, addrspace 5) - ; GCN: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (dereferenceable load 4 from %ir.out.gep1, addrspace 5) - ; GCN: G_STORE [[LOAD]](s8), [[DEF]](p1) :: (volatile store 1 into `i8 addrspace(1)* undef`, addrspace 1) - ; GCN: G_STORE [[LOAD1]](s32), [[COPY10]](p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C2]](s32) + ; GCN: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[FRAME_INDEX1]](p5) :: (dereferenceable load (s8) from %ir.out.gep02, addrspace 5) + ; GCN: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (dereferenceable load (s32) from %ir.out.gep1, addrspace 5) + ; GCN: G_STORE [[LOAD]](s8), [[DEF]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[LOAD1]](s32), [[COPY10]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; GCN: S_ENDPGM 0 %in.val = alloca { i8, i32 }, align 4, addrspace(5) %out.val = alloca { i8, i32 }, align 4, addrspace(5) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll index 431a3c5a150c6..d5f29b415f940 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll @@ -379,7 +379,7 @@ define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 { ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (volatile load 1 from `i1 addrspace(1)* undef`, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (volatile load (s1) from `i1 addrspace(1)* undef`, addrspace 1) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i1_signext ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -436,7 +436,7 @@ define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 { ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (volatile load 1 from `i1 addrspace(1)* undef`, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (volatile load (s1) from `i1 addrspace(1)* undef`, addrspace 1) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i1_zeroext ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -549,7 +549,7 @@ define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 { ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile load 1 from `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile load (s8) from `i8 addrspace(1)* undef`, addrspace 1) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i8_signext ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -607,7 +607,7 @@ define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 { ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile load 1 from `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile load (s8) from `i8 addrspace(1)* undef`, addrspace 1) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i8_zeroext ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -719,7 +719,7 @@ define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 { ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (volatile load 2 from `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (volatile load (s16) from `i16 addrspace(1)* undef`, addrspace 1) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i16_signext ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -776,7 +776,7 @@ define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 { ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (volatile load 2 from `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (volatile load (s16) from `i16 addrspace(1)* undef`, addrspace 1) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i16_zeroext ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -981,7 +981,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 { ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[C]](p1) :: (load 16 from `<2 x i64> addrspace(1)* null`, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[C]](p1) :: (load (<2 x s64>) from `<2 x i64> addrspace(1)* null`, addrspace 1) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v2i64 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -1100,7 +1100,7 @@ define amdgpu_kernel void @test_call_external_void_func_i48(i32) #0 { ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile load 6 from `i48 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i48 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -1160,7 +1160,7 @@ define amdgpu_kernel void @test_call_external_void_func_i48_signext(i32) #0 { ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile load 6 from `i48 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i48_signext ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -1220,7 +1220,7 @@ define amdgpu_kernel void @test_call_external_void_func_i48_zeroext(i32) #0 { ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile load 6 from `i48 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i48_zeroext ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -1279,7 +1279,7 @@ define amdgpu_kernel void @test_call_external_void_func_p0_imm(i8* %arg) #0 { ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load 8 from %ir.arg.kernarg.offset.cast, align 16, addrspace 4) + ; CHECK: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p0) from %ir.arg.kernarg.offset.cast, align 16, addrspace 4) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_p0 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -1335,7 +1335,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2p0() #0 { ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x p0>) = G_LOAD [[C]](p1) :: (load 16 from `<2 x i8*> addrspace(1)* null`, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x p0>) = G_LOAD [[C]](p1) :: (load (<2 x p0>) from `<2 x i8*> addrspace(1)* null`, addrspace 1) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v2p0 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -1397,7 +1397,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 { ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934593 ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C1]](s64), [[DEF]](s64) - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[C]](p1) :: (load 16 from `<2 x i64> addrspace(1)* null`, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[C]](p1) :: (load (<2 x s64>) from `<2 x i64> addrspace(1)* null`, addrspace 1) ; CHECK: [[SHUF:%[0-9]+]]:_(<3 x s64>) = G_SHUFFLE_VECTOR [[LOAD]](<2 x s64>), [[BUILD_VECTOR]], shufflemask(0, 1, 2) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v3i64 @@ -1464,7 +1464,7 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 { ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934593 ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 17179869187 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C1]](s64), [[C2]](s64) - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[C]](p1) :: (load 16 from `<2 x i64> addrspace(1)* null`, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[C]](p1) :: (load (<2 x s64>) from `<2 x i64> addrspace(1)* null`, addrspace 1) ; CHECK: [[SHUF:%[0-9]+]]:_(<4 x s64>) = G_SHUFFLE_VECTOR [[LOAD]](<2 x s64>), [[BUILD_VECTOR]], shufflemask(0, 1, 2, 3) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v4i64 @@ -1991,7 +1991,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 { ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load 4 from `<2 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load (<2 x s16>) from `<2 x i16> addrspace(1)* undef`, addrspace 1) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v2i16 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -2046,7 +2046,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 { ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: (load 6 from `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: (load (<3 x s16>) from `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v3i16 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -2105,7 +2105,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 { ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: (load 6 from `<3 x half> addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: (load (<3 x s16>) from `<3 x half> addrspace(1)* undef`, align 8, addrspace 1) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v3f16 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -2164,7 +2164,7 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 { ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load 8 from `<4 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load (<4 x s16>) from `<4 x i16> addrspace(1)* undef`, addrspace 1) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v4i16 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -2280,7 +2280,7 @@ define amdgpu_kernel void @test_call_external_void_func_v5i16() #0 { ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[DEF]](p1) :: (load 10 from `<5 x i16> addrspace(1)* undef`, align 16, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[DEF]](p1) :: (load (<5 x s16>) from `<5 x i16> addrspace(1)* undef`, align 16, addrspace 1) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v5i16 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -2340,7 +2340,7 @@ define amdgpu_kernel void @test_call_external_void_func_v7i16() #0 { ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[DEF]](p1) :: (load 14 from `<7 x i16> addrspace(1)* undef`, align 16, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[DEF]](p1) :: (load (<7 x s16>) from `<7 x i16> addrspace(1)* undef`, align 16, addrspace 1) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v7i16 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -2401,7 +2401,7 @@ define amdgpu_kernel void @test_call_external_void_func_v63i16() #0 { ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<63 x s16>) = G_LOAD [[DEF]](p1) :: (load 126 from `<63 x i16> addrspace(1)* undef`, align 128, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(<63 x s16>) = G_LOAD [[DEF]](p1) :: (load (<63 x s16>) from `<63 x i16> addrspace(1)* undef`, align 128, addrspace 1) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v63i16 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -2459,7 +2459,7 @@ define amdgpu_kernel void @test_call_external_void_func_v63i16() #0 { ; CHECK: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C3]](s32) - ; CHECK: G_STORE [[UV31]](<2 x s16>), [[PTR_ADD1]](p5) :: (store 4 into stack, align 16, addrspace 5) + ; CHECK: G_STORE [[UV31]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5) ; CHECK: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) ; CHECK: $sgpr4_sgpr5 = COPY [[COPY10]](p4) @@ -2493,7 +2493,7 @@ define amdgpu_kernel void @test_call_external_void_func_v65i16() #0 { ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<65 x s16>) = G_LOAD [[DEF]](p1) :: (load 130 from `<65 x i16> addrspace(1)* undef`, align 256, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(<65 x s16>) = G_LOAD [[DEF]](p1) :: (load (<65 x s16>) from `<65 x i16> addrspace(1)* undef`, align 256, addrspace 1) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v65i16 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -2551,10 +2551,10 @@ define amdgpu_kernel void @test_call_external_void_func_v65i16() #0 { ; CHECK: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C3]](s32) - ; CHECK: G_STORE [[UV31]](<2 x s16>), [[PTR_ADD1]](p5) :: (store 4 into stack, align 16, addrspace 5) + ; CHECK: G_STORE [[UV31]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5) ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32) - ; CHECK: G_STORE [[UV32]](<2 x s16>), [[PTR_ADD2]](p5) :: (store 4 into stack + 4, addrspace 5) + ; CHECK: G_STORE [[UV32]](<2 x s16>), [[PTR_ADD2]](p5) :: (store (s32) into stack + 4, addrspace 5) ; CHECK: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) ; CHECK: $sgpr4_sgpr5 = COPY [[COPY10]](p4) @@ -2588,7 +2588,7 @@ define amdgpu_kernel void @test_call_external_void_func_v66i16() #0 { ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<66 x s16>) = G_LOAD [[DEF]](p1) :: (load 132 from `<66 x i16> addrspace(1)* undef`, align 256, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(<66 x s16>) = G_LOAD [[DEF]](p1) :: (load (<66 x s16>) from `<66 x i16> addrspace(1)* undef`, align 256, addrspace 1) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v66i16 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -2644,10 +2644,10 @@ define amdgpu_kernel void @test_call_external_void_func_v66i16() #0 { ; CHECK: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C3]](s32) - ; CHECK: G_STORE [[UV31]](<2 x s16>), [[PTR_ADD1]](p5) :: (store 4 into stack, align 16, addrspace 5) + ; CHECK: G_STORE [[UV31]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5) ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32) - ; CHECK: G_STORE [[UV32]](<2 x s16>), [[PTR_ADD2]](p5) :: (store 4 into stack + 4, addrspace 5) + ; CHECK: G_STORE [[UV32]](<2 x s16>), [[PTR_ADD2]](p5) :: (store (s32) into stack + 4, addrspace 5) ; CHECK: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) ; CHECK: $sgpr4_sgpr5 = COPY [[COPY10]](p4) @@ -2681,7 +2681,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 { ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load 4 from `<2 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load (<2 x s16>) from `<2 x half> addrspace(1)* undef`, addrspace 1) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v2f16 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -2736,7 +2736,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 { ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[DEF]](p1) :: (load 8 from `<2 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[DEF]](p1) :: (load (<2 x s32>) from `<2 x i32> addrspace(1)* undef`, addrspace 1) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v2i32 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -2972,7 +2972,7 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 { ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (load 16 from `<4 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (load (<4 x s32>) from `<4 x i32> addrspace(1)* undef`, addrspace 1) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v4i32 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -3155,8 +3155,8 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 { ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load 8 from `<8 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[LOAD]](p1) :: (load 32 from %ir.ptr, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<8 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s32>) from %ir.ptr, addrspace 1) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v8i32 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -3289,8 +3289,8 @@ define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 { ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load 8 from `<16 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[LOAD]](p1) :: (load 64 from %ir.ptr, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<16 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s32>) from %ir.ptr, addrspace 1) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v16i32 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -3362,8 +3362,8 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 { ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load 8 from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load 128 from %ir.ptr, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr, addrspace 1) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v32i32 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -3419,7 +3419,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 { ; CHECK: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C3]](s32) - ; CHECK: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store 4 into stack, align 16, addrspace 5) + ; CHECK: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5) ; CHECK: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) ; CHECK: $sgpr4_sgpr5 = COPY [[COPY10]](p4) @@ -3456,9 +3456,9 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 { ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load 8 from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load 128 from %ir.ptr0, addrspace 1) - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF1]](p1) :: (load 4 from `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr0, addrspace 1) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF1]](p1) :: (load (s32) from `i32 addrspace(1)* undef`, addrspace 1) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_i32 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -3514,10 +3514,10 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 { ; CHECK: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C3]](s32) - ; CHECK: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store 4 into stack, align 16, addrspace 5) + ; CHECK: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5) ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32) - ; CHECK: G_STORE [[LOAD2]](s32), [[PTR_ADD2]](p5) :: (store 4 into stack + 4, addrspace 5) + ; CHECK: G_STORE [[LOAD2]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 4, addrspace 5) ; CHECK: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) ; CHECK: $sgpr4_sgpr5 = COPY [[COPY10]](p4) @@ -3555,10 +3555,10 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() #0 { ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[COPY10:%[0-9]+]]:_(p1) = COPY [[DEF1]](p1) - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load 8 from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load 128 from %ir.ptr0, addrspace 1) - ; CHECK: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[DEF1]](p1) :: (load 1 from `i8 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[COPY10]](p1) :: (load 2 from `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr0, addrspace 1) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[DEF1]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[COPY10]](p1) :: (load (s16) from `i16 addrspace(1)* undef`, addrspace 1) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_i8_i8_i16 ; CHECK: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -3614,18 +3614,18 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() #0 { ; CHECK: [[COPY21:%[0-9]+]]:_(p5) = COPY $sp_reg ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C3]](s32) - ; CHECK: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store 4 into stack, align 16, addrspace 5) + ; CHECK: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5) ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD2]](s8) ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C4]](s32) - ; CHECK: G_STORE [[ANYEXT]](s16), [[PTR_ADD2]](p5) :: (store 2 into stack + 4, align 4, addrspace 5) + ; CHECK: G_STORE [[ANYEXT]](s16), [[PTR_ADD2]](p5) :: (store (s16) into stack + 4, align 4, addrspace 5) ; CHECK: [[COPY22:%[0-9]+]]:_(s16) = COPY [[ANYEXT]](s16) ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C5]](s32) - ; CHECK: G_STORE [[COPY22]](s16), [[PTR_ADD3]](p5) :: (store 2 into stack + 8, align 8, addrspace 5) + ; CHECK: G_STORE [[COPY22]](s16), [[PTR_ADD3]](p5) :: (store (s16) into stack + 8, align 8, addrspace 5) ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C6]](s32) - ; CHECK: G_STORE [[LOAD3]](s16), [[PTR_ADD4]](p5) :: (store 2 into stack + 12, align 4, addrspace 5) + ; CHECK: G_STORE [[LOAD3]](s16), [[PTR_ADD4]](p5) :: (store (s16) into stack + 12, align 4, addrspace 5) ; CHECK: [[COPY23:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY23]](<4 x s32>) ; CHECK: $sgpr4_sgpr5 = COPY [[COPY11]](p4) @@ -3665,10 +3665,10 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_p3_p5() #0 { ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[COPY10:%[0-9]+]]:_(p1) = COPY [[DEF1]](p1) - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load 8 from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load 128 from %ir.ptr0, addrspace 1) - ; CHECK: [[LOAD2:%[0-9]+]]:_(p3) = G_LOAD [[DEF1]](p1) :: (load 4 from `i8 addrspace(3)* addrspace(1)* undef`, addrspace 1) - ; CHECK: [[LOAD3:%[0-9]+]]:_(p5) = G_LOAD [[COPY10]](p1) :: (load 4 from `i8 addrspace(5)* addrspace(1)* undef`, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr0, addrspace 1) + ; CHECK: [[LOAD2:%[0-9]+]]:_(p3) = G_LOAD [[DEF1]](p1) :: (load (p3) from `i8 addrspace(3)* addrspace(1)* undef`, addrspace 1) + ; CHECK: [[LOAD3:%[0-9]+]]:_(p5) = G_LOAD [[COPY10]](p1) :: (load (p5) from `i8 addrspace(5)* addrspace(1)* undef`, addrspace 1) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_p3_p5 ; CHECK: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -3724,13 +3724,13 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_p3_p5() #0 { ; CHECK: [[COPY21:%[0-9]+]]:_(p5) = COPY $sp_reg ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C3]](s32) - ; CHECK: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store 4 into stack, align 16, addrspace 5) + ; CHECK: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5) ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C4]](s32) - ; CHECK: G_STORE [[LOAD2]](p3), [[PTR_ADD2]](p5) :: (store 4 into stack + 4, addrspace 5) + ; CHECK: G_STORE [[LOAD2]](p3), [[PTR_ADD2]](p5) :: (store (s32) into stack + 4, addrspace 5) ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C5]](s32) - ; CHECK: G_STORE [[LOAD3]](p5), [[PTR_ADD3]](p5) :: (store 4 into stack + 8, align 8, addrspace 5) + ; CHECK: G_STORE [[LOAD3]](p5), [[PTR_ADD3]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5) ; CHECK: [[COPY22:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY22]](<4 x s32>) ; CHECK: $sgpr4_sgpr5 = COPY [[COPY11]](p4) @@ -3767,11 +3767,11 @@ define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 { ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load 8 from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load 1 from %ir.ptr0, align 4, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load (s8) from %ir.ptr0, align 4, addrspace 1) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 4 from %ir.ptr0 + 4, addrspace 1) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from %ir.ptr0 + 4, addrspace 1) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_struct_i8_i32 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -3821,11 +3821,11 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 { ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load 8 from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load 1 from %ir.ptr0, align 4, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load (s8) from %ir.ptr0, align 4, addrspace 1) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 4 from %ir.ptr0 + 4, addrspace 1) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from %ir.ptr0 + 4, addrspace 1) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32 ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD1]](s8) @@ -3850,11 +3850,11 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() # ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load 8 from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load 1 from %ir.ptr0, align 4, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load (s8) from %ir.ptr0, align 4, addrspace 1) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 4 from %ir.ptr0 + 4, addrspace 1) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from %ir.ptr0 + 4, addrspace 1) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32_inreg ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD1]](s8) @@ -3892,8 +3892,8 @@ define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.val ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C2]](s32) - ; CHECK: G_STORE [[C]](s8), [[FRAME_INDEX]](p5) :: (store 1 into %ir.gep01, addrspace 5) - ; CHECK: G_STORE [[C1]](s32), [[PTR_ADD]](p5) :: (store 4 into %ir.gep1, addrspace 5) + ; CHECK: G_STORE [[C]](s8), [[FRAME_INDEX]](p5) :: (store (s8) into %ir.gep01, addrspace 5) + ; CHECK: G_STORE [[C1]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.gep1, addrspace 5) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_byval_struct_i8_i32 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -3918,7 +3918,7 @@ define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0 ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C6]](s32) ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: G_MEMCPY [[PTR_ADD2]](p5), [[FRAME_INDEX]](p5), [[C7]](s32), 0 :: (dereferenceable store 8 into stack, align 4, addrspace 5), (dereferenceable load 8 from %ir.val, align 4, addrspace 5) + ; CHECK: G_MEMCPY [[PTR_ADD2]](p5), [[FRAME_INDEX]](p5), [[C7]](s32), 0 :: (dereferenceable store (s64) into stack, align 4, addrspace 5), (dereferenceable load (s64) from %ir.val, align 4, addrspace 5) ; CHECK: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) ; CHECK: $sgpr4_sgpr5 = COPY [[COPY10]](p4) @@ -3973,11 +3973,11 @@ define void @call_byval_3ai32_byval_i8_align32([3 x i32] addrspace(5)* %incoming ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY19]], [[C1]](s32) ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK: G_MEMCPY [[PTR_ADD]](p5), [[COPY8]](p5), [[C2]](s32), 0 :: (dereferenceable store 12 into stack, align 4, addrspace 5), (dereferenceable load 12 from %ir.incoming0, align 4, addrspace 5) + ; CHECK: G_MEMCPY [[PTR_ADD]](p5), [[COPY8]](p5), [[C2]](s32), 0 :: (dereferenceable store (s96) into stack, align 4, addrspace 5), (dereferenceable load (s96) from %ir.incoming0, align 4, addrspace 5) ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY19]], [[C3]](s32) ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: G_MEMCPY [[PTR_ADD1]](p5), [[COPY9]](p5), [[C4]](s32), 0 :: (dereferenceable store 1 into stack + 32, align 32, addrspace 5), (dereferenceable load 1 from %ir.incoming1, align 32, addrspace 5) + ; CHECK: G_MEMCPY [[PTR_ADD1]](p5), [[COPY9]](p5), [[C4]](s32), 0 :: (dereferenceable store (s8) into stack + 32, align 32, addrspace 5), (dereferenceable load (s8) from %ir.incoming1, align 32, addrspace 5) ; CHECK: $vgpr0 = COPY [[C]](s32) ; CHECK: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) @@ -4029,7 +4029,7 @@ define void @call_byval_a4i64_align4_higher_source_align([4 x i64] addrspace(5)* ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY18]], [[C]](s32) ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK: G_MEMCPY [[PTR_ADD]](p5), [[COPY8]](p5), [[C1]](s32), 0 :: (dereferenceable store 32 into stack, align 4, addrspace 5), (dereferenceable load 32 from %ir.incoming_high_align, align 256, addrspace 5) + ; CHECK: G_MEMCPY [[PTR_ADD]](p5), [[COPY8]](p5), [[C1]](s32), 0 :: (dereferenceable store (s256) into stack, align 4, addrspace 5), (dereferenceable load (s256) from %ir.incoming_high_align, align 256, addrspace 5) ; CHECK: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) ; CHECK: $sgpr4_sgpr5 = COPY [[COPY10]](p4) @@ -4063,8 +4063,8 @@ define amdgpu_kernel void @test_call_external_void_func_v2i8() #0 { ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load 8 from `<2 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[LOAD]](p1) :: (load 2 from %ir.ptr, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<2 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<2 x s8>) from %ir.ptr, addrspace 1) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v2i8 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -4126,8 +4126,8 @@ define amdgpu_kernel void @test_call_external_void_func_v3i8() #0 { ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load 8 from `<3 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[LOAD]](p1) :: (load 3 from %ir.ptr, align 4, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<3 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<3 x s8>) from %ir.ptr, align 4, addrspace 1) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v3i8 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -4192,8 +4192,8 @@ define amdgpu_kernel void @test_call_external_void_func_v4i8() #0 { ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load 8 from `<4 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[LOAD]](p1) :: (load 4 from %ir.ptr, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<4 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<4 x s8>) from %ir.ptr, addrspace 1) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v4i8 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -4261,8 +4261,8 @@ define amdgpu_kernel void @test_call_external_void_func_v8i8() #0 { ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load 8 from `<8 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[LOAD]](p1) :: (load 8 from %ir.ptr, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<8 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s8>) from %ir.ptr, addrspace 1) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v8i8 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -4342,8 +4342,8 @@ define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 { ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load 8 from `<16 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[LOAD]](p1) :: (load 16 from %ir.ptr, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<16 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s8>) from %ir.ptr, addrspace 1) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v16i8 ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -4447,10 +4447,10 @@ define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load 128 from %ir.val.kernarg.offset.cast, align 16, addrspace 4) + ; CHECK: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (<32 x s32>) from %ir.val.kernarg.offset.cast, align 16, addrspace 4) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[INT]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8 from %ir.tmp.kernarg.offset.cast, align 16, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64) from %ir.tmp.kernarg.offset.cast, align 16, addrspace 4) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @stack_passed_f64_arg ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -4506,14 +4506,14 @@ define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val ; CHECK: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32) - ; CHECK: G_STORE [[UV31]](s32), [[PTR_ADD2]](p5) :: (store 4 into stack, align 16, addrspace 5) + ; CHECK: G_STORE [[UV31]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack, align 16, addrspace 5) ; CHECK: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](s64) ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C5]](s32) - ; CHECK: G_STORE [[UV32]](s32), [[PTR_ADD3]](p5) :: (store 4 into stack + 4, addrspace 5) + ; CHECK: G_STORE [[UV32]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 4, addrspace 5) ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C6]](s32) - ; CHECK: G_STORE [[UV33]](s32), [[PTR_ADD4]](p5) :: (store 4 into stack + 8, align 8, addrspace 5) + ; CHECK: G_STORE [[UV33]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5) ; CHECK: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) ; CHECK: $sgpr4_sgpr5 = COPY [[COPY10]](p4) @@ -4628,20 +4628,20 @@ define void @stack_12xv3i32() #0 { ; CHECK: [[COPY17:%[0-9]+]]:_(p5) = COPY $sgpr32 ; CHECK: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C16]](s32) - ; CHECK: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store 4 into stack, align 16, addrspace 5) + ; CHECK: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5) ; CHECK: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C17]](s32) - ; CHECK: G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store 4 into stack + 4, addrspace 5) + ; CHECK: G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5) ; CHECK: [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR11]](<3 x s32>) ; CHECK: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C18]](s32) - ; CHECK: G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store 4 into stack + 8, align 8, addrspace 5) + ; CHECK: G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5) ; CHECK: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C19]](s32) - ; CHECK: G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store 4 into stack + 12, addrspace 5) + ; CHECK: G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 12, addrspace 5) ; CHECK: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C20]](s32) - ; CHECK: G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store 4 into stack + 16, align 16, addrspace 5) + ; CHECK: G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 16, align 16, addrspace 5) ; CHECK: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) ; CHECK: $sgpr4_sgpr5 = COPY [[COPY9]](p4) @@ -4769,20 +4769,20 @@ define void @stack_12xv3f32() #0 { ; CHECK: [[COPY17:%[0-9]+]]:_(p5) = COPY $sgpr32 ; CHECK: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C16]](s32) - ; CHECK: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store 4 into stack, align 16, addrspace 5) + ; CHECK: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5) ; CHECK: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C17]](s32) - ; CHECK: G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store 4 into stack + 4, addrspace 5) + ; CHECK: G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5) ; CHECK: [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR11]](<3 x s32>) ; CHECK: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C18]](s32) - ; CHECK: G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store 4 into stack + 8, align 8, addrspace 5) + ; CHECK: G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5) ; CHECK: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C19]](s32) - ; CHECK: G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store 4 into stack + 12, addrspace 5) + ; CHECK: G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 12, addrspace 5) ; CHECK: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C20]](s32) - ; CHECK: G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store 4 into stack + 16, align 16, addrspace 5) + ; CHECK: G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 16, align 16, addrspace 5) ; CHECK: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) ; CHECK: $sgpr4_sgpr5 = COPY [[COPY9]](p4) @@ -4902,32 +4902,32 @@ define void @stack_8xv5i32() #0 { ; CHECK: [[COPY17:%[0-9]+]]:_(p5) = COPY $sgpr32 ; CHECK: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C16]](s32) - ; CHECK: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store 4 into stack, align 16, addrspace 5) + ; CHECK: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5) ; CHECK: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C17]](s32) - ; CHECK: G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store 4 into stack + 4, addrspace 5) + ; CHECK: G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5) ; CHECK: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C18]](s32) - ; CHECK: G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store 4 into stack + 8, align 8, addrspace 5) + ; CHECK: G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5) ; CHECK: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C19]](s32) - ; CHECK: G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store 4 into stack + 12, addrspace 5) + ; CHECK: G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 12, addrspace 5) ; CHECK: [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR7]](<5 x s32>) ; CHECK: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C20]](s32) - ; CHECK: G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store 4 into stack + 16, align 16, addrspace 5) + ; CHECK: G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 16, align 16, addrspace 5) ; CHECK: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C21]](s32) - ; CHECK: G_STORE [[UV36]](s32), [[PTR_ADD5]](p5) :: (store 4 into stack + 20, addrspace 5) + ; CHECK: G_STORE [[UV36]](s32), [[PTR_ADD5]](p5) :: (store (s32) into stack + 20, addrspace 5) ; CHECK: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; CHECK: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C22]](s32) - ; CHECK: G_STORE [[UV37]](s32), [[PTR_ADD6]](p5) :: (store 4 into stack + 24, align 8, addrspace 5) + ; CHECK: G_STORE [[UV37]](s32), [[PTR_ADD6]](p5) :: (store (s32) into stack + 24, align 8, addrspace 5) ; CHECK: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 ; CHECK: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C23]](s32) - ; CHECK: G_STORE [[UV38]](s32), [[PTR_ADD7]](p5) :: (store 4 into stack + 28, addrspace 5) + ; CHECK: G_STORE [[UV38]](s32), [[PTR_ADD7]](p5) :: (store (s32) into stack + 28, addrspace 5) ; CHECK: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; CHECK: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C24]](s32) - ; CHECK: G_STORE [[UV39]](s32), [[PTR_ADD8]](p5) :: (store 4 into stack + 32, align 16, addrspace 5) + ; CHECK: G_STORE [[UV39]](s32), [[PTR_ADD8]](p5) :: (store (s32) into stack + 32, align 16, addrspace 5) ; CHECK: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) ; CHECK: $sgpr4_sgpr5 = COPY [[COPY9]](p4) @@ -5043,32 +5043,32 @@ define void @stack_8xv5f32() #0 { ; CHECK: [[COPY17:%[0-9]+]]:_(p5) = COPY $sgpr32 ; CHECK: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C16]](s32) - ; CHECK: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store 4 into stack, align 16, addrspace 5) + ; CHECK: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5) ; CHECK: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C17]](s32) - ; CHECK: G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store 4 into stack + 4, addrspace 5) + ; CHECK: G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5) ; CHECK: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C18]](s32) - ; CHECK: G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store 4 into stack + 8, align 8, addrspace 5) + ; CHECK: G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5) ; CHECK: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C19]](s32) - ; CHECK: G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store 4 into stack + 12, addrspace 5) + ; CHECK: G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 12, addrspace 5) ; CHECK: [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR7]](<5 x s32>) ; CHECK: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C20]](s32) - ; CHECK: G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store 4 into stack + 16, align 16, addrspace 5) + ; CHECK: G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 16, align 16, addrspace 5) ; CHECK: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C21]](s32) - ; CHECK: G_STORE [[UV36]](s32), [[PTR_ADD5]](p5) :: (store 4 into stack + 20, addrspace 5) + ; CHECK: G_STORE [[UV36]](s32), [[PTR_ADD5]](p5) :: (store (s32) into stack + 20, addrspace 5) ; CHECK: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; CHECK: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C22]](s32) - ; CHECK: G_STORE [[UV37]](s32), [[PTR_ADD6]](p5) :: (store 4 into stack + 24, align 8, addrspace 5) + ; CHECK: G_STORE [[UV37]](s32), [[PTR_ADD6]](p5) :: (store (s32) into stack + 24, align 8, addrspace 5) ; CHECK: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 ; CHECK: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C23]](s32) - ; CHECK: G_STORE [[UV38]](s32), [[PTR_ADD7]](p5) :: (store 4 into stack + 28, addrspace 5) + ; CHECK: G_STORE [[UV38]](s32), [[PTR_ADD7]](p5) :: (store (s32) into stack + 28, addrspace 5) ; CHECK: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; CHECK: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C24]](s32) - ; CHECK: G_STORE [[UV39]](s32), [[PTR_ADD8]](p5) :: (store 4 into stack + 32, align 16, addrspace 5) + ; CHECK: G_STORE [[UV39]](s32), [[PTR_ADD8]](p5) :: (store (s32) into stack + 32, align 16, addrspace 5) ; CHECK: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) ; CHECK: $sgpr4_sgpr5 = COPY [[COPY9]](p4) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constantexpr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constantexpr.ll index 3254fb4c9f2a0..fd3d450356717 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constantexpr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constantexpr.ll @@ -37,7 +37,7 @@ define amdgpu_kernel void @constantexpr_select_0() { ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[SELECT]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[SELECT]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; CHECK: S_ENDPGM 0 store i32 select (i1 icmp eq (i8 addrspace(1)* @gint, i8 addrspace(1)* null), i32 1, i32 0), i32 addrspace(1)* undef, align 4 ret void @@ -54,7 +54,7 @@ define amdgpu_kernel void @constantexpr_select_1() { ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[SELECT]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[SELECT]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; CHECK: S_ENDPGM 0 store i32 select (i1 icmp eq (i8 addrspace(1)* @gint, i8 addrspace(1)* inttoptr (i64 1024 to i8 addrspace(1)*)), i32 1, i32 0), i32 addrspace(1)* undef, align 4 ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll index 9d398ea907c0d..515db98c940d2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll @@ -41,10 +41,10 @@ define void @void_a31i32_i32([31 x i32] %arg0, i32 %arg1) { ; FIXED: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 ; FIXED: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 ; FIXED: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; FIXED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.0, align 16, addrspace 5) + ; FIXED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) ; FIXED: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; FIXED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; FIXED: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; FIXED: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; FIXED: [[COPY32:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] ; FIXED: S_SETPC_B64_return [[COPY32]] ; VARABI-LABEL: name: void_a31i32_i32 @@ -84,7 +84,7 @@ define void @void_a31i32_i32([31 x i32] %arg0, i32 %arg1) { ; VARABI: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 ; VARABI: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; VARABI: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; VARABI: G_STORE [[COPY31]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; VARABI: G_STORE [[COPY31]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; VARABI: [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] ; VARABI: S_SETPC_B64_return [[COPY33]] store i32 %arg1, i32 addrspace(1)* undef diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll index 7c2eb49baa86b..d295cf5bbc7d3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll @@ -12,7 +12,7 @@ define void @void_func_empty_arg({} %arg0, i32 %arg1) #0 { ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] ; CHECK: S_SETPC_B64_return [[COPY2]] store i32 %arg1, i32 addrspace(1)* undef @@ -26,7 +26,7 @@ define void @void_func_empty_array([0 x i8] %arg0, i32 %arg1) #0 { ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] ; CHECK: S_SETPC_B64_return [[COPY2]] store i32 %arg1, i32 addrspace(1)* undef @@ -41,7 +41,7 @@ define void @void_func_i1(i1 %arg0) #0 { ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (store 1 into `i1 addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (store (s1) into `i1 addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] ; CHECK: S_SETPC_B64_return [[COPY2]] store i1 %arg0, i1 addrspace(1)* undef @@ -60,7 +60,7 @@ define void @void_func_i1_zeroext(i1 zeroext %arg0) #0 { ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s1) ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[C]] - ; CHECK: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] ; CHECK: S_SETPC_B64_return [[COPY2]] %ext = zext i1 %arg0 to i32 @@ -81,7 +81,7 @@ define void @void_func_i1_signext(i1 signext %arg0) #0 { ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s1) ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C]] - ; CHECK: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] ; CHECK: S_SETPC_B64_return [[COPY2]] %ext = sext i1 %arg0 to i32 @@ -107,7 +107,7 @@ define void @i1_arg_i1_use(i1 %arg) #0 { ; CHECK: G_BR %bb.3 ; CHECK: bb.2.bb1: ; CHECK: successors: %bb.3(0x80000000) - ; CHECK: G_STORE [[C1]](s32), [[DEF]](p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[C1]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; CHECK: bb.3.bb2: ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT1]](s64) ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] @@ -132,7 +132,7 @@ define void @void_func_i8(i8 %arg0) #0 { ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (store 1 into `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (store (s8) into `i8 addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] ; CHECK: S_SETPC_B64_return [[COPY2]] store i8 %arg0, i8 addrspace(1)* undef @@ -151,7 +151,7 @@ define void @void_func_i8_zeroext(i8 zeroext %arg0) #0 { ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s8) ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[C]] - ; CHECK: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] ; CHECK: S_SETPC_B64_return [[COPY2]] %ext = zext i8 %arg0 to i32 @@ -172,7 +172,7 @@ define void @void_func_i8_signext(i8 signext %arg0) #0 { ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s8) ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C]] - ; CHECK: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] ; CHECK: S_SETPC_B64_return [[COPY2]] %ext = sext i8 %arg0 to i32 @@ -189,7 +189,7 @@ define void @void_func_i16(i16 %arg0) #0 { ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (store 2 into `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (store (s16) into `i16 addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] ; CHECK: S_SETPC_B64_return [[COPY2]] store i16 %arg0, i16 addrspace(1)* undef @@ -208,7 +208,7 @@ define void @void_func_i16_zeroext(i16 zeroext %arg0) #0 { ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s16) ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[C]] - ; CHECK: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] ; CHECK: S_SETPC_B64_return [[COPY2]] %ext = zext i16 %arg0 to i32 @@ -229,7 +229,7 @@ define void @void_func_i16_signext(i16 signext %arg0) #0 { ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16) ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C]] - ; CHECK: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] ; CHECK: S_SETPC_B64_return [[COPY2]] %ext = sext i16 %arg0 to i32 @@ -246,7 +246,7 @@ define void @void_func_i24(i24 %arg0) #0 { ; CHECK: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[COPY]](s32) ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store 3 into `i24 addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK: G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store (s24) into `i24 addrspace(1)* undef`, align 4, addrspace 1) ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] ; CHECK: S_SETPC_B64_return [[COPY2]] store i24 %arg0, i24 addrspace(1)* undef @@ -262,7 +262,7 @@ define void @void_func_i24_zeroext(i24 zeroext %arg0) #0 { ; CHECK: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[ASSERT_ZEXT]](s32) ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store 3 into `i24 addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK: G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store (s24) into `i24 addrspace(1)* undef`, align 4, addrspace 1) ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] ; CHECK: S_SETPC_B64_return [[COPY2]] store i24 %arg0, i24 addrspace(1)* undef @@ -278,7 +278,7 @@ define void @void_func_i24_signext(i24 signext %arg0) #0 { ; CHECK: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[ASSERT_SEXT]](s32) ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store 3 into `i24 addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK: G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store (s24) into `i24 addrspace(1)* undef`, align 4, addrspace 1) ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] ; CHECK: S_SETPC_B64_return [[COPY2]] store i24 %arg0, i24 addrspace(1)* undef @@ -292,7 +292,7 @@ define void @void_func_i32(i32 %arg0) #0 { ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] ; CHECK: S_SETPC_B64_return [[COPY2]] store i32 %arg0, i32 addrspace(1)* undef @@ -307,7 +307,7 @@ define void @void_func_i32_signext(i32 signext %arg0) #0 { ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] ; CHECK: S_SETPC_B64_return [[COPY2]] store i32 %arg0, i32 addrspace(1)* undef @@ -322,7 +322,7 @@ define void @void_func_i32_zeroext(i32 zeroext %arg0) #0 { ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] ; CHECK: S_SETPC_B64_return [[COPY2]] store i32 %arg0, i32 addrspace(1)* undef @@ -336,7 +336,7 @@ define void @void_func_p3i8(i8 addrspace(3)* %arg0) #0 { ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[COPY]](p3), [[DEF]](p1) :: (store 4 into `i8 addrspace(3)* addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[COPY]](p3), [[DEF]](p1) :: (store (p3) into `i8 addrspace(3)* addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] ; CHECK: S_SETPC_B64_return [[COPY2]] store i8 addrspace(3)* %arg0, i8 addrspace(3)* addrspace(1)* undef @@ -353,7 +353,7 @@ define void @void_func_i48(i48 %arg0) #0 { ; CHECK: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64) ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[TRUNC]](s48), [[DEF]](p1) :: (store 6 into `i48 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK: G_STORE [[TRUNC]](s48), [[DEF]](p1) :: (store (s48) into `i48 addrspace(1)* undef`, align 8, addrspace 1) ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] ; CHECK: S_SETPC_B64_return [[COPY3]] store i48 %arg0, i48 addrspace(1)* undef @@ -373,7 +373,7 @@ define void @void_func_i48_zeroext(i48 zeroext %arg0) #0 { ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) ; CHECK: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[ZEXT]], [[C]] - ; CHECK: G_STORE [[ADD]](s64), [[DEF]](p1) :: (store 8 into `i64 addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[ADD]](s64), [[DEF]](p1) :: (store (s64) into `i64 addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] ; CHECK: S_SETPC_B64_return [[COPY3]] %ext = zext i48 %arg0 to i64 @@ -395,7 +395,7 @@ define void @void_func_i48_signext(i48 signext %arg0) #0 { ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s48) ; CHECK: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[SEXT]], [[C]] - ; CHECK: G_STORE [[ADD]](s64), [[DEF]](p1) :: (store 8 into `i64 addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[ADD]](s64), [[DEF]](p1) :: (store (s64) into `i64 addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] ; CHECK: S_SETPC_B64_return [[COPY3]] %ext = sext i48 %arg0 to i64 @@ -413,7 +413,7 @@ define void @void_func_i64(i64 %arg0) #0 { ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[MV]](s64), [[DEF]](p1) :: (store 8 into `i64 addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[MV]](s64), [[DEF]](p1) :: (store (s64) into `i64 addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] ; CHECK: S_SETPC_B64_return [[COPY3]] store i64 %arg0, i64 addrspace(1)* undef @@ -431,7 +431,7 @@ define void @void_func_i95(i95 %arg0) #0 { ; CHECK: [[TRUNC:%[0-9]+]]:_(s95) = G_TRUNC [[MV]](s96) ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[TRUNC]](s95), [[DEF]](p1) :: (store 12 into `i95 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK: G_STORE [[TRUNC]](s95), [[DEF]](p1) :: (store (s95) into `i95 addrspace(1)* undef`, align 8, addrspace 1) ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] ; CHECK: S_SETPC_B64_return [[COPY4]] store i95 %arg0, i95 addrspace(1)* undef @@ -452,7 +452,7 @@ define void @void_func_i95_zeroext(i95 zeroext %arg0) #0 { ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[ZEXT:%[0-9]+]]:_(s96) = G_ZEXT [[TRUNC]](s95) ; CHECK: [[ADD:%[0-9]+]]:_(s96) = G_ADD [[ZEXT]], [[C]] - ; CHECK: G_STORE [[ADD]](s96), [[DEF]](p1) :: (store 12 into `i96 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK: G_STORE [[ADD]](s96), [[DEF]](p1) :: (store (s96) into `i96 addrspace(1)* undef`, align 8, addrspace 1) ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] ; CHECK: S_SETPC_B64_return [[COPY4]] %ext = zext i95 %arg0 to i96 @@ -475,7 +475,7 @@ define void @void_func_i95_signext(i95 signext %arg0) #0 { ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[SEXT:%[0-9]+]]:_(s96) = G_SEXT [[TRUNC]](s95) ; CHECK: [[ADD:%[0-9]+]]:_(s96) = G_ADD [[SEXT]], [[C]] - ; CHECK: G_STORE [[ADD]](s96), [[DEF]](p1) :: (store 12 into `i96 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK: G_STORE [[ADD]](s96), [[DEF]](p1) :: (store (s96) into `i96 addrspace(1)* undef`, align 8, addrspace 1) ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] ; CHECK: S_SETPC_B64_return [[COPY4]] %ext = sext i95 %arg0 to i96 @@ -494,7 +494,7 @@ define void @void_func_i96(i96 %arg0) #0 { ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[MV]](s96), [[DEF]](p1) :: (store 12 into `i96 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK: G_STORE [[MV]](s96), [[DEF]](p1) :: (store (s96) into `i96 addrspace(1)* undef`, align 8, addrspace 1) ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] ; CHECK: S_SETPC_B64_return [[COPY4]] store i96 %arg0, i96 addrspace(1)* undef @@ -510,7 +510,7 @@ define void @void_func_p0i8(i8* %arg0) #0 { ; CHECK: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[MV]](p0), [[DEF]](p1) :: (store 8 into `i8* addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[MV]](p0), [[DEF]](p1) :: (store (p0) into `i8* addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] ; CHECK: S_SETPC_B64_return [[COPY3]] store i8* %arg0, i8* addrspace(1)* undef @@ -526,7 +526,7 @@ define void @void_func_p1i8(i8 addrspace(1)* %arg0) #0 { ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[MV]](p1), [[DEF]](p1) :: (store 8 into `i8 addrspace(1)* addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[MV]](p1), [[DEF]](p1) :: (store (p1) into `i8 addrspace(1)* addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] ; CHECK: S_SETPC_B64_return [[COPY3]] store i8 addrspace(1)* %arg0, i8 addrspace(1)* addrspace(1)* undef @@ -541,7 +541,7 @@ define void @void_func_f16(half %arg0) #0 { ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (store 2 into `half addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (store (s16) into `half addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] ; CHECK: S_SETPC_B64_return [[COPY2]] store half %arg0, half addrspace(1)* undef @@ -555,7 +555,7 @@ define void @void_func_f32(float %arg0) #0 { ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store 4 into `float addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `float addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] ; CHECK: S_SETPC_B64_return [[COPY2]] store float %arg0, float addrspace(1)* undef @@ -571,7 +571,7 @@ define void @void_func_f64(double %arg0) #0 { ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[MV]](s64), [[DEF]](p1) :: (store 8 into `double addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[MV]](s64), [[DEF]](p1) :: (store (s64) into `double addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] ; CHECK: S_SETPC_B64_return [[COPY3]] store double %arg0, double addrspace(1)* undef @@ -587,7 +587,7 @@ define void @void_func_v2i32(<2 x i32> %arg0) #0 { ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (store 8 into `<2 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (store (<2 x s32>) into `<2 x i32> addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] ; CHECK: S_SETPC_B64_return [[COPY3]] store <2 x i32> %arg0, <2 x i32> addrspace(1)* undef @@ -604,7 +604,7 @@ define void @void_func_v2i24(<2 x i24> %arg0) #0 { ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s24>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>) ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[TRUNC]](<2 x s24>), [[DEF]](p1) :: (store 6 into `<2 x i24> addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK: G_STORE [[TRUNC]](<2 x s24>), [[DEF]](p1) :: (store (<2 x s24>) into `<2 x i24> addrspace(1)* undef`, align 8, addrspace 1) ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] ; CHECK: S_SETPC_B64_return [[COPY3]] store <2 x i24> %arg0, <2 x i24> addrspace(1)* undef @@ -622,7 +622,7 @@ define void @void_func_v3i24(<3 x i24> %arg0) #0 { ; CHECK: [[TRUNC:%[0-9]+]]:_(<3 x s24>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[TRUNC]](<3 x s24>), [[DEF]](p1) :: (store 9 into `<3 x i24> addrspace(1)* undef`, align 16, addrspace 1) + ; CHECK: G_STORE [[TRUNC]](<3 x s24>), [[DEF]](p1) :: (store (<3 x s24>) into `<3 x i24> addrspace(1)* undef`, align 16, addrspace 1) ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] ; CHECK: S_SETPC_B64_return [[COPY4]] store <3 x i24> %arg0, <3 x i24> addrspace(1)* undef @@ -641,7 +641,7 @@ define void @void_func_v2i8(<2 x i8> %arg0) #0 { ; CHECK: [[TRUNC2:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[BUILD_VECTOR]](<2 x s16>) ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[TRUNC2]](<2 x s8>), [[DEF]](p1) :: (store 2 into `<2 x i8> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[TRUNC2]](<2 x s8>), [[DEF]](p1) :: (store (<2 x s8>) into `<2 x i8> addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] ; CHECK: S_SETPC_B64_return [[COPY3]] store <2 x i8> %arg0, <2 x i8> addrspace(1)* undef @@ -662,7 +662,7 @@ define void @void_func_v3i8(<3 x i8> %arg0) #0 { ; CHECK: [[TRUNC3:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s16>) ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[TRUNC3]](<3 x s8>), [[DEF]](p1) :: (store 3 into `<3 x i8> addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK: G_STORE [[TRUNC3]](<3 x s8>), [[DEF]](p1) :: (store (<3 x s8>) into `<3 x i8> addrspace(1)* undef`, align 4, addrspace 1) ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] ; CHECK: S_SETPC_B64_return [[COPY4]] store <3 x i8> %arg0, <3 x i8> addrspace(1)* undef @@ -685,7 +685,7 @@ define void @void_func_v4i8(<4 x i8> %arg0) #0 { ; CHECK: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s16>) ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[TRUNC4]](<4 x s8>), [[DEF]](p1) :: (store 4 into `<4 x i8> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[TRUNC4]](<4 x s8>), [[DEF]](p1) :: (store (<4 x s8>) into `<4 x i8> addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] ; CHECK: S_SETPC_B64_return [[COPY5]] store <4 x i8> %arg0, <4 x i8> addrspace(1)* undef @@ -701,7 +701,7 @@ define void @void_func_v2p3i8(<2 x i8 addrspace(3)*> %arg0) #0 { ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY]](p3), [[COPY1]](p3) ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x p3>), [[DEF]](p1) :: (store 8 into `<2 x i8 addrspace(3)*> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x p3>), [[DEF]](p1) :: (store (<2 x p3>) into `<2 x i8 addrspace(3)*> addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] ; CHECK: S_SETPC_B64_return [[COPY3]] store <2 x i8 addrspace(3)*> %arg0, <2 x i8 addrspace(3)*> addrspace(1)* undef @@ -718,7 +718,7 @@ define void @void_func_v3i32(<3 x i32> %arg0) #0 { ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (store 12 into `<3 x i32> addrspace(1)* undef`, align 16, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (store (<3 x s32>) into `<3 x i32> addrspace(1)* undef`, align 16, addrspace 1) ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] ; CHECK: S_SETPC_B64_return [[COPY4]] store <3 x i32> %arg0, <3 x i32> addrspace(1)* undef @@ -736,7 +736,7 @@ define void @void_func_v4i32(<4 x i32> %arg0) #0 { ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x i32> addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] ; CHECK: S_SETPC_B64_return [[COPY5]] store <4 x i32> %arg0, <4 x i32> addrspace(1)* undef @@ -755,7 +755,7 @@ define void @void_func_v5i32(<5 x i32> %arg0) #0 { ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) ; CHECK: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (store 20 into `<5 x i32> addrspace(1)* undef`, align 32, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (store (<5 x s32>) into `<5 x i32> addrspace(1)* undef`, align 32, addrspace 1) ; CHECK: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY5]] ; CHECK: S_SETPC_B64_return [[COPY6]] store <5 x i32> %arg0, <5 x i32> addrspace(1)* undef @@ -777,7 +777,7 @@ define void @void_func_v8i32(<8 x i32> %arg0) #0 { ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (store 32 into `<8 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (store (<8 x s32>) into `<8 x i32> addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] ; CHECK: S_SETPC_B64_return [[COPY9]] store <8 x i32> %arg0, <8 x i32> addrspace(1)* undef @@ -807,7 +807,7 @@ define void @void_func_v16i32(<16 x i32> %arg0) #0 { ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32) ; CHECK: [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (store 64 into `<16 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (store (<16 x s32>) into `<16 x i32> addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]] ; CHECK: S_SETPC_B64_return [[COPY17]] store <16 x i32> %arg0, <16 x i32> addrspace(1)* undef @@ -853,7 +853,7 @@ define void @void_func_v32i32(<32 x i32> %arg0) #0 { ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] ; CHECK: S_SETPC_B64_return [[COPY33]] store <32 x i32> %arg0, <32 x i32> addrspace(1)* undef @@ -898,11 +898,11 @@ define void @void_func_v33i32(<33 x i32> %arg0) #0 { ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.0, align 16, addrspace 5) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<33 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[LOAD]](s32) ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<33 x s32>), [[DEF]](p1) :: (store 132 into `<33 x i32> addrspace(1)* undef`, align 256, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR]](<33 x s32>), [[DEF]](p1) :: (store (<33 x s32>) into `<33 x i32> addrspace(1)* undef`, align 256, addrspace 1) ; CHECK: [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] ; CHECK: S_SETPC_B64_return [[COPY33]] store <33 x i32> %arg0, <33 x i32> addrspace(1)* undef @@ -922,7 +922,7 @@ define void @void_func_v2i64(<2 x i64> %arg0) #0 { ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (store 16 into `<2 x i64> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<2 x i64> addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] ; CHECK: S_SETPC_B64_return [[COPY5]] store <2 x i64> %arg0, <2 x i64> addrspace(1)* undef @@ -942,7 +942,7 @@ define void @void_func_v2p0i8(<2 x i8*> %arg0) #0 { ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[MV]](p0), [[MV1]](p0) ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x p0>), [[DEF]](p1) :: (store 16 into `<2 x i8*> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x p0>), [[DEF]](p1) :: (store (<2 x p0>) into `<2 x i8*> addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] ; CHECK: S_SETPC_B64_return [[COPY5]] store <2 x i8*> %arg0, <2 x i8*> addrspace(1)* undef @@ -962,7 +962,7 @@ define void @void_func_v2p1i8(<2 x i8 addrspace(1)*> %arg0) #0 { ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x p1>), [[DEF]](p1) :: (store 16 into `<2 x i8 addrspace(1)*> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<2 x i8 addrspace(1)*> addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] ; CHECK: S_SETPC_B64_return [[COPY5]] store <2 x i8 addrspace(1)*> %arg0, <2 x i8 addrspace(1)*> addrspace(1)* undef @@ -985,7 +985,7 @@ define void @void_func_v3i64(<3 x i64> %arg0) #0 { ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) ; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<3 x s64>), [[DEF]](p1) :: (store 24 into `<3 x i64> addrspace(1)* undef`, align 32, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR]](<3 x s64>), [[DEF]](p1) :: (store (<3 x s64>) into `<3 x i64> addrspace(1)* undef`, align 32, addrspace 1) ; CHECK: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] ; CHECK: S_SETPC_B64_return [[COPY7]] store <3 x i64> %arg0, <3 x i64> addrspace(1)* undef @@ -1011,7 +1011,7 @@ define void @void_func_v4i64(<4 x i64> %arg0) #0 { ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s64>), [[DEF]](p1) :: (store 32 into `<4 x i64> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s64>), [[DEF]](p1) :: (store (<4 x s64>) into `<4 x i64> addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] ; CHECK: S_SETPC_B64_return [[COPY9]] store <4 x i64> %arg0, <4 x i64> addrspace(1)* undef @@ -1040,7 +1040,7 @@ define void @void_func_v5i64(<5 x i64> %arg0) #0 { ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64) ; CHECK: [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<5 x s64>), [[DEF]](p1) :: (store 40 into `<5 x i64> addrspace(1)* undef`, align 64, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR]](<5 x s64>), [[DEF]](p1) :: (store (<5 x s64>) into `<5 x i64> addrspace(1)* undef`, align 64, addrspace 1) ; CHECK: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY10]] ; CHECK: S_SETPC_B64_return [[COPY11]] store <5 x i64> %arg0, <5 x i64> addrspace(1)* undef @@ -1078,7 +1078,7 @@ define void @void_func_v8i64(<8 x i64> %arg0) #0 { ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) ; CHECK: [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<8 x s64>), [[DEF]](p1) :: (store 64 into `<8 x i64> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR]](<8 x s64>), [[DEF]](p1) :: (store (<8 x s64>) into `<8 x i64> addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]] ; CHECK: S_SETPC_B64_return [[COPY17]] store <8 x i64> %arg0, <8 x i64> addrspace(1)* undef @@ -1140,7 +1140,7 @@ define void @void_func_v16i64(<16 x i64> %arg0) #0 { ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64), [[MV12]](s64), [[MV13]](s64), [[MV14]](s64), [[MV15]](s64) ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<16 x s64>), [[DEF]](p1) :: (store 128 into `<16 x i64> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR]](<16 x s64>), [[DEF]](p1) :: (store (<16 x s64>) into `<16 x i64> addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] ; CHECK: S_SETPC_B64_return [[COPY33]] store <16 x i64> %arg0, <16 x i64> addrspace(1)* undef @@ -1154,7 +1154,7 @@ define void @void_func_v2i16(<2 x i16> %arg0) #0 { ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[COPY]](<2 x s16>), [[DEF]](p1) :: (store 4 into `<2 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[COPY]](<2 x s16>), [[DEF]](p1) :: (store (<2 x s16>) into `<2 x i16> addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] ; CHECK: S_SETPC_B64_return [[COPY2]] store <2 x i16> %arg0, <2 x i16> addrspace(1)* undef @@ -1172,7 +1172,7 @@ define void @void_func_v3i16(<3 x i16> %arg0) #0 { ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[UV]](<3 x s16>), [[DEF1]](p1) :: (store 6 into `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK: G_STORE [[UV]](<3 x s16>), [[DEF1]](p1) :: (store (<3 x s16>) into `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] ; CHECK: S_SETPC_B64_return [[COPY3]] store <3 x i16> %arg0, <3 x i16> addrspace(1)* undef @@ -1188,7 +1188,7 @@ define void @void_func_v4i16(<4 x i16> %arg0) #0 { ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (store 8 into `<4 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (store (<4 x s16>) into `<4 x i16> addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] ; CHECK: S_SETPC_B64_return [[COPY3]] store <4 x i16> %arg0, <4 x i16> addrspace(1)* undef @@ -1207,7 +1207,7 @@ define void @void_func_v5i16(<5 x i16> %arg0) #0 { ; CHECK: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>) ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[UV]](<5 x s16>), [[DEF1]](p1) :: (store 10 into `<5 x i16> addrspace(1)* undef`, align 16, addrspace 1) + ; CHECK: G_STORE [[UV]](<5 x s16>), [[DEF1]](p1) :: (store (<5 x s16>) into `<5 x i16> addrspace(1)* undef`, align 16, addrspace 1) ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] ; CHECK: S_SETPC_B64_return [[COPY4]] store <5 x i16> %arg0, <5 x i16> addrspace(1)* undef @@ -1225,7 +1225,7 @@ define void @void_func_v8i16(<8 x i16> %arg0) #0 { ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[CONCAT_VECTORS]](<8 x s16>), [[DEF]](p1) :: (store 16 into `<8 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[CONCAT_VECTORS]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<8 x i16> addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] ; CHECK: S_SETPC_B64_return [[COPY5]] store <8 x i16> %arg0, <8 x i16> addrspace(1)* undef @@ -1247,7 +1247,7 @@ define void @void_func_v16i16(<16 x i16> %arg0) #0 { ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>) ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[CONCAT_VECTORS]](<16 x s16>), [[DEF]](p1) :: (store 32 into `<16 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[CONCAT_VECTORS]](<16 x s16>), [[DEF]](p1) :: (store (<16 x s16>) into `<16 x i16> addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] ; CHECK: S_SETPC_B64_return [[COPY9]] store <16 x i16> %arg0, <16 x i16> addrspace(1)* undef @@ -1293,13 +1293,13 @@ define void @void_func_v65i16(<65 x i16> %arg0) #0 { ; CHECK: [[COPY30:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr30 ; CHECK: [[COPY31:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr31 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.0, align 16, addrspace 5) + ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<130 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>), [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>), [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[COPY12]](<2 x s16>), [[COPY13]](<2 x s16>), [[COPY14]](<2 x s16>), [[COPY15]](<2 x s16>), [[COPY16]](<2 x s16>), [[COPY17]](<2 x s16>), [[COPY18]](<2 x s16>), [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>), [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>), [[COPY23]](<2 x s16>), [[COPY24]](<2 x s16>), [[COPY25]](<2 x s16>), [[COPY26]](<2 x s16>), [[COPY27]](<2 x s16>), [[COPY28]](<2 x s16>), [[COPY29]](<2 x s16>), [[COPY30]](<2 x s16>), [[COPY31]](<2 x s16>), [[LOAD]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>) ; CHECK: [[UV:%[0-9]+]]:_(<65 x s16>), [[UV1:%[0-9]+]]:_(<65 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<130 x s16>) ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[UV]](<65 x s16>), [[DEF1]](p1) :: (store 130 into `<65 x i16> addrspace(1)* undef`, align 256, addrspace 1) + ; CHECK: G_STORE [[UV]](<65 x s16>), [[DEF1]](p1) :: (store (<65 x s16>) into `<65 x i16> addrspace(1)* undef`, align 256, addrspace 1) ; CHECK: [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] ; CHECK: S_SETPC_B64_return [[COPY33]] store <65 x i16> %arg0, <65 x i16> addrspace(1)* undef @@ -1315,7 +1315,7 @@ define void @void_func_v2f32(<2 x float> %arg0) #0 { ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (store 8 into `<2 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (store (<2 x s32>) into `<2 x float> addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] ; CHECK: S_SETPC_B64_return [[COPY3]] store <2 x float> %arg0, <2 x float> addrspace(1)* undef @@ -1332,7 +1332,7 @@ define void @void_func_v3f32(<3 x float> %arg0) #0 { ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (store 12 into `<3 x float> addrspace(1)* undef`, align 16, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (store (<3 x s32>) into `<3 x float> addrspace(1)* undef`, align 16, addrspace 1) ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] ; CHECK: S_SETPC_B64_return [[COPY4]] store <3 x float> %arg0, <3 x float> addrspace(1)* undef @@ -1350,7 +1350,7 @@ define void @void_func_v4f32(<4 x float> %arg0) #0 { ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] ; CHECK: S_SETPC_B64_return [[COPY5]] store <4 x float> %arg0, <4 x float> addrspace(1)* undef @@ -1372,7 +1372,7 @@ define void @void_func_v8f32(<8 x float> %arg0) #0 { ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (store 32 into `<8 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (store (<8 x s32>) into `<8 x float> addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] ; CHECK: S_SETPC_B64_return [[COPY9]] store <8 x float> %arg0, <8 x float> addrspace(1)* undef @@ -1402,7 +1402,7 @@ define void @void_func_v16f32(<16 x float> %arg0) #0 { ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32) ; CHECK: [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (store 64 into `<16 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (store (<16 x s32>) into `<16 x float> addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]] ; CHECK: S_SETPC_B64_return [[COPY17]] store <16 x float> %arg0, <16 x float> addrspace(1)* undef @@ -1422,7 +1422,7 @@ define void @void_func_v2f64(<2 x double> %arg0) #0 { ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (store 16 into `<2 x double> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<2 x double> addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] ; CHECK: S_SETPC_B64_return [[COPY5]] store <2 x double> %arg0, <2 x double> addrspace(1)* undef @@ -1445,7 +1445,7 @@ define void @void_func_v3f64(<3 x double> %arg0) #0 { ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) ; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<3 x s64>), [[DEF]](p1) :: (store 24 into `<3 x double> addrspace(1)* undef`, align 32, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR]](<3 x s64>), [[DEF]](p1) :: (store (<3 x s64>) into `<3 x double> addrspace(1)* undef`, align 32, addrspace 1) ; CHECK: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] ; CHECK: S_SETPC_B64_return [[COPY7]] store <3 x double> %arg0, <3 x double> addrspace(1)* undef @@ -1471,7 +1471,7 @@ define void @void_func_v4f64(<4 x double> %arg0) #0 { ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s64>), [[DEF]](p1) :: (store 32 into `<4 x double> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s64>), [[DEF]](p1) :: (store (<4 x s64>) into `<4 x double> addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] ; CHECK: S_SETPC_B64_return [[COPY9]] store <4 x double> %arg0, <4 x double> addrspace(1)* undef @@ -1509,7 +1509,7 @@ define void @void_func_v8f64(<8 x double> %arg0) #0 { ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) ; CHECK: [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<8 x s64>), [[DEF]](p1) :: (store 64 into `<8 x double> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR]](<8 x s64>), [[DEF]](p1) :: (store (<8 x s64>) into `<8 x double> addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]] ; CHECK: S_SETPC_B64_return [[COPY17]] store <8 x double> %arg0, <8 x double> addrspace(1)* undef @@ -1571,7 +1571,7 @@ define void @void_func_v16f64(<16 x double> %arg0) #0 { ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64), [[MV12]](s64), [[MV13]](s64), [[MV14]](s64), [[MV15]](s64) ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[BUILD_VECTOR]](<16 x s64>), [[DEF]](p1) :: (store 128 into `<16 x double> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR]](<16 x s64>), [[DEF]](p1) :: (store (<16 x s64>) into `<16 x double> addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] ; CHECK: S_SETPC_B64_return [[COPY33]] store <16 x double> %arg0, <16 x double> addrspace(1)* undef @@ -1585,7 +1585,7 @@ define void @void_func_v2f16(<2 x half> %arg0) #0 { ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[COPY]](<2 x s16>), [[DEF]](p1) :: (store 4 into `<2 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[COPY]](<2 x s16>), [[DEF]](p1) :: (store (<2 x s16>) into `<2 x half> addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] ; CHECK: S_SETPC_B64_return [[COPY2]] store <2 x half> %arg0, <2 x half> addrspace(1)* undef @@ -1603,7 +1603,7 @@ define void @void_func_v3f16(<3 x half> %arg0) #0 { ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[UV]](<3 x s16>), [[DEF1]](p1) :: (store 6 into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK: G_STORE [[UV]](<3 x s16>), [[DEF1]](p1) :: (store (<3 x s16>) into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1) ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] ; CHECK: S_SETPC_B64_return [[COPY3]] store <3 x half> %arg0, <3 x half> addrspace(1)* undef @@ -1619,7 +1619,7 @@ define void @void_func_v4f16(<4 x half> %arg0) #0 { ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (store 8 into `<4 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (store (<4 x s16>) into `<4 x half> addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] ; CHECK: S_SETPC_B64_return [[COPY3]] store <4 x half> %arg0, <4 x half> addrspace(1)* undef @@ -1637,7 +1637,7 @@ define void @void_func_v8f16(<8 x half> %arg0) #0 { ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[CONCAT_VECTORS]](<8 x s16>), [[DEF]](p1) :: (store 16 into `<8 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[CONCAT_VECTORS]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<8 x half> addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] ; CHECK: S_SETPC_B64_return [[COPY5]] store <8 x half> %arg0, <8 x half> addrspace(1)* undef @@ -1659,7 +1659,7 @@ define void @void_func_v16f16(<16 x half> %arg0) #0 { ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>) ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[CONCAT_VECTORS]](<16 x s16>), [[DEF]](p1) :: (store 32 into `<16 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[CONCAT_VECTORS]](<16 x s16>), [[DEF]](p1) :: (store (<16 x s16>) into `<16 x half> addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] ; CHECK: S_SETPC_B64_return [[COPY9]] store <16 x half> %arg0, <16 x half> addrspace(1)* undef @@ -1679,9 +1679,9 @@ define void @void_func_i32_i64_i32(i32 %arg0, i64 %arg1, i32 %arg2) #0 { ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[COPY5:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[MV]](s64), [[COPY5]](p1) :: (volatile store 8 into `i64 addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[COPY3]](s32), [[DEF]](p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[MV]](s64), [[COPY5]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[COPY3]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] ; CHECK: S_SETPC_B64_return [[COPY6]] store volatile i32 %arg0, i32 addrspace(1)* undef @@ -1697,7 +1697,7 @@ define void @void_func_struct_i32({ i32 } %arg0) #0 { ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store 4 into `{ i32 } addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `{ i32 } addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] ; CHECK: S_SETPC_B64_return [[COPY2]] store { i32 } %arg0, { i32 } addrspace(1)* undef @@ -1714,10 +1714,10 @@ define void @void_func_struct_i8_i32({ i8, i32 } %arg0) #0 { ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (store 1 into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (store (s8) into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64) - ; CHECK: G_STORE [[COPY1]](s32), [[PTR_ADD]](p1) :: (store 4 into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) + ; CHECK: G_STORE [[COPY1]](s32), [[PTR_ADD]](p1) :: (store (s32) into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] ; CHECK: S_SETPC_B64_return [[COPY3]] store { i8, i32 } %arg0, { i8, i32 } addrspace(1)* undef @@ -1732,14 +1732,14 @@ define void @void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval({ i8, ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (dereferenceable load 1 from %ir.arg0, align 4, addrspace 5) + ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s8) from %ir.arg0, align 4, addrspace 5) ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (dereferenceable load 4 from %ir.arg0 + 4, addrspace 5) - ; CHECK: G_STORE [[LOAD]](s8), [[DEF]](p1) :: (store 1 into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (dereferenceable load (s32) from %ir.arg0 + 4, addrspace 5) + ; CHECK: G_STORE [[LOAD]](s8), [[DEF]](p1) :: (store (s8) into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64) - ; CHECK: G_STORE [[LOAD1]](s32), [[PTR_ADD1]](p1) :: (store 4 into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) + ; CHECK: G_STORE [[LOAD1]](s32), [[PTR_ADD1]](p1) :: (store (s32) into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] ; CHECK: S_SETPC_B64_return [[COPY2]] %arg0.load = load { i8, i32 }, { i8, i32 } addrspace(5)* %arg0 @@ -1759,21 +1759,21 @@ define void @void_func_byval_struct_i8_i32_x2({ i8, i32 } addrspace(5)* byval({ ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[DEF1:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (volatile dereferenceable load 1 from %ir.arg0, align 4, addrspace 5) + ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (volatile dereferenceable load (s8) from %ir.arg0, align 4, addrspace 5) ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (volatile dereferenceable load 4 from %ir.arg0 + 4, addrspace 5) - ; CHECK: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[COPY1]](p5) :: (volatile dereferenceable load 1 from %ir.arg1, align 4, addrspace 5) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (volatile dereferenceable load (s32) from %ir.arg0 + 4, addrspace 5) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[COPY1]](p5) :: (volatile dereferenceable load (s8) from %ir.arg1, align 4, addrspace 5) ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY1]], [[C]](s32) - ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (volatile dereferenceable load 4 from %ir.arg1 + 4, addrspace 5) - ; CHECK: G_STORE [[LOAD]](s8), [[DEF]](p1) :: (volatile store 1 into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (volatile dereferenceable load (s32) from %ir.arg1 + 4, addrspace 5) + ; CHECK: G_STORE [[LOAD]](s8), [[DEF]](p1) :: (volatile store (s8) into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64) - ; CHECK: G_STORE [[LOAD1]](s32), [[PTR_ADD2]](p1) :: (volatile store 4 into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) - ; CHECK: G_STORE [[LOAD2]](s8), [[DEF]](p1) :: (volatile store 1 into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK: G_STORE [[LOAD1]](s32), [[PTR_ADD2]](p1) :: (volatile store (s32) into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) + ; CHECK: G_STORE [[LOAD2]](s8), [[DEF]](p1) :: (volatile store (s8) into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64) - ; CHECK: G_STORE [[LOAD3]](s32), [[PTR_ADD3]](p1) :: (volatile store 4 into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) - ; CHECK: G_STORE [[COPY2]](s32), [[DEF1]](p3) :: (volatile store 4 into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK: G_STORE [[LOAD3]](s32), [[PTR_ADD3]](p1) :: (volatile store (s32) into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) + ; CHECK: G_STORE [[COPY2]](s32), [[DEF1]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] ; CHECK: S_SETPC_B64_return [[COPY4]] %arg0.load = load volatile { i8, i32 }, { i8, i32 } addrspace(5)* %arg0 @@ -1795,10 +1795,10 @@ define void @void_func_byval_i32_byval_i64(i32 addrspace(5)* byval(i32) %arg0, i ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[COPY3:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (dereferenceable load 4 from %ir.arg0, addrspace 5) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[COPY1]](p5) :: (dereferenceable load 8 from %ir.arg1, addrspace 5) - ; CHECK: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[LOAD1]](s64), [[COPY3]](p1) :: (store 8 into `i64 addrspace(1)* undef`, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s32) from %ir.arg0, addrspace 5) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (s64) from %ir.arg1, addrspace 5) + ; CHECK: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[LOAD1]](s64), [[COPY3]](p1) :: (store (s64) into `i64 addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] ; CHECK: S_SETPC_B64_return [[COPY4]] %arg0.load = load i32, i32 addrspace(5)* %arg0 @@ -1810,13 +1810,6 @@ define void @void_func_byval_i32_byval_i64(i32 addrspace(5)* byval(i32) %arg0, i define void @void_func_byval_i8_align32_i16_align64(i8 addrspace(5)* byval(i8) %arg0, i16 addrspace(5)* byval(i16) align 64 %arg1) #0 { ; CHECK-LABEL: name: void_func_byval_i8_align32_i16_align64 - ; CHECK: frameInfo: - ; CHECK: maxAlignment: 64 - ; CHECK: fixedStack: - ; CHECK: - { id: 0, type: default, offset: 64, size: 2, alignment: 16, stack-id: default, - ; CHECK-NEXT: isImmutable: false, isAliased: false, callee-saved-register: '', - ; CHECK: - { id: 1, type: default, offset: 0, size: 1, alignment: 16, stack-id: default, - ; CHECK-NEXT: isImmutable: false, isAliased: false, callee-saved-register: '', ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 @@ -1826,10 +1819,10 @@ define void @void_func_byval_i8_align32_i16_align64(i8 addrspace(5)* byval(i8) % ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 ; CHECK: [[COPY3:%[0-9]+]]:_(p1) = COPY [[C]](p1) - ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (dereferenceable load 1 from %ir.arg0, addrspace 5) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[COPY1]](p5) :: (dereferenceable load 2 from %ir.arg1, addrspace 5) - ; CHECK: G_STORE [[LOAD]](s8), [[C]](p1) :: (store 1 into `i8 addrspace(1)* null`, addrspace 1) - ; CHECK: G_STORE [[LOAD1]](s16), [[COPY3]](p1) :: (store 2 into `i16 addrspace(1)* null`, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s8) from %ir.arg0, addrspace 5) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (s16) from %ir.arg1, addrspace 5) + ; CHECK: G_STORE [[LOAD]](s8), [[C]](p1) :: (store (s8) into `i8 addrspace(1)* null`, addrspace 1) + ; CHECK: G_STORE [[LOAD1]](s16), [[COPY3]](p1) :: (store (s16) into `i16 addrspace(1)* null`, addrspace 1) ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] ; CHECK: S_SETPC_B64_return [[COPY4]] %arg0.load = load i8, i8 addrspace(5)* %arg0 @@ -1842,13 +1835,6 @@ define void @void_func_byval_i8_align32_i16_align64(i8 addrspace(5)* byval(i8) % ; Make sure the alignment is taken from the correct parameter. define void @byval_a3i32_align128_byval_i16_align64([3 x i32] addrspace(5)* byval([3 x i32]) align 128 %arg0, i16 addrspace(5)* byval(i16) align 64 %arg1) #0 { ; CHECK-LABEL: name: byval_a3i32_align128_byval_i16_align64 - ; CHECK: frameInfo: - ; CHECK: maxAlignment: 128 - ; CHECK: fixedStack: - ; CHECK-NEXT: - { id: 0, type: default, offset: 64, size: 2, alignment: 16, stack-id: default, - ; CHECK-NEXT: isImmutable: false, isAliased: false, callee-saved-register: '', - ; CHECK: - { id: 1, type: default, offset: 0, size: 12, alignment: 16, stack-id: default, - ; CHECK-NEXT: isImmutable: false, isAliased: false, callee-saved-register: '', ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $sgpr30_sgpr31 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 @@ -1858,22 +1844,22 @@ define void @byval_a3i32_align128_byval_i16_align64([3 x i32] addrspace(5)* byva ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 ; CHECK: [[COPY3:%[0-9]+]]:_(p1) = COPY [[C]](p1) - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (dereferenceable load 4 from %ir.arg0, addrspace 5) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s32) from %ir.arg0, addrspace 5) ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (dereferenceable load 4 from %ir.arg0 + 4, addrspace 5) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (dereferenceable load (s32) from %ir.arg0 + 4, addrspace 5) ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (dereferenceable load 4 from %ir.arg0 + 8, addrspace 5) - ; CHECK: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[COPY1]](p5) :: (dereferenceable load 2 from %ir.arg1, addrspace 5) - ; CHECK: G_STORE [[LOAD]](s32), [[C]](p1) :: (store 4 into `[3 x i32] addrspace(1)* null`, addrspace 1) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (dereferenceable load (s32) from %ir.arg0 + 8, addrspace 5) + ; CHECK: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (s16) from %ir.arg1, addrspace 5) + ; CHECK: G_STORE [[LOAD]](s32), [[C]](p1) :: (store (s32) into `[3 x i32] addrspace(1)* null`, addrspace 1) ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[C]], [[C3]](s64) - ; CHECK: G_STORE [[LOAD1]](s32), [[PTR_ADD2]](p1) :: (store 4 into `[3 x i32] addrspace(1)* null` + 4, addrspace 1) + ; CHECK: G_STORE [[LOAD1]](s32), [[PTR_ADD2]](p1) :: (store (s32) into `[3 x i32] addrspace(1)* null` + 4, addrspace 1) ; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[C]], [[C4]](s64) - ; CHECK: G_STORE [[LOAD2]](s32), [[PTR_ADD3]](p1) :: (store 4 into `[3 x i32] addrspace(1)* null` + 8, addrspace 1) - ; CHECK: G_STORE [[LOAD3]](s16), [[COPY3]](p1) :: (store 2 into `i16 addrspace(1)* null`, addrspace 1) + ; CHECK: G_STORE [[LOAD2]](s32), [[PTR_ADD3]](p1) :: (store (s32) into `[3 x i32] addrspace(1)* null` + 8, addrspace 1) + ; CHECK: G_STORE [[LOAD3]](s16), [[COPY3]](p1) :: (store (s16) into `i16 addrspace(1)* null`, addrspace 1) ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] ; CHECK: S_SETPC_B64_return [[COPY4]] %arg0.load = load [3 x i32], [3 x i32] addrspace(5)* %arg0 @@ -1886,13 +1872,6 @@ define void @byval_a3i32_align128_byval_i16_align64([3 x i32] addrspace(5)* byva ; byval argument after non-byval stack passed argument define void @void_func_v32i32_i32_byval_i8(<32 x i32> %arg0, i32 %arg1, i8 addrspace(5)* byval(i8) align 8 %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_i32_byval_i8 - ; CHECK: frameInfo: - ; CHECK: maxAlignment: 8 - ; CHECK: fixedStack: - ; CHECK: - { id: 0, type: default, offset: 8, size: 1, alignment: 8, stack-id: default, - ; CHECK-NEXT: isImmutable: false, isAliased: false, callee-saved-register: '', - ; CHECK: - { id: 1, type: default, offset: 0, size: 4, alignment: 16, stack-id: default, - ; CHECK-NEXT: isImmutable: true, isAliased: false, callee-saved-register: '', ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -1929,15 +1908,15 @@ define void @void_func_v32i32_i32_byval_i8(<32 x i32> %arg0, i32 %arg1, i8 addrs ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.1, align 16, addrspace 5) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.1, align 16, addrspace 5) ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK: [[COPY32:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) ; CHECK: [[COPY33:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 ; CHECK: [[COPY34:%[0-9]+]]:_(p1) = COPY [[C]](p1) - ; CHECK: G_STORE [[LOAD]](s32), [[C]](p1) :: (store 4 into `i32 addrspace(1)* null`, addrspace 1) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[COPY32]](p5) :: (dereferenceable load 1 from %ir.arg2, addrspace 5) - ; CHECK: G_STORE [[LOAD1]](s8), [[COPY34]](p1) :: (store 1 into `i8 addrspace(1)* null`, addrspace 1) + ; CHECK: G_STORE [[LOAD]](s32), [[C]](p1) :: (store (s32) into `i32 addrspace(1)* null`, addrspace 1) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[COPY32]](p5) :: (dereferenceable load (s8) from %ir.arg2, addrspace 5) + ; CHECK: G_STORE [[LOAD1]](s8), [[COPY34]](p1) :: (store (s8) into `i8 addrspace(1)* null`, addrspace 1) ; CHECK: [[COPY35:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY33]] ; CHECK: S_SETPC_B64_return [[COPY35]] store i32 %arg1, i32 addrspace(1)* null @@ -1949,13 +1928,6 @@ define void @void_func_v32i32_i32_byval_i8(<32 x i32> %arg0, i32 %arg1, i8 addrs ; byval argument before non-byval stack passed argument define void @void_func_v32i32_byval_i8_i32(<32 x i32> %arg0, i8 addrspace(5)* byval(i8) %arg1, i32 %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_byval_i8_i32 - ; CHECK: frameInfo: - ; CHECK: maxAlignment: 4 - ; CHECK: fixedStack: - ; CHECK-NEXT: - { id: 0, type: default, offset: 4, size: 4, alignment: 4, stack-id: default, - ; CHECK-NEXT: isImmutable: true, isAliased: false, callee-saved-register: '', - ; CHECK: - { id: 1, type: default, offset: 0, size: 1, alignment: 16, stack-id: default, - ; CHECK-NEXT: isImmutable: false, isAliased: false, callee-saved-register: '', ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -1994,13 +1966,13 @@ define void @void_func_v32i32_byval_i8_i32(<32 x i32> %arg0, i8 addrspace(5)* by ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 ; CHECK: [[COPY32:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.0, addrspace 5) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.0, addrspace 5) ; CHECK: [[COPY33:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 ; CHECK: [[COPY34:%[0-9]+]]:_(p1) = COPY [[C]](p1) - ; CHECK: G_STORE [[LOAD]](s32), [[C]](p1) :: (store 4 into `i32 addrspace(1)* null`, addrspace 1) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[COPY32]](p5) :: (dereferenceable load 1 from %ir.arg1, addrspace 5) - ; CHECK: G_STORE [[LOAD1]](s8), [[COPY34]](p1) :: (store 1 into `i8 addrspace(1)* null`, addrspace 1) + ; CHECK: G_STORE [[LOAD]](s32), [[C]](p1) :: (store (s32) into `i32 addrspace(1)* null`, addrspace 1) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[COPY32]](p5) :: (dereferenceable load (s8) from %ir.arg1, addrspace 5) + ; CHECK: G_STORE [[LOAD1]](s8), [[COPY34]](p1) :: (store (s8) into `i8 addrspace(1)* null`, addrspace 1) ; CHECK: [[COPY35:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY33]] ; CHECK: S_SETPC_B64_return [[COPY35]] store i32 %arg2, i32 addrspace(1)* null @@ -2047,19 +2019,19 @@ define void @void_func_v32i32_i32_i64(<32 x i32> %arg0, i32 %arg1, i64 %arg2) #0 ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.2, align 16, addrspace 5) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.2, align 16, addrspace 5) ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.1, addrspace 5) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5) ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.0, align 8, addrspace 5) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.0, align 8, addrspace 5) ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD1]](s32), [[LOAD2]](s32) ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK: [[COPY34:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[LOAD]](s32), [[COPY33]](p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[MV]](s64), [[COPY34]](p1) :: (volatile store 8 into `i64 addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[LOAD]](s32), [[COPY33]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[MV]](s64), [[COPY34]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY35:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] ; CHECK: S_SETPC_B64_return [[COPY35]] store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef @@ -2107,26 +2079,26 @@ define void @void_func_v32i32_i1_i8_i16(<32 x i32> %arg0, i1 %arg1, i8 %arg2, i1 ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 1 from %fixed-stack.3, align 16, addrspace 5) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s8) from %fixed-stack.3, align 16, addrspace 5) ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[LOAD]](s32) ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 2 from %fixed-stack.2, align 4, addrspace 5) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s16) from %fixed-stack.2, align 4, addrspace 5) ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD1]](s16) ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 2 from %fixed-stack.1, align 8, addrspace 5) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s16) from %fixed-stack.1, align 8, addrspace 5) ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load 2 from %fixed-stack.0, align 4, addrspace 5) + ; CHECK: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s16) from %fixed-stack.0, align 4, addrspace 5) ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK: [[COPY34:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK: [[COPY35:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK: [[COPY36:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[TRUNC]](s1), [[COPY33]](p1) :: (volatile store 1 into `i1 addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[TRUNC1]](s8), [[COPY34]](p1) :: (volatile store 1 into `i8 addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[LOAD2]](s16), [[COPY35]](p1) :: (volatile store 2 into `i16 addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[LOAD3]](s16), [[COPY36]](p1) :: (volatile store 2 into `half addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[TRUNC]](s1), [[COPY33]](p1) :: (volatile store (s1) into `i1 addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[TRUNC1]](s8), [[COPY34]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[LOAD2]](s16), [[COPY35]](p1) :: (volatile store (s16) into `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[LOAD3]](s16), [[COPY36]](p1) :: (volatile store (s16) into `half addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY37:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] ; CHECK: S_SETPC_B64_return [[COPY37]] store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef @@ -2175,16 +2147,16 @@ define void @void_func_v32i32_p3_p5_i16(<32 x i32> %arg0, i8 addrspace(3)* %arg1 ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.1, align 16, addrspace 5) + ; CHECK: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.1, align 16, addrspace 5) ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD1:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.0, addrspace 5) + ; CHECK: [[LOAD1:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.0, addrspace 5) ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK: [[COPY34:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[LOAD]](p3), [[COPY33]](p1) :: (volatile store 4 into `i8 addrspace(3)* addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[LOAD1]](p5), [[COPY34]](p1) :: (volatile store 4 into `i8 addrspace(5)* addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[LOAD]](p3), [[COPY33]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[LOAD1]](p5), [[COPY34]](p1) :: (volatile store (p5) into `i8 addrspace(5)* addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY35:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] ; CHECK: S_SETPC_B64_return [[COPY35]] store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef @@ -2231,22 +2203,22 @@ define void @void_func_v32i32_v2i32_v2f32(<32 x i32> %arg0, <2 x i32> %arg1, <2 ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.3, align 16, addrspace 5) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.3, align 16, addrspace 5) ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.2, addrspace 5) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.2, addrspace 5) ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.1, align 8, addrspace 5) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.1, align 8, addrspace 5) ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load 4 from %fixed-stack.0, addrspace 5) + ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.0, addrspace 5) ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD2]](s32), [[LOAD3]](s32) ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK: [[COPY34:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[BUILD_VECTOR1]](<2 x s32>), [[COPY33]](p1) :: (volatile store 8 into `<2 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[BUILD_VECTOR2]](<2 x s32>), [[COPY34]](p1) :: (volatile store 8 into `<2 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR1]](<2 x s32>), [[COPY33]](p1) :: (volatile store (<2 x s32>) into `<2 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR2]](<2 x s32>), [[COPY34]](p1) :: (volatile store (<2 x s32>) into `<2 x float> addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY35:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] ; CHECK: S_SETPC_B64_return [[COPY35]] store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef @@ -2293,16 +2265,16 @@ define void @void_func_v32i32_v2i16_v2f16(<32 x i32> %arg0, <2 x i16> %arg1, <2 ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.1, align 16, addrspace 5) + ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.1, align 16, addrspace 5) ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.0, addrspace 5) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.0, addrspace 5) ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK: [[COPY34:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[LOAD]](<2 x s16>), [[COPY33]](p1) :: (volatile store 4 into `<2 x i16> addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[LOAD1]](<2 x s16>), [[COPY34]](p1) :: (volatile store 4 into `<2 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[LOAD]](<2 x s16>), [[COPY33]](p1) :: (volatile store (<2 x s16>) into `<2 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[LOAD1]](<2 x s16>), [[COPY34]](p1) :: (volatile store (<2 x s16>) into `<2 x half> addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY35:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] ; CHECK: S_SETPC_B64_return [[COPY35]] store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef @@ -2349,24 +2321,24 @@ define void @void_func_v32i32_v2i64_v2f64(<32 x i32> %arg0, <2 x i64> %arg1, <2 ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.7, align 16, addrspace 5) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.7, align 16, addrspace 5) ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.6, addrspace 5) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.6, addrspace 5) ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.5, align 8, addrspace 5) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.5, align 8, addrspace 5) ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 - ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load 4 from %fixed-stack.4, addrspace 5) + ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5) ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; CHECK: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 - ; CHECK: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load 4 from %fixed-stack.3, align 16, addrspace 5) + ; CHECK: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.3, align 16, addrspace 5) ; CHECK: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; CHECK: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load 4 from %fixed-stack.2, addrspace 5) + ; CHECK: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.2, addrspace 5) ; CHECK: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load 4 from %fixed-stack.1, align 8, addrspace 5) + ; CHECK: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.1, align 8, addrspace 5) ; CHECK: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load 4 from %fixed-stack.0, addrspace 5) + ; CHECK: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (s32) from %fixed-stack.0, addrspace 5) ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) ; CHECK: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32) ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV3]](s64) @@ -2374,9 +2346,9 @@ define void @void_func_v32i32_v2i64_v2f64(<32 x i32> %arg0, <2 x i64> %arg1, <2 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK: [[COPY34:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[BUILD_VECTOR1]](<2 x s64>), [[COPY33]](p1) :: (volatile store 16 into `<2 x i64> addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[BUILD_VECTOR2]](<2 x s64>), [[COPY34]](p1) :: (volatile store 16 into `<2 x double> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR1]](<2 x s64>), [[COPY33]](p1) :: (volatile store (<2 x s64>) into `<2 x i64> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR2]](<2 x s64>), [[COPY34]](p1) :: (volatile store (<2 x s64>) into `<2 x double> addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY35:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] ; CHECK: S_SETPC_B64_return [[COPY35]] store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef @@ -2423,30 +2395,30 @@ define void @void_func_v32i32_v4i32_v4f32(<32 x i32> %arg0, <4 x i32> %arg1, <4 ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.7, align 16, addrspace 5) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.7, align 16, addrspace 5) ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.6, addrspace 5) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.6, addrspace 5) ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.5, align 8, addrspace 5) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.5, align 8, addrspace 5) ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 - ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load 4 from %fixed-stack.4, addrspace 5) + ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5) ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; CHECK: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 - ; CHECK: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load 4 from %fixed-stack.3, align 16, addrspace 5) + ; CHECK: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.3, align 16, addrspace 5) ; CHECK: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; CHECK: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load 4 from %fixed-stack.2, addrspace 5) + ; CHECK: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.2, addrspace 5) ; CHECK: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load 4 from %fixed-stack.1, align 8, addrspace 5) + ; CHECK: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.1, align 8, addrspace 5) ; CHECK: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load 4 from %fixed-stack.0, addrspace 5) + ; CHECK: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (s32) from %fixed-stack.0, addrspace 5) ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK: [[COPY34:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[COPY33]](p1) :: (volatile store 16 into `<4 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[BUILD_VECTOR2]](<4 x s32>), [[COPY34]](p1) :: (volatile store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[COPY33]](p1) :: (volatile store (<4 x s32>) into `<4 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR2]](<4 x s32>), [[COPY34]](p1) :: (volatile store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY35:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] ; CHECK: S_SETPC_B64_return [[COPY35]] store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef @@ -2493,46 +2465,46 @@ define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8 ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.15 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.15, align 16, addrspace 5) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.15, align 16, addrspace 5) ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.14 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.14, addrspace 5) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.14, addrspace 5) ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.13 - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.13, align 8, addrspace 5) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.13, align 8, addrspace 5) ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.12 - ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load 4 from %fixed-stack.12, addrspace 5) + ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.12, addrspace 5) ; CHECK: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.11 - ; CHECK: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load 4 from %fixed-stack.11, align 16, addrspace 5) + ; CHECK: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.11, align 16, addrspace 5) ; CHECK: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.10 - ; CHECK: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load 4 from %fixed-stack.10, addrspace 5) + ; CHECK: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.10, addrspace 5) ; CHECK: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9 - ; CHECK: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load 4 from %fixed-stack.9, align 8, addrspace 5) + ; CHECK: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.9, align 8, addrspace 5) ; CHECK: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8 - ; CHECK: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load 4 from %fixed-stack.8, addrspace 5) + ; CHECK: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (s32) from %fixed-stack.8, addrspace 5) ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) ; CHECK: [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7 - ; CHECK: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load 4 from %fixed-stack.7, align 16, addrspace 5) + ; CHECK: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (s32) from %fixed-stack.7, align 16, addrspace 5) ; CHECK: [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6 - ; CHECK: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load 4 from %fixed-stack.6, addrspace 5) + ; CHECK: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load (s32) from %fixed-stack.6, addrspace 5) ; CHECK: [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 - ; CHECK: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load 4 from %fixed-stack.5, align 8, addrspace 5) + ; CHECK: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load (s32) from %fixed-stack.5, align 8, addrspace 5) ; CHECK: [[FRAME_INDEX11:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 - ; CHECK: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load 4 from %fixed-stack.4, addrspace 5) + ; CHECK: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5) ; CHECK: [[FRAME_INDEX12:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 - ; CHECK: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load 4 from %fixed-stack.3, align 16, addrspace 5) + ; CHECK: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load (s32) from %fixed-stack.3, align 16, addrspace 5) ; CHECK: [[FRAME_INDEX13:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; CHECK: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load 4 from %fixed-stack.2, addrspace 5) + ; CHECK: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load (s32) from %fixed-stack.2, addrspace 5) ; CHECK: [[FRAME_INDEX14:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load 4 from %fixed-stack.1, align 8, addrspace 5) + ; CHECK: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load (s32) from %fixed-stack.1, align 8, addrspace 5) ; CHECK: [[FRAME_INDEX15:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load 4 from %fixed-stack.0, addrspace 5) + ; CHECK: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load (s32) from %fixed-stack.0, addrspace 5) ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32) ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK: [[COPY34:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[BUILD_VECTOR1]](<8 x s32>), [[COPY33]](p1) :: (volatile store 32 into `<8 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[BUILD_VECTOR2]](<8 x s32>), [[COPY34]](p1) :: (volatile store 32 into `<8 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR1]](<8 x s32>), [[COPY33]](p1) :: (volatile store (<8 x s32>) into `<8 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR2]](<8 x s32>), [[COPY34]](p1) :: (volatile store (<8 x s32>) into `<8 x float> addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY35:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] ; CHECK: S_SETPC_B64_return [[COPY35]] store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef @@ -2579,78 +2551,78 @@ define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1, ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.31 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.31, align 16, addrspace 5) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.31, align 16, addrspace 5) ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.30 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.30, addrspace 5) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.30, addrspace 5) ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.29 - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.29, align 8, addrspace 5) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.29, align 8, addrspace 5) ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.28 - ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load 4 from %fixed-stack.28, addrspace 5) + ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.28, addrspace 5) ; CHECK: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.27 - ; CHECK: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load 4 from %fixed-stack.27, align 16, addrspace 5) + ; CHECK: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.27, align 16, addrspace 5) ; CHECK: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.26 - ; CHECK: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load 4 from %fixed-stack.26, addrspace 5) + ; CHECK: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.26, addrspace 5) ; CHECK: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.25 - ; CHECK: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load 4 from %fixed-stack.25, align 8, addrspace 5) + ; CHECK: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.25, align 8, addrspace 5) ; CHECK: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.24 - ; CHECK: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load 4 from %fixed-stack.24, addrspace 5) + ; CHECK: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (s32) from %fixed-stack.24, addrspace 5) ; CHECK: [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.23 - ; CHECK: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load 4 from %fixed-stack.23, align 16, addrspace 5) + ; CHECK: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (s32) from %fixed-stack.23, align 16, addrspace 5) ; CHECK: [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.22 - ; CHECK: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load 4 from %fixed-stack.22, addrspace 5) + ; CHECK: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load (s32) from %fixed-stack.22, addrspace 5) ; CHECK: [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.21 - ; CHECK: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load 4 from %fixed-stack.21, align 8, addrspace 5) + ; CHECK: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load (s32) from %fixed-stack.21, align 8, addrspace 5) ; CHECK: [[FRAME_INDEX11:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.20 - ; CHECK: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load 4 from %fixed-stack.20, addrspace 5) + ; CHECK: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load (s32) from %fixed-stack.20, addrspace 5) ; CHECK: [[FRAME_INDEX12:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.19 - ; CHECK: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load 4 from %fixed-stack.19, align 16, addrspace 5) + ; CHECK: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load (s32) from %fixed-stack.19, align 16, addrspace 5) ; CHECK: [[FRAME_INDEX13:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.18 - ; CHECK: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load 4 from %fixed-stack.18, addrspace 5) + ; CHECK: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load (s32) from %fixed-stack.18, addrspace 5) ; CHECK: [[FRAME_INDEX14:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.17 - ; CHECK: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load 4 from %fixed-stack.17, align 8, addrspace 5) + ; CHECK: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load (s32) from %fixed-stack.17, align 8, addrspace 5) ; CHECK: [[FRAME_INDEX15:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.16 - ; CHECK: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load 4 from %fixed-stack.16, addrspace 5) + ; CHECK: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load (s32) from %fixed-stack.16, addrspace 5) ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32) ; CHECK: [[FRAME_INDEX16:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.15 - ; CHECK: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX16]](p5) :: (invariant load 4 from %fixed-stack.15, align 16, addrspace 5) + ; CHECK: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX16]](p5) :: (invariant load (s32) from %fixed-stack.15, align 16, addrspace 5) ; CHECK: [[FRAME_INDEX17:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.14 - ; CHECK: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX17]](p5) :: (invariant load 4 from %fixed-stack.14, addrspace 5) + ; CHECK: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX17]](p5) :: (invariant load (s32) from %fixed-stack.14, addrspace 5) ; CHECK: [[FRAME_INDEX18:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.13 - ; CHECK: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX18]](p5) :: (invariant load 4 from %fixed-stack.13, align 8, addrspace 5) + ; CHECK: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX18]](p5) :: (invariant load (s32) from %fixed-stack.13, align 8, addrspace 5) ; CHECK: [[FRAME_INDEX19:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.12 - ; CHECK: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX19]](p5) :: (invariant load 4 from %fixed-stack.12, addrspace 5) + ; CHECK: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX19]](p5) :: (invariant load (s32) from %fixed-stack.12, addrspace 5) ; CHECK: [[FRAME_INDEX20:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.11 - ; CHECK: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX20]](p5) :: (invariant load 4 from %fixed-stack.11, align 16, addrspace 5) + ; CHECK: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX20]](p5) :: (invariant load (s32) from %fixed-stack.11, align 16, addrspace 5) ; CHECK: [[FRAME_INDEX21:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.10 - ; CHECK: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX21]](p5) :: (invariant load 4 from %fixed-stack.10, addrspace 5) + ; CHECK: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX21]](p5) :: (invariant load (s32) from %fixed-stack.10, addrspace 5) ; CHECK: [[FRAME_INDEX22:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9 - ; CHECK: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX22]](p5) :: (invariant load 4 from %fixed-stack.9, align 8, addrspace 5) + ; CHECK: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX22]](p5) :: (invariant load (s32) from %fixed-stack.9, align 8, addrspace 5) ; CHECK: [[FRAME_INDEX23:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8 - ; CHECK: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX23]](p5) :: (invariant load 4 from %fixed-stack.8, addrspace 5) + ; CHECK: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX23]](p5) :: (invariant load (s32) from %fixed-stack.8, addrspace 5) ; CHECK: [[FRAME_INDEX24:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7 - ; CHECK: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX24]](p5) :: (invariant load 4 from %fixed-stack.7, align 16, addrspace 5) + ; CHECK: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX24]](p5) :: (invariant load (s32) from %fixed-stack.7, align 16, addrspace 5) ; CHECK: [[FRAME_INDEX25:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6 - ; CHECK: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX25]](p5) :: (invariant load 4 from %fixed-stack.6, addrspace 5) + ; CHECK: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX25]](p5) :: (invariant load (s32) from %fixed-stack.6, addrspace 5) ; CHECK: [[FRAME_INDEX26:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 - ; CHECK: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX26]](p5) :: (invariant load 4 from %fixed-stack.5, align 8, addrspace 5) + ; CHECK: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX26]](p5) :: (invariant load (s32) from %fixed-stack.5, align 8, addrspace 5) ; CHECK: [[FRAME_INDEX27:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 - ; CHECK: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX27]](p5) :: (invariant load 4 from %fixed-stack.4, addrspace 5) + ; CHECK: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX27]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5) ; CHECK: [[FRAME_INDEX28:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 - ; CHECK: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX28]](p5) :: (invariant load 4 from %fixed-stack.3, align 16, addrspace 5) + ; CHECK: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX28]](p5) :: (invariant load (s32) from %fixed-stack.3, align 16, addrspace 5) ; CHECK: [[FRAME_INDEX29:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; CHECK: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX29]](p5) :: (invariant load 4 from %fixed-stack.2, addrspace 5) + ; CHECK: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX29]](p5) :: (invariant load (s32) from %fixed-stack.2, addrspace 5) ; CHECK: [[FRAME_INDEX30:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX30]](p5) :: (invariant load 4 from %fixed-stack.1, align 8, addrspace 5) + ; CHECK: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX30]](p5) :: (invariant load (s32) from %fixed-stack.1, align 8, addrspace 5) ; CHECK: [[FRAME_INDEX31:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX31]](p5) :: (invariant load 4 from %fixed-stack.0, addrspace 5) + ; CHECK: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX31]](p5) :: (invariant load (s32) from %fixed-stack.0, addrspace 5) ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD16]](s32), [[LOAD17]](s32), [[LOAD18]](s32), [[LOAD19]](s32), [[LOAD20]](s32), [[LOAD21]](s32), [[LOAD22]](s32), [[LOAD23]](s32), [[LOAD24]](s32), [[LOAD25]](s32), [[LOAD26]](s32), [[LOAD27]](s32), [[LOAD28]](s32), [[LOAD29]](s32), [[LOAD30]](s32), [[LOAD31]](s32) ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK: [[COPY34:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[BUILD_VECTOR1]](<16 x s32>), [[COPY33]](p1) :: (volatile store 64 into `<16 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[BUILD_VECTOR2]](<16 x s32>), [[COPY34]](p1) :: (volatile store 64 into `<16 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR1]](<16 x s32>), [[COPY33]](p1) :: (volatile store (<16 x s32>) into `<16 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR2]](<16 x s32>), [[COPY34]](p1) :: (volatile store (<16 x s32>) into `<16 x float> addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY35:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] ; CHECK: S_SETPC_B64_return [[COPY35]] store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef @@ -2678,10 +2650,10 @@ define void @void_func_v3f32_wasted_reg(<3 x float> %arg0, i32 %arg1) #0 { ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C]](s32) ; CHECK: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C1]](s32) ; CHECK: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C2]](s32) - ; CHECK: G_STORE [[EVEC]](s32), [[DEF]](p3) :: (volatile store 4 into `float addrspace(3)* undef`, addrspace 3) - ; CHECK: G_STORE [[EVEC1]](s32), [[DEF]](p3) :: (volatile store 4 into `float addrspace(3)* undef`, addrspace 3) - ; CHECK: G_STORE [[EVEC2]](s32), [[DEF]](p3) :: (volatile store 4 into `float addrspace(3)* undef`, addrspace 3) - ; CHECK: G_STORE [[COPY3]](s32), [[COPY5]](p3) :: (volatile store 4 into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK: G_STORE [[EVEC]](s32), [[DEF]](p3) :: (volatile store (s32) into `float addrspace(3)* undef`, addrspace 3) + ; CHECK: G_STORE [[EVEC1]](s32), [[DEF]](p3) :: (volatile store (s32) into `float addrspace(3)* undef`, addrspace 3) + ; CHECK: G_STORE [[EVEC2]](s32), [[DEF]](p3) :: (volatile store (s32) into `float addrspace(3)* undef`, addrspace 3) + ; CHECK: G_STORE [[COPY3]](s32), [[COPY5]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) ; CHECK: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] ; CHECK: S_SETPC_B64_return [[COPY6]] %arg0.0 = extractelement <3 x float> %arg0, i32 0 @@ -2711,10 +2683,10 @@ define void @void_func_v3i32_wasted_reg(<3 x i32> %arg0, i32 %arg1) #0 { ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C]](s32) ; CHECK: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C1]](s32) ; CHECK: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C2]](s32) - ; CHECK: G_STORE [[EVEC]](s32), [[DEF]](p3) :: (volatile store 4 into `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: G_STORE [[EVEC1]](s32), [[DEF]](p3) :: (volatile store 4 into `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: G_STORE [[EVEC2]](s32), [[DEF]](p3) :: (volatile store 4 into `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: G_STORE [[COPY3]](s32), [[DEF]](p3) :: (volatile store 4 into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK: G_STORE [[EVEC]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK: G_STORE [[EVEC1]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK: G_STORE [[EVEC2]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK: G_STORE [[COPY3]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] ; CHECK: S_SETPC_B64_return [[COPY5]] %arg0.0 = extractelement <3 x i32> %arg0, i32 0 @@ -2768,7 +2740,7 @@ define void @void_func_v16i8(<16 x i8> %arg0) #0 { ; CHECK: [[TRUNC16:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[BUILD_VECTOR]](<16 x s16>) ; CHECK: [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: G_STORE [[TRUNC16]](<16 x s8>), [[DEF]](p1) :: (volatile store 16 into `<16 x i8> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[TRUNC16]](<16 x s8>), [[DEF]](p1) :: (volatile store (<16 x s8>) into `<16 x i8> addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]] ; CHECK: S_SETPC_B64_return [[COPY17]] store volatile <16 x i8> %arg0, <16 x i8> addrspace(1)* undef @@ -2814,44 +2786,44 @@ define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 { ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.15 - ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 2 from %fixed-stack.15, align 16, addrspace 5) + ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s16) from %fixed-stack.15, align 16, addrspace 5) ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.14 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 2 from %fixed-stack.14, align 4, addrspace 5) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s16) from %fixed-stack.14, align 4, addrspace 5) ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.13 - ; CHECK: [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 2 from %fixed-stack.13, align 8, addrspace 5) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s16) from %fixed-stack.13, align 8, addrspace 5) ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.12 - ; CHECK: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load 2 from %fixed-stack.12, align 4, addrspace 5) + ; CHECK: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s16) from %fixed-stack.12, align 4, addrspace 5) ; CHECK: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.11 - ; CHECK: [[LOAD4:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load 2 from %fixed-stack.11, align 16, addrspace 5) + ; CHECK: [[LOAD4:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s16) from %fixed-stack.11, align 16, addrspace 5) ; CHECK: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.10 - ; CHECK: [[LOAD5:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load 2 from %fixed-stack.10, align 4, addrspace 5) + ; CHECK: [[LOAD5:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s16) from %fixed-stack.10, align 4, addrspace 5) ; CHECK: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9 - ; CHECK: [[LOAD6:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load 2 from %fixed-stack.9, align 8, addrspace 5) + ; CHECK: [[LOAD6:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s16) from %fixed-stack.9, align 8, addrspace 5) ; CHECK: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8 - ; CHECK: [[LOAD7:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load 2 from %fixed-stack.8, align 4, addrspace 5) + ; CHECK: [[LOAD7:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (s16) from %fixed-stack.8, align 4, addrspace 5) ; CHECK: [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7 - ; CHECK: [[LOAD8:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load 2 from %fixed-stack.7, align 16, addrspace 5) + ; CHECK: [[LOAD8:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (s16) from %fixed-stack.7, align 16, addrspace 5) ; CHECK: [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6 - ; CHECK: [[LOAD9:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load 2 from %fixed-stack.6, align 4, addrspace 5) + ; CHECK: [[LOAD9:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load (s16) from %fixed-stack.6, align 4, addrspace 5) ; CHECK: [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 - ; CHECK: [[LOAD10:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load 2 from %fixed-stack.5, align 8, addrspace 5) + ; CHECK: [[LOAD10:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load (s16) from %fixed-stack.5, align 8, addrspace 5) ; CHECK: [[FRAME_INDEX11:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 - ; CHECK: [[LOAD11:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load 2 from %fixed-stack.4, align 4, addrspace 5) + ; CHECK: [[LOAD11:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load (s16) from %fixed-stack.4, align 4, addrspace 5) ; CHECK: [[FRAME_INDEX12:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 - ; CHECK: [[LOAD12:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load 2 from %fixed-stack.3, align 16, addrspace 5) + ; CHECK: [[LOAD12:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load (s16) from %fixed-stack.3, align 16, addrspace 5) ; CHECK: [[FRAME_INDEX13:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; CHECK: [[LOAD13:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load 2 from %fixed-stack.2, align 4, addrspace 5) + ; CHECK: [[LOAD13:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load (s16) from %fixed-stack.2, align 4, addrspace 5) ; CHECK: [[FRAME_INDEX14:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD14:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load 2 from %fixed-stack.1, align 8, addrspace 5) + ; CHECK: [[LOAD14:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load (s16) from %fixed-stack.1, align 8, addrspace 5) ; CHECK: [[FRAME_INDEX15:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD15:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load 2 from %fixed-stack.0, align 4, addrspace 5) + ; CHECK: [[LOAD15:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load (s16) from %fixed-stack.0, align 4, addrspace 5) ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s16>) = G_BUILD_VECTOR [[LOAD]](s16), [[LOAD1]](s16), [[LOAD2]](s16), [[LOAD3]](s16), [[LOAD4]](s16), [[LOAD5]](s16), [[LOAD6]](s16), [[LOAD7]](s16), [[LOAD8]](s16), [[LOAD9]](s16), [[LOAD10]](s16), [[LOAD11]](s16), [[LOAD12]](s16), [[LOAD13]](s16), [[LOAD14]](s16), [[LOAD15]](s16) ; CHECK: [[TRUNC:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<16 x s16>) ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[TRUNC]](<16 x s8>), [[COPY33]](p1) :: (volatile store 16 into `<16 x i8> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[TRUNC]](<16 x s8>), [[COPY33]](p1) :: (volatile store (<16 x s8>) into `<16 x i8> addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] ; CHECK: S_SETPC_B64_return [[COPY34]] store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll index ebf1b1596d0fc..1aeeece198f7d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll @@ -16,7 +16,7 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr(void()* %fptr) { ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK: [[LOAD:%[0-9]+]]:sreg_64(p0) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load 8 from %ir.fptr.kernarg.offset.cast, align 16, addrspace 4) + ; CHECK: [[LOAD:%[0-9]+]]:sreg_64(p0) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p0) from %ir.fptr.kernarg.offset.cast, align 16, addrspace 4) ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll index 1315f1bc275eb..30a857437c2eb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll @@ -292,9 +292,9 @@ define void @test_many_matching_constraints(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY %4 ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY %5 ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY %6 - ; CHECK: G_STORE [[COPY7]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[COPY8]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[COPY9]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[COPY7]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[COPY8]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[COPY9]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; CHECK: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] ; CHECK: S_SETPC_B64_return [[COPY10]] %asm = call {i32, i32, i32} asm sideeffect "; ", "=v,=v,=v,0,2,1"(i32 %c, i32 %a, i32 %b) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll index 5c94c6fe5bc04..78a45678db185 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll @@ -14,7 +14,7 @@ define void @test_memcpy_p1_p3_i64(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) - ; CHECK: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[TRUNC]](s32), 0 :: (store 1 into %ir.dst, addrspace 1), (load 1 from %ir.src, addrspace 3) + ; CHECK: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[TRUNC]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] ; CHECK: S_SETPC_B64_return [[COPY4]] call void @llvm.memcpy.p1i8.p3i8.i64(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i64 256, i1 false) @@ -31,7 +31,7 @@ define void @test_memcpy_p1_p3_i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) ; CHECK: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 - ; CHECK: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[C]](s32), 0 :: (store 1 into %ir.dst, addrspace 1), (load 1 from %ir.src, addrspace 3) + ; CHECK: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] ; CHECK: S_SETPC_B64_return [[COPY4]] call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 256, i1 false) @@ -49,7 +49,7 @@ define void @test_memcpy_p1_p3_i16(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256 ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) - ; CHECK: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[ZEXT]](s32), 0 :: (store 1 into %ir.dst, addrspace 1), (load 1 from %ir.src, addrspace 3) + ; CHECK: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] ; CHECK: S_SETPC_B64_return [[COPY4]] call void @llvm.memcpy.p1i8.p3i8.i16(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i16 256, i1 false) @@ -67,7 +67,7 @@ define void @test_memcpy_p3_p1_i64(i8 addrspace(3)* %dst, i8 addrspace(1)* %src) ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) - ; CHECK: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[TRUNC]](s32), 0 :: (store 1 into %ir.dst, addrspace 3), (load 1 from %ir.src, addrspace 1) + ; CHECK: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[TRUNC]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3), (load (s8) from %ir.src, addrspace 1) ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] ; CHECK: S_SETPC_B64_return [[COPY4]] call void @llvm.memcpy.p3i8.p1i8.i64(i8 addrspace(3)* %dst, i8 addrspace(1)* %src, i64 256, i1 false) @@ -84,7 +84,7 @@ define void @test_memcpy_p3_p1_i32(i8 addrspace(3)* %dst, i8 addrspace(1)* %src) ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 - ; CHECK: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[C]](s32), 0 :: (store 1 into %ir.dst, addrspace 3), (load 1 from %ir.src, addrspace 1) + ; CHECK: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3), (load (s8) from %ir.src, addrspace 1) ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] ; CHECK: S_SETPC_B64_return [[COPY4]] call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %dst, i8 addrspace(1)* %src, i32 256, i1 false) @@ -102,7 +102,7 @@ define void @test_memcpy_p3_p1_i16(i8 addrspace(3)* %dst, i8 addrspace(1)* %src) ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256 ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) - ; CHECK: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[ZEXT]](s32), 0 :: (store 1 into %ir.dst, addrspace 3), (load 1 from %ir.src, addrspace 1) + ; CHECK: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3), (load (s8) from %ir.src, addrspace 1) ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] ; CHECK: S_SETPC_B64_return [[COPY4]] call void @llvm.memcpy.p3i8.p1i8.i16(i8 addrspace(3)* %dst, i8 addrspace(1)* %src, i16 256, i1 false) @@ -120,7 +120,7 @@ define void @test_memmove_p1_p3_i64(i8 addrspace(1)* %dst, i8 addrspace(3)* %src ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) - ; CHECK: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[TRUNC]](s32), 0 :: (store 1 into %ir.dst, addrspace 1), (load 1 from %ir.src, addrspace 3) + ; CHECK: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[TRUNC]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] ; CHECK: S_SETPC_B64_return [[COPY4]] call void @llvm.memmove.p1i8.p3i8.i64(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i64 256, i1 false) @@ -137,7 +137,7 @@ define void @test_memmove_p1_p3_i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src ; CHECK: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 - ; CHECK: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[C]](s32), 0 :: (store 1 into %ir.dst, addrspace 1), (load 1 from %ir.src, addrspace 3) + ; CHECK: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] ; CHECK: S_SETPC_B64_return [[COPY4]] call void @llvm.memmove.p1i8.p3i8.i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 256, i1 false) @@ -155,7 +155,7 @@ define void @test_memmove_p1_p3_i16(i8 addrspace(1)* %dst, i8 addrspace(3)* %src ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256 ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) - ; CHECK: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[ZEXT]](s32), 0 :: (store 1 into %ir.dst, addrspace 1), (load 1 from %ir.src, addrspace 3) + ; CHECK: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] ; CHECK: S_SETPC_B64_return [[COPY4]] call void @llvm.memmove.p1i8.p3i8.i16(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i16 256, i1 false) @@ -173,7 +173,7 @@ define void @test_memset_p1_i64(i8 addrspace(1)* %dst, i8 %val) { ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32) ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 - ; CHECK: G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[C]](s64), 0 :: (store 1 into %ir.dst, addrspace 1) + ; CHECK: G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[C]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1) ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] ; CHECK: S_SETPC_B64_return [[COPY4]] call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 256, i1 false) @@ -192,7 +192,7 @@ define void @test_memset_p1_i32(i8 addrspace(1)* %dst, i8 %val) { ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32) - ; CHECK: G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[ZEXT]](s64), 0 :: (store 1 into %ir.dst, addrspace 1) + ; CHECK: G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[ZEXT]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1) ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] ; CHECK: S_SETPC_B64_return [[COPY4]] call void @llvm.memset.p1i8.i32(i8 addrspace(1)* %dst, i8 %val, i32 256, i1 false) @@ -211,7 +211,7 @@ define void @test_memset_p1_i16(i8 addrspace(1)* %dst, i8 %val) { ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256 ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s16) - ; CHECK: G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[ZEXT]](s64), 0 :: (store 1 into %ir.dst, addrspace 1) + ; CHECK: G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[ZEXT]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1) ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] ; CHECK: S_SETPC_B64_return [[COPY4]] call void @llvm.memset.p1i8.i16(i8 addrspace(1)* %dst, i8 %val, i16 256, i1 false) @@ -228,7 +228,7 @@ define void @test_memset_p3_i64(i8 addrspace(3)* %dst, i8 %val) { ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) - ; CHECK: G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[TRUNC1]](s32), 0 :: (store 1 into %ir.dst, addrspace 3) + ; CHECK: G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[TRUNC1]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3) ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] ; CHECK: S_SETPC_B64_return [[COPY3]] call void @llvm.memset.p3i8.i64(i8 addrspace(3)* %dst, i8 %val, i64 256, i1 false) @@ -244,7 +244,7 @@ define void @test_memset_p3_i32(i8 addrspace(3)* %dst, i8 %val) { ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 - ; CHECK: G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[C]](s32), 0 :: (store 1 into %ir.dst, addrspace 3) + ; CHECK: G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3) ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] ; CHECK: S_SETPC_B64_return [[COPY3]] call void @llvm.memset.p3i8.i32(i8 addrspace(3)* %dst, i8 %val, i32 256, i1 false) @@ -261,7 +261,7 @@ define void @test_memset_p3_i16(i8 addrspace(3)* %dst, i8 %val) { ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256 ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) - ; CHECK: G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[ZEXT]](s32), 0 :: (store 1 into %ir.dst, addrspace 3) + ; CHECK: G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3) ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] ; CHECK: S_SETPC_B64_return [[COPY3]] call void @llvm.memset.p3i8.i16(i8 addrspace(3)* %dst, i8 %val, i16 256, i1 false) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll index 3809732b5899a..2d1e8e4c371c6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll @@ -28,7 +28,7 @@ define fastcc i32 @i32_fastcc_i32_i32_stack_object(i32 %arg0, i32 %arg1) #1 { ; GCN: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca ; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; GCN: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s32) - ; GCN: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store 4 into %ir.gep, addrspace 5) + ; GCN: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5) ; GCN: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] ; GCN: $vgpr0 = COPY [[ADD]](s32) ; GCN: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] @@ -42,10 +42,7 @@ define fastcc i32 @i32_fastcc_i32_i32_stack_object(i32 %arg0, i32 %arg1) #1 { define hidden fastcc i32 @sibling_call_i32_fastcc_i32_i32(i32 %a, i32 %b, i32 %c) #1 { ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32 - ; GCN: frameInfo: - ; GCN: hasCalls: false - ; GCN: hasTailCall: true -; GCN: bb.1.entry: + ; GCN: bb.1.entry: ; GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 ; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -106,7 +103,7 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_stack_object(i32 %a, i32 %b, ; GCN: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca ; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; GCN: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s32) - ; GCN: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store 4 into %ir.gep, addrspace 5) + ; GCN: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5) ; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32 ; GCN: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]] @@ -157,7 +154,7 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_callee_stack_object(i32 %a, i ; GCN: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca ; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; GCN: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s32) - ; GCN: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store 4 into %ir.gep, addrspace 5) + ; GCN: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5) ; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_stack_object ; GCN: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]] @@ -249,7 +246,7 @@ define amdgpu_kernel void @kernel_call_i32_fastcc_i32_i32_unused_result(i32 %a, ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GCN: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; GCN: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load 8 from %ir.0, align 16, addrspace 4) + ; GCN: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (<2 x s32>) from %ir.0, align 16, addrspace 4) ; GCN: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<2 x s32>), [[C]](s32) ; GCN: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<2 x s32>), [[C1]](s32) ; GCN: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 @@ -303,7 +300,7 @@ define hidden fastcc i32 @i32_fastcc_i32_byval_i32(i32 %arg0, i32 addrspace(5)* ; GCN: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; GCN: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) ; GCN: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; GCN: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p5) :: (dereferenceable load 4 from %ir.arg1, addrspace 5) + ; GCN: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (s32) from %ir.arg1, addrspace 5) ; GCN: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[LOAD]] ; GCN: $vgpr0 = COPY [[ADD]](s32) ; GCN: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] @@ -346,7 +343,7 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32_byval_parent(i32 %a, i3 ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C]](s32) ; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GCN: G_MEMCPY [[PTR_ADD]](p5), [[COPY9]](p5), [[C1]](s32), 0 :: (dereferenceable store 4 into stack, addrspace 5), (dereferenceable load 4 from %ir.b.byval, addrspace 5) + ; GCN: G_MEMCPY [[PTR_ADD]](p5), [[COPY9]](p5), [[C1]](s32), 0 :: (dereferenceable store (s32) into stack, addrspace 5), (dereferenceable load (s32) from %ir.b.byval, addrspace 5) ; GCN: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) ; GCN: $sgpr4_sgpr5 = COPY [[COPY12]](p4) @@ -415,9 +412,9 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32(i32 %a, [32 x i32] %lar ; GCN: [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr29 ; GCN: [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr30 ; GCN: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; GCN: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.2, align 16, addrspace 5) + ; GCN: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.2, align 16, addrspace 5) ; GCN: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; GCN: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.1, addrspace 5) + ; GCN: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5) ; GCN: [[COPY39:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GCN: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[C]](s32) @@ -433,7 +430,7 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32(i32 %a, [32 x i32] %lar ; GCN: $vgpr0 = COPY [[COPY8]](s32) ; GCN: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GCN: G_MEMCPY [[FRAME_INDEX2]](p5), [[INTTOPTR]](p5), [[C1]](s32), 0 :: (dereferenceable store 4 into %fixed-stack.0, align 16, addrspace 5), (dereferenceable load 4 from `i32 addrspace(5)* inttoptr (i32 16 to i32 addrspace(5)*)`, align 16, addrspace 5) + ; GCN: G_MEMCPY [[FRAME_INDEX2]](p5), [[INTTOPTR]](p5), [[C1]](s32), 0 :: (dereferenceable store (s32) into %fixed-stack.0, align 16, addrspace 5), (dereferenceable load (s32) from `i32 addrspace(5)* inttoptr (i32 16 to i32 addrspace(5)*)`, align 16, addrspace 5) ; GCN: [[COPY48:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY48]](<4 x s32>) ; GCN: $sgpr4_sgpr5 = COPY [[COPY40]](p4) @@ -486,11 +483,11 @@ define fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %arg0, i32 %arg1, [32 x i32] %l ; GCN: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 ; GCN: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 ; GCN: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; GCN: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.2, align 16, addrspace 5) + ; GCN: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.2, align 16, addrspace 5) ; GCN: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; GCN: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.1, addrspace 5) + ; GCN: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5) ; GCN: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; GCN: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.0, align 8, addrspace 5) + ; GCN: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.0, align 8, addrspace 5) ; GCN: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] ; GCN: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[LOAD1]] @@ -550,11 +547,11 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x ; GCN: [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr29 ; GCN: [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr30 ; GCN: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 - ; GCN: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.5, align 16, addrspace 5) + ; GCN: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.5, align 16, addrspace 5) ; GCN: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 - ; GCN: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.4, addrspace 5) + ; GCN: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5) ; GCN: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 - ; GCN: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.3, align 8, addrspace 5) + ; GCN: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.3, align 8, addrspace 5) ; GCN: [[COPY39:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32 ; GCN: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY7]] @@ -597,11 +594,11 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x ; GCN: $vgpr29 = COPY [[COPY37]](s32) ; GCN: $vgpr30 = COPY [[COPY38]](s32) ; GCN: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; GCN: G_STORE [[LOAD]](s32), [[FRAME_INDEX3]](p5) :: (store 4 into %fixed-stack.2, align 16, addrspace 5) + ; GCN: G_STORE [[LOAD]](s32), [[FRAME_INDEX3]](p5) :: (store (s32) into %fixed-stack.2, align 16, addrspace 5) ; GCN: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; GCN: G_STORE [[LOAD1]](s32), [[FRAME_INDEX4]](p5) :: (store 4 into %fixed-stack.1, addrspace 5) + ; GCN: G_STORE [[LOAD1]](s32), [[FRAME_INDEX4]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5) ; GCN: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; GCN: G_STORE [[LOAD2]](s32), [[FRAME_INDEX5]](p5) :: (store 4 into %fixed-stack.0, align 8, addrspace 5) + ; GCN: G_STORE [[LOAD2]](s32), [[FRAME_INDEX5]](p5) :: (store (s32) into %fixed-stack.0, align 8, addrspace 5) ; GCN: [[COPY48:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY48]](<4 x s32>) ; GCN: $sgpr4_sgpr5 = COPY [[COPY40]](p4) @@ -662,17 +659,17 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32_stack_object(i32 %a, i ; GCN: [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr29 ; GCN: [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr30 ; GCN: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 - ; GCN: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.5, align 16, addrspace 5) + ; GCN: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.5, align 16, addrspace 5) ; GCN: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 - ; GCN: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.4, addrspace 5) + ; GCN: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5) ; GCN: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 - ; GCN: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.3, align 8, addrspace 5) + ; GCN: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.3, align 8, addrspace 5) ; GCN: [[COPY39:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 ; GCN: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca ; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; GCN: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX3]], [[C1]](s32) - ; GCN: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store 4 into %ir.gep, addrspace 5) + ; GCN: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5) ; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32 ; GCN: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY6]] @@ -714,11 +711,11 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32_stack_object(i32 %a, i ; GCN: $vgpr29 = COPY [[COPY37]](s32) ; GCN: $vgpr30 = COPY [[COPY38]](s32) ; GCN: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; GCN: G_STORE [[LOAD]](s32), [[FRAME_INDEX4]](p5) :: (store 4 into %fixed-stack.2, align 16, addrspace 5) + ; GCN: G_STORE [[LOAD]](s32), [[FRAME_INDEX4]](p5) :: (store (s32) into %fixed-stack.2, align 16, addrspace 5) ; GCN: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; GCN: G_STORE [[LOAD1]](s32), [[FRAME_INDEX5]](p5) :: (store 4 into %fixed-stack.1, addrspace 5) + ; GCN: G_STORE [[LOAD1]](s32), [[FRAME_INDEX5]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5) ; GCN: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; GCN: G_STORE [[LOAD2]](s32), [[FRAME_INDEX6]](p5) :: (store 4 into %fixed-stack.0, align 8, addrspace 5) + ; GCN: G_STORE [[LOAD2]](s32), [[FRAME_INDEX6]](p5) :: (store (s32) into %fixed-stack.0, align 8, addrspace 5) ; GCN: [[COPY48:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY48]](<4 x s32>) ; GCN: $sgpr4_sgpr5 = COPY [[COPY40]](p4) @@ -801,13 +798,13 @@ define fastcc i32 @no_sibling_call_callee_more_stack_space(i32 %a, i32 %b) #1 { ; GCN: [[COPY19:%[0-9]+]]:_(p5) = COPY $sgpr32 ; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY19]], [[C1]](s32) - ; GCN: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (store 4 into stack, align 16, addrspace 5) + ; GCN: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5) ; GCN: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GCN: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY19]], [[C2]](s32) - ; GCN: G_STORE [[C]](s32), [[PTR_ADD1]](p5) :: (store 4 into stack + 4, addrspace 5) + ; GCN: G_STORE [[C]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5) ; GCN: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GCN: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY19]], [[C3]](s32) - ; GCN: G_STORE [[C]](s32), [[PTR_ADD2]](p5) :: (store 4 into stack + 8, align 8, addrspace 5) + ; GCN: G_STORE [[C]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5) ; GCN: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) ; GCN: $sgpr4_sgpr5 = COPY [[COPY11]](p4) @@ -946,17 +943,17 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32(i32 %a, i3 ; GCN: [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr29 ; GCN: [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr30 ; GCN: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 - ; GCN: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.5, align 16, addrspace 5) + ; GCN: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.5, align 16, addrspace 5) ; GCN: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 - ; GCN: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.4, addrspace 5) + ; GCN: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5) ; GCN: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 - ; GCN: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.3, align 8, addrspace 5) + ; GCN: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.3, align 8, addrspace 5) ; GCN: [[COPY39:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 ; GCN: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca ; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; GCN: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX3]], [[C1]](s32) - ; GCN: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store 4 into %ir.gep, addrspace 5) + ; GCN: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5) ; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32 ; GCN: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY6]] @@ -998,11 +995,11 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32(i32 %a, i3 ; GCN: $vgpr29 = COPY [[COPY37]](s32) ; GCN: $vgpr30 = COPY [[COPY38]](s32) ; GCN: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; GCN: G_STORE [[LOAD]](s32), [[FRAME_INDEX4]](p5) :: (store 4 into %fixed-stack.2, align 16, addrspace 5) + ; GCN: G_STORE [[LOAD]](s32), [[FRAME_INDEX4]](p5) :: (store (s32) into %fixed-stack.2, align 16, addrspace 5) ; GCN: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; GCN: G_STORE [[LOAD1]](s32), [[FRAME_INDEX5]](p5) :: (store 4 into %fixed-stack.1, addrspace 5) + ; GCN: G_STORE [[LOAD1]](s32), [[FRAME_INDEX5]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5) ; GCN: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; GCN: G_STORE [[LOAD2]](s32), [[FRAME_INDEX6]](p5) :: (store 4 into %fixed-stack.0, align 8, addrspace 5) + ; GCN: G_STORE [[LOAD2]](s32), [[FRAME_INDEX6]](p5) :: (store (s32) into %fixed-stack.0, align 8, addrspace 5) ; GCN: [[COPY48:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY48]](<4 x s32>) ; GCN: $sgpr4_sgpr5 = COPY [[COPY40]](p4) @@ -1066,26 +1063,26 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg ; GCN: [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr29 ; GCN: [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr30 ; GCN: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9 - ; GCN: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.9, align 16, addrspace 5) + ; GCN: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.9, align 16, addrspace 5) ; GCN: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8 - ; GCN: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.8, addrspace 5) + ; GCN: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.8, addrspace 5) ; GCN: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7 - ; GCN: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.7, align 8, addrspace 5) + ; GCN: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.7, align 8, addrspace 5) ; GCN: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6 - ; GCN: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load 4 from %fixed-stack.6, addrspace 5) + ; GCN: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.6, addrspace 5) ; GCN: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 - ; GCN: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load 4 from %fixed-stack.5, align 16, addrspace 5) + ; GCN: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.5, align 16, addrspace 5) ; GCN: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 - ; GCN: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load 4 from %fixed-stack.4, addrspace 5) + ; GCN: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5) ; GCN: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 - ; GCN: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load 4 from %fixed-stack.3, align 8, addrspace 5) + ; GCN: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.3, align 8, addrspace 5) ; GCN: [[COPY39:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 ; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca ; GCN: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; GCN: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX7]], [[C2]](s32) - ; GCN: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store 4 into %ir.gep, addrspace 5) + ; GCN: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5) ; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32 ; GCN: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY6]] @@ -1127,11 +1124,11 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg ; GCN: $vgpr29 = COPY [[C1]](s32) ; GCN: $vgpr30 = COPY [[C1]](s32) ; GCN: [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; GCN: G_STORE [[C1]](s32), [[FRAME_INDEX8]](p5) :: (store 4 into %fixed-stack.2, align 16, addrspace 5) + ; GCN: G_STORE [[C1]](s32), [[FRAME_INDEX8]](p5) :: (store (s32) into %fixed-stack.2, align 16, addrspace 5) ; GCN: [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; GCN: G_STORE [[C1]](s32), [[FRAME_INDEX9]](p5) :: (store 4 into %fixed-stack.1, addrspace 5) + ; GCN: G_STORE [[C1]](s32), [[FRAME_INDEX9]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5) ; GCN: [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; GCN: G_STORE [[C1]](s32), [[FRAME_INDEX10]](p5) :: (store 4 into %fixed-stack.0, align 8, addrspace 5) + ; GCN: G_STORE [[C1]](s32), [[FRAME_INDEX10]](p5) :: (store (s32) into %fixed-stack.0, align 8, addrspace 5) ; GCN: [[COPY48:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY48]](<4 x s32>) ; GCN: $sgpr4_sgpr5 = COPY [[COPY40]](p4) @@ -1197,88 +1194,88 @@ define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 { ; GCN: [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr29 ; GCN: [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr30 ; GCN: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.35 - ; GCN: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.35, align 16, addrspace 5) + ; GCN: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.35, align 16, addrspace 5) ; GCN: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.34 - ; GCN: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.34, addrspace 5) + ; GCN: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.34, addrspace 5) ; GCN: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.33 - ; GCN: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.33, align 8, addrspace 5) + ; GCN: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.33, align 8, addrspace 5) ; GCN: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.32 - ; GCN: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load 4 from %fixed-stack.32, addrspace 5) + ; GCN: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.32, addrspace 5) ; GCN: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.31 - ; GCN: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load 4 from %fixed-stack.31, align 16, addrspace 5) + ; GCN: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.31, align 16, addrspace 5) ; GCN: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.30 - ; GCN: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load 4 from %fixed-stack.30, addrspace 5) + ; GCN: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.30, addrspace 5) ; GCN: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.29 - ; GCN: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load 4 from %fixed-stack.29, align 8, addrspace 5) + ; GCN: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.29, align 8, addrspace 5) ; GCN: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.28 - ; GCN: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load 4 from %fixed-stack.28, addrspace 5) + ; GCN: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (s32) from %fixed-stack.28, addrspace 5) ; GCN: [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.27 - ; GCN: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load 4 from %fixed-stack.27, align 16, addrspace 5) + ; GCN: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (s32) from %fixed-stack.27, align 16, addrspace 5) ; GCN: [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.26 - ; GCN: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load 4 from %fixed-stack.26, addrspace 5) + ; GCN: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load (s32) from %fixed-stack.26, addrspace 5) ; GCN: [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.25 - ; GCN: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load 4 from %fixed-stack.25, align 8, addrspace 5) + ; GCN: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load (s32) from %fixed-stack.25, align 8, addrspace 5) ; GCN: [[FRAME_INDEX11:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.24 - ; GCN: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load 4 from %fixed-stack.24, addrspace 5) + ; GCN: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load (s32) from %fixed-stack.24, addrspace 5) ; GCN: [[FRAME_INDEX12:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.23 - ; GCN: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load 4 from %fixed-stack.23, align 16, addrspace 5) + ; GCN: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load (s32) from %fixed-stack.23, align 16, addrspace 5) ; GCN: [[FRAME_INDEX13:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.22 - ; GCN: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load 4 from %fixed-stack.22, addrspace 5) + ; GCN: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load (s32) from %fixed-stack.22, addrspace 5) ; GCN: [[FRAME_INDEX14:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.21 - ; GCN: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load 4 from %fixed-stack.21, align 8, addrspace 5) + ; GCN: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load (s32) from %fixed-stack.21, align 8, addrspace 5) ; GCN: [[FRAME_INDEX15:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.20 - ; GCN: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load 4 from %fixed-stack.20, addrspace 5) + ; GCN: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load (s32) from %fixed-stack.20, addrspace 5) ; GCN: [[FRAME_INDEX16:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.19 - ; GCN: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX16]](p5) :: (invariant load 4 from %fixed-stack.19, align 16, addrspace 5) + ; GCN: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX16]](p5) :: (invariant load (s32) from %fixed-stack.19, align 16, addrspace 5) ; GCN: [[FRAME_INDEX17:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.18 - ; GCN: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX17]](p5) :: (invariant load 4 from %fixed-stack.18, addrspace 5) + ; GCN: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX17]](p5) :: (invariant load (s32) from %fixed-stack.18, addrspace 5) ; GCN: [[FRAME_INDEX18:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.17 - ; GCN: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX18]](p5) :: (invariant load 4 from %fixed-stack.17, align 8, addrspace 5) + ; GCN: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX18]](p5) :: (invariant load (s32) from %fixed-stack.17, align 8, addrspace 5) ; GCN: [[FRAME_INDEX19:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.16 - ; GCN: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX19]](p5) :: (invariant load 4 from %fixed-stack.16, addrspace 5) + ; GCN: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX19]](p5) :: (invariant load (s32) from %fixed-stack.16, addrspace 5) ; GCN: [[FRAME_INDEX20:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.15 - ; GCN: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX20]](p5) :: (invariant load 4 from %fixed-stack.15, align 16, addrspace 5) + ; GCN: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX20]](p5) :: (invariant load (s32) from %fixed-stack.15, align 16, addrspace 5) ; GCN: [[FRAME_INDEX21:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.14 - ; GCN: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX21]](p5) :: (invariant load 4 from %fixed-stack.14, addrspace 5) + ; GCN: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX21]](p5) :: (invariant load (s32) from %fixed-stack.14, addrspace 5) ; GCN: [[FRAME_INDEX22:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.13 - ; GCN: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX22]](p5) :: (invariant load 4 from %fixed-stack.13, align 8, addrspace 5) + ; GCN: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX22]](p5) :: (invariant load (s32) from %fixed-stack.13, align 8, addrspace 5) ; GCN: [[FRAME_INDEX23:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.12 - ; GCN: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX23]](p5) :: (invariant load 4 from %fixed-stack.12, addrspace 5) + ; GCN: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX23]](p5) :: (invariant load (s32) from %fixed-stack.12, addrspace 5) ; GCN: [[FRAME_INDEX24:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.11 - ; GCN: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX24]](p5) :: (invariant load 4 from %fixed-stack.11, align 16, addrspace 5) + ; GCN: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX24]](p5) :: (invariant load (s32) from %fixed-stack.11, align 16, addrspace 5) ; GCN: [[FRAME_INDEX25:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.10 - ; GCN: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX25]](p5) :: (invariant load 4 from %fixed-stack.10, addrspace 5) + ; GCN: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX25]](p5) :: (invariant load (s32) from %fixed-stack.10, addrspace 5) ; GCN: [[FRAME_INDEX26:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9 - ; GCN: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX26]](p5) :: (invariant load 4 from %fixed-stack.9, align 8, addrspace 5) + ; GCN: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX26]](p5) :: (invariant load (s32) from %fixed-stack.9, align 8, addrspace 5) ; GCN: [[FRAME_INDEX27:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8 - ; GCN: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX27]](p5) :: (invariant load 4 from %fixed-stack.8, addrspace 5) + ; GCN: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX27]](p5) :: (invariant load (s32) from %fixed-stack.8, addrspace 5) ; GCN: [[FRAME_INDEX28:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7 - ; GCN: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX28]](p5) :: (invariant load 4 from %fixed-stack.7, align 16, addrspace 5) + ; GCN: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX28]](p5) :: (invariant load (s32) from %fixed-stack.7, align 16, addrspace 5) ; GCN: [[FRAME_INDEX29:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6 - ; GCN: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX29]](p5) :: (invariant load 4 from %fixed-stack.6, addrspace 5) + ; GCN: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX29]](p5) :: (invariant load (s32) from %fixed-stack.6, addrspace 5) ; GCN: [[FRAME_INDEX30:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 - ; GCN: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX30]](p5) :: (invariant load 4 from %fixed-stack.5, align 8, addrspace 5) + ; GCN: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX30]](p5) :: (invariant load (s32) from %fixed-stack.5, align 8, addrspace 5) ; GCN: [[FRAME_INDEX31:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 - ; GCN: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX31]](p5) :: (invariant load 4 from %fixed-stack.4, addrspace 5) + ; GCN: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX31]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5) ; GCN: [[FRAME_INDEX32:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 - ; GCN: [[LOAD32:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX32]](p5) :: (invariant load 4 from %fixed-stack.3, align 16, addrspace 5) + ; GCN: [[LOAD32:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX32]](p5) :: (invariant load (s32) from %fixed-stack.3, align 16, addrspace 5) ; GCN: [[FRAME_INDEX33:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 - ; GCN: [[LOAD33:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX33]](p5) :: (invariant load 4 from %fixed-stack.2, addrspace 5) + ; GCN: [[LOAD33:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX33]](p5) :: (invariant load (s32) from %fixed-stack.2, addrspace 5) ; GCN: [[COPY39:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 ; GCN: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; GCN: [[FRAME_INDEX34:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca0 ; GCN: [[FRAME_INDEX35:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.1.alloca1 - ; GCN: G_STORE [[C]](s32), [[FRAME_INDEX34]](p5) :: (store 4 into %ir.alloca0, addrspace 5) + ; GCN: G_STORE [[C]](s32), [[FRAME_INDEX34]](p5) :: (store (s32) into %ir.alloca0, addrspace 5) ; GCN: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GCN: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX34]], [[C2]](s32) - ; GCN: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (store 4 into %ir.alloca0 + 4, addrspace 5) + ; GCN: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.alloca0 + 4, addrspace 5) ; GCN: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GCN: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX34]], [[C3]](s32) - ; GCN: G_STORE [[C]](s32), [[PTR_ADD1]](p5) :: (store 4 into %ir.alloca0 + 8, addrspace 5) - ; GCN: G_STORE [[C1]](s64), [[FRAME_INDEX35]](p5) :: (store 8 into %ir.alloca1, addrspace 5) + ; GCN: G_STORE [[C]](s32), [[PTR_ADD1]](p5) :: (store (s32) into %ir.alloca0 + 8, addrspace 5) + ; GCN: G_STORE [[C1]](s64), [[FRAME_INDEX35]](p5) :: (store (s64) into %ir.alloca1, addrspace 5) ; GCN: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX35]], [[C3]](s32) - ; GCN: G_STORE [[C1]](s64), [[PTR_ADD2]](p5) :: (store 8 into %ir.alloca1 + 8, addrspace 5) + ; GCN: G_STORE [[C1]](s64), [[PTR_ADD2]](p5) :: (store (s64) into %ir.alloca1 + 8, addrspace 5) ; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @void_fastcc_multi_byval ; GCN: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY6]] @@ -1291,10 +1288,10 @@ define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 { ; GCN: $vgpr0 = COPY [[COPY8]](s32) ; GCN: [[FRAME_INDEX36:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 ; GCN: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GCN: G_MEMCPY [[FRAME_INDEX36]](p5), [[FRAME_INDEX34]](p5), [[C4]](s32), 0 :: (dereferenceable store 12 into %fixed-stack.1, align 16, addrspace 5), (dereferenceable load 12 from %ir.alloca0, align 16, addrspace 5) + ; GCN: G_MEMCPY [[FRAME_INDEX36]](p5), [[FRAME_INDEX34]](p5), [[C4]](s32), 0 :: (dereferenceable store (s96) into %fixed-stack.1, align 16, addrspace 5), (dereferenceable load (s96) from %ir.alloca0, align 16, addrspace 5) ; GCN: [[FRAME_INDEX37:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; GCN: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GCN: G_MEMCPY [[FRAME_INDEX37]](p5), [[FRAME_INDEX35]](p5), [[C5]](s32), 0 :: (dereferenceable store 16 into %fixed-stack.0, addrspace 5), (dereferenceable load 16 from %ir.alloca1, align 8, addrspace 5) + ; GCN: G_MEMCPY [[FRAME_INDEX37]](p5), [[FRAME_INDEX35]](p5), [[C5]](s32), 0 :: (dereferenceable store (s128) into %fixed-stack.0, addrspace 5), (dereferenceable load (s128) from %ir.alloca1, align 8, addrspace 5) ; GCN: [[COPY48:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY48]](<4 x s32>) ; GCN: $sgpr4_sgpr5 = COPY [[COPY40]](p4) @@ -1362,84 +1359,84 @@ define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64 ; GCN: [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr29 ; GCN: [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr30 ; GCN: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.36 - ; GCN: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.36, align 16, addrspace 5) + ; GCN: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.36, align 16, addrspace 5) ; GCN: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.35 - ; GCN: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.35, addrspace 5) + ; GCN: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.35, addrspace 5) ; GCN: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.34 - ; GCN: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.34, align 8, addrspace 5) + ; GCN: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.34, align 8, addrspace 5) ; GCN: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.33 - ; GCN: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load 4 from %fixed-stack.33, addrspace 5) + ; GCN: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.33, addrspace 5) ; GCN: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.32 - ; GCN: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load 4 from %fixed-stack.32, align 16, addrspace 5) + ; GCN: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.32, align 16, addrspace 5) ; GCN: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.31 - ; GCN: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load 4 from %fixed-stack.31, addrspace 5) + ; GCN: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.31, addrspace 5) ; GCN: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.30 - ; GCN: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load 4 from %fixed-stack.30, align 8, addrspace 5) + ; GCN: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.30, align 8, addrspace 5) ; GCN: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.29 - ; GCN: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load 4 from %fixed-stack.29, addrspace 5) + ; GCN: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load (s32) from %fixed-stack.29, addrspace 5) ; GCN: [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.28 - ; GCN: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load 4 from %fixed-stack.28, align 16, addrspace 5) + ; GCN: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (s32) from %fixed-stack.28, align 16, addrspace 5) ; GCN: [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.27 - ; GCN: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load 4 from %fixed-stack.27, addrspace 5) + ; GCN: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load (s32) from %fixed-stack.27, addrspace 5) ; GCN: [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.26 - ; GCN: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load 4 from %fixed-stack.26, align 8, addrspace 5) + ; GCN: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load (s32) from %fixed-stack.26, align 8, addrspace 5) ; GCN: [[FRAME_INDEX11:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.25 - ; GCN: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load 4 from %fixed-stack.25, addrspace 5) + ; GCN: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load (s32) from %fixed-stack.25, addrspace 5) ; GCN: [[FRAME_INDEX12:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.24 - ; GCN: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load 4 from %fixed-stack.24, align 16, addrspace 5) + ; GCN: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load (s32) from %fixed-stack.24, align 16, addrspace 5) ; GCN: [[FRAME_INDEX13:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.23 - ; GCN: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load 4 from %fixed-stack.23, addrspace 5) + ; GCN: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load (s32) from %fixed-stack.23, addrspace 5) ; GCN: [[FRAME_INDEX14:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.22 - ; GCN: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load 4 from %fixed-stack.22, align 8, addrspace 5) + ; GCN: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load (s32) from %fixed-stack.22, align 8, addrspace 5) ; GCN: [[FRAME_INDEX15:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.21 - ; GCN: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load 4 from %fixed-stack.21, addrspace 5) + ; GCN: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load (s32) from %fixed-stack.21, addrspace 5) ; GCN: [[FRAME_INDEX16:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.20 - ; GCN: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX16]](p5) :: (invariant load 4 from %fixed-stack.20, align 16, addrspace 5) + ; GCN: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX16]](p5) :: (invariant load (s32) from %fixed-stack.20, align 16, addrspace 5) ; GCN: [[FRAME_INDEX17:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.19 - ; GCN: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX17]](p5) :: (invariant load 4 from %fixed-stack.19, addrspace 5) + ; GCN: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX17]](p5) :: (invariant load (s32) from %fixed-stack.19, addrspace 5) ; GCN: [[FRAME_INDEX18:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.18 - ; GCN: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX18]](p5) :: (invariant load 4 from %fixed-stack.18, align 8, addrspace 5) + ; GCN: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX18]](p5) :: (invariant load (s32) from %fixed-stack.18, align 8, addrspace 5) ; GCN: [[FRAME_INDEX19:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.17 - ; GCN: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX19]](p5) :: (invariant load 4 from %fixed-stack.17, addrspace 5) + ; GCN: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX19]](p5) :: (invariant load (s32) from %fixed-stack.17, addrspace 5) ; GCN: [[FRAME_INDEX20:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.16 - ; GCN: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX20]](p5) :: (invariant load 4 from %fixed-stack.16, align 16, addrspace 5) + ; GCN: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX20]](p5) :: (invariant load (s32) from %fixed-stack.16, align 16, addrspace 5) ; GCN: [[FRAME_INDEX21:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.15 - ; GCN: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX21]](p5) :: (invariant load 4 from %fixed-stack.15, addrspace 5) + ; GCN: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX21]](p5) :: (invariant load (s32) from %fixed-stack.15, addrspace 5) ; GCN: [[FRAME_INDEX22:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.14 - ; GCN: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX22]](p5) :: (invariant load 4 from %fixed-stack.14, align 8, addrspace 5) + ; GCN: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX22]](p5) :: (invariant load (s32) from %fixed-stack.14, align 8, addrspace 5) ; GCN: [[FRAME_INDEX23:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.13 - ; GCN: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX23]](p5) :: (invariant load 4 from %fixed-stack.13, addrspace 5) + ; GCN: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX23]](p5) :: (invariant load (s32) from %fixed-stack.13, addrspace 5) ; GCN: [[FRAME_INDEX24:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.12 - ; GCN: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX24]](p5) :: (invariant load 4 from %fixed-stack.12, align 16, addrspace 5) + ; GCN: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX24]](p5) :: (invariant load (s32) from %fixed-stack.12, align 16, addrspace 5) ; GCN: [[FRAME_INDEX25:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.11 - ; GCN: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX25]](p5) :: (invariant load 4 from %fixed-stack.11, addrspace 5) + ; GCN: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX25]](p5) :: (invariant load (s32) from %fixed-stack.11, addrspace 5) ; GCN: [[FRAME_INDEX26:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.10 - ; GCN: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX26]](p5) :: (invariant load 4 from %fixed-stack.10, align 8, addrspace 5) + ; GCN: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX26]](p5) :: (invariant load (s32) from %fixed-stack.10, align 8, addrspace 5) ; GCN: [[FRAME_INDEX27:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9 - ; GCN: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX27]](p5) :: (invariant load 4 from %fixed-stack.9, addrspace 5) + ; GCN: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX27]](p5) :: (invariant load (s32) from %fixed-stack.9, addrspace 5) ; GCN: [[FRAME_INDEX28:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8 - ; GCN: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX28]](p5) :: (invariant load 4 from %fixed-stack.8, align 16, addrspace 5) + ; GCN: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX28]](p5) :: (invariant load (s32) from %fixed-stack.8, align 16, addrspace 5) ; GCN: [[FRAME_INDEX29:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7 - ; GCN: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX29]](p5) :: (invariant load 4 from %fixed-stack.7, addrspace 5) + ; GCN: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX29]](p5) :: (invariant load (s32) from %fixed-stack.7, addrspace 5) ; GCN: [[FRAME_INDEX30:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6 - ; GCN: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX30]](p5) :: (invariant load 4 from %fixed-stack.6, align 8, addrspace 5) + ; GCN: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX30]](p5) :: (invariant load (s32) from %fixed-stack.6, align 8, addrspace 5) ; GCN: [[FRAME_INDEX31:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 - ; GCN: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX31]](p5) :: (invariant load 4 from %fixed-stack.5, addrspace 5) + ; GCN: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX31]](p5) :: (invariant load (s32) from %fixed-stack.5, addrspace 5) ; GCN: [[FRAME_INDEX32:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 - ; GCN: [[LOAD32:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX32]](p5) :: (invariant load 4 from %fixed-stack.4, align 16, addrspace 5) + ; GCN: [[LOAD32:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX32]](p5) :: (invariant load (s32) from %fixed-stack.4, align 16, addrspace 5) ; GCN: [[FRAME_INDEX33:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 - ; GCN: [[LOAD33:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX33]](p5) :: (invariant load 4 from %fixed-stack.3, addrspace 5) + ; GCN: [[LOAD33:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX33]](p5) :: (invariant load (s32) from %fixed-stack.3, addrspace 5) ; GCN: [[COPY39:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 ; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN: [[FRAME_INDEX34:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca - ; GCN: G_STORE [[C]](s32), [[FRAME_INDEX34]](p5) :: (store 4 into %ir.alloca, addrspace 5) + ; GCN: G_STORE [[C]](s32), [[FRAME_INDEX34]](p5) :: (store (s32) into %ir.alloca, addrspace 5) ; GCN: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GCN: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX34]], [[C2]](s32) - ; GCN: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (store 4 into %ir.alloca + 4, addrspace 5) + ; GCN: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.alloca + 4, addrspace 5) ; GCN: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GCN: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX34]], [[C3]](s32) - ; GCN: G_STORE [[C]](s32), [[PTR_ADD1]](p5) :: (store 4 into %ir.alloca + 8, addrspace 5) + ; GCN: G_STORE [[C]](s32), [[PTR_ADD1]](p5) :: (store (s32) into %ir.alloca + 8, addrspace 5) ; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @void_fastcc_byval_and_stack_passed ; GCN: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY6]] @@ -1451,7 +1448,7 @@ define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64 ; GCN: [[COPY47:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN: [[FRAME_INDEX35:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 ; GCN: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; GCN: G_MEMCPY [[FRAME_INDEX35]](p5), [[FRAME_INDEX34]](p5), [[C4]](s32), 0 :: (dereferenceable store 12 into %fixed-stack.2, align 16, addrspace 5), (dereferenceable load 12 from %ir.alloca, align 16, addrspace 5) + ; GCN: G_MEMCPY [[FRAME_INDEX35]](p5), [[FRAME_INDEX34]](p5), [[C4]](s32), 0 :: (dereferenceable store (s96) into %fixed-stack.2, align 16, addrspace 5), (dereferenceable load (s96) from %ir.alloca, align 16, addrspace 5) ; GCN: $vgpr0 = COPY [[C1]](s32) ; GCN: $vgpr1 = COPY [[C1]](s32) ; GCN: $vgpr2 = COPY [[C1]](s32) @@ -1484,9 +1481,9 @@ define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64 ; GCN: $vgpr29 = COPY [[C1]](s32) ; GCN: $vgpr30 = COPY [[C1]](s32) ; GCN: [[FRAME_INDEX36:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; GCN: G_STORE [[C1]](s32), [[FRAME_INDEX36]](p5) :: (store 4 into %fixed-stack.1, addrspace 5) + ; GCN: G_STORE [[C1]](s32), [[FRAME_INDEX36]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5) ; GCN: [[FRAME_INDEX37:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; GCN: G_STORE [[COPY8]](s32), [[FRAME_INDEX37]](p5) :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN: G_STORE [[COPY8]](s32), [[FRAME_INDEX37]](p5) :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) ; GCN: [[COPY48:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY48]](<4 x s32>) ; GCN: $sgpr4_sgpr5 = COPY [[COPY40]](p4) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-struct-return-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-struct-return-intrinsics.ll index 187910f588303..6f4e66a36fbdc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-struct-return-intrinsics.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-struct-return-intrinsics.ll @@ -13,8 +13,8 @@ define amdgpu_ps void @test_div_scale(float %arg0, float %arg1) { ; CHECK: [[COPY2:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), -1 ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[INT1]](s1) - ; CHECK: G_STORE [[INT]](s32), [[DEF]](p1) :: (store 4 into `float addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE [[SEXT]](s32), [[COPY2]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[INT]](s32), [[DEF]](p1) :: (store (s32) into `float addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE [[SEXT]](s32), [[COPY2]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; CHECK: S_ENDPGM 0 %call = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %arg0, float %arg1, i1 true) %extract0 = extractvalue { float, i1 } %call, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir index 77f7faa0abe1d..b6ecab7b7eb3e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir @@ -179,7 +179,7 @@ body: | ; VI: [[COPY2:%[0-9]+]]:_(p4) = COPY [[COPY]](p4) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 68 ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY2]], [[C2]](s64) - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 4, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4) ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](p5), [[C]] ; VI: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY1]](p5) ; VI: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[LOAD]](s32) @@ -262,7 +262,7 @@ body: | ; VI: [[COPY2:%[0-9]+]]:_(p4) = COPY [[COPY]](p4) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY2]], [[C2]](s64) - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 4, align 64, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 64, addrspace 4) ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](p3), [[C]] ; VI: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY1]](p3) ; VI: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[LOAD]](s32) @@ -468,14 +468,14 @@ body: | ; VI: [[COPY2:%[0-9]+]]:_(p4) = COPY [[COPY]](p4) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY2]], [[C2]](s64) - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 4, align 64, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 64, addrspace 4) ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](p3), [[C]] ; VI: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3) ; VI: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[LOAD]](s32) ; VI: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C1]] ; VI: [[COPY3:%[0-9]+]]:_(p4) = COPY [[COPY]](p4) ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY3]], [[C2]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 4, align 64, addrspace 4) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), align 64, addrspace 4) ; VI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](p3), [[C]] ; VI: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3) ; VI: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT1]](s32), [[LOAD1]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg-with-success.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg-with-success.mir index b2526ca975b38..9230cee0f2c51 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg-with-success.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg-with-success.mir @@ -12,13 +12,13 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32) - ; CHECK: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p1), [[BUILD_VECTOR]] :: (load store syncscope("agent-one-as") monotonic monotonic 4, addrspace 1) + ; CHECK: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p1), [[BUILD_VECTOR]] :: (load store syncscope("agent-one-as") monotonic monotonic (s32), addrspace 1) ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AMDGPU_ATOMIC_CMPXCHG]](s32), [[COPY1]] ; CHECK: S_ENDPGM 0, implicit [[AMDGPU_ATOMIC_CMPXCHG]](s32), implicit [[ICMP]](s1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 %2:_(s32) = COPY $vgpr3 - %3:_(s32), %4:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS %0, %1, %2 :: (load store syncscope("agent-one-as") monotonic monotonic 4, addrspace 1) + %3:_(s32), %4:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS %0, %1, %2 :: (load store syncscope("agent-one-as") monotonic monotonic (s32), addrspace 1) S_ENDPGM 0, implicit %3, implicit %4 ... @@ -34,13 +34,13 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32) - ; CHECK: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p0), [[BUILD_VECTOR]] :: (load store syncscope("agent-one-as") monotonic monotonic 4) + ; CHECK: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p0), [[BUILD_VECTOR]] :: (load store syncscope("agent-one-as") monotonic monotonic (s32)) ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AMDGPU_ATOMIC_CMPXCHG]](s32), [[COPY1]] ; CHECK: S_ENDPGM 0, implicit [[AMDGPU_ATOMIC_CMPXCHG]](s32), implicit [[ICMP]](s1) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 %2:_(s32) = COPY $vgpr3 - %3:_(s32), %4:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS %0, %1, %2 :: (load store syncscope("agent-one-as") monotonic monotonic 4, addrspace 0) + %3:_(s32), %4:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS %0, %1, %2 :: (load store syncscope("agent-one-as") monotonic monotonic (s32), addrspace 0) S_ENDPGM 0, implicit %3, implicit %4 ... @@ -55,13 +55,13 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p3), [[COPY1]], [[COPY2]] :: (load store syncscope("agent-one-as") monotonic monotonic 4, addrspace 3) + ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p3), [[COPY1]], [[COPY2]] :: (load store syncscope("agent-one-as") monotonic monotonic (s32), addrspace 3) ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[ATOMIC_CMPXCHG]](s32), [[COPY1]] ; CHECK: S_ENDPGM 0, implicit [[ATOMIC_CMPXCHG]](s32), implicit [[ICMP]](s1) %0:_(p3) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 - %3:_(s32), %4:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS %0, %1, %2 :: (load store syncscope("agent-one-as") monotonic monotonic 4, addrspace 3) + %3:_(s32), %4:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS %0, %1, %2 :: (load store syncscope("agent-one-as") monotonic monotonic (s32), addrspace 3) S_ENDPGM 0, implicit %3, implicit %4 ... @@ -77,13 +77,13 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY2]](s64), [[COPY1]](s64) - ; CHECK: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s64) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p1), [[BUILD_VECTOR]] :: (load store syncscope("agent-one-as") monotonic monotonic 8, addrspace 1) + ; CHECK: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s64) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p1), [[BUILD_VECTOR]] :: (load store syncscope("agent-one-as") monotonic monotonic (s64), addrspace 1) ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AMDGPU_ATOMIC_CMPXCHG]](s64), [[COPY1]] ; CHECK: S_ENDPGM 0, implicit [[AMDGPU_ATOMIC_CMPXCHG]](s64), implicit [[ICMP]](s1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = COPY $vgpr4_vgpr5 - %3:_(s64), %4:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS %0, %1, %2 :: (load store syncscope("agent-one-as") monotonic monotonic 8, addrspace 1) + %3:_(s64), %4:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS %0, %1, %2 :: (load store syncscope("agent-one-as") monotonic monotonic (s64), addrspace 1) S_ENDPGM 0, implicit %3, implicit %4 ... @@ -98,13 +98,13 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr1_vgpr2 ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr3_vgpr4 - ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s64) = G_ATOMIC_CMPXCHG [[COPY]](p3), [[COPY1]], [[COPY2]] :: (load store syncscope("agent-one-as") monotonic monotonic 8, addrspace 3) + ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s64) = G_ATOMIC_CMPXCHG [[COPY]](p3), [[COPY1]], [[COPY2]] :: (load store syncscope("agent-one-as") monotonic monotonic (s64), addrspace 3) ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[ATOMIC_CMPXCHG]](s64), [[COPY1]] ; CHECK: S_ENDPGM 0, implicit [[ATOMIC_CMPXCHG]](s64), implicit [[ICMP]](s1) %0:_(p3) = COPY $vgpr0 %1:_(s64) = COPY $vgpr1_vgpr2 %2:_(s64) = COPY $vgpr3_vgpr4 - %3:_(s64), %4:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS %0, %1, %2 :: (load store syncscope("agent-one-as") monotonic monotonic 8, addrspace 3) + %3:_(s64), %4:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS %0, %1, %2 :: (load store syncscope("agent-one-as") monotonic monotonic (s64), addrspace 3) S_ENDPGM 0, implicit %3, implicit %4 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg.mir index 2cadc7afa9b5b..b12cc3e5babc3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg.mir @@ -12,11 +12,11 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p3), [[COPY1]], [[COPY2]] :: (load store seq_cst 4, addrspace 3) + ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p3), [[COPY1]], [[COPY2]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $sgpr2 - %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst 4, addrspace 3) + %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst (s32), addrspace 3) ... --- @@ -29,11 +29,11 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p3), [[COPY1]], [[COPY2]] :: (load store seq_cst 8, addrspace 3) + ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p3), [[COPY1]], [[COPY2]] :: (load store seq_cst (s64), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $sgpr2 - %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst 8, addrspace 3) + %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst (s64), addrspace 3) ... --- @@ -47,11 +47,11 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr3 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32) - ; CHECK: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p1), [[BUILD_VECTOR]] :: (load store seq_cst 4, addrspace 1) + ; CHECK: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p1), [[BUILD_VECTOR]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = COPY $sgpr3 - %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst 4, addrspace 1) + %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst (s32), addrspace 1) ... --- @@ -65,11 +65,11 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr3 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32) - ; CHECK: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p1), [[BUILD_VECTOR]] :: (load store seq_cst 8, addrspace 1) + ; CHECK: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p1), [[BUILD_VECTOR]] :: (load store seq_cst (s64), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = COPY $sgpr3 - %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst 8, addrspace 1) + %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst (s64), addrspace 1) ... --- @@ -84,11 +84,11 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr3 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32) - ; CHECK: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p0), [[BUILD_VECTOR]] :: (load store seq_cst 4) + ; CHECK: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p0), [[BUILD_VECTOR]] :: (load store seq_cst (s32)) %0:_(p0) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = COPY $sgpr3 - %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst 4, addrspace 0) + %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst (s32), addrspace 0) ... --- @@ -103,9 +103,9 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr3 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32) - ; CHECK: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p0), [[BUILD_VECTOR]] :: (load store seq_cst 8) + ; CHECK: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p0), [[BUILD_VECTOR]] :: (load store seq_cst (s64)) %0:_(p0) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = COPY $sgpr3 - %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst 8, addrspace 0) + %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst (s64), addrspace 0) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-add.mir index a28718de322f7..c4139fe42617c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-add.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-add.mir @@ -11,10 +11,10 @@ body: | ; CHECK-LABEL: name: atomicrmw_add_global_i32 ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK: [[ATOMICRMW_ADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_ADD [[COPY]](p1), [[COPY1]] :: (load store seq_cst 4, addrspace 1) + ; CHECK: [[ATOMICRMW_ADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_ADD [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst 4, addrspace 1) + %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 1) ... --- @@ -26,10 +26,10 @@ body: | ; CHECK-LABEL: name: atomicrmw_add_local_i32 ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; CHECK: [[ATOMICRMW_ADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_ADD [[COPY]](p3), [[COPY1]] :: (load store seq_cst 4, addrspace 3) + ; CHECK: [[ATOMICRMW_ADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_ADD [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst 4, addrspace 3) + %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 3) ... --- @@ -41,10 +41,10 @@ body: | ; CHECK-LABEL: name: atomicrmw_add_global_i64 ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK: [[ATOMICRMW_ADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_ADD [[COPY]](p1), [[COPY1]] :: (load store seq_cst 4, addrspace 1) + ; CHECK: [[ATOMICRMW_ADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_ADD [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst 4, addrspace 1) + %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 1) ... --- @@ -56,8 +56,8 @@ body: | ; CHECK-LABEL: name: atomicrmw_add_local_i64 ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; CHECK: [[ATOMICRMW_ADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_ADD [[COPY]](p3), [[COPY1]] :: (load store seq_cst 4, addrspace 3) + ; CHECK: [[ATOMICRMW_ADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_ADD [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst 4, addrspace 3) + %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 3) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-and.mir index 5ea7c58dbae9c..6c169313c8932 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-and.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-and.mir @@ -11,10 +11,10 @@ body: | ; CHECK-LABEL: name: atomicrmw_and_global_i32 ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK: [[ATOMICRMW_AND:%[0-9]+]]:_(s32) = G_ATOMICRMW_AND [[COPY]](p1), [[COPY1]] :: (load store seq_cst 4, addrspace 1) + ; CHECK: [[ATOMICRMW_AND:%[0-9]+]]:_(s32) = G_ATOMICRMW_AND [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst 4, addrspace 1) + %2:_(s32) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst (s32), addrspace 1) ... --- @@ -26,10 +26,10 @@ body: | ; CHECK-LABEL: name: atomicrmw_and_local_i32 ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; CHECK: [[ATOMICRMW_AND:%[0-9]+]]:_(s32) = G_ATOMICRMW_AND [[COPY]](p3), [[COPY1]] :: (load store seq_cst 4, addrspace 3) + ; CHECK: [[ATOMICRMW_AND:%[0-9]+]]:_(s32) = G_ATOMICRMW_AND [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst 4, addrspace 3) + %2:_(s32) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst (s32), addrspace 3) ... --- @@ -41,10 +41,10 @@ body: | ; CHECK-LABEL: name: atomicrmw_and_global_i64 ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK: [[ATOMICRMW_AND:%[0-9]+]]:_(s32) = G_ATOMICRMW_AND [[COPY]](p1), [[COPY1]] :: (load store seq_cst 4, addrspace 1) + ; CHECK: [[ATOMICRMW_AND:%[0-9]+]]:_(s32) = G_ATOMICRMW_AND [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst 4, addrspace 1) + %2:_(s32) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst (s32), addrspace 1) ... --- @@ -56,8 +56,8 @@ body: | ; CHECK-LABEL: name: atomicrmw_and_local_i64 ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; CHECK: [[ATOMICRMW_AND:%[0-9]+]]:_(s32) = G_ATOMICRMW_AND [[COPY]](p3), [[COPY1]] :: (load store seq_cst 4, addrspace 3) + ; CHECK: [[ATOMICRMW_AND:%[0-9]+]]:_(s32) = G_ATOMICRMW_AND [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst 4, addrspace 3) + %2:_(s32) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst (s32), addrspace 3) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd-global.mir index 0abf6f613adab..8517e0f62a3ea 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd-global.mir @@ -4,7 +4,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=2 -pass-remarks-missed='gisel.*' -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERR %s -# ERR: remark: :0:0: unable to legalize instruction: %2:_(s32) = G_ATOMICRMW_FADD %0:_(p1), %1:_ :: (load store seq_cst 4, addrspace 1) (in function: atomicrmw_fadd_global_i32) +# ERR: remark: :0:0: unable to legalize instruction: %2:_(s32) = G_ATOMICRMW_FADD %0:_(p1), %1:_ :: (load store seq_cst (s32), addrspace 1) (in function: atomicrmw_fadd_global_i32) --- name: atomicrmw_fadd_global_i32 @@ -15,8 +15,8 @@ body: | ; CHECK-LABEL: name: atomicrmw_fadd_global_i32 ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK: [[ATOMICRMW_FADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_FADD [[COPY]](p1), [[COPY1]] :: (load store seq_cst 4, addrspace 1) + ; CHECK: [[ATOMICRMW_FADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_FADD [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst 4, addrspace 1) + %2:_(s32) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst (s32), addrspace 1) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd-local.mir index e760b795b1bde..958faafc9cc6a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd-local.mir @@ -4,7 +4,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=2 -pass-remarks-missed='gisel.*' -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERR %s # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -O0 -run-pass=legalizer -global-isel-abort=2 -pass-remarks-missed='gisel.*' -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERR %s -# ERR: remark: :0:0: unable to legalize instruction: %2:_(s32) = G_ATOMICRMW_FADD %0:_(p3), %1:_ :: (load store seq_cst 4, addrspace 3) (in function: atomicrmw_fadd_local_i32) +# ERR: remark: :0:0: unable to legalize instruction: %2:_(s32) = G_ATOMICRMW_FADD %0:_(p3), %1:_ :: (load store seq_cst (s32), addrspace 3) (in function: atomicrmw_fadd_local_i32) --- name: atomicrmw_fadd_local_i32 @@ -15,8 +15,8 @@ body: | ; CHECK-LABEL: name: atomicrmw_fadd_local_i32 ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; CHECK: [[ATOMICRMW_FADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_FADD [[COPY]](p3), [[COPY1]] :: (load store seq_cst 4, addrspace 3) + ; CHECK: [[ATOMICRMW_FADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_FADD [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst 4, addrspace 3) + %2:_(s32) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst (s32), addrspace 3) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-max.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-max.mir index c1b4648ae0ccc..13f1c40edd7b0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-max.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-max.mir @@ -11,10 +11,10 @@ body: | ; CHECK-LABEL: name: atomicrmw_max_global_i32 ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK: [[ATOMICRMW_MAX:%[0-9]+]]:_(s32) = G_ATOMICRMW_MAX [[COPY]](p1), [[COPY1]] :: (load store seq_cst 4, addrspace 1) + ; CHECK: [[ATOMICRMW_MAX:%[0-9]+]]:_(s32) = G_ATOMICRMW_MAX [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_MAX %0, %1 :: (load store seq_cst 4, addrspace 1) + %2:_(s32) = G_ATOMICRMW_MAX %0, %1 :: (load store seq_cst (s32), addrspace 1) ... --- @@ -26,10 +26,10 @@ body: | ; CHECK-LABEL: name: atomicrmw_max_local_i32 ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; CHECK: [[ATOMICRMW_MAX:%[0-9]+]]:_(s32) = G_ATOMICRMW_MAX [[COPY]](p3), [[COPY1]] :: (load store seq_cst 4, addrspace 3) + ; CHECK: [[ATOMICRMW_MAX:%[0-9]+]]:_(s32) = G_ATOMICRMW_MAX [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_MAX %0, %1 :: (load store seq_cst 4, addrspace 3) + %2:_(s32) = G_ATOMICRMW_MAX %0, %1 :: (load store seq_cst (s32), addrspace 3) ... --- @@ -41,10 +41,10 @@ body: | ; CHECK-LABEL: name: atomicrmw_max_global_i64 ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK: [[ATOMICRMW_MAX:%[0-9]+]]:_(s32) = G_ATOMICRMW_MAX [[COPY]](p1), [[COPY1]] :: (load store seq_cst 4, addrspace 1) + ; CHECK: [[ATOMICRMW_MAX:%[0-9]+]]:_(s32) = G_ATOMICRMW_MAX [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_MAX %0, %1 :: (load store seq_cst 4, addrspace 1) + %2:_(s32) = G_ATOMICRMW_MAX %0, %1 :: (load store seq_cst (s32), addrspace 1) ... --- @@ -56,8 +56,8 @@ body: | ; CHECK-LABEL: name: atomicrmw_max_local_i64 ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; CHECK: [[ATOMICRMW_MAX:%[0-9]+]]:_(s32) = G_ATOMICRMW_MAX [[COPY]](p3), [[COPY1]] :: (load store seq_cst 4, addrspace 3) + ; CHECK: [[ATOMICRMW_MAX:%[0-9]+]]:_(s32) = G_ATOMICRMW_MAX [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_MAX %0, %1 :: (load store seq_cst 4, addrspace 3) + %2:_(s32) = G_ATOMICRMW_MAX %0, %1 :: (load store seq_cst (s32), addrspace 3) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-min.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-min.mir index 08ec0a0aa14db..8077e22156993 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-min.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-min.mir @@ -11,10 +11,10 @@ body: | ; CHECK-LABEL: name: atomicrmw_min_global_i32 ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK: [[ATOMICRMW_MIN:%[0-9]+]]:_(s32) = G_ATOMICRMW_MIN [[COPY]](p1), [[COPY1]] :: (load store seq_cst 4, addrspace 1) + ; CHECK: [[ATOMICRMW_MIN:%[0-9]+]]:_(s32) = G_ATOMICRMW_MIN [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_MIN %0, %1 :: (load store seq_cst 4, addrspace 1) + %2:_(s32) = G_ATOMICRMW_MIN %0, %1 :: (load store seq_cst (s32), addrspace 1) ... --- @@ -26,10 +26,10 @@ body: | ; CHECK-LABEL: name: atomicrmw_min_local_i32 ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; CHECK: [[ATOMICRMW_MIN:%[0-9]+]]:_(s32) = G_ATOMICRMW_MIN [[COPY]](p3), [[COPY1]] :: (load store seq_cst 4, addrspace 3) + ; CHECK: [[ATOMICRMW_MIN:%[0-9]+]]:_(s32) = G_ATOMICRMW_MIN [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_MIN %0, %1 :: (load store seq_cst 4, addrspace 3) + %2:_(s32) = G_ATOMICRMW_MIN %0, %1 :: (load store seq_cst (s32), addrspace 3) ... --- @@ -41,10 +41,10 @@ body: | ; CHECK-LABEL: name: atomicrmw_min_global_i64 ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK: [[ATOMICRMW_MIN:%[0-9]+]]:_(s32) = G_ATOMICRMW_MIN [[COPY]](p1), [[COPY1]] :: (load store seq_cst 4, addrspace 1) + ; CHECK: [[ATOMICRMW_MIN:%[0-9]+]]:_(s32) = G_ATOMICRMW_MIN [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_MIN %0, %1 :: (load store seq_cst 4, addrspace 1) + %2:_(s32) = G_ATOMICRMW_MIN %0, %1 :: (load store seq_cst (s32), addrspace 1) ... --- @@ -56,8 +56,8 @@ body: | ; CHECK-LABEL: name: atomicrmw_min_local_i64 ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; CHECK: [[ATOMICRMW_MIN:%[0-9]+]]:_(s32) = G_ATOMICRMW_MIN [[COPY]](p3), [[COPY1]] :: (load store seq_cst 4, addrspace 3) + ; CHECK: [[ATOMICRMW_MIN:%[0-9]+]]:_(s32) = G_ATOMICRMW_MIN [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_MIN %0, %1 :: (load store seq_cst 4, addrspace 3) + %2:_(s32) = G_ATOMICRMW_MIN %0, %1 :: (load store seq_cst (s32), addrspace 3) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-nand.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-nand.mir index 845bffd1d9f35..e90c3e14158bd 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-nand.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-nand.mir @@ -3,7 +3,7 @@ # This needs to be expanded into a cmpxchg loop. # TODO: Will AtomicExpand still do this? -# ERROR: LLVM ERROR: unable to legalize instruction: %2:_(s32) = G_ATOMICRMW_NAND %0:_(p1), %1:_ :: (load store seq_cst 4, addrspace 1) (in function: atomicrmw_nand_global_i32) +# ERROR: LLVM ERROR: unable to legalize instruction: %2:_(s32) = G_ATOMICRMW_NAND %0:_(p1), %1:_ :: (load store seq_cst (s32), addrspace 1) (in function: atomicrmw_nand_global_i32) --- name: atomicrmw_nand_global_i32 @@ -14,9 +14,9 @@ body: | ; CHECK-LABEL: name: atomicrmw_nand_global_i32 ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK: [[ATOMICRMW_NAND:%[0-9]+]]:_(s32) = G_ATOMICRMW_NAND [[COPY]](p1), [[COPY1]] :: (load store seq_cst 4, addrspace 1) + ; CHECK: [[ATOMICRMW_NAND:%[0-9]+]]:_(s32) = G_ATOMICRMW_NAND [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_NAND %0, %1 :: (load store seq_cst 4, addrspace 1) + %2:_(s32) = G_ATOMICRMW_NAND %0, %1 :: (load store seq_cst (s32), addrspace 1) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-or.mir index e214ee6c8d2e7..4658120255286 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-or.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-or.mir @@ -11,10 +11,10 @@ body: | ; CHECK-LABEL: name: atomicrmw_or_global_i32 ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK: [[ATOMICRMW_OR:%[0-9]+]]:_(s32) = G_ATOMICRMW_OR [[COPY]](p1), [[COPY1]] :: (load store seq_cst 4, addrspace 1) + ; CHECK: [[ATOMICRMW_OR:%[0-9]+]]:_(s32) = G_ATOMICRMW_OR [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst 4, addrspace 1) + %2:_(s32) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst (s32), addrspace 1) ... --- @@ -26,10 +26,10 @@ body: | ; CHECK-LABEL: name: atomicrmw_or_local_i32 ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; CHECK: [[ATOMICRMW_OR:%[0-9]+]]:_(s32) = G_ATOMICRMW_OR [[COPY]](p3), [[COPY1]] :: (load store seq_cst 4, addrspace 3) + ; CHECK: [[ATOMICRMW_OR:%[0-9]+]]:_(s32) = G_ATOMICRMW_OR [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst 4, addrspace 3) + %2:_(s32) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst (s32), addrspace 3) ... --- @@ -41,10 +41,10 @@ body: | ; CHECK-LABEL: name: atomicrmw_or_global_i64 ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK: [[ATOMICRMW_OR:%[0-9]+]]:_(s32) = G_ATOMICRMW_OR [[COPY]](p1), [[COPY1]] :: (load store seq_cst 4, addrspace 1) + ; CHECK: [[ATOMICRMW_OR:%[0-9]+]]:_(s32) = G_ATOMICRMW_OR [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst 4, addrspace 1) + %2:_(s32) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst (s32), addrspace 1) ... --- @@ -56,8 +56,8 @@ body: | ; CHECK-LABEL: name: atomicrmw_or_local_i64 ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; CHECK: [[ATOMICRMW_OR:%[0-9]+]]:_(s32) = G_ATOMICRMW_OR [[COPY]](p3), [[COPY1]] :: (load store seq_cst 4, addrspace 3) + ; CHECK: [[ATOMICRMW_OR:%[0-9]+]]:_(s32) = G_ATOMICRMW_OR [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst 4, addrspace 3) + %2:_(s32) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst (s32), addrspace 3) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-sub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-sub.mir index 974cd3f12e3b6..0c9c79a7cfa77 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-sub.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-sub.mir @@ -11,10 +11,10 @@ body: | ; CHECK-LABEL: name: atomicrmw_sub_global_i32 ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK: [[ATOMICRMW_SUB:%[0-9]+]]:_(s32) = G_ATOMICRMW_SUB [[COPY]](p1), [[COPY1]] :: (load store seq_cst 4, addrspace 1) + ; CHECK: [[ATOMICRMW_SUB:%[0-9]+]]:_(s32) = G_ATOMICRMW_SUB [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst 4, addrspace 1) + %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s32), addrspace 1) ... --- @@ -26,10 +26,10 @@ body: | ; CHECK-LABEL: name: atomicrmw_sub_local_i32 ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; CHECK: [[ATOMICRMW_SUB:%[0-9]+]]:_(s32) = G_ATOMICRMW_SUB [[COPY]](p3), [[COPY1]] :: (load store seq_cst 4, addrspace 3) + ; CHECK: [[ATOMICRMW_SUB:%[0-9]+]]:_(s32) = G_ATOMICRMW_SUB [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst 4, addrspace 3) + %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s32), addrspace 3) ... --- @@ -41,10 +41,10 @@ body: | ; CHECK-LABEL: name: atomicrmw_sub_global_i64 ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK: [[ATOMICRMW_SUB:%[0-9]+]]:_(s32) = G_ATOMICRMW_SUB [[COPY]](p1), [[COPY1]] :: (load store seq_cst 4, addrspace 1) + ; CHECK: [[ATOMICRMW_SUB:%[0-9]+]]:_(s32) = G_ATOMICRMW_SUB [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst 4, addrspace 1) + %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s32), addrspace 1) ... --- @@ -56,8 +56,8 @@ body: | ; CHECK-LABEL: name: atomicrmw_sub_local_i64 ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; CHECK: [[ATOMICRMW_SUB:%[0-9]+]]:_(s32) = G_ATOMICRMW_SUB [[COPY]](p3), [[COPY1]] :: (load store seq_cst 4, addrspace 3) + ; CHECK: [[ATOMICRMW_SUB:%[0-9]+]]:_(s32) = G_ATOMICRMW_SUB [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst 4, addrspace 3) + %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s32), addrspace 3) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-umax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-umax.mir index 0fc62a4ec895d..aa5954800113d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-umax.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-umax.mir @@ -11,10 +11,10 @@ body: | ; CHECK-LABEL: name: atomicrmw_umax_global_i32 ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK: [[ATOMICRMW_UMAX:%[0-9]+]]:_(s32) = G_ATOMICRMW_UMAX [[COPY]](p1), [[COPY1]] :: (load store seq_cst 4, addrspace 1) + ; CHECK: [[ATOMICRMW_UMAX:%[0-9]+]]:_(s32) = G_ATOMICRMW_UMAX [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_UMAX %0, %1 :: (load store seq_cst 4, addrspace 1) + %2:_(s32) = G_ATOMICRMW_UMAX %0, %1 :: (load store seq_cst (s32), addrspace 1) ... --- @@ -26,10 +26,10 @@ body: | ; CHECK-LABEL: name: atomicrmw_umax_local_i32 ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; CHECK: [[ATOMICRMW_UMAX:%[0-9]+]]:_(s32) = G_ATOMICRMW_UMAX [[COPY]](p3), [[COPY1]] :: (load store seq_cst 4, addrspace 3) + ; CHECK: [[ATOMICRMW_UMAX:%[0-9]+]]:_(s32) = G_ATOMICRMW_UMAX [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_UMAX %0, %1 :: (load store seq_cst 4, addrspace 3) + %2:_(s32) = G_ATOMICRMW_UMAX %0, %1 :: (load store seq_cst (s32), addrspace 3) ... --- @@ -41,10 +41,10 @@ body: | ; CHECK-LABEL: name: atomicrmw_umax_global_i64 ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK: [[ATOMICRMW_UMAX:%[0-9]+]]:_(s32) = G_ATOMICRMW_UMAX [[COPY]](p1), [[COPY1]] :: (load store seq_cst 4, addrspace 1) + ; CHECK: [[ATOMICRMW_UMAX:%[0-9]+]]:_(s32) = G_ATOMICRMW_UMAX [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_UMAX %0, %1 :: (load store seq_cst 4, addrspace 1) + %2:_(s32) = G_ATOMICRMW_UMAX %0, %1 :: (load store seq_cst (s32), addrspace 1) ... --- @@ -56,8 +56,8 @@ body: | ; CHECK-LABEL: name: atomicrmw_umax_local_i64 ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; CHECK: [[ATOMICRMW_UMAX:%[0-9]+]]:_(s32) = G_ATOMICRMW_UMAX [[COPY]](p3), [[COPY1]] :: (load store seq_cst 4, addrspace 3) + ; CHECK: [[ATOMICRMW_UMAX:%[0-9]+]]:_(s32) = G_ATOMICRMW_UMAX [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_UMAX %0, %1 :: (load store seq_cst 4, addrspace 3) + %2:_(s32) = G_ATOMICRMW_UMAX %0, %1 :: (load store seq_cst (s32), addrspace 3) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-umin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-umin.mir index a72b6d7f152b1..0c09a67cb76ec 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-umin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-umin.mir @@ -11,10 +11,10 @@ body: | ; CHECK-LABEL: name: atomicrmw_umin_global_i32 ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK: [[ATOMICRMW_UMIN:%[0-9]+]]:_(s32) = G_ATOMICRMW_UMIN [[COPY]](p1), [[COPY1]] :: (load store seq_cst 4, addrspace 1) + ; CHECK: [[ATOMICRMW_UMIN:%[0-9]+]]:_(s32) = G_ATOMICRMW_UMIN [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_UMIN %0, %1 :: (load store seq_cst 4, addrspace 1) + %2:_(s32) = G_ATOMICRMW_UMIN %0, %1 :: (load store seq_cst (s32), addrspace 1) ... --- @@ -26,10 +26,10 @@ body: | ; CHECK-LABEL: name: atomicrmw_umin_local_i32 ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; CHECK: [[ATOMICRMW_UMIN:%[0-9]+]]:_(s32) = G_ATOMICRMW_UMIN [[COPY]](p3), [[COPY1]] :: (load store seq_cst 4, addrspace 3) + ; CHECK: [[ATOMICRMW_UMIN:%[0-9]+]]:_(s32) = G_ATOMICRMW_UMIN [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_UMIN %0, %1 :: (load store seq_cst 4, addrspace 3) + %2:_(s32) = G_ATOMICRMW_UMIN %0, %1 :: (load store seq_cst (s32), addrspace 3) ... --- @@ -41,10 +41,10 @@ body: | ; CHECK-LABEL: name: atomicrmw_umin_global_i64 ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK: [[ATOMICRMW_UMIN:%[0-9]+]]:_(s32) = G_ATOMICRMW_UMIN [[COPY]](p1), [[COPY1]] :: (load store seq_cst 4, addrspace 1) + ; CHECK: [[ATOMICRMW_UMIN:%[0-9]+]]:_(s32) = G_ATOMICRMW_UMIN [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_UMIN %0, %1 :: (load store seq_cst 4, addrspace 1) + %2:_(s32) = G_ATOMICRMW_UMIN %0, %1 :: (load store seq_cst (s32), addrspace 1) ... --- @@ -56,8 +56,8 @@ body: | ; CHECK-LABEL: name: atomicrmw_umin_local_i64 ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; CHECK: [[ATOMICRMW_UMIN:%[0-9]+]]:_(s32) = G_ATOMICRMW_UMIN [[COPY]](p3), [[COPY1]] :: (load store seq_cst 4, addrspace 3) + ; CHECK: [[ATOMICRMW_UMIN:%[0-9]+]]:_(s32) = G_ATOMICRMW_UMIN [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_UMIN %0, %1 :: (load store seq_cst 4, addrspace 3) + %2:_(s32) = G_ATOMICRMW_UMIN %0, %1 :: (load store seq_cst (s32), addrspace 3) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-xchg-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-xchg-flat.mir index 9f2be730569c1..7891e2f9e469f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-xchg-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-xchg-flat.mir @@ -2,7 +2,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire -O0 -run-pass=legalizer -o - %s | FileCheck %s # RUN: not --crash llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERROR %s -# ERROR: LLVM ERROR: unable to legalize instruction: %2:_(s32) = G_ATOMICRMW_XCHG %0:_(p0), %1:_ :: (load store seq_cst 4) (in function: atomicrmw_xchg_flat_i32) +# ERROR: LLVM ERROR: unable to legalize instruction: %2:_(s32) = G_ATOMICRMW_XCHG %0:_(p0), %1:_ :: (load store seq_cst (s32)) (in function: atomicrmw_xchg_flat_i32) --- @@ -14,10 +14,10 @@ body: | ; CHECK-LABEL: name: atomicrmw_xchg_flat_i32 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK: [[ATOMICRMW_XCHG:%[0-9]+]]:_(s32) = G_ATOMICRMW_XCHG [[COPY]](p0), [[COPY1]] :: (load store seq_cst 4) + ; CHECK: [[ATOMICRMW_XCHG:%[0-9]+]]:_(s32) = G_ATOMICRMW_XCHG [[COPY]](p0), [[COPY1]] :: (load store seq_cst (s32)) %0:_(p0) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_XCHG %0, %1 :: (load store seq_cst 4, addrspace 0) + %2:_(s32) = G_ATOMICRMW_XCHG %0, %1 :: (load store seq_cst (s32), addrspace 0) ... --- @@ -29,8 +29,8 @@ body: | ; CHECK-LABEL: name: atomicrmw_xchg_flat_i64 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK: [[ATOMICRMW_XCHG:%[0-9]+]]:_(s32) = G_ATOMICRMW_XCHG [[COPY]](p0), [[COPY1]] :: (load store seq_cst 4) + ; CHECK: [[ATOMICRMW_XCHG:%[0-9]+]]:_(s32) = G_ATOMICRMW_XCHG [[COPY]](p0), [[COPY1]] :: (load store seq_cst (s32)) %0:_(p0) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_XCHG %0, %1 :: (load store seq_cst 4, addrspace 0) + %2:_(s32) = G_ATOMICRMW_XCHG %0, %1 :: (load store seq_cst (s32), addrspace 0) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-xchg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-xchg.mir index 424ac334cc682..567d7c856052f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-xchg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-xchg.mir @@ -11,10 +11,10 @@ body: | ; CHECK-LABEL: name: atomicrmw_xchg_global_i32 ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK: [[ATOMICRMW_XCHG:%[0-9]+]]:_(s32) = G_ATOMICRMW_XCHG [[COPY]](p1), [[COPY1]] :: (load store seq_cst 4, addrspace 1) + ; CHECK: [[ATOMICRMW_XCHG:%[0-9]+]]:_(s32) = G_ATOMICRMW_XCHG [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_XCHG %0, %1 :: (load store seq_cst 4, addrspace 1) + %2:_(s32) = G_ATOMICRMW_XCHG %0, %1 :: (load store seq_cst (s32), addrspace 1) ... --- @@ -26,10 +26,10 @@ body: | ; CHECK-LABEL: name: atomicrmw_xchg_local_i32 ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; CHECK: [[ATOMICRMW_XCHG:%[0-9]+]]:_(s32) = G_ATOMICRMW_XCHG [[COPY]](p3), [[COPY1]] :: (load store seq_cst 4, addrspace 3) + ; CHECK: [[ATOMICRMW_XCHG:%[0-9]+]]:_(s32) = G_ATOMICRMW_XCHG [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_XCHG %0, %1 :: (load store seq_cst 4, addrspace 3) + %2:_(s32) = G_ATOMICRMW_XCHG %0, %1 :: (load store seq_cst (s32), addrspace 3) ... --- @@ -41,10 +41,10 @@ body: | ; CHECK-LABEL: name: atomicrmw_xchg_global_i64 ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK: [[ATOMICRMW_XCHG:%[0-9]+]]:_(s32) = G_ATOMICRMW_XCHG [[COPY]](p1), [[COPY1]] :: (load store seq_cst 4, addrspace 1) + ; CHECK: [[ATOMICRMW_XCHG:%[0-9]+]]:_(s32) = G_ATOMICRMW_XCHG [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_XCHG %0, %1 :: (load store seq_cst 4, addrspace 1) + %2:_(s32) = G_ATOMICRMW_XCHG %0, %1 :: (load store seq_cst (s32), addrspace 1) ... --- @@ -56,8 +56,8 @@ body: | ; CHECK-LABEL: name: atomicrmw_xchg_local_i64 ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; CHECK: [[ATOMICRMW_XCHG:%[0-9]+]]:_(s32) = G_ATOMICRMW_XCHG [[COPY]](p3), [[COPY1]] :: (load store seq_cst 4, addrspace 3) + ; CHECK: [[ATOMICRMW_XCHG:%[0-9]+]]:_(s32) = G_ATOMICRMW_XCHG [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_XCHG %0, %1 :: (load store seq_cst 4, addrspace 3) + %2:_(s32) = G_ATOMICRMW_XCHG %0, %1 :: (load store seq_cst (s32), addrspace 3) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-xor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-xor.mir index ce5f78f651377..06781a0a2e520 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-xor.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-xor.mir @@ -11,10 +11,10 @@ body: | ; CHECK-LABEL: name: atomicrmw_xor_global_i32 ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK: [[ATOMICRMW_XOR:%[0-9]+]]:_(s32) = G_ATOMICRMW_XOR [[COPY]](p1), [[COPY1]] :: (load store seq_cst 4, addrspace 1) + ; CHECK: [[ATOMICRMW_XOR:%[0-9]+]]:_(s32) = G_ATOMICRMW_XOR [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst 4, addrspace 1) + %2:_(s32) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst (s32), addrspace 1) ... --- @@ -26,10 +26,10 @@ body: | ; CHECK-LABEL: name: atomicrmw_xor_local_i32 ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; CHECK: [[ATOMICRMW_XOR:%[0-9]+]]:_(s32) = G_ATOMICRMW_XOR [[COPY]](p3), [[COPY1]] :: (load store seq_cst 4, addrspace 3) + ; CHECK: [[ATOMICRMW_XOR:%[0-9]+]]:_(s32) = G_ATOMICRMW_XOR [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst 4, addrspace 3) + %2:_(s32) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst (s32), addrspace 3) ... --- @@ -41,10 +41,10 @@ body: | ; CHECK-LABEL: name: atomicrmw_xor_global_i64 ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK: [[ATOMICRMW_XOR:%[0-9]+]]:_(s32) = G_ATOMICRMW_XOR [[COPY]](p1), [[COPY1]] :: (load store seq_cst 4, addrspace 1) + ; CHECK: [[ATOMICRMW_XOR:%[0-9]+]]:_(s32) = G_ATOMICRMW_XOR [[COPY]](p1), [[COPY1]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst 4, addrspace 1) + %2:_(s32) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst (s32), addrspace 1) ... --- @@ -56,8 +56,8 @@ body: | ; CHECK-LABEL: name: atomicrmw_xor_local_i64 ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 - ; CHECK: [[ATOMICRMW_XOR:%[0-9]+]]:_(s32) = G_ATOMICRMW_XOR [[COPY]](p3), [[COPY1]] :: (load store seq_cst 4, addrspace 3) + ; CHECK: [[ATOMICRMW_XOR:%[0-9]+]]:_(s32) = G_ATOMICRMW_XOR [[COPY]](p3), [[COPY1]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst 4, addrspace 3) + %2:_(s32) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst (s32), addrspace 3) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir index 685917bba80e2..919fb03140470 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir @@ -1398,21 +1398,21 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_7_v64s32 ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load 64, align 4, addrspace 4) + ; CHECK: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (s512), align 4, addrspace 4) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load 64 from unknown-address + 64, align 4, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (s512) from unknown-address + 64, align 4, addrspace 4) ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p1) :: (load 64 from unknown-address + 128, align 4, addrspace 4) + ; CHECK: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p1) :: (load (s512) from unknown-address + 128, align 4, addrspace 4) ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 192 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CHECK: [[LOAD3:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load 64 from unknown-address + 192, align 4, addrspace 4) + ; CHECK: [[LOAD3:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load (s512) from unknown-address + 192, align 4, addrspace 4) ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<16 x s32>), 224 ; CHECK: S_ENDPGM 0, implicit [[EXTRACT]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_CONSTANT i32 7 - %2:_(<64 x s32>) = G_LOAD %0 :: (load 256, align 4, addrspace 4) + %2:_(<64 x s32>) = G_LOAD %0 :: (load (<64 x s32>), align 4, addrspace 4) %3:_(s32) = G_EXTRACT_VECTOR_ELT %2, %1 S_ENDPGM 0, implicit %3 ... @@ -1426,21 +1426,21 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_33_v64s32 ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load 64, align 4, addrspace 4) + ; CHECK: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (s512), align 4, addrspace 4) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load 64 from unknown-address + 64, align 4, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (s512) from unknown-address + 64, align 4, addrspace 4) ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p1) :: (load 64 from unknown-address + 128, align 4, addrspace 4) + ; CHECK: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p1) :: (load (s512) from unknown-address + 128, align 4, addrspace 4) ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 192 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CHECK: [[LOAD3:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load 64 from unknown-address + 192, align 4, addrspace 4) + ; CHECK: [[LOAD3:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load (s512) from unknown-address + 192, align 4, addrspace 4) ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD2]](<16 x s32>), 32 ; CHECK: S_ENDPGM 0, implicit [[EXTRACT]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_CONSTANT i32 33 - %2:_(<64 x s32>) = G_LOAD %0 :: (load 256, align 4, addrspace 4) + %2:_(<64 x s32>) = G_LOAD %0 :: (load (<64 x s32>), align 4, addrspace 4) %3:_(s32) = G_EXTRACT_VECTOR_ELT %2, %1 S_ENDPGM 0, implicit %3 ... @@ -1460,7 +1460,7 @@ body: | ; CHECK: S_ENDPGM 0, implicit [[COPY1]](s32), implicit [[DEF]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_CONSTANT i32 64 - %2:_(<64 x s32>) = G_LOAD %0 :: (load 256, align 4, addrspace 4) + %2:_(<64 x s32>) = G_LOAD %0 :: (load (<64 x s32>), align 4, addrspace 4) %3:_(s32) = G_EXTRACT_VECTOR_ELT %2, %1 %4:_(s32) = G_CONSTANT i32 65 %5:_(s32) = G_EXTRACT_VECTOR_ELT %2, %4 @@ -1476,221 +1476,221 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_33_v64p3 ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load 64, align 4, addrspace 4) + ; CHECK: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (s512), align 4, addrspace 4) ; CHECK: [[BITCAST:%[0-9]+]]:_(<16 x p3>) = G_BITCAST [[LOAD]](<16 x s32>) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load 64 from unknown-address + 64, align 4, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (s512) from unknown-address + 64, align 4, addrspace 4) ; CHECK: [[BITCAST1:%[0-9]+]]:_(<16 x p3>) = G_BITCAST [[LOAD1]](<16 x s32>) ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p1) :: (load 64 from unknown-address + 128, align 4, addrspace 4) + ; CHECK: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p1) :: (load (s512) from unknown-address + 128, align 4, addrspace 4) ; CHECK: [[BITCAST2:%[0-9]+]]:_(<16 x p3>) = G_BITCAST [[LOAD2]](<16 x s32>) ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 192 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CHECK: [[LOAD3:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load 64 from unknown-address + 192, align 4, addrspace 4) + ; CHECK: [[LOAD3:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load (s512) from unknown-address + 192, align 4, addrspace 4) ; CHECK: [[BITCAST3:%[0-9]+]]:_(<16 x p3>) = G_BITCAST [[LOAD3]](<16 x s32>) ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0 ; CHECK: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3), [[UV2:%[0-9]+]]:_(p3), [[UV3:%[0-9]+]]:_(p3), [[UV4:%[0-9]+]]:_(p3), [[UV5:%[0-9]+]]:_(p3), [[UV6:%[0-9]+]]:_(p3), [[UV7:%[0-9]+]]:_(p3), [[UV8:%[0-9]+]]:_(p3), [[UV9:%[0-9]+]]:_(p3), [[UV10:%[0-9]+]]:_(p3), [[UV11:%[0-9]+]]:_(p3), [[UV12:%[0-9]+]]:_(p3), [[UV13:%[0-9]+]]:_(p3), [[UV14:%[0-9]+]]:_(p3), [[UV15:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[BITCAST]](<16 x p3>) ; CHECK: [[UV16:%[0-9]+]]:_(p3), [[UV17:%[0-9]+]]:_(p3), [[UV18:%[0-9]+]]:_(p3), [[UV19:%[0-9]+]]:_(p3), [[UV20:%[0-9]+]]:_(p3), [[UV21:%[0-9]+]]:_(p3), [[UV22:%[0-9]+]]:_(p3), [[UV23:%[0-9]+]]:_(p3), [[UV24:%[0-9]+]]:_(p3), [[UV25:%[0-9]+]]:_(p3), [[UV26:%[0-9]+]]:_(p3), [[UV27:%[0-9]+]]:_(p3), [[UV28:%[0-9]+]]:_(p3), [[UV29:%[0-9]+]]:_(p3), [[UV30:%[0-9]+]]:_(p3), [[UV31:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[BITCAST1]](<16 x p3>) ; CHECK: [[UV32:%[0-9]+]]:_(p3), [[UV33:%[0-9]+]]:_(p3), [[UV34:%[0-9]+]]:_(p3), [[UV35:%[0-9]+]]:_(p3), [[UV36:%[0-9]+]]:_(p3), [[UV37:%[0-9]+]]:_(p3), [[UV38:%[0-9]+]]:_(p3), [[UV39:%[0-9]+]]:_(p3), [[UV40:%[0-9]+]]:_(p3), [[UV41:%[0-9]+]]:_(p3), [[UV42:%[0-9]+]]:_(p3), [[UV43:%[0-9]+]]:_(p3), [[UV44:%[0-9]+]]:_(p3), [[UV45:%[0-9]+]]:_(p3), [[UV46:%[0-9]+]]:_(p3), [[UV47:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[BITCAST2]](<16 x p3>) ; CHECK: [[UV48:%[0-9]+]]:_(p3), [[UV49:%[0-9]+]]:_(p3), [[UV50:%[0-9]+]]:_(p3), [[UV51:%[0-9]+]]:_(p3), [[UV52:%[0-9]+]]:_(p3), [[UV53:%[0-9]+]]:_(p3), [[UV54:%[0-9]+]]:_(p3), [[UV55:%[0-9]+]]:_(p3), [[UV56:%[0-9]+]]:_(p3), [[UV57:%[0-9]+]]:_(p3), [[UV58:%[0-9]+]]:_(p3), [[UV59:%[0-9]+]]:_(p3), [[UV60:%[0-9]+]]:_(p3), [[UV61:%[0-9]+]]:_(p3), [[UV62:%[0-9]+]]:_(p3), [[UV63:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[BITCAST3]](<16 x p3>) - ; CHECK: G_STORE [[UV]](p3), [[FRAME_INDEX]](p5) :: (store 4 into %stack.0, align 256, addrspace 5) + ; CHECK: G_STORE [[UV]](p3), [[FRAME_INDEX]](p5) :: (store (s32) into %stack.0, align 256, addrspace 5) ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C3]](s32) - ; CHECK: G_STORE [[UV1]](p3), [[PTR_ADD3]](p5) :: (store 4 into %stack.0 + 4, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV1]](p3), [[PTR_ADD3]](p5) :: (store (s32) into %stack.0 + 4, basealign 256, addrspace 5) ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C4]](s32) - ; CHECK: G_STORE [[UV2]](p3), [[PTR_ADD4]](p5) :: (store 4 into %stack.0 + 8, align 8, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV2]](p3), [[PTR_ADD4]](p5) :: (store (s32) into %stack.0 + 8, align 8, basealign 256, addrspace 5) ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C5]](s32) - ; CHECK: G_STORE [[UV3]](p3), [[PTR_ADD5]](p5) :: (store 4 into %stack.0 + 12, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV3]](p3), [[PTR_ADD5]](p5) :: (store (s32) into %stack.0 + 12, basealign 256, addrspace 5) ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C6]](s32) - ; CHECK: G_STORE [[UV4]](p3), [[PTR_ADD6]](p5) :: (store 4 into %stack.0 + 16, align 16, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV4]](p3), [[PTR_ADD6]](p5) :: (store (s32) into %stack.0 + 16, align 16, basealign 256, addrspace 5) ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C7]](s32) - ; CHECK: G_STORE [[UV5]](p3), [[PTR_ADD7]](p5) :: (store 4 into %stack.0 + 20, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV5]](p3), [[PTR_ADD7]](p5) :: (store (s32) into %stack.0 + 20, basealign 256, addrspace 5) ; CHECK: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; CHECK: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C8]](s32) - ; CHECK: G_STORE [[UV6]](p3), [[PTR_ADD8]](p5) :: (store 4 into %stack.0 + 24, align 8, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV6]](p3), [[PTR_ADD8]](p5) :: (store (s32) into %stack.0 + 24, align 8, basealign 256, addrspace 5) ; CHECK: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 ; CHECK: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C9]](s32) - ; CHECK: G_STORE [[UV7]](p3), [[PTR_ADD9]](p5) :: (store 4 into %stack.0 + 28, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV7]](p3), [[PTR_ADD9]](p5) :: (store (s32) into %stack.0 + 28, basealign 256, addrspace 5) ; CHECK: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; CHECK: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C10]](s32) - ; CHECK: G_STORE [[UV8]](p3), [[PTR_ADD10]](p5) :: (store 4 into %stack.0 + 32, align 32, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV8]](p3), [[PTR_ADD10]](p5) :: (store (s32) into %stack.0 + 32, align 32, basealign 256, addrspace 5) ; CHECK: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 36 ; CHECK: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C11]](s32) - ; CHECK: G_STORE [[UV9]](p3), [[PTR_ADD11]](p5) :: (store 4 into %stack.0 + 36, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV9]](p3), [[PTR_ADD11]](p5) :: (store (s32) into %stack.0 + 36, basealign 256, addrspace 5) ; CHECK: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 ; CHECK: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C12]](s32) - ; CHECK: G_STORE [[UV10]](p3), [[PTR_ADD12]](p5) :: (store 4 into %stack.0 + 40, align 8, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV10]](p3), [[PTR_ADD12]](p5) :: (store (s32) into %stack.0 + 40, align 8, basealign 256, addrspace 5) ; CHECK: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 44 ; CHECK: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C13]](s32) - ; CHECK: G_STORE [[UV11]](p3), [[PTR_ADD13]](p5) :: (store 4 into %stack.0 + 44, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV11]](p3), [[PTR_ADD13]](p5) :: (store (s32) into %stack.0 + 44, basealign 256, addrspace 5) ; CHECK: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 ; CHECK: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C14]](s32) - ; CHECK: G_STORE [[UV12]](p3), [[PTR_ADD14]](p5) :: (store 4 into %stack.0 + 48, align 16, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV12]](p3), [[PTR_ADD14]](p5) :: (store (s32) into %stack.0 + 48, align 16, basealign 256, addrspace 5) ; CHECK: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 52 ; CHECK: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C15]](s32) - ; CHECK: G_STORE [[UV13]](p3), [[PTR_ADD15]](p5) :: (store 4 into %stack.0 + 52, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV13]](p3), [[PTR_ADD15]](p5) :: (store (s32) into %stack.0 + 52, basealign 256, addrspace 5) ; CHECK: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 56 ; CHECK: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C16]](s32) - ; CHECK: G_STORE [[UV14]](p3), [[PTR_ADD16]](p5) :: (store 4 into %stack.0 + 56, align 8, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV14]](p3), [[PTR_ADD16]](p5) :: (store (s32) into %stack.0 + 56, align 8, basealign 256, addrspace 5) ; CHECK: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 60 ; CHECK: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C17]](s32) - ; CHECK: G_STORE [[UV15]](p3), [[PTR_ADD17]](p5) :: (store 4 into %stack.0 + 60, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV15]](p3), [[PTR_ADD17]](p5) :: (store (s32) into %stack.0 + 60, basealign 256, addrspace 5) ; CHECK: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; CHECK: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C18]](s32) - ; CHECK: G_STORE [[UV16]](p3), [[PTR_ADD18]](p5) :: (store 4 into %stack.0 + 64, align 64, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV16]](p3), [[PTR_ADD18]](p5) :: (store (s32) into %stack.0 + 64, align 64, basealign 256, addrspace 5) ; CHECK: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 68 ; CHECK: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C19]](s32) - ; CHECK: G_STORE [[UV17]](p3), [[PTR_ADD19]](p5) :: (store 4 into %stack.0 + 68, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV17]](p3), [[PTR_ADD19]](p5) :: (store (s32) into %stack.0 + 68, basealign 256, addrspace 5) ; CHECK: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 72 ; CHECK: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C20]](s32) - ; CHECK: G_STORE [[UV18]](p3), [[PTR_ADD20]](p5) :: (store 4 into %stack.0 + 72, align 8, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV18]](p3), [[PTR_ADD20]](p5) :: (store (s32) into %stack.0 + 72, align 8, basealign 256, addrspace 5) ; CHECK: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 76 ; CHECK: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C21]](s32) - ; CHECK: G_STORE [[UV19]](p3), [[PTR_ADD21]](p5) :: (store 4 into %stack.0 + 76, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV19]](p3), [[PTR_ADD21]](p5) :: (store (s32) into %stack.0 + 76, basealign 256, addrspace 5) ; CHECK: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 80 ; CHECK: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C22]](s32) - ; CHECK: G_STORE [[UV20]](p3), [[PTR_ADD22]](p5) :: (store 4 into %stack.0 + 80, align 16, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV20]](p3), [[PTR_ADD22]](p5) :: (store (s32) into %stack.0 + 80, align 16, basealign 256, addrspace 5) ; CHECK: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 84 ; CHECK: [[PTR_ADD23:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C23]](s32) - ; CHECK: G_STORE [[UV21]](p3), [[PTR_ADD23]](p5) :: (store 4 into %stack.0 + 84, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV21]](p3), [[PTR_ADD23]](p5) :: (store (s32) into %stack.0 + 84, basealign 256, addrspace 5) ; CHECK: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 88 ; CHECK: [[PTR_ADD24:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C24]](s32) - ; CHECK: G_STORE [[UV22]](p3), [[PTR_ADD24]](p5) :: (store 4 into %stack.0 + 88, align 8, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV22]](p3), [[PTR_ADD24]](p5) :: (store (s32) into %stack.0 + 88, align 8, basealign 256, addrspace 5) ; CHECK: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 92 ; CHECK: [[PTR_ADD25:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C25]](s32) - ; CHECK: G_STORE [[UV23]](p3), [[PTR_ADD25]](p5) :: (store 4 into %stack.0 + 92, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV23]](p3), [[PTR_ADD25]](p5) :: (store (s32) into %stack.0 + 92, basealign 256, addrspace 5) ; CHECK: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 96 ; CHECK: [[PTR_ADD26:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C26]](s32) - ; CHECK: G_STORE [[UV24]](p3), [[PTR_ADD26]](p5) :: (store 4 into %stack.0 + 96, align 32, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV24]](p3), [[PTR_ADD26]](p5) :: (store (s32) into %stack.0 + 96, align 32, basealign 256, addrspace 5) ; CHECK: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 100 ; CHECK: [[PTR_ADD27:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C27]](s32) - ; CHECK: G_STORE [[UV25]](p3), [[PTR_ADD27]](p5) :: (store 4 into %stack.0 + 100, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV25]](p3), [[PTR_ADD27]](p5) :: (store (s32) into %stack.0 + 100, basealign 256, addrspace 5) ; CHECK: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 104 ; CHECK: [[PTR_ADD28:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C28]](s32) - ; CHECK: G_STORE [[UV26]](p3), [[PTR_ADD28]](p5) :: (store 4 into %stack.0 + 104, align 8, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV26]](p3), [[PTR_ADD28]](p5) :: (store (s32) into %stack.0 + 104, align 8, basealign 256, addrspace 5) ; CHECK: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 108 ; CHECK: [[PTR_ADD29:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C29]](s32) - ; CHECK: G_STORE [[UV27]](p3), [[PTR_ADD29]](p5) :: (store 4 into %stack.0 + 108, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV27]](p3), [[PTR_ADD29]](p5) :: (store (s32) into %stack.0 + 108, basealign 256, addrspace 5) ; CHECK: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 112 ; CHECK: [[PTR_ADD30:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C30]](s32) - ; CHECK: G_STORE [[UV28]](p3), [[PTR_ADD30]](p5) :: (store 4 into %stack.0 + 112, align 16, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV28]](p3), [[PTR_ADD30]](p5) :: (store (s32) into %stack.0 + 112, align 16, basealign 256, addrspace 5) ; CHECK: [[C31:%[0-9]+]]:_(s32) = G_CONSTANT i32 116 ; CHECK: [[PTR_ADD31:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C31]](s32) - ; CHECK: G_STORE [[UV29]](p3), [[PTR_ADD31]](p5) :: (store 4 into %stack.0 + 116, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV29]](p3), [[PTR_ADD31]](p5) :: (store (s32) into %stack.0 + 116, basealign 256, addrspace 5) ; CHECK: [[C32:%[0-9]+]]:_(s32) = G_CONSTANT i32 120 ; CHECK: [[PTR_ADD32:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C32]](s32) - ; CHECK: G_STORE [[UV30]](p3), [[PTR_ADD32]](p5) :: (store 4 into %stack.0 + 120, align 8, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV30]](p3), [[PTR_ADD32]](p5) :: (store (s32) into %stack.0 + 120, align 8, basealign 256, addrspace 5) ; CHECK: [[C33:%[0-9]+]]:_(s32) = G_CONSTANT i32 124 ; CHECK: [[PTR_ADD33:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C33]](s32) - ; CHECK: G_STORE [[UV31]](p3), [[PTR_ADD33]](p5) :: (store 4 into %stack.0 + 124, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV31]](p3), [[PTR_ADD33]](p5) :: (store (s32) into %stack.0 + 124, basealign 256, addrspace 5) ; CHECK: [[C34:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 ; CHECK: [[PTR_ADD34:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C34]](s32) - ; CHECK: G_STORE [[UV32]](p3), [[PTR_ADD34]](p5) :: (store 4 into %stack.0 + 128, align 128, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV32]](p3), [[PTR_ADD34]](p5) :: (store (s32) into %stack.0 + 128, align 128, basealign 256, addrspace 5) ; CHECK: [[C35:%[0-9]+]]:_(s32) = G_CONSTANT i32 132 ; CHECK: [[PTR_ADD35:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C35]](s32) ; CHECK: [[COPY1:%[0-9]+]]:_(p5) = COPY [[PTR_ADD35]](p5) - ; CHECK: G_STORE [[UV33]](p3), [[COPY1]](p5) :: (store 4 into %stack.0 + 132, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV33]](p3), [[COPY1]](p5) :: (store (s32) into %stack.0 + 132, basealign 256, addrspace 5) ; CHECK: [[C36:%[0-9]+]]:_(s32) = G_CONSTANT i32 136 ; CHECK: [[PTR_ADD36:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C36]](s32) - ; CHECK: G_STORE [[UV34]](p3), [[PTR_ADD36]](p5) :: (store 4 into %stack.0 + 136, align 8, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV34]](p3), [[PTR_ADD36]](p5) :: (store (s32) into %stack.0 + 136, align 8, basealign 256, addrspace 5) ; CHECK: [[C37:%[0-9]+]]:_(s32) = G_CONSTANT i32 140 ; CHECK: [[PTR_ADD37:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C37]](s32) - ; CHECK: G_STORE [[UV35]](p3), [[PTR_ADD37]](p5) :: (store 4 into %stack.0 + 140, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV35]](p3), [[PTR_ADD37]](p5) :: (store (s32) into %stack.0 + 140, basealign 256, addrspace 5) ; CHECK: [[C38:%[0-9]+]]:_(s32) = G_CONSTANT i32 144 ; CHECK: [[PTR_ADD38:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C38]](s32) - ; CHECK: G_STORE [[UV36]](p3), [[PTR_ADD38]](p5) :: (store 4 into %stack.0 + 144, align 16, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV36]](p3), [[PTR_ADD38]](p5) :: (store (s32) into %stack.0 + 144, align 16, basealign 256, addrspace 5) ; CHECK: [[C39:%[0-9]+]]:_(s32) = G_CONSTANT i32 148 ; CHECK: [[PTR_ADD39:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C39]](s32) - ; CHECK: G_STORE [[UV37]](p3), [[PTR_ADD39]](p5) :: (store 4 into %stack.0 + 148, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV37]](p3), [[PTR_ADD39]](p5) :: (store (s32) into %stack.0 + 148, basealign 256, addrspace 5) ; CHECK: [[C40:%[0-9]+]]:_(s32) = G_CONSTANT i32 152 ; CHECK: [[PTR_ADD40:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C40]](s32) - ; CHECK: G_STORE [[UV38]](p3), [[PTR_ADD40]](p5) :: (store 4 into %stack.0 + 152, align 8, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV38]](p3), [[PTR_ADD40]](p5) :: (store (s32) into %stack.0 + 152, align 8, basealign 256, addrspace 5) ; CHECK: [[C41:%[0-9]+]]:_(s32) = G_CONSTANT i32 156 ; CHECK: [[PTR_ADD41:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C41]](s32) - ; CHECK: G_STORE [[UV39]](p3), [[PTR_ADD41]](p5) :: (store 4 into %stack.0 + 156, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV39]](p3), [[PTR_ADD41]](p5) :: (store (s32) into %stack.0 + 156, basealign 256, addrspace 5) ; CHECK: [[C42:%[0-9]+]]:_(s32) = G_CONSTANT i32 160 ; CHECK: [[PTR_ADD42:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C42]](s32) - ; CHECK: G_STORE [[UV40]](p3), [[PTR_ADD42]](p5) :: (store 4 into %stack.0 + 160, align 32, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV40]](p3), [[PTR_ADD42]](p5) :: (store (s32) into %stack.0 + 160, align 32, basealign 256, addrspace 5) ; CHECK: [[C43:%[0-9]+]]:_(s32) = G_CONSTANT i32 164 ; CHECK: [[PTR_ADD43:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C43]](s32) - ; CHECK: G_STORE [[UV41]](p3), [[PTR_ADD43]](p5) :: (store 4 into %stack.0 + 164, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV41]](p3), [[PTR_ADD43]](p5) :: (store (s32) into %stack.0 + 164, basealign 256, addrspace 5) ; CHECK: [[C44:%[0-9]+]]:_(s32) = G_CONSTANT i32 168 ; CHECK: [[PTR_ADD44:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C44]](s32) - ; CHECK: G_STORE [[UV42]](p3), [[PTR_ADD44]](p5) :: (store 4 into %stack.0 + 168, align 8, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV42]](p3), [[PTR_ADD44]](p5) :: (store (s32) into %stack.0 + 168, align 8, basealign 256, addrspace 5) ; CHECK: [[C45:%[0-9]+]]:_(s32) = G_CONSTANT i32 172 ; CHECK: [[PTR_ADD45:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C45]](s32) - ; CHECK: G_STORE [[UV43]](p3), [[PTR_ADD45]](p5) :: (store 4 into %stack.0 + 172, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV43]](p3), [[PTR_ADD45]](p5) :: (store (s32) into %stack.0 + 172, basealign 256, addrspace 5) ; CHECK: [[C46:%[0-9]+]]:_(s32) = G_CONSTANT i32 176 ; CHECK: [[PTR_ADD46:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C46]](s32) - ; CHECK: G_STORE [[UV44]](p3), [[PTR_ADD46]](p5) :: (store 4 into %stack.0 + 176, align 16, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV44]](p3), [[PTR_ADD46]](p5) :: (store (s32) into %stack.0 + 176, align 16, basealign 256, addrspace 5) ; CHECK: [[C47:%[0-9]+]]:_(s32) = G_CONSTANT i32 180 ; CHECK: [[PTR_ADD47:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C47]](s32) - ; CHECK: G_STORE [[UV45]](p3), [[PTR_ADD47]](p5) :: (store 4 into %stack.0 + 180, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV45]](p3), [[PTR_ADD47]](p5) :: (store (s32) into %stack.0 + 180, basealign 256, addrspace 5) ; CHECK: [[C48:%[0-9]+]]:_(s32) = G_CONSTANT i32 184 ; CHECK: [[PTR_ADD48:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C48]](s32) - ; CHECK: G_STORE [[UV46]](p3), [[PTR_ADD48]](p5) :: (store 4 into %stack.0 + 184, align 8, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV46]](p3), [[PTR_ADD48]](p5) :: (store (s32) into %stack.0 + 184, align 8, basealign 256, addrspace 5) ; CHECK: [[C49:%[0-9]+]]:_(s32) = G_CONSTANT i32 188 ; CHECK: [[PTR_ADD49:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C49]](s32) - ; CHECK: G_STORE [[UV47]](p3), [[PTR_ADD49]](p5) :: (store 4 into %stack.0 + 188, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV47]](p3), [[PTR_ADD49]](p5) :: (store (s32) into %stack.0 + 188, basealign 256, addrspace 5) ; CHECK: [[C50:%[0-9]+]]:_(s32) = G_CONSTANT i32 192 ; CHECK: [[PTR_ADD50:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C50]](s32) - ; CHECK: G_STORE [[UV48]](p3), [[PTR_ADD50]](p5) :: (store 4 into %stack.0 + 192, align 64, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV48]](p3), [[PTR_ADD50]](p5) :: (store (s32) into %stack.0 + 192, align 64, basealign 256, addrspace 5) ; CHECK: [[C51:%[0-9]+]]:_(s32) = G_CONSTANT i32 196 ; CHECK: [[PTR_ADD51:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C51]](s32) - ; CHECK: G_STORE [[UV49]](p3), [[PTR_ADD51]](p5) :: (store 4 into %stack.0 + 196, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV49]](p3), [[PTR_ADD51]](p5) :: (store (s32) into %stack.0 + 196, basealign 256, addrspace 5) ; CHECK: [[C52:%[0-9]+]]:_(s32) = G_CONSTANT i32 200 ; CHECK: [[PTR_ADD52:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C52]](s32) - ; CHECK: G_STORE [[UV50]](p3), [[PTR_ADD52]](p5) :: (store 4 into %stack.0 + 200, align 8, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV50]](p3), [[PTR_ADD52]](p5) :: (store (s32) into %stack.0 + 200, align 8, basealign 256, addrspace 5) ; CHECK: [[C53:%[0-9]+]]:_(s32) = G_CONSTANT i32 204 ; CHECK: [[PTR_ADD53:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C53]](s32) - ; CHECK: G_STORE [[UV51]](p3), [[PTR_ADD53]](p5) :: (store 4 into %stack.0 + 204, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV51]](p3), [[PTR_ADD53]](p5) :: (store (s32) into %stack.0 + 204, basealign 256, addrspace 5) ; CHECK: [[C54:%[0-9]+]]:_(s32) = G_CONSTANT i32 208 ; CHECK: [[PTR_ADD54:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C54]](s32) - ; CHECK: G_STORE [[UV52]](p3), [[PTR_ADD54]](p5) :: (store 4 into %stack.0 + 208, align 16, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV52]](p3), [[PTR_ADD54]](p5) :: (store (s32) into %stack.0 + 208, align 16, basealign 256, addrspace 5) ; CHECK: [[C55:%[0-9]+]]:_(s32) = G_CONSTANT i32 212 ; CHECK: [[PTR_ADD55:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C55]](s32) - ; CHECK: G_STORE [[UV53]](p3), [[PTR_ADD55]](p5) :: (store 4 into %stack.0 + 212, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV53]](p3), [[PTR_ADD55]](p5) :: (store (s32) into %stack.0 + 212, basealign 256, addrspace 5) ; CHECK: [[C56:%[0-9]+]]:_(s32) = G_CONSTANT i32 216 ; CHECK: [[PTR_ADD56:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C56]](s32) - ; CHECK: G_STORE [[UV54]](p3), [[PTR_ADD56]](p5) :: (store 4 into %stack.0 + 216, align 8, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV54]](p3), [[PTR_ADD56]](p5) :: (store (s32) into %stack.0 + 216, align 8, basealign 256, addrspace 5) ; CHECK: [[C57:%[0-9]+]]:_(s32) = G_CONSTANT i32 220 ; CHECK: [[PTR_ADD57:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C57]](s32) - ; CHECK: G_STORE [[UV55]](p3), [[PTR_ADD57]](p5) :: (store 4 into %stack.0 + 220, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV55]](p3), [[PTR_ADD57]](p5) :: (store (s32) into %stack.0 + 220, basealign 256, addrspace 5) ; CHECK: [[C58:%[0-9]+]]:_(s32) = G_CONSTANT i32 224 ; CHECK: [[PTR_ADD58:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C58]](s32) - ; CHECK: G_STORE [[UV56]](p3), [[PTR_ADD58]](p5) :: (store 4 into %stack.0 + 224, align 32, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV56]](p3), [[PTR_ADD58]](p5) :: (store (s32) into %stack.0 + 224, align 32, basealign 256, addrspace 5) ; CHECK: [[C59:%[0-9]+]]:_(s32) = G_CONSTANT i32 228 ; CHECK: [[PTR_ADD59:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C59]](s32) - ; CHECK: G_STORE [[UV57]](p3), [[PTR_ADD59]](p5) :: (store 4 into %stack.0 + 228, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV57]](p3), [[PTR_ADD59]](p5) :: (store (s32) into %stack.0 + 228, basealign 256, addrspace 5) ; CHECK: [[C60:%[0-9]+]]:_(s32) = G_CONSTANT i32 232 ; CHECK: [[PTR_ADD60:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C60]](s32) - ; CHECK: G_STORE [[UV58]](p3), [[PTR_ADD60]](p5) :: (store 4 into %stack.0 + 232, align 8, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV58]](p3), [[PTR_ADD60]](p5) :: (store (s32) into %stack.0 + 232, align 8, basealign 256, addrspace 5) ; CHECK: [[C61:%[0-9]+]]:_(s32) = G_CONSTANT i32 236 ; CHECK: [[PTR_ADD61:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C61]](s32) - ; CHECK: G_STORE [[UV59]](p3), [[PTR_ADD61]](p5) :: (store 4 into %stack.0 + 236, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV59]](p3), [[PTR_ADD61]](p5) :: (store (s32) into %stack.0 + 236, basealign 256, addrspace 5) ; CHECK: [[C62:%[0-9]+]]:_(s32) = G_CONSTANT i32 240 ; CHECK: [[PTR_ADD62:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C62]](s32) - ; CHECK: G_STORE [[UV60]](p3), [[PTR_ADD62]](p5) :: (store 4 into %stack.0 + 240, align 16, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV60]](p3), [[PTR_ADD62]](p5) :: (store (s32) into %stack.0 + 240, align 16, basealign 256, addrspace 5) ; CHECK: [[C63:%[0-9]+]]:_(s32) = G_CONSTANT i32 244 ; CHECK: [[PTR_ADD63:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C63]](s32) - ; CHECK: G_STORE [[UV61]](p3), [[PTR_ADD63]](p5) :: (store 4 into %stack.0 + 244, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV61]](p3), [[PTR_ADD63]](p5) :: (store (s32) into %stack.0 + 244, basealign 256, addrspace 5) ; CHECK: [[C64:%[0-9]+]]:_(s32) = G_CONSTANT i32 248 ; CHECK: [[PTR_ADD64:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C64]](s32) - ; CHECK: G_STORE [[UV62]](p3), [[PTR_ADD64]](p5) :: (store 4 into %stack.0 + 248, align 8, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV62]](p3), [[PTR_ADD64]](p5) :: (store (s32) into %stack.0 + 248, align 8, basealign 256, addrspace 5) ; CHECK: [[C65:%[0-9]+]]:_(s32) = G_CONSTANT i32 252 ; CHECK: [[PTR_ADD65:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C65]](s32) - ; CHECK: G_STORE [[UV63]](p3), [[PTR_ADD65]](p5) :: (store 4 into %stack.0 + 252, basealign 256, addrspace 5) - ; CHECK: [[LOAD4:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD35]](p5) :: (load 4 from %stack.0 + 132, addrspace 5) + ; CHECK: G_STORE [[UV63]](p3), [[PTR_ADD65]](p5) :: (store (s32) into %stack.0 + 252, basealign 256, addrspace 5) + ; CHECK: [[LOAD4:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD35]](p5) :: (load (p3) from %stack.0 + 132, addrspace 5) ; CHECK: S_ENDPGM 0, implicit [[LOAD4]](p3) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_CONSTANT i32 33 - %2:_(<64 x p3>) = G_LOAD %0 :: (load 256, align 4, addrspace 4) + %2:_(<64 x p3>) = G_LOAD %0 :: (load (<64 x p3>), align 4, addrspace 4) %3:_(p3) = G_EXTRACT_VECTOR_ELT %2, %1 S_ENDPGM 0, implicit %3 ... @@ -1705,220 +1705,220 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_varidx_v64s32 ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load 64, align 4, addrspace 4) + ; CHECK: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (s512), align 4, addrspace 4) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load 64 from unknown-address + 64, align 4, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (s512) from unknown-address + 64, align 4, addrspace 4) ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p1) :: (load 64 from unknown-address + 128, align 4, addrspace 4) + ; CHECK: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p1) :: (load (s512) from unknown-address + 128, align 4, addrspace 4) ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 192 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CHECK: [[LOAD3:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load 64 from unknown-address + 192, align 4, addrspace 4) + ; CHECK: [[LOAD3:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load (s512) from unknown-address + 192, align 4, addrspace 4) ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0 ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<16 x s32>) ; CHECK: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>) ; CHECK: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<16 x s32>) ; CHECK: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD3]](<16 x s32>) - ; CHECK: G_STORE [[UV]](s32), [[FRAME_INDEX]](p5) :: (store 4 into %stack.0, align 256, addrspace 5) + ; CHECK: G_STORE [[UV]](s32), [[FRAME_INDEX]](p5) :: (store (s32) into %stack.0, align 256, addrspace 5) ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C3]](s32) - ; CHECK: G_STORE [[UV1]](s32), [[PTR_ADD3]](p5) :: (store 4 into %stack.0 + 4, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV1]](s32), [[PTR_ADD3]](p5) :: (store (s32) into %stack.0 + 4, basealign 256, addrspace 5) ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C4]](s32) - ; CHECK: G_STORE [[UV2]](s32), [[PTR_ADD4]](p5) :: (store 4 into %stack.0 + 8, align 8, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV2]](s32), [[PTR_ADD4]](p5) :: (store (s32) into %stack.0 + 8, align 8, basealign 256, addrspace 5) ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C5]](s32) - ; CHECK: G_STORE [[UV3]](s32), [[PTR_ADD5]](p5) :: (store 4 into %stack.0 + 12, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV3]](s32), [[PTR_ADD5]](p5) :: (store (s32) into %stack.0 + 12, basealign 256, addrspace 5) ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C6]](s32) - ; CHECK: G_STORE [[UV4]](s32), [[PTR_ADD6]](p5) :: (store 4 into %stack.0 + 16, align 16, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV4]](s32), [[PTR_ADD6]](p5) :: (store (s32) into %stack.0 + 16, align 16, basealign 256, addrspace 5) ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C7]](s32) - ; CHECK: G_STORE [[UV5]](s32), [[PTR_ADD7]](p5) :: (store 4 into %stack.0 + 20, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV5]](s32), [[PTR_ADD7]](p5) :: (store (s32) into %stack.0 + 20, basealign 256, addrspace 5) ; CHECK: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; CHECK: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C8]](s32) - ; CHECK: G_STORE [[UV6]](s32), [[PTR_ADD8]](p5) :: (store 4 into %stack.0 + 24, align 8, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV6]](s32), [[PTR_ADD8]](p5) :: (store (s32) into %stack.0 + 24, align 8, basealign 256, addrspace 5) ; CHECK: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 ; CHECK: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C9]](s32) - ; CHECK: G_STORE [[UV7]](s32), [[PTR_ADD9]](p5) :: (store 4 into %stack.0 + 28, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV7]](s32), [[PTR_ADD9]](p5) :: (store (s32) into %stack.0 + 28, basealign 256, addrspace 5) ; CHECK: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; CHECK: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C10]](s32) - ; CHECK: G_STORE [[UV8]](s32), [[PTR_ADD10]](p5) :: (store 4 into %stack.0 + 32, align 32, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV8]](s32), [[PTR_ADD10]](p5) :: (store (s32) into %stack.0 + 32, align 32, basealign 256, addrspace 5) ; CHECK: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 36 ; CHECK: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C11]](s32) - ; CHECK: G_STORE [[UV9]](s32), [[PTR_ADD11]](p5) :: (store 4 into %stack.0 + 36, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV9]](s32), [[PTR_ADD11]](p5) :: (store (s32) into %stack.0 + 36, basealign 256, addrspace 5) ; CHECK: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 ; CHECK: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C12]](s32) - ; CHECK: G_STORE [[UV10]](s32), [[PTR_ADD12]](p5) :: (store 4 into %stack.0 + 40, align 8, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV10]](s32), [[PTR_ADD12]](p5) :: (store (s32) into %stack.0 + 40, align 8, basealign 256, addrspace 5) ; CHECK: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 44 ; CHECK: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C13]](s32) - ; CHECK: G_STORE [[UV11]](s32), [[PTR_ADD13]](p5) :: (store 4 into %stack.0 + 44, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV11]](s32), [[PTR_ADD13]](p5) :: (store (s32) into %stack.0 + 44, basealign 256, addrspace 5) ; CHECK: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 ; CHECK: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C14]](s32) - ; CHECK: G_STORE [[UV12]](s32), [[PTR_ADD14]](p5) :: (store 4 into %stack.0 + 48, align 16, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV12]](s32), [[PTR_ADD14]](p5) :: (store (s32) into %stack.0 + 48, align 16, basealign 256, addrspace 5) ; CHECK: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 52 ; CHECK: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C15]](s32) - ; CHECK: G_STORE [[UV13]](s32), [[PTR_ADD15]](p5) :: (store 4 into %stack.0 + 52, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV13]](s32), [[PTR_ADD15]](p5) :: (store (s32) into %stack.0 + 52, basealign 256, addrspace 5) ; CHECK: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 56 ; CHECK: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C16]](s32) - ; CHECK: G_STORE [[UV14]](s32), [[PTR_ADD16]](p5) :: (store 4 into %stack.0 + 56, align 8, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV14]](s32), [[PTR_ADD16]](p5) :: (store (s32) into %stack.0 + 56, align 8, basealign 256, addrspace 5) ; CHECK: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 60 ; CHECK: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C17]](s32) - ; CHECK: G_STORE [[UV15]](s32), [[PTR_ADD17]](p5) :: (store 4 into %stack.0 + 60, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV15]](s32), [[PTR_ADD17]](p5) :: (store (s32) into %stack.0 + 60, basealign 256, addrspace 5) ; CHECK: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; CHECK: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C18]](s32) - ; CHECK: G_STORE [[UV16]](s32), [[PTR_ADD18]](p5) :: (store 4 into %stack.0 + 64, align 64, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV16]](s32), [[PTR_ADD18]](p5) :: (store (s32) into %stack.0 + 64, align 64, basealign 256, addrspace 5) ; CHECK: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 68 ; CHECK: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C19]](s32) - ; CHECK: G_STORE [[UV17]](s32), [[PTR_ADD19]](p5) :: (store 4 into %stack.0 + 68, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV17]](s32), [[PTR_ADD19]](p5) :: (store (s32) into %stack.0 + 68, basealign 256, addrspace 5) ; CHECK: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 72 ; CHECK: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C20]](s32) - ; CHECK: G_STORE [[UV18]](s32), [[PTR_ADD20]](p5) :: (store 4 into %stack.0 + 72, align 8, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV18]](s32), [[PTR_ADD20]](p5) :: (store (s32) into %stack.0 + 72, align 8, basealign 256, addrspace 5) ; CHECK: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 76 ; CHECK: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C21]](s32) - ; CHECK: G_STORE [[UV19]](s32), [[PTR_ADD21]](p5) :: (store 4 into %stack.0 + 76, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV19]](s32), [[PTR_ADD21]](p5) :: (store (s32) into %stack.0 + 76, basealign 256, addrspace 5) ; CHECK: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 80 ; CHECK: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C22]](s32) - ; CHECK: G_STORE [[UV20]](s32), [[PTR_ADD22]](p5) :: (store 4 into %stack.0 + 80, align 16, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV20]](s32), [[PTR_ADD22]](p5) :: (store (s32) into %stack.0 + 80, align 16, basealign 256, addrspace 5) ; CHECK: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 84 ; CHECK: [[PTR_ADD23:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C23]](s32) - ; CHECK: G_STORE [[UV21]](s32), [[PTR_ADD23]](p5) :: (store 4 into %stack.0 + 84, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV21]](s32), [[PTR_ADD23]](p5) :: (store (s32) into %stack.0 + 84, basealign 256, addrspace 5) ; CHECK: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 88 ; CHECK: [[PTR_ADD24:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C24]](s32) - ; CHECK: G_STORE [[UV22]](s32), [[PTR_ADD24]](p5) :: (store 4 into %stack.0 + 88, align 8, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV22]](s32), [[PTR_ADD24]](p5) :: (store (s32) into %stack.0 + 88, align 8, basealign 256, addrspace 5) ; CHECK: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 92 ; CHECK: [[PTR_ADD25:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C25]](s32) - ; CHECK: G_STORE [[UV23]](s32), [[PTR_ADD25]](p5) :: (store 4 into %stack.0 + 92, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV23]](s32), [[PTR_ADD25]](p5) :: (store (s32) into %stack.0 + 92, basealign 256, addrspace 5) ; CHECK: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 96 ; CHECK: [[PTR_ADD26:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C26]](s32) - ; CHECK: G_STORE [[UV24]](s32), [[PTR_ADD26]](p5) :: (store 4 into %stack.0 + 96, align 32, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV24]](s32), [[PTR_ADD26]](p5) :: (store (s32) into %stack.0 + 96, align 32, basealign 256, addrspace 5) ; CHECK: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 100 ; CHECK: [[PTR_ADD27:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C27]](s32) - ; CHECK: G_STORE [[UV25]](s32), [[PTR_ADD27]](p5) :: (store 4 into %stack.0 + 100, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV25]](s32), [[PTR_ADD27]](p5) :: (store (s32) into %stack.0 + 100, basealign 256, addrspace 5) ; CHECK: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 104 ; CHECK: [[PTR_ADD28:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C28]](s32) - ; CHECK: G_STORE [[UV26]](s32), [[PTR_ADD28]](p5) :: (store 4 into %stack.0 + 104, align 8, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV26]](s32), [[PTR_ADD28]](p5) :: (store (s32) into %stack.0 + 104, align 8, basealign 256, addrspace 5) ; CHECK: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 108 ; CHECK: [[PTR_ADD29:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C29]](s32) - ; CHECK: G_STORE [[UV27]](s32), [[PTR_ADD29]](p5) :: (store 4 into %stack.0 + 108, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV27]](s32), [[PTR_ADD29]](p5) :: (store (s32) into %stack.0 + 108, basealign 256, addrspace 5) ; CHECK: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 112 ; CHECK: [[PTR_ADD30:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C30]](s32) - ; CHECK: G_STORE [[UV28]](s32), [[PTR_ADD30]](p5) :: (store 4 into %stack.0 + 112, align 16, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV28]](s32), [[PTR_ADD30]](p5) :: (store (s32) into %stack.0 + 112, align 16, basealign 256, addrspace 5) ; CHECK: [[C31:%[0-9]+]]:_(s32) = G_CONSTANT i32 116 ; CHECK: [[PTR_ADD31:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C31]](s32) - ; CHECK: G_STORE [[UV29]](s32), [[PTR_ADD31]](p5) :: (store 4 into %stack.0 + 116, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV29]](s32), [[PTR_ADD31]](p5) :: (store (s32) into %stack.0 + 116, basealign 256, addrspace 5) ; CHECK: [[C32:%[0-9]+]]:_(s32) = G_CONSTANT i32 120 ; CHECK: [[PTR_ADD32:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C32]](s32) - ; CHECK: G_STORE [[UV30]](s32), [[PTR_ADD32]](p5) :: (store 4 into %stack.0 + 120, align 8, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV30]](s32), [[PTR_ADD32]](p5) :: (store (s32) into %stack.0 + 120, align 8, basealign 256, addrspace 5) ; CHECK: [[C33:%[0-9]+]]:_(s32) = G_CONSTANT i32 124 ; CHECK: [[PTR_ADD33:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C33]](s32) - ; CHECK: G_STORE [[UV31]](s32), [[PTR_ADD33]](p5) :: (store 4 into %stack.0 + 124, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV31]](s32), [[PTR_ADD33]](p5) :: (store (s32) into %stack.0 + 124, basealign 256, addrspace 5) ; CHECK: [[C34:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 ; CHECK: [[PTR_ADD34:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C34]](s32) - ; CHECK: G_STORE [[UV32]](s32), [[PTR_ADD34]](p5) :: (store 4 into %stack.0 + 128, align 128, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV32]](s32), [[PTR_ADD34]](p5) :: (store (s32) into %stack.0 + 128, align 128, basealign 256, addrspace 5) ; CHECK: [[C35:%[0-9]+]]:_(s32) = G_CONSTANT i32 132 ; CHECK: [[PTR_ADD35:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C35]](s32) - ; CHECK: G_STORE [[UV33]](s32), [[PTR_ADD35]](p5) :: (store 4 into %stack.0 + 132, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV33]](s32), [[PTR_ADD35]](p5) :: (store (s32) into %stack.0 + 132, basealign 256, addrspace 5) ; CHECK: [[C36:%[0-9]+]]:_(s32) = G_CONSTANT i32 136 ; CHECK: [[PTR_ADD36:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C36]](s32) - ; CHECK: G_STORE [[UV34]](s32), [[PTR_ADD36]](p5) :: (store 4 into %stack.0 + 136, align 8, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV34]](s32), [[PTR_ADD36]](p5) :: (store (s32) into %stack.0 + 136, align 8, basealign 256, addrspace 5) ; CHECK: [[C37:%[0-9]+]]:_(s32) = G_CONSTANT i32 140 ; CHECK: [[PTR_ADD37:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C37]](s32) - ; CHECK: G_STORE [[UV35]](s32), [[PTR_ADD37]](p5) :: (store 4 into %stack.0 + 140, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV35]](s32), [[PTR_ADD37]](p5) :: (store (s32) into %stack.0 + 140, basealign 256, addrspace 5) ; CHECK: [[C38:%[0-9]+]]:_(s32) = G_CONSTANT i32 144 ; CHECK: [[PTR_ADD38:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C38]](s32) - ; CHECK: G_STORE [[UV36]](s32), [[PTR_ADD38]](p5) :: (store 4 into %stack.0 + 144, align 16, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV36]](s32), [[PTR_ADD38]](p5) :: (store (s32) into %stack.0 + 144, align 16, basealign 256, addrspace 5) ; CHECK: [[C39:%[0-9]+]]:_(s32) = G_CONSTANT i32 148 ; CHECK: [[PTR_ADD39:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C39]](s32) - ; CHECK: G_STORE [[UV37]](s32), [[PTR_ADD39]](p5) :: (store 4 into %stack.0 + 148, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV37]](s32), [[PTR_ADD39]](p5) :: (store (s32) into %stack.0 + 148, basealign 256, addrspace 5) ; CHECK: [[C40:%[0-9]+]]:_(s32) = G_CONSTANT i32 152 ; CHECK: [[PTR_ADD40:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C40]](s32) - ; CHECK: G_STORE [[UV38]](s32), [[PTR_ADD40]](p5) :: (store 4 into %stack.0 + 152, align 8, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV38]](s32), [[PTR_ADD40]](p5) :: (store (s32) into %stack.0 + 152, align 8, basealign 256, addrspace 5) ; CHECK: [[C41:%[0-9]+]]:_(s32) = G_CONSTANT i32 156 ; CHECK: [[PTR_ADD41:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C41]](s32) - ; CHECK: G_STORE [[UV39]](s32), [[PTR_ADD41]](p5) :: (store 4 into %stack.0 + 156, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV39]](s32), [[PTR_ADD41]](p5) :: (store (s32) into %stack.0 + 156, basealign 256, addrspace 5) ; CHECK: [[C42:%[0-9]+]]:_(s32) = G_CONSTANT i32 160 ; CHECK: [[PTR_ADD42:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C42]](s32) - ; CHECK: G_STORE [[UV40]](s32), [[PTR_ADD42]](p5) :: (store 4 into %stack.0 + 160, align 32, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV40]](s32), [[PTR_ADD42]](p5) :: (store (s32) into %stack.0 + 160, align 32, basealign 256, addrspace 5) ; CHECK: [[C43:%[0-9]+]]:_(s32) = G_CONSTANT i32 164 ; CHECK: [[PTR_ADD43:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C43]](s32) - ; CHECK: G_STORE [[UV41]](s32), [[PTR_ADD43]](p5) :: (store 4 into %stack.0 + 164, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV41]](s32), [[PTR_ADD43]](p5) :: (store (s32) into %stack.0 + 164, basealign 256, addrspace 5) ; CHECK: [[C44:%[0-9]+]]:_(s32) = G_CONSTANT i32 168 ; CHECK: [[PTR_ADD44:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C44]](s32) - ; CHECK: G_STORE [[UV42]](s32), [[PTR_ADD44]](p5) :: (store 4 into %stack.0 + 168, align 8, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV42]](s32), [[PTR_ADD44]](p5) :: (store (s32) into %stack.0 + 168, align 8, basealign 256, addrspace 5) ; CHECK: [[C45:%[0-9]+]]:_(s32) = G_CONSTANT i32 172 ; CHECK: [[PTR_ADD45:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C45]](s32) - ; CHECK: G_STORE [[UV43]](s32), [[PTR_ADD45]](p5) :: (store 4 into %stack.0 + 172, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV43]](s32), [[PTR_ADD45]](p5) :: (store (s32) into %stack.0 + 172, basealign 256, addrspace 5) ; CHECK: [[C46:%[0-9]+]]:_(s32) = G_CONSTANT i32 176 ; CHECK: [[PTR_ADD46:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C46]](s32) - ; CHECK: G_STORE [[UV44]](s32), [[PTR_ADD46]](p5) :: (store 4 into %stack.0 + 176, align 16, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV44]](s32), [[PTR_ADD46]](p5) :: (store (s32) into %stack.0 + 176, align 16, basealign 256, addrspace 5) ; CHECK: [[C47:%[0-9]+]]:_(s32) = G_CONSTANT i32 180 ; CHECK: [[PTR_ADD47:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C47]](s32) - ; CHECK: G_STORE [[UV45]](s32), [[PTR_ADD47]](p5) :: (store 4 into %stack.0 + 180, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV45]](s32), [[PTR_ADD47]](p5) :: (store (s32) into %stack.0 + 180, basealign 256, addrspace 5) ; CHECK: [[C48:%[0-9]+]]:_(s32) = G_CONSTANT i32 184 ; CHECK: [[PTR_ADD48:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C48]](s32) - ; CHECK: G_STORE [[UV46]](s32), [[PTR_ADD48]](p5) :: (store 4 into %stack.0 + 184, align 8, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV46]](s32), [[PTR_ADD48]](p5) :: (store (s32) into %stack.0 + 184, align 8, basealign 256, addrspace 5) ; CHECK: [[C49:%[0-9]+]]:_(s32) = G_CONSTANT i32 188 ; CHECK: [[PTR_ADD49:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C49]](s32) - ; CHECK: G_STORE [[UV47]](s32), [[PTR_ADD49]](p5) :: (store 4 into %stack.0 + 188, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV47]](s32), [[PTR_ADD49]](p5) :: (store (s32) into %stack.0 + 188, basealign 256, addrspace 5) ; CHECK: [[C50:%[0-9]+]]:_(s32) = G_CONSTANT i32 192 ; CHECK: [[PTR_ADD50:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C50]](s32) - ; CHECK: G_STORE [[UV48]](s32), [[PTR_ADD50]](p5) :: (store 4 into %stack.0 + 192, align 64, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV48]](s32), [[PTR_ADD50]](p5) :: (store (s32) into %stack.0 + 192, align 64, basealign 256, addrspace 5) ; CHECK: [[C51:%[0-9]+]]:_(s32) = G_CONSTANT i32 196 ; CHECK: [[PTR_ADD51:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C51]](s32) - ; CHECK: G_STORE [[UV49]](s32), [[PTR_ADD51]](p5) :: (store 4 into %stack.0 + 196, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV49]](s32), [[PTR_ADD51]](p5) :: (store (s32) into %stack.0 + 196, basealign 256, addrspace 5) ; CHECK: [[C52:%[0-9]+]]:_(s32) = G_CONSTANT i32 200 ; CHECK: [[PTR_ADD52:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C52]](s32) - ; CHECK: G_STORE [[UV50]](s32), [[PTR_ADD52]](p5) :: (store 4 into %stack.0 + 200, align 8, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV50]](s32), [[PTR_ADD52]](p5) :: (store (s32) into %stack.0 + 200, align 8, basealign 256, addrspace 5) ; CHECK: [[C53:%[0-9]+]]:_(s32) = G_CONSTANT i32 204 ; CHECK: [[PTR_ADD53:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C53]](s32) - ; CHECK: G_STORE [[UV51]](s32), [[PTR_ADD53]](p5) :: (store 4 into %stack.0 + 204, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV51]](s32), [[PTR_ADD53]](p5) :: (store (s32) into %stack.0 + 204, basealign 256, addrspace 5) ; CHECK: [[C54:%[0-9]+]]:_(s32) = G_CONSTANT i32 208 ; CHECK: [[PTR_ADD54:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C54]](s32) - ; CHECK: G_STORE [[UV52]](s32), [[PTR_ADD54]](p5) :: (store 4 into %stack.0 + 208, align 16, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV52]](s32), [[PTR_ADD54]](p5) :: (store (s32) into %stack.0 + 208, align 16, basealign 256, addrspace 5) ; CHECK: [[C55:%[0-9]+]]:_(s32) = G_CONSTANT i32 212 ; CHECK: [[PTR_ADD55:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C55]](s32) - ; CHECK: G_STORE [[UV53]](s32), [[PTR_ADD55]](p5) :: (store 4 into %stack.0 + 212, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV53]](s32), [[PTR_ADD55]](p5) :: (store (s32) into %stack.0 + 212, basealign 256, addrspace 5) ; CHECK: [[C56:%[0-9]+]]:_(s32) = G_CONSTANT i32 216 ; CHECK: [[PTR_ADD56:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C56]](s32) - ; CHECK: G_STORE [[UV54]](s32), [[PTR_ADD56]](p5) :: (store 4 into %stack.0 + 216, align 8, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV54]](s32), [[PTR_ADD56]](p5) :: (store (s32) into %stack.0 + 216, align 8, basealign 256, addrspace 5) ; CHECK: [[C57:%[0-9]+]]:_(s32) = G_CONSTANT i32 220 ; CHECK: [[PTR_ADD57:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C57]](s32) - ; CHECK: G_STORE [[UV55]](s32), [[PTR_ADD57]](p5) :: (store 4 into %stack.0 + 220, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV55]](s32), [[PTR_ADD57]](p5) :: (store (s32) into %stack.0 + 220, basealign 256, addrspace 5) ; CHECK: [[C58:%[0-9]+]]:_(s32) = G_CONSTANT i32 224 ; CHECK: [[PTR_ADD58:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C58]](s32) - ; CHECK: G_STORE [[UV56]](s32), [[PTR_ADD58]](p5) :: (store 4 into %stack.0 + 224, align 32, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV56]](s32), [[PTR_ADD58]](p5) :: (store (s32) into %stack.0 + 224, align 32, basealign 256, addrspace 5) ; CHECK: [[C59:%[0-9]+]]:_(s32) = G_CONSTANT i32 228 ; CHECK: [[PTR_ADD59:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C59]](s32) - ; CHECK: G_STORE [[UV57]](s32), [[PTR_ADD59]](p5) :: (store 4 into %stack.0 + 228, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV57]](s32), [[PTR_ADD59]](p5) :: (store (s32) into %stack.0 + 228, basealign 256, addrspace 5) ; CHECK: [[C60:%[0-9]+]]:_(s32) = G_CONSTANT i32 232 ; CHECK: [[PTR_ADD60:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C60]](s32) - ; CHECK: G_STORE [[UV58]](s32), [[PTR_ADD60]](p5) :: (store 4 into %stack.0 + 232, align 8, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV58]](s32), [[PTR_ADD60]](p5) :: (store (s32) into %stack.0 + 232, align 8, basealign 256, addrspace 5) ; CHECK: [[C61:%[0-9]+]]:_(s32) = G_CONSTANT i32 236 ; CHECK: [[PTR_ADD61:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C61]](s32) - ; CHECK: G_STORE [[UV59]](s32), [[PTR_ADD61]](p5) :: (store 4 into %stack.0 + 236, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV59]](s32), [[PTR_ADD61]](p5) :: (store (s32) into %stack.0 + 236, basealign 256, addrspace 5) ; CHECK: [[C62:%[0-9]+]]:_(s32) = G_CONSTANT i32 240 ; CHECK: [[PTR_ADD62:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C62]](s32) - ; CHECK: G_STORE [[UV60]](s32), [[PTR_ADD62]](p5) :: (store 4 into %stack.0 + 240, align 16, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV60]](s32), [[PTR_ADD62]](p5) :: (store (s32) into %stack.0 + 240, align 16, basealign 256, addrspace 5) ; CHECK: [[C63:%[0-9]+]]:_(s32) = G_CONSTANT i32 244 ; CHECK: [[PTR_ADD63:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C63]](s32) - ; CHECK: G_STORE [[UV61]](s32), [[PTR_ADD63]](p5) :: (store 4 into %stack.0 + 244, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV61]](s32), [[PTR_ADD63]](p5) :: (store (s32) into %stack.0 + 244, basealign 256, addrspace 5) ; CHECK: [[C64:%[0-9]+]]:_(s32) = G_CONSTANT i32 248 ; CHECK: [[PTR_ADD64:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C64]](s32) - ; CHECK: G_STORE [[UV62]](s32), [[PTR_ADD64]](p5) :: (store 4 into %stack.0 + 248, align 8, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV62]](s32), [[PTR_ADD64]](p5) :: (store (s32) into %stack.0 + 248, align 8, basealign 256, addrspace 5) ; CHECK: [[C65:%[0-9]+]]:_(s32) = G_CONSTANT i32 252 ; CHECK: [[PTR_ADD65:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C65]](s32) - ; CHECK: G_STORE [[UV63]](s32), [[PTR_ADD65]](p5) :: (store 4 into %stack.0 + 252, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV63]](s32), [[PTR_ADD65]](p5) :: (store (s32) into %stack.0 + 252, basealign 256, addrspace 5) ; CHECK: [[C66:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C66]] ; CHECK: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[C3]] ; CHECK: [[PTR_ADD66:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[MUL]](s32) - ; CHECK: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD66]](p5) :: (load 4, addrspace 5) + ; CHECK: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD66]](p5) :: (load (s32), addrspace 5) ; CHECK: S_ENDPGM 0, implicit [[LOAD4]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 - %2:_(<64 x s32>) = G_LOAD %0 :: (load 256, align 4, addrspace 4) + %2:_(<64 x s32>) = G_LOAD %0 :: (load (<64 x s32>), align 4, addrspace 4) %3:_(s32) = G_EXTRACT_VECTOR_ELT %2, %1 S_ENDPGM 0, implicit %3 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcmp.mir index 419bd546901c6..e4024067369fd 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcmp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcmp.mir @@ -265,7 +265,7 @@ body: | ; GFX7-LABEL: name: test_fcmp_v4s32 ; GFX7: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GFX7: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load 16) + ; GFX7: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<4 x s32>)) ; GFX7: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX7: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; GFX7: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) @@ -285,7 +285,7 @@ body: | ; GFX7: S_NOP 0, implicit [[BUILD_VECTOR]](<4 x s32>) ; GFX8-LABEL: name: test_fcmp_v4s32 ; GFX8: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GFX8: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load 16) + ; GFX8: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<4 x s32>)) ; GFX8: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) @@ -305,7 +305,7 @@ body: | ; GFX8: S_NOP 0, implicit [[BUILD_VECTOR]](<4 x s32>) ; GFX9-LABEL: name: test_fcmp_v4s32 ; GFX9: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load 16) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<4 x s32>)) ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) @@ -320,7 +320,7 @@ body: | ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32) ; GFX9: S_NOP 0, implicit [[BUILD_VECTOR]](<4 x s32>) %0:_(p1) = G_IMPLICIT_DEF - %1:_(<4 x s32>) = G_LOAD %0 :: (volatile load 16) + %1:_(<4 x s32>) = G_LOAD %0 :: (volatile load (<4 x s32>)) %2:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %3:_(<4 x s1>) = G_FCMP floatpred(oeq) , %1, %2 %4:_(<4 x s32>) = G_ANYEXT %3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir index e06f9e4c74000..1844754e15210 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir @@ -352,7 +352,7 @@ body: | ; GFX7-LABEL: name: test_icmp_v4s32 ; GFX7: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GFX7: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load 16) + ; GFX7: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<4 x s32>)) ; GFX7: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX7: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; GFX7: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) @@ -377,7 +377,7 @@ body: | ; GFX7: S_NOP 0, implicit [[BUILD_VECTOR]](<4 x s32>) ; GFX8-LABEL: name: test_icmp_v4s32 ; GFX8: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GFX8: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load 16) + ; GFX8: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<4 x s32>)) ; GFX8: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) @@ -402,7 +402,7 @@ body: | ; GFX8: S_NOP 0, implicit [[BUILD_VECTOR]](<4 x s32>) ; GFX9-LABEL: name: test_icmp_v4s32 ; GFX9: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load 16) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<4 x s32>)) ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) @@ -422,7 +422,7 @@ body: | ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32) ; GFX9: S_NOP 0, implicit [[BUILD_VECTOR]](<4 x s32>) %0:_(p1) = G_IMPLICIT_DEF - %1:_(<4 x s32>) = G_LOAD %0 :: (volatile load 16) + %1:_(<4 x s32>) = G_LOAD %0 :: (volatile load (<4 x s32>)) %2:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %3:_(<4 x s1>) = G_ICMP intpred(ne), %1, %2 %4:_(<4 x s32>) = G_ZEXT %3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir index f4192bfa7f8cb..4ac62b2905237 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir @@ -334,13 +334,13 @@ body: | ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) ; CHECK: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK: G_STORE [[UV]](s32), [[COPY]](p1) :: (volatile store 4, addrspace 1) - ; CHECK: G_STORE [[UV16]](s32), [[COPY]](p1) :: (volatile store 4, addrspace 1) + ; CHECK: G_STORE [[UV]](s32), [[COPY]](p1) :: (volatile store (s32), addrspace 1) + ; CHECK: G_STORE [[UV16]](s32), [[COPY]](p1) :: (volatile store (s32), addrspace 1) %0:_(<33 x s32>) = G_IMPLICIT_DEF %1:_(s32), %2:_(s32), %3:_(s32), %4:_(s32), %5:_(s32), %6:_(s32), %7:_(s32), %8:_(s32), %9:_(s32), %10:_(s32), %11:_(s32), %12:_(s32), %13:_(s32), %14:_(s32), %15:_(s32), %16:_(s32), %17:_(s32), %18:_(s32), %19:_(s32), %20:_(s32), %21:_(s32), %22:_(s32), %23:_(s32), %24:_(s32), %25:_(s32), %26:_(s32), %27:_(s32), %28:_(s32), %29:_(s32), %30:_(s32), %31:_(s32), %32:_(s32), %33:_(s32) = G_UNMERGE_VALUES %0 %34:_(p1) = COPY $vgpr0_vgpr1 - G_STORE %1, %34 :: (volatile store 4, align 4, addrspace 1) - G_STORE %33, %34 :: (volatile store 4, align 4, addrspace 1) + G_STORE %1, %34 :: (volatile store (s32), align 4, addrspace 1) + G_STORE %33, %34 :: (volatile store (s32), align 4, addrspace 1) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir index 646450d1bbbf5..d39a0b9fa3ae3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir @@ -197,98 +197,98 @@ body: | ; CHECK: [[UV4:%[0-9]+]]:_(<4 x s32>), [[UV5:%[0-9]+]]:_(<4 x s32>), [[UV6:%[0-9]+]]:_(<4 x s32>), [[UV7:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) ; CHECK: [[UV8:%[0-9]+]]:_(<4 x s32>), [[UV9:%[0-9]+]]:_(<4 x s32>), [[UV10:%[0-9]+]]:_(<4 x s32>), [[UV11:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) ; CHECK: [[UV12:%[0-9]+]]:_(<4 x s32>), [[UV13:%[0-9]+]]:_(<4 x s32>), [[UV14:%[0-9]+]]:_(<4 x s32>), [[UV15:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK: G_STORE [[UV]](<4 x s32>), [[COPY1]](p1) :: (store 16, align 4, addrspace 1) + ; CHECK: G_STORE [[UV]](<4 x s32>), [[COPY1]](p1) :: (store (s128), align 4, addrspace 1) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C]](s64) - ; CHECK: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, align 4, addrspace 1) + ; CHECK: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, align 4, addrspace 1) ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C1]](s64) - ; CHECK: G_STORE [[UV2]](<4 x s32>), [[PTR_ADD1]](p1) :: (store 16 into unknown-address + 32, align 4, addrspace 1) + ; CHECK: G_STORE [[UV2]](<4 x s32>), [[PTR_ADD1]](p1) :: (store (s128) into unknown-address + 32, align 4, addrspace 1) ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C2]](s64) - ; CHECK: G_STORE [[UV3]](<4 x s32>), [[PTR_ADD2]](p1) :: (store 16 into unknown-address + 48, align 4, addrspace 1) + ; CHECK: G_STORE [[UV3]](<4 x s32>), [[PTR_ADD2]](p1) :: (store (s128) into unknown-address + 48, align 4, addrspace 1) ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C3]](s64) - ; CHECK: G_STORE [[UV4]](<4 x s32>), [[PTR_ADD3]](p1) :: (store 16 into unknown-address + 64, align 4, addrspace 1) + ; CHECK: G_STORE [[UV4]](<4 x s32>), [[PTR_ADD3]](p1) :: (store (s128) into unknown-address + 64, align 4, addrspace 1) ; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 80 ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C4]](s64) - ; CHECK: G_STORE [[UV5]](<4 x s32>), [[PTR_ADD4]](p1) :: (store 16 into unknown-address + 80, align 4, addrspace 1) + ; CHECK: G_STORE [[UV5]](<4 x s32>), [[PTR_ADD4]](p1) :: (store (s128) into unknown-address + 80, align 4, addrspace 1) ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 96 ; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C5]](s64) - ; CHECK: G_STORE [[UV6]](<4 x s32>), [[PTR_ADD5]](p1) :: (store 16 into unknown-address + 96, align 4, addrspace 1) + ; CHECK: G_STORE [[UV6]](<4 x s32>), [[PTR_ADD5]](p1) :: (store (s128) into unknown-address + 96, align 4, addrspace 1) ; CHECK: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 112 ; CHECK: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C6]](s64) - ; CHECK: G_STORE [[UV7]](<4 x s32>), [[PTR_ADD6]](p1) :: (store 16 into unknown-address + 112, align 4, addrspace 1) + ; CHECK: G_STORE [[UV7]](<4 x s32>), [[PTR_ADD6]](p1) :: (store (s128) into unknown-address + 112, align 4, addrspace 1) ; CHECK: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 ; CHECK: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C7]](s64) - ; CHECK: G_STORE [[UV8]](<4 x s32>), [[PTR_ADD7]](p1) :: (store 16 into unknown-address + 128, align 4, addrspace 1) + ; CHECK: G_STORE [[UV8]](<4 x s32>), [[PTR_ADD7]](p1) :: (store (s128) into unknown-address + 128, align 4, addrspace 1) ; CHECK: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 144 ; CHECK: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C8]](s64) - ; CHECK: G_STORE [[UV9]](<4 x s32>), [[PTR_ADD8]](p1) :: (store 16 into unknown-address + 144, align 4, addrspace 1) + ; CHECK: G_STORE [[UV9]](<4 x s32>), [[PTR_ADD8]](p1) :: (store (s128) into unknown-address + 144, align 4, addrspace 1) ; CHECK: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 160 ; CHECK: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C9]](s64) - ; CHECK: G_STORE [[UV10]](<4 x s32>), [[PTR_ADD9]](p1) :: (store 16 into unknown-address + 160, align 4, addrspace 1) + ; CHECK: G_STORE [[UV10]](<4 x s32>), [[PTR_ADD9]](p1) :: (store (s128) into unknown-address + 160, align 4, addrspace 1) ; CHECK: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 176 ; CHECK: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C10]](s64) - ; CHECK: G_STORE [[UV11]](<4 x s32>), [[PTR_ADD10]](p1) :: (store 16 into unknown-address + 176, align 4, addrspace 1) + ; CHECK: G_STORE [[UV11]](<4 x s32>), [[PTR_ADD10]](p1) :: (store (s128) into unknown-address + 176, align 4, addrspace 1) ; CHECK: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 192 ; CHECK: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C11]](s64) - ; CHECK: G_STORE [[UV12]](<4 x s32>), [[PTR_ADD11]](p1) :: (store 16 into unknown-address + 192, align 4, addrspace 1) + ; CHECK: G_STORE [[UV12]](<4 x s32>), [[PTR_ADD11]](p1) :: (store (s128) into unknown-address + 192, align 4, addrspace 1) ; CHECK: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 208 ; CHECK: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C12]](s64) - ; CHECK: G_STORE [[UV13]](<4 x s32>), [[PTR_ADD12]](p1) :: (store 16 into unknown-address + 208, align 4, addrspace 1) + ; CHECK: G_STORE [[UV13]](<4 x s32>), [[PTR_ADD12]](p1) :: (store (s128) into unknown-address + 208, align 4, addrspace 1) ; CHECK: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 224 ; CHECK: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C13]](s64) - ; CHECK: G_STORE [[UV14]](<4 x s32>), [[PTR_ADD13]](p1) :: (store 16 into unknown-address + 224, align 4, addrspace 1) + ; CHECK: G_STORE [[UV14]](<4 x s32>), [[PTR_ADD13]](p1) :: (store (s128) into unknown-address + 224, align 4, addrspace 1) ; CHECK: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 240 ; CHECK: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C14]](s64) - ; CHECK: G_STORE [[UV15]](<4 x s32>), [[PTR_ADD14]](p1) :: (store 16 into unknown-address + 240, align 4, addrspace 1) + ; CHECK: G_STORE [[UV15]](<4 x s32>), [[PTR_ADD14]](p1) :: (store (s128) into unknown-address + 240, align 4, addrspace 1) ; CHECK: [[UV16:%[0-9]+]]:_(<4 x s32>), [[UV17:%[0-9]+]]:_(<4 x s32>), [[UV18:%[0-9]+]]:_(<4 x s32>), [[UV19:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) ; CHECK: [[UV20:%[0-9]+]]:_(<4 x s32>), [[UV21:%[0-9]+]]:_(<4 x s32>), [[UV22:%[0-9]+]]:_(<4 x s32>), [[UV23:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) ; CHECK: [[UV24:%[0-9]+]]:_(<4 x s32>), [[UV25:%[0-9]+]]:_(<4 x s32>), [[UV26:%[0-9]+]]:_(<4 x s32>), [[UV27:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) ; CHECK: [[UV28:%[0-9]+]]:_(<4 x s32>), [[UV29:%[0-9]+]]:_(<4 x s32>), [[UV30:%[0-9]+]]:_(<4 x s32>), [[UV31:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK: G_STORE [[UV16]](<4 x s32>), [[COPY2]](p1) :: (store 16, align 4, addrspace 1) + ; CHECK: G_STORE [[UV16]](<4 x s32>), [[COPY2]](p1) :: (store (s128), align 4, addrspace 1) ; CHECK: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C]](s64) - ; CHECK: G_STORE [[UV17]](<4 x s32>), [[PTR_ADD15]](p1) :: (store 16 into unknown-address + 16, align 4, addrspace 1) + ; CHECK: G_STORE [[UV17]](<4 x s32>), [[PTR_ADD15]](p1) :: (store (s128) into unknown-address + 16, align 4, addrspace 1) ; CHECK: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C1]](s64) - ; CHECK: G_STORE [[UV18]](<4 x s32>), [[PTR_ADD16]](p1) :: (store 16 into unknown-address + 32, align 4, addrspace 1) + ; CHECK: G_STORE [[UV18]](<4 x s32>), [[PTR_ADD16]](p1) :: (store (s128) into unknown-address + 32, align 4, addrspace 1) ; CHECK: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C2]](s64) - ; CHECK: G_STORE [[UV19]](<4 x s32>), [[PTR_ADD17]](p1) :: (store 16 into unknown-address + 48, align 4, addrspace 1) + ; CHECK: G_STORE [[UV19]](<4 x s32>), [[PTR_ADD17]](p1) :: (store (s128) into unknown-address + 48, align 4, addrspace 1) ; CHECK: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C3]](s64) - ; CHECK: G_STORE [[UV20]](<4 x s32>), [[PTR_ADD18]](p1) :: (store 16 into unknown-address + 64, align 4, addrspace 1) + ; CHECK: G_STORE [[UV20]](<4 x s32>), [[PTR_ADD18]](p1) :: (store (s128) into unknown-address + 64, align 4, addrspace 1) ; CHECK: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C4]](s64) - ; CHECK: G_STORE [[UV21]](<4 x s32>), [[PTR_ADD19]](p1) :: (store 16 into unknown-address + 80, align 4, addrspace 1) + ; CHECK: G_STORE [[UV21]](<4 x s32>), [[PTR_ADD19]](p1) :: (store (s128) into unknown-address + 80, align 4, addrspace 1) ; CHECK: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C5]](s64) - ; CHECK: G_STORE [[UV22]](<4 x s32>), [[PTR_ADD20]](p1) :: (store 16 into unknown-address + 96, align 4, addrspace 1) + ; CHECK: G_STORE [[UV22]](<4 x s32>), [[PTR_ADD20]](p1) :: (store (s128) into unknown-address + 96, align 4, addrspace 1) ; CHECK: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C6]](s64) - ; CHECK: G_STORE [[UV23]](<4 x s32>), [[PTR_ADD21]](p1) :: (store 16 into unknown-address + 112, align 4, addrspace 1) + ; CHECK: G_STORE [[UV23]](<4 x s32>), [[PTR_ADD21]](p1) :: (store (s128) into unknown-address + 112, align 4, addrspace 1) ; CHECK: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C7]](s64) - ; CHECK: G_STORE [[UV24]](<4 x s32>), [[PTR_ADD22]](p1) :: (store 16 into unknown-address + 128, align 4, addrspace 1) + ; CHECK: G_STORE [[UV24]](<4 x s32>), [[PTR_ADD22]](p1) :: (store (s128) into unknown-address + 128, align 4, addrspace 1) ; CHECK: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C8]](s64) - ; CHECK: G_STORE [[UV25]](<4 x s32>), [[PTR_ADD23]](p1) :: (store 16 into unknown-address + 144, align 4, addrspace 1) + ; CHECK: G_STORE [[UV25]](<4 x s32>), [[PTR_ADD23]](p1) :: (store (s128) into unknown-address + 144, align 4, addrspace 1) ; CHECK: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C9]](s64) - ; CHECK: G_STORE [[UV26]](<4 x s32>), [[PTR_ADD24]](p1) :: (store 16 into unknown-address + 160, align 4, addrspace 1) + ; CHECK: G_STORE [[UV26]](<4 x s32>), [[PTR_ADD24]](p1) :: (store (s128) into unknown-address + 160, align 4, addrspace 1) ; CHECK: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C10]](s64) - ; CHECK: G_STORE [[UV27]](<4 x s32>), [[PTR_ADD25]](p1) :: (store 16 into unknown-address + 176, align 4, addrspace 1) + ; CHECK: G_STORE [[UV27]](<4 x s32>), [[PTR_ADD25]](p1) :: (store (s128) into unknown-address + 176, align 4, addrspace 1) ; CHECK: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C11]](s64) - ; CHECK: G_STORE [[UV28]](<4 x s32>), [[PTR_ADD26]](p1) :: (store 16 into unknown-address + 192, align 4, addrspace 1) + ; CHECK: G_STORE [[UV28]](<4 x s32>), [[PTR_ADD26]](p1) :: (store (s128) into unknown-address + 192, align 4, addrspace 1) ; CHECK: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C12]](s64) - ; CHECK: G_STORE [[UV29]](<4 x s32>), [[PTR_ADD27]](p1) :: (store 16 into unknown-address + 208, align 4, addrspace 1) + ; CHECK: G_STORE [[UV29]](<4 x s32>), [[PTR_ADD27]](p1) :: (store (s128) into unknown-address + 208, align 4, addrspace 1) ; CHECK: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C13]](s64) - ; CHECK: G_STORE [[UV30]](<4 x s32>), [[PTR_ADD28]](p1) :: (store 16 into unknown-address + 224, align 4, addrspace 1) + ; CHECK: G_STORE [[UV30]](<4 x s32>), [[PTR_ADD28]](p1) :: (store (s128) into unknown-address + 224, align 4, addrspace 1) ; CHECK: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C14]](s64) - ; CHECK: G_STORE [[UV31]](<4 x s32>), [[PTR_ADD29]](p1) :: (store 16 into unknown-address + 240, align 4, addrspace 1) + ; CHECK: G_STORE [[UV31]](<4 x s32>), [[PTR_ADD29]](p1) :: (store (s128) into unknown-address + 240, align 4, addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_CONSTANT i32 64 - %2:_(<64 x s32>) = G_LOAD %0 :: (load 256, align 4, addrspace 4) + %2:_(<64 x s32>) = G_LOAD %0 :: (load (<64 x s32>), align 4, addrspace 4) %3:_(s32) = G_CONSTANT i32 12345 %4:_(<64 x s32>) = G_INSERT_VECTOR_ELT %2, %3, %1 %5:_(s32) = G_CONSTANT i32 65 %6:_(<64 x s32>) = G_INSERT_VECTOR_ELT %2, %3, %5 %7:_(p1) = COPY $vgpr0_vgpr1 %8:_(p1) = COPY $vgpr2_vgpr3 - G_STORE %4, %7 :: (store 256, align 4, addrspace 1) - G_STORE %6, %8 :: (store 256, align 4, addrspace 1) + G_STORE %4, %7 :: (store (<64 x s32>), align 4, addrspace 1) + G_STORE %6, %8 :: (store (<64 x s32>), align 4, addrspace 1) ... --- @@ -300,16 +300,16 @@ body: | ; CHECK-LABEL: name: insert_vector_elt_33_v64s32 ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load 64, align 4, addrspace 4) + ; CHECK: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (s512), align 4, addrspace 4) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load 64 from unknown-address + 64, align 4, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (s512) from unknown-address + 64, align 4, addrspace 4) ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p1) :: (load 64 from unknown-address + 128, align 4, addrspace 4) + ; CHECK: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p1) :: (load (s512) from unknown-address + 128, align 4, addrspace 4) ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 192 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CHECK: [[LOAD3:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load 64 from unknown-address + 192, align 4, addrspace 4) + ; CHECK: [[LOAD3:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load (s512) from unknown-address + 192, align 4, addrspace 4) ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 12345 ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s32>) = G_CONCAT_VECTORS [[LOAD2]](<16 x s32>), [[LOAD3]](<16 x s32>) ; CHECK: [[INSERT:%[0-9]+]]:_(<32 x s32>) = G_INSERT [[CONCAT_VECTORS]], [[C3]](s32), 32 @@ -317,56 +317,56 @@ body: | ; CHECK: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>), [[UV2:%[0-9]+]]:_(<4 x s32>), [[UV3:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[LOAD]](<16 x s32>) ; CHECK: [[UV4:%[0-9]+]]:_(<4 x s32>), [[UV5:%[0-9]+]]:_(<4 x s32>), [[UV6:%[0-9]+]]:_(<4 x s32>), [[UV7:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>) ; CHECK: [[UV8:%[0-9]+]]:_(<4 x s32>), [[UV9:%[0-9]+]]:_(<4 x s32>), [[UV10:%[0-9]+]]:_(<4 x s32>), [[UV11:%[0-9]+]]:_(<4 x s32>), [[UV12:%[0-9]+]]:_(<4 x s32>), [[UV13:%[0-9]+]]:_(<4 x s32>), [[UV14:%[0-9]+]]:_(<4 x s32>), [[UV15:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[INSERT]](<32 x s32>) - ; CHECK: G_STORE [[UV]](<4 x s32>), [[COPY1]](p1) :: (store 16, align 4, addrspace 1) + ; CHECK: G_STORE [[UV]](<4 x s32>), [[COPY1]](p1) :: (store (s128), align 4, addrspace 1) ; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C4]](s64) - ; CHECK: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD3]](p1) :: (store 16 into unknown-address + 16, align 4, addrspace 1) + ; CHECK: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD3]](p1) :: (store (s128) into unknown-address + 16, align 4, addrspace 1) ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C5]](s64) - ; CHECK: G_STORE [[UV2]](<4 x s32>), [[PTR_ADD4]](p1) :: (store 16 into unknown-address + 32, align 4, addrspace 1) + ; CHECK: G_STORE [[UV2]](<4 x s32>), [[PTR_ADD4]](p1) :: (store (s128) into unknown-address + 32, align 4, addrspace 1) ; CHECK: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 ; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C6]](s64) - ; CHECK: G_STORE [[UV3]](<4 x s32>), [[PTR_ADD5]](p1) :: (store 16 into unknown-address + 48, align 4, addrspace 1) + ; CHECK: G_STORE [[UV3]](<4 x s32>), [[PTR_ADD5]](p1) :: (store (s128) into unknown-address + 48, align 4, addrspace 1) ; CHECK: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C]](s64) - ; CHECK: G_STORE [[UV4]](<4 x s32>), [[PTR_ADD6]](p1) :: (store 16 into unknown-address + 64, align 4, addrspace 1) + ; CHECK: G_STORE [[UV4]](<4 x s32>), [[PTR_ADD6]](p1) :: (store (s128) into unknown-address + 64, align 4, addrspace 1) ; CHECK: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 80 ; CHECK: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C7]](s64) - ; CHECK: G_STORE [[UV5]](<4 x s32>), [[PTR_ADD7]](p1) :: (store 16 into unknown-address + 80, align 4, addrspace 1) + ; CHECK: G_STORE [[UV5]](<4 x s32>), [[PTR_ADD7]](p1) :: (store (s128) into unknown-address + 80, align 4, addrspace 1) ; CHECK: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 96 ; CHECK: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C8]](s64) - ; CHECK: G_STORE [[UV6]](<4 x s32>), [[PTR_ADD8]](p1) :: (store 16 into unknown-address + 96, align 4, addrspace 1) + ; CHECK: G_STORE [[UV6]](<4 x s32>), [[PTR_ADD8]](p1) :: (store (s128) into unknown-address + 96, align 4, addrspace 1) ; CHECK: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 112 ; CHECK: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C9]](s64) - ; CHECK: G_STORE [[UV7]](<4 x s32>), [[PTR_ADD9]](p1) :: (store 16 into unknown-address + 112, align 4, addrspace 1) + ; CHECK: G_STORE [[UV7]](<4 x s32>), [[PTR_ADD9]](p1) :: (store (s128) into unknown-address + 112, align 4, addrspace 1) ; CHECK: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C1]](s64) - ; CHECK: G_STORE [[UV8]](<4 x s32>), [[PTR_ADD10]](p1) :: (store 16 into unknown-address + 128, align 4, addrspace 1) + ; CHECK: G_STORE [[UV8]](<4 x s32>), [[PTR_ADD10]](p1) :: (store (s128) into unknown-address + 128, align 4, addrspace 1) ; CHECK: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 144 ; CHECK: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C10]](s64) - ; CHECK: G_STORE [[UV9]](<4 x s32>), [[PTR_ADD11]](p1) :: (store 16 into unknown-address + 144, align 4, addrspace 1) + ; CHECK: G_STORE [[UV9]](<4 x s32>), [[PTR_ADD11]](p1) :: (store (s128) into unknown-address + 144, align 4, addrspace 1) ; CHECK: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 160 ; CHECK: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C11]](s64) - ; CHECK: G_STORE [[UV10]](<4 x s32>), [[PTR_ADD12]](p1) :: (store 16 into unknown-address + 160, align 4, addrspace 1) + ; CHECK: G_STORE [[UV10]](<4 x s32>), [[PTR_ADD12]](p1) :: (store (s128) into unknown-address + 160, align 4, addrspace 1) ; CHECK: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 176 ; CHECK: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C12]](s64) - ; CHECK: G_STORE [[UV11]](<4 x s32>), [[PTR_ADD13]](p1) :: (store 16 into unknown-address + 176, align 4, addrspace 1) + ; CHECK: G_STORE [[UV11]](<4 x s32>), [[PTR_ADD13]](p1) :: (store (s128) into unknown-address + 176, align 4, addrspace 1) ; CHECK: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C2]](s64) - ; CHECK: G_STORE [[UV12]](<4 x s32>), [[PTR_ADD14]](p1) :: (store 16 into unknown-address + 192, align 4, addrspace 1) + ; CHECK: G_STORE [[UV12]](<4 x s32>), [[PTR_ADD14]](p1) :: (store (s128) into unknown-address + 192, align 4, addrspace 1) ; CHECK: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 208 ; CHECK: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C13]](s64) - ; CHECK: G_STORE [[UV13]](<4 x s32>), [[PTR_ADD15]](p1) :: (store 16 into unknown-address + 208, align 4, addrspace 1) + ; CHECK: G_STORE [[UV13]](<4 x s32>), [[PTR_ADD15]](p1) :: (store (s128) into unknown-address + 208, align 4, addrspace 1) ; CHECK: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 224 ; CHECK: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C14]](s64) - ; CHECK: G_STORE [[UV14]](<4 x s32>), [[PTR_ADD16]](p1) :: (store 16 into unknown-address + 224, align 4, addrspace 1) + ; CHECK: G_STORE [[UV14]](<4 x s32>), [[PTR_ADD16]](p1) :: (store (s128) into unknown-address + 224, align 4, addrspace 1) ; CHECK: [[C15:%[0-9]+]]:_(s64) = G_CONSTANT i64 240 ; CHECK: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C15]](s64) - ; CHECK: G_STORE [[UV15]](<4 x s32>), [[PTR_ADD17]](p1) :: (store 16 into unknown-address + 240, align 4, addrspace 1) + ; CHECK: G_STORE [[UV15]](<4 x s32>), [[PTR_ADD17]](p1) :: (store (s128) into unknown-address + 240, align 4, addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_CONSTANT i32 33 - %2:_(<64 x s32>) = G_LOAD %0 :: (load 256, align 4, addrspace 4) + %2:_(<64 x s32>) = G_LOAD %0 :: (load (<64 x s32>), align 4, addrspace 4) %3:_(s32) = G_CONSTANT i32 12345 %4:_(<64 x s32>) = G_INSERT_VECTOR_ELT %2, %3, %1 %5:_(p1) = COPY $vgpr0_vgpr1 - G_STORE %4, %5 :: (store 256, align 4, addrspace 1) + G_STORE %4, %5 :: (store (<64 x s32>), align 4, addrspace 1) ... --- @@ -379,344 +379,344 @@ body: | ; CHECK-LABEL: name: insert_vector_elt_varidx_v64s32 ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; CHECK: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load 64, align 4, addrspace 4) + ; CHECK: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (s512), align 4, addrspace 4) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load 64 from unknown-address + 64, align 4, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (s512) from unknown-address + 64, align 4, addrspace 4) ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p1) :: (load 64 from unknown-address + 128, align 4, addrspace 4) + ; CHECK: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p1) :: (load (s512) from unknown-address + 128, align 4, addrspace 4) ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 192 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CHECK: [[LOAD3:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load 64 from unknown-address + 192, align 4, addrspace 4) + ; CHECK: [[LOAD3:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load (s512) from unknown-address + 192, align 4, addrspace 4) ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 12345 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0 ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<16 x s32>) ; CHECK: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>) ; CHECK: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<16 x s32>) ; CHECK: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD3]](<16 x s32>) - ; CHECK: G_STORE [[UV]](s32), [[FRAME_INDEX]](p5) :: (store 4 into %stack.0, align 256, addrspace 5) + ; CHECK: G_STORE [[UV]](s32), [[FRAME_INDEX]](p5) :: (store (s32) into %stack.0, align 256, addrspace 5) ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C4]](s32) ; CHECK: [[COPY2:%[0-9]+]]:_(p5) = COPY [[PTR_ADD3]](p5) - ; CHECK: G_STORE [[UV1]](s32), [[COPY2]](p5) :: (store 4 into %stack.0 + 4, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV1]](s32), [[COPY2]](p5) :: (store (s32) into %stack.0 + 4, basealign 256, addrspace 5) ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C5]](s32) ; CHECK: [[COPY3:%[0-9]+]]:_(p5) = COPY [[PTR_ADD4]](p5) - ; CHECK: G_STORE [[UV2]](s32), [[COPY3]](p5) :: (store 4 into %stack.0 + 8, align 8, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV2]](s32), [[COPY3]](p5) :: (store (s32) into %stack.0 + 8, align 8, basealign 256, addrspace 5) ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C6]](s32) ; CHECK: [[COPY4:%[0-9]+]]:_(p5) = COPY [[PTR_ADD5]](p5) - ; CHECK: G_STORE [[UV3]](s32), [[COPY4]](p5) :: (store 4 into %stack.0 + 12, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV3]](s32), [[COPY4]](p5) :: (store (s32) into %stack.0 + 12, basealign 256, addrspace 5) ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C7]](s32) ; CHECK: [[COPY5:%[0-9]+]]:_(p5) = COPY [[PTR_ADD6]](p5) - ; CHECK: G_STORE [[UV4]](s32), [[COPY5]](p5) :: (store 4 into %stack.0 + 16, align 16, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV4]](s32), [[COPY5]](p5) :: (store (s32) into %stack.0 + 16, align 16, basealign 256, addrspace 5) ; CHECK: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C8]](s32) ; CHECK: [[COPY6:%[0-9]+]]:_(p5) = COPY [[PTR_ADD7]](p5) - ; CHECK: G_STORE [[UV5]](s32), [[COPY6]](p5) :: (store 4 into %stack.0 + 20, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV5]](s32), [[COPY6]](p5) :: (store (s32) into %stack.0 + 20, basealign 256, addrspace 5) ; CHECK: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; CHECK: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C9]](s32) ; CHECK: [[COPY7:%[0-9]+]]:_(p5) = COPY [[PTR_ADD8]](p5) - ; CHECK: G_STORE [[UV6]](s32), [[COPY7]](p5) :: (store 4 into %stack.0 + 24, align 8, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV6]](s32), [[COPY7]](p5) :: (store (s32) into %stack.0 + 24, align 8, basealign 256, addrspace 5) ; CHECK: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 ; CHECK: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C10]](s32) ; CHECK: [[COPY8:%[0-9]+]]:_(p5) = COPY [[PTR_ADD9]](p5) - ; CHECK: G_STORE [[UV7]](s32), [[COPY8]](p5) :: (store 4 into %stack.0 + 28, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV7]](s32), [[COPY8]](p5) :: (store (s32) into %stack.0 + 28, basealign 256, addrspace 5) ; CHECK: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; CHECK: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C11]](s32) ; CHECK: [[COPY9:%[0-9]+]]:_(p5) = COPY [[PTR_ADD10]](p5) - ; CHECK: G_STORE [[UV8]](s32), [[COPY9]](p5) :: (store 4 into %stack.0 + 32, align 32, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV8]](s32), [[COPY9]](p5) :: (store (s32) into %stack.0 + 32, align 32, basealign 256, addrspace 5) ; CHECK: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 36 ; CHECK: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C12]](s32) ; CHECK: [[COPY10:%[0-9]+]]:_(p5) = COPY [[PTR_ADD11]](p5) - ; CHECK: G_STORE [[UV9]](s32), [[COPY10]](p5) :: (store 4 into %stack.0 + 36, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV9]](s32), [[COPY10]](p5) :: (store (s32) into %stack.0 + 36, basealign 256, addrspace 5) ; CHECK: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 ; CHECK: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C13]](s32) ; CHECK: [[COPY11:%[0-9]+]]:_(p5) = COPY [[PTR_ADD12]](p5) - ; CHECK: G_STORE [[UV10]](s32), [[COPY11]](p5) :: (store 4 into %stack.0 + 40, align 8, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV10]](s32), [[COPY11]](p5) :: (store (s32) into %stack.0 + 40, align 8, basealign 256, addrspace 5) ; CHECK: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 44 ; CHECK: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C14]](s32) ; CHECK: [[COPY12:%[0-9]+]]:_(p5) = COPY [[PTR_ADD13]](p5) - ; CHECK: G_STORE [[UV11]](s32), [[COPY12]](p5) :: (store 4 into %stack.0 + 44, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV11]](s32), [[COPY12]](p5) :: (store (s32) into %stack.0 + 44, basealign 256, addrspace 5) ; CHECK: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 ; CHECK: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C15]](s32) ; CHECK: [[COPY13:%[0-9]+]]:_(p5) = COPY [[PTR_ADD14]](p5) - ; CHECK: G_STORE [[UV12]](s32), [[COPY13]](p5) :: (store 4 into %stack.0 + 48, align 16, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV12]](s32), [[COPY13]](p5) :: (store (s32) into %stack.0 + 48, align 16, basealign 256, addrspace 5) ; CHECK: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 52 ; CHECK: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C16]](s32) ; CHECK: [[COPY14:%[0-9]+]]:_(p5) = COPY [[PTR_ADD15]](p5) - ; CHECK: G_STORE [[UV13]](s32), [[COPY14]](p5) :: (store 4 into %stack.0 + 52, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV13]](s32), [[COPY14]](p5) :: (store (s32) into %stack.0 + 52, basealign 256, addrspace 5) ; CHECK: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 56 ; CHECK: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C17]](s32) ; CHECK: [[COPY15:%[0-9]+]]:_(p5) = COPY [[PTR_ADD16]](p5) - ; CHECK: G_STORE [[UV14]](s32), [[COPY15]](p5) :: (store 4 into %stack.0 + 56, align 8, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV14]](s32), [[COPY15]](p5) :: (store (s32) into %stack.0 + 56, align 8, basealign 256, addrspace 5) ; CHECK: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 60 ; CHECK: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C18]](s32) ; CHECK: [[COPY16:%[0-9]+]]:_(p5) = COPY [[PTR_ADD17]](p5) - ; CHECK: G_STORE [[UV15]](s32), [[COPY16]](p5) :: (store 4 into %stack.0 + 60, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV15]](s32), [[COPY16]](p5) :: (store (s32) into %stack.0 + 60, basealign 256, addrspace 5) ; CHECK: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; CHECK: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C19]](s32) ; CHECK: [[COPY17:%[0-9]+]]:_(p5) = COPY [[PTR_ADD18]](p5) - ; CHECK: G_STORE [[UV16]](s32), [[COPY17]](p5) :: (store 4 into %stack.0 + 64, align 64, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV16]](s32), [[COPY17]](p5) :: (store (s32) into %stack.0 + 64, align 64, basealign 256, addrspace 5) ; CHECK: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 68 ; CHECK: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C20]](s32) ; CHECK: [[COPY18:%[0-9]+]]:_(p5) = COPY [[PTR_ADD19]](p5) - ; CHECK: G_STORE [[UV17]](s32), [[COPY18]](p5) :: (store 4 into %stack.0 + 68, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV17]](s32), [[COPY18]](p5) :: (store (s32) into %stack.0 + 68, basealign 256, addrspace 5) ; CHECK: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 72 ; CHECK: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C21]](s32) ; CHECK: [[COPY19:%[0-9]+]]:_(p5) = COPY [[PTR_ADD20]](p5) - ; CHECK: G_STORE [[UV18]](s32), [[COPY19]](p5) :: (store 4 into %stack.0 + 72, align 8, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV18]](s32), [[COPY19]](p5) :: (store (s32) into %stack.0 + 72, align 8, basealign 256, addrspace 5) ; CHECK: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 76 ; CHECK: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C22]](s32) ; CHECK: [[COPY20:%[0-9]+]]:_(p5) = COPY [[PTR_ADD21]](p5) - ; CHECK: G_STORE [[UV19]](s32), [[COPY20]](p5) :: (store 4 into %stack.0 + 76, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV19]](s32), [[COPY20]](p5) :: (store (s32) into %stack.0 + 76, basealign 256, addrspace 5) ; CHECK: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 80 ; CHECK: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C23]](s32) ; CHECK: [[COPY21:%[0-9]+]]:_(p5) = COPY [[PTR_ADD22]](p5) - ; CHECK: G_STORE [[UV20]](s32), [[COPY21]](p5) :: (store 4 into %stack.0 + 80, align 16, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV20]](s32), [[COPY21]](p5) :: (store (s32) into %stack.0 + 80, align 16, basealign 256, addrspace 5) ; CHECK: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 84 ; CHECK: [[PTR_ADD23:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C24]](s32) ; CHECK: [[COPY22:%[0-9]+]]:_(p5) = COPY [[PTR_ADD23]](p5) - ; CHECK: G_STORE [[UV21]](s32), [[COPY22]](p5) :: (store 4 into %stack.0 + 84, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV21]](s32), [[COPY22]](p5) :: (store (s32) into %stack.0 + 84, basealign 256, addrspace 5) ; CHECK: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 88 ; CHECK: [[PTR_ADD24:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C25]](s32) ; CHECK: [[COPY23:%[0-9]+]]:_(p5) = COPY [[PTR_ADD24]](p5) - ; CHECK: G_STORE [[UV22]](s32), [[COPY23]](p5) :: (store 4 into %stack.0 + 88, align 8, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV22]](s32), [[COPY23]](p5) :: (store (s32) into %stack.0 + 88, align 8, basealign 256, addrspace 5) ; CHECK: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 92 ; CHECK: [[PTR_ADD25:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C26]](s32) ; CHECK: [[COPY24:%[0-9]+]]:_(p5) = COPY [[PTR_ADD25]](p5) - ; CHECK: G_STORE [[UV23]](s32), [[COPY24]](p5) :: (store 4 into %stack.0 + 92, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV23]](s32), [[COPY24]](p5) :: (store (s32) into %stack.0 + 92, basealign 256, addrspace 5) ; CHECK: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 96 ; CHECK: [[PTR_ADD26:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C27]](s32) ; CHECK: [[COPY25:%[0-9]+]]:_(p5) = COPY [[PTR_ADD26]](p5) - ; CHECK: G_STORE [[UV24]](s32), [[COPY25]](p5) :: (store 4 into %stack.0 + 96, align 32, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV24]](s32), [[COPY25]](p5) :: (store (s32) into %stack.0 + 96, align 32, basealign 256, addrspace 5) ; CHECK: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 100 ; CHECK: [[PTR_ADD27:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C28]](s32) ; CHECK: [[COPY26:%[0-9]+]]:_(p5) = COPY [[PTR_ADD27]](p5) - ; CHECK: G_STORE [[UV25]](s32), [[COPY26]](p5) :: (store 4 into %stack.0 + 100, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV25]](s32), [[COPY26]](p5) :: (store (s32) into %stack.0 + 100, basealign 256, addrspace 5) ; CHECK: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 104 ; CHECK: [[PTR_ADD28:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C29]](s32) ; CHECK: [[COPY27:%[0-9]+]]:_(p5) = COPY [[PTR_ADD28]](p5) - ; CHECK: G_STORE [[UV26]](s32), [[COPY27]](p5) :: (store 4 into %stack.0 + 104, align 8, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV26]](s32), [[COPY27]](p5) :: (store (s32) into %stack.0 + 104, align 8, basealign 256, addrspace 5) ; CHECK: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 108 ; CHECK: [[PTR_ADD29:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C30]](s32) ; CHECK: [[COPY28:%[0-9]+]]:_(p5) = COPY [[PTR_ADD29]](p5) - ; CHECK: G_STORE [[UV27]](s32), [[COPY28]](p5) :: (store 4 into %stack.0 + 108, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV27]](s32), [[COPY28]](p5) :: (store (s32) into %stack.0 + 108, basealign 256, addrspace 5) ; CHECK: [[C31:%[0-9]+]]:_(s32) = G_CONSTANT i32 112 ; CHECK: [[PTR_ADD30:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C31]](s32) ; CHECK: [[COPY29:%[0-9]+]]:_(p5) = COPY [[PTR_ADD30]](p5) - ; CHECK: G_STORE [[UV28]](s32), [[COPY29]](p5) :: (store 4 into %stack.0 + 112, align 16, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV28]](s32), [[COPY29]](p5) :: (store (s32) into %stack.0 + 112, align 16, basealign 256, addrspace 5) ; CHECK: [[C32:%[0-9]+]]:_(s32) = G_CONSTANT i32 116 ; CHECK: [[PTR_ADD31:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C32]](s32) ; CHECK: [[COPY30:%[0-9]+]]:_(p5) = COPY [[PTR_ADD31]](p5) - ; CHECK: G_STORE [[UV29]](s32), [[COPY30]](p5) :: (store 4 into %stack.0 + 116, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV29]](s32), [[COPY30]](p5) :: (store (s32) into %stack.0 + 116, basealign 256, addrspace 5) ; CHECK: [[C33:%[0-9]+]]:_(s32) = G_CONSTANT i32 120 ; CHECK: [[PTR_ADD32:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C33]](s32) ; CHECK: [[COPY31:%[0-9]+]]:_(p5) = COPY [[PTR_ADD32]](p5) - ; CHECK: G_STORE [[UV30]](s32), [[COPY31]](p5) :: (store 4 into %stack.0 + 120, align 8, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV30]](s32), [[COPY31]](p5) :: (store (s32) into %stack.0 + 120, align 8, basealign 256, addrspace 5) ; CHECK: [[C34:%[0-9]+]]:_(s32) = G_CONSTANT i32 124 ; CHECK: [[PTR_ADD33:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C34]](s32) ; CHECK: [[COPY32:%[0-9]+]]:_(p5) = COPY [[PTR_ADD33]](p5) - ; CHECK: G_STORE [[UV31]](s32), [[COPY32]](p5) :: (store 4 into %stack.0 + 124, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV31]](s32), [[COPY32]](p5) :: (store (s32) into %stack.0 + 124, basealign 256, addrspace 5) ; CHECK: [[C35:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 ; CHECK: [[PTR_ADD34:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C35]](s32) ; CHECK: [[COPY33:%[0-9]+]]:_(p5) = COPY [[PTR_ADD34]](p5) - ; CHECK: G_STORE [[UV32]](s32), [[COPY33]](p5) :: (store 4 into %stack.0 + 128, align 128, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV32]](s32), [[COPY33]](p5) :: (store (s32) into %stack.0 + 128, align 128, basealign 256, addrspace 5) ; CHECK: [[C36:%[0-9]+]]:_(s32) = G_CONSTANT i32 132 ; CHECK: [[PTR_ADD35:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C36]](s32) ; CHECK: [[COPY34:%[0-9]+]]:_(p5) = COPY [[PTR_ADD35]](p5) - ; CHECK: G_STORE [[UV33]](s32), [[COPY34]](p5) :: (store 4 into %stack.0 + 132, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV33]](s32), [[COPY34]](p5) :: (store (s32) into %stack.0 + 132, basealign 256, addrspace 5) ; CHECK: [[C37:%[0-9]+]]:_(s32) = G_CONSTANT i32 136 ; CHECK: [[PTR_ADD36:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C37]](s32) ; CHECK: [[COPY35:%[0-9]+]]:_(p5) = COPY [[PTR_ADD36]](p5) - ; CHECK: G_STORE [[UV34]](s32), [[COPY35]](p5) :: (store 4 into %stack.0 + 136, align 8, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV34]](s32), [[COPY35]](p5) :: (store (s32) into %stack.0 + 136, align 8, basealign 256, addrspace 5) ; CHECK: [[C38:%[0-9]+]]:_(s32) = G_CONSTANT i32 140 ; CHECK: [[PTR_ADD37:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C38]](s32) ; CHECK: [[COPY36:%[0-9]+]]:_(p5) = COPY [[PTR_ADD37]](p5) - ; CHECK: G_STORE [[UV35]](s32), [[COPY36]](p5) :: (store 4 into %stack.0 + 140, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV35]](s32), [[COPY36]](p5) :: (store (s32) into %stack.0 + 140, basealign 256, addrspace 5) ; CHECK: [[C39:%[0-9]+]]:_(s32) = G_CONSTANT i32 144 ; CHECK: [[PTR_ADD38:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C39]](s32) ; CHECK: [[COPY37:%[0-9]+]]:_(p5) = COPY [[PTR_ADD38]](p5) - ; CHECK: G_STORE [[UV36]](s32), [[COPY37]](p5) :: (store 4 into %stack.0 + 144, align 16, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV36]](s32), [[COPY37]](p5) :: (store (s32) into %stack.0 + 144, align 16, basealign 256, addrspace 5) ; CHECK: [[C40:%[0-9]+]]:_(s32) = G_CONSTANT i32 148 ; CHECK: [[PTR_ADD39:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C40]](s32) ; CHECK: [[COPY38:%[0-9]+]]:_(p5) = COPY [[PTR_ADD39]](p5) - ; CHECK: G_STORE [[UV37]](s32), [[COPY38]](p5) :: (store 4 into %stack.0 + 148, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV37]](s32), [[COPY38]](p5) :: (store (s32) into %stack.0 + 148, basealign 256, addrspace 5) ; CHECK: [[C41:%[0-9]+]]:_(s32) = G_CONSTANT i32 152 ; CHECK: [[PTR_ADD40:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C41]](s32) ; CHECK: [[COPY39:%[0-9]+]]:_(p5) = COPY [[PTR_ADD40]](p5) - ; CHECK: G_STORE [[UV38]](s32), [[COPY39]](p5) :: (store 4 into %stack.0 + 152, align 8, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV38]](s32), [[COPY39]](p5) :: (store (s32) into %stack.0 + 152, align 8, basealign 256, addrspace 5) ; CHECK: [[C42:%[0-9]+]]:_(s32) = G_CONSTANT i32 156 ; CHECK: [[PTR_ADD41:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C42]](s32) ; CHECK: [[COPY40:%[0-9]+]]:_(p5) = COPY [[PTR_ADD41]](p5) - ; CHECK: G_STORE [[UV39]](s32), [[COPY40]](p5) :: (store 4 into %stack.0 + 156, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV39]](s32), [[COPY40]](p5) :: (store (s32) into %stack.0 + 156, basealign 256, addrspace 5) ; CHECK: [[C43:%[0-9]+]]:_(s32) = G_CONSTANT i32 160 ; CHECK: [[PTR_ADD42:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C43]](s32) ; CHECK: [[COPY41:%[0-9]+]]:_(p5) = COPY [[PTR_ADD42]](p5) - ; CHECK: G_STORE [[UV40]](s32), [[COPY41]](p5) :: (store 4 into %stack.0 + 160, align 32, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV40]](s32), [[COPY41]](p5) :: (store (s32) into %stack.0 + 160, align 32, basealign 256, addrspace 5) ; CHECK: [[C44:%[0-9]+]]:_(s32) = G_CONSTANT i32 164 ; CHECK: [[PTR_ADD43:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C44]](s32) ; CHECK: [[COPY42:%[0-9]+]]:_(p5) = COPY [[PTR_ADD43]](p5) - ; CHECK: G_STORE [[UV41]](s32), [[COPY42]](p5) :: (store 4 into %stack.0 + 164, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV41]](s32), [[COPY42]](p5) :: (store (s32) into %stack.0 + 164, basealign 256, addrspace 5) ; CHECK: [[C45:%[0-9]+]]:_(s32) = G_CONSTANT i32 168 ; CHECK: [[PTR_ADD44:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C45]](s32) ; CHECK: [[COPY43:%[0-9]+]]:_(p5) = COPY [[PTR_ADD44]](p5) - ; CHECK: G_STORE [[UV42]](s32), [[COPY43]](p5) :: (store 4 into %stack.0 + 168, align 8, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV42]](s32), [[COPY43]](p5) :: (store (s32) into %stack.0 + 168, align 8, basealign 256, addrspace 5) ; CHECK: [[C46:%[0-9]+]]:_(s32) = G_CONSTANT i32 172 ; CHECK: [[PTR_ADD45:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C46]](s32) ; CHECK: [[COPY44:%[0-9]+]]:_(p5) = COPY [[PTR_ADD45]](p5) - ; CHECK: G_STORE [[UV43]](s32), [[COPY44]](p5) :: (store 4 into %stack.0 + 172, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV43]](s32), [[COPY44]](p5) :: (store (s32) into %stack.0 + 172, basealign 256, addrspace 5) ; CHECK: [[C47:%[0-9]+]]:_(s32) = G_CONSTANT i32 176 ; CHECK: [[PTR_ADD46:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C47]](s32) ; CHECK: [[COPY45:%[0-9]+]]:_(p5) = COPY [[PTR_ADD46]](p5) - ; CHECK: G_STORE [[UV44]](s32), [[COPY45]](p5) :: (store 4 into %stack.0 + 176, align 16, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV44]](s32), [[COPY45]](p5) :: (store (s32) into %stack.0 + 176, align 16, basealign 256, addrspace 5) ; CHECK: [[C48:%[0-9]+]]:_(s32) = G_CONSTANT i32 180 ; CHECK: [[PTR_ADD47:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C48]](s32) ; CHECK: [[COPY46:%[0-9]+]]:_(p5) = COPY [[PTR_ADD47]](p5) - ; CHECK: G_STORE [[UV45]](s32), [[COPY46]](p5) :: (store 4 into %stack.0 + 180, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV45]](s32), [[COPY46]](p5) :: (store (s32) into %stack.0 + 180, basealign 256, addrspace 5) ; CHECK: [[C49:%[0-9]+]]:_(s32) = G_CONSTANT i32 184 ; CHECK: [[PTR_ADD48:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C49]](s32) ; CHECK: [[COPY47:%[0-9]+]]:_(p5) = COPY [[PTR_ADD48]](p5) - ; CHECK: G_STORE [[UV46]](s32), [[COPY47]](p5) :: (store 4 into %stack.0 + 184, align 8, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV46]](s32), [[COPY47]](p5) :: (store (s32) into %stack.0 + 184, align 8, basealign 256, addrspace 5) ; CHECK: [[C50:%[0-9]+]]:_(s32) = G_CONSTANT i32 188 ; CHECK: [[PTR_ADD49:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C50]](s32) ; CHECK: [[COPY48:%[0-9]+]]:_(p5) = COPY [[PTR_ADD49]](p5) - ; CHECK: G_STORE [[UV47]](s32), [[COPY48]](p5) :: (store 4 into %stack.0 + 188, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV47]](s32), [[COPY48]](p5) :: (store (s32) into %stack.0 + 188, basealign 256, addrspace 5) ; CHECK: [[C51:%[0-9]+]]:_(s32) = G_CONSTANT i32 192 ; CHECK: [[PTR_ADD50:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C51]](s32) ; CHECK: [[COPY49:%[0-9]+]]:_(p5) = COPY [[PTR_ADD50]](p5) - ; CHECK: G_STORE [[UV48]](s32), [[COPY49]](p5) :: (store 4 into %stack.0 + 192, align 64, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV48]](s32), [[COPY49]](p5) :: (store (s32) into %stack.0 + 192, align 64, basealign 256, addrspace 5) ; CHECK: [[C52:%[0-9]+]]:_(s32) = G_CONSTANT i32 196 ; CHECK: [[PTR_ADD51:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C52]](s32) ; CHECK: [[COPY50:%[0-9]+]]:_(p5) = COPY [[PTR_ADD51]](p5) - ; CHECK: G_STORE [[UV49]](s32), [[COPY50]](p5) :: (store 4 into %stack.0 + 196, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV49]](s32), [[COPY50]](p5) :: (store (s32) into %stack.0 + 196, basealign 256, addrspace 5) ; CHECK: [[C53:%[0-9]+]]:_(s32) = G_CONSTANT i32 200 ; CHECK: [[PTR_ADD52:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C53]](s32) ; CHECK: [[COPY51:%[0-9]+]]:_(p5) = COPY [[PTR_ADD52]](p5) - ; CHECK: G_STORE [[UV50]](s32), [[COPY51]](p5) :: (store 4 into %stack.0 + 200, align 8, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV50]](s32), [[COPY51]](p5) :: (store (s32) into %stack.0 + 200, align 8, basealign 256, addrspace 5) ; CHECK: [[C54:%[0-9]+]]:_(s32) = G_CONSTANT i32 204 ; CHECK: [[PTR_ADD53:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C54]](s32) ; CHECK: [[COPY52:%[0-9]+]]:_(p5) = COPY [[PTR_ADD53]](p5) - ; CHECK: G_STORE [[UV51]](s32), [[COPY52]](p5) :: (store 4 into %stack.0 + 204, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV51]](s32), [[COPY52]](p5) :: (store (s32) into %stack.0 + 204, basealign 256, addrspace 5) ; CHECK: [[C55:%[0-9]+]]:_(s32) = G_CONSTANT i32 208 ; CHECK: [[PTR_ADD54:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C55]](s32) ; CHECK: [[COPY53:%[0-9]+]]:_(p5) = COPY [[PTR_ADD54]](p5) - ; CHECK: G_STORE [[UV52]](s32), [[COPY53]](p5) :: (store 4 into %stack.0 + 208, align 16, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV52]](s32), [[COPY53]](p5) :: (store (s32) into %stack.0 + 208, align 16, basealign 256, addrspace 5) ; CHECK: [[C56:%[0-9]+]]:_(s32) = G_CONSTANT i32 212 ; CHECK: [[PTR_ADD55:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C56]](s32) ; CHECK: [[COPY54:%[0-9]+]]:_(p5) = COPY [[PTR_ADD55]](p5) - ; CHECK: G_STORE [[UV53]](s32), [[COPY54]](p5) :: (store 4 into %stack.0 + 212, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV53]](s32), [[COPY54]](p5) :: (store (s32) into %stack.0 + 212, basealign 256, addrspace 5) ; CHECK: [[C57:%[0-9]+]]:_(s32) = G_CONSTANT i32 216 ; CHECK: [[PTR_ADD56:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C57]](s32) ; CHECK: [[COPY55:%[0-9]+]]:_(p5) = COPY [[PTR_ADD56]](p5) - ; CHECK: G_STORE [[UV54]](s32), [[COPY55]](p5) :: (store 4 into %stack.0 + 216, align 8, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV54]](s32), [[COPY55]](p5) :: (store (s32) into %stack.0 + 216, align 8, basealign 256, addrspace 5) ; CHECK: [[C58:%[0-9]+]]:_(s32) = G_CONSTANT i32 220 ; CHECK: [[PTR_ADD57:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C58]](s32) ; CHECK: [[COPY56:%[0-9]+]]:_(p5) = COPY [[PTR_ADD57]](p5) - ; CHECK: G_STORE [[UV55]](s32), [[COPY56]](p5) :: (store 4 into %stack.0 + 220, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV55]](s32), [[COPY56]](p5) :: (store (s32) into %stack.0 + 220, basealign 256, addrspace 5) ; CHECK: [[C59:%[0-9]+]]:_(s32) = G_CONSTANT i32 224 ; CHECK: [[PTR_ADD58:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C59]](s32) ; CHECK: [[COPY57:%[0-9]+]]:_(p5) = COPY [[PTR_ADD58]](p5) - ; CHECK: G_STORE [[UV56]](s32), [[COPY57]](p5) :: (store 4 into %stack.0 + 224, align 32, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV56]](s32), [[COPY57]](p5) :: (store (s32) into %stack.0 + 224, align 32, basealign 256, addrspace 5) ; CHECK: [[C60:%[0-9]+]]:_(s32) = G_CONSTANT i32 228 ; CHECK: [[PTR_ADD59:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C60]](s32) ; CHECK: [[COPY58:%[0-9]+]]:_(p5) = COPY [[PTR_ADD59]](p5) - ; CHECK: G_STORE [[UV57]](s32), [[COPY58]](p5) :: (store 4 into %stack.0 + 228, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV57]](s32), [[COPY58]](p5) :: (store (s32) into %stack.0 + 228, basealign 256, addrspace 5) ; CHECK: [[C61:%[0-9]+]]:_(s32) = G_CONSTANT i32 232 ; CHECK: [[PTR_ADD60:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C61]](s32) ; CHECK: [[COPY59:%[0-9]+]]:_(p5) = COPY [[PTR_ADD60]](p5) - ; CHECK: G_STORE [[UV58]](s32), [[COPY59]](p5) :: (store 4 into %stack.0 + 232, align 8, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV58]](s32), [[COPY59]](p5) :: (store (s32) into %stack.0 + 232, align 8, basealign 256, addrspace 5) ; CHECK: [[C62:%[0-9]+]]:_(s32) = G_CONSTANT i32 236 ; CHECK: [[PTR_ADD61:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C62]](s32) ; CHECK: [[COPY60:%[0-9]+]]:_(p5) = COPY [[PTR_ADD61]](p5) - ; CHECK: G_STORE [[UV59]](s32), [[COPY60]](p5) :: (store 4 into %stack.0 + 236, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV59]](s32), [[COPY60]](p5) :: (store (s32) into %stack.0 + 236, basealign 256, addrspace 5) ; CHECK: [[C63:%[0-9]+]]:_(s32) = G_CONSTANT i32 240 ; CHECK: [[PTR_ADD62:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C63]](s32) ; CHECK: [[COPY61:%[0-9]+]]:_(p5) = COPY [[PTR_ADD62]](p5) - ; CHECK: G_STORE [[UV60]](s32), [[COPY61]](p5) :: (store 4 into %stack.0 + 240, align 16, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV60]](s32), [[COPY61]](p5) :: (store (s32) into %stack.0 + 240, align 16, basealign 256, addrspace 5) ; CHECK: [[C64:%[0-9]+]]:_(s32) = G_CONSTANT i32 244 ; CHECK: [[PTR_ADD63:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C64]](s32) ; CHECK: [[COPY62:%[0-9]+]]:_(p5) = COPY [[PTR_ADD63]](p5) - ; CHECK: G_STORE [[UV61]](s32), [[COPY62]](p5) :: (store 4 into %stack.0 + 244, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV61]](s32), [[COPY62]](p5) :: (store (s32) into %stack.0 + 244, basealign 256, addrspace 5) ; CHECK: [[C65:%[0-9]+]]:_(s32) = G_CONSTANT i32 248 ; CHECK: [[PTR_ADD64:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C65]](s32) ; CHECK: [[COPY63:%[0-9]+]]:_(p5) = COPY [[PTR_ADD64]](p5) - ; CHECK: G_STORE [[UV62]](s32), [[COPY63]](p5) :: (store 4 into %stack.0 + 248, align 8, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV62]](s32), [[COPY63]](p5) :: (store (s32) into %stack.0 + 248, align 8, basealign 256, addrspace 5) ; CHECK: [[C66:%[0-9]+]]:_(s32) = G_CONSTANT i32 252 ; CHECK: [[PTR_ADD65:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C66]](s32) ; CHECK: [[COPY64:%[0-9]+]]:_(p5) = COPY [[PTR_ADD65]](p5) - ; CHECK: G_STORE [[UV63]](s32), [[COPY64]](p5) :: (store 4 into %stack.0 + 252, basealign 256, addrspace 5) + ; CHECK: G_STORE [[UV63]](s32), [[COPY64]](p5) :: (store (s32) into %stack.0 + 252, basealign 256, addrspace 5) ; CHECK: [[C67:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C67]] ; CHECK: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[C4]] ; CHECK: [[PTR_ADD66:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[MUL]](s32) - ; CHECK: G_STORE [[C3]](s32), [[PTR_ADD66]](p5) :: (store 4, addrspace 5) - ; CHECK: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (load 4, align 256, addrspace 5) - ; CHECK: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 4 from unknown-address + 4, addrspace 5) - ; CHECK: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 4 from unknown-address + 8, align 8, addrspace 5) - ; CHECK: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 4 from unknown-address + 12, addrspace 5) - ; CHECK: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 4 from unknown-address + 16, align 16, addrspace 5) - ; CHECK: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 4 from unknown-address + 20, addrspace 5) - ; CHECK: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 4 from unknown-address + 24, align 8, addrspace 5) - ; CHECK: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 4 from unknown-address + 28, addrspace 5) - ; CHECK: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 4 from unknown-address + 32, align 32, addrspace 5) - ; CHECK: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load 4 from unknown-address + 36, addrspace 5) - ; CHECK: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load 4 from unknown-address + 40, align 8, addrspace 5) - ; CHECK: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load 4 from unknown-address + 44, addrspace 5) - ; CHECK: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load 4 from unknown-address + 48, align 16, addrspace 5) - ; CHECK: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p5) :: (load 4 from unknown-address + 52, addrspace 5) - ; CHECK: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p5) :: (load 4 from unknown-address + 56, align 8, addrspace 5) - ; CHECK: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p5) :: (load 4 from unknown-address + 60, addrspace 5) - ; CHECK: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p5) :: (load 4 from unknown-address + 64, align 64, addrspace 5) - ; CHECK: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p5) :: (load 4 from unknown-address + 68, addrspace 5) - ; CHECK: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p5) :: (load 4 from unknown-address + 72, align 8, addrspace 5) - ; CHECK: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p5) :: (load 4 from unknown-address + 76, addrspace 5) - ; CHECK: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p5) :: (load 4 from unknown-address + 80, align 16, addrspace 5) - ; CHECK: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p5) :: (load 4 from unknown-address + 84, addrspace 5) - ; CHECK: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p5) :: (load 4 from unknown-address + 88, align 8, addrspace 5) - ; CHECK: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p5) :: (load 4 from unknown-address + 92, addrspace 5) - ; CHECK: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p5) :: (load 4 from unknown-address + 96, align 32, addrspace 5) - ; CHECK: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p5) :: (load 4 from unknown-address + 100, addrspace 5) - ; CHECK: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p5) :: (load 4 from unknown-address + 104, align 8, addrspace 5) - ; CHECK: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p5) :: (load 4 from unknown-address + 108, addrspace 5) - ; CHECK: [[LOAD32:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p5) :: (load 4 from unknown-address + 112, align 16, addrspace 5) - ; CHECK: [[LOAD33:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD31]](p5) :: (load 4 from unknown-address + 116, addrspace 5) - ; CHECK: [[LOAD34:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD32]](p5) :: (load 4 from unknown-address + 120, align 8, addrspace 5) - ; CHECK: [[LOAD35:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD33]](p5) :: (load 4 from unknown-address + 124, addrspace 5) - ; CHECK: [[LOAD36:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD34]](p5) :: (load 4 from unknown-address + 128, align 128, addrspace 5) - ; CHECK: [[LOAD37:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD35]](p5) :: (load 4 from unknown-address + 132, addrspace 5) - ; CHECK: [[LOAD38:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD36]](p5) :: (load 4 from unknown-address + 136, align 8, addrspace 5) - ; CHECK: [[LOAD39:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD37]](p5) :: (load 4 from unknown-address + 140, addrspace 5) - ; CHECK: [[LOAD40:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD38]](p5) :: (load 4 from unknown-address + 144, align 16, addrspace 5) - ; CHECK: [[LOAD41:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD39]](p5) :: (load 4 from unknown-address + 148, addrspace 5) - ; CHECK: [[LOAD42:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD40]](p5) :: (load 4 from unknown-address + 152, align 8, addrspace 5) - ; CHECK: [[LOAD43:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD41]](p5) :: (load 4 from unknown-address + 156, addrspace 5) - ; CHECK: [[LOAD44:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD42]](p5) :: (load 4 from unknown-address + 160, align 32, addrspace 5) - ; CHECK: [[LOAD45:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD43]](p5) :: (load 4 from unknown-address + 164, addrspace 5) - ; CHECK: [[LOAD46:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD44]](p5) :: (load 4 from unknown-address + 168, align 8, addrspace 5) - ; CHECK: [[LOAD47:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD45]](p5) :: (load 4 from unknown-address + 172, addrspace 5) - ; CHECK: [[LOAD48:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD46]](p5) :: (load 4 from unknown-address + 176, align 16, addrspace 5) - ; CHECK: [[LOAD49:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD47]](p5) :: (load 4 from unknown-address + 180, addrspace 5) - ; CHECK: [[LOAD50:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD48]](p5) :: (load 4 from unknown-address + 184, align 8, addrspace 5) - ; CHECK: [[LOAD51:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD49]](p5) :: (load 4 from unknown-address + 188, addrspace 5) - ; CHECK: [[LOAD52:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD50]](p5) :: (load 4 from unknown-address + 192, align 64, addrspace 5) - ; CHECK: [[LOAD53:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD51]](p5) :: (load 4 from unknown-address + 196, addrspace 5) - ; CHECK: [[LOAD54:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD52]](p5) :: (load 4 from unknown-address + 200, align 8, addrspace 5) - ; CHECK: [[LOAD55:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD53]](p5) :: (load 4 from unknown-address + 204, addrspace 5) - ; CHECK: [[LOAD56:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD54]](p5) :: (load 4 from unknown-address + 208, align 16, addrspace 5) - ; CHECK: [[LOAD57:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD55]](p5) :: (load 4 from unknown-address + 212, addrspace 5) - ; CHECK: [[LOAD58:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD56]](p5) :: (load 4 from unknown-address + 216, align 8, addrspace 5) - ; CHECK: [[LOAD59:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD57]](p5) :: (load 4 from unknown-address + 220, addrspace 5) - ; CHECK: [[LOAD60:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD58]](p5) :: (load 4 from unknown-address + 224, align 32, addrspace 5) - ; CHECK: [[LOAD61:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD59]](p5) :: (load 4 from unknown-address + 228, addrspace 5) - ; CHECK: [[LOAD62:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD60]](p5) :: (load 4 from unknown-address + 232, align 8, addrspace 5) - ; CHECK: [[LOAD63:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD61]](p5) :: (load 4 from unknown-address + 236, addrspace 5) - ; CHECK: [[LOAD64:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD62]](p5) :: (load 4 from unknown-address + 240, align 16, addrspace 5) - ; CHECK: [[LOAD65:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD63]](p5) :: (load 4 from unknown-address + 244, addrspace 5) - ; CHECK: [[LOAD66:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD64]](p5) :: (load 4 from unknown-address + 248, align 8, addrspace 5) - ; CHECK: [[LOAD67:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD65]](p5) :: (load 4 from unknown-address + 252, addrspace 5) + ; CHECK: G_STORE [[C3]](s32), [[PTR_ADD66]](p5) :: (store (s32), addrspace 5) + ; CHECK: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (load (s32), align 256, addrspace 5) + ; CHECK: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) + ; CHECK: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) + ; CHECK: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) + ; CHECK: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5) + ; CHECK: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) + ; CHECK: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5) + ; CHECK: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) + ; CHECK: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s32) from unknown-address + 32, align 32, addrspace 5) + ; CHECK: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s32) from unknown-address + 36, addrspace 5) + ; CHECK: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s32) from unknown-address + 40, align 8, addrspace 5) + ; CHECK: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s32) from unknown-address + 44, addrspace 5) + ; CHECK: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s32) from unknown-address + 48, align 16, addrspace 5) + ; CHECK: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p5) :: (load (s32) from unknown-address + 52, addrspace 5) + ; CHECK: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p5) :: (load (s32) from unknown-address + 56, align 8, addrspace 5) + ; CHECK: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p5) :: (load (s32) from unknown-address + 60, addrspace 5) + ; CHECK: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p5) :: (load (s32) from unknown-address + 64, align 64, addrspace 5) + ; CHECK: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p5) :: (load (s32) from unknown-address + 68, addrspace 5) + ; CHECK: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p5) :: (load (s32) from unknown-address + 72, align 8, addrspace 5) + ; CHECK: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p5) :: (load (s32) from unknown-address + 76, addrspace 5) + ; CHECK: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p5) :: (load (s32) from unknown-address + 80, align 16, addrspace 5) + ; CHECK: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p5) :: (load (s32) from unknown-address + 84, addrspace 5) + ; CHECK: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p5) :: (load (s32) from unknown-address + 88, align 8, addrspace 5) + ; CHECK: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p5) :: (load (s32) from unknown-address + 92, addrspace 5) + ; CHECK: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p5) :: (load (s32) from unknown-address + 96, align 32, addrspace 5) + ; CHECK: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p5) :: (load (s32) from unknown-address + 100, addrspace 5) + ; CHECK: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p5) :: (load (s32) from unknown-address + 104, align 8, addrspace 5) + ; CHECK: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p5) :: (load (s32) from unknown-address + 108, addrspace 5) + ; CHECK: [[LOAD32:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p5) :: (load (s32) from unknown-address + 112, align 16, addrspace 5) + ; CHECK: [[LOAD33:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD31]](p5) :: (load (s32) from unknown-address + 116, addrspace 5) + ; CHECK: [[LOAD34:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD32]](p5) :: (load (s32) from unknown-address + 120, align 8, addrspace 5) + ; CHECK: [[LOAD35:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD33]](p5) :: (load (s32) from unknown-address + 124, addrspace 5) + ; CHECK: [[LOAD36:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD34]](p5) :: (load (s32) from unknown-address + 128, align 128, addrspace 5) + ; CHECK: [[LOAD37:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD35]](p5) :: (load (s32) from unknown-address + 132, addrspace 5) + ; CHECK: [[LOAD38:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD36]](p5) :: (load (s32) from unknown-address + 136, align 8, addrspace 5) + ; CHECK: [[LOAD39:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD37]](p5) :: (load (s32) from unknown-address + 140, addrspace 5) + ; CHECK: [[LOAD40:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD38]](p5) :: (load (s32) from unknown-address + 144, align 16, addrspace 5) + ; CHECK: [[LOAD41:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD39]](p5) :: (load (s32) from unknown-address + 148, addrspace 5) + ; CHECK: [[LOAD42:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD40]](p5) :: (load (s32) from unknown-address + 152, align 8, addrspace 5) + ; CHECK: [[LOAD43:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD41]](p5) :: (load (s32) from unknown-address + 156, addrspace 5) + ; CHECK: [[LOAD44:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD42]](p5) :: (load (s32) from unknown-address + 160, align 32, addrspace 5) + ; CHECK: [[LOAD45:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD43]](p5) :: (load (s32) from unknown-address + 164, addrspace 5) + ; CHECK: [[LOAD46:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD44]](p5) :: (load (s32) from unknown-address + 168, align 8, addrspace 5) + ; CHECK: [[LOAD47:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD45]](p5) :: (load (s32) from unknown-address + 172, addrspace 5) + ; CHECK: [[LOAD48:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD46]](p5) :: (load (s32) from unknown-address + 176, align 16, addrspace 5) + ; CHECK: [[LOAD49:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD47]](p5) :: (load (s32) from unknown-address + 180, addrspace 5) + ; CHECK: [[LOAD50:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD48]](p5) :: (load (s32) from unknown-address + 184, align 8, addrspace 5) + ; CHECK: [[LOAD51:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD49]](p5) :: (load (s32) from unknown-address + 188, addrspace 5) + ; CHECK: [[LOAD52:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD50]](p5) :: (load (s32) from unknown-address + 192, align 64, addrspace 5) + ; CHECK: [[LOAD53:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD51]](p5) :: (load (s32) from unknown-address + 196, addrspace 5) + ; CHECK: [[LOAD54:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD52]](p5) :: (load (s32) from unknown-address + 200, align 8, addrspace 5) + ; CHECK: [[LOAD55:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD53]](p5) :: (load (s32) from unknown-address + 204, addrspace 5) + ; CHECK: [[LOAD56:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD54]](p5) :: (load (s32) from unknown-address + 208, align 16, addrspace 5) + ; CHECK: [[LOAD57:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD55]](p5) :: (load (s32) from unknown-address + 212, addrspace 5) + ; CHECK: [[LOAD58:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD56]](p5) :: (load (s32) from unknown-address + 216, align 8, addrspace 5) + ; CHECK: [[LOAD59:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD57]](p5) :: (load (s32) from unknown-address + 220, addrspace 5) + ; CHECK: [[LOAD60:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD58]](p5) :: (load (s32) from unknown-address + 224, align 32, addrspace 5) + ; CHECK: [[LOAD61:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD59]](p5) :: (load (s32) from unknown-address + 228, addrspace 5) + ; CHECK: [[LOAD62:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD60]](p5) :: (load (s32) from unknown-address + 232, align 8, addrspace 5) + ; CHECK: [[LOAD63:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD61]](p5) :: (load (s32) from unknown-address + 236, addrspace 5) + ; CHECK: [[LOAD64:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD62]](p5) :: (load (s32) from unknown-address + 240, align 16, addrspace 5) + ; CHECK: [[LOAD65:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD63]](p5) :: (load (s32) from unknown-address + 244, addrspace 5) + ; CHECK: [[LOAD66:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD64]](p5) :: (load (s32) from unknown-address + 248, align 8, addrspace 5) + ; CHECK: [[LOAD67:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD65]](p5) :: (load (s32) from unknown-address + 252, addrspace 5) ; CHECK: [[COPY65:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32) @@ -734,56 +734,56 @@ body: | ; CHECK: [[BUILD_VECTOR13:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD56]](s32), [[LOAD57]](s32), [[LOAD58]](s32), [[LOAD59]](s32) ; CHECK: [[BUILD_VECTOR14:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD60]](s32), [[LOAD61]](s32), [[LOAD62]](s32), [[LOAD63]](s32) ; CHECK: [[BUILD_VECTOR15:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD64]](s32), [[LOAD65]](s32), [[LOAD66]](s32), [[LOAD67]](s32) - ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY65]](p1) :: (store 16, align 4, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY65]](p1) :: (store (s128), align 4, addrspace 1) ; CHECK: [[C68:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK: [[PTR_ADD67:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C68]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD67]](p1) :: (store 16 into unknown-address + 16, align 4, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD67]](p1) :: (store (s128) into unknown-address + 16, align 4, addrspace 1) ; CHECK: [[C69:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CHECK: [[PTR_ADD68:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C69]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR2]](<4 x s32>), [[PTR_ADD68]](p1) :: (store 16 into unknown-address + 32, align 4, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR2]](<4 x s32>), [[PTR_ADD68]](p1) :: (store (s128) into unknown-address + 32, align 4, addrspace 1) ; CHECK: [[C70:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 ; CHECK: [[PTR_ADD69:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C70]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR3]](<4 x s32>), [[PTR_ADD69]](p1) :: (store 16 into unknown-address + 48, align 4, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR3]](<4 x s32>), [[PTR_ADD69]](p1) :: (store (s128) into unknown-address + 48, align 4, addrspace 1) ; CHECK: [[PTR_ADD70:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR4]](<4 x s32>), [[PTR_ADD70]](p1) :: (store 16 into unknown-address + 64, align 4, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR4]](<4 x s32>), [[PTR_ADD70]](p1) :: (store (s128) into unknown-address + 64, align 4, addrspace 1) ; CHECK: [[C71:%[0-9]+]]:_(s64) = G_CONSTANT i64 80 ; CHECK: [[PTR_ADD71:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C71]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR5]](<4 x s32>), [[PTR_ADD71]](p1) :: (store 16 into unknown-address + 80, align 4, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR5]](<4 x s32>), [[PTR_ADD71]](p1) :: (store (s128) into unknown-address + 80, align 4, addrspace 1) ; CHECK: [[C72:%[0-9]+]]:_(s64) = G_CONSTANT i64 96 ; CHECK: [[PTR_ADD72:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C72]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR6]](<4 x s32>), [[PTR_ADD72]](p1) :: (store 16 into unknown-address + 96, align 4, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR6]](<4 x s32>), [[PTR_ADD72]](p1) :: (store (s128) into unknown-address + 96, align 4, addrspace 1) ; CHECK: [[C73:%[0-9]+]]:_(s64) = G_CONSTANT i64 112 ; CHECK: [[PTR_ADD73:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C73]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR7]](<4 x s32>), [[PTR_ADD73]](p1) :: (store 16 into unknown-address + 112, align 4, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR7]](<4 x s32>), [[PTR_ADD73]](p1) :: (store (s128) into unknown-address + 112, align 4, addrspace 1) ; CHECK: [[PTR_ADD74:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C1]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR8]](<4 x s32>), [[PTR_ADD74]](p1) :: (store 16 into unknown-address + 128, align 4, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR8]](<4 x s32>), [[PTR_ADD74]](p1) :: (store (s128) into unknown-address + 128, align 4, addrspace 1) ; CHECK: [[C74:%[0-9]+]]:_(s64) = G_CONSTANT i64 144 ; CHECK: [[PTR_ADD75:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C74]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR9]](<4 x s32>), [[PTR_ADD75]](p1) :: (store 16 into unknown-address + 144, align 4, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR9]](<4 x s32>), [[PTR_ADD75]](p1) :: (store (s128) into unknown-address + 144, align 4, addrspace 1) ; CHECK: [[C75:%[0-9]+]]:_(s64) = G_CONSTANT i64 160 ; CHECK: [[PTR_ADD76:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C75]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR10]](<4 x s32>), [[PTR_ADD76]](p1) :: (store 16 into unknown-address + 160, align 4, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR10]](<4 x s32>), [[PTR_ADD76]](p1) :: (store (s128) into unknown-address + 160, align 4, addrspace 1) ; CHECK: [[C76:%[0-9]+]]:_(s64) = G_CONSTANT i64 176 ; CHECK: [[PTR_ADD77:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C76]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR11]](<4 x s32>), [[PTR_ADD77]](p1) :: (store 16 into unknown-address + 176, align 4, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR11]](<4 x s32>), [[PTR_ADD77]](p1) :: (store (s128) into unknown-address + 176, align 4, addrspace 1) ; CHECK: [[PTR_ADD78:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C2]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR12]](<4 x s32>), [[PTR_ADD78]](p1) :: (store 16 into unknown-address + 192, align 4, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR12]](<4 x s32>), [[PTR_ADD78]](p1) :: (store (s128) into unknown-address + 192, align 4, addrspace 1) ; CHECK: [[C77:%[0-9]+]]:_(s64) = G_CONSTANT i64 208 ; CHECK: [[PTR_ADD79:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C77]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR13]](<4 x s32>), [[PTR_ADD79]](p1) :: (store 16 into unknown-address + 208, align 4, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR13]](<4 x s32>), [[PTR_ADD79]](p1) :: (store (s128) into unknown-address + 208, align 4, addrspace 1) ; CHECK: [[C78:%[0-9]+]]:_(s64) = G_CONSTANT i64 224 ; CHECK: [[PTR_ADD80:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C78]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR14]](<4 x s32>), [[PTR_ADD80]](p1) :: (store 16 into unknown-address + 224, align 4, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR14]](<4 x s32>), [[PTR_ADD80]](p1) :: (store (s128) into unknown-address + 224, align 4, addrspace 1) ; CHECK: [[C79:%[0-9]+]]:_(s64) = G_CONSTANT i64 240 ; CHECK: [[PTR_ADD81:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C79]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR15]](<4 x s32>), [[PTR_ADD81]](p1) :: (store 16 into unknown-address + 240, align 4, addrspace 1) + ; CHECK: G_STORE [[BUILD_VECTOR15]](<4 x s32>), [[PTR_ADD81]](p1) :: (store (s128) into unknown-address + 240, align 4, addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 - %2:_(<64 x s32>) = G_LOAD %0 :: (load 256, align 4, addrspace 4) + %2:_(<64 x s32>) = G_LOAD %0 :: (load (<64 x s32>), align 4, addrspace 4) %3:_(s32) = G_CONSTANT i32 12345 %4:_(<64 x s32>) = G_INSERT_VECTOR_ELT %2, %3, %1 %5:_(p1) = COPY $vgpr0_vgpr1 - G_STORE %4, %5 :: (store 256, align 4, addrspace 1) + G_STORE %4, %5 :: (store (<64 x s32>), align 4, addrspace 1) ... --- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.atomic.dim.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.atomic.dim.a16.ll index 00a4b468e7d95..a8cbf4f3acbcc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.atomic.dim.a16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.atomic.dim.a16.ll @@ -18,7 +18,7 @@ define amdgpu_ps float @atomic_swap_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.swap.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.swap.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_swap_1d @@ -36,7 +36,7 @@ define amdgpu_ps float @atomic_swap_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.swap.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.swap.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: @@ -61,7 +61,7 @@ define amdgpu_ps float @atomic_add_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_add_1d @@ -79,7 +79,7 @@ define amdgpu_ps float @atomic_add_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: @@ -104,7 +104,7 @@ define amdgpu_ps float @atomic_sub_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.sub.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.sub.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_sub_1d @@ -122,7 +122,7 @@ define amdgpu_ps float @atomic_sub_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.sub.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.sub.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: @@ -147,7 +147,7 @@ define amdgpu_ps float @atomic_smin_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smin.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smin.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_smin_1d @@ -165,7 +165,7 @@ define amdgpu_ps float @atomic_smin_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smin.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smin.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: @@ -191,7 +191,7 @@ define amdgpu_ps float @atomic_umin_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umin.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umin.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_umin_1d @@ -209,7 +209,7 @@ define amdgpu_ps float @atomic_umin_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umin.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umin.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: @@ -234,7 +234,7 @@ define amdgpu_ps float @atomic_smax_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smax.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smax.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_smax_1d @@ -252,7 +252,7 @@ define amdgpu_ps float @atomic_smax_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smax.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smax.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: @@ -277,7 +277,7 @@ define amdgpu_ps float @atomic_umax_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umax.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umax.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_umax_1d @@ -295,7 +295,7 @@ define amdgpu_ps float @atomic_umax_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umax.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umax.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: @@ -320,7 +320,7 @@ define amdgpu_ps float @atomic_and_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.and.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.and.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_and_1d @@ -338,7 +338,7 @@ define amdgpu_ps float @atomic_and_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.and.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.and.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: @@ -363,7 +363,7 @@ define amdgpu_ps float @atomic_or_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) { ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.or.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.or.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_or_1d @@ -381,7 +381,7 @@ define amdgpu_ps float @atomic_or_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) { ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.or.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.or.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: @@ -406,7 +406,7 @@ define amdgpu_ps float @atomic_xor_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.xor.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.xor.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_xor_1d @@ -424,7 +424,7 @@ define amdgpu_ps float @atomic_xor_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.xor.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.xor.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: @@ -449,7 +449,7 @@ define amdgpu_ps float @atomic_inc_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.inc.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.inc.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_inc_1d @@ -467,7 +467,7 @@ define amdgpu_ps float @atomic_inc_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.inc.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.inc.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: @@ -492,7 +492,7 @@ define amdgpu_ps float @atomic_dec_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.dec.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.dec.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_dec_1d @@ -510,7 +510,7 @@ define amdgpu_ps float @atomic_dec_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.dec.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.dec.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: @@ -537,7 +537,7 @@ define amdgpu_ps float @atomic_cmpswap_1d(<8 x i32> inreg %rsrc, i32 %cmp, i32 % ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_cmpswap_1d @@ -557,7 +557,7 @@ define amdgpu_ps float @atomic_cmpswap_1d(<8 x i32> inreg %rsrc, i32 %cmp, i32 % ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: @@ -585,7 +585,7 @@ define amdgpu_ps float @atomic_add_2d(<8 x i32> inreg %rsrc, i32 %data, i16 %s, ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32) ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2d), [[COPY8]](s32), [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2d), [[COPY8]](s32), [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_add_2d @@ -606,7 +606,7 @@ define amdgpu_ps float @atomic_add_2d(<8 x i32> inreg %rsrc, i32 %data, i16 %s, ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32) ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32) ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2d), [[COPY8]](s32), [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2d), [[COPY8]](s32), [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: @@ -639,7 +639,7 @@ define amdgpu_ps float @atomic_add_3d(<8 x i32> inreg %rsrc, i32 %data, i16 %s, ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.3d), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.3d), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_add_3d @@ -665,7 +665,7 @@ define amdgpu_ps float @atomic_add_3d(<8 x i32> inreg %rsrc, i32 %data, i16 %s, ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32) ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.3d), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.3d), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: @@ -698,7 +698,7 @@ define amdgpu_ps float @atomic_add_cube(<8 x i32> inreg %rsrc, i32 %data, i16 %s ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.cube), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.cube), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_add_cube @@ -724,7 +724,7 @@ define amdgpu_ps float @atomic_add_cube(<8 x i32> inreg %rsrc, i32 %data, i16 %s ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32) ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.cube), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.cube), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: @@ -752,7 +752,7 @@ define amdgpu_ps float @atomic_add_1darray(<8 x i32> inreg %rsrc, i32 %data, i16 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32) ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1darray), [[COPY8]](s32), [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1darray), [[COPY8]](s32), [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_add_1darray @@ -773,7 +773,7 @@ define amdgpu_ps float @atomic_add_1darray(<8 x i32> inreg %rsrc, i32 %data, i16 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32) ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32) ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1darray), [[COPY8]](s32), [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1darray), [[COPY8]](s32), [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: @@ -806,7 +806,7 @@ define amdgpu_ps float @atomic_add_2darray(<8 x i32> inreg %rsrc, i32 %data, i16 ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darray), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darray), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_add_2darray @@ -832,7 +832,7 @@ define amdgpu_ps float @atomic_add_2darray(<8 x i32> inreg %rsrc, i32 %data, i16 ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32) ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darray), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darray), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: @@ -865,7 +865,7 @@ define amdgpu_ps float @atomic_add_2dmsaa(<8 x i32> inreg %rsrc, i32 %data, i16 ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2dmsaa), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2dmsaa), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_add_2dmsaa @@ -891,7 +891,7 @@ define amdgpu_ps float @atomic_add_2dmsaa(<8 x i32> inreg %rsrc, i32 %data, i16 ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32) ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2dmsaa), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2dmsaa), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: @@ -925,7 +925,7 @@ define amdgpu_ps float @atomic_add_2darraymsaa(<8 x i32> inreg %rsrc, i32 %data, ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darraymsaa), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darraymsaa), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_add_2darraymsaa @@ -952,7 +952,7 @@ define amdgpu_ps float @atomic_add_2darraymsaa(<8 x i32> inreg %rsrc, i32 %data, ; GFX10NSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darraymsaa), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darraymsaa), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: @@ -977,7 +977,7 @@ define amdgpu_ps float @atomic_add_1d_slc(<8 x i32> inreg %rsrc, i32 %data, i16 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_add_1d_slc @@ -995,7 +995,7 @@ define amdgpu_ps float @atomic_add_1d_slc(<8 x i32> inreg %rsrc, i32 %data, i16 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: @@ -1025,7 +1025,7 @@ define amdgpu_ps float @atomic_cmpswap_2d(<8 x i32> inreg %rsrc, i32 %cmp, i32 % ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32) ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_cmpswap_2d @@ -1048,7 +1048,7 @@ define amdgpu_ps float @atomic_cmpswap_2d(<8 x i32> inreg %rsrc, i32 %cmp, i32 % ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32) ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32) ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: @@ -1083,7 +1083,7 @@ define amdgpu_ps float @atomic_cmpswap_3d(<8 x i32> inreg %rsrc, i32 %cmp, i32 % ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.3d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.3d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_cmpswap_3d @@ -1111,7 +1111,7 @@ define amdgpu_ps float @atomic_cmpswap_3d(<8 x i32> inreg %rsrc, i32 %cmp, i32 % ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[DEF]](s32) ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.3d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.3d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: @@ -1147,7 +1147,7 @@ define amdgpu_ps float @atomic_cmpswap_2darraymsaa(<8 x i32> inreg %rsrc, i32 %c ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2darraymsaa), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2darraymsaa), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: atomic_cmpswap_2darraymsaa @@ -1176,7 +1176,7 @@ define amdgpu_ps float @atomic_cmpswap_2darraymsaa(<8 x i32> inreg %rsrc, i32 %c ; GFX10NSA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32) ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2darraymsaa), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2darraymsaa), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll index 1c0ad3692b363..dd4c713726534 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll @@ -20,7 +20,7 @@ define amdgpu_ps <4 x float> @load_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -44,7 +44,7 @@ define amdgpu_ps <4 x float> @load_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) @@ -80,7 +80,7 @@ define amdgpu_ps <4 x float> @load_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -109,7 +109,7 @@ define amdgpu_ps <4 x float> @load_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) @@ -153,7 +153,7 @@ define amdgpu_ps <4 x float> @load_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_l ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -189,7 +189,7 @@ define amdgpu_ps <4 x float> @load_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_l ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32) ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) @@ -234,7 +234,7 @@ define amdgpu_ps <4 x float> @load_cube(<8 x i32> inreg %rsrc, <2 x i16> %coords ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -270,7 +270,7 @@ define amdgpu_ps <4 x float> @load_cube(<8 x i32> inreg %rsrc, <2 x i16> %coords ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32) ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) @@ -308,7 +308,7 @@ define amdgpu_ps <4 x float> @load_1darray(<8 x i32> inreg %rsrc, <2 x i16> %coo ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1darray), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1darray), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -337,7 +337,7 @@ define amdgpu_ps <4 x float> @load_1darray(<8 x i32> inreg %rsrc, <2 x i16> %coo ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1darray), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1darray), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) @@ -381,7 +381,7 @@ define amdgpu_ps <4 x float> @load_2darray(<8 x i32> inreg %rsrc, <2 x i16> %coo ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -417,7 +417,7 @@ define amdgpu_ps <4 x float> @load_2darray(<8 x i32> inreg %rsrc, <2 x i16> %coo ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32) ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) @@ -462,7 +462,7 @@ define amdgpu_ps <4 x float> @load_2dmsaa(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2dmsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2dmsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -498,7 +498,7 @@ define amdgpu_ps <4 x float> @load_2dmsaa(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32) ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2dmsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2dmsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) @@ -545,7 +545,7 @@ define amdgpu_ps <4 x float> @load_2darraymsaa(<8 x i32> inreg %rsrc, <2 x i16> ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -583,7 +583,7 @@ define amdgpu_ps <4 x float> @load_2darraymsaa(<8 x i32> inreg %rsrc, <2 x i16> ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32) ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) @@ -622,7 +622,7 @@ define amdgpu_ps <4 x float> @load_mip_1d(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -651,7 +651,7 @@ define amdgpu_ps <4 x float> @load_mip_1d(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) @@ -695,7 +695,7 @@ define amdgpu_ps <4 x float> @load_mip_2d(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -731,7 +731,7 @@ define amdgpu_ps <4 x float> @load_mip_2d(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32) ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) @@ -778,7 +778,7 @@ define amdgpu_ps <4 x float> @load_mip_3d(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -816,7 +816,7 @@ define amdgpu_ps <4 x float> @load_mip_3d(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32) ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) @@ -864,7 +864,7 @@ define amdgpu_ps <4 x float> @load_mip_cube(<8 x i32> inreg %rsrc, <2 x i16> %co ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -902,7 +902,7 @@ define amdgpu_ps <4 x float> @load_mip_cube(<8 x i32> inreg %rsrc, <2 x i16> %co ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32) ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) @@ -948,7 +948,7 @@ define amdgpu_ps <4 x float> @load_mip_1darray(<8 x i32> inreg %rsrc, <2 x i16> ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -984,7 +984,7 @@ define amdgpu_ps <4 x float> @load_mip_1darray(<8 x i32> inreg %rsrc, <2 x i16> ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32) ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) @@ -1031,7 +1031,7 @@ define amdgpu_ps <4 x float> @load_mip_2darray(<8 x i32> inreg %rsrc, <2 x i16> ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -1069,7 +1069,7 @@ define amdgpu_ps <4 x float> @load_mip_2darray(<8 x i32> inreg %rsrc, <2 x i16> ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32) ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) @@ -1108,7 +1108,7 @@ define amdgpu_ps void @store_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "ImageResource") + ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX9: S_ENDPGM 0 ; GFX10NSA-LABEL: name: store_1d ; GFX10NSA: bb.1.main_body: @@ -1132,7 +1132,7 @@ define amdgpu_ps void @store_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "ImageResource") + ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX10NSA: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -1168,7 +1168,7 @@ define amdgpu_ps void @store_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32) - ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "ImageResource") + ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX9: S_ENDPGM 0 ; GFX10NSA-LABEL: name: store_2d ; GFX10NSA: bb.1.main_body: @@ -1197,7 +1197,7 @@ define amdgpu_ps void @store_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32) - ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "ImageResource") + ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX10NSA: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -1241,7 +1241,7 @@ define amdgpu_ps void @store_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "ImageResource") + ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX9: S_ENDPGM 0 ; GFX10NSA-LABEL: name: store_3d ; GFX10NSA: bb.1.main_body: @@ -1277,7 +1277,7 @@ define amdgpu_ps void @store_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32) ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "ImageResource") + ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX10NSA: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -1322,7 +1322,7 @@ define amdgpu_ps void @store_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "ImageResource") + ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX9: S_ENDPGM 0 ; GFX10NSA-LABEL: name: store_cube ; GFX10NSA: bb.1.main_body: @@ -1358,7 +1358,7 @@ define amdgpu_ps void @store_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32) ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "ImageResource") + ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX10NSA: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -1396,7 +1396,7 @@ define amdgpu_ps void @store_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32) - ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "ImageResource") + ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX9: S_ENDPGM 0 ; GFX10NSA-LABEL: name: store_1darray ; GFX10NSA: bb.1.main_body: @@ -1425,7 +1425,7 @@ define amdgpu_ps void @store_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32) - ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "ImageResource") + ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX10NSA: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -1469,7 +1469,7 @@ define amdgpu_ps void @store_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "ImageResource") + ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX9: S_ENDPGM 0 ; GFX10NSA-LABEL: name: store_2darray ; GFX10NSA: bb.1.main_body: @@ -1505,7 +1505,7 @@ define amdgpu_ps void @store_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32) ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "ImageResource") + ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX10NSA: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -1550,7 +1550,7 @@ define amdgpu_ps void @store_2dmsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, < ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2dmsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "ImageResource") + ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2dmsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX9: S_ENDPGM 0 ; GFX10NSA-LABEL: name: store_2dmsaa ; GFX10NSA: bb.1.main_body: @@ -1586,7 +1586,7 @@ define amdgpu_ps void @store_2dmsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, < ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32) ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2dmsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "ImageResource") + ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2dmsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX10NSA: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -1633,7 +1633,7 @@ define amdgpu_ps void @store_2darraymsaa(<8 x i32> inreg %rsrc, <4 x float> %vda ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darraymsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "ImageResource") + ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darraymsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX9: S_ENDPGM 0 ; GFX10NSA-LABEL: name: store_2darraymsaa ; GFX10NSA: bb.1.main_body: @@ -1671,7 +1671,7 @@ define amdgpu_ps void @store_2darraymsaa(<8 x i32> inreg %rsrc, <4 x float> %vda ; GFX10NSA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32) ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darraymsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "ImageResource") + ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darraymsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX10NSA: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -1710,7 +1710,7 @@ define amdgpu_ps void @store_mip_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, < ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32) - ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "ImageResource") + ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX9: S_ENDPGM 0 ; GFX10NSA-LABEL: name: store_mip_1d ; GFX10NSA: bb.1.main_body: @@ -1739,7 +1739,7 @@ define amdgpu_ps void @store_mip_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, < ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32) - ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "ImageResource") + ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX10NSA: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -1783,7 +1783,7 @@ define amdgpu_ps void @store_mip_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, < ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "ImageResource") + ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX9: S_ENDPGM 0 ; GFX10NSA-LABEL: name: store_mip_2d ; GFX10NSA: bb.1.main_body: @@ -1819,7 +1819,7 @@ define amdgpu_ps void @store_mip_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, < ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32) ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "ImageResource") + ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX10NSA: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -1866,7 +1866,7 @@ define amdgpu_ps void @store_mip_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, < ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "ImageResource") + ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX9: S_ENDPGM 0 ; GFX10NSA-LABEL: name: store_mip_3d ; GFX10NSA: bb.1.main_body: @@ -1904,7 +1904,7 @@ define amdgpu_ps void @store_mip_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, < ; GFX10NSA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32) ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "ImageResource") + ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX10NSA: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -1952,7 +1952,7 @@ define amdgpu_ps void @store_mip_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "ImageResource") + ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX9: S_ENDPGM 0 ; GFX10NSA-LABEL: name: store_mip_cube ; GFX10NSA: bb.1.main_body: @@ -1990,7 +1990,7 @@ define amdgpu_ps void @store_mip_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, ; GFX10NSA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32) ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "ImageResource") + ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX10NSA: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -2036,7 +2036,7 @@ define amdgpu_ps void @store_mip_1darray(<8 x i32> inreg %rsrc, <4 x float> %vda ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "ImageResource") + ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX9: S_ENDPGM 0 ; GFX10NSA-LABEL: name: store_mip_1darray ; GFX10NSA: bb.1.main_body: @@ -2072,7 +2072,7 @@ define amdgpu_ps void @store_mip_1darray(<8 x i32> inreg %rsrc, <4 x float> %vda ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32) ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "ImageResource") + ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX10NSA: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -2119,7 +2119,7 @@ define amdgpu_ps void @store_mip_2darray(<8 x i32> inreg %rsrc, <4 x float> %vda ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "ImageResource") + ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX9: S_ENDPGM 0 ; GFX10NSA-LABEL: name: store_mip_2darray ; GFX10NSA: bb.1.main_body: @@ -2157,7 +2157,7 @@ define amdgpu_ps void @store_mip_2darray(<8 x i32> inreg %rsrc, <4 x float> %vda ; GFX10NSA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32) ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "ImageResource") + ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX10NSA: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords_lo, i32 0 @@ -2626,7 +2626,7 @@ define amdgpu_ps float @load_1d_V1(<8 x i32> inreg %rsrc, <2 x i16> %coords) { ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 8, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 4 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 8, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (s32) from custom "ImageResource") ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: load_1d_V1 @@ -2646,7 +2646,7 @@ define amdgpu_ps float @load_1d_V1(<8 x i32> inreg %rsrc, <2 x i16> %coords) { ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 8, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 4 from custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 8, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (s32) from custom "ImageResource") ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: @@ -2673,7 +2673,7 @@ define amdgpu_ps <2 x float> @load_1d_V2(<8 x i32> inreg %rsrc, <2 x i16> %coord ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 9, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 8 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 9, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<2 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -2695,7 +2695,7 @@ define amdgpu_ps <2 x float> @load_1d_V2(<8 x i32> inreg %rsrc, <2 x i16> %coord ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 9, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 8 from custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 9, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<2 x s32>) from custom "ImageResource") ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) @@ -2725,7 +2725,7 @@ define amdgpu_ps void @store_1d_V1(<8 x i32> inreg %rsrc, float %vdata, <2 x i16 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[COPY8]](s32), 2, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 4 into custom "ImageResource") + ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[COPY8]](s32), 2, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (s32) into custom "ImageResource") ; GFX9: S_ENDPGM 0 ; GFX10NSA-LABEL: name: store_1d_V1 ; GFX10NSA: bb.1.main_body: @@ -2745,7 +2745,7 @@ define amdgpu_ps void @store_1d_V1(<8 x i32> inreg %rsrc, float %vdata, <2 x i16 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[COPY8]](s32), 2, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 4 into custom "ImageResource") + ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[COPY8]](s32), 2, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (s32) into custom "ImageResource") ; GFX10NSA: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -2774,7 +2774,7 @@ define amdgpu_ps void @store_1d_V2(<8 x i32> inreg %rsrc, <2 x float> %vdata, <2 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<2 x s32>), 12, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 8 into custom "ImageResource") + ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<2 x s32>), 12, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<2 x s32>) into custom "ImageResource") ; GFX9: S_ENDPGM 0 ; GFX10NSA-LABEL: name: store_1d_V2 ; GFX10NSA: bb.1.main_body: @@ -2796,7 +2796,7 @@ define amdgpu_ps void @store_1d_V2(<8 x i32> inreg %rsrc, <2 x float> %vdata, <2 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<2 x s32>), 12, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 8 into custom "ImageResource") + ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<2 x s32>), 12, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<2 x s32>) into custom "ImageResource") ; GFX10NSA: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -2822,7 +2822,7 @@ define amdgpu_ps <4 x float> @load_1d_glc(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -2846,7 +2846,7 @@ define amdgpu_ps <4 x float> @load_1d_glc(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) @@ -2877,7 +2877,7 @@ define amdgpu_ps <4 x float> @load_1d_slc(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -2901,7 +2901,7 @@ define amdgpu_ps <4 x float> @load_1d_slc(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) @@ -2932,7 +2932,7 @@ define amdgpu_ps <4 x float> @load_1d_glc_slc(<8 x i32> inreg %rsrc, <2 x i16> % ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -2956,7 +2956,7 @@ define amdgpu_ps <4 x float> @load_1d_glc_slc(<8 x i32> inreg %rsrc, <2 x i16> % ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) @@ -2992,7 +2992,7 @@ define amdgpu_ps void @store_1d_glc(<8 x i32> inreg %rsrc, <4 x float> %vdata, < ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 3 :: (dereferenceable store 16 into custom "ImageResource") + ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX9: S_ENDPGM 0 ; GFX10NSA-LABEL: name: store_1d_glc ; GFX10NSA: bb.1.main_body: @@ -3016,7 +3016,7 @@ define amdgpu_ps void @store_1d_glc(<8 x i32> inreg %rsrc, <4 x float> %vdata, < ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 3 :: (dereferenceable store 16 into custom "ImageResource") + ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX10NSA: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -3047,7 +3047,7 @@ define amdgpu_ps void @store_1d_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, < ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (dereferenceable store 16 into custom "ImageResource") + ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX9: S_ENDPGM 0 ; GFX10NSA-LABEL: name: store_1d_slc ; GFX10NSA: bb.1.main_body: @@ -3071,7 +3071,7 @@ define amdgpu_ps void @store_1d_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, < ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (dereferenceable store 16 into custom "ImageResource") + ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX10NSA: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -3102,7 +3102,7 @@ define amdgpu_ps void @store_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x float> %vdat ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 3 :: (dereferenceable store 16 into custom "ImageResource") + ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX9: S_ENDPGM 0 ; GFX10NSA-LABEL: name: store_1d_glc_slc ; GFX10NSA: bb.1.main_body: @@ -3126,7 +3126,7 @@ define amdgpu_ps void @store_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x float> %vdat ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 3 :: (dereferenceable store 16 into custom "ImageResource") + ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") ; GFX10NSA: S_ENDPGM 0 main_body: %s = extractelement <2 x i16> %coords, i32 0 @@ -3198,9 +3198,9 @@ define amdgpu_ps <4 x float> @load_1d_tfe(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) - ; GFX9: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; GFX9: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) ; GFX9: $vgpr2 = COPY [[UV2]](s32) @@ -3224,9 +3224,9 @@ define amdgpu_ps <4 x float> @load_1d_tfe(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) - ; GFX10NSA: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; GFX10NSA: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) @@ -3265,9 +3265,9 @@ define amdgpu_ps <4 x float> @load_2d_tfe(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) - ; GFX9: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; GFX9: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) ; GFX9: $vgpr2 = COPY [[UV2]](s32) @@ -3296,9 +3296,9 @@ define amdgpu_ps <4 x float> @load_2d_tfe(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) - ; GFX10NSA: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; GFX10NSA: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) @@ -3345,9 +3345,9 @@ define amdgpu_ps <4 x float> @load_3d_tfe(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX9: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF1]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) - ; GFX9: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; GFX9: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) ; GFX9: $vgpr2 = COPY [[UV2]](s32) @@ -3383,9 +3383,9 @@ define amdgpu_ps <4 x float> @load_3d_tfe(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX10NSA: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF1]](s32) ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) - ; GFX10NSA: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; GFX10NSA: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) @@ -3435,9 +3435,9 @@ define amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, <2 x i ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) - ; GFX9: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; GFX9: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) ; GFX9: $vgpr2 = COPY [[UV2]](s32) @@ -3475,9 +3475,9 @@ define amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, <2 x i ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32) ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) - ; GFX10NSA: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; GFX10NSA: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll index 9977aac3ee6f9..eca0f9756b9a1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll @@ -18,7 +18,7 @@ define amdgpu_ps half @image_load_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s16) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 2 from custom "ImageResource") + ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s16) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; UNPACKED: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AMDGPU_INTRIN_IMAGE_LOAD]](s16) ; UNPACKED: $vgpr0 = COPY [[ANYEXT]](s32) ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -37,7 +37,7 @@ define amdgpu_ps half @image_load_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s16) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 2 from custom "ImageResource") + ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s16) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; PACKED: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AMDGPU_INTRIN_IMAGE_LOAD]](s16) ; PACKED: $vgpr0 = COPY [[ANYEXT]](s32) ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -61,7 +61,7 @@ define amdgpu_ps <2 x half> @image_load_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "ImageResource") + ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32) @@ -89,7 +89,7 @@ define amdgpu_ps <2 x half> @image_load_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "ImageResource") + ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") ; PACKED: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>) ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 %tex = call <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) @@ -112,7 +112,7 @@ define amdgpu_ps <3 x half> @image_load_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 6 from custom "ImageResource", align 8) + ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) ; UNPACKED: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; UNPACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF @@ -155,7 +155,7 @@ define amdgpu_ps <3 x half> @image_load_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 6 from custom "ImageResource", align 8) + ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) ; PACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s16>) ; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; PACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -206,7 +206,7 @@ define amdgpu_ps <4 x half> @image_load_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 8 from custom "ImageResource") + ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s16>) from custom "ImageResource") ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32) @@ -242,7 +242,7 @@ define amdgpu_ps <4 x half> @image_load_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 8 from custom "ImageResource") + ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s16>) from custom "ImageResource") ; PACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s16>) ; PACKED: $vgpr0 = COPY [[UV]](<2 x s16>) ; PACKED: $vgpr1 = COPY [[UV1]](<2 x s16>) @@ -268,9 +268,9 @@ define amdgpu_ps half @image_load_tfe_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "ImageResource") + ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; UNPACKED: $vgpr0 = COPY [[COPY10]](s32) ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -290,9 +290,9 @@ define amdgpu_ps half @image_load_tfe_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "ImageResource") + ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; PACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; PACKED: $vgpr0 = COPY [[COPY10]](s32) ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -320,7 +320,7 @@ define amdgpu_ps <2 x half> @image_load_tfe_v2f16(<8 x i32> inreg %rsrc, i32 %s, ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "ImageResource") + ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32) @@ -331,7 +331,7 @@ define amdgpu_ps <2 x half> @image_load_tfe_v2f16(<8 x i32> inreg %rsrc, i32 %s, ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_tfe_v2f16 @@ -350,10 +350,10 @@ define amdgpu_ps <2 x half> @image_load_tfe_v2f16(<8 x i32> inreg %rsrc, i32 %s, ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "ImageResource") + ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 %res = call { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) @@ -380,11 +380,11 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16(<8 x i32> inreg %rsrc, i32 %s, ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 6 from custom "ImageResource", align 8) + ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; UNPACKED: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; UNPACKED: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; UNPACKED: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; UNPACKED: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; UNPACKED: [[DEF4:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; UNPACKED: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF3]](<4 x s16>) @@ -425,12 +425,12 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16(<8 x i32> inreg %rsrc, i32 %s, ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 6 from custom "ImageResource", align 8) + ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) ; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) ; PACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32) ; PACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; PACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; PACKED: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>) @@ -483,9 +483,9 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16(<8 x i32> inreg %rsrc, i32 %s, ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 8 from custom "ImageResource") + ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<4 x s16>) from custom "ImageResource") ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) - ; UNPACKED: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]] @@ -521,11 +521,11 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16(<8 x i32> inreg %rsrc, i32 %s, ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 8 from custom "ImageResource") + ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<4 x s16>) from custom "ImageResource") ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) ; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) ; PACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32) - ; PACKED: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; PACKED: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 @@ -589,7 +589,7 @@ define amdgpu_ps <2 x half> @image_load_v2f16_dmask_1000(<8 x i32> inreg %rsrc, ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 2 from custom "ImageResource") + ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]] @@ -615,7 +615,7 @@ define amdgpu_ps <2 x half> @image_load_v2f16_dmask_1000(<8 x i32> inreg %rsrc, ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 2 from custom "ImageResource") + ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; PACKED: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>) ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 %tex = call <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) @@ -675,7 +675,7 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1100(<8 x i32> inreg %rsrc, ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "ImageResource") + ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; UNPACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; UNPACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -716,7 +716,7 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1100(<8 x i32> inreg %rsrc, ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "ImageResource") + ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") ; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; PACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; PACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -766,7 +766,7 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1000(<8 x i32> inreg %rsrc, ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 2 from custom "ImageResource") + ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; UNPACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; UNPACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; UNPACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -804,7 +804,7 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1000(<8 x i32> inreg %rsrc, ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 2 from custom "ImageResource") + ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; PACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; PACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -945,7 +945,7 @@ define amdgpu_ps <4 x half> @image_load_v4f16_dmask_1110(<8 x i32> inreg %rsrc, ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 6 from custom "ImageResource", align 8) + ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32) @@ -980,7 +980,7 @@ define amdgpu_ps <4 x half> @image_load_v4f16_dmask_1110(<8 x i32> inreg %rsrc, ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 6 from custom "ImageResource", align 8) + ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) ; PACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s16>) ; PACKED: $vgpr0 = COPY [[UV]](<2 x s16>) ; PACKED: $vgpr1 = COPY [[UV1]](<2 x s16>) @@ -1005,7 +1005,7 @@ define amdgpu_ps <4 x half> @image_load_v4f16_dmask_1100(<8 x i32> inreg %rsrc, ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "ImageResource") + ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32) @@ -1038,7 +1038,7 @@ define amdgpu_ps <4 x half> @image_load_v4f16_dmask_1100(<8 x i32> inreg %rsrc, ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "ImageResource") + ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") ; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; PACKED: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>) ; PACKED: $vgpr1 = COPY [[DEF]](<2 x s16>) @@ -1063,7 +1063,7 @@ define amdgpu_ps <4 x half> @image_load_v4f16_dmask_1000(<8 x i32> inreg %rsrc, ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 2 from custom "ImageResource") + ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]] @@ -1092,7 +1092,7 @@ define amdgpu_ps <4 x half> @image_load_v4f16_dmask_1000(<8 x i32> inreg %rsrc, ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 2 from custom "ImageResource") + ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; PACKED: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>) ; PACKED: $vgpr1 = COPY [[DEF]](<2 x s16>) @@ -1159,9 +1159,9 @@ define amdgpu_ps half @image_load_tfe_f16_dmask_0000(<8 x i32> inreg %rsrc, i32 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "ImageResource") + ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; UNPACKED: $vgpr0 = COPY [[COPY10]](s32) ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -1181,9 +1181,9 @@ define amdgpu_ps half @image_load_tfe_f16_dmask_0000(<8 x i32> inreg %rsrc, i32 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "ImageResource") + ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; PACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; PACKED: $vgpr0 = COPY [[COPY10]](s32) ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -1211,7 +1211,7 @@ define amdgpu_ps <2 x half> @image_load_tfe_v2f16_dmask_1000(<8 x i32> inreg %rs ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "ImageResource") + ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32) @@ -1221,7 +1221,7 @@ define amdgpu_ps <2 x half> @image_load_tfe_v2f16_dmask_1000(<8 x i32> inreg %rs ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_tfe_v2f16_dmask_1000 @@ -1240,10 +1240,10 @@ define amdgpu_ps <2 x half> @image_load_tfe_v2f16_dmask_1000(<8 x i32> inreg %rs ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "ImageResource") + ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 %res = call { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) @@ -1270,7 +1270,7 @@ define amdgpu_ps <2 x half> @image_load_tfe_v2f16_dmask_0000(<8 x i32> inreg %rs ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "ImageResource") + ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32) @@ -1280,7 +1280,7 @@ define amdgpu_ps <2 x half> @image_load_tfe_v2f16_dmask_0000(<8 x i32> inreg %rs ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_tfe_v2f16_dmask_0000 @@ -1299,10 +1299,10 @@ define amdgpu_ps <2 x half> @image_load_tfe_v2f16_dmask_0000(<8 x i32> inreg %rs ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "ImageResource") + ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 %res = call { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) @@ -1329,10 +1329,10 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1100(<8 x i32> inreg %rs ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "ImageResource") + ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) ; UNPACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; UNPACKED: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; UNPACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; UNPACKED: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; UNPACKED: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) @@ -1372,11 +1372,11 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1100(<8 x i32> inreg %rs ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "ImageResource") + ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) ; PACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; PACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; PACKED: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>) @@ -1429,10 +1429,10 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1000(<8 x i32> inreg %rs ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "ImageResource") + ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; UNPACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; UNPACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; UNPACKED: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; UNPACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) @@ -1470,11 +1470,11 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1000(<8 x i32> inreg %rs ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "ImageResource") + ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) ; PACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; PACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; PACKED: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>) @@ -1527,10 +1527,10 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_0000(<8 x i32> inreg %rs ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "ImageResource") + ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; UNPACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; UNPACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; UNPACKED: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; UNPACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) @@ -1568,11 +1568,11 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_0000(<8 x i32> inreg %rs ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "ImageResource") + ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) ; PACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; PACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; PACKED: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>) @@ -1625,9 +1625,9 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1110(<8 x i32> inreg %rs ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 6 from custom "ImageResource", align 8) + ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; UNPACKED: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]] @@ -1662,11 +1662,11 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1110(<8 x i32> inreg %rs ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 6 from custom "ImageResource", align 8) + ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) ; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) ; PACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32) - ; PACKED: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; PACKED: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 @@ -1694,9 +1694,9 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1100(<8 x i32> inreg %rs ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "ImageResource") + ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) - ; UNPACKED: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]] @@ -1729,11 +1729,11 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1100(<8 x i32> inreg %rs ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "ImageResource") + ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) ; PACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; PACKED: $vgpr1 = COPY [[DEF1]](<2 x s16>) ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 @@ -1761,9 +1761,9 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1000(<8 x i32> inreg %rs ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "ImageResource") + ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]] @@ -1793,11 +1793,11 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1000(<8 x i32> inreg %rs ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "ImageResource") + ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) ; PACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; PACKED: $vgpr1 = COPY [[DEF1]](<2 x s16>) ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 @@ -1825,9 +1825,9 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_0000(<8 x i32> inreg %rs ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "ImageResource") + ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]] @@ -1857,11 +1857,11 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_0000(<8 x i32> inreg %rs ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "ImageResource") + ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) ; PACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; PACKED: $vgpr1 = COPY [[DEF1]](<2 x s16>) ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll index c68fa9ca2ca9d..7342c91d11ca5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll @@ -17,7 +17,7 @@ define amdgpu_ps float @image_load_f32(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "ImageResource") + ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") ; GCN: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0 %tex = call float @llvm.amdgcn.image.load.2d.f32.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) @@ -40,7 +40,7 @@ define amdgpu_ps <2 x float> @image_load_v2f32(<8 x i32> inreg %rsrc, i32 %s, i3 ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 8 from custom "ImageResource") + ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s32>) from custom "ImageResource") ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GCN: $vgpr0 = COPY [[UV]](s32) ; GCN: $vgpr1 = COPY [[UV1]](s32) @@ -65,7 +65,7 @@ define amdgpu_ps <3 x float> @image_load_v3f32(<8 x i32> inreg %rsrc, i32 %s, i3 ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 12 from custom "ImageResource", align 16) + ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s32>) from custom "ImageResource", align 16) ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) ; GCN: $vgpr0 = COPY [[UV]](s32) ; GCN: $vgpr1 = COPY [[UV1]](s32) @@ -91,7 +91,7 @@ define amdgpu_ps <4 x float> @image_load_v4f32(<8 x i32> inreg %rsrc, i32 %s, i3 ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "ImageResource") + ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GCN: $vgpr0 = COPY [[UV]](s32) ; GCN: $vgpr1 = COPY [[UV1]](s32) @@ -119,9 +119,9 @@ define amdgpu_ps float @image_load_tfe_f32(<8 x i32> inreg %rsrc, i32 %s, i32 %t ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "ImageResource") + ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GCN: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; GCN: $vgpr0 = COPY [[UV]](s32) ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0 %res = call { float, i32 } @llvm.amdgcn.image.load.2d.sl_f32i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) @@ -148,9 +148,9 @@ define amdgpu_ps <2 x float> @image_load_tfe_v2f32(<8 x i32> inreg %rsrc, i32 %s ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 8 from custom "ImageResource") + ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s32>) from custom "ImageResource") ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) - ; GCN: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; GCN: $vgpr0 = COPY [[UV]](s32) ; GCN: $vgpr1 = COPY [[UV1]](s32) ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 @@ -178,9 +178,9 @@ define amdgpu_ps <3 x float> @image_load_tfe_v3f32(<8 x i32> inreg %rsrc, i32 %s ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 12 from custom "ImageResource", align 16) + ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s32>) from custom "ImageResource", align 16) ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GCN: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; GCN: $vgpr0 = COPY [[UV]](s32) ; GCN: $vgpr1 = COPY [[UV1]](s32) ; GCN: $vgpr2 = COPY [[UV2]](s32) @@ -209,9 +209,9 @@ define amdgpu_ps <4 x float> @image_load_tfe_v4f32(<8 x i32> inreg %rsrc, i32 %s ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 16 from custom "ImageResource") + ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) - ; GCN: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; GCN: $vgpr0 = COPY [[UV]](s32) ; GCN: $vgpr1 = COPY [[UV1]](s32) ; GCN: $vgpr2 = COPY [[UV2]](s32) @@ -261,7 +261,7 @@ define amdgpu_ps <2 x float> @image_load_v2f32_dmask_1000(<8 x i32> inreg %rsrc, ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "ImageResource") + ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") ; GCN: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GCN: $vgpr1 = COPY [[DEF]](s32) @@ -309,7 +309,7 @@ define amdgpu_ps <3 x float> @image_load_v3f32_dmask_1100(<8 x i32> inreg %rsrc, ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 8 from custom "ImageResource") + ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s32>) from custom "ImageResource") ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GCN: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN: $vgpr0 = COPY [[UV]](s32) @@ -336,7 +336,7 @@ define amdgpu_ps <3 x float> @image_load_v3f32_dmask_1000(<8 x i32> inreg %rsrc, ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "ImageResource") + ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") ; GCN: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GCN: $vgpr1 = COPY [[DEF]](s32) @@ -386,7 +386,7 @@ define amdgpu_ps <4 x float> @image_load_v4f32_dmask_1110(<8 x i32> inreg %rsrc, ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 12 from custom "ImageResource", align 16) + ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s32>) from custom "ImageResource", align 16) ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) ; GCN: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN: $vgpr0 = COPY [[UV]](s32) @@ -414,7 +414,7 @@ define amdgpu_ps <4 x float> @image_load_v4f32_dmask_1100(<8 x i32> inreg %rsrc, ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 8 from custom "ImageResource") + ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s32>) from custom "ImageResource") ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GCN: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN: $vgpr0 = COPY [[UV]](s32) @@ -442,7 +442,7 @@ define amdgpu_ps <4 x float> @image_load_v4f32_dmask_1000(<8 x i32> inreg %rsrc, ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "ImageResource") + ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") ; GCN: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GCN: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GCN: $vgpr1 = COPY [[DEF]](s32) @@ -495,9 +495,9 @@ define amdgpu_ps float @image_load_tfe_f32_dmask_0000(<8 x i32> inreg %rsrc, i32 ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "ImageResource") + ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GCN: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; GCN: $vgpr0 = COPY [[UV]](s32) ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0 %res = call { float, i32 } @llvm.amdgcn.image.load.2d.sl_f32i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) @@ -524,10 +524,10 @@ define amdgpu_ps <2 x float> @image_load_tfe_v2f32_dmask_1000(<8 x i32> inreg %r ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "ImageResource") + ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GCN: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; GCN: $vgpr0 = COPY [[UV]](s32) ; GCN: $vgpr1 = COPY [[DEF1]](s32) ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 @@ -555,10 +555,10 @@ define amdgpu_ps <2 x float> @image_load_tfe_v2f32_dmask_0000(<8 x i32> inreg %r ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "ImageResource") + ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GCN: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; GCN: $vgpr0 = COPY [[UV]](s32) ; GCN: $vgpr1 = COPY [[DEF1]](s32) ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 @@ -586,10 +586,10 @@ define amdgpu_ps <3 x float> @image_load_tfe_v3f32_dmask_1100(<8 x i32> inreg %r ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 8 from custom "ImageResource") + ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s32>) from custom "ImageResource") ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) ; GCN: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; GCN: $vgpr0 = COPY [[UV]](s32) ; GCN: $vgpr1 = COPY [[UV1]](s32) ; GCN: $vgpr2 = COPY [[DEF1]](s32) @@ -618,10 +618,10 @@ define amdgpu_ps <3 x float> @image_load_tfe_v3f32_dmask_1000(<8 x i32> inreg %r ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "ImageResource") + ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GCN: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; GCN: $vgpr0 = COPY [[UV]](s32) ; GCN: $vgpr1 = COPY [[DEF1]](s32) ; GCN: $vgpr2 = COPY [[DEF1]](s32) @@ -650,10 +650,10 @@ define amdgpu_ps <3 x float> @image_load_tfe_v3f32_dmask_0000(<8 x i32> inreg %r ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "ImageResource") + ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GCN: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; GCN: $vgpr0 = COPY [[UV]](s32) ; GCN: $vgpr1 = COPY [[DEF1]](s32) ; GCN: $vgpr2 = COPY [[DEF1]](s32) @@ -682,10 +682,10 @@ define amdgpu_ps <4 x float> @image_load_tfe_v4f32_dmask_1110(<8 x i32> inreg %r ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 12 from custom "ImageResource", align 16) + ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s32>) from custom "ImageResource", align 16) ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GCN: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; GCN: $vgpr0 = COPY [[UV]](s32) ; GCN: $vgpr1 = COPY [[UV1]](s32) ; GCN: $vgpr2 = COPY [[UV2]](s32) @@ -715,10 +715,10 @@ define amdgpu_ps <4 x float> @image_load_tfe_v4f32_dmask_1100(<8 x i32> inreg %r ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 8 from custom "ImageResource") + ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s32>) from custom "ImageResource") ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) ; GCN: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; GCN: $vgpr0 = COPY [[UV]](s32) ; GCN: $vgpr1 = COPY [[UV1]](s32) ; GCN: $vgpr2 = COPY [[DEF1]](s32) @@ -748,10 +748,10 @@ define amdgpu_ps <4 x float> @image_load_tfe_v4f32_dmask_1000(<8 x i32> inreg %r ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "ImageResource") + ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GCN: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; GCN: $vgpr0 = COPY [[UV]](s32) ; GCN: $vgpr1 = COPY [[DEF1]](s32) ; GCN: $vgpr2 = COPY [[DEF1]](s32) @@ -781,10 +781,10 @@ define amdgpu_ps <4 x float> @image_load_tfe_v4f32_dmask_0000(<8 x i32> inreg %r ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "ImageResource") + ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GCN: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; GCN: $vgpr0 = COPY [[UV]](s32) ; GCN: $vgpr1 = COPY [[DEF1]](s32) ; GCN: $vgpr2 = COPY [[DEF1]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2darraymsaa.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2darraymsaa.ll index 0cc6fc36e7e73..1f0aa1bfc2fe3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2darraymsaa.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2darraymsaa.ll @@ -20,7 +20,7 @@ define amdgpu_ps <4 x float> @load_2darraymsaa(<8 x i32> inreg %rsrc, i32 %s, i3 ; GFX6: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX6: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 ; GFX6: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX6: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[BUILD_VECTOR1]](<4 x s32>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX6: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[BUILD_VECTOR1]](<4 x s32>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX6: $vgpr0 = COPY [[UV]](s32) ; GFX6: $vgpr1 = COPY [[UV1]](s32) @@ -43,7 +43,7 @@ define amdgpu_ps <4 x float> @load_2darraymsaa(<8 x i32> inreg %rsrc, i32 %s, i3 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) @@ -75,9 +75,9 @@ define amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, i32 ad ; GFX6: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX6: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr3 ; GFX6: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32) - ; GFX6: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[BUILD_VECTOR1]](<4 x s32>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX6: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[BUILD_VECTOR1]](<4 x s32>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) - ; GFX6: G_STORE [[UV4]](s32), [[MV]](p1) :: (store 4 into %ir.out, addrspace 1) + ; GFX6: G_STORE [[UV4]](s32), [[MV]](p1) :: (store (s32) into %ir.out, addrspace 1) ; GFX6: $vgpr0 = COPY [[UV]](s32) ; GFX6: $vgpr1 = COPY [[UV1]](s32) ; GFX6: $vgpr2 = COPY [[UV2]](s32) @@ -102,9 +102,9 @@ define amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, i32 ad ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) - ; GFX10NSA: G_STORE [[UV4]](s32), [[MV]](p1) :: (store 4 into %ir.out, addrspace 1) + ; GFX10NSA: G_STORE [[UV4]](s32), [[MV]](p1) :: (store (s32) into %ir.out, addrspace 1) ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) ; GFX10NSA: $vgpr1 = COPY [[UV1]](s32) ; GFX10NSA: $vgpr2 = COPY [[UV2]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.3d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.3d.ll index 60baab0650ae8..1f08970534d55 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.3d.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.3d.ll @@ -19,7 +19,7 @@ define amdgpu_ps float @image_load_3d_f32(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ; GFX6: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX6: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX6: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32) - ; GFX6: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 1, [[BUILD_VECTOR1]](<3 x s32>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "ImageResource") + ; GFX6: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 1, [[BUILD_VECTOR1]](<3 x s32>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") ; GFX6: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: image_load_3d_f32 @@ -37,7 +37,7 @@ define amdgpu_ps float @image_load_3d_f32(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 1, [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 1, [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0 %tex = call float @llvm.amdgcn.image.load.3d.f32.i32(i32 1, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0) @@ -62,9 +62,9 @@ define amdgpu_ps float @image_load_3d_tfe_f32(<8 x i32> inreg %rsrc, i32 %s, i32 ; GFX6: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX6: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GFX6: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32) - ; GFX6: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 1, [[BUILD_VECTOR1]](<3 x s32>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "ImageResource") + ; GFX6: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 1, [[BUILD_VECTOR1]](<3 x s32>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GFX6: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; GFX6: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; GFX6: $vgpr0 = COPY [[UV]](s32) ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: image_load_3d_tfe_f32 @@ -83,9 +83,9 @@ define amdgpu_ps float @image_load_3d_tfe_f32(<8 x i32> inreg %rsrc, i32 %s, i32 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10NSA: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 1, [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "ImageResource") + ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 1, [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GFX10NSA: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; GFX10NSA: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call { float, i32 } @llvm.amdgcn.image.load.3d.sl_f32i32s.i32(i32 1, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 1, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.a16.ll index 209bd73220fb4..cc5259a9f2ae1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.a16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.a16.ll @@ -22,7 +22,7 @@ define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg % ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -48,7 +48,7 @@ define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg % ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -83,7 +83,7 @@ define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg % ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -112,7 +112,7 @@ define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg % ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -152,7 +152,7 @@ define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg % ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -186,7 +186,7 @@ define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg % ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32) ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -226,7 +226,7 @@ define amdgpu_ps <4 x float> @sample_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -260,7 +260,7 @@ define amdgpu_ps <4 x float> @sample_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32) ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -295,7 +295,7 @@ define amdgpu_ps <4 x float> @sample_1darray(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1darray), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1darray), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -324,7 +324,7 @@ define amdgpu_ps <4 x float> @sample_1darray(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1darray), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1darray), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -364,7 +364,7 @@ define amdgpu_ps <4 x float> @sample_2darray(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -398,7 +398,7 @@ define amdgpu_ps <4 x float> @sample_2darray(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32) ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -435,7 +435,7 @@ define amdgpu_ps <4 x float> @sample_c_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -466,7 +466,7 @@ define amdgpu_ps <4 x float> @sample_c_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32) ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -504,7 +504,7 @@ define amdgpu_ps <4 x float> @sample_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -536,7 +536,7 @@ define amdgpu_ps <4 x float> @sample_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -571,7 +571,7 @@ define amdgpu_ps <4 x float> @sample_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inre ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cl.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cl.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -600,7 +600,7 @@ define amdgpu_ps <4 x float> @sample_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inre ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cl.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cl.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -640,7 +640,7 @@ define amdgpu_ps <4 x float> @sample_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inre ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cl.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cl.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -674,7 +674,7 @@ define amdgpu_ps <4 x float> @sample_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inre ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32) ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cl.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cl.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -712,7 +712,7 @@ define amdgpu_ps <4 x float> @sample_c_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cl.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cl.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -744,7 +744,7 @@ define amdgpu_ps <4 x float> @sample_c_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cl.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cl.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -786,7 +786,7 @@ define amdgpu_ps <4 x float> @sample_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cl.2d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cl.2d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -821,7 +821,7 @@ define amdgpu_ps <4 x float> @sample_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32) ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[DEF]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -858,7 +858,7 @@ define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -889,7 +889,7 @@ define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32) ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -927,7 +927,7 @@ define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -959,7 +959,7 @@ define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -998,7 +998,7 @@ define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inr ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.1d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.1d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -1030,7 +1030,7 @@ define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inr ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32) ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[DEF]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.1d), 15, [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.1d), 15, [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -1070,7 +1070,7 @@ define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inr ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.2d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.2d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -1103,7 +1103,7 @@ define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inr ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32) ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.2d), 15, [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.2d), 15, [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -1141,7 +1141,7 @@ define amdgpu_ps <4 x float> @sample_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.cl.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.cl.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -1173,7 +1173,7 @@ define amdgpu_ps <4 x float> @sample_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.cl.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.cl.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -1215,7 +1215,7 @@ define amdgpu_ps <4 x float> @sample_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.cl.2d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.cl.2d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -1250,7 +1250,7 @@ define amdgpu_ps <4 x float> @sample_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32) ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[DEF]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -1290,7 +1290,7 @@ define amdgpu_ps <4 x float> @sample_c_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.cl.1d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.cl.1d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -1323,7 +1323,7 @@ define amdgpu_ps <4 x float> @sample_c_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32) ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -1367,7 +1367,7 @@ define amdgpu_ps <4 x float> @sample_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.cl.2d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.cl.2d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -1404,7 +1404,7 @@ define amdgpu_ps <4 x float> @sample_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> ; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32) ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[DEF]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -1445,7 +1445,7 @@ define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -1479,7 +1479,7 @@ define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32) ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -1525,7 +1525,7 @@ define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX9: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY22]](s32), [[COPY23]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -1564,7 +1564,7 @@ define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32) ; GFX10: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY22]](s32), [[COPY23]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -1620,7 +1620,7 @@ define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX9: [[COPY29:%[0-9]+]]:_(s32) = COPY [[COPY20]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY29]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -1669,7 +1669,7 @@ define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX10: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY27]](s32), [[COPY28]](s32) ; GFX10: [[COPY29:%[0-9]+]]:_(s32) = COPY [[COPY20]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY29]](s32), [[DEF]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -1712,7 +1712,7 @@ define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inr ; GFX9: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.1d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.1d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -1748,7 +1748,7 @@ define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inr ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32) ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[DEF]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -1796,7 +1796,7 @@ define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inr ; GFX9: [[COPY24:%[0-9]+]]:_(s32) = COPY [[COPY18]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY23]](s32), [[COPY24]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.2d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.2d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -1837,7 +1837,7 @@ define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inr ; GFX10: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32) ; GFX10: [[COPY24:%[0-9]+]]:_(s32) = COPY [[COPY18]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY23]](s32), [[COPY24]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -1880,7 +1880,7 @@ define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX9: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[COPY19]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.1d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.1d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -1916,7 +1916,7 @@ define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32) ; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[COPY19]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -1966,7 +1966,7 @@ define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY25]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.2d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.2d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -2009,7 +2009,7 @@ define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX10: [[COPY25:%[0-9]+]]:_(s32) = COPY [[COPY18]](s32) ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY25]](s32), [[DEF]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -2054,7 +2054,7 @@ define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> ; GFX9: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[COPY20]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.1d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.1d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -2092,7 +2092,7 @@ define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> ; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32) ; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[COPY20]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -2144,7 +2144,7 @@ define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY26]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.2d), 15, [[CONCAT_VECTORS]](<10 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.2d), 15, [[CONCAT_VECTORS]](<10 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -2189,7 +2189,7 @@ define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> ; GFX10: [[COPY26:%[0-9]+]]:_(s32) = COPY [[COPY19]](s32) ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY26]](s32), [[DEF]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -2230,7 +2230,7 @@ define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inre ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.1d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.1d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -2264,7 +2264,7 @@ define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inre ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32) ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -2310,7 +2310,7 @@ define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inre ; GFX9: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY22]](s32), [[COPY23]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.2d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.2d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -2349,7 +2349,7 @@ define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inre ; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32) ; GFX10: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY22]](s32), [[COPY23]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -2392,7 +2392,7 @@ define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX9: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.1d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.1d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -2428,7 +2428,7 @@ define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32) ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[DEF]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -2476,7 +2476,7 @@ define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX9: [[COPY24:%[0-9]+]]:_(s32) = COPY [[COPY18]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY23]](s32), [[COPY24]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.2d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.2d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -2517,7 +2517,7 @@ define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX10: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32) ; GFX10: [[COPY24:%[0-9]+]]:_(s32) = COPY [[COPY18]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY23]](s32), [[COPY24]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -2560,7 +2560,7 @@ define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> i ; GFX9: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[COPY19]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.1d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.1d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -2596,7 +2596,7 @@ define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> i ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32) ; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[COPY19]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -2646,7 +2646,7 @@ define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> i ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY25]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.2d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.2d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -2689,7 +2689,7 @@ define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> i ; GFX10: [[COPY25:%[0-9]+]]:_(s32) = COPY [[COPY18]](s32) ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY25]](s32), [[DEF]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -2734,7 +2734,7 @@ define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> ; GFX9: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[COPY20]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.1d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.1d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -2772,7 +2772,7 @@ define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> ; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32) ; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[COPY20]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -2824,7 +2824,7 @@ define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY26]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.2d), 15, [[CONCAT_VECTORS]](<10 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.2d), 15, [[CONCAT_VECTORS]](<10 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -2869,7 +2869,7 @@ define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> ; GFX10: [[COPY26:%[0-9]+]]:_(s32) = COPY [[COPY19]](s32) ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY26]](s32), [[DEF]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -2904,7 +2904,7 @@ define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.l.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.l.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -2933,7 +2933,7 @@ define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.l.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.l.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -2973,7 +2973,7 @@ define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.l.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.l.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -3007,7 +3007,7 @@ define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32) ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.l.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.l.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -3045,7 +3045,7 @@ define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inr ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.l.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.l.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -3077,7 +3077,7 @@ define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inr ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.l.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.l.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -3119,7 +3119,7 @@ define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inr ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.l.2d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.l.2d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -3154,7 +3154,7 @@ define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inr ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32) ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[DEF]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.l.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.l.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -3186,7 +3186,7 @@ define amdgpu_ps <4 x float> @sample_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inre ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.lz.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.lz.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -3212,7 +3212,7 @@ define amdgpu_ps <4 x float> @sample_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inre ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.lz.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.lz.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -3247,7 +3247,7 @@ define amdgpu_ps <4 x float> @sample_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inre ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.lz.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.lz.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -3276,7 +3276,7 @@ define amdgpu_ps <4 x float> @sample_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inre ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.lz.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.lz.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -3313,7 +3313,7 @@ define amdgpu_ps <4 x float> @sample_c_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.lz.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.lz.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -3344,7 +3344,7 @@ define amdgpu_ps <4 x float> @sample_c_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32) ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.lz.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.lz.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -3382,7 +3382,7 @@ define amdgpu_ps <4 x float> @sample_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.lz.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.lz.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -3414,7 +3414,7 @@ define amdgpu_ps <4 x float> @sample_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.lz.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.lz.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -3468,7 +3468,7 @@ define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY27]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 4, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 4 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 4, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (s32) from custom "ImageResource") ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10-LABEL: name: sample_c_d_o_2darray_V1 @@ -3511,7 +3511,7 @@ define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> ; GFX10: [[COPY27:%[0-9]+]]:_(s32) = COPY [[COPY20]](s32) ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY27]](s32), [[DEF]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 4, [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 4 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 4, [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (s32) from custom "ImageResource") ; GFX10: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: @@ -3561,7 +3561,7 @@ define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY27]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 6, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 8 from custom "ImageResource") + ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 6, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<2 x s32>) from custom "ImageResource") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) @@ -3606,7 +3606,7 @@ define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 ; GFX10: [[COPY27:%[0-9]+]]:_(s32) = COPY [[COPY20]](s32) ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY27]](s32), [[DEF]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 6, [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 8 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 6, [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<2 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.ll index 12ae4a3166d1e..f02ab648442c3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.ll @@ -27,7 +27,7 @@ define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[DEF]](s32) ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY14]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY14]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -69,7 +69,7 @@ define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32) ; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY20]](s32), [[COPY21]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY16]](s32), [[COPY17]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY16]](s32), [[COPY17]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -119,7 +119,7 @@ define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY24]](s32), [[COPY25]](s32) ; GFX10: [[COPY26:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY26]](s32), [[DEF]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -159,7 +159,7 @@ define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inr ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32) ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY15]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY15]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -203,7 +203,7 @@ define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inr ; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32) ; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY17]](s32), [[COPY18]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY17]](s32), [[COPY18]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -242,7 +242,7 @@ define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32) ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY14]](s32), [[COPY15]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY14]](s32), [[COPY15]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -285,7 +285,7 @@ define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32) ; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -326,7 +326,7 @@ define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32) ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[DEF]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY15]](s32), [[COPY16]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY15]](s32), [[COPY16]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -371,7 +371,7 @@ define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> ; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32) ; GFX10: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY22]](s32), [[COPY23]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -409,7 +409,7 @@ define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inre ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[DEF]](s32) ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY14]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY14]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -451,7 +451,7 @@ define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inre ; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32) ; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY20]](s32), [[COPY21]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY16]](s32), [[COPY17]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY16]](s32), [[COPY17]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -491,7 +491,7 @@ define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32) ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY15]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY15]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -535,7 +535,7 @@ define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32) ; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY17]](s32), [[COPY18]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY17]](s32), [[COPY18]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -574,7 +574,7 @@ define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> i ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32) ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY14]](s32), [[COPY15]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY14]](s32), [[COPY15]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -617,7 +617,7 @@ define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> i ; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32) ; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -658,7 +658,7 @@ define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32) ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[DEF]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY15]](s32), [[COPY16]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY15]](s32), [[COPY16]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -703,7 +703,7 @@ define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> ; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32) ; GFX10: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY22]](s32), [[COPY23]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) @@ -750,7 +750,7 @@ define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> ; GFX10: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32) ; GFX10: [[COPY24:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY23]](s32), [[COPY24]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 4, [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 4 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 4, [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (s32) from custom "ImageResource") ; GFX10: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: @@ -793,7 +793,7 @@ define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 ; GFX10: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32) ; GFX10: [[COPY24:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY23]](s32), [[COPY24]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 6, [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 8 from custom "ImageResource") + ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 6, [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<2 x s32>) from custom "ImageResource") ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll index 3700c238905ab..7777e4a3ab2d2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll @@ -22,7 +22,7 @@ define amdgpu_ps void @image_store_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ha ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 ; UNPACKED: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 2 into custom "ImageResource") + ; UNPACKED: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (s16) into custom "ImageResource") ; UNPACKED: S_ENDPGM 0 ; GFX81-LABEL: name: image_store_f16 ; GFX81: bb.1 (%ir-block.0): @@ -41,7 +41,7 @@ define amdgpu_ps void @image_store_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ha ; GFX81: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX81: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) ; GFX81: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX81: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 2 into custom "ImageResource") + ; GFX81: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (s16) into custom "ImageResource") ; GFX81: S_ENDPGM 0 ; GFX9-LABEL: name: image_store_f16 ; GFX9: bb.1 (%ir-block.0): @@ -60,7 +60,7 @@ define amdgpu_ps void @image_store_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ha ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 2 into custom "ImageResource") + ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (s16) into custom "ImageResource") ; GFX9: S_ENDPGM 0 ; GFX10-LABEL: name: image_store_f16 ; GFX10: bb.1 (%ir-block.0): @@ -79,7 +79,7 @@ define amdgpu_ps void @image_store_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ha ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX10: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 2 into custom "ImageResource") + ; GFX10: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (s16) into custom "ImageResource") ; GFX10: S_ENDPGM 0 call void @llvm.amdgcn.image.store.2d.f16.i32(half %data, i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret void @@ -108,7 +108,7 @@ define amdgpu_ps void @image_store_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ; UNPACKED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) ; UNPACKED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; UNPACKED: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY11]](s32), [[COPY12]](s32) - ; UNPACKED: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<2 x s32>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 4 into custom "ImageResource") + ; UNPACKED: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<2 x s32>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<2 x s16>) into custom "ImageResource") ; UNPACKED: S_ENDPGM 0 ; GFX81-LABEL: name: image_store_v2f16 ; GFX81: bb.1 (%ir-block.0): @@ -129,7 +129,7 @@ define amdgpu_ps void @image_store_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ; GFX81: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) ; GFX81: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX81: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[DEF]](s32) - ; GFX81: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<2 x s32>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 4 into custom "ImageResource") + ; GFX81: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<2 x s32>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<2 x s16>) into custom "ImageResource") ; GFX81: S_ENDPGM 0 ; GFX9-LABEL: name: image_store_v2f16 ; GFX9: bb.1 (%ir-block.0): @@ -147,7 +147,7 @@ define amdgpu_ps void @image_store_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[COPY10]](<2 x s16>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 4 into custom "ImageResource") + ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[COPY10]](<2 x s16>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<2 x s16>) into custom "ImageResource") ; GFX9: S_ENDPGM 0 ; GFX10-LABEL: name: image_store_v2f16 ; GFX10: bb.1 (%ir-block.0): @@ -165,7 +165,7 @@ define amdgpu_ps void @image_store_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX10: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[COPY10]](<2 x s16>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 4 into custom "ImageResource") + ; GFX10: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[COPY10]](<2 x s16>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<2 x s16>) into custom "ImageResource") ; GFX10: S_ENDPGM 0 call void @llvm.amdgcn.image.store.2d.v2f16.i32(<2 x half> %in, i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret void @@ -200,7 +200,7 @@ define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ; UNPACKED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; UNPACKED: [[COPY14:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) ; UNPACKED: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32) - ; UNPACKED: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<3 x s32>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 6 into custom "ImageResource", align 8) + ; UNPACKED: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<3 x s32>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>) into custom "ImageResource", align 8) ; UNPACKED: S_ENDPGM 0 ; GFX81-LABEL: name: image_store_v3f16 ; GFX81: bb.1 (%ir-block.0): @@ -244,7 +244,7 @@ define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ; GFX81: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; GFX81: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) ; GFX81: [[BITCAST4:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[CONCAT_VECTORS1]](<6 x s16>) - ; GFX81: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BITCAST4]](<3 x s32>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 6 into custom "ImageResource", align 8) + ; GFX81: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BITCAST4]](<3 x s32>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>) into custom "ImageResource", align 8) ; GFX81: S_ENDPGM 0 ; GFX9-LABEL: name: image_store_v3f16 ; GFX9: bb.1 (%ir-block.0): @@ -278,7 +278,7 @@ define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF]](<2 x s16>) ; GFX9: [[UV3:%[0-9]+]]:_(<3 x s16>), [[UV4:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[UV3]](<3 x s16>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 6 into custom "ImageResource", align 8) + ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[UV3]](<3 x s16>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>) into custom "ImageResource", align 8) ; GFX9: S_ENDPGM 0 ; GFX10-LABEL: name: image_store_v3f16 ; GFX10: bb.1 (%ir-block.0): @@ -312,7 +312,7 @@ define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ; GFX10: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF]](<2 x s16>) ; GFX10: [[UV3:%[0-9]+]]:_(<3 x s16>), [[UV4:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX10: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[UV3]](<3 x s16>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 6 into custom "ImageResource", align 8) + ; GFX10: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[UV3]](<3 x s16>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>) into custom "ImageResource", align 8) ; GFX10: S_ENDPGM 0 call void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half> %in, i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret void @@ -346,7 +346,7 @@ define amdgpu_ps void @image_store_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ; UNPACKED: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; UNPACKED: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; UNPACKED: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32) - ; UNPACKED: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<4 x s32>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 8 into custom "ImageResource") + ; UNPACKED: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<4 x s32>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<4 x s16>) into custom "ImageResource") ; UNPACKED: S_ENDPGM 0 ; GFX81-LABEL: name: image_store_v4f16 ; GFX81: bb.1 (%ir-block.0): @@ -370,7 +370,7 @@ define amdgpu_ps void @image_store_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ; GFX81: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>) ; GFX81: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX81: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[DEF]](s32), [[DEF]](s32) - ; GFX81: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<4 x s32>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 8 into custom "ImageResource") + ; GFX81: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<4 x s32>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<4 x s16>) into custom "ImageResource") ; GFX81: S_ENDPGM 0 ; GFX9-LABEL: name: image_store_v4f16 ; GFX9: bb.1 (%ir-block.0): @@ -390,7 +390,7 @@ define amdgpu_ps void @image_store_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ; GFX9: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>) ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[CONCAT_VECTORS]](<4 x s16>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 8 into custom "ImageResource") + ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[CONCAT_VECTORS]](<4 x s16>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<4 x s16>) into custom "ImageResource") ; GFX9: S_ENDPGM 0 ; GFX10-LABEL: name: image_store_v4f16 ; GFX10: bb.1 (%ir-block.0): @@ -410,7 +410,7 @@ define amdgpu_ps void @image_store_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ; GFX10: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>) ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX10: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[CONCAT_VECTORS]](<4 x s16>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 8 into custom "ImageResource") + ; GFX10: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[CONCAT_VECTORS]](<4 x s16>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<4 x s16>) into custom "ImageResource") ; GFX10: S_ENDPGM 0 call void @llvm.amdgcn.image.store.2d.v4f16.i32(<4 x half> %in, i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.s.buffer.load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.s.buffer.load.mir index 2b23f5c28b86d..c9cc4c317efa8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.s.buffer.load.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.s.buffer.load.mir @@ -11,7 +11,7 @@ body: | ; GCN-LABEL: name: s_buffer_load_s32 ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GCN: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load 4) + ; GCN: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s32)) ; GCN: S_ENDPGM 0, implicit [[AMDGPU_S_BUFFER_LOAD]](s32) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = G_CONSTANT i32 0 @@ -29,7 +29,7 @@ body: | ; GCN-LABEL: name: s_buffer_load_v3s32 ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GCN: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load 12, align 4) + ; GCN: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4) ; GCN: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF ; GCN: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) ; GCN: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) @@ -50,7 +50,7 @@ body: | ; GCN-LABEL: name: s_buffer_load_v3p3 ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GCN: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load 12, align 4) + ; GCN: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4) ; GCN: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF ; GCN: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) ; GCN: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) @@ -72,7 +72,7 @@ body: | ; GCN-LABEL: name: s_buffer_load_v6s16 ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GCN: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load 12, align 4) + ; GCN: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4) ; GCN: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF ; GCN: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) ; GCN: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) @@ -94,7 +94,7 @@ body: | ; GCN-LABEL: name: s_buffer_load_v6s32 ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GCN: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<8 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load 24, align 4) + ; GCN: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<8 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s192), align 4) ; GCN: [[DEF:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF ; GCN: [[CONCAT_VECTORS:%[0-9]+]]:_(<24 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<8 x s32>), [[DEF]](<8 x s32>), [[DEF]](<8 x s32>) ; GCN: [[UV:%[0-9]+]]:_(<6 x s32>), [[UV1:%[0-9]+]]:_(<6 x s32>), [[UV2:%[0-9]+]]:_(<6 x s32>), [[UV3:%[0-9]+]]:_(<6 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<24 x s32>) @@ -115,7 +115,7 @@ body: | ; GCN-LABEL: name: s_buffer_load_v3s64 ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GCN: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s64>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load 24, align 4) + ; GCN: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s64>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s192), align 4) ; GCN: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; GCN: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s64>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s64>), [[DEF]](<4 x s64>), [[DEF]](<4 x s64>) ; GCN: [[UV:%[0-9]+]]:_(<3 x s64>), [[UV1:%[0-9]+]]:_(<3 x s64>), [[UV2:%[0-9]+]]:_(<3 x s64>), [[UV3:%[0-9]+]]:_(<3 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s64>) @@ -136,7 +136,7 @@ body: | ; GCN-LABEL: name: s_buffer_load_v12s8 ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GCN: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load 12, align 4) + ; GCN: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4) ; GCN: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF ; GCN: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) ; GCN: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) @@ -227,7 +227,7 @@ body: | ; GCN-LABEL: name: s_buffer_load_s96 ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GCN: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load 12, align 4) + ; GCN: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4) ; GCN: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF ; GCN: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) ; GCN: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant-32bit.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant-32bit.mir index 325773ebe6d34..d900da6240e11 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant-32bit.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant-32bit.mir @@ -11,16 +11,16 @@ body: | ; CI: [[COPY:%[0-9]+]]:_(p6) = COPY $vgpr0 ; CI: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0 ; CI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6) - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load 1, addrspace 6) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s8), addrspace 6) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[MV]], [[C1]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 6) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 6) ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[MV]], [[C2]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 6) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 6) ; CI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[MV]], [[C3]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 6) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 6) ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C4]] @@ -41,7 +41,7 @@ body: | ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; CI: $vgpr0 = COPY [[OR2]](s32) %0:_(p6) = COPY $vgpr0 - %1:_(s32) = G_LOAD %0 :: (load 4, align 1, addrspace 6) + %1:_(s32) = G_LOAD %0 :: (load (s32), align 1, addrspace 6) $vgpr0 = COPY %1 ... @@ -55,9 +55,9 @@ body: | ; CI: [[COPY:%[0-9]+]]:_(p6) = COPY $vgpr0 ; CI: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0 ; CI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6) - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load 4, addrspace 6) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), addrspace 6) ; CI: $vgpr0 = COPY [[LOAD]](s32) %0:_(p6) = COPY $vgpr0 - %1:_(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 6) + %1:_(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 6) $vgpr0 = COPY %1 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir index 3b8679e9335d5..1ec6a7bcd8e53 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir @@ -12,27 +12,27 @@ body: | ; CI-LABEL: name: test_load_constant_s1_align1 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s1), addrspace 4) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; CI: $vgpr0 = COPY [[AND]](s32) ; VI-LABEL: name: test_load_constant_s1_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s1), addrspace 4) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; VI: $vgpr0 = COPY [[AND]](s32) ; GFX9-LABEL: name: test_load_constant_s1_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s1), addrspace 4) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; GFX9: $vgpr0 = COPY [[AND]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s1) = G_LOAD %0 :: (load 1, align 1, addrspace 4) + %1:_(s1) = G_LOAD %0 :: (load (s1), align 1, addrspace 4) %2:_(s32) = G_ZEXT %1 $vgpr0 = COPY %2 ... @@ -45,27 +45,27 @@ body: | ; CI-LABEL: name: test_load_constant_s2_align1 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s2), addrspace 4) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; CI: $vgpr0 = COPY [[AND]](s32) ; VI-LABEL: name: test_load_constant_s2_align1 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s2), addrspace 4) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; VI: $vgpr0 = COPY [[AND]](s32) ; GFX9-LABEL: name: test_load_constant_s2_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s2), addrspace 4) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; GFX9: $vgpr0 = COPY [[AND]](s32) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s2) = G_LOAD %0 :: (load 1, align 1, addrspace 4) + %1:_(s2) = G_LOAD %0 :: (load (s2), align 1, addrspace 4) %2:_(s32) = G_ZEXT %1 $vgpr0 = COPY %2 ... @@ -78,21 +78,21 @@ body: | ; CI-LABEL: name: test_load_constant_s8_align4 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4) ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: $vgpr0 = COPY [[COPY1]](s32) ; VI-LABEL: name: test_load_constant_s8_align4 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4) ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-LABEL: name: test_load_constant_s8_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: $vgpr0 = COPY [[COPY1]](s32) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s8) = G_LOAD %0 :: (load 1, align 4, addrspace 4) + %1:_(s8) = G_LOAD %0 :: (load (s8), align 4, addrspace 4) %2:_(s32) = G_ANYEXT %1 $vgpr0 = COPY %2 ... @@ -105,21 +105,21 @@ body: | ; CI-LABEL: name: test_load_constant_s8_align1 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: $vgpr0 = COPY [[COPY1]](s32) ; VI-LABEL: name: test_load_constant_s8_align1 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-LABEL: name: test_load_constant_s8_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: $vgpr0 = COPY [[COPY1]](s32) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s8) = G_LOAD %0 :: (load 1, align 1, addrspace 4) + %1:_(s8) = G_LOAD %0 :: (load (s8), align 1, addrspace 4) %2:_(s32) = G_ANYEXT %1 $vgpr0 = COPY %2 ... @@ -132,21 +132,21 @@ body: | ; CI-LABEL: name: test_load_constant_s16_align4 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4) ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: $vgpr0 = COPY [[COPY1]](s32) ; VI-LABEL: name: test_load_constant_s16_align4 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4) ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-LABEL: name: test_load_constant_s16_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: $vgpr0 = COPY [[COPY1]](s32) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s16) = G_LOAD %0 :: (load 2, align 4, addrspace 4) + %1:_(s16) = G_LOAD %0 :: (load (s16), align 4, addrspace 4) %2:_(s32) = G_ANYEXT %1 $vgpr0 = COPY %2 ... @@ -159,21 +159,21 @@ body: | ; CI-LABEL: name: test_load_constant_s16_align2 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: $vgpr0 = COPY [[COPY1]](s32) ; VI-LABEL: name: test_load_constant_s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-LABEL: name: test_load_constant_s16_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: $vgpr0 = COPY [[COPY1]](s32) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s16) = G_LOAD %0 :: (load 2, align 2, addrspace 4) + %1:_(s16) = G_LOAD %0 :: (load (s16), align 2, addrspace 4) %2:_(s32) = G_ANYEXT %1 $vgpr0 = COPY %2 ... @@ -186,10 +186,10 @@ body: | ; CI-LABEL: name: test_load_constant_s16_align1 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; CI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -204,10 +204,10 @@ body: | ; CI: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_load_constant_s16_align1 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -220,10 +220,10 @@ body: | ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_load_constant_s16_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -235,7 +235,7 @@ body: | ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s16) = G_LOAD %0 :: (load 2, align 1, addrspace 4) + %1:_(s16) = G_LOAD %0 :: (load (s16), align 1, addrspace 4) %2:_(s32) = G_ANYEXT %1 $vgpr0 = COPY %2 ... @@ -248,18 +248,18 @@ body: | ; CI-LABEL: name: test_load_constant_s32_align4 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4) ; CI: $vgpr0 = COPY [[LOAD]](s32) ; VI-LABEL: name: test_load_constant_s32_align4 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4) ; VI: $vgpr0 = COPY [[LOAD]](s32) ; GFX9-LABEL: name: test_load_constant_s32_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4) ; GFX9: $vgpr0 = COPY [[LOAD]](s32) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 4) + %1:_(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 4) $vgpr0 = COPY %1 ... @@ -271,10 +271,10 @@ body: | ; CI-LABEL: name: test_load_constant_s32_align2 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 from unknown-address + 2, addrspace 4) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -286,10 +286,10 @@ body: | ; CI: $vgpr0 = COPY [[OR]](s32) ; VI-LABEL: name: test_load_constant_s32_align2 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 from unknown-address + 2, addrspace 4) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -301,10 +301,10 @@ body: | ; VI: $vgpr0 = COPY [[OR]](s32) ; GFX9-LABEL: name: test_load_constant_s32_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 from unknown-address + 2, addrspace 4) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -315,7 +315,7 @@ body: | ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX9: $vgpr0 = COPY [[OR]](s32) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (load 4, align 2, addrspace 4) + %1:_(s32) = G_LOAD %0 :: (load (s32), align 2, addrspace 4) $vgpr0 = COPY %1 ... @@ -327,16 +327,16 @@ body: | ; CI-LABEL: name: test_load_constant_s32_align1 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -358,16 +358,16 @@ body: | ; CI: $vgpr0 = COPY [[OR2]](s32) ; VI-LABEL: name: test_load_constant_s32_align1 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -389,16 +389,16 @@ body: | ; VI: $vgpr0 = COPY [[OR2]](s32) ; GFX9-LABEL: name: test_load_constant_s32_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -419,7 +419,7 @@ body: | ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX9: $vgpr0 = COPY [[OR2]](s32) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (load 4, align 1, addrspace 4) + %1:_(s32) = G_LOAD %0 :: (load (s32), align 1, addrspace 4) $vgpr0 = COPY %1 ... @@ -431,21 +431,21 @@ body: | ; CI-LABEL: name: test_load_constant_s24_align8 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, align 8, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), align 8, addrspace 4) ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: $vgpr0 = COPY [[COPY1]](s32) ; VI-LABEL: name: test_load_constant_s24_align8 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, align 8, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), align 8, addrspace 4) ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-LABEL: name: test_load_constant_s24_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, align 8, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), align 8, addrspace 4) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: $vgpr0 = COPY [[COPY1]](s32) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s24) = G_LOAD %0 :: (load 3, align 8, addrspace 4) + %1:_(s24) = G_LOAD %0 :: (load (s24), align 8, addrspace 4) %2:_(s32) = G_ANYEXT %1 $vgpr0 = COPY %2 ... @@ -458,21 +458,21 @@ body: | ; CI-LABEL: name: test_load_constant_s24_align4 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4) ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: $vgpr0 = COPY [[COPY1]](s32) ; VI-LABEL: name: test_load_constant_s24_align4 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4) ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-LABEL: name: test_load_constant_s24_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: $vgpr0 = COPY [[COPY1]](s32) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s24) = G_LOAD %0 :: (load 3, align 4, addrspace 4) + %1:_(s24) = G_LOAD %0 :: (load (s24), align 4, addrspace 4) %2:_(s32) = G_ANYEXT %1 $vgpr0 = COPY %2 ... @@ -485,10 +485,10 @@ body: | ; CI-LABEL: name: test_load_constant_s24_align2 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 2, align 2, addrspace 4) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 2, align 2, addrspace 4) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -522,10 +522,10 @@ body: | ; CI: $vgpr0 = COPY [[COPY5]](s32) ; VI-LABEL: name: test_load_constant_s24_align2 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 2, align 2, addrspace 4) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 2, align 2, addrspace 4) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -555,10 +555,10 @@ body: | ; VI: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-LABEL: name: test_load_constant_s24_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 2, align 2, addrspace 4) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 2, align 2, addrspace 4) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -587,7 +587,7 @@ body: | ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) ; GFX9: $vgpr0 = COPY [[COPY1]](s32) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s24) = G_LOAD %0 :: (load 3, align 2, addrspace 4) + %1:_(s24) = G_LOAD %0 :: (load (s24), align 2, addrspace 4) %2:_(s32) = G_ANYEXT %1 $vgpr0 = COPY %2 ... @@ -600,13 +600,13 @@ body: | ; CI-LABEL: name: test_load_constant_s24_align1 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) @@ -635,13 +635,13 @@ body: | ; CI: $vgpr0 = COPY [[COPY4]](s32) ; VI-LABEL: name: test_load_constant_s24_align1 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) @@ -666,13 +666,13 @@ body: | ; VI: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-LABEL: name: test_load_constant_s24_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) @@ -696,7 +696,7 @@ body: | ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) ; GFX9: $vgpr0 = COPY [[COPY1]](s32) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s24) = G_LOAD %0 :: (load 3, align 1, addrspace 4) + %1:_(s24) = G_LOAD %0 :: (load (s24), align 1, addrspace 4) %2:_(s32) = G_ANYEXT %1 $vgpr0 = COPY %2 ... @@ -709,27 +709,27 @@ body: | ; CI-LABEL: name: test_load_constant_s48_align8 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load (s64), addrspace 4) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 ; CI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) ; CI: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] ; CI: $vgpr0_vgpr1 = COPY [[AND]](s64) ; VI-LABEL: name: test_load_constant_s48_align8 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load (s64), addrspace 4) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) ; VI: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] ; VI: $vgpr0_vgpr1 = COPY [[AND]](s64) ; GFX9-LABEL: name: test_load_constant_s48_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load (s64), addrspace 4) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) ; GFX9: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] ; GFX9: $vgpr0_vgpr1 = COPY [[AND]](s64) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s48) = G_LOAD %0 :: (load 6, align 8, addrspace 4) + %1:_(s48) = G_LOAD %0 :: (load (s48), align 8, addrspace 4) %2:_(s64) = G_ZEXT %1 $vgpr0_vgpr1 = COPY %2 ... @@ -742,18 +742,18 @@ body: | ; CI-LABEL: name: test_load_constant_s64_align8 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load (s64), addrspace 4) ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) ; VI-LABEL: name: test_load_constant_s64_align8 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load (s64), addrspace 4) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) ; GFX9-LABEL: name: test_load_constant_s64_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load (s64), addrspace 4) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load 8, align 8, addrspace 4) + %1:_(s64) = G_LOAD %0 :: (load (s64), align 8, addrspace 4) $vgpr0_vgpr1 = COPY %1 ... @@ -765,18 +765,18 @@ body: | ; CI-LABEL: name: test_load_constant_s64_align4 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 8, align 4, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load (s64), align 4, addrspace 4) ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) ; VI-LABEL: name: test_load_constant_s64_align4 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 8, align 4, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load (s64), align 4, addrspace 4) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) ; GFX9-LABEL: name: test_load_constant_s64_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load 8, align 4, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p4) :: (load (s64), align 4, addrspace 4) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load 8, align 4, addrspace 4) + %1:_(s64) = G_LOAD %0 :: (load (s64), align 4, addrspace 4) $vgpr0_vgpr1 = COPY %1 ... @@ -788,16 +788,16 @@ body: | ; CI-LABEL: name: test_load_constant_s64_align2 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 from unknown-address + 2, addrspace 4) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 from unknown-address + 4, addrspace 4) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4) ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 2 from unknown-address + 6, addrspace 4) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -816,16 +816,16 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; VI-LABEL: name: test_load_constant_s64_align2 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 from unknown-address + 2, addrspace 4) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 from unknown-address + 4, addrspace 4) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 2 from unknown-address + 6, addrspace 4) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -844,16 +844,16 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; GFX9-LABEL: name: test_load_constant_s64_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 from unknown-address + 2, addrspace 4) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 from unknown-address + 4, addrspace 4) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4) ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 2 from unknown-address + 6, addrspace 4) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -871,7 +871,7 @@ body: | ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load 8, align 2, addrspace 4) + %1:_(s64) = G_LOAD %0 :: (load (s64), align 2, addrspace 4) $vgpr0_vgpr1 = COPY %1 ... @@ -883,28 +883,28 @@ body: | ; CI-LABEL: name: test_load_constant_s64_align1 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; CI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 from unknown-address + 4, addrspace 4) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; CI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 from unknown-address + 5, addrspace 4) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) ; CI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 from unknown-address + 6, addrspace 4) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) ; CI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; CI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 from unknown-address + 7, addrspace 4) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) ; CI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -952,28 +952,28 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; VI-LABEL: name: test_load_constant_s64_align1 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 from unknown-address + 4, addrspace 4) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; VI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 from unknown-address + 5, addrspace 4) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 from unknown-address + 6, addrspace 4) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; VI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 from unknown-address + 7, addrspace 4) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) ; VI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -1013,28 +1013,28 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; GFX9-LABEL: name: test_load_constant_s64_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 from unknown-address + 4, addrspace 4) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 from unknown-address + 5, addrspace 4) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 from unknown-address + 6, addrspace 4) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) ; GFX9: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 from unknown-address + 7, addrspace 4) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) ; GFX9: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -1073,7 +1073,7 @@ body: | ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load 8, align 1, addrspace 4) + %1:_(s64) = G_LOAD %0 :: (load (s64), align 1, addrspace 4) $vgpr0_vgpr1 = COPY %1 ... @@ -1085,21 +1085,21 @@ body: | ; CI-LABEL: name: test_load_constant_s96_align16 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load 12, align 16, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (s96), align 16, addrspace 4) ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; VI-LABEL: name: test_load_constant_s96_align16 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load 12, align 16, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (s96), align 16, addrspace 4) ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; GFX9-LABEL: name: test_load_constant_s96_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load 12, align 16, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (s96), align 16, addrspace 4) ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s96) = G_LOAD %0 :: (load 12, align 16, addrspace 4) + %1:_(s96) = G_LOAD %0 :: (load (s96), align 16, addrspace 4) $vgpr0_vgpr1_vgpr2 = COPY %1 ... @@ -1111,21 +1111,21 @@ body: | ; CI-LABEL: name: test_load_constant_s96_align8 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load 12, align 8, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (s96), align 8, addrspace 4) ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; VI-LABEL: name: test_load_constant_s96_align8 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load 12, align 8, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (s96), align 8, addrspace 4) ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; GFX9-LABEL: name: test_load_constant_s96_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load 12, align 8, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (s96), align 8, addrspace 4) ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s96) = G_LOAD %0 :: (load 12, align 8, addrspace 4) + %1:_(s96) = G_LOAD %0 :: (load (s96), align 8, addrspace 4) $vgpr0_vgpr1_vgpr2 = COPY %1 ... @@ -1137,21 +1137,21 @@ body: | ; CI-LABEL: name: test_load_constant_s96_align4 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load 12, align 4, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (s96), align 4, addrspace 4) ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; VI-LABEL: name: test_load_constant_s96_align4 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load 12, align 4, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (s96), align 4, addrspace 4) ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; GFX9-LABEL: name: test_load_constant_s96_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load 12, align 4, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (s96), align 4, addrspace 4) ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s96) = G_LOAD %0 :: (load 12, align 4, addrspace 4) + %1:_(s96) = G_LOAD %0 :: (load (s96), align 4, addrspace 4) $vgpr0_vgpr1_vgpr2 = COPY %1 ... @@ -1163,10 +1163,10 @@ body: | ; CI-LABEL: name: test_load_constant_s96_align2 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 from unknown-address + 2, addrspace 4) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -1177,9 +1177,9 @@ body: | ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 from unknown-address + 4, addrspace 4) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4) ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 2 from unknown-address + 6, addrspace 4) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4) ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -1188,9 +1188,9 @@ body: | ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 2 from unknown-address + 8, addrspace 4) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s16) from unknown-address + 8, addrspace 4) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 2 from unknown-address + 10, addrspace 4) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s16) from unknown-address + 10, addrspace 4) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -1202,10 +1202,10 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; VI-LABEL: name: test_load_constant_s96_align2 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 from unknown-address + 2, addrspace 4) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -1216,9 +1216,9 @@ body: | ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 from unknown-address + 4, addrspace 4) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 2 from unknown-address + 6, addrspace 4) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -1227,9 +1227,9 @@ body: | ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 2 from unknown-address + 8, addrspace 4) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s16) from unknown-address + 8, addrspace 4) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 2 from unknown-address + 10, addrspace 4) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s16) from unknown-address + 10, addrspace 4) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -1241,10 +1241,10 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; GFX9-LABEL: name: test_load_constant_s96_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 from unknown-address + 2, addrspace 4) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -1255,9 +1255,9 @@ body: | ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 from unknown-address + 4, addrspace 4) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4) ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 2 from unknown-address + 6, addrspace 4) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4) ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -1266,9 +1266,9 @@ body: | ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 2 from unknown-address + 8, addrspace 4) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s16) from unknown-address + 8, addrspace 4) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 2 from unknown-address + 10, addrspace 4) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s16) from unknown-address + 10, addrspace 4) ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -1279,7 +1279,7 @@ body: | ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s96) = G_LOAD %0 :: (load 12, align 2, addrspace 4) + %1:_(s96) = G_LOAD %0 :: (load (s96), align 2, addrspace 4) $vgpr0_vgpr1_vgpr2 = COPY %1 ... @@ -1291,16 +1291,16 @@ body: | ; CI-LABEL: name: test_load_constant_s96_align1 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -1321,13 +1321,13 @@ body: | ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; CI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 from unknown-address + 4, addrspace 4) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 from unknown-address + 5, addrspace 4) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) ; CI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 from unknown-address + 6, addrspace 4) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 from unknown-address + 7, addrspace 4) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -1344,13 +1344,13 @@ body: | ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; CI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CI: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load 1 from unknown-address + 8, addrspace 4) + ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4) ; CI: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load 1 from unknown-address + 9, addrspace 4) + ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4) ; CI: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load 1 from unknown-address + 10, addrspace 4) + ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4) ; CI: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load 1 from unknown-address + 11, addrspace 4) + ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4) ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -1370,16 +1370,16 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; VI-LABEL: name: test_load_constant_s96_align1 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -1400,13 +1400,13 @@ body: | ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 from unknown-address + 4, addrspace 4) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 from unknown-address + 5, addrspace 4) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 from unknown-address + 6, addrspace 4) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 from unknown-address + 7, addrspace 4) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -1423,13 +1423,13 @@ body: | ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load 1 from unknown-address + 8, addrspace 4) + ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load 1 from unknown-address + 9, addrspace 4) + ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load 1 from unknown-address + 10, addrspace 4) + ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load 1 from unknown-address + 11, addrspace 4) + ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -1449,16 +1449,16 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; GFX9-LABEL: name: test_load_constant_s96_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -1479,13 +1479,13 @@ body: | ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX9: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 from unknown-address + 4, addrspace 4) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 from unknown-address + 5, addrspace 4) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 from unknown-address + 6, addrspace 4) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 from unknown-address + 7, addrspace 4) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -1502,13 +1502,13 @@ body: | ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; GFX9: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load 1 from unknown-address + 8, addrspace 4) + ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4) ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load 1 from unknown-address + 9, addrspace 4) + ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4) ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load 1 from unknown-address + 10, addrspace 4) + ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4) ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load 1 from unknown-address + 11, addrspace 4) + ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4) ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -1527,7 +1527,7 @@ body: | ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s96) = G_LOAD %0 :: (load 12, align 1, addrspace 4) + %1:_(s96) = G_LOAD %0 :: (load (s96), align 1, addrspace 4) $vgpr0_vgpr1_vgpr2 = COPY %1 ... @@ -1539,36 +1539,36 @@ body: | ; CI-LABEL: name: test_load_constant_s160_align4 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (s128), align 4, addrspace 4) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 4 from unknown-address + 16, addrspace 4) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s32) from unknown-address + 16, addrspace 4) ; CI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32) ; CI: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>) ; CI: S_NOP 0, implicit [[BITCAST]](s160) ; VI-LABEL: name: test_load_constant_s160_align4 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (s128), align 4, addrspace 4) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 4 from unknown-address + 16, addrspace 4) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s32) from unknown-address + 16, addrspace 4) ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32) ; VI: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>) ; VI: S_NOP 0, implicit [[BITCAST]](s160) ; GFX9-LABEL: name: test_load_constant_s160_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (s128), align 4, addrspace 4) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 4 from unknown-address + 16, addrspace 4) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s32) from unknown-address + 16, addrspace 4) ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32) ; GFX9: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>) ; GFX9: S_NOP 0, implicit [[BITCAST]](s160) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s160) = G_LOAD %0 :: (load 20, align 4, addrspace 4) + %1:_(s160) = G_LOAD %0 :: (load (s160), align 4, addrspace 4) S_NOP 0, implicit %1 ... @@ -1580,10 +1580,10 @@ body: | ; CI-LABEL: name: test_load_constant_s224_align4 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (s128), align 4, addrspace 4) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load 12 from unknown-address + 16, align 4, addrspace 4) + ; CI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (s96) from unknown-address + 16, align 4, addrspace 4) ; CI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; CI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF @@ -1599,10 +1599,10 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) ; VI-LABEL: name: test_load_constant_s224_align4 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (s128), align 4, addrspace 4) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load 12 from unknown-address + 16, align 4, addrspace 4) + ; VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (s96) from unknown-address + 16, align 4, addrspace 4) ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; VI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF @@ -1618,10 +1618,10 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) ; GFX9-LABEL: name: test_load_constant_s224_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (s128), align 4, addrspace 4) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load 12 from unknown-address + 16, align 4, addrspace 4) + ; GFX9: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (s96) from unknown-address + 16, align 4, addrspace 4) ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF @@ -1635,8 +1635,8 @@ body: | ; GFX9: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF ; GFX9: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0 ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) - %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s224) = G_LOAD %0 :: (load 28, align 4, addrspace 4) + %0:_(p4) = COPY $vgpr0_vgpr1 + %1:_(s224) = G_LOAD %0 :: (load (s224), align 4, addrspace 4) %2:_(s256) = G_IMPLICIT_DEF %3:_(s256) = G_INSERT %2, %1, 0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3 @@ -1651,21 +1651,21 @@ body: | ; CI-LABEL: name: test_load_constant_s128_align16 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (s128), addrspace 4) ; CI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; VI-LABEL: name: test_load_constant_s128_align16 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (s128), addrspace 4) ; VI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; GFX9-LABEL: name: test_load_constant_s128_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (s128), addrspace 4) ; GFX9: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s128) = G_LOAD %0 :: (load 16, align 16, addrspace 4) + %1:_(s128) = G_LOAD %0 :: (load (s128), align 16, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -1677,21 +1677,21 @@ body: | ; CI-LABEL: name: test_load_constant_s128_align4 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (s128), align 4, addrspace 4) ; CI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; VI-LABEL: name: test_load_constant_s128_align4 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (s128), align 4, addrspace 4) ; VI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; GFX9-LABEL: name: test_load_constant_s128_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (s128), align 4, addrspace 4) ; GFX9: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s128) = G_LOAD %0 :: (load 16, align 4, addrspace 4) + %1:_(s128) = G_LOAD %0 :: (load (s128), align 4, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -1703,16 +1703,16 @@ body: | ; CI-LABEL: name: test_load_constant_s128_align1 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -1733,13 +1733,13 @@ body: | ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; CI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 from unknown-address + 4, addrspace 4) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 from unknown-address + 5, addrspace 4) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) ; CI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 from unknown-address + 6, addrspace 4) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 from unknown-address + 7, addrspace 4) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -1756,13 +1756,13 @@ body: | ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; CI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CI: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load 1 from unknown-address + 8, addrspace 4) + ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4) ; CI: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load 1 from unknown-address + 9, addrspace 4) + ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4) ; CI: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load 1 from unknown-address + 10, addrspace 4) + ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4) ; CI: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load 1 from unknown-address + 11, addrspace 4) + ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4) ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -1779,13 +1779,13 @@ body: | ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; CI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; CI: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C9]](s64) - ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load 1 from unknown-address + 12, addrspace 4) + ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4) ; CI: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load 1 from unknown-address + 13, addrspace 4) + ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4) ; CI: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load 1 from unknown-address + 14, addrspace 4) + ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4) ; CI: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load 1 from unknown-address + 15, addrspace 4) + ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4) ; CI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; CI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; CI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -1805,16 +1805,16 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; VI-LABEL: name: test_load_constant_s128_align1 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -1835,13 +1835,13 @@ body: | ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 from unknown-address + 4, addrspace 4) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 from unknown-address + 5, addrspace 4) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 from unknown-address + 6, addrspace 4) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 from unknown-address + 7, addrspace 4) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -1858,13 +1858,13 @@ body: | ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load 1 from unknown-address + 8, addrspace 4) + ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load 1 from unknown-address + 9, addrspace 4) + ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load 1 from unknown-address + 10, addrspace 4) + ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load 1 from unknown-address + 11, addrspace 4) + ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -1881,13 +1881,13 @@ body: | ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; VI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C9]](s64) - ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load 1 from unknown-address + 12, addrspace 4) + ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4) ; VI: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load 1 from unknown-address + 13, addrspace 4) + ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4) ; VI: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load 1 from unknown-address + 14, addrspace 4) + ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4) ; VI: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load 1 from unknown-address + 15, addrspace 4) + ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4) ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -1907,16 +1907,16 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; GFX9-LABEL: name: test_load_constant_s128_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -1937,13 +1937,13 @@ body: | ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX9: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 from unknown-address + 4, addrspace 4) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 from unknown-address + 5, addrspace 4) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 from unknown-address + 6, addrspace 4) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 from unknown-address + 7, addrspace 4) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -1960,13 +1960,13 @@ body: | ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; GFX9: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load 1 from unknown-address + 8, addrspace 4) + ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4) ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load 1 from unknown-address + 9, addrspace 4) + ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4) ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load 1 from unknown-address + 10, addrspace 4) + ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4) ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load 1 from unknown-address + 11, addrspace 4) + ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4) ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -1983,13 +1983,13 @@ body: | ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; GFX9: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C9]](s64) - ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load 1 from unknown-address + 12, addrspace 4) + ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4) ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load 1 from unknown-address + 13, addrspace 4) + ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4) ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load 1 from unknown-address + 14, addrspace 4) + ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4) ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load 1 from unknown-address + 15, addrspace 4) + ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4) ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; GFX9: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -2008,7 +2008,7 @@ body: | ; GFX9: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s128) = G_LOAD %0 :: (load 16, align 1, addrspace 4) + %1:_(s128) = G_LOAD %0 :: (load (s128), align 1, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -2020,21 +2020,21 @@ body: | ; CI-LABEL: name: test_load_constant_s256_align32 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load 32, align 16, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (s256), align 16, addrspace 4) ; CI: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256) ; VI-LABEL: name: test_load_constant_s256_align32 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load 32, align 16, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (s256), align 16, addrspace 4) ; VI: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256) ; GFX9-LABEL: name: test_load_constant_s256_align32 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load 32, align 16, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (s256), align 16, addrspace 4) ; GFX9: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s256) = G_LOAD %0 :: (load 32, align 16, addrspace 4) + %1:_(s256) = G_LOAD %0 :: (load (s256), align 16, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 ... @@ -2046,18 +2046,18 @@ body: | ; CI-LABEL: name: test_load_constant_p1_align8 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p4) :: (load (p1), addrspace 4) ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](p1) ; VI-LABEL: name: test_load_constant_p1_align8 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p4) :: (load (p1), addrspace 4) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](p1) ; GFX9-LABEL: name: test_load_constant_p1_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p4) :: (load (p1), addrspace 4) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p1) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(p1) = G_LOAD %0 :: (load 8, align 8, addrspace 4) + %1:_(p1) = G_LOAD %0 :: (load (p1), align 8, addrspace 4) $vgpr0_vgpr1 = COPY %1 ... @@ -2069,18 +2069,18 @@ body: | ; CI-LABEL: name: test_load_constant_p1_align4 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p4) :: (load 8, align 4, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p4) :: (load (p1), align 4, addrspace 4) ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](p1) ; VI-LABEL: name: test_load_constant_p1_align4 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p4) :: (load 8, align 4, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p4) :: (load (p1), align 4, addrspace 4) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](p1) ; GFX9-LABEL: name: test_load_constant_p1_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p4) :: (load 8, align 4, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p4) :: (load (p1), align 4, addrspace 4) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p1) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(p1) = G_LOAD %0 :: (load 8, align 4, addrspace 4) + %1:_(p1) = G_LOAD %0 :: (load (p1), align 4, addrspace 4) $vgpr0_vgpr1 = COPY %1 ... @@ -2092,28 +2092,28 @@ body: | ; CI-LABEL: name: test_load_constant_p1_align1 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; CI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 from unknown-address + 4, addrspace 4) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; CI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 from unknown-address + 5, addrspace 4) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) ; CI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 from unknown-address + 6, addrspace 4) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) ; CI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; CI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 from unknown-address + 7, addrspace 4) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) ; CI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -2161,28 +2161,28 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[MV]](p1) ; VI-LABEL: name: test_load_constant_p1_align1 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 from unknown-address + 4, addrspace 4) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; VI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 from unknown-address + 5, addrspace 4) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 from unknown-address + 6, addrspace 4) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; VI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 from unknown-address + 7, addrspace 4) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) ; VI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -2222,28 +2222,28 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[MV]](p1) ; GFX9-LABEL: name: test_load_constant_p1_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 from unknown-address + 4, addrspace 4) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 from unknown-address + 5, addrspace 4) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 from unknown-address + 6, addrspace 4) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) ; GFX9: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 from unknown-address + 7, addrspace 4) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) ; GFX9: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -2282,7 +2282,7 @@ body: | ; GFX9: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](p1) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(p1) = G_LOAD %0 :: (load 8, align 1, addrspace 4) + %1:_(p1) = G_LOAD %0 :: (load (p1), align 1, addrspace 4) $vgpr0_vgpr1 = COPY %1 ... @@ -2294,18 +2294,18 @@ body: | ; CI-LABEL: name: test_load_constant_p3_align4 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p4) :: (load (p3), addrspace 4) ; CI: $vgpr0 = COPY [[LOAD]](p3) ; VI-LABEL: name: test_load_constant_p3_align4 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p4) :: (load (p3), addrspace 4) ; VI: $vgpr0 = COPY [[LOAD]](p3) ; GFX9-LABEL: name: test_load_constant_p3_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p4) :: (load (p3), addrspace 4) ; GFX9: $vgpr0 = COPY [[LOAD]](p3) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(p3) = G_LOAD %0 :: (load 4, align 4, addrspace 4) + %1:_(p3) = G_LOAD %0 :: (load (p3), align 4, addrspace 4) $vgpr0 = COPY %1 ... @@ -2317,18 +2317,18 @@ body: | ; CI-LABEL: name: test_load_constant_p4_align8 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p4) :: (load (p4), addrspace 4) ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](p4) ; VI-LABEL: name: test_load_constant_p4_align8 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p4) :: (load (p4), addrspace 4) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](p4) ; GFX9-LABEL: name: test_load_constant_p4_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p4) :: (load (p4), addrspace 4) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p4) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(p4) = G_LOAD %0 :: (load 8, align 8, addrspace 4) + %1:_(p4) = G_LOAD %0 :: (load (p4), align 8, addrspace 4) $vgpr0_vgpr1 = COPY %1 ... @@ -2340,18 +2340,18 @@ body: | ; CI-LABEL: name: test_load_constant_p4_align4 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p4) :: (load 8, align 4, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p4) :: (load (p4), align 4, addrspace 4) ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](p4) ; VI-LABEL: name: test_load_constant_p4_align4 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p4) :: (load 8, align 4, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p4) :: (load (p4), align 4, addrspace 4) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](p4) ; GFX9-LABEL: name: test_load_constant_p4_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p4) :: (load 8, align 4, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p4) :: (load (p4), align 4, addrspace 4) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p4) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(p4) = G_LOAD %0 :: (load 8, align 4, addrspace 4) + %1:_(p4) = G_LOAD %0 :: (load (p4), align 4, addrspace 4) $vgpr0_vgpr1 = COPY %1 ... @@ -2363,16 +2363,16 @@ body: | ; CI-LABEL: name: test_load_constant_p4_align2 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 from unknown-address + 2, addrspace 4) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 from unknown-address + 4, addrspace 4) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4) ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 2 from unknown-address + 6, addrspace 4) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -2391,16 +2391,16 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[MV]](p4) ; VI-LABEL: name: test_load_constant_p4_align2 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 from unknown-address + 2, addrspace 4) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 from unknown-address + 4, addrspace 4) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 2 from unknown-address + 6, addrspace 4) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -2419,16 +2419,16 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[MV]](p4) ; GFX9-LABEL: name: test_load_constant_p4_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 from unknown-address + 2, addrspace 4) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 from unknown-address + 4, addrspace 4) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4) ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 2 from unknown-address + 6, addrspace 4) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -2446,7 +2446,7 @@ body: | ; GFX9: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](p4) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(p4) = G_LOAD %0 :: (load 8, align 2, addrspace 4) + %1:_(p4) = G_LOAD %0 :: (load (p4), align 2, addrspace 4) $vgpr0_vgpr1 = COPY %1 ... @@ -2458,28 +2458,28 @@ body: | ; CI-LABEL: name: test_load_constant_p4_align1 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; CI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 from unknown-address + 4, addrspace 4) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; CI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 from unknown-address + 5, addrspace 4) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) ; CI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 from unknown-address + 6, addrspace 4) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) ; CI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; CI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 from unknown-address + 7, addrspace 4) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) ; CI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -2527,28 +2527,28 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[MV]](p4) ; VI-LABEL: name: test_load_constant_p4_align1 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 from unknown-address + 4, addrspace 4) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; VI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 from unknown-address + 5, addrspace 4) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 from unknown-address + 6, addrspace 4) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; VI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 from unknown-address + 7, addrspace 4) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) ; VI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -2588,28 +2588,28 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[MV]](p4) ; GFX9-LABEL: name: test_load_constant_p4_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 from unknown-address + 4, addrspace 4) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 from unknown-address + 5, addrspace 4) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 from unknown-address + 6, addrspace 4) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) ; GFX9: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 from unknown-address + 7, addrspace 4) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) ; GFX9: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -2648,7 +2648,7 @@ body: | ; GFX9: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](p4) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(p4) = G_LOAD %0 :: (load 8, align 1, addrspace 4) + %1:_(p4) = G_LOAD %0 :: (load (p4), align 1, addrspace 4) $vgpr0_vgpr1 = COPY %1 ... @@ -2660,18 +2660,18 @@ body: | ; CI-LABEL: name: test_load_constant_p5_align4 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p4) :: (load (p5), addrspace 4) ; CI: $vgpr0 = COPY [[LOAD]](p5) ; VI-LABEL: name: test_load_constant_p5_align4 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p4) :: (load (p5), addrspace 4) ; VI: $vgpr0 = COPY [[LOAD]](p5) ; GFX9-LABEL: name: test_load_constant_p5_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p4) :: (load (p5), addrspace 4) ; GFX9: $vgpr0 = COPY [[LOAD]](p5) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(p5) = G_LOAD %0 :: (load 4, align 4, addrspace 4) + %1:_(p5) = G_LOAD %0 :: (load (p5), align 4, addrspace 4) $vgpr0 = COPY %1 ... @@ -2683,10 +2683,10 @@ body: | ; CI-LABEL: name: test_load_constant_p5_align2 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 from unknown-address + 2, addrspace 4) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -2699,10 +2699,10 @@ body: | ; CI: $vgpr0 = COPY [[INTTOPTR]](p5) ; VI-LABEL: name: test_load_constant_p5_align2 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 from unknown-address + 2, addrspace 4) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -2715,10 +2715,10 @@ body: | ; VI: $vgpr0 = COPY [[INTTOPTR]](p5) ; GFX9-LABEL: name: test_load_constant_p5_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 from unknown-address + 2, addrspace 4) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -2730,7 +2730,7 @@ body: | ; GFX9: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) ; GFX9: $vgpr0 = COPY [[INTTOPTR]](p5) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(p5) = G_LOAD %0 :: (load 4, align 2, addrspace 4) + %1:_(p5) = G_LOAD %0 :: (load (p5), align 2, addrspace 4) $vgpr0 = COPY %1 ... @@ -2742,16 +2742,16 @@ body: | ; CI-LABEL: name: test_load_constant_p5_align1 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -2774,16 +2774,16 @@ body: | ; CI: $vgpr0 = COPY [[INTTOPTR]](p5) ; VI-LABEL: name: test_load_constant_p5_align1 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -2806,16 +2806,16 @@ body: | ; VI: $vgpr0 = COPY [[INTTOPTR]](p5) ; GFX9-LABEL: name: test_load_constant_p5_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -2837,7 +2837,7 @@ body: | ; GFX9: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) ; GFX9: $vgpr0 = COPY [[INTTOPTR]](p5) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(p5) = G_LOAD %0 :: (load 4, align 1, addrspace 4) + %1:_(p5) = G_LOAD %0 :: (load (p5), align 1, addrspace 4) $vgpr0 = COPY %1 ... @@ -2849,7 +2849,7 @@ body: | ; CI-LABEL: name: test_load_constant_v2s8_align4 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (<2 x s8>), align 4, addrspace 4) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -2870,7 +2870,7 @@ body: | ; CI: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_load_constant_v2s8_align4 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (<2 x s8>), align 4, addrspace 4) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -2889,7 +2889,7 @@ body: | ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_load_constant_v2s8_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (<2 x s8>), align 4, addrspace 4) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -2907,7 +2907,7 @@ body: | ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, align 4, addrspace 4) + %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 4, addrspace 4) %2:_(s16) = G_BITCAST %1 %3:_(s32) = G_ANYEXT %2 $vgpr0 = COPY %3 @@ -2921,7 +2921,7 @@ body: | ; CI-LABEL: name: test_load_constant_v2s8_align2 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (<2 x s8>), addrspace 4) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -2942,7 +2942,7 @@ body: | ; CI: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_load_constant_v2s8_align2 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (<2 x s8>), addrspace 4) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -2961,7 +2961,7 @@ body: | ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_load_constant_v2s8_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (<2 x s8>), addrspace 4) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -2979,7 +2979,7 @@ body: | ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, align 2, addrspace 4) + %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 2, addrspace 4) %2:_(s16) = G_BITCAST %1 %3:_(s32) = G_ANYEXT %2 $vgpr0 = COPY %3 @@ -2993,10 +2993,10 @@ body: | ; CI-LABEL: name: test_load_constant_v2s8_align1 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; CI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -3011,10 +3011,10 @@ body: | ; CI: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_load_constant_v2s8_align1 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -3027,10 +3027,10 @@ body: | ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_load_constant_v2s8_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -3042,7 +3042,7 @@ body: | ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, align 1, addrspace 4) + %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 1, addrspace 4) %2:_(s16) = G_BITCAST %1 %3:_(s32) = G_ANYEXT %2 $vgpr0 = COPY %3 @@ -3056,24 +3056,24 @@ body: | ; CI-LABEL: name: test_load_constant_v3s8_align4 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p4) :: (load 3, align 4, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p4) :: (load (<3 x s8>), align 4, addrspace 4) ; CI: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; CI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; CI: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; VI-LABEL: name: test_load_constant_v3s8_align4 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p4) :: (load 3, align 4, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p4) :: (load (<3 x s8>), align 4, addrspace 4) ; VI: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; VI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; VI: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; GFX9-LABEL: name: test_load_constant_v3s8_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p4) :: (load 3, align 4, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p4) :: (load (<3 x s8>), align 4, addrspace 4) ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; GFX9: $vgpr0 = COPY [[INSERT]](<4 x s8>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<3 x s8>) = G_LOAD %0 :: (load 3, align 4, addrspace 4) + %1:_(<3 x s8>) = G_LOAD %0 :: (load (<3 x s8>), align 4, addrspace 4) %2:_(<4 x s8>) = G_IMPLICIT_DEF %3:_(<4 x s8>) = G_INSERT %2, %1, 0 $vgpr0 = COPY %3 @@ -3087,24 +3087,24 @@ body: | ; CI-LABEL: name: test_load_constant_v3s8_align1 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p4) :: (load 2, align 1, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p4) :: (load (<3 x s8>), align 1, addrspace 4) ; CI: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; CI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; CI: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; VI-LABEL: name: test_load_constant_v3s8_align1 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p4) :: (load 2, align 1, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p4) :: (load (<3 x s8>), align 1, addrspace 4) ; VI: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; VI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; VI: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; GFX9-LABEL: name: test_load_constant_v3s8_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p4) :: (load 2, align 1, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p4) :: (load (<3 x s8>), align 1, addrspace 4) ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; GFX9: $vgpr0 = COPY [[INSERT]](<4 x s8>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<3 x s8>) = G_LOAD %0 :: (load 2, align 1, addrspace 4) + %1:_(<3 x s8>) = G_LOAD %0 :: (load (<3 x s8>), align 1, addrspace 4) %2:_(<4 x s8>) = G_IMPLICIT_DEF %3:_(<4 x s8>) = G_INSERT %2, %1, 0 $vgpr0 = COPY %3 @@ -3118,7 +3118,7 @@ body: | ; CI-LABEL: name: test_load_constant_v4s8_align4 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (<4 x s8>), addrspace 4) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -3134,7 +3134,7 @@ body: | ; CI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; VI-LABEL: name: test_load_constant_v4s8_align4 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (<4 x s8>), addrspace 4) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -3150,7 +3150,7 @@ body: | ; VI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; GFX9-LABEL: name: test_load_constant_v4s8_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (<4 x s8>), addrspace 4) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -3167,7 +3167,7 @@ body: | ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) ; GFX9: $vgpr0 = COPY [[TRUNC]](<4 x s8>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<4 x s8>) = G_LOAD %0 :: (load 4, align 4, addrspace 4) + %1:_(<4 x s8>) = G_LOAD %0 :: (load (<4 x s8>), align 4, addrspace 4) $vgpr0 = COPY %1 ... @@ -3179,10 +3179,10 @@ body: | ; CI-LABEL: name: test_load_constant_v4s8_align2 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 from unknown-address + 2, addrspace 4) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -3201,10 +3201,10 @@ body: | ; CI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; VI-LABEL: name: test_load_constant_v4s8_align2 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 from unknown-address + 2, addrspace 4) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -3223,10 +3223,10 @@ body: | ; VI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; GFX9-LABEL: name: test_load_constant_v4s8_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 from unknown-address + 2, addrspace 4) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -3246,7 +3246,7 @@ body: | ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) ; GFX9: $vgpr0 = COPY [[TRUNC]](<4 x s8>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<4 x s8>) = G_LOAD %0 :: (load 4, align 2, addrspace 4) + %1:_(<4 x s8>) = G_LOAD %0 :: (load (<4 x s8>), align 2, addrspace 4) $vgpr0 = COPY %1 ... @@ -3258,16 +3258,16 @@ body: | ; CI-LABEL: name: test_load_constant_v4s8_align1 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) @@ -3277,16 +3277,16 @@ body: | ; CI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; VI-LABEL: name: test_load_constant_v4s8_align1 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) @@ -3296,16 +3296,16 @@ body: | ; VI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; GFX9-LABEL: name: test_load_constant_v4s8_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) @@ -3316,7 +3316,7 @@ body: | ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) ; GFX9: $vgpr0 = COPY [[TRUNC]](<4 x s8>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<4 x s8>) = G_LOAD %0 :: (load 4, align 1, addrspace 4) + %1:_(<4 x s8>) = G_LOAD %0 :: (load (<4 x s8>), align 1, addrspace 4) $vgpr0 = COPY %1 ... @@ -3328,7 +3328,7 @@ body: | ; CI-LABEL: name: test_load_constant_v8s8_align8 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s8>), addrspace 4) ; CI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) @@ -3355,7 +3355,7 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<8 x s8>) ; VI-LABEL: name: test_load_constant_v8s8_align8 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s8>), addrspace 4) ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) @@ -3382,7 +3382,7 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<8 x s8>) ; GFX9-LABEL: name: test_load_constant_v8s8_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s8>), addrspace 4) ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) @@ -3412,7 +3412,7 @@ body: | ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<8 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>) ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS2]](<8 x s8>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<8 x s8>) = G_LOAD %0 :: (load 8, align 8, addrspace 4) + %1:_(<8 x s8>) = G_LOAD %0 :: (load (<8 x s8>), align 8, addrspace 4) $vgpr0_vgpr1 = COPY %1 ... @@ -3424,7 +3424,7 @@ body: | ; CI-LABEL: name: test_load_constant_v16s8_align16 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s8>), addrspace 4) ; CI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) @@ -3469,7 +3469,7 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>) ; VI-LABEL: name: test_load_constant_v16s8_align16 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s8>), addrspace 4) ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) @@ -3514,7 +3514,7 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>) ; GFX9-LABEL: name: test_load_constant_v16s8_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s8>), addrspace 4) ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) @@ -3566,7 +3566,7 @@ body: | ; GFX9: [[CONCAT_VECTORS4:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<16 x s8>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<16 x s8>) = G_LOAD %0 :: (load 16, align 16, addrspace 4) + %1:_(<16 x s8>) = G_LOAD %0 :: (load (<16 x s8>), align 16, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -3578,7 +3578,7 @@ body: | ; CI-LABEL: name: test_load_constant_v32s8_align32 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<32 x s8>), addrspace 4) ; CI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<8 x s32>) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) @@ -3659,7 +3659,7 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<32 x s8>) ; VI-LABEL: name: test_load_constant_v32s8_align32 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<32 x s8>), addrspace 4) ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<8 x s32>) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) @@ -3740,7 +3740,7 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<32 x s8>) ; GFX9-LABEL: name: test_load_constant_v32s8_align32 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<32 x s8>), addrspace 4) ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<8 x s32>) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) @@ -3836,7 +3836,7 @@ body: | ; GFX9: [[CONCAT_VECTORS8:%[0-9]+]]:_(<32 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>), [[TRUNC4]](<4 x s8>), [[TRUNC5]](<4 x s8>), [[TRUNC6]](<4 x s8>), [[TRUNC7]](<4 x s8>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS8]](<32 x s8>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<32 x s8>) = G_LOAD %0 :: (load 32, align 32, addrspace 4) + %1:_(<32 x s8>) = G_LOAD %0 :: (load (<32 x s8>), align 32, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 ... @@ -3849,18 +3849,18 @@ body: | ; CI-LABEL: name: test_load_constant_v2s16_align4 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p4) :: (load (<2 x s16>), addrspace 4) ; CI: $vgpr0 = COPY [[LOAD]](<2 x s16>) ; VI-LABEL: name: test_load_constant_v2s16_align4 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p4) :: (load (<2 x s16>), addrspace 4) ; VI: $vgpr0 = COPY [[LOAD]](<2 x s16>) ; GFX9-LABEL: name: test_load_constant_v2s16_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p4) :: (load (<2 x s16>), addrspace 4) ; GFX9: $vgpr0 = COPY [[LOAD]](<2 x s16>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x s16>) = G_LOAD %0 :: (load 4, align 4, addrspace 4) + %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 4) $vgpr0 = COPY %1 ... @@ -3872,10 +3872,10 @@ body: | ; CI-LABEL: name: test_load_constant_v2s16_align2 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 from unknown-address + 2, addrspace 4) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -3888,10 +3888,10 @@ body: | ; CI: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; VI-LABEL: name: test_load_constant_v2s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 from unknown-address + 2, addrspace 4) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -3904,16 +3904,16 @@ body: | ; VI: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; GFX9-LABEL: name: test_load_constant_v2s16_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 from unknown-address + 2, addrspace 4) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x s16>) = G_LOAD %0 :: (load 4, align 2, addrspace 4) + %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 2, addrspace 4) $vgpr0 = COPY %1 ... @@ -3925,10 +3925,10 @@ body: | ; CI-LABEL: name: test_load_constant_v2s16_align1 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; CI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -3941,9 +3941,9 @@ body: | ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -3961,10 +3961,10 @@ body: | ; CI: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; VI-LABEL: name: test_load_constant_v2s16_align1 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -3975,9 +3975,9 @@ body: | ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) @@ -3993,10 +3993,10 @@ body: | ; VI: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; GFX9-LABEL: name: test_load_constant_v2s16_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -4007,9 +4007,9 @@ body: | ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) @@ -4021,7 +4021,7 @@ body: | ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x s16>) = G_LOAD %0 :: (load 4, align 1, addrspace 4) + %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 1, addrspace 4) $vgpr0 = COPY %1 ... @@ -4033,7 +4033,7 @@ body: | ; CI-LABEL: name: test_load_constant_v3s16_align8 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), addrspace 4) ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) @@ -4041,7 +4041,7 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_constant_v3s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), addrspace 4) ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) @@ -4049,14 +4049,14 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-LABEL: name: test_load_constant_v3s16_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), addrspace 4) ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 8, addrspace 4) + %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 8, addrspace 4) %2:_(<4 x s16>) = G_IMPLICIT_DEF %3:_(<4 x s16>) = G_INSERT %2, %1, 0 $vgpr0_vgpr1 = COPY %3 @@ -4070,24 +4070,24 @@ body: | ; CI-LABEL: name: test_load_constant_v3s16_align4 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p4) :: (load 6, align 4, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p4) :: (load (<3 x s16>), align 4, addrspace 4) ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0 ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_constant_v3s16_align4 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p4) :: (load 6, align 4, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p4) :: (load (<3 x s16>), align 4, addrspace 4) ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-LABEL: name: test_load_constant_v3s16_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p4) :: (load 6, align 4, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p4) :: (load (<3 x s16>), align 4, addrspace 4) ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 4, addrspace 4) + %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 4, addrspace 4) %2:_(<4 x s16>) = G_IMPLICIT_DEF %3:_(<4 x s16>) = G_INSERT %2, %1, 0 $vgpr0_vgpr1 = COPY %3 @@ -4101,13 +4101,13 @@ body: | ; CI-LABEL: name: test_load_constant_v3s16_align2 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 from unknown-address + 2, addrspace 4) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 from unknown-address + 4, addrspace 4) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] @@ -4131,13 +4131,13 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_constant_v3s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 from unknown-address + 2, addrspace 4) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 from unknown-address + 4, addrspace 4) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] @@ -4161,13 +4161,13 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-LABEL: name: test_load_constant_v3s16_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 from unknown-address + 2, addrspace 4) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 from unknown-address + 4, addrspace 4) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) @@ -4181,7 +4181,7 @@ body: | ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 2, addrspace 4) + %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 2, addrspace 4) %2:_(<4 x s16>) = G_IMPLICIT_DEF %3:_(<4 x s16>) = G_INSERT %2, %1, 0 $vgpr0_vgpr1 = COPY %3 @@ -4195,10 +4195,10 @@ body: | ; CI-LABEL: name: test_load_constant_v3s16_align1 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; CI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -4211,9 +4211,9 @@ body: | ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -4224,9 +4224,9 @@ body: | ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; CI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 from unknown-address + 4, addrspace 4) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 from unknown-address + 5, addrspace 4) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) ; CI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -4254,10 +4254,10 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_constant_v3s16_align1 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -4268,9 +4268,9 @@ body: | ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) @@ -4279,9 +4279,9 @@ body: | ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 from unknown-address + 4, addrspace 4) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 from unknown-address + 5, addrspace 4) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) @@ -4307,10 +4307,10 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-LABEL: name: test_load_constant_v3s16_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -4321,9 +4321,9 @@ body: | ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) @@ -4332,9 +4332,9 @@ body: | ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 from unknown-address + 4, addrspace 4) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 from unknown-address + 5, addrspace 4) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) @@ -4354,7 +4354,7 @@ body: | ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 1, addrspace 4) + %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 1, addrspace 4) %2:_(<4 x s16>) = G_IMPLICIT_DEF %3:_(<4 x s16>) = G_INSERT %2, %1, 0 $vgpr0_vgpr1 = COPY %3 @@ -4368,18 +4368,18 @@ body: | ; CI-LABEL: name: test_load_constant_v4s16_align8 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), addrspace 4) ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) ; VI-LABEL: name: test_load_constant_v4s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), addrspace 4) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) ; GFX9-LABEL: name: test_load_constant_v4s16_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), addrspace 4) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = G_LOAD %0 :: (load 8, align 8, addrspace 4) + %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 8, addrspace 4) $vgpr0_vgpr1 = COPY %1 ... @@ -4391,18 +4391,18 @@ body: | ; CI-LABEL: name: test_load_constant_v4s16_align4 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 8, align 4, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), align 4, addrspace 4) ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) ; VI-LABEL: name: test_load_constant_v4s16_align4 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 8, align 4, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), align 4, addrspace 4) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) ; GFX9-LABEL: name: test_load_constant_v4s16_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 8, align 4, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), align 4, addrspace 4) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = G_LOAD %0 :: (load 8, align 4, addrspace 4) + %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 4, addrspace 4) $vgpr0_vgpr1 = COPY %1 ... @@ -4414,16 +4414,16 @@ body: | ; CI-LABEL: name: test_load_constant_v4s16_align2 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 from unknown-address + 2, addrspace 4) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 from unknown-address + 4, addrspace 4) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4) ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 2 from unknown-address + 6, addrspace 4) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -4444,16 +4444,16 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_load_constant_v4s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 from unknown-address + 2, addrspace 4) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 from unknown-address + 4, addrspace 4) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 2 from unknown-address + 6, addrspace 4) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -4474,16 +4474,16 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_load_constant_v4s16_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 from unknown-address + 2, addrspace 4) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 from unknown-address + 4, addrspace 4) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4) ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 2 from unknown-address + 6, addrspace 4) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) @@ -4493,7 +4493,7 @@ body: | ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = G_LOAD %0 :: (load 8, align 2, addrspace 4) + %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 2, addrspace 4) $vgpr0_vgpr1 = COPY %1 ... @@ -4505,10 +4505,10 @@ body: | ; CI-LABEL: name: test_load_constant_v4s16_align1 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; CI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -4521,9 +4521,9 @@ body: | ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -4540,9 +4540,9 @@ body: | ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 from unknown-address + 4, addrspace 4) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 from unknown-address + 5, addrspace 4) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) ; CI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -4552,9 +4552,9 @@ body: | ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 from unknown-address + 6, addrspace 4) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 from unknown-address + 7, addrspace 4) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -4572,10 +4572,10 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_load_constant_v4s16_align1 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -4586,9 +4586,9 @@ body: | ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) @@ -4603,9 +4603,9 @@ body: | ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 from unknown-address + 4, addrspace 4) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 from unknown-address + 5, addrspace 4) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) @@ -4613,9 +4613,9 @@ body: | ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; VI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 from unknown-address + 6, addrspace 4) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 from unknown-address + 7, addrspace 4) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) @@ -4631,10 +4631,10 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_load_constant_v4s16_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -4645,9 +4645,9 @@ body: | ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) @@ -4659,9 +4659,9 @@ body: | ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 from unknown-address + 4, addrspace 4) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 from unknown-address + 5, addrspace 4) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) @@ -4669,9 +4669,9 @@ body: | ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 from unknown-address + 6, addrspace 4) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 from unknown-address + 7, addrspace 4) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) @@ -4684,7 +4684,7 @@ body: | ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = G_LOAD %0 :: (load 8, align 1, addrspace 4) + %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 1, addrspace 4) $vgpr0_vgpr1 = COPY %1 ... @@ -4696,18 +4696,21 @@ body: | ; CI-LABEL: name: test_load_constant_v8s16_align8 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) - ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s16>), align 8, addrspace 4) + ; CI: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) + ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) ; VI-LABEL: name: test_load_constant_v8s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s16>), align 8, addrspace 4) + ; VI: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) ; GFX9-LABEL: name: test_load_constant_v8s16_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s16>), align 8, addrspace 4) + ; GFX9: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<8 x s16>) = G_LOAD %0 :: (load 8, align 8, addrspace 4) + %1:_(<8 x s16>) = G_LOAD %0 :: (load (<8 x s16>), align 8, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -4719,18 +4722,18 @@ body: | ; CI-LABEL: name: test_load_constant_v2s32_align8 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), addrspace 4) ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; VI-LABEL: name: test_load_constant_v2s32_align8 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), addrspace 4) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; GFX9-LABEL: name: test_load_constant_v2s32_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), addrspace 4) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_LOAD %0 :: (load 8, align 8, addrspace 4) + %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 8, addrspace 4) $vgpr0_vgpr1 = COPY %1 ... @@ -4742,18 +4745,18 @@ body: | ; CI-LABEL: name: test_load_constant_v2s32_align4 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load 8, align 4, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), align 4, addrspace 4) ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; VI-LABEL: name: test_load_constant_v2s32_align4 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load 8, align 4, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), align 4, addrspace 4) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; GFX9-LABEL: name: test_load_constant_v2s32_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load 8, align 4, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), align 4, addrspace 4) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_LOAD %0 :: (load 8, align 4, addrspace 4) + %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 4, addrspace 4) $vgpr0_vgpr1 = COPY %1 ... @@ -4765,10 +4768,10 @@ body: | ; CI-LABEL: name: test_load_constant_v2s32_align2 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 from unknown-address + 2, addrspace 4) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -4779,9 +4782,9 @@ body: | ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 from unknown-address + 4, addrspace 4) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4) ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 2 from unknown-address + 6, addrspace 4) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4) ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -4792,10 +4795,10 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; VI-LABEL: name: test_load_constant_v2s32_align2 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 from unknown-address + 2, addrspace 4) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -4806,9 +4809,9 @@ body: | ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 from unknown-address + 4, addrspace 4) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 2 from unknown-address + 6, addrspace 4) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -4819,10 +4822,10 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_load_constant_v2s32_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 from unknown-address + 2, addrspace 4) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -4833,9 +4836,9 @@ body: | ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 from unknown-address + 4, addrspace 4) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4) ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 2 from unknown-address + 6, addrspace 4) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4) ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -4845,7 +4848,7 @@ body: | ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_LOAD %0 :: (load 8, align 2, addrspace 4) + %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 2, addrspace 4) $vgpr0_vgpr1 = COPY %1 ... @@ -4857,16 +4860,16 @@ body: | ; CI-LABEL: name: test_load_constant_v2s32_align1 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -4887,13 +4890,13 @@ body: | ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; CI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 from unknown-address + 4, addrspace 4) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 from unknown-address + 5, addrspace 4) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) ; CI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 from unknown-address + 6, addrspace 4) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 from unknown-address + 7, addrspace 4) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -4912,16 +4915,16 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; VI-LABEL: name: test_load_constant_v2s32_align1 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -4942,13 +4945,13 @@ body: | ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 from unknown-address + 4, addrspace 4) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 from unknown-address + 5, addrspace 4) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 from unknown-address + 6, addrspace 4) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 from unknown-address + 7, addrspace 4) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -4967,16 +4970,16 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_load_constant_v2s32_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -4997,13 +5000,13 @@ body: | ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX9: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 from unknown-address + 4, addrspace 4) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 from unknown-address + 5, addrspace 4) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 from unknown-address + 6, addrspace 4) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 from unknown-address + 7, addrspace 4) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -5021,7 +5024,7 @@ body: | ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_LOAD %0 :: (load 8, align 1, addrspace 4) + %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 1, addrspace 4) $vgpr0_vgpr1 = COPY %1 ... @@ -5033,18 +5036,18 @@ body: | ; CI-LABEL: name: test_load_constant_v3s32_align16 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load 12, align 16, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 16, addrspace 4) ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) ; VI-LABEL: name: test_load_constant_v3s32_align16 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load 12, align 16, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 16, addrspace 4) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) ; GFX9-LABEL: name: test_load_constant_v3s32_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load 12, align 16, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 16, addrspace 4) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<3 x s32>) = G_LOAD %0 :: (load 12, align 16, addrspace 4) + %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 16, addrspace 4) $vgpr0_vgpr1_vgpr2 = COPY %1 ... @@ -5058,18 +5061,18 @@ body: | ; CI-LABEL: name: test_load_constant_v3s32_align4 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load 12, align 4, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 4) ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) ; VI-LABEL: name: test_load_constant_v3s32_align4 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load 12, align 4, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 4) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) ; GFX9-LABEL: name: test_load_constant_v3s32_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load 12, align 4, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 4) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<3 x s32>) = G_LOAD %0 :: (load 12, align 4, addrspace 4) + %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 4, addrspace 4) $vgpr0_vgpr1_vgpr2 = COPY %1 ... @@ -5081,18 +5084,18 @@ body: | ; CI-LABEL: name: test_load_constant_v4s32_align16 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; VI-LABEL: name: test_load_constant_v4s32_align16 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; GFX9-LABEL: name: test_load_constant_v4s32_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), addrspace 4) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<4 x s32>) = G_LOAD %0 :: (load 16, align 16, addrspace 4) + %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 16, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -5104,18 +5107,18 @@ body: | ; CI-LABEL: name: test_load_constant_v4s32_align8 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, align 8, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 8, addrspace 4) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; VI-LABEL: name: test_load_constant_v4s32_align8 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, align 8, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 8, addrspace 4) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; GFX9-LABEL: name: test_load_constant_v4s32_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, align 8, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 8, addrspace 4) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<4 x s32>) = G_LOAD %0 :: (load 16, align 8, addrspace 4) + %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 8, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -5127,18 +5130,18 @@ body: | ; CI-LABEL: name: test_load_constant_v4s32_align4 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; VI-LABEL: name: test_load_constant_v4s32_align4 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; GFX9-LABEL: name: test_load_constant_v4s32_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 4) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<4 x s32>) = G_LOAD %0 :: (load 16, align 4, addrspace 4) + %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 4, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -5150,18 +5153,18 @@ body: | ; CI-LABEL: name: test_load_constant_v8s32_align32 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), addrspace 4) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>) ; VI-LABEL: name: test_load_constant_v8s32_align32 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), addrspace 4) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>) ; GFX9-LABEL: name: test_load_constant_v8s32_align32 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), addrspace 4) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<8 x s32>) = G_LOAD %0 :: (load 32, align 32, addrspace 4) + %1:_(<8 x s32>) = G_LOAD %0 :: (load (<8 x s32>), align 32, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 ... @@ -5173,41 +5176,41 @@ body: | ; CI-LABEL: name: test_load_constant_v16s32_align32 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load 64, align 32, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s32>), align 32, addrspace 4) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) ; VI-LABEL: name: test_load_constant_v16s32_align32 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load 64, align 32, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s32>), align 32, addrspace 4) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) ; GFX9-LABEL: name: test_load_constant_v16s32_align32 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load 64, align 32, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s32>), align 32, addrspace 4) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<16 x s32>) = G_LOAD %0 :: (load 64, align 32, addrspace 4) + %1:_(<16 x s32>) = G_LOAD %0 :: (load (<16 x s32>), align 32, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1 ... --- -name: test_load_constant_v16s32_align32_extload_from_16 +name: test_load_constant_v16s32_align32_extload_from_v16s16 body: | bb.0: liveins: $vgpr0_vgpr1 - ; CI-LABEL: name: test_load_constant_v16s32_align32_extload_from_16 + ; CI-LABEL: name: test_load_constant_v16s32_align32_extload_from_v16s16 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, align 32, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s16>), addrspace 4) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) - ; VI-LABEL: name: test_load_constant_v16s32_align32_extload_from_16 + ; VI-LABEL: name: test_load_constant_v16s32_align32_extload_from_v16s16 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, align 32, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s16>), addrspace 4) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) - ; GFX9-LABEL: name: test_load_constant_v16s32_align32_extload_from_16 + ; GFX9-LABEL: name: test_load_constant_v16s32_align32_extload_from_v16s16 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, align 32, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s16>), addrspace 4) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<16 x s32>) = G_LOAD %0 :: (load 16, align 32, addrspace 4) + %1:_(<16 x s32>) = G_LOAD %0 :: (load (<16 x s16>), align 32, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1 ... @@ -5219,18 +5222,18 @@ body: | ; CI-LABEL: name: test_load_constant_v2s64_align16 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load 16, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), addrspace 4) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) ; VI-LABEL: name: test_load_constant_v2s64_align16 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load 16, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), addrspace 4) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) ; GFX9-LABEL: name: test_load_constant_v2s64_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load 16, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), addrspace 4) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = G_LOAD %0 :: (load 16, align 16, addrspace 4) + %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 16, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -5242,18 +5245,18 @@ body: | ; CI-LABEL: name: test_load_constant_v2s64_align8 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load 16, align 8, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), align 8, addrspace 4) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) ; VI-LABEL: name: test_load_constant_v2s64_align8 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load 16, align 8, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), align 8, addrspace 4) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) ; GFX9-LABEL: name: test_load_constant_v2s64_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load 16, align 8, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), align 8, addrspace 4) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = G_LOAD %0 :: (load 16, align 8, addrspace 4) + %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 8, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -5265,18 +5268,18 @@ body: | ; CI-LABEL: name: test_load_constant_v2s64_align4 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), align 4, addrspace 4) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) ; VI-LABEL: name: test_load_constant_v2s64_align4 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), align 4, addrspace 4) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) ; GFX9-LABEL: name: test_load_constant_v2s64_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), align 4, addrspace 4) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = G_LOAD %0 :: (load 16, align 4, addrspace 4) + %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 4, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -5288,16 +5291,16 @@ body: | ; CI-LABEL: name: test_load_constant_v2s64_align2 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 from unknown-address + 2, addrspace 4) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 from unknown-address + 4, addrspace 4) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4) ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 2 from unknown-address + 6, addrspace 4) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -5315,13 +5318,13 @@ body: | ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; CI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 2 from unknown-address + 8, addrspace 4) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s16) from unknown-address + 8, addrspace 4) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 2 from unknown-address + 10, addrspace 4) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s16) from unknown-address + 10, addrspace 4) ; CI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 2 from unknown-address + 12, addrspace 4) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s16) from unknown-address + 12, addrspace 4) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 2 from unknown-address + 14, addrspace 4) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s16) from unknown-address + 14, addrspace 4) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -5339,16 +5342,16 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; VI-LABEL: name: test_load_constant_v2s64_align2 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 from unknown-address + 2, addrspace 4) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 from unknown-address + 4, addrspace 4) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 2 from unknown-address + 6, addrspace 4) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -5366,13 +5369,13 @@ body: | ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 2 from unknown-address + 8, addrspace 4) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s16) from unknown-address + 8, addrspace 4) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 2 from unknown-address + 10, addrspace 4) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s16) from unknown-address + 10, addrspace 4) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 2 from unknown-address + 12, addrspace 4) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s16) from unknown-address + 12, addrspace 4) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 2 from unknown-address + 14, addrspace 4) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s16) from unknown-address + 14, addrspace 4) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -5390,16 +5393,16 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-LABEL: name: test_load_constant_v2s64_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 4) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 from unknown-address + 2, addrspace 4) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 4) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 from unknown-address + 4, addrspace 4) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4) ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 2 from unknown-address + 6, addrspace 4) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 4) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -5417,13 +5420,13 @@ body: | ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 2 from unknown-address + 8, addrspace 4) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s16) from unknown-address + 8, addrspace 4) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 2 from unknown-address + 10, addrspace 4) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s16) from unknown-address + 10, addrspace 4) ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 2 from unknown-address + 12, addrspace 4) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s16) from unknown-address + 12, addrspace 4) ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 2 from unknown-address + 14, addrspace 4) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s16) from unknown-address + 14, addrspace 4) ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -5440,7 +5443,7 @@ body: | ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = G_LOAD %0 :: (load 16, align 2, addrspace 4) + %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 2, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -5452,28 +5455,28 @@ body: | ; CI-LABEL: name: test_load_constant_v2s64_align1 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; CI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 from unknown-address + 4, addrspace 4) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; CI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 from unknown-address + 5, addrspace 4) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) ; CI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 from unknown-address + 6, addrspace 4) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) ; CI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; CI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 from unknown-address + 7, addrspace 4) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) ; CI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -5520,21 +5523,21 @@ body: | ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; CI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CI: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C11]](s64) - ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load 1 from unknown-address + 8, addrspace 4) + ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4) ; CI: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load 1 from unknown-address + 9, addrspace 4) + ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4) ; CI: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load 1 from unknown-address + 10, addrspace 4) + ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4) ; CI: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load 1 from unknown-address + 11, addrspace 4) + ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4) ; CI: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load 1 from unknown-address + 12, addrspace 4) + ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4) ; CI: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load 1 from unknown-address + 13, addrspace 4) + ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4) ; CI: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) - ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load 1 from unknown-address + 14, addrspace 4) + ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4) ; CI: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64) - ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load 1 from unknown-address + 15, addrspace 4) + ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4) ; CI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; CI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32) @@ -5580,28 +5583,28 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; VI-LABEL: name: test_load_constant_v2s64_align1 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 from unknown-address + 4, addrspace 4) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; VI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 from unknown-address + 5, addrspace 4) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 from unknown-address + 6, addrspace 4) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; VI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 from unknown-address + 7, addrspace 4) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) ; VI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -5640,21 +5643,21 @@ body: | ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; VI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C10]](s64) - ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load 1 from unknown-address + 8, addrspace 4) + ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load 1 from unknown-address + 9, addrspace 4) + ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load 1 from unknown-address + 10, addrspace 4) + ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load 1 from unknown-address + 11, addrspace 4) + ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4) ; VI: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load 1 from unknown-address + 12, addrspace 4) + ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4) ; VI: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load 1 from unknown-address + 13, addrspace 4) + ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4) ; VI: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) - ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load 1 from unknown-address + 14, addrspace 4) + ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4) ; VI: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64) - ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load 1 from unknown-address + 15, addrspace 4) + ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4) ; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) @@ -5692,28 +5695,28 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-LABEL: name: test_load_constant_v2s64_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 from unknown-address + 4, addrspace 4) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 from unknown-address + 5, addrspace 4) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 from unknown-address + 6, addrspace 4) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) ; GFX9: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 from unknown-address + 7, addrspace 4) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) ; GFX9: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -5752,21 +5755,21 @@ body: | ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; GFX9: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C10]](s64) - ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load 1 from unknown-address + 8, addrspace 4) + ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4) ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load 1 from unknown-address + 9, addrspace 4) + ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4) ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load 1 from unknown-address + 10, addrspace 4) + ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4) ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load 1 from unknown-address + 11, addrspace 4) + ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4) ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load 1 from unknown-address + 12, addrspace 4) + ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4) ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load 1 from unknown-address + 13, addrspace 4) + ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4) ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) - ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load 1 from unknown-address + 14, addrspace 4) + ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4) ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64) - ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load 1 from unknown-address + 15, addrspace 4) + ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4) ; GFX9: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; GFX9: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) @@ -5803,7 +5806,7 @@ body: | ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = G_LOAD %0 :: (load 16, align 1, addrspace 4) + %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 1, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -5815,27 +5818,27 @@ body: | ; CI-LABEL: name: test_load_constant_v3s64_align32 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4) ; CI: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 ; CI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; CI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) ; VI-LABEL: name: test_load_constant_v3s64_align32 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4) ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 ; VI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; VI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) ; GFX9-LABEL: name: test_load_constant_v3s64_align32 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4) ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<3 x s64>) = G_LOAD %0 :: (load 24, align 32, addrspace 4) + %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 32, addrspace 4) %2:_(<4 x s64>) = G_IMPLICIT_DEF %3:_(<4 x s64>) = G_INSERT %2, %1, 0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3 @@ -5849,10 +5852,10 @@ body: | ; CI-LABEL: name: test_load_constant_v3s64_align8 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load 16, align 8, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (s128), align 8, addrspace 4) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (load 8 from unknown-address + 16, addrspace 4) + ; CI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (load (s64) from unknown-address + 16, addrspace 4) ; CI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; CI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF @@ -5860,10 +5863,10 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) ; VI-LABEL: name: test_load_constant_v3s64_align8 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load 16, align 8, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (s128), align 8, addrspace 4) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (load 8 from unknown-address + 16, addrspace 4) + ; VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (load (s64) from unknown-address + 16, addrspace 4) ; VI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; VI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF @@ -5871,17 +5874,17 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) ; GFX9-LABEL: name: test_load_constant_v3s64_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load 16, align 8, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (s128), align 8, addrspace 4) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (load 8 from unknown-address + 16, addrspace 4) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (load (s64) from unknown-address + 16, addrspace 4) ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<3 x s64>) = G_LOAD %0 :: (load 24, align 8, addrspace 4) + %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 8, addrspace 4) %2:_(<4 x s64>) = G_IMPLICIT_DEF %3:_(<4 x s64>) = G_INSERT %2, %1, 0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3 @@ -5895,28 +5898,28 @@ body: | ; CI-LABEL: name: test_load_constant_v3s64_align1 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; CI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 from unknown-address + 4, addrspace 4) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; CI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 from unknown-address + 5, addrspace 4) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) ; CI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 from unknown-address + 6, addrspace 4) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) ; CI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; CI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 from unknown-address + 7, addrspace 4) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) ; CI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -5963,21 +5966,21 @@ body: | ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; CI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CI: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C11]](s64) - ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load 1 from unknown-address + 8, addrspace 4) + ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4) ; CI: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load 1 from unknown-address + 9, addrspace 4) + ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4) ; CI: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load 1 from unknown-address + 10, addrspace 4) + ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4) ; CI: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load 1 from unknown-address + 11, addrspace 4) + ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4) ; CI: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load 1 from unknown-address + 12, addrspace 4) + ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4) ; CI: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load 1 from unknown-address + 13, addrspace 4) + ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4) ; CI: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) - ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load 1 from unknown-address + 14, addrspace 4) + ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4) ; CI: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64) - ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load 1 from unknown-address + 15, addrspace 4) + ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4) ; CI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; CI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32) @@ -6021,21 +6024,21 @@ body: | ; CI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; CI: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C12]](s64) - ; CI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p4) :: (load 1 from unknown-address + 16, addrspace 4) + ; CI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p4) :: (load (s8) from unknown-address + 16, addrspace 4) ; CI: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; CI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p4) :: (load 1 from unknown-address + 17, addrspace 4) + ; CI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p4) :: (load (s8) from unknown-address + 17, addrspace 4) ; CI: [[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; CI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p4) :: (load 1 from unknown-address + 18, addrspace 4) + ; CI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p4) :: (load (s8) from unknown-address + 18, addrspace 4) ; CI: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; CI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load 1 from unknown-address + 19, addrspace 4) + ; CI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load (s8) from unknown-address + 19, addrspace 4) ; CI: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) - ; CI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p4) :: (load 1 from unknown-address + 20, addrspace 4) + ; CI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p4) :: (load (s8) from unknown-address + 20, addrspace 4) ; CI: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; CI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p4) :: (load 1 from unknown-address + 21, addrspace 4) + ; CI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p4) :: (load (s8) from unknown-address + 21, addrspace 4) ; CI: [[PTR_ADD21:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; CI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p4) :: (load 1 from unknown-address + 22, addrspace 4) + ; CI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p4) :: (load (s8) from unknown-address + 22, addrspace 4) ; CI: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; CI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load 1 from unknown-address + 23, addrspace 4) + ; CI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load (s8) from unknown-address + 23, addrspace 4) ; CI: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32) ; CI: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]] ; CI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C8]](s32) @@ -6083,28 +6086,28 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) ; VI-LABEL: name: test_load_constant_v3s64_align1 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 from unknown-address + 4, addrspace 4) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; VI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 from unknown-address + 5, addrspace 4) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 from unknown-address + 6, addrspace 4) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; VI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 from unknown-address + 7, addrspace 4) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) ; VI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -6143,21 +6146,21 @@ body: | ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; VI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C10]](s64) - ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load 1 from unknown-address + 8, addrspace 4) + ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load 1 from unknown-address + 9, addrspace 4) + ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load 1 from unknown-address + 10, addrspace 4) + ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load 1 from unknown-address + 11, addrspace 4) + ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4) ; VI: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load 1 from unknown-address + 12, addrspace 4) + ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4) ; VI: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load 1 from unknown-address + 13, addrspace 4) + ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4) ; VI: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) - ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load 1 from unknown-address + 14, addrspace 4) + ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4) ; VI: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64) - ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load 1 from unknown-address + 15, addrspace 4) + ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4) ; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) @@ -6193,21 +6196,21 @@ body: | ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; VI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C11]](s64) - ; VI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p4) :: (load 1 from unknown-address + 16, addrspace 4) + ; VI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p4) :: (load (s8) from unknown-address + 16, addrspace 4) ; VI: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; VI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p4) :: (load 1 from unknown-address + 17, addrspace 4) + ; VI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p4) :: (load (s8) from unknown-address + 17, addrspace 4) ; VI: [[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; VI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p4) :: (load 1 from unknown-address + 18, addrspace 4) + ; VI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p4) :: (load (s8) from unknown-address + 18, addrspace 4) ; VI: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; VI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load 1 from unknown-address + 19, addrspace 4) + ; VI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load (s8) from unknown-address + 19, addrspace 4) ; VI: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) - ; VI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p4) :: (load 1 from unknown-address + 20, addrspace 4) + ; VI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p4) :: (load (s8) from unknown-address + 20, addrspace 4) ; VI: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; VI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p4) :: (load 1 from unknown-address + 21, addrspace 4) + ; VI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p4) :: (load (s8) from unknown-address + 21, addrspace 4) ; VI: [[PTR_ADD21:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; VI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p4) :: (load 1 from unknown-address + 22, addrspace 4) + ; VI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p4) :: (load (s8) from unknown-address + 22, addrspace 4) ; VI: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; VI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load 1 from unknown-address + 23, addrspace 4) + ; VI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load (s8) from unknown-address + 23, addrspace 4) ; VI: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32) ; VI: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]] ; VI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32) @@ -6247,28 +6250,28 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) ; GFX9-LABEL: name: test_load_constant_v3s64_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 from unknown-address + 4, addrspace 4) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 from unknown-address + 5, addrspace 4) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 from unknown-address + 6, addrspace 4) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) ; GFX9: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 from unknown-address + 7, addrspace 4) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) ; GFX9: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -6307,21 +6310,21 @@ body: | ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; GFX9: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C10]](s64) - ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load 1 from unknown-address + 8, addrspace 4) + ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4) ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load 1 from unknown-address + 9, addrspace 4) + ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4) ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load 1 from unknown-address + 10, addrspace 4) + ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4) ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load 1 from unknown-address + 11, addrspace 4) + ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4) ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load 1 from unknown-address + 12, addrspace 4) + ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4) ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load 1 from unknown-address + 13, addrspace 4) + ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4) ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) - ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load 1 from unknown-address + 14, addrspace 4) + ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4) ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64) - ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load 1 from unknown-address + 15, addrspace 4) + ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4) ; GFX9: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; GFX9: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) @@ -6357,21 +6360,21 @@ body: | ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; GFX9: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C11]](s64) - ; GFX9: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p4) :: (load 1 from unknown-address + 16, addrspace 4) + ; GFX9: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p4) :: (load (s8) from unknown-address + 16, addrspace 4) ; GFX9: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; GFX9: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p4) :: (load 1 from unknown-address + 17, addrspace 4) + ; GFX9: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p4) :: (load (s8) from unknown-address + 17, addrspace 4) ; GFX9: [[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; GFX9: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p4) :: (load 1 from unknown-address + 18, addrspace 4) + ; GFX9: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p4) :: (load (s8) from unknown-address + 18, addrspace 4) ; GFX9: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; GFX9: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load 1 from unknown-address + 19, addrspace 4) + ; GFX9: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load (s8) from unknown-address + 19, addrspace 4) ; GFX9: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) - ; GFX9: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p4) :: (load 1 from unknown-address + 20, addrspace 4) + ; GFX9: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p4) :: (load (s8) from unknown-address + 20, addrspace 4) ; GFX9: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; GFX9: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p4) :: (load 1 from unknown-address + 21, addrspace 4) + ; GFX9: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p4) :: (load (s8) from unknown-address + 21, addrspace 4) ; GFX9: [[PTR_ADD21:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; GFX9: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p4) :: (load 1 from unknown-address + 22, addrspace 4) + ; GFX9: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p4) :: (load (s8) from unknown-address + 22, addrspace 4) ; GFX9: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; GFX9: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load 1 from unknown-address + 23, addrspace 4) + ; GFX9: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load (s8) from unknown-address + 23, addrspace 4) ; GFX9: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32) ; GFX9: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]] ; GFX9: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32) @@ -6410,7 +6413,7 @@ body: | ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<3 x s64>) = G_LOAD %0 :: (load 24, align 1, addrspace 4) + %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 1, addrspace 4) %2:_(<4 x s64>) = G_IMPLICIT_DEF %3:_(<4 x s64>) = G_INSERT %2, %1, 0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3 @@ -6424,18 +6427,18 @@ body: | ; CI-LABEL: name: test_load_constant_v4s64_align32 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) ; VI-LABEL: name: test_load_constant_v4s64_align32 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) ; GFX9-LABEL: name: test_load_constant_v4s64_align32 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<4 x s64>) = G_LOAD %0 :: (load 32, align 32, addrspace 4) + %1:_(<4 x s64>) = G_LOAD %0 :: (load (<4 x s64>), align 32, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 ... @@ -6447,18 +6450,18 @@ body: | ; CI-LABEL: name: test_load_constant_v4s64_align8 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load 32, align 8, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), align 8, addrspace 4) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) ; VI-LABEL: name: test_load_constant_v4s64_align8 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load 32, align 8, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), align 8, addrspace 4) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) ; GFX9-LABEL: name: test_load_constant_v4s64_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load 32, align 8, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), align 8, addrspace 4) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<4 x s64>) = G_LOAD %0 :: (load 32, align 8, addrspace 4) + %1:_(<4 x s64>) = G_LOAD %0 :: (load (<4 x s64>), align 8, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 ... @@ -6470,28 +6473,28 @@ body: | ; CI-LABEL: name: test_load_constant_v4s64_align1 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; CI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 from unknown-address + 4, addrspace 4) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; CI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 from unknown-address + 5, addrspace 4) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) ; CI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 from unknown-address + 6, addrspace 4) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) ; CI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; CI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 from unknown-address + 7, addrspace 4) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) ; CI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -6538,21 +6541,21 @@ body: | ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; CI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CI: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C11]](s64) - ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load 1 from unknown-address + 8, addrspace 4) + ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4) ; CI: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load 1 from unknown-address + 9, addrspace 4) + ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4) ; CI: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load 1 from unknown-address + 10, addrspace 4) + ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4) ; CI: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load 1 from unknown-address + 11, addrspace 4) + ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4) ; CI: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load 1 from unknown-address + 12, addrspace 4) + ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4) ; CI: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load 1 from unknown-address + 13, addrspace 4) + ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4) ; CI: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) - ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load 1 from unknown-address + 14, addrspace 4) + ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4) ; CI: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64) - ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load 1 from unknown-address + 15, addrspace 4) + ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4) ; CI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; CI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32) @@ -6596,21 +6599,21 @@ body: | ; CI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; CI: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C12]](s64) - ; CI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p4) :: (load 1 from unknown-address + 16, addrspace 4) + ; CI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p4) :: (load (s8) from unknown-address + 16, addrspace 4) ; CI: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; CI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p4) :: (load 1 from unknown-address + 17, addrspace 4) + ; CI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p4) :: (load (s8) from unknown-address + 17, addrspace 4) ; CI: [[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; CI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p4) :: (load 1 from unknown-address + 18, addrspace 4) + ; CI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p4) :: (load (s8) from unknown-address + 18, addrspace 4) ; CI: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; CI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load 1 from unknown-address + 19, addrspace 4) + ; CI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load (s8) from unknown-address + 19, addrspace 4) ; CI: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) - ; CI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p4) :: (load 1 from unknown-address + 20, addrspace 4) + ; CI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p4) :: (load (s8) from unknown-address + 20, addrspace 4) ; CI: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; CI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p4) :: (load 1 from unknown-address + 21, addrspace 4) + ; CI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p4) :: (load (s8) from unknown-address + 21, addrspace 4) ; CI: [[PTR_ADD21:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; CI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p4) :: (load 1 from unknown-address + 22, addrspace 4) + ; CI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p4) :: (load (s8) from unknown-address + 22, addrspace 4) ; CI: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; CI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load 1 from unknown-address + 23, addrspace 4) + ; CI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load (s8) from unknown-address + 23, addrspace 4) ; CI: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32) ; CI: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]] ; CI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C8]](s32) @@ -6654,21 +6657,21 @@ body: | ; CI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) ; CI: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 ; CI: [[PTR_ADD23:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C13]](s64) - ; CI: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p4) :: (load 1 from unknown-address + 24, addrspace 4) + ; CI: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p4) :: (load (s8) from unknown-address + 24, addrspace 4) ; CI: [[PTR_ADD24:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C]](s64) - ; CI: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p4) :: (load 1 from unknown-address + 25, addrspace 4) + ; CI: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p4) :: (load (s8) from unknown-address + 25, addrspace 4) ; CI: [[PTR_ADD25:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64) - ; CI: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p4) :: (load 1 from unknown-address + 26, addrspace 4) + ; CI: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p4) :: (load (s8) from unknown-address + 26, addrspace 4) ; CI: [[PTR_ADD26:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C2]](s64) - ; CI: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p4) :: (load 1 from unknown-address + 27, addrspace 4) + ; CI: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p4) :: (load (s8) from unknown-address + 27, addrspace 4) ; CI: [[PTR_ADD27:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64) - ; CI: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p4) :: (load 1 from unknown-address + 28, addrspace 4) + ; CI: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p4) :: (load (s8) from unknown-address + 28, addrspace 4) ; CI: [[PTR_ADD28:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64) - ; CI: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p4) :: (load 1 from unknown-address + 29, addrspace 4) + ; CI: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p4) :: (load (s8) from unknown-address + 29, addrspace 4) ; CI: [[PTR_ADD29:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64) - ; CI: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p4) :: (load 1 from unknown-address + 30, addrspace 4) + ; CI: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p4) :: (load (s8) from unknown-address + 30, addrspace 4) ; CI: [[PTR_ADD30:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C6]](s64) - ; CI: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p4) :: (load 1 from unknown-address + 31, addrspace 4) + ; CI: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p4) :: (load (s8) from unknown-address + 31, addrspace 4) ; CI: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD24]](s32) ; CI: [[AND24:%[0-9]+]]:_(s16) = G_AND [[TRUNC24]], [[C7]] ; CI: [[COPY24:%[0-9]+]]:_(s32) = COPY [[C8]](s32) @@ -6714,28 +6717,28 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; VI-LABEL: name: test_load_constant_v4s64_align1 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 from unknown-address + 4, addrspace 4) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; VI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 from unknown-address + 5, addrspace 4) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 from unknown-address + 6, addrspace 4) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; VI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 from unknown-address + 7, addrspace 4) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) ; VI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -6774,21 +6777,21 @@ body: | ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; VI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C10]](s64) - ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load 1 from unknown-address + 8, addrspace 4) + ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load 1 from unknown-address + 9, addrspace 4) + ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load 1 from unknown-address + 10, addrspace 4) + ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load 1 from unknown-address + 11, addrspace 4) + ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4) ; VI: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load 1 from unknown-address + 12, addrspace 4) + ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4) ; VI: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load 1 from unknown-address + 13, addrspace 4) + ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4) ; VI: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) - ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load 1 from unknown-address + 14, addrspace 4) + ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4) ; VI: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64) - ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load 1 from unknown-address + 15, addrspace 4) + ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4) ; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) @@ -6824,21 +6827,21 @@ body: | ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; VI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C11]](s64) - ; VI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p4) :: (load 1 from unknown-address + 16, addrspace 4) + ; VI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p4) :: (load (s8) from unknown-address + 16, addrspace 4) ; VI: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; VI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p4) :: (load 1 from unknown-address + 17, addrspace 4) + ; VI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p4) :: (load (s8) from unknown-address + 17, addrspace 4) ; VI: [[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; VI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p4) :: (load 1 from unknown-address + 18, addrspace 4) + ; VI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p4) :: (load (s8) from unknown-address + 18, addrspace 4) ; VI: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; VI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load 1 from unknown-address + 19, addrspace 4) + ; VI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load (s8) from unknown-address + 19, addrspace 4) ; VI: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) - ; VI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p4) :: (load 1 from unknown-address + 20, addrspace 4) + ; VI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p4) :: (load (s8) from unknown-address + 20, addrspace 4) ; VI: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; VI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p4) :: (load 1 from unknown-address + 21, addrspace 4) + ; VI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p4) :: (load (s8) from unknown-address + 21, addrspace 4) ; VI: [[PTR_ADD21:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; VI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p4) :: (load 1 from unknown-address + 22, addrspace 4) + ; VI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p4) :: (load (s8) from unknown-address + 22, addrspace 4) ; VI: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; VI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load 1 from unknown-address + 23, addrspace 4) + ; VI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load (s8) from unknown-address + 23, addrspace 4) ; VI: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32) ; VI: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]] ; VI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32) @@ -6874,21 +6877,21 @@ body: | ; VI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) ; VI: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 ; VI: [[PTR_ADD23:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C12]](s64) - ; VI: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p4) :: (load 1 from unknown-address + 24, addrspace 4) + ; VI: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p4) :: (load (s8) from unknown-address + 24, addrspace 4) ; VI: [[PTR_ADD24:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C]](s64) - ; VI: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p4) :: (load 1 from unknown-address + 25, addrspace 4) + ; VI: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p4) :: (load (s8) from unknown-address + 25, addrspace 4) ; VI: [[PTR_ADD25:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64) - ; VI: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p4) :: (load 1 from unknown-address + 26, addrspace 4) + ; VI: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p4) :: (load (s8) from unknown-address + 26, addrspace 4) ; VI: [[PTR_ADD26:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C2]](s64) - ; VI: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p4) :: (load 1 from unknown-address + 27, addrspace 4) + ; VI: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p4) :: (load (s8) from unknown-address + 27, addrspace 4) ; VI: [[PTR_ADD27:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64) - ; VI: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p4) :: (load 1 from unknown-address + 28, addrspace 4) + ; VI: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p4) :: (load (s8) from unknown-address + 28, addrspace 4) ; VI: [[PTR_ADD28:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64) - ; VI: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p4) :: (load 1 from unknown-address + 29, addrspace 4) + ; VI: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p4) :: (load (s8) from unknown-address + 29, addrspace 4) ; VI: [[PTR_ADD29:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64) - ; VI: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p4) :: (load 1 from unknown-address + 30, addrspace 4) + ; VI: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p4) :: (load (s8) from unknown-address + 30, addrspace 4) ; VI: [[PTR_ADD30:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C6]](s64) - ; VI: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p4) :: (load 1 from unknown-address + 31, addrspace 4) + ; VI: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p4) :: (load (s8) from unknown-address + 31, addrspace 4) ; VI: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD24]](s32) ; VI: [[AND24:%[0-9]+]]:_(s16) = G_AND [[TRUNC24]], [[C7]] ; VI: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD25]](s32) @@ -6926,28 +6929,28 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-LABEL: name: test_load_constant_v4s64_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 from unknown-address + 4, addrspace 4) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 from unknown-address + 5, addrspace 4) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 from unknown-address + 6, addrspace 4) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) ; GFX9: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 from unknown-address + 7, addrspace 4) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) ; GFX9: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -6986,21 +6989,21 @@ body: | ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; GFX9: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C10]](s64) - ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load 1 from unknown-address + 8, addrspace 4) + ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4) ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load 1 from unknown-address + 9, addrspace 4) + ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4) ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load 1 from unknown-address + 10, addrspace 4) + ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4) ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load 1 from unknown-address + 11, addrspace 4) + ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4) ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load 1 from unknown-address + 12, addrspace 4) + ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4) ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load 1 from unknown-address + 13, addrspace 4) + ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4) ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) - ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load 1 from unknown-address + 14, addrspace 4) + ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4) ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64) - ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load 1 from unknown-address + 15, addrspace 4) + ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4) ; GFX9: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; GFX9: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) @@ -7036,21 +7039,21 @@ body: | ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; GFX9: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C11]](s64) - ; GFX9: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p4) :: (load 1 from unknown-address + 16, addrspace 4) + ; GFX9: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p4) :: (load (s8) from unknown-address + 16, addrspace 4) ; GFX9: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; GFX9: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p4) :: (load 1 from unknown-address + 17, addrspace 4) + ; GFX9: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p4) :: (load (s8) from unknown-address + 17, addrspace 4) ; GFX9: [[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; GFX9: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p4) :: (load 1 from unknown-address + 18, addrspace 4) + ; GFX9: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p4) :: (load (s8) from unknown-address + 18, addrspace 4) ; GFX9: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; GFX9: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load 1 from unknown-address + 19, addrspace 4) + ; GFX9: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load (s8) from unknown-address + 19, addrspace 4) ; GFX9: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) - ; GFX9: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p4) :: (load 1 from unknown-address + 20, addrspace 4) + ; GFX9: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p4) :: (load (s8) from unknown-address + 20, addrspace 4) ; GFX9: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; GFX9: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p4) :: (load 1 from unknown-address + 21, addrspace 4) + ; GFX9: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p4) :: (load (s8) from unknown-address + 21, addrspace 4) ; GFX9: [[PTR_ADD21:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; GFX9: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p4) :: (load 1 from unknown-address + 22, addrspace 4) + ; GFX9: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p4) :: (load (s8) from unknown-address + 22, addrspace 4) ; GFX9: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; GFX9: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load 1 from unknown-address + 23, addrspace 4) + ; GFX9: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load (s8) from unknown-address + 23, addrspace 4) ; GFX9: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32) ; GFX9: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]] ; GFX9: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32) @@ -7086,21 +7089,21 @@ body: | ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) ; GFX9: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 ; GFX9: [[PTR_ADD23:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C12]](s64) - ; GFX9: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p4) :: (load 1 from unknown-address + 24, addrspace 4) + ; GFX9: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p4) :: (load (s8) from unknown-address + 24, addrspace 4) ; GFX9: [[PTR_ADD24:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C]](s64) - ; GFX9: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p4) :: (load 1 from unknown-address + 25, addrspace 4) + ; GFX9: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p4) :: (load (s8) from unknown-address + 25, addrspace 4) ; GFX9: [[PTR_ADD25:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64) - ; GFX9: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p4) :: (load 1 from unknown-address + 26, addrspace 4) + ; GFX9: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p4) :: (load (s8) from unknown-address + 26, addrspace 4) ; GFX9: [[PTR_ADD26:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C2]](s64) - ; GFX9: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p4) :: (load 1 from unknown-address + 27, addrspace 4) + ; GFX9: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p4) :: (load (s8) from unknown-address + 27, addrspace 4) ; GFX9: [[PTR_ADD27:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64) - ; GFX9: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p4) :: (load 1 from unknown-address + 28, addrspace 4) + ; GFX9: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p4) :: (load (s8) from unknown-address + 28, addrspace 4) ; GFX9: [[PTR_ADD28:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64) - ; GFX9: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p4) :: (load 1 from unknown-address + 29, addrspace 4) + ; GFX9: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p4) :: (load (s8) from unknown-address + 29, addrspace 4) ; GFX9: [[PTR_ADD29:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64) - ; GFX9: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p4) :: (load 1 from unknown-address + 30, addrspace 4) + ; GFX9: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p4) :: (load (s8) from unknown-address + 30, addrspace 4) ; GFX9: [[PTR_ADD30:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD23]], [[C6]](s64) - ; GFX9: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p4) :: (load 1 from unknown-address + 31, addrspace 4) + ; GFX9: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p4) :: (load (s8) from unknown-address + 31, addrspace 4) ; GFX9: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD24]](s32) ; GFX9: [[AND24:%[0-9]+]]:_(s16) = G_AND [[TRUNC24]], [[C7]] ; GFX9: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD25]](s32) @@ -7137,7 +7140,7 @@ body: | ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<4 x s64>) = G_LOAD %0 :: (load 32, align 1, addrspace 4) + %1:_(<4 x s64>) = G_LOAD %0 :: (load (<4 x s64>), align 1, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 ... @@ -7149,21 +7152,21 @@ body: | ; CI-LABEL: name: test_load_constant_v2s128_align32 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s128>), addrspace 4) ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>) ; VI-LABEL: name: test_load_constant_v2s128_align32 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s128>), addrspace 4) ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>) ; GFX9-LABEL: name: test_load_constant_v2s128_align32 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s128>), addrspace 4) ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x s128>) = G_LOAD %0 :: (load 32, align 32, addrspace 4) + %1:_(<2 x s128>) = G_LOAD %0 :: (load (<2 x s128>), align 32, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 ... @@ -7175,21 +7178,21 @@ body: | ; CI-LABEL: name: test_load_constant_v2p1_align16 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x p1>), addrspace 4) ; CI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; VI-LABEL: name: test_load_constant_v2p1_align16 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x p1>), addrspace 4) ; VI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; GFX9-LABEL: name: test_load_constant_v2p1_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x p1>), addrspace 4) ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x p1>) = G_LOAD %0 :: (load 16, align 16, addrspace 4) + %1:_(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 16, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -7201,21 +7204,21 @@ body: | ; CI-LABEL: name: test_load_constant_v2p1_align8 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, align 8, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x p1>), align 8, addrspace 4) ; CI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; VI-LABEL: name: test_load_constant_v2p1_align8 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, align 8, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x p1>), align 8, addrspace 4) ; VI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; GFX9-LABEL: name: test_load_constant_v2p1_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, align 8, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x p1>), align 8, addrspace 4) ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x p1>) = G_LOAD %0 :: (load 16, align 8, addrspace 4) + %1:_(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 8, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -7227,21 +7230,21 @@ body: | ; CI-LABEL: name: test_load_constant_v2p1_align4 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x p1>), align 4, addrspace 4) ; CI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; VI-LABEL: name: test_load_constant_v2p1_align4 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x p1>), align 4, addrspace 4) ; VI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; GFX9-LABEL: name: test_load_constant_v2p1_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, align 4, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x p1>), align 4, addrspace 4) ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x p1>) = G_LOAD %0 :: (load 16, align 4, addrspace 4) + %1:_(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 4, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -7253,16 +7256,16 @@ body: | ; CI-LABEL: name: test_load_constant_v2p1_align1 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -7283,13 +7286,13 @@ body: | ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; CI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 from unknown-address + 4, addrspace 4) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 from unknown-address + 5, addrspace 4) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) ; CI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 from unknown-address + 6, addrspace 4) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 from unknown-address + 7, addrspace 4) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -7306,13 +7309,13 @@ body: | ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; CI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CI: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load 1 from unknown-address + 8, addrspace 4) + ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4) ; CI: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load 1 from unknown-address + 9, addrspace 4) + ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4) ; CI: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load 1 from unknown-address + 10, addrspace 4) + ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4) ; CI: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load 1 from unknown-address + 11, addrspace 4) + ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4) ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -7329,13 +7332,13 @@ body: | ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; CI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; CI: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C9]](s64) - ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load 1 from unknown-address + 12, addrspace 4) + ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4) ; CI: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load 1 from unknown-address + 13, addrspace 4) + ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4) ; CI: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load 1 from unknown-address + 14, addrspace 4) + ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4) ; CI: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load 1 from unknown-address + 15, addrspace 4) + ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4) ; CI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; CI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; CI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -7355,16 +7358,16 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; VI-LABEL: name: test_load_constant_v2p1_align1 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -7385,13 +7388,13 @@ body: | ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 from unknown-address + 4, addrspace 4) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 from unknown-address + 5, addrspace 4) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 from unknown-address + 6, addrspace 4) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 from unknown-address + 7, addrspace 4) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -7408,13 +7411,13 @@ body: | ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load 1 from unknown-address + 8, addrspace 4) + ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load 1 from unknown-address + 9, addrspace 4) + ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load 1 from unknown-address + 10, addrspace 4) + ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load 1 from unknown-address + 11, addrspace 4) + ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -7431,13 +7434,13 @@ body: | ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; VI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C9]](s64) - ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load 1 from unknown-address + 12, addrspace 4) + ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4) ; VI: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load 1 from unknown-address + 13, addrspace 4) + ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4) ; VI: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load 1 from unknown-address + 14, addrspace 4) + ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4) ; VI: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load 1 from unknown-address + 15, addrspace 4) + ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4) ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -7457,16 +7460,16 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; GFX9-LABEL: name: test_load_constant_v2p1_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -7487,13 +7490,13 @@ body: | ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX9: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 from unknown-address + 4, addrspace 4) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 from unknown-address + 5, addrspace 4) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 from unknown-address + 6, addrspace 4) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 from unknown-address + 7, addrspace 4) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -7510,13 +7513,13 @@ body: | ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; GFX9: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load 1 from unknown-address + 8, addrspace 4) + ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 4) ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load 1 from unknown-address + 9, addrspace 4) + ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 4) ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load 1 from unknown-address + 10, addrspace 4) + ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 4) ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load 1 from unknown-address + 11, addrspace 4) + ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 4) ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -7533,13 +7536,13 @@ body: | ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; GFX9: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C9]](s64) - ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load 1 from unknown-address + 12, addrspace 4) + ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 4) ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load 1 from unknown-address + 13, addrspace 4) + ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 4) ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load 1 from unknown-address + 14, addrspace 4) + ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 4) ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load 1 from unknown-address + 15, addrspace 4) + ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 4) ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; GFX9: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -7558,7 +7561,7 @@ body: | ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x p1>) = G_LOAD %0 :: (load 16, align 1, addrspace 4) + %1:_(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 1, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -7570,18 +7573,18 @@ body: | ; CI-LABEL: name: test_load_constant_v2p3_align8 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), addrspace 4) ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) ; VI-LABEL: name: test_load_constant_v2p3_align8 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), addrspace 4) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) ; GFX9-LABEL: name: test_load_constant_v2p3_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), addrspace 4) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x p3>) = G_LOAD %0 :: (load 8, align 8, addrspace 4) + %1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 4) $vgpr0_vgpr1 = COPY %1 ... @@ -7593,18 +7596,18 @@ body: | ; CI-LABEL: name: test_load_constant_v2p3_align4 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load 8, align 4, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), align 4, addrspace 4) ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) ; VI-LABEL: name: test_load_constant_v2p3_align4 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load 8, align 4, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), align 4, addrspace 4) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) ; GFX9-LABEL: name: test_load_constant_v2p3_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load 8, align 4, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p4) :: (load (<2 x p3>), align 4, addrspace 4) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x p3>) = G_LOAD %0 :: (load 8, align 4, addrspace 4) + %1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 4, addrspace 4) $vgpr0_vgpr1 = COPY %1 ... @@ -7616,16 +7619,16 @@ body: | ; CI-LABEL: name: test_load_constant_v2p3_align1 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -7647,13 +7650,13 @@ body: | ; CI: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) ; CI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 from unknown-address + 4, addrspace 4) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 from unknown-address + 5, addrspace 4) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) ; CI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 from unknown-address + 6, addrspace 4) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 from unknown-address + 7, addrspace 4) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -7673,16 +7676,16 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) ; VI-LABEL: name: test_load_constant_v2p3_align1 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -7704,13 +7707,13 @@ body: | ; VI: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 from unknown-address + 4, addrspace 4) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 from unknown-address + 5, addrspace 4) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 from unknown-address + 6, addrspace 4) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 from unknown-address + 7, addrspace 4) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -7730,16 +7733,16 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) ; GFX9-LABEL: name: test_load_constant_v2p3_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 4) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 4) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 4) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 4) ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 4) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 4) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -7761,13 +7764,13 @@ body: | ; GFX9: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) ; GFX9: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 from unknown-address + 4, addrspace 4) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 4) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 from unknown-address + 5, addrspace 4) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4) ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 from unknown-address + 6, addrspace 4) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 4) ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 from unknown-address + 7, addrspace 4) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 4) ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -7786,7 +7789,7 @@ body: | ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3) ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x p3>) = G_LOAD %0 :: (load 8, align 1, addrspace 4) + %1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 1, addrspace 4) $vgpr0_vgpr1 = COPY %1 ... @@ -7798,18 +7801,18 @@ body: | ; CI-LABEL: name: test_ext_load_constant_s32_from_1_align4 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4) ; CI: $vgpr0 = COPY [[LOAD]](s32) ; VI-LABEL: name: test_ext_load_constant_s32_from_1_align4 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4) ; VI: $vgpr0 = COPY [[LOAD]](s32) ; GFX9-LABEL: name: test_ext_load_constant_s32_from_1_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4) ; GFX9: $vgpr0 = COPY [[LOAD]](s32) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (load 1, align 4, addrspace 4) + %1:_(s32) = G_LOAD %0 :: (load (s8), align 4, addrspace 4) $vgpr0 = COPY %1 ... @@ -7821,18 +7824,18 @@ body: | ; CI-LABEL: name: test_ext_load_constant_s32_from_2_align4 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4) ; CI: $vgpr0 = COPY [[LOAD]](s32) ; VI-LABEL: name: test_ext_load_constant_s32_from_2_align4 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4) ; VI: $vgpr0 = COPY [[LOAD]](s32) ; GFX9-LABEL: name: test_ext_load_constant_s32_from_2_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4) ; GFX9: $vgpr0 = COPY [[LOAD]](s32) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (load 2, align 4, addrspace 4) + %1:_(s32) = G_LOAD %0 :: (load (s16), align 4, addrspace 4) $vgpr0 = COPY %1 ... @@ -7845,21 +7848,21 @@ body: | ; CI-LABEL: name: test_ext_load_constant_s64_from_1_align4 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4) ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_ext_load_constant_s64_from_1_align4 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4) ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-LABEL: name: test_ext_load_constant_s64_from_1_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4) ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load 1, align 4, addrspace 4) + %1:_(s64) = G_LOAD %0 :: (load (s8), align 4, addrspace 4) $vgpr0_vgpr1 = COPY %1 ... @@ -7871,21 +7874,21 @@ body: | ; CI-LABEL: name: test_ext_load_constant_s64_from_2_align4 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4) ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_ext_load_constant_s64_from_2_align4 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4) ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-LABEL: name: test_ext_load_constant_s64_from_2_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4) ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load 2, align 4, addrspace 4) + %1:_(s64) = G_LOAD %0 :: (load (s16), align 4, addrspace 4) $vgpr0_vgpr1 = COPY %1 ... @@ -7897,21 +7900,21 @@ body: | ; CI-LABEL: name: test_ext_load_constant_s64_from_4_align4 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4) ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_ext_load_constant_s64_from_4_align4 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4) ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-LABEL: name: test_ext_load_constant_s64_from_4_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4) ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load 4, align 4, addrspace 4) + %1:_(s64) = G_LOAD %0 :: (load (s32), align 4, addrspace 4) $vgpr0_vgpr1 = COPY %1 ... @@ -7923,7 +7926,7 @@ body: | ; CI-LABEL: name: test_ext_load_constant_s128_from_4_align4 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4) ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) ; CI: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF @@ -7931,7 +7934,7 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) ; VI-LABEL: name: test_ext_load_constant_s128_from_4_align4 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4) ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) ; VI: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF @@ -7939,14 +7942,14 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) ; GFX9-LABEL: name: test_ext_load_constant_s128_from_4_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4) ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) ; GFX9: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; GFX9: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s128) = G_LOAD %0 :: (load 4, align 4, addrspace 4) + %1:_(s128) = G_LOAD %0 :: (load (s32), align 4, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -7958,21 +7961,21 @@ body: | ; CI-LABEL: name: test_ext_load_constant_s64_from_2_align2 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4) ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_ext_load_constant_s64_from_2_align2 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4) ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-LABEL: name: test_ext_load_constant_s64_from_2_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), align 4, addrspace 4) ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load 2, align 4, addrspace 4) + %1:_(s64) = G_LOAD %0 :: (load (s16), align 4, addrspace 4) $vgpr0_vgpr1 = COPY %1 ... @@ -7984,21 +7987,21 @@ body: | ; CI-LABEL: name: test_ext_load_constant_s64_from_1_align1 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4) ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_ext_load_constant_s64_from_1_align1 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4) ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-LABEL: name: test_ext_load_constant_s64_from_1_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), align 4, addrspace 4) ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load 1, align 4, addrspace 4) + %1:_(s64) = G_LOAD %0 :: (load (s8), align 4, addrspace 4) $vgpr0_vgpr1 = COPY %1 ... @@ -8010,18 +8013,171 @@ body: | ; CI-LABEL: name: test_extload_constant_v2s32_from_4_align1 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load 4, align 1, addrspace 1) - ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 1) + ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 1) + ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 1) + ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 + ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 1) + ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32) + ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] + ; CI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 1) + ; CI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 1) + ; CI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 1) + ; CI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 1) + ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] + ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] + ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] + ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] + ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32) + ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] + ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; CI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] + ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) + ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; VI-LABEL: name: test_extload_constant_v2s32_from_4_align1 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load 4, align 1, addrspace 1) - ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 1) + ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 1) + ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 1) + ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 + ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 1) + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] + ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 1) + ; VI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 1) + ; VI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 1) + ; VI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 1) + ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] + ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] + ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] + ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] + ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32) + ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] + ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] + ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) + ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_extload_constant_v2s32_from_4_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load 4, align 1, addrspace 1) - ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 1) + ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 1) + ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 1) + ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 + ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 1) + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C5]](s32) + ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] + ; GFX9: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 1) + ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 1) + ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 1) + ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 1) + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) + ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) + ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] + ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) + ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] + ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32) + ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] + ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) + ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] + ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) + ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_LOAD %0 :: (load 4, align 1, addrspace 1) + %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 1, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... @@ -8033,18 +8189,87 @@ body: | ; CI-LABEL: name: test_extload_constant_v2s32_from_4_align2 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load 4, align 2, addrspace 1) - ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 1) + ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 1) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 1) + ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 1) + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) + ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; VI-LABEL: name: test_extload_constant_v2s32_from_4_align2 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load 4, align 2, addrspace 1) - ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 1) + ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 1) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 1) + ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 1) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) + ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_extload_constant_v2s32_from_4_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load 4, align 2, addrspace 1) - ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 1) + ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 1) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 1) + ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 1) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) + ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) + ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) + ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_LOAD %0 :: (load 4, align 2, addrspace 1) + %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 2, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... @@ -8056,18 +8281,18 @@ body: | ; CI-LABEL: name: test_extload_constant_v2s32_from_4_align4 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 1) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), align 4, addrspace 1) ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; VI-LABEL: name: test_extload_constant_v2s32_from_4_align4 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), align 4, addrspace 1) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; GFX9-LABEL: name: test_extload_constant_v2s32_from_4_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 1) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), align 4, addrspace 1) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_LOAD %0 :: (load 4, align 4, addrspace 1) + %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 4, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... @@ -8079,18 +8304,18 @@ body: | ; CI-LABEL: name: test_extload_constant_v3s32_from_6_align4 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load 6, align 4, addrspace 1) + ; CI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 1) ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) ; VI-LABEL: name: test_extload_constant_v3s32_from_6_align4 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load 6, align 4, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 1) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) ; GFX9-LABEL: name: test_extload_constant_v3s32_from_6_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load 6, align 4, addrspace 1) + ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (<3 x s32>), align 4, addrspace 1) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<3 x s32>) = G_LOAD %0 :: (load 6, align 4, addrspace 1) + %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 4, addrspace 1) $vgpr0_vgpr1_vgpr2 = COPY %1 ... @@ -8102,18 +8327,18 @@ body: | ; CI-LABEL: name: test_extload_constant_v4s32_from_8_align4 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 8, align 4, addrspace 1) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 1) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; VI-LABEL: name: test_extload_constant_v4s32_from_8_align4 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 8, align 4, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 1) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; GFX9-LABEL: name: test_extload_constant_v4s32_from_8_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 8, align 4, addrspace 1) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 4, addrspace 1) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<4 x s32>) = G_LOAD %0 :: (load 8, align 4, addrspace 1) + %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 4, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -8125,16 +8350,16 @@ body: | ; CI-LABEL: name: test_extload_constant_v2s96_from_24_align1 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 1) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 1) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 1) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 1) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 1) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 1) ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 1) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 1) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -8155,13 +8380,13 @@ body: | ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; CI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 from unknown-address + 4, addrspace 1) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 1) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 from unknown-address + 5, addrspace 1) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 1) ; CI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 from unknown-address + 6, addrspace 1) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 1) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 from unknown-address + 7, addrspace 1) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 1) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -8178,13 +8403,13 @@ body: | ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; CI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CI: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load 1 from unknown-address + 8, addrspace 1) + ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 1) ; CI: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load 1 from unknown-address + 9, addrspace 1) + ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 1) ; CI: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load 1 from unknown-address + 10, addrspace 1) + ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 1) ; CI: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load 1 from unknown-address + 11, addrspace 1) + ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 1) ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -8203,13 +8428,13 @@ body: | ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; CI: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C9]](s64) - ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load 1 from unknown-address + 12, addrspace 1) + ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 1) ; CI: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load 1 from unknown-address + 13, addrspace 1) + ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 1) ; CI: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load 1 from unknown-address + 14, addrspace 1) + ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 1) ; CI: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load 1 from unknown-address + 15, addrspace 1) + ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 1) ; CI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; CI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; CI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -8225,13 +8450,13 @@ body: | ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32) ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]] ; CI: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s64) - ; CI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p4) :: (load 1 from unknown-address + 16, addrspace 1) + ; CI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p4) :: (load (s8) from unknown-address + 16, addrspace 1) ; CI: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; CI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p4) :: (load 1 from unknown-address + 17, addrspace 1) + ; CI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p4) :: (load (s8) from unknown-address + 17, addrspace 1) ; CI: [[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; CI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p4) :: (load 1 from unknown-address + 18, addrspace 1) + ; CI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p4) :: (load (s8) from unknown-address + 18, addrspace 1) ; CI: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; CI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load 1 from unknown-address + 19, addrspace 1) + ; CI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load (s8) from unknown-address + 19, addrspace 1) ; CI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD16]](s32) ; CI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]] ; CI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32) @@ -8247,13 +8472,13 @@ body: | ; CI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32) ; CI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]] ; CI: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C8]](s64) - ; CI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p4) :: (load 1 from unknown-address + 20, addrspace 1) + ; CI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p4) :: (load (s8) from unknown-address + 20, addrspace 1) ; CI: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64) - ; CI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p4) :: (load 1 from unknown-address + 21, addrspace 1) + ; CI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p4) :: (load (s8) from unknown-address + 21, addrspace 1) ; CI: [[PTR_ADD21:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64) - ; CI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p4) :: (load 1 from unknown-address + 22, addrspace 1) + ; CI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p4) :: (load (s8) from unknown-address + 22, addrspace 1) ; CI: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64) - ; CI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load 1 from unknown-address + 23, addrspace 1) + ; CI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load (s8) from unknown-address + 23, addrspace 1) ; CI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD20]](s32) ; CI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]] ; CI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32) @@ -8276,16 +8501,16 @@ body: | ; CI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY26]](s96) ; VI-LABEL: name: test_extload_constant_v2s96_from_24_align1 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 1) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 1) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -8306,13 +8531,13 @@ body: | ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 from unknown-address + 4, addrspace 1) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 from unknown-address + 5, addrspace 1) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 1) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 from unknown-address + 6, addrspace 1) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 1) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 from unknown-address + 7, addrspace 1) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 1) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -8329,13 +8554,13 @@ body: | ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load 1 from unknown-address + 8, addrspace 1) + ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 1) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load 1 from unknown-address + 9, addrspace 1) + ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 1) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load 1 from unknown-address + 10, addrspace 1) + ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 1) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load 1 from unknown-address + 11, addrspace 1) + ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 1) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -8354,13 +8579,13 @@ body: | ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; VI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C9]](s64) - ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load 1 from unknown-address + 12, addrspace 1) + ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 1) ; VI: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load 1 from unknown-address + 13, addrspace 1) + ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 1) ; VI: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load 1 from unknown-address + 14, addrspace 1) + ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 1) ; VI: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load 1 from unknown-address + 15, addrspace 1) + ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 1) ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -8376,13 +8601,13 @@ body: | ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32) ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]] ; VI: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s64) - ; VI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p4) :: (load 1 from unknown-address + 16, addrspace 1) + ; VI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p4) :: (load (s8) from unknown-address + 16, addrspace 1) ; VI: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; VI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p4) :: (load 1 from unknown-address + 17, addrspace 1) + ; VI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p4) :: (load (s8) from unknown-address + 17, addrspace 1) ; VI: [[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; VI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p4) :: (load 1 from unknown-address + 18, addrspace 1) + ; VI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p4) :: (load (s8) from unknown-address + 18, addrspace 1) ; VI: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; VI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load 1 from unknown-address + 19, addrspace 1) + ; VI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load (s8) from unknown-address + 19, addrspace 1) ; VI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD16]](s32) ; VI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]] ; VI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32) @@ -8398,13 +8623,13 @@ body: | ; VI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32) ; VI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]] ; VI: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C8]](s64) - ; VI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p4) :: (load 1 from unknown-address + 20, addrspace 1) + ; VI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p4) :: (load (s8) from unknown-address + 20, addrspace 1) ; VI: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64) - ; VI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p4) :: (load 1 from unknown-address + 21, addrspace 1) + ; VI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p4) :: (load (s8) from unknown-address + 21, addrspace 1) ; VI: [[PTR_ADD21:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64) - ; VI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p4) :: (load 1 from unknown-address + 22, addrspace 1) + ; VI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p4) :: (load (s8) from unknown-address + 22, addrspace 1) ; VI: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64) - ; VI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load 1 from unknown-address + 23, addrspace 1) + ; VI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load (s8) from unknown-address + 23, addrspace 1) ; VI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD20]](s32) ; VI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]] ; VI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32) @@ -8427,16 +8652,16 @@ body: | ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY26]](s96) ; GFX9-LABEL: name: test_extload_constant_v2s96_from_24_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, addrspace 1) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 1) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 1) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 1) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 1) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 1) ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 1) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 1) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -8457,13 +8682,13 @@ body: | ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX9: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 1 from unknown-address + 4, addrspace 1) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s8) from unknown-address + 4, addrspace 1) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 1 from unknown-address + 5, addrspace 1) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 1) ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 1 from unknown-address + 6, addrspace 1) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s8) from unknown-address + 6, addrspace 1) ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 1 from unknown-address + 7, addrspace 1) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s8) from unknown-address + 7, addrspace 1) ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -8480,13 +8705,13 @@ body: | ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; GFX9: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load 1 from unknown-address + 8, addrspace 1) + ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s8) from unknown-address + 8, addrspace 1) ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load 1 from unknown-address + 9, addrspace 1) + ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s8) from unknown-address + 9, addrspace 1) ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load 1 from unknown-address + 10, addrspace 1) + ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s8) from unknown-address + 10, addrspace 1) ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load 1 from unknown-address + 11, addrspace 1) + ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s8) from unknown-address + 11, addrspace 1) ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -8505,13 +8730,13 @@ body: | ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX9: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C9]](s64) - ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load 1 from unknown-address + 12, addrspace 1) + ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p4) :: (load (s8) from unknown-address + 12, addrspace 1) ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load 1 from unknown-address + 13, addrspace 1) + ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p4) :: (load (s8) from unknown-address + 13, addrspace 1) ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load 1 from unknown-address + 14, addrspace 1) + ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p4) :: (load (s8) from unknown-address + 14, addrspace 1) ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load 1 from unknown-address + 15, addrspace 1) + ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p4) :: (load (s8) from unknown-address + 15, addrspace 1) ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; GFX9: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -8527,13 +8752,13 @@ body: | ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32) ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]] ; GFX9: [[PTR_ADD15:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s64) - ; GFX9: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p4) :: (load 1 from unknown-address + 16, addrspace 1) + ; GFX9: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p4) :: (load (s8) from unknown-address + 16, addrspace 1) ; GFX9: [[PTR_ADD16:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; GFX9: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p4) :: (load 1 from unknown-address + 17, addrspace 1) + ; GFX9: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p4) :: (load (s8) from unknown-address + 17, addrspace 1) ; GFX9: [[PTR_ADD17:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; GFX9: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p4) :: (load 1 from unknown-address + 18, addrspace 1) + ; GFX9: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p4) :: (load (s8) from unknown-address + 18, addrspace 1) ; GFX9: [[PTR_ADD18:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; GFX9: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load 1 from unknown-address + 19, addrspace 1) + ; GFX9: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p4) :: (load (s8) from unknown-address + 19, addrspace 1) ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD16]](s32) ; GFX9: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]] ; GFX9: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32) @@ -8549,13 +8774,13 @@ body: | ; GFX9: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32) ; GFX9: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]] ; GFX9: [[PTR_ADD19:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD11]], [[C8]](s64) - ; GFX9: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p4) :: (load 1 from unknown-address + 20, addrspace 1) + ; GFX9: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p4) :: (load (s8) from unknown-address + 20, addrspace 1) ; GFX9: [[PTR_ADD20:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64) - ; GFX9: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p4) :: (load 1 from unknown-address + 21, addrspace 1) + ; GFX9: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p4) :: (load (s8) from unknown-address + 21, addrspace 1) ; GFX9: [[PTR_ADD21:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64) - ; GFX9: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p4) :: (load 1 from unknown-address + 22, addrspace 1) + ; GFX9: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p4) :: (load (s8) from unknown-address + 22, addrspace 1) ; GFX9: [[PTR_ADD22:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64) - ; GFX9: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load 1 from unknown-address + 23, addrspace 1) + ; GFX9: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p4) :: (load (s8) from unknown-address + 23, addrspace 1) ; GFX9: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD20]](s32) ; GFX9: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]] ; GFX9: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32) @@ -8577,7 +8802,7 @@ body: | ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY25]](s96) ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY26]](s96) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x s96>) = G_LOAD %0 :: (load 24, align 1, addrspace 1) + %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 1, addrspace 1) %2:_(s96) = G_EXTRACT %1, 0 %3:_(s96) = G_EXTRACT %1, 96 $vgpr0_vgpr1_vgpr2 = COPY %2 @@ -8592,10 +8817,10 @@ body: | ; CI-LABEL: name: test_extload_constant_v2s96_from_24_align2 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 1) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 1) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 from unknown-address + 2, addrspace 1) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 1) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -8606,9 +8831,9 @@ body: | ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 from unknown-address + 4, addrspace 1) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 1) ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 2 from unknown-address + 6, addrspace 1) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 1) ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -8617,9 +8842,9 @@ body: | ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 2 from unknown-address + 8, addrspace 1) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s16) from unknown-address + 8, addrspace 1) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 2 from unknown-address + 10, addrspace 1) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s16) from unknown-address + 10, addrspace 1) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -8630,9 +8855,9 @@ body: | ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; CI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 2 from unknown-address + 12, addrspace 1) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s16) from unknown-address + 12, addrspace 1) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 2 from unknown-address + 14, addrspace 1) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s16) from unknown-address + 14, addrspace 1) ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) @@ -8640,9 +8865,9 @@ body: | ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32) ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; CI: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s64) - ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load 2 from unknown-address + 16, addrspace 1) + ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s16) from unknown-address + 16, addrspace 1) ; CI: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load 2 from unknown-address + 18, addrspace 1) + ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s16) from unknown-address + 18, addrspace 1) ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]] ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -8650,9 +8875,9 @@ body: | ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32) ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] ; CI: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s64) - ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load 2 from unknown-address + 20, addrspace 1) + ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s16) from unknown-address + 20, addrspace 1) ; CI: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load 2 from unknown-address + 22, addrspace 1) + ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s16) from unknown-address + 22, addrspace 1) ; CI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) ; CI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]] ; CI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) @@ -8667,10 +8892,10 @@ body: | ; CI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY14]](s96) ; VI-LABEL: name: test_extload_constant_v2s96_from_24_align2 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 from unknown-address + 2, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -8681,9 +8906,9 @@ body: | ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 from unknown-address + 4, addrspace 1) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 1) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 2 from unknown-address + 6, addrspace 1) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 1) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -8692,9 +8917,9 @@ body: | ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 2 from unknown-address + 8, addrspace 1) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s16) from unknown-address + 8, addrspace 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 2 from unknown-address + 10, addrspace 1) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s16) from unknown-address + 10, addrspace 1) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -8705,9 +8930,9 @@ body: | ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 2 from unknown-address + 12, addrspace 1) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s16) from unknown-address + 12, addrspace 1) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 2 from unknown-address + 14, addrspace 1) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s16) from unknown-address + 14, addrspace 1) ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) @@ -8715,9 +8940,9 @@ body: | ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32) ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; VI: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s64) - ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load 2 from unknown-address + 16, addrspace 1) + ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s16) from unknown-address + 16, addrspace 1) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load 2 from unknown-address + 18, addrspace 1) + ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s16) from unknown-address + 18, addrspace 1) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]] ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -8725,9 +8950,9 @@ body: | ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32) ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] ; VI: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s64) - ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load 2 from unknown-address + 20, addrspace 1) + ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s16) from unknown-address + 20, addrspace 1) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load 2 from unknown-address + 22, addrspace 1) + ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s16) from unknown-address + 22, addrspace 1) ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]] ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) @@ -8742,10 +8967,10 @@ body: | ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY14]](s96) ; GFX9-LABEL: name: test_extload_constant_v2s96_from_24_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 2, addrspace 1) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s16), addrspace 1) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 from unknown-address + 2, addrspace 1) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s16) from unknown-address + 2, addrspace 1) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -8756,9 +8981,9 @@ body: | ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 from unknown-address + 4, addrspace 1) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 1) ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 2 from unknown-address + 6, addrspace 1) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s16) from unknown-address + 6, addrspace 1) ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -8767,9 +8992,9 @@ body: | ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load 2 from unknown-address + 8, addrspace 1) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (load (s16) from unknown-address + 8, addrspace 1) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load 2 from unknown-address + 10, addrspace 1) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s16) from unknown-address + 10, addrspace 1) ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -8780,9 +9005,9 @@ body: | ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load 2 from unknown-address + 12, addrspace 1) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p4) :: (load (s16) from unknown-address + 12, addrspace 1) ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load 2 from unknown-address + 14, addrspace 1) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p4) :: (load (s16) from unknown-address + 14, addrspace 1) ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) @@ -8790,9 +9015,9 @@ body: | ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32) ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s64) - ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load 2 from unknown-address + 16, addrspace 1) + ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p4) :: (load (s16) from unknown-address + 16, addrspace 1) ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load 2 from unknown-address + 18, addrspace 1) + ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p4) :: (load (s16) from unknown-address + 18, addrspace 1) ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]] ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -8800,9 +9025,9 @@ body: | ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32) ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s64) - ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load 2 from unknown-address + 20, addrspace 1) + ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p4) :: (load (s16) from unknown-address + 20, addrspace 1) ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p4) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load 2 from unknown-address + 22, addrspace 1) + ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p4) :: (load (s16) from unknown-address + 22, addrspace 1) ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) ; GFX9: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]] ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) @@ -8816,7 +9041,7 @@ body: | ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY13]](s96) ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY14]](s96) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x s96>) = G_LOAD %0 :: (load 24, align 2, addrspace 1) + %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 2, addrspace 1) %2:_(s96) = G_EXTRACT %1, 0 %3:_(s96) = G_EXTRACT %1, 96 $vgpr0_vgpr1_vgpr2 = COPY %2 @@ -8831,11 +9056,11 @@ body: | ; CI-LABEL: name: test_extload_constant_v2s96_from_24_align4 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load 12, align 4, addrspace 1) + ; CI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (s96), align 4, addrspace 1) ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load 12 from unknown-address + 12, align 4, addrspace 1) + ; CI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (s96) from unknown-address + 12, align 4, addrspace 1) ; CI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) ; CI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) ; CI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) @@ -8843,11 +9068,11 @@ body: | ; CI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) ; VI-LABEL: name: test_extload_constant_v2s96_from_24_align4 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load 12, align 4, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (s96), align 4, addrspace 1) ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load 12 from unknown-address + 12, align 4, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (s96) from unknown-address + 12, align 4, addrspace 1) ; VI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) ; VI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) @@ -8855,18 +9080,18 @@ body: | ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) ; GFX9-LABEL: name: test_extload_constant_v2s96_from_24_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load 12, align 4, addrspace 1) + ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (s96), align 4, addrspace 1) ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load 12 from unknown-address + 12, align 4, addrspace 1) + ; GFX9: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (s96) from unknown-address + 12, align 4, addrspace 1) ; GFX9: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) ; GFX9: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x s96>) = G_LOAD %0 :: (load 24, align 4, addrspace 1) + %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 4, addrspace 1) %2:_(s96) = G_EXTRACT %1, 0 %3:_(s96) = G_EXTRACT %1, 96 $vgpr0_vgpr1_vgpr2 = COPY %2 @@ -8881,11 +9106,11 @@ body: | ; CI-LABEL: name: test_extload_constant_v2s96_from_24_align16 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load 12, align 16, addrspace 1) + ; CI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (s96), align 16, addrspace 1) ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load 12 from unknown-address + 12, align 4, addrspace 1) + ; CI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (s96) from unknown-address + 12, align 4, addrspace 1) ; CI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) ; CI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) ; CI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) @@ -8893,11 +9118,11 @@ body: | ; CI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) ; VI-LABEL: name: test_extload_constant_v2s96_from_24_align16 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load 12, align 16, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (s96), align 16, addrspace 1) ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load 12 from unknown-address + 12, align 4, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (s96) from unknown-address + 12, align 4, addrspace 1) ; VI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) ; VI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) @@ -8905,18 +9130,18 @@ body: | ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) ; GFX9-LABEL: name: test_extload_constant_v2s96_from_24_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load 12, align 16, addrspace 1) + ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p4) :: (load (s96), align 16, addrspace 1) ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load 12 from unknown-address + 12, align 4, addrspace 1) + ; GFX9: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (s96) from unknown-address + 12, align 4, addrspace 1) ; GFX9: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) ; GFX9: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<2 x s96>) = G_LOAD %0 :: (load 24, align 16, addrspace 1) + %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 16, addrspace 1) %2:_(s96) = G_EXTRACT %1, 0 %3:_(s96) = G_EXTRACT %1, 96 $vgpr0_vgpr1_vgpr2 = COPY %2 @@ -8931,21 +9156,21 @@ body: | ; CI-LABEL: name: test_load_constant_s512_align32 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load 64, align 32, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (s512), align 32, addrspace 4) ; CI: [[BITCAST:%[0-9]+]]:_(s512) = G_BITCAST [[LOAD]](<16 x s32>) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](s512) ; VI-LABEL: name: test_load_constant_s512_align32 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load 64, align 32, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (s512), align 32, addrspace 4) ; VI: [[BITCAST:%[0-9]+]]:_(s512) = G_BITCAST [[LOAD]](<16 x s32>) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](s512) ; GFX9-LABEL: name: test_load_constant_s512_align32 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load 64, align 32, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (s512), align 32, addrspace 4) ; GFX9: [[BITCAST:%[0-9]+]]:_(s512) = G_BITCAST [[LOAD]](<16 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](s512) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(s512) = G_LOAD %0 :: (load 64, align 32, addrspace 4) + %1:_(s512) = G_LOAD %0 :: (load (s512), align 32, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1 ... @@ -8957,20 +9182,20 @@ body: | ; CI-LABEL: name: test_load_constant_v4s128_align32 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load 64, align 32, addrspace 4) + ; CI: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s128>), align 32, addrspace 4) ; CI: [[BITCAST:%[0-9]+]]:_(<4 x s128>) = G_BITCAST [[LOAD]](<16 x s32>) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<4 x s128>) ; VI-LABEL: name: test_load_constant_v4s128_align32 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load 64, align 32, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s128>), align 32, addrspace 4) ; VI: [[BITCAST:%[0-9]+]]:_(<4 x s128>) = G_BITCAST [[LOAD]](<16 x s32>) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<4 x s128>) ; GFX9-LABEL: name: test_load_constant_v4s128_align32 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load 64, align 32, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s128>), align 32, addrspace 4) ; GFX9: [[BITCAST:%[0-9]+]]:_(<4 x s128>) = G_BITCAST [[LOAD]](<16 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<4 x s128>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<4 x s128>) = G_LOAD %0 :: (load 64, align 32, addrspace 4) + %1:_(<4 x s128>) = G_LOAD %0 :: (load (<4 x s128>), align 32, addrspace 4) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir index 554bb7887eff8..b16a1313b683b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir @@ -12,27 +12,27 @@ body: | ; CI-LABEL: name: test_load_flat_s1_align1 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s1)) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; CI: $vgpr0 = COPY [[AND]](s32) ; VI-LABEL: name: test_load_flat_s1_align1 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s1)) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; VI: $vgpr0 = COPY [[AND]](s32) ; GFX9-LABEL: name: test_load_flat_s1_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s1)) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; GFX9: $vgpr0 = COPY [[AND]](s32) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s1) = G_LOAD %0 :: (load 1, align 1, addrspace 0) + %1:_(s1) = G_LOAD %0 :: (load (s1), align 1, addrspace 0) %2:_(s32) = G_ZEXT %1 $vgpr0 = COPY %2 ... @@ -45,27 +45,27 @@ body: | ; CI-LABEL: name: test_load_flat_s2_align1 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s2)) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; CI: $vgpr0 = COPY [[AND]](s32) ; VI-LABEL: name: test_load_flat_s2_align1 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s2)) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; VI: $vgpr0 = COPY [[AND]](s32) ; GFX9-LABEL: name: test_load_flat_s2_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s2)) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; GFX9: $vgpr0 = COPY [[AND]](s32) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s2) = G_LOAD %0 :: (load 1, align 1, addrspace 0) + %1:_(s2) = G_LOAD %0 :: (load (s2), align 1, addrspace 0) %2:_(s32) = G_ZEXT %1 $vgpr0 = COPY %2 ... @@ -78,21 +78,21 @@ body: | ; CI-LABEL: name: test_load_flat_s8_align4 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: $vgpr0 = COPY [[COPY1]](s32) ; VI-LABEL: name: test_load_flat_s8_align4 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-LABEL: name: test_load_flat_s8_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: $vgpr0 = COPY [[COPY1]](s32) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s8) = G_LOAD %0 :: (load 1, align 4, addrspace 0) + %1:_(s8) = G_LOAD %0 :: (load (s8), align 4, addrspace 0) %2:_(s32) = G_ANYEXT %1 $vgpr0 = COPY %2 ... @@ -105,21 +105,21 @@ body: | ; CI-LABEL: name: test_load_flat_s8_align1 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: $vgpr0 = COPY [[COPY1]](s32) ; VI-LABEL: name: test_load_flat_s8_align1 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-LABEL: name: test_load_flat_s8_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: $vgpr0 = COPY [[COPY1]](s32) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s8) = G_LOAD %0 :: (load 1, align 1, addrspace 0) + %1:_(s8) = G_LOAD %0 :: (load (s8), align 1, addrspace 0) %2:_(s32) = G_ANYEXT %1 $vgpr0 = COPY %2 ... @@ -132,21 +132,21 @@ body: | ; CI-LABEL: name: test_load_flat_s16_align4 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: $vgpr0 = COPY [[COPY1]](s32) ; VI-LABEL: name: test_load_flat_s16_align4 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-LABEL: name: test_load_flat_s16_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: $vgpr0 = COPY [[COPY1]](s32) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s16) = G_LOAD %0 :: (load 2, align 4, addrspace 0) + %1:_(s16) = G_LOAD %0 :: (load (s16), align 4, addrspace 0) %2:_(s32) = G_ANYEXT %1 $vgpr0 = COPY %2 ... @@ -159,21 +159,21 @@ body: | ; CI-LABEL: name: test_load_flat_s16_align2 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: $vgpr0 = COPY [[COPY1]](s32) ; VI-LABEL: name: test_load_flat_s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-LABEL: name: test_load_flat_s16_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: $vgpr0 = COPY [[COPY1]](s32) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s16) = G_LOAD %0 :: (load 2, align 2, addrspace 0) + %1:_(s16) = G_LOAD %0 :: (load (s16), align 2, addrspace 0) %2:_(s32) = G_ANYEXT %1 $vgpr0 = COPY %2 ... @@ -186,10 +186,10 @@ body: | ; CI-LABEL: name: test_load_flat_s16_align1 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; CI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -204,10 +204,10 @@ body: | ; CI: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_load_flat_s16_align1 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -220,10 +220,10 @@ body: | ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_load_flat_s16_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -235,7 +235,7 @@ body: | ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s16) = G_LOAD %0 :: (load 2, align 1, addrspace 0) + %1:_(s16) = G_LOAD %0 :: (load (s16), align 1, addrspace 0) %2:_(s32) = G_ANYEXT %1 $vgpr0 = COPY %2 ... @@ -248,18 +248,18 @@ body: | ; CI-LABEL: name: test_load_flat_s32_align4 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) ; CI: $vgpr0 = COPY [[LOAD]](s32) ; VI-LABEL: name: test_load_flat_s32_align4 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) ; VI: $vgpr0 = COPY [[LOAD]](s32) ; GFX9-LABEL: name: test_load_flat_s32_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) ; GFX9: $vgpr0 = COPY [[LOAD]](s32) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 0) + %1:_(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 0) $vgpr0 = COPY %1 ... @@ -271,10 +271,10 @@ body: | ; CI-LABEL: name: test_load_flat_s32_align2 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 from unknown-address + 2) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -286,10 +286,10 @@ body: | ; CI: $vgpr0 = COPY [[OR]](s32) ; VI-LABEL: name: test_load_flat_s32_align2 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 from unknown-address + 2) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -301,10 +301,10 @@ body: | ; VI: $vgpr0 = COPY [[OR]](s32) ; GFX9-LABEL: name: test_load_flat_s32_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 from unknown-address + 2) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -315,7 +315,7 @@ body: | ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX9: $vgpr0 = COPY [[OR]](s32) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (load 4, align 2, addrspace 0) + %1:_(s32) = G_LOAD %0 :: (load (s32), align 2, addrspace 0) $vgpr0 = COPY %1 ... @@ -327,16 +327,16 @@ body: | ; CI-LABEL: name: test_load_flat_s32_align1 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 from unknown-address + 2) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 from unknown-address + 3) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -358,16 +358,16 @@ body: | ; CI: $vgpr0 = COPY [[OR2]](s32) ; VI-LABEL: name: test_load_flat_s32_align1 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 from unknown-address + 2) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 from unknown-address + 3) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -389,16 +389,16 @@ body: | ; VI: $vgpr0 = COPY [[OR2]](s32) ; GFX9-LABEL: name: test_load_flat_s32_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 from unknown-address + 2) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 from unknown-address + 3) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -419,7 +419,7 @@ body: | ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX9: $vgpr0 = COPY [[OR2]](s32) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (load 4, align 1, addrspace 0) + %1:_(s32) = G_LOAD %0 :: (load (s32), align 1, addrspace 0) $vgpr0 = COPY %1 ... @@ -431,27 +431,27 @@ body: | ; CI-LABEL: name: test_load_flat_s48_align8 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 8) + ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64)) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 ; CI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) ; CI: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] ; CI: $vgpr0_vgpr1 = COPY [[AND]](s64) ; VI-LABEL: name: test_load_flat_s48_align8 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 8) + ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64)) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) ; VI: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] ; VI: $vgpr0_vgpr1 = COPY [[AND]](s64) ; GFX9-LABEL: name: test_load_flat_s48_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 8) + ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64)) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) ; GFX9: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] ; GFX9: $vgpr0_vgpr1 = COPY [[AND]](s64) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s48) = G_LOAD %0 :: (load 6, align 8, addrspace 0) + %1:_(s48) = G_LOAD %0 :: (load (s48), align 8, addrspace 0) %2:_(s64) = G_ZEXT %1 $vgpr0_vgpr1 = COPY %2 ... @@ -464,18 +464,18 @@ body: | ; CI-LABEL: name: test_load_flat_s64_align8 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 8) + ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64)) ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) ; VI-LABEL: name: test_load_flat_s64_align8 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 8) + ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64)) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) ; GFX9-LABEL: name: test_load_flat_s64_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 8) + ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64)) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load 8, align 8, addrspace 0) + %1:_(s64) = G_LOAD %0 :: (load (s64), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 ... @@ -487,18 +487,18 @@ body: | ; CI-LABEL: name: test_load_flat_s64_align4 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 8, align 4) + ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64), align 4) ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) ; VI-LABEL: name: test_load_flat_s64_align4 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 8, align 4) + ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64), align 4) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) ; GFX9-LABEL: name: test_load_flat_s64_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 8, align 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64), align 4) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load 8, align 4, addrspace 0) + %1:_(s64) = G_LOAD %0 :: (load (s64), align 4, addrspace 0) $vgpr0_vgpr1 = COPY %1 ... @@ -510,16 +510,16 @@ body: | ; CI-LABEL: name: test_load_flat_s64_align2 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 from unknown-address + 2) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2 from unknown-address + 4) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 2 from unknown-address + 6) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -538,16 +538,16 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; VI-LABEL: name: test_load_flat_s64_align2 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 from unknown-address + 2) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2 from unknown-address + 4) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 2 from unknown-address + 6) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -566,16 +566,16 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; GFX9-LABEL: name: test_load_flat_s64_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 from unknown-address + 2) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2 from unknown-address + 4) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 2 from unknown-address + 6) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -593,7 +593,7 @@ body: | ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load 8, align 2, addrspace 0) + %1:_(s64) = G_LOAD %0 :: (load (s64), align 2, addrspace 0) $vgpr0_vgpr1 = COPY %1 ... @@ -605,28 +605,28 @@ body: | ; CI-LABEL: name: test_load_flat_s64_align1 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 from unknown-address + 2) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 from unknown-address + 3) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) ; CI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 from unknown-address + 4) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; CI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 from unknown-address + 5) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) ; CI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 from unknown-address + 6) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) ; CI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; CI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 from unknown-address + 7) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) ; CI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -674,28 +674,28 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; VI-LABEL: name: test_load_flat_s64_align1 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 from unknown-address + 2) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 from unknown-address + 3) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 from unknown-address + 4) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; VI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 from unknown-address + 5) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 from unknown-address + 6) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; VI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 from unknown-address + 7) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) ; VI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -735,28 +735,28 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; GFX9-LABEL: name: test_load_flat_s64_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 from unknown-address + 2) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 from unknown-address + 3) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 from unknown-address + 4) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 from unknown-address + 5) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 from unknown-address + 6) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) ; GFX9: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 from unknown-address + 7) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) ; GFX9: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -795,7 +795,7 @@ body: | ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load 8, align 1, addrspace 0) + %1:_(s64) = G_LOAD %0 :: (load (s64), align 1, addrspace 0) $vgpr0_vgpr1 = COPY %1 ... @@ -807,21 +807,21 @@ body: | ; CI-LABEL: name: test_load_flat_s96_align16 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load 12, align 16) + ; CI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (s96), align 16) ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; VI-LABEL: name: test_load_flat_s96_align16 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load 12, align 16) + ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (s96), align 16) ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; GFX9-LABEL: name: test_load_flat_s96_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load 12, align 16) + ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (s96), align 16) ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s96) = G_LOAD %0 :: (load 12, align 16, addrspace 0) + %1:_(s96) = G_LOAD %0 :: (load (s96), align 16, addrspace 0) $vgpr0_vgpr1_vgpr2 = COPY %1 ... @@ -833,21 +833,21 @@ body: | ; CI-LABEL: name: test_load_flat_s96_align8 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load 12, align 8) + ; CI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (s96), align 8) ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; VI-LABEL: name: test_load_flat_s96_align8 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load 12, align 8) + ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (s96), align 8) ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; GFX9-LABEL: name: test_load_flat_s96_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load 12, align 8) + ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (s96), align 8) ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s96) = G_LOAD %0 :: (load 12, align 8, addrspace 0) + %1:_(s96) = G_LOAD %0 :: (load (s96), align 8, addrspace 0) $vgpr0_vgpr1_vgpr2 = COPY %1 ... @@ -859,21 +859,21 @@ body: | ; CI-LABEL: name: test_load_flat_s96_align4 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load 12, align 4) + ; CI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (s96), align 4) ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; VI-LABEL: name: test_load_flat_s96_align4 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load 12, align 4) + ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (s96), align 4) ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; GFX9-LABEL: name: test_load_flat_s96_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load 12, align 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (s96), align 4) ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s96) = G_LOAD %0 :: (load 12, align 4, addrspace 0) + %1:_(s96) = G_LOAD %0 :: (load (s96), align 4, addrspace 0) $vgpr0_vgpr1_vgpr2 = COPY %1 ... @@ -885,10 +885,10 @@ body: | ; CI-LABEL: name: test_load_flat_s96_align2 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 from unknown-address + 2) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -899,9 +899,9 @@ body: | ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2 from unknown-address + 4) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) ; CI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 2 from unknown-address + 6) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -910,9 +910,9 @@ body: | ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 2 from unknown-address + 8) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s16) from unknown-address + 8) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 2 from unknown-address + 10) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s16) from unknown-address + 10) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -924,10 +924,10 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; VI-LABEL: name: test_load_flat_s96_align2 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 from unknown-address + 2) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -938,9 +938,9 @@ body: | ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2 from unknown-address + 4) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 2 from unknown-address + 6) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -949,9 +949,9 @@ body: | ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 2 from unknown-address + 8) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s16) from unknown-address + 8) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 2 from unknown-address + 10) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s16) from unknown-address + 10) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -963,10 +963,10 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; GFX9-LABEL: name: test_load_flat_s96_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 from unknown-address + 2) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -977,9 +977,9 @@ body: | ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2 from unknown-address + 4) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 2 from unknown-address + 6) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -988,9 +988,9 @@ body: | ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 2 from unknown-address + 8) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s16) from unknown-address + 8) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 2 from unknown-address + 10) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s16) from unknown-address + 10) ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -1001,7 +1001,7 @@ body: | ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s96) = G_LOAD %0 :: (load 12, align 2, addrspace 0) + %1:_(s96) = G_LOAD %0 :: (load (s96), align 2, addrspace 0) $vgpr0_vgpr1_vgpr2 = COPY %1 ... @@ -1013,16 +1013,16 @@ body: | ; CI-LABEL: name: test_load_flat_s96_align1 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 from unknown-address + 2) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 from unknown-address + 3) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -1043,13 +1043,13 @@ body: | ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; CI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 from unknown-address + 4) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 from unknown-address + 5) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) ; CI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 from unknown-address + 6) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 from unknown-address + 7) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -1066,13 +1066,13 @@ body: | ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; CI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CI: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load 1 from unknown-address + 8) + ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) ; CI: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load 1 from unknown-address + 9) + ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) ; CI: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load 1 from unknown-address + 10) + ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) ; CI: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load 1 from unknown-address + 11) + ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -1092,16 +1092,16 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; VI-LABEL: name: test_load_flat_s96_align1 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 from unknown-address + 2) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 from unknown-address + 3) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -1122,13 +1122,13 @@ body: | ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 from unknown-address + 4) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 from unknown-address + 5) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 from unknown-address + 6) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 from unknown-address + 7) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -1145,13 +1145,13 @@ body: | ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load 1 from unknown-address + 8) + ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load 1 from unknown-address + 9) + ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load 1 from unknown-address + 10) + ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load 1 from unknown-address + 11) + ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -1171,16 +1171,16 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; GFX9-LABEL: name: test_load_flat_s96_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 from unknown-address + 2) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 from unknown-address + 3) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -1201,13 +1201,13 @@ body: | ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX9: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 from unknown-address + 4) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 from unknown-address + 5) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 from unknown-address + 6) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 from unknown-address + 7) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -1224,13 +1224,13 @@ body: | ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; GFX9: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load 1 from unknown-address + 8) + ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load 1 from unknown-address + 9) + ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load 1 from unknown-address + 10) + ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load 1 from unknown-address + 11) + ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -1249,7 +1249,7 @@ body: | ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s96) = G_LOAD %0 :: (load 12, align 1, addrspace 0) + %1:_(s96) = G_LOAD %0 :: (load (s96), align 1, addrspace 0) $vgpr0_vgpr1_vgpr2 = COPY %1 ... @@ -1261,36 +1261,36 @@ body: | ; CI-LABEL: name: test_load_flat_s160_align4 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 4) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (s128), align 4) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 4 from unknown-address + 16) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 16) ; CI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32) ; CI: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>) ; CI: S_NOP 0, implicit [[BITCAST]](s160) ; VI-LABEL: name: test_load_flat_s160_align4 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 4) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (s128), align 4) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 4 from unknown-address + 16) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 16) ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32) ; VI: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>) ; VI: S_NOP 0, implicit [[BITCAST]](s160) ; GFX9-LABEL: name: test_load_flat_s160_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (s128), align 4) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 4 from unknown-address + 16) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 16) ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32) ; GFX9: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>) ; GFX9: S_NOP 0, implicit [[BITCAST]](s160) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s160) = G_LOAD %0 :: (load 20, align 4, addrspace 0) + %1:_(s160) = G_LOAD %0 :: (load (s160), align 4, addrspace 0) S_NOP 0, implicit %1 ... @@ -1302,10 +1302,10 @@ body: | ; CI-LABEL: name: test_load_flat_s224_align4 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 4) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (s128), align 4) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load 12 from unknown-address + 16, align 4) + ; CI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (s96) from unknown-address + 16, align 4) ; CI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; CI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF @@ -1321,10 +1321,10 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) ; VI-LABEL: name: test_load_flat_s224_align4 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 4) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (s128), align 4) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load 12 from unknown-address + 16, align 4) + ; VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (s96) from unknown-address + 16, align 4) ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; VI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF @@ -1340,10 +1340,10 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) ; GFX9-LABEL: name: test_load_flat_s224_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (s128), align 4) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load 12 from unknown-address + 16, align 4) + ; GFX9: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (s96) from unknown-address + 16, align 4) ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF @@ -1358,7 +1358,7 @@ body: | ; GFX9: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0 ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s224) = G_LOAD %0 :: (load 28, align 4, addrspace 0) + %1:_(s224) = G_LOAD %0 :: (load (s224), align 4, addrspace 0) %2:_(s256) = G_IMPLICIT_DEF %3:_(s256) = G_INSERT %2, %1, 0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3 @@ -1373,21 +1373,21 @@ body: | ; CI-LABEL: name: test_load_flat_s128_align16 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (s128)) ; CI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; VI-LABEL: name: test_load_flat_s128_align16 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (s128)) ; VI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; GFX9-LABEL: name: test_load_flat_s128_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (s128)) ; GFX9: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s128) = G_LOAD %0 :: (load 16, align 16, addrspace 0) + %1:_(s128) = G_LOAD %0 :: (load (s128), align 16, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -1399,21 +1399,21 @@ body: | ; CI-LABEL: name: test_load_flat_s128_align4 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 4) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (s128), align 4) ; CI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; VI-LABEL: name: test_load_flat_s128_align4 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 4) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (s128), align 4) ; VI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; GFX9-LABEL: name: test_load_flat_s128_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (s128), align 4) ; GFX9: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s128) = G_LOAD %0 :: (load 16, align 4, addrspace 0) + %1:_(s128) = G_LOAD %0 :: (load (s128), align 4, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -1425,16 +1425,16 @@ body: | ; CI-LABEL: name: test_load_flat_s128_align1 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 from unknown-address + 2) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 from unknown-address + 3) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -1455,13 +1455,13 @@ body: | ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; CI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 from unknown-address + 4) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 from unknown-address + 5) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) ; CI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 from unknown-address + 6) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 from unknown-address + 7) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -1478,13 +1478,13 @@ body: | ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; CI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CI: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load 1 from unknown-address + 8) + ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) ; CI: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load 1 from unknown-address + 9) + ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) ; CI: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load 1 from unknown-address + 10) + ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) ; CI: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load 1 from unknown-address + 11) + ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -1501,13 +1501,13 @@ body: | ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; CI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; CI: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64) - ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load 1 from unknown-address + 12) + ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) ; CI: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load 1 from unknown-address + 13) + ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) ; CI: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load 1 from unknown-address + 14) + ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) ; CI: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load 1 from unknown-address + 15) + ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) ; CI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; CI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; CI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -1527,16 +1527,16 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; VI-LABEL: name: test_load_flat_s128_align1 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 from unknown-address + 2) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 from unknown-address + 3) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -1557,13 +1557,13 @@ body: | ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 from unknown-address + 4) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 from unknown-address + 5) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 from unknown-address + 6) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 from unknown-address + 7) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -1580,13 +1580,13 @@ body: | ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load 1 from unknown-address + 8) + ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load 1 from unknown-address + 9) + ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load 1 from unknown-address + 10) + ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load 1 from unknown-address + 11) + ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -1603,13 +1603,13 @@ body: | ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; VI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64) - ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load 1 from unknown-address + 12) + ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) ; VI: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load 1 from unknown-address + 13) + ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) ; VI: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load 1 from unknown-address + 14) + ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) ; VI: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load 1 from unknown-address + 15) + ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -1629,16 +1629,16 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; GFX9-LABEL: name: test_load_flat_s128_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 from unknown-address + 2) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 from unknown-address + 3) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -1659,13 +1659,13 @@ body: | ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX9: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 from unknown-address + 4) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 from unknown-address + 5) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 from unknown-address + 6) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 from unknown-address + 7) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -1682,13 +1682,13 @@ body: | ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; GFX9: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load 1 from unknown-address + 8) + ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load 1 from unknown-address + 9) + ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load 1 from unknown-address + 10) + ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load 1 from unknown-address + 11) + ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -1705,13 +1705,13 @@ body: | ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; GFX9: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64) - ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load 1 from unknown-address + 12) + ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load 1 from unknown-address + 13) + ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load 1 from unknown-address + 14) + ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load 1 from unknown-address + 15) + ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; GFX9: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -1730,7 +1730,7 @@ body: | ; GFX9: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s128) = G_LOAD %0 :: (load 16, align 1, addrspace 0) + %1:_(s128) = G_LOAD %0 :: (load (s128), align 1, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -1742,33 +1742,33 @@ body: | ; CI-LABEL: name: test_load_flat_s256_align32 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (s128)) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 from unknown-address + 16) + ; CI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from unknown-address + 16) ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) ; CI: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256) ; VI-LABEL: name: test_load_flat_s256_align32 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (s128)) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 from unknown-address + 16) + ; VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from unknown-address + 16) ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) ; VI: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256) ; GFX9-LABEL: name: test_load_flat_s256_align32 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (s128)) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 from unknown-address + 16) + ; GFX9: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from unknown-address + 16) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) ; GFX9: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s256) = G_LOAD %0 :: (load 32, align 16, addrspace 0) + %1:_(s256) = G_LOAD %0 :: (load (s256), align 16, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 ... @@ -1780,18 +1780,18 @@ body: | ; CI-LABEL: name: test_load_flat_p1_align8 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p0) :: (load 8) + ; CI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p0) :: (load (p1)) ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](p1) ; VI-LABEL: name: test_load_flat_p1_align8 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p0) :: (load 8) + ; VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p0) :: (load (p1)) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](p1) ; GFX9-LABEL: name: test_load_flat_p1_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p0) :: (load 8) + ; GFX9: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p0) :: (load (p1)) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p1) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(p1) = G_LOAD %0 :: (load 8, align 8, addrspace 0) + %1:_(p1) = G_LOAD %0 :: (load (p1), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 ... @@ -1803,18 +1803,18 @@ body: | ; CI-LABEL: name: test_load_flat_p1_align4 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p0) :: (load 8, align 4) + ; CI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p0) :: (load (p1), align 4) ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](p1) ; VI-LABEL: name: test_load_flat_p1_align4 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p0) :: (load 8, align 4) + ; VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p0) :: (load (p1), align 4) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](p1) ; GFX9-LABEL: name: test_load_flat_p1_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p0) :: (load 8, align 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p0) :: (load (p1), align 4) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p1) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(p1) = G_LOAD %0 :: (load 8, align 4, addrspace 0) + %1:_(p1) = G_LOAD %0 :: (load (p1), align 4, addrspace 0) $vgpr0_vgpr1 = COPY %1 ... @@ -1826,28 +1826,28 @@ body: | ; CI-LABEL: name: test_load_flat_p1_align1 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 from unknown-address + 2) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 from unknown-address + 3) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) ; CI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 from unknown-address + 4) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; CI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 from unknown-address + 5) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) ; CI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 from unknown-address + 6) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) ; CI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; CI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 from unknown-address + 7) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) ; CI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -1895,28 +1895,28 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[MV]](p1) ; VI-LABEL: name: test_load_flat_p1_align1 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 from unknown-address + 2) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 from unknown-address + 3) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 from unknown-address + 4) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; VI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 from unknown-address + 5) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 from unknown-address + 6) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; VI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 from unknown-address + 7) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) ; VI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -1956,28 +1956,28 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[MV]](p1) ; GFX9-LABEL: name: test_load_flat_p1_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 from unknown-address + 2) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 from unknown-address + 3) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 from unknown-address + 4) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 from unknown-address + 5) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 from unknown-address + 6) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) ; GFX9: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 from unknown-address + 7) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) ; GFX9: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -2016,7 +2016,7 @@ body: | ; GFX9: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](p1) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(p1) = G_LOAD %0 :: (load 8, align 1, addrspace 0) + %1:_(p1) = G_LOAD %0 :: (load (p1), align 1, addrspace 0) $vgpr0_vgpr1 = COPY %1 ... @@ -2028,18 +2028,18 @@ body: | ; CI-LABEL: name: test_load_flat_p3_align4 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p0) :: (load 4) + ; CI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p0) :: (load (p3)) ; CI: $vgpr0 = COPY [[LOAD]](p3) ; VI-LABEL: name: test_load_flat_p3_align4 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p0) :: (load 4) + ; VI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p0) :: (load (p3)) ; VI: $vgpr0 = COPY [[LOAD]](p3) ; GFX9-LABEL: name: test_load_flat_p3_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p0) :: (load 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p0) :: (load (p3)) ; GFX9: $vgpr0 = COPY [[LOAD]](p3) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(p3) = G_LOAD %0 :: (load 4, align 4, addrspace 0) + %1:_(p3) = G_LOAD %0 :: (load (p3), align 4, addrspace 0) $vgpr0 = COPY %1 ... @@ -2051,18 +2051,18 @@ body: | ; CI-LABEL: name: test_load_flat_p4_align8 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p0) :: (load 8) + ; CI: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p0) :: (load (p4)) ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](p4) ; VI-LABEL: name: test_load_flat_p4_align8 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p0) :: (load 8) + ; VI: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p0) :: (load (p4)) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](p4) ; GFX9-LABEL: name: test_load_flat_p4_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p0) :: (load 8) + ; GFX9: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p0) :: (load (p4)) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p4) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(p4) = G_LOAD %0 :: (load 8, align 8, addrspace 0) + %1:_(p4) = G_LOAD %0 :: (load (p4), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 ... @@ -2074,18 +2074,18 @@ body: | ; CI-LABEL: name: test_load_flat_p4_align4 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p0) :: (load 8, align 4) + ; CI: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p0) :: (load (p4), align 4) ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](p4) ; VI-LABEL: name: test_load_flat_p4_align4 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p0) :: (load 8, align 4) + ; VI: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p0) :: (load (p4), align 4) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](p4) ; GFX9-LABEL: name: test_load_flat_p4_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p0) :: (load 8, align 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p0) :: (load (p4), align 4) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p4) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(p4) = G_LOAD %0 :: (load 8, align 4, addrspace 0) + %1:_(p4) = G_LOAD %0 :: (load (p4), align 4, addrspace 0) $vgpr0_vgpr1 = COPY %1 ... @@ -2097,16 +2097,16 @@ body: | ; CI-LABEL: name: test_load_flat_p4_align2 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 from unknown-address + 2) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2 from unknown-address + 4) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 2 from unknown-address + 6) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -2125,16 +2125,16 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[MV]](p4) ; VI-LABEL: name: test_load_flat_p4_align2 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 from unknown-address + 2) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2 from unknown-address + 4) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 2 from unknown-address + 6) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -2153,16 +2153,16 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[MV]](p4) ; GFX9-LABEL: name: test_load_flat_p4_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 from unknown-address + 2) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2 from unknown-address + 4) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 2 from unknown-address + 6) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -2180,7 +2180,7 @@ body: | ; GFX9: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](p4) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(p4) = G_LOAD %0 :: (load 8, align 2, addrspace 0) + %1:_(p4) = G_LOAD %0 :: (load (p4), align 2, addrspace 0) $vgpr0_vgpr1 = COPY %1 ... @@ -2192,28 +2192,28 @@ body: | ; CI-LABEL: name: test_load_flat_p4_align1 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 from unknown-address + 2) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 from unknown-address + 3) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) ; CI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 from unknown-address + 4) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; CI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 from unknown-address + 5) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) ; CI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 from unknown-address + 6) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) ; CI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; CI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 from unknown-address + 7) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) ; CI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -2261,28 +2261,28 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[MV]](p4) ; VI-LABEL: name: test_load_flat_p4_align1 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 from unknown-address + 2) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 from unknown-address + 3) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 from unknown-address + 4) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; VI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 from unknown-address + 5) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 from unknown-address + 6) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; VI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 from unknown-address + 7) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) ; VI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -2322,28 +2322,28 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[MV]](p4) ; GFX9-LABEL: name: test_load_flat_p4_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 from unknown-address + 2) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 from unknown-address + 3) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 from unknown-address + 4) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 from unknown-address + 5) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 from unknown-address + 6) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) ; GFX9: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 from unknown-address + 7) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) ; GFX9: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -2382,7 +2382,7 @@ body: | ; GFX9: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](p4) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(p4) = G_LOAD %0 :: (load 8, align 1, addrspace 0) + %1:_(p4) = G_LOAD %0 :: (load (p4), align 1, addrspace 0) $vgpr0_vgpr1 = COPY %1 ... @@ -2394,18 +2394,18 @@ body: | ; CI-LABEL: name: test_load_flat_p5_align4 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p0) :: (load 4) + ; CI: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p0) :: (load (p5)) ; CI: $vgpr0 = COPY [[LOAD]](p5) ; VI-LABEL: name: test_load_flat_p5_align4 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p0) :: (load 4) + ; VI: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p0) :: (load (p5)) ; VI: $vgpr0 = COPY [[LOAD]](p5) ; GFX9-LABEL: name: test_load_flat_p5_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p0) :: (load 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p0) :: (load (p5)) ; GFX9: $vgpr0 = COPY [[LOAD]](p5) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(p5) = G_LOAD %0 :: (load 4, align 4, addrspace 0) + %1:_(p5) = G_LOAD %0 :: (load (p5), align 4, addrspace 0) $vgpr0 = COPY %1 ... @@ -2417,10 +2417,10 @@ body: | ; CI-LABEL: name: test_load_flat_p5_align2 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 from unknown-address + 2) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -2433,10 +2433,10 @@ body: | ; CI: $vgpr0 = COPY [[INTTOPTR]](p5) ; VI-LABEL: name: test_load_flat_p5_align2 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 from unknown-address + 2) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -2449,10 +2449,10 @@ body: | ; VI: $vgpr0 = COPY [[INTTOPTR]](p5) ; GFX9-LABEL: name: test_load_flat_p5_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 from unknown-address + 2) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -2464,7 +2464,7 @@ body: | ; GFX9: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) ; GFX9: $vgpr0 = COPY [[INTTOPTR]](p5) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(p5) = G_LOAD %0 :: (load 4, align 2, addrspace 0) + %1:_(p5) = G_LOAD %0 :: (load (p5), align 2, addrspace 0) $vgpr0 = COPY %1 ... @@ -2476,16 +2476,16 @@ body: | ; CI-LABEL: name: test_load_flat_p5_align1 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 from unknown-address + 2) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 from unknown-address + 3) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -2508,16 +2508,16 @@ body: | ; CI: $vgpr0 = COPY [[INTTOPTR]](p5) ; VI-LABEL: name: test_load_flat_p5_align1 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 from unknown-address + 2) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 from unknown-address + 3) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -2540,16 +2540,16 @@ body: | ; VI: $vgpr0 = COPY [[INTTOPTR]](p5) ; GFX9-LABEL: name: test_load_flat_p5_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 from unknown-address + 2) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 from unknown-address + 3) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -2571,7 +2571,7 @@ body: | ; GFX9: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) ; GFX9: $vgpr0 = COPY [[INTTOPTR]](p5) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(p5) = G_LOAD %0 :: (load 4, align 1, addrspace 0) + %1:_(p5) = G_LOAD %0 :: (load (p5), align 1, addrspace 0) $vgpr0 = COPY %1 ... @@ -2583,7 +2583,7 @@ body: | ; CI-LABEL: name: test_load_flat_v2s8_align4 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (<2 x s8>), align 4) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -2604,7 +2604,7 @@ body: | ; CI: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_load_flat_v2s8_align4 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (<2 x s8>), align 4) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -2623,7 +2623,7 @@ body: | ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_load_flat_v2s8_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (<2 x s8>), align 4) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -2641,7 +2641,7 @@ body: | ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, align 4, addrspace 0) + %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 4, addrspace 0) %2:_(s16) = G_BITCAST %1 %3:_(s32) = G_ANYEXT %2 $vgpr0 = COPY %3 @@ -2655,7 +2655,7 @@ body: | ; CI-LABEL: name: test_load_flat_v2s8_align2 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (<2 x s8>)) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -2676,7 +2676,7 @@ body: | ; CI: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_load_flat_v2s8_align2 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (<2 x s8>)) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -2695,7 +2695,7 @@ body: | ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_load_flat_v2s8_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (<2 x s8>)) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -2713,7 +2713,7 @@ body: | ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, align 2, addrspace 0) + %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 2, addrspace 0) %2:_(s16) = G_BITCAST %1 %3:_(s32) = G_ANYEXT %2 $vgpr0 = COPY %3 @@ -2727,10 +2727,10 @@ body: | ; CI-LABEL: name: test_load_flat_v2s8_align1 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; CI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -2745,10 +2745,10 @@ body: | ; CI: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_load_flat_v2s8_align1 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -2761,10 +2761,10 @@ body: | ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_load_flat_v2s8_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -2776,7 +2776,7 @@ body: | ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, align 1, addrspace 0) + %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 1, addrspace 0) %2:_(s16) = G_BITCAST %1 %3:_(s32) = G_ANYEXT %2 $vgpr0 = COPY %3 @@ -2790,24 +2790,24 @@ body: | ; CI-LABEL: name: test_load_flat_v3s8_align4 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p0) :: (load 3, align 4) + ; CI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p0) :: (load (<3 x s8>), align 4) ; CI: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; CI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; CI: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; VI-LABEL: name: test_load_flat_v3s8_align4 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p0) :: (load 3, align 4) + ; VI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p0) :: (load (<3 x s8>), align 4) ; VI: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; VI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; VI: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; GFX9-LABEL: name: test_load_flat_v3s8_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p0) :: (load 3, align 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p0) :: (load (<3 x s8>), align 4) ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; GFX9: $vgpr0 = COPY [[INSERT]](<4 x s8>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<3 x s8>) = G_LOAD %0 :: (load 3, align 4, addrspace 0) + %1:_(<3 x s8>) = G_LOAD %0 :: (load (<3 x s8>), align 4, addrspace 0) %2:_(<4 x s8>) = G_IMPLICIT_DEF %3:_(<4 x s8>) = G_INSERT %2, %1, 0 $vgpr0 = COPY %3 @@ -2821,24 +2821,24 @@ body: | ; CI-LABEL: name: test_load_flat_v3s8_align1 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p0) :: (load 2, align 1) + ; CI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p0) :: (load (<3 x s8>), align 1) ; CI: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; CI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; CI: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; VI-LABEL: name: test_load_flat_v3s8_align1 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p0) :: (load 2, align 1) + ; VI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p0) :: (load (<3 x s8>), align 1) ; VI: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; VI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; VI: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; GFX9-LABEL: name: test_load_flat_v3s8_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p0) :: (load 2, align 1) + ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p0) :: (load (<3 x s8>), align 1) ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; GFX9: $vgpr0 = COPY [[INSERT]](<4 x s8>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<3 x s8>) = G_LOAD %0 :: (load 2, align 1, addrspace 0) + %1:_(<3 x s8>) = G_LOAD %0 :: (load (<3 x s8>), align 1, addrspace 0) %2:_(<4 x s8>) = G_IMPLICIT_DEF %3:_(<4 x s8>) = G_INSERT %2, %1, 0 $vgpr0 = COPY %3 @@ -2852,7 +2852,7 @@ body: | ; CI-LABEL: name: test_load_flat_v4s8_align4 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (<4 x s8>)) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -2868,7 +2868,7 @@ body: | ; CI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; VI-LABEL: name: test_load_flat_v4s8_align4 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (<4 x s8>)) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -2884,7 +2884,7 @@ body: | ; VI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; GFX9-LABEL: name: test_load_flat_v4s8_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (<4 x s8>)) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -2901,7 +2901,7 @@ body: | ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) ; GFX9: $vgpr0 = COPY [[TRUNC]](<4 x s8>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<4 x s8>) = G_LOAD %0 :: (load 4, align 4, addrspace 0) + %1:_(<4 x s8>) = G_LOAD %0 :: (load (<4 x s8>), align 4, addrspace 0) $vgpr0 = COPY %1 ... @@ -2913,10 +2913,10 @@ body: | ; CI-LABEL: name: test_load_flat_v4s8_align2 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 from unknown-address + 2) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -2935,10 +2935,10 @@ body: | ; CI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; VI-LABEL: name: test_load_flat_v4s8_align2 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 from unknown-address + 2) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -2957,10 +2957,10 @@ body: | ; VI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; GFX9-LABEL: name: test_load_flat_v4s8_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 from unknown-address + 2) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -2980,7 +2980,7 @@ body: | ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) ; GFX9: $vgpr0 = COPY [[TRUNC]](<4 x s8>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<4 x s8>) = G_LOAD %0 :: (load 4, align 2, addrspace 0) + %1:_(<4 x s8>) = G_LOAD %0 :: (load (<4 x s8>), align 2, addrspace 0) $vgpr0 = COPY %1 ... @@ -2992,16 +2992,16 @@ body: | ; CI-LABEL: name: test_load_flat_v4s8_align1 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 from unknown-address + 2) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 from unknown-address + 3) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) @@ -3011,16 +3011,16 @@ body: | ; CI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; VI-LABEL: name: test_load_flat_v4s8_align1 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 from unknown-address + 2) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 from unknown-address + 3) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) @@ -3030,16 +3030,16 @@ body: | ; VI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; GFX9-LABEL: name: test_load_flat_v4s8_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 from unknown-address + 2) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 from unknown-address + 3) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) @@ -3050,7 +3050,7 @@ body: | ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) ; GFX9: $vgpr0 = COPY [[TRUNC]](<4 x s8>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<4 x s8>) = G_LOAD %0 :: (load 4, align 1, addrspace 0) + %1:_(<4 x s8>) = G_LOAD %0 :: (load (<4 x s8>), align 1, addrspace 0) $vgpr0 = COPY %1 ... @@ -3062,7 +3062,7 @@ body: | ; CI-LABEL: name: test_load_flat_v8s8_align8 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load 8) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<8 x s8>)) ; CI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) @@ -3089,7 +3089,7 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<8 x s8>) ; VI-LABEL: name: test_load_flat_v8s8_align8 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load 8) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<8 x s8>)) ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) @@ -3116,7 +3116,7 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<8 x s8>) ; GFX9-LABEL: name: test_load_flat_v8s8_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load 8) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<8 x s8>)) ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) @@ -3146,7 +3146,7 @@ body: | ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<8 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>) ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS2]](<8 x s8>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<8 x s8>) = G_LOAD %0 :: (load 8, align 8, addrspace 0) + %1:_(<8 x s8>) = G_LOAD %0 :: (load (<8 x s8>), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 ... @@ -3158,7 +3158,7 @@ body: | ; CI-LABEL: name: test_load_flat_v16s8_align16 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<16 x s8>)) ; CI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) @@ -3203,7 +3203,7 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>) ; VI-LABEL: name: test_load_flat_v16s8_align16 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<16 x s8>)) ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) @@ -3248,7 +3248,7 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>) ; GFX9-LABEL: name: test_load_flat_v16s8_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<16 x s8>)) ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) @@ -3300,7 +3300,7 @@ body: | ; GFX9: [[CONCAT_VECTORS4:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<16 x s8>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<16 x s8>) = G_LOAD %0 :: (load 16, align 16, addrspace 0) + %1:_(<16 x s8>) = G_LOAD %0 :: (load (<16 x s8>), align 16, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -3312,10 +3312,10 @@ body: | ; CI-LABEL: name: test_load_flat_v32s8_align32 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (s128), align 32) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 from unknown-address + 16) + ; CI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from unknown-address + 16) ; CI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; CI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<4 x s32>) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 @@ -3397,10 +3397,10 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<32 x s8>) ; VI-LABEL: name: test_load_flat_v32s8_align32 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (s128), align 32) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 from unknown-address + 16) + ; VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from unknown-address + 16) ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; VI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<4 x s32>) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 @@ -3482,10 +3482,10 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<32 x s8>) ; GFX9-LABEL: name: test_load_flat_v32s8_align32 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (s128), align 32) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 from unknown-address + 16) + ; GFX9: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from unknown-address + 16) ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<4 x s32>) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 @@ -3582,7 +3582,7 @@ body: | ; GFX9: [[CONCAT_VECTORS8:%[0-9]+]]:_(<32 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>), [[TRUNC4]](<4 x s8>), [[TRUNC5]](<4 x s8>), [[TRUNC6]](<4 x s8>), [[TRUNC7]](<4 x s8>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS8]](<32 x s8>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<32 x s8>) = G_LOAD %0 :: (load 32, align 32, addrspace 0) + %1:_(<32 x s8>) = G_LOAD %0 :: (load (<32 x s8>), align 32, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 ... @@ -3595,18 +3595,18 @@ body: | ; CI-LABEL: name: test_load_flat_v2s16_align4 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load 4) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load (<2 x s16>)) ; CI: $vgpr0 = COPY [[LOAD]](<2 x s16>) ; VI-LABEL: name: test_load_flat_v2s16_align4 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load 4) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load (<2 x s16>)) ; VI: $vgpr0 = COPY [[LOAD]](<2 x s16>) ; GFX9-LABEL: name: test_load_flat_v2s16_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load (<2 x s16>)) ; GFX9: $vgpr0 = COPY [[LOAD]](<2 x s16>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<2 x s16>) = G_LOAD %0 :: (load 4, align 4, addrspace 0) + %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 0) $vgpr0 = COPY %1 ... @@ -3618,10 +3618,10 @@ body: | ; CI-LABEL: name: test_load_flat_v2s16_align2 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 from unknown-address + 2) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -3634,10 +3634,10 @@ body: | ; CI: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; VI-LABEL: name: test_load_flat_v2s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 from unknown-address + 2) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -3650,16 +3650,16 @@ body: | ; VI: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; GFX9-LABEL: name: test_load_flat_v2s16_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 from unknown-address + 2) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<2 x s16>) = G_LOAD %0 :: (load 4, align 2, addrspace 0) + %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 2, addrspace 0) $vgpr0 = COPY %1 ... @@ -3671,10 +3671,10 @@ body: | ; CI-LABEL: name: test_load_flat_v2s16_align1 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; CI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -3687,9 +3687,9 @@ body: | ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 from unknown-address + 2) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) ; CI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 from unknown-address + 3) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -3707,10 +3707,10 @@ body: | ; CI: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; VI-LABEL: name: test_load_flat_v2s16_align1 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -3721,9 +3721,9 @@ body: | ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 from unknown-address + 2) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 from unknown-address + 3) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) @@ -3739,10 +3739,10 @@ body: | ; VI: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; GFX9-LABEL: name: test_load_flat_v2s16_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -3753,9 +3753,9 @@ body: | ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 from unknown-address + 2) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 from unknown-address + 3) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) @@ -3767,7 +3767,7 @@ body: | ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<2 x s16>) = G_LOAD %0 :: (load 4, align 1, addrspace 0) + %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 1, addrspace 0) $vgpr0 = COPY %1 ... @@ -3779,7 +3779,7 @@ body: | ; CI-LABEL: name: test_load_flat_v3s16_align8 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load 8) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>)) ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) @@ -3787,7 +3787,7 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_flat_v3s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load 8) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>)) ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) @@ -3795,14 +3795,14 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-LABEL: name: test_load_flat_v3s16_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load 8) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>)) ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 8, addrspace 0) + %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 8, addrspace 0) %2:_(<4 x s16>) = G_IMPLICIT_DEF %3:_(<4 x s16>) = G_INSERT %2, %1, 0 $vgpr0_vgpr1 = COPY %3 @@ -3816,24 +3816,24 @@ body: | ; CI-LABEL: name: test_load_flat_v3s16_align4 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p0) :: (load 6, align 4) + ; CI: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p0) :: (load (<3 x s16>), align 4) ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0 ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_flat_v3s16_align4 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p0) :: (load 6, align 4) + ; VI: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p0) :: (load (<3 x s16>), align 4) ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-LABEL: name: test_load_flat_v3s16_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p0) :: (load 6, align 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p0) :: (load (<3 x s16>), align 4) ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 4, addrspace 0) + %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 4, addrspace 0) %2:_(<4 x s16>) = G_IMPLICIT_DEF %3:_(<4 x s16>) = G_INSERT %2, %1, 0 $vgpr0_vgpr1 = COPY %3 @@ -3847,13 +3847,13 @@ body: | ; CI-LABEL: name: test_load_flat_v3s16_align2 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 from unknown-address + 2) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2 from unknown-address + 4) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] @@ -3877,13 +3877,13 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_flat_v3s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 from unknown-address + 2) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2 from unknown-address + 4) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] @@ -3907,13 +3907,13 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-LABEL: name: test_load_flat_v3s16_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 from unknown-address + 2) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2 from unknown-address + 4) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) @@ -3927,7 +3927,7 @@ body: | ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 2, addrspace 0) + %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 2, addrspace 0) %2:_(<4 x s16>) = G_IMPLICIT_DEF %3:_(<4 x s16>) = G_INSERT %2, %1, 0 $vgpr0_vgpr1 = COPY %3 @@ -3941,10 +3941,10 @@ body: | ; CI-LABEL: name: test_load_flat_v3s16_align1 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; CI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -3957,9 +3957,9 @@ body: | ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 from unknown-address + 2) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) ; CI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 from unknown-address + 3) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -3970,9 +3970,9 @@ body: | ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; CI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 from unknown-address + 4) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 from unknown-address + 5) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) ; CI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -4000,10 +4000,10 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_flat_v3s16_align1 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -4014,9 +4014,9 @@ body: | ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 from unknown-address + 2) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 from unknown-address + 3) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) @@ -4025,9 +4025,9 @@ body: | ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 from unknown-address + 4) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 from unknown-address + 5) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) @@ -4053,10 +4053,10 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-LABEL: name: test_load_flat_v3s16_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -4067,9 +4067,9 @@ body: | ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 from unknown-address + 2) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 from unknown-address + 3) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) @@ -4078,9 +4078,9 @@ body: | ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 from unknown-address + 4) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 from unknown-address + 5) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) @@ -4100,7 +4100,7 @@ body: | ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 1, addrspace 0) + %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 1, addrspace 0) %2:_(<4 x s16>) = G_IMPLICIT_DEF %3:_(<4 x s16>) = G_INSERT %2, %1, 0 $vgpr0_vgpr1 = COPY %3 @@ -4114,18 +4114,18 @@ body: | ; CI-LABEL: name: test_load_flat_v4s16_align8 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load 8) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>)) ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) ; VI-LABEL: name: test_load_flat_v4s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load 8) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>)) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) ; GFX9-LABEL: name: test_load_flat_v4s16_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load 8) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>)) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = G_LOAD %0 :: (load 8, align 8, addrspace 0) + %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 ... @@ -4137,18 +4137,18 @@ body: | ; CI-LABEL: name: test_load_flat_v4s16_align4 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load 8, align 4) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>), align 4) ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) ; VI-LABEL: name: test_load_flat_v4s16_align4 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load 8, align 4) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>), align 4) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) ; GFX9-LABEL: name: test_load_flat_v4s16_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load 8, align 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>), align 4) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = G_LOAD %0 :: (load 8, align 4, addrspace 0) + %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 4, addrspace 0) $vgpr0_vgpr1 = COPY %1 ... @@ -4160,16 +4160,16 @@ body: | ; CI-LABEL: name: test_load_flat_v4s16_align2 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 from unknown-address + 2) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2 from unknown-address + 4) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 2 from unknown-address + 6) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -4190,16 +4190,16 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_load_flat_v4s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 from unknown-address + 2) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2 from unknown-address + 4) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 2 from unknown-address + 6) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -4220,16 +4220,16 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_load_flat_v4s16_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 from unknown-address + 2) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2 from unknown-address + 4) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 2 from unknown-address + 6) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) @@ -4239,7 +4239,7 @@ body: | ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = G_LOAD %0 :: (load 8, align 2, addrspace 0) + %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 2, addrspace 0) $vgpr0_vgpr1 = COPY %1 ... @@ -4251,10 +4251,10 @@ body: | ; CI-LABEL: name: test_load_flat_v4s16_align1 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; CI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -4267,9 +4267,9 @@ body: | ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 from unknown-address + 2) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) ; CI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 from unknown-address + 3) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -4286,9 +4286,9 @@ body: | ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 from unknown-address + 4) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 from unknown-address + 5) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) ; CI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -4298,9 +4298,9 @@ body: | ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 from unknown-address + 6) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 from unknown-address + 7) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -4318,10 +4318,10 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_load_flat_v4s16_align1 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -4332,9 +4332,9 @@ body: | ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 from unknown-address + 2) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 from unknown-address + 3) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) @@ -4349,9 +4349,9 @@ body: | ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 from unknown-address + 4) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 from unknown-address + 5) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) @@ -4359,9 +4359,9 @@ body: | ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; VI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 from unknown-address + 6) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 from unknown-address + 7) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) @@ -4377,10 +4377,10 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_load_flat_v4s16_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -4391,9 +4391,9 @@ body: | ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 from unknown-address + 2) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 from unknown-address + 3) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) @@ -4405,9 +4405,9 @@ body: | ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 from unknown-address + 4) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 from unknown-address + 5) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) @@ -4415,9 +4415,9 @@ body: | ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 from unknown-address + 6) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 from unknown-address + 7) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) @@ -4430,7 +4430,7 @@ body: | ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = G_LOAD %0 :: (load 8, align 1, addrspace 0) + %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 1, addrspace 0) $vgpr0_vgpr1 = COPY %1 ... @@ -4442,18 +4442,21 @@ body: | ; CI-LABEL: name: test_load_flat_v8s16_align8 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load 8) - ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<8 x s16>), align 8) + ; CI: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) + ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) ; VI-LABEL: name: test_load_flat_v8s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load 8) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<8 x s16>), align 8) + ; VI: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) ; GFX9-LABEL: name: test_load_flat_v8s16_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load 8) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<8 x s16>), align 8) + ; GFX9: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<8 x s16>) = G_LOAD %0 :: (load 8, align 8, addrspace 0) + %1:_(<8 x s16>) = G_LOAD %0 :: (load (<8 x s16>), align 8, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -4465,18 +4468,18 @@ body: | ; CI-LABEL: name: test_load_flat_v2s32_align8 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load 8) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>)) ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; VI-LABEL: name: test_load_flat_v2s32_align8 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load 8) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>)) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; GFX9-LABEL: name: test_load_flat_v2s32_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load 8) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>)) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_LOAD %0 :: (load 8, align 8, addrspace 0) + %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 ... @@ -4488,18 +4491,18 @@ body: | ; CI-LABEL: name: test_load_flat_v2s32_align4 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load 8, align 4) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 4) ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; VI-LABEL: name: test_load_flat_v2s32_align4 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load 8, align 4) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 4) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; GFX9-LABEL: name: test_load_flat_v2s32_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load 8, align 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 4) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_LOAD %0 :: (load 8, align 4, addrspace 0) + %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 4, addrspace 0) $vgpr0_vgpr1 = COPY %1 ... @@ -4512,18 +4515,18 @@ body: | ; CI-LABEL: name: test_load_flat_v2s32_align1 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load 8, align 4) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 4) ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; VI-LABEL: name: test_load_flat_v2s32_align1 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load 8, align 4) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 4) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; GFX9-LABEL: name: test_load_flat_v2s32_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load 8, align 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>), align 4) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_LOAD %0 :: (load 8, align 4, addrspace 0) + %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 4, addrspace 0) $vgpr0_vgpr1 = COPY %1 ... @@ -4535,18 +4538,18 @@ body: | ; CI-LABEL: name: test_load_flat_v3s32_align16 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load 12, align 16) + ; CI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 16) ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) ; VI-LABEL: name: test_load_flat_v3s32_align16 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load 12, align 16) + ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 16) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) ; GFX9-LABEL: name: test_load_flat_v3s32_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load 12, align 16) + ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 16) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<3 x s32>) = G_LOAD %0 :: (load 12, align 16, addrspace 0) + %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 16, addrspace 0) $vgpr0_vgpr1_vgpr2 = COPY %1 ... @@ -4560,18 +4563,18 @@ body: | ; CI-LABEL: name: test_load_flat_v3s32_align4 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load 12, align 4) + ; CI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 4) ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) ; VI-LABEL: name: test_load_flat_v3s32_align4 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load 12, align 4) + ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 4) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) ; GFX9-LABEL: name: test_load_flat_v3s32_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load 12, align 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 4) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<3 x s32>) = G_LOAD %0 :: (load 12, align 4, addrspace 0) + %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 4, addrspace 0) $vgpr0_vgpr1_vgpr2 = COPY %1 ... @@ -4583,18 +4586,18 @@ body: | ; CI-LABEL: name: test_load_flat_v4s32_align16 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>)) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; VI-LABEL: name: test_load_flat_v4s32_align16 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>)) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; GFX9-LABEL: name: test_load_flat_v4s32_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>)) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<4 x s32>) = G_LOAD %0 :: (load 16, align 16, addrspace 0) + %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 16, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -4606,18 +4609,18 @@ body: | ; CI-LABEL: name: test_load_flat_v4s32_align8 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 8) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 8) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; VI-LABEL: name: test_load_flat_v4s32_align8 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 8) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 8) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; GFX9-LABEL: name: test_load_flat_v4s32_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 8) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 8) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<4 x s32>) = G_LOAD %0 :: (load 16, align 8, addrspace 0) + %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 8, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -4629,18 +4632,18 @@ body: | ; CI-LABEL: name: test_load_flat_v4s32_align4 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 4) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; VI-LABEL: name: test_load_flat_v4s32_align4 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 4) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; GFX9-LABEL: name: test_load_flat_v4s32_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 4) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<4 x s32>) = G_LOAD %0 :: (load 16, align 4, addrspace 0) + %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 4, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -4652,30 +4655,30 @@ body: | ; CI-LABEL: name: test_load_flat_v8s32_align32 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (s128), align 32) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 from unknown-address + 16) + ; CI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from unknown-address + 16) ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) ; VI-LABEL: name: test_load_flat_v8s32_align32 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (s128), align 32) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 from unknown-address + 16) + ; VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from unknown-address + 16) ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) ; GFX9-LABEL: name: test_load_flat_v8s32_align32 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (s128), align 32) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 from unknown-address + 16) + ; GFX9: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from unknown-address + 16) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<8 x s32>) = G_LOAD %0 :: (load 32, align 32, addrspace 0) + %1:_(<8 x s32>) = G_LOAD %0 :: (load (<8 x s32>), align 32, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 ... @@ -4687,18 +4690,48 @@ body: | ; CI-LABEL: name: test_load_flat_v16s32_align32 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) - ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (s128), align 32) + ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from unknown-address + 16) + ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 + ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CI: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p0) :: (load (s128) from unknown-address + 32, align 32) + ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 + ; CI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CI: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p0) :: (load (s128) from unknown-address + 48) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) + ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) ; VI-LABEL: name: test_load_flat_v16s32_align32 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (s128), align 32) + ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from unknown-address + 16) + ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 + ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; VI: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p0) :: (load (s128) from unknown-address + 32, align 32) + ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 + ; VI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; VI: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p0) :: (load (s128) from unknown-address + 48) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) ; GFX9-LABEL: name: test_load_flat_v16s32_align32 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (s128), align 32) + ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX9: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from unknown-address + 16) + ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 + ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; GFX9: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p0) :: (load (s128) from unknown-address + 32, align 32) + ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 + ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; GFX9: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p0) :: (load (s128) from unknown-address + 48) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<16 x s32>) = G_LOAD %0 :: (load 16, align 32, addrspace 0) + %1:_(<16 x s32>) = G_LOAD %0 :: (load (<16 x s32>), align 32, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1 ... @@ -4710,18 +4743,18 @@ body: | ; CI-LABEL: name: test_load_flat_v2s64_align16 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>)) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) ; VI-LABEL: name: test_load_flat_v2s64_align16 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>)) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) ; GFX9-LABEL: name: test_load_flat_v2s64_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>)) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = G_LOAD %0 :: (load 16, align 16, addrspace 0) + %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 16, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -4733,18 +4766,18 @@ body: | ; CI-LABEL: name: test_load_flat_v2s64_align8 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16, align 8) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 8) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) ; VI-LABEL: name: test_load_flat_v2s64_align8 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16, align 8) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 8) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) ; GFX9-LABEL: name: test_load_flat_v2s64_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16, align 8) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 8) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = G_LOAD %0 :: (load 16, align 8, addrspace 0) + %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 8, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -4756,18 +4789,18 @@ body: | ; CI-LABEL: name: test_load_flat_v2s64_align4 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16, align 4) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 4) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) ; VI-LABEL: name: test_load_flat_v2s64_align4 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16, align 4) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 4) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) ; GFX9-LABEL: name: test_load_flat_v2s64_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16, align 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 4) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = G_LOAD %0 :: (load 16, align 4, addrspace 0) + %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 4, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -4779,16 +4812,16 @@ body: | ; CI-LABEL: name: test_load_flat_v2s64_align2 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 from unknown-address + 2) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2 from unknown-address + 4) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 2 from unknown-address + 6) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -4806,13 +4839,13 @@ body: | ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; CI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 2 from unknown-address + 8) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s16) from unknown-address + 8) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 2 from unknown-address + 10) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s16) from unknown-address + 10) ; CI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 2 from unknown-address + 12) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s16) from unknown-address + 12) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 2 from unknown-address + 14) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s16) from unknown-address + 14) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -4830,16 +4863,16 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; VI-LABEL: name: test_load_flat_v2s64_align2 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 from unknown-address + 2) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2 from unknown-address + 4) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 2 from unknown-address + 6) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -4857,13 +4890,13 @@ body: | ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 2 from unknown-address + 8) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s16) from unknown-address + 8) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 2 from unknown-address + 10) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s16) from unknown-address + 10) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 2 from unknown-address + 12) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s16) from unknown-address + 12) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 2 from unknown-address + 14) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s16) from unknown-address + 14) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -4881,16 +4914,16 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-LABEL: name: test_load_flat_v2s64_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 2 from unknown-address + 2) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2 from unknown-address + 4) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 2 from unknown-address + 6) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -4908,13 +4941,13 @@ body: | ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 2 from unknown-address + 8) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s16) from unknown-address + 8) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 2 from unknown-address + 10) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s16) from unknown-address + 10) ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 2 from unknown-address + 12) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s16) from unknown-address + 12) ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 2 from unknown-address + 14) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s16) from unknown-address + 14) ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -4931,7 +4964,7 @@ body: | ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = G_LOAD %0 :: (load 16, align 2, addrspace 0) + %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 2, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -4943,28 +4976,28 @@ body: | ; CI-LABEL: name: test_load_flat_v2s64_align1 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 from unknown-address + 2) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 from unknown-address + 3) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) ; CI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 from unknown-address + 4) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; CI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 from unknown-address + 5) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) ; CI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 from unknown-address + 6) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) ; CI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; CI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 from unknown-address + 7) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) ; CI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -5011,21 +5044,21 @@ body: | ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; CI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CI: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s64) - ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load 1 from unknown-address + 8) + ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) ; CI: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load 1 from unknown-address + 9) + ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) ; CI: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load 1 from unknown-address + 10) + ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) ; CI: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load 1 from unknown-address + 11) + ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) ; CI: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load 1 from unknown-address + 12) + ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) ; CI: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load 1 from unknown-address + 13) + ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) ; CI: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) - ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load 1 from unknown-address + 14) + ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) ; CI: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64) - ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load 1 from unknown-address + 15) + ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) ; CI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; CI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32) @@ -5071,28 +5104,28 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; VI-LABEL: name: test_load_flat_v2s64_align1 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 from unknown-address + 2) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 from unknown-address + 3) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 from unknown-address + 4) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; VI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 from unknown-address + 5) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 from unknown-address + 6) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; VI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 from unknown-address + 7) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) ; VI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -5131,21 +5164,21 @@ body: | ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; VI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C10]](s64) - ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load 1 from unknown-address + 8) + ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load 1 from unknown-address + 9) + ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load 1 from unknown-address + 10) + ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load 1 from unknown-address + 11) + ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) ; VI: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load 1 from unknown-address + 12) + ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) ; VI: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load 1 from unknown-address + 13) + ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) ; VI: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) - ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load 1 from unknown-address + 14) + ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) ; VI: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64) - ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load 1 from unknown-address + 15) + ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) ; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) @@ -5183,28 +5216,28 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-LABEL: name: test_load_flat_v2s64_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 from unknown-address + 2) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 from unknown-address + 3) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 from unknown-address + 4) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 from unknown-address + 5) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 from unknown-address + 6) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) ; GFX9: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 from unknown-address + 7) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) ; GFX9: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -5243,21 +5276,21 @@ body: | ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; GFX9: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C10]](s64) - ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load 1 from unknown-address + 8) + ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load 1 from unknown-address + 9) + ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load 1 from unknown-address + 10) + ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load 1 from unknown-address + 11) + ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load 1 from unknown-address + 12) + ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load 1 from unknown-address + 13) + ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) - ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load 1 from unknown-address + 14) + ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64) - ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load 1 from unknown-address + 15) + ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) ; GFX9: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; GFX9: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) @@ -5294,7 +5327,7 @@ body: | ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = G_LOAD %0 :: (load 16, align 1, addrspace 0) + %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 1, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -5306,10 +5339,10 @@ body: | ; CI-LABEL: name: test_load_flat_v3s64_align32 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (s128), align 32) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load 8 from unknown-address + 16, align 16) + ; CI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 16) ; CI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; CI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF @@ -5317,10 +5350,10 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) ; VI-LABEL: name: test_load_flat_v3s64_align32 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (s128), align 32) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load 8 from unknown-address + 16, align 16) + ; VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 16) ; VI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; VI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF @@ -5328,17 +5361,17 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) ; GFX9-LABEL: name: test_load_flat_v3s64_align32 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (s128), align 32) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load 8 from unknown-address + 16, align 16) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 16) ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<3 x s64>) = G_LOAD %0 :: (load 24, align 32, addrspace 0) + %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 32, addrspace 0) %2:_(<4 x s64>) = G_IMPLICIT_DEF %3:_(<4 x s64>) = G_INSERT %2, %1, 0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3 @@ -5352,10 +5385,10 @@ body: | ; CI-LABEL: name: test_load_flat_v3s64_align8 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16, align 8) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (s128), align 8) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load 8 from unknown-address + 16) + ; CI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16) ; CI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; CI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF @@ -5363,10 +5396,10 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) ; VI-LABEL: name: test_load_flat_v3s64_align8 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16, align 8) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (s128), align 8) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load 8 from unknown-address + 16) + ; VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16) ; VI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; VI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF @@ -5374,17 +5407,17 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) ; GFX9-LABEL: name: test_load_flat_v3s64_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16, align 8) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (s128), align 8) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load 8 from unknown-address + 16) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16) ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<3 x s64>) = G_LOAD %0 :: (load 24, align 8, addrspace 0) + %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 8, addrspace 0) %2:_(<4 x s64>) = G_IMPLICIT_DEF %3:_(<4 x s64>) = G_INSERT %2, %1, 0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3 @@ -5398,28 +5431,28 @@ body: | ; CI-LABEL: name: test_load_flat_v3s64_align1 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 from unknown-address + 2) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 from unknown-address + 3) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) ; CI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 from unknown-address + 4) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; CI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 from unknown-address + 5) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) ; CI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 from unknown-address + 6) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) ; CI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; CI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 from unknown-address + 7) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) ; CI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -5466,21 +5499,21 @@ body: | ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; CI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CI: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s64) - ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load 1 from unknown-address + 8) + ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) ; CI: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load 1 from unknown-address + 9) + ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) ; CI: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load 1 from unknown-address + 10) + ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) ; CI: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load 1 from unknown-address + 11) + ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) ; CI: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load 1 from unknown-address + 12) + ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) ; CI: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load 1 from unknown-address + 13) + ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) ; CI: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) - ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load 1 from unknown-address + 14) + ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) ; CI: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64) - ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load 1 from unknown-address + 15) + ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) ; CI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; CI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32) @@ -5524,21 +5557,21 @@ body: | ; CI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; CI: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C12]](s64) - ; CI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p0) :: (load 1 from unknown-address + 16) + ; CI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p0) :: (load (s8) from unknown-address + 16) ; CI: [[PTR_ADD16:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; CI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p0) :: (load 1 from unknown-address + 17) + ; CI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p0) :: (load (s8) from unknown-address + 17) ; CI: [[PTR_ADD17:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; CI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p0) :: (load 1 from unknown-address + 18) + ; CI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p0) :: (load (s8) from unknown-address + 18) ; CI: [[PTR_ADD18:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; CI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p0) :: (load 1 from unknown-address + 19) + ; CI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p0) :: (load (s8) from unknown-address + 19) ; CI: [[PTR_ADD19:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) - ; CI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p0) :: (load 1 from unknown-address + 20) + ; CI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p0) :: (load (s8) from unknown-address + 20) ; CI: [[PTR_ADD20:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; CI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p0) :: (load 1 from unknown-address + 21) + ; CI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p0) :: (load (s8) from unknown-address + 21) ; CI: [[PTR_ADD21:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; CI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p0) :: (load 1 from unknown-address + 22) + ; CI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p0) :: (load (s8) from unknown-address + 22) ; CI: [[PTR_ADD22:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; CI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p0) :: (load 1 from unknown-address + 23) + ; CI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p0) :: (load (s8) from unknown-address + 23) ; CI: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32) ; CI: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]] ; CI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C8]](s32) @@ -5586,28 +5619,28 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) ; VI-LABEL: name: test_load_flat_v3s64_align1 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 from unknown-address + 2) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 from unknown-address + 3) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 from unknown-address + 4) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; VI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 from unknown-address + 5) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 from unknown-address + 6) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; VI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 from unknown-address + 7) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) ; VI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -5646,21 +5679,21 @@ body: | ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; VI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C10]](s64) - ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load 1 from unknown-address + 8) + ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load 1 from unknown-address + 9) + ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load 1 from unknown-address + 10) + ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load 1 from unknown-address + 11) + ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) ; VI: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load 1 from unknown-address + 12) + ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) ; VI: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load 1 from unknown-address + 13) + ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) ; VI: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) - ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load 1 from unknown-address + 14) + ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) ; VI: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64) - ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load 1 from unknown-address + 15) + ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) ; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) @@ -5696,21 +5729,21 @@ body: | ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; VI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s64) - ; VI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p0) :: (load 1 from unknown-address + 16) + ; VI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p0) :: (load (s8) from unknown-address + 16) ; VI: [[PTR_ADD16:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; VI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p0) :: (load 1 from unknown-address + 17) + ; VI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p0) :: (load (s8) from unknown-address + 17) ; VI: [[PTR_ADD17:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; VI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p0) :: (load 1 from unknown-address + 18) + ; VI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p0) :: (load (s8) from unknown-address + 18) ; VI: [[PTR_ADD18:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; VI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p0) :: (load 1 from unknown-address + 19) + ; VI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p0) :: (load (s8) from unknown-address + 19) ; VI: [[PTR_ADD19:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) - ; VI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p0) :: (load 1 from unknown-address + 20) + ; VI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p0) :: (load (s8) from unknown-address + 20) ; VI: [[PTR_ADD20:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; VI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p0) :: (load 1 from unknown-address + 21) + ; VI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p0) :: (load (s8) from unknown-address + 21) ; VI: [[PTR_ADD21:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; VI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p0) :: (load 1 from unknown-address + 22) + ; VI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p0) :: (load (s8) from unknown-address + 22) ; VI: [[PTR_ADD22:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; VI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p0) :: (load 1 from unknown-address + 23) + ; VI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p0) :: (load (s8) from unknown-address + 23) ; VI: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32) ; VI: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]] ; VI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32) @@ -5750,28 +5783,28 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) ; GFX9-LABEL: name: test_load_flat_v3s64_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 from unknown-address + 2) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 from unknown-address + 3) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 from unknown-address + 4) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 from unknown-address + 5) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 from unknown-address + 6) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) ; GFX9: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 from unknown-address + 7) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) ; GFX9: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -5810,21 +5843,21 @@ body: | ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; GFX9: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C10]](s64) - ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load 1 from unknown-address + 8) + ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load 1 from unknown-address + 9) + ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load 1 from unknown-address + 10) + ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load 1 from unknown-address + 11) + ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load 1 from unknown-address + 12) + ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load 1 from unknown-address + 13) + ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) - ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load 1 from unknown-address + 14) + ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64) - ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load 1 from unknown-address + 15) + ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) ; GFX9: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; GFX9: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) @@ -5860,21 +5893,21 @@ body: | ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; GFX9: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s64) - ; GFX9: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p0) :: (load 1 from unknown-address + 16) + ; GFX9: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p0) :: (load (s8) from unknown-address + 16) ; GFX9: [[PTR_ADD16:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; GFX9: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p0) :: (load 1 from unknown-address + 17) + ; GFX9: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p0) :: (load (s8) from unknown-address + 17) ; GFX9: [[PTR_ADD17:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; GFX9: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p0) :: (load 1 from unknown-address + 18) + ; GFX9: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p0) :: (load (s8) from unknown-address + 18) ; GFX9: [[PTR_ADD18:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; GFX9: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p0) :: (load 1 from unknown-address + 19) + ; GFX9: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p0) :: (load (s8) from unknown-address + 19) ; GFX9: [[PTR_ADD19:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) - ; GFX9: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p0) :: (load 1 from unknown-address + 20) + ; GFX9: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p0) :: (load (s8) from unknown-address + 20) ; GFX9: [[PTR_ADD20:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; GFX9: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p0) :: (load 1 from unknown-address + 21) + ; GFX9: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p0) :: (load (s8) from unknown-address + 21) ; GFX9: [[PTR_ADD21:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; GFX9: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p0) :: (load 1 from unknown-address + 22) + ; GFX9: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p0) :: (load (s8) from unknown-address + 22) ; GFX9: [[PTR_ADD22:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; GFX9: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p0) :: (load 1 from unknown-address + 23) + ; GFX9: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p0) :: (load (s8) from unknown-address + 23) ; GFX9: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32) ; GFX9: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]] ; GFX9: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32) @@ -5913,7 +5946,7 @@ body: | ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<3 x s64>) = G_LOAD %0 :: (load 24, align 1, addrspace 0) + %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 1, addrspace 0) %2:_(<4 x s64>) = G_IMPLICIT_DEF %3:_(<4 x s64>) = G_INSERT %2, %1, 0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3 @@ -5927,30 +5960,30 @@ body: | ; CI-LABEL: name: test_load_flat_v4s64_align32 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (s128), align 32) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 from unknown-address + 16) + ; CI: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from unknown-address + 16) ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) ; VI-LABEL: name: test_load_flat_v4s64_align32 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (s128), align 32) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 from unknown-address + 16) + ; VI: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from unknown-address + 16) ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) ; GFX9-LABEL: name: test_load_flat_v4s64_align32 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (s128), align 32) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 from unknown-address + 16) + ; GFX9: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from unknown-address + 16) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<4 x s64>) = G_LOAD %0 :: (load 32, align 32, addrspace 0) + %1:_(<4 x s64>) = G_LOAD %0 :: (load (<4 x s64>), align 32, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 ... @@ -5962,30 +5995,30 @@ body: | ; CI-LABEL: name: test_load_flat_v4s64_align8 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16, align 8) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (s128), align 8) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 from unknown-address + 16, align 8) + ; CI: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from unknown-address + 16, align 8) ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) ; VI-LABEL: name: test_load_flat_v4s64_align8 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16, align 8) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (s128), align 8) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 from unknown-address + 16, align 8) + ; VI: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from unknown-address + 16, align 8) ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) ; GFX9-LABEL: name: test_load_flat_v4s64_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16, align 8) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (s128), align 8) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 from unknown-address + 16, align 8) + ; GFX9: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from unknown-address + 16, align 8) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<4 x s64>) = G_LOAD %0 :: (load 32, align 8, addrspace 0) + %1:_(<4 x s64>) = G_LOAD %0 :: (load (<4 x s64>), align 8, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 ... @@ -5997,28 +6030,28 @@ body: | ; CI-LABEL: name: test_load_flat_v4s64_align1 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 from unknown-address + 2) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 from unknown-address + 3) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) ; CI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 from unknown-address + 4) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; CI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 from unknown-address + 5) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) ; CI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 from unknown-address + 6) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) ; CI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; CI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 from unknown-address + 7) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) ; CI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -6065,21 +6098,21 @@ body: | ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; CI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CI: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s64) - ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load 1 from unknown-address + 8) + ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) ; CI: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load 1 from unknown-address + 9) + ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) ; CI: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load 1 from unknown-address + 10) + ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) ; CI: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load 1 from unknown-address + 11) + ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) ; CI: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load 1 from unknown-address + 12) + ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) ; CI: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load 1 from unknown-address + 13) + ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) ; CI: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) - ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load 1 from unknown-address + 14) + ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) ; CI: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64) - ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load 1 from unknown-address + 15) + ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) ; CI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; CI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32) @@ -6124,21 +6157,21 @@ body: | ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; CI: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C12]](s64) - ; CI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p0) :: (load 1 from unknown-address + 16) + ; CI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p0) :: (load (s8) from unknown-address + 16) ; CI: [[PTR_ADD16:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; CI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p0) :: (load 1 from unknown-address + 17) + ; CI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p0) :: (load (s8) from unknown-address + 17) ; CI: [[PTR_ADD17:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; CI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p0) :: (load 1 from unknown-address + 18) + ; CI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p0) :: (load (s8) from unknown-address + 18) ; CI: [[PTR_ADD18:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; CI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p0) :: (load 1 from unknown-address + 19) + ; CI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p0) :: (load (s8) from unknown-address + 19) ; CI: [[PTR_ADD19:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) - ; CI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p0) :: (load 1 from unknown-address + 20) + ; CI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p0) :: (load (s8) from unknown-address + 20) ; CI: [[PTR_ADD20:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; CI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p0) :: (load 1 from unknown-address + 21) + ; CI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p0) :: (load (s8) from unknown-address + 21) ; CI: [[PTR_ADD21:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; CI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p0) :: (load 1 from unknown-address + 22) + ; CI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p0) :: (load (s8) from unknown-address + 22) ; CI: [[PTR_ADD22:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; CI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p0) :: (load 1 from unknown-address + 23) + ; CI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p0) :: (load (s8) from unknown-address + 23) ; CI: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32) ; CI: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]] ; CI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C8]](s32) @@ -6181,21 +6214,21 @@ body: | ; CI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] ; CI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) ; CI: [[PTR_ADD23:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C11]](s64) - ; CI: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p0) :: (load 1 from unknown-address + 24) + ; CI: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p0) :: (load (s8) from unknown-address + 24) ; CI: [[PTR_ADD24:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C]](s64) - ; CI: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p0) :: (load 1 from unknown-address + 25) + ; CI: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p0) :: (load (s8) from unknown-address + 25) ; CI: [[PTR_ADD25:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64) - ; CI: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p0) :: (load 1 from unknown-address + 26) + ; CI: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p0) :: (load (s8) from unknown-address + 26) ; CI: [[PTR_ADD26:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C2]](s64) - ; CI: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p0) :: (load 1 from unknown-address + 27) + ; CI: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p0) :: (load (s8) from unknown-address + 27) ; CI: [[PTR_ADD27:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64) - ; CI: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p0) :: (load 1 from unknown-address + 28) + ; CI: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p0) :: (load (s8) from unknown-address + 28) ; CI: [[PTR_ADD28:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64) - ; CI: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p0) :: (load 1 from unknown-address + 29) + ; CI: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p0) :: (load (s8) from unknown-address + 29) ; CI: [[PTR_ADD29:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64) - ; CI: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p0) :: (load 1 from unknown-address + 30) + ; CI: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p0) :: (load (s8) from unknown-address + 30) ; CI: [[PTR_ADD30:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C6]](s64) - ; CI: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p0) :: (load 1 from unknown-address + 31) + ; CI: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p0) :: (load (s8) from unknown-address + 31) ; CI: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD24]](s32) ; CI: [[AND24:%[0-9]+]]:_(s16) = G_AND [[TRUNC24]], [[C7]] ; CI: [[COPY24:%[0-9]+]]:_(s32) = COPY [[C8]](s32) @@ -6242,28 +6275,28 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) ; VI-LABEL: name: test_load_flat_v4s64_align1 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 from unknown-address + 2) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 from unknown-address + 3) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 from unknown-address + 4) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; VI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 from unknown-address + 5) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 from unknown-address + 6) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; VI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 from unknown-address + 7) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) ; VI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -6302,21 +6335,21 @@ body: | ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; VI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C10]](s64) - ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load 1 from unknown-address + 8) + ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load 1 from unknown-address + 9) + ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load 1 from unknown-address + 10) + ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load 1 from unknown-address + 11) + ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) ; VI: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load 1 from unknown-address + 12) + ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) ; VI: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load 1 from unknown-address + 13) + ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) ; VI: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) - ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load 1 from unknown-address + 14) + ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) ; VI: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64) - ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load 1 from unknown-address + 15) + ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) ; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) @@ -6353,21 +6386,21 @@ body: | ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; VI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s64) - ; VI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p0) :: (load 1 from unknown-address + 16) + ; VI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p0) :: (load (s8) from unknown-address + 16) ; VI: [[PTR_ADD16:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; VI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p0) :: (load 1 from unknown-address + 17) + ; VI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p0) :: (load (s8) from unknown-address + 17) ; VI: [[PTR_ADD17:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; VI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p0) :: (load 1 from unknown-address + 18) + ; VI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p0) :: (load (s8) from unknown-address + 18) ; VI: [[PTR_ADD18:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; VI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p0) :: (load 1 from unknown-address + 19) + ; VI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p0) :: (load (s8) from unknown-address + 19) ; VI: [[PTR_ADD19:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) - ; VI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p0) :: (load 1 from unknown-address + 20) + ; VI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p0) :: (load (s8) from unknown-address + 20) ; VI: [[PTR_ADD20:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; VI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p0) :: (load 1 from unknown-address + 21) + ; VI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p0) :: (load (s8) from unknown-address + 21) ; VI: [[PTR_ADD21:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; VI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p0) :: (load 1 from unknown-address + 22) + ; VI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p0) :: (load (s8) from unknown-address + 22) ; VI: [[PTR_ADD22:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; VI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p0) :: (load 1 from unknown-address + 23) + ; VI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p0) :: (load (s8) from unknown-address + 23) ; VI: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32) ; VI: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]] ; VI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32) @@ -6402,21 +6435,21 @@ body: | ; VI: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] ; VI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) ; VI: [[PTR_ADD23:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C10]](s64) - ; VI: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p0) :: (load 1 from unknown-address + 24) + ; VI: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p0) :: (load (s8) from unknown-address + 24) ; VI: [[PTR_ADD24:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C]](s64) - ; VI: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p0) :: (load 1 from unknown-address + 25) + ; VI: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p0) :: (load (s8) from unknown-address + 25) ; VI: [[PTR_ADD25:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64) - ; VI: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p0) :: (load 1 from unknown-address + 26) + ; VI: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p0) :: (load (s8) from unknown-address + 26) ; VI: [[PTR_ADD26:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C2]](s64) - ; VI: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p0) :: (load 1 from unknown-address + 27) + ; VI: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p0) :: (load (s8) from unknown-address + 27) ; VI: [[PTR_ADD27:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64) - ; VI: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p0) :: (load 1 from unknown-address + 28) + ; VI: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p0) :: (load (s8) from unknown-address + 28) ; VI: [[PTR_ADD28:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64) - ; VI: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p0) :: (load 1 from unknown-address + 29) + ; VI: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p0) :: (load (s8) from unknown-address + 29) ; VI: [[PTR_ADD29:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64) - ; VI: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p0) :: (load 1 from unknown-address + 30) + ; VI: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p0) :: (load (s8) from unknown-address + 30) ; VI: [[PTR_ADD30:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C6]](s64) - ; VI: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p0) :: (load 1 from unknown-address + 31) + ; VI: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p0) :: (load (s8) from unknown-address + 31) ; VI: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD24]](s32) ; VI: [[AND24:%[0-9]+]]:_(s16) = G_AND [[TRUNC24]], [[C7]] ; VI: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD25]](s32) @@ -6455,28 +6488,28 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) ; GFX9-LABEL: name: test_load_flat_v4s64_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 from unknown-address + 2) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 from unknown-address + 3) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 from unknown-address + 4) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 from unknown-address + 5) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 from unknown-address + 6) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) ; GFX9: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 from unknown-address + 7) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) ; GFX9: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -6515,21 +6548,21 @@ body: | ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; GFX9: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C10]](s64) - ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load 1 from unknown-address + 8) + ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load 1 from unknown-address + 9) + ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load 1 from unknown-address + 10) + ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load 1 from unknown-address + 11) + ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load 1 from unknown-address + 12) + ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load 1 from unknown-address + 13) + ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) - ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load 1 from unknown-address + 14) + ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64) - ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load 1 from unknown-address + 15) + ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) ; GFX9: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; GFX9: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) @@ -6566,21 +6599,21 @@ body: | ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GFX9: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s64) - ; GFX9: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p0) :: (load 1 from unknown-address + 16) + ; GFX9: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p0) :: (load (s8) from unknown-address + 16) ; GFX9: [[PTR_ADD16:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; GFX9: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p0) :: (load 1 from unknown-address + 17) + ; GFX9: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p0) :: (load (s8) from unknown-address + 17) ; GFX9: [[PTR_ADD17:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; GFX9: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p0) :: (load 1 from unknown-address + 18) + ; GFX9: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p0) :: (load (s8) from unknown-address + 18) ; GFX9: [[PTR_ADD18:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; GFX9: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p0) :: (load 1 from unknown-address + 19) + ; GFX9: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p0) :: (load (s8) from unknown-address + 19) ; GFX9: [[PTR_ADD19:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) - ; GFX9: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p0) :: (load 1 from unknown-address + 20) + ; GFX9: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p0) :: (load (s8) from unknown-address + 20) ; GFX9: [[PTR_ADD20:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; GFX9: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p0) :: (load 1 from unknown-address + 21) + ; GFX9: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p0) :: (load (s8) from unknown-address + 21) ; GFX9: [[PTR_ADD21:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; GFX9: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p0) :: (load 1 from unknown-address + 22) + ; GFX9: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p0) :: (load (s8) from unknown-address + 22) ; GFX9: [[PTR_ADD22:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; GFX9: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p0) :: (load 1 from unknown-address + 23) + ; GFX9: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p0) :: (load (s8) from unknown-address + 23) ; GFX9: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32) ; GFX9: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]] ; GFX9: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32) @@ -6615,21 +6648,21 @@ body: | ; GFX9: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) ; GFX9: [[PTR_ADD23:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C10]](s64) - ; GFX9: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p0) :: (load 1 from unknown-address + 24) + ; GFX9: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p0) :: (load (s8) from unknown-address + 24) ; GFX9: [[PTR_ADD24:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C]](s64) - ; GFX9: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p0) :: (load 1 from unknown-address + 25) + ; GFX9: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p0) :: (load (s8) from unknown-address + 25) ; GFX9: [[PTR_ADD25:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64) - ; GFX9: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p0) :: (load 1 from unknown-address + 26) + ; GFX9: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p0) :: (load (s8) from unknown-address + 26) ; GFX9: [[PTR_ADD26:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C2]](s64) - ; GFX9: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p0) :: (load 1 from unknown-address + 27) + ; GFX9: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p0) :: (load (s8) from unknown-address + 27) ; GFX9: [[PTR_ADD27:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64) - ; GFX9: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p0) :: (load 1 from unknown-address + 28) + ; GFX9: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p0) :: (load (s8) from unknown-address + 28) ; GFX9: [[PTR_ADD28:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64) - ; GFX9: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p0) :: (load 1 from unknown-address + 29) + ; GFX9: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p0) :: (load (s8) from unknown-address + 29) ; GFX9: [[PTR_ADD29:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64) - ; GFX9: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p0) :: (load 1 from unknown-address + 30) + ; GFX9: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p0) :: (load (s8) from unknown-address + 30) ; GFX9: [[PTR_ADD30:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C6]](s64) - ; GFX9: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p0) :: (load 1 from unknown-address + 31) + ; GFX9: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p0) :: (load (s8) from unknown-address + 31) ; GFX9: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD24]](s32) ; GFX9: [[AND24:%[0-9]+]]:_(s16) = G_AND [[TRUNC24]], [[C7]] ; GFX9: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD25]](s32) @@ -6667,7 +6700,7 @@ body: | ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s64>), [[BUILD_VECTOR1]](<2 x s64>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<4 x s64>) = G_LOAD %0 :: (load 32, align 1, addrspace 0) + %1:_(<4 x s64>) = G_LOAD %0 :: (load (<4 x s64>), align 1, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 ... @@ -6679,33 +6712,33 @@ body: | ; CI-LABEL: name: test_load_flat_v2s128_align32 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (s128), align 32) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 from unknown-address + 16) + ; CI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from unknown-address + 16) ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>) ; VI-LABEL: name: test_load_flat_v2s128_align32 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (s128), align 32) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 from unknown-address + 16) + ; VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from unknown-address + 16) ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>) ; GFX9-LABEL: name: test_load_flat_v2s128_align32 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 32) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (s128), align 32) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 from unknown-address + 16) + ; GFX9: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from unknown-address + 16) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<2 x s128>) = G_LOAD %0 :: (load 32, align 32, addrspace 0) + %1:_(<2 x s128>) = G_LOAD %0 :: (load (<2 x s128>), align 32, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 ... @@ -6717,21 +6750,21 @@ body: | ; CI-LABEL: name: test_load_flat_v2p1_align16 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x p1>)) ; CI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; VI-LABEL: name: test_load_flat_v2p1_align16 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x p1>)) ; VI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; GFX9-LABEL: name: test_load_flat_v2p1_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x p1>)) ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<2 x p1>) = G_LOAD %0 :: (load 16, align 16, addrspace 0) + %1:_(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 16, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -6743,21 +6776,21 @@ body: | ; CI-LABEL: name: test_load_flat_v2p1_align8 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 8) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x p1>), align 8) ; CI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; VI-LABEL: name: test_load_flat_v2p1_align8 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 8) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x p1>), align 8) ; VI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; GFX9-LABEL: name: test_load_flat_v2p1_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 8) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x p1>), align 8) ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<2 x p1>) = G_LOAD %0 :: (load 16, align 8, addrspace 0) + %1:_(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 8, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -6769,21 +6802,21 @@ body: | ; CI-LABEL: name: test_load_flat_v2p1_align4 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 4) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x p1>), align 4) ; CI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; VI-LABEL: name: test_load_flat_v2p1_align4 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 4) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x p1>), align 4) ; VI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; GFX9-LABEL: name: test_load_flat_v2p1_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x p1>), align 4) ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<2 x p1>) = G_LOAD %0 :: (load 16, align 4, addrspace 0) + %1:_(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 4, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -6795,16 +6828,16 @@ body: | ; CI-LABEL: name: test_load_flat_v2p1_align1 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 from unknown-address + 2) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 from unknown-address + 3) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -6825,13 +6858,13 @@ body: | ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; CI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 from unknown-address + 4) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 from unknown-address + 5) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) ; CI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 from unknown-address + 6) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 from unknown-address + 7) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -6848,13 +6881,13 @@ body: | ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; CI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CI: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load 1 from unknown-address + 8) + ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) ; CI: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load 1 from unknown-address + 9) + ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) ; CI: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load 1 from unknown-address + 10) + ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) ; CI: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load 1 from unknown-address + 11) + ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -6871,13 +6904,13 @@ body: | ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; CI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; CI: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64) - ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load 1 from unknown-address + 12) + ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) ; CI: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load 1 from unknown-address + 13) + ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) ; CI: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load 1 from unknown-address + 14) + ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) ; CI: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load 1 from unknown-address + 15) + ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) ; CI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; CI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; CI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -6897,16 +6930,16 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; VI-LABEL: name: test_load_flat_v2p1_align1 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 from unknown-address + 2) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 from unknown-address + 3) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -6927,13 +6960,13 @@ body: | ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 from unknown-address + 4) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 from unknown-address + 5) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 from unknown-address + 6) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 from unknown-address + 7) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -6950,13 +6983,13 @@ body: | ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load 1 from unknown-address + 8) + ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load 1 from unknown-address + 9) + ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load 1 from unknown-address + 10) + ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load 1 from unknown-address + 11) + ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -6973,13 +7006,13 @@ body: | ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; VI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64) - ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load 1 from unknown-address + 12) + ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) ; VI: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load 1 from unknown-address + 13) + ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) ; VI: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load 1 from unknown-address + 14) + ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) ; VI: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load 1 from unknown-address + 15) + ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -6999,16 +7032,16 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; GFX9-LABEL: name: test_load_flat_v2p1_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 from unknown-address + 2) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 from unknown-address + 3) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -7029,13 +7062,13 @@ body: | ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX9: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 from unknown-address + 4) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 from unknown-address + 5) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 from unknown-address + 6) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 from unknown-address + 7) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -7052,13 +7085,13 @@ body: | ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; GFX9: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load 1 from unknown-address + 8) + ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load 1 from unknown-address + 9) + ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load 1 from unknown-address + 10) + ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load 1 from unknown-address + 11) + ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -7075,13 +7108,13 @@ body: | ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; GFX9: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64) - ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load 1 from unknown-address + 12) + ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load 1 from unknown-address + 13) + ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load 1 from unknown-address + 14) + ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load 1 from unknown-address + 15) + ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; GFX9: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -7100,7 +7133,7 @@ body: | ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<2 x p1>) = G_LOAD %0 :: (load 16, align 1, addrspace 0) + %1:_(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 1, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -7112,18 +7145,18 @@ body: | ; CI-LABEL: name: test_load_flat_v2p3_align8 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load 8) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load (<2 x p3>)) ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) ; VI-LABEL: name: test_load_flat_v2p3_align8 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load 8) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load (<2 x p3>)) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) ; GFX9-LABEL: name: test_load_flat_v2p3_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load 8) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load (<2 x p3>)) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<2 x p3>) = G_LOAD %0 :: (load 8, align 8, addrspace 0) + %1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 ... @@ -7135,18 +7168,18 @@ body: | ; CI-LABEL: name: test_load_flat_v2p3_align4 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load 8, align 4) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load (<2 x p3>), align 4) ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) ; VI-LABEL: name: test_load_flat_v2p3_align4 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load 8, align 4) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load (<2 x p3>), align 4) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) ; GFX9-LABEL: name: test_load_flat_v2p3_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load 8, align 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load (<2 x p3>), align 4) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<2 x p3>) = G_LOAD %0 :: (load 8, align 4, addrspace 0) + %1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 4, addrspace 0) $vgpr0_vgpr1 = COPY %1 ... @@ -7158,16 +7191,16 @@ body: | ; CI-LABEL: name: test_load_flat_v2p3_align1 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 from unknown-address + 2) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 from unknown-address + 3) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -7189,13 +7222,13 @@ body: | ; CI: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) ; CI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 from unknown-address + 4) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 from unknown-address + 5) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) ; CI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 from unknown-address + 6) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 from unknown-address + 7) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -7215,16 +7248,16 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) ; VI-LABEL: name: test_load_flat_v2p3_align1 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 from unknown-address + 2) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 from unknown-address + 3) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -7246,13 +7279,13 @@ body: | ; VI: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 from unknown-address + 4) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 from unknown-address + 5) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 from unknown-address + 6) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 from unknown-address + 7) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -7272,16 +7305,16 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) ; GFX9-LABEL: name: test_load_flat_v2p3_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8)) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from unknown-address + 1) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1 from unknown-address + 2) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1 from unknown-address + 3) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -7303,13 +7336,13 @@ body: | ; GFX9: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) ; GFX9: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load 1 from unknown-address + 4) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load 1 from unknown-address + 5) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load 1 from unknown-address + 6) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load 1 from unknown-address + 7) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -7328,7 +7361,7 @@ body: | ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3) ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(<2 x p3>) = G_LOAD %0 :: (load 8, align 1, addrspace 0) + %1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 1, addrspace 0) $vgpr0_vgpr1 = COPY %1 ... @@ -7340,18 +7373,18 @@ body: | ; CI-LABEL: name: test_ext_load_flat_s32_from_1_align4 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) ; CI: $vgpr0 = COPY [[LOAD]](s32) ; VI-LABEL: name: test_ext_load_flat_s32_from_1_align4 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) ; VI: $vgpr0 = COPY [[LOAD]](s32) ; GFX9-LABEL: name: test_ext_load_flat_s32_from_1_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) ; GFX9: $vgpr0 = COPY [[LOAD]](s32) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (load 1, align 4, addrspace 0) + %1:_(s32) = G_LOAD %0 :: (load (s8), align 4, addrspace 0) $vgpr0 = COPY %1 ... @@ -7363,18 +7396,18 @@ body: | ; CI-LABEL: name: test_ext_load_flat_s32_from_2_align4 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) ; CI: $vgpr0 = COPY [[LOAD]](s32) ; VI-LABEL: name: test_ext_load_flat_s32_from_2_align4 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) ; VI: $vgpr0 = COPY [[LOAD]](s32) ; GFX9-LABEL: name: test_ext_load_flat_s32_from_2_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) ; GFX9: $vgpr0 = COPY [[LOAD]](s32) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (load 2, align 4, addrspace 0) + %1:_(s32) = G_LOAD %0 :: (load (s16), align 4, addrspace 0) $vgpr0 = COPY %1 ... @@ -7387,21 +7420,21 @@ body: | ; CI-LABEL: name: test_ext_load_flat_s64_from_1_align4 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_ext_load_flat_s64_from_1_align4 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-LABEL: name: test_ext_load_flat_s64_from_1_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load 1, align 4, addrspace 0) + %1:_(s64) = G_LOAD %0 :: (load (s8), align 4, addrspace 0) $vgpr0_vgpr1 = COPY %1 ... @@ -7413,21 +7446,21 @@ body: | ; CI-LABEL: name: test_ext_load_flat_s64_from_2_align4 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_ext_load_flat_s64_from_2_align4 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-LABEL: name: test_ext_load_flat_s64_from_2_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load 2, align 4, addrspace 0) + %1:_(s64) = G_LOAD %0 :: (load (s16), align 4, addrspace 0) $vgpr0_vgpr1 = COPY %1 ... @@ -7439,21 +7472,21 @@ body: | ; CI-LABEL: name: test_ext_load_flat_s64_from_4_align4 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_ext_load_flat_s64_from_4_align4 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-LABEL: name: test_ext_load_flat_s64_from_4_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load 4, align 4, addrspace 0) + %1:_(s64) = G_LOAD %0 :: (load (s32), align 4, addrspace 0) $vgpr0_vgpr1 = COPY %1 ... @@ -7465,7 +7498,7 @@ body: | ; CI-LABEL: name: test_ext_load_flat_s128_from_4_align4 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) ; CI: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF @@ -7473,7 +7506,7 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) ; VI-LABEL: name: test_ext_load_flat_s128_from_4_align4 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) ; VI: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF @@ -7481,14 +7514,14 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) ; GFX9-LABEL: name: test_ext_load_flat_s128_from_4_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) ; GFX9: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; GFX9: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s128) = G_LOAD %0 :: (load 4, align 4, addrspace 0) + %1:_(s128) = G_LOAD %0 :: (load (s32), align 4, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -7500,21 +7533,21 @@ body: | ; CI-LABEL: name: test_ext_load_flat_s64_from_2_align2 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_ext_load_flat_s64_from_2_align2 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-LABEL: name: test_ext_load_flat_s64_from_2_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2, align 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 4) ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load 2, align 4, addrspace 0) + %1:_(s64) = G_LOAD %0 :: (load (s16), align 4, addrspace 0) $vgpr0_vgpr1 = COPY %1 ... @@ -7526,21 +7559,21 @@ body: | ; CI-LABEL: name: test_ext_load_flat_s64_from_1_align1 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 4) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_ext_load_flat_s64_from_1_align1 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-LABEL: name: test_ext_load_flat_s64_from_1_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 4) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8), align 4) ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load 1, align 4, addrspace 0) + %1:_(s64) = G_LOAD %0 :: (load (s8), align 4, addrspace 0) $vgpr0_vgpr1 = COPY %1 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir index da62fce5b1e82..ae1937b37901c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir @@ -19,48 +19,48 @@ body: | ; SI-LABEL: name: test_load_global_s1_align1 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s1), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; SI: $vgpr0 = COPY [[AND]](s32) ; CI-HSA-LABEL: name: test_load_global_s1_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s1), addrspace 1) ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; CI-HSA: $vgpr0 = COPY [[AND]](s32) ; CI-MESA-LABEL: name: test_load_global_s1_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s1), addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; CI-MESA: $vgpr0 = COPY [[AND]](s32) ; VI-LABEL: name: test_load_global_s1_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s1), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; VI: $vgpr0 = COPY [[AND]](s32) ; GFX9-HSA-LABEL: name: test_load_global_s1_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s1), addrspace 1) ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; GFX9-HSA: $vgpr0 = COPY [[AND]](s32) ; GFX9-MESA-LABEL: name: test_load_global_s1_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s1), addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; GFX9-MESA: $vgpr0 = COPY [[AND]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s1) = G_LOAD %0 :: (load 1, align 1, addrspace 1) + %1:_(s1) = G_LOAD %0 :: (load (s1), align 1, addrspace 1) %2:_(s32) = G_ZEXT %1 $vgpr0 = COPY %2 ... @@ -73,48 +73,48 @@ body: | ; SI-LABEL: name: test_load_global_s2_align1 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s2), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; SI: $vgpr0 = COPY [[AND]](s32) ; CI-HSA-LABEL: name: test_load_global_s2_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s2), addrspace 1) ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; CI-HSA: $vgpr0 = COPY [[AND]](s32) ; CI-MESA-LABEL: name: test_load_global_s2_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s2), addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; CI-MESA: $vgpr0 = COPY [[AND]](s32) ; VI-LABEL: name: test_load_global_s2_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s2), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; VI: $vgpr0 = COPY [[AND]](s32) ; GFX9-HSA-LABEL: name: test_load_global_s2_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s2), addrspace 1) ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; GFX9-HSA: $vgpr0 = COPY [[AND]](s32) ; GFX9-MESA-LABEL: name: test_load_global_s2_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s2), addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; GFX9-MESA: $vgpr0 = COPY [[AND]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s2) = G_LOAD %0 :: (load 1, align 1, addrspace 1) + %1:_(s2) = G_LOAD %0 :: (load (s2), align 1, addrspace 1) %2:_(s32) = G_ZEXT %1 $vgpr0 = COPY %2 ... @@ -132,36 +132,36 @@ body: | ; CI: $vgpr0 = COPY [[COPY1]](s32) ; SI-LABEL: name: test_load_global_s8_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: $vgpr0 = COPY [[COPY1]](s32) ; CI-HSA-LABEL: name: test_load_global_s8_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-HSA: $vgpr0 = COPY [[COPY1]](s32) ; CI-MESA-LABEL: name: test_load_global_s8_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-MESA: $vgpr0 = COPY [[COPY1]](s32) ; VI-LABEL: name: test_load_global_s8_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-HSA-LABEL: name: test_load_global_s8_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-HSA: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-MESA-LABEL: name: test_load_global_s8_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-MESA: $vgpr0 = COPY [[COPY1]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s8) = G_LOAD %0 :: (load 1, align 4, addrspace 1) + %1:_(s8) = G_LOAD %0 :: (load (s8), align 4, addrspace 1) %2:_(s32) = G_ANYEXT %1 $vgpr0 = COPY %2 ... @@ -174,36 +174,36 @@ body: | ; SI-LABEL: name: test_load_global_s8_align1 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: $vgpr0 = COPY [[COPY1]](s32) ; CI-HSA-LABEL: name: test_load_global_s8_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-HSA: $vgpr0 = COPY [[COPY1]](s32) ; CI-MESA-LABEL: name: test_load_global_s8_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-MESA: $vgpr0 = COPY [[COPY1]](s32) ; VI-LABEL: name: test_load_global_s8_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-HSA-LABEL: name: test_load_global_s8_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-HSA: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-MESA-LABEL: name: test_load_global_s8_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-MESA: $vgpr0 = COPY [[COPY1]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s8) = G_LOAD %0 :: (load 1, align 1, addrspace 1) + %1:_(s8) = G_LOAD %0 :: (load (s8), align 1, addrspace 1) %2:_(s32) = G_ANYEXT %1 $vgpr0 = COPY %2 ... @@ -216,36 +216,36 @@ body: | ; SI-LABEL: name: test_load_global_s16_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: $vgpr0 = COPY [[COPY1]](s32) ; CI-HSA-LABEL: name: test_load_global_s16_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-HSA: $vgpr0 = COPY [[COPY1]](s32) ; CI-MESA-LABEL: name: test_load_global_s16_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-MESA: $vgpr0 = COPY [[COPY1]](s32) ; VI-LABEL: name: test_load_global_s16_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-HSA-LABEL: name: test_load_global_s16_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-HSA: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-MESA-LABEL: name: test_load_global_s16_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-MESA: $vgpr0 = COPY [[COPY1]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s16) = G_LOAD %0 :: (load 2, align 4, addrspace 1) + %1:_(s16) = G_LOAD %0 :: (load (s16), align 4, addrspace 1) %2:_(s32) = G_ANYEXT %1 $vgpr0 = COPY %2 ... @@ -258,36 +258,36 @@ body: | ; SI-LABEL: name: test_load_global_s16_align2 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: $vgpr0 = COPY [[COPY1]](s32) ; CI-HSA-LABEL: name: test_load_global_s16_align2 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-HSA: $vgpr0 = COPY [[COPY1]](s32) ; CI-MESA-LABEL: name: test_load_global_s16_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-MESA: $vgpr0 = COPY [[COPY1]](s32) ; VI-LABEL: name: test_load_global_s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-HSA-LABEL: name: test_load_global_s16_align2 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-HSA: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-MESA-LABEL: name: test_load_global_s16_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-MESA: $vgpr0 = COPY [[COPY1]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s16) = G_LOAD %0 :: (load 2, align 2, addrspace 1) + %1:_(s16) = G_LOAD %0 :: (load (s16), align 2, addrspace 1) %2:_(s32) = G_ANYEXT %1 $vgpr0 = COPY %2 ... @@ -322,10 +322,10 @@ body: | ; CI: $vgpr0 = COPY [[ANYEXT]](s32) ; SI-LABEL: name: test_load_global_s16_align1 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -340,15 +340,15 @@ body: | ; SI: $vgpr0 = COPY [[ANYEXT]](s32) ; CI-HSA-LABEL: name: test_load_global_s16_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 1, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1) ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-HSA: $vgpr0 = COPY [[COPY1]](s32) ; CI-MESA-LABEL: name: test_load_global_s16_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; CI-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -363,10 +363,10 @@ body: | ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_load_global_s16_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -379,15 +379,15 @@ body: | ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-HSA-LABEL: name: test_load_global_s16_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 1, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1) ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-HSA: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-MESA-LABEL: name: test_load_global_s16_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -399,7 +399,7 @@ body: | ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s16) = G_LOAD %0 :: (load 2, align 1, addrspace 1) + %1:_(s16) = G_LOAD %0 :: (load (s16), align 1, addrspace 1) %2:_(s32) = G_ANYEXT %1 $vgpr0 = COPY %2 ... @@ -412,30 +412,30 @@ body: | ; SI-LABEL: name: test_load_global_s32_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; SI: $vgpr0 = COPY [[LOAD]](s32) ; CI-HSA-LABEL: name: test_load_global_s32_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; CI-HSA: $vgpr0 = COPY [[LOAD]](s32) ; CI-MESA-LABEL: name: test_load_global_s32_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; CI-MESA: $vgpr0 = COPY [[LOAD]](s32) ; VI-LABEL: name: test_load_global_s32_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; VI: $vgpr0 = COPY [[LOAD]](s32) ; GFX9-HSA-LABEL: name: test_load_global_s32_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; GFX9-HSA: $vgpr0 = COPY [[LOAD]](s32) ; GFX9-MESA-LABEL: name: test_load_global_s32_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; GFX9-MESA: $vgpr0 = COPY [[LOAD]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 1) + %1:_(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 1) $vgpr0 = COPY %1 ... @@ -447,10 +447,10 @@ body: | ; SI-LABEL: name: test_load_global_s32_align2 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -462,14 +462,14 @@ body: | ; SI: $vgpr0 = COPY [[OR]](s32) ; CI-HSA-LABEL: name: test_load_global_s32_align2 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, align 2, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 2, addrspace 1) ; CI-HSA: $vgpr0 = COPY [[LOAD]](s32) ; CI-MESA-LABEL: name: test_load_global_s32_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -481,10 +481,10 @@ body: | ; CI-MESA: $vgpr0 = COPY [[OR]](s32) ; VI-LABEL: name: test_load_global_s32_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -496,14 +496,14 @@ body: | ; VI: $vgpr0 = COPY [[OR]](s32) ; GFX9-HSA-LABEL: name: test_load_global_s32_align2 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, align 2, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 2, addrspace 1) ; GFX9-HSA: $vgpr0 = COPY [[LOAD]](s32) ; GFX9-MESA-LABEL: name: test_load_global_s32_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -514,7 +514,7 @@ body: | ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX9-MESA: $vgpr0 = COPY [[OR]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (load 4, align 2, addrspace 1) + %1:_(s32) = G_LOAD %0 :: (load (s32), align 2, addrspace 1) $vgpr0 = COPY %1 ... @@ -526,16 +526,16 @@ body: | ; SI-LABEL: name: test_load_global_s32_align1 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -557,20 +557,20 @@ body: | ; SI: $vgpr0 = COPY [[OR2]](s32) ; CI-HSA-LABEL: name: test_load_global_s32_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, align 1, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 1, addrspace 1) ; CI-HSA: $vgpr0 = COPY [[LOAD]](s32) ; CI-MESA-LABEL: name: test_load_global_s32_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -592,16 +592,16 @@ body: | ; CI-MESA: $vgpr0 = COPY [[OR2]](s32) ; VI-LABEL: name: test_load_global_s32_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -623,20 +623,20 @@ body: | ; VI: $vgpr0 = COPY [[OR2]](s32) ; GFX9-HSA-LABEL: name: test_load_global_s32_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, align 1, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 1, addrspace 1) ; GFX9-HSA: $vgpr0 = COPY [[LOAD]](s32) ; GFX9-MESA-LABEL: name: test_load_global_s32_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -657,7 +657,7 @@ body: | ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX9-MESA: $vgpr0 = COPY [[OR2]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (load 4, align 1, addrspace 1) + %1:_(s32) = G_LOAD %0 :: (load (s32), align 1, addrspace 1) $vgpr0 = COPY %1 ... @@ -669,36 +669,36 @@ body: | ; SI-LABEL: name: test_load_global_s24_align8 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, align 8, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 8, addrspace 1) ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: $vgpr0 = COPY [[COPY1]](s32) ; CI-HSA-LABEL: name: test_load_global_s24_align8 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, align 8, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 8, addrspace 1) ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-HSA: $vgpr0 = COPY [[COPY1]](s32) ; CI-MESA-LABEL: name: test_load_global_s24_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, align 8, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 8, addrspace 1) ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-MESA: $vgpr0 = COPY [[COPY1]](s32) ; VI-LABEL: name: test_load_global_s24_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, align 8, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 8, addrspace 1) ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-HSA-LABEL: name: test_load_global_s24_align8 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, align 8, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 8, addrspace 1) ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-HSA: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-MESA-LABEL: name: test_load_global_s24_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, align 8, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), align 8, addrspace 1) ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-MESA: $vgpr0 = COPY [[COPY1]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s24) = G_LOAD %0 :: (load 3, align 8, addrspace 1) + %1:_(s24) = G_LOAD %0 :: (load (s24), align 8, addrspace 1) %2:_(s32) = G_ANYEXT %1 $vgpr0 = COPY %2 ... @@ -711,36 +711,36 @@ body: | ; SI-LABEL: name: test_load_global_s24_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: $vgpr0 = COPY [[COPY1]](s32) ; CI-HSA-LABEL: name: test_load_global_s24_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-HSA: $vgpr0 = COPY [[COPY1]](s32) ; CI-MESA-LABEL: name: test_load_global_s24_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-MESA: $vgpr0 = COPY [[COPY1]](s32) ; VI-LABEL: name: test_load_global_s24_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-HSA-LABEL: name: test_load_global_s24_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-HSA: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-MESA-LABEL: name: test_load_global_s24_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-MESA: $vgpr0 = COPY [[COPY1]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s24) = G_LOAD %0 :: (load 3, align 4, addrspace 1) + %1:_(s24) = G_LOAD %0 :: (load (s24), align 4, addrspace 1) %2:_(s32) = G_ANYEXT %1 $vgpr0 = COPY %2 ... @@ -753,10 +753,10 @@ body: | ; SI-LABEL: name: test_load_global_s24_align2 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 2, align 2, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -790,10 +790,10 @@ body: | ; SI: $vgpr0 = COPY [[COPY5]](s32) ; CI-HSA-LABEL: name: test_load_global_s24_align2 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; CI-HSA: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 2, align 2, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1) ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] @@ -801,10 +801,10 @@ body: | ; CI-HSA: $vgpr0 = COPY [[COPY1]](s32) ; CI-MESA-LABEL: name: test_load_global_s24_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 2, align 2, addrspace 1) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1) ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -838,10 +838,10 @@ body: | ; CI-MESA: $vgpr0 = COPY [[COPY5]](s32) ; VI-LABEL: name: test_load_global_s24_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 2, align 2, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -871,10 +871,10 @@ body: | ; VI: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-HSA-LABEL: name: test_load_global_s24_align2 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX9-HSA: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 2, align 2, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1) ; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; GFX9-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] @@ -882,10 +882,10 @@ body: | ; GFX9-HSA: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-MESA-LABEL: name: test_load_global_s24_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 2, align 2, addrspace 1) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -914,7 +914,7 @@ body: | ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) ; GFX9-MESA: $vgpr0 = COPY [[COPY1]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s24) = G_LOAD %0 :: (load 3, align 2, addrspace 1) + %1:_(s24) = G_LOAD %0 :: (load (s24), align 2, addrspace 1) %2:_(s32) = G_ANYEXT %1 $vgpr0 = COPY %2 ... @@ -927,13 +927,13 @@ body: | ; SI-LABEL: name: test_load_global_s24_align1 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; SI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) @@ -962,10 +962,10 @@ body: | ; SI: $vgpr0 = COPY [[COPY4]](s32) ; CI-HSA-LABEL: name: test_load_global_s24_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load 2, align 1, addrspace 1) + ; CI-HSA: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1) ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] @@ -973,13 +973,13 @@ body: | ; CI-HSA: $vgpr0 = COPY [[COPY1]](s32) ; CI-MESA-LABEL: name: test_load_global_s24_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; CI-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CI-MESA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) @@ -1008,13 +1008,13 @@ body: | ; CI-MESA: $vgpr0 = COPY [[COPY4]](s32) ; VI-LABEL: name: test_load_global_s24_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) @@ -1039,10 +1039,10 @@ body: | ; VI: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-HSA-LABEL: name: test_load_global_s24_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load 2, align 1, addrspace 1) + ; GFX9-HSA: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1) ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; GFX9-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] @@ -1050,13 +1050,13 @@ body: | ; GFX9-HSA: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-MESA-LABEL: name: test_load_global_s24_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9-MESA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) @@ -1080,7 +1080,7 @@ body: | ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) ; GFX9-MESA: $vgpr0 = COPY [[COPY1]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s24) = G_LOAD %0 :: (load 3, align 1, addrspace 1) + %1:_(s24) = G_LOAD %0 :: (load (s24), align 1, addrspace 1) %2:_(s32) = G_ANYEXT %1 $vgpr0 = COPY %2 ... @@ -1100,48 +1100,48 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[AND]](s64) ; SI-LABEL: name: test_load_global_s48_align8 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) ; SI: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] ; SI: $vgpr0_vgpr1 = COPY [[AND]](s64) ; CI-HSA-LABEL: name: test_load_global_s48_align8 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1) ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 ; CI-HSA: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) ; CI-HSA: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] ; CI-HSA: $vgpr0_vgpr1 = COPY [[AND]](s64) ; CI-MESA-LABEL: name: test_load_global_s48_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 ; CI-MESA: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) ; CI-MESA: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] ; CI-MESA: $vgpr0_vgpr1 = COPY [[AND]](s64) ; VI-LABEL: name: test_load_global_s48_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) ; VI: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] ; VI: $vgpr0_vgpr1 = COPY [[AND]](s64) ; GFX9-HSA-LABEL: name: test_load_global_s48_align8 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1) ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) ; GFX9-HSA: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[AND]](s64) ; GFX9-MESA-LABEL: name: test_load_global_s48_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655 ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) ; GFX9-MESA: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[AND]](s64) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s48) = G_LOAD %0 :: (load 6, align 8, addrspace 1) + %1:_(s48) = G_LOAD %0 :: (load (s48), align 8, addrspace 1) %2:_(s64) = G_ZEXT %1 $vgpr0_vgpr1 = COPY %2 ... @@ -1154,30 +1154,30 @@ body: | ; SI-LABEL: name: test_load_global_s64_align8 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1) ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) ; CI-HSA-LABEL: name: test_load_global_s64_align8 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1) ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) ; CI-MESA-LABEL: name: test_load_global_s64_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1) ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) ; VI-LABEL: name: test_load_global_s64_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) ; GFX9-HSA-LABEL: name: test_load_global_s64_align8 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1) ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) ; GFX9-MESA-LABEL: name: test_load_global_s64_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load 8, align 8, addrspace 1) + %1:_(s64) = G_LOAD %0 :: (load (s64), align 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... @@ -1189,30 +1189,30 @@ body: | ; SI-LABEL: name: test_load_global_s64_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), align 4, addrspace 1) ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) ; CI-HSA-LABEL: name: test_load_global_s64_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), align 4, addrspace 1) ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) ; CI-MESA-LABEL: name: test_load_global_s64_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), align 4, addrspace 1) ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) ; VI-LABEL: name: test_load_global_s64_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), align 4, addrspace 1) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) ; GFX9-HSA-LABEL: name: test_load_global_s64_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), align 4, addrspace 1) ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) ; GFX9-MESA-LABEL: name: test_load_global_s64_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), align 4, addrspace 1) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load 8, align 4, addrspace 1) + %1:_(s64) = G_LOAD %0 :: (load (s64), align 4, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... @@ -1224,16 +1224,16 @@ body: | ; SI-LABEL: name: test_load_global_s64_align2 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2 from unknown-address + 6, addrspace 1) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -1252,20 +1252,20 @@ body: | ; SI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; CI-HSA-LABEL: name: test_load_global_s64_align2 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, align 2, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), align 2, addrspace 1) ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) ; CI-MESA-LABEL: name: test_load_global_s64_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1) + ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2 from unknown-address + 6, addrspace 1) + ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -1284,16 +1284,16 @@ body: | ; CI-MESA: $vgpr0_vgpr1 = COPY [[MV]](s64) ; VI-LABEL: name: test_load_global_s64_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2 from unknown-address + 6, addrspace 1) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -1312,20 +1312,20 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; GFX9-HSA-LABEL: name: test_load_global_s64_align2 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, align 2, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), align 2, addrspace 1) ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) ; GFX9-MESA-LABEL: name: test_load_global_s64_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1) + ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2 from unknown-address + 6, addrspace 1) + ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -1343,7 +1343,7 @@ body: | ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load 8, align 2, addrspace 1) + %1:_(s64) = G_LOAD %0 :: (load (s64), align 2, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... @@ -1355,28 +1355,28 @@ body: | ; SI-LABEL: name: test_load_global_s64_align1 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; SI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -1424,32 +1424,32 @@ body: | ; SI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; CI-HSA-LABEL: name: test_load_global_s64_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, align 1, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), align 1, addrspace 1) ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) ; CI-MESA-LABEL: name: test_load_global_s64_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; CI-MESA: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -1497,28 +1497,28 @@ body: | ; CI-MESA: $vgpr0_vgpr1 = COPY [[MV]](s64) ; VI-LABEL: name: test_load_global_s64_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; VI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -1558,32 +1558,32 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; GFX9-HSA-LABEL: name: test_load_global_s64_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 8, align 1, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), align 1, addrspace 1) ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](s64) ; GFX9-MESA-LABEL: name: test_load_global_s64_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; GFX9-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; GFX9-MESA: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -1622,7 +1622,7 @@ body: | ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load 8, align 1, addrspace 1) + %1:_(s64) = G_LOAD %0 :: (load (s64), align 1, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... @@ -1634,37 +1634,37 @@ body: | ; SI-LABEL: name: test_load_global_s96_align16 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[LOAD]](<4 x s32>), 0 ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[EXTRACT]](<3 x s32>) ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; CI-HSA-LABEL: name: test_load_global_s96_align16 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 16, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (s96), align 16, addrspace 1) ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; CI-MESA-LABEL: name: test_load_global_s96_align16 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 16, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (s96), align 16, addrspace 1) ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; VI-LABEL: name: test_load_global_s96_align16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 16, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (s96), align 16, addrspace 1) ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; GFX9-HSA-LABEL: name: test_load_global_s96_align16 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 16, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (s96), align 16, addrspace 1) ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; GFX9-MESA-LABEL: name: test_load_global_s96_align16 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 16, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (s96), align 16, addrspace 1) ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s96) = G_LOAD %0 :: (load 12, align 16, addrspace 1) + %1:_(s96) = G_LOAD %0 :: (load (s96), align 16, addrspace 1) $vgpr0_vgpr1_vgpr2 = COPY %1 ... @@ -1676,41 +1676,41 @@ body: | ; SI-LABEL: name: test_load_global_s96_align8 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 4 from unknown-address + 8, align 8, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from unknown-address + 8, align 8, addrspace 1) ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; CI-HSA-LABEL: name: test_load_global_s96_align8 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 8, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (s96), align 8, addrspace 1) ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; CI-MESA-LABEL: name: test_load_global_s96_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 8, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (s96), align 8, addrspace 1) ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; VI-LABEL: name: test_load_global_s96_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 8, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (s96), align 8, addrspace 1) ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; GFX9-HSA-LABEL: name: test_load_global_s96_align8 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 8, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (s96), align 8, addrspace 1) ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; GFX9-MESA-LABEL: name: test_load_global_s96_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 8, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (s96), align 8, addrspace 1) ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s96) = G_LOAD %0 :: (load 12, align 8, addrspace 1) + %1:_(s96) = G_LOAD %0 :: (load (s96), align 8, addrspace 1) $vgpr0_vgpr1_vgpr2 = COPY %1 ... @@ -1722,41 +1722,41 @@ body: | ; SI-LABEL: name: test_load_global_s96_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (s64), align 4, addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 4 from unknown-address + 8, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from unknown-address + 8, addrspace 1) ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; CI-HSA-LABEL: name: test_load_global_s96_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (s96), align 4, addrspace 1) ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; CI-MESA-LABEL: name: test_load_global_s96_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (s96), align 4, addrspace 1) ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; VI-LABEL: name: test_load_global_s96_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (s96), align 4, addrspace 1) ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; GFX9-HSA-LABEL: name: test_load_global_s96_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (s96), align 4, addrspace 1) ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; GFX9-MESA-LABEL: name: test_load_global_s96_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (s96), align 4, addrspace 1) ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s96) = G_LOAD %0 :: (load 12, align 4, addrspace 1) + %1:_(s96) = G_LOAD %0 :: (load (s96), align 4, addrspace 1) $vgpr0_vgpr1_vgpr2 = COPY %1 ... @@ -1768,10 +1768,10 @@ body: | ; SI-LABEL: name: test_load_global_s96_align2 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -1782,9 +1782,9 @@ body: | ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2 from unknown-address + 6, addrspace 1) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -1793,9 +1793,9 @@ body: | ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 from unknown-address + 8, addrspace 1) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 2 from unknown-address + 10, addrspace 1) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -1807,15 +1807,15 @@ body: | ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; CI-HSA-LABEL: name: test_load_global_s96_align2 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 2, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (s96), align 2, addrspace 1) ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; CI-MESA-LABEL: name: test_load_global_s96_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -1826,9 +1826,9 @@ body: | ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1) + ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2 from unknown-address + 6, addrspace 1) + ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -1837,9 +1837,9 @@ body: | ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 from unknown-address + 8, addrspace 1) + ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 2 from unknown-address + 10, addrspace 1) + ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -1851,10 +1851,10 @@ body: | ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; VI-LABEL: name: test_load_global_s96_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -1865,9 +1865,9 @@ body: | ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2 from unknown-address + 6, addrspace 1) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -1876,9 +1876,9 @@ body: | ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 from unknown-address + 8, addrspace 1) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 2 from unknown-address + 10, addrspace 1) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -1890,15 +1890,15 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; GFX9-HSA-LABEL: name: test_load_global_s96_align2 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 2, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (s96), align 2, addrspace 1) ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; GFX9-MESA-LABEL: name: test_load_global_s96_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -1909,9 +1909,9 @@ body: | ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1) + ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2 from unknown-address + 6, addrspace 1) + ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -1920,9 +1920,9 @@ body: | ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 from unknown-address + 8, addrspace 1) + ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 2 from unknown-address + 10, addrspace 1) + ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -1933,7 +1933,7 @@ body: | ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s96) = G_LOAD %0 :: (load 12, align 2, addrspace 1) + %1:_(s96) = G_LOAD %0 :: (load (s96), align 2, addrspace 1) $vgpr0_vgpr1_vgpr2 = COPY %1 ... @@ -1945,16 +1945,16 @@ body: | ; SI-LABEL: name: test_load_global_s96_align1 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -1975,13 +1975,13 @@ body: | ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -1998,13 +1998,13 @@ body: | ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; SI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1) + ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 1 from unknown-address + 9, addrspace 1) + ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load 1 from unknown-address + 10, addrspace 1) + ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load 1 from unknown-address + 11, addrspace 1) + ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -2024,21 +2024,21 @@ body: | ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; CI-HSA-LABEL: name: test_load_global_s96_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 1, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (s96), align 1, addrspace 1) ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; CI-MESA-LABEL: name: test_load_global_s96_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -2059,13 +2059,13 @@ body: | ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; CI-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -2082,13 +2082,13 @@ body: | ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; CI-MESA: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CI-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1) + ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) ; CI-MESA: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 1 from unknown-address + 9, addrspace 1) + ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) ; CI-MESA: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; CI-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load 1 from unknown-address + 10, addrspace 1) + ; CI-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) ; CI-MESA: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load 1 from unknown-address + 11, addrspace 1) + ; CI-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; CI-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -2108,16 +2108,16 @@ body: | ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; VI-LABEL: name: test_load_global_s96_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -2138,13 +2138,13 @@ body: | ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -2161,13 +2161,13 @@ body: | ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1) + ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 1 from unknown-address + 9, addrspace 1) + ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load 1 from unknown-address + 10, addrspace 1) + ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load 1 from unknown-address + 11, addrspace 1) + ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -2187,21 +2187,21 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; GFX9-HSA-LABEL: name: test_load_global_s96_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 1, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (s96), align 1, addrspace 1) ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; GFX9-MESA-LABEL: name: test_load_global_s96_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -2222,13 +2222,13 @@ body: | ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX9-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -2245,13 +2245,13 @@ body: | ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; GFX9-MESA: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GFX9-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1) + ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) ; GFX9-MESA: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 1 from unknown-address + 9, addrspace 1) + ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) ; GFX9-MESA: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; GFX9-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load 1 from unknown-address + 10, addrspace 1) + ; GFX9-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) ; GFX9-MESA: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; GFX9-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load 1 from unknown-address + 11, addrspace 1) + ; GFX9-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) ; GFX9-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; GFX9-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -2270,7 +2270,7 @@ body: | ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s96) = G_LOAD %0 :: (load 12, align 1, addrspace 1) + %1:_(s96) = G_LOAD %0 :: (load (s96), align 1, addrspace 1) $vgpr0_vgpr1_vgpr2 = COPY %1 ... @@ -2287,66 +2287,66 @@ body: | ; CI: S_NOP 0, implicit [[TRUNC]](s160) ; SI-LABEL: name: test_load_global_s160_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 4 from unknown-address + 16, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from unknown-address + 16, addrspace 1) ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32) ; SI: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>) ; SI: S_NOP 0, implicit [[BITCAST]](s160) ; CI-HSA-LABEL: name: test_load_global_s160_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1) ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 4 from unknown-address + 16, addrspace 1) + ; CI-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from unknown-address + 16, addrspace 1) ; CI-HSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32) ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>) ; CI-HSA: S_NOP 0, implicit [[BITCAST]](s160) ; CI-MESA-LABEL: name: test_load_global_s160_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 4 from unknown-address + 16, addrspace 1) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from unknown-address + 16, addrspace 1) ; CI-MESA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32) ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>) ; CI-MESA: S_NOP 0, implicit [[BITCAST]](s160) ; VI-LABEL: name: test_load_global_s160_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 4 from unknown-address + 16, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from unknown-address + 16, addrspace 1) ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32) ; VI: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>) ; VI: S_NOP 0, implicit [[BITCAST]](s160) ; GFX9-HSA-LABEL: name: test_load_global_s160_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1) ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 4 from unknown-address + 16, addrspace 1) + ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from unknown-address + 16, addrspace 1) ; GFX9-HSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; GFX9-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32) ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>) ; GFX9-HSA: S_NOP 0, implicit [[BITCAST]](s160) ; GFX9-MESA-LABEL: name: test_load_global_s160_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 4 from unknown-address + 16, addrspace 1) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from unknown-address + 16, addrspace 1) ; GFX9-MESA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[LOAD1]](s32) ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[BUILD_VECTOR]](<5 x s32>) ; GFX9-MESA: S_NOP 0, implicit [[BITCAST]](s160) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s160) = G_LOAD %0 :: (load 20, align 4, addrspace 1) + %1:_(s160) = G_LOAD %0 :: (load (s160), align 4, addrspace 1) S_NOP 0, implicit %1 ... @@ -2358,13 +2358,13 @@ body: | ; SI-LABEL: name: test_load_global_s224_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load 8 from unknown-address + 16, align 4, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, align 4, addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C1]](s64) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 4 from unknown-address + 24, addrspace 1) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32) from unknown-address + 24, addrspace 1) ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<2 x s32>) ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF @@ -2380,10 +2380,10 @@ body: | ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) ; CI-HSA-LABEL: name: test_load_global_s224_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1) ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-HSA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load 12 from unknown-address + 16, align 4, addrspace 1) + ; CI-HSA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (s96) from unknown-address + 16, align 4, addrspace 1) ; CI-HSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; CI-HSA: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) ; CI-HSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF @@ -2399,10 +2399,10 @@ body: | ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) ; CI-MESA-LABEL: name: test_load_global_s224_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load 12 from unknown-address + 16, align 4, addrspace 1) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (s96) from unknown-address + 16, align 4, addrspace 1) ; CI-MESA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; CI-MESA: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) ; CI-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF @@ -2418,10 +2418,10 @@ body: | ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) ; VI-LABEL: name: test_load_global_s224_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load 12 from unknown-address + 16, align 4, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (s96) from unknown-address + 16, align 4, addrspace 1) ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; VI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF @@ -2437,10 +2437,10 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) ; GFX9-HSA-LABEL: name: test_load_global_s224_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1) ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load 12 from unknown-address + 16, align 4, addrspace 1) + ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (s96) from unknown-address + 16, align 4, addrspace 1) ; GFX9-HSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; GFX9-HSA: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF @@ -2456,10 +2456,10 @@ body: | ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) ; GFX9-MESA-LABEL: name: test_load_global_s224_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load 12 from unknown-address + 16, align 4, addrspace 1) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (s96) from unknown-address + 16, align 4, addrspace 1) ; GFX9-MESA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; GFX9-MESA: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF @@ -2474,7 +2474,7 @@ body: | ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0 ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s224) = G_LOAD %0 :: (load 28, align 4, addrspace 1) + %1:_(s224) = G_LOAD %0 :: (load (s224), align 4, addrspace 1) %2:_(s256) = G_IMPLICIT_DEF %3:_(s256) = G_INSERT %2, %1, 0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3 @@ -2489,36 +2489,36 @@ body: | ; SI-LABEL: name: test_load_global_s128_align16 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (s128), addrspace 1) ; SI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; CI-HSA-LABEL: name: test_load_global_s128_align16 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (s128), addrspace 1) ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; CI-MESA-LABEL: name: test_load_global_s128_align16 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (s128), addrspace 1) ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; VI-LABEL: name: test_load_global_s128_align16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (s128), addrspace 1) ; VI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; GFX9-HSA-LABEL: name: test_load_global_s128_align16 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (s128), addrspace 1) ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; GFX9-MESA-LABEL: name: test_load_global_s128_align16 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (s128), addrspace 1) ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s128) = G_LOAD %0 :: (load 16, align 16, addrspace 1) + %1:_(s128) = G_LOAD %0 :: (load (s128), align 16, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -2534,36 +2534,36 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) ; SI-LABEL: name: test_load_global_s128_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1) ; SI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; CI-HSA-LABEL: name: test_load_global_s128_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1) ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; CI-MESA-LABEL: name: test_load_global_s128_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1) ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; VI-LABEL: name: test_load_global_s128_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1) ; VI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; GFX9-HSA-LABEL: name: test_load_global_s128_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1) ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; GFX9-MESA-LABEL: name: test_load_global_s128_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1) ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s128) = G_LOAD %0 :: (load 16, align 4, addrspace 1) + %1:_(s128) = G_LOAD %0 :: (load (s128), align 4, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -2575,16 +2575,16 @@ body: | ; SI-LABEL: name: test_load_global_s128_align1 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -2605,13 +2605,13 @@ body: | ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -2628,13 +2628,13 @@ body: | ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; SI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1) + ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 1 from unknown-address + 9, addrspace 1) + ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load 1 from unknown-address + 10, addrspace 1) + ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load 1 from unknown-address + 11, addrspace 1) + ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -2651,13 +2651,13 @@ body: | ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; SI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64) - ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load 1 from unknown-address + 12, addrspace 1) + ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) ; SI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load 1 from unknown-address + 13, addrspace 1) + ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) ; SI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load 1 from unknown-address + 14, addrspace 1) + ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1) ; SI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load 1 from unknown-address + 15, addrspace 1) + ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1) ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -2677,21 +2677,21 @@ body: | ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; CI-HSA-LABEL: name: test_load_global_s128_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 1, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (s128), align 1, addrspace 1) ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; CI-MESA-LABEL: name: test_load_global_s128_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -2712,13 +2712,13 @@ body: | ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; CI-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -2735,13 +2735,13 @@ body: | ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; CI-MESA: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CI-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1) + ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) ; CI-MESA: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 1 from unknown-address + 9, addrspace 1) + ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) ; CI-MESA: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; CI-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load 1 from unknown-address + 10, addrspace 1) + ; CI-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) ; CI-MESA: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load 1 from unknown-address + 11, addrspace 1) + ; CI-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; CI-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -2758,13 +2758,13 @@ body: | ; CI-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; CI-MESA: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; CI-MESA: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64) - ; CI-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load 1 from unknown-address + 12, addrspace 1) + ; CI-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) ; CI-MESA: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; CI-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load 1 from unknown-address + 13, addrspace 1) + ; CI-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) ; CI-MESA: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; CI-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load 1 from unknown-address + 14, addrspace 1) + ; CI-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1) ; CI-MESA: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; CI-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load 1 from unknown-address + 15, addrspace 1) + ; CI-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1) ; CI-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; CI-MESA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -2784,16 +2784,16 @@ body: | ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; VI-LABEL: name: test_load_global_s128_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -2814,13 +2814,13 @@ body: | ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -2837,13 +2837,13 @@ body: | ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1) + ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 1 from unknown-address + 9, addrspace 1) + ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load 1 from unknown-address + 10, addrspace 1) + ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load 1 from unknown-address + 11, addrspace 1) + ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -2860,13 +2860,13 @@ body: | ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; VI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64) - ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load 1 from unknown-address + 12, addrspace 1) + ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) ; VI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load 1 from unknown-address + 13, addrspace 1) + ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) ; VI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load 1 from unknown-address + 14, addrspace 1) + ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1) ; VI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load 1 from unknown-address + 15, addrspace 1) + ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1) ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -2886,21 +2886,21 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; GFX9-HSA-LABEL: name: test_load_global_s128_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 1, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (s128), align 1, addrspace 1) ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; GFX9-MESA-LABEL: name: test_load_global_s128_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -2921,13 +2921,13 @@ body: | ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX9-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -2944,13 +2944,13 @@ body: | ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; GFX9-MESA: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GFX9-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1) + ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) ; GFX9-MESA: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 1 from unknown-address + 9, addrspace 1) + ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) ; GFX9-MESA: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; GFX9-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load 1 from unknown-address + 10, addrspace 1) + ; GFX9-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) ; GFX9-MESA: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; GFX9-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load 1 from unknown-address + 11, addrspace 1) + ; GFX9-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) ; GFX9-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; GFX9-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -2967,13 +2967,13 @@ body: | ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; GFX9-MESA: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; GFX9-MESA: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64) - ; GFX9-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load 1 from unknown-address + 12, addrspace 1) + ; GFX9-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) ; GFX9-MESA: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; GFX9-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load 1 from unknown-address + 13, addrspace 1) + ; GFX9-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) ; GFX9-MESA: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; GFX9-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load 1 from unknown-address + 14, addrspace 1) + ; GFX9-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1) ; GFX9-MESA: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; GFX9-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load 1 from unknown-address + 15, addrspace 1) + ; GFX9-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1) ; GFX9-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; GFX9-MESA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; GFX9-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -2992,7 +2992,7 @@ body: | ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s128) = G_LOAD %0 :: (load 16, align 1, addrspace 1) + %1:_(s128) = G_LOAD %0 :: (load (s128), align 1, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -3004,36 +3004,36 @@ body: | ; SI-LABEL: name: test_load_global_s256_align32 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, align 16, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (s256), align 16, addrspace 1) ; SI: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256) ; CI-HSA-LABEL: name: test_load_global_s256_align32 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, align 16, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (s256), align 16, addrspace 1) ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>) ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256) ; CI-MESA-LABEL: name: test_load_global_s256_align32 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, align 16, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (s256), align 16, addrspace 1) ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>) ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256) ; VI-LABEL: name: test_load_global_s256_align32 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, align 16, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (s256), align 16, addrspace 1) ; VI: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256) ; GFX9-HSA-LABEL: name: test_load_global_s256_align32 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, align 16, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (s256), align 16, addrspace 1) ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>) ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256) ; GFX9-MESA-LABEL: name: test_load_global_s256_align32 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, align 16, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (s256), align 16, addrspace 1) ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s256) = G_BITCAST [[LOAD]](<8 x s32>) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](s256) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s256) = G_LOAD %0 :: (load 32, align 16, addrspace 1) + %1:_(s256) = G_LOAD %0 :: (load (s256), align 16, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 ... @@ -3045,30 +3045,30 @@ body: | ; SI-LABEL: name: test_load_global_p1_align8 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), addrspace 1) ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](p1) ; CI-HSA-LABEL: name: test_load_global_p1_align8 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), addrspace 1) ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](p1) ; CI-MESA-LABEL: name: test_load_global_p1_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), addrspace 1) ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](p1) ; VI-LABEL: name: test_load_global_p1_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), addrspace 1) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](p1) ; GFX9-HSA-LABEL: name: test_load_global_p1_align8 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), addrspace 1) ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](p1) ; GFX9-MESA-LABEL: name: test_load_global_p1_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), addrspace 1) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](p1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(p1) = G_LOAD %0 :: (load 8, align 8, addrspace 1) + %1:_(p1) = G_LOAD %0 :: (load (p1), align 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... @@ -3080,30 +3080,30 @@ body: | ; SI-LABEL: name: test_load_global_p1_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), align 4, addrspace 1) ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](p1) ; CI-HSA-LABEL: name: test_load_global_p1_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), align 4, addrspace 1) ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](p1) ; CI-MESA-LABEL: name: test_load_global_p1_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), align 4, addrspace 1) ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](p1) ; VI-LABEL: name: test_load_global_p1_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), align 4, addrspace 1) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](p1) ; GFX9-HSA-LABEL: name: test_load_global_p1_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), align 4, addrspace 1) ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](p1) ; GFX9-MESA-LABEL: name: test_load_global_p1_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), align 4, addrspace 1) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](p1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(p1) = G_LOAD %0 :: (load 8, align 4, addrspace 1) + %1:_(p1) = G_LOAD %0 :: (load (p1), align 4, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... @@ -3115,28 +3115,28 @@ body: | ; SI-LABEL: name: test_load_global_p1_align1 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; SI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -3184,32 +3184,32 @@ body: | ; SI: $vgpr0_vgpr1 = COPY [[MV]](p1) ; CI-HSA-LABEL: name: test_load_global_p1_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load 8, align 1, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), align 1, addrspace 1) ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](p1) ; CI-MESA-LABEL: name: test_load_global_p1_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; CI-MESA: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -3257,28 +3257,28 @@ body: | ; CI-MESA: $vgpr0_vgpr1 = COPY [[MV]](p1) ; VI-LABEL: name: test_load_global_p1_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; VI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -3318,32 +3318,32 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[MV]](p1) ; GFX9-HSA-LABEL: name: test_load_global_p1_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load 8, align 1, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load (p1), align 1, addrspace 1) ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](p1) ; GFX9-MESA-LABEL: name: test_load_global_p1_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; GFX9-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; GFX9-MESA: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -3382,7 +3382,7 @@ body: | ; GFX9-MESA: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[MV]](p1) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(p1) = G_LOAD %0 :: (load 8, align 1, addrspace 1) + %1:_(p1) = G_LOAD %0 :: (load (p1), align 1, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... @@ -3394,30 +3394,30 @@ body: | ; SI-LABEL: name: test_load_global_p3_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p1) :: (load (p3), addrspace 1) ; SI: $vgpr0 = COPY [[LOAD]](p3) ; CI-HSA-LABEL: name: test_load_global_p3_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p1) :: (load (p3), addrspace 1) ; CI-HSA: $vgpr0 = COPY [[LOAD]](p3) ; CI-MESA-LABEL: name: test_load_global_p3_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p1) :: (load (p3), addrspace 1) ; CI-MESA: $vgpr0 = COPY [[LOAD]](p3) ; VI-LABEL: name: test_load_global_p3_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p1) :: (load (p3), addrspace 1) ; VI: $vgpr0 = COPY [[LOAD]](p3) ; GFX9-HSA-LABEL: name: test_load_global_p3_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p1) :: (load (p3), addrspace 1) ; GFX9-HSA: $vgpr0 = COPY [[LOAD]](p3) ; GFX9-MESA-LABEL: name: test_load_global_p3_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p1) :: (load (p3), addrspace 1) ; GFX9-MESA: $vgpr0 = COPY [[LOAD]](p3) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(p3) = G_LOAD %0 :: (load 4, align 4, addrspace 1) + %1:_(p3) = G_LOAD %0 :: (load (p3), align 4, addrspace 1) $vgpr0 = COPY %1 ... @@ -3433,30 +3433,30 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](p4) ; SI-LABEL: name: test_load_global_p4_align8 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), addrspace 1) ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](p4) ; CI-HSA-LABEL: name: test_load_global_p4_align8 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), addrspace 1) ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](p4) ; CI-MESA-LABEL: name: test_load_global_p4_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), addrspace 1) ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](p4) ; VI-LABEL: name: test_load_global_p4_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), addrspace 1) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](p4) ; GFX9-HSA-LABEL: name: test_load_global_p4_align8 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), addrspace 1) ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](p4) ; GFX9-MESA-LABEL: name: test_load_global_p4_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), addrspace 1) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](p4) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(p4) = G_LOAD %0 :: (load 8, align 8, addrspace 1) + %1:_(p4) = G_LOAD %0 :: (load (p4), align 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... @@ -3468,30 +3468,30 @@ body: | ; SI-LABEL: name: test_load_global_p4_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), align 4, addrspace 1) ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](p4) ; CI-HSA-LABEL: name: test_load_global_p4_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), align 4, addrspace 1) ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](p4) ; CI-MESA-LABEL: name: test_load_global_p4_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), align 4, addrspace 1) ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](p4) ; VI-LABEL: name: test_load_global_p4_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), align 4, addrspace 1) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](p4) ; GFX9-HSA-LABEL: name: test_load_global_p4_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), align 4, addrspace 1) ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](p4) ; GFX9-MESA-LABEL: name: test_load_global_p4_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), align 4, addrspace 1) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](p4) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(p4) = G_LOAD %0 :: (load 8, align 4, addrspace 1) + %1:_(p4) = G_LOAD %0 :: (load (p4), align 4, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... @@ -3503,16 +3503,16 @@ body: | ; SI-LABEL: name: test_load_global_p4_align2 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2 from unknown-address + 6, addrspace 1) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -3531,20 +3531,20 @@ body: | ; SI: $vgpr0_vgpr1 = COPY [[MV]](p4) ; CI-HSA-LABEL: name: test_load_global_p4_align2 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load 8, align 2, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), align 2, addrspace 1) ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](p4) ; CI-MESA-LABEL: name: test_load_global_p4_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1) + ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2 from unknown-address + 6, addrspace 1) + ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -3563,16 +3563,16 @@ body: | ; CI-MESA: $vgpr0_vgpr1 = COPY [[MV]](p4) ; VI-LABEL: name: test_load_global_p4_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2 from unknown-address + 6, addrspace 1) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -3591,20 +3591,20 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[MV]](p4) ; GFX9-HSA-LABEL: name: test_load_global_p4_align2 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load 8, align 2, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), align 2, addrspace 1) ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](p4) ; GFX9-MESA-LABEL: name: test_load_global_p4_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1) + ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2 from unknown-address + 6, addrspace 1) + ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -3622,7 +3622,7 @@ body: | ; GFX9-MESA: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[MV]](p4) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(p4) = G_LOAD %0 :: (load 8, align 2, addrspace 1) + %1:_(p4) = G_LOAD %0 :: (load (p4), align 2, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... @@ -3634,28 +3634,28 @@ body: | ; SI-LABEL: name: test_load_global_p4_align1 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; SI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -3703,32 +3703,32 @@ body: | ; SI: $vgpr0_vgpr1 = COPY [[MV]](p4) ; CI-HSA-LABEL: name: test_load_global_p4_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load 8, align 1, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), align 1, addrspace 1) ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](p4) ; CI-MESA-LABEL: name: test_load_global_p4_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; CI-MESA: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -3776,28 +3776,28 @@ body: | ; CI-MESA: $vgpr0_vgpr1 = COPY [[MV]](p4) ; VI-LABEL: name: test_load_global_p4_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; VI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -3837,32 +3837,32 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[MV]](p4) ; GFX9-HSA-LABEL: name: test_load_global_p4_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load 8, align 1, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), align 1, addrspace 1) ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](p4) ; GFX9-MESA-LABEL: name: test_load_global_p4_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; GFX9-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; GFX9-MESA: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -3901,7 +3901,7 @@ body: | ; GFX9-MESA: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[MV]](p4) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(p4) = G_LOAD %0 :: (load 8, align 1, addrspace 1) + %1:_(p4) = G_LOAD %0 :: (load (p4), align 1, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... @@ -3913,30 +3913,30 @@ body: | ; SI-LABEL: name: test_load_global_p5_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load (p5), addrspace 1) ; SI: $vgpr0 = COPY [[LOAD]](p5) ; CI-HSA-LABEL: name: test_load_global_p5_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load (p5), addrspace 1) ; CI-HSA: $vgpr0 = COPY [[LOAD]](p5) ; CI-MESA-LABEL: name: test_load_global_p5_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load (p5), addrspace 1) ; CI-MESA: $vgpr0 = COPY [[LOAD]](p5) ; VI-LABEL: name: test_load_global_p5_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load (p5), addrspace 1) ; VI: $vgpr0 = COPY [[LOAD]](p5) ; GFX9-HSA-LABEL: name: test_load_global_p5_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load (p5), addrspace 1) ; GFX9-HSA: $vgpr0 = COPY [[LOAD]](p5) ; GFX9-MESA-LABEL: name: test_load_global_p5_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load (p5), addrspace 1) ; GFX9-MESA: $vgpr0 = COPY [[LOAD]](p5) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(p5) = G_LOAD %0 :: (load 4, align 4, addrspace 1) + %1:_(p5) = G_LOAD %0 :: (load (p5), align 4, addrspace 1) $vgpr0 = COPY %1 ... @@ -3948,10 +3948,10 @@ body: | ; SI-LABEL: name: test_load_global_p5_align2 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -3964,14 +3964,14 @@ body: | ; SI: $vgpr0 = COPY [[INTTOPTR]](p5) ; CI-HSA-LABEL: name: test_load_global_p5_align2 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load 4, align 2, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load (p5), align 2, addrspace 1) ; CI-HSA: $vgpr0 = COPY [[LOAD]](p5) ; CI-MESA-LABEL: name: test_load_global_p5_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -3984,10 +3984,10 @@ body: | ; CI-MESA: $vgpr0 = COPY [[INTTOPTR]](p5) ; VI-LABEL: name: test_load_global_p5_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -4000,14 +4000,14 @@ body: | ; VI: $vgpr0 = COPY [[INTTOPTR]](p5) ; GFX9-HSA-LABEL: name: test_load_global_p5_align2 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load 4, align 2, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load (p5), align 2, addrspace 1) ; GFX9-HSA: $vgpr0 = COPY [[LOAD]](p5) ; GFX9-MESA-LABEL: name: test_load_global_p5_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -4019,7 +4019,7 @@ body: | ; GFX9-MESA: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) ; GFX9-MESA: $vgpr0 = COPY [[INTTOPTR]](p5) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(p5) = G_LOAD %0 :: (load 4, align 2, addrspace 1) + %1:_(p5) = G_LOAD %0 :: (load (p5), align 2, addrspace 1) $vgpr0 = COPY %1 ... @@ -4031,16 +4031,16 @@ body: | ; SI-LABEL: name: test_load_global_p5_align1 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -4063,20 +4063,20 @@ body: | ; SI: $vgpr0 = COPY [[INTTOPTR]](p5) ; CI-HSA-LABEL: name: test_load_global_p5_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load 4, align 1, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load (p5), align 1, addrspace 1) ; CI-HSA: $vgpr0 = COPY [[LOAD]](p5) ; CI-MESA-LABEL: name: test_load_global_p5_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -4099,16 +4099,16 @@ body: | ; CI-MESA: $vgpr0 = COPY [[INTTOPTR]](p5) ; VI-LABEL: name: test_load_global_p5_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -4131,20 +4131,20 @@ body: | ; VI: $vgpr0 = COPY [[INTTOPTR]](p5) ; GFX9-HSA-LABEL: name: test_load_global_p5_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load 4, align 1, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p1) :: (load (p5), align 1, addrspace 1) ; GFX9-HSA: $vgpr0 = COPY [[LOAD]](p5) ; GFX9-MESA-LABEL: name: test_load_global_p5_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -4166,7 +4166,7 @@ body: | ; GFX9-MESA: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) ; GFX9-MESA: $vgpr0 = COPY [[INTTOPTR]](p5) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(p5) = G_LOAD %0 :: (load 4, align 1, addrspace 1) + %1:_(p5) = G_LOAD %0 :: (load (p5), align 1, addrspace 1) $vgpr0 = COPY %1 ... @@ -4178,7 +4178,7 @@ body: | ; SI-LABEL: name: test_load_global_v2s8_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (<2 x s8>), align 4, addrspace 1) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -4199,7 +4199,7 @@ body: | ; SI: $vgpr0 = COPY [[ANYEXT]](s32) ; CI-HSA-LABEL: name: test_load_global_v2s8_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (<2 x s8>), align 4, addrspace 1) ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -4220,7 +4220,7 @@ body: | ; CI-HSA: $vgpr0 = COPY [[ANYEXT]](s32) ; CI-MESA-LABEL: name: test_load_global_v2s8_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (<2 x s8>), align 4, addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -4241,7 +4241,7 @@ body: | ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_load_global_v2s8_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (<2 x s8>), align 4, addrspace 1) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -4260,7 +4260,7 @@ body: | ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-HSA-LABEL: name: test_load_global_v2s8_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (<2 x s8>), align 4, addrspace 1) ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -4279,7 +4279,7 @@ body: | ; GFX9-HSA: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-MESA-LABEL: name: test_load_global_v2s8_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (<2 x s8>), align 4, addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -4297,7 +4297,7 @@ body: | ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, align 4, addrspace 1) + %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 4, addrspace 1) %2:_(s16) = G_BITCAST %1 %3:_(s32) = G_ANYEXT %2 $vgpr0 = COPY %3 @@ -4311,7 +4311,7 @@ body: | ; SI-LABEL: name: test_load_global_v2s8_align2 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (<2 x s8>), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -4332,7 +4332,7 @@ body: | ; SI: $vgpr0 = COPY [[ANYEXT]](s32) ; CI-HSA-LABEL: name: test_load_global_v2s8_align2 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (<2 x s8>), addrspace 1) ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -4353,7 +4353,7 @@ body: | ; CI-HSA: $vgpr0 = COPY [[ANYEXT]](s32) ; CI-MESA-LABEL: name: test_load_global_v2s8_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (<2 x s8>), addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -4374,7 +4374,7 @@ body: | ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_load_global_v2s8_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (<2 x s8>), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -4393,7 +4393,7 @@ body: | ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-HSA-LABEL: name: test_load_global_v2s8_align2 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (<2 x s8>), addrspace 1) ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -4412,7 +4412,7 @@ body: | ; GFX9-HSA: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-MESA-LABEL: name: test_load_global_v2s8_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (<2 x s8>), addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -4430,7 +4430,7 @@ body: | ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, align 2, addrspace 1) + %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 2, addrspace 1) %2:_(s16) = G_BITCAST %1 %3:_(s32) = G_ANYEXT %2 $vgpr0 = COPY %3 @@ -4444,10 +4444,10 @@ body: | ; SI-LABEL: name: test_load_global_v2s8_align1 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -4462,7 +4462,7 @@ body: | ; SI: $vgpr0 = COPY [[ANYEXT]](s32) ; CI-HSA-LABEL: name: test_load_global_v2s8_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 1, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (<2 x s8>), align 1, addrspace 1) ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -4483,10 +4483,10 @@ body: | ; CI-HSA: $vgpr0 = COPY [[ANYEXT]](s32) ; CI-MESA-LABEL: name: test_load_global_v2s8_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; CI-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -4501,10 +4501,10 @@ body: | ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_load_global_v2s8_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -4517,7 +4517,7 @@ body: | ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-HSA-LABEL: name: test_load_global_v2s8_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 1, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (<2 x s8>), align 1, addrspace 1) ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -4536,10 +4536,10 @@ body: | ; GFX9-HSA: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-MESA-LABEL: name: test_load_global_v2s8_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -4551,7 +4551,7 @@ body: | ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, align 1, addrspace 1) + %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 1, addrspace 1) %2:_(s16) = G_BITCAST %1 %3:_(s32) = G_ANYEXT %2 $vgpr0 = COPY %3 @@ -4565,42 +4565,42 @@ body: | ; SI-LABEL: name: test_load_global_v3s8_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p1) :: (load 3, align 4, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p1) :: (load (<3 x s8>), align 4, addrspace 1) ; SI: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; SI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; SI: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; CI-HSA-LABEL: name: test_load_global_v3s8_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p1) :: (load 3, align 4, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p1) :: (load (<3 x s8>), align 4, addrspace 1) ; CI-HSA: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; CI-HSA: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; CI-HSA: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; CI-MESA-LABEL: name: test_load_global_v3s8_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p1) :: (load 3, align 4, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p1) :: (load (<3 x s8>), align 4, addrspace 1) ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; CI-MESA: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; VI-LABEL: name: test_load_global_v3s8_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p1) :: (load 3, align 4, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p1) :: (load (<3 x s8>), align 4, addrspace 1) ; VI: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; VI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; VI: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; GFX9-HSA-LABEL: name: test_load_global_v3s8_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p1) :: (load 3, align 4, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p1) :: (load (<3 x s8>), align 4, addrspace 1) ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; GFX9-HSA: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; GFX9-HSA: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; GFX9-MESA-LABEL: name: test_load_global_v3s8_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p1) :: (load 3, align 4, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p1) :: (load (<3 x s8>), align 4, addrspace 1) ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; GFX9-MESA: $vgpr0 = COPY [[INSERT]](<4 x s8>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<3 x s8>) = G_LOAD %0 :: (load 3, align 4, addrspace 1) + %1:_(<3 x s8>) = G_LOAD %0 :: (load (<3 x s8>), align 4, addrspace 1) %2:_(<4 x s8>) = G_IMPLICIT_DEF %3:_(<4 x s8>) = G_INSERT %2, %1, 0 $vgpr0 = COPY %3 @@ -4615,42 +4615,42 @@ body: | ; SI-LABEL: name: test_load_global_v3s8_align1 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p1) :: (load 3, align 1, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p1) :: (load (<3 x s8>), align 1, addrspace 1) ; SI: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; SI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; SI: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; CI-HSA-LABEL: name: test_load_global_v3s8_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p1) :: (load 3, align 1, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p1) :: (load (<3 x s8>), align 1, addrspace 1) ; CI-HSA: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; CI-HSA: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; CI-HSA: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; CI-MESA-LABEL: name: test_load_global_v3s8_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p1) :: (load 3, align 1, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p1) :: (load (<3 x s8>), align 1, addrspace 1) ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; CI-MESA: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; VI-LABEL: name: test_load_global_v3s8_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p1) :: (load 3, align 1, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p1) :: (load (<3 x s8>), align 1, addrspace 1) ; VI: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; VI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; VI: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; GFX9-HSA-LABEL: name: test_load_global_v3s8_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p1) :: (load 3, align 1, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p1) :: (load (<3 x s8>), align 1, addrspace 1) ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; GFX9-HSA: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; GFX9-HSA: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; GFX9-MESA-LABEL: name: test_load_global_v3s8_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p1) :: (load 3, align 1, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p1) :: (load (<3 x s8>), align 1, addrspace 1) ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; GFX9-MESA: $vgpr0 = COPY [[INSERT]](<4 x s8>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<3 x s8>) = G_LOAD %0 :: (load 3, align 1, addrspace 1) + %1:_(<3 x s8>) = G_LOAD %0 :: (load (<3 x s8>), align 1, addrspace 1) %2:_(<4 x s8>) = G_IMPLICIT_DEF %3:_(<4 x s8>) = G_INSERT %2, %1, 0 $vgpr0 = COPY %3 @@ -4664,7 +4664,7 @@ body: | ; SI-LABEL: name: test_load_global_v4s8_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (<4 x s8>), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -4680,7 +4680,7 @@ body: | ; SI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; CI-HSA-LABEL: name: test_load_global_v4s8_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (<4 x s8>), addrspace 1) ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -4696,7 +4696,7 @@ body: | ; CI-HSA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; CI-MESA-LABEL: name: test_load_global_v4s8_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (<4 x s8>), addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -4712,7 +4712,7 @@ body: | ; CI-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; VI-LABEL: name: test_load_global_v4s8_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (<4 x s8>), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -4728,7 +4728,7 @@ body: | ; VI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; GFX9-HSA-LABEL: name: test_load_global_v4s8_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (<4 x s8>), addrspace 1) ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -4746,7 +4746,7 @@ body: | ; GFX9-HSA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; GFX9-MESA-LABEL: name: test_load_global_v4s8_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (<4 x s8>), addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -4763,7 +4763,7 @@ body: | ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<4 x s8>) = G_LOAD %0 :: (load 4, align 4, addrspace 1) + %1:_(<4 x s8>) = G_LOAD %0 :: (load (<4 x s8>), align 4, addrspace 1) $vgpr0 = COPY %1 ... @@ -4794,10 +4794,10 @@ body: | ; CI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; SI-LABEL: name: test_load_global_v4s8_align2 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -4816,7 +4816,7 @@ body: | ; SI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; CI-HSA-LABEL: name: test_load_global_v4s8_align2 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, align 2, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (<4 x s8>), align 2, addrspace 1) ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -4832,10 +4832,10 @@ body: | ; CI-HSA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; CI-MESA-LABEL: name: test_load_global_v4s8_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -4854,10 +4854,10 @@ body: | ; CI-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; VI-LABEL: name: test_load_global_v4s8_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -4876,7 +4876,7 @@ body: | ; VI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; GFX9-HSA-LABEL: name: test_load_global_v4s8_align2 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, align 2, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (<4 x s8>), align 2, addrspace 1) ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -4894,10 +4894,10 @@ body: | ; GFX9-HSA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; GFX9-MESA-LABEL: name: test_load_global_v4s8_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -4917,7 +4917,7 @@ body: | ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<4 x s8>) = G_LOAD %0 :: (load 4, align 2, addrspace 1) + %1:_(<4 x s8>) = G_LOAD %0 :: (load (<4 x s8>), align 2, addrspace 1) $vgpr0 = COPY %1 ... @@ -4929,16 +4929,16 @@ body: | ; SI-LABEL: name: test_load_global_v4s8_align1 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) @@ -4948,7 +4948,7 @@ body: | ; SI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; CI-HSA-LABEL: name: test_load_global_v4s8_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, align 1, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (<4 x s8>), align 1, addrspace 1) ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -4964,16 +4964,16 @@ body: | ; CI-HSA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; CI-MESA-LABEL: name: test_load_global_v4s8_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) @@ -4983,16 +4983,16 @@ body: | ; CI-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; VI-LABEL: name: test_load_global_v4s8_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) @@ -5002,7 +5002,7 @@ body: | ; VI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; GFX9-HSA-LABEL: name: test_load_global_v4s8_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, align 1, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (<4 x s8>), align 1, addrspace 1) ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -5020,16 +5020,16 @@ body: | ; GFX9-HSA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; GFX9-MESA-LABEL: name: test_load_global_v4s8_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) @@ -5040,7 +5040,7 @@ body: | ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-MESA: $vgpr0 = COPY [[TRUNC]](<4 x s8>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<4 x s8>) = G_LOAD %0 :: (load 4, align 1, addrspace 1) + %1:_(<4 x s8>) = G_LOAD %0 :: (load (<4 x s8>), align 1, addrspace 1) $vgpr0 = COPY %1 ... @@ -5052,7 +5052,7 @@ body: | ; SI-LABEL: name: test_load_global_v8s8_align8 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s8>), addrspace 1) ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) @@ -5079,7 +5079,7 @@ body: | ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<8 x s8>) ; CI-HSA-LABEL: name: test_load_global_v8s8_align8 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s8>), addrspace 1) ; CI-HSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) @@ -5106,7 +5106,7 @@ body: | ; CI-HSA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<8 x s8>) ; CI-MESA-LABEL: name: test_load_global_v8s8_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s8>), addrspace 1) ; CI-MESA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) @@ -5133,7 +5133,7 @@ body: | ; CI-MESA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<8 x s8>) ; VI-LABEL: name: test_load_global_v8s8_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s8>), addrspace 1) ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) @@ -5160,7 +5160,7 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<8 x s8>) ; GFX9-HSA-LABEL: name: test_load_global_v8s8_align8 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s8>), addrspace 1) ; GFX9-HSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) @@ -5191,7 +5191,7 @@ body: | ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS2]](<8 x s8>) ; GFX9-MESA-LABEL: name: test_load_global_v8s8_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s8>), addrspace 1) ; GFX9-MESA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) @@ -5221,7 +5221,7 @@ body: | ; GFX9-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<8 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS2]](<8 x s8>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<8 x s8>) = G_LOAD %0 :: (load 8, align 8, addrspace 1) + %1:_(<8 x s8>) = G_LOAD %0 :: (load (<8 x s8>), align 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... @@ -5233,7 +5233,7 @@ body: | ; SI-LABEL: name: test_load_global_v16s8_align16 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s8>), addrspace 1) ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) @@ -5278,7 +5278,7 @@ body: | ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>) ; CI-HSA-LABEL: name: test_load_global_v16s8_align16 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s8>), addrspace 1) ; CI-HSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) @@ -5323,7 +5323,7 @@ body: | ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>) ; CI-MESA-LABEL: name: test_load_global_v16s8_align16 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s8>), addrspace 1) ; CI-MESA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) @@ -5368,7 +5368,7 @@ body: | ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>) ; VI-LABEL: name: test_load_global_v16s8_align16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s8>), addrspace 1) ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) @@ -5413,7 +5413,7 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>) ; GFX9-HSA-LABEL: name: test_load_global_v16s8_align16 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s8>), addrspace 1) ; GFX9-HSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) @@ -5466,7 +5466,7 @@ body: | ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<16 x s8>) ; GFX9-MESA-LABEL: name: test_load_global_v16s8_align16 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s8>), addrspace 1) ; GFX9-MESA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) @@ -5518,7 +5518,7 @@ body: | ; GFX9-MESA: [[CONCAT_VECTORS4:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<16 x s8>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<16 x s8>) = G_LOAD %0 :: (load 16, align 16, addrspace 1) + %1:_(<16 x s8>) = G_LOAD %0 :: (load (<16 x s8>), align 16, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -5530,7 +5530,7 @@ body: | ; SI-LABEL: name: test_load_global_v32s8_align32 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<32 x s8>), addrspace 1) ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<8 x s32>) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) @@ -5611,7 +5611,7 @@ body: | ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<32 x s8>) ; CI-HSA-LABEL: name: test_load_global_v32s8_align32 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<32 x s8>), addrspace 1) ; CI-HSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<8 x s32>) ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) @@ -5692,7 +5692,7 @@ body: | ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<32 x s8>) ; CI-MESA-LABEL: name: test_load_global_v32s8_align32 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<32 x s8>), addrspace 1) ; CI-MESA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<8 x s32>) ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) @@ -5773,7 +5773,7 @@ body: | ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<32 x s8>) ; VI-LABEL: name: test_load_global_v32s8_align32 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<32 x s8>), addrspace 1) ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<8 x s32>) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) @@ -5854,7 +5854,7 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<32 x s8>) ; GFX9-HSA-LABEL: name: test_load_global_v32s8_align32 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<32 x s8>), addrspace 1) ; GFX9-HSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<8 x s32>) ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) @@ -5951,7 +5951,7 @@ body: | ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS8]](<32 x s8>) ; GFX9-MESA-LABEL: name: test_load_global_v32s8_align32 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<32 x s8>), addrspace 1) ; GFX9-MESA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<8 x s32>) ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) @@ -6047,7 +6047,7 @@ body: | ; GFX9-MESA: [[CONCAT_VECTORS8:%[0-9]+]]:_(<32 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>), [[TRUNC4]](<4 x s8>), [[TRUNC5]](<4 x s8>), [[TRUNC6]](<4 x s8>), [[TRUNC7]](<4 x s8>) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS8]](<32 x s8>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<32 x s8>) = G_LOAD %0 :: (load 32, align 32, addrspace 1) + %1:_(<32 x s8>) = G_LOAD %0 :: (load (<32 x s8>), align 32, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 ... @@ -6060,30 +6060,30 @@ body: | ; SI-LABEL: name: test_load_global_v2s16_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) ; SI: $vgpr0 = COPY [[LOAD]](<2 x s16>) ; CI-HSA-LABEL: name: test_load_global_v2s16_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) ; CI-HSA: $vgpr0 = COPY [[LOAD]](<2 x s16>) ; CI-MESA-LABEL: name: test_load_global_v2s16_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) ; CI-MESA: $vgpr0 = COPY [[LOAD]](<2 x s16>) ; VI-LABEL: name: test_load_global_v2s16_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) ; VI: $vgpr0 = COPY [[LOAD]](<2 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v2s16_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) ; GFX9-HSA: $vgpr0 = COPY [[LOAD]](<2 x s16>) ; GFX9-MESA-LABEL: name: test_load_global_v2s16_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) ; GFX9-MESA: $vgpr0 = COPY [[LOAD]](<2 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s16>) = G_LOAD %0 :: (load 4, align 4, addrspace 1) + %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 1) $vgpr0 = COPY %1 ... @@ -6095,10 +6095,10 @@ body: | ; SI-LABEL: name: test_load_global_v2s16_align2 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -6111,14 +6111,14 @@ body: | ; SI: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; CI-HSA-LABEL: name: test_load_global_v2s16_align2 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4, align 2, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 2, addrspace 1) ; CI-HSA: $vgpr0 = COPY [[LOAD]](<2 x s16>) ; CI-MESA-LABEL: name: test_load_global_v2s16_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -6131,10 +6131,10 @@ body: | ; CI-MESA: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; VI-LABEL: name: test_load_global_v2s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -6147,20 +6147,20 @@ body: | ; VI: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v2s16_align2 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4, align 2, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 2, addrspace 1) ; GFX9-HSA: $vgpr0 = COPY [[LOAD]](<2 x s16>) ; GFX9-MESA-LABEL: name: test_load_global_v2s16_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s16>) = G_LOAD %0 :: (load 4, align 2, addrspace 1) + %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 2, addrspace 1) $vgpr0 = COPY %1 ... @@ -6209,10 +6209,10 @@ body: | ; CI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; SI-LABEL: name: test_load_global_v2s16_align1 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -6225,9 +6225,9 @@ body: | ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -6245,14 +6245,14 @@ body: | ; SI: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; CI-HSA-LABEL: name: test_load_global_v2s16_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4, align 1, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 1, addrspace 1) ; CI-HSA: $vgpr0 = COPY [[LOAD]](<2 x s16>) ; CI-MESA-LABEL: name: test_load_global_v2s16_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; CI-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -6265,9 +6265,9 @@ body: | ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -6285,10 +6285,10 @@ body: | ; CI-MESA: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; VI-LABEL: name: test_load_global_v2s16_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -6299,9 +6299,9 @@ body: | ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) @@ -6317,14 +6317,14 @@ body: | ; VI: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v2s16_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4, align 1, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 1, addrspace 1) ; GFX9-HSA: $vgpr0 = COPY [[LOAD]](<2 x s16>) ; GFX9-MESA-LABEL: name: test_load_global_v2s16_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -6335,9 +6335,9 @@ body: | ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) @@ -6349,7 +6349,7 @@ body: | ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) ; GFX9-MESA: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s16>) = G_LOAD %0 :: (load 4, align 1, addrspace 1) + %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 1, addrspace 1) $vgpr0 = COPY %1 ... @@ -6361,7 +6361,7 @@ body: | ; SI-LABEL: name: test_load_global_v3s16_align8 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) @@ -6369,7 +6369,7 @@ body: | ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-HSA-LABEL: name: test_load_global_v3s16_align8 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) ; CI-HSA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) ; CI-HSA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) @@ -6377,7 +6377,7 @@ body: | ; CI-HSA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-MESA-LABEL: name: test_load_global_v3s16_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) @@ -6385,7 +6385,7 @@ body: | ; CI-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_global_v3s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) @@ -6393,7 +6393,7 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v3s16_align8 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) ; GFX9-HSA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) @@ -6401,14 +6401,14 @@ body: | ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-MESA-LABEL: name: test_load_global_v3s16_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 8, addrspace 1) + %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 8, addrspace 1) %2:_(<4 x s16>) = G_IMPLICIT_DEF %3:_(<4 x s16>) = G_INSERT %2, %1, 0 $vgpr0_vgpr1 = COPY %3 @@ -6428,42 +6428,42 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; SI-LABEL: name: test_load_global_v3s16_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load 6, align 4, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1) ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0 ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-HSA-LABEL: name: test_load_global_v3s16_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load 6, align 4, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1) ; CI-HSA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-HSA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0 ; CI-HSA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-MESA-LABEL: name: test_load_global_v3s16_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load 6, align 4, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1) ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0 ; CI-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_global_v3s16_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load 6, align 4, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1) ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v3s16_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load 6, align 4, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1) ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-HSA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0 ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-MESA-LABEL: name: test_load_global_v3s16_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load 6, align 4, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1) ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0 ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 4, addrspace 1) + %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 4, addrspace 1) %2:_(<4 x s16>) = G_IMPLICIT_DEF %3:_(<4 x s16>) = G_INSERT %2, %1, 0 $vgpr0_vgpr1 = COPY %3 @@ -6477,13 +6477,13 @@ body: | ; SI-LABEL: name: test_load_global_v3s16_align2 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] @@ -6507,19 +6507,19 @@ body: | ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-HSA-LABEL: name: test_load_global_v3s16_align2 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load 6, align 2, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 2, addrspace 1) ; CI-HSA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-HSA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0 ; CI-HSA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-MESA-LABEL: name: test_load_global_v3s16_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1) + ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] @@ -6543,13 +6543,13 @@ body: | ; CI-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_global_v3s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] @@ -6573,19 +6573,19 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v3s16_align2 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load 6, align 2, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 2, addrspace 1) ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-HSA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0 ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-MESA-LABEL: name: test_load_global_v3s16_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1) + ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) @@ -6599,7 +6599,7 @@ body: | ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0 ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 2, addrspace 1) + %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 2, addrspace 1) %2:_(<4 x s16>) = G_IMPLICIT_DEF %3:_(<4 x s16>) = G_INSERT %2, %1, 0 $vgpr0_vgpr1 = COPY %3 @@ -6613,10 +6613,10 @@ body: | ; SI-LABEL: name: test_load_global_v3s16_align1 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -6629,9 +6629,9 @@ body: | ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -6642,9 +6642,9 @@ body: | ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; SI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -6672,16 +6672,16 @@ body: | ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-HSA-LABEL: name: test_load_global_v3s16_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load 6, align 1, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 1, addrspace 1) ; CI-HSA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-HSA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0 ; CI-HSA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-MESA-LABEL: name: test_load_global_v3s16_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; CI-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -6694,9 +6694,9 @@ body: | ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -6707,9 +6707,9 @@ body: | ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; CI-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; CI-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -6737,10 +6737,10 @@ body: | ; CI-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_global_v3s16_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -6751,9 +6751,9 @@ body: | ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) @@ -6762,9 +6762,9 @@ body: | ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) @@ -6790,16 +6790,16 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v3s16_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load 6, align 1, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 1, addrspace 1) ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-HSA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0 ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-MESA-LABEL: name: test_load_global_v3s16_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -6810,9 +6810,9 @@ body: | ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) @@ -6821,9 +6821,9 @@ body: | ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; GFX9-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; GFX9-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) @@ -6843,7 +6843,7 @@ body: | ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0 ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 1, addrspace 1) + %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 1, addrspace 1) %2:_(<4 x s16>) = G_IMPLICIT_DEF %3:_(<4 x s16>) = G_INSERT %2, %1, 0 $vgpr0_vgpr1 = COPY %3 @@ -6857,30 +6857,30 @@ body: | ; SI-LABEL: name: test_load_global_v4s16_align8 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) ; CI-HSA-LABEL: name: test_load_global_v4s16_align8 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) ; CI-MESA-LABEL: name: test_load_global_v4s16_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) ; VI-LABEL: name: test_load_global_v4s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v4s16_align8 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) ; GFX9-MESA-LABEL: name: test_load_global_v4s16_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = G_LOAD %0 :: (load 8, align 8, addrspace 1) + %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... @@ -6892,30 +6892,30 @@ body: | ; SI-LABEL: name: test_load_global_v4s16_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1) ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) ; CI-HSA-LABEL: name: test_load_global_v4s16_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1) ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) ; CI-MESA-LABEL: name: test_load_global_v4s16_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1) ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) ; VI-LABEL: name: test_load_global_v4s16_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v4s16_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1) ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) ; GFX9-MESA-LABEL: name: test_load_global_v4s16_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = G_LOAD %0 :: (load 8, align 4, addrspace 1) + %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 4, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... @@ -6927,16 +6927,16 @@ body: | ; SI-LABEL: name: test_load_global_v4s16_align2 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2 from unknown-address + 6, addrspace 1) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -6957,20 +6957,20 @@ body: | ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; CI-HSA-LABEL: name: test_load_global_v4s16_align2 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, align 2, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 2, addrspace 1) ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) ; CI-MESA-LABEL: name: test_load_global_v4s16_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1) + ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2 from unknown-address + 6, addrspace 1) + ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -6991,16 +6991,16 @@ body: | ; CI-MESA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_load_global_v4s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2 from unknown-address + 6, addrspace 1) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -7021,20 +7021,20 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v4s16_align2 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, align 2, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 2, addrspace 1) ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) ; GFX9-MESA-LABEL: name: test_load_global_v4s16_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1) + ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2 from unknown-address + 6, addrspace 1) + ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) @@ -7044,7 +7044,7 @@ body: | ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = G_LOAD %0 :: (load 8, align 2, addrspace 1) + %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 2, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... @@ -7056,10 +7056,10 @@ body: | ; SI-LABEL: name: test_load_global_v4s16_align1 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -7072,9 +7072,9 @@ body: | ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -7091,9 +7091,9 @@ body: | ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; SI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -7103,9 +7103,9 @@ body: | ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; SI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -7123,14 +7123,14 @@ body: | ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; CI-HSA-LABEL: name: test_load_global_v4s16_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, align 1, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 1, addrspace 1) ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) ; CI-MESA-LABEL: name: test_load_global_v4s16_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; CI-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -7143,9 +7143,9 @@ body: | ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -7162,9 +7162,9 @@ body: | ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; CI-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; CI-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -7174,9 +7174,9 @@ body: | ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; CI-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -7194,10 +7194,10 @@ body: | ; CI-MESA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_load_global_v4s16_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -7208,9 +7208,9 @@ body: | ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) @@ -7225,9 +7225,9 @@ body: | ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) @@ -7235,9 +7235,9 @@ body: | ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) @@ -7253,14 +7253,14 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v4s16_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, align 1, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 1, addrspace 1) ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) ; GFX9-MESA-LABEL: name: test_load_global_v4s16_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -7271,9 +7271,9 @@ body: | ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) @@ -7285,9 +7285,9 @@ body: | ; GFX9-MESA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; GFX9-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; GFX9-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) @@ -7295,9 +7295,9 @@ body: | ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; GFX9-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] ; GFX9-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) @@ -7310,7 +7310,7 @@ body: | ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<4 x s16>) = G_LOAD %0 :: (load 8, align 1, addrspace 1) + %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 1, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... @@ -7322,7 +7322,7 @@ body: | ; SI-LABEL: name: test_load_global_v5s16_align16 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), addrspace 1) ; SI: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; SI: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; SI: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF @@ -7365,7 +7365,7 @@ body: | ; SI: $vgpr2 = COPY [[BITCAST7]](<2 x s16>) ; CI-HSA-LABEL: name: test_load_global_v5s16_align16 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), addrspace 1) ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-HSA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CI-HSA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF @@ -7408,7 +7408,7 @@ body: | ; CI-HSA: $vgpr2 = COPY [[BITCAST7]](<2 x s16>) ; CI-MESA-LABEL: name: test_load_global_v5s16_align16 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), addrspace 1) ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-MESA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CI-MESA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF @@ -7451,7 +7451,7 @@ body: | ; CI-MESA: $vgpr2 = COPY [[BITCAST7]](<2 x s16>) ; VI-LABEL: name: test_load_global_v5s16_align16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), addrspace 1) ; VI: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; VI: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; VI: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF @@ -7494,7 +7494,7 @@ body: | ; VI: $vgpr2 = COPY [[BITCAST7]](<2 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v5s16_align16 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), addrspace 1) ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF @@ -7524,7 +7524,7 @@ body: | ; GFX9-HSA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>) ; GFX9-MESA-LABEL: name: test_load_global_v5s16_align16 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), addrspace 1) ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF @@ -7553,7 +7553,7 @@ body: | ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<5 x s16>) = G_LOAD %0 :: (load 10, align 16, addrspace 1) + %1:_(<5 x s16>) = G_LOAD %0 :: (load (<5 x s16>), align 16, addrspace 1) %2:_(<5 x s16>) = G_IMPLICIT_DEF %3:_(<10 x s16>) = G_CONCAT_VECTORS %1, %2 %4:_(<2 x s16>), %5:_(<2 x s16>), %6:_(<2 x s16>), %7:_(<2 x s16>), %8:_(<2 x s16>) = G_UNMERGE_VALUES %3 @@ -7571,10 +7571,10 @@ body: | ; SI-LABEL: name: test_load_global_v5s16_align8 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 8, align 8, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1) ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -7615,7 +7615,7 @@ body: | ; SI: $vgpr2 = COPY [[BITCAST5]](<2 x s16>) ; CI-HSA-LABEL: name: test_load_global_v5s16_align8 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load 10, align 8, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load (<5 x s16>), align 8, addrspace 1) ; CI-HSA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; CI-HSA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) @@ -7627,7 +7627,7 @@ body: | ; CI-HSA: $vgpr2 = COPY [[UV8]](<2 x s16>) ; CI-MESA-LABEL: name: test_load_global_v5s16_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load 10, align 8, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load (<5 x s16>), align 8, addrspace 1) ; CI-MESA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; CI-MESA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) @@ -7639,7 +7639,7 @@ body: | ; CI-MESA: $vgpr2 = COPY [[UV8]](<2 x s16>) ; VI-LABEL: name: test_load_global_v5s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load 10, align 8, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load (<5 x s16>), align 8, addrspace 1) ; VI: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; VI: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) @@ -7651,7 +7651,7 @@ body: | ; VI: $vgpr2 = COPY [[UV8]](<2 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v5s16_align8 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load 10, align 8, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load (<5 x s16>), align 8, addrspace 1) ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) @@ -7663,7 +7663,7 @@ body: | ; GFX9-HSA: $vgpr2 = COPY [[UV8]](<2 x s16>) ; GFX9-MESA-LABEL: name: test_load_global_v5s16_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load 10, align 8, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load (<5 x s16>), align 8, addrspace 1) ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) @@ -7674,7 +7674,7 @@ body: | ; GFX9-MESA: $vgpr1 = COPY [[UV7]](<2 x s16>) ; GFX9-MESA: $vgpr2 = COPY [[UV8]](<2 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<5 x s16>) = G_LOAD %0 :: (load 10, align 8, addrspace 1) + %1:_(<5 x s16>) = G_LOAD %0 :: (load (<5 x s16>), align 8, addrspace 1) %2:_(<5 x s16>) = G_IMPLICIT_DEF %3:_(<10 x s16>) = G_CONCAT_VECTORS %1, %2 %4:_(<2 x s16>), %5:_(<2 x s16>), %6:_(<2 x s16>), %7:_(<2 x s16>), %8:_(<2 x s16>) = G_UNMERGE_VALUES %3 @@ -7692,10 +7692,10 @@ body: | ; SI-LABEL: name: test_load_global_v5s16_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (s64), align 4, addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 8, align 4, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1) ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -7736,7 +7736,7 @@ body: | ; SI: $vgpr2 = COPY [[BITCAST5]](<2 x s16>) ; CI-HSA-LABEL: name: test_load_global_v5s16_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load 10, align 4, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load (<5 x s16>), align 4, addrspace 1) ; CI-HSA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; CI-HSA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) @@ -7748,7 +7748,7 @@ body: | ; CI-HSA: $vgpr2 = COPY [[UV8]](<2 x s16>) ; CI-MESA-LABEL: name: test_load_global_v5s16_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load 10, align 4, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load (<5 x s16>), align 4, addrspace 1) ; CI-MESA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; CI-MESA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) @@ -7760,7 +7760,7 @@ body: | ; CI-MESA: $vgpr2 = COPY [[UV8]](<2 x s16>) ; VI-LABEL: name: test_load_global_v5s16_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load 10, align 4, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load (<5 x s16>), align 4, addrspace 1) ; VI: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; VI: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) @@ -7772,7 +7772,7 @@ body: | ; VI: $vgpr2 = COPY [[UV8]](<2 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v5s16_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load 10, align 4, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load (<5 x s16>), align 4, addrspace 1) ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) @@ -7784,7 +7784,7 @@ body: | ; GFX9-HSA: $vgpr2 = COPY [[UV8]](<2 x s16>) ; GFX9-MESA-LABEL: name: test_load_global_v5s16_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load 10, align 4, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load (<5 x s16>), align 4, addrspace 1) ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) @@ -7795,7 +7795,7 @@ body: | ; GFX9-MESA: $vgpr1 = COPY [[UV7]](<2 x s16>) ; GFX9-MESA: $vgpr2 = COPY [[UV8]](<2 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<5 x s16>) = G_LOAD %0 :: (load 10, align 4, addrspace 1) + %1:_(<5 x s16>) = G_LOAD %0 :: (load (<5 x s16>), align 4, addrspace 1) %2:_(<5 x s16>) = G_IMPLICIT_DEF %3:_(<10 x s16>) = G_CONCAT_VECTORS %1, %2 %4:_(<2 x s16>), %5:_(<2 x s16>), %6:_(<2 x s16>), %7:_(<2 x s16>), %8:_(<2 x s16>) = G_UNMERGE_VALUES %3 @@ -7813,19 +7813,19 @@ body: | ; SI-LABEL: name: test_load_global_v5s16_align2 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2 from unknown-address + 6, addrspace 1) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 from unknown-address + 8, addrspace 1) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; SI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; SI: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF @@ -7861,7 +7861,7 @@ body: | ; SI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) ; CI-HSA-LABEL: name: test_load_global_v5s16_align2 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load 10, align 2, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load (<5 x s16>), align 2, addrspace 1) ; CI-HSA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; CI-HSA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) @@ -7873,19 +7873,19 @@ body: | ; CI-HSA: $vgpr2 = COPY [[UV8]](<2 x s16>) ; CI-MESA-LABEL: name: test_load_global_v5s16_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1) + ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2 from unknown-address + 6, addrspace 1) + ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 from unknown-address + 8, addrspace 1) + ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) ; CI-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; CI-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; CI-MESA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF @@ -7921,19 +7921,19 @@ body: | ; CI-MESA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) ; VI-LABEL: name: test_load_global_v5s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2 from unknown-address + 6, addrspace 1) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 from unknown-address + 8, addrspace 1) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; VI: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF @@ -7969,7 +7969,7 @@ body: | ; VI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v5s16_align2 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load 10, align 2, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load (<5 x s16>), align 2, addrspace 1) ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) @@ -7981,19 +7981,19 @@ body: | ; GFX9-HSA: $vgpr2 = COPY [[UV8]](<2 x s16>) ; GFX9-MESA-LABEL: name: test_load_global_v5s16_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1) + ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2 from unknown-address + 6, addrspace 1) + ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 from unknown-address + 8, addrspace 1) + ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF @@ -8015,7 +8015,7 @@ body: | ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<5 x s16>) = G_LOAD %0 :: (load 10, align 2, addrspace 1) + %1:_(<5 x s16>) = G_LOAD %0 :: (load (<5 x s16>), align 2, addrspace 1) %2:_(<5 x s16>) = G_IMPLICIT_DEF %3:_(<10 x s16>) = G_CONCAT_VECTORS %1, %2 %4:_(<2 x s16>), %5:_(<2 x s16>), %6:_(<2 x s16>), %7:_(<2 x s16>), %8:_(<2 x s16>) = G_UNMERGE_VALUES %3 @@ -8033,10 +8033,10 @@ body: | ; SI-LABEL: name: test_load_global_v5s16_align1 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -8049,9 +8049,9 @@ body: | ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -8062,9 +8062,9 @@ body: | ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; SI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -8074,9 +8074,9 @@ body: | ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; SI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -8087,9 +8087,9 @@ body: | ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1) + ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 1 from unknown-address + 9, addrspace 1) + ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) ; SI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; SI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C1]] ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -8128,7 +8128,7 @@ body: | ; SI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) ; CI-HSA-LABEL: name: test_load_global_v5s16_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load 10, align 1, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load (<5 x s16>), align 1, addrspace 1) ; CI-HSA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; CI-HSA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) @@ -8140,10 +8140,10 @@ body: | ; CI-HSA: $vgpr2 = COPY [[UV8]](<2 x s16>) ; CI-MESA-LABEL: name: test_load_global_v5s16_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; CI-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -8156,9 +8156,9 @@ body: | ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -8169,9 +8169,9 @@ body: | ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; CI-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; CI-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -8181,9 +8181,9 @@ body: | ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; CI-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; CI-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -8194,9 +8194,9 @@ body: | ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CI-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1) + ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) ; CI-MESA: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 1 from unknown-address + 9, addrspace 1) + ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) ; CI-MESA: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; CI-MESA: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C1]] ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -8235,10 +8235,10 @@ body: | ; CI-MESA: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) ; VI-LABEL: name: test_load_global_v5s16_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -8249,9 +8249,9 @@ body: | ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) @@ -8260,9 +8260,9 @@ body: | ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) @@ -8270,9 +8270,9 @@ body: | ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) @@ -8281,9 +8281,9 @@ body: | ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1) + ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 1 from unknown-address + 9, addrspace 1) + ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) ; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C1]] ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) @@ -8320,7 +8320,7 @@ body: | ; VI: $vgpr2 = COPY [[BITCAST3]](<2 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v5s16_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load 10, align 1, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[COPY]](p1) :: (load (<5 x s16>), align 1, addrspace 1) ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) @@ -8332,10 +8332,10 @@ body: | ; GFX9-HSA: $vgpr2 = COPY [[UV8]](<2 x s16>) ; GFX9-MESA-LABEL: name: test_load_global_v5s16_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -8346,9 +8346,9 @@ body: | ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) @@ -8357,9 +8357,9 @@ body: | ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; GFX9-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; GFX9-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) @@ -8367,9 +8367,9 @@ body: | ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; GFX9-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] ; GFX9-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) @@ -8378,9 +8378,9 @@ body: | ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GFX9-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1) + ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) ; GFX9-MESA: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 1 from unknown-address + 9, addrspace 1) + ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) ; GFX9-MESA: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C1]] ; GFX9-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) @@ -8408,7 +8408,7 @@ body: | ; GFX9-MESA: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<5 x s16>) = G_LOAD %0 :: (load 10, align 1, addrspace 1) + %1:_(<5 x s16>) = G_LOAD %0 :: (load (<5 x s16>), align 1, addrspace 1) %2:_(<5 x s16>) = G_IMPLICIT_DEF %3:_(<10 x s16>) = G_CONCAT_VECTORS %1, %2 %4:_(<2 x s16>), %5:_(<2 x s16>), %6:_(<2 x s16>), %7:_(<2 x s16>), %8:_(<2 x s16>) = G_UNMERGE_VALUES %3 @@ -8426,37 +8426,37 @@ body: | ; SI-LABEL: name: test_load_global_v6s16_align16 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[LOAD]](<4 x s32>), 0 ; SI: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[EXTRACT]](<3 x s32>) ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) ; CI-HSA-LABEL: name: test_load_global_v6s16_align16 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 16, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 16, addrspace 1) ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) ; CI-MESA-LABEL: name: test_load_global_v6s16_align16 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 16, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 16, addrspace 1) ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) ; VI-LABEL: name: test_load_global_v6s16_align16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 16, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 16, addrspace 1) ; VI: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v6s16_align16 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 16, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 16, addrspace 1) ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) ; GFX9-MESA-LABEL: name: test_load_global_v6s16_align16 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 16, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 16, addrspace 1) ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<6 x s16>) = G_LOAD %0 :: (load 12, align 16, addrspace 1) + %1:_(<6 x s16>) = G_LOAD %0 :: (load (<6 x s16>), align 16, addrspace 1) $vgpr0_vgpr1_vgpr2 = COPY %1 ... @@ -8468,41 +8468,41 @@ body: | ; SI-LABEL: name: test_load_global_v6s16_align8 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 4 from unknown-address + 8, align 8, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from unknown-address + 8, align 8, addrspace 1) ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; SI: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) ; CI-HSA-LABEL: name: test_load_global_v6s16_align8 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 8, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 8, addrspace 1) ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) ; CI-MESA-LABEL: name: test_load_global_v6s16_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 8, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 8, addrspace 1) ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) ; VI-LABEL: name: test_load_global_v6s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 8, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 8, addrspace 1) ; VI: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v6s16_align8 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 8, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 8, addrspace 1) ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) ; GFX9-MESA-LABEL: name: test_load_global_v6s16_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 8, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 8, addrspace 1) ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<6 x s16>) = G_LOAD %0 :: (load 12, align 8, addrspace 1) + %1:_(<6 x s16>) = G_LOAD %0 :: (load (<6 x s16>), align 8, addrspace 1) $vgpr0_vgpr1_vgpr2 = COPY %1 ... @@ -8514,41 +8514,41 @@ body: | ; SI-LABEL: name: test_load_global_v6s16_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (s64), align 4, addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 4 from unknown-address + 8, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from unknown-address + 8, addrspace 1) ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; SI: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) ; CI-HSA-LABEL: name: test_load_global_v6s16_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4, addrspace 1) ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) ; CI-MESA-LABEL: name: test_load_global_v6s16_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4, addrspace 1) ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) ; VI-LABEL: name: test_load_global_v6s16_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4, addrspace 1) ; VI: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v6s16_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4, addrspace 1) ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) ; GFX9-MESA-LABEL: name: test_load_global_v6s16_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 4, addrspace 1) ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<6 x s16>) = G_LOAD %0 :: (load 12, align 4, addrspace 1) + %1:_(<6 x s16>) = G_LOAD %0 :: (load (<6 x s16>), align 4, addrspace 1) $vgpr0_vgpr1_vgpr2 = COPY %1 ... @@ -8560,10 +8560,10 @@ body: | ; SI-LABEL: name: test_load_global_v6s16_align2 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -8574,9 +8574,9 @@ body: | ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2 from unknown-address + 6, addrspace 1) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -8585,9 +8585,9 @@ body: | ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 from unknown-address + 8, addrspace 1) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 2 from unknown-address + 10, addrspace 1) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -8599,15 +8599,15 @@ body: | ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) ; CI-HSA-LABEL: name: test_load_global_v6s16_align2 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 2, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 2, addrspace 1) ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) ; CI-MESA-LABEL: name: test_load_global_v6s16_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -8618,9 +8618,9 @@ body: | ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1) + ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2 from unknown-address + 6, addrspace 1) + ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -8629,9 +8629,9 @@ body: | ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 from unknown-address + 8, addrspace 1) + ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 2 from unknown-address + 10, addrspace 1) + ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -8643,10 +8643,10 @@ body: | ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) ; VI-LABEL: name: test_load_global_v6s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -8657,9 +8657,9 @@ body: | ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2 from unknown-address + 6, addrspace 1) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -8668,9 +8668,9 @@ body: | ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 from unknown-address + 8, addrspace 1) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 2 from unknown-address + 10, addrspace 1) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -8682,15 +8682,15 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v6s16_align2 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 2, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 2, addrspace 1) ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) ; GFX9-MESA-LABEL: name: test_load_global_v6s16_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -8701,9 +8701,9 @@ body: | ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1) + ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2 from unknown-address + 6, addrspace 1) + ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -8712,9 +8712,9 @@ body: | ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 from unknown-address + 8, addrspace 1) + ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 2 from unknown-address + 10, addrspace 1) + ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -8725,7 +8725,7 @@ body: | ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<6 x s16>) = G_LOAD %0 :: (load 12, align 2, addrspace 1) + %1:_(<6 x s16>) = G_LOAD %0 :: (load (<6 x s16>), align 2, addrspace 1) $vgpr0_vgpr1_vgpr2 = COPY %1 ... @@ -8737,16 +8737,16 @@ body: | ; SI-LABEL: name: test_load_global_v6s16_align1 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -8767,13 +8767,13 @@ body: | ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -8790,13 +8790,13 @@ body: | ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; SI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1) + ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 1 from unknown-address + 9, addrspace 1) + ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load 1 from unknown-address + 10, addrspace 1) + ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load 1 from unknown-address + 11, addrspace 1) + ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -8816,21 +8816,21 @@ body: | ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) ; CI-HSA-LABEL: name: test_load_global_v6s16_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 1, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 1, addrspace 1) ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) ; CI-MESA-LABEL: name: test_load_global_v6s16_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -8851,13 +8851,13 @@ body: | ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; CI-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -8874,13 +8874,13 @@ body: | ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; CI-MESA: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CI-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1) + ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) ; CI-MESA: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 1 from unknown-address + 9, addrspace 1) + ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) ; CI-MESA: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; CI-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load 1 from unknown-address + 10, addrspace 1) + ; CI-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) ; CI-MESA: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load 1 from unknown-address + 11, addrspace 1) + ; CI-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; CI-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -8900,16 +8900,16 @@ body: | ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) ; VI-LABEL: name: test_load_global_v6s16_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -8930,13 +8930,13 @@ body: | ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -8953,13 +8953,13 @@ body: | ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1) + ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 1 from unknown-address + 9, addrspace 1) + ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load 1 from unknown-address + 10, addrspace 1) + ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load 1 from unknown-address + 11, addrspace 1) + ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -8979,21 +8979,21 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v6s16_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 1, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<6 x s16>), align 1, addrspace 1) ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) ; GFX9-MESA-LABEL: name: test_load_global_v6s16_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -9014,13 +9014,13 @@ body: | ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX9-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -9037,13 +9037,13 @@ body: | ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; GFX9-MESA: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GFX9-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1) + ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) ; GFX9-MESA: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 1 from unknown-address + 9, addrspace 1) + ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) ; GFX9-MESA: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; GFX9-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load 1 from unknown-address + 10, addrspace 1) + ; GFX9-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) ; GFX9-MESA: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; GFX9-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load 1 from unknown-address + 11, addrspace 1) + ; GFX9-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) ; GFX9-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; GFX9-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -9062,7 +9062,7 @@ body: | ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<6 x s16>) = G_LOAD %0 :: (load 12, align 1, addrspace 1) + %1:_(<6 x s16>) = G_LOAD %0 :: (load (<6 x s16>), align 1, addrspace 1) $vgpr0_vgpr1_vgpr2 = COPY %1 ... @@ -9074,7 +9074,7 @@ body: | ; SI-LABEL: name: test_load_global_v7s16_align16 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), addrspace 1) ; SI: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; SI: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; SI: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF @@ -9126,7 +9126,7 @@ body: | ; SI: $vgpr3 = COPY [[BITCAST9]](<2 x s16>) ; CI-HSA-LABEL: name: test_load_global_v7s16_align16 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), addrspace 1) ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-HSA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CI-HSA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF @@ -9178,7 +9178,7 @@ body: | ; CI-HSA: $vgpr3 = COPY [[BITCAST9]](<2 x s16>) ; CI-MESA-LABEL: name: test_load_global_v7s16_align16 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), addrspace 1) ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-MESA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CI-MESA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF @@ -9230,7 +9230,7 @@ body: | ; CI-MESA: $vgpr3 = COPY [[BITCAST9]](<2 x s16>) ; VI-LABEL: name: test_load_global_v7s16_align16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), addrspace 1) ; VI: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; VI: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; VI: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF @@ -9282,7 +9282,7 @@ body: | ; VI: $vgpr3 = COPY [[BITCAST9]](<2 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v7s16_align16 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), addrspace 1) ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF @@ -9317,7 +9317,7 @@ body: | ; GFX9-HSA: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>) ; GFX9-MESA-LABEL: name: test_load_global_v7s16_align16 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), addrspace 1) ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF @@ -9351,7 +9351,7 @@ body: | ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>) ; GFX9-MESA: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<7 x s16>) = G_LOAD %0 :: (load 14, align 16, addrspace 1) + %1:_(<7 x s16>) = G_LOAD %0 :: (load (<7 x s16>), align 16, addrspace 1) %2:_(<7 x s16>) = G_IMPLICIT_DEF %3:_(<14 x s16>) = G_CONCAT_VECTORS %1, %2 %4:_(<2 x s16>), %5:_(<2 x s16>), %6:_(<2 x s16>), %7:_(<2 x s16>), %8:_(<2 x s16>), %9:_(<2 x s16>), %10:_(<2 x s16>) = G_UNMERGE_VALUES %3 @@ -9370,7 +9370,7 @@ body: | ; SI-LABEL: name: test_load_global_v7s16_align8 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load 14, align 8, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load (<7 x s16>), align 8, addrspace 1) ; SI: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; SI: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>) @@ -9383,7 +9383,7 @@ body: | ; SI: $vgpr3 = COPY [[UV11]](<2 x s16>) ; CI-HSA-LABEL: name: test_load_global_v7s16_align8 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load 14, align 8, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load (<7 x s16>), align 8, addrspace 1) ; CI-HSA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CI-HSA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>) @@ -9396,7 +9396,7 @@ body: | ; CI-HSA: $vgpr3 = COPY [[UV11]](<2 x s16>) ; CI-MESA-LABEL: name: test_load_global_v7s16_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load 14, align 8, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load (<7 x s16>), align 8, addrspace 1) ; CI-MESA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CI-MESA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>) @@ -9409,7 +9409,7 @@ body: | ; CI-MESA: $vgpr3 = COPY [[UV11]](<2 x s16>) ; VI-LABEL: name: test_load_global_v7s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load 14, align 8, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load (<7 x s16>), align 8, addrspace 1) ; VI: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; VI: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>) @@ -9422,7 +9422,7 @@ body: | ; VI: $vgpr3 = COPY [[UV11]](<2 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v7s16_align8 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load 14, align 8, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load (<7 x s16>), align 8, addrspace 1) ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>) @@ -9435,7 +9435,7 @@ body: | ; GFX9-HSA: $vgpr3 = COPY [[UV11]](<2 x s16>) ; GFX9-MESA-LABEL: name: test_load_global_v7s16_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load 14, align 8, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load (<7 x s16>), align 8, addrspace 1) ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>) @@ -9447,7 +9447,7 @@ body: | ; GFX9-MESA: $vgpr2 = COPY [[UV10]](<2 x s16>) ; GFX9-MESA: $vgpr3 = COPY [[UV11]](<2 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<7 x s16>) = G_LOAD %0 :: (load 14, align 8, addrspace 1) + %1:_(<7 x s16>) = G_LOAD %0 :: (load (<7 x s16>), align 8, addrspace 1) %2:_(<7 x s16>) = G_IMPLICIT_DEF %3:_(<14 x s16>) = G_CONCAT_VECTORS %1, %2 %4:_(<2 x s16>), %5:_(<2 x s16>), %6:_(<2 x s16>), %7:_(<2 x s16>), %8:_(<2 x s16>), %9:_(<2 x s16>), %10:_(<2 x s16>) = G_UNMERGE_VALUES %3 @@ -9466,7 +9466,7 @@ body: | ; SI-LABEL: name: test_load_global_v7s16_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load 14, align 4, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load (<7 x s16>), align 4, addrspace 1) ; SI: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; SI: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>) @@ -9479,7 +9479,7 @@ body: | ; SI: $vgpr3 = COPY [[UV11]](<2 x s16>) ; CI-HSA-LABEL: name: test_load_global_v7s16_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load 14, align 4, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load (<7 x s16>), align 4, addrspace 1) ; CI-HSA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CI-HSA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>) @@ -9492,7 +9492,7 @@ body: | ; CI-HSA: $vgpr3 = COPY [[UV11]](<2 x s16>) ; CI-MESA-LABEL: name: test_load_global_v7s16_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load 14, align 4, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load (<7 x s16>), align 4, addrspace 1) ; CI-MESA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CI-MESA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>) @@ -9505,7 +9505,7 @@ body: | ; CI-MESA: $vgpr3 = COPY [[UV11]](<2 x s16>) ; VI-LABEL: name: test_load_global_v7s16_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load 14, align 4, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load (<7 x s16>), align 4, addrspace 1) ; VI: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; VI: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>) @@ -9518,7 +9518,7 @@ body: | ; VI: $vgpr3 = COPY [[UV11]](<2 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v7s16_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load 14, align 4, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load (<7 x s16>), align 4, addrspace 1) ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>) @@ -9531,7 +9531,7 @@ body: | ; GFX9-HSA: $vgpr3 = COPY [[UV11]](<2 x s16>) ; GFX9-MESA-LABEL: name: test_load_global_v7s16_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load 14, align 4, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load (<7 x s16>), align 4, addrspace 1) ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>) @@ -9543,7 +9543,7 @@ body: | ; GFX9-MESA: $vgpr2 = COPY [[UV10]](<2 x s16>) ; GFX9-MESA: $vgpr3 = COPY [[UV11]](<2 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<7 x s16>) = G_LOAD %0 :: (load 14, align 4, addrspace 1) + %1:_(<7 x s16>) = G_LOAD %0 :: (load (<7 x s16>), align 4, addrspace 1) %2:_(<7 x s16>) = G_IMPLICIT_DEF %3:_(<14 x s16>) = G_CONCAT_VECTORS %1, %2 %4:_(<2 x s16>), %5:_(<2 x s16>), %6:_(<2 x s16>), %7:_(<2 x s16>), %8:_(<2 x s16>), %9:_(<2 x s16>), %10:_(<2 x s16>) = G_UNMERGE_VALUES %3 @@ -9562,23 +9562,23 @@ body: | ; SI-LABEL: name: test_load_global_v7s16_align2 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2 from unknown-address + 6, addrspace 1) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 from unknown-address + 8, addrspace 1) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 2 from unknown-address + 10, addrspace 1) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 2 from unknown-address + 12, addrspace 1) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1) ; SI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -9730,7 +9730,7 @@ body: | ; SI: $vgpr3 = COPY [[BITCAST27]](<2 x s16>) ; CI-HSA-LABEL: name: test_load_global_v7s16_align2 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load 14, align 2, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load (<7 x s16>), align 2, addrspace 1) ; CI-HSA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CI-HSA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>) @@ -9743,23 +9743,23 @@ body: | ; CI-HSA: $vgpr3 = COPY [[UV11]](<2 x s16>) ; CI-MESA-LABEL: name: test_load_global_v7s16_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1) + ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2 from unknown-address + 6, addrspace 1) + ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 from unknown-address + 8, addrspace 1) + ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 2 from unknown-address + 10, addrspace 1) + ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 2 from unknown-address + 12, addrspace 1) + ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1) ; CI-MESA: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-MESA: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -9911,23 +9911,23 @@ body: | ; CI-MESA: $vgpr3 = COPY [[BITCAST27]](<2 x s16>) ; VI-LABEL: name: test_load_global_v7s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2 from unknown-address + 6, addrspace 1) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 from unknown-address + 8, addrspace 1) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 2 from unknown-address + 10, addrspace 1) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 2 from unknown-address + 12, addrspace 1) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1) ; VI: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -10079,7 +10079,7 @@ body: | ; VI: $vgpr3 = COPY [[BITCAST27]](<2 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v7s16_align2 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load 14, align 2, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load (<7 x s16>), align 2, addrspace 1) ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>) @@ -10092,23 +10092,23 @@ body: | ; GFX9-HSA: $vgpr3 = COPY [[UV11]](<2 x s16>) ; GFX9-MESA-LABEL: name: test_load_global_v7s16_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1) + ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2 from unknown-address + 6, addrspace 1) + ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 from unknown-address + 8, addrspace 1) + ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 2 from unknown-address + 10, addrspace 1) + ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 2 from unknown-address + 12, addrspace 1) + ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1) ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-MESA: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -10201,7 +10201,7 @@ body: | ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC13]](<2 x s16>) ; GFX9-MESA: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC14]](<2 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<7 x s16>) = G_LOAD %0 :: (load 14, align 2, addrspace 1) + %1:_(<7 x s16>) = G_LOAD %0 :: (load (<7 x s16>), align 2, addrspace 1) %2:_(<7 x s16>) = G_IMPLICIT_DEF %3:_(<14 x s16>) = G_CONCAT_VECTORS %1, %2 %4:_(<2 x s16>), %5:_(<2 x s16>), %6:_(<2 x s16>), %7:_(<2 x s16>), %8:_(<2 x s16>), %9:_(<2 x s16>), %10:_(<2 x s16>) = G_UNMERGE_VALUES %3 @@ -10220,10 +10220,10 @@ body: | ; SI-LABEL: name: test_load_global_v7s16_align1 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -10236,9 +10236,9 @@ body: | ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -10249,9 +10249,9 @@ body: | ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; SI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -10261,9 +10261,9 @@ body: | ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; SI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -10274,9 +10274,9 @@ body: | ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1) + ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 1 from unknown-address + 9, addrspace 1) + ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) ; SI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; SI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C1]] ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -10286,9 +10286,9 @@ body: | ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load 1 from unknown-address + 10, addrspace 1) + ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load 1 from unknown-address + 11, addrspace 1) + ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) ; SI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; SI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C1]] ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -10298,9 +10298,9 @@ body: | ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) ; SI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] ; SI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) - ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load 1 from unknown-address + 12, addrspace 1) + ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) ; SI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load 1 from unknown-address + 13, addrspace 1) + ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) ; SI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; SI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C1]] ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -10453,7 +10453,7 @@ body: | ; SI: $vgpr3 = COPY [[BITCAST27]](<2 x s16>) ; CI-HSA-LABEL: name: test_load_global_v7s16_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load 14, align 1, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load (<7 x s16>), align 1, addrspace 1) ; CI-HSA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CI-HSA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>) @@ -10466,10 +10466,10 @@ body: | ; CI-HSA: $vgpr3 = COPY [[UV11]](<2 x s16>) ; CI-MESA-LABEL: name: test_load_global_v7s16_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; CI-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -10482,9 +10482,9 @@ body: | ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -10495,9 +10495,9 @@ body: | ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; CI-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; CI-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -10507,9 +10507,9 @@ body: | ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) ; CI-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) - ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; CI-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -10520,9 +10520,9 @@ body: | ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC7]] ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CI-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1) + ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) ; CI-MESA: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 1 from unknown-address + 9, addrspace 1) + ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) ; CI-MESA: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; CI-MESA: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C1]] ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -10532,9 +10532,9 @@ body: | ; CI-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) ; CI-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[TRUNC9]] ; CI-MESA: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; CI-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load 1 from unknown-address + 10, addrspace 1) + ; CI-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) ; CI-MESA: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; CI-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load 1 from unknown-address + 11, addrspace 1) + ; CI-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) ; CI-MESA: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; CI-MESA: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C1]] ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -10544,9 +10544,9 @@ body: | ; CI-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) ; CI-MESA: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[TRUNC11]] ; CI-MESA: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) - ; CI-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load 1 from unknown-address + 12, addrspace 1) + ; CI-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) ; CI-MESA: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; CI-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load 1 from unknown-address + 13, addrspace 1) + ; CI-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) ; CI-MESA: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; CI-MESA: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C1]] ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -10699,10 +10699,10 @@ body: | ; CI-MESA: $vgpr3 = COPY [[BITCAST27]](<2 x s16>) ; VI-LABEL: name: test_load_global_v7s16_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -10713,9 +10713,9 @@ body: | ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) @@ -10724,9 +10724,9 @@ body: | ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) @@ -10734,9 +10734,9 @@ body: | ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) @@ -10745,9 +10745,9 @@ body: | ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1) + ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 1 from unknown-address + 9, addrspace 1) + ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) ; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C1]] ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) @@ -10755,9 +10755,9 @@ body: | ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C2]](s16) ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load 1 from unknown-address + 10, addrspace 1) + ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load 1 from unknown-address + 11, addrspace 1) + ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C1]] ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) @@ -10765,9 +10765,9 @@ body: | ; VI: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C2]](s16) ; VI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] ; VI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load 1 from unknown-address + 12, addrspace 1) + ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) ; VI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load 1 from unknown-address + 13, addrspace 1) + ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) ; VI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; VI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C1]] ; VI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) @@ -10918,7 +10918,7 @@ body: | ; VI: $vgpr3 = COPY [[BITCAST27]](<2 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v7s16_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load 14, align 1, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[COPY]](p1) :: (load (<7 x s16>), align 1, addrspace 1) ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<56 x s16>) = G_CONCAT_VECTORS [[DEF]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>), [[DEF1]](<8 x s16>) @@ -10931,10 +10931,10 @@ body: | ; GFX9-HSA: $vgpr3 = COPY [[UV11]](<2 x s16>) ; GFX9-MESA-LABEL: name: test_load_global_v7s16_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -10945,9 +10945,9 @@ body: | ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) @@ -10956,9 +10956,9 @@ body: | ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; GFX9-MESA: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; GFX9-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) @@ -10966,9 +10966,9 @@ body: | ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) - ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; GFX9-MESA: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] ; GFX9-MESA: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) @@ -10977,9 +10977,9 @@ body: | ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GFX9-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1) + ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) ; GFX9-MESA: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 1 from unknown-address + 9, addrspace 1) + ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) ; GFX9-MESA: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C1]] ; GFX9-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) @@ -10987,9 +10987,9 @@ body: | ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C2]](s16) ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] ; GFX9-MESA: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; GFX9-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load 1 from unknown-address + 10, addrspace 1) + ; GFX9-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) ; GFX9-MESA: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; GFX9-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load 1 from unknown-address + 11, addrspace 1) + ; GFX9-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) ; GFX9-MESA: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32) ; GFX9-MESA: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C1]] ; GFX9-MESA: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32) @@ -10997,9 +10997,9 @@ body: | ; GFX9-MESA: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C2]](s16) ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] ; GFX9-MESA: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; GFX9-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load 1 from unknown-address + 12, addrspace 1) + ; GFX9-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) ; GFX9-MESA: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; GFX9-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load 1 from unknown-address + 13, addrspace 1) + ; GFX9-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) ; GFX9-MESA: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32) ; GFX9-MESA: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C1]] ; GFX9-MESA: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32) @@ -11098,7 +11098,7 @@ body: | ; GFX9-MESA: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC13]](<2 x s16>) ; GFX9-MESA: $vgpr3 = COPY [[BUILD_VECTOR_TRUNC14]](<2 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<7 x s16>) = G_LOAD %0 :: (load 14, align 1, addrspace 1) + %1:_(<7 x s16>) = G_LOAD %0 :: (load (<7 x s16>), align 1, addrspace 1) %2:_(<7 x s16>) = G_IMPLICIT_DEF %3:_(<14 x s16>) = G_CONCAT_VECTORS %1, %2 %4:_(<2 x s16>), %5:_(<2 x s16>), %6:_(<2 x s16>), %7:_(<2 x s16>), %8:_(<2 x s16>), %9:_(<2 x s16>), %10:_(<2 x s16>) = G_UNMERGE_VALUES %3 @@ -11117,36 +11117,36 @@ body: | ; SI-LABEL: name: test_load_global_v8s16_align16 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), addrspace 1) ; SI: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) ; CI-HSA-LABEL: name: test_load_global_v8s16_align16 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), addrspace 1) ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) ; CI-MESA-LABEL: name: test_load_global_v8s16_align16 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), addrspace 1) ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) ; VI-LABEL: name: test_load_global_v8s16_align16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), addrspace 1) ; VI: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v8s16_align16 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), addrspace 1) ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) ; GFX9-MESA-LABEL: name: test_load_global_v8s16_align16 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), addrspace 1) ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<8 x s16>) = G_LOAD %0 :: (load 16, align 16, addrspace 1) + %1:_(<8 x s16>) = G_LOAD %0 :: (load (<8 x s16>), align 16, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -11158,36 +11158,36 @@ body: | ; SI-LABEL: name: test_load_global_v8s16_align8 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 8, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 8, addrspace 1) ; SI: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) ; CI-HSA-LABEL: name: test_load_global_v8s16_align8 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 8, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 8, addrspace 1) ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) ; CI-MESA-LABEL: name: test_load_global_v8s16_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 8, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 8, addrspace 1) ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) ; VI-LABEL: name: test_load_global_v8s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 8, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 8, addrspace 1) ; VI: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) ; GFX9-HSA-LABEL: name: test_load_global_v8s16_align8 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 8, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 8, addrspace 1) ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) ; GFX9-MESA-LABEL: name: test_load_global_v8s16_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 8, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 8, addrspace 1) ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<8 x s16>) = G_LOAD %0 :: (load 16, align 8, addrspace 1) + %1:_(<8 x s16>) = G_LOAD %0 :: (load (<8 x s16>), align 8, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -11199,30 +11199,30 @@ body: | ; SI-LABEL: name: test_load_global_v2s32_align8 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; CI-HSA-LABEL: name: test_load_global_v2s32_align8 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; CI-MESA-LABEL: name: test_load_global_v2s32_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; VI-LABEL: name: test_load_global_v2s32_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; GFX9-HSA-LABEL: name: test_load_global_v2s32_align8 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; GFX9-MESA-LABEL: name: test_load_global_v2s32_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_LOAD %0 :: (load 8, align 8, addrspace 1) + %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... @@ -11234,30 +11234,30 @@ body: | ; SI-LABEL: name: test_load_global_v2s32_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1) ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; CI-HSA-LABEL: name: test_load_global_v2s32_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1) ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; CI-MESA-LABEL: name: test_load_global_v2s32_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1) ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; VI-LABEL: name: test_load_global_v2s32_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; GFX9-HSA-LABEL: name: test_load_global_v2s32_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1) ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; GFX9-MESA-LABEL: name: test_load_global_v2s32_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 4, addrspace 1) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_LOAD %0 :: (load 8, align 4, addrspace 1) + %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 4, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... @@ -11269,10 +11269,10 @@ body: | ; SI-LABEL: name: test_load_global_v2s32_align2 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -11283,9 +11283,9 @@ body: | ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2 from unknown-address + 6, addrspace 1) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -11296,14 +11296,14 @@ body: | ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; CI-HSA-LABEL: name: test_load_global_v2s32_align2 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, align 2, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 2, addrspace 1) ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; CI-MESA-LABEL: name: test_load_global_v2s32_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -11314,9 +11314,9 @@ body: | ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1) + ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2 from unknown-address + 6, addrspace 1) + ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -11327,10 +11327,10 @@ body: | ; CI-MESA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; VI-LABEL: name: test_load_global_v2s32_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -11341,9 +11341,9 @@ body: | ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2 from unknown-address + 6, addrspace 1) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -11354,14 +11354,14 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-HSA-LABEL: name: test_load_global_v2s32_align2 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, align 2, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 2, addrspace 1) ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; GFX9-MESA-LABEL: name: test_load_global_v2s32_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -11372,9 +11372,9 @@ body: | ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1) + ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2 from unknown-address + 6, addrspace 1) + ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -11384,7 +11384,7 @@ body: | ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_LOAD %0 :: (load 8, align 2, addrspace 1) + %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 2, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... @@ -11396,16 +11396,16 @@ body: | ; SI-LABEL: name: test_load_global_v2s32_align1 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -11426,13 +11426,13 @@ body: | ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -11451,20 +11451,20 @@ body: | ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; CI-HSA-LABEL: name: test_load_global_v2s32_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, align 1, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 1, addrspace 1) ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; CI-MESA-LABEL: name: test_load_global_v2s32_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -11485,13 +11485,13 @@ body: | ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; CI-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -11510,16 +11510,16 @@ body: | ; CI-MESA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; VI-LABEL: name: test_load_global_v2s32_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -11540,13 +11540,13 @@ body: | ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -11565,20 +11565,20 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-HSA-LABEL: name: test_load_global_v2s32_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, align 1, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s32>), align 1, addrspace 1) ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; GFX9-MESA-LABEL: name: test_load_global_v2s32_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -11599,13 +11599,13 @@ body: | ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX9-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -11623,7 +11623,7 @@ body: | ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_LOAD %0 :: (load 8, align 1, addrspace 1) + %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 1, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... @@ -11635,31 +11635,31 @@ body: | ; SI-LABEL: name: test_load_global_v3s32_align16 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[LOAD]](<4 x s32>), 0 ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[EXTRACT]](<3 x s32>) ; CI-HSA-LABEL: name: test_load_global_v3s32_align16 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 16, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1) ; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) ; CI-MESA-LABEL: name: test_load_global_v3s32_align16 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 16, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1) ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) ; VI-LABEL: name: test_load_global_v3s32_align16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 16, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) ; GFX9-HSA-LABEL: name: test_load_global_v3s32_align16 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 16, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1) ; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) ; GFX9-MESA-LABEL: name: test_load_global_v3s32_align16 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 16, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<3 x s32>) = G_LOAD %0 :: (load 12, align 16, addrspace 1) + %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 16, addrspace 1) $vgpr0_vgpr1_vgpr2 = COPY %1 ... @@ -11671,35 +11671,35 @@ body: | ; SI-LABEL: name: test_load_global_v3s32_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (s64), align 4, addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 4 from unknown-address + 8, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from unknown-address + 8, addrspace 1) ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; CI-HSA-LABEL: name: test_load_global_v3s32_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1) ; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) ; CI-MESA-LABEL: name: test_load_global_v3s32_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1) ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) ; VI-LABEL: name: test_load_global_v3s32_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) ; GFX9-HSA-LABEL: name: test_load_global_v3s32_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1) ; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) ; GFX9-MESA-LABEL: name: test_load_global_v3s32_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 4, addrspace 1) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<3 x s32>) = G_LOAD %0 :: (load 12, align 4, addrspace 1) + %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 4, addrspace 1) $vgpr0_vgpr1_vgpr2 = COPY %1 ... @@ -11711,30 +11711,30 @@ body: | ; SI-LABEL: name: test_load_global_v4s32_align16 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; CI-HSA-LABEL: name: test_load_global_v4s32_align16 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; CI-MESA-LABEL: name: test_load_global_v4s32_align16 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; VI-LABEL: name: test_load_global_v4s32_align16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; GFX9-HSA-LABEL: name: test_load_global_v4s32_align16 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; GFX9-MESA-LABEL: name: test_load_global_v4s32_align16 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<4 x s32>) = G_LOAD %0 :: (load 16, align 16, addrspace 1) + %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 16, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -11746,30 +11746,30 @@ body: | ; SI-LABEL: name: test_load_global_v4s32_align8 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 8, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; CI-HSA-LABEL: name: test_load_global_v4s32_align8 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 8, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; CI-MESA-LABEL: name: test_load_global_v4s32_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 8, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; VI-LABEL: name: test_load_global_v4s32_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 8, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; GFX9-HSA-LABEL: name: test_load_global_v4s32_align8 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 8, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; GFX9-MESA-LABEL: name: test_load_global_v4s32_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 8, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 8, addrspace 1) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<4 x s32>) = G_LOAD %0 :: (load 16, align 8, addrspace 1) + %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 8, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -11781,30 +11781,30 @@ body: | ; SI-LABEL: name: test_load_global_v4s32_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; CI-HSA-LABEL: name: test_load_global_v4s32_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; CI-MESA-LABEL: name: test_load_global_v4s32_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; VI-LABEL: name: test_load_global_v4s32_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; GFX9-HSA-LABEL: name: test_load_global_v4s32_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; GFX9-MESA-LABEL: name: test_load_global_v4s32_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<4 x s32>) = G_LOAD %0 :: (load 16, align 4, addrspace 1) + %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 4, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -11816,30 +11816,30 @@ body: | ; SI-LABEL: name: test_load_global_v8s32_align32 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>) ; CI-HSA-LABEL: name: test_load_global_v8s32_align32 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>) ; CI-MESA-LABEL: name: test_load_global_v8s32_align32 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>) ; VI-LABEL: name: test_load_global_v8s32_align32 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>) ; GFX9-HSA-LABEL: name: test_load_global_v8s32_align32 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>) ; GFX9-MESA-LABEL: name: test_load_global_v8s32_align32 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<8 x s32>), addrspace 1) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<8 x s32>) = G_LOAD %0 :: (load 32, align 32, addrspace 1) + %1:_(<8 x s32>) = G_LOAD %0 :: (load (<8 x s32>), align 32, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 ... @@ -11901,30 +11901,30 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x s32>) ; SI-LABEL: name: test_load_global_v16s32_align32 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load 64, align 32, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 32, addrspace 1) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) ; CI-HSA-LABEL: name: test_load_global_v16s32_align32 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load 64, align 32, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 32, addrspace 1) ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) ; CI-MESA-LABEL: name: test_load_global_v16s32_align32 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load 64, align 32, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 32, addrspace 1) ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) ; VI-LABEL: name: test_load_global_v16s32_align32 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load 64, align 32, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 32, addrspace 1) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) ; GFX9-HSA-LABEL: name: test_load_global_v16s32_align32 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load 64, align 32, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 32, addrspace 1) ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) ; GFX9-MESA-LABEL: name: test_load_global_v16s32_align32 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load 64, align 32, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 32, addrspace 1) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<16 x s32>) = G_LOAD %0 :: (load 64, align 32, addrspace 1) + %1:_(<16 x s32>) = G_LOAD %0 :: (load (<16 x s32>), align 32, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1 ... @@ -11936,30 +11936,30 @@ body: | ; SI-LABEL: name: test_load_global_v2s64_align16 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), addrspace 1) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) ; CI-HSA-LABEL: name: test_load_global_v2s64_align16 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), addrspace 1) ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) ; CI-MESA-LABEL: name: test_load_global_v2s64_align16 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), addrspace 1) ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) ; VI-LABEL: name: test_load_global_v2s64_align16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), addrspace 1) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) ; GFX9-HSA-LABEL: name: test_load_global_v2s64_align16 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), addrspace 1) ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) ; GFX9-MESA-LABEL: name: test_load_global_v2s64_align16 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), addrspace 1) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = G_LOAD %0 :: (load 16, align 16, addrspace 1) + %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 16, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -11971,30 +11971,30 @@ body: | ; SI-LABEL: name: test_load_global_v2s64_align8 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 8, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) ; CI-HSA-LABEL: name: test_load_global_v2s64_align8 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 8, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1) ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) ; CI-MESA-LABEL: name: test_load_global_v2s64_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 8, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1) ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) ; VI-LABEL: name: test_load_global_v2s64_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 8, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) ; GFX9-HSA-LABEL: name: test_load_global_v2s64_align8 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 8, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1) ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) ; GFX9-MESA-LABEL: name: test_load_global_v2s64_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 8, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = G_LOAD %0 :: (load 16, align 8, addrspace 1) + %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 8, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -12006,30 +12006,30 @@ body: | ; SI-LABEL: name: test_load_global_v2s64_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 4, addrspace 1) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) ; CI-HSA-LABEL: name: test_load_global_v2s64_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 4, addrspace 1) ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) ; CI-MESA-LABEL: name: test_load_global_v2s64_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 4, addrspace 1) ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) ; VI-LABEL: name: test_load_global_v2s64_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 4, addrspace 1) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) ; GFX9-HSA-LABEL: name: test_load_global_v2s64_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 4, addrspace 1) ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) ; GFX9-MESA-LABEL: name: test_load_global_v2s64_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 4, addrspace 1) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = G_LOAD %0 :: (load 16, align 4, addrspace 1) + %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 4, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -12041,16 +12041,16 @@ body: | ; SI-LABEL: name: test_load_global_v2s64_align2 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2 from unknown-address + 6, addrspace 1) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -12068,13 +12068,13 @@ body: | ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 from unknown-address + 8, addrspace 1) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 2 from unknown-address + 10, addrspace 1) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 2 from unknown-address + 12, addrspace 1) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 2 from unknown-address + 14, addrspace 1) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s16) from unknown-address + 14, addrspace 1) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -12092,20 +12092,20 @@ body: | ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; CI-HSA-LABEL: name: test_load_global_v2s64_align2 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 2, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 2, addrspace 1) ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) ; CI-MESA-LABEL: name: test_load_global_v2s64_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1) + ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2 from unknown-address + 6, addrspace 1) + ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -12123,13 +12123,13 @@ body: | ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 from unknown-address + 8, addrspace 1) + ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 2 from unknown-address + 10, addrspace 1) + ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 2 from unknown-address + 12, addrspace 1) + ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1) ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 2 from unknown-address + 14, addrspace 1) + ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s16) from unknown-address + 14, addrspace 1) ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -12147,16 +12147,16 @@ body: | ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; VI-LABEL: name: test_load_global_v2s64_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2 from unknown-address + 6, addrspace 1) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -12174,13 +12174,13 @@ body: | ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 from unknown-address + 8, addrspace 1) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 2 from unknown-address + 10, addrspace 1) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 2 from unknown-address + 12, addrspace 1) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 2 from unknown-address + 14, addrspace 1) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s16) from unknown-address + 14, addrspace 1) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -12198,20 +12198,20 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-HSA-LABEL: name: test_load_global_v2s64_align2 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 2, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 2, addrspace 1) ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) ; GFX9-MESA-LABEL: name: test_load_global_v2s64_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1) + ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2 from unknown-address + 6, addrspace 1) + ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -12229,13 +12229,13 @@ body: | ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 from unknown-address + 8, addrspace 1) + ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 2 from unknown-address + 10, addrspace 1) + ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 2 from unknown-address + 12, addrspace 1) + ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1) ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 2 from unknown-address + 14, addrspace 1) + ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s16) from unknown-address + 14, addrspace 1) ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -12252,7 +12252,7 @@ body: | ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = G_LOAD %0 :: (load 16, align 2, addrspace 1) + %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 2, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -12264,28 +12264,28 @@ body: | ; SI-LABEL: name: test_load_global_v2s64_align1 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; SI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -12332,21 +12332,21 @@ body: | ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; SI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C11]](s64) - ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1) + ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 1 from unknown-address + 9, addrspace 1) + ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load 1 from unknown-address + 10, addrspace 1) + ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load 1 from unknown-address + 11, addrspace 1) + ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) ; SI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load 1 from unknown-address + 12, addrspace 1) + ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) ; SI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load 1 from unknown-address + 13, addrspace 1) + ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) ; SI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) - ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load 1 from unknown-address + 14, addrspace 1) + ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1) ; SI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64) - ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load 1 from unknown-address + 15, addrspace 1) + ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1) ; SI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; SI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32) @@ -12392,32 +12392,32 @@ body: | ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; CI-HSA-LABEL: name: test_load_global_v2s64_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 1, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 1, addrspace 1) ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) ; CI-MESA-LABEL: name: test_load_global_v2s64_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; CI-MESA: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -12464,21 +12464,21 @@ body: | ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; CI-MESA: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CI-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C11]](s64) - ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1) + ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) ; CI-MESA: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 1 from unknown-address + 9, addrspace 1) + ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) ; CI-MESA: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; CI-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load 1 from unknown-address + 10, addrspace 1) + ; CI-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) ; CI-MESA: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load 1 from unknown-address + 11, addrspace 1) + ; CI-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) ; CI-MESA: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; CI-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load 1 from unknown-address + 12, addrspace 1) + ; CI-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) ; CI-MESA: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; CI-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load 1 from unknown-address + 13, addrspace 1) + ; CI-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) ; CI-MESA: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) - ; CI-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load 1 from unknown-address + 14, addrspace 1) + ; CI-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1) ; CI-MESA: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64) - ; CI-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load 1 from unknown-address + 15, addrspace 1) + ; CI-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1) ; CI-MESA: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; CI-MESA: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32) @@ -12524,28 +12524,28 @@ body: | ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; VI-LABEL: name: test_load_global_v2s64_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; VI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -12584,21 +12584,21 @@ body: | ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; VI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C10]](s64) - ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1) + ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 1 from unknown-address + 9, addrspace 1) + ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load 1 from unknown-address + 10, addrspace 1) + ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load 1 from unknown-address + 11, addrspace 1) + ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) ; VI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load 1 from unknown-address + 12, addrspace 1) + ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) ; VI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load 1 from unknown-address + 13, addrspace 1) + ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) ; VI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) - ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load 1 from unknown-address + 14, addrspace 1) + ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1) ; VI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64) - ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load 1 from unknown-address + 15, addrspace 1) + ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1) ; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) @@ -12636,32 +12636,32 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-HSA-LABEL: name: test_load_global_v2s64_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 1, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 1, addrspace 1) ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) ; GFX9-MESA-LABEL: name: test_load_global_v2s64_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; GFX9-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; GFX9-MESA: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -12700,21 +12700,21 @@ body: | ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; GFX9-MESA: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GFX9-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C10]](s64) - ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1) + ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) ; GFX9-MESA: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 1 from unknown-address + 9, addrspace 1) + ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) ; GFX9-MESA: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; GFX9-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load 1 from unknown-address + 10, addrspace 1) + ; GFX9-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) ; GFX9-MESA: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; GFX9-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load 1 from unknown-address + 11, addrspace 1) + ; GFX9-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) ; GFX9-MESA: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; GFX9-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load 1 from unknown-address + 12, addrspace 1) + ; GFX9-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) ; GFX9-MESA: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; GFX9-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load 1 from unknown-address + 13, addrspace 1) + ; GFX9-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) ; GFX9-MESA: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) - ; GFX9-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load 1 from unknown-address + 14, addrspace 1) + ; GFX9-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1) ; GFX9-MESA: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64) - ; GFX9-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load 1 from unknown-address + 15, addrspace 1) + ; GFX9-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1) ; GFX9-MESA: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; GFX9-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) @@ -12751,7 +12751,7 @@ body: | ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = G_LOAD %0 :: (load 16, align 1, addrspace 1) + %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 1, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -12763,36 +12763,36 @@ body: | ; SI-LABEL: name: test_load_global_v2sp1_align16 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), addrspace 1) ; SI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; CI-HSA-LABEL: name: test_load_global_v2sp1_align16 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), addrspace 1) ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; CI-MESA-LABEL: name: test_load_global_v2sp1_align16 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), addrspace 1) ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; VI-LABEL: name: test_load_global_v2sp1_align16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), addrspace 1) ; VI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; GFX9-HSA-LABEL: name: test_load_global_v2sp1_align16 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), addrspace 1) ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; GFX9-MESA-LABEL: name: test_load_global_v2sp1_align16 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), addrspace 1) ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x p1>) = G_LOAD %0 :: (load 16, align 16, addrspace 1) + %1:_(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 16, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -12804,48 +12804,48 @@ body: | ; SI-LABEL: name: test_load_global_v3s64_align32 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 ; SI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; SI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) ; CI-HSA-LABEL: name: test_load_global_v3s64_align32 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) ; CI-HSA: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 ; CI-HSA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; CI-HSA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) ; CI-MESA-LABEL: name: test_load_global_v3s64_align32 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) ; VI-LABEL: name: test_load_global_v3s64_align32 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 ; VI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; VI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) ; GFX9-HSA-LABEL: name: test_load_global_v3s64_align32 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) ; GFX9-HSA: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; GFX9-HSA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) ; GFX9-MESA-LABEL: name: test_load_global_v3s64_align32 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<3 x s64>) = G_LOAD %0 :: (load 24, align 32, addrspace 1) + %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 32, addrspace 1) %2:_(<4 x s64>) = G_IMPLICIT_DEF %3:_(<4 x s64>) = G_INSERT %2, %1, 0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3 @@ -12859,10 +12859,10 @@ body: | ; SI-LABEL: name: test_load_global_v3s64_align8 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 8, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (s128), align 8, addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load 8 from unknown-address + 16, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1) ; SI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; SI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF @@ -12870,10 +12870,10 @@ body: | ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) ; CI-HSA-LABEL: name: test_load_global_v3s64_align8 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 8, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (s128), align 8, addrspace 1) ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-HSA: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load 8 from unknown-address + 16, addrspace 1) + ; CI-HSA: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1) ; CI-HSA: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; CI-HSA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF @@ -12881,10 +12881,10 @@ body: | ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) ; CI-MESA-LABEL: name: test_load_global_v3s64_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 8, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (s128), align 8, addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load 8 from unknown-address + 16, addrspace 1) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1) ; CI-MESA: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF @@ -12892,10 +12892,10 @@ body: | ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) ; VI-LABEL: name: test_load_global_v3s64_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 8, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (s128), align 8, addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load 8 from unknown-address + 16, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1) ; VI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; VI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF @@ -12903,10 +12903,10 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) ; GFX9-HSA-LABEL: name: test_load_global_v3s64_align8 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 8, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (s128), align 8, addrspace 1) ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load 8 from unknown-address + 16, addrspace 1) + ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1) ; GFX9-HSA: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) ; GFX9-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF @@ -12914,17 +12914,17 @@ body: | ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) ; GFX9-MESA-LABEL: name: test_load_global_v3s64_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 8, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (s128), align 8, addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load 8 from unknown-address + 16, addrspace 1) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1) ; GFX9-MESA: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<3 x s64>) = G_LOAD %0 :: (load 24, align 8, addrspace 1) + %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 8, addrspace 1) %2:_(<4 x s64>) = G_IMPLICIT_DEF %3:_(<4 x s64>) = G_INSERT %2, %1, 0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3 @@ -12938,28 +12938,28 @@ body: | ; SI-LABEL: name: test_load_global_v3s64_align1 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; SI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -13006,21 +13006,21 @@ body: | ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; SI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C11]](s64) - ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1) + ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 1 from unknown-address + 9, addrspace 1) + ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load 1 from unknown-address + 10, addrspace 1) + ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load 1 from unknown-address + 11, addrspace 1) + ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) ; SI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load 1 from unknown-address + 12, addrspace 1) + ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) ; SI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load 1 from unknown-address + 13, addrspace 1) + ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) ; SI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) - ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load 1 from unknown-address + 14, addrspace 1) + ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1) ; SI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64) - ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load 1 from unknown-address + 15, addrspace 1) + ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1) ; SI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; SI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32) @@ -13064,21 +13064,21 @@ body: | ; SI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; SI: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C12]](s64) - ; SI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load 1 from unknown-address + 16, addrspace 1) + ; SI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load (s8) from unknown-address + 16, addrspace 1) ; SI: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; SI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p1) :: (load 1 from unknown-address + 17, addrspace 1) + ; SI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p1) :: (load (s8) from unknown-address + 17, addrspace 1) ; SI: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; SI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load 1 from unknown-address + 18, addrspace 1) + ; SI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load (s8) from unknown-address + 18, addrspace 1) ; SI: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; SI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load 1 from unknown-address + 19, addrspace 1) + ; SI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load (s8) from unknown-address + 19, addrspace 1) ; SI: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) - ; SI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load 1 from unknown-address + 20, addrspace 1) + ; SI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load (s8) from unknown-address + 20, addrspace 1) ; SI: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; SI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p1) :: (load 1 from unknown-address + 21, addrspace 1) + ; SI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p1) :: (load (s8) from unknown-address + 21, addrspace 1) ; SI: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; SI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load 1 from unknown-address + 22, addrspace 1) + ; SI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load (s8) from unknown-address + 22, addrspace 1) ; SI: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; SI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load 1 from unknown-address + 23, addrspace 1) + ; SI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load (s8) from unknown-address + 23, addrspace 1) ; SI: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32) ; SI: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]] ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C8]](s32) @@ -13126,10 +13126,10 @@ body: | ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) ; CI-HSA-LABEL: name: test_load_global_v3s64_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 1, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (s128), align 1, addrspace 1) ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-HSA: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load 8 from unknown-address + 16, align 1, addrspace 1) + ; CI-HSA: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, align 1, addrspace 1) ; CI-HSA: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; CI-HSA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF @@ -13137,28 +13137,28 @@ body: | ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) ; CI-MESA-LABEL: name: test_load_global_v3s64_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; CI-MESA: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -13205,21 +13205,21 @@ body: | ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; CI-MESA: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CI-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C11]](s64) - ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1) + ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) ; CI-MESA: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 1 from unknown-address + 9, addrspace 1) + ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) ; CI-MESA: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; CI-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load 1 from unknown-address + 10, addrspace 1) + ; CI-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) ; CI-MESA: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load 1 from unknown-address + 11, addrspace 1) + ; CI-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) ; CI-MESA: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; CI-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load 1 from unknown-address + 12, addrspace 1) + ; CI-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) ; CI-MESA: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; CI-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load 1 from unknown-address + 13, addrspace 1) + ; CI-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) ; CI-MESA: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) - ; CI-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load 1 from unknown-address + 14, addrspace 1) + ; CI-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1) ; CI-MESA: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64) - ; CI-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load 1 from unknown-address + 15, addrspace 1) + ; CI-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1) ; CI-MESA: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; CI-MESA: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32) @@ -13263,21 +13263,21 @@ body: | ; CI-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; CI-MESA: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-MESA: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C12]](s64) - ; CI-MESA: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load 1 from unknown-address + 16, addrspace 1) + ; CI-MESA: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load (s8) from unknown-address + 16, addrspace 1) ; CI-MESA: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; CI-MESA: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p1) :: (load 1 from unknown-address + 17, addrspace 1) + ; CI-MESA: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p1) :: (load (s8) from unknown-address + 17, addrspace 1) ; CI-MESA: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; CI-MESA: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load 1 from unknown-address + 18, addrspace 1) + ; CI-MESA: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load (s8) from unknown-address + 18, addrspace 1) ; CI-MESA: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; CI-MESA: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load 1 from unknown-address + 19, addrspace 1) + ; CI-MESA: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load (s8) from unknown-address + 19, addrspace 1) ; CI-MESA: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) - ; CI-MESA: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load 1 from unknown-address + 20, addrspace 1) + ; CI-MESA: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load (s8) from unknown-address + 20, addrspace 1) ; CI-MESA: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; CI-MESA: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p1) :: (load 1 from unknown-address + 21, addrspace 1) + ; CI-MESA: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p1) :: (load (s8) from unknown-address + 21, addrspace 1) ; CI-MESA: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; CI-MESA: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load 1 from unknown-address + 22, addrspace 1) + ; CI-MESA: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load (s8) from unknown-address + 22, addrspace 1) ; CI-MESA: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; CI-MESA: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load 1 from unknown-address + 23, addrspace 1) + ; CI-MESA: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load (s8) from unknown-address + 23, addrspace 1) ; CI-MESA: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32) ; CI-MESA: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]] ; CI-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C8]](s32) @@ -13325,28 +13325,28 @@ body: | ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) ; VI-LABEL: name: test_load_global_v3s64_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; VI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -13385,21 +13385,21 @@ body: | ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; VI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C10]](s64) - ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1) + ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 1 from unknown-address + 9, addrspace 1) + ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load 1 from unknown-address + 10, addrspace 1) + ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load 1 from unknown-address + 11, addrspace 1) + ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) ; VI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load 1 from unknown-address + 12, addrspace 1) + ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) ; VI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load 1 from unknown-address + 13, addrspace 1) + ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) ; VI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) - ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load 1 from unknown-address + 14, addrspace 1) + ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1) ; VI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64) - ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load 1 from unknown-address + 15, addrspace 1) + ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1) ; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) @@ -13435,21 +13435,21 @@ body: | ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; VI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C11]](s64) - ; VI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load 1 from unknown-address + 16, addrspace 1) + ; VI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load (s8) from unknown-address + 16, addrspace 1) ; VI: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; VI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p1) :: (load 1 from unknown-address + 17, addrspace 1) + ; VI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p1) :: (load (s8) from unknown-address + 17, addrspace 1) ; VI: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; VI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load 1 from unknown-address + 18, addrspace 1) + ; VI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load (s8) from unknown-address + 18, addrspace 1) ; VI: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; VI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load 1 from unknown-address + 19, addrspace 1) + ; VI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load (s8) from unknown-address + 19, addrspace 1) ; VI: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) - ; VI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load 1 from unknown-address + 20, addrspace 1) + ; VI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load (s8) from unknown-address + 20, addrspace 1) ; VI: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; VI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p1) :: (load 1 from unknown-address + 21, addrspace 1) + ; VI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p1) :: (load (s8) from unknown-address + 21, addrspace 1) ; VI: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; VI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load 1 from unknown-address + 22, addrspace 1) + ; VI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load (s8) from unknown-address + 22, addrspace 1) ; VI: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; VI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load 1 from unknown-address + 23, addrspace 1) + ; VI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load (s8) from unknown-address + 23, addrspace 1) ; VI: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32) ; VI: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]] ; VI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32) @@ -13489,10 +13489,10 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) ; GFX9-HSA-LABEL: name: test_load_global_v3s64_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 1, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (s128), align 1, addrspace 1) ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load 8 from unknown-address + 16, align 1, addrspace 1) + ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, align 1, addrspace 1) ; GFX9-HSA: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) ; GFX9-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF @@ -13500,28 +13500,28 @@ body: | ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) ; GFX9-MESA-LABEL: name: test_load_global_v3s64_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; GFX9-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; GFX9-MESA: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -13560,21 +13560,21 @@ body: | ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; GFX9-MESA: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GFX9-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C10]](s64) - ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1) + ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) ; GFX9-MESA: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 1 from unknown-address + 9, addrspace 1) + ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) ; GFX9-MESA: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; GFX9-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load 1 from unknown-address + 10, addrspace 1) + ; GFX9-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) ; GFX9-MESA: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; GFX9-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load 1 from unknown-address + 11, addrspace 1) + ; GFX9-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) ; GFX9-MESA: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; GFX9-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load 1 from unknown-address + 12, addrspace 1) + ; GFX9-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) ; GFX9-MESA: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; GFX9-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load 1 from unknown-address + 13, addrspace 1) + ; GFX9-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) ; GFX9-MESA: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) - ; GFX9-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load 1 from unknown-address + 14, addrspace 1) + ; GFX9-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1) ; GFX9-MESA: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64) - ; GFX9-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load 1 from unknown-address + 15, addrspace 1) + ; GFX9-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1) ; GFX9-MESA: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; GFX9-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) @@ -13610,21 +13610,21 @@ body: | ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; GFX9-MESA: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-MESA: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C11]](s64) - ; GFX9-MESA: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load 1 from unknown-address + 16, addrspace 1) + ; GFX9-MESA: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load (s8) from unknown-address + 16, addrspace 1) ; GFX9-MESA: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; GFX9-MESA: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p1) :: (load 1 from unknown-address + 17, addrspace 1) + ; GFX9-MESA: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p1) :: (load (s8) from unknown-address + 17, addrspace 1) ; GFX9-MESA: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; GFX9-MESA: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load 1 from unknown-address + 18, addrspace 1) + ; GFX9-MESA: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load (s8) from unknown-address + 18, addrspace 1) ; GFX9-MESA: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; GFX9-MESA: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load 1 from unknown-address + 19, addrspace 1) + ; GFX9-MESA: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load (s8) from unknown-address + 19, addrspace 1) ; GFX9-MESA: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) - ; GFX9-MESA: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load 1 from unknown-address + 20, addrspace 1) + ; GFX9-MESA: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load (s8) from unknown-address + 20, addrspace 1) ; GFX9-MESA: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; GFX9-MESA: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p1) :: (load 1 from unknown-address + 21, addrspace 1) + ; GFX9-MESA: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p1) :: (load (s8) from unknown-address + 21, addrspace 1) ; GFX9-MESA: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; GFX9-MESA: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load 1 from unknown-address + 22, addrspace 1) + ; GFX9-MESA: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load (s8) from unknown-address + 22, addrspace 1) ; GFX9-MESA: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; GFX9-MESA: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load 1 from unknown-address + 23, addrspace 1) + ; GFX9-MESA: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load (s8) from unknown-address + 23, addrspace 1) ; GFX9-MESA: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32) ; GFX9-MESA: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]] ; GFX9-MESA: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32) @@ -13663,7 +13663,7 @@ body: | ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<3 x s64>) = G_LOAD %0 :: (load 24, align 1, addrspace 1) + %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 1, addrspace 1) %2:_(<4 x s64>) = G_IMPLICIT_DEF %3:_(<4 x s64>) = G_INSERT %2, %1, 0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3 @@ -13677,30 +13677,30 @@ body: | ; SI-LABEL: name: test_load_global_v4s64_align32 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) ; CI-HSA-LABEL: name: test_load_global_v4s64_align32 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) ; CI-MESA-LABEL: name: test_load_global_v4s64_align32 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) ; VI-LABEL: name: test_load_global_v4s64_align32 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) ; GFX9-HSA-LABEL: name: test_load_global_v4s64_align32 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) ; GFX9-MESA-LABEL: name: test_load_global_v4s64_align32 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<4 x s64>) = G_LOAD %0 :: (load 32, align 32, addrspace 1) + %1:_(<4 x s64>) = G_LOAD %0 :: (load (<4 x s64>), align 32, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 ... @@ -13712,30 +13712,30 @@ body: | ; SI-LABEL: name: test_load_global_v4s64_align8 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load 32, align 8, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), align 8, addrspace 1) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) ; CI-HSA-LABEL: name: test_load_global_v4s64_align8 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load 32, align 8, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), align 8, addrspace 1) ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) ; CI-MESA-LABEL: name: test_load_global_v4s64_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load 32, align 8, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), align 8, addrspace 1) ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) ; VI-LABEL: name: test_load_global_v4s64_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load 32, align 8, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), align 8, addrspace 1) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) ; GFX9-HSA-LABEL: name: test_load_global_v4s64_align8 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load 32, align 8, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), align 8, addrspace 1) ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) ; GFX9-MESA-LABEL: name: test_load_global_v4s64_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load 32, align 8, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), align 8, addrspace 1) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<4 x s64>) = G_LOAD %0 :: (load 32, align 8, addrspace 1) + %1:_(<4 x s64>) = G_LOAD %0 :: (load (<4 x s64>), align 8, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 ... @@ -13747,28 +13747,28 @@ body: | ; SI-LABEL: name: test_load_global_v4s64_align1 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; SI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -13815,21 +13815,21 @@ body: | ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; SI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C11]](s64) - ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1) + ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 1 from unknown-address + 9, addrspace 1) + ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load 1 from unknown-address + 10, addrspace 1) + ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load 1 from unknown-address + 11, addrspace 1) + ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) ; SI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load 1 from unknown-address + 12, addrspace 1) + ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) ; SI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load 1 from unknown-address + 13, addrspace 1) + ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) ; SI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) - ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load 1 from unknown-address + 14, addrspace 1) + ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1) ; SI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64) - ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load 1 from unknown-address + 15, addrspace 1) + ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1) ; SI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; SI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32) @@ -13873,21 +13873,21 @@ body: | ; SI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; SI: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C12]](s64) - ; SI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load 1 from unknown-address + 16, addrspace 1) + ; SI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load (s8) from unknown-address + 16, addrspace 1) ; SI: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; SI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p1) :: (load 1 from unknown-address + 17, addrspace 1) + ; SI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p1) :: (load (s8) from unknown-address + 17, addrspace 1) ; SI: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; SI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load 1 from unknown-address + 18, addrspace 1) + ; SI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load (s8) from unknown-address + 18, addrspace 1) ; SI: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; SI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load 1 from unknown-address + 19, addrspace 1) + ; SI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load (s8) from unknown-address + 19, addrspace 1) ; SI: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) - ; SI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load 1 from unknown-address + 20, addrspace 1) + ; SI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load (s8) from unknown-address + 20, addrspace 1) ; SI: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; SI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p1) :: (load 1 from unknown-address + 21, addrspace 1) + ; SI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p1) :: (load (s8) from unknown-address + 21, addrspace 1) ; SI: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; SI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load 1 from unknown-address + 22, addrspace 1) + ; SI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load (s8) from unknown-address + 22, addrspace 1) ; SI: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; SI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load 1 from unknown-address + 23, addrspace 1) + ; SI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load (s8) from unknown-address + 23, addrspace 1) ; SI: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32) ; SI: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]] ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C8]](s32) @@ -13931,21 +13931,21 @@ body: | ; SI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) ; SI: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 ; SI: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C13]](s64) - ; SI: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p1) :: (load 1 from unknown-address + 24, addrspace 1) + ; SI: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p1) :: (load (s8) from unknown-address + 24, addrspace 1) ; SI: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C]](s64) - ; SI: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p1) :: (load 1 from unknown-address + 25, addrspace 1) + ; SI: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p1) :: (load (s8) from unknown-address + 25, addrspace 1) ; SI: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64) - ; SI: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p1) :: (load 1 from unknown-address + 26, addrspace 1) + ; SI: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p1) :: (load (s8) from unknown-address + 26, addrspace 1) ; SI: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C2]](s64) - ; SI: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p1) :: (load 1 from unknown-address + 27, addrspace 1) + ; SI: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p1) :: (load (s8) from unknown-address + 27, addrspace 1) ; SI: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64) - ; SI: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p1) :: (load 1 from unknown-address + 28, addrspace 1) + ; SI: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p1) :: (load (s8) from unknown-address + 28, addrspace 1) ; SI: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64) - ; SI: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p1) :: (load 1 from unknown-address + 29, addrspace 1) + ; SI: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p1) :: (load (s8) from unknown-address + 29, addrspace 1) ; SI: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64) - ; SI: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p1) :: (load 1 from unknown-address + 30, addrspace 1) + ; SI: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p1) :: (load (s8) from unknown-address + 30, addrspace 1) ; SI: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C6]](s64) - ; SI: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p1) :: (load 1 from unknown-address + 31, addrspace 1) + ; SI: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p1) :: (load (s8) from unknown-address + 31, addrspace 1) ; SI: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD24]](s32) ; SI: [[AND24:%[0-9]+]]:_(s16) = G_AND [[TRUNC24]], [[C7]] ; SI: [[COPY24:%[0-9]+]]:_(s32) = COPY [[C8]](s32) @@ -13991,32 +13991,32 @@ body: | ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; CI-HSA-LABEL: name: test_load_global_v4s64_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load 32, align 1, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), align 1, addrspace 1) ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) ; CI-MESA-LABEL: name: test_load_global_v4s64_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; CI-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; CI-MESA: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -14063,21 +14063,21 @@ body: | ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; CI-MESA: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CI-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C11]](s64) - ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1) + ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) ; CI-MESA: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 1 from unknown-address + 9, addrspace 1) + ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) ; CI-MESA: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; CI-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load 1 from unknown-address + 10, addrspace 1) + ; CI-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) ; CI-MESA: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load 1 from unknown-address + 11, addrspace 1) + ; CI-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) ; CI-MESA: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; CI-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load 1 from unknown-address + 12, addrspace 1) + ; CI-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) ; CI-MESA: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; CI-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load 1 from unknown-address + 13, addrspace 1) + ; CI-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) ; CI-MESA: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) - ; CI-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load 1 from unknown-address + 14, addrspace 1) + ; CI-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1) ; CI-MESA: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64) - ; CI-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load 1 from unknown-address + 15, addrspace 1) + ; CI-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1) ; CI-MESA: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; CI-MESA: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C8]](s32) @@ -14121,21 +14121,21 @@ body: | ; CI-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; CI-MESA: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-MESA: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C12]](s64) - ; CI-MESA: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load 1 from unknown-address + 16, addrspace 1) + ; CI-MESA: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load (s8) from unknown-address + 16, addrspace 1) ; CI-MESA: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; CI-MESA: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p1) :: (load 1 from unknown-address + 17, addrspace 1) + ; CI-MESA: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p1) :: (load (s8) from unknown-address + 17, addrspace 1) ; CI-MESA: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; CI-MESA: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load 1 from unknown-address + 18, addrspace 1) + ; CI-MESA: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load (s8) from unknown-address + 18, addrspace 1) ; CI-MESA: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; CI-MESA: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load 1 from unknown-address + 19, addrspace 1) + ; CI-MESA: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load (s8) from unknown-address + 19, addrspace 1) ; CI-MESA: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) - ; CI-MESA: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load 1 from unknown-address + 20, addrspace 1) + ; CI-MESA: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load (s8) from unknown-address + 20, addrspace 1) ; CI-MESA: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; CI-MESA: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p1) :: (load 1 from unknown-address + 21, addrspace 1) + ; CI-MESA: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p1) :: (load (s8) from unknown-address + 21, addrspace 1) ; CI-MESA: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; CI-MESA: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load 1 from unknown-address + 22, addrspace 1) + ; CI-MESA: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load (s8) from unknown-address + 22, addrspace 1) ; CI-MESA: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; CI-MESA: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load 1 from unknown-address + 23, addrspace 1) + ; CI-MESA: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load (s8) from unknown-address + 23, addrspace 1) ; CI-MESA: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32) ; CI-MESA: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]] ; CI-MESA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C8]](s32) @@ -14179,21 +14179,21 @@ body: | ; CI-MESA: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) ; CI-MESA: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 ; CI-MESA: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C13]](s64) - ; CI-MESA: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p1) :: (load 1 from unknown-address + 24, addrspace 1) + ; CI-MESA: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p1) :: (load (s8) from unknown-address + 24, addrspace 1) ; CI-MESA: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C]](s64) - ; CI-MESA: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p1) :: (load 1 from unknown-address + 25, addrspace 1) + ; CI-MESA: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p1) :: (load (s8) from unknown-address + 25, addrspace 1) ; CI-MESA: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64) - ; CI-MESA: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p1) :: (load 1 from unknown-address + 26, addrspace 1) + ; CI-MESA: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p1) :: (load (s8) from unknown-address + 26, addrspace 1) ; CI-MESA: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C2]](s64) - ; CI-MESA: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p1) :: (load 1 from unknown-address + 27, addrspace 1) + ; CI-MESA: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p1) :: (load (s8) from unknown-address + 27, addrspace 1) ; CI-MESA: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64) - ; CI-MESA: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p1) :: (load 1 from unknown-address + 28, addrspace 1) + ; CI-MESA: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p1) :: (load (s8) from unknown-address + 28, addrspace 1) ; CI-MESA: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64) - ; CI-MESA: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p1) :: (load 1 from unknown-address + 29, addrspace 1) + ; CI-MESA: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p1) :: (load (s8) from unknown-address + 29, addrspace 1) ; CI-MESA: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64) - ; CI-MESA: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p1) :: (load 1 from unknown-address + 30, addrspace 1) + ; CI-MESA: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p1) :: (load (s8) from unknown-address + 30, addrspace 1) ; CI-MESA: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C6]](s64) - ; CI-MESA: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p1) :: (load 1 from unknown-address + 31, addrspace 1) + ; CI-MESA: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p1) :: (load (s8) from unknown-address + 31, addrspace 1) ; CI-MESA: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD24]](s32) ; CI-MESA: [[AND24:%[0-9]+]]:_(s16) = G_AND [[TRUNC24]], [[C7]] ; CI-MESA: [[COPY24:%[0-9]+]]:_(s32) = COPY [[C8]](s32) @@ -14239,28 +14239,28 @@ body: | ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; VI-LABEL: name: test_load_global_v4s64_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; VI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -14299,21 +14299,21 @@ body: | ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; VI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C10]](s64) - ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1) + ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 1 from unknown-address + 9, addrspace 1) + ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load 1 from unknown-address + 10, addrspace 1) + ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load 1 from unknown-address + 11, addrspace 1) + ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) ; VI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load 1 from unknown-address + 12, addrspace 1) + ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) ; VI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load 1 from unknown-address + 13, addrspace 1) + ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) ; VI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) - ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load 1 from unknown-address + 14, addrspace 1) + ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1) ; VI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64) - ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load 1 from unknown-address + 15, addrspace 1) + ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1) ; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) @@ -14349,21 +14349,21 @@ body: | ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; VI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C11]](s64) - ; VI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load 1 from unknown-address + 16, addrspace 1) + ; VI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load (s8) from unknown-address + 16, addrspace 1) ; VI: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; VI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p1) :: (load 1 from unknown-address + 17, addrspace 1) + ; VI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p1) :: (load (s8) from unknown-address + 17, addrspace 1) ; VI: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; VI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load 1 from unknown-address + 18, addrspace 1) + ; VI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load (s8) from unknown-address + 18, addrspace 1) ; VI: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; VI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load 1 from unknown-address + 19, addrspace 1) + ; VI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load (s8) from unknown-address + 19, addrspace 1) ; VI: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) - ; VI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load 1 from unknown-address + 20, addrspace 1) + ; VI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load (s8) from unknown-address + 20, addrspace 1) ; VI: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; VI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p1) :: (load 1 from unknown-address + 21, addrspace 1) + ; VI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p1) :: (load (s8) from unknown-address + 21, addrspace 1) ; VI: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; VI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load 1 from unknown-address + 22, addrspace 1) + ; VI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load (s8) from unknown-address + 22, addrspace 1) ; VI: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; VI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load 1 from unknown-address + 23, addrspace 1) + ; VI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load (s8) from unknown-address + 23, addrspace 1) ; VI: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32) ; VI: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]] ; VI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32) @@ -14399,21 +14399,21 @@ body: | ; VI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) ; VI: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 ; VI: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C12]](s64) - ; VI: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p1) :: (load 1 from unknown-address + 24, addrspace 1) + ; VI: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p1) :: (load (s8) from unknown-address + 24, addrspace 1) ; VI: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C]](s64) - ; VI: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p1) :: (load 1 from unknown-address + 25, addrspace 1) + ; VI: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p1) :: (load (s8) from unknown-address + 25, addrspace 1) ; VI: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64) - ; VI: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p1) :: (load 1 from unknown-address + 26, addrspace 1) + ; VI: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p1) :: (load (s8) from unknown-address + 26, addrspace 1) ; VI: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C2]](s64) - ; VI: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p1) :: (load 1 from unknown-address + 27, addrspace 1) + ; VI: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p1) :: (load (s8) from unknown-address + 27, addrspace 1) ; VI: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64) - ; VI: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p1) :: (load 1 from unknown-address + 28, addrspace 1) + ; VI: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p1) :: (load (s8) from unknown-address + 28, addrspace 1) ; VI: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64) - ; VI: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p1) :: (load 1 from unknown-address + 29, addrspace 1) + ; VI: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p1) :: (load (s8) from unknown-address + 29, addrspace 1) ; VI: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64) - ; VI: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p1) :: (load 1 from unknown-address + 30, addrspace 1) + ; VI: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p1) :: (load (s8) from unknown-address + 30, addrspace 1) ; VI: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C6]](s64) - ; VI: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p1) :: (load 1 from unknown-address + 31, addrspace 1) + ; VI: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p1) :: (load (s8) from unknown-address + 31, addrspace 1) ; VI: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD24]](s32) ; VI: [[AND24:%[0-9]+]]:_(s16) = G_AND [[TRUNC24]], [[C7]] ; VI: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD25]](s32) @@ -14451,32 +14451,32 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-HSA-LABEL: name: test_load_global_v4s64_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load 32, align 1, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), align 1, addrspace 1) ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>) ; GFX9-MESA-LABEL: name: test_load_global_v4s64_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; GFX9-MESA: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; GFX9-MESA: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -14515,21 +14515,21 @@ body: | ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; GFX9-MESA: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GFX9-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C10]](s64) - ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1) + ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) ; GFX9-MESA: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 1 from unknown-address + 9, addrspace 1) + ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) ; GFX9-MESA: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; GFX9-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load 1 from unknown-address + 10, addrspace 1) + ; GFX9-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) ; GFX9-MESA: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; GFX9-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load 1 from unknown-address + 11, addrspace 1) + ; GFX9-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) ; GFX9-MESA: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; GFX9-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load 1 from unknown-address + 12, addrspace 1) + ; GFX9-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) ; GFX9-MESA: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; GFX9-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load 1 from unknown-address + 13, addrspace 1) + ; GFX9-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) ; GFX9-MESA: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) - ; GFX9-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load 1 from unknown-address + 14, addrspace 1) + ; GFX9-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1) ; GFX9-MESA: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64) - ; GFX9-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load 1 from unknown-address + 15, addrspace 1) + ; GFX9-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1) ; GFX9-MESA: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; GFX9-MESA: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) @@ -14565,21 +14565,21 @@ body: | ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) ; GFX9-MESA: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-MESA: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C11]](s64) - ; GFX9-MESA: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load 1 from unknown-address + 16, addrspace 1) + ; GFX9-MESA: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load (s8) from unknown-address + 16, addrspace 1) ; GFX9-MESA: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; GFX9-MESA: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p1) :: (load 1 from unknown-address + 17, addrspace 1) + ; GFX9-MESA: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p1) :: (load (s8) from unknown-address + 17, addrspace 1) ; GFX9-MESA: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; GFX9-MESA: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load 1 from unknown-address + 18, addrspace 1) + ; GFX9-MESA: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load (s8) from unknown-address + 18, addrspace 1) ; GFX9-MESA: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; GFX9-MESA: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load 1 from unknown-address + 19, addrspace 1) + ; GFX9-MESA: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load (s8) from unknown-address + 19, addrspace 1) ; GFX9-MESA: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) - ; GFX9-MESA: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load 1 from unknown-address + 20, addrspace 1) + ; GFX9-MESA: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load (s8) from unknown-address + 20, addrspace 1) ; GFX9-MESA: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; GFX9-MESA: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p1) :: (load 1 from unknown-address + 21, addrspace 1) + ; GFX9-MESA: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p1) :: (load (s8) from unknown-address + 21, addrspace 1) ; GFX9-MESA: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; GFX9-MESA: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load 1 from unknown-address + 22, addrspace 1) + ; GFX9-MESA: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load (s8) from unknown-address + 22, addrspace 1) ; GFX9-MESA: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; GFX9-MESA: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load 1 from unknown-address + 23, addrspace 1) + ; GFX9-MESA: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load (s8) from unknown-address + 23, addrspace 1) ; GFX9-MESA: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD16]](s32) ; GFX9-MESA: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C7]] ; GFX9-MESA: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD17]](s32) @@ -14615,21 +14615,21 @@ body: | ; GFX9-MESA: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) ; GFX9-MESA: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 ; GFX9-MESA: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C12]](s64) - ; GFX9-MESA: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p1) :: (load 1 from unknown-address + 24, addrspace 1) + ; GFX9-MESA: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p1) :: (load (s8) from unknown-address + 24, addrspace 1) ; GFX9-MESA: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C]](s64) - ; GFX9-MESA: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p1) :: (load 1 from unknown-address + 25, addrspace 1) + ; GFX9-MESA: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p1) :: (load (s8) from unknown-address + 25, addrspace 1) ; GFX9-MESA: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64) - ; GFX9-MESA: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p1) :: (load 1 from unknown-address + 26, addrspace 1) + ; GFX9-MESA: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p1) :: (load (s8) from unknown-address + 26, addrspace 1) ; GFX9-MESA: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C2]](s64) - ; GFX9-MESA: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p1) :: (load 1 from unknown-address + 27, addrspace 1) + ; GFX9-MESA: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p1) :: (load (s8) from unknown-address + 27, addrspace 1) ; GFX9-MESA: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64) - ; GFX9-MESA: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p1) :: (load 1 from unknown-address + 28, addrspace 1) + ; GFX9-MESA: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p1) :: (load (s8) from unknown-address + 28, addrspace 1) ; GFX9-MESA: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64) - ; GFX9-MESA: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p1) :: (load 1 from unknown-address + 29, addrspace 1) + ; GFX9-MESA: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p1) :: (load (s8) from unknown-address + 29, addrspace 1) ; GFX9-MESA: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64) - ; GFX9-MESA: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p1) :: (load 1 from unknown-address + 30, addrspace 1) + ; GFX9-MESA: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p1) :: (load (s8) from unknown-address + 30, addrspace 1) ; GFX9-MESA: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C6]](s64) - ; GFX9-MESA: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p1) :: (load 1 from unknown-address + 31, addrspace 1) + ; GFX9-MESA: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p1) :: (load (s8) from unknown-address + 31, addrspace 1) ; GFX9-MESA: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD24]](s32) ; GFX9-MESA: [[AND24:%[0-9]+]]:_(s16) = G_AND [[TRUNC24]], [[C7]] ; GFX9-MESA: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD25]](s32) @@ -14666,7 +14666,7 @@ body: | ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<4 x s64>) = G_LOAD %0 :: (load 32, align 1, addrspace 1) + %1:_(<4 x s64>) = G_LOAD %0 :: (load (<4 x s64>), align 1, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 ... @@ -14678,36 +14678,36 @@ body: | ; SI-LABEL: name: test_load_global_v2s128_align32 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s128>), addrspace 1) ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>) ; CI-HSA-LABEL: name: test_load_global_v2s128_align32 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s128>), addrspace 1) ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>) ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>) ; CI-MESA-LABEL: name: test_load_global_v2s128_align32 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s128>), addrspace 1) ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>) ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>) ; VI-LABEL: name: test_load_global_v2s128_align32 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s128>), addrspace 1) ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>) ; GFX9-HSA-LABEL: name: test_load_global_v2s128_align32 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s128>), addrspace 1) ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>) ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>) ; GFX9-MESA-LABEL: name: test_load_global_v2s128_align32 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s128>), addrspace 1) ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s128>) = G_LOAD %0 :: (load 32, align 32, addrspace 1) + %1:_(<2 x s128>) = G_LOAD %0 :: (load (<2 x s128>), align 32, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 ... @@ -14719,36 +14719,36 @@ body: | ; SI-LABEL: name: test_load_global_v2p1_align16 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), addrspace 1) ; SI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; CI-HSA-LABEL: name: test_load_global_v2p1_align16 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), addrspace 1) ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; CI-MESA-LABEL: name: test_load_global_v2p1_align16 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), addrspace 1) ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; VI-LABEL: name: test_load_global_v2p1_align16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), addrspace 1) ; VI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; GFX9-HSA-LABEL: name: test_load_global_v2p1_align16 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), addrspace 1) ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; GFX9-MESA-LABEL: name: test_load_global_v2p1_align16 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), addrspace 1) ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x p1>) = G_LOAD %0 :: (load 16, align 16, addrspace 1) + %1:_(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 16, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -14760,36 +14760,36 @@ body: | ; SI-LABEL: name: test_load_global_v2p1_align8 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 8, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 8, addrspace 1) ; SI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; CI-HSA-LABEL: name: test_load_global_v2p1_align8 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 8, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 8, addrspace 1) ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; CI-MESA-LABEL: name: test_load_global_v2p1_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 8, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 8, addrspace 1) ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; VI-LABEL: name: test_load_global_v2p1_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 8, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 8, addrspace 1) ; VI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; GFX9-HSA-LABEL: name: test_load_global_v2p1_align8 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 8, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 8, addrspace 1) ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; GFX9-MESA-LABEL: name: test_load_global_v2p1_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 8, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 8, addrspace 1) ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x p1>) = G_LOAD %0 :: (load 16, align 8, addrspace 1) + %1:_(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 8, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -14801,36 +14801,36 @@ body: | ; SI-LABEL: name: test_load_global_v2p1_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4, addrspace 1) ; SI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; CI-HSA-LABEL: name: test_load_global_v2p1_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4, addrspace 1) ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; CI-MESA-LABEL: name: test_load_global_v2p1_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4, addrspace 1) ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; VI-LABEL: name: test_load_global_v2p1_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4, addrspace 1) ; VI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; GFX9-HSA-LABEL: name: test_load_global_v2p1_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4, addrspace 1) ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; GFX9-MESA-LABEL: name: test_load_global_v2p1_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 4, addrspace 1) ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x p1>) = G_LOAD %0 :: (load 16, align 4, addrspace 1) + %1:_(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 4, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -14842,16 +14842,16 @@ body: | ; SI-LABEL: name: test_load_global_v2p1_align1 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -14872,13 +14872,13 @@ body: | ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -14895,13 +14895,13 @@ body: | ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; SI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1) + ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 1 from unknown-address + 9, addrspace 1) + ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load 1 from unknown-address + 10, addrspace 1) + ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load 1 from unknown-address + 11, addrspace 1) + ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -14918,13 +14918,13 @@ body: | ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; SI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64) - ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load 1 from unknown-address + 12, addrspace 1) + ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) ; SI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load 1 from unknown-address + 13, addrspace 1) + ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) ; SI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load 1 from unknown-address + 14, addrspace 1) + ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1) ; SI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load 1 from unknown-address + 15, addrspace 1) + ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1) ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -14944,21 +14944,21 @@ body: | ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; CI-HSA-LABEL: name: test_load_global_v2p1_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 1, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 1, addrspace 1) ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; CI-MESA-LABEL: name: test_load_global_v2p1_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -14979,13 +14979,13 @@ body: | ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; CI-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -15002,13 +15002,13 @@ body: | ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; CI-MESA: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CI-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1) + ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) ; CI-MESA: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 1 from unknown-address + 9, addrspace 1) + ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) ; CI-MESA: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; CI-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load 1 from unknown-address + 10, addrspace 1) + ; CI-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) ; CI-MESA: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load 1 from unknown-address + 11, addrspace 1) + ; CI-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; CI-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -15025,13 +15025,13 @@ body: | ; CI-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; CI-MESA: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; CI-MESA: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64) - ; CI-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load 1 from unknown-address + 12, addrspace 1) + ; CI-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) ; CI-MESA: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; CI-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load 1 from unknown-address + 13, addrspace 1) + ; CI-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) ; CI-MESA: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; CI-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load 1 from unknown-address + 14, addrspace 1) + ; CI-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1) ; CI-MESA: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; CI-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load 1 from unknown-address + 15, addrspace 1) + ; CI-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1) ; CI-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; CI-MESA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -15051,16 +15051,16 @@ body: | ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; VI-LABEL: name: test_load_global_v2p1_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -15081,13 +15081,13 @@ body: | ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -15104,13 +15104,13 @@ body: | ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1) + ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 1 from unknown-address + 9, addrspace 1) + ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load 1 from unknown-address + 10, addrspace 1) + ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load 1 from unknown-address + 11, addrspace 1) + ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -15127,13 +15127,13 @@ body: | ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; VI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64) - ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load 1 from unknown-address + 12, addrspace 1) + ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) ; VI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load 1 from unknown-address + 13, addrspace 1) + ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) ; VI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load 1 from unknown-address + 14, addrspace 1) + ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1) ; VI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load 1 from unknown-address + 15, addrspace 1) + ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1) ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -15153,21 +15153,21 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; GFX9-HSA-LABEL: name: test_load_global_v2p1_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, align 1, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x p1>), align 1, addrspace 1) ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; GFX9-MESA-LABEL: name: test_load_global_v2p1_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -15188,13 +15188,13 @@ body: | ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX9-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -15211,13 +15211,13 @@ body: | ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; GFX9-MESA: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GFX9-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1) + ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) ; GFX9-MESA: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 1 from unknown-address + 9, addrspace 1) + ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) ; GFX9-MESA: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; GFX9-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load 1 from unknown-address + 10, addrspace 1) + ; GFX9-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) ; GFX9-MESA: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; GFX9-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load 1 from unknown-address + 11, addrspace 1) + ; GFX9-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) ; GFX9-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; GFX9-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -15234,13 +15234,13 @@ body: | ; GFX9-MESA: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; GFX9-MESA: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; GFX9-MESA: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64) - ; GFX9-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load 1 from unknown-address + 12, addrspace 1) + ; GFX9-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) ; GFX9-MESA: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; GFX9-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load 1 from unknown-address + 13, addrspace 1) + ; GFX9-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) ; GFX9-MESA: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; GFX9-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load 1 from unknown-address + 14, addrspace 1) + ; GFX9-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1) ; GFX9-MESA: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; GFX9-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load 1 from unknown-address + 15, addrspace 1) + ; GFX9-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1) ; GFX9-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; GFX9-MESA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; GFX9-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -15259,7 +15259,7 @@ body: | ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x p1>) = G_LOAD %0 :: (load 16, align 1, addrspace 1) + %1:_(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 1, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -15271,36 +15271,36 @@ body: | ; SI-LABEL: name: test_load_global_v4p1_align8 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, align 8, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x p1>), align 8, addrspace 1) ; SI: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[LOAD]](<8 x s32>) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>) ; CI-HSA-LABEL: name: test_load_global_v4p1_align8 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, align 8, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x p1>), align 8, addrspace 1) ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[LOAD]](<8 x s32>) ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>) ; CI-MESA-LABEL: name: test_load_global_v4p1_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, align 8, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x p1>), align 8, addrspace 1) ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[LOAD]](<8 x s32>) ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>) ; VI-LABEL: name: test_load_global_v4p1_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, align 8, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x p1>), align 8, addrspace 1) ; VI: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[LOAD]](<8 x s32>) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>) ; GFX9-HSA-LABEL: name: test_load_global_v4p1_align8 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, align 8, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x p1>), align 8, addrspace 1) ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[LOAD]](<8 x s32>) ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>) ; GFX9-MESA-LABEL: name: test_load_global_v4p1_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, align 8, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x p1>), align 8, addrspace 1) ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[LOAD]](<8 x s32>) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<4 x p1>) = G_LOAD %0 :: (load 32, align 8, addrspace 1) + %1:_(<4 x p1>) = G_LOAD %0 :: (load (<4 x p1>), align 8, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 ... @@ -15312,30 +15312,30 @@ body: | ; SI-LABEL: name: test_load_global_v2p3_align8 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1) ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) ; CI-HSA-LABEL: name: test_load_global_v2p3_align8 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1) ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) ; CI-MESA-LABEL: name: test_load_global_v2p3_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1) ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) ; VI-LABEL: name: test_load_global_v2p3_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) ; GFX9-HSA-LABEL: name: test_load_global_v2p3_align8 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1) ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) ; GFX9-MESA-LABEL: name: test_load_global_v2p3_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x p3>) = G_LOAD %0 :: (load 8, align 8, addrspace 1) + %1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... @@ -15347,30 +15347,30 @@ body: | ; SI-LABEL: name: test_load_global_v2p3_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 4, addrspace 1) ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) ; CI-HSA-LABEL: name: test_load_global_v2p3_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 4, addrspace 1) ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) ; CI-MESA-LABEL: name: test_load_global_v2p3_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 4, addrspace 1) ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) ; VI-LABEL: name: test_load_global_v2p3_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 4, addrspace 1) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) ; GFX9-HSA-LABEL: name: test_load_global_v2p3_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 4, addrspace 1) ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) ; GFX9-MESA-LABEL: name: test_load_global_v2p3_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 4, addrspace 1) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x p3>) = G_LOAD %0 :: (load 8, align 4, addrspace 1) + %1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 4, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... @@ -15382,16 +15382,16 @@ body: | ; SI-LABEL: name: test_load_global_v2p3_align1 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -15413,13 +15413,13 @@ body: | ; SI: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -15439,20 +15439,20 @@ body: | ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) ; CI-HSA-LABEL: name: test_load_global_v2p3_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load 8, align 1, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 1, addrspace 1) ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) ; CI-MESA-LABEL: name: test_load_global_v2p3_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -15474,13 +15474,13 @@ body: | ; CI-MESA: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) ; CI-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -15500,16 +15500,16 @@ body: | ; CI-MESA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) ; VI-LABEL: name: test_load_global_v2p3_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -15531,13 +15531,13 @@ body: | ; VI: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -15557,20 +15557,20 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) ; GFX9-HSA-LABEL: name: test_load_global_v2p3_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load 8, align 1, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), align 1, addrspace 1) ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) ; GFX9-MESA-LABEL: name: test_load_global_v2p3_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -15592,13 +15592,13 @@ body: | ; GFX9-MESA: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) ; GFX9-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -15617,7 +15617,7 @@ body: | ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x p3>) = G_LOAD %0 :: (load 8, align 1, addrspace 1) + %1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 1, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... @@ -15629,30 +15629,30 @@ body: | ; SI-LABEL: name: test_ext_load_global_s32_from_1_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; SI: $vgpr0 = COPY [[LOAD]](s32) ; CI-HSA-LABEL: name: test_ext_load_global_s32_from_1_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; CI-HSA: $vgpr0 = COPY [[LOAD]](s32) ; CI-MESA-LABEL: name: test_ext_load_global_s32_from_1_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; CI-MESA: $vgpr0 = COPY [[LOAD]](s32) ; VI-LABEL: name: test_ext_load_global_s32_from_1_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; VI: $vgpr0 = COPY [[LOAD]](s32) ; GFX9-HSA-LABEL: name: test_ext_load_global_s32_from_1_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; GFX9-HSA: $vgpr0 = COPY [[LOAD]](s32) ; GFX9-MESA-LABEL: name: test_ext_load_global_s32_from_1_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; GFX9-MESA: $vgpr0 = COPY [[LOAD]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (load 1, align 4, addrspace 1) + %1:_(s32) = G_LOAD %0 :: (load (s8), align 4, addrspace 1) $vgpr0 = COPY %1 ... @@ -15664,30 +15664,30 @@ body: | ; SI-LABEL: name: test_ext_load_global_s32_from_2_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; SI: $vgpr0 = COPY [[LOAD]](s32) ; CI-HSA-LABEL: name: test_ext_load_global_s32_from_2_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; CI-HSA: $vgpr0 = COPY [[LOAD]](s32) ; CI-MESA-LABEL: name: test_ext_load_global_s32_from_2_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; CI-MESA: $vgpr0 = COPY [[LOAD]](s32) ; VI-LABEL: name: test_ext_load_global_s32_from_2_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; VI: $vgpr0 = COPY [[LOAD]](s32) ; GFX9-HSA-LABEL: name: test_ext_load_global_s32_from_2_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; GFX9-HSA: $vgpr0 = COPY [[LOAD]](s32) ; GFX9-MESA-LABEL: name: test_ext_load_global_s32_from_2_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; GFX9-MESA: $vgpr0 = COPY [[LOAD]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (load 2, align 4, addrspace 1) + %1:_(s32) = G_LOAD %0 :: (load (s16), align 4, addrspace 1) $vgpr0 = COPY %1 ... @@ -15700,36 +15700,36 @@ body: | ; SI-LABEL: name: test_ext_load_global_s64_from_1_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-HSA-LABEL: name: test_ext_load_global_s64_from_1_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; CI-HSA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-HSA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-MESA-LABEL: name: test_ext_load_global_s64_from_1_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_ext_load_global_s64_from_1_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-HSA-LABEL: name: test_ext_load_global_s64_from_1_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; GFX9-HSA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-MESA-LABEL: name: test_ext_load_global_s64_from_1_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load 1, align 4, addrspace 1) + %1:_(s64) = G_LOAD %0 :: (load (s8), align 4, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... @@ -15741,36 +15741,36 @@ body: | ; SI-LABEL: name: test_ext_load_global_s64_from_2_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-HSA-LABEL: name: test_ext_load_global_s64_from_2_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; CI-HSA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-HSA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-MESA-LABEL: name: test_ext_load_global_s64_from_2_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_ext_load_global_s64_from_2_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-HSA-LABEL: name: test_ext_load_global_s64_from_2_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; GFX9-HSA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-MESA-LABEL: name: test_ext_load_global_s64_from_2_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load 2, align 4, addrspace 1) + %1:_(s64) = G_LOAD %0 :: (load (s16), align 4, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... @@ -15786,36 +15786,36 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) ; SI-LABEL: name: test_ext_load_global_s64_from_4_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-HSA-LABEL: name: test_ext_load_global_s64_from_4_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; CI-HSA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-HSA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-MESA-LABEL: name: test_ext_load_global_s64_from_4_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_ext_load_global_s64_from_4_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-HSA-LABEL: name: test_ext_load_global_s64_from_4_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; GFX9-HSA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-MESA-LABEL: name: test_ext_load_global_s64_from_4_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load 4, align 4, addrspace 1) + %1:_(s64) = G_LOAD %0 :: (load (s32), align 4, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... @@ -15827,7 +15827,7 @@ body: | ; SI-LABEL: name: test_ext_load_global_s128_from_4_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) ; SI: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF @@ -15835,7 +15835,7 @@ body: | ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) ; CI-HSA-LABEL: name: test_ext_load_global_s128_from_4_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; CI-HSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CI-HSA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) ; CI-HSA: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF @@ -15843,7 +15843,7 @@ body: | ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) ; CI-MESA-LABEL: name: test_ext_load_global_s128_from_4_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; CI-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CI-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) ; CI-MESA: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF @@ -15851,7 +15851,7 @@ body: | ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) ; VI-LABEL: name: test_ext_load_global_s128_from_4_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) ; VI: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF @@ -15859,7 +15859,7 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) ; GFX9-HSA-LABEL: name: test_ext_load_global_s128_from_4_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; GFX9-HSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9-HSA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) ; GFX9-HSA: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF @@ -15867,14 +15867,14 @@ body: | ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) ; GFX9-MESA-LABEL: name: test_ext_load_global_s128_from_4_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9-MESA: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; GFX9-MESA: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s128) = G_LOAD %0 :: (load 4, align 4, addrspace 1) + %1:_(s128) = G_LOAD %0 :: (load (s32), align 4, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -15886,36 +15886,36 @@ body: | ; SI-LABEL: name: test_ext_load_global_s64_from_2_align2 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-HSA-LABEL: name: test_ext_load_global_s64_from_2_align2 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; CI-HSA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-HSA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-MESA-LABEL: name: test_ext_load_global_s64_from_2_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_ext_load_global_s64_from_2_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-HSA-LABEL: name: test_ext_load_global_s64_from_2_align2 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; GFX9-HSA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-MESA-LABEL: name: test_ext_load_global_s64_from_2_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1) ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load 2, align 4, addrspace 1) + %1:_(s64) = G_LOAD %0 :: (load (s16), align 4, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... @@ -15927,232 +15927,232 @@ body: | ; SI-LABEL: name: test_ext_load_global_s64_from_1_align1 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-HSA-LABEL: name: test_ext_load_global_s64_from_1_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; CI-HSA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-HSA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-MESA-LABEL: name: test_ext_load_global_s64_from_1_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_ext_load_global_s64_from_1_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-HSA-LABEL: name: test_ext_load_global_s64_from_1_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; GFX9-HSA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-MESA-LABEL: name: test_ext_load_global_s64_from_1_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1) ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_LOAD %0 :: (load 1, align 4, addrspace 1) + %1:_(s64) = G_LOAD %0 :: (load (s8), align 4, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... --- -name: test_extload_global_v2s32_from_4_align1 +name: test_extload_global_v2s32_from_v2s16_align1 body: | bb.0: liveins: $vgpr0_vgpr1 - ; SI-LABEL: name: test_extload_global_v2s32_from_4_align1 + ; SI-LABEL: name: test_extload_global_v2s32_from_v2s16_align1 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 4, align 1, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 1, addrspace 1) ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) - ; CI-HSA-LABEL: name: test_extload_global_v2s32_from_4_align1 + ; CI-HSA-LABEL: name: test_extload_global_v2s32_from_v2s16_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 4, align 1, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 1, addrspace 1) ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) - ; CI-MESA-LABEL: name: test_extload_global_v2s32_from_4_align1 + ; CI-MESA-LABEL: name: test_extload_global_v2s32_from_v2s16_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 4, align 1, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 1, addrspace 1) ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) - ; VI-LABEL: name: test_extload_global_v2s32_from_4_align1 + ; VI-LABEL: name: test_extload_global_v2s32_from_v2s16_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 4, align 1, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 1, addrspace 1) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) - ; GFX9-HSA-LABEL: name: test_extload_global_v2s32_from_4_align1 + ; GFX9-HSA-LABEL: name: test_extload_global_v2s32_from_v2s16_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 4, align 1, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 1, addrspace 1) ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) - ; GFX9-MESA-LABEL: name: test_extload_global_v2s32_from_4_align1 + ; GFX9-MESA-LABEL: name: test_extload_global_v2s32_from_v2s16_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 4, align 1, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 1, addrspace 1) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_LOAD %0 :: (load 4, align 1, addrspace 1) + %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s16>), align 1, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... --- -name: test_extload_global_v2s32_from_4_align2 +name: test_extload_global_v2s32_from_v2s16_align2 body: | bb.0: liveins: $vgpr0_vgpr1 - ; SI-LABEL: name: test_extload_global_v2s32_from_4_align2 + ; SI-LABEL: name: test_extload_global_v2s32_from_v2s16_align2 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 4, align 2, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 2, addrspace 1) ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) - ; CI-HSA-LABEL: name: test_extload_global_v2s32_from_4_align2 + ; CI-HSA-LABEL: name: test_extload_global_v2s32_from_v2s16_align2 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 4, align 2, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 2, addrspace 1) ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) - ; CI-MESA-LABEL: name: test_extload_global_v2s32_from_4_align2 + ; CI-MESA-LABEL: name: test_extload_global_v2s32_from_v2s16_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 4, align 2, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 2, addrspace 1) ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) - ; VI-LABEL: name: test_extload_global_v2s32_from_4_align2 + ; VI-LABEL: name: test_extload_global_v2s32_from_v2s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 4, align 2, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 2, addrspace 1) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) - ; GFX9-HSA-LABEL: name: test_extload_global_v2s32_from_4_align2 + ; GFX9-HSA-LABEL: name: test_extload_global_v2s32_from_v2s16_align2 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 4, align 2, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 2, addrspace 1) ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) - ; GFX9-MESA-LABEL: name: test_extload_global_v2s32_from_4_align2 + ; GFX9-MESA-LABEL: name: test_extload_global_v2s32_from_v2s16_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 4, align 2, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), align 2, addrspace 1) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_LOAD %0 :: (load 4, align 2, addrspace 1) + %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s16>), align 2, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... --- -name: test_extload_global_v2s32_from_4_align4 +name: test_extload_global_v2s32_from_v2s16_align4 body: | bb.0: liveins: $vgpr0_vgpr1 - ; SI-LABEL: name: test_extload_global_v2s32_from_4_align4 + ; SI-LABEL: name: test_extload_global_v2s32_from_v2s16_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) - ; CI-HSA-LABEL: name: test_extload_global_v2s32_from_4_align4 + ; CI-HSA-LABEL: name: test_extload_global_v2s32_from_v2s16_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) ; CI-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) - ; CI-MESA-LABEL: name: test_extload_global_v2s32_from_4_align4 + ; CI-MESA-LABEL: name: test_extload_global_v2s32_from_v2s16_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) ; CI-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) - ; VI-LABEL: name: test_extload_global_v2s32_from_4_align4 + ; VI-LABEL: name: test_extload_global_v2s32_from_v2s16_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) - ; GFX9-HSA-LABEL: name: test_extload_global_v2s32_from_4_align4 + ; GFX9-HSA-LABEL: name: test_extload_global_v2s32_from_v2s16_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) - ; GFX9-MESA-LABEL: name: test_extload_global_v2s32_from_4_align4 + ; GFX9-MESA-LABEL: name: test_extload_global_v2s32_from_v2s16_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_LOAD %0 :: (load 4, align 4, addrspace 1) + %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... --- -name: test_extload_global_v3s32_from_6_align4 +name: test_extload_global_v3s32_from_v3s16_align4 body: | bb.0: liveins: $vgpr0_vgpr1 - ; SI-LABEL: name: test_extload_global_v3s32_from_6_align4 + ; SI-LABEL: name: test_extload_global_v3s32_from_v3s16_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 6, align 4, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1) ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) - ; CI-HSA-LABEL: name: test_extload_global_v3s32_from_6_align4 + ; CI-HSA-LABEL: name: test_extload_global_v3s32_from_v3s16_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 6, align 4, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1) ; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) - ; CI-MESA-LABEL: name: test_extload_global_v3s32_from_6_align4 + ; CI-MESA-LABEL: name: test_extload_global_v3s32_from_v3s16_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 6, align 4, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1) ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) - ; VI-LABEL: name: test_extload_global_v3s32_from_6_align4 + ; VI-LABEL: name: test_extload_global_v3s32_from_v3s16_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 6, align 4, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) - ; GFX9-HSA-LABEL: name: test_extload_global_v3s32_from_6_align4 + ; GFX9-HSA-LABEL: name: test_extload_global_v3s32_from_v3s16_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 6, align 4, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1) ; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) - ; GFX9-MESA-LABEL: name: test_extload_global_v3s32_from_6_align4 + ; GFX9-MESA-LABEL: name: test_extload_global_v3s32_from_v3s16_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 6, align 4, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s16>), align 4, addrspace 1) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<3 x s32>) = G_LOAD %0 :: (load 6, align 4, addrspace 1) + %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s16>), align 4, addrspace 1) $vgpr0_vgpr1_vgpr2 = COPY %1 ... --- -name: test_extload_global_v4s32_from_8_align4 +name: test_extload_global_v4s32_from_v4s16_align4 body: | bb.0: liveins: $vgpr0_vgpr1 - ; SI-LABEL: name: test_extload_global_v4s32_from_8_align4 + ; SI-LABEL: name: test_extload_global_v4s32_from_v4s16_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) - ; CI-HSA-LABEL: name: test_extload_global_v4s32_from_8_align4 + ; CI-HSA-LABEL: name: test_extload_global_v4s32_from_v4s16_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1) ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) - ; CI-MESA-LABEL: name: test_extload_global_v4s32_from_8_align4 + ; CI-MESA-LABEL: name: test_extload_global_v4s32_from_v4s16_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1) ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) - ; VI-LABEL: name: test_extload_global_v4s32_from_8_align4 + ; VI-LABEL: name: test_extload_global_v4s32_from_v4s16_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) - ; GFX9-HSA-LABEL: name: test_extload_global_v4s32_from_8_align4 + ; GFX9-HSA-LABEL: name: test_extload_global_v4s32_from_v4s16_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1) ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) - ; GFX9-MESA-LABEL: name: test_extload_global_v4s32_from_8_align4 + ; GFX9-MESA-LABEL: name: test_extload_global_v4s32_from_v4s16_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), align 4, addrspace 1) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<4 x s32>) = G_LOAD %0 :: (load 8, align 4, addrspace 1) + %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s16>), align 4, addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... --- -name: test_extload_global_v2s96_from_24_align1 +name: test_global_v2s96_align1 body: | bb.0: liveins: $vgpr0_vgpr1 - ; SI-LABEL: name: test_extload_global_v2s96_from_24_align1 + ; SI-LABEL: name: test_global_v2s96_align1 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -16173,13 +16173,13 @@ body: | ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -16196,13 +16196,13 @@ body: | ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; SI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1) + ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 1 from unknown-address + 9, addrspace 1) + ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load 1 from unknown-address + 10, addrspace 1) + ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load 1 from unknown-address + 11, addrspace 1) + ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -16221,13 +16221,13 @@ body: | ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64) - ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load 1 from unknown-address + 12, addrspace 1) + ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) ; SI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load 1 from unknown-address + 13, addrspace 1) + ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) ; SI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load 1 from unknown-address + 14, addrspace 1) + ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1) ; SI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load 1 from unknown-address + 15, addrspace 1) + ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1) ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -16243,13 +16243,13 @@ body: | ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32) ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]] ; SI: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s64) - ; SI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load 1 from unknown-address + 16, addrspace 1) + ; SI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load (s8) from unknown-address + 16, addrspace 1) ; SI: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; SI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p1) :: (load 1 from unknown-address + 17, addrspace 1) + ; SI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p1) :: (load (s8) from unknown-address + 17, addrspace 1) ; SI: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; SI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load 1 from unknown-address + 18, addrspace 1) + ; SI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load (s8) from unknown-address + 18, addrspace 1) ; SI: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; SI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load 1 from unknown-address + 19, addrspace 1) + ; SI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load (s8) from unknown-address + 19, addrspace 1) ; SI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD16]](s32) ; SI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]] ; SI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32) @@ -16265,13 +16265,13 @@ body: | ; SI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32) ; SI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]] ; SI: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C8]](s64) - ; SI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load 1 from unknown-address + 20, addrspace 1) + ; SI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load (s8) from unknown-address + 20, addrspace 1) ; SI: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64) - ; SI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p1) :: (load 1 from unknown-address + 21, addrspace 1) + ; SI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p1) :: (load (s8) from unknown-address + 21, addrspace 1) ; SI: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64) - ; SI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load 1 from unknown-address + 22, addrspace 1) + ; SI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load (s8) from unknown-address + 22, addrspace 1) ; SI: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64) - ; SI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load 1 from unknown-address + 23, addrspace 1) + ; SI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load (s8) from unknown-address + 23, addrspace 1) ; SI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD20]](s32) ; SI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]] ; SI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32) @@ -16292,30 +16292,30 @@ body: | ; SI: [[COPY26:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY25]](s96) ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY26]](s96) - ; CI-HSA-LABEL: name: test_extload_global_v2s96_from_24_align1 + ; CI-HSA-LABEL: name: test_global_v2s96_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 1, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (s96), align 1, addrspace 1) ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-HSA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load 12 from unknown-address + 12, align 1, addrspace 1) + ; CI-HSA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (s96) from unknown-address + 12, align 1, addrspace 1) ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) ; CI-HSA: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) ; CI-HSA: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-HSA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) - ; CI-MESA-LABEL: name: test_extload_global_v2s96_from_24_align1 + ; CI-MESA-LABEL: name: test_global_v2s96_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -16336,13 +16336,13 @@ body: | ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; CI-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -16359,13 +16359,13 @@ body: | ; CI-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; CI-MESA: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CI-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1) + ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) ; CI-MESA: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 1 from unknown-address + 9, addrspace 1) + ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) ; CI-MESA: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; CI-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load 1 from unknown-address + 10, addrspace 1) + ; CI-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) ; CI-MESA: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; CI-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load 1 from unknown-address + 11, addrspace 1) + ; CI-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; CI-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -16384,13 +16384,13 @@ body: | ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI-MESA: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; CI-MESA: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64) - ; CI-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load 1 from unknown-address + 12, addrspace 1) + ; CI-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) ; CI-MESA: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; CI-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load 1 from unknown-address + 13, addrspace 1) + ; CI-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) ; CI-MESA: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; CI-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load 1 from unknown-address + 14, addrspace 1) + ; CI-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1) ; CI-MESA: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; CI-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load 1 from unknown-address + 15, addrspace 1) + ; CI-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1) ; CI-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; CI-MESA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; CI-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -16406,13 +16406,13 @@ body: | ; CI-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32) ; CI-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]] ; CI-MESA: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s64) - ; CI-MESA: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load 1 from unknown-address + 16, addrspace 1) + ; CI-MESA: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load (s8) from unknown-address + 16, addrspace 1) ; CI-MESA: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; CI-MESA: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p1) :: (load 1 from unknown-address + 17, addrspace 1) + ; CI-MESA: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p1) :: (load (s8) from unknown-address + 17, addrspace 1) ; CI-MESA: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; CI-MESA: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load 1 from unknown-address + 18, addrspace 1) + ; CI-MESA: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load (s8) from unknown-address + 18, addrspace 1) ; CI-MESA: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; CI-MESA: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load 1 from unknown-address + 19, addrspace 1) + ; CI-MESA: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load (s8) from unknown-address + 19, addrspace 1) ; CI-MESA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD16]](s32) ; CI-MESA: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]] ; CI-MESA: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32) @@ -16428,13 +16428,13 @@ body: | ; CI-MESA: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32) ; CI-MESA: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]] ; CI-MESA: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C8]](s64) - ; CI-MESA: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load 1 from unknown-address + 20, addrspace 1) + ; CI-MESA: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load (s8) from unknown-address + 20, addrspace 1) ; CI-MESA: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64) - ; CI-MESA: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p1) :: (load 1 from unknown-address + 21, addrspace 1) + ; CI-MESA: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p1) :: (load (s8) from unknown-address + 21, addrspace 1) ; CI-MESA: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64) - ; CI-MESA: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load 1 from unknown-address + 22, addrspace 1) + ; CI-MESA: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load (s8) from unknown-address + 22, addrspace 1) ; CI-MESA: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64) - ; CI-MESA: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load 1 from unknown-address + 23, addrspace 1) + ; CI-MESA: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load (s8) from unknown-address + 23, addrspace 1) ; CI-MESA: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD20]](s32) ; CI-MESA: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]] ; CI-MESA: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32) @@ -16455,18 +16455,18 @@ body: | ; CI-MESA: [[COPY26:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY25]](s96) ; CI-MESA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY26]](s96) - ; VI-LABEL: name: test_extload_global_v2s96_from_24_align1 + ; VI-LABEL: name: test_global_v2s96_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -16487,13 +16487,13 @@ body: | ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -16510,13 +16510,13 @@ body: | ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1) + ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 1 from unknown-address + 9, addrspace 1) + ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load 1 from unknown-address + 10, addrspace 1) + ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load 1 from unknown-address + 11, addrspace 1) + ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -16535,13 +16535,13 @@ body: | ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; VI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64) - ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load 1 from unknown-address + 12, addrspace 1) + ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) ; VI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load 1 from unknown-address + 13, addrspace 1) + ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) ; VI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load 1 from unknown-address + 14, addrspace 1) + ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1) ; VI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load 1 from unknown-address + 15, addrspace 1) + ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1) ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -16557,13 +16557,13 @@ body: | ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32) ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]] ; VI: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s64) - ; VI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load 1 from unknown-address + 16, addrspace 1) + ; VI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load (s8) from unknown-address + 16, addrspace 1) ; VI: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; VI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p1) :: (load 1 from unknown-address + 17, addrspace 1) + ; VI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p1) :: (load (s8) from unknown-address + 17, addrspace 1) ; VI: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; VI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load 1 from unknown-address + 18, addrspace 1) + ; VI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load (s8) from unknown-address + 18, addrspace 1) ; VI: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; VI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load 1 from unknown-address + 19, addrspace 1) + ; VI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load (s8) from unknown-address + 19, addrspace 1) ; VI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD16]](s32) ; VI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]] ; VI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32) @@ -16579,13 +16579,13 @@ body: | ; VI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32) ; VI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]] ; VI: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C8]](s64) - ; VI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load 1 from unknown-address + 20, addrspace 1) + ; VI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load (s8) from unknown-address + 20, addrspace 1) ; VI: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64) - ; VI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p1) :: (load 1 from unknown-address + 21, addrspace 1) + ; VI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p1) :: (load (s8) from unknown-address + 21, addrspace 1) ; VI: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64) - ; VI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load 1 from unknown-address + 22, addrspace 1) + ; VI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load (s8) from unknown-address + 22, addrspace 1) ; VI: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64) - ; VI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load 1 from unknown-address + 23, addrspace 1) + ; VI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load (s8) from unknown-address + 23, addrspace 1) ; VI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD20]](s32) ; VI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]] ; VI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32) @@ -16606,30 +16606,30 @@ body: | ; VI: [[COPY26:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY25]](s96) ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY26]](s96) - ; GFX9-HSA-LABEL: name: test_extload_global_v2s96_from_24_align1 + ; GFX9-HSA-LABEL: name: test_global_v2s96_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 1, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (s96), align 1, addrspace 1) ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load 12 from unknown-address + 12, align 1, addrspace 1) + ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (s96) from unknown-address + 12, align 1, addrspace 1) ; GFX9-HSA: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9-HSA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) - ; GFX9-MESA-LABEL: name: test_extload_global_v2s96_from_24_align1 + ; GFX9-MESA-LABEL: name: test_global_v2s96_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 1, addrspace 1) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1 from unknown-address + 2, addrspace 1) + ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1) ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1 from unknown-address + 3, addrspace 1) + ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s8) from unknown-address + 3, addrspace 1) ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -16650,13 +16650,13 @@ body: | ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX9-MESA: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 1 from unknown-address + 4, addrspace 1) + ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s8) from unknown-address + 4, addrspace 1) ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 1 from unknown-address + 5, addrspace 1) + ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1) ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) - ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 1 from unknown-address + 6, addrspace 1) + ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s8) from unknown-address + 6, addrspace 1) ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) - ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 1 from unknown-address + 7, addrspace 1) + ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s8) from unknown-address + 7, addrspace 1) ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -16673,13 +16673,13 @@ body: | ; GFX9-MESA: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; GFX9-MESA: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GFX9-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 1 from unknown-address + 8, addrspace 1) + ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s8) from unknown-address + 8, addrspace 1) ; GFX9-MESA: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 1 from unknown-address + 9, addrspace 1) + ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1) ; GFX9-MESA: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) - ; GFX9-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load 1 from unknown-address + 10, addrspace 1) + ; GFX9-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s8) from unknown-address + 10, addrspace 1) ; GFX9-MESA: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; GFX9-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load 1 from unknown-address + 11, addrspace 1) + ; GFX9-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s8) from unknown-address + 11, addrspace 1) ; GFX9-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; GFX9-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -16698,13 +16698,13 @@ body: | ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX9-MESA: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; GFX9-MESA: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64) - ; GFX9-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load 1 from unknown-address + 12, addrspace 1) + ; GFX9-MESA: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p1) :: (load (s8) from unknown-address + 12, addrspace 1) ; GFX9-MESA: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) - ; GFX9-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load 1 from unknown-address + 13, addrspace 1) + ; GFX9-MESA: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1) ; GFX9-MESA: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) - ; GFX9-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load 1 from unknown-address + 14, addrspace 1) + ; GFX9-MESA: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p1) :: (load (s8) from unknown-address + 14, addrspace 1) ; GFX9-MESA: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) - ; GFX9-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load 1 from unknown-address + 15, addrspace 1) + ; GFX9-MESA: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p1) :: (load (s8) from unknown-address + 15, addrspace 1) ; GFX9-MESA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; GFX9-MESA: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; GFX9-MESA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -16720,13 +16720,13 @@ body: | ; GFX9-MESA: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32) ; GFX9-MESA: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]] ; GFX9-MESA: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s64) - ; GFX9-MESA: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load 1 from unknown-address + 16, addrspace 1) + ; GFX9-MESA: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p1) :: (load (s8) from unknown-address + 16, addrspace 1) ; GFX9-MESA: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) - ; GFX9-MESA: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p1) :: (load 1 from unknown-address + 17, addrspace 1) + ; GFX9-MESA: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p1) :: (load (s8) from unknown-address + 17, addrspace 1) ; GFX9-MESA: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) - ; GFX9-MESA: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load 1 from unknown-address + 18, addrspace 1) + ; GFX9-MESA: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p1) :: (load (s8) from unknown-address + 18, addrspace 1) ; GFX9-MESA: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) - ; GFX9-MESA: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load 1 from unknown-address + 19, addrspace 1) + ; GFX9-MESA: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p1) :: (load (s8) from unknown-address + 19, addrspace 1) ; GFX9-MESA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD16]](s32) ; GFX9-MESA: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]] ; GFX9-MESA: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32) @@ -16742,13 +16742,13 @@ body: | ; GFX9-MESA: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32) ; GFX9-MESA: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]] ; GFX9-MESA: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C8]](s64) - ; GFX9-MESA: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load 1 from unknown-address + 20, addrspace 1) + ; GFX9-MESA: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p1) :: (load (s8) from unknown-address + 20, addrspace 1) ; GFX9-MESA: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64) - ; GFX9-MESA: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p1) :: (load 1 from unknown-address + 21, addrspace 1) + ; GFX9-MESA: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p1) :: (load (s8) from unknown-address + 21, addrspace 1) ; GFX9-MESA: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64) - ; GFX9-MESA: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load 1 from unknown-address + 22, addrspace 1) + ; GFX9-MESA: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p1) :: (load (s8) from unknown-address + 22, addrspace 1) ; GFX9-MESA: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64) - ; GFX9-MESA: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load 1 from unknown-address + 23, addrspace 1) + ; GFX9-MESA: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p1) :: (load (s8) from unknown-address + 23, addrspace 1) ; GFX9-MESA: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD20]](s32) ; GFX9-MESA: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]] ; GFX9-MESA: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32) @@ -16770,7 +16770,7 @@ body: | ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY25]](s96) ; GFX9-MESA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY26]](s96) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s96>) = G_LOAD %0 :: (load 24, align 1, addrspace 1) + %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 1, addrspace 1) %2:_(s96) = G_EXTRACT %1, 0 %3:_(s96) = G_EXTRACT %1, 96 $vgpr0_vgpr1_vgpr2 = COPY %2 @@ -16778,17 +16778,17 @@ body: | ... --- -name: test_extload_global_v2s96_from_24_align2 +name: test_global_v2s96_align2 body: | bb.0: liveins: $vgpr0_vgpr1 - ; SI-LABEL: name: test_extload_global_v2s96_from_24_align2 + ; SI-LABEL: name: test_global_v2s96_align2 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -16799,9 +16799,9 @@ body: | ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2 from unknown-address + 6, addrspace 1) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -16810,9 +16810,9 @@ body: | ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 from unknown-address + 8, addrspace 1) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 2 from unknown-address + 10, addrspace 1) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -16823,9 +16823,9 @@ body: | ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 2 from unknown-address + 12, addrspace 1) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 2 from unknown-address + 14, addrspace 1) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s16) from unknown-address + 14, addrspace 1) ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) @@ -16833,9 +16833,9 @@ body: | ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32) ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s64) - ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 2 from unknown-address + 16, addrspace 1) + ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s16) from unknown-address + 16, addrspace 1) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 2 from unknown-address + 18, addrspace 1) + ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s16) from unknown-address + 18, addrspace 1) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]] ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -16843,9 +16843,9 @@ body: | ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32) ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s64) - ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load 2 from unknown-address + 20, addrspace 1) + ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s16) from unknown-address + 20, addrspace 1) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load 2 from unknown-address + 22, addrspace 1) + ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s16) from unknown-address + 22, addrspace 1) ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]] ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) @@ -16858,24 +16858,24 @@ body: | ; SI: [[COPY14:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY13]](s96) ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY14]](s96) - ; CI-HSA-LABEL: name: test_extload_global_v2s96_from_24_align2 + ; CI-HSA-LABEL: name: test_global_v2s96_align2 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 2, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (s96), align 2, addrspace 1) ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-HSA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load 12 from unknown-address + 12, align 2, addrspace 1) + ; CI-HSA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (s96) from unknown-address + 12, align 2, addrspace 1) ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) ; CI-HSA: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) ; CI-HSA: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-HSA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) - ; CI-MESA-LABEL: name: test_extload_global_v2s96_from_24_align2 + ; CI-MESA-LABEL: name: test_global_v2s96_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -16886,9 +16886,9 @@ body: | ; CI-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1) + ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2 from unknown-address + 6, addrspace 1) + ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; CI-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -16897,9 +16897,9 @@ body: | ; CI-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CI-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 from unknown-address + 8, addrspace 1) + ; CI-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) ; CI-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 2 from unknown-address + 10, addrspace 1) + ; CI-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; CI-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -16910,9 +16910,9 @@ body: | ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; CI-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 2 from unknown-address + 12, addrspace 1) + ; CI-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1) ; CI-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 2 from unknown-address + 14, addrspace 1) + ; CI-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s16) from unknown-address + 14, addrspace 1) ; CI-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) ; CI-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] ; CI-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) @@ -16920,9 +16920,9 @@ body: | ; CI-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32) ; CI-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; CI-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s64) - ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 2 from unknown-address + 16, addrspace 1) + ; CI-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s16) from unknown-address + 16, addrspace 1) ; CI-MESA: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 2 from unknown-address + 18, addrspace 1) + ; CI-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s16) from unknown-address + 18, addrspace 1) ; CI-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; CI-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]] ; CI-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -16930,9 +16930,9 @@ body: | ; CI-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32) ; CI-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] ; CI-MESA: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s64) - ; CI-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load 2 from unknown-address + 20, addrspace 1) + ; CI-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s16) from unknown-address + 20, addrspace 1) ; CI-MESA: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; CI-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load 2 from unknown-address + 22, addrspace 1) + ; CI-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s16) from unknown-address + 22, addrspace 1) ; CI-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) ; CI-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]] ; CI-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) @@ -16945,12 +16945,12 @@ body: | ; CI-MESA: [[COPY14:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY13]](s96) ; CI-MESA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY14]](s96) - ; VI-LABEL: name: test_extload_global_v2s96_from_24_align2 + ; VI-LABEL: name: test_global_v2s96_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -16961,9 +16961,9 @@ body: | ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2 from unknown-address + 6, addrspace 1) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -16972,9 +16972,9 @@ body: | ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 from unknown-address + 8, addrspace 1) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 2 from unknown-address + 10, addrspace 1) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -16985,9 +16985,9 @@ body: | ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 2 from unknown-address + 12, addrspace 1) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 2 from unknown-address + 14, addrspace 1) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s16) from unknown-address + 14, addrspace 1) ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) @@ -16995,9 +16995,9 @@ body: | ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32) ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s64) - ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 2 from unknown-address + 16, addrspace 1) + ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s16) from unknown-address + 16, addrspace 1) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 2 from unknown-address + 18, addrspace 1) + ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s16) from unknown-address + 18, addrspace 1) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]] ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -17005,9 +17005,9 @@ body: | ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32) ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s64) - ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load 2 from unknown-address + 20, addrspace 1) + ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s16) from unknown-address + 20, addrspace 1) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load 2 from unknown-address + 22, addrspace 1) + ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s16) from unknown-address + 22, addrspace 1) ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]] ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) @@ -17020,24 +17020,24 @@ body: | ; VI: [[COPY14:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY13]](s96) ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY14]](s96) - ; GFX9-HSA-LABEL: name: test_extload_global_v2s96_from_24_align2 + ; GFX9-HSA-LABEL: name: test_global_v2s96_align2 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 2, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (s96), align 2, addrspace 1) ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load 12 from unknown-address + 12, align 2, addrspace 1) + ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (s96) from unknown-address + 12, align 2, addrspace 1) ; GFX9-HSA: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9-HSA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) - ; GFX9-MESA-LABEL: name: test_extload_global_v2s96_from_24_align2 + ; GFX9-MESA-LABEL: name: test_global_v2s96_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 2 from unknown-address + 2, addrspace 1) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 2, addrspace 1) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -17048,9 +17048,9 @@ body: | ; GFX9-MESA: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX9-MESA: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 from unknown-address + 4, addrspace 1) + ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1) ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 2 from unknown-address + 6, addrspace 1) + ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s16) from unknown-address + 6, addrspace 1) ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -17059,9 +17059,9 @@ body: | ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; GFX9-MESA: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GFX9-MESA: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load 2 from unknown-address + 8, addrspace 1) + ; GFX9-MESA: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1) ; GFX9-MESA: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) - ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load 2 from unknown-address + 10, addrspace 1) + ; GFX9-MESA: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s16) from unknown-address + 10, addrspace 1) ; GFX9-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9-MESA: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; GFX9-MESA: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -17072,9 +17072,9 @@ body: | ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX9-MESA: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; GFX9-MESA: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load 2 from unknown-address + 12, addrspace 1) + ; GFX9-MESA: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1) ; GFX9-MESA: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) - ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load 2 from unknown-address + 14, addrspace 1) + ; GFX9-MESA: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p1) :: (load (s16) from unknown-address + 14, addrspace 1) ; GFX9-MESA: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) ; GFX9-MESA: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] ; GFX9-MESA: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) @@ -17082,9 +17082,9 @@ body: | ; GFX9-MESA: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32) ; GFX9-MESA: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; GFX9-MESA: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s64) - ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load 2 from unknown-address + 16, addrspace 1) + ; GFX9-MESA: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p1) :: (load (s16) from unknown-address + 16, addrspace 1) ; GFX9-MESA: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) - ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load 2 from unknown-address + 18, addrspace 1) + ; GFX9-MESA: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s16) from unknown-address + 18, addrspace 1) ; GFX9-MESA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX9-MESA: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]] ; GFX9-MESA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -17092,9 +17092,9 @@ body: | ; GFX9-MESA: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32) ; GFX9-MESA: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] ; GFX9-MESA: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s64) - ; GFX9-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load 2 from unknown-address + 20, addrspace 1) + ; GFX9-MESA: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p1) :: (load (s16) from unknown-address + 20, addrspace 1) ; GFX9-MESA: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) - ; GFX9-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load 2 from unknown-address + 22, addrspace 1) + ; GFX9-MESA: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p1) :: (load (s16) from unknown-address + 22, addrspace 1) ; GFX9-MESA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) ; GFX9-MESA: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]] ; GFX9-MESA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) @@ -17108,7 +17108,7 @@ body: | ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY13]](s96) ; GFX9-MESA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY14]](s96) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s96>) = G_LOAD %0 :: (load 24, align 2, addrspace 1) + %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 2, addrspace 1) %2:_(s96) = G_EXTRACT %1, 0 %3:_(s96) = G_EXTRACT %1, 96 $vgpr0_vgpr1_vgpr2 = COPY %2 @@ -17116,25 +17116,25 @@ body: | ... --- -name: test_extload_global_v2s96_from_24_align4 +name: test_global_v2s96_align4 body: | bb.0: liveins: $vgpr0_vgpr1 - ; SI-LABEL: name: test_extload_global_v2s96_from_24_align4 + ; SI-LABEL: name: test_global_v2s96_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load (s64), align 4, addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 4 from unknown-address + 8, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from unknown-address + 8, addrspace 1) ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p1) :: (load 8 from unknown-address + 12, align 4, addrspace 1) + ; SI: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p1) :: (load (s64) from unknown-address + 12, align 4, addrspace 1) ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 4 from unknown-address + 20, addrspace 1) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s32) from unknown-address + 20, addrspace 1) ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>) ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32) ; SI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) @@ -17142,68 +17142,68 @@ body: | ; SI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) - ; CI-HSA-LABEL: name: test_extload_global_v2s96_from_24_align4 + ; CI-HSA-LABEL: name: test_global_v2s96_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (s96), align 4, addrspace 1) ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-HSA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load 12 from unknown-address + 12, align 4, addrspace 1) + ; CI-HSA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (s96) from unknown-address + 12, align 4, addrspace 1) ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) ; CI-HSA: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) ; CI-HSA: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-HSA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) - ; CI-MESA-LABEL: name: test_extload_global_v2s96_from_24_align4 + ; CI-MESA-LABEL: name: test_global_v2s96_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (s96), align 4, addrspace 1) ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load 12 from unknown-address + 12, align 4, addrspace 1) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (s96) from unknown-address + 12, align 4, addrspace 1) ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) ; CI-MESA: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) ; CI-MESA: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-MESA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) - ; VI-LABEL: name: test_extload_global_v2s96_from_24_align4 + ; VI-LABEL: name: test_global_v2s96_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (s96), align 4, addrspace 1) ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load 12 from unknown-address + 12, align 4, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (s96) from unknown-address + 12, align 4, addrspace 1) ; VI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) ; VI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) - ; GFX9-HSA-LABEL: name: test_extload_global_v2s96_from_24_align4 + ; GFX9-HSA-LABEL: name: test_global_v2s96_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (s96), align 4, addrspace 1) ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load 12 from unknown-address + 12, align 4, addrspace 1) + ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (s96) from unknown-address + 12, align 4, addrspace 1) ; GFX9-HSA: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9-HSA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) - ; GFX9-MESA-LABEL: name: test_extload_global_v2s96_from_24_align4 + ; GFX9-MESA-LABEL: name: test_global_v2s96_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (s96), align 4, addrspace 1) ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load 12 from unknown-address + 12, align 4, addrspace 1) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (s96) from unknown-address + 12, align 4, addrspace 1) ; GFX9-MESA: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9-MESA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s96>) = G_LOAD %0 :: (load 24, align 4, addrspace 1) + %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 4, addrspace 1) %2:_(s96) = G_EXTRACT %1, 0 %3:_(s96) = G_EXTRACT %1, 96 $vgpr0_vgpr1_vgpr2 = COPY %2 @@ -17211,22 +17211,22 @@ body: | ... --- -name: test_extload_global_v2s96_from_24_align16 +name: test_global_v2s96_align16 body: | bb.0: liveins: $vgpr0_vgpr1 - ; SI-LABEL: name: test_extload_global_v2s96_from_24_align16 + ; SI-LABEL: name: test_global_v2s96_align16 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[LOAD]](<4 x s32>), 0 ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[EXTRACT]](<3 x s32>) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load 8 from unknown-address + 12, align 4, addrspace 1) + ; SI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 12, align 4, addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C1]](s64) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 4 from unknown-address + 20, addrspace 1) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32) from unknown-address + 20, addrspace 1) ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<2 x s32>) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD2]](s32) ; SI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) @@ -17234,68 +17234,68 @@ body: | ; SI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) - ; CI-HSA-LABEL: name: test_extload_global_v2s96_from_24_align16 + ; CI-HSA-LABEL: name: test_global_v2s96_align16 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 16, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (s96), align 16, addrspace 1) ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-HSA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load 12 from unknown-address + 12, align 4, addrspace 1) + ; CI-HSA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (s96) from unknown-address + 12, align 4, addrspace 1) ; CI-HSA: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) ; CI-HSA: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) ; CI-HSA: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-HSA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) - ; CI-MESA-LABEL: name: test_extload_global_v2s96_from_24_align16 + ; CI-MESA-LABEL: name: test_global_v2s96_align16 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 16, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (s96), align 16, addrspace 1) ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load 12 from unknown-address + 12, align 4, addrspace 1) + ; CI-MESA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (s96) from unknown-address + 12, align 4, addrspace 1) ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) ; CI-MESA: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) ; CI-MESA: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-MESA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) - ; VI-LABEL: name: test_extload_global_v2s96_from_24_align16 + ; VI-LABEL: name: test_global_v2s96_align16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 16, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (s96), align 16, addrspace 1) ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load 12 from unknown-address + 12, align 4, addrspace 1) + ; VI: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (s96) from unknown-address + 12, align 4, addrspace 1) ; VI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) ; VI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) - ; GFX9-HSA-LABEL: name: test_extload_global_v2s96_from_24_align16 + ; GFX9-HSA-LABEL: name: test_global_v2s96_align16 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 16, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (s96), align 16, addrspace 1) ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load 12 from unknown-address + 12, align 4, addrspace 1) + ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (s96) from unknown-address + 12, align 4, addrspace 1) ; GFX9-HSA: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9-HSA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9-HSA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) - ; GFX9-MESA-LABEL: name: test_extload_global_v2s96_from_24_align16 + ; GFX9-MESA-LABEL: name: test_global_v2s96_align16 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 16, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (s96), align 16, addrspace 1) ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load 12 from unknown-address + 12, align 4, addrspace 1) + ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (s96) from unknown-address + 12, align 4, addrspace 1) ; GFX9-MESA: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9-MESA: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9-MESA: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s96>) = G_LOAD %0 :: (load 24, align 16, addrspace 1) + %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 16, addrspace 1) %2:_(s96) = G_EXTRACT %1, 0 %3:_(s96) = G_EXTRACT %1, 96 $vgpr0_vgpr1_vgpr2 = COPY %2 @@ -17310,7 +17310,7 @@ body: | ; SI-LABEL: name: test_load_global_v32s1_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (<32 x s1>), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 @@ -17410,7 +17410,7 @@ body: | ; SI: $vgpr0 = COPY [[TRUNC]](<32 x s1>) ; CI-HSA-LABEL: name: test_load_global_v32s1_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (<32 x s1>), addrspace 1) ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 @@ -17510,7 +17510,7 @@ body: | ; CI-HSA: $vgpr0 = COPY [[TRUNC]](<32 x s1>) ; CI-MESA-LABEL: name: test_load_global_v32s1_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (<32 x s1>), addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 @@ -17610,7 +17610,7 @@ body: | ; CI-MESA: $vgpr0 = COPY [[TRUNC]](<32 x s1>) ; VI-LABEL: name: test_load_global_v32s1_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (<32 x s1>), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 @@ -17710,7 +17710,7 @@ body: | ; VI: $vgpr0 = COPY [[TRUNC]](<32 x s1>) ; GFX9-HSA-LABEL: name: test_load_global_v32s1_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (<32 x s1>), addrspace 1) ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 @@ -17826,7 +17826,7 @@ body: | ; GFX9-HSA: $vgpr0 = COPY [[TRUNC]](<32 x s1>) ; GFX9-MESA-LABEL: name: test_load_global_v32s1_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (<32 x s1>), addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 @@ -17941,7 +17941,7 @@ body: | ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<32 x s1>) = G_TRUNC [[CONCAT_VECTORS]](<32 x s16>) ; GFX9-MESA: $vgpr0 = COPY [[TRUNC]](<32 x s1>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<32 x s1>) = G_LOAD %0 :: (load 4, align 4, addrspace 1) + %1:_(<32 x s1>) = G_LOAD %0 :: (load (<32 x s1>), align 4, addrspace 1) $vgpr0 = COPY %1 ... @@ -17953,7 +17953,7 @@ body: | ; SI-LABEL: name: test_load_global_v8s4_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (<8 x s4>), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 @@ -17981,7 +17981,7 @@ body: | ; SI: $vgpr0 = COPY [[TRUNC]](<8 x s4>) ; CI-HSA-LABEL: name: test_load_global_v8s4_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (<8 x s4>), addrspace 1) ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 @@ -18009,7 +18009,7 @@ body: | ; CI-HSA: $vgpr0 = COPY [[TRUNC]](<8 x s4>) ; CI-MESA-LABEL: name: test_load_global_v8s4_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (<8 x s4>), addrspace 1) ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 @@ -18037,7 +18037,7 @@ body: | ; CI-MESA: $vgpr0 = COPY [[TRUNC]](<8 x s4>) ; VI-LABEL: name: test_load_global_v8s4_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (<8 x s4>), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 @@ -18065,7 +18065,7 @@ body: | ; VI: $vgpr0 = COPY [[TRUNC]](<8 x s4>) ; GFX9-HSA-LABEL: name: test_load_global_v8s4_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (<8 x s4>), addrspace 1) ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9-HSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 @@ -18097,7 +18097,7 @@ body: | ; GFX9-HSA: $vgpr0 = COPY [[TRUNC]](<8 x s4>) ; GFX9-MESA-LABEL: name: test_load_global_v8s4_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (<8 x s4>), addrspace 1) ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 @@ -18128,6 +18128,6 @@ body: | ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<8 x s4>) = G_TRUNC [[CONCAT_VECTORS]](<8 x s16>) ; GFX9-MESA: $vgpr0 = COPY [[TRUNC]](<8 x s4>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<8 x s4>) = G_LOAD %0 :: (load 4, align 4, addrspace 1) + %1:_(<8 x s4>) = G_LOAD %0 :: (load (<8 x s4>), align 4, addrspace 1) $vgpr0 = COPY %1 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir index 1dd4347e17289..0efdb04db7636 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir @@ -16,62 +16,62 @@ body: | ; SI-LABEL: name: test_load_local_s1_align1 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s1), addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; SI: $vgpr0 = COPY [[AND]](s32) ; CI-LABEL: name: test_load_local_s1_align1 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s1), addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; CI: $vgpr0 = COPY [[AND]](s32) ; CI-DS128-LABEL: name: test_load_local_s1_align1 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s1), addrspace 3) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; CI-DS128: $vgpr0 = COPY [[AND]](s32) ; VI-LABEL: name: test_load_local_s1_align1 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s1), addrspace 3) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; VI: $vgpr0 = COPY [[AND]](s32) ; GFX9-LABEL: name: test_load_local_s1_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s1), addrspace 3) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; GFX9: $vgpr0 = COPY [[AND]](s32) ; GFX9-UNALIGNED-LABEL: name: test_load_local_s1_align1 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s1), addrspace 3) ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; GFX9-UNALIGNED: $vgpr0 = COPY [[AND]](s32) ; GFX10-LABEL: name: test_load_local_s1_align1 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s1), addrspace 3) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; GFX10: $vgpr0 = COPY [[AND]](s32) ; GFX10-UNALIGNED-LABEL: name: test_load_local_s1_align1 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s1), addrspace 3) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; GFX10-UNALIGNED: $vgpr0 = COPY [[AND]](s32) %0:_(p3) = COPY $vgpr0 - %1:_(s1) = G_LOAD %0 :: (load 1, align 1, addrspace 3) + %1:_(s1) = G_LOAD %0 :: (load (s1), align 1, addrspace 3) %2:_(s32) = G_ZEXT %1 $vgpr0 = COPY %2 ... @@ -84,62 +84,62 @@ body: | ; SI-LABEL: name: test_load_local_s2_align1 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s2), addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; SI: $vgpr0 = COPY [[AND]](s32) ; CI-LABEL: name: test_load_local_s2_align1 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s2), addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; CI: $vgpr0 = COPY [[AND]](s32) ; CI-DS128-LABEL: name: test_load_local_s2_align1 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s2), addrspace 3) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; CI-DS128: $vgpr0 = COPY [[AND]](s32) ; VI-LABEL: name: test_load_local_s2_align1 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s2), addrspace 3) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; VI: $vgpr0 = COPY [[AND]](s32) ; GFX9-LABEL: name: test_load_local_s2_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s2), addrspace 3) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; GFX9: $vgpr0 = COPY [[AND]](s32) ; GFX9-UNALIGNED-LABEL: name: test_load_local_s2_align1 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s2), addrspace 3) ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; GFX9-UNALIGNED: $vgpr0 = COPY [[AND]](s32) ; GFX10-LABEL: name: test_load_local_s2_align1 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s2), addrspace 3) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; GFX10: $vgpr0 = COPY [[AND]](s32) ; GFX10-UNALIGNED-LABEL: name: test_load_local_s2_align1 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s2), addrspace 3) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; GFX10-UNALIGNED: $vgpr0 = COPY [[AND]](s32) %0:_(p3) = COPY $vgpr0 - %1:_(s2) = G_LOAD %0 :: (load 1, align 1, addrspace 3) + %1:_(s2) = G_LOAD %0 :: (load (s2), align 1, addrspace 3) %2:_(s32) = G_ZEXT %1 $vgpr0 = COPY %2 ... @@ -152,46 +152,46 @@ body: | ; SI-LABEL: name: test_load_local_s8_align4 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: $vgpr0 = COPY [[COPY1]](s32) ; CI-LABEL: name: test_load_local_s8_align4 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: $vgpr0 = COPY [[COPY1]](s32) ; CI-DS128-LABEL: name: test_load_local_s8_align4 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-DS128: $vgpr0 = COPY [[COPY1]](s32) ; VI-LABEL: name: test_load_local_s8_align4 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-LABEL: name: test_load_local_s8_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-UNALIGNED-LABEL: name: test_load_local_s8_align4 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-UNALIGNED: $vgpr0 = COPY [[COPY1]](s32) ; GFX10-LABEL: name: test_load_local_s8_align4 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10: $vgpr0 = COPY [[COPY1]](s32) ; GFX10-UNALIGNED-LABEL: name: test_load_local_s8_align4 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10-UNALIGNED: $vgpr0 = COPY [[COPY1]](s32) %0:_(p3) = COPY $vgpr0 - %1:_(s8) = G_LOAD %0 :: (load 1, align 4, addrspace 3) + %1:_(s8) = G_LOAD %0 :: (load (s8), align 4, addrspace 3) %2:_(s32) = G_ANYEXT %1 $vgpr0 = COPY %2 ... @@ -204,46 +204,46 @@ body: | ; SI-LABEL: name: test_load_local_s8_align1 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: $vgpr0 = COPY [[COPY1]](s32) ; CI-LABEL: name: test_load_local_s8_align1 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: $vgpr0 = COPY [[COPY1]](s32) ; CI-DS128-LABEL: name: test_load_local_s8_align1 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-DS128: $vgpr0 = COPY [[COPY1]](s32) ; VI-LABEL: name: test_load_local_s8_align1 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-LABEL: name: test_load_local_s8_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-UNALIGNED-LABEL: name: test_load_local_s8_align1 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-UNALIGNED: $vgpr0 = COPY [[COPY1]](s32) ; GFX10-LABEL: name: test_load_local_s8_align1 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10: $vgpr0 = COPY [[COPY1]](s32) ; GFX10-UNALIGNED-LABEL: name: test_load_local_s8_align1 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10-UNALIGNED: $vgpr0 = COPY [[COPY1]](s32) %0:_(p3) = COPY $vgpr0 - %1:_(s8) = G_LOAD %0 :: (load 1, align 1, addrspace 3) + %1:_(s8) = G_LOAD %0 :: (load (s8), align 1, addrspace 3) %2:_(s32) = G_ANYEXT %1 $vgpr0 = COPY %2 ... @@ -256,46 +256,46 @@ body: | ; SI-LABEL: name: test_load_local_s16_align4 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: $vgpr0 = COPY [[COPY1]](s32) ; CI-LABEL: name: test_load_local_s16_align4 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: $vgpr0 = COPY [[COPY1]](s32) ; CI-DS128-LABEL: name: test_load_local_s16_align4 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-DS128: $vgpr0 = COPY [[COPY1]](s32) ; VI-LABEL: name: test_load_local_s16_align4 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-LABEL: name: test_load_local_s16_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-UNALIGNED-LABEL: name: test_load_local_s16_align4 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-UNALIGNED: $vgpr0 = COPY [[COPY1]](s32) ; GFX10-LABEL: name: test_load_local_s16_align4 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10: $vgpr0 = COPY [[COPY1]](s32) ; GFX10-UNALIGNED-LABEL: name: test_load_local_s16_align4 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10-UNALIGNED: $vgpr0 = COPY [[COPY1]](s32) %0:_(p3) = COPY $vgpr0 - %1:_(s16) = G_LOAD %0 :: (load 2, align 4, addrspace 3) + %1:_(s16) = G_LOAD %0 :: (load (s16), align 4, addrspace 3) %2:_(s32) = G_ANYEXT %1 $vgpr0 = COPY %2 ... @@ -308,46 +308,46 @@ body: | ; SI-LABEL: name: test_load_local_s16_align2 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: $vgpr0 = COPY [[COPY1]](s32) ; CI-LABEL: name: test_load_local_s16_align2 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: $vgpr0 = COPY [[COPY1]](s32) ; CI-DS128-LABEL: name: test_load_local_s16_align2 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-DS128: $vgpr0 = COPY [[COPY1]](s32) ; VI-LABEL: name: test_load_local_s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-LABEL: name: test_load_local_s16_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-UNALIGNED-LABEL: name: test_load_local_s16_align2 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-UNALIGNED: $vgpr0 = COPY [[COPY1]](s32) ; GFX10-LABEL: name: test_load_local_s16_align2 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10: $vgpr0 = COPY [[COPY1]](s32) ; GFX10-UNALIGNED-LABEL: name: test_load_local_s16_align2 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10-UNALIGNED: $vgpr0 = COPY [[COPY1]](s32) %0:_(p3) = COPY $vgpr0 - %1:_(s16) = G_LOAD %0 :: (load 2, align 2, addrspace 3) + %1:_(s16) = G_LOAD %0 :: (load (s16), align 2, addrspace 3) %2:_(s32) = G_ANYEXT %1 $vgpr0 = COPY %2 ... @@ -360,10 +360,10 @@ body: | ; SI-LABEL: name: test_load_local_s16_align1 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; SI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -378,10 +378,10 @@ body: | ; SI: $vgpr0 = COPY [[ANYEXT]](s32) ; CI-LABEL: name: test_load_local_s16_align1 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; CI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -396,10 +396,10 @@ body: | ; CI: $vgpr0 = COPY [[ANYEXT]](s32) ; CI-DS128-LABEL: name: test_load_local_s16_align1 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; CI-DS128: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-DS128: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -414,10 +414,10 @@ body: | ; CI-DS128: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_load_local_s16_align1 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -430,10 +430,10 @@ body: | ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_load_local_s16_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -446,15 +446,15 @@ body: | ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-UNALIGNED-LABEL: name: test_load_local_s16_align1 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 1, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 1, addrspace 3) ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-UNALIGNED: $vgpr0 = COPY [[COPY1]](s32) ; GFX10-LABEL: name: test_load_local_s16_align1 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX10: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX10: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -467,10 +467,10 @@ body: | ; GFX10: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX10-UNALIGNED-LABEL: name: test_load_local_s16_align1 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX10-UNALIGNED: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -482,7 +482,7 @@ body: | ; GFX10-UNALIGNED: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX10-UNALIGNED: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(p3) = COPY $vgpr0 - %1:_(s16) = G_LOAD %0 :: (load 2, align 1, addrspace 3) + %1:_(s16) = G_LOAD %0 :: (load (s16), align 1, addrspace 3) %2:_(s32) = G_ANYEXT %1 $vgpr0 = COPY %2 ... @@ -495,38 +495,38 @@ body: | ; SI-LABEL: name: test_load_local_s32_align4 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; SI: $vgpr0 = COPY [[LOAD]](s32) ; CI-LABEL: name: test_load_local_s32_align4 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; CI: $vgpr0 = COPY [[LOAD]](s32) ; CI-DS128-LABEL: name: test_load_local_s32_align4 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; CI-DS128: $vgpr0 = COPY [[LOAD]](s32) ; VI-LABEL: name: test_load_local_s32_align4 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; VI: $vgpr0 = COPY [[LOAD]](s32) ; GFX9-LABEL: name: test_load_local_s32_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; GFX9: $vgpr0 = COPY [[LOAD]](s32) ; GFX9-UNALIGNED-LABEL: name: test_load_local_s32_align4 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; GFX9-UNALIGNED: $vgpr0 = COPY [[LOAD]](s32) ; GFX10-LABEL: name: test_load_local_s32_align4 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; GFX10: $vgpr0 = COPY [[LOAD]](s32) ; GFX10-UNALIGNED-LABEL: name: test_load_local_s32_align4 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; GFX10-UNALIGNED: $vgpr0 = COPY [[LOAD]](s32) %0:_(p3) = COPY $vgpr0 - %1:_(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 3) + %1:_(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 3) $vgpr0 = COPY %1 ... @@ -538,10 +538,10 @@ body: | ; SI-LABEL: name: test_load_local_s32_align2 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -553,10 +553,10 @@ body: | ; SI: $vgpr0 = COPY [[OR]](s32) ; CI-LABEL: name: test_load_local_s32_align2 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -568,10 +568,10 @@ body: | ; CI: $vgpr0 = COPY [[OR]](s32) ; CI-DS128-LABEL: name: test_load_local_s32_align2 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -583,10 +583,10 @@ body: | ; CI-DS128: $vgpr0 = COPY [[OR]](s32) ; VI-LABEL: name: test_load_local_s32_align2 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -598,10 +598,10 @@ body: | ; VI: $vgpr0 = COPY [[OR]](s32) ; GFX9-LABEL: name: test_load_local_s32_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -613,14 +613,14 @@ body: | ; GFX9: $vgpr0 = COPY [[OR]](s32) ; GFX9-UNALIGNED-LABEL: name: test_load_local_s32_align2 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, align 2, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 2, addrspace 3) ; GFX9-UNALIGNED: $vgpr0 = COPY [[LOAD]](s32) ; GFX10-LABEL: name: test_load_local_s32_align2 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -632,10 +632,10 @@ body: | ; GFX10: $vgpr0 = COPY [[OR]](s32) ; GFX10-UNALIGNED-LABEL: name: test_load_local_s32_align2 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -646,7 +646,7 @@ body: | ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX10-UNALIGNED: $vgpr0 = COPY [[OR]](s32) %0:_(p3) = COPY $vgpr0 - %1:_(s32) = G_LOAD %0 :: (load 4, align 2, addrspace 3) + %1:_(s32) = G_LOAD %0 :: (load (s32), align 2, addrspace 3) $vgpr0 = COPY %1 ... @@ -658,16 +658,16 @@ body: | ; SI-LABEL: name: test_load_local_s32_align1 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -689,16 +689,16 @@ body: | ; SI: $vgpr0 = COPY [[OR2]](s32) ; CI-LABEL: name: test_load_local_s32_align1 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -720,16 +720,16 @@ body: | ; CI: $vgpr0 = COPY [[OR2]](s32) ; CI-DS128-LABEL: name: test_load_local_s32_align1 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -751,16 +751,16 @@ body: | ; CI-DS128: $vgpr0 = COPY [[OR2]](s32) ; VI-LABEL: name: test_load_local_s32_align1 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -782,16 +782,16 @@ body: | ; VI: $vgpr0 = COPY [[OR2]](s32) ; GFX9-LABEL: name: test_load_local_s32_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -813,20 +813,20 @@ body: | ; GFX9: $vgpr0 = COPY [[OR2]](s32) ; GFX9-UNALIGNED-LABEL: name: test_load_local_s32_align1 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, align 1, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 1, addrspace 3) ; GFX9-UNALIGNED: $vgpr0 = COPY [[LOAD]](s32) ; GFX10-LABEL: name: test_load_local_s32_align1 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -848,16 +848,16 @@ body: | ; GFX10: $vgpr0 = COPY [[OR2]](s32) ; GFX10-UNALIGNED-LABEL: name: test_load_local_s32_align1 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -878,7 +878,7 @@ body: | ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX10-UNALIGNED: $vgpr0 = COPY [[OR2]](s32) %0:_(p3) = COPY $vgpr0 - %1:_(s32) = G_LOAD %0 :: (load 4, align 1, addrspace 3) + %1:_(s32) = G_LOAD %0 :: (load (s32), align 1, addrspace 3) $vgpr0 = COPY %1 ... @@ -890,46 +890,46 @@ body: | ; SI-LABEL: name: test_load_local_s24_align8 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, align 8, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3) ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: $vgpr0 = COPY [[COPY1]](s32) ; CI-LABEL: name: test_load_local_s24_align8 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, align 8, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3) ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: $vgpr0 = COPY [[COPY1]](s32) ; CI-DS128-LABEL: name: test_load_local_s24_align8 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, align 8, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3) ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-DS128: $vgpr0 = COPY [[COPY1]](s32) ; VI-LABEL: name: test_load_local_s24_align8 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, align 8, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3) ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-LABEL: name: test_load_local_s24_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, align 8, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-UNALIGNED-LABEL: name: test_load_local_s24_align8 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, align 8, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3) ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-UNALIGNED: $vgpr0 = COPY [[COPY1]](s32) ; GFX10-LABEL: name: test_load_local_s24_align8 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, align 8, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3) ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10: $vgpr0 = COPY [[COPY1]](s32) ; GFX10-UNALIGNED-LABEL: name: test_load_local_s24_align8 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, align 8, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), align 8, addrspace 3) ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10-UNALIGNED: $vgpr0 = COPY [[COPY1]](s32) %0:_(p3) = COPY $vgpr0 - %1:_(s24) = G_LOAD %0 :: (load 3, align 8, addrspace 3) + %1:_(s24) = G_LOAD %0 :: (load (s24), align 8, addrspace 3) %2:_(s32) = G_ANYEXT %1 $vgpr0 = COPY %2 ... @@ -942,46 +942,46 @@ body: | ; SI-LABEL: name: test_load_local_s24_align4 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: $vgpr0 = COPY [[COPY1]](s32) ; CI-LABEL: name: test_load_local_s24_align4 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: $vgpr0 = COPY [[COPY1]](s32) ; CI-DS128-LABEL: name: test_load_local_s24_align4 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-DS128: $vgpr0 = COPY [[COPY1]](s32) ; VI-LABEL: name: test_load_local_s24_align4 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-LABEL: name: test_load_local_s24_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-UNALIGNED-LABEL: name: test_load_local_s24_align4 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9-UNALIGNED: $vgpr0 = COPY [[COPY1]](s32) ; GFX10-LABEL: name: test_load_local_s24_align4 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10: $vgpr0 = COPY [[COPY1]](s32) ; GFX10-UNALIGNED-LABEL: name: test_load_local_s24_align4 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10-UNALIGNED: $vgpr0 = COPY [[COPY1]](s32) %0:_(p3) = COPY $vgpr0 - %1:_(s24) = G_LOAD %0 :: (load 3, align 4, addrspace 3) + %1:_(s24) = G_LOAD %0 :: (load (s24), align 4, addrspace 3) %2:_(s32) = G_ANYEXT %1 $vgpr0 = COPY %2 ... @@ -994,10 +994,10 @@ body: | ; SI-LABEL: name: test_load_local_s24_align2 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 2, align 2, addrspace 3) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, align 2, addrspace 3) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -1031,10 +1031,10 @@ body: | ; SI: $vgpr0 = COPY [[COPY5]](s32) ; CI-LABEL: name: test_load_local_s24_align2 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 2, align 2, addrspace 3) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, align 2, addrspace 3) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -1068,10 +1068,10 @@ body: | ; CI: $vgpr0 = COPY [[COPY5]](s32) ; CI-DS128-LABEL: name: test_load_local_s24_align2 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 2, align 2, addrspace 3) + ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, align 2, addrspace 3) ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI-DS128: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -1105,10 +1105,10 @@ body: | ; CI-DS128: $vgpr0 = COPY [[COPY5]](s32) ; VI-LABEL: name: test_load_local_s24_align2 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 2, align 2, addrspace 3) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, align 2, addrspace 3) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -1138,10 +1138,10 @@ body: | ; VI: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-LABEL: name: test_load_local_s24_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 2, align 2, addrspace 3) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, align 2, addrspace 3) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -1171,10 +1171,10 @@ body: | ; GFX9: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-UNALIGNED-LABEL: name: test_load_local_s24_align2 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX9-UNALIGNED: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 2, align 2, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, align 2, addrspace 3) ; GFX9-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-UNALIGNED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; GFX9-UNALIGNED: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] @@ -1182,10 +1182,10 @@ body: | ; GFX9-UNALIGNED: $vgpr0 = COPY [[COPY1]](s32) ; GFX10-LABEL: name: test_load_local_s24_align2 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 2, align 2, addrspace 3) + ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, align 2, addrspace 3) ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX10: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -1215,10 +1215,10 @@ body: | ; GFX10: $vgpr0 = COPY [[COPY1]](s32) ; GFX10-UNALIGNED-LABEL: name: test_load_local_s24_align2 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 2, align 2, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, align 2, addrspace 3) ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX10-UNALIGNED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -1247,7 +1247,7 @@ body: | ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) ; GFX10-UNALIGNED: $vgpr0 = COPY [[COPY1]](s32) %0:_(p3) = COPY $vgpr0 - %1:_(s24) = G_LOAD %0 :: (load 3, align 2, addrspace 3) + %1:_(s24) = G_LOAD %0 :: (load (s24), align 2, addrspace 3) %2:_(s32) = G_ANYEXT %1 $vgpr0 = COPY %2 ... @@ -1260,13 +1260,13 @@ body: | ; SI-LABEL: name: test_load_local_s24_align1 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; SI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) @@ -1295,13 +1295,13 @@ body: | ; SI: $vgpr0 = COPY [[COPY4]](s32) ; CI-LABEL: name: test_load_local_s24_align1 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) @@ -1330,13 +1330,13 @@ body: | ; CI: $vgpr0 = COPY [[COPY4]](s32) ; CI-DS128-LABEL: name: test_load_local_s24_align1 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; CI-DS128: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CI-DS128: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) @@ -1365,13 +1365,13 @@ body: | ; CI-DS128: $vgpr0 = COPY [[COPY4]](s32) ; VI-LABEL: name: test_load_local_s24_align1 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) @@ -1396,13 +1396,13 @@ body: | ; VI: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-LABEL: name: test_load_local_s24_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) @@ -1427,10 +1427,10 @@ body: | ; GFX9: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-UNALIGNED-LABEL: name: test_load_local_s24_align1 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load 2, align 1, addrspace 3) + ; GFX9-UNALIGNED: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), align 1, addrspace 3) ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX9-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-UNALIGNED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; GFX9-UNALIGNED: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] @@ -1438,13 +1438,13 @@ body: | ; GFX9-UNALIGNED: $vgpr0 = COPY [[COPY1]](s32) ; GFX10-LABEL: name: test_load_local_s24_align1 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) @@ -1469,13 +1469,13 @@ body: | ; GFX10: $vgpr0 = COPY [[COPY1]](s32) ; GFX10-UNALIGNED-LABEL: name: test_load_local_s24_align1 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX10-UNALIGNED: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) @@ -1499,7 +1499,7 @@ body: | ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) ; GFX10-UNALIGNED: $vgpr0 = COPY [[COPY1]](s32) %0:_(p3) = COPY $vgpr0 - %1:_(s24) = G_LOAD %0 :: (load 3, align 1, addrspace 3) + %1:_(s24) = G_LOAD %0 :: (load (s24), align 1, addrspace 3) %2:_(s32) = G_ANYEXT %1 $vgpr0 = COPY %2 ... @@ -1512,46 +1512,46 @@ body: | ; SI-LABEL: name: test_load_local_s48_align8 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) ; SI: $vgpr0_vgpr1 = COPY [[COPY1]](s64) ; CI-LABEL: name: test_load_local_s48_align8 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) ; CI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) ; CI: $vgpr0_vgpr1 = COPY [[COPY1]](s64) ; CI-DS128-LABEL: name: test_load_local_s48_align8 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) ; CI-DS128: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) ; CI-DS128: $vgpr0_vgpr1 = COPY [[COPY1]](s64) ; VI-LABEL: name: test_load_local_s48_align8 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) ; VI: $vgpr0_vgpr1 = COPY [[COPY1]](s64) ; GFX9-LABEL: name: test_load_local_s48_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) ; GFX9: $vgpr0_vgpr1 = COPY [[COPY1]](s64) ; GFX9-UNALIGNED-LABEL: name: test_load_local_s48_align8 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[COPY1]](s64) ; GFX10-LABEL: name: test_load_local_s48_align8 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) ; GFX10: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) ; GFX10: $vgpr0_vgpr1 = COPY [[COPY1]](s64) ; GFX10-UNALIGNED-LABEL: name: test_load_local_s48_align8 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[COPY1]](s64) %0:_(p3) = COPY $vgpr0 - %1:_(s48) = G_LOAD %0 :: (load 6, align 8, addrspace 3) + %1:_(s48) = G_LOAD %0 :: (load (s48), align 8, addrspace 3) %2:_(s64) = G_ANYEXT %1 $vgpr0_vgpr1 = COPY %2 ... @@ -1564,38 +1564,38 @@ body: | ; SI-LABEL: name: test_load_local_s64_align8 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) ; CI-LABEL: name: test_load_local_s64_align8 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) ; CI-DS128-LABEL: name: test_load_local_s64_align8 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) ; CI-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](s64) ; VI-LABEL: name: test_load_local_s64_align8 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) ; GFX9-LABEL: name: test_load_local_s64_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64) ; GFX9-UNALIGNED-LABEL: name: test_load_local_s64_align8 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](s64) ; GFX10-LABEL: name: test_load_local_s64_align8 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](s64) ; GFX10-UNALIGNED-LABEL: name: test_load_local_s64_align8 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](s64) %0:_(p3) = COPY $vgpr0 - %1:_(s64) = G_LOAD %0 :: (load 8, align 8, addrspace 3) + %1:_(s64) = G_LOAD %0 :: (load (s64), align 8, addrspace 3) $vgpr0_vgpr1 = COPY %1 ... @@ -1607,38 +1607,38 @@ body: | ; SI-LABEL: name: test_load_local_s64_align4 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) ; CI-LABEL: name: test_load_local_s64_align4 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) ; CI-DS128-LABEL: name: test_load_local_s64_align4 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; CI-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](s64) ; VI-LABEL: name: test_load_local_s64_align4 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](s64) ; GFX9-LABEL: name: test_load_local_s64_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64) ; GFX9-UNALIGNED-LABEL: name: test_load_local_s64_align4 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](s64) ; GFX10-LABEL: name: test_load_local_s64_align4 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](s64) ; GFX10-UNALIGNED-LABEL: name: test_load_local_s64_align4 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](s64) %0:_(p3) = COPY $vgpr0 - %1:_(s64) = G_LOAD %0 :: (load 8, align 4, addrspace 3) + %1:_(s64) = G_LOAD %0 :: (load (s64), align 4, addrspace 3) $vgpr0_vgpr1 = COPY %1 ... @@ -1650,16 +1650,16 @@ body: | ; SI-LABEL: name: test_load_local_s64_align2 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -1678,16 +1678,16 @@ body: | ; SI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; CI-LABEL: name: test_load_local_s64_align2 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -1706,16 +1706,16 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; CI-DS128-LABEL: name: test_load_local_s64_align2 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -1734,16 +1734,16 @@ body: | ; CI-DS128: $vgpr0_vgpr1 = COPY [[MV]](s64) ; VI-LABEL: name: test_load_local_s64_align2 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -1762,16 +1762,16 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; GFX9-LABEL: name: test_load_local_s64_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -1790,20 +1790,20 @@ body: | ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](s64) ; GFX9-UNALIGNED-LABEL: name: test_load_local_s64_align2 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 2, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 2, addrspace 3) ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](s64) ; GFX10-LABEL: name: test_load_local_s64_align2 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -1822,16 +1822,16 @@ body: | ; GFX10: $vgpr0_vgpr1 = COPY [[MV]](s64) ; GFX10-UNALIGNED-LABEL: name: test_load_local_s64_align2 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -1849,7 +1849,7 @@ body: | ; GFX10-UNALIGNED: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(p3) = COPY $vgpr0 - %1:_(s64) = G_LOAD %0 :: (load 8, align 2, addrspace 3) + %1:_(s64) = G_LOAD %0 :: (load (s64), align 2, addrspace 3) $vgpr0_vgpr1 = COPY %1 ... @@ -1861,28 +1861,28 @@ body: | ; SI-LABEL: name: test_load_local_s64_align1 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 ; SI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; SI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 ; SI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; SI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -1930,28 +1930,28 @@ body: | ; SI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; CI-LABEL: name: test_load_local_s64_align1 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 ; CI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; CI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 ; CI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; CI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -1999,28 +1999,28 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; CI-DS128-LABEL: name: test_load_local_s64_align1 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI-DS128: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 ; CI-DS128: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; CI-DS128: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; CI-DS128: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; CI-DS128: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 ; CI-DS128: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; CI-DS128: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-DS128: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -2068,28 +2068,28 @@ body: | ; CI-DS128: $vgpr0_vgpr1 = COPY [[MV]](s64) ; VI-LABEL: name: test_load_local_s64_align1 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 ; VI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; VI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 ; VI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; VI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -2129,28 +2129,28 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; GFX9-LABEL: name: test_load_local_s64_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; GFX9: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -2190,32 +2190,32 @@ body: | ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](s64) ; GFX9-UNALIGNED-LABEL: name: test_load_local_s64_align1 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 1, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 1, addrspace 3) ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](s64) ; GFX10-LABEL: name: test_load_local_s64_align1 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; GFX10: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 ; GFX10: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; GFX10: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; GFX10: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; GFX10: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; GFX10: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; GFX10: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 ; GFX10: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; GFX10: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; GFX10: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; GFX10: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX10: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -2255,28 +2255,28 @@ body: | ; GFX10: $vgpr0_vgpr1 = COPY [[MV]](s64) ; GFX10-UNALIGNED-LABEL: name: test_load_local_s64_align1 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10-UNALIGNED: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; GFX10-UNALIGNED: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 ; GFX10-UNALIGNED: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; GFX10-UNALIGNED: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; GFX10-UNALIGNED: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; GFX10-UNALIGNED: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; GFX10-UNALIGNED: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 ; GFX10-UNALIGNED: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; GFX10-UNALIGNED: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; GFX10-UNALIGNED: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX10-UNALIGNED: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -2315,7 +2315,7 @@ body: | ; GFX10-UNALIGNED: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(p3) = COPY $vgpr0 - %1:_(s64) = G_LOAD %0 :: (load 8, align 1, addrspace 3) + %1:_(s64) = G_LOAD %0 :: (load (s64), align 1, addrspace 3) $vgpr0_vgpr1 = COPY %1 ... @@ -2327,16 +2327,16 @@ body: | ; SI-LABEL: name: test_load_local_s96_align16 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -2357,13 +2357,13 @@ body: | ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -2379,13 +2379,13 @@ body: | ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; SI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; SI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -2405,16 +2405,16 @@ body: | ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; CI-LABEL: name: test_load_local_s96_align16 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -2435,13 +2435,13 @@ body: | ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; CI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -2457,13 +2457,13 @@ body: | ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; CI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; CI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; CI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; CI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -2483,16 +2483,16 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; CI-DS128-LABEL: name: test_load_local_s96_align16 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -2513,13 +2513,13 @@ body: | ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; CI-DS128: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI-DS128: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; CI-DS128: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; CI-DS128: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; CI-DS128: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; CI-DS128: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI-DS128: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; CI-DS128: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -2535,13 +2535,13 @@ body: | ; CI-DS128: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; CI-DS128: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; CI-DS128: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-DS128: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; CI-DS128: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; CI-DS128: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI-DS128: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; CI-DS128: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; CI-DS128: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; CI-DS128: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; CI-DS128: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; CI-DS128: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI-DS128: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; CI-DS128: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; CI-DS128: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; CI-DS128: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; CI-DS128: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -2561,16 +2561,16 @@ body: | ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; VI-LABEL: name: test_load_local_s96_align16 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -2591,13 +2591,13 @@ body: | ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -2613,13 +2613,13 @@ body: | ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; VI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -2639,16 +2639,16 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; GFX9-LABEL: name: test_load_local_s96_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -2669,13 +2669,13 @@ body: | ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -2691,13 +2691,13 @@ body: | ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -2717,21 +2717,21 @@ body: | ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; GFX9-UNALIGNED-LABEL: name: test_load_local_s96_align16 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 1, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (s96), align 1, addrspace 3) ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; GFX10-LABEL: name: test_load_local_s96_align16 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -2752,13 +2752,13 @@ body: | ; GFX10: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX10: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; GFX10: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; GFX10: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; GFX10: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; GFX10: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; GFX10: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX10: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; GFX10: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX10: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -2774,13 +2774,13 @@ body: | ; GFX10: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; GFX10: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; GFX10: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; GFX10: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; GFX10: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX10: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; GFX10: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; GFX10: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; GFX10: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; GFX10: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; GFX10: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX10: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; GFX10: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX10: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -2800,16 +2800,16 @@ body: | ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; GFX10-UNALIGNED-LABEL: name: test_load_local_s96_align16 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -2830,13 +2830,13 @@ body: | ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX10-UNALIGNED: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10-UNALIGNED: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; GFX10-UNALIGNED: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX10-UNALIGNED: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; GFX10-UNALIGNED: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX10-UNALIGNED: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX10-UNALIGNED: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -2852,13 +2852,13 @@ body: | ; GFX10-UNALIGNED: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; GFX10-UNALIGNED: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; GFX10-UNALIGNED: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10-UNALIGNED: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; GFX10-UNALIGNED: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX10-UNALIGNED: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; GFX10-UNALIGNED: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX10-UNALIGNED: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; GFX10-UNALIGNED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -2877,7 +2877,7 @@ body: | ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) %0:_(p3) = COPY $vgpr0 - %1:_(s96) = G_LOAD %0 :: (load 12, align 1, addrspace 3) + %1:_(s96) = G_LOAD %0 :: (load (s96), align 1, addrspace 3) $vgpr0_vgpr1_vgpr2 = COPY %1 ... @@ -2889,81 +2889,81 @@ body: | ; SI-LABEL: name: test_load_local_s96_align8 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, align 8, addrspace 3) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, align 8, addrspace 3) ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; CI-LABEL: name: test_load_local_s96_align8 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, align 8, addrspace 3) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, align 8, addrspace 3) ; CI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; CI-DS128-LABEL: name: test_load_local_s96_align8 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, align 8, addrspace 3) + ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, align 8, addrspace 3) ; CI-DS128: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; CI-DS128: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; VI-LABEL: name: test_load_local_s96_align8 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, align 8, addrspace 3) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, align 8, addrspace 3) ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; GFX9-LABEL: name: test_load_local_s96_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, align 8, addrspace 3) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, align 8, addrspace 3) ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; GFX9-UNALIGNED-LABEL: name: test_load_local_s96_align8 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 8, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (s96), align 8, addrspace 3) ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; GFX10-LABEL: name: test_load_local_s96_align8 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, align 8, addrspace 3) + ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, align 8, addrspace 3) ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; GFX10: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; GFX10-UNALIGNED-LABEL: name: test_load_local_s96_align8 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, align 8, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, align 8, addrspace 3) ; GFX10-UNALIGNED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) %0:_(p3) = COPY $vgpr0 - %1:_(s96) = G_LOAD %0 :: (load 12, align 8, addrspace 3) + %1:_(s96) = G_LOAD %0 :: (load (s96), align 8, addrspace 3) $vgpr0_vgpr1_vgpr2 = COPY %1 ... @@ -2975,81 +2975,81 @@ body: | ; SI-LABEL: name: test_load_local_s96_align4 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, addrspace 3) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; CI-LABEL: name: test_load_local_s96_align4 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, addrspace 3) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) ; CI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; CI-DS128-LABEL: name: test_load_local_s96_align4 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, addrspace 3) + ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) ; CI-DS128: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; CI-DS128: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; VI-LABEL: name: test_load_local_s96_align4 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, addrspace 3) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; GFX9-LABEL: name: test_load_local_s96_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, addrspace 3) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; GFX9-UNALIGNED-LABEL: name: test_load_local_s96_align4 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (s96), align 4, addrspace 3) ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; GFX10-LABEL: name: test_load_local_s96_align4 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, addrspace 3) + ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; GFX10: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; GFX10-UNALIGNED-LABEL: name: test_load_local_s96_align4 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) ; GFX10-UNALIGNED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) %0:_(p3) = COPY $vgpr0 - %1:_(s96) = G_LOAD %0 :: (load 12, align 4, addrspace 3) + %1:_(s96) = G_LOAD %0 :: (load (s96), align 4, addrspace 3) $vgpr0_vgpr1_vgpr2 = COPY %1 ... @@ -3061,10 +3061,10 @@ body: | ; SI-LABEL: name: test_load_local_s96_align2 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -3075,9 +3075,9 @@ body: | ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -3086,9 +3086,9 @@ body: | ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 2 from unknown-address + 8, addrspace 3) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 2 from unknown-address + 10, addrspace 3) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -3100,10 +3100,10 @@ body: | ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; CI-LABEL: name: test_load_local_s96_align2 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -3114,9 +3114,9 @@ body: | ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -3125,9 +3125,9 @@ body: | ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 2 from unknown-address + 8, addrspace 3) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 2 from unknown-address + 10, addrspace 3) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -3139,10 +3139,10 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; CI-DS128-LABEL: name: test_load_local_s96_align2 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -3153,9 +3153,9 @@ body: | ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; CI-DS128: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; CI-DS128: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; CI-DS128: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -3164,9 +3164,9 @@ body: | ; CI-DS128: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI-DS128: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 2 from unknown-address + 8, addrspace 3) + ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) ; CI-DS128: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 2 from unknown-address + 10, addrspace 3) + ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) ; CI-DS128: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI-DS128: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; CI-DS128: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -3178,10 +3178,10 @@ body: | ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; VI-LABEL: name: test_load_local_s96_align2 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -3192,9 +3192,9 @@ body: | ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -3203,9 +3203,9 @@ body: | ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 2 from unknown-address + 8, addrspace 3) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 2 from unknown-address + 10, addrspace 3) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -3217,10 +3217,10 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; GFX9-LABEL: name: test_load_local_s96_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -3231,9 +3231,9 @@ body: | ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -3242,9 +3242,9 @@ body: | ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 2 from unknown-address + 8, addrspace 3) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 2 from unknown-address + 10, addrspace 3) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -3256,15 +3256,15 @@ body: | ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; GFX9-UNALIGNED-LABEL: name: test_load_local_s96_align2 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 2, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (s96), align 2, addrspace 3) ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; GFX10-LABEL: name: test_load_local_s96_align2 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -3275,9 +3275,9 @@ body: | ; GFX10: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX10: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -3286,9 +3286,9 @@ body: | ; GFX10: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; GFX10: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX10: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 2 from unknown-address + 8, addrspace 3) + ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) ; GFX10: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 2 from unknown-address + 10, addrspace 3) + ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX10: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -3300,10 +3300,10 @@ body: | ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; GFX10-UNALIGNED-LABEL: name: test_load_local_s96_align2 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -3314,9 +3314,9 @@ body: | ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; GFX10-UNALIGNED: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX10-UNALIGNED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; GFX10-UNALIGNED: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -3325,9 +3325,9 @@ body: | ; GFX10-UNALIGNED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; GFX10-UNALIGNED: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX10-UNALIGNED: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 2 from unknown-address + 8, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 2 from unknown-address + 10, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) ; GFX10-UNALIGNED: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX10-UNALIGNED: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; GFX10-UNALIGNED: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -3338,7 +3338,7 @@ body: | ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) %0:_(p3) = COPY $vgpr0 - %1:_(s96) = G_LOAD %0 :: (load 12, align 2, addrspace 3) + %1:_(s96) = G_LOAD %0 :: (load (s96), align 2, addrspace 3) $vgpr0_vgpr1_vgpr2 = COPY %1 ... @@ -3350,16 +3350,16 @@ body: | ; SI-LABEL: name: test_load_local_s96_align1 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -3380,13 +3380,13 @@ body: | ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -3402,13 +3402,13 @@ body: | ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; SI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; SI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -3428,16 +3428,16 @@ body: | ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; CI-LABEL: name: test_load_local_s96_align1 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -3458,13 +3458,13 @@ body: | ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; CI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -3480,13 +3480,13 @@ body: | ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; CI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; CI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; CI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; CI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -3506,16 +3506,16 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; CI-DS128-LABEL: name: test_load_local_s96_align1 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -3536,13 +3536,13 @@ body: | ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; CI-DS128: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI-DS128: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; CI-DS128: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; CI-DS128: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; CI-DS128: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; CI-DS128: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI-DS128: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; CI-DS128: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -3558,13 +3558,13 @@ body: | ; CI-DS128: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; CI-DS128: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; CI-DS128: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-DS128: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; CI-DS128: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; CI-DS128: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI-DS128: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; CI-DS128: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; CI-DS128: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; CI-DS128: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; CI-DS128: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; CI-DS128: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI-DS128: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; CI-DS128: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; CI-DS128: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; CI-DS128: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; CI-DS128: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -3584,16 +3584,16 @@ body: | ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; VI-LABEL: name: test_load_local_s96_align1 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -3614,13 +3614,13 @@ body: | ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -3636,13 +3636,13 @@ body: | ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; VI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -3662,16 +3662,16 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; GFX9-LABEL: name: test_load_local_s96_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -3692,13 +3692,13 @@ body: | ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -3714,13 +3714,13 @@ body: | ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -3740,21 +3740,21 @@ body: | ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; GFX9-UNALIGNED-LABEL: name: test_load_local_s96_align1 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 1, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (s96), align 1, addrspace 3) ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; GFX10-LABEL: name: test_load_local_s96_align1 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -3775,13 +3775,13 @@ body: | ; GFX10: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX10: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; GFX10: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; GFX10: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; GFX10: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; GFX10: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; GFX10: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX10: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; GFX10: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX10: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -3797,13 +3797,13 @@ body: | ; GFX10: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; GFX10: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; GFX10: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; GFX10: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; GFX10: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX10: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; GFX10: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; GFX10: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; GFX10: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; GFX10: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; GFX10: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX10: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; GFX10: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX10: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -3823,16 +3823,16 @@ body: | ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; GFX10-UNALIGNED-LABEL: name: test_load_local_s96_align1 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -3853,13 +3853,13 @@ body: | ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX10-UNALIGNED: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10-UNALIGNED: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; GFX10-UNALIGNED: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX10-UNALIGNED: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; GFX10-UNALIGNED: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX10-UNALIGNED: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX10-UNALIGNED: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -3875,13 +3875,13 @@ body: | ; GFX10-UNALIGNED: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; GFX10-UNALIGNED: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; GFX10-UNALIGNED: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10-UNALIGNED: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; GFX10-UNALIGNED: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX10-UNALIGNED: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; GFX10-UNALIGNED: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX10-UNALIGNED: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; GFX10-UNALIGNED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -3900,7 +3900,7 @@ body: | ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) %0:_(p3) = COPY $vgpr0 - %1:_(s96) = G_LOAD %0 :: (load 12, align 1, addrspace 3) + %1:_(s96) = G_LOAD %0 :: (load (s96), align 1, addrspace 3) $vgpr0_vgpr1_vgpr2 = COPY %1 ... @@ -3912,16 +3912,16 @@ body: | ; SI-LABEL: name: test_load_local_s128_align16 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -3942,13 +3942,13 @@ body: | ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -3965,13 +3965,13 @@ body: | ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; SI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; SI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -3987,13 +3987,13 @@ body: | ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32) ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; SI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C7]](s32) - ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 from unknown-address + 12, addrspace 3) + ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) ; SI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 from unknown-address + 13, addrspace 3) + ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) ; SI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 from unknown-address + 14, addrspace 3) + ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) ; SI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 from unknown-address + 15, addrspace 3) + ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -4014,16 +4014,16 @@ body: | ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; CI-LABEL: name: test_load_local_s128_align16 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -4044,13 +4044,13 @@ body: | ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; CI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -4067,13 +4067,13 @@ body: | ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; CI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; CI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; CI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; CI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -4089,13 +4089,13 @@ body: | ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32) ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; CI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C7]](s32) - ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 from unknown-address + 12, addrspace 3) + ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) ; CI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 from unknown-address + 13, addrspace 3) + ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) ; CI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 from unknown-address + 14, addrspace 3) + ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) ; CI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 from unknown-address + 15, addrspace 3) + ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) ; CI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; CI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; CI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -4116,16 +4116,16 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; CI-DS128-LABEL: name: test_load_local_s128_align16 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -4146,13 +4146,13 @@ body: | ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; CI-DS128: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI-DS128: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; CI-DS128: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; CI-DS128: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; CI-DS128: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; CI-DS128: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI-DS128: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; CI-DS128: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -4168,13 +4168,13 @@ body: | ; CI-DS128: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; CI-DS128: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; CI-DS128: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-DS128: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; CI-DS128: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; CI-DS128: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI-DS128: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; CI-DS128: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; CI-DS128: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; CI-DS128: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; CI-DS128: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; CI-DS128: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI-DS128: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; CI-DS128: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; CI-DS128: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; CI-DS128: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; CI-DS128: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -4191,13 +4191,13 @@ body: | ; CI-DS128: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; CI-DS128: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CI-DS128: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; CI-DS128: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 from unknown-address + 12, addrspace 3) + ; CI-DS128: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) ; CI-DS128: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; CI-DS128: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 from unknown-address + 13, addrspace 3) + ; CI-DS128: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) ; CI-DS128: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; CI-DS128: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 from unknown-address + 14, addrspace 3) + ; CI-DS128: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) ; CI-DS128: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; CI-DS128: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 from unknown-address + 15, addrspace 3) + ; CI-DS128: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) ; CI-DS128: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; CI-DS128: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; CI-DS128: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -4217,16 +4217,16 @@ body: | ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; VI-LABEL: name: test_load_local_s128_align16 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -4247,13 +4247,13 @@ body: | ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -4269,13 +4269,13 @@ body: | ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; VI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -4292,13 +4292,13 @@ body: | ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; VI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 from unknown-address + 12, addrspace 3) + ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) ; VI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 from unknown-address + 13, addrspace 3) + ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) ; VI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 from unknown-address + 14, addrspace 3) + ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) ; VI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 from unknown-address + 15, addrspace 3) + ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -4318,16 +4318,16 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; GFX9-LABEL: name: test_load_local_s128_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -4348,13 +4348,13 @@ body: | ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -4370,13 +4370,13 @@ body: | ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -4393,13 +4393,13 @@ body: | ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 from unknown-address + 12, addrspace 3) + ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 from unknown-address + 13, addrspace 3) + ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 from unknown-address + 14, addrspace 3) + ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 from unknown-address + 15, addrspace 3) + ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; GFX9: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -4419,21 +4419,21 @@ body: | ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; GFX9-UNALIGNED-LABEL: name: test_load_local_s128_align16 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 1, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (s128), align 1, addrspace 3) ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; GFX10-LABEL: name: test_load_local_s128_align16 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -4454,13 +4454,13 @@ body: | ; GFX10: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX10: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; GFX10: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; GFX10: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; GFX10: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; GFX10: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; GFX10: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX10: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; GFX10: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX10: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -4476,13 +4476,13 @@ body: | ; GFX10: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; GFX10: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; GFX10: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; GFX10: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; GFX10: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX10: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; GFX10: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; GFX10: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; GFX10: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; GFX10: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; GFX10: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX10: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; GFX10: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX10: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -4499,13 +4499,13 @@ body: | ; GFX10: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; GFX10: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX10: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; GFX10: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 from unknown-address + 12, addrspace 3) + ; GFX10: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) ; GFX10: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; GFX10: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 from unknown-address + 13, addrspace 3) + ; GFX10: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) ; GFX10: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; GFX10: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 from unknown-address + 14, addrspace 3) + ; GFX10: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) ; GFX10: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; GFX10: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 from unknown-address + 15, addrspace 3) + ; GFX10: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; GFX10: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -4525,16 +4525,16 @@ body: | ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; GFX10-UNALIGNED-LABEL: name: test_load_local_s128_align16 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -4555,13 +4555,13 @@ body: | ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX10-UNALIGNED: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10-UNALIGNED: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; GFX10-UNALIGNED: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX10-UNALIGNED: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; GFX10-UNALIGNED: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX10-UNALIGNED: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX10-UNALIGNED: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -4577,13 +4577,13 @@ body: | ; GFX10-UNALIGNED: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; GFX10-UNALIGNED: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; GFX10-UNALIGNED: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10-UNALIGNED: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; GFX10-UNALIGNED: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX10-UNALIGNED: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; GFX10-UNALIGNED: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX10-UNALIGNED: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; GFX10-UNALIGNED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -4600,13 +4600,13 @@ body: | ; GFX10-UNALIGNED: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; GFX10-UNALIGNED: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX10-UNALIGNED: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; GFX10-UNALIGNED: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 from unknown-address + 12, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 from unknown-address + 13, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; GFX10-UNALIGNED: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 from unknown-address + 14, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; GFX10-UNALIGNED: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 from unknown-address + 15, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) ; GFX10-UNALIGNED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; GFX10-UNALIGNED: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; GFX10-UNALIGNED: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -4625,7 +4625,7 @@ body: | ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) %0:_(p3) = COPY $vgpr0 - %1:_(s128) = G_LOAD %0 :: (load 16, align 1, addrspace 3) + %1:_(s128) = G_LOAD %0 :: (load (s128), align 1, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -4637,54 +4637,54 @@ body: | ; SI-LABEL: name: test_load_local_s128_align8 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 8 from unknown-address + 8, addrspace 3) + ; SI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, addrspace 3) ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) ; SI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; CI-LABEL: name: test_load_local_s128_align8 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 8 from unknown-address + 8, addrspace 3) + ; CI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, addrspace 3) ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) ; CI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; CI-DS128-LABEL: name: test_load_local_s128_align8 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 8, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (s128), align 8, addrspace 3) ; CI-DS128: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; VI-LABEL: name: test_load_local_s128_align8 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 8, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (s128), align 8, addrspace 3) ; VI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; GFX9-LABEL: name: test_load_local_s128_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 8, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (s128), align 8, addrspace 3) ; GFX9: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; GFX9-UNALIGNED-LABEL: name: test_load_local_s128_align8 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 8, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (s128), align 8, addrspace 3) ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; GFX10-LABEL: name: test_load_local_s128_align8 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 8, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (s128), align 8, addrspace 3) ; GFX10: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; GFX10-UNALIGNED-LABEL: name: test_load_local_s128_align8 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 8, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (s128), align 8, addrspace 3) ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) %0:_(p3) = COPY $vgpr0 - %1:_(s128) = G_LOAD %0 :: (load 16, align 8, addrspace 3) + %1:_(s128) = G_LOAD %0 :: (load (s128), align 8, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -4696,104 +4696,104 @@ body: | ; SI-LABEL: name: test_load_local_s128_align4 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 8 from unknown-address + 8, align 4, addrspace 3) + ; SI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3) ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) ; SI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; CI-LABEL: name: test_load_local_s128_align4 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 8 from unknown-address + 8, align 4, addrspace 3) + ; CI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3) ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) ; CI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; CI-DS128-LABEL: name: test_load_local_s128_align4 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 4, addrspace 3) + ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 4 from unknown-address + 8, addrspace 3) + ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 4 from unknown-address + 12, addrspace 3) + ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; CI-DS128: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; VI-LABEL: name: test_load_local_s128_align4 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 4, addrspace 3) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 4 from unknown-address + 8, addrspace 3) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 4 from unknown-address + 12, addrspace 3) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; VI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; GFX9-LABEL: name: test_load_local_s128_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 4, addrspace 3) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 4 from unknown-address + 8, addrspace 3) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 4 from unknown-address + 12, addrspace 3) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX9: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; GFX9-UNALIGNED-LABEL: name: test_load_local_s128_align4 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (s128), align 4, addrspace 3) ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; GFX10-LABEL: name: test_load_local_s128_align4 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 4, addrspace 3) + ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 4 from unknown-address + 8, addrspace 3) + ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 4 from unknown-address + 12, addrspace 3) + ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; GFX10-UNALIGNED-LABEL: name: test_load_local_s128_align4 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 4 from unknown-address + 8, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 4 from unknown-address + 12, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) %0:_(p3) = COPY $vgpr0 - %1:_(s128) = G_LOAD %0 :: (load 16, align 4, addrspace 3) + %1:_(s128) = G_LOAD %0 :: (load (s128), align 4, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -4805,10 +4805,10 @@ body: | ; SI-LABEL: name: test_load_local_s128_align2 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -4819,9 +4819,9 @@ body: | ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -4831,9 +4831,9 @@ body: | ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 2 from unknown-address + 8, addrspace 3) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 2 from unknown-address + 10, addrspace 3) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -4841,9 +4841,9 @@ body: | ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; SI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 2 from unknown-address + 12, addrspace 3) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 2 from unknown-address + 14, addrspace 3) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3) ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) @@ -4856,10 +4856,10 @@ body: | ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; CI-LABEL: name: test_load_local_s128_align2 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -4870,9 +4870,9 @@ body: | ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -4882,9 +4882,9 @@ body: | ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 2 from unknown-address + 8, addrspace 3) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 2 from unknown-address + 10, addrspace 3) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -4892,9 +4892,9 @@ body: | ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; CI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 2 from unknown-address + 12, addrspace 3) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 2 from unknown-address + 14, addrspace 3) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3) ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) @@ -4907,10 +4907,10 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; CI-DS128-LABEL: name: test_load_local_s128_align2 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -4921,9 +4921,9 @@ body: | ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; CI-DS128: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; CI-DS128: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; CI-DS128: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -4933,9 +4933,9 @@ body: | ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI-DS128: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 2 from unknown-address + 8, addrspace 3) + ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) ; CI-DS128: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 2 from unknown-address + 10, addrspace 3) + ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) ; CI-DS128: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI-DS128: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; CI-DS128: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -4943,9 +4943,9 @@ body: | ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; CI-DS128: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32) - ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 2 from unknown-address + 12, addrspace 3) + ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3) ; CI-DS128: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 2 from unknown-address + 14, addrspace 3) + ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3) ; CI-DS128: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) ; CI-DS128: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] ; CI-DS128: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) @@ -4958,10 +4958,10 @@ body: | ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; VI-LABEL: name: test_load_local_s128_align2 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -4972,9 +4972,9 @@ body: | ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -4984,9 +4984,9 @@ body: | ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 2 from unknown-address + 8, addrspace 3) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 2 from unknown-address + 10, addrspace 3) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -4994,9 +4994,9 @@ body: | ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; VI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 2 from unknown-address + 12, addrspace 3) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 2 from unknown-address + 14, addrspace 3) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3) ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) @@ -5009,10 +5009,10 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; GFX9-LABEL: name: test_load_local_s128_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -5023,9 +5023,9 @@ body: | ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -5035,9 +5035,9 @@ body: | ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 2 from unknown-address + 8, addrspace 3) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 2 from unknown-address + 10, addrspace 3) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -5045,9 +5045,9 @@ body: | ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 2 from unknown-address + 12, addrspace 3) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3) ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 2 from unknown-address + 14, addrspace 3) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3) ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) @@ -5060,15 +5060,15 @@ body: | ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; GFX9-UNALIGNED-LABEL: name: test_load_local_s128_align2 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 2, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (s128), align 2, addrspace 3) ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; GFX10-LABEL: name: test_load_local_s128_align2 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -5079,9 +5079,9 @@ body: | ; GFX10: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX10: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -5091,9 +5091,9 @@ body: | ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; GFX10: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX10: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 2 from unknown-address + 8, addrspace 3) + ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) ; GFX10: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 2 from unknown-address + 10, addrspace 3) + ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX10: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -5101,9 +5101,9 @@ body: | ; GFX10: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) ; GFX10: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; GFX10: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32) - ; GFX10: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 2 from unknown-address + 12, addrspace 3) + ; GFX10: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3) ; GFX10: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX10: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 2 from unknown-address + 14, addrspace 3) + ; GFX10: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3) ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) ; GFX10: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) @@ -5116,10 +5116,10 @@ body: | ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; GFX10-UNALIGNED-LABEL: name: test_load_local_s128_align2 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -5130,9 +5130,9 @@ body: | ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; GFX10-UNALIGNED: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX10-UNALIGNED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; GFX10-UNALIGNED: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -5142,9 +5142,9 @@ body: | ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; GFX10-UNALIGNED: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX10-UNALIGNED: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 2 from unknown-address + 8, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 2 from unknown-address + 10, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) ; GFX10-UNALIGNED: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX10-UNALIGNED: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; GFX10-UNALIGNED: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -5152,9 +5152,9 @@ body: | ; GFX10-UNALIGNED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; GFX10-UNALIGNED: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32) - ; GFX10-UNALIGNED: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 2 from unknown-address + 12, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 2 from unknown-address + 14, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3) ; GFX10-UNALIGNED: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) ; GFX10-UNALIGNED: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] ; GFX10-UNALIGNED: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) @@ -5166,7 +5166,7 @@ body: | ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>) ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) %0:_(p3) = COPY $vgpr0 - %1:_(s128) = G_LOAD %0 :: (load 16, align 2, addrspace 3) + %1:_(s128) = G_LOAD %0 :: (load (s128), align 2, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -5178,16 +5178,16 @@ body: | ; SI-LABEL: name: test_load_local_s128_align1 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -5208,13 +5208,13 @@ body: | ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -5231,13 +5231,13 @@ body: | ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; SI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; SI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -5253,13 +5253,13 @@ body: | ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32) ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; SI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C7]](s32) - ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 from unknown-address + 12, addrspace 3) + ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) ; SI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 from unknown-address + 13, addrspace 3) + ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) ; SI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 from unknown-address + 14, addrspace 3) + ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) ; SI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 from unknown-address + 15, addrspace 3) + ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -5280,16 +5280,16 @@ body: | ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; CI-LABEL: name: test_load_local_s128_align1 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -5310,13 +5310,13 @@ body: | ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; CI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -5333,13 +5333,13 @@ body: | ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; CI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; CI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; CI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; CI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -5355,13 +5355,13 @@ body: | ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32) ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; CI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C7]](s32) - ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 from unknown-address + 12, addrspace 3) + ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) ; CI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 from unknown-address + 13, addrspace 3) + ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) ; CI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 from unknown-address + 14, addrspace 3) + ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) ; CI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 from unknown-address + 15, addrspace 3) + ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) ; CI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; CI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; CI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -5382,16 +5382,16 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; CI-DS128-LABEL: name: test_load_local_s128_align1 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -5412,13 +5412,13 @@ body: | ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; CI-DS128: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI-DS128: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; CI-DS128: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; CI-DS128: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; CI-DS128: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; CI-DS128: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI-DS128: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; CI-DS128: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -5434,13 +5434,13 @@ body: | ; CI-DS128: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; CI-DS128: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; CI-DS128: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-DS128: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; CI-DS128: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; CI-DS128: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI-DS128: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; CI-DS128: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; CI-DS128: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; CI-DS128: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; CI-DS128: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; CI-DS128: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI-DS128: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; CI-DS128: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; CI-DS128: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; CI-DS128: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; CI-DS128: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -5457,13 +5457,13 @@ body: | ; CI-DS128: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; CI-DS128: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CI-DS128: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; CI-DS128: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 from unknown-address + 12, addrspace 3) + ; CI-DS128: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) ; CI-DS128: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; CI-DS128: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 from unknown-address + 13, addrspace 3) + ; CI-DS128: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) ; CI-DS128: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; CI-DS128: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 from unknown-address + 14, addrspace 3) + ; CI-DS128: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) ; CI-DS128: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; CI-DS128: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 from unknown-address + 15, addrspace 3) + ; CI-DS128: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) ; CI-DS128: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; CI-DS128: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; CI-DS128: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -5483,16 +5483,16 @@ body: | ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; VI-LABEL: name: test_load_local_s128_align1 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -5513,13 +5513,13 @@ body: | ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -5535,13 +5535,13 @@ body: | ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; VI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -5558,13 +5558,13 @@ body: | ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; VI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 from unknown-address + 12, addrspace 3) + ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) ; VI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 from unknown-address + 13, addrspace 3) + ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) ; VI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 from unknown-address + 14, addrspace 3) + ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) ; VI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 from unknown-address + 15, addrspace 3) + ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -5584,16 +5584,16 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; GFX9-LABEL: name: test_load_local_s128_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -5614,13 +5614,13 @@ body: | ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -5636,13 +5636,13 @@ body: | ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -5659,13 +5659,13 @@ body: | ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 from unknown-address + 12, addrspace 3) + ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 from unknown-address + 13, addrspace 3) + ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 from unknown-address + 14, addrspace 3) + ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 from unknown-address + 15, addrspace 3) + ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; GFX9: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -5685,21 +5685,21 @@ body: | ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; GFX9-UNALIGNED-LABEL: name: test_load_local_s128_align1 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 1, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (s128), align 1, addrspace 3) ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; GFX10-LABEL: name: test_load_local_s128_align1 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -5720,13 +5720,13 @@ body: | ; GFX10: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX10: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; GFX10: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; GFX10: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; GFX10: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; GFX10: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; GFX10: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX10: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; GFX10: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX10: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -5742,13 +5742,13 @@ body: | ; GFX10: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; GFX10: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; GFX10: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; GFX10: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; GFX10: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX10: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; GFX10: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; GFX10: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; GFX10: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; GFX10: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; GFX10: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX10: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; GFX10: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX10: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -5765,13 +5765,13 @@ body: | ; GFX10: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; GFX10: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX10: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; GFX10: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 from unknown-address + 12, addrspace 3) + ; GFX10: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) ; GFX10: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; GFX10: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 from unknown-address + 13, addrspace 3) + ; GFX10: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) ; GFX10: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; GFX10: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 from unknown-address + 14, addrspace 3) + ; GFX10: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) ; GFX10: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; GFX10: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 from unknown-address + 15, addrspace 3) + ; GFX10: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; GFX10: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -5791,16 +5791,16 @@ body: | ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; GFX10-UNALIGNED-LABEL: name: test_load_local_s128_align1 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -5821,13 +5821,13 @@ body: | ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX10-UNALIGNED: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10-UNALIGNED: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; GFX10-UNALIGNED: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX10-UNALIGNED: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; GFX10-UNALIGNED: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX10-UNALIGNED: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX10-UNALIGNED: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -5843,13 +5843,13 @@ body: | ; GFX10-UNALIGNED: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; GFX10-UNALIGNED: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; GFX10-UNALIGNED: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10-UNALIGNED: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; GFX10-UNALIGNED: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX10-UNALIGNED: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; GFX10-UNALIGNED: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX10-UNALIGNED: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; GFX10-UNALIGNED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -5866,13 +5866,13 @@ body: | ; GFX10-UNALIGNED: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; GFX10-UNALIGNED: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX10-UNALIGNED: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; GFX10-UNALIGNED: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 from unknown-address + 12, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 from unknown-address + 13, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; GFX10-UNALIGNED: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 from unknown-address + 14, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; GFX10-UNALIGNED: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 from unknown-address + 15, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) ; GFX10-UNALIGNED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; GFX10-UNALIGNED: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; GFX10-UNALIGNED: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -5891,7 +5891,7 @@ body: | ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) %0:_(p3) = COPY $vgpr0 - %1:_(s128) = G_LOAD %0 :: (load 16, align 1, addrspace 3) + %1:_(s128) = G_LOAD %0 :: (load (s128), align 1, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -5903,38 +5903,38 @@ body: | ; SI-LABEL: name: test_load_local_p1_align8 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), addrspace 3) ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](p1) ; CI-LABEL: name: test_load_local_p1_align8 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), addrspace 3) ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](p1) ; CI-DS128-LABEL: name: test_load_local_p1_align8 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), addrspace 3) ; CI-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](p1) ; VI-LABEL: name: test_load_local_p1_align8 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), addrspace 3) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](p1) ; GFX9-LABEL: name: test_load_local_p1_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), addrspace 3) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p1) ; GFX9-UNALIGNED-LABEL: name: test_load_local_p1_align8 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), addrspace 3) ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](p1) ; GFX10-LABEL: name: test_load_local_p1_align8 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), addrspace 3) ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](p1) ; GFX10-UNALIGNED-LABEL: name: test_load_local_p1_align8 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), addrspace 3) ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](p1) %0:_(p3) = COPY $vgpr0 - %1:_(p1) = G_LOAD %0 :: (load 8, align 8, addrspace 3) + %1:_(p1) = G_LOAD %0 :: (load (p1), align 8, addrspace 3) $vgpr0_vgpr1 = COPY %1 ... @@ -5946,38 +5946,38 @@ body: | ; SI-LABEL: name: test_load_local_p1_align4 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 4, addrspace 3) ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](p1) ; CI-LABEL: name: test_load_local_p1_align4 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 4, addrspace 3) ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](p1) ; CI-DS128-LABEL: name: test_load_local_p1_align4 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 4, addrspace 3) ; CI-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](p1) ; VI-LABEL: name: test_load_local_p1_align4 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 4, addrspace 3) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](p1) ; GFX9-LABEL: name: test_load_local_p1_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 4, addrspace 3) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p1) ; GFX9-UNALIGNED-LABEL: name: test_load_local_p1_align4 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 4, addrspace 3) ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](p1) ; GFX10-LABEL: name: test_load_local_p1_align4 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 4, addrspace 3) ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](p1) ; GFX10-UNALIGNED-LABEL: name: test_load_local_p1_align4 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 4, addrspace 3) ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](p1) %0:_(p3) = COPY $vgpr0 - %1:_(p1) = G_LOAD %0 :: (load 8, align 4, addrspace 3) + %1:_(p1) = G_LOAD %0 :: (load (p1), align 4, addrspace 3) $vgpr0_vgpr1 = COPY %1 ... @@ -5989,16 +5989,16 @@ body: | ; SI-LABEL: name: test_load_local_p1_align2 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -6017,16 +6017,16 @@ body: | ; SI: $vgpr0_vgpr1 = COPY [[MV]](p1) ; CI-LABEL: name: test_load_local_p1_align2 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -6045,16 +6045,16 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[MV]](p1) ; CI-DS128-LABEL: name: test_load_local_p1_align2 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -6073,16 +6073,16 @@ body: | ; CI-DS128: $vgpr0_vgpr1 = COPY [[MV]](p1) ; VI-LABEL: name: test_load_local_p1_align2 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -6101,16 +6101,16 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[MV]](p1) ; GFX9-LABEL: name: test_load_local_p1_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -6129,20 +6129,20 @@ body: | ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](p1) ; GFX9-UNALIGNED-LABEL: name: test_load_local_p1_align2 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load 8, align 2, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 2, addrspace 3) ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](p1) ; GFX10-LABEL: name: test_load_local_p1_align2 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -6161,16 +6161,16 @@ body: | ; GFX10: $vgpr0_vgpr1 = COPY [[MV]](p1) ; GFX10-UNALIGNED-LABEL: name: test_load_local_p1_align2 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -6188,7 +6188,7 @@ body: | ; GFX10-UNALIGNED: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[MV]](p1) %0:_(p3) = COPY $vgpr0 - %1:_(p1) = G_LOAD %0 :: (load 8, align 2, addrspace 3) + %1:_(p1) = G_LOAD %0 :: (load (p1), align 2, addrspace 3) $vgpr0_vgpr1 = COPY %1 ... @@ -6200,28 +6200,28 @@ body: | ; SI-LABEL: name: test_load_local_p1_align1 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 ; SI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; SI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 ; SI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; SI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -6269,28 +6269,28 @@ body: | ; SI: $vgpr0_vgpr1 = COPY [[MV]](p1) ; CI-LABEL: name: test_load_local_p1_align1 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 ; CI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; CI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 ; CI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; CI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -6338,28 +6338,28 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[MV]](p1) ; CI-DS128-LABEL: name: test_load_local_p1_align1 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI-DS128: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 ; CI-DS128: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; CI-DS128: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; CI-DS128: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; CI-DS128: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 ; CI-DS128: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; CI-DS128: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-DS128: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -6407,28 +6407,28 @@ body: | ; CI-DS128: $vgpr0_vgpr1 = COPY [[MV]](p1) ; VI-LABEL: name: test_load_local_p1_align1 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 ; VI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; VI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 ; VI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; VI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -6468,28 +6468,28 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[MV]](p1) ; GFX9-LABEL: name: test_load_local_p1_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; GFX9: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -6529,32 +6529,32 @@ body: | ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](p1) ; GFX9-UNALIGNED-LABEL: name: test_load_local_p1_align1 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load 8, align 1, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load (p1), align 1, addrspace 3) ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](p1) ; GFX10-LABEL: name: test_load_local_p1_align1 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; GFX10: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 ; GFX10: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; GFX10: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; GFX10: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; GFX10: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; GFX10: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; GFX10: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 ; GFX10: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; GFX10: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; GFX10: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; GFX10: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX10: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -6594,28 +6594,28 @@ body: | ; GFX10: $vgpr0_vgpr1 = COPY [[MV]](p1) ; GFX10-UNALIGNED-LABEL: name: test_load_local_p1_align1 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10-UNALIGNED: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; GFX10-UNALIGNED: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 ; GFX10-UNALIGNED: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; GFX10-UNALIGNED: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; GFX10-UNALIGNED: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; GFX10-UNALIGNED: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; GFX10-UNALIGNED: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 ; GFX10-UNALIGNED: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; GFX10-UNALIGNED: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; GFX10-UNALIGNED: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX10-UNALIGNED: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -6654,7 +6654,7 @@ body: | ; GFX10-UNALIGNED: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[MV]](p1) %0:_(p3) = COPY $vgpr0 - %1:_(p1) = G_LOAD %0 :: (load 8, align 1, addrspace 3) + %1:_(p1) = G_LOAD %0 :: (load (p1), align 1, addrspace 3) $vgpr0_vgpr1 = COPY %1 ... @@ -6666,38 +6666,38 @@ body: | ; SI-LABEL: name: test_load_local_p3_align4 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), addrspace 3) ; SI: $vgpr0 = COPY [[LOAD]](p3) ; CI-LABEL: name: test_load_local_p3_align4 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), addrspace 3) ; CI: $vgpr0 = COPY [[LOAD]](p3) ; CI-DS128-LABEL: name: test_load_local_p3_align4 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), addrspace 3) ; CI-DS128: $vgpr0 = COPY [[LOAD]](p3) ; VI-LABEL: name: test_load_local_p3_align4 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), addrspace 3) ; VI: $vgpr0 = COPY [[LOAD]](p3) ; GFX9-LABEL: name: test_load_local_p3_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), addrspace 3) ; GFX9: $vgpr0 = COPY [[LOAD]](p3) ; GFX9-UNALIGNED-LABEL: name: test_load_local_p3_align4 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), addrspace 3) ; GFX9-UNALIGNED: $vgpr0 = COPY [[LOAD]](p3) ; GFX10-LABEL: name: test_load_local_p3_align4 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), addrspace 3) ; GFX10: $vgpr0 = COPY [[LOAD]](p3) ; GFX10-UNALIGNED-LABEL: name: test_load_local_p3_align4 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), addrspace 3) ; GFX10-UNALIGNED: $vgpr0 = COPY [[LOAD]](p3) %0:_(p3) = COPY $vgpr0 - %1:_(p3) = G_LOAD %0 :: (load 4, align 4, addrspace 3) + %1:_(p3) = G_LOAD %0 :: (load (p3), align 4, addrspace 3) $vgpr0 = COPY %1 ... @@ -6709,10 +6709,10 @@ body: | ; SI-LABEL: name: test_load_local_p3_align2 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -6725,10 +6725,10 @@ body: | ; SI: $vgpr0 = COPY [[INTTOPTR]](p3) ; CI-LABEL: name: test_load_local_p3_align2 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -6741,10 +6741,10 @@ body: | ; CI: $vgpr0 = COPY [[INTTOPTR]](p3) ; CI-DS128-LABEL: name: test_load_local_p3_align2 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -6757,10 +6757,10 @@ body: | ; CI-DS128: $vgpr0 = COPY [[INTTOPTR]](p3) ; VI-LABEL: name: test_load_local_p3_align2 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -6773,10 +6773,10 @@ body: | ; VI: $vgpr0 = COPY [[INTTOPTR]](p3) ; GFX9-LABEL: name: test_load_local_p3_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -6789,14 +6789,14 @@ body: | ; GFX9: $vgpr0 = COPY [[INTTOPTR]](p3) ; GFX9-UNALIGNED-LABEL: name: test_load_local_p3_align2 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load 4, align 2, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), align 2, addrspace 3) ; GFX9-UNALIGNED: $vgpr0 = COPY [[LOAD]](p3) ; GFX10-LABEL: name: test_load_local_p3_align2 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -6809,10 +6809,10 @@ body: | ; GFX10: $vgpr0 = COPY [[INTTOPTR]](p3) ; GFX10-UNALIGNED-LABEL: name: test_load_local_p3_align2 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -6824,7 +6824,7 @@ body: | ; GFX10-UNALIGNED: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) ; GFX10-UNALIGNED: $vgpr0 = COPY [[INTTOPTR]](p3) %0:_(p3) = COPY $vgpr0 - %1:_(p3) = G_LOAD %0 :: (load 4, align 2, addrspace 3) + %1:_(p3) = G_LOAD %0 :: (load (p3), align 2, addrspace 3) $vgpr0 = COPY %1 ... @@ -6836,16 +6836,16 @@ body: | ; SI-LABEL: name: test_load_local_p3_align1 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -6868,16 +6868,16 @@ body: | ; SI: $vgpr0 = COPY [[INTTOPTR]](p3) ; CI-LABEL: name: test_load_local_p3_align1 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -6900,16 +6900,16 @@ body: | ; CI: $vgpr0 = COPY [[INTTOPTR]](p3) ; CI-DS128-LABEL: name: test_load_local_p3_align1 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -6932,16 +6932,16 @@ body: | ; CI-DS128: $vgpr0 = COPY [[INTTOPTR]](p3) ; VI-LABEL: name: test_load_local_p3_align1 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -6964,16 +6964,16 @@ body: | ; VI: $vgpr0 = COPY [[INTTOPTR]](p3) ; GFX9-LABEL: name: test_load_local_p3_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -6996,20 +6996,20 @@ body: | ; GFX9: $vgpr0 = COPY [[INTTOPTR]](p3) ; GFX9-UNALIGNED-LABEL: name: test_load_local_p3_align1 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load 4, align 1, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p3) :: (load (p3), align 1, addrspace 3) ; GFX9-UNALIGNED: $vgpr0 = COPY [[LOAD]](p3) ; GFX10-LABEL: name: test_load_local_p3_align1 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -7032,16 +7032,16 @@ body: | ; GFX10: $vgpr0 = COPY [[INTTOPTR]](p3) ; GFX10-UNALIGNED-LABEL: name: test_load_local_p3_align1 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -7063,7 +7063,7 @@ body: | ; GFX10-UNALIGNED: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) ; GFX10-UNALIGNED: $vgpr0 = COPY [[INTTOPTR]](p3) %0:_(p3) = COPY $vgpr0 - %1:_(p3) = G_LOAD %0 :: (load 4, align 1, addrspace 3) + %1:_(p3) = G_LOAD %0 :: (load (p3), align 1, addrspace 3) $vgpr0 = COPY %1 ... @@ -7075,38 +7075,38 @@ body: | ; SI-LABEL: name: test_load_local_p5_align4 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), addrspace 3) ; SI: $vgpr0 = COPY [[LOAD]](p5) ; CI-LABEL: name: test_load_local_p5_align4 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), addrspace 3) ; CI: $vgpr0 = COPY [[LOAD]](p5) ; CI-DS128-LABEL: name: test_load_local_p5_align4 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), addrspace 3) ; CI-DS128: $vgpr0 = COPY [[LOAD]](p5) ; VI-LABEL: name: test_load_local_p5_align4 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), addrspace 3) ; VI: $vgpr0 = COPY [[LOAD]](p5) ; GFX9-LABEL: name: test_load_local_p5_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), addrspace 3) ; GFX9: $vgpr0 = COPY [[LOAD]](p5) ; GFX9-UNALIGNED-LABEL: name: test_load_local_p5_align4 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), addrspace 3) ; GFX9-UNALIGNED: $vgpr0 = COPY [[LOAD]](p5) ; GFX10-LABEL: name: test_load_local_p5_align4 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), addrspace 3) ; GFX10: $vgpr0 = COPY [[LOAD]](p5) ; GFX10-UNALIGNED-LABEL: name: test_load_local_p5_align4 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), addrspace 3) ; GFX10-UNALIGNED: $vgpr0 = COPY [[LOAD]](p5) %0:_(p3) = COPY $vgpr0 - %1:_(p5) = G_LOAD %0 :: (load 4, align 4, addrspace 3) + %1:_(p5) = G_LOAD %0 :: (load (p5), align 4, addrspace 3) $vgpr0 = COPY %1 ... @@ -7118,10 +7118,10 @@ body: | ; SI-LABEL: name: test_load_local_p5_align2 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -7134,10 +7134,10 @@ body: | ; SI: $vgpr0 = COPY [[INTTOPTR]](p5) ; CI-LABEL: name: test_load_local_p5_align2 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -7150,10 +7150,10 @@ body: | ; CI: $vgpr0 = COPY [[INTTOPTR]](p5) ; CI-DS128-LABEL: name: test_load_local_p5_align2 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -7166,10 +7166,10 @@ body: | ; CI-DS128: $vgpr0 = COPY [[INTTOPTR]](p5) ; VI-LABEL: name: test_load_local_p5_align2 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -7182,10 +7182,10 @@ body: | ; VI: $vgpr0 = COPY [[INTTOPTR]](p5) ; GFX9-LABEL: name: test_load_local_p5_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -7198,14 +7198,14 @@ body: | ; GFX9: $vgpr0 = COPY [[INTTOPTR]](p5) ; GFX9-UNALIGNED-LABEL: name: test_load_local_p5_align2 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load 4, align 2, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), align 2, addrspace 3) ; GFX9-UNALIGNED: $vgpr0 = COPY [[LOAD]](p5) ; GFX10-LABEL: name: test_load_local_p5_align2 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -7218,10 +7218,10 @@ body: | ; GFX10: $vgpr0 = COPY [[INTTOPTR]](p5) ; GFX10-UNALIGNED-LABEL: name: test_load_local_p5_align2 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -7233,7 +7233,7 @@ body: | ; GFX10-UNALIGNED: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) ; GFX10-UNALIGNED: $vgpr0 = COPY [[INTTOPTR]](p5) %0:_(p3) = COPY $vgpr0 - %1:_(p5) = G_LOAD %0 :: (load 4, align 2, addrspace 3) + %1:_(p5) = G_LOAD %0 :: (load (p5), align 2, addrspace 3) $vgpr0 = COPY %1 ... @@ -7245,16 +7245,16 @@ body: | ; SI-LABEL: name: test_load_local_p5_align1 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -7277,16 +7277,16 @@ body: | ; SI: $vgpr0 = COPY [[INTTOPTR]](p5) ; CI-LABEL: name: test_load_local_p5_align1 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -7309,16 +7309,16 @@ body: | ; CI: $vgpr0 = COPY [[INTTOPTR]](p5) ; CI-DS128-LABEL: name: test_load_local_p5_align1 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -7341,16 +7341,16 @@ body: | ; CI-DS128: $vgpr0 = COPY [[INTTOPTR]](p5) ; VI-LABEL: name: test_load_local_p5_align1 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -7373,16 +7373,16 @@ body: | ; VI: $vgpr0 = COPY [[INTTOPTR]](p5) ; GFX9-LABEL: name: test_load_local_p5_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -7405,20 +7405,20 @@ body: | ; GFX9: $vgpr0 = COPY [[INTTOPTR]](p5) ; GFX9-UNALIGNED-LABEL: name: test_load_local_p5_align1 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load 4, align 1, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p3) :: (load (p5), align 1, addrspace 3) ; GFX9-UNALIGNED: $vgpr0 = COPY [[LOAD]](p5) ; GFX10-LABEL: name: test_load_local_p5_align1 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -7441,16 +7441,16 @@ body: | ; GFX10: $vgpr0 = COPY [[INTTOPTR]](p5) ; GFX10-UNALIGNED-LABEL: name: test_load_local_p5_align1 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -7472,7 +7472,7 @@ body: | ; GFX10-UNALIGNED: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) ; GFX10-UNALIGNED: $vgpr0 = COPY [[INTTOPTR]](p5) %0:_(p3) = COPY $vgpr0 - %1:_(p5) = G_LOAD %0 :: (load 4, align 1, addrspace 3) + %1:_(p5) = G_LOAD %0 :: (load (p5), align 1, addrspace 3) $vgpr0 = COPY %1 ... @@ -7484,7 +7484,7 @@ body: | ; SI-LABEL: name: test_load_local_v2s8_align2 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (<2 x s8>), addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -7505,7 +7505,7 @@ body: | ; SI: $vgpr0 = COPY [[ANYEXT]](s32) ; CI-LABEL: name: test_load_local_v2s8_align2 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (<2 x s8>), addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -7526,7 +7526,7 @@ body: | ; CI: $vgpr0 = COPY [[ANYEXT]](s32) ; CI-DS128-LABEL: name: test_load_local_v2s8_align2 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (<2 x s8>), addrspace 3) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI-DS128: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -7547,7 +7547,7 @@ body: | ; CI-DS128: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_load_local_v2s8_align2 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (<2 x s8>), addrspace 3) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -7566,7 +7566,7 @@ body: | ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_load_local_v2s8_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (<2 x s8>), addrspace 3) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -7585,7 +7585,7 @@ body: | ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s8_align2 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (<2 x s8>), addrspace 3) ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9-UNALIGNED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; GFX9-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -7604,7 +7604,7 @@ body: | ; GFX9-UNALIGNED: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX10-LABEL: name: test_load_local_v2s8_align2 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (<2 x s8>), addrspace 3) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX10: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -7623,7 +7623,7 @@ body: | ; GFX10: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s8_align2 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (<2 x s8>), addrspace 3) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX10-UNALIGNED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -7641,7 +7641,7 @@ body: | ; GFX10-UNALIGNED: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX10-UNALIGNED: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(p3) = COPY $vgpr0 - %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, align 2, addrspace 3) + %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 2, addrspace 3) %2:_(s16) = G_BITCAST %1 %3:_(s32) = G_ANYEXT %2 $vgpr0 = COPY %3 @@ -7655,10 +7655,10 @@ body: | ; SI-LABEL: name: test_load_local_v2s8_align1 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) @@ -7667,10 +7667,10 @@ body: | ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; CI-LABEL: name: test_load_local_v2s8_align1 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) @@ -7679,10 +7679,10 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; CI-DS128-LABEL: name: test_load_local_v2s8_align1 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-DS128: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; CI-DS128: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) @@ -7691,10 +7691,10 @@ body: | ; CI-DS128: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; VI-LABEL: name: test_load_local_v2s8_align1 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) @@ -7703,17 +7703,17 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_load_local_v2s8_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s8_align1 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 1, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (<2 x s8>), align 1, addrspace 3) ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9-UNALIGNED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; GFX9-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -7726,26 +7726,26 @@ body: | ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX10-LABEL: name: test_load_local_v2s8_align1 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32) ; GFX10: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s8_align1 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10-UNALIGNED: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32) ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(p3) = COPY $vgpr0 - %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, align 1, addrspace 3) + %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 1, addrspace 3) %2:_(<2 x s32>) = G_ANYEXT %1 $vgpr0_vgpr1 = COPY %2 ... @@ -7758,54 +7758,54 @@ body: | ; SI-LABEL: name: test_load_local_v3s8_align4 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p3) :: (load 3, align 4, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p3) :: (load (<3 x s8>), align 4, addrspace 1) ; SI: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; SI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; SI: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; CI-LABEL: name: test_load_local_v3s8_align4 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p3) :: (load 3, align 4, addrspace 1) + ; CI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p3) :: (load (<3 x s8>), align 4, addrspace 1) ; CI: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; CI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; CI: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; CI-DS128-LABEL: name: test_load_local_v3s8_align4 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p3) :: (load 3, align 4, addrspace 1) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p3) :: (load (<3 x s8>), align 4, addrspace 1) ; CI-DS128: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; CI-DS128: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; CI-DS128: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; VI-LABEL: name: test_load_local_v3s8_align4 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p3) :: (load 3, align 4, addrspace 1) + ; VI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p3) :: (load (<3 x s8>), align 4, addrspace 1) ; VI: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; VI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; VI: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; GFX9-LABEL: name: test_load_local_v3s8_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p3) :: (load 3, align 4, addrspace 1) + ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p3) :: (load (<3 x s8>), align 4, addrspace 1) ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; GFX9: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s8_align4 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p3) :: (load 3, align 4, addrspace 1) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p3) :: (load (<3 x s8>), align 4, addrspace 1) ; GFX9-UNALIGNED: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; GFX9-UNALIGNED: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; GFX9-UNALIGNED: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; GFX10-LABEL: name: test_load_local_v3s8_align4 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p3) :: (load 3, align 4, addrspace 1) + ; GFX10: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p3) :: (load (<3 x s8>), align 4, addrspace 1) ; GFX10: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; GFX10: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; GFX10: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s8_align4 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p3) :: (load 3, align 4, addrspace 1) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p3) :: (load (<3 x s8>), align 4, addrspace 1) ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; GFX10-UNALIGNED: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; GFX10-UNALIGNED: $vgpr0 = COPY [[INSERT]](<4 x s8>) %0:_(p3) = COPY $vgpr0 - %1:_(<3 x s8>) = G_LOAD %0 :: (load 3, addrspace 1, align 4) + %1:_(<3 x s8>) = G_LOAD %0 :: (load (<3 x s8>), addrspace 1, align 4) %2:_(<4 x s8>) = G_IMPLICIT_DEF %3:_(<4 x s8>) = G_INSERT %2, %1, 0 $vgpr0 = COPY %3 @@ -7819,54 +7819,54 @@ body: | ; SI-LABEL: name: test_load_local_v3s8_align1 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p3) :: (load 3, align 1, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p3) :: (load (<3 x s8>), align 1, addrspace 3) ; SI: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; SI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; SI: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; CI-LABEL: name: test_load_local_v3s8_align1 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p3) :: (load 3, align 1, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p3) :: (load (<3 x s8>), align 1, addrspace 3) ; CI: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; CI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; CI: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; CI-DS128-LABEL: name: test_load_local_v3s8_align1 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p3) :: (load 3, align 1, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p3) :: (load (<3 x s8>), align 1, addrspace 3) ; CI-DS128: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; CI-DS128: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; CI-DS128: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; VI-LABEL: name: test_load_local_v3s8_align1 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p3) :: (load 3, align 1, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p3) :: (load (<3 x s8>), align 1, addrspace 3) ; VI: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; VI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; VI: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; GFX9-LABEL: name: test_load_local_v3s8_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p3) :: (load 3, align 1, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p3) :: (load (<3 x s8>), align 1, addrspace 3) ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; GFX9: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s8_align1 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p3) :: (load 3, align 1, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p3) :: (load (<3 x s8>), align 1, addrspace 3) ; GFX9-UNALIGNED: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; GFX9-UNALIGNED: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; GFX9-UNALIGNED: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; GFX10-LABEL: name: test_load_local_v3s8_align1 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p3) :: (load 3, align 1, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p3) :: (load (<3 x s8>), align 1, addrspace 3) ; GFX10: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; GFX10: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; GFX10: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s8_align1 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p3) :: (load 3, align 1, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p3) :: (load (<3 x s8>), align 1, addrspace 3) ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; GFX10-UNALIGNED: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; GFX10-UNALIGNED: $vgpr0 = COPY [[INSERT]](<4 x s8>) %0:_(p3) = COPY $vgpr0 - %1:_(<3 x s8>) = G_LOAD %0 :: (load 3, align 1, addrspace 3) + %1:_(<3 x s8>) = G_LOAD %0 :: (load (<3 x s8>), align 1, addrspace 3) %2:_(<4 x s8>) = G_IMPLICIT_DEF %3:_(<4 x s8>) = G_INSERT %2, %1, 0 $vgpr0 = COPY %3 @@ -7880,7 +7880,7 @@ body: | ; SI-LABEL: name: test_load_local_v4s8_align4 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (<4 x s8>), addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -7896,7 +7896,7 @@ body: | ; SI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; CI-LABEL: name: test_load_local_v4s8_align4 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (<4 x s8>), addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -7912,7 +7912,7 @@ body: | ; CI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; CI-DS128-LABEL: name: test_load_local_v4s8_align4 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (<4 x s8>), addrspace 3) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI-DS128: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -7928,7 +7928,7 @@ body: | ; CI-DS128: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; VI-LABEL: name: test_load_local_v4s8_align4 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (<4 x s8>), addrspace 3) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -7944,7 +7944,7 @@ body: | ; VI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; GFX9-LABEL: name: test_load_local_v4s8_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (<4 x s8>), addrspace 3) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -7962,7 +7962,7 @@ body: | ; GFX9: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s8_align4 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (<4 x s8>), addrspace 3) ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9-UNALIGNED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; GFX9-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -7980,7 +7980,7 @@ body: | ; GFX9-UNALIGNED: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; GFX10-LABEL: name: test_load_local_v4s8_align4 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (<4 x s8>), addrspace 3) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX10: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -7998,7 +7998,7 @@ body: | ; GFX10: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s8_align4 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (<4 x s8>), addrspace 3) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX10-UNALIGNED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -8015,7 +8015,7 @@ body: | ; GFX10-UNALIGNED: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) ; GFX10-UNALIGNED: $vgpr0 = COPY [[TRUNC]](<4 x s8>) %0:_(p3) = COPY $vgpr0 - %1:_(<4 x s8>) = G_LOAD %0 :: (load 4, align 4, addrspace 3) + %1:_(<4 x s8>) = G_LOAD %0 :: (load (<4 x s8>), align 4, addrspace 3) $vgpr0 = COPY %1 ... @@ -8027,7 +8027,7 @@ body: | ; SI-LABEL: name: test_load_local_v8s8_align8 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<8 x s8>), addrspace 3) ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) @@ -8054,7 +8054,7 @@ body: | ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<8 x s8>) ; CI-LABEL: name: test_load_local_v8s8_align8 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<8 x s8>), addrspace 3) ; CI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) @@ -8081,7 +8081,7 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<8 x s8>) ; CI-DS128-LABEL: name: test_load_local_v8s8_align8 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<8 x s8>), addrspace 3) ; CI-DS128: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI-DS128: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) @@ -8108,7 +8108,7 @@ body: | ; CI-DS128: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<8 x s8>) ; VI-LABEL: name: test_load_local_v8s8_align8 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<8 x s8>), addrspace 3) ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) @@ -8135,7 +8135,7 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<8 x s8>) ; GFX9-LABEL: name: test_load_local_v8s8_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<8 x s8>), addrspace 3) ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) @@ -8166,7 +8166,7 @@ body: | ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS2]](<8 x s8>) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v8s8_align8 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<8 x s8>), addrspace 3) ; GFX9-UNALIGNED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9-UNALIGNED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) @@ -8197,7 +8197,7 @@ body: | ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS2]](<8 x s8>) ; GFX10-LABEL: name: test_load_local_v8s8_align8 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<8 x s8>), addrspace 3) ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX10: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) @@ -8228,7 +8228,7 @@ body: | ; GFX10: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS2]](<8 x s8>) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v8s8_align8 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<8 x s8>), addrspace 3) ; GFX10-UNALIGNED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX10-UNALIGNED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) @@ -8258,7 +8258,7 @@ body: | ; GFX10-UNALIGNED: [[CONCAT_VECTORS2:%[0-9]+]]:_(<8 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>) ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS2]](<8 x s8>) %0:_(p3) = COPY $vgpr0 - %1:_(<8 x s8>) = G_LOAD %0 :: (load 8, align 8, addrspace 3) + %1:_(<8 x s8>) = G_LOAD %0 :: (load (<8 x s8>), align 8, addrspace 3) $vgpr0_vgpr1 = COPY %1 ... @@ -8270,42 +8270,42 @@ body: | ; SI-LABEL: name: test_load_local_v16s8_align16 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; SI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; SI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s32) - ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 from unknown-address + 12, addrspace 3) + ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) ; SI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 from unknown-address + 13, addrspace 3) + ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) ; SI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 from unknown-address + 14, addrspace 3) + ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) ; SI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 from unknown-address + 15, addrspace 3) + ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) @@ -8334,42 +8334,42 @@ body: | ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>) ; CI-LABEL: name: test_load_local_v16s8_align16 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; CI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; CI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; CI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; CI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; CI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s32) - ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 from unknown-address + 12, addrspace 3) + ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) ; CI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 from unknown-address + 13, addrspace 3) + ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) ; CI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 from unknown-address + 14, addrspace 3) + ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) ; CI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 from unknown-address + 15, addrspace 3) + ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) @@ -8398,43 +8398,43 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>) ; CI-DS128-LABEL: name: test_load_local_v16s8_align16 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI-DS128: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; CI-DS128: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; CI-DS128: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; CI-DS128: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI-DS128: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-DS128: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; CI-DS128: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; CI-DS128: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI-DS128: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; CI-DS128: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; CI-DS128: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; CI-DS128: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; CI-DS128: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; CI-DS128: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI-DS128: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; CI-DS128: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; CI-DS128: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CI-DS128: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; CI-DS128: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 from unknown-address + 12, addrspace 3) + ; CI-DS128: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) ; CI-DS128: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; CI-DS128: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 from unknown-address + 13, addrspace 3) + ; CI-DS128: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) ; CI-DS128: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; CI-DS128: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 from unknown-address + 14, addrspace 3) + ; CI-DS128: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) ; CI-DS128: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; CI-DS128: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 from unknown-address + 15, addrspace 3) + ; CI-DS128: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-DS128: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; CI-DS128: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) @@ -8463,43 +8463,43 @@ body: | ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>) ; VI-LABEL: name: test_load_local_v16s8_align16 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; VI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 from unknown-address + 12, addrspace 3) + ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) ; VI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 from unknown-address + 13, addrspace 3) + ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) ; VI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 from unknown-address + 14, addrspace 3) + ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) ; VI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 from unknown-address + 15, addrspace 3) + ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) @@ -8528,43 +8528,43 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>) ; GFX9-LABEL: name: test_load_local_v16s8_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 from unknown-address + 12, addrspace 3) + ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 from unknown-address + 13, addrspace 3) + ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 from unknown-address + 14, addrspace 3) + ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 from unknown-address + 15, addrspace 3) + ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) @@ -8601,7 +8601,7 @@ body: | ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<16 x s8>) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v16s8_align16 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 1, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<16 x s8>), align 1, addrspace 3) ; GFX9-UNALIGNED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9-UNALIGNED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) @@ -8654,43 +8654,43 @@ body: | ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<16 x s8>) ; GFX10-LABEL: name: test_load_local_v16s8_align16 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; GFX10: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; GFX10: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; GFX10: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; GFX10: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; GFX10: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX10: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; GFX10: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; GFX10: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX10: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; GFX10: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; GFX10: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX10: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; GFX10: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; GFX10: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; GFX10: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; GFX10: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; GFX10: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX10: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; GFX10: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; GFX10: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX10: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; GFX10: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 from unknown-address + 12, addrspace 3) + ; GFX10: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) ; GFX10: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; GFX10: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 from unknown-address + 13, addrspace 3) + ; GFX10: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) ; GFX10: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; GFX10: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 from unknown-address + 14, addrspace 3) + ; GFX10: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) ; GFX10: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; GFX10: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 from unknown-address + 15, addrspace 3) + ; GFX10: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) @@ -8727,43 +8727,43 @@ body: | ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<16 x s8>) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v16s8_align16 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10-UNALIGNED: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; GFX10-UNALIGNED: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX10-UNALIGNED: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; GFX10-UNALIGNED: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX10-UNALIGNED: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10-UNALIGNED: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; GFX10-UNALIGNED: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX10-UNALIGNED: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; GFX10-UNALIGNED: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX10-UNALIGNED: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; GFX10-UNALIGNED: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 from unknown-address + 12, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 from unknown-address + 13, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; GFX10-UNALIGNED: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 from unknown-address + 14, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; GFX10-UNALIGNED: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 from unknown-address + 15, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10-UNALIGNED: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) @@ -8799,7 +8799,7 @@ body: | ; GFX10-UNALIGNED: [[CONCAT_VECTORS4:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>) ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<16 x s8>) %0:_(p3) = COPY $vgpr0 - %1:_(<16 x s8>) = G_LOAD %0 :: (load 16, align 1, addrspace 3) + %1:_(<16 x s8>) = G_LOAD %0 :: (load (<16 x s8>), align 1, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -8811,38 +8811,38 @@ body: | ; SI-LABEL: name: test_load_local_v2s16_align4 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) ; SI: $vgpr0 = COPY [[LOAD]](<2 x s16>) ; CI-LABEL: name: test_load_local_v2s16_align4 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) ; CI: $vgpr0 = COPY [[LOAD]](<2 x s16>) ; CI-DS128-LABEL: name: test_load_local_v2s16_align4 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) ; CI-DS128: $vgpr0 = COPY [[LOAD]](<2 x s16>) ; VI-LABEL: name: test_load_local_v2s16_align4 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) ; VI: $vgpr0 = COPY [[LOAD]](<2 x s16>) ; GFX9-LABEL: name: test_load_local_v2s16_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) ; GFX9: $vgpr0 = COPY [[LOAD]](<2 x s16>) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s16_align4 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) ; GFX9-UNALIGNED: $vgpr0 = COPY [[LOAD]](<2 x s16>) ; GFX10-LABEL: name: test_load_local_v2s16_align4 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) ; GFX10: $vgpr0 = COPY [[LOAD]](<2 x s16>) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s16_align4 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) ; GFX10-UNALIGNED: $vgpr0 = COPY [[LOAD]](<2 x s16>) %0:_(p3) = COPY $vgpr0 - %1:_(<2 x s16>) = G_LOAD %0 :: (load 4, align 4, addrspace 3) + %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 3) $vgpr0 = COPY %1 ... @@ -8854,10 +8854,10 @@ body: | ; SI-LABEL: name: test_load_local_v2s16_align2 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -8870,10 +8870,10 @@ body: | ; SI: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; CI-LABEL: name: test_load_local_v2s16_align2 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -8886,10 +8886,10 @@ body: | ; CI: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; CI-DS128-LABEL: name: test_load_local_v2s16_align2 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -8902,10 +8902,10 @@ body: | ; CI-DS128: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; VI-LABEL: name: test_load_local_v2s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -8918,40 +8918,40 @@ body: | ; VI: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; GFX9-LABEL: name: test_load_local_v2s16_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s16_align2 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load 4, align 2, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3) ; GFX9-UNALIGNED: $vgpr0 = COPY [[LOAD]](<2 x s16>) ; GFX10-LABEL: name: test_load_local_v2s16_align2 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) ; GFX10: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s16_align2 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10-UNALIGNED: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) ; GFX10-UNALIGNED: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(p3) = COPY $vgpr0 - %1:_(<2 x s16>) = G_LOAD %0 :: (load 4, align 2, addrspace 3) + %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 2, addrspace 3) $vgpr0 = COPY %1 ... @@ -8963,10 +8963,10 @@ body: | ; SI-LABEL: name: test_load_local_v2s16_align1 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; SI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -8979,9 +8979,9 @@ body: | ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -8999,10 +8999,10 @@ body: | ; SI: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; CI-LABEL: name: test_load_local_v2s16_align1 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; CI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -9015,9 +9015,9 @@ body: | ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -9035,10 +9035,10 @@ body: | ; CI: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; CI-DS128-LABEL: name: test_load_local_v2s16_align1 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; CI-DS128: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-DS128: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -9051,9 +9051,9 @@ body: | ; CI-DS128: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; CI-DS128: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI-DS128: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; CI-DS128: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -9071,10 +9071,10 @@ body: | ; CI-DS128: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; VI-LABEL: name: test_load_local_v2s16_align1 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -9085,9 +9085,9 @@ body: | ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) @@ -9103,10 +9103,10 @@ body: | ; VI: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; GFX9-LABEL: name: test_load_local_v2s16_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -9117,9 +9117,9 @@ body: | ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) @@ -9132,14 +9132,14 @@ body: | ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s16_align1 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load 4, align 1, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3) ; GFX9-UNALIGNED: $vgpr0 = COPY [[LOAD]](<2 x s16>) ; GFX10-LABEL: name: test_load_local_v2s16_align1 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX10: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX10: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -9150,9 +9150,9 @@ body: | ; GFX10: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX10: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX10: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; GFX10: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) @@ -9165,10 +9165,10 @@ body: | ; GFX10: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s16_align1 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX10-UNALIGNED: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -9179,9 +9179,9 @@ body: | ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX10-UNALIGNED: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX10-UNALIGNED: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; GFX10-UNALIGNED: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) @@ -9193,7 +9193,7 @@ body: | ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) ; GFX10-UNALIGNED: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(p3) = COPY $vgpr0 - %1:_(<2 x s16>) = G_LOAD %0 :: (load 4, align 1, addrspace 3) + %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 1, addrspace 3) $vgpr0 = COPY %1 ... @@ -9205,7 +9205,7 @@ body: | ; SI-LABEL: name: test_load_local_v3s16_align8 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) @@ -9213,7 +9213,7 @@ body: | ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-LABEL: name: test_load_local_v3s16_align8 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) @@ -9221,7 +9221,7 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-DS128-LABEL: name: test_load_local_v3s16_align8 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) ; CI-DS128: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) ; CI-DS128: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) @@ -9229,7 +9229,7 @@ body: | ; CI-DS128: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_local_v3s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) @@ -9237,7 +9237,7 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-LABEL: name: test_load_local_v3s16_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) @@ -9245,7 +9245,7 @@ body: | ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s16_align8 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) ; GFX9-UNALIGNED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) ; GFX9-UNALIGNED: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) @@ -9253,7 +9253,7 @@ body: | ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX10-LABEL: name: test_load_local_v3s16_align8 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) ; GFX10: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) ; GFX10: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) @@ -9261,14 +9261,14 @@ body: | ; GFX10: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s16_align8 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX10-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) ; GFX10-UNALIGNED: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX10-UNALIGNED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) %0:_(p3) = COPY $vgpr0 - %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 8, addrspace 3) + %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 8, addrspace 3) %2:_(<4 x s16>) = G_IMPLICIT_DEF %3:_(<4 x s16>) = G_INSERT %2, %1, 0 $vgpr0_vgpr1 = COPY %3 @@ -9282,13 +9282,13 @@ body: | ; SI-LABEL: name: test_load_local_v3s16_align2 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] @@ -9312,13 +9312,13 @@ body: | ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-LABEL: name: test_load_local_v3s16_align2 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] @@ -9342,13 +9342,13 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-DS128-LABEL: name: test_load_local_v3s16_align2 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] @@ -9372,13 +9372,13 @@ body: | ; CI-DS128: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_local_v3s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] @@ -9402,13 +9402,13 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-LABEL: name: test_load_local_v3s16_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) @@ -9423,19 +9423,19 @@ body: | ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s16_align2 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p3) :: (load 6, align 2, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 2, addrspace 3) ; GFX9-UNALIGNED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-UNALIGNED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0 ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX10-LABEL: name: test_load_local_v3s16_align2 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) @@ -9450,13 +9450,13 @@ body: | ; GFX10: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s16_align2 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10-UNALIGNED: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) @@ -9470,7 +9470,7 @@ body: | ; GFX10-UNALIGNED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0 ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) %0:_(p3) = COPY $vgpr0 - %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 2, addrspace 3) + %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 2, addrspace 3) %2:_(<4 x s16>) = G_IMPLICIT_DEF %3:_(<4 x s16>) = G_INSERT %2, %1, 0 $vgpr0_vgpr1 = COPY %3 @@ -9484,10 +9484,10 @@ body: | ; SI-LABEL: name: test_load_local_v3s16_align1 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; SI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -9500,9 +9500,9 @@ body: | ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -9513,9 +9513,9 @@ body: | ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; SI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -9543,10 +9543,10 @@ body: | ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-LABEL: name: test_load_local_v3s16_align1 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; CI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -9559,9 +9559,9 @@ body: | ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -9572,9 +9572,9 @@ body: | ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; CI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -9602,10 +9602,10 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-DS128-LABEL: name: test_load_local_v3s16_align1 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; CI-DS128: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-DS128: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -9618,9 +9618,9 @@ body: | ; CI-DS128: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; CI-DS128: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI-DS128: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; CI-DS128: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -9631,9 +9631,9 @@ body: | ; CI-DS128: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; CI-DS128: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI-DS128: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; CI-DS128: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; CI-DS128: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; CI-DS128: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; CI-DS128: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -9661,10 +9661,10 @@ body: | ; CI-DS128: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_local_v3s16_align1 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -9675,9 +9675,9 @@ body: | ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) @@ -9686,9 +9686,9 @@ body: | ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) @@ -9714,10 +9714,10 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-LABEL: name: test_load_local_v3s16_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -9728,9 +9728,9 @@ body: | ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) @@ -9739,9 +9739,9 @@ body: | ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) @@ -9762,16 +9762,16 @@ body: | ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s16_align1 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p3) :: (load 6, align 1, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 1, addrspace 3) ; GFX9-UNALIGNED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-UNALIGNED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0 ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX10-LABEL: name: test_load_local_v3s16_align1 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX10: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX10: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -9782,9 +9782,9 @@ body: | ; GFX10: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX10: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX10: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; GFX10: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) @@ -9793,9 +9793,9 @@ body: | ; GFX10: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] ; GFX10: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; GFX10: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; GFX10: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; GFX10: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; GFX10: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) @@ -9816,10 +9816,10 @@ body: | ; GFX10: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s16_align1 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX10-UNALIGNED: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -9830,9 +9830,9 @@ body: | ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX10-UNALIGNED: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX10-UNALIGNED: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; GFX10-UNALIGNED: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) @@ -9841,9 +9841,9 @@ body: | ; GFX10-UNALIGNED: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] ; GFX10-UNALIGNED: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10-UNALIGNED: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; GFX10-UNALIGNED: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; GFX10-UNALIGNED: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; GFX10-UNALIGNED: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) @@ -9863,7 +9863,7 @@ body: | ; GFX10-UNALIGNED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0 ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) %0:_(p3) = COPY $vgpr0 - %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 1, addrspace 3) + %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 1, addrspace 3) %2:_(<4 x s16>) = G_IMPLICIT_DEF %3:_(<4 x s16>) = G_INSERT %2, %1, 0 $vgpr0_vgpr1 = COPY %3 @@ -9876,38 +9876,38 @@ body: | liveins: $vgpr0 ; SI-LABEL: name: test_load_local_v4s16_align8 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) ; CI-LABEL: name: test_load_local_v4s16_align8 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) ; CI-DS128-LABEL: name: test_load_local_v4s16_align8 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) ; CI-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) ; VI-LABEL: name: test_load_local_v4s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) ; GFX9-LABEL: name: test_load_local_v4s16_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s16_align8 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) ; GFX10-LABEL: name: test_load_local_v4s16_align8 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s16_align8 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) %0:_(p3) = COPY $vgpr0 - %1:_(<4 x s16>) = G_LOAD %0 :: (load 8, align 8, addrspace 3) + %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 8, addrspace 3) $vgpr0_vgpr1 = COPY %1 ... @@ -9919,38 +9919,38 @@ body: | ; SI-LABEL: name: test_load_local_v4s16_align4 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) ; CI-LABEL: name: test_load_local_v4s16_align4 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) ; CI-DS128-LABEL: name: test_load_local_v4s16_align4 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) ; CI-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) ; VI-LABEL: name: test_load_local_v4s16_align4 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) ; GFX9-LABEL: name: test_load_local_v4s16_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s16_align4 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) ; GFX10-LABEL: name: test_load_local_v4s16_align4 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s16_align4 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) %0:_(p3) = COPY $vgpr0 - %1:_(<4 x s16>) = G_LOAD %0 :: (load 8, align 4, addrspace 3) + %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 4, addrspace 3) $vgpr0_vgpr1 = COPY %1 ... @@ -9961,16 +9961,16 @@ body: | liveins: $vgpr0 ; SI-LABEL: name: test_load_local_v4s16_align2 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -9991,16 +9991,16 @@ body: | ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; CI-LABEL: name: test_load_local_v4s16_align2 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -10021,16 +10021,16 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; CI-DS128-LABEL: name: test_load_local_v4s16_align2 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -10051,16 +10051,16 @@ body: | ; CI-DS128: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_load_local_v4s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -10081,16 +10081,16 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_load_local_v4s16_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) @@ -10101,20 +10101,20 @@ body: | ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s16_align2 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, align 2, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 2, addrspace 3) ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) ; GFX10-LABEL: name: test_load_local_v4s16_align2 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) @@ -10125,16 +10125,16 @@ body: | ; GFX10: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s16_align2 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10-UNALIGNED: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) @@ -10144,7 +10144,7 @@ body: | ; GFX10-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(p3) = COPY $vgpr0 - %1:_(<4 x s16>) = G_LOAD %0 :: (load 8, align 2, addrspace 3) + %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 2, addrspace 3) $vgpr0_vgpr1 = COPY %1 ... @@ -10156,10 +10156,10 @@ body: | ; SI-LABEL: name: test_load_local_v4s16_align1 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; SI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -10172,9 +10172,9 @@ body: | ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -10191,9 +10191,9 @@ body: | ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; SI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -10203,9 +10203,9 @@ body: | ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; SI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s32) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; SI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -10223,10 +10223,10 @@ body: | ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; CI-LABEL: name: test_load_local_v4s16_align1 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; CI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -10239,9 +10239,9 @@ body: | ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -10258,9 +10258,9 @@ body: | ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; CI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -10270,9 +10270,9 @@ body: | ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s32) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -10290,10 +10290,10 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; CI-DS128-LABEL: name: test_load_local_v4s16_align1 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; CI-DS128: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-DS128: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -10306,9 +10306,9 @@ body: | ; CI-DS128: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; CI-DS128: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI-DS128: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; CI-DS128: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -10325,9 +10325,9 @@ body: | ; CI-DS128: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-DS128: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI-DS128: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; CI-DS128: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; CI-DS128: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; CI-DS128: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; CI-DS128: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -10337,9 +10337,9 @@ body: | ; CI-DS128: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI-DS128: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI-DS128: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s32) - ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; CI-DS128: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; CI-DS128: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI-DS128: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] ; CI-DS128: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -10357,10 +10357,10 @@ body: | ; CI-DS128: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_load_local_v4s16_align1 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -10371,9 +10371,9 @@ body: | ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) @@ -10388,9 +10388,9 @@ body: | ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) @@ -10398,9 +10398,9 @@ body: | ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; VI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) @@ -10416,10 +10416,10 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_load_local_v4s16_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -10430,9 +10430,9 @@ body: | ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) @@ -10444,9 +10444,9 @@ body: | ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) @@ -10454,9 +10454,9 @@ body: | ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) @@ -10470,14 +10470,14 @@ body: | ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s16_align1 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, align 1, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 1, addrspace 3) ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) ; GFX10-LABEL: name: test_load_local_v4s16_align1 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX10: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX10: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -10488,9 +10488,9 @@ body: | ; GFX10: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX10: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX10: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; GFX10: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) @@ -10502,9 +10502,9 @@ body: | ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) ; GFX10: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; GFX10: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; GFX10: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; GFX10: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; GFX10: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) @@ -10512,9 +10512,9 @@ body: | ; GFX10: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) ; GFX10: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] ; GFX10: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32) - ; GFX10: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; GFX10: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; GFX10: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX10: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; GFX10: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; GFX10: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; GFX10: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] ; GFX10: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) @@ -10528,10 +10528,10 @@ body: | ; GFX10: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s16_align1 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX10-UNALIGNED: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -10542,9 +10542,9 @@ body: | ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX10-UNALIGNED: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX10-UNALIGNED: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; GFX10-UNALIGNED: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) @@ -10556,9 +10556,9 @@ body: | ; GFX10-UNALIGNED: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) ; GFX10-UNALIGNED: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10-UNALIGNED: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; GFX10-UNALIGNED: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; GFX10-UNALIGNED: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; GFX10-UNALIGNED: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) @@ -10566,9 +10566,9 @@ body: | ; GFX10-UNALIGNED: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] ; GFX10-UNALIGNED: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32) - ; GFX10-UNALIGNED: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; GFX10-UNALIGNED: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; GFX10-UNALIGNED: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] ; GFX10-UNALIGNED: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) @@ -10581,7 +10581,7 @@ body: | ; GFX10-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(p3) = COPY $vgpr0 - %1:_(<4 x s16>) = G_LOAD %0 :: (load 8, align 1, addrspace 3) + %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 1, addrspace 3) $vgpr0_vgpr1 = COPY %1 ... @@ -10593,38 +10593,38 @@ body: | ; SI-LABEL: name: test_load_local_v2s32_align8 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; CI-LABEL: name: test_load_local_v2s32_align8 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; CI-DS128-LABEL: name: test_load_local_v2s32_align8 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) ; CI-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; VI-LABEL: name: test_load_local_v2s32_align8 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; GFX9-LABEL: name: test_load_local_v2s32_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s32_align8 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; GFX10-LABEL: name: test_load_local_v2s32_align8 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s32_align8 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), addrspace 3) ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) %0:_(p3) = COPY $vgpr0 - %1:_(<2 x s32>) = G_LOAD %0 :: (load 8, align 8, addrspace 3) + %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 8, addrspace 3) $vgpr0_vgpr1 = COPY %1 ... @@ -10636,38 +10636,38 @@ body: | ; SI-LABEL: name: test_load_local_v2s32_align4 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; CI-LABEL: name: test_load_local_v2s32_align4 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; CI-DS128-LABEL: name: test_load_local_v2s32_align4 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) ; CI-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; VI-LABEL: name: test_load_local_v2s32_align4 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; GFX9-LABEL: name: test_load_local_v2s32_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s32_align4 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; GFX10-LABEL: name: test_load_local_v2s32_align4 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s32_align4 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 4, addrspace 3) ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) %0:_(p3) = COPY $vgpr0 - %1:_(<2 x s32>) = G_LOAD %0 :: (load 8, align 4, addrspace 3) + %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 4, addrspace 3) $vgpr0_vgpr1 = COPY %1 ... @@ -10679,10 +10679,10 @@ body: | ; SI-LABEL: name: test_load_local_v2s32_align2 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -10693,9 +10693,9 @@ body: | ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -10706,10 +10706,10 @@ body: | ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; CI-LABEL: name: test_load_local_v2s32_align2 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -10720,9 +10720,9 @@ body: | ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -10733,10 +10733,10 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; CI-DS128-LABEL: name: test_load_local_v2s32_align2 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -10747,9 +10747,9 @@ body: | ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; CI-DS128: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; CI-DS128: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; CI-DS128: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -10760,10 +10760,10 @@ body: | ; CI-DS128: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; VI-LABEL: name: test_load_local_v2s32_align2 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -10774,9 +10774,9 @@ body: | ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -10787,10 +10787,10 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_load_local_v2s32_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -10801,9 +10801,9 @@ body: | ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -10814,14 +10814,14 @@ body: | ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s32_align2 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 2, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 2, addrspace 3) ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; GFX10-LABEL: name: test_load_local_v2s32_align2 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -10832,9 +10832,9 @@ body: | ; GFX10: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX10: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -10845,10 +10845,10 @@ body: | ; GFX10: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s32_align2 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -10859,9 +10859,9 @@ body: | ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; GFX10-UNALIGNED: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX10-UNALIGNED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; GFX10-UNALIGNED: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -10871,7 +10871,7 @@ body: | ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(p3) = COPY $vgpr0 - %1:_(<2 x s32>) = G_LOAD %0 :: (load 8, align 2, addrspace 3) + %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 2, addrspace 3) $vgpr0_vgpr1 = COPY %1 ... @@ -10883,16 +10883,16 @@ body: | ; SI-LABEL: name: test_load_local_v2s32_align1 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -10913,13 +10913,13 @@ body: | ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -10938,16 +10938,16 @@ body: | ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; CI-LABEL: name: test_load_local_v2s32_align1 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -10968,13 +10968,13 @@ body: | ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; CI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -10993,16 +10993,16 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; CI-DS128-LABEL: name: test_load_local_v2s32_align1 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -11023,13 +11023,13 @@ body: | ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; CI-DS128: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI-DS128: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; CI-DS128: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; CI-DS128: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; CI-DS128: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; CI-DS128: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI-DS128: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; CI-DS128: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -11048,16 +11048,16 @@ body: | ; CI-DS128: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; VI-LABEL: name: test_load_local_v2s32_align1 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -11078,13 +11078,13 @@ body: | ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -11103,16 +11103,16 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_load_local_v2s32_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -11133,13 +11133,13 @@ body: | ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -11158,20 +11158,20 @@ body: | ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s32_align1 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 1, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s32>), align 1, addrspace 3) ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; GFX10-LABEL: name: test_load_local_v2s32_align1 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -11192,13 +11192,13 @@ body: | ; GFX10: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX10: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; GFX10: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; GFX10: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; GFX10: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; GFX10: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; GFX10: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX10: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; GFX10: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX10: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -11217,16 +11217,16 @@ body: | ; GFX10: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s32_align1 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -11247,13 +11247,13 @@ body: | ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX10-UNALIGNED: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10-UNALIGNED: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; GFX10-UNALIGNED: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX10-UNALIGNED: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; GFX10-UNALIGNED: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX10-UNALIGNED: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX10-UNALIGNED: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -11271,7 +11271,7 @@ body: | ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(p3) = COPY $vgpr0 - %1:_(<2 x s32>) = G_LOAD %0 :: (load 8, align 1, addrspace 3) + %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 1, addrspace 3) $vgpr0_vgpr1 = COPY %1 ... @@ -11283,16 +11283,16 @@ body: | ; SI-LABEL: name: test_load_local_v3s32_align16 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -11313,13 +11313,13 @@ body: | ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -11335,13 +11335,13 @@ body: | ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; SI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; SI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -11360,16 +11360,16 @@ body: | ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; CI-LABEL: name: test_load_local_v3s32_align16 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -11390,13 +11390,13 @@ body: | ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; CI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -11412,13 +11412,13 @@ body: | ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; CI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; CI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; CI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; CI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -11437,16 +11437,16 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; CI-DS128-LABEL: name: test_load_local_v3s32_align16 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -11467,13 +11467,13 @@ body: | ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; CI-DS128: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI-DS128: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; CI-DS128: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; CI-DS128: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; CI-DS128: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; CI-DS128: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI-DS128: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; CI-DS128: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -11489,13 +11489,13 @@ body: | ; CI-DS128: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; CI-DS128: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; CI-DS128: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-DS128: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; CI-DS128: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; CI-DS128: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI-DS128: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; CI-DS128: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; CI-DS128: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; CI-DS128: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; CI-DS128: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; CI-DS128: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI-DS128: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; CI-DS128: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; CI-DS128: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; CI-DS128: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; CI-DS128: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -11514,16 +11514,16 @@ body: | ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; VI-LABEL: name: test_load_local_v3s32_align16 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -11544,13 +11544,13 @@ body: | ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -11566,13 +11566,13 @@ body: | ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; VI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -11591,16 +11591,16 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; GFX9-LABEL: name: test_load_local_v3s32_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -11621,13 +11621,13 @@ body: | ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -11643,13 +11643,13 @@ body: | ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -11668,20 +11668,20 @@ body: | ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s32_align16 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 1, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 1, addrspace 3) ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) ; GFX10-LABEL: name: test_load_local_v3s32_align16 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -11702,13 +11702,13 @@ body: | ; GFX10: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX10: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; GFX10: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; GFX10: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; GFX10: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; GFX10: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; GFX10: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX10: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; GFX10: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX10: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -11724,13 +11724,13 @@ body: | ; GFX10: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; GFX10: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; GFX10: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; GFX10: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; GFX10: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX10: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; GFX10: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; GFX10: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; GFX10: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; GFX10: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; GFX10: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX10: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; GFX10: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX10: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -11749,16 +11749,16 @@ body: | ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s32_align16 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -11779,13 +11779,13 @@ body: | ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX10-UNALIGNED: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10-UNALIGNED: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; GFX10-UNALIGNED: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX10-UNALIGNED: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; GFX10-UNALIGNED: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX10-UNALIGNED: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX10-UNALIGNED: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -11801,13 +11801,13 @@ body: | ; GFX10-UNALIGNED: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; GFX10-UNALIGNED: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; GFX10-UNALIGNED: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10-UNALIGNED: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; GFX10-UNALIGNED: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX10-UNALIGNED: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; GFX10-UNALIGNED: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX10-UNALIGNED: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; GFX10-UNALIGNED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -11825,7 +11825,7 @@ body: | ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(p3) = COPY $vgpr0 - %1:_(<3 x s32>) = G_LOAD %0 :: (load 12, align 1, addrspace 3) + %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 1, addrspace 3) $vgpr0_vgpr1_vgpr2 = COPY %1 ... @@ -11837,73 +11837,73 @@ body: | ; SI-LABEL: name: test_load_local_v3s32_align4 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, addrspace 3) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; CI-LABEL: name: test_load_local_v3s32_align4 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, addrspace 3) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) ; CI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; CI-DS128-LABEL: name: test_load_local_v3s32_align4 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, addrspace 3) + ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) ; CI-DS128: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; VI-LABEL: name: test_load_local_v3s32_align4 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, addrspace 3) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; GFX9-LABEL: name: test_load_local_v3s32_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, addrspace 3) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s32_align4 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s32>), align 4, addrspace 3) ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) ; GFX10-LABEL: name: test_load_local_v3s32_align4 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, addrspace 3) + ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s32_align4 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) ; GFX10-UNALIGNED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(p3) = COPY $vgpr0 - %1:_(<3 x s32>) = G_LOAD %0 :: (load 12, align 4, addrspace 3) + %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 4, addrspace 3) $vgpr0_vgpr1_vgpr2 = COPY %1 ... @@ -11915,46 +11915,46 @@ body: | ; SI-LABEL: name: test_load_local_v4s32_align16 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 16, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (s64), align 16, addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 8 from unknown-address + 8, addrspace 3) + ; SI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, addrspace 3) ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) ; CI-LABEL: name: test_load_local_v4s32_align16 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 16, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (s64), align 16, addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 8 from unknown-address + 8, addrspace 3) + ; CI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, addrspace 3) ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) ; CI-DS128-LABEL: name: test_load_local_v4s32_align16 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), addrspace 3) ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; VI-LABEL: name: test_load_local_v4s32_align16 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), addrspace 3) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; GFX9-LABEL: name: test_load_local_v4s32_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), addrspace 3) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s32_align16 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), addrspace 3) ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; GFX10-LABEL: name: test_load_local_v4s32_align16 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), addrspace 3) ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s32_align16 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), addrspace 3) ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) %0:_(p3) = COPY $vgpr0 - %1:_(<4 x s32>) = G_LOAD %0 :: (load 16, align 16, addrspace 3) + %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 16, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -11966,46 +11966,46 @@ body: | ; SI-LABEL: name: test_load_local_v4s32_align8 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 8 from unknown-address + 8, addrspace 3) + ; SI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, addrspace 3) ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) ; CI-LABEL: name: test_load_local_v4s32_align8 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (s64), addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 8 from unknown-address + 8, addrspace 3) + ; CI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, addrspace 3) ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) ; CI-DS128-LABEL: name: test_load_local_v4s32_align8 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 8, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3) ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; VI-LABEL: name: test_load_local_v4s32_align8 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 8, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; GFX9-LABEL: name: test_load_local_v4s32_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 8, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s32_align8 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 8, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3) ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; GFX10-LABEL: name: test_load_local_v4s32_align8 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 8, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3) ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s32_align8 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 8, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 8, addrspace 3) ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) %0:_(p3) = COPY $vgpr0 - %1:_(<4 x s32>) = G_LOAD %0 :: (load 16, align 8, addrspace 3) + %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 8, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -12017,96 +12017,96 @@ body: | ; SI-LABEL: name: test_load_local_v4s32_align4 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 8 from unknown-address + 8, align 4, addrspace 3) + ; SI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3) ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) ; CI-LABEL: name: test_load_local_v4s32_align4 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 8 from unknown-address + 8, align 4, addrspace 3) + ; CI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3) ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) ; CI-DS128-LABEL: name: test_load_local_v4s32_align4 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 4, addrspace 3) + ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 4 from unknown-address + 8, addrspace 3) + ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 4 from unknown-address + 12, addrspace 3) + ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) ; VI-LABEL: name: test_load_local_v4s32_align4 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 4, addrspace 3) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 4 from unknown-address + 8, addrspace 3) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 4 from unknown-address + 12, addrspace 3) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) ; GFX9-LABEL: name: test_load_local_v4s32_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 4, addrspace 3) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 4 from unknown-address + 8, addrspace 3) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 4 from unknown-address + 12, addrspace 3) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s32_align4 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 4, addrspace 3) ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; GFX10-LABEL: name: test_load_local_v4s32_align4 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 4, addrspace 3) + ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 4 from unknown-address + 8, addrspace 3) + ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 4 from unknown-address + 12, addrspace 3) + ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s32_align4 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 4 from unknown-address + 8, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 4 from unknown-address + 12, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(p3) = COPY $vgpr0 - %1:_(<4 x s32>) = G_LOAD %0 :: (load 16, align 4, addrspace 3) + %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 4, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -12118,10 +12118,10 @@ body: | ; SI-LABEL: name: test_load_local_v4s32_align2 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -12132,9 +12132,9 @@ body: | ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -12144,9 +12144,9 @@ body: | ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 2 from unknown-address + 8, addrspace 3) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 2 from unknown-address + 10, addrspace 3) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -12154,9 +12154,9 @@ body: | ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; SI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 2 from unknown-address + 12, addrspace 3) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 2 from unknown-address + 14, addrspace 3) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3) ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) @@ -12168,10 +12168,10 @@ body: | ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) ; CI-LABEL: name: test_load_local_v4s32_align2 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -12182,9 +12182,9 @@ body: | ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -12194,9 +12194,9 @@ body: | ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 2 from unknown-address + 8, addrspace 3) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 2 from unknown-address + 10, addrspace 3) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -12204,9 +12204,9 @@ body: | ; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; CI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 2 from unknown-address + 12, addrspace 3) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 2 from unknown-address + 14, addrspace 3) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3) ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) @@ -12218,10 +12218,10 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) ; CI-DS128-LABEL: name: test_load_local_v4s32_align2 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -12232,9 +12232,9 @@ body: | ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; CI-DS128: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; CI-DS128: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; CI-DS128: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -12244,9 +12244,9 @@ body: | ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI-DS128: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 2 from unknown-address + 8, addrspace 3) + ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) ; CI-DS128: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 2 from unknown-address + 10, addrspace 3) + ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) ; CI-DS128: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI-DS128: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; CI-DS128: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -12254,9 +12254,9 @@ body: | ; CI-DS128: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; CI-DS128: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32) - ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 2 from unknown-address + 12, addrspace 3) + ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3) ; CI-DS128: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 2 from unknown-address + 14, addrspace 3) + ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3) ; CI-DS128: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) ; CI-DS128: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] ; CI-DS128: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) @@ -12268,10 +12268,10 @@ body: | ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) ; VI-LABEL: name: test_load_local_v4s32_align2 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -12282,9 +12282,9 @@ body: | ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -12294,9 +12294,9 @@ body: | ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 2 from unknown-address + 8, addrspace 3) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 2 from unknown-address + 10, addrspace 3) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -12304,9 +12304,9 @@ body: | ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; VI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 2 from unknown-address + 12, addrspace 3) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 2 from unknown-address + 14, addrspace 3) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3) ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) @@ -12318,10 +12318,10 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) ; GFX9-LABEL: name: test_load_local_v4s32_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -12332,9 +12332,9 @@ body: | ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -12344,9 +12344,9 @@ body: | ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 2 from unknown-address + 8, addrspace 3) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 2 from unknown-address + 10, addrspace 3) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -12354,9 +12354,9 @@ body: | ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 2 from unknown-address + 12, addrspace 3) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3) ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 2 from unknown-address + 14, addrspace 3) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3) ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) @@ -12368,14 +12368,14 @@ body: | ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s32_align2 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 2, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 2, addrspace 3) ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; GFX10-LABEL: name: test_load_local_v4s32_align2 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -12386,9 +12386,9 @@ body: | ; GFX10: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX10: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -12398,9 +12398,9 @@ body: | ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; GFX10: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX10: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 2 from unknown-address + 8, addrspace 3) + ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) ; GFX10: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 2 from unknown-address + 10, addrspace 3) + ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX10: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -12408,9 +12408,9 @@ body: | ; GFX10: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) ; GFX10: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; GFX10: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32) - ; GFX10: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 2 from unknown-address + 12, addrspace 3) + ; GFX10: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3) ; GFX10: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX10: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 2 from unknown-address + 14, addrspace 3) + ; GFX10: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3) ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) ; GFX10: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) @@ -12422,10 +12422,10 @@ body: | ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s32_align2 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -12436,9 +12436,9 @@ body: | ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; GFX10-UNALIGNED: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX10-UNALIGNED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; GFX10-UNALIGNED: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -12448,9 +12448,9 @@ body: | ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; GFX10-UNALIGNED: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX10-UNALIGNED: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 2 from unknown-address + 8, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 2 from unknown-address + 10, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) ; GFX10-UNALIGNED: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX10-UNALIGNED: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; GFX10-UNALIGNED: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -12458,9 +12458,9 @@ body: | ; GFX10-UNALIGNED: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; GFX10-UNALIGNED: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32) - ; GFX10-UNALIGNED: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 2 from unknown-address + 12, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 2 from unknown-address + 14, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3) ; GFX10-UNALIGNED: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) ; GFX10-UNALIGNED: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] ; GFX10-UNALIGNED: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) @@ -12471,7 +12471,7 @@ body: | ; GFX10-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>) ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) %0:_(p3) = COPY $vgpr0 - %1:_(<4 x s32>) = G_LOAD %0 :: (load 16, align 2, addrspace 3) + %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 2, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -12483,16 +12483,16 @@ body: | ; SI-LABEL: name: test_load_local_v4s32_align1 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -12513,13 +12513,13 @@ body: | ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -12536,13 +12536,13 @@ body: | ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; SI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; SI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -12558,13 +12558,13 @@ body: | ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32) ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; SI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C7]](s32) - ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 from unknown-address + 12, addrspace 3) + ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) ; SI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 from unknown-address + 13, addrspace 3) + ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) ; SI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 from unknown-address + 14, addrspace 3) + ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) ; SI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 from unknown-address + 15, addrspace 3) + ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -12584,16 +12584,16 @@ body: | ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) ; CI-LABEL: name: test_load_local_v4s32_align1 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -12614,13 +12614,13 @@ body: | ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; CI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -12637,13 +12637,13 @@ body: | ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; CI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; CI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; CI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; CI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -12659,13 +12659,13 @@ body: | ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32) ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; CI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C7]](s32) - ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 from unknown-address + 12, addrspace 3) + ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) ; CI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 from unknown-address + 13, addrspace 3) + ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) ; CI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 from unknown-address + 14, addrspace 3) + ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) ; CI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 from unknown-address + 15, addrspace 3) + ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) ; CI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; CI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; CI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -12685,16 +12685,16 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) ; CI-DS128-LABEL: name: test_load_local_v4s32_align1 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -12715,13 +12715,13 @@ body: | ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; CI-DS128: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI-DS128: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; CI-DS128: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; CI-DS128: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; CI-DS128: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; CI-DS128: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI-DS128: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; CI-DS128: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -12737,13 +12737,13 @@ body: | ; CI-DS128: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; CI-DS128: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; CI-DS128: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-DS128: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; CI-DS128: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; CI-DS128: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI-DS128: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; CI-DS128: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; CI-DS128: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; CI-DS128: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; CI-DS128: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; CI-DS128: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI-DS128: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; CI-DS128: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; CI-DS128: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; CI-DS128: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; CI-DS128: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -12760,13 +12760,13 @@ body: | ; CI-DS128: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; CI-DS128: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CI-DS128: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; CI-DS128: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 from unknown-address + 12, addrspace 3) + ; CI-DS128: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) ; CI-DS128: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; CI-DS128: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 from unknown-address + 13, addrspace 3) + ; CI-DS128: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) ; CI-DS128: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; CI-DS128: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 from unknown-address + 14, addrspace 3) + ; CI-DS128: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) ; CI-DS128: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; CI-DS128: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 from unknown-address + 15, addrspace 3) + ; CI-DS128: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) ; CI-DS128: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; CI-DS128: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; CI-DS128: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -12785,16 +12785,16 @@ body: | ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) ; VI-LABEL: name: test_load_local_v4s32_align1 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -12815,13 +12815,13 @@ body: | ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -12837,13 +12837,13 @@ body: | ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; VI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -12860,13 +12860,13 @@ body: | ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; VI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 from unknown-address + 12, addrspace 3) + ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) ; VI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 from unknown-address + 13, addrspace 3) + ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) ; VI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 from unknown-address + 14, addrspace 3) + ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) ; VI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 from unknown-address + 15, addrspace 3) + ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -12885,16 +12885,16 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) ; GFX9-LABEL: name: test_load_local_v4s32_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -12915,13 +12915,13 @@ body: | ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -12937,13 +12937,13 @@ body: | ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -12960,13 +12960,13 @@ body: | ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 from unknown-address + 12, addrspace 3) + ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 from unknown-address + 13, addrspace 3) + ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 from unknown-address + 14, addrspace 3) + ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 from unknown-address + 15, addrspace 3) + ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; GFX9: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -12985,20 +12985,20 @@ body: | ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s32_align1 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 1, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s32>), align 1, addrspace 3) ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; GFX10-LABEL: name: test_load_local_v4s32_align1 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -13019,13 +13019,13 @@ body: | ; GFX10: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX10: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; GFX10: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; GFX10: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; GFX10: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; GFX10: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; GFX10: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX10: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; GFX10: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX10: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -13041,13 +13041,13 @@ body: | ; GFX10: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; GFX10: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; GFX10: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; GFX10: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; GFX10: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX10: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; GFX10: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; GFX10: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; GFX10: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; GFX10: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; GFX10: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX10: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; GFX10: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX10: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -13064,13 +13064,13 @@ body: | ; GFX10: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; GFX10: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX10: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; GFX10: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 from unknown-address + 12, addrspace 3) + ; GFX10: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) ; GFX10: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; GFX10: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 from unknown-address + 13, addrspace 3) + ; GFX10: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) ; GFX10: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; GFX10: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 from unknown-address + 14, addrspace 3) + ; GFX10: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) ; GFX10: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; GFX10: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 from unknown-address + 15, addrspace 3) + ; GFX10: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; GFX10: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -13089,16 +13089,16 @@ body: | ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s32_align1 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -13119,13 +13119,13 @@ body: | ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX10-UNALIGNED: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10-UNALIGNED: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; GFX10-UNALIGNED: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX10-UNALIGNED: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; GFX10-UNALIGNED: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX10-UNALIGNED: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX10-UNALIGNED: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -13141,13 +13141,13 @@ body: | ; GFX10-UNALIGNED: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; GFX10-UNALIGNED: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; GFX10-UNALIGNED: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10-UNALIGNED: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; GFX10-UNALIGNED: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX10-UNALIGNED: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; GFX10-UNALIGNED: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX10-UNALIGNED: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; GFX10-UNALIGNED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -13164,13 +13164,13 @@ body: | ; GFX10-UNALIGNED: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; GFX10-UNALIGNED: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX10-UNALIGNED: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; GFX10-UNALIGNED: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 from unknown-address + 12, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 from unknown-address + 13, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; GFX10-UNALIGNED: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 from unknown-address + 14, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; GFX10-UNALIGNED: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 from unknown-address + 15, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) ; GFX10-UNALIGNED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; GFX10-UNALIGNED: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; GFX10-UNALIGNED: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -13188,7 +13188,7 @@ body: | ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(p3) = COPY $vgpr0 - %1:_(<4 x s32>) = G_LOAD %0 :: (load 16, align 1, addrspace 3) + %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 1, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -13200,82 +13200,82 @@ body: | ; SI-LABEL: name: test_load_local_v8s32_align32 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 32, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (s64), align 32, addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 8 from unknown-address + 8, addrspace 3) + ; SI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, addrspace 3) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load 8 from unknown-address + 16, align 16, addrspace 3) + ; SI: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load 8 from unknown-address + 24, addrspace 3) + ; SI: [[LOAD3:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (s64) from unknown-address + 24, addrspace 3) ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>), [[LOAD2]](<2 x s32>), [[LOAD3]](<2 x s32>) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) ; CI-LABEL: name: test_load_local_v8s32_align32 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 32, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (s64), align 32, addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 8 from unknown-address + 8, addrspace 3) + ; CI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, addrspace 3) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load 8 from unknown-address + 16, align 16, addrspace 3) + ; CI: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load 8 from unknown-address + 24, addrspace 3) + ; CI: [[LOAD3:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (s64) from unknown-address + 24, addrspace 3) ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>), [[LOAD2]](<2 x s32>), [[LOAD3]](<2 x s32>) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) ; CI-DS128-LABEL: name: test_load_local_v8s32_align32 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 32, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (s128), align 32, addrspace 3) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 16 from unknown-address + 16, addrspace 3) + ; CI-DS128: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (s128) from unknown-address + 16, addrspace 3) ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) ; VI-LABEL: name: test_load_local_v8s32_align32 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 32, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (s128), align 32, addrspace 3) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 16 from unknown-address + 16, addrspace 3) + ; VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (s128) from unknown-address + 16, addrspace 3) ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) ; GFX9-LABEL: name: test_load_local_v8s32_align32 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 32, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (s128), align 32, addrspace 3) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 16 from unknown-address + 16, addrspace 3) + ; GFX9: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (s128) from unknown-address + 16, addrspace 3) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v8s32_align32 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 32, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (s128), align 32, addrspace 3) ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-UNALIGNED: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 16 from unknown-address + 16, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (s128) from unknown-address + 16, addrspace 3) ; GFX9-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) ; GFX10-LABEL: name: test_load_local_v8s32_align32 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 32, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (s128), align 32, addrspace 3) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 16 from unknown-address + 16, addrspace 3) + ; GFX10: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (s128) from unknown-address + 16, addrspace 3) ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v8s32_align32 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 32, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (s128), align 32, addrspace 3) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 16 from unknown-address + 16, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (s128) from unknown-address + 16, addrspace 3) ; GFX10-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>) %0:_(p3) = COPY $vgpr0 - %1:_(<8 x s32>) = G_LOAD %0 :: (load 32, align 32, addrspace 3) + %1:_(<8 x s32>) = G_LOAD %0 :: (load (<8 x s32>), align 32, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 ... @@ -13287,38 +13287,142 @@ body: | ; SI-LABEL: name: test_load_local_v16s32_align32 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 32, addrspace 3) - ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (s64), align 32, addrspace 3) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; SI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, addrspace 3) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; SI: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; SI: [[LOAD3:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (s64) from unknown-address + 24, addrspace 3) + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; SI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; SI: [[LOAD4:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD3]](p3) :: (load (s64) from unknown-address + 32, align 32, addrspace 3) + ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 + ; SI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; SI: [[LOAD5:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD4]](p3) :: (load (s64) from unknown-address + 40, addrspace 3) + ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 + ; SI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; SI: [[LOAD6:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD5]](p3) :: (load (s64) from unknown-address + 48, align 16, addrspace 3) + ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 56 + ; SI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) + ; SI: [[LOAD7:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD6]](p3) :: (load (s64) from unknown-address + 56, addrspace 3) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>), [[LOAD2]](<2 x s32>), [[LOAD3]](<2 x s32>), [[LOAD4]](<2 x s32>), [[LOAD5]](<2 x s32>), [[LOAD6]](<2 x s32>), [[LOAD7]](<2 x s32>) + ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) ; CI-LABEL: name: test_load_local_v16s32_align32 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 32, addrspace 3) - ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (s64), align 32, addrspace 3) + ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; CI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, addrspace 3) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; CI: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; CI: [[LOAD3:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (s64) from unknown-address + 24, addrspace 3) + ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; CI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; CI: [[LOAD4:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD3]](p3) :: (load (s64) from unknown-address + 32, align 32, addrspace 3) + ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 + ; CI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; CI: [[LOAD5:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD4]](p3) :: (load (s64) from unknown-address + 40, addrspace 3) + ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 + ; CI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; CI: [[LOAD6:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD5]](p3) :: (load (s64) from unknown-address + 48, align 16, addrspace 3) + ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 56 + ; CI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) + ; CI: [[LOAD7:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD6]](p3) :: (load (s64) from unknown-address + 56, addrspace 3) + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>), [[LOAD2]](<2 x s32>), [[LOAD3]](<2 x s32>), [[LOAD4]](<2 x s32>), [[LOAD5]](<2 x s32>), [[LOAD6]](<2 x s32>), [[LOAD7]](<2 x s32>) + ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) ; CI-DS128-LABEL: name: test_load_local_v16s32_align32 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 32, addrspace 3) - ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (s128), align 32, addrspace 3) + ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; CI-DS128: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (s128) from unknown-address + 16, addrspace 3) + ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; CI-DS128: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (s128) from unknown-address + 32, align 32, addrspace 3) + ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 + ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; CI-DS128: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (s128) from unknown-address + 48, addrspace 3) + ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) + ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) ; VI-LABEL: name: test_load_local_v16s32_align32 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 32, addrspace 3) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (s128), align 32, addrspace 3) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (s128) from unknown-address + 16, addrspace 3) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; VI: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (s128) from unknown-address + 32, align 32, addrspace 3) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 + ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; VI: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (s128) from unknown-address + 48, addrspace 3) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) ; GFX9-LABEL: name: test_load_local_v16s32_align32 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 32, addrspace 3) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (s128), align 32, addrspace 3) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX9: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (s128) from unknown-address + 16, addrspace 3) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX9: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (s128) from unknown-address + 32, align 32, addrspace 3) + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 + ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX9: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (s128) from unknown-address + 48, addrspace 3) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v16s32_align32 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 32, addrspace 3) - ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (s128), align 32, addrspace 3) + ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX9-UNALIGNED: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (s128) from unknown-address + 16, addrspace 3) + ; GFX9-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; GFX9-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX9-UNALIGNED: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (s128) from unknown-address + 32, align 32, addrspace 3) + ; GFX9-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 + ; GFX9-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX9-UNALIGNED: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (s128) from unknown-address + 48, addrspace 3) + ; GFX9-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) + ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) ; GFX10-LABEL: name: test_load_local_v16s32_align32 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 32, addrspace 3) - ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) + ; GFX10: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (s128), align 32, addrspace 3) + ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (s128) from unknown-address + 16, addrspace 3) + ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX10: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (s128) from unknown-address + 32, align 32, addrspace 3) + ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 + ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX10: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (s128) from unknown-address + 48, addrspace 3) + ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) + ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v16s32_align32 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 32, addrspace 3) - ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (s128), align 32, addrspace 3) + ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (s128) from unknown-address + 16, addrspace 3) + ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (s128) from unknown-address + 32, align 32, addrspace 3) + ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 + ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load (s128) from unknown-address + 48, addrspace 3) + ; GFX10-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) + ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[CONCAT_VECTORS]](<16 x s32>) %0:_(p3) = COPY $vgpr0 - %1:_(<16 x s32>) = G_LOAD %0 :: (load 16, align 32, addrspace 3) + %1:_(<16 x s32>) = G_LOAD %0 :: (load (<16 x s32>), align 32, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1 ... @@ -13330,66 +13434,66 @@ body: | ; SI-LABEL: name: test_load_local_v2s64_align4 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8 from unknown-address + 8, align 4, addrspace 3) + ; SI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; CI-LABEL: name: test_load_local_v2s64_align4 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8 from unknown-address + 8, align 4, addrspace 3) + ; CI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3) ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; CI-DS128-LABEL: name: test_load_local_v2s64_align4 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8 from unknown-address + 8, align 4, addrspace 3) + ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3) ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64) ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; VI-LABEL: name: test_load_local_v2s64_align4 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8 from unknown-address + 8, align 4, addrspace 3) + ; VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-LABEL: name: test_load_local_v2s64_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8 from unknown-address + 8, align 4, addrspace 3) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s64_align4 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 4, addrspace 3) ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) ; GFX10-LABEL: name: test_load_local_v2s64_align4 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8 from unknown-address + 8, align 4, addrspace 3) + ; GFX10: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3) ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64) ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s64_align4 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8 from unknown-address + 8, align 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3) ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64) ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(p3) = COPY $vgpr0 - %1:_(<2 x s64>) = G_LOAD %0 :: (load 16, align 4, addrspace 3) + %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 4, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -13401,28 +13505,28 @@ body: | ; SI-LABEL: name: test_load_local_v2s64_align16 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 ; SI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; SI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 ; SI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; SI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -13469,21 +13573,21 @@ body: | ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; SI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; SI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; SI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s32) - ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 from unknown-address + 12, addrspace 3) + ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) ; SI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32) - ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 from unknown-address + 13, addrspace 3) + ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) ; SI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s32) - ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 from unknown-address + 14, addrspace 3) + ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) ; SI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s32) - ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 from unknown-address + 15, addrspace 3) + ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) ; SI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; SI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C8]](s32) @@ -13529,28 +13633,28 @@ body: | ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; CI-LABEL: name: test_load_local_v2s64_align16 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 ; CI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; CI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 ; CI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; CI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -13597,21 +13701,21 @@ body: | ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; CI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; CI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; CI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; CI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; CI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s32) - ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 from unknown-address + 12, addrspace 3) + ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) ; CI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32) - ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 from unknown-address + 13, addrspace 3) + ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) ; CI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s32) - ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 from unknown-address + 14, addrspace 3) + ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) ; CI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s32) - ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 from unknown-address + 15, addrspace 3) + ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) ; CI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; CI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C8]](s32) @@ -13657,28 +13761,28 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; CI-DS128-LABEL: name: test_load_local_v2s64_align16 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI-DS128: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 ; CI-DS128: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; CI-DS128: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; CI-DS128: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; CI-DS128: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 ; CI-DS128: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; CI-DS128: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI-DS128: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -13725,21 +13829,21 @@ body: | ; CI-DS128: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] ; CI-DS128: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; CI-DS128: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; CI-DS128: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; CI-DS128: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; CI-DS128: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI-DS128: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; CI-DS128: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; CI-DS128: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; CI-DS128: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; CI-DS128: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; CI-DS128: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI-DS128: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; CI-DS128: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; CI-DS128: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s32) - ; CI-DS128: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 from unknown-address + 12, addrspace 3) + ; CI-DS128: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) ; CI-DS128: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32) - ; CI-DS128: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 from unknown-address + 13, addrspace 3) + ; CI-DS128: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) ; CI-DS128: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s32) - ; CI-DS128: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 from unknown-address + 14, addrspace 3) + ; CI-DS128: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) ; CI-DS128: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s32) - ; CI-DS128: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 from unknown-address + 15, addrspace 3) + ; CI-DS128: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) ; CI-DS128: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; CI-DS128: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; CI-DS128: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C8]](s32) @@ -13785,28 +13889,28 @@ body: | ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; VI-LABEL: name: test_load_local_v2s64_align16 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 ; VI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; VI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 ; VI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; VI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -13845,21 +13949,21 @@ body: | ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; VI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C10]](s32) - ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; VI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s32) - ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 from unknown-address + 12, addrspace 3) + ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) ; VI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32) - ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 from unknown-address + 13, addrspace 3) + ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) ; VI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s32) - ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 from unknown-address + 14, addrspace 3) + ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) ; VI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s32) - ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 from unknown-address + 15, addrspace 3) + ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) ; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) @@ -13897,28 +14001,28 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-LABEL: name: test_load_local_v2s64_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; GFX9: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -13957,21 +14061,21 @@ body: | ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; GFX9: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C10]](s32) - ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s32) - ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 from unknown-address + 12, addrspace 3) + ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32) - ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 from unknown-address + 13, addrspace 3) + ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s32) - ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 from unknown-address + 14, addrspace 3) + ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s32) - ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 from unknown-address + 15, addrspace 3) + ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) ; GFX9: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; GFX9: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) @@ -14009,32 +14113,32 @@ body: | ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s64_align16 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 1, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 1, addrspace 3) ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) ; GFX10-LABEL: name: test_load_local_v2s64_align16 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; GFX10: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 ; GFX10: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; GFX10: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; GFX10: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; GFX10: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; GFX10: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; GFX10: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 ; GFX10: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; GFX10: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; GFX10: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; GFX10: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX10: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -14073,21 +14177,21 @@ body: | ; GFX10: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; GFX10: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX10: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C10]](s32) - ; GFX10: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; GFX10: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; GFX10: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX10: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; GFX10: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; GFX10: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; GFX10: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; GFX10: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; GFX10: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX10: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; GFX10: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; GFX10: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s32) - ; GFX10: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 from unknown-address + 12, addrspace 3) + ; GFX10: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) ; GFX10: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32) - ; GFX10: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 from unknown-address + 13, addrspace 3) + ; GFX10: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) ; GFX10: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s32) - ; GFX10: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 from unknown-address + 14, addrspace 3) + ; GFX10: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) ; GFX10: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s32) - ; GFX10: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 from unknown-address + 15, addrspace 3) + ; GFX10: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) ; GFX10: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; GFX10: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; GFX10: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) @@ -14125,28 +14229,28 @@ body: | ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s64_align16 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10-UNALIGNED: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; GFX10-UNALIGNED: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 ; GFX10-UNALIGNED: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; GFX10-UNALIGNED: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; GFX10-UNALIGNED: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; GFX10-UNALIGNED: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; GFX10-UNALIGNED: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 ; GFX10-UNALIGNED: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; GFX10-UNALIGNED: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; GFX10-UNALIGNED: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX10-UNALIGNED: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]] @@ -14185,21 +14289,21 @@ body: | ; GFX10-UNALIGNED: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) ; GFX10-UNALIGNED: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX10-UNALIGNED: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C10]](s32) - ; GFX10-UNALIGNED: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; GFX10-UNALIGNED: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX10-UNALIGNED: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s32) - ; GFX10-UNALIGNED: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 from unknown-address + 12, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32) - ; GFX10-UNALIGNED: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 from unknown-address + 13, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s32) - ; GFX10-UNALIGNED: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 from unknown-address + 14, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s32) - ; GFX10-UNALIGNED: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 from unknown-address + 15, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) ; GFX10-UNALIGNED: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32) ; GFX10-UNALIGNED: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]] ; GFX10-UNALIGNED: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32) @@ -14236,7 +14340,7 @@ body: | ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(p3) = COPY $vgpr0 - %1:_(<2 x s64>) = G_LOAD %0 :: (load 16, align 1, addrspace 3) + %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 1, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -14248,36 +14352,36 @@ body: | ; SI-LABEL: name: test_load_local_v3s64_align32 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 32, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 32, addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8 from unknown-address + 8, addrspace 3) + ; SI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, addrspace 3) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p3) :: (load 8 from unknown-address + 16, align 16, addrspace 3) + ; SI: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64) ; SI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; SI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) ; CI-LABEL: name: test_load_local_v3s64_align32 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 32, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 32, addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8 from unknown-address + 8, addrspace 3) + ; CI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, addrspace 3) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p3) :: (load 8 from unknown-address + 16, align 16, addrspace 3) + ; CI: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64) ; CI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; CI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) ; CI-DS128-LABEL: name: test_load_local_v3s64_align32 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 32, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (s128), align 32, addrspace 3) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8 from unknown-address + 16, align 16, addrspace 3) + ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) ; CI-DS128: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; CI-DS128: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF @@ -14285,10 +14389,10 @@ body: | ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) ; VI-LABEL: name: test_load_local_v3s64_align32 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 32, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (s128), align 32, addrspace 3) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8 from unknown-address + 16, align 16, addrspace 3) + ; VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) ; VI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; VI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF @@ -14296,10 +14400,10 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) ; GFX9-LABEL: name: test_load_local_v3s64_align32 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 32, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (s128), align 32, addrspace 3) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8 from unknown-address + 16, align 16, addrspace 3) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF @@ -14307,10 +14411,10 @@ body: | ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s64_align32 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 32, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (s128), align 32, addrspace 3) ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8 from unknown-address + 16, align 16, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) ; GFX9-UNALIGNED: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) ; GFX9-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; GFX9-UNALIGNED: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF @@ -14318,10 +14422,10 @@ body: | ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) ; GFX10-LABEL: name: test_load_local_v3s64_align32 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 32, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (s128), align 32, addrspace 3) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8 from unknown-address + 16, align 16, addrspace 3) + ; GFX10: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) ; GFX10: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; GFX10: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF @@ -14329,17 +14433,17 @@ body: | ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s64_align32 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 32, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (s128), align 32, addrspace 3) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8 from unknown-address + 16, align 16, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) ; GFX10-UNALIGNED: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; GFX10-UNALIGNED: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) %0:_(p3) = COPY $vgpr0 - %1:_(<3 x s64>) = G_LOAD %0 :: (load 24, align 32, addrspace 3) + %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 32, addrspace 3) %2:_(<4 x s64>) = G_IMPLICIT_DEF %3:_(<4 x s64>) = G_INSERT %2, %1, 0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3 @@ -14353,82 +14457,82 @@ body: | ; SI-LABEL: name: test_load_local_v4s64_align32 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 32, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 32, addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8 from unknown-address + 8, addrspace 3) + ; SI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, addrspace 3) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p3) :: (load 8 from unknown-address + 16, align 16, addrspace 3) + ; SI: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD2]](p3) :: (load 8 from unknown-address + 24, addrspace 3) + ; SI: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD2]](p3) :: (load (s64) from unknown-address + 24, addrspace 3) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64), [[LOAD3]](s64) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; CI-LABEL: name: test_load_local_v4s64_align32 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 32, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 32, addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8 from unknown-address + 8, addrspace 3) + ; CI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, addrspace 3) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p3) :: (load 8 from unknown-address + 16, align 16, addrspace 3) + ; CI: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD2]](p3) :: (load 8 from unknown-address + 24, addrspace 3) + ; CI: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD2]](p3) :: (load (s64) from unknown-address + 24, addrspace 3) ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64), [[LOAD3]](s64) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; CI-DS128-LABEL: name: test_load_local_v4s64_align32 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 32, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (s128), align 32, addrspace 3) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load 16 from unknown-address + 16, addrspace 3) + ; CI-DS128: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load (s128) from unknown-address + 16, addrspace 3) ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) ; VI-LABEL: name: test_load_local_v4s64_align32 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 32, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (s128), align 32, addrspace 3) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load 16 from unknown-address + 16, addrspace 3) + ; VI: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load (s128) from unknown-address + 16, addrspace 3) ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) ; GFX9-LABEL: name: test_load_local_v4s64_align32 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 32, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (s128), align 32, addrspace 3) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load 16 from unknown-address + 16, addrspace 3) + ; GFX9: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load (s128) from unknown-address + 16, addrspace 3) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v4s64_align32 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 32, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (s128), align 32, addrspace 3) ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-UNALIGNED: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load 16 from unknown-address + 16, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load (s128) from unknown-address + 16, addrspace 3) ; GFX9-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) ; GFX10-LABEL: name: test_load_local_v4s64_align32 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 32, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (s128), align 32, addrspace 3) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load 16 from unknown-address + 16, addrspace 3) + ; GFX10: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load (s128) from unknown-address + 16, addrspace 3) ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v4s64_align32 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 32, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (s128), align 32, addrspace 3) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load 16 from unknown-address + 16, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load (s128) from unknown-address + 16, addrspace 3) ; GFX10-UNALIGNED: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) %0:_(p3) = COPY $vgpr0 - %1:_(<4 x s64>) = G_LOAD %0 :: (load 32, align 32, addrspace 3) + %1:_(<4 x s64>) = G_LOAD %0 :: (load (<4 x s64>), align 32, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 ... @@ -14440,104 +14544,104 @@ body: | ; SI-LABEL: name: test_load_local_v2p1_align4 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 8 from unknown-address + 8, align 4, addrspace 3) + ; SI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3) ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) ; SI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; CI-LABEL: name: test_load_local_v2p1_align4 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 8 from unknown-address + 8, align 4, addrspace 3) + ; CI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 8, align 4, addrspace 3) ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>) ; CI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[CONCAT_VECTORS]](<4 x s32>) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; CI-DS128-LABEL: name: test_load_local_v2p1_align4 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 4, addrspace 3) + ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 4 from unknown-address + 8, addrspace 3) + ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 4 from unknown-address + 12, addrspace 3) + ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; CI-DS128: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; VI-LABEL: name: test_load_local_v2p1_align4 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 4, addrspace 3) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 4 from unknown-address + 8, addrspace 3) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 4 from unknown-address + 12, addrspace 3) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; VI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; GFX9-LABEL: name: test_load_local_v2p1_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 4, addrspace 3) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 4 from unknown-address + 8, addrspace 3) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 4 from unknown-address + 12, addrspace 3) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2p1_align4 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x p1>), align 4, addrspace 3) ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; GFX10-LABEL: name: test_load_local_v2p1_align4 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 4, addrspace 3) + ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 4 from unknown-address + 8, addrspace 3) + ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 4 from unknown-address + 12, addrspace 3) + ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2p1_align4 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 4, addrspace 3) ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 4 from unknown-address + 8, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 4 from unknown-address + 12, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 12, addrspace 3) ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) %0:_(p3) = COPY $vgpr0 - %1:_(<2 x p1>) = G_LOAD %0 :: (load 16, align 4, addrspace 3) + %1:_(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 4, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -14549,38 +14653,38 @@ body: | ; SI-LABEL: name: test_load_local_v2p3_align8 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3) ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) ; CI-LABEL: name: test_load_local_v2p3_align8 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3) ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) ; CI-DS128-LABEL: name: test_load_local_v2p3_align8 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3) ; CI-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) ; VI-LABEL: name: test_load_local_v2p3_align8 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) ; GFX9-LABEL: name: test_load_local_v2p3_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2p3_align8 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3) ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) ; GFX10-LABEL: name: test_load_local_v2p3_align8 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3) ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2p3_align8 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load (<2 x p3>), addrspace 3) ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) %0:_(p3) = COPY $vgpr0 - %1:_(<2 x p3>) = G_LOAD %0 :: (load 8, align 8, addrspace 3) + %1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 3) $vgpr0_vgpr1 = COPY %1 ... @@ -14592,38 +14696,38 @@ body: | ; SI-LABEL: name: test_extload_local_s32_from_1_align4 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; SI: $vgpr0 = COPY [[LOAD]](s32) ; CI-LABEL: name: test_extload_local_s32_from_1_align4 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; CI: $vgpr0 = COPY [[LOAD]](s32) ; CI-DS128-LABEL: name: test_extload_local_s32_from_1_align4 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; CI-DS128: $vgpr0 = COPY [[LOAD]](s32) ; VI-LABEL: name: test_extload_local_s32_from_1_align4 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; VI: $vgpr0 = COPY [[LOAD]](s32) ; GFX9-LABEL: name: test_extload_local_s32_from_1_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX9: $vgpr0 = COPY [[LOAD]](s32) ; GFX9-UNALIGNED-LABEL: name: test_extload_local_s32_from_1_align4 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX9-UNALIGNED: $vgpr0 = COPY [[LOAD]](s32) ; GFX10-LABEL: name: test_extload_local_s32_from_1_align4 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX10: $vgpr0 = COPY [[LOAD]](s32) ; GFX10-UNALIGNED-LABEL: name: test_extload_local_s32_from_1_align4 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX10-UNALIGNED: $vgpr0 = COPY [[LOAD]](s32) %0:_(p3) = COPY $vgpr0 - %1:_(s32) = G_LOAD %0 :: (load 1, align 4, addrspace 3) + %1:_(s32) = G_LOAD %0 :: (load (s8), align 4, addrspace 3) $vgpr0 = COPY %1 ... @@ -14635,38 +14739,38 @@ body: | ; SI-LABEL: name: test_extload_local_s32_from_2_align4 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; SI: $vgpr0 = COPY [[LOAD]](s32) ; CI-LABEL: name: test_extload_local_s32_from_2_align4 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; CI: $vgpr0 = COPY [[LOAD]](s32) ; CI-DS128-LABEL: name: test_extload_local_s32_from_2_align4 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; CI-DS128: $vgpr0 = COPY [[LOAD]](s32) ; VI-LABEL: name: test_extload_local_s32_from_2_align4 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; VI: $vgpr0 = COPY [[LOAD]](s32) ; GFX9-LABEL: name: test_extload_local_s32_from_2_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX9: $vgpr0 = COPY [[LOAD]](s32) ; GFX9-UNALIGNED-LABEL: name: test_extload_local_s32_from_2_align4 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX9-UNALIGNED: $vgpr0 = COPY [[LOAD]](s32) ; GFX10-LABEL: name: test_extload_local_s32_from_2_align4 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX10: $vgpr0 = COPY [[LOAD]](s32) ; GFX10-UNALIGNED-LABEL: name: test_extload_local_s32_from_2_align4 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX10-UNALIGNED: $vgpr0 = COPY [[LOAD]](s32) %0:_(p3) = COPY $vgpr0 - %1:_(s32) = G_LOAD %0 :: (load 2, align 4, addrspace 3) + %1:_(s32) = G_LOAD %0 :: (load (s16), align 4, addrspace 3) $vgpr0 = COPY %1 ... @@ -14679,46 +14783,46 @@ body: | ; SI-LABEL: name: test_extload_local_s64_from_1_align4 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-LABEL: name: test_extload_local_s64_from_1_align4 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-DS128-LABEL: name: test_extload_local_s64_from_1_align4 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; CI-DS128: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-DS128: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_extload_local_s64_from_1_align4 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-LABEL: name: test_extload_local_s64_from_1_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-UNALIGNED-LABEL: name: test_extload_local_s64_from_1_align4 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX9-UNALIGNED: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX10-LABEL: name: test_extload_local_s64_from_1_align4 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX10: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX10: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX10-UNALIGNED-LABEL: name: test_extload_local_s64_from_1_align4 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX10-UNALIGNED: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p3) = COPY $vgpr0 - %1:_(s64) = G_LOAD %0 :: (load 1, align 4, addrspace 3) + %1:_(s64) = G_LOAD %0 :: (load (s8), align 4, addrspace 3) $vgpr0_vgpr1 = COPY %1 ... @@ -14730,46 +14834,46 @@ body: | ; SI-LABEL: name: test_extload_local_s64_from_2_align4 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-LABEL: name: test_extload_local_s64_from_2_align4 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-DS128-LABEL: name: test_extload_local_s64_from_2_align4 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; CI-DS128: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-DS128: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_extload_local_s64_from_2_align4 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-LABEL: name: test_extload_local_s64_from_2_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-UNALIGNED-LABEL: name: test_extload_local_s64_from_2_align4 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX9-UNALIGNED: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX10-LABEL: name: test_extload_local_s64_from_2_align4 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX10: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX10: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX10-UNALIGNED-LABEL: name: test_extload_local_s64_from_2_align4 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX10-UNALIGNED: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p3) = COPY $vgpr0 - %1:_(s64) = G_LOAD %0 :: (load 2, align 4, addrspace 3) + %1:_(s64) = G_LOAD %0 :: (load (s16), align 4, addrspace 3) $vgpr0_vgpr1 = COPY %1 ... @@ -14781,46 +14885,46 @@ body: | ; SI-LABEL: name: test_extload_local_s64_from_4_align4 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-LABEL: name: test_extload_local_s64_from_4_align4 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-DS128-LABEL: name: test_extload_local_s64_from_4_align4 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; CI-DS128: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-DS128: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_extload_local_s64_from_4_align4 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-LABEL: name: test_extload_local_s64_from_4_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-UNALIGNED-LABEL: name: test_extload_local_s64_from_4_align4 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; GFX9-UNALIGNED: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX10-LABEL: name: test_extload_local_s64_from_4_align4 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; GFX10: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX10: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX10-UNALIGNED-LABEL: name: test_extload_local_s64_from_4_align4 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; GFX10-UNALIGNED: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p3) = COPY $vgpr0 - %1:_(s64) = G_LOAD %0 :: (load 4, align 4, addrspace 3) + %1:_(s64) = G_LOAD %0 :: (load (s32), align 4, addrspace 3) $vgpr0_vgpr1 = COPY %1 ... @@ -14832,7 +14936,7 @@ body: | ; SI-LABEL: name: test_extload_local_s128_from_4_align4 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) ; SI: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF @@ -14840,7 +14944,7 @@ body: | ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) ; CI-LABEL: name: test_extload_local_s128_from_4_align4 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) ; CI: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF @@ -14848,7 +14952,7 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) ; CI-DS128-LABEL: name: test_extload_local_s128_from_4_align4 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; CI-DS128: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CI-DS128: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) ; CI-DS128: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF @@ -14856,7 +14960,7 @@ body: | ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) ; VI-LABEL: name: test_extload_local_s128_from_4_align4 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) ; VI: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF @@ -14864,7 +14968,7 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) ; GFX9-LABEL: name: test_extload_local_s128_from_4_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) ; GFX9: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF @@ -14872,7 +14976,7 @@ body: | ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) ; GFX9-UNALIGNED-LABEL: name: test_extload_local_s128_from_4_align4 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; GFX9-UNALIGNED: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9-UNALIGNED: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) ; GFX9-UNALIGNED: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF @@ -14880,7 +14984,7 @@ body: | ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) ; GFX10-LABEL: name: test_extload_local_s128_from_4_align4 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) ; GFX10: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF @@ -14888,14 +14992,14 @@ body: | ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) ; GFX10-UNALIGNED-LABEL: name: test_extload_local_s128_from_4_align4 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; GFX10-UNALIGNED: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10-UNALIGNED: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) ; GFX10-UNALIGNED: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; GFX10-UNALIGNED: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) %0:_(p3) = COPY $vgpr0 - %1:_(s128) = G_LOAD %0 :: (load 4, align 4, addrspace 3) + %1:_(s128) = G_LOAD %0 :: (load (s32), align 4, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -14907,46 +15011,46 @@ body: | ; SI-LABEL: name: test_extload_local_s64_from_2_align2 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-LABEL: name: test_extload_local_s64_from_2_align2 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-DS128-LABEL: name: test_extload_local_s64_from_2_align2 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; CI-DS128: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-DS128: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_extload_local_s64_from_2_align2 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-LABEL: name: test_extload_local_s64_from_2_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-UNALIGNED-LABEL: name: test_extload_local_s64_from_2_align2 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX9-UNALIGNED: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX10-LABEL: name: test_extload_local_s64_from_2_align2 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX10: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX10: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX10-UNALIGNED-LABEL: name: test_extload_local_s64_from_2_align2 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), align 4, addrspace 3) ; GFX10-UNALIGNED: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p3) = COPY $vgpr0 - %1:_(s64) = G_LOAD %0 :: (load 2, align 4, addrspace 3) + %1:_(s64) = G_LOAD %0 :: (load (s16), align 4, addrspace 3) $vgpr0_vgpr1 = COPY %1 ... @@ -14958,46 +15062,46 @@ body: | ; SI-LABEL: name: test_extload_local_s64_from_1_align1 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-LABEL: name: test_extload_local_s64_from_1_align1 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-DS128-LABEL: name: test_extload_local_s64_from_1_align1 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; CI-DS128: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI-DS128: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_extload_local_s64_from_1_align1 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-LABEL: name: test_extload_local_s64_from_1_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-UNALIGNED-LABEL: name: test_extload_local_s64_from_1_align1 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX9-UNALIGNED: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX10-LABEL: name: test_extload_local_s64_from_1_align1 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX10: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX10: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX10-UNALIGNED-LABEL: name: test_extload_local_s64_from_1_align1 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, align 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), align 4, addrspace 3) ; GFX10-UNALIGNED: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p3) = COPY $vgpr0 - %1:_(s64) = G_LOAD %0 :: (load 1, align 4, addrspace 3) + %1:_(s64) = G_LOAD %0 :: (load (s8), align 4, addrspace 3) $vgpr0_vgpr1 = COPY %1 ... @@ -15009,38 +15113,38 @@ body: | ; SI-LABEL: name: test_extload_local_v2s32_from_4_align1 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 4, align 1, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3) ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; CI-LABEL: name: test_extload_local_v2s32_from_4_align1 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 4, align 1, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3) ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; CI-DS128-LABEL: name: test_extload_local_v2s32_from_4_align1 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 4, align 1, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3) ; CI-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; VI-LABEL: name: test_extload_local_v2s32_from_4_align1 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 4, align 1, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; GFX9-LABEL: name: test_extload_local_v2s32_from_4_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 4, align 1, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; GFX9-UNALIGNED-LABEL: name: test_extload_local_v2s32_from_4_align1 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 4, align 1, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3) ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; GFX10-LABEL: name: test_extload_local_v2s32_from_4_align1 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 4, align 1, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3) ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; GFX10-UNALIGNED-LABEL: name: test_extload_local_v2s32_from_4_align1 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 4, align 1, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 1, addrspace 3) ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) %0:_(p3) = COPY $vgpr0 - %1:_(<2 x s32>) = G_LOAD %0 :: (load 4, align 1, addrspace 3) + %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s16>), align 1, addrspace 3) $vgpr0_vgpr1 = COPY %1 ... @@ -15052,38 +15156,38 @@ body: | ; SI-LABEL: name: test_extload_local_v2s32_from_4_align2 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 4, align 2, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3) ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; CI-LABEL: name: test_extload_local_v2s32_from_4_align2 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 4, align 2, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3) ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; CI-DS128-LABEL: name: test_extload_local_v2s32_from_4_align2 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 4, align 2, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3) ; CI-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; VI-LABEL: name: test_extload_local_v2s32_from_4_align2 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 4, align 2, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; GFX9-LABEL: name: test_extload_local_v2s32_from_4_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 4, align 2, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; GFX9-UNALIGNED-LABEL: name: test_extload_local_v2s32_from_4_align2 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 4, align 2, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3) ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; GFX10-LABEL: name: test_extload_local_v2s32_from_4_align2 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 4, align 2, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3) ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; GFX10-UNALIGNED-LABEL: name: test_extload_local_v2s32_from_4_align2 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 4, align 2, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), align 2, addrspace 3) ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) %0:_(p3) = COPY $vgpr0 - %1:_(<2 x s32>) = G_LOAD %0 :: (load 4, align 2, addrspace 3) + %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s16>), align 2, addrspace 3) $vgpr0_vgpr1 = COPY %1 ... @@ -15095,38 +15199,38 @@ body: | ; SI-LABEL: name: test_extload_local_v2s32_from_4_align4 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; CI-LABEL: name: test_extload_local_v2s32_from_4_align4 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; CI-DS128-LABEL: name: test_extload_local_v2s32_from_4_align4 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) ; CI-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; VI-LABEL: name: test_extload_local_v2s32_from_4_align4 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; GFX9-LABEL: name: test_extload_local_v2s32_from_4_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; GFX9-UNALIGNED-LABEL: name: test_extload_local_v2s32_from_4_align4 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) ; GFX9-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; GFX10-LABEL: name: test_extload_local_v2s32_from_4_align4 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; GFX10-UNALIGNED-LABEL: name: test_extload_local_v2s32_from_4_align4 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (<2 x s16>), addrspace 3) ; GFX10-UNALIGNED: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) %0:_(p3) = COPY $vgpr0 - %1:_(<2 x s32>) = G_LOAD %0 :: (load 4, align 4, addrspace 3) + %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 3) $vgpr0_vgpr1 = COPY %1 ... @@ -15138,38 +15242,38 @@ body: | ; SI-LABEL: name: test_extload_local_v3s32_from_6_align4 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 6, align 4, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3) ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) ; CI-LABEL: name: test_extload_local_v3s32_from_6_align4 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 6, align 4, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3) ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) ; CI-DS128-LABEL: name: test_extload_local_v3s32_from_6_align4 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 6, align 4, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3) ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) ; VI-LABEL: name: test_extload_local_v3s32_from_6_align4 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 6, align 4, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) ; GFX9-LABEL: name: test_extload_local_v3s32_from_6_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 6, align 4, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) ; GFX9-UNALIGNED-LABEL: name: test_extload_local_v3s32_from_6_align4 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 6, align 4, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3) ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) ; GFX10-LABEL: name: test_extload_local_v3s32_from_6_align4 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 6, align 4, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3) ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) ; GFX10-UNALIGNED-LABEL: name: test_extload_local_v3s32_from_6_align4 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 6, align 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (<3 x s16>), align 4, addrspace 3) ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) %0:_(p3) = COPY $vgpr0 - %1:_(<3 x s32>) = G_LOAD %0 :: (load 6, align 4, addrspace 3) + %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s16>), align 4, addrspace 3) $vgpr0_vgpr1_vgpr2 = COPY %1 ... @@ -15181,59 +15285,59 @@ body: | ; SI-LABEL: name: test_extload_local_v4s32_from_8_align4 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; CI-LABEL: name: test_extload_local_v4s32_from_8_align4 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; CI-DS128-LABEL: name: test_extload_local_v4s32_from_8_align4 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) ; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; VI-LABEL: name: test_extload_local_v4s32_from_8_align4 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; GFX9-LABEL: name: test_extload_local_v4s32_from_8_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; GFX9-UNALIGNED-LABEL: name: test_extload_local_v4s32_from_8_align4 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; GFX10-LABEL: name: test_extload_local_v4s32_from_8_align4 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; GFX10-UNALIGNED-LABEL: name: test_extload_local_v4s32_from_8_align4 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), align 4, addrspace 3) ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) %0:_(p3) = COPY $vgpr0 - %1:_(<4 x s32>) = G_LOAD %0 :: (load 8, align 4, addrspace 3) + %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s16>), align 4, addrspace 3) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... --- -name: test_extload_local_v2s96_from_24_align1 +name: test_load_local_v2s96_align1 body: | bb.0: liveins: $vgpr0 - ; SI-LABEL: name: test_extload_local_v2s96_from_24_align1 + ; SI-LABEL: name: test_load_local_v2s96_align1 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -15254,13 +15358,13 @@ body: | ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -15276,13 +15380,13 @@ body: | ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; SI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; SI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -15301,13 +15405,13 @@ body: | ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; SI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 from unknown-address + 12, addrspace 3) + ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) ; SI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 from unknown-address + 13, addrspace 3) + ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) ; SI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 from unknown-address + 14, addrspace 3) + ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) ; SI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 from unknown-address + 15, addrspace 3) + ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -15323,13 +15427,13 @@ body: | ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32) ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]] ; SI: [[PTR_ADD15:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s32) - ; SI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p3) :: (load 1 from unknown-address + 16, addrspace 3) + ; SI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p3) :: (load (s8) from unknown-address + 16, addrspace 3) ; SI: [[PTR_ADD16:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32) - ; SI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p3) :: (load 1 from unknown-address + 17, addrspace 3) + ; SI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p3) :: (load (s8) from unknown-address + 17, addrspace 3) ; SI: [[PTR_ADD17:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s32) - ; SI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p3) :: (load 1 from unknown-address + 18, addrspace 3) + ; SI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p3) :: (load (s8) from unknown-address + 18, addrspace 3) ; SI: [[PTR_ADD18:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32) - ; SI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p3) :: (load 1 from unknown-address + 19, addrspace 3) + ; SI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p3) :: (load (s8) from unknown-address + 19, addrspace 3) ; SI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD16]](s32) ; SI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]] ; SI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32) @@ -15345,13 +15449,13 @@ body: | ; SI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32) ; SI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]] ; SI: [[PTR_ADD19:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32) - ; SI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p3) :: (load 1 from unknown-address + 20, addrspace 3) + ; SI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p3) :: (load (s8) from unknown-address + 20, addrspace 3) ; SI: [[PTR_ADD20:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32) - ; SI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p3) :: (load 1 from unknown-address + 21, addrspace 3) + ; SI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p3) :: (load (s8) from unknown-address + 21, addrspace 3) ; SI: [[PTR_ADD21:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s32) - ; SI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p3) :: (load 1 from unknown-address + 22, addrspace 3) + ; SI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p3) :: (load (s8) from unknown-address + 22, addrspace 3) ; SI: [[PTR_ADD22:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32) - ; SI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p3) :: (load 1 from unknown-address + 23, addrspace 3) + ; SI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p3) :: (load (s8) from unknown-address + 23, addrspace 3) ; SI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD20]](s32) ; SI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]] ; SI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32) @@ -15372,18 +15476,18 @@ body: | ; SI: [[COPY26:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY25]](s96) ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY26]](s96) - ; CI-LABEL: name: test_extload_local_v2s96_from_24_align1 + ; CI-LABEL: name: test_load_local_v2s96_align1 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -15404,13 +15508,13 @@ body: | ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; CI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -15426,13 +15530,13 @@ body: | ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; CI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; CI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; CI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; CI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -15451,13 +15555,13 @@ body: | ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 from unknown-address + 12, addrspace 3) + ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) ; CI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 from unknown-address + 13, addrspace 3) + ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) ; CI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 from unknown-address + 14, addrspace 3) + ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) ; CI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 from unknown-address + 15, addrspace 3) + ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) ; CI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; CI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; CI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -15473,13 +15577,13 @@ body: | ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32) ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]] ; CI: [[PTR_ADD15:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s32) - ; CI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p3) :: (load 1 from unknown-address + 16, addrspace 3) + ; CI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p3) :: (load (s8) from unknown-address + 16, addrspace 3) ; CI: [[PTR_ADD16:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32) - ; CI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p3) :: (load 1 from unknown-address + 17, addrspace 3) + ; CI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p3) :: (load (s8) from unknown-address + 17, addrspace 3) ; CI: [[PTR_ADD17:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s32) - ; CI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p3) :: (load 1 from unknown-address + 18, addrspace 3) + ; CI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p3) :: (load (s8) from unknown-address + 18, addrspace 3) ; CI: [[PTR_ADD18:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32) - ; CI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p3) :: (load 1 from unknown-address + 19, addrspace 3) + ; CI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p3) :: (load (s8) from unknown-address + 19, addrspace 3) ; CI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD16]](s32) ; CI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]] ; CI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32) @@ -15495,13 +15599,13 @@ body: | ; CI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32) ; CI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]] ; CI: [[PTR_ADD19:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32) - ; CI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p3) :: (load 1 from unknown-address + 20, addrspace 3) + ; CI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p3) :: (load (s8) from unknown-address + 20, addrspace 3) ; CI: [[PTR_ADD20:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32) - ; CI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p3) :: (load 1 from unknown-address + 21, addrspace 3) + ; CI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p3) :: (load (s8) from unknown-address + 21, addrspace 3) ; CI: [[PTR_ADD21:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s32) - ; CI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p3) :: (load 1 from unknown-address + 22, addrspace 3) + ; CI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p3) :: (load (s8) from unknown-address + 22, addrspace 3) ; CI: [[PTR_ADD22:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32) - ; CI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p3) :: (load 1 from unknown-address + 23, addrspace 3) + ; CI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p3) :: (load (s8) from unknown-address + 23, addrspace 3) ; CI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD20]](s32) ; CI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]] ; CI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32) @@ -15522,18 +15626,18 @@ body: | ; CI: [[COPY26:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY25]](s96) ; CI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY26]](s96) - ; CI-DS128-LABEL: name: test_extload_local_v2s96_from_24_align1 + ; CI-DS128-LABEL: name: test_load_local_v2s96_align1 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; CI-DS128: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -15554,13 +15658,13 @@ body: | ; CI-DS128: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; CI-DS128: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI-DS128: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; CI-DS128: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; CI-DS128: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; CI-DS128: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; CI-DS128: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI-DS128: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; CI-DS128: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -15576,13 +15680,13 @@ body: | ; CI-DS128: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; CI-DS128: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; CI-DS128: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-DS128: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; CI-DS128: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; CI-DS128: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI-DS128: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; CI-DS128: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; CI-DS128: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; CI-DS128: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; CI-DS128: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; CI-DS128: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI-DS128: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; CI-DS128: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; CI-DS128: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; CI-DS128: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; CI-DS128: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -15601,13 +15705,13 @@ body: | ; CI-DS128: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI-DS128: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CI-DS128: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; CI-DS128: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 from unknown-address + 12, addrspace 3) + ; CI-DS128: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) ; CI-DS128: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; CI-DS128: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 from unknown-address + 13, addrspace 3) + ; CI-DS128: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) ; CI-DS128: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; CI-DS128: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 from unknown-address + 14, addrspace 3) + ; CI-DS128: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) ; CI-DS128: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; CI-DS128: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 from unknown-address + 15, addrspace 3) + ; CI-DS128: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) ; CI-DS128: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; CI-DS128: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; CI-DS128: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -15623,13 +15727,13 @@ body: | ; CI-DS128: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32) ; CI-DS128: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]] ; CI-DS128: [[PTR_ADD15:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s32) - ; CI-DS128: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p3) :: (load 1 from unknown-address + 16, addrspace 3) + ; CI-DS128: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p3) :: (load (s8) from unknown-address + 16, addrspace 3) ; CI-DS128: [[PTR_ADD16:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32) - ; CI-DS128: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p3) :: (load 1 from unknown-address + 17, addrspace 3) + ; CI-DS128: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p3) :: (load (s8) from unknown-address + 17, addrspace 3) ; CI-DS128: [[PTR_ADD17:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s32) - ; CI-DS128: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p3) :: (load 1 from unknown-address + 18, addrspace 3) + ; CI-DS128: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p3) :: (load (s8) from unknown-address + 18, addrspace 3) ; CI-DS128: [[PTR_ADD18:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32) - ; CI-DS128: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p3) :: (load 1 from unknown-address + 19, addrspace 3) + ; CI-DS128: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p3) :: (load (s8) from unknown-address + 19, addrspace 3) ; CI-DS128: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD16]](s32) ; CI-DS128: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]] ; CI-DS128: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32) @@ -15645,13 +15749,13 @@ body: | ; CI-DS128: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32) ; CI-DS128: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]] ; CI-DS128: [[PTR_ADD19:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32) - ; CI-DS128: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p3) :: (load 1 from unknown-address + 20, addrspace 3) + ; CI-DS128: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p3) :: (load (s8) from unknown-address + 20, addrspace 3) ; CI-DS128: [[PTR_ADD20:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32) - ; CI-DS128: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p3) :: (load 1 from unknown-address + 21, addrspace 3) + ; CI-DS128: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p3) :: (load (s8) from unknown-address + 21, addrspace 3) ; CI-DS128: [[PTR_ADD21:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s32) - ; CI-DS128: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p3) :: (load 1 from unknown-address + 22, addrspace 3) + ; CI-DS128: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p3) :: (load (s8) from unknown-address + 22, addrspace 3) ; CI-DS128: [[PTR_ADD22:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32) - ; CI-DS128: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p3) :: (load 1 from unknown-address + 23, addrspace 3) + ; CI-DS128: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p3) :: (load (s8) from unknown-address + 23, addrspace 3) ; CI-DS128: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD20]](s32) ; CI-DS128: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]] ; CI-DS128: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32) @@ -15672,18 +15776,18 @@ body: | ; CI-DS128: [[COPY26:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[COPY25]](s96) ; CI-DS128: $vgpr3_vgpr4_vgpr5 = COPY [[COPY26]](s96) - ; VI-LABEL: name: test_extload_local_v2s96_from_24_align1 + ; VI-LABEL: name: test_load_local_v2s96_align1 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -15704,13 +15808,13 @@ body: | ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -15726,13 +15830,13 @@ body: | ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; VI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -15751,13 +15855,13 @@ body: | ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; VI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 from unknown-address + 12, addrspace 3) + ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) ; VI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 from unknown-address + 13, addrspace 3) + ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) ; VI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 from unknown-address + 14, addrspace 3) + ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) ; VI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 from unknown-address + 15, addrspace 3) + ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -15773,13 +15877,13 @@ body: | ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32) ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]] ; VI: [[PTR_ADD15:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s32) - ; VI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p3) :: (load 1 from unknown-address + 16, addrspace 3) + ; VI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p3) :: (load (s8) from unknown-address + 16, addrspace 3) ; VI: [[PTR_ADD16:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32) - ; VI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p3) :: (load 1 from unknown-address + 17, addrspace 3) + ; VI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p3) :: (load (s8) from unknown-address + 17, addrspace 3) ; VI: [[PTR_ADD17:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s32) - ; VI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p3) :: (load 1 from unknown-address + 18, addrspace 3) + ; VI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p3) :: (load (s8) from unknown-address + 18, addrspace 3) ; VI: [[PTR_ADD18:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32) - ; VI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p3) :: (load 1 from unknown-address + 19, addrspace 3) + ; VI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p3) :: (load (s8) from unknown-address + 19, addrspace 3) ; VI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD16]](s32) ; VI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]] ; VI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32) @@ -15795,13 +15899,13 @@ body: | ; VI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32) ; VI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]] ; VI: [[PTR_ADD19:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32) - ; VI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p3) :: (load 1 from unknown-address + 20, addrspace 3) + ; VI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p3) :: (load (s8) from unknown-address + 20, addrspace 3) ; VI: [[PTR_ADD20:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32) - ; VI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p3) :: (load 1 from unknown-address + 21, addrspace 3) + ; VI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p3) :: (load (s8) from unknown-address + 21, addrspace 3) ; VI: [[PTR_ADD21:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s32) - ; VI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p3) :: (load 1 from unknown-address + 22, addrspace 3) + ; VI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p3) :: (load (s8) from unknown-address + 22, addrspace 3) ; VI: [[PTR_ADD22:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32) - ; VI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p3) :: (load 1 from unknown-address + 23, addrspace 3) + ; VI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p3) :: (load (s8) from unknown-address + 23, addrspace 3) ; VI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD20]](s32) ; VI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]] ; VI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32) @@ -15822,18 +15926,18 @@ body: | ; VI: [[COPY26:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY25]](s96) ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY26]](s96) - ; GFX9-LABEL: name: test_extload_local_v2s96_from_24_align1 + ; GFX9-LABEL: name: test_load_local_v2s96_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -15854,13 +15958,13 @@ body: | ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -15876,13 +15980,13 @@ body: | ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -15901,13 +16005,13 @@ body: | ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 from unknown-address + 12, addrspace 3) + ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 from unknown-address + 13, addrspace 3) + ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 from unknown-address + 14, addrspace 3) + ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 from unknown-address + 15, addrspace 3) + ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; GFX9: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -15923,13 +16027,13 @@ body: | ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32) ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]] ; GFX9: [[PTR_ADD15:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s32) - ; GFX9: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p3) :: (load 1 from unknown-address + 16, addrspace 3) + ; GFX9: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p3) :: (load (s8) from unknown-address + 16, addrspace 3) ; GFX9: [[PTR_ADD16:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32) - ; GFX9: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p3) :: (load 1 from unknown-address + 17, addrspace 3) + ; GFX9: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p3) :: (load (s8) from unknown-address + 17, addrspace 3) ; GFX9: [[PTR_ADD17:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s32) - ; GFX9: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p3) :: (load 1 from unknown-address + 18, addrspace 3) + ; GFX9: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p3) :: (load (s8) from unknown-address + 18, addrspace 3) ; GFX9: [[PTR_ADD18:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32) - ; GFX9: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p3) :: (load 1 from unknown-address + 19, addrspace 3) + ; GFX9: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p3) :: (load (s8) from unknown-address + 19, addrspace 3) ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD16]](s32) ; GFX9: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]] ; GFX9: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32) @@ -15945,13 +16049,13 @@ body: | ; GFX9: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32) ; GFX9: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]] ; GFX9: [[PTR_ADD19:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32) - ; GFX9: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p3) :: (load 1 from unknown-address + 20, addrspace 3) + ; GFX9: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p3) :: (load (s8) from unknown-address + 20, addrspace 3) ; GFX9: [[PTR_ADD20:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32) - ; GFX9: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p3) :: (load 1 from unknown-address + 21, addrspace 3) + ; GFX9: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p3) :: (load (s8) from unknown-address + 21, addrspace 3) ; GFX9: [[PTR_ADD21:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s32) - ; GFX9: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p3) :: (load 1 from unknown-address + 22, addrspace 3) + ; GFX9: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p3) :: (load (s8) from unknown-address + 22, addrspace 3) ; GFX9: [[PTR_ADD22:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32) - ; GFX9: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p3) :: (load 1 from unknown-address + 23, addrspace 3) + ; GFX9: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p3) :: (load (s8) from unknown-address + 23, addrspace 3) ; GFX9: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD20]](s32) ; GFX9: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]] ; GFX9: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32) @@ -15972,30 +16076,30 @@ body: | ; GFX9: [[COPY26:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY25]](s96) ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY26]](s96) - ; GFX9-UNALIGNED-LABEL: name: test_extload_local_v2s96_from_24_align1 + ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s96_align1 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 1, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (s96), align 1, addrspace 3) ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX9-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-UNALIGNED: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 12 from unknown-address + 12, align 1, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (s96) from unknown-address + 12, align 1, addrspace 3) ; GFX9-UNALIGNED: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) ; GFX9-UNALIGNED: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9-UNALIGNED: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) - ; GFX10-LABEL: name: test_extload_local_v2s96_from_24_align1 + ; GFX10-LABEL: name: test_load_local_v2s96_align1 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX10: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -16016,13 +16120,13 @@ body: | ; GFX10: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX10: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; GFX10: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; GFX10: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; GFX10: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; GFX10: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; GFX10: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX10: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; GFX10: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX10: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -16038,13 +16142,13 @@ body: | ; GFX10: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; GFX10: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; GFX10: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; GFX10: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; GFX10: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX10: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; GFX10: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; GFX10: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; GFX10: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; GFX10: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; GFX10: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX10: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; GFX10: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX10: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -16063,13 +16167,13 @@ body: | ; GFX10: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX10: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX10: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; GFX10: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 from unknown-address + 12, addrspace 3) + ; GFX10: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) ; GFX10: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; GFX10: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 from unknown-address + 13, addrspace 3) + ; GFX10: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) ; GFX10: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; GFX10: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 from unknown-address + 14, addrspace 3) + ; GFX10: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) ; GFX10: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; GFX10: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 from unknown-address + 15, addrspace 3) + ; GFX10: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; GFX10: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -16085,13 +16189,13 @@ body: | ; GFX10: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32) ; GFX10: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]] ; GFX10: [[PTR_ADD15:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s32) - ; GFX10: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p3) :: (load 1 from unknown-address + 16, addrspace 3) + ; GFX10: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p3) :: (load (s8) from unknown-address + 16, addrspace 3) ; GFX10: [[PTR_ADD16:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32) - ; GFX10: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p3) :: (load 1 from unknown-address + 17, addrspace 3) + ; GFX10: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p3) :: (load (s8) from unknown-address + 17, addrspace 3) ; GFX10: [[PTR_ADD17:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s32) - ; GFX10: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p3) :: (load 1 from unknown-address + 18, addrspace 3) + ; GFX10: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p3) :: (load (s8) from unknown-address + 18, addrspace 3) ; GFX10: [[PTR_ADD18:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32) - ; GFX10: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p3) :: (load 1 from unknown-address + 19, addrspace 3) + ; GFX10: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p3) :: (load (s8) from unknown-address + 19, addrspace 3) ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD16]](s32) ; GFX10: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]] ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32) @@ -16107,13 +16211,13 @@ body: | ; GFX10: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32) ; GFX10: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]] ; GFX10: [[PTR_ADD19:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32) - ; GFX10: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p3) :: (load 1 from unknown-address + 20, addrspace 3) + ; GFX10: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p3) :: (load (s8) from unknown-address + 20, addrspace 3) ; GFX10: [[PTR_ADD20:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32) - ; GFX10: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p3) :: (load 1 from unknown-address + 21, addrspace 3) + ; GFX10: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p3) :: (load (s8) from unknown-address + 21, addrspace 3) ; GFX10: [[PTR_ADD21:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s32) - ; GFX10: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p3) :: (load 1 from unknown-address + 22, addrspace 3) + ; GFX10: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p3) :: (load (s8) from unknown-address + 22, addrspace 3) ; GFX10: [[PTR_ADD22:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32) - ; GFX10: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p3) :: (load 1 from unknown-address + 23, addrspace 3) + ; GFX10: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p3) :: (load (s8) from unknown-address + 23, addrspace 3) ; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD20]](s32) ; GFX10: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]] ; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32) @@ -16134,18 +16238,18 @@ body: | ; GFX10: [[COPY26:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[COPY25]](s96) ; GFX10: $vgpr3_vgpr4_vgpr5 = COPY [[COPY26]](s96) - ; GFX10-UNALIGNED-LABEL: name: test_extload_local_v2s96_from_24_align1 + ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s96_align1 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 1, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s8) from unknown-address + 1, addrspace 3) ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 1 from unknown-address + 2, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s8) from unknown-address + 2, addrspace 3) ; GFX10-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 1 from unknown-address + 3, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s8) from unknown-address + 3, addrspace 3) ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -16166,13 +16270,13 @@ body: | ; GFX10-UNALIGNED: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX10-UNALIGNED: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10-UNALIGNED: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 1 from unknown-address + 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s8) from unknown-address + 4, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 1 from unknown-address + 5, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; GFX10-UNALIGNED: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 1 from unknown-address + 6, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s8) from unknown-address + 6, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX10-UNALIGNED: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1 from unknown-address + 7, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s8) from unknown-address + 7, addrspace 3) ; GFX10-UNALIGNED: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX10-UNALIGNED: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX10-UNALIGNED: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -16188,13 +16292,13 @@ body: | ; GFX10-UNALIGNED: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; GFX10-UNALIGNED: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; GFX10-UNALIGNED: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10-UNALIGNED: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1 from unknown-address + 8, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s8) from unknown-address + 8, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1 from unknown-address + 9, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s8) from unknown-address + 9, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; GFX10-UNALIGNED: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1 from unknown-address + 10, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s8) from unknown-address + 10, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX10-UNALIGNED: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1 from unknown-address + 11, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s8) from unknown-address + 11, addrspace 3) ; GFX10-UNALIGNED: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX10-UNALIGNED: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; GFX10-UNALIGNED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -16213,13 +16317,13 @@ body: | ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX10-UNALIGNED: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX10-UNALIGNED: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; GFX10-UNALIGNED: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1 from unknown-address + 12, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load (s8) from unknown-address + 12, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1 from unknown-address + 13, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load (s8) from unknown-address + 13, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; GFX10-UNALIGNED: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1 from unknown-address + 14, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load (s8) from unknown-address + 14, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; GFX10-UNALIGNED: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1 from unknown-address + 15, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load (s8) from unknown-address + 15, addrspace 3) ; GFX10-UNALIGNED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; GFX10-UNALIGNED: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; GFX10-UNALIGNED: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -16235,13 +16339,13 @@ body: | ; GFX10-UNALIGNED: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32) ; GFX10-UNALIGNED: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]] ; GFX10-UNALIGNED: [[PTR_ADD15:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s32) - ; GFX10-UNALIGNED: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p3) :: (load 1 from unknown-address + 16, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p3) :: (load (s8) from unknown-address + 16, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD16:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p3) :: (load 1 from unknown-address + 17, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p3) :: (load (s8) from unknown-address + 17, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD17:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s32) - ; GFX10-UNALIGNED: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p3) :: (load 1 from unknown-address + 18, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p3) :: (load (s8) from unknown-address + 18, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD18:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32) - ; GFX10-UNALIGNED: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p3) :: (load 1 from unknown-address + 19, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p3) :: (load (s8) from unknown-address + 19, addrspace 3) ; GFX10-UNALIGNED: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD16]](s32) ; GFX10-UNALIGNED: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]] ; GFX10-UNALIGNED: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32) @@ -16257,13 +16361,13 @@ body: | ; GFX10-UNALIGNED: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32) ; GFX10-UNALIGNED: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]] ; GFX10-UNALIGNED: [[PTR_ADD19:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32) - ; GFX10-UNALIGNED: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p3) :: (load 1 from unknown-address + 20, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p3) :: (load (s8) from unknown-address + 20, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD20:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p3) :: (load 1 from unknown-address + 21, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p3) :: (load (s8) from unknown-address + 21, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD21:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s32) - ; GFX10-UNALIGNED: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p3) :: (load 1 from unknown-address + 22, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p3) :: (load (s8) from unknown-address + 22, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD22:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32) - ; GFX10-UNALIGNED: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p3) :: (load 1 from unknown-address + 23, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p3) :: (load (s8) from unknown-address + 23, addrspace 3) ; GFX10-UNALIGNED: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD20]](s32) ; GFX10-UNALIGNED: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]] ; GFX10-UNALIGNED: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32) @@ -16285,7 +16389,7 @@ body: | ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[COPY25]](s96) ; GFX10-UNALIGNED: $vgpr3_vgpr4_vgpr5 = COPY [[COPY26]](s96) %0:_(p3) = COPY $vgpr0 - %1:_(<2 x s96>) = G_LOAD %0 :: (load 24, align 1, addrspace 3) + %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 1, addrspace 3) %2:_(s96) = G_EXTRACT %1, 0 %3:_(s96) = G_EXTRACT %1, 96 $vgpr0_vgpr1_vgpr2 = COPY %2 @@ -16293,17 +16397,17 @@ body: | ... --- -name: test_extload_local_v2s96_from_24_align2 +name: test_load_local_v2s96_align2 body: | bb.0: liveins: $vgpr0 - ; SI-LABEL: name: test_extload_local_v2s96_from_24_align2 + ; SI-LABEL: name: test_load_local_v2s96_align2 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -16314,9 +16418,9 @@ body: | ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -16325,9 +16429,9 @@ body: | ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 2 from unknown-address + 8, addrspace 3) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 2 from unknown-address + 10, addrspace 3) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -16338,9 +16442,9 @@ body: | ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; SI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 2 from unknown-address + 12, addrspace 3) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 2 from unknown-address + 14, addrspace 3) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3) ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) @@ -16348,9 +16452,9 @@ body: | ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32) ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; SI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32) - ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 2 from unknown-address + 16, addrspace 3) + ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s16) from unknown-address + 16, addrspace 3) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 2 from unknown-address + 18, addrspace 3) + ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s16) from unknown-address + 18, addrspace 3) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]] ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -16358,9 +16462,9 @@ body: | ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32) ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] ; SI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s32) - ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 2 from unknown-address + 20, addrspace 3) + ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s16) from unknown-address + 20, addrspace 3) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 2 from unknown-address + 22, addrspace 3) + ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s16) from unknown-address + 22, addrspace 3) ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]] ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) @@ -16373,12 +16477,12 @@ body: | ; SI: [[COPY14:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY13]](s96) ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY14]](s96) - ; CI-LABEL: name: test_extload_local_v2s96_from_24_align2 + ; CI-LABEL: name: test_load_local_v2s96_align2 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -16389,9 +16493,9 @@ body: | ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -16400,9 +16504,9 @@ body: | ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 2 from unknown-address + 8, addrspace 3) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 2 from unknown-address + 10, addrspace 3) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -16413,9 +16517,9 @@ body: | ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 2 from unknown-address + 12, addrspace 3) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 2 from unknown-address + 14, addrspace 3) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3) ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) @@ -16423,9 +16527,9 @@ body: | ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32) ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; CI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32) - ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 2 from unknown-address + 16, addrspace 3) + ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s16) from unknown-address + 16, addrspace 3) ; CI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 2 from unknown-address + 18, addrspace 3) + ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s16) from unknown-address + 18, addrspace 3) ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]] ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -16433,9 +16537,9 @@ body: | ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32) ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] ; CI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s32) - ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 2 from unknown-address + 20, addrspace 3) + ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s16) from unknown-address + 20, addrspace 3) ; CI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 2 from unknown-address + 22, addrspace 3) + ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s16) from unknown-address + 22, addrspace 3) ; CI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) ; CI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]] ; CI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) @@ -16448,12 +16552,12 @@ body: | ; CI: [[COPY14:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY13]](s96) ; CI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY14]](s96) - ; CI-DS128-LABEL: name: test_extload_local_v2s96_from_24_align2 + ; CI-DS128-LABEL: name: test_load_local_v2s96_align2 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -16464,9 +16568,9 @@ body: | ; CI-DS128: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-DS128: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; CI-DS128: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; CI-DS128: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; CI-DS128: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -16475,9 +16579,9 @@ body: | ; CI-DS128: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-DS128: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI-DS128: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 2 from unknown-address + 8, addrspace 3) + ; CI-DS128: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) ; CI-DS128: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 2 from unknown-address + 10, addrspace 3) + ; CI-DS128: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) ; CI-DS128: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI-DS128: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; CI-DS128: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -16488,9 +16592,9 @@ body: | ; CI-DS128: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI-DS128: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CI-DS128: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 2 from unknown-address + 12, addrspace 3) + ; CI-DS128: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3) ; CI-DS128: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 2 from unknown-address + 14, addrspace 3) + ; CI-DS128: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3) ; CI-DS128: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) ; CI-DS128: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] ; CI-DS128: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) @@ -16498,9 +16602,9 @@ body: | ; CI-DS128: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32) ; CI-DS128: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; CI-DS128: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32) - ; CI-DS128: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 2 from unknown-address + 16, addrspace 3) + ; CI-DS128: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s16) from unknown-address + 16, addrspace 3) ; CI-DS128: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI-DS128: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 2 from unknown-address + 18, addrspace 3) + ; CI-DS128: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s16) from unknown-address + 18, addrspace 3) ; CI-DS128: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; CI-DS128: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]] ; CI-DS128: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -16508,9 +16612,9 @@ body: | ; CI-DS128: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32) ; CI-DS128: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] ; CI-DS128: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s32) - ; CI-DS128: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 2 from unknown-address + 20, addrspace 3) + ; CI-DS128: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s16) from unknown-address + 20, addrspace 3) ; CI-DS128: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; CI-DS128: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 2 from unknown-address + 22, addrspace 3) + ; CI-DS128: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s16) from unknown-address + 22, addrspace 3) ; CI-DS128: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) ; CI-DS128: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]] ; CI-DS128: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) @@ -16523,12 +16627,12 @@ body: | ; CI-DS128: [[COPY14:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[COPY13]](s96) ; CI-DS128: $vgpr3_vgpr4_vgpr5 = COPY [[COPY14]](s96) - ; VI-LABEL: name: test_extload_local_v2s96_from_24_align2 + ; VI-LABEL: name: test_load_local_v2s96_align2 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -16539,9 +16643,9 @@ body: | ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -16550,9 +16654,9 @@ body: | ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 2 from unknown-address + 8, addrspace 3) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 2 from unknown-address + 10, addrspace 3) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -16563,9 +16667,9 @@ body: | ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; VI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 2 from unknown-address + 12, addrspace 3) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 2 from unknown-address + 14, addrspace 3) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3) ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) @@ -16573,9 +16677,9 @@ body: | ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32) ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; VI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32) - ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 2 from unknown-address + 16, addrspace 3) + ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s16) from unknown-address + 16, addrspace 3) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 2 from unknown-address + 18, addrspace 3) + ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s16) from unknown-address + 18, addrspace 3) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]] ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -16583,9 +16687,9 @@ body: | ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32) ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] ; VI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s32) - ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 2 from unknown-address + 20, addrspace 3) + ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s16) from unknown-address + 20, addrspace 3) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 2 from unknown-address + 22, addrspace 3) + ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s16) from unknown-address + 22, addrspace 3) ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]] ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) @@ -16598,12 +16702,12 @@ body: | ; VI: [[COPY14:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY13]](s96) ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY14]](s96) - ; GFX9-LABEL: name: test_extload_local_v2s96_from_24_align2 + ; GFX9-LABEL: name: test_load_local_v2s96_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -16614,9 +16718,9 @@ body: | ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -16625,9 +16729,9 @@ body: | ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 2 from unknown-address + 8, addrspace 3) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 2 from unknown-address + 10, addrspace 3) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -16638,9 +16742,9 @@ body: | ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 2 from unknown-address + 12, addrspace 3) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3) ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 2 from unknown-address + 14, addrspace 3) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3) ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) @@ -16648,9 +16752,9 @@ body: | ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32) ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32) - ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 2 from unknown-address + 16, addrspace 3) + ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s16) from unknown-address + 16, addrspace 3) ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 2 from unknown-address + 18, addrspace 3) + ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s16) from unknown-address + 18, addrspace 3) ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]] ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -16658,9 +16762,9 @@ body: | ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32) ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s32) - ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 2 from unknown-address + 20, addrspace 3) + ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s16) from unknown-address + 20, addrspace 3) ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 2 from unknown-address + 22, addrspace 3) + ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s16) from unknown-address + 22, addrspace 3) ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) ; GFX9: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]] ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) @@ -16673,24 +16777,24 @@ body: | ; GFX9: [[COPY14:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY13]](s96) ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY14]](s96) - ; GFX9-UNALIGNED-LABEL: name: test_extload_local_v2s96_from_24_align2 + ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s96_align2 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 2, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (s96), align 2, addrspace 3) ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX9-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-UNALIGNED: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 12 from unknown-address + 12, align 2, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (s96) from unknown-address + 12, align 2, addrspace 3) ; GFX9-UNALIGNED: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) ; GFX9-UNALIGNED: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9-UNALIGNED: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) - ; GFX10-LABEL: name: test_extload_local_v2s96_from_24_align2 + ; GFX10-LABEL: name: test_load_local_v2s96_align2 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -16701,9 +16805,9 @@ body: | ; GFX10: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX10: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX10: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -16712,9 +16816,9 @@ body: | ; GFX10: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; GFX10: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX10: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 2 from unknown-address + 8, addrspace 3) + ; GFX10: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) ; GFX10: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 2 from unknown-address + 10, addrspace 3) + ; GFX10: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX10: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -16725,9 +16829,9 @@ body: | ; GFX10: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX10: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX10: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; GFX10: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 2 from unknown-address + 12, addrspace 3) + ; GFX10: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3) ; GFX10: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX10: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 2 from unknown-address + 14, addrspace 3) + ; GFX10: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3) ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) ; GFX10: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) @@ -16735,9 +16839,9 @@ body: | ; GFX10: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32) ; GFX10: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; GFX10: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32) - ; GFX10: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 2 from unknown-address + 16, addrspace 3) + ; GFX10: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s16) from unknown-address + 16, addrspace 3) ; GFX10: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX10: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 2 from unknown-address + 18, addrspace 3) + ; GFX10: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s16) from unknown-address + 18, addrspace 3) ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX10: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]] ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -16745,9 +16849,9 @@ body: | ; GFX10: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32) ; GFX10: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] ; GFX10: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s32) - ; GFX10: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 2 from unknown-address + 20, addrspace 3) + ; GFX10: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s16) from unknown-address + 20, addrspace 3) ; GFX10: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; GFX10: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 2 from unknown-address + 22, addrspace 3) + ; GFX10: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s16) from unknown-address + 22, addrspace 3) ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) ; GFX10: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]] ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) @@ -16760,12 +16864,12 @@ body: | ; GFX10: [[COPY14:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[COPY13]](s96) ; GFX10: $vgpr3_vgpr4_vgpr5 = COPY [[COPY14]](s96) - ; GFX10-UNALIGNED-LABEL: name: test_extload_local_v2s96_from_24_align2 + ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s96_align2 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 2 from unknown-address + 2, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s16) from unknown-address + 2, addrspace 3) ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -16776,9 +16880,9 @@ body: | ; GFX10-UNALIGNED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX10-UNALIGNED: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 from unknown-address + 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2 from unknown-address + 6, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s16) from unknown-address + 6, addrspace 3) ; GFX10-UNALIGNED: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX10-UNALIGNED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; GFX10-UNALIGNED: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -16787,9 +16891,9 @@ body: | ; GFX10-UNALIGNED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; GFX10-UNALIGNED: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX10-UNALIGNED: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 2 from unknown-address + 8, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load (s16) from unknown-address + 8, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 2 from unknown-address + 10, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s16) from unknown-address + 10, addrspace 3) ; GFX10-UNALIGNED: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX10-UNALIGNED: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; GFX10-UNALIGNED: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -16800,9 +16904,9 @@ body: | ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX10-UNALIGNED: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX10-UNALIGNED: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; GFX10-UNALIGNED: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 2 from unknown-address + 12, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load (s16) from unknown-address + 12, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 2 from unknown-address + 14, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load (s16) from unknown-address + 14, addrspace 3) ; GFX10-UNALIGNED: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) ; GFX10-UNALIGNED: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] ; GFX10-UNALIGNED: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) @@ -16810,9 +16914,9 @@ body: | ; GFX10-UNALIGNED: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32) ; GFX10-UNALIGNED: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; GFX10-UNALIGNED: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32) - ; GFX10-UNALIGNED: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 2 from unknown-address + 16, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load (s16) from unknown-address + 16, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 2 from unknown-address + 18, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load (s16) from unknown-address + 18, addrspace 3) ; GFX10-UNALIGNED: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX10-UNALIGNED: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]] ; GFX10-UNALIGNED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -16820,9 +16924,9 @@ body: | ; GFX10-UNALIGNED: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32) ; GFX10-UNALIGNED: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] ; GFX10-UNALIGNED: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s32) - ; GFX10-UNALIGNED: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 2 from unknown-address + 20, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load (s16) from unknown-address + 20, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 2 from unknown-address + 22, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load (s16) from unknown-address + 22, addrspace 3) ; GFX10-UNALIGNED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) ; GFX10-UNALIGNED: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]] ; GFX10-UNALIGNED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) @@ -16836,7 +16940,7 @@ body: | ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[COPY13]](s96) ; GFX10-UNALIGNED: $vgpr3_vgpr4_vgpr5 = COPY [[COPY14]](s96) %0:_(p3) = COPY $vgpr0 - %1:_(<2 x s96>) = G_LOAD %0 :: (load 24, align 2, addrspace 3) + %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 2, addrspace 3) %2:_(s96) = G_EXTRACT %1, 0 %3:_(s96) = G_EXTRACT %1, 96 $vgpr0_vgpr1_vgpr2 = COPY %2 @@ -16844,25 +16948,25 @@ body: | ... --- -name: test_extload_local_v2s96_from_24_align4 +name: test_load_local_v2s96_align4 body: | bb.0: liveins: $vgpr0 - ; SI-LABEL: name: test_extload_local_v2s96_from_24_align4 + ; SI-LABEL: name: test_load_local_v2s96_align4 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, addrspace 3) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load 8 from unknown-address + 12, align 4, addrspace 3) + ; SI: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (s64) from unknown-address + 12, align 4, addrspace 3) ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 4 from unknown-address + 20, addrspace 3) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>) ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32) ; SI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) @@ -16870,20 +16974,20 @@ body: | ; SI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) - ; CI-LABEL: name: test_extload_local_v2s96_from_24_align4 + ; CI-LABEL: name: test_load_local_v2s96_align4 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, addrspace 3) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) ; CI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load 8 from unknown-address + 12, align 4, addrspace 3) + ; CI: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (s64) from unknown-address + 12, align 4, addrspace 3) ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 4 from unknown-address + 20, addrspace 3) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) ; CI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>) ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32) ; CI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) @@ -16891,20 +16995,20 @@ body: | ; CI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) - ; CI-DS128-LABEL: name: test_extload_local_v2s96_from_24_align4 + ; CI-DS128-LABEL: name: test_load_local_v2s96_align4 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, addrspace 3) + ; CI-DS128: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) ; CI-DS128: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; CI-DS128: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI-DS128: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load 8 from unknown-address + 12, align 4, addrspace 3) + ; CI-DS128: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (s64) from unknown-address + 12, align 4, addrspace 3) ; CI-DS128: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 4 from unknown-address + 20, addrspace 3) + ; CI-DS128: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) ; CI-DS128: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>) ; CI-DS128: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32) ; CI-DS128: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) @@ -16912,20 +17016,20 @@ body: | ; CI-DS128: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-DS128: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) - ; VI-LABEL: name: test_extload_local_v2s96_from_24_align4 + ; VI-LABEL: name: test_load_local_v2s96_align4 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, addrspace 3) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load 8 from unknown-address + 12, align 4, addrspace 3) + ; VI: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (s64) from unknown-address + 12, align 4, addrspace 3) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 4 from unknown-address + 20, addrspace 3) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>) ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32) ; VI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) @@ -16933,20 +17037,20 @@ body: | ; VI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) - ; GFX9-LABEL: name: test_extload_local_v2s96_from_24_align4 + ; GFX9-LABEL: name: test_load_local_v2s96_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, addrspace 3) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load 8 from unknown-address + 12, align 4, addrspace 3) + ; GFX9: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (s64) from unknown-address + 12, align 4, addrspace 3) ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 4 from unknown-address + 20, addrspace 3) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>) ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32) ; GFX9: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) @@ -16954,32 +17058,32 @@ body: | ; GFX9: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) - ; GFX9-UNALIGNED-LABEL: name: test_extload_local_v2s96_from_24_align4 + ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s96_align4 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (s96), align 4, addrspace 3) ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX9-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-UNALIGNED: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 12 from unknown-address + 12, align 4, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (s96) from unknown-address + 12, align 4, addrspace 3) ; GFX9-UNALIGNED: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) ; GFX9-UNALIGNED: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9-UNALIGNED: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) - ; GFX10-LABEL: name: test_extload_local_v2s96_from_24_align4 + ; GFX10-LABEL: name: test_load_local_v2s96_align4 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, addrspace 3) + ; GFX10: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; GFX10: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load 8 from unknown-address + 12, align 4, addrspace 3) + ; GFX10: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (s64) from unknown-address + 12, align 4, addrspace 3) ; GFX10: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 4 from unknown-address + 20, addrspace 3) + ; GFX10: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) ; GFX10: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>) ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32) ; GFX10: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) @@ -16987,20 +17091,20 @@ body: | ; GFX10: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX10: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) - ; GFX10-UNALIGNED-LABEL: name: test_extload_local_v2s96_from_24_align4 + ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s96_align4 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (s64), align 4, addrspace 3) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, addrspace 3) ; GFX10-UNALIGNED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load 8 from unknown-address + 12, align 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (s64) from unknown-address + 12, align 4, addrspace 3) ; GFX10-UNALIGNED: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 4 from unknown-address + 20, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) ; GFX10-UNALIGNED: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>) ; GFX10-UNALIGNED: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32) ; GFX10-UNALIGNED: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) @@ -17009,7 +17113,7 @@ body: | ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX10-UNALIGNED: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) %0:_(p3) = COPY $vgpr0 - %1:_(<2 x s96>) = G_LOAD %0 :: (load 24, align 4, addrspace 3) + %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 4, addrspace 3) %2:_(s96) = G_EXTRACT %1, 0 %3:_(s96) = G_EXTRACT %1, 96 $vgpr0_vgpr1_vgpr2 = COPY %2 @@ -17017,25 +17121,25 @@ body: | ... --- -name: test_extload_local_v2s96_from_24_align16 +name: test_load_local_v2s96_align16 body: | bb.0: liveins: $vgpr0 - ; SI-LABEL: name: test_extload_local_v2s96_from_24_align16 + ; SI-LABEL: name: test_load_local_v2s96_align16 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 16, addrspace 3) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (s64), align 16, addrspace 3) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, align 8, addrspace 3) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, align 8, addrspace 3) ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load 8 from unknown-address + 12, align 4, addrspace 3) + ; SI: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (s64) from unknown-address + 12, align 4, addrspace 3) ; SI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 4 from unknown-address + 20, addrspace 3) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>) ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32) ; SI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) @@ -17043,20 +17147,20 @@ body: | ; SI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) - ; CI-LABEL: name: test_extload_local_v2s96_from_24_align16 + ; CI-LABEL: name: test_load_local_v2s96_align16 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 16, addrspace 3) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load (s64), align 16, addrspace 3) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4 from unknown-address + 8, align 8, addrspace 3) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load (s32) from unknown-address + 8, align 8, addrspace 3) ; CI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load 8 from unknown-address + 12, align 4, addrspace 3) + ; CI: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load (s64) from unknown-address + 12, align 4, addrspace 3) ; CI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 4 from unknown-address + 20, addrspace 3) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) ; CI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<2 x s32>) ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[LOAD3]](s32) ; CI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) @@ -17064,16 +17168,16 @@ body: | ; CI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) - ; CI-DS128-LABEL: name: test_extload_local_v2s96_from_24_align16 + ; CI-DS128-LABEL: name: test_load_local_v2s96_align16 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CI-DS128: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 16, addrspace 3) + ; CI-DS128: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (s96), align 16, addrspace 3) ; CI-DS128: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CI-DS128: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI-DS128: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 8 from unknown-address + 12, align 4, addrspace 3) + ; CI-DS128: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 12, align 4, addrspace 3) ; CI-DS128: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI-DS128: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD]], [[C1]](s32) - ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 4 from unknown-address + 20, addrspace 3) + ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) ; CI-DS128: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<2 x s32>) ; CI-DS128: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD2]](s32) ; CI-DS128: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) @@ -17081,16 +17185,16 @@ body: | ; CI-DS128: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI-DS128: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) - ; VI-LABEL: name: test_extload_local_v2s96_from_24_align16 + ; VI-LABEL: name: test_load_local_v2s96_align16 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 16, addrspace 3) + ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (s96), align 16, addrspace 3) ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 8 from unknown-address + 12, align 4, addrspace 3) + ; VI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 12, align 4, addrspace 3) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 4 from unknown-address + 20, addrspace 3) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<2 x s32>) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD2]](s32) ; VI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) @@ -17098,16 +17202,16 @@ body: | ; VI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) - ; GFX9-LABEL: name: test_extload_local_v2s96_from_24_align16 + ; GFX9-LABEL: name: test_load_local_v2s96_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 16, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (s96), align 16, addrspace 3) ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 8 from unknown-address + 12, align 4, addrspace 3) + ; GFX9: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 12, align 4, addrspace 3) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 4 from unknown-address + 20, addrspace 3) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<2 x s32>) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD2]](s32) ; GFX9: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) @@ -17115,28 +17219,28 @@ body: | ; GFX9: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) - ; GFX9-UNALIGNED-LABEL: name: test_extload_local_v2s96_from_24_align16 + ; GFX9-UNALIGNED-LABEL: name: test_load_local_v2s96_align16 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 16, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (s96), align 16, addrspace 3) ; GFX9-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX9-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9-UNALIGNED: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 12 from unknown-address + 12, align 4, addrspace 3) + ; GFX9-UNALIGNED: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (s96) from unknown-address + 12, align 4, addrspace 3) ; GFX9-UNALIGNED: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) ; GFX9-UNALIGNED: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX9-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9-UNALIGNED: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) - ; GFX10-LABEL: name: test_extload_local_v2s96_from_24_align16 + ; GFX10-LABEL: name: test_load_local_v2s96_align16 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 16, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (s96), align 16, addrspace 3) ; GFX10: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 8 from unknown-address + 12, align 4, addrspace 3) + ; GFX10: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 12, align 4, addrspace 3) ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX10: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD]], [[C1]](s32) - ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 4 from unknown-address + 20, addrspace 3) + ; GFX10: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<2 x s32>) ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD2]](s32) ; GFX10: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) @@ -17144,16 +17248,16 @@ body: | ; GFX10: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX10: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) - ; GFX10-UNALIGNED-LABEL: name: test_extload_local_v2s96_from_24_align16 + ; GFX10-UNALIGNED-LABEL: name: test_load_local_v2s96_align16 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 16, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load (s96), align 16, addrspace 3) ; GFX10-UNALIGNED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX10-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 8 from unknown-address + 12, align 4, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 12, align 4, addrspace 3) ; GFX10-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX10-UNALIGNED: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD]], [[C1]](s32) - ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 4 from unknown-address + 20, addrspace 3) + ; GFX10-UNALIGNED: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s32) from unknown-address + 20, addrspace 3) ; GFX10-UNALIGNED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<2 x s32>) ; GFX10-UNALIGNED: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD2]](s32) ; GFX10-UNALIGNED: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) @@ -17162,7 +17266,7 @@ body: | ; GFX10-UNALIGNED: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX10-UNALIGNED: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) %0:_(p3) = COPY $vgpr0 - %1:_(<2 x s96>) = G_LOAD %0 :: (load 24, align 16, addrspace 3) + %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 16, addrspace 3) %2:_(s96) = G_EXTRACT %1, 0 %3:_(s96) = G_EXTRACT %1, 96 $vgpr0_vgpr1_vgpr2 = COPY %2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-memory-metadata.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-memory-metadata.mir index 557219d52074b..52d0cafc06340 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-memory-metadata.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-memory-metadata.mir @@ -41,13 +41,13 @@ body: | liveins: $vgpr0_vgpr1 ; SI-LABEL: name: widen_load_range0_tbaa ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, !tbaa !1, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), !tbaa !1, addrspace 1) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; SI: $vgpr0 = COPY [[AND]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s24) = G_LOAD %0 :: (load 3, align 4, addrspace 1, !range !0, !tbaa !1) + %1:_(s24) = G_LOAD %0 :: (load (s24), align 4, addrspace 1, !range !0, !tbaa !1) %2:_(s32) = G_ZEXT %1 $vgpr0 = COPY %2 @@ -61,10 +61,10 @@ body: | liveins: $vgpr0_vgpr1 ; SI-LABEL: name: widen_load_range1_tbaa ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, !tbaa !1, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), !tbaa !1, addrspace 1) ; SI: $vgpr0 = COPY [[LOAD]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (load 3, align 4, addrspace 1, !range !0, !tbaa !1) + %1:_(s32) = G_LOAD %0 :: (load (s24), align 4, addrspace 1, !range !0, !tbaa !1) $vgpr0 = COPY %1 ... @@ -75,13 +75,13 @@ body: | liveins: $vgpr0_vgpr1 ; SI-LABEL: name: widen_load_tbaa0 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, !tbaa !1, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), !tbaa !1, addrspace 1) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; SI: $vgpr0 = COPY [[AND]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s24) = G_LOAD %0 :: (load 3, align 4, addrspace 1, !tbaa !1) + %1:_(s24) = G_LOAD %0 :: (load (s24), align 4, addrspace 1, !tbaa !1) %2:_(s32) = G_ZEXT %1 $vgpr0 = COPY %2 @@ -95,10 +95,10 @@ body: | liveins: $vgpr0_vgpr1 ; SI-LABEL: name: widen_load_tbaa1 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, !tbaa !1, addrspace 1) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), !tbaa !1, addrspace 1) ; SI: $vgpr0 = COPY [[LOAD]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_LOAD %0 :: (load 3, align 4, addrspace 1, !tbaa !1) + %1:_(s32) = G_LOAD %0 :: (load (s24), align 4, addrspace 1, !tbaa !1) $vgpr0 = COPY %1 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir index 727edc3ce5fe0..5f88e8f90e6e2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir @@ -13,34 +13,34 @@ body: | ; SI-LABEL: name: test_load_private_s1_align1 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s1), addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; SI: $vgpr0 = COPY [[AND]](s32) ; CI-LABEL: name: test_load_private_s1_align1 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s1), addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; CI: $vgpr0 = COPY [[AND]](s32) ; VI-LABEL: name: test_load_private_s1_align1 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s1), addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; VI: $vgpr0 = COPY [[AND]](s32) ; GFX9-LABEL: name: test_load_private_s1_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s1), addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; GFX9: $vgpr0 = COPY [[AND]](s32) %0:_(p5) = COPY $vgpr0 - %1:_(s1) = G_LOAD %0 :: (load 1, align 1, addrspace 5) + %1:_(s1) = G_LOAD %0 :: (load (s1), align 1, addrspace 5) %2:_(s32) = G_ZEXT %1 $vgpr0 = COPY %2 ... @@ -53,34 +53,34 @@ body: | ; SI-LABEL: name: test_load_private_s2_align1 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s2), addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; SI: $vgpr0 = COPY [[AND]](s32) ; CI-LABEL: name: test_load_private_s2_align1 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s2), addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; CI: $vgpr0 = COPY [[AND]](s32) ; VI-LABEL: name: test_load_private_s2_align1 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s2), addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; VI: $vgpr0 = COPY [[AND]](s32) ; GFX9-LABEL: name: test_load_private_s2_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s2), addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; GFX9: $vgpr0 = COPY [[AND]](s32) %0:_(p5) = COPY $vgpr0 - %1:_(s2) = G_LOAD %0 :: (load 1, align 1, addrspace 5) + %1:_(s2) = G_LOAD %0 :: (load (s2), align 1, addrspace 5) %2:_(s32) = G_ZEXT %1 $vgpr0 = COPY %2 ... @@ -93,26 +93,26 @@ body: | ; SI-LABEL: name: test_load_private_s8_align4 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, align 4, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: $vgpr0 = COPY [[COPY1]](s32) ; CI-LABEL: name: test_load_private_s8_align4 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, align 4, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: $vgpr0 = COPY [[COPY1]](s32) ; VI-LABEL: name: test_load_private_s8_align4 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, align 4, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-LABEL: name: test_load_private_s8_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, align 4, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: $vgpr0 = COPY [[COPY1]](s32) %0:_(p5) = COPY $vgpr0 - %1:_(s8) = G_LOAD %0 :: (load 1, align 4, addrspace 5) + %1:_(s8) = G_LOAD %0 :: (load (s8), align 4, addrspace 5) %2:_(s32) = G_ANYEXT %1 $vgpr0 = COPY %2 ... @@ -125,26 +125,26 @@ body: | ; SI-LABEL: name: test_load_private_s8_align1 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: $vgpr0 = COPY [[COPY1]](s32) ; CI-LABEL: name: test_load_private_s8_align1 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: $vgpr0 = COPY [[COPY1]](s32) ; VI-LABEL: name: test_load_private_s8_align1 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-LABEL: name: test_load_private_s8_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: $vgpr0 = COPY [[COPY1]](s32) %0:_(p5) = COPY $vgpr0 - %1:_(s8) = G_LOAD %0 :: (load 1, align 1, addrspace 5) + %1:_(s8) = G_LOAD %0 :: (load (s8), align 1, addrspace 5) %2:_(s32) = G_ANYEXT %1 $vgpr0 = COPY %2 ... @@ -157,26 +157,26 @@ body: | ; SI-LABEL: name: test_load_private_s16_align4 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, align 4, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: $vgpr0 = COPY [[COPY1]](s32) ; CI-LABEL: name: test_load_private_s16_align4 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, align 4, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: $vgpr0 = COPY [[COPY1]](s32) ; VI-LABEL: name: test_load_private_s16_align4 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, align 4, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-LABEL: name: test_load_private_s16_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, align 4, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: $vgpr0 = COPY [[COPY1]](s32) %0:_(p5) = COPY $vgpr0 - %1:_(s16) = G_LOAD %0 :: (load 2, align 4, addrspace 5) + %1:_(s16) = G_LOAD %0 :: (load (s16), align 4, addrspace 5) %2:_(s32) = G_ANYEXT %1 $vgpr0 = COPY %2 ... @@ -189,26 +189,26 @@ body: | ; SI-LABEL: name: test_load_private_s16_align2 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: $vgpr0 = COPY [[COPY1]](s32) ; CI-LABEL: name: test_load_private_s16_align2 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: $vgpr0 = COPY [[COPY1]](s32) ; VI-LABEL: name: test_load_private_s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-LABEL: name: test_load_private_s16_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: $vgpr0 = COPY [[COPY1]](s32) %0:_(p5) = COPY $vgpr0 - %1:_(s16) = G_LOAD %0 :: (load 2, align 2, addrspace 5) + %1:_(s16) = G_LOAD %0 :: (load (s16), align 2, addrspace 5) %2:_(s32) = G_ANYEXT %1 $vgpr0 = COPY %2 ... @@ -221,10 +221,10 @@ body: | ; SI-LABEL: name: test_load_private_s16_align1 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; SI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -239,10 +239,10 @@ body: | ; SI: $vgpr0 = COPY [[ANYEXT]](s32) ; CI-LABEL: name: test_load_private_s16_align1 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; CI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -257,10 +257,10 @@ body: | ; CI: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_load_private_s16_align1 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -273,10 +273,10 @@ body: | ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_load_private_s16_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -288,7 +288,7 @@ body: | ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(p5) = COPY $vgpr0 - %1:_(s16) = G_LOAD %0 :: (load 2, align 1, addrspace 5) + %1:_(s16) = G_LOAD %0 :: (load (s16), align 1, addrspace 5) %2:_(s32) = G_ANYEXT %1 $vgpr0 = COPY %2 ... @@ -301,22 +301,22 @@ body: | ; SI-LABEL: name: test_load_private_s32_align4 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; SI: $vgpr0 = COPY [[LOAD]](s32) ; CI-LABEL: name: test_load_private_s32_align4 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; CI: $vgpr0 = COPY [[LOAD]](s32) ; VI-LABEL: name: test_load_private_s32_align4 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; VI: $vgpr0 = COPY [[LOAD]](s32) ; GFX9-LABEL: name: test_load_private_s32_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; GFX9: $vgpr0 = COPY [[LOAD]](s32) %0:_(p5) = COPY $vgpr0 - %1:_(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 5) + %1:_(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 5) $vgpr0 = COPY %1 ... @@ -328,10 +328,10 @@ body: | ; SI-LABEL: name: test_load_private_s32_align2 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -343,10 +343,10 @@ body: | ; SI: $vgpr0 = COPY [[OR]](s32) ; CI-LABEL: name: test_load_private_s32_align2 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -358,10 +358,10 @@ body: | ; CI: $vgpr0 = COPY [[OR]](s32) ; VI-LABEL: name: test_load_private_s32_align2 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -373,10 +373,10 @@ body: | ; VI: $vgpr0 = COPY [[OR]](s32) ; GFX9-LABEL: name: test_load_private_s32_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -387,7 +387,7 @@ body: | ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX9: $vgpr0 = COPY [[OR]](s32) %0:_(p5) = COPY $vgpr0 - %1:_(s32) = G_LOAD %0 :: (load 4, align 2, addrspace 5) + %1:_(s32) = G_LOAD %0 :: (load (s32), align 2, addrspace 5) $vgpr0 = COPY %1 ... @@ -399,16 +399,16 @@ body: | ; SI-LABEL: name: test_load_private_s32_align1 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -430,16 +430,16 @@ body: | ; SI: $vgpr0 = COPY [[OR2]](s32) ; CI-LABEL: name: test_load_private_s32_align1 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -461,16 +461,16 @@ body: | ; CI: $vgpr0 = COPY [[OR2]](s32) ; VI-LABEL: name: test_load_private_s32_align1 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -492,16 +492,16 @@ body: | ; VI: $vgpr0 = COPY [[OR2]](s32) ; GFX9-LABEL: name: test_load_private_s32_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -522,7 +522,7 @@ body: | ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX9: $vgpr0 = COPY [[OR2]](s32) %0:_(p5) = COPY $vgpr0 - %1:_(s32) = G_LOAD %0 :: (load 4, align 1, addrspace 5) + %1:_(s32) = G_LOAD %0 :: (load (s32), align 1, addrspace 5) $vgpr0 = COPY %1 ... @@ -534,26 +534,26 @@ body: | ; SI-LABEL: name: test_load_private_s24_align8 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: $vgpr0 = COPY [[COPY1]](s32) ; CI-LABEL: name: test_load_private_s24_align8 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: $vgpr0 = COPY [[COPY1]](s32) ; VI-LABEL: name: test_load_private_s24_align8 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-LABEL: name: test_load_private_s24_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: $vgpr0 = COPY [[COPY1]](s32) %0:_(p5) = COPY $vgpr0 - %1:_(s24) = G_LOAD %0 :: (load 3, align 8, addrspace 5) + %1:_(s24) = G_LOAD %0 :: (load (s24), align 8, addrspace 5) %2:_(s32) = G_ANYEXT %1 $vgpr0 = COPY %2 ... @@ -566,26 +566,26 @@ body: | ; SI-LABEL: name: test_load_private_s24_align4 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: $vgpr0 = COPY [[COPY1]](s32) ; CI-LABEL: name: test_load_private_s24_align4 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: $vgpr0 = COPY [[COPY1]](s32) ; VI-LABEL: name: test_load_private_s24_align4 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-LABEL: name: test_load_private_s24_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: $vgpr0 = COPY [[COPY1]](s32) %0:_(p5) = COPY $vgpr0 - %1:_(s24) = G_LOAD %0 :: (load 3, align 4, addrspace 5) + %1:_(s24) = G_LOAD %0 :: (load (s24), align 4, addrspace 5) %2:_(s32) = G_ANYEXT %1 $vgpr0 = COPY %2 ... @@ -598,10 +598,10 @@ body: | ; SI-LABEL: name: test_load_private_s24_align2 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 2, align 2, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 2, align 2, addrspace 5) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -635,10 +635,10 @@ body: | ; SI: $vgpr0 = COPY [[COPY5]](s32) ; CI-LABEL: name: test_load_private_s24_align2 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 2, align 2, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 2, align 2, addrspace 5) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -672,10 +672,10 @@ body: | ; CI: $vgpr0 = COPY [[COPY5]](s32) ; VI-LABEL: name: test_load_private_s24_align2 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 2, align 2, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 2, align 2, addrspace 5) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -705,10 +705,10 @@ body: | ; VI: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-LABEL: name: test_load_private_s24_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 2, align 2, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 2, align 2, addrspace 5) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -737,7 +737,7 @@ body: | ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) ; GFX9: $vgpr0 = COPY [[COPY1]](s32) %0:_(p5) = COPY $vgpr0 - %1:_(s24) = G_LOAD %0 :: (load 3, align 2, addrspace 5) + %1:_(s24) = G_LOAD %0 :: (load (s24), align 2, addrspace 5) %2:_(s32) = G_ANYEXT %1 $vgpr0 = COPY %2 ... @@ -750,13 +750,13 @@ body: | ; SI-LABEL: name: test_load_private_s24_align1 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; SI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) @@ -785,13 +785,13 @@ body: | ; SI: $vgpr0 = COPY [[COPY4]](s32) ; CI-LABEL: name: test_load_private_s24_align1 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) @@ -820,13 +820,13 @@ body: | ; CI: $vgpr0 = COPY [[COPY4]](s32) ; VI-LABEL: name: test_load_private_s24_align1 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) @@ -851,13 +851,13 @@ body: | ; VI: $vgpr0 = COPY [[COPY1]](s32) ; GFX9-LABEL: name: test_load_private_s24_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) @@ -881,7 +881,7 @@ body: | ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) ; GFX9: $vgpr0 = COPY [[COPY1]](s32) %0:_(p5) = COPY $vgpr0 - %1:_(s24) = G_LOAD %0 :: (load 3, align 1, addrspace 5) + %1:_(s24) = G_LOAD %0 :: (load (s24), align 1, addrspace 5) %2:_(s32) = G_ANYEXT %1 $vgpr0 = COPY %2 ... @@ -894,10 +894,10 @@ body: | ; SI-LABEL: name: test_load_private_s48_align8 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 4, align 4, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 @@ -917,10 +917,10 @@ body: | ; SI: $vgpr0_vgpr1 = COPY [[COPY4]](s64) ; CI-LABEL: name: test_load_private_s48_align8 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 4, align 4, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 @@ -940,10 +940,10 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[COPY4]](s64) ; VI-LABEL: name: test_load_private_s48_align8 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 4, align 4, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 @@ -963,10 +963,10 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[COPY4]](s64) ; GFX9-LABEL: name: test_load_private_s48_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 4, align 4, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 @@ -985,7 +985,7 @@ body: | ; GFX9: [[COPY4:%[0-9]+]]:_(s64) = COPY [[MV]](s64) ; GFX9: $vgpr0_vgpr1 = COPY [[COPY4]](s64) %0:_(p5) = COPY $vgpr0 - %1:_(s48) = G_LOAD %0 :: (load 6, align 8, addrspace 5) + %1:_(s48) = G_LOAD %0 :: (load (s48), align 8, addrspace 5) %2:_(s64) = G_ANYEXT %1 $vgpr0_vgpr1 = COPY %2 ... @@ -998,38 +998,98 @@ body: | ; SI-LABEL: name: test_load_private_s64_align8 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) - ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; SI: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; SI: [[COPY4:%[0-9]+]]:_(s64) = COPY [[MV]](s64) + ; SI: $vgpr0_vgpr1 = COPY [[COPY4]](s64) ; CI-LABEL: name: test_load_private_s64_align8 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) - ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; CI: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32) + ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; CI: [[COPY4:%[0-9]+]]:_(s64) = COPY [[MV]](s64) + ; CI: $vgpr0_vgpr1 = COPY [[COPY4]](s64) ; VI-LABEL: name: test_load_private_s64_align8 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) - ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; VI: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; VI: [[COPY4:%[0-9]+]]:_(s64) = COPY [[MV]](s64) + ; VI: $vgpr0_vgpr1 = COPY [[COPY4]](s64) ; GFX9-LABEL: name: test_load_private_s64_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) - ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) + ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32) + ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:_(s64) = COPY [[MV]](s64) + ; GFX9: $vgpr0_vgpr1 = COPY [[COPY4]](s64) %0:_(p5) = COPY $vgpr0 - %1:_(s64) = G_LOAD %0 :: (load 8, align 8, addrspace 5) + %1:_(s64) = G_LOAD %0 :: (load (s48), align 8, addrspace 5) $vgpr0_vgpr1 = COPY %1 ... @@ -1041,38 +1101,38 @@ body: | ; SI-LABEL: name: test_load_private_s64_align4 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; SI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; CI-LABEL: name: test_load_private_s64_align4 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; CI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; VI-LABEL: name: test_load_private_s64_align4 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; VI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; GFX9-LABEL: name: test_load_private_s64_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(p5) = COPY $vgpr0 - %1:_(s64) = G_LOAD %0 :: (load 8, align 4, addrspace 5) + %1:_(s64) = G_LOAD %0 :: (load (s64), align 4, addrspace 5) $vgpr0_vgpr1 = COPY %1 ... @@ -1084,10 +1144,10 @@ body: | ; SI-LABEL: name: test_load_private_s64_align2 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -1098,9 +1158,9 @@ body: | ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 from unknown-address + 4, addrspace 5) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 2 from unknown-address + 6, addrspace 5) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -1111,10 +1171,10 @@ body: | ; SI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; CI-LABEL: name: test_load_private_s64_align2 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -1125,9 +1185,9 @@ body: | ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 from unknown-address + 4, addrspace 5) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 2 from unknown-address + 6, addrspace 5) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -1138,10 +1198,10 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; VI-LABEL: name: test_load_private_s64_align2 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -1152,9 +1212,9 @@ body: | ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 from unknown-address + 4, addrspace 5) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 2 from unknown-address + 6, addrspace 5) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -1165,10 +1225,10 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; GFX9-LABEL: name: test_load_private_s64_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -1179,9 +1239,9 @@ body: | ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 from unknown-address + 4, addrspace 5) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 2 from unknown-address + 6, addrspace 5) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -1191,7 +1251,7 @@ body: | ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(p5) = COPY $vgpr0 - %1:_(s64) = G_LOAD %0 :: (load 8, align 2, addrspace 5) + %1:_(s64) = G_LOAD %0 :: (load (s64), align 2, addrspace 5) $vgpr0_vgpr1 = COPY %1 ... @@ -1203,16 +1263,16 @@ body: | ; SI-LABEL: name: test_load_private_s64_align1 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -1233,13 +1293,13 @@ body: | ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 5) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 5) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 5) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 5) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -1258,16 +1318,16 @@ body: | ; SI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; CI-LABEL: name: test_load_private_s64_align1 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -1288,13 +1348,13 @@ body: | ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 5) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 5) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) ; CI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 5) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 5) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -1313,16 +1373,16 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; VI-LABEL: name: test_load_private_s64_align1 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -1343,13 +1403,13 @@ body: | ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 5) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 5) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 5) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 5) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -1368,16 +1428,16 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[MV]](s64) ; GFX9-LABEL: name: test_load_private_s64_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -1398,13 +1458,13 @@ body: | ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 5) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 5) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 5) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 5) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -1422,7 +1482,7 @@ body: | ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(p5) = COPY $vgpr0 - %1:_(s64) = G_LOAD %0 :: (load 8, align 1, addrspace 5) + %1:_(s64) = G_LOAD %0 :: (load (s64), align 1, addrspace 5) $vgpr0_vgpr1 = COPY %1 ... @@ -1434,16 +1494,16 @@ body: | ; SI-LABEL: name: test_load_private_s96_align16 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 56) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 56) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 56) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 56) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 56) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 56) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 56) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 56) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -1464,13 +1524,13 @@ body: | ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 56) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 56) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 56) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 56) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 56) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 56) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 56) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 56) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -1486,13 +1546,13 @@ body: | ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; SI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 1 from unknown-address + 8, addrspace 56) + ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 56) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 1 from unknown-address + 9, addrspace 56) + ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 56) ; SI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 1 from unknown-address + 10, addrspace 56) + ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 56) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 1 from unknown-address + 11, addrspace 56) + ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 56) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -1512,16 +1572,16 @@ body: | ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; CI-LABEL: name: test_load_private_s96_align16 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 56) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 56) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 56) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 56) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 56) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 56) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 56) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 56) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -1542,13 +1602,13 @@ body: | ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 56) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 56) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 56) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 56) ; CI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 56) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 56) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 56) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 56) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -1564,13 +1624,13 @@ body: | ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; CI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 1 from unknown-address + 8, addrspace 56) + ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 56) ; CI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 1 from unknown-address + 9, addrspace 56) + ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 56) ; CI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 1 from unknown-address + 10, addrspace 56) + ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 56) ; CI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 1 from unknown-address + 11, addrspace 56) + ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 56) ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -1590,16 +1650,16 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; VI-LABEL: name: test_load_private_s96_align16 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 56) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 56) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 56) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 56) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 56) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 56) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 56) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 56) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -1620,13 +1680,13 @@ body: | ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 56) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 56) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 56) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 56) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 56) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 56) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 56) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 56) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -1642,13 +1702,13 @@ body: | ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; VI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 1 from unknown-address + 8, addrspace 56) + ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 56) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 1 from unknown-address + 9, addrspace 56) + ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 56) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 1 from unknown-address + 10, addrspace 56) + ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 56) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 1 from unknown-address + 11, addrspace 56) + ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 56) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -1668,16 +1728,16 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; GFX9-LABEL: name: test_load_private_s96_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 56) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 56) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 56) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 56) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 56) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 56) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 56) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 56) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -1698,13 +1758,13 @@ body: | ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 56) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 56) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 56) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 56) ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 56) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 56) ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 56) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 56) ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -1720,13 +1780,13 @@ body: | ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 1 from unknown-address + 8, addrspace 56) + ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 56) ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 1 from unknown-address + 9, addrspace 56) + ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 56) ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 1 from unknown-address + 10, addrspace 56) + ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 56) ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 1 from unknown-address + 11, addrspace 56) + ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 56) ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -1745,7 +1805,7 @@ body: | ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) %0:_(p5) = COPY $vgpr0 - %1:_(s96) = G_LOAD %0 :: (load 12, align 1, addrspace 56) + %1:_(s96) = G_LOAD %0 :: (load (s96), align 1, addrspace 56) $vgpr0_vgpr1_vgpr2 = COPY %1 ... @@ -1757,54 +1817,54 @@ body: | ; SI-LABEL: name: test_load_private_s96_align8 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, align 8, addrspace 5) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; CI-LABEL: name: test_load_private_s96_align8 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, align 8, addrspace 5) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; VI-LABEL: name: test_load_private_s96_align8 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, align 8, addrspace 5) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; GFX9-LABEL: name: test_load_private_s96_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, align 8, addrspace 5) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) %0:_(p5) = COPY $vgpr0 - %1:_(s96) = G_LOAD %0 :: (load 12, align 8, addrspace 5) + %1:_(s96) = G_LOAD %0 :: (load (s96), align 8, addrspace 5) $vgpr0_vgpr1_vgpr2 = COPY %1 ... @@ -1816,54 +1876,54 @@ body: | ; SI-LABEL: name: test_load_private_s96_align4 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, addrspace 5) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; CI-LABEL: name: test_load_private_s96_align4 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, addrspace 5) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; VI-LABEL: name: test_load_private_s96_align4 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, addrspace 5) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; GFX9-LABEL: name: test_load_private_s96_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, addrspace 5) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) %0:_(p5) = COPY $vgpr0 - %1:_(s96) = G_LOAD %0 :: (load 12, align 4, addrspace 5) + %1:_(s96) = G_LOAD %0 :: (load (s96), align 4, addrspace 5) $vgpr0_vgpr1_vgpr2 = COPY %1 ... @@ -1875,10 +1935,10 @@ body: | ; SI-LABEL: name: test_load_private_s96_align2 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -1889,9 +1949,9 @@ body: | ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 from unknown-address + 4, addrspace 5) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 2 from unknown-address + 6, addrspace 5) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -1900,9 +1960,9 @@ body: | ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 2 from unknown-address + 8, addrspace 5) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 2 from unknown-address + 10, addrspace 5) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -1914,10 +1974,10 @@ body: | ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; CI-LABEL: name: test_load_private_s96_align2 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -1928,9 +1988,9 @@ body: | ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 from unknown-address + 4, addrspace 5) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 2 from unknown-address + 6, addrspace 5) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -1939,9 +1999,9 @@ body: | ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 2 from unknown-address + 8, addrspace 5) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 2 from unknown-address + 10, addrspace 5) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -1953,10 +2013,10 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; VI-LABEL: name: test_load_private_s96_align2 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -1967,9 +2027,9 @@ body: | ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 from unknown-address + 4, addrspace 5) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 2 from unknown-address + 6, addrspace 5) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -1978,9 +2038,9 @@ body: | ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 2 from unknown-address + 8, addrspace 5) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 2 from unknown-address + 10, addrspace 5) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -1992,10 +2052,10 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; GFX9-LABEL: name: test_load_private_s96_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -2006,9 +2066,9 @@ body: | ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 from unknown-address + 4, addrspace 5) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 2 from unknown-address + 6, addrspace 5) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -2017,9 +2077,9 @@ body: | ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 2 from unknown-address + 8, addrspace 5) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 2 from unknown-address + 10, addrspace 5) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -2030,7 +2090,7 @@ body: | ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) %0:_(p5) = COPY $vgpr0 - %1:_(s96) = G_LOAD %0 :: (load 12, align 2, addrspace 5) + %1:_(s96) = G_LOAD %0 :: (load (s96), align 2, addrspace 5) $vgpr0_vgpr1_vgpr2 = COPY %1 ... @@ -2042,16 +2102,16 @@ body: | ; SI-LABEL: name: test_load_private_s96_align1 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -2072,13 +2132,13 @@ body: | ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 5) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 5) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 5) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 5) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -2094,13 +2154,13 @@ body: | ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; SI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 1 from unknown-address + 8, addrspace 5) + ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 1 from unknown-address + 9, addrspace 5) + ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) ; SI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 1 from unknown-address + 10, addrspace 5) + ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 1 from unknown-address + 11, addrspace 5) + ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -2120,16 +2180,16 @@ body: | ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; CI-LABEL: name: test_load_private_s96_align1 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -2150,13 +2210,13 @@ body: | ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 5) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 5) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) ; CI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 5) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 5) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -2172,13 +2232,13 @@ body: | ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; CI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 1 from unknown-address + 8, addrspace 5) + ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) ; CI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 1 from unknown-address + 9, addrspace 5) + ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) ; CI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 1 from unknown-address + 10, addrspace 5) + ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) ; CI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 1 from unknown-address + 11, addrspace 5) + ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -2198,16 +2258,16 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; VI-LABEL: name: test_load_private_s96_align1 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -2228,13 +2288,13 @@ body: | ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 5) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 5) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 5) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 5) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -2250,13 +2310,13 @@ body: | ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; VI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 1 from unknown-address + 8, addrspace 5) + ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 1 from unknown-address + 9, addrspace 5) + ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 1 from unknown-address + 10, addrspace 5) + ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 1 from unknown-address + 11, addrspace 5) + ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -2276,16 +2336,16 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; GFX9-LABEL: name: test_load_private_s96_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -2306,13 +2366,13 @@ body: | ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 5) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 5) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 5) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 5) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -2328,13 +2388,13 @@ body: | ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 1 from unknown-address + 8, addrspace 5) + ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 1 from unknown-address + 9, addrspace 5) + ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 1 from unknown-address + 10, addrspace 5) + ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 1 from unknown-address + 11, addrspace 5) + ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -2353,7 +2413,7 @@ body: | ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) %0:_(p5) = COPY $vgpr0 - %1:_(s96) = G_LOAD %0 :: (load 12, align 1, addrspace 5) + %1:_(s96) = G_LOAD %0 :: (load (s96), align 1, addrspace 5) $vgpr0_vgpr1_vgpr2 = COPY %1 ... @@ -2365,16 +2425,16 @@ body: | ; SI-LABEL: name: test_load_private_s128_align16 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 56) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 56) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 56) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 56) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 56) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 56) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 56) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 56) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -2395,13 +2455,13 @@ body: | ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 56) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 56) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 56) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 56) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 56) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 56) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 56) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 56) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -2417,13 +2477,13 @@ body: | ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; SI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 1 from unknown-address + 8, addrspace 56) + ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 56) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 1 from unknown-address + 9, addrspace 56) + ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 56) ; SI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 1 from unknown-address + 10, addrspace 56) + ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 56) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 1 from unknown-address + 11, addrspace 56) + ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 56) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -2440,13 +2500,13 @@ body: | ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; SI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; SI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load 1 from unknown-address + 12, addrspace 56) + ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 56) ; SI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load 1 from unknown-address + 13, addrspace 56) + ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 56) ; SI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load 1 from unknown-address + 14, addrspace 56) + ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 56) ; SI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load 1 from unknown-address + 15, addrspace 56) + ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 56) ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -2466,16 +2526,16 @@ body: | ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; CI-LABEL: name: test_load_private_s128_align16 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 56) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 56) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 56) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 56) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 56) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 56) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 56) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 56) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -2496,13 +2556,13 @@ body: | ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 56) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 56) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 56) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 56) ; CI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 56) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 56) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 56) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 56) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -2518,13 +2578,13 @@ body: | ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; CI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 1 from unknown-address + 8, addrspace 56) + ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 56) ; CI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 1 from unknown-address + 9, addrspace 56) + ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 56) ; CI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 1 from unknown-address + 10, addrspace 56) + ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 56) ; CI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 1 from unknown-address + 11, addrspace 56) + ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 56) ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -2541,13 +2601,13 @@ body: | ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; CI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load 1 from unknown-address + 12, addrspace 56) + ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 56) ; CI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load 1 from unknown-address + 13, addrspace 56) + ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 56) ; CI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load 1 from unknown-address + 14, addrspace 56) + ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 56) ; CI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load 1 from unknown-address + 15, addrspace 56) + ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 56) ; CI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; CI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; CI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -2567,16 +2627,16 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; VI-LABEL: name: test_load_private_s128_align16 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 56) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 56) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 56) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 56) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 56) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 56) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 56) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 56) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -2597,13 +2657,13 @@ body: | ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 56) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 56) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 56) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 56) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 56) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 56) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 56) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 56) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -2619,13 +2679,13 @@ body: | ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; VI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 1 from unknown-address + 8, addrspace 56) + ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 56) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 1 from unknown-address + 9, addrspace 56) + ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 56) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 1 from unknown-address + 10, addrspace 56) + ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 56) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 1 from unknown-address + 11, addrspace 56) + ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 56) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -2642,13 +2702,13 @@ body: | ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; VI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load 1 from unknown-address + 12, addrspace 56) + ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 56) ; VI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load 1 from unknown-address + 13, addrspace 56) + ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 56) ; VI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load 1 from unknown-address + 14, addrspace 56) + ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 56) ; VI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load 1 from unknown-address + 15, addrspace 56) + ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 56) ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -2668,16 +2728,16 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; GFX9-LABEL: name: test_load_private_s128_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 56) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 56) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 56) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 56) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 56) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 56) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 56) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 56) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -2698,13 +2758,13 @@ body: | ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 56) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 56) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 56) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 56) ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 56) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 56) ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 56) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 56) ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -2720,13 +2780,13 @@ body: | ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 1 from unknown-address + 8, addrspace 56) + ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 56) ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 1 from unknown-address + 9, addrspace 56) + ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 56) ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 1 from unknown-address + 10, addrspace 56) + ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 56) ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 1 from unknown-address + 11, addrspace 56) + ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 56) ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -2743,13 +2803,13 @@ body: | ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load 1 from unknown-address + 12, addrspace 56) + ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 56) ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load 1 from unknown-address + 13, addrspace 56) + ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 56) ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load 1 from unknown-address + 14, addrspace 56) + ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 56) ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load 1 from unknown-address + 15, addrspace 56) + ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 56) ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; GFX9: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -2768,7 +2828,7 @@ body: | ; GFX9: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) %0:_(p5) = COPY $vgpr0 - %1:_(s128) = G_LOAD %0 :: (load 16, align 1, addrspace 56) + %1:_(s128) = G_LOAD %0 :: (load (s128), align 1, addrspace 56) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -2780,66 +2840,66 @@ body: | ; SI-LABEL: name: test_load_private_s128_align8 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, align 8, addrspace 5) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 4 from unknown-address + 12, addrspace 5) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; SI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; CI-LABEL: name: test_load_private_s128_align8 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, align 8, addrspace 5) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 4 from unknown-address + 12, addrspace 5) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; CI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; VI-LABEL: name: test_load_private_s128_align8 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, align 8, addrspace 5) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 4 from unknown-address + 12, addrspace 5) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; VI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; GFX9-LABEL: name: test_load_private_s128_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, align 8, addrspace 5) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 4 from unknown-address + 12, addrspace 5) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX9: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) %0:_(p5) = COPY $vgpr0 - %1:_(s128) = G_LOAD %0 :: (load 16, align 8, addrspace 5) + %1:_(s128) = G_LOAD %0 :: (load (s128), align 8, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -2851,66 +2911,66 @@ body: | ; SI-LABEL: name: test_load_private_s128_align4 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, addrspace 5) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 4 from unknown-address + 12, addrspace 5) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; SI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; CI-LABEL: name: test_load_private_s128_align4 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, addrspace 5) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 4 from unknown-address + 12, addrspace 5) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; CI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; VI-LABEL: name: test_load_private_s128_align4 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, addrspace 5) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 4 from unknown-address + 12, addrspace 5) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; VI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; GFX9-LABEL: name: test_load_private_s128_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, addrspace 5) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 4 from unknown-address + 12, addrspace 5) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX9: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) %0:_(p5) = COPY $vgpr0 - %1:_(s128) = G_LOAD %0 :: (load 16, align 4, addrspace 5) + %1:_(s128) = G_LOAD %0 :: (load (s128), align 4, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -2922,10 +2982,10 @@ body: | ; SI-LABEL: name: test_load_private_s128_align2 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -2936,9 +2996,9 @@ body: | ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 from unknown-address + 4, addrspace 5) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 2 from unknown-address + 6, addrspace 5) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -2947,9 +3007,9 @@ body: | ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 2 from unknown-address + 8, addrspace 5) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 2 from unknown-address + 10, addrspace 5) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -2958,9 +3018,9 @@ body: | ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; SI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 2 from unknown-address + 12, addrspace 5) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 2 from unknown-address + 14, addrspace 5) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5) ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) @@ -2972,10 +3032,10 @@ body: | ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; CI-LABEL: name: test_load_private_s128_align2 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -2986,9 +3046,9 @@ body: | ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 from unknown-address + 4, addrspace 5) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 2 from unknown-address + 6, addrspace 5) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -2997,9 +3057,9 @@ body: | ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 2 from unknown-address + 8, addrspace 5) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 2 from unknown-address + 10, addrspace 5) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -3008,9 +3068,9 @@ body: | ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 2 from unknown-address + 12, addrspace 5) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 2 from unknown-address + 14, addrspace 5) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5) ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) @@ -3022,10 +3082,10 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; VI-LABEL: name: test_load_private_s128_align2 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -3036,9 +3096,9 @@ body: | ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 from unknown-address + 4, addrspace 5) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 2 from unknown-address + 6, addrspace 5) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -3047,9 +3107,9 @@ body: | ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 2 from unknown-address + 8, addrspace 5) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 2 from unknown-address + 10, addrspace 5) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -3058,9 +3118,9 @@ body: | ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; VI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 2 from unknown-address + 12, addrspace 5) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 2 from unknown-address + 14, addrspace 5) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5) ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) @@ -3072,10 +3132,10 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; GFX9-LABEL: name: test_load_private_s128_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -3086,9 +3146,9 @@ body: | ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 from unknown-address + 4, addrspace 5) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 2 from unknown-address + 6, addrspace 5) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -3097,9 +3157,9 @@ body: | ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 2 from unknown-address + 8, addrspace 5) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 2 from unknown-address + 10, addrspace 5) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -3108,9 +3168,9 @@ body: | ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 2 from unknown-address + 12, addrspace 5) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5) ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 2 from unknown-address + 14, addrspace 5) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5) ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) @@ -3121,7 +3181,7 @@ body: | ; GFX9: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) %0:_(p5) = COPY $vgpr0 - %1:_(s128) = G_LOAD %0 :: (load 16, align 2, addrspace 5) + %1:_(s128) = G_LOAD %0 :: (load (s128), align 2, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -3133,16 +3193,16 @@ body: | ; SI-LABEL: name: test_load_private_s128_align1 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -3163,13 +3223,13 @@ body: | ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 5) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 5) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 5) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 5) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -3185,13 +3245,13 @@ body: | ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; SI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 1 from unknown-address + 8, addrspace 5) + ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 1 from unknown-address + 9, addrspace 5) + ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) ; SI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 1 from unknown-address + 10, addrspace 5) + ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 1 from unknown-address + 11, addrspace 5) + ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -3208,13 +3268,13 @@ body: | ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; SI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; SI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load 1 from unknown-address + 12, addrspace 5) + ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) ; SI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load 1 from unknown-address + 13, addrspace 5) + ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) ; SI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load 1 from unknown-address + 14, addrspace 5) + ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) ; SI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load 1 from unknown-address + 15, addrspace 5) + ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -3234,16 +3294,16 @@ body: | ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; CI-LABEL: name: test_load_private_s128_align1 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -3264,13 +3324,13 @@ body: | ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 5) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 5) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) ; CI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 5) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 5) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -3286,13 +3346,13 @@ body: | ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; CI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 1 from unknown-address + 8, addrspace 5) + ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) ; CI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 1 from unknown-address + 9, addrspace 5) + ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) ; CI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 1 from unknown-address + 10, addrspace 5) + ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) ; CI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 1 from unknown-address + 11, addrspace 5) + ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -3309,13 +3369,13 @@ body: | ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; CI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load 1 from unknown-address + 12, addrspace 5) + ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) ; CI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load 1 from unknown-address + 13, addrspace 5) + ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) ; CI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load 1 from unknown-address + 14, addrspace 5) + ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) ; CI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load 1 from unknown-address + 15, addrspace 5) + ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) ; CI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; CI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; CI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -3335,16 +3395,16 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; VI-LABEL: name: test_load_private_s128_align1 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -3365,13 +3425,13 @@ body: | ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 5) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 5) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 5) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 5) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -3387,13 +3447,13 @@ body: | ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; VI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 1 from unknown-address + 8, addrspace 5) + ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 1 from unknown-address + 9, addrspace 5) + ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 1 from unknown-address + 10, addrspace 5) + ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 1 from unknown-address + 11, addrspace 5) + ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -3410,13 +3470,13 @@ body: | ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; VI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load 1 from unknown-address + 12, addrspace 5) + ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) ; VI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load 1 from unknown-address + 13, addrspace 5) + ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) ; VI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load 1 from unknown-address + 14, addrspace 5) + ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) ; VI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load 1 from unknown-address + 15, addrspace 5) + ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -3436,16 +3496,16 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; GFX9-LABEL: name: test_load_private_s128_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -3466,13 +3526,13 @@ body: | ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 5) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 5) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 5) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 5) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -3488,13 +3548,13 @@ body: | ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 1 from unknown-address + 8, addrspace 5) + ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 1 from unknown-address + 9, addrspace 5) + ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 1 from unknown-address + 10, addrspace 5) + ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 1 from unknown-address + 11, addrspace 5) + ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -3511,13 +3571,13 @@ body: | ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load 1 from unknown-address + 12, addrspace 5) + ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load 1 from unknown-address + 13, addrspace 5) + ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load 1 from unknown-address + 14, addrspace 5) + ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load 1 from unknown-address + 15, addrspace 5) + ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; GFX9: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -3536,7 +3596,7 @@ body: | ; GFX9: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) %0:_(p5) = COPY $vgpr0 - %1:_(s128) = G_LOAD %0 :: (load 16, align 1, addrspace 5) + %1:_(s128) = G_LOAD %0 :: (load (s128), align 1, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -3548,38 +3608,38 @@ body: | ; SI-LABEL: name: test_load_private_p1_align8 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; SI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; SI: $vgpr0_vgpr1 = COPY [[MV]](p1) ; CI-LABEL: name: test_load_private_p1_align8 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; CI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; CI: $vgpr0_vgpr1 = COPY [[MV]](p1) ; VI-LABEL: name: test_load_private_p1_align8 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; VI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; VI: $vgpr0_vgpr1 = COPY [[MV]](p1) ; GFX9-LABEL: name: test_load_private_p1_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX9: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](p1) %0:_(p5) = COPY $vgpr0 - %1:_(p1) = G_LOAD %0 :: (load 8, align 8, addrspace 5) + %1:_(p1) = G_LOAD %0 :: (load (p1), align 8, addrspace 5) $vgpr0_vgpr1 = COPY %1 ... @@ -3591,38 +3651,38 @@ body: | ; SI-LABEL: name: test_load_private_p1_align4 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; SI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; SI: $vgpr0_vgpr1 = COPY [[MV]](p1) ; CI-LABEL: name: test_load_private_p1_align4 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; CI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; CI: $vgpr0_vgpr1 = COPY [[MV]](p1) ; VI-LABEL: name: test_load_private_p1_align4 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; VI: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; VI: $vgpr0_vgpr1 = COPY [[MV]](p1) ; GFX9-LABEL: name: test_load_private_p1_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX9: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](p1) %0:_(p5) = COPY $vgpr0 - %1:_(p1) = G_LOAD %0 :: (load 8, align 4, addrspace 5) + %1:_(p1) = G_LOAD %0 :: (load (p1), align 4, addrspace 5) $vgpr0_vgpr1 = COPY %1 ... @@ -3634,10 +3694,10 @@ body: | ; SI-LABEL: name: test_load_private_p1_align2 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -3648,9 +3708,9 @@ body: | ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 from unknown-address + 4, addrspace 5) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 2 from unknown-address + 6, addrspace 5) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -3661,10 +3721,10 @@ body: | ; SI: $vgpr0_vgpr1 = COPY [[MV]](p1) ; CI-LABEL: name: test_load_private_p1_align2 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -3675,9 +3735,9 @@ body: | ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 from unknown-address + 4, addrspace 5) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 2 from unknown-address + 6, addrspace 5) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -3688,10 +3748,10 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[MV]](p1) ; VI-LABEL: name: test_load_private_p1_align2 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -3702,9 +3762,9 @@ body: | ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 from unknown-address + 4, addrspace 5) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 2 from unknown-address + 6, addrspace 5) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -3715,10 +3775,10 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[MV]](p1) ; GFX9-LABEL: name: test_load_private_p1_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -3729,9 +3789,9 @@ body: | ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 from unknown-address + 4, addrspace 5) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 2 from unknown-address + 6, addrspace 5) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -3741,7 +3801,7 @@ body: | ; GFX9: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](p1) %0:_(p5) = COPY $vgpr0 - %1:_(p1) = G_LOAD %0 :: (load 8, align 2, addrspace 5) + %1:_(p1) = G_LOAD %0 :: (load (p1), align 2, addrspace 5) $vgpr0_vgpr1 = COPY %1 ... @@ -3753,16 +3813,16 @@ body: | ; SI-LABEL: name: test_load_private_p1_align1 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -3783,13 +3843,13 @@ body: | ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 5) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 5) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 5) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 5) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -3808,16 +3868,16 @@ body: | ; SI: $vgpr0_vgpr1 = COPY [[MV]](p1) ; CI-LABEL: name: test_load_private_p1_align1 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -3838,13 +3898,13 @@ body: | ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 5) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 5) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) ; CI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 5) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 5) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -3863,16 +3923,16 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[MV]](p1) ; VI-LABEL: name: test_load_private_p1_align1 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -3893,13 +3953,13 @@ body: | ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 5) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 5) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 5) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 5) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -3918,16 +3978,16 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[MV]](p1) ; GFX9-LABEL: name: test_load_private_p1_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -3948,13 +4008,13 @@ body: | ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 5) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 5) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 5) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 5) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -3972,7 +4032,7 @@ body: | ; GFX9: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](p1) %0:_(p5) = COPY $vgpr0 - %1:_(p1) = G_LOAD %0 :: (load 8, align 1, addrspace 5) + %1:_(p1) = G_LOAD %0 :: (load (p1), align 1, addrspace 5) $vgpr0_vgpr1 = COPY %1 ... @@ -3984,22 +4044,22 @@ body: | ; SI-LABEL: name: test_load_private_p3_align4 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), addrspace 5) ; SI: $vgpr0 = COPY [[LOAD]](p3) ; CI-LABEL: name: test_load_private_p3_align4 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), addrspace 5) ; CI: $vgpr0 = COPY [[LOAD]](p3) ; VI-LABEL: name: test_load_private_p3_align4 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), addrspace 5) ; VI: $vgpr0 = COPY [[LOAD]](p3) ; GFX9-LABEL: name: test_load_private_p3_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), addrspace 5) ; GFX9: $vgpr0 = COPY [[LOAD]](p3) %0:_(p5) = COPY $vgpr0 - %1:_(p3) = G_LOAD %0 :: (load 4, align 4, addrspace 5) + %1:_(p3) = G_LOAD %0 :: (load (p3), align 4, addrspace 5) $vgpr0 = COPY %1 ... @@ -4011,10 +4071,10 @@ body: | ; SI-LABEL: name: test_load_private_p3_align2 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -4027,10 +4087,10 @@ body: | ; SI: $vgpr0 = COPY [[INTTOPTR]](p3) ; CI-LABEL: name: test_load_private_p3_align2 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -4043,10 +4103,10 @@ body: | ; CI: $vgpr0 = COPY [[INTTOPTR]](p3) ; VI-LABEL: name: test_load_private_p3_align2 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -4059,10 +4119,10 @@ body: | ; VI: $vgpr0 = COPY [[INTTOPTR]](p3) ; GFX9-LABEL: name: test_load_private_p3_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -4074,7 +4134,7 @@ body: | ; GFX9: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) ; GFX9: $vgpr0 = COPY [[INTTOPTR]](p3) %0:_(p5) = COPY $vgpr0 - %1:_(p3) = G_LOAD %0 :: (load 4, align 2, addrspace 5) + %1:_(p3) = G_LOAD %0 :: (load (p3), align 2, addrspace 5) $vgpr0 = COPY %1 ... @@ -4086,16 +4146,16 @@ body: | ; SI-LABEL: name: test_load_private_p3_align1 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -4118,16 +4178,16 @@ body: | ; SI: $vgpr0 = COPY [[INTTOPTR]](p3) ; CI-LABEL: name: test_load_private_p3_align1 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -4150,16 +4210,16 @@ body: | ; CI: $vgpr0 = COPY [[INTTOPTR]](p3) ; VI-LABEL: name: test_load_private_p3_align1 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -4182,16 +4242,16 @@ body: | ; VI: $vgpr0 = COPY [[INTTOPTR]](p3) ; GFX9-LABEL: name: test_load_private_p3_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -4213,7 +4273,7 @@ body: | ; GFX9: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) ; GFX9: $vgpr0 = COPY [[INTTOPTR]](p3) %0:_(p5) = COPY $vgpr0 - %1:_(p3) = G_LOAD %0 :: (load 4, align 1, addrspace 5) + %1:_(p3) = G_LOAD %0 :: (load (p3), align 1, addrspace 5) $vgpr0 = COPY %1 ... @@ -4225,22 +4285,22 @@ body: | ; SI-LABEL: name: test_load_private_p5_align4 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p5) :: (load (p5), addrspace 5) ; SI: $vgpr0 = COPY [[LOAD]](p5) ; CI-LABEL: name: test_load_private_p5_align4 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p5) :: (load (p5), addrspace 5) ; CI: $vgpr0 = COPY [[LOAD]](p5) ; VI-LABEL: name: test_load_private_p5_align4 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p5) :: (load (p5), addrspace 5) ; VI: $vgpr0 = COPY [[LOAD]](p5) ; GFX9-LABEL: name: test_load_private_p5_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p5) :: (load (p5), addrspace 5) ; GFX9: $vgpr0 = COPY [[LOAD]](p5) %0:_(p5) = COPY $vgpr0 - %1:_(p5) = G_LOAD %0 :: (load 4, align 4, addrspace 5) + %1:_(p5) = G_LOAD %0 :: (load (p5), align 4, addrspace 5) $vgpr0 = COPY %1 ... @@ -4252,10 +4312,10 @@ body: | ; SI-LABEL: name: test_load_private_p5_align2 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -4268,10 +4328,10 @@ body: | ; SI: $vgpr0 = COPY [[INTTOPTR]](p5) ; CI-LABEL: name: test_load_private_p5_align2 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -4284,10 +4344,10 @@ body: | ; CI: $vgpr0 = COPY [[INTTOPTR]](p5) ; VI-LABEL: name: test_load_private_p5_align2 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -4300,10 +4360,10 @@ body: | ; VI: $vgpr0 = COPY [[INTTOPTR]](p5) ; GFX9-LABEL: name: test_load_private_p5_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -4315,7 +4375,7 @@ body: | ; GFX9: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) ; GFX9: $vgpr0 = COPY [[INTTOPTR]](p5) %0:_(p5) = COPY $vgpr0 - %1:_(p5) = G_LOAD %0 :: (load 4, align 2, addrspace 5) + %1:_(p5) = G_LOAD %0 :: (load (p5), align 2, addrspace 5) $vgpr0 = COPY %1 ... @@ -4327,16 +4387,16 @@ body: | ; SI-LABEL: name: test_load_private_p5_align1 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -4359,16 +4419,16 @@ body: | ; SI: $vgpr0 = COPY [[INTTOPTR]](p5) ; CI-LABEL: name: test_load_private_p5_align1 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -4391,16 +4451,16 @@ body: | ; CI: $vgpr0 = COPY [[INTTOPTR]](p5) ; VI-LABEL: name: test_load_private_p5_align1 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -4423,16 +4483,16 @@ body: | ; VI: $vgpr0 = COPY [[INTTOPTR]](p5) ; GFX9-LABEL: name: test_load_private_p5_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -4454,7 +4514,7 @@ body: | ; GFX9: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) ; GFX9: $vgpr0 = COPY [[INTTOPTR]](p5) %0:_(p5) = COPY $vgpr0 - %1:_(p5) = G_LOAD %0 :: (load 4, align 1, addrspace 5) + %1:_(p5) = G_LOAD %0 :: (load (p5), align 1, addrspace 5) $vgpr0 = COPY %1 ... @@ -4466,7 +4526,7 @@ body: | ; SI-LABEL: name: test_load_private_v2s8_align2 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (<2 x s8>), addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -4487,7 +4547,7 @@ body: | ; SI: $vgpr0 = COPY [[ANYEXT]](s32) ; CI-LABEL: name: test_load_private_v2s8_align2 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (<2 x s8>), addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -4508,7 +4568,7 @@ body: | ; CI: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_load_private_v2s8_align2 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (<2 x s8>), addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -4527,7 +4587,7 @@ body: | ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_load_private_v2s8_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (<2 x s8>), addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -4545,7 +4605,7 @@ body: | ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(p5) = COPY $vgpr0 - %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, align 2, addrspace 5) + %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 2, addrspace 5) %2:_(s16) = G_BITCAST %1 %3:_(s32) = G_ANYEXT %2 $vgpr0 = COPY %3 @@ -4559,10 +4619,10 @@ body: | ; SI-LABEL: name: test_load_private_v2s8_align1 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) @@ -4571,10 +4631,10 @@ body: | ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; CI-LABEL: name: test_load_private_v2s8_align1 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) @@ -4583,10 +4643,10 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; VI-LABEL: name: test_load_private_v2s8_align1 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) @@ -4595,16 +4655,16 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_load_private_v2s8_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(p5) = COPY $vgpr0 - %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, align 1, addrspace 5) + %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 1, addrspace 5) %2:_(<2 x s32>) = G_ANYEXT %1 $vgpr0_vgpr1 = COPY %2 ... @@ -4617,30 +4677,30 @@ body: | ; SI-LABEL: name: test_load_private_v3s8_align4 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p5) :: (load 3, align 4, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p5) :: (load (<3 x s8>), align 4, addrspace 5) ; SI: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; SI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; SI: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; CI-LABEL: name: test_load_private_v3s8_align4 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p5) :: (load 3, align 4, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p5) :: (load (<3 x s8>), align 4, addrspace 5) ; CI: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; CI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; CI: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; VI-LABEL: name: test_load_private_v3s8_align4 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p5) :: (load 3, align 4, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p5) :: (load (<3 x s8>), align 4, addrspace 5) ; VI: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; VI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; VI: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; GFX9-LABEL: name: test_load_private_v3s8_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p5) :: (load 3, align 4, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p5) :: (load (<3 x s8>), align 4, addrspace 5) ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; GFX9: $vgpr0 = COPY [[INSERT]](<4 x s8>) %0:_(p5) = COPY $vgpr0 - %1:_(<3 x s8>) = G_LOAD %0 :: (load 3, addrspace 5, align 4) + %1:_(<3 x s8>) = G_LOAD %0 :: (load (<3 x s8>), addrspace 5, align 4) %2:_(<4 x s8>) = G_IMPLICIT_DEF %3:_(<4 x s8>) = G_INSERT %2, %1, 0 $vgpr0 = COPY %3 @@ -4654,30 +4714,30 @@ body: | ; SI-LABEL: name: test_load_private_v3s8_align1 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p5) :: (load 3, align 1, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p5) :: (load (<3 x s8>), align 1, addrspace 5) ; SI: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; SI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; SI: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; CI-LABEL: name: test_load_private_v3s8_align1 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p5) :: (load 3, align 1, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p5) :: (load (<3 x s8>), align 1, addrspace 5) ; CI: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; CI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; CI: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; VI-LABEL: name: test_load_private_v3s8_align1 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p5) :: (load 3, align 1, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p5) :: (load (<3 x s8>), align 1, addrspace 5) ; VI: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; VI: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; VI: $vgpr0 = COPY [[INSERT]](<4 x s8>) ; GFX9-LABEL: name: test_load_private_v3s8_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p5) :: (load 3, align 1, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[COPY]](p5) :: (load (<3 x s8>), align 1, addrspace 5) ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[LOAD]](<3 x s8>), 0 ; GFX9: $vgpr0 = COPY [[INSERT]](<4 x s8>) %0:_(p5) = COPY $vgpr0 - %1:_(<3 x s8>) = G_LOAD %0 :: (load 3, align 1, addrspace 5) + %1:_(<3 x s8>) = G_LOAD %0 :: (load (<3 x s8>), align 1, addrspace 5) %2:_(<4 x s8>) = G_IMPLICIT_DEF %3:_(<4 x s8>) = G_INSERT %2, %1, 0 $vgpr0 = COPY %3 @@ -4691,7 +4751,7 @@ body: | ; SI-LABEL: name: test_load_private_v4s8_align4 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (<4 x s8>), addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -4707,7 +4767,7 @@ body: | ; SI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; CI-LABEL: name: test_load_private_v4s8_align4 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (<4 x s8>), addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -4723,7 +4783,7 @@ body: | ; CI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; VI-LABEL: name: test_load_private_v4s8_align4 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (<4 x s8>), addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -4739,7 +4799,7 @@ body: | ; VI: $vgpr0 = COPY [[TRUNC]](<4 x s8>) ; GFX9-LABEL: name: test_load_private_v4s8_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (<4 x s8>), addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -4756,7 +4816,7 @@ body: | ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<4 x s16>) ; GFX9: $vgpr0 = COPY [[TRUNC]](<4 x s8>) %0:_(p5) = COPY $vgpr0 - %1:_(<4 x s8>) = G_LOAD %0 :: (load 4, align 4, addrspace 5) + %1:_(<4 x s8>) = G_LOAD %0 :: (load (<4 x s8>), align 4, addrspace 5) $vgpr0 = COPY %1 ... @@ -4768,10 +4828,10 @@ body: | ; SI-LABEL: name: test_load_private_v8s8_align8 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -4797,10 +4857,10 @@ body: | ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<8 x s8>) ; CI-LABEL: name: test_load_private_v8s8_align8 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -4826,10 +4886,10 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<8 x s8>) ; VI-LABEL: name: test_load_private_v8s8_align8 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -4855,10 +4915,10 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<8 x s8>) ; GFX9-LABEL: name: test_load_private_v8s8_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -4887,7 +4947,7 @@ body: | ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<8 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>) ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS2]](<8 x s8>) %0:_(p5) = COPY $vgpr0 - %1:_(<8 x s8>) = G_LOAD %0 :: (load 8, align 8, addrspace 5) + %1:_(<8 x s8>) = G_LOAD %0 :: (load (<8 x s8>), align 8, addrspace 5) $vgpr0_vgpr1 = COPY %1 ... @@ -4899,43 +4959,43 @@ body: | ; SI-LABEL: name: test_load_private_v16s8_align16 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 56) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 56) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 56) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 56) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 56) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 56) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 56) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 56) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 56) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 56) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 56) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 56) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 56) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 56) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 56) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 56) ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 1 from unknown-address + 8, addrspace 56) + ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 56) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 1 from unknown-address + 9, addrspace 56) + ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 56) ; SI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 1 from unknown-address + 10, addrspace 56) + ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 56) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 1 from unknown-address + 11, addrspace 56) + ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 56) ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; SI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load 1 from unknown-address + 12, addrspace 56) + ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 56) ; SI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load 1 from unknown-address + 13, addrspace 56) + ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 56) ; SI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load 1 from unknown-address + 14, addrspace 56) + ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 56) ; SI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load 1 from unknown-address + 15, addrspace 56) + ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 56) ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) @@ -4964,43 +5024,43 @@ body: | ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>) ; CI-LABEL: name: test_load_private_v16s8_align16 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 56) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 56) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 56) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 56) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 56) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 56) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 56) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 56) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 56) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 56) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 56) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 56) ; CI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 56) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 56) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 56) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 56) ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 1 from unknown-address + 8, addrspace 56) + ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 56) ; CI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 1 from unknown-address + 9, addrspace 56) + ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 56) ; CI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 1 from unknown-address + 10, addrspace 56) + ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 56) ; CI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 1 from unknown-address + 11, addrspace 56) + ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 56) ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load 1 from unknown-address + 12, addrspace 56) + ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 56) ; CI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load 1 from unknown-address + 13, addrspace 56) + ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 56) ; CI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load 1 from unknown-address + 14, addrspace 56) + ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 56) ; CI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load 1 from unknown-address + 15, addrspace 56) + ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 56) ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) @@ -5029,43 +5089,43 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>) ; VI-LABEL: name: test_load_private_v16s8_align16 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 56) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 56) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 56) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 56) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 56) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 56) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 56) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 56) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 56) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 56) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 56) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 56) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 56) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 56) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 56) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 56) ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 1 from unknown-address + 8, addrspace 56) + ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 56) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 1 from unknown-address + 9, addrspace 56) + ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 56) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 1 from unknown-address + 10, addrspace 56) + ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 56) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 1 from unknown-address + 11, addrspace 56) + ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 56) ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; VI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load 1 from unknown-address + 12, addrspace 56) + ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 56) ; VI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load 1 from unknown-address + 13, addrspace 56) + ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 56) ; VI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load 1 from unknown-address + 14, addrspace 56) + ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 56) ; VI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load 1 from unknown-address + 15, addrspace 56) + ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 56) ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) @@ -5094,43 +5154,43 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>) ; GFX9-LABEL: name: test_load_private_v16s8_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 56) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 56) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 56) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 56) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 56) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 56) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 56) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 56) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 56) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 56) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 56) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 56) ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 56) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 56) ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 56) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 56) ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 1 from unknown-address + 8, addrspace 56) + ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 56) ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 1 from unknown-address + 9, addrspace 56) + ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 56) ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 1 from unknown-address + 10, addrspace 56) + ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 56) ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 1 from unknown-address + 11, addrspace 56) + ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 56) ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load 1 from unknown-address + 12, addrspace 56) + ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 56) ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load 1 from unknown-address + 13, addrspace 56) + ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 56) ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load 1 from unknown-address + 14, addrspace 56) + ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 56) ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load 1 from unknown-address + 15, addrspace 56) + ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 56) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) @@ -5166,7 +5226,7 @@ body: | ; GFX9: [[CONCAT_VECTORS4:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC2]](<4 x s8>), [[TRUNC3]](<4 x s8>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<16 x s8>) %0:_(p5) = COPY $vgpr0 - %1:_(<16 x s8>) = G_LOAD %0 :: (load 16, align 1, addrspace 56) + %1:_(<16 x s8>) = G_LOAD %0 :: (load (<16 x s8>), align 1, addrspace 56) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -5178,22 +5238,22 @@ body: | ; SI-LABEL: name: test_load_private_v2s16_align4 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) ; SI: $vgpr0 = COPY [[LOAD]](<2 x s16>) ; CI-LABEL: name: test_load_private_v2s16_align4 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) ; CI: $vgpr0 = COPY [[LOAD]](<2 x s16>) ; VI-LABEL: name: test_load_private_v2s16_align4 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) ; VI: $vgpr0 = COPY [[LOAD]](<2 x s16>) ; GFX9-LABEL: name: test_load_private_v2s16_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) ; GFX9: $vgpr0 = COPY [[LOAD]](<2 x s16>) %0:_(p5) = COPY $vgpr0 - %1:_(<2 x s16>) = G_LOAD %0 :: (load 4, align 4, addrspace 5) + %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 5) $vgpr0 = COPY %1 ... @@ -5205,10 +5265,10 @@ body: | ; SI-LABEL: name: test_load_private_v2s16_align2 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -5221,10 +5281,10 @@ body: | ; SI: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; CI-LABEL: name: test_load_private_v2s16_align2 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -5237,10 +5297,10 @@ body: | ; CI: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; VI-LABEL: name: test_load_private_v2s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -5253,16 +5313,16 @@ body: | ; VI: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; GFX9-LABEL: name: test_load_private_v2s16_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(p5) = COPY $vgpr0 - %1:_(<2 x s16>) = G_LOAD %0 :: (load 4, align 2, addrspace 5) + %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 2, addrspace 5) $vgpr0 = COPY %1 ... @@ -5274,10 +5334,10 @@ body: | ; SI-LABEL: name: test_load_private_v2s16_align1 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; SI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -5290,9 +5350,9 @@ body: | ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -5310,10 +5370,10 @@ body: | ; SI: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; CI-LABEL: name: test_load_private_v2s16_align1 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; CI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -5326,9 +5386,9 @@ body: | ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -5346,10 +5406,10 @@ body: | ; CI: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; VI-LABEL: name: test_load_private_v2s16_align1 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -5360,9 +5420,9 @@ body: | ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) @@ -5378,10 +5438,10 @@ body: | ; VI: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; GFX9-LABEL: name: test_load_private_v2s16_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -5392,9 +5452,9 @@ body: | ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) @@ -5406,7 +5466,7 @@ body: | ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(p5) = COPY $vgpr0 - %1:_(<2 x s16>) = G_LOAD %0 :: (load 4, align 1, addrspace 5) + %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 1, addrspace 5) $vgpr0 = COPY %1 ... @@ -5418,10 +5478,10 @@ body: | ; SI-LABEL: name: test_load_private_v3s16_align8 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 4, align 4, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5) ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) @@ -5447,10 +5507,10 @@ body: | ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-LABEL: name: test_load_private_v3s16_align8 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 4, align 4, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5) ; CI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) @@ -5476,10 +5536,10 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_private_v3s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 4, align 4, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5) ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) @@ -5505,10 +5565,10 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-LABEL: name: test_load_private_v3s16_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 4, align 4, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 4, align 4, addrspace 5) ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) @@ -5525,7 +5585,7 @@ body: | ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) %0:_(p5) = COPY $vgpr0 - %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 8, addrspace 5) + %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 8, addrspace 5) %2:_(<4 x s16>) = G_IMPLICIT_DEF %3:_(<4 x s16>) = G_INSERT %2, %1, 0 $vgpr0_vgpr1 = COPY %3 @@ -5539,13 +5599,13 @@ body: | ; SI-LABEL: name: test_load_private_v3s16_align2 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 from unknown-address + 4, addrspace 5) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] @@ -5569,13 +5629,13 @@ body: | ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-LABEL: name: test_load_private_v3s16_align2 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 from unknown-address + 4, addrspace 5) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] @@ -5599,13 +5659,13 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_private_v3s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 from unknown-address + 4, addrspace 5) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] @@ -5629,13 +5689,13 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-LABEL: name: test_load_private_v3s16_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 from unknown-address + 4, addrspace 5) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) @@ -5649,7 +5709,7 @@ body: | ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) %0:_(p5) = COPY $vgpr0 - %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 2, addrspace 5) + %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 2, addrspace 5) %2:_(<4 x s16>) = G_IMPLICIT_DEF %3:_(<4 x s16>) = G_INSERT %2, %1, 0 $vgpr0_vgpr1 = COPY %3 @@ -5663,10 +5723,10 @@ body: | ; SI-LABEL: name: test_load_private_v3s16_align1 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; SI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -5679,9 +5739,9 @@ body: | ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -5692,9 +5752,9 @@ body: | ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 5) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 5) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; SI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -5722,10 +5782,10 @@ body: | ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-LABEL: name: test_load_private_v3s16_align1 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; CI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -5738,9 +5798,9 @@ body: | ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -5751,9 +5811,9 @@ body: | ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 5) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 5) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) ; CI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -5781,10 +5841,10 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_private_v3s16_align1 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -5795,9 +5855,9 @@ body: | ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) @@ -5806,9 +5866,9 @@ body: | ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 5) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 5) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) @@ -5834,10 +5894,10 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-LABEL: name: test_load_private_v3s16_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -5848,9 +5908,9 @@ body: | ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) @@ -5859,9 +5919,9 @@ body: | ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 5) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 5) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) @@ -5881,7 +5941,7 @@ body: | ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV]](<3 x s16>), 0 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) %0:_(p5) = COPY $vgpr0 - %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 1, addrspace 5) + %1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 1, addrspace 5) %2:_(<4 x s16>) = G_IMPLICIT_DEF %3:_(<4 x s16>) = G_INSERT %2, %1, 0 $vgpr0_vgpr1 = COPY %3 @@ -5894,38 +5954,38 @@ body: | liveins: $vgpr0 ; SI-LABEL: name: test_load_private_v4s16_align8 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; CI-LABEL: name: test_load_private_v4s16_align8 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) ; CI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_load_private_v4s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_load_private_v4s16_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(p5) = COPY $vgpr0 - %1:_(<4 x s16>) = G_LOAD %0 :: (load 8, align 8, addrspace 5) + %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 8, addrspace 5) $vgpr0_vgpr1 = COPY %1 ... @@ -5937,38 +5997,38 @@ body: | ; SI-LABEL: name: test_load_private_v4s16_align4 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; CI-LABEL: name: test_load_private_v4s16_align4 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) ; CI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_load_private_v4s16_align4 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_load_private_v4s16_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(p5) = COPY $vgpr0 - %1:_(<4 x s16>) = G_LOAD %0 :: (load 8, align 4, addrspace 5) + %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 4, addrspace 5) $vgpr0_vgpr1 = COPY %1 ... @@ -5979,10 +6039,10 @@ body: | liveins: $vgpr0 ; SI-LABEL: name: test_load_private_v4s16_align2 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -5994,9 +6054,9 @@ body: | ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 from unknown-address + 4, addrspace 5) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 2 from unknown-address + 6, addrspace 5) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -6008,10 +6068,10 @@ body: | ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; CI-LABEL: name: test_load_private_v4s16_align2 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -6023,9 +6083,9 @@ body: | ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 from unknown-address + 4, addrspace 5) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 2 from unknown-address + 6, addrspace 5) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -6037,10 +6097,10 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_load_private_v4s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -6052,9 +6112,9 @@ body: | ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 from unknown-address + 4, addrspace 5) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 2 from unknown-address + 6, addrspace 5) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -6066,25 +6126,25 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_load_private_v4s16_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 from unknown-address + 4, addrspace 5) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 2 from unknown-address + 6, addrspace 5) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(p5) = COPY $vgpr0 - %1:_(<4 x s16>) = G_LOAD %0 :: (load 8, align 2, addrspace 5) + %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 2, addrspace 5) $vgpr0_vgpr1 = COPY %1 ... @@ -6096,10 +6156,10 @@ body: | ; SI-LABEL: name: test_load_private_v4s16_align1 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; SI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -6112,9 +6172,9 @@ body: | ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -6131,9 +6191,9 @@ body: | ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 5) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 5) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; SI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -6143,9 +6203,9 @@ body: | ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; SI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s32) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 5) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 5) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; SI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -6163,10 +6223,10 @@ body: | ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; CI-LABEL: name: test_load_private_v4s16_align1 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; CI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -6179,9 +6239,9 @@ body: | ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -6198,9 +6258,9 @@ body: | ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 5) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 5) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) ; CI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -6210,9 +6270,9 @@ body: | ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s32) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 5) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 5) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) ; CI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; CI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -6230,10 +6290,10 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_load_private_v4s16_align1 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -6244,9 +6304,9 @@ body: | ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) @@ -6261,9 +6321,9 @@ body: | ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 5) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 5) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) @@ -6271,9 +6331,9 @@ body: | ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; VI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 5) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 5) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) @@ -6289,10 +6349,10 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_load_private_v4s16_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] @@ -6303,9 +6363,9 @@ body: | ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) @@ -6317,9 +6377,9 @@ body: | ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 5) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 5) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32) @@ -6327,9 +6387,9 @@ body: | ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s32) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 5) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 5) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32) ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32) @@ -6342,7 +6402,7 @@ body: | ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(p5) = COPY $vgpr0 - %1:_(<4 x s16>) = G_LOAD %0 :: (load 8, align 1, addrspace 5) + %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 1, addrspace 5) $vgpr0_vgpr1 = COPY %1 ... @@ -6354,38 +6414,38 @@ body: | ; SI-LABEL: name: test_load_private_v2s32_align8 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; CI-LABEL: name: test_load_private_v2s32_align8 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; VI-LABEL: name: test_load_private_v2s32_align8 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_load_private_v2s32_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(p5) = COPY $vgpr0 - %1:_(<2 x s32>) = G_LOAD %0 :: (load 8, align 8, addrspace 5) + %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 8, addrspace 5) $vgpr0_vgpr1 = COPY %1 ... @@ -6397,38 +6457,38 @@ body: | ; SI-LABEL: name: test_load_private_v2s32_align4 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; CI-LABEL: name: test_load_private_v2s32_align4 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; VI-LABEL: name: test_load_private_v2s32_align4 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_load_private_v2s32_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(p5) = COPY $vgpr0 - %1:_(<2 x s32>) = G_LOAD %0 :: (load 8, align 4, addrspace 5) + %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 4, addrspace 5) $vgpr0_vgpr1 = COPY %1 ... @@ -6440,10 +6500,10 @@ body: | ; SI-LABEL: name: test_load_private_v2s32_align2 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -6454,9 +6514,9 @@ body: | ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 from unknown-address + 4, addrspace 5) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 2 from unknown-address + 6, addrspace 5) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -6467,10 +6527,10 @@ body: | ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; CI-LABEL: name: test_load_private_v2s32_align2 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -6481,9 +6541,9 @@ body: | ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 from unknown-address + 4, addrspace 5) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 2 from unknown-address + 6, addrspace 5) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -6494,10 +6554,10 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; VI-LABEL: name: test_load_private_v2s32_align2 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -6508,9 +6568,9 @@ body: | ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 from unknown-address + 4, addrspace 5) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 2 from unknown-address + 6, addrspace 5) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -6521,10 +6581,10 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_load_private_v2s32_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -6535,9 +6595,9 @@ body: | ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 from unknown-address + 4, addrspace 5) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 2 from unknown-address + 6, addrspace 5) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -6547,7 +6607,7 @@ body: | ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(p5) = COPY $vgpr0 - %1:_(<2 x s32>) = G_LOAD %0 :: (load 8, align 2, addrspace 5) + %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 2, addrspace 5) $vgpr0_vgpr1 = COPY %1 ... @@ -6559,16 +6619,16 @@ body: | ; SI-LABEL: name: test_load_private_v2s32_align1 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -6589,13 +6649,13 @@ body: | ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 5) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 5) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 5) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 5) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -6614,16 +6674,16 @@ body: | ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; CI-LABEL: name: test_load_private_v2s32_align1 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -6644,13 +6704,13 @@ body: | ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 5) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 5) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) ; CI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 5) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 5) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -6669,16 +6729,16 @@ body: | ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; VI-LABEL: name: test_load_private_v2s32_align1 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -6699,13 +6759,13 @@ body: | ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 5) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 5) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 5) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 5) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -6724,16 +6784,16 @@ body: | ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_load_private_v2s32_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -6754,13 +6814,13 @@ body: | ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 5) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 5) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 5) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 5) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -6778,7 +6838,7 @@ body: | ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(p5) = COPY $vgpr0 - %1:_(<2 x s32>) = G_LOAD %0 :: (load 8, align 1, addrspace 5) + %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 1, addrspace 5) $vgpr0_vgpr1 = COPY %1 ... @@ -6790,16 +6850,16 @@ body: | ; SI-LABEL: name: test_load_private_v3s32_align16 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 56) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 56) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 56) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 56) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 56) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 56) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 56) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 56) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -6820,13 +6880,13 @@ body: | ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 56) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 56) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 56) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 56) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 56) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 56) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 56) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 56) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -6842,13 +6902,13 @@ body: | ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; SI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 1 from unknown-address + 8, addrspace 56) + ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 56) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 1 from unknown-address + 9, addrspace 56) + ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 56) ; SI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 1 from unknown-address + 10, addrspace 56) + ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 56) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 1 from unknown-address + 11, addrspace 56) + ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 56) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -6867,16 +6927,16 @@ body: | ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; CI-LABEL: name: test_load_private_v3s32_align16 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 56) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 56) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 56) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 56) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 56) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 56) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 56) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 56) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -6897,13 +6957,13 @@ body: | ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 56) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 56) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 56) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 56) ; CI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 56) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 56) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 56) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 56) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -6919,13 +6979,13 @@ body: | ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; CI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 1 from unknown-address + 8, addrspace 56) + ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 56) ; CI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 1 from unknown-address + 9, addrspace 56) + ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 56) ; CI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 1 from unknown-address + 10, addrspace 56) + ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 56) ; CI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 1 from unknown-address + 11, addrspace 56) + ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 56) ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -6944,16 +7004,16 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; VI-LABEL: name: test_load_private_v3s32_align16 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 56) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 56) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 56) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 56) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 56) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 56) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 56) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 56) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -6974,13 +7034,13 @@ body: | ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 56) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 56) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 56) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 56) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 56) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 56) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 56) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 56) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -6996,13 +7056,13 @@ body: | ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; VI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 1 from unknown-address + 8, addrspace 56) + ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 56) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 1 from unknown-address + 9, addrspace 56) + ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 56) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 1 from unknown-address + 10, addrspace 56) + ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 56) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 1 from unknown-address + 11, addrspace 56) + ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 56) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -7021,16 +7081,16 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; GFX9-LABEL: name: test_load_private_v3s32_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 56) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 56) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 56) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 56) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 56) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 56) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 56) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 56) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -7051,13 +7111,13 @@ body: | ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 56) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 56) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 56) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 56) ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 56) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 56) ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 56) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 56) ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -7073,13 +7133,13 @@ body: | ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 1 from unknown-address + 8, addrspace 56) + ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 56) ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 1 from unknown-address + 9, addrspace 56) + ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 56) ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 1 from unknown-address + 10, addrspace 56) + ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 56) ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 1 from unknown-address + 11, addrspace 56) + ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 56) ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -7097,7 +7157,7 @@ body: | ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(p5) = COPY $vgpr0 - %1:_(<3 x s32>) = G_LOAD %0 :: (load 12, align 1, addrspace 56) + %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 1, addrspace 56) $vgpr0_vgpr1_vgpr2 = COPY %1 ... @@ -7109,50 +7169,50 @@ body: | ; SI-LABEL: name: test_load_private_v3s32_align4 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, addrspace 5) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; CI-LABEL: name: test_load_private_v3s32_align4 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, addrspace 5) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; VI-LABEL: name: test_load_private_v3s32_align4 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, addrspace 5) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; GFX9-LABEL: name: test_load_private_v3s32_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, addrspace 5) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(p5) = COPY $vgpr0 - %1:_(<3 x s32>) = G_LOAD %0 :: (load 12, align 4, addrspace 5) + %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 4, addrspace 5) $vgpr0_vgpr1_vgpr2 = COPY %1 ... @@ -7164,16 +7224,16 @@ body: | ; SI-LABEL: name: test_load_private_v4s32_align16 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 56) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 56) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 56) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 56) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 56) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 56) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 56) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 56) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -7194,13 +7254,13 @@ body: | ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 56) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 56) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 56) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 56) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 56) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 56) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 56) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 56) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -7216,13 +7276,13 @@ body: | ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; SI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 1 from unknown-address + 8, addrspace 56) + ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 56) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 1 from unknown-address + 9, addrspace 56) + ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 56) ; SI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 1 from unknown-address + 10, addrspace 56) + ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 56) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 1 from unknown-address + 11, addrspace 56) + ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 56) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -7239,13 +7299,13 @@ body: | ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; SI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; SI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load 1 from unknown-address + 12, addrspace 56) + ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 56) ; SI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load 1 from unknown-address + 13, addrspace 56) + ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 56) ; SI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load 1 from unknown-address + 14, addrspace 56) + ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 56) ; SI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load 1 from unknown-address + 15, addrspace 56) + ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 56) ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -7264,16 +7324,16 @@ body: | ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) ; CI-LABEL: name: test_load_private_v4s32_align16 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 56) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 56) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 56) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 56) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 56) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 56) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 56) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 56) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -7294,13 +7354,13 @@ body: | ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 56) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 56) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 56) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 56) ; CI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 56) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 56) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 56) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 56) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -7316,13 +7376,13 @@ body: | ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; CI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 1 from unknown-address + 8, addrspace 56) + ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 56) ; CI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 1 from unknown-address + 9, addrspace 56) + ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 56) ; CI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 1 from unknown-address + 10, addrspace 56) + ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 56) ; CI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 1 from unknown-address + 11, addrspace 56) + ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 56) ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -7339,13 +7399,13 @@ body: | ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; CI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load 1 from unknown-address + 12, addrspace 56) + ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 56) ; CI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load 1 from unknown-address + 13, addrspace 56) + ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 56) ; CI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load 1 from unknown-address + 14, addrspace 56) + ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 56) ; CI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load 1 from unknown-address + 15, addrspace 56) + ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 56) ; CI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; CI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; CI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -7364,16 +7424,16 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) ; VI-LABEL: name: test_load_private_v4s32_align16 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 56) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 56) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 56) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 56) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 56) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 56) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 56) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 56) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -7394,13 +7454,13 @@ body: | ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 56) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 56) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 56) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 56) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 56) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 56) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 56) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 56) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -7416,13 +7476,13 @@ body: | ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; VI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 1 from unknown-address + 8, addrspace 56) + ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 56) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 1 from unknown-address + 9, addrspace 56) + ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 56) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 1 from unknown-address + 10, addrspace 56) + ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 56) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 1 from unknown-address + 11, addrspace 56) + ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 56) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -7439,13 +7499,13 @@ body: | ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; VI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load 1 from unknown-address + 12, addrspace 56) + ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 56) ; VI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load 1 from unknown-address + 13, addrspace 56) + ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 56) ; VI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load 1 from unknown-address + 14, addrspace 56) + ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 56) ; VI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load 1 from unknown-address + 15, addrspace 56) + ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 56) ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -7464,16 +7524,16 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) ; GFX9-LABEL: name: test_load_private_v4s32_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 56) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 56) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 56) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 56) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 56) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 56) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 56) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 56) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -7494,13 +7554,13 @@ body: | ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 56) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 56) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 56) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 56) ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 56) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 56) ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 56) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 56) ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -7516,13 +7576,13 @@ body: | ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 1 from unknown-address + 8, addrspace 56) + ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 56) ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 1 from unknown-address + 9, addrspace 56) + ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 56) ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 1 from unknown-address + 10, addrspace 56) + ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 56) ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 1 from unknown-address + 11, addrspace 56) + ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 56) ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -7539,13 +7599,13 @@ body: | ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load 1 from unknown-address + 12, addrspace 56) + ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 56) ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load 1 from unknown-address + 13, addrspace 56) + ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 56) ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load 1 from unknown-address + 14, addrspace 56) + ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 56) ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load 1 from unknown-address + 15, addrspace 56) + ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 56) ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; GFX9: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -7563,7 +7623,7 @@ body: | ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(p5) = COPY $vgpr0 - %1:_(<4 x s32>) = G_LOAD %0 :: (load 16, align 1, addrspace 56) + %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 1, addrspace 56) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -7575,62 +7635,62 @@ body: | ; SI-LABEL: name: test_load_private_v4s32_align8 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, align 8, addrspace 5) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 4 from unknown-address + 12, addrspace 5) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) ; CI-LABEL: name: test_load_private_v4s32_align8 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, align 8, addrspace 5) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 4 from unknown-address + 12, addrspace 5) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) ; VI-LABEL: name: test_load_private_v4s32_align8 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, align 8, addrspace 5) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 4 from unknown-address + 12, addrspace 5) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) ; GFX9-LABEL: name: test_load_private_v4s32_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, align 8, addrspace 5) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 4 from unknown-address + 12, addrspace 5) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(p5) = COPY $vgpr0 - %1:_(<4 x s32>) = G_LOAD %0 :: (load 16, align 8, addrspace 5) + %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 8, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -7642,62 +7702,62 @@ body: | ; SI-LABEL: name: test_load_private_v4s32_align4 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, addrspace 5) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 4 from unknown-address + 12, addrspace 5) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) ; CI-LABEL: name: test_load_private_v4s32_align4 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, addrspace 5) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 4 from unknown-address + 12, addrspace 5) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) ; VI-LABEL: name: test_load_private_v4s32_align4 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, addrspace 5) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 4 from unknown-address + 12, addrspace 5) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) ; GFX9-LABEL: name: test_load_private_v4s32_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, addrspace 5) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 4 from unknown-address + 12, addrspace 5) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(p5) = COPY $vgpr0 - %1:_(<4 x s32>) = G_LOAD %0 :: (load 16, align 4, addrspace 5) + %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 4, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -7709,10 +7769,10 @@ body: | ; SI-LABEL: name: test_load_private_v4s32_align2 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -7723,9 +7783,9 @@ body: | ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 from unknown-address + 4, addrspace 5) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 2 from unknown-address + 6, addrspace 5) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -7734,9 +7794,9 @@ body: | ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 2 from unknown-address + 8, addrspace 5) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 2 from unknown-address + 10, addrspace 5) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -7745,9 +7805,9 @@ body: | ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; SI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 2 from unknown-address + 12, addrspace 5) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 2 from unknown-address + 14, addrspace 5) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5) ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) @@ -7758,10 +7818,10 @@ body: | ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) ; CI-LABEL: name: test_load_private_v4s32_align2 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -7772,9 +7832,9 @@ body: | ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 from unknown-address + 4, addrspace 5) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 2 from unknown-address + 6, addrspace 5) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -7783,9 +7843,9 @@ body: | ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 2 from unknown-address + 8, addrspace 5) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 2 from unknown-address + 10, addrspace 5) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -7794,9 +7854,9 @@ body: | ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 2 from unknown-address + 12, addrspace 5) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 2 from unknown-address + 14, addrspace 5) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5) ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) @@ -7807,10 +7867,10 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) ; VI-LABEL: name: test_load_private_v4s32_align2 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -7821,9 +7881,9 @@ body: | ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 from unknown-address + 4, addrspace 5) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 2 from unknown-address + 6, addrspace 5) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -7832,9 +7892,9 @@ body: | ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 2 from unknown-address + 8, addrspace 5) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 2 from unknown-address + 10, addrspace 5) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -7843,9 +7903,9 @@ body: | ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; VI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 2 from unknown-address + 12, addrspace 5) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 2 from unknown-address + 14, addrspace 5) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5) ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) @@ -7856,10 +7916,10 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) ; GFX9-LABEL: name: test_load_private_v4s32_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -7870,9 +7930,9 @@ body: | ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 from unknown-address + 4, addrspace 5) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 2 from unknown-address + 6, addrspace 5) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -7881,9 +7941,9 @@ body: | ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 2 from unknown-address + 8, addrspace 5) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 2 from unknown-address + 10, addrspace 5) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -7892,9 +7952,9 @@ body: | ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 2 from unknown-address + 12, addrspace 5) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5) ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 2 from unknown-address + 14, addrspace 5) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5) ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) @@ -7904,7 +7964,7 @@ body: | ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(p5) = COPY $vgpr0 - %1:_(<4 x s32>) = G_LOAD %0 :: (load 16, align 2, addrspace 5) + %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 2, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -7916,16 +7976,16 @@ body: | ; SI-LABEL: name: test_load_private_v4s32_align1 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -7946,13 +8006,13 @@ body: | ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 5) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 5) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 5) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 5) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -7968,13 +8028,13 @@ body: | ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; SI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 1 from unknown-address + 8, addrspace 5) + ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 1 from unknown-address + 9, addrspace 5) + ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) ; SI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 1 from unknown-address + 10, addrspace 5) + ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 1 from unknown-address + 11, addrspace 5) + ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -7991,13 +8051,13 @@ body: | ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; SI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; SI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load 1 from unknown-address + 12, addrspace 5) + ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) ; SI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load 1 from unknown-address + 13, addrspace 5) + ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) ; SI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load 1 from unknown-address + 14, addrspace 5) + ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) ; SI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load 1 from unknown-address + 15, addrspace 5) + ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -8016,16 +8076,16 @@ body: | ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) ; CI-LABEL: name: test_load_private_v4s32_align1 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -8046,13 +8106,13 @@ body: | ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 5) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 5) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) ; CI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 5) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 5) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -8068,13 +8128,13 @@ body: | ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; CI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 1 from unknown-address + 8, addrspace 5) + ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) ; CI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 1 from unknown-address + 9, addrspace 5) + ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) ; CI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 1 from unknown-address + 10, addrspace 5) + ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) ; CI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 1 from unknown-address + 11, addrspace 5) + ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -8091,13 +8151,13 @@ body: | ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; CI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load 1 from unknown-address + 12, addrspace 5) + ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) ; CI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load 1 from unknown-address + 13, addrspace 5) + ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) ; CI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load 1 from unknown-address + 14, addrspace 5) + ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) ; CI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load 1 from unknown-address + 15, addrspace 5) + ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) ; CI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; CI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; CI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -8116,16 +8176,16 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) ; VI-LABEL: name: test_load_private_v4s32_align1 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -8146,13 +8206,13 @@ body: | ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 5) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 5) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 5) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 5) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -8168,13 +8228,13 @@ body: | ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; VI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 1 from unknown-address + 8, addrspace 5) + ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 1 from unknown-address + 9, addrspace 5) + ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 1 from unknown-address + 10, addrspace 5) + ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 1 from unknown-address + 11, addrspace 5) + ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -8191,13 +8251,13 @@ body: | ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; VI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load 1 from unknown-address + 12, addrspace 5) + ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) ; VI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load 1 from unknown-address + 13, addrspace 5) + ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) ; VI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load 1 from unknown-address + 14, addrspace 5) + ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) ; VI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load 1 from unknown-address + 15, addrspace 5) + ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -8216,16 +8276,16 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) ; GFX9-LABEL: name: test_load_private_v4s32_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -8246,13 +8306,13 @@ body: | ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 5) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 5) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 5) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 5) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -8268,13 +8328,13 @@ body: | ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 1 from unknown-address + 8, addrspace 5) + ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 1 from unknown-address + 9, addrspace 5) + ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 1 from unknown-address + 10, addrspace 5) + ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 1 from unknown-address + 11, addrspace 5) + ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -8291,13 +8351,13 @@ body: | ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load 1 from unknown-address + 12, addrspace 5) + ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load 1 from unknown-address + 13, addrspace 5) + ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load 1 from unknown-address + 14, addrspace 5) + ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load 1 from unknown-address + 15, addrspace 5) + ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; GFX9: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -8315,7 +8375,7 @@ body: | ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(p5) = COPY $vgpr0 - %1:_(<4 x s32>) = G_LOAD %0 :: (load 16, align 1, addrspace 5) + %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 1, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -8327,22 +8387,110 @@ body: | ; SI-LABEL: name: test_load_private_v8s32_align32 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p5) :: (load 16, align 32, addrspace 5) - ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5) + ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) + ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; SI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5) + ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; SI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) + ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>) ; CI-LABEL: name: test_load_private_v8s32_align32 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p5) :: (load 16, align 32, addrspace 5) - ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) + ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) + ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5) + ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) + ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5) + ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; CI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) + ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>) ; VI-LABEL: name: test_load_private_v8s32_align32 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p5) :: (load 16, align 32, addrspace 5) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5) + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; VI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5) + ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; VI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>) ; GFX9-LABEL: name: test_load_private_v8s32_align32 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p5) :: (load 16, align 32, addrspace 5) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<8 x s32>) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5) + ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5) + ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>) %0:_(p5) = COPY $vgpr0 - %1:_(<8 x s32>) = G_LOAD %0 :: (load 16, align 32, addrspace 5) + %1:_(<8 x s32>) = G_LOAD %0 :: (load (<8 x s32>), align 32, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 ... @@ -8354,22 +8502,206 @@ body: | ; SI-LABEL: name: test_load_private_v16s32_align32 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p5) :: (load 16, align 32, addrspace 5) - ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5) + ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) + ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; SI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5) + ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; SI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) + ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; SI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) + ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s32) from unknown-address + 32, align 32, addrspace 5) + ; SI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 36 + ; SI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32) + ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s32) from unknown-address + 36, addrspace 5) + ; SI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 + ; SI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C9]](s32) + ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s32) from unknown-address + 40, align 8, addrspace 5) + ; SI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 44 + ; SI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C10]](s32) + ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s32) from unknown-address + 44, addrspace 5) + ; SI: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 + ; SI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C11]](s32) + ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s32) from unknown-address + 48, align 16, addrspace 5) + ; SI: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 52 + ; SI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C12]](s32) + ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s32) from unknown-address + 52, addrspace 5) + ; SI: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 56 + ; SI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C13]](s32) + ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s32) from unknown-address + 56, align 8, addrspace 5) + ; SI: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 60 + ; SI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C14]](s32) + ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s32) from unknown-address + 60, addrspace 5) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32) + ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x s32>) ; CI-LABEL: name: test_load_private_v16s32_align32 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p5) :: (load 16, align 32, addrspace 5) - ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) + ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) + ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) + ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) + ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5) + ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) + ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5) + ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; CI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) + ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; CI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) + ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s32) from unknown-address + 32, align 32, addrspace 5) + ; CI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 36 + ; CI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32) + ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s32) from unknown-address + 36, addrspace 5) + ; CI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 + ; CI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C9]](s32) + ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s32) from unknown-address + 40, align 8, addrspace 5) + ; CI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 44 + ; CI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C10]](s32) + ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s32) from unknown-address + 44, addrspace 5) + ; CI: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 + ; CI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C11]](s32) + ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s32) from unknown-address + 48, align 16, addrspace 5) + ; CI: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 52 + ; CI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C12]](s32) + ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s32) from unknown-address + 52, addrspace 5) + ; CI: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 56 + ; CI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C13]](s32) + ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s32) from unknown-address + 56, align 8, addrspace 5) + ; CI: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 60 + ; CI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C14]](s32) + ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s32) from unknown-address + 60, addrspace 5) + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32) + ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x s32>) ; VI-LABEL: name: test_load_private_v16s32_align32 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p5) :: (load 16, align 32, addrspace 5) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5) + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) + ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; VI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5) + ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; VI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) + ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; VI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) + ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s32) from unknown-address + 32, align 32, addrspace 5) + ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 36 + ; VI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32) + ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s32) from unknown-address + 36, addrspace 5) + ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 + ; VI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C9]](s32) + ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s32) from unknown-address + 40, align 8, addrspace 5) + ; VI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 44 + ; VI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C10]](s32) + ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s32) from unknown-address + 44, addrspace 5) + ; VI: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 + ; VI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C11]](s32) + ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s32) from unknown-address + 48, align 16, addrspace 5) + ; VI: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 52 + ; VI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C12]](s32) + ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s32) from unknown-address + 52, addrspace 5) + ; VI: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 56 + ; VI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C13]](s32) + ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s32) from unknown-address + 56, align 8, addrspace 5) + ; VI: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 60 + ; VI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C14]](s32) + ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s32) from unknown-address + 60, addrspace 5) + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x s32>) ; GFX9-LABEL: name: test_load_private_v16s32_align32 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p5) :: (load 16, align 32, addrspace 5) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[LOAD]](<16 x s32>) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5) + ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5) + ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) + ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) + ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s32) from unknown-address + 32, align 32, addrspace 5) + ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 36 + ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32) + ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s32) from unknown-address + 36, addrspace 5) + ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 + ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C9]](s32) + ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s32) from unknown-address + 40, align 8, addrspace 5) + ; GFX9: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 44 + ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C10]](s32) + ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s32) from unknown-address + 44, addrspace 5) + ; GFX9: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 + ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C11]](s32) + ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s32) from unknown-address + 48, align 16, addrspace 5) + ; GFX9: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 52 + ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C12]](s32) + ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s32) from unknown-address + 52, addrspace 5) + ; GFX9: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 56 + ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C13]](s32) + ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s32) from unknown-address + 56, align 8, addrspace 5) + ; GFX9: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 60 + ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C14]](s32) + ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s32) from unknown-address + 60, addrspace 5) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x s32>) %0:_(p5) = COPY $vgpr0 - %1:_(<16 x s32>) = G_LOAD %0 :: (load 16, align 32, addrspace 5) + %1:_(<16 x s32>) = G_LOAD %0 :: (load (<16 x s32>), align 32, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1 ... @@ -8381,66 +8713,66 @@ body: | ; SI-LABEL: name: test_load_private_v2s64_align4 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, addrspace 5) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 4 from unknown-address + 12, addrspace 5) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; SI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; CI-LABEL: name: test_load_private_v2s64_align4 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, addrspace 5) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 4 from unknown-address + 12, addrspace 5) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; CI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; VI-LABEL: name: test_load_private_v2s64_align4 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, addrspace 5) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 4 from unknown-address + 12, addrspace 5) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-LABEL: name: test_load_private_v2s64_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, addrspace 5) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 4 from unknown-address + 12, addrspace 5) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(p5) = COPY $vgpr0 - %1:_(<2 x s64>) = G_LOAD %0 :: (load 16, align 4, addrspace 5) + %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 4, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -8452,16 +8784,16 @@ body: | ; SI-LABEL: name: test_load_private_v2s64_align16 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -8482,13 +8814,13 @@ body: | ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 5) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 5) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 5) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 5) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -8505,13 +8837,13 @@ body: | ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) ; SI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 1 from unknown-address + 8, addrspace 5) + ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 1 from unknown-address + 9, addrspace 5) + ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) ; SI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 1 from unknown-address + 10, addrspace 5) + ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 1 from unknown-address + 11, addrspace 5) + ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -8527,13 +8859,13 @@ body: | ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32) ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; SI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C7]](s32) - ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load 1 from unknown-address + 12, addrspace 5) + ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) ; SI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load 1 from unknown-address + 13, addrspace 5) + ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) ; SI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load 1 from unknown-address + 14, addrspace 5) + ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) ; SI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load 1 from unknown-address + 15, addrspace 5) + ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -8553,16 +8885,16 @@ body: | ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; CI-LABEL: name: test_load_private_v2s64_align16 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -8583,13 +8915,13 @@ body: | ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 5) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 5) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) ; CI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 5) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 5) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -8606,13 +8938,13 @@ body: | ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) ; CI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 1 from unknown-address + 8, addrspace 5) + ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) ; CI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 1 from unknown-address + 9, addrspace 5) + ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) ; CI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 1 from unknown-address + 10, addrspace 5) + ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) ; CI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 1 from unknown-address + 11, addrspace 5) + ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -8628,13 +8960,13 @@ body: | ; CI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32) ; CI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; CI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C7]](s32) - ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load 1 from unknown-address + 12, addrspace 5) + ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) ; CI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load 1 from unknown-address + 13, addrspace 5) + ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) ; CI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load 1 from unknown-address + 14, addrspace 5) + ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) ; CI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load 1 from unknown-address + 15, addrspace 5) + ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) ; CI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; CI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; CI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -8654,16 +8986,16 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; VI-LABEL: name: test_load_private_v2s64_align16 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -8684,13 +9016,13 @@ body: | ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 5) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 5) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 5) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 5) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -8707,13 +9039,13 @@ body: | ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) ; VI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 1 from unknown-address + 8, addrspace 5) + ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 1 from unknown-address + 9, addrspace 5) + ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 1 from unknown-address + 10, addrspace 5) + ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 1 from unknown-address + 11, addrspace 5) + ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -8729,13 +9061,13 @@ body: | ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32) ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; VI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C7]](s32) - ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load 1 from unknown-address + 12, addrspace 5) + ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) ; VI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load 1 from unknown-address + 13, addrspace 5) + ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) ; VI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load 1 from unknown-address + 14, addrspace 5) + ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) ; VI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load 1 from unknown-address + 15, addrspace 5) + ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -8755,16 +9087,16 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-LABEL: name: test_load_private_v2s64_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -8785,13 +9117,13 @@ body: | ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 5) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 5) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 5) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 5) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -8808,13 +9140,13 @@ body: | ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR5]](s32) ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 1 from unknown-address + 8, addrspace 5) + ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 1 from unknown-address + 9, addrspace 5) + ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 1 from unknown-address + 10, addrspace 5) + ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 1 from unknown-address + 11, addrspace 5) + ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -8830,13 +9162,13 @@ body: | ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32) ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C7]](s32) - ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load 1 from unknown-address + 12, addrspace 5) + ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load 1 from unknown-address + 13, addrspace 5) + ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load 1 from unknown-address + 14, addrspace 5) + ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load 1 from unknown-address + 15, addrspace 5) + ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; GFX9: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -8855,7 +9187,7 @@ body: | ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(p5) = COPY $vgpr0 - %1:_(<2 x s64>) = G_LOAD %0 :: (load 16, align 1, addrspace 5) + %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 1, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -8867,22 +9199,22 @@ body: | ; SI-LABEL: name: test_load_private_v3s64_align32 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 32, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, align 8, addrspace 5) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 4 from unknown-address + 12, addrspace 5) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; SI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 4 from unknown-address + 16, align 16, addrspace 5) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 4 from unknown-address + 20, addrspace 5) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; SI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) ; SI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF @@ -8890,22 +9222,22 @@ body: | ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) ; CI-LABEL: name: test_load_private_v3s64_align32 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 32, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, align 8, addrspace 5) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 4 from unknown-address + 12, addrspace 5) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; CI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 4 from unknown-address + 16, align 16, addrspace 5) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 4 from unknown-address + 20, addrspace 5) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; CI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) ; CI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF @@ -8913,22 +9245,22 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) ; VI-LABEL: name: test_load_private_v3s64_align32 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 32, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, align 8, addrspace 5) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 4 from unknown-address + 12, addrspace 5) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 4 from unknown-address + 16, align 16, addrspace 5) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 4 from unknown-address + 20, addrspace 5) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; VI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) ; VI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF @@ -8936,29 +9268,29 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) ; GFX9-LABEL: name: test_load_private_v3s64_align32 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 32, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, align 8, addrspace 5) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 4 from unknown-address + 12, addrspace 5) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 4 from unknown-address + 16, align 16, addrspace 5) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 4 from unknown-address + 20, addrspace 5) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) %0:_(p5) = COPY $vgpr0 - %1:_(<3 x s64>) = G_LOAD %0 :: (load 24, align 32, addrspace 5) + %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 32, addrspace 5) %2:_(<4 x s64>) = G_IMPLICIT_DEF %3:_(<4 x s64>) = G_INSERT %2, %1, 0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3 @@ -8972,114 +9304,114 @@ body: | ; SI-LABEL: name: test_load_private_v4s64_align32 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 32, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, align 8, addrspace 5) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 4 from unknown-address + 12, addrspace 5) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; SI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 4 from unknown-address + 16, align 16, addrspace 5) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 4 from unknown-address + 20, addrspace 5) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; SI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; SI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 4 from unknown-address + 24, align 8, addrspace 5) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 4 from unknown-address + 28, addrspace 5) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) ; SI: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; CI-LABEL: name: test_load_private_v4s64_align32 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 32, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, align 8, addrspace 5) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 4 from unknown-address + 12, addrspace 5) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; CI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 4 from unknown-address + 16, align 16, addrspace 5) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 4 from unknown-address + 20, addrspace 5) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; CI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; CI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 4 from unknown-address + 24, align 8, addrspace 5) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 4 from unknown-address + 28, addrspace 5) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) ; CI: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32) ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; VI-LABEL: name: test_load_private_v4s64_align32 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 32, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, align 8, addrspace 5) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 4 from unknown-address + 12, addrspace 5) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 4 from unknown-address + 16, align 16, addrspace 5) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 4 from unknown-address + 20, addrspace 5) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; VI: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; VI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 4 from unknown-address + 24, align 8, addrspace 5) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 4 from unknown-address + 28, addrspace 5) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) ; VI: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-LABEL: name: test_load_private_v4s64_align32 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 32, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, align 8, addrspace 5) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 4 from unknown-address + 12, addrspace 5) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 4 from unknown-address + 16, align 16, addrspace 5) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 16, addrspace 5) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 4 from unknown-address + 20, addrspace 5) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 4 from unknown-address + 24, align 8, addrspace 5) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5) ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 4 from unknown-address + 28, addrspace 5) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) ; GFX9: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(p5) = COPY $vgpr0 - %1:_(<4 x s64>) = G_LOAD %0 :: (load 32, align 32, addrspace 5) + %1:_(<4 x s64>) = G_LOAD %0 :: (load (<4 x s64>), align 32, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 ... @@ -9091,66 +9423,66 @@ body: | ; SI-LABEL: name: test_load_private_v2p1_align4 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, addrspace 5) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 4 from unknown-address + 12, addrspace 5) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; SI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; CI-LABEL: name: test_load_private_v2p1_align4 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, addrspace 5) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 4 from unknown-address + 12, addrspace 5) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; CI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; VI-LABEL: name: test_load_private_v2p1_align4 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, addrspace 5) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 4 from unknown-address + 12, addrspace 5) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; VI: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; GFX9-LABEL: name: test_load_private_v2p1_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, addrspace 5) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 4 from unknown-address + 12, addrspace 5) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32) ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) %0:_(p5) = COPY $vgpr0 - %1:_(<2 x p1>) = G_LOAD %0 :: (load 16, align 4, addrspace 5) + %1:_(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 4, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -9162,114 +9494,114 @@ body: | ; SI-LABEL: name: test_load_private_v4p1_align8 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, align 8, addrspace 5) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 4 from unknown-address + 12, addrspace 5) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 4 from unknown-address + 16, align 8, addrspace 5) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 8, addrspace 5) ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 4 from unknown-address + 20, addrspace 5) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; SI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 4 from unknown-address + 24, align 8, addrspace 5) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5) ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 ; SI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 4 from unknown-address + 28, addrspace 5) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) ; SI: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[BUILD_VECTOR]](<8 x s32>) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>) ; CI-LABEL: name: test_load_private_v4p1_align8 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, align 8, addrspace 5) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 4 from unknown-address + 12, addrspace 5) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 4 from unknown-address + 16, align 8, addrspace 5) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 8, addrspace 5) ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 4 from unknown-address + 20, addrspace 5) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; CI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 4 from unknown-address + 24, align 8, addrspace 5) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5) ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 ; CI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 4 from unknown-address + 28, addrspace 5) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) ; CI: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[BUILD_VECTOR]](<8 x s32>) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>) ; VI-LABEL: name: test_load_private_v4p1_align8 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, align 8, addrspace 5) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 4 from unknown-address + 12, addrspace 5) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 4 from unknown-address + 16, align 8, addrspace 5) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 8, addrspace 5) ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 4 from unknown-address + 20, addrspace 5) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; VI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 4 from unknown-address + 24, align 8, addrspace 5) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5) ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 ; VI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 4 from unknown-address + 28, addrspace 5) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) ; VI: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[BUILD_VECTOR]](<8 x s32>) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>) ; GFX9-LABEL: name: test_load_private_v4p1_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, align 8, addrspace 5) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 4 from unknown-address + 12, addrspace 5) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 4 from unknown-address + 16, align 8, addrspace 5) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, align 8, addrspace 5) ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 4 from unknown-address + 20, addrspace 5) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 4 from unknown-address + 24, align 8, addrspace 5) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s32) from unknown-address + 24, align 8, addrspace 5) ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C6]](s32) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 4 from unknown-address + 28, addrspace 5) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s32) from unknown-address + 28, addrspace 5) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) ; GFX9: [[BITCAST:%[0-9]+]]:_(<4 x p1>) = G_BITCAST [[BUILD_VECTOR]](<8 x s32>) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x p1>) %0:_(p5) = COPY $vgpr0 - %1:_(<4 x p1>) = G_LOAD %0 :: (load 32, align 8, addrspace 5) + %1:_(<4 x p1>) = G_LOAD %0 :: (load (<4 x p1>), align 8, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 ... @@ -9281,38 +9613,38 @@ body: | ; SI-LABEL: name: test_load_private_v2p3_align8 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3) ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) ; CI-LABEL: name: test_load_private_v2p3_align8 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3) ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) ; VI-LABEL: name: test_load_private_v2p3_align8 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3) ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) ; GFX9-LABEL: name: test_load_private_v2p3_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load 4, align 8, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (s32), align 8, addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[LOAD]](p3), [[LOAD1]](p3) ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) %0:_(p5) = COPY $vgpr0 - %1:_(<2 x p3>) = G_LOAD %0 :: (load 8, align 8, addrspace 5) + %1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 5) $vgpr0_vgpr1 = COPY %1 ... @@ -9324,22 +9656,22 @@ body: | ; SI-LABEL: name: test_ext_load_private_s32_from_1_align4 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, align 4, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) ; SI: $vgpr0 = COPY [[LOAD]](s32) ; CI-LABEL: name: test_ext_load_private_s32_from_1_align4 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, align 4, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) ; CI: $vgpr0 = COPY [[LOAD]](s32) ; VI-LABEL: name: test_ext_load_private_s32_from_1_align4 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, align 4, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) ; VI: $vgpr0 = COPY [[LOAD]](s32) ; GFX9-LABEL: name: test_ext_load_private_s32_from_1_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, align 4, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) ; GFX9: $vgpr0 = COPY [[LOAD]](s32) %0:_(p5) = COPY $vgpr0 - %1:_(s32) = G_LOAD %0 :: (load 1, align 4, addrspace 5) + %1:_(s32) = G_LOAD %0 :: (load (s8), align 4, addrspace 5) $vgpr0 = COPY %1 ... @@ -9351,22 +9683,22 @@ body: | ; SI-LABEL: name: test_ext_load_private_s32_from_2_align4 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, align 4, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) ; SI: $vgpr0 = COPY [[LOAD]](s32) ; CI-LABEL: name: test_ext_load_private_s32_from_2_align4 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, align 4, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) ; CI: $vgpr0 = COPY [[LOAD]](s32) ; VI-LABEL: name: test_ext_load_private_s32_from_2_align4 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, align 4, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) ; VI: $vgpr0 = COPY [[LOAD]](s32) ; GFX9-LABEL: name: test_ext_load_private_s32_from_2_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, align 4, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) ; GFX9: $vgpr0 = COPY [[LOAD]](s32) %0:_(p5) = COPY $vgpr0 - %1:_(s32) = G_LOAD %0 :: (load 2, align 4, addrspace 5) + %1:_(s32) = G_LOAD %0 :: (load (s16), align 4, addrspace 5) $vgpr0 = COPY %1 ... @@ -9379,26 +9711,26 @@ body: | ; SI-LABEL: name: test_ext_load_private_s64_from_1_align4 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, align 4, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-LABEL: name: test_ext_load_private_s64_from_1_align4 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, align 4, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_ext_load_private_s64_from_1_align4 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, align 4, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-LABEL: name: test_ext_load_private_s64_from_1_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, align 4, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p5) = COPY $vgpr0 - %1:_(s64) = G_LOAD %0 :: (load 1, align 4, addrspace 5) + %1:_(s64) = G_LOAD %0 :: (load (s8), align 4, addrspace 5) $vgpr0_vgpr1 = COPY %1 ... @@ -9410,26 +9742,26 @@ body: | ; SI-LABEL: name: test_ext_load_private_s64_from_2_align4 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, align 4, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-LABEL: name: test_ext_load_private_s64_from_2_align4 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, align 4, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_ext_load_private_s64_from_2_align4 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, align 4, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-LABEL: name: test_ext_load_private_s64_from_2_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, align 4, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p5) = COPY $vgpr0 - %1:_(s64) = G_LOAD %0 :: (load 2, align 4, addrspace 5) + %1:_(s64) = G_LOAD %0 :: (load (s16), align 4, addrspace 5) $vgpr0_vgpr1 = COPY %1 ... @@ -9441,26 +9773,26 @@ body: | ; SI-LABEL: name: test_ext_load_private_s64_from_4_align4 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-LABEL: name: test_ext_load_private_s64_from_4_align4 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_ext_load_private_s64_from_4_align4 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-LABEL: name: test_ext_load_private_s64_from_4_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p5) = COPY $vgpr0 - %1:_(s64) = G_LOAD %0 :: (load 4, align 4, addrspace 5) + %1:_(s64) = G_LOAD %0 :: (load (s32), align 4, addrspace 5) $vgpr0_vgpr1 = COPY %1 ... @@ -9472,7 +9804,7 @@ body: | ; SI-LABEL: name: test_ext_load_private_s128_from_4_align4 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) ; SI: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF @@ -9480,7 +9812,7 @@ body: | ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) ; CI-LABEL: name: test_ext_load_private_s128_from_4_align4 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; CI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) ; CI: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF @@ -9488,7 +9820,7 @@ body: | ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) ; VI-LABEL: name: test_ext_load_private_s128_from_4_align4 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) ; VI: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF @@ -9496,14 +9828,14 @@ body: | ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) ; GFX9-LABEL: name: test_ext_load_private_s128_from_4_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[DEF]](s32) ; GFX9: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; GFX9: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) %0:_(p5) = COPY $vgpr0 - %1:_(s128) = G_LOAD %0 :: (load 4, align 4, addrspace 5) + %1:_(s128) = G_LOAD %0 :: (load (s32), align 4, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -9515,26 +9847,26 @@ body: | ; SI-LABEL: name: test_ext_load_private_s64_from_2_align2 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, align 4, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-LABEL: name: test_ext_load_private_s64_from_2_align2 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, align 4, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_ext_load_private_s64_from_2_align2 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, align 4, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-LABEL: name: test_ext_load_private_s64_from_2_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, align 4, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 4, addrspace 5) ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p5) = COPY $vgpr0 - %1:_(s64) = G_LOAD %0 :: (load 2, align 4, addrspace 5) + %1:_(s64) = G_LOAD %0 :: (load (s16), align 4, addrspace 5) $vgpr0_vgpr1 = COPY %1 ... @@ -9546,26 +9878,26 @@ body: | ; SI-LABEL: name: test_ext_load_private_s64_from_1_align1 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, align 4, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; CI-LABEL: name: test_ext_load_private_s64_from_1_align1 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, align 4, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) ; CI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; CI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; VI-LABEL: name: test_ext_load_private_s64_from_1_align1 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, align 4, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-LABEL: name: test_ext_load_private_s64_from_1_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, align 4, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), align 4, addrspace 5) ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(p5) = COPY $vgpr0 - %1:_(s64) = G_LOAD %0 :: (load 1, align 4, addrspace 5) + %1:_(s64) = G_LOAD %0 :: (load (s8), align 4, addrspace 5) $vgpr0_vgpr1 = COPY %1 ... @@ -9577,22 +9909,22 @@ body: | ; SI-LABEL: name: test_extload_private_v2s32_from_4_align1 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load 4, align 1, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 1, addrspace 5) ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; CI-LABEL: name: test_extload_private_v2s32_from_4_align1 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load 4, align 1, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 1, addrspace 5) ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; VI-LABEL: name: test_extload_private_v2s32_from_4_align1 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load 4, align 1, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 1, addrspace 5) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; GFX9-LABEL: name: test_extload_private_v2s32_from_4_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load 4, align 1, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 1, addrspace 5) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) %0:_(p5) = COPY $vgpr0 - %1:_(<2 x s32>) = G_LOAD %0 :: (load 4, align 1, addrspace 5) + %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s16>), align 1, addrspace 5) $vgpr0_vgpr1 = COPY %1 ... @@ -9604,22 +9936,22 @@ body: | ; SI-LABEL: name: test_extload_private_v2s32_from_4_align2 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load 4, align 2, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 2, addrspace 5) ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; CI-LABEL: name: test_extload_private_v2s32_from_4_align2 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load 4, align 2, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 2, addrspace 5) ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; VI-LABEL: name: test_extload_private_v2s32_from_4_align2 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load 4, align 2, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 2, addrspace 5) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; GFX9-LABEL: name: test_extload_private_v2s32_from_4_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load 4, align 2, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 2, addrspace 5) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) %0:_(p5) = COPY $vgpr0 - %1:_(<2 x s32>) = G_LOAD %0 :: (load 4, align 2, addrspace 5) + %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s16>), align 2, addrspace 5) $vgpr0_vgpr1 = COPY %1 ... @@ -9631,22 +9963,22 @@ body: | ; SI-LABEL: name: test_extload_private_v2s32_from_4_align4 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; CI-LABEL: name: test_extload_private_v2s32_from_4_align4 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; VI-LABEL: name: test_extload_private_v2s32_from_4_align4 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) ; GFX9-LABEL: name: test_extload_private_v2s32_from_4_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), addrspace 5) ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) %0:_(p5) = COPY $vgpr0 - %1:_(<2 x s32>) = G_LOAD %0 :: (load 4, align 4, addrspace 5) + %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s16>), align 4, addrspace 5) $vgpr0_vgpr1 = COPY %1 ... @@ -9658,22 +9990,22 @@ body: | ; SI-LABEL: name: test_extload_private_v3s32_from_6_align4 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load 6, align 4, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s16>), align 4, addrspace 5) ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) ; CI-LABEL: name: test_extload_private_v3s32_from_6_align4 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load 6, align 4, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s16>), align 4, addrspace 5) ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) ; VI-LABEL: name: test_extload_private_v3s32_from_6_align4 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load 6, align 4, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s16>), align 4, addrspace 5) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) ; GFX9-LABEL: name: test_extload_private_v3s32_from_6_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load 6, align 4, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s16>), align 4, addrspace 5) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) %0:_(p5) = COPY $vgpr0 - %1:_(<3 x s32>) = G_LOAD %0 :: (load 6, align 4, addrspace 5) + %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s16>), align 4, addrspace 5) $vgpr0_vgpr1_vgpr2 = COPY %1 ... @@ -9685,43 +10017,43 @@ body: | ; SI-LABEL: name: test_extload_private_v4s32_from_8_align4 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load 8, align 4, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 4, addrspace 5) ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; CI-LABEL: name: test_extload_private_v4s32_from_8_align4 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load 8, align 4, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 4, addrspace 5) ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; VI-LABEL: name: test_extload_private_v4s32_from_8_align4 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load 8, align 4, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 4, addrspace 5) ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) ; GFX9-LABEL: name: test_extload_private_v4s32_from_8_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load 8, align 4, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 4, addrspace 5) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) %0:_(p5) = COPY $vgpr0 - %1:_(<4 x s32>) = G_LOAD %0 :: (load 8, align 4, addrspace 5) + %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s16>), align 4, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... --- -name: test_extload_private_v2s96_from_24_align1 +name: test_load_private_v2s96_align1 body: | bb.0: liveins: $vgpr0 - ; SI-LABEL: name: test_extload_private_v2s96_from_24_align1 + ; SI-LABEL: name: test_load_private_v2s96_align1 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -9742,13 +10074,13 @@ body: | ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 5) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 5) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 5) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 5) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -9764,13 +10096,13 @@ body: | ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; SI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 1 from unknown-address + 8, addrspace 5) + ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 1 from unknown-address + 9, addrspace 5) + ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) ; SI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 1 from unknown-address + 10, addrspace 5) + ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 1 from unknown-address + 11, addrspace 5) + ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -9789,13 +10121,13 @@ body: | ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; SI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load 1 from unknown-address + 12, addrspace 5) + ; SI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) ; SI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load 1 from unknown-address + 13, addrspace 5) + ; SI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) ; SI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load 1 from unknown-address + 14, addrspace 5) + ; SI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) ; SI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load 1 from unknown-address + 15, addrspace 5) + ; SI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -9811,13 +10143,13 @@ body: | ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32) ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]] ; SI: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s32) - ; SI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p5) :: (load 1 from unknown-address + 16, addrspace 5) + ; SI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p5) :: (load (s8) from unknown-address + 16, addrspace 5) ; SI: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32) - ; SI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p5) :: (load 1 from unknown-address + 17, addrspace 5) + ; SI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p5) :: (load (s8) from unknown-address + 17, addrspace 5) ; SI: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s32) - ; SI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p5) :: (load 1 from unknown-address + 18, addrspace 5) + ; SI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p5) :: (load (s8) from unknown-address + 18, addrspace 5) ; SI: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32) - ; SI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p5) :: (load 1 from unknown-address + 19, addrspace 5) + ; SI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p5) :: (load (s8) from unknown-address + 19, addrspace 5) ; SI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD16]](s32) ; SI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]] ; SI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32) @@ -9833,13 +10165,13 @@ body: | ; SI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32) ; SI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]] ; SI: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32) - ; SI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p5) :: (load 1 from unknown-address + 20, addrspace 5) + ; SI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p5) :: (load (s8) from unknown-address + 20, addrspace 5) ; SI: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32) - ; SI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p5) :: (load 1 from unknown-address + 21, addrspace 5) + ; SI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p5) :: (load (s8) from unknown-address + 21, addrspace 5) ; SI: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s32) - ; SI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p5) :: (load 1 from unknown-address + 22, addrspace 5) + ; SI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p5) :: (load (s8) from unknown-address + 22, addrspace 5) ; SI: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32) - ; SI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p5) :: (load 1 from unknown-address + 23, addrspace 5) + ; SI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p5) :: (load (s8) from unknown-address + 23, addrspace 5) ; SI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD20]](s32) ; SI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]] ; SI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32) @@ -9860,18 +10192,18 @@ body: | ; SI: [[COPY26:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY25]](s96) ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY26]](s96) - ; CI-LABEL: name: test_extload_private_v2s96_from_24_align1 + ; CI-LABEL: name: test_load_private_v2s96_align1 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -9892,13 +10224,13 @@ body: | ; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 5) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 5) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) ; CI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 5) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 5) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -9914,13 +10246,13 @@ body: | ; CI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; CI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; CI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 1 from unknown-address + 8, addrspace 5) + ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) ; CI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 1 from unknown-address + 9, addrspace 5) + ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) ; CI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 1 from unknown-address + 10, addrspace 5) + ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) ; CI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 1 from unknown-address + 11, addrspace 5) + ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -9939,13 +10271,13 @@ body: | ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load 1 from unknown-address + 12, addrspace 5) + ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) ; CI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load 1 from unknown-address + 13, addrspace 5) + ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) ; CI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load 1 from unknown-address + 14, addrspace 5) + ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) ; CI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load 1 from unknown-address + 15, addrspace 5) + ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) ; CI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; CI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; CI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -9961,13 +10293,13 @@ body: | ; CI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32) ; CI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]] ; CI: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s32) - ; CI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p5) :: (load 1 from unknown-address + 16, addrspace 5) + ; CI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p5) :: (load (s8) from unknown-address + 16, addrspace 5) ; CI: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32) - ; CI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p5) :: (load 1 from unknown-address + 17, addrspace 5) + ; CI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p5) :: (load (s8) from unknown-address + 17, addrspace 5) ; CI: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s32) - ; CI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p5) :: (load 1 from unknown-address + 18, addrspace 5) + ; CI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p5) :: (load (s8) from unknown-address + 18, addrspace 5) ; CI: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32) - ; CI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p5) :: (load 1 from unknown-address + 19, addrspace 5) + ; CI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p5) :: (load (s8) from unknown-address + 19, addrspace 5) ; CI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD16]](s32) ; CI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]] ; CI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32) @@ -9983,13 +10315,13 @@ body: | ; CI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32) ; CI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]] ; CI: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32) - ; CI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p5) :: (load 1 from unknown-address + 20, addrspace 5) + ; CI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p5) :: (load (s8) from unknown-address + 20, addrspace 5) ; CI: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32) - ; CI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p5) :: (load 1 from unknown-address + 21, addrspace 5) + ; CI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p5) :: (load (s8) from unknown-address + 21, addrspace 5) ; CI: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s32) - ; CI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p5) :: (load 1 from unknown-address + 22, addrspace 5) + ; CI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p5) :: (load (s8) from unknown-address + 22, addrspace 5) ; CI: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32) - ; CI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p5) :: (load 1 from unknown-address + 23, addrspace 5) + ; CI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p5) :: (load (s8) from unknown-address + 23, addrspace 5) ; CI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD20]](s32) ; CI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]] ; CI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32) @@ -10010,18 +10342,18 @@ body: | ; CI: [[COPY26:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY25]](s96) ; CI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY26]](s96) - ; VI-LABEL: name: test_extload_private_v2s96_from_24_align1 + ; VI-LABEL: name: test_load_private_v2s96_align1 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -10042,13 +10374,13 @@ body: | ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 5) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 5) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 5) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 5) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -10064,13 +10396,13 @@ body: | ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; VI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 1 from unknown-address + 8, addrspace 5) + ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 1 from unknown-address + 9, addrspace 5) + ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 1 from unknown-address + 10, addrspace 5) + ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 1 from unknown-address + 11, addrspace 5) + ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -10089,13 +10421,13 @@ body: | ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; VI: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load 1 from unknown-address + 12, addrspace 5) + ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) ; VI: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load 1 from unknown-address + 13, addrspace 5) + ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) ; VI: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load 1 from unknown-address + 14, addrspace 5) + ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) ; VI: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load 1 from unknown-address + 15, addrspace 5) + ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; VI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -10111,13 +10443,13 @@ body: | ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32) ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]] ; VI: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s32) - ; VI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p5) :: (load 1 from unknown-address + 16, addrspace 5) + ; VI: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p5) :: (load (s8) from unknown-address + 16, addrspace 5) ; VI: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32) - ; VI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p5) :: (load 1 from unknown-address + 17, addrspace 5) + ; VI: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p5) :: (load (s8) from unknown-address + 17, addrspace 5) ; VI: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s32) - ; VI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p5) :: (load 1 from unknown-address + 18, addrspace 5) + ; VI: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p5) :: (load (s8) from unknown-address + 18, addrspace 5) ; VI: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32) - ; VI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p5) :: (load 1 from unknown-address + 19, addrspace 5) + ; VI: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p5) :: (load (s8) from unknown-address + 19, addrspace 5) ; VI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD16]](s32) ; VI: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]] ; VI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32) @@ -10133,13 +10465,13 @@ body: | ; VI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32) ; VI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]] ; VI: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32) - ; VI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p5) :: (load 1 from unknown-address + 20, addrspace 5) + ; VI: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p5) :: (load (s8) from unknown-address + 20, addrspace 5) ; VI: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32) - ; VI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p5) :: (load 1 from unknown-address + 21, addrspace 5) + ; VI: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p5) :: (load (s8) from unknown-address + 21, addrspace 5) ; VI: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s32) - ; VI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p5) :: (load 1 from unknown-address + 22, addrspace 5) + ; VI: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p5) :: (load (s8) from unknown-address + 22, addrspace 5) ; VI: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32) - ; VI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p5) :: (load 1 from unknown-address + 23, addrspace 5) + ; VI: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p5) :: (load (s8) from unknown-address + 23, addrspace 5) ; VI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD20]](s32) ; VI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]] ; VI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32) @@ -10160,18 +10492,18 @@ body: | ; VI: [[COPY26:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY25]](s96) ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY26]](s96) - ; GFX9-LABEL: name: test_extload_private_v2s96_from_24_align1 + ; GFX9-LABEL: name: test_load_private_v2s96_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 1 from unknown-address + 1, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 1 from unknown-address + 2, addrspace 5) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 1 from unknown-address + 3, addrspace 5) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] @@ -10192,13 +10524,13 @@ body: | ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C7]](s32) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 1 from unknown-address + 4, addrspace 5) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 1 from unknown-address + 5, addrspace 5) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 1 from unknown-address + 6, addrspace 5) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 1 from unknown-address + 7, addrspace 5) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -10214,13 +10546,13 @@ body: | ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 1 from unknown-address + 8, addrspace 5) + ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 1 from unknown-address + 9, addrspace 5) + ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32) - ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 1 from unknown-address + 10, addrspace 5) + ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) - ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 1 from unknown-address + 11, addrspace 5) + ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -10239,13 +10571,13 @@ body: | ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C8]](s32) - ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load 1 from unknown-address + 12, addrspace 5) + ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) - ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load 1 from unknown-address + 13, addrspace 5) + ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) - ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load 1 from unknown-address + 14, addrspace 5) + ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) - ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load 1 from unknown-address + 15, addrspace 5) + ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LOAD12]](s32) ; GFX9: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32) @@ -10261,13 +10593,13 @@ body: | ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C6]](s32) ; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]] ; GFX9: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C7]](s32) - ; GFX9: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p5) :: (load 1 from unknown-address + 16, addrspace 5) + ; GFX9: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p5) :: (load (s8) from unknown-address + 16, addrspace 5) ; GFX9: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32) - ; GFX9: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p5) :: (load 1 from unknown-address + 17, addrspace 5) + ; GFX9: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p5) :: (load (s8) from unknown-address + 17, addrspace 5) ; GFX9: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s32) - ; GFX9: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p5) :: (load 1 from unknown-address + 18, addrspace 5) + ; GFX9: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p5) :: (load (s8) from unknown-address + 18, addrspace 5) ; GFX9: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32) - ; GFX9: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p5) :: (load 1 from unknown-address + 19, addrspace 5) + ; GFX9: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p5) :: (load (s8) from unknown-address + 19, addrspace 5) ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LOAD16]](s32) ; GFX9: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C3]] ; GFX9: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LOAD17]](s32) @@ -10283,13 +10615,13 @@ body: | ; GFX9: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C6]](s32) ; GFX9: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]] ; GFX9: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32) - ; GFX9: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p5) :: (load 1 from unknown-address + 20, addrspace 5) + ; GFX9: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p5) :: (load (s8) from unknown-address + 20, addrspace 5) ; GFX9: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32) - ; GFX9: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p5) :: (load 1 from unknown-address + 21, addrspace 5) + ; GFX9: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p5) :: (load (s8) from unknown-address + 21, addrspace 5) ; GFX9: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s32) - ; GFX9: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p5) :: (load 1 from unknown-address + 22, addrspace 5) + ; GFX9: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p5) :: (load (s8) from unknown-address + 22, addrspace 5) ; GFX9: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32) - ; GFX9: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p5) :: (load 1 from unknown-address + 23, addrspace 5) + ; GFX9: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p5) :: (load (s8) from unknown-address + 23, addrspace 5) ; GFX9: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LOAD20]](s32) ; GFX9: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C3]] ; GFX9: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LOAD21]](s32) @@ -10311,7 +10643,7 @@ body: | ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY25]](s96) ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY26]](s96) %0:_(p5) = COPY $vgpr0 - %1:_(<2 x s96>) = G_LOAD %0 :: (load 24, align 1, addrspace 5) + %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 1, addrspace 5) %2:_(s96) = G_EXTRACT %1, 0 %3:_(s96) = G_EXTRACT %1, 96 $vgpr0_vgpr1_vgpr2 = COPY %2 @@ -10319,17 +10651,17 @@ body: | ... --- -name: test_extload_private_v2s96_from_24_align2 +name: test_load_private_v2s96_align2 body: | bb.0: liveins: $vgpr0 - ; SI-LABEL: name: test_extload_private_v2s96_from_24_align2 + ; SI-LABEL: name: test_load_private_v2s96_align2 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -10340,9 +10672,9 @@ body: | ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 from unknown-address + 4, addrspace 5) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 2 from unknown-address + 6, addrspace 5) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -10351,9 +10683,9 @@ body: | ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 2 from unknown-address + 8, addrspace 5) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 2 from unknown-address + 10, addrspace 5) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -10364,9 +10696,9 @@ body: | ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; SI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 2 from unknown-address + 12, addrspace 5) + ; SI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 2 from unknown-address + 14, addrspace 5) + ; SI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5) ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) @@ -10374,9 +10706,9 @@ body: | ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32) ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; SI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32) - ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 2 from unknown-address + 16, addrspace 5) + ; SI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s16) from unknown-address + 16, addrspace 5) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 2 from unknown-address + 18, addrspace 5) + ; SI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s16) from unknown-address + 18, addrspace 5) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]] ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -10384,9 +10716,9 @@ body: | ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32) ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] ; SI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s32) - ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 2 from unknown-address + 20, addrspace 5) + ; SI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s16) from unknown-address + 20, addrspace 5) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 2 from unknown-address + 22, addrspace 5) + ; SI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s16) from unknown-address + 22, addrspace 5) ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]] ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) @@ -10399,12 +10731,12 @@ body: | ; SI: [[COPY14:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY13]](s96) ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY14]](s96) - ; CI-LABEL: name: test_extload_private_v2s96_from_24_align2 + ; CI-LABEL: name: test_load_private_v2s96_align2 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -10415,9 +10747,9 @@ body: | ; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 from unknown-address + 4, addrspace 5) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 2 from unknown-address + 6, addrspace 5) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -10426,9 +10758,9 @@ body: | ; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 2 from unknown-address + 8, addrspace 5) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 2 from unknown-address + 10, addrspace 5) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) ; CI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; CI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -10439,9 +10771,9 @@ body: | ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 2 from unknown-address + 12, addrspace 5) + ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5) ; CI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 2 from unknown-address + 14, addrspace 5) + ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5) ; CI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) ; CI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] ; CI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) @@ -10449,9 +10781,9 @@ body: | ; CI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32) ; CI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; CI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32) - ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 2 from unknown-address + 16, addrspace 5) + ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s16) from unknown-address + 16, addrspace 5) ; CI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 2 from unknown-address + 18, addrspace 5) + ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s16) from unknown-address + 18, addrspace 5) ; CI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; CI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]] ; CI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -10459,9 +10791,9 @@ body: | ; CI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32) ; CI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] ; CI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s32) - ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 2 from unknown-address + 20, addrspace 5) + ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s16) from unknown-address + 20, addrspace 5) ; CI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 2 from unknown-address + 22, addrspace 5) + ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s16) from unknown-address + 22, addrspace 5) ; CI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) ; CI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]] ; CI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) @@ -10474,12 +10806,12 @@ body: | ; CI: [[COPY14:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY13]](s96) ; CI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY14]](s96) - ; VI-LABEL: name: test_extload_private_v2s96_from_24_align2 + ; VI-LABEL: name: test_load_private_v2s96_align2 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -10490,9 +10822,9 @@ body: | ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 from unknown-address + 4, addrspace 5) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 2 from unknown-address + 6, addrspace 5) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -10501,9 +10833,9 @@ body: | ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 2 from unknown-address + 8, addrspace 5) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 2 from unknown-address + 10, addrspace 5) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -10514,9 +10846,9 @@ body: | ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; VI: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 2 from unknown-address + 12, addrspace 5) + ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 2 from unknown-address + 14, addrspace 5) + ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5) ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) @@ -10524,9 +10856,9 @@ body: | ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32) ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; VI: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32) - ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 2 from unknown-address + 16, addrspace 5) + ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s16) from unknown-address + 16, addrspace 5) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 2 from unknown-address + 18, addrspace 5) + ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s16) from unknown-address + 18, addrspace 5) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]] ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -10534,9 +10866,9 @@ body: | ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32) ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] ; VI: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s32) - ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 2 from unknown-address + 20, addrspace 5) + ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s16) from unknown-address + 20, addrspace 5) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 2 from unknown-address + 22, addrspace 5) + ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s16) from unknown-address + 22, addrspace 5) ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) ; VI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]] ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) @@ -10549,12 +10881,12 @@ body: | ; VI: [[COPY14:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY13]](s96) ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY14]](s96) - ; GFX9-LABEL: name: test_extload_private_v2s96_from_24_align2 + ; GFX9-LABEL: name: test_load_private_v2s96_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 from unknown-address + 2, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] @@ -10565,9 +10897,9 @@ body: | ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 from unknown-address + 4, addrspace 5) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 2 from unknown-address + 6, addrspace 5) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) @@ -10576,9 +10908,9 @@ body: | ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 2 from unknown-address + 8, addrspace 5) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 2 from unknown-address + 10, addrspace 5) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32) ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32) @@ -10589,9 +10921,9 @@ body: | ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 2 from unknown-address + 12, addrspace 5) + ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5) ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) - ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 2 from unknown-address + 14, addrspace 5) + ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5) ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32) ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32) @@ -10599,9 +10931,9 @@ body: | ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32) ; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32) - ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 2 from unknown-address + 16, addrspace 5) + ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load (s16) from unknown-address + 16, addrspace 5) ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) - ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 2 from unknown-address + 18, addrspace 5) + ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s16) from unknown-address + 18, addrspace 5) ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32) ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C1]] ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32) @@ -10609,9 +10941,9 @@ body: | ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32) ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C4]](s32) - ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 2 from unknown-address + 20, addrspace 5) + ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load (s16) from unknown-address + 20, addrspace 5) ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) - ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 2 from unknown-address + 22, addrspace 5) + ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s16) from unknown-address + 22, addrspace 5) ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32) ; GFX9: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C1]] ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LOAD11]](s32) @@ -10625,7 +10957,7 @@ body: | ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY13]](s96) ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY14]](s96) %0:_(p5) = COPY $vgpr0 - %1:_(<2 x s96>) = G_LOAD %0 :: (load 24, align 2, addrspace 5) + %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 2, addrspace 5) %2:_(s96) = G_EXTRACT %1, 0 %3:_(s96) = G_EXTRACT %1, 96 $vgpr0_vgpr1_vgpr2 = COPY %2 @@ -10633,101 +10965,101 @@ body: | ... --- -name: test_extload_private_v2s96_from_24_align4 +name: test_load_private_v2s96_align4 body: | bb.0: liveins: $vgpr0 - ; SI-LABEL: name: test_extload_private_v2s96_from_24_align4 + ; SI-LABEL: name: test_load_private_v2s96_align4 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, addrspace 5) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 4 from unknown-address + 12, addrspace 5) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 4 from unknown-address + 16, addrspace 5) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, addrspace 5) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 4 from unknown-address + 20, addrspace 5) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) ; SI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) ; SI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) ; SI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) - ; CI-LABEL: name: test_extload_private_v2s96_from_24_align4 + ; CI-LABEL: name: test_load_private_v2s96_align4 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, addrspace 5) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 4 from unknown-address + 12, addrspace 5) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 4 from unknown-address + 16, addrspace 5) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, addrspace 5) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 4 from unknown-address + 20, addrspace 5) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) ; CI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) ; CI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) ; CI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) - ; VI-LABEL: name: test_extload_private_v2s96_from_24_align4 + ; VI-LABEL: name: test_load_private_v2s96_align4 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, addrspace 5) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 4 from unknown-address + 12, addrspace 5) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 4 from unknown-address + 16, addrspace 5) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, addrspace 5) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 4 from unknown-address + 20, addrspace 5) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) ; VI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) ; VI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) - ; GFX9-LABEL: name: test_extload_private_v2s96_from_24_align4 + ; GFX9-LABEL: name: test_load_private_v2s96_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, addrspace 5) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, addrspace 5) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 4 from unknown-address + 12, addrspace 5) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 4 from unknown-address + 16, addrspace 5) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, addrspace 5) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 4 from unknown-address + 20, addrspace 5) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) ; GFX9: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) @@ -10735,7 +11067,7 @@ body: | ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) %0:_(p5) = COPY $vgpr0 - %1:_(<2 x s96>) = G_LOAD %0 :: (load 24, align 4, addrspace 5) + %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 4, addrspace 5) %2:_(s96) = G_EXTRACT %1, 0 %3:_(s96) = G_EXTRACT %1, 96 $vgpr0_vgpr1_vgpr2 = COPY %2 @@ -10743,101 +11075,101 @@ body: | ... --- -name: test_extload_private_v2s96_from_24_align16 +name: test_load_private_v2s96_align16 body: | bb.0: liveins: $vgpr0 - ; SI-LABEL: name: test_extload_private_v2s96_from_24_align16 + ; SI-LABEL: name: test_load_private_v2s96_align16 ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 16, addrspace 5) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 16, addrspace 5) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, align 8, addrspace 5) + ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 4 from unknown-address + 12, addrspace 5) + ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; SI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32) - ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 4 from unknown-address + 16, addrspace 5) + ; SI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, addrspace 5) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32) - ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 4 from unknown-address + 20, addrspace 5) + ; SI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) ; SI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) ; SI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) ; SI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) - ; CI-LABEL: name: test_extload_private_v2s96_from_24_align16 + ; CI-LABEL: name: test_load_private_v2s96_align16 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 16, addrspace 5) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 16, addrspace 5) ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, align 8, addrspace 5) + ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; CI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 4 from unknown-address + 12, addrspace 5) + ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; CI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32) - ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 4 from unknown-address + 16, addrspace 5) + ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, addrspace 5) ; CI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32) - ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 4 from unknown-address + 20, addrspace 5) + ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; CI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) ; CI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) ; CI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) ; CI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; CI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; CI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) - ; VI-LABEL: name: test_extload_private_v2s96_from_24_align16 + ; VI-LABEL: name: test_load_private_v2s96_align16 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 16, addrspace 5) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 16, addrspace 5) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; VI: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, align 8, addrspace 5) + ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; VI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 4 from unknown-address + 12, addrspace 5) + ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; VI: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32) - ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 4 from unknown-address + 16, addrspace 5) + ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, addrspace 5) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32) - ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 4 from unknown-address + 20, addrspace 5) + ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) ; VI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) ; VI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) - ; GFX9-LABEL: name: test_extload_private_v2s96_from_24_align16 + ; GFX9-LABEL: name: test_load_private_v2s96_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, align 16, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 16, addrspace 5) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 4 from unknown-address + 4, addrspace 5) + ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s32) from unknown-address + 4, addrspace 5) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 4 from unknown-address + 8, align 8, addrspace 5) + ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from unknown-address + 8, align 8, addrspace 5) ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32) ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load 4 from unknown-address + 12, addrspace 5) + ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s32) from unknown-address + 12, addrspace 5) ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C]](s32) - ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 4 from unknown-address + 16, addrspace 5) + ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load (s32) from unknown-address + 16, addrspace 5) ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD2]], [[C1]](s32) - ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 4 from unknown-address + 20, addrspace 5) + ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32) ; GFX9: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) @@ -10845,7 +11177,7 @@ body: | ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96) %0:_(p5) = COPY $vgpr0 - %1:_(<2 x s96>) = G_LOAD %0 :: (load 24, align 16, addrspace 5) + %1:_(<2 x s96>) = G_LOAD %0 :: (load (<2 x s96>), align 16, addrspace 5) %2:_(s96) = G_EXTRACT %1, 0 %3:_(s96) = G_EXTRACT %1, 96 $vgpr0_vgpr1_vgpr2 = COPY %2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-constant-32bit.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-constant-32bit.mir index dec47585acd2e..bab49a9849abf 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-constant-32bit.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-constant-32bit.mir @@ -12,11 +12,11 @@ body: | ; CI: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0 ; CI: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0 ; CI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6) - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load 4, addrspace 6) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), addrspace 6) ; CI: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32) ; CI: $vgpr0_vgpr1 = COPY [[SEXT]](s64) %0:_(p6) = COPY $sgpr0 - %1:_(s64) = G_SEXTLOAD %0 :: (load 4, align 4, addrspace 6) + %1:_(s64) = G_SEXTLOAD %0 :: (load (s32), align 4, addrspace 6) $vgpr0_vgpr1 = COPY %1 ... @@ -30,11 +30,11 @@ body: | ; CI: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0 ; CI: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0 ; CI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6) - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load 4, align 2, addrspace 6) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), align 2, addrspace 6) ; CI: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32) ; CI: $vgpr0_vgpr1 = COPY [[SEXT]](s64) %0:_(p6) = COPY $sgpr0 - %1:_(s64) = G_SEXTLOAD %0 :: (load 4, align 2, addrspace 6) + %1:_(s64) = G_SEXTLOAD %0 :: (load (s32), align 2, addrspace 6) $vgpr0_vgpr1 = COPY %1 ... @@ -48,11 +48,11 @@ body: | ; CI: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0 ; CI: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0 ; CI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6) - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load 4, align 1, addrspace 6) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), align 1, addrspace 6) ; CI: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32) ; CI: $vgpr0_vgpr1 = COPY [[SEXT]](s64) %0:_(p6) = COPY $sgpr0 - %1:_(s64) = G_SEXTLOAD %0 :: (load 4, align 1, addrspace 6) + %1:_(s64) = G_SEXTLOAD %0 :: (load (s32), align 1, addrspace 6) $vgpr0_vgpr1 = COPY %1 ... @@ -66,10 +66,10 @@ body: | ; CI: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0 ; CI: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0 ; CI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6) - ; CI: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[MV]](p4) :: (load 1, addrspace 6) + ; CI: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[MV]](p4) :: (load (s8), addrspace 6) ; CI: $vgpr0 = COPY [[SEXTLOAD]](s32) %0:_(p6) = COPY $sgpr0 - %1:_(s32) = G_SEXTLOAD %0 :: (load 1, align 1, addrspace 6) + %1:_(s32) = G_SEXTLOAD %0 :: (load (s8), align 1, addrspace 6) $vgpr0 = COPY %1 ... @@ -83,10 +83,10 @@ body: | ; CI: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0 ; CI: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0 ; CI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6) - ; CI: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[MV]](p4) :: (load 2, addrspace 6) + ; CI: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[MV]](p4) :: (load (s16), addrspace 6) ; CI: $vgpr0 = COPY [[SEXTLOAD]](s32) %0:_(p6) = COPY $sgpr0 - %1:_(s32) = G_SEXTLOAD %0 :: (load 2, align 2, addrspace 6) + %1:_(s32) = G_SEXTLOAD %0 :: (load (s16), align 2, addrspace 6) $vgpr0 = COPY %1 ... @@ -100,9 +100,9 @@ body: | ; CI: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0 ; CI: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0 ; CI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6) - ; CI: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[MV]](p4) :: (load 2, align 1, addrspace 6) + ; CI: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[MV]](p4) :: (load (s16), align 1, addrspace 6) ; CI: $vgpr0 = COPY [[SEXTLOAD]](s32) %0:_(p6) = COPY $sgpr0 - %1:_(s32) = G_SEXTLOAD %0 :: (load 2, align 1, addrspace 6) + %1:_(s32) = G_SEXTLOAD %0 :: (load (s16), align 1, addrspace 6) $vgpr0 = COPY %1 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-flat.mir index 0780874927403..1d0bf3775c30d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-flat.mir @@ -9,14 +9,14 @@ body: | ; SI-LABEL: name: test_sextload_flat_i32_i8 ; SI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; SI: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load 1) + ; SI: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s8)) ; SI: $vgpr0 = COPY [[SEXTLOAD]](s32) ; VI-LABEL: name: test_sextload_flat_i32_i8 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load 1) + ; VI: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s8)) ; VI: $vgpr0 = COPY [[SEXTLOAD]](s32) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_SEXTLOAD %0 :: (load 1, addrspace 0) + %1:_(s32) = G_SEXTLOAD %0 :: (load (s8), addrspace 0) $vgpr0 = COPY %1 ... --- @@ -27,14 +27,14 @@ body: | ; SI-LABEL: name: test_sextload_flat_i32_i16 ; SI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; SI: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load 2) + ; SI: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s16)) ; SI: $vgpr0 = COPY [[SEXTLOAD]](s32) ; VI-LABEL: name: test_sextload_flat_i32_i16 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load 2) + ; VI: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s16)) ; VI: $vgpr0 = COPY [[SEXTLOAD]](s32) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_SEXTLOAD %0 :: (load 2, addrspace 0) + %1:_(s32) = G_SEXTLOAD %0 :: (load (s16), addrspace 0) $vgpr0 = COPY %1 ... --- @@ -45,16 +45,16 @@ body: | ; SI-LABEL: name: test_sextload_flat_i31_i8 ; SI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; SI: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load 1) + ; SI: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s8)) ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SEXTLOAD]](s32) ; SI: $vgpr0 = COPY [[COPY1]](s32) ; VI-LABEL: name: test_sextload_flat_i31_i8 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load 1) + ; VI: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s8)) ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SEXTLOAD]](s32) ; VI: $vgpr0 = COPY [[COPY1]](s32) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s31) = G_SEXTLOAD %0 :: (load 1, addrspace 0) + %1:_(s31) = G_SEXTLOAD %0 :: (load (s8), addrspace 0) %2:_(s32) = G_ANYEXT %1 $vgpr0 = COPY %2 ... @@ -66,16 +66,16 @@ body: | ; SI-LABEL: name: test_sextload_flat_i64_i8 ; SI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; SI: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load 1) + ; SI: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s8)) ; SI: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32) ; SI: $vgpr0_vgpr1 = COPY [[SEXT]](s64) ; VI-LABEL: name: test_sextload_flat_i64_i8 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load 1) + ; VI: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s8)) ; VI: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32) ; VI: $vgpr0_vgpr1 = COPY [[SEXT]](s64) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_SEXTLOAD %0 :: (load 1, addrspace 0) + %1:_(s64) = G_SEXTLOAD %0 :: (load (s8), addrspace 0) $vgpr0_vgpr1 = COPY %1 ... --- @@ -86,16 +86,16 @@ body: | ; SI-LABEL: name: test_sextload_flat_i64_i16 ; SI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; SI: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load 2) + ; SI: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s16)) ; SI: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32) ; SI: $vgpr0_vgpr1 = COPY [[SEXT]](s64) ; VI-LABEL: name: test_sextload_flat_i64_i16 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load 2) + ; VI: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s16)) ; VI: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32) ; VI: $vgpr0_vgpr1 = COPY [[SEXT]](s64) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_SEXTLOAD %0 :: (load 2, addrspace 0) + %1:_(s64) = G_SEXTLOAD %0 :: (load (s16), addrspace 0) $vgpr0_vgpr1 = COPY %1 ... --- @@ -106,15 +106,15 @@ body: | ; SI-LABEL: name: test_sextload_flat_i64_i32 ; SI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) ; SI: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32) ; SI: $vgpr0_vgpr1 = COPY [[SEXT]](s64) ; VI-LABEL: name: test_sextload_flat_i64_i32 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) ; VI: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32) ; VI: $vgpr0_vgpr1 = COPY [[SEXT]](s64) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_SEXTLOAD %0 :: (load 4, addrspace 0) + %1:_(s64) = G_SEXTLOAD %0 :: (load (s32), addrspace 0) $vgpr0_vgpr1 = COPY %1 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-global.mir index 67dec9ae5c363..b5017eef20001 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-global.mir @@ -6,12 +6,12 @@ # FIXME: Run with and without unaligned access turned on # ERR-NOT: remark -# ERR: remark: :0:0: unable to legalize instruction: %1:_(<2 x s16>) = G_SEXTLOAD %0:_(p1) :: (load 2, addrspace 1) (in function: test_sextload_global_v2i16_from_2) -# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x s32>) = G_SEXTLOAD %0:_(p1) :: (load 2, addrspace 1) (in function: test_sextload_global_v2i32_from_2) -# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x s32>) = G_SEXTLOAD %0:_(p1) :: (load 4, addrspace 1) (in function: test_sextload_global_v2i32_from_4) -# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x s64>) = G_SEXTLOAD %0:_(p1) :: (load 4, addrspace 1) (in function: test_sextload_global_v2i64_from_4) -# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x s64>) = G_SEXTLOAD %0:_(p1) :: (load 8, addrspace 1) (in function: test_sextload_global_v2i64_from_8) -# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(s128) = G_SEXTLOAD %0:_(p1) :: (load 8, addrspace 1) (in function: test_sextload_global_s128_8) +# ERR: remark: :0:0: unable to legalize instruction: %1:_(<2 x s16>) = G_SEXTLOAD %0:_(p1) :: (load (<2 x s8>), addrspace 1) (in function: test_sextload_global_v2i16_from_v2s8) +# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x s32>) = G_SEXTLOAD %0:_(p1) :: (load (<2 x s8>), addrspace 1) (in function: test_sextload_global_v2i32_from_v2s8) +# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x s32>) = G_SEXTLOAD %0:_(p1) :: (load (<2 x s16>), addrspace 1) (in function: test_sextload_global_v2i32_from_v2s16) +# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x s64>) = G_SEXTLOAD %0:_(p1) :: (load (<2 x s16>), addrspace 1) (in function: test_sextload_global_v2i64_from_v2s16) +# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x s64>) = G_SEXTLOAD %0:_(p1) :: (load (<2 x s32>), addrspace 1) (in function: test_sextload_global_v2i64_from_v2s32) +# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(s128) = G_SEXTLOAD %0:_(p1) :: (load (s64), addrspace 1) (in function: test_sextload_global_s128_8) # ERR-NOT: remark --- @@ -22,14 +22,14 @@ body: | ; GFX8-LABEL: name: test_sextload_global_i32_i8 ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GFX8: $vgpr0 = COPY [[SEXTLOAD]](s32) ; GFX6-LABEL: name: test_sextload_global_i32_i8 ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GFX6: $vgpr0 = COPY [[SEXTLOAD]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_SEXTLOAD %0 :: (load 1, addrspace 1) + %1:_(s32) = G_SEXTLOAD %0 :: (load (s8), addrspace 1) $vgpr0 = COPY %1 ... --- @@ -40,14 +40,14 @@ body: | ; GFX8-LABEL: name: test_sextload_global_i32_i16 ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; GFX8: $vgpr0 = COPY [[SEXTLOAD]](s32) ; GFX6-LABEL: name: test_sextload_global_i32_i16 ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; GFX6: $vgpr0 = COPY [[SEXTLOAD]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_SEXTLOAD %0 :: (load 2, addrspace 1) + %1:_(s32) = G_SEXTLOAD %0 :: (load (s16), addrspace 1) $vgpr0 = COPY %1 ... --- @@ -58,16 +58,16 @@ body: | ; GFX8-LABEL: name: test_sextload_global_i31_i8 ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SEXTLOAD]](s32) ; GFX8: $vgpr0 = COPY [[COPY1]](s32) ; GFX6-LABEL: name: test_sextload_global_i31_i8 ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SEXTLOAD]](s32) ; GFX6: $vgpr0 = COPY [[COPY1]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s31) = G_SEXTLOAD %0 :: (load 1, addrspace 1) + %1:_(s31) = G_SEXTLOAD %0 :: (load (s8), addrspace 1) %2:_(s32) = G_ANYEXT %1 $vgpr0 = COPY %2 ... @@ -79,16 +79,16 @@ body: | ; GFX8-LABEL: name: test_sextload_global_i64_i8 ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GFX8: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32) ; GFX8: $vgpr0_vgpr1 = COPY [[SEXT]](s64) ; GFX6-LABEL: name: test_sextload_global_i64_i8 ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GFX6: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32) ; GFX6: $vgpr0_vgpr1 = COPY [[SEXT]](s64) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_SEXTLOAD %0 :: (load 1, addrspace 1) + %1:_(s64) = G_SEXTLOAD %0 :: (load (s8), addrspace 1) $vgpr0_vgpr1 = COPY %1 ... --- @@ -99,16 +99,16 @@ body: | ; GFX8-LABEL: name: test_sextload_global_i64_i16 ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; GFX8: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32) ; GFX8: $vgpr0_vgpr1 = COPY [[SEXT]](s64) ; GFX6-LABEL: name: test_sextload_global_i64_i16 ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; GFX6: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32) ; GFX6: $vgpr0_vgpr1 = COPY [[SEXT]](s64) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_SEXTLOAD %0 :: (load 2, addrspace 1) + %1:_(s64) = G_SEXTLOAD %0 :: (load (s16), addrspace 1) $vgpr0_vgpr1 = COPY %1 ... --- @@ -119,16 +119,16 @@ body: | ; GFX8-LABEL: name: test_sextload_global_i64_i32 ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX8: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; GFX8: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32) ; GFX8: $vgpr0_vgpr1 = COPY [[SEXT]](s64) ; GFX6-LABEL: name: test_sextload_global_i64_i32 ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; GFX6: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32) ; GFX6: $vgpr0_vgpr1 = COPY [[SEXT]](s64) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_SEXTLOAD %0 :: (load 4, addrspace 1) + %1:_(s64) = G_SEXTLOAD %0 :: (load (s32), addrspace 1) $vgpr0_vgpr1 = COPY %1 ... @@ -140,14 +140,14 @@ body: | ; GFX8-LABEL: name: test_sextload_global_s32_from_2_align1 ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 2, align 1, addrspace 1) + ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1) ; GFX8: $vgpr0 = COPY [[SEXTLOAD]](s32) ; GFX6-LABEL: name: test_sextload_global_s32_from_2_align1 ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 2, align 1, addrspace 1) + ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1) ; GFX6: $vgpr0 = COPY [[SEXTLOAD]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_SEXTLOAD %0 :: (load 2, align 1, addrspace 1) + %1:_(s32) = G_SEXTLOAD %0 :: (load (s16), align 1, addrspace 1) $vgpr0 = COPY %1 ... @@ -159,111 +159,111 @@ body: | ; GFX8-LABEL: name: test_sextload_global_s64_from_2_align1 ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 2, align 1, addrspace 1) + ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1) ; GFX8: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32) ; GFX8: $vgpr0_vgpr1 = COPY [[SEXT]](s64) ; GFX6-LABEL: name: test_sextload_global_s64_from_2_align1 ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 2, align 1, addrspace 1) + ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1) ; GFX6: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32) ; GFX6: $vgpr0_vgpr1 = COPY [[SEXT]](s64) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_SEXTLOAD %0 :: (load 2, align 1, addrspace 1) + %1:_(s64) = G_SEXTLOAD %0 :: (load (s16), align 1, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... --- -name: test_sextload_global_v2i16_from_2 +name: test_sextload_global_v2i16_from_v2s8 body: | bb.0: liveins: $vgpr0_vgpr1 - ; GFX8-LABEL: name: test_sextload_global_v2i16_from_2 + ; GFX8-LABEL: name: test_sextload_global_v2i16_from_v2s8 ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(<2 x s16>) = G_SEXTLOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(<2 x s16>) = G_SEXTLOAD [[COPY]](p1) :: (load (<2 x s8>), addrspace 1) ; GFX8: $vgpr0 = COPY [[SEXTLOAD]](<2 x s16>) - ; GFX6-LABEL: name: test_sextload_global_v2i16_from_2 + ; GFX6-LABEL: name: test_sextload_global_v2i16_from_v2s8 ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(<2 x s16>) = G_SEXTLOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(<2 x s16>) = G_SEXTLOAD [[COPY]](p1) :: (load (<2 x s8>), addrspace 1) ; GFX6: $vgpr0 = COPY [[SEXTLOAD]](<2 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s16>) = G_SEXTLOAD %0 :: (load 2, addrspace 1) + %1:_(<2 x s16>) = G_SEXTLOAD %0 :: (load (<2 x s8>), addrspace 1) $vgpr0 = COPY %1 ... --- -name: test_sextload_global_v2i32_from_2 +name: test_sextload_global_v2i32_from_v2s8 body: | bb.0: liveins: $vgpr0_vgpr1 - ; GFX8-LABEL: name: test_sextload_global_v2i32_from_2 + ; GFX8-LABEL: name: test_sextload_global_v2i32_from_v2s8 ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_SEXTLOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_SEXTLOAD [[COPY]](p1) :: (load (<2 x s8>), addrspace 1) ; GFX8: $vgpr0_vgpr1 = COPY [[SEXTLOAD]](<2 x s32>) - ; GFX6-LABEL: name: test_sextload_global_v2i32_from_2 + ; GFX6-LABEL: name: test_sextload_global_v2i32_from_v2s8 ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_SEXTLOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_SEXTLOAD [[COPY]](p1) :: (load (<2 x s8>), addrspace 1) ; GFX6: $vgpr0_vgpr1 = COPY [[SEXTLOAD]](<2 x s32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_SEXTLOAD %0 :: (load 2, addrspace 1) + %1:_(<2 x s32>) = G_SEXTLOAD %0 :: (load (<2 x s8>), addrspace 1) $vgpr0_vgpr1 = COPY %1 ... --- -name: test_sextload_global_v2i32_from_4 +name: test_sextload_global_v2i32_from_v2s16 body: | bb.0: liveins: $vgpr0_vgpr1 - ; GFX8-LABEL: name: test_sextload_global_v2i32_from_4 + ; GFX8-LABEL: name: test_sextload_global_v2i32_from_v2s16 ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_SEXTLOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_SEXTLOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) ; GFX8: $vgpr0_vgpr1 = COPY [[SEXTLOAD]](<2 x s32>) - ; GFX6-LABEL: name: test_sextload_global_v2i32_from_4 + ; GFX6-LABEL: name: test_sextload_global_v2i32_from_v2s16 ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_SEXTLOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_SEXTLOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) ; GFX6: $vgpr0_vgpr1 = COPY [[SEXTLOAD]](<2 x s32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_SEXTLOAD %0 :: (load 4, addrspace 1) + %1:_(<2 x s32>) = G_SEXTLOAD %0 :: (load (<2 x s16>), addrspace 1) $vgpr0_vgpr1 = COPY %1 ... --- -name: test_sextload_global_v2i64_from_4 +name: test_sextload_global_v2i64_from_v2s16 body: | bb.0: liveins: $vgpr0_vgpr1 - ; GFX8-LABEL: name: test_sextload_global_v2i64_from_4 + ; GFX8-LABEL: name: test_sextload_global_v2i64_from_v2s16 ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_SEXTLOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_SEXTLOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[SEXTLOAD]](<2 x s64>) - ; GFX6-LABEL: name: test_sextload_global_v2i64_from_4 + ; GFX6-LABEL: name: test_sextload_global_v2i64_from_v2s16 ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_SEXTLOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_SEXTLOAD [[COPY]](p1) :: (load (<2 x s16>), addrspace 1) ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[SEXTLOAD]](<2 x s64>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = G_SEXTLOAD %0 :: (load 4, addrspace 1) + %1:_(<2 x s64>) = G_SEXTLOAD %0 :: (load (<2 x s16>), addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... --- -name: test_sextload_global_v2i64_from_8 +name: test_sextload_global_v2i64_from_v2s32 body: | bb.0: liveins: $vgpr0_vgpr1 - ; GFX8-LABEL: name: test_sextload_global_v2i64_from_8 + ; GFX8-LABEL: name: test_sextload_global_v2i64_from_v2s32 ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_SEXTLOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_SEXTLOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[SEXTLOAD]](<2 x s64>) - ; GFX6-LABEL: name: test_sextload_global_v2i64_from_8 + ; GFX6-LABEL: name: test_sextload_global_v2i64_from_v2s32 ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_SEXTLOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_SEXTLOAD [[COPY]](p1) :: (load (<2 x s32>), addrspace 1) ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[SEXTLOAD]](<2 x s64>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = G_SEXTLOAD %0 :: (load 8, addrspace 1) + %1:_(<2 x s64>) = G_SEXTLOAD %0 :: (load (<2 x s32>), addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -275,13 +275,13 @@ body: | ; GFX8-LABEL: name: test_sextload_global_s128_8 ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s128) = G_SEXTLOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s128) = G_SEXTLOAD [[COPY]](p1) :: (load (s64), addrspace 1) ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[SEXTLOAD]](s128) ; GFX6-LABEL: name: test_sextload_global_s128_8 ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s128) = G_SEXTLOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s128) = G_SEXTLOAD [[COPY]](p1) :: (load (s64), addrspace 1) ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[SEXTLOAD]](s128) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s128) = G_SEXTLOAD %0 :: (load 8, addrspace 1) + %1:_(s128) = G_SEXTLOAD %0 :: (load (s64), addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-local.mir index 6363200890837..41127b01ececd 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-local.mir @@ -9,10 +9,10 @@ body: | ; CHECK-LABEL: name: test_sextload_local_i32_i8 ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CHECK: $vgpr0 = COPY [[SEXTLOAD]](s32) %0:_(p3) = COPY $vgpr0 - %1:_(s32) = G_SEXTLOAD %0 :: (load 1, addrspace 3) + %1:_(s32) = G_SEXTLOAD %0 :: (load (s8), addrspace 3) $vgpr0 = COPY %1 ... --- @@ -23,10 +23,10 @@ body: | ; CHECK-LABEL: name: test_sextload_local_i32_i16 ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; CHECK: $vgpr0 = COPY [[SEXTLOAD]](s32) %0:_(p3) = COPY $vgpr0 - %1:_(s32) = G_SEXTLOAD %0 :: (load 2, addrspace 3) + %1:_(s32) = G_SEXTLOAD %0 :: (load (s16), addrspace 3) $vgpr0 = COPY %1 ... --- @@ -37,11 +37,11 @@ body: | ; CHECK-LABEL: name: test_sextload_local_i31_i8 ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SEXTLOAD]](s32) ; CHECK: $vgpr0 = COPY [[COPY1]](s32) %0:_(p3) = COPY $vgpr0 - %1:_(s31) = G_SEXTLOAD %0 :: (load 1, addrspace 3) + %1:_(s31) = G_SEXTLOAD %0 :: (load (s8), addrspace 3) %2:_(s32) = G_ANYEXT %1 $vgpr0 = COPY %2 ... @@ -53,11 +53,11 @@ body: | ; CHECK-LABEL: name: test_sextload_local_i64_i8 ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32) ; CHECK: $vgpr0_vgpr1 = COPY [[SEXT]](s64) %0:_(p3) = COPY $vgpr0 - %1:_(s64) = G_SEXTLOAD %0 :: (load 1, addrspace 3) + %1:_(s64) = G_SEXTLOAD %0 :: (load (s8), addrspace 3) $vgpr0_vgpr1 = COPY %1 ... --- @@ -68,11 +68,11 @@ body: | ; CHECK-LABEL: name: test_sextload_local_i64_i16 ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32) ; CHECK: $vgpr0_vgpr1 = COPY [[SEXT]](s64) %0:_(p3) = COPY $vgpr0 - %1:_(s64) = G_SEXTLOAD %0 :: (load 2, addrspace 3) + %1:_(s64) = G_SEXTLOAD %0 :: (load (s16), addrspace 3) $vgpr0_vgpr1 = COPY %1 ... --- @@ -83,10 +83,10 @@ body: | ; CHECK-LABEL: name: test_sextload_local_i64_i32 ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32) ; CHECK: $vgpr0_vgpr1 = COPY [[SEXT]](s64) %0:_(p3) = COPY $vgpr0 - %1:_(s64) = G_SEXTLOAD %0 :: (load 4, addrspace 3) + %1:_(s64) = G_SEXTLOAD %0 :: (load (s32), addrspace 3) $vgpr0_vgpr1 = COPY %1 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-private.mir index 53d4d0d48da18..b1e275a94fd4a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-private.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-private.mir @@ -10,10 +10,10 @@ body: | ; CHECK-LABEL: name: test_sextload_private_i32_i8 ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; CHECK: $vgpr0 = COPY [[SEXTLOAD]](s32) %0:_(p5) = COPY $vgpr0 - %1:_(s32) = G_SEXTLOAD %0 :: (load 1, addrspace 5) + %1:_(s32) = G_SEXTLOAD %0 :: (load (s8), addrspace 5) $vgpr0 = COPY %1 ... @@ -25,10 +25,10 @@ body: | ; CHECK-LABEL: name: test_sextload_private_i32_i16 ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; CHECK: $vgpr0 = COPY [[SEXTLOAD]](s32) %0:_(p5) = COPY $vgpr0 - %1:_(s32) = G_SEXTLOAD %0 :: (load 2, addrspace 5) + %1:_(s32) = G_SEXTLOAD %0 :: (load (s16), addrspace 5) $vgpr0 = COPY %1 ... --- @@ -39,11 +39,11 @@ body: | ; CHECK-LABEL: name: test_sextload_private_i31_i8 ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SEXTLOAD]](s32) ; CHECK: $vgpr0 = COPY [[COPY1]](s32) %0:_(p5) = COPY $vgpr0 - %1:_(s31) = G_SEXTLOAD %0 :: (load 1, addrspace 5) + %1:_(s31) = G_SEXTLOAD %0 :: (load (s8), addrspace 5) %2:_(s32) = G_ANYEXT %1 $vgpr0 = COPY %2 ... @@ -55,11 +55,11 @@ body: | ; CHECK-LABEL: name: test_sextload_private_i64_i8 ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32) ; CHECK: $vgpr0_vgpr1 = COPY [[SEXT]](s64) %0:_(p5) = COPY $vgpr0 - %1:_(s64) = G_SEXTLOAD %0 :: (load 1, addrspace 5) + %1:_(s64) = G_SEXTLOAD %0 :: (load (s8), addrspace 5) $vgpr0_vgpr1 = COPY %1 ... --- @@ -70,11 +70,11 @@ body: | ; CHECK-LABEL: name: test_sextload_private_i64_i16 ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32) ; CHECK: $vgpr0_vgpr1 = COPY [[SEXT]](s64) %0:_(p5) = COPY $vgpr0 - %1:_(s64) = G_SEXTLOAD %0 :: (load 2, addrspace 5) + %1:_(s64) = G_SEXTLOAD %0 :: (load (s16), addrspace 5) $vgpr0_vgpr1 = COPY %1 ... --- @@ -85,10 +85,10 @@ body: | ; CHECK-LABEL: name: test_sextload_private_i64_i32 ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32) ; CHECK: $vgpr0_vgpr1 = COPY [[SEXT]](s64) %0:_(p5) = COPY $vgpr0 - %1:_(s64) = G_SEXTLOAD %0 :: (load 4, addrspace 5) + %1:_(s64) = G_SEXTLOAD %0 :: (load (s32), addrspace 5) $vgpr0_vgpr1 = COPY %1 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir index 1b27d9ee3fd18..c5cc75c83de6d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir @@ -17,32 +17,32 @@ body: | ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; SI: G_STORE [[AND]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; SI: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s1), addrspace 1) ; CI-LABEL: name: test_store_global_s1_align1 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; CI: G_STORE [[AND]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; CI: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s1), addrspace 1) ; VI-LABEL: name: test_store_global_s1_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; VI: G_STORE [[AND]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; VI: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s1), addrspace 1) ; GFX9-LABEL: name: test_store_global_s1_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; GFX9: G_STORE [[AND]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; GFX9: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s1), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 %2:_(s1) = G_TRUNC %1 - G_STORE %2, %0 :: (store 1, align 1, addrspace 1) + G_STORE %2, %0 :: (store (s1), align 1, addrspace 1) ... --- @@ -55,26 +55,26 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s7), addrspace 1) ; CI-LABEL: name: test_store_global_s7_align1 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; CI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s7), addrspace 1) ; VI-LABEL: name: test_store_global_s7_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s7), addrspace 1) ; GFX9-LABEL: name: test_store_global_s7_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX9: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; GFX9: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s7), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 %2:_(s7) = G_TRUNC %1 - G_STORE %2, %0 :: (store 1, align 1, addrspace 1) + G_STORE %2, %0 :: (store (s7), align 1, addrspace 1) ... --- @@ -87,26 +87,26 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; CI-LABEL: name: test_store_global_s8_align1 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; CI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-LABEL: name: test_store_global_s8_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; GFX9-LABEL: name: test_store_global_s8_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX9: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; GFX9: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 %2:_(s8) = G_TRUNC %1 - G_STORE %2, %0 :: (store 1, align 1, addrspace 1) + G_STORE %2, %0 :: (store (s8), align 1, addrspace 1) ... --- @@ -125,16 +125,16 @@ body: | ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; CI-LABEL: name: test_store_global_s16_align1 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, align 1, addrspace 1) + ; CI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 1, addrspace 1) ; VI-LABEL: name: test_store_global_s16_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -145,20 +145,20 @@ body: | ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; GFX9-LABEL: name: test_store_global_s16_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX9: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, align 1, addrspace 1) + ; GFX9: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 %2:_(s16) = G_TRUNC %1 - G_STORE %2, %0 :: (store 2, align 1, addrspace 1) + G_STORE %2, %0 :: (store (s16), align 1, addrspace 1) ... --- @@ -171,26 +171,26 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; CI-LABEL: name: test_store_global_s16_align2 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; CI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-LABEL: name: test_store_global_s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; GFX9-LABEL: name: test_store_global_s16_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX9: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; GFX9: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 %2:_(s16) = G_TRUNC %1 - G_STORE %2, %0 :: (store 2, align 2, addrspace 1) + G_STORE %2, %0 :: (store (s16), align 2, addrspace 1) ... --- @@ -203,26 +203,26 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, align 4, addrspace 1) + ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) ; CI-LABEL: name: test_store_global_s16_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, align 4, addrspace 1) + ; CI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) ; VI-LABEL: name: test_store_global_s16_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, align 4, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) ; GFX9-LABEL: name: test_store_global_s16_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX9: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, align 4, addrspace 1) + ; GFX9: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 %2:_(s16) = G_TRUNC %1 - G_STORE %2, %0 :: (store 2, align 4, addrspace 1) + G_STORE %2, %0 :: (store (s16), align 4, addrspace 1) ... --- @@ -239,8 +239,8 @@ body: | ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, align 4, addrspace 1) - ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 2, align 2, addrspace 1) + ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) + ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) ; CI-LABEL: name: test_store_global_s24_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -249,8 +249,8 @@ body: | ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, align 4, addrspace 1) - ; CI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 2, align 2, addrspace 1) + ; CI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) + ; CI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) ; VI-LABEL: name: test_store_global_s24_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -259,8 +259,8 @@ body: | ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, align 4, addrspace 1) - ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 2, align 2, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) + ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) ; GFX9-LABEL: name: test_store_global_s24_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -269,12 +269,12 @@ body: | ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, align 4, addrspace 1) - ; GFX9: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 2, align 2, addrspace 1) + ; GFX9: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) + ; GFX9: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 %2:_(s24) = G_TRUNC %1 - G_STORE %2, %0 :: (store 3, align 4, addrspace 1) + G_STORE %2, %0 :: (store (s24), align 4, addrspace 1) ... --- @@ -295,11 +295,11 @@ body: | ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[AND]](s32) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; SI: G_STORE [[COPY5]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; SI: G_STORE [[COPY5]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 2, align 2, addrspace 1) + ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) ; CI-LABEL: name: test_store_global_s24_align2 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -308,8 +308,8 @@ body: | ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) - ; CI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 2, align 2, addrspace 1) + ; CI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; CI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) ; VI-LABEL: name: test_store_global_s24_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -322,11 +322,11 @@ body: | ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[AND]](s32) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; VI: G_STORE [[COPY5]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; VI: G_STORE [[COPY5]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 2, align 2, addrspace 1) + ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) ; GFX9-LABEL: name: test_store_global_s24_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -335,12 +335,12 @@ body: | ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) - ; GFX9: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 2, align 2, addrspace 1) + ; GFX9: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) + ; GFX9: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 %2:_(s24) = G_TRUNC %1 - G_STORE %2, %0 :: (store 3, align 2, addrspace 1) + G_STORE %2, %0 :: (store (s24), align 2, addrspace 1) ... --- @@ -366,15 +366,15 @@ body: | ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C2]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; SI: G_STORE [[COPY6]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; SI: G_STORE [[COPY6]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; CI-LABEL: name: test_store_global_s24_align1 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -383,8 +383,8 @@ body: | ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, align 1, addrspace 1) - ; CI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; CI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 1, addrspace 1) + ; CI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; VI-LABEL: name: test_store_global_s24_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -400,15 +400,15 @@ body: | ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; VI: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; VI: G_STORE [[COPY5]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; VI: G_STORE [[COPY5]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; VI: G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; VI: G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; GFX9-LABEL: name: test_store_global_s24_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -417,12 +417,12 @@ body: | ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, align 1, addrspace 1) - ; GFX9: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; GFX9: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 1, addrspace 1) + ; GFX9: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 %2:_(s24) = G_TRUNC %1 - G_STORE %2, %0 :: (store 3, align 1, addrspace 1) + G_STORE %2, %0 :: (store (s24), align 1, addrspace 1) ... --- @@ -435,26 +435,26 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 4, addrspace 1) + ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s25), addrspace 1) ; CI-LABEL: name: test_store_global_s25_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 4, addrspace 1) + ; CI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s25), addrspace 1) ; VI-LABEL: name: test_store_global_s25_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 4, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s25), addrspace 1) ; GFX9-LABEL: name: test_store_global_s25_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX9: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 4, addrspace 1) + ; GFX9: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s25), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 %2:_(s25) = G_TRUNC %1 - G_STORE %2, %0 :: (store 4, align 4, addrspace 1) + G_STORE %2, %0 :: (store (s25), align 4, addrspace 1) ... # --- @@ -497,23 +497,23 @@ body: | ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 1 into unknown-address + 3, addrspace 1) + ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; CI-LABEL: name: test_store_global_s32_align1 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store 4, align 1, addrspace 1) + ; CI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), align 1, addrspace 1) ; VI-LABEL: name: test_store_global_s32_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -524,26 +524,26 @@ body: | ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 1 into unknown-address + 3, addrspace 1) + ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; GFX9-LABEL: name: test_store_global_s32_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store 4, align 1, addrspace 1) + ; GFX9: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 - G_STORE %1, %0 :: (store 4, align 1, addrspace 1) + G_STORE %1, %0 :: (store (s32), align 1, addrspace 1) ... --- @@ -558,33 +558,33 @@ body: | ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 2 into unknown-address + 2, addrspace 1) + ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; CI-LABEL: name: test_store_global_s32_align2 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store 4, align 2, addrspace 1) + ; CI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), align 2, addrspace 1) ; VI-LABEL: name: test_store_global_s32_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 2 into unknown-address + 2, addrspace 1) + ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; GFX9-LABEL: name: test_store_global_s32_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store 4, align 2, addrspace 1) + ; GFX9: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 - G_STORE %1, %0 :: (store 4, align 2, addrspace 1) + G_STORE %1, %0 :: (store (s32), align 2, addrspace 1) ... --- @@ -596,22 +596,22 @@ body: | ; SI-LABEL: name: test_store_global_s32_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store 4, addrspace 1) + ; SI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), addrspace 1) ; CI-LABEL: name: test_store_global_s32_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store 4, addrspace 1) + ; CI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), addrspace 1) ; VI-LABEL: name: test_store_global_s32_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store 4, addrspace 1) + ; VI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), addrspace 1) ; GFX9-LABEL: name: test_store_global_s32_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store 4, addrspace 1) + ; GFX9: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 - G_STORE %1, %0 :: (store 4, align 4, addrspace 1) + G_STORE %1, %0 :: (store (s32), align 4, addrspace 1) ... --- @@ -631,23 +631,23 @@ body: | ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C2]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[PTRTOINT]](s32) - ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 1 into unknown-address + 3, addrspace 1) + ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; CI-LABEL: name: test_store_global_p3_align1 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2 - ; CI: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store 4, align 1, addrspace 1) + ; CI: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store (p3), align 1, addrspace 1) ; VI-LABEL: name: test_store_global_p3_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2 @@ -659,26 +659,26 @@ body: | ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C2]](s32) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[PTRTOINT]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 1 into unknown-address + 3, addrspace 1) + ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; GFX9-LABEL: name: test_store_global_p3_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2 - ; GFX9: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store 4, align 1, addrspace 1) + ; GFX9: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store (p3), align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p3) = COPY $vgpr2 - G_STORE %1, %0 :: (store 4, align 1, addrspace 1) + G_STORE %1, %0 :: (store (p3), align 1, addrspace 1) ... --- @@ -694,15 +694,15 @@ body: | ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[PTRTOINT]](s32) - ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 2 into unknown-address + 2, addrspace 1) + ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; CI-LABEL: name: test_store_global_p3_align2 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2 - ; CI: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store 4, align 2, addrspace 1) + ; CI: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store (p3), align 2, addrspace 1) ; VI-LABEL: name: test_store_global_p3_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2 @@ -710,18 +710,18 @@ body: | ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[PTRTOINT]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 2 into unknown-address + 2, addrspace 1) + ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; GFX9-LABEL: name: test_store_global_p3_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2 - ; GFX9: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store 4, align 2, addrspace 1) + ; GFX9: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store (p3), align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p3) = COPY $vgpr2 - G_STORE %1, %0 :: (store 4, align 2, addrspace 1) + G_STORE %1, %0 :: (store (p3), align 2, addrspace 1) ... --- @@ -733,22 +733,22 @@ body: | ; SI-LABEL: name: test_store_global_p3_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2 - ; SI: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store 4, addrspace 1) + ; SI: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store (p3), addrspace 1) ; CI-LABEL: name: test_store_global_p3_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2 - ; CI: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store 4, addrspace 1) + ; CI: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store (p3), addrspace 1) ; VI-LABEL: name: test_store_global_p3_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2 - ; VI: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store 4, addrspace 1) + ; VI: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store (p3), addrspace 1) ; GFX9-LABEL: name: test_store_global_p3_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2 - ; GFX9: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store 4, addrspace 1) + ; GFX9: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store (p3), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p3) = COPY $vgpr2 - G_STORE %1, %0 :: (store 4, align 4, addrspace 1) + G_STORE %1, %0 :: (store (p3), align 4, addrspace 1) ... --- @@ -771,29 +771,29 @@ body: | ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT]], [[C2]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[EXTRACT]](s32) - ; SI: G_STORE [[COPY4]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; SI: G_STORE [[COPY4]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD2]](p1) :: (store 1 into unknown-address + 3, addrspace 1) + ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[EXTRACT1]](s16) ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT]], [[COPY8]](s32) ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT1]](s16) - ; SI: G_STORE [[ANYEXT]](s32), [[PTR_ADD3]](p1) :: (store 1 into unknown-address + 4, addrspace 1) + ; SI: G_STORE [[ANYEXT]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD4]](p1) :: (store 1 into unknown-address + 5, addrspace 1) + ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; CI-LABEL: name: test_store_global_s48_align1 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 @@ -803,9 +803,9 @@ body: | ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; CI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; CI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 4, align 1, addrspace 1) + ; CI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s32), align 1, addrspace 1) ; CI: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; CI: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store 2 into unknown-address + 4, align 1, addrspace 1) + ; CI: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, align 1, addrspace 1) ; VI-LABEL: name: test_store_global_s48_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 @@ -820,28 +820,28 @@ body: | ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT]], [[C2]](s32) ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[EXTRACT]](s32) - ; VI: G_STORE [[COPY4]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; VI: G_STORE [[COPY4]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD2]](p1) :: (store 1 into unknown-address + 3, addrspace 1) + ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) ; VI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; VI: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[EXTRACT1]], [[C7]](s16) ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT1]](s16) - ; VI: G_STORE [[ANYEXT]](s32), [[PTR_ADD3]](p1) :: (store 1 into unknown-address + 4, addrspace 1) + ; VI: G_STORE [[ANYEXT]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16) - ; VI: G_STORE [[ANYEXT1]](s32), [[PTR_ADD4]](p1) :: (store 1 into unknown-address + 5, addrspace 1) + ; VI: G_STORE [[ANYEXT1]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; GFX9-LABEL: name: test_store_global_s48_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 @@ -851,13 +851,13 @@ body: | ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; GFX9: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; GFX9: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 4, align 1, addrspace 1) + ; GFX9: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s32), align 1, addrspace 1) ; GFX9: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; GFX9: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store 2 into unknown-address + 4, align 1, addrspace 1) + ; GFX9: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s48) = G_TRUNC %1 - G_STORE %2, %0 :: (store 6, align 1, addrspace 1) + G_STORE %2, %0 :: (store (s48), align 1, addrspace 1) ... --- @@ -876,15 +876,15 @@ body: | ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT]], [[C]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[EXTRACT]](s32) - ; SI: G_STORE [[COPY4]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; SI: G_STORE [[COPY4]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD]](p1) :: (store 2 into unknown-address + 2, addrspace 1) + ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT1]](s16) - ; SI: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store 2 into unknown-address + 4, addrspace 1) + ; SI: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; CI-LABEL: name: test_store_global_s48_align2 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 @@ -894,9 +894,9 @@ body: | ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; CI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; CI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 4, align 2, addrspace 1) + ; CI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s32), align 2, addrspace 1) ; CI: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; CI: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store 2 into unknown-address + 4, addrspace 1) + ; CI: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI-LABEL: name: test_store_global_s48_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 @@ -907,15 +907,15 @@ body: | ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT]], [[C]](s32) ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[EXTRACT]](s32) - ; VI: G_STORE [[COPY4]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; VI: G_STORE [[COPY4]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD]](p1) :: (store 2 into unknown-address + 2, addrspace 1) + ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT1]](s16) - ; VI: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store 2 into unknown-address + 4, addrspace 1) + ; VI: G_STORE [[ANYEXT]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; GFX9-LABEL: name: test_store_global_s48_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 @@ -925,13 +925,13 @@ body: | ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; GFX9: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; GFX9: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 4, align 2, addrspace 1) + ; GFX9: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s32), align 2, addrspace 1) ; GFX9: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; GFX9: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store 2 into unknown-address + 4, addrspace 1) + ; GFX9: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s48) = G_TRUNC %1 - G_STORE %2, %0 :: (store 6, align 2, addrspace 1) + G_STORE %2, %0 :: (store (s48), align 2, addrspace 1) ... # --- @@ -943,7 +943,7 @@ body: | # %0:_(p1) = COPY $vgpr0_vgpr1 # %1:_(s64) = COPY $vgpr2_vgpr3 # %2:_(s48) = G_TRUNC %1 -# G_STORE %2, %0 :: (store 6, align 4, addrspace 1) +# G_STORE %2, %0 :: (store (s48), align 4, addrspace 1) # ... # --- @@ -955,7 +955,7 @@ body: | # %0:_(p1) = COPY $vgpr0_vgpr1 # %1:_(s64) = COPY $vgpr2_vgpr3 # %2:_(s48) = G_TRUNC %1 -# G_STORE %2, %0 :: (store 6, align 8, addrspace 1) +# G_STORE %2, %0 :: (store (s48), align 8, addrspace 1) # ... @@ -990,39 +990,39 @@ body: | ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C2]] ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[C1]](s32) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; SI: G_STORE [[COPY9]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; SI: G_STORE [[COPY9]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; SI: G_STORE [[COPY10]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; SI: G_STORE [[COPY10]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: G_STORE [[COPY11]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; SI: G_STORE [[COPY11]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; SI: G_STORE [[COPY12]](s32), [[PTR_ADD2]](p1) :: (store 1 into unknown-address + 3, addrspace 1) + ; SI: G_STORE [[COPY12]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI: G_STORE [[COPY13]](s32), [[PTR_ADD3]](p1) :: (store 1 into unknown-address + 4, addrspace 1) + ; SI: G_STORE [[COPY13]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; SI: G_STORE [[COPY14]](s32), [[PTR_ADD4]](p1) :: (store 1 into unknown-address + 5, addrspace 1) + ; SI: G_STORE [[COPY14]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; SI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; SI: G_STORE [[COPY15]](s32), [[PTR_ADD5]](p1) :: (store 1 into unknown-address + 6, addrspace 1) + ; SI: G_STORE [[COPY15]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64) ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; SI: G_STORE [[COPY16]](s32), [[PTR_ADD6]](p1) :: (store 1 into unknown-address + 7, addrspace 1) + ; SI: G_STORE [[COPY16]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; CI-LABEL: name: test_store_global_s64_align1 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CI: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store 8, align 1, addrspace 1) + ; CI: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 1, addrspace 1) ; VI-LABEL: name: test_store_global_s64_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 @@ -1040,42 +1040,42 @@ body: | ; VI: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16) ; VI: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) - ; VI: G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; VI: G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16) - ; VI: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store 1 into unknown-address + 3, addrspace 1) + ; VI: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store 1 into unknown-address + 4, addrspace 1) + ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) - ; VI: G_STORE [[ANYEXT2]](s32), [[PTR_ADD4]](p1) :: (store 1 into unknown-address + 5, addrspace 1) + ; VI: G_STORE [[ANYEXT2]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store 1 into unknown-address + 6, addrspace 1) + ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) ; VI: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16) - ; VI: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store 1 into unknown-address + 7, addrspace 1) + ; VI: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; GFX9-LABEL: name: test_store_global_s64_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store 8, align 1, addrspace 1) + ; GFX9: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store 8, align 1, addrspace 1) + G_STORE %1, %0 :: (store (s64), align 1, addrspace 1) ... --- @@ -1092,23 +1092,23 @@ body: | ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 2 into unknown-address + 2, addrspace 1) + ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 2 into unknown-address + 4, addrspace 1) + ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 2 into unknown-address + 6, addrspace 1) + ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; CI-LABEL: name: test_store_global_s64_align2 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CI: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store 8, align 2, addrspace 1) + ; CI: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 2, addrspace 1) ; VI-LABEL: name: test_store_global_s64_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 @@ -1117,26 +1117,26 @@ body: | ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 2 into unknown-address + 2, addrspace 1) + ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 2 into unknown-address + 4, addrspace 1) + ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 2 into unknown-address + 6, addrspace 1) + ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; GFX9-LABEL: name: test_store_global_s64_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store 8, align 2, addrspace 1) + ; GFX9: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store 8, align 2, addrspace 1) + G_STORE %1, %0 :: (store (s64), align 2, addrspace 1) ... --- @@ -1148,22 +1148,22 @@ body: | ; SI-LABEL: name: test_store_global_s64_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store 8, align 4, addrspace 1) + ; SI: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 4, addrspace 1) ; CI-LABEL: name: test_store_global_s64_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CI: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store 8, align 4, addrspace 1) + ; CI: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 4, addrspace 1) ; VI-LABEL: name: test_store_global_s64_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store 8, align 4, addrspace 1) + ; VI: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 4, addrspace 1) ; GFX9-LABEL: name: test_store_global_s64_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store 8, align 4, addrspace 1) + ; GFX9: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store 8, align 4, addrspace 1) + G_STORE %1, %0 :: (store (s64), align 4, addrspace 1) ... --- @@ -1175,22 +1175,22 @@ body: | ; SI-LABEL: name: test_store_global_s64_align8 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store 8, addrspace 1) + ; SI: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), addrspace 1) ; CI-LABEL: name: test_store_global_s64_align8 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CI: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store 8, addrspace 1) + ; CI: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), addrspace 1) ; VI-LABEL: name: test_store_global_s64_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store 8, addrspace 1) + ; VI: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), addrspace 1) ; GFX9-LABEL: name: test_store_global_s64_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store 8, addrspace 1) + ; GFX9: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store 8, align 8, addrspace 1) + G_STORE %1, %0 :: (store (s64), align 8, addrspace 1) ... --- @@ -1202,22 +1202,22 @@ body: | ; SI-LABEL: name: test_store_global_s64_align16 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store 8, align 16, addrspace 1) + ; SI: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 16, addrspace 1) ; CI-LABEL: name: test_store_global_s64_align16 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CI: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store 8, align 16, addrspace 1) + ; CI: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 16, addrspace 1) ; VI-LABEL: name: test_store_global_s64_align16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store 8, align 16, addrspace 1) + ; VI: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 16, addrspace 1) ; GFX9-LABEL: name: test_store_global_s64_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store 8, align 16, addrspace 1) + ; GFX9: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), align 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store 8, align 16, addrspace 1) + G_STORE %1, %0 :: (store (s64), align 16, addrspace 1) ... --- @@ -1251,39 +1251,39 @@ body: | ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C2]] ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[C1]](s32) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; SI: G_STORE [[COPY9]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; SI: G_STORE [[COPY9]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; SI: G_STORE [[COPY10]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; SI: G_STORE [[COPY10]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: G_STORE [[COPY11]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; SI: G_STORE [[COPY11]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; SI: G_STORE [[COPY12]](s32), [[PTR_ADD2]](p1) :: (store 1 into unknown-address + 3, addrspace 1) + ; SI: G_STORE [[COPY12]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI: G_STORE [[COPY13]](s32), [[PTR_ADD3]](p1) :: (store 1 into unknown-address + 4, addrspace 1) + ; SI: G_STORE [[COPY13]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; SI: G_STORE [[COPY14]](s32), [[PTR_ADD4]](p1) :: (store 1 into unknown-address + 5, addrspace 1) + ; SI: G_STORE [[COPY14]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; SI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; SI: G_STORE [[COPY15]](s32), [[PTR_ADD5]](p1) :: (store 1 into unknown-address + 6, addrspace 1) + ; SI: G_STORE [[COPY15]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64) ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; SI: G_STORE [[COPY16]](s32), [[PTR_ADD6]](p1) :: (store 1 into unknown-address + 7, addrspace 1) + ; SI: G_STORE [[COPY16]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; CI-LABEL: name: test_store_global_p0_align1 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 - ; CI: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store 8, align 1, addrspace 1) + ; CI: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 1, addrspace 1) ; VI-LABEL: name: test_store_global_p0_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 @@ -1301,42 +1301,42 @@ body: | ; VI: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16) ; VI: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) - ; VI: G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; VI: G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16) - ; VI: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store 1 into unknown-address + 3, addrspace 1) + ; VI: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store 1 into unknown-address + 4, addrspace 1) + ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) - ; VI: G_STORE [[ANYEXT2]](s32), [[PTR_ADD4]](p1) :: (store 1 into unknown-address + 5, addrspace 1) + ; VI: G_STORE [[ANYEXT2]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store 1 into unknown-address + 6, addrspace 1) + ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) ; VI: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16) - ; VI: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store 1 into unknown-address + 7, addrspace 1) + ; VI: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; GFX9-LABEL: name: test_store_global_p0_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 - ; GFX9: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store 8, align 1, addrspace 1) + ; GFX9: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p0) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store 8, align 1, addrspace 1) + G_STORE %1, %0 :: (store (p0), align 1, addrspace 1) ... --- @@ -1353,23 +1353,23 @@ body: | ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 2 into unknown-address + 2, addrspace 1) + ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 2 into unknown-address + 4, addrspace 1) + ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 2 into unknown-address + 6, addrspace 1) + ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; CI-LABEL: name: test_store_global_p0_align2 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 - ; CI: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store 8, align 2, addrspace 1) + ; CI: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 2, addrspace 1) ; VI-LABEL: name: test_store_global_p0_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 @@ -1378,26 +1378,26 @@ body: | ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 2 into unknown-address + 2, addrspace 1) + ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 2 into unknown-address + 4, addrspace 1) + ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 2 into unknown-address + 6, addrspace 1) + ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; GFX9-LABEL: name: test_store_global_p0_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 - ; GFX9: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store 8, align 2, addrspace 1) + ; GFX9: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p0) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store 8, align 2, addrspace 1) + G_STORE %1, %0 :: (store (p0), align 2, addrspace 1) ... --- @@ -1409,22 +1409,22 @@ body: | ; SI-LABEL: name: test_store_global_p0_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 - ; SI: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store 8, align 4, addrspace 1) + ; SI: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 4, addrspace 1) ; CI-LABEL: name: test_store_global_p0_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 - ; CI: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store 8, align 4, addrspace 1) + ; CI: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 4, addrspace 1) ; VI-LABEL: name: test_store_global_p0_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 - ; VI: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store 8, align 4, addrspace 1) + ; VI: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 4, addrspace 1) ; GFX9-LABEL: name: test_store_global_p0_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 - ; GFX9: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store 8, align 4, addrspace 1) + ; GFX9: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p0) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store 8, align 4, addrspace 1) + G_STORE %1, %0 :: (store (p0), align 4, addrspace 1) ... --- @@ -1436,22 +1436,22 @@ body: | ; SI-LABEL: name: test_store_global_p0_align8 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 - ; SI: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store 8, addrspace 1) + ; SI: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), addrspace 1) ; CI-LABEL: name: test_store_global_p0_align8 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 - ; CI: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store 8, addrspace 1) + ; CI: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), addrspace 1) ; VI-LABEL: name: test_store_global_p0_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 - ; VI: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store 8, addrspace 1) + ; VI: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), addrspace 1) ; GFX9-LABEL: name: test_store_global_p0_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 - ; GFX9: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store 8, addrspace 1) + ; GFX9: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p0) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store 8, align 8, addrspace 1) + G_STORE %1, %0 :: (store (p0), align 8, addrspace 1) ... --- @@ -1463,22 +1463,22 @@ body: | ; SI-LABEL: name: test_store_global_p0_align16 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 - ; SI: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store 8, align 16, addrspace 1) + ; SI: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 16, addrspace 1) ; CI-LABEL: name: test_store_global_p0_align16 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 - ; CI: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store 8, align 16, addrspace 1) + ; CI: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 16, addrspace 1) ; VI-LABEL: name: test_store_global_p0_align16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 - ; VI: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store 8, align 16, addrspace 1) + ; VI: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 16, addrspace 1) ; GFX9-LABEL: name: test_store_global_p0_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(p0) = COPY $vgpr2_vgpr3 - ; GFX9: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store 8, align 16, addrspace 1) + ; GFX9: G_STORE [[COPY1]](p0), [[COPY]](p1) :: (store (p0), align 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p0) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store 8, align 16, addrspace 1) + G_STORE %1, %0 :: (store (p0), align 16, addrspace 1) ... --- @@ -1512,39 +1512,39 @@ body: | ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C2]] ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[C1]](s32) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; SI: G_STORE [[COPY9]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; SI: G_STORE [[COPY9]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; SI: G_STORE [[COPY10]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; SI: G_STORE [[COPY10]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: G_STORE [[COPY11]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; SI: G_STORE [[COPY11]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; SI: G_STORE [[COPY12]](s32), [[PTR_ADD2]](p1) :: (store 1 into unknown-address + 3, addrspace 1) + ; SI: G_STORE [[COPY12]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI: G_STORE [[COPY13]](s32), [[PTR_ADD3]](p1) :: (store 1 into unknown-address + 4, addrspace 1) + ; SI: G_STORE [[COPY13]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; SI: G_STORE [[COPY14]](s32), [[PTR_ADD4]](p1) :: (store 1 into unknown-address + 5, addrspace 1) + ; SI: G_STORE [[COPY14]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; SI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; SI: G_STORE [[COPY15]](s32), [[PTR_ADD5]](p1) :: (store 1 into unknown-address + 6, addrspace 1) + ; SI: G_STORE [[COPY15]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64) ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; SI: G_STORE [[COPY16]](s32), [[PTR_ADD6]](p1) :: (store 1 into unknown-address + 7, addrspace 1) + ; SI: G_STORE [[COPY16]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; CI-LABEL: name: test_store_global_p999_align1 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 - ; CI: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store 8, align 1, addrspace 1) + ; CI: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 1, addrspace 1) ; VI-LABEL: name: test_store_global_p999_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 @@ -1562,42 +1562,42 @@ body: | ; VI: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16) ; VI: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) - ; VI: G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; VI: G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16) - ; VI: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store 1 into unknown-address + 3, addrspace 1) + ; VI: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store 1 into unknown-address + 4, addrspace 1) + ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) - ; VI: G_STORE [[ANYEXT2]](s32), [[PTR_ADD4]](p1) :: (store 1 into unknown-address + 5, addrspace 1) + ; VI: G_STORE [[ANYEXT2]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store 1 into unknown-address + 6, addrspace 1) + ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) ; VI: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16) - ; VI: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store 1 into unknown-address + 7, addrspace 1) + ; VI: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; GFX9-LABEL: name: test_store_global_p999_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 - ; GFX9: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store 8, align 1, addrspace 1) + ; GFX9: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p999) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store 8, align 1, addrspace 1) + G_STORE %1, %0 :: (store (p999), align 1, addrspace 1) ... --- @@ -1614,23 +1614,23 @@ body: | ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 2 into unknown-address + 2, addrspace 1) + ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 2 into unknown-address + 4, addrspace 1) + ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 2 into unknown-address + 6, addrspace 1) + ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; CI-LABEL: name: test_store_global_p999_align2 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 - ; CI: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store 8, align 2, addrspace 1) + ; CI: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 2, addrspace 1) ; VI-LABEL: name: test_store_global_p999_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 @@ -1639,26 +1639,26 @@ body: | ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 2 into unknown-address + 2, addrspace 1) + ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 2 into unknown-address + 4, addrspace 1) + ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 2 into unknown-address + 6, addrspace 1) + ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; GFX9-LABEL: name: test_store_global_p999_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 - ; GFX9: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store 8, align 2, addrspace 1) + ; GFX9: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p999) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store 8, align 2, addrspace 1) + G_STORE %1, %0 :: (store (p999), align 2, addrspace 1) ... --- @@ -1670,22 +1670,22 @@ body: | ; SI-LABEL: name: test_store_global_p999_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 - ; SI: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store 8, align 4, addrspace 1) + ; SI: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 4, addrspace 1) ; CI-LABEL: name: test_store_global_p999_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 - ; CI: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store 8, align 4, addrspace 1) + ; CI: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 4, addrspace 1) ; VI-LABEL: name: test_store_global_p999_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 - ; VI: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store 8, align 4, addrspace 1) + ; VI: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 4, addrspace 1) ; GFX9-LABEL: name: test_store_global_p999_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 - ; GFX9: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store 8, align 4, addrspace 1) + ; GFX9: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p999) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store 8, align 4, addrspace 1) + G_STORE %1, %0 :: (store (p999), align 4, addrspace 1) ... --- @@ -1697,22 +1697,22 @@ body: | ; SI-LABEL: name: test_store_global_p999_align8 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 - ; SI: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store 8, addrspace 1) + ; SI: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), addrspace 1) ; CI-LABEL: name: test_store_global_p999_align8 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 - ; CI: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store 8, addrspace 1) + ; CI: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), addrspace 1) ; VI-LABEL: name: test_store_global_p999_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 - ; VI: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store 8, addrspace 1) + ; VI: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), addrspace 1) ; GFX9-LABEL: name: test_store_global_p999_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 - ; GFX9: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store 8, addrspace 1) + ; GFX9: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p999) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store 8, align 8, addrspace 1) + G_STORE %1, %0 :: (store (p999), align 8, addrspace 1) ... --- @@ -1724,22 +1724,22 @@ body: | ; SI-LABEL: name: test_store_global_p999_align16 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 - ; SI: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store 8, align 16, addrspace 1) + ; SI: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 16, addrspace 1) ; CI-LABEL: name: test_store_global_p999_align16 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 - ; CI: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store 8, align 16, addrspace 1) + ; CI: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 16, addrspace 1) ; VI-LABEL: name: test_store_global_p999_align16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 - ; VI: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store 8, align 16, addrspace 1) + ; VI: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 16, addrspace 1) ; GFX9-LABEL: name: test_store_global_p999_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 - ; GFX9: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store 8, align 16, addrspace 1) + ; GFX9: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p999) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store 8, align 16, addrspace 1) + G_STORE %1, %0 :: (store (p999), align 16, addrspace 1) ... --- @@ -1759,39 +1759,39 @@ body: | ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 1 into unknown-address + 3, addrspace 1) + ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 1 into unknown-address + 4, addrspace 1) + ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 1 into unknown-address + 5, addrspace 1) + ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store 1 into unknown-address + 6, addrspace 1) + ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store 1 into unknown-address + 7, addrspace 1) + ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; CI-LABEL: name: test_store_global_v2s32_align1 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CI: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store 8, align 1, addrspace 1) + ; CI: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 1, addrspace 1) ; VI-LABEL: name: test_store_global_v2s32_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 @@ -1803,42 +1803,42 @@ body: | ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 1 into unknown-address + 3, addrspace 1) + ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 1 into unknown-address + 4, addrspace 1) + ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 1 into unknown-address + 5, addrspace 1) + ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store 1 into unknown-address + 6, addrspace 1) + ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store 1 into unknown-address + 7, addrspace 1) + ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; GFX9-LABEL: name: test_store_global_v2s32_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store 8, align 1, addrspace 1) + ; GFX9: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store 8, align 1, addrspace 1) + G_STORE %1, %0 :: (store (<2 x s32>), align 1, addrspace 1) ... --- @@ -1854,23 +1854,23 @@ body: | ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 2 into unknown-address + 2, addrspace 1) + ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 2 into unknown-address + 4, addrspace 1) + ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 2 into unknown-address + 6, addrspace 1) + ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; CI-LABEL: name: test_store_global_v2s32_align2 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CI: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store 8, align 2, addrspace 1) + ; CI: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 2, addrspace 1) ; VI-LABEL: name: test_store_global_v2s32_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 @@ -1878,26 +1878,26 @@ body: | ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 2 into unknown-address + 2, addrspace 1) + ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 2 into unknown-address + 4, addrspace 1) + ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 2 into unknown-address + 6, addrspace 1) + ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; GFX9-LABEL: name: test_store_global_v2s32_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store 8, align 2, addrspace 1) + ; GFX9: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store 8, align 2, addrspace 1) + G_STORE %1, %0 :: (store (<2 x s32>), align 2, addrspace 1) ... --- @@ -1909,22 +1909,22 @@ body: | ; SI-LABEL: name: test_store_global_v2s32_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store 8, align 4, addrspace 1) + ; SI: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1) ; CI-LABEL: name: test_store_global_v2s32_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CI: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store 8, align 4, addrspace 1) + ; CI: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1) ; VI-LABEL: name: test_store_global_v2s32_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store 8, align 4, addrspace 1) + ; VI: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1) ; GFX9-LABEL: name: test_store_global_v2s32_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store 8, align 4, addrspace 1) + ; GFX9: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store 8, align 4, addrspace 1) + G_STORE %1, %0 :: (store (<2 x s32>), align 4, addrspace 1) ... --- @@ -1936,22 +1936,22 @@ body: | ; SI-LABEL: name: test_store_global_v2s32_align8 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store 8, addrspace 1) + ; SI: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1) ; CI-LABEL: name: test_store_global_v2s32_align8 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CI: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store 8, addrspace 1) + ; CI: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1) ; VI-LABEL: name: test_store_global_v2s32_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store 8, addrspace 1) + ; VI: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1) ; GFX9-LABEL: name: test_store_global_v2s32_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store 8, addrspace 1) + ; GFX9: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store 8, align 8, addrspace 1) + G_STORE %1, %0 :: (store (<2 x s32>), align 8, addrspace 1) ... --- @@ -1963,22 +1963,22 @@ body: | ; SI-LABEL: name: test_store_global_v2s32_align16 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store 8, align 16, addrspace 1) + ; SI: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1) ; CI-LABEL: name: test_store_global_v2s32_align16 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CI: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store 8, align 16, addrspace 1) + ; CI: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1) ; VI-LABEL: name: test_store_global_v2s32_align16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store 8, align 16, addrspace 1) + ; VI: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1) ; GFX9-LABEL: name: test_store_global_v2s32_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store 8, align 16, addrspace 1) + ; GFX9: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store 8, align 16, addrspace 1) + G_STORE %1, %0 :: (store (<2 x s32>), align 16, addrspace 1) ... --- @@ -1999,19 +1999,19 @@ body: | ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C2]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[PTRTOINT]](s32) - ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 1 into unknown-address + 3, addrspace 1) + ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) ; SI: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3) @@ -2019,20 +2019,20 @@ body: | ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT1]], [[C1]](s32) ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT1]], [[C2]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[PTRTOINT1]](s32) - ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 1 into unknown-address + 4, addrspace 1) + ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 1 into unknown-address + 5, addrspace 1) + ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store 1 into unknown-address + 6, addrspace 1) + ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store 1 into unknown-address + 7, addrspace 1) + ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; CI-LABEL: name: test_store_global_v2p3_align1 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 - ; CI: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store 8, align 1, addrspace 1) + ; CI: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 1, addrspace 1) ; VI-LABEL: name: test_store_global_v2p3_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 @@ -2045,19 +2045,19 @@ body: | ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C2]](s32) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[PTRTOINT]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 1 into unknown-address + 3, addrspace 1) + ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) ; VI: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3) @@ -2065,23 +2065,23 @@ body: | ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT1]], [[C1]](s32) ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT1]], [[C2]](s32) ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[PTRTOINT1]](s32) - ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 1 into unknown-address + 4, addrspace 1) + ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 1 into unknown-address + 5, addrspace 1) + ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store 1 into unknown-address + 6, addrspace 1) + ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store 1 into unknown-address + 7, addrspace 1) + ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; GFX9-LABEL: name: test_store_global_v2p3_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 - ; GFX9: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store 8, align 1, addrspace 1) + ; GFX9: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x p3>) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store 8, align 1, addrspace 1) + G_STORE %1, %0 :: (store (<2 x p3>), align 1, addrspace 1) ... --- @@ -2098,24 +2098,24 @@ body: | ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[PTRTOINT]](s32) - ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 2 into unknown-address + 2, addrspace 1) + ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; SI: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3) ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT1]], [[C]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[PTRTOINT1]](s32) - ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 2 into unknown-address + 4, addrspace 1) + ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 2 into unknown-address + 6, addrspace 1) + ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; CI-LABEL: name: test_store_global_v2p3_align2 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 - ; CI: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store 8, align 2, addrspace 1) + ; CI: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 2, addrspace 1) ; VI-LABEL: name: test_store_global_v2p3_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 @@ -2124,27 +2124,27 @@ body: | ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[PTRTOINT]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 2 into unknown-address + 2, addrspace 1) + ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; VI: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3) ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT1]], [[C]](s32) ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[PTRTOINT1]](s32) - ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 2 into unknown-address + 4, addrspace 1) + ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 2 into unknown-address + 6, addrspace 1) + ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; GFX9-LABEL: name: test_store_global_v2p3_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 - ; GFX9: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store 8, align 2, addrspace 1) + ; GFX9: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x p3>) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store 8, align 2, addrspace 1) + G_STORE %1, %0 :: (store (<2 x p3>), align 2, addrspace 1) ... --- @@ -2156,22 +2156,22 @@ body: | ; SI-LABEL: name: test_store_global_v2p3_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 - ; SI: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store 8, align 4, addrspace 1) + ; SI: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 4, addrspace 1) ; CI-LABEL: name: test_store_global_v2p3_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 - ; CI: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store 8, align 4, addrspace 1) + ; CI: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 4, addrspace 1) ; VI-LABEL: name: test_store_global_v2p3_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 - ; VI: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store 8, align 4, addrspace 1) + ; VI: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 4, addrspace 1) ; GFX9-LABEL: name: test_store_global_v2p3_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 - ; GFX9: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store 8, align 4, addrspace 1) + ; GFX9: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x p3>) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store 8, align 4, addrspace 1) + G_STORE %1, %0 :: (store (<2 x p3>), align 4, addrspace 1) ... --- @@ -2183,22 +2183,22 @@ body: | ; SI-LABEL: name: test_store_global_v2p3_align8 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 - ; SI: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store 8, addrspace 1) + ; SI: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), addrspace 1) ; CI-LABEL: name: test_store_global_v2p3_align8 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 - ; CI: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store 8, addrspace 1) + ; CI: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), addrspace 1) ; VI-LABEL: name: test_store_global_v2p3_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 - ; VI: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store 8, addrspace 1) + ; VI: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), addrspace 1) ; GFX9-LABEL: name: test_store_global_v2p3_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 - ; GFX9: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store 8, addrspace 1) + ; GFX9: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x p3>) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store 8, align 8, addrspace 1) + G_STORE %1, %0 :: (store (<2 x p3>), align 8, addrspace 1) ... --- @@ -2210,22 +2210,22 @@ body: | ; SI-LABEL: name: test_store_global_v2p3_align16 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 - ; SI: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store 8, align 16, addrspace 1) + ; SI: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 16, addrspace 1) ; CI-LABEL: name: test_store_global_v2p3_align16 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 - ; CI: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store 8, align 16, addrspace 1) + ; CI: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 16, addrspace 1) ; VI-LABEL: name: test_store_global_v2p3_align16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 - ; VI: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store 8, align 16, addrspace 1) + ; VI: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 16, addrspace 1) ; GFX9-LABEL: name: test_store_global_v2p3_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3 - ; GFX9: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store 8, align 16, addrspace 1) + ; GFX9: G_STORE [[COPY1]](<2 x p3>), [[COPY]](p1) :: (store (<2 x p3>), align 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x p3>) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store 8, align 16, addrspace 1) + G_STORE %1, %0 :: (store (<2 x p3>), align 16, addrspace 1) ... --- @@ -2246,11 +2246,11 @@ body: | ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) @@ -2258,10 +2258,10 @@ body: | ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C3]](s64) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 1 into unknown-address + 3, addrspace 1) + ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) @@ -2269,24 +2269,24 @@ body: | ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C2]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) - ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 1 into unknown-address + 4, addrspace 1) + ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) - ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 1 into unknown-address + 5, addrspace 1) + ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; SI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; SI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) ; SI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C2]](s32) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) - ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store 1 into unknown-address + 6, addrspace 1) + ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s64) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) - ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store 1 into unknown-address + 7, addrspace 1) + ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; CI-LABEL: name: test_store_global_v4s16_align1 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; CI: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store 8, align 1, addrspace 1) + ; CI: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 1, addrspace 1) ; VI-LABEL: name: test_store_global_v4s16_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 @@ -2299,11 +2299,11 @@ body: | ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) @@ -2311,10 +2311,10 @@ body: | ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32) ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C3]](s64) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 1 into unknown-address + 3, addrspace 1) + ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) @@ -2322,27 +2322,27 @@ body: | ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C2]](s32) ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) - ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 1 into unknown-address + 4, addrspace 1) + ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) - ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 1 into unknown-address + 5, addrspace 1) + ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; VI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; VI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) ; VI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C2]](s32) ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) - ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store 1 into unknown-address + 6, addrspace 1) + ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s64) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) - ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store 1 into unknown-address + 7, addrspace 1) + ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; GFX9-LABEL: name: test_store_global_v4s16_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store 8, align 1, addrspace 1) + ; GFX9: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store 8, align 1, addrspace 1) + G_STORE %1, %0 :: (store (<4 x s16>), align 1, addrspace 1) ... --- @@ -2361,23 +2361,23 @@ body: | ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 2 into unknown-address + 2, addrspace 1) + ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 2 into unknown-address + 4, addrspace 1) + ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 2 into unknown-address + 6, addrspace 1) + ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; CI-LABEL: name: test_store_global_v4s16_align2 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; CI: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store 8, align 2, addrspace 1) + ; CI: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 2, addrspace 1) ; VI-LABEL: name: test_store_global_v4s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 @@ -2388,26 +2388,26 @@ body: | ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 2 into unknown-address + 2, addrspace 1) + ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 2 into unknown-address + 4, addrspace 1) + ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 2 into unknown-address + 6, addrspace 1) + ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; GFX9-LABEL: name: test_store_global_v4s16_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store 8, align 2, addrspace 1) + ; GFX9: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store 8, align 2, addrspace 1) + G_STORE %1, %0 :: (store (<4 x s16>), align 2, addrspace 1) ... --- @@ -2419,22 +2419,22 @@ body: | ; SI-LABEL: name: test_store_global_v4s16_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; SI: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store 8, align 4, addrspace 1) + ; SI: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 4, addrspace 1) ; CI-LABEL: name: test_store_global_v4s16_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; CI: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store 8, align 4, addrspace 1) + ; CI: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 4, addrspace 1) ; VI-LABEL: name: test_store_global_v4s16_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; VI: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store 8, align 4, addrspace 1) + ; VI: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 4, addrspace 1) ; GFX9-LABEL: name: test_store_global_v4s16_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store 8, align 4, addrspace 1) + ; GFX9: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store 8, align 4, addrspace 1) + G_STORE %1, %0 :: (store (<4 x s16>), align 4, addrspace 1) ... --- @@ -2446,22 +2446,22 @@ body: | ; SI-LABEL: name: test_store_global_v4s16_align8 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; SI: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store 8, addrspace 1) + ; SI: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), addrspace 1) ; CI-LABEL: name: test_store_global_v4s16_align8 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; CI: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store 8, addrspace 1) + ; CI: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), addrspace 1) ; VI-LABEL: name: test_store_global_v4s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; VI: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store 8, addrspace 1) + ; VI: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), addrspace 1) ; GFX9-LABEL: name: test_store_global_v4s16_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store 8, addrspace 1) + ; GFX9: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store 8, align 8, addrspace 1) + G_STORE %1, %0 :: (store (<4 x s16>), align 8, addrspace 1) ... --- @@ -2473,22 +2473,22 @@ body: | ; SI-LABEL: name: test_store_global_v4s16_align16 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; SI: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store 8, align 16, addrspace 1) + ; SI: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 16, addrspace 1) ; CI-LABEL: name: test_store_global_v4s16_align16 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; CI: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store 8, align 16, addrspace 1) + ; CI: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 16, addrspace 1) ; VI-LABEL: name: test_store_global_v4s16_align16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; VI: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store 8, align 16, addrspace 1) + ; VI: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 16, addrspace 1) ; GFX9-LABEL: name: test_store_global_v4s16_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store 8, align 16, addrspace 1) + ; GFX9: G_STORE [[COPY1]](<4 x s16>), [[COPY]](p1) :: (store (<4 x s16>), align 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store 8, align 16, addrspace 1) + G_STORE %1, %0 :: (store (<4 x s16>), align 16, addrspace 1) ... --- @@ -2510,55 +2510,55 @@ body: | ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 1 into unknown-address + 3, addrspace 1) + ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 1 into unknown-address + 4, addrspace 1) + ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 1 into unknown-address + 5, addrspace 1) + ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store 1 into unknown-address + 6, addrspace 1) + ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store 1 into unknown-address + 7, addrspace 1) + ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32) ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C1]](s32) ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C2]](s32) ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) - ; SI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store 1 into unknown-address + 8, addrspace 1) + ; SI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) - ; SI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store 1 into unknown-address + 9, addrspace 1) + ; SI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) - ; SI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store 1 into unknown-address + 10, addrspace 1) + ; SI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) - ; SI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store 1 into unknown-address + 11, addrspace 1) + ; SI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; CI-LABEL: name: test_store_global_v3s32_align1 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; CI: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store 12, align 1, addrspace 1) + ; CI: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 1, addrspace 1) ; VI-LABEL: name: test_store_global_v3s32_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 @@ -2572,58 +2572,58 @@ body: | ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 1 into unknown-address + 3, addrspace 1) + ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 1 into unknown-address + 4, addrspace 1) + ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 1 into unknown-address + 5, addrspace 1) + ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store 1 into unknown-address + 6, addrspace 1) + ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store 1 into unknown-address + 7, addrspace 1) + ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32) ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C1]](s32) ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C2]](s32) ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) - ; VI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store 1 into unknown-address + 8, addrspace 1) + ; VI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) - ; VI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store 1 into unknown-address + 9, addrspace 1) + ; VI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) - ; VI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store 1 into unknown-address + 10, addrspace 1) + ; VI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) - ; VI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store 1 into unknown-address + 11, addrspace 1) + ; VI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; GFX9-LABEL: name: test_store_global_v3s32_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX9: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store 12, align 1, addrspace 1) + ; GFX9: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - G_STORE %1, %0 :: (store 12, align 1, addrspace 1) + G_STORE %1, %0 :: (store (<3 x s32>), align 1, addrspace 1) ... --- @@ -2641,31 +2641,31 @@ body: | ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 2 into unknown-address + 2, addrspace 1) + ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 2 into unknown-address + 4, addrspace 1) + ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 2 into unknown-address + 6, addrspace 1) + ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) - ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 2 into unknown-address + 8, addrspace 1) + ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 2 into unknown-address + 10, addrspace 1) + ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; CI-LABEL: name: test_store_global_v3s32_align2 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; CI: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store 12, align 2, addrspace 1) + ; CI: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 2, addrspace 1) ; VI-LABEL: name: test_store_global_v3s32_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 @@ -2675,34 +2675,34 @@ body: | ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 2 into unknown-address + 2, addrspace 1) + ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 2 into unknown-address + 4, addrspace 1) + ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 2 into unknown-address + 6, addrspace 1) + ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32) ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) - ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 2 into unknown-address + 8, addrspace 1) + ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 2 into unknown-address + 10, addrspace 1) + ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; GFX9-LABEL: name: test_store_global_v3s32_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX9: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store 12, align 2, addrspace 1) + ; GFX9: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - G_STORE %1, %0 :: (store 12, align 2, addrspace 1) + G_STORE %1, %0 :: (store (<3 x s32>), align 2, addrspace 1) ... --- @@ -2716,25 +2716,25 @@ body: | ; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; SI: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0 ; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64 - ; SI: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store 8, align 4, addrspace 1) + ; SI: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store (s64), align 4, addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 8, addrspace 1) + ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, addrspace 1) ; CI-LABEL: name: test_store_global_v3s32_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; CI: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store 12, align 4, addrspace 1) + ; CI: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 4, addrspace 1) ; VI-LABEL: name: test_store_global_v3s32_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; VI: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store 12, align 4, addrspace 1) + ; VI: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 4, addrspace 1) ; GFX9-LABEL: name: test_store_global_v3s32_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX9: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store 12, align 4, addrspace 1) + ; GFX9: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - G_STORE %1, %0 :: (store 12, align 4, addrspace 1) + G_STORE %1, %0 :: (store (<3 x s32>), align 4, addrspace 1) ... --- @@ -2748,25 +2748,25 @@ body: | ; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; SI: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0 ; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64 - ; SI: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store 8, addrspace 1) + ; SI: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store (s64), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 8, align 8, addrspace 1) + ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1) ; CI-LABEL: name: test_store_global_v3s32_align8 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; CI: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store 12, align 8, addrspace 1) + ; CI: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 8, addrspace 1) ; VI-LABEL: name: test_store_global_v3s32_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; VI: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store 12, align 8, addrspace 1) + ; VI: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 8, addrspace 1) ; GFX9-LABEL: name: test_store_global_v3s32_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX9: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store 12, align 8, addrspace 1) + ; GFX9: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 8, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - G_STORE %1, %0 :: (store 12, align 8, addrspace 1) + G_STORE %1, %0 :: (store (<3 x s32>), align 8, addrspace 1) ... --- @@ -2780,25 +2780,25 @@ body: | ; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; SI: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0 ; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64 - ; SI: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store 8, align 16, addrspace 1) + ; SI: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store (s64), align 16, addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 8, align 8, addrspace 1) + ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1) ; CI-LABEL: name: test_store_global_v3s32_align16 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; CI: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store 12, align 16, addrspace 1) + ; CI: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 16, addrspace 1) ; VI-LABEL: name: test_store_global_v3s32_align16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; VI: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store 12, align 16, addrspace 1) + ; VI: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 16, addrspace 1) ; GFX9-LABEL: name: test_store_global_v3s32_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; GFX9: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store 12, align 16, addrspace 1) + ; GFX9: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - G_STORE %1, %0 :: (store 12, align 16, addrspace 1) + G_STORE %1, %0 :: (store (<3 x s32>), align 16, addrspace 1) ... --- @@ -2818,71 +2818,71 @@ body: | ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 1 into unknown-address + 3, addrspace 1) + ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 1 into unknown-address + 4, addrspace 1) + ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 1 into unknown-address + 5, addrspace 1) + ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store 1 into unknown-address + 6, addrspace 1) + ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store 1 into unknown-address + 7, addrspace 1) + ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store 1 into unknown-address + 8, addrspace 1) + ; SI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) - ; SI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store 1 into unknown-address + 9, addrspace 1) + ; SI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) - ; SI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store 1 into unknown-address + 10, addrspace 1) + ; SI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) - ; SI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store 1 into unknown-address + 11, addrspace 1) + ; SI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; SI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) ; SI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) ; SI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32) ; SI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32) ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI: G_STORE [[COPY14]](s32), [[PTR_ADD11]](p1) :: (store 1 into unknown-address + 12, addrspace 1) + ; SI: G_STORE [[COPY14]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; SI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64) ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) - ; SI: G_STORE [[COPY15]](s32), [[PTR_ADD12]](p1) :: (store 1 into unknown-address + 13, addrspace 1) + ; SI: G_STORE [[COPY15]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; SI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) - ; SI: G_STORE [[COPY16]](s32), [[PTR_ADD13]](p1) :: (store 1 into unknown-address + 14, addrspace 1) + ; SI: G_STORE [[COPY16]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; SI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64) ; SI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) - ; SI: G_STORE [[COPY17]](s32), [[PTR_ADD14]](p1) :: (store 1 into unknown-address + 15, addrspace 1) + ; SI: G_STORE [[COPY17]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; CI-LABEL: name: test_store_global_v4s32_align1 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; CI: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store 16, align 1, addrspace 1) + ; CI: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) ; VI-LABEL: name: test_store_global_v4s32_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 @@ -2894,74 +2894,74 @@ body: | ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 1 into unknown-address + 3, addrspace 1) + ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 1 into unknown-address + 4, addrspace 1) + ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 1 into unknown-address + 5, addrspace 1) + ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store 1 into unknown-address + 6, addrspace 1) + ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store 1 into unknown-address + 7, addrspace 1) + ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store 1 into unknown-address + 8, addrspace 1) + ; VI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) - ; VI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store 1 into unknown-address + 9, addrspace 1) + ; VI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) - ; VI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store 1 into unknown-address + 10, addrspace 1) + ; VI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) - ; VI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store 1 into unknown-address + 11, addrspace 1) + ; VI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) ; VI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) ; VI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32) ; VI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32) ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI: G_STORE [[COPY14]](s32), [[PTR_ADD11]](p1) :: (store 1 into unknown-address + 12, addrspace 1) + ; VI: G_STORE [[COPY14]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; VI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64) ; VI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) - ; VI: G_STORE [[COPY15]](s32), [[PTR_ADD12]](p1) :: (store 1 into unknown-address + 13, addrspace 1) + ; VI: G_STORE [[COPY15]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; VI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) ; VI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) - ; VI: G_STORE [[COPY16]](s32), [[PTR_ADD13]](p1) :: (store 1 into unknown-address + 14, addrspace 1) + ; VI: G_STORE [[COPY16]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; VI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64) ; VI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) - ; VI: G_STORE [[COPY17]](s32), [[PTR_ADD14]](p1) :: (store 1 into unknown-address + 15, addrspace 1) + ; VI: G_STORE [[COPY17]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; GFX9-LABEL: name: test_store_global_v4s32_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store 16, align 1, addrspace 1) + ; GFX9: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store 16, align 1, addrspace 1) + G_STORE %1, %0 :: (store (<4 x s32>), align 1, addrspace 1) ... --- @@ -2978,39 +2978,39 @@ body: | ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 2 into unknown-address + 2, addrspace 1) + ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 2 into unknown-address + 4, addrspace 1) + ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 2 into unknown-address + 6, addrspace 1) + ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<2 x s32>) ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 2 into unknown-address + 8, addrspace 1) + ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 2 into unknown-address + 10, addrspace 1) + ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store 2 into unknown-address + 12, addrspace 1) + ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store 2 into unknown-address + 14, addrspace 1) + ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; CI-LABEL: name: test_store_global_v4s32_align2 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; CI: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store 16, align 2, addrspace 1) + ; CI: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) ; VI-LABEL: name: test_store_global_v4s32_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 @@ -3019,42 +3019,42 @@ body: | ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 2 into unknown-address + 2, addrspace 1) + ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 2 into unknown-address + 4, addrspace 1) + ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 2 into unknown-address + 6, addrspace 1) + ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<2 x s32>) ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32) ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 2 into unknown-address + 8, addrspace 1) + ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 2 into unknown-address + 10, addrspace 1) + ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store 2 into unknown-address + 12, addrspace 1) + ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store 2 into unknown-address + 14, addrspace 1) + ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; GFX9-LABEL: name: test_store_global_v4s32_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store 16, align 2, addrspace 1) + ; GFX9: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store 16, align 2, addrspace 1) + G_STORE %1, %0 :: (store (<4 x s32>), align 2, addrspace 1) ... --- @@ -3066,22 +3066,22 @@ body: | ; SI-LABEL: name: test_store_global_v4s32_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; SI: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store 16, align 4, addrspace 1) + ; SI: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; CI-LABEL: name: test_store_global_v4s32_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; CI: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store 16, align 4, addrspace 1) + ; CI: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; VI-LABEL: name: test_store_global_v4s32_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; VI: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store 16, align 4, addrspace 1) + ; VI: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; GFX9-LABEL: name: test_store_global_v4s32_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store 16, align 4, addrspace 1) + ; GFX9: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store 16, align 4, addrspace 1) + G_STORE %1, %0 :: (store (<4 x s32>), align 4, addrspace 1) ... --- @@ -3093,22 +3093,22 @@ body: | ; SI-LABEL: name: test_store_global_v4s32_align8 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; SI: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store 16, align 8, addrspace 1) + ; SI: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) ; CI-LABEL: name: test_store_global_v4s32_align8 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; CI: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store 16, align 8, addrspace 1) + ; CI: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) ; VI-LABEL: name: test_store_global_v4s32_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; VI: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store 16, align 8, addrspace 1) + ; VI: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) ; GFX9-LABEL: name: test_store_global_v4s32_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store 16, align 8, addrspace 1) + ; GFX9: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store 16, align 8, addrspace 1) + G_STORE %1, %0 :: (store (<4 x s32>), align 8, addrspace 1) ... --- @@ -3120,22 +3120,22 @@ body: | ; SI-LABEL: name: test_store_global_v4s32_align16 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; SI: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store 16, addrspace 1) + ; SI: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; CI-LABEL: name: test_store_global_v4s32_align16 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; CI: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store 16, addrspace 1) + ; CI: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; VI-LABEL: name: test_store_global_v4s32_align16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; VI: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store 16, addrspace 1) + ; VI: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; GFX9-LABEL: name: test_store_global_v4s32_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store 16, addrspace 1) + ; GFX9: G_STORE [[COPY1]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store 16, align 16, addrspace 1) + G_STORE %1, %0 :: (store (<4 x s32>), align 16, addrspace 1) ... --- @@ -3170,35 +3170,35 @@ body: | ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C2]] ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[C1]](s32) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI: G_STORE [[COPY9]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; SI: G_STORE [[COPY9]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; SI: G_STORE [[COPY10]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; SI: G_STORE [[COPY10]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: G_STORE [[COPY11]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; SI: G_STORE [[COPY11]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; SI: G_STORE [[COPY12]](s32), [[PTR_ADD2]](p1) :: (store 1 into unknown-address + 3, addrspace 1) + ; SI: G_STORE [[COPY12]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI: G_STORE [[COPY13]](s32), [[PTR_ADD3]](p1) :: (store 1 into unknown-address + 4, addrspace 1) + ; SI: G_STORE [[COPY13]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; SI: G_STORE [[COPY14]](s32), [[PTR_ADD4]](p1) :: (store 1 into unknown-address + 5, addrspace 1) + ; SI: G_STORE [[COPY14]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; SI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; SI: G_STORE [[COPY15]](s32), [[PTR_ADD5]](p1) :: (store 1 into unknown-address + 6, addrspace 1) + ; SI: G_STORE [[COPY15]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64) ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; SI: G_STORE [[COPY16]](s32), [[PTR_ADD6]](p1) :: (store 1 into unknown-address + 7, addrspace 1) + ; SI: G_STORE [[COPY16]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; SI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C10]](s64) ; SI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) @@ -3221,32 +3221,32 @@ body: | ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C2]] ; SI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY23]](s32) ; SI: [[COPY25:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; SI: G_STORE [[COPY25]](s32), [[PTR_ADD7]](p1) :: (store 1 into unknown-address + 8, addrspace 1) + ; SI: G_STORE [[COPY25]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) ; SI: [[COPY26:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) - ; SI: G_STORE [[COPY26]](s32), [[PTR_ADD8]](p1) :: (store 1 into unknown-address + 9, addrspace 1) + ; SI: G_STORE [[COPY26]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) ; SI: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) - ; SI: G_STORE [[COPY27]](s32), [[PTR_ADD9]](p1) :: (store 1 into unknown-address + 10, addrspace 1) + ; SI: G_STORE [[COPY27]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) ; SI: [[COPY28:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) - ; SI: G_STORE [[COPY28]](s32), [[PTR_ADD10]](p1) :: (store 1 into unknown-address + 11, addrspace 1) + ; SI: G_STORE [[COPY28]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; SI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64) ; SI: [[COPY29:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; SI: G_STORE [[COPY29]](s32), [[PTR_ADD11]](p1) :: (store 1 into unknown-address + 12, addrspace 1) + ; SI: G_STORE [[COPY29]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; SI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C7]](s64) ; SI: [[COPY30:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) - ; SI: G_STORE [[COPY30]](s32), [[PTR_ADD12]](p1) :: (store 1 into unknown-address + 13, addrspace 1) + ; SI: G_STORE [[COPY30]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; SI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C8]](s64) ; SI: [[COPY31:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) - ; SI: G_STORE [[COPY31]](s32), [[PTR_ADD13]](p1) :: (store 1 into unknown-address + 14, addrspace 1) + ; SI: G_STORE [[COPY31]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; SI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C9]](s64) ; SI: [[COPY32:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) - ; SI: G_STORE [[COPY32]](s32), [[PTR_ADD14]](p1) :: (store 1 into unknown-address + 15, addrspace 1) + ; SI: G_STORE [[COPY32]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; CI-LABEL: name: test_store_global_v2s64_align1 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; CI: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store 16, align 1, addrspace 1) + ; CI: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 1, addrspace 1) ; VI-LABEL: name: test_store_global_v2s64_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 @@ -3265,35 +3265,35 @@ body: | ; VI: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16) ; VI: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) - ; VI: G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; VI: G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16) - ; VI: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store 1 into unknown-address + 3, addrspace 1) + ; VI: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store 1 into unknown-address + 4, addrspace 1) + ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) - ; VI: G_STORE [[ANYEXT2]](s32), [[PTR_ADD4]](p1) :: (store 1 into unknown-address + 5, addrspace 1) + ; VI: G_STORE [[ANYEXT2]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store 1 into unknown-address + 6, addrspace 1) + ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) ; VI: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16) - ; VI: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store 1 into unknown-address + 7, addrspace 1) + ; VI: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; VI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64) ; VI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) @@ -3308,35 +3308,35 @@ body: | ; VI: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C1]](s16) ; VI: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC7]], [[C1]](s16) ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store 1 into unknown-address + 8, addrspace 1) + ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) ; VI: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR8]](s16) - ; VI: G_STORE [[ANYEXT4]](s32), [[PTR_ADD8]](p1) :: (store 1 into unknown-address + 9, addrspace 1) + ; VI: G_STORE [[ANYEXT4]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) - ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD9]](p1) :: (store 1 into unknown-address + 10, addrspace 1) + ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) ; VI: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR9]](s16) - ; VI: G_STORE [[ANYEXT5]](s32), [[PTR_ADD10]](p1) :: (store 1 into unknown-address + 11, addrspace 1) + ; VI: G_STORE [[ANYEXT5]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; VI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD11]](p1) :: (store 1 into unknown-address + 12, addrspace 1) + ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; VI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s64) ; VI: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR10]](s16) - ; VI: G_STORE [[ANYEXT6]](s32), [[PTR_ADD12]](p1) :: (store 1 into unknown-address + 13, addrspace 1) + ; VI: G_STORE [[ANYEXT6]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; VI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C7]](s64) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) - ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD13]](p1) :: (store 1 into unknown-address + 14, addrspace 1) + ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; VI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C8]](s64) ; VI: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR11]](s16) - ; VI: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store 1 into unknown-address + 15, addrspace 1) + ; VI: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; GFX9-LABEL: name: test_store_global_v2s64_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store 16, align 1, addrspace 1) + ; GFX9: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store 16, align 1, addrspace 1) + G_STORE %1, %0 :: (store (<2 x s64>), align 1, addrspace 1) ... --- @@ -3354,39 +3354,39 @@ body: | ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 2 into unknown-address + 2, addrspace 1) + ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 2 into unknown-address + 4, addrspace 1) + ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 2 into unknown-address + 6, addrspace 1) + ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; SI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32) ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 2 into unknown-address + 8, addrspace 1) + ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 2 into unknown-address + 10, addrspace 1) + ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store 2 into unknown-address + 12, addrspace 1) + ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store 2 into unknown-address + 14, addrspace 1) + ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; CI-LABEL: name: test_store_global_v2s64_align2 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; CI: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store 16, align 2, addrspace 1) + ; CI: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 2, addrspace 1) ; VI-LABEL: name: test_store_global_v2s64_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 @@ -3396,42 +3396,42 @@ body: | ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 2 into unknown-address + 2, addrspace 1) + ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 2 into unknown-address + 4, addrspace 1) + ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 2 into unknown-address + 6, addrspace 1) + ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; VI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32) ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 2 into unknown-address + 8, addrspace 1) + ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 2 into unknown-address + 10, addrspace 1) + ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store 2 into unknown-address + 12, addrspace 1) + ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store 2 into unknown-address + 14, addrspace 1) + ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; GFX9-LABEL: name: test_store_global_v2s64_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store 16, align 2, addrspace 1) + ; GFX9: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store 16, align 2, addrspace 1) + G_STORE %1, %0 :: (store (<2 x s64>), align 2, addrspace 1) ... --- @@ -3443,22 +3443,22 @@ body: | ; SI-LABEL: name: test_store_global_v2s64_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; SI: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store 16, align 4, addrspace 1) + ; SI: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 4, addrspace 1) ; CI-LABEL: name: test_store_global_v2s64_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; CI: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store 16, align 4, addrspace 1) + ; CI: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 4, addrspace 1) ; VI-LABEL: name: test_store_global_v2s64_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; VI: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store 16, align 4, addrspace 1) + ; VI: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 4, addrspace 1) ; GFX9-LABEL: name: test_store_global_v2s64_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store 16, align 4, addrspace 1) + ; GFX9: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store 16, align 4, addrspace 1) + G_STORE %1, %0 :: (store (<2 x s64>), align 4, addrspace 1) ... --- @@ -3470,22 +3470,22 @@ body: | ; SI-LABEL: name: test_store_global_v2s64_align8 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; SI: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store 16, align 8, addrspace 1) + ; SI: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 8, addrspace 1) ; CI-LABEL: name: test_store_global_v2s64_align8 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; CI: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store 16, align 8, addrspace 1) + ; CI: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 8, addrspace 1) ; VI-LABEL: name: test_store_global_v2s64_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; VI: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store 16, align 8, addrspace 1) + ; VI: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 8, addrspace 1) ; GFX9-LABEL: name: test_store_global_v2s64_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store 16, align 8, addrspace 1) + ; GFX9: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), align 8, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store 16, align 8, addrspace 1) + G_STORE %1, %0 :: (store (<2 x s64>), align 8, addrspace 1) ... --- @@ -3497,22 +3497,22 @@ body: | ; SI-LABEL: name: test_store_global_v2s64_align16 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; SI: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store 16, addrspace 1) + ; SI: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), addrspace 1) ; CI-LABEL: name: test_store_global_v2s64_align16 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; CI: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store 16, addrspace 1) + ; CI: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), addrspace 1) ; VI-LABEL: name: test_store_global_v2s64_align16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; VI: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store 16, addrspace 1) + ; VI: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), addrspace 1) ; GFX9-LABEL: name: test_store_global_v2s64_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; GFX9: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store 16, addrspace 1) + ; GFX9: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store 16, align 16, addrspace 1) + G_STORE %1, %0 :: (store (<2 x s64>), align 16, addrspace 1) ... --- @@ -3533,72 +3533,72 @@ body: | ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 1 into unknown-address + 3, addrspace 1) + ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 1 into unknown-address + 4, addrspace 1) + ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 1 into unknown-address + 5, addrspace 1) + ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store 1 into unknown-address + 6, addrspace 1) + ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store 1 into unknown-address + 7, addrspace 1) + ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store 1 into unknown-address + 8, addrspace 1) + ; SI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) - ; SI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store 1 into unknown-address + 9, addrspace 1) + ; SI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) - ; SI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store 1 into unknown-address + 10, addrspace 1) + ; SI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) - ; SI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store 1 into unknown-address + 11, addrspace 1) + ; SI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; SI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) ; SI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) ; SI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32) ; SI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32) ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI: G_STORE [[COPY14]](s32), [[PTR_ADD11]](p1) :: (store 1 into unknown-address + 12, addrspace 1) + ; SI: G_STORE [[COPY14]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; SI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64) ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) - ; SI: G_STORE [[COPY15]](s32), [[PTR_ADD12]](p1) :: (store 1 into unknown-address + 13, addrspace 1) + ; SI: G_STORE [[COPY15]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; SI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) - ; SI: G_STORE [[COPY16]](s32), [[PTR_ADD13]](p1) :: (store 1 into unknown-address + 14, addrspace 1) + ; SI: G_STORE [[COPY16]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; SI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64) ; SI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) - ; SI: G_STORE [[COPY17]](s32), [[PTR_ADD14]](p1) :: (store 1 into unknown-address + 15, addrspace 1) + ; SI: G_STORE [[COPY17]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; CI-LABEL: name: test_store_global_v8s16_align1 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; CI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) - ; CI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store 16, align 1, addrspace 1) + ; CI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<8 x s16>), align 1, addrspace 1) ; VI-LABEL: name: test_store_global_v8s16_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 @@ -3611,75 +3611,75 @@ body: | ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 1 into unknown-address + 3, addrspace 1) + ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 1 into unknown-address + 4, addrspace 1) + ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 1 into unknown-address + 5, addrspace 1) + ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store 1 into unknown-address + 6, addrspace 1) + ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store 1 into unknown-address + 7, addrspace 1) + ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store 1 into unknown-address + 8, addrspace 1) + ; VI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) - ; VI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store 1 into unknown-address + 9, addrspace 1) + ; VI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) - ; VI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store 1 into unknown-address + 10, addrspace 1) + ; VI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) - ; VI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store 1 into unknown-address + 11, addrspace 1) + ; VI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) ; VI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) ; VI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32) ; VI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32) ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI: G_STORE [[COPY14]](s32), [[PTR_ADD11]](p1) :: (store 1 into unknown-address + 12, addrspace 1) + ; VI: G_STORE [[COPY14]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; VI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64) ; VI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) - ; VI: G_STORE [[COPY15]](s32), [[PTR_ADD12]](p1) :: (store 1 into unknown-address + 13, addrspace 1) + ; VI: G_STORE [[COPY15]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; VI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) ; VI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) - ; VI: G_STORE [[COPY16]](s32), [[PTR_ADD13]](p1) :: (store 1 into unknown-address + 14, addrspace 1) + ; VI: G_STORE [[COPY16]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; VI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64) ; VI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) - ; VI: G_STORE [[COPY17]](s32), [[PTR_ADD14]](p1) :: (store 1 into unknown-address + 15, addrspace 1) + ; VI: G_STORE [[COPY17]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; GFX9-LABEL: name: test_store_global_v8s16_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) - ; GFX9: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store 16, align 1, addrspace 1) + ; GFX9: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<8 x s16>), align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store 16, align 1, addrspace 1) + G_STORE %1, %0 :: (store (<8 x s16>), align 1, addrspace 1) ... --- @@ -3697,40 +3697,40 @@ body: | ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 2 into unknown-address + 2, addrspace 1) + ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 2 into unknown-address + 4, addrspace 1) + ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 2 into unknown-address + 6, addrspace 1) + ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<2 x s32>) ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 2 into unknown-address + 8, addrspace 1) + ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 2 into unknown-address + 10, addrspace 1) + ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store 2 into unknown-address + 12, addrspace 1) + ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store 2 into unknown-address + 14, addrspace 1) + ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; CI-LABEL: name: test_store_global_v8s16_align2 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; CI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) - ; CI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store 16, align 2, addrspace 1) + ; CI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<8 x s16>), align 2, addrspace 1) ; VI-LABEL: name: test_store_global_v8s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 @@ -3740,43 +3740,43 @@ body: | ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 2 into unknown-address + 2, addrspace 1) + ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 2 into unknown-address + 4, addrspace 1) + ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 2 into unknown-address + 6, addrspace 1) + ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<2 x s32>) ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32) ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 2 into unknown-address + 8, addrspace 1) + ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 2 into unknown-address + 10, addrspace 1) + ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store 2 into unknown-address + 12, addrspace 1) + ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store 2 into unknown-address + 14, addrspace 1) + ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; GFX9-LABEL: name: test_store_global_v8s16_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) - ; GFX9: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store 16, align 2, addrspace 1) + ; GFX9: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<8 x s16>), align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store 16, align 2, addrspace 1) + G_STORE %1, %0 :: (store (<8 x s16>), align 2, addrspace 1) ... --- @@ -3789,25 +3789,25 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) - ; SI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store 16, align 4, addrspace 1) + ; SI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<8 x s16>), align 4, addrspace 1) ; CI-LABEL: name: test_store_global_v8s16_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; CI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) - ; CI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store 16, align 4, addrspace 1) + ; CI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<8 x s16>), align 4, addrspace 1) ; VI-LABEL: name: test_store_global_v8s16_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; VI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) - ; VI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store 16, align 4, addrspace 1) + ; VI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<8 x s16>), align 4, addrspace 1) ; GFX9-LABEL: name: test_store_global_v8s16_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) - ; GFX9: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store 16, align 4, addrspace 1) + ; GFX9: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<8 x s16>), align 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store 16, align 4, addrspace 1) + G_STORE %1, %0 :: (store (<8 x s16>), align 4, addrspace 1) ... --- @@ -3820,25 +3820,25 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) - ; SI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store 16, align 8, addrspace 1) + ; SI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<8 x s16>), align 8, addrspace 1) ; CI-LABEL: name: test_store_global_v8s16_align8 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; CI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) - ; CI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store 16, align 8, addrspace 1) + ; CI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<8 x s16>), align 8, addrspace 1) ; VI-LABEL: name: test_store_global_v8s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; VI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) - ; VI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store 16, align 8, addrspace 1) + ; VI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<8 x s16>), align 8, addrspace 1) ; GFX9-LABEL: name: test_store_global_v8s16_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) - ; GFX9: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store 16, align 8, addrspace 1) + ; GFX9: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<8 x s16>), align 8, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store 16, align 8, addrspace 1) + G_STORE %1, %0 :: (store (<8 x s16>), align 8, addrspace 1) ... --- @@ -3851,25 +3851,25 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) - ; SI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store 16, addrspace 1) + ; SI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<8 x s16>), addrspace 1) ; CI-LABEL: name: test_store_global_v8s16_align16 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; CI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) - ; CI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store 16, addrspace 1) + ; CI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<8 x s16>), addrspace 1) ; VI-LABEL: name: test_store_global_v8s16_align16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; VI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) - ; VI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store 16, addrspace 1) + ; VI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<8 x s16>), addrspace 1) ; GFX9-LABEL: name: test_store_global_v8s16_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<8 x s16>) - ; GFX9: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store 16, addrspace 1) + ; GFX9: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<8 x s16>), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store 16, align 16, addrspace 1) + G_STORE %1, %0 :: (store (<8 x s16>), align 16, addrspace 1) ... --- @@ -3890,72 +3890,72 @@ body: | ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 1 into unknown-address + 3, addrspace 1) + ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 1 into unknown-address + 4, addrspace 1) + ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 1 into unknown-address + 5, addrspace 1) + ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store 1 into unknown-address + 6, addrspace 1) + ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store 1 into unknown-address + 7, addrspace 1) + ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store 1 into unknown-address + 8, addrspace 1) + ; SI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) - ; SI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store 1 into unknown-address + 9, addrspace 1) + ; SI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) - ; SI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store 1 into unknown-address + 10, addrspace 1) + ; SI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) - ; SI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store 1 into unknown-address + 11, addrspace 1) + ; SI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; SI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) ; SI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) ; SI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32) ; SI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32) ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI: G_STORE [[COPY14]](s32), [[PTR_ADD11]](p1) :: (store 1 into unknown-address + 12, addrspace 1) + ; SI: G_STORE [[COPY14]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; SI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64) ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) - ; SI: G_STORE [[COPY15]](s32), [[PTR_ADD12]](p1) :: (store 1 into unknown-address + 13, addrspace 1) + ; SI: G_STORE [[COPY15]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; SI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) - ; SI: G_STORE [[COPY16]](s32), [[PTR_ADD13]](p1) :: (store 1 into unknown-address + 14, addrspace 1) + ; SI: G_STORE [[COPY16]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; SI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64) ; SI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) - ; SI: G_STORE [[COPY17]](s32), [[PTR_ADD14]](p1) :: (store 1 into unknown-address + 15, addrspace 1) + ; SI: G_STORE [[COPY17]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; CI-LABEL: name: test_store_global_v2p0_align1 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; CI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) - ; CI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store 16, align 1, addrspace 1) + ; CI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<2 x p0>), align 1, addrspace 1) ; VI-LABEL: name: test_store_global_v2p0_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 @@ -3968,75 +3968,75 @@ body: | ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 1 into unknown-address + 3, addrspace 1) + ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 1 into unknown-address + 4, addrspace 1) + ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 1 into unknown-address + 5, addrspace 1) + ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store 1 into unknown-address + 6, addrspace 1) + ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store 1 into unknown-address + 7, addrspace 1) + ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store 1 into unknown-address + 8, addrspace 1) + ; VI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) - ; VI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store 1 into unknown-address + 9, addrspace 1) + ; VI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) - ; VI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store 1 into unknown-address + 10, addrspace 1) + ; VI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) - ; VI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store 1 into unknown-address + 11, addrspace 1) + ; VI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) ; VI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) ; VI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32) ; VI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32) ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI: G_STORE [[COPY14]](s32), [[PTR_ADD11]](p1) :: (store 1 into unknown-address + 12, addrspace 1) + ; VI: G_STORE [[COPY14]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; VI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64) ; VI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) - ; VI: G_STORE [[COPY15]](s32), [[PTR_ADD12]](p1) :: (store 1 into unknown-address + 13, addrspace 1) + ; VI: G_STORE [[COPY15]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; VI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) ; VI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) - ; VI: G_STORE [[COPY16]](s32), [[PTR_ADD13]](p1) :: (store 1 into unknown-address + 14, addrspace 1) + ; VI: G_STORE [[COPY16]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; VI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64) ; VI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) - ; VI: G_STORE [[COPY17]](s32), [[PTR_ADD14]](p1) :: (store 1 into unknown-address + 15, addrspace 1) + ; VI: G_STORE [[COPY17]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; GFX9-LABEL: name: test_store_global_v2p0_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) - ; GFX9: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store 16, align 1, addrspace 1) + ; GFX9: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<2 x p0>), align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store 16, align 1, addrspace 1) + G_STORE %1, %0 :: (store (<2 x p0>), align 1, addrspace 1) ... --- @@ -4054,40 +4054,40 @@ body: | ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 2 into unknown-address + 2, addrspace 1) + ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 2 into unknown-address + 4, addrspace 1) + ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 2 into unknown-address + 6, addrspace 1) + ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<2 x s32>) ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 2 into unknown-address + 8, addrspace 1) + ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 2 into unknown-address + 10, addrspace 1) + ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store 2 into unknown-address + 12, addrspace 1) + ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store 2 into unknown-address + 14, addrspace 1) + ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; CI-LABEL: name: test_store_global_v2p0_align2 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; CI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) - ; CI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store 16, align 2, addrspace 1) + ; CI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<2 x p0>), align 2, addrspace 1) ; VI-LABEL: name: test_store_global_v2p0_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 @@ -4097,43 +4097,43 @@ body: | ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 2 into unknown-address + 2, addrspace 1) + ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 2 into unknown-address + 4, addrspace 1) + ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 2 into unknown-address + 6, addrspace 1) + ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<2 x s32>) ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32) ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 2 into unknown-address + 8, addrspace 1) + ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 2 into unknown-address + 10, addrspace 1) + ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store 2 into unknown-address + 12, addrspace 1) + ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store 2 into unknown-address + 14, addrspace 1) + ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; GFX9-LABEL: name: test_store_global_v2p0_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) - ; GFX9: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store 16, align 2, addrspace 1) + ; GFX9: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<2 x p0>), align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store 16, align 2, addrspace 1) + G_STORE %1, %0 :: (store (<2 x p0>), align 2, addrspace 1) ... --- @@ -4146,25 +4146,25 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) - ; SI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store 16, align 4, addrspace 1) + ; SI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<2 x p0>), align 4, addrspace 1) ; CI-LABEL: name: test_store_global_v2p0_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; CI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) - ; CI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store 16, align 4, addrspace 1) + ; CI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<2 x p0>), align 4, addrspace 1) ; VI-LABEL: name: test_store_global_v2p0_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; VI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) - ; VI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store 16, align 4, addrspace 1) + ; VI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<2 x p0>), align 4, addrspace 1) ; GFX9-LABEL: name: test_store_global_v2p0_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) - ; GFX9: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store 16, align 4, addrspace 1) + ; GFX9: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<2 x p0>), align 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store 16, align 4, addrspace 1) + G_STORE %1, %0 :: (store (<2 x p0>), align 4, addrspace 1) ... --- @@ -4177,25 +4177,25 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) - ; SI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store 16, align 8, addrspace 1) + ; SI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<2 x p0>), align 8, addrspace 1) ; CI-LABEL: name: test_store_global_v2p0_align8 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; CI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) - ; CI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store 16, align 8, addrspace 1) + ; CI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<2 x p0>), align 8, addrspace 1) ; VI-LABEL: name: test_store_global_v2p0_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; VI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) - ; VI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store 16, align 8, addrspace 1) + ; VI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<2 x p0>), align 8, addrspace 1) ; GFX9-LABEL: name: test_store_global_v2p0_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) - ; GFX9: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store 16, align 8, addrspace 1) + ; GFX9: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<2 x p0>), align 8, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store 16, align 8, addrspace 1) + G_STORE %1, %0 :: (store (<2 x p0>), align 8, addrspace 1) ... --- @@ -4208,25 +4208,25 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) - ; SI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store 16, addrspace 1) + ; SI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<2 x p0>), addrspace 1) ; CI-LABEL: name: test_store_global_v2p0_align16 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; CI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) - ; CI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store 16, addrspace 1) + ; CI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<2 x p0>), addrspace 1) ; VI-LABEL: name: test_store_global_v2p0_align16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; VI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) - ; VI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store 16, addrspace 1) + ; VI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<2 x p0>), addrspace 1) ; GFX9-LABEL: name: test_store_global_v2p0_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x p0>) - ; GFX9: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store 16, addrspace 1) + ; GFX9: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<2 x p0>), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x p0>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store 16, align 16, addrspace 1) + G_STORE %1, %0 :: (store (<2 x p0>), align 16, addrspace 1) ... --- @@ -4249,56 +4249,56 @@ body: | ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 1 into unknown-address + 3, addrspace 1) + ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 1 into unknown-address + 4, addrspace 1) + ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 1 into unknown-address + 5, addrspace 1) + ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store 1 into unknown-address + 6, addrspace 1) + ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store 1 into unknown-address + 7, addrspace 1) + ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32) ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C1]](s32) ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C2]](s32) ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) - ; SI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store 1 into unknown-address + 8, addrspace 1) + ; SI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) - ; SI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store 1 into unknown-address + 9, addrspace 1) + ; SI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) - ; SI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store 1 into unknown-address + 10, addrspace 1) + ; SI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) - ; SI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store 1 into unknown-address + 11, addrspace 1) + ; SI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; CI-LABEL: name: test_store_global_s96_align1 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 ; CI: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) - ; CI: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store 12, align 1, addrspace 1) + ; CI: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (s96), align 1, addrspace 1) ; VI-LABEL: name: test_store_global_s96_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 @@ -4313,59 +4313,59 @@ body: | ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 1 into unknown-address + 3, addrspace 1) + ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 1 into unknown-address + 4, addrspace 1) + ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 1 into unknown-address + 5, addrspace 1) + ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store 1 into unknown-address + 6, addrspace 1) + ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store 1 into unknown-address + 7, addrspace 1) + ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32) ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C1]](s32) ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C2]](s32) ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) - ; VI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store 1 into unknown-address + 8, addrspace 1) + ; VI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) - ; VI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store 1 into unknown-address + 9, addrspace 1) + ; VI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) - ; VI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store 1 into unknown-address + 10, addrspace 1) + ; VI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) - ; VI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store 1 into unknown-address + 11, addrspace 1) + ; VI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; GFX9-LABEL: name: test_store_global_s96_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 ; GFX9: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) - ; GFX9: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store 12, align 1, addrspace 1) + ; GFX9: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (s96), align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s96) = COPY $vgpr2_vgpr3_vgpr4 - G_STORE %1, %0 :: (store 12, align 1, addrspace 1) + G_STORE %1, %0 :: (store (s96), align 1, addrspace 1) ... --- @@ -4384,32 +4384,32 @@ body: | ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 2 into unknown-address + 2, addrspace 1) + ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 2 into unknown-address + 4, addrspace 1) + ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 2 into unknown-address + 6, addrspace 1) + ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) - ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 2 into unknown-address + 8, addrspace 1) + ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 2 into unknown-address + 10, addrspace 1) + ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; CI-LABEL: name: test_store_global_s96_align2 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 ; CI: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) - ; CI: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store 12, align 2, addrspace 1) + ; CI: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (s96), align 2, addrspace 1) ; VI-LABEL: name: test_store_global_s96_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 @@ -4420,35 +4420,35 @@ body: | ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 2 into unknown-address + 2, addrspace 1) + ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 2 into unknown-address + 4, addrspace 1) + ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 2 into unknown-address + 6, addrspace 1) + ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32) ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) - ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 2 into unknown-address + 8, addrspace 1) + ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 2 into unknown-address + 10, addrspace 1) + ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; GFX9-LABEL: name: test_store_global_s96_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 ; GFX9: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) - ; GFX9: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store 12, align 2, addrspace 1) + ; GFX9: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (s96), align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s96) = COPY $vgpr2_vgpr3_vgpr4 - G_STORE %1, %0 :: (store 12, align 2, addrspace 1) + G_STORE %1, %0 :: (store (s96), align 2, addrspace 1) ... --- @@ -4463,28 +4463,28 @@ body: | ; SI: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) ; SI: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<3 x s32>), 0 ; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<3 x s32>), 64 - ; SI: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store 8, align 4, addrspace 1) + ; SI: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store (s64), align 4, addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 8, addrspace 1) + ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, addrspace 1) ; CI-LABEL: name: test_store_global_s96_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 ; CI: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) - ; CI: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store 12, align 4, addrspace 1) + ; CI: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (s96), align 4, addrspace 1) ; VI-LABEL: name: test_store_global_s96_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 ; VI: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) - ; VI: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store 12, align 4, addrspace 1) + ; VI: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (s96), align 4, addrspace 1) ; GFX9-LABEL: name: test_store_global_s96_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 ; GFX9: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) - ; GFX9: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store 12, align 4, addrspace 1) + ; GFX9: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (s96), align 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s96) = COPY $vgpr2_vgpr3_vgpr4 - G_STORE %1, %0 :: (store 12, align 4, addrspace 1) + G_STORE %1, %0 :: (store (s96), align 4, addrspace 1) ... --- @@ -4499,28 +4499,28 @@ body: | ; SI: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) ; SI: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<3 x s32>), 0 ; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<3 x s32>), 64 - ; SI: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store 8, addrspace 1) + ; SI: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store (s64), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 8, align 8, addrspace 1) + ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1) ; CI-LABEL: name: test_store_global_s96_align8 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 ; CI: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) - ; CI: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store 12, align 8, addrspace 1) + ; CI: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (s96), align 8, addrspace 1) ; VI-LABEL: name: test_store_global_s96_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 ; VI: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) - ; VI: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store 12, align 8, addrspace 1) + ; VI: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (s96), align 8, addrspace 1) ; GFX9-LABEL: name: test_store_global_s96_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 ; GFX9: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) - ; GFX9: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store 12, align 8, addrspace 1) + ; GFX9: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (s96), align 8, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s96) = COPY $vgpr2_vgpr3_vgpr4 - G_STORE %1, %0 :: (store 12, align 8, addrspace 1) + G_STORE %1, %0 :: (store (s96), align 8, addrspace 1) ... --- @@ -4535,28 +4535,28 @@ body: | ; SI: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) ; SI: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<3 x s32>), 0 ; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<3 x s32>), 64 - ; SI: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store 8, align 16, addrspace 1) + ; SI: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store (s64), align 16, addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 8, align 8, addrspace 1) + ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1) ; CI-LABEL: name: test_store_global_s96_align16 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 ; CI: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) - ; CI: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store 12, align 16, addrspace 1) + ; CI: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (s96), align 16, addrspace 1) ; VI-LABEL: name: test_store_global_s96_align16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 ; VI: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) - ; VI: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store 12, align 16, addrspace 1) + ; VI: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (s96), align 16, addrspace 1) ; GFX9-LABEL: name: test_store_global_s96_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 ; GFX9: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) - ; GFX9: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store 12, align 16, addrspace 1) + ; GFX9: G_STORE [[BITCAST]](<3 x s32>), [[COPY]](p1) :: (store (s96), align 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s96) = COPY $vgpr2_vgpr3_vgpr4 - G_STORE %1, %0 :: (store 12, align 16, addrspace 1) + G_STORE %1, %0 :: (store (s96), align 16, addrspace 1) ... --- @@ -4577,72 +4577,72 @@ body: | ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 1 into unknown-address + 3, addrspace 1) + ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 1 into unknown-address + 4, addrspace 1) + ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 1 into unknown-address + 5, addrspace 1) + ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store 1 into unknown-address + 6, addrspace 1) + ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store 1 into unknown-address + 7, addrspace 1) + ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store 1 into unknown-address + 8, addrspace 1) + ; SI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) - ; SI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store 1 into unknown-address + 9, addrspace 1) + ; SI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) - ; SI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store 1 into unknown-address + 10, addrspace 1) + ; SI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) - ; SI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store 1 into unknown-address + 11, addrspace 1) + ; SI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; SI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) ; SI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) ; SI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32) ; SI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32) ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI: G_STORE [[COPY14]](s32), [[PTR_ADD11]](p1) :: (store 1 into unknown-address + 12, addrspace 1) + ; SI: G_STORE [[COPY14]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; SI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64) ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) - ; SI: G_STORE [[COPY15]](s32), [[PTR_ADD12]](p1) :: (store 1 into unknown-address + 13, addrspace 1) + ; SI: G_STORE [[COPY15]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; SI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) - ; SI: G_STORE [[COPY16]](s32), [[PTR_ADD13]](p1) :: (store 1 into unknown-address + 14, addrspace 1) + ; SI: G_STORE [[COPY16]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; SI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64) ; SI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) - ; SI: G_STORE [[COPY17]](s32), [[PTR_ADD14]](p1) :: (store 1 into unknown-address + 15, addrspace 1) + ; SI: G_STORE [[COPY17]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; CI-LABEL: name: test_store_global_s128_align1 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; CI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) - ; CI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store 16, align 1, addrspace 1) + ; CI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 1, addrspace 1) ; VI-LABEL: name: test_store_global_s128_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 @@ -4655,75 +4655,75 @@ body: | ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 1 into unknown-address + 3, addrspace 1) + ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 1 into unknown-address + 4, addrspace 1) + ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 1 into unknown-address + 5, addrspace 1) + ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store 1 into unknown-address + 6, addrspace 1) + ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store 1 into unknown-address + 7, addrspace 1) + ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store 1 into unknown-address + 8, addrspace 1) + ; VI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) - ; VI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store 1 into unknown-address + 9, addrspace 1) + ; VI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) - ; VI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store 1 into unknown-address + 10, addrspace 1) + ; VI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) - ; VI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store 1 into unknown-address + 11, addrspace 1) + ; VI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) ; VI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) ; VI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32) ; VI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32) ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI: G_STORE [[COPY14]](s32), [[PTR_ADD11]](p1) :: (store 1 into unknown-address + 12, addrspace 1) + ; VI: G_STORE [[COPY14]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; VI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64) ; VI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) - ; VI: G_STORE [[COPY15]](s32), [[PTR_ADD12]](p1) :: (store 1 into unknown-address + 13, addrspace 1) + ; VI: G_STORE [[COPY15]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; VI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) ; VI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) - ; VI: G_STORE [[COPY16]](s32), [[PTR_ADD13]](p1) :: (store 1 into unknown-address + 14, addrspace 1) + ; VI: G_STORE [[COPY16]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; VI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64) ; VI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) - ; VI: G_STORE [[COPY17]](s32), [[PTR_ADD14]](p1) :: (store 1 into unknown-address + 15, addrspace 1) + ; VI: G_STORE [[COPY17]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; GFX9-LABEL: name: test_store_global_s128_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) - ; GFX9: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store 16, align 1, addrspace 1) + ; GFX9: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store 16, align 1, addrspace 1) + G_STORE %1, %0 :: (store (s128), align 1, addrspace 1) ... --- @@ -4741,40 +4741,40 @@ body: | ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 2 into unknown-address + 2, addrspace 1) + ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 2 into unknown-address + 4, addrspace 1) + ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 2 into unknown-address + 6, addrspace 1) + ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<2 x s32>) ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 2 into unknown-address + 8, addrspace 1) + ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 2 into unknown-address + 10, addrspace 1) + ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store 2 into unknown-address + 12, addrspace 1) + ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store 2 into unknown-address + 14, addrspace 1) + ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; CI-LABEL: name: test_store_global_s128_align2 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; CI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) - ; CI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store 16, align 2, addrspace 1) + ; CI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 2, addrspace 1) ; VI-LABEL: name: test_store_global_s128_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 @@ -4784,43 +4784,43 @@ body: | ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 2 into unknown-address + 2, addrspace 1) + ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 2 into unknown-address + 4, addrspace 1) + ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 2 into unknown-address + 6, addrspace 1) + ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<2 x s32>) ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32) ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 2 into unknown-address + 8, addrspace 1) + ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 2 into unknown-address + 10, addrspace 1) + ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store 2 into unknown-address + 12, addrspace 1) + ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store 2 into unknown-address + 14, addrspace 1) + ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; GFX9-LABEL: name: test_store_global_s128_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) - ; GFX9: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store 16, align 2, addrspace 1) + ; GFX9: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store 16, align 2, addrspace 1) + G_STORE %1, %0 :: (store (s128), align 2, addrspace 1) ... --- @@ -4833,25 +4833,25 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) - ; SI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store 16, align 4, addrspace 1) + ; SI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 4, addrspace 1) ; CI-LABEL: name: test_store_global_s128_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; CI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) - ; CI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store 16, align 4, addrspace 1) + ; CI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 4, addrspace 1) ; VI-LABEL: name: test_store_global_s128_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; VI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) - ; VI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store 16, align 4, addrspace 1) + ; VI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 4, addrspace 1) ; GFX9-LABEL: name: test_store_global_s128_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) - ; GFX9: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store 16, align 4, addrspace 1) + ; GFX9: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store 16, align 4, addrspace 1) + G_STORE %1, %0 :: (store (s128), align 4, addrspace 1) ... --- @@ -4864,25 +4864,25 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) - ; SI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store 16, align 8, addrspace 1) + ; SI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 8, addrspace 1) ; CI-LABEL: name: test_store_global_s128_align8 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; CI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) - ; CI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store 16, align 8, addrspace 1) + ; CI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 8, addrspace 1) ; VI-LABEL: name: test_store_global_s128_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; VI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) - ; VI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store 16, align 8, addrspace 1) + ; VI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 8, addrspace 1) ; GFX9-LABEL: name: test_store_global_s128_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) - ; GFX9: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store 16, align 8, addrspace 1) + ; GFX9: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 8, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store 16, align 8, addrspace 1) + G_STORE %1, %0 :: (store (s128), align 8, addrspace 1) ... --- @@ -4895,25 +4895,25 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) - ; SI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store 16, addrspace 1) + ; SI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (s128), addrspace 1) ; CI-LABEL: name: test_store_global_s128_align16 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; CI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) - ; CI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store 16, addrspace 1) + ; CI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (s128), addrspace 1) ; VI-LABEL: name: test_store_global_s128_align16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; VI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) - ; VI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store 16, addrspace 1) + ; VI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (s128), addrspace 1) ; GFX9-LABEL: name: test_store_global_s128_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX9: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) - ; GFX9: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store 16, addrspace 1) + ; GFX9: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (s128), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store 16, align 16, addrspace 1) + G_STORE %1, %0 :: (store (s128), align 16, addrspace 1) ... --- @@ -4935,92 +4935,92 @@ body: | ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 1 into unknown-address + 3, addrspace 1) + ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 1 into unknown-address + 4, addrspace 1) + ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 1 into unknown-address + 5, addrspace 1) + ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store 1 into unknown-address + 6, addrspace 1) + ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store 1 into unknown-address + 7, addrspace 1) + ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store 1 into unknown-address + 8, addrspace 1) + ; SI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) - ; SI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store 1 into unknown-address + 9, addrspace 1) + ; SI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) - ; SI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store 1 into unknown-address + 10, addrspace 1) + ; SI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) - ; SI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store 1 into unknown-address + 11, addrspace 1) + ; SI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; SI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) ; SI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) ; SI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32) ; SI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32) ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI: G_STORE [[COPY14]](s32), [[PTR_ADD11]](p1) :: (store 1 into unknown-address + 12, addrspace 1) + ; SI: G_STORE [[COPY14]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; SI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64) ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) - ; SI: G_STORE [[COPY15]](s32), [[PTR_ADD12]](p1) :: (store 1 into unknown-address + 13, addrspace 1) + ; SI: G_STORE [[COPY15]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; SI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) - ; SI: G_STORE [[COPY16]](s32), [[PTR_ADD13]](p1) :: (store 1 into unknown-address + 14, addrspace 1) + ; SI: G_STORE [[COPY16]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; SI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64) ; SI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) - ; SI: G_STORE [[COPY17]](s32), [[PTR_ADD14]](p1) :: (store 1 into unknown-address + 15, addrspace 1) + ; SI: G_STORE [[COPY17]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; SI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64) ; SI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32) ; SI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C1]](s32) ; SI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C2]](s32) ; SI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) - ; SI: G_STORE [[COPY18]](s32), [[PTR_ADD15]](p1) :: (store 1 into unknown-address + 16, addrspace 1) + ; SI: G_STORE [[COPY18]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) ; SI: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) ; SI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LSHR12]](s32) - ; SI: G_STORE [[COPY19]](s32), [[PTR_ADD16]](p1) :: (store 1 into unknown-address + 17, addrspace 1) + ; SI: G_STORE [[COPY19]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) ; SI: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) ; SI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LSHR13]](s32) - ; SI: G_STORE [[COPY20]](s32), [[PTR_ADD17]](p1) :: (store 1 into unknown-address + 18, addrspace 1) + ; SI: G_STORE [[COPY20]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) ; SI: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) ; SI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LSHR14]](s32) - ; SI: G_STORE [[COPY21]](s32), [[PTR_ADD18]](p1) :: (store 1 into unknown-address + 19, addrspace 1) + ; SI: G_STORE [[COPY21]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) ; CI-LABEL: name: test_store_global_v5s32_align1 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 ; CI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, align 1, addrspace 1) + ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 1, addrspace 1) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, align 1, addrspace 1) + ; CI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1) ; VI-LABEL: name: test_store_global_v5s32_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 @@ -5034,95 +5034,95 @@ body: | ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 1 into unknown-address + 3, addrspace 1) + ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 1 into unknown-address + 4, addrspace 1) + ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 1 into unknown-address + 5, addrspace 1) + ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store 1 into unknown-address + 6, addrspace 1) + ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store 1 into unknown-address + 7, addrspace 1) + ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store 1 into unknown-address + 8, addrspace 1) + ; VI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) - ; VI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store 1 into unknown-address + 9, addrspace 1) + ; VI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) - ; VI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store 1 into unknown-address + 10, addrspace 1) + ; VI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) - ; VI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store 1 into unknown-address + 11, addrspace 1) + ; VI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) ; VI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) ; VI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32) ; VI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32) ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI: G_STORE [[COPY14]](s32), [[PTR_ADD11]](p1) :: (store 1 into unknown-address + 12, addrspace 1) + ; VI: G_STORE [[COPY14]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; VI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64) ; VI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) - ; VI: G_STORE [[COPY15]](s32), [[PTR_ADD12]](p1) :: (store 1 into unknown-address + 13, addrspace 1) + ; VI: G_STORE [[COPY15]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; VI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) ; VI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) - ; VI: G_STORE [[COPY16]](s32), [[PTR_ADD13]](p1) :: (store 1 into unknown-address + 14, addrspace 1) + ; VI: G_STORE [[COPY16]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; VI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64) ; VI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) - ; VI: G_STORE [[COPY17]](s32), [[PTR_ADD14]](p1) :: (store 1 into unknown-address + 15, addrspace 1) + ; VI: G_STORE [[COPY17]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; VI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64) ; VI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32) ; VI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C1]](s32) ; VI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C2]](s32) ; VI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) - ; VI: G_STORE [[COPY18]](s32), [[PTR_ADD15]](p1) :: (store 1 into unknown-address + 16, addrspace 1) + ; VI: G_STORE [[COPY18]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) ; VI: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) ; VI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LSHR12]](s32) - ; VI: G_STORE [[COPY19]](s32), [[PTR_ADD16]](p1) :: (store 1 into unknown-address + 17, addrspace 1) + ; VI: G_STORE [[COPY19]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) ; VI: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) ; VI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LSHR13]](s32) - ; VI: G_STORE [[COPY20]](s32), [[PTR_ADD17]](p1) :: (store 1 into unknown-address + 18, addrspace 1) + ; VI: G_STORE [[COPY20]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) ; VI: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) ; VI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LSHR14]](s32) - ; VI: G_STORE [[COPY21]](s32), [[PTR_ADD18]](p1) :: (store 1 into unknown-address + 19, addrspace 1) + ; VI: G_STORE [[COPY21]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) ; GFX9-LABEL: name: test_store_global_v5s32_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 ; GFX9: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, align 1, addrspace 1) + ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 1, addrspace 1) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, align 1, addrspace 1) + ; GFX9: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - G_STORE %1, %0 :: (store 20, align 1, addrspace 1) + G_STORE %1, %0 :: (store (<5 x s32>), align 1, addrspace 1) ... --- @@ -5141,52 +5141,52 @@ body: | ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 2 into unknown-address + 2, addrspace 1) + ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 2 into unknown-address + 4, addrspace 1) + ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 2 into unknown-address + 6, addrspace 1) + ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<2 x s32>) ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 2 into unknown-address + 8, addrspace 1) + ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 2 into unknown-address + 10, addrspace 1) + ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store 2 into unknown-address + 12, addrspace 1) + ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store 2 into unknown-address + 14, addrspace 1) + ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32) ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) - ; SI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store 2 into unknown-address + 16, addrspace 1) + ; SI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; SI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store 2 into unknown-address + 18, addrspace 1) + ; SI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) ; CI-LABEL: name: test_store_global_v5s32_align2 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 ; CI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, align 2, addrspace 1) + ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 2, addrspace 1) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, align 2, addrspace 1) + ; CI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1) ; VI-LABEL: name: test_store_global_v5s32_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 @@ -5197,55 +5197,55 @@ body: | ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 2 into unknown-address + 2, addrspace 1) + ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 2 into unknown-address + 4, addrspace 1) + ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 2 into unknown-address + 6, addrspace 1) + ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<2 x s32>) ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32) ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 2 into unknown-address + 8, addrspace 1) + ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 2 into unknown-address + 10, addrspace 1) + ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store 2 into unknown-address + 12, addrspace 1) + ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store 2 into unknown-address + 14, addrspace 1) + ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32) ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) - ; VI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store 2 into unknown-address + 16, addrspace 1) + ; VI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; VI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store 2 into unknown-address + 18, addrspace 1) + ; VI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) ; GFX9-LABEL: name: test_store_global_v5s32_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 ; GFX9: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, align 2, addrspace 1) + ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 2, addrspace 1) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, align 2, addrspace 1) + ; GFX9: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - G_STORE %1, %0 :: (store 20, align 2, addrspace 1) + G_STORE %1, %0 :: (store (<5 x s32>), align 2, addrspace 1) ... --- @@ -5259,40 +5259,40 @@ body: | ; SI: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 ; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; SI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, align 4, addrspace 1) + ; SI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 4, addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, addrspace 1) + ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) ; CI-LABEL: name: test_store_global_v5s32_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 ; CI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, align 4, addrspace 1) + ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 4, addrspace 1) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, addrspace 1) + ; CI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) ; VI-LABEL: name: test_store_global_v5s32_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 ; VI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; VI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, align 4, addrspace 1) + ; VI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 4, addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, addrspace 1) + ; VI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) ; GFX9-LABEL: name: test_store_global_v5s32_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 ; GFX9: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, align 4, addrspace 1) + ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 4, addrspace 1) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, addrspace 1) + ; GFX9: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - G_STORE %1, %0 :: (store 20, align 4, addrspace 1) + G_STORE %1, %0 :: (store (<5 x s32>), align 4, addrspace 1) ... --- @@ -5306,40 +5306,40 @@ body: | ; SI: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 ; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; SI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, align 8, addrspace 1) + ; SI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 8, addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, align 8, addrspace 1) + ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) ; CI-LABEL: name: test_store_global_v5s32_align8 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 ; CI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, align 8, addrspace 1) + ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 8, addrspace 1) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, align 8, addrspace 1) + ; CI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) ; VI-LABEL: name: test_store_global_v5s32_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 ; VI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; VI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, align 8, addrspace 1) + ; VI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 8, addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, align 8, addrspace 1) + ; VI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) ; GFX9-LABEL: name: test_store_global_v5s32_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 ; GFX9: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, align 8, addrspace 1) + ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 8, addrspace 1) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, align 8, addrspace 1) + ; GFX9: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - G_STORE %1, %0 :: (store 20, align 8, addrspace 1) + G_STORE %1, %0 :: (store (<5 x s32>), align 8, addrspace 1) ... --- @@ -5353,40 +5353,40 @@ body: | ; SI: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 ; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; SI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, addrspace 1) + ; SI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, align 16, addrspace 1) + ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) ; CI-LABEL: name: test_store_global_v5s32_align16 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 ; CI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, addrspace 1) + ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), addrspace 1) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, align 16, addrspace 1) + ; CI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) ; VI-LABEL: name: test_store_global_v5s32_align16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 ; VI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; VI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, addrspace 1) + ; VI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, align 16, addrspace 1) + ; VI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) ; GFX9-LABEL: name: test_store_global_v5s32_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 ; GFX9: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, addrspace 1) + ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), addrspace 1) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, align 16, addrspace 1) + ; GFX9: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - G_STORE %1, %0 :: (store 20, align 16, addrspace 1) + G_STORE %1, %0 :: (store (<5 x s32>), align 16, addrspace 1) ... --- @@ -5408,93 +5408,93 @@ body: | ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 1 into unknown-address + 3, addrspace 1) + ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 1 into unknown-address + 4, addrspace 1) + ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 1 into unknown-address + 5, addrspace 1) + ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store 1 into unknown-address + 6, addrspace 1) + ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store 1 into unknown-address + 7, addrspace 1) + ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store 1 into unknown-address + 8, addrspace 1) + ; SI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) - ; SI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store 1 into unknown-address + 9, addrspace 1) + ; SI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) - ; SI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store 1 into unknown-address + 10, addrspace 1) + ; SI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) - ; SI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store 1 into unknown-address + 11, addrspace 1) + ; SI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; SI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) ; SI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) ; SI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32) ; SI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32) ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI: G_STORE [[COPY14]](s32), [[PTR_ADD11]](p1) :: (store 1 into unknown-address + 12, addrspace 1) + ; SI: G_STORE [[COPY14]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; SI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64) ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) - ; SI: G_STORE [[COPY15]](s32), [[PTR_ADD12]](p1) :: (store 1 into unknown-address + 13, addrspace 1) + ; SI: G_STORE [[COPY15]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; SI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) - ; SI: G_STORE [[COPY16]](s32), [[PTR_ADD13]](p1) :: (store 1 into unknown-address + 14, addrspace 1) + ; SI: G_STORE [[COPY16]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; SI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64) ; SI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) - ; SI: G_STORE [[COPY17]](s32), [[PTR_ADD14]](p1) :: (store 1 into unknown-address + 15, addrspace 1) + ; SI: G_STORE [[COPY17]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; SI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64) ; SI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32) ; SI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C1]](s32) ; SI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C2]](s32) ; SI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) - ; SI: G_STORE [[COPY18]](s32), [[PTR_ADD15]](p1) :: (store 1 into unknown-address + 16, addrspace 1) + ; SI: G_STORE [[COPY18]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) ; SI: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) ; SI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LSHR12]](s32) - ; SI: G_STORE [[COPY19]](s32), [[PTR_ADD16]](p1) :: (store 1 into unknown-address + 17, addrspace 1) + ; SI: G_STORE [[COPY19]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) ; SI: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) ; SI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LSHR13]](s32) - ; SI: G_STORE [[COPY20]](s32), [[PTR_ADD17]](p1) :: (store 1 into unknown-address + 18, addrspace 1) + ; SI: G_STORE [[COPY20]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) ; SI: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) ; SI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LSHR14]](s32) - ; SI: G_STORE [[COPY21]](s32), [[PTR_ADD18]](p1) :: (store 1 into unknown-address + 19, addrspace 1) + ; SI: G_STORE [[COPY21]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) ; CI-LABEL: name: test_store_global_v5p3_align1 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) ; CI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 ; CI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, align 1, addrspace 1) + ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 1, addrspace 1) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, align 1, addrspace 1) + ; CI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1) ; VI-LABEL: name: test_store_global_v5p3_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 @@ -5509,96 +5509,96 @@ body: | ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 1 into unknown-address + 3, addrspace 1) + ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 1 into unknown-address + 4, addrspace 1) + ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 1 into unknown-address + 5, addrspace 1) + ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store 1 into unknown-address + 6, addrspace 1) + ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store 1 into unknown-address + 7, addrspace 1) + ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store 1 into unknown-address + 8, addrspace 1) + ; VI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) - ; VI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store 1 into unknown-address + 9, addrspace 1) + ; VI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) - ; VI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store 1 into unknown-address + 10, addrspace 1) + ; VI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) - ; VI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store 1 into unknown-address + 11, addrspace 1) + ; VI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) ; VI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) ; VI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32) ; VI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32) ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI: G_STORE [[COPY14]](s32), [[PTR_ADD11]](p1) :: (store 1 into unknown-address + 12, addrspace 1) + ; VI: G_STORE [[COPY14]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; VI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64) ; VI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) - ; VI: G_STORE [[COPY15]](s32), [[PTR_ADD12]](p1) :: (store 1 into unknown-address + 13, addrspace 1) + ; VI: G_STORE [[COPY15]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; VI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) ; VI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) - ; VI: G_STORE [[COPY16]](s32), [[PTR_ADD13]](p1) :: (store 1 into unknown-address + 14, addrspace 1) + ; VI: G_STORE [[COPY16]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; VI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64) ; VI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) - ; VI: G_STORE [[COPY17]](s32), [[PTR_ADD14]](p1) :: (store 1 into unknown-address + 15, addrspace 1) + ; VI: G_STORE [[COPY17]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; VI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64) ; VI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32) ; VI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C1]](s32) ; VI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C2]](s32) ; VI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) - ; VI: G_STORE [[COPY18]](s32), [[PTR_ADD15]](p1) :: (store 1 into unknown-address + 16, addrspace 1) + ; VI: G_STORE [[COPY18]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) ; VI: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) ; VI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LSHR12]](s32) - ; VI: G_STORE [[COPY19]](s32), [[PTR_ADD16]](p1) :: (store 1 into unknown-address + 17, addrspace 1) + ; VI: G_STORE [[COPY19]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) ; VI: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) ; VI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LSHR13]](s32) - ; VI: G_STORE [[COPY20]](s32), [[PTR_ADD17]](p1) :: (store 1 into unknown-address + 18, addrspace 1) + ; VI: G_STORE [[COPY20]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) ; VI: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) ; VI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LSHR14]](s32) - ; VI: G_STORE [[COPY21]](s32), [[PTR_ADD18]](p1) :: (store 1 into unknown-address + 19, addrspace 1) + ; VI: G_STORE [[COPY21]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) ; GFX9-LABEL: name: test_store_global_v5p3_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) ; GFX9: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 ; GFX9: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, align 1, addrspace 1) + ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 1, addrspace 1) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, align 1, addrspace 1) + ; GFX9: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - G_STORE %1, %0 :: (store 20, align 1, addrspace 1) + G_STORE %1, %0 :: (store (<5 x p3>), align 1, addrspace 1) ... --- @@ -5618,53 +5618,53 @@ body: | ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 2 into unknown-address + 2, addrspace 1) + ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 2 into unknown-address + 4, addrspace 1) + ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 2 into unknown-address + 6, addrspace 1) + ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<2 x s32>) ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 2 into unknown-address + 8, addrspace 1) + ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 2 into unknown-address + 10, addrspace 1) + ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store 2 into unknown-address + 12, addrspace 1) + ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store 2 into unknown-address + 14, addrspace 1) + ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32) ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) - ; SI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store 2 into unknown-address + 16, addrspace 1) + ; SI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; SI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store 2 into unknown-address + 18, addrspace 1) + ; SI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) ; CI-LABEL: name: test_store_global_v5p3_align2 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) ; CI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 ; CI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, align 2, addrspace 1) + ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 2, addrspace 1) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, align 2, addrspace 1) + ; CI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1) ; VI-LABEL: name: test_store_global_v5p3_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 @@ -5676,56 +5676,56 @@ body: | ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 2 into unknown-address + 2, addrspace 1) + ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 2 into unknown-address + 4, addrspace 1) + ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 2 into unknown-address + 6, addrspace 1) + ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<2 x s32>) ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32) ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 2 into unknown-address + 8, addrspace 1) + ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 2 into unknown-address + 10, addrspace 1) + ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store 2 into unknown-address + 12, addrspace 1) + ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store 2 into unknown-address + 14, addrspace 1) + ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32) ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) - ; VI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store 2 into unknown-address + 16, addrspace 1) + ; VI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; VI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store 2 into unknown-address + 18, addrspace 1) + ; VI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) ; GFX9-LABEL: name: test_store_global_v5p3_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) ; GFX9: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 ; GFX9: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, align 2, addrspace 1) + ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 2, addrspace 1) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, align 2, addrspace 1) + ; GFX9: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - G_STORE %1, %0 :: (store 20, align 2, addrspace 1) + G_STORE %1, %0 :: (store (<5 x p3>), align 2, addrspace 1) ... --- @@ -5740,43 +5740,43 @@ body: | ; SI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) ; SI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 ; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; SI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, align 4, addrspace 1) + ; SI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 4, addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, addrspace 1) + ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) ; CI-LABEL: name: test_store_global_v5p3_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) ; CI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 ; CI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, align 4, addrspace 1) + ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 4, addrspace 1) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, addrspace 1) + ; CI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) ; VI-LABEL: name: test_store_global_v5p3_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) ; VI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 ; VI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; VI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, align 4, addrspace 1) + ; VI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 4, addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, addrspace 1) + ; VI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) ; GFX9-LABEL: name: test_store_global_v5p3_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) ; GFX9: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 ; GFX9: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, align 4, addrspace 1) + ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 4, addrspace 1) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, addrspace 1) + ; GFX9: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - G_STORE %1, %0 :: (store 20, align 4, addrspace 1) + G_STORE %1, %0 :: (store (<5 x p3>), align 4, addrspace 1) ... --- @@ -5791,43 +5791,43 @@ body: | ; SI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) ; SI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 ; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; SI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, align 8, addrspace 1) + ; SI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 8, addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, align 8, addrspace 1) + ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) ; CI-LABEL: name: test_store_global_v5p3_align8 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) ; CI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 ; CI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, align 8, addrspace 1) + ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 8, addrspace 1) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, align 8, addrspace 1) + ; CI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) ; VI-LABEL: name: test_store_global_v5p3_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) ; VI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 ; VI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; VI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, align 8, addrspace 1) + ; VI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 8, addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, align 8, addrspace 1) + ; VI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) ; GFX9-LABEL: name: test_store_global_v5p3_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) ; GFX9: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 ; GFX9: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, align 8, addrspace 1) + ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 8, addrspace 1) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, align 8, addrspace 1) + ; GFX9: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - G_STORE %1, %0 :: (store 20, align 8, addrspace 1) + G_STORE %1, %0 :: (store (<5 x p3>), align 8, addrspace 1) ... --- @@ -5842,43 +5842,43 @@ body: | ; SI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) ; SI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 ; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; SI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, addrspace 1) + ; SI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, align 16, addrspace 1) + ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) ; CI-LABEL: name: test_store_global_v5p3_align16 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) ; CI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 ; CI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, addrspace 1) + ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), addrspace 1) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, align 16, addrspace 1) + ; CI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) ; VI-LABEL: name: test_store_global_v5p3_align16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) ; VI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 ; VI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; VI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, addrspace 1) + ; VI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, align 16, addrspace 1) + ; VI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) ; GFX9-LABEL: name: test_store_global_v5p3_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) ; GFX9: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 ; GFX9: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, addrspace 1) + ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), addrspace 1) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, align 16, addrspace 1) + ; GFX9: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - G_STORE %1, %0 :: (store 20, align 16, addrspace 1) + G_STORE %1, %0 :: (store (<5 x p3>), align 16, addrspace 1) ... --- @@ -5893,43 +5893,43 @@ body: | ; SI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[DEF]](<10 x s16>) ; SI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 ; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; SI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, addrspace 1) + ; SI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, align 16, addrspace 1) + ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) ; CI-LABEL: name: test_store_global_v10s16_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[DEF:%[0-9]+]]:_(<10 x s16>) = G_IMPLICIT_DEF ; CI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[DEF]](<10 x s16>) ; CI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 ; CI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, addrspace 1) + ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), addrspace 1) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, align 16, addrspace 1) + ; CI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) ; VI-LABEL: name: test_store_global_v10s16_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[DEF:%[0-9]+]]:_(<10 x s16>) = G_IMPLICIT_DEF ; VI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[DEF]](<10 x s16>) ; VI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 ; VI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; VI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, addrspace 1) + ; VI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, align 16, addrspace 1) + ; VI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) ; GFX9-LABEL: name: test_store_global_v10s16_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[DEF:%[0-9]+]]:_(<10 x s16>) = G_IMPLICIT_DEF ; GFX9: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[DEF]](<10 x s16>) ; GFX9: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 ; GFX9: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, addrspace 1) + ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), addrspace 1) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, align 16, addrspace 1) + ; GFX9: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<10 x s16>) = G_IMPLICIT_DEF - G_STORE %1, %0 :: (store 20, align 16, addrspace 1) + G_STORE %1, %0 :: (store (<10 x s16>), align 16, addrspace 1) ... --- @@ -5941,22 +5941,42 @@ body: | ; SI-LABEL: name: test_store_global_v11s16_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[DEF:%[0-9]+]]:_(<11 x s16>) = G_IMPLICIT_DEF - ; SI: G_STORE [[DEF]](<11 x s16>), [[COPY]](p1) :: (store 20, align 16, addrspace 1) + ; SI: [[EXTRACT:%[0-9]+]]:_(<8 x s16>) = G_EXTRACT [[DEF]](<11 x s16>), 0 + ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<11 x s16>), 128 + ; SI: G_STORE [[EXTRACT]](<8 x s16>), [[COPY]](p1) :: (store (s128), addrspace 1) + ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; SI: G_STORE [[EXTRACT1]](<3 x s16>), [[PTR_ADD]](p1) :: (store (s48) into unknown-address + 16, align 16, addrspace 1) ; CI-LABEL: name: test_store_global_v11s16_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[DEF:%[0-9]+]]:_(<11 x s16>) = G_IMPLICIT_DEF - ; CI: G_STORE [[DEF]](<11 x s16>), [[COPY]](p1) :: (store 20, align 16, addrspace 1) + ; CI: [[EXTRACT:%[0-9]+]]:_(<8 x s16>) = G_EXTRACT [[DEF]](<11 x s16>), 0 + ; CI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<11 x s16>), 128 + ; CI: G_STORE [[EXTRACT]](<8 x s16>), [[COPY]](p1) :: (store (s128), addrspace 1) + ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CI: G_STORE [[EXTRACT1]](<3 x s16>), [[PTR_ADD]](p1) :: (store (s48) into unknown-address + 16, align 16, addrspace 1) ; VI-LABEL: name: test_store_global_v11s16_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[DEF:%[0-9]+]]:_(<11 x s16>) = G_IMPLICIT_DEF - ; VI: G_STORE [[DEF]](<11 x s16>), [[COPY]](p1) :: (store 20, align 16, addrspace 1) + ; VI: [[EXTRACT:%[0-9]+]]:_(<8 x s16>) = G_EXTRACT [[DEF]](<11 x s16>), 0 + ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<11 x s16>), 128 + ; VI: G_STORE [[EXTRACT]](<8 x s16>), [[COPY]](p1) :: (store (s128), addrspace 1) + ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; VI: G_STORE [[EXTRACT1]](<3 x s16>), [[PTR_ADD]](p1) :: (store (s48) into unknown-address + 16, align 16, addrspace 1) ; GFX9-LABEL: name: test_store_global_v11s16_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[DEF:%[0-9]+]]:_(<11 x s16>) = G_IMPLICIT_DEF - ; GFX9: G_STORE [[DEF]](<11 x s16>), [[COPY]](p1) :: (store 20, align 16, addrspace 1) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<8 x s16>) = G_EXTRACT [[DEF]](<11 x s16>), 0 + ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<11 x s16>), 128 + ; GFX9: G_STORE [[EXTRACT]](<8 x s16>), [[COPY]](p1) :: (store (s128), addrspace 1) + ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX9: G_STORE [[EXTRACT1]](<3 x s16>), [[PTR_ADD]](p1) :: (store (s48) into unknown-address + 16, align 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<11 x s16>) = G_IMPLICIT_DEF - G_STORE %1, %0 :: (store 20, align 16, addrspace 1) + G_STORE %1, %0 :: (store (<11 x s16>), align 16, addrspace 1) ... --- @@ -5968,22 +5988,46 @@ body: | ; SI-LABEL: name: test_store_global_v12s16_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF - ; SI: G_STORE [[DEF]](<12 x s16>), [[COPY]](p1) :: (store 20, align 16, addrspace 1) + ; SI: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[DEF]](<12 x s16>) + ; SI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 0 + ; SI: [[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 128 + ; SI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), addrspace 1) + ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; SI: G_STORE [[EXTRACT1]](<2 x s32>), [[PTR_ADD]](p1) :: (store (s64) into unknown-address + 16, align 16, addrspace 1) ; CI-LABEL: name: test_store_global_v12s16_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF - ; CI: G_STORE [[DEF]](<12 x s16>), [[COPY]](p1) :: (store 20, align 16, addrspace 1) + ; CI: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[DEF]](<12 x s16>) + ; CI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 0 + ; CI: [[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 128 + ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), addrspace 1) + ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CI: G_STORE [[EXTRACT1]](<2 x s32>), [[PTR_ADD]](p1) :: (store (s64) into unknown-address + 16, align 16, addrspace 1) ; VI-LABEL: name: test_store_global_v12s16_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF - ; VI: G_STORE [[DEF]](<12 x s16>), [[COPY]](p1) :: (store 20, align 16, addrspace 1) + ; VI: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[DEF]](<12 x s16>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 0 + ; VI: [[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 128 + ; VI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), addrspace 1) + ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; VI: G_STORE [[EXTRACT1]](<2 x s32>), [[PTR_ADD]](p1) :: (store (s64) into unknown-address + 16, align 16, addrspace 1) ; GFX9-LABEL: name: test_store_global_v12s16_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF - ; GFX9: G_STORE [[DEF]](<12 x s16>), [[COPY]](p1) :: (store 20, align 16, addrspace 1) + ; GFX9: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[DEF]](<12 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 0 + ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 128 + ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), addrspace 1) + ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX9: G_STORE [[EXTRACT1]](<2 x s32>), [[PTR_ADD]](p1) :: (store (s64) into unknown-address + 16, align 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<12 x s16>) = G_IMPLICIT_DEF - G_STORE %1, %0 :: (store 20, align 16, addrspace 1) + G_STORE %1, %0 :: (store (<12 x s16>), align 16, addrspace 1) ... --- @@ -6006,93 +6050,93 @@ body: | ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 1 into unknown-address + 3, addrspace 1) + ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 1 into unknown-address + 4, addrspace 1) + ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 1 into unknown-address + 5, addrspace 1) + ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store 1 into unknown-address + 6, addrspace 1) + ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store 1 into unknown-address + 7, addrspace 1) + ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store 1 into unknown-address + 8, addrspace 1) + ; SI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) - ; SI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store 1 into unknown-address + 9, addrspace 1) + ; SI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) - ; SI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store 1 into unknown-address + 10, addrspace 1) + ; SI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) - ; SI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store 1 into unknown-address + 11, addrspace 1) + ; SI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; SI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) ; SI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) ; SI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32) ; SI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32) ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI: G_STORE [[COPY14]](s32), [[PTR_ADD11]](p1) :: (store 1 into unknown-address + 12, addrspace 1) + ; SI: G_STORE [[COPY14]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; SI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64) ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) - ; SI: G_STORE [[COPY15]](s32), [[PTR_ADD12]](p1) :: (store 1 into unknown-address + 13, addrspace 1) + ; SI: G_STORE [[COPY15]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; SI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) - ; SI: G_STORE [[COPY16]](s32), [[PTR_ADD13]](p1) :: (store 1 into unknown-address + 14, addrspace 1) + ; SI: G_STORE [[COPY16]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; SI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64) ; SI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) - ; SI: G_STORE [[COPY17]](s32), [[PTR_ADD14]](p1) :: (store 1 into unknown-address + 15, addrspace 1) + ; SI: G_STORE [[COPY17]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; SI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64) ; SI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32) ; SI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C1]](s32) ; SI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C2]](s32) ; SI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) - ; SI: G_STORE [[COPY18]](s32), [[PTR_ADD15]](p1) :: (store 1 into unknown-address + 16, addrspace 1) + ; SI: G_STORE [[COPY18]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) ; SI: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) ; SI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LSHR12]](s32) - ; SI: G_STORE [[COPY19]](s32), [[PTR_ADD16]](p1) :: (store 1 into unknown-address + 17, addrspace 1) + ; SI: G_STORE [[COPY19]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) ; SI: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) ; SI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LSHR13]](s32) - ; SI: G_STORE [[COPY20]](s32), [[PTR_ADD17]](p1) :: (store 1 into unknown-address + 18, addrspace 1) + ; SI: G_STORE [[COPY20]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) ; SI: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) ; SI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LSHR14]](s32) - ; SI: G_STORE [[COPY21]](s32), [[PTR_ADD18]](p1) :: (store 1 into unknown-address + 19, addrspace 1) + ; SI: G_STORE [[COPY21]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) ; CI-LABEL: name: test_store_global_s160_align1 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) ; CI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 ; CI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, align 1, addrspace 1) + ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 1, addrspace 1) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, align 1, addrspace 1) + ; CI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1) ; VI-LABEL: name: test_store_global_s160_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 @@ -6107,96 +6151,96 @@ body: | ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 1 into unknown-address + 3, addrspace 1) + ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 1 into unknown-address + 4, addrspace 1) + ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 1 into unknown-address + 5, addrspace 1) + ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store 1 into unknown-address + 6, addrspace 1) + ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store 1 into unknown-address + 7, addrspace 1) + ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store 1 into unknown-address + 8, addrspace 1) + ; VI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) - ; VI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store 1 into unknown-address + 9, addrspace 1) + ; VI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) - ; VI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store 1 into unknown-address + 10, addrspace 1) + ; VI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) - ; VI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store 1 into unknown-address + 11, addrspace 1) + ; VI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) ; VI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) ; VI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32) ; VI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32) ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI: G_STORE [[COPY14]](s32), [[PTR_ADD11]](p1) :: (store 1 into unknown-address + 12, addrspace 1) + ; VI: G_STORE [[COPY14]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; VI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64) ; VI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) - ; VI: G_STORE [[COPY15]](s32), [[PTR_ADD12]](p1) :: (store 1 into unknown-address + 13, addrspace 1) + ; VI: G_STORE [[COPY15]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; VI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) ; VI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) - ; VI: G_STORE [[COPY16]](s32), [[PTR_ADD13]](p1) :: (store 1 into unknown-address + 14, addrspace 1) + ; VI: G_STORE [[COPY16]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; VI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64) ; VI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) - ; VI: G_STORE [[COPY17]](s32), [[PTR_ADD14]](p1) :: (store 1 into unknown-address + 15, addrspace 1) + ; VI: G_STORE [[COPY17]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; VI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64) ; VI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32) ; VI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C1]](s32) ; VI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C2]](s32) ; VI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) - ; VI: G_STORE [[COPY18]](s32), [[PTR_ADD15]](p1) :: (store 1 into unknown-address + 16, addrspace 1) + ; VI: G_STORE [[COPY18]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) ; VI: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) ; VI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LSHR12]](s32) - ; VI: G_STORE [[COPY19]](s32), [[PTR_ADD16]](p1) :: (store 1 into unknown-address + 17, addrspace 1) + ; VI: G_STORE [[COPY19]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) ; VI: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) ; VI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LSHR13]](s32) - ; VI: G_STORE [[COPY20]](s32), [[PTR_ADD17]](p1) :: (store 1 into unknown-address + 18, addrspace 1) + ; VI: G_STORE [[COPY20]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) ; VI: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) ; VI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LSHR14]](s32) - ; VI: G_STORE [[COPY21]](s32), [[PTR_ADD18]](p1) :: (store 1 into unknown-address + 19, addrspace 1) + ; VI: G_STORE [[COPY21]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) ; GFX9-LABEL: name: test_store_global_s160_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) ; GFX9: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 ; GFX9: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, align 1, addrspace 1) + ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 1, addrspace 1) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, align 1, addrspace 1) + ; GFX9: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - G_STORE %1, %0 :: (store 20, align 1, addrspace 1) + G_STORE %1, %0 :: (store (s160), align 1, addrspace 1) ... --- @@ -6216,53 +6260,53 @@ body: | ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 2 into unknown-address + 2, addrspace 1) + ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 2 into unknown-address + 4, addrspace 1) + ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 2 into unknown-address + 6, addrspace 1) + ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<2 x s32>) ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 2 into unknown-address + 8, addrspace 1) + ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 2 into unknown-address + 10, addrspace 1) + ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store 2 into unknown-address + 12, addrspace 1) + ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store 2 into unknown-address + 14, addrspace 1) + ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32) ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) - ; SI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store 2 into unknown-address + 16, addrspace 1) + ; SI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; SI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store 2 into unknown-address + 18, addrspace 1) + ; SI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) ; CI-LABEL: name: test_store_global_s160_align2 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) ; CI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 ; CI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, align 2, addrspace 1) + ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 2, addrspace 1) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, align 2, addrspace 1) + ; CI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1) ; VI-LABEL: name: test_store_global_s160_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 @@ -6274,56 +6318,56 @@ body: | ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 2 into unknown-address + 2, addrspace 1) + ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 2 into unknown-address + 4, addrspace 1) + ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 2 into unknown-address + 6, addrspace 1) + ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<2 x s32>) ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32) ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 2 into unknown-address + 8, addrspace 1) + ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 2 into unknown-address + 10, addrspace 1) + ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store 2 into unknown-address + 12, addrspace 1) + ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store 2 into unknown-address + 14, addrspace 1) + ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32) ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) - ; VI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store 2 into unknown-address + 16, addrspace 1) + ; VI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; VI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store 2 into unknown-address + 18, addrspace 1) + ; VI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) ; GFX9-LABEL: name: test_store_global_s160_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) ; GFX9: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 ; GFX9: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, align 2, addrspace 1) + ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 2, addrspace 1) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, align 2, addrspace 1) + ; GFX9: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - G_STORE %1, %0 :: (store 20, align 2, addrspace 1) + G_STORE %1, %0 :: (store (s160), align 2, addrspace 1) ... --- @@ -6338,43 +6382,43 @@ body: | ; SI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) ; SI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 ; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; SI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, align 4, addrspace 1) + ; SI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 4, addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, addrspace 1) + ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) ; CI-LABEL: name: test_store_global_s160_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) ; CI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 ; CI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, align 4, addrspace 1) + ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 4, addrspace 1) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, addrspace 1) + ; CI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) ; VI-LABEL: name: test_store_global_s160_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) ; VI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 ; VI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; VI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, align 4, addrspace 1) + ; VI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 4, addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, addrspace 1) + ; VI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) ; GFX9-LABEL: name: test_store_global_s160_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) ; GFX9: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 ; GFX9: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, align 4, addrspace 1) + ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 4, addrspace 1) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, addrspace 1) + ; GFX9: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - G_STORE %1, %0 :: (store 20, align 4, addrspace 1) + G_STORE %1, %0 :: (store (s160), align 4, addrspace 1) ... --- @@ -6389,43 +6433,43 @@ body: | ; SI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) ; SI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 ; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; SI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, align 8, addrspace 1) + ; SI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 8, addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, align 8, addrspace 1) + ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) ; CI-LABEL: name: test_store_global_s160_align8 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) ; CI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 ; CI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, align 8, addrspace 1) + ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 8, addrspace 1) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, align 8, addrspace 1) + ; CI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) ; VI-LABEL: name: test_store_global_s160_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) ; VI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 ; VI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; VI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, align 8, addrspace 1) + ; VI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 8, addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, align 8, addrspace 1) + ; VI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) ; GFX9-LABEL: name: test_store_global_s160_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) ; GFX9: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 ; GFX9: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, align 8, addrspace 1) + ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 8, addrspace 1) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, align 8, addrspace 1) + ; GFX9: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - G_STORE %1, %0 :: (store 20, align 8, addrspace 1) + G_STORE %1, %0 :: (store (s160), align 8, addrspace 1) ... --- @@ -6440,43 +6484,43 @@ body: | ; SI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) ; SI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 ; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; SI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, addrspace 1) + ; SI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, align 16, addrspace 1) + ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) ; CI-LABEL: name: test_store_global_s160_align16 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) ; CI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 ; CI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, addrspace 1) + ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), addrspace 1) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, align 16, addrspace 1) + ; CI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) ; VI-LABEL: name: test_store_global_s160_align16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) ; VI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 ; VI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; VI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, addrspace 1) + ; VI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, align 16, addrspace 1) + ; VI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) ; GFX9-LABEL: name: test_store_global_s160_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) ; GFX9: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 ; GFX9: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, addrspace 1) + ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), addrspace 1) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 16, align 16, addrspace 1) + ; GFX9: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - G_STORE %1, %0 :: (store 20, align 16, addrspace 1) + G_STORE %1, %0 :: (store (s160), align 16, addrspace 1) ... --- @@ -6497,67 +6541,67 @@ body: | ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 1 into unknown-address + 3, addrspace 1) + ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32) ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 1 into unknown-address + 4, addrspace 1) + ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 1 into unknown-address + 5, addrspace 1) + ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store 1 into unknown-address + 6, addrspace 1) + ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store 1 into unknown-address + 7, addrspace 1) + ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32) ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32) ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32) ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; SI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store 1 into unknown-address + 8, addrspace 1) + ; SI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) - ; SI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store 1 into unknown-address + 9, addrspace 1) + ; SI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) - ; SI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store 1 into unknown-address + 10, addrspace 1) + ; SI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) - ; SI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store 1 into unknown-address + 11, addrspace 1) + ; SI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; SI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) ; SI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) ; SI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32) ; SI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32) ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; SI: G_STORE [[COPY14]](s32), [[PTR_ADD11]](p1) :: (store 1 into unknown-address + 12, addrspace 1) + ; SI: G_STORE [[COPY14]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; SI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64) ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) - ; SI: G_STORE [[COPY15]](s32), [[PTR_ADD12]](p1) :: (store 1 into unknown-address + 13, addrspace 1) + ; SI: G_STORE [[COPY15]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; SI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) - ; SI: G_STORE [[COPY16]](s32), [[PTR_ADD13]](p1) :: (store 1 into unknown-address + 14, addrspace 1) + ; SI: G_STORE [[COPY16]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; SI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64) ; SI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) - ; SI: G_STORE [[COPY17]](s32), [[PTR_ADD14]](p1) :: (store 1 into unknown-address + 15, addrspace 1) + ; SI: G_STORE [[COPY17]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; SI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64) ; SI: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) @@ -6565,69 +6609,69 @@ body: | ; SI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32) ; SI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32) ; SI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) - ; SI: G_STORE [[COPY18]](s32), [[PTR_ADD15]](p1) :: (store 1 into unknown-address + 16, addrspace 1) + ; SI: G_STORE [[COPY18]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) ; SI: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) ; SI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LSHR12]](s32) - ; SI: G_STORE [[COPY19]](s32), [[PTR_ADD16]](p1) :: (store 1 into unknown-address + 17, addrspace 1) + ; SI: G_STORE [[COPY19]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) ; SI: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) ; SI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LSHR13]](s32) - ; SI: G_STORE [[COPY20]](s32), [[PTR_ADD17]](p1) :: (store 1 into unknown-address + 18, addrspace 1) + ; SI: G_STORE [[COPY20]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) ; SI: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) ; SI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LSHR14]](s32) - ; SI: G_STORE [[COPY21]](s32), [[PTR_ADD18]](p1) :: (store 1 into unknown-address + 19, addrspace 1) + ; SI: G_STORE [[COPY21]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) ; SI: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) ; SI: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32) ; SI: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32) ; SI: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32) ; SI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) - ; SI: G_STORE [[COPY22]](s32), [[PTR_ADD19]](p1) :: (store 1 into unknown-address + 20, addrspace 1) + ; SI: G_STORE [[COPY22]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1) ; SI: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C3]](s64) ; SI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LSHR15]](s32) - ; SI: G_STORE [[COPY23]](s32), [[PTR_ADD20]](p1) :: (store 1 into unknown-address + 21, addrspace 1) + ; SI: G_STORE [[COPY23]](s32), [[PTR_ADD20]](p1) :: (store (s8) into unknown-address + 21, addrspace 1) ; SI: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C4]](s64) ; SI: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LSHR16]](s32) - ; SI: G_STORE [[COPY24]](s32), [[PTR_ADD21]](p1) :: (store 1 into unknown-address + 22, addrspace 1) + ; SI: G_STORE [[COPY24]](s32), [[PTR_ADD21]](p1) :: (store (s8) into unknown-address + 22, addrspace 1) ; SI: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C5]](s64) ; SI: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LSHR17]](s32) - ; SI: G_STORE [[COPY25]](s32), [[PTR_ADD22]](p1) :: (store 1 into unknown-address + 23, addrspace 1) + ; SI: G_STORE [[COPY25]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1) ; SI: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C7]](s64) ; SI: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C]](s32) ; SI: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C1]](s32) ; SI: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C2]](s32) ; SI: [[COPY26:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) - ; SI: G_STORE [[COPY26]](s32), [[PTR_ADD23]](p1) :: (store 1 into unknown-address + 24, addrspace 1) + ; SI: G_STORE [[COPY26]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1) ; SI: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64) ; SI: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LSHR18]](s32) - ; SI: G_STORE [[COPY27]](s32), [[PTR_ADD24]](p1) :: (store 1 into unknown-address + 25, addrspace 1) + ; SI: G_STORE [[COPY27]](s32), [[PTR_ADD24]](p1) :: (store (s8) into unknown-address + 25, addrspace 1) ; SI: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64) ; SI: [[COPY28:%[0-9]+]]:_(s32) = COPY [[LSHR19]](s32) - ; SI: G_STORE [[COPY28]](s32), [[PTR_ADD25]](p1) :: (store 1 into unknown-address + 26, addrspace 1) + ; SI: G_STORE [[COPY28]](s32), [[PTR_ADD25]](p1) :: (store (s8) into unknown-address + 26, addrspace 1) ; SI: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64) ; SI: [[COPY29:%[0-9]+]]:_(s32) = COPY [[LSHR20]](s32) - ; SI: G_STORE [[COPY29]](s32), [[PTR_ADD26]](p1) :: (store 1 into unknown-address + 27, addrspace 1) + ; SI: G_STORE [[COPY29]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1) ; SI: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C8]](s64) ; SI: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV9]], [[C]](s32) ; SI: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV9]], [[C1]](s32) ; SI: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV9]], [[C2]](s32) ; SI: [[COPY30:%[0-9]+]]:_(s32) = COPY [[UV9]](s32) - ; SI: G_STORE [[COPY30]](s32), [[PTR_ADD27]](p1) :: (store 1 into unknown-address + 28, addrspace 1) + ; SI: G_STORE [[COPY30]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1) ; SI: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C3]](s64) ; SI: [[COPY31:%[0-9]+]]:_(s32) = COPY [[LSHR21]](s32) - ; SI: G_STORE [[COPY31]](s32), [[PTR_ADD28]](p1) :: (store 1 into unknown-address + 29, addrspace 1) + ; SI: G_STORE [[COPY31]](s32), [[PTR_ADD28]](p1) :: (store (s8) into unknown-address + 29, addrspace 1) ; SI: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C4]](s64) ; SI: [[COPY32:%[0-9]+]]:_(s32) = COPY [[LSHR22]](s32) - ; SI: G_STORE [[COPY32]](s32), [[PTR_ADD29]](p1) :: (store 1 into unknown-address + 30, addrspace 1) + ; SI: G_STORE [[COPY32]](s32), [[PTR_ADD29]](p1) :: (store (s8) into unknown-address + 30, addrspace 1) ; SI: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C5]](s64) ; SI: [[COPY33:%[0-9]+]]:_(s32) = COPY [[LSHR23]](s32) - ; SI: G_STORE [[COPY33]](s32), [[PTR_ADD30]](p1) :: (store 1 into unknown-address + 31, addrspace 1) + ; SI: G_STORE [[COPY33]](s32), [[PTR_ADD30]](p1) :: (store (s8) into unknown-address + 31, addrspace 1) ; CI-LABEL: name: test_store_global_v8s32_align1 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; CI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 1, addrspace 1) + ; CI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 1, addrspace 1) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, align 1, addrspace 1) + ; CI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, align 1, addrspace 1) ; VI-LABEL: name: test_store_global_v8s32_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 @@ -6640,67 +6684,67 @@ body: | ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 1 into unknown-address + 3, addrspace 1) + ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32) ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32) ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 1 into unknown-address + 4, addrspace 1) + ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 1 into unknown-address + 5, addrspace 1) + ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store 1 into unknown-address + 6, addrspace 1) + ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store 1 into unknown-address + 7, addrspace 1) + ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32) ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32) ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32) ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; VI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store 1 into unknown-address + 8, addrspace 1) + ; VI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) - ; VI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store 1 into unknown-address + 9, addrspace 1) + ; VI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) - ; VI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store 1 into unknown-address + 10, addrspace 1) + ; VI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) - ; VI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store 1 into unknown-address + 11, addrspace 1) + ; VI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) ; VI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) ; VI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32) ; VI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32) ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; VI: G_STORE [[COPY14]](s32), [[PTR_ADD11]](p1) :: (store 1 into unknown-address + 12, addrspace 1) + ; VI: G_STORE [[COPY14]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; VI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64) ; VI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) - ; VI: G_STORE [[COPY15]](s32), [[PTR_ADD12]](p1) :: (store 1 into unknown-address + 13, addrspace 1) + ; VI: G_STORE [[COPY15]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; VI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) ; VI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) - ; VI: G_STORE [[COPY16]](s32), [[PTR_ADD13]](p1) :: (store 1 into unknown-address + 14, addrspace 1) + ; VI: G_STORE [[COPY16]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; VI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64) ; VI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) - ; VI: G_STORE [[COPY17]](s32), [[PTR_ADD14]](p1) :: (store 1 into unknown-address + 15, addrspace 1) + ; VI: G_STORE [[COPY17]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; VI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64) ; VI: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) @@ -6708,72 +6752,72 @@ body: | ; VI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32) ; VI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32) ; VI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) - ; VI: G_STORE [[COPY18]](s32), [[PTR_ADD15]](p1) :: (store 1 into unknown-address + 16, addrspace 1) + ; VI: G_STORE [[COPY18]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) ; VI: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) ; VI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LSHR12]](s32) - ; VI: G_STORE [[COPY19]](s32), [[PTR_ADD16]](p1) :: (store 1 into unknown-address + 17, addrspace 1) + ; VI: G_STORE [[COPY19]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) ; VI: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) ; VI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LSHR13]](s32) - ; VI: G_STORE [[COPY20]](s32), [[PTR_ADD17]](p1) :: (store 1 into unknown-address + 18, addrspace 1) + ; VI: G_STORE [[COPY20]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) ; VI: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) ; VI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LSHR14]](s32) - ; VI: G_STORE [[COPY21]](s32), [[PTR_ADD18]](p1) :: (store 1 into unknown-address + 19, addrspace 1) + ; VI: G_STORE [[COPY21]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) ; VI: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) ; VI: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32) ; VI: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32) ; VI: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32) ; VI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) - ; VI: G_STORE [[COPY22]](s32), [[PTR_ADD19]](p1) :: (store 1 into unknown-address + 20, addrspace 1) + ; VI: G_STORE [[COPY22]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1) ; VI: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C3]](s64) ; VI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LSHR15]](s32) - ; VI: G_STORE [[COPY23]](s32), [[PTR_ADD20]](p1) :: (store 1 into unknown-address + 21, addrspace 1) + ; VI: G_STORE [[COPY23]](s32), [[PTR_ADD20]](p1) :: (store (s8) into unknown-address + 21, addrspace 1) ; VI: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C4]](s64) ; VI: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LSHR16]](s32) - ; VI: G_STORE [[COPY24]](s32), [[PTR_ADD21]](p1) :: (store 1 into unknown-address + 22, addrspace 1) + ; VI: G_STORE [[COPY24]](s32), [[PTR_ADD21]](p1) :: (store (s8) into unknown-address + 22, addrspace 1) ; VI: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C5]](s64) ; VI: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LSHR17]](s32) - ; VI: G_STORE [[COPY25]](s32), [[PTR_ADD22]](p1) :: (store 1 into unknown-address + 23, addrspace 1) + ; VI: G_STORE [[COPY25]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1) ; VI: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C7]](s64) ; VI: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C]](s32) ; VI: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C1]](s32) ; VI: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C2]](s32) ; VI: [[COPY26:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) - ; VI: G_STORE [[COPY26]](s32), [[PTR_ADD23]](p1) :: (store 1 into unknown-address + 24, addrspace 1) + ; VI: G_STORE [[COPY26]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1) ; VI: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64) ; VI: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LSHR18]](s32) - ; VI: G_STORE [[COPY27]](s32), [[PTR_ADD24]](p1) :: (store 1 into unknown-address + 25, addrspace 1) + ; VI: G_STORE [[COPY27]](s32), [[PTR_ADD24]](p1) :: (store (s8) into unknown-address + 25, addrspace 1) ; VI: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64) ; VI: [[COPY28:%[0-9]+]]:_(s32) = COPY [[LSHR19]](s32) - ; VI: G_STORE [[COPY28]](s32), [[PTR_ADD25]](p1) :: (store 1 into unknown-address + 26, addrspace 1) + ; VI: G_STORE [[COPY28]](s32), [[PTR_ADD25]](p1) :: (store (s8) into unknown-address + 26, addrspace 1) ; VI: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64) ; VI: [[COPY29:%[0-9]+]]:_(s32) = COPY [[LSHR20]](s32) - ; VI: G_STORE [[COPY29]](s32), [[PTR_ADD26]](p1) :: (store 1 into unknown-address + 27, addrspace 1) + ; VI: G_STORE [[COPY29]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1) ; VI: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C8]](s64) ; VI: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV9]], [[C]](s32) ; VI: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV9]], [[C1]](s32) ; VI: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV9]], [[C2]](s32) ; VI: [[COPY30:%[0-9]+]]:_(s32) = COPY [[UV9]](s32) - ; VI: G_STORE [[COPY30]](s32), [[PTR_ADD27]](p1) :: (store 1 into unknown-address + 28, addrspace 1) + ; VI: G_STORE [[COPY30]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1) ; VI: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C3]](s64) ; VI: [[COPY31:%[0-9]+]]:_(s32) = COPY [[LSHR21]](s32) - ; VI: G_STORE [[COPY31]](s32), [[PTR_ADD28]](p1) :: (store 1 into unknown-address + 29, addrspace 1) + ; VI: G_STORE [[COPY31]](s32), [[PTR_ADD28]](p1) :: (store (s8) into unknown-address + 29, addrspace 1) ; VI: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C4]](s64) ; VI: [[COPY32:%[0-9]+]]:_(s32) = COPY [[LSHR22]](s32) - ; VI: G_STORE [[COPY32]](s32), [[PTR_ADD29]](p1) :: (store 1 into unknown-address + 30, addrspace 1) + ; VI: G_STORE [[COPY32]](s32), [[PTR_ADD29]](p1) :: (store (s8) into unknown-address + 30, addrspace 1) ; VI: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C5]](s64) ; VI: [[COPY33:%[0-9]+]]:_(s32) = COPY [[LSHR23]](s32) - ; VI: G_STORE [[COPY33]](s32), [[PTR_ADD30]](p1) :: (store 1 into unknown-address + 31, addrspace 1) + ; VI: G_STORE [[COPY33]](s32), [[PTR_ADD30]](p1) :: (store (s8) into unknown-address + 31, addrspace 1) ; GFX9-LABEL: name: test_store_global_v8s32_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; GFX9: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 1, addrspace 1) + ; GFX9: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 1, addrspace 1) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, align 1, addrspace 1) + ; GFX9: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - G_STORE %1, %0 :: (store 32, align 1, addrspace 1) + G_STORE %1, %0 :: (store (<8 x s32>), align 1, addrspace 1) ... --- @@ -6791,75 +6835,75 @@ body: | ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 2 into unknown-address + 2, addrspace 1) + ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 2 into unknown-address + 4, addrspace 1) + ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 2 into unknown-address + 6, addrspace 1) + ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](<2 x s32>) ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) - ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 2 into unknown-address + 8, addrspace 1) + ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 2 into unknown-address + 10, addrspace 1) + ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) - ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store 2 into unknown-address + 12, addrspace 1) + ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store 2 into unknown-address + 14, addrspace 1) + ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; SI: [[UV8:%[0-9]+]]:_(<2 x s32>), [[UV9:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) ; SI: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV8]](<2 x s32>) ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV10]], [[C]](s32) ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV10]](s32) - ; SI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store 2 into unknown-address + 16, addrspace 1) + ; SI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; SI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store 2 into unknown-address + 18, addrspace 1) + ; SI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV11]], [[C]](s32) ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV11]](s32) - ; SI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store 2 into unknown-address + 20, addrspace 1) + ; SI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](s64) ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; SI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store 2 into unknown-address + 22, addrspace 1) + ; SI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store (s16) into unknown-address + 22, addrspace 1) ; SI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) ; SI: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV9]](<2 x s32>) ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV12]], [[C]](s32) ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[UV12]](s32) - ; SI: G_STORE [[COPY14]](s32), [[PTR_ADD11]](p1) :: (store 2 into unknown-address + 24, addrspace 1) + ; SI: G_STORE [[COPY14]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1) ; SI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) - ; SI: G_STORE [[COPY15]](s32), [[PTR_ADD12]](p1) :: (store 2 into unknown-address + 26, addrspace 1) + ; SI: G_STORE [[COPY15]](s32), [[PTR_ADD12]](p1) :: (store (s16) into unknown-address + 26, addrspace 1) ; SI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV13]], [[C]](s32) ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[UV13]](s32) - ; SI: G_STORE [[COPY16]](s32), [[PTR_ADD13]](p1) :: (store 2 into unknown-address + 28, addrspace 1) + ; SI: G_STORE [[COPY16]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) ; SI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64) ; SI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) - ; SI: G_STORE [[COPY17]](s32), [[PTR_ADD14]](p1) :: (store 2 into unknown-address + 30, addrspace 1) + ; SI: G_STORE [[COPY17]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1) ; CI-LABEL: name: test_store_global_v8s32_align2 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; CI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 2, addrspace 1) + ; CI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 2, addrspace 1) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, align 2, addrspace 1) + ; CI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, align 2, addrspace 1) ; VI-LABEL: name: test_store_global_v8s32_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 @@ -6869,78 +6913,78 @@ body: | ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 2 into unknown-address + 2, addrspace 1) + ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 2 into unknown-address + 4, addrspace 1) + ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 2 into unknown-address + 6, addrspace 1) + ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](<2 x s32>) ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32) ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) - ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 2 into unknown-address + 8, addrspace 1) + ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 2 into unknown-address + 10, addrspace 1) + ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32) ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) - ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store 2 into unknown-address + 12, addrspace 1) + ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store 2 into unknown-address + 14, addrspace 1) + ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; VI: [[UV8:%[0-9]+]]:_(<2 x s32>), [[UV9:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) ; VI: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV8]](<2 x s32>) ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV10]], [[C]](s32) ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV10]](s32) - ; VI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store 2 into unknown-address + 16, addrspace 1) + ; VI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; VI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store 2 into unknown-address + 18, addrspace 1) + ; VI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV11]], [[C]](s32) ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV11]](s32) - ; VI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store 2 into unknown-address + 20, addrspace 1) + ; VI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](s64) ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; VI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store 2 into unknown-address + 22, addrspace 1) + ; VI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store (s16) into unknown-address + 22, addrspace 1) ; VI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) ; VI: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV9]](<2 x s32>) ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV12]], [[C]](s32) ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[UV12]](s32) - ; VI: G_STORE [[COPY14]](s32), [[PTR_ADD11]](p1) :: (store 2 into unknown-address + 24, addrspace 1) + ; VI: G_STORE [[COPY14]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1) ; VI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) ; VI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) - ; VI: G_STORE [[COPY15]](s32), [[PTR_ADD12]](p1) :: (store 2 into unknown-address + 26, addrspace 1) + ; VI: G_STORE [[COPY15]](s32), [[PTR_ADD12]](p1) :: (store (s16) into unknown-address + 26, addrspace 1) ; VI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV13]], [[C]](s32) ; VI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[UV13]](s32) - ; VI: G_STORE [[COPY16]](s32), [[PTR_ADD13]](p1) :: (store 2 into unknown-address + 28, addrspace 1) + ; VI: G_STORE [[COPY16]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) ; VI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64) ; VI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) - ; VI: G_STORE [[COPY17]](s32), [[PTR_ADD14]](p1) :: (store 2 into unknown-address + 30, addrspace 1) + ; VI: G_STORE [[COPY17]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1) ; GFX9-LABEL: name: test_store_global_v8s32_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; GFX9: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 2, addrspace 1) + ; GFX9: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 2, addrspace 1) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, align 2, addrspace 1) + ; GFX9: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - G_STORE %1, %0 :: (store 32, align 2, addrspace 1) + G_STORE %1, %0 :: (store (<8 x s32>), align 2, addrspace 1) ... --- @@ -6953,37 +6997,37 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; SI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; SI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 4, addrspace 1) + ; SI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 4, addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, align 4, addrspace 1) + ; SI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, align 4, addrspace 1) ; CI-LABEL: name: test_store_global_v8s32_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; CI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 4, addrspace 1) + ; CI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 4, addrspace 1) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, align 4, addrspace 1) + ; CI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, align 4, addrspace 1) ; VI-LABEL: name: test_store_global_v8s32_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; VI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 4, addrspace 1) + ; VI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 4, addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, align 4, addrspace 1) + ; VI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, align 4, addrspace 1) ; GFX9-LABEL: name: test_store_global_v8s32_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; GFX9: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 4, addrspace 1) + ; GFX9: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 4, addrspace 1) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, align 4, addrspace 1) + ; GFX9: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, align 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - G_STORE %1, %0 :: (store 32, align 4, addrspace 1) + G_STORE %1, %0 :: (store (<8 x s32>), align 4, addrspace 1) ... --- @@ -6996,37 +7040,37 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; SI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; SI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 8, addrspace 1) + ; SI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 8, addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, align 8, addrspace 1) + ; SI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, align 8, addrspace 1) ; CI-LABEL: name: test_store_global_v8s32_align8 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; CI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 8, addrspace 1) + ; CI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 8, addrspace 1) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, align 8, addrspace 1) + ; CI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, align 8, addrspace 1) ; VI-LABEL: name: test_store_global_v8s32_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; VI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 8, addrspace 1) + ; VI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 8, addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, align 8, addrspace 1) + ; VI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, align 8, addrspace 1) ; GFX9-LABEL: name: test_store_global_v8s32_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; GFX9: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 8, addrspace 1) + ; GFX9: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 8, addrspace 1) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, align 8, addrspace 1) + ; GFX9: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, align 8, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - G_STORE %1, %0 :: (store 32, align 8, addrspace 1) + G_STORE %1, %0 :: (store (<8 x s32>), align 8, addrspace 1) ... --- @@ -7039,37 +7083,37 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; SI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; SI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, addrspace 1) + ; SI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (s128), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, addrspace 1) + ; SI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, addrspace 1) ; CI-LABEL: name: test_store_global_v8s32_align16 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; CI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, addrspace 1) + ; CI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (s128), addrspace 1) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, addrspace 1) + ; CI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, addrspace 1) ; VI-LABEL: name: test_store_global_v8s32_align16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; VI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, addrspace 1) + ; VI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (s128), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, addrspace 1) + ; VI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, addrspace 1) ; GFX9-LABEL: name: test_store_global_v8s32_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; GFX9: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, addrspace 1) + ; GFX9: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (s128), addrspace 1) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, addrspace 1) + ; GFX9: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - G_STORE %1, %0 :: (store 32, align 16, addrspace 1) + G_STORE %1, %0 :: (store (<8 x s32>), align 16, addrspace 1) ... --- @@ -7083,40 +7127,40 @@ body: | ; SI: [[COPY1:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; SI: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](<2 x s128>) ; SI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; SI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 32, addrspace 1) + ; SI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 32, addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, addrspace 1) + ; SI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, addrspace 1) ; CI-LABEL: name: test_store_global_v2s128_align32 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](<2 x s128>) ; CI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; CI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 32, addrspace 1) + ; CI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 32, addrspace 1) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, addrspace 1) + ; CI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, addrspace 1) ; VI-LABEL: name: test_store_global_v2s128_align32 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](<2 x s128>) ; VI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; VI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 32, addrspace 1) + ; VI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 32, addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, addrspace 1) + ; VI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, addrspace 1) ; GFX9-LABEL: name: test_store_global_v2s128_align32 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](<2 x s128>) ; GFX9: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; GFX9: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 32, addrspace 1) + ; GFX9: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 32, addrspace 1) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, addrspace 1) + ; GFX9: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x s128>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - G_STORE %1, %0 :: (store 32, align 32, addrspace 1) + G_STORE %1, %0 :: (store (<2 x s128>), align 32, addrspace 1) ... --- @@ -7138,67 +7182,67 @@ body: | ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 1 into unknown-address + 3, addrspace 1) + ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32) ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 1 into unknown-address + 4, addrspace 1) + ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 1 into unknown-address + 5, addrspace 1) + ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store 1 into unknown-address + 6, addrspace 1) + ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store 1 into unknown-address + 7, addrspace 1) + ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32) ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32) ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32) ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; SI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store 1 into unknown-address + 8, addrspace 1) + ; SI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) - ; SI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store 1 into unknown-address + 9, addrspace 1) + ; SI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) - ; SI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store 1 into unknown-address + 10, addrspace 1) + ; SI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) - ; SI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store 1 into unknown-address + 11, addrspace 1) + ; SI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; SI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) ; SI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) ; SI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32) ; SI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32) ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; SI: G_STORE [[COPY14]](s32), [[PTR_ADD11]](p1) :: (store 1 into unknown-address + 12, addrspace 1) + ; SI: G_STORE [[COPY14]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; SI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64) ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) - ; SI: G_STORE [[COPY15]](s32), [[PTR_ADD12]](p1) :: (store 1 into unknown-address + 13, addrspace 1) + ; SI: G_STORE [[COPY15]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; SI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) - ; SI: G_STORE [[COPY16]](s32), [[PTR_ADD13]](p1) :: (store 1 into unknown-address + 14, addrspace 1) + ; SI: G_STORE [[COPY16]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; SI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64) ; SI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) - ; SI: G_STORE [[COPY17]](s32), [[PTR_ADD14]](p1) :: (store 1 into unknown-address + 15, addrspace 1) + ; SI: G_STORE [[COPY17]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; SI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64) ; SI: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) @@ -7206,70 +7250,70 @@ body: | ; SI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32) ; SI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32) ; SI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) - ; SI: G_STORE [[COPY18]](s32), [[PTR_ADD15]](p1) :: (store 1 into unknown-address + 16, addrspace 1) + ; SI: G_STORE [[COPY18]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) ; SI: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) ; SI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LSHR12]](s32) - ; SI: G_STORE [[COPY19]](s32), [[PTR_ADD16]](p1) :: (store 1 into unknown-address + 17, addrspace 1) + ; SI: G_STORE [[COPY19]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) ; SI: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) ; SI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LSHR13]](s32) - ; SI: G_STORE [[COPY20]](s32), [[PTR_ADD17]](p1) :: (store 1 into unknown-address + 18, addrspace 1) + ; SI: G_STORE [[COPY20]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) ; SI: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) ; SI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LSHR14]](s32) - ; SI: G_STORE [[COPY21]](s32), [[PTR_ADD18]](p1) :: (store 1 into unknown-address + 19, addrspace 1) + ; SI: G_STORE [[COPY21]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) ; SI: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) ; SI: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32) ; SI: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32) ; SI: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32) ; SI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) - ; SI: G_STORE [[COPY22]](s32), [[PTR_ADD19]](p1) :: (store 1 into unknown-address + 20, addrspace 1) + ; SI: G_STORE [[COPY22]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1) ; SI: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C3]](s64) ; SI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LSHR15]](s32) - ; SI: G_STORE [[COPY23]](s32), [[PTR_ADD20]](p1) :: (store 1 into unknown-address + 21, addrspace 1) + ; SI: G_STORE [[COPY23]](s32), [[PTR_ADD20]](p1) :: (store (s8) into unknown-address + 21, addrspace 1) ; SI: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C4]](s64) ; SI: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LSHR16]](s32) - ; SI: G_STORE [[COPY24]](s32), [[PTR_ADD21]](p1) :: (store 1 into unknown-address + 22, addrspace 1) + ; SI: G_STORE [[COPY24]](s32), [[PTR_ADD21]](p1) :: (store (s8) into unknown-address + 22, addrspace 1) ; SI: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C5]](s64) ; SI: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LSHR17]](s32) - ; SI: G_STORE [[COPY25]](s32), [[PTR_ADD22]](p1) :: (store 1 into unknown-address + 23, addrspace 1) + ; SI: G_STORE [[COPY25]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1) ; SI: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C7]](s64) ; SI: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C]](s32) ; SI: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C1]](s32) ; SI: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C2]](s32) ; SI: [[COPY26:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) - ; SI: G_STORE [[COPY26]](s32), [[PTR_ADD23]](p1) :: (store 1 into unknown-address + 24, addrspace 1) + ; SI: G_STORE [[COPY26]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1) ; SI: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64) ; SI: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LSHR18]](s32) - ; SI: G_STORE [[COPY27]](s32), [[PTR_ADD24]](p1) :: (store 1 into unknown-address + 25, addrspace 1) + ; SI: G_STORE [[COPY27]](s32), [[PTR_ADD24]](p1) :: (store (s8) into unknown-address + 25, addrspace 1) ; SI: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64) ; SI: [[COPY28:%[0-9]+]]:_(s32) = COPY [[LSHR19]](s32) - ; SI: G_STORE [[COPY28]](s32), [[PTR_ADD25]](p1) :: (store 1 into unknown-address + 26, addrspace 1) + ; SI: G_STORE [[COPY28]](s32), [[PTR_ADD25]](p1) :: (store (s8) into unknown-address + 26, addrspace 1) ; SI: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64) ; SI: [[COPY29:%[0-9]+]]:_(s32) = COPY [[LSHR20]](s32) - ; SI: G_STORE [[COPY29]](s32), [[PTR_ADD26]](p1) :: (store 1 into unknown-address + 27, addrspace 1) + ; SI: G_STORE [[COPY29]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1) ; SI: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C8]](s64) ; SI: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV9]], [[C]](s32) ; SI: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV9]], [[C1]](s32) ; SI: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV9]], [[C2]](s32) ; SI: [[COPY30:%[0-9]+]]:_(s32) = COPY [[UV9]](s32) - ; SI: G_STORE [[COPY30]](s32), [[PTR_ADD27]](p1) :: (store 1 into unknown-address + 28, addrspace 1) + ; SI: G_STORE [[COPY30]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1) ; SI: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C3]](s64) ; SI: [[COPY31:%[0-9]+]]:_(s32) = COPY [[LSHR21]](s32) - ; SI: G_STORE [[COPY31]](s32), [[PTR_ADD28]](p1) :: (store 1 into unknown-address + 29, addrspace 1) + ; SI: G_STORE [[COPY31]](s32), [[PTR_ADD28]](p1) :: (store (s8) into unknown-address + 29, addrspace 1) ; SI: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C4]](s64) ; SI: [[COPY32:%[0-9]+]]:_(s32) = COPY [[LSHR22]](s32) - ; SI: G_STORE [[COPY32]](s32), [[PTR_ADD29]](p1) :: (store 1 into unknown-address + 30, addrspace 1) + ; SI: G_STORE [[COPY32]](s32), [[PTR_ADD29]](p1) :: (store (s8) into unknown-address + 30, addrspace 1) ; SI: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C5]](s64) ; SI: [[COPY33:%[0-9]+]]:_(s32) = COPY [[LSHR23]](s32) - ; SI: G_STORE [[COPY33]](s32), [[PTR_ADD30]](p1) :: (store 1 into unknown-address + 31, addrspace 1) + ; SI: G_STORE [[COPY33]](s32), [[PTR_ADD30]](p1) :: (store (s8) into unknown-address + 31, addrspace 1) ; CI-LABEL: name: test_store_global_s256_align1 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) ; CI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; CI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 1, addrspace 1) + ; CI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 1, addrspace 1) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, align 1, addrspace 1) + ; CI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, align 1, addrspace 1) ; VI-LABEL: name: test_store_global_s256_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 @@ -7283,67 +7327,67 @@ body: | ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 1 into unknown-address + 3, addrspace 1) + ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32) ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32) ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 1 into unknown-address + 4, addrspace 1) + ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 1 into unknown-address + 5, addrspace 1) + ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store 1 into unknown-address + 6, addrspace 1) + ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store 1 into unknown-address + 7, addrspace 1) + ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32) ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32) ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32) ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; VI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store 1 into unknown-address + 8, addrspace 1) + ; VI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) - ; VI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store 1 into unknown-address + 9, addrspace 1) + ; VI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) - ; VI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store 1 into unknown-address + 10, addrspace 1) + ; VI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) - ; VI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store 1 into unknown-address + 11, addrspace 1) + ; VI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) ; VI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) ; VI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32) ; VI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32) ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; VI: G_STORE [[COPY14]](s32), [[PTR_ADD11]](p1) :: (store 1 into unknown-address + 12, addrspace 1) + ; VI: G_STORE [[COPY14]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; VI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64) ; VI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) - ; VI: G_STORE [[COPY15]](s32), [[PTR_ADD12]](p1) :: (store 1 into unknown-address + 13, addrspace 1) + ; VI: G_STORE [[COPY15]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; VI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) ; VI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) - ; VI: G_STORE [[COPY16]](s32), [[PTR_ADD13]](p1) :: (store 1 into unknown-address + 14, addrspace 1) + ; VI: G_STORE [[COPY16]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; VI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64) ; VI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) - ; VI: G_STORE [[COPY17]](s32), [[PTR_ADD14]](p1) :: (store 1 into unknown-address + 15, addrspace 1) + ; VI: G_STORE [[COPY17]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; VI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64) ; VI: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) @@ -7351,73 +7395,73 @@ body: | ; VI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32) ; VI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32) ; VI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) - ; VI: G_STORE [[COPY18]](s32), [[PTR_ADD15]](p1) :: (store 1 into unknown-address + 16, addrspace 1) + ; VI: G_STORE [[COPY18]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) ; VI: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) ; VI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LSHR12]](s32) - ; VI: G_STORE [[COPY19]](s32), [[PTR_ADD16]](p1) :: (store 1 into unknown-address + 17, addrspace 1) + ; VI: G_STORE [[COPY19]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) ; VI: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) ; VI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LSHR13]](s32) - ; VI: G_STORE [[COPY20]](s32), [[PTR_ADD17]](p1) :: (store 1 into unknown-address + 18, addrspace 1) + ; VI: G_STORE [[COPY20]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) ; VI: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) ; VI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LSHR14]](s32) - ; VI: G_STORE [[COPY21]](s32), [[PTR_ADD18]](p1) :: (store 1 into unknown-address + 19, addrspace 1) + ; VI: G_STORE [[COPY21]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) ; VI: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) ; VI: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32) ; VI: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32) ; VI: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32) ; VI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) - ; VI: G_STORE [[COPY22]](s32), [[PTR_ADD19]](p1) :: (store 1 into unknown-address + 20, addrspace 1) + ; VI: G_STORE [[COPY22]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1) ; VI: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C3]](s64) ; VI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LSHR15]](s32) - ; VI: G_STORE [[COPY23]](s32), [[PTR_ADD20]](p1) :: (store 1 into unknown-address + 21, addrspace 1) + ; VI: G_STORE [[COPY23]](s32), [[PTR_ADD20]](p1) :: (store (s8) into unknown-address + 21, addrspace 1) ; VI: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C4]](s64) ; VI: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LSHR16]](s32) - ; VI: G_STORE [[COPY24]](s32), [[PTR_ADD21]](p1) :: (store 1 into unknown-address + 22, addrspace 1) + ; VI: G_STORE [[COPY24]](s32), [[PTR_ADD21]](p1) :: (store (s8) into unknown-address + 22, addrspace 1) ; VI: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C5]](s64) ; VI: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LSHR17]](s32) - ; VI: G_STORE [[COPY25]](s32), [[PTR_ADD22]](p1) :: (store 1 into unknown-address + 23, addrspace 1) + ; VI: G_STORE [[COPY25]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1) ; VI: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C7]](s64) ; VI: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C]](s32) ; VI: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C1]](s32) ; VI: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C2]](s32) ; VI: [[COPY26:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) - ; VI: G_STORE [[COPY26]](s32), [[PTR_ADD23]](p1) :: (store 1 into unknown-address + 24, addrspace 1) + ; VI: G_STORE [[COPY26]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1) ; VI: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64) ; VI: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LSHR18]](s32) - ; VI: G_STORE [[COPY27]](s32), [[PTR_ADD24]](p1) :: (store 1 into unknown-address + 25, addrspace 1) + ; VI: G_STORE [[COPY27]](s32), [[PTR_ADD24]](p1) :: (store (s8) into unknown-address + 25, addrspace 1) ; VI: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64) ; VI: [[COPY28:%[0-9]+]]:_(s32) = COPY [[LSHR19]](s32) - ; VI: G_STORE [[COPY28]](s32), [[PTR_ADD25]](p1) :: (store 1 into unknown-address + 26, addrspace 1) + ; VI: G_STORE [[COPY28]](s32), [[PTR_ADD25]](p1) :: (store (s8) into unknown-address + 26, addrspace 1) ; VI: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64) ; VI: [[COPY29:%[0-9]+]]:_(s32) = COPY [[LSHR20]](s32) - ; VI: G_STORE [[COPY29]](s32), [[PTR_ADD26]](p1) :: (store 1 into unknown-address + 27, addrspace 1) + ; VI: G_STORE [[COPY29]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1) ; VI: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C8]](s64) ; VI: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV9]], [[C]](s32) ; VI: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV9]], [[C1]](s32) ; VI: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV9]], [[C2]](s32) ; VI: [[COPY30:%[0-9]+]]:_(s32) = COPY [[UV9]](s32) - ; VI: G_STORE [[COPY30]](s32), [[PTR_ADD27]](p1) :: (store 1 into unknown-address + 28, addrspace 1) + ; VI: G_STORE [[COPY30]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1) ; VI: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C3]](s64) ; VI: [[COPY31:%[0-9]+]]:_(s32) = COPY [[LSHR21]](s32) - ; VI: G_STORE [[COPY31]](s32), [[PTR_ADD28]](p1) :: (store 1 into unknown-address + 29, addrspace 1) + ; VI: G_STORE [[COPY31]](s32), [[PTR_ADD28]](p1) :: (store (s8) into unknown-address + 29, addrspace 1) ; VI: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C4]](s64) ; VI: [[COPY32:%[0-9]+]]:_(s32) = COPY [[LSHR22]](s32) - ; VI: G_STORE [[COPY32]](s32), [[PTR_ADD29]](p1) :: (store 1 into unknown-address + 30, addrspace 1) + ; VI: G_STORE [[COPY32]](s32), [[PTR_ADD29]](p1) :: (store (s8) into unknown-address + 30, addrspace 1) ; VI: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C5]](s64) ; VI: [[COPY33:%[0-9]+]]:_(s32) = COPY [[LSHR23]](s32) - ; VI: G_STORE [[COPY33]](s32), [[PTR_ADD30]](p1) :: (store 1 into unknown-address + 31, addrspace 1) + ; VI: G_STORE [[COPY33]](s32), [[PTR_ADD30]](p1) :: (store (s8) into unknown-address + 31, addrspace 1) ; GFX9-LABEL: name: test_store_global_s256_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) ; GFX9: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; GFX9: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 1, addrspace 1) + ; GFX9: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 1, addrspace 1) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, align 1, addrspace 1) + ; GFX9: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - G_STORE %1, %0 :: (store 32, align 1, addrspace 1) + G_STORE %1, %0 :: (store (s256), align 1, addrspace 1) ... --- @@ -7436,76 +7480,76 @@ body: | ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 2 into unknown-address + 2, addrspace 1) + ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 2 into unknown-address + 4, addrspace 1) + ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 2 into unknown-address + 6, addrspace 1) + ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](<2 x s32>) ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) - ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 2 into unknown-address + 8, addrspace 1) + ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 2 into unknown-address + 10, addrspace 1) + ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) - ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store 2 into unknown-address + 12, addrspace 1) + ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store 2 into unknown-address + 14, addrspace 1) + ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; SI: [[UV8:%[0-9]+]]:_(<2 x s32>), [[UV9:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) ; SI: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV8]](<2 x s32>) ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV10]], [[C]](s32) ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV10]](s32) - ; SI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store 2 into unknown-address + 16, addrspace 1) + ; SI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; SI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store 2 into unknown-address + 18, addrspace 1) + ; SI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV11]], [[C]](s32) ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV11]](s32) - ; SI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store 2 into unknown-address + 20, addrspace 1) + ; SI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](s64) ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; SI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store 2 into unknown-address + 22, addrspace 1) + ; SI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store (s16) into unknown-address + 22, addrspace 1) ; SI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) ; SI: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV9]](<2 x s32>) ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV12]], [[C]](s32) ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[UV12]](s32) - ; SI: G_STORE [[COPY14]](s32), [[PTR_ADD11]](p1) :: (store 2 into unknown-address + 24, addrspace 1) + ; SI: G_STORE [[COPY14]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1) ; SI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) - ; SI: G_STORE [[COPY15]](s32), [[PTR_ADD12]](p1) :: (store 2 into unknown-address + 26, addrspace 1) + ; SI: G_STORE [[COPY15]](s32), [[PTR_ADD12]](p1) :: (store (s16) into unknown-address + 26, addrspace 1) ; SI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV13]], [[C]](s32) ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[UV13]](s32) - ; SI: G_STORE [[COPY16]](s32), [[PTR_ADD13]](p1) :: (store 2 into unknown-address + 28, addrspace 1) + ; SI: G_STORE [[COPY16]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) ; SI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64) ; SI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) - ; SI: G_STORE [[COPY17]](s32), [[PTR_ADD14]](p1) :: (store 2 into unknown-address + 30, addrspace 1) + ; SI: G_STORE [[COPY17]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1) ; CI-LABEL: name: test_store_global_s256_align2 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) ; CI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; CI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 2, addrspace 1) + ; CI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 2, addrspace 1) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, align 2, addrspace 1) + ; CI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, align 2, addrspace 1) ; VI-LABEL: name: test_store_global_s256_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 @@ -7516,79 +7560,79 @@ body: | ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 2 into unknown-address + 2, addrspace 1) + ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 2 into unknown-address + 4, addrspace 1) + ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 2 into unknown-address + 6, addrspace 1) + ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](<2 x s32>) ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32) ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) - ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store 2 into unknown-address + 8, addrspace 1) + ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store 2 into unknown-address + 10, addrspace 1) + ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32) ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) - ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store 2 into unknown-address + 12, addrspace 1) + ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store 2 into unknown-address + 14, addrspace 1) + ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; VI: [[UV8:%[0-9]+]]:_(<2 x s32>), [[UV9:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) ; VI: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV8]](<2 x s32>) ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV10]], [[C]](s32) ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV10]](s32) - ; VI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store 2 into unknown-address + 16, addrspace 1) + ; VI: G_STORE [[COPY10]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; VI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store 2 into unknown-address + 18, addrspace 1) + ; VI: G_STORE [[COPY11]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV11]], [[C]](s32) ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV11]](s32) - ; VI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store 2 into unknown-address + 20, addrspace 1) + ; VI: G_STORE [[COPY12]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](s64) ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; VI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store 2 into unknown-address + 22, addrspace 1) + ; VI: G_STORE [[COPY13]](s32), [[PTR_ADD10]](p1) :: (store (s16) into unknown-address + 22, addrspace 1) ; VI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) ; VI: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV9]](<2 x s32>) ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV12]], [[C]](s32) ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[UV12]](s32) - ; VI: G_STORE [[COPY14]](s32), [[PTR_ADD11]](p1) :: (store 2 into unknown-address + 24, addrspace 1) + ; VI: G_STORE [[COPY14]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1) ; VI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) ; VI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) - ; VI: G_STORE [[COPY15]](s32), [[PTR_ADD12]](p1) :: (store 2 into unknown-address + 26, addrspace 1) + ; VI: G_STORE [[COPY15]](s32), [[PTR_ADD12]](p1) :: (store (s16) into unknown-address + 26, addrspace 1) ; VI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV13]], [[C]](s32) ; VI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[UV13]](s32) - ; VI: G_STORE [[COPY16]](s32), [[PTR_ADD13]](p1) :: (store 2 into unknown-address + 28, addrspace 1) + ; VI: G_STORE [[COPY16]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) ; VI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64) ; VI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) - ; VI: G_STORE [[COPY17]](s32), [[PTR_ADD14]](p1) :: (store 2 into unknown-address + 30, addrspace 1) + ; VI: G_STORE [[COPY17]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1) ; GFX9-LABEL: name: test_store_global_s256_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) ; GFX9: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; GFX9: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 2, addrspace 1) + ; GFX9: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 2, addrspace 1) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, align 2, addrspace 1) + ; GFX9: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - G_STORE %1, %0 :: (store 32, align 2, addrspace 1) + G_STORE %1, %0 :: (store (s256), align 2, addrspace 1) ... --- @@ -7602,40 +7646,40 @@ body: | ; SI: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; SI: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) ; SI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; SI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 4, addrspace 1) + ; SI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 4, addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, align 4, addrspace 1) + ; SI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, align 4, addrspace 1) ; CI-LABEL: name: test_store_global_s256_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) ; CI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; CI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 4, addrspace 1) + ; CI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 4, addrspace 1) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, align 4, addrspace 1) + ; CI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, align 4, addrspace 1) ; VI-LABEL: name: test_store_global_s256_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) ; VI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; VI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 4, addrspace 1) + ; VI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 4, addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, align 4, addrspace 1) + ; VI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, align 4, addrspace 1) ; GFX9-LABEL: name: test_store_global_s256_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) ; GFX9: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; GFX9: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 4, addrspace 1) + ; GFX9: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 4, addrspace 1) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, align 4, addrspace 1) + ; GFX9: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, align 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - G_STORE %1, %0 :: (store 32, align 4, addrspace 1) + G_STORE %1, %0 :: (store (s256), align 4, addrspace 1) ... --- @@ -7649,40 +7693,40 @@ body: | ; SI: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; SI: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) ; SI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; SI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 8, addrspace 1) + ; SI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 8, addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, align 8, addrspace 1) + ; SI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, align 8, addrspace 1) ; CI-LABEL: name: test_store_global_s256_align8 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) ; CI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; CI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 8, addrspace 1) + ; CI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 8, addrspace 1) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, align 8, addrspace 1) + ; CI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, align 8, addrspace 1) ; VI-LABEL: name: test_store_global_s256_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) ; VI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; VI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 8, addrspace 1) + ; VI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 8, addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, align 8, addrspace 1) + ; VI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, align 8, addrspace 1) ; GFX9-LABEL: name: test_store_global_s256_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) ; GFX9: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; GFX9: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 8, addrspace 1) + ; GFX9: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 8, addrspace 1) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, align 8, addrspace 1) + ; GFX9: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, align 8, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - G_STORE %1, %0 :: (store 32, align 8, addrspace 1) + G_STORE %1, %0 :: (store (s256), align 8, addrspace 1) ... --- @@ -7696,40 +7740,40 @@ body: | ; SI: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; SI: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) ; SI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; SI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, addrspace 1) + ; SI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (s128), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, addrspace 1) + ; SI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, addrspace 1) ; CI-LABEL: name: test_store_global_s256_align16 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) ; CI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; CI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, addrspace 1) + ; CI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (s128), addrspace 1) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, addrspace 1) + ; CI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, addrspace 1) ; VI-LABEL: name: test_store_global_s256_align16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) ; VI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; VI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, addrspace 1) + ; VI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (s128), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, addrspace 1) + ; VI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, addrspace 1) ; GFX9-LABEL: name: test_store_global_s256_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) ; GFX9: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; GFX9: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, addrspace 1) + ; GFX9: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (s128), addrspace 1) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, addrspace 1) + ; GFX9: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - G_STORE %1, %0 :: (store 32, align 16, addrspace 1) + G_STORE %1, %0 :: (store (s256), align 16, addrspace 1) ... --- @@ -7743,40 +7787,40 @@ body: | ; SI: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; SI: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) ; SI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; SI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 32, addrspace 1) + ; SI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 32, addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, addrspace 1) + ; SI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, addrspace 1) ; CI-LABEL: name: test_store_global_s256_align32 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) ; CI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; CI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 32, addrspace 1) + ; CI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 32, addrspace 1) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, addrspace 1) + ; CI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, addrspace 1) ; VI-LABEL: name: test_store_global_s256_align32 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) ; VI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; VI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 32, addrspace 1) + ; VI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 32, addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, addrspace 1) + ; VI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, addrspace 1) ; GFX9-LABEL: name: test_store_global_s256_align32 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) ; GFX9: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; GFX9: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 32, addrspace 1) + ; GFX9: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 32, addrspace 1) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, addrspace 1) + ; GFX9: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - G_STORE %1, %0 :: (store 32, align 32, addrspace 1) + G_STORE %1, %0 :: (store (s256), align 32, addrspace 1) ... --- @@ -7789,37 +7833,37 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; SI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; SI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 32, addrspace 1) + ; SI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 32, addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, addrspace 1) + ; SI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, addrspace 1) ; CI-LABEL: name: test_store_global_v8s32_align32 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; CI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; CI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 32, addrspace 1) + ; CI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 32, addrspace 1) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, addrspace 1) + ; CI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, addrspace 1) ; VI-LABEL: name: test_store_global_v8s32_align32 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; VI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 32, addrspace 1) + ; VI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 32, addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, addrspace 1) + ; VI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, addrspace 1) ; GFX9-LABEL: name: test_store_global_v8s32_align32 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; GFX9: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; GFX9: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 32, addrspace 1) + ; GFX9: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 32, addrspace 1) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, addrspace 1) + ; GFX9: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - G_STORE %1, %0 :: (store 32, align 32, addrspace 1) + G_STORE %1, %0 :: (store (<8 x s32>), align 32, addrspace 1) ... --- @@ -7845,67 +7889,67 @@ body: | ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; SI: G_STORE [[COPY4]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; SI: G_STORE [[COPY4]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD2]](p1) :: (store 1 into unknown-address + 3, addrspace 1) + ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD3]](p1) :: (store 1 into unknown-address + 4, addrspace 1) + ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD4]](p1) :: (store 1 into unknown-address + 5, addrspace 1) + ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; SI: G_STORE [[COPY10]](s32), [[PTR_ADD5]](p1) :: (store 1 into unknown-address + 6, addrspace 1) + ; SI: G_STORE [[COPY10]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64) ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; SI: G_STORE [[COPY11]](s32), [[PTR_ADD6]](p1) :: (store 1 into unknown-address + 7, addrspace 1) + ; SI: G_STORE [[COPY11]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; SI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI: G_STORE [[COPY12]](s32), [[PTR_ADD7]](p1) :: (store 1 into unknown-address + 8, addrspace 1) + ; SI: G_STORE [[COPY12]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) - ; SI: G_STORE [[COPY13]](s32), [[PTR_ADD8]](p1) :: (store 1 into unknown-address + 9, addrspace 1) + ; SI: G_STORE [[COPY13]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) - ; SI: G_STORE [[COPY14]](s32), [[PTR_ADD9]](p1) :: (store 1 into unknown-address + 10, addrspace 1) + ; SI: G_STORE [[COPY14]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) - ; SI: G_STORE [[COPY15]](s32), [[PTR_ADD10]](p1) :: (store 1 into unknown-address + 11, addrspace 1) + ; SI: G_STORE [[COPY15]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; SI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) ; SI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) ; SI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32) ; SI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32) ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI: G_STORE [[COPY16]](s32), [[PTR_ADD11]](p1) :: (store 1 into unknown-address + 12, addrspace 1) + ; SI: G_STORE [[COPY16]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; SI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64) ; SI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) - ; SI: G_STORE [[COPY17]](s32), [[PTR_ADD12]](p1) :: (store 1 into unknown-address + 13, addrspace 1) + ; SI: G_STORE [[COPY17]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; SI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) ; SI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) - ; SI: G_STORE [[COPY18]](s32), [[PTR_ADD13]](p1) :: (store 1 into unknown-address + 14, addrspace 1) + ; SI: G_STORE [[COPY18]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; SI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64) ; SI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) - ; SI: G_STORE [[COPY19]](s32), [[PTR_ADD14]](p1) :: (store 1 into unknown-address + 15, addrspace 1) + ; SI: G_STORE [[COPY19]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; SI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64) ; SI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT1]](<4 x s32>) @@ -7913,77 +7957,77 @@ body: | ; SI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32) ; SI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32) ; SI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; SI: G_STORE [[COPY20]](s32), [[PTR_ADD15]](p1) :: (store 1 into unknown-address + 16, addrspace 1) + ; SI: G_STORE [[COPY20]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) ; SI: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) ; SI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LSHR12]](s32) - ; SI: G_STORE [[COPY21]](s32), [[PTR_ADD16]](p1) :: (store 1 into unknown-address + 17, addrspace 1) + ; SI: G_STORE [[COPY21]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) ; SI: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) ; SI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LSHR13]](s32) - ; SI: G_STORE [[COPY22]](s32), [[PTR_ADD17]](p1) :: (store 1 into unknown-address + 18, addrspace 1) + ; SI: G_STORE [[COPY22]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) ; SI: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) ; SI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LSHR14]](s32) - ; SI: G_STORE [[COPY23]](s32), [[PTR_ADD18]](p1) :: (store 1 into unknown-address + 19, addrspace 1) + ; SI: G_STORE [[COPY23]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) ; SI: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) ; SI: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) ; SI: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32) ; SI: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32) ; SI: [[COPY24:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; SI: G_STORE [[COPY24]](s32), [[PTR_ADD19]](p1) :: (store 1 into unknown-address + 20, addrspace 1) + ; SI: G_STORE [[COPY24]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1) ; SI: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C3]](s64) ; SI: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LSHR15]](s32) - ; SI: G_STORE [[COPY25]](s32), [[PTR_ADD20]](p1) :: (store 1 into unknown-address + 21, addrspace 1) + ; SI: G_STORE [[COPY25]](s32), [[PTR_ADD20]](p1) :: (store (s8) into unknown-address + 21, addrspace 1) ; SI: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C4]](s64) ; SI: [[COPY26:%[0-9]+]]:_(s32) = COPY [[LSHR16]](s32) - ; SI: G_STORE [[COPY26]](s32), [[PTR_ADD21]](p1) :: (store 1 into unknown-address + 22, addrspace 1) + ; SI: G_STORE [[COPY26]](s32), [[PTR_ADD21]](p1) :: (store (s8) into unknown-address + 22, addrspace 1) ; SI: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C5]](s64) ; SI: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LSHR17]](s32) - ; SI: G_STORE [[COPY27]](s32), [[PTR_ADD22]](p1) :: (store 1 into unknown-address + 23, addrspace 1) + ; SI: G_STORE [[COPY27]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1) ; SI: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C7]](s64) ; SI: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32) ; SI: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32) ; SI: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32) ; SI: [[COPY28:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) - ; SI: G_STORE [[COPY28]](s32), [[PTR_ADD23]](p1) :: (store 1 into unknown-address + 24, addrspace 1) + ; SI: G_STORE [[COPY28]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1) ; SI: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64) ; SI: [[COPY29:%[0-9]+]]:_(s32) = COPY [[LSHR18]](s32) - ; SI: G_STORE [[COPY29]](s32), [[PTR_ADD24]](p1) :: (store 1 into unknown-address + 25, addrspace 1) + ; SI: G_STORE [[COPY29]](s32), [[PTR_ADD24]](p1) :: (store (s8) into unknown-address + 25, addrspace 1) ; SI: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64) ; SI: [[COPY30:%[0-9]+]]:_(s32) = COPY [[LSHR19]](s32) - ; SI: G_STORE [[COPY30]](s32), [[PTR_ADD25]](p1) :: (store 1 into unknown-address + 26, addrspace 1) + ; SI: G_STORE [[COPY30]](s32), [[PTR_ADD25]](p1) :: (store (s8) into unknown-address + 26, addrspace 1) ; SI: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64) ; SI: [[COPY31:%[0-9]+]]:_(s32) = COPY [[LSHR20]](s32) - ; SI: G_STORE [[COPY31]](s32), [[PTR_ADD26]](p1) :: (store 1 into unknown-address + 27, addrspace 1) + ; SI: G_STORE [[COPY31]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1) ; SI: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C8]](s64) ; SI: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32) ; SI: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32) ; SI: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32) ; SI: [[COPY32:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) - ; SI: G_STORE [[COPY32]](s32), [[PTR_ADD27]](p1) :: (store 1 into unknown-address + 28, addrspace 1) + ; SI: G_STORE [[COPY32]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1) ; SI: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C3]](s64) ; SI: [[COPY33:%[0-9]+]]:_(s32) = COPY [[LSHR21]](s32) - ; SI: G_STORE [[COPY33]](s32), [[PTR_ADD28]](p1) :: (store 1 into unknown-address + 29, addrspace 1) + ; SI: G_STORE [[COPY33]](s32), [[PTR_ADD28]](p1) :: (store (s8) into unknown-address + 29, addrspace 1) ; SI: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C4]](s64) ; SI: [[COPY34:%[0-9]+]]:_(s32) = COPY [[LSHR22]](s32) - ; SI: G_STORE [[COPY34]](s32), [[PTR_ADD29]](p1) :: (store 1 into unknown-address + 30, addrspace 1) + ; SI: G_STORE [[COPY34]](s32), [[PTR_ADD29]](p1) :: (store (s8) into unknown-address + 30, addrspace 1) ; SI: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C5]](s64) ; SI: [[COPY35:%[0-9]+]]:_(s32) = COPY [[LSHR23]](s32) - ; SI: G_STORE [[COPY35]](s32), [[PTR_ADD30]](p1) :: (store 1 into unknown-address + 31, addrspace 1) + ; SI: G_STORE [[COPY35]](s32), [[PTR_ADD30]](p1) :: (store (s8) into unknown-address + 31, addrspace 1) ; SI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; SI: [[PTR_ADD31:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C10]](s64) ; SI: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT2]], [[C]](s32) ; SI: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT2]], [[C1]](s32) ; SI: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT2]], [[C2]](s32) ; SI: [[COPY36:%[0-9]+]]:_(s32) = COPY [[EXTRACT2]](s32) - ; SI: G_STORE [[COPY36]](s32), [[PTR_ADD31]](p1) :: (store 1 into unknown-address + 32, addrspace 1) + ; SI: G_STORE [[COPY36]](s32), [[PTR_ADD31]](p1) :: (store (s8) into unknown-address + 32, addrspace 1) ; SI: [[PTR_ADD32:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD31]], [[C3]](s64) ; SI: [[COPY37:%[0-9]+]]:_(s32) = COPY [[LSHR24]](s32) - ; SI: G_STORE [[COPY37]](s32), [[PTR_ADD32]](p1) :: (store 1 into unknown-address + 33, addrspace 1) + ; SI: G_STORE [[COPY37]](s32), [[PTR_ADD32]](p1) :: (store (s8) into unknown-address + 33, addrspace 1) ; SI: [[PTR_ADD33:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD31]], [[C4]](s64) ; SI: [[COPY38:%[0-9]+]]:_(s32) = COPY [[LSHR25]](s32) - ; SI: G_STORE [[COPY38]](s32), [[PTR_ADD33]](p1) :: (store 1 into unknown-address + 34, addrspace 1) + ; SI: G_STORE [[COPY38]](s32), [[PTR_ADD33]](p1) :: (store (s8) into unknown-address + 34, addrspace 1) ; SI: [[PTR_ADD34:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD31]], [[C5]](s64) ; SI: [[COPY39:%[0-9]+]]:_(s32) = COPY [[LSHR26]](s32) - ; SI: G_STORE [[COPY39]](s32), [[PTR_ADD34]](p1) :: (store 1 into unknown-address + 35, addrspace 1) + ; SI: G_STORE [[COPY39]](s32), [[PTR_ADD34]](p1) :: (store (s8) into unknown-address + 35, addrspace 1) ; CI-LABEL: name: test_store_global_v9s32_align1 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 @@ -7993,13 +8037,13 @@ body: | ; CI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 ; CI: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 ; CI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, align 1, addrspace 1) + ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 1, addrspace 1) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, align 1, addrspace 1) + ; CI: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, align 1, addrspace 1) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store 4 into unknown-address + 32, align 1, addrspace 1) + ; CI: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 1, addrspace 1) ; VI-LABEL: name: test_store_global_v9s32_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 @@ -8017,67 +8061,67 @@ body: | ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; VI: G_STORE [[COPY4]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; VI: G_STORE [[COPY4]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD2]](p1) :: (store 1 into unknown-address + 3, addrspace 1) + ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD3]](p1) :: (store 1 into unknown-address + 4, addrspace 1) + ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 4, addrspace 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C3]](s64) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD4]](p1) :: (store 1 into unknown-address + 5, addrspace 1) + ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C4]](s64) ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; VI: G_STORE [[COPY10]](s32), [[PTR_ADD5]](p1) :: (store 1 into unknown-address + 6, addrspace 1) + ; VI: G_STORE [[COPY10]](s32), [[PTR_ADD5]](p1) :: (store (s8) into unknown-address + 6, addrspace 1) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C5]](s64) ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; VI: G_STORE [[COPY11]](s32), [[PTR_ADD6]](p1) :: (store 1 into unknown-address + 7, addrspace 1) + ; VI: G_STORE [[COPY11]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI: G_STORE [[COPY12]](s32), [[PTR_ADD7]](p1) :: (store 1 into unknown-address + 8, addrspace 1) + ; VI: G_STORE [[COPY12]](s32), [[PTR_ADD7]](p1) :: (store (s8) into unknown-address + 8, addrspace 1) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) - ; VI: G_STORE [[COPY13]](s32), [[PTR_ADD8]](p1) :: (store 1 into unknown-address + 9, addrspace 1) + ; VI: G_STORE [[COPY13]](s32), [[PTR_ADD8]](p1) :: (store (s8) into unknown-address + 9, addrspace 1) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) - ; VI: G_STORE [[COPY14]](s32), [[PTR_ADD9]](p1) :: (store 1 into unknown-address + 10, addrspace 1) + ; VI: G_STORE [[COPY14]](s32), [[PTR_ADD9]](p1) :: (store (s8) into unknown-address + 10, addrspace 1) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s64) ; VI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) - ; VI: G_STORE [[COPY15]](s32), [[PTR_ADD10]](p1) :: (store 1 into unknown-address + 11, addrspace 1) + ; VI: G_STORE [[COPY15]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; VI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) ; VI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) ; VI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32) ; VI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32) ; VI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI: G_STORE [[COPY16]](s32), [[PTR_ADD11]](p1) :: (store 1 into unknown-address + 12, addrspace 1) + ; VI: G_STORE [[COPY16]](s32), [[PTR_ADD11]](p1) :: (store (s8) into unknown-address + 12, addrspace 1) ; VI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C3]](s64) ; VI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) - ; VI: G_STORE [[COPY17]](s32), [[PTR_ADD12]](p1) :: (store 1 into unknown-address + 13, addrspace 1) + ; VI: G_STORE [[COPY17]](s32), [[PTR_ADD12]](p1) :: (store (s8) into unknown-address + 13, addrspace 1) ; VI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s64) ; VI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) - ; VI: G_STORE [[COPY18]](s32), [[PTR_ADD13]](p1) :: (store 1 into unknown-address + 14, addrspace 1) + ; VI: G_STORE [[COPY18]](s32), [[PTR_ADD13]](p1) :: (store (s8) into unknown-address + 14, addrspace 1) ; VI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C5]](s64) ; VI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) - ; VI: G_STORE [[COPY19]](s32), [[PTR_ADD14]](p1) :: (store 1 into unknown-address + 15, addrspace 1) + ; VI: G_STORE [[COPY19]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; VI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64) ; VI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT1]](<4 x s32>) @@ -8085,77 +8129,77 @@ body: | ; VI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32) ; VI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32) ; VI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; VI: G_STORE [[COPY20]](s32), [[PTR_ADD15]](p1) :: (store 1 into unknown-address + 16, addrspace 1) + ; VI: G_STORE [[COPY20]](s32), [[PTR_ADD15]](p1) :: (store (s8) into unknown-address + 16, addrspace 1) ; VI: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C3]](s64) ; VI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LSHR12]](s32) - ; VI: G_STORE [[COPY21]](s32), [[PTR_ADD16]](p1) :: (store 1 into unknown-address + 17, addrspace 1) + ; VI: G_STORE [[COPY21]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 17, addrspace 1) ; VI: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) ; VI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LSHR13]](s32) - ; VI: G_STORE [[COPY22]](s32), [[PTR_ADD17]](p1) :: (store 1 into unknown-address + 18, addrspace 1) + ; VI: G_STORE [[COPY22]](s32), [[PTR_ADD17]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) ; VI: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) ; VI: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LSHR14]](s32) - ; VI: G_STORE [[COPY23]](s32), [[PTR_ADD18]](p1) :: (store 1 into unknown-address + 19, addrspace 1) + ; VI: G_STORE [[COPY23]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) ; VI: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) ; VI: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) ; VI: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32) ; VI: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32) ; VI: [[COPY24:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; VI: G_STORE [[COPY24]](s32), [[PTR_ADD19]](p1) :: (store 1 into unknown-address + 20, addrspace 1) + ; VI: G_STORE [[COPY24]](s32), [[PTR_ADD19]](p1) :: (store (s8) into unknown-address + 20, addrspace 1) ; VI: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C3]](s64) ; VI: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LSHR15]](s32) - ; VI: G_STORE [[COPY25]](s32), [[PTR_ADD20]](p1) :: (store 1 into unknown-address + 21, addrspace 1) + ; VI: G_STORE [[COPY25]](s32), [[PTR_ADD20]](p1) :: (store (s8) into unknown-address + 21, addrspace 1) ; VI: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C4]](s64) ; VI: [[COPY26:%[0-9]+]]:_(s32) = COPY [[LSHR16]](s32) - ; VI: G_STORE [[COPY26]](s32), [[PTR_ADD21]](p1) :: (store 1 into unknown-address + 22, addrspace 1) + ; VI: G_STORE [[COPY26]](s32), [[PTR_ADD21]](p1) :: (store (s8) into unknown-address + 22, addrspace 1) ; VI: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C5]](s64) ; VI: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LSHR17]](s32) - ; VI: G_STORE [[COPY27]](s32), [[PTR_ADD22]](p1) :: (store 1 into unknown-address + 23, addrspace 1) + ; VI: G_STORE [[COPY27]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1) ; VI: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C7]](s64) ; VI: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32) ; VI: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32) ; VI: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32) ; VI: [[COPY28:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) - ; VI: G_STORE [[COPY28]](s32), [[PTR_ADD23]](p1) :: (store 1 into unknown-address + 24, addrspace 1) + ; VI: G_STORE [[COPY28]](s32), [[PTR_ADD23]](p1) :: (store (s8) into unknown-address + 24, addrspace 1) ; VI: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C3]](s64) ; VI: [[COPY29:%[0-9]+]]:_(s32) = COPY [[LSHR18]](s32) - ; VI: G_STORE [[COPY29]](s32), [[PTR_ADD24]](p1) :: (store 1 into unknown-address + 25, addrspace 1) + ; VI: G_STORE [[COPY29]](s32), [[PTR_ADD24]](p1) :: (store (s8) into unknown-address + 25, addrspace 1) ; VI: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64) ; VI: [[COPY30:%[0-9]+]]:_(s32) = COPY [[LSHR19]](s32) - ; VI: G_STORE [[COPY30]](s32), [[PTR_ADD25]](p1) :: (store 1 into unknown-address + 26, addrspace 1) + ; VI: G_STORE [[COPY30]](s32), [[PTR_ADD25]](p1) :: (store (s8) into unknown-address + 26, addrspace 1) ; VI: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C5]](s64) ; VI: [[COPY31:%[0-9]+]]:_(s32) = COPY [[LSHR20]](s32) - ; VI: G_STORE [[COPY31]](s32), [[PTR_ADD26]](p1) :: (store 1 into unknown-address + 27, addrspace 1) + ; VI: G_STORE [[COPY31]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1) ; VI: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C8]](s64) ; VI: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32) ; VI: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C1]](s32) ; VI: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C2]](s32) ; VI: [[COPY32:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) - ; VI: G_STORE [[COPY32]](s32), [[PTR_ADD27]](p1) :: (store 1 into unknown-address + 28, addrspace 1) + ; VI: G_STORE [[COPY32]](s32), [[PTR_ADD27]](p1) :: (store (s8) into unknown-address + 28, addrspace 1) ; VI: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C3]](s64) ; VI: [[COPY33:%[0-9]+]]:_(s32) = COPY [[LSHR21]](s32) - ; VI: G_STORE [[COPY33]](s32), [[PTR_ADD28]](p1) :: (store 1 into unknown-address + 29, addrspace 1) + ; VI: G_STORE [[COPY33]](s32), [[PTR_ADD28]](p1) :: (store (s8) into unknown-address + 29, addrspace 1) ; VI: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C4]](s64) ; VI: [[COPY34:%[0-9]+]]:_(s32) = COPY [[LSHR22]](s32) - ; VI: G_STORE [[COPY34]](s32), [[PTR_ADD29]](p1) :: (store 1 into unknown-address + 30, addrspace 1) + ; VI: G_STORE [[COPY34]](s32), [[PTR_ADD29]](p1) :: (store (s8) into unknown-address + 30, addrspace 1) ; VI: [[PTR_ADD30:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C5]](s64) ; VI: [[COPY35:%[0-9]+]]:_(s32) = COPY [[LSHR23]](s32) - ; VI: G_STORE [[COPY35]](s32), [[PTR_ADD30]](p1) :: (store 1 into unknown-address + 31, addrspace 1) + ; VI: G_STORE [[COPY35]](s32), [[PTR_ADD30]](p1) :: (store (s8) into unknown-address + 31, addrspace 1) ; VI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; VI: [[PTR_ADD31:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C10]](s64) ; VI: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT2]], [[C]](s32) ; VI: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT2]], [[C1]](s32) ; VI: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT2]], [[C2]](s32) ; VI: [[COPY36:%[0-9]+]]:_(s32) = COPY [[EXTRACT2]](s32) - ; VI: G_STORE [[COPY36]](s32), [[PTR_ADD31]](p1) :: (store 1 into unknown-address + 32, addrspace 1) + ; VI: G_STORE [[COPY36]](s32), [[PTR_ADD31]](p1) :: (store (s8) into unknown-address + 32, addrspace 1) ; VI: [[PTR_ADD32:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD31]], [[C3]](s64) ; VI: [[COPY37:%[0-9]+]]:_(s32) = COPY [[LSHR24]](s32) - ; VI: G_STORE [[COPY37]](s32), [[PTR_ADD32]](p1) :: (store 1 into unknown-address + 33, addrspace 1) + ; VI: G_STORE [[COPY37]](s32), [[PTR_ADD32]](p1) :: (store (s8) into unknown-address + 33, addrspace 1) ; VI: [[PTR_ADD33:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD31]], [[C4]](s64) ; VI: [[COPY38:%[0-9]+]]:_(s32) = COPY [[LSHR25]](s32) - ; VI: G_STORE [[COPY38]](s32), [[PTR_ADD33]](p1) :: (store 1 into unknown-address + 34, addrspace 1) + ; VI: G_STORE [[COPY38]](s32), [[PTR_ADD33]](p1) :: (store (s8) into unknown-address + 34, addrspace 1) ; VI: [[PTR_ADD34:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD31]], [[C5]](s64) ; VI: [[COPY39:%[0-9]+]]:_(s32) = COPY [[LSHR26]](s32) - ; VI: G_STORE [[COPY39]](s32), [[PTR_ADD34]](p1) :: (store 1 into unknown-address + 35, addrspace 1) + ; VI: G_STORE [[COPY39]](s32), [[PTR_ADD34]](p1) :: (store (s8) into unknown-address + 35, addrspace 1) ; GFX9-LABEL: name: test_store_global_v9s32_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 @@ -8165,19 +8209,19 @@ body: | ; GFX9: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 ; GFX9: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, align 1, addrspace 1) + ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 1, addrspace 1) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, align 1, addrspace 1) + ; GFX9: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, align 1, addrspace 1) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store 4 into unknown-address + 32, align 1, addrspace 1) + ; GFX9: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 %2:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 %3:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 %4:_(<9 x s32>) = G_CONCAT_VECTORS %1, %2, %3 - G_STORE %4, %0 :: (store 36, align 1, addrspace 1) + G_STORE %4, %0 :: (store (<9 x s32>), align 1, addrspace 1) ... --- @@ -8200,75 +8244,75 @@ body: | ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI: G_STORE [[COPY4]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; SI: G_STORE [[COPY4]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD]](p1) :: (store 2 into unknown-address + 2, addrspace 1) + ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD1]](p1) :: (store 2 into unknown-address + 4, addrspace 1) + ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD2]](p1) :: (store 2 into unknown-address + 6, addrspace 1) + ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<2 x s32>) ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32) ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD3]](p1) :: (store 2 into unknown-address + 8, addrspace 1) + ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; SI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD4]](p1) :: (store 2 into unknown-address + 10, addrspace 1) + ; SI: G_STORE [[COPY9]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; SI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; SI: G_STORE [[COPY10]](s32), [[PTR_ADD5]](p1) :: (store 2 into unknown-address + 12, addrspace 1) + ; SI: G_STORE [[COPY10]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; SI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; SI: G_STORE [[COPY11]](s32), [[PTR_ADD6]](p1) :: (store 2 into unknown-address + 14, addrspace 1) + ; SI: G_STORE [[COPY11]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; SI: [[UV6:%[0-9]+]]:_(<2 x s32>), [[UV7:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[EXTRACT1]](<4 x s32>) ; SI: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV6]](<2 x s32>) ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C]](s32) ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) - ; SI: G_STORE [[COPY12]](s32), [[PTR_ADD7]](p1) :: (store 2 into unknown-address + 16, addrspace 1) + ; SI: G_STORE [[COPY12]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; SI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; SI: G_STORE [[COPY13]](s32), [[PTR_ADD8]](p1) :: (store 2 into unknown-address + 18, addrspace 1) + ; SI: G_STORE [[COPY13]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) ; SI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV9]], [[C]](s32) ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[UV9]](s32) - ; SI: G_STORE [[COPY14]](s32), [[PTR_ADD9]](p1) :: (store 2 into unknown-address + 20, addrspace 1) + ; SI: G_STORE [[COPY14]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1) ; SI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](s64) ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; SI: G_STORE [[COPY15]](s32), [[PTR_ADD10]](p1) :: (store 2 into unknown-address + 22, addrspace 1) + ; SI: G_STORE [[COPY15]](s32), [[PTR_ADD10]](p1) :: (store (s16) into unknown-address + 22, addrspace 1) ; SI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) ; SI: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV7]](<2 x s32>) ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV10]], [[C]](s32) ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[UV10]](s32) - ; SI: G_STORE [[COPY16]](s32), [[PTR_ADD11]](p1) :: (store 2 into unknown-address + 24, addrspace 1) + ; SI: G_STORE [[COPY16]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1) ; SI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) ; SI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) - ; SI: G_STORE [[COPY17]](s32), [[PTR_ADD12]](p1) :: (store 2 into unknown-address + 26, addrspace 1) + ; SI: G_STORE [[COPY17]](s32), [[PTR_ADD12]](p1) :: (store (s16) into unknown-address + 26, addrspace 1) ; SI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV11]], [[C]](s32) ; SI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[UV11]](s32) - ; SI: G_STORE [[COPY18]](s32), [[PTR_ADD13]](p1) :: (store 2 into unknown-address + 28, addrspace 1) + ; SI: G_STORE [[COPY18]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) ; SI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64) ; SI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) - ; SI: G_STORE [[COPY19]](s32), [[PTR_ADD14]](p1) :: (store 2 into unknown-address + 30, addrspace 1) + ; SI: G_STORE [[COPY19]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1) ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; SI: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT2]], [[C]](s32) ; SI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[EXTRACT2]](s32) - ; SI: G_STORE [[COPY20]](s32), [[PTR_ADD15]](p1) :: (store 2 into unknown-address + 32, addrspace 1) + ; SI: G_STORE [[COPY20]](s32), [[PTR_ADD15]](p1) :: (store (s16) into unknown-address + 32, addrspace 1) ; SI: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) ; SI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) - ; SI: G_STORE [[COPY21]](s32), [[PTR_ADD16]](p1) :: (store 2 into unknown-address + 34, addrspace 1) + ; SI: G_STORE [[COPY21]](s32), [[PTR_ADD16]](p1) :: (store (s16) into unknown-address + 34, addrspace 1) ; CI-LABEL: name: test_store_global_v9s32_align2 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 @@ -8278,13 +8322,13 @@ body: | ; CI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 ; CI: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 ; CI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, align 2, addrspace 1) + ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 2, addrspace 1) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, align 2, addrspace 1) + ; CI: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, align 2, addrspace 1) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store 4 into unknown-address + 32, align 2, addrspace 1) + ; CI: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 2, addrspace 1) ; VI-LABEL: name: test_store_global_v9s32_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 @@ -8299,75 +8343,75 @@ body: | ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI: G_STORE [[COPY4]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; VI: G_STORE [[COPY4]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD]](p1) :: (store 2 into unknown-address + 2, addrspace 1) + ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD1]](p1) :: (store 2 into unknown-address + 4, addrspace 1) + ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD2]](p1) :: (store 2 into unknown-address + 6, addrspace 1) + ; VI: G_STORE [[COPY7]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<2 x s32>) ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32) ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) - ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD3]](p1) :: (store 2 into unknown-address + 8, addrspace 1) + ; VI: G_STORE [[COPY8]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; VI: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD4]](p1) :: (store 2 into unknown-address + 10, addrspace 1) + ; VI: G_STORE [[COPY9]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; VI: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) ; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) - ; VI: G_STORE [[COPY10]](s32), [[PTR_ADD5]](p1) :: (store 2 into unknown-address + 12, addrspace 1) + ; VI: G_STORE [[COPY10]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; VI: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) ; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; VI: G_STORE [[COPY11]](s32), [[PTR_ADD6]](p1) :: (store 2 into unknown-address + 14, addrspace 1) + ; VI: G_STORE [[COPY11]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; VI: [[UV6:%[0-9]+]]:_(<2 x s32>), [[UV7:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[EXTRACT1]](<4 x s32>) ; VI: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV6]](<2 x s32>) ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C]](s32) ; VI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) - ; VI: G_STORE [[COPY12]](s32), [[PTR_ADD7]](p1) :: (store 2 into unknown-address + 16, addrspace 1) + ; VI: G_STORE [[COPY12]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; VI: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; VI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; VI: G_STORE [[COPY13]](s32), [[PTR_ADD8]](p1) :: (store 2 into unknown-address + 18, addrspace 1) + ; VI: G_STORE [[COPY13]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) ; VI: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV9]], [[C]](s32) ; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[UV9]](s32) - ; VI: G_STORE [[COPY14]](s32), [[PTR_ADD9]](p1) :: (store 2 into unknown-address + 20, addrspace 1) + ; VI: G_STORE [[COPY14]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1) ; VI: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](s64) ; VI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; VI: G_STORE [[COPY15]](s32), [[PTR_ADD10]](p1) :: (store 2 into unknown-address + 22, addrspace 1) + ; VI: G_STORE [[COPY15]](s32), [[PTR_ADD10]](p1) :: (store (s16) into unknown-address + 22, addrspace 1) ; VI: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) ; VI: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV7]](<2 x s32>) ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV10]], [[C]](s32) ; VI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[UV10]](s32) - ; VI: G_STORE [[COPY16]](s32), [[PTR_ADD11]](p1) :: (store 2 into unknown-address + 24, addrspace 1) + ; VI: G_STORE [[COPY16]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1) ; VI: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) ; VI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) - ; VI: G_STORE [[COPY17]](s32), [[PTR_ADD12]](p1) :: (store 2 into unknown-address + 26, addrspace 1) + ; VI: G_STORE [[COPY17]](s32), [[PTR_ADD12]](p1) :: (store (s16) into unknown-address + 26, addrspace 1) ; VI: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV11]], [[C]](s32) ; VI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[UV11]](s32) - ; VI: G_STORE [[COPY18]](s32), [[PTR_ADD13]](p1) :: (store 2 into unknown-address + 28, addrspace 1) + ; VI: G_STORE [[COPY18]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) ; VI: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64) ; VI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) - ; VI: G_STORE [[COPY19]](s32), [[PTR_ADD14]](p1) :: (store 2 into unknown-address + 30, addrspace 1) + ; VI: G_STORE [[COPY19]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1) ; VI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; VI: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) ; VI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT2]], [[C]](s32) ; VI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[EXTRACT2]](s32) - ; VI: G_STORE [[COPY20]](s32), [[PTR_ADD15]](p1) :: (store 2 into unknown-address + 32, addrspace 1) + ; VI: G_STORE [[COPY20]](s32), [[PTR_ADD15]](p1) :: (store (s16) into unknown-address + 32, addrspace 1) ; VI: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) ; VI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) - ; VI: G_STORE [[COPY21]](s32), [[PTR_ADD16]](p1) :: (store 2 into unknown-address + 34, addrspace 1) + ; VI: G_STORE [[COPY21]](s32), [[PTR_ADD16]](p1) :: (store (s16) into unknown-address + 34, addrspace 1) ; GFX9-LABEL: name: test_store_global_v9s32_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 @@ -8377,19 +8421,19 @@ body: | ; GFX9: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 ; GFX9: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, align 2, addrspace 1) + ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 2, addrspace 1) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, align 2, addrspace 1) + ; GFX9: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, align 2, addrspace 1) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store 4 into unknown-address + 32, align 2, addrspace 1) + ; GFX9: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 %2:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 %3:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 %4:_(<9 x s32>) = G_CONCAT_VECTORS %1, %2, %3 - G_STORE %4, %0 :: (store 36, align 2, addrspace 1) + G_STORE %4, %0 :: (store (<9 x s32>), align 2, addrspace 1) ... --- @@ -8407,13 +8451,13 @@ body: | ; SI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 ; SI: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 ; SI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; SI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, align 4, addrspace 1) + ; SI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 4, addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, align 4, addrspace 1) + ; SI: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, align 4, addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store 4 into unknown-address + 32, addrspace 1) + ; SI: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1) ; CI-LABEL: name: test_store_global_v9s32_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 @@ -8423,13 +8467,13 @@ body: | ; CI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 ; CI: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 ; CI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, align 4, addrspace 1) + ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 4, addrspace 1) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, align 4, addrspace 1) + ; CI: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, align 4, addrspace 1) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store 4 into unknown-address + 32, addrspace 1) + ; CI: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1) ; VI-LABEL: name: test_store_global_v9s32_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 @@ -8439,13 +8483,13 @@ body: | ; VI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 ; VI: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 ; VI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; VI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, align 4, addrspace 1) + ; VI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 4, addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, align 4, addrspace 1) + ; VI: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, align 4, addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store 4 into unknown-address + 32, addrspace 1) + ; VI: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1) ; GFX9-LABEL: name: test_store_global_v9s32_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 @@ -8455,19 +8499,19 @@ body: | ; GFX9: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 ; GFX9: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, align 4, addrspace 1) + ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 4, addrspace 1) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, align 4, addrspace 1) + ; GFX9: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, align 4, addrspace 1) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store 4 into unknown-address + 32, addrspace 1) + ; GFX9: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 %2:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 %3:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 %4:_(<9 x s32>) = G_CONCAT_VECTORS %1, %2, %3 - G_STORE %4, %0 :: (store 36, align 4, addrspace 1) + G_STORE %4, %0 :: (store (<9 x s32>), align 4, addrspace 1) ... --- @@ -8485,13 +8529,13 @@ body: | ; SI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 ; SI: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 ; SI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; SI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, align 8, addrspace 1) + ; SI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 8, addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, align 8, addrspace 1) + ; SI: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, align 8, addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store 4 into unknown-address + 32, align 8, addrspace 1) + ; SI: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1) ; CI-LABEL: name: test_store_global_v9s32_align8 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 @@ -8501,13 +8545,13 @@ body: | ; CI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 ; CI: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 ; CI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, align 8, addrspace 1) + ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 8, addrspace 1) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, align 8, addrspace 1) + ; CI: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, align 8, addrspace 1) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store 4 into unknown-address + 32, align 8, addrspace 1) + ; CI: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1) ; VI-LABEL: name: test_store_global_v9s32_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 @@ -8517,13 +8561,13 @@ body: | ; VI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 ; VI: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 ; VI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; VI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, align 8, addrspace 1) + ; VI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 8, addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, align 8, addrspace 1) + ; VI: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, align 8, addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store 4 into unknown-address + 32, align 8, addrspace 1) + ; VI: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1) ; GFX9-LABEL: name: test_store_global_v9s32_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 @@ -8533,19 +8577,19 @@ body: | ; GFX9: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 ; GFX9: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, align 8, addrspace 1) + ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), align 8, addrspace 1) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, align 8, addrspace 1) + ; GFX9: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, align 8, addrspace 1) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store 4 into unknown-address + 32, align 8, addrspace 1) + ; GFX9: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 %2:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 %3:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 %4:_(<9 x s32>) = G_CONCAT_VECTORS %1, %2, %3 - G_STORE %4, %0 :: (store 36, align 8, addrspace 1) + G_STORE %4, %0 :: (store (<9 x s32>), align 8, addrspace 1) ... --- @@ -8563,13 +8607,13 @@ body: | ; SI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 ; SI: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 ; SI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; SI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, addrspace 1) + ; SI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, addrspace 1) + ; SI: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store 4 into unknown-address + 32, align 16, addrspace 1) + ; SI: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1) ; CI-LABEL: name: test_store_global_v9s32_align16 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 @@ -8579,13 +8623,13 @@ body: | ; CI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 ; CI: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 ; CI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, addrspace 1) + ; CI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), addrspace 1) ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, addrspace 1) + ; CI: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, addrspace 1) ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store 4 into unknown-address + 32, align 16, addrspace 1) + ; CI: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1) ; VI-LABEL: name: test_store_global_v9s32_align16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 @@ -8595,13 +8639,13 @@ body: | ; VI: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 ; VI: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 ; VI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; VI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, addrspace 1) + ; VI: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, addrspace 1) + ; VI: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store 4 into unknown-address + 32, align 16, addrspace 1) + ; VI: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1) ; GFX9-LABEL: name: test_store_global_v9s32_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 @@ -8611,17 +8655,17 @@ body: | ; GFX9: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 ; GFX9: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store 16, addrspace 1) + ; GFX9: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (s128), addrspace 1) ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 into unknown-address + 16, addrspace 1) + ; GFX9: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (s128) into unknown-address + 16, addrspace 1) ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store 4 into unknown-address + 32, align 16, addrspace 1) + ; GFX9: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 %2:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 %3:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 %4:_(<9 x s32>) = G_CONCAT_VECTORS %1, %2, %3 - G_STORE %4, %0 :: (store 36, align 16, addrspace 1) + G_STORE %4, %0 :: (store (<9 x s32>), align 16, addrspace 1) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir index 14a2bed2c0e56..85c71f3cfe888 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir @@ -10,14 +10,14 @@ body: | ; SI-LABEL: name: test_store_global_i32 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store 4, addrspace 1) + ; SI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), addrspace 1) ; VI-LABEL: name: test_store_global_i32 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store 4, addrspace 1) + ; VI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 - G_STORE %1, %0 :: (store 4, addrspace 1) + G_STORE %1, %0 :: (store (s32), addrspace 1) ... --- @@ -29,14 +29,14 @@ body: | ; SI-LABEL: name: test_store_global_i64 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store 8, addrspace 1) + ; SI: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), addrspace 1) ; VI-LABEL: name: test_store_global_i64 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store 8, addrspace 1) + ; VI: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store (s64), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store 8, addrspace 1) + G_STORE %1, %0 :: (store (s64), addrspace 1) ... --- @@ -48,14 +48,14 @@ body: | ; SI-LABEL: name: test_store_global_p1 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr2_vgpr3 - ; SI: G_STORE [[COPY1]](p1), [[COPY]](p1) :: (store 8, addrspace 1) + ; SI: G_STORE [[COPY1]](p1), [[COPY]](p1) :: (store (p1), addrspace 1) ; VI-LABEL: name: test_store_global_p1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr2_vgpr3 - ; VI: G_STORE [[COPY1]](p1), [[COPY]](p1) :: (store 8, addrspace 1) + ; VI: G_STORE [[COPY1]](p1), [[COPY]](p1) :: (store (p1), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p1) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store 8, addrspace 1) + G_STORE %1, %0 :: (store (p1), addrspace 1) ... --- @@ -67,14 +67,14 @@ body: | ; SI-LABEL: name: test_store_global_p4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(p4) = COPY $vgpr2_vgpr3 - ; SI: G_STORE [[COPY1]](p4), [[COPY]](p1) :: (store 8, addrspace 1) + ; SI: G_STORE [[COPY1]](p4), [[COPY]](p1) :: (store (p4), addrspace 1) ; VI-LABEL: name: test_store_global_p4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(p4) = COPY $vgpr2_vgpr3 - ; VI: G_STORE [[COPY1]](p4), [[COPY]](p1) :: (store 8, addrspace 1) + ; VI: G_STORE [[COPY1]](p4), [[COPY]](p1) :: (store (p4), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p4) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store 8, addrspace 1) + G_STORE %1, %0 :: (store (p4), addrspace 1) ... --- @@ -86,14 +86,14 @@ body: | ; SI-LABEL: name: test_store_global_p3 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2 - ; SI: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store 4, addrspace 1) + ; SI: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store (p3), addrspace 1) ; VI-LABEL: name: test_store_global_p3 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2 - ; VI: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store 4, addrspace 1) + ; VI: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store (p3), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p3) = COPY $vgpr2 - G_STORE %1, %0 :: (store 4, addrspace 1) + G_STORE %1, %0 :: (store (p3), addrspace 1) ... --- @@ -105,14 +105,14 @@ body: | ; SI-LABEL: name: test_store_global_v2s32 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store 8, addrspace 1) + ; SI: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1) ; VI-LABEL: name: test_store_global_v2s32 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store 8, addrspace 1) + ; VI: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store 8, addrspace 1) + G_STORE %1, %0 :: (store (<2 x s32>), addrspace 1) ... --- @@ -124,14 +124,14 @@ body: | ; SI-LABEL: name: test_store_global_v2s16 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; SI: G_STORE [[COPY1]](<2 x s16>), [[COPY]](p1) :: (store 4, addrspace 1) + ; SI: G_STORE [[COPY1]](<2 x s16>), [[COPY]](p1) :: (store (<2 x s16>), addrspace 1) ; VI-LABEL: name: test_store_global_v2s16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; VI: G_STORE [[COPY1]](<2 x s16>), [[COPY]](p1) :: (store 4, addrspace 1) + ; VI: G_STORE [[COPY1]](<2 x s16>), [[COPY]](p1) :: (store (<2 x s16>), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x s16>) = COPY $vgpr2 - G_STORE %1, %0 :: (store 4, addrspace 1) + G_STORE %1, %0 :: (store (<2 x s16>), addrspace 1) ... --- @@ -145,17 +145,17 @@ body: | ; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; SI: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0 ; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64 - ; SI: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store 8, align 4, addrspace 1) + ; SI: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store (s64), align 4, addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 8, addrspace 1) + ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, addrspace 1) ; VI-LABEL: name: test_store_global_v3s32 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; VI: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store 12, align 4, addrspace 1) + ; VI: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store (<3 x s32>), align 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - G_STORE %1, %0 :: (store 12, align 4, addrspace 1) + G_STORE %1, %0 :: (store (<3 x s32>), align 4, addrspace 1) ... --- @@ -168,15 +168,15 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; SI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; SI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; SI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-LABEL: name: test_truncstore_global_s64_to_s8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; VI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; VI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; VI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store 1, addrspace 1) + G_STORE %1, %0 :: (store (s8), addrspace 1) ... --- @@ -189,15 +189,15 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; SI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; SI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; SI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-LABEL: name: test_truncstore_global_s64_to_s16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; VI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; VI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; VI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store 1, addrspace 1) + G_STORE %1, %0 :: (store (s8), addrspace 1) ... --- @@ -210,15 +210,15 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; SI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; SI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 4, addrspace 1) + ; SI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s32), addrspace 1) ; VI-LABEL: name: test_truncstore_global_s64_to_s32 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; VI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; VI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 4, addrspace 1) + ; VI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s32), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 - G_STORE %1, %0 :: (store 4, addrspace 1) + G_STORE %1, %0 :: (store (s32), addrspace 1) ... --- @@ -231,15 +231,15 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s128) - ; SI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; SI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-LABEL: name: test_truncstore_global_s128_to_s16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; VI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s128) - ; VI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; VI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store 2, addrspace 1) + G_STORE %1, %0 :: (store (s16), addrspace 1) ... --- @@ -251,16 +251,16 @@ body: | ; SI-LABEL: name: test_truncstore_global_s128_to_s8 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; SI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s128) - ; SI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; SI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) + ; SI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (s128), addrspace 1) ; VI-LABEL: name: test_truncstore_global_s128_to_s8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; VI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s128) - ; VI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; VI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) + ; VI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (s128), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store 1, addrspace 1) + G_STORE %1, %0 :: (store (s128), addrspace 1) ... --- @@ -275,18 +275,18 @@ body: | ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; SI: G_STORE [[AND]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; SI: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s1), addrspace 1) ; VI-LABEL: name: test_store_global_i1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; VI: G_STORE [[AND]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; VI: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s1), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 %2:_(s1) = G_TRUNC %1 - G_STORE %2, %0 :: (store 1, addrspace 1) + G_STORE %2, %0 :: (store (s1), addrspace 1) ... --- @@ -299,16 +299,16 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-LABEL: name: test_store_global_i8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 %2:_(s8) = G_TRUNC %1 - G_STORE %2, %0 :: (store 1, addrspace 1) + G_STORE %2, %0 :: (store (s8), addrspace 1) ... --- @@ -321,16 +321,16 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-LABEL: name: test_store_global_i16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 %2:_(s16) = G_TRUNC %1 - G_STORE %2, %0 :: (store 2, addrspace 1) + G_STORE %2, %0 :: (store (s16), addrspace 1) ... --- @@ -345,19 +345,19 @@ body: | ; SI: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY]](s96) ; SI: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<3 x s32>), 0 ; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<3 x s32>), 64 - ; SI: G_STORE [[EXTRACT]](<2 x s32>), [[COPY1]](p1) :: (store 8, align 16, addrspace 1) + ; SI: G_STORE [[EXTRACT]](<2 x s32>), [[COPY1]](p1) :: (store (s64), align 16, addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C]](s64) - ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store 4 into unknown-address + 8, align 8, addrspace 1) + ; SI: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1) ; VI-LABEL: name: test_store_global_96 ; VI: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 ; VI: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr3_vgpr4 ; VI: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY]](s96) - ; VI: G_STORE [[BITCAST]](<3 x s32>), [[COPY1]](p1) :: (store 12, align 16, addrspace 1) + ; VI: G_STORE [[BITCAST]](<3 x s32>), [[COPY1]](p1) :: (store (s96), align 16, addrspace 1) %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 %1:_(p1) = COPY $vgpr3_vgpr4 - G_STORE %0, %1 :: (store 12, addrspace 1, align 16) + G_STORE %0, %1 :: (store (s96), addrspace 1, align 16) ... --- @@ -370,15 +370,15 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) - ; SI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store 16, addrspace 1) + ; SI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (s128), addrspace 1) ; VI-LABEL: name: test_store_global_i128 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; VI: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](s128) - ; VI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store 16, addrspace 1) + ; VI: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (s128), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store 16, addrspace 1) + G_STORE %1, %0 :: (store (s128), addrspace 1) ... --- @@ -390,14 +390,14 @@ body: | ; SI-LABEL: name: test_store_global_v2s64 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; SI: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store 16, addrspace 1) + ; SI: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), addrspace 1) ; VI-LABEL: name: test_store_global_v2s64 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; VI: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store 16, addrspace 1) + ; VI: G_STORE [[COPY1]](<2 x s64>), [[COPY]](p1) :: (store (<2 x s64>), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - G_STORE %1, %0 :: (store 16, addrspace 1) + G_STORE %1, %0 :: (store (<2 x s64>), addrspace 1) ... @@ -412,24 +412,24 @@ body: | ; SI: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>) ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; SI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; SI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI: G_STORE [[COPY2]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; SI: G_STORE [[COPY2]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI-LABEL: name: test_store_global_v2s8_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>) ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; VI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; VI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI: G_STORE [[COPY2]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x s8>) = G_IMPLICIT_DEF - G_STORE %1, %0 :: (store 2, addrspace 1, align 1) + G_STORE %1, %0 :: (store (<2 x s8>), addrspace 1, align 1) ... @@ -454,7 +454,7 @@ body: | ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; SI: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; SI: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store (<2 x s8>), addrspace 1) ; VI-LABEL: name: test_store_global_v2s8_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF @@ -468,10 +468,10 @@ body: | ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; VI: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; VI: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store (<2 x s8>), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x s8>) = G_IMPLICIT_DEF - G_STORE %1, %0 :: (store 2, addrspace 1, align 2) + G_STORE %1, %0 :: (store (<2 x s8>), addrspace 1, align 2) ... @@ -496,7 +496,7 @@ body: | ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; SI: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store 2, align 4, addrspace 1) + ; SI: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store (<2 x s8>), align 4, addrspace 1) ; VI-LABEL: name: test_store_global_v2s8_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF @@ -510,10 +510,10 @@ body: | ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; VI: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store 2, align 4, addrspace 1) + ; VI: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store (<2 x s8>), align 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x s8>) = G_IMPLICIT_DEF - G_STORE %1, %0 :: (store 2, addrspace 1, align 4) + G_STORE %1, %0 :: (store (<2 x s8>), addrspace 1, align 4) ... @@ -544,15 +544,15 @@ body: | ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[COPY4]](s16) ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT]], [[C1]](s32) ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY4]](s16) - ; SI: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; SI: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; VI-LABEL: name: test_store_global_v3s8_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 @@ -570,19 +570,19 @@ body: | ; VI: [[COPY2:%[0-9]+]]:_(s16) = COPY [[OR]](s16) ; VI: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[COPY2]], [[C1]](s16) ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY2]](s16) - ; VI: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; VI: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; VI: G_STORE [[ANYEXT1]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; VI: G_STORE [[ANYEXT1]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 %2:_(<3 x s8>) = G_TRUNC %1 - G_STORE %2, %0 :: (store 3, addrspace 1, align 1) + G_STORE %2, %0 :: (store (<3 x s8>), addrspace 1, align 1) ... @@ -610,11 +610,11 @@ body: | ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC2]] ; SI: [[COPY3:%[0-9]+]]:_(s16) = COPY [[OR]](s16) ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY3]](s16) - ; SI: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; SI: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 2, align 2, addrspace 1) + ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) ; VI-LABEL: name: test_store_global_v3s8_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 @@ -631,15 +631,15 @@ body: | ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; VI: [[COPY2:%[0-9]+]]:_(s16) = COPY [[OR]](s16) ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY2]](s16) - ; VI: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; VI: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 2, align 2, addrspace 1) + ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 %2:_(<3 x s8>) = G_TRUNC %1 - G_STORE %2, %0 :: (store 3, addrspace 1, align 2) + G_STORE %2, %0 :: (store (<3 x s8>), addrspace 1, align 2) ... @@ -681,8 +681,8 @@ body: | ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C3]](s32) ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI: G_STORE [[COPY5]](s32), [[COPY]](p1) :: (store 2, align 4, addrspace 1) - ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 2, align 2, addrspace 1) + ; SI: G_STORE [[COPY5]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) + ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) ; VI-LABEL: name: test_store_global_v3s8_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 @@ -711,12 +711,12 @@ body: | ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, align 4, addrspace 1) - ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 2, align 2, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), align 4, addrspace 1) + ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 %2:_(<3 x s8>) = G_TRUNC %1 - G_STORE %2, %0 :: (store 3, addrspace 1, align 4) + G_STORE %2, %0 :: (store (<3 x s8>), addrspace 1, align 4) ... @@ -731,41 +731,41 @@ body: | ; SI: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; SI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 1 into unknown-address + 3, addrspace 1) + ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI-LABEL: name: test_store_global_v4s8_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) - ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; VI: G_STORE [[COPY4]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) - ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store 1 into unknown-address + 3, addrspace 1) + ; VI: G_STORE [[COPY5]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 %2:_(<4 x s8>) = G_TRUNC %1 - G_STORE %2, %0 :: (store 4, addrspace 1, align 1) + G_STORE %2, %0 :: (store (<4 x s8>), addrspace 1, align 1) ... @@ -798,11 +798,11 @@ body: | ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]] ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; SI: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; SI: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) - ; SI: G_STORE [[ANYEXT1]](s32), [[PTR_ADD]](p1) :: (store 2 into unknown-address + 2, addrspace 1) + ; SI: G_STORE [[ANYEXT1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-LABEL: name: test_store_global_v4s8_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 @@ -822,15 +822,15 @@ body: | ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16) ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; VI: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; VI: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) - ; VI: G_STORE [[ANYEXT1]](s32), [[PTR_ADD]](p1) :: (store 2 into unknown-address + 2, addrspace 1) + ; VI: G_STORE [[ANYEXT1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 %2:_(<4 x s8>) = G_TRUNC %1 - G_STORE %2, %0 :: (store 4, addrspace 1, align 2) + G_STORE %2, %0 :: (store (<4 x s8>), addrspace 1, align 2) ... @@ -862,7 +862,7 @@ body: | ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32) ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; SI: G_STORE [[OR2]](s32), [[COPY]](p1) :: (store 4, addrspace 1) + ; SI: G_STORE [[OR2]](s32), [[COPY]](p1) :: (store (<4 x s8>), addrspace 1) ; VI-LABEL: name: test_store_global_v4s8_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 @@ -885,11 +885,11 @@ body: | ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32) ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; VI: G_STORE [[OR2]](s32), [[COPY]](p1) :: (store 4, addrspace 1) + ; VI: G_STORE [[OR2]](s32), [[COPY]](p1) :: (store (<4 x s8>), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 %2:_(<4 x s8>) = G_TRUNC %1 - G_STORE %2, %0 :: (store 4, addrspace 1, align 4) + G_STORE %2, %0 :: (store (<4 x s8>), addrspace 1, align 4) ... @@ -914,7 +914,7 @@ body: | ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; SI: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; SI: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store (<2 x s4>), addrspace 1) ; VI-LABEL: name: test_truncstore_global_v2s8_to_1_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF @@ -928,10 +928,10 @@ body: | ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; VI: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; VI: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store (<2 x s4>), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<2 x s8>) = G_IMPLICIT_DEF - G_STORE %1, %0 :: (store 1, addrspace 1, align 1) + G_STORE %1, %0 :: (store (<2 x s4>), addrspace 1, align 1) ... @@ -970,7 +970,7 @@ body: | ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) - ; SI: G_STORE [[COPY5]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; SI: G_STORE [[COPY5]](s32), [[COPY]](p1) :: (store (<3 x s2>), addrspace 1) ; VI-LABEL: name: test_truncstore_global_v3s8_to_1_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 @@ -996,11 +996,11 @@ body: | ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (<3 x s2>), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 %2:_(<3 x s8>) = G_TRUNC %1 - G_STORE %2, %0 :: (store 1, addrspace 1, align 1) + G_STORE %2, %0 :: (store (<3 x s2>), addrspace 1, align 1) ... @@ -1039,7 +1039,7 @@ body: | ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) - ; SI: G_STORE [[COPY5]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; SI: G_STORE [[COPY5]](s32), [[COPY]](p1) :: (store (<3 x s4>), addrspace 1) ; VI-LABEL: name: test_truncstore_global_v3s8_to_2_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 @@ -1065,22 +1065,21 @@ body: | ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (<3 x s4>), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 %2:_(<3 x s8>) = G_TRUNC %1 - G_STORE %2, %0 :: (store 2, addrspace 1, align 2) + G_STORE %2, %0 :: (store (<3 x s4>), addrspace 1, align 2) ... - --- -name: test_truncstore_global_v4s8_to_3_align1 +name: test_truncstore_global_v4s8_to_v4s5_align1 body: | bb.0: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 - ; SI-LABEL: name: test_truncstore_global_v4s8_to_3_align1 + ; SI-LABEL: name: test_truncstore_global_v4s8_to_v4s5_align1 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) @@ -1101,16 +1100,16 @@ body: | ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[COPY4]](s16) ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT]], [[C1]](s32) ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY4]](s16) - ; SI: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; SI: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; SI: G_STORE [[COPY5]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) - ; VI-LABEL: name: test_truncstore_global_v4s8_to_3_align1 + ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) + ; VI-LABEL: name: test_truncstore_global_v4s8_to_v4s5_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) @@ -1127,29 +1126,29 @@ body: | ; VI: [[COPY2:%[0-9]+]]:_(s16) = COPY [[OR]](s16) ; VI: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[COPY2]], [[C1]](s16) ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY2]](s16) - ; VI: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store 1, addrspace 1) + ; VI: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; VI: G_STORE [[ANYEXT1]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; VI: G_STORE [[ANYEXT1]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store 1 into unknown-address + 2, addrspace 1) + ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 %2:_(<4 x s8>) = G_TRUNC %1 - G_STORE %2, %0 :: (store 3, addrspace 1, align 1) + G_STORE %2, %0 :: (store (<4 x s5>), addrspace 1, align 1) ... --- -name: test_truncstore_global_v4s8_to_3_align2 +name: test_truncstore_global_v4s8_to_v4s5_align2 body: | bb.0: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 - ; SI-LABEL: name: test_truncstore_global_v4s8_to_3_align2 + ; SI-LABEL: name: test_truncstore_global_v4s8_to_v4s5_align2 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) @@ -1167,12 +1166,12 @@ body: | ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC2]] ; SI: [[COPY3:%[0-9]+]]:_(s16) = COPY [[OR]](s16) ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY3]](s16) - ; SI: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; SI: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 2, align 2, addrspace 1) - ; VI-LABEL: name: test_truncstore_global_v4s8_to_3_align2 + ; SI: G_STORE [[COPY4]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) + ; VI-LABEL: name: test_truncstore_global_v4s8_to_v4s5_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) @@ -1188,39 +1187,39 @@ body: | ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] ; VI: [[COPY2:%[0-9]+]]:_(s16) = COPY [[OR]](s16) ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY2]](s16) - ; VI: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store 2, addrspace 1) + ; VI: G_STORE [[ANYEXT]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) - ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store 1 into unknown-address + 2, align 2, addrspace 1) + ; VI: G_STORE [[COPY3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 2, align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 %2:_(<4 x s8>) = G_TRUNC %1 - G_STORE %2, %0 :: (store 3, addrspace 1, align 2) + G_STORE %2, %0 :: (store (<4 x s5>), addrspace 1, align 2) ... --- -name: test_truncstore_global_v4s8_to_3_align4 +name: test_truncstore_global_v4s8_to_v4s5_align4 body: | bb.0: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 - ; SI-LABEL: name: test_truncstore_global_v4s8_to_3_align4 + ; SI-LABEL: name: test_truncstore_global_v4s8_to_v4s5_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; SI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[COPY1]](<4 x s32>) ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[TRUNC]](<4 x s8>) - ; SI: G_STORE [[BITCAST]](s32), [[COPY]](p1) :: (store 3, align 4, addrspace 1) - ; VI-LABEL: name: test_truncstore_global_v4s8_to_3_align4 + ; SI: G_STORE [[BITCAST]](s32), [[COPY]](p1) :: (store (<4 x s5>), align 4, addrspace 1) + ; VI-LABEL: name: test_truncstore_global_v4s8_to_v4s5_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[COPY1]](<4 x s32>) ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[TRUNC]](<4 x s8>) - ; VI: G_STORE [[BITCAST]](s32), [[COPY]](p1) :: (store 3, align 4, addrspace 1) + ; VI: G_STORE [[BITCAST]](s32), [[COPY]](p1) :: (store (<4 x s5>), align 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 %2:_(<4 x s8>) = G_TRUNC %1 - G_STORE %2, %0 :: (store 3, addrspace 1, align 4) + G_STORE %2, %0 :: (store (<4 x s5>), addrspace 1, align 4) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-constant-32bit.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-constant-32bit.mir index 00a63c72afa0b..7b454aea94a44 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-constant-32bit.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-constant-32bit.mir @@ -13,11 +13,11 @@ body: | ; CI: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0 ; CI: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0 ; CI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6) - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load 4, addrspace 6) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), addrspace 6) ; CI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32) ; CI: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) %0:_(p6) = COPY $sgpr0 - %1:_(s64) = G_ZEXTLOAD %0 :: (load 4, align 4, addrspace 6) + %1:_(s64) = G_ZEXTLOAD %0 :: (load (s32), align 4, addrspace 6) $vgpr0_vgpr1 = COPY %1 ... @@ -31,11 +31,11 @@ body: | ; CI: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0 ; CI: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0 ; CI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6) - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load 4, align 2, addrspace 6) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), align 2, addrspace 6) ; CI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32) ; CI: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) %0:_(p6) = COPY $sgpr0 - %1:_(s64) = G_ZEXTLOAD %0 :: (load 4, align 2, addrspace 6) + %1:_(s64) = G_ZEXTLOAD %0 :: (load (s32), align 2, addrspace 6) $vgpr0_vgpr1 = COPY %1 ... @@ -49,11 +49,11 @@ body: | ; CI: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0 ; CI: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0 ; CI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6) - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load 4, align 1, addrspace 6) + ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), align 1, addrspace 6) ; CI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32) ; CI: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) %0:_(p6) = COPY $sgpr0 - %1:_(s64) = G_ZEXTLOAD %0 :: (load 4, align 1, addrspace 6) + %1:_(s64) = G_ZEXTLOAD %0 :: (load (s32), align 1, addrspace 6) $vgpr0_vgpr1 = COPY %1 ... @@ -67,10 +67,10 @@ body: | ; CI: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0 ; CI: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0 ; CI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6) - ; CI: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[MV]](p4) :: (load 1, addrspace 6) + ; CI: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[MV]](p4) :: (load (s8), addrspace 6) ; CI: $vgpr0 = COPY [[ZEXTLOAD]](s32) %0:_(p6) = COPY $sgpr0 - %1:_(s32) = G_ZEXTLOAD %0 :: (load 1, align 1, addrspace 6) + %1:_(s32) = G_ZEXTLOAD %0 :: (load (s8), align 1, addrspace 6) $vgpr0 = COPY %1 ... @@ -84,10 +84,10 @@ body: | ; CI: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0 ; CI: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0 ; CI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6) - ; CI: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[MV]](p4) :: (load 2, addrspace 6) + ; CI: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[MV]](p4) :: (load (s16), addrspace 6) ; CI: $vgpr0 = COPY [[ZEXTLOAD]](s32) %0:_(p6) = COPY $sgpr0 - %1:_(s32) = G_ZEXTLOAD %0 :: (load 2, align 2, addrspace 6) + %1:_(s32) = G_ZEXTLOAD %0 :: (load (s16), align 2, addrspace 6) $vgpr0 = COPY %1 ... @@ -101,9 +101,9 @@ body: | ; CI: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0 ; CI: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0 ; CI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6) - ; CI: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[MV]](p4) :: (load 2, align 1, addrspace 6) + ; CI: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[MV]](p4) :: (load (s16), align 1, addrspace 6) ; CI: $vgpr0 = COPY [[ZEXTLOAD]](s32) %0:_(p6) = COPY $sgpr0 - %1:_(s32) = G_ZEXTLOAD %0 :: (load 2, align 1, addrspace 6) + %1:_(s32) = G_ZEXTLOAD %0 :: (load (s16), align 1, addrspace 6) $vgpr0 = COPY %1 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-flat.mir index 589f9ea7eab98..ca61bda6fcea4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-flat.mir @@ -9,14 +9,14 @@ body: | ; SI-LABEL: name: test_zextload_flat_i32_i8 ; SI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; SI: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load 1) + ; SI: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) ; SI: $vgpr0 = COPY [[ZEXTLOAD]](s32) ; VI-LABEL: name: test_zextload_flat_i32_i8 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load 1) + ; VI: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) ; VI: $vgpr0 = COPY [[ZEXTLOAD]](s32) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_ZEXTLOAD %0 :: (load 1, addrspace 0) + %1:_(s32) = G_ZEXTLOAD %0 :: (load (s8), addrspace 0) $vgpr0 = COPY %1 ... --- @@ -27,14 +27,14 @@ body: | ; SI-LABEL: name: test_zextload_flat_i32_i16 ; SI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; SI: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load 2) + ; SI: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) ; SI: $vgpr0 = COPY [[ZEXTLOAD]](s32) ; VI-LABEL: name: test_zextload_flat_i32_i16 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load 2) + ; VI: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) ; VI: $vgpr0 = COPY [[ZEXTLOAD]](s32) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_ZEXTLOAD %0 :: (load 2, addrspace 0) + %1:_(s32) = G_ZEXTLOAD %0 :: (load (s16), addrspace 0) $vgpr0 = COPY %1 ... --- @@ -45,16 +45,16 @@ body: | ; SI-LABEL: name: test_zextload_flat_i31_i8 ; SI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; SI: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load 1) + ; SI: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[ZEXTLOAD]](s32) ; SI: $vgpr0 = COPY [[COPY1]](s32) ; VI-LABEL: name: test_zextload_flat_i31_i8 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load 1) + ; VI: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[ZEXTLOAD]](s32) ; VI: $vgpr0 = COPY [[COPY1]](s32) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s31) = G_ZEXTLOAD %0 :: (load 1, addrspace 0) + %1:_(s31) = G_ZEXTLOAD %0 :: (load (s8), addrspace 0) %2:_(s32) = G_ANYEXT %1 $vgpr0 = COPY %2 ... @@ -66,16 +66,16 @@ body: | ; SI-LABEL: name: test_zextload_flat_i64_i8 ; SI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; SI: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load 1) + ; SI: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) ; SI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32) ; SI: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) ; VI-LABEL: name: test_zextload_flat_i64_i8 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load 1) + ; VI: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) ; VI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32) ; VI: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_ZEXTLOAD %0 :: (load 1, addrspace 0) + %1:_(s64) = G_ZEXTLOAD %0 :: (load (s8), addrspace 0) $vgpr0_vgpr1 = COPY %1 ... --- @@ -86,16 +86,16 @@ body: | ; SI-LABEL: name: test_zextload_flat_i64_i16 ; SI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; SI: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load 2) + ; SI: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) ; SI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32) ; SI: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) ; VI-LABEL: name: test_zextload_flat_i64_i16 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load 2) + ; VI: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) ; VI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32) ; VI: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_ZEXTLOAD %0 :: (load 2, addrspace 0) + %1:_(s64) = G_ZEXTLOAD %0 :: (load (s16), addrspace 0) $vgpr0_vgpr1 = COPY %1 ... --- @@ -106,15 +106,15 @@ body: | ; SI-LABEL: name: test_zextload_flat_i64_i32 ; SI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) + ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) ; SI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32) ; SI: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) ; VI-LABEL: name: test_zextload_flat_i64_i32 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) ; VI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32) ; VI: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) %0:_(p0) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_ZEXTLOAD %0 :: (load 4, addrspace 0) + %1:_(s64) = G_ZEXTLOAD %0 :: (load (s32), addrspace 0) $vgpr0_vgpr1 = COPY %1 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-global.mir index 6ea4383317836..c877762540da5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-global.mir @@ -6,12 +6,12 @@ # FIXME: Run with and without unaligned access turned on # ERR-NOT: remark -# ERR: remark: :0:0: unable to legalize instruction: %1:_(<2 x s16>) = G_ZEXTLOAD %0:_(p1) :: (load 2, addrspace 1) (in function: test_zextload_global_v2i16_from_2) -# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x s32>) = G_ZEXTLOAD %0:_(p1) :: (load 2, addrspace 1) (in function: test_zextload_global_v2i32_from_2) -# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x s32>) = G_ZEXTLOAD %0:_(p1) :: (load 4, addrspace 1) (in function: test_zextload_global_v2i32_from_4) -# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x s64>) = G_ZEXTLOAD %0:_(p1) :: (load 4, addrspace 1) (in function: test_zextload_global_v2i64_from_4) -# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x s64>) = G_ZEXTLOAD %0:_(p1) :: (load 8, addrspace 1) (in function: test_zextload_global_v2i64_from_8) -# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(s128) = G_ZEXTLOAD %0:_(p1) :: (load 8, addrspace 1) (in function: test_zextload_global_s128_8) +# ERR: remark: :0:0: unable to legalize instruction: %1:_(<2 x s16>) = G_ZEXTLOAD %0:_(p1) :: (load (s16), addrspace 1) (in function: test_zextload_global_v2i16_from_2) +# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x s32>) = G_ZEXTLOAD %0:_(p1) :: (load (s16), addrspace 1) (in function: test_zextload_global_v2i32_from_2) +# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x s32>) = G_ZEXTLOAD %0:_(p1) :: (load (s32), addrspace 1) (in function: test_zextload_global_v2i32_from_4) +# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x s64>) = G_ZEXTLOAD %0:_(p1) :: (load (s32), addrspace 1) (in function: test_zextload_global_v2i64_from_4) +# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(<2 x s64>) = G_ZEXTLOAD %0:_(p1) :: (load (s64), addrspace 1) (in function: test_zextload_global_v2i64_from_8) +# ERR-NEXT: remark: :0:0: unable to legalize instruction: %1:_(s128) = G_ZEXTLOAD %0:_(p1) :: (load (s64), addrspace 1) (in function: test_zextload_global_s128_8) # ERR-NOT: remark --- @@ -22,14 +22,14 @@ body: | ; GFX8-LABEL: name: test_zextload_global_i32_i8 ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GFX8: $vgpr0 = COPY [[ZEXTLOAD]](s32) ; GFX6-LABEL: name: test_zextload_global_i32_i8 ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GFX6: $vgpr0 = COPY [[ZEXTLOAD]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_ZEXTLOAD %0 :: (load 1, addrspace 1) + %1:_(s32) = G_ZEXTLOAD %0 :: (load (s8), addrspace 1) $vgpr0 = COPY %1 ... --- @@ -40,14 +40,14 @@ body: | ; GFX8-LABEL: name: test_zextload_global_i32_i16 ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; GFX8: $vgpr0 = COPY [[ZEXTLOAD]](s32) ; GFX6-LABEL: name: test_zextload_global_i32_i16 ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; GFX6: $vgpr0 = COPY [[ZEXTLOAD]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_ZEXTLOAD %0 :: (load 2, addrspace 1) + %1:_(s32) = G_ZEXTLOAD %0 :: (load (s16), addrspace 1) $vgpr0 = COPY %1 ... --- @@ -58,16 +58,16 @@ body: | ; GFX8-LABEL: name: test_zextload_global_i31_i8 ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY [[ZEXTLOAD]](s32) ; GFX8: $vgpr0 = COPY [[COPY1]](s32) ; GFX6-LABEL: name: test_zextload_global_i31_i8 ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY [[ZEXTLOAD]](s32) ; GFX6: $vgpr0 = COPY [[COPY1]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s31) = G_ZEXTLOAD %0 :: (load 1, addrspace 1) + %1:_(s31) = G_ZEXTLOAD %0 :: (load (s8), addrspace 1) %2:_(s32) = G_ANYEXT %1 $vgpr0 = COPY %2 ... @@ -79,16 +79,16 @@ body: | ; GFX8-LABEL: name: test_zextload_global_i64_i8 ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GFX8: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32) ; GFX8: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) ; GFX6-LABEL: name: test_zextload_global_i64_i8 ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1) + ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) ; GFX6: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32) ; GFX6: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_ZEXTLOAD %0 :: (load 1, addrspace 1) + %1:_(s64) = G_ZEXTLOAD %0 :: (load (s8), addrspace 1) $vgpr0_vgpr1 = COPY %1 ... --- @@ -99,16 +99,16 @@ body: | ; GFX8-LABEL: name: test_zextload_global_i64_i16 ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; GFX8: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32) ; GFX8: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) ; GFX6-LABEL: name: test_zextload_global_i64_i16 ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; GFX6: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32) ; GFX6: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_ZEXTLOAD %0 :: (load 2, addrspace 1) + %1:_(s64) = G_ZEXTLOAD %0 :: (load (s16), addrspace 1) $vgpr0_vgpr1 = COPY %1 ... --- @@ -119,16 +119,16 @@ body: | ; GFX8-LABEL: name: test_zextload_global_i64_i32 ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX8: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; GFX8: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32) ; GFX8: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) ; GFX6-LABEL: name: test_zextload_global_i64_i32 ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; GFX6: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32) ; GFX6: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_ZEXTLOAD %0 :: (load 4, addrspace 1) + %1:_(s64) = G_ZEXTLOAD %0 :: (load (s32), addrspace 1) $vgpr0_vgpr1 = COPY %1 ... @@ -140,14 +140,14 @@ body: | ; GFX8-LABEL: name: test_zextload_global_s32_from_2_align1 ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load 2, align 1, addrspace 1) + ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1) ; GFX8: $vgpr0 = COPY [[ZEXTLOAD]](s32) ; GFX6-LABEL: name: test_zextload_global_s32_from_2_align1 ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load 2, align 1, addrspace 1) + ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1) ; GFX6: $vgpr0 = COPY [[ZEXTLOAD]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = G_ZEXTLOAD %0 :: (load 2, align 1, addrspace 1) + %1:_(s32) = G_ZEXTLOAD %0 :: (load (s16), align 1, addrspace 1) $vgpr0 = COPY %1 ... @@ -159,16 +159,16 @@ body: | ; GFX8-LABEL: name: test_zextload_global_s64_from_2_align1 ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load 2, align 1, addrspace 1) + ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1) ; GFX8: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32) ; GFX8: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) ; GFX6-LABEL: name: test_zextload_global_s64_from_2_align1 ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load 2, align 1, addrspace 1) + ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1) ; GFX6: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32) ; GFX6: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s64) = G_ZEXTLOAD %0 :: (load 2, align 1, addrspace 1) + %1:_(s64) = G_ZEXTLOAD %0 :: (load (s16), align 1, addrspace 1) $vgpr0_vgpr1 = COPY %1 ... @@ -180,14 +180,14 @@ body: | ; GFX8-LABEL: name: test_zextload_global_v2i16_from_2 ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s16>) = G_ZEXTLOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s16>) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; GFX8: $vgpr0 = COPY [[ZEXTLOAD]](<2 x s16>) ; GFX6-LABEL: name: test_zextload_global_v2i16_from_2 ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s16>) = G_ZEXTLOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s16>) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; GFX6: $vgpr0 = COPY [[ZEXTLOAD]](<2 x s16>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s16>) = G_ZEXTLOAD %0 :: (load 2, addrspace 1) + %1:_(<2 x s16>) = G_ZEXTLOAD %0 :: (load (s16), addrspace 1) $vgpr0 = COPY %1 ... @@ -199,14 +199,14 @@ body: | ; GFX8-LABEL: name: test_zextload_global_v2i32_from_2 ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_ZEXTLOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; GFX8: $vgpr0_vgpr1 = COPY [[ZEXTLOAD]](<2 x s32>) ; GFX6-LABEL: name: test_zextload_global_v2i32_from_2 ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_ZEXTLOAD [[COPY]](p1) :: (load 2, addrspace 1) + ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1) ; GFX6: $vgpr0_vgpr1 = COPY [[ZEXTLOAD]](<2 x s32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_ZEXTLOAD %0 :: (load 2, addrspace 1) + %1:_(<2 x s32>) = G_ZEXTLOAD %0 :: (load (s16), addrspace 1) $vgpr0_vgpr1 = COPY %1 ... @@ -218,14 +218,14 @@ body: | ; GFX8-LABEL: name: test_zextload_global_v2i32_from_4 ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_ZEXTLOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_ZEXTLOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; GFX8: $vgpr0_vgpr1 = COPY [[ZEXTLOAD]](<2 x s32>) ; GFX6-LABEL: name: test_zextload_global_v2i32_from_4 ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_ZEXTLOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s32>) = G_ZEXTLOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; GFX6: $vgpr0_vgpr1 = COPY [[ZEXTLOAD]](<2 x s32>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s32>) = G_ZEXTLOAD %0 :: (load 4, addrspace 1) + %1:_(<2 x s32>) = G_ZEXTLOAD %0 :: (load (s32), addrspace 1) $vgpr0_vgpr1 = COPY %1 ... @@ -237,14 +237,14 @@ body: | ; GFX8-LABEL: name: test_zextload_global_v2i64_from_4 ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_ZEXTLOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_ZEXTLOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ZEXTLOAD]](<2 x s64>) ; GFX6-LABEL: name: test_zextload_global_v2i64_from_4 ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_ZEXTLOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_ZEXTLOAD [[COPY]](p1) :: (load (s32), addrspace 1) ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ZEXTLOAD]](<2 x s64>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = G_ZEXTLOAD %0 :: (load 4, addrspace 1) + %1:_(<2 x s64>) = G_ZEXTLOAD %0 :: (load (s32), addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -256,14 +256,14 @@ body: | ; GFX8-LABEL: name: test_zextload_global_v2i64_from_8 ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_ZEXTLOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_ZEXTLOAD [[COPY]](p1) :: (load (s64), addrspace 1) ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ZEXTLOAD]](<2 x s64>) ; GFX6-LABEL: name: test_zextload_global_v2i64_from_8 ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_ZEXTLOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(<2 x s64>) = G_ZEXTLOAD [[COPY]](p1) :: (load (s64), addrspace 1) ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ZEXTLOAD]](<2 x s64>) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = G_ZEXTLOAD %0 :: (load 8, addrspace 1) + %1:_(<2 x s64>) = G_ZEXTLOAD %0 :: (load (s64), addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... @@ -275,13 +275,13 @@ body: | ; GFX8-LABEL: name: test_zextload_global_s128_8 ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s128) = G_ZEXTLOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s128) = G_ZEXTLOAD [[COPY]](p1) :: (load (s64), addrspace 1) ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ZEXTLOAD]](s128) ; GFX6-LABEL: name: test_zextload_global_s128_8 ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s128) = G_ZEXTLOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s128) = G_ZEXTLOAD [[COPY]](p1) :: (load (s64), addrspace 1) ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ZEXTLOAD]](s128) %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s128) = G_ZEXTLOAD %0 :: (load 8, addrspace 1) + %1:_(s128) = G_ZEXTLOAD %0 :: (load (s64), addrspace 1) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-local.mir index 648b9697bdcbd..8b7cddb5c14e6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-local.mir @@ -9,10 +9,10 @@ body: | ; CHECK-LABEL: name: test_zextload_local_i32_i8 ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CHECK: $vgpr0 = COPY [[ZEXTLOAD]](s32) %0:_(p3) = COPY $vgpr0 - %1:_(s32) = G_ZEXTLOAD %0 :: (load 1, addrspace 3) + %1:_(s32) = G_ZEXTLOAD %0 :: (load (s8), addrspace 3) $vgpr0 = COPY %1 ... --- @@ -23,10 +23,10 @@ body: | ; CHECK-LABEL: name: test_zextload_local_i32_i16 ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; CHECK: $vgpr0 = COPY [[ZEXTLOAD]](s32) %0:_(p3) = COPY $vgpr0 - %1:_(s32) = G_ZEXTLOAD %0 :: (load 2, addrspace 3) + %1:_(s32) = G_ZEXTLOAD %0 :: (load (s16), addrspace 3) $vgpr0 = COPY %1 ... --- @@ -37,11 +37,11 @@ body: | ; CHECK-LABEL: name: test_zextload_local_i31_i8 ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[ZEXTLOAD]](s32) ; CHECK: $vgpr0 = COPY [[COPY1]](s32) %0:_(p3) = COPY $vgpr0 - %1:_(s31) = G_ZEXTLOAD %0 :: (load 1, addrspace 3) + %1:_(s31) = G_ZEXTLOAD %0 :: (load (s8), addrspace 3) %2:_(s32) = G_ANYEXT %1 $vgpr0 = COPY %2 ... @@ -53,11 +53,11 @@ body: | ; CHECK-LABEL: name: test_zextload_local_i64_i8 ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load 1, addrspace 3) + ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s8), addrspace 3) ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32) ; CHECK: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) %0:_(p3) = COPY $vgpr0 - %1:_(s64) = G_ZEXTLOAD %0 :: (load 1, addrspace 3) + %1:_(s64) = G_ZEXTLOAD %0 :: (load (s8), addrspace 3) $vgpr0_vgpr1 = COPY %1 ... --- @@ -68,11 +68,11 @@ body: | ; CHECK-LABEL: name: test_zextload_local_i64_i16 ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load 2, addrspace 3) + ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load (s16), addrspace 3) ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32) ; CHECK: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) %0:_(p3) = COPY $vgpr0 - %1:_(s64) = G_ZEXTLOAD %0 :: (load 2, addrspace 3) + %1:_(s64) = G_ZEXTLOAD %0 :: (load (s16), addrspace 3) $vgpr0_vgpr1 = COPY %1 ... --- @@ -83,10 +83,10 @@ body: | ; CHECK-LABEL: name: test_zextload_local_i64_i32 ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32), addrspace 3) ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32) ; CHECK: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) %0:_(p3) = COPY $vgpr0 - %1:_(s64) = G_ZEXTLOAD %0 :: (load 4, addrspace 3) + %1:_(s64) = G_ZEXTLOAD %0 :: (load (s32), addrspace 3) $vgpr0_vgpr1 = COPY %1 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-private.mir index 67619983436cd..757255b160fae 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-private.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-private.mir @@ -10,10 +10,10 @@ body: | ; CHECK-LABEL: name: test_zextload_private_i32_i8 ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; CHECK: $vgpr0 = COPY [[ZEXTLOAD]](s32) %0:_(p5) = COPY $vgpr0 - %1:_(s32) = G_ZEXTLOAD %0 :: (load 1, addrspace 5) + %1:_(s32) = G_ZEXTLOAD %0 :: (load (s8), addrspace 5) $vgpr0 = COPY %1 ... --- @@ -24,10 +24,10 @@ body: | ; CHECK-LABEL: name: test_zextload_private_i32_i16 ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; CHECK: $vgpr0 = COPY [[ZEXTLOAD]](s32) %0:_(p5) = COPY $vgpr0 - %1:_(s32) = G_ZEXTLOAD %0 :: (load 2, addrspace 5) + %1:_(s32) = G_ZEXTLOAD %0 :: (load (s16), addrspace 5) $vgpr0 = COPY %1 ... --- @@ -38,11 +38,11 @@ body: | ; CHECK-LABEL: name: test_zextload_private_i31_i8 ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[ZEXTLOAD]](s32) ; CHECK: $vgpr0 = COPY [[COPY1]](s32) %0:_(p5) = COPY $vgpr0 - %1:_(s31) = G_ZEXTLOAD %0 :: (load 1, addrspace 5) + %1:_(s31) = G_ZEXTLOAD %0 :: (load (s8), addrspace 5) %2:_(s32) = G_ANYEXT %1 $vgpr0 = COPY %2 ... @@ -54,11 +54,11 @@ body: | ; CHECK-LABEL: name: test_zextload_private_i64_i8 ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load 1, addrspace 5) + ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32) ; CHECK: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) %0:_(p5) = COPY $vgpr0 - %1:_(s64) = G_ZEXTLOAD %0 :: (load 1, addrspace 5) + %1:_(s64) = G_ZEXTLOAD %0 :: (load (s8), addrspace 5) $vgpr0_vgpr1 = COPY %1 ... --- @@ -69,11 +69,11 @@ body: | ; CHECK-LABEL: name: test_zextload_private_i64_i16 ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load 2, addrspace 5) + ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32) ; CHECK: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) %0:_(p5) = COPY $vgpr0 - %1:_(s64) = G_ZEXTLOAD %0 :: (load 2, addrspace 5) + %1:_(s64) = G_ZEXTLOAD %0 :: (load (s16), addrspace 5) $vgpr0_vgpr1 = COPY %1 ... --- @@ -84,10 +84,10 @@ body: | ; CHECK-LABEL: name: test_zextload_private_i64_i32 ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), addrspace 5) ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32) ; CHECK: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) %0:_(p5) = COPY $vgpr0 - %1:_(s64) = G_ZEXTLOAD %0 :: (load 4, addrspace 5) + %1:_(s64) = G_ZEXTLOAD %0 :: (load (s32), addrspace 5) $vgpr0_vgpr1 = COPY %1 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll index f5092c1fc400f..87ef4479d4bf5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll @@ -33,7 +33,7 @@ define amdgpu_ps float @ds_fmax_f32_ss(float addrspace(3)* inreg %ptr, float inr ; GFX8-MIR: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; GFX8-MIR: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] ; GFX8-MIR: $m0 = S_MOV_B32 -1 - ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY2]], [[COPY3]], 0, 0, implicit $m0, implicit $exec :: (load store 4 on %ir.ptr, addrspace 3) + ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY2]], [[COPY3]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) ; GFX8-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_]] ; GFX8-MIR: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX9-MIR-LABEL: name: ds_fmax_f32_ss @@ -43,7 +43,7 @@ define amdgpu_ps float @ds_fmax_f32_ss(float addrspace(3)* inreg %ptr, float inr ; GFX9-MIR: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GFX9-MIR: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; GFX9-MIR: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] - ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY2]], [[COPY3]], 0, 0, implicit $exec :: (load store 4 on %ir.ptr, addrspace 3) + ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY2]], [[COPY3]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) ; GFX9-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]] ; GFX9-MIR: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) @@ -75,7 +75,7 @@ define amdgpu_ps float @ds_fmax_f32_ss_offset(float addrspace(3)* inreg %ptr, fl ; GFX8-MIR: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] ; GFX8-MIR: $m0 = S_MOV_B32 -1 ; GFX8-MIR: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY3]], [[COPY2]], 512, 0, implicit $m0, implicit $exec :: (load store 4 on %ir.gep, addrspace 3) + ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY3]], [[COPY2]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) ; GFX8-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_]] ; GFX8-MIR: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX9-MIR-LABEL: name: ds_fmax_f32_ss_offset @@ -85,7 +85,7 @@ define amdgpu_ps float @ds_fmax_f32_ss_offset(float addrspace(3)* inreg %ptr, fl ; GFX9-MIR: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GFX9-MIR: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] ; GFX9-MIR: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY3]], [[COPY2]], 512, 0, implicit $exec :: (load store 4 on %ir.gep, addrspace 3) + ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY3]], [[COPY2]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) ; GFX9-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]] ; GFX9-MIR: SI_RETURN_TO_EPILOG implicit $vgpr0 %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 @@ -116,7 +116,7 @@ define amdgpu_ps void @ds_fmax_f32_ss_nortn(float addrspace(3)* inreg %ptr, floa ; GFX8-MIR: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; GFX8-MIR: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] ; GFX8-MIR: $m0 = S_MOV_B32 -1 - ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY2]], [[COPY3]], 0, 0, implicit $m0, implicit $exec :: (load store 4 on %ir.ptr, addrspace 3) + ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY2]], [[COPY3]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) ; GFX8-MIR: S_ENDPGM 0 ; GFX9-MIR-LABEL: name: ds_fmax_f32_ss_nortn ; GFX9-MIR: bb.1 (%ir-block.0): @@ -125,7 +125,7 @@ define amdgpu_ps void @ds_fmax_f32_ss_nortn(float addrspace(3)* inreg %ptr, floa ; GFX9-MIR: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GFX9-MIR: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; GFX9-MIR: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] - ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY2]], [[COPY3]], 0, 0, implicit $exec :: (load store 4 on %ir.ptr, addrspace 3) + ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY2]], [[COPY3]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) ; GFX9-MIR: S_ENDPGM 0 %unused = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) ret void @@ -154,7 +154,7 @@ define amdgpu_ps void @ds_fmax_f32_ss_offset_nortn(float addrspace(3)* inreg %pt ; GFX8-MIR: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] ; GFX8-MIR: $m0 = S_MOV_B32 -1 ; GFX8-MIR: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY3]], [[COPY2]], 512, 0, implicit $m0, implicit $exec :: (load store 4 on %ir.gep, addrspace 3) + ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY3]], [[COPY2]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) ; GFX8-MIR: S_ENDPGM 0 ; GFX9-MIR-LABEL: name: ds_fmax_f32_ss_offset_nortn ; GFX9-MIR: bb.1 (%ir-block.0): @@ -163,7 +163,7 @@ define amdgpu_ps void @ds_fmax_f32_ss_offset_nortn(float addrspace(3)* inreg %pt ; GFX9-MIR: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GFX9-MIR: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] ; GFX9-MIR: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY3]], [[COPY2]], 512, 0, implicit $exec :: (load store 4 on %ir.gep, addrspace 3) + ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY3]], [[COPY2]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) ; GFX9-MIR: S_ENDPGM 0 %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 %unused = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) @@ -192,7 +192,7 @@ define float @ds_fmax_f32_vv(float addrspace(3)* %ptr, float %val) { ; GFX8-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX8-MIR: $m0 = S_MOV_B32 -1 - ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store 4 on %ir.ptr, addrspace 3) + ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) ; GFX8-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_]] ; GFX8-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] ; GFX8-MIR: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 @@ -202,7 +202,7 @@ define float @ds_fmax_f32_vv(float addrspace(3)* %ptr, float %val) { ; GFX9-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store 4 on %ir.ptr, addrspace 3) + ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) ; GFX9-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]] ; GFX9-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] ; GFX9-MIR: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 @@ -232,7 +232,7 @@ define float @ds_fmax_f32_vv_offset(float addrspace(3)* %ptr, float %val) { ; GFX8-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX8-MIR: $m0 = S_MOV_B32 -1 - ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 512, 0, implicit $m0, implicit $exec :: (load store 4 on %ir.gep, addrspace 3) + ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) ; GFX8-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_]] ; GFX8-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] ; GFX8-MIR: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 @@ -242,7 +242,7 @@ define float @ds_fmax_f32_vv_offset(float addrspace(3)* %ptr, float %val) { ; GFX9-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 512, 0, implicit $exec :: (load store 4 on %ir.gep, addrspace 3) + ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) ; GFX9-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]] ; GFX9-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] ; GFX9-MIR: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 @@ -273,7 +273,7 @@ define void @ds_fmax_f32_vv_nortn(float addrspace(3)* %ptr, float %val) { ; GFX8-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX8-MIR: $m0 = S_MOV_B32 -1 - ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store 4 on %ir.ptr, addrspace 3) + ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) ; GFX8-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] ; GFX8-MIR: S_SETPC_B64_return [[COPY3]] ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_nortn @@ -282,7 +282,7 @@ define void @ds_fmax_f32_vv_nortn(float addrspace(3)* %ptr, float %val) { ; GFX9-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store 4 on %ir.ptr, addrspace 3) + ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) ; GFX9-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] ; GFX9-MIR: S_SETPC_B64_return [[COPY3]] %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) @@ -311,7 +311,7 @@ define void @ds_fmax_f32_vv_offset_nortn(float addrspace(3)* %ptr, float %val) { ; GFX8-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX8-MIR: $m0 = S_MOV_B32 -1 - ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 512, 0, implicit $m0, implicit $exec :: (load store 4 on %ir.gep, addrspace 3) + ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) ; GFX8-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] ; GFX8-MIR: S_SETPC_B64_return [[COPY3]] ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_offset_nortn @@ -320,7 +320,7 @@ define void @ds_fmax_f32_vv_offset_nortn(float addrspace(3)* %ptr, float %val) { ; GFX9-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 512, 0, implicit $exec :: (load store 4 on %ir.gep, addrspace 3) + ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) ; GFX9-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] ; GFX9-MIR: S_SETPC_B64_return [[COPY3]] %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 @@ -350,7 +350,7 @@ define float @ds_fmax_f32_vv_volatile(float addrspace(3)* %ptr, float %val) { ; GFX8-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX8-MIR: $m0 = S_MOV_B32 -1 - ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (volatile load store 4 on %ir.ptr, addrspace 3) + ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (volatile load store (s32) on %ir.ptr, addrspace 3) ; GFX8-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_]] ; GFX8-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] ; GFX8-MIR: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 @@ -360,7 +360,7 @@ define float @ds_fmax_f32_vv_volatile(float addrspace(3)* %ptr, float %val) { ; GFX9-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (volatile load store 4 on %ir.ptr, addrspace 3) + ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (volatile load store (s32) on %ir.ptr, addrspace 3) ; GFX9-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]] ; GFX9-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] ; GFX9-MIR: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.sema.release.all.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.sema.release.all.ll index 0509ea3fe4dea..8d4b1cca9335b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.sema.release.all.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.sema.release.all.ll @@ -4,5 +4,5 @@ ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.release.all.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %S/../llvm.amdgcn.ds.gws.sema.release.all.ll ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.release.all.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %S/../llvm.amdgcn.ds.gws.sema.release.all.ll -; GFX6ERR-GISEL: LLVM ERROR: cannot select: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.sema.release.all), %{{[0-9]+}}:sgpr(s32) :: (store 4 into custom "GWSResource") (in function: gws_sema_release_all_offset0) +; GFX6ERR-GISEL: LLVM ERROR: cannot select: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.sema.release.all), %{{[0-9]+}}:sgpr(s32) :: (store (s32) into custom "GWSResource") (in function: gws_sema_release_all_offset0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir index 4351cdb7785c1..d78e1c299e2d7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir @@ -18,7 +18,7 @@ body: | ; GFX6: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX6: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_si:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_si [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX6: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_si:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_si [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V1_V1_si]].sub0 ; GFX6: $vgpr0 = COPY [[COPY3]] ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -27,7 +27,7 @@ body: | ; GFX8: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_vi:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX8: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_vi:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V1_V1_vi]].sub0 ; GFX8: $vgpr0 = COPY [[COPY3]] ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -36,14 +36,14 @@ body: | ; GFX10: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX10: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10_]].sub0 ; GFX10: $vgpr0 = COPY [[COPY3]] ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0 %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), %1(<2 x s32>), $noreg, %2(s32), %0(<8 x s32>), 0, 0, 0 :: (volatile dereferenceable load store 4 on custom "ImageResource") + %3:vgpr(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), %1(<2 x s32>), $noreg, %2(s32), %0(<8 x s32>), 0, 0, 0 :: (volatile dereferenceable load store (s32) on custom "ImageResource") $vgpr0 = COPY %3(s32) SI_RETURN_TO_EPILOG implicit $vgpr0 ... @@ -63,26 +63,26 @@ body: | ; GFX6: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX6: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_si:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_si [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX6: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_si:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_si [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX6: S_ENDPGM 0 ; GFX8-LABEL: name: atomic_cmpswap_i32_1d_no_return ; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2 ; GFX8: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_vi:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX8: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_vi:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX8: S_ENDPGM 0 ; GFX10-LABEL: name: atomic_cmpswap_i32_1d_no_return ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2 ; GFX10: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "ImageResource") + ; GFX10: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "ImageResource") ; GFX10: S_ENDPGM 0 %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 %2:vgpr(s32) = COPY $vgpr2 - %3:vgpr(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), %1(<2 x s32>), $noreg, %2(s32), %0(<8 x s32>), 0, 0, 0 :: (volatile dereferenceable load store 4 on custom "ImageResource") + %3:vgpr(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), %1(<2 x s32>), $noreg, %2(s32), %0(<8 x s32>), 0, 0, 0 :: (volatile dereferenceable load store (s32) on custom "ImageResource") S_ENDPGM 0 ... @@ -101,7 +101,7 @@ body: | ; GFX6: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX6: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX6: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_si [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store 8 on custom "ImageResource") + ; GFX6: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_si [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource") ; GFX6: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si]].sub0_sub1 ; GFX6: $vgpr0_vgpr1 = COPY [[COPY3]] ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1 @@ -110,7 +110,7 @@ body: | ; GFX8: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX8: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX8: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store 8 on custom "ImageResource") + ; GFX8: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource") ; GFX8: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi]].sub0_sub1 ; GFX8: $vgpr0_vgpr1 = COPY [[COPY3]] ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1 @@ -119,14 +119,14 @@ body: | ; GFX10: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX10: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX10: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store 8 on custom "ImageResource") + ; GFX10: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource") ; GFX10: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_]].sub0_sub1 ; GFX10: $vgpr0_vgpr1 = COPY [[COPY3]] ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1 %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:vgpr(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %2:vgpr(s32) = COPY $vgpr4 - %3:vgpr(s64) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), %1(<2 x s64>), $noreg, %2(s32), %0(<8 x s32>), 0, 0, 0 :: (volatile dereferenceable load store 8 on custom "ImageResource") + %3:vgpr(s64) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), %1(<2 x s64>), $noreg, %2(s32), %0(<8 x s32>), 0, 0, 0 :: (volatile dereferenceable load store (s64) on custom "ImageResource") $vgpr0_vgpr1 = COPY %3(s64) SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1 ... @@ -146,25 +146,25 @@ body: | ; GFX6: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX6: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX6: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_si [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store 8 on custom "ImageResource") + ; GFX6: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_si [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource") ; GFX6: S_ENDPGM 0 ; GFX8-LABEL: name: atomic_cmpswap_i64_1d_no_return ; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX8: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX8: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX8: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store 8 on custom "ImageResource") + ; GFX8: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource") ; GFX8: S_ENDPGM 0 ; GFX10-LABEL: name: atomic_cmpswap_i64_1d_no_return ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; GFX10: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX10: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX10: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store 8 on custom "ImageResource") + ; GFX10: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource") ; GFX10: S_ENDPGM 0 %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:vgpr(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %2:vgpr(s32) = COPY $vgpr4 - %3:vgpr(s64) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), %1(<2 x s64>), $noreg, %2(s32), %0(<8 x s32>), 0, 0, 0 :: (volatile dereferenceable load store 8 on custom "ImageResource") + %3:vgpr(s64) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), %1(<2 x s64>), $noreg, %2(s32), %0(<8 x s32>), 0, 0, 0 :: (volatile dereferenceable load store (s64) on custom "ImageResource") S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll index 21cb91c14723d..6bdeb567e6729 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll @@ -14,7 +14,7 @@ define amdgpu_ps float @raw_buffer_atomic_add_i32__vgpr_val__sgpr_rsrc__vgpr_vof ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_OFFEN_RTN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -34,7 +34,7 @@ define amdgpu_ps float @raw_buffer_atomic_add_i32_noret__vgpr_val__sgpr_rsrc__vg ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_OFFEN_RTN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -56,7 +56,7 @@ define amdgpu_ps <2 x float> @raw_buffer_atomic_add_i64__vgpr_val__sgpr_rsrc__vg ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[BUFFER_ATOMIC_ADD_X2_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_OFFEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store 8 on custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_ATOMIC_ADD_X2_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_OFFEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on custom "BufferResource", align 1, addrspace 4) ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_X2_OFFEN_RTN]].sub0 ; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_X2_OFFEN_RTN]].sub1 ; CHECK: $vgpr0 = COPY [[COPY8]] @@ -81,7 +81,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_i64_noret__vgpr_val__sgpr_rsrc__vgp ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[BUFFER_ATOMIC_ADD_X2_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_OFFEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store 8 on custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_ATOMIC_ADD_X2_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_OFFEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 %ret = call i64 @llvm.amdgcn.raw.buffer.atomic.add.i64(i64 %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -121,7 +121,7 @@ define amdgpu_ps float @raw_buffer_atomic_add_i32__sgpr_val__vgpr_rsrc__sgpr_vof ; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec ; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc - ; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -170,7 +170,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_i32_noret__sgpr_val__vgpr_rsrc__sgp ; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec ; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc - ; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -195,7 +195,7 @@ define amdgpu_ps float @raw_buffer_atomic_add_i32__vgpr_val__sgpr_rsrc__vgpr_vof ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4) + ; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 4095, align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_OFFEN_RTN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %voffset = add i32 %voffset.base, 4095 @@ -217,7 +217,7 @@ define amdgpu_ps float @raw_buffer_atomic_add_i32__vgpr_val__sgpr_rsrc__vgpr_vof ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 3, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_OFFEN_RTN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 2) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll index 39c2a0ce688db..6ded27f2a4c13 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll @@ -16,7 +16,7 @@ define amdgpu_ps float @raw_buffer_atomic_cmpswap_i32__vgpr_val__vgpr_cmp__sgpr_ ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; CHECK: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0 ; CHECK: $vgpr0 = COPY [[COPY8]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -40,7 +40,7 @@ define amdgpu_ps void @raw_buffer_atomic_cmpswap_i32_noret__vgpr_val__vgpr_cmp__ ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; CHECK: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0 ; CHECK: S_ENDPGM 0 %ret = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %val, i32 %cmp, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -84,7 +84,7 @@ define amdgpu_ps float @raw_buffer_atomic_cmpswap_i32__sgpr_val__sgpr_cmp__vgpr_ ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec ; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 - ; CHECK: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE4]], [[COPY10]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE4]], [[COPY10]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; CHECK: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0 ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc @@ -137,7 +137,7 @@ define amdgpu_ps void @raw_buffer_atomic_cmpswap_i32_noret__sgpr_val__sgpr_cmp__ ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec ; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 - ; CHECK: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE4]], [[COPY10]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE4]], [[COPY10]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; CHECK: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0 ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc @@ -165,7 +165,7 @@ define amdgpu_ps float @raw_buffer_atomic_cmpswap_i32__vgpr_val__vgpr_cmp__sgpr_ ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; CHECK: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 4095, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4) + ; CHECK: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 4095, align 1, addrspace 4) ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0 ; CHECK: $vgpr0 = COPY [[COPY8]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll index c29f5cf8f977c..c345d74714e40 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll @@ -15,7 +15,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp ; GFX908: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX908: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX908: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; GFX908: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; GFX908: S_ENDPGM 0 ; GFX90A-LABEL: name: raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset ; GFX90A: bb.1 (%ir-block.0): @@ -28,7 +28,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp ; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; GFX90A: S_ENDPGM 0 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -46,7 +46,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp ; GFX908: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX908: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX908: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4) + ; GFX908: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 4095, align 1, addrspace 4) ; GFX908: S_ENDPGM 0 ; GFX90A-LABEL: name: raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset_plus4095__sgpr_soffset ; GFX90A: bb.1 (%ir-block.0): @@ -59,7 +59,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp ; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4) + ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 4095, align 1, addrspace 4) ; GFX90A: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4095 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -77,7 +77,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp ; GFX908: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX908: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX908: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4) + ; GFX908: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 4095, align 1, addrspace 4) ; GFX908: S_ENDPGM 0 ; GFX90A-LABEL: name: raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset_4095__sgpr_soffset ; GFX90A: bb.1 (%ir-block.0): @@ -89,7 +89,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp ; GFX90A: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4) + ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 4095, align 1, addrspace 4) ; GFX90A: S_ENDPGM 0 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0) ret void @@ -107,7 +107,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_v ; GFX908: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX908: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX908: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; GFX908: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; GFX908: S_ENDPGM 0 ; GFX90A-LABEL: name: raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset ; GFX90A: bb.1 (%ir-block.0): @@ -119,7 +119,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_v ; GFX90A: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; GFX90A: S_ENDPGM 0 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0) ret void @@ -159,7 +159,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__sgp ; GFX908: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; GFX908: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec ; GFX908: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc - ; GFX908: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; GFX908: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; GFX908: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX908: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX908: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -200,7 +200,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__sgp ; GFX90A: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; GFX90A: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec ; GFX90A: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc - ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; GFX90A: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX90A: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -245,7 +245,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__0_v ; GFX908: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; GFX908: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec ; GFX908: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc - ; GFX908: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; GFX908: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; GFX908: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX908: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX908: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -284,7 +284,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__0_v ; GFX90A: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; GFX90A: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec ; GFX90A: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc - ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; GFX90A: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX90A: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -309,7 +309,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp ; GFX908: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX908: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX908: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4) + ; GFX908: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 4095, align 1, addrspace 4) ; GFX908: S_ENDPGM 0 ; GFX90A-LABEL: name: raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_voffset_add4095 ; GFX90A: bb.1 (%ir-block.0): @@ -322,7 +322,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp ; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4) + ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 4095, align 1, addrspace 4) ; GFX90A: S_ENDPGM 0 %voffset = add i32 %voffset.base, 4095 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -342,7 +342,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp ; GFX908: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX908: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX908: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 2, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; GFX908: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; GFX908: S_ENDPGM 0 ; GFX90A-LABEL: name: raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc ; GFX90A: bb.1 (%ir-block.0): @@ -355,7 +355,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp ; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 3, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; GFX90A: S_ENDPGM 0 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 2) ret void @@ -373,7 +373,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__v ; GFX908: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX908: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX908: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; GFX908: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on custom "BufferResource", align 1, addrspace 4) ; GFX908: S_ENDPGM 0 ; GFX90A-LABEL: name: raw_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset ; GFX90A: bb.1 (%ir-block.0): @@ -386,7 +386,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__v ; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A: [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; GFX90A: [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on custom "BufferResource", align 1, addrspace 4) ; GFX90A: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -403,7 +403,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0 ; GFX908: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX908: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX908: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; GFX908: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on custom "BufferResource", align 1, addrspace 4) ; GFX908: S_ENDPGM 0 ; GFX90A-LABEL: name: raw_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset ; GFX90A: bb.1 (%ir-block.0): @@ -415,7 +415,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0 ; GFX90A: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A: [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; GFX90A: [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on custom "BufferResource", align 1, addrspace 4) ; GFX90A: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll index ecd6d20859823..debc0aeacca08 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll @@ -14,7 +14,7 @@ define amdgpu_ps half @raw_buffer_load_format_f16__sgpr_rsrc__vgpr_voffset__sgpr ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED: [[BUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4) + ; PACKED: [[BUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16) from custom "BufferResource", align 1, addrspace 4) ; PACKED: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_OFFEN]] ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 ; UNPACKED-LABEL: name: raw_buffer_load_format_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset @@ -27,7 +27,7 @@ define amdgpu_ps half @raw_buffer_load_format_f16__sgpr_rsrc__vgpr_voffset__sgpr ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4) + ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16) from custom "BufferResource", align 1, addrspace 4) ; UNPACKED: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]] ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call half @llvm.amdgcn.raw.buffer.load.format.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -45,7 +45,7 @@ define amdgpu_ps <2 x half> @raw_buffer_load_format_v2f16__sgpr_rsrc__vgpr_voffs ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED: [[BUFFER_LOAD_FORMAT_D16_XY_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; PACKED: [[BUFFER_LOAD_FORMAT_D16_XY_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>) from custom "BufferResource", align 1, addrspace 4) ; PACKED: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_OFFEN]] ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 ; UNPACKED-LABEL: name: raw_buffer_load_format_v2f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset @@ -58,7 +58,7 @@ define amdgpu_ps <2 x half> @raw_buffer_load_format_v2f16__sgpr_rsrc__vgpr_voffs ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>) from custom "BufferResource", align 1, addrspace 4) ; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN]].sub0 ; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN]].sub1 ; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 @@ -93,7 +93,7 @@ define amdgpu_ps <4 x half> @raw_buffer_load_format_v4f16__sgpr_rsrc__vgpr_voffs ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4) + ; PACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>) from custom "BufferResource", align 1, addrspace 4) ; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub0 ; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub1 ; PACKED: $vgpr0 = COPY [[COPY6]] @@ -109,7 +109,7 @@ define amdgpu_ps <4 x half> @raw_buffer_load_format_v4f16__sgpr_rsrc__vgpr_voffs ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4) + ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>) from custom "BufferResource", align 1, addrspace 4) ; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub0 ; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub1 ; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub2 @@ -169,7 +169,7 @@ define amdgpu_ps half @raw_buffer_load_format_f16__vgpr_rsrc__sgpr_voffset__vgpr ; PACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; PACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec ; PACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc - ; PACKED: [[BUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4) + ; PACKED: [[BUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16) from custom "BufferResource", align 1, addrspace 4) ; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -209,7 +209,7 @@ define amdgpu_ps half @raw_buffer_load_format_f16__vgpr_rsrc__sgpr_voffset__vgpr ; UNPACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; UNPACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec ; UNPACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc - ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4) + ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16) from custom "BufferResource", align 1, addrspace 4) ; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -234,7 +234,7 @@ define amdgpu_ps <4 x half> @raw_buffer_load_format_v4f16__sgpr_rsrc__vgpr_voffs ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource" + 4095, align 1, addrspace 4) + ; PACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load (s64) from custom "BufferResource" + 4095, align 1, addrspace 4) ; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub0 ; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub1 ; PACKED: $vgpr0 = COPY [[COPY6]] @@ -250,7 +250,7 @@ define amdgpu_ps <4 x half> @raw_buffer_load_format_v4f16__sgpr_rsrc__vgpr_voffs ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource" + 4095, align 1, addrspace 4) + ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load (s64) from custom "BufferResource" + 4095, align 1, addrspace 4) ; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub0 ; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub1 ; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll index b8eb3ac678a35..679eb445808d0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll @@ -13,7 +13,7 @@ define amdgpu_ps float @raw_buffer_load_format_f32__sgpr_rsrc__vgpr_voffset__sgp ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[BUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_OFFEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -31,7 +31,7 @@ define amdgpu_ps <2 x float> @raw_buffer_load_format_v2f32__sgpr_rsrc__vgpr_voff ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[BUFFER_LOAD_FORMAT_XY_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_FORMAT_XY_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>) from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XY_OFFEN]].sub0 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XY_OFFEN]].sub1 ; CHECK: $vgpr0 = COPY [[COPY6]] @@ -52,7 +52,7 @@ define amdgpu_ps <3 x float> @raw_buffer_load_format_v3f32__sgpr_rsrc__vgpr_voff ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[BUFFER_LOAD_FORMAT_XYZ_OFFEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_FORMAT_XYZ_OFFEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>) from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub0 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub1 ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub2 @@ -75,7 +75,7 @@ define amdgpu_ps <4 x float> @raw_buffer_load_format_v4f32__sgpr_rsrc__vgpr_voff ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>) from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub0 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub1 ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub2 @@ -121,7 +121,7 @@ define amdgpu_ps float @raw_buffer_load_format_f32__vgpr_rsrc__sgpr_voffset__vgp ; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec ; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc - ; CHECK: [[BUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -146,7 +146,7 @@ define amdgpu_ps <4 x float> @raw_buffer_load_format_v4f32__sgpr_rsrc__vgpr_voff ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 4095, align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 4095, align 1, addrspace 4) ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub0 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub1 ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll index ce835676f04d3..bfe163d37d5a6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll @@ -14,7 +14,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -34,7 +34,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__sgpr_voffset__sgpr_soffs ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr7 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] - ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -69,7 +69,7 @@ define amdgpu_ps float @raw_buffer_load_f32__vgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -114,7 +114,7 @@ define amdgpu_ps float @raw_buffer_load_f32__vgpr_rsrc__vgpr_voffset__vgpr_soffs ; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec ; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc - ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -140,7 +140,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 1) @@ -159,7 +159,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 2, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 2, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 2) @@ -178,7 +178,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 4, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 4, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 4) @@ -197,7 +197,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 6, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 6, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 6) @@ -216,7 +216,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 5, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 5, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 5) @@ -235,7 +235,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 7, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 7, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 7) @@ -254,7 +254,7 @@ define amdgpu_ps <2 x float> @raw_buffer_load_v2f32__sgpr_rsrc__vgpr_voffset__sg ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>) from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 ; CHECK: $vgpr0 = COPY [[COPY6]] @@ -275,7 +275,7 @@ define amdgpu_ps <3 x float> @raw_buffer_load_v3f32__sgpr_rsrc__vgpr_voffset__sg ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[BUFFER_LOAD_DWORDX3_OFFEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_DWORDX3_OFFEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>) from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub0 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub1 ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub2 @@ -298,7 +298,7 @@ define amdgpu_ps <4 x float> @raw_buffer_load_v4f32__sgpr_rsrc__vgpr_voffset__sg ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>) from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 @@ -323,7 +323,7 @@ define amdgpu_ps half @raw_buffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffse ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16) from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_OFFEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -341,7 +341,7 @@ define amdgpu_ps <2 x half> @raw_buffer_load_v2f16__sgpr_rsrc__vgpr_voffset__sgp ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>) from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call <2 x half> @llvm.amdgcn.raw.buffer.load.v2f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -365,7 +365,7 @@ define amdgpu_ps <4 x half> @raw_buffer_load_v4f16__sgpr_rsrc__vgpr_voffset__sgp ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>) from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 ; CHECK: $vgpr0 = COPY [[COPY6]] @@ -386,7 +386,7 @@ define amdgpu_ps float @raw_buffer_load_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffse ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 1 from custom "BufferResource", addrspace 4) + ; CHECK: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s8) from custom "BufferResource", addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -406,7 +406,7 @@ define amdgpu_ps float @raw_buffer_load_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffse ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 1 from custom "BufferResource", addrspace 4) + ; CHECK: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s8) from custom "BufferResource", addrspace 4) ; CHECK: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[BUFFER_LOAD_UBYTE_OFFEN]], 0, 8, implicit $exec ; CHECK: $vgpr0 = COPY [[V_BFE_I32_e64_]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -444,7 +444,7 @@ define amdgpu_ps half @raw_buffer_load_f16__vgpr_rsrc__vgpr_voffset__sgpr_soffse ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16) from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -486,7 +486,7 @@ define amdgpu_ps float @raw_buffer_load_i8__vgpr_rsrc__vgpr_voffset__sgpr_soffse ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 1 from custom "BufferResource", addrspace 4) + ; CHECK: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s8) from custom "BufferResource", addrspace 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -512,7 +512,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vdpr_voffset__sgpr_soffs ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE]], [[COPY4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE]], [[COPY4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 0, i32 %soffset, i32 0) @@ -529,7 +529,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE]], [[COPY4]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4095, align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE]], [[COPY4]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource" + 4095, align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0) @@ -548,7 +548,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE]], [[COPY4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4096, align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE]], [[COPY4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource" + 4096, align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 4096, i32 %soffset, i32 0) @@ -566,7 +566,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 16, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 16, align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 16, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource" + 16, align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %voffset = add i32 %voffset.base, 16 @@ -585,7 +585,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4095, align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource" + 4095, align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %voffset = add i32 %voffset.base, 4095 @@ -607,7 +607,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; CHECK: %10:vgpr_32, dead %15:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec - ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4096, align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource" + 4096, align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %voffset = add i32 %voffset.base, 4096 @@ -626,7 +626,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 - ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0) @@ -644,7 +644,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0) @@ -664,7 +664,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 ; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc - ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 16 @@ -685,7 +685,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 ; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc - ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4095 @@ -706,7 +706,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc - ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4096 @@ -744,7 +744,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -790,7 +790,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %13, [[REG_SEQUENCE3]], [[COPY5]], 904, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 5000, align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %13, [[REG_SEQUENCE3]], [[COPY5]], 904, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource" + 5000, align 1, addrspace 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll index f40e52443ad06..58d6d9754db72 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll @@ -14,7 +14,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED: BUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4) + ; UNPACKED: BUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s16) into custom "BufferResource", align 1, addrspace 4) ; UNPACKED: S_ENDPGM 0 ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f16 ; PACKED: bb.1 (%ir-block.0): @@ -27,7 +27,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4) + ; PACKED: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s16) into custom "BufferResource", align 1, addrspace 4) ; PACKED: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -44,7 +44,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_409 ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED: BUFFER_STORE_FORMAT_D16_X_gfx80_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource" + 4095, align 1, addrspace 4) + ; UNPACKED: BUFFER_STORE_FORMAT_D16_X_gfx80_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store (s16) into custom "BufferResource" + 4095, align 1, addrspace 4) ; UNPACKED: S_ENDPGM 0 ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_4095__sgpr_soffset_f16 ; PACKED: bb.1 (%ir-block.0): @@ -56,7 +56,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_409 ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED: BUFFER_STORE_FORMAT_D16_X_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource" + 4095, align 1, addrspace 4) + ; PACKED: BUFFER_STORE_FORMAT_D16_X_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store (s16) into custom "BufferResource" + 4095, align 1, addrspace 4) ; PACKED: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.f16(half %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0) ret void @@ -78,7 +78,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY4]], implicit $exec ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 - ; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>) into custom "BufferResource", align 1, addrspace 4) ; UNPACKED: S_ENDPGM 0 ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16 ; PACKED: bb.1 (%ir-block.0): @@ -91,7 +91,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>) into custom "BufferResource", align 1, addrspace 4) ; PACKED: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -116,7 +116,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; UNPACKED: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY9]], [[COPY5]], implicit $exec ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3 - ; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4) + ; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>) into custom "BufferResource", align 1, addrspace 4) ; UNPACKED: S_ENDPGM 0 ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16 ; PACKED: bb.1 (%ir-block.0): @@ -131,7 +131,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; PACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4) + ; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>) into custom "BufferResource", align 1, addrspace 4) ; PACKED: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -173,7 +173,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY11]], implicit $exec ; UNPACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; UNPACKED: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4) + ; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>) into custom "BufferResource", align 1, addrspace 4) ; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -211,7 +211,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; PACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY9]], implicit $exec ; PACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; PACKED: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4) + ; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>) into custom "BufferResource", align 1, addrspace 4) ; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -240,7 +240,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] ; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY6]], [[COPY4]], implicit $exec ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 - ; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>) into custom "BufferResource", align 1, addrspace 4) ; UNPACKED: S_ENDPGM 0 ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4095 ; PACKED: bb.1 (%ir-block.0): @@ -253,7 +253,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 - ; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>) into custom "BufferResource", align 1, addrspace 4) ; PACKED: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0) ret void @@ -275,7 +275,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] ; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY6]], [[COPY4]], implicit $exec ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 - ; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>) into custom "BufferResource", align 1, addrspace 4) ; UNPACKED: S_ENDPGM 0 ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4096 ; PACKED: bb.1 (%ir-block.0): @@ -288,7 +288,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>) into custom "BufferResource", align 1, addrspace 4) ; PACKED: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0) ret void @@ -310,7 +310,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY4]], implicit $exec ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 - ; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 16, align 1, addrspace 4) + ; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource" + 16, align 1, addrspace 4) ; UNPACKED: S_ENDPGM 0 ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_16 ; PACKED: bb.1 (%ir-block.0): @@ -323,7 +323,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 16, align 1, addrspace 4) + ; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource" + 16, align 1, addrspace 4) ; PACKED: S_ENDPGM 0 %voffset.add = add i32 %voffset, 16 call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -346,7 +346,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY4]], implicit $exec ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 - ; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4) + ; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource" + 4095, align 1, addrspace 4) ; UNPACKED: S_ENDPGM 0 ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_4095 ; PACKED: bb.1 (%ir-block.0): @@ -359,7 +359,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4) + ; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource" + 4095, align 1, addrspace 4) ; PACKED: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4095 call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -385,7 +385,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] ; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY4]], implicit $exec ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 - ; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4) + ; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource" + 4096, align 1, addrspace 4) ; UNPACKED: S_ENDPGM 0 ; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_4096 ; PACKED: bb.1 (%ir-block.0): @@ -401,7 +401,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; PACKED: %11:vgpr_32, dead %15:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec - ; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4) + ; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource" + 4096, align 1, addrspace 4) ; PACKED: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4096 call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -448,7 +448,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; UNPACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY12]], implicit $exec ; UNPACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; UNPACKED: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource" + 4096, align 1, addrspace 4) + ; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s64) into custom "BufferResource" + 4096, align 1, addrspace 4) ; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -489,7 +489,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; PACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY10]], implicit $exec ; PACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; PACKED: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource" + 4096, align 1, addrspace 4) + ; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s64) into custom "BufferResource" + 4096, align 1, addrspace 4) ; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll index 37c8c8055b89b..15f3ff7f4de2c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll @@ -14,7 +14,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: BUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: BUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -31,7 +31,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_409 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: BUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4) + ; CHECK: BUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource" + 4095, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0) ret void @@ -51,7 +51,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -72,7 +72,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: BUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: BUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x s32>) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -94,7 +94,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -132,7 +132,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY11]], implicit $exec ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE4]], [[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE4]], [[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>) into custom "BufferResource", align 1, addrspace 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -159,7 +159,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 - ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0) ret void @@ -179,7 +179,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0) ret void @@ -199,7 +199,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 16, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource" + 16, align 1, addrspace 4) + ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 16, 0, 0, 0, implicit $exec :: (dereferenceable store (s64) into custom "BufferResource" + 16, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 %voffset.add = add i32 %voffset, 16 call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -220,7 +220,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource" + 4095, align 1, addrspace 4) + ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store (s64) into custom "BufferResource" + 4095, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4095 call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -244,7 +244,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; CHECK: %13:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec - ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource" + 4096, align 1, addrspace 4) + ; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s64) into custom "BufferResource" + 4096, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4096 call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -288,7 +288,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY12]], implicit $exec ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %15, [[REG_SEQUENCE4]], [[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 4096, align 1, addrspace 4) + ; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %15, [[REG_SEQUENCE4]], [[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 4096, align 1, addrspace 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll index 3e0ee00bfe320..7530c416c2aaf 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll @@ -15,7 +15,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -36,7 +36,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__sgpr_val__sgpr_voffset__sgpr ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr8 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY7]], [[COPY8]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY7]], [[COPY8]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -71,7 +71,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource", align 1, addrspace 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -103,7 +103,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__vgpr ; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000) ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY6]], implicit $exec - ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource", align 1, addrspace 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -148,7 +148,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr ; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec ; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc - ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource", align 1, addrspace 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -173,7 +173,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 1) ret void @@ -191,7 +191,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 2, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 2, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 2) ret void @@ -209,7 +209,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 3, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 3, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 3) ret void @@ -227,7 +227,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 4, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 4, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 4) ret void @@ -245,7 +245,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 6, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 6, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 6) ret void @@ -263,7 +263,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 5, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 5, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 5) ret void @@ -281,7 +281,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 7, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 7, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 7) ret void @@ -301,7 +301,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -322,7 +322,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: BUFFER_STORE_DWORDX3_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: BUFFER_STORE_DWORDX3_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x s32>) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -344,7 +344,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: BUFFER_STORE_DWORDX4_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: BUFFER_STORE_DWORDX4_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -362,7 +362,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: BUFFER_STORE_BYTE_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4) + ; CHECK: BUFFER_STORE_BYTE_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s8) into custom "BufferResource", addrspace 4) ; CHECK: S_ENDPGM 0 %val.trunc = trunc i32 %val to i8 call void @llvm.amdgcn.raw.buffer.store.i8(i8 %val.trunc, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -381,7 +381,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s16) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 %val.trunc = trunc i32 %val to i16 call void @llvm.amdgcn.raw.buffer.store.i16(i16 %val.trunc, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -400,7 +400,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s16) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -418,7 +418,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -438,7 +438,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -474,7 +474,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY9]], implicit $exec ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>) into custom "BufferResource", align 1, addrspace 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -498,7 +498,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__sgpr_soffset_f32_v ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: BUFFER_STORE_DWORD_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4) + ; CHECK: BUFFER_STORE_DWORD_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource" + 4095, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0) ret void @@ -517,7 +517,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__sgpr_soffset_f32_v ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4) + ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource" + 4096, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 4096, i32 %soffset, i32 0) ret void @@ -535,7 +535,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 16, align 1, addrspace 4) + ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource" + 16, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 %voffset.add = add i32 %voffset, 16 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -554,7 +554,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4) + ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource" + 4095, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4095 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -576,7 +576,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; CHECK: %11:vgpr_32, dead %15:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec - ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4) + ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource" + 4096, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4096 call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -595,7 +595,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 - ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0) ret void @@ -613,7 +613,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0) ret void @@ -631,7 +631,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 16, align 1, addrspace 4) + ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource" + 16, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 %voffset.add = add i32 %voffset, 16 call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -650,7 +650,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4) + ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource" + 4095, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4095 call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -672,7 +672,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; CHECK: %11:vgpr_32, dead %15:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec - ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4) + ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource" + 4096, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4096 call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -711,7 +711,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY9]], implicit $exec ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %13, [[REG_SEQUENCE3]], [[COPY6]], 904, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 5000, align 1, addrspace 4) + ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %13, [[REG_SEQUENCE3]], [[COPY6]], 904, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource" + 5000, align 1, addrspace 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -755,7 +755,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__5000_voffset__sgpr ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY6]], [[REG_SEQUENCE3]], [[COPY5]], 904, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 5000, align 1, addrspace 4) + ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY6]], [[REG_SEQUENCE3]], [[COPY5]], 904, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource" + 5000, align 1, addrspace 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll index 056e373f9a995..70a81e8212e23 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll @@ -13,7 +13,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4) + ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s16) from custom "BufferResource", align 1, addrspace 4) ; UNPACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]] ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset @@ -26,7 +26,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4) + ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s16) from custom "BufferResource", align 1, addrspace 4) ; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]] ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) @@ -44,7 +44,7 @@ define amdgpu_ps <2 x half> @raw_tbuffer_load_v2f16__sgpr_rsrc__vgpr_voffset__sg ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>) from custom "BufferResource", align 1, addrspace 4) ; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN]].sub0 ; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN]].sub1 ; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 @@ -68,7 +68,7 @@ define amdgpu_ps <2 x half> @raw_tbuffer_load_v2f16__sgpr_rsrc__vgpr_voffset__sg ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_XY_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_XY_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>) from custom "BufferResource", align 1, addrspace 4) ; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_XY_OFFEN]] ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call <2 x half> @llvm.amdgcn.raw.tbuffer.load.v2f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) @@ -92,7 +92,7 @@ define amdgpu_ps <4 x half> @raw_tbuffer_load_v4f16__sgpr_rsrc__vgpr_voffset__sg ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4) + ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>) from custom "BufferResource", align 1, addrspace 4) ; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub0 ; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub1 ; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub2 @@ -126,7 +126,7 @@ define amdgpu_ps <4 x half> @raw_tbuffer_load_v4f16__sgpr_rsrc__vgpr_voffset__sg ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4) + ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>) from custom "BufferResource", align 1, addrspace 4) ; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub0 ; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub1 ; PACKED: $vgpr0 = COPY [[COPY6]] @@ -167,7 +167,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__vgpr_rsrc__sgpr_voffset__vgpr_soffs ; UNPACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; UNPACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec ; UNPACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc - ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4) + ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s16) from custom "BufferResource", align 1, addrspace 4) ; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -207,7 +207,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__vgpr_rsrc__sgpr_voffset__vgpr_soffs ; PACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; PACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec ; PACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc - ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4) + ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s16) from custom "BufferResource", align 1, addrspace 4) ; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -232,7 +232,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4) + ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 0, 0, implicit $exec :: (dereferenceable load (s16) from custom "BufferResource", align 1, addrspace 4) ; UNPACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]] ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc @@ -245,7 +245,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4) + ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 0, 0, implicit $exec :: (dereferenceable load (s16) from custom "BufferResource", align 1, addrspace 4) ; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]] ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 1) @@ -263,7 +263,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 2, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4) + ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 2, 0, 0, implicit $exec :: (dereferenceable load (s16) from custom "BufferResource", align 1, addrspace 4) ; UNPACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]] ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc @@ -276,7 +276,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 2, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4) + ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 2, 0, 0, implicit $exec :: (dereferenceable load (s16) from custom "BufferResource", align 1, addrspace 4) ; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]] ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 2) @@ -294,7 +294,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 3, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4) + ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 3, 0, 0, implicit $exec :: (dereferenceable load (s16) from custom "BufferResource", align 1, addrspace 4) ; UNPACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]] ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc_glc @@ -307,7 +307,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 3, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4) + ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 3, 0, 0, implicit $exec :: (dereferenceable load (s16) from custom "BufferResource", align 1, addrspace 4) ; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]] ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 3) @@ -325,7 +325,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 4, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4) + ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 4, 0, 0, implicit $exec :: (dereferenceable load (s16) from custom "BufferResource", align 1, addrspace 4) ; UNPACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]] ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc @@ -338,7 +338,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 4, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4) + ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 4, 0, 0, implicit $exec :: (dereferenceable load (s16) from custom "BufferResource", align 1, addrspace 4) ; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]] ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 4) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll index d09baeb278ab7..6fc1d458c8972 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll @@ -12,7 +12,7 @@ define amdgpu_ps float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) @@ -30,7 +30,7 @@ define amdgpu_ps <2 x float> @raw_tbuffer_load_v2f32__sgpr_rsrc__vgpr_voffset__s ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[TBUFFER_LOAD_FORMAT_XY_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[TBUFFER_LOAD_FORMAT_XY_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>) from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_OFFEN]].sub0 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_OFFEN]].sub1 ; CHECK: $vgpr0 = COPY [[COPY6]] @@ -51,7 +51,7 @@ define amdgpu_ps <3 x float> @raw_tbuffer_load_v3f32__sgpr_rsrc__vgpr_voffset__s ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN:%[0-9]+]]:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 12 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN:%[0-9]+]]:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>) from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub0 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub1 ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub2 @@ -74,7 +74,7 @@ define amdgpu_ps <4 x float> @raw_tbuffer_load_v4f32__sgpr_rsrc__vgpr_voffset__s ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>) from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub0 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub1 ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub2 @@ -119,7 +119,7 @@ define amdgpu_ps float @raw_tbuffer_load_f32__vgpr_rsrc__sgpr_voffset__vgpr_soff ; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec ; CHECK: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U32_e64_]], [[S_AND_B32_]], implicit-def $scc - ; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -144,7 +144,7 @@ define amdgpu_ps float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 1) @@ -162,7 +162,7 @@ define amdgpu_ps float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 2, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 2, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 2) @@ -180,7 +180,7 @@ define amdgpu_ps float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 3, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 3, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 3) @@ -198,7 +198,7 @@ define amdgpu_ps float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 4, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 4, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 4) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll index 206968e017e80..2662a74f9845b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll @@ -14,7 +14,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4) + ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s16) into custom "BufferResource", align 1, addrspace 4) ; UNPACKED: S_ENDPGM 0 ; PACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset ; PACKED: bb.1 (%ir-block.0): @@ -27,7 +27,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4) + ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s16) into custom "BufferResource", align 1, addrspace 4) ; PACKED: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret void @@ -49,7 +49,7 @@ define amdgpu_ps void @raw_tbuffer_store_v2f16__sgpr_rsrc__vgpr_voffset__sgpr_so ; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY]], implicit $exec ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 - ; UNPACKED: TBUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; UNPACKED: TBUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>) into custom "BufferResource", align 1, addrspace 4) ; UNPACKED: S_ENDPGM 0 ; PACKED-LABEL: name: raw_tbuffer_store_v2f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset ; PACKED: bb.1 (%ir-block.0): @@ -62,7 +62,7 @@ define amdgpu_ps void @raw_tbuffer_store_v2f16__sgpr_rsrc__vgpr_voffset__sgpr_so ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED: TBUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; PACKED: TBUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>) into custom "BufferResource", align 1, addrspace 4) ; PACKED: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret void @@ -93,7 +93,7 @@ define amdgpu_ps void @raw_tbuffer_store_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_so ; UNPACKED: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; UNPACKED: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY9]], [[COPY1]], implicit $exec ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY1]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3 - ; UNPACKED: TBUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4) + ; UNPACKED: TBUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>) into custom "BufferResource", align 1, addrspace 4) ; UNPACKED: S_ENDPGM 0 ; PACKED-LABEL: name: raw_tbuffer_store_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset ; PACKED: bb.1 (%ir-block.0): @@ -108,7 +108,7 @@ define amdgpu_ps void @raw_tbuffer_store_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_so ; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 ; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; PACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED: TBUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4) + ; PACKED: TBUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>) into custom "BufferResource", align 1, addrspace 4) ; PACKED: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret void @@ -143,7 +143,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__sgpr_soff ; UNPACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec ; UNPACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; UNPACKED: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4) + ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s16) into custom "BufferResource", align 1, addrspace 4) ; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -179,7 +179,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__sgpr_soff ; PACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec ; PACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; PACKED: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4) + ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s16) into custom "BufferResource", align 1, addrspace 4) ; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -224,7 +224,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__vgpr_soff ; UNPACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; UNPACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec ; UNPACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc - ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4) + ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s16) into custom "BufferResource", align 1, addrspace 4) ; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -263,7 +263,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__vgpr_soff ; PACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; PACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec ; PACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc - ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4) + ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s16) into custom "BufferResource", align 1, addrspace 4) ; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -309,7 +309,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__sgpr_voffset__vgpr_soff ; UNPACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; UNPACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec ; UNPACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc - ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4) + ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s16) into custom "BufferResource", align 1, addrspace 4) ; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -349,7 +349,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__sgpr_voffset__vgpr_soff ; PACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; PACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec ; PACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc - ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4) + ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s16) into custom "BufferResource", align 1, addrspace 4) ; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -374,7 +374,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4) + ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, 0, implicit $exec :: (dereferenceable store (s16) into custom "BufferResource", align 1, addrspace 4) ; UNPACKED: S_ENDPGM 0 ; PACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc ; PACKED: bb.1 (%ir-block.0): @@ -387,7 +387,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4) + ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, 0, implicit $exec :: (dereferenceable store (s16) into custom "BufferResource", align 1, addrspace 4) ; PACKED: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 1) ret void @@ -405,7 +405,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 2, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4) + ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 2, 0, 0, implicit $exec :: (dereferenceable store (s16) into custom "BufferResource", align 1, addrspace 4) ; UNPACKED: S_ENDPGM 0 ; PACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc ; PACKED: bb.1 (%ir-block.0): @@ -418,7 +418,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 2, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4) + ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 2, 0, 0, implicit $exec :: (dereferenceable store (s16) into custom "BufferResource", align 1, addrspace 4) ; PACKED: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 2) ret void @@ -436,7 +436,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 3, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4) + ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 3, 0, 0, implicit $exec :: (dereferenceable store (s16) into custom "BufferResource", align 1, addrspace 4) ; UNPACKED: S_ENDPGM 0 ; PACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc_glc ; PACKED: bb.1 (%ir-block.0): @@ -449,7 +449,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 3, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4) + ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 3, 0, 0, implicit $exec :: (dereferenceable store (s16) into custom "BufferResource", align 1, addrspace 4) ; PACKED: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 3) ret void @@ -467,7 +467,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 4, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4) + ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 4, 0, 0, implicit $exec :: (dereferenceable store (s16) into custom "BufferResource", align 1, addrspace 4) ; UNPACKED: S_ENDPGM 0 ; PACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc ; PACKED: bb.1 (%ir-block.0): @@ -480,7 +480,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 4, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4) + ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 4, 0, 0, implicit $exec :: (dereferenceable store (s16) into custom "BufferResource", align 1, addrspace 4) ; PACKED: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 4) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.i8.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.i8.ll index 0f8f768cab44b..4b5de5ba2e4d7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.i8.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.i8.ll @@ -14,7 +14,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; UNPACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4) + ; UNPACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8) into custom "BufferResource", addrspace 4) ; UNPACKED: S_ENDPGM 0 ; PACKED-LABEL: name: raw_tbuffer_store_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffset ; PACKED: bb.1 (%ir-block.0): @@ -27,7 +27,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffs ; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; PACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4) + ; PACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8) into custom "BufferResource", addrspace 4) ; PACKED: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.i8(i8 %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) ret void @@ -62,7 +62,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__vgpr_voffset__sgpr_soffs ; UNPACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec ; UNPACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; UNPACKED: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; UNPACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4) + ; UNPACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8) into custom "BufferResource", addrspace 4) ; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -98,7 +98,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__vgpr_voffset__sgpr_soffs ; PACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec ; PACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; PACKED: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; PACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4) + ; PACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8) into custom "BufferResource", addrspace 4) ; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -143,7 +143,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__vgpr_voffset__vgpr_soffs ; UNPACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; UNPACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec ; UNPACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc - ; UNPACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4) + ; UNPACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8) into custom "BufferResource", addrspace 4) ; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -182,7 +182,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__vgpr_voffset__vgpr_soffs ; PACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; PACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec ; PACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc - ; PACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4) + ; PACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8) into custom "BufferResource", addrspace 4) ; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -228,7 +228,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__sgpr_voffset__vgpr_soffs ; UNPACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; UNPACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec ; UNPACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc - ; UNPACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4) + ; UNPACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8) into custom "BufferResource", addrspace 4) ; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -268,7 +268,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__sgpr_voffset__vgpr_soffs ; PACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; PACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec ; PACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc - ; PACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4) + ; PACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8) into custom "BufferResource", addrspace 4) ; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll index 9ead4cf92e3a9..5ef4cd715cc16 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll @@ -14,7 +14,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret void @@ -35,7 +35,7 @@ define amdgpu_ps void @raw_tbuffer_store_v2f32__sgpr_rsrc__vgpr_voffset__sgpr_so ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: TBUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: TBUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret void @@ -57,7 +57,7 @@ define amdgpu_ps void @raw_tbuffer_store_v3f32__sgpr_rsrc__vgpr_voffset__sgpr_so ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[COPY6]], %subreg.sub3 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: TBUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[REG_SEQUENCE]], [[COPY7]], [[REG_SEQUENCE1]], [[COPY8]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: TBUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[REG_SEQUENCE]], [[COPY7]], [[REG_SEQUENCE1]], [[COPY8]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x s32>) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret void @@ -80,7 +80,7 @@ define amdgpu_ps void @raw_tbuffer_store_v4f32__sgpr_rsrc__vgpr_voffset__sgpr_so ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE]], [[COPY8]], [[REG_SEQUENCE1]], [[COPY9]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE]], [[COPY8]], [[REG_SEQUENCE1]], [[COPY9]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) ret void @@ -100,7 +100,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__sgpr_voffset__sgpr_soff ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr7 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) ret void @@ -135,7 +135,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__vgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 1, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 1, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource", align 1, addrspace 4) ; CHECK: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -180,7 +180,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__vgpr_rsrc__vgpr_voffset__vgpr_soff ; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec ; CHECK: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U32_e64_]], [[S_AND_B32_]], implicit-def $scc - ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource", align 1, addrspace 4) ; CHECK: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -226,7 +226,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__vgpr_rsrc__sgpr_voffset__vgpr_soff ; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec ; CHECK: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U32_e64_]], [[S_AND_B32_]], implicit-def $scc - ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource", align 1, addrspace 4) ; CHECK: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -252,7 +252,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 1) ret void @@ -271,7 +271,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 2, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 2, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 2) ret void @@ -290,7 +290,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 3, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 3, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 3) ret void @@ -309,7 +309,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 4, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 4, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 4) ret void @@ -328,7 +328,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vdpr_voffset__sgpr_soff ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: TBUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: TBUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 94, i32 0) ret void @@ -345,7 +345,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: TBUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4) + ; CHECK: TBUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource" + 4095, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 94, i32 0) ret void @@ -364,7 +364,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4) + ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource" + 4096, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 4096, i32 %soffset, i32 94, i32 0) ret void @@ -382,7 +382,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 16, align 1, addrspace 4) + ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource" + 16, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 %voffset = add i32 %voffset.base, 16 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) @@ -401,7 +401,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4) + ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource" + 4095, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 %voffset = add i32 %voffset.base, 4095 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) @@ -423,7 +423,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec - ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[V_ADD_U32_e64_]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4) + ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[V_ADD_U32_e64_]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource" + 4096, align 1, addrspace 4) ; CHECK: S_ENDPGM 0 %voffset = add i32 %voffset.base, 4096 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) @@ -442,7 +442,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 - ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 94, i32 0) ret void @@ -460,7 +460,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 94, i32 0) ret void @@ -480,7 +480,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 ; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY6]], [[S_MOV_B32_]], implicit-def $scc - ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 %soffset = add i32 %soffset.base, 16 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) @@ -501,7 +501,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095 ; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY6]], [[S_MOV_B32_]], implicit-def $scc - ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 %soffset = add i32 %soffset.base, 4095 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) @@ -522,7 +522,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 ; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY6]], [[S_MOV_B32_]], implicit-def $scc - ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 %soffset = add i32 %soffset.base, 4096 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0) @@ -560,7 +560,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[S_ADD_I32_]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[S_ADD_I32_]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource", align 1, addrspace 4) ; CHECK: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -606,7 +606,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY9]], implicit $exec ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[V_ADD_U32_e64_]], [[REG_SEQUENCE3]], [[COPY6]], 904, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 5000, align 1, addrspace 4) + ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[V_ADD_U32_e64_]], [[REG_SEQUENCE3]], [[COPY6]], 904, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource" + 5000, align 1, addrspace 4) ; CHECK: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll index 8b2e2c0435049..5dcc3cf994aa0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll @@ -16,7 +16,7 @@ define amdgpu_ps i32 @s_buffer_load_i32(<4 x i32> inreg %rsrc, i32 inreg %soffse ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load 4) + ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s32)) ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -30,7 +30,7 @@ define amdgpu_ps i32 @s_buffer_load_i32(<4 x i32> inreg %rsrc, i32 inreg %soffse ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load 4) + ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s32)) ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -44,7 +44,7 @@ define amdgpu_ps i32 @s_buffer_load_i32(<4 x i32> inreg %rsrc, i32 inreg %soffse ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load 4) + ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s32)) ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -63,7 +63,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_glc(<4 x i32> inreg %rsrc, i32 inreg %so ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load 4) + ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load (s32)) ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -77,7 +77,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_glc(<4 x i32> inreg %rsrc, i32 inreg %so ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load 4) + ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load (s32)) ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -91,7 +91,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_glc(<4 x i32> inreg %rsrc, i32 inreg %so ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load 4) + ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load (s32)) ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -110,7 +110,7 @@ define amdgpu_ps <2 x i32> @s_buffer_load_v2i32(<4 x i32> inreg %rsrc, i32 inreg ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load 8, align 4) + ; GFX6: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s64), align 4) ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0 ; GFX6: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1 ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] @@ -129,7 +129,7 @@ define amdgpu_ps <2 x i32> @s_buffer_load_v2i32(<4 x i32> inreg %rsrc, i32 inreg ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load 8, align 4) + ; GFX7: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s64), align 4) ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0 ; GFX7: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1 ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] @@ -148,7 +148,7 @@ define amdgpu_ps <2 x i32> @s_buffer_load_v2i32(<4 x i32> inreg %rsrc, i32 inreg ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load 8, align 4) + ; GFX8: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s64), align 4) ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0 ; GFX8: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1 ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] @@ -172,7 +172,7 @@ define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load 12, align 4) + ; GFX6: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s96), align 4) ; GFX6: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11 ; GFX6: [[COPY5:%[0-9]+]]:sgpr_512_with_sub0_sub1_sub2 = COPY [[REG_SEQUENCE1]] @@ -199,7 +199,7 @@ define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load 12, align 4) + ; GFX7: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s96), align 4) ; GFX7: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11 ; GFX7: [[COPY5:%[0-9]+]]:sgpr_512_with_sub0_sub1_sub2 = COPY [[REG_SEQUENCE1]] @@ -226,7 +226,7 @@ define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load 12, align 4) + ; GFX8: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s96), align 4) ; GFX8: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11 ; GFX8: [[COPY5:%[0-9]+]]:sgpr_512_with_sub0_sub1_sub2 = COPY [[REG_SEQUENCE1]] @@ -258,7 +258,7 @@ define amdgpu_ps <8 x i32> @s_buffer_load_v8i32(<4 x i32> inreg %rsrc, i32 inreg ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load 32, align 4) + ; GFX6: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s256), align 4) ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0 ; GFX6: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1 ; GFX6: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2 @@ -301,7 +301,7 @@ define amdgpu_ps <8 x i32> @s_buffer_load_v8i32(<4 x i32> inreg %rsrc, i32 inreg ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load 32, align 4) + ; GFX7: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s256), align 4) ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0 ; GFX7: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1 ; GFX7: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2 @@ -344,7 +344,7 @@ define amdgpu_ps <8 x i32> @s_buffer_load_v8i32(<4 x i32> inreg %rsrc, i32 inreg ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load 32, align 4) + ; GFX8: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s256), align 4) ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0 ; GFX8: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1 ; GFX8: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2 @@ -392,7 +392,7 @@ define amdgpu_ps <16 x i32> @s_buffer_load_v16i32(<4 x i32> inreg %rsrc, i32 inr ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load 64, align 4) + ; GFX6: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s512), align 4) ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0 ; GFX6: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1 ; GFX6: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2 @@ -467,7 +467,7 @@ define amdgpu_ps <16 x i32> @s_buffer_load_v16i32(<4 x i32> inreg %rsrc, i32 inr ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load 64, align 4) + ; GFX7: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s512), align 4) ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0 ; GFX7: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1 ; GFX7: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2 @@ -542,7 +542,7 @@ define amdgpu_ps <16 x i32> @s_buffer_load_v16i32(<4 x i32> inreg %rsrc, i32 inr ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load 64, align 4) + ; GFX8: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s512), align 4) ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0 ; GFX8: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1 ; GFX8: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2 @@ -622,7 +622,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1(<4 x i32> inreg %rsrc) { ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4) + ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -636,7 +636,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1(<4 x i32> inreg %rsrc) { ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 - ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4) + ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -649,7 +649,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1(<4 x i32> inreg %rsrc) { ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 0 :: (dereferenceable invariant load 4) + ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 0 :: (dereferenceable invariant load (s32)) ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -667,7 +667,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_glc_4(<4 x i32> inreg %rsrc) { ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 1 :: (dereferenceable invariant load 4) + ; GFX6: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 1 :: (dereferenceable invariant load (s32)) ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -680,7 +680,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_glc_4(<4 x i32> inreg %rsrc) { ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 1 :: (dereferenceable invariant load 4) + ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 1 :: (dereferenceable invariant load (s32)) ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -693,7 +693,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_glc_4(<4 x i32> inreg %rsrc) { ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 4, 1 :: (dereferenceable invariant load 4) + ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 4, 1 :: (dereferenceable invariant load (s32)) ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -712,7 +712,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_255(<4 x i32> inreg %rsrc) { ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 255 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4) + ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -726,7 +726,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_255(<4 x i32> inreg %rsrc) { ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 255 - ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4) + ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -739,7 +739,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_255(<4 x i32> inreg %rsrc) { ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load 4) + ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load (s32)) ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -757,7 +757,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_256(<4 x i32> inreg %rsrc) { ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 64, 0 :: (dereferenceable invariant load 4) + ; GFX6: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 64, 0 :: (dereferenceable invariant load (s32)) ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -770,7 +770,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_256(<4 x i32> inreg %rsrc) { ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 64, 0 :: (dereferenceable invariant load 4) + ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 64, 0 :: (dereferenceable invariant load (s32)) ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -783,7 +783,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_256(<4 x i32> inreg %rsrc) { ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 256, 0 :: (dereferenceable invariant load 4) + ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 256, 0 :: (dereferenceable invariant load (s32)) ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -801,7 +801,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1020(<4 x i32> inreg %rsrc) { ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load 4) + ; GFX6: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load (s32)) ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -814,7 +814,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1020(<4 x i32> inreg %rsrc) { ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load 4) + ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load (s32)) ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -827,7 +827,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1020(<4 x i32> inreg %rsrc) { ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1020, 0 :: (dereferenceable invariant load 4) + ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1020, 0 :: (dereferenceable invariant load (s32)) ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -846,7 +846,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1023(<4 x i32> inreg %rsrc) { ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1023 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4) + ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -860,7 +860,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1023(<4 x i32> inreg %rsrc) { ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1023 - ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4) + ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -873,7 +873,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1023(<4 x i32> inreg %rsrc) { ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1023, 0 :: (dereferenceable invariant load 4) + ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1023, 0 :: (dereferenceable invariant load (s32)) ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -892,7 +892,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1024(<4 x i32> inreg %rsrc) { ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4) + ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -905,7 +905,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1024(<4 x i32> inreg %rsrc) { ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 256, 0 :: (dereferenceable invariant load 4) + ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 256, 0 :: (dereferenceable invariant load (s32)) ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -918,7 +918,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1024(<4 x i32> inreg %rsrc) { ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1024, 0 :: (dereferenceable invariant load 4) + ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1024, 0 :: (dereferenceable invariant load (s32)) ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -937,7 +937,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1025(<4 x i32> inreg %rsrc) { ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1025 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4) + ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -951,7 +951,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1025(<4 x i32> inreg %rsrc) { ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1025 - ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4) + ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -964,7 +964,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1025(<4 x i32> inreg %rsrc) { ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1025, 0 :: (dereferenceable invariant load 4) + ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1025, 0 :: (dereferenceable invariant load (s32)) ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -983,7 +983,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg1(<4 x i32> inreg %desc) { ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4) + ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -997,7 +997,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg1(<4 x i32> inreg %desc) { ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4) + ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1011,7 +1011,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg1(<4 x i32> inreg %desc) { ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4) + ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1030,7 +1030,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg4(<4 x i32> inreg %desc) { ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -4 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4) + ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1043,7 +1043,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg4(<4 x i32> inreg %desc) { ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073741823, 0 :: (dereferenceable invariant load 4) + ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073741823, 0 :: (dereferenceable invariant load (s32)) ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1057,7 +1057,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg4(<4 x i32> inreg %desc) { ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -4 - ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4) + ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1076,7 +1076,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg8(<4 x i32> inreg %desc) { ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -8 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4) + ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1089,7 +1089,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg8(<4 x i32> inreg %desc) { ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073741822, 0 :: (dereferenceable invariant load 4) + ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073741822, 0 :: (dereferenceable invariant load (s32)) ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1103,7 +1103,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg8(<4 x i32> inreg %desc) { ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -8 - ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4) + ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1122,7 +1122,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit31(<4 x i32> inreg %desc) { ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4) + ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1135,7 +1135,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit31(<4 x i32> inreg %desc) { ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 536870912, 0 :: (dereferenceable invariant load 4) + ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 536870912, 0 :: (dereferenceable invariant load (s32)) ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1149,7 +1149,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit31(<4 x i32> inreg %desc) { ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 - ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4) + ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1168,7 +1168,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_glc_bit30(<4 x i32> inreg %desc) ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741824 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 1 :: (dereferenceable invariant load 4) + ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 1 :: (dereferenceable invariant load (s32)) ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1181,7 +1181,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_glc_bit30(<4 x i32> inreg %desc) ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 268435456, 1 :: (dereferenceable invariant load 4) + ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 268435456, 1 :: (dereferenceable invariant load (s32)) ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1195,7 +1195,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_glc_bit30(<4 x i32> inreg %desc) ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741824 - ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 1 :: (dereferenceable invariant load 4) + ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 1 :: (dereferenceable invariant load (s32)) ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1214,7 +1214,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit29(<4 x i32> inreg %desc) { ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 536870912 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4) + ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1227,7 +1227,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit29(<4 x i32> inreg %desc) { ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 134217728, 0 :: (dereferenceable invariant load 4) + ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 134217728, 0 :: (dereferenceable invariant load (s32)) ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1241,7 +1241,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit29(<4 x i32> inreg %desc) { ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 536870912 - ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4) + ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1260,7 +1260,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit21(<4 x i32> inreg %desc) { ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2097152 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4) + ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1273,7 +1273,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit21(<4 x i32> inreg %desc) { ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 524288, 0 :: (dereferenceable invariant load 4) + ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 524288, 0 :: (dereferenceable invariant load (s32)) ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1287,7 +1287,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit21(<4 x i32> inreg %desc) { ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2097152 - ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4) + ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1306,7 +1306,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit20(<4 x i32> inreg %desc) { ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4) + ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1319,7 +1319,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit20(<4 x i32> inreg %desc) { ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 262144, 0 :: (dereferenceable invariant load 4) + ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 262144, 0 :: (dereferenceable invariant load (s32)) ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1333,7 +1333,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit20(<4 x i32> inreg %desc) { ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576 - ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4) + ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1352,7 +1352,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit20(<4 x i32> inreg %desc) ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1048576 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4) + ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1365,7 +1365,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit20(<4 x i32> inreg %desc) ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073479680, 0 :: (dereferenceable invariant load 4) + ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073479680, 0 :: (dereferenceable invariant load (s32)) ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1379,7 +1379,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit20(<4 x i32> inreg %desc) ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1048576 - ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4) + ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1398,7 +1398,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit19(<4 x i32> inreg %desc) { ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 524288 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4) + ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1411,7 +1411,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit19(<4 x i32> inreg %desc) { ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 131072, 0 :: (dereferenceable invariant load 4) + ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 131072, 0 :: (dereferenceable invariant load (s32)) ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1424,7 +1424,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit19(<4 x i32> inreg %desc) { ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 524288, 0 :: (dereferenceable invariant load 4) + ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 524288, 0 :: (dereferenceable invariant load (s32)) ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1443,7 +1443,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit19(<4 x i32> inreg %desc) ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -524288 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4) + ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1456,7 +1456,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit19(<4 x i32> inreg %desc) ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073610752, 0 :: (dereferenceable invariant load 4) + ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073610752, 0 :: (dereferenceable invariant load (s32)) ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1470,7 +1470,7 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit19(<4 x i32> inreg %desc) ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -524288 - ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load 4) + ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] @@ -1491,7 +1491,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) + ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset @@ -1504,7 +1504,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) + ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset @@ -1517,7 +1517,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) + ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -1535,7 +1535,7 @@ define amdgpu_ps <2 x float> @s_buffer_load_v2f32_vgpr_offset(<4 x i32> inreg %r ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 8, align 4) + ; GFX6: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s64), align 4) ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 ; GFX6: $vgpr0 = COPY [[COPY5]] @@ -1551,7 +1551,7 @@ define amdgpu_ps <2 x float> @s_buffer_load_v2f32_vgpr_offset(<4 x i32> inreg %r ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 8, align 4) + ; GFX7: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s64), align 4) ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 ; GFX7: $vgpr0 = COPY [[COPY5]] @@ -1567,7 +1567,7 @@ define amdgpu_ps <2 x float> @s_buffer_load_v2f32_vgpr_offset(<4 x i32> inreg %r ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 8, align 4) + ; GFX8: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s64), align 4) ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 ; GFX8: $vgpr0 = COPY [[COPY5]] @@ -1588,7 +1588,7 @@ define amdgpu_ps <3 x float> @s_buffer_load_v3f32_vgpr_offset(<4 x i32> inreg %r ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) + ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX6: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF ; GFX6: [[COPY5:%[0-9]+]]:vreg_128 = COPY [[DEF]] ; GFX6: [[COPY6:%[0-9]+]]:vreg_128 = COPY [[DEF]] @@ -1611,7 +1611,7 @@ define amdgpu_ps <3 x float> @s_buffer_load_v3f32_vgpr_offset(<4 x i32> inreg %r ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) + ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX7: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF ; GFX7: [[COPY5:%[0-9]+]]:vreg_128 = COPY [[DEF]] ; GFX7: [[COPY6:%[0-9]+]]:vreg_128 = COPY [[DEF]] @@ -1634,7 +1634,7 @@ define amdgpu_ps <3 x float> @s_buffer_load_v3f32_vgpr_offset(<4 x i32> inreg %r ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) + ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX8: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF ; GFX8: [[COPY5:%[0-9]+]]:vreg_128 = COPY [[DEF]] ; GFX8: [[COPY6:%[0-9]+]]:vreg_128 = COPY [[DEF]] @@ -1662,7 +1662,7 @@ define amdgpu_ps <4 x float> @s_buffer_load_v4f32_vgpr_offset(<4 x i32> inreg %r ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) + ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 @@ -1682,7 +1682,7 @@ define amdgpu_ps <4 x float> @s_buffer_load_v4f32_vgpr_offset(<4 x i32> inreg %r ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) + ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 @@ -1702,7 +1702,7 @@ define amdgpu_ps <4 x float> @s_buffer_load_v4f32_vgpr_offset(<4 x i32> inreg %r ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) + ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 @@ -1727,8 +1727,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset(<4 x i32> inreg %r ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) + ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 @@ -1757,8 +1757,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset(<4 x i32> inreg %r ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) + ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 @@ -1787,8 +1787,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset(<4 x i32> inreg %r ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) + ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 @@ -1822,10 +1822,10 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset(<4 x i32> inreg ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 16, align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 48, align 4) + ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 @@ -1870,10 +1870,10 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset(<4 x i32> inreg ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 16, align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 48, align 4) + ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 @@ -1918,10 +1918,10 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset(<4 x i32> inreg ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 16, align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 48, align 4) + ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 @@ -1971,7 +1971,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4092(<4 x i32> inreg % ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) + ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092 @@ -1984,7 +1984,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4092(<4 x i32> inreg % ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) + ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092 @@ -1997,7 +1997,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4092(<4 x i32> inreg % ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) + ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4092 @@ -2016,7 +2016,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4095(<4 x i32> inreg % ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) + ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095 @@ -2029,7 +2029,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4095(<4 x i32> inreg % ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) + ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095 @@ -2042,7 +2042,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4095(<4 x i32> inreg % ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) + ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4095 @@ -2061,7 +2061,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4096(<4 x i32> inreg % ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) + ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096 @@ -2074,7 +2074,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4096(<4 x i32> inreg % ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) + ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096 @@ -2087,7 +2087,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4096(<4 x i32> inreg % ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 - ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) + ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4096 @@ -2107,8 +2107,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4064(<4 x i32> ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) + ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 @@ -2137,8 +2137,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4064(<4 x i32> ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) + ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 @@ -2167,8 +2167,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4064(<4 x i32> ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) + ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 @@ -2204,8 +2204,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4068(<4 x i32> ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) + ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 @@ -2234,8 +2234,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4068(<4 x i32> ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) + ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 @@ -2264,8 +2264,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4068(<4 x i32> ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4 - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) + ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 @@ -2300,10 +2300,10 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4032(<4 x i3 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 16, align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 48, align 4) + ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 @@ -2348,10 +2348,10 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4032(<4 x i3 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 16, align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 48, align 4) + ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 @@ -2396,10 +2396,10 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4032(<4 x i3 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 16, align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 48, align 4) + ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 @@ -2450,10 +2450,10 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4036(<4 x i3 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4036 - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 16, align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 48, align 4) + ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 @@ -2498,10 +2498,10 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4036(<4 x i3 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4036 - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 16, align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 48, align 4) + ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 @@ -2546,10 +2546,10 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4036(<4 x i3 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4 - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 16, align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 48, align 4) + ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 @@ -2617,7 +2617,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg % ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) + ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -2652,7 +2652,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg % ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) + ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -2687,7 +2687,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg % ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) + ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -2726,7 +2726,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> % ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) + ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -2759,7 +2759,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> % ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) + ; GFX7: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -2792,7 +2792,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> % ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) + ; GFX8: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -2836,7 +2836,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> % ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) + ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -2873,7 +2873,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> % ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) + ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -2910,7 +2910,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> % ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) + ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -2950,7 +2950,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4 from unknown-address + 4095, align 1) + ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -2983,7 +2983,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4 from unknown-address + 4095, align 1) + ; GFX7: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -3016,7 +3016,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4 from unknown-address + 4095, align 1) + ; GFX8: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -3057,7 +3057,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) + ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -3092,7 +3092,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) + ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -3125,7 +3125,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4 from unknown-address + 4096) + ; GFX8: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4096) ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -3165,8 +3165,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) + ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -3215,8 +3215,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) + ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -3265,8 +3265,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) + ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -3327,8 +3327,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) + ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -3381,8 +3381,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) + ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -3435,8 +3435,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) + ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -3495,8 +3495,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) + ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -3549,8 +3549,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) + ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -3603,8 +3603,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) + ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -3660,8 +3660,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) + ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -3711,8 +3711,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) + ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -3762,8 +3762,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 936, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 952, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) + ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 936, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 952, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -3819,8 +3819,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) + ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -3870,8 +3870,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) + ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -3921,8 +3921,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) + ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -3978,8 +3978,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) + ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -4029,8 +4029,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) + ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -4080,8 +4080,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) + ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -4136,8 +4136,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 4064, align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 4064, align 4) + ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -4186,8 +4186,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 4064, align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 4064, align 4) + ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -4236,8 +4236,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 4064, align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16 from unknown-address + 4064, align 4) + ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -4277,7 +4277,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr(<4 x i32> inreg % ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) + ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr @@ -4290,7 +4290,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr(<4 x i32> inreg % ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) + ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr @@ -4303,7 +4303,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr(<4 x i32> inreg % ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) + ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset = add i32 %offset.v, %offset.s @@ -4322,7 +4322,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr(<4 x i32> inreg % ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) + ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr @@ -4335,7 +4335,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr(<4 x i32> inreg % ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) + ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr @@ -4348,7 +4348,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr(<4 x i32> inreg % ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) + ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset = add i32 %offset.s, %offset.v @@ -4370,7 +4370,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr_imm(<4 x i32> inr ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; GFX6: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) + ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm @@ -4386,7 +4386,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr_imm(<4 x i32> inr ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; GFX7: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) + ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm @@ -4402,7 +4402,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr_imm(<4 x i32> inr ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; GFX8: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) + ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.v, %offset.s @@ -4425,7 +4425,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr_imm(<4 x i32> inr ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; GFX6: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) + ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm @@ -4441,7 +4441,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr_imm(<4 x i32> inr ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; GFX7: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) + ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm @@ -4457,7 +4457,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr_imm(<4 x i32> inr ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] ; GFX8: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) + ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.s, %offset.v @@ -4480,7 +4480,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_sgpr_vgpr(<4 x i32> inr ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 ; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) + ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr @@ -4495,7 +4495,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_sgpr_vgpr(<4 x i32> inr ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 ; GFX7: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc - ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) + ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr @@ -4510,7 +4510,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_sgpr_vgpr(<4 x i32> inr ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 ; GFX8: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc - ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) + ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.s, 1024 @@ -4533,7 +4533,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_vgpr_sgpr(<4 x i32> inr ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GFX6: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) + ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr @@ -4549,7 +4549,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_vgpr_sgpr(<4 x i32> inr ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GFX7: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec - ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) + ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr @@ -4565,7 +4565,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_vgpr_sgpr(<4 x i32> inr ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GFX8: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec - ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) + ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.v, 1024 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll index 3b2095fba134f..a39b79799cf9c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll @@ -16,7 +16,7 @@ define amdgpu_ps float @struct_buffer_atomic_add_i32__vgpr_val__sgpr_rsrc__vgpr_ ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; CHECK: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_BOTHEN_RTN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -38,7 +38,7 @@ define amdgpu_ps float @struct_buffer_atomic_add_i32_noret__vgpr_val__sgpr_rsrc_ ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; CHECK: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_BOTHEN_RTN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -62,7 +62,7 @@ define amdgpu_ps <2 x float> @struct_buffer_atomic_add_i64__vgpr_val__sgpr_rsrc_ ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; CHECK: [[BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store 8 on custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on custom "BufferResource", align 1, addrspace 4) ; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN]].sub0 ; CHECK: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN]].sub1 ; CHECK: $vgpr0 = COPY [[COPY9]] @@ -89,7 +89,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_i64_noret__vgpr_val__sgpr_rsrc__ ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; CHECK: [[BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store 8 on custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 %ret = call i64 @llvm.amdgcn.struct.buffer.atomic.add.i64(i64 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -132,7 +132,7 @@ define amdgpu_ps float @struct_buffer_atomic_add_i32__sgpr_val__vgpr_rsrc__sgpr_ ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec ; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; CHECK: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -184,7 +184,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_i32_noret__sgpr_val__vgpr_rsrc__ ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec ; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; CHECK: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -212,7 +212,7 @@ define amdgpu_ps float @struct_buffer_atomic_add_i32__vgpr_val__sgpr_rsrc__vgpr_ ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; CHECK: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 3, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_BOTHEN_RTN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll index f03efd59a3249..ed246a51b7727 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll @@ -19,7 +19,7 @@ define amdgpu_ps float @struct_buffer_atomic_cmpswap_i32__vgpr_val__vgpr_cmp__sg ; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; CHECK: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0 ; CHECK: $vgpr0 = COPY [[COPY9]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -45,7 +45,7 @@ define amdgpu_ps void @struct_buffer_atomic_cmpswap_noret_i32__vgpr_val__vgpr_cm ; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; CHECK: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0 ; CHECK: S_ENDPGM 0 %ret = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %val, i32 %cmp, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -92,7 +92,7 @@ define amdgpu_ps float @struct_buffer_atomic_cmpswap_i32__sgpr_val__sgpr_cmp__vg ; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY11]], %subreg.sub0, [[COPY12]], %subreg.sub1 ; CHECK: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; CHECK: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; CHECK: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0 ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc @@ -148,7 +148,7 @@ define amdgpu_ps void @struct_buffer_atomic_cmpswap_i32_noret__sgpr_val__sgpr_cm ; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY11]], %subreg.sub0, [[COPY12]], %subreg.sub1 ; CHECK: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; CHECK: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; CHECK: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0 ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc @@ -178,7 +178,7 @@ define amdgpu_ps float @struct_buffer_atomic_cmpswap_i32__vgpr_val__vgpr_cmp__sg ; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; CHECK: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 4095, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4) + ; CHECK: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 4095, align 1, addrspace 4) ; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0 ; CHECK: $vgpr0 = COPY [[COPY9]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll index a53e42bd33219..c659de89ab96f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll @@ -17,7 +17,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__ ; GFX908: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX908: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX908: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX908: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; GFX908: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; GFX908: S_ENDPGM 0 ; GFX90A-LABEL: name: struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset ; GFX90A: bb.1 (%ir-block.0): @@ -32,7 +32,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__ ; GFX90A: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX90A: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX90A: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; GFX90A: S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -52,7 +52,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__ ; GFX908: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX908: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX908: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX908: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 4095, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4) + ; GFX908: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 4095, align 1, addrspace 4) ; GFX908: S_ENDPGM 0 ; GFX90A-LABEL: name: struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset_plus4095__sgpr_soffset ; GFX90A: bb.1 (%ir-block.0): @@ -67,7 +67,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__ ; GFX90A: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX90A: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX90A: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 4095, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4) + ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 4095, align 1, addrspace 4) ; GFX90A: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4095 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset.add, i32 %soffset, i32 0) @@ -86,7 +86,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__ ; GFX908: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX908: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX908: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4) + ; GFX908: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 4095, align 1, addrspace 4) ; GFX908: S_ENDPGM 0 ; GFX90A-LABEL: name: struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__4095_voffset__sgpr_soffset ; GFX90A: bb.1 (%ir-block.0): @@ -99,7 +99,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__ ; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4) + ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 4095, align 1, addrspace 4) ; GFX90A: S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 4095, i32 %soffset, i32 0) ret void @@ -118,7 +118,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__ ; GFX908: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX908: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX908: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; GFX908: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; GFX908: S_ENDPGM 0 ; GFX90A-LABEL: name: struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset ; GFX90A: bb.1 (%ir-block.0): @@ -131,7 +131,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__ ; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; GFX90A: S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret void @@ -174,7 +174,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__ ; GFX908: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec ; GFX908: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; GFX908: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; GFX908: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; GFX908: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; GFX908: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX908: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX908: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -218,7 +218,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__ ; GFX90A: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec ; GFX90A: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; GFX90A: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; GFX90A: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX90A: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -265,7 +265,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__ ; GFX908: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; GFX908: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec ; GFX908: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc - ; GFX908: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; GFX908: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; GFX908: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX908: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX908: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -306,7 +306,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__ ; GFX90A: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec ; GFX90A: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec ; GFX90A: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc - ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; GFX90A: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX90A: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -334,7 +334,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__ ; GFX908: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX908: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX908: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX908: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; GFX908: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; GFX908: S_ENDPGM 0 ; GFX90A-LABEL: name: struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc ; GFX90A: bb.1 (%ir-block.0): @@ -349,7 +349,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__ ; GFX90A: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX90A: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX90A: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 3, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; GFX90A: S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) ret void @@ -367,7 +367,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__ ; GFX908: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX908: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX908: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 2, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; GFX908: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; GFX908: S_ENDPGM 0 ; GFX90A-LABEL: name: struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset_slc ; GFX90A: bb.1 (%ir-block.0): @@ -380,7 +380,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__ ; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 3, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; GFX90A: S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 2) ret void @@ -400,7 +400,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc ; GFX908: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX908: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX908: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX908: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; GFX908: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on custom "BufferResource", align 1, addrspace 4) ; GFX908: S_ENDPGM 0 ; GFX90A-LABEL: name: struct_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset ; GFX90A: bb.1 (%ir-block.0): @@ -415,7 +415,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc ; GFX90A: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX90A: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX90A: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A: [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; GFX90A: [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on custom "BufferResource", align 1, addrspace 4) ; GFX90A: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -433,7 +433,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc ; GFX908: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX908: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX908: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX908: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; GFX908: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on custom "BufferResource", align 1, addrspace 4) ; GFX908: S_ENDPGM 0 ; GFX90A-LABEL: name: struct_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset ; GFX90A: bb.1 (%ir-block.0): @@ -446,7 +446,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc ; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A: [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; GFX90A: [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on custom "BufferResource", align 1, addrspace 4) ; GFX90A: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll index d36e9951af82d..6c1dac72367ea 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll @@ -15,7 +15,7 @@ define amdgpu_ps half @struct_buffer_load_format_f16__sgpr_rsrc__vgpr_vindex__vg ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4) + ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16) from custom "BufferResource", align 1, addrspace 4) ; UNPACKED: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]] ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: struct_buffer_load_format_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset @@ -30,7 +30,7 @@ define amdgpu_ps half @struct_buffer_load_format_f16__sgpr_rsrc__vgpr_vindex__vg ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; PACKED: [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4) + ; PACKED: [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16) from custom "BufferResource", align 1, addrspace 4) ; PACKED: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN]] ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call half @llvm.amdgcn.struct.buffer.load.format.f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -50,7 +50,7 @@ define amdgpu_ps <2 x half> @struct_buffer_load_format_v2f16__sgpr_rsrc__vgpr_vi ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>) from custom "BufferResource", align 1, addrspace 4) ; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN]].sub0 ; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN]].sub1 ; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 @@ -76,7 +76,7 @@ define amdgpu_ps <2 x half> @struct_buffer_load_format_v2f16__sgpr_rsrc__vgpr_vi ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; PACKED: [[BUFFER_LOAD_FORMAT_D16_XY_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; PACKED: [[BUFFER_LOAD_FORMAT_D16_XY_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>) from custom "BufferResource", align 1, addrspace 4) ; PACKED: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_BOTHEN]] ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call <2 x half> @llvm.amdgcn.struct.buffer.load.format.v2f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -102,7 +102,7 @@ define amdgpu_ps <4 x half> @struct_buffer_load_format_v4f16__sgpr_rsrc__vgpr_vi ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4) + ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>) from custom "BufferResource", align 1, addrspace 4) ; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub0 ; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub1 ; UNPACKED: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub2 @@ -138,7 +138,7 @@ define amdgpu_ps <4 x half> @struct_buffer_load_format_v4f16__sgpr_rsrc__vgpr_vi ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; PACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4) + ; PACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>) from custom "BufferResource", align 1, addrspace 4) ; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub0 ; PACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub1 ; PACKED: $vgpr0 = COPY [[COPY7]] @@ -183,7 +183,7 @@ define amdgpu_ps <4 x half> @struct_buffer_load_format_v4f16__vpr_rsrc__sgpr_vin ; UNPACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec ; UNPACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; UNPACKED: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4) + ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>) from custom "BufferResource", align 1, addrspace 4) ; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -247,7 +247,7 @@ define amdgpu_ps <4 x half> @struct_buffer_load_format_v4f16__vpr_rsrc__sgpr_vin ; PACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec ; PACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; PACKED: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; PACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4) + ; PACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>) from custom "BufferResource", align 1, addrspace 4) ; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -277,7 +277,7 @@ define amdgpu_ps half @struct_buffer_load_format_f16__sgpr_rsrc__vgpr_vindex__vg ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource" + 4095, align 1, addrspace 4) + ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load (s16) from custom "BufferResource" + 4095, align 1, addrspace 4) ; UNPACKED: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]] ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: struct_buffer_load_format_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset_voffsset_add_4095 @@ -292,7 +292,7 @@ define amdgpu_ps half @struct_buffer_load_format_f16__sgpr_rsrc__vgpr_vindex__vg ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; PACKED: [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource" + 4095, align 1, addrspace 4) + ; PACKED: [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load (s16) from custom "BufferResource" + 4095, align 1, addrspace 4) ; PACKED: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN]] ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 %voffset = add i32 %voffset.base, 4095 @@ -313,7 +313,7 @@ define amdgpu_ps half @struct_buffer_load_format_i16__sgpr_rsrc__vgpr_vindex__vg ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4) + ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16) from custom "BufferResource", align 1, addrspace 4) ; UNPACKED: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]] ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: struct_buffer_load_format_i16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset @@ -328,7 +328,7 @@ define amdgpu_ps half @struct_buffer_load_format_i16__sgpr_rsrc__vgpr_vindex__vg ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; PACKED: [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4) + ; PACKED: [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16) from custom "BufferResource", align 1, addrspace 4) ; PACKED: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN]] ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call i16 @llvm.amdgcn.struct.buffer.load.format.i16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll index f8f4745037e60..716962ff76e99 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll @@ -15,7 +15,7 @@ define amdgpu_ps float @struct_buffer_load_format_f32__sgpr_rsrc__vgpr_vindex__v ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_BOTHEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -35,7 +35,7 @@ define amdgpu_ps <2 x float> @struct_buffer_load_format_v2f32__sgpr_rsrc__vgpr_v ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK: [[BUFFER_LOAD_FORMAT_XY_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_FORMAT_XY_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>) from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XY_BOTHEN]].sub0 ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XY_BOTHEN]].sub1 ; CHECK: $vgpr0 = COPY [[COPY7]] @@ -58,7 +58,7 @@ define amdgpu_ps <3 x float> @struct_buffer_load_format_v3f32__sgpr_rsrc__vgpr_v ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK: [[BUFFER_LOAD_FORMAT_XYZ_BOTHEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_FORMAT_XYZ_BOTHEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>) from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub0 ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub1 ; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub2 @@ -83,7 +83,7 @@ define amdgpu_ps <4 x float> @struct_buffer_load_format_v4f32__sgpr_rsrc__vgpr_v ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>) from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub0 ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub1 ; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub2 @@ -132,7 +132,7 @@ define amdgpu_ps <4 x float> @struct_buffer_load_format_v4f32__vpr_rsrc__sgpr_vi ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec ; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; CHECK: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>) from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -166,7 +166,7 @@ define amdgpu_ps float @struct_buffer_load_format_f32__sgpr_rsrc__vgpr_vindex__v ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4095, align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource" + 4095, align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_BOTHEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %voffset = add i32 %voffset.base, 4095 @@ -187,7 +187,7 @@ define amdgpu_ps float @struct_buffer_load_format_i32__sgpr_rsrc__vgpr_vindex__v ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_BOTHEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll index 60a6f8595cbe0..7cecb0d8f2ee7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll @@ -16,7 +16,7 @@ define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vof ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -37,7 +37,7 @@ define amdgpu_ps <2 x float> @struct_buffer_load_v2f32__sgpr_rsrc__vgpr_vindex__ ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK: [[BUFFER_LOAD_DWORDX2_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_DWORDX2_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>) from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_BOTHEN]].sub0 ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_BOTHEN]].sub1 ; CHECK: $vgpr0 = COPY [[COPY7]] @@ -61,7 +61,7 @@ define amdgpu_ps <3 x float> @struct_buffer_load_v3f32__sgpr_rsrc__vgpr_vindex__ ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK: [[BUFFER_LOAD_DWORDX3_BOTHEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_DWORDX3_BOTHEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>) from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_BOTHEN]].sub0 ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_BOTHEN]].sub1 ; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_BOTHEN]].sub2 @@ -87,7 +87,7 @@ define amdgpu_ps <4 x float> @struct_buffer_load_v4f32__sgpr_rsrc__vgpr_vindex__ ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK: [[BUFFER_LOAD_DWORDX4_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_DWORDX4_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>) from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_BOTHEN]].sub0 ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_BOTHEN]].sub1 ; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_BOTHEN]].sub2 @@ -116,7 +116,7 @@ define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vof ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1 - ; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 0, i32 %voffset, i32 %soffset, i32 0) @@ -137,7 +137,7 @@ define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vof ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4095, align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource" + 4095, align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %voffset = add i32 %voffset.base, 4095 @@ -158,7 +158,7 @@ define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vof ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 64 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 64, i32 0) @@ -200,7 +200,7 @@ define amdgpu_ps float @struct_buffer_load_f32__vgpr_rsrc__sgpr_vindex__sgpr_vof ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec ; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -227,7 +227,7 @@ define amdgpu_ps float @struct_buffer_load_i8_zext__sgpr_rsrc__vgpr_vindex__vgpr ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK: [[BUFFER_LOAD_UBYTE_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 1 from custom "BufferResource", addrspace 4) + ; CHECK: [[BUFFER_LOAD_UBYTE_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s8) from custom "BufferResource", addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_BOTHEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -249,7 +249,7 @@ define amdgpu_ps float @struct_buffer_load_i8_sext__sgpr_rsrc__vgpr_vindex__vgpr ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK: [[BUFFER_LOAD_UBYTE_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 1 from custom "BufferResource", addrspace 4) + ; CHECK: [[BUFFER_LOAD_UBYTE_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s8) from custom "BufferResource", addrspace 4) ; CHECK: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[BUFFER_LOAD_UBYTE_BOTHEN]], 0, 8, implicit $exec ; CHECK: $vgpr0 = COPY [[V_BFE_I32_e64_]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -272,7 +272,7 @@ define amdgpu_ps float @struct_buffer_load_i16_zext__sgpr_rsrc__vgpr_vindex__vgp ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16) from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_BOTHEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call i16 @llvm.amdgcn.struct.buffer.load.i16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -294,7 +294,7 @@ define amdgpu_ps float @struct_buffer_load_i16_sext__sgpr_rsrc__vgpr_vindex__vgp ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16) from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[BUFFER_LOAD_USHORT_BOTHEN]], 0, 16, implicit $exec ; CHECK: $vgpr0 = COPY [[V_BFE_I32_e64_]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -318,7 +318,7 @@ define amdgpu_ps half @struct_buffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_voff ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16) from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_BOTHEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -339,7 +339,7 @@ define amdgpu_ps <2 x half> @struct_buffer_load_v2f16__sgpr_rsrc__vgpr_vindex__v ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>) from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call <2 x half> @llvm.amdgcn.struct.buffer.load.v2f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -366,7 +366,7 @@ define amdgpu_ps <4 x half> @struct_buffer_load_v4f16__sgpr_rsrc__vgpr_vindex__v ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK: [[BUFFER_LOAD_DWORDX2_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_DWORDX2_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>) from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_BOTHEN]].sub0 ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_BOTHEN]].sub1 ; CHECK: $vgpr0 = COPY [[COPY7]] @@ -390,7 +390,7 @@ define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vof ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 1) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll index 701531d328ce4..890f09b0607b5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll @@ -16,7 +16,7 @@ define amdgpu_ps void @struct_buffer_store_format_f16__vgpr_val__sgpr_rsrc__vgpr ; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; UNPACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; UNPACKED: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4) + ; UNPACKED: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s16) into custom "BufferResource", align 1, addrspace 4) ; UNPACKED: S_ENDPGM 0 ; PACKED-LABEL: name: struct_buffer_store_format_f16__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset ; PACKED: bb.1 (%ir-block.0): @@ -31,7 +31,7 @@ define amdgpu_ps void @struct_buffer_store_format_f16__vgpr_val__sgpr_rsrc__vgpr ; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; PACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; PACKED: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4) + ; PACKED: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s16) into custom "BufferResource", align 1, addrspace 4) ; PACKED: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.format.f16(half %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -55,7 +55,7 @@ define amdgpu_ps void @struct_buffer_store_format_v2f16__vgpr_val__sgpr_rsrc__vg ; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY]], implicit $exec ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1 ; UNPACKED: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_BOTHEN_exact [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_BOTHEN_exact [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>) into custom "BufferResource", align 1, addrspace 4) ; UNPACKED: S_ENDPGM 0 ; PACKED-LABEL: name: struct_buffer_store_format_v2f16__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset ; PACKED: bb.1 (%ir-block.0): @@ -70,7 +70,7 @@ define amdgpu_ps void @struct_buffer_store_format_v2f16__vgpr_val__sgpr_rsrc__vg ; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; PACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; PACKED: BUFFER_STORE_FORMAT_D16_XY_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; PACKED: BUFFER_STORE_FORMAT_D16_XY_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>) into custom "BufferResource", align 1, addrspace 4) ; PACKED: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -103,7 +103,7 @@ define amdgpu_ps void @struct_buffer_store_format_v4f16__vgpr_val__sgpr_rsrc__vg ; UNPACKED: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY10]], [[COPY1]], implicit $exec ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY1]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3 ; UNPACKED: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_BOTHEN_exact [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4) + ; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_BOTHEN_exact [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>) into custom "BufferResource", align 1, addrspace 4) ; UNPACKED: S_ENDPGM 0 ; PACKED-LABEL: name: struct_buffer_store_format_v4f16__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset ; PACKED: bb.1 (%ir-block.0): @@ -120,7 +120,7 @@ define amdgpu_ps void @struct_buffer_store_format_v4f16__vgpr_val__sgpr_rsrc__vg ; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; PACKED: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4) + ; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>) into custom "BufferResource", align 1, addrspace 4) ; PACKED: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.format.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -162,7 +162,7 @@ define amdgpu_ps void @struct_buffer_store_format_f16__sgpr_val__vgpr_rsrc__sgpr ; UNPACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec ; UNPACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; UNPACKED: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; UNPACKED: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4) + ; UNPACKED: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s16) into custom "BufferResource", align 1, addrspace 4) ; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -206,7 +206,7 @@ define amdgpu_ps void @struct_buffer_store_format_f16__sgpr_val__vgpr_rsrc__sgpr ; PACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec ; PACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; PACKED: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; PACKED: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4) + ; PACKED: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s16) into custom "BufferResource", align 1, addrspace 4) ; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -233,7 +233,7 @@ define amdgpu_ps void @struct_buffer_store_format_i16__vgpr_val__sgpr_rsrc__vgpr ; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; UNPACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; UNPACKED: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4) + ; UNPACKED: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s16) into custom "BufferResource", align 1, addrspace 4) ; UNPACKED: S_ENDPGM 0 ; PACKED-LABEL: name: struct_buffer_store_format_i16__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset ; PACKED: bb.1 (%ir-block.0): @@ -248,7 +248,7 @@ define amdgpu_ps void @struct_buffer_store_format_i16__vgpr_val__sgpr_rsrc__vgpr ; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; PACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; PACKED: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4) + ; PACKED: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s16) into custom "BufferResource", align 1, addrspace 4) ; PACKED: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.format.i16(i16 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll index 892cc82237c91..5ccc0f7ceb561 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll @@ -15,7 +15,7 @@ define amdgpu_ps void @struct_buffer_store_format_f32__vgpr_val__sgpr_rsrc__vgpr ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; CHECK: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.format.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -37,7 +37,7 @@ define amdgpu_ps void @struct_buffer_store_format_v2f32__vgpr_val__sgpr_rsrc__vg ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; CHECK: BUFFER_STORE_FORMAT_XY_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: BUFFER_STORE_FORMAT_XY_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -60,7 +60,7 @@ define amdgpu_ps void @struct_buffer_store_format_v3f32__vgpr_val__sgpr_rsrc__vg ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; CHECK: BUFFER_STORE_FORMAT_XYZ_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: BUFFER_STORE_FORMAT_XYZ_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x s32>) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.format.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -84,7 +84,7 @@ define amdgpu_ps void @struct_buffer_store_format_v4f32__vgpr_val__sgpr_rsrc__vg ; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; CHECK: [[COPY10:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 - ; CHECK: BUFFER_STORE_FORMAT_XYZW_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: BUFFER_STORE_FORMAT_XYZW_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -126,7 +126,7 @@ define amdgpu_ps void @struct_buffer_store_format_f32__sgpr_val__vgpr_rsrc__sgpr ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec ; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; CHECK: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource", align 1, addrspace 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -153,7 +153,7 @@ define amdgpu_ps void @struct_buffer_store_format_i32__vgpr_val__sgpr_rsrc__vgpr ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; CHECK: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.format.i32(i32 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll index b1de301e9abb7..29ff3bb9f25b4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll @@ -17,7 +17,7 @@ define amdgpu_ps void @struct_buffer_store_f32_sgpr_rsrc__vgpr_val__vgpr_vindex_ ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; CHECK: BUFFER_STORE_DWORD_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: BUFFER_STORE_DWORD_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -39,7 +39,7 @@ define amdgpu_ps void @struct_buffer_store_v2f32_sgpr_rsrc__vgpr_val__vgpr_vinde ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; CHECK: BUFFER_STORE_DWORDX2_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: BUFFER_STORE_DWORDX2_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -62,7 +62,7 @@ define amdgpu_ps void @struct_buffer_store_v3f32_sgpr_rsrc__vgpr_val__vgpr_vinde ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; CHECK: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; CHECK: BUFFER_STORE_DWORDX3_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: BUFFER_STORE_DWORDX3_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x s32>) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -86,7 +86,7 @@ define amdgpu_ps void @struct_buffer_store_v4f32_sgpr_rsrc__vgpr_val__vgpr_vinde ; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; CHECK: [[COPY10:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 - ; CHECK: BUFFER_STORE_DWORDX4_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: BUFFER_STORE_DWORDX4_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -132,7 +132,7 @@ define amdgpu_ps void @struct_buffer_store_v4f32_vgpr_rsrc__sgpr_val__sgpr_vinde ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY10]], implicit $exec ; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; CHECK: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY12]], %subreg.sub0, [[COPY13]], %subreg.sub1 - ; CHECK: BUFFER_STORE_DWORDX4_BOTHEN_exact [[COPY11]], [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: BUFFER_STORE_DWORDX4_BOTHEN_exact [[COPY11]], [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>) into custom "BufferResource", align 1, addrspace 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -159,7 +159,7 @@ define amdgpu_ps void @struct_buffer_store_i8_sgpr_rsrc__vgpr_val__vgpr_vindex__ ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; CHECK: BUFFER_STORE_BYTE_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4) + ; CHECK: BUFFER_STORE_BYTE_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s8) into custom "BufferResource", addrspace 4) ; CHECK: S_ENDPGM 0 %val.trunc = trunc i32 %val to i8 call void @llvm.amdgcn.struct.buffer.store.i8(i8 %val.trunc, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -180,7 +180,7 @@ define amdgpu_ps void @struct_buffer_store_i16_sgpr_rsrc__vgpr_val__vgpr_vindex_ ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; CHECK: BUFFER_STORE_SHORT_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: BUFFER_STORE_SHORT_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s16) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 %val.trunc = trunc i32 %val to i16 call void @llvm.amdgcn.struct.buffer.store.i16(i16 %val.trunc, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -201,7 +201,7 @@ define amdgpu_ps void @struct_buffer_store_f32_sgpr_rsrc__vgpr_val__vgpr_vindex_ ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; CHECK: BUFFER_STORE_DWORD_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: BUFFER_STORE_DWORD_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 1) ret void @@ -221,7 +221,7 @@ define amdgpu_ps void @struct_buffer_store_v2f16_sgpr_rsrc__vgpr_val__vgpr_vinde ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; CHECK: BUFFER_STORE_DWORD_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: BUFFER_STORE_DWORD_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -249,7 +249,7 @@ define amdgpu_ps void @struct_buffer_store_v4f16_sgpr_rsrc__vgpr_val__vgpr_vinde ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; CHECK: BUFFER_STORE_DWORDX2_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: BUFFER_STORE_DWORDX2_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll index a9296f0061675..12e0277cbffcf 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll @@ -16,7 +16,7 @@ define amdgpu_ps half @struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_vof ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4) + ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s16) from custom "BufferResource", align 1, addrspace 4) ; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN]] ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 ; UNPACKED-LABEL: name: struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset @@ -31,7 +31,7 @@ define amdgpu_ps half @struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_vof ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4) + ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s16) from custom "BufferResource", align 1, addrspace 4) ; UNPACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]] ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call half @llvm.amdgcn.struct.tbuffer.load.f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 78, i32 0) @@ -51,8 +51,8 @@ define amdgpu_ps <2 x half> @struct_tbuffer_load_v2f16__sgpr_rsrc__vgpr_vindex__ ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) - ; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN]] + ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_XY_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>) from custom "BufferResource", align 1, addrspace 4) + ; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_XY_BOTHEN]] ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 ; UNPACKED-LABEL: name: struct_tbuffer_load_v2f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset ; UNPACKED: bb.1 (%ir-block.0): @@ -66,7 +66,7 @@ define amdgpu_ps <2 x half> @struct_tbuffer_load_v2f16__sgpr_rsrc__vgpr_vindex__ ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>) from custom "BufferResource", align 1, addrspace 4) ; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN]].sub0 ; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN]].sub1 ; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 @@ -103,7 +103,7 @@ define amdgpu_ps <4 x half> @struct_tbuffer_load_v4f16__sgpr_rsrc__vgpr_vindex__ ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4) + ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>) from custom "BufferResource", align 1, addrspace 4) ; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub0 ; PACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub1 ; PACKED: $vgpr0 = COPY [[COPY7]] @@ -121,7 +121,7 @@ define amdgpu_ps <4 x half> @struct_tbuffer_load_v4f16__sgpr_rsrc__vgpr_vindex__ ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4) + ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>) from custom "BufferResource", align 1, addrspace 4) ; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub0 ; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub1 ; UNPACKED: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub2 @@ -163,7 +163,7 @@ define amdgpu_ps half @struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_vof ; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1 - ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4) + ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s16) from custom "BufferResource", align 1, addrspace 4) ; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN]] ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 ; UNPACKED-LABEL: name: struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset_vindex0 @@ -179,7 +179,7 @@ define amdgpu_ps half @struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_vof ; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1 - ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4) + ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s16) from custom "BufferResource", align 1, addrspace 4) ; UNPACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]] ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call half @llvm.amdgcn.struct.tbuffer.load.f16(<4 x i32> %rsrc, i32 0, i32 %voffset, i32 %soffset, i32 78, i32 0) @@ -220,7 +220,7 @@ define amdgpu_ps <4 x half> @struct_tbuffer_load_v4f16__vgpr_rsrc__sgpr_vindex__ ; PACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec ; PACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; PACKED: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4) + ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>) from custom "BufferResource", align 1, addrspace 4) ; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -266,7 +266,7 @@ define amdgpu_ps <4 x half> @struct_tbuffer_load_v4f16__vgpr_rsrc__sgpr_vindex__ ; UNPACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec ; UNPACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; UNPACKED: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4) + ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>) from custom "BufferResource", align 1, addrspace 4) ; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -314,7 +314,7 @@ define amdgpu_ps half @struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_vof ; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource" + 4095, align 1, addrspace 4) + ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s16) from custom "BufferResource" + 4095, align 1, addrspace 4) ; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN]] ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 ; UNPACKED-LABEL: name: struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset_voffset_add4095 @@ -329,7 +329,7 @@ define amdgpu_ps half @struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_vof ; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource" + 4095, align 1, addrspace 4) + ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s16) from custom "BufferResource" + 4095, align 1, addrspace 4) ; UNPACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]] ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 %voffset = add i32 %voffset.base, 4095 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll index ca7eb249124df..fe9abb0a6c208 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll @@ -15,7 +15,7 @@ define amdgpu_ps float @struct_tbuffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vo ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK: [[TBUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[TBUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_BOTHEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 78, i32 0) @@ -35,7 +35,7 @@ define amdgpu_ps <2 x float> @struct_tbuffer_load_v2f32__sgpr_rsrc__vgpr_vindex_ ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK: [[TBUFFER_LOAD_FORMAT_XY_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[TBUFFER_LOAD_FORMAT_XY_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>) from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_BOTHEN]].sub0 ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_BOTHEN]].sub1 ; CHECK: $vgpr0 = COPY [[COPY7]] @@ -58,7 +58,7 @@ define amdgpu_ps <3 x float> @struct_tbuffer_load_v3f32__sgpr_rsrc__vgpr_vindex_ ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK: [[TBUFFER_LOAD_FORMAT_XYZ_BOTHEN:%[0-9]+]]:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 12 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[TBUFFER_LOAD_FORMAT_XYZ_BOTHEN:%[0-9]+]]:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>) from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub0 ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub1 ; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub2 @@ -83,7 +83,7 @@ define amdgpu_ps <4 x float> @struct_tbuffer_load_v4f32__sgpr_rsrc__vgpr_vindex_ ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK: [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>) from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub0 ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub1 ; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub2 @@ -111,7 +111,7 @@ define amdgpu_ps float @struct_tbuffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vo ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1 - ; CHECK: [[TBUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[TBUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_BOTHEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> %rsrc, i32 0, i32 %voffset, i32 %soffset, i32 78, i32 0) @@ -152,7 +152,7 @@ define amdgpu_ps <4 x float> @struct_tbuffer_load_v4f32__vgpr_rsrc__sgpr_vindex_ ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec ; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc ; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; CHECK: [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>) from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -186,7 +186,7 @@ define amdgpu_ps float @struct_tbuffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vo ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; CHECK: [[TBUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4095, align 1, addrspace 4) + ; CHECK: [[TBUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource" + 4095, align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_BOTHEN]] ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %voffset = add i32 %voffset.base, 4095 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/no-legalize-atomic.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/no-legalize-atomic.mir index 995a614ccaa81..ddbd69cc81047 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/no-legalize-atomic.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/no-legalize-atomic.mir @@ -1,9 +1,9 @@ # RUN: llc -amdgpu-global-isel-new-legality -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck %s -# CHECK: %1:_(<8 x s32>) = G_LOAD %0(p1) :: (load monotonic 32, addrspace 1) -# CHECK: G_STORE %1(<8 x s32>), %0(p1) :: (store monotonic 32, addrspace 1) -# CHECK: %1:_(s256) = G_LOAD %0(p1) :: (load monotonic 32, addrspace 1) -# CHECK: G_STORE %1(s256), %0(p1) :: (store monotonic 32, addrspace 1) +# CHECK: %1:_(<8 x s32>) = G_LOAD %0(p1) :: (load monotonic (<8 x s32>), addrspace 1) +# CHECK: G_STORE %1(<8 x s32>), %0(p1) :: (store monotonic (<8 x s32>), addrspace 1) +# CHECK: %1:_(s256) = G_LOAD %0(p1) :: (load monotonic (s256), addrspace 1) +# CHECK: G_STORE %1(s256), %0(p1) :: (store monotonic (s256), addrspace 1) --- name: test_atomic_load_global_v8s32 @@ -11,7 +11,7 @@ body: | bb.0: liveins: $vgpr0_vgpr1 %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(<8 x s32>) = G_LOAD %0 :: (load monotonic 32, addrspace 1, align 32) + %1:_(<8 x s32>) = G_LOAD %0 :: (load monotonic (<8 x s32>), addrspace 1, align 32) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 ... @@ -22,7 +22,7 @@ body: | liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - G_STORE %1, %0 :: (store monotonic 32, addrspace 1, align 32) + G_STORE %1, %0 :: (store monotonic (<8 x s32>), addrspace 1, align 32) ... --- @@ -31,7 +31,7 @@ body: | bb.0: liveins: $vgpr0_vgpr1 %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s256) = G_LOAD %0 :: (load monotonic 32, addrspace 1, align 32) + %1:_(s256) = G_LOAD %0 :: (load monotonic (s256), addrspace 1, align 32) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 ... @@ -42,5 +42,5 @@ body: | liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - G_STORE %1, %0 :: (store monotonic 32, addrspace 1, align 32) + G_STORE %1, %0 :: (store monotonic (s256), addrspace 1, align 32) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-divrem.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-divrem.mir index b886d8f4023df..e956cae625602 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-divrem.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-divrem.mir @@ -16,17 +16,17 @@ body: | ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3 ; CHECK: %ptr2:_(p1) = COPY $vgpr4_vgpr5 ; CHECK: %div:_(s32) = G_SDIV %src1, %src2 - ; CHECK: G_STORE %div(s32), %ptr1(p1) :: (store 4, addrspace 1) + ; CHECK: G_STORE %div(s32), %ptr1(p1) :: (store (s32), addrspace 1) ; CHECK: %rem:_(s32) = G_SREM %src1, %src2 - ; CHECK: G_STORE %rem(s32), %ptr2(p1) :: (store 4, addrspace 1) + ; CHECK: G_STORE %rem(s32), %ptr2(p1) :: (store (s32), addrspace 1) %src1:_(s32) = COPY $vgpr0 %src2:_(s32) = COPY $vgpr1 %ptr1:_(p1) = COPY $vgpr2_vgpr3 %ptr2:_(p1) = COPY $vgpr4_vgpr5 %div:_(s32) = G_SDIV %src1:_(s32), %src2:_(s32) - G_STORE %div:_(s32), %ptr1:_(p1) :: (store 4, addrspace 1, align 4) + G_STORE %div:_(s32), %ptr1:_(p1) :: (store (s32), addrspace 1, align 4) %rem:_(s32) = G_SREM %src1:_(s32), %src2:_(s32) - G_STORE %rem:_(s32), %ptr2:_(p1) :: (store 4, addrspace 1, align 4) + G_STORE %rem:_(s32), %ptr2:_(p1) :: (store (s32), addrspace 1, align 4) ... --- name: test_srem_sdiv @@ -42,17 +42,17 @@ body: | ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3 ; CHECK: %ptr2:_(p1) = COPY $vgpr4_vgpr5 ; CHECK: %rem:_(s32) = G_SREM %src1, %src2 - ; CHECK: G_STORE %rem(s32), %ptr1(p1) :: (store 4, addrspace 1) + ; CHECK: G_STORE %rem(s32), %ptr1(p1) :: (store (s32), addrspace 1) ; CHECK: %div:_(s32) = G_SDIV %src1, %src2 - ; CHECK: G_STORE %div(s32), %ptr2(p1) :: (store 4, addrspace 1) + ; CHECK: G_STORE %div(s32), %ptr2(p1) :: (store (s32), addrspace 1) %src1:_(s32) = COPY $vgpr0 %src2:_(s32) = COPY $vgpr1 %ptr1:_(p1) = COPY $vgpr2_vgpr3 %ptr2:_(p1) = COPY $vgpr4_vgpr5 %rem:_(s32) = G_SREM %src1:_(s32), %src2:_(s32) - G_STORE %rem:_(s32), %ptr1:_(p1) :: (store 4, addrspace 1, align 4) + G_STORE %rem:_(s32), %ptr1:_(p1) :: (store (s32), addrspace 1, align 4) %div:_(s32) = G_SDIV %src1:_(s32), %src2:_(s32) - G_STORE %div:_(s32), %ptr2:_(p1) :: (store 4, addrspace 1, align 4) + G_STORE %div:_(s32), %ptr2:_(p1) :: (store (s32), addrspace 1, align 4) ... --- name: test_udiv_urem @@ -68,17 +68,17 @@ body: | ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3 ; CHECK: %ptr2:_(p1) = COPY $vgpr4_vgpr5 ; CHECK: %div:_(s32) = G_UDIV %src1, %src2 - ; CHECK: G_STORE %div(s32), %ptr1(p1) :: (store 4, addrspace 1) + ; CHECK: G_STORE %div(s32), %ptr1(p1) :: (store (s32), addrspace 1) ; CHECK: %rem:_(s32) = G_UREM %src1, %src2 - ; CHECK: G_STORE %rem(s32), %ptr2(p1) :: (store 4, addrspace 1) + ; CHECK: G_STORE %rem(s32), %ptr2(p1) :: (store (s32), addrspace 1) %src1:_(s32) = COPY $vgpr0 %src2:_(s32) = COPY $vgpr1 %ptr1:_(p1) = COPY $vgpr2_vgpr3 %ptr2:_(p1) = COPY $vgpr4_vgpr5 %div:_(s32) = G_UDIV %src1:_(s32), %src2:_(s32) - G_STORE %div:_(s32), %ptr1:_(p1) :: (store 4, addrspace 1, align 4) + G_STORE %div:_(s32), %ptr1:_(p1) :: (store (s32), addrspace 1, align 4) %rem:_(s32) = G_UREM %src1:_(s32), %src2:_(s32) - G_STORE %rem:_(s32), %ptr2:_(p1) :: (store 4, addrspace 1, align 4) + G_STORE %rem:_(s32), %ptr2:_(p1) :: (store (s32), addrspace 1, align 4) ... --- name: test_urem_udiv @@ -94,17 +94,17 @@ body: | ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3 ; CHECK: %ptr2:_(p1) = COPY $vgpr4_vgpr5 ; CHECK: %rem:_(s32) = G_UREM %src1, %src2 - ; CHECK: G_STORE %rem(s32), %ptr1(p1) :: (store 4, addrspace 1) + ; CHECK: G_STORE %rem(s32), %ptr1(p1) :: (store (s32), addrspace 1) ; CHECK: %div:_(s32) = G_UDIV %src1, %src2 - ; CHECK: G_STORE %div(s32), %ptr2(p1) :: (store 4, addrspace 1) + ; CHECK: G_STORE %div(s32), %ptr2(p1) :: (store (s32), addrspace 1) %src1:_(s32) = COPY $vgpr0 %src2:_(s32) = COPY $vgpr1 %ptr1:_(p1) = COPY $vgpr2_vgpr3 %ptr2:_(p1) = COPY $vgpr4_vgpr5 %rem:_(s32) = G_UREM %src1:_(s32), %src2:_(s32) - G_STORE %rem:_(s32), %ptr1:_(p1) :: (store 4, addrspace 1, align 4) + G_STORE %rem:_(s32), %ptr1:_(p1) :: (store (s32), addrspace 1, align 4) %div:_(s32) = G_UDIV %src1:_(s32), %src2:_(s32) - G_STORE %div:_(s32), %ptr2:_(p1) :: (store 4, addrspace 1, align 4) + G_STORE %div:_(s32), %ptr2:_(p1) :: (store (s32), addrspace 1, align 4) ... --- name: test_sdiv_srem_v2 @@ -120,17 +120,17 @@ body: | ; CHECK: %ptr1:_(p1) = COPY $vgpr4_vgpr5 ; CHECK: %ptr2:_(p1) = COPY $vgpr6_vgpr7 ; CHECK: %div:_(<2 x s32>) = G_SDIV %src1, %src2 - ; CHECK: G_STORE %div(<2 x s32>), %ptr1(p1) :: (store 8, align 4, addrspace 1) + ; CHECK: G_STORE %div(<2 x s32>), %ptr1(p1) :: (store (<2 x s32>), align 4, addrspace 1) ; CHECK: %rem:_(<2 x s32>) = G_SREM %src1, %src2 - ; CHECK: G_STORE %rem(<2 x s32>), %ptr2(p1) :: (store 8, align 4, addrspace 1) + ; CHECK: G_STORE %rem(<2 x s32>), %ptr2(p1) :: (store (<2 x s32>), align 4, addrspace 1) %src1:_(<2 x s32>) = COPY $vgpr0_vgpr1 %src2:_(<2 x s32>) = COPY $vgpr2_vgpr3 %ptr1:_(p1) = COPY $vgpr4_vgpr5 %ptr2:_(p1) = COPY $vgpr6_vgpr7 %div:_(<2 x s32>) = G_SDIV %src1:_(<2 x s32>), %src2:_(<2 x s32>) - G_STORE %div:_(<2 x s32>), %ptr1:_(p1) :: (store 8, addrspace 1, align 4) + G_STORE %div:_(<2 x s32>), %ptr1:_(p1) :: (store (<2 x s32>), addrspace 1, align 4) %rem:_(<2 x s32>) = G_SREM %src1:_(<2 x s32>), %src2:_(<2 x s32>) - G_STORE %rem:_(<2 x s32>), %ptr2:_(p1) :: (store 8, addrspace 1, align 4) + G_STORE %rem:_(<2 x s32>), %ptr2:_(p1) :: (store (<2 x s32>), addrspace 1, align 4) ... --- name: test_udiv_urem_v2 @@ -146,17 +146,17 @@ body: | ; CHECK: %ptr1:_(p1) = COPY $vgpr4_vgpr5 ; CHECK: %ptr2:_(p1) = COPY $vgpr6_vgpr7 ; CHECK: %div:_(<2 x s32>) = G_UDIV %src1, %src2 - ; CHECK: G_STORE %div(<2 x s32>), %ptr1(p1) :: (store 8, align 4, addrspace 1) + ; CHECK: G_STORE %div(<2 x s32>), %ptr1(p1) :: (store (<2 x s32>), align 4, addrspace 1) ; CHECK: %rem:_(<2 x s32>) = G_UREM %src1, %src2 - ; CHECK: G_STORE %rem(<2 x s32>), %ptr2(p1) :: (store 8, align 4, addrspace 1) + ; CHECK: G_STORE %rem(<2 x s32>), %ptr2(p1) :: (store (<2 x s32>), align 4, addrspace 1) %src1:_(<2 x s32>) = COPY $vgpr0_vgpr1 %src2:_(<2 x s32>) = COPY $vgpr2_vgpr3 %ptr1:_(p1) = COPY $vgpr4_vgpr5 %ptr2:_(p1) = COPY $vgpr6_vgpr7 %div:_(<2 x s32>) = G_UDIV %src1:_(<2 x s32>), %src2:_(<2 x s32>) - G_STORE %div:_(<2 x s32>), %ptr1:_(p1) :: (store 8, addrspace 1, align 4) + G_STORE %div:_(<2 x s32>), %ptr1:_(p1) :: (store (<2 x s32>), addrspace 1, align 4) %rem:_(<2 x s32>) = G_UREM %src1:_(<2 x s32>), %src2:_(<2 x s32>) - G_STORE %rem:_(<2 x s32>), %ptr2:_(p1) :: (store 8, addrspace 1, align 4) + G_STORE %rem:_(<2 x s32>), %ptr2:_(p1) :: (store (<2 x s32>), addrspace 1, align 4) ... --- name: test_sdiv_srem_extra_sdiv @@ -173,22 +173,22 @@ body: | ; CHECK: %ptr2:_(p1) = COPY $vgpr4_vgpr5 ; CHECK: %ptr3:_(p1) = COPY $vgpr6_vgpr7 ; CHECK: %div:_(s32) = G_SDIV %src1, %src2 - ; CHECK: G_STORE %div(s32), %ptr1(p1) :: (store 4, addrspace 1) + ; CHECK: G_STORE %div(s32), %ptr1(p1) :: (store (s32), addrspace 1) ; CHECK: %rem:_(s32) = G_SREM %src1, %src2 - ; CHECK: G_STORE %rem(s32), %ptr2(p1) :: (store 4, addrspace 1) + ; CHECK: G_STORE %rem(s32), %ptr2(p1) :: (store (s32), addrspace 1) ; CHECK: %div2:_(s32) = G_SDIV %src1, %src2 - ; CHECK: G_STORE %div2(s32), %ptr3(p1) :: (store 4, addrspace 1) + ; CHECK: G_STORE %div2(s32), %ptr3(p1) :: (store (s32), addrspace 1) %src1:_(s32) = COPY $vgpr0 %src2:_(s32) = COPY $vgpr1 %ptr1:_(p1) = COPY $vgpr2_vgpr3 %ptr2:_(p1) = COPY $vgpr4_vgpr5 %ptr3:_(p1) = COPY $vgpr6_vgpr7 %div:_(s32) = G_SDIV %src1:_(s32), %src2:_(s32) - G_STORE %div:_(s32), %ptr1:_(p1) :: (store 4, addrspace 1, align 4) + G_STORE %div:_(s32), %ptr1:_(p1) :: (store (s32), addrspace 1, align 4) %rem:_(s32) = G_SREM %src1:_(s32), %src2:_(s32) - G_STORE %rem:_(s32), %ptr2:_(p1) :: (store 4, addrspace 1, align 4) + G_STORE %rem:_(s32), %ptr2:_(p1) :: (store (s32), addrspace 1, align 4) %div2:_(s32) = G_SDIV %src1:_(s32), %src2:_(s32) - G_STORE %div2:_(s32), %ptr3:_(p1) :: (store 4, addrspace 1, align 4) + G_STORE %div2:_(s32), %ptr3:_(p1) :: (store (s32), addrspace 1, align 4) ... --- name: test_sdiv_srem_extra_srem @@ -205,20 +205,20 @@ body: | ; CHECK: %ptr2:_(p1) = COPY $vgpr4_vgpr5 ; CHECK: %ptr3:_(p1) = COPY $vgpr6_vgpr7 ; CHECK: %div:_(s32) = G_SDIV %src1, %src2 - ; CHECK: G_STORE %div(s32), %ptr1(p1) :: (store 4, addrspace 1) + ; CHECK: G_STORE %div(s32), %ptr1(p1) :: (store (s32), addrspace 1) ; CHECK: %rem:_(s32) = G_SREM %src1, %src2 - ; CHECK: G_STORE %rem(s32), %ptr2(p1) :: (store 4, addrspace 1) + ; CHECK: G_STORE %rem(s32), %ptr2(p1) :: (store (s32), addrspace 1) ; CHECK: %rem2:_(s32) = G_SREM %src1, %src2 - ; CHECK: G_STORE %rem2(s32), %ptr3(p1) :: (store 4, addrspace 1) + ; CHECK: G_STORE %rem2(s32), %ptr3(p1) :: (store (s32), addrspace 1) %src1:_(s32) = COPY $vgpr0 %src2:_(s32) = COPY $vgpr1 %ptr1:_(p1) = COPY $vgpr2_vgpr3 %ptr2:_(p1) = COPY $vgpr4_vgpr5 %ptr3:_(p1) = COPY $vgpr6_vgpr7 %div:_(s32) = G_SDIV %src1:_(s32), %src2:_(s32) - G_STORE %div:_(s32), %ptr1:_(p1) :: (store 4, addrspace 1, align 4) + G_STORE %div:_(s32), %ptr1:_(p1) :: (store (s32), addrspace 1, align 4) %rem:_(s32) = G_SREM %src1:_(s32), %src2:_(s32) - G_STORE %rem:_(s32), %ptr2:_(p1) :: (store 4, addrspace 1, align 4) + G_STORE %rem:_(s32), %ptr2:_(p1) :: (store (s32), addrspace 1, align 4) %rem2:_(s32) = G_SREM %src1:_(s32), %src2:_(s32) - G_STORE %rem2:_(s32), %ptr3:_(p1) :: (store 4, addrspace 1, align 4) + G_STORE %rem2:_(s32), %ptr3:_(p1) :: (store (s32), addrspace 1, align 4) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-and.mir index 62812a7274fb2..9c1ce9cff7fed 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-and.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-and.mir @@ -12,10 +12,10 @@ body: | ; CHECK-LABEL: name: remove_and_255_zextload ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK: %ptr:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK: %load:_(s32) = G_ZEXTLOAD %ptr(p1) :: (load 1, addrspace 1) + ; CHECK: %load:_(s32) = G_ZEXTLOAD %ptr(p1) :: (load (s8), addrspace 1) ; CHECK: $vgpr0 = COPY %load(s32) %ptr:_(p1) = COPY $vgpr0_vgpr1 - %load:_(s32) = G_ZEXTLOAD %ptr :: (load 1, addrspace 1, align 1) + %load:_(s32) = G_ZEXTLOAD %ptr :: (load (s8), addrspace 1, align 1) %mask:_(s32) = G_CONSTANT i32 255 %and:_(s32) = G_AND %load, %mask $vgpr0 = COPY %and @@ -33,14 +33,14 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK: %ptr0:_(p1) = COPY $vgpr0_vgpr1 ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3 - ; CHECK: %load0:_(s32) = G_ZEXTLOAD %ptr0(p1) :: (load 1, addrspace 1) - ; CHECK: %load1:_(s32) = G_ZEXTLOAD %ptr1(p1) :: (load 1, addrspace 1) + ; CHECK: %load0:_(s32) = G_ZEXTLOAD %ptr0(p1) :: (load (s8), addrspace 1) + ; CHECK: %load1:_(s32) = G_ZEXTLOAD %ptr1(p1) :: (load (s8), addrspace 1) ; CHECK: %smin:_(s32) = G_SMIN %load0, %load1 ; CHECK: $vgpr0 = COPY %smin(s32) %ptr0:_(p1) = COPY $vgpr0_vgpr1 %ptr1:_(p1) = COPY $vgpr2_vgpr3 - %load0:_(s32) = G_ZEXTLOAD %ptr0 :: (load 1, addrspace 1, align 1) - %load1:_(s32) = G_ZEXTLOAD %ptr1 :: (load 1, addrspace 1, align 1) + %load0:_(s32) = G_ZEXTLOAD %ptr0 :: (load (s8), addrspace 1, align 1) + %load1:_(s32) = G_ZEXTLOAD %ptr1 :: (load (s8), addrspace 1, align 1) %smin:_(s32) = G_SMIN %load0, %load1 %mask:_(s32) = G_CONSTANT i32 255 %and:_(s32) = G_AND %smin, %mask @@ -59,14 +59,14 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK: %ptr0:_(p1) = COPY $vgpr0_vgpr1 ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3 - ; CHECK: %load0:_(s32) = G_ZEXTLOAD %ptr0(p1) :: (load 1, addrspace 1) - ; CHECK: %load1:_(s32) = G_ZEXTLOAD %ptr1(p1) :: (load 1, addrspace 1) + ; CHECK: %load0:_(s32) = G_ZEXTLOAD %ptr0(p1) :: (load (s8), addrspace 1) + ; CHECK: %load1:_(s32) = G_ZEXTLOAD %ptr1(p1) :: (load (s8), addrspace 1) ; CHECK: %smax:_(s32) = G_SMAX %load0, %load1 ; CHECK: $vgpr0 = COPY %smax(s32) %ptr0:_(p1) = COPY $vgpr0_vgpr1 %ptr1:_(p1) = COPY $vgpr2_vgpr3 - %load0:_(s32) = G_ZEXTLOAD %ptr0 :: (load 1, addrspace 1, align 1) - %load1:_(s32) = G_ZEXTLOAD %ptr1 :: (load 1, addrspace 1, align 1) + %load0:_(s32) = G_ZEXTLOAD %ptr0 :: (load (s8), addrspace 1, align 1) + %load1:_(s32) = G_ZEXTLOAD %ptr1 :: (load (s8), addrspace 1, align 1) %smax:_(s32) = G_SMAX %load0, %load1 %mask:_(s32) = G_CONSTANT i32 255 %and:_(s32) = G_AND %smax, %mask @@ -85,14 +85,14 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK: %ptr0:_(p1) = COPY $vgpr0_vgpr1 ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3 - ; CHECK: %load0:_(s32) = G_ZEXTLOAD %ptr0(p1) :: (load 1, addrspace 1) - ; CHECK: %load1:_(s32) = G_ZEXTLOAD %ptr1(p1) :: (load 1, addrspace 1) + ; CHECK: %load0:_(s32) = G_ZEXTLOAD %ptr0(p1) :: (load (s8), addrspace 1) + ; CHECK: %load1:_(s32) = G_ZEXTLOAD %ptr1(p1) :: (load (s8), addrspace 1) ; CHECK: %umin:_(s32) = G_UMIN %load0, %load1 ; CHECK: $vgpr0 = COPY %umin(s32) %ptr0:_(p1) = COPY $vgpr0_vgpr1 %ptr1:_(p1) = COPY $vgpr2_vgpr3 - %load0:_(s32) = G_ZEXTLOAD %ptr0 :: (load 1, addrspace 1, align 1) - %load1:_(s32) = G_ZEXTLOAD %ptr1 :: (load 1, addrspace 1, align 1) + %load0:_(s32) = G_ZEXTLOAD %ptr0 :: (load (s8), addrspace 1, align 1) + %load1:_(s32) = G_ZEXTLOAD %ptr1 :: (load (s8), addrspace 1, align 1) %umin:_(s32) = G_UMIN %load0, %load1 %mask:_(s32) = G_CONSTANT i32 255 %and:_(s32) = G_AND %umin, %mask @@ -111,14 +111,14 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK: %ptr0:_(p1) = COPY $vgpr0_vgpr1 ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3 - ; CHECK: %load0:_(s32) = G_ZEXTLOAD %ptr0(p1) :: (load 1, addrspace 1) - ; CHECK: %load1:_(s32) = G_ZEXTLOAD %ptr1(p1) :: (load 1, addrspace 1) + ; CHECK: %load0:_(s32) = G_ZEXTLOAD %ptr0(p1) :: (load (s8), addrspace 1) + ; CHECK: %load1:_(s32) = G_ZEXTLOAD %ptr1(p1) :: (load (s8), addrspace 1) ; CHECK: %umax:_(s32) = G_UMAX %load0, %load1 ; CHECK: $vgpr0 = COPY %umax(s32) %ptr0:_(p1) = COPY $vgpr0_vgpr1 %ptr1:_(p1) = COPY $vgpr2_vgpr3 - %load0:_(s32) = G_ZEXTLOAD %ptr0 :: (load 1, addrspace 1, align 1) - %load1:_(s32) = G_ZEXTLOAD %ptr1 :: (load 1, addrspace 1, align 1) + %load0:_(s32) = G_ZEXTLOAD %ptr0 :: (load (s8), addrspace 1, align 1) + %load1:_(s32) = G_ZEXTLOAD %ptr1 :: (load (s8), addrspace 1, align 1) %umax:_(s32) = G_UMAX %load0, %load1 %mask:_(s32) = G_CONSTANT i32 255 %and:_(s32) = G_AND %umax, %mask @@ -138,16 +138,16 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK: %ptr0:_(p1) = COPY $vgpr0_vgpr1 ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3 - ; CHECK: %load0:_(s32) = G_LOAD %ptr0(p1) :: (load 4, addrspace 1) - ; CHECK: %load1:_(s32) = G_ZEXTLOAD %ptr1(p1) :: (load 1, addrspace 1) + ; CHECK: %load0:_(s32) = G_LOAD %ptr0(p1) :: (load (s32), addrspace 1) + ; CHECK: %load1:_(s32) = G_ZEXTLOAD %ptr1(p1) :: (load (s8), addrspace 1) ; CHECK: %smin:_(s32) = G_SMIN %load0, %load1 ; CHECK: %mask:_(s32) = G_CONSTANT i32 255 ; CHECK: %and:_(s32) = G_AND %smin, %mask ; CHECK: $vgpr0 = COPY %and(s32) %ptr0:_(p1) = COPY $vgpr0_vgpr1 %ptr1:_(p1) = COPY $vgpr2_vgpr3 - %load0:_(s32) = G_LOAD %ptr0 :: (load 4, addrspace 1, align 4) - %load1:_(s32) = G_ZEXTLOAD %ptr1 :: (load 1, addrspace 1, align 1) + %load0:_(s32) = G_LOAD %ptr0 :: (load (s32), addrspace 1, align 4) + %load1:_(s32) = G_ZEXTLOAD %ptr1 :: (load (s8), addrspace 1, align 1) %smin:_(s32) = G_SMIN %load0, %load1 %mask:_(s32) = G_CONSTANT i32 255 %and:_(s32) = G_AND %smin, %mask @@ -167,16 +167,16 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK: %ptr0:_(p1) = COPY $vgpr0_vgpr1 ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3 - ; CHECK: %load0:_(s32) = G_ZEXTLOAD %ptr0(p1) :: (load 1, addrspace 1) - ; CHECK: %load1:_(s32) = G_LOAD %ptr1(p1) :: (load 4, addrspace 1) + ; CHECK: %load0:_(s32) = G_ZEXTLOAD %ptr0(p1) :: (load (s8), addrspace 1) + ; CHECK: %load1:_(s32) = G_LOAD %ptr1(p1) :: (load (s32), addrspace 1) ; CHECK: %smin:_(s32) = G_SMIN %load0, %load1 ; CHECK: %mask:_(s32) = G_CONSTANT i32 255 ; CHECK: %and:_(s32) = G_AND %smin, %mask ; CHECK: $vgpr0 = COPY %and(s32) %ptr0:_(p1) = COPY $vgpr0_vgpr1 %ptr1:_(p1) = COPY $vgpr2_vgpr3 - %load0:_(s32) = G_ZEXTLOAD %ptr0 :: (load 1, addrspace 1, align 1) - %load1:_(s32) = G_LOAD %ptr1 :: (load 4, addrspace 1, align 4) + %load0:_(s32) = G_ZEXTLOAD %ptr0 :: (load (s8), addrspace 1, align 1) + %load1:_(s32) = G_LOAD %ptr1 :: (load (s32), addrspace 1, align 4) %smin:_(s32) = G_SMIN %load0, %load1 %mask:_(s32) = G_CONSTANT i32 255 %and:_(s32) = G_AND %smin, %mask diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-divrem.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-divrem.mir index bc595ea0f890d..19a5baac249d6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-divrem.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-divrem.mir @@ -14,16 +14,16 @@ body: | ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3 ; CHECK: %ptr2:_(p1) = COPY $vgpr4_vgpr5 ; CHECK: %div:_(s32), %rem:_ = G_SDIVREM %src1, %src2 - ; CHECK: G_STORE %div(s32), %ptr1(p1) :: (store 4, addrspace 1) - ; CHECK: G_STORE %rem(s32), %ptr2(p1) :: (store 4, addrspace 1) + ; CHECK: G_STORE %div(s32), %ptr1(p1) :: (store (s32), addrspace 1) + ; CHECK: G_STORE %rem(s32), %ptr2(p1) :: (store (s32), addrspace 1) %src1:_(s32) = COPY $vgpr0 %src2:_(s32) = COPY $vgpr1 %ptr1:_(p1) = COPY $vgpr2_vgpr3 %ptr2:_(p1) = COPY $vgpr4_vgpr5 %div:_(s32) = G_SDIV %src1:_(s32), %src2:_(s32) - G_STORE %div:_(s32), %ptr1:_(p1) :: (store 4, addrspace 1, align 4) + G_STORE %div:_(s32), %ptr1:_(p1) :: (store (s32), addrspace 1, align 4) %rem:_(s32) = G_SREM %src1:_(s32), %src2:_(s32) - G_STORE %rem:_(s32), %ptr2:_(p1) :: (store 4, addrspace 1, align 4) + G_STORE %rem:_(s32), %ptr2:_(p1) :: (store (s32), addrspace 1, align 4) ... --- name: test_sdiv_srem_v2 @@ -38,16 +38,16 @@ body: | ; CHECK: %ptr1:_(p1) = COPY $vgpr4_vgpr5 ; CHECK: %ptr2:_(p1) = COPY $vgpr6_vgpr7 ; CHECK: %div:_(<2 x s32>), %rem:_ = G_SDIVREM %src1, %src2 - ; CHECK: G_STORE %div(<2 x s32>), %ptr1(p1) :: (store 8, align 4, addrspace 1) - ; CHECK: G_STORE %rem(<2 x s32>), %ptr2(p1) :: (store 8, align 4, addrspace 1) + ; CHECK: G_STORE %div(<2 x s32>), %ptr1(p1) :: (store (<2 x s32>), align 4, addrspace 1) + ; CHECK: G_STORE %rem(<2 x s32>), %ptr2(p1) :: (store (<2 x s32>), align 4, addrspace 1) %src1:_(<2 x s32>) = COPY $vgpr0_vgpr1 %src2:_(<2 x s32>) = COPY $vgpr2_vgpr3 %ptr1:_(p1) = COPY $vgpr4_vgpr5 %ptr2:_(p1) = COPY $vgpr6_vgpr7 %div:_(<2 x s32>) = G_SDIV %src1:_(<2 x s32>), %src2:_(<2 x s32>) - G_STORE %div:_(<2 x s32>), %ptr1:_(p1) :: (store 8, addrspace 1, align 4) + G_STORE %div:_(<2 x s32>), %ptr1:_(p1) :: (store (<2 x s32>), addrspace 1, align 4) %rem:_(<2 x s32>) = G_SREM %src1:_(<2 x s32>), %src2:_(<2 x s32>) - G_STORE %rem:_(<2 x s32>), %ptr2:_(p1) :: (store 8, addrspace 1, align 4) + G_STORE %rem:_(<2 x s32>), %ptr2:_(p1) :: (store (<2 x s32>), addrspace 1, align 4) ... --- name: test_sdiv_srem_v4 @@ -62,16 +62,16 @@ body: | ; CHECK: %ptr1:_(p1) = COPY $vgpr8_vgpr9 ; CHECK: %ptr2:_(p1) = COPY $vgpr10_vgpr11 ; CHECK: %div:_(<4 x s32>), %rem:_ = G_SDIVREM %src1, %src2 - ; CHECK: G_STORE %div(<4 x s32>), %ptr1(p1) :: (store 16, align 4, addrspace 1) - ; CHECK: G_STORE %rem(<4 x s32>), %ptr2(p1) :: (store 16, align 4, addrspace 1) + ; CHECK: G_STORE %div(<4 x s32>), %ptr1(p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; CHECK: G_STORE %rem(<4 x s32>), %ptr2(p1) :: (store (<4 x s32>), align 4, addrspace 1) %src1:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %src2:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %ptr1:_(p1) = COPY $vgpr8_vgpr9 %ptr2:_(p1) = COPY $vgpr10_vgpr11 %div:_(<4 x s32>) = G_SDIV %src1:_(<4 x s32>), %src2:_(<4 x s32>) - G_STORE %div:_(<4 x s32>), %ptr1:_(p1) :: (store 16, addrspace 1, align 4) + G_STORE %div:_(<4 x s32>), %ptr1:_(p1) :: (store (<4 x s32>), addrspace 1, align 4) %rem:_(<4 x s32>) = G_SREM %src1:_(<4 x s32>), %src2:_(<4 x s32>) - G_STORE %rem:_(<4 x s32>), %ptr2:_(p1) :: (store 16, addrspace 1, align 4) + G_STORE %rem:_(<4 x s32>), %ptr2:_(p1) :: (store (<4 x s32>), addrspace 1, align 4) ... --- name: test_srem_sdiv @@ -86,16 +86,16 @@ body: | ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3 ; CHECK: %ptr2:_(p1) = COPY $vgpr4_vgpr5 ; CHECK: %div:_(s32), %rem:_ = G_SDIVREM %src1, %src2 - ; CHECK: G_STORE %rem(s32), %ptr1(p1) :: (store 4, addrspace 1) - ; CHECK: G_STORE %div(s32), %ptr2(p1) :: (store 4, addrspace 1) + ; CHECK: G_STORE %rem(s32), %ptr1(p1) :: (store (s32), addrspace 1) + ; CHECK: G_STORE %div(s32), %ptr2(p1) :: (store (s32), addrspace 1) %src1:_(s32) = COPY $vgpr0 %src2:_(s32) = COPY $vgpr1 %ptr1:_(p1) = COPY $vgpr2_vgpr3 %ptr2:_(p1) = COPY $vgpr4_vgpr5 %rem:_(s32) = G_SREM %src1:_(s32), %src2:_(s32) - G_STORE %rem:_(s32), %ptr1:_(p1) :: (store 4, addrspace 1, align 4) + G_STORE %rem:_(s32), %ptr1:_(p1) :: (store (s32), addrspace 1, align 4) %div:_(s32) = G_SDIV %src1:_(s32), %src2:_(s32) - G_STORE %div:_(s32), %ptr2:_(p1) :: (store 4, addrspace 1, align 4) + G_STORE %div:_(s32), %ptr2:_(p1) :: (store (s32), addrspace 1, align 4) ... --- name: test_srem_sdiv_v2 @@ -110,16 +110,16 @@ body: | ; CHECK: %ptr1:_(p1) = COPY $vgpr4_vgpr5 ; CHECK: %ptr2:_(p1) = COPY $vgpr6_vgpr7 ; CHECK: %div:_(<2 x s32>), %rem:_ = G_SDIVREM %src1, %src2 - ; CHECK: G_STORE %rem(<2 x s32>), %ptr1(p1) :: (store 8, align 4, addrspace 1) - ; CHECK: G_STORE %div(<2 x s32>), %ptr2(p1) :: (store 8, align 4, addrspace 1) + ; CHECK: G_STORE %rem(<2 x s32>), %ptr1(p1) :: (store (<2 x s32>), align 4, addrspace 1) + ; CHECK: G_STORE %div(<2 x s32>), %ptr2(p1) :: (store (<2 x s32>), align 4, addrspace 1) %src1:_(<2 x s32>) = COPY $vgpr0_vgpr1 %src2:_(<2 x s32>) = COPY $vgpr2_vgpr3 %ptr1:_(p1) = COPY $vgpr4_vgpr5 %ptr2:_(p1) = COPY $vgpr6_vgpr7 %rem:_(<2 x s32>) = G_SREM %src1:_(<2 x s32>), %src2:_(<2 x s32>) - G_STORE %rem:_(<2 x s32>), %ptr1:_(p1) :: (store 8, addrspace 1, align 4) + G_STORE %rem:_(<2 x s32>), %ptr1:_(p1) :: (store (<2 x s32>), addrspace 1, align 4) %div:_(<2 x s32>) = G_SDIV %src1:_(<2 x s32>), %src2:_(<2 x s32>) - G_STORE %div:_(<2 x s32>), %ptr2:_(p1) :: (store 8, addrspace 1, align 4) + G_STORE %div:_(<2 x s32>), %ptr2:_(p1) :: (store (<2 x s32>), addrspace 1, align 4) ... --- name: test_srem_sdiv_v4 @@ -134,16 +134,16 @@ body: | ; CHECK: %ptr1:_(p1) = COPY $vgpr8_vgpr9 ; CHECK: %ptr2:_(p1) = COPY $vgpr10_vgpr11 ; CHECK: %div:_(<4 x s32>), %rem:_ = G_SDIVREM %src1, %src2 - ; CHECK: G_STORE %rem(<4 x s32>), %ptr1(p1) :: (store 16, align 4, addrspace 1) - ; CHECK: G_STORE %div(<4 x s32>), %ptr2(p1) :: (store 16, align 4, addrspace 1) + ; CHECK: G_STORE %rem(<4 x s32>), %ptr1(p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; CHECK: G_STORE %div(<4 x s32>), %ptr2(p1) :: (store (<4 x s32>), align 4, addrspace 1) %src1:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %src2:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %ptr1:_(p1) = COPY $vgpr8_vgpr9 %ptr2:_(p1) = COPY $vgpr10_vgpr11 %rem:_(<4 x s32>) = G_SREM %src1:_(<4 x s32>), %src2:_(<4 x s32>) - G_STORE %rem:_(<4 x s32>), %ptr1:_(p1) :: (store 16, addrspace 1, align 4) + G_STORE %rem:_(<4 x s32>), %ptr1:_(p1) :: (store (<4 x s32>), addrspace 1, align 4) %div:_(<4 x s32>) = G_SDIV %src1:_(<4 x s32>), %src2:_(<4 x s32>) - G_STORE %div:_(<4 x s32>), %ptr2:_(p1) :: (store 16, addrspace 1, align 4) + G_STORE %div:_(<4 x s32>), %ptr2:_(p1) :: (store (<4 x s32>), addrspace 1, align 4) ... --- name: test_udiv_urem @@ -158,16 +158,16 @@ body: | ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3 ; CHECK: %ptr2:_(p1) = COPY $vgpr4_vgpr5 ; CHECK: %div:_(s32), %rem:_ = G_UDIVREM %src1, %src2 - ; CHECK: G_STORE %div(s32), %ptr1(p1) :: (store 4, addrspace 1) - ; CHECK: G_STORE %rem(s32), %ptr2(p1) :: (store 4, addrspace 1) + ; CHECK: G_STORE %div(s32), %ptr1(p1) :: (store (s32), addrspace 1) + ; CHECK: G_STORE %rem(s32), %ptr2(p1) :: (store (s32), addrspace 1) %src1:_(s32) = COPY $vgpr0 %src2:_(s32) = COPY $vgpr1 %ptr1:_(p1) = COPY $vgpr2_vgpr3 %ptr2:_(p1) = COPY $vgpr4_vgpr5 %div:_(s32) = G_UDIV %src1:_(s32), %src2:_(s32) - G_STORE %div:_(s32), %ptr1:_(p1) :: (store 4, addrspace 1, align 4) + G_STORE %div:_(s32), %ptr1:_(p1) :: (store (s32), addrspace 1, align 4) %rem:_(s32) = G_UREM %src1:_(s32), %src2:_(s32) - G_STORE %rem:_(s32), %ptr2:_(p1) :: (store 4, addrspace 1, align 4) + G_STORE %rem:_(s32), %ptr2:_(p1) :: (store (s32), addrspace 1, align 4) ... --- name: test_udiv_urem_v2 @@ -182,16 +182,16 @@ body: | ; CHECK: %ptr1:_(p1) = COPY $vgpr4_vgpr5 ; CHECK: %ptr2:_(p1) = COPY $vgpr6_vgpr7 ; CHECK: %div:_(<2 x s32>), %rem:_ = G_UDIVREM %src1, %src2 - ; CHECK: G_STORE %div(<2 x s32>), %ptr1(p1) :: (store 8, align 4, addrspace 1) - ; CHECK: G_STORE %rem(<2 x s32>), %ptr2(p1) :: (store 8, align 4, addrspace 1) + ; CHECK: G_STORE %div(<2 x s32>), %ptr1(p1) :: (store (<2 x s32>), align 4, addrspace 1) + ; CHECK: G_STORE %rem(<2 x s32>), %ptr2(p1) :: (store (<2 x s32>), align 4, addrspace 1) %src1:_(<2 x s32>) = COPY $vgpr0_vgpr1 %src2:_(<2 x s32>) = COPY $vgpr2_vgpr3 %ptr1:_(p1) = COPY $vgpr4_vgpr5 %ptr2:_(p1) = COPY $vgpr6_vgpr7 %div:_(<2 x s32>) = G_UDIV %src1:_(<2 x s32>), %src2:_(<2 x s32>) - G_STORE %div:_(<2 x s32>), %ptr1:_(p1) :: (store 8, addrspace 1, align 4) + G_STORE %div:_(<2 x s32>), %ptr1:_(p1) :: (store (<2 x s32>), addrspace 1, align 4) %rem:_(<2 x s32>) = G_UREM %src1:_(<2 x s32>), %src2:_(<2 x s32>) - G_STORE %rem:_(<2 x s32>), %ptr2:_(p1) :: (store 8, addrspace 1, align 4) + G_STORE %rem:_(<2 x s32>), %ptr2:_(p1) :: (store (<2 x s32>), addrspace 1, align 4) ... --- name: test_udiv_urem_v4 @@ -206,16 +206,16 @@ body: | ; CHECK: %ptr1:_(p1) = COPY $vgpr8_vgpr9 ; CHECK: %ptr2:_(p1) = COPY $vgpr10_vgpr11 ; CHECK: %div:_(<4 x s32>), %rem:_ = G_UDIVREM %src1, %src2 - ; CHECK: G_STORE %div(<4 x s32>), %ptr1(p1) :: (store 16, align 4, addrspace 1) - ; CHECK: G_STORE %rem(<4 x s32>), %ptr2(p1) :: (store 16, align 4, addrspace 1) + ; CHECK: G_STORE %div(<4 x s32>), %ptr1(p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; CHECK: G_STORE %rem(<4 x s32>), %ptr2(p1) :: (store (<4 x s32>), align 4, addrspace 1) %src1:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %src2:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %ptr1:_(p1) = COPY $vgpr8_vgpr9 %ptr2:_(p1) = COPY $vgpr10_vgpr11 %div:_(<4 x s32>) = G_UDIV %src1:_(<4 x s32>), %src2:_(<4 x s32>) - G_STORE %div:_(<4 x s32>), %ptr1:_(p1) :: (store 16, addrspace 1, align 4) + G_STORE %div:_(<4 x s32>), %ptr1:_(p1) :: (store (<4 x s32>), addrspace 1, align 4) %rem:_(<4 x s32>) = G_UREM %src1:_(<4 x s32>), %src2:_(<4 x s32>) - G_STORE %rem:_(<4 x s32>), %ptr2:_(p1) :: (store 16, addrspace 1, align 4) + G_STORE %rem:_(<4 x s32>), %ptr2:_(p1) :: (store (<4 x s32>), addrspace 1, align 4) ... --- name: test_urem_udiv @@ -230,16 +230,16 @@ body: | ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3 ; CHECK: %ptr2:_(p1) = COPY $vgpr4_vgpr5 ; CHECK: %div:_(s32), %rem:_ = G_UDIVREM %src1, %src2 - ; CHECK: G_STORE %rem(s32), %ptr1(p1) :: (store 4, addrspace 1) - ; CHECK: G_STORE %div(s32), %ptr2(p1) :: (store 4, addrspace 1) + ; CHECK: G_STORE %rem(s32), %ptr1(p1) :: (store (s32), addrspace 1) + ; CHECK: G_STORE %div(s32), %ptr2(p1) :: (store (s32), addrspace 1) %src1:_(s32) = COPY $vgpr0 %src2:_(s32) = COPY $vgpr1 %ptr1:_(p1) = COPY $vgpr2_vgpr3 %ptr2:_(p1) = COPY $vgpr4_vgpr5 %rem:_(s32) = G_UREM %src1:_(s32), %src2:_(s32) - G_STORE %rem:_(s32), %ptr1:_(p1) :: (store 4, addrspace 1, align 4) + G_STORE %rem:_(s32), %ptr1:_(p1) :: (store (s32), addrspace 1, align 4) %div:_(s32) = G_UDIV %src1:_(s32), %src2:_(s32) - G_STORE %div:_(s32), %ptr2:_(p1) :: (store 4, addrspace 1, align 4) + G_STORE %div:_(s32), %ptr2:_(p1) :: (store (s32), addrspace 1, align 4) ... --- name: test_urem_udiv_v2 @@ -254,16 +254,16 @@ body: | ; CHECK: %ptr1:_(p1) = COPY $vgpr4_vgpr5 ; CHECK: %ptr2:_(p1) = COPY $vgpr6_vgpr7 ; CHECK: %div:_(<2 x s32>), %rem:_ = G_UDIVREM %src1, %src2 - ; CHECK: G_STORE %rem(<2 x s32>), %ptr1(p1) :: (store 8, align 4, addrspace 1) - ; CHECK: G_STORE %div(<2 x s32>), %ptr2(p1) :: (store 8, align 4, addrspace 1) + ; CHECK: G_STORE %rem(<2 x s32>), %ptr1(p1) :: (store (<2 x s32>), align 4, addrspace 1) + ; CHECK: G_STORE %div(<2 x s32>), %ptr2(p1) :: (store (<2 x s32>), align 4, addrspace 1) %src1:_(<2 x s32>) = COPY $vgpr0_vgpr1 %src2:_(<2 x s32>) = COPY $vgpr2_vgpr3 %ptr1:_(p1) = COPY $vgpr4_vgpr5 %ptr2:_(p1) = COPY $vgpr6_vgpr7 %rem:_(<2 x s32>) = G_UREM %src1:_(<2 x s32>), %src2:_(<2 x s32>) - G_STORE %rem:_(<2 x s32>), %ptr1:_(p1) :: (store 8, addrspace 1, align 4) + G_STORE %rem:_(<2 x s32>), %ptr1:_(p1) :: (store (<2 x s32>), addrspace 1, align 4) %div:_(<2 x s32>) = G_UDIV %src1:_(<2 x s32>), %src2:_(<2 x s32>) - G_STORE %div:_(<2 x s32>), %ptr2:_(p1) :: (store 8, addrspace 1, align 4) + G_STORE %div:_(<2 x s32>), %ptr2:_(p1) :: (store (<2 x s32>), addrspace 1, align 4) ... --- name: test_urem_udiv_v4 @@ -278,16 +278,16 @@ body: | ; CHECK: %ptr1:_(p1) = COPY $vgpr8_vgpr9 ; CHECK: %ptr2:_(p1) = COPY $vgpr10_vgpr11 ; CHECK: %div:_(<4 x s32>), %rem:_ = G_UDIVREM %src1, %src2 - ; CHECK: G_STORE %rem(<4 x s32>), %ptr1(p1) :: (store 16, align 4, addrspace 1) - ; CHECK: G_STORE %div(<4 x s32>), %ptr2(p1) :: (store 16, align 4, addrspace 1) + ; CHECK: G_STORE %rem(<4 x s32>), %ptr1(p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; CHECK: G_STORE %div(<4 x s32>), %ptr2(p1) :: (store (<4 x s32>), align 4, addrspace 1) %src1:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %src2:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %ptr1:_(p1) = COPY $vgpr8_vgpr9 %ptr2:_(p1) = COPY $vgpr10_vgpr11 %rem:_(<4 x s32>) = G_UREM %src1:_(<4 x s32>), %src2:_(<4 x s32>) - G_STORE %rem:_(<4 x s32>), %ptr1:_(p1) :: (store 16, addrspace 1, align 4) + G_STORE %rem:_(<4 x s32>), %ptr1:_(p1) :: (store (<4 x s32>), addrspace 1, align 4) %div:_(<4 x s32>) = G_UDIV %src1:_(<4 x s32>), %src2:_(<4 x s32>) - G_STORE %div:_(<4 x s32>), %ptr2:_(p1) :: (store 16, addrspace 1, align 4) + G_STORE %div:_(<4 x s32>), %ptr2:_(p1) :: (store (<4 x s32>), addrspace 1, align 4) ... --- name: test_sdiv_srem_extra_use @@ -303,23 +303,23 @@ body: | ; CHECK: %ptr2:_(p1) = G_IMPLICIT_DEF ; CHECK: %ptr3:_(p1) = COPY $vgpr2_vgpr3 ; CHECK: %ptr4:_(p1) = COPY $vgpr4_vgpr5 - ; CHECK: G_STORE %src1(s32), %ptr1(p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK: G_STORE %src2(s32), %ptr2(p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE %src1(s32), %ptr1(p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK: G_STORE %src2(s32), %ptr2(p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; CHECK: %div:_(s32), %rem:_ = G_SDIVREM %src1, %src2 - ; CHECK: G_STORE %div(s32), %ptr3(p1) :: (store 4, addrspace 1) - ; CHECK: G_STORE %rem(s32), %ptr4(p1) :: (store 4, addrspace 1) + ; CHECK: G_STORE %div(s32), %ptr3(p1) :: (store (s32), addrspace 1) + ; CHECK: G_STORE %rem(s32), %ptr4(p1) :: (store (s32), addrspace 1) %src1:_(s32) = COPY $vgpr0 %src2:_(s32) = COPY $vgpr1 %ptr1:_(p1) = G_IMPLICIT_DEF %ptr2:_(p1) = G_IMPLICIT_DEF %ptr3:_(p1) = COPY $vgpr2_vgpr3 %ptr4:_(p1) = COPY $vgpr4_vgpr5 - G_STORE %src1:_(s32), %ptr1:_(p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) - G_STORE %src2:_(s32), %ptr2:_(p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + G_STORE %src1:_(s32), %ptr1:_(p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + G_STORE %src2:_(s32), %ptr2:_(p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) %div:_(s32) = G_SDIV %src1:_(s32), %src2:_(s32) - G_STORE %div:_(s32), %ptr3:_(p1) :: (store 4, addrspace 1, align 4) + G_STORE %div:_(s32), %ptr3:_(p1) :: (store (s32), addrspace 1, align 4) %rem:_(s32) = G_SREM %src1:_(s32), %src2:_(s32) - G_STORE %rem:_(s32), %ptr4:_(p1) :: (store 4, addrspace 1, align 4) + G_STORE %rem:_(s32), %ptr4:_(p1) :: (store (s32), addrspace 1, align 4) ... --- name: test_sdiv_srem_extra_sdiv @@ -337,21 +337,21 @@ body: | ; CHECK: %ptr2:_(p1) = COPY $vgpr4_vgpr5 ; CHECK: %ptr3:_(p1) = COPY $vgpr6_vgpr7 ; CHECK: %div:_(s32), %rem:_ = G_SDIVREM %src1, %src2 - ; CHECK: G_STORE %div(s32), %ptr1(p1) :: (store 4, addrspace 1) - ; CHECK: G_STORE %rem(s32), %ptr2(p1) :: (store 4, addrspace 1) + ; CHECK: G_STORE %div(s32), %ptr1(p1) :: (store (s32), addrspace 1) + ; CHECK: G_STORE %rem(s32), %ptr2(p1) :: (store (s32), addrspace 1) ; CHECK: %div2:_(s32) = G_SDIV %src1, %src2 - ; CHECK: G_STORE %div2(s32), %ptr3(p1) :: (store 4, addrspace 1) + ; CHECK: G_STORE %div2(s32), %ptr3(p1) :: (store (s32), addrspace 1) %src1:_(s32) = COPY $vgpr0 %src2:_(s32) = COPY $vgpr1 %ptr1:_(p1) = COPY $vgpr2_vgpr3 %ptr2:_(p1) = COPY $vgpr4_vgpr5 %ptr3:_(p1) = COPY $vgpr6_vgpr7 %div:_(s32) = G_SDIV %src1:_(s32), %src2:_(s32) - G_STORE %div:_(s32), %ptr1:_(p1) :: (store 4, addrspace 1, align 4) + G_STORE %div:_(s32), %ptr1:_(p1) :: (store (s32), addrspace 1, align 4) %rem:_(s32) = G_SREM %src1:_(s32), %src2:_(s32) - G_STORE %rem:_(s32), %ptr2:_(p1) :: (store 4, addrspace 1, align 4) + G_STORE %rem:_(s32), %ptr2:_(p1) :: (store (s32), addrspace 1, align 4) %div2:_(s32) = G_SDIV %src1:_(s32), %src2:_(s32) - G_STORE %div2:_(s32), %ptr3:_(p1) :: (store 4, addrspace 1, align 4) + G_STORE %div2:_(s32), %ptr3:_(p1) :: (store (s32), addrspace 1, align 4) ... --- name: test_sdiv_srem_extra_srem @@ -369,21 +369,21 @@ body: | ; CHECK: %ptr2:_(p1) = COPY $vgpr4_vgpr5 ; CHECK: %ptr3:_(p1) = COPY $vgpr6_vgpr7 ; CHECK: %div:_(s32), %rem:_ = G_SDIVREM %src1, %src2 - ; CHECK: G_STORE %div(s32), %ptr1(p1) :: (store 4, addrspace 1) - ; CHECK: G_STORE %rem(s32), %ptr2(p1) :: (store 4, addrspace 1) + ; CHECK: G_STORE %div(s32), %ptr1(p1) :: (store (s32), addrspace 1) + ; CHECK: G_STORE %rem(s32), %ptr2(p1) :: (store (s32), addrspace 1) ; CHECK: %rem2:_(s32) = G_SREM %src1, %src2 - ; CHECK: G_STORE %rem2(s32), %ptr3(p1) :: (store 4, addrspace 1) + ; CHECK: G_STORE %rem2(s32), %ptr3(p1) :: (store (s32), addrspace 1) %src1:_(s32) = COPY $vgpr0 %src2:_(s32) = COPY $vgpr1 %ptr1:_(p1) = COPY $vgpr2_vgpr3 %ptr2:_(p1) = COPY $vgpr4_vgpr5 %ptr3:_(p1) = COPY $vgpr6_vgpr7 %div:_(s32) = G_SDIV %src1:_(s32), %src2:_(s32) - G_STORE %div:_(s32), %ptr1:_(p1) :: (store 4, addrspace 1, align 4) + G_STORE %div:_(s32), %ptr1:_(p1) :: (store (s32), addrspace 1, align 4) %rem:_(s32) = G_SREM %src1:_(s32), %src2:_(s32) - G_STORE %rem:_(s32), %ptr2:_(p1) :: (store 4, addrspace 1, align 4) + G_STORE %rem:_(s32), %ptr2:_(p1) :: (store (s32), addrspace 1, align 4) %rem2:_(s32) = G_SREM %src1:_(s32), %src2:_(s32) - G_STORE %rem2:_(s32), %ptr3:_(p1) :: (store 4, addrspace 1, align 4) + G_STORE %rem2:_(s32), %ptr3:_(p1) :: (store (s32), addrspace 1, align 4) ... # Some negative tests. --- @@ -400,18 +400,18 @@ body: | ; CHECK: %ptr1:_(p1) = COPY $vgpr3_vgpr4 ; CHECK: %ptr2:_(p1) = COPY $vgpr5_vgpr6 ; CHECK: %div:_(s32) = G_SDIV %src1, %src2 - ; CHECK: G_STORE %div(s32), %ptr1(p1) :: (store 4, addrspace 1) + ; CHECK: G_STORE %div(s32), %ptr1(p1) :: (store (s32), addrspace 1) ; CHECK: %rem:_(s32) = G_SREM %src1, %src3 - ; CHECK: G_STORE %rem(s32), %ptr2(p1) :: (store 4, addrspace 1) + ; CHECK: G_STORE %rem(s32), %ptr2(p1) :: (store (s32), addrspace 1) %src1:_(s32) = COPY $vgpr0 %src2:_(s32) = COPY $vgpr1 %src3:_(s32) = COPY $vgpr2 %ptr1:_(p1) = COPY $vgpr3_vgpr4 %ptr2:_(p1) = COPY $vgpr5_vgpr6 %div:_(s32) = G_SDIV %src1:_(s32), %src2:_(s32) - G_STORE %div:_(s32), %ptr1:_(p1) :: (store 4, addrspace 1, align 4) + G_STORE %div:_(s32), %ptr1:_(p1) :: (store (s32), addrspace 1, align 4) %rem:_(s32) = G_SREM %src1:_(s32), %src3:_(s32) - G_STORE %rem:_(s32), %ptr2:_(p1) :: (store 4, addrspace 1, align 4) + G_STORE %rem:_(s32), %ptr2:_(p1) :: (store (s32), addrspace 1, align 4) ... --- name: test_sdiv_srem_src_opnds_swapped @@ -426,17 +426,17 @@ body: | ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3 ; CHECK: %ptr2:_(p1) = COPY $vgpr4_vgpr5 ; CHECK: %div:_(s32) = G_SDIV %src1, %src2 - ; CHECK: G_STORE %div(s32), %ptr1(p1) :: (store 4, addrspace 1) + ; CHECK: G_STORE %div(s32), %ptr1(p1) :: (store (s32), addrspace 1) ; CHECK: %rem:_(s32) = G_SREM %src2, %src1 - ; CHECK: G_STORE %rem(s32), %ptr2(p1) :: (store 4, addrspace 1) + ; CHECK: G_STORE %rem(s32), %ptr2(p1) :: (store (s32), addrspace 1) %src1:_(s32) = COPY $vgpr0 %src2:_(s32) = COPY $vgpr1 %ptr1:_(p1) = COPY $vgpr2_vgpr3 %ptr2:_(p1) = COPY $vgpr4_vgpr5 %div:_(s32) = G_SDIV %src1:_(s32), %src2:_(s32) - G_STORE %div:_(s32), %ptr1:_(p1) :: (store 4, addrspace 1, align 4) + G_STORE %div:_(s32), %ptr1:_(p1) :: (store (s32), addrspace 1, align 4) %rem:_(s32) = G_SREM %src2:_(s32), %src1:_(s32) - G_STORE %rem:_(s32), %ptr2:_(p1) :: (store 4, addrspace 1, align 4) + G_STORE %rem:_(s32), %ptr2:_(p1) :: (store (s32), addrspace 1, align 4) ... --- name: test_sdiv_urem @@ -451,17 +451,17 @@ body: | ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3 ; CHECK: %ptr2:_(p1) = COPY $vgpr4_vgpr5 ; CHECK: %div:_(s32) = G_SDIV %src1, %src2 - ; CHECK: G_STORE %div(s32), %ptr1(p1) :: (store 4, addrspace 1) + ; CHECK: G_STORE %div(s32), %ptr1(p1) :: (store (s32), addrspace 1) ; CHECK: %rem:_(s32) = G_UREM %src1, %src2 - ; CHECK: G_STORE %rem(s32), %ptr2(p1) :: (store 4, addrspace 1) + ; CHECK: G_STORE %rem(s32), %ptr2(p1) :: (store (s32), addrspace 1) %src1:_(s32) = COPY $vgpr0 %src2:_(s32) = COPY $vgpr1 %ptr1:_(p1) = COPY $vgpr2_vgpr3 %ptr2:_(p1) = COPY $vgpr4_vgpr5 %div:_(s32) = G_SDIV %src1:_(s32), %src2:_(s32) - G_STORE %div:_(s32), %ptr1:_(p1) :: (store 4, addrspace 1, align 4) + G_STORE %div:_(s32), %ptr1:_(p1) :: (store (s32), addrspace 1, align 4) %rem:_(s32) = G_UREM %src1:_(s32), %src2:_(s32) - G_STORE %rem:_(s32), %ptr2:_(p1) :: (store 4, addrspace 1, align 4) + G_STORE %rem:_(s32), %ptr2:_(p1) :: (store (s32), addrspace 1, align 4) ... --- name: test_udiv_srem @@ -476,17 +476,17 @@ body: | ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3 ; CHECK: %ptr2:_(p1) = COPY $vgpr4_vgpr5 ; CHECK: %div:_(s32) = G_UDIV %src1, %src2 - ; CHECK: G_STORE %div(s32), %ptr1(p1) :: (store 4, addrspace 1) + ; CHECK: G_STORE %div(s32), %ptr1(p1) :: (store (s32), addrspace 1) ; CHECK: %rem:_(s32) = G_SREM %src1, %src2 - ; CHECK: G_STORE %rem(s32), %ptr2(p1) :: (store 4, addrspace 1) + ; CHECK: G_STORE %rem(s32), %ptr2(p1) :: (store (s32), addrspace 1) %src1:_(s32) = COPY $vgpr0 %src2:_(s32) = COPY $vgpr1 %ptr1:_(p1) = COPY $vgpr2_vgpr3 %ptr2:_(p1) = COPY $vgpr4_vgpr5 %div:_(s32) = G_UDIV %src1:_(s32), %src2:_(s32) - G_STORE %div:_(s32), %ptr1:_(p1) :: (store 4, addrspace 1, align 4) + G_STORE %div:_(s32), %ptr1:_(p1) :: (store (s32), addrspace 1, align 4) %rem:_(s32) = G_SREM %src1:_(s32), %src2:_(s32) - G_STORE %rem:_(s32), %ptr2:_(p1) :: (store 4, addrspace 1, align 4) + G_STORE %rem:_(s32), %ptr2:_(p1) :: (store (s32), addrspace 1, align 4) ... --- name: test_sdiv_srem_different_blocks @@ -500,24 +500,24 @@ body: | ; CHECK: %src2:_(s32) = COPY $vgpr1 ; CHECK: %ptr1:_(p1) = COPY $vgpr2_vgpr3 ; CHECK: %div:_(s32) = G_SDIV %src1, %src2 - ; CHECK: G_STORE %div(s32), %ptr1(p1) :: (store 4, addrspace 1) + ; CHECK: G_STORE %div(s32), %ptr1(p1) :: (store (s32), addrspace 1) ; CHECK: S_BRANCH %bb.1 ; CHECK: bb.1: ; CHECK: liveins: $vgpr4_vgpr5 ; CHECK: %ptr2:_(p1) = COPY $vgpr4_vgpr5 ; CHECK: %rem:_(s32) = G_SREM %src1, %src2 - ; CHECK: G_STORE %rem(s32), %ptr2(p1) :: (store 4, addrspace 1) + ; CHECK: G_STORE %rem(s32), %ptr2(p1) :: (store (s32), addrspace 1) bb.0: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 %src1:_(s32) = COPY $vgpr0 %src2:_(s32) = COPY $vgpr1 %ptr1:_(p1) = COPY $vgpr2_vgpr3 %div:_(s32) = G_SDIV %src1:_(s32), %src2:_(s32) - G_STORE %div:_(s32), %ptr1:_(p1) :: (store 4, addrspace 1, align 4) + G_STORE %div:_(s32), %ptr1:_(p1) :: (store (s32), addrspace 1, align 4) S_BRANCH %bb.1 bb.1: liveins: $vgpr4_vgpr5 %ptr2:_(p1) = COPY $vgpr4_vgpr5 %rem:_(s32) = G_SREM %src1:_(s32), %src2:_(s32) - G_STORE %rem:_(s32), %ptr2:_(p1) :: (store 4, addrspace 1, align 4) + G_STORE %rem:_(s32), %ptr2:_(p1) :: (store (s32), addrspace 1, align 4) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-memcpy-inline.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-memcpy-inline.mir index 678d40d6d7652..1e4ff0324793f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-memcpy-inline.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-memcpy-inline.mir @@ -57,13 +57,13 @@ body: | ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[MV1]](p0) :: (load 8 from %ir.1, align 4) - ; CHECK: G_STORE [[LOAD]](s64), [[MV]](p0) :: (store 8 into %ir.0, align 4) + ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[MV1]](p0) :: (load (s64) from %ir.1, align 4) + ; CHECK: G_STORE [[LOAD]](s64), [[MV]](p0) :: (store (s64) into %ir.0, align 4) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[MV1]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load 8 from %ir.1 + 5, align 1, basealign 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from %ir.1 + 5, align 1, basealign 4) ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[MV]], [[C]](s64) - ; CHECK: G_STORE [[LOAD1]](s64), [[PTR_ADD1]](p0) :: (store 8 into %ir.0 + 5, align 1, basealign 4) + ; CHECK: G_STORE [[LOAD1]](s64), [[PTR_ADD1]](p0) :: (store (s64) into %ir.0 + 5, align 1, basealign 4) ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] ; CHECK: S_SETPC_B64_return [[COPY5]] %3:_(s32) = COPY $vgpr0 @@ -74,7 +74,7 @@ body: | %1:_(p0) = G_MERGE_VALUES %5(s32), %6(s32) %2:sgpr_64 = COPY $sgpr30_sgpr31 %7:_(s64) = G_CONSTANT i64 13 - G_MEMCPY_INLINE %0(p0), %1(p0), %7(s64) :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + G_MEMCPY_INLINE %0(p0), %1(p0), %7(s64) :: (store (s8) into %ir.0, align 4), (load (s8) from %ir.1, align 4) %8:ccr_sgpr_64 = COPY %2 S_SETPC_B64_return %8 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-s-buffer-load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-s-buffer-load.mir index 7846472a3e8e5..24ca3f7c0ad8f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-s-buffer-load.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-s-buffer-load.mir @@ -35,7 +35,7 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C1]](s32), [[COPY1]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C1]](s32), [[COPY1]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = COPY $vgpr0 %2:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD %0, %1, 0 @@ -74,7 +74,7 @@ body: | ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY2]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY2]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec @@ -119,7 +119,7 @@ body: | ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY1]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY1]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll index 597d570ed48c4..d0f1dd0e30d07 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll @@ -18,9 +18,9 @@ define amdgpu_ps void @load_1d_vgpr_vaddr__sgpr_srsrc(<8 x i32> inreg %rsrc, i32 ; FAST: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; FAST: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; FAST: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; FAST: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "ImageResource") + ; FAST: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; FAST: [[COPY9:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; FAST: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY9]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; FAST: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY9]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) ; FAST: S_ENDPGM 0 ; GREEDY-LABEL: name: load_1d_vgpr_vaddr__sgpr_srsrc ; GREEDY: bb.1 (%ir-block.0): @@ -36,9 +36,9 @@ define amdgpu_ps void @load_1d_vgpr_vaddr__sgpr_srsrc(<8 x i32> inreg %rsrc, i32 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GREEDY: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GREEDY: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "ImageResource") + ; GREEDY: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GREEDY: [[COPY9:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; GREEDY: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY9]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; GREEDY: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY9]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) ; GREEDY: S_ENDPGM 0 %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) store <4 x float> %v, <4 x float> addrspace(1)* undef @@ -62,9 +62,9 @@ define amdgpu_ps void @load_1d_sgpr_vaddr__sgpr_srsrc(<8 x i32> inreg %rsrc, i32 ; FAST: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10 ; FAST: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; FAST: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY8]](s32) - ; FAST: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "ImageResource") + ; FAST: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; FAST: [[COPY10:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; FAST: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY10]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; FAST: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY10]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) ; FAST: S_ENDPGM 0 ; GREEDY-LABEL: name: load_1d_sgpr_vaddr__sgpr_srsrc ; GREEDY: bb.1 (%ir-block.0): @@ -81,9 +81,9 @@ define amdgpu_ps void @load_1d_sgpr_vaddr__sgpr_srsrc(<8 x i32> inreg %rsrc, i32 ; GREEDY: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10 ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY8]](s32) - ; GREEDY: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "ImageResource") + ; GREEDY: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GREEDY: [[COPY10:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; GREEDY: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY10]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; GREEDY: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY10]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) ; GREEDY: S_ENDPGM 0 %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) store <4 x float> %v, <4 x float> addrspace(1)* undef @@ -135,7 +135,7 @@ define amdgpu_ps void @load_1d_vgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 %s) { ; FAST: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec ; FAST: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc ; FAST: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; FAST: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "ImageResource") + ; FAST: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; FAST: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec ; FAST: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; FAST: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -144,7 +144,7 @@ define amdgpu_ps void @load_1d_vgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 %s) { ; FAST: $exec = S_MOV_B64_term [[S_MOV_B64_term]] ; FAST: bb.4: ; FAST: [[COPY9:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; FAST: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY9]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; FAST: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY9]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) ; FAST: S_ENDPGM 0 ; GREEDY-LABEL: name: load_1d_vgpr_vaddr__vgpr_srsrc ; GREEDY: bb.1 (%ir-block.0): @@ -189,7 +189,7 @@ define amdgpu_ps void @load_1d_vgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 %s) { ; GREEDY: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec ; GREEDY: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; GREEDY: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "ImageResource") + ; GREEDY: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -198,7 +198,7 @@ define amdgpu_ps void @load_1d_vgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 %s) { ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]] ; GREEDY: bb.4: ; GREEDY: [[COPY9:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; GREEDY: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY9]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; GREEDY: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY9]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) ; GREEDY: S_ENDPGM 0 %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) store <4 x float> %v, <4 x float> addrspace(1)* undef @@ -251,7 +251,7 @@ define amdgpu_ps void @load_1d_sgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 inreg ; FAST: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec ; FAST: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc ; FAST: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; FAST: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "ImageResource") + ; FAST: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; FAST: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec ; FAST: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; FAST: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -260,7 +260,7 @@ define amdgpu_ps void @load_1d_sgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 inreg ; FAST: $exec = S_MOV_B64_term [[S_MOV_B64_term]] ; FAST: bb.4: ; FAST: [[COPY10:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; FAST: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY10]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; FAST: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY10]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) ; FAST: S_ENDPGM 0 ; GREEDY-LABEL: name: load_1d_sgpr_vaddr__vgpr_srsrc ; GREEDY: bb.1 (%ir-block.0): @@ -306,7 +306,7 @@ define amdgpu_ps void @load_1d_sgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 inreg ; GREEDY: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec ; GREEDY: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; GREEDY: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "ImageResource") + ; GREEDY: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -315,7 +315,7 @@ define amdgpu_ps void @load_1d_sgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 inreg ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]] ; GREEDY: bb.4: ; GREEDY: [[COPY10:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; GREEDY: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY10]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; GREEDY: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY10]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) ; GREEDY: S_ENDPGM 0 %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) store <4 x float> %v, <4 x float> addrspace(1)* undef diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll index 9ff032726c8e2..a1973a9e01d6c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll @@ -23,8 +23,8 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__sgpr_samp(<8 x i32> inre ; FAST: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; FAST: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; FAST: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; FAST: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load 16 from custom "ImageResource") - ; FAST: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; FAST: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; FAST: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) ; FAST: S_ENDPGM 0 ; GREEDY-LABEL: name: sample_1d_vgpr_vaddr__sgpr_rsrc__sgpr_samp ; GREEDY: bb.1 (%ir-block.0): @@ -45,8 +45,8 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__sgpr_samp(<8 x i32> inre ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) ; GREEDY: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GREEDY: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load 16 from custom "ImageResource") - ; GREEDY: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; GREEDY: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GREEDY: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) ; GREEDY: S_ENDPGM 0 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) store <4 x float> %v, <4 x float> addrspace(1)* undef @@ -75,8 +75,8 @@ define amdgpu_ps void @sample_1d_sgpr_vaddr__sgpr_rsrc__sgpr_samp(<8 x i32> inre ; FAST: [[COPY12:%[0-9]+]]:sgpr(s32) = COPY $sgpr14 ; FAST: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; FAST: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[COPY12]](s32) - ; FAST: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY13]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load 16 from custom "ImageResource") - ; FAST: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; FAST: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY13]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; FAST: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) ; FAST: S_ENDPGM 0 ; GREEDY-LABEL: name: sample_1d_sgpr_vaddr__sgpr_rsrc__sgpr_samp ; GREEDY: bb.1 (%ir-block.0): @@ -98,8 +98,8 @@ define amdgpu_ps void @sample_1d_sgpr_vaddr__sgpr_rsrc__sgpr_samp(<8 x i32> inre ; GREEDY: [[COPY12:%[0-9]+]]:sgpr(s32) = COPY $sgpr14 ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[COPY12]](s32) - ; GREEDY: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY13]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load 16 from custom "ImageResource") - ; GREEDY: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; GREEDY: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY13]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") + ; GREEDY: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) ; GREEDY: S_ENDPGM 0 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) store <4 x float> %v, <4 x float> addrspace(1)* undef @@ -156,7 +156,7 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp(<8 x i32> %rsr ; FAST: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec ; FAST: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc ; FAST: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; FAST: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load 16 from custom "ImageResource") + ; FAST: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; FAST: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec ; FAST: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; FAST: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -164,7 +164,7 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp(<8 x i32> %rsr ; FAST: successors: %bb.4(0x80000000) ; FAST: $exec = S_MOV_B64_term [[S_MOV_B64_term]] ; FAST: bb.4: - ; FAST: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; FAST: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) ; FAST: S_ENDPGM 0 ; GREEDY-LABEL: name: sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp ; GREEDY: bb.1 (%ir-block.0): @@ -214,7 +214,7 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp(<8 x i32> %rsr ; GREEDY: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec ; GREEDY: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc ; GREEDY: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; GREEDY: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load 16 from custom "ImageResource") + ; GREEDY: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -222,7 +222,7 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp(<8 x i32> %rsr ; GREEDY: successors: %bb.4(0x80000000) ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]] ; GREEDY: bb.4: - ; GREEDY: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; GREEDY: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) ; GREEDY: S_ENDPGM 0 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) store <4 x float> %v, <4 x float> addrspace(1)* undef @@ -269,7 +269,7 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp(<8 x i32> inre ; FAST: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec ; FAST: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; FAST: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; FAST: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR2]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load 16 from custom "ImageResource") + ; FAST: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR2]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; FAST: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; FAST: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; FAST: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -277,7 +277,7 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp(<8 x i32> inre ; FAST: successors: %bb.4(0x80000000) ; FAST: $exec = S_MOV_B64_term [[S_MOV_B64_term]] ; FAST: bb.4: - ; FAST: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; FAST: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) ; FAST: S_ENDPGM 0 ; GREEDY-LABEL: name: sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp ; GREEDY: bb.1 (%ir-block.0): @@ -317,7 +317,7 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp(<8 x i32> inre ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GREEDY: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR2]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load 16 from custom "ImageResource") + ; GREEDY: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR2]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -325,7 +325,7 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp(<8 x i32> inre ; GREEDY: successors: %bb.4(0x80000000) ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]] ; GREEDY: bb.4: - ; GREEDY: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; GREEDY: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) ; GREEDY: S_ENDPGM 0 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) store <4 x float> %v, <4 x float> addrspace(1)* undef @@ -394,7 +394,7 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp(<8 x i32> %rsr ; FAST: [[V_CMP_EQ_U64_e64_5:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV5]](s64), [[UV5]](s64), implicit $exec ; FAST: [[S_AND_B64_4:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_5]], [[S_AND_B64_3]], implicit-def $scc ; FAST: [[BUILD_VECTOR3:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_8]](s32), [[V_READFIRSTLANE_B32_9]](s32), [[V_READFIRSTLANE_B32_10]](s32), [[V_READFIRSTLANE_B32_11]](s32) - ; FAST: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR3]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load 16 from custom "ImageResource") + ; FAST: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR3]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; FAST: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_4]], implicit-def $exec, implicit-def $scc, implicit $exec ; FAST: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; FAST: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -402,7 +402,7 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp(<8 x i32> %rsr ; FAST: successors: %bb.4(0x80000000) ; FAST: $exec = S_MOV_B64_term [[S_MOV_B64_term]] ; FAST: bb.4: - ; FAST: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; FAST: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) ; FAST: S_ENDPGM 0 ; GREEDY-LABEL: name: sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp ; GREEDY: bb.1 (%ir-block.0): @@ -464,7 +464,7 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp(<8 x i32> %rsr ; GREEDY: [[V_CMP_EQ_U64_e64_5:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV5]](s64), [[UV5]](s64), implicit $exec ; GREEDY: [[S_AND_B64_4:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_5]], [[S_AND_B64_3]], implicit-def $scc ; GREEDY: [[BUILD_VECTOR3:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_8]](s32), [[V_READFIRSTLANE_B32_9]](s32), [[V_READFIRSTLANE_B32_10]](s32), [[V_READFIRSTLANE_B32_11]](s32) - ; GREEDY: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR3]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load 16 from custom "ImageResource") + ; GREEDY: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR3]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_4]], implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -472,7 +472,7 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp(<8 x i32> %rsr ; GREEDY: successors: %bb.4(0x80000000) ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]] ; GREEDY: bb.4: - ; GREEDY: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; GREEDY: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) ; GREEDY: S_ENDPGM 0 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) store <4 x float> %v, <4 x float> addrspace(1)* undef diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll index 2fb84ebd73c83..d6fb80accd254 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll @@ -16,7 +16,7 @@ define amdgpu_ps float @raw_buffer_load__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY6]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY6]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -38,7 +38,7 @@ define amdgpu_ps float @raw_buffer_load__sgpr_rsrc__sgpr_val__sgpr_voffset__sgpr ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32) - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY6]](s32), [[COPY7]], [[COPY5]], 0, 0, 0 :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY6]](s32), [[COPY7]], [[COPY5]], 0, 0, 0 :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -78,7 +78,7 @@ define amdgpu_ps float @raw_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[COPY6]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[COPY6]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -116,7 +116,7 @@ define amdgpu_ps float @raw_buffer_load__sgpr_rsrc__vgpr_val__vgpr_voffset__vgpr ; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %9(s32), %bb.2 ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY5]](s32), implicit $exec ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY5]](s32), implicit $exec - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY6]](s32), [[COPY4]], [[V_READFIRSTLANE_B32_]], 0, 0, 0 :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY6]](s32), [[COPY4]], [[V_READFIRSTLANE_B32_]], 0, 0, 0 :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -166,7 +166,7 @@ define amdgpu_ps float @raw_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr ; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY5]](s32), implicit $exec ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]](s32), [[COPY5]](s32), implicit $exec ; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[COPY6]](s32), [[COPY4]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0 :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[COPY6]](s32), [[COPY4]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0 :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll index 0286b395e9fe3..927d3519c7eb6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll @@ -13,7 +13,7 @@ define amdgpu_ps i32 @s_buffer_load_i32(<4 x i32> inreg %rsrc, i32 inreg %soffse ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(s32) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load 4) + ; CHECK: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(s32) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s32)) ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[AMDGPU_S_BUFFER_LOAD]](s32) ; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) ; CHECK: $sgpr0 = COPY [[INT]](s32) @@ -27,7 +27,7 @@ define amdgpu_ps i32 @s_buffer_load_i32(<4 x i32> inreg %rsrc, i32 inreg %soffse ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(s32) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load 4) + ; GREEDY: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(s32) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s32)) ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[AMDGPU_S_BUFFER_LOAD]](s32) ; GREEDY: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) ; GREEDY: $sgpr0 = COPY [[INT]](s32) @@ -46,7 +46,7 @@ define amdgpu_ps <2 x i32> @s_buffer_load_v2i32(<4 x i32> inreg %rsrc, i32 inreg ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load 8, align 4) + ; CHECK: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s64), align 4) ; CHECK: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<2 x s32>) ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) ; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) @@ -64,7 +64,7 @@ define amdgpu_ps <2 x i32> @s_buffer_load_v2i32(<4 x i32> inreg %rsrc, i32 inreg ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load 8, align 4) + ; GREEDY: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s64), align 4) ; GREEDY: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<2 x s32>) ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) ; GREEDY: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) @@ -87,7 +87,7 @@ define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load 12, align 4) + ; CHECK: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s96), align 4) ; CHECK: [[DEF:%[0-9]+]]:sgpr(<4 x s32>) = G_IMPLICIT_DEF ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:sgpr(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) ; CHECK: [[BITCAST:%[0-9]+]]:sgpr(s384) = G_BITCAST [[CONCAT_VECTORS]](<12 x s32>) @@ -113,7 +113,7 @@ define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load 12, align 4) + ; GREEDY: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s96), align 4) ; GREEDY: [[DEF:%[0-9]+]]:sgpr(<4 x s32>) = G_IMPLICIT_DEF ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:sgpr(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) ; GREEDY: [[BITCAST:%[0-9]+]]:sgpr(s384) = G_BITCAST [[CONCAT_VECTORS]](<12 x s32>) @@ -144,7 +144,7 @@ define amdgpu_ps <8 x i32> @s_buffer_load_v8i32(<4 x i32> inreg %rsrc, i32 inreg ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<8 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load 32, align 4) + ; CHECK: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<8 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s256), align 4) ; CHECK: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32), [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<8 x s32>) ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) ; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) @@ -180,7 +180,7 @@ define amdgpu_ps <8 x i32> @s_buffer_load_v8i32(<4 x i32> inreg %rsrc, i32 inreg ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<8 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load 32, align 4) + ; GREEDY: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<8 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s256), align 4) ; GREEDY: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32), [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<8 x s32>) ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) ; GREEDY: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) @@ -221,7 +221,7 @@ define amdgpu_ps <16 x i32> @s_buffer_load_v16i32(<4 x i32> inreg %rsrc, i32 inr ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<16 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load 64, align 4) + ; CHECK: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<16 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s512), align 4) ; CHECK: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32), [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32), [[UV8:%[0-9]+]]:sgpr(s32), [[UV9:%[0-9]+]]:sgpr(s32), [[UV10:%[0-9]+]]:sgpr(s32), [[UV11:%[0-9]+]]:sgpr(s32), [[UV12:%[0-9]+]]:sgpr(s32), [[UV13:%[0-9]+]]:sgpr(s32), [[UV14:%[0-9]+]]:sgpr(s32), [[UV15:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<16 x s32>) ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) ; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) @@ -281,7 +281,7 @@ define amdgpu_ps <16 x i32> @s_buffer_load_v16i32(<4 x i32> inreg %rsrc, i32 inr ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<16 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load 64, align 4) + ; GREEDY: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<16 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s512), align 4) ; GREEDY: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32), [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32), [[UV8:%[0-9]+]]:sgpr(s32), [[UV9:%[0-9]+]]:sgpr(s32), [[UV10:%[0-9]+]]:sgpr(s32), [[UV11:%[0-9]+]]:sgpr(s32), [[UV12:%[0-9]+]]:sgpr(s32), [[UV13:%[0-9]+]]:sgpr(s32), [[UV14:%[0-9]+]]:sgpr(s32), [[UV15:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<16 x s32>) ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) ; GREEDY: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) @@ -349,7 +349,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_offset @@ -363,7 +363,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -382,7 +382,7 @@ define amdgpu_ps <2 x float> @s_buffer_load_v2f32_vgpr_offset(<4 x i32> inreg %r ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 8, align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s64), align 4) ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<2 x s32>) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) @@ -398,7 +398,7 @@ define amdgpu_ps <2 x float> @s_buffer_load_v2f32_vgpr_offset(<4 x i32> inreg %r ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 8, align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s64), align 4) ; GREEDY: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<2 x s32>) ; GREEDY: $vgpr0 = COPY [[UV]](s32) ; GREEDY: $vgpr1 = COPY [[UV1]](s32) @@ -419,7 +419,7 @@ define amdgpu_ps <3 x float> @s_buffer_load_v3f32_vgpr_offset(<4 x i32> inreg %r ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK: [[DEF:%[0-9]+]]:sgpr(<4 x s32>) = G_IMPLICIT_DEF ; CHECK: [[COPY5:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[DEF]](<4 x s32>) ; CHECK: [[COPY6:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[DEF]](<4 x s32>) @@ -443,7 +443,7 @@ define amdgpu_ps <3 x float> @s_buffer_load_v3f32_vgpr_offset(<4 x i32> inreg %r ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY: [[DEF:%[0-9]+]]:sgpr(<4 x s32>) = G_IMPLICIT_DEF ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[DEF]](<4 x s32>) ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[DEF]](<4 x s32>) @@ -472,7 +472,7 @@ define amdgpu_ps <4 x float> @s_buffer_load_v4f32_vgpr_offset(<4 x i32> inreg %r ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) @@ -490,7 +490,7 @@ define amdgpu_ps <4 x float> @s_buffer_load_v4f32_vgpr_offset(<4 x i32> inreg %r ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) ; GREEDY: $vgpr0 = COPY [[UV]](s32) ; GREEDY: $vgpr1 = COPY [[UV1]](s32) @@ -513,8 +513,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset(<4 x i32> inreg %r ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) ; CHECK: $vgpr0 = COPY [[UV]](s32) @@ -537,8 +537,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset(<4 x i32> inreg %r ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) ; GREEDY: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) ; GREEDY: $vgpr0 = COPY [[UV]](s32) @@ -566,10 +566,10 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset(<4 x i32> inreg ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load 16 from unknown-address + 16, align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load 16 from unknown-address + 48, align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) ; CHECK: $vgpr0 = COPY [[UV]](s32) @@ -600,10 +600,10 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset(<4 x i32> inreg ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load 16 from unknown-address + 16, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load 16 from unknown-address + 48, align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) ; GREEDY: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) ; GREEDY: $vgpr0 = COPY [[UV]](s32) @@ -640,9 +640,9 @@ define amdgpu_ps void @s_buffer_load_i96_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s96) = G_TRUNC [[AMDGPU_BUFFER_LOAD]](s128) - ; CHECK: G_STORE [[TRUNC]](s96), [[DEF]](p1) :: (store 12 into `i96 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK: G_STORE [[TRUNC]](s96), [[DEF]](p1) :: (store (s96) into `i96 addrspace(1)* undef`, align 8, addrspace 1) ; CHECK: S_ENDPGM 0 ; GREEDY-LABEL: name: s_buffer_load_i96_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): @@ -656,9 +656,9 @@ define amdgpu_ps void @s_buffer_load_i96_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY: [[TRUNC:%[0-9]+]]:vgpr(s96) = G_TRUNC [[AMDGPU_BUFFER_LOAD]](s128) - ; GREEDY: G_STORE [[TRUNC]](s96), [[DEF]](p1) :: (store 12 into `i96 addrspace(1)* undef`, align 8, addrspace 1) + ; GREEDY: G_STORE [[TRUNC]](s96), [[DEF]](p1) :: (store (s96) into `i96 addrspace(1)* undef`, align 8, addrspace 1) ; GREEDY: S_ENDPGM 0 %val = call i96 @llvm.amdgcn.s.buffer.load.i96(<4 x i32> %rsrc, i32 %soffset, i32 0) store i96 %val, i96 addrspace(1)* undef @@ -679,14 +679,14 @@ define amdgpu_ps void @s_buffer_load_i256_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK: [[MV:%[0-9]+]]:vgpr(s256) = G_MERGE_VALUES [[AMDGPU_BUFFER_LOAD]](s128), [[AMDGPU_BUFFER_LOAD1]](s128) ; CHECK: [[UV:%[0-9]+]]:vgpr(s128), [[UV1:%[0-9]+]]:vgpr(s128) = G_UNMERGE_VALUES [[MV]](s256) - ; CHECK: G_STORE [[UV]](s128), [[DEF]](p1) :: (store 16 into `i256 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK: G_STORE [[UV]](s128), [[DEF]](p1) :: (store (s128) into `i256 addrspace(1)* undef`, align 8, addrspace 1) ; CHECK: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; CHECK: G_STORE [[UV1]](s128), [[PTR_ADD]](p1) :: (store 16 into `i256 addrspace(1)* undef` + 16, align 8, addrspace 1) + ; CHECK: G_STORE [[UV1]](s128), [[PTR_ADD]](p1) :: (store (s128) into `i256 addrspace(1)* undef` + 16, align 8, addrspace 1) ; CHECK: S_ENDPGM 0 ; GREEDY-LABEL: name: s_buffer_load_i256_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): @@ -700,14 +700,14 @@ define amdgpu_ps void @s_buffer_load_i256_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY: [[MV:%[0-9]+]]:vgpr(s256) = G_MERGE_VALUES [[AMDGPU_BUFFER_LOAD]](s128), [[AMDGPU_BUFFER_LOAD1]](s128) ; GREEDY: [[UV:%[0-9]+]]:vgpr(s128), [[UV1:%[0-9]+]]:vgpr(s128) = G_UNMERGE_VALUES [[MV]](s256) - ; GREEDY: G_STORE [[UV]](s128), [[DEF]](p1) :: (store 16 into `i256 addrspace(1)* undef`, align 8, addrspace 1) + ; GREEDY: G_STORE [[UV]](s128), [[DEF]](p1) :: (store (s128) into `i256 addrspace(1)* undef`, align 8, addrspace 1) ; GREEDY: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 ; GREEDY: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; GREEDY: G_STORE [[UV1]](s128), [[PTR_ADD]](p1) :: (store 16 into `i256 addrspace(1)* undef` + 16, align 8, addrspace 1) + ; GREEDY: G_STORE [[UV1]](s128), [[PTR_ADD]](p1) :: (store (s128) into `i256 addrspace(1)* undef` + 16, align 8, addrspace 1) ; GREEDY: S_ENDPGM 0 %val = call i256 @llvm.amdgcn.s.buffer.load.i256(<4 x i32> %rsrc, i32 %soffset, i32 0) store i256 %val, i256 addrspace(1)* undef @@ -728,22 +728,22 @@ define amdgpu_ps void @s_buffer_load_i512_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load 16 from unknown-address + 16, align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load 16 from unknown-address + 48, align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; CHECK: [[MV:%[0-9]+]]:vgpr(s512) = G_MERGE_VALUES [[AMDGPU_BUFFER_LOAD]](s128), [[AMDGPU_BUFFER_LOAD1]](s128), [[AMDGPU_BUFFER_LOAD2]](s128), [[AMDGPU_BUFFER_LOAD3]](s128) ; CHECK: [[UV:%[0-9]+]]:vgpr(s128), [[UV1:%[0-9]+]]:vgpr(s128), [[UV2:%[0-9]+]]:vgpr(s128), [[UV3:%[0-9]+]]:vgpr(s128) = G_UNMERGE_VALUES [[MV]](s512) - ; CHECK: G_STORE [[UV]](s128), [[DEF]](p1) :: (store 16 into `i512 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK: G_STORE [[UV]](s128), [[DEF]](p1) :: (store (s128) into `i512 addrspace(1)* undef`, align 8, addrspace 1) ; CHECK: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; CHECK: G_STORE [[UV1]](s128), [[PTR_ADD]](p1) :: (store 16 into `i512 addrspace(1)* undef` + 16, align 8, addrspace 1) + ; CHECK: G_STORE [[UV1]](s128), [[PTR_ADD]](p1) :: (store (s128) into `i512 addrspace(1)* undef` + 16, align 8, addrspace 1) ; CHECK: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 ; CHECK: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) - ; CHECK: G_STORE [[UV2]](s128), [[PTR_ADD1]](p1) :: (store 16 into `i512 addrspace(1)* undef` + 32, align 8, addrspace 1) + ; CHECK: G_STORE [[UV2]](s128), [[PTR_ADD1]](p1) :: (store (s128) into `i512 addrspace(1)* undef` + 32, align 8, addrspace 1) ; CHECK: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 ; CHECK: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) - ; CHECK: G_STORE [[UV3]](s128), [[PTR_ADD2]](p1) :: (store 16 into `i512 addrspace(1)* undef` + 48, align 8, addrspace 1) + ; CHECK: G_STORE [[UV3]](s128), [[PTR_ADD2]](p1) :: (store (s128) into `i512 addrspace(1)* undef` + 48, align 8, addrspace 1) ; CHECK: S_ENDPGM 0 ; GREEDY-LABEL: name: s_buffer_load_i512_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): @@ -757,22 +757,22 @@ define amdgpu_ps void @s_buffer_load_i512_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load 16 from unknown-address + 16, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load 16 from unknown-address + 48, align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GREEDY: [[MV:%[0-9]+]]:vgpr(s512) = G_MERGE_VALUES [[AMDGPU_BUFFER_LOAD]](s128), [[AMDGPU_BUFFER_LOAD1]](s128), [[AMDGPU_BUFFER_LOAD2]](s128), [[AMDGPU_BUFFER_LOAD3]](s128) ; GREEDY: [[UV:%[0-9]+]]:vgpr(s128), [[UV1:%[0-9]+]]:vgpr(s128), [[UV2:%[0-9]+]]:vgpr(s128), [[UV3:%[0-9]+]]:vgpr(s128) = G_UNMERGE_VALUES [[MV]](s512) - ; GREEDY: G_STORE [[UV]](s128), [[DEF]](p1) :: (store 16 into `i512 addrspace(1)* undef`, align 8, addrspace 1) + ; GREEDY: G_STORE [[UV]](s128), [[DEF]](p1) :: (store (s128) into `i512 addrspace(1)* undef`, align 8, addrspace 1) ; GREEDY: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 ; GREEDY: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; GREEDY: G_STORE [[UV1]](s128), [[PTR_ADD]](p1) :: (store 16 into `i512 addrspace(1)* undef` + 16, align 8, addrspace 1) + ; GREEDY: G_STORE [[UV1]](s128), [[PTR_ADD]](p1) :: (store (s128) into `i512 addrspace(1)* undef` + 16, align 8, addrspace 1) ; GREEDY: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 ; GREEDY: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) - ; GREEDY: G_STORE [[UV2]](s128), [[PTR_ADD1]](p1) :: (store 16 into `i512 addrspace(1)* undef` + 32, align 8, addrspace 1) + ; GREEDY: G_STORE [[UV2]](s128), [[PTR_ADD1]](p1) :: (store (s128) into `i512 addrspace(1)* undef` + 32, align 8, addrspace 1) ; GREEDY: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 ; GREEDY: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) - ; GREEDY: G_STORE [[UV3]](s128), [[PTR_ADD2]](p1) :: (store 16 into `i512 addrspace(1)* undef` + 48, align 8, addrspace 1) + ; GREEDY: G_STORE [[UV3]](s128), [[PTR_ADD2]](p1) :: (store (s128) into `i512 addrspace(1)* undef` + 48, align 8, addrspace 1) ; GREEDY: S_ENDPGM 0 %val = call i512 @llvm.amdgcn.s.buffer.load.i512(<4 x i32> %rsrc, i32 %soffset, i32 0) store i512 %val, i512 addrspace(1)* undef @@ -793,14 +793,14 @@ define amdgpu_ps void @s_buffer_load_v16i16_vgpr_offset(<4 x i32> inreg %rsrc, i ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s16>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<8 x s16>), [[AMDGPU_BUFFER_LOAD1]](<8 x s16>) ; CHECK: [[UV:%[0-9]+]]:vgpr(<8 x s16>), [[UV1:%[0-9]+]]:vgpr(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s16>) - ; CHECK: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store 16 into `<16 x i16> addrspace(1)* undef`, align 32, addrspace 1) + ; CHECK: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store (s128) into `<16 x i16> addrspace(1)* undef`, align 32, addrspace 1) ; CHECK: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; CHECK: G_STORE [[UV1]](<8 x s16>), [[PTR_ADD]](p1) :: (store 16 into `<16 x i16> addrspace(1)* undef` + 16, basealign 32, addrspace 1) + ; CHECK: G_STORE [[UV1]](<8 x s16>), [[PTR_ADD]](p1) :: (store (s128) into `<16 x i16> addrspace(1)* undef` + 16, basealign 32, addrspace 1) ; CHECK: S_ENDPGM 0 ; GREEDY-LABEL: name: s_buffer_load_v16i16_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): @@ -814,14 +814,14 @@ define amdgpu_ps void @s_buffer_load_v16i16_vgpr_offset(<4 x i32> inreg %rsrc, i ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s16>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<8 x s16>), [[AMDGPU_BUFFER_LOAD1]](<8 x s16>) ; GREEDY: [[UV:%[0-9]+]]:vgpr(<8 x s16>), [[UV1:%[0-9]+]]:vgpr(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s16>) - ; GREEDY: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store 16 into `<16 x i16> addrspace(1)* undef`, align 32, addrspace 1) + ; GREEDY: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store (s128) into `<16 x i16> addrspace(1)* undef`, align 32, addrspace 1) ; GREEDY: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 ; GREEDY: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; GREEDY: G_STORE [[UV1]](<8 x s16>), [[PTR_ADD]](p1) :: (store 16 into `<16 x i16> addrspace(1)* undef` + 16, basealign 32, addrspace 1) + ; GREEDY: G_STORE [[UV1]](<8 x s16>), [[PTR_ADD]](p1) :: (store (s128) into `<16 x i16> addrspace(1)* undef` + 16, basealign 32, addrspace 1) ; GREEDY: S_ENDPGM 0 %val = call <16 x i16> @llvm.amdgcn.s.buffer.load.v16i16(<4 x i32> %rsrc, i32 %soffset, i32 0) store <16 x i16> %val, <16 x i16> addrspace(1)* undef @@ -842,22 +842,22 @@ define amdgpu_ps void @s_buffer_load_v32i16_vgpr_offset(<4 x i32> inreg %rsrc, i ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load 16 from unknown-address + 16, align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load 16 from unknown-address + 48, align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<32 x s16>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<8 x s16>), [[AMDGPU_BUFFER_LOAD1]](<8 x s16>), [[AMDGPU_BUFFER_LOAD2]](<8 x s16>), [[AMDGPU_BUFFER_LOAD3]](<8 x s16>) ; CHECK: [[UV:%[0-9]+]]:vgpr(<8 x s16>), [[UV1:%[0-9]+]]:vgpr(<8 x s16>), [[UV2:%[0-9]+]]:vgpr(<8 x s16>), [[UV3:%[0-9]+]]:vgpr(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<32 x s16>) - ; CHECK: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store 16 into `<32 x i16> addrspace(1)* undef`, align 64, addrspace 1) + ; CHECK: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store (s128) into `<32 x i16> addrspace(1)* undef`, align 64, addrspace 1) ; CHECK: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; CHECK: G_STORE [[UV1]](<8 x s16>), [[PTR_ADD]](p1) :: (store 16 into `<32 x i16> addrspace(1)* undef` + 16, basealign 64, addrspace 1) + ; CHECK: G_STORE [[UV1]](<8 x s16>), [[PTR_ADD]](p1) :: (store (s128) into `<32 x i16> addrspace(1)* undef` + 16, basealign 64, addrspace 1) ; CHECK: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 ; CHECK: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) - ; CHECK: G_STORE [[UV2]](<8 x s16>), [[PTR_ADD1]](p1) :: (store 16 into `<32 x i16> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) + ; CHECK: G_STORE [[UV2]](<8 x s16>), [[PTR_ADD1]](p1) :: (store (s128) into `<32 x i16> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) ; CHECK: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 ; CHECK: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) - ; CHECK: G_STORE [[UV3]](<8 x s16>), [[PTR_ADD2]](p1) :: (store 16 into `<32 x i16> addrspace(1)* undef` + 48, basealign 64, addrspace 1) + ; CHECK: G_STORE [[UV3]](<8 x s16>), [[PTR_ADD2]](p1) :: (store (s128) into `<32 x i16> addrspace(1)* undef` + 48, basealign 64, addrspace 1) ; CHECK: S_ENDPGM 0 ; GREEDY-LABEL: name: s_buffer_load_v32i16_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): @@ -871,22 +871,22 @@ define amdgpu_ps void @s_buffer_load_v32i16_vgpr_offset(<4 x i32> inreg %rsrc, i ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load 16 from unknown-address + 16, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load 16 from unknown-address + 48, align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<32 x s16>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<8 x s16>), [[AMDGPU_BUFFER_LOAD1]](<8 x s16>), [[AMDGPU_BUFFER_LOAD2]](<8 x s16>), [[AMDGPU_BUFFER_LOAD3]](<8 x s16>) ; GREEDY: [[UV:%[0-9]+]]:vgpr(<8 x s16>), [[UV1:%[0-9]+]]:vgpr(<8 x s16>), [[UV2:%[0-9]+]]:vgpr(<8 x s16>), [[UV3:%[0-9]+]]:vgpr(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<32 x s16>) - ; GREEDY: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store 16 into `<32 x i16> addrspace(1)* undef`, align 64, addrspace 1) + ; GREEDY: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store (s128) into `<32 x i16> addrspace(1)* undef`, align 64, addrspace 1) ; GREEDY: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 ; GREEDY: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; GREEDY: G_STORE [[UV1]](<8 x s16>), [[PTR_ADD]](p1) :: (store 16 into `<32 x i16> addrspace(1)* undef` + 16, basealign 64, addrspace 1) + ; GREEDY: G_STORE [[UV1]](<8 x s16>), [[PTR_ADD]](p1) :: (store (s128) into `<32 x i16> addrspace(1)* undef` + 16, basealign 64, addrspace 1) ; GREEDY: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 ; GREEDY: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) - ; GREEDY: G_STORE [[UV2]](<8 x s16>), [[PTR_ADD1]](p1) :: (store 16 into `<32 x i16> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) + ; GREEDY: G_STORE [[UV2]](<8 x s16>), [[PTR_ADD1]](p1) :: (store (s128) into `<32 x i16> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) ; GREEDY: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 ; GREEDY: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) - ; GREEDY: G_STORE [[UV3]](<8 x s16>), [[PTR_ADD2]](p1) :: (store 16 into `<32 x i16> addrspace(1)* undef` + 48, basealign 64, addrspace 1) + ; GREEDY: G_STORE [[UV3]](<8 x s16>), [[PTR_ADD2]](p1) :: (store (s128) into `<32 x i16> addrspace(1)* undef` + 48, basealign 64, addrspace 1) ; GREEDY: S_ENDPGM 0 %val = call <32 x i16> @llvm.amdgcn.s.buffer.load.v32i16(<4 x i32> %rsrc, i32 %soffset, i32 0) store <32 x i16> %val, <32 x i16> addrspace(1)* undef @@ -907,14 +907,14 @@ define amdgpu_ps void @s_buffer_load_v4i64_vgpr_offset(<4 x i32> inreg %rsrc, i3 ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x s64>), [[AMDGPU_BUFFER_LOAD1]](<2 x s64>) ; CHECK: [[UV:%[0-9]+]]:vgpr(<2 x s64>), [[UV1:%[0-9]+]]:vgpr(<2 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s64>) - ; CHECK: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store 16 into `<4 x i64> addrspace(1)* undef`, align 32, addrspace 1) + ; CHECK: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store (s128) into `<4 x i64> addrspace(1)* undef`, align 32, addrspace 1) ; CHECK: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; CHECK: G_STORE [[UV1]](<2 x s64>), [[PTR_ADD]](p1) :: (store 16 into `<4 x i64> addrspace(1)* undef` + 16, basealign 32, addrspace 1) + ; CHECK: G_STORE [[UV1]](<2 x s64>), [[PTR_ADD]](p1) :: (store (s128) into `<4 x i64> addrspace(1)* undef` + 16, basealign 32, addrspace 1) ; CHECK: S_ENDPGM 0 ; GREEDY-LABEL: name: s_buffer_load_v4i64_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): @@ -928,14 +928,14 @@ define amdgpu_ps void @s_buffer_load_v4i64_vgpr_offset(<4 x i32> inreg %rsrc, i3 ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x s64>), [[AMDGPU_BUFFER_LOAD1]](<2 x s64>) ; GREEDY: [[UV:%[0-9]+]]:vgpr(<2 x s64>), [[UV1:%[0-9]+]]:vgpr(<2 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s64>) - ; GREEDY: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store 16 into `<4 x i64> addrspace(1)* undef`, align 32, addrspace 1) + ; GREEDY: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store (s128) into `<4 x i64> addrspace(1)* undef`, align 32, addrspace 1) ; GREEDY: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 ; GREEDY: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; GREEDY: G_STORE [[UV1]](<2 x s64>), [[PTR_ADD]](p1) :: (store 16 into `<4 x i64> addrspace(1)* undef` + 16, basealign 32, addrspace 1) + ; GREEDY: G_STORE [[UV1]](<2 x s64>), [[PTR_ADD]](p1) :: (store (s128) into `<4 x i64> addrspace(1)* undef` + 16, basealign 32, addrspace 1) ; GREEDY: S_ENDPGM 0 %val = call <4 x i64> @llvm.amdgcn.s.buffer.load.v4i64(<4 x i32> %rsrc, i32 %soffset, i32 0) store <4 x i64> %val, <4 x i64> addrspace(1)* undef @@ -956,22 +956,22 @@ define amdgpu_ps void @s_buffer_load_v8i64_vgpr_offset(<4 x i32> inreg %rsrc, i3 ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load 16 from unknown-address + 16, align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load 16 from unknown-address + 48, align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s64>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x s64>), [[AMDGPU_BUFFER_LOAD1]](<2 x s64>), [[AMDGPU_BUFFER_LOAD2]](<2 x s64>), [[AMDGPU_BUFFER_LOAD3]](<2 x s64>) ; CHECK: [[UV:%[0-9]+]]:vgpr(<2 x s64>), [[UV1:%[0-9]+]]:vgpr(<2 x s64>), [[UV2:%[0-9]+]]:vgpr(<2 x s64>), [[UV3:%[0-9]+]]:vgpr(<2 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s64>) - ; CHECK: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store 16 into `<8 x i64> addrspace(1)* undef`, align 64, addrspace 1) + ; CHECK: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store (s128) into `<8 x i64> addrspace(1)* undef`, align 64, addrspace 1) ; CHECK: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; CHECK: G_STORE [[UV1]](<2 x s64>), [[PTR_ADD]](p1) :: (store 16 into `<8 x i64> addrspace(1)* undef` + 16, basealign 64, addrspace 1) + ; CHECK: G_STORE [[UV1]](<2 x s64>), [[PTR_ADD]](p1) :: (store (s128) into `<8 x i64> addrspace(1)* undef` + 16, basealign 64, addrspace 1) ; CHECK: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 ; CHECK: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) - ; CHECK: G_STORE [[UV2]](<2 x s64>), [[PTR_ADD1]](p1) :: (store 16 into `<8 x i64> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) + ; CHECK: G_STORE [[UV2]](<2 x s64>), [[PTR_ADD1]](p1) :: (store (s128) into `<8 x i64> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) ; CHECK: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 ; CHECK: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) - ; CHECK: G_STORE [[UV3]](<2 x s64>), [[PTR_ADD2]](p1) :: (store 16 into `<8 x i64> addrspace(1)* undef` + 48, basealign 64, addrspace 1) + ; CHECK: G_STORE [[UV3]](<2 x s64>), [[PTR_ADD2]](p1) :: (store (s128) into `<8 x i64> addrspace(1)* undef` + 48, basealign 64, addrspace 1) ; CHECK: S_ENDPGM 0 ; GREEDY-LABEL: name: s_buffer_load_v8i64_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): @@ -985,22 +985,22 @@ define amdgpu_ps void @s_buffer_load_v8i64_vgpr_offset(<4 x i32> inreg %rsrc, i3 ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load 16 from unknown-address + 16, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load 16 from unknown-address + 48, align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s64>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x s64>), [[AMDGPU_BUFFER_LOAD1]](<2 x s64>), [[AMDGPU_BUFFER_LOAD2]](<2 x s64>), [[AMDGPU_BUFFER_LOAD3]](<2 x s64>) ; GREEDY: [[UV:%[0-9]+]]:vgpr(<2 x s64>), [[UV1:%[0-9]+]]:vgpr(<2 x s64>), [[UV2:%[0-9]+]]:vgpr(<2 x s64>), [[UV3:%[0-9]+]]:vgpr(<2 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s64>) - ; GREEDY: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store 16 into `<8 x i64> addrspace(1)* undef`, align 64, addrspace 1) + ; GREEDY: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store (s128) into `<8 x i64> addrspace(1)* undef`, align 64, addrspace 1) ; GREEDY: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 ; GREEDY: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; GREEDY: G_STORE [[UV1]](<2 x s64>), [[PTR_ADD]](p1) :: (store 16 into `<8 x i64> addrspace(1)* undef` + 16, basealign 64, addrspace 1) + ; GREEDY: G_STORE [[UV1]](<2 x s64>), [[PTR_ADD]](p1) :: (store (s128) into `<8 x i64> addrspace(1)* undef` + 16, basealign 64, addrspace 1) ; GREEDY: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 ; GREEDY: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) - ; GREEDY: G_STORE [[UV2]](<2 x s64>), [[PTR_ADD1]](p1) :: (store 16 into `<8 x i64> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) + ; GREEDY: G_STORE [[UV2]](<2 x s64>), [[PTR_ADD1]](p1) :: (store (s128) into `<8 x i64> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) ; GREEDY: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 ; GREEDY: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) - ; GREEDY: G_STORE [[UV3]](<2 x s64>), [[PTR_ADD2]](p1) :: (store 16 into `<8 x i64> addrspace(1)* undef` + 48, basealign 64, addrspace 1) + ; GREEDY: G_STORE [[UV3]](<2 x s64>), [[PTR_ADD2]](p1) :: (store (s128) into `<8 x i64> addrspace(1)* undef` + 48, basealign 64, addrspace 1) ; GREEDY: S_ENDPGM 0 %val = call <8 x i64> @llvm.amdgcn.s.buffer.load.v8i64(<4 x i32> %rsrc, i32 %soffset, i32 0) store <8 x i64> %val, <8 x i64> addrspace(1)* undef @@ -1021,14 +1021,14 @@ define amdgpu_ps void @s_buffer_load_v4p1_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x p1>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x p1>), [[AMDGPU_BUFFER_LOAD1]](<2 x p1>) ; CHECK: [[UV:%[0-9]+]]:vgpr(<2 x p1>), [[UV1:%[0-9]+]]:vgpr(<2 x p1>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x p1>) - ; CHECK: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store 16 into `<4 x i8 addrspace(1)*> addrspace(1)* undef`, align 32, addrspace 1) + ; CHECK: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store (s128) into `<4 x i8 addrspace(1)*> addrspace(1)* undef`, align 32, addrspace 1) ; CHECK: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; CHECK: G_STORE [[UV1]](<2 x p1>), [[PTR_ADD]](p1) :: (store 16 into `<4 x i8 addrspace(1)*> addrspace(1)* undef` + 16, basealign 32, addrspace 1) + ; CHECK: G_STORE [[UV1]](<2 x p1>), [[PTR_ADD]](p1) :: (store (s128) into `<4 x i8 addrspace(1)*> addrspace(1)* undef` + 16, basealign 32, addrspace 1) ; CHECK: S_ENDPGM 0 ; GREEDY-LABEL: name: s_buffer_load_v4p1_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): @@ -1042,14 +1042,14 @@ define amdgpu_ps void @s_buffer_load_v4p1_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x p1>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x p1>), [[AMDGPU_BUFFER_LOAD1]](<2 x p1>) ; GREEDY: [[UV:%[0-9]+]]:vgpr(<2 x p1>), [[UV1:%[0-9]+]]:vgpr(<2 x p1>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x p1>) - ; GREEDY: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store 16 into `<4 x i8 addrspace(1)*> addrspace(1)* undef`, align 32, addrspace 1) + ; GREEDY: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store (s128) into `<4 x i8 addrspace(1)*> addrspace(1)* undef`, align 32, addrspace 1) ; GREEDY: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 ; GREEDY: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; GREEDY: G_STORE [[UV1]](<2 x p1>), [[PTR_ADD]](p1) :: (store 16 into `<4 x i8 addrspace(1)*> addrspace(1)* undef` + 16, basealign 32, addrspace 1) + ; GREEDY: G_STORE [[UV1]](<2 x p1>), [[PTR_ADD]](p1) :: (store (s128) into `<4 x i8 addrspace(1)*> addrspace(1)* undef` + 16, basealign 32, addrspace 1) ; GREEDY: S_ENDPGM 0 %val = call <4 x i8 addrspace(1)*> @llvm.amdgcn.s.buffer.load.v4p1i8(<4 x i32> %rsrc, i32 %soffset, i32 0) store <4 x i8 addrspace(1)*> %val, <4 x i8 addrspace(1)*> addrspace(1)* undef @@ -1070,22 +1070,22 @@ define amdgpu_ps void @s_buffer_load_v8p1_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load 16 from unknown-address + 16, align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load 16 from unknown-address + 48, align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x p1>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x p1>), [[AMDGPU_BUFFER_LOAD1]](<2 x p1>), [[AMDGPU_BUFFER_LOAD2]](<2 x p1>), [[AMDGPU_BUFFER_LOAD3]](<2 x p1>) ; CHECK: [[UV:%[0-9]+]]:vgpr(<2 x p1>), [[UV1:%[0-9]+]]:vgpr(<2 x p1>), [[UV2:%[0-9]+]]:vgpr(<2 x p1>), [[UV3:%[0-9]+]]:vgpr(<2 x p1>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x p1>) - ; CHECK: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store 16 into `<8 x i8 addrspace(1)*> addrspace(1)* undef`, align 64, addrspace 1) + ; CHECK: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store (s128) into `<8 x i8 addrspace(1)*> addrspace(1)* undef`, align 64, addrspace 1) ; CHECK: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; CHECK: G_STORE [[UV1]](<2 x p1>), [[PTR_ADD]](p1) :: (store 16 into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 16, basealign 64, addrspace 1) + ; CHECK: G_STORE [[UV1]](<2 x p1>), [[PTR_ADD]](p1) :: (store (s128) into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 16, basealign 64, addrspace 1) ; CHECK: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 ; CHECK: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) - ; CHECK: G_STORE [[UV2]](<2 x p1>), [[PTR_ADD1]](p1) :: (store 16 into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) + ; CHECK: G_STORE [[UV2]](<2 x p1>), [[PTR_ADD1]](p1) :: (store (s128) into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) ; CHECK: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 ; CHECK: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) - ; CHECK: G_STORE [[UV3]](<2 x p1>), [[PTR_ADD2]](p1) :: (store 16 into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 48, basealign 64, addrspace 1) + ; CHECK: G_STORE [[UV3]](<2 x p1>), [[PTR_ADD2]](p1) :: (store (s128) into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 48, basealign 64, addrspace 1) ; CHECK: S_ENDPGM 0 ; GREEDY-LABEL: name: s_buffer_load_v8p1_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): @@ -1099,22 +1099,22 @@ define amdgpu_ps void @s_buffer_load_v8p1_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load 16 from unknown-address + 16, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load 16 from unknown-address + 48, align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x p1>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x p1>), [[AMDGPU_BUFFER_LOAD1]](<2 x p1>), [[AMDGPU_BUFFER_LOAD2]](<2 x p1>), [[AMDGPU_BUFFER_LOAD3]](<2 x p1>) ; GREEDY: [[UV:%[0-9]+]]:vgpr(<2 x p1>), [[UV1:%[0-9]+]]:vgpr(<2 x p1>), [[UV2:%[0-9]+]]:vgpr(<2 x p1>), [[UV3:%[0-9]+]]:vgpr(<2 x p1>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x p1>) - ; GREEDY: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store 16 into `<8 x i8 addrspace(1)*> addrspace(1)* undef`, align 64, addrspace 1) + ; GREEDY: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store (s128) into `<8 x i8 addrspace(1)*> addrspace(1)* undef`, align 64, addrspace 1) ; GREEDY: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 ; GREEDY: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; GREEDY: G_STORE [[UV1]](<2 x p1>), [[PTR_ADD]](p1) :: (store 16 into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 16, basealign 64, addrspace 1) + ; GREEDY: G_STORE [[UV1]](<2 x p1>), [[PTR_ADD]](p1) :: (store (s128) into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 16, basealign 64, addrspace 1) ; GREEDY: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 ; GREEDY: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) - ; GREEDY: G_STORE [[UV2]](<2 x p1>), [[PTR_ADD1]](p1) :: (store 16 into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) + ; GREEDY: G_STORE [[UV2]](<2 x p1>), [[PTR_ADD1]](p1) :: (store (s128) into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) ; GREEDY: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 ; GREEDY: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) - ; GREEDY: G_STORE [[UV3]](<2 x p1>), [[PTR_ADD2]](p1) :: (store 16 into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 48, basealign 64, addrspace 1) + ; GREEDY: G_STORE [[UV3]](<2 x p1>), [[PTR_ADD2]](p1) :: (store (s128) into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 48, basealign 64, addrspace 1) ; GREEDY: S_ENDPGM 0 %val = call <8 x i8 addrspace(1)*> @llvm.amdgcn.s.buffer.load.v8p1i8(<4 x i32> %rsrc, i32 %soffset, i32 0) store <8 x i8 addrspace(1)*> %val, <8 x i8 addrspace(1)*> addrspace(1)* undef @@ -1136,7 +1136,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4092(<4 x i32> inreg % ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4092, 0, 0 :: (dereferenceable invariant load 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4092, 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092 @@ -1153,7 +1153,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4092(<4 x i32> inreg % ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4092, 0, 0 :: (dereferenceable invariant load 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4092, 0, 0 :: (dereferenceable invariant load (s32)) ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4092 @@ -1176,7 +1176,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4095(<4 x i32> inreg % ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4095, 0, 0 :: (dereferenceable invariant load 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4095, 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095 @@ -1193,7 +1193,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4095(<4 x i32> inreg % ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4095, 0, 0 :: (dereferenceable invariant load 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4095, 0, 0 :: (dereferenceable invariant load (s32)) ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4095 @@ -1215,7 +1215,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4096(<4 x i32> inreg % ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096 @@ -1231,7 +1231,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4096(<4 x i32> inreg % ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4096 @@ -1255,8 +1255,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4064(<4 x i32> ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) ; CHECK: $vgpr0 = COPY [[UV]](s32) @@ -1282,8 +1282,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4064(<4 x i32> ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) ; GREEDY: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) ; GREEDY: $vgpr0 = COPY [[UV]](s32) @@ -1315,8 +1315,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4068(<4 x i32> ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) ; CHECK: $vgpr0 = COPY [[UV]](s32) @@ -1341,8 +1341,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4068(<4 x i32> ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) ; GREEDY: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) ; GREEDY: $vgpr0 = COPY [[UV]](s32) @@ -1374,10 +1374,10 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4032(<4 x i3 ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4032, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4048, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load 16 from unknown-address + 16, align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load 16 from unknown-address + 48, align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4032, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4048, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) ; CHECK: $vgpr0 = COPY [[UV]](s32) @@ -1411,10 +1411,10 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4032(<4 x i3 ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4032, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4048, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load 16 from unknown-address + 16, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load 16 from unknown-address + 48, align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4032, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4048, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) ; GREEDY: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) ; GREEDY: $vgpr0 = COPY [[UV]](s32) @@ -1453,10 +1453,10 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4036(<4 x i3 ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load 16 from unknown-address + 16, align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load 16 from unknown-address + 48, align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) ; CHECK: $vgpr0 = COPY [[UV]](s32) @@ -1489,10 +1489,10 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4036(<4 x i3 ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load 16 from unknown-address + 16, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load 16 from unknown-address + 48, align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) ; GREEDY: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) ; GREEDY: $vgpr0 = COPY [[UV]](s32) @@ -1549,7 +1549,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg % ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY5]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY5]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -1588,7 +1588,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg % ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY5]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY5]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -1634,7 +1634,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> % ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4092, 0, 0 :: (dereferenceable invariant load 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4092, 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -1674,7 +1674,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> % ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4092, 0, 0 :: (dereferenceable invariant load 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4092, 0, 0 :: (dereferenceable invariant load (s32)) ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -1722,7 +1722,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> % ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -1763,7 +1763,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> % ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -1809,7 +1809,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4095, 0, 0 :: (dereferenceable invariant load 4 from unknown-address + 4095, align 1) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4095, 0, 0 :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -1848,7 +1848,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4095, 0, 0 :: (dereferenceable invariant load 4 from unknown-address + 4095, align 1) + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4095, 0, 0 :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -1893,7 +1893,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -1932,7 +1932,7 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -1977,8 +1977,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4064, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4080, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4064, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4080, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -2025,8 +2025,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4064, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4080, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4064, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4080, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -2082,8 +2082,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -2131,8 +2131,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -2186,8 +2186,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -2235,8 +2235,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -2289,8 +2289,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -2337,8 +2337,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -2391,8 +2391,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -2439,8 +2439,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -2493,8 +2493,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -2541,8 +2541,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load 16, align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -2594,8 +2594,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4064, 0, 0 :: (dereferenceable invariant load 16 from unknown-address + 4064, align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4080, 0, 0 :: (dereferenceable invariant load 16 from unknown-address + 4064, align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4064, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4080, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -2641,8 +2641,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4064, 0, 0 :: (dereferenceable invariant load 16 from unknown-address + 4064, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4080, 0, 0 :: (dereferenceable invariant load 16 from unknown-address + 4064, align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4064, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4080, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -2678,7 +2678,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr(<4 x i32> inreg % ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr @@ -2694,7 +2694,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr(<4 x i32> inreg % ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] ; GREEDY: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset = add i32 %offset.v, %offset.s @@ -2716,7 +2716,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr(<4 x i32> inreg % ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr @@ -2732,7 +2732,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr(<4 x i32> inreg % ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] ; GREEDY: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset = add i32 %offset.s, %offset.v @@ -2758,7 +2758,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr_imm(<4 x i32> inr ; CHECK: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm @@ -2778,7 +2778,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr_imm(<4 x i32> inr ; GREEDY: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load (s32)) ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.v, %offset.s @@ -2805,7 +2805,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr_imm(<4 x i32> inr ; CHECK: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm @@ -2825,7 +2825,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr_imm(<4 x i32> inr ; GREEDY: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load (s32)) ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.s, %offset.v @@ -2851,7 +2851,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_sgpr_vgpr(<4 x i32> inr ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) ; CHECK: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[ADD]], 0, 0, 0 :: (dereferenceable invariant load 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[ADD]], 0, 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr @@ -2869,7 +2869,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_sgpr_vgpr(<4 x i32> inr ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) ; GREEDY: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[ADD]], 0, 0, 0 :: (dereferenceable invariant load 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[ADD]], 0, 0, 0 :: (dereferenceable invariant load (s32)) ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.s, 1024 @@ -2895,7 +2895,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_vgpr_sgpr(<4 x i32> inr ; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) ; CHECK: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[ADD]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[ADD]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr @@ -2914,7 +2914,7 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_vgpr_sgpr(<4 x i32> inr ; GREEDY: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) ; GREEDY: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[ADD]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[ADD]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.v, 1024 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.mir index dba7c36d7a975..bcd55e225f472 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.mir @@ -25,7 +25,7 @@ body: | ; FAST: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY3]], [[COPY2]] ; FAST: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; FAST: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; FAST: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[COPY3]], [[C1]], 256, 0, 0 :: (dereferenceable invariant load 4) + ; FAST: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[COPY3]], [[C1]], 256, 0, 0 :: (dereferenceable invariant load (s32)) ; FAST: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32) ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_offset_cross_bank_copy_add_offset ; GREEDY: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr0 @@ -36,7 +36,7 @@ body: | ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY2]], [[C]] ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[COPY2]], [[C1]], 256, 0, 0 :: (dereferenceable invariant load 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[COPY2]], [[C1]], 256, 0, 0 :: (dereferenceable invariant load (s32)) ; GREEDY: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = COPY $sgpr0 @@ -64,7 +64,7 @@ body: | ; FAST: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] ; FAST: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; FAST: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; FAST: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load 4) + ; FAST: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) ; FAST: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32) ; GREEDY-LABEL: name: s_buffer_load_negative_offset ; GREEDY: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0 @@ -75,7 +75,7 @@ body: | ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]] ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load 4) + ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) ; GREEDY: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll index 109143ccdfffd..11246f573a481 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll @@ -15,7 +15,7 @@ define amdgpu_ps float @struct_buffer_load__sgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), [[COPY5]], [[COPY6]], 0, 0, -1 :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), [[COPY5]], [[COPY6]], 0, 0, -1 :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -37,7 +37,7 @@ define amdgpu_ps float @struct_buffer_load__sgpr_rsrc__sgpr_val__sgpr_vindex__sg ; CHECK: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8 ; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32) ; CHECK: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY7]](s32), [[COPY8]], [[COPY6]], 0, 0, -1 :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY7]](s32), [[COPY8]], [[COPY6]], 0, 0, -1 :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -76,7 +76,7 @@ define amdgpu_ps float @struct_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[COPY5]], [[COPY6]], 0, 0, -1 :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[COPY5]], [[COPY6]], 0, 0, -1 :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -113,7 +113,7 @@ define amdgpu_ps float @struct_buffer_load__sgpr_rsrc__vgpr_val__vgpr_vindex_vgp ; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %10(s32), %bb.2 ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY6]](s32), implicit $exec ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY6]](s32), implicit $exec - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), [[COPY5]], [[V_READFIRSTLANE_B32_]], 0, 0, -1 :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), [[COPY5]], [[V_READFIRSTLANE_B32_]], 0, 0, -1 :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -162,7 +162,7 @@ define amdgpu_ps float @struct_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY6]](s32), implicit $exec ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]](s32), [[COPY6]](s32), implicit $exec ; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[COPY5]], [[V_READFIRSTLANE_B32_4]], 0, 0, -1 :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[COPY5]], [[V_READFIRSTLANE_B32_4]], 0, 0, -1 :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll index 2d3d27c04bfc7..5b29e575902b4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll @@ -16,7 +16,7 @@ define amdgpu_ps void @struct_buffer_store__sgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 ; CHECK: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK: G_AMDGPU_BUFFER_STORE [[COPY4]](s32), [[BUILD_VECTOR]](<4 x s32>), [[COPY5]](s32), [[COPY6]], [[COPY7]], 0, 0, -1 :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: G_AMDGPU_BUFFER_STORE [[COPY4]](s32), [[BUILD_VECTOR]](<4 x s32>), [[COPY5]](s32), [[COPY6]], [[COPY7]], 0, 0, -1 :: (dereferenceable store (s32) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -39,7 +39,7 @@ define amdgpu_ps void @struct_buffer_store__sgpr_rsrc__sgpr_val__sgpr_vindex__sg ; CHECK: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32) ; CHECK: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) ; CHECK: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[COPY6]](s32) - ; CHECK: G_AMDGPU_BUFFER_STORE [[COPY8]](s32), [[BUILD_VECTOR]](<4 x s32>), [[COPY9]](s32), [[COPY10]], [[COPY7]], 0, 0, -1 :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: G_AMDGPU_BUFFER_STORE [[COPY8]](s32), [[BUILD_VECTOR]](<4 x s32>), [[COPY9]](s32), [[COPY10]], [[COPY7]], 0, 0, -1 :: (dereferenceable store (s32) into custom "BufferResource", align 1, addrspace 4) ; CHECK: S_ENDPGM 0 call void @llvm.amdgcn.struct.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -76,7 +76,7 @@ define amdgpu_ps void @struct_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: G_AMDGPU_BUFFER_STORE [[COPY4]](s32), [[BUILD_VECTOR1]](<4 x s32>), [[COPY5]](s32), [[COPY6]], [[COPY7]], 0, 0, -1 :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: G_AMDGPU_BUFFER_STORE [[COPY4]](s32), [[BUILD_VECTOR1]](<4 x s32>), [[COPY5]](s32), [[COPY6]], [[COPY7]], 0, 0, -1 :: (dereferenceable store (s32) into custom "BufferResource", align 1, addrspace 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -111,7 +111,7 @@ define amdgpu_ps void @struct_buffer_store__sgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %14, %bb.2 ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY7]](s32), implicit $exec - ; CHECK: G_AMDGPU_BUFFER_STORE [[COPY4]](s32), [[BUILD_VECTOR]](<4 x s32>), [[COPY5]](s32), [[COPY6]], [[V_READFIRSTLANE_B32_]], 0, 0, -1 :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: G_AMDGPU_BUFFER_STORE [[COPY4]](s32), [[BUILD_VECTOR]](<4 x s32>), [[COPY5]](s32), [[COPY6]], [[V_READFIRSTLANE_B32_]], 0, 0, -1 :: (dereferenceable store (s32) into custom "BufferResource", align 1, addrspace 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec @@ -158,7 +158,7 @@ define amdgpu_ps void @struct_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex__vg ; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]](s32), [[COPY7]](s32), implicit $exec ; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc - ; CHECK: G_AMDGPU_BUFFER_STORE [[COPY4]](s32), [[BUILD_VECTOR1]](<4 x s32>), [[COPY5]](s32), [[COPY6]], [[V_READFIRSTLANE_B32_4]], 0, 0, -1 :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + ; CHECK: G_AMDGPU_BUFFER_STORE [[COPY4]](s32), [[BUILD_VECTOR1]](<4 x s32>), [[COPY5]](s32), [[COPY6]], [[V_READFIRSTLANE_B32_4]], 0, 0, -1 :: (dereferenceable store (s32) into custom "BufferResource", align 1, addrspace 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomic-cmpxchg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomic-cmpxchg.mir index 4681284163f2f..d0faf4100b246 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomic-cmpxchg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomic-cmpxchg.mir @@ -16,11 +16,11 @@ body: | ; CHECK: [[COPY3:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:vgpr(s32) = G_ATOMIC_CMPXCHG [[COPY3]](p1), [[COPY4]], [[COPY5]] :: (load store seq_cst 4, addrspace 1) + ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:vgpr(s32) = G_ATOMIC_CMPXCHG [[COPY3]](p1), [[COPY4]], [[COPY5]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = COPY $sgpr3 - %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst 4, addrspace 1) + %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst (s32), addrspace 1) ... --- @@ -37,11 +37,11 @@ body: | ; CHECK: [[COPY3:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:vgpr(s32) = G_ATOMIC_CMPXCHG [[COPY3]](p0), [[COPY4]], [[COPY5]] :: (load store seq_cst 4) + ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:vgpr(s32) = G_ATOMIC_CMPXCHG [[COPY3]](p0), [[COPY4]], [[COPY5]] :: (load store seq_cst (s32)) %0:_(p0) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 %2:_(s32) = COPY $sgpr3 - %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst 4, addrspace 0) + %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst (s32), addrspace 0) ... --- @@ -58,9 +58,9 @@ body: | ; CHECK: [[COPY3:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:vgpr(s32) = G_ATOMIC_CMPXCHG [[COPY3]](p3), [[COPY4]], [[COPY5]] :: (load store seq_cst 4, addrspace 3) + ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:vgpr(s32) = G_ATOMIC_CMPXCHG [[COPY3]](p3), [[COPY4]], [[COPY5]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $sgpr2 - %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst 4, addrspace 3) + %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst (s32), addrspace 3) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-add.mir index fdcb8ffbc7ca3..5cae7130a69f7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-add.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-add.mir @@ -14,10 +14,10 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_ADD [[COPY2]](p1), [[COPY3]] :: (load store seq_cst 4, addrspace 1) + ; CHECK: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_ADD [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst 4, addrspace 1) + %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 1) ... --- @@ -32,10 +32,10 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_ADD [[COPY2]](p0), [[COPY3]] :: (load store seq_cst 4) + ; CHECK: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_ADD [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32)) %0:_(p0) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst 4, addrspace 0) + %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 0) ... --- @@ -50,8 +50,8 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_ADD [[COPY2]](p3), [[COPY3]] :: (load store seq_cst 4, addrspace 3) + ; CHECK: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_ADD [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst 4, addrspace 3) + %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 3) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-and.mir index c22f7f1860dd5..c073c40b65633 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-and.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-and.mir @@ -14,10 +14,10 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK: [[ATOMICRMW_AND:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_AND [[COPY2]](p1), [[COPY3]] :: (load store seq_cst 4, addrspace 1) + ; CHECK: [[ATOMICRMW_AND:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_AND [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst 4, addrspace 1) + %2:_(s32) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst (s32), addrspace 1) ... --- @@ -32,10 +32,10 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK: [[ATOMICRMW_AND:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_AND [[COPY2]](p0), [[COPY3]] :: (load store seq_cst 4) + ; CHECK: [[ATOMICRMW_AND:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_AND [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32)) %0:_(p0) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst 4, addrspace 0) + %2:_(s32) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst (s32), addrspace 0) ... --- @@ -50,8 +50,8 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK: [[ATOMICRMW_AND:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_AND [[COPY2]](p3), [[COPY3]] :: (load store seq_cst 4, addrspace 3) + ; CHECK: [[ATOMICRMW_AND:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_AND [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst 4, addrspace 3) + %2:_(s32) = G_ATOMICRMW_AND %0, %1 :: (load store seq_cst (s32), addrspace 3) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-fadd.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-fadd.mir index 842e7d71e2893..5018d1901cb09 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-fadd.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-fadd.mir @@ -14,8 +14,8 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_FADD [[COPY2]](p3), [[COPY3]] :: (load store seq_cst 4, addrspace 3) + ; CHECK: [[ATOMICRMW_FADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_FADD [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst 4, addrspace 3) + %2:_(s32) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst (s32), addrspace 3) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-max.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-max.mir index 5dc204e17c25f..4a568decec7d2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-max.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-max.mir @@ -14,10 +14,10 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK: [[ATOMICRMW_MAX:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_MAX [[COPY2]](p1), [[COPY3]] :: (load store seq_cst 4, addrspace 1) + ; CHECK: [[ATOMICRMW_MAX:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_MAX [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_MAX %0, %1 :: (load store seq_cst 4, addrspace 1) + %2:_(s32) = G_ATOMICRMW_MAX %0, %1 :: (load store seq_cst (s32), addrspace 1) ... --- @@ -32,10 +32,10 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK: [[ATOMICRMW_MAX:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_MAX [[COPY2]](p0), [[COPY3]] :: (load store seq_cst 4) + ; CHECK: [[ATOMICRMW_MAX:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_MAX [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32)) %0:_(p0) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_MAX %0, %1 :: (load store seq_cst 4, addrspace 0) + %2:_(s32) = G_ATOMICRMW_MAX %0, %1 :: (load store seq_cst (s32), addrspace 0) ... --- @@ -50,8 +50,8 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK: [[ATOMICRMW_MAX:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_MAX [[COPY2]](p3), [[COPY3]] :: (load store seq_cst 4, addrspace 3) + ; CHECK: [[ATOMICRMW_MAX:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_MAX [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_MAX %0, %1 :: (load store seq_cst 4, addrspace 3) + %2:_(s32) = G_ATOMICRMW_MAX %0, %1 :: (load store seq_cst (s32), addrspace 3) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-min.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-min.mir index e23ea62e3a34f..cafd664f5c72c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-min.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-min.mir @@ -14,10 +14,10 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK: [[ATOMICRMW_MIN:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_MIN [[COPY2]](p1), [[COPY3]] :: (load store seq_cst 4, addrspace 1) + ; CHECK: [[ATOMICRMW_MIN:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_MIN [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_MIN %0, %1 :: (load store seq_cst 4, addrspace 1) + %2:_(s32) = G_ATOMICRMW_MIN %0, %1 :: (load store seq_cst (s32), addrspace 1) ... --- @@ -32,10 +32,10 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK: [[ATOMICRMW_MIN:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_MIN [[COPY2]](p0), [[COPY3]] :: (load store seq_cst 4) + ; CHECK: [[ATOMICRMW_MIN:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_MIN [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32)) %0:_(p0) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_MIN %0, %1 :: (load store seq_cst 4, addrspace 0) + %2:_(s32) = G_ATOMICRMW_MIN %0, %1 :: (load store seq_cst (s32), addrspace 0) ... --- @@ -50,8 +50,8 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK: [[ATOMICRMW_MIN:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_MIN [[COPY2]](p3), [[COPY3]] :: (load store seq_cst 4, addrspace 3) + ; CHECK: [[ATOMICRMW_MIN:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_MIN [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_MIN %0, %1 :: (load store seq_cst 4, addrspace 3) + %2:_(s32) = G_ATOMICRMW_MIN %0, %1 :: (load store seq_cst (s32), addrspace 3) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-or.mir index d5250afd1efd2..999ffaba78a64 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-or.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-or.mir @@ -14,10 +14,10 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK: [[ATOMICRMW_OR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_OR [[COPY2]](p1), [[COPY3]] :: (load store seq_cst 4, addrspace 1) + ; CHECK: [[ATOMICRMW_OR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_OR [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst 4, addrspace 1) + %2:_(s32) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst (s32), addrspace 1) ... --- @@ -32,10 +32,10 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK: [[ATOMICRMW_OR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_OR [[COPY2]](p0), [[COPY3]] :: (load store seq_cst 4) + ; CHECK: [[ATOMICRMW_OR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_OR [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32)) %0:_(p0) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst 4, addrspace 0) + %2:_(s32) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst (s32), addrspace 0) ... --- @@ -50,8 +50,8 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK: [[ATOMICRMW_OR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_OR [[COPY2]](p3), [[COPY3]] :: (load store seq_cst 4, addrspace 3) + ; CHECK: [[ATOMICRMW_OR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_OR [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst 4, addrspace 3) + %2:_(s32) = G_ATOMICRMW_OR %0, %1 :: (load store seq_cst (s32), addrspace 3) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-sub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-sub.mir index f7452ed4edb79..3bd729b925f30 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-sub.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-sub.mir @@ -14,10 +14,10 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_SUB [[COPY2]](p1), [[COPY3]] :: (load store seq_cst 4, addrspace 1) + ; CHECK: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_SUB [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst 4, addrspace 1) + %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s32), addrspace 1) ... --- @@ -32,10 +32,10 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_SUB [[COPY2]](p0), [[COPY3]] :: (load store seq_cst 4) + ; CHECK: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_SUB [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32)) %0:_(p0) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst 4, addrspace 0) + %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s32), addrspace 0) ... --- @@ -50,8 +50,8 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_SUB [[COPY2]](p3), [[COPY3]] :: (load store seq_cst 4, addrspace 3) + ; CHECK: [[ATOMICRMW_SUB:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_SUB [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst 4, addrspace 3) + %2:_(s32) = G_ATOMICRMW_SUB %0, %1 :: (load store seq_cst (s32), addrspace 3) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-umax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-umax.mir index 2165f59661e83..b7a62f8df27b5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-umax.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-umax.mir @@ -14,10 +14,10 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK: [[ATOMICRMW_UMAX:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_UMAX [[COPY2]](p1), [[COPY3]] :: (load store seq_cst 4, addrspace 1) + ; CHECK: [[ATOMICRMW_UMAX:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_UMAX [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_UMAX %0, %1 :: (load store seq_cst 4, addrspace 1) + %2:_(s32) = G_ATOMICRMW_UMAX %0, %1 :: (load store seq_cst (s32), addrspace 1) ... --- @@ -32,10 +32,10 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK: [[ATOMICRMW_UMAX:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_UMAX [[COPY2]](p0), [[COPY3]] :: (load store seq_cst 4) + ; CHECK: [[ATOMICRMW_UMAX:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_UMAX [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32)) %0:_(p0) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_UMAX %0, %1 :: (load store seq_cst 4, addrspace 0) + %2:_(s32) = G_ATOMICRMW_UMAX %0, %1 :: (load store seq_cst (s32), addrspace 0) ... --- @@ -50,8 +50,8 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK: [[ATOMICRMW_UMAX:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_UMAX [[COPY2]](p3), [[COPY3]] :: (load store seq_cst 4, addrspace 3) + ; CHECK: [[ATOMICRMW_UMAX:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_UMAX [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_UMAX %0, %1 :: (load store seq_cst 4, addrspace 3) + %2:_(s32) = G_ATOMICRMW_UMAX %0, %1 :: (load store seq_cst (s32), addrspace 3) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-umin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-umin.mir index 4fb882b3e0c22..26196f1e9852e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-umin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-umin.mir @@ -14,10 +14,10 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK: [[ATOMICRMW_UMIN:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_UMIN [[COPY2]](p1), [[COPY3]] :: (load store seq_cst 4, addrspace 1) + ; CHECK: [[ATOMICRMW_UMIN:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_UMIN [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_UMIN %0, %1 :: (load store seq_cst 4, addrspace 1) + %2:_(s32) = G_ATOMICRMW_UMIN %0, %1 :: (load store seq_cst (s32), addrspace 1) ... --- @@ -32,10 +32,10 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK: [[ATOMICRMW_UMIN:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_UMIN [[COPY2]](p0), [[COPY3]] :: (load store seq_cst 4) + ; CHECK: [[ATOMICRMW_UMIN:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_UMIN [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32)) %0:_(p0) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_UMIN %0, %1 :: (load store seq_cst 4, addrspace 0) + %2:_(s32) = G_ATOMICRMW_UMIN %0, %1 :: (load store seq_cst (s32), addrspace 0) ... --- @@ -50,8 +50,8 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK: [[ATOMICRMW_UMIN:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_UMIN [[COPY2]](p3), [[COPY3]] :: (load store seq_cst 4, addrspace 3) + ; CHECK: [[ATOMICRMW_UMIN:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_UMIN [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_UMIN %0, %1 :: (load store seq_cst 4, addrspace 3) + %2:_(s32) = G_ATOMICRMW_UMIN %0, %1 :: (load store seq_cst (s32), addrspace 3) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xchg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xchg.mir index 34f2037657cb6..49861ffed908e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xchg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xchg.mir @@ -14,10 +14,10 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK: [[ATOMICRMW_XCHG:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XCHG [[COPY2]](p1), [[COPY3]] :: (load store seq_cst 4, addrspace 1) + ; CHECK: [[ATOMICRMW_XCHG:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XCHG [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_XCHG %0, %1 :: (load store seq_cst 4, addrspace 1) + %2:_(s32) = G_ATOMICRMW_XCHG %0, %1 :: (load store seq_cst (s32), addrspace 1) ... --- @@ -32,10 +32,10 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK: [[ATOMICRMW_XCHG:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XCHG [[COPY2]](p0), [[COPY3]] :: (load store seq_cst 4) + ; CHECK: [[ATOMICRMW_XCHG:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XCHG [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32)) %0:_(p0) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_XCHG %0, %1 :: (load store seq_cst 4, addrspace 0) + %2:_(s32) = G_ATOMICRMW_XCHG %0, %1 :: (load store seq_cst (s32), addrspace 0) ... --- @@ -50,8 +50,8 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK: [[ATOMICRMW_XCHG:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XCHG [[COPY2]](p3), [[COPY3]] :: (load store seq_cst 4, addrspace 3) + ; CHECK: [[ATOMICRMW_XCHG:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XCHG [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_XCHG %0, %1 :: (load store seq_cst 4, addrspace 3) + %2:_(s32) = G_ATOMICRMW_XCHG %0, %1 :: (load store seq_cst (s32), addrspace 3) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xor.mir index 83dda48c9221a..1754363a7c3a8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xor.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-atomicrmw-xor.mir @@ -14,10 +14,10 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK: [[ATOMICRMW_XOR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XOR [[COPY2]](p1), [[COPY3]] :: (load store seq_cst 4, addrspace 1) + ; CHECK: [[ATOMICRMW_XOR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XOR [[COPY2]](p1), [[COPY3]] :: (load store seq_cst (s32), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst 4, addrspace 1) + %2:_(s32) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst (s32), addrspace 1) ... --- @@ -32,10 +32,10 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK: [[ATOMICRMW_XOR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XOR [[COPY2]](p0), [[COPY3]] :: (load store seq_cst 4) + ; CHECK: [[ATOMICRMW_XOR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XOR [[COPY2]](p0), [[COPY3]] :: (load store seq_cst (s32)) %0:_(p0) = COPY $sgpr0_sgpr1 %1:_(s32) = COPY $sgpr2 - %2:_(s32) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst 4, addrspace 0) + %2:_(s32) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst (s32), addrspace 0) ... --- @@ -50,8 +50,8 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK: [[ATOMICRMW_XOR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XOR [[COPY2]](p3), [[COPY3]] :: (load store seq_cst 4, addrspace 3) + ; CHECK: [[ATOMICRMW_XOR:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_XOR [[COPY2]](p3), [[COPY3]] :: (load store seq_cst (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst 4, addrspace 3) + %2:_(s32) = G_ATOMICRMW_XOR %0, %1 :: (load store seq_cst (s32), addrspace 3) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-constant.mir index 45d809b786f11..917ef6928e199 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-constant.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-constant.mir @@ -12,10 +12,10 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store 4) + ; CHECK: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store (s32)) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = G_CONSTANT i32 1 - G_STORE %1, %0 :: (store 4) + G_STORE %1, %0 :: (store (s32)) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert-vector-elt.mir index a77b2c8bdf2f0..547c05101981e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert-vector-elt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert-vector-elt.mir @@ -764,7 +764,7 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr4_sgpr5 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(s32) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load 4) + ; CHECK: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(s32) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s32)) ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<2 x s32>) = COPY [[COPY1]](<2 x s32>) ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](<2 x s32>) ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 @@ -781,7 +781,7 @@ body: | %1:_(<2 x s32>) = COPY $sgpr4_sgpr5 %2:_(s32) = COPY $vgpr0 %3:_(s32) = G_CONSTANT i32 0 - %4:_(s32) = G_AMDGPU_S_BUFFER_LOAD %0(<4 x s32>), %3(s32), 0 :: (dereferenceable invariant load 4) + %4:_(s32) = G_AMDGPU_S_BUFFER_LOAD %0(<4 x s32>), %3(s32), 0 :: (dereferenceable invariant load (s32)) %5:_(<2 x s32>) = G_INSERT_VECTOR_ELT %1, %4(s32), %2(s32) S_ENDPGM 0, implicit %5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir index 55257e83b8219..89305943cf822 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir @@ -116,13 +116,13 @@ body: | ; CHECK-LABEL: name: load_global_v8i32_non_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16 from %ir.global.not.uniform.v8i32, align 32, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (s128) from %ir.global.not.uniform.v8i32, align 32, addrspace 1) ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load 16 from %ir.global.not.uniform.v8i32 + 16, basealign 32, addrspace 1) + ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (s128) from %ir.global.not.uniform.v8i32 + 16, basealign 32, addrspace 1) ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(<8 x s32>) = G_LOAD %0 :: (load 32 from %ir.global.not.uniform.v8i32) + %1:_(<8 x s32>) = G_LOAD %0 :: (load (<8 x s32>) from %ir.global.not.uniform.v8i32) ... --- @@ -136,13 +136,13 @@ body: | ; CHECK-LABEL: name: load_global_v4i64_non_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16 from %ir.global.not.uniform.v4i64, align 32, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (s128) from %ir.global.not.uniform.v4i64, align 32, addrspace 1) ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD]](p1) :: (load 16 from %ir.global.not.uniform.v4i64 + 16, basealign 32, addrspace 1) + ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD]](p1) :: (load (s128) from %ir.global.not.uniform.v4i64 + 16, basealign 32, addrspace 1) ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(<4 x s64>) = G_LOAD %0 :: (load 32 from %ir.global.not.uniform.v4i64) + %1:_(<4 x s64>) = G_LOAD %0 :: (load (<4 x s64>) from %ir.global.not.uniform.v4i64) ... --- @@ -155,19 +155,19 @@ body: | ; CHECK-LABEL: name: load_global_v16i32_non_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 16 from %ir.global.not.uniform.v16i32, align 64, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (s128) from %ir.global.not.uniform.v16i32, align 64, addrspace 1) ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load 16 from %ir.global.not.uniform.v16i32 + 16, basealign 64, addrspace 1) + ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (s128) from %ir.global.not.uniform.v16i32 + 16, basealign 64, addrspace 1) ; CHECK: [[C1:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 32 ; CHECK: [[PTR_ADD1:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD1]](p1) :: (load 16 from %ir.global.not.uniform.v16i32 + 32, align 32, basealign 64, addrspace 1) + ; CHECK: [[LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD1]](p1) :: (load (s128) from %ir.global.not.uniform.v16i32 + 32, align 32, basealign 64, addrspace 1) ; CHECK: [[C2:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48 ; CHECK: [[PTR_ADD2:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CHECK: [[LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load 16 from %ir.global.not.uniform.v16i32 + 48, basealign 64, addrspace 1) + ; CHECK: [[LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load (s128) from %ir.global.not.uniform.v16i32 + 48, basealign 64, addrspace 1) ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(<16 x s32>) = G_LOAD %0 :: (load 64 from %ir.global.not.uniform.v16i32) + %1:_(<16 x s32>) = G_LOAD %0 :: (load (<16 x s32>) from %ir.global.not.uniform.v16i32) ... --- @@ -178,20 +178,21 @@ body: | bb.0: liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_global_v8i64_non_uniform - ; CHECK: [[PTR:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD0:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR]](p1) :: (load 16 from %ir.global.not.uniform.v8i64, align 64, addrspace 1) - ; CHECK: [[OFFSET16:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[GEP16:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[PTR]], [[OFFSET16]](s64) - ; CHECK: [[LOAD16:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[GEP16]](p1) :: (load 16 from %ir.global.not.uniform.v8i64 + 16, basealign 64, addrspace 1) - ; CHECK: [[OFFSET32:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 32 - ; CHECK: [[GEP32:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[PTR]], [[OFFSET32]](s64) - ; CHECK: [[LOAD32:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[GEP32]](p1) :: (load 16 from %ir.global.not.uniform.v8i64 + 32, align 32, basealign 64, addrspace 1) - ; CHECK: [[OFFSET48:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48 - ; CHECK: [[GEP48:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[PTR]], [[OFFSET48]](s64) - ; CHECK: [[LOAD48:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[GEP48]](p1) :: (load 16 from %ir.global.not.uniform.v8i64 + 48, basealign 64, addrspace 1) - ; CHECK: %1:vgpr(<8 x s64>) = G_CONCAT_VECTORS [[LOAD0]](<2 x s64>), [[LOAD16]](<2 x s64>), [[LOAD32]](<2 x s64>), [[LOAD48]](<2 x s64>) + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (s128) from %ir.global.not.uniform.v8i64, align 64, addrspace 1) + ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 + ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD]](p1) :: (load (s128) from %ir.global.not.uniform.v8i64 + 16, basealign 64, addrspace 1) + ; CHECK: [[C1:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 32 + ; CHECK: [[PTR_ADD1:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK: [[LOAD2:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD1]](p1) :: (load (s128) from %ir.global.not.uniform.v8i64 + 32, align 32, basealign 64, addrspace 1) + ; CHECK: [[C2:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48 + ; CHECK: [[PTR_ADD2:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK: [[LOAD3:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD2]](p1) :: (load (s128) from %ir.global.not.uniform.v8i64 + 48, basealign 64, addrspace 1) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>), [[LOAD2]](<2 x s64>), [[LOAD3]](<2 x s64>) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(<8 x s64>) = G_LOAD %0 :: (load 64 from %ir.global.not.uniform.v8i64) + %1:_(<8 x s64>) = G_LOAD %0 :: (load (<8 x s64>) from %ir.global.not.uniform.v8i64) ... --- @@ -203,9 +204,9 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_global_v8i32_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<8 x s32>) = G_LOAD [[COPY]](p1) :: (invariant load 32, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<8 x s32>) = G_LOAD [[COPY]](p1) :: (invariant load (<8 x s32>), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(<8 x s32>) = G_LOAD %0 :: (invariant load 32, addrspace 1) + %1:_(<8 x s32>) = G_LOAD %0 :: (invariant load (<8 x s32>), addrspace 1) ... --- @@ -217,9 +218,9 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_global_v4i64_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<4 x s64>) = G_LOAD [[COPY]](p1) :: (invariant load 32, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<4 x s64>) = G_LOAD [[COPY]](p1) :: (invariant load (<4 x s64>), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(<4 x s64>) = G_LOAD %0 :: (invariant load 32, addrspace 1) + %1:_(<4 x s64>) = G_LOAD %0 :: (invariant load (<4 x s64>), addrspace 1) ... --- @@ -231,9 +232,9 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_global_v16i32_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<16 x s32>) = G_LOAD [[COPY]](p1) :: (invariant load 64, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<16 x s32>) = G_LOAD [[COPY]](p1) :: (invariant load (<16 x s32>), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(<16 x s32>) = G_LOAD %0 :: (invariant load 64, addrspace 1) + %1:_(<16 x s32>) = G_LOAD %0 :: (invariant load (<16 x s32>), addrspace 1) ... --- @@ -245,9 +246,9 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_global_v8i64_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<8 x s64>) = G_LOAD [[COPY]](p1) :: (invariant load 64, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<8 x s64>) = G_LOAD [[COPY]](p1) :: (invariant load (<8 x s64>), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(<8 x s64>) = G_LOAD %0 :: (invariant load 64, addrspace 1) + %1:_(<8 x s64>) = G_LOAD %0 :: (invariant load (<8 x s64>), addrspace 1) ... --- @@ -260,13 +261,13 @@ body: | ; CHECK-LABEL: name: load_constant_v8i32_non_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16 from %ir.constant.not.uniform.v8i32, align 32, addrspace 4) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (s128) from %ir.constant.not.uniform.v8i32, align 32, addrspace 4) ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load 16 from %ir.constant.not.uniform.v8i32 + 16, basealign 32, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (s128) from %ir.constant.not.uniform.v8i32 + 16, basealign 32, addrspace 4) ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(<8 x s32>) = G_LOAD %0 :: (load 32 from %ir.constant.not.uniform.v8i32) + %1:_(<8 x s32>) = G_LOAD %0 :: (load (<8 x s32>) from %ir.constant.not.uniform.v8i32) ... --- @@ -279,13 +280,13 @@ body: | ; CHECK-LABEL: name: load_constant_i256_non_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s128) = G_LOAD [[COPY]](p4) :: (load 16 from %ir.constant.not.uniform, align 32, addrspace 4) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s128) = G_LOAD [[COPY]](p4) :: (load (s128) from %ir.constant.not.uniform, align 32, addrspace 4) ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(s128) = G_LOAD [[PTR_ADD]](p4) :: (load 16 from %ir.constant.not.uniform + 16, basealign 32, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(s128) = G_LOAD [[PTR_ADD]](p4) :: (load (s128) from %ir.constant.not.uniform + 16, basealign 32, addrspace 4) ; CHECK: [[MV:%[0-9]+]]:vgpr(s256) = G_MERGE_VALUES [[LOAD]](s128), [[LOAD1]](s128) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(s256) = G_LOAD %0 :: (load 32 from %ir.constant.not.uniform) + %1:_(s256) = G_LOAD %0 :: (load (s256) from %ir.constant.not.uniform) ... --- @@ -299,13 +300,13 @@ body: | ; CHECK-LABEL: name: load_constant_v16i16_non_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_LOAD [[COPY]](p4) :: (load 16 from %ir.constant.not.uniform, align 32, addrspace 4) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_LOAD [[COPY]](p4) :: (load (s128) from %ir.constant.not.uniform, align 32, addrspace 4) ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_LOAD [[PTR_ADD]](p4) :: (load 16 from %ir.constant.not.uniform + 16, basealign 32, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_LOAD [[PTR_ADD]](p4) :: (load (s128) from %ir.constant.not.uniform + 16, basealign 32, addrspace 4) ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s16>) = G_CONCAT_VECTORS [[LOAD]](<8 x s16>), [[LOAD1]](<8 x s16>) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(<16 x s16>) = G_LOAD %0 :: (load 32 from %ir.constant.not.uniform) + %1:_(<16 x s16>) = G_LOAD %0 :: (load (<16 x s16>) from %ir.constant.not.uniform) ... --- @@ -318,13 +319,13 @@ body: | ; CHECK-LABEL: name: load_constant_v4i64_non_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load 16 from %ir.constant.not.uniform.v4i64, align 32, addrspace 4) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (s128) from %ir.constant.not.uniform.v4i64, align 32, addrspace 4) ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD]](p4) :: (load 16 from %ir.constant.not.uniform.v4i64 + 16, basealign 32, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD]](p4) :: (load (s128) from %ir.constant.not.uniform.v4i64 + 16, basealign 32, addrspace 4) ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(<4 x s64>) = G_LOAD %0 :: (load 32 from %ir.constant.not.uniform.v4i64) + %1:_(<4 x s64>) = G_LOAD %0 :: (load (<4 x s64>) from %ir.constant.not.uniform.v4i64) ... --- @@ -337,19 +338,19 @@ body: | ; CHECK-LABEL: name: load_constant_v16i32_non_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16 from %ir.constant.not.uniform.v16i32, align 64, addrspace 4) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (s128) from %ir.constant.not.uniform.v16i32, align 64, addrspace 4) ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load 16 from %ir.constant.not.uniform.v16i32 + 16, basealign 64, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (s128) from %ir.constant.not.uniform.v16i32 + 16, basealign 64, addrspace 4) ; CHECK: [[C1:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 32 ; CHECK: [[PTR_ADD1:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (load 16 from %ir.constant.not.uniform.v16i32 + 32, align 32, basealign 64, addrspace 4) + ; CHECK: [[LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (load (s128) from %ir.constant.not.uniform.v16i32 + 32, align 32, basealign 64, addrspace 4) ; CHECK: [[C2:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48 ; CHECK: [[PTR_ADD2:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CHECK: [[LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD2]](p4) :: (load 16 from %ir.constant.not.uniform.v16i32 + 48, basealign 64, addrspace 4) + ; CHECK: [[LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD2]](p4) :: (load (s128) from %ir.constant.not.uniform.v16i32 + 48, basealign 64, addrspace 4) ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(<16 x s32>) = G_LOAD %0 :: (load 64 from %ir.constant.not.uniform.v16i32) + %1:_(<16 x s32>) = G_LOAD %0 :: (load (<16 x s32>) from %ir.constant.not.uniform.v16i32) ... --- @@ -362,19 +363,19 @@ body: | ; CHECK-LABEL: name: load_constant_v8i64_non_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load 16 from %ir.constant.not.uniform.v8i64, align 64, addrspace 4) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (s128) from %ir.constant.not.uniform.v8i64, align 64, addrspace 4) ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD]](p4) :: (load 16 from %ir.constant.not.uniform.v8i64 + 16, basealign 64, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD]](p4) :: (load (s128) from %ir.constant.not.uniform.v8i64 + 16, basealign 64, addrspace 4) ; CHECK: [[C1:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 32 ; CHECK: [[PTR_ADD1:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD1]](p4) :: (load 16 from %ir.constant.not.uniform.v8i64 + 32, align 32, basealign 64, addrspace 4) + ; CHECK: [[LOAD2:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD1]](p4) :: (load (s128) from %ir.constant.not.uniform.v8i64 + 32, align 32, basealign 64, addrspace 4) ; CHECK: [[C2:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48 ; CHECK: [[PTR_ADD2:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CHECK: [[LOAD3:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD2]](p4) :: (load 16 from %ir.constant.not.uniform.v8i64 + 48, basealign 64, addrspace 4) + ; CHECK: [[LOAD3:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD2]](p4) :: (load (s128) from %ir.constant.not.uniform.v8i64 + 48, basealign 64, addrspace 4) ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>), [[LOAD2]](<2 x s64>), [[LOAD3]](<2 x s64>) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(<8 x s64>) = G_LOAD %0 :: (load 64 from %ir.constant.not.uniform.v8i64) + %1:_(<8 x s64>) = G_LOAD %0 :: (load (<8 x s64>) from %ir.constant.not.uniform.v8i64) ... --- @@ -386,9 +387,9 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v8i32_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) + ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(<8 x s32>) = G_LOAD %0 :: (load 32, addrspace 4) + %1:_(<8 x s32>) = G_LOAD %0 :: (load (<8 x s32>), addrspace 4) ... --- @@ -400,9 +401,9 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v16i16_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<16 x s16>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) + ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<16 x s16>) = G_LOAD [[COPY]](p4) :: (load (<16 x s16>), addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(<16 x s16>) = G_LOAD %0 :: (load 32, addrspace 4) + %1:_(<16 x s16>) = G_LOAD %0 :: (load (<16 x s16>), addrspace 4) ... --- @@ -414,9 +415,9 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v4i64_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4) + ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(<4 x s64>) = G_LOAD %0 :: (load 32, addrspace 4) + %1:_(<4 x s64>) = G_LOAD %0 :: (load (<4 x s64>), addrspace 4) ... --- @@ -428,9 +429,9 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v16i32_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load 64, addrspace 4) + ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s32>), addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(<16 x s32>) = G_LOAD %0 :: (load 64, addrspace 4) + %1:_(<16 x s32>) = G_LOAD %0 :: (load (<16 x s32>), addrspace 4) ... --- @@ -442,9 +443,9 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v8i64_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<8 x s64>) = G_LOAD [[COPY]](p4) :: (load 64, addrspace 4) + ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<8 x s64>) = G_LOAD [[COPY]](p4) :: (load (<8 x s64>), addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(<8 x s64>) = G_LOAD %0 :: (load 64, addrspace 4) + %1:_(<8 x s64>) = G_LOAD %0 :: (load (<8 x s64>), addrspace 4) ... --- @@ -457,9 +458,9 @@ body: | ; CHECK-LABEL: name: load_local_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p3) :: (load 4, addrspace 3) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p3) :: (load (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 - %1:_(s32) = G_LOAD %0 :: (load 4, addrspace 3) + %1:_(s32) = G_LOAD %0 :: (load (s32), addrspace 3) ... --- @@ -472,9 +473,9 @@ body: | ; CHECK-LABEL: name: load_region_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p3) :: (load 4, addrspace 5) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p3) :: (load (s32), addrspace 5) %0:_(p3) = COPY $sgpr0 - %1:_(s32) = G_LOAD %0 :: (load 4, addrspace 5) + %1:_(s32) = G_LOAD %0 :: (load (s32), addrspace 5) ... @@ -488,9 +489,9 @@ body: | ; CHECK-LABEL: name: extload_constant_i8_to_i32_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load 1, addrspace 4) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s8), addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (load 1, addrspace 4, align 1) + %1:_(s32) = G_LOAD %0 :: (load (s8), addrspace 4, align 1) ... --- @@ -504,9 +505,9 @@ body: | ; CHECK-LABEL: name: extload_global_i8_to_i32_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load 1, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s8), addrspace 1) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (load 1, addrspace 1, align 1) + %1:_(s32) = G_LOAD %0 :: (load (s8), addrspace 1, align 1) ... --- @@ -520,9 +521,9 @@ body: | ; CHECK-LABEL: name: extload_constant_i16_to_i32_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load 2, addrspace 4) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s16), addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (load 2, addrspace 4, align 2) + %1:_(s32) = G_LOAD %0 :: (load (s16), addrspace 4, align 2) ... --- @@ -536,9 +537,9 @@ body: | ; CHECK-LABEL: name: extload_global_i16_to_i32_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load 2, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s16), addrspace 1) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (load 2, addrspace 1, align 2) + %1:_(s32) = G_LOAD %0 :: (load (s16), addrspace 1, align 2) ... --- @@ -550,9 +551,9 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_i32_uniform_align4 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p4) :: (load 4, addrspace 4) + ; CHECK: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (load 4, addrspace 4, align 4) + %1:_(s32) = G_LOAD %0 :: (load (s32), addrspace 4, align 4) ... --- @@ -566,9 +567,9 @@ body: | ; CHECK-LABEL: name: load_constant_i32_uniform_align2 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load 4, align 2, addrspace 4) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s32), align 2, addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (load 4, addrspace 4, align 2) + %1:_(s32) = G_LOAD %0 :: (load (s32), addrspace 4, align 2) ... --- @@ -582,9 +583,9 @@ body: | ; CHECK-LABEL: name: load_constant_i32_uniform_align1 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load 4, align 1, addrspace 4) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s32), align 1, addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (load 4, addrspace 4, align 1) + %1:_(s32) = G_LOAD %0 :: (load (s32), addrspace 4, align 1) ... --- @@ -598,9 +599,9 @@ body: | ; CHECK-LABEL: name: load_private_uniform_sgpr_i32 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sgpr0 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p5) = COPY [[COPY]](p5) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p5) :: (load 4, addrspace 5) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p5) :: (load (s32), addrspace 5) %0:_(p5) = COPY $sgpr0 - %1:_(s32) = G_LOAD %0 :: (load 4, addrspace 5, align 4) + %1:_(s32) = G_LOAD %0 :: (load (s32), addrspace 5, align 4) ... --- @@ -615,13 +616,13 @@ body: | ; CHECK-LABEL: name: load_constant_v8i32_vgpr_crash ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK: [[COPY:%[0-9]+]]:vgpr(p4) = COPY $vgpr0_vgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load 16, align 32, addrspace 4) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (s128), align 32, addrspace 4) ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load 16 from unknown-address + 16, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (s128) from unknown-address + 16, addrspace 4) ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) %0:_(p4) = COPY $vgpr0_vgpr1 - %1:_(<8 x s32>) = G_LOAD %0 :: (load 32, addrspace 4) + %1:_(<8 x s32>) = G_LOAD %0 :: (load (<8 x s32>), addrspace 4) ... --- @@ -640,10 +641,10 @@ body: | ; CHECK: bb.1: ; CHECK: successors: %bb.1(0x80000000) ; CHECK: [[PHI:%[0-9]+]]:vgpr(p4) = G_PHI [[COPY]](p4), %bb.0, %3(p4), %bb.1 - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PHI]](p4) :: (load 16, align 32, addrspace 4) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PHI]](p4) :: (load (s128), align 32, addrspace 4) ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[PHI]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load 16 from unknown-address + 16, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (s128) from unknown-address + 16, addrspace 4) ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) ; CHECK: [[COPY2:%[0-9]+]]:sgpr(p4) = COPY [[COPY1]](p4) ; CHECK: G_BR %bb.1 @@ -657,7 +658,7 @@ body: | bb.1: %2:_(p4) = G_PHI %0, %bb.0, %4, %bb.1 - %3:_(<8 x s32>) = G_LOAD %2 :: (load 32, addrspace 4) + %3:_(<8 x s32>) = G_LOAD %2 :: (load (<8 x s32>), addrspace 4) %4:_(p4) = COPY %1 G_BR %bb.1 ... @@ -671,16 +672,16 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v3i32_align4 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_LOAD [[COPY]](p4) :: (invariant load 8, align 4, addrspace 4) + ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_LOAD [[COPY]](p4) :: (invariant load (<2 x s32>), align 4, addrspace 4) ; CHECK: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (invariant load 4 from unknown-address + 8, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (s32) from unknown-address + 8, addrspace 4) ; CHECK: [[DEF:%[0-9]+]]:sgpr(<3 x s32>) = G_IMPLICIT_DEF ; CHECK: [[INSERT:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0 ; CHECK: [[INSERT1:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64 ; CHECK: S_ENDPGM 0, implicit [[INSERT1]](<3 x s32>) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(<3 x s32>) = G_LOAD %0 :: (invariant load 12, addrspace 4, align 4) + %1:_(<3 x s32>) = G_LOAD %0 :: (invariant load (<3 x s32>), addrspace 4, align 4) S_ENDPGM 0, implicit %1 ... @@ -693,16 +694,16 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v3i32_align8 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_LOAD [[COPY]](p4) :: (invariant load 8, addrspace 4) + ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_LOAD [[COPY]](p4) :: (invariant load (<2 x s32>), addrspace 4) ; CHECK: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (invariant load 4 from unknown-address + 8, align 8, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (s32) from unknown-address + 8, align 8, addrspace 4) ; CHECK: [[DEF:%[0-9]+]]:sgpr(<3 x s32>) = G_IMPLICIT_DEF ; CHECK: [[INSERT:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0 ; CHECK: [[INSERT1:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64 ; CHECK: S_ENDPGM 0, implicit [[INSERT1]](<3 x s32>) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(<3 x s32>) = G_LOAD %0 :: (invariant load 12, addrspace 4, align 8) + %1:_(<3 x s32>) = G_LOAD %0 :: (invariant load (<3 x s32>), addrspace 4, align 8) S_ENDPGM 0, implicit %1 ... @@ -715,11 +716,11 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v3i32_align16 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_LOAD [[COPY]](p4) :: (invariant load 16, addrspace 4) + ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_LOAD [[COPY]](p4) :: (invariant load (<4 x s32>), addrspace 4) ; CHECK: [[EXTRACT:%[0-9]+]]:sgpr(<3 x s32>) = G_EXTRACT [[LOAD]](<4 x s32>), 0 ; CHECK: S_ENDPGM 0, implicit [[EXTRACT]](<3 x s32>) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(<3 x s32>) = G_LOAD %0 :: (invariant load 12, addrspace 4, align 16) + %1:_(<3 x s32>) = G_LOAD %0 :: (invariant load (<3 x s32>), addrspace 4, align 16) S_ENDPGM 0, implicit %1 ... @@ -732,16 +733,16 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v6i16_align4 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<4 x s16>) = G_LOAD [[COPY]](p4) :: (invariant load 8, align 4, addrspace 4) + ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<4 x s16>) = G_LOAD [[COPY]](p4) :: (invariant load (<4 x s16>), align 4, addrspace 4) ; CHECK: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:sgpr(<2 x s16>) = G_LOAD [[PTR_ADD]](p4) :: (invariant load 4 from unknown-address + 8, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:sgpr(<2 x s16>) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (<2 x s16>) from unknown-address + 8, addrspace 4) ; CHECK: [[DEF:%[0-9]+]]:sgpr(<6 x s16>) = G_IMPLICIT_DEF ; CHECK: [[INSERT:%[0-9]+]]:sgpr(<6 x s16>) = G_INSERT [[DEF]], [[LOAD]](<4 x s16>), 0 ; CHECK: [[INSERT1:%[0-9]+]]:sgpr(<6 x s16>) = G_INSERT [[INSERT]], [[LOAD1]](<2 x s16>), 64 ; CHECK: S_ENDPGM 0, implicit [[INSERT1]](<6 x s16>) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(<6 x s16>) = G_LOAD %0 :: (invariant load 12, addrspace 4, align 4) + %1:_(<6 x s16>) = G_LOAD %0 :: (invariant load (<6 x s16>), addrspace 4, align 4) S_ENDPGM 0, implicit %1 ... @@ -754,16 +755,16 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v6i16_align8 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<4 x s16>) = G_LOAD [[COPY]](p4) :: (invariant load 8, addrspace 4) + ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<4 x s16>) = G_LOAD [[COPY]](p4) :: (invariant load (<4 x s16>), addrspace 4) ; CHECK: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:sgpr(<2 x s16>) = G_LOAD [[PTR_ADD]](p4) :: (invariant load 4 from unknown-address + 8, align 8, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:sgpr(<2 x s16>) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (<2 x s16>) from unknown-address + 8, align 8, addrspace 4) ; CHECK: [[DEF:%[0-9]+]]:sgpr(<6 x s16>) = G_IMPLICIT_DEF ; CHECK: [[INSERT:%[0-9]+]]:sgpr(<6 x s16>) = G_INSERT [[DEF]], [[LOAD]](<4 x s16>), 0 ; CHECK: [[INSERT1:%[0-9]+]]:sgpr(<6 x s16>) = G_INSERT [[INSERT]], [[LOAD1]](<2 x s16>), 64 ; CHECK: S_ENDPGM 0, implicit [[INSERT1]](<6 x s16>) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(<6 x s16>) = G_LOAD %0 :: (invariant load 12, addrspace 4, align 8) + %1:_(<6 x s16>) = G_LOAD %0 :: (invariant load (<6 x s16>), addrspace 4, align 8) S_ENDPGM 0, implicit %1 ... @@ -776,11 +777,11 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v6i16_align16 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<8 x s16>) = G_LOAD [[COPY]](p4) :: (invariant load 16, addrspace 4) + ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<8 x s16>) = G_LOAD [[COPY]](p4) :: (invariant load (<8 x s16>), addrspace 4) ; CHECK: [[EXTRACT:%[0-9]+]]:sgpr(<6 x s16>) = G_EXTRACT [[LOAD]](<8 x s16>), 0 ; CHECK: S_ENDPGM 0, implicit [[EXTRACT]](<6 x s16>) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(<6 x s16>) = G_LOAD %0 :: (invariant load 12, addrspace 4, align 16) + %1:_(<6 x s16>) = G_LOAD %0 :: (invariant load (<6 x s16>), addrspace 4, align 16) S_ENDPGM 0, implicit %1 ... @@ -793,16 +794,16 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_i96_align4 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(s64) = G_LOAD [[COPY]](p4) :: (invariant load 8, align 4, addrspace 4) + ; CHECK: [[LOAD:%[0-9]+]]:sgpr(s64) = G_LOAD [[COPY]](p4) :: (invariant load (s64), align 4, addrspace 4) ; CHECK: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (invariant load 4 from unknown-address + 8, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (s32) from unknown-address + 8, addrspace 4) ; CHECK: [[DEF:%[0-9]+]]:sgpr(s96) = G_IMPLICIT_DEF ; CHECK: [[INSERT:%[0-9]+]]:sgpr(s96) = G_INSERT [[DEF]], [[LOAD]](s64), 0 ; CHECK: [[INSERT1:%[0-9]+]]:sgpr(s96) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64 ; CHECK: S_ENDPGM 0, implicit [[INSERT1]](s96) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(s96) = G_LOAD %0 :: (invariant load 12, addrspace 4, align 4) + %1:_(s96) = G_LOAD %0 :: (invariant load (s96), addrspace 4, align 4) S_ENDPGM 0, implicit %1 ... @@ -815,16 +816,16 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_i96_align8 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(s64) = G_LOAD [[COPY]](p4) :: (invariant load 8, addrspace 4) + ; CHECK: [[LOAD:%[0-9]+]]:sgpr(s64) = G_LOAD [[COPY]](p4) :: (invariant load (s64), addrspace 4) ; CHECK: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (invariant load 4 from unknown-address + 8, align 8, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (s32) from unknown-address + 8, align 8, addrspace 4) ; CHECK: [[DEF:%[0-9]+]]:sgpr(s96) = G_IMPLICIT_DEF ; CHECK: [[INSERT:%[0-9]+]]:sgpr(s96) = G_INSERT [[DEF]], [[LOAD]](s64), 0 ; CHECK: [[INSERT1:%[0-9]+]]:sgpr(s96) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64 ; CHECK: S_ENDPGM 0, implicit [[INSERT1]](s96) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(s96) = G_LOAD %0 :: (invariant load 12, addrspace 4, align 8) + %1:_(s96) = G_LOAD %0 :: (invariant load (s96), addrspace 4, align 8) S_ENDPGM 0, implicit %1 ... @@ -837,10 +838,10 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_i96_align16 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(s128) = G_LOAD [[COPY]](p4) :: (invariant load 16, addrspace 4) + ; CHECK: [[LOAD:%[0-9]+]]:sgpr(s128) = G_LOAD [[COPY]](p4) :: (invariant load (s128), addrspace 4) ; CHECK: [[EXTRACT:%[0-9]+]]:sgpr(s96) = G_EXTRACT [[LOAD]](s128), 0 ; CHECK: S_ENDPGM 0, implicit [[EXTRACT]](s96) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(s96) = G_LOAD %0 :: (invariant load 12, addrspace 4, align 16) + %1:_(s96) = G_LOAD %0 :: (invariant load (s96), addrspace 4, align 16) S_ENDPGM 0, implicit %1 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sextload.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sextload.mir index 906bf9456b169..59130900784ce 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sextload.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sextload.mir @@ -12,9 +12,9 @@ body: | ; CHECK-LABEL: name: sextload_constant_i8_to_i32_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p4) :: (load 1, addrspace 4) + ; CHECK: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p4) :: (load (s8), addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_SEXTLOAD %0 :: (load 1, addrspace 4, align 1) + %1:_(s32) = G_SEXTLOAD %0 :: (load (s8), addrspace 4, align 1) ... --- @@ -28,9 +28,9 @@ body: | ; CHECK-LABEL: name: sextload_global_i8_to_i32_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p4) :: (load 1, addrspace 1) + ; CHECK: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p4) :: (load (s8), addrspace 1) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_SEXTLOAD %0 :: (load 1, addrspace 1, align 1) + %1:_(s32) = G_SEXTLOAD %0 :: (load (s8), addrspace 1, align 1) ... --- @@ -44,9 +44,9 @@ body: | ; CHECK-LABEL: name: sextload_constant_i16_to_i32_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p4) :: (load 2, addrspace 4) + ; CHECK: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p4) :: (load (s16), addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_SEXTLOAD %0 :: (load 2, addrspace 4, align 2) + %1:_(s32) = G_SEXTLOAD %0 :: (load (s16), addrspace 4, align 2) ... --- @@ -60,9 +60,9 @@ body: | ; CHECK-LABEL: name: sextload_global_i16_to_i32_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p4) :: (load 2, addrspace 1) + ; CHECK: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p4) :: (load (s16), addrspace 1) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_SEXTLOAD %0 :: (load 2, addrspace 1, align 2) + %1:_(s32) = G_SEXTLOAD %0 :: (load (s16), addrspace 1, align 2) ... --- @@ -75,9 +75,9 @@ body: | ; CHECK-LABEL: name: sextload_local_i8_to_i32_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p3) :: (load 1, addrspace 3) + ; CHECK: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p3) :: (load (s8), addrspace 3) %0:_(p3) = COPY $sgpr0 - %1:_(s32) = G_SEXTLOAD %0 :: (load 1, addrspace 3, align 1) + %1:_(s32) = G_SEXTLOAD %0 :: (load (s8), addrspace 3, align 1) ... --- @@ -91,8 +91,7 @@ body: | ; CHECK-LABEL: name: sextload_local_i16_to_i32_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p3) :: (load 2, addrspace 3) + ; CHECK: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p3) :: (load (s16), addrspace 3) %0:_(p3) = COPY $sgpr0 - %1:_(s32) = G_SEXTLOAD %0 :: (load 2, addrspace 3, align 2) + %1:_(s32) = G_SEXTLOAD %0 :: (load (s16), addrspace 3, align 2) ... -! diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-split-scalar-load-metadata.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-split-scalar-load-metadata.mir index 4ba1fc7b088e8..163a83aac29b7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-split-scalar-load-metadata.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-split-scalar-load-metadata.mir @@ -30,16 +30,16 @@ body: | ; SI-LABEL: name: split_smrd_load_range ; SI: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; SI: [[LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4) + ; SI: [[LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), addrspace 4) ; SI: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 4 from unknown-address + 8, align 8, addrspace 4) + ; SI: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s32) from unknown-address + 8, align 8, addrspace 4) ; SI: [[DEF:%[0-9]+]]:sgpr(<3 x s32>) = G_IMPLICIT_DEF ; SI: [[INSERT:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0 ; SI: [[INSERT1:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64 ; SI: $sgpr0_sgpr1_sgpr2 = COPY [[INSERT1]](<3 x s32>) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(<3 x s32>) = G_LOAD %0 :: (load 12, align 8, addrspace 4, !range !0) + %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 8, addrspace 4, !range !0) $sgpr0_sgpr1_sgpr2 = COPY %1 ... @@ -53,16 +53,16 @@ body: | ; SI-LABEL: name: split_smrd_load_tbaa ; SI: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; SI: [[LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load 8, !tbaa !2, addrspace 4) + ; SI: [[LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), !tbaa !2, addrspace 4) ; SI: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 ; SI: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 4 from unknown-address + 8, align 8, !tbaa !2, addrspace 4) + ; SI: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s32) from unknown-address + 8, align 8, !tbaa !2, addrspace 4) ; SI: [[DEF:%[0-9]+]]:sgpr(<3 x s32>) = G_IMPLICIT_DEF ; SI: [[INSERT:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0 ; SI: [[INSERT1:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64 ; SI: $sgpr0_sgpr1_sgpr2 = COPY [[INSERT1]](<3 x s32>) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(<3 x s32>) = G_LOAD %0 :: (load 12, align 8, addrspace 4, !tbaa !1) + %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 8, addrspace 4, !tbaa !1) $sgpr0_sgpr1_sgpr2 = COPY %1 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uniform-load-noclobber.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uniform-load-noclobber.mir index 4aca3118b2924..5f470b60c1a41 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uniform-load-noclobber.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uniform-load-noclobber.mir @@ -14,75 +14,75 @@ body: | ; GFX7: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX7: %in_addr:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX7: %out_addr:sgpr(p1) = COPY $sgpr2_sgpr3 - ; GFX7: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD %in_addr(p1) :: (load 16, align 4, addrspace 1) + ; GFX7: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD %in_addr(p1) :: (load (s128), align 4, addrspace 1) ; GFX7: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 ; GFX7: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD %in_addr, [[C]](s64) - ; GFX7: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load 16 from unknown-address + 16, align 4, addrspace 1) + ; GFX7: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (s128) from unknown-address + 16, align 4, addrspace 1) ; GFX7: [[C1:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 32 ; GFX7: [[PTR_ADD1:%[0-9]+]]:vgpr(p1) = G_PTR_ADD %in_addr, [[C1]](s64) - ; GFX7: [[LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD1]](p1) :: (load 16 from unknown-address + 32, align 4, addrspace 1) + ; GFX7: [[LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD1]](p1) :: (load (s128) from unknown-address + 32, align 4, addrspace 1) ; GFX7: [[C2:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48 ; GFX7: [[PTR_ADD2:%[0-9]+]]:vgpr(p1) = G_PTR_ADD %in_addr, [[C2]](s64) - ; GFX7: [[LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load 16 from unknown-address + 48, align 4, addrspace 1) + ; GFX7: [[LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load (s128) from unknown-address + 48, align 4, addrspace 1) ; GFX7: %load:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) ; GFX7: %load0_3:vgpr(<4 x s32>), %load4_7:vgpr(<4 x s32>), %load8_11:vgpr(<4 x s32>), %load12_15:vgpr(<4 x s32>) = G_UNMERGE_VALUES %load(<16 x s32>) - ; GFX7: G_STORE %load0_3(<4 x s32>), %out_addr(p1) :: (store 16, align 4, addrspace 1) + ; GFX7: G_STORE %load0_3(<4 x s32>), %out_addr(p1) :: (store (<4 x s32>), align 4, addrspace 1) ; GFX7: %cst16:sgpr(s64) = G_CONSTANT i64 16 ; GFX7: %out_addr_plus_16:sgpr(p1) = G_PTR_ADD %out_addr, %cst16(s64) - ; GFX7: G_STORE %load4_7(<4 x s32>), %out_addr_plus_16(p1) :: (store 16, align 4, addrspace 1) + ; GFX7: G_STORE %load4_7(<4 x s32>), %out_addr_plus_16(p1) :: (store (<4 x s32>), align 4, addrspace 1) ; GFX7: %cst32:sgpr(s64) = G_CONSTANT i64 32 ; GFX7: %out_addr_plus_32:sgpr(p1) = G_PTR_ADD %out_addr, %cst32(s64) - ; GFX7: G_STORE %load8_11(<4 x s32>), %out_addr_plus_32(p1) :: (store 16, align 4, addrspace 1) + ; GFX7: G_STORE %load8_11(<4 x s32>), %out_addr_plus_32(p1) :: (store (<4 x s32>), align 4, addrspace 1) ; GFX7: %cst48:sgpr(s64) = G_CONSTANT i64 48 ; GFX7: %out_addr_plus_48:sgpr(p1) = G_PTR_ADD %out_addr, %cst48(s64) - ; GFX7: G_STORE %load12_15(<4 x s32>), %out_addr_plus_48(p1) :: (store 16, align 4, addrspace 1) + ; GFX7: G_STORE %load12_15(<4 x s32>), %out_addr_plus_48(p1) :: (store (<4 x s32>), align 4, addrspace 1) ; GFX7: S_ENDPGM 0 ; GFX1010-LABEL: name: test_uniform_load_without_noclobber ; GFX1010: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX1010: %in_addr:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX1010: %out_addr:sgpr(p1) = COPY $sgpr2_sgpr3 ; GFX1010: [[COPY:%[0-9]+]]:vgpr(p1) = COPY %in_addr(p1) - ; GFX1010: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD %in_addr(p1) :: (load 16, align 4, addrspace 1) + ; GFX1010: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD %in_addr(p1) :: (load (s128), align 4, addrspace 1) ; GFX1010: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 ; GFX1010: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD %in_addr, [[C]](s64) - ; GFX1010: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load 16 from unknown-address + 16, align 4, addrspace 1) + ; GFX1010: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (s128) from unknown-address + 16, align 4, addrspace 1) ; GFX1010: [[C1:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 32 ; GFX1010: [[PTR_ADD1:%[0-9]+]]:vgpr(p1) = G_PTR_ADD %in_addr, [[C1]](s64) - ; GFX1010: [[LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD1]](p1) :: (load 16 from unknown-address + 32, align 4, addrspace 1) + ; GFX1010: [[LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD1]](p1) :: (load (s128) from unknown-address + 32, align 4, addrspace 1) ; GFX1010: [[C2:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48 ; GFX1010: [[PTR_ADD2:%[0-9]+]]:vgpr(p1) = G_PTR_ADD %in_addr, [[C2]](s64) - ; GFX1010: [[LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load 16 from unknown-address + 48, align 4, addrspace 1) + ; GFX1010: [[LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load (s128) from unknown-address + 48, align 4, addrspace 1) ; GFX1010: %load:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) ; GFX1010: %load0_3:vgpr(<4 x s32>), %load4_7:vgpr(<4 x s32>), %load8_11:vgpr(<4 x s32>), %load12_15:vgpr(<4 x s32>) = G_UNMERGE_VALUES %load(<16 x s32>) ; GFX1010: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY %out_addr(p1) - ; GFX1010: G_STORE %load0_3(<4 x s32>), [[COPY1]](p1) :: (store 16, align 4, addrspace 1) + ; GFX1010: G_STORE %load0_3(<4 x s32>), [[COPY1]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; GFX1010: %cst16:sgpr(s64) = G_CONSTANT i64 16 ; GFX1010: %out_addr_plus_16:sgpr(p1) = G_PTR_ADD %out_addr, %cst16(s64) ; GFX1010: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY %out_addr_plus_16(p1) - ; GFX1010: G_STORE %load4_7(<4 x s32>), [[COPY2]](p1) :: (store 16, align 4, addrspace 1) + ; GFX1010: G_STORE %load4_7(<4 x s32>), [[COPY2]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; GFX1010: %cst32:sgpr(s64) = G_CONSTANT i64 32 ; GFX1010: %out_addr_plus_32:sgpr(p1) = G_PTR_ADD %out_addr, %cst32(s64) ; GFX1010: [[COPY3:%[0-9]+]]:vgpr(p1) = COPY %out_addr_plus_32(p1) - ; GFX1010: G_STORE %load8_11(<4 x s32>), [[COPY3]](p1) :: (store 16, align 4, addrspace 1) + ; GFX1010: G_STORE %load8_11(<4 x s32>), [[COPY3]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; GFX1010: %cst48:sgpr(s64) = G_CONSTANT i64 48 ; GFX1010: %out_addr_plus_48:sgpr(p1) = G_PTR_ADD %out_addr, %cst48(s64) ; GFX1010: [[COPY4:%[0-9]+]]:vgpr(p1) = COPY %out_addr_plus_48(p1) - ; GFX1010: G_STORE %load12_15(<4 x s32>), [[COPY4]](p1) :: (store 16, align 4, addrspace 1) + ; GFX1010: G_STORE %load12_15(<4 x s32>), [[COPY4]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; GFX1010: S_ENDPGM 0 %in_addr:_(p1) = COPY $sgpr0_sgpr1 %out_addr:_(p1) = COPY $sgpr2_sgpr3 - %load:_(<16 x s32>) = G_LOAD %in_addr(p1) :: (load 64, align 4, addrspace 1) + %load:_(<16 x s32>) = G_LOAD %in_addr(p1) :: (load (<16 x s32>), align 4, addrspace 1) %load0_3:_(<4 x s32>), %load4_7:_(<4 x s32>), %load8_11:_(<4 x s32>), %load12_15:_(<4 x s32>) = G_UNMERGE_VALUES %load(<16 x s32>) - G_STORE %load0_3(<4 x s32>), %out_addr(p1) :: (store 16, align 4, addrspace 1) + G_STORE %load0_3(<4 x s32>), %out_addr(p1) :: (store (<4 x s32>), align 4, addrspace 1) %cst16:_(s64) = G_CONSTANT i64 16 %out_addr_plus_16:_(p1) = G_PTR_ADD %out_addr, %cst16(s64) - G_STORE %load4_7(<4 x s32>), %out_addr_plus_16(p1) :: (store 16, align 4, addrspace 1) + G_STORE %load4_7(<4 x s32>), %out_addr_plus_16(p1) :: (store (<4 x s32>), align 4, addrspace 1) %cst32:_(s64) = G_CONSTANT i64 32 %out_addr_plus_32:_(p1) = G_PTR_ADD %out_addr, %cst32(s64) - G_STORE %load8_11(<4 x s32>), %out_addr_plus_32(p1) :: (store 16, align 4, addrspace 1) + G_STORE %load8_11(<4 x s32>), %out_addr_plus_32(p1) :: (store (<4 x s32>), align 4, addrspace 1) %cst48:_(s64) = G_CONSTANT i64 48 %out_addr_plus_48:_(p1) = G_PTR_ADD %out_addr, %cst48(s64) - G_STORE %load12_15(<4 x s32>), %out_addr_plus_48(p1) :: (store 16, align 4, addrspace 1) + G_STORE %load12_15(<4 x s32>), %out_addr_plus_48(p1) :: (store (<4 x s32>), align 4, addrspace 1) S_ENDPGM 0 ... @@ -98,42 +98,42 @@ body: | ; GFX7: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX7: %ptr:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX7: %out:sgpr(p1) = COPY $sgpr2_sgpr3 - ; GFX7: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD %ptr(p4) :: (load 16, align 1, addrspace 4) + ; GFX7: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD %ptr(p4) :: (load (s128), align 1, addrspace 4) ; GFX7: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 ; GFX7: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD %ptr, [[C]](s64) - ; GFX7: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load 16 from unknown-address + 16, align 1, addrspace 4) + ; GFX7: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (s128) from unknown-address + 16, align 1, addrspace 4) ; GFX7: %load:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) ; GFX7: %load0_3:vgpr(<4 x s32>), %load4_7:vgpr(<4 x s32>) = G_UNMERGE_VALUES %load(<8 x s32>) - ; GFX7: G_STORE %load0_3(<4 x s32>), %out(p1) :: (store 16, align 32, addrspace 1) + ; GFX7: G_STORE %load0_3(<4 x s32>), %out(p1) :: (store (<4 x s32>), align 32, addrspace 1) ; GFX7: %cst_16:sgpr(s64) = G_CONSTANT i64 16 ; GFX7: %out_plus_16:sgpr(p1) = G_PTR_ADD %out, %cst_16(s64) - ; GFX7: G_STORE %load4_7(<4 x s32>), %out_plus_16(p1) :: (store 16, align 32, addrspace 1) + ; GFX7: G_STORE %load4_7(<4 x s32>), %out_plus_16(p1) :: (store (<4 x s32>), align 32, addrspace 1) ; GFX7: S_ENDPGM 0 ; GFX1010-LABEL: name: test_s_load_constant_v8i32_align1 ; GFX1010: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 ; GFX1010: %ptr:sgpr(p4) = COPY $sgpr0_sgpr1 ; GFX1010: %out:sgpr(p1) = COPY $sgpr2_sgpr3 ; GFX1010: [[COPY:%[0-9]+]]:vgpr(p4) = COPY %ptr(p4) - ; GFX1010: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD %ptr(p4) :: (load 16, align 1, addrspace 4) + ; GFX1010: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD %ptr(p4) :: (load (s128), align 1, addrspace 4) ; GFX1010: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 ; GFX1010: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD %ptr, [[C]](s64) - ; GFX1010: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load 16 from unknown-address + 16, align 1, addrspace 4) + ; GFX1010: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (s128) from unknown-address + 16, align 1, addrspace 4) ; GFX1010: %load:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) ; GFX1010: %load0_3:vgpr(<4 x s32>), %load4_7:vgpr(<4 x s32>) = G_UNMERGE_VALUES %load(<8 x s32>) ; GFX1010: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY %out(p1) - ; GFX1010: G_STORE %load0_3(<4 x s32>), [[COPY1]](p1) :: (store 16, align 32, addrspace 1) + ; GFX1010: G_STORE %load0_3(<4 x s32>), [[COPY1]](p1) :: (store (<4 x s32>), align 32, addrspace 1) ; GFX1010: %cst_16:sgpr(s64) = G_CONSTANT i64 16 ; GFX1010: %out_plus_16:sgpr(p1) = G_PTR_ADD %out, %cst_16(s64) ; GFX1010: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY %out_plus_16(p1) - ; GFX1010: G_STORE %load4_7(<4 x s32>), [[COPY2]](p1) :: (store 16, align 32, addrspace 1) + ; GFX1010: G_STORE %load4_7(<4 x s32>), [[COPY2]](p1) :: (store (<4 x s32>), align 32, addrspace 1) ; GFX1010: S_ENDPGM 0 %ptr:_(p4) = COPY $sgpr0_sgpr1 %out:_(p1) = COPY $sgpr2_sgpr3 - %load:_(<8 x s32>) = G_LOAD %ptr(p4) :: (load 32, align 1, addrspace 4) + %load:_(<8 x s32>) = G_LOAD %ptr(p4) :: (load (<8 x s32>), align 1, addrspace 4) %load0_3:_(<4 x s32>), %load4_7:_(<4 x s32>) = G_UNMERGE_VALUES %load(<8 x s32>) - G_STORE %load0_3(<4 x s32>), %out(p1) :: (store 16, align 32, addrspace 1) + G_STORE %load0_3(<4 x s32>), %out(p1) :: (store (<4 x s32>), align 32, addrspace 1) %cst_16:_(s64) = G_CONSTANT i64 16 %out_plus_16:_(p1) = G_PTR_ADD %out, %cst_16(s64) - G_STORE %load4_7(<4 x s32>), %out_plus_16(p1) :: (store 16, basealign 32, addrspace 1) + G_STORE %load4_7(<4 x s32>), %out_plus_16(p1) :: (store (<4 x s32>), basealign 32, addrspace 1) S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-waterfall-agpr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-waterfall-agpr.mir index a437d6059ddbe..25c0d24a65a99 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-waterfall-agpr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-waterfall-agpr.mir @@ -30,7 +30,7 @@ body: | ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.0, %9, %bb.1 ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec - ; CHECK: G_AMDGPU_BUFFER_STORE %val(s32), %rsrc(<4 x s32>), [[COPY]](s32), %voffset, [[V_READFIRSTLANE_B32_]], 0, 0, 0 :: (dereferenceable store 4, addrspace 4) + ; CHECK: G_AMDGPU_BUFFER_STORE %val(s32), %rsrc(<4 x s32>), [[COPY]](s32), %voffset, [[V_READFIRSTLANE_B32_]], 0, 0, 0 :: (dereferenceable store (s32), addrspace 4) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec @@ -44,7 +44,7 @@ body: | %agpr:_(s32) = COPY $agpr0 %voffset:_(s32) = COPY $vgpr1 %zero:_(s32) = G_CONSTANT i32 0 - G_AMDGPU_BUFFER_STORE %val, %rsrc, %zero, %voffset, %agpr, 0, 0, 0 :: (dereferenceable store 4, addrspace 4) + G_AMDGPU_BUFFER_STORE %val, %rsrc, %zero, %voffset, %agpr, 0, 0, 0 :: (dereferenceable store (s32), addrspace 4) S_ENDPGM 0 ... @@ -91,7 +91,7 @@ body: | ; CHECK: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec ; CHECK: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32) - ; CHECK: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY1]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 16) + ; CHECK: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY1]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>)) ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec @@ -102,6 +102,6 @@ body: | ; CHECK: S_ENDPGM 0, implicit [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) %0:_(<8 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 %1:_(s32) = COPY $vgpr0 - %2:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, %1(s32), %0(<8 x s32>), 0, 0, 0 :: (dereferenceable load 16) + %2:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, %1(s32), %0(<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>)) S_ENDPGM 0, implicit %2 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-widen-scalar-loads.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-widen-scalar-loads.mir index 39c8fda387e0a..0e689f05a0c4a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-widen-scalar-loads.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-widen-scalar-loads.mir @@ -11,18 +11,18 @@ body: | liveins: $sgpr0_sgpr1 ; GFX8-LABEL: name: constant_load_i8_align8 ; GFX8: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX8: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load 4, align 8, addrspace 4) + ; GFX8: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), align 8, addrspace 4) ; GFX8: S_ENDPGM 0, implicit [[LOAD]](s32) ; GFX9-LABEL: name: constant_load_i8_align8 ; GFX9: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load 4, align 8, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), align 8, addrspace 4) ; GFX9: S_ENDPGM 0, implicit [[LOAD]](s32) ; GFX10-LABEL: name: constant_load_i8_align8 ; GFX10: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX10: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load 4, align 8, addrspace 4) + ; GFX10: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), align 8, addrspace 4) ; GFX10: S_ENDPGM 0, implicit [[LOAD]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (invariant load 1, align 8, addrspace 4) + %1:_(s32) = G_LOAD %0 :: (invariant load (s8), align 8, addrspace 4) S_ENDPGM 0, implicit %1 ... --- @@ -34,18 +34,18 @@ body: | liveins: $sgpr0_sgpr1 ; GFX8-LABEL: name: constant_load_i8_align4 ; GFX8: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX8: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load 4, addrspace 4) + ; GFX8: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 4) ; GFX8: S_ENDPGM 0, implicit [[LOAD]](s32) ; GFX9-LABEL: name: constant_load_i8_align4 ; GFX9: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load 4, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 4) ; GFX9: S_ENDPGM 0, implicit [[LOAD]](s32) ; GFX10-LABEL: name: constant_load_i8_align4 ; GFX10: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX10: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load 4, addrspace 4) + ; GFX10: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 4) ; GFX10: S_ENDPGM 0, implicit [[LOAD]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (invariant load 1, align 4, addrspace 4) + %1:_(s32) = G_LOAD %0 :: (invariant load (s8), align 4, addrspace 4) S_ENDPGM 0, implicit %1 ... --- @@ -57,18 +57,18 @@ body: | liveins: $sgpr0_sgpr1 ; GFX8-LABEL: name: constant_load_i16_align4 ; GFX8: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX8: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load 4, addrspace 4) + ; GFX8: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 4) ; GFX8: S_ENDPGM 0, implicit [[LOAD]](s32) ; GFX9-LABEL: name: constant_load_i16_align4 ; GFX9: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load 4, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 4) ; GFX9: S_ENDPGM 0, implicit [[LOAD]](s32) ; GFX10-LABEL: name: constant_load_i16_align4 ; GFX10: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX10: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load 4, addrspace 4) + ; GFX10: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 4) ; GFX10: S_ENDPGM 0, implicit [[LOAD]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (invariant load 2, align 4, addrspace 4) + %1:_(s32) = G_LOAD %0 :: (invariant load (s16), align 4, addrspace 4) S_ENDPGM 0, implicit %1 ... --- @@ -80,21 +80,21 @@ body: | liveins: $sgpr0_sgpr1 ; GFX8-LABEL: name: constant_sextload_i8_align4 ; GFX8: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX8: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load 4, addrspace 4) + ; GFX8: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 4) ; GFX8: [[SEXT_INREG:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[LOAD]], 8 ; GFX8: S_ENDPGM 0, implicit [[SEXT_INREG]](s32) ; GFX9-LABEL: name: constant_sextload_i8_align4 ; GFX9: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load 4, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 4) ; GFX9: [[SEXT_INREG:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[LOAD]], 8 ; GFX9: S_ENDPGM 0, implicit [[SEXT_INREG]](s32) ; GFX10-LABEL: name: constant_sextload_i8_align4 ; GFX10: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX10: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load 4, addrspace 4) + ; GFX10: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 4) ; GFX10: [[SEXT_INREG:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[LOAD]], 8 ; GFX10: S_ENDPGM 0, implicit [[SEXT_INREG]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_SEXTLOAD %0 :: (invariant load 1, align 4, addrspace 4) + %1:_(s32) = G_SEXTLOAD %0 :: (invariant load (s8), align 4, addrspace 4) S_ENDPGM 0, implicit %1 ... --- @@ -106,21 +106,21 @@ body: | liveins: $sgpr0_sgpr1 ; GFX8-LABEL: name: constant_sextload_i16_align4 ; GFX8: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX8: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load 4, addrspace 4) + ; GFX8: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 4) ; GFX8: [[SEXT_INREG:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[LOAD]], 16 ; GFX8: S_ENDPGM 0, implicit [[SEXT_INREG]](s32) ; GFX9-LABEL: name: constant_sextload_i16_align4 ; GFX9: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load 4, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 4) ; GFX9: [[SEXT_INREG:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[LOAD]], 16 ; GFX9: S_ENDPGM 0, implicit [[SEXT_INREG]](s32) ; GFX10-LABEL: name: constant_sextload_i16_align4 ; GFX10: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX10: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load 4, addrspace 4) + ; GFX10: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 4) ; GFX10: [[SEXT_INREG:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[LOAD]], 16 ; GFX10: S_ENDPGM 0, implicit [[SEXT_INREG]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_SEXTLOAD %0 :: (invariant load 2, align 4, addrspace 4) + %1:_(s32) = G_SEXTLOAD %0 :: (invariant load (s16), align 4, addrspace 4) S_ENDPGM 0, implicit %1 ... @@ -133,24 +133,24 @@ body: | liveins: $sgpr0_sgpr1 ; GFX8-LABEL: name: constant_zextload_i8_align4 ; GFX8: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX8: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load 4, addrspace 4) + ; GFX8: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 4) ; GFX8: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 255 ; GFX8: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[LOAD]], [[C]] ; GFX8: S_ENDPGM 0, implicit [[AND]](s32) ; GFX9-LABEL: name: constant_zextload_i8_align4 ; GFX9: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load 4, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 4) ; GFX9: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 255 ; GFX9: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[LOAD]], [[C]] ; GFX9: S_ENDPGM 0, implicit [[AND]](s32) ; GFX10-LABEL: name: constant_zextload_i8_align4 ; GFX10: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX10: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load 4, addrspace 4) + ; GFX10: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 4) ; GFX10: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 255 ; GFX10: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[LOAD]], [[C]] ; GFX10: S_ENDPGM 0, implicit [[AND]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_ZEXTLOAD %0 :: (invariant load 1, align 4, addrspace 4) + %1:_(s32) = G_ZEXTLOAD %0 :: (invariant load (s8), align 4, addrspace 4) S_ENDPGM 0, implicit %1 ... --- @@ -162,24 +162,24 @@ body: | liveins: $sgpr0_sgpr1 ; GFX8-LABEL: name: constant_zextload_i16_align4 ; GFX8: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX8: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load 4, addrspace 4) + ; GFX8: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 4) ; GFX8: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535 ; GFX8: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[LOAD]], [[C]] ; GFX8: S_ENDPGM 0, implicit [[AND]](s32) ; GFX9-LABEL: name: constant_zextload_i16_align4 ; GFX9: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load 4, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 4) ; GFX9: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535 ; GFX9: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[LOAD]], [[C]] ; GFX9: S_ENDPGM 0, implicit [[AND]](s32) ; GFX10-LABEL: name: constant_zextload_i16_align4 ; GFX10: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX10: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load 4, addrspace 4) + ; GFX10: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 4) ; GFX10: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535 ; GFX10: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[LOAD]], [[C]] ; GFX10: S_ENDPGM 0, implicit [[AND]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_ZEXTLOAD %0 :: (invariant load 2, align 4, addrspace 4) + %1:_(s32) = G_ZEXTLOAD %0 :: (invariant load (s16), align 4, addrspace 4) S_ENDPGM 0, implicit %1 ... --- @@ -191,18 +191,18 @@ body: | liveins: $sgpr0_sgpr1 ; GFX8-LABEL: name: global_load_i8_align4 ; GFX8: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX8: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load 4, addrspace 1) + ; GFX8: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 1) ; GFX8: S_ENDPGM 0, implicit [[LOAD]](s32) ; GFX9-LABEL: name: global_load_i8_align4 ; GFX9: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load 4, addrspace 1) + ; GFX9: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 1) ; GFX9: S_ENDPGM 0, implicit [[LOAD]](s32) ; GFX10-LABEL: name: global_load_i8_align4 ; GFX10: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX10: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load 4, addrspace 1) + ; GFX10: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 1) ; GFX10: S_ENDPGM 0, implicit [[LOAD]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (invariant load 1, align 4, addrspace 1) + %1:_(s32) = G_LOAD %0 :: (invariant load (s8), align 4, addrspace 1) S_ENDPGM 0, implicit %1 ... --- @@ -214,18 +214,18 @@ body: | liveins: $sgpr0_sgpr1 ; GFX8-LABEL: name: global_load_i16_align4 ; GFX8: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX8: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load 4, addrspace 1) + ; GFX8: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 1) ; GFX8: S_ENDPGM 0, implicit [[LOAD]](s32) ; GFX9-LABEL: name: global_load_i16_align4 ; GFX9: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load 4, addrspace 1) + ; GFX9: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 1) ; GFX9: S_ENDPGM 0, implicit [[LOAD]](s32) ; GFX10-LABEL: name: global_load_i16_align4 ; GFX10: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX10: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load 4, addrspace 1) + ; GFX10: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 1) ; GFX10: S_ENDPGM 0, implicit [[LOAD]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (invariant load 2, align 4, addrspace 1) + %1:_(s32) = G_LOAD %0 :: (invariant load (s16), align 4, addrspace 1) S_ENDPGM 0, implicit %1 ... --- @@ -237,21 +237,21 @@ body: | liveins: $sgpr0_sgpr1 ; GFX8-LABEL: name: global_sextload_i8_alig4 ; GFX8: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX8: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load 4, addrspace 1) + ; GFX8: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 1) ; GFX8: [[SEXT_INREG:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[LOAD]], 8 ; GFX8: S_ENDPGM 0, implicit [[SEXT_INREG]](s32) ; GFX9-LABEL: name: global_sextload_i8_alig4 ; GFX9: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load 4, addrspace 1) + ; GFX9: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 1) ; GFX9: [[SEXT_INREG:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[LOAD]], 8 ; GFX9: S_ENDPGM 0, implicit [[SEXT_INREG]](s32) ; GFX10-LABEL: name: global_sextload_i8_alig4 ; GFX10: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX10: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load 4, addrspace 1) + ; GFX10: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 1) ; GFX10: [[SEXT_INREG:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[LOAD]], 8 ; GFX10: S_ENDPGM 0, implicit [[SEXT_INREG]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_SEXTLOAD %0 :: (invariant load 1, align 4, addrspace 1) + %1:_(s32) = G_SEXTLOAD %0 :: (invariant load (s8), align 4, addrspace 1) S_ENDPGM 0, implicit %1 ... --- @@ -263,24 +263,24 @@ body: | liveins: $sgpr0_sgpr1 ; GFX8-LABEL: name: global_zextload_i16_align4 ; GFX8: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX8: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load 4, addrspace 1) + ; GFX8: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 1) ; GFX8: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535 ; GFX8: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[LOAD]], [[C]] ; GFX8: S_ENDPGM 0, implicit [[AND]](s32) ; GFX9-LABEL: name: global_zextload_i16_align4 ; GFX9: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load 4, addrspace 1) + ; GFX9: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 1) ; GFX9: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535 ; GFX9: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[LOAD]], [[C]] ; GFX9: S_ENDPGM 0, implicit [[AND]](s32) ; GFX10-LABEL: name: global_zextload_i16_align4 ; GFX10: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; GFX10: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load 4, addrspace 1) + ; GFX10: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32), addrspace 1) ; GFX10: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535 ; GFX10: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[LOAD]], [[C]] ; GFX10: S_ENDPGM 0, implicit [[AND]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_ZEXTLOAD %0 :: (invariant load 2, align 4, addrspace 1) + %1:_(s32) = G_ZEXTLOAD %0 :: (invariant load (s16), align 4, addrspace 1) S_ENDPGM 0, implicit %1 ... # Some negative test cases @@ -293,20 +293,20 @@ body: | ; GFX8-LABEL: name: constant_load_i8_align2 ; GFX8: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX8: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX8: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (invariant load 1, align 2, addrspace 4) + ; GFX8: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (invariant load (s8), align 2, addrspace 4) ; GFX8: S_ENDPGM 0, implicit [[LOAD]](s32) ; GFX9-LABEL: name: constant_load_i8_align2 ; GFX9: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX9: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (invariant load 1, align 2, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (invariant load (s8), align 2, addrspace 4) ; GFX9: S_ENDPGM 0, implicit [[LOAD]](s32) ; GFX10-LABEL: name: constant_load_i8_align2 ; GFX10: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX10: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (invariant load 1, align 2, addrspace 4) + ; GFX10: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (invariant load (s8), align 2, addrspace 4) ; GFX10: S_ENDPGM 0, implicit [[LOAD]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (invariant load 1, align 2, addrspace 4) + %1:_(s32) = G_LOAD %0 :: (invariant load (s8), align 2, addrspace 4) S_ENDPGM 0, implicit %1 ... --- @@ -319,20 +319,20 @@ body: | ; GFX8-LABEL: name: constant_load_i16_align2 ; GFX8: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX8: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX8: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (invariant load 2, addrspace 4) + ; GFX8: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (invariant load (s16), addrspace 4) ; GFX8: S_ENDPGM 0, implicit [[LOAD]](s32) ; GFX9-LABEL: name: constant_load_i16_align2 ; GFX9: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX9: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (invariant load 2, addrspace 4) + ; GFX9: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (invariant load (s16), addrspace 4) ; GFX9: S_ENDPGM 0, implicit [[LOAD]](s32) ; GFX10-LABEL: name: constant_load_i16_align2 ; GFX10: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX10: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (invariant load 2, addrspace 4) + ; GFX10: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (invariant load (s16), addrspace 4) ; GFX10: S_ENDPGM 0, implicit [[LOAD]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (invariant load 2, align 2, addrspace 4) + %1:_(s32) = G_LOAD %0 :: (invariant load (s16), align 2, addrspace 4) S_ENDPGM 0, implicit %1 ... --- @@ -345,20 +345,20 @@ body: | ; GFX8-LABEL: name: constant_sextload_i8_align2 ; GFX8: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX8: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX8: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p1) :: (invariant load 1, align 2, addrspace 4) + ; GFX8: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p1) :: (invariant load (s8), align 2, addrspace 4) ; GFX8: S_ENDPGM 0, implicit [[SEXTLOAD]](s32) ; GFX9-LABEL: name: constant_sextload_i8_align2 ; GFX9: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX9: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p1) :: (invariant load 1, align 2, addrspace 4) + ; GFX9: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p1) :: (invariant load (s8), align 2, addrspace 4) ; GFX9: S_ENDPGM 0, implicit [[SEXTLOAD]](s32) ; GFX10-LABEL: name: constant_sextload_i8_align2 ; GFX10: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX10: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p1) :: (invariant load 1, align 2, addrspace 4) + ; GFX10: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p1) :: (invariant load (s8), align 2, addrspace 4) ; GFX10: S_ENDPGM 0, implicit [[SEXTLOAD]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_SEXTLOAD %0 :: (invariant load 1, align 2, addrspace 4) + %1:_(s32) = G_SEXTLOAD %0 :: (invariant load (s8), align 2, addrspace 4) S_ENDPGM 0, implicit %1 ... --- @@ -371,20 +371,20 @@ body: | ; GFX8-LABEL: name: constant_sextload_i16_align2 ; GFX8: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX8: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX8: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p1) :: (invariant load 2, addrspace 4) + ; GFX8: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p1) :: (invariant load (s16), addrspace 4) ; GFX8: S_ENDPGM 0, implicit [[SEXTLOAD]](s32) ; GFX9-LABEL: name: constant_sextload_i16_align2 ; GFX9: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX9: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p1) :: (invariant load 2, addrspace 4) + ; GFX9: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p1) :: (invariant load (s16), addrspace 4) ; GFX9: S_ENDPGM 0, implicit [[SEXTLOAD]](s32) ; GFX10-LABEL: name: constant_sextload_i16_align2 ; GFX10: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX10: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p1) :: (invariant load 2, addrspace 4) + ; GFX10: [[SEXTLOAD:%[0-9]+]]:vgpr(s32) = G_SEXTLOAD [[COPY1]](p1) :: (invariant load (s16), addrspace 4) ; GFX10: S_ENDPGM 0, implicit [[SEXTLOAD]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_SEXTLOAD %0 :: (invariant load 2, align 2, addrspace 4) + %1:_(s32) = G_SEXTLOAD %0 :: (invariant load (s16), align 2, addrspace 4) S_ENDPGM 0, implicit %1 ... --- @@ -397,20 +397,20 @@ body: | ; GFX8-LABEL: name: constant_zextload_i8_align2 ; GFX8: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX8: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX8: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p1) :: (invariant load 1, align 2, addrspace 4) + ; GFX8: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p1) :: (invariant load (s8), align 2, addrspace 4) ; GFX8: S_ENDPGM 0, implicit [[ZEXTLOAD]](s32) ; GFX9-LABEL: name: constant_zextload_i8_align2 ; GFX9: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX9: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p1) :: (invariant load 1, align 2, addrspace 4) + ; GFX9: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p1) :: (invariant load (s8), align 2, addrspace 4) ; GFX9: S_ENDPGM 0, implicit [[ZEXTLOAD]](s32) ; GFX10-LABEL: name: constant_zextload_i8_align2 ; GFX10: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX10: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p1) :: (invariant load 1, align 2, addrspace 4) + ; GFX10: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p1) :: (invariant load (s8), align 2, addrspace 4) ; GFX10: S_ENDPGM 0, implicit [[ZEXTLOAD]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_ZEXTLOAD %0 :: (invariant load 1, align 2, addrspace 4) + %1:_(s32) = G_ZEXTLOAD %0 :: (invariant load (s8), align 2, addrspace 4) S_ENDPGM 0, implicit %1 ... --- @@ -423,20 +423,20 @@ body: | ; GFX8-LABEL: name: constant_zextload_i16_align2 ; GFX8: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX8: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX8: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p1) :: (invariant load 2, addrspace 4) + ; GFX8: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p1) :: (invariant load (s16), addrspace 4) ; GFX8: S_ENDPGM 0, implicit [[ZEXTLOAD]](s32) ; GFX9-LABEL: name: constant_zextload_i16_align2 ; GFX9: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX9: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p1) :: (invariant load 2, addrspace 4) + ; GFX9: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p1) :: (invariant load (s16), addrspace 4) ; GFX9: S_ENDPGM 0, implicit [[ZEXTLOAD]](s32) ; GFX10-LABEL: name: constant_zextload_i16_align2 ; GFX10: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX10: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p1) :: (invariant load 2, addrspace 4) + ; GFX10: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p1) :: (invariant load (s16), addrspace 4) ; GFX10: S_ENDPGM 0, implicit [[ZEXTLOAD]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_ZEXTLOAD %0 :: (invariant load 2, align 2, addrspace 4) + %1:_(s32) = G_ZEXTLOAD %0 :: (invariant load (s16), align 2, addrspace 4) S_ENDPGM 0, implicit %1 ... --- @@ -449,20 +449,20 @@ body: | ; GFX8-LABEL: name: local_load_i8_align4 ; GFX8: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX8: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX8: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (load 1, align 4, addrspace 3) + ; GFX8: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (load (s8), align 4, addrspace 3) ; GFX8: S_ENDPGM 0, implicit [[LOAD]](s32) ; GFX9-LABEL: name: local_load_i8_align4 ; GFX9: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX9: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (load 1, align 4, addrspace 3) + ; GFX9: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (load (s8), align 4, addrspace 3) ; GFX9: S_ENDPGM 0, implicit [[LOAD]](s32) ; GFX10-LABEL: name: local_load_i8_align4 ; GFX10: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX10: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (load 1, align 4, addrspace 3) + ; GFX10: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (load (s8), align 4, addrspace 3) ; GFX10: S_ENDPGM 0, implicit [[LOAD]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (load 1, align 4, addrspace 3) + %1:_(s32) = G_LOAD %0 :: (load (s8), align 4, addrspace 3) S_ENDPGM 0, implicit %1 ... --- @@ -475,19 +475,19 @@ body: | ; GFX8-LABEL: name: private_load_i8_align4 ; GFX8: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX8: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX8: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (load 1, align 4, addrspace 5) + ; GFX8: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (load (s8), align 4, addrspace 5) ; GFX8: S_ENDPGM 0, implicit [[LOAD]](s32) ; GFX9-LABEL: name: private_load_i8_align4 ; GFX9: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX9: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (load 1, align 4, addrspace 5) + ; GFX9: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (load (s8), align 4, addrspace 5) ; GFX9: S_ENDPGM 0, implicit [[LOAD]](s32) ; GFX10-LABEL: name: private_load_i8_align4 ; GFX10: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GFX10: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (load 1, align 4, addrspace 5) + ; GFX10: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (load (s8), align 4, addrspace 5) ; GFX10: S_ENDPGM 0, implicit [[LOAD]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (load 1, align 4, addrspace 5) + %1:_(s32) = G_LOAD %0 :: (load (s8), align 4, addrspace 5) S_ENDPGM 0, implicit %1 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zextload.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zextload.mir index f9cf086820f4a..6d97616007e6c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zextload.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zextload.mir @@ -12,9 +12,9 @@ body: | ; CHECK-LABEL: name: zextload_constant_i8_to_i32_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p4) :: (load 1, addrspace 4) + ; CHECK: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p4) :: (load (s8), addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_ZEXTLOAD %0 :: (load 1, addrspace 4, align 1) + %1:_(s32) = G_ZEXTLOAD %0 :: (load (s8), addrspace 4, align 1) ... --- @@ -28,9 +28,9 @@ body: | ; CHECK-LABEL: name: zextload_global_i8_to_i32_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p4) :: (load 1, addrspace 1) + ; CHECK: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p4) :: (load (s8), addrspace 1) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_ZEXTLOAD %0 :: (load 1, addrspace 1, align 1) + %1:_(s32) = G_ZEXTLOAD %0 :: (load (s8), addrspace 1, align 1) ... --- @@ -44,9 +44,9 @@ body: | ; CHECK-LABEL: name: zextload_constant_i16_to_i32_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p4) :: (load 2, addrspace 4) + ; CHECK: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p4) :: (load (s16), addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_ZEXTLOAD %0 :: (load 2, addrspace 4, align 2) + %1:_(s32) = G_ZEXTLOAD %0 :: (load (s16), addrspace 4, align 2) ... --- @@ -60,9 +60,9 @@ body: | ; CHECK-LABEL: name: zextload_global_i16_to_i32_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p4) :: (load 2, addrspace 1) + ; CHECK: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p4) :: (load (s16), addrspace 1) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_ZEXTLOAD %0 :: (load 2, addrspace 1, align 2) + %1:_(s32) = G_ZEXTLOAD %0 :: (load (s16), addrspace 1, align 2) ... --- @@ -75,9 +75,9 @@ body: | ; CHECK-LABEL: name: zextload_local_i8_to_i32_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p3) :: (load 1, addrspace 3) + ; CHECK: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p3) :: (load (s8), addrspace 3) %0:_(p3) = COPY $sgpr0 - %1:_(s32) = G_ZEXTLOAD %0 :: (load 1, addrspace 3, align 1) + %1:_(s32) = G_ZEXTLOAD %0 :: (load (s8), addrspace 3, align 1) ... --- @@ -91,7 +91,7 @@ body: | ; CHECK-LABEL: name: zextload_local_i16_to_i32_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p3) :: (load 2, addrspace 3) + ; CHECK: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p3) :: (load (s16), addrspace 3) %0:_(p3) = COPY $sgpr0 - %1:_(s32) = G_ZEXTLOAD %0 :: (load 2, addrspace 3, align 2) + %1:_(s32) = G_ZEXTLOAD %0 :: (load (s16), addrspace 3, align 2) ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir index 5e8b492277c77..7565dd71e3b8a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir @@ -62,9 +62,9 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p4) :: (load 4 from %ir.ptr0, addrspace 4) + ; CHECK: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p4) :: (load (s32) from %ir.ptr0, addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (load 4 from %ir.ptr0) + %1:_(s32) = G_LOAD %0 :: (load (s32) from %ir.ptr0) ... --- @@ -76,9 +76,9 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_volatile ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p4) :: (volatile load 4 from %ir.ptr0, addrspace 4) + ; CHECK: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p4) :: (volatile load (s32) from %ir.ptr0, addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (volatile load 4 from %ir.ptr0) + %1:_(s32) = G_LOAD %0 :: (volatile load (s32) from %ir.ptr0) ... --- @@ -90,9 +90,9 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_global_uniform_invariant ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load 4 from %ir.ptr1, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load (s32) from %ir.ptr1, addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (invariant load 4 from %ir.ptr1) + %1:_(s32) = G_LOAD %0 :: (invariant load (s32) from %ir.ptr1) ... --- @@ -105,9 +105,9 @@ body: | ; CHECK-LABEL: name: load_global_uniform_noclobber ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (load 4 from %ir.ptr1, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (load (s32) from %ir.ptr1, addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (load 4 from %ir.ptr1) + %1:_(s32) = G_LOAD %0 :: (load (s32) from %ir.ptr1) ... --- @@ -120,9 +120,9 @@ body: | ; CHECK-LABEL: name: load_global_uniform_variant ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (load 4 from %ir.ptr1, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (load (s32) from %ir.ptr1, addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (load 4 from %ir.ptr1) + %1:_(s32) = G_LOAD %0 :: (load (s32) from %ir.ptr1) ... --- @@ -135,9 +135,9 @@ body: | ; CHECK-LABEL: name: load_global_uniform_volatile_invariant ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (volatile invariant load 4 from %ir.ptr1, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (volatile invariant load (s32) from %ir.ptr1, addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (volatile invariant load 4 from %ir.ptr1) + %1:_(s32) = G_LOAD %0 :: (volatile invariant load (s32) from %ir.ptr1) ... --- @@ -150,9 +150,9 @@ body: | ; CHECK-LABEL: name: load_global_uniform_atomic_invariant ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (invariant load acquire 4 from %ir.ptr1, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (invariant load acquire (s32) from %ir.ptr1, addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (invariant load acquire 4 from %ir.ptr1) + %1:_(s32) = G_LOAD %0 :: (invariant load acquire (s32) from %ir.ptr1) ... --- @@ -165,9 +165,9 @@ body: | ; CHECK-LABEL: name: load_global_non_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (load 4 from %ir.tmp1, addrspace 1) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (load (s32) from %ir.tmp1, addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_LOAD %0 :: (load 4 from %ir.tmp1) + %1:_(s32) = G_LOAD %0 :: (load (s32) from %ir.tmp1) ... --- @@ -195,8 +195,8 @@ body: | bb.0: ; CHECK-LABEL: name: load_constant_v4i16_from_8_align8 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 8 from %ir.ptr0, addrspace 4) + ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>) from %ir.ptr0, addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 - %1:_(<4 x s16>) = G_LOAD %0 :: (load 8 from %ir.ptr0, align 8, addrspace 4) + %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>) from %ir.ptr0, align 8, addrspace 4) ... diff --git a/llvm/test/CodeGen/AMDGPU/SRSRC-GIT-clobber-check.mir b/llvm/test/CodeGen/AMDGPU/SRSRC-GIT-clobber-check.mir index 4a68ca617bb1a..78db6db772b45 100644 --- a/llvm/test/CodeGen/AMDGPU/SRSRC-GIT-clobber-check.mir +++ b/llvm/test/CodeGen/AMDGPU/SRSRC-GIT-clobber-check.mir @@ -24,7 +24,7 @@ body: | ; CHECK: $sgpr1 = COPY killed $sgpr5 ; CHECK: $sgpr4_sgpr5 = S_GETPC_B64 ; CHECK: $sgpr4 = S_MOV_B32 $sgpr8 - ; CHECK: $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM $sgpr4_sgpr5, 0, 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7 :: (dereferenceable invariant load 16, align 4, addrspace 4) + ; CHECK: $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM $sgpr4_sgpr5, 0, 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7 :: (dereferenceable invariant load (s128), align 4, addrspace 4) bb.0: successors: %bb.1, %bb.2 liveins: $sgpr0 @@ -39,7 +39,7 @@ body: | bb.1: renamable $vgpr0 = V_MOV_B32_e32 1065353216, implicit $exec - BUFFER_STORE_DWORD_OFFEN killed renamable $vgpr0, undef renamable $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5) + BUFFER_STORE_DWORD_OFFEN killed renamable $vgpr0, undef renamable $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, 0, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) bb.2: S_ENDPGM 0 diff --git a/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx908.mir b/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx908.mir index fe9e582b073aa..06646572a73d9 100644 --- a/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx908.mir +++ b/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx908.mir @@ -71,7 +71,7 @@ body: | %0:vreg_64 = COPY $vgpr0_vgpr1 %1:vgpr_32 = COPY $vgpr2 - %2:vreg_160 = IMAGE_LOAD_V5_V1 %1, undef %3:sgpr_256, 0, 0, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %2:vreg_160 = IMAGE_LOAD_V5_V1 %1, undef %3:sgpr_256, 0, 0, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec ... @@ -92,7 +92,7 @@ body: | %1:vgpr_32 = COPY $vgpr2 %3:sgpr_256 = IMPLICIT_DEF %2:vreg_256 = COPY %3:sgpr_256 - %4:vreg_128 = IMAGE_SAMPLE_C_CL_O_V4_V8 %2, %3:sgpr_256, undef %5:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %4:vreg_128 = IMAGE_SAMPLE_C_CL_O_V4_V8 %2, %3:sgpr_256, undef %5:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec ... diff --git a/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx90a.mir b/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx90a.mir index afa1d452bc509..24a3001865b61 100644 --- a/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx90a.mir +++ b/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx90a.mir @@ -73,7 +73,7 @@ body: | %0:vreg_64_align2 = COPY $vgpr0_vgpr1 %1:vgpr_32 = COPY $vgpr2 - %2:vreg_160_align2 = IMAGE_LOAD_V5_V1 %1, undef %3:sgpr_256, 0, 0, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %2:vreg_160_align2 = IMAGE_LOAD_V5_V1 %1, undef %3:sgpr_256, 0, 0, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec ... @@ -94,7 +94,7 @@ body: | %1:vgpr_32 = COPY $vgpr2 %3:sgpr_256 = IMPLICIT_DEF %2:vreg_256_align2 = COPY %3:sgpr_256 - %4:vreg_128_align2 = IMAGE_SAMPLE_C_CL_O_V4_V8 %2, %3:sgpr_256, undef %5:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %4:vreg_128_align2 = IMAGE_SAMPLE_C_CL_O_V4_V8 %2, %3:sgpr_256, undef %5:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec ... diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll b/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll index 22476fec3e866..6523621098a8f 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll @@ -4,7 +4,7 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs -stop-after=amdgpu-isel -o - %s | FileCheck -check-prefix=GCN %s ; GCN: %[[OFFSET:[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @DescriptorBuffer -; GCN: %{{[0-9]+}}:sgpr_128 = S_LOAD_DWORDX4_SGPR killed %{{[0-9]+}}, killed %[[OFFSET]], 0 :: (invariant load 16 from %ir.13, addrspace 4) +; GCN: %{{[0-9]+}}:sgpr_128 = S_LOAD_DWORDX4_SGPR killed %{{[0-9]+}}, killed %[[OFFSET]], 0 :: (invariant load (s128) from %ir.13, addrspace 4) define amdgpu_cs void @test_load_zext(i32 inreg %0, i32 inreg %1, i32 inreg %resNode0, i32 inreg %resNode1, <3 x i32> inreg %2, i32 inreg %3, <3 x i32> %4) local_unnamed_addr #2 { .entry: diff --git a/llvm/test/CodeGen/AMDGPU/branch-relaxation-debug-info.mir b/llvm/test/CodeGen/AMDGPU/branch-relaxation-debug-info.mir index 1a42f801b3772..1f9581a29d9d4 100644 --- a/llvm/test/CodeGen/AMDGPU/branch-relaxation-debug-info.mir +++ b/llvm/test/CodeGen/AMDGPU/branch-relaxation-debug-info.mir @@ -83,14 +83,14 @@ body: | successors: %bb.1(0x40000000), %bb.4(0x40000000) liveins: $sgpr4_sgpr5 - renamable $sgpr6_sgpr7 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load 8 from %ir.arg.kernarg.offset.cast, align 16, addrspace 4) - renamable $sgpr4 = S_LOAD_DWORD_IMM killed renamable $sgpr4_sgpr5, 8, 0 :: (dereferenceable invariant load 4 from %ir.arg1.kernarg.offset.cast, align 8, addrspace 4) + renamable $sgpr6_sgpr7 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s64) from %ir.arg.kernarg.offset.cast, align 16, addrspace 4) + renamable $sgpr4 = S_LOAD_DWORD_IMM killed renamable $sgpr4_sgpr5, 8, 0 :: (dereferenceable invariant load (s32) from %ir.arg1.kernarg.offset.cast, align 8, addrspace 4) S_WAITCNT 49279 renamable $vgpr0 = nofpexcept V_MUL_F32_e64 0, killed $sgpr4, 0, $sgpr4, 0, 0, implicit $mode, implicit $exec DBG_VALUE renamable $sgpr6_sgpr7, $noreg, !11, !DIExpression(DW_OP_plus_uconst, 12, DW_OP_stack_value), debug-location !12 $vgpr1 = V_MOV_B32_e32 $sgpr6, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $sgpr6_sgpr7 $vgpr2 = V_MOV_B32_e32 $sgpr7, implicit $exec, implicit killed $sgpr6_sgpr7, implicit $exec - GLOBAL_STORE_DWORD killed renamable $vgpr1_vgpr2, renamable $vgpr0, 12, 0, implicit $exec, debug-location !12 :: (store 4 into %ir.tmp2, addrspace 1) + GLOBAL_STORE_DWORD killed renamable $vgpr1_vgpr2, renamable $vgpr0, 12, 0, implicit $exec, debug-location !12 :: (store (s32) into %ir.tmp2, addrspace 1) renamable $sgpr4 = S_MOV_B32 8388608 renamable $sgpr4_sgpr5 = nofpexcept V_CMP_GT_F32_e64 0, killed $sgpr4, 0, killed $vgpr0, 0, implicit $mode, implicit $exec renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc @@ -102,7 +102,7 @@ body: | renamable $sgpr4_sgpr5 = IMPLICIT_DEF $vgpr0 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr4_sgpr5 $vgpr1 = V_MOV_B32_e32 $sgpr5, implicit $exec, implicit killed $sgpr4_sgpr5, implicit $exec - renamable $vgpr0 = GLOBAL_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`, addrspace 1) + renamable $vgpr0 = GLOBAL_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (volatile load (s32) from `float addrspace(1)* undef`, addrspace 1) renamable $sgpr4 = S_MOV_B32 2139095040 S_WAITCNT 3952 renamable $sgpr4_sgpr5 = nofpexcept V_CMP_NEQ_F32_e64 0, killed $sgpr4, 0, killed $vgpr0, 0, implicit $mode, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll b/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll index 2c715c6faa675..a2b79838e50d8 100644 --- a/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll +++ b/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll @@ -9,235 +9,235 @@ define amdgpu_cs void @mmo_offsets0(<4 x i32> addrspace(6)* inreg noalias derefe ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1 - ; GCN: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM killed [[REG_SEQUENCE]], 0, 0 :: (dereferenceable invariant load 16 from %ir.arg0, addrspace 6) - ; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 16, align 1, addrspace 4) + ; GCN: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM killed [[REG_SEQUENCE]], 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg0, addrspace 6) + ; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 16, align 1, addrspace 4) ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4) - ; GCN: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4) - ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource", align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource", align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource", align 1, addrspace 4) ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 32, align 1, addrspace 4) - ; GCN: BUFFER_STORE_DWORDX4_OFFEN_exact killed [[BUFFER_LOAD_DWORDX4_OFFEN]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4) - ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4) - ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 32, align 1, addrspace 4) + ; GCN: BUFFER_STORE_DWORDX4_OFFEN_exact killed [[BUFFER_LOAD_DWORDX4_OFFEN]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource", align 1, addrspace 4) ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 48, align 1, addrspace 4) - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4) - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4) - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 48, align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource", align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource", align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource", align 1, addrspace 4) ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 64, align 1, addrspace 4) - ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4) - ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4) - ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 64, align 1, addrspace 4) + ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource", align 1, addrspace 4) ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 80, align 1, addrspace 4) - ; GCN: BUFFER_ATOMIC_ADD_OFFEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) - ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) - ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 80, align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_OFFEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY]], %subreg.sub1 ; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 96, align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 96, align 1, addrspace 4) ; GCN: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[DEF]] ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0 ; GCN: [[DEF1:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; GCN: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[DEF1]] ; GCN: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]].sub0 ; GCN: [[DEF2:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; GCN: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[DEF2]] ; GCN: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY6]].sub0 ; GCN: [[DEF3:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; GCN: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[DEF3]] ; GCN: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY8]].sub0 ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec - ; GCN: BUFFER_ATOMIC_ADD_F32_OFFSET [[V_MOV_B32_e32_1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 112, align 1, addrspace 4) - ; GCN: BUFFER_ATOMIC_ADD_F32_OFFEN [[V_MOV_B32_e32_1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) - ; GCN: BUFFER_ATOMIC_ADD_F32_IDXEN [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) - ; GCN: BUFFER_ATOMIC_ADD_F32_IDXEN [[V_MOV_B32_e32_1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_F32_OFFSET [[V_MOV_B32_e32_1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 112, align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_F32_OFFEN [[V_MOV_B32_e32_1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_F32_IDXEN [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_F32_IDXEN [[V_MOV_B32_e32_1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 128, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 128, align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 128, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 128, align 1, addrspace 4) ; GCN: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 64 - ; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_1]], 64, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 128, align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_1]], 64, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 128, align 1, addrspace 4) ; GCN: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 128 - ; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 128, align 1, addrspace 4) - ; GCN: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 128, align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource", align 1, addrspace 4) ; GCN: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[COPY10]], 128, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[COPY10]], 128, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource", align 1, addrspace 4) ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 144, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 144, align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 144, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 144, align 1, addrspace 4) ; GCN: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 72 - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_3]], 72, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 144, align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_3]], 72, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 144, align 1, addrspace 4) ; GCN: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 144 - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 144, align 1, addrspace 4) - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 144, align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource", align 1, addrspace 4) ; GCN: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[COPY11]], 144, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[COPY11]], 144, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource", align 1, addrspace 4) ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 160, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 160, align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 160, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 160, align 1, addrspace 4) ; GCN: [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 80 - ; GCN: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_5]], 80, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 160, align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_5]], 80, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 160, align 1, addrspace 4) ; GCN: [[S_MOV_B32_6:%[0-9]+]]:sreg_32 = S_MOV_B32 160 - ; GCN: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_6]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 160, align 1, addrspace 4) - ; GCN: BUFFER_ATOMIC_ADD_OFFEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_6]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 160, align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_OFFEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; GCN: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[COPY12]], 160, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[COPY12]], 160, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN: [[DEF4:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 176, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 176, align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 176, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 176, align 1, addrspace 4) ; GCN: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[DEF4]] ; GCN: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY13]].sub0 ; GCN: [[S_MOV_B32_7:%[0-9]+]]:sreg_32 = S_MOV_B32 88 ; GCN: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_7]], 88, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 176, align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_7]], 88, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 176, align 1, addrspace 4) ; GCN: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[DEF5]] ; GCN: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY15]].sub0 ; GCN: [[S_MOV_B32_8:%[0-9]+]]:sreg_32 = S_MOV_B32 176 ; GCN: [[DEF6:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_8]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 176, align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 176, align 1, addrspace 4) ; GCN: [[COPY17:%[0-9]+]]:vreg_64 = COPY [[DEF6]] ; GCN: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY17]].sub0 ; GCN: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_8]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; GCN: [[COPY19:%[0-9]+]]:vreg_64 = COPY [[DEF7]] ; GCN: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY19]].sub0 ; GCN: [[COPY21:%[0-9]+]]:sreg_32 = COPY [[COPY]] ; GCN: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[COPY21]], 176, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[COPY21]], 176, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; GCN: [[COPY22:%[0-9]+]]:vreg_64 = COPY [[DEF8]] ; GCN: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY22]].sub0 ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 192, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 192, align 1, addrspace 4) + ; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 192, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 192, align 1, addrspace 4) ; GCN: [[S_MOV_B32_9:%[0-9]+]]:sreg_32 = S_MOV_B32 96 - ; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET2]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_9]], 96, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 192, align 1, addrspace 4) + ; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET2]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_9]], 96, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 192, align 1, addrspace 4) ; GCN: [[S_MOV_B32_10:%[0-9]+]]:sreg_32 = S_MOV_B32 192 - ; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET3]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 192, align 1, addrspace 4) - ; GCN: BUFFER_STORE_DWORDX4_OFFEN_exact killed [[BUFFER_LOAD_DWORDX4_OFFEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET3]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 192, align 1, addrspace 4) + ; GCN: BUFFER_STORE_DWORDX4_OFFEN_exact killed [[BUFFER_LOAD_DWORDX4_OFFEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource", align 1, addrspace 4) ; GCN: [[COPY24:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET4]], [[S_LOAD_DWORDX4_IMM]], [[COPY24]], 192, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET4]], [[S_LOAD_DWORDX4_IMM]], [[COPY24]], 192, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource", align 1, addrspace 4) ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ - ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 208, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 208, align 1, addrspace 4) + ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 208, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 208, align 1, addrspace 4) ; GCN: [[S_MOV_B32_11:%[0-9]+]]:sreg_32 = S_MOV_B32 104 - ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET2]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_11]], 104, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 208, align 1, addrspace 4) + ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET2]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_11]], 104, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 208, align 1, addrspace 4) ; GCN: [[S_MOV_B32_12:%[0-9]+]]:sreg_32 = S_MOV_B32 208 - ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET3]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_12]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 208, align 1, addrspace 4) - ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_12]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET3]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_12]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 208, align 1, addrspace 4) + ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_12]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource", align 1, addrspace 4) ; GCN: [[COPY25:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET4]], [[S_LOAD_DWORDX4_IMM]], [[COPY25]], 208, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET4]], [[S_LOAD_DWORDX4_IMM]], [[COPY25]], 208, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource", align 1, addrspace 4) ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY26]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 224, align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY26]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 224, align 1, addrspace 4) ; GCN: [[S_MOV_B32_13:%[0-9]+]]:sreg_32 = S_MOV_B32 112 ; GCN: [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY27]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_13]], 112, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 224, align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY27]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_13]], 112, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 224, align 1, addrspace 4) ; GCN: [[S_MOV_B32_14:%[0-9]+]]:sreg_32 = S_MOV_B32 224 ; GCN: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY28]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_14]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 224, align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY28]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_14]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 224, align 1, addrspace 4) ; GCN: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[COPY]], %subreg.sub1 - ; GCN: [[BUFFER_LOAD_DWORDX4_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_BOTHEN [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_14]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_DWORDX4_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_BOTHEN [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_14]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource", align 1, addrspace 4) ; GCN: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN: [[COPY30:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY29]], [[S_LOAD_DWORDX4_IMM]], [[COPY30]], 224, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4) - ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4) - ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY29]], [[S_LOAD_DWORDX4_IMM]], [[COPY30]], 224, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource", align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource", align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource", align 1, addrspace 4) ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY31]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 240, align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY31]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 240, align 1, addrspace 4) ; GCN: [[S_MOV_B32_15:%[0-9]+]]:sreg_32 = S_MOV_B32 120 ; GCN: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY32]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_15]], 120, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 240, align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY32]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_15]], 120, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 240, align 1, addrspace 4) ; GCN: [[S_MOV_B32_16:%[0-9]+]]:sreg_32 = S_MOV_B32 240 ; GCN: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY33]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_16]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 240, align 1, addrspace 4) - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_16]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY33]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_16]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 240, align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_16]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource", align 1, addrspace 4) ; GCN: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN: [[COPY35:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY34]], [[S_LOAD_DWORDX4_IMM]], [[COPY35]], 240, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4) - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4) - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY34]], [[S_LOAD_DWORDX4_IMM]], [[COPY35]], 240, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource", align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource", align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource", align 1, addrspace 4) ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY36]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 256, align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY36]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 256, align 1, addrspace 4) ; GCN: [[COPY37:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY37]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 128, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 256, align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY37]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 128, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 256, align 1, addrspace 4) ; GCN: [[S_MOV_B32_17:%[0-9]+]]:sreg_32 = S_MOV_B32 256 ; GCN: [[COPY38:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY38]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_17]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 256, align 1, addrspace 4) - ; GCN: BUFFER_ATOMIC_ADD_BOTHEN [[COPY]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_17]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY38]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_17]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 256, align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_BOTHEN [[COPY]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_17]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; GCN: [[COPY39:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN: [[COPY40:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY39]], [[S_LOAD_DWORDX4_IMM]], [[COPY40]], 256, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) - ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) - ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY39]], [[S_LOAD_DWORDX4_IMM]], [[COPY40]], 256, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN: [[COPY41:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN: [[DEF9:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY41]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 272, align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY41]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 272, align 1, addrspace 4) ; GCN: [[COPY42:%[0-9]+]]:vreg_64 = COPY [[DEF9]] ; GCN: [[COPY43:%[0-9]+]]:vgpr_32 = COPY [[COPY42]].sub0 ; GCN: [[S_MOV_B32_18:%[0-9]+]]:sreg_32 = S_MOV_B32 136 ; GCN: [[COPY44:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN: [[DEF10:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY44]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_18]], 136, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 272, align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY44]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_18]], 136, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 272, align 1, addrspace 4) ; GCN: [[COPY45:%[0-9]+]]:vreg_64 = COPY [[DEF10]] ; GCN: [[COPY46:%[0-9]+]]:vgpr_32 = COPY [[COPY45]].sub0 ; GCN: [[S_MOV_B32_19:%[0-9]+]]:sreg_32 = S_MOV_B32 272 ; GCN: [[COPY47:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN: [[DEF11:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY47]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_19]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 272, align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY47]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_19]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 272, align 1, addrspace 4) ; GCN: [[COPY48:%[0-9]+]]:vreg_64 = COPY [[DEF11]] ; GCN: [[COPY49:%[0-9]+]]:vgpr_32 = COPY [[COPY48]].sub0 ; GCN: [[DEF12:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_19]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_CMPSWAP_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_19]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; GCN: [[COPY50:%[0-9]+]]:vreg_64 = COPY [[DEF12]] ; GCN: [[COPY51:%[0-9]+]]:vgpr_32 = COPY [[COPY50]].sub0 ; GCN: [[COPY52:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN: [[COPY53:%[0-9]+]]:sreg_32 = COPY [[COPY]] ; GCN: [[DEF13:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY52]], [[S_LOAD_DWORDX4_IMM]], [[COPY53]], 272, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY52]], [[S_LOAD_DWORDX4_IMM]], [[COPY53]], 272, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; GCN: [[COPY54:%[0-9]+]]:vreg_64 = COPY [[DEF13]] ; GCN: [[COPY55:%[0-9]+]]:vgpr_32 = COPY [[COPY54]].sub0 ; GCN: [[DEF14:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; GCN: [[COPY56:%[0-9]+]]:vreg_64 = COPY [[DEF14]] ; GCN: [[COPY57:%[0-9]+]]:vgpr_32 = COPY [[COPY56]].sub0 ; GCN: [[DEF15:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) ; GCN: [[COPY58:%[0-9]+]]:vreg_64 = COPY [[DEF15]] ; GCN: [[COPY59:%[0-9]+]]:vgpr_32 = COPY [[COPY58]].sub0 ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN: [[COPY60:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN2]], [[COPY60]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 288, align 1, addrspace 4) + ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN2]], [[COPY60]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 288, align 1, addrspace 4) ; GCN: [[COPY61:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN3]], [[COPY61]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 144, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 288, align 1, addrspace 4) + ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN3]], [[COPY61]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 144, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 288, align 1, addrspace 4) ; GCN: [[S_MOV_B32_20:%[0-9]+]]:sreg_32 = S_MOV_B32 288 ; GCN: [[COPY62:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN4]], [[COPY62]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_20]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 288, align 1, addrspace 4) - ; GCN: BUFFER_STORE_DWORDX4_BOTHEN_exact killed [[BUFFER_LOAD_DWORDX4_BOTHEN]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_20]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN4]], [[COPY62]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_20]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 288, align 1, addrspace 4) + ; GCN: BUFFER_STORE_DWORDX4_BOTHEN_exact killed [[BUFFER_LOAD_DWORDX4_BOTHEN]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_20]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource", align 1, addrspace 4) ; GCN: [[COPY63:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN: [[COPY64:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN5]], [[COPY63]], [[S_LOAD_DWORDX4_IMM]], [[COPY64]], 288, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4) - ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN6]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4) - ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN7]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN5]], [[COPY63]], [[S_LOAD_DWORDX4_IMM]], [[COPY64]], 288, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN6]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN7]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource", align 1, addrspace 4) ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN: [[COPY65:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN2]], [[COPY65]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 304, align 1, addrspace 4) + ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN2]], [[COPY65]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 304, align 1, addrspace 4) ; GCN: [[S_MOV_B32_21:%[0-9]+]]:sreg_32 = S_MOV_B32 152 ; GCN: [[COPY66:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN3]], [[COPY66]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_21]], 152, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 304, align 1, addrspace 4) + ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN3]], [[COPY66]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_21]], 152, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 304, align 1, addrspace 4) ; GCN: [[S_MOV_B32_22:%[0-9]+]]:sreg_32 = S_MOV_B32 304 ; GCN: [[COPY67:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN4]], [[COPY67]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_22]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 304, align 1, addrspace 4) - ; GCN: BUFFER_STORE_FORMAT_XYZW_BOTHEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_22]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN4]], [[COPY67]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_22]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 304, align 1, addrspace 4) + ; GCN: BUFFER_STORE_FORMAT_XYZW_BOTHEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_22]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource", align 1, addrspace 4) ; GCN: [[COPY68:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN: [[COPY69:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN5]], [[COPY68]], [[S_LOAD_DWORDX4_IMM]], [[COPY69]], 304, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4) - ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN6]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4) - ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN7]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN5]], [[COPY68]], [[S_LOAD_DWORDX4_IMM]], [[COPY69]], 304, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN6]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN7]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource", align 1, addrspace 4) ; GCN: S_ENDPGM 0 bb.0: %tmp0 = load <4 x i32>, <4 x i32> addrspace(6)* %arg0, align 16, !invariant.load !0 diff --git a/llvm/test/CodeGen/AMDGPU/clamp-omod-special-case.mir b/llvm/test/CodeGen/AMDGPU/clamp-omod-special-case.mir index 8876af8f08335..d1b0a7ca528b5 100644 --- a/llvm/test/CodeGen/AMDGPU/clamp-omod-special-case.mir +++ b/llvm/test/CodeGen/AMDGPU/clamp-omod-special-case.mir @@ -43,8 +43,8 @@ body: | %3 = COPY $vgpr0 %0 = COPY $sgpr0_sgpr1 - %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) - %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) + %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`) + %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`) %24 = V_ASHRREV_I32_e32 31, %3, implicit $exec %25 = REG_SEQUENCE %3, 1, %24, 2 %10 = S_MOV_B32 61440 @@ -105,8 +105,8 @@ body: | %3 = COPY $vgpr0 %0 = COPY $sgpr0_sgpr1 - %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) - %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) + %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`) + %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`) %24 = V_ASHRREV_I32_e32 31, %3, implicit $exec %25 = REG_SEQUENCE %3, 1, %24, 2 %10 = S_MOV_B32 61440 @@ -168,8 +168,8 @@ body: | %3 = COPY $vgpr0 %0 = COPY $sgpr0_sgpr1 - %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) - %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) + %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`) + %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`) %24 = V_ASHRREV_I32_e32 31, %3, implicit $exec %25 = REG_SEQUENCE %3, 1, %24, 2 %10 = S_MOV_B32 61440 @@ -233,8 +233,8 @@ body: | %3 = COPY $vgpr0 %0 = COPY $sgpr0_sgpr1 - %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) - %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) + %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`) + %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`) %24 = V_ASHRREV_I32_e32 31, %3, implicit $exec %25 = REG_SEQUENCE %3, 1, %24, 2 %10 = S_MOV_B32 61440 @@ -310,8 +310,8 @@ body: | %3 = COPY $vgpr0 %0 = COPY $sgpr0_sgpr1 - %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) - %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) + %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`) + %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`) %24 = V_ASHRREV_I32_e32 31, %3, implicit $exec %25 = REG_SEQUENCE %3, 1, %24, 2 %10 = S_MOV_B32 61440 @@ -375,8 +375,8 @@ body: | %3 = COPY $vgpr0 %0 = COPY $sgpr0_sgpr1 - %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) - %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) + %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`) + %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`) %24 = V_ASHRREV_I32_e32 31, %3, implicit $exec %25 = REG_SEQUENCE %3, 1, %24, 2 %10 = S_MOV_B32 61440 diff --git a/llvm/test/CodeGen/AMDGPU/cluster-flat-loads-postra.mir b/llvm/test/CodeGen/AMDGPU/cluster-flat-loads-postra.mir index 7e7db6b62460a..6494d7f60c419 100644 --- a/llvm/test/CodeGen/AMDGPU/cluster-flat-loads-postra.mir +++ b/llvm/test/CodeGen/AMDGPU/cluster-flat-loads-postra.mir @@ -17,15 +17,15 @@ body: | $vgpr0_vgpr1 = IMPLICIT_DEF $vgpr4_vgpr5 = IMPLICIT_DEF - $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) - $vgpr4 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) + $vgpr4 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) $vgpr2 = IMPLICIT_DEF $vgpr3 = IMPLICIT_DEF $vgpr6 = IMPLICIT_DEF $vgpr0 = V_ADD_CO_U32_e32 16, $vgpr2, implicit-def $vcc, implicit $exec $vgpr1 = V_ADDC_U32_e32 $vgpr3, killed $vgpr6, implicit-def dead $vcc, implicit $vcc, implicit $exec - FLAT_STORE_DWORD $vgpr2_vgpr3, killed $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) - FLAT_STORE_DWORD $vgpr0_vgpr1, killed $vgpr4, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) + FLAT_STORE_DWORD $vgpr2_vgpr3, killed $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) + FLAT_STORE_DWORD $vgpr0_vgpr1, killed $vgpr4, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/cluster-flat-loads.mir b/llvm/test/CodeGen/AMDGPU/cluster-flat-loads.mir index 9693de8a1d550..0b97e6fe30919 100644 --- a/llvm/test/CodeGen/AMDGPU/cluster-flat-loads.mir +++ b/llvm/test/CodeGen/AMDGPU/cluster-flat-loads.mir @@ -14,7 +14,7 @@ registers: body: | bb.0: %0 = IMPLICIT_DEF - %1 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + %1 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) %2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit $mode, implicit $exec - %3 = FLAT_LOAD_DWORD %0, 4, 0, implicit $exec, implicit $flat_scr :: (load 4) + %3 = FLAT_LOAD_DWORD %0, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32)) ... diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir b/llvm/test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir index 181cfba5b0aa7..7ce1ddf4411e4 100644 --- a/llvm/test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir +++ b/llvm/test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir @@ -30,7 +30,7 @@ body: | %14:vgpr_32 = V_AND_B32_e32 1, %13, implicit $exec %15:sreg_64_xexec = V_CMP_EQ_U32_e64 0, %14, implicit $exec %16:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %15, implicit $exec - BUFFER_STORE_DWORD_OFFEN_exact %16, undef %17:vgpr_32, undef %18:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into constant-pool, align 1, addrspace 4) + BUFFER_STORE_DWORD_OFFEN_exact %16, undef %17:vgpr_32, undef %18:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into constant-pool, align 1, addrspace 4) S_ENDPGM 0 bb.2: @@ -78,7 +78,7 @@ body: | bb.8: successors: %bb.10 - %31:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %32:vgpr_32, undef %33:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from constant-pool, align 1, addrspace 4) + %31:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %32:vgpr_32, undef %33:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from constant-pool, align 1, addrspace 4) %34:sreg_64_xexec = V_CMP_NE_U32_e64 0, %31, implicit $exec %35:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, -1, %34, implicit $exec %28:vgpr_32 = COPY %35 diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-copymi-not-live.mir b/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-copymi-not-live.mir index 38714d205b8e8..1360fd87c2e39 100644 --- a/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-copymi-not-live.mir +++ b/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-copymi-not-live.mir @@ -83,7 +83,7 @@ body: | bb.9: successors: %bb.10(0x80000000) - %19:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %18, undef %20:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4) + %19:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %18, undef %20:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from constant-pool, align 1, addrspace 4) %21:sreg_64 = V_CMP_NE_U32_e64 target-flags(amdgpu-gotprel) 0, killed %19.sub0, implicit $exec %22:sreg_64 = COPY $exec, implicit-def $exec %23:sreg_64 = S_AND_B64 %22, %21, implicit-def dead $scc @@ -125,9 +125,9 @@ body: | %27.sub5:sgpr_256 = COPY %26 %27.sub6:sgpr_256 = COPY %26 %27.sub7:sgpr_256 = COPY killed %26 - %28:vgpr_32 = IMAGE_LOAD_V1_V4 killed %25, killed %27, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, addrspace 4) + %28:vgpr_32 = IMAGE_LOAD_V1_V4 killed %25, killed %27, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from constant-pool, addrspace 4) %29:vgpr_32 = nofpexcept V_ADD_F32_e32 0, killed %28, implicit $mode, implicit $exec $m0 = S_MOV_B32 -1 - DS_WRITE_B32 undef %30:vgpr_32, killed %29, 0, 0, implicit $m0, implicit $exec :: (store 4 into `i32 addrspace(3)* undef`, addrspace 3) + DS_WRITE_B32 undef %30:vgpr_32, killed %29, 0, 0, implicit $m0, implicit $exec :: (store (s32) into `i32 addrspace(3)* undef`, addrspace 3) S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir b/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir index 6145657a72ffb..5e748eaf805d0 100644 --- a/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir +++ b/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir @@ -68,7 +68,7 @@ body: | %23:vreg_128 = COPY killed %17 %24:sreg_64 = COPY killed %16 %25:vgpr_32 = V_OR_B32_e32 %22, %11, implicit $exec - %26:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %25, undef %27:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4) + %26:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %25, undef %27:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from constant-pool, align 1, addrspace 4) %28:vgpr_32 = V_LSHRREV_B32_e32 30, killed %26.sub0, implicit $exec %29:vreg_128 = COPY killed %21 %29.sub0:vreg_128 = COPY %1 @@ -257,7 +257,7 @@ body: | %109.sub5:sgpr_256 = COPY %108 %109.sub6:sgpr_256 = COPY %108 %109.sub7:sgpr_256 = COPY killed %108 - %110:vgpr_32 = IMAGE_SAMPLE_V1_V2 killed %107, killed %109, undef %111:sgpr_128, 8, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, addrspace 4) + %110:vgpr_32 = IMAGE_SAMPLE_V1_V2 killed %107, killed %109, undef %111:sgpr_128, 8, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from constant-pool, addrspace 4) %112:vgpr_32 = nofpexcept V_MUL_F32_e32 0, killed %110, implicit $mode, implicit $exec %113:vgpr_32 = nofpexcept V_MUL_F32_e32 0, killed %112, implicit $mode, implicit $exec %114:vgpr_32 = nofpexcept V_MAD_F32_e64 0, killed %113, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-subreg-join.mir b/llvm/test/CodeGen/AMDGPU/coalescer-subreg-join.mir index d5644f39e2b5e..70e486c22187d 100644 --- a/llvm/test/CodeGen/AMDGPU/coalescer-subreg-join.mir +++ b/llvm/test/CodeGen/AMDGPU/coalescer-subreg-join.mir @@ -61,7 +61,7 @@ body: | %11.sub6 = COPY %1 %11.sub7 = COPY %1 %11.sub8 = COPY %1 - dead %18 = IMAGE_SAMPLE_C_D_O_V1_V16 %11, %3, %4, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (load 4) + dead %18 = IMAGE_SAMPLE_C_D_O_V1_V16 %11, %3, %4, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (load (s32)) %20.sub1 = COPY %2 %20.sub2 = COPY %2 %20.sub3 = COPY %2 @@ -70,6 +70,6 @@ body: | %20.sub6 = COPY %2 %20.sub7 = COPY %2 %20.sub8 = COPY %2 - dead %27 = IMAGE_SAMPLE_C_D_O_V1_V16 %20, %5, %6, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (load 4) + dead %27 = IMAGE_SAMPLE_C_D_O_V1_V16 %20, %5, %6, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (load (s32)) ... diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir b/llvm/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir index e09816438f05f..aae0496b48feb 100644 --- a/llvm/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir +++ b/llvm/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir @@ -47,7 +47,7 @@ body: | %4.sub5:sgpr_256 = COPY %1 %4.sub6:sgpr_256 = COPY %1 %4.sub7:sgpr_256 = COPY killed %1 - %5:vgpr_32 = IMAGE_LOAD_V1_V4 killed %3, killed %4, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, addrspace 4) + %5:vgpr_32 = IMAGE_LOAD_V1_V4 killed %3, killed %4, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from constant-pool, addrspace 4) %6:vgpr_32 = nofpexcept V_MAD_F32_e64 0, killed %5, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec %7:vgpr_32 = nofpexcept V_RCP_F32_e32 killed %6, implicit $mode, implicit $exec %8:vgpr_32 = nofpexcept V_MUL_F32_e32 0, killed %7, implicit $mode, implicit $exec @@ -145,10 +145,10 @@ body: | %40:vgpr_32 = nofpexcept V_MAD_F32_e64 0, killed %39, 0, -1090519040, 0, 1056964608, 0, 0, implicit $mode, implicit $exec %41:vgpr_32 = nofpexcept V_MAD_F32_e64 0, killed %40, 0, 0, 0, -1090519040, 0, 0, implicit $mode, implicit $exec %42:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 killed %41, implicit $mode, implicit $exec - %43:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %44:sgpr_128, 12, 0 :: (dereferenceable invariant load 4) + %43:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %44:sgpr_128, 12, 0 :: (dereferenceable invariant load (s32)) %45:vgpr_32 = V_MUL_LO_I32_e64 killed %42, killed %43, implicit $exec %46:vgpr_32 = V_LSHLREV_B32_e32 2, killed %45, implicit $exec - %47:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN killed %46, undef %48:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from constant-pool, align 1, addrspace 4) + %47:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN killed %46, undef %48:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from constant-pool, align 1, addrspace 4) %49:sreg_64 = V_CMP_NE_U32_e64 0, killed %47, implicit $exec %50:sreg_64 = COPY $exec, implicit-def $exec %51:sreg_64 = S_AND_B64 %50, %49, implicit-def dead $scc diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf-broken.mir b/llvm/test/CodeGen/AMDGPU/collapse-endcf-broken.mir index 50d204823205d..ca2db2eaee450 100644 --- a/llvm/test/CodeGen/AMDGPU/collapse-endcf-broken.mir +++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf-broken.mir @@ -25,7 +25,7 @@ body: | ; GXN: $exec = S_OR_B64 $exec, [[COPY1]], implicit-def $scc ; GXN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GXN: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GXN: DS_WRITE_B32 [[DEF]], [[DEF1]], 0, 0, implicit $m0, implicit $exec :: (store 4, addrspace 3) + ; GXN: DS_WRITE_B32 [[DEF]], [[DEF1]], 0, 0, implicit $m0, implicit $exec :: (store (s32), addrspace 3) ; GXN: S_ENDPGM 0 bb.0: liveins: $vgpr0, $sgpr0_sgpr1 @@ -43,7 +43,7 @@ body: | %5:vgpr_32 = IMPLICIT_DEF %6:vgpr_32 = IMPLICIT_DEF - DS_WRITE_B32 %5, %6, 0, 0, implicit $m0, implicit $exec :: (store 4, addrspace 3) + DS_WRITE_B32 %5, %6, 0, 0, implicit $m0, implicit $exec :: (store (s32), addrspace 3) S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf2.mir b/llvm/test/CodeGen/AMDGPU/collapse-endcf2.mir index 7f8ceafda2f96..53e645aa08e99 100644 --- a/llvm/test/CodeGen/AMDGPU/collapse-endcf2.mir +++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf2.mir @@ -34,7 +34,7 @@ body: | ; GCN: S_BRANCH %bb.1 ; GCN: bb.1: ; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000) - ; GCN: undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; GCN: undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; GCN: undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, [[COPY1]], implicit $exec ; GCN: %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %5.sub1 @@ -42,7 +42,7 @@ body: | ; GCN: %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %9, 0, implicit $exec ; GCN: %5.sub3:sgpr_128 = S_MOV_B32 61440 ; GCN: %5.sub2:sgpr_128 = S_MOV_B32 0 - ; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + ; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 1) ; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 2, [[COPY1]], implicit $exec ; GCN: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec ; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY4]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc @@ -54,7 +54,7 @@ body: | ; GCN: %5.sub0:sgpr_128 = COPY %5.sub2 ; GCN: %5.sub1:sgpr_128 = COPY %5.sub2 ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store (s32), addrspace 1) ; GCN: bb.3: ; GCN: successors: %bb.4(0x80000000) ; GCN: $exec = S_OR_B64 $exec, [[COPY4]], implicit-def $scc @@ -65,7 +65,7 @@ body: | ; GCN: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 3, implicit $exec ; GCN: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GCN: $m0 = S_MOV_B32 -1 - ; GCN: DS_WRITE_B32 [[V_MOV_B32_e32_2]], [[V_MOV_B32_e32_1]], 0, 0, implicit $m0, implicit $exec :: (store 4, addrspace 3) + ; GCN: DS_WRITE_B32 [[V_MOV_B32_e32_2]], [[V_MOV_B32_e32_1]], 0, 0, implicit $m0, implicit $exec :: (store (s32), addrspace 3) ; GCN: S_ENDPGM 0 bb.0: successors: %bb.1, %bb.4 @@ -83,7 +83,7 @@ body: | bb.1: successors: %bb.2, %bb.3 - undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4) + undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4) undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, %0, implicit $exec %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec %7:vgpr_32 = COPY %5.sub1 @@ -91,7 +91,7 @@ body: | %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, %7, %9, 0, implicit $exec %5.sub3:sgpr_128 = S_MOV_B32 61440 %5.sub2:sgpr_128 = S_MOV_B32 0 - BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 1) %11:sreg_64 = V_CMP_NE_U32_e64 2, %0, implicit $exec %12:sreg_64 = COPY $exec, implicit-def $exec %13:sreg_64 = S_AND_B64 %12, %11, implicit-def dead $scc @@ -103,7 +103,7 @@ body: | %5.sub0:sgpr_128 = COPY %5.sub2 %5.sub1:sgpr_128 = COPY %5.sub2 %14:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store (s32), addrspace 1) bb.3: $exec = S_OR_B64 $exec, %12, implicit-def $scc @@ -115,7 +115,7 @@ body: | %17:vgpr_32 = V_MOV_B32_e32 3, implicit $exec %18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec $m0 = S_MOV_B32 -1 - DS_WRITE_B32 %18, %17, 0, 0, implicit $m0, implicit $exec :: (store 4, addrspace 3) + DS_WRITE_B32 %18, %17, 0, 0, implicit $m0, implicit $exec :: (store (s32), addrspace 3) S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir b/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir index 3440ed9232601..31aaf84ec8d9d 100644 --- a/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir +++ b/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir @@ -723,7 +723,7 @@ body: | %2:vgpr_32 = COPY $vgpr0 %0:sgpr_64 = COPY $sgpr0_sgpr1 - %3:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, addrspace 4) + %3:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 :: (non-temporal dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, addrspace 4) %15:vgpr_32 = V_ASHRREV_I32_e64 31, %2, implicit $exec %16:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %15, %subreg.sub1 %17:vreg_64 = V_LSHLREV_B64_e64 2, killed %16, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/couldnt-join-subrange-3.mir b/llvm/test/CodeGen/AMDGPU/couldnt-join-subrange-3.mir index b224922f22345..9081ba545201f 100644 --- a/llvm/test/CodeGen/AMDGPU/couldnt-join-subrange-3.mir +++ b/llvm/test/CodeGen/AMDGPU/couldnt-join-subrange-3.mir @@ -37,7 +37,7 @@ body: | ; GCN: S_BRANCH %bb.3 ; GCN: bb.3: ; GCN: successors: %bb.4(0x80000000) - ; GCN: dead %16:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]].sub3, undef %17:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4) + ; GCN: dead %16:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]].sub3, undef %17:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from constant-pool, align 1, addrspace 4) ; GCN: dead %18:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec ; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 $exec, -1, implicit-def dead $scc ; GCN: dead %20:vgpr_32 = V_MOV_B32_e32 0, implicit $exec @@ -89,7 +89,7 @@ body: | S_BRANCH %bb.3 bb.3: - dead %22:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %53.sub3, undef %24:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4) + dead %22:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %53.sub3, undef %24:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from constant-pool, align 1, addrspace 4) dead %60:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec %36:sreg_64 = S_AND_B64 $exec, -1, implicit-def dead $scc dead %67:vgpr_32 = V_MOV_B32_e32 0, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/dbg-value-ends-sched-region.mir b/llvm/test/CodeGen/AMDGPU/dbg-value-ends-sched-region.mir index e427cd51580c1..2d9558e84d55b 100644 --- a/llvm/test/CodeGen/AMDGPU/dbg-value-ends-sched-region.mir +++ b/llvm/test/CodeGen/AMDGPU/dbg-value-ends-sched-region.mir @@ -66,9 +66,9 @@ body: | ; CHECK: dead %16:vgpr_32 = COPY %11.sub0 ; CHECK: undef %17.sub0:vreg_64, %18:sreg_64_xexec = V_ADD_CO_U32_e64 [[DEF4]].sub0, [[DEF6]].sub0, 0, implicit $exec ; CHECK: dead undef %17.sub1:vreg_64, dead %19:sreg_64_xexec = V_ADDC_U32_e64 [[DEF4]].sub1, [[DEF6]].sub1, %18, 0, implicit $exec - ; CHECK: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[DEF1]], 0, 0, implicit $exec :: (load 8, addrspace 1) + ; CHECK: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[DEF1]], 0, 0, implicit $exec :: (load (s64), addrspace 1) ; CHECK: dead %20:sreg_64 = V_CMP_GT_I32_e64 4, [[DEF7]], implicit $exec - ; CHECK: GLOBAL_STORE_DWORDX2 [[COPY]], [[DEF8]], 288, 0, implicit $exec :: (store 8, addrspace 1) + ; CHECK: GLOBAL_STORE_DWORDX2 [[COPY]], [[DEF8]], 288, 0, implicit $exec :: (store (s64), addrspace 1) ; CHECK: bb.2: ; CHECK: successors: %bb.3(0x80000000) ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] @@ -79,7 +79,7 @@ body: | ; CHECK: bb.4: ; CHECK: successors: %bb.5(0x80000000) ; CHECK: dead %21:sreg_64 = COPY $exec - ; CHECK: dead %22:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY1]], 0, 0, implicit $exec :: (load 16, addrspace 1) + ; CHECK: dead %22:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY1]], 0, 0, implicit $exec :: (load (s128), addrspace 1) ; CHECK: DBG_VALUE %22, $noreg, <0x{{[0-9a-f]+}}>, !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), debug-location !DILocation(line: 0, scope: <0x{{[0-9a-f]+}}>) ; CHECK: bb.5: ; CHECK: successors: %bb.3(0x40000000), %bb.1(0x40000000) @@ -109,9 +109,9 @@ body: | dead %16:vgpr_32 = COPY %11.sub0 undef %17.sub0:vreg_64, %18:sreg_64_xexec = V_ADD_CO_U32_e64 %6.sub0, %8.sub0, 0, implicit $exec dead %17.sub1:vreg_64, dead %19:sreg_64_xexec = V_ADDC_U32_e64 %6.sub1, %8.sub1, %18, 0, implicit $exec - %6:vreg_64 = GLOBAL_LOAD_DWORDX2 %3, 0, 0, implicit $exec :: (load 8, addrspace 1) + %6:vreg_64 = GLOBAL_LOAD_DWORDX2 %3, 0, 0, implicit $exec :: (load (s64), addrspace 1) dead %20:sreg_64 = V_CMP_GT_I32_e64 4, %9, implicit $exec - GLOBAL_STORE_DWORDX2 %0, %10, 288, 0, implicit $exec :: (store 8, addrspace 1) + GLOBAL_STORE_DWORDX2 %0, %10, 288, 0, implicit $exec :: (store (s64), addrspace 1) bb.2: %5:vgpr_32 = COPY %13 @@ -122,7 +122,7 @@ body: | bb.4: dead %21:sreg_64 = COPY $exec - %22:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 0, 0, implicit $exec :: (load 16, addrspace 1) + %22:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 0, 0, implicit $exec :: (load (s128), addrspace 1) DBG_VALUE %22, $noreg, !16, !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), debug-location !21 bb.5: diff --git a/llvm/test/CodeGen/AMDGPU/endpgm-dce.mir b/llvm/test/CodeGen/AMDGPU/endpgm-dce.mir index 7522af1724eb2..4d9a5dccbd62c 100644 --- a/llvm/test/CodeGen/AMDGPU/endpgm-dce.mir +++ b/llvm/test/CodeGen/AMDGPU/endpgm-dce.mir @@ -17,7 +17,7 @@ body: | %0 = IMPLICIT_DEF %3 = IMPLICIT_DEF $sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc - %1 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + %1 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) %2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit $mode, implicit $exec %4 = S_ADD_U32 %3, 1, implicit-def $scc S_ENDPGM 0 @@ -49,7 +49,7 @@ body: | --- # GCN-LABEL: name: load_volatile # GCN: $sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc -# GCN-NEXT: dead %1:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4) +# GCN-NEXT: dead %1:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load (s32)) # GCN-NEXT: S_ENDPGM 0 name: load_volatile tracksRegLiveness: true @@ -65,7 +65,7 @@ body: | %0 = IMPLICIT_DEF %3 = IMPLICIT_DEF $sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc - %1 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4) + %1 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load (s32)) %2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit $mode, implicit $exec %4 = S_ADD_U32 %3, 1, implicit-def $scc S_ENDPGM 0 @@ -73,7 +73,7 @@ body: | --- # GCN-LABEL: name: store # GCN: $sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc -# GCN-NEXT: FLAT_STORE_DWORD %0, %1, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) +# GCN-NEXT: FLAT_STORE_DWORD %0, %1, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) # GCN-NEXT: S_ENDPGM 0 name: store tracksRegLiveness: true @@ -86,7 +86,7 @@ body: | %0 = IMPLICIT_DEF %1 = IMPLICIT_DEF $sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc - FLAT_STORE_DWORD %0, %1, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) + FLAT_STORE_DWORD %0, %1, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) S_ENDPGM 0 ... --- diff --git a/llvm/test/CodeGen/AMDGPU/extload-align.ll b/llvm/test/CodeGen/AMDGPU/extload-align.ll index f9392dc969221..094ab0aea440e 100644 --- a/llvm/test/CodeGen/AMDGPU/extload-align.ll +++ b/llvm/test/CodeGen/AMDGPU/extload-align.ll @@ -7,7 +7,7 @@ target datalayout = "A5" ; size and not 4 corresponding to the sign-extended size (i32). ; DEBUG: {{^}}# Machine code for function extload_align: -; DEBUG: (volatile load 2 from %ir.a, addrspace 5) +; DEBUG: (volatile load (s16) from %ir.a, addrspace 5) ; DEBUG: {{^}}# End machine code for function extload_align. define amdgpu_kernel void @extload_align(i32 addrspace(5)* %out, i32 %index) #0 { diff --git a/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll b/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll index 25a7f75a21e95..82606e35b17df 100644 --- a/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll +++ b/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll @@ -12,7 +12,7 @@ define amdgpu_hs void @main([0 x i8] addrspace(6)* inreg %arg) { ; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]] ; GCN: [[DEF1:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF - ; GCN: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[DEF1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[DEF1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource", align 1, addrspace 4) ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 @@ -21,7 +21,7 @@ define amdgpu_hs void @main([0 x i8] addrspace(6)* inreg %arg) { ; GCN: [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF ; GCN: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[DEF2]] ; GCN: [[DEF3:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF - ; GCN: BUFFER_STORE_DWORDX3_OFFEN_exact killed [[COPY4]], [[COPY5]], [[DEF3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_STORE_DWORDX3_OFFEN_exact killed [[COPY4]], [[COPY5]], [[DEF3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s96) into custom "BufferResource", align 1, addrspace 4) ; GCN: S_ENDPGM 0 main_body: %tmp25 = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> undef, i32 undef, i32 0, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/fast-ra-kills-vcc.mir b/llvm/test/CodeGen/AMDGPU/fast-ra-kills-vcc.mir index 265b69ed741e0..182219741ee8b 100644 --- a/llvm/test/CodeGen/AMDGPU/fast-ra-kills-vcc.mir +++ b/llvm/test/CodeGen/AMDGPU/fast-ra-kills-vcc.mir @@ -46,9 +46,9 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK: V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec ; CHECK: renamable $sgpr4_sgpr5 = COPY $vcc - ; CHECK: SI_SPILL_S64_SAVE $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr32 :: (store 8 into %stack.0, align 4, addrspace 5) + ; CHECK: SI_SPILL_S64_SAVE $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5) ; CHECK: renamable $sgpr4_sgpr5 = COPY $vcc - ; CHECK: $vcc = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load 8 from %stack.0, align 4, addrspace 5) + ; CHECK: $vcc = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.0, align 4, addrspace 5) ; CHECK: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, 3, killed $sgpr4_sgpr5, implicit $exec ; CHECK: S_ENDPGM 0, implicit killed $vgpr0, implicit killed renamable $vcc %0:vgpr_32 = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/fastregalloc-self-loop-heuristic.mir b/llvm/test/CodeGen/AMDGPU/fastregalloc-self-loop-heuristic.mir index 2b6b36b24a05e..9cb864c17a59d 100644 --- a/llvm/test/CodeGen/AMDGPU/fastregalloc-self-loop-heuristic.mir +++ b/llvm/test/CodeGen/AMDGPU/fastregalloc-self-loop-heuristic.mir @@ -13,10 +13,10 @@ body: | ; GCN: bb.0: ; GCN: successors: %bb.1(0x80000000) ; GCN: liveins: $vgpr0_vgpr1 - ; GCN: SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, align 4, addrspace 5) + ; GCN: SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5) ; GCN: bb.1: ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; GCN: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5) + ; GCN: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5) ; GCN: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, implicit $exec ; GCN: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec ; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec @@ -48,14 +48,14 @@ body: | ; GCN: bb.0: ; GCN: successors: %bb.1(0x80000000) ; GCN: liveins: $vgpr0_vgpr1 - ; GCN: SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, align 4, addrspace 5) + ; GCN: SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5) ; GCN: bb.1: ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; GCN: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5) + ; GCN: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5) ; GCN: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, implicit $exec ; GCN: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, renamable $vgpr2, 0, 0, implicit $exec ; GCN: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, implicit $exec - ; GCN: SI_SPILL_V32_SAVE $vgpr2, %stack.1, $sgpr32, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) + ; GCN: SI_SPILL_V32_SAVE $vgpr2, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) ; GCN: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, renamable $vgpr2, 0, 0, implicit $exec ; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec ; GCN: bb.2: @@ -90,12 +90,12 @@ body: | ; GCN: bb.0: ; GCN: successors: %bb.1(0x80000000) ; GCN: liveins: $vgpr0_vgpr1 - ; GCN: SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, align 4, addrspace 5) + ; GCN: SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5) ; GCN: bb.1: ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; GCN: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5) + ; GCN: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5) ; GCN: renamable $vgpr2 = V_ADD_U32_e32 1, undef $vgpr0, implicit $exec - ; GCN: SI_SPILL_V32_SAVE $vgpr2, %stack.1, $sgpr32, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) + ; GCN: SI_SPILL_V32_SAVE $vgpr2, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) ; GCN: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, renamable $vgpr2, 0, 0, implicit $exec ; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec ; GCN: bb.2: @@ -126,13 +126,13 @@ body: | ; GCN: bb.0: ; GCN: successors: %bb.1(0x80000000) ; GCN: liveins: $vgpr0_vgpr1 - ; GCN: SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, align 4, addrspace 5) + ; GCN: SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5) ; GCN: bb.1: ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; GCN: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5) + ; GCN: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5) ; GCN: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, undef renamable $vgpr0, 0, 0, implicit $exec ; GCN: renamable $vgpr0 = V_ADD_U32_e64 1, 1, 0, implicit $exec - ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) + ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) ; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec ; GCN: bb.2: ; GCN: S_ENDPGM 0 @@ -162,10 +162,10 @@ body: | ; GCN: bb.0: ; GCN: successors: %bb.1(0x80000000) ; GCN: liveins: $vgpr0_vgpr1 - ; GCN: SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, align 4, addrspace 5) + ; GCN: SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5) ; GCN: bb.1: ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; GCN: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5) + ; GCN: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5) ; GCN: undef renamable $vgpr3 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit-def dead $vgpr2_vgpr3 ; GCN: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, undef renamable $vgpr1, 0, 0, implicit $exec ; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/flat-error-unsupported-gpu-hsa.ll b/llvm/test/CodeGen/AMDGPU/flat-error-unsupported-gpu-hsa.ll index 8881d9e7088c4..21654bdb07f0f 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-error-unsupported-gpu-hsa.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-error-unsupported-gpu-hsa.ll @@ -7,7 +7,7 @@ ; Flat instructions should not select if the target device doesn't ; support them. The default device should be able to select for HSA. -; ERROR: LLVM ERROR: Cannot select: {{0x[0-9,a-f]+|t[0-9]+}}: i32,ch = load<(volatile load 4 from %ir.flat.ptr.load)> +; ERROR: LLVM ERROR: Cannot select: {{0x[0-9,a-f]+|t[0-9]+}}: i32,ch = load<(volatile load (s32) from %ir.flat.ptr.load)> ; HSA-DEFAULT: flat_load_dword define amdgpu_kernel void @load_flat_i32(i32* %flat.ptr) { %load = load volatile i32, i32* %flat.ptr, align 4 diff --git a/llvm/test/CodeGen/AMDGPU/flat-load-clustering.mir b/llvm/test/CodeGen/AMDGPU/flat-load-clustering.mir index 2eb9a3f301773..d5aa9b6309173 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-load-clustering.mir +++ b/llvm/test/CodeGen/AMDGPU/flat-load-clustering.mir @@ -54,24 +54,24 @@ body: | %1 = COPY $sgpr4_sgpr5 %0 = COPY $vgpr0 - %3 = S_LOAD_DWORDX2_IMM %1, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) - %4 = S_LOAD_DWORDX2_IMM %1, 8, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) + %3 = S_LOAD_DWORDX2_IMM %1, 0, 0 :: (non-temporal dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`) + %4 = S_LOAD_DWORDX2_IMM %1, 8, 0 :: (non-temporal dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`) %7 = V_LSHLREV_B32_e32 2, %0, implicit $exec %2 = V_MOV_B32_e32 0, implicit $exec undef %12.sub0 = V_ADD_CO_U32_e32 %4.sub0, %7, implicit-def $vcc, implicit $exec %11 = COPY %4.sub1 %12.sub1 = V_ADDC_U32_e32 %11, %2, implicit-def dead $vcc, implicit killed $vcc, implicit $exec - %5 = FLAT_LOAD_DWORD %12, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.gep1) + %5 = FLAT_LOAD_DWORD %12, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %ir.gep1) undef %9.sub0 = V_ADD_CO_U32_e32 %3.sub0, %7, implicit-def $vcc, implicit $exec %8 = COPY %3.sub1 %9.sub1 = V_ADDC_U32_e32 %8, %2, implicit-def dead $vcc, implicit killed $vcc, implicit $exec undef %13.sub0 = V_ADD_CO_U32_e32 16, %12.sub0, implicit-def $vcc, implicit $exec %13.sub1 = V_ADDC_U32_e32 %12.sub1, %2, implicit-def dead $vcc, implicit killed $vcc, implicit $exec - %6 = FLAT_LOAD_DWORD %13, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.gep34) + %6 = FLAT_LOAD_DWORD %13, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %ir.gep34) undef %10.sub0 = V_ADD_CO_U32_e32 16, %9.sub0, implicit-def $vcc, implicit $exec %10.sub1 = V_ADDC_U32_e32 %9.sub1, %2, implicit-def dead $vcc, implicit killed $vcc, implicit $exec - FLAT_STORE_DWORD %9, %5, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.gep2) - FLAT_STORE_DWORD %10, %6, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.gep4) + FLAT_STORE_DWORD %9, %5, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.gep2) + FLAT_STORE_DWORD %10, %6, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.gep4) S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-fold-fi.mir b/llvm/test/CodeGen/AMDGPU/flat-scratch-fold-fi.mir index e24523f2a9d1f..d957efee38ea6 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-scratch-fold-fi.mir +++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-fold-fi.mir @@ -8,10 +8,10 @@ stack: body: | bb.0.entry: ; GCN-LABEL: name: test_fold_fi_scratch_load_vgpr - ; GCN: [[SCRATCH_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR %stack.0, 4, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5) + ; GCN: [[SCRATCH_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR %stack.0, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) ; GCN: S_ENDPGM 0 %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec - %1:vgpr_32 = SCRATCH_LOAD_DWORD %0:vgpr_32, 4, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5) + %1:vgpr_32 = SCRATCH_LOAD_DWORD %0:vgpr_32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) S_ENDPGM 0 ... @@ -23,10 +23,10 @@ stack: body: | bb.0.entry: ; GCN-LABEL: name: test_fold_fi_scratch_load_sgpr - ; GCN: [[SCRATCH_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR %stack.0, 4, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5) + ; GCN: [[SCRATCH_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR %stack.0, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) ; GCN: S_ENDPGM 0 %0:sgpr_32 = S_MOV_B32 %stack.0 - %1:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR %0:sgpr_32, 4, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5) + %1:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR %0:sgpr_32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) S_ENDPGM 0 ... @@ -39,11 +39,11 @@ body: | bb.0.entry: ; GCN-LABEL: name: test_fold_fi_scratch_store_vgpr ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN: SCRATCH_STORE_DWORD_SADDR [[DEF]], %stack.0, 4, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5) + ; GCN: SCRATCH_STORE_DWORD_SADDR [[DEF]], %stack.0, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) ; GCN: S_ENDPGM 0 %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec %1:vgpr_32 = IMPLICIT_DEF - SCRATCH_STORE_DWORD %1:vgpr_32, %0:vgpr_32, 4, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5) + SCRATCH_STORE_DWORD %1:vgpr_32, %0:vgpr_32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) S_ENDPGM 0 ... @@ -57,11 +57,11 @@ body: | ; GCN-LABEL: name: test_no_fold_fi_scratch_store_vgpr ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN: SCRATCH_STORE_DWORD [[V_MOV_B32_e32_]], [[DEF]], 4, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5) + ; GCN: SCRATCH_STORE_DWORD [[V_MOV_B32_e32_]], [[DEF]], 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) ; GCN: S_ENDPGM 0 %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec %1:vgpr_32 = IMPLICIT_DEF - SCRATCH_STORE_DWORD %0:vgpr_32, %1:vgpr_32, 4, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5) + SCRATCH_STORE_DWORD %0:vgpr_32, %1:vgpr_32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) S_ENDPGM 0 ... @@ -74,11 +74,11 @@ body: | bb.0.entry: ; GCN-LABEL: name: test_fold_fi_scratch_store_sgpr ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN: SCRATCH_STORE_DWORD_SADDR [[DEF]], %stack.0, 4, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5) + ; GCN: SCRATCH_STORE_DWORD_SADDR [[DEF]], %stack.0, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) ; GCN: S_ENDPGM 0 %0:sgpr_32 = S_MOV_B32 %stack.0 %1:vgpr_32 = IMPLICIT_DEF - SCRATCH_STORE_DWORD_SADDR %1:vgpr_32, %0:sgpr_32, 4, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5) + SCRATCH_STORE_DWORD_SADDR %1:vgpr_32, %0:sgpr_32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/fold-imm-f16-f32.mir b/llvm/test/CodeGen/AMDGPU/fold-imm-f16-f32.mir index 26917f4c2ef94..75d5a73347f54 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-imm-f16-f32.mir +++ b/llvm/test/CodeGen/AMDGPU/fold-imm-f16-f32.mir @@ -158,10 +158,10 @@ body: | %8 = S_MOV_B32 61440 %9 = S_MOV_B32 -1 %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4 - %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`) + %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load (s16) from `half addrspace(1)* undef`) %12 = V_MOV_B32_e32 1065353216, implicit $exec %13 = V_ADD_F16_e64 0, killed %11, 0, %12, 0, 0, implicit $mode, implicit $exec - BUFFER_STORE_SHORT_OFFSET killed %13, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) + BUFFER_STORE_SHORT_OFFSET killed %13, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `half addrspace(1)* undef`) S_ENDPGM 0 ... @@ -222,13 +222,13 @@ body: | %8 = S_MOV_B32 61440 %9 = S_MOV_B32 -1 %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4 - %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`) - %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`) + %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load (s16) from `half addrspace(1)* undef`) + %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load (s32) from `float addrspace(1)* undef`) %13 = V_MOV_B32_e32 1065353216, implicit $exec %14 = V_ADD_F16_e64 0, killed %11, 0, %13, 0, 0, implicit $mode, implicit $exec %15 = V_ADD_F16_e64 0, killed %12, 0, killed %13, 0, 0, implicit $mode, implicit $exec - BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) - BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) + BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `half addrspace(1)* undef`) + BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `half addrspace(1)* undef`) S_ENDPGM 0 ... @@ -289,14 +289,14 @@ body: | %8 = S_MOV_B32 61440 %9 = S_MOV_B32 -1 %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4 - %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`) - %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`) - %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`) + %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load (s16) from `half addrspace(1)* undef`) + %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load (s16) from `half addrspace(1)* undef`) + %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load (s32) from `float addrspace(1)* undef`) %14 = V_MOV_B32_e32 1065353216, implicit $exec %15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $mode, implicit $exec %16 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $mode, implicit $exec - BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) - BUFFER_STORE_DWORD_OFFSET killed %16, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`) + BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `half addrspace(1)* undef`) + BUFFER_STORE_DWORD_OFFSET killed %16, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store (s32) into `float addrspace(1)* undef`) S_ENDPGM 0 ... @@ -360,16 +360,16 @@ body: | %8 = S_MOV_B32 61440 %9 = S_MOV_B32 -1 %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4 - %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`) - %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`) - %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`) + %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load (s16) from `half addrspace(1)* undef`) + %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load (s16) from `half addrspace(1)* undef`) + %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load (s32) from `float addrspace(1)* undef`) %14 = V_MOV_B32_e32 1065353216, implicit $exec %15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $mode, implicit $exec %16 = V_ADD_F16_e64 0, %12, 0, %14, 0, 0, implicit $mode, implicit $exec %17 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $mode, implicit $exec - BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) - BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) - BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`) + BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `half addrspace(1)* undef`) + BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `half addrspace(1)* undef`) + BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store (s32) into `float addrspace(1)* undef`) S_ENDPGM 0 ... @@ -426,13 +426,13 @@ body: | %8 = S_MOV_B32 61440 %9 = S_MOV_B32 -1 %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4 - %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`) - %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`) + %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load (s16) from `half addrspace(1)* undef`) + %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load (s32) from `float addrspace(1)* undef`) %13 = V_MOV_B32_e32 1, implicit $exec %14 = V_ADD_F16_e64 0, killed %11, 0, %13, 0, 0, implicit $mode, implicit $exec %15 = V_ADD_F16_e64 0, killed %12, 0, killed %13, 0, 0, implicit $mode, implicit $exec - BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) - BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) + BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `half addrspace(1)* undef`) + BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `half addrspace(1)* undef`) S_ENDPGM 0 ... @@ -492,16 +492,16 @@ body: | %8 = S_MOV_B32 61440 %9 = S_MOV_B32 -1 %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4 - %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`) - %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`) - %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`) + %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load (s16) from `half addrspace(1)* undef`) + %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load (s16) from `half addrspace(1)* undef`) + %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load (s32) from `float addrspace(1)* undef`) %14 = V_MOV_B32_e32 -2, implicit $exec %15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $mode, implicit $exec %16 = V_ADD_F16_e64 0, %12, 0, %14, 0, 0, implicit $mode, implicit $exec %17 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $mode, implicit $exec - BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) - BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) - BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`) + BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `half addrspace(1)* undef`) + BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `half addrspace(1)* undef`) + BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store (s32) into `float addrspace(1)* undef`) S_ENDPGM 0 ... @@ -562,13 +562,13 @@ body: | %8 = S_MOV_B32 61440 %9 = S_MOV_B32 -1 %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4 - %11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`) - %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`) + %11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load (s32) from `float addrspace(1)* undef`) + %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load (s32) from `float addrspace(1)* undef`) %13 = V_MOV_B32_e32 15360, implicit $exec %14 = V_ADD_F32_e64 0, %11, 0, %13, 0, 0, implicit $mode, implicit $exec %15 = V_ADD_F32_e64 0, %12, 0, %13, 0, 0, implicit $mode, implicit $exec - BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`) - BUFFER_STORE_DWORD_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`) + BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store (s32) into `float addrspace(1)* undef`) + BUFFER_STORE_DWORD_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store (s32) into `float addrspace(1)* undef`) S_ENDPGM 0 ... @@ -629,13 +629,13 @@ body: | %8 = S_MOV_B32 61440 %9 = S_MOV_B32 -1 %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4 - %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`) - %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`) + %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load (s16) from `half addrspace(1)* undef`) + %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load (s16) from `half addrspace(1)* undef`) %13 = V_MOV_B32_e32 80886784, implicit $exec %14 = V_ADD_F16_e64 0, %11, 0, %13, 0, 0, implicit $mode, implicit $exec %15 = V_ADD_F16_e64 0, %12, 0, %13, 0, 0, implicit $mode, implicit $exec - BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) - BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) + BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `half addrspace(1)* undef`) + BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `half addrspace(1)* undef`) S_ENDPGM 0 ... @@ -695,13 +695,13 @@ body: | %8 = S_MOV_B32 61440 %9 = S_MOV_B32 -1 %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4 - %11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`) - %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`) + %11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load (s32) from `float addrspace(1)* undef`) + %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load (s16) from `half addrspace(1)* undef`) %13 = V_MOV_B32_e32 305413120, implicit $exec %14 = V_ADD_F32_e64 0, %11, 0, %13, 0, 0, implicit $mode, implicit $exec %15 = V_ADD_F16_e64 0, %12, 0, %13, 0, 0, implicit $mode, implicit $exec - BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`) - BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) + BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store (s32) into `float addrspace(1)* undef`) + BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `half addrspace(1)* undef`) S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/fold-operands-remove-m0-redef.mir b/llvm/test/CodeGen/AMDGPU/fold-operands-remove-m0-redef.mir index 10b49e6848311..2e976449ee5e6 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-operands-remove-m0-redef.mir +++ b/llvm/test/CodeGen/AMDGPU/fold-operands-remove-m0-redef.mir @@ -32,14 +32,14 @@ body: | ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 ; GCN: $m0 = COPY [[COPY1]] - ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) - ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s32)) + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load (s32)) %0:vgpr_32 = COPY $vgpr0 %1:sgpr_32 = COPY $sgpr0 $m0 = COPY %1 - %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load (s32)) $m0 = COPY %1 - %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load (s32)) ... @@ -56,15 +56,15 @@ body: | ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 ; GCN: $m0 = COPY [[COPY1]] - ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) - ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s32)) + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load (s32)) %0:vgpr_32 = COPY $vgpr0 %1:sgpr_32 = COPY $sgpr0 $m0 = COPY %1 - %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load (s32)) $m0 = COPY %1 $m0 = COPY %1 - %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load (s32)) ... @@ -82,16 +82,16 @@ body: | ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1 ; GCN: $m0 = COPY [[COPY1]] - ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s32)) ; GCN: $m0 = COPY [[COPY2]] - ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load (s32)) %0:vgpr_32 = COPY $vgpr0 %1:sgpr_32 = COPY $sgpr0 %2:sgpr_32 = COPY $sgpr1 $m0 = COPY %1 - %3:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + %3:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load (s32)) $m0 = COPY %2 - %4:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + %4:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load (s32)) ... @@ -109,17 +109,17 @@ body: | ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1 ; GCN: $m0 = COPY [[COPY1]] - ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s32)) ; GCN: $m0 = COPY [[COPY2]] - ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load (s32)) %0:vgpr_32 = COPY $vgpr0 %1:sgpr_32 = COPY $sgpr0 %2:sgpr_32 = COPY $sgpr1 $m0 = COPY %1 - %3:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + %3:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load (s32)) $m0 = COPY %1 $m0 = COPY %2 - %4:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + %4:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load (s32)) ... @@ -138,18 +138,18 @@ body: | ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1 ; GCN: $m0 = COPY [[COPY1]] - ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s32)) ; GCN: $m0 = COPY [[COPY2]] ; GCN: $m0 = COPY [[COPY1]] - ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load (s32)) %0:vgpr_32 = COPY $vgpr0 %1:sgpr_32 = COPY $sgpr0 %2:sgpr_32 = COPY $sgpr1 $m0 = COPY %1 - %3:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + %3:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load (s32)) $m0 = COPY %2 $m0 = COPY %1 - %4:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + %4:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load (s32)) ... @@ -166,14 +166,14 @@ body: | ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 ; GCN: $m0 = S_MOV_B32 -1 - ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) - ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s32)) + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load (s32)) %0:vgpr_32 = COPY $vgpr0 %1:sgpr_32 = COPY $sgpr0 $m0 = S_MOV_B32 -1 - %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load (s32)) $m0 = S_MOV_B32 -1 - %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load (s32)) ... @@ -190,15 +190,15 @@ body: | ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 ; GCN: $m0 = COPY [[COPY1]] - ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s32)) ; GCN: $m0 = IMPLICIT_DEF - ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load (s32)) %0:vgpr_32 = COPY $vgpr0 %1:sgpr_32 = COPY $sgpr0 $m0 = COPY %1 - %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load (s32)) $m0 = IMPLICIT_DEF - %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load (s32)) ... @@ -215,15 +215,15 @@ body: | ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 ; GCN: $m0 = COPY [[COPY1]] - ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s32)) ; GCN: S_NOP 0, implicit-def $m0 - ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load (s32)) %0:vgpr_32 = COPY $vgpr0 %1:sgpr_32 = COPY $sgpr0 $m0 = COPY %1 - %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load (s32)) S_NOP 0, implicit-def $m0 - %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load (s32)) ... @@ -241,19 +241,19 @@ body: | ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1 ; GCN: $m0 = COPY [[COPY1]] - ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s32)) ; GCN: $m0 = COPY [[COPY2]] - ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) - ; GCN: [[DS_READ_B32_2:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 128, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load (s32)) + ; GCN: [[DS_READ_B32_2:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 128, 0, implicit $m0, implicit $exec :: (load (s32)) %0:vgpr_32 = COPY $vgpr0 %1:sgpr_32 = COPY $sgpr0 %2:sgpr_32 = COPY $sgpr1 $m0 = COPY %1 - %3:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + %3:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load (s32)) $m0 = COPY %2 - %4:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + %4:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load (s32)) $m0 = COPY %2 - %5:vgpr_32 = DS_READ_B32 %0, 128, 0, implicit $m0, implicit $exec :: (load 4) + %5:vgpr_32 = DS_READ_B32 %0, 128, 0, implicit $m0, implicit $exec :: (load (s32)) ... --- @@ -269,17 +269,17 @@ body: | ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 ; GCN: $m0 = COPY [[COPY1]] - ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s32)) ; GCN: dead $sgpr30_sgpr31 = SI_CALL undef $sgpr6_sgpr7, @func, csr_amdgpu_highregs ; GCN: $m0 = COPY [[COPY1]] - ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load (s32)) %0:vgpr_32 = COPY $vgpr0 %1:sgpr_32 = COPY $sgpr0 $m0 = COPY %1 - %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load (s32)) dead $sgpr30_sgpr31 = SI_CALL undef $sgpr6_sgpr7, @func, csr_amdgpu_highregs $m0 = COPY %1 - %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load (s32)) ... @@ -295,21 +295,21 @@ body: | ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 ; GCN: $m0 = COPY [[COPY1]] - ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s32)) ; GCN: bb.1: ; GCN: $m0 = COPY [[COPY1]] - ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load (s32)) bb.0: liveins: $vgpr0, $sgpr0 %0:vgpr_32 = COPY $vgpr0 %1:sgpr_32 = COPY $sgpr0 $m0 = COPY %1 - %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load (s32)) bb.1: $m0 = COPY %1 - %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load (s32)) ... @@ -326,15 +326,15 @@ body: | ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 ; GCN: $m0 = COPY [[COPY1]] - ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s32)) ; GCN: $m0 = COPY $m0 - ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load (s32)) %0:vgpr_32 = COPY $vgpr0 %1:sgpr_32 = COPY $sgpr0 $m0 = COPY %1 - %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load (s32)) $m0 = COPY $m0 - %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load (s32)) ... @@ -351,16 +351,16 @@ body: | ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 ; GCN: $m0 = COPY $sgpr0 - ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s32)) ; GCN: $sgpr0 = S_MOV_B32 0 ; GCN: $m0 = COPY $sgpr0 - ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load 4) + ; GCN: [[DS_READ_B32_1:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 64, 0, implicit $m0, implicit $exec :: (load (s32)) %0:vgpr_32 = COPY $vgpr0 %1:sgpr_32 = COPY $sgpr0 $m0 = COPY $sgpr0 - %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load 4) + %2:vgpr_32 = DS_READ_B32 %0, 0, 0, implicit $m0, implicit $exec :: (load (s32)) $sgpr0 = S_MOV_B32 0 $m0 = COPY $sgpr0 - %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load 4) + %3:vgpr_32 = DS_READ_B32 %0, 64, 0, implicit $m0, implicit $exec :: (load (s32)) ... diff --git a/llvm/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir b/llvm/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir index 51ad23780803a..c4de9f3623657 100644 --- a/llvm/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir +++ b/llvm/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir @@ -8,7 +8,7 @@ name: flat_atomic_fcmpswap_to_s_denorm_mode body: | bb.0: - FLAT_ATOMIC_FCMPSWAP undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + FLAT_ATOMIC_FCMPSWAP undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`) S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... @@ -20,7 +20,7 @@ body: | name: flat_atomic_fcmpswap_x2_to_s_denorm_mode body: | bb.0: - FLAT_ATOMIC_FCMPSWAP_X2 undef %0:vreg_64, undef %1:vreg_128, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + FLAT_ATOMIC_FCMPSWAP_X2 undef %0:vreg_64, undef %1:vreg_128, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`) S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... @@ -32,7 +32,7 @@ body: | name: flat_atomic_fmax_to_s_denorm_mode body: | bb.0: - FLAT_ATOMIC_FMAX undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + FLAT_ATOMIC_FMAX undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`) S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... @@ -44,7 +44,7 @@ body: | name: flat_atomic_fmax_x2_to_s_denorm_mode body: | bb.0: - FLAT_ATOMIC_FMAX_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + FLAT_ATOMIC_FMAX_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`) S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... @@ -56,7 +56,7 @@ body: | name: flat_atomic_fmin_to_s_denorm_mode body: | bb.0: - FLAT_ATOMIC_FMIN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + FLAT_ATOMIC_FMIN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`) S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... @@ -68,7 +68,7 @@ body: | name: flat_atomic_fmin_x2_to_s_denorm_mode body: | bb.0: - FLAT_ATOMIC_FMIN_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + FLAT_ATOMIC_FMIN_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`) S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... @@ -80,7 +80,7 @@ body: | name: flat_atomic_fcmpswap_x2_rtn_to_s_denorm_mode body: | bb.0: - %2:vreg_64 = FLAT_ATOMIC_FCMPSWAP_X2_RTN undef %0:vreg_64, undef %1:vreg_128, 0, 1, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + %2:vreg_64 = FLAT_ATOMIC_FCMPSWAP_X2_RTN undef %0:vreg_64, undef %1:vreg_128, 0, 1, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`) S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... @@ -92,7 +92,7 @@ body: | name: flat_atomic_fmax_rtn_to_s_denorm_mode body: | bb.0: - %2:vgpr_32 = FLAT_ATOMIC_FMAX_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, 1, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + %2:vgpr_32 = FLAT_ATOMIC_FMAX_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, 1, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`) S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... @@ -104,7 +104,7 @@ body: | name: flat_atomic_fmax_x2_rtn_to_s_denorm_mode body: | bb.0: - %2:vreg_64 = FLAT_ATOMIC_FMAX_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 1, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + %2:vreg_64 = FLAT_ATOMIC_FMAX_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 1, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`) S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... @@ -116,7 +116,7 @@ body: | name: flat_atomic_fmin_rtn_to_s_denorm_mode body: | bb.0: - %2:vgpr_32 = FLAT_ATOMIC_FMIN_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, 1, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + %2:vgpr_32 = FLAT_ATOMIC_FMIN_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, 1, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`) S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... @@ -128,7 +128,7 @@ body: | name: flat_atomic_fmin_x2_rtn_to_s_denorm_mode body: | bb.0: - %2:vreg_64 = FLAT_ATOMIC_FMIN_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 1, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + %2:vreg_64 = FLAT_ATOMIC_FMIN_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 1, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`) S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... @@ -140,7 +140,7 @@ body: | name: flat_atomic_fcmpswap_rtn_to_s_denorm_mode body: | bb.0: - %2:vgpr_32 = FLAT_ATOMIC_FCMPSWAP_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 1, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + %2:vgpr_32 = FLAT_ATOMIC_FCMPSWAP_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 1, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`) S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... @@ -152,7 +152,7 @@ body: | name: global_atomic_fcmpswap_to_s_denorm_mode body: | bb.0: - GLOBAL_ATOMIC_FCMPSWAP undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + GLOBAL_ATOMIC_FCMPSWAP undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`) S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... @@ -164,7 +164,7 @@ body: | name: global_atomic_fcmpswap_x2_to_s_denorm_mode body: | bb.0: - GLOBAL_ATOMIC_FCMPSWAP_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + GLOBAL_ATOMIC_FCMPSWAP_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`) S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... @@ -176,7 +176,7 @@ body: | name: global_atomic_fmax_to_s_denorm_mode body: | bb.0: - GLOBAL_ATOMIC_FMAX undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + GLOBAL_ATOMIC_FMAX undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`) S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... @@ -188,7 +188,7 @@ body: | name: global_atomic_fmax_x2_to_s_denorm_mode body: | bb.0: - GLOBAL_ATOMIC_FMAX_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + GLOBAL_ATOMIC_FMAX_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`) S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... @@ -200,7 +200,7 @@ body: | name: global_atomic_fmin_to_s_denorm_mode body: | bb.0: - GLOBAL_ATOMIC_FMIN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + GLOBAL_ATOMIC_FMIN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`) S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... @@ -212,7 +212,7 @@ body: | name: global_atomic_fmin_x2_to_s_denorm_mode body: | bb.0: - GLOBAL_ATOMIC_FMIN_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + GLOBAL_ATOMIC_FMIN_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`) S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... @@ -224,7 +224,7 @@ body: | name: global_atomic_fcmpswap_rtn_to_s_denorm_mode body: | bb.0: - %2:vgpr_32 = GLOBAL_ATOMIC_FCMPSWAP_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + %2:vgpr_32 = GLOBAL_ATOMIC_FCMPSWAP_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`) S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... @@ -236,7 +236,7 @@ body: | name: global_atomic_fcmpswap_x2_rtn_to_s_denorm_mode body: | bb.0: - %2:vreg_64 = GLOBAL_ATOMIC_FCMPSWAP_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + %2:vreg_64 = GLOBAL_ATOMIC_FCMPSWAP_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`) S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... @@ -248,7 +248,7 @@ body: | name: global_atomic_fmax_rtn_to_s_denorm_mode body: | bb.0: - %2:vgpr_32 = GLOBAL_ATOMIC_FMAX_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + %2:vgpr_32 = GLOBAL_ATOMIC_FMAX_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`) S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... @@ -260,7 +260,7 @@ body: | name: global_atomic_fmax_x2_rtn_to_s_denorm_mode body: | bb.0: - %2:vreg_64 = GLOBAL_ATOMIC_FMAX_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + %2:vreg_64 = GLOBAL_ATOMIC_FMAX_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`) S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... @@ -272,7 +272,7 @@ body: | name: global_atomic_fmin_rtn_to_s_denorm_mode body: | bb.0: - %2:vgpr_32 = GLOBAL_ATOMIC_FMIN_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + %2:vgpr_32 = GLOBAL_ATOMIC_FMIN_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`) S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... @@ -284,7 +284,7 @@ body: | name: global_atomic_fmin_x2_rtn_to_s_denorm_mode body: | bb.0: - %2:vreg_64 = GLOBAL_ATOMIC_FMIN_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + %2:vreg_64 = GLOBAL_ATOMIC_FMIN_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`) S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... @@ -296,7 +296,7 @@ body: | name: global_atomic_fcmpswap_saddr_to_s_denorm_mode body: | bb.0: - GLOBAL_ATOMIC_FCMPSWAP_SADDR undef %0:vgpr_32, undef %1:vgpr_32, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + GLOBAL_ATOMIC_FCMPSWAP_SADDR undef %0:vgpr_32, undef %1:vgpr_32, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`) S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... @@ -308,7 +308,7 @@ body: | name: global_atomic_fcmpswap_x2_saddr_rtn_to_s_denorm_mode body: | bb.0: - %2:vreg_64 = GLOBAL_ATOMIC_FCMPSWAP_X2_SADDR_RTN undef %0:vgpr_32, undef %1:vreg_64, undef %3:sgpr_64, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + %2:vreg_64 = GLOBAL_ATOMIC_FCMPSWAP_X2_SADDR_RTN undef %0:vgpr_32, undef %1:vreg_64, undef %3:sgpr_64, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`) S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... @@ -320,7 +320,7 @@ body: | name: global_atomic_fmax_saddr_rtn_to_s_denorm_mode body: | bb.0: - %2:vgpr_32 = GLOBAL_ATOMIC_FMAX_SADDR_RTN undef %0:vgpr_32, undef %1:vgpr_32, undef %3:sgpr_64, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + %2:vgpr_32 = GLOBAL_ATOMIC_FMAX_SADDR_RTN undef %0:vgpr_32, undef %1:vgpr_32, undef %3:sgpr_64, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`) S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... @@ -332,7 +332,7 @@ body: | name: global_atomic_fmax_x2_saddr_rtn_to_s_denorm_mode body: | bb.0: - %2:vreg_64 = GLOBAL_ATOMIC_FMAX_X2_SADDR_RTN undef %0:vgpr_32, undef %1:vreg_64, undef %3:sgpr_64, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + %2:vreg_64 = GLOBAL_ATOMIC_FMAX_X2_SADDR_RTN undef %0:vgpr_32, undef %1:vreg_64, undef %3:sgpr_64, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`) S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... @@ -344,7 +344,7 @@ body: | name: global_atomic_fmin_saddr_rtn_to_s_denorm_mode body: | bb.0: - %2:vgpr_32 = GLOBAL_ATOMIC_FMIN_SADDR_RTN undef %0:vgpr_32, undef %1:vgpr_32, undef %3:sgpr_64, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + %2:vgpr_32 = GLOBAL_ATOMIC_FMIN_SADDR_RTN undef %0:vgpr_32, undef %1:vgpr_32, undef %3:sgpr_64, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`) S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... @@ -356,7 +356,7 @@ body: | name: global_atomic_fmin_x2_saddr_rtn_to_s_denorm_mode body: | bb.0: - %2:vreg_64 = GLOBAL_ATOMIC_FMIN_X2_SADDR_RTN undef %0:vgpr_32, undef %1:vreg_64, undef %3:sgpr_64, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + %2:vreg_64 = GLOBAL_ATOMIC_FMIN_X2_SADDR_RTN undef %0:vgpr_32, undef %1:vreg_64, undef %3:sgpr_64, 0, 1, implicit $exec :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`) S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... @@ -368,7 +368,7 @@ body: | name: flat_fp_atomic_to_s_denorm_mode_waitcnt body: | bb.0: - FLAT_ATOMIC_FMIN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + FLAT_ATOMIC_FMIN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`) S_WAITCNT 0 S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... @@ -381,7 +381,7 @@ body: | name: flat_fp_atomic_to_s_denorm_mode_valu body: | bb.0: - FLAT_ATOMIC_FMIN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + FLAT_ATOMIC_FMIN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst (s32) on `float addrspace(1)* undef`) %2:vgpr_32 = V_ADD_F32_e32 undef %1:vgpr_32, undef %1:vgpr_32, implicit $mode, implicit $exec S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... diff --git a/llvm/test/CodeGen/AMDGPU/frame-lowering-fp-adjusted.mir b/llvm/test/CodeGen/AMDGPU/frame-lowering-fp-adjusted.mir index 78a9d389ab371..b2ba63cc5c14f 100644 --- a/llvm/test/CodeGen/AMDGPU/frame-lowering-fp-adjusted.mir +++ b/llvm/test/CodeGen/AMDGPU/frame-lowering-fp-adjusted.mir @@ -46,4 +46,4 @@ body: | liveins: $sgpr4, $sgpr5, $sgpr9, $sgpr22, $vgpr0, $sgpr6_sgpr7 renamable $vgpr2 = IMPLICIT_DEF - SI_SPILL_V32_SAVE killed $vgpr2, %stack.0, $sgpr32, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) + SI_SPILL_V32_SAVE killed $vgpr2, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) diff --git a/llvm/test/CodeGen/AMDGPU/hard-clauses.mir b/llvm/test/CodeGen/AMDGPU/hard-clauses.mir index c1615d98fa8a4..9ff28639f4342 100644 --- a/llvm/test/CodeGen/AMDGPU/hard-clauses.mir +++ b/llvm/test/CodeGen/AMDGPU/hard-clauses.mir @@ -218,10 +218,10 @@ body: | liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; CHECK-LABEL: name: mimg_nsa ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - ; CHECK: $vgpr10_vgpr11_vgpr12_vgpr13 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16) - ; CHECK: $vgpr20_vgpr21_vgpr22_vgpr23 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16) - $vgpr10_vgpr11_vgpr12_vgpr13 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16) - $vgpr20_vgpr21_vgpr22_vgpr23 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16) + ; CHECK: $vgpr10_vgpr11_vgpr12_vgpr13 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128)) + ; CHECK: $vgpr20_vgpr21_vgpr22_vgpr23 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128)) + $vgpr10_vgpr11_vgpr12_vgpr13 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128)) + $vgpr20_vgpr21_vgpr22_vgpr23 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128)) ... --- @@ -232,10 +232,10 @@ body: | liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; CHECK-LABEL: name: mimg_nsa_mixed ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - ; CHECK: $vgpr10_vgpr11_vgpr12_vgpr13 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16) - ; CHECK: $vgpr14 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 $vgpr5_vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource") - ; CHECK: $vgpr20_vgpr21_vgpr22_vgpr23 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16) - $vgpr10_vgpr11_vgpr12_vgpr13 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16) - $vgpr14 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 $vgpr5_vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource") - $vgpr20_vgpr21_vgpr22_vgpr23 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16) + ; CHECK: $vgpr10_vgpr11_vgpr12_vgpr13 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128)) + ; CHECK: $vgpr14 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 $vgpr5_vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") + ; CHECK: $vgpr20_vgpr21_vgpr22_vgpr23 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128)) + $vgpr10_vgpr11_vgpr12_vgpr13 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128)) + $vgpr14 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 $vgpr5_vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") + $vgpr20_vgpr21_vgpr22_vgpr23 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128)) ... diff --git a/llvm/test/CodeGen/AMDGPU/i1_copy_phi_with_phi_incoming_value.mir b/llvm/test/CodeGen/AMDGPU/i1_copy_phi_with_phi_incoming_value.mir index 856893ae02057..3e7c588b983b0 100644 --- a/llvm/test/CodeGen/AMDGPU/i1_copy_phi_with_phi_incoming_value.mir +++ b/llvm/test/CodeGen/AMDGPU/i1_copy_phi_with_phi_incoming_value.mir @@ -11,7 +11,7 @@ body: | ; GCN: liveins: $vgpr0, $sgpr4_sgpr5 ; GCN: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5 ; GCN: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]](p4), 0, 0 :: (dereferenceable invariant load 4, align 16, addrspace 4) + ; GCN: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]](p4), 0, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4) ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORD_IMM]] ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]](s32) ; GCN: [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_I32_e64 [[COPY1]](s32), [[S_LOAD_DWORD_IMM]], implicit $exec @@ -65,7 +65,7 @@ body: | %1:sgpr_64(p4) = COPY $sgpr4_sgpr5 %2:vgpr_32(s32) = COPY $vgpr0 - %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %1:sgpr_64(p4), 0, 0 :: (dereferenceable invariant load 4, align 16, addrspace 4) + %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %1:sgpr_64(p4), 0, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4) %8:sreg_32 = COPY %3:sreg_32_xm0_xexec %14:vgpr_32 = COPY %2:vgpr_32(s32) %9:sreg_64 = V_CMP_LT_I32_e64 %2:vgpr_32(s32), %3:sreg_32_xm0_xexec, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll index a582f64aaa223..d3fa761334251 100644 --- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll @@ -12,8 +12,8 @@ define amdgpu_kernel void @extract_w_offset_vgpr(i32 addrspace(1)* %out) { ; GCN: bb.0.entry: ; GCN: successors: %bb.1(0x80000000) ; GCN: liveins: $vgpr0, $sgpr0_sgpr1 - ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.3, $sgpr32, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5) - ; GCN: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 8 from %ir.out.kernarg.offset.cast, align 4, addrspace 4) + ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.3, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) + ; GCN: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s64) from %ir.out.kernarg.offset.cast, align 4, addrspace 4) ; GCN: renamable $sgpr6 = COPY renamable $sgpr1 ; GCN: renamable $sgpr0 = COPY renamable $sgpr0, implicit killed $sgpr0_sgpr1 ; GCN: renamable $sgpr4 = S_MOV_B32 61440 @@ -22,7 +22,7 @@ define amdgpu_kernel void @extract_w_offset_vgpr(i32 addrspace(1)* %out) { ; GCN: renamable $sgpr1 = COPY killed renamable $sgpr6 ; GCN: renamable $sgpr2 = COPY killed renamable $sgpr5 ; GCN: renamable $sgpr3 = COPY killed renamable $sgpr4 - ; GCN: SI_SPILL_S128_SAVE killed $sgpr0_sgpr1_sgpr2_sgpr3, %stack.2, implicit $exec, implicit $sgpr32 :: (store 16 into %stack.2, align 4, addrspace 5) + ; GCN: SI_SPILL_S128_SAVE killed $sgpr0_sgpr1_sgpr2_sgpr3, %stack.2, implicit $exec, implicit $sgpr32 :: (store (s128) into %stack.2, align 4, addrspace 5) ; GCN: renamable $sgpr0 = S_MOV_B32 16 ; GCN: renamable $sgpr1 = S_MOV_B32 15 ; GCN: renamable $sgpr2 = S_MOV_B32 14 @@ -71,35 +71,35 @@ define amdgpu_kernel void @extract_w_offset_vgpr(i32 addrspace(1)* %out) { ; GCN: renamable $vgpr13 = COPY killed renamable $vgpr18 ; GCN: renamable $vgpr14 = COPY killed renamable $vgpr17 ; GCN: renamable $vgpr15 = COPY killed renamable $vgpr16 - ; GCN: SI_SPILL_V512_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, %stack.1, $sgpr32, 0, implicit $exec :: (store 64 into %stack.1, align 4, addrspace 5) + ; GCN: SI_SPILL_V512_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, %stack.1, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.1, align 4, addrspace 5) ; GCN: renamable $sgpr0_sgpr1 = S_MOV_B64 $exec - ; GCN: SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr32 :: (store 8 into %stack.0, align 4, addrspace 5) + ; GCN: SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5) ; GCN: renamable $vgpr0 = IMPLICIT_DEF ; GCN: renamable $sgpr0_sgpr1 = IMPLICIT_DEF ; GCN: bb.1: ; GCN: successors: %bb.1(0x40000000), %bb.3(0x40000000) - ; GCN: $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.4, implicit $exec, implicit $sgpr32 :: (load 8 from %stack.4, align 4, addrspace 5) - ; GCN: $vgpr17 = SI_SPILL_V32_RESTORE %stack.5, $sgpr32, 0, implicit $exec :: (load 4 from %stack.5, addrspace 5) - ; GCN: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = SI_SPILL_V512_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load 64 from %stack.1, align 4, addrspace 5) - ; GCN: $vgpr16 = SI_SPILL_V32_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load 4 from %stack.3, addrspace 5) + ; GCN: $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.4, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.4, align 4, addrspace 5) + ; GCN: $vgpr17 = SI_SPILL_V32_RESTORE %stack.5, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5) + ; GCN: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = SI_SPILL_V512_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.1, align 4, addrspace 5) + ; GCN: $vgpr16 = SI_SPILL_V32_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5) ; GCN: renamable $sgpr2 = V_READFIRSTLANE_B32 $vgpr16, implicit $exec ; GCN: renamable $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 $sgpr2, $vgpr16, implicit $exec ; GCN: renamable $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 killed renamable $sgpr0_sgpr1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; GCN: renamable $vgpr0 = V_INDIRECT_REG_READ_GPR_IDX_B32_V16 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $sgpr2, 11, implicit-def $m0, implicit $m0, implicit $exec - ; GCN: SI_SPILL_V32_SAVE $vgpr0, %stack.6, $sgpr32, 0, implicit $exec :: (store 4 into %stack.6, addrspace 5) - ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.5, $sgpr32, 0, implicit $exec :: (store 4 into %stack.5, addrspace 5) + ; GCN: SI_SPILL_V32_SAVE $vgpr0, %stack.6, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5) + ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.5, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5) ; GCN: renamable $sgpr2_sgpr3 = COPY renamable $sgpr0_sgpr1 - ; GCN: SI_SPILL_S64_SAVE killed $sgpr2_sgpr3, %stack.4, implicit $exec, implicit $sgpr32 :: (store 8 into %stack.4, align 4, addrspace 5) + ; GCN: SI_SPILL_S64_SAVE killed $sgpr2_sgpr3, %stack.4, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.4, align 4, addrspace 5) ; GCN: $exec = S_XOR_B64_term $exec, killed renamable $sgpr0_sgpr1, implicit-def dead $scc ; GCN: S_CBRANCH_EXECNZ %bb.1, implicit $exec ; GCN: bb.3: ; GCN: successors: %bb.2(0x80000000) - ; GCN: $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load 8 from %stack.0, align 4, addrspace 5) + ; GCN: $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.0, align 4, addrspace 5) ; GCN: $exec = S_MOV_B64 renamable $sgpr0_sgpr1 ; GCN: bb.2: - ; GCN: $vgpr0 = SI_SPILL_V32_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: (load 4 from %stack.6, addrspace 5) - ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = SI_SPILL_S128_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load 16 from %stack.2, align 4, addrspace 5) - ; GCN: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.load, addrspace 1) + ; GCN: $vgpr0 = SI_SPILL_V32_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5) + ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = SI_SPILL_S128_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load (s128) from %stack.2, align 4, addrspace 5) + ; GCN: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.out.load, addrspace 1) ; GCN: S_ENDPGM 0 entry: %id = call i32 @llvm.amdgcn.workitem.id.x() #1 diff --git a/llvm/test/CodeGen/AMDGPU/insert-waitcnts-exp.mir b/llvm/test/CodeGen/AMDGPU/insert-waitcnts-exp.mir index c240e770c592e..c0bb0beb9e69a 100644 --- a/llvm/test/CodeGen/AMDGPU/insert-waitcnts-exp.mir +++ b/llvm/test/CodeGen/AMDGPU/insert-waitcnts-exp.mir @@ -49,10 +49,10 @@ body: | bb.0 (%ir-block.2): $sgpr3 = S_MOV_B32 61440 $sgpr2 = S_MOV_B32 -1 - $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`) - $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`) - $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`) - $vgpr3 = BUFFER_LOAD_DWORD_OFFSET killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`) + $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (volatile load (s32) from `float addrspace(1)* undef`) + $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (volatile load (s32) from `float addrspace(1)* undef`) + $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (volatile load (s32) from `float addrspace(1)* undef`) + $vgpr3 = BUFFER_LOAD_DWORD_OFFSET killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (volatile load (s32) from `float addrspace(1)* undef`) EXP_DONE 0, killed $vgpr0, killed $vgpr1, killed $vgpr2, killed $vgpr3, -1, -1, 15, implicit $exec $vgpr0 = V_MOV_B32_e32 1056964608, implicit $exec $vgpr1 = V_MOV_B32_e32 1065353216, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/invert-br-undef-vcc.mir b/llvm/test/CodeGen/AMDGPU/invert-br-undef-vcc.mir index c41da0f25b358..d3a64fcd0c7f3 100644 --- a/llvm/test/CodeGen/AMDGPU/invert-br-undef-vcc.mir +++ b/llvm/test/CodeGen/AMDGPU/invert-br-undef-vcc.mir @@ -55,7 +55,7 @@ body: | bb.0.entry: liveins: $sgpr0_sgpr1 - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`) $sgpr7 = S_MOV_B32 61440 $sgpr6 = S_MOV_B32 -1 S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc @@ -64,7 +64,7 @@ body: | liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 $vgpr0 = V_MOV_B32_e32 100, implicit $exec - BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`) + BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile store (s32) into `i32 addrspace(1)* undef`) $vgpr0 = V_MOV_B32_e32 1, implicit $exec S_BRANCH %bb.3 @@ -72,7 +72,7 @@ body: | liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 $vgpr0 = V_MOV_B32_e32 9, implicit $exec - BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`) + BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile store (s32) into `i32 addrspace(1)* undef`) $vgpr0 = V_MOV_B32_e32 0, implicit $exec bb.3.done: @@ -80,7 +80,7 @@ body: | $sgpr3 = S_MOV_B32 61440 $sgpr2 = S_MOV_B32 -1 - BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out) + BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.out) S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/legalize-fp-load-invariant.ll b/llvm/test/CodeGen/AMDGPU/legalize-fp-load-invariant.ll index b34b18f67c849..0c43c0d4de60b 100644 --- a/llvm/test/CodeGen/AMDGPU/legalize-fp-load-invariant.ll +++ b/llvm/test/CodeGen/AMDGPU/legalize-fp-load-invariant.ll @@ -3,7 +3,7 @@ ; Type legalization for illegal FP type results was dropping invariant ; and dereferenceable flags. -; GCN: BUFFER_LOAD_USHORT{{.*}} :: (dereferenceable invariant load 2 from %ir.ptr, addrspace 4) +; GCN: BUFFER_LOAD_USHORT{{.*}} :: (dereferenceable invariant load (s16) from %ir.ptr, addrspace 4) define half @legalize_f16_load(half addrspace(4)* dereferenceable(4) %ptr) { %load = load half, half addrspace(4)* %ptr, !invariant.load !0 %add = fadd half %load, 1.0 diff --git a/llvm/test/CodeGen/AMDGPU/limit-soft-clause-reg-pressure.mir b/llvm/test/CodeGen/AMDGPU/limit-soft-clause-reg-pressure.mir index b0a363ceb25bd..0688a6b33df70 100644 --- a/llvm/test/CodeGen/AMDGPU/limit-soft-clause-reg-pressure.mir +++ b/llvm/test/CodeGen/AMDGPU/limit-soft-clause-reg-pressure.mir @@ -17,31 +17,31 @@ body: | ; CHECK-LABEL: name: soft_clause_bundle_out_of_registers ; CHECK: bb.0: ; CHECK: successors: %bb.1(0x80000000) - ; CHECK: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 0, 0 :: (load 64, align 4, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM1:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 4096, 0 :: (load 64, align 4, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM2:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 8192, 0 :: (load 64, align 4, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM3:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 12288, 0 :: (load 64, align 4, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM4:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 64, 0 :: (load 64, align 4, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM5:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 4160, 0 :: (load 64, align 4, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM6:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 8256, 0 :: (load 64, align 4, addrspace 4) + ; CHECK: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 0, 0 :: (load (s512), align 4, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM1:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 4096, 0 :: (load (s512), align 4, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM2:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 8192, 0 :: (load (s512), align 4, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM3:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 12288, 0 :: (load (s512), align 4, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM4:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 64, 0 :: (load (s512), align 4, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM5:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 4160, 0 :: (load (s512), align 4, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM6:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 8256, 0 :: (load (s512), align 4, addrspace 4) ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL undef $sgpr4_sgpr5, 0, csr_amdgpu_highregs, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit-def $sgpr70, implicit-def $sgpr80, implicit-def $sgpr90, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 bb.0: liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6 %0:sgpr_64 = COPY $sgpr4_sgpr5 - %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 0, 0 :: (dereferenceable invariant load 8, addrspace 4) + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 0, 0 :: (dereferenceable invariant load (s64), addrspace 4) %2:vreg_64 = IMPLICIT_DEF bb.1: undef %3.sub0:sreg_64 = S_ADD_U32 %1.sub0, 0, implicit-def $scc %3.sub1:sreg_64 = S_ADDC_U32 %1.sub1, 0, implicit-def dead $scc, implicit killed $scc - %4:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 0, 0 :: (load 64, align 4, addrspace 4) - %5:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 4096, 0 :: (load 64, align 4, addrspace 4) - %6:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 8192, 0 :: (load 64, align 4, addrspace 4) - %7:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 12288, 0 :: (load 64, align 4, addrspace 4) - %8:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 64, 0 :: (load 64, align 4, addrspace 4) - %9:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 4160, 0 :: (load 64, align 4, addrspace 4) - %10:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 8256, 0 :: (load 64, align 4, addrspace 4) + %4:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 0, 0 :: (load (s512), align 4, addrspace 4) + %5:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 4096, 0 :: (load (s512), align 4, addrspace 4) + %6:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 8192, 0 :: (load (s512), align 4, addrspace 4) + %7:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 12288, 0 :: (load (s512), align 4, addrspace 4) + %8:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 64, 0 :: (load (s512), align 4, addrspace 4) + %9:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 4160, 0 :: (load (s512), align 4, addrspace 4) + %10:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 8256, 0 :: (load (s512), align 4, addrspace 4) dead $sgpr30_sgpr31 = SI_CALL undef $sgpr4_sgpr5, 0, csr_amdgpu_highregs, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit-def $sgpr70, implicit-def $sgpr80, implicit-def $sgpr90, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 dead %11:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %4.sub1, 0, 0, implicit $mode, implicit $exec dead %12:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %4.sub2, 0, 0, implicit $mode, implicit $exec @@ -118,10 +118,10 @@ body: | bb.0: liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6 ; CHECK-LABEL: name: simple_huge_reg_tuple_clause - ; CHECK: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 0, 0 :: (load 64, align 4, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM1:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 64, 0 :: (load 64, align 4, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM2:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 4096, 0 :: (load 64, align 4, addrspace 4) - ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM3:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 4160, 0 :: (load 64, align 4, addrspace 4) + ; CHECK: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 0, 0 :: (load (s512), align 4, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM1:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 64, 0 :: (load (s512), align 4, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM2:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 4096, 0 :: (load (s512), align 4, addrspace 4) + ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM3:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 4160, 0 :: (load (s512), align 4, addrspace 4) ; CHECK-NEXT: S_NOP 0, implicit [[S_LOAD_DWORDX16_IMM]] %0:sreg_64 = COPY $sgpr4_sgpr5 %1:sreg_64 = S_MOV_B64 0 @@ -131,10 +131,10 @@ body: | %5:sreg_64 = S_MOV_B64 4 %6:sreg_64 = S_MOV_B64 5 %7:sreg_64 = S_MOV_B64 6 - %8:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 0, 0 :: (load 64, align 4, addrspace 4) - %9:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 64, 0 :: (load 64, align 4, addrspace 4) - %10:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 4096, 0 :: (load 64, align 4, addrspace 4) - %11:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 4160, 0 :: (load 64, align 4, addrspace 4) + %8:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 0, 0 :: (load (s512), align 4, addrspace 4) + %9:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 64, 0 :: (load (s512), align 4, addrspace 4) + %10:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 4096, 0 :: (load (s512), align 4, addrspace 4) + %11:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 4160, 0 :: (load (s512), align 4, addrspace 4) S_NOP 0, implicit %8 S_NOP 0, implicit %9 S_NOP 0, implicit %10 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll index 417901e78a3c1..dd6d780a6eab5 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll @@ -20,7 +20,7 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42 ; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]] ; MIR-LABEL: @lds_atomic_inc_ret_i32 -; MIR: DS_INC_RTN_U32 {{.*}} :: (load store 4 on %{{.*}}, !noalias !{{[0-9]+}}, addrspace 3) +; MIR: DS_INC_RTN_U32 {{.*}} :: (load store (s32) on %{{.*}}, !noalias !{{[0-9]+}}, addrspace 3) define amdgpu_kernel void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 { %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false), !noalias !0 store i32 %result, i32 addrspace(1)* %out diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier-fastregalloc.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier-fastregalloc.ll index 267bcee2aa2ee..95e50da8a4709 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier-fastregalloc.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier-fastregalloc.ll @@ -4,7 +4,7 @@ ; MIR-LABEL: name: gws_barrier_offset0{{$}} ; MIR: BUNDLE implicit{{( killed)?( renamable)?}} $vgpr0, implicit $m0, implicit $exec { -; MIR-NEXT: DS_GWS_BARRIER renamable $vgpr0, 0, implicit $m0, implicit $exec :: (load 4 from custom "GWSResource") +; MIR-NEXT: DS_GWS_BARRIER renamable $vgpr0, 0, implicit $m0, implicit $exec :: (load (s32) from custom "GWSResource") ; MIR-NEXT: S_WAITCNT 0 ; MIR-NEXT: } define amdgpu_kernel void @gws_barrier_offset0(i32 %val) #0 { diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll index b3d52793fec04..08a1a12a708a9 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll @@ -27,7 +27,7 @@ ; MIR-LABEL: name: gws_barrier_offset0{{$}} ; MIR: BUNDLE implicit{{( killed)?( renamable)?}} $vgpr0, implicit $m0, implicit $exec { -; MIR-NEXT: DS_GWS_BARRIER renamable $vgpr0, 0, implicit $m0, implicit $exec :: (load 4 from custom "GWSResource") +; MIR-NEXT: DS_GWS_BARRIER renamable $vgpr0, 0, implicit $m0, implicit $exec :: (load (s32) from custom "GWSResource") ; MIR-NEXT: S_WAITCNT 0 ; MIR-NEXT: } define amdgpu_kernel void @gws_barrier_offset0(i32 %val) #0 { diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.release.all.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.release.all.ll index bbe7dfbb2d308..9c4f610fd4bb4 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.release.all.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.release.all.ll @@ -7,7 +7,7 @@ ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %s ; GFX6ERR-SDAG: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.ds.gws.sema.release.all -; GFX6ERR-GISEL: LLVM ERROR: cannot select: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.sema.release.all), %{{[0-9]+}}:sgpr(s32) :: (store 4 into custom "GWSResource") (in function: gws_sema_release_all_offset0) +; GFX6ERR-GISEL: LLVM ERROR: cannot select: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.sema.release.all), %{{[0-9]+}}:sgpr(s32) :: (store (s32) into custom "GWSResource") (in function: gws_sema_release_all_offset0) ; GCN-LABEL: {{^}}gws_sema_release_all_offset0: ; NOLOOP-DAG: s_mov_b32 m0, 0{{$}} diff --git a/llvm/test/CodeGen/AMDGPU/load-store-opt-scc.mir b/llvm/test/CodeGen/AMDGPU/load-store-opt-scc.mir index a168cdc7d2fe5..2d3c1011bd561 100644 --- a/llvm/test/CodeGen/AMDGPU/load-store-opt-scc.mir +++ b/llvm/test/CodeGen/AMDGPU/load-store-opt-scc.mir @@ -32,7 +32,7 @@ } ... -# CHECK: BUFFER_STORE_DWORDX2_OFFSET killed %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 0, 0, 0, implicit $exec :: (store 8 into %ir.out.gep.1, align 4, addrspace 1) +# CHECK: BUFFER_STORE_DWORDX2_OFFSET killed %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 0, 0, 0, implicit $exec :: (store (s64) into %ir.out.gep.1, align 4, addrspace 1) --- name: test1 liveins: @@ -48,7 +48,7 @@ body: | $sgpr3 = S_MOV_B32 61440 %0:sgpr_64 = COPY $sgpr0_sgpr1 - %1:sgpr_64 = S_LOAD_DWORDX2_IMM %1, 36, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, addrspace 4) + %1:sgpr_64 = S_LOAD_DWORDX2_IMM %1, 36, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, addrspace 4) %2:sgpr_32 = COPY $sgpr2 %3:sgpr_32 = COPY $sgpr3 %4:sgpr_128 = REG_SEQUENCE %1, %2, %3 @@ -56,14 +56,14 @@ body: | %5:vgpr_32 = COPY $vgpr0 %6:vgpr_32 = COPY $vgpr1 - BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1) - BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1) + BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 0, 0, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1) + BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 0, 0, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1) S_ENDPGM 0 ... -# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 16, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1) -# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 8, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1) +# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 16, 0, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1) +# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 8, 0, 0, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1) --- name: test2 liveins: @@ -79,7 +79,7 @@ body: | $sgpr3 = S_MOV_B32 61440 %0:sgpr_64 = COPY $sgpr0_sgpr1 - %1:sgpr_64 = S_LOAD_DWORDX2_IMM %1, 36, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, addrspace 4) + %1:sgpr_64 = S_LOAD_DWORDX2_IMM %1, 36, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, addrspace 4) %2:sgpr_32 = COPY $sgpr2 %3:sgpr_32 = COPY $sgpr3 %4:sgpr_128 = REG_SEQUENCE %1, %2, %3 @@ -87,14 +87,14 @@ body: | %5:vgpr_32 = COPY $vgpr0 %6:vgpr_32 = COPY $vgpr1 - BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 16, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1) - BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1) + BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 16, 0, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1) + BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 0, 0, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1) S_ENDPGM 0 ... -# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1) -# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 8, 16, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1) +# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 0, 0, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1) +# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 8, 16, 0, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1) --- name: test3 liveins: @@ -110,7 +110,7 @@ body: | $sgpr3 = S_MOV_B32 61440 %0:sgpr_64 = COPY $sgpr0_sgpr1 - %1:sgpr_64 = S_LOAD_DWORDX2_IMM %1, 36, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, addrspace 4) + %1:sgpr_64 = S_LOAD_DWORDX2_IMM %1, 36, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, addrspace 4) %2:sgpr_32 = COPY $sgpr2 %3:sgpr_32 = COPY $sgpr3 %4:sgpr_128 = REG_SEQUENCE %1, %2, %3 @@ -118,13 +118,13 @@ body: | %5:vgpr_32 = COPY $vgpr0 %6:vgpr_32 = COPY $vgpr1 - BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1) - BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 16, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1) + BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 0, 0, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1) + BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 16, 0, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1) S_ENDPGM 0 ... -# CHECK: BUFFER_STORE_DWORDX2_OFFSET killed %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 16, 0, 0, implicit $exec :: (store 8 into %ir.out.gep.1, align 4, addrspace 1) +# CHECK: BUFFER_STORE_DWORDX2_OFFSET killed %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 16, 0, 0, implicit $exec :: (store (s64) into %ir.out.gep.1, align 4, addrspace 1) --- name: test4 liveins: @@ -140,7 +140,7 @@ body: | $sgpr3 = S_MOV_B32 61440 %0:sgpr_64 = COPY $sgpr0_sgpr1 - %1:sgpr_64 = S_LOAD_DWORDX2_IMM %1, 36, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, addrspace 4) + %1:sgpr_64 = S_LOAD_DWORDX2_IMM %1, 36, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, addrspace 4) %2:sgpr_32 = COPY $sgpr2 %3:sgpr_32 = COPY $sgpr3 %4:sgpr_128 = REG_SEQUENCE %1, %2, %3 @@ -148,8 +148,8 @@ body: | %5:vgpr_32 = COPY $vgpr0 %6:vgpr_32 = COPY $vgpr1 - BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 16, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1) - BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 16, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1) + BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 16, 0, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1) + BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 16, 0, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1) S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/lower-control-flow-other-terminators.mir b/llvm/test/CodeGen/AMDGPU/lower-control-flow-other-terminators.mir index 8901505b5a31f..68e0e663b7059 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-control-flow-other-terminators.mir +++ b/llvm/test/CodeGen/AMDGPU/lower-control-flow-other-terminators.mir @@ -203,7 +203,7 @@ body: | ; CHECK: bb.1: ; CHECK: successors: %bb.2(0x80000000) ; CHECK: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term1]] - ; CHECK: dead %7:vgpr_32 = GLOBAL_LOAD_DWORD undef %8:vreg_64, 0, 0, implicit $exec :: (volatile load 4, addrspace 1) + ; CHECK: dead %7:vgpr_32 = GLOBAL_LOAD_DWORD undef %8:vreg_64, 0, 0, implicit $exec :: (volatile load (s32), addrspace 1) ; CHECK: [[COPY4:%[0-9]+]]:sreg_64_xexec = COPY [[COPY3]] ; CHECK: bb.2: ; CHECK: successors: %bb.3(0x80000000) @@ -233,7 +233,7 @@ body: | bb.1: %11:sreg_64_xexec = COPY %13 - dead %6:vgpr_32 = GLOBAL_LOAD_DWORD undef %8:vreg_64, 0, 0, implicit $exec :: (volatile load 4, addrspace 1) + dead %6:vgpr_32 = GLOBAL_LOAD_DWORD undef %8:vreg_64, 0, 0, implicit $exec :: (volatile load (s32), addrspace 1) %14:sreg_64_xexec = COPY %11 bb.2: diff --git a/llvm/test/CodeGen/AMDGPU/mcp-overlap-after-propagation.mir b/llvm/test/CodeGen/AMDGPU/mcp-overlap-after-propagation.mir index b8b898be09640..987c5ecc07fd5 100644 --- a/llvm/test/CodeGen/AMDGPU/mcp-overlap-after-propagation.mir +++ b/llvm/test/CodeGen/AMDGPU/mcp-overlap-after-propagation.mir @@ -17,11 +17,11 @@ body: | renamable $sgpr8_sgpr9 = S_GETPC_B64 renamable $sgpr8 = COPY killed renamable $sgpr2 - renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 = S_LOAD_DWORDX8_IMM renamable $sgpr8_sgpr9, 144, 0 :: (invariant load 32, align 16, addrspace 4) + renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 = S_LOAD_DWORDX8_IMM renamable $sgpr8_sgpr9, 144, 0 :: (invariant load (s256), align 16, addrspace 4) renamable $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = COPY killed renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 renamable $vgpr4 = IMAGE_GET_LOD_V1_V2_gfx10 renamable $vgpr70_vgpr71, renamable $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, renamable $sgpr96_sgpr97_sgpr98_sgpr99, 2, 1, 0, 0, 0, 0, 0, 0, implicit $exec renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63 = COPY killed renamable $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - renamable $vgpr12_vgpr13_vgpr14 = IMAGE_SAMPLE_V3_V2_gfx10 renamable $vgpr70_vgpr71, renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, renamable $sgpr96_sgpr97_sgpr98_sgpr99, 7, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16) + renamable $vgpr12_vgpr13_vgpr14 = IMAGE_SAMPLE_V3_V2_gfx10 renamable $vgpr70_vgpr71, renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, renamable $sgpr96_sgpr97_sgpr98_sgpr99, 7, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16) S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/memcpy-scoped-aa.ll b/llvm/test/CodeGen/AMDGPU/memcpy-scoped-aa.ll index 736f01934a5c3..f7e224cc1e375 100644 --- a/llvm/test/CodeGen/AMDGPU/memcpy-scoped-aa.ll +++ b/llvm/test/CodeGen/AMDGPU/memcpy-scoped-aa.ll @@ -12,8 +12,8 @@ ; MIR-DAG: ![[SET1:[0-9]+]] = !{![[SCOPE1]]} ; MIR-LABEL: name: test_memcpy -; MIR: %8:vreg_128 = GLOBAL_LOAD_DWORDX4 %9, 16, 0, implicit $exec :: (load 16 from %ir.p1, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]], addrspace 1) -; MIR: GLOBAL_STORE_DWORDX4 %10, killed %8, 0, 0, implicit $exec :: (store 16 into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]], addrspace 1) +; MIR: %8:vreg_128 = GLOBAL_LOAD_DWORDX4 %9, 16, 0, implicit $exec :: (load (s128) from %ir.p1, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]], addrspace 1) +; MIR: GLOBAL_STORE_DWORDX4 %10, killed %8, 0, 0, implicit $exec :: (store (s128) into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]], addrspace 1) define i32 @test_memcpy(i32 addrspace(1)* nocapture %p, i32 addrspace(1)* nocapture readonly %q) { ; Check loads of %q are scheduled ahead of that store of the memcpy on %p. ; CHECK-LABEL: test_memcpy: @@ -34,8 +34,8 @@ define i32 @test_memcpy(i32 addrspace(1)* nocapture %p, i32 addrspace(1)* nocapt } ; MIR-LABEL: name: test_memcpy_inline -; MIR: %8:vreg_128 = GLOBAL_LOAD_DWORDX4 %9, 16, 0, implicit $exec :: (load 16 from %ir.p1, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]], addrspace 1) -; MIR: GLOBAL_STORE_DWORDX4 %10, killed %8, 0, 0, implicit $exec :: (store 16 into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]], addrspace 1) +; MIR: %8:vreg_128 = GLOBAL_LOAD_DWORDX4 %9, 16, 0, implicit $exec :: (load (s128) from %ir.p1, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]], addrspace 1) +; MIR: GLOBAL_STORE_DWORDX4 %10, killed %8, 0, 0, implicit $exec :: (store (s128) into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]], addrspace 1) define i32 @test_memcpy_inline(i32 addrspace(1)* nocapture %p, i32 addrspace(1)* nocapture readonly %q) { ; Check loads of %q are scheduled ahead of that store of the memcpy on %p. ; CHECK-LABEL: test_memcpy_inline: @@ -56,8 +56,8 @@ define i32 @test_memcpy_inline(i32 addrspace(1)* nocapture %p, i32 addrspace(1)* } ; MIR-LABEL: name: test_memmove -; MIR: %8:vreg_128 = GLOBAL_LOAD_DWORDX4 %9, 16, 0, implicit $exec :: (load 16 from %ir.p1, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]], addrspace 1) -; MIR: GLOBAL_STORE_DWORDX4 %10, killed %8, 0, 0, implicit $exec :: (store 16 into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]], addrspace 1) +; MIR: %8:vreg_128 = GLOBAL_LOAD_DWORDX4 %9, 16, 0, implicit $exec :: (load (s128) from %ir.p1, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]], addrspace 1) +; MIR: GLOBAL_STORE_DWORDX4 %10, killed %8, 0, 0, implicit $exec :: (store (s128) into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]], addrspace 1) define i32 @test_memmove(i32 addrspace(1)* nocapture %p, i32 addrspace(1)* nocapture readonly %q) { ; Check loads of %q are scheduled ahead of that store of the memmove on %p. ; CHECK-LABEL: test_memmove: @@ -78,7 +78,7 @@ define i32 @test_memmove(i32 addrspace(1)* nocapture %p, i32 addrspace(1)* nocap } ; MIR-LABEL: name: test_memset -; MIR: GLOBAL_STORE_DWORDX4 killed %10, killed %11, 0, 0, implicit $exec :: (store 16 into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]], addrspace 1) +; MIR: GLOBAL_STORE_DWORDX4 killed %10, killed %11, 0, 0, implicit $exec :: (store (s128) into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]], addrspace 1) define i32 @test_memset(i32 addrspace(1)* nocapture %p, i32 addrspace(1)* nocapture readonly %q) { ; Check loads of %q are scheduled ahead of that store of the memset on %p. ; CHECK-LABEL: test_memset: diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir b/llvm/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir index 93a26d5dd7f36..c9f3ae354fc3f 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir @@ -75,13 +75,13 @@ body: | successors: %bb.1.atomic(0x40000000), %bb.2.exit(0x40000000) liveins: $vgpr0, $sgpr0_sgpr1 - $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) + $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`) $vgpr1 = V_ASHRREV_I32_e32 31, $vgpr0, implicit $exec $vgpr1_vgpr2 = V_LSHL_B64_e64 $vgpr0_vgpr1, 3, implicit $exec $sgpr7 = S_MOV_B32 61440 $sgpr6 = S_MOV_B32 0 S_WAITCNT 127 - $vgpr1_vgpr2 = BUFFER_LOAD_DWORDX2_ADDR64 killed $vgpr1_vgpr2, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 8 from %ir.tid.gep) + $vgpr1_vgpr2 = BUFFER_LOAD_DWORDX2_ADDR64 killed $vgpr1_vgpr2, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile load (s64) from %ir.tid.gep) $vgpr0 = V_XOR_B32_e32 1, killed $vgpr0, implicit $exec V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec $sgpr2_sgpr3 = S_AND_SAVEEXEC_B64 killed $vcc, implicit-def $exec, implicit-def $scc, implicit $exec @@ -92,14 +92,14 @@ body: | successors: %bb.2.exit(0x80000000) liveins: $sgpr4_sgpr5_sgpr6_sgpr7:0x0000000C, $sgpr0_sgpr1, $sgpr2_sgpr3, $vgpr1_vgpr2_vgpr3_vgpr4:0x00000003 - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 15, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 15, 0 :: (non-temporal dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`) dead $vgpr0 = V_MOV_B32_e32 -1, implicit $exec dead $vgpr0 = V_MOV_B32_e32 61440, implicit $exec $sgpr4_sgpr5 = S_MOV_B64 0 S_WAITCNT 127 $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec S_WAITCNT 3952 - BUFFER_ATOMIC_SMAX_ADDR64 killed $vgpr0, killed $vgpr1_vgpr2, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 400, 0, implicit $exec :: (volatile load syncscope("one-as") seq_cst 4 from %ir.gep) + BUFFER_ATOMIC_SMAX_ADDR64 killed $vgpr0, killed $vgpr1_vgpr2, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 400, 0, implicit $exec :: (volatile load syncscope("one-as") seq_cst (s32) from %ir.gep) bb.2.exit: liveins: $sgpr2_sgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-invalid-addrspace.mir b/llvm/test/CodeGen/AMDGPU/memory-legalizer-invalid-addrspace.mir index afe8a2750bf56..8ba41362605ee 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-invalid-addrspace.mir +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-invalid-addrspace.mir @@ -11,10 +11,10 @@ body: | $vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr2_sgpr3 $vgpr1 = V_MOV_B32_e32 killed $sgpr3, implicit $exec, implicit $sgpr2_sgpr3, implicit $exec - renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load syncscope("one-as") seq_cst 4 from `i32 addrspace(42)* undef`) + renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load syncscope("one-as") seq_cst (s32) from `i32 addrspace(42)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) S_ENDPGM 0 ... @@ -30,7 +30,7 @@ body: | $vgpr2 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store syncscope("agent-one-as") seq_cst 4 into `i32 addrspace(42)* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store syncscope("agent-one-as") seq_cst (s32) into `i32 addrspace(42)* undef`) S_ENDPGM 0 ... @@ -47,7 +47,7 @@ body: | $vgpr0 = V_MOV_B32_e32 killed $sgpr4, implicit $exec, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit $exec $vgpr2 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_ATOMIC_CMPSWAP killed renamable $vgpr2_vgpr3, killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store syncscope("workgroup-one-as") seq_cst seq_cst 4 on `i32 addrspace(42)* undef`) + FLAT_ATOMIC_CMPSWAP killed renamable $vgpr2_vgpr3, killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store syncscope("workgroup-one-as") seq_cst seq_cst (s32) on `i32 addrspace(42)* undef`) S_ENDPGM 0 ... @@ -63,7 +63,7 @@ body: | $vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr2_sgpr3 $vgpr1 = V_MOV_B32_e32 killed $sgpr3, implicit $exec, implicit $sgpr2_sgpr3, implicit $exec $vgpr2 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - FLAT_ATOMIC_SWAP killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store syncscope("wavefront-one-as") seq_cst 4 on `i32 addrspace(42)* undef`) + FLAT_ATOMIC_SWAP killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store syncscope("wavefront-one-as") seq_cst (s32) on `i32 addrspace(42)* undef`) S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local.mir b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local.mir index 26b248fef9191..aed7814e9310f 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local.mir +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local.mir @@ -13,14 +13,14 @@ name: load_singlethread_unordered body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec - renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") unordered 4 from `i32 addrspace(3)* undef`) + renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") unordered (s32) from `i32 addrspace(3)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) S_ENDPGM 0 ... @@ -37,14 +37,14 @@ body: | name: load_singlethread_monotonic body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec - renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") monotonic 4 from `i32 addrspace(3)* undef`) + renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") monotonic (s32) from `i32 addrspace(3)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) S_ENDPGM 0 ... @@ -61,14 +61,14 @@ body: | name: load_singlethread_acquire body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec - renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") acquire 4 from `i32 addrspace(3)* undef`) + renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") acquire (s32) from `i32 addrspace(3)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) S_ENDPGM 0 ... @@ -85,14 +85,14 @@ body: | name: load_singlethread_seq_cst body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec - renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") seq_cst 4 from `i32 addrspace(3)* undef`) + renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") seq_cst (s32) from `i32 addrspace(3)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) S_ENDPGM 0 ... @@ -109,14 +109,14 @@ body: | name: load_wavefront_unordered body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec - renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") unordered 4 from `i32 addrspace(3)* undef`) + renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") unordered (s32) from `i32 addrspace(3)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) S_ENDPGM 0 ... @@ -133,14 +133,14 @@ body: | name: load_wavefront_monotonic body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec - renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") monotonic 4 from `i32 addrspace(3)* undef`) + renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") monotonic (s32) from `i32 addrspace(3)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) S_ENDPGM 0 ... @@ -157,14 +157,14 @@ body: | name: load_wavefront_acquire body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec - renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") acquire 4 from `i32 addrspace(3)* undef`) + renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") acquire (s32) from `i32 addrspace(3)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) S_ENDPGM 0 ... @@ -181,14 +181,14 @@ body: | name: load_wavefront_seq_cst body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec - renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") seq_cst 4 from `i32 addrspace(3)* undef`) + renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") seq_cst (s32) from `i32 addrspace(3)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) S_ENDPGM 0 ... @@ -205,14 +205,14 @@ body: | name: load_workgroup_unordered body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec - renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") unordered 4 from `i32 addrspace(3)* undef`) + renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") unordered (s32) from `i32 addrspace(3)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) S_ENDPGM 0 ... @@ -229,14 +229,14 @@ body: | name: load_workgroup_monotonic body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec - renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") monotonic 4 from `i32 addrspace(3)* undef`) + renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") monotonic (s32) from `i32 addrspace(3)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) S_ENDPGM 0 ... @@ -253,14 +253,14 @@ body: | name: load_workgroup_acquire body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec - renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") acquire 4 from `i32 addrspace(3)* undef`) + renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") acquire (s32) from `i32 addrspace(3)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) S_ENDPGM 0 ... @@ -277,14 +277,14 @@ body: | name: load_workgroup_seq_cst body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec - renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") seq_cst 4 from `i32 addrspace(3)* undef`) + renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") seq_cst (s32) from `i32 addrspace(3)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) S_ENDPGM 0 ... @@ -301,14 +301,14 @@ body: | name: load_agent_unordered body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec - renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") unordered 4 from `i32 addrspace(3)* undef`) + renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") unordered (s32) from `i32 addrspace(3)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) S_ENDPGM 0 ... @@ -325,14 +325,14 @@ body: | name: load_agent_monotonic body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec - renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") monotonic 4 from `i32 addrspace(3)* undef`) + renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") monotonic (s32) from `i32 addrspace(3)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) S_ENDPGM 0 ... @@ -349,14 +349,14 @@ body: | name: load_agent_acquire body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec - renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") acquire 4 from `i32 addrspace(3)* undef`) + renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") acquire (s32) from `i32 addrspace(3)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) S_ENDPGM 0 ... @@ -373,14 +373,14 @@ body: | name: load_agent_seq_cst body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec - renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") seq_cst 4 from `i32 addrspace(3)* undef`) + renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") seq_cst (s32) from `i32 addrspace(3)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) S_ENDPGM 0 ... @@ -397,14 +397,14 @@ body: | name: load_system_unordered body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec - renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") unordered 4 from `i32 addrspace(3)* undef`) + renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") unordered (s32) from `i32 addrspace(3)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) S_ENDPGM 0 ... @@ -421,14 +421,14 @@ body: | name: load_system_monotonic body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec - renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") monotonic 4 from `i32 addrspace(3)* undef`) + renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") monotonic (s32) from `i32 addrspace(3)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) S_ENDPGM 0 ... @@ -445,14 +445,14 @@ body: | name: load_system_acquire body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec - renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") acquire 4 from `i32 addrspace(3)* undef`) + renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") acquire (s32) from `i32 addrspace(3)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) S_ENDPGM 0 ... @@ -469,14 +469,14 @@ body: | name: load_system_seq_cst body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec - renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") seq_cst 4 from `i32 addrspace(3)* undef`) + renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") seq_cst (s32) from `i32 addrspace(3)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) S_ENDPGM 0 ... @@ -493,12 +493,12 @@ body: | name: store_singlethread_unordered body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") unordered 4 into `i32 addrspace(3)* undef`) + DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") unordered (s32) into `i32 addrspace(3)* undef`) S_ENDPGM 0 ... @@ -515,12 +515,12 @@ body: | name: store_singlethread_monotonic body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") monotonic 4 into `i32 addrspace(3)* undef`) + DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") monotonic (s32) into `i32 addrspace(3)* undef`) S_ENDPGM 0 ... @@ -537,12 +537,12 @@ body: | name: store_singlethread_release body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") release 4 into `i32 addrspace(3)* undef`) + DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") release (s32) into `i32 addrspace(3)* undef`) S_ENDPGM 0 ... @@ -559,12 +559,12 @@ body: | name: store_singlethread_seq_cst body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") seq_cst 4 into `i32 addrspace(3)* undef`) + DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") seq_cst (s32) into `i32 addrspace(3)* undef`) S_ENDPGM 0 ... @@ -581,12 +581,12 @@ body: | name: store_wavefront_unordered body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") unordered 4 into `i32 addrspace(3)* undef`) + DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") unordered (s32) into `i32 addrspace(3)* undef`) S_ENDPGM 0 ... @@ -603,12 +603,12 @@ body: | name: store_wavefront_monotonic body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") monotonic 4 into `i32 addrspace(3)* undef`) + DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") monotonic (s32) into `i32 addrspace(3)* undef`) S_ENDPGM 0 ... @@ -625,12 +625,12 @@ body: | name: store_wavefront_release body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") release 4 into `i32 addrspace(3)* undef`) + DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") release (s32) into `i32 addrspace(3)* undef`) S_ENDPGM 0 ... @@ -647,12 +647,12 @@ body: | name: store_wavefront_seq_cst body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") seq_cst 4 into `i32 addrspace(3)* undef`) + DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") seq_cst (s32) into `i32 addrspace(3)* undef`) S_ENDPGM 0 ... @@ -669,12 +669,12 @@ body: | name: store_workgroup_unordered body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") unordered 4 into `i32 addrspace(3)* undef`) + DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") unordered (s32) into `i32 addrspace(3)* undef`) S_ENDPGM 0 ... @@ -691,12 +691,12 @@ body: | name: store_workgroup_monotonic body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") monotonic 4 into `i32 addrspace(3)* undef`) + DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") monotonic (s32) into `i32 addrspace(3)* undef`) S_ENDPGM 0 ... @@ -713,12 +713,12 @@ body: | name: store_workgroup_release body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") release 4 into `i32 addrspace(3)* undef`) + DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") release (s32) into `i32 addrspace(3)* undef`) S_ENDPGM 0 ... @@ -735,12 +735,12 @@ body: | name: store_workgroup_seq_cst body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") seq_cst 4 into `i32 addrspace(3)* undef`) + DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") seq_cst (s32) into `i32 addrspace(3)* undef`) S_ENDPGM 0 ... @@ -757,12 +757,12 @@ body: | name: store_agent_unordered body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") unordered 4 into `i32 addrspace(3)* undef`) + DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") unordered (s32) into `i32 addrspace(3)* undef`) S_ENDPGM 0 ... @@ -779,12 +779,12 @@ body: | name: store_agent_monotonic body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") monotonic 4 into `i32 addrspace(3)* undef`) + DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") monotonic (s32) into `i32 addrspace(3)* undef`) S_ENDPGM 0 ... @@ -801,12 +801,12 @@ body: | name: store_agent_release body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") release 4 into `i32 addrspace(3)* undef`) + DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") release (s32) into `i32 addrspace(3)* undef`) S_ENDPGM 0 ... @@ -823,12 +823,12 @@ body: | name: store_agent_seq_cst body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") seq_cst 4 into `i32 addrspace(3)* undef`) + DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") seq_cst (s32) into `i32 addrspace(3)* undef`) S_ENDPGM 0 ... @@ -845,12 +845,12 @@ body: | name: store_system_unordered body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("one-as") unordered 4 into `i32 addrspace(3)* undef`) + DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("one-as") unordered (s32) into `i32 addrspace(3)* undef`) S_ENDPGM 0 ... @@ -867,12 +867,12 @@ body: | name: store_system_monotonic body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("one-as") monotonic 4 into `i32 addrspace(3)* undef`) + DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("one-as") monotonic (s32) into `i32 addrspace(3)* undef`) S_ENDPGM 0 ... @@ -889,12 +889,12 @@ body: | name: store_system_release body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("one-as") release 4 into `i32 addrspace(3)* undef`) + DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("one-as") release (s32) into `i32 addrspace(3)* undef`) S_ENDPGM 0 ... @@ -911,12 +911,12 @@ body: | name: store_system_seq_cst body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("one-as") seq_cst 4 into `i32 addrspace(3)* undef`) + DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("one-as") seq_cst (s32) into `i32 addrspace(3)* undef`) S_ENDPGM 0 ... @@ -933,12 +933,12 @@ body: | name: atomicrmw_singlethread_unordered body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") unordered 4 into `i32 addrspace(3)* undef`) + $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") unordered (s32) into `i32 addrspace(3)* undef`) S_ENDPGM 0 ... @@ -955,12 +955,12 @@ body: | name: atomicrmw_singlethread_monotonic body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") monotonic 4 into `i32 addrspace(3)* undef`) + $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") monotonic (s32) into `i32 addrspace(3)* undef`) S_ENDPGM 0 ... @@ -977,12 +977,12 @@ body: | name: atomicrmw_singlethread_acquire body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") acquire 4 into `i32 addrspace(3)* undef`) + $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") acquire (s32) into `i32 addrspace(3)* undef`) S_ENDPGM 0 ... @@ -999,12 +999,12 @@ body: | name: atomicrmw_singlethread_release body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") release 4 into `i32 addrspace(3)* undef`) + $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") release (s32) into `i32 addrspace(3)* undef`) S_ENDPGM 0 ... @@ -1021,12 +1021,12 @@ body: | name: atomicrmw_singlethread_acq_rel body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") acq_rel 4 into `i32 addrspace(3)* undef`) + $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") acq_rel (s32) into `i32 addrspace(3)* undef`) S_ENDPGM 0 ... @@ -1043,12 +1043,12 @@ body: | name: atomicrmw_singlethread_seq_cst body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") seq_cst 4 into `i32 addrspace(3)* undef`) + $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") seq_cst (s32) into `i32 addrspace(3)* undef`) S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir b/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir index a1c63153f0be8..4835769d587e7 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir @@ -16,27 +16,27 @@ body: | successors: %bb.1(0x30000000), %bb.2(0x50000000) liveins: $sgpr0_sgpr1, $sgpr3 - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 44, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 44, 0 :: (non-temporal dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`) $sgpr8 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 - $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) + $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 36, 0 :: (non-temporal dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`) $sgpr9 = S_MOV_B32 &SCRATCH_RSRC_DWORD1, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 $sgpr10 = S_MOV_B32 4294967295, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 $sgpr11 = S_MOV_B32 15204352, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 $vgpr0 = V_MOV_B32_e32 1, implicit $exec - BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 4, 0, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(5)* undef`) + BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 4, 0, 0, 0, implicit $exec :: (store (s32) into `i32 addrspace(5)* undef`) S_WAITCNT 127 S_CMP_LG_U32 killed $sgpr2, 0, implicit-def $scc S_WAITCNT 3855 $vgpr0 = V_MOV_B32_e32 2, implicit $exec $vgpr1 = V_MOV_B32_e32 32772, implicit $exec - BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(5)* undef`) + BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `i32 addrspace(5)* undef`) S_CBRANCH_SCC0 %bb.1, implicit killed $scc bb.2: successors: %bb.3(0x80000000) liveins: $sgpr0_sgpr1, $sgpr4_sgpr5, $sgpr3, $sgpr8_sgpr9_sgpr10_sgpr11 - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 52, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 52, 0 :: (non-temporal dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`) S_WAITCNT 3855 $vgpr0 = V_MOV_B32_e32 32772, implicit $exec S_BRANCH %bb.3 @@ -45,7 +45,7 @@ body: | successors: %bb.3(0x80000000) liveins: $sgpr0_sgpr1, $sgpr4_sgpr5, $sgpr3, $sgpr8_sgpr9_sgpr10_sgpr11 - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 48, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 48, 0 :: (non-temporal dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`) S_WAITCNT 3855 $vgpr0 = V_MOV_B32_e32 4, implicit $exec @@ -55,11 +55,11 @@ body: | S_WAITCNT 127 $sgpr0 = S_LSHL_B32 killed $sgpr0, 2, implicit-def dead $scc $vgpr0 = V_ADD_CO_U32_e32 killed $sgpr0, killed $vgpr0, implicit-def dead $vcc, implicit $exec - $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, implicit $exec :: (load syncscope("agent-one-as") unordered 4 from `i32 addrspace(1)* undef`), (load syncscope("workgroup-one-as") seq_cst 4 from `[8192 x i32] addrspace(5)* undef`) + $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, implicit $exec :: (load syncscope("agent-one-as") unordered (s32) from `i32 addrspace(1)* undef`), (load syncscope("workgroup-one-as") seq_cst (s32) from `[8192 x i32] addrspace(5)* undef`) $vgpr1 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $sgpr4_sgpr5 $vgpr2 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit $sgpr4_sgpr5, implicit $exec S_WAITCNT 3952 - FLAT_STORE_DWORD killed $vgpr1_vgpr2, killed $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32 addrspace(1)* undef`) + FLAT_STORE_DWORD killed $vgpr1_vgpr2, killed $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32 addrspace(1)* undef`) S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir b/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir index d36b7d96f0625..b9afc7e048918 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir @@ -110,27 +110,27 @@ body: | successors: %bb.1.if(0x30000000), %bb.2.else(0x50000000) liveins: $sgpr0_sgpr1, $sgpr3 - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 44, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 44, 0 :: (non-temporal dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`) $sgpr8 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 - $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) + $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 36, 0 :: (non-temporal dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`) $sgpr9 = S_MOV_B32 &SCRATCH_RSRC_DWORD1, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 $sgpr10 = S_MOV_B32 4294967295, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 $sgpr11 = S_MOV_B32 15204352, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 $vgpr0 = V_MOV_B32_e32 1, implicit $exec - BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 4, 0, 0, 0, implicit $exec :: (store 4 into %ir.scratchptr01) + BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 4, 0, 0, 0, implicit $exec :: (store (s32) into %ir.scratchptr01) S_WAITCNT 127 S_CMP_LG_U32 killed $sgpr2, 0, implicit-def $scc S_WAITCNT 3855 $vgpr0 = V_MOV_B32_e32 2, implicit $exec $vgpr1 = V_MOV_B32_e32 32772, implicit $exec - BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.scratchptr12) + BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.scratchptr12) S_CBRANCH_SCC0 %bb.1.if, implicit killed $scc bb.2.else: successors: %bb.3.done(0x80000000) liveins: $sgpr0_sgpr1, $sgpr4_sgpr5, $sgpr3, $sgpr8_sgpr9_sgpr10_sgpr11 - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 52, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 52, 0 :: (non-temporal dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`) S_WAITCNT 3855 $vgpr0 = V_MOV_B32_e32 32772, implicit $exec S_BRANCH %bb.3.done @@ -139,7 +139,7 @@ body: | successors: %bb.3.done(0x80000000) liveins: $sgpr0_sgpr1, $sgpr4_sgpr5, $sgpr3, $sgpr8_sgpr9_sgpr10_sgpr11 - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 48, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 48, 0 :: (non-temporal dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`) S_WAITCNT 3855 $vgpr0 = V_MOV_B32_e32 4, implicit $exec @@ -149,11 +149,11 @@ body: | S_WAITCNT 127 $sgpr0 = S_LSHL_B32 killed $sgpr0, 2, implicit-def dead $scc $vgpr0 = V_ADD_CO_U32_e32 killed $sgpr0, killed $vgpr0, implicit-def dead $vcc, implicit $exec - $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, implicit $exec :: (non-temporal load 4 from %ir.else_ptr), (non-temporal load 4 from %ir.if_ptr) + $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, implicit $exec :: (non-temporal load (s32) from %ir.else_ptr), (non-temporal load (s32) from %ir.if_ptr) $vgpr1 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $sgpr4_sgpr5 $vgpr2 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit $sgpr4_sgpr5, implicit $exec S_WAITCNT 3952 - FLAT_STORE_DWORD killed $vgpr1_vgpr2, killed $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.out) + FLAT_STORE_DWORD killed $vgpr1_vgpr2, killed $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.out) S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir b/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir index 2784c62a4af5d..1ea29fe2f286a 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir @@ -90,27 +90,27 @@ body: | successors: %bb.1.if(0x30000000), %bb.2.else(0x50000000) liveins: $sgpr0_sgpr1, $sgpr3 - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 44, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 44, 0 :: (non-temporal dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`) $sgpr8 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 - $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) + $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 36, 0 :: (non-temporal dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`) $sgpr9 = S_MOV_B32 &SCRATCH_RSRC_DWORD1, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 $sgpr10 = S_MOV_B32 4294967295, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 $sgpr11 = S_MOV_B32 15204352, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 $vgpr0 = V_MOV_B32_e32 1, implicit $exec - BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 4, 0, 0, 0, implicit $exec :: (store 4 into %ir.scratchptr01) + BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 4, 0, 0, 0, implicit $exec :: (store (s32) into %ir.scratchptr01) S_WAITCNT 127 S_CMP_LG_U32 killed $sgpr2, 0, implicit-def $scc S_WAITCNT 3855 $vgpr0 = V_MOV_B32_e32 2, implicit $exec $vgpr1 = V_MOV_B32_e32 32772, implicit $exec - BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.scratchptr12) + BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.scratchptr12) S_CBRANCH_SCC0 %bb.1.if, implicit killed $scc bb.2.else: successors: %bb.3.done(0x80000000) liveins: $sgpr0_sgpr1, $sgpr4_sgpr5, $sgpr3, $sgpr8_sgpr9_sgpr10_sgpr11 - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 52, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 52, 0 :: (non-temporal dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`) S_WAITCNT 3855 $vgpr0 = V_MOV_B32_e32 32772, implicit $exec S_BRANCH %bb.3.done @@ -119,7 +119,7 @@ body: | successors: %bb.3.done(0x80000000) liveins: $sgpr0_sgpr1, $sgpr4_sgpr5, $sgpr3, $sgpr8_sgpr9_sgpr10_sgpr11 - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 48, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 48, 0 :: (non-temporal dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`) S_WAITCNT 3855 $vgpr0 = V_MOV_B32_e32 4, implicit $exec @@ -129,11 +129,11 @@ body: | S_WAITCNT 127 $sgpr0 = S_LSHL_B32 killed $sgpr0, 2, implicit-def dead $scc $vgpr0 = V_ADD_CO_U32_e32 killed $sgpr0, killed $vgpr0, implicit-def dead $vcc, implicit $exec - $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, implicit $exec :: (load 4 from %ir.else_ptr), (non-temporal load 4 from %ir.if_ptr) + $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s32) from %ir.else_ptr), (non-temporal load (s32) from %ir.if_ptr) $vgpr1 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $sgpr4_sgpr5 $vgpr2 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit $sgpr4_sgpr5, implicit $exec S_WAITCNT 3952 - FLAT_STORE_DWORD killed $vgpr1_vgpr2, killed $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.out) + FLAT_STORE_DWORD killed $vgpr1_vgpr2, killed $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.out) S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-region.mir b/llvm/test/CodeGen/AMDGPU/memory-legalizer-region.mir index 4e31ac2880725..f836cc0b44ed6 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-region.mir +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-region.mir @@ -13,14 +13,14 @@ name: load_singlethread_unordered body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec - renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 1, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") unordered 4 from `i32 addrspace(2)* undef`) + renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 1, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") unordered (s32) from `i32 addrspace(2)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) S_ENDPGM 0 ... @@ -37,14 +37,14 @@ body: | name: load_singlethread_monotonic body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec - renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") monotonic 4 from `i32 addrspace(2)* undef`) + renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") monotonic (s32) from `i32 addrspace(2)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) S_ENDPGM 0 ... @@ -61,14 +61,14 @@ body: | name: load_singlethread_acquire body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec - renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") acquire 4 from `i32 addrspace(2)* undef`) + renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") acquire (s32) from `i32 addrspace(2)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) S_ENDPGM 0 ... @@ -85,14 +85,14 @@ body: | name: load_singlethread_seq_cst body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec - renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") seq_cst 4 from `i32 addrspace(2)* undef`) + renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") seq_cst (s32) from `i32 addrspace(2)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) S_ENDPGM 0 ... @@ -109,14 +109,14 @@ body: | name: load_wavefront_unordered body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec - renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") unordered 4 from `i32 addrspace(2)* undef`) + renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") unordered (s32) from `i32 addrspace(2)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) S_ENDPGM 0 ... @@ -133,14 +133,14 @@ body: | name: load_wavefront_monotonic body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec - renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") monotonic 4 from `i32 addrspace(2)* undef`) + renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") monotonic (s32) from `i32 addrspace(2)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) S_ENDPGM 0 ... @@ -157,14 +157,14 @@ body: | name: load_wavefront_acquire body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec - renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") acquire 4 from `i32 addrspace(2)* undef`) + renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") acquire (s32) from `i32 addrspace(2)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) S_ENDPGM 0 ... @@ -181,14 +181,14 @@ body: | name: load_wavefront_seq_cst body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec - renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") seq_cst 4 from `i32 addrspace(2)* undef`) + renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") seq_cst (s32) from `i32 addrspace(2)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) S_ENDPGM 0 ... @@ -205,14 +205,14 @@ body: | name: load_workgroup_unordered body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec - renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") unordered 4 from `i32 addrspace(2)* undef`) + renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") unordered (s32) from `i32 addrspace(2)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) S_ENDPGM 0 ... @@ -229,14 +229,14 @@ body: | name: load_workgroup_monotonic body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec - renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") monotonic 4 from `i32 addrspace(2)* undef`) + renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") monotonic (s32) from `i32 addrspace(2)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) S_ENDPGM 0 ... @@ -253,14 +253,14 @@ body: | name: load_workgroup_acquire body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec - renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") acquire 4 from `i32 addrspace(2)* undef`) + renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") acquire (s32) from `i32 addrspace(2)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) S_ENDPGM 0 ... @@ -277,14 +277,14 @@ body: | name: load_workgroup_seq_cst body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec - renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") seq_cst 4 from `i32 addrspace(2)* undef`) + renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") seq_cst (s32) from `i32 addrspace(2)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) S_ENDPGM 0 ... @@ -301,14 +301,14 @@ body: | name: load_agent_unordered body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec - renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") unordered 4 from `i32 addrspace(2)* undef`) + renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") unordered (s32) from `i32 addrspace(2)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) S_ENDPGM 0 ... @@ -325,14 +325,14 @@ body: | name: load_agent_monotonic body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec - renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") monotonic 4 from `i32 addrspace(2)* undef`) + renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") monotonic (s32) from `i32 addrspace(2)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) S_ENDPGM 0 ... @@ -349,14 +349,14 @@ body: | name: load_agent_acquire body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec - renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") acquire 4 from `i32 addrspace(2)* undef`) + renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") acquire (s32) from `i32 addrspace(2)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) S_ENDPGM 0 ... @@ -373,14 +373,14 @@ body: | name: load_agent_seq_cst body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec - renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") seq_cst 4 from `i32 addrspace(2)* undef`) + renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") seq_cst (s32) from `i32 addrspace(2)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) S_ENDPGM 0 ... @@ -397,14 +397,14 @@ body: | name: load_system_unordered body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec - renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") unordered 4 from `i32 addrspace(2)* undef`) + renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") unordered (s32) from `i32 addrspace(2)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) S_ENDPGM 0 ... @@ -421,14 +421,14 @@ body: | name: load_system_monotonic body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec - renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") monotonic 4 from `i32 addrspace(2)* undef`) + renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") monotonic (s32) from `i32 addrspace(2)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) S_ENDPGM 0 ... @@ -445,14 +445,14 @@ body: | name: load_system_acquire body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec - renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") acquire 4 from `i32 addrspace(2)* undef`) + renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") acquire (s32) from `i32 addrspace(2)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) S_ENDPGM 0 ... @@ -469,14 +469,14 @@ body: | name: load_system_seq_cst body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec - renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") seq_cst 4 from `i32 addrspace(2)* undef`) + renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") seq_cst (s32) from `i32 addrspace(2)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) S_ENDPGM 0 ... @@ -493,12 +493,12 @@ body: | name: store_singlethread_unordered body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") unordered 4 into `i32 addrspace(2)* undef`) + DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") unordered (s32) into `i32 addrspace(2)* undef`) S_ENDPGM 0 ... @@ -515,12 +515,12 @@ body: | name: store_singlethread_monotonic body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") monotonic 4 into `i32 addrspace(2)* undef`) + DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") monotonic (s32) into `i32 addrspace(2)* undef`) S_ENDPGM 0 ... @@ -537,12 +537,12 @@ body: | name: store_singlethread_release body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") release 4 into `i32 addrspace(2)* undef`) + DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") release (s32) into `i32 addrspace(2)* undef`) S_ENDPGM 0 ... @@ -559,12 +559,12 @@ body: | name: store_singlethread_seq_cst body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") seq_cst 4 into `i32 addrspace(2)* undef`) + DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") seq_cst (s32) into `i32 addrspace(2)* undef`) S_ENDPGM 0 ... @@ -581,12 +581,12 @@ body: | name: store_wavefront_unordered body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") unordered 4 into `i32 addrspace(2)* undef`) + DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") unordered (s32) into `i32 addrspace(2)* undef`) S_ENDPGM 0 ... @@ -603,12 +603,12 @@ body: | name: store_wavefront_monotonic body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") monotonic 4 into `i32 addrspace(2)* undef`) + DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") monotonic (s32) into `i32 addrspace(2)* undef`) S_ENDPGM 0 ... @@ -625,12 +625,12 @@ body: | name: store_wavefront_release body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") release 4 into `i32 addrspace(2)* undef`) + DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") release (s32) into `i32 addrspace(2)* undef`) S_ENDPGM 0 ... @@ -647,12 +647,12 @@ body: | name: store_wavefront_seq_cst body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") seq_cst 4 into `i32 addrspace(2)* undef`) + DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") seq_cst (s32) into `i32 addrspace(2)* undef`) S_ENDPGM 0 ... @@ -669,12 +669,12 @@ body: | name: store_workgroup_unordered body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") unordered 4 into `i32 addrspace(2)* undef`) + DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") unordered (s32) into `i32 addrspace(2)* undef`) S_ENDPGM 0 ... @@ -691,12 +691,12 @@ body: | name: store_workgroup_monotonic body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") monotonic 4 into `i32 addrspace(2)* undef`) + DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") monotonic (s32) into `i32 addrspace(2)* undef`) S_ENDPGM 0 ... @@ -713,12 +713,12 @@ body: | name: store_workgroup_release body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") release 4 into `i32 addrspace(2)* undef`) + DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") release (s32) into `i32 addrspace(2)* undef`) S_ENDPGM 0 ... @@ -735,12 +735,12 @@ body: | name: store_workgroup_seq_cst body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") seq_cst 4 into `i32 addrspace(2)* undef`) + DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") seq_cst (s32) into `i32 addrspace(2)* undef`) S_ENDPGM 0 ... @@ -757,12 +757,12 @@ body: | name: store_agent_unordered body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") unordered 4 into `i32 addrspace(2)* undef`) + DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") unordered (s32) into `i32 addrspace(2)* undef`) S_ENDPGM 0 ... @@ -779,12 +779,12 @@ body: | name: store_agent_monotonic body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") monotonic 4 into `i32 addrspace(2)* undef`) + DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") monotonic (s32) into `i32 addrspace(2)* undef`) S_ENDPGM 0 ... @@ -801,12 +801,12 @@ body: | name: store_agent_release body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") release 4 into `i32 addrspace(2)* undef`) + DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") release (s32) into `i32 addrspace(2)* undef`) S_ENDPGM 0 ... @@ -823,12 +823,12 @@ body: | name: store_agent_seq_cst body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") seq_cst 4 into `i32 addrspace(2)* undef`) + DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") seq_cst (s32) into `i32 addrspace(2)* undef`) S_ENDPGM 0 ... @@ -845,12 +845,12 @@ body: | name: store_system_unordered body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store unordered 4 into `i32 addrspace(2)* undef`) + DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store unordered (s32) into `i32 addrspace(2)* undef`) S_ENDPGM 0 ... @@ -867,12 +867,12 @@ body: | name: store_system_monotonic body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store monotonic 4 into `i32 addrspace(2)* undef`) + DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store monotonic (s32) into `i32 addrspace(2)* undef`) S_ENDPGM 0 ... @@ -889,12 +889,12 @@ body: | name: store_system_release body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("one-as") release 4 into `i32 addrspace(2)* undef`) + DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("one-as") release (s32) into `i32 addrspace(2)* undef`) S_ENDPGM 0 ... @@ -911,12 +911,12 @@ body: | name: store_system_seq_cst body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("one-as") seq_cst 4 into `i32 addrspace(2)* undef`) + DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("one-as") seq_cst (s32) into `i32 addrspace(2)* undef`) S_ENDPGM 0 ... @@ -933,12 +933,12 @@ body: | name: atomicrmw_singlethread_unordered body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") unordered 4 into `i32 addrspace(2)* undef`) + $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") unordered (s32) into `i32 addrspace(2)* undef`) S_ENDPGM 0 ... @@ -955,12 +955,12 @@ body: | name: atomicrmw_singlethread_monotonic body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") monotonic 4 into `i32 addrspace(2)* undef`) + $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") monotonic (s32) into `i32 addrspace(2)* undef`) S_ENDPGM 0 ... @@ -977,12 +977,12 @@ body: | name: atomicrmw_singlethread_acquire body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") acquire 4 into `i32 addrspace(2)* undef`) + $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") acquire (s32) into `i32 addrspace(2)* undef`) S_ENDPGM 0 ... @@ -999,12 +999,12 @@ body: | name: atomicrmw_singlethread_release body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") release 4 into `i32 addrspace(2)* undef`) + $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") release (s32) into `i32 addrspace(2)* undef`) S_ENDPGM 0 ... @@ -1021,12 +1021,12 @@ body: | name: atomicrmw_singlethread_acq_rel body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") acq_rel 4 into `i32 addrspace(2)* undef`) + $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") acq_rel (s32) into `i32 addrspace(2)* undef`) S_ENDPGM 0 ... @@ -1043,12 +1043,12 @@ body: | name: atomicrmw_singlethread_seq_cst body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") seq_cst 4 into `i32 addrspace(2)* undef`) + $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") seq_cst (s32) into `i32 addrspace(2)* undef`) S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/memory_clause.mir b/llvm/test/CodeGen/AMDGPU/memory_clause.mir index dd634b8d2b771..3e938822b8464 100644 --- a/llvm/test/CodeGen/AMDGPU/memory_clause.mir +++ b/llvm/test/CodeGen/AMDGPU/memory_clause.mir @@ -303,12 +303,12 @@ body: | bb.0: %0 = IMPLICIT_DEF %1 = IMPLICIT_DEF - %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16) - %4:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16) - %5:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16) - IMAGE_STORE_V4_V2 %3, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16) - IMAGE_STORE_V4_V2 %4, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16) - IMAGE_STORE_V4_V2 %5, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16) + %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128)) + %4:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128)) + %5:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128)) + IMAGE_STORE_V4_V2 %3, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (store (s128)) + IMAGE_STORE_V4_V2 %4, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (store (s128)) + IMAGE_STORE_V4_V2 %5, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (store (s128)) ... # GCN-LABEL: {{^}}name: mixed_clause{{$}} @@ -334,7 +334,7 @@ body: | %0 = IMPLICIT_DEF %1 = IMPLICIT_DEF %2 = IMPLICIT_DEF - %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, %2, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16) + %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, %2, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128)) %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec ... diff --git a/llvm/test/CodeGen/AMDGPU/merge-image-load-gfx10.mir b/llvm/test/CodeGen/AMDGPU/merge-image-load-gfx10.mir index 0d50dfb76cd23..7193504b88986 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-image-load-gfx10.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-image-load-gfx10.mir @@ -1,7 +1,7 @@ # RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefix=GFX10 %s # GFX10-LABEL: name: image_load_merged_v1v3 -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -13,13 +13,13 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_load_merged_v1v3_reversed -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub3 # GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub0_sub1_sub2 @@ -31,14 +31,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_load_merged_v2v2 -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) # GFX10: %{{[0-9]+}}:vreg_64 = COPY %8.sub0_sub1 # GFX10: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub2_sub3 @@ -50,14 +50,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vreg_64 = IMAGE_LOAD_V2_V2_gfx10 %5:vreg_64, %3:sgpr_256, 3, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4) - %7:vreg_64 = IMAGE_LOAD_V2_V2_gfx10 %5:vreg_64, %3:sgpr_256, 12, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4) + %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vreg_64 = IMAGE_LOAD_V2_V2_gfx10 %5:vreg_64, %3:sgpr_256, 3, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) + %7:vreg_64 = IMAGE_LOAD_V2_V2_gfx10 %5:vreg_64, %3:sgpr_256, 12, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_load_merged_v2v2_reversed -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) # GFX10: %{{[0-9]+}}:vreg_64 = COPY %8.sub2_sub3 # GFX10: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub0_sub1 @@ -69,14 +69,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vreg_64 = IMAGE_LOAD_V2_V2_gfx10 %5:vreg_64, %3:sgpr_256, 12, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4) - %7:vreg_64 = IMAGE_LOAD_V2_V2_gfx10 %5:vreg_64, %3:sgpr_256, 3, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4) + %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vreg_64 = IMAGE_LOAD_V2_V2_gfx10 %5:vreg_64, %3:sgpr_256, 12, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) + %7:vreg_64 = IMAGE_LOAD_V2_V2_gfx10 %5:vreg_64, %3:sgpr_256, 3, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_load_merged_v3v1 -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) # GFX10: %{{[0-9]+}}:vreg_96 = COPY %8.sub0_sub1_sub2 # GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub3 @@ -88,14 +88,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) - %7:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) + %7:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) ... --- # GFX10-LABEL: name: image_load_merged_v3v1_reversed -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) # GFX10: %{{[0-9]+}}:vreg_96 = COPY %8.sub1_sub2_sub3 # GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub0 @@ -107,14 +107,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) - %7:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) + %7:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) ... --- # GFX10-LABEL: name: image_load_divided_merged -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) name: image_load_divided_merged body: | @@ -124,19 +124,19 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %8:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %9:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %7:vreg_64, %3:sgpr_256, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) - %10:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %11:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %8:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %9:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %7:vreg_64, %3:sgpr_256, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) + %10:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %11:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_load_divided_not_merged -# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_load_divided_not_merged body: | @@ -146,16 +146,16 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vreg_128 = COPY %2 - %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - IMAGE_STORE_V4_V2 %4:vreg_128, %5:vreg_64, %3:sgpr_256, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16) - %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + IMAGE_STORE_V4_V2 %4:vreg_128, %5:vreg_64, %3:sgpr_256, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (store (s128)) + %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_load_dmask_overlapped_not_merged -# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 4, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 4, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_load_dmask_overlapped_not_merged body: | @@ -165,15 +165,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 4, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 4, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_load_dmask_not_disjoint_not_merged -# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 4, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 11, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 4, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 11, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_load_dmask_not_disjoint_not_merged body: | @@ -183,15 +183,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 4, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 11, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 4, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 11, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_load_not_merged_0 -# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %6, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %6, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_load_not_merged_0 body: | @@ -201,16 +201,16 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %7:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %8:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %6, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %7:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %8:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %6, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_load_not_merged_1 -# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %6, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %6, %4, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %6, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %6, %4, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_load_not_merged_1 body: | @@ -221,15 +221,15 @@ body: | %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %5:vgpr_32 = COPY %2.sub3 - %6:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %7:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %6, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %8:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %6, %4, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %6:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %7:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %6, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %8:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %6, %4, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_load_not_merged_3 -# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_load_not_merged_3 body: | @@ -239,15 +239,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_load_not_merged_4 -# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_load_not_merged_4 body: | @@ -257,15 +257,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_load_not_merged_5 -# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_load_not_merged_5 body: | @@ -275,15 +275,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_load_not_merged_6 -# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_load_not_merged_6 body: | @@ -293,15 +293,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_load_not_merged_7 -# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_load_not_merged_7 body: | @@ -311,15 +311,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_load_not_merged_8 -# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V1_gfx10 %6, %3, 8, 1, -1, 0, 0, 1, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V1_gfx10 %6, %3, 8, 1, -1, 0, 0, 1, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_load_not_merged_8 body: | @@ -329,16 +329,16 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) %6:vgpr_32 = COPY %5.sub0 - %7:vgpr_32 = IMAGE_LOAD_V1_V1_gfx10 %6, %3, 8, 1, -1, 0, 0, 1, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4) - %8:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %7:vgpr_32 = IMAGE_LOAD_V1_V1_gfx10 %6, %3, 8, 1, -1, 0, 0, 1, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) + %8:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_load_not_merged_9 -# GFX10: %{{[0-9]+}}:vreg_64 = IMAGE_LOAD_V2_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_64 = IMAGE_LOAD_V2_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_load_not_merged_9 body: | @@ -348,15 +348,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vreg_64 = IMAGE_LOAD_V2_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vreg_64 = IMAGE_LOAD_V2_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_load_not_merged_10 -# GFX10: %{{[0-9]+}}:vreg_64 = IMAGE_LOAD_V2_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_64 = IMAGE_LOAD_V2_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_load_not_merged_10 body: | @@ -366,15 +366,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vreg_64 = IMAGE_LOAD_V2_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vreg_64 = IMAGE_LOAD_V2_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_load_not_merged_11 -# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 1, implicit $exec :: (dereferenceable load 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 1, implicit $exec :: (dereferenceable load (s32), addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_load_not_merged_11 body: | @@ -384,14 +384,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 1, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_LOAD_V1_V2_gfx10 %5, %3, 8, 1, -1, 0, 0, 0, 0, 0, 1, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_LOAD_V3_V2_gfx10 %5, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_load_mip_merged_v1v3 -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_V4_V3_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_V4_V3_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -403,16 +403,16 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_96 = BUFFER_LOAD_DWORDX3_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_LOAD_MIP_V1_V3_gfx10 %5:vreg_96, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_LOAD_MIP_V3_V3_gfx10 %5:vreg_96, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_96 = BUFFER_LOAD_DWORDX3_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_LOAD_MIP_V1_V3_gfx10 %5:vreg_96, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_LOAD_MIP_V3_V3_gfx10 %5:vreg_96, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_load_mip_pck_merged_v1v3 -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_PCK_V4_V3_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_PCK_V4_V3_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -424,16 +424,16 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_96 = BUFFER_LOAD_DWORDX3_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_LOAD_MIP_PCK_V1_V3_gfx10 %5:vreg_96, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_LOAD_MIP_PCK_V3_V3_gfx10 %5:vreg_96, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_96 = BUFFER_LOAD_DWORDX3_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_LOAD_MIP_PCK_V1_V3_gfx10 %5:vreg_96, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_LOAD_MIP_PCK_V3_V3_gfx10 %5:vreg_96, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_load_mip_pck_sgn_merged_v1v3 -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_PCK_SGN_V4_V3_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_PCK_SGN_V4_V3_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -445,14 +445,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_96 = BUFFER_LOAD_DWORDX3_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_LOAD_MIP_PCK_SGN_V1_V3_gfx10 %5:vreg_96, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_LOAD_MIP_PCK_SGN_V3_V3_gfx10 %5:vreg_96, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_96 = BUFFER_LOAD_DWORDX3_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_LOAD_MIP_PCK_SGN_V1_V3_gfx10 %5:vreg_96, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_LOAD_MIP_PCK_SGN_V3_V3_gfx10 %5:vreg_96, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_load_pck_merged_v1v3 -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_PCK_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_PCK_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -464,14 +464,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_LOAD_PCK_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_LOAD_PCK_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_LOAD_PCK_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_LOAD_PCK_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_load_pck_sgn_merged_v1v3 -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_PCK_SGN_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_PCK_SGN_V4_V2_gfx10 %5, %3, 15, 1, -1, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -483,8 +483,8 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_LOAD_PCK_SGN_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_LOAD_PCK_SGN_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_LOAD_PCK_SGN_V1_V2_gfx10 %5:vreg_64, %3:sgpr_256, 1, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_LOAD_PCK_SGN_V3_V2_gfx10 %5:vreg_64, %3:sgpr_256, 14, 1, -1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- diff --git a/llvm/test/CodeGen/AMDGPU/merge-image-load.mir b/llvm/test/CodeGen/AMDGPU/merge-image-load.mir index cf26d4f7d84e0..1821bb31792de 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-image-load.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-image-load.mir @@ -1,7 +1,7 @@ # RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefix=GFX9 %s # GFX9-LABEL: name: image_load_merged_v1v3 -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -13,14 +13,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_load_merged_v1v3_reversed -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub3 # GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub0_sub1_sub2 @@ -32,14 +32,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_load_merged_v2v2 -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) # GFX9: %{{[0-9]+}}:vreg_64 = COPY %8.sub0_sub1 # GFX9: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub2_sub3 @@ -51,14 +51,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vreg_64 = IMAGE_LOAD_V2_V4 %5:vreg_128, %3:sgpr_256, 3, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4) - %7:vreg_64 = IMAGE_LOAD_V2_V4 %5:vreg_128, %3:sgpr_256, 12, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vreg_64 = IMAGE_LOAD_V2_V4 %5:vreg_128, %3:sgpr_256, 3, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) + %7:vreg_64 = IMAGE_LOAD_V2_V4 %5:vreg_128, %3:sgpr_256, 12, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_load_merged_v2v2_reversed -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) # GFX9: %{{[0-9]+}}:vreg_64 = COPY %8.sub2_sub3 # GFX9: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub0_sub1 @@ -70,14 +70,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vreg_64 = IMAGE_LOAD_V2_V4 %5:vreg_128, %3:sgpr_256, 12, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4) - %7:vreg_64 = IMAGE_LOAD_V2_V4 %5:vreg_128, %3:sgpr_256, 3, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vreg_64 = IMAGE_LOAD_V2_V4 %5:vreg_128, %3:sgpr_256, 12, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) + %7:vreg_64 = IMAGE_LOAD_V2_V4 %5:vreg_128, %3:sgpr_256, 3, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_load_merged_v3v1 -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) # GFX9: %{{[0-9]+}}:vreg_96 = COPY %8.sub0_sub1_sub2 # GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub3 @@ -89,14 +89,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) - %7:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) + %7:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) ... --- # GFX9-LABEL: name: image_load_merged_v3v1_reversed -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) # GFX9: %{{[0-9]+}}:vreg_96 = COPY %8.sub1_sub2_sub3 # GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub0 @@ -108,14 +108,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) - %7:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) + %7:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) ... --- # GFX9-LABEL: name: image_load_divided_merged -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) name: image_load_divided_merged body: | @@ -125,19 +125,19 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %8:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %9:vreg_96 = IMAGE_LOAD_V3_V4 %7:vreg_128, %3:sgpr_256, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) - %10:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %11:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %8:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %9:vreg_96 = IMAGE_LOAD_V3_V4 %7:vreg_128, %3:sgpr_256, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) + %10:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %11:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_load_divided_not_merged -# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_load_divided_not_merged body: | @@ -147,16 +147,16 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vreg_128 = COPY %2 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - IMAGE_STORE_V4_V4 %4:vreg_128, %5:vreg_128, %3:sgpr_256, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16) - %7:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + IMAGE_STORE_V4_V4 %4:vreg_128, %5:vreg_128, %3:sgpr_256, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (store (s128)) + %7:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_load_dmask_overlapped_not_merged -# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 4, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 4, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_load_dmask_overlapped_not_merged body: | @@ -166,15 +166,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 4, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 4, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_load_dmask_not_disjoint_not_merged -# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 4, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 11, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 4, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 11, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_load_dmask_not_disjoint_not_merged body: | @@ -184,15 +184,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 4, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 11, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 4, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 11, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_load_not_merged_0 -# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %6, %3, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %6, %3, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_load_not_merged_0 body: | @@ -202,16 +202,16 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %7:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %8:vreg_96 = IMAGE_LOAD_V3_V4 %6, %3, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %7:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %8:vreg_96 = IMAGE_LOAD_V3_V4 %6, %3, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_load_not_merged_1 -# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %6, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %6, %4, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %6, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %6, %4, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_load_not_merged_1 body: | @@ -222,15 +222,15 @@ body: | %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %5:vgpr_32 = COPY %2.sub3 - %6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %7:vgpr_32 = IMAGE_LOAD_V1_V4 %6, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %8:vreg_96 = IMAGE_LOAD_V3_V4 %6, %4, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %7:vgpr_32 = IMAGE_LOAD_V1_V4 %6, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %8:vreg_96 = IMAGE_LOAD_V3_V4 %6, %4, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_load_not_merged_10 -# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, -1, 1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, -1, 1, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_load_not_merged_10 body: | @@ -240,15 +240,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, -1, 1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, -1, 1, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_load_not_merged_3 -# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 1, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 1, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_load_not_merged_3 body: | @@ -258,15 +258,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 1, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 1, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_load_not_merged_4 -# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 1, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 1, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_load_not_merged_4 body: | @@ -276,15 +276,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 1, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 1, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_load_not_merged_5 -# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 1, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 1, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_load_not_merged_5 body: | @@ -294,15 +294,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 1, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 1, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_load_not_merged_6 -# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 1, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 1, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_load_not_merged_6 body: | @@ -312,15 +312,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 1, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 1, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_load_not_merged_7 -# GFX9: %{{[0-9]+}}:vreg_64 = IMAGE_LOAD_V2_V4 %5, %3, 8, 0, 0, 0, 1, 0, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_64 = IMAGE_LOAD_V2_V4 %5, %3, 8, 0, 0, 0, 1, 0, -1, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_load_not_merged_7 body: | @@ -330,15 +330,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vreg_64 = IMAGE_LOAD_V2_V4 %5, %3, 8, 0, 0, 0, 1, 0, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4) - %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vreg_64 = IMAGE_LOAD_V2_V4 %5, %3, 8, 0, 0, 0, 1, 0, -1, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) + %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_load_not_merged_8 -# GFX9: %{{[0-9]+}}:vreg_64 = IMAGE_LOAD_V2_V4 %5, %3, 8, 0, 0, 0, 0, 1, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_64 = IMAGE_LOAD_V2_V4 %5, %3, 8, 0, 0, 0, 0, 1, -1, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_load_not_merged_8 body: | @@ -348,15 +348,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vreg_64 = IMAGE_LOAD_V2_V4 %5, %3, 8, 0, 0, 0, 0, 1, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4) - %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vreg_64 = IMAGE_LOAD_V2_V4 %5, %3, 8, 0, 0, 0, 0, 1, -1, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) + %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_load_not_merged_9 -# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_load_not_merged_9 body: | @@ -366,14 +366,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_load_mip_merged_v1v3 -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -385,14 +385,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_LOAD_MIP_V1_V4 %5:vreg_128, %3:sgpr_256, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_LOAD_MIP_V3_V4 %5:vreg_128, %3:sgpr_256, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_LOAD_MIP_V1_V4 %5:vreg_128, %3:sgpr_256, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_LOAD_MIP_V3_V4 %5:vreg_128, %3:sgpr_256, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_load_mip_pck_merged_v1v3 -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_PCK_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_PCK_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -404,14 +404,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_LOAD_MIP_PCK_V1_V4 %5:vreg_128, %3:sgpr_256, 1, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_LOAD_MIP_PCK_V3_V4 %5:vreg_128, %3:sgpr_256, 14, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_LOAD_MIP_PCK_V1_V4 %5:vreg_128, %3:sgpr_256, 1, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_LOAD_MIP_PCK_V3_V4 %5:vreg_128, %3:sgpr_256, 14, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_load_mip_pck_sgn_merged_v1v3 -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_PCK_SGN_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_PCK_SGN_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -423,14 +423,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_LOAD_MIP_PCK_SGN_V1_V4 %5:vreg_128, %3:sgpr_256, 1, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_LOAD_MIP_PCK_SGN_V3_V4 %5:vreg_128, %3:sgpr_256, 14, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_LOAD_MIP_PCK_SGN_V1_V4 %5:vreg_128, %3:sgpr_256, 1, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_LOAD_MIP_PCK_SGN_V3_V4 %5:vreg_128, %3:sgpr_256, 14, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_load_pck_merged_v1v3 -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_PCK_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_PCK_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -442,14 +442,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_LOAD_PCK_V1_V4 %5:vreg_128, %3:sgpr_256, 1, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_LOAD_PCK_V3_V4 %5:vreg_128, %3:sgpr_256, 14, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_LOAD_PCK_V1_V4 %5:vreg_128, %3:sgpr_256, 1, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_LOAD_PCK_V3_V4 %5:vreg_128, %3:sgpr_256, 14, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_load_pck_sgn_merged_v1v3 -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_PCK_SGN_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_PCK_SGN_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, -1, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -461,9 +461,9 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_LOAD_PCK_SGN_V1_V4 %5:vreg_128, %3:sgpr_256, 1, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_LOAD_PCK_SGN_V3_V4 %5:vreg_128, %3:sgpr_256, 14, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_LOAD_PCK_SGN_V1_V4 %5:vreg_128, %3:sgpr_256, 1, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_LOAD_PCK_SGN_V3_V4 %5:vreg_128, %3:sgpr_256, 14, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- diff --git a/llvm/test/CodeGen/AMDGPU/merge-image-sample-gfx10.mir b/llvm/test/CodeGen/AMDGPU/merge-image-sample-gfx10.mir index c405a16418e36..96ae9fba670f5 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-image-sample-gfx10.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-image-sample-gfx10.mir @@ -1,7 +1,7 @@ # RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefix=GFX10 %s # GFX10-LABEL: name: image_sample_l_merged_v1v3 -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -13,13 +13,13 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_l_merged_v1v3_reversed -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub3 # GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub0_sub1_sub2 @@ -31,14 +31,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_l_merged_v2v2 -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) # GFX10: %{{[0-9]+}}:vreg_64 = COPY %8.sub0_sub1 # GFX10: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub2_sub3 @@ -50,14 +50,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 3, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4) - %7:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 12, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 3, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) + %7:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 12, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_l_merged_v2v2_reversed -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) # GFX10: %{{[0-9]+}}:vreg_64 = COPY %8.sub2_sub3 # GFX10: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub0_sub1 @@ -69,14 +69,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 12, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4) - %7:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 3, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 12, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) + %7:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 3, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_l_merged_v3v1 -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) # GFX10: %{{[0-9]+}}:vreg_96 = COPY %8.sub0_sub1_sub2 # GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub3 @@ -88,14 +88,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) - %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) + %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) ... --- # GFX10-LABEL: name: image_sample_l_merged_v3v1_reversed -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) # GFX10: %{{[0-9]+}}:vreg_96 = COPY %8.sub1_sub2_sub3 # GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub0 @@ -107,14 +107,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) - %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) + %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) ... --- # GFX10-LABEL: name: image_sample_l_divided_merged -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) name: image_sample_l_divided_merged body: | @@ -124,19 +124,19 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %8:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %9:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %7:vgpr_32, %7:vgpr_32, %7:vgpr_32, %3:sgpr_256, %2:sgpr_128, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) - %10:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %11:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %8:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %9:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %7:vgpr_32, %7:vgpr_32, %7:vgpr_32, %3:sgpr_256, %2:sgpr_128, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) + %10:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %11:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_l_divided_not_merged -# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_sample_l_divided_not_merged body: | @@ -146,16 +146,16 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vreg_128 = COPY %2 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - IMAGE_STORE_V4_V2_nsa_gfx10 %4:vreg_128, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16) - %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + IMAGE_STORE_V4_V2_nsa_gfx10 %4:vreg_128, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (store (s128)) + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_l_dmask_overlapped_not_merged -# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 4, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 4, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_sample_l_dmask_overlapped_not_merged body: | @@ -165,15 +165,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 4, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 4, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_l_dmask_not_disjoint_not_merged -# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 4, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 11, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 4, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 11, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_sample_l_dmask_not_disjoint_not_merged body: | @@ -183,15 +183,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 4, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 11, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 4, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 11, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_l_not_merged_0 -# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %6, %6, %6, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %6, %6, %6, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_sample_l_not_merged_0 body: | @@ -201,16 +201,16 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %8:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %6, %6, %6, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %8:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %6, %6, %6, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_l_not_merged_1 -# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %6, %6, %6, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %6, %6, %6, %4, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %6, %6, %6, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %6, %6, %6, %4, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_sample_l_not_merged_1 body: | @@ -221,15 +221,15 @@ body: | %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %5:vgpr_32 = COPY %2.sub3 - %6:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %6, %6, %6, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %8:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %6, %6, %6, %4, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %6:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %6, %6, %6, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %8:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %6, %6, %6, %4, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_l_not_merged_2 -# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %6, %6, %6, %4, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %6, %6, %6, %4, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %6, %6, %6, %4, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %6, %6, %6, %4, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_sample_l_not_merged_2 body: | @@ -240,15 +240,15 @@ body: | %3:sgpr_128 = COPY $sgpr92_sgpr93_sgpr94_sgpr95 %4:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %5:vgpr_32 = COPY %2.sub3 - %6:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %6, %6, %6, %4, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %8:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %6, %6, %6, %4, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %6:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %6, %6, %6, %4, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %8:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %6, %6, %6, %4, %3, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_l_not_merged_3 -# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_sample_l_not_merged_3 body: | @@ -258,15 +258,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_l_not_merged_4 -# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_sample_l_not_merged_4 body: | @@ -276,15 +276,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_l_not_merged_5 -# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_sample_l_not_merged_5 body: | @@ -294,15 +294,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_l_not_merged_6 -# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_sample_l_not_merged_6 body: | @@ -312,15 +312,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_l_not_merged_7 -# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V2_nsa_gfx10 %5, %5, %3, %2, 8, 1, -1, 0, 0, 1, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V2_nsa_gfx10 %5, %5, %3, %2, 8, 1, -1, 0, 0, 1, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_sample_l_not_merged_7 body: | @@ -330,15 +330,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V2_nsa_gfx10 %5, %5, %3, %2, 8, 1, -1, 0, 0, 1, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V2_nsa_gfx10 %5, %5, %3, %2, 8, 1, -1, 0, 0, 1, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_l_not_merged_8 -# GFX10: %{{[0-9]+}}:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load 8, addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load (s64), addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_sample_l_not_merged_8 body: | @@ -348,15 +348,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load 8, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load (s64), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_l_not_merged_9 -# GFX10: %{{[0-9]+}}:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable load 8, addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable load (s64), addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_sample_l_not_merged_9 body: | @@ -366,15 +366,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable load 8, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vreg_64 = IMAGE_SAMPLE_L_V2_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable load (s64), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_l_not_merged_10 -# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) -# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 1, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_sample_l_not_merged_10 body: | @@ -384,9 +384,9 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V3_nsa_gfx10 %5, %5, %5, %3, %2, 8, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V3_nsa_gfx10 %5, %5, %5, %3, %2, 7, 1, -1, 0, 0, 0, 0, 0, 1, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- @@ -394,7 +394,7 @@ body: | # GFX10-LABEL: name: image_sample_merged_v1v3 -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_V4_V2_nsa_gfx10 %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_V4_V2_nsa_gfx10 %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -406,15 +406,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_V1_V2_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_V3_V2_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_V1_V2_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_V3_V2_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_b_merged_v1v3 -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -426,15 +426,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_B_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_B_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_B_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_B_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_b_cl_merged_v1v3 -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_CL_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_CL_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -446,15 +446,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_B_CL_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_B_CL_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_B_CL_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_B_CL_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_b_cl_o_merged_v1v3 -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_CL_O_V4_V5_nsa_gfx10 %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_CL_O_V4_V5_nsa_gfx10 %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -466,15 +466,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_B_CL_O_V1_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_B_CL_O_V3_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_B_CL_O_V1_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_B_CL_O_V3_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_b_o_merged_v1v3 -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_O_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_O_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -486,15 +486,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_B_O_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_B_O_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_B_O_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_B_O_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_c_merged_v1v3 -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -506,15 +506,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_C_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_C_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_C_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_cd_merged_v1v3 -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_V4_V6_nsa_gfx10 %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_V4_V6_nsa_gfx10 %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -526,15 +526,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_CD_V1_V6_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_CD_V3_V6_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_CD_V1_V6_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_CD_V3_V6_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_cd_cl_merged_v1v3 -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_CL_V4_V7_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_CL_V4_V7_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -546,15 +546,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_CD_CL_V1_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_CD_CL_V3_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_CD_CL_V1_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_CD_CL_V3_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_cd_cl_o_merged_v1v3 -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_CL_O_V4_V8_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_CL_O_V4_V8_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -566,15 +566,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_CD_CL_O_V1_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_CD_CL_O_V3_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_CD_CL_O_V1_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_CD_CL_O_V3_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_cd_o_merged_v1v3 -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_O_V4_V7_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_O_V4_V7_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -586,15 +586,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_CD_O_V1_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_CD_O_V3_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_CD_O_V1_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_CD_O_V3_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_cl_merged_v1v3 -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CL_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CL_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -606,15 +606,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_CL_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_CL_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_CL_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_CL_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_cl_o_merged_v1v3 -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CL_O_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CL_O_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -626,15 +626,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_CL_O_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_CL_O_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_CL_O_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_CL_O_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_c_b_merged_v1v3 -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -646,15 +646,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_C_B_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_C_B_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_C_B_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_B_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_c_b_cl_merged_v1v3 -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_CL_V4_V5_nsa_gfx10 %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_CL_V4_V5_nsa_gfx10 %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -666,15 +666,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_C_B_CL_V1_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_C_B_CL_V3_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_C_B_CL_V1_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_B_CL_V3_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_c_b_cl_o_merged_v1v3 -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_CL_O_V4_V6_nsa_gfx10 %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_CL_O_V4_V6_nsa_gfx10 %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -686,15 +686,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_C_B_CL_O_V1_V6_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_C_B_CL_O_V3_V6_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_C_B_CL_O_V1_V6_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_B_CL_O_V3_V6_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_c_b_o_merged_v1v3 -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_O_V4_V5_nsa_gfx10 %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_O_V4_V5_nsa_gfx10 %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -706,15 +706,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_C_B_O_V1_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_C_B_O_V3_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_C_B_O_V1_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_B_O_V3_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_c_cd_merged_v1v3 -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_V4_V7_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_V4_V7_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -726,15 +726,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_C_CD_V1_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_C_CD_V3_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_C_CD_V1_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_CD_V3_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_c_cd_cl_merged_v1v3 -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_CL_V4_V8_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_CL_V4_V8_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -746,15 +746,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_C_CD_CL_V1_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_C_CD_CL_V3_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_C_CD_CL_V1_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_CD_CL_V3_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_c_cd_cl_o_merged_v1v3 -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_CL_O_V4_V9_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_CL_O_V4_V9_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -766,15 +766,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_C_CD_CL_O_V1_V9_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_C_CD_CL_O_V3_V9_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_C_CD_CL_O_V1_V9_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_CD_CL_O_V3_V9_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_c_cd_o_merged_v1v3 -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_O_V4_V8_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_O_V4_V8_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -786,15 +786,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_C_CD_O_V1_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_C_CD_O_V3_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_C_CD_O_V1_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_CD_O_V3_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_c_cl_merged_v1v3 -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CL_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CL_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -806,15 +806,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_C_CL_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_C_CL_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_C_CL_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_CL_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_c_cl_o_merged_v1v3 -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CL_O_V4_V5_nsa_gfx10 %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CL_O_V4_V5_nsa_gfx10 %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -826,15 +826,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_C_CL_O_V1_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_C_CL_O_V3_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_C_CL_O_V1_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_CL_O_V3_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_c_d_merged_v1v3 -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_V4_V7_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_V4_V7_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -846,15 +846,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_C_D_V1_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_C_D_V3_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_C_D_V1_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_D_V3_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_c_d_cl_merged_v1v3 -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_CL_V4_V8_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_CL_V4_V8_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -866,15 +866,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_C_D_CL_V1_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_C_D_CL_V3_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_C_D_CL_V1_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_D_CL_V3_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_c_d_cl_o_merged_v1v3 -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_CL_O_V4_V9_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_CL_O_V4_V9_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -886,15 +886,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_C_D_CL_O_V1_V9_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_C_D_CL_O_V3_V9_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_C_D_CL_O_V1_V9_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_D_CL_O_V3_V9_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_c_d_o_merged_v1v3 -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_O_V4_V8_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_O_V4_V8_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -906,15 +906,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_C_D_O_V1_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_C_D_O_V3_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_C_D_O_V1_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_D_O_V3_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_c_l_merged_v1v3 -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_L_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_L_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -926,15 +926,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_C_L_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_C_L_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_C_L_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_L_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_c_lz_merged_v1v3 -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_LZ_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_LZ_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -946,15 +946,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_C_LZ_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_C_LZ_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_C_LZ_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_LZ_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_c_lz_o_merged_v1v3 -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_LZ_O_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_LZ_O_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -966,15 +966,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_C_LZ_O_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_C_LZ_O_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_C_LZ_O_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_LZ_O_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_c_l_o_merged_v1v3 -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_L_O_V4_V5_nsa_gfx10 %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_L_O_V4_V5_nsa_gfx10 %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -986,15 +986,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_C_L_O_V1_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_C_L_O_V3_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_C_L_O_V1_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_L_O_V3_V5_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_c_o_merged_v1v3 -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_O_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_O_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -1006,15 +1006,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_C_O_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_C_O_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_C_O_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_O_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_d_merged_v1v3 -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_V4_V6_nsa_gfx10 %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_V4_V6_nsa_gfx10 %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -1026,15 +1026,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_D_V1_V6_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_D_V3_V6_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_D_V1_V6_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_D_V3_V6_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_d_cl_merged_v1v3 -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_CL_V4_V7_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_CL_V4_V7_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -1046,15 +1046,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_D_CL_V1_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_D_CL_V3_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_D_CL_V1_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_D_CL_V3_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_d_cl_o_merged_v1v3 -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_CL_O_V4_V8_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_CL_O_V4_V8_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -1066,15 +1066,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_D_CL_O_V1_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_D_CL_O_V3_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_D_CL_O_V1_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_D_CL_O_V3_V8_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_d_o_merged_v1v3 -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_O_V4_V7_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_O_V4_V7_nsa_gfx10 %5, %5, %5, %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -1086,15 +1086,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_D_O_V1_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_D_O_V3_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_D_O_V1_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_D_O_V3_V7_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_lz_merged_v1v3 -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2_nsa_gfx10 %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2_nsa_gfx10 %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -1106,15 +1106,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_LZ_V3_V2_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_LZ_V3_V2_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_lz_o_merged_v1v3 -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_LZ_O_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_LZ_O_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -1126,15 +1126,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_LZ_O_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_LZ_O_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_LZ_O_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_LZ_O_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_l_o_merged_v1v3 -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_O_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_O_V4_V4_nsa_gfx10 %5, %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -1146,15 +1146,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_L_O_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_L_O_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_L_O_V1_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_O_V3_V4_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX10-LABEL: name: image_sample_o_merged_v1v3 -# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_O_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_O_V4_V3_nsa_gfx10 %5, %5, %5, %3, %2, 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -1166,8 +1166,8 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_O_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_O_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_O_V1_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_O_V3_V3_nsa_gfx10 %5:vgpr_32, %5:vgpr_32, %5:vgpr_32, %3:sgpr_256, %2:sgpr_128, 14, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- diff --git a/llvm/test/CodeGen/AMDGPU/merge-image-sample.mir b/llvm/test/CodeGen/AMDGPU/merge-image-sample.mir index 0ce5c14f52dbc..8e848d032fb81 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-image-sample.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-image-sample.mir @@ -1,7 +1,7 @@ # RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefix=GFX9 %s # GFX9-LABEL: name: image_sample_l_merged_v1v3 -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -13,14 +13,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_l_merged_v1v3_reversed -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub3 # GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub0_sub1_sub2 @@ -32,14 +32,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_l_merged_v2v2 -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) # GFX9: %{{[0-9]+}}:vreg_64 = COPY %8.sub0_sub1 # GFX9: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub2_sub3 @@ -51,14 +51,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 3, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4) - %7:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 12, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 3, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) + %7:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 12, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_l_merged_v2v2_reversed -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) # GFX9: %{{[0-9]+}}:vreg_64 = COPY %8.sub2_sub3 # GFX9: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub0_sub1 @@ -70,14 +70,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 12, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4) - %7:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 3, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 12, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) + %7:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 3, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s64), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_l_merged_v3v1 -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) # GFX9: %{{[0-9]+}}:vreg_96 = COPY %8.sub0_sub1_sub2 # GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub3 @@ -89,14 +89,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) - %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) + %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) ... --- # GFX9-LABEL: name: image_sample_l_merged_v3v1_reversed -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), addrspace 4) # GFX9: %{{[0-9]+}}:vreg_96 = COPY %8.sub1_sub2_sub3 # GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub0 @@ -108,14 +108,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) - %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) + %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) ... --- # GFX9-LABEL: name: image_sample_l_divided_merged -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) name: image_sample_l_divided_merged body: | @@ -125,19 +125,19 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %8:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %9:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %7:vreg_128, %3:sgpr_256, %2:sgpr_128, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) - %10:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %11:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %8:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %9:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %7:vreg_128, %3:sgpr_256, %2:sgpr_128, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) + %10:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %11:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_l_divided_not_merged -# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_sample_l_divided_not_merged body: | @@ -147,16 +147,16 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vreg_128 = COPY %2 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - IMAGE_STORE_V4_V4 %4:vreg_128, %5:vreg_128, %3:sgpr_256, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16) - %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + IMAGE_STORE_V4_V4 %4:vreg_128, %5:vreg_128, %3:sgpr_256, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (store (s128)) + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_l_dmask_overlapped_not_merged -# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 4, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 4, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_sample_l_dmask_overlapped_not_merged body: | @@ -166,15 +166,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 4, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 4, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_l_dmask_not_disjoint_not_merged -# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 4, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 11, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 4, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 11, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_sample_l_dmask_not_disjoint_not_merged body: | @@ -184,15 +184,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 4, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 11, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 4, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 11, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_l_not_merged_0 -# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %6, %3, %2, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %6, %3, %2, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_sample_l_not_merged_0 body: | @@ -202,16 +202,16 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %8:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %6, %3, %2, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %8:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %6, %3, %2, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_l_not_merged_1 -# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %6, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %6, %4, %2, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %6, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %6, %4, %2, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_sample_l_not_merged_1 body: | @@ -222,15 +222,15 @@ body: | %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %5:vgpr_32 = COPY %2.sub3 - %6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %6, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %8:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %6, %4, %2, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %6, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %8:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %6, %4, %2, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_l_not_merged_2 -# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %6, %4, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %6, %4, %3, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %6, %4, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %6, %4, %3, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_sample_l_not_merged_2 body: | @@ -241,15 +241,15 @@ body: | %3:sgpr_128 = COPY $sgpr92_sgpr93_sgpr94_sgpr95 %4:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %5:vgpr_32 = COPY %2.sub3 - %6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %6, %4, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %8:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %6, %4, %3, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %6, %4, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %8:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %6, %4, %3, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_l_not_merged_3 -# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 1, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 1, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_sample_l_not_merged_3 body: | @@ -259,15 +259,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 1, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 1, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_l_not_merged_4 -# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 1, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 1, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_sample_l_not_merged_4 body: | @@ -277,15 +277,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 1, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 1, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_l_not_merged_5 -# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 1, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 1, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_sample_l_not_merged_5 body: | @@ -295,15 +295,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 1, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 1, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_l_not_merged_6 -# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 1, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 1, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_sample_l_not_merged_6 body: | @@ -313,15 +313,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 1, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 1, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_l_not_merged_7 -# GFX9: %{{[0-9]+}}:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5, %3, %2, 8, 0, 0, 0, 1, 0, -1, 0, implicit $exec :: (dereferenceable load 8, addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5, %3, %2, 8, 0, 0, 0, 1, 0, -1, 0, implicit $exec :: (dereferenceable load (s64), addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_sample_l_not_merged_7 body: | @@ -331,15 +331,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5, %3, %2, 8, 0, 0, 0, 1, 0, -1, 0, implicit $exec :: (dereferenceable load 8, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5, %3, %2, 8, 0, 0, 0, 1, 0, -1, 0, implicit $exec :: (dereferenceable load (s64), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_l_not_merged_8 -# GFX9: %{{[0-9]+}}:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5, %3, %2, 8, 0, 0, 0, 0, 1, -1, 0, implicit $exec :: (dereferenceable load 8, addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5, %3, %2, 8, 0, 0, 0, 0, 1, -1, 0, implicit $exec :: (dereferenceable load (s64), addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_sample_l_not_merged_8 body: | @@ -349,15 +349,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5, %3, %2, 8, 0, 0, 0, 0, 1, -1, 0, implicit $exec :: (dereferenceable load 8, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5, %3, %2, 8, 0, 0, 0, 0, 1, -1, 0, implicit $exec :: (dereferenceable load (s64), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_l_not_merged_9 -# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_sample_l_not_merged_9 body: | @@ -367,15 +367,15 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_l_not_merged_10 -# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) -# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, -1, 1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, -1, 1, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) name: image_sample_l_not_merged_10 body: | @@ -385,14 +385,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, -1, 1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, -1, 1, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_merged_v1v3 -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -404,14 +404,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_b_merged_v1v3 -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -423,14 +423,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_B_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_B_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_B_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_B_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_b_cl_merged_v1v3 -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -442,14 +442,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_B_CL_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_B_CL_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_B_CL_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_B_CL_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_b_cl_o_merged_v1v3 -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -461,14 +461,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_B_CL_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_B_CL_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_B_CL_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_B_CL_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_b_o_merged_v1v3 -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -480,14 +480,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_B_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_B_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_B_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_B_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_c_merged_v1v3 -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -499,14 +499,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_C_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_C_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_C_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_cd_merged_v1v3 -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -518,14 +518,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_CD_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_CD_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_CD_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_CD_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_cd_cl_merged_v1v3 -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -537,14 +537,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_CD_CL_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_CD_CL_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_CD_CL_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_CD_CL_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_cd_cl_o_merged_v1v3 -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -556,14 +556,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_CD_CL_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_CD_CL_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_CD_CL_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_CD_CL_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_cd_o_merged_v1v3 -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -575,14 +575,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_CD_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_CD_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_CD_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_CD_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_cl_merged_v1v3 -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -594,14 +594,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_CL_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_CL_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_CL_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_CL_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_cl_o_merged_v1v3 -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -613,14 +613,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_CL_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_CL_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_CL_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_CL_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_c_b_merged_v1v3 -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -632,14 +632,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_C_B_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_C_B_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_C_B_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_B_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_c_b_cl_merged_v1v3 -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -651,14 +651,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_C_B_CL_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_C_B_CL_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_C_B_CL_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_B_CL_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_c_b_cl_o_merged_v1v3 -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -670,14 +670,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_C_B_CL_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_C_B_CL_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_C_B_CL_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_B_CL_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_c_b_o_merged_v1v3 -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -689,14 +689,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_C_B_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_C_B_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_C_B_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_B_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_c_cd_merged_v1v3 -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -708,14 +708,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_C_CD_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_C_CD_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_C_CD_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_CD_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_c_cd_cl_merged_v1v3 -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -727,14 +727,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_C_CD_CL_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_C_CD_CL_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_C_CD_CL_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_CD_CL_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_c_cd_cl_o_merged_v1v3 -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -746,14 +746,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_C_CD_CL_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_C_CD_CL_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_C_CD_CL_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_CD_CL_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_c_cd_o_merged_v1v3 -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -765,14 +765,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_C_CD_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_C_CD_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_C_CD_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_CD_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_c_cl_merged_v1v3 -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -784,14 +784,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_C_CL_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_C_CL_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_C_CL_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_CL_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_c_cl_o_merged_v1v3 -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -803,14 +803,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_C_CL_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_C_CL_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_C_CL_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_CL_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_c_d_merged_v1v3 -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -822,14 +822,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_C_D_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_C_D_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_C_D_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_D_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_c_d_cl_merged_v1v3 -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -841,14 +841,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_C_D_CL_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_C_D_CL_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_C_D_CL_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_D_CL_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_c_d_cl_o_merged_v1v3 -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -860,14 +860,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_C_D_CL_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_C_D_CL_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_C_D_CL_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_D_CL_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_c_d_o_merged_v1v3 -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -879,14 +879,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_C_D_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_C_D_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_C_D_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_D_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_c_l_merged_v1v3 -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -898,14 +898,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_C_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_C_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_C_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_c_lz_merged_v1v3 -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_LZ_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_LZ_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -917,14 +917,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_C_LZ_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_C_LZ_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_C_LZ_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_LZ_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_c_lz_o_merged_v1v3 -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_LZ_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_LZ_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -936,14 +936,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_C_LZ_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_C_LZ_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_C_LZ_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_LZ_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_c_l_o_merged_v1v3 -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_L_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_L_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -955,14 +955,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_C_L_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_C_L_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_C_L_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_L_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_c_o_merged_v1v3 -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -974,14 +974,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_C_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_C_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_C_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_d_merged_v1v3 -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -993,14 +993,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_D_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_D_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_D_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_D_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_d_cl_merged_v1v3 -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -1012,14 +1012,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_D_CL_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_D_CL_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_D_CL_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_D_CL_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_d_cl_o_merged_v1v3 -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -1031,14 +1031,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_D_CL_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_D_CL_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_D_CL_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_D_CL_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_d_o_merged_v1v3 -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -1050,14 +1050,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_D_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_D_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_D_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_D_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_lz_merged_v1v3 -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_LZ_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_LZ_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -1069,14 +1069,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_LZ_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_LZ_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_lz_o_merged_v1v3 -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_LZ_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_LZ_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -1088,14 +1088,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_LZ_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_LZ_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_LZ_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_LZ_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_l_o_merged_v1v3 -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -1107,14 +1107,14 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_L_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_L_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_L_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- # GFX9-LABEL: name: image_sample_o_merged_v1v3 -# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 # GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 @@ -1126,8 +1126,8 @@ body: | %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0 %4:vgpr_32 = COPY %2.sub3 - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) - %6:vgpr_32 = IMAGE_SAMPLE_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) - %7:vreg_96 = IMAGE_SAMPLE_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128)) + %6:vgpr_32 = IMAGE_SAMPLE_O_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_O_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4) ... --- diff --git a/llvm/test/CodeGen/AMDGPU/merge-load-store-agpr.mir b/llvm/test/CodeGen/AMDGPU/merge-load-store-agpr.mir index 3d936f45428e5..7d0a273091fab 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-load-store-agpr.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-load-store-agpr.mir @@ -7,8 +7,8 @@ body: | bb.0: %0:vgpr_32 = IMPLICIT_DEF - %1:vgpr_32 = DS_READ_B32_gfx9 %0, 0, 0, implicit $exec :: (load 4 from `i32 addrspace(3)* undef`) - %2:vgpr_32 = DS_READ_B32_gfx9 %0, 8, 0, implicit $exec :: (load 4 from `i32 addrspace(3)* undef`) + %1:vgpr_32 = DS_READ_B32_gfx9 %0, 0, 0, implicit $exec :: (load (s32) from `i32 addrspace(3)* undef`) + %2:vgpr_32 = DS_READ_B32_gfx9 %0, 8, 0, implicit $exec :: (load (s32) from `i32 addrspace(3)* undef`) ... # GCN-LABEL: name: ds_read_b32_a_a @@ -18,8 +18,8 @@ body: | bb.0: %0:vgpr_32 = IMPLICIT_DEF - %1:agpr_32 = DS_READ_B32_gfx9 %0, 0, 0, implicit $exec :: (load 4 from `i32 addrspace(3)* undef`) - %2:agpr_32 = DS_READ_B32_gfx9 %0, 8, 0, implicit $exec :: (load 4 from `i32 addrspace(3)* undef`) + %1:agpr_32 = DS_READ_B32_gfx9 %0, 0, 0, implicit $exec :: (load (s32) from `i32 addrspace(3)* undef`) + %2:agpr_32 = DS_READ_B32_gfx9 %0, 8, 0, implicit $exec :: (load (s32) from `i32 addrspace(3)* undef`) ... # GCN-LABEL: name: ds_read_b32_v_a @@ -30,8 +30,8 @@ body: | bb.0: %0:vgpr_32 = IMPLICIT_DEF - %1:vgpr_32 = DS_READ_B32_gfx9 %0, 0, 0, implicit $exec :: (load 4 from `i32 addrspace(3)* undef`) - %2:agpr_32 = DS_READ_B32_gfx9 %0, 8, 0, implicit $exec :: (load 4 from `i32 addrspace(3)* undef`) + %1:vgpr_32 = DS_READ_B32_gfx9 %0, 0, 0, implicit $exec :: (load (s32) from `i32 addrspace(3)* undef`) + %2:agpr_32 = DS_READ_B32_gfx9 %0, 8, 0, implicit $exec :: (load (s32) from `i32 addrspace(3)* undef`) ... # GCN-LABEL: name: ds_read_b32_a_v @@ -42,8 +42,8 @@ body: | bb.0: %0:vgpr_32 = IMPLICIT_DEF - %1:agpr_32 = DS_READ_B32_gfx9 %0, 8, 0, implicit $exec :: (load 4 from `i32 addrspace(3)* undef`) - %2:vgpr_32 = DS_READ_B32_gfx9 %0, 0, 0, implicit $exec :: (load 4 from `i32 addrspace(3)* undef`) + %1:agpr_32 = DS_READ_B32_gfx9 %0, 8, 0, implicit $exec :: (load (s32) from `i32 addrspace(3)* undef`) + %2:vgpr_32 = DS_READ_B32_gfx9 %0, 0, 0, implicit $exec :: (load (s32) from `i32 addrspace(3)* undef`) ... # GCN-LABEL: name: ds_write_b32_v_v @@ -53,8 +53,8 @@ body: | bb.0: %0:vgpr_32 = IMPLICIT_DEF - DS_WRITE_B32_gfx9 %0, undef %1:vgpr_32, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(3)* undef`) - DS_WRITE_B32_gfx9 %0, undef %2:vgpr_32, 8, 0, implicit $exec :: (store 4 into `i32 addrspace(3)* undef`) + DS_WRITE_B32_gfx9 %0, undef %1:vgpr_32, 0, 0, implicit $exec :: (store (s32) into `i32 addrspace(3)* undef`) + DS_WRITE_B32_gfx9 %0, undef %2:vgpr_32, 8, 0, implicit $exec :: (store (s32) into `i32 addrspace(3)* undef`) ... # GCN-LABEL: name: ds_write_b32_a_a @@ -65,8 +65,8 @@ body: | bb.0: %0:vgpr_32 = IMPLICIT_DEF - DS_WRITE_B32_gfx9 %0, undef %1:agpr_32, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(3)* undef`) - DS_WRITE_B32_gfx9 %0, undef %2:agpr_32, 8, 0, implicit $exec :: (store 4 into `i32 addrspace(3)* undef`) + DS_WRITE_B32_gfx9 %0, undef %1:agpr_32, 0, 0, implicit $exec :: (store (s32) into `i32 addrspace(3)* undef`) + DS_WRITE_B32_gfx9 %0, undef %2:agpr_32, 8, 0, implicit $exec :: (store (s32) into `i32 addrspace(3)* undef`) ... # GCN-LABEL: name: ds_write_b32_v_a @@ -77,8 +77,8 @@ body: | bb.0: %0:vgpr_32 = IMPLICIT_DEF - DS_WRITE_B32_gfx9 %0, undef %1:vgpr_32, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(3)* undef`) - DS_WRITE_B32_gfx9 %0, undef %2:agpr_32, 8, 0, implicit $exec :: (store 4 into `i32 addrspace(3)* undef`) + DS_WRITE_B32_gfx9 %0, undef %1:vgpr_32, 0, 0, implicit $exec :: (store (s32) into `i32 addrspace(3)* undef`) + DS_WRITE_B32_gfx9 %0, undef %2:agpr_32, 8, 0, implicit $exec :: (store (s32) into `i32 addrspace(3)* undef`) ... # GCN-LABEL: name: ds_write_b32_a_v @@ -89,6 +89,6 @@ body: | bb.0: %0:vgpr_32 = IMPLICIT_DEF - DS_WRITE_B32_gfx9 %0, undef %1:agpr_32, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(3)* undef`) - DS_WRITE_B32_gfx9 %0, undef %2:vgpr_32, 8, 0, implicit $exec :: (store 4 into `i32 addrspace(3)* undef`) + DS_WRITE_B32_gfx9 %0, undef %1:agpr_32, 0, 0, implicit $exec :: (store (s32) into `i32 addrspace(3)* undef`) + DS_WRITE_B32_gfx9 %0, undef %2:vgpr_32, 8, 0, implicit $exec :: (store (s32) into `i32 addrspace(3)* undef`) ... diff --git a/llvm/test/CodeGen/AMDGPU/merge-load-store-physreg.mir b/llvm/test/CodeGen/AMDGPU/merge-load-store-physreg.mir index f6200cf76bccc..15b3607d79d91 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-load-store-physreg.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-load-store-physreg.mir @@ -22,12 +22,12 @@ body: | %10:sgpr_32 = COPY $sgpr0 $m0 = S_MOV_B32 -1 - %2:vgpr_32 = DS_READ_B32 %1, 0, 0, implicit $m0, implicit $exec :: (load 4) + %2:vgpr_32 = DS_READ_B32 %1, 0, 0, implicit $m0, implicit $exec :: (load (s32)) %11:sgpr_32 = S_ADD_U32 %10, 4, implicit-def $scc %12:sgpr_32 = S_ADDC_U32 %10, 0, implicit-def dead $scc, implicit $scc - %3:vgpr_32 = DS_READ_B32 %1, 64, 0, implicit $m0, implicit $exec :: (load 4) + %3:vgpr_32 = DS_READ_B32 %1, 64, 0, implicit $m0, implicit $exec :: (load (s32)) S_ENDPGM 0 ... @@ -49,14 +49,14 @@ body: | %10:sgpr_32 = COPY $sgpr0 $m0 = S_MOV_B32 -1 - %2:vgpr_32 = DS_READ_B32 %1, 0, 0, implicit $m0, implicit $exec :: (load 4) + %2:vgpr_32 = DS_READ_B32 %1, 0, 0, implicit $m0, implicit $exec :: (load (s32)) %20:sgpr_32 = V_READFIRSTLANE_B32 %2, implicit $exec %21:sgpr_32 = S_ADD_U32 %20, 4, implicit-def $scc ; The S_ADDC_U32 depends on the first DS_READ_B32 only via SCC %11:sgpr_32 = S_ADDC_U32 %10, 0, implicit-def dead $scc, implicit $scc - %3:vgpr_32 = DS_READ_B32 %1, 64, 0, implicit $m0, implicit $exec :: (load 4) + %3:vgpr_32 = DS_READ_B32 %1, 64, 0, implicit $m0, implicit $exec :: (load (s32)) S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/merge-load-store-vreg.mir b/llvm/test/CodeGen/AMDGPU/merge-load-store-vreg.mir index a2597dbcd0d51..404ad119d273a 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-load-store-vreg.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-load-store-vreg.mir @@ -79,11 +79,11 @@ body: | %1:sreg_64_xexec = V_CMP_NE_U32_e64 %0, 0, implicit $exec %2:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %1, implicit $exec V_CMP_NE_U32_e32 1, %2, implicit-def $vcc, implicit $exec - DS_WRITE_B32 %0, %0, 1024, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp) + DS_WRITE_B32 %0, %0, 1024, 0, implicit $m0, implicit $exec :: (store (s32) into %ir.tmp) %3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - DS_WRITE_B32 %0, %3, 1056, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp1) - %4:vgpr_32 = DS_READ_B32 %3, 1088, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp2) - %5:vgpr_32 = DS_READ_B32 %3, 1120, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp3) + DS_WRITE_B32 %0, %3, 1056, 0, implicit $m0, implicit $exec :: (store (s32) into %ir.tmp1) + %4:vgpr_32 = DS_READ_B32 %3, 1088, 0, implicit $m0, implicit $exec :: (load (s32) from %ir.tmp2) + %5:vgpr_32 = DS_READ_B32 %3, 1120, 0, implicit $m0, implicit $exec :: (load (s32) from %ir.tmp3) $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc S_CBRANCH_VCCNZ %bb.1, implicit $vcc S_BRANCH %bb.1 @@ -114,11 +114,11 @@ body: | %1:sreg_64_xexec = V_CMP_NE_U32_e64 %0.sub0, 0, implicit $exec %2:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %1, implicit $exec V_CMP_NE_U32_e32 1, %2, implicit-def $vcc, implicit $exec - DS_WRITE_B32 %0.sub0, %0.sub0, 1024, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp) + DS_WRITE_B32 %0.sub0, %0.sub0, 1024, 0, implicit $m0, implicit $exec :: (store (s32) into %ir.tmp) %3:vreg_64 = V_LSHLREV_B64_e64 0, 0, implicit $exec - DS_WRITE_B32 %0.sub0, %3.sub0, 1056, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp1) - %4:vgpr_32 = DS_READ_B32 %3.sub0, 1088, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp2) - %5:vgpr_32 = DS_READ_B32 %3.sub0, 1120, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp3) + DS_WRITE_B32 %0.sub0, %3.sub0, 1056, 0, implicit $m0, implicit $exec :: (store (s32) into %ir.tmp1) + %4:vgpr_32 = DS_READ_B32 %3.sub0, 1088, 0, implicit $m0, implicit $exec :: (load (s32) from %ir.tmp2) + %5:vgpr_32 = DS_READ_B32 %3.sub0, 1120, 0, implicit $m0, implicit $exec :: (load (s32) from %ir.tmp3) $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc S_CBRANCH_VCCNZ %bb.1, implicit $vcc S_BRANCH %bb.1 @@ -145,11 +145,11 @@ body: | %1:sreg_64_xexec = V_CMP_NE_U32_e64 %0.sub0, 0, implicit $exec %2:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %1, implicit $exec V_CMP_NE_U32_e32 1, %2, implicit-def $vcc, implicit $exec - DS_WRITE_B32 %0.sub0, %0.sub0, 0, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp) + DS_WRITE_B32 %0.sub0, %0.sub0, 0, 0, implicit $m0, implicit $exec :: (store (s32) into %ir.tmp) %3:vreg_64 = V_LSHLREV_B64_e64 0, 0, implicit $exec - DS_WRITE_B32 %0.sub0, %3.sub0, 32, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp1) - %4:vgpr_32 = DS_READ_B32 %3.sub0, 0, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp2) - %5:vgpr_32 = DS_READ_B32 %3.sub0, 32, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp3) + DS_WRITE_B32 %0.sub0, %3.sub0, 32, 0, implicit $m0, implicit $exec :: (store (s32) into %ir.tmp1) + %4:vgpr_32 = DS_READ_B32 %3.sub0, 0, 0, implicit $m0, implicit $exec :: (load (s32) from %ir.tmp2) + %5:vgpr_32 = DS_READ_B32 %3.sub0, 32, 0, implicit $m0, implicit $exec :: (load (s32) from %ir.tmp3) $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc S_CBRANCH_VCCNZ %bb.1, implicit $vcc S_BRANCH %bb.1 diff --git a/llvm/test/CodeGen/AMDGPU/merge-load-store.mir b/llvm/test/CodeGen/AMDGPU/merge-load-store.mir index d1f5608edea25..bc8cf09bfcdd9 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-load-store.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-load-store.mir @@ -99,16 +99,16 @@ body: | %1:vgpr_32 = COPY $vgpr0 $m0 = S_MOV_B32 -1 - %2:vgpr_32 = DS_READ_B32 %1, 0, 0, implicit $m0, implicit $exec :: (load 4 from %ir.ptr.0) - DS_WRITE_B32 %1, killed %2, 64, 0, implicit $m0, implicit $exec :: (store 4 into %ir.ptr.64) + %2:vgpr_32 = DS_READ_B32 %1, 0, 0, implicit $m0, implicit $exec :: (load (s32) from %ir.ptr.0) + DS_WRITE_B32 %1, killed %2, 64, 0, implicit $m0, implicit $exec :: (store (s32) into %ir.ptr.64) ; Make this load unmergeable, to tempt SILoadStoreOptimizer into merging the ; other two loads. - %6:vreg_64 = DS_READ2_B32 %1, 16, 17, 0, implicit $m0, implicit $exec :: (load 8 from %ir.ptr.64, align 4) + %6:vreg_64 = DS_READ2_B32 %1, 16, 17, 0, implicit $m0, implicit $exec :: (load (s64) from %ir.ptr.64, align 4) %3:vgpr_32 = COPY %6.sub0 - %4:vgpr_32 = DS_READ_B32 %1, 4, 0, implicit $m0, implicit $exec :: (load 4 from %ir.ptr.4) + %4:vgpr_32 = DS_READ_B32 %1, 4, 0, implicit $m0, implicit $exec :: (load (s32) from %ir.ptr.4) %5:vgpr_32 = V_ADD_CO_U32_e32 killed %3, killed %4, implicit-def $vcc, implicit $exec - DS_WRITE_B32 killed %1, %5, 0, 0, implicit killed $m0, implicit $exec :: (store 4 into %ir.ptr.0) + DS_WRITE_B32 killed %1, %5, 0, 0, implicit killed $m0, implicit $exec :: (store (s32) into %ir.ptr.0) S_ENDPGM 0 ... @@ -128,13 +128,13 @@ registers: body: | bb.0: %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - %2:vgpr_32 = DS_READ_B32 %1, 3072, 0, implicit $m0, implicit $exec :: (dereferenceable load 4 from `i32 addrspace(3)* getelementptr inbounds ([256 x i32], [256 x i32] addrspace(3)* @lds0, i32 0, i32 0)`, addrspace 3) - %3:vgpr_32 = DS_READ_B32 %1, 2048, 0, implicit $m0, implicit $exec :: (load 4 from `i32 addrspace(3)* getelementptr ([256 x i32], [256 x i32] addrspace(3)* @lds1, i32 0, i32 undef)`, addrspace 3) - %4:vgpr_32 = DS_READ_B32 %1, 1024, 0, implicit $m0, implicit $exec :: (load 4 from `i32 addrspace(3)* getelementptr ([256 x i32], [256 x i32] addrspace(3)* @lds3, i32 0, i32 undef)`, addrspace 3) + %2:vgpr_32 = DS_READ_B32 %1, 3072, 0, implicit $m0, implicit $exec :: (dereferenceable load (s32) from `i32 addrspace(3)* getelementptr inbounds ([256 x i32], [256 x i32] addrspace(3)* @lds0, i32 0, i32 0)`, addrspace 3) + %3:vgpr_32 = DS_READ_B32 %1, 2048, 0, implicit $m0, implicit $exec :: (load (s32) from `i32 addrspace(3)* getelementptr ([256 x i32], [256 x i32] addrspace(3)* @lds1, i32 0, i32 undef)`, addrspace 3) + %4:vgpr_32 = DS_READ_B32 %1, 1024, 0, implicit $m0, implicit $exec :: (load (s32) from `i32 addrspace(3)* getelementptr ([256 x i32], [256 x i32] addrspace(3)* @lds3, i32 0, i32 undef)`, addrspace 3) INLINEASM &"v_or_b32 $0, 0, $1", 32, 327690, def %0, 327689, %4 - %5:vgpr_32 = DS_READ_B32 %0, 2048, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp12, addrspace 3) - %6:vgpr_32 = DS_READ_B32 %5, 2048, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp21, addrspace 3) - %7:vgpr_32 = DS_READ_B32 %1, 0, 0, implicit $m0, implicit $exec :: (load 4 from `i32 addrspace(3)* getelementptr ([256 x i32], [256 x i32] addrspace(3)* @lds2, i32 0, i32 undef)`, addrspace 3) + %5:vgpr_32 = DS_READ_B32 %0, 2048, 0, implicit $m0, implicit $exec :: (load (s32) from %ir.tmp12, addrspace 3) + %6:vgpr_32 = DS_READ_B32 %5, 2048, 0, implicit $m0, implicit $exec :: (load (s32) from %ir.tmp21, addrspace 3) + %7:vgpr_32 = DS_READ_B32 %1, 0, 0, implicit $m0, implicit $exec :: (load (s32) from `i32 addrspace(3)* getelementptr ([256 x i32], [256 x i32] addrspace(3)* @lds2, i32 0, i32 undef)`, addrspace 3) S_SETPC_B64_return undef $sgpr30_sgpr31, implicit %6, implicit %7 ... @@ -155,25 +155,25 @@ body: | %6:sreg_32_xm0_xexec = S_MOV_B32 0 %7:sreg_32_xm0 = S_MOV_B32 0 %8:sreg_64_xexec = REG_SEQUENCE killed %6, %subreg.sub0, %7, %subreg.sub1 - %9:sgpr_128 = S_LOAD_DWORDX4_IMM killed %8, 0, 0 :: (invariant load 16, addrspace 6) - %31:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_IMM %9, 0, 0 :: (dereferenceable invariant load 4) + %9:sgpr_128 = S_LOAD_DWORDX4_IMM killed %8, 0, 0 :: (invariant load (s128), addrspace 6) + %31:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_IMM %9, 0, 0 :: (dereferenceable invariant load (s32)) %10:sreg_32_xm0_xexec = COPY %31.sub0 %11:sreg_32_xm0_xexec = COPY killed %31.sub1 - %12:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %9, 2, 0 :: (dereferenceable invariant load 4) + %12:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %9, 2, 0 :: (dereferenceable invariant load (s32)) %13:sreg_64 = V_CMP_NE_U32_e64 killed %11, 0, implicit $exec %15:sreg_64 = V_CMP_NE_U32_e64 killed %12, 0, implicit $exec %17:sreg_64_xexec = S_AND_B64 killed %13, killed %15, implicit-def dead $scc S_CMP_EQ_U32 killed %10, 0, implicit-def $scc - %18:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %9, 3, 0 :: (dereferenceable invariant load 4) + %18:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %9, 3, 0 :: (dereferenceable invariant load (s32)) S_ENDPGM 0 ... --- # CHECK-LABEL: merge_mmos -# CHECK: S_BUFFER_LOAD_DWORDX2_IMM %0, 0, 0 :: (dereferenceable invariant load 8, align 4) -# CHECK: BUFFER_LOAD_DWORDX2_OFFSET %0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 4) -# CHECK: BUFFER_STORE_DWORDX2_OFFSET_exact killed %{{[0-9]+}}, %0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 4) -# CHECK: BUFFER_LOAD_DWORDX2_OFFSET %0, 0, 64, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from %ir.ptr_addr1 + 64, align 4 -# CHECK: BUFFER_STORE_DWORDX2_OFFSET_exact killed %{{[0-9]+}}, %0, 0, 64, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into %ir.ptr_addr1 + 64, align 4 +# CHECK: S_BUFFER_LOAD_DWORDX2_IMM %0, 0, 0 :: (dereferenceable invariant load (s64), align 4) +# CHECK: BUFFER_LOAD_DWORDX2_OFFSET %0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 4) +# CHECK: BUFFER_STORE_DWORDX2_OFFSET_exact killed %{{[0-9]+}}, %0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s64), align 4) +# CHECK: BUFFER_LOAD_DWORDX2_OFFSET %0, 0, 64, 0, 0, 0, implicit $exec :: (dereferenceable load (s64) from %ir.ptr_addr1 + 64, align 4 +# CHECK: BUFFER_STORE_DWORDX2_OFFSET_exact killed %{{[0-9]+}}, %0, 0, 64, 0, 0, 0, implicit $exec :: (dereferenceable store (s64) into %ir.ptr_addr1 + 64, align 4 name: merge_mmos tracksRegLiveness: true body: | @@ -181,24 +181,24 @@ body: | liveins: $sgpr0_sgpr1_sgpr2_sgpr3 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0, 0, 0 :: (dereferenceable invariant load 4) - %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0, 1, 0 :: (dereferenceable invariant load 4) - %3:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4) - %4:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %0, 0, 4, 0, 0, 0, implicit $exec :: (dereferenceable load 4) - BUFFER_STORE_DWORD_OFFSET_exact %3, %0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4) - BUFFER_STORE_DWORD_OFFSET_exact %4, %0, 0, 4, 0, 0, 0, implicit $exec :: (dereferenceable store 4) - %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %0, 0, 64, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from %ir.ptr_addr1 + 64) - %6:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %0, 0, 68, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from %ir.ptr_addr1 + 68) - BUFFER_STORE_DWORD_OFFSET_exact %5, %0, 0, 64, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into %ir.ptr_addr1 + 64) - BUFFER_STORE_DWORD_OFFSET_exact %6, %0, 0, 68, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into %ir.ptr_addr1 + 68) + %1:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0, 0, 0 :: (dereferenceable invariant load (s32)) + %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0, 1, 0 :: (dereferenceable invariant load (s32)) + %3:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32)) + %4:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %0, 0, 4, 0, 0, 0, implicit $exec :: (dereferenceable load (s32)) + BUFFER_STORE_DWORD_OFFSET_exact %3, %0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32)) + BUFFER_STORE_DWORD_OFFSET_exact %4, %0, 0, 4, 0, 0, 0, implicit $exec :: (dereferenceable store (s32)) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %0, 0, 64, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.ptr_addr1 + 64) + %6:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %0, 0, 68, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.ptr_addr1 + 68) + BUFFER_STORE_DWORD_OFFSET_exact %5, %0, 0, 64, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.ptr_addr1 + 64) + BUFFER_STORE_DWORD_OFFSET_exact %6, %0, 0, 68, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.ptr_addr1 + 68) S_ENDPGM 0 ... --- # CHECK-LABEL: reorder_offsets -# CHECK-DAG: BUFFER_STORE_DWORDX2_OFFSET_exact killed %{{[0-9]+}}, %0, 0, 16, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into %ir.reorder_addr1 + 16, align 4, addrspace 1) -# CHECK-DAG: BUFFER_STORE_DWORDX4_OFFSET_exact killed %{{[0-9]+}}, %0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into %ir.reorder_addr1, align 4, addrspace 1) +# CHECK-DAG: BUFFER_STORE_DWORDX2_OFFSET_exact killed %{{[0-9]+}}, %0, 0, 16, 0, 0, 0, implicit $exec :: (dereferenceable store (s64) into %ir.reorder_addr1 + 16, align 4, addrspace 1) +# CHECK-DAG: BUFFER_STORE_DWORDX4_OFFSET_exact killed %{{[0-9]+}}, %0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into %ir.reorder_addr1, align 4, addrspace 1) name: reorder_offsets tracksRegLiveness: true @@ -208,12 +208,12 @@ body: | %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - BUFFER_STORE_DWORD_OFFSET_exact %1, %0, 0, 4, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into %ir.reorder_addr1 + 4) - BUFFER_STORE_DWORD_OFFSET_exact %1, %0, 0, 8, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into %ir.reorder_addr1 + 8) - BUFFER_STORE_DWORD_OFFSET_exact %1, %0, 0, 12, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into %ir.reorder_addr1 + 12) - BUFFER_STORE_DWORD_OFFSET_exact %1, %0, 0, 16, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into %ir.reorder_addr1 + 16) - BUFFER_STORE_DWORD_OFFSET_exact %1, %0, 0, 20, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into %ir.reorder_addr1 + 20) - BUFFER_STORE_DWORD_OFFSET_exact %1, %0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into %ir.reorder_addr1) + BUFFER_STORE_DWORD_OFFSET_exact %1, %0, 0, 4, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.reorder_addr1 + 4) + BUFFER_STORE_DWORD_OFFSET_exact %1, %0, 0, 8, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.reorder_addr1 + 8) + BUFFER_STORE_DWORD_OFFSET_exact %1, %0, 0, 12, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.reorder_addr1 + 12) + BUFFER_STORE_DWORD_OFFSET_exact %1, %0, 0, 16, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.reorder_addr1 + 16) + BUFFER_STORE_DWORD_OFFSET_exact %1, %0, 0, 20, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.reorder_addr1 + 20) + BUFFER_STORE_DWORD_OFFSET_exact %1, %0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.reorder_addr1) S_ENDPGM 0 diff --git a/llvm/test/CodeGen/AMDGPU/merge-out-of-order-ldst.mir b/llvm/test/CodeGen/AMDGPU/merge-out-of-order-ldst.mir index 17c2e130aa126..5477f5ea3b2a7 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-out-of-order-ldst.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-out-of-order-ldst.mir @@ -11,13 +11,13 @@ name: out_of_order_merge body: | bb.0: %4:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - %5:vreg_64 = DS_READ_B64_gfx9 %4, 776, 0, implicit $exec :: (load 8 from `double addrspace(3)* undef`, addrspace 3) - %6:vreg_64 = DS_READ_B64_gfx9 %4, 784, 0, implicit $exec :: (load 8 from `double addrspace(3)* undef` + 8, addrspace 3) - %17:vreg_64 = DS_READ_B64_gfx9 %4, 840, 0, implicit $exec :: (load 8 from `double addrspace(3)* undef`, addrspace 3) - DS_WRITE_B64_gfx9 %4, %17, 8, 0, implicit $exec :: (store 8 into `double addrspace(3)* undef` + 8, addrspace 3) - DS_WRITE_B64_gfx9 %4, %6, 0, 0, implicit $exec :: (store 8 into `double addrspace(3)* undef`, align 16, addrspace 3) - %24:vreg_64 = DS_READ_B64_gfx9 %4, 928, 0, implicit $exec :: (load 8 from `double addrspace(3)* undef` + 8, addrspace 3) - DS_WRITE_B64_gfx9 undef %29:vgpr_32, %5, 0, 0, implicit $exec :: (store 8 into `double addrspace(3)* undef`, addrspace 3) + %5:vreg_64 = DS_READ_B64_gfx9 %4, 776, 0, implicit $exec :: (load (s64) from `double addrspace(3)* undef`, addrspace 3) + %6:vreg_64 = DS_READ_B64_gfx9 %4, 784, 0, implicit $exec :: (load (s64) from `double addrspace(3)* undef` + 8, addrspace 3) + %17:vreg_64 = DS_READ_B64_gfx9 %4, 840, 0, implicit $exec :: (load (s64) from `double addrspace(3)* undef`, addrspace 3) + DS_WRITE_B64_gfx9 %4, %17, 8, 0, implicit $exec :: (store (s64) into `double addrspace(3)* undef` + 8, addrspace 3) + DS_WRITE_B64_gfx9 %4, %6, 0, 0, implicit $exec :: (store (s64) into `double addrspace(3)* undef`, align 16, addrspace 3) + %24:vreg_64 = DS_READ_B64_gfx9 %4, 928, 0, implicit $exec :: (load (s64) from `double addrspace(3)* undef` + 8, addrspace 3) + DS_WRITE_B64_gfx9 undef %29:vgpr_32, %5, 0, 0, implicit $exec :: (store (s64) into `double addrspace(3)* undef`, addrspace 3) S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/merge-tbuffer.mir b/llvm/test/CodeGen/AMDGPU/merge-tbuffer.mir index 2a55cfdd221f5..5c8fd612574be 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-tbuffer.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-tbuffer.mir @@ -6,7 +6,7 @@ # # GFX9-LABEL: name: gfx9_tbuffer_load_x_xyz -# GFX9: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 126, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 126, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0 # GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %7.sub1_sub2_sub3 name: gfx9_tbuffer_load_x_xyz @@ -17,13 +17,13 @@ body: | %2:sgpr_32 = COPY $sgpr2 %3:sgpr_32 = COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 - %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %8:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %5:sgpr_128, 0, 8, 125, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4) + %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %8:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %5:sgpr_128, 0, 8, 125, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4) ... --- # GFX9-LABEL: name: gfx9_tbuffer_load_xyz_x -# GFX9: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 126, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 126, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) # GFX9: %{{[0-9]+}}:vreg_96 = COPY %7.sub0_sub1_sub2 # GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub3 name: gfx9_tbuffer_load_xyz_x @@ -34,13 +34,13 @@ body: | %2:sgpr_32 = COPY $sgpr2 %3:sgpr_32 = COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 - %7:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %5:sgpr_128, 0, 4, 125, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4) - %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %7:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %5:sgpr_128, 0, 4, 125, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4) + %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) ... --- # GFX9-LABEL: name: gfx9_tbuffer_load_xy_xy -# GFX9: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 126, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 126, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) # GFX9: %{{[0-9]+}}:vreg_64 = COPY %7.sub0_sub1 # GFX9: %{{[0-9]+}}:vreg_64 = COPY killed %7.sub2_sub3 name: gfx9_tbuffer_load_xy_xy @@ -51,13 +51,13 @@ body: | %2:sgpr_32 = COPY $sgpr2 %3:sgpr_32 = COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 - %7:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 4, 123, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4) - %8:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 12, 123, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4) + %7:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 4, 123, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) + %8:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 12, 123, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) ... --- # GFX9-LABEL: name: gfx9_tbuffer_load_x_xy -# GFX9: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 4, 125, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 4, 125, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0 # GFX9: %{{[0-9]+}}:vreg_64 = COPY killed %7.sub1_sub2 name: gfx9_tbuffer_load_x_xy @@ -68,13 +68,13 @@ body: | %2:sgpr_32 = COPY $sgpr2 %3:sgpr_32 = COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 - %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %8:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 8, 123, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4) + %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %8:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 8, 123, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) ... --- # GFX9-LABEL: name: gfx9_tbuffer_load_xy_x -# GFX9: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 4, 125, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 4, 125, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4) # GFX9: %{{[0-9]+}}:vreg_64 = COPY %7.sub0_sub1 # GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub2 name: gfx9_tbuffer_load_xy_x @@ -85,14 +85,14 @@ body: | %2:sgpr_32 = COPY $sgpr2 %3:sgpr_32 = COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 - %7:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 4, 123, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4) - %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 12, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %7:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 4, 123, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) + %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 12, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) ... --- # GFX9-LABEL: name: gfx9_tbuffer_load_x_x -# GFX9: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 123, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 123, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0 # GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub1 @@ -104,13 +104,13 @@ body: | %2:sgpr_32 = COPY $sgpr2 %3:sgpr_32 = COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 - %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) ... --- # GFX9-LABEL: name: gfx9_tbuffer_load_x_x_format_32_32_32_32 -# GFX9: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 123, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 123, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0 # GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub1 @@ -122,24 +122,24 @@ body: | %2:sgpr_32 = COPY $sgpr2 %3:sgpr_32 = COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 - %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 126, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 126, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 126, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 126, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) ... --- # GFX9-LABEL: name: gfx9_tbuffer_load_float_32 -# GFX9: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 123, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 123, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %14.sub0 # GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %14.sub1 -# GFX9: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 126, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 126, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) # GFX9: %{{[0-9]+}}:vreg_96 = COPY %17.sub0_sub1_sub2 # GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %17.sub3 # GFX9: %{{[0-9]+}}:vreg_64 = COPY %16.sub0_sub1 # GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %16.sub2 # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %15.sub0 # GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %15.sub1 -# GFX9: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 125, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 125, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4) # GFX9: %{{[0-9]+}}:vreg_64 = COPY %19.sub0_sub1 # GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %19.sub2 # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %18.sub0 @@ -153,30 +153,30 @@ body: | %2:sgpr_32 = COPY $sgpr2 %3:sgpr_32 = COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 - %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) ... --- # GFX9-LABEL: name: gfx9_tbuffer_load_sint_32 -# GFX9: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 91, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 91, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %14.sub0 # GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %14.sub1 -# GFX9: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 94, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 94, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) # GFX9: %{{[0-9]+}}:vreg_96 = COPY %17.sub0_sub1_sub2 # GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %17.sub3 # GFX9: %{{[0-9]+}}:vreg_64 = COPY %16.sub0_sub1 # GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %16.sub2 # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %15.sub0 # GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %15.sub1 -# GFX9: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 93, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 93, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4) # GFX9: %{{[0-9]+}}:vreg_64 = COPY %19.sub0_sub1 # GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %19.sub2 # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %18.sub0 @@ -190,30 +190,30 @@ body: | %2:sgpr_32 = COPY $sgpr2 %3:sgpr_32 = COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 - %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 84, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 84, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 84, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 84, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 84, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 84, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 84, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 84, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 84, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 84, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 84, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 84, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 84, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 84, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 84, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 84, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 84, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 84, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) ... --- # GFX9-LABEL: name: gfx9_tbuffer_load_uint_32 -# GFX9: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 75, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 75, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %14.sub0 # GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %14.sub1 -# GFX9: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) # GFX9: %{{[0-9]+}}:vreg_96 = COPY %17.sub0_sub1_sub2 # GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %17.sub3 # GFX9: %{{[0-9]+}}:vreg_64 = COPY %16.sub0_sub1 # GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %16.sub2 # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %15.sub0 # GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %15.sub1 -# GFX9: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 77, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 77, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4) # GFX9: %{{[0-9]+}}:vreg_64 = COPY %19.sub0_sub1 # GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %19.sub2 # GFX9: %{{[0-9]+}}:vgpr_32 = COPY %18.sub0 @@ -227,15 +227,15 @@ body: | %2:sgpr_32 = COPY $sgpr2 %3:sgpr_32 = COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 - %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 68, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 68, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 68, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 68, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 68, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 68, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 68, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 68, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 68, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 68, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 68, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 68, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 68, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 68, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 68, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 68, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 68, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 68, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) ... --- @@ -245,15 +245,15 @@ body: | # GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 # GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 # GFX9: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3 -# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 114, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 16, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 20, 114, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 24, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 28, 114, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 36, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 40, 114, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 44, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 114, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 16, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 20, 114, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 24, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 28, 114, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 36, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 40, 114, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 44, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) name: gfx9_tbuffer_load_not_merged_data_format_mismatch @@ -264,15 +264,15 @@ body: | %2:sgpr_32 = COPY $sgpr2 %3:sgpr_32 = COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 - %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 114, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 114, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 114, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 114, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 114, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 114, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 114, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 114, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) ... --- @@ -282,15 +282,15 @@ body: | # GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 # GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 # GFX9: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3 -# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 84, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 16, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 20, 84, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 24, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 28, 84, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 36, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 40, 84, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 44, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 84, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 16, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 20, 84, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 24, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 28, 84, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 36, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 40, 84, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 44, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) name: gfx9_tbuffer_load_not_merged_num_format_mismatch body: | bb.0.entry: @@ -299,22 +299,22 @@ body: | %2:sgpr_32 = COPY $sgpr2 %3:sgpr_32 = COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 - %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 84, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 84, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 84, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 84, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 84, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 84, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 84, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 84, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) ... --- # GFX9-LABEL: name: gfx9_tbuffer_store_x_xyz # GFX9: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE %3, %subreg.sub0, %2, %subreg.sub1, %1, %subreg.sub2 # GFX9: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE %0, %subreg.sub0, %9, %subreg.sub1_sub2_sub3 -# GFX9: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %10, %8, 0, 4, 126, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4) +# GFX9: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %10, %8, 0, 4, 126, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) name: gfx9_tbuffer_store_x_xyz body: | bb.0.entry: @@ -329,8 +329,8 @@ body: | %0:sgpr_32 = COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %14:vreg_96 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1, %6:vgpr_32, %subreg.sub2 - TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact %14:vreg_96, %13:sgpr_128, 0, 8, 125, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact %14:vreg_96, %13:sgpr_128, 0, 8, 125, 0, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4) ... --- @@ -339,7 +339,7 @@ body: | # GFX9-LABEL: name: gfx9_tbuffer_store_xyz_x # GFX9: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE %3, %subreg.sub0, %2, %subreg.sub1, %1, %subreg.sub2 # GFX9: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE %9, %subreg.sub0_sub1_sub2, %0, %subreg.sub3 -# GFX9: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %10, %8, 0, 4, 126, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4) +# GFX9: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %10, %8, 0, 4, 126, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) name: gfx9_tbuffer_store_xyz_x body: | bb.0.entry: @@ -354,8 +354,8 @@ body: | %0:sgpr_32 = COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %14:vreg_96 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1, %6:vgpr_32, %subreg.sub2 - TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact %14:vreg_96, %13:sgpr_128, 0, 4, 125, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 16, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact %14:vreg_96, %13:sgpr_128, 0, 4, 125, 0, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 16, 116, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) ... --- @@ -363,7 +363,7 @@ body: | # GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %2, %subreg.sub1 # GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %1, %subreg.sub0, %0, %subreg.sub1 # GFX9: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE %9, %subreg.sub0_sub1, %10, %subreg.sub2_sub3 -# GFX9: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %11, %8, 0, 4, 126, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4) +# GFX9: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %11, %8, 0, 4, 126, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) name: gfx9_tbuffer_store_xy_xy body: | bb.0.entry: @@ -379,15 +379,15 @@ body: | %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %14:vreg_64 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1 %15:vreg_64 = REG_SEQUENCE %6:vgpr_32, %subreg.sub0, %7:vgpr_32, %subreg.sub1 - TBUFFER_STORE_FORMAT_XY_OFFSET_exact %14:vreg_64, %13:sgpr_128, 0, 4, 123, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_XY_OFFSET_exact %15:vreg_64, %13:sgpr_128, 0, 12, 123, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_XY_OFFSET_exact %14:vreg_64, %13:sgpr_128, 0, 4, 123, 0, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_XY_OFFSET_exact %15:vreg_64, %13:sgpr_128, 0, 12, 123, 0, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) ... --- # GFX9-LABEL: name: gfx9_tbuffer_store_x_xy # GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %2, %subreg.sub1 # GFX9: %{{[0-9]+}}:vreg_64, %subreg.sub1_sub2 -# GFX9: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %11, %8, 0, 4, 125, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4) +# GFX9: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %11, %8, 0, 4, 125, 0, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4) name: gfx9_tbuffer_store_x_xy body: | bb.0.entry: @@ -402,15 +402,15 @@ body: | %0:sgpr_32 = COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %14:vreg_64 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1 - TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_XY_OFFSET_exact %15:vreg_64, %13:sgpr_128, 0, 8, 123, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_XY_OFFSET_exact %15:vreg_64, %13:sgpr_128, 0, 8, 123, 0, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) ... --- # GFX9-LABEL: name: gfx9_tbuffer_store_xy_x # GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %2, %subreg.sub1 # GFX9: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE %9, %subreg.sub0_sub1, %0, %subreg.sub2 -# GFX9: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %10, %8, 0, 4, 125, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4) +# GFX9: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %10, %8, 0, 4, 125, 0, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4) name: gfx9_tbuffer_store_xy_x body: | bb.0.entry: @@ -426,15 +426,15 @@ body: | %0:sgpr_32 = COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %14:vreg_64 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1 - TBUFFER_STORE_FORMAT_XY_OFFSET_exact %14:vreg_64, %13:sgpr_128, 0, 4, 123, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 12, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_XY_OFFSET_exact %14:vreg_64, %13:sgpr_128, 0, 4, 123, 0, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 12, 116, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) ... --- # GFX9-LABEL: name: gfx9_tbuffer_store_x_x # GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %1, %subreg.sub0, %0, %subreg.sub1 -# GFX9: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %9, %8, 0, 4, 123, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4) +# GFX9: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %9, %8, 0, 4, 123, 0, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) name: gfx9_tbuffer_store_x_x body: | bb.0.entry: @@ -448,14 +448,14 @@ body: | %1:sgpr_32 = COPY $sgpr1 %0:sgpr_32 = COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 - TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 8, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 8, 116, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) ... --- # GFX9-LABEL: name: gfx9_tbuffer_store_x_x_format_32_32_32_32 # GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %1, %subreg.sub0, %0, %subreg.sub1 -# GFX9: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %9, %8, 0, 4, 123, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4) +# GFX9: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %9, %8, 0, 4, 123, 0, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) name: gfx9_tbuffer_store_x_x_format_32_32_32_32 body: | bb.0.entry: @@ -469,8 +469,8 @@ body: | %1:sgpr_32 = COPY $sgpr1 %0:sgpr_32 = COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 - TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 4, 126, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 8, 126, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 4, 126, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 8, 126, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) ... --- @@ -490,14 +490,14 @@ body: | # GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 # GFX9: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3 # GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %7, %subreg.sub1 -# GFX9: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 123, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4) +# GFX9: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 123, 0, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) # GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %6, %subreg.sub0, %5, %subreg.sub1 # GFX9: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE killed %15, %subreg.sub0_sub1, %4, %subreg.sub2 # GFX9: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE killed %16, %subreg.sub0_sub1_sub2, %3, %subreg.sub3 -# GFX9: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %17, %13, 0, 16, 126, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4) +# GFX9: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %17, %13, 0, 16, 126, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) # GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %1, %subreg.sub1 # GFX9: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE killed %18, %subreg.sub0_sub1, %0, %subreg.sub2 -# GFX9: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %19, %13, 0, 36, 125, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4) +# GFX9: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %19, %13, 0, 36, 125, 0, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4) name: gfx9_tbuffer_store_float32 body: | bb.0.entry: @@ -516,15 +516,15 @@ body: | %1:sgpr_32 = COPY $sgpr1 %0:sgpr_32 = COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 - TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 116, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 116, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 116, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 116, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 116, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 116, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 116, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 116, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) ... --- @@ -544,14 +544,14 @@ body: | # GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 # GFX9: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3 # GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %7, %subreg.sub1 -# GFX9: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 91, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4) +# GFX9: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 91, 0, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) # GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %6, %subreg.sub0, %5, %subreg.sub1 # GFX9: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE killed %15, %subreg.sub0_sub1, %4, %subreg.sub2 # GFX9: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE killed %16, %subreg.sub0_sub1_sub2, %3, %subreg.sub3 -# GFX9: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %17, %13, 0, 16, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4) +# GFX9: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %17, %13, 0, 16, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) # GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %1, %subreg.sub1 # GFX9: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE killed %18, %subreg.sub0_sub1, %0, %subreg.sub2 -# GFX9: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %19, %13, 0, 36, 93, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4) +# GFX9: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %19, %13, 0, 36, 93, 0, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4) name: gfx9_tbuffer_store_sint32 body: | bb.0.entry: @@ -570,15 +570,15 @@ body: | %1:sgpr_32 = COPY $sgpr1 %0:sgpr_32 = COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 - TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 84, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 84, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 84, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 84, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 84, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 84, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 84, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 84, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 84, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 84, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 84, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 84, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 84, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 84, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 84, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 84, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 84, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 84, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) ... --- @@ -598,14 +598,14 @@ body: | # GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 # GFX9: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3 # GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %7, %subreg.sub1 -# GFX9: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 75, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4) +# GFX9: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 75, 0, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) # GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %6, %subreg.sub0, %5, %subreg.sub1 # GFX9: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE killed %15, %subreg.sub0_sub1, %4, %subreg.sub2 # GFX9: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE killed %16, %subreg.sub0_sub1_sub2, %3, %subreg.sub3 -# GFX9: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %17, %13, 0, 16, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4) +# GFX9: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %17, %13, 0, 16, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) # GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %1, %subreg.sub1 # GFX9: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE killed %18, %subreg.sub0_sub1, %0, %subreg.sub2 -# GFX9: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %19, %13, 0, 36, 77, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4) +# GFX9: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %19, %13, 0, 36, 77, 0, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4) name: gfx9_tbuffer_store_uint32 body: | bb.0.entry: @@ -624,15 +624,15 @@ body: | %1:sgpr_32 = COPY $sgpr1 %0:sgpr_32 = COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 - TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 68, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 68, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 68, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 68, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 68, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 68, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 68, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 68, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 68, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 68, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 68, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 68, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 68, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 68, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 68, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 68, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 68, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 68, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) ... --- @@ -651,15 +651,15 @@ body: | # GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 # GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 # GFX9: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3 -# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %8, %13, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) -# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %7, %13, 0, 8, 84, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) -# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %6, %13, 0, 16, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) -# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %5, %13, 0, 20, 84, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) -# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %4, %13, 0, 24, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) -# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %3, %13, 0, 28, 84, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) -# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %2, %13, 0, 36, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) -# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %1, %13, 0, 40, 84, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) -# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %0, %13, 0, 44, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %8, %13, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) +# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %7, %13, 0, 8, 84, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) +# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %6, %13, 0, 16, 116, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) +# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %5, %13, 0, 20, 84, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) +# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %4, %13, 0, 24, 116, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) +# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %3, %13, 0, 28, 84, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) +# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %2, %13, 0, 36, 116, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) +# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %1, %13, 0, 40, 84, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) +# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %0, %13, 0, 44, 116, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) name: gfx9_tbuffer_store_not_merged_data_format_mismatch body: | bb.0.entry: @@ -678,15 +678,15 @@ body: | %1:sgpr_32 = COPY $sgpr1 %0:sgpr_32 = COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 - TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 84, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 84, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 84, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 84, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 84, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 116, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 84, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 116, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 84, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 116, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 84, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 116, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) ... --- @@ -705,15 +705,15 @@ body: | # GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 # GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 # GFX9: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3 -# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %8, %13, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) -# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %7, %13, 0, 8, 114, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) -# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %6, %13, 0, 16, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) -# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %5, %13, 0, 20, 114, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) -# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %4, %13, 0, 24, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) -# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %3, %13, 0, 28, 114, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) -# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %2, %13, 0, 36, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) -# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %1, %13, 0, 40, 114, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) -# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %0, %13, 0, 44, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %8, %13, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) +# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %7, %13, 0, 8, 114, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) +# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %6, %13, 0, 16, 116, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) +# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %5, %13, 0, 20, 114, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) +# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %4, %13, 0, 24, 116, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) +# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %3, %13, 0, 28, 114, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) +# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %2, %13, 0, 36, 116, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) +# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %1, %13, 0, 40, 114, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) +# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %0, %13, 0, 44, 116, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) name: gfx9_tbuffer_store_not_merged_num_format_mismatch body: | bb.0.entry: @@ -732,22 +732,22 @@ body: | %1:sgpr_32 = COPY $sgpr1 %0:sgpr_32 = COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 - TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 114, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 114, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 114, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 114, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 116, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 114, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 116, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 114, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 116, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 114, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 116, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 114, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 116, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) ... --- # GFX9-LABEL: name: gfx9_tbuffer_load_not_merged_swizzled_0 -# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 116, 0, 0, 1, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 116, 0, 0, 1, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) name: gfx9_tbuffer_load_not_merged_swizzled_0 body: | bb.0.entry: @@ -756,15 +756,15 @@ body: | %2:sgpr_32 = COPY $sgpr2 %3:sgpr_32 = COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 - %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 1, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 1, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) ... --- # GFX9-LABEL: name: gfx9_tbuffer_load_not_merged_swizzled_1 -# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) -# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 116, 0, 0, 1, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 116, 0, 0, 1, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) name: gfx9_tbuffer_load_not_merged_swizzled_1 body: | bb.0.entry: @@ -773,8 +773,8 @@ body: | %2:sgpr_32 = COPY $sgpr2 %3:sgpr_32 = COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 - %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 116, 0, 0, 1, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 116, 0, 0, 1, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) ... --- @@ -784,7 +784,7 @@ body: | # # GFX10-LABEL: name: gfx10_tbuffer_load_x_xyz -# GFX10: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 77, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 77, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0 # GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %7.sub1_sub2_sub3 name: gfx10_tbuffer_load_x_xyz @@ -795,13 +795,13 @@ body: | %2:sgpr_32 = COPY $sgpr2 %3:sgpr_32 = COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 - %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %8:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %5:sgpr_128, 0, 8, 74, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4) + %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %8:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %5:sgpr_128, 0, 8, 74, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4) ... --- # GFX10-LABEL: name: gfx10_tbuffer_load_xyz_x -# GFX10: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 77, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 77, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) # GFX10: %{{[0-9]+}}:vreg_96 = COPY %7.sub0_sub1_sub2 # GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub3 name: gfx10_tbuffer_load_xyz_x @@ -812,13 +812,13 @@ body: | %2:sgpr_32 = COPY $sgpr2 %3:sgpr_32 = COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 - %7:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %5:sgpr_128, 0, 4, 74, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4) + %7:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %5:sgpr_128, 0, 4, 74, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4) ... --- # GFX10-LABEL: name: gfx10_tbuffer_load_xy_xy -# GFX10: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 77, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 77, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) # GFX10: %{{[0-9]+}}:vreg_64 = COPY %7.sub0_sub1 # GFX10: %{{[0-9]+}}:vreg_64 = COPY killed %7.sub2_sub3 name: gfx10_tbuffer_load_xy_xy @@ -829,13 +829,13 @@ body: | %2:sgpr_32 = COPY $sgpr2 %3:sgpr_32 = COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 - %7:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 4, 64, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4) - %8:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 12, 64, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4) + %7:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 4, 64, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) + %8:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 12, 64, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) ... --- # GFX10-LABEL: name: gfx10_tbuffer_load_x_xy -# GFX10: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 4, 74, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 4, 74, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0 # GFX10: %{{[0-9]+}}:vreg_64 = COPY killed %7.sub1_sub2 name: gfx10_tbuffer_load_x_xy @@ -846,13 +846,13 @@ body: | %2:sgpr_32 = COPY $sgpr2 %3:sgpr_32 = COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 - %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %8:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 8, 64, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4) + %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %8:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 8, 64, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) ... --- # GFX10-LABEL: name: gfx10_tbuffer_load_xy_x -# GFX10: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 4, 74, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 4, 74, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4) # GFX10: %{{[0-9]+}}:vreg_64 = COPY %7.sub0_sub1 # GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub2 name: gfx10_tbuffer_load_xy_x @@ -863,14 +863,14 @@ body: | %2:sgpr_32 = COPY $sgpr2 %3:sgpr_32 = COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 - %7:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 4, 64, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4) - %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 12, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %7:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 4, 64, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) + %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 12, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) ... --- # GFX10-LABEL: name: gfx10_tbuffer_load_x_x -# GFX10: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 64, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 64, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0 # GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub1 @@ -882,13 +882,13 @@ body: | %2:sgpr_32 = COPY $sgpr2 %3:sgpr_32 = COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 - %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) ... --- # GFX10-LABEL: name: gfx10_tbuffer_load_x_x_format_32_32_32_32 -# GFX10: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 64, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 64, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0 # GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub1 @@ -900,24 +900,24 @@ body: | %2:sgpr_32 = COPY $sgpr2 %3:sgpr_32 = COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 - %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 77, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 77, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 77, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 77, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) ... --- # GFX10-LABEL: name: gfx10_tbuffer_load_float_32 -# GFX10: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 64, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 64, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %14.sub0 # GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %14.sub1 -# GFX10: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 77, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 77, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) # GFX10: %{{[0-9]+}}:vreg_96 = COPY %17.sub0_sub1_sub2 # GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %17.sub3 # GFX10: %{{[0-9]+}}:vreg_64 = COPY %16.sub0_sub1 # GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %16.sub2 # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %15.sub0 # GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %15.sub1 -# GFX10: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 74, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 74, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4) # GFX10: %{{[0-9]+}}:vreg_64 = COPY %19.sub0_sub1 # GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %19.sub2 # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %18.sub0 @@ -931,30 +931,30 @@ body: | %2:sgpr_32 = COPY $sgpr2 %3:sgpr_32 = COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 - %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) ... --- # GFX10-LABEL: name: gfx10_tbuffer_load_sint_32 -# GFX10: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 63, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 63, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %14.sub0 # GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %14.sub1 -# GFX10: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 76, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 76, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) # GFX10: %{{[0-9]+}}:vreg_96 = COPY %17.sub0_sub1_sub2 # GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %17.sub3 # GFX10: %{{[0-9]+}}:vreg_64 = COPY %16.sub0_sub1 # GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %16.sub2 # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %15.sub0 # GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %15.sub1 -# GFX10: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 73, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 73, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4) # GFX10: %{{[0-9]+}}:vreg_64 = COPY %19.sub0_sub1 # GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %19.sub2 # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %18.sub0 @@ -968,30 +968,30 @@ body: | %2:sgpr_32 = COPY $sgpr2 %3:sgpr_32 = COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 - %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 21, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 21, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 21, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 21, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 21, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 21, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 21, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 21, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 21, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 21, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 21, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 21, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 21, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 21, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 21, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 21, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 21, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 21, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) ... --- # GFX10-LABEL: name: gfx10_tbuffer_load_uint_32 -# GFX10: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 62, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 62, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4) # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %14.sub0 # GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %14.sub1 -# GFX10: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 75, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 75, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) # GFX10: %{{[0-9]+}}:vreg_96 = COPY %17.sub0_sub1_sub2 # GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %17.sub3 # GFX10: %{{[0-9]+}}:vreg_64 = COPY %16.sub0_sub1 # GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %16.sub2 # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %15.sub0 # GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %15.sub1 -# GFX10: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 72, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 72, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 4) # GFX10: %{{[0-9]+}}:vreg_64 = COPY %19.sub0_sub1 # GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %19.sub2 # GFX10: %{{[0-9]+}}:vgpr_32 = COPY %18.sub0 @@ -1005,15 +1005,15 @@ body: | %2:sgpr_32 = COPY $sgpr2 %3:sgpr_32 = COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 - %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 20, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 20, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 20, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 20, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 20, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 20, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 20, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 20, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 20, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 20, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 20, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 20, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 20, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 20, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 20, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 20, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 20, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 20, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) ... --- @@ -1023,15 +1023,15 @@ body: | # GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 # GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 # GFX10: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3 -# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 13, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 16, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 20, 13, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 24, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 28, 13, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 36, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 40, 13, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 44, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 13, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 16, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 20, 13, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 24, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 28, 13, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 36, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 40, 13, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 44, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) name: gfx10_tbuffer_load_not_merged_data_format_mismatch @@ -1042,15 +1042,15 @@ body: | %2:sgpr_32 = COPY $sgpr2 %3:sgpr_32 = COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 - %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 13, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 13, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 13, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 13, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 13, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 13, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 13, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 13, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) ... --- @@ -1060,15 +1060,15 @@ body: | # GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 # GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 # GFX10: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3 -# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 21, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 16, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 20, 21, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 24, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 28, 21, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 36, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 40, 21, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 44, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 21, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 16, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 20, 21, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 24, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 28, 21, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 36, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 40, 21, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 44, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) name: gfx10_tbuffer_load_not_merged_num_format_mismatch body: | bb.0.entry: @@ -1077,15 +1077,15 @@ body: | %2:sgpr_32 = COPY $sgpr2 %3:sgpr_32 = COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 - %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 21, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 21, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 21, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 21, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 21, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 21, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 21, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 21, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) ... --- @@ -1094,7 +1094,7 @@ body: | # GFX10-LABEL: name: gfx10_tbuffer_store_x_xyz # GFX10: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE %3, %subreg.sub0, %2, %subreg.sub1, %1, %subreg.sub2 # GFX10: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE %0, %subreg.sub0, %9, %subreg.sub1_sub2_sub3 -# GFX10: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %10, %8, 0, 4, 77, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4) +# GFX10: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %10, %8, 0, 4, 77, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) name: gfx10_tbuffer_store_x_xyz body: | bb.0.entry: @@ -1109,8 +1109,8 @@ body: | %0:sgpr_32 = COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %14:vreg_96 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1, %6:vgpr_32, %subreg.sub2 - TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact %14:vreg_96, %13:sgpr_128, 0, 8, 74, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact %14:vreg_96, %13:sgpr_128, 0, 8, 74, 0, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4) ... --- @@ -1118,7 +1118,7 @@ body: | # GFX10-LABEL: name: gfx10_tbuffer_store_xyz_x # GFX10: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE %3, %subreg.sub0, %2, %subreg.sub1, %1, %subreg.sub2 # GFX10: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE %9, %subreg.sub0_sub1_sub2, %0, %subreg.sub3 -# GFX10: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %10, %8, 0, 4, 77, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4) +# GFX10: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %10, %8, 0, 4, 77, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) name: gfx10_tbuffer_store_xyz_x body: | bb.0.entry: @@ -1133,8 +1133,8 @@ body: | %0:sgpr_32 = COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %14:vreg_96 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1, %6:vgpr_32, %subreg.sub2 - TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact %14:vreg_96, %13:sgpr_128, 0, 4, 74, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 16, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact %14:vreg_96, %13:sgpr_128, 0, 4, 74, 0, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 16, 22, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) ... --- @@ -1142,7 +1142,7 @@ body: | # GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %2, %subreg.sub1 # GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %1, %subreg.sub0, %0, %subreg.sub1 # GFX10: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE %9, %subreg.sub0_sub1, %10, %subreg.sub2_sub3 -# GFX10: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %11, %8, 0, 4, 77, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4) +# GFX10: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %11, %8, 0, 4, 77, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) name: gfx10_tbuffer_store_xy_xy body: | bb.0.entry: @@ -1158,15 +1158,15 @@ body: | %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %14:vreg_64 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1 %15:vreg_64 = REG_SEQUENCE %6:vgpr_32, %subreg.sub0, %7:vgpr_32, %subreg.sub1 - TBUFFER_STORE_FORMAT_XY_OFFSET_exact %14:vreg_64, %13:sgpr_128, 0, 4, 64, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_XY_OFFSET_exact %15:vreg_64, %13:sgpr_128, 0, 12, 64, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_XY_OFFSET_exact %14:vreg_64, %13:sgpr_128, 0, 4, 64, 0, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_XY_OFFSET_exact %15:vreg_64, %13:sgpr_128, 0, 12, 64, 0, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) ... --- # GFX10-LABEL: name: gfx10_tbuffer_store_x_xy # GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %2, %subreg.sub1 # GFX10: %{{[0-9]+}}:vreg_64, %subreg.sub1_sub2 -# GFX10: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %11, %8, 0, 4, 74, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4) +# GFX10: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %11, %8, 0, 4, 74, 0, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4) name: gfx10_tbuffer_store_x_xy body: | bb.0.entry: @@ -1181,15 +1181,15 @@ body: | %0:sgpr_32 = COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %14:vreg_64 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1 - TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_XY_OFFSET_exact %15:vreg_64, %13:sgpr_128, 0, 8, 64, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_XY_OFFSET_exact %15:vreg_64, %13:sgpr_128, 0, 8, 64, 0, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) ... --- # GFX10-LABEL: name: gfx10_tbuffer_store_xy_x # GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %2, %subreg.sub1 # GFX10: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE %9, %subreg.sub0_sub1, %0, %subreg.sub2 -# GFX10: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %10, %8, 0, 4, 74, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4) +# GFX10: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %10, %8, 0, 4, 74, 0, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4) name: gfx10_tbuffer_store_xy_x body: | bb.0.entry: @@ -1205,15 +1205,15 @@ body: | %0:sgpr_32 = COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 %14:vreg_64 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1 - TBUFFER_STORE_FORMAT_XY_OFFSET_exact %14:vreg_64, %13:sgpr_128, 0, 4, 64, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 12, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_XY_OFFSET_exact %14:vreg_64, %13:sgpr_128, 0, 4, 64, 0, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 12, 22, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) ... --- # GFX10-LABEL: name: gfx10_tbuffer_store_x_x # GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %1, %subreg.sub0, %0, %subreg.sub1 -# GFX10: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %9, %8, 0, 4, 64, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4) +# GFX10: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %9, %8, 0, 4, 64, 0, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) name: gfx10_tbuffer_store_x_x body: | bb.0.entry: @@ -1227,14 +1227,14 @@ body: | %1:sgpr_32 = COPY $sgpr1 %0:sgpr_32 = COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 - TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 8, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 8, 22, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) ... --- # GFX10-LABEL: name: gfx10_tbuffer_store_x_x_format_32_32_32_32 # GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %1, %subreg.sub0, %0, %subreg.sub1 -# GFX10: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %9, %8, 0, 4, 64, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4) +# GFX10: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %9, %8, 0, 4, 64, 0, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) name: gfx10_tbuffer_store_x_x_format_32_32_32_32 body: | bb.0.entry: @@ -1248,8 +1248,8 @@ body: | %1:sgpr_32 = COPY $sgpr1 %0:sgpr_32 = COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 - TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 4, 77, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 8, 77, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 4, 77, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 8, 77, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) ... --- @@ -1269,14 +1269,14 @@ body: | # GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 # GFX10: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3 # GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %7, %subreg.sub1 -# GFX10: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 64, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4) +# GFX10: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 64, 0, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) # GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %6, %subreg.sub0, %5, %subreg.sub1 # GFX10: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE killed %15, %subreg.sub0_sub1, %4, %subreg.sub2 # GFX10: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE killed %16, %subreg.sub0_sub1_sub2, %3, %subreg.sub3 -# GFX10: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %17, %13, 0, 16, 77, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4) +# GFX10: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %17, %13, 0, 16, 77, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) # GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %1, %subreg.sub1 # GFX10: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE killed %18, %subreg.sub0_sub1, %0, %subreg.sub2 -# GFX10: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %19, %13, 0, 36, 74, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4) +# GFX10: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %19, %13, 0, 36, 74, 0, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4) name: gfx10_tbuffer_store_float32 body: | bb.0.entry: @@ -1295,15 +1295,15 @@ body: | %1:sgpr_32 = COPY $sgpr1 %0:sgpr_32 = COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 - TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 22, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 22, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 22, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 22, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 22, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 22, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 22, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 22, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) ... --- @@ -1323,14 +1323,14 @@ body: | # GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 # GFX10: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3 # GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %7, %subreg.sub1 -# GFX10: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 63, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4) +# GFX10: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 63, 0, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) # GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %6, %subreg.sub0, %5, %subreg.sub1 # GFX10: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE killed %15, %subreg.sub0_sub1, %4, %subreg.sub2 # GFX10: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE killed %16, %subreg.sub0_sub1_sub2, %3, %subreg.sub3 -# GFX10: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %17, %13, 0, 16, 76, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4) +# GFX10: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %17, %13, 0, 16, 76, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) # GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %1, %subreg.sub1 # GFX10: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE killed %18, %subreg.sub0_sub1, %0, %subreg.sub2 -# GFX10: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %19, %13, 0, 36, 73, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4) +# GFX10: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %19, %13, 0, 36, 73, 0, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4) name: gfx10_tbuffer_store_sint32 body: | bb.0.entry: @@ -1349,15 +1349,15 @@ body: | %1:sgpr_32 = COPY $sgpr1 %0:sgpr_32 = COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 - TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 21, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 21, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 21, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 21, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 21, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 21, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 21, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 21, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 21, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 21, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 21, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 21, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 21, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 21, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 21, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 21, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 21, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 21, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) ... --- @@ -1377,14 +1377,14 @@ body: | # GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 # GFX10: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3 # GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %7, %subreg.sub1 -# GFX10: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 62, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4) +# GFX10: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 62, 0, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 4) # GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %6, %subreg.sub0, %5, %subreg.sub1 # GFX10: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE killed %15, %subreg.sub0_sub1, %4, %subreg.sub2 # GFX10: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE killed %16, %subreg.sub0_sub1_sub2, %3, %subreg.sub3 -# GFX10: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %17, %13, 0, 16, 75, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4) +# GFX10: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %17, %13, 0, 16, 75, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) # GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %1, %subreg.sub1 # GFX10: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE killed %18, %subreg.sub0_sub1, %0, %subreg.sub2 -# GFX10: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %19, %13, 0, 36, 72, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4) +# GFX10: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %19, %13, 0, 36, 72, 0, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4) name: gfx10_tbuffer_store_uint32 body: | bb.0.entry: @@ -1403,15 +1403,15 @@ body: | %1:sgpr_32 = COPY $sgpr1 %0:sgpr_32 = COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 - TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 20, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 20, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 20, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 20, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 20, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 20, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 20, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 20, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 20, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 20, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 20, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 20, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 20, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 20, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 20, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 20, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 20, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 20, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) ... --- @@ -1430,15 +1430,15 @@ body: | # GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 # GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 # GFX10: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3 -# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %8, %13, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) -# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %7, %13, 0, 8, 21, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) -# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %6, %13, 0, 16, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) -# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %5, %13, 0, 20, 21, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) -# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %4, %13, 0, 24, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) -# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %3, %13, 0, 28, 21, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) -# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %2, %13, 0, 36, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) -# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %1, %13, 0, 40, 21, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) -# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %0, %13, 0, 44, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %8, %13, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) +# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %7, %13, 0, 8, 21, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) +# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %6, %13, 0, 16, 22, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) +# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %5, %13, 0, 20, 21, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) +# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %4, %13, 0, 24, 22, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) +# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %3, %13, 0, 28, 21, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) +# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %2, %13, 0, 36, 22, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) +# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %1, %13, 0, 40, 21, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) +# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %0, %13, 0, 44, 22, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) name: gfx10_tbuffer_store_not_merged_data_format_mismatch body: | bb.0.entry: @@ -1457,15 +1457,15 @@ body: | %1:sgpr_32 = COPY $sgpr1 %0:sgpr_32 = COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 - TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 21, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 21, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 21, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 21, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 21, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 22, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 21, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 22, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 21, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 22, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 21, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 22, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) ... --- @@ -1484,15 +1484,15 @@ body: | # GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 # GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 # GFX10: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3 -# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %8, %13, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) -# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %7, %13, 0, 8, 13, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) -# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %6, %13, 0, 16, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) -# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %5, %13, 0, 20, 13, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) -# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %4, %13, 0, 24, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) -# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %3, %13, 0, 28, 13, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) -# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %2, %13, 0, 36, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) -# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %1, %13, 0, 40, 13, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) -# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %0, %13, 0, 44, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %8, %13, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) +# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %7, %13, 0, 8, 13, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) +# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %6, %13, 0, 16, 22, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) +# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %5, %13, 0, 20, 13, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) +# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %4, %13, 0, 24, 22, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) +# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %3, %13, 0, 28, 13, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) +# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %2, %13, 0, 36, 22, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) +# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %1, %13, 0, 40, 13, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) +# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %0, %13, 0, 44, 22, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) name: gfx10_tbuffer_store_not_merged_num_format_mismatch body: | bb.0.entry: @@ -1511,22 +1511,22 @@ body: | %1:sgpr_32 = COPY $sgpr1 %0:sgpr_32 = COPY $sgpr0 %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 - TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 13, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 13, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 13, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 13, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) - TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 22, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 13, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 22, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 13, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 22, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 13, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 22, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 13, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 22, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4) ... --- # GFX10-LABEL: name: gfx10_tbuffer_load_not_merged_swizzled_0 -# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 22, 0, 0, 1, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 22, 0, 0, 1, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) name: gfx10_tbuffer_load_not_merged_swizzled_0 body: | bb.0.entry: @@ -1535,15 +1535,15 @@ body: | %2:sgpr_32 = COPY $sgpr2 %3:sgpr_32 = COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 - %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 1, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 1, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) ... --- # GFX10-LABEL: name: gfx10_tbuffer_load_not_merged_swizzled_1 -# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) -# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 22, 0, 0, 1, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 22, 0, 0, 1, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) name: gfx10_tbuffer_load_not_merged_swizzled_1 body: | bb.0.entry: @@ -1552,8 +1552,8 @@ body: | %2:sgpr_32 = COPY $sgpr2 %3:sgpr_32 = COPY $sgpr3 %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 - %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) - %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 22, 0, 0, 1, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) + %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 22, 0, 0, 1, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4) ... --- diff --git a/llvm/test/CodeGen/AMDGPU/nsa-reassign.mir b/llvm/test/CodeGen/AMDGPU/nsa-reassign.mir index f1d2b6e494c6c..404529d8c69ec 100644 --- a/llvm/test/CodeGen/AMDGPU/nsa-reassign.mir +++ b/llvm/test/CodeGen/AMDGPU/nsa-reassign.mir @@ -27,14 +27,14 @@ registers: - { id: 7, class: vgpr_32, preferred-register: '$vgpr7' } body: | bb.0: - %0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5) - %1 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5) - %2 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5) - %3 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5) - %4 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5) - %5 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5) - %6 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5) - %7:vgpr_32 = IMAGE_SAMPLE_C_L_V1_V5_nsa_gfx10 %0, %2, %4, %5, %6, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 1, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "ImageResource") + %0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5) + %1 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5) + %2 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5) + %3 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5) + %4 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5) + %5 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5) + %6 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5) + %7:vgpr_32 = IMAGE_SAMPLE_C_L_V1_V5_nsa_gfx10 %0, %2, %4, %5, %6, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 1, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "ImageResource") S_ENDPGM 0, implicit %7 ... @@ -58,15 +58,15 @@ registers: - { id: 7, class: vgpr_32, preferred-register: '$vgpr7' } body: | bb.0: - %0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5) - %1 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5) - %2 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5) - %3 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5) - %4 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5) - %5 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5) - %6 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5) + %0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5) + %1 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5) + %2 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5) + %3 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5) + %4 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5) + %5 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5) + %6 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5) S_NOP 0, implicit-def dead $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4, implicit %5, implicit %6 - %7:vgpr_32 = IMAGE_SAMPLE_C_L_V1_V5_nsa_gfx10 %0, %2, %4, %5, %6, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 1, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "ImageResource") + %7:vgpr_32 = IMAGE_SAMPLE_C_L_V1_V5_nsa_gfx10 %0, %2, %4, %5, %6, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 1, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "ImageResource") S_ENDPGM 0, implicit %7 ... diff --git a/llvm/test/CodeGen/AMDGPU/nsa-vmem-hazard.mir b/llvm/test/CodeGen/AMDGPU/nsa-vmem-hazard.mir index dfd8c56a7ae8e..d593bdba0292e 100644 --- a/llvm/test/CodeGen/AMDGPU/nsa-vmem-hazard.mir +++ b/llvm/test/CodeGen/AMDGPU/nsa-vmem-hazard.mir @@ -8,7 +8,7 @@ name: hazard_image_sample_d_buf_off6 body: | bb.0: - $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 undef $vgpr3, undef $vgpr8, undef $vgpr7, undef $vgpr5, undef $vgpr4, undef $vgpr6, undef $vgpr0, undef $vgpr2, undef $vgpr2, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16) + $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 undef $vgpr3, undef $vgpr8, undef $vgpr7, undef $vgpr5, undef $vgpr4, undef $vgpr6, undef $vgpr0, undef $vgpr2, undef $vgpr2, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128)) $vgpr1 = BUFFER_LOAD_DWORD_OFFSET undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 6, 0, 0, 0, implicit $exec ... @@ -19,7 +19,7 @@ body: | name: no_hazard_image_sample_d_buf_off1 body: | bb.0: - $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 undef $vgpr3, undef $vgpr8, undef $vgpr7, undef $vgpr5, undef $vgpr4, undef $vgpr6, undef $vgpr0, undef $vgpr2, undef $vgpr2, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16) + $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 undef $vgpr3, undef $vgpr8, undef $vgpr7, undef $vgpr5, undef $vgpr4, undef $vgpr6, undef $vgpr0, undef $vgpr2, undef $vgpr2, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128)) $vgpr1 = BUFFER_LOAD_DWORD_OFFSET undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 1, 0, 0, 0, implicit $exec ... @@ -31,7 +31,7 @@ body: | name: no_hazard_image_sample_d_buf_far body: | bb.0: - $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 undef $vgpr3, undef $vgpr8, undef $vgpr7, undef $vgpr5, undef $vgpr4, undef $vgpr6, undef $vgpr0, undef $vgpr2, undef $vgpr2, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16) + $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 undef $vgpr3, undef $vgpr8, undef $vgpr7, undef $vgpr5, undef $vgpr4, undef $vgpr6, undef $vgpr0, undef $vgpr2, undef $vgpr2, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128)) V_NOP_e32 implicit $exec $vgpr1 = BUFFER_LOAD_DWORD_OFFSET undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 6, 0, 0, 0, implicit $exec ... @@ -44,7 +44,7 @@ body: | name: no_hazard_image_sample_v4_v2_buf_off6 body: | bb.0: - $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V2_gfx10 undef $vgpr1_vgpr2, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16) + $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V2_gfx10 undef $vgpr1_vgpr2, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128)) $vgpr1 = BUFFER_LOAD_DWORD_OFFSET undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 6, 0, 0, 0, implicit $exec ... @@ -56,6 +56,6 @@ body: | name: no_hazard_image_sample_v4_v3_buf_off6 body: | bb.0: - $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V3_nsa_gfx10 undef $vgpr1, undef $vgpr2, undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16) + $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V3_nsa_gfx10 undef $vgpr1, undef $vgpr2, undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128)) $vgpr1 = BUFFER_LOAD_DWORD_OFFSET undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 6, 0, 0, 0, implicit $exec ... diff --git a/llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-loop-phi.mir b/llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-loop-phi.mir index 0c62d666e7296..a54e829dea812 100644 --- a/llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-loop-phi.mir +++ b/llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-loop-phi.mir @@ -24,7 +24,7 @@ body: | ; CHECK: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[S_MOV_B64_]], implicit $exec ; CHECK: V_CMP_NE_U32_e32 1, [[V_CNDMASK_B32_e64_]], implicit-def $vcc, implicit $exec ; CHECK: %1.sub1:vreg_64 = COPY %1.sub0 - ; CHECK: DS_WRITE_B64_gfx9 undef %3:vgpr_32, %1, 0, 0, implicit $exec :: (store 8, addrspace 3) + ; CHECK: DS_WRITE_B64_gfx9 undef %3:vgpr_32, %1, 0, 0, implicit $exec :: (store (s64), addrspace 3) ; CHECK: ATOMIC_FENCE 4, 2 ; CHECK: [[S_MOV_B64_1:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 0 ; CHECK: $vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc @@ -46,7 +46,7 @@ body: | %4:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %7, implicit $exec V_CMP_NE_U32_e32 1, %4, implicit-def $vcc, implicit $exec %5.sub1:vreg_64 = COPY %5.sub0 - DS_WRITE_B64_gfx9 undef %6:vgpr_32, %5, 0, 0, implicit $exec :: (store 8, addrspace 3) + DS_WRITE_B64_gfx9 undef %6:vgpr_32, %5, 0, 0, implicit $exec :: (store (s64), addrspace 3) ATOMIC_FENCE 4, 2 %7:sreg_64_xexec = S_MOV_B64 0 $vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc @@ -71,7 +71,7 @@ body: | ; CHECK: bb.2: ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK: %1.sub1:vreg_64 = COPY %1.sub0 - ; CHECK: DS_WRITE_B64_gfx9 undef %3:vgpr_32, %1, 0, 0, implicit $exec :: (store 8, addrspace 3) + ; CHECK: DS_WRITE_B64_gfx9 undef %3:vgpr_32, %1, 0, 0, implicit $exec :: (store (s64), addrspace 3) ; CHECK: ATOMIC_FENCE 4, 2 ; CHECK: $vcc = S_ANDN2_B64 $exec, [[S_MOV_B64_]], implicit-def dead $scc ; CHECK: [[S_MOV_B64_1:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 0 @@ -93,7 +93,7 @@ body: | %4:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %7, implicit $exec V_CMP_NE_U32_e32 1, %4, implicit-def $vcc, implicit $exec %5.sub1:vreg_64 = COPY %5.sub0 - DS_WRITE_B64_gfx9 undef %6:vgpr_32, %5, 0, 0, implicit $exec :: (store 8, addrspace 3) + DS_WRITE_B64_gfx9 undef %6:vgpr_32, %5, 0, 0, implicit $exec :: (store (s64), addrspace 3) ATOMIC_FENCE 4, 2 $vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc %7:sreg_64_xexec = S_MOV_B64 0 @@ -120,7 +120,7 @@ body: | ; CHECK: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr4_sgpr5, implicit $exec ; CHECK: V_CMP_NE_U32_e32 1, [[V_CNDMASK_B32_e64_]], implicit-def $vcc, implicit $exec ; CHECK: %0.sub1:vreg_64 = COPY %0.sub0 - ; CHECK: DS_WRITE_B64_gfx9 undef %2:vgpr_32, %0, 0, 0, implicit $exec :: (store 8, addrspace 3) + ; CHECK: DS_WRITE_B64_gfx9 undef %2:vgpr_32, %0, 0, 0, implicit $exec :: (store (s64), addrspace 3) ; CHECK: ATOMIC_FENCE 4, 2 ; CHECK: $sgpr4_sgpr5 = S_MOV_B64 0 ; CHECK: $vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc @@ -143,7 +143,7 @@ body: | %4:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr4_sgpr5, implicit $exec V_CMP_NE_U32_e32 1, %4, implicit-def $vcc, implicit $exec %5.sub1:vreg_64 = COPY %5.sub0 - DS_WRITE_B64_gfx9 undef %6:vgpr_32, %5, 0, 0, implicit $exec :: (store 8, addrspace 3) + DS_WRITE_B64_gfx9 undef %6:vgpr_32, %5, 0, 0, implicit $exec :: (store (s64), addrspace 3) ATOMIC_FENCE 4, 2 $sgpr4_sgpr5 = S_MOV_B64 0 $vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc @@ -168,7 +168,7 @@ body: | ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK: liveins: $sgpr4_sgpr5 ; CHECK: %0.sub1:vreg_64 = COPY %0.sub0 - ; CHECK: DS_WRITE_B64_gfx9 undef %2:vgpr_32, %0, 0, 0, implicit $exec :: (store 8, addrspace 3) + ; CHECK: DS_WRITE_B64_gfx9 undef %2:vgpr_32, %0, 0, 0, implicit $exec :: (store (s64), addrspace 3) ; CHECK: ATOMIC_FENCE 4, 2 ; CHECK: $vcc = S_ANDN2_B64 $exec, $sgpr4_sgpr5, implicit-def dead $scc ; CHECK: $sgpr4_sgpr5 = S_MOV_B64 0 @@ -191,7 +191,7 @@ body: | %4:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr4_sgpr5, implicit $exec V_CMP_NE_U32_e32 1, %4, implicit-def $vcc, implicit $exec %5.sub1:vreg_64 = COPY %5.sub0 - DS_WRITE_B64_gfx9 undef %6:vgpr_32, %5, 0, 0, implicit $exec :: (store 8, addrspace 3) + DS_WRITE_B64_gfx9 undef %6:vgpr_32, %5, 0, 0, implicit $exec :: (store (s64), addrspace 3) ATOMIC_FENCE 4, 2 $vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc $sgpr4_sgpr5 = S_MOV_B64 0 diff --git a/llvm/test/CodeGen/AMDGPU/pei-build-spill-partial-agpr.mir b/llvm/test/CodeGen/AMDGPU/pei-build-spill-partial-agpr.mir index ee8a7397c9c11..0cd0b7b0c159d 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-build-spill-partial-agpr.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-build-spill-partial-agpr.mir @@ -60,21 +60,21 @@ body: | ; MUBUF-V2A: liveins: $agpr0 ; MUBUF-V2A: $vgpr0_vgpr1 = IMPLICIT_DEF ; MUBUF-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 - ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store 4 into %stack.0 + 4, addrspace 5) + ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1 - ; MUBUF-V2A: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load 4 from %stack.0 + 4, addrspace 5) + ; MUBUF-V2A: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load (s32) from %stack.0 + 4, addrspace 5) ; MUBUF-V2A: S_ENDPGM 0 ; FLATSCR-V2A-LABEL: name: test_spill_v2_partial_agpr ; FLATSCR-V2A: liveins: $agpr0 ; FLATSCR-V2A: $vgpr0_vgpr1 = IMPLICIT_DEF ; FLATSCR-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 - ; FLATSCR-V2A: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1 :: (store 4 into %stack.0 + 4, addrspace 5) + ; FLATSCR-V2A: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1 :: (store (s32) into %stack.0 + 4, addrspace 5) ; FLATSCR-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1 - ; FLATSCR-V2A: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1 :: (load 4 from %stack.0 + 4, addrspace 5) + ; FLATSCR-V2A: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1 :: (load (s32) from %stack.0 + 4, addrspace 5) ; FLATSCR-V2A: S_ENDPGM 0 $vgpr0_vgpr1 = IMPLICIT_DEF - SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, align 4, addrspace 5) - $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5) + SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5) + $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5) S_ENDPGM 0 ... @@ -94,23 +94,23 @@ body: | ; MUBUF-V2A: liveins: $agpr0 ; MUBUF-V2A: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF ; MUBUF-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 - ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 :: (store 4 into %stack.0 + 4, addrspace 5) - ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 :: (store 4 into %stack.0 + 8, addrspace 5) + ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 :: (store (s32) into %stack.0 + 4, addrspace 5) + ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 :: (store (s32) into %stack.0 + 8, addrspace 5) ; MUBUF-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 - ; MUBUF-V2A: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: (load 4 from %stack.0 + 4, addrspace 5) - ; MUBUF-V2A: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: (load 4 from %stack.0 + 8, addrspace 5) + ; MUBUF-V2A: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: (load (s32) from %stack.0 + 4, addrspace 5) + ; MUBUF-V2A: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: (load (s32) from %stack.0 + 8, addrspace 5) ; MUBUF-V2A: S_ENDPGM 0 ; FLATSCR-V2A-LABEL: name: test_spill_v3_partial_agpr ; FLATSCR-V2A: liveins: $agpr0 ; FLATSCR-V2A: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF ; FLATSCR-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 - ; FLATSCR-V2A: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr1_vgpr2, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2 :: (store 8 into %stack.0 + 4, align 4, addrspace 5) + ; FLATSCR-V2A: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr1_vgpr2, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2 :: (store (s64) into %stack.0 + 4, align 4, addrspace 5) ; FLATSCR-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 - ; FLATSCR-V2A: $vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2 :: (load 8 from %stack.0 + 4, align 4, addrspace 5) + ; FLATSCR-V2A: $vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2 :: (load (s64) from %stack.0 + 4, align 4, addrspace 5) ; FLATSCR-V2A: S_ENDPGM 0 $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF - SI_SPILL_V96_SAVE killed $vgpr0_vgpr1_vgpr2, %stack.0, $sgpr32, 0, implicit $exec :: (store 12 into %stack.0, align 4, addrspace 5) - $vgpr0_vgpr1_vgpr2 = SI_SPILL_V96_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 12 from %stack.0, align 4, addrspace 5) + SI_SPILL_V96_SAVE killed $vgpr0_vgpr1_vgpr2, %stack.0, $sgpr32, 0, implicit $exec :: (store (s96) into %stack.0, align 4, addrspace 5) + $vgpr0_vgpr1_vgpr2 = SI_SPILL_V96_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s96) from %stack.0, align 4, addrspace 5) S_ENDPGM 0 ... @@ -132,11 +132,11 @@ body: | ; MUBUF-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; MUBUF-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; MUBUF-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 - ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 12, addrspace 5) + ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0 + 12, addrspace 5) ; MUBUF-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 ; MUBUF-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 ; MUBUF-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 - ; MUBUF-V2A: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load 4 from %stack.0 + 12, addrspace 5) + ; MUBUF-V2A: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load (s32) from %stack.0 + 12, addrspace 5) ; MUBUF-V2A: S_ENDPGM 0 ; FLATSCR-V2A-LABEL: name: test_spill_v4_partial_agpr ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2 @@ -144,15 +144,15 @@ body: | ; FLATSCR-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; FLATSCR-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; FLATSCR-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 - ; FLATSCR-V2A: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 12, addrspace 5) + ; FLATSCR-V2A: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0 + 12, addrspace 5) ; FLATSCR-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 ; FLATSCR-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 ; FLATSCR-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 - ; FLATSCR-V2A: $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load 4 from %stack.0 + 12, addrspace 5) + ; FLATSCR-V2A: $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load (s32) from %stack.0 + 12, addrspace 5) ; FLATSCR-V2A: S_ENDPGM 0 $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF - SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store 16 into %stack.0, align 4, addrspace 5) - $vgpr0_vgpr1_vgpr2_vgpr3 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 16 from %stack.0, align 4, addrspace 5) + SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, align 4, addrspace 5) + $vgpr0_vgpr1_vgpr2_vgpr3 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) S_ENDPGM 0 ... @@ -174,13 +174,13 @@ body: | ; MUBUF-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; MUBUF-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; MUBUF-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0 + 12, addrspace 5) - ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0 + 16, addrspace 5) + ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0 + 12, addrspace 5) + ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0 + 16, addrspace 5) ; MUBUF-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; MUBUF-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; MUBUF-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; MUBUF-V2A: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0 + 12, addrspace 5) - ; MUBUF-V2A: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0 + 16, addrspace 5) + ; MUBUF-V2A: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load (s32) from %stack.0 + 12, addrspace 5) + ; MUBUF-V2A: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load (s32) from %stack.0 + 16, addrspace 5) ; MUBUF-V2A: S_ENDPGM 0 ; FLATSCR-V2A-LABEL: name: test_spill_v5_partial_agpr ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2 @@ -188,17 +188,17 @@ body: | ; FLATSCR-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; FLATSCR-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; FLATSCR-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; FLATSCR-V2A: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0 + 12, addrspace 5) - ; FLATSCR-V2A: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0 + 16, addrspace 5) + ; FLATSCR-V2A: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0 + 12, addrspace 5) + ; FLATSCR-V2A: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0 + 16, addrspace 5) ; FLATSCR-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; FLATSCR-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; FLATSCR-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 - ; FLATSCR-V2A: $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0 + 12, addrspace 5) - ; FLATSCR-V2A: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0 + 16, addrspace 5) + ; FLATSCR-V2A: $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load (s32) from %stack.0 + 12, addrspace 5) + ; FLATSCR-V2A: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load (s32) from %stack.0 + 16, addrspace 5) ; FLATSCR-V2A: S_ENDPGM 0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF - SI_SPILL_V160_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, %stack.0, $sgpr32, 0, implicit $exec :: (store 20 into %stack.0, align 4, addrspace 5) - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = SI_SPILL_V160_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 20 from %stack.0, align 4, addrspace 5) + SI_SPILL_V160_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, %stack.0, $sgpr32, 0, implicit $exec :: (store (s160) into %stack.0, align 4, addrspace 5) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = SI_SPILL_V160_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s160) from %stack.0, align 4, addrspace 5) S_ENDPGM 0 ... @@ -222,13 +222,13 @@ body: | ; MUBUF-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; MUBUF-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; MUBUF-V2A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 4 into %stack.0 + 20, addrspace 5) + ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s32) into %stack.0 + 20, addrspace 5) ; MUBUF-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; MUBUF-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; MUBUF-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; MUBUF-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; MUBUF-V2A: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; MUBUF-V2A: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 4 from %stack.0 + 20, addrspace 5) + ; MUBUF-V2A: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load (s32) from %stack.0 + 20, addrspace 5) ; MUBUF-V2A: S_ENDPGM 0 ; FLATSCR-V2A-LABEL: name: test_spill_v6_partial_agpr ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4 @@ -238,17 +238,17 @@ body: | ; FLATSCR-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; FLATSCR-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; FLATSCR-V2A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; FLATSCR-V2A: SCRATCH_STORE_DWORD_SADDR killed $vgpr5, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 4 into %stack.0 + 20, addrspace 5) + ; FLATSCR-V2A: SCRATCH_STORE_DWORD_SADDR killed $vgpr5, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s32) into %stack.0 + 20, addrspace 5) ; FLATSCR-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; FLATSCR-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; FLATSCR-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; FLATSCR-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; FLATSCR-V2A: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; FLATSCR-V2A: $vgpr5 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 4 from %stack.0 + 20, addrspace 5) + ; FLATSCR-V2A: $vgpr5 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load (s32) from %stack.0 + 20, addrspace 5) ; FLATSCR-V2A: S_ENDPGM 0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF - SI_SPILL_V192_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, %stack.0, $sgpr32, 0, implicit $exec :: (store 24 into %stack.0, align 4, addrspace 5) - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = SI_SPILL_V192_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 24 from %stack.0, align 4, addrspace 5) + SI_SPILL_V192_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, %stack.0, $sgpr32, 0, implicit $exec :: (store (s196) into %stack.0, align 4, addrspace 5) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = SI_SPILL_V192_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s196) from %stack.0, align 4, addrspace 5) S_ENDPGM 0 ... @@ -271,18 +271,18 @@ body: | ; MUBUF-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; MUBUF-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; MUBUF-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 16, addrspace 5) - ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 20, addrspace 5) - ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 24, addrspace 5) - ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 28, addrspace 5) + ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s32) into %stack.0 + 16, addrspace 5) + ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s32) into %stack.0 + 20, addrspace 5) + ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s32) into %stack.0 + 24, addrspace 5) + ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s32) into %stack.0 + 28, addrspace 5) ; MUBUF-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; MUBUF-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; MUBUF-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; MUBUF-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; MUBUF-V2A: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 16, addrspace 5) - ; MUBUF-V2A: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 20, addrspace 5) - ; MUBUF-V2A: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 24, addrspace 5) - ; MUBUF-V2A: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 28, addrspace 5) + ; MUBUF-V2A: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s32) from %stack.0 + 16, addrspace 5) + ; MUBUF-V2A: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s32) from %stack.0 + 20, addrspace 5) + ; MUBUF-V2A: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s32) from %stack.0 + 24, addrspace 5) + ; MUBUF-V2A: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s32) from %stack.0 + 28, addrspace 5) ; MUBUF-V2A: S_ENDPGM 0 ; FLATSCR-V2A-LABEL: name: test_spill_v8_partial_agpr ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3 @@ -291,16 +291,16 @@ body: | ; FLATSCR-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; FLATSCR-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; FLATSCR-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-V2A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 16 into %stack.0 + 16, align 4, addrspace 5) + ; FLATSCR-V2A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; FLATSCR-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; FLATSCR-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; FLATSCR-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; FLATSCR-V2A: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 16 from %stack.0 + 16, align 4, addrspace 5) + ; FLATSCR-V2A: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s128) from %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-V2A: S_ENDPGM 0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF - SI_SPILL_V256_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, %stack.0, $sgpr32, 0, implicit $exec :: (store 32 into %stack.0, align 4, addrspace 5) - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = SI_SPILL_V256_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 32 from %stack.0, align 4, addrspace 5) + SI_SPILL_V256_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, %stack.0, $sgpr32, 0, implicit $exec :: (store (s256) into %stack.0, align 4, addrspace 5) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = SI_SPILL_V256_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.0, align 4, addrspace 5) S_ENDPGM 0 ... @@ -324,33 +324,33 @@ body: | ; MUBUF-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; MUBUF-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; MUBUF-V2A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 20, addrspace 5) - ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 24, addrspace 5) - ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 28, addrspace 5) - ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 32, addrspace 5) - ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 36, addrspace 5) - ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 40, addrspace 5) - ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 44, addrspace 5) - ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 48, addrspace 5) - ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 52, addrspace 5) - ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 56, addrspace 5) - ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 60, addrspace 5) + ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 20, addrspace 5) + ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 24, addrspace 5) + ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 28, addrspace 5) + ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 32, addrspace 5) + ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 36, addrspace 5) + ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 40, addrspace 5) + ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 44, addrspace 5) + ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 48, addrspace 5) + ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 52, addrspace 5) + ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 56, addrspace 5) + ; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 60, addrspace 5) ; MUBUF-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; MUBUF-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; MUBUF-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; MUBUF-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; MUBUF-V2A: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; MUBUF-V2A: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 20, addrspace 5) - ; MUBUF-V2A: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 24, addrspace 5) - ; MUBUF-V2A: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 28, addrspace 5) - ; MUBUF-V2A: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 32, addrspace 5) - ; MUBUF-V2A: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 36, addrspace 5) - ; MUBUF-V2A: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 40, addrspace 5) - ; MUBUF-V2A: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 44, addrspace 5) - ; MUBUF-V2A: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 48, addrspace 5) - ; MUBUF-V2A: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 52, addrspace 5) - ; MUBUF-V2A: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 56, addrspace 5) - ; MUBUF-V2A: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 60, addrspace 5) + ; MUBUF-V2A: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0 + 20, addrspace 5) + ; MUBUF-V2A: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0 + 24, addrspace 5) + ; MUBUF-V2A: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0 + 28, addrspace 5) + ; MUBUF-V2A: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0 + 32, addrspace 5) + ; MUBUF-V2A: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0 + 36, addrspace 5) + ; MUBUF-V2A: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0 + 40, addrspace 5) + ; MUBUF-V2A: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0 + 44, addrspace 5) + ; MUBUF-V2A: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0 + 48, addrspace 5) + ; MUBUF-V2A: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0 + 52, addrspace 5) + ; MUBUF-V2A: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0 + 56, addrspace 5) + ; MUBUF-V2A: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0 + 60, addrspace 5) ; MUBUF-V2A: S_ENDPGM 0 ; FLATSCR-V2A-LABEL: name: test_spill_v16_partial_agpr ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4 @@ -360,21 +360,21 @@ body: | ; FLATSCR-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; FLATSCR-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; FLATSCR-V2A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr5_vgpr6_vgpr7, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 12 into %stack.0 + 20, align 4, addrspace 5) - ; FLATSCR-V2A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 16 into %stack.0 + 32, align 4, addrspace 5) - ; FLATSCR-V2A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 16 into %stack.0 + 48, align 4, addrspace 5) + ; FLATSCR-V2A: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr5_vgpr6_vgpr7, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s96) into %stack.0 + 20, align 4, addrspace 5) + ; FLATSCR-V2A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s128) into %stack.0 + 32, align 4, addrspace 5) + ; FLATSCR-V2A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s128) into %stack.0 + 48, align 4, addrspace 5) ; FLATSCR-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; FLATSCR-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; FLATSCR-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; FLATSCR-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; FLATSCR-V2A: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; FLATSCR-V2A: $vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 12 from %stack.0 + 20, align 4, addrspace 5) - ; FLATSCR-V2A: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 16 from %stack.0 + 32, align 4, addrspace 5) - ; FLATSCR-V2A: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 16 from %stack.0 + 48, align 4, addrspace 5) + ; FLATSCR-V2A: $vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s96) from %stack.0 + 20, align 4, addrspace 5) + ; FLATSCR-V2A: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s128) from %stack.0 + 32, align 4, addrspace 5) + ; FLATSCR-V2A: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s128) from %stack.0 + 48, align 4, addrspace 5) ; FLATSCR-V2A: S_ENDPGM 0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF - SI_SPILL_V512_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, %stack.0, $sgpr32, 0, implicit $exec :: (store 64 into %stack.0, align 4, addrspace 5) - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = SI_SPILL_V512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 64 from %stack.0, align 4, addrspace 5) + SI_SPILL_V512_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, %stack.0, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = SI_SPILL_V512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5) S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/pei-build-spill.mir b/llvm/test/CodeGen/AMDGPU/pei-build-spill.mir index 2865cef960c8b..9b75f923cc74b 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-build-spill.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-build-spill.mir @@ -21,8 +21,8 @@ body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_v1 ; MUBUF: $vgpr0 = IMPLICIT_DEF - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) ; MUBUF: S_ENDPGM 0 ; MUBUF-V2A-LABEL: name: test_spill_v1 ; MUBUF-V2A: liveins: $agpr0 @@ -32,8 +32,8 @@ body: | ; MUBUF-V2A: S_ENDPGM 0 ; FLATSCR-LABEL: name: test_spill_v1 ; FLATSCR: $vgpr0 = IMPLICIT_DEF - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5) - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) ; FLATSCR: S_ENDPGM 0 ; FLATSCR-V2A-LABEL: name: test_spill_v1 ; FLATSCR-V2A: liveins: $agpr0 @@ -43,8 +43,8 @@ body: | ; FLATSCR-V2A: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_v1 ; MUBUF-GFX90A: $vgpr0 = IMPLICIT_DEF - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) - ; MUBUF-GFX90A: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; MUBUF-GFX90A: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) ; MUBUF-GFX90A: S_ENDPGM 0 ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v1 ; MUBUF-GFX90A-V2A: liveins: $agpr0 @@ -54,8 +54,8 @@ body: | ; MUBUF-GFX90A-V2A: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_v1 ; FLATSCR-GFX90A: $vgpr0 = IMPLICIT_DEF - ; FLATSCR-GFX90A: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5) - ; FLATSCR-GFX90A: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5) + ; FLATSCR-GFX90A: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) + ; FLATSCR-GFX90A: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) ; FLATSCR-GFX90A: S_ENDPGM 0 ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v1 ; FLATSCR-GFX90A-V2A: liveins: $agpr0 @@ -64,8 +64,8 @@ body: | ; FLATSCR-GFX90A-V2A: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec ; FLATSCR-GFX90A-V2A: S_ENDPGM 0 $vgpr0 = IMPLICIT_DEF - SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store 4 into %stack.0, align 4, addrspace 5) - $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5) + SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, align 4, addrspace 5) + $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5) S_ENDPGM 0 ... @@ -83,10 +83,10 @@ body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_v2 ; MUBUF: $vgpr0_vgpr1 = IMPLICIT_DEF - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store 4 into %stack.0, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store 4 into %stack.0 + 4, addrspace 5) - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load 4 from %stack.0, addrspace 5) - ; MUBUF: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load 4 from %stack.0 + 4, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store (s32) into %stack.0 + 4, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load (s32) from %stack.0, addrspace 5) + ; MUBUF: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load (s32) from %stack.0 + 4, addrspace 5) ; MUBUF: S_ENDPGM 0 ; MUBUF-V2A-LABEL: name: test_spill_v2 ; MUBUF-V2A: liveins: $agpr0, $agpr1 @@ -98,8 +98,8 @@ body: | ; MUBUF-V2A: S_ENDPGM 0 ; FLATSCR-LABEL: name: test_spill_v2 ; FLATSCR: $vgpr0_vgpr1 = IMPLICIT_DEF - ; FLATSCR: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store 8 into %stack.0, align 4, addrspace 5) - ; FLATSCR: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load 8 from %stack.0, align 4, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into %stack.0, align 4, addrspace 5) + ; FLATSCR: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from %stack.0, align 4, addrspace 5) ; FLATSCR: S_ENDPGM 0 ; FLATSCR-V2A-LABEL: name: test_spill_v2 ; FLATSCR-V2A: liveins: $agpr0, $agpr1 @@ -111,10 +111,10 @@ body: | ; FLATSCR-V2A: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_v2 ; MUBUF-GFX90A: $vgpr0_vgpr1 = IMPLICIT_DEF - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store 4 into %stack.0, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store 4 into %stack.0 + 4, addrspace 5) - ; MUBUF-GFX90A: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load 4 from %stack.0, addrspace 5) - ; MUBUF-GFX90A: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load 4 from %stack.0 + 4, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store (s32) into %stack.0 + 4, addrspace 5) + ; MUBUF-GFX90A: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load (s32) from %stack.0, addrspace 5) + ; MUBUF-GFX90A: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load (s32) from %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A: S_ENDPGM 0 ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v2 ; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1 @@ -126,8 +126,8 @@ body: | ; MUBUF-GFX90A-V2A: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_v2 ; FLATSCR-GFX90A: $vgpr0_vgpr1 = IMPLICIT_DEF - ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store 8 into %stack.0, align 4, addrspace 5) - ; FLATSCR-GFX90A: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load 8 from %stack.0, align 4, addrspace 5) + ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into %stack.0, align 4, addrspace 5) + ; FLATSCR-GFX90A: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A: S_ENDPGM 0 ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v2 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1 @@ -138,8 +138,8 @@ body: | ; FLATSCR-GFX90A-V2A: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1 ; FLATSCR-GFX90A-V2A: S_ENDPGM 0 $vgpr0_vgpr1 = IMPLICIT_DEF - SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, align 4, addrspace 5) - $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5) + SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5) + $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5) S_ENDPGM 0 ... @@ -157,12 +157,12 @@ body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_v3 ; MUBUF: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: (store 4 into %stack.0, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 :: (store 4 into %stack.0 + 4, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 :: (store 4 into %stack.0 + 8, addrspace 5) - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: (load 4 from %stack.0, addrspace 5) - ; MUBUF: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: (load 4 from %stack.0 + 4, addrspace 5) - ; MUBUF: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: (load 4 from %stack.0 + 8, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: (store (s32) into %stack.0, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 :: (store (s32) into %stack.0 + 4, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 :: (store (s32) into %stack.0 + 8, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: (load (s32) from %stack.0, addrspace 5) + ; MUBUF: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: (load (s32) from %stack.0 + 4, addrspace 5) + ; MUBUF: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: (load (s32) from %stack.0 + 8, addrspace 5) ; MUBUF: S_ENDPGM 0 ; MUBUF-V2A-LABEL: name: test_spill_v3 ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2 @@ -176,8 +176,8 @@ body: | ; MUBUF-V2A: S_ENDPGM 0 ; FLATSCR-LABEL: name: test_spill_v3 ; FLATSCR: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF - ; FLATSCR: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store 12 into %stack.0, align 4, addrspace 5) - ; FLATSCR: $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load 12 from %stack.0, align 4, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s96) into %stack.0, align 4, addrspace 5) + ; FLATSCR: $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s96) from %stack.0, align 4, addrspace 5) ; FLATSCR: S_ENDPGM 0 ; FLATSCR-V2A-LABEL: name: test_spill_v3 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2 @@ -191,12 +191,12 @@ body: | ; FLATSCR-V2A: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_v3 ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: (store 4 into %stack.0, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 :: (store 4 into %stack.0 + 4, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 :: (store 4 into %stack.0 + 8, addrspace 5) - ; MUBUF-GFX90A: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: (load 4 from %stack.0, addrspace 5) - ; MUBUF-GFX90A: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: (load 4 from %stack.0 + 4, addrspace 5) - ; MUBUF-GFX90A: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: (load 4 from %stack.0 + 8, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: (store (s32) into %stack.0, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 :: (store (s32) into %stack.0 + 4, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 :: (store (s32) into %stack.0 + 8, addrspace 5) + ; MUBUF-GFX90A: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: (load (s32) from %stack.0, addrspace 5) + ; MUBUF-GFX90A: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: (load (s32) from %stack.0 + 4, addrspace 5) + ; MUBUF-GFX90A: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: (load (s32) from %stack.0 + 8, addrspace 5) ; MUBUF-GFX90A: S_ENDPGM 0 ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v3 ; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2 @@ -210,8 +210,8 @@ body: | ; MUBUF-GFX90A-V2A: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_v3 ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF - ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store 12 into %stack.0, align 4, addrspace 5) - ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load 12 from %stack.0, align 4, addrspace 5) + ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s96) into %stack.0, align 4, addrspace 5) + ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s96) from %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A: S_ENDPGM 0 ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v3 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2 @@ -224,8 +224,8 @@ body: | ; FLATSCR-GFX90A-V2A: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 ; FLATSCR-GFX90A-V2A: S_ENDPGM 0 $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF - SI_SPILL_V96_SAVE killed $vgpr0_vgpr1_vgpr2, %stack.0, $sgpr32, 0, implicit $exec :: (store 12 into %stack.0, align 4, addrspace 5) - $vgpr0_vgpr1_vgpr2 = SI_SPILL_V96_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 12 from %stack.0, align 4, addrspace 5) + SI_SPILL_V96_SAVE killed $vgpr0_vgpr1_vgpr2, %stack.0, $sgpr32, 0, implicit $exec :: (store (s96) into %stack.0, align 4, addrspace 5) + $vgpr0_vgpr1_vgpr2 = SI_SPILL_V96_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s96) from %stack.0, align 4, addrspace 5) S_ENDPGM 0 ... @@ -243,14 +243,14 @@ body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_v4 ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 4, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 8, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 12, addrspace 5) - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load 4 from %stack.0, addrspace 5) - ; MUBUF: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load 4 from %stack.0 + 4, addrspace 5) - ; MUBUF: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load 4 from %stack.0 + 8, addrspace 5) - ; MUBUF: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load 4 from %stack.0 + 12, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0 + 4, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0 + 8, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0 + 12, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load (s32) from %stack.0, addrspace 5) + ; MUBUF: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load (s32) from %stack.0 + 4, addrspace 5) + ; MUBUF: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load (s32) from %stack.0 + 8, addrspace 5) + ; MUBUF: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load (s32) from %stack.0 + 12, addrspace 5) ; MUBUF: S_ENDPGM 0 ; MUBUF-V2A-LABEL: name: test_spill_v4 ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3 @@ -266,8 +266,8 @@ body: | ; MUBUF-V2A: S_ENDPGM 0 ; FLATSCR-LABEL: name: test_spill_v4 ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF - ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store 16 into %stack.0, align 4, addrspace 5) - ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %stack.0, align 4, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0, align 4, addrspace 5) + ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0, align 4, addrspace 5) ; FLATSCR: S_ENDPGM 0 ; FLATSCR-V2A-LABEL: name: test_spill_v4 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3 @@ -283,14 +283,14 @@ body: | ; FLATSCR-V2A: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_v4 ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 4, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 8, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 12, addrspace 5) - ; MUBUF-GFX90A: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load 4 from %stack.0, addrspace 5) - ; MUBUF-GFX90A: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load 4 from %stack.0 + 4, addrspace 5) - ; MUBUF-GFX90A: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load 4 from %stack.0 + 8, addrspace 5) - ; MUBUF-GFX90A: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load 4 from %stack.0 + 12, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0 + 4, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0 + 8, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0 + 12, addrspace 5) + ; MUBUF-GFX90A: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load (s32) from %stack.0, addrspace 5) + ; MUBUF-GFX90A: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load (s32) from %stack.0 + 4, addrspace 5) + ; MUBUF-GFX90A: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load (s32) from %stack.0 + 8, addrspace 5) + ; MUBUF-GFX90A: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load (s32) from %stack.0 + 12, addrspace 5) ; MUBUF-GFX90A: S_ENDPGM 0 ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v4 ; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3 @@ -306,8 +306,8 @@ body: | ; MUBUF-GFX90A-V2A: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_v4 ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF - ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store 16 into %stack.0, align 4, addrspace 5) - ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %stack.0, align 4, addrspace 5) + ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0, align 4, addrspace 5) + ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A: S_ENDPGM 0 ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v4 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3 @@ -322,8 +322,8 @@ body: | ; FLATSCR-GFX90A-V2A: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 ; FLATSCR-GFX90A-V2A: S_ENDPGM 0 $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF - SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store 16 into %stack.0, align 4, addrspace 5) - $vgpr0_vgpr1_vgpr2_vgpr3 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 16 from %stack.0, align 4, addrspace 5) + SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, align 4, addrspace 5) + $vgpr0_vgpr1_vgpr2_vgpr3 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) S_ENDPGM 0 ... @@ -341,16 +341,16 @@ body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_v5 ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0 + 4, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0 + 8, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0 + 12, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0 + 16, addrspace 5) - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0, addrspace 5) - ; MUBUF: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0 + 4, addrspace 5) - ; MUBUF: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0 + 8, addrspace 5) - ; MUBUF: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0 + 12, addrspace 5) - ; MUBUF: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0 + 16, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0 + 4, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0 + 8, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0 + 12, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0 + 16, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load (s32) from %stack.0, addrspace 5) + ; MUBUF: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load (s32) from %stack.0 + 4, addrspace 5) + ; MUBUF: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load (s32) from %stack.0 + 8, addrspace 5) + ; MUBUF: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load (s32) from %stack.0 + 12, addrspace 5) + ; MUBUF: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load (s32) from %stack.0 + 16, addrspace 5) ; MUBUF: S_ENDPGM 0 ; MUBUF-V2A-LABEL: name: test_spill_v5 ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4 @@ -368,10 +368,10 @@ body: | ; MUBUF-V2A: S_ENDPGM 0 ; FLATSCR-LABEL: name: test_spill_v5 ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF - ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 16 into %stack.0, align 4, addrspace 5) - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0 + 16, addrspace 5) - ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 16 from %stack.0, align 4, addrspace 5) - ; FLATSCR: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0 + 16, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s128) into %stack.0, align 4, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0 + 16, addrspace 5) + ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load (s128) from %stack.0, align 4, addrspace 5) + ; FLATSCR: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load (s32) from %stack.0 + 16, addrspace 5) ; FLATSCR: S_ENDPGM 0 ; FLATSCR-V2A-LABEL: name: test_spill_v5 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4 @@ -389,16 +389,16 @@ body: | ; FLATSCR-V2A: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_v5 ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0 + 4, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0 + 8, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0 + 12, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0 + 16, addrspace 5) - ; MUBUF-GFX90A: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0, addrspace 5) - ; MUBUF-GFX90A: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0 + 4, addrspace 5) - ; MUBUF-GFX90A: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0 + 8, addrspace 5) - ; MUBUF-GFX90A: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0 + 12, addrspace 5) - ; MUBUF-GFX90A: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0 + 16, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0 + 4, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0 + 8, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0 + 12, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0 + 16, addrspace 5) + ; MUBUF-GFX90A: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load (s32) from %stack.0, addrspace 5) + ; MUBUF-GFX90A: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load (s32) from %stack.0 + 4, addrspace 5) + ; MUBUF-GFX90A: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load (s32) from %stack.0 + 8, addrspace 5) + ; MUBUF-GFX90A: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load (s32) from %stack.0 + 12, addrspace 5) + ; MUBUF-GFX90A: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load (s32) from %stack.0 + 16, addrspace 5) ; MUBUF-GFX90A: S_ENDPGM 0 ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v5 ; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4 @@ -416,10 +416,10 @@ body: | ; MUBUF-GFX90A-V2A: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_v5 ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF - ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 16 into %stack.0, align 4, addrspace 5) - ; FLATSCR-GFX90A: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0 + 16, addrspace 5) - ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 16 from %stack.0, align 4, addrspace 5) - ; FLATSCR-GFX90A: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0 + 16, addrspace 5) + ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s128) into %stack.0, align 4, addrspace 5) + ; FLATSCR-GFX90A: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0 + 16, addrspace 5) + ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load (s128) from %stack.0, align 4, addrspace 5) + ; FLATSCR-GFX90A: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load (s32) from %stack.0 + 16, addrspace 5) ; FLATSCR-GFX90A: S_ENDPGM 0 ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v5 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4 @@ -436,8 +436,8 @@ body: | ; FLATSCR-GFX90A-V2A: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; FLATSCR-GFX90A-V2A: S_ENDPGM 0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF - SI_SPILL_V160_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, %stack.0, $sgpr32, 0, implicit $exec :: (store 20 into %stack.0, align 4, addrspace 5) - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = SI_SPILL_V160_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 20 from %stack.0, align 4, addrspace 5) + SI_SPILL_V160_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, %stack.0, $sgpr32, 0, implicit $exec :: (store (s160) into %stack.0, align 4, addrspace 5) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = SI_SPILL_V160_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s160) from %stack.0, align 4, addrspace 5) S_ENDPGM 0 ... @@ -455,18 +455,18 @@ body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_v6 ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 4 into %stack.0, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 4 into %stack.0 + 4, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 4 into %stack.0 + 8, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 4 into %stack.0 + 12, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 4 into %stack.0 + 16, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 4 into %stack.0 + 20, addrspace 5) - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 4 from %stack.0, addrspace 5) - ; MUBUF: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 4 from %stack.0 + 4, addrspace 5) - ; MUBUF: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 4 from %stack.0 + 8, addrspace 5) - ; MUBUF: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 4 from %stack.0 + 12, addrspace 5) - ; MUBUF: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 4 from %stack.0 + 16, addrspace 5) - ; MUBUF: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 4 from %stack.0 + 20, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s32) into %stack.0, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s32) into %stack.0 + 4, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s32) into %stack.0 + 8, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s32) into %stack.0 + 12, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s32) into %stack.0 + 16, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s32) into %stack.0 + 20, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load (s32) from %stack.0, addrspace 5) + ; MUBUF: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load (s32) from %stack.0 + 4, addrspace 5) + ; MUBUF: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load (s32) from %stack.0 + 8, addrspace 5) + ; MUBUF: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load (s32) from %stack.0 + 12, addrspace 5) + ; MUBUF: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load (s32) from %stack.0 + 16, addrspace 5) + ; MUBUF: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load (s32) from %stack.0 + 20, addrspace 5) ; MUBUF: S_ENDPGM 0 ; MUBUF-V2A-LABEL: name: test_spill_v6 ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5 @@ -486,10 +486,10 @@ body: | ; MUBUF-V2A: S_ENDPGM 0 ; FLATSCR-LABEL: name: test_spill_v6 ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF - ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 16 into %stack.0, align 4, addrspace 5) - ; FLATSCR: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr4_vgpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 8 into %stack.0 + 16, align 4, addrspace 5) - ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 16 from %stack.0, align 4, addrspace 5) - ; FLATSCR: $vgpr4_vgpr5 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 8 from %stack.0 + 16, align 4, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s128) into %stack.0, align 4, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr4_vgpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s64) into %stack.0 + 16, align 4, addrspace 5) + ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load (s128) from %stack.0, align 4, addrspace 5) + ; FLATSCR: $vgpr4_vgpr5 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load (s64) from %stack.0 + 16, align 4, addrspace 5) ; FLATSCR: S_ENDPGM 0 ; FLATSCR-V2A-LABEL: name: test_spill_v6 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5 @@ -509,18 +509,18 @@ body: | ; FLATSCR-V2A: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_v6 ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 4 into %stack.0, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 4 into %stack.0 + 4, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 4 into %stack.0 + 8, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 4 into %stack.0 + 12, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 4 into %stack.0 + 16, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 4 into %stack.0 + 20, addrspace 5) - ; MUBUF-GFX90A: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 4 from %stack.0, addrspace 5) - ; MUBUF-GFX90A: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 4 from %stack.0 + 4, addrspace 5) - ; MUBUF-GFX90A: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 4 from %stack.0 + 8, addrspace 5) - ; MUBUF-GFX90A: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 4 from %stack.0 + 12, addrspace 5) - ; MUBUF-GFX90A: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 4 from %stack.0 + 16, addrspace 5) - ; MUBUF-GFX90A: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 4 from %stack.0 + 20, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s32) into %stack.0, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s32) into %stack.0 + 4, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s32) into %stack.0 + 8, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s32) into %stack.0 + 12, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s32) into %stack.0 + 16, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s32) into %stack.0 + 20, addrspace 5) + ; MUBUF-GFX90A: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load (s32) from %stack.0, addrspace 5) + ; MUBUF-GFX90A: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load (s32) from %stack.0 + 4, addrspace 5) + ; MUBUF-GFX90A: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load (s32) from %stack.0 + 8, addrspace 5) + ; MUBUF-GFX90A: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load (s32) from %stack.0 + 12, addrspace 5) + ; MUBUF-GFX90A: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load (s32) from %stack.0 + 16, addrspace 5) + ; MUBUF-GFX90A: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load (s32) from %stack.0 + 20, addrspace 5) ; MUBUF-GFX90A: S_ENDPGM 0 ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v6 ; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5 @@ -540,10 +540,10 @@ body: | ; MUBUF-GFX90A-V2A: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_v6 ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF - ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 16 into %stack.0, align 4, addrspace 5) - ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr4_vgpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 8 into %stack.0 + 16, align 4, addrspace 5) - ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 16 from %stack.0, align 4, addrspace 5) - ; FLATSCR-GFX90A: $vgpr4_vgpr5 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 8 from %stack.0 + 16, align 4, addrspace 5) + ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s128) into %stack.0, align 4, addrspace 5) + ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr4_vgpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s64) into %stack.0 + 16, align 4, addrspace 5) + ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load (s128) from %stack.0, align 4, addrspace 5) + ; FLATSCR-GFX90A: $vgpr4_vgpr5 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load (s64) from %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A: S_ENDPGM 0 ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v6 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5 @@ -562,8 +562,8 @@ body: | ; FLATSCR-GFX90A-V2A: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; FLATSCR-GFX90A-V2A: S_ENDPGM 0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF - SI_SPILL_V192_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, %stack.0, $sgpr32, 0, implicit $exec :: (store 24 into %stack.0, align 4, addrspace 5) - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = SI_SPILL_V192_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 24 from %stack.0, align 4, addrspace 5) + SI_SPILL_V192_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, %stack.0, $sgpr32, 0, implicit $exec :: (store (s160) into %stack.0, align 4, addrspace 5) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = SI_SPILL_V192_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s160) from %stack.0, align 4, addrspace 5) S_ENDPGM 0 ... @@ -581,22 +581,22 @@ body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_v8 ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 4, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 8, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 12, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 16, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 20, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 24, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 28, addrspace 5) - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0, addrspace 5) - ; MUBUF: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 4, addrspace 5) - ; MUBUF: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 8, addrspace 5) - ; MUBUF: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 12, addrspace 5) - ; MUBUF: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 16, addrspace 5) - ; MUBUF: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 20, addrspace 5) - ; MUBUF: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 24, addrspace 5) - ; MUBUF: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 28, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s32) into %stack.0, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s32) into %stack.0 + 4, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s32) into %stack.0 + 8, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s32) into %stack.0 + 12, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s32) into %stack.0 + 16, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s32) into %stack.0 + 20, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s32) into %stack.0 + 24, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s32) into %stack.0 + 28, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s32) from %stack.0, addrspace 5) + ; MUBUF: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s32) from %stack.0 + 4, addrspace 5) + ; MUBUF: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s32) from %stack.0 + 8, addrspace 5) + ; MUBUF: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s32) from %stack.0 + 12, addrspace 5) + ; MUBUF: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s32) from %stack.0 + 16, addrspace 5) + ; MUBUF: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s32) from %stack.0 + 20, addrspace 5) + ; MUBUF: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s32) from %stack.0 + 24, addrspace 5) + ; MUBUF: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s32) from %stack.0 + 28, addrspace 5) ; MUBUF: S_ENDPGM 0 ; MUBUF-V2A-LABEL: name: test_spill_v8 ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7 @@ -620,10 +620,10 @@ body: | ; MUBUF-V2A: S_ENDPGM 0 ; FLATSCR-LABEL: name: test_spill_v8 ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF - ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 16 into %stack.0, align 4, addrspace 5) - ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 16 into %stack.0 + 16, align 4, addrspace 5) - ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 16 from %stack.0, align 4, addrspace 5) - ; FLATSCR: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 16 from %stack.0 + 16, align 4, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s128) into %stack.0, align 4, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) + ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s128) from %stack.0, align 4, addrspace 5) + ; FLATSCR: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s128) from %stack.0 + 16, align 4, addrspace 5) ; FLATSCR: S_ENDPGM 0 ; FLATSCR-V2A-LABEL: name: test_spill_v8 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7 @@ -647,22 +647,22 @@ body: | ; FLATSCR-V2A: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_v8 ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 4, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 8, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 12, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 16, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 20, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 24, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 28, addrspace 5) - ; MUBUF-GFX90A: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0, addrspace 5) - ; MUBUF-GFX90A: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 4, addrspace 5) - ; MUBUF-GFX90A: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 8, addrspace 5) - ; MUBUF-GFX90A: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 12, addrspace 5) - ; MUBUF-GFX90A: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 16, addrspace 5) - ; MUBUF-GFX90A: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 20, addrspace 5) - ; MUBUF-GFX90A: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 24, addrspace 5) - ; MUBUF-GFX90A: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 28, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s32) into %stack.0, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s32) into %stack.0 + 4, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s32) into %stack.0 + 8, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s32) into %stack.0 + 12, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s32) into %stack.0 + 16, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s32) into %stack.0 + 20, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s32) into %stack.0 + 24, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s32) into %stack.0 + 28, addrspace 5) + ; MUBUF-GFX90A: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s32) from %stack.0, addrspace 5) + ; MUBUF-GFX90A: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s32) from %stack.0 + 4, addrspace 5) + ; MUBUF-GFX90A: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s32) from %stack.0 + 8, addrspace 5) + ; MUBUF-GFX90A: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s32) from %stack.0 + 12, addrspace 5) + ; MUBUF-GFX90A: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s32) from %stack.0 + 16, addrspace 5) + ; MUBUF-GFX90A: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s32) from %stack.0 + 20, addrspace 5) + ; MUBUF-GFX90A: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s32) from %stack.0 + 24, addrspace 5) + ; MUBUF-GFX90A: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s32) from %stack.0 + 28, addrspace 5) ; MUBUF-GFX90A: S_ENDPGM 0 ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v8 ; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7 @@ -686,10 +686,10 @@ body: | ; MUBUF-GFX90A-V2A: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_v8 ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF - ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 16 into %stack.0, align 4, addrspace 5) - ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 16 into %stack.0 + 16, align 4, addrspace 5) - ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 16 from %stack.0, align 4, addrspace 5) - ; FLATSCR-GFX90A: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 16 from %stack.0 + 16, align 4, addrspace 5) + ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s128) into %stack.0, align 4, addrspace 5) + ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) + ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s128) from %stack.0, align 4, addrspace 5) + ; FLATSCR-GFX90A: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s128) from %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A: S_ENDPGM 0 ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v8 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7 @@ -712,8 +712,8 @@ body: | ; FLATSCR-GFX90A-V2A: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr7, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; FLATSCR-GFX90A-V2A: S_ENDPGM 0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF - SI_SPILL_V256_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, %stack.0, $sgpr32, 0, implicit $exec :: (store 32 into %stack.0, align 4, addrspace 5) - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = SI_SPILL_V256_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 32 from %stack.0, align 4, addrspace 5) + SI_SPILL_V256_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, %stack.0, $sgpr32, 0, implicit $exec :: (store (s256) into %stack.0, align 4, addrspace 5) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = SI_SPILL_V256_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.0, align 4, addrspace 5) S_ENDPGM 0 ... @@ -731,38 +731,38 @@ body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_v16 ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 4, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 8, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 12, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 16, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 20, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 24, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 28, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 32, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 36, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 40, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 44, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 48, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 52, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 56, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 60, addrspace 5) - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0, addrspace 5) - ; MUBUF: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 4, addrspace 5) - ; MUBUF: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 8, addrspace 5) - ; MUBUF: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 12, addrspace 5) - ; MUBUF: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 16, addrspace 5) - ; MUBUF: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 20, addrspace 5) - ; MUBUF: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 24, addrspace 5) - ; MUBUF: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 28, addrspace 5) - ; MUBUF: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 32, addrspace 5) - ; MUBUF: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 36, addrspace 5) - ; MUBUF: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 40, addrspace 5) - ; MUBUF: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 44, addrspace 5) - ; MUBUF: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 48, addrspace 5) - ; MUBUF: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 52, addrspace 5) - ; MUBUF: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 56, addrspace 5) - ; MUBUF: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 60, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 4, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 8, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 12, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 16, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 20, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 24, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 28, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 32, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 36, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 40, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 44, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 48, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 52, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 56, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 60, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0, addrspace 5) + ; MUBUF: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0 + 4, addrspace 5) + ; MUBUF: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0 + 8, addrspace 5) + ; MUBUF: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0 + 12, addrspace 5) + ; MUBUF: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0 + 16, addrspace 5) + ; MUBUF: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0 + 20, addrspace 5) + ; MUBUF: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0 + 24, addrspace 5) + ; MUBUF: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0 + 28, addrspace 5) + ; MUBUF: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0 + 32, addrspace 5) + ; MUBUF: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0 + 36, addrspace 5) + ; MUBUF: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0 + 40, addrspace 5) + ; MUBUF: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0 + 44, addrspace 5) + ; MUBUF: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0 + 48, addrspace 5) + ; MUBUF: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0 + 52, addrspace 5) + ; MUBUF: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0 + 56, addrspace 5) + ; MUBUF: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0 + 60, addrspace 5) ; MUBUF: S_ENDPGM 0 ; MUBUF-V2A-LABEL: name: test_spill_v16 ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15 @@ -802,14 +802,14 @@ body: | ; MUBUF-V2A: S_ENDPGM 0 ; FLATSCR-LABEL: name: test_spill_v16 ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF - ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 16 into %stack.0, align 4, addrspace 5) - ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 16 into %stack.0 + 16, align 4, addrspace 5) - ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 16 into %stack.0 + 32, align 4, addrspace 5) - ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 16 into %stack.0 + 48, align 4, addrspace 5) - ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 16 from %stack.0, align 4, addrspace 5) - ; FLATSCR: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 16 from %stack.0 + 16, align 4, addrspace 5) - ; FLATSCR: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 16 from %stack.0 + 32, align 4, addrspace 5) - ; FLATSCR: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 16 from %stack.0 + 48, align 4, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s128) into %stack.0, align 4, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s128) into %stack.0 + 32, align 4, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s128) into %stack.0 + 48, align 4, addrspace 5) + ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s128) from %stack.0, align 4, addrspace 5) + ; FLATSCR: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s128) from %stack.0 + 16, align 4, addrspace 5) + ; FLATSCR: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s128) from %stack.0 + 32, align 4, addrspace 5) + ; FLATSCR: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s128) from %stack.0 + 48, align 4, addrspace 5) ; FLATSCR: S_ENDPGM 0 ; FLATSCR-V2A-LABEL: name: test_spill_v16 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15 @@ -849,38 +849,38 @@ body: | ; FLATSCR-V2A: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_v16 ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 4, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 8, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 12, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 16, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 20, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 24, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 28, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 32, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 36, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 40, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 44, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 48, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 52, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 56, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 60, addrspace 5) - ; MUBUF-GFX90A: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0, addrspace 5) - ; MUBUF-GFX90A: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 4, addrspace 5) - ; MUBUF-GFX90A: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 8, addrspace 5) - ; MUBUF-GFX90A: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 12, addrspace 5) - ; MUBUF-GFX90A: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 16, addrspace 5) - ; MUBUF-GFX90A: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 20, addrspace 5) - ; MUBUF-GFX90A: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 24, addrspace 5) - ; MUBUF-GFX90A: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 28, addrspace 5) - ; MUBUF-GFX90A: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 32, addrspace 5) - ; MUBUF-GFX90A: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 36, addrspace 5) - ; MUBUF-GFX90A: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 40, addrspace 5) - ; MUBUF-GFX90A: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 44, addrspace 5) - ; MUBUF-GFX90A: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 48, addrspace 5) - ; MUBUF-GFX90A: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 52, addrspace 5) - ; MUBUF-GFX90A: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 56, addrspace 5) - ; MUBUF-GFX90A: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 60, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 4, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 8, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 12, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 16, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 20, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 24, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 28, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 32, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 36, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 40, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 44, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 48, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 52, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 56, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 60, addrspace 5) + ; MUBUF-GFX90A: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0, addrspace 5) + ; MUBUF-GFX90A: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0 + 4, addrspace 5) + ; MUBUF-GFX90A: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0 + 8, addrspace 5) + ; MUBUF-GFX90A: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0 + 12, addrspace 5) + ; MUBUF-GFX90A: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0 + 16, addrspace 5) + ; MUBUF-GFX90A: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0 + 20, addrspace 5) + ; MUBUF-GFX90A: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0 + 24, addrspace 5) + ; MUBUF-GFX90A: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0 + 28, addrspace 5) + ; MUBUF-GFX90A: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0 + 32, addrspace 5) + ; MUBUF-GFX90A: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0 + 36, addrspace 5) + ; MUBUF-GFX90A: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0 + 40, addrspace 5) + ; MUBUF-GFX90A: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0 + 44, addrspace 5) + ; MUBUF-GFX90A: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0 + 48, addrspace 5) + ; MUBUF-GFX90A: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0 + 52, addrspace 5) + ; MUBUF-GFX90A: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0 + 56, addrspace 5) + ; MUBUF-GFX90A: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0 + 60, addrspace 5) ; MUBUF-GFX90A: S_ENDPGM 0 ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v16 ; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15 @@ -920,14 +920,14 @@ body: | ; MUBUF-GFX90A-V2A: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_v16 ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF - ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 16 into %stack.0, align 4, addrspace 5) - ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 16 into %stack.0 + 16, align 4, addrspace 5) - ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 16 into %stack.0 + 32, align 4, addrspace 5) - ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 16 into %stack.0 + 48, align 4, addrspace 5) - ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 16 from %stack.0, align 4, addrspace 5) - ; FLATSCR-GFX90A: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 16 from %stack.0 + 16, align 4, addrspace 5) - ; FLATSCR-GFX90A: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 16 from %stack.0 + 32, align 4, addrspace 5) - ; FLATSCR-GFX90A: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 16 from %stack.0 + 48, align 4, addrspace 5) + ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s128) into %stack.0, align 4, addrspace 5) + ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) + ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s128) into %stack.0 + 32, align 4, addrspace 5) + ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s128) into %stack.0 + 48, align 4, addrspace 5) + ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s128) from %stack.0, align 4, addrspace 5) + ; FLATSCR-GFX90A: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s128) from %stack.0 + 16, align 4, addrspace 5) + ; FLATSCR-GFX90A: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s128) from %stack.0 + 32, align 4, addrspace 5) + ; FLATSCR-GFX90A: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s128) from %stack.0 + 48, align 4, addrspace 5) ; FLATSCR-GFX90A: S_ENDPGM 0 ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v16 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15 @@ -966,8 +966,8 @@ body: | ; FLATSCR-GFX90A-V2A: $vgpr15 = V_ACCVGPR_READ_B32_e64 $agpr15, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; FLATSCR-GFX90A-V2A: S_ENDPGM 0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF - SI_SPILL_V512_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, %stack.0, $sgpr32, 0, implicit $exec :: (store 64 into %stack.0, align 4, addrspace 5) - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = SI_SPILL_V512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 64 from %stack.0, align 4, addrspace 5) + SI_SPILL_V512_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, %stack.0, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = SI_SPILL_V512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5) S_ENDPGM 0 ... @@ -985,70 +985,70 @@ body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_v32 ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 4, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 8, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 12, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 16, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 20, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 24, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 28, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 32, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 36, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 40, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 44, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 48, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 52, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 56, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 60, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 64, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr17, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 68, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr18, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 72, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr19, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 76, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 80, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr21, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 84, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr22, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 88, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr23, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 92, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr24, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 96, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr25, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 100, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr26, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 104, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr27, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 108, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr28, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 112, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr29, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 116, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr30, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 120, addrspace 5) - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr31, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 124, addrspace 5) - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0, addrspace 5) - ; MUBUF: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 4, addrspace 5) - ; MUBUF: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 8, addrspace 5) - ; MUBUF: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 12, addrspace 5) - ; MUBUF: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 16, addrspace 5) - ; MUBUF: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 20, addrspace 5) - ; MUBUF: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 24, addrspace 5) - ; MUBUF: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 28, addrspace 5) - ; MUBUF: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 32, addrspace 5) - ; MUBUF: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 36, addrspace 5) - ; MUBUF: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 40, addrspace 5) - ; MUBUF: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 44, addrspace 5) - ; MUBUF: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 48, addrspace 5) - ; MUBUF: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 52, addrspace 5) - ; MUBUF: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 56, addrspace 5) - ; MUBUF: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 60, addrspace 5) - ; MUBUF: $vgpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 64, addrspace 5) - ; MUBUF: $vgpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 68, addrspace 5) - ; MUBUF: $vgpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 72, addrspace 5) - ; MUBUF: $vgpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 76, addrspace 5) - ; MUBUF: $vgpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 80, addrspace 5) - ; MUBUF: $vgpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 84, addrspace 5) - ; MUBUF: $vgpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 88, addrspace 5) - ; MUBUF: $vgpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 92, addrspace 5) - ; MUBUF: $vgpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 96, addrspace 5) - ; MUBUF: $vgpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 100, addrspace 5) - ; MUBUF: $vgpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 104, addrspace 5) - ; MUBUF: $vgpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 108, addrspace 5) - ; MUBUF: $vgpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 112, addrspace 5) - ; MUBUF: $vgpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 116, addrspace 5) - ; MUBUF: $vgpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 120, addrspace 5) - ; MUBUF: $vgpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 124, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 4, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 8, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 12, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 16, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 20, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 24, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 28, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 32, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 36, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 40, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 44, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 48, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 52, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 56, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 60, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 64, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr17, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 68, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr18, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 72, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr19, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 76, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 80, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr21, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 84, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr22, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 88, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr23, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 92, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr24, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 96, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr25, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 100, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr26, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 104, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr27, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 108, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr28, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 112, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr29, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 116, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr30, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 120, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr31, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 124, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0, addrspace 5) + ; MUBUF: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 4, addrspace 5) + ; MUBUF: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 8, addrspace 5) + ; MUBUF: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 12, addrspace 5) + ; MUBUF: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 16, addrspace 5) + ; MUBUF: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 20, addrspace 5) + ; MUBUF: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 24, addrspace 5) + ; MUBUF: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 28, addrspace 5) + ; MUBUF: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 32, addrspace 5) + ; MUBUF: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 36, addrspace 5) + ; MUBUF: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 40, addrspace 5) + ; MUBUF: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 44, addrspace 5) + ; MUBUF: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 48, addrspace 5) + ; MUBUF: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 52, addrspace 5) + ; MUBUF: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 56, addrspace 5) + ; MUBUF: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 60, addrspace 5) + ; MUBUF: $vgpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 64, addrspace 5) + ; MUBUF: $vgpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 68, addrspace 5) + ; MUBUF: $vgpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 72, addrspace 5) + ; MUBUF: $vgpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 76, addrspace 5) + ; MUBUF: $vgpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 80, addrspace 5) + ; MUBUF: $vgpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 84, addrspace 5) + ; MUBUF: $vgpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 88, addrspace 5) + ; MUBUF: $vgpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 92, addrspace 5) + ; MUBUF: $vgpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 96, addrspace 5) + ; MUBUF: $vgpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 100, addrspace 5) + ; MUBUF: $vgpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 104, addrspace 5) + ; MUBUF: $vgpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 108, addrspace 5) + ; MUBUF: $vgpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 112, addrspace 5) + ; MUBUF: $vgpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 116, addrspace 5) + ; MUBUF: $vgpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 120, addrspace 5) + ; MUBUF: $vgpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 124, addrspace 5) ; MUBUF: S_ENDPGM 0 ; MUBUF-V2A-LABEL: name: test_spill_v32 ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $agpr16, $agpr17, $agpr18, $agpr19, $agpr20, $agpr21, $agpr22, $agpr23, $agpr24, $agpr25, $agpr26, $agpr27, $agpr28, $agpr29, $agpr30, $agpr31 @@ -1120,22 +1120,22 @@ body: | ; MUBUF-V2A: S_ENDPGM 0 ; FLATSCR-LABEL: name: test_spill_v32 ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF - ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0, align 4, addrspace 5) - ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0 + 16, align 4, addrspace 5) - ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0 + 32, align 4, addrspace 5) - ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0 + 48, align 4, addrspace 5) - ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr16_vgpr17_vgpr18_vgpr19, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0 + 64, align 4, addrspace 5) - ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr20_vgpr21_vgpr22_vgpr23, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0 + 80, align 4, addrspace 5) - ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr24_vgpr25_vgpr26_vgpr27, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0 + 96, align 4, addrspace 5) - ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr28_vgpr29_vgpr30_vgpr31, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0 + 112, align 4, addrspace 5) - ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0, align 4, addrspace 5) - ; FLATSCR: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0 + 16, align 4, addrspace 5) - ; FLATSCR: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0 + 32, align 4, addrspace 5) - ; FLATSCR: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0 + 48, align 4, addrspace 5) - ; FLATSCR: $vgpr16_vgpr17_vgpr18_vgpr19 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0 + 64, align 4, addrspace 5) - ; FLATSCR: $vgpr20_vgpr21_vgpr22_vgpr23 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0 + 80, align 4, addrspace 5) - ; FLATSCR: $vgpr24_vgpr25_vgpr26_vgpr27 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0 + 96, align 4, addrspace 5) - ; FLATSCR: $vgpr28_vgpr29_vgpr30_vgpr31 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0 + 112, align 4, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s128) into %stack.0, align 4, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s128) into %stack.0 + 32, align 4, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s128) into %stack.0 + 48, align 4, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr16_vgpr17_vgpr18_vgpr19, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s128) into %stack.0 + 64, align 4, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr20_vgpr21_vgpr22_vgpr23, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s128) into %stack.0 + 80, align 4, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr24_vgpr25_vgpr26_vgpr27, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s128) into %stack.0 + 96, align 4, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr28_vgpr29_vgpr30_vgpr31, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s128) into %stack.0 + 112, align 4, addrspace 5) + ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s128) from %stack.0, align 4, addrspace 5) + ; FLATSCR: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s128) from %stack.0 + 16, align 4, addrspace 5) + ; FLATSCR: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s128) from %stack.0 + 32, align 4, addrspace 5) + ; FLATSCR: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s128) from %stack.0 + 48, align 4, addrspace 5) + ; FLATSCR: $vgpr16_vgpr17_vgpr18_vgpr19 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s128) from %stack.0 + 64, align 4, addrspace 5) + ; FLATSCR: $vgpr20_vgpr21_vgpr22_vgpr23 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s128) from %stack.0 + 80, align 4, addrspace 5) + ; FLATSCR: $vgpr24_vgpr25_vgpr26_vgpr27 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s128) from %stack.0 + 96, align 4, addrspace 5) + ; FLATSCR: $vgpr28_vgpr29_vgpr30_vgpr31 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s128) from %stack.0 + 112, align 4, addrspace 5) ; FLATSCR: S_ENDPGM 0 ; FLATSCR-V2A-LABEL: name: test_spill_v32 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $agpr16, $agpr17, $agpr18, $agpr19, $agpr20, $agpr21, $agpr22, $agpr23, $agpr24, $agpr25, $agpr26, $agpr27, $agpr28, $agpr29, $agpr30, $agpr31 @@ -1207,70 +1207,70 @@ body: | ; FLATSCR-V2A: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_v32 ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 4, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 8, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 12, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 16, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 20, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 24, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 28, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 32, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 36, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 40, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 44, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 48, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 52, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 56, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 60, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 64, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr17, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 68, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr18, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 72, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr19, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 76, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 80, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr21, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 84, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr22, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 88, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr23, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 92, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr24, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 96, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr25, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 100, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr26, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 104, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr27, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 108, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr28, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 112, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr29, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 116, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr30, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 120, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr31, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 124, addrspace 5) - ; MUBUF-GFX90A: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0, addrspace 5) - ; MUBUF-GFX90A: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 4, addrspace 5) - ; MUBUF-GFX90A: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 8, addrspace 5) - ; MUBUF-GFX90A: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 12, addrspace 5) - ; MUBUF-GFX90A: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 16, addrspace 5) - ; MUBUF-GFX90A: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 20, addrspace 5) - ; MUBUF-GFX90A: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 24, addrspace 5) - ; MUBUF-GFX90A: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 28, addrspace 5) - ; MUBUF-GFX90A: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 32, addrspace 5) - ; MUBUF-GFX90A: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 36, addrspace 5) - ; MUBUF-GFX90A: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 40, addrspace 5) - ; MUBUF-GFX90A: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 44, addrspace 5) - ; MUBUF-GFX90A: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 48, addrspace 5) - ; MUBUF-GFX90A: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 52, addrspace 5) - ; MUBUF-GFX90A: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 56, addrspace 5) - ; MUBUF-GFX90A: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 60, addrspace 5) - ; MUBUF-GFX90A: $vgpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 64, addrspace 5) - ; MUBUF-GFX90A: $vgpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 68, addrspace 5) - ; MUBUF-GFX90A: $vgpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 72, addrspace 5) - ; MUBUF-GFX90A: $vgpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 76, addrspace 5) - ; MUBUF-GFX90A: $vgpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 80, addrspace 5) - ; MUBUF-GFX90A: $vgpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 84, addrspace 5) - ; MUBUF-GFX90A: $vgpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 88, addrspace 5) - ; MUBUF-GFX90A: $vgpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 92, addrspace 5) - ; MUBUF-GFX90A: $vgpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 96, addrspace 5) - ; MUBUF-GFX90A: $vgpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 100, addrspace 5) - ; MUBUF-GFX90A: $vgpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 104, addrspace 5) - ; MUBUF-GFX90A: $vgpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 108, addrspace 5) - ; MUBUF-GFX90A: $vgpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 112, addrspace 5) - ; MUBUF-GFX90A: $vgpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 116, addrspace 5) - ; MUBUF-GFX90A: $vgpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 120, addrspace 5) - ; MUBUF-GFX90A: $vgpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 124, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 4, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 8, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 12, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 16, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 20, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 24, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 28, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 32, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 36, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 40, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 44, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 48, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 52, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 56, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 60, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 64, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr17, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 68, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr18, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 72, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr19, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 76, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 80, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr21, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 84, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr22, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 88, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr23, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 92, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr24, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 96, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr25, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 100, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr26, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 104, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr27, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 108, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr28, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 112, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr29, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 116, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr30, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 120, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $vgpr31, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 124, addrspace 5) + ; MUBUF-GFX90A: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0, addrspace 5) + ; MUBUF-GFX90A: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 4, addrspace 5) + ; MUBUF-GFX90A: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 8, addrspace 5) + ; MUBUF-GFX90A: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 12, addrspace 5) + ; MUBUF-GFX90A: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 16, addrspace 5) + ; MUBUF-GFX90A: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 20, addrspace 5) + ; MUBUF-GFX90A: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 24, addrspace 5) + ; MUBUF-GFX90A: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 28, addrspace 5) + ; MUBUF-GFX90A: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 32, addrspace 5) + ; MUBUF-GFX90A: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 36, addrspace 5) + ; MUBUF-GFX90A: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 40, addrspace 5) + ; MUBUF-GFX90A: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 44, addrspace 5) + ; MUBUF-GFX90A: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 48, addrspace 5) + ; MUBUF-GFX90A: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 52, addrspace 5) + ; MUBUF-GFX90A: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 56, addrspace 5) + ; MUBUF-GFX90A: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 60, addrspace 5) + ; MUBUF-GFX90A: $vgpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 64, addrspace 5) + ; MUBUF-GFX90A: $vgpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 68, addrspace 5) + ; MUBUF-GFX90A: $vgpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 72, addrspace 5) + ; MUBUF-GFX90A: $vgpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 76, addrspace 5) + ; MUBUF-GFX90A: $vgpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 80, addrspace 5) + ; MUBUF-GFX90A: $vgpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 84, addrspace 5) + ; MUBUF-GFX90A: $vgpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 88, addrspace 5) + ; MUBUF-GFX90A: $vgpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 92, addrspace 5) + ; MUBUF-GFX90A: $vgpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 96, addrspace 5) + ; MUBUF-GFX90A: $vgpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 100, addrspace 5) + ; MUBUF-GFX90A: $vgpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 104, addrspace 5) + ; MUBUF-GFX90A: $vgpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 108, addrspace 5) + ; MUBUF-GFX90A: $vgpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 112, addrspace 5) + ; MUBUF-GFX90A: $vgpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 116, addrspace 5) + ; MUBUF-GFX90A: $vgpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 120, addrspace 5) + ; MUBUF-GFX90A: $vgpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0 + 124, addrspace 5) ; MUBUF-GFX90A: S_ENDPGM 0 ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v32 ; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $agpr16, $agpr17, $agpr18, $agpr19, $agpr20, $agpr21, $agpr22, $agpr23, $agpr24, $agpr25, $agpr26, $agpr27, $agpr28, $agpr29, $agpr30, $agpr31 @@ -1342,22 +1342,22 @@ body: | ; MUBUF-GFX90A-V2A: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_v32 ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF - ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0, align 4, addrspace 5) - ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0 + 16, align 4, addrspace 5) - ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0 + 32, align 4, addrspace 5) - ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0 + 48, align 4, addrspace 5) - ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr16_vgpr17_vgpr18_vgpr19, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0 + 64, align 4, addrspace 5) - ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr20_vgpr21_vgpr22_vgpr23, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0 + 80, align 4, addrspace 5) - ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr24_vgpr25_vgpr26_vgpr27, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0 + 96, align 4, addrspace 5) - ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr28_vgpr29_vgpr30_vgpr31, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0 + 112, align 4, addrspace 5) - ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0, align 4, addrspace 5) - ; FLATSCR-GFX90A: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0 + 16, align 4, addrspace 5) - ; FLATSCR-GFX90A: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0 + 32, align 4, addrspace 5) - ; FLATSCR-GFX90A: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0 + 48, align 4, addrspace 5) - ; FLATSCR-GFX90A: $vgpr16_vgpr17_vgpr18_vgpr19 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0 + 64, align 4, addrspace 5) - ; FLATSCR-GFX90A: $vgpr20_vgpr21_vgpr22_vgpr23 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0 + 80, align 4, addrspace 5) - ; FLATSCR-GFX90A: $vgpr24_vgpr25_vgpr26_vgpr27 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0 + 96, align 4, addrspace 5) - ; FLATSCR-GFX90A: $vgpr28_vgpr29_vgpr30_vgpr31 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0 + 112, align 4, addrspace 5) + ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s128) into %stack.0, align 4, addrspace 5) + ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) + ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s128) into %stack.0 + 32, align 4, addrspace 5) + ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s128) into %stack.0 + 48, align 4, addrspace 5) + ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr16_vgpr17_vgpr18_vgpr19, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s128) into %stack.0 + 64, align 4, addrspace 5) + ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr20_vgpr21_vgpr22_vgpr23, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s128) into %stack.0 + 80, align 4, addrspace 5) + ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr24_vgpr25_vgpr26_vgpr27, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s128) into %stack.0 + 96, align 4, addrspace 5) + ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr28_vgpr29_vgpr30_vgpr31, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s128) into %stack.0 + 112, align 4, addrspace 5) + ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s128) from %stack.0, align 4, addrspace 5) + ; FLATSCR-GFX90A: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s128) from %stack.0 + 16, align 4, addrspace 5) + ; FLATSCR-GFX90A: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s128) from %stack.0 + 32, align 4, addrspace 5) + ; FLATSCR-GFX90A: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s128) from %stack.0 + 48, align 4, addrspace 5) + ; FLATSCR-GFX90A: $vgpr16_vgpr17_vgpr18_vgpr19 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s128) from %stack.0 + 64, align 4, addrspace 5) + ; FLATSCR-GFX90A: $vgpr20_vgpr21_vgpr22_vgpr23 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s128) from %stack.0 + 80, align 4, addrspace 5) + ; FLATSCR-GFX90A: $vgpr24_vgpr25_vgpr26_vgpr27 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s128) from %stack.0 + 96, align 4, addrspace 5) + ; FLATSCR-GFX90A: $vgpr28_vgpr29_vgpr30_vgpr31 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s128) from %stack.0 + 112, align 4, addrspace 5) ; FLATSCR-GFX90A: S_ENDPGM 0 ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v32 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $agpr16, $agpr17, $agpr18, $agpr19, $agpr20, $agpr21, $agpr22, $agpr23, $agpr24, $agpr25, $agpr26, $agpr27, $agpr28, $agpr29, $agpr30, $agpr31 @@ -1428,8 +1428,8 @@ body: | ; FLATSCR-GFX90A-V2A: $vgpr31 = V_ACCVGPR_READ_B32_e64 $agpr31, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; FLATSCR-GFX90A-V2A: S_ENDPGM 0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF - SI_SPILL_V1024_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, %stack.0, $sgpr32, 0, implicit $exec :: (store 128 into %stack.0, align 4, addrspace 5) - $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = SI_SPILL_V1024_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 128 from %stack.0, align 4, addrspace 5) + SI_SPILL_V1024_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, %stack.0, $sgpr32, 0, implicit $exec :: (store (s1024) into %stack.0, align 4, addrspace 5) + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = SI_SPILL_V1024_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s1024) from %stack.0, align 4, addrspace 5) S_ENDPGM 0 ... @@ -1448,8 +1448,8 @@ body: | ; MUBUF-LABEL: name: test_spill_a1 ; MUBUF: $agpr0 = IMPLICIT_DEF ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) ; MUBUF: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec ; MUBUF: S_ENDPGM 0 ; MUBUF-V2A-LABEL: name: test_spill_a1 @@ -1461,8 +1461,8 @@ body: | ; FLATSCR-LABEL: name: test_spill_a1 ; FLATSCR: $agpr0 = IMPLICIT_DEF ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5) - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) ; FLATSCR: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec ; FLATSCR: S_ENDPGM 0 ; FLATSCR-V2A-LABEL: name: test_spill_a1 @@ -1473,8 +1473,8 @@ body: | ; FLATSCR-V2A: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_a1 ; MUBUF-GFX90A: $agpr0 = IMPLICIT_DEF - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) - ; MUBUF-GFX90A: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; MUBUF-GFX90A: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) ; MUBUF-GFX90A: S_ENDPGM 0 ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a1 ; MUBUF-GFX90A-V2A: liveins: $vgpr0 @@ -1484,8 +1484,8 @@ body: | ; MUBUF-GFX90A-V2A: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_a1 ; FLATSCR-GFX90A: $agpr0 = IMPLICIT_DEF - ; FLATSCR-GFX90A: SCRATCH_STORE_DWORD_SADDR killed $agpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5) - ; FLATSCR-GFX90A: $agpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5) + ; FLATSCR-GFX90A: SCRATCH_STORE_DWORD_SADDR killed $agpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) + ; FLATSCR-GFX90A: $agpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) ; FLATSCR-GFX90A: S_ENDPGM 0 ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a1 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0 @@ -1494,8 +1494,8 @@ body: | ; FLATSCR-GFX90A-V2A: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec ; FLATSCR-GFX90A-V2A: S_ENDPGM 0 $agpr0 = IMPLICIT_DEF - SI_SPILL_A32_SAVE killed $agpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store 4 into %stack.0, align 4, addrspace 5) - $agpr0 = SI_SPILL_A32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5) + SI_SPILL_A32_SAVE killed $agpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, align 4, addrspace 5) + $agpr0 = SI_SPILL_A32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5) S_ENDPGM 0 ... @@ -1514,12 +1514,12 @@ body: | ; MUBUF-LABEL: name: test_spill_a2 ; MUBUF: $agpr0_agpr1 = IMPLICIT_DEF ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1 - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store 4 into %stack.0, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: (store 4 into %stack.0 + 4, addrspace 5) - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: (store (s32) into %stack.0 + 4, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) ; MUBUF: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 4, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5) ; MUBUF: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1 ; MUBUF: S_ENDPGM 0 ; MUBUF-V2A-LABEL: name: test_spill_a2 @@ -1533,12 +1533,12 @@ body: | ; FLATSCR-LABEL: name: test_spill_a2 ; FLATSCR: $agpr0_agpr1 = IMPLICIT_DEF ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1 - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1 :: (store 4 into %stack.0, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1 :: (store 4 into %stack.0 + 4, addrspace 5) - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1 :: (store (s32) into %stack.0 + 4, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) ; FLATSCR: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 4, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 4, addrspace 5) ; FLATSCR: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1 ; FLATSCR: S_ENDPGM 0 ; FLATSCR-V2A-LABEL: name: test_spill_a2 @@ -1551,10 +1551,10 @@ body: | ; FLATSCR-V2A: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_a2 ; MUBUF-GFX90A: $agpr0_agpr1 = IMPLICIT_DEF - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 :: (store 4 into %stack.0, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: (store 4 into %stack.0 + 4, addrspace 5) - ; MUBUF-GFX90A: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1 :: (load 4 from %stack.0, addrspace 5) - ; MUBUF-GFX90A: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1 :: (load 4 from %stack.0 + 4, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 :: (store (s32) into %stack.0, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: (store (s32) into %stack.0 + 4, addrspace 5) + ; MUBUF-GFX90A: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1 :: (load (s32) from %stack.0, addrspace 5) + ; MUBUF-GFX90A: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1 :: (load (s32) from %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A: S_ENDPGM 0 ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a2 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1 @@ -1566,8 +1566,8 @@ body: | ; MUBUF-GFX90A-V2A: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_a2 ; FLATSCR-GFX90A: $agpr0_agpr1 = IMPLICIT_DEF - ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX2_SADDR killed $agpr0_agpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store 8 into %stack.0, align 4, addrspace 5) - ; FLATSCR-GFX90A: $agpr0_agpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load 8 from %stack.0, align 4, addrspace 5) + ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX2_SADDR killed $agpr0_agpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into %stack.0, align 4, addrspace 5) + ; FLATSCR-GFX90A: $agpr0_agpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A: S_ENDPGM 0 ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a2 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1 @@ -1578,8 +1578,8 @@ body: | ; FLATSCR-GFX90A-V2A: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1 ; FLATSCR-GFX90A-V2A: S_ENDPGM 0 $agpr0_agpr1 = IMPLICIT_DEF - SI_SPILL_A64_SAVE killed $agpr0_agpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, align 4, addrspace 5) - $agpr0_agpr1 = SI_SPILL_A64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5) + SI_SPILL_A64_SAVE killed $agpr0_agpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5) + $agpr0_agpr1 = SI_SPILL_A64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5) S_ENDPGM 0 ... @@ -1598,16 +1598,16 @@ body: | ; MUBUF-LABEL: name: test_spill_a3 ; MUBUF: $agpr0_agpr1_agpr2 = IMPLICIT_DEF ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store 4 into %stack.0, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store 4 into %stack.0 + 4, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2 :: (store 4 into %stack.0 + 8, addrspace 5) - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0 + 8, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) ; MUBUF: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 4, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5) ; MUBUF: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 8, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5) ; MUBUF: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 ; MUBUF: S_ENDPGM 0 ; MUBUF-V2A-LABEL: name: test_spill_a3 @@ -1623,16 +1623,16 @@ body: | ; FLATSCR-LABEL: name: test_spill_a3 ; FLATSCR: $agpr0_agpr1_agpr2 = IMPLICIT_DEF ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2 :: (store 4 into %stack.0, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2 :: (store 4 into %stack.0 + 4, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0 + 4, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2 :: (store 4 into %stack.0 + 8, addrspace 5) - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0 + 8, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) ; FLATSCR: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 4, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 4, addrspace 5) ; FLATSCR: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 8, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 8, addrspace 5) ; FLATSCR: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 ; FLATSCR: S_ENDPGM 0 ; FLATSCR-V2A-LABEL: name: test_spill_a3 @@ -1647,12 +1647,12 @@ body: | ; FLATSCR-V2A: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_a3 ; MUBUF-GFX90A: $agpr0_agpr1_agpr2 = IMPLICIT_DEF - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 :: (store 4 into %stack.0, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store 4 into %stack.0 + 4, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2 :: (store 4 into %stack.0 + 8, addrspace 5) - ; MUBUF-GFX90A: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: (load 4 from %stack.0, addrspace 5) - ; MUBUF-GFX90A: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: (load 4 from %stack.0 + 4, addrspace 5) - ; MUBUF-GFX90A: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: (load 4 from %stack.0 + 8, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0 + 4, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0 + 8, addrspace 5) + ; MUBUF-GFX90A: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: (load (s32) from %stack.0, addrspace 5) + ; MUBUF-GFX90A: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: (load (s32) from %stack.0 + 4, addrspace 5) + ; MUBUF-GFX90A: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: (load (s32) from %stack.0 + 8, addrspace 5) ; MUBUF-GFX90A: S_ENDPGM 0 ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a3 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -1666,8 +1666,8 @@ body: | ; MUBUF-GFX90A-V2A: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_a3 ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2 = IMPLICIT_DEF - ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX3_SADDR killed $agpr0_agpr1_agpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store 12 into %stack.0, align 4, addrspace 5) - ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load 12 from %stack.0, align 4, addrspace 5) + ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX3_SADDR killed $agpr0_agpr1_agpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s96) into %stack.0, align 4, addrspace 5) + ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s96) from %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A: S_ENDPGM 0 ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a3 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -1680,8 +1680,8 @@ body: | ; FLATSCR-GFX90A-V2A: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2 ; FLATSCR-GFX90A-V2A: S_ENDPGM 0 $agpr0_agpr1_agpr2 = IMPLICIT_DEF - SI_SPILL_A96_SAVE killed $agpr0_agpr1_agpr2, %stack.0, $sgpr32, 0, implicit $exec :: (store 12 into %stack.0, align 4, addrspace 5) - $agpr0_agpr1_agpr2 = SI_SPILL_A96_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 12 from %stack.0, align 4, addrspace 5) + SI_SPILL_A96_SAVE killed $agpr0_agpr1_agpr2, %stack.0, $sgpr32, 0, implicit $exec :: (store (s96) into %stack.0, align 4, addrspace 5) + $agpr0_agpr1_agpr2 = SI_SPILL_A96_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s96) from %stack.0, align 4, addrspace 5) S_ENDPGM 0 ... @@ -1700,20 +1700,20 @@ body: | ; MUBUF-LABEL: name: test_spill_a4 ; MUBUF: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 :: (store 4 into %stack.0, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 :: (store 4 into %stack.0 + 4, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 :: (store 4 into %stack.0 + 8, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 :: (store (s32) into %stack.0 + 8, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3 :: (store 4 into %stack.0 + 12, addrspace 5) - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3 :: (store (s32) into %stack.0 + 12, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) ; MUBUF: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 4, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5) ; MUBUF: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 8, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5) ; MUBUF: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 12, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5) ; MUBUF: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 ; MUBUF: S_ENDPGM 0 ; MUBUF-V2A-LABEL: name: test_spill_a4 @@ -1731,20 +1731,20 @@ body: | ; FLATSCR-LABEL: name: test_spill_a4 ; FLATSCR: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3 :: (store 4 into %stack.0, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3 :: (store 4 into %stack.0 + 4, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3 :: (store (s32) into %stack.0 + 4, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3 :: (store 4 into %stack.0 + 8, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3 :: (store (s32) into %stack.0 + 8, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: (store 4 into %stack.0 + 12, addrspace 5) - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: (store (s32) into %stack.0 + 12, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) ; FLATSCR: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 4, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 4, addrspace 5) ; FLATSCR: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 8, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 8, addrspace 5) ; FLATSCR: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 12, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 12, addrspace 5) ; FLATSCR: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 ; FLATSCR: S_ENDPGM 0 ; FLATSCR-V2A-LABEL: name: test_spill_a4 @@ -1761,14 +1761,14 @@ body: | ; FLATSCR-V2A: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_a4 ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 :: (store 4 into %stack.0, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 :: (store 4 into %stack.0 + 4, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 :: (store 4 into %stack.0 + 8, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3 :: (store 4 into %stack.0 + 12, addrspace 5) - ; MUBUF-GFX90A: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 :: (load 4 from %stack.0, addrspace 5) - ; MUBUF-GFX90A: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 :: (load 4 from %stack.0 + 4, addrspace 5) - ; MUBUF-GFX90A: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 :: (load 4 from %stack.0 + 8, addrspace 5) - ; MUBUF-GFX90A: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 :: (load 4 from %stack.0 + 12, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 :: (store (s32) into %stack.0, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 :: (store (s32) into %stack.0 + 4, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 :: (store (s32) into %stack.0 + 8, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3 :: (store (s32) into %stack.0 + 12, addrspace 5) + ; MUBUF-GFX90A: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 :: (load (s32) from %stack.0, addrspace 5) + ; MUBUF-GFX90A: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 :: (load (s32) from %stack.0 + 4, addrspace 5) + ; MUBUF-GFX90A: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 :: (load (s32) from %stack.0 + 8, addrspace 5) + ; MUBUF-GFX90A: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 :: (load (s32) from %stack.0 + 12, addrspace 5) ; MUBUF-GFX90A: S_ENDPGM 0 ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a4 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -1784,8 +1784,8 @@ body: | ; MUBUF-GFX90A-V2A: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_a4 ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF - ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store 16 into %stack.0, align 4, addrspace 5) - ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %stack.0, align 4, addrspace 5) + ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0, align 4, addrspace 5) + ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A: S_ENDPGM 0 ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a4 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -1800,8 +1800,8 @@ body: | ; FLATSCR-GFX90A-V2A: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 ; FLATSCR-GFX90A-V2A: S_ENDPGM 0 $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF - SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store 16 into %stack.0, align 4, addrspace 5) - $agpr0_agpr1_agpr2_agpr3 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 16 from %stack.0, align 4, addrspace 5) + SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, align 4, addrspace 5) + $agpr0_agpr1_agpr2_agpr3 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) S_ENDPGM 0 ... @@ -1820,24 +1820,24 @@ body: | ; MUBUF-LABEL: name: test_spill_a5 ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 4 into %stack.0, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 4 into %stack.0 + 4, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 4 into %stack.0 + 8, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0 + 8, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 4 into %stack.0 + 12, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0 + 12, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 4 into %stack.0 + 16, addrspace 5) - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0 + 16, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) ; MUBUF: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 4, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5) ; MUBUF: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 8, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5) ; MUBUF: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 12, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5) ; MUBUF: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 16, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5) ; MUBUF: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 ; MUBUF: S_ENDPGM 0 ; MUBUF-V2A-LABEL: name: test_spill_a5 @@ -1857,24 +1857,24 @@ body: | ; FLATSCR-LABEL: name: test_spill_a5 ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 4 into %stack.0, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 4 into %stack.0 + 4, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0 + 4, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 4 into %stack.0 + 8, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0 + 8, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 4 into %stack.0 + 12, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0 + 12, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 4 into %stack.0 + 16, addrspace 5) - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0 + 16, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) ; FLATSCR: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 4, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 4, addrspace 5) ; FLATSCR: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 8, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 8, addrspace 5) ; FLATSCR: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 12, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 12, addrspace 5) ; FLATSCR: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 16, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 16, addrspace 5) ; FLATSCR: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 ; FLATSCR: S_ENDPGM 0 ; FLATSCR-V2A-LABEL: name: test_spill_a5 @@ -1893,16 +1893,16 @@ body: | ; FLATSCR-V2A: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_a5 ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 4 into %stack.0, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 4 into %stack.0 + 4, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 4 into %stack.0 + 8, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 4 into %stack.0 + 12, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 4 into %stack.0 + 16, addrspace 5) - ; MUBUF-GFX90A: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 :: (load 4 from %stack.0, addrspace 5) - ; MUBUF-GFX90A: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 :: (load 4 from %stack.0 + 4, addrspace 5) - ; MUBUF-GFX90A: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 :: (load 4 from %stack.0 + 8, addrspace 5) - ; MUBUF-GFX90A: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 :: (load 4 from %stack.0 + 12, addrspace 5) - ; MUBUF-GFX90A: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 :: (load 4 from %stack.0 + 16, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0 + 4, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0 + 8, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0 + 12, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0 + 16, addrspace 5) + ; MUBUF-GFX90A: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 :: (load (s32) from %stack.0, addrspace 5) + ; MUBUF-GFX90A: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 :: (load (s32) from %stack.0 + 4, addrspace 5) + ; MUBUF-GFX90A: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 :: (load (s32) from %stack.0 + 8, addrspace 5) + ; MUBUF-GFX90A: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 :: (load (s32) from %stack.0 + 12, addrspace 5) + ; MUBUF-GFX90A: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 :: (load (s32) from %stack.0 + 16, addrspace 5) ; MUBUF-GFX90A: S_ENDPGM 0 ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a5 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 @@ -1920,10 +1920,10 @@ body: | ; MUBUF-GFX90A-V2A: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_a5 ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF - ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 16 into %stack.0, align 4, addrspace 5) - ; FLATSCR-GFX90A: SCRATCH_STORE_DWORD_SADDR killed $agpr4, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store 4 into %stack.0 + 16, addrspace 5) - ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 :: (load 16 from %stack.0, align 4, addrspace 5) - ; FLATSCR-GFX90A: $agpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 :: (load 4 from %stack.0 + 16, addrspace 5) + ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s128) into %stack.0, align 4, addrspace 5) + ; FLATSCR-GFX90A: SCRATCH_STORE_DWORD_SADDR killed $agpr4, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0 + 16, addrspace 5) + ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 :: (load (s128) from %stack.0, align 4, addrspace 5) + ; FLATSCR-GFX90A: $agpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 :: (load (s32) from %stack.0 + 16, addrspace 5) ; FLATSCR-GFX90A: S_ENDPGM 0 ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a5 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 @@ -1940,8 +1940,8 @@ body: | ; FLATSCR-GFX90A-V2A: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 ; FLATSCR-GFX90A-V2A: S_ENDPGM 0 $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF - SI_SPILL_A160_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4, %stack.0, $sgpr32, 0, implicit $exec :: (store 20 into %stack.0, align 4, addrspace 5) - $agpr0_agpr1_agpr2_agpr3_agpr4 = SI_SPILL_A160_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 20 from %stack.0, align 4, addrspace 5) + SI_SPILL_A160_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4, %stack.0, $sgpr32, 0, implicit $exec :: (store (s160) into %stack.0, align 4, addrspace 5) + $agpr0_agpr1_agpr2_agpr3_agpr4 = SI_SPILL_A160_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s160) from %stack.0, align 4, addrspace 5) S_ENDPGM 0 ... @@ -1960,28 +1960,28 @@ body: | ; MUBUF-LABEL: name: test_spill_a6 ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0 + 4, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0 + 8, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s32) into %stack.0 + 8, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0 + 12, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s32) into %stack.0 + 12, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0 + 16, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s32) into %stack.0 + 16, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0 + 20, addrspace 5) - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s32) into %stack.0 + 20, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) ; MUBUF: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 4, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5) ; MUBUF: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 8, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5) ; MUBUF: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 12, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5) ; MUBUF: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 16, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5) ; MUBUF: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 20, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 20, addrspace 5) ; MUBUF: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; MUBUF: S_ENDPGM 0 ; MUBUF-V2A-LABEL: name: test_spill_a6 @@ -2003,28 +2003,28 @@ body: | ; FLATSCR-LABEL: name: test_spill_a6 ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0 + 4, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s32) into %stack.0 + 4, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0 + 8, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s32) into %stack.0 + 8, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0 + 12, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s32) into %stack.0 + 12, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0 + 16, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s32) into %stack.0 + 16, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0 + 20, addrspace 5) - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s32) into %stack.0 + 20, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) ; FLATSCR: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 4, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 4, addrspace 5) ; FLATSCR: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 8, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 8, addrspace 5) ; FLATSCR: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 12, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 12, addrspace 5) ; FLATSCR: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 16, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 16, addrspace 5) ; FLATSCR: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 20, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 20, addrspace 5) ; FLATSCR: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; FLATSCR: S_ENDPGM 0 ; FLATSCR-V2A-LABEL: name: test_spill_a6 @@ -2045,18 +2045,18 @@ body: | ; FLATSCR-V2A: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_a6 ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0 + 4, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0 + 8, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0 + 12, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0 + 16, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 4 into %stack.0 + 20, addrspace 5) - ; MUBUF-GFX90A: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (load 4 from %stack.0, addrspace 5) - ; MUBUF-GFX90A: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (load 4 from %stack.0 + 4, addrspace 5) - ; MUBUF-GFX90A: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (load 4 from %stack.0 + 8, addrspace 5) - ; MUBUF-GFX90A: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (load 4 from %stack.0 + 12, addrspace 5) - ; MUBUF-GFX90A: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (load 4 from %stack.0 + 16, addrspace 5) - ; MUBUF-GFX90A: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (load 4 from %stack.0 + 20, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s32) into %stack.0, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s32) into %stack.0 + 4, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s32) into %stack.0 + 8, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s32) into %stack.0 + 12, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s32) into %stack.0 + 16, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s32) into %stack.0 + 20, addrspace 5) + ; MUBUF-GFX90A: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (load (s32) from %stack.0, addrspace 5) + ; MUBUF-GFX90A: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (load (s32) from %stack.0 + 4, addrspace 5) + ; MUBUF-GFX90A: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (load (s32) from %stack.0 + 8, addrspace 5) + ; MUBUF-GFX90A: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (load (s32) from %stack.0 + 12, addrspace 5) + ; MUBUF-GFX90A: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (load (s32) from %stack.0 + 16, addrspace 5) + ; MUBUF-GFX90A: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (load (s32) from %stack.0 + 20, addrspace 5) ; MUBUF-GFX90A: S_ENDPGM 0 ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a6 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 @@ -2076,10 +2076,10 @@ body: | ; MUBUF-GFX90A-V2A: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_a6 ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF - ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 16 into %stack.0, align 4, addrspace 5) - ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX2_SADDR killed $agpr4_agpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store 8 into %stack.0 + 16, align 4, addrspace 5) - ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (load 16 from %stack.0, align 4, addrspace 5) - ; FLATSCR-GFX90A: $agpr4_agpr5 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (load 8 from %stack.0 + 16, align 4, addrspace 5) + ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s128) into %stack.0, align 4, addrspace 5) + ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX2_SADDR killed $agpr4_agpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s64) into %stack.0 + 16, align 4, addrspace 5) + ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (load (s128) from %stack.0, align 4, addrspace 5) + ; FLATSCR-GFX90A: $agpr4_agpr5 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (load (s64) from %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A: S_ENDPGM 0 ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a6 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 @@ -2098,8 +2098,8 @@ body: | ; FLATSCR-GFX90A-V2A: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; FLATSCR-GFX90A-V2A: S_ENDPGM 0 $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF - SI_SPILL_A192_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, %stack.0, $sgpr32, 0, implicit $exec :: (store 24 into %stack.0, align 4, addrspace 5) - $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = SI_SPILL_A192_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 24 from %stack.0, align 4, addrspace 5) + SI_SPILL_A192_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, %stack.0, $sgpr32, 0, implicit $exec :: (store (s192) into %stack.0, align 4, addrspace 5) + $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = SI_SPILL_A192_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s192) from %stack.0, align 4, addrspace 5) S_ENDPGM 0 ... @@ -2118,36 +2118,36 @@ body: | ; MUBUF-LABEL: name: test_spill_a8 ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 4, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 8, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0 + 8, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 12, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0 + 12, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 16, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0 + 16, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 20, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0 + 20, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 24, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0 + 24, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 28, addrspace 5) - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0 + 28, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) ; MUBUF: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 4, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5) ; MUBUF: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 8, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5) ; MUBUF: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 12, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5) ; MUBUF: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 16, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5) ; MUBUF: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 20, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 20, addrspace 5) ; MUBUF: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 24, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 24, addrspace 5) ; MUBUF: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 28, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 28, addrspace 5) ; MUBUF: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; MUBUF: S_ENDPGM 0 ; MUBUF-V2A-LABEL: name: test_spill_a8 @@ -2173,36 +2173,36 @@ body: | ; FLATSCR-LABEL: name: test_spill_a8 ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 4, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0 + 4, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 8, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0 + 8, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 12, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0 + 12, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 16, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0 + 16, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 20, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0 + 20, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 24, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0 + 24, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 28, addrspace 5) - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0 + 28, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) ; FLATSCR: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 4, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 4, addrspace 5) ; FLATSCR: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 8, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 8, addrspace 5) ; FLATSCR: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 12, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 12, addrspace 5) ; FLATSCR: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 16, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 16, addrspace 5) ; FLATSCR: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 20, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 20, addrspace 5) ; FLATSCR: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 24, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 24, addrspace 5) ; FLATSCR: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 28, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 28, addrspace 5) ; FLATSCR: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; FLATSCR: S_ENDPGM 0 ; FLATSCR-V2A-LABEL: name: test_spill_a8 @@ -2227,22 +2227,22 @@ body: | ; FLATSCR-V2A: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_a8 ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 4, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 8, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 12, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 16, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 20, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 24, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 4 into %stack.0 + 28, addrspace 5) - ; MUBUF-GFX90A: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (load 4 from %stack.0, addrspace 5) - ; MUBUF-GFX90A: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (load 4 from %stack.0 + 4, addrspace 5) - ; MUBUF-GFX90A: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (load 4 from %stack.0 + 8, addrspace 5) - ; MUBUF-GFX90A: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (load 4 from %stack.0 + 12, addrspace 5) - ; MUBUF-GFX90A: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (load 4 from %stack.0 + 16, addrspace 5) - ; MUBUF-GFX90A: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (load 4 from %stack.0 + 20, addrspace 5) - ; MUBUF-GFX90A: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (load 4 from %stack.0 + 24, addrspace 5) - ; MUBUF-GFX90A: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (load 4 from %stack.0 + 28, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0 + 4, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0 + 8, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0 + 12, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0 + 16, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0 + 20, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0 + 24, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0 + 28, addrspace 5) + ; MUBUF-GFX90A: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (load (s32) from %stack.0, addrspace 5) + ; MUBUF-GFX90A: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (load (s32) from %stack.0 + 4, addrspace 5) + ; MUBUF-GFX90A: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (load (s32) from %stack.0 + 8, addrspace 5) + ; MUBUF-GFX90A: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (load (s32) from %stack.0 + 12, addrspace 5) + ; MUBUF-GFX90A: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (load (s32) from %stack.0 + 16, addrspace 5) + ; MUBUF-GFX90A: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (load (s32) from %stack.0 + 20, addrspace 5) + ; MUBUF-GFX90A: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (load (s32) from %stack.0 + 24, addrspace 5) + ; MUBUF-GFX90A: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (load (s32) from %stack.0 + 28, addrspace 5) ; MUBUF-GFX90A: S_ENDPGM 0 ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a8 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 @@ -2266,10 +2266,10 @@ body: | ; MUBUF-GFX90A-V2A: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_a8 ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF - ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 16 into %stack.0, align 4, addrspace 5) - ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store 16 into %stack.0 + 16, align 4, addrspace 5) - ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (load 16 from %stack.0, align 4, addrspace 5) - ; FLATSCR-GFX90A: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (load 16 from %stack.0 + 16, align 4, addrspace 5) + ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s128) into %stack.0, align 4, addrspace 5) + ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) + ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (load (s128) from %stack.0, align 4, addrspace 5) + ; FLATSCR-GFX90A: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (load (s128) from %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A: S_ENDPGM 0 ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a8 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 @@ -2292,8 +2292,8 @@ body: | ; FLATSCR-GFX90A-V2A: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $vgpr7, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; FLATSCR-GFX90A-V2A: S_ENDPGM 0 $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF - SI_SPILL_A256_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, %stack.0, $sgpr32, 0, implicit $exec :: (store 32 into %stack.0, align 4, addrspace 5) - $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = SI_SPILL_A256_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 32 from %stack.0, align 4, addrspace 5) + SI_SPILL_A256_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, %stack.0, $sgpr32, 0, implicit $exec :: (store (s256) into %stack.0, align 4, addrspace 5) + $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = SI_SPILL_A256_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.0, align 4, addrspace 5) S_ENDPGM 0 ... @@ -2312,68 +2312,68 @@ body: | ; MUBUF-LABEL: name: test_spill_a16 ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 4, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 8, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 8, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 12, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 12, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 16, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 16, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 20, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 20, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 24, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 24, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 28, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 28, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 32, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 32, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 36, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 36, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 40, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 40, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 44, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 44, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 48, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 48, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 52, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 52, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 56, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 56, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 60, addrspace 5) - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 60, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) ; MUBUF: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 4, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5) ; MUBUF: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 8, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5) ; MUBUF: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 12, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5) ; MUBUF: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 16, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5) ; MUBUF: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 20, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 20, addrspace 5) ; MUBUF: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 24, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 24, addrspace 5) ; MUBUF: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 28, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 28, addrspace 5) ; MUBUF: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 32, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 32, addrspace 5) ; MUBUF: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 36, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 36, addrspace 5) ; MUBUF: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 40, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 40, addrspace 5) ; MUBUF: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 44, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 44, addrspace 5) ; MUBUF: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 48, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 48, addrspace 5) ; MUBUF: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 52, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 52, addrspace 5) ; MUBUF: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 56, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 56, addrspace 5) ; MUBUF: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 60, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 60, addrspace 5) ; MUBUF: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; MUBUF: S_ENDPGM 0 ; MUBUF-V2A-LABEL: name: test_spill_a16 @@ -2415,68 +2415,68 @@ body: | ; FLATSCR-LABEL: name: test_spill_a16 ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 4, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 4, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 8, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 8, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 12, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 12, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 16, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 16, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 20, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 20, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 24, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 24, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 28, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 28, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 32, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 32, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 36, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 36, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 40, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 40, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 44, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 44, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 48, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 48, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 52, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 52, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 56, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 56, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 60, addrspace 5) - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 60, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) ; FLATSCR: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 4, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 4, addrspace 5) ; FLATSCR: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 8, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 8, addrspace 5) ; FLATSCR: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 12, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 12, addrspace 5) ; FLATSCR: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 16, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 16, addrspace 5) ; FLATSCR: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 20, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 20, addrspace 5) ; FLATSCR: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 24, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 24, addrspace 5) ; FLATSCR: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 28, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 28, addrspace 5) ; FLATSCR: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 32, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 32, addrspace 5) ; FLATSCR: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 36, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 36, addrspace 5) ; FLATSCR: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 40, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 40, addrspace 5) ; FLATSCR: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 44, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 44, addrspace 5) ; FLATSCR: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 48, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 48, addrspace 5) ; FLATSCR: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 52, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 52, addrspace 5) ; FLATSCR: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 56, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 56, addrspace 5) ; FLATSCR: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 60, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 60, addrspace 5) ; FLATSCR: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; FLATSCR: S_ENDPGM 0 ; FLATSCR-V2A-LABEL: name: test_spill_a16 @@ -2517,38 +2517,38 @@ body: | ; FLATSCR-V2A: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_a16 ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 4, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 8, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 12, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 16, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 20, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 24, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 28, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 32, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 36, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 40, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 44, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 48, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 52, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 56, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 4 into %stack.0 + 60, addrspace 5) - ; MUBUF-GFX90A: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 4 from %stack.0, addrspace 5) - ; MUBUF-GFX90A: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 4 from %stack.0 + 4, addrspace 5) - ; MUBUF-GFX90A: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 4 from %stack.0 + 8, addrspace 5) - ; MUBUF-GFX90A: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 4 from %stack.0 + 12, addrspace 5) - ; MUBUF-GFX90A: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 4 from %stack.0 + 16, addrspace 5) - ; MUBUF-GFX90A: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 4 from %stack.0 + 20, addrspace 5) - ; MUBUF-GFX90A: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 4 from %stack.0 + 24, addrspace 5) - ; MUBUF-GFX90A: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 4 from %stack.0 + 28, addrspace 5) - ; MUBUF-GFX90A: $agpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 4 from %stack.0 + 32, addrspace 5) - ; MUBUF-GFX90A: $agpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 4 from %stack.0 + 36, addrspace 5) - ; MUBUF-GFX90A: $agpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 4 from %stack.0 + 40, addrspace 5) - ; MUBUF-GFX90A: $agpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 4 from %stack.0 + 44, addrspace 5) - ; MUBUF-GFX90A: $agpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 4 from %stack.0 + 48, addrspace 5) - ; MUBUF-GFX90A: $agpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 4 from %stack.0 + 52, addrspace 5) - ; MUBUF-GFX90A: $agpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 4 from %stack.0 + 56, addrspace 5) - ; MUBUF-GFX90A: $agpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 4 from %stack.0 + 60, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 4, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 8, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 12, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 16, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 20, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 24, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 28, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 32, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 36, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 40, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 44, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 48, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 52, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 56, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 60, addrspace 5) + ; MUBUF-GFX90A: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load (s32) from %stack.0, addrspace 5) + ; MUBUF-GFX90A: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load (s32) from %stack.0 + 4, addrspace 5) + ; MUBUF-GFX90A: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load (s32) from %stack.0 + 8, addrspace 5) + ; MUBUF-GFX90A: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load (s32) from %stack.0 + 12, addrspace 5) + ; MUBUF-GFX90A: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load (s32) from %stack.0 + 16, addrspace 5) + ; MUBUF-GFX90A: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load (s32) from %stack.0 + 20, addrspace 5) + ; MUBUF-GFX90A: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load (s32) from %stack.0 + 24, addrspace 5) + ; MUBUF-GFX90A: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load (s32) from %stack.0 + 28, addrspace 5) + ; MUBUF-GFX90A: $agpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load (s32) from %stack.0 + 32, addrspace 5) + ; MUBUF-GFX90A: $agpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load (s32) from %stack.0 + 36, addrspace 5) + ; MUBUF-GFX90A: $agpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load (s32) from %stack.0 + 40, addrspace 5) + ; MUBUF-GFX90A: $agpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load (s32) from %stack.0 + 44, addrspace 5) + ; MUBUF-GFX90A: $agpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load (s32) from %stack.0 + 48, addrspace 5) + ; MUBUF-GFX90A: $agpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load (s32) from %stack.0 + 52, addrspace 5) + ; MUBUF-GFX90A: $agpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load (s32) from %stack.0 + 56, addrspace 5) + ; MUBUF-GFX90A: $agpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load (s32) from %stack.0 + 60, addrspace 5) ; MUBUF-GFX90A: S_ENDPGM 0 ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a16 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 @@ -2588,14 +2588,14 @@ body: | ; MUBUF-GFX90A-V2A: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_a16 ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF - ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 16 into %stack.0, align 4, addrspace 5) - ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 16 into %stack.0 + 16, align 4, addrspace 5) - ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr8_agpr9_agpr10_agpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 16 into %stack.0 + 32, align 4, addrspace 5) - ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr12_agpr13_agpr14_agpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store 16 into %stack.0 + 48, align 4, addrspace 5) - ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 16 from %stack.0, align 4, addrspace 5) - ; FLATSCR-GFX90A: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 16 from %stack.0 + 16, align 4, addrspace 5) - ; FLATSCR-GFX90A: $agpr8_agpr9_agpr10_agpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 16 from %stack.0 + 32, align 4, addrspace 5) - ; FLATSCR-GFX90A: $agpr12_agpr13_agpr14_agpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load 16 from %stack.0 + 48, align 4, addrspace 5) + ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s128) into %stack.0, align 4, addrspace 5) + ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) + ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr8_agpr9_agpr10_agpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s128) into %stack.0 + 32, align 4, addrspace 5) + ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr12_agpr13_agpr14_agpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s128) into %stack.0 + 48, align 4, addrspace 5) + ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load (s128) from %stack.0, align 4, addrspace 5) + ; FLATSCR-GFX90A: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load (s128) from %stack.0 + 16, align 4, addrspace 5) + ; FLATSCR-GFX90A: $agpr8_agpr9_agpr10_agpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load (s128) from %stack.0 + 32, align 4, addrspace 5) + ; FLATSCR-GFX90A: $agpr12_agpr13_agpr14_agpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load (s128) from %stack.0 + 48, align 4, addrspace 5) ; FLATSCR-GFX90A: S_ENDPGM 0 ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a16 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 @@ -2634,8 +2634,8 @@ body: | ; FLATSCR-GFX90A-V2A: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $vgpr15, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; FLATSCR-GFX90A-V2A: S_ENDPGM 0 $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF - SI_SPILL_A512_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, %stack.0, $sgpr32, 0, implicit $exec :: (store 64 into %stack.0, align 4, addrspace 5) - $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = SI_SPILL_A512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 64 from %stack.0, align 4, addrspace 5) + SI_SPILL_A512_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, %stack.0, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5) + $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = SI_SPILL_A512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5) S_ENDPGM 0 ... @@ -2654,132 +2654,132 @@ body: | ; MUBUF-LABEL: name: test_spill_a32 ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 4, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 8, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 8, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 12, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 12, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 16, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 16, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 20, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 20, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 24, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 24, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 28, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 28, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 32, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 32, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 36, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 36, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 40, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 40, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 44, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 44, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 48, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 48, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 52, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 52, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 56, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 56, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 60, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 60, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr16, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 64, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 64, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr17, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 68, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 68, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr18, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 72, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 72, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr19, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 76, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 76, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr20, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 80, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 80, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr21, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 84, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 84, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr22, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 88, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 88, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr23, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 92, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 92, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr24, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 96, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 96, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr25, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 100, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 100, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr26, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 104, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 104, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr27, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 108, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 108, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr28, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 112, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 112, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr29, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 116, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 116, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr30, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 120, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 120, addrspace 5) ; MUBUF: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr31, implicit $exec - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 124, addrspace 5) - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 124, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) ; MUBUF: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 4, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5) ; MUBUF: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 8, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5) ; MUBUF: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 12, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5) ; MUBUF: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 16, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5) ; MUBUF: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 20, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 20, addrspace 5) ; MUBUF: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 24, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 24, addrspace 5) ; MUBUF: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 28, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 28, addrspace 5) ; MUBUF: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 32, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 32, addrspace 5) ; MUBUF: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 36, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 36, addrspace 5) ; MUBUF: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 40, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 40, addrspace 5) ; MUBUF: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 44, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 44, addrspace 5) ; MUBUF: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 48, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 48, addrspace 5) ; MUBUF: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 52, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 52, addrspace 5) ; MUBUF: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 56, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 56, addrspace 5) ; MUBUF: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 60, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 60, addrspace 5) ; MUBUF: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 64, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 64, addrspace 5) ; MUBUF: $agpr16 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 68, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 68, addrspace 5) ; MUBUF: $agpr17 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 72, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 72, addrspace 5) ; MUBUF: $agpr18 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 76, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 76, addrspace 5) ; MUBUF: $agpr19 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 80, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 80, addrspace 5) ; MUBUF: $agpr20 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 84, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 84, addrspace 5) ; MUBUF: $agpr21 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 88, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 88, addrspace 5) ; MUBUF: $agpr22 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 92, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 92, addrspace 5) ; MUBUF: $agpr23 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 96, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 96, addrspace 5) ; MUBUF: $agpr24 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 100, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 100, addrspace 5) ; MUBUF: $agpr25 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 104, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 104, addrspace 5) ; MUBUF: $agpr26 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 108, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 108, addrspace 5) ; MUBUF: $agpr27 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 112, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 112, addrspace 5) ; MUBUF: $agpr28 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 116, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 116, addrspace 5) ; MUBUF: $agpr29 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 120, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 120, addrspace 5) ; MUBUF: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, 0, implicit $exec :: (load 4 from %stack.0 + 124, addrspace 5) + ; MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 124, addrspace 5) ; MUBUF: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; MUBUF: S_ENDPGM 0 ; MUBUF-V2A-LABEL: name: test_spill_a32 @@ -2853,132 +2853,132 @@ body: | ; FLATSCR-LABEL: name: test_spill_a32 ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 4, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 4, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 8, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 8, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 12, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 12, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 16, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 16, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 20, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 20, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 24, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 24, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 28, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 28, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 32, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 32, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 36, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 36, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 40, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 40, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 44, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 44, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 48, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 48, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 52, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 52, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 56, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 56, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 60, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 60, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr16, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 64, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 64, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr17, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 68, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 68, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 68, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 68, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr18, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 72, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 72, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 72, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 72, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr19, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 76, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 76, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 76, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 76, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr20, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 80, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 80, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr21, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 84, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 84, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 84, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 84, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr22, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 88, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 88, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 88, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 88, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr23, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 92, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 92, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 92, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 92, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr24, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 96, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 96, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr25, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 100, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 100, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 100, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 100, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr26, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 104, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 104, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 104, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 104, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr27, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 108, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 108, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 108, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 108, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr28, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 112, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 112, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr29, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 116, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 116, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 116, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 116, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr30, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 120, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 120, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 120, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 120, addrspace 5) ; FLATSCR: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr31, implicit $exec - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 124, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 124, addrspace 5) - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 124, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 124, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) ; FLATSCR: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 4, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 4, addrspace 5) ; FLATSCR: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 8, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 8, addrspace 5) ; FLATSCR: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 12, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 12, addrspace 5) ; FLATSCR: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 16, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 16, addrspace 5) ; FLATSCR: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 20, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 20, addrspace 5) ; FLATSCR: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 24, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 24, addrspace 5) ; FLATSCR: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 28, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 28, addrspace 5) ; FLATSCR: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 32, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 32, addrspace 5) ; FLATSCR: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 36, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 36, addrspace 5) ; FLATSCR: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 40, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 40, addrspace 5) ; FLATSCR: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 44, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 44, addrspace 5) ; FLATSCR: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 48, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 48, addrspace 5) ; FLATSCR: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 52, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 52, addrspace 5) ; FLATSCR: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 56, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 56, addrspace 5) ; FLATSCR: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 60, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 60, addrspace 5) ; FLATSCR: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 64, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 64, addrspace 5) ; FLATSCR: $agpr16 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 68, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 68, addrspace 5) ; FLATSCR: $agpr17 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 72, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 72, addrspace 5) ; FLATSCR: $agpr18 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 76, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 76, addrspace 5) ; FLATSCR: $agpr19 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 80, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 80, addrspace 5) ; FLATSCR: $agpr20 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 84, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 84, addrspace 5) ; FLATSCR: $agpr21 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 88, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 88, addrspace 5) ; FLATSCR: $agpr22 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 92, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 92, addrspace 5) ; FLATSCR: $agpr23 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 96, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 96, addrspace 5) ; FLATSCR: $agpr24 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 100, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 100, addrspace 5) ; FLATSCR: $agpr25 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 104, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 104, addrspace 5) ; FLATSCR: $agpr26 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 108, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 108, addrspace 5) ; FLATSCR: $agpr27 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 112, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 112, addrspace 5) ; FLATSCR: $agpr28 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 116, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 116, addrspace 5) ; FLATSCR: $agpr29 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 120, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 120, addrspace 5) ; FLATSCR: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0 + 124, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 124, addrspace 5) ; FLATSCR: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; FLATSCR: S_ENDPGM 0 ; FLATSCR-V2A-LABEL: name: test_spill_a32 @@ -3051,70 +3051,70 @@ body: | ; FLATSCR-V2A: S_ENDPGM 0 ; MUBUF-GFX90A-LABEL: name: test_spill_a32 ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 4, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 8, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 12, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 16, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 20, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 24, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 28, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 32, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 36, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 40, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 44, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 48, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 52, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 56, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 60, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 64, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr17, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 68, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr18, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 72, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr19, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 76, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 80, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr21, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 84, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr22, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 88, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr23, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 92, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr24, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 96, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr25, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 100, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr26, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 104, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr27, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 108, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr28, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 112, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr29, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 116, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr30, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 120, addrspace 5) - ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr31, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 4 into %stack.0 + 124, addrspace 5) - ; MUBUF-GFX90A: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0, addrspace 5) - ; MUBUF-GFX90A: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 4, addrspace 5) - ; MUBUF-GFX90A: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 8, addrspace 5) - ; MUBUF-GFX90A: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 12, addrspace 5) - ; MUBUF-GFX90A: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 16, addrspace 5) - ; MUBUF-GFX90A: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 20, addrspace 5) - ; MUBUF-GFX90A: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 24, addrspace 5) - ; MUBUF-GFX90A: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 28, addrspace 5) - ; MUBUF-GFX90A: $agpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 32, addrspace 5) - ; MUBUF-GFX90A: $agpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 36, addrspace 5) - ; MUBUF-GFX90A: $agpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 40, addrspace 5) - ; MUBUF-GFX90A: $agpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 44, addrspace 5) - ; MUBUF-GFX90A: $agpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 48, addrspace 5) - ; MUBUF-GFX90A: $agpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 52, addrspace 5) - ; MUBUF-GFX90A: $agpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 56, addrspace 5) - ; MUBUF-GFX90A: $agpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 60, addrspace 5) - ; MUBUF-GFX90A: $agpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 64, addrspace 5) - ; MUBUF-GFX90A: $agpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 68, addrspace 5) - ; MUBUF-GFX90A: $agpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 72, addrspace 5) - ; MUBUF-GFX90A: $agpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 76, addrspace 5) - ; MUBUF-GFX90A: $agpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 80, addrspace 5) - ; MUBUF-GFX90A: $agpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 84, addrspace 5) - ; MUBUF-GFX90A: $agpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 88, addrspace 5) - ; MUBUF-GFX90A: $agpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 92, addrspace 5) - ; MUBUF-GFX90A: $agpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 96, addrspace 5) - ; MUBUF-GFX90A: $agpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 100, addrspace 5) - ; MUBUF-GFX90A: $agpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 104, addrspace 5) - ; MUBUF-GFX90A: $agpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 108, addrspace 5) - ; MUBUF-GFX90A: $agpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 112, addrspace 5) - ; MUBUF-GFX90A: $agpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 116, addrspace 5) - ; MUBUF-GFX90A: $agpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 120, addrspace 5) - ; MUBUF-GFX90A: $agpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 4 from %stack.0 + 124, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 4, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 8, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 12, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 16, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 20, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 24, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 28, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 32, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 36, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 40, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 44, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 48, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 52, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 56, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 60, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 64, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr17, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 68, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr18, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 72, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr19, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 76, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 80, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr21, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 84, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr22, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 88, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr23, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 92, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr24, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 96, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr25, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 100, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr26, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 104, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr27, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 108, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr28, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 112, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr29, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 116, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr30, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 120, addrspace 5) + ; MUBUF-GFX90A: BUFFER_STORE_DWORD_OFFSET killed $agpr31, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 124, addrspace 5) + ; MUBUF-GFX90A: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load (s32) from %stack.0, addrspace 5) + ; MUBUF-GFX90A: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load (s32) from %stack.0 + 4, addrspace 5) + ; MUBUF-GFX90A: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load (s32) from %stack.0 + 8, addrspace 5) + ; MUBUF-GFX90A: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load (s32) from %stack.0 + 12, addrspace 5) + ; MUBUF-GFX90A: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load (s32) from %stack.0 + 16, addrspace 5) + ; MUBUF-GFX90A: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load (s32) from %stack.0 + 20, addrspace 5) + ; MUBUF-GFX90A: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load (s32) from %stack.0 + 24, addrspace 5) + ; MUBUF-GFX90A: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load (s32) from %stack.0 + 28, addrspace 5) + ; MUBUF-GFX90A: $agpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load (s32) from %stack.0 + 32, addrspace 5) + ; MUBUF-GFX90A: $agpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load (s32) from %stack.0 + 36, addrspace 5) + ; MUBUF-GFX90A: $agpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load (s32) from %stack.0 + 40, addrspace 5) + ; MUBUF-GFX90A: $agpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load (s32) from %stack.0 + 44, addrspace 5) + ; MUBUF-GFX90A: $agpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load (s32) from %stack.0 + 48, addrspace 5) + ; MUBUF-GFX90A: $agpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load (s32) from %stack.0 + 52, addrspace 5) + ; MUBUF-GFX90A: $agpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load (s32) from %stack.0 + 56, addrspace 5) + ; MUBUF-GFX90A: $agpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load (s32) from %stack.0 + 60, addrspace 5) + ; MUBUF-GFX90A: $agpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load (s32) from %stack.0 + 64, addrspace 5) + ; MUBUF-GFX90A: $agpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load (s32) from %stack.0 + 68, addrspace 5) + ; MUBUF-GFX90A: $agpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load (s32) from %stack.0 + 72, addrspace 5) + ; MUBUF-GFX90A: $agpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load (s32) from %stack.0 + 76, addrspace 5) + ; MUBUF-GFX90A: $agpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load (s32) from %stack.0 + 80, addrspace 5) + ; MUBUF-GFX90A: $agpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load (s32) from %stack.0 + 84, addrspace 5) + ; MUBUF-GFX90A: $agpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load (s32) from %stack.0 + 88, addrspace 5) + ; MUBUF-GFX90A: $agpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load (s32) from %stack.0 + 92, addrspace 5) + ; MUBUF-GFX90A: $agpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load (s32) from %stack.0 + 96, addrspace 5) + ; MUBUF-GFX90A: $agpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load (s32) from %stack.0 + 100, addrspace 5) + ; MUBUF-GFX90A: $agpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load (s32) from %stack.0 + 104, addrspace 5) + ; MUBUF-GFX90A: $agpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load (s32) from %stack.0 + 108, addrspace 5) + ; MUBUF-GFX90A: $agpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load (s32) from %stack.0 + 112, addrspace 5) + ; MUBUF-GFX90A: $agpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load (s32) from %stack.0 + 116, addrspace 5) + ; MUBUF-GFX90A: $agpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load (s32) from %stack.0 + 120, addrspace 5) + ; MUBUF-GFX90A: $agpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load (s32) from %stack.0 + 124, addrspace 5) ; MUBUF-GFX90A: S_ENDPGM 0 ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a32 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 @@ -3186,22 +3186,22 @@ body: | ; MUBUF-GFX90A-V2A: S_ENDPGM 0 ; FLATSCR-GFX90A-LABEL: name: test_spill_a32 ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF - ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 16 into %stack.0, align 4, addrspace 5) - ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 16 into %stack.0 + 16, align 4, addrspace 5) - ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr8_agpr9_agpr10_agpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 16 into %stack.0 + 32, align 4, addrspace 5) - ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr12_agpr13_agpr14_agpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 16 into %stack.0 + 48, align 4, addrspace 5) - ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr16_agpr17_agpr18_agpr19, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 16 into %stack.0 + 64, align 4, addrspace 5) - ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr20_agpr21_agpr22_agpr23, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 16 into %stack.0 + 80, align 4, addrspace 5) - ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr24_agpr25_agpr26_agpr27, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 16 into %stack.0 + 96, align 4, addrspace 5) - ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr28_agpr29_agpr30_agpr31, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store 16 into %stack.0 + 112, align 4, addrspace 5) - ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 16 from %stack.0, align 4, addrspace 5) - ; FLATSCR-GFX90A: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 16 from %stack.0 + 16, align 4, addrspace 5) - ; FLATSCR-GFX90A: $agpr8_agpr9_agpr10_agpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 16 from %stack.0 + 32, align 4, addrspace 5) - ; FLATSCR-GFX90A: $agpr12_agpr13_agpr14_agpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 16 from %stack.0 + 48, align 4, addrspace 5) - ; FLATSCR-GFX90A: $agpr16_agpr17_agpr18_agpr19 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 16 from %stack.0 + 64, align 4, addrspace 5) - ; FLATSCR-GFX90A: $agpr20_agpr21_agpr22_agpr23 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 16 from %stack.0 + 80, align 4, addrspace 5) - ; FLATSCR-GFX90A: $agpr24_agpr25_agpr26_agpr27 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 16 from %stack.0 + 96, align 4, addrspace 5) - ; FLATSCR-GFX90A: $agpr28_agpr29_agpr30_agpr31 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load 16 from %stack.0 + 112, align 4, addrspace 5) + ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s128) into %stack.0, align 4, addrspace 5) + ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) + ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr8_agpr9_agpr10_agpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s128) into %stack.0 + 32, align 4, addrspace 5) + ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr12_agpr13_agpr14_agpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s128) into %stack.0 + 48, align 4, addrspace 5) + ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr16_agpr17_agpr18_agpr19, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s128) into %stack.0 + 64, align 4, addrspace 5) + ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr20_agpr21_agpr22_agpr23, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s128) into %stack.0 + 80, align 4, addrspace 5) + ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr24_agpr25_agpr26_agpr27, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s128) into %stack.0 + 96, align 4, addrspace 5) + ; FLATSCR-GFX90A: SCRATCH_STORE_DWORDX4_SADDR killed $agpr28_agpr29_agpr30_agpr31, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s128) into %stack.0 + 112, align 4, addrspace 5) + ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load (s128) from %stack.0, align 4, addrspace 5) + ; FLATSCR-GFX90A: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load (s128) from %stack.0 + 16, align 4, addrspace 5) + ; FLATSCR-GFX90A: $agpr8_agpr9_agpr10_agpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load (s128) from %stack.0 + 32, align 4, addrspace 5) + ; FLATSCR-GFX90A: $agpr12_agpr13_agpr14_agpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load (s128) from %stack.0 + 48, align 4, addrspace 5) + ; FLATSCR-GFX90A: $agpr16_agpr17_agpr18_agpr19 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load (s128) from %stack.0 + 64, align 4, addrspace 5) + ; FLATSCR-GFX90A: $agpr20_agpr21_agpr22_agpr23 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load (s128) from %stack.0 + 80, align 4, addrspace 5) + ; FLATSCR-GFX90A: $agpr24_agpr25_agpr26_agpr27 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load (s128) from %stack.0 + 96, align 4, addrspace 5) + ; FLATSCR-GFX90A: $agpr28_agpr29_agpr30_agpr31 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load (s128) from %stack.0 + 112, align 4, addrspace 5) ; FLATSCR-GFX90A: S_ENDPGM 0 ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a32 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 @@ -3272,8 +3272,8 @@ body: | ; FLATSCR-GFX90A-V2A: $agpr31 = V_ACCVGPR_WRITE_B32_e64 $vgpr31, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; FLATSCR-GFX90A-V2A: S_ENDPGM 0 $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF - SI_SPILL_A1024_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, %stack.0, $sgpr32, 0, implicit $exec :: (store 128 into %stack.0, align 4, addrspace 5) - $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = SI_SPILL_A1024_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 128 from %stack.0, align 4, addrspace 5) + SI_SPILL_A1024_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, %stack.0, $sgpr32, 0, implicit $exec :: (store (s1024) into %stack.0, align 4, addrspace 5) + $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = SI_SPILL_A1024_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s1024) from %stack.0, align 4, addrspace 5) S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir b/llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir index 8d4dbb7149d25..573c7045adab6 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir @@ -29,18 +29,18 @@ body: | ; CHECK: $sgpr0 = S_ADD_U32 $sgpr0, $sgpr4, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK: $sgpr5 = S_MOV_B32 524288 - ; CHECK: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, align 8192, addrspace 5) + ; CHECK: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, align 8192, addrspace 5) ; CHECK: S_BRANCH %bb.1 ; CHECK: bb.1: ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK: $sgpr4 = S_MOV_B32 524288 - ; CHECK: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr4, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, align 8192, addrspace 5) + ; CHECK: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr4, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, align 8192, addrspace 5) ; CHECK: S_ENDPGM 0, implicit $vgpr0 bb.0: - $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) + $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) S_BRANCH %bb.1 bb.1: - $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) + $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) S_ENDPGM 0, implicit $vgpr0 ... diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir index 5647d9271739c..1e7e1d99fd8ad 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir @@ -30,7 +30,7 @@ body: | ; CHECK: liveins: $vgpr1, $vgpr2 ; CHECK: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $sgpr6 = S_ADD_I32 $sgpr32, 524544, implicit-def $scc - ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5) + ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) ; CHECK: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; CHECK: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2 ; CHECK: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc @@ -48,7 +48,7 @@ body: | ; CHECK: $sgpr33 = V_READLANE_B32 $vgpr2, 0 ; CHECK: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec ; CHECK: $sgpr6 = S_ADD_I32 $sgpr32, 524544, implicit-def $scc - ; CHECK: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.3, addrspace 5) + ; CHECK: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5) ; CHECK: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; CHECK: S_ENDPGM 0, implicit $vcc S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir index 7ca2fa6e73bfc..16cca9b1584de 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir @@ -26,7 +26,7 @@ body: | ; MUBUF: liveins: $vgpr1, $vgpr2 ; MUBUF: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec ; MUBUF: $sgpr6 = S_ADD_I32 $sgpr32, 524544, implicit-def $scc - ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5) + ; MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) ; MUBUF: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; MUBUF: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2 ; MUBUF: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc @@ -41,14 +41,14 @@ body: | ; MUBUF: $sgpr33 = V_READLANE_B32 $vgpr2, 0 ; MUBUF: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec ; MUBUF: $sgpr6 = S_ADD_I32 $sgpr32, 524544, implicit-def $scc - ; MUBUF: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.3, addrspace 5) + ; MUBUF: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5) ; MUBUF: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; MUBUF: S_ENDPGM 0, implicit $vcc ; FLATSCR-LABEL: name: scavenge_sgpr_pei_no_sgprs ; FLATSCR: liveins: $vgpr1, $vgpr2 ; FLATSCR: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec ; FLATSCR: $sgpr6 = S_ADD_I32 $sgpr32, 8196, implicit-def $scc - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.3, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5) ; FLATSCR: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; FLATSCR: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2 ; FLATSCR: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc @@ -63,7 +63,7 @@ body: | ; FLATSCR: $sgpr33 = V_READLANE_B32 $vgpr2, 0 ; FLATSCR: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec ; FLATSCR: $sgpr6 = S_ADD_I32 $sgpr32, 8196, implicit-def $scc - ; FLATSCR: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.3, addrspace 5) + ; FLATSCR: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.3, addrspace 5) ; FLATSCR: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; FLATSCR: S_ENDPGM 0, implicit $vcc S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir index 37af668f460ec..ed341bcdeeafb 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir @@ -24,7 +24,7 @@ body: | ; CHECK-LABEL: name: scavenge_sgpr_pei ; CHECK: liveins: $vgpr1, $vgpr2 ; CHECK: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec - ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec :: (store 4 into %stack.2, addrspace 5) + ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; CHECK: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; CHECK: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2 ; CHECK: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 262080, implicit-def $scc @@ -36,7 +36,7 @@ body: | ; CHECK: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -524288, implicit-def $scc ; CHECK: $sgpr33 = V_READLANE_B32 $vgpr2, 0 ; CHECK: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec - ; CHECK: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec :: (load 4 from %stack.2, addrspace 5) + ; CHECK: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5) ; CHECK: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; CHECK: S_ENDPGM 0, implicit $vcc S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir index 9f2c37de31a6d..981643f141342 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir @@ -27,7 +27,7 @@ body: | ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2 ; GFX8: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8: $sgpr6 = S_ADD_I32 $sgpr32, 524544, implicit-def $scc - ; GFX8: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5) + ; GFX8: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) ; GFX8: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; GFX8: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2 ; GFX8: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc @@ -35,7 +35,7 @@ body: | ; GFX8: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 1572864, implicit-def $scc ; GFX8: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX8: $sgpr7 = S_ADD_I32 $sgpr33, 524800, implicit-def $scc - ; GFX8: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr7, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5) + ; GFX8: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr7, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) ; GFX8: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX8: $vcc_lo = S_MOV_B32 8192 ; GFX8: $vgpr3, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr3, 0, implicit $exec @@ -44,16 +44,16 @@ body: | ; GFX8: $sgpr33 = V_READLANE_B32 $vgpr2, 0 ; GFX8: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX8: $sgpr6 = S_ADD_I32 $sgpr32, 524544, implicit-def $scc - ; GFX8: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.3, addrspace 5) + ; GFX8: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5) ; GFX8: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; GFX8: $sgpr4 = S_ADD_I32 $sgpr33, 524800, implicit-def $scc - ; GFX8: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr4, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.4, addrspace 5) + ; GFX8: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr4, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5) ; GFX8: S_ENDPGM 0, csr_amdgpu_allvgprs ; GFX9-LABEL: name: pei_scavenge_vgpr_spill ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2 ; GFX9: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX9: $sgpr6 = S_ADD_I32 $sgpr32, 524544, implicit-def $scc - ; GFX9: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5) + ; GFX9: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) ; GFX9: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; GFX9: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2 ; GFX9: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc @@ -61,7 +61,7 @@ body: | ; GFX9: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 1572864, implicit-def $scc ; GFX9: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX9: $sgpr7 = S_ADD_I32 $sgpr33, 524800, implicit-def $scc - ; GFX9: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr7, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5) + ; GFX9: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr7, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) ; GFX9: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX9: $vgpr3 = V_ADD_U32_e32 8192, killed $vgpr3, implicit $exec ; GFX9: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec @@ -69,16 +69,16 @@ body: | ; GFX9: $sgpr33 = V_READLANE_B32 $vgpr2, 0 ; GFX9: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX9: $sgpr6 = S_ADD_I32 $sgpr32, 524544, implicit-def $scc - ; GFX9: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.3, addrspace 5) + ; GFX9: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5) ; GFX9: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; GFX9: $sgpr4 = S_ADD_I32 $sgpr33, 524800, implicit-def $scc - ; GFX9: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr4, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.4, addrspace 5) + ; GFX9: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr4, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5) ; GFX9: S_ENDPGM 0, csr_amdgpu_allvgprs ; GFX9-FLATSCR-LABEL: name: pei_scavenge_vgpr_spill ; GFX9-FLATSCR: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2 ; GFX9-FLATSCR: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX9-FLATSCR: $sgpr6 = S_ADD_I32 $sgpr32, 8196, implicit-def $scc - ; GFX9-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.3, addrspace 5) + ; GFX9-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5) ; GFX9-FLATSCR: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; GFX9-FLATSCR: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2 ; GFX9-FLATSCR: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc @@ -91,7 +91,7 @@ body: | ; GFX9-FLATSCR: $sgpr33 = V_READLANE_B32 $vgpr2, 0 ; GFX9-FLATSCR: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX9-FLATSCR: $sgpr6 = S_ADD_I32 $sgpr32, 8196, implicit-def $scc - ; GFX9-FLATSCR: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.3, addrspace 5) + ; GFX9-FLATSCR: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.3, addrspace 5) ; GFX9-FLATSCR: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; GFX9-FLATSCR: S_ENDPGM 0, csr_amdgpu_allvgprs $vgpr0 = V_MOV_B32_e32 %stack.0, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/phi-elimination-end-cf.mir b/llvm/test/CodeGen/AMDGPU/phi-elimination-end-cf.mir index 509af0a08b7f4..d465e9cbd6b47 100644 --- a/llvm/test/CodeGen/AMDGPU/phi-elimination-end-cf.mir +++ b/llvm/test/CodeGen/AMDGPU/phi-elimination-end-cf.mir @@ -46,7 +46,7 @@ body: | %15:sreg_32_xm0 = S_MOV_B32 61440 %16:sreg_32_xm0 = S_MOV_B32 -1 %17:sgpr_128 = REG_SEQUENCE undef %14:sreg_32_xm0, %subreg.sub0, undef %12:sreg_32_xm0, %subreg.sub1, %16, %subreg.sub2, %15, %subreg.sub3 - BUFFER_STORE_DWORD_OFFSET %4, %17, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + BUFFER_STORE_DWORD_OFFSET %4, %17, 0, 0, 0, 0, 0, implicit $exec :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) %19:vgpr_32 = COPY %4 %20:sreg_64 = SI_IF %0, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec S_BRANCH %bb.3 diff --git a/llvm/test/CodeGen/AMDGPU/postra-bundle-memops.mir b/llvm/test/CodeGen/AMDGPU/postra-bundle-memops.mir index 3d34eb01699a5..27703b949ceee 100644 --- a/llvm/test/CodeGen/AMDGPU/postra-bundle-memops.mir +++ b/llvm/test/CodeGen/AMDGPU/postra-bundle-memops.mir @@ -56,12 +56,12 @@ body: | ; GCN: BUFFER_STORE_DWORD_ADDR64 $vgpr0, $vgpr2_vgpr3, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec ; GCN: } ; GCN: BUNDLE implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit-def $vgpr3, implicit-def $vgpr3_lo16, implicit-def $vgpr3_hi16, implicit undef $vgpr4_vgpr5_vgpr6_vgpr7, implicit undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $exec { - ; GCN: $vgpr2 = IMAGE_LOAD_V1_V4 undef $vgpr4_vgpr5_vgpr6_vgpr7, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4) - ; GCN: $vgpr3 = IMAGE_LOAD_V1_V4 undef $vgpr4_vgpr5_vgpr6_vgpr7, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4) + ; GCN: $vgpr2 = IMAGE_LOAD_V1_V4 undef $vgpr4_vgpr5_vgpr6_vgpr7, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32)) + ; GCN: $vgpr3 = IMAGE_LOAD_V1_V4 undef $vgpr4_vgpr5_vgpr6_vgpr7, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32)) ; GCN: } ; GCN: BUNDLE implicit undef $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1, implicit undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $exec { - ; GCN: IMAGE_STORE_V4_V2 undef $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr0_vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (store 16) - ; GCN: IMAGE_STORE_V4_V2 undef $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr0_vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (store 16) + ; GCN: IMAGE_STORE_V4_V2 undef $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr0_vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (store (s128)) + ; GCN: IMAGE_STORE_V4_V2 undef $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr0_vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (store (s128)) ; GCN: } ; GCN: S_NOP 0 ; GCN: $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71 = S_LOAD_DWORDX8_IMM undef $sgpr10_sgpr11, 464, 0 @@ -101,10 +101,10 @@ body: | $vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr2, 0, 0, 0, 0, implicit $exec BUFFER_STORE_DWORD_ADDR64 $vgpr0, $vgpr2_vgpr3, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec BUFFER_STORE_DWORD_ADDR64 $vgpr0, $vgpr2_vgpr3, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec - $vgpr2 = IMAGE_LOAD_V1_V4 undef $vgpr4_vgpr5_vgpr6_vgpr7, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4) - $vgpr3 = IMAGE_LOAD_V1_V4 undef $vgpr4_vgpr5_vgpr6_vgpr7, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4) - IMAGE_STORE_V4_V2 undef $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr0_vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (store 16) - IMAGE_STORE_V4_V2 undef $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr0_vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (store 16) + $vgpr2 = IMAGE_LOAD_V1_V4 undef $vgpr4_vgpr5_vgpr6_vgpr7, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32)) + $vgpr3 = IMAGE_LOAD_V1_V4 undef $vgpr4_vgpr5_vgpr6_vgpr7, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32)) + IMAGE_STORE_V4_V2 undef $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr0_vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (store (s128)) + IMAGE_STORE_V4_V2 undef $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr0_vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, -1, 1, 0, 0, 0, 0, 0, implicit $exec :: (store (s128)) S_NOP 0 $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71 = S_LOAD_DWORDX8_IMM undef $sgpr10_sgpr11, 464, 0 $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75 = S_LOAD_DWORDX8_IMM undef $sgpr10_sgpr11, 128, 0 diff --git a/llvm/test/CodeGen/AMDGPU/regcoal-subrange-join-seg.mir b/llvm/test/CodeGen/AMDGPU/regcoal-subrange-join-seg.mir index 33bb91e7b2c29..98f3d2d1e237d 100644 --- a/llvm/test/CodeGen/AMDGPU/regcoal-subrange-join-seg.mir +++ b/llvm/test/CodeGen/AMDGPU/regcoal-subrange-join-seg.mir @@ -185,9 +185,9 @@ body: | bb.28: %9 = S_FF1_I32_B32 undef %10 %13 = V_MAD_U32_U24_e64 killed %9, 48, 32, 0, implicit $exec - %45 = BUFFER_LOAD_DWORD_OFFEN killed %13, undef %15, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) + %45 = BUFFER_LOAD_DWORD_OFFEN killed %13, undef %15, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) %46 = V_AND_B32_e32 1, killed %45, implicit $exec - %21 = S_BUFFER_LOAD_DWORD_SGPR undef %22, undef %23, 0 :: (dereferenceable invariant load 4) + %21 = S_BUFFER_LOAD_DWORD_SGPR undef %22, undef %23, 0 :: (dereferenceable invariant load (s32)) %25 = nofpexcept V_CMP_GE_F32_e64 0, 0, 0, killed %21, 0, implicit $mode, implicit $exec %26 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed %25, implicit $exec %62 = IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/regcoalesce-dbg.mir b/llvm/test/CodeGen/AMDGPU/regcoalesce-dbg.mir index 069a21509c074..5c84aa2c84d12 100644 --- a/llvm/test/CodeGen/AMDGPU/regcoalesce-dbg.mir +++ b/llvm/test/CodeGen/AMDGPU/regcoalesce-dbg.mir @@ -56,8 +56,8 @@ body: | %3 = COPY killed $vgpr0 %0 = COPY killed $sgpr0_sgpr1 - %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) - %5 = S_LOAD_DWORD_IMM killed %0, 13, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`) + %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`) + %5 = S_LOAD_DWORD_IMM killed %0, 13, 0 :: (non-temporal dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`) %18 = V_ASHRREV_I32_e32 31, %3, implicit $exec undef %19.sub0 = COPY killed %3 %19.sub1 = COPY killed %18 @@ -70,7 +70,7 @@ body: | %13.sub2_sub3 = COPY killed %12 %20 = V_LSHL_B64_e64 killed %19, 2, implicit $exec %16 = COPY killed %5 - BUFFER_STORE_DWORD_ADDR64 killed %16, killed %20, killed %13, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out) + BUFFER_STORE_DWORD_ADDR64 killed %16, killed %20, killed %13, 0, 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.out) S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/regcoalescing-remove-partial-redundancy-assert.mir b/llvm/test/CodeGen/AMDGPU/regcoalescing-remove-partial-redundancy-assert.mir index dd5b4a9ba4383..9e4b076699279 100644 --- a/llvm/test/CodeGen/AMDGPU/regcoalescing-remove-partial-redundancy-assert.mir +++ b/llvm/test/CodeGen/AMDGPU/regcoalescing-remove-partial-redundancy-assert.mir @@ -16,7 +16,7 @@ body: | %23:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 killed %21, implicit $mode, implicit $exec %108:vgpr_32 = V_LSHRREV_B32_e32 4, killed %23, implicit $exec undef %109.sub1:vreg_128 = COPY %108 - %28:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %29:sgpr_128, 3044, 0 :: (dereferenceable invariant load 4) + %28:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %29:sgpr_128, 3044, 0 :: (dereferenceable invariant load (s32)) S_CMP_EQ_U32 killed %28, 0, implicit-def $scc S_CBRANCH_SCC0 %bb.2, implicit killed $scc @@ -47,7 +47,7 @@ body: | S_BRANCH %bb.6 bb.6: - %36:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %37:sgpr_128, 2708, 0 :: (dereferenceable invariant load 4) + %36:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %37:sgpr_128, 2708, 0 :: (dereferenceable invariant load (s32)) %39:vgpr_32 = nnan arcp contract reassoc nofpexcept V_MAD_F32_e64 0, killed %110.sub1, 0, target-flags(amdgpu-gotprel32-lo) 0, 0, 0, 0, 0, implicit $mode, implicit $exec %40:vgpr_32 = nofpexcept V_MAD_F32_e64 0, %111.sub1, 0, target-flags(amdgpu-gotprel32-lo) 0, 0, 0, 0, 0, implicit $mode, implicit $exec %41:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 0, 0, killed %40, 1, 0, implicit $mode, implicit $exec @@ -83,7 +83,7 @@ body: | S_BRANCH %bb.8 bb.8: - dead %66:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %67:sgpr_128, 2704, 0 :: (dereferenceable invariant load 4) + dead %66:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %67:sgpr_128, 2704, 0 :: (dereferenceable invariant load (s32)) %138:vreg_128 = COPY killed %111 bb.9: diff --git a/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir b/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir index 7a78608de113d..46ce3930ce5c6 100644 --- a/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir +++ b/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir @@ -27,7 +27,7 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:vreg_512 = COPY %0 ; CHECK: bb.1: ; CHECK: successors: %bb.1(0x80000000) - ; CHECK: BUFFER_STORE_DWORD_OFFEN %0.sub3, undef %5:vgpr_32, $sgpr24_sgpr25_sgpr26_sgpr27, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store 4, align 8, addrspace 5) + ; CHECK: BUFFER_STORE_DWORD_OFFEN %0.sub3, undef %5:vgpr_32, $sgpr24_sgpr25_sgpr26_sgpr27, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32), align 8, addrspace 5) ; CHECK: dead %6:vgpr_32 = DS_READ_B32_gfx9 undef %7:vgpr_32, 0, 0, implicit $exec ; CHECK: dead %8:vreg_64 = DS_READ_B64_gfx9 [[V_MOV_B32_e32_]], 0, 0, implicit $exec ; CHECK: dead %9:vreg_128 = DS_READ_B128_gfx9 [[V_ADD_U32_e32_]], 0, 0, implicit $exec @@ -51,7 +51,7 @@ body: | %4:vreg_512 = COPY %0 bb.1: - BUFFER_STORE_DWORD_OFFEN %0.sub3, undef %5:vgpr_32, $sgpr24_sgpr25_sgpr26_sgpr27, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store 4, align 8, addrspace 5) + BUFFER_STORE_DWORD_OFFEN %0.sub3, undef %5:vgpr_32, $sgpr24_sgpr25_sgpr26_sgpr27, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32), align 8, addrspace 5) %6:vgpr_32 = DS_READ_B32_gfx9 undef %7:vgpr_32, 0, 0, implicit $exec %8:vreg_64 = DS_READ_B64_gfx9 %1, 0, 0, implicit $exec %9:vreg_128 = DS_READ_B128_gfx9 %2, 0, 0, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir b/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir index 142fe4e3d4d6d..430ce95c334f1 100644 --- a/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir +++ b/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir @@ -200,9 +200,9 @@ body: | %2:vgpr_32 = COPY $vgpr2 %1:vgpr_32 = COPY $vgpr1 %0:vgpr_32 = COPY $vgpr0 - %5:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) - %6:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 8, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) - %7:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 16, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) + %5:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 0, 0 :: (non-temporal dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`) + %6:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 8, 0 :: (non-temporal dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`) + %7:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 16, 0 :: (non-temporal dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`) %8:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 24, 0 %9:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 32, 0 %10:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3, 4, 0 @@ -239,7 +239,7 @@ body: | undef %40.sub0:vreg_64, %41:sreg_64_xexec = V_ADD_CO_U32_e64 0, %39.sub0, 0, implicit $exec %42:vgpr_32 = COPY %33 %40.sub1:vreg_64, dead %43:sreg_64_xexec = V_ADDC_U32_e64 %42, %39.sub1, %41, 0, implicit $exec - %44:vreg_64 = GLOBAL_LOAD_DWORDX2 %40, 0, 0, implicit $exec :: (load 8 from %ir.tmp34) + %44:vreg_64 = GLOBAL_LOAD_DWORDX2 %40, 0, 0, implicit $exec :: (load (s64) from %ir.tmp34) undef %45.sub1:vreg_64 = IMPLICIT_DEF %45.sub0:vreg_64 = COPY %37.sub1 %46:vreg_64 = V_LSHLREV_B64_e64 3, %45, implicit $exec @@ -247,7 +247,7 @@ body: | %49:vgpr_32 = COPY %33 %47.sub1:vreg_64, dead %50:sreg_64_xexec = V_ADDC_U32_e64 %49, %46.sub1, %48, 0, implicit $exec %51:vreg_64 = IMPLICIT_DEF - undef %52.sub0:vreg_64 = GLOBAL_LOAD_DWORD %35, 40, 0, implicit $exec :: (load 4 from %ir.18 + 8) + undef %52.sub0:vreg_64 = GLOBAL_LOAD_DWORD %35, 40, 0, implicit $exec :: (load (s32) from %ir.18 + 8) %52.sub1:vreg_64 = IMPLICIT_DEF %53:vreg_64 = V_LSHLREV_B64_e64 3, %52, implicit $exec undef %54.sub0:vreg_64, %55:sreg_64_xexec = V_ADD_CO_U32_e64 0, %53.sub0, 0, implicit $exec @@ -258,14 +258,14 @@ body: | %59:sreg_64 = IMPLICIT_DEF %60:sreg_32_xm0 = S_ADD_U32 %5.sub0, %59.sub0, implicit-def $scc %61:sgpr_32 = S_ADDC_U32 %5.sub1, %59.sub1, implicit-def dead $scc, implicit killed $scc - %62:vreg_64 = GLOBAL_LOAD_DWORDX2 %35, 0, 0, implicit $exec :: (load 8 from %ir.20, align 4) + %62:vreg_64 = GLOBAL_LOAD_DWORDX2 %35, 0, 0, implicit $exec :: (load (s64) from %ir.20, align 4) undef %63.sub1:vreg_64 = V_ASHRREV_I32_e32 31, %62.sub0, implicit $exec %63.sub0:vreg_64 = COPY %62.sub0 %64:vreg_64 = IMPLICIT_DEF undef %65.sub0:vreg_64, %66:sreg_64_xexec = V_ADD_CO_U32_e64 %60, %64.sub0, 0, implicit $exec %67:vgpr_32 = COPY %61 %65.sub1:vreg_64, dead %68:sreg_64_xexec = V_ADDC_U32_e64 %67, %64.sub1, %66, 0, implicit $exec - %69:vreg_128 = GLOBAL_LOAD_DWORDX4 %65, 0, 0, implicit $exec :: (load 16 from %ir.tmp58) + %69:vreg_128 = GLOBAL_LOAD_DWORDX4 %65, 0, 0, implicit $exec :: (load (s128) from %ir.tmp58) undef %70.sub1:vreg_64 = IMPLICIT_DEF %70.sub0:vreg_64 = IMPLICIT_DEF %71:vreg_64 = IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir b/llvm/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir index 27e3412967134..5981bc509cc73 100644 --- a/llvm/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir +++ b/llvm/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir @@ -25,8 +25,8 @@ body: | ; CHECK: successors: %bb.1(0x80000000) ; CHECK: liveins: $sgpr4_sgpr5 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5 - ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %2:vgpr_32, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr101, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5) - ; CHECK: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sgpr_64 = S_LOAD_DWORDX2_IMM [[COPY]](p4), 0, 0 :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %2:vgpr_32, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr101, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) + ; CHECK: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sgpr_64 = S_LOAD_DWORDX2_IMM [[COPY]](p4), 0, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 5329 ; CHECK: undef %5.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec @@ -37,10 +37,10 @@ body: | ; CHECK: bb.1: ; CHECK: successors: %bb.1(0x80000000) ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:VGPR_LO16 */, def dead %11 - ; CHECK: GLOBAL_STORE_DWORD undef %12:vreg_64, [[BUFFER_LOAD_DWORD_OFFEN]], 0, 0, implicit $exec :: (store 4, addrspace 1) + ; CHECK: GLOBAL_STORE_DWORD undef %12:vreg_64, [[BUFFER_LOAD_DWORD_OFFEN]], 0, 0, implicit $exec :: (store (s32), addrspace 1) ; CHECK: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; CHECK: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; CHECK: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 undef %14:vgpr_32, 0, 0, implicit $exec :: (load 8, addrspace 3) + ; CHECK: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 undef %14:vgpr_32, 0, 0, implicit $exec :: (load (s64), addrspace 3) ; CHECK: INLINEASM &"def $0 $1", 1 /* sideeffect attdialect */, 851978 /* regdef:VGPR_LO16 */, def %15, 851978 /* regdef:VGPR_LO16 */, def %16 ; CHECK: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_]], 0, 0, implicit $exec ; CHECK: [[DS_READ_B32_gfx9_1:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_1]], 0, 0, implicit $exec @@ -49,10 +49,10 @@ body: | ; CHECK: [[DS_READ_B32_gfx9_3:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_1]], 0, 0, implicit $exec ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:VGPR_LO16 */, def dead [[V_MOV_B32_e32_2]], 851978 /* regdef:VGPR_LO16 */, def dead [[V_MOV_B32_e32_3]], 851977 /* reguse:VGPR_LO16 */, [[DS_READ_B64_gfx9_]].sub0, 2147483657 /* reguse tiedto:$0 */, [[V_MOV_B32_e32_2]](tied-def 3), 2147549193 /* reguse tiedto:$1 */, [[V_MOV_B32_e32_3]](tied-def 5), 851977 /* reguse:VGPR_LO16 */, %15, 851977 /* reguse:VGPR_LO16 */, %16, 851977 /* reguse:VGPR_LO16 */, [[DS_READ_B32_gfx9_1]], 851977 /* reguse:VGPR_LO16 */, [[DS_READ_B32_gfx9_]], 851977 /* reguse:VGPR_LO16 */, [[DS_READ_B32_gfx9_3]], 851977 /* reguse:VGPR_LO16 */, [[DS_READ_B32_gfx9_2]] ; CHECK: %5.sub1:vreg_64 = COPY [[V_MOV_B32_e32_]] - ; CHECK: DS_WRITE_B32_gfx9 undef %28:vgpr_32, %21, 0, 0, implicit $exec :: (store 4, addrspace 3) - ; CHECK: DS_WRITE_B32_gfx9 undef %29:vgpr_32, %22, 0, 0, implicit $exec :: (store 4, addrspace 3) - ; CHECK: DS_WRITE_B64_gfx9 undef %30:vgpr_32, %5, 0, 0, implicit $exec :: (store 8, addrspace 3) - ; CHECK: undef %31.sub1:vreg_64 = FLAT_LOAD_DWORD undef %32:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + ; CHECK: DS_WRITE_B32_gfx9 undef %28:vgpr_32, %21, 0, 0, implicit $exec :: (store (s32), addrspace 3) + ; CHECK: DS_WRITE_B32_gfx9 undef %29:vgpr_32, %22, 0, 0, implicit $exec :: (store (s32), addrspace 3) + ; CHECK: DS_WRITE_B64_gfx9 undef %30:vgpr_32, %5, 0, 0, implicit $exec :: (store (s64), addrspace 3) + ; CHECK: undef %31.sub1:vreg_64 = FLAT_LOAD_DWORD undef %32:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) ; CHECK: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 1, [[DEF2]], implicit $exec ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_LOAD_DWORDX2_IMM]].sub1 ; CHECK: [[DEF]].sub1:vreg_64 = COPY [[V_MOV_B32_e32_]] @@ -68,19 +68,19 @@ body: | ; CHECK: undef %40.sub1:vreg_64, dead %41:sreg_64_xexec = V_ADDC_U32_e64 [[COPY1]], [[DEF]].sub1, %39, 0, implicit $exec ; CHECK: undef %42.sub0:sgpr_64 = V_READFIRSTLANE_B32 %38.sub0, implicit $exec ; CHECK: %42.sub1:sgpr_64 = V_READFIRSTLANE_B32 %40.sub1, implicit $exec - ; CHECK: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %42, 0, 0 :: (load 4, addrspace 1) + ; CHECK: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %42, 0, 0 :: (load (s32), addrspace 1) ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */ - ; CHECK: [[DS_READ_B32_gfx9_4:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 undef %45:vgpr_32, 0, 0, implicit $exec :: (load 4, addrspace 3) - ; CHECK: GLOBAL_STORE_DWORD undef %46:vreg_64, [[DS_READ_B32_gfx9_4]], 0, 0, implicit $exec :: (store 4, addrspace 1) + ; CHECK: [[DS_READ_B32_gfx9_4:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 undef %45:vgpr_32, 0, 0, implicit $exec :: (load (s32), addrspace 3) + ; CHECK: GLOBAL_STORE_DWORD undef %46:vreg_64, [[DS_READ_B32_gfx9_4]], 0, 0, implicit $exec :: (store (s32), addrspace 1) ; CHECK: %31.sub0:vreg_64 = COPY [[S_LOAD_DWORD_IMM]], implicit $exec - ; CHECK: DS_WRITE_B64_gfx9 undef %47:vgpr_32, %31, 0, 0, implicit $exec :: (store 8, addrspace 3) + ; CHECK: DS_WRITE_B64_gfx9 undef %47:vgpr_32, %31, 0, 0, implicit $exec :: (store (s64), addrspace 3) ; CHECK: S_BRANCH %bb.1 bb.0: liveins: $sgpr4_sgpr5 %0:sgpr_64(p4) = COPY $sgpr4_sgpr5 - %1:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %2:vgpr_32, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr101, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5) - %3:sgpr_64 = S_LOAD_DWORDX2_IMM %0(p4), 0, 0 :: (dereferenceable invariant load 8, align 16, addrspace 4) + %1:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %2:vgpr_32, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr101, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) + %3:sgpr_64 = S_LOAD_DWORDX2_IMM %0(p4), 0, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4) %4:sreg_32_xm0 = S_MOV_B32 5329 undef %5.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec %6:vgpr_32 = V_MOV_B32_e32 0, implicit $exec @@ -91,8 +91,8 @@ body: | bb.1: INLINEASM &"", 1, 851978, def %11:vgpr_32 - GLOBAL_STORE_DWORD undef %12:vreg_64, %1, 0, 0, implicit $exec :: (store 4, addrspace 1) - %13:vreg_64 = DS_READ_B64_gfx9 undef %14:vgpr_32, 0, 0, implicit $exec :: (load 8, addrspace 3) + GLOBAL_STORE_DWORD undef %12:vreg_64, %1, 0, 0, implicit $exec :: (store (s32), addrspace 1) + %13:vreg_64 = DS_READ_B64_gfx9 undef %14:vgpr_32, 0, 0, implicit $exec :: (load (s64), addrspace 3) INLINEASM &"def $0 $1", 1, 851978, def %15:vgpr_32, 851978, def %16:vgpr_32 %17:vgpr_32 = DS_READ_B32_gfx9 %6, 0, 0, implicit $exec %18:vgpr_32 = DS_READ_B32_gfx9 %7, 0, 0, implicit $exec @@ -105,10 +105,10 @@ body: | %26:sreg_64_xexec = V_CMP_GT_U32_e64 64, %25, implicit $exec %27:vgpr_32 = V_MOV_B32_e32 0, implicit $exec INLINEASM &"", 1, 851978, def dead %24, 851978, def dead %27, 851977, %13.sub0, 2147483657, %24(tied-def 3), 2147549193, %27(tied-def 5), 851977, %15, 851977, %16, 851977, %18, 851977, %17, 851977, %23, 851977, %19 - DS_WRITE_B32_gfx9 undef %28:vgpr_32, %21, 0, 0, implicit $exec :: (store 4, addrspace 3) - DS_WRITE_B32_gfx9 undef %29:vgpr_32, %22, 0, 0, implicit $exec :: (store 4, addrspace 3) - DS_WRITE_B64_gfx9 undef %30:vgpr_32, %5, 0, 0, implicit $exec :: (store 8, addrspace 3) - undef %31.sub1:vreg_64 = FLAT_LOAD_DWORD undef %32:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + DS_WRITE_B32_gfx9 undef %28:vgpr_32, %21, 0, 0, implicit $exec :: (store (s32), addrspace 3) + DS_WRITE_B32_gfx9 undef %29:vgpr_32, %22, 0, 0, implicit $exec :: (store (s32), addrspace 3) + DS_WRITE_B64_gfx9 undef %30:vgpr_32, %5, 0, 0, implicit $exec :: (store (s64), addrspace 3) + undef %31.sub1:vreg_64 = FLAT_LOAD_DWORD undef %32:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) %33:vgpr_32 = V_MUL_LO_U32_e64 %25, %4, implicit $exec %10:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, %25, %26, implicit $exec %34:vgpr_32 = V_SUB_U32_e32 %33, %9, implicit $exec @@ -122,12 +122,12 @@ body: | undef %40.sub1:vreg_64, dead %41:sreg_64_xexec = V_ADDC_U32_e64 %37, %8.sub1, %39, 0, implicit $exec undef %42.sub0:sgpr_64 = V_READFIRSTLANE_B32 %38.sub0, implicit $exec %42.sub1:sgpr_64 = V_READFIRSTLANE_B32 %40.sub1, implicit $exec - %43:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %42, 0, 0 :: (load 4, addrspace 1) + %43:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %42, 0, 0 :: (load (s32), addrspace 1) INLINEASM &"", 1 - %44:vgpr_32 = DS_READ_B32_gfx9 undef %45:vgpr_32, 0, 0, implicit $exec :: (load 4, addrspace 3) - GLOBAL_STORE_DWORD undef %46:vreg_64, %44, 0, 0, implicit $exec :: (store 4, addrspace 1) + %44:vgpr_32 = DS_READ_B32_gfx9 undef %45:vgpr_32, 0, 0, implicit $exec :: (load (s32), addrspace 3) + GLOBAL_STORE_DWORD undef %46:vreg_64, %44, 0, 0, implicit $exec :: (store (s32), addrspace 1) %31.sub0:vreg_64 = COPY %43, implicit $exec - DS_WRITE_B64_gfx9 undef %47:vgpr_32, %31, 0, 0, implicit $exec :: (store 8, addrspace 3) + DS_WRITE_B64_gfx9 undef %47:vgpr_32, %31, 0, 0, implicit $exec :: (store (s64), addrspace 3) S_BRANCH %bb.1 ... diff --git a/llvm/test/CodeGen/AMDGPU/schedule-barrier-fpmode.mir b/llvm/test/CodeGen/AMDGPU/schedule-barrier-fpmode.mir index b5163239c1022..c7687b7f05665 100644 --- a/llvm/test/CodeGen/AMDGPU/schedule-barrier-fpmode.mir +++ b/llvm/test/CodeGen/AMDGPU/schedule-barrier-fpmode.mir @@ -12,15 +12,15 @@ body: | ; CHECK-LABEL: name: denorm_mode_not_barrier ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load 4) - ; CHECK: [[GLOBAL_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 4, 0, implicit $exec :: (load 4) + ; CHECK: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (s32)) + ; CHECK: [[GLOBAL_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 4, 0, implicit $exec :: (load (s32)) ; CHECK: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[GLOBAL_LOAD_DWORD]], [[GLOBAL_LOAD_DWORD1]], implicit $exec ; CHECK: S_DENORM_MODE 0, implicit-def $mode, implicit $mode ; CHECK: S_ENDPGM 0, implicit [[V_ADD_U32_e32_]] %0:vreg_64 = COPY $vgpr0_vgpr1 - %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, implicit $exec :: (load 4) + %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, implicit $exec :: (load (s32)) S_DENORM_MODE 0, implicit-def $mode, implicit $mode - %2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, implicit $exec :: (load 4) + %2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, implicit $exec :: (load (s32)) %3:vgpr_32 = V_ADD_U32_e32 %1, %2, implicit $exec S_ENDPGM 0, implicit %3 ... @@ -35,15 +35,15 @@ body: | ; CHECK-LABEL: name: round_mode_not_barrier ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load 4) - ; CHECK: [[GLOBAL_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 4, 0, implicit $exec :: (load 4) + ; CHECK: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (s32)) + ; CHECK: [[GLOBAL_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 4, 0, implicit $exec :: (load (s32)) ; CHECK: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[GLOBAL_LOAD_DWORD]], [[GLOBAL_LOAD_DWORD1]], implicit $exec ; CHECK: S_ROUND_MODE 0, implicit-def $mode, implicit $mode ; CHECK: S_ENDPGM 0, implicit [[V_ADD_U32_e32_]] %0:vreg_64 = COPY $vgpr0_vgpr1 - %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, implicit $exec :: (load 4) + %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, implicit $exec :: (load (s32)) S_ROUND_MODE 0, implicit-def $mode, implicit $mode - %2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, implicit $exec :: (load 4) + %2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, implicit $exec :: (load (s32)) %3:vgpr_32 = V_ADD_U32_e32 %1, %2, implicit $exec S_ENDPGM 0, implicit %3 ... @@ -58,17 +58,17 @@ body: | ; CHECK-LABEL: name: denorm_mode_mode_def_use ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load 4) - ; CHECK: dead %3:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 4, 0, implicit $exec :: (load 4) + ; CHECK: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (s32)) + ; CHECK: dead %3:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 4, 0, implicit $exec :: (load (s32)) ; CHECK: S_DENORM_MODE 0, implicit-def $mode, implicit $mode ; CHECK: [[V_ADD_F32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_F32_e32 0, [[GLOBAL_LOAD_DWORD]], implicit $mode, implicit $exec ; CHECK: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[GLOBAL_LOAD_DWORD]], [[V_ADD_F32_e32_]], implicit $exec ; CHECK: S_ENDPGM 0, implicit [[V_ADD_F32_e32_]], implicit [[V_ADD_U32_e32_]] %0:vreg_64 = COPY $vgpr0_vgpr1 - %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, implicit $exec :: (load 4) + %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, implicit $exec :: (load (s32)) S_DENORM_MODE 0, implicit-def $mode, implicit $mode %2:vgpr_32 = V_ADD_F32_e32 0, %1, implicit $mode, implicit $exec - %3:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, implicit $exec :: (load 4) + %3:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, implicit $exec :: (load (s32)) %4:vgpr_32 = V_ADD_U32_e32 %1, %2, implicit $exec S_ENDPGM 0, implicit %2, implicit %4 ... @@ -83,17 +83,17 @@ body: | ; CHECK-LABEL: name: round_mode_mode_def_use ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load 4) - ; CHECK: dead %3:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 4, 0, implicit $exec :: (load 4) + ; CHECK: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load (s32)) + ; CHECK: dead %3:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 4, 0, implicit $exec :: (load (s32)) ; CHECK: S_ROUND_MODE 0, implicit-def $mode, implicit $mode ; CHECK: [[V_ADD_F32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_F32_e32 0, [[GLOBAL_LOAD_DWORD]], implicit $mode, implicit $exec ; CHECK: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[GLOBAL_LOAD_DWORD]], [[V_ADD_F32_e32_]], implicit $exec ; CHECK: S_ENDPGM 0, implicit [[V_ADD_F32_e32_]], implicit [[V_ADD_U32_e32_]] %0:vreg_64 = COPY $vgpr0_vgpr1 - %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, implicit $exec :: (load 4) + %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, implicit $exec :: (load (s32)) S_ROUND_MODE 0, implicit-def $mode, implicit $mode %2:vgpr_32 = V_ADD_F32_e32 0, %1, implicit $mode, implicit $exec - %3:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, implicit $exec :: (load 4) + %3:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, implicit $exec :: (load (s32)) %4:vgpr_32 = V_ADD_U32_e32 %1, %2, implicit $exec S_ENDPGM 0, implicit %2, implicit %4 ... diff --git a/llvm/test/CodeGen/AMDGPU/schedule-regpressure.mir b/llvm/test/CodeGen/AMDGPU/schedule-regpressure.mir index b4a2ce11d2fd2..5cd5fbf06aa42 100644 --- a/llvm/test/CodeGen/AMDGPU/schedule-regpressure.mir +++ b/llvm/test/CodeGen/AMDGPU/schedule-regpressure.mir @@ -47,7 +47,7 @@ body: | liveins: $sgpr4_sgpr5 %1 = COPY $sgpr4_sgpr5 - %5 = S_LOAD_DWORD_IMM %1, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`) + %5 = S_LOAD_DWORD_IMM %1, 0, 0 :: (non-temporal dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`) $m0 = S_MOV_B32 -1 %7 = COPY %5 %6 = DS_READ_B32 %7, 0, 0, implicit $m0, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/scheduler-handle-move-bundle.mir b/llvm/test/CodeGen/AMDGPU/scheduler-handle-move-bundle.mir index 3d299ad5157b6..2dbe91499cf4b 100644 --- a/llvm/test/CodeGen/AMDGPU/scheduler-handle-move-bundle.mir +++ b/llvm/test/CodeGen/AMDGPU/scheduler-handle-move-bundle.mir @@ -19,32 +19,32 @@ body: | ; GCN-LABEL: name: handleMove_bundle ; GCN: liveins: $sgpr4_sgpr5 ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load 4, align 16, addrspace 4) + ; GCN: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4) ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GCN: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GCN: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec - ; GCN: DS_WRITE_B32_gfx9 [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (store 4, addrspace 3) + ; GCN: DS_WRITE_B32_gfx9 [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (store (s32), addrspace 3) ; GCN: $vgpr0 = COPY [[S_LOAD_DWORD_IMM]] ; GCN: $m0 = S_MOV_B32 0 ; GCN: BUNDLE implicit $vgpr0, implicit $m0, implicit $exec { - ; GCN: DS_GWS_INIT $vgpr0, 11, implicit $m0, implicit $exec :: (store 4) + ; GCN: DS_GWS_INIT $vgpr0, 11, implicit $m0, implicit $exec :: (store (s32)) ; GCN: S_WAITCNT 0 ; GCN: } - ; GCN: DS_WRITE_B32_gfx9 [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_2]], 0, 0, implicit $exec :: (store 4, addrspace 3) + ; GCN: DS_WRITE_B32_gfx9 [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_2]], 0, 0, implicit $exec :: (store (s32), addrspace 3) ; GCN: S_ENDPGM 0 %2:sgpr_64 = COPY $sgpr4_sgpr5 - %5:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %2, 0, 0 :: (dereferenceable invariant load 4, align 16, addrspace 4) + %5:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %2, 0, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4) %6:vgpr_32 = V_MOV_B32_e32 1, implicit $exec %7:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - DS_WRITE_B32_gfx9 %7, %6, 0, 0, implicit $exec :: (store 4, addrspace 3) + DS_WRITE_B32_gfx9 %7, %6, 0, 0, implicit $exec :: (store (s32), addrspace 3) $m0 = S_MOV_B32 0 $vgpr0 = COPY %5 BUNDLE implicit killed $vgpr0, implicit $m0, implicit $exec { - DS_GWS_INIT $vgpr0, 11, implicit $m0, implicit $exec :: (store 4) + DS_GWS_INIT $vgpr0, 11, implicit $m0, implicit $exec :: (store (s32)) S_WAITCNT 0 } %8:vgpr_32 = V_MOV_B32_e32 2, implicit $exec - DS_WRITE_B32_gfx9 %7, %8, 0, 0, implicit $exec :: (store 4, addrspace 3) + DS_WRITE_B32_gfx9 %7, %8, 0, 0, implicit $exec :: (store (s32), addrspace 3) S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-gfx9.mir b/llvm/test/CodeGen/AMDGPU/sdwa-gfx9.mir index 37d78afc88a1b..0612de0122c3d 100644 --- a/llvm/test/CodeGen/AMDGPU/sdwa-gfx9.mir +++ b/llvm/test/CodeGen/AMDGPU/sdwa-gfx9.mir @@ -37,11 +37,11 @@ body: | %2 = COPY $sgpr30_sgpr31 %1 = COPY $vgpr2_vgpr3 %0 = COPY $vgpr0_vgpr1 - %3 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + %3 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) %12 = S_MOV_B32 123 %10 = V_LSHRREV_B32_e64 16, %3, implicit $exec %11 = V_ADD_CO_U32_e32 %12, killed %10, implicit-def $vcc, implicit $exec - FLAT_STORE_DWORD %0, %11, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) + FLAT_STORE_DWORD %0, %11, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) $sgpr30_sgpr31 = COPY %2 S_SETPC_B64_return $sgpr30_sgpr31 @@ -80,9 +80,9 @@ body: | %2 = COPY $sgpr30_sgpr31 %1 = COPY $vgpr2_vgpr3 %0 = COPY $vgpr0_vgpr1 - %3 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + %3 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) %10 = V_LSHRREV_B32_e64 16, %3, implicit $exec %11 = V_TRUNC_F32_e64 0, killed %10, 1, 2, implicit $mode, implicit $exec, implicit-def $vcc - FLAT_STORE_DWORD %0, %11, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) + FLAT_STORE_DWORD %0, %11, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) $sgpr30_sgpr31 = COPY %2 S_SETPC_B64_return $sgpr30_sgpr31 diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-ops.mir b/llvm/test/CodeGen/AMDGPU/sdwa-ops.mir index c642f5729ce3f..ca0e8238e2fe6 100644 --- a/llvm/test/CodeGen/AMDGPU/sdwa-ops.mir +++ b/llvm/test/CodeGen/AMDGPU/sdwa-ops.mir @@ -29,19 +29,19 @@ body: | %63:vgpr_32, %65:sreg_64_xexec = nsw V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec %64:vgpr_32, dead %66:sreg_64_xexec = nuw V_ADDC_U32_e64 %30.sub1, %0, killed %65, 0, implicit $exec %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1 - GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store 8) + GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64)) %161:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec %163:vgpr_32, %165:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %161, 0, implicit $exec %164:vgpr_32, dead %166:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %165, 0, implicit $exec %162:vreg_64 = REG_SEQUENCE %163, %subreg.sub0, %164, %subreg.sub1 - GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %162, %1, 0, 0, implicit $exec, implicit $exec :: (store 8) + GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %162, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64)) %171:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec %173:vgpr_32, %175:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %171, 0, implicit $exec %174:vgpr_32, dead %176:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %175, 0, implicit $exec %172:vreg_64 = REG_SEQUENCE %173, %subreg.sub0, %174, %subreg.sub1 - GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %172, %1, 0, 0, implicit $exec, implicit $exec :: (store 8) + GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %172, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64)) ... @@ -77,13 +77,13 @@ body: | %64:vgpr_32, dead %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %65, 0, implicit $exec %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1 - GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store 8) + GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64)) %161:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec %163:vgpr_32, %165:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %161, 0, implicit $exec %164:vgpr_32, dead %166:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %165, 0, implicit $exec %162:vreg_64 = REG_SEQUENCE %163, %subreg.sub0, %164, %subreg.sub1 - GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %162, %1, 0, 0, implicit $exec, implicit $exec :: (store 8) + GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %162, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64)) ... @@ -113,7 +113,7 @@ body: | %63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %65, 0, implicit $exec %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %66, %subreg.sub1 - GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store 8) + GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64)) ... @@ -143,7 +143,7 @@ body: | %63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %65, %subreg.sub1 - GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store 8) + GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64)) ... @@ -172,7 +172,7 @@ body: | %63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1 - GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store 8) + GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64)) ... @@ -201,7 +201,7 @@ body: | %30:vreg_64 = COPY $sgpr0_sgpr1 %63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %23, %subreg.sub1 - GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store 8) + GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64)) ... @@ -232,7 +232,7 @@ body: | %30:vreg_64 = COPY $sgpr0_sgpr1 %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %24, 0, implicit $exec %62:vreg_64 = REG_SEQUENCE %23, %subreg.sub0, %23, %subreg.sub1 - GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store 8) + GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64)) ... @@ -263,7 +263,7 @@ body: | %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec %31:vreg_64 = COPY $vcc %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1 - GLOBAL_STORE_DWORDX2_SADDR %31.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store 8) + GLOBAL_STORE_DWORDX2_SADDR %31.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64)) ... @@ -294,7 +294,7 @@ body: | %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec %31:vreg_64 = COPY $vcc %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1 - GLOBAL_STORE_DWORDX2_SADDR %31.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store 8) + GLOBAL_STORE_DWORDX2_SADDR %31.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64)) ... @@ -326,7 +326,7 @@ body: | %32:vreg_64 = REG_SEQUENCE %31, %subreg.sub0, %23, %subreg.sub1 %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1 - GLOBAL_STORE_DWORDX2_SADDR %32.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store 8) + GLOBAL_STORE_DWORDX2_SADDR %32.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64)) ... @@ -357,7 +357,7 @@ body: | %32:vreg_64 = REG_SEQUENCE %31, %subreg.sub0, %23, %subreg.sub1 %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1 - GLOBAL_STORE_DWORDX2_SADDR %32.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store 8) + GLOBAL_STORE_DWORDX2_SADDR %32.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64)) ... @@ -387,5 +387,5 @@ body: | %31:vreg_64 = COPY killed $vcc %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1 - GLOBAL_STORE_DWORDX2_SADDR %31.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store 8) + GLOBAL_STORE_DWORDX2_SADDR %31.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64)) diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-gfx10.mir b/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-gfx10.mir index 56f254196f375..dcb51fcb76653 100644 --- a/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-gfx10.mir +++ b/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-gfx10.mir @@ -80,7 +80,7 @@ body: | %2 = COPY $sgpr30_sgpr31 %1 = COPY $vgpr2_vgpr3 %0 = COPY $vgpr0_vgpr1 - %3 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + %3 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) %5 = S_MOV_B32 65535 %6 = S_MOV_B32 65535 @@ -130,7 +130,7 @@ body: | %100 = V_MOV_B32_e32 %48, implicit $exec - FLAT_STORE_DWORD %0, %100, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) + FLAT_STORE_DWORD %0, %100, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) $sgpr30_sgpr31 = COPY %2 S_SETPC_B64_return $sgpr30_sgpr31 @@ -227,7 +227,7 @@ body: | %2 = COPY $sgpr30_sgpr31 %1 = COPY $vgpr2_vgpr3 %0 = COPY $vgpr0_vgpr1 - %3 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + %3 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) %5 = S_MOV_B32 65535 %6 = S_MOV_B32 65535 @@ -286,7 +286,7 @@ body: | %100 = V_MOV_B32_e32 %60, implicit $exec - FLAT_STORE_DWORD %0, %100, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) + FLAT_STORE_DWORD %0, %100, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) $sgpr30_sgpr31 = COPY %2 S_SETPC_B64_return $sgpr30_sgpr31 diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir b/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir index 61c9afbc612e5..a2cad1398bcd1 100644 --- a/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir +++ b/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir @@ -89,7 +89,7 @@ body: | %2 = COPY $sgpr30_sgpr31 %1 = COPY $vgpr2_vgpr3 %0 = COPY $vgpr0_vgpr1 - %3 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + %3 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) %5 = S_MOV_B32 65535 %6 = S_MOV_B32 65535 @@ -139,7 +139,7 @@ body: | %100 = V_MOV_B32_e32 %48, implicit $exec - FLAT_STORE_DWORD %0, %100, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) + FLAT_STORE_DWORD %0, %100, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) $sgpr30_sgpr31 = COPY %2 S_SETPC_B64_return $sgpr30_sgpr31 @@ -256,7 +256,7 @@ body: | %2 = COPY $sgpr30_sgpr31 %1 = COPY $vgpr2_vgpr3 %0 = COPY $vgpr0_vgpr1 - %3 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + %3 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) %5 = S_MOV_B32 65535 %6 = S_MOV_B32 65535 @@ -315,7 +315,7 @@ body: | %100 = V_MOV_B32_e32 %60, implicit $exec - FLAT_STORE_DWORD %0, %100, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) + FLAT_STORE_DWORD %0, %100, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) $sgpr30_sgpr31 = COPY %2 S_SETPC_B64_return $sgpr30_sgpr31 @@ -400,7 +400,7 @@ body: | %2 = COPY $sgpr30_sgpr31 %1 = COPY $vgpr2_vgpr3 %0 = COPY $vgpr0_vgpr1 - %3 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + %3 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) %5 = S_MOV_B32 65535 %6 = S_MOV_B32 65535 @@ -441,7 +441,7 @@ body: | %100 = V_MOV_B32_e32 $vcc_lo, implicit $exec - FLAT_STORE_DWORD %0, %100, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) + FLAT_STORE_DWORD %0, %100, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) $sgpr30_sgpr31 = COPY %2 S_SETPC_B64_return $sgpr30_sgpr31 ... diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-preserve.mir b/llvm/test/CodeGen/AMDGPU/sdwa-preserve.mir index 6047643339869..04b727bb2266a 100644 --- a/llvm/test/CodeGen/AMDGPU/sdwa-preserve.mir +++ b/llvm/test/CodeGen/AMDGPU/sdwa-preserve.mir @@ -36,8 +36,8 @@ body: | %2 = COPY $sgpr30_sgpr31 %1 = COPY $vgpr2_vgpr3 %0 = COPY $vgpr0_vgpr1 - %3 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) - %4 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + %3 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) + %4 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) %5 = V_AND_B32_e32 65535, %3, implicit $exec %6 = V_LSHRREV_B32_e64 16, %4, implicit $exec @@ -51,7 +51,7 @@ body: | %13 = V_OR_B32_e64 %10, %12, implicit $exec - FLAT_STORE_DWORD %0, %13, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) + FLAT_STORE_DWORD %0, %13, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) $sgpr30_sgpr31 = COPY %2 S_SETPC_B64_return $sgpr30_sgpr31 @@ -88,14 +88,14 @@ body: | %2 = COPY $sgpr30_sgpr31 %1 = COPY $vgpr2_vgpr3 %0 = COPY $vgpr0_vgpr1 - %3 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) - %4 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + %3 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) + %4 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) %9:vgpr_32 = V_LSHRREV_B16_e64 8, %3, implicit $exec %10:sreg_32_xm0 = S_MOV_B32 255 %11:vgpr_32 = V_AND_B32_e64 %3, killed %10, implicit $exec %17:vgpr_32 = V_MOV_B32_sdwa 0, %4, 0, 5, 2, 4, implicit $exec, implicit %11(tied-def 0) - FLAT_STORE_DWORD %0, %17, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) + FLAT_STORE_DWORD %0, %17, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) S_ENDPGM 0 ... @@ -131,14 +131,14 @@ body: | %2 = COPY $sgpr30_sgpr31 %1 = COPY $vgpr2_vgpr3 %0 = COPY $vgpr0_vgpr1 - %3 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) - %4 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + %3 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) + %4 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) %9:vgpr_32 = V_LSHRREV_B16_e64 8, %3, implicit $exec %10:sreg_32_xm0 = S_MOV_B32 65535 %11:vgpr_32 = V_AND_B32_e64 %3, killed %10, implicit $exec %17:vgpr_32 = V_MOV_B32_sdwa 0, %4, 0, 5, 2, 4, implicit $exec, implicit %11(tied-def 0) - FLAT_STORE_DWORD %0, %17, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) + FLAT_STORE_DWORD %0, %17, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-scalar-ops.mir b/llvm/test/CodeGen/AMDGPU/sdwa-scalar-ops.mir index 45f1f7334668f..b79034ec91fe4 100644 --- a/llvm/test/CodeGen/AMDGPU/sdwa-scalar-ops.mir +++ b/llvm/test/CodeGen/AMDGPU/sdwa-scalar-ops.mir @@ -203,7 +203,7 @@ body: | liveins: $sgpr4_sgpr5 %4 = COPY $sgpr4_sgpr5 - %9 = S_LOAD_DWORDX2_IMM %4, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) + %9 = S_LOAD_DWORDX2_IMM %4, 0, 0 :: (non-temporal dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`) %8 = S_MOV_B64 0 %7 = COPY %9 %30 = V_MOV_B32_e32 1, implicit $exec @@ -221,26 +221,26 @@ body: | %15 = S_ADDC_U32 %7.sub1, %0.sub1, implicit-def dead $scc, implicit $scc %16 = REG_SEQUENCE %14, %subreg.sub0, %15, %subreg.sub1 %18 = COPY %16 - %17 = FLAT_LOAD_DWORD %18, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.uglygep45) + %17 = FLAT_LOAD_DWORD %18, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %ir.uglygep45) %60 = V_BFE_U32_e64 %17, 8, 8, implicit $exec %61 = V_LSHLREV_B32_e32 2, killed %60, implicit $exec %70 = V_ADD_CO_U32_e32 %7.sub0, %61, implicit-def $vcc, implicit $exec %66 = COPY %13 %65 = V_ADDC_U32_e32 0, %66, implicit-def $vcc, implicit $vcc, implicit $exec %67 = REG_SEQUENCE %70, %subreg.sub0, killed %65, %subreg.sub1 - FLAT_STORE_DWORD %67, %30, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.tmp9) + FLAT_STORE_DWORD %67, %30, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tmp9) %37 = S_ADD_U32 %14, 4, implicit-def $scc %38 = S_ADDC_U32 %15, 0, implicit-def dead $scc, implicit $scc %71 = COPY killed %37 %72 = COPY killed %38 %41 = REG_SEQUENCE killed %71, %subreg.sub0, killed %72, %subreg.sub1 - %40 = FLAT_LOAD_DWORD killed %41, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.scevgep) + %40 = FLAT_LOAD_DWORD killed %41, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %ir.scevgep) %73 = V_BFE_U32_e64 %40, 8, 8, implicit $exec %74 = V_LSHLREV_B32_e32 2, killed %73, implicit $exec %83 = V_ADD_CO_U32_e32 %7.sub0, %74, implicit-def $vcc, implicit $exec %78 = V_ADDC_U32_e32 0, %66, implicit-def $vcc, implicit $vcc, implicit $exec %80 = REG_SEQUENCE %83, %subreg.sub0, killed %78, %subreg.sub1 - FLAT_STORE_DWORD %80, %30, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.tmp17) + FLAT_STORE_DWORD %80, %30, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tmp17) %55 = S_ADD_U32 %0.sub0, 8, implicit-def $scc %56 = S_ADDC_U32 %0.sub1, 0, implicit-def dead $scc, implicit $scc %57 = REG_SEQUENCE %55, %subreg.sub0, killed %56, %subreg.sub1 @@ -365,7 +365,7 @@ body: | liveins: $sgpr4_sgpr5 %4 = COPY $sgpr4_sgpr5 - %9 = S_LOAD_DWORDX2_IMM %4, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) + %9 = S_LOAD_DWORDX2_IMM %4, 0, 0 :: (non-temporal dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`) %8 = S_MOV_B64 0 %7 = COPY %9 %30 = V_MOV_B32_e32 1, implicit $exec @@ -384,26 +384,26 @@ body: | %15 = S_ADDC_U32 %7.sub1, %0.sub1, implicit-def dead $scc, implicit $scc %16 = REG_SEQUENCE %14, %subreg.sub0, %15, %subreg.sub1 %18 = COPY %16 - %17 = FLAT_LOAD_DWORD %18, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.uglygep45) + %17 = FLAT_LOAD_DWORD %18, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %ir.uglygep45) %60 = V_BFE_U32_e64 %17, 8, 8, implicit $exec %61 = V_LSHLREV_B32_e32 %84, killed %60, implicit $exec %70 = V_ADD_CO_U32_e32 %7.sub0, %61, implicit-def $vcc, implicit $exec %66 = COPY %13 %65 = V_ADDC_U32_e32 0, %66, implicit-def $vcc, implicit $vcc, implicit $exec %67 = REG_SEQUENCE %70, %subreg.sub0, killed %65, %subreg.sub1 - FLAT_STORE_DWORD %67, %30, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.tmp9) + FLAT_STORE_DWORD %67, %30, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tmp9) %37 = S_ADD_U32 %14, 4, implicit-def $scc %38 = S_ADDC_U32 %15, 0, implicit-def dead $scc, implicit $scc %71 = COPY killed %37 %72 = COPY killed %38 %41 = REG_SEQUENCE killed %71, %subreg.sub0, killed %72, %subreg.sub1 - %40 = FLAT_LOAD_DWORD killed %41, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.scevgep) + %40 = FLAT_LOAD_DWORD killed %41, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %ir.scevgep) %73 = V_BFE_U32_e64 %40, 8, 8, implicit $exec %74 = V_LSHLREV_B32_e32 %84, killed %73, implicit $exec %83 = V_ADD_CO_U32_e32 %7.sub0, %74, implicit-def $vcc, implicit $exec %78 = V_ADDC_U32_e32 0, %66, implicit-def $vcc, implicit $vcc, implicit $exec %80 = REG_SEQUENCE %83, %subreg.sub0, killed %78, %subreg.sub1 - FLAT_STORE_DWORD %80, %30, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.tmp17) + FLAT_STORE_DWORD %80, %30, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.tmp17) %55 = S_ADD_U32 %0.sub0, 8, implicit-def $scc %56 = S_ADDC_U32 %0.sub1, 0, implicit-def dead $scc, implicit $scc %57 = REG_SEQUENCE %55, %subreg.sub0, killed %56, %subreg.sub1 diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir b/llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir index 1fd89e43f33ea..876fa6f5f2744 100644 --- a/llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir +++ b/llvm/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir @@ -41,7 +41,7 @@ body: | %2 = COPY $sgpr30_sgpr31 %1 = COPY $vgpr2_vgpr3 %0 = COPY $vgpr0_vgpr1 - %3 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + %3 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32)) %12 = V_LSHRREV_B32_e64 16, %3, implicit $exec %13 = V_BCNT_U32_B32_e64 %3, killed %12, implicit-def $vcc, implicit $exec @@ -56,6 +56,6 @@ body: | %19 = V_READLANE_B32 killed %18, 0, implicit-def $vcc, implicit $exec %20 = V_MOV_B32_e64 %19, implicit $exec - FLAT_STORE_DWORD %0, %20, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) + FLAT_STORE_DWORD %0, %20, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32)) $sgpr30_sgpr31 = COPY %2 S_SETPC_B64_return $sgpr30_sgpr31 diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-partially-undef.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-partially-undef.mir index f2da7c1001e6f..8b428f482bfc9 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-partially-undef.mir +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-partially-undef.mir @@ -23,7 +23,7 @@ body: | ; CHECK: liveins: $sgpr4, $vgpr0 ; CHECK: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, $vgpr0, implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5 ; CHECK: $vgpr0 = V_WRITELANE_B32 $sgpr5, 1, $vgpr0, implicit $sgpr4_sgpr5 - SI_SPILL_S64_SAVE renamable $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.0, align 4, addrspace 5) + SI_SPILL_S64_SAVE renamable $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5) ... @@ -47,6 +47,6 @@ body: | ; CHECK: liveins: $sgpr5, $vgpr0 ; CHECK: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, $vgpr0, implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5 ; CHECK: $vgpr0 = V_WRITELANE_B32 $sgpr5, 1, $vgpr0, implicit $sgpr4_sgpr5 - SI_SPILL_S64_SAVE renamable $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.0, align 4, addrspace 5) + SI_SPILL_S64_SAVE renamable $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5) ... diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-wrong-stack-id.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-wrong-stack-id.mir index ce47ba67ce777..7ad302e3bf79d 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-wrong-stack-id.mir +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-wrong-stack-id.mir @@ -33,16 +33,16 @@ # SHARE: stack-id: sgpr-spill, callee-saved-register: '', callee-saved-restored: true, # SHARE: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -# SHARE: SI_SPILL_S32_SAVE $sgpr32, %stack.2, implicit $exec, implicit $sgpr32 :: (store 4 into %stack.2, addrspace 5) -# SHARE: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) -# SHARE: SI_SPILL_S64_SAVE killed renamable $sgpr4_sgpr5, %stack.1, implicit $exec, implicit $sgpr32 :: (store 8 into %stack.1, align 4, addrspace 5) -# SHARE: renamable $sgpr4_sgpr5 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load 8 from %stack.1, align 4, addrspace 5) +# SHARE: SI_SPILL_S32_SAVE $sgpr32, %stack.2, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.2, addrspace 5) +# SHARE: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) +# SHARE: SI_SPILL_S64_SAVE killed renamable $sgpr4_sgpr5, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.1, align 4, addrspace 5) +# SHARE: renamable $sgpr4_sgpr5 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.1, align 4, addrspace 5) # SHARE: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, @func, csr_amdgpu_highregs, implicit undef $vgpr0 -# SHARE: $sgpr32 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load 4 from %stack.2, addrspace 5) -# SHARE: $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) -# SHARE: renamable $sgpr4_sgpr5 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load 8 from %stack.1, align 4, addrspace 5) +# SHARE: $sgpr32 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.2, addrspace 5) +# SHARE: $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) +# SHARE: renamable $sgpr4_sgpr5 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.1, align 4, addrspace 5) # SHARE: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, @func, csr_amdgpu_highregs, implicit $vgpr0 -# SHARE: $sgpr32 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load 4 from %stack.2, addrspace 5) +# SHARE: $sgpr32 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.2, addrspace 5) # NOSHARE: stack: # NOSHARE: - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4, @@ -58,17 +58,17 @@ # NOSHARE: stack-id: sgpr-spill, callee-saved-register: '', callee-saved-restored: true, # NOSHARE: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -# NOSHARE: SI_SPILL_S32_SAVE $sgpr32, %stack.2, implicit $exec, implicit $sgpr32 :: (store 4 into %stack.2, addrspace 5) -# NOSHARE: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) -# NOSHARE: SI_SPILL_S64_SAVE killed renamable $sgpr4_sgpr5, %stack.1, implicit $exec, implicit $sgpr32 :: (store 8 into %stack.1, align 4, addrspace 5) -# NOSHARE: renamable $sgpr4_sgpr5 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load 8 from %stack.1, align 4, addrspace 5) +# NOSHARE: SI_SPILL_S32_SAVE $sgpr32, %stack.2, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.2, addrspace 5) +# NOSHARE: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) +# NOSHARE: SI_SPILL_S64_SAVE killed renamable $sgpr4_sgpr5, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.1, align 4, addrspace 5) +# NOSHARE: renamable $sgpr4_sgpr5 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.1, align 4, addrspace 5) # NOSHARE: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, @func, csr_amdgpu_highregs, implicit undef $vgpr0 -# NOSHARE: $sgpr32 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load 4 from %stack.2, addrspace 5) -# NOSHARE: SI_SPILL_S32_SAVE $sgpr32, %stack.3, implicit $exec, implicit $sgpr32 :: (store 4 into %stack.3, addrspace 5) -# NOSHARE: $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) -# NOSHARE: renamable $sgpr4_sgpr5 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load 8 from %stack.1, align 4, addrspace 5) +# NOSHARE: $sgpr32 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.2, addrspace 5) +# NOSHARE: SI_SPILL_S32_SAVE $sgpr32, %stack.3, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.3, addrspace 5) +# NOSHARE: $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) +# NOSHARE: renamable $sgpr4_sgpr5 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.1, align 4, addrspace 5) # NOSHARE: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, @func, csr_amdgpu_highregs, implicit $vgpr0 -# NOSHARE: $sgpr32 = SI_SPILL_S32_RESTORE %stack.3, implicit $exec, implicit $sgpr32 :: (load 4 from %stack.3, addrspace 5) +# NOSHARE: $sgpr32 = SI_SPILL_S32_RESTORE %stack.3, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.3, addrspace 5) ... diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir index cdaf6094a32bb..430970d4168bb 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir @@ -66,89 +66,89 @@ body: | ; GCN32: $sgpr97 = S_ADDC_U32 $sgpr97, 0, implicit-def $scc, implicit $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 ; GCN32: renamable $sgpr12 = IMPLICIT_DEF ; GCN32: INLINEASM &"", 0 /* attdialect */, implicit-def $vgpr0 - ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5) ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc - ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5) ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc ; GCN32: $vgpr0 = V_WRITELANE_B32 killed $sgpr12, 0, undef $vgpr0 ; GCN32: $sgpr12 = S_OR_SAVEEXEC_B32 1, implicit-def $exec, implicit-def $scc, implicit $exec - ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) ; GCN32: $exec_lo = S_MOV_B32 -1 - ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5) ; GCN32: INLINEASM &"", 1 /* sideeffect attdialect */, implicit $vgpr0 ; GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 ; GCN32: renamable $sgpr12 = IMPLICIT_DEF - ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5) ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc - ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5) ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0 ; GCN32: $sgpr12 = S_OR_SAVEEXEC_B32 1, implicit-def $exec, implicit-def $scc, implicit $exec - ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) + ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) ; GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 ; GCN32: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0 - ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5) ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc - ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5) ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc ; GCN32: renamable $sgpr12_sgpr13 = IMPLICIT_DEF - ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5) ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc - ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5) ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13 ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit killed $sgpr12_sgpr13 ; GCN32: $sgpr12 = S_OR_SAVEEXEC_B32 3, implicit-def $exec, implicit-def $scc, implicit $exec - ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) ; GCN32: $exec_lo = S_MOV_B32 -1 - ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5) ; GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 ; GCN32: renamable $sgpr12_sgpr13 = IMPLICIT_DEF - ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5) ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc - ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5) ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13 ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13 ; GCN32: $sgpr12 = S_OR_SAVEEXEC_B32 3, implicit-def $exec, implicit-def $scc, implicit $exec - ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) + ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) ; GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 ; GCN32: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0 - ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5) ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc - ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5) ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc ; GCN32: renamable $sgpr12_sgpr13_sgpr14 = IMPLICIT_DEF - ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5) ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc - ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5) ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14 ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14 ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14 ; GCN32: $sgpr12 = S_OR_SAVEEXEC_B32 7, implicit-def $exec, implicit-def $scc, implicit $exec - ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 16, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.2, addrspace 5) + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 16, 0, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; GCN32: $exec_lo = S_MOV_B32 -1 - ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5) ; GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 ; GCN32: renamable $sgpr12_sgpr13_sgpr14_sgpr15 = IMPLICIT_DEF - ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5) ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc - ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5) ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15 ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15 ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15 ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15 ; GCN32: $sgpr12 = S_OR_SAVEEXEC_B32 15, implicit-def $exec, implicit-def $scc, implicit $exec - ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 28, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5) + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 28, 0, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) ; GCN32: $exec_lo = S_MOV_B32 -1 - ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5) ; GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 ; GCN32: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 = IMPLICIT_DEF - ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5) ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc - ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5) ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 @@ -156,14 +156,14 @@ body: | ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 ; GCN32: $sgpr12 = S_OR_SAVEEXEC_B32 31, implicit-def $exec, implicit-def $scc, implicit $exec - ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 44, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5) + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 44, 0, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) ; GCN32: $exec_lo = S_MOV_B32 -1 - ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5) ; GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 ; GCN32: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = IMPLICIT_DEF - ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5) ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc - ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5) ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 @@ -174,14 +174,14 @@ body: | ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr19, 7, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 ; GCN32: $sgpr12 = S_OR_SAVEEXEC_B32 255, implicit-def $exec, implicit-def $scc, implicit $exec - ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 64, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.5, addrspace 5) + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 64, 0, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5) ; GCN32: $exec_lo = S_MOV_B32 -1 - ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5) ; GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 ; GCN32: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF - ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5) ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc - ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5) ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 @@ -200,14 +200,14 @@ body: | ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr26, 14, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr27, 15, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 ; GCN32: $sgpr12 = S_OR_SAVEEXEC_B32 65535, implicit-def $exec, implicit-def $scc, implicit $exec - ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 96, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.6, addrspace 5) + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 96, 0, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5) ; GCN32: $exec_lo = S_MOV_B32 -1 - ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5) ; GCN32: $exec_lo = S_MOV_B32 killed $sgpr12 ; GCN32: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = IMPLICIT_DEF - ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5) ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc - ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5) ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr64, 0, undef $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr65, 1, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 @@ -242,9 +242,9 @@ body: | ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr94, 30, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr95, 31, $vgpr0, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 ; GCN32: $sgpr64 = S_OR_SAVEEXEC_B32 4294967295, implicit-def $exec, implicit-def $scc, implicit $exec - ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 160, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.7, addrspace 5) + ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 160, 0, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5) ; GCN32: $exec_lo = S_MOV_B32 -1 - ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5) ; GCN32: $exec_lo = S_MOV_B32 killed $sgpr64 ; FLATSCR-LABEL: name: check_spill ; FLATSCR: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr11, $sgpr0_sgpr1 @@ -253,81 +253,81 @@ body: | ; FLATSCR: $flat_scr_hi = S_ADDC_U32 $sgpr1, 0, implicit-def $scc, implicit $scc ; FLATSCR: renamable $sgpr12 = IMPLICIT_DEF ; FLATSCR: INLINEASM &"", 0 /* attdialect */, implicit-def $vgpr0 - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc ; FLATSCR: $vgpr0 = V_WRITELANE_B32 killed $sgpr12, 0, undef $vgpr0 ; FLATSCR: $sgpr12 = S_MOV_B32 $exec_lo ; FLATSCR: $exec_lo = S_MOV_B32 1 - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR: $exec_lo = S_MOV_B32 -1 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5) ; FLATSCR: INLINEASM &"", 1 /* sideeffect attdialect */, implicit $vgpr0 ; FLATSCR: $exec_lo = S_MOV_B32 killed $sgpr12 ; FLATSCR: renamable $sgpr12 = IMPLICIT_DEF - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0 ; FLATSCR: $sgpr12 = S_MOV_B32 $exec_lo ; FLATSCR: $exec_lo = S_MOV_B32 1 - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR: $exec_lo = S_MOV_B32 killed $sgpr12 ; FLATSCR: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5) ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5) ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc ; FLATSCR: renamable $sgpr12_sgpr13 = IMPLICIT_DEF - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13 ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit killed $sgpr12_sgpr13 ; FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec ; FLATSCR: $exec = S_MOV_B64 3 - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 8, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.1, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 8, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) ; FLATSCR: $exec = S_MOV_B64 -1 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5) ; FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13 ; FLATSCR: renamable $sgpr12_sgpr13 = IMPLICIT_DEF - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13 ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13 ; FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec ; FLATSCR: $exec = S_MOV_B64 3 - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 8, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.1, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 8, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) ; FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13 ; FLATSCR: $sgpr13 = V_READLANE_B32 $vgpr0, 1 ; FLATSCR: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5) ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5) ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc ; FLATSCR: renamable $sgpr12_sgpr13_sgpr14 = IMPLICIT_DEF - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14 ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14 ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14 ; FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec ; FLATSCR: $exec = S_MOV_B64 7 - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 16, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.2, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 16, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) ; FLATSCR: $exec = S_MOV_B64 -1 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5) ; FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13 ; FLATSCR: renamable $sgpr12_sgpr13_sgpr14_sgpr15 = IMPLICIT_DEF - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15 ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15 @@ -335,14 +335,14 @@ body: | ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15 ; FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec ; FLATSCR: $exec = S_MOV_B64 15 - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 28, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.3, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 28, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5) ; FLATSCR: $exec = S_MOV_B64 -1 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5) ; FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13 ; FLATSCR: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 = IMPLICIT_DEF - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 @@ -351,14 +351,14 @@ body: | ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 ; FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec ; FLATSCR: $exec = S_MOV_B64 31 - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 44, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.4, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 44, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.4, addrspace 5) ; FLATSCR: $exec = S_MOV_B64 -1 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5) ; FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13 ; FLATSCR: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = IMPLICIT_DEF - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 @@ -370,14 +370,14 @@ body: | ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr19, 7, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 ; FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec ; FLATSCR: $exec = S_MOV_B64 255 - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 64, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.5, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 64, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.5, addrspace 5) ; FLATSCR: $exec = S_MOV_B64 -1 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5) ; FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13 ; FLATSCR: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 @@ -397,14 +397,14 @@ body: | ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr27, 15, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 ; FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec ; FLATSCR: $exec = S_MOV_B64 65535 - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 96, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.6, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 96, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.6, addrspace 5) ; FLATSCR: $exec = S_MOV_B64 -1 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5) ; FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13 ; FLATSCR: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = IMPLICIT_DEF - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr64, 0, undef $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr65, 1, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 @@ -440,9 +440,9 @@ body: | ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr95, 31, $vgpr0, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 ; FLATSCR: $sgpr64_sgpr65 = S_MOV_B64 $exec ; FLATSCR: $exec = S_MOV_B64 4294967295 - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 160, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.7, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 160, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.7, addrspace 5) ; FLATSCR: $exec = S_MOV_B64 -1 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5) ; FLATSCR: $exec = S_MOV_B64 killed $sgpr64_sgpr65 ; GCN64-MUBUF-LABEL: name: check_spill ; GCN64-MUBUF: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr11 @@ -456,74 +456,74 @@ body: | ; GCN64-MUBUF: renamable $sgpr12 = IMPLICIT_DEF ; GCN64-MUBUF: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GCN64-MUBUF: $exec = S_MOV_B64 1, implicit-def $vgpr0 - ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 killed $sgpr12, 0, undef $vgpr0 - ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 4, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) - ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 4, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-MUBUF: renamable $sgpr12 = IMPLICIT_DEF ; GCN64-MUBUF: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GCN64-MUBUF: $exec = S_MOV_B64 1, implicit-def $vgpr0 - ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0 - ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 4, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) - ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 4, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-MUBUF: renamable $sgpr12_sgpr13 = IMPLICIT_DEF ; GCN64-MUBUF: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GCN64-MUBUF: $exec = S_MOV_B64 3, implicit-def $vgpr0 - ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13 ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit killed $sgpr12_sgpr13 - ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) - ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) + ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-MUBUF: renamable $sgpr12_sgpr13 = IMPLICIT_DEF ; GCN64-MUBUF: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GCN64-MUBUF: $exec = S_MOV_B64 3, implicit-def $vgpr0 - ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13 ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13 - ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) - ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) + ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-MUBUF: renamable $sgpr12_sgpr13_sgpr14 = IMPLICIT_DEF ; GCN64-MUBUF: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GCN64-MUBUF: $exec = S_MOV_B64 7, implicit-def $vgpr0 - ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14 ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14 ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14 - ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 16, 0, 0, 0, implicit $exec :: (store 4 into %stack.2, addrspace 5) - ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 16, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) + ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-MUBUF: renamable $sgpr12_sgpr13_sgpr14_sgpr15 = IMPLICIT_DEF ; GCN64-MUBUF: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GCN64-MUBUF: $exec = S_MOV_B64 15, implicit-def $vgpr0 - ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15 ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15 ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15 ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15 - ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 28, 0, 0, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5) - ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 28, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) + ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-MUBUF: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 = IMPLICIT_DEF ; GCN64-MUBUF: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GCN64-MUBUF: $exec = S_MOV_B64 31, implicit-def $vgpr0 - ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 - ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 44, 0, 0, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5) - ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 44, 0, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) + ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-MUBUF: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = IMPLICIT_DEF ; GCN64-MUBUF: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GCN64-MUBUF: $exec = S_MOV_B64 255, implicit-def $vgpr0 - ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 @@ -532,13 +532,13 @@ body: | ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr19, 7, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 64, 0, 0, 0, implicit $exec :: (store 4 into %stack.5, addrspace 5) - ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 64, 0, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5) + ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-MUBUF: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF ; GCN64-MUBUF: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GCN64-MUBUF: $exec = S_MOV_B64 65535, implicit-def $vgpr0 - ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 @@ -555,13 +555,13 @@ body: | ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr25, 13, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr26, 14, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr27, 15, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 - ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 96, 0, 0, 0, implicit $exec :: (store 4 into %stack.6, addrspace 5) - ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 96, 0, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5) + ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-MUBUF: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = IMPLICIT_DEF ; GCN64-MUBUF: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GCN64-MUBUF: $exec = S_MOV_B64 4294967295, implicit-def $vgpr0 - ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr64, 0, undef $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr65, 1, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr66, 2, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 @@ -594,17 +594,17 @@ body: | ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr93, 29, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr94, 30, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr95, 31, $vgpr0, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 160, 0, 0, 0, implicit $exec :: (store 4 into %stack.7, addrspace 5) - ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 160, 0, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5) + ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-MUBUF: renamable $sgpr12 = IMPLICIT_DEF ; GCN64-MUBUF: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GCN64-MUBUF: $exec = S_MOV_B64 1, implicit-def $vgpr0 - ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0 ; GCN64-MUBUF: $sgpr2 = S_ADD_I32 $sgpr33, 262144, implicit-def $scc - ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, killed $sgpr2, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, align 4096, addrspace 5) - ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, killed $sgpr2, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.8, align 4096, addrspace 5) + ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN32-MUBUF-LABEL: name: check_spill ; GCN32-MUBUF: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr11 @@ -618,74 +618,74 @@ body: | ; GCN32-MUBUF: renamable $sgpr12 = IMPLICIT_DEF ; GCN32-MUBUF: $sgpr0 = S_MOV_B32 $exec_lo ; GCN32-MUBUF: $exec_lo = S_MOV_B32 1, implicit-def $vgpr0 - ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 killed $sgpr12, 0, undef $vgpr0 - ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) - ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 ; GCN32-MUBUF: renamable $sgpr12 = IMPLICIT_DEF ; GCN32-MUBUF: $sgpr0 = S_MOV_B32 $exec_lo ; GCN32-MUBUF: $exec_lo = S_MOV_B32 1, implicit-def $vgpr0 - ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0 - ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) - ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 ; GCN32-MUBUF: renamable $sgpr12_sgpr13 = IMPLICIT_DEF ; GCN32-MUBUF: $sgpr0 = S_MOV_B32 $exec_lo ; GCN32-MUBUF: $exec_lo = S_MOV_B32 3, implicit-def $vgpr0 - ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13 ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit killed $sgpr12_sgpr13 - ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) - ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) + ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 ; GCN32-MUBUF: renamable $sgpr12_sgpr13 = IMPLICIT_DEF ; GCN32-MUBUF: $sgpr0 = S_MOV_B32 $exec_lo ; GCN32-MUBUF: $exec_lo = S_MOV_B32 3, implicit-def $vgpr0 - ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13 ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13 - ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) - ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) + ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 ; GCN32-MUBUF: renamable $sgpr12_sgpr13_sgpr14 = IMPLICIT_DEF ; GCN32-MUBUF: $sgpr0 = S_MOV_B32 $exec_lo ; GCN32-MUBUF: $exec_lo = S_MOV_B32 7, implicit-def $vgpr0 - ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14 ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14 ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14 - ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 16, 0, 0, 0, implicit $exec :: (store 4 into %stack.2, addrspace 5) - ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 16, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) + ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 ; GCN32-MUBUF: renamable $sgpr12_sgpr13_sgpr14_sgpr15 = IMPLICIT_DEF ; GCN32-MUBUF: $sgpr0 = S_MOV_B32 $exec_lo ; GCN32-MUBUF: $exec_lo = S_MOV_B32 15, implicit-def $vgpr0 - ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15 ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15 ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15 ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15 - ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 28, 0, 0, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5) - ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 28, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) + ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 ; GCN32-MUBUF: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 = IMPLICIT_DEF ; GCN32-MUBUF: $sgpr0 = S_MOV_B32 $exec_lo ; GCN32-MUBUF: $exec_lo = S_MOV_B32 31, implicit-def $vgpr0 - ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 - ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 44, 0, 0, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5) - ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 44, 0, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) + ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 ; GCN32-MUBUF: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = IMPLICIT_DEF ; GCN32-MUBUF: $sgpr0 = S_MOV_B32 $exec_lo ; GCN32-MUBUF: $exec_lo = S_MOV_B32 255, implicit-def $vgpr0 - ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 @@ -694,13 +694,13 @@ body: | ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr19, 7, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 64, 0, 0, 0, implicit $exec :: (store 4 into %stack.5, addrspace 5) - ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 64, 0, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5) + ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 ; GCN32-MUBUF: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF ; GCN32-MUBUF: $sgpr0 = S_MOV_B32 $exec_lo ; GCN32-MUBUF: $exec_lo = S_MOV_B32 65535, implicit-def $vgpr0 - ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 @@ -717,13 +717,13 @@ body: | ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr25, 13, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr26, 14, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr27, 15, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 - ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 96, 0, 0, 0, implicit $exec :: (store 4 into %stack.6, addrspace 5) - ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 96, 0, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5) + ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 ; GCN32-MUBUF: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = IMPLICIT_DEF ; GCN32-MUBUF: $sgpr0 = S_MOV_B32 $exec_lo ; GCN32-MUBUF: $exec_lo = S_MOV_B32 4294967295, implicit-def $vgpr0 - ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr64, 0, undef $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr65, 1, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr66, 2, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 @@ -756,17 +756,17 @@ body: | ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr93, 29, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr94, 30, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr95, 31, $vgpr0, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 160, 0, 0, 0, implicit $exec :: (store 4 into %stack.7, addrspace 5) - ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 160, 0, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5) + ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 ; GCN32-MUBUF: renamable $sgpr12 = IMPLICIT_DEF ; GCN32-MUBUF: $sgpr0 = S_MOV_B32 $exec_lo ; GCN32-MUBUF: $exec_lo = S_MOV_B32 1, implicit-def $vgpr0 - ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0 ; GCN32-MUBUF: $sgpr1 = S_ADD_I32 $sgpr33, 131072, implicit-def $scc - ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, killed $sgpr1, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, align 4096, addrspace 5) - ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, killed $sgpr1, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.8, align 4096, addrspace 5) + ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 ; GCN64-FLATSCR-LABEL: name: check_spill ; GCN64-FLATSCR: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr11, $sgpr0_sgpr1 @@ -776,74 +776,74 @@ body: | ; GCN64-FLATSCR: renamable $sgpr12 = IMPLICIT_DEF ; GCN64-FLATSCR: $sgpr2_sgpr3 = S_MOV_B64 $exec ; GCN64-FLATSCR: $exec = S_MOV_B64 1, implicit-def $vgpr0 - ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 killed $sgpr12, 0, undef $vgpr0 - ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5) - ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) + ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0 ; GCN64-FLATSCR: renamable $sgpr12 = IMPLICIT_DEF ; GCN64-FLATSCR: $sgpr2_sgpr3 = S_MOV_B64 $exec ; GCN64-FLATSCR: $exec = S_MOV_B64 1, implicit-def $vgpr0 - ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0 - ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5) - ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) + ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0 ; GCN64-FLATSCR: renamable $sgpr12_sgpr13 = IMPLICIT_DEF ; GCN64-FLATSCR: $sgpr2_sgpr3 = S_MOV_B64 $exec ; GCN64-FLATSCR: $exec = S_MOV_B64 3, implicit-def $vgpr0 - ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13 ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit killed $sgpr12_sgpr13 - ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.1, addrspace 5) - ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) + ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0 ; GCN64-FLATSCR: renamable $sgpr12_sgpr13 = IMPLICIT_DEF ; GCN64-FLATSCR: $sgpr2_sgpr3 = S_MOV_B64 $exec ; GCN64-FLATSCR: $exec = S_MOV_B64 3, implicit-def $vgpr0 - ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13 ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13 - ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.1, addrspace 5) - ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) + ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0 ; GCN64-FLATSCR: renamable $sgpr12_sgpr13_sgpr14 = IMPLICIT_DEF ; GCN64-FLATSCR: $sgpr2_sgpr3 = S_MOV_B64 $exec ; GCN64-FLATSCR: $exec = S_MOV_B64 7, implicit-def $vgpr0 - ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14 ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14 ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14 - ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.2, addrspace 5) - ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) + ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0 ; GCN64-FLATSCR: renamable $sgpr12_sgpr13_sgpr14_sgpr15 = IMPLICIT_DEF ; GCN64-FLATSCR: $sgpr2_sgpr3 = S_MOV_B64 $exec ; GCN64-FLATSCR: $exec = S_MOV_B64 15, implicit-def $vgpr0 - ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15 ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15 ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15 ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15 - ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 28, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.3, addrspace 5) - ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 28, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5) + ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0 ; GCN64-FLATSCR: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 = IMPLICIT_DEF ; GCN64-FLATSCR: $sgpr2_sgpr3 = S_MOV_B64 $exec ; GCN64-FLATSCR: $exec = S_MOV_B64 31, implicit-def $vgpr0 - ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 - ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 44, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.4, addrspace 5) - ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 44, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.4, addrspace 5) + ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0 ; GCN64-FLATSCR: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = IMPLICIT_DEF ; GCN64-FLATSCR: $sgpr2_sgpr3 = S_MOV_B64 $exec ; GCN64-FLATSCR: $exec = S_MOV_B64 255, implicit-def $vgpr0 - ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 @@ -852,13 +852,13 @@ body: | ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr19, 7, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 64, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.5, addrspace 5) - ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 64, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.5, addrspace 5) + ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0 ; GCN64-FLATSCR: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF ; GCN64-FLATSCR: $sgpr2_sgpr3 = S_MOV_B64 $exec ; GCN64-FLATSCR: $exec = S_MOV_B64 65535, implicit-def $vgpr0 - ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 @@ -875,13 +875,13 @@ body: | ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr25, 13, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr26, 14, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr27, 15, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 - ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 96, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.6, addrspace 5) - ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 96, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.6, addrspace 5) + ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0 ; GCN64-FLATSCR: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = IMPLICIT_DEF ; GCN64-FLATSCR: $sgpr2_sgpr3 = S_MOV_B64 $exec ; GCN64-FLATSCR: $exec = S_MOV_B64 4294967295, implicit-def $vgpr0 - ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr64, 0, undef $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr65, 1, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr66, 2, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 @@ -914,17 +914,17 @@ body: | ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr93, 29, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr94, 30, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr95, 31, $vgpr0, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 160, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.7, addrspace 5) - ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 160, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.7, addrspace 5) + ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0 ; GCN64-FLATSCR: renamable $sgpr12 = IMPLICIT_DEF ; GCN64-FLATSCR: $sgpr2_sgpr3 = S_MOV_B64 $exec ; GCN64-FLATSCR: $exec = S_MOV_B64 1, implicit-def $vgpr0 - ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0 ; GCN64-FLATSCR: $sgpr9 = S_ADD_I32 $sgpr33, 4096, implicit-def $scc - ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, killed $sgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, align 4096, addrspace 5) - ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, killed $sgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, align 4096, addrspace 5) + ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0 renamable $sgpr12 = IMPLICIT_DEF SI_SPILL_S32_SAVE killed $sgpr12, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 @@ -1009,53 +1009,53 @@ body: | ; GCN64-MUBUF: $sgpr29 = S_ADDC_U32 $sgpr29, 0, implicit-def $scc, implicit $scc, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31 ; GCN64-MUBUF: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GCN64-MUBUF: $exec = S_MOV_B64 1, implicit-def $vgpr0 - ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 4, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 4, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) ; GCN64-MUBUF: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0 - ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-MUBUF: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GCN64-MUBUF: $exec = S_MOV_B64 3, implicit-def $vgpr0 - ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, 0, implicit $exec :: (load 4 from %stack.1, addrspace 5) + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) ; GCN64-MUBUF: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13 ; GCN64-MUBUF: $sgpr13 = V_READLANE_B32 killed $vgpr0, 1 - ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-MUBUF: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GCN64-MUBUF: $exec = S_MOV_B64 7, implicit-def $vgpr0 - ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 16, 0, 0, 0, implicit $exec :: (load 4 from %stack.2, addrspace 5) + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 16, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5) ; GCN64-MUBUF: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14 ; GCN64-MUBUF: $sgpr13 = V_READLANE_B32 $vgpr0, 1 ; GCN64-MUBUF: $sgpr14 = V_READLANE_B32 killed $vgpr0, 2 - ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-MUBUF: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GCN64-MUBUF: $exec = S_MOV_B64 15, implicit-def $vgpr0 - ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 28, 0, 0, 0, implicit $exec :: (load 4 from %stack.3, addrspace 5) + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 28, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5) ; GCN64-MUBUF: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15 ; GCN64-MUBUF: $sgpr13 = V_READLANE_B32 $vgpr0, 1 ; GCN64-MUBUF: $sgpr14 = V_READLANE_B32 $vgpr0, 2 ; GCN64-MUBUF: $sgpr15 = V_READLANE_B32 killed $vgpr0, 3 - ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-MUBUF: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GCN64-MUBUF: $exec = S_MOV_B64 31, implicit-def $vgpr0 - ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 44, 0, 0, 0, implicit $exec :: (load 4 from %stack.4, addrspace 5) + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 44, 0, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5) ; GCN64-MUBUF: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 ; GCN64-MUBUF: $sgpr13 = V_READLANE_B32 $vgpr0, 1 ; GCN64-MUBUF: $sgpr14 = V_READLANE_B32 $vgpr0, 2 ; GCN64-MUBUF: $sgpr15 = V_READLANE_B32 $vgpr0, 3 ; GCN64-MUBUF: $sgpr16 = V_READLANE_B32 killed $vgpr0, 4 - ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-MUBUF: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GCN64-MUBUF: $exec = S_MOV_B64 255, implicit-def $vgpr0 - ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 64, 0, 0, 0, implicit $exec :: (load 4 from %stack.5, addrspace 5) + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 64, 0, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5) ; GCN64-MUBUF: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 ; GCN64-MUBUF: $sgpr13 = V_READLANE_B32 $vgpr0, 1 ; GCN64-MUBUF: $sgpr14 = V_READLANE_B32 $vgpr0, 2 @@ -1064,12 +1064,12 @@ body: | ; GCN64-MUBUF: $sgpr17 = V_READLANE_B32 $vgpr0, 5 ; GCN64-MUBUF: $sgpr18 = V_READLANE_B32 $vgpr0, 6 ; GCN64-MUBUF: $sgpr19 = V_READLANE_B32 killed $vgpr0, 7 - ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-MUBUF: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GCN64-MUBUF: $exec = S_MOV_B64 65535, implicit-def $vgpr0 - ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 96, 0, 0, 0, implicit $exec :: (load 4 from %stack.6, addrspace 5) + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 96, 0, 0, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5) ; GCN64-MUBUF: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 ; GCN64-MUBUF: $sgpr13 = V_READLANE_B32 $vgpr0, 1 ; GCN64-MUBUF: $sgpr14 = V_READLANE_B32 $vgpr0, 2 @@ -1086,12 +1086,12 @@ body: | ; GCN64-MUBUF: $sgpr25 = V_READLANE_B32 $vgpr0, 13 ; GCN64-MUBUF: $sgpr26 = V_READLANE_B32 $vgpr0, 14 ; GCN64-MUBUF: $sgpr27 = V_READLANE_B32 killed $vgpr0, 15 - ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-MUBUF: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GCN64-MUBUF: $exec = S_MOV_B64 4294967295, implicit-def $vgpr0 - ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 160, 0, 0, 0, implicit $exec :: (load 4 from %stack.7, addrspace 5) + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 160, 0, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5) ; GCN64-MUBUF: $sgpr64 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 ; GCN64-MUBUF: $sgpr65 = V_READLANE_B32 $vgpr0, 1 ; GCN64-MUBUF: $sgpr66 = V_READLANE_B32 $vgpr0, 2 @@ -1124,15 +1124,15 @@ body: | ; GCN64-MUBUF: $sgpr93 = V_READLANE_B32 $vgpr0, 29 ; GCN64-MUBUF: $sgpr94 = V_READLANE_B32 $vgpr0, 30 ; GCN64-MUBUF: $sgpr95 = V_READLANE_B32 killed $vgpr0, 31 - ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-MUBUF: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GCN64-MUBUF: $exec = S_MOV_B64 1, implicit-def $vgpr0 - ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) ; GCN64-MUBUF: $sgpr2 = S_ADD_I32 $sgpr33, 262144, implicit-def $scc - ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, killed $sgpr2, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, align 4096, addrspace 5) + ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, killed $sgpr2, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.8, align 4096, addrspace 5) ; GCN64-MUBUF: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0 - ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN32-MUBUF-LABEL: name: check_reload ; GCN32-MUBUF: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr11 @@ -1145,53 +1145,53 @@ body: | ; GCN32-MUBUF: $sgpr97 = S_ADDC_U32 $sgpr97, 0, implicit-def $scc, implicit $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 ; GCN32-MUBUF: $sgpr0 = S_MOV_B32 $exec_lo ; GCN32-MUBUF: $exec_lo = S_MOV_B32 1, implicit-def $vgpr0 - ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) ; GCN32-MUBUF: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0 - ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 ; GCN32-MUBUF: $sgpr0 = S_MOV_B32 $exec_lo ; GCN32-MUBUF: $exec_lo = S_MOV_B32 3, implicit-def $vgpr0 - ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, 0, implicit $exec :: (load 4 from %stack.1, addrspace 5) + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) ; GCN32-MUBUF: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13 ; GCN32-MUBUF: $sgpr13 = V_READLANE_B32 killed $vgpr0, 1 - ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 ; GCN32-MUBUF: $sgpr0 = S_MOV_B32 $exec_lo ; GCN32-MUBUF: $exec_lo = S_MOV_B32 7, implicit-def $vgpr0 - ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 16, 0, 0, 0, implicit $exec :: (load 4 from %stack.2, addrspace 5) + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 16, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5) ; GCN32-MUBUF: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14 ; GCN32-MUBUF: $sgpr13 = V_READLANE_B32 $vgpr0, 1 ; GCN32-MUBUF: $sgpr14 = V_READLANE_B32 killed $vgpr0, 2 - ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 ; GCN32-MUBUF: $sgpr0 = S_MOV_B32 $exec_lo ; GCN32-MUBUF: $exec_lo = S_MOV_B32 15, implicit-def $vgpr0 - ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 28, 0, 0, 0, implicit $exec :: (load 4 from %stack.3, addrspace 5) + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 28, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5) ; GCN32-MUBUF: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15 ; GCN32-MUBUF: $sgpr13 = V_READLANE_B32 $vgpr0, 1 ; GCN32-MUBUF: $sgpr14 = V_READLANE_B32 $vgpr0, 2 ; GCN32-MUBUF: $sgpr15 = V_READLANE_B32 killed $vgpr0, 3 - ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 ; GCN32-MUBUF: $sgpr0 = S_MOV_B32 $exec_lo ; GCN32-MUBUF: $exec_lo = S_MOV_B32 31, implicit-def $vgpr0 - ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 44, 0, 0, 0, implicit $exec :: (load 4 from %stack.4, addrspace 5) + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 44, 0, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5) ; GCN32-MUBUF: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 ; GCN32-MUBUF: $sgpr13 = V_READLANE_B32 $vgpr0, 1 ; GCN32-MUBUF: $sgpr14 = V_READLANE_B32 $vgpr0, 2 ; GCN32-MUBUF: $sgpr15 = V_READLANE_B32 $vgpr0, 3 ; GCN32-MUBUF: $sgpr16 = V_READLANE_B32 killed $vgpr0, 4 - ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 ; GCN32-MUBUF: $sgpr0 = S_MOV_B32 $exec_lo ; GCN32-MUBUF: $exec_lo = S_MOV_B32 255, implicit-def $vgpr0 - ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 64, 0, 0, 0, implicit $exec :: (load 4 from %stack.5, addrspace 5) + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 64, 0, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5) ; GCN32-MUBUF: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 ; GCN32-MUBUF: $sgpr13 = V_READLANE_B32 $vgpr0, 1 ; GCN32-MUBUF: $sgpr14 = V_READLANE_B32 $vgpr0, 2 @@ -1200,12 +1200,12 @@ body: | ; GCN32-MUBUF: $sgpr17 = V_READLANE_B32 $vgpr0, 5 ; GCN32-MUBUF: $sgpr18 = V_READLANE_B32 $vgpr0, 6 ; GCN32-MUBUF: $sgpr19 = V_READLANE_B32 killed $vgpr0, 7 - ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 ; GCN32-MUBUF: $sgpr0 = S_MOV_B32 $exec_lo ; GCN32-MUBUF: $exec_lo = S_MOV_B32 65535, implicit-def $vgpr0 - ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 96, 0, 0, 0, implicit $exec :: (load 4 from %stack.6, addrspace 5) + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 96, 0, 0, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5) ; GCN32-MUBUF: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 ; GCN32-MUBUF: $sgpr13 = V_READLANE_B32 $vgpr0, 1 ; GCN32-MUBUF: $sgpr14 = V_READLANE_B32 $vgpr0, 2 @@ -1222,12 +1222,12 @@ body: | ; GCN32-MUBUF: $sgpr25 = V_READLANE_B32 $vgpr0, 13 ; GCN32-MUBUF: $sgpr26 = V_READLANE_B32 $vgpr0, 14 ; GCN32-MUBUF: $sgpr27 = V_READLANE_B32 killed $vgpr0, 15 - ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 ; GCN32-MUBUF: $sgpr0 = S_MOV_B32 $exec_lo ; GCN32-MUBUF: $exec_lo = S_MOV_B32 4294967295, implicit-def $vgpr0 - ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 160, 0, 0, 0, implicit $exec :: (load 4 from %stack.7, addrspace 5) + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 160, 0, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5) ; GCN32-MUBUF: $sgpr64 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 ; GCN32-MUBUF: $sgpr65 = V_READLANE_B32 $vgpr0, 1 ; GCN32-MUBUF: $sgpr66 = V_READLANE_B32 $vgpr0, 2 @@ -1260,15 +1260,15 @@ body: | ; GCN32-MUBUF: $sgpr93 = V_READLANE_B32 $vgpr0, 29 ; GCN32-MUBUF: $sgpr94 = V_READLANE_B32 $vgpr0, 30 ; GCN32-MUBUF: $sgpr95 = V_READLANE_B32 killed $vgpr0, 31 - ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 ; GCN32-MUBUF: $sgpr0 = S_MOV_B32 $exec_lo ; GCN32-MUBUF: $exec_lo = S_MOV_B32 1, implicit-def $vgpr0 - ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) ; GCN32-MUBUF: $sgpr1 = S_ADD_I32 $sgpr33, 131072, implicit-def $scc - ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, killed $sgpr1, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, align 4096, addrspace 5) + ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, killed $sgpr1, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.8, align 4096, addrspace 5) ; GCN32-MUBUF: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0 - ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 ; GCN64-FLATSCR-LABEL: name: check_reload ; GCN64-FLATSCR: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr11, $sgpr0_sgpr1 @@ -1277,53 +1277,53 @@ body: | ; GCN64-FLATSCR: $flat_scr_hi = S_ADDC_U32 $sgpr1, 0, implicit-def $scc, implicit $scc ; GCN64-FLATSCR: $sgpr2_sgpr3 = S_MOV_B64 $exec ; GCN64-FLATSCR: $exec = S_MOV_B64 1, implicit-def $vgpr0 - ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5) + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) ; GCN64-FLATSCR: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0 - ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0 ; GCN64-FLATSCR: $sgpr2_sgpr3 = S_MOV_B64 $exec ; GCN64-FLATSCR: $exec = S_MOV_B64 3, implicit-def $vgpr0 - ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.1, addrspace 5) + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5) ; GCN64-FLATSCR: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13 ; GCN64-FLATSCR: $sgpr13 = V_READLANE_B32 killed $vgpr0, 1 - ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0 ; GCN64-FLATSCR: $sgpr2_sgpr3 = S_MOV_B64 $exec ; GCN64-FLATSCR: $exec = S_MOV_B64 7, implicit-def $vgpr0 - ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.2, addrspace 5) + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5) ; GCN64-FLATSCR: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14 ; GCN64-FLATSCR: $sgpr13 = V_READLANE_B32 $vgpr0, 1 ; GCN64-FLATSCR: $sgpr14 = V_READLANE_B32 killed $vgpr0, 2 - ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0 ; GCN64-FLATSCR: $sgpr2_sgpr3 = S_MOV_B64 $exec ; GCN64-FLATSCR: $exec = S_MOV_B64 15, implicit-def $vgpr0 - ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 28, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.3, addrspace 5) + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 28, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.3, addrspace 5) ; GCN64-FLATSCR: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15 ; GCN64-FLATSCR: $sgpr13 = V_READLANE_B32 $vgpr0, 1 ; GCN64-FLATSCR: $sgpr14 = V_READLANE_B32 $vgpr0, 2 ; GCN64-FLATSCR: $sgpr15 = V_READLANE_B32 killed $vgpr0, 3 - ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0 ; GCN64-FLATSCR: $sgpr2_sgpr3 = S_MOV_B64 $exec ; GCN64-FLATSCR: $exec = S_MOV_B64 31, implicit-def $vgpr0 - ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 44, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.4, addrspace 5) + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 44, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.4, addrspace 5) ; GCN64-FLATSCR: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 ; GCN64-FLATSCR: $sgpr13 = V_READLANE_B32 $vgpr0, 1 ; GCN64-FLATSCR: $sgpr14 = V_READLANE_B32 $vgpr0, 2 ; GCN64-FLATSCR: $sgpr15 = V_READLANE_B32 $vgpr0, 3 ; GCN64-FLATSCR: $sgpr16 = V_READLANE_B32 killed $vgpr0, 4 - ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0 ; GCN64-FLATSCR: $sgpr2_sgpr3 = S_MOV_B64 $exec ; GCN64-FLATSCR: $exec = S_MOV_B64 255, implicit-def $vgpr0 - ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 64, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.5, addrspace 5) + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 64, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.5, addrspace 5) ; GCN64-FLATSCR: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 ; GCN64-FLATSCR: $sgpr13 = V_READLANE_B32 $vgpr0, 1 ; GCN64-FLATSCR: $sgpr14 = V_READLANE_B32 $vgpr0, 2 @@ -1332,12 +1332,12 @@ body: | ; GCN64-FLATSCR: $sgpr17 = V_READLANE_B32 $vgpr0, 5 ; GCN64-FLATSCR: $sgpr18 = V_READLANE_B32 $vgpr0, 6 ; GCN64-FLATSCR: $sgpr19 = V_READLANE_B32 killed $vgpr0, 7 - ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0 ; GCN64-FLATSCR: $sgpr2_sgpr3 = S_MOV_B64 $exec ; GCN64-FLATSCR: $exec = S_MOV_B64 65535, implicit-def $vgpr0 - ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 96, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.6, addrspace 5) + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 96, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.6, addrspace 5) ; GCN64-FLATSCR: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 ; GCN64-FLATSCR: $sgpr13 = V_READLANE_B32 $vgpr0, 1 ; GCN64-FLATSCR: $sgpr14 = V_READLANE_B32 $vgpr0, 2 @@ -1354,12 +1354,12 @@ body: | ; GCN64-FLATSCR: $sgpr25 = V_READLANE_B32 $vgpr0, 13 ; GCN64-FLATSCR: $sgpr26 = V_READLANE_B32 $vgpr0, 14 ; GCN64-FLATSCR: $sgpr27 = V_READLANE_B32 killed $vgpr0, 15 - ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0 ; GCN64-FLATSCR: $sgpr2_sgpr3 = S_MOV_B64 $exec ; GCN64-FLATSCR: $exec = S_MOV_B64 4294967295, implicit-def $vgpr0 - ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 160, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.7, addrspace 5) + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 160, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.7, addrspace 5) ; GCN64-FLATSCR: $sgpr64 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 ; GCN64-FLATSCR: $sgpr65 = V_READLANE_B32 $vgpr0, 1 ; GCN64-FLATSCR: $sgpr66 = V_READLANE_B32 $vgpr0, 2 @@ -1392,15 +1392,15 @@ body: | ; GCN64-FLATSCR: $sgpr93 = V_READLANE_B32 $vgpr0, 29 ; GCN64-FLATSCR: $sgpr94 = V_READLANE_B32 $vgpr0, 30 ; GCN64-FLATSCR: $sgpr95 = V_READLANE_B32 killed $vgpr0, 31 - ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0 ; GCN64-FLATSCR: $sgpr2_sgpr3 = S_MOV_B64 $exec ; GCN64-FLATSCR: $exec = S_MOV_B64 1, implicit-def $vgpr0 - ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) ; GCN64-FLATSCR: $sgpr9 = S_ADD_I32 $sgpr33, 4096, implicit-def $scc - ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, align 4096, addrspace 5) + ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, align 4096, addrspace 5) ; GCN64-FLATSCR: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0 - ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0 renamable $sgpr12 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 diff --git a/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir b/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir index 1bb852fc0f2e0..793a87f2187a1 100644 --- a/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir +++ b/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir @@ -75,7 +75,7 @@ body: | # Make sure there's no assert when looking at the implicit use on S_ENDPGM # GCN-LABEL: name: s_to_v_copy_implicit_use -# GCN: %0:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %1:sreg_64, 0, 0 :: (load 4, addrspace 4) +# GCN: %0:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %1:sreg_64, 0, 0 :: (load (s32), addrspace 4) # GCN-NEXT: %2:vgpr_32 = COPY %0 # GCN-NEXT: S_ENDPGM 0, implicit %2 --- @@ -83,7 +83,7 @@ name: s_to_v_copy_implicit_use tracksRegLiveness: true body: | bb.0: - %0:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %2:sreg_64, 0, 0 :: (load 4, addrspace 4) + %0:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %2:sreg_64, 0, 0 :: (load (s32), addrspace 4) %1:vgpr_32 = COPY %0 S_ENDPGM 0, implicit %1 diff --git a/llvm/test/CodeGen/AMDGPU/skip-branch-taildup-ret.mir b/llvm/test/CodeGen/AMDGPU/skip-branch-taildup-ret.mir index 4c53c51d1ce4e..01a79e4ad2a05 100644 --- a/llvm/test/CodeGen/AMDGPU/skip-branch-taildup-ret.mir +++ b/llvm/test/CodeGen/AMDGPU/skip-branch-taildup-ret.mir @@ -9,14 +9,14 @@ body: | ; CHECK-LABEL: name: skip_branch_taildup_endpgm ; CHECK: bb.0: ; CHECK: successors: %bb.3(0x40000000), %bb.1(0x40000000) - ; CHECK: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 4, 0 :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; CHECK: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 4, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; CHECK: renamable $vgpr0 = V_LSHLREV_B32_e32 2, killed $vgpr0, implicit $exec ; CHECK: S_WAITCNT 127 ; CHECK: $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $exec ; CHECK: renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr0, killed $vgpr0, implicit-def $vcc, implicit $exec ; CHECK: renamable $vgpr1 = V_ADDC_U32_e32 0, killed $vgpr1, implicit-def $vcc, implicit killed $vcc, implicit $exec - ; CHECK: renamable $vgpr0 = FLAT_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1) - ; CHECK: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; CHECK: renamable $vgpr0 = FLAT_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 1) + ; CHECK: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; CHECK: S_WAITCNT 112 ; CHECK: V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec ; CHECK: $sgpr2_sgpr3 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec @@ -52,14 +52,14 @@ body: | successors: %bb.1, %bb.2 liveins: $vgpr0, $sgpr4_sgpr5, $sgpr7 - renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 4, 0 :: (dereferenceable invariant load 8, align 16, addrspace 4) + renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 4, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4) renamable $vgpr0 = V_LSHLREV_B32_e32 2, killed $vgpr0, implicit $exec S_WAITCNT 127 $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $exec renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr0, killed $vgpr0, implicit-def $vcc, implicit $exec renamable $vgpr1 = V_ADDC_U32_e32 0, killed $vgpr1, implicit-def $vcc, implicit killed $vcc, implicit $exec - renamable $vgpr0 = FLAT_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1) - renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load 8, align 16, addrspace 4) + renamable $vgpr0 = FLAT_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 1) + renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4) S_WAITCNT 112 V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec $sgpr2_sgpr3 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/smem-no-clause-coalesced.mir b/llvm/test/CodeGen/AMDGPU/smem-no-clause-coalesced.mir index cec6699bd96ad..3c3ce442039e1 100644 --- a/llvm/test/CodeGen/AMDGPU/smem-no-clause-coalesced.mir +++ b/llvm/test/CodeGen/AMDGPU/smem-no-clause-coalesced.mir @@ -36,8 +36,8 @@ body: | %3.sub1:sgpr_128 = S_AND_B32 %2, 65535, implicit-def dead $scc %3.sub3:sgpr_128 = S_MOV_B32 151468 %3.sub2:sgpr_128 = S_MOV_B32 -1 - %7.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %7, 48, 0 :: (load 4 from `i8 addrspace(4)* undef`, addrspace 4) - %8:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_IMM %3, 640, 0 :: (dereferenceable invariant load 8) + %7.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %7, 48, 0 :: (load (s32) from `i8 addrspace(4)* undef`, addrspace 4) + %8:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_IMM %3, 640, 0 :: (dereferenceable invariant load (s64)) undef %9.sub0:vreg_128 = V_LSHL_ADD_U32_e64 %6, 4, %4, implicit $exec %9.sub1:vreg_128 = V_LSHL_ADD_U32_e64 %5, 4, %0, implicit $exec S_ENDPGM 0 diff --git a/llvm/test/CodeGen/AMDGPU/smrd-fold-offset.mir b/llvm/test/CodeGen/AMDGPU/smrd-fold-offset.mir index dfe9d87a56493..10c36d559f2e2 100644 --- a/llvm/test/CodeGen/AMDGPU/smrd-fold-offset.mir +++ b/llvm/test/CodeGen/AMDGPU/smrd-fold-offset.mir @@ -43,7 +43,7 @@ body: | %8:vgpr_32 = COPY %6 %7:vgpr_32 = V_ADD_U32_e32 %4, killed %8, implicit $exec %10:sreg_32 = COPY %7 - %9:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR killed %5, killed %10, 0 :: (dereferenceable invariant load 4) + %9:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR killed %5, killed %10, 0 :: (dereferenceable invariant load (s32)) $vgpr0 = COPY %9 SI_RETURN_TO_EPILOG $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/soft-clause-dbg-value.mir b/llvm/test/CodeGen/AMDGPU/soft-clause-dbg-value.mir index 97737372310b2..f68f94f8e33e3 100644 --- a/llvm/test/CodeGen/AMDGPU/soft-clause-dbg-value.mir +++ b/llvm/test/CodeGen/AMDGPU/soft-clause-dbg-value.mir @@ -14,33 +14,33 @@ body: | ; CHECK-LABEL: name: sgpr_clause_dbg_value ; CHECK: liveins: $sgpr4_sgpr5, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24 ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 - ; CHECK: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4) + ; CHECK: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load (s32), addrspace 4) ; CHECK: DBG_VALUE [[S_LOAD_DWORD_IMM]], 0, 0 - ; CHECK: [[S_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 8, 0 :: (load 4, addrspace 4) + ; CHECK: [[S_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 8, 0 :: (load (s32), addrspace 4) ; CHECK: DBG_VALUE [[S_LOAD_DWORD_IMM1]], 0, 0 ; CHECK: S_NOP 0 ; CHECK: S_NOP 0 ; CHECK: S_NOP 0 - ; CHECK: [[S_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 16, 0 :: (load 4, addrspace 4) - ; CHECK: [[S_LOAD_DWORD_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 32, 0 :: (load 4, addrspace 4) + ; CHECK: [[S_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 16, 0 :: (load (s32), addrspace 4) + ; CHECK: [[S_LOAD_DWORD_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 32, 0 :: (load (s32), addrspace 4) ; CHECK: DBG_VALUE [[S_LOAD_DWORD_IMM2]], 0, 0 ; CHECK: DBG_VALUE [[S_LOAD_DWORD_IMM3]], 0, 0 - ; CHECK: [[S_LOAD_DWORD_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 64, 0 :: (load 4, addrspace 4) + ; CHECK: [[S_LOAD_DWORD_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 64, 0 :: (load (s32), addrspace 4) ; CHECK: KILL [[COPY]] ; CHECK: S_ENDPGM 0, implicit [[S_LOAD_DWORD_IMM]], implicit [[S_LOAD_DWORD_IMM1]], implicit [[S_LOAD_DWORD_IMM2]], implicit [[S_LOAD_DWORD_IMM3]], implicit [[S_LOAD_DWORD_IMM4]] %0:sreg_64 = COPY $sgpr4_sgpr5 - %1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 0, 0 :: (load 4, align 4, addrspace 4) + %1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 0, 0 :: (load (s32), align 4, addrspace 4) DBG_VALUE %1, 0, 0 - %2:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 8, 0 :: (load 4, align 4, addrspace 4) + %2:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 8, 0 :: (load (s32), align 4, addrspace 4) DBG_VALUE %2, 0, 0 S_NOP 0 S_NOP 0 S_NOP 0 - %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 16, 0 :: (load 4, align 4, addrspace 4) - %4:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 32, 0 :: (load 4, align 4, addrspace 4) + %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 16, 0 :: (load (s32), align 4, addrspace 4) + %4:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 32, 0 :: (load (s32), align 4, addrspace 4) DBG_VALUE %3, 0, 0 DBG_VALUE %4, 0, 0 - %5:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 64, 0 :: (load 4, align 4, addrspace 4) + %5:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 64, 0 :: (load (s32), align 4, addrspace 4) S_ENDPGM 0, implicit %1, implicit %2, implicit %3, implicit %4, implicit %5 ... diff --git a/llvm/test/CodeGen/AMDGPU/spill-agpr-partially-undef.mir b/llvm/test/CodeGen/AMDGPU/spill-agpr-partially-undef.mir index 8edf93ce50279..bc0553ec2c1b9 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-agpr-partially-undef.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-agpr-partially-undef.mir @@ -17,10 +17,10 @@ body: | ; CHECK-LABEL: name: spill_a64_kill ; CHECK: liveins: $agpr0_agpr1 ; CHECK: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1 - ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store 4 into %stack.0, addrspace 5) + ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store (s32) into %stack.0, addrspace 5) ; CHECK: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec - ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: (store 4 into %stack.0 + 4, addrspace 5) - SI_SPILL_A64_SAVE killed $agpr0_agpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, addrspace 5) + ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: (store (s32) into %stack.0 + 4, addrspace 5) + SI_SPILL_A64_SAVE killed $agpr0_agpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) ... # Make sure there's no verifier error on the undef spill component when the value is killed. @@ -41,10 +41,10 @@ body: | ; CHECK-LABEL: name: spill_a64_undef_sub1_killed ; CHECK: liveins: $agpr0 ; CHECK: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1 - ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store 4 into %stack.0, addrspace 5) + ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store (s32) into %stack.0, addrspace 5) ; CHECK: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec - ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: (store 4 into %stack.0 + 4, addrspace 5) - SI_SPILL_A64_SAVE killed $agpr0_agpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, addrspace 5) + ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: (store (s32) into %stack.0 + 4, addrspace 5) + SI_SPILL_A64_SAVE killed $agpr0_agpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) ... --- @@ -63,8 +63,8 @@ body: | ; CHECK-LABEL: name: spill_a64_undef_sub0_killed ; CHECK: liveins: $agpr1 ; CHECK: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1 - ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store 4 into %stack.0, addrspace 5) + ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store (s32) into %stack.0, addrspace 5) ; CHECK: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec - ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: (store 4 into %stack.0 + 4, addrspace 5) - SI_SPILL_A64_SAVE killed $agpr0_agpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, addrspace 5) + ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: (store (s32) into %stack.0 + 4, addrspace 5) + SI_SPILL_A64_SAVE killed $agpr0_agpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) ... diff --git a/llvm/test/CodeGen/AMDGPU/spill-agpr.mir b/llvm/test/CodeGen/AMDGPU/spill-agpr.mir index cabf933a0834d..8e5b0477a9207 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-agpr.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-agpr.mir @@ -15,16 +15,16 @@ body: | ; GFX908-SPILLED: bb.0: ; GFX908-SPILLED: successors: %bb.1(0x80000000) ; GFX908-SPILLED: S_NOP 0, implicit-def renamable $agpr0 - ; GFX908-SPILLED: SI_SPILL_A32_SAVE killed $agpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) + ; GFX908-SPILLED: SI_SPILL_A32_SAVE killed $agpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) ; GFX908-SPILLED: S_NOP 0, implicit-def renamable $agpr0 - ; GFX908-SPILLED: SI_SPILL_A32_SAVE killed $agpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) + ; GFX908-SPILLED: SI_SPILL_A32_SAVE killed $agpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) ; GFX908-SPILLED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; GFX908-SPILLED: bb.1: ; GFX908-SPILLED: successors: %bb.2(0x80000000) ; GFX908-SPILLED: S_NOP 1 ; GFX908-SPILLED: bb.2: - ; GFX908-SPILLED: $agpr0 = SI_SPILL_A32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load 4 from %stack.1, addrspace 5) - ; GFX908-SPILLED: $agpr1 = SI_SPILL_A32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) + ; GFX908-SPILLED: $agpr0 = SI_SPILL_A32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) + ; GFX908-SPILLED: $agpr1 = SI_SPILL_A32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) ; GFX908-SPILLED: S_NOP 0, implicit killed renamable $agpr0, implicit killed renamable $agpr1 ; GFX908-EXPANDED-LABEL: name: spill_restore_agpr32 ; GFX908-EXPANDED: bb.0: @@ -48,16 +48,16 @@ body: | ; GFX90A-SPILLED: bb.0: ; GFX90A-SPILLED: successors: %bb.1(0x80000000) ; GFX90A-SPILLED: S_NOP 0, implicit-def renamable $agpr0 - ; GFX90A-SPILLED: SI_SPILL_A32_SAVE killed $agpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) + ; GFX90A-SPILLED: SI_SPILL_A32_SAVE killed $agpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) ; GFX90A-SPILLED: S_NOP 0, implicit-def renamable $agpr0 - ; GFX90A-SPILLED: SI_SPILL_A32_SAVE killed $agpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) + ; GFX90A-SPILLED: SI_SPILL_A32_SAVE killed $agpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) ; GFX90A-SPILLED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; GFX90A-SPILLED: bb.1: ; GFX90A-SPILLED: successors: %bb.2(0x80000000) ; GFX90A-SPILLED: S_NOP 1 ; GFX90A-SPILLED: bb.2: - ; GFX90A-SPILLED: $agpr0 = SI_SPILL_A32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load 4 from %stack.1, addrspace 5) - ; GFX90A-SPILLED: $agpr1 = SI_SPILL_A32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) + ; GFX90A-SPILLED: $agpr0 = SI_SPILL_A32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) + ; GFX90A-SPILLED: $agpr1 = SI_SPILL_A32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) ; GFX90A-SPILLED: S_NOP 0, implicit killed renamable $agpr0, implicit killed renamable $agpr1 ; GFX90A-EXPANDED-LABEL: name: spill_restore_agpr32 ; GFX90A-EXPANDED: bb.0: @@ -81,16 +81,16 @@ body: | ; SPILLED: bb.0: ; SPILLED: successors: %bb.1(0x80000000) ; SPILLED: S_NOP 0, implicit-def renamable $agpr0 - ; SPILLED: SI_SPILL_A32_SAVE killed $agpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) + ; SPILLED: SI_SPILL_A32_SAVE killed $agpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) ; SPILLED: S_NOP 0, implicit-def renamable $agpr0 - ; SPILLED: SI_SPILL_A32_SAVE killed $agpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) + ; SPILLED: SI_SPILL_A32_SAVE killed $agpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) ; SPILLED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; SPILLED: bb.1: ; SPILLED: successors: %bb.2(0x80000000) ; SPILLED: S_NOP 1 ; SPILLED: bb.2: - ; SPILLED: $agpr0 = SI_SPILL_A32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load 4 from %stack.1, addrspace 5) - ; SPILLED: $agpr1 = SI_SPILL_A32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) + ; SPILLED: $agpr0 = SI_SPILL_A32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) + ; SPILLED: $agpr1 = SI_SPILL_A32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) ; SPILLED: S_NOP 0, implicit killed renamable $agpr0, implicit killed renamable $agpr1 ; EXPANDED-LABEL: name: spill_restore_agpr32 ; EXPANDED: bb.0: @@ -133,13 +133,13 @@ body: | ; GFX908-SPILLED: bb.0: ; GFX908-SPILLED: successors: %bb.1(0x80000000) ; GFX908-SPILLED: S_NOP 0, implicit-def renamable $agpr0_agpr1 - ; GFX908-SPILLED: SI_SPILL_A64_SAVE killed $agpr0_agpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, align 4, addrspace 5) + ; GFX908-SPILLED: SI_SPILL_A64_SAVE killed $agpr0_agpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5) ; GFX908-SPILLED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; GFX908-SPILLED: bb.1: ; GFX908-SPILLED: successors: %bb.2(0x80000000) ; GFX908-SPILLED: S_NOP 1 ; GFX908-SPILLED: bb.2: - ; GFX908-SPILLED: $agpr0_agpr1 = SI_SPILL_A64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5) + ; GFX908-SPILLED: $agpr0_agpr1 = SI_SPILL_A64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5) ; GFX908-SPILLED: S_NOP 0, implicit killed renamable $agpr0_agpr1 ; GFX908-EXPANDED-LABEL: name: spill_restore_agpr64 ; GFX908-EXPANDED: bb.0: @@ -162,13 +162,13 @@ body: | ; GFX90A-SPILLED: bb.0: ; GFX90A-SPILLED: successors: %bb.1(0x80000000) ; GFX90A-SPILLED: S_NOP 0, implicit-def renamable $agpr0_agpr1 - ; GFX90A-SPILLED: SI_SPILL_A64_SAVE killed $agpr0_agpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, align 4, addrspace 5) + ; GFX90A-SPILLED: SI_SPILL_A64_SAVE killed $agpr0_agpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5) ; GFX90A-SPILLED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; GFX90A-SPILLED: bb.1: ; GFX90A-SPILLED: successors: %bb.2(0x80000000) ; GFX90A-SPILLED: S_NOP 1 ; GFX90A-SPILLED: bb.2: - ; GFX90A-SPILLED: $agpr0_agpr1 = SI_SPILL_A64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5) + ; GFX90A-SPILLED: $agpr0_agpr1 = SI_SPILL_A64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5) ; GFX90A-SPILLED: S_NOP 0, implicit killed renamable $agpr0_agpr1 ; GFX90A-EXPANDED-LABEL: name: spill_restore_agpr64 ; GFX90A-EXPANDED: bb.0: @@ -191,13 +191,13 @@ body: | ; SPILLED: bb.0: ; SPILLED: successors: %bb.1(0x80000000) ; SPILLED: S_NOP 0, implicit-def renamable $agpr0_agpr1 - ; SPILLED: SI_SPILL_A64_SAVE killed $agpr0_agpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, align 4, addrspace 5) + ; SPILLED: SI_SPILL_A64_SAVE killed $agpr0_agpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5) ; SPILLED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; SPILLED: bb.1: ; SPILLED: successors: %bb.2(0x80000000) ; SPILLED: S_NOP 1 ; SPILLED: bb.2: - ; SPILLED: $agpr0_agpr1 = SI_SPILL_A64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5) + ; SPILLED: $agpr0_agpr1 = SI_SPILL_A64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5) ; SPILLED: S_NOP 0, implicit killed renamable $agpr0_agpr1 ; EXPANDED-LABEL: name: spill_restore_agpr64 ; EXPANDED: bb.0: @@ -239,12 +239,12 @@ body: | ; GFX908-SPILLED: successors: %bb.1(0x80000000) ; GFX908-SPILLED: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255 ; GFX908-SPILLED: S_NOP 0, implicit-def renamable $agpr0 - ; GFX908-SPILLED: SI_SPILL_A32_SAVE killed $agpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) + ; GFX908-SPILLED: SI_SPILL_A32_SAVE killed $agpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) ; GFX908-SPILLED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; GFX908-SPILLED: bb.1: ; GFX908-SPILLED: successors: %bb.2(0x80000000) ; GFX908-SPILLED: bb.2: - ; GFX908-SPILLED: $agpr0 = SI_SPILL_A32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) + ; GFX908-SPILLED: $agpr0 = SI_SPILL_A32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) ; GFX908-SPILLED: S_NOP 0, implicit undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; GFX908-SPILLED: S_NOP 0, implicit undef $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 ; GFX908-SPILLED: S_NOP 0, implicit undef $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 @@ -267,15 +267,15 @@ body: | ; GFX908-EXPANDED: successors: %bb.1(0x80000000) ; GFX908-EXPANDED: liveins: $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX908-EXPANDED: S_NOP 0, implicit-def renamable $agpr0 - ; GFX908-EXPANDED: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) + ; GFX908-EXPANDED: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) ; GFX908-EXPANDED: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec - ; GFX908-EXPANDED: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) - ; GFX908-EXPANDED: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec :: (load 4 from %stack.1, addrspace 5) + ; GFX908-EXPANDED: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; GFX908-EXPANDED: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) ; GFX908-EXPANDED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; GFX908-EXPANDED: bb.1: ; GFX908-EXPANDED: successors: %bb.2(0x80000000) ; GFX908-EXPANDED: bb.2: - ; GFX908-EXPANDED: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) + ; GFX908-EXPANDED: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) ; GFX908-EXPANDED: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec ; GFX908-EXPANDED: S_NOP 0, implicit undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; GFX908-EXPANDED: S_NOP 0, implicit undef $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 @@ -299,12 +299,12 @@ body: | ; GFX90A-SPILLED: successors: %bb.1(0x80000000) ; GFX90A-SPILLED: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255 ; GFX90A-SPILLED: S_NOP 0, implicit-def renamable $agpr0 - ; GFX90A-SPILLED: SI_SPILL_A32_SAVE killed $agpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) + ; GFX90A-SPILLED: SI_SPILL_A32_SAVE killed $agpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) ; GFX90A-SPILLED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; GFX90A-SPILLED: bb.1: ; GFX90A-SPILLED: successors: %bb.2(0x80000000) ; GFX90A-SPILLED: bb.2: - ; GFX90A-SPILLED: $agpr0 = SI_SPILL_A32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) + ; GFX90A-SPILLED: $agpr0 = SI_SPILL_A32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) ; GFX90A-SPILLED: S_NOP 0, implicit undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; GFX90A-SPILLED: S_NOP 0, implicit undef $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 ; GFX90A-SPILLED: S_NOP 0, implicit undef $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 @@ -327,12 +327,12 @@ body: | ; GFX90A-EXPANDED: successors: %bb.1(0x80000000) ; GFX90A-EXPANDED: liveins: $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX90A-EXPANDED: S_NOP 0, implicit-def renamable $agpr0 - ; GFX90A-EXPANDED: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) + ; GFX90A-EXPANDED: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) ; GFX90A-EXPANDED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; GFX90A-EXPANDED: bb.1: ; GFX90A-EXPANDED: successors: %bb.2(0x80000000) ; GFX90A-EXPANDED: bb.2: - ; GFX90A-EXPANDED: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) + ; GFX90A-EXPANDED: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) ; GFX90A-EXPANDED: S_NOP 0, implicit undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; GFX90A-EXPANDED: S_NOP 0, implicit undef $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 ; GFX90A-EXPANDED: S_NOP 0, implicit undef $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 @@ -355,12 +355,12 @@ body: | ; SPILLED: successors: %bb.1(0x80000000) ; SPILLED: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255 ; SPILLED: S_NOP 0, implicit-def renamable $agpr0 - ; SPILLED: SI_SPILL_A32_SAVE killed $agpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) + ; SPILLED: SI_SPILL_A32_SAVE killed $agpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) ; SPILLED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; SPILLED: bb.1: ; SPILLED: successors: %bb.2(0x80000000) ; SPILLED: bb.2: - ; SPILLED: $agpr0 = SI_SPILL_A32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) + ; SPILLED: $agpr0 = SI_SPILL_A32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) ; SPILLED: S_NOP 0, implicit undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; SPILLED: S_NOP 0, implicit undef $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 ; SPILLED: S_NOP 0, implicit undef $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 @@ -383,15 +383,15 @@ body: | ; EXPANDED: successors: %bb.1(0x80000000) ; EXPANDED: liveins: $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; EXPANDED: S_NOP 0, implicit-def renamable $agpr0 - ; EXPANDED: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) + ; EXPANDED: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) ; EXPANDED: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec - ; EXPANDED: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) - ; EXPANDED: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.1, addrspace 5) + ; EXPANDED: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; EXPANDED: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) ; EXPANDED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; EXPANDED: bb.1: ; EXPANDED: successors: %bb.2(0x80000000) ; EXPANDED: bb.2: - ; EXPANDED: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) + ; EXPANDED: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) ; EXPANDED: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec ; EXPANDED: S_NOP 0, implicit undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; EXPANDED: S_NOP 0, implicit undef $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 @@ -449,13 +449,13 @@ body: | ; GFX908-SPILLED: bb.0: ; GFX908-SPILLED: successors: %bb.1(0x80000000) ; GFX908-SPILLED: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2 - ; GFX908-SPILLED: SI_SPILL_A96_SAVE killed $agpr0_agpr1_agpr2, %stack.0, $sgpr32, 0, implicit $exec :: (store 12 into %stack.0, align 4, addrspace 5) + ; GFX908-SPILLED: SI_SPILL_A96_SAVE killed $agpr0_agpr1_agpr2, %stack.0, $sgpr32, 0, implicit $exec :: (store (s96) into %stack.0, align 4, addrspace 5) ; GFX908-SPILLED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; GFX908-SPILLED: bb.1: ; GFX908-SPILLED: successors: %bb.2(0x80000000) ; GFX908-SPILLED: S_NOP 1 ; GFX908-SPILLED: bb.2: - ; GFX908-SPILLED: $agpr0_agpr1_agpr2 = SI_SPILL_A96_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 12 from %stack.0, align 4, addrspace 5) + ; GFX908-SPILLED: $agpr0_agpr1_agpr2 = SI_SPILL_A96_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s96) from %stack.0, align 4, addrspace 5) ; GFX908-SPILLED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2 ; GFX908-EXPANDED-LABEL: name: spill_restore_agpr96 ; GFX908-EXPANDED: bb.0: @@ -480,13 +480,13 @@ body: | ; GFX90A-SPILLED: bb.0: ; GFX90A-SPILLED: successors: %bb.1(0x80000000) ; GFX90A-SPILLED: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2 - ; GFX90A-SPILLED: SI_SPILL_A96_SAVE killed $agpr0_agpr1_agpr2, %stack.0, $sgpr32, 0, implicit $exec :: (store 12 into %stack.0, align 4, addrspace 5) + ; GFX90A-SPILLED: SI_SPILL_A96_SAVE killed $agpr0_agpr1_agpr2, %stack.0, $sgpr32, 0, implicit $exec :: (store (s96) into %stack.0, align 4, addrspace 5) ; GFX90A-SPILLED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; GFX90A-SPILLED: bb.1: ; GFX90A-SPILLED: successors: %bb.2(0x80000000) ; GFX90A-SPILLED: S_NOP 1 ; GFX90A-SPILLED: bb.2: - ; GFX90A-SPILLED: $agpr0_agpr1_agpr2 = SI_SPILL_A96_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 12 from %stack.0, align 4, addrspace 5) + ; GFX90A-SPILLED: $agpr0_agpr1_agpr2 = SI_SPILL_A96_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s96) from %stack.0, align 4, addrspace 5) ; GFX90A-SPILLED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2 ; GFX90A-EXPANDED-LABEL: name: spill_restore_agpr96 ; GFX90A-EXPANDED: bb.0: @@ -511,13 +511,13 @@ body: | ; SPILLED: bb.0: ; SPILLED: successors: %bb.1(0x80000000) ; SPILLED: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2 - ; SPILLED: SI_SPILL_A96_SAVE killed $agpr0_agpr1_agpr2, %stack.0, $sgpr32, 0, implicit $exec :: (store 12 into %stack.0, align 4, addrspace 5) + ; SPILLED: SI_SPILL_A96_SAVE killed $agpr0_agpr1_agpr2, %stack.0, $sgpr32, 0, implicit $exec :: (store (s96) into %stack.0, align 4, addrspace 5) ; SPILLED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; SPILLED: bb.1: ; SPILLED: successors: %bb.2(0x80000000) ; SPILLED: S_NOP 1 ; SPILLED: bb.2: - ; SPILLED: $agpr0_agpr1_agpr2 = SI_SPILL_A96_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 12 from %stack.0, align 4, addrspace 5) + ; SPILLED: $agpr0_agpr1_agpr2 = SI_SPILL_A96_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s96) from %stack.0, align 4, addrspace 5) ; SPILLED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2 ; EXPANDED-LABEL: name: spill_restore_agpr96 ; EXPANDED: bb.0: @@ -560,13 +560,13 @@ body: | ; GFX908-SPILLED: bb.0: ; GFX908-SPILLED: successors: %bb.1(0x80000000) ; GFX908-SPILLED: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3 - ; GFX908-SPILLED: SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store 16 into %stack.0, align 4, addrspace 5) + ; GFX908-SPILLED: SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, align 4, addrspace 5) ; GFX908-SPILLED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; GFX908-SPILLED: bb.1: ; GFX908-SPILLED: successors: %bb.2(0x80000000) ; GFX908-SPILLED: S_NOP 1 ; GFX908-SPILLED: bb.2: - ; GFX908-SPILLED: $agpr0_agpr1_agpr2_agpr3 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 16 from %stack.0, align 4, addrspace 5) + ; GFX908-SPILLED: $agpr0_agpr1_agpr2_agpr3 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) ; GFX908-SPILLED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3 ; GFX908-EXPANDED-LABEL: name: spill_restore_agpr128 ; GFX908-EXPANDED: bb.0: @@ -593,13 +593,13 @@ body: | ; GFX90A-SPILLED: bb.0: ; GFX90A-SPILLED: successors: %bb.1(0x80000000) ; GFX90A-SPILLED: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3 - ; GFX90A-SPILLED: SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store 16 into %stack.0, align 4, addrspace 5) + ; GFX90A-SPILLED: SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, align 4, addrspace 5) ; GFX90A-SPILLED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; GFX90A-SPILLED: bb.1: ; GFX90A-SPILLED: successors: %bb.2(0x80000000) ; GFX90A-SPILLED: S_NOP 1 ; GFX90A-SPILLED: bb.2: - ; GFX90A-SPILLED: $agpr0_agpr1_agpr2_agpr3 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 16 from %stack.0, align 4, addrspace 5) + ; GFX90A-SPILLED: $agpr0_agpr1_agpr2_agpr3 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) ; GFX90A-SPILLED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3 ; GFX90A-EXPANDED-LABEL: name: spill_restore_agpr128 ; GFX90A-EXPANDED: bb.0: @@ -626,13 +626,13 @@ body: | ; SPILLED: bb.0: ; SPILLED: successors: %bb.1(0x80000000) ; SPILLED: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3 - ; SPILLED: SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store 16 into %stack.0, align 4, addrspace 5) + ; SPILLED: SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, align 4, addrspace 5) ; SPILLED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; SPILLED: bb.1: ; SPILLED: successors: %bb.2(0x80000000) ; SPILLED: S_NOP 1 ; SPILLED: bb.2: - ; SPILLED: $agpr0_agpr1_agpr2_agpr3 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 16 from %stack.0, align 4, addrspace 5) + ; SPILLED: $agpr0_agpr1_agpr2_agpr3 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) ; SPILLED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3 ; EXPANDED-LABEL: name: spill_restore_agpr128 ; EXPANDED: bb.0: @@ -677,13 +677,13 @@ body: | ; GFX908-SPILLED: bb.0: ; GFX908-SPILLED: successors: %bb.1(0x80000000) ; GFX908-SPILLED: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4 - ; GFX908-SPILLED: SI_SPILL_A160_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4, %stack.0, $sgpr32, 0, implicit $exec :: (store 20 into %stack.0, align 4, addrspace 5) + ; GFX908-SPILLED: SI_SPILL_A160_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4, %stack.0, $sgpr32, 0, implicit $exec :: (store (s160) into %stack.0, align 4, addrspace 5) ; GFX908-SPILLED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; GFX908-SPILLED: bb.1: ; GFX908-SPILLED: successors: %bb.2(0x80000000) ; GFX908-SPILLED: S_NOP 1 ; GFX908-SPILLED: bb.2: - ; GFX908-SPILLED: $agpr0_agpr1_agpr2_agpr3_agpr4 = SI_SPILL_A160_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 20 from %stack.0, align 4, addrspace 5) + ; GFX908-SPILLED: $agpr0_agpr1_agpr2_agpr3_agpr4 = SI_SPILL_A160_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s160) from %stack.0, align 4, addrspace 5) ; GFX908-SPILLED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4 ; GFX908-EXPANDED-LABEL: name: spill_restore_agpr160 ; GFX908-EXPANDED: bb.0: @@ -712,13 +712,13 @@ body: | ; GFX90A-SPILLED: bb.0: ; GFX90A-SPILLED: successors: %bb.1(0x80000000) ; GFX90A-SPILLED: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4 - ; GFX90A-SPILLED: SI_SPILL_A160_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4, %stack.0, $sgpr32, 0, implicit $exec :: (store 20 into %stack.0, align 4, addrspace 5) + ; GFX90A-SPILLED: SI_SPILL_A160_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4, %stack.0, $sgpr32, 0, implicit $exec :: (store (s160) into %stack.0, align 4, addrspace 5) ; GFX90A-SPILLED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; GFX90A-SPILLED: bb.1: ; GFX90A-SPILLED: successors: %bb.2(0x80000000) ; GFX90A-SPILLED: S_NOP 1 ; GFX90A-SPILLED: bb.2: - ; GFX90A-SPILLED: $agpr0_agpr1_agpr2_agpr3_agpr4 = SI_SPILL_A160_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 20 from %stack.0, align 4, addrspace 5) + ; GFX90A-SPILLED: $agpr0_agpr1_agpr2_agpr3_agpr4 = SI_SPILL_A160_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s160) from %stack.0, align 4, addrspace 5) ; GFX90A-SPILLED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4 ; GFX90A-EXPANDED-LABEL: name: spill_restore_agpr160 ; GFX90A-EXPANDED: bb.0: @@ -747,13 +747,13 @@ body: | ; SPILLED: bb.0: ; SPILLED: successors: %bb.1(0x80000000) ; SPILLED: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4 - ; SPILLED: SI_SPILL_A160_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4, %stack.0, $sgpr32, 0, implicit $exec :: (store 20 into %stack.0, align 4, addrspace 5) + ; SPILLED: SI_SPILL_A160_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4, %stack.0, $sgpr32, 0, implicit $exec :: (store (s160) into %stack.0, align 4, addrspace 5) ; SPILLED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; SPILLED: bb.1: ; SPILLED: successors: %bb.2(0x80000000) ; SPILLED: S_NOP 1 ; SPILLED: bb.2: - ; SPILLED: $agpr0_agpr1_agpr2_agpr3_agpr4 = SI_SPILL_A160_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 20 from %stack.0, align 4, addrspace 5) + ; SPILLED: $agpr0_agpr1_agpr2_agpr3_agpr4 = SI_SPILL_A160_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s160) from %stack.0, align 4, addrspace 5) ; SPILLED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4 ; EXPANDED-LABEL: name: spill_restore_agpr160 ; EXPANDED: bb.0: @@ -800,13 +800,13 @@ body: | ; GFX908-SPILLED: bb.0: ; GFX908-SPILLED: successors: %bb.1(0x80000000) ; GFX908-SPILLED: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; GFX908-SPILLED: SI_SPILL_A192_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, %stack.0, $sgpr32, 0, implicit $exec :: (store 24 into %stack.0, align 4, addrspace 5) + ; GFX908-SPILLED: SI_SPILL_A192_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, %stack.0, $sgpr32, 0, implicit $exec :: (store (s192) into %stack.0, align 4, addrspace 5) ; GFX908-SPILLED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; GFX908-SPILLED: bb.1: ; GFX908-SPILLED: successors: %bb.2(0x80000000) ; GFX908-SPILLED: S_NOP 1 ; GFX908-SPILLED: bb.2: - ; GFX908-SPILLED: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = SI_SPILL_A192_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 24 from %stack.0, align 4, addrspace 5) + ; GFX908-SPILLED: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = SI_SPILL_A192_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s192) from %stack.0, align 4, addrspace 5) ; GFX908-SPILLED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; GFX908-EXPANDED-LABEL: name: spill_restore_agpr192 ; GFX908-EXPANDED: bb.0: @@ -837,13 +837,13 @@ body: | ; GFX90A-SPILLED: bb.0: ; GFX90A-SPILLED: successors: %bb.1(0x80000000) ; GFX90A-SPILLED: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; GFX90A-SPILLED: SI_SPILL_A192_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, %stack.0, $sgpr32, 0, implicit $exec :: (store 24 into %stack.0, align 4, addrspace 5) + ; GFX90A-SPILLED: SI_SPILL_A192_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, %stack.0, $sgpr32, 0, implicit $exec :: (store (s192) into %stack.0, align 4, addrspace 5) ; GFX90A-SPILLED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; GFX90A-SPILLED: bb.1: ; GFX90A-SPILLED: successors: %bb.2(0x80000000) ; GFX90A-SPILLED: S_NOP 1 ; GFX90A-SPILLED: bb.2: - ; GFX90A-SPILLED: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = SI_SPILL_A192_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 24 from %stack.0, align 4, addrspace 5) + ; GFX90A-SPILLED: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = SI_SPILL_A192_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s192) from %stack.0, align 4, addrspace 5) ; GFX90A-SPILLED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; GFX90A-EXPANDED-LABEL: name: spill_restore_agpr192 ; GFX90A-EXPANDED: bb.0: @@ -874,13 +874,13 @@ body: | ; SPILLED: bb.0: ; SPILLED: successors: %bb.1(0x80000000) ; SPILLED: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; SPILLED: SI_SPILL_A192_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, %stack.0, $sgpr32, 0, implicit $exec :: (store 24 into %stack.0, align 4, addrspace 5) + ; SPILLED: SI_SPILL_A192_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, %stack.0, $sgpr32, 0, implicit $exec :: (store (s192) into %stack.0, align 4, addrspace 5) ; SPILLED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; SPILLED: bb.1: ; SPILLED: successors: %bb.2(0x80000000) ; SPILLED: S_NOP 1 ; SPILLED: bb.2: - ; SPILLED: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = SI_SPILL_A192_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 24 from %stack.0, align 4, addrspace 5) + ; SPILLED: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = SI_SPILL_A192_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s192) from %stack.0, align 4, addrspace 5) ; SPILLED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; EXPANDED-LABEL: name: spill_restore_agpr192 ; EXPANDED: bb.0: @@ -929,13 +929,13 @@ body: | ; GFX908-SPILLED: bb.0: ; GFX908-SPILLED: successors: %bb.1(0x80000000) ; GFX908-SPILLED: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX908-SPILLED: SI_SPILL_A256_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, %stack.0, $sgpr32, 0, implicit $exec :: (store 32 into %stack.0, align 4, addrspace 5) + ; GFX908-SPILLED: SI_SPILL_A256_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, %stack.0, $sgpr32, 0, implicit $exec :: (store (s256) into %stack.0, align 4, addrspace 5) ; GFX908-SPILLED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; GFX908-SPILLED: bb.1: ; GFX908-SPILLED: successors: %bb.2(0x80000000) ; GFX908-SPILLED: S_NOP 1 ; GFX908-SPILLED: bb.2: - ; GFX908-SPILLED: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = SI_SPILL_A256_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 32 from %stack.0, align 4, addrspace 5) + ; GFX908-SPILLED: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = SI_SPILL_A256_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.0, align 4, addrspace 5) ; GFX908-SPILLED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; GFX908-EXPANDED-LABEL: name: spill_restore_agpr256 ; GFX908-EXPANDED: bb.0: @@ -970,13 +970,13 @@ body: | ; GFX90A-SPILLED: bb.0: ; GFX90A-SPILLED: successors: %bb.1(0x80000000) ; GFX90A-SPILLED: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; GFX90A-SPILLED: SI_SPILL_A256_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, %stack.0, $sgpr32, 0, implicit $exec :: (store 32 into %stack.0, align 4, addrspace 5) + ; GFX90A-SPILLED: SI_SPILL_A256_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, %stack.0, $sgpr32, 0, implicit $exec :: (store (s256) into %stack.0, align 4, addrspace 5) ; GFX90A-SPILLED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; GFX90A-SPILLED: bb.1: ; GFX90A-SPILLED: successors: %bb.2(0x80000000) ; GFX90A-SPILLED: S_NOP 1 ; GFX90A-SPILLED: bb.2: - ; GFX90A-SPILLED: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = SI_SPILL_A256_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 32 from %stack.0, align 4, addrspace 5) + ; GFX90A-SPILLED: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = SI_SPILL_A256_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.0, align 4, addrspace 5) ; GFX90A-SPILLED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; GFX90A-EXPANDED-LABEL: name: spill_restore_agpr256 ; GFX90A-EXPANDED: bb.0: @@ -1011,13 +1011,13 @@ body: | ; SPILLED: bb.0: ; SPILLED: successors: %bb.1(0x80000000) ; SPILLED: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; SPILLED: SI_SPILL_A256_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, %stack.0, $sgpr32, 0, implicit $exec :: (store 32 into %stack.0, align 4, addrspace 5) + ; SPILLED: SI_SPILL_A256_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, %stack.0, $sgpr32, 0, implicit $exec :: (store (s256) into %stack.0, align 4, addrspace 5) ; SPILLED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; SPILLED: bb.1: ; SPILLED: successors: %bb.2(0x80000000) ; SPILLED: S_NOP 1 ; SPILLED: bb.2: - ; SPILLED: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = SI_SPILL_A256_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 32 from %stack.0, align 4, addrspace 5) + ; SPILLED: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = SI_SPILL_A256_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.0, align 4, addrspace 5) ; SPILLED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; EXPANDED-LABEL: name: spill_restore_agpr256 ; EXPANDED: bb.0: @@ -1070,13 +1070,13 @@ body: | ; GFX908-SPILLED: bb.0: ; GFX908-SPILLED: successors: %bb.1(0x80000000) ; GFX908-SPILLED: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX908-SPILLED: SI_SPILL_A512_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, %stack.0, $sgpr32, 0, implicit $exec :: (store 64 into %stack.0, align 4, addrspace 5) + ; GFX908-SPILLED: SI_SPILL_A512_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, %stack.0, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5) ; GFX908-SPILLED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; GFX908-SPILLED: bb.1: ; GFX908-SPILLED: successors: %bb.2(0x80000000) ; GFX908-SPILLED: S_NOP 1 ; GFX908-SPILLED: bb.2: - ; GFX908-SPILLED: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = SI_SPILL_A512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 64 from %stack.0, align 4, addrspace 5) + ; GFX908-SPILLED: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = SI_SPILL_A512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5) ; GFX908-SPILLED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; GFX908-EXPANDED-LABEL: name: spill_restore_agpr512 ; GFX908-EXPANDED: bb.0: @@ -1127,13 +1127,13 @@ body: | ; GFX90A-SPILLED: bb.0: ; GFX90A-SPILLED: successors: %bb.1(0x80000000) ; GFX90A-SPILLED: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; GFX90A-SPILLED: SI_SPILL_A512_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, %stack.0, $sgpr32, 0, implicit $exec :: (store 64 into %stack.0, align 4, addrspace 5) + ; GFX90A-SPILLED: SI_SPILL_A512_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, %stack.0, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5) ; GFX90A-SPILLED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; GFX90A-SPILLED: bb.1: ; GFX90A-SPILLED: successors: %bb.2(0x80000000) ; GFX90A-SPILLED: S_NOP 1 ; GFX90A-SPILLED: bb.2: - ; GFX90A-SPILLED: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = SI_SPILL_A512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 64 from %stack.0, align 4, addrspace 5) + ; GFX90A-SPILLED: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = SI_SPILL_A512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5) ; GFX90A-SPILLED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; GFX90A-EXPANDED-LABEL: name: spill_restore_agpr512 ; GFX90A-EXPANDED: bb.0: @@ -1184,13 +1184,13 @@ body: | ; SPILLED: bb.0: ; SPILLED: successors: %bb.1(0x80000000) ; SPILLED: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; SPILLED: SI_SPILL_A512_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, %stack.0, $sgpr32, 0, implicit $exec :: (store 64 into %stack.0, align 4, addrspace 5) + ; SPILLED: SI_SPILL_A512_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, %stack.0, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5) ; SPILLED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; SPILLED: bb.1: ; SPILLED: successors: %bb.2(0x80000000) ; SPILLED: S_NOP 1 ; SPILLED: bb.2: - ; SPILLED: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = SI_SPILL_A512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 64 from %stack.0, align 4, addrspace 5) + ; SPILLED: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = SI_SPILL_A512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5) ; SPILLED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; EXPANDED-LABEL: name: spill_restore_agpr512 ; EXPANDED: bb.0: @@ -1259,13 +1259,13 @@ body: | ; GFX908-SPILLED: bb.0: ; GFX908-SPILLED: successors: %bb.1(0x80000000) ; GFX908-SPILLED: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX908-SPILLED: SI_SPILL_A1024_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, %stack.0, $sgpr32, 0, implicit $exec :: (store 128 into %stack.0, align 4, addrspace 5) + ; GFX908-SPILLED: SI_SPILL_A1024_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, %stack.0, $sgpr32, 0, implicit $exec :: (store (s1024) into %stack.0, align 4, addrspace 5) ; GFX908-SPILLED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; GFX908-SPILLED: bb.1: ; GFX908-SPILLED: successors: %bb.2(0x80000000) ; GFX908-SPILLED: S_NOP 1 ; GFX908-SPILLED: bb.2: - ; GFX908-SPILLED: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = SI_SPILL_A1024_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 128 from %stack.0, align 4, addrspace 5) + ; GFX908-SPILLED: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = SI_SPILL_A1024_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s1024) from %stack.0, align 4, addrspace 5) ; GFX908-SPILLED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; GFX908-EXPANDED-LABEL: name: spill_restore_agpr1024 ; GFX908-EXPANDED: bb.0: @@ -1348,13 +1348,13 @@ body: | ; GFX90A-SPILLED: bb.0: ; GFX90A-SPILLED: successors: %bb.1(0x80000000) ; GFX90A-SPILLED: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; GFX90A-SPILLED: SI_SPILL_A1024_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, %stack.0, $sgpr32, 0, implicit $exec :: (store 128 into %stack.0, align 4, addrspace 5) + ; GFX90A-SPILLED: SI_SPILL_A1024_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, %stack.0, $sgpr32, 0, implicit $exec :: (store (s1024) into %stack.0, align 4, addrspace 5) ; GFX90A-SPILLED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; GFX90A-SPILLED: bb.1: ; GFX90A-SPILLED: successors: %bb.2(0x80000000) ; GFX90A-SPILLED: S_NOP 1 ; GFX90A-SPILLED: bb.2: - ; GFX90A-SPILLED: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = SI_SPILL_A1024_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 128 from %stack.0, align 4, addrspace 5) + ; GFX90A-SPILLED: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = SI_SPILL_A1024_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s1024) from %stack.0, align 4, addrspace 5) ; GFX90A-SPILLED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; GFX90A-EXPANDED-LABEL: name: spill_restore_agpr1024 ; GFX90A-EXPANDED: bb.0: @@ -1437,13 +1437,13 @@ body: | ; SPILLED: bb.0: ; SPILLED: successors: %bb.1(0x80000000) ; SPILLED: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; SPILLED: SI_SPILL_A1024_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, %stack.0, $sgpr32, 0, implicit $exec :: (store 128 into %stack.0, align 4, addrspace 5) + ; SPILLED: SI_SPILL_A1024_SAVE killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, %stack.0, $sgpr32, 0, implicit $exec :: (store (s1024) into %stack.0, align 4, addrspace 5) ; SPILLED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; SPILLED: bb.1: ; SPILLED: successors: %bb.2(0x80000000) ; SPILLED: S_NOP 1 ; SPILLED: bb.2: - ; SPILLED: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = SI_SPILL_A1024_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 128 from %stack.0, align 4, addrspace 5) + ; SPILLED: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = SI_SPILL_A1024_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s1024) from %stack.0, align 4, addrspace 5) ; SPILLED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; EXPANDED-LABEL: name: spill_restore_agpr1024 ; EXPANDED: bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/spill-empty-live-interval.mir b/llvm/test/CodeGen/AMDGPU/spill-empty-live-interval.mir index 65b36d44e6498..298979502b2c8 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-empty-live-interval.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-empty-live-interval.mir @@ -8,12 +8,12 @@ # CHECK-LABEL: name: expecting_non_empty_interval # CHECK: undef %7.sub1:vreg_64 = V_MAC_F32_e32 0, undef %1:vgpr_32, undef %7.sub1, implicit $mode, implicit $exec -# CHECK-NEXT: SI_SPILL_V64_SAVE %7, %stack.0, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, align 4, addrspace 5) +# CHECK-NEXT: SI_SPILL_V64_SAVE %7, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5) # CHECK-NEXT: undef %5.sub1:vreg_64 = V_MOV_B32_e32 1786773504, implicit $exec # CHECK-NEXT: dead %3:vgpr_32 = V_MUL_F32_e32 0, %5.sub1, implicit $mode, implicit $exec # CHECK: S_NOP 0, implicit %6.sub1 -# CHECK-NEXT: %8:vreg_64 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5) +# CHECK-NEXT: %8:vreg_64 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5) # CHECK-NEXT: S_NOP 0, implicit %8.sub1 # CHECK-NEXT: S_NOP 0, implicit undef %9.sub0 diff --git a/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir b/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir index e4b77496440a8..ae4000149e284 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir @@ -22,7 +22,7 @@ body: | ; GCN-LABEL: name: spill_sgpr128_use_subreg ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GCN: $sgpr8_sgpr9 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec - ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) + ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) ; GCN: $exec = S_MOV_B64 killed $sgpr8_sgpr9 ; GCN: renamable $sgpr1 = COPY $sgpr2 ; GCN: $vgpr0 = V_WRITELANE_B32 $sgpr0, 0, $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3 @@ -31,11 +31,11 @@ body: | ; GCN: $vgpr0 = V_WRITELANE_B32 $sgpr3, 3, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN: renamable $sgpr8 = COPY killed renamable $sgpr1 ; GCN: $sgpr0_sgpr1 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec - ; GCN: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.1, addrspace 5) + ; GCN: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) ; GCN: $exec = S_MOV_B64 killed $sgpr0_sgpr1 ; GCN: S_ENDPGM 0, implicit $sgpr8 renamable $sgpr1 = COPY $sgpr2 - SI_SPILL_S128_SAVE renamable $sgpr0_sgpr1_sgpr2_sgpr3, %stack.0, implicit $exec, implicit $sgpr32 :: (store 16 into %stack.0, align 4, addrspace 5) + SI_SPILL_S128_SAVE renamable $sgpr0_sgpr1_sgpr2_sgpr3, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s128) into %stack.0, align 4, addrspace 5) renamable $sgpr8 = COPY killed renamable $sgpr1 S_ENDPGM 0, implicit $sgpr8 ... @@ -58,7 +58,7 @@ body: | ; GCN-LABEL: name: spill_sgpr128_use_kill ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GCN: $sgpr8_sgpr9 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec - ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) + ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) ; GCN: $exec = S_MOV_B64 killed $sgpr8_sgpr9 ; GCN: renamable $sgpr1 = COPY $sgpr2 ; GCN: $vgpr0 = V_WRITELANE_B32 $sgpr0, 0, $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3 @@ -66,11 +66,11 @@ body: | ; GCN: $vgpr0 = V_WRITELANE_B32 $sgpr2, 2, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN: $vgpr0 = V_WRITELANE_B32 killed $sgpr3, 3, $vgpr0, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN: $sgpr0_sgpr1 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec - ; GCN: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.1, addrspace 5) + ; GCN: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) ; GCN: $exec = S_MOV_B64 killed $sgpr0_sgpr1 ; GCN: S_ENDPGM 0 renamable $sgpr1 = COPY $sgpr2 - SI_SPILL_S128_SAVE renamable killed $sgpr0_sgpr1_sgpr2_sgpr3, %stack.0, implicit $exec, implicit $sgpr32 :: (store 16 into %stack.0, align 4, addrspace 5) + SI_SPILL_S128_SAVE renamable killed $sgpr0_sgpr1_sgpr2_sgpr3, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s128) into %stack.0, align 4, addrspace 5) S_ENDPGM 0 ... @@ -91,14 +91,14 @@ body: | ; GCN-LABEL: name: spill_vgpr128_use_subreg ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GCN: renamable $vgpr1 = COPY $vgpr2 - ; GCN: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0, addrspace 5) - ; GCN: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 4, addrspace 5) - ; GCN: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 8, addrspace 5) - ; GCN: BUFFER_STORE_DWORD_OFFSET $vgpr3, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 12, addrspace 5) + ; GCN: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0, addrspace 5) + ; GCN: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0 + 4, addrspace 5) + ; GCN: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0 + 8, addrspace 5) + ; GCN: BUFFER_STORE_DWORD_OFFSET $vgpr3, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0 + 12, addrspace 5) ; GCN: renamable $vgpr8 = COPY killed renamable $vgpr1 ; GCN: S_ENDPGM 0, implicit $vgpr8 renamable $vgpr1 = COPY $vgpr2 - SI_SPILL_V128_SAVE renamable $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store 16 into %stack.0, align 4, addrspace 5) + SI_SPILL_V128_SAVE renamable $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, align 4, addrspace 5) renamable $vgpr8 = COPY killed renamable $vgpr1 S_ENDPGM 0, implicit $vgpr8 ... @@ -120,12 +120,12 @@ body: | ; GCN-LABEL: name: spill_vgpr128_use_kill ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GCN: renamable $vgpr1 = COPY $vgpr2 - ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0, addrspace 5) - ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 4, addrspace 5) - ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 8, addrspace 5) - ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 12, addrspace 5) + ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0, addrspace 5) + ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0 + 4, addrspace 5) + ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0 + 8, addrspace 5) + ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0 + 12, addrspace 5) ; GCN: S_ENDPGM 0 renamable $vgpr1 = COPY $vgpr2 - SI_SPILL_V128_SAVE renamable killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store 16 into %stack.0, align 4, addrspace 5) + SI_SPILL_V128_SAVE renamable killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, align 4, addrspace 5) S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir b/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir index 4bcf9ae38944d..f464b92831284 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir @@ -48,28 +48,28 @@ body: | ; GFX9: $vcc = IMPLICIT_DEF ; GFX9: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GFX9: $exec = S_MOV_B64 3, implicit-def $vgpr0 - ; GFX9: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GFX9: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) ; GFX9: $vgpr0 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr0, implicit $vcc ; GFX9: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit $vcc - ; GFX9: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) - ; GFX9: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GFX9: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; GFX9: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GFX9: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GFX9: $vcc = IMPLICIT_DEF ; GFX9: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GFX9: $exec = S_MOV_B64 3, implicit-def $vgpr0 - ; GFX9: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GFX9: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) ; GFX9: $vgpr0 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr0, implicit $vcc ; GFX9: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit killed $vcc - ; GFX9: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) - ; GFX9: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GFX9: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; GFX9: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GFX9: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GFX9: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GFX9: $exec = S_MOV_B64 3, implicit-def $vgpr0 - ; GFX9: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) - ; GFX9: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) + ; GFX9: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GFX9: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) ; GFX9: $vcc_lo = V_READLANE_B32 $vgpr0, 0, implicit-def $vcc ; GFX9: $vcc_hi = V_READLANE_B32 killed $vgpr0, 1 - ; GFX9: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GFX9: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GFX9: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GFX10-LABEL: name: check_vcc ; GFX10: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr9 @@ -83,28 +83,28 @@ body: | ; GFX10: $vcc = IMPLICIT_DEF ; GFX10: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GFX10: $exec = S_MOV_B64 3, implicit-def $vgpr0 - ; GFX10: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GFX10: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) ; GFX10: $vgpr0 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr0, implicit $vcc ; GFX10: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit $vcc - ; GFX10: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) - ; GFX10: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GFX10: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; GFX10: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GFX10: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GFX10: $vcc = IMPLICIT_DEF ; GFX10: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GFX10: $exec = S_MOV_B64 3, implicit-def $vgpr0 - ; GFX10: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GFX10: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) ; GFX10: $vgpr0 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr0, implicit $vcc ; GFX10: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit killed $vcc - ; GFX10: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) - ; GFX10: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GFX10: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; GFX10: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GFX10: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GFX10: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GFX10: $exec = S_MOV_B64 3, implicit-def $vgpr0 - ; GFX10: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5) - ; GFX10: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) + ; GFX10: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GFX10: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) ; GFX10: $vcc_lo = V_READLANE_B32 $vgpr0, 0, implicit-def $vcc ; GFX10: $vcc_hi = V_READLANE_B32 killed $vgpr0, 1 - ; GFX10: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5) + ; GFX10: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GFX10: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 $vcc = IMPLICIT_DEF SI_SPILL_S64_SAVE $vcc, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 diff --git a/llvm/test/CodeGen/AMDGPU/spill192.mir b/llvm/test/CodeGen/AMDGPU/spill192.mir index 3a959401aabf4..cbbb0de64ffb2 100644 --- a/llvm/test/CodeGen/AMDGPU/spill192.mir +++ b/llvm/test/CodeGen/AMDGPU/spill192.mir @@ -17,13 +17,13 @@ body: | ; SPILLED: bb.0: ; SPILLED: successors: %bb.1(0x80000000) ; SPILLED: S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 - ; SPILLED: SI_SPILL_S192_SAVE killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9, %stack.0, implicit $exec, implicit $sgpr32 :: (store 24 into %stack.0, align 4, addrspace 5) + ; SPILLED: SI_SPILL_S192_SAVE killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s192) into %stack.0, align 4, addrspace 5) ; SPILLED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; SPILLED: bb.1: ; SPILLED: successors: %bb.2(0x80000000) ; SPILLED: S_NOP 1 ; SPILLED: bb.2: - ; SPILLED: $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 = SI_SPILL_S192_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load 24 from %stack.0, align 4, addrspace 5) + ; SPILLED: $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 = SI_SPILL_S192_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s192) from %stack.0, align 4, addrspace 5) ; SPILLED: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 ; EXPANDED-LABEL: name: spill_restore_sgpr192 ; EXPANDED: bb.0: @@ -72,25 +72,25 @@ body: | ; SPILLED: bb.0: ; SPILLED: successors: %bb.1(0x80000000) ; SPILLED: S_NOP 0, implicit-def renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; SPILLED: SI_SPILL_V192_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, %stack.0, $sgpr32, 0, implicit $exec :: (store 24 into %stack.0, align 4, addrspace 5) + ; SPILLED: SI_SPILL_V192_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, %stack.0, $sgpr32, 0, implicit $exec :: (store (s192) into %stack.0, align 4, addrspace 5) ; SPILLED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; SPILLED: bb.1: ; SPILLED: successors: %bb.2(0x80000000) ; SPILLED: S_NOP 1 ; SPILLED: bb.2: - ; SPILLED: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = SI_SPILL_V192_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 24 from %stack.0, align 4, addrspace 5) + ; SPILLED: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = SI_SPILL_V192_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s192) from %stack.0, align 4, addrspace 5) ; SPILLED: S_NOP 0, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; EXPANDED-LABEL: name: spill_restore_vgpr192 ; EXPANDED: bb.0: ; EXPANDED: successors: %bb.1(0x80000000) ; EXPANDED: S_NOP 0, implicit-def renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; EXPANDED: SI_SPILL_V192_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, %stack.0, $sgpr32, 0, implicit $exec :: (store 24 into %stack.0, align 4, addrspace 5) + ; EXPANDED: SI_SPILL_V192_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, %stack.0, $sgpr32, 0, implicit $exec :: (store (s192) into %stack.0, align 4, addrspace 5) ; EXPANDED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; EXPANDED: bb.1: ; EXPANDED: successors: %bb.2(0x80000000) ; EXPANDED: S_NOP 1 ; EXPANDED: bb.2: - ; EXPANDED: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = SI_SPILL_V192_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 24 from %stack.0, align 4, addrspace 5) + ; EXPANDED: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = SI_SPILL_V192_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s192) from %stack.0, align 4, addrspace 5) ; EXPANDED: S_NOP 0, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 bb.0: S_NOP 0, implicit-def %0:vreg_192 diff --git a/llvm/test/CodeGen/AMDGPU/spill224.mir b/llvm/test/CodeGen/AMDGPU/spill224.mir index e8d6a80e84f95..0d04e058df6e6 100644 --- a/llvm/test/CodeGen/AMDGPU/spill224.mir +++ b/llvm/test/CodeGen/AMDGPU/spill224.mir @@ -15,13 +15,13 @@ body: | ; SPILLED: bb.0: ; SPILLED: successors: %bb.1(0x80000000) ; SPILLED: S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 - ; SPILLED: SI_SPILL_S224_SAVE killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10, %stack.0, implicit $exec, implicit $sgpr32 :: (store 28 into %stack.0, align 4, addrspace 5) + ; SPILLED: SI_SPILL_S224_SAVE killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s224) into %stack.0, align 4, addrspace 5) ; SPILLED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; SPILLED: bb.1: ; SPILLED: successors: %bb.2(0x80000000) ; SPILLED: S_NOP 1 ; SPILLED: bb.2: - ; SPILLED: $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 = SI_SPILL_S224_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load 28 from %stack.0, align 4, addrspace 5) + ; SPILLED: $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 = SI_SPILL_S224_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s224) from %stack.0, align 4, addrspace 5) ; SPILLED: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 ; EXPANDED-LABEL: name: spill_restore_sgpr224 ; EXPANDED: bb.0: @@ -72,25 +72,25 @@ body: | ; SPILLED: bb.0: ; SPILLED: successors: %bb.1(0x80000000) ; SPILLED: S_NOP 0, implicit-def renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; SPILLED: SI_SPILL_V224_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, %stack.0, $sgpr32, 0, implicit $exec :: (store 28 into %stack.0, align 4, addrspace 5) + ; SPILLED: SI_SPILL_V224_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, %stack.0, $sgpr32, 0, implicit $exec :: (store (s224) into %stack.0, align 4, addrspace 5) ; SPILLED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; SPILLED: bb.1: ; SPILLED: successors: %bb.2(0x80000000) ; SPILLED: S_NOP 1 ; SPILLED: bb.2: - ; SPILLED: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = SI_SPILL_V224_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 28 from %stack.0, align 4, addrspace 5) + ; SPILLED: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = SI_SPILL_V224_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s224) from %stack.0, align 4, addrspace 5) ; SPILLED: S_NOP 0, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; EXPANDED-LABEL: name: spill_restore_vgpr224 ; EXPANDED: bb.0: ; EXPANDED: successors: %bb.1(0x80000000) ; EXPANDED: S_NOP 0, implicit-def renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; EXPANDED: SI_SPILL_V224_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, %stack.0, $sgpr32, 0, implicit $exec :: (store 28 into %stack.0, align 4, addrspace 5) + ; EXPANDED: SI_SPILL_V224_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, %stack.0, $sgpr32, 0, implicit $exec :: (store (s224) into %stack.0, align 4, addrspace 5) ; EXPANDED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; EXPANDED: bb.1: ; EXPANDED: successors: %bb.2(0x80000000) ; EXPANDED: S_NOP 1 ; EXPANDED: bb.2: - ; EXPANDED: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = SI_SPILL_V224_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 28 from %stack.0, align 4, addrspace 5) + ; EXPANDED: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = SI_SPILL_V224_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s224) from %stack.0, align 4, addrspace 5) ; EXPANDED: S_NOP 0, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 bb.0: S_NOP 0, implicit-def %0:vreg_224 diff --git a/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir b/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir index 5eec0d97bab6e..317f240b6adf6 100644 --- a/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir +++ b/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir @@ -266,9 +266,9 @@ body: | ; RA: internal %15.sub13:sgpr_512 = COPY [[DEF2]].sub13 ; RA: internal %15.sub14:sgpr_512 = COPY [[DEF2]].sub14 ; RA: } - ; RA: SI_SPILL_S512_SAVE %15, %stack.0, implicit $exec, implicit $sgpr32 :: (store 64 into %stack.0, align 4, addrspace 5) + ; RA: SI_SPILL_S512_SAVE %15, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s512) into %stack.0, align 4, addrspace 5) ; RA: S_NOP 0, implicit-def $sgpr8, implicit-def $sgpr12, implicit-def $sgpr16, implicit-def $sgpr20, implicit-def $sgpr24, implicit-def $sgpr28, implicit-def $sgpr32, implicit-def $sgpr36, implicit-def $sgpr40, implicit-def $sgpr44, implicit-def $sgpr48, implicit-def $sgpr52, implicit-def $sgpr56, implicit-def $sgpr60, implicit-def $sgpr64, implicit-def $sgpr68, implicit-def $sgpr72, implicit-def $sgpr74, implicit-def $sgpr78, implicit-def $sgpr82, implicit-def $sgpr86, implicit-def $sgpr90, implicit-def $sgpr94, implicit-def $sgpr98 - ; RA: [[SI_SPILL_S512_RESTORE:%[0-9]+]]:sgpr_512 = SI_SPILL_S512_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load 64 from %stack.0, align 4, addrspace 5) + ; RA: [[SI_SPILL_S512_RESTORE:%[0-9]+]]:sgpr_512 = SI_SPILL_S512_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s512) from %stack.0, align 4, addrspace 5) ; RA: undef %14.sub4_sub5:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub4_sub5 { ; RA: internal %14.sub10_sub11:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub10_sub11 ; RA: internal %14.sub7:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub7 @@ -276,14 +276,14 @@ body: | ; RA: internal %14.sub13:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub13 ; RA: internal %14.sub14:sgpr_512 = COPY [[SI_SPILL_S512_RESTORE]].sub14 ; RA: } - ; RA: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub4, 0 :: (dereferenceable invariant load 4) - ; RA: [[S_BUFFER_LOAD_DWORD_SGPR1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub5, 0 :: (dereferenceable invariant load 4) - ; RA: [[S_BUFFER_LOAD_DWORD_SGPR2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub10, 0 :: (dereferenceable invariant load 4) - ; RA: [[S_BUFFER_LOAD_DWORD_SGPR3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub11, 0 :: (dereferenceable invariant load 4) - ; RA: [[S_BUFFER_LOAD_DWORD_SGPR4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub7, 0 :: (dereferenceable invariant load 4) - ; RA: [[S_BUFFER_LOAD_DWORD_SGPR5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub8, 0 :: (dereferenceable invariant load 4) - ; RA: [[S_BUFFER_LOAD_DWORD_SGPR6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub13, 0 :: (dereferenceable invariant load 4) - ; RA: [[S_BUFFER_LOAD_DWORD_SGPR7:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub14, 0 :: (dereferenceable invariant load 4) + ; RA: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub4, 0 :: (dereferenceable invariant load (s32)) + ; RA: [[S_BUFFER_LOAD_DWORD_SGPR1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub5, 0 :: (dereferenceable invariant load (s32)) + ; RA: [[S_BUFFER_LOAD_DWORD_SGPR2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub10, 0 :: (dereferenceable invariant load (s32)) + ; RA: [[S_BUFFER_LOAD_DWORD_SGPR3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub11, 0 :: (dereferenceable invariant load (s32)) + ; RA: [[S_BUFFER_LOAD_DWORD_SGPR4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub7, 0 :: (dereferenceable invariant load (s32)) + ; RA: [[S_BUFFER_LOAD_DWORD_SGPR5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub8, 0 :: (dereferenceable invariant load (s32)) + ; RA: [[S_BUFFER_LOAD_DWORD_SGPR6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub13, 0 :: (dereferenceable invariant load (s32)) + ; RA: [[S_BUFFER_LOAD_DWORD_SGPR7:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[DEF]], %14.sub14, 0 :: (dereferenceable invariant load (s32)) ; RA: S_NOP 0, implicit [[DEF]], implicit [[DEF1]], implicit [[S_BUFFER_LOAD_DWORD_SGPR]], implicit [[S_BUFFER_LOAD_DWORD_SGPR1]], implicit [[S_BUFFER_LOAD_DWORD_SGPR2]], implicit [[S_BUFFER_LOAD_DWORD_SGPR3]], implicit [[S_BUFFER_LOAD_DWORD_SGPR4]], implicit [[S_BUFFER_LOAD_DWORD_SGPR5]], implicit [[S_BUFFER_LOAD_DWORD_SGPR6]], implicit [[S_BUFFER_LOAD_DWORD_SGPR7]] ; VR-LABEL: name: splitkit_copy_unbundle_reorder ; VR: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF @@ -295,9 +295,9 @@ body: | ; VR: renamable $sgpr20 = S_MOV_B32 -1 ; VR: renamable $sgpr25 = S_MOV_B32 -1 ; VR: renamable $sgpr26 = S_MOV_B32 -1 - ; VR: SI_SPILL_S512_SAVE killed renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27, %stack.0, implicit $exec, implicit $sgpr32 :: (store 64 into %stack.0, align 4, addrspace 5) + ; VR: SI_SPILL_S512_SAVE killed renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s512) into %stack.0, align 4, addrspace 5) ; VR: S_NOP 0, implicit-def $sgpr8, implicit-def $sgpr12, implicit-def $sgpr16, implicit-def $sgpr20, implicit-def $sgpr24, implicit-def $sgpr28, implicit-def $sgpr32, implicit-def $sgpr36, implicit-def $sgpr40, implicit-def $sgpr44, implicit-def $sgpr48, implicit-def $sgpr52, implicit-def $sgpr56, implicit-def $sgpr60, implicit-def $sgpr64, implicit-def $sgpr68, implicit-def $sgpr72, implicit-def $sgpr74, implicit-def $sgpr78, implicit-def $sgpr82, implicit-def $sgpr86, implicit-def $sgpr90, implicit-def $sgpr94, implicit-def $sgpr98 - ; VR: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = SI_SPILL_S512_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load 64 from %stack.0, align 4, addrspace 5) + ; VR: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = SI_SPILL_S512_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s512) from %stack.0, align 4, addrspace 5) ; VR: renamable $sgpr12_sgpr13 = COPY killed renamable $sgpr16_sgpr17 ; VR: renamable $sgpr15 = COPY killed renamable $sgpr19 ; VR: renamable $sgpr18_sgpr19 = COPY killed renamable $sgpr22_sgpr23 @@ -305,15 +305,15 @@ body: | ; VR: renamable $sgpr21 = COPY killed renamable $sgpr25 ; VR: renamable $sgpr22 = COPY killed renamable $sgpr26 ; VR: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = IMPLICIT_DEF - ; VR: renamable $sgpr8 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr12, 0 :: (dereferenceable invariant load 4) - ; VR: renamable $sgpr9 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr13, 0 :: (dereferenceable invariant load 4) - ; VR: renamable $sgpr14 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr15, 0 :: (dereferenceable invariant load 4) + ; VR: renamable $sgpr8 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr12, 0 :: (dereferenceable invariant load (s32)) + ; VR: renamable $sgpr9 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr13, 0 :: (dereferenceable invariant load (s32)) + ; VR: renamable $sgpr14 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr15, 0 :: (dereferenceable invariant load (s32)) ; VR: renamable $sgpr10_sgpr11 = IMPLICIT_DEF - ; VR: renamable $sgpr17 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr22, 0 :: (dereferenceable invariant load 4) - ; VR: renamable $sgpr15 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr16, 0 :: (dereferenceable invariant load 4) - ; VR: renamable $sgpr12 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr18, 0 :: (dereferenceable invariant load 4) - ; VR: renamable $sgpr13 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr19, 0 :: (dereferenceable invariant load 4) - ; VR: renamable $sgpr16 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr21, 0 :: (dereferenceable invariant load 4) + ; VR: renamable $sgpr17 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr22, 0 :: (dereferenceable invariant load (s32)) + ; VR: renamable $sgpr15 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr16, 0 :: (dereferenceable invariant load (s32)) + ; VR: renamable $sgpr12 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr18, 0 :: (dereferenceable invariant load (s32)) + ; VR: renamable $sgpr13 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr19, 0 :: (dereferenceable invariant load (s32)) + ; VR: renamable $sgpr16 = S_BUFFER_LOAD_DWORD_SGPR renamable $sgpr4_sgpr5_sgpr6_sgpr7, killed renamable $sgpr21, 0 :: (dereferenceable invariant load (s32)) ; VR: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, implicit killed renamable $sgpr10_sgpr11, implicit killed renamable $sgpr8, implicit killed renamable $sgpr9, implicit killed renamable $sgpr12, implicit killed renamable $sgpr13, implicit killed renamable $sgpr14, implicit killed renamable $sgpr15, implicit killed renamable $sgpr16, implicit killed renamable $sgpr17 %0:sgpr_128 = IMPLICIT_DEF %1:sreg_64 = IMPLICIT_DEF @@ -331,14 +331,14 @@ body: | ; Clobber registers S_NOP 0, implicit-def $sgpr8, implicit-def $sgpr12, implicit-def $sgpr16, implicit-def $sgpr20, implicit-def $sgpr24, implicit-def $sgpr28, implicit-def $sgpr32, implicit-def $sgpr36, implicit-def $sgpr40, implicit-def $sgpr44, implicit-def $sgpr48, implicit-def $sgpr52, implicit-def $sgpr56, implicit-def $sgpr60, implicit-def $sgpr64, implicit-def $sgpr68, implicit-def $sgpr72, implicit-def $sgpr74, implicit-def $sgpr78, implicit-def $sgpr82, implicit-def $sgpr86, implicit-def $sgpr90, implicit-def $sgpr94, implicit-def $sgpr98 - %5:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub4:sgpr_512, 0 :: (dereferenceable invariant load 4) - %6:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub5:sgpr_512, 0 :: (dereferenceable invariant load 4) - %7:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub10:sgpr_512, 0 :: (dereferenceable invariant load 4) - %8:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub11:sgpr_512, 0 :: (dereferenceable invariant load 4) - %9:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub7:sgpr_512, 0 :: (dereferenceable invariant load 4) - %10:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub8:sgpr_512, 0 :: (dereferenceable invariant load 4) - %11:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub13:sgpr_512, 0 :: (dereferenceable invariant load 4) - %12:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub14:sgpr_512, 0 :: (dereferenceable invariant load 4) + %5:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub4:sgpr_512, 0 :: (dereferenceable invariant load (s32)) + %6:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub5:sgpr_512, 0 :: (dereferenceable invariant load (s32)) + %7:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub10:sgpr_512, 0 :: (dereferenceable invariant load (s32)) + %8:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub11:sgpr_512, 0 :: (dereferenceable invariant load (s32)) + %9:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub7:sgpr_512, 0 :: (dereferenceable invariant load (s32)) + %10:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub8:sgpr_512, 0 :: (dereferenceable invariant load (s32)) + %11:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub13:sgpr_512, 0 :: (dereferenceable invariant load (s32)) + %12:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %0:sgpr_128, %2.sub14:sgpr_512, 0 :: (dereferenceable invariant load (s32)) S_NOP 0, implicit %0, implicit %1, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9, implicit %10, implicit %11, implicit %12 diff --git a/llvm/test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir b/llvm/test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir index 15168c7325c8d..8d700780046cf 100644 --- a/llvm/test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir +++ b/llvm/test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir @@ -14,7 +14,7 @@ body: | ; CHECK-LABEL: name: zextload_global_v64i16_to_v64i64 ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]](p4), 9, 0 :: (dereferenceable invariant load 16, align 4, addrspace 4) + ; CHECK: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]](p4), 9, 0 :: (dereferenceable invariant load (s128), align 4, addrspace 4) ; CHECK: undef %2.sub3:sgpr_128 = S_MOV_B32 61440 ; CHECK: %2.sub2:sgpr_128 = S_MOV_B32 -1 ; CHECK: %2.sub0:sgpr_128 = COPY [[S_LOAD_DWORDX4_IMM]].sub0 @@ -24,124 +24,124 @@ body: | ; CHECK: %3.sub2:sgpr_128 = COPY %2.sub2 ; CHECK: %3.sub3:sgpr_128 = COPY %2.sub3 ; CHECK: early-clobber %4:vreg_128, early-clobber %5:vreg_128, early-clobber %6:vreg_128, early-clobber %7:vreg_128 = BUNDLE %3, implicit $exec { - ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 128, addrspace 1) - ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 16, 0, 0, 0, implicit $exec :: (load 16, addrspace 1) - ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFSET2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 32, 0, 0, 0, implicit $exec :: (load 16, align 32, addrspace 1) - ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFSET3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 48, 0, 0, 0, implicit $exec :: (load 16, addrspace 1) + ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 0, 0, 0, 0, implicit $exec :: (load (s128), align 128, addrspace 1) + ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 16, 0, 0, 0, implicit $exec :: (load (s128), addrspace 1) + ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFSET2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 32, 0, 0, 0, implicit $exec :: (load (s128), align 32, addrspace 1) + ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFSET3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 48, 0, 0, 0, implicit $exec :: (load (s128), addrspace 1) ; CHECK: } ; CHECK: undef %47.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET]].sub1, implicit $exec - ; CHECK: SI_SPILL_V128_SAVE %47, %stack.0, $sgpr32, 0, implicit $exec :: (store 16 into %stack.0, align 4, addrspace 5) + ; CHECK: SI_SPILL_V128_SAVE %47, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, align 4, addrspace 5) ; CHECK: undef %52.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET]].sub0, implicit $exec - ; CHECK: SI_SPILL_V128_SAVE %52, %stack.1, $sgpr32, 0, implicit $exec :: (store 16 into %stack.1, align 4, addrspace 5) + ; CHECK: SI_SPILL_V128_SAVE %52, %stack.1, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.1, align 4, addrspace 5) ; CHECK: undef %57.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET]].sub3, implicit $exec - ; CHECK: SI_SPILL_V128_SAVE %57, %stack.2, $sgpr32, 0, implicit $exec :: (store 16 into %stack.2, align 4, addrspace 5) + ; CHECK: SI_SPILL_V128_SAVE %57, %stack.2, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.2, align 4, addrspace 5) ; CHECK: undef %62.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET]].sub2, implicit $exec - ; CHECK: SI_SPILL_V128_SAVE %62, %stack.3, $sgpr32, 0, implicit $exec :: (store 16 into %stack.3, align 4, addrspace 5) + ; CHECK: SI_SPILL_V128_SAVE %62, %stack.3, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.3, align 4, addrspace 5) ; CHECK: undef %67.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub1, implicit $exec ; CHECK: undef %71.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub0, implicit $exec - ; CHECK: SI_SPILL_V128_SAVE %71, %stack.4, $sgpr32, 0, implicit $exec :: (store 16 into %stack.4, align 4, addrspace 5) + ; CHECK: SI_SPILL_V128_SAVE %71, %stack.4, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.4, align 4, addrspace 5) ; CHECK: undef %76.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub3, implicit $exec - ; CHECK: SI_SPILL_V128_SAVE %76, %stack.5, $sgpr32, 0, implicit $exec :: (store 16 into %stack.5, align 4, addrspace 5) + ; CHECK: SI_SPILL_V128_SAVE %76, %stack.5, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.5, align 4, addrspace 5) ; CHECK: undef %81.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub2, implicit $exec - ; CHECK: SI_SPILL_V128_SAVE %81, %stack.6, $sgpr32, 0, implicit $exec :: (store 16 into %stack.6, align 4, addrspace 5) + ; CHECK: SI_SPILL_V128_SAVE %81, %stack.6, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.6, align 4, addrspace 5) ; CHECK: undef %86.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub1, implicit $exec ; CHECK: undef %90.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub0, implicit $exec - ; CHECK: SI_SPILL_V128_SAVE %90, %stack.7, $sgpr32, 0, implicit $exec :: (store 16 into %stack.7, align 4, addrspace 5) + ; CHECK: SI_SPILL_V128_SAVE %90, %stack.7, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.7, align 4, addrspace 5) ; CHECK: undef %95.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub3, implicit $exec - ; CHECK: SI_SPILL_V128_SAVE %95, %stack.8, $sgpr32, 0, implicit $exec :: (store 16 into %stack.8, align 4, addrspace 5) + ; CHECK: SI_SPILL_V128_SAVE %95, %stack.8, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.8, align 4, addrspace 5) ; CHECK: undef %100.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub2, implicit $exec - ; CHECK: SI_SPILL_V128_SAVE %100, %stack.9, $sgpr32, 0, implicit $exec :: (store 16 into %stack.9, align 4, addrspace 5) + ; CHECK: SI_SPILL_V128_SAVE %100, %stack.9, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.9, align 4, addrspace 5) ; CHECK: undef %105.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub1, implicit $exec ; CHECK: undef %109.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub0, implicit $exec ; CHECK: undef %113.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub3, implicit $exec ; CHECK: undef %117.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub2, implicit $exec - ; CHECK: SI_SPILL_V128_SAVE %117, %stack.10, $sgpr32, 0, implicit $exec :: (store 16 into %stack.10, align 4, addrspace 5) - ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 64, 0, 0, 0, implicit $exec :: (load 16, align 64, addrspace 1) + ; CHECK: SI_SPILL_V128_SAVE %117, %stack.10, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.10, align 4, addrspace 5) + ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 64, 0, 0, 0, implicit $exec :: (load (s128), align 64, addrspace 1) ; CHECK: undef %122.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub1, implicit $exec ; CHECK: undef %126.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub0, implicit $exec ; CHECK: undef %130.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub3, implicit $exec - ; CHECK: SI_SPILL_V128_SAVE %130, %stack.11, $sgpr32, 0, implicit $exec :: (store 16 into %stack.11, align 4, addrspace 5) + ; CHECK: SI_SPILL_V128_SAVE %130, %stack.11, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.11, align 4, addrspace 5) ; CHECK: undef %135.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub2, implicit $exec - ; CHECK: SI_SPILL_V128_SAVE %135, %stack.12, $sgpr32, 0, implicit $exec :: (store 16 into %stack.12, align 4, addrspace 5) - ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFSET5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 80, 0, 0, 0, implicit $exec :: (load 16, addrspace 1) + ; CHECK: SI_SPILL_V128_SAVE %135, %stack.12, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.12, align 4, addrspace 5) + ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFSET5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 80, 0, 0, 0, implicit $exec :: (load (s128), addrspace 1) ; CHECK: undef %140.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub1, implicit $exec ; CHECK: undef %144.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub0, implicit $exec - ; CHECK: SI_SPILL_V128_SAVE %144, %stack.13, $sgpr32, 0, implicit $exec :: (store 16 into %stack.13, align 4, addrspace 5) + ; CHECK: SI_SPILL_V128_SAVE %144, %stack.13, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.13, align 4, addrspace 5) ; CHECK: undef %149.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub3, implicit $exec - ; CHECK: SI_SPILL_V128_SAVE %149, %stack.14, $sgpr32, 0, implicit $exec :: (store 16 into %stack.14, align 4, addrspace 5) + ; CHECK: SI_SPILL_V128_SAVE %149, %stack.14, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.14, align 4, addrspace 5) ; CHECK: undef %154.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub2, implicit $exec - ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFSET6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 96, 0, 0, 0, implicit $exec :: (load 16, align 32, addrspace 1) + ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFSET6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 96, 0, 0, 0, implicit $exec :: (load (s128), align 32, addrspace 1) ; CHECK: undef %158.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub1, implicit $exec ; CHECK: undef %36.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub0, implicit $exec ; CHECK: undef %37.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub3, implicit $exec ; CHECK: undef %38.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub2, implicit $exec - ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFSET7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 112, 0, 0, 0, implicit $exec :: (load 16, addrspace 1) + ; CHECK: [[BUFFER_LOAD_DWORDX4_OFFSET7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 112, 0, 0, 0, implicit $exec :: (load (s128), addrspace 1) ; CHECK: undef %40.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET7]].sub1, implicit $exec ; CHECK: undef %41.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET7]].sub0, implicit $exec ; CHECK: undef %42.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET7]].sub3, implicit $exec ; CHECK: undef %43.sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET7]].sub2, implicit $exec ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 - ; CHECK: [[SI_SPILL_V128_RESTORE:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 16 from %stack.0, align 4, addrspace 5) + ; CHECK: [[SI_SPILL_V128_RESTORE:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) ; CHECK: [[SI_SPILL_V128_RESTORE]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET]].sub1, implicit $exec - ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE]], %stack.0, $sgpr32, 0, implicit $exec :: (store 16 into %stack.0, align 4, addrspace 5) - ; CHECK: [[SI_SPILL_V128_RESTORE1:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load 16 from %stack.1, align 4, addrspace 5) + ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE]], %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, align 4, addrspace 5) + ; CHECK: [[SI_SPILL_V128_RESTORE1:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.1, align 4, addrspace 5) ; CHECK: [[SI_SPILL_V128_RESTORE1]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET]].sub0, implicit $exec - ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE1]], %stack.1, $sgpr32, 0, implicit $exec :: (store 16 into %stack.1, align 4, addrspace 5) - ; CHECK: [[SI_SPILL_V128_RESTORE2:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load 16 from %stack.2, align 4, addrspace 5) + ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE1]], %stack.1, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.1, align 4, addrspace 5) + ; CHECK: [[SI_SPILL_V128_RESTORE2:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.2, align 4, addrspace 5) ; CHECK: [[SI_SPILL_V128_RESTORE2]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET]].sub3, implicit $exec - ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE2]], %stack.2, $sgpr32, 0, implicit $exec :: (store 16 into %stack.2, align 4, addrspace 5) - ; CHECK: [[SI_SPILL_V128_RESTORE3:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load 16 from %stack.3, align 4, addrspace 5) + ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE2]], %stack.2, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.2, align 4, addrspace 5) + ; CHECK: [[SI_SPILL_V128_RESTORE3:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.3, align 4, addrspace 5) ; CHECK: [[SI_SPILL_V128_RESTORE3]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET]].sub2, implicit $exec - ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE3]], %stack.3, $sgpr32, 0, implicit $exec :: (store 16 into %stack.3, align 4, addrspace 5) + ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE3]], %stack.3, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.3, align 4, addrspace 5) ; CHECK: undef %68.sub2:vreg_128 = COPY %67.sub2 ; CHECK: %68.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub1, implicit $exec - ; CHECK: [[SI_SPILL_V128_RESTORE4:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.4, $sgpr32, 0, implicit $exec :: (load 16 from %stack.4, align 4, addrspace 5) + ; CHECK: [[SI_SPILL_V128_RESTORE4:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.4, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.4, align 4, addrspace 5) ; CHECK: [[SI_SPILL_V128_RESTORE4]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub0, implicit $exec - ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE4]], %stack.4, $sgpr32, 0, implicit $exec :: (store 16 into %stack.4, align 4, addrspace 5) - ; CHECK: [[SI_SPILL_V128_RESTORE5:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.5, $sgpr32, 0, implicit $exec :: (load 16 from %stack.5, align 4, addrspace 5) + ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE4]], %stack.4, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.4, align 4, addrspace 5) + ; CHECK: [[SI_SPILL_V128_RESTORE5:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.5, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.5, align 4, addrspace 5) ; CHECK: [[SI_SPILL_V128_RESTORE5]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub3, implicit $exec - ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE5]], %stack.5, $sgpr32, 0, implicit $exec :: (store 16 into %stack.5, align 4, addrspace 5) - ; CHECK: [[SI_SPILL_V128_RESTORE6:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: (load 16 from %stack.6, align 4, addrspace 5) + ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE5]], %stack.5, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.5, align 4, addrspace 5) + ; CHECK: [[SI_SPILL_V128_RESTORE6:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.6, align 4, addrspace 5) ; CHECK: [[SI_SPILL_V128_RESTORE6]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub2, implicit $exec - ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE6]], %stack.6, $sgpr32, 0, implicit $exec :: (store 16 into %stack.6, align 4, addrspace 5) + ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE6]], %stack.6, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.6, align 4, addrspace 5) ; CHECK: undef %87.sub2:vreg_128 = COPY %86.sub2 ; CHECK: %87.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub1, implicit $exec - ; CHECK: [[SI_SPILL_V128_RESTORE7:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.7, $sgpr32, 0, implicit $exec :: (load 16 from %stack.7, align 4, addrspace 5) + ; CHECK: [[SI_SPILL_V128_RESTORE7:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.7, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.7, align 4, addrspace 5) ; CHECK: [[SI_SPILL_V128_RESTORE7]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub0, implicit $exec - ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE7]], %stack.7, $sgpr32, 0, implicit $exec :: (store 16 into %stack.7, align 4, addrspace 5) - ; CHECK: [[SI_SPILL_V128_RESTORE8:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.8, $sgpr32, 0, implicit $exec :: (load 16 from %stack.8, align 4, addrspace 5) + ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE7]], %stack.7, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.7, align 4, addrspace 5) + ; CHECK: [[SI_SPILL_V128_RESTORE8:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.8, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.8, align 4, addrspace 5) ; CHECK: [[SI_SPILL_V128_RESTORE8]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub3, implicit $exec - ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE8]], %stack.8, $sgpr32, 0, implicit $exec :: (store 16 into %stack.8, align 4, addrspace 5) - ; CHECK: [[SI_SPILL_V128_RESTORE9:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.9, $sgpr32, 0, implicit $exec :: (load 16 from %stack.9, align 4, addrspace 5) + ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE8]], %stack.8, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.8, align 4, addrspace 5) + ; CHECK: [[SI_SPILL_V128_RESTORE9:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.9, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.9, align 4, addrspace 5) ; CHECK: [[SI_SPILL_V128_RESTORE9]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub2, implicit $exec - ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE9]], %stack.9, $sgpr32, 0, implicit $exec :: (store 16 into %stack.9, align 4, addrspace 5) + ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE9]], %stack.9, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.9, align 4, addrspace 5) ; CHECK: undef %106.sub2:vreg_128 = COPY %105.sub2 ; CHECK: %106.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub1, implicit $exec ; CHECK: undef %110.sub2:vreg_128 = COPY %109.sub2 ; CHECK: %110.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub0, implicit $exec ; CHECK: undef %114.sub2:vreg_128 = COPY %113.sub2 ; CHECK: %114.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub3, implicit $exec - ; CHECK: [[SI_SPILL_V128_RESTORE10:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.10, $sgpr32, 0, implicit $exec :: (load 16 from %stack.10, align 4, addrspace 5) + ; CHECK: [[SI_SPILL_V128_RESTORE10:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.10, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.10, align 4, addrspace 5) ; CHECK: [[SI_SPILL_V128_RESTORE10]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub2, implicit $exec - ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE10]], %stack.10, $sgpr32, 0, implicit $exec :: (store 16 into %stack.10, align 4, addrspace 5) + ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE10]], %stack.10, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.10, align 4, addrspace 5) ; CHECK: undef %123.sub2:vreg_128 = COPY %122.sub2 ; CHECK: %123.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub1, implicit $exec ; CHECK: undef %127.sub2:vreg_128 = COPY %126.sub2 ; CHECK: %127.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub0, implicit $exec - ; CHECK: [[SI_SPILL_V128_RESTORE11:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.11, $sgpr32, 0, implicit $exec :: (load 16 from %stack.11, align 4, addrspace 5) + ; CHECK: [[SI_SPILL_V128_RESTORE11:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.11, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.11, align 4, addrspace 5) ; CHECK: [[SI_SPILL_V128_RESTORE11]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub3, implicit $exec - ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE11]], %stack.11, $sgpr32, 0, implicit $exec :: (store 16 into %stack.11, align 4, addrspace 5) - ; CHECK: [[SI_SPILL_V128_RESTORE12:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.12, $sgpr32, 0, implicit $exec :: (load 16 from %stack.12, align 4, addrspace 5) + ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE11]], %stack.11, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.11, align 4, addrspace 5) + ; CHECK: [[SI_SPILL_V128_RESTORE12:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.12, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.12, align 4, addrspace 5) ; CHECK: [[SI_SPILL_V128_RESTORE12]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub2, implicit $exec - ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE12]], %stack.12, $sgpr32, 0, implicit $exec :: (store 16 into %stack.12, align 4, addrspace 5) + ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE12]], %stack.12, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.12, align 4, addrspace 5) ; CHECK: undef %141.sub2:vreg_128 = COPY %140.sub2 ; CHECK: %141.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub1, implicit $exec - ; CHECK: [[SI_SPILL_V128_RESTORE13:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.13, $sgpr32, 0, implicit $exec :: (load 16 from %stack.13, align 4, addrspace 5) + ; CHECK: [[SI_SPILL_V128_RESTORE13:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.13, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.13, align 4, addrspace 5) ; CHECK: [[SI_SPILL_V128_RESTORE13]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub0, implicit $exec - ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE13]], %stack.13, $sgpr32, 0, implicit $exec :: (store 16 into %stack.13, align 4, addrspace 5) - ; CHECK: [[SI_SPILL_V128_RESTORE14:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.14, $sgpr32, 0, implicit $exec :: (load 16 from %stack.14, align 4, addrspace 5) + ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE13]], %stack.13, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.13, align 4, addrspace 5) + ; CHECK: [[SI_SPILL_V128_RESTORE14:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.14, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.14, align 4, addrspace 5) ; CHECK: [[SI_SPILL_V128_RESTORE14]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub3, implicit $exec - ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE14]], %stack.14, $sgpr32, 0, implicit $exec :: (store 16 into %stack.14, align 4, addrspace 5) + ; CHECK: SI_SPILL_V128_SAVE [[SI_SPILL_V128_RESTORE14]], %stack.14, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.14, align 4, addrspace 5) ; CHECK: undef %155.sub2:vreg_128 = COPY %154.sub2 ; CHECK: %155.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub2, implicit $exec ; CHECK: undef %159.sub2:vreg_128 = COPY %158.sub2 @@ -155,193 +155,193 @@ body: | ; CHECK: %43.sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_]], [[BUFFER_LOAD_DWORDX4_OFFSET7]].sub2, implicit $exec ; CHECK: %43.sub1:vreg_128 = V_MOV_B32_e32 0, implicit $exec ; CHECK: %43.sub3:vreg_128 = COPY %43.sub1 - ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %43, %2, 0, 480, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1) + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %43, %2, 0, 480, 0, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) ; CHECK: %42.sub1:vreg_128 = COPY %43.sub1 ; CHECK: %42.sub3:vreg_128 = COPY %43.sub1 - ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %42, %2, 0, 496, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %42, %2, 0, 496, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) ; CHECK: %41.sub1:vreg_128 = COPY %43.sub1 ; CHECK: %41.sub3:vreg_128 = COPY %43.sub1 - ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %41, %2, 0, 448, 0, 0, 0, implicit $exec :: (store 16, align 64, addrspace 1) + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %41, %2, 0, 448, 0, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1) ; CHECK: %40.sub1:vreg_128 = COPY %43.sub1 ; CHECK: %40.sub3:vreg_128 = COPY %43.sub1 - ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %40, %2, 0, 464, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %40, %2, 0, 464, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) ; CHECK: %38.sub1:vreg_128 = COPY %43.sub1 ; CHECK: %38.sub3:vreg_128 = COPY %43.sub1 - ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %38, %2, 0, 416, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1) + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %38, %2, 0, 416, 0, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) ; CHECK: %37.sub1:vreg_128 = COPY %43.sub1 ; CHECK: %37.sub3:vreg_128 = COPY %43.sub1 - ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %37, %2, 0, 432, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %37, %2, 0, 432, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) ; CHECK: %36.sub1:vreg_128 = COPY %43.sub1 ; CHECK: %36.sub3:vreg_128 = COPY %43.sub1 - ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %36, %2, 0, 384, 0, 0, 0, implicit $exec :: (store 16, align 128, addrspace 1) + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %36, %2, 0, 384, 0, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1) ; CHECK: undef %157.sub0:vreg_128 = COPY %159.sub0 { ; CHECK: internal %157.sub2:vreg_128 = COPY %159.sub2 ; CHECK: } ; CHECK: %157.sub1:vreg_128 = COPY %43.sub1 ; CHECK: %157.sub3:vreg_128 = COPY %43.sub1 - ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %157, %2, 0, 400, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %157, %2, 0, 400, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) ; CHECK: undef %153.sub0:vreg_128 = COPY %155.sub0 { ; CHECK: internal %153.sub2:vreg_128 = COPY %155.sub2 ; CHECK: } ; CHECK: %153.sub1:vreg_128 = COPY %43.sub1 ; CHECK: %153.sub3:vreg_128 = COPY %43.sub1 - ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %153, %2, 0, 352, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1) - ; CHECK: [[SI_SPILL_V128_RESTORE15:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.14, $sgpr32, 0, implicit $exec :: (load 16 from %stack.14, align 4, addrspace 5) + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %153, %2, 0, 352, 0, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) + ; CHECK: [[SI_SPILL_V128_RESTORE15:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.14, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.14, align 4, addrspace 5) ; CHECK: undef %148.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE15]].sub0 { ; CHECK: internal %148.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE15]].sub2 ; CHECK: } ; CHECK: %148.sub1:vreg_128 = COPY %43.sub1 ; CHECK: %148.sub3:vreg_128 = COPY %43.sub1 - ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %148, %2, 0, 368, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) - ; CHECK: [[SI_SPILL_V128_RESTORE16:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.13, $sgpr32, 0, implicit $exec :: (load 16 from %stack.13, align 4, addrspace 5) + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %148, %2, 0, 368, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) + ; CHECK: [[SI_SPILL_V128_RESTORE16:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.13, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.13, align 4, addrspace 5) ; CHECK: undef %143.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE16]].sub0 { ; CHECK: internal %143.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE16]].sub2 ; CHECK: } ; CHECK: %143.sub1:vreg_128 = COPY %43.sub1 ; CHECK: %143.sub3:vreg_128 = COPY %43.sub1 - ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %143, %2, 0, 320, 0, 0, 0, implicit $exec :: (store 16, align 64, addrspace 1) + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %143, %2, 0, 320, 0, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1) ; CHECK: undef %139.sub0:vreg_128 = COPY %141.sub0 { ; CHECK: internal %139.sub2:vreg_128 = COPY %141.sub2 ; CHECK: } ; CHECK: %139.sub1:vreg_128 = COPY %43.sub1 ; CHECK: %139.sub3:vreg_128 = COPY %43.sub1 - ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %139, %2, 0, 336, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) - ; CHECK: [[SI_SPILL_V128_RESTORE17:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.12, $sgpr32, 0, implicit $exec :: (load 16 from %stack.12, align 4, addrspace 5) + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %139, %2, 0, 336, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) + ; CHECK: [[SI_SPILL_V128_RESTORE17:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.12, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.12, align 4, addrspace 5) ; CHECK: undef %134.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE17]].sub0 { ; CHECK: internal %134.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE17]].sub2 ; CHECK: } ; CHECK: %134.sub1:vreg_128 = COPY %43.sub1 ; CHECK: %134.sub3:vreg_128 = COPY %43.sub1 - ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %134, %2, 0, 288, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1) - ; CHECK: [[SI_SPILL_V128_RESTORE18:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.11, $sgpr32, 0, implicit $exec :: (load 16 from %stack.11, align 4, addrspace 5) + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %134, %2, 0, 288, 0, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) + ; CHECK: [[SI_SPILL_V128_RESTORE18:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.11, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.11, align 4, addrspace 5) ; CHECK: undef %129.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE18]].sub0 { ; CHECK: internal %129.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE18]].sub2 ; CHECK: } ; CHECK: %129.sub1:vreg_128 = COPY %43.sub1 ; CHECK: %129.sub3:vreg_128 = COPY %43.sub1 - ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %129, %2, 0, 304, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %129, %2, 0, 304, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) ; CHECK: undef %125.sub0:vreg_128 = COPY %127.sub0 { ; CHECK: internal %125.sub2:vreg_128 = COPY %127.sub2 ; CHECK: } ; CHECK: %125.sub1:vreg_128 = COPY %43.sub1 ; CHECK: %125.sub3:vreg_128 = COPY %43.sub1 - ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %125, %2, 0, 256, 0, 0, 0, implicit $exec :: (store 16, align 256, addrspace 1) + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %125, %2, 0, 256, 0, 0, 0, implicit $exec :: (store (s128), align 256, addrspace 1) ; CHECK: undef %121.sub0:vreg_128 = COPY %123.sub0 { ; CHECK: internal %121.sub2:vreg_128 = COPY %123.sub2 ; CHECK: } ; CHECK: %121.sub1:vreg_128 = COPY %43.sub1 ; CHECK: %121.sub3:vreg_128 = COPY %43.sub1 - ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %121, %2, 0, 272, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) - ; CHECK: [[SI_SPILL_V128_RESTORE19:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.10, $sgpr32, 0, implicit $exec :: (load 16 from %stack.10, align 4, addrspace 5) + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %121, %2, 0, 272, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) + ; CHECK: [[SI_SPILL_V128_RESTORE19:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.10, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.10, align 4, addrspace 5) ; CHECK: undef %116.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE19]].sub0 { ; CHECK: internal %116.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE19]].sub2 ; CHECK: } ; CHECK: %116.sub1:vreg_128 = COPY %43.sub1 ; CHECK: %116.sub3:vreg_128 = COPY %43.sub1 - ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %116, %2, 0, 224, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1) + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %116, %2, 0, 224, 0, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) ; CHECK: undef %112.sub0:vreg_128 = COPY %114.sub0 { ; CHECK: internal %112.sub2:vreg_128 = COPY %114.sub2 ; CHECK: } ; CHECK: %112.sub1:vreg_128 = COPY %43.sub1 ; CHECK: %112.sub3:vreg_128 = COPY %43.sub1 - ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %112, %2, 0, 240, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %112, %2, 0, 240, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) ; CHECK: undef %108.sub0:vreg_128 = COPY %110.sub0 { ; CHECK: internal %108.sub2:vreg_128 = COPY %110.sub2 ; CHECK: } ; CHECK: %108.sub1:vreg_128 = COPY %43.sub1 ; CHECK: %108.sub3:vreg_128 = COPY %43.sub1 - ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %108, %2, 0, 192, 0, 0, 0, implicit $exec :: (store 16, align 64, addrspace 1) + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %108, %2, 0, 192, 0, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1) ; CHECK: undef %104.sub0:vreg_128 = COPY %106.sub0 { ; CHECK: internal %104.sub2:vreg_128 = COPY %106.sub2 ; CHECK: } ; CHECK: %104.sub1:vreg_128 = COPY %43.sub1 ; CHECK: %104.sub3:vreg_128 = COPY %43.sub1 - ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %104, %2, 0, 208, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) - ; CHECK: [[SI_SPILL_V128_RESTORE20:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.9, $sgpr32, 0, implicit $exec :: (load 16 from %stack.9, align 4, addrspace 5) + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %104, %2, 0, 208, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) + ; CHECK: [[SI_SPILL_V128_RESTORE20:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.9, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.9, align 4, addrspace 5) ; CHECK: undef %99.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE20]].sub0 { ; CHECK: internal %99.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE20]].sub2 ; CHECK: } ; CHECK: %99.sub1:vreg_128 = COPY %43.sub1 ; CHECK: %99.sub3:vreg_128 = COPY %43.sub1 - ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %99, %2, 0, 160, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1) - ; CHECK: [[SI_SPILL_V128_RESTORE21:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.8, $sgpr32, 0, implicit $exec :: (load 16 from %stack.8, align 4, addrspace 5) + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %99, %2, 0, 160, 0, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) + ; CHECK: [[SI_SPILL_V128_RESTORE21:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.8, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.8, align 4, addrspace 5) ; CHECK: undef %94.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE21]].sub0 { ; CHECK: internal %94.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE21]].sub2 ; CHECK: } ; CHECK: %94.sub1:vreg_128 = COPY %43.sub1 ; CHECK: %94.sub3:vreg_128 = COPY %43.sub1 - ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %94, %2, 0, 176, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) - ; CHECK: [[SI_SPILL_V128_RESTORE22:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.7, $sgpr32, 0, implicit $exec :: (load 16 from %stack.7, align 4, addrspace 5) + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %94, %2, 0, 176, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) + ; CHECK: [[SI_SPILL_V128_RESTORE22:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.7, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.7, align 4, addrspace 5) ; CHECK: undef %89.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE22]].sub0 { ; CHECK: internal %89.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE22]].sub2 ; CHECK: } ; CHECK: %89.sub1:vreg_128 = COPY %43.sub1 ; CHECK: %89.sub3:vreg_128 = COPY %43.sub1 - ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %89, %2, 0, 128, 0, 0, 0, implicit $exec :: (store 16, align 128, addrspace 1) + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %89, %2, 0, 128, 0, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1) ; CHECK: undef %85.sub0:vreg_128 = COPY %87.sub0 { ; CHECK: internal %85.sub2:vreg_128 = COPY %87.sub2 ; CHECK: } ; CHECK: %85.sub1:vreg_128 = COPY %43.sub1 ; CHECK: %85.sub3:vreg_128 = COPY %43.sub1 - ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %85, %2, 0, 144, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) - ; CHECK: [[SI_SPILL_V128_RESTORE23:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: (load 16 from %stack.6, align 4, addrspace 5) + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %85, %2, 0, 144, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) + ; CHECK: [[SI_SPILL_V128_RESTORE23:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.6, align 4, addrspace 5) ; CHECK: undef %80.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE23]].sub0 { ; CHECK: internal %80.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE23]].sub2 ; CHECK: } ; CHECK: %80.sub1:vreg_128 = COPY %43.sub1 ; CHECK: %80.sub3:vreg_128 = COPY %43.sub1 - ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %80, %2, 0, 96, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1) - ; CHECK: [[SI_SPILL_V128_RESTORE24:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.5, $sgpr32, 0, implicit $exec :: (load 16 from %stack.5, align 4, addrspace 5) + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %80, %2, 0, 96, 0, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) + ; CHECK: [[SI_SPILL_V128_RESTORE24:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.5, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.5, align 4, addrspace 5) ; CHECK: undef %75.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE24]].sub0 { ; CHECK: internal %75.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE24]].sub2 ; CHECK: } ; CHECK: %75.sub1:vreg_128 = COPY %43.sub1 ; CHECK: %75.sub3:vreg_128 = COPY %43.sub1 - ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %75, %2, 0, 112, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) - ; CHECK: [[SI_SPILL_V128_RESTORE25:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.4, $sgpr32, 0, implicit $exec :: (load 16 from %stack.4, align 4, addrspace 5) + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %75, %2, 0, 112, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) + ; CHECK: [[SI_SPILL_V128_RESTORE25:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.4, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.4, align 4, addrspace 5) ; CHECK: undef %70.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE25]].sub0 { ; CHECK: internal %70.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE25]].sub2 ; CHECK: } ; CHECK: %70.sub1:vreg_128 = COPY %43.sub1 ; CHECK: %70.sub3:vreg_128 = COPY %43.sub1 - ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %70, %2, 0, 64, 0, 0, 0, implicit $exec :: (store 16, align 64, addrspace 1) + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %70, %2, 0, 64, 0, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1) ; CHECK: undef %66.sub0:vreg_128 = COPY %68.sub0 { ; CHECK: internal %66.sub2:vreg_128 = COPY %68.sub2 ; CHECK: } ; CHECK: %66.sub1:vreg_128 = COPY %43.sub1 ; CHECK: %66.sub3:vreg_128 = COPY %43.sub1 - ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %66, %2, 0, 80, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) - ; CHECK: [[SI_SPILL_V128_RESTORE26:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load 16 from %stack.3, align 4, addrspace 5) + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %66, %2, 0, 80, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) + ; CHECK: [[SI_SPILL_V128_RESTORE26:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.3, align 4, addrspace 5) ; CHECK: undef %61.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE26]].sub0 { ; CHECK: internal %61.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE26]].sub2 ; CHECK: } ; CHECK: %61.sub1:vreg_128 = COPY %43.sub1 ; CHECK: %61.sub3:vreg_128 = COPY %43.sub1 - ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %61, %2, 0, 32, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1) - ; CHECK: [[SI_SPILL_V128_RESTORE27:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load 16 from %stack.2, align 4, addrspace 5) + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %61, %2, 0, 32, 0, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) + ; CHECK: [[SI_SPILL_V128_RESTORE27:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.2, align 4, addrspace 5) ; CHECK: undef %56.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE27]].sub0 { ; CHECK: internal %56.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE27]].sub2 ; CHECK: } ; CHECK: %56.sub1:vreg_128 = COPY %43.sub1 ; CHECK: %56.sub3:vreg_128 = COPY %43.sub1 - ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %56, %2, 0, 48, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) - ; CHECK: [[SI_SPILL_V128_RESTORE28:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load 16 from %stack.1, align 4, addrspace 5) + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %56, %2, 0, 48, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) + ; CHECK: [[SI_SPILL_V128_RESTORE28:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.1, align 4, addrspace 5) ; CHECK: undef %51.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE28]].sub0 { ; CHECK: internal %51.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE28]].sub2 ; CHECK: } ; CHECK: %51.sub1:vreg_128 = COPY %43.sub1 ; CHECK: %51.sub3:vreg_128 = COPY %43.sub1 - ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %51, %2, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 512, addrspace 1) - ; CHECK: [[SI_SPILL_V128_RESTORE29:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 16 from %stack.0, align 4, addrspace 5) + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %51, %2, 0, 0, 0, 0, 0, implicit $exec :: (store (s128), align 512, addrspace 1) + ; CHECK: [[SI_SPILL_V128_RESTORE29:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5) ; CHECK: undef %46.sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE29]].sub0 { ; CHECK: internal %46.sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE29]].sub2 ; CHECK: } ; CHECK: %46.sub1:vreg_128 = COPY %43.sub1 ; CHECK: %46.sub3:vreg_128 = COPY %43.sub1 - ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %46, %2, 0, 16, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + ; CHECK: BUFFER_STORE_DWORDX4_OFFSET %46, %2, 0, 16, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) ; CHECK: S_ENDPGM 0 %0:sgpr_64(p4) = COPY $sgpr0_sgpr1 - %1:sgpr_128 = S_LOAD_DWORDX4_IMM %0(p4), 9, 0 :: (dereferenceable invariant load 16, align 4, addrspace 4) + %1:sgpr_128 = S_LOAD_DWORDX4_IMM %0(p4), 9, 0 :: (dereferenceable invariant load (s128), align 4, addrspace 4) undef %2.sub3:sgpr_128 = S_MOV_B32 61440 %2.sub2:sgpr_128 = S_MOV_B32 -1 %2.sub0:sgpr_128 = COPY %1.sub0 @@ -351,10 +351,10 @@ body: | %3.sub2:sgpr_128 = COPY %2.sub2 %3.sub3:sgpr_128 = COPY %2.sub3 early-clobber %4:vreg_128, early-clobber %5:vreg_128, early-clobber %6:vreg_128, early-clobber %7:vreg_128 = BUNDLE %3, implicit $exec { - %7:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 128, addrspace 1) - %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 16, 0, 0, 0, implicit $exec :: (load 16, addrspace 1) - %4:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 32, 0, 0, 0, implicit $exec :: (load 16, align 32, addrspace 1) - %6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 48, 0, 0, 0, implicit $exec :: (load 16, addrspace 1) + %7:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 0, 0, 0, 0, implicit $exec :: (load (s128), align 128, addrspace 1) + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 16, 0, 0, 0, implicit $exec :: (load (s128), addrspace 1) + %4:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 32, 0, 0, 0, implicit $exec :: (load (s128), align 32, addrspace 1) + %6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 48, 0, 0, 0, implicit $exec :: (load (s128), addrspace 1) } undef %8.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %7.sub1, implicit $exec undef %9.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %7.sub0, implicit $exec @@ -372,22 +372,22 @@ body: | undef %21.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %6.sub0, implicit $exec undef %22.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %6.sub3, implicit $exec undef %23.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %6.sub2, implicit $exec - %24:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 64, 0, 0, 0, implicit $exec :: (load 16, align 64, addrspace 1) + %24:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 64, 0, 0, 0, implicit $exec :: (load (s128), align 64, addrspace 1) undef %25.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %24.sub1, implicit $exec undef %26.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %24.sub0, implicit $exec undef %27.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %24.sub3, implicit $exec undef %28.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %24.sub2, implicit $exec - %29:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 80, 0, 0, 0, implicit $exec :: (load 16, addrspace 1) + %29:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 80, 0, 0, 0, implicit $exec :: (load (s128), addrspace 1) undef %30.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %29.sub1, implicit $exec undef %31.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %29.sub0, implicit $exec undef %32.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %29.sub3, implicit $exec undef %33.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %29.sub2, implicit $exec - %34:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 96, 0, 0, 0, implicit $exec :: (load 16, align 32, addrspace 1) + %34:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 96, 0, 0, 0, implicit $exec :: (load (s128), align 32, addrspace 1) undef %35.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %34.sub1, implicit $exec undef %36.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %34.sub0, implicit $exec undef %37.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %34.sub3, implicit $exec undef %38.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %34.sub2, implicit $exec - %39:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 112, 0, 0, 0, implicit $exec :: (load 16, addrspace 1) + %39:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 112, 0, 0, 0, implicit $exec :: (load (s128), addrspace 1) undef %40.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %39.sub1, implicit $exec undef %41.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %39.sub0, implicit $exec undef %42.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %39.sub3, implicit $exec @@ -427,99 +427,99 @@ body: | %43.sub0:vreg_128 = V_AND_B32_e32 %44, %39.sub2, implicit $exec %43.sub1:vreg_128 = V_MOV_B32_e32 0, implicit $exec %43.sub3:vreg_128 = COPY %43.sub1 - BUFFER_STORE_DWORDX4_OFFSET %43, %2, 0, 480, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1) + BUFFER_STORE_DWORDX4_OFFSET %43, %2, 0, 480, 0, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) %42.sub1:vreg_128 = COPY %43.sub1 %42.sub3:vreg_128 = COPY %43.sub1 - BUFFER_STORE_DWORDX4_OFFSET %42, %2, 0, 496, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + BUFFER_STORE_DWORDX4_OFFSET %42, %2, 0, 496, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) %41.sub1:vreg_128 = COPY %43.sub1 %41.sub3:vreg_128 = COPY %43.sub1 - BUFFER_STORE_DWORDX4_OFFSET %41, %2, 0, 448, 0, 0, 0, implicit $exec :: (store 16, align 64, addrspace 1) + BUFFER_STORE_DWORDX4_OFFSET %41, %2, 0, 448, 0, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1) %40.sub1:vreg_128 = COPY %43.sub1 %40.sub3:vreg_128 = COPY %43.sub1 - BUFFER_STORE_DWORDX4_OFFSET %40, %2, 0, 464, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + BUFFER_STORE_DWORDX4_OFFSET %40, %2, 0, 464, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) %38.sub1:vreg_128 = COPY %43.sub1 %38.sub3:vreg_128 = COPY %43.sub1 - BUFFER_STORE_DWORDX4_OFFSET %38, %2, 0, 416, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1) + BUFFER_STORE_DWORDX4_OFFSET %38, %2, 0, 416, 0, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) %37.sub1:vreg_128 = COPY %43.sub1 %37.sub3:vreg_128 = COPY %43.sub1 - BUFFER_STORE_DWORDX4_OFFSET %37, %2, 0, 432, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + BUFFER_STORE_DWORDX4_OFFSET %37, %2, 0, 432, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) %36.sub1:vreg_128 = COPY %43.sub1 %36.sub3:vreg_128 = COPY %43.sub1 - BUFFER_STORE_DWORDX4_OFFSET %36, %2, 0, 384, 0, 0, 0, implicit $exec :: (store 16, align 128, addrspace 1) + BUFFER_STORE_DWORDX4_OFFSET %36, %2, 0, 384, 0, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1) %35.sub1:vreg_128 = COPY %43.sub1 %35.sub3:vreg_128 = COPY %43.sub1 - BUFFER_STORE_DWORDX4_OFFSET %35, %2, 0, 400, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + BUFFER_STORE_DWORDX4_OFFSET %35, %2, 0, 400, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) %33.sub1:vreg_128 = COPY %43.sub1 %33.sub3:vreg_128 = COPY %43.sub1 - BUFFER_STORE_DWORDX4_OFFSET %33, %2, 0, 352, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1) + BUFFER_STORE_DWORDX4_OFFSET %33, %2, 0, 352, 0, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) %32.sub1:vreg_128 = COPY %43.sub1 %32.sub3:vreg_128 = COPY %43.sub1 - BUFFER_STORE_DWORDX4_OFFSET %32, %2, 0, 368, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + BUFFER_STORE_DWORDX4_OFFSET %32, %2, 0, 368, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) %31.sub1:vreg_128 = COPY %43.sub1 %31.sub3:vreg_128 = COPY %43.sub1 - BUFFER_STORE_DWORDX4_OFFSET %31, %2, 0, 320, 0, 0, 0, implicit $exec :: (store 16, align 64, addrspace 1) + BUFFER_STORE_DWORDX4_OFFSET %31, %2, 0, 320, 0, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1) %30.sub1:vreg_128 = COPY %43.sub1 %30.sub3:vreg_128 = COPY %43.sub1 - BUFFER_STORE_DWORDX4_OFFSET %30, %2, 0, 336, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + BUFFER_STORE_DWORDX4_OFFSET %30, %2, 0, 336, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) %28.sub1:vreg_128 = COPY %43.sub1 %28.sub3:vreg_128 = COPY %43.sub1 - BUFFER_STORE_DWORDX4_OFFSET %28, %2, 0, 288, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1) + BUFFER_STORE_DWORDX4_OFFSET %28, %2, 0, 288, 0, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) %27.sub1:vreg_128 = COPY %43.sub1 %27.sub3:vreg_128 = COPY %43.sub1 - BUFFER_STORE_DWORDX4_OFFSET %27, %2, 0, 304, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + BUFFER_STORE_DWORDX4_OFFSET %27, %2, 0, 304, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) %26.sub1:vreg_128 = COPY %43.sub1 %26.sub3:vreg_128 = COPY %43.sub1 - BUFFER_STORE_DWORDX4_OFFSET %26, %2, 0, 256, 0, 0, 0, implicit $exec :: (store 16, align 256, addrspace 1) + BUFFER_STORE_DWORDX4_OFFSET %26, %2, 0, 256, 0, 0, 0, implicit $exec :: (store (s128), align 256, addrspace 1) %25.sub1:vreg_128 = COPY %43.sub1 %25.sub3:vreg_128 = COPY %43.sub1 - BUFFER_STORE_DWORDX4_OFFSET %25, %2, 0, 272, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + BUFFER_STORE_DWORDX4_OFFSET %25, %2, 0, 272, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) %23.sub1:vreg_128 = COPY %43.sub1 %23.sub3:vreg_128 = COPY %43.sub1 - BUFFER_STORE_DWORDX4_OFFSET %23, %2, 0, 224, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1) + BUFFER_STORE_DWORDX4_OFFSET %23, %2, 0, 224, 0, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) %22.sub1:vreg_128 = COPY %43.sub1 %22.sub3:vreg_128 = COPY %43.sub1 - BUFFER_STORE_DWORDX4_OFFSET %22, %2, 0, 240, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + BUFFER_STORE_DWORDX4_OFFSET %22, %2, 0, 240, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) %21.sub1:vreg_128 = COPY %43.sub1 %21.sub3:vreg_128 = COPY %43.sub1 - BUFFER_STORE_DWORDX4_OFFSET %21, %2, 0, 192, 0, 0, 0, implicit $exec :: (store 16, align 64, addrspace 1) + BUFFER_STORE_DWORDX4_OFFSET %21, %2, 0, 192, 0, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1) %20.sub1:vreg_128 = COPY %43.sub1 %20.sub3:vreg_128 = COPY %43.sub1 - BUFFER_STORE_DWORDX4_OFFSET %20, %2, 0, 208, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + BUFFER_STORE_DWORDX4_OFFSET %20, %2, 0, 208, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) %19.sub1:vreg_128 = COPY %43.sub1 %19.sub3:vreg_128 = COPY %43.sub1 - BUFFER_STORE_DWORDX4_OFFSET %19, %2, 0, 160, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1) + BUFFER_STORE_DWORDX4_OFFSET %19, %2, 0, 160, 0, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) %18.sub1:vreg_128 = COPY %43.sub1 %18.sub3:vreg_128 = COPY %43.sub1 - BUFFER_STORE_DWORDX4_OFFSET %18, %2, 0, 176, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + BUFFER_STORE_DWORDX4_OFFSET %18, %2, 0, 176, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) %17.sub1:vreg_128 = COPY %43.sub1 %17.sub3:vreg_128 = COPY %43.sub1 - BUFFER_STORE_DWORDX4_OFFSET %17, %2, 0, 128, 0, 0, 0, implicit $exec :: (store 16, align 128, addrspace 1) + BUFFER_STORE_DWORDX4_OFFSET %17, %2, 0, 128, 0, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1) %16.sub1:vreg_128 = COPY %43.sub1 %16.sub3:vreg_128 = COPY %43.sub1 - BUFFER_STORE_DWORDX4_OFFSET %16, %2, 0, 144, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + BUFFER_STORE_DWORDX4_OFFSET %16, %2, 0, 144, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) %15.sub1:vreg_128 = COPY %43.sub1 %15.sub3:vreg_128 = COPY %43.sub1 - BUFFER_STORE_DWORDX4_OFFSET %15, %2, 0, 96, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1) + BUFFER_STORE_DWORDX4_OFFSET %15, %2, 0, 96, 0, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) %14.sub1:vreg_128 = COPY %43.sub1 %14.sub3:vreg_128 = COPY %43.sub1 - BUFFER_STORE_DWORDX4_OFFSET %14, %2, 0, 112, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + BUFFER_STORE_DWORDX4_OFFSET %14, %2, 0, 112, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) %13.sub1:vreg_128 = COPY %43.sub1 %13.sub3:vreg_128 = COPY %43.sub1 - BUFFER_STORE_DWORDX4_OFFSET %13, %2, 0, 64, 0, 0, 0, implicit $exec :: (store 16, align 64, addrspace 1) + BUFFER_STORE_DWORDX4_OFFSET %13, %2, 0, 64, 0, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1) %12.sub1:vreg_128 = COPY %43.sub1 %12.sub3:vreg_128 = COPY %43.sub1 - BUFFER_STORE_DWORDX4_OFFSET %12, %2, 0, 80, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + BUFFER_STORE_DWORDX4_OFFSET %12, %2, 0, 80, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) %11.sub1:vreg_128 = COPY %43.sub1 %11.sub3:vreg_128 = COPY %43.sub1 - BUFFER_STORE_DWORDX4_OFFSET %11, %2, 0, 32, 0, 0, 0, implicit $exec :: (store 16, align 32, addrspace 1) + BUFFER_STORE_DWORDX4_OFFSET %11, %2, 0, 32, 0, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1) %10.sub1:vreg_128 = COPY %43.sub1 %10.sub3:vreg_128 = COPY %43.sub1 - BUFFER_STORE_DWORDX4_OFFSET %10, %2, 0, 48, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + BUFFER_STORE_DWORDX4_OFFSET %10, %2, 0, 48, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) %9.sub1:vreg_128 = COPY %43.sub1 %9.sub3:vreg_128 = COPY %43.sub1 - BUFFER_STORE_DWORDX4_OFFSET %9, %2, 0, 0, 0, 0, 0, implicit $exec :: (store 16, align 512, addrspace 1) + BUFFER_STORE_DWORDX4_OFFSET %9, %2, 0, 0, 0, 0, 0, implicit $exec :: (store (s128), align 512, addrspace 1) %8.sub1:vreg_128 = COPY %43.sub1 %8.sub3:vreg_128 = COPY %43.sub1 - BUFFER_STORE_DWORDX4_OFFSET %8, %2, 0, 16, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + BUFFER_STORE_DWORDX4_OFFSET %8, %2, 0, 16, 0, 0, 0, implicit $exec :: (store (s128), addrspace 1) S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll b/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll index ffcfa32cfd3d7..e5502657f16ca 100644 --- a/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll +++ b/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll @@ -10,7 +10,7 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK: bb.0..expVert: ; CHECK: liveins: $sgpr3, $sgpr4, $sgpr5, $sgpr8, $sgpr9, $sgpr10, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr25, $sgpr27, $sgpr31 ; CHECK: undef %56.sub0:sgpr_64 = COPY $sgpr31 - ; CHECK: SI_SPILL_S32_SAVE $sgpr27, %stack.2, implicit $exec, implicit $sgpr32 :: (store 4 into %stack.2, addrspace 5) + ; CHECK: SI_SPILL_S32_SAVE $sgpr27, %stack.2, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.2, addrspace 5) ; CHECK: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr25 ; CHECK: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr5 ; CHECK: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr4 @@ -24,7 +24,7 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK: [[COPY9:%[0-9]+]]:sgpr_32 = COPY $sgpr9 ; CHECK: [[COPY10:%[0-9]+]]:sgpr_32 = COPY $sgpr10 ; CHECK: [[COPY11:%[0-9]+]]:sgpr_32 = COPY $sgpr8 - ; CHECK: undef %71.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %56, 232, 0 :: (load 8 from %ir.40, addrspace 4) + ; CHECK: undef %71.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %56, 232, 0 :: (load (s64) from %ir.40, addrspace 4) ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 ; CHECK: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY3]], 4, implicit-def dead $scc ; CHECK: [[S_LSHL_B32_1:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY2]], 4, implicit-def dead $scc @@ -32,26 +32,26 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK: [[S_ASHR_I32_:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_]], 31, implicit-def dead $scc ; CHECK: [[S_ASHR_I32_1:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_1]], 31, implicit-def dead $scc ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 %71.sub1, [[S_MOV_B32_]], implicit-def dead $scc - ; CHECK: SI_SPILL_S32_SAVE [[S_AND_B32_]], %stack.0, implicit $exec, implicit $sgpr32 :: (store 4 into %stack.0, addrspace 5) + ; CHECK: SI_SPILL_S32_SAVE [[S_AND_B32_]], %stack.0, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.0, addrspace 5) ; CHECK: [[S_ASHR_I32_2:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_2]], 31, implicit-def dead $scc ; CHECK: undef %130.sub0:sreg_64 = S_ADD_U32 [[COPY4]], [[S_LSHL_B32_2]], implicit-def $scc ; CHECK: %130.sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc - ; CHECK: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %130, 16, 0 :: (load 16 from %ir.84, addrspace 4) - ; CHECK: [[S_LOAD_DWORDX4_IMM1:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM undef %74:sreg_64, 0, 0 :: (load 16 from `<4 x i32> addrspace(4)* undef`, addrspace 4) - ; CHECK: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %132:sgpr_128, 0, 0 :: (dereferenceable invariant load 4) + ; CHECK: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %130, 16, 0 :: (load (s128) from %ir.84, addrspace 4) + ; CHECK: [[S_LOAD_DWORDX4_IMM1:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM undef %74:sreg_64, 0, 0 :: (load (s128) from `<4 x i32> addrspace(4)* undef`, addrspace 4) + ; CHECK: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %132:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK: KILL undef %74:sreg_64 ; CHECK: KILL undef %132:sgpr_128 ; CHECK: KILL %130.sub0, %130.sub1 - ; CHECK: [[S_BUFFER_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[S_LOAD_DWORDX4_IMM]], 0, 0 :: (dereferenceable invariant load 4) + ; CHECK: [[S_BUFFER_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[S_LOAD_DWORDX4_IMM]], 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; CHECK: %71.sub3:sgpr_128 = S_MOV_B32 553734060 ; CHECK: %71.sub2:sgpr_128 = S_MOV_B32 -1 - ; CHECK: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET undef %118:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) - ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], undef %89:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) - ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN1:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM1]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET undef %118:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], undef %89:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN1:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM1]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: KILL undef %89:sgpr_128 ; CHECK: KILL undef %118:sgpr_128 - ; CHECK: SI_SPILL_S128_SAVE %71, %stack.1, implicit $exec, implicit $sgpr32 :: (store 16 into %stack.1, align 4, addrspace 5) + ; CHECK: SI_SPILL_S128_SAVE %71, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s128) into %stack.1, align 4, addrspace 5) ; CHECK: %71.sub1:sgpr_128 = S_MOV_B32 0 ; CHECK: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM]], 29, implicit-def dead $scc ; CHECK: [[S_SUB_I32_1:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM]], 30, implicit-def dead $scc @@ -63,14 +63,14 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK: undef %156.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_]], [[S_LSHL_B32_1]], implicit-def $scc ; CHECK: %156.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_]], [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc ; CHECK: undef %163.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_]], [[S_LSHL_B32_2]], implicit-def $scc - ; CHECK: [[S_LOAD_DWORDX4_IMM2:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %149, 0, 0 :: (load 16 from %ir.91, addrspace 4) - ; CHECK: [[S_LOAD_DWORDX4_IMM3:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %156, 0, 0 :: (load 16 from %ir.97, addrspace 4) + ; CHECK: [[S_LOAD_DWORDX4_IMM2:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %149, 0, 0 :: (load (s128) from %ir.91, addrspace 4) + ; CHECK: [[S_LOAD_DWORDX4_IMM3:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %156, 0, 0 :: (load (s128) from %ir.97, addrspace 4) ; CHECK: KILL %156.sub0, %156.sub1 ; CHECK: KILL %149.sub0, %149.sub1 ; CHECK: %163.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc ; CHECK: [[S_ASHR_I32_3:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 undef %171:sreg_32, 31, implicit-def dead $scc ; CHECK: undef %176.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_]], undef %171:sreg_32, implicit-def $scc - ; CHECK: [[S_LOAD_DWORDX4_IMM4:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %163, 0, 0 :: (load 16 from %ir.103, addrspace 4) + ; CHECK: [[S_LOAD_DWORDX4_IMM4:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %163, 0, 0 :: (load (s128) from %ir.103, addrspace 4) ; CHECK: %176.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_]], [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc ; CHECK: undef %183.sub0:sreg_64 = S_ADD_U32 %50.sub0, [[S_LSHL_B32_]], implicit-def $scc ; CHECK: %183.sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc @@ -102,22 +102,22 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK: %293.sub1:sreg_64 = S_ADDC_U32 undef %45:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc ; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_]], 16, implicit-def dead $scc ; CHECK: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_2]], 16, implicit-def dead $scc - ; CHECK: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %71, [[S_ADD_I32_]], 0 :: (dereferenceable invariant load 4) - ; CHECK: [[S_BUFFER_LOAD_DWORD_SGPR1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %71, undef %314:sreg_32, 0 :: (dereferenceable invariant load 4) - ; CHECK: [[S_BUFFER_LOAD_DWORD_SGPR2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %71, [[S_ADD_I32_1]], 0 :: (dereferenceable invariant load 4) - ; CHECK: [[S_BUFFER_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %71, 16, 0 :: (dereferenceable invariant load 4) - ; CHECK: [[S_BUFFER_LOAD_DWORD_SGPR3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR undef %369:sgpr_128, undef %370:sreg_32, 0 :: (dereferenceable invariant load 4) - ; CHECK: [[S_BUFFER_LOAD_DWORD_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %380:sgpr_128, 16, 0 :: (dereferenceable invariant load 4) - ; CHECK: [[S_LOAD_DWORDX4_IMM5:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %176, 0, 0 :: (load 16 from %ir.111, addrspace 4) - ; CHECK: [[S_LOAD_DWORDX4_IMM6:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %183, 0, 0 :: (load 16 from %ir.117, addrspace 4) - ; CHECK: [[S_LOAD_DWORDX4_IMM7:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %190, 0, 0 :: (load 16 from %ir.123, addrspace 4) - ; CHECK: [[S_LOAD_DWORDX4_IMM8:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %200, 0, 0 :: (load 16 from %ir.131, addrspace 4) - ; CHECK: [[S_LOAD_DWORDX4_IMM9:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %210, 0, 0 :: (load 16 from %ir.138, addrspace 4) - ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN2:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM2]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) - ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM3]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) - ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN4:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM4]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) - ; CHECK: [[S_BUFFER_LOAD_DWORD_SGPR4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR undef %364:sgpr_128, [[S_ADD_I32_]], 0 :: (dereferenceable invariant load 4) - ; CHECK: [[S_BUFFER_LOAD_DWORD_SGPR5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR undef %375:sgpr_128, [[S_ADD_I32_1]], 0 :: (dereferenceable invariant load 4) + ; CHECK: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %71, [[S_ADD_I32_]], 0 :: (dereferenceable invariant load (s32)) + ; CHECK: [[S_BUFFER_LOAD_DWORD_SGPR1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %71, undef %314:sreg_32, 0 :: (dereferenceable invariant load (s32)) + ; CHECK: [[S_BUFFER_LOAD_DWORD_SGPR2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %71, [[S_ADD_I32_1]], 0 :: (dereferenceable invariant load (s32)) + ; CHECK: [[S_BUFFER_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %71, 16, 0 :: (dereferenceable invariant load (s32)) + ; CHECK: [[S_BUFFER_LOAD_DWORD_SGPR3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR undef %369:sgpr_128, undef %370:sreg_32, 0 :: (dereferenceable invariant load (s32)) + ; CHECK: [[S_BUFFER_LOAD_DWORD_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %380:sgpr_128, 16, 0 :: (dereferenceable invariant load (s32)) + ; CHECK: [[S_LOAD_DWORDX4_IMM5:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %176, 0, 0 :: (load (s128) from %ir.111, addrspace 4) + ; CHECK: [[S_LOAD_DWORDX4_IMM6:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %183, 0, 0 :: (load (s128) from %ir.117, addrspace 4) + ; CHECK: [[S_LOAD_DWORDX4_IMM7:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %190, 0, 0 :: (load (s128) from %ir.123, addrspace 4) + ; CHECK: [[S_LOAD_DWORDX4_IMM8:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %200, 0, 0 :: (load (s128) from %ir.131, addrspace 4) + ; CHECK: [[S_LOAD_DWORDX4_IMM9:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %210, 0, 0 :: (load (s128) from %ir.138, addrspace 4) + ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN2:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM2]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM3]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN4:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM4]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[S_BUFFER_LOAD_DWORD_SGPR4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR undef %364:sgpr_128, [[S_ADD_I32_]], 0 :: (dereferenceable invariant load (s32)) + ; CHECK: [[S_BUFFER_LOAD_DWORD_SGPR5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR undef %375:sgpr_128, [[S_ADD_I32_1]], 0 :: (dereferenceable invariant load (s32)) ; CHECK: [[S_ADD_I32_2:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR]], -98, implicit-def dead $scc ; CHECK: [[S_ADD_I32_3:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR1]], -114, implicit-def dead $scc ; CHECK: [[S_ADD_I32_4:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR2]], -130, implicit-def dead $scc @@ -132,17 +132,17 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK: %351.sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc ; CHECK: [[S_LSHL_B32_3:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY9]], 4, implicit-def dead $scc ; CHECK: [[S_ADD_I32_6:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_3]], 16, implicit-def dead $scc - ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN5:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM5]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) - ; CHECK: [[S_BUFFER_LOAD_DWORD_SGPR6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR undef %396:sgpr_128, [[S_ADD_I32_6]], 0 :: (dereferenceable invariant load 4) - ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN6:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) - ; CHECK: [[S_LOAD_DWORDX4_IMM10:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %50, 224, 0 :: (load 16 from %ir.155, addrspace 4) - ; CHECK: [[S_LOAD_DWORDX4_IMM11:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %217, 0, 0 :: (load 16 from %ir.144, addrspace 4) - ; CHECK: [[S_LOAD_DWORDX4_IMM12:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %224, 0, 0 :: (load 16 from %ir.150, addrspace 4) - ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN7:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) - ; CHECK: [[S_LOAD_DWORDX4_IMM13:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %241, 0, 0 :: (load 16 from %ir.162, addrspace 4) - ; CHECK: [[S_LOAD_DWORDX4_IMM14:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %253, 0, 0 :: (load 16 from %ir.170, addrspace 4) - ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN8:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM8]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) - ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN9:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM9]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN5:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM5]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[S_BUFFER_LOAD_DWORD_SGPR6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR undef %396:sgpr_128, [[S_ADD_I32_6]], 0 :: (dereferenceable invariant load (s32)) + ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN6:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[S_LOAD_DWORDX4_IMM10:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %50, 224, 0 :: (load (s128) from %ir.155, addrspace 4) + ; CHECK: [[S_LOAD_DWORDX4_IMM11:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %217, 0, 0 :: (load (s128) from %ir.144, addrspace 4) + ; CHECK: [[S_LOAD_DWORDX4_IMM12:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %224, 0, 0 :: (load (s128) from %ir.150, addrspace 4) + ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN7:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[S_LOAD_DWORDX4_IMM13:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %241, 0, 0 :: (load (s128) from %ir.162, addrspace 4) + ; CHECK: [[S_LOAD_DWORDX4_IMM14:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %253, 0, 0 :: (load (s128) from %ir.170, addrspace 4) + ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN8:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM8]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN9:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM9]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[S_ADD_I32_7:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR4]], -217, implicit-def dead $scc ; CHECK: [[S_ADD_I32_8:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR3]], -233, implicit-def dead $scc ; CHECK: [[S_ADD_I32_9:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR5]], -249, implicit-def dead $scc @@ -156,94 +156,94 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK: undef %411.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_3]], [[S_LSHL_B32_2]], implicit-def $scc ; CHECK: %411.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_3]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc ; CHECK: [[S_LSHL_B32_4:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY10]], 4, implicit-def dead $scc - ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN10:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM11]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN10:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM11]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[S_ASHR_I32_4:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_4]], 31, implicit-def dead $scc ; CHECK: undef %425.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_3]], [[S_LSHL_B32_4]], implicit-def $scc ; CHECK: %425.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_3]], [[S_ASHR_I32_4]], implicit-def dead $scc, implicit $scc ; CHECK: [[S_ADD_U32_4:%[0-9]+]]:sreg_32 = S_ADD_U32 %56.sub0, 168, implicit-def $scc ; CHECK: [[S_ADDC_U32_4:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %57:sreg_32, 0, implicit-def dead $scc, implicit $scc ; CHECK: [[S_LSHL_B32_5:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY3]], 3, implicit-def dead $scc - ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN11:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM12]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN11:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM12]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[S_ASHR_I32_5:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_5]], 31, implicit-def dead $scc ; CHECK: undef %441.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_4]], [[S_LSHL_B32_5]], implicit-def $scc ; CHECK: %441.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_5]], implicit-def dead $scc, implicit $scc - ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN12:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM10]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) - ; CHECK: %71.sub0:sgpr_128 = S_LOAD_DWORD_IMM %441, 0, 0 :: (load 4 from %ir..i085.i, align 8, addrspace 4) - ; CHECK: [[S_LOAD_DWORDX4_IMM15:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %261, 0, 0 :: (load 16 from %ir.176, addrspace 4) - ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN13:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM13]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) - ; CHECK: [[S_LOAD_DWORDX4_IMM16:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %273, 0, 0 :: (load 16 from %ir.185, addrspace 4) - ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN14:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM14]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) - ; CHECK: [[S_LOAD_DWORDX4_IMM17:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %286, 0, 0 :: (load 16 from %ir.194, addrspace 4) - ; CHECK: [[S_BUFFER_LOAD_DWORD_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %71, 0, 0 :: (dereferenceable invariant load 4) - ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN15:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM15]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) - ; CHECK: [[S_LOAD_DWORDX4_IMM18:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %293, 0, 0 :: (load 16 from %ir.200, addrspace 4) - ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN16:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM16]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) - ; CHECK: [[BUFFER_LOAD_DWORD_OFFSET1:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM1]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN12:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM10]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) + ; CHECK: %71.sub0:sgpr_128 = S_LOAD_DWORD_IMM %441, 0, 0 :: (load (s32) from %ir..i085.i, align 8, addrspace 4) + ; CHECK: [[S_LOAD_DWORDX4_IMM15:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %261, 0, 0 :: (load (s128) from %ir.176, addrspace 4) + ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN13:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM13]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[S_LOAD_DWORDX4_IMM16:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %273, 0, 0 :: (load (s128) from %ir.185, addrspace 4) + ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN14:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM14]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[S_LOAD_DWORDX4_IMM17:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %286, 0, 0 :: (load (s128) from %ir.194, addrspace 4) + ; CHECK: [[S_BUFFER_LOAD_DWORD_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %71, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN15:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM15]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[S_LOAD_DWORDX4_IMM18:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %293, 0, 0 :: (load (s128) from %ir.200, addrspace 4) + ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN16:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM16]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_DWORD_OFFSET1:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM1]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[S_LSHL_B32_6:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY2]], 3, implicit-def dead $scc - ; CHECK: [[BUFFER_LOAD_DWORD_OFFSET2:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM17]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_DWORD_OFFSET2:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM17]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[S_ASHR_I32_6:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_6]], 31, implicit-def dead $scc ; CHECK: [[S_ADD_I32_15:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM4]], -467, implicit-def dead $scc ; CHECK: undef %453.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_4]], [[S_LSHL_B32_6]], implicit-def $scc ; CHECK: %453.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_6]], implicit-def dead $scc, implicit $scc - ; CHECK: %71.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %453, 0, 0 :: (load 8 from %ir.308, addrspace 4) - ; CHECK: [[BUFFER_LOAD_DWORD_OFFSET3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM18]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) - ; CHECK: [[S_LOAD_DWORDX4_IMM19:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %327, 0, 0 :: (load 16 from %ir.223, addrspace 4) - ; CHECK: [[S_LOAD_DWORDX4_IMM20:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %335, 0, 0 :: (load 16 from %ir.230, addrspace 4) - ; CHECK: [[S_LOAD_DWORDX4_IMM21:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %343, 0, 0 :: (load 16 from %ir.236, addrspace 4) - ; CHECK: [[S_LOAD_DWORDX4_IMM22:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %351, 0, 0 :: (load 16 from %ir.242, addrspace 4) + ; CHECK: %71.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %453, 0, 0 :: (load (s64) from %ir.308, addrspace 4) + ; CHECK: [[BUFFER_LOAD_DWORD_OFFSET3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM18]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[S_LOAD_DWORDX4_IMM19:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %327, 0, 0 :: (load (s128) from %ir.223, addrspace 4) + ; CHECK: [[S_LOAD_DWORDX4_IMM20:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %335, 0, 0 :: (load (s128) from %ir.230, addrspace 4) + ; CHECK: [[S_LOAD_DWORDX4_IMM21:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %343, 0, 0 :: (load (s128) from %ir.236, addrspace 4) + ; CHECK: [[S_LOAD_DWORDX4_IMM22:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %351, 0, 0 :: (load (s128) from %ir.242, addrspace 4) ; CHECK: %71.sub1:sgpr_128 = S_AND_B32 %71.sub1, [[S_MOV_B32_]], implicit-def dead $scc - ; CHECK: [[S_BUFFER_LOAD_DWORD_IMM5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %71, 0, 0 :: (dereferenceable invariant load 4) - ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN17:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM19]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) - ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN18:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM20]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) - ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN19:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM21]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[S_BUFFER_LOAD_DWORD_IMM5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %71, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN17:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM19]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN18:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM20]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN19:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM21]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[S_LSHL_B32_7:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY1]], 3, implicit-def dead $scc - ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN20:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM22]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN20:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM22]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: [[S_ASHR_I32_7:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_7]], 31, implicit-def dead $scc ; CHECK: [[S_ADD_I32_16:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM5]], -468, implicit-def dead $scc ; CHECK: undef %468.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_4]], [[S_LSHL_B32_7]], implicit-def $scc ; CHECK: %468.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_7]], implicit-def dead $scc, implicit $scc - ; CHECK: %71.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %468, 0, 0 :: (load 8 from %ir.320, addrspace 4) + ; CHECK: %71.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %468, 0, 0 :: (load (s64) from %ir.320, addrspace 4) ; CHECK: %71.sub1:sgpr_128 = S_AND_B32 %71.sub1, [[S_MOV_B32_]], implicit-def dead $scc - ; CHECK: [[S_BUFFER_LOAD_DWORD_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %71, 0, 0 :: (dereferenceable invariant load 4) - ; CHECK: [[S_LOAD_DWORDX4_IMM23:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %411, 0, 0 :: (load 16 from %ir.282, addrspace 4) - ; CHECK: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %488:sreg_64, 0, 0 :: (load 4 from `i32 addrspace(4)* undef`, addrspace 4) + ; CHECK: [[S_BUFFER_LOAD_DWORD_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %71, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK: [[S_LOAD_DWORDX4_IMM23:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %411, 0, 0 :: (load (s128) from %ir.282, addrspace 4) + ; CHECK: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %488:sreg_64, 0, 0 :: (load (s32) from `i32 addrspace(4)* undef`, addrspace 4) ; CHECK: KILL %411.sub0, %411.sub1 ; CHECK: KILL undef %488:sreg_64 ; CHECK: KILL %71.sub0_sub1 ; CHECK: [[S_LSHL_B32_8:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY11]], 3, implicit-def dead $scc - ; CHECK: [[S_LOAD_DWORDX4_IMM24:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %425, 0, 0 :: (load 16 from %ir.291, addrspace 4) + ; CHECK: [[S_LOAD_DWORDX4_IMM24:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %425, 0, 0 :: (load (s128) from %ir.291, addrspace 4) ; CHECK: [[S_ASHR_I32_8:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_8]], 31, implicit-def dead $scc ; CHECK: [[S_ADD_I32_17:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM6]], -469, implicit-def dead $scc ; CHECK: undef %485.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_4]], [[S_LSHL_B32_8]], implicit-def $scc ; CHECK: %485.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_8]], implicit-def dead $scc, implicit $scc - ; CHECK: %71.sub0:sgpr_128 = S_LOAD_DWORD_IMM %485, 0, 0 :: (load 4 from %ir..i0100.i, align 8, addrspace 4) - ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN21:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM23]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) - ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN22:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM24]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: %71.sub0:sgpr_128 = S_LOAD_DWORD_IMM %485, 0, 0 :: (load (s32) from %ir..i0100.i, align 8, addrspace 4) + ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN21:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM23]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN22:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM24]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: KILL [[S_LOAD_DWORDX4_IMM24]] ; CHECK: KILL [[S_LOAD_DWORDX4_IMM23]] ; CHECK: %71.sub1:sgpr_128 = S_AND_B32 [[S_LOAD_DWORD_IMM]], [[S_MOV_B32_]], implicit-def dead $scc - ; CHECK: [[S_BUFFER_LOAD_DWORD_IMM7:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %71, 0, 0 :: (dereferenceable invariant load 4) + ; CHECK: [[S_BUFFER_LOAD_DWORD_IMM7:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %71, 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK: [[S_ADD_I32_18:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM]], -474, implicit-def dead $scc ; CHECK: [[S_ADD_I32_19:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR3]], -475, implicit-def dead $scc ; CHECK: [[S_ADD_I32_20:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR3]], -491, implicit-def dead $scc ; CHECK: [[S_ADD_I32_21:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR3]], -507, implicit-def dead $scc ; CHECK: [[S_ADD_I32_22:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR3]], -539, implicit-def dead $scc ; CHECK: [[S_ADD_I32_23:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM7]], -473, implicit-def dead $scc - ; CHECK: [[SI_SPILL_S32_RESTORE:%[0-9]+]]:sgpr_32 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load 4 from %stack.2, addrspace 5) + ; CHECK: [[SI_SPILL_S32_RESTORE:%[0-9]+]]:sgpr_32 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.2, addrspace 5) ; CHECK: [[S_ADD_U32_5:%[0-9]+]]:sreg_32 = S_ADD_U32 [[SI_SPILL_S32_RESTORE]], 96, implicit-def $scc ; CHECK: [[S_ADDC_U32_5:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %33:sreg_32, 0, implicit-def dead $scc, implicit $scc ; CHECK: undef %514.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_5]], [[S_LSHL_B32_]], implicit-def $scc ; CHECK: %514.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_5]], [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc - ; CHECK: [[S_LOAD_DWORDX4_IMM25:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %514, 0, 0 :: (load 16 from %ir.351, addrspace 4) + ; CHECK: [[S_LOAD_DWORDX4_IMM25:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %514, 0, 0 :: (load (s128) from %ir.351, addrspace 4) ; CHECK: undef %522.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_5]], [[S_LSHL_B32_1]], implicit-def $scc ; CHECK: %522.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_5]], [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc - ; CHECK: [[S_LOAD_DWORDX4_IMM26:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %522, 0, 0 :: (load 16 from %ir.357, addrspace 4) + ; CHECK: [[S_LOAD_DWORDX4_IMM26:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %522, 0, 0 :: (load (s128) from %ir.357, addrspace 4) ; CHECK: undef %530.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_5]], [[S_LSHL_B32_2]], implicit-def $scc ; CHECK: %530.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_5]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc - ; CHECK: [[S_LOAD_DWORDX4_IMM27:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %530, 0, 0 :: (load 16 from %ir.363, addrspace 4) - ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN23:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM25]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) - ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN24:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM26]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) - ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN25:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM27]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[S_LOAD_DWORDX4_IMM27:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %530, 0, 0 :: (load (s128) from %ir.363, addrspace 4) + ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN23:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM25]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN24:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM26]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) + ; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN25:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM27]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4) ; CHECK: KILL [[S_LOAD_DWORDX4_IMM27]] ; CHECK: KILL [[S_LOAD_DWORDX4_IMM25]] ; CHECK: KILL [[V_MOV_B32_e32_]] @@ -352,26 +352,26 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x ; CHECK: [[V_OR_B32_e32_61:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_60]], [[V_ADD_U32_e32_25]], implicit $exec ; CHECK: [[V_ADD_U32_e32_27:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -575, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec ; CHECK: [[V_OR_B32_e32_62:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_61]], [[V_ADD_U32_e32_26]], implicit $exec - ; CHECK: [[SI_SPILL_S32_RESTORE1:%[0-9]+]]:sreg_32_xm0_xexec = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load 4 from %stack.0, addrspace 5) - ; CHECK: [[SI_SPILL_S128_RESTORE:%[0-9]+]]:sgpr_128 = SI_SPILL_S128_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load 16 from %stack.1, align 4, addrspace 5) + ; CHECK: [[SI_SPILL_S32_RESTORE1:%[0-9]+]]:sreg_32_xm0_xexec = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.0, addrspace 5) + ; CHECK: [[SI_SPILL_S128_RESTORE:%[0-9]+]]:sgpr_128 = SI_SPILL_S128_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s128) from %stack.1, align 4, addrspace 5) ; CHECK: undef %914.sub2_sub3:sgpr_128 = COPY [[SI_SPILL_S128_RESTORE]].sub2_sub3 { ; CHECK: internal %914.sub0:sgpr_128 = COPY [[SI_SPILL_S128_RESTORE]].sub0 ; CHECK: } ; CHECK: %914.sub1:sgpr_128 = COPY [[SI_SPILL_S32_RESTORE1]] - ; CHECK: [[S_BUFFER_LOAD_DWORD_IMM8:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %914, 0, 0 :: (dereferenceable invariant load 4) + ; CHECK: [[S_BUFFER_LOAD_DWORD_IMM8:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %914, 0, 0 :: (dereferenceable invariant load (s32)) ; CHECK: [[V_ADD_U32_e32_28:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -576, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec ; CHECK: [[V_OR_B32_e32_63:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_62]], [[V_ADD_U32_e32_27]], implicit $exec ; CHECK: [[V_ADD_U32_e32_29:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -577, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec ; CHECK: [[V_OR_B32_e32_64:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_63]], [[V_ADD_U32_e32_28]], implicit $exec ; CHECK: [[V_ADD_U32_e32_30:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -593, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec ; CHECK: [[V_OR_B32_e32_65:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_64]], [[V_ADD_U32_e32_29]], implicit $exec - ; CHECK: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM undef %564:sreg_64, 0, 0 :: (load 32 from `<8 x i32> addrspace(4)* undef`, addrspace 4) + ; CHECK: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM undef %564:sreg_64, 0, 0 :: (load (s256) from `<8 x i32> addrspace(4)* undef`, addrspace 4) ; CHECK: [[V_OR_B32_e32_66:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_65]], [[V_ADD_U32_e32_30]], implicit $exec ; CHECK: [[S_ADD_I32_24:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM8]], -594, implicit-def dead $scc ; CHECK: [[V_OR_B32_e32_67:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_24]], [[V_OR_B32_e32_66]], implicit $exec ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 0, [[V_OR_B32_e32_67]], implicit $exec ; CHECK: undef %691.sub3:vreg_128 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_EQ_U32_e64_]], implicit $exec - ; CHECK: IMAGE_STORE_V4_V2_gfx10 %691, undef %578:vreg_64, [[S_LOAD_DWORDX8_IMM]], 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "ImageResource") + ; CHECK: IMAGE_STORE_V4_V2_gfx10 %691, undef %578:vreg_64, [[S_LOAD_DWORDX8_IMM]], 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "ImageResource") ; CHECK: S_ENDPGM 0 .expVert: %0 = extractelement <31 x i32> %userData, i64 2 diff --git a/llvm/test/CodeGen/AMDGPU/splitkit-nolivesubranges.mir b/llvm/test/CodeGen/AMDGPU/splitkit-nolivesubranges.mir index fa907f7f992e7..0cf90223b8635 100644 --- a/llvm/test/CodeGen/AMDGPU/splitkit-nolivesubranges.mir +++ b/llvm/test/CodeGen/AMDGPU/splitkit-nolivesubranges.mir @@ -24,9 +24,9 @@ body: | ; CHECK: renamable $sgpr0_sgpr1 = IMPLICIT_DEF ; CHECK: renamable $sgpr0 = IMPLICIT_DEF ; CHECK: renamable $sgpr1 = IMPLICIT_DEF - ; CHECK: SI_SPILL_S64_SAVE renamable $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sp_reg :: (store 8 into %stack.0, align 4, addrspace 5) + ; CHECK: SI_SPILL_S64_SAVE renamable $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sp_reg :: (store (s64) into %stack.0, align 4, addrspace 5) ; CHECK: KILL implicit-def $vcc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99_sgpr100_sgpr101_sgpr102_sgpr103 - ; CHECK: renamable $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load 8 from %stack.0, align 4, addrspace 5) + ; CHECK: renamable $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s64) from %stack.0, align 4, addrspace 5) ; CHECK: $sgpr105 = S_AND_B32 killed renamable $sgpr1, renamable $sgpr1, implicit-def $scc ; CHECK: S_NOP 0, implicit $sgpr104, implicit $sgpr105 %0:sreg_64 = COPY $sgpr0_sgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/stack-slot-color-sgpr-vgpr-spills.mir b/llvm/test/CodeGen/AMDGPU/stack-slot-color-sgpr-vgpr-spills.mir index c7a8fd96ea852..863631444ea65 100644 --- a/llvm/test/CodeGen/AMDGPU/stack-slot-color-sgpr-vgpr-spills.mir +++ b/llvm/test/CodeGen/AMDGPU/stack-slot-color-sgpr-vgpr-spills.mir @@ -6,8 +6,8 @@ # CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4, # CHECK-NEXT: stack-id: sgpr-spill, -# CHECK: SI_SPILL_S32_SAVE killed renamable $sgpr5, %stack.0, implicit $exec, implicit $sgpr32 :: (store 4 into %stack.0, addrspace 5) -# CHECK: renamable $sgpr5 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load 4 from %stack.0, addrspace 5) +# CHECK: SI_SPILL_S32_SAVE killed renamable $sgpr5, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.0, addrspace 5) +# CHECK: renamable $sgpr5 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.0, addrspace 5) name: no_merge_sgpr_vgpr_spill_slot tracksRegLiveness: true diff --git a/llvm/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir b/llvm/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir index c9619a65c1a1a..af50d682841aa 100644 --- a/llvm/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir +++ b/llvm/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir @@ -97,7 +97,7 @@ body: | %11.sub5:sgpr_256 = COPY %11.sub0 %11.sub6:sgpr_256 = COPY %11.sub0 %11.sub7:sgpr_256 = COPY %11.sub0 - %12:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %9, %11, undef %13:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, addrspace 4) + %12:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %9, %11, undef %13:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from constant-pool, addrspace 4) %14:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec %15:vreg_128 = IMPLICIT_DEF S_CBRANCH_SCC1 %bb.8, implicit undef $scc @@ -163,12 +163,12 @@ body: | %18:vgpr_32 = V_MAD_F32_e64 0, %10.sub0, 0, target-flags(amdgpu-gotprel) 1073741824, 0, -1082130432, 0, 0, implicit $mode, implicit $exec %19:vgpr_32 = V_MAD_F32_e64 0, %12.sub0, 0, target-flags(amdgpu-gotprel) 0, 0, 0, 0, 0, implicit $mode, implicit $exec - %20:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM undef %21:sgpr_128, 1040, 0 :: (dereferenceable invariant load 16) + %20:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM undef %21:sgpr_128, 1040, 0 :: (dereferenceable invariant load (s128)) %22:vgpr_32 = V_ADD_F32_e32 0, %19, implicit $mode, implicit $exec %23:vgpr_32 = V_MAD_F32_e64 0, %18, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec %24:vgpr_32 = COPY %20.sub3 %25:vgpr_32 = V_MUL_F32_e64 0, target-flags(amdgpu-gotprel32-lo) 0, 0, %20.sub1, 0, 0, implicit $mode, implicit $exec - %26:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM undef %27:sgpr_128, 1056, 0 :: (dereferenceable invariant load 16) + %26:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM undef %27:sgpr_128, 1056, 0 :: (dereferenceable invariant load (s128)) %28:vgpr_32 = V_MAD_F32_e64 0, %18, 0, %26.sub0, 0, 0, 0, 0, implicit $mode, implicit $exec %29:vgpr_32 = V_ADD_F32_e32 %28, %19, implicit $mode, implicit $exec %30:vgpr_32 = V_RCP_F32_e32 %29, implicit $mode, implicit $exec @@ -268,7 +268,7 @@ body: | %62:vgpr_32 = V_MOV_B32_e32 1033100696, implicit $exec %63:vgpr_32 = V_MUL_F32_e32 1060575065, %15.sub1, implicit $mode, implicit $exec %63:vgpr_32 = V_MAC_F32_e32 1046066128, %15.sub0, %63, implicit $mode, implicit $exec - %64:vgpr_32 = IMAGE_LOAD_V1_V2 %60, %61, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, addrspace 4) + %64:vgpr_32 = IMAGE_LOAD_V1_V2 %60, %61, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from constant-pool, addrspace 4) %64:vgpr_32 = V_MAC_F32_e32 target-flags(amdgpu-gotprel) 0, %51.sub0, %64, implicit $mode, implicit $exec %65:vgpr_32 = V_MUL_F32_e32 0, %64, implicit $mode, implicit $exec %66:vgpr_32 = V_MUL_F32_e32 0, %65, implicit $mode, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/subreg-undef-def-with-other-subreg-defs.mir b/llvm/test/CodeGen/AMDGPU/subreg-undef-def-with-other-subreg-defs.mir index ee47432238476..d87bfde007aee 100644 --- a/llvm/test/CodeGen/AMDGPU/subreg-undef-def-with-other-subreg-defs.mir +++ b/llvm/test/CodeGen/AMDGPU/subreg-undef-def-with-other-subreg-defs.mir @@ -24,7 +24,7 @@ body: | ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; CHECK: bb.1: ; CHECK: successors: %bb.1(0x80000000) - ; CHECK: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load 4, addrspace 3) + ; CHECK: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 3) ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:VGPR_LO16 */, def %0, 2147549193 /* reguse tiedto:$1 */, %0(tied-def 3) ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851977 /* reguse:VGPR_LO16 */, [[DS_READ_B32_gfx9_]] ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:VGPR_LO16 */, def undef %0.sub0, 851978 /* regdef:VGPR_LO16 */, def undef %0.sub1 @@ -37,7 +37,7 @@ body: | %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec bb.1: - %2:vgpr_32 = DS_READ_B32_gfx9 %1, 0, 0, implicit $exec :: (load 4, addrspace 3) + %2:vgpr_32 = DS_READ_B32_gfx9 %1, 0, 0, implicit $exec :: (load (s32), addrspace 3) INLINEASM &"", 1, 851978, def %0, 2147549193, %0(tied-def 3) INLINEASM &"", 1, 851977, %2 INLINEASM &"", 1, 851978, def undef %0.sub0, 851978, def %0.sub1 @@ -62,7 +62,7 @@ body: | ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; CHECK: bb.1: ; CHECK: successors: %bb.1(0x80000000) - ; CHECK: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load 4, addrspace 3) + ; CHECK: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32), addrspace 3) ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:VGPR_LO16 */, def %0, 2147549193 /* reguse tiedto:$1 */, %0(tied-def 3) ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851977 /* reguse:VGPR_LO16 */, [[DS_READ_B32_gfx9_]] ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:VGPR_LO16 */, def undef %0.sub1, 851978 /* regdef:VGPR_LO16 */, def undef %0.sub0 @@ -75,7 +75,7 @@ body: | %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec bb.1: - %2:vgpr_32 = DS_READ_B32_gfx9 %1, 0, 0, implicit $exec :: (load 4, addrspace 3) + %2:vgpr_32 = DS_READ_B32_gfx9 %1, 0, 0, implicit $exec :: (load (s32), addrspace 3) INLINEASM &"", 1, 851978, def %0, 2147549193, %0(tied-def 3) INLINEASM &"", 1, 851977, %2 INLINEASM &"", 1, 851978, def %0.sub1, 851978, def undef %0.sub0 diff --git a/llvm/test/CodeGen/AMDGPU/swdev282079.mir b/llvm/test/CodeGen/AMDGPU/swdev282079.mir index d1761697d9b22..494a426b175d5 100644 --- a/llvm/test/CodeGen/AMDGPU/swdev282079.mir +++ b/llvm/test/CodeGen/AMDGPU/swdev282079.mir @@ -22,7 +22,7 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; CHECK: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK: FLAT_STORE_DWORDX2 killed [[DEF]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8) + ; CHECK: FLAT_STORE_DWORDX2 killed [[DEF]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) ; CHECK: S_ENDPGM 0 $vgpr0 = V_MOV_B32_e32 0, implicit $exec $vgpr1 = V_MOV_B32_e32 1, implicit $exec @@ -31,7 +31,7 @@ body: | %1:vgpr_32 = COPY $vgpr1 %2:vreg_64_align2 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1 %3:vreg_64_align2 = IMPLICIT_DEF - FLAT_STORE_DWORDX2 killed %3, killed %2, 0, 0, implicit $exec, implicit $flat_scr :: (store 8) + FLAT_STORE_DWORDX2 killed %3, killed %2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) S_ENDPGM 0 ... @@ -54,7 +54,7 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; CHECK: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK: FLAT_STORE_DWORDX2 killed [[REG_SEQUENCE]], killed [[DEF]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8) + ; CHECK: FLAT_STORE_DWORDX2 killed [[REG_SEQUENCE]], killed [[DEF]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) ; CHECK: S_ENDPGM 0 $vgpr0 = V_MOV_B32_e32 0, implicit $exec $vgpr1 = V_MOV_B32_e32 1, implicit $exec @@ -64,7 +64,7 @@ body: | %2:vgpr_32 = COPY %0 %3:vreg_64_align2 = REG_SEQUENCE %0, %subreg.sub0, %2, %subreg.sub1 %4:vreg_64_align2 = IMPLICIT_DEF - FLAT_STORE_DWORDX2 killed %3, killed %4, 0, 0, implicit $exec, implicit $flat_scr :: (store 8) + FLAT_STORE_DWORDX2 killed %3, killed %4, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) S_ENDPGM 0 ... @@ -87,7 +87,7 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[COPY]], %subreg.sub1 ; CHECK: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK: FLAT_STORE_DWORDX2 killed [[DEF]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8) + ; CHECK: FLAT_STORE_DWORDX2 killed [[DEF]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) ; CHECK: S_ENDPGM 0 $vgpr0 = V_MOV_B32_e32 0, implicit $exec $vgpr1 = V_MOV_B32_e32 1, implicit $exec @@ -96,7 +96,7 @@ body: | %1:vgpr_32 = COPY $vgpr0 %2:vreg_64_align2 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1 %3:vreg_64_align2 = IMPLICIT_DEF - FLAT_STORE_DWORDX2 killed %3, killed %2, 0, 0, implicit $exec, implicit $flat_scr :: (store 8) + FLAT_STORE_DWORDX2 killed %3, killed %2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64)) S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/syncscopes.ll b/llvm/test/CodeGen/AMDGPU/syncscopes.ll index 2a7c87ea33850..c960f59456b8c 100644 --- a/llvm/test/CodeGen/AMDGPU/syncscopes.ll +++ b/llvm/test/CodeGen/AMDGPU/syncscopes.ll @@ -1,9 +1,9 @@ ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -stop-after=si-late-branch-lowering < %s | FileCheck --check-prefix=GCN %s ; GCN-LABEL: name: syncscopes -; GCN: FLAT_STORE_DWORD killed renamable $vgpr1_vgpr2, killed renamable $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (store syncscope("agent") seq_cst 4 into %ir.agent_out) -; GCN: FLAT_STORE_DWORD killed renamable $vgpr4_vgpr5, killed renamable $vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (store syncscope("workgroup") seq_cst 4 into %ir.workgroup_out) -; GCN: FLAT_STORE_DWORD killed renamable $vgpr7_vgpr8, killed renamable $vgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (store syncscope("wavefront") seq_cst 4 into %ir.wavefront_out) +; GCN: FLAT_STORE_DWORD killed renamable $vgpr1_vgpr2, killed renamable $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (store syncscope("agent") seq_cst (s32) into %ir.agent_out) +; GCN: FLAT_STORE_DWORD killed renamable $vgpr4_vgpr5, killed renamable $vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (store syncscope("workgroup") seq_cst (s32) into %ir.workgroup_out) +; GCN: FLAT_STORE_DWORD killed renamable $vgpr7_vgpr8, killed renamable $vgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (store syncscope("wavefront") seq_cst (s32) into %ir.wavefront_out) define void @syncscopes( i32 %agent, i32* %agent_out, diff --git a/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll b/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll index e5a019e5d04af..21d45a9c3bf58 100644 --- a/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll +++ b/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll @@ -25,7 +25,7 @@ define amdgpu_ps float @test_return_to_epilog_into_end_block(i32 inreg %a, float ; GCN: bb.2.else: ; GCN: successors: ; GCN: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec - ; GCN: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; GCN: S_WAITCNT 3952 ; GCN: bb.3: entry: @@ -62,7 +62,7 @@ define amdgpu_ps float @test_unify_return_to_epilog_into_end_block(i32 inreg %a, ; GCN: bb.4.else: ; GCN: successors: ; GCN: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec - ; GCN: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; GCN: S_WAITCNT 3952 ; GCN: bb.5: entry: diff --git a/llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.mir b/llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.mir index fb81b04e7fdeb..4e513c4912fd0 100644 --- a/llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.mir +++ b/llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.mir @@ -24,7 +24,7 @@ body: | ; CHECK-LABEL: name: unallocatable_clause_bundle ; CHECK: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK: renamable $sgpr4 = COPY $sgpr0 - ; CHECK: SI_SPILL_S128_SAVE $sgpr0_sgpr1_sgpr2_sgpr3, %stack.0, implicit $exec, implicit $sgpr32 :: (store 16 into %stack.0, align 4, addrspace 5) + ; CHECK: SI_SPILL_S128_SAVE $sgpr0_sgpr1_sgpr2_sgpr3, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s128) into %stack.0, align 4, addrspace 5) ; CHECK: renamable $sgpr5 = S_MOV_B32 0 ; CHECK: renamable $sgpr76 = COPY renamable $sgpr5 ; CHECK: renamable $sgpr77 = COPY renamable $sgpr5 @@ -41,44 +41,44 @@ body: | ; CHECK: renamable $sgpr14 = COPY renamable $sgpr5 ; CHECK: renamable $sgpr15 = COPY renamable $sgpr5 ; CHECK: renamable $vgpr5_vgpr6 = COPY killed renamable $sgpr0_sgpr1 - ; CHECK: renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1088, 0 :: (dereferenceable load 32, addrspace 6) - ; CHECK: renamable $sgpr80_sgpr81_sgpr82_sgpr83 = S_LOAD_DWORDX4_IMM renamable $sgpr4_sgpr5, 0, 0 :: (load 16, addrspace 6) + ; CHECK: renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1088, 0 :: (dereferenceable load (s256), addrspace 6) + ; CHECK: renamable $sgpr80_sgpr81_sgpr82_sgpr83 = S_LOAD_DWORDX4_IMM renamable $sgpr4_sgpr5, 0, 0 :: (load (s128), addrspace 6) ; CHECK: renamable $sgpr0 = S_MOV_B32 1200 ; CHECK: renamable $sgpr1 = COPY renamable $sgpr5 - ; CHECK: renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1152, 0 :: (dereferenceable load 32, addrspace 6) - ; CHECK: renamable $sgpr84_sgpr85_sgpr86_sgpr87 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0 :: (load 16, addrspace 6) + ; CHECK: renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1152, 0 :: (dereferenceable load (s256), addrspace 6) + ; CHECK: renamable $sgpr84_sgpr85_sgpr86_sgpr87 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0 :: (load (s128), addrspace 6) ; CHECK: KILL killed renamable $sgpr0, renamable $sgpr1 ; CHECK: renamable $sgpr0 = S_MOV_B32 1264 ; CHECK: renamable $sgpr1 = COPY renamable $sgpr5 - ; CHECK: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1216, 0 :: (dereferenceable load 32, addrspace 6) - ; CHECK: renamable $sgpr88_sgpr89_sgpr90_sgpr91 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0 :: (load 16, addrspace 6) + ; CHECK: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1216, 0 :: (dereferenceable load (s256), addrspace 6) + ; CHECK: renamable $sgpr88_sgpr89_sgpr90_sgpr91 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0 :: (load (s128), addrspace 6) ; CHECK: KILL killed renamable $sgpr0, renamable $sgpr1 ; CHECK: renamable $sgpr0 = S_MOV_B32 1328 ; CHECK: renamable $sgpr1 = COPY renamable $sgpr5 - ; CHECK: renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1280, 0 :: (dereferenceable load 32, addrspace 6) - ; CHECK: renamable $sgpr92_sgpr93_sgpr94_sgpr95 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0 :: (load 16, addrspace 6) + ; CHECK: renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1280, 0 :: (dereferenceable load (s256), addrspace 6) + ; CHECK: renamable $sgpr92_sgpr93_sgpr94_sgpr95 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0 :: (load (s128), addrspace 6) ; CHECK: KILL killed renamable $sgpr0, renamable $sgpr1 - ; CHECK: renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1344, 0 :: (dereferenceable load 32, addrspace 6) + ; CHECK: renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1344, 0 :: (dereferenceable load (s256), addrspace 6) ; CHECK: renamable $sgpr0 = S_MOV_B32 1392 ; CHECK: renamable $sgpr1 = COPY renamable $sgpr5 - ; CHECK: renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 0, 0 :: (load 32, addrspace 6) + ; CHECK: renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 0, 0 :: (load (s256), addrspace 6) ; CHECK: renamable $sgpr2 = S_MOV_B32 1456 ; CHECK: renamable $sgpr3 = COPY renamable $sgpr5 - ; CHECK: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1472, 0 :: (dereferenceable load 32, addrspace 6) + ; CHECK: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1472, 0 :: (dereferenceable load (s256), addrspace 6) ; CHECK: renamable $sgpr4 = S_MOV_B32 1520 - ; CHECK: renamable $sgpr96_sgpr97_sgpr98_sgpr99 = S_LOAD_DWORDX4_IMM killed renamable $sgpr2_sgpr3, 0, 0 :: (load 16, addrspace 6) - ; CHECK: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (load 16, addrspace 6) - ; CHECK: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr0_sgpr1, 0, 0 :: (load 16, addrspace 6) - ; CHECK: renamable $vgpr7 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, killed renamable $sgpr76_sgpr77_sgpr78_sgpr79, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource") - ; CHECK: renamable $vgpr8 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, killed renamable $sgpr80_sgpr81_sgpr82_sgpr83, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource") - ; CHECK: renamable $vgpr9 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, killed renamable $sgpr84_sgpr85_sgpr86_sgpr87, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource") - ; CHECK: renamable $vgpr10 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, renamable $sgpr88_sgpr89_sgpr90_sgpr91, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource") - ; CHECK: renamable $vgpr11 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, renamable $sgpr92_sgpr93_sgpr94_sgpr95, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource") - ; CHECK: renamable $vgpr12 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67, renamable $sgpr96_sgpr97_sgpr98_sgpr99, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource") - ; CHECK: renamable $vgpr13 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, renamable $sgpr4_sgpr5_sgpr6_sgpr7, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource") - ; CHECK: renamable $vgpr14 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource") - ; CHECK: renamable $sgpr8_sgpr9_sgpr10_sgpr11 = SI_SPILL_S128_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load 16 from %stack.0, align 4, addrspace 5) - ; CHECK: renamable $vgpr1_vgpr2_vgpr3_vgpr4 = BUFFER_LOAD_FORMAT_XYZW_IDXEN renamable $vgpr0, renamable $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4) + ; CHECK: renamable $sgpr96_sgpr97_sgpr98_sgpr99 = S_LOAD_DWORDX4_IMM killed renamable $sgpr2_sgpr3, 0, 0 :: (load (s128), addrspace 6) + ; CHECK: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (load (s128), addrspace 6) + ; CHECK: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr0_sgpr1, 0, 0 :: (load (s128), addrspace 6) + ; CHECK: renamable $vgpr7 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, killed renamable $sgpr76_sgpr77_sgpr78_sgpr79, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") + ; CHECK: renamable $vgpr8 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, killed renamable $sgpr80_sgpr81_sgpr82_sgpr83, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") + ; CHECK: renamable $vgpr9 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, killed renamable $sgpr84_sgpr85_sgpr86_sgpr87, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") + ; CHECK: renamable $vgpr10 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, renamable $sgpr88_sgpr89_sgpr90_sgpr91, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") + ; CHECK: renamable $vgpr11 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, renamable $sgpr92_sgpr93_sgpr94_sgpr95, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") + ; CHECK: renamable $vgpr12 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67, renamable $sgpr96_sgpr97_sgpr98_sgpr99, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") + ; CHECK: renamable $vgpr13 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, renamable $sgpr4_sgpr5_sgpr6_sgpr7, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") + ; CHECK: renamable $vgpr14 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") + ; CHECK: renamable $sgpr8_sgpr9_sgpr10_sgpr11 = SI_SPILL_S128_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s128) from %stack.0, align 4, addrspace 5) + ; CHECK: renamable $vgpr1_vgpr2_vgpr3_vgpr4 = BUFFER_LOAD_FORMAT_XYZW_IDXEN renamable $vgpr0, renamable $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource", align 1, addrspace 4) ; CHECK: KILL killed renamable $sgpr4_sgpr5_sgpr6_sgpr7 ; CHECK: KILL killed renamable $sgpr92_sgpr93_sgpr94_sgpr95 ; CHECK: KILL killed renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75 @@ -121,40 +121,40 @@ body: | %5.sub6:sgpr_256 = COPY %1.sub1 %5.sub7:sgpr_256 = COPY %1.sub1 %6:vreg_64 = COPY %4 - %7:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 1088, 0 :: (dereferenceable load 32, addrspace 6) - %8:sgpr_128 = S_LOAD_DWORDX4_IMM %1, 0, 0 :: (load 16, addrspace 6) + %7:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 1088, 0 :: (dereferenceable load (s256), addrspace 6) + %8:sgpr_128 = S_LOAD_DWORDX4_IMM %1, 0, 0 :: (load (s128), addrspace 6) undef %9.sub0:sreg_64_xexec = S_MOV_B32 1200 %9.sub1:sreg_64_xexec = COPY %1.sub1 - %10:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 1152, 0 :: (dereferenceable load 32, addrspace 6) - %11:sgpr_128 = S_LOAD_DWORDX4_IMM %9, 0, 0 :: (load 16, addrspace 6) + %10:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 1152, 0 :: (dereferenceable load (s256), addrspace 6) + %11:sgpr_128 = S_LOAD_DWORDX4_IMM %9, 0, 0 :: (load (s128), addrspace 6) undef %12.sub0:sreg_64_xexec = S_MOV_B32 1264 %12.sub1:sreg_64_xexec = COPY %1.sub1 - %13:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 1216, 0 :: (dereferenceable load 32, addrspace 6) - %14:sgpr_128 = S_LOAD_DWORDX4_IMM %12, 0, 0 :: (load 16, addrspace 6) + %13:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 1216, 0 :: (dereferenceable load (s256), addrspace 6) + %14:sgpr_128 = S_LOAD_DWORDX4_IMM %12, 0, 0 :: (load (s128), addrspace 6) undef %15.sub0:sreg_64_xexec = S_MOV_B32 1328 %15.sub1:sreg_64_xexec = COPY %1.sub1 - %16:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 1280, 0 :: (dereferenceable load 32, addrspace 6) - %17:sgpr_128 = S_LOAD_DWORDX4_IMM %15, 0, 0 :: (load 16, addrspace 6) - %18:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 1344, 0 :: (dereferenceable load 32, addrspace 6) + %16:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 1280, 0 :: (dereferenceable load (s256), addrspace 6) + %17:sgpr_128 = S_LOAD_DWORDX4_IMM %15, 0, 0 :: (load (s128), addrspace 6) + %18:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 1344, 0 :: (dereferenceable load (s256), addrspace 6) undef %19.sub0:sreg_64_xexec = S_MOV_B32 1392 %19.sub1:sreg_64_xexec = COPY %1.sub1 - %20:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 0, 0 :: (load 32, addrspace 6) + %20:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 0, 0 :: (load (s256), addrspace 6) undef %21.sub0:sreg_64_xexec = S_MOV_B32 1456 %21.sub1:sreg_64_xexec = COPY %1.sub1 - %22:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 1472, 0 :: (dereferenceable load 32, addrspace 6) + %22:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 1472, 0 :: (dereferenceable load (s256), addrspace 6) %1.sub0:sgpr_64 = S_MOV_B32 1520 - %23:sgpr_128 = S_LOAD_DWORDX4_IMM %21, 0, 0 :: (load 16, addrspace 6) - %24:sgpr_128 = S_LOAD_DWORDX4_IMM %1, 0, 0 :: (load 16, addrspace 6) - %25:sgpr_128 = S_LOAD_DWORDX4_IMM %19, 0, 0 :: (load 16, addrspace 6) - %26:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %5, %3, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource") - %27:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %7, %8, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource") - %28:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %10, %11, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource") - %29:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %13, %14, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource") - %30:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %16, %17, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource") - %31:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %20, %23, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource") - %32:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %22, %24, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource") - %33:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %18, %25, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource") - %34:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN %0, %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4) + %23:sgpr_128 = S_LOAD_DWORDX4_IMM %21, 0, 0 :: (load (s128), addrspace 6) + %24:sgpr_128 = S_LOAD_DWORDX4_IMM %1, 0, 0 :: (load (s128), addrspace 6) + %25:sgpr_128 = S_LOAD_DWORDX4_IMM %19, 0, 0 :: (load (s128), addrspace 6) + %26:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %5, %3, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") + %27:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %7, %8, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") + %28:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %10, %11, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") + %29:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %13, %14, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") + %30:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %16, %17, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") + %31:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %20, %23, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") + %32:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %22, %24, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") + %33:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 %6, %18, %25, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") + %34:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN %0, %2, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource", align 1, addrspace 4) %35:vgpr_32 = nofpexcept V_MAX_F32_e32 %26, %27, implicit $mode, implicit $exec %36:vgpr_32 = V_MAX3_F32_e64 0, %35, 0, %28, 0, %29, 0, 0, implicit $mode, implicit $exec %37:vgpr_32 = nofpexcept V_ADD_F32_e32 -1083321614, %31, implicit $mode, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir index 8c75b1f78e951..103a8c025fe23 100644 --- a/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir +++ b/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir @@ -44,12 +44,12 @@ body: | liveins: $vgpr0, $sgpr4_sgpr5, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr13 $vgpr1_vgpr2 = COPY killed $sgpr4_sgpr5, implicit $exec - $vgpr1 = GLOBAL_LOAD_UBYTE killed $vgpr1_vgpr2, 0, 0, implicit $exec :: (non-temporal dereferenceable invariant load 1 from `i1 addrspace(4)* undef`) + $vgpr1 = GLOBAL_LOAD_UBYTE killed $vgpr1_vgpr2, 0, 0, implicit $exec :: (non-temporal dereferenceable invariant load (s8) from `i1 addrspace(4)* undef`) $vcc = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 1, killed $vgpr1, implicit $exec $vgpr1 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed $sgpr0_sgpr1, implicit $exec $sgpr0_sgpr1 = COPY $exec, implicit-def $exec - SI_SPILL_S64_SAVE $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (store 8 into %stack.0, align 4, addrspace 5) + SI_SPILL_S64_SAVE $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (store (s64) into %stack.0, align 4, addrspace 5) $sgpr2_sgpr3 = S_AND_B64 killed $sgpr0_sgpr1, killed $vcc, implicit-def dead $scc $exec = S_MOV_B64_term killed $sgpr2_sgpr3 S_CBRANCH_EXECZ %bb.2, implicit $exec @@ -68,7 +68,7 @@ body: | successors: liveins: $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr13 - $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (load 8 from %stack.0, align 4, addrspace 5) + $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (load (s64) from %stack.0, align 4, addrspace 5) $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc bb.3: @@ -84,7 +84,7 @@ body: | # CHECK-LABEL: {{^}}name: undefined_physreg_sgpr_spill_reorder # CHECK: $sgpr0_sgpr1 = COPY $exec, implicit-def $exec # CHECK: $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def dead $scc -# CHECK: SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (store 8 into %stack.0, align 4, addrspace 5) +# CHECK: SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (store (s64) into %stack.0, align 4, addrspace 5) # CHECK: $exec = COPY killed $sgpr2_sgpr3 name: undefined_physreg_sgpr_spill_reorder alignment: 1 @@ -109,13 +109,13 @@ body: | liveins: $vgpr0, $sgpr4_sgpr5, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr13 $vgpr1_vgpr2 = COPY killed $sgpr4_sgpr5, implicit $exec - $vgpr1 = GLOBAL_LOAD_UBYTE killed $vgpr1_vgpr2, 0, 0, implicit $exec :: (non-temporal dereferenceable invariant load 1 from `i1 addrspace(4)* undef`) + $vgpr1 = GLOBAL_LOAD_UBYTE killed $vgpr1_vgpr2, 0, 0, implicit $exec :: (non-temporal dereferenceable invariant load (s8) from `i1 addrspace(4)* undef`) $vcc = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 1, killed $vgpr1, implicit $exec $vgpr1 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed $sgpr0_sgpr1, implicit $exec $sgpr0_sgpr1 = COPY $exec, implicit-def $exec $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def dead $scc - SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (store 8 into %stack.0, align 4, addrspace 5) + SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (store (s64) into %stack.0, align 4, addrspace 5) $exec = S_MOV_B64_term killed $sgpr2_sgpr3 S_CBRANCH_EXECZ %bb.2, implicit $exec S_BRANCH %bb.1 @@ -133,7 +133,7 @@ body: | successors: liveins: $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr13 - $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (load 8 from %stack.0, align 4, addrspace 5) + $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (load (s64) from %stack.0, align 4, addrspace 5) $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc bb.3: diff --git a/llvm/test/CodeGen/AMDGPU/unexpected-reg-unit-state.mir b/llvm/test/CodeGen/AMDGPU/unexpected-reg-unit-state.mir index ea645992c21a3..a77e720065e43 100644 --- a/llvm/test/CodeGen/AMDGPU/unexpected-reg-unit-state.mir +++ b/llvm/test/CodeGen/AMDGPU/unexpected-reg-unit-state.mir @@ -16,9 +16,9 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK: V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec ; CHECK: renamable $sgpr4_sgpr5 = COPY $vcc - ; CHECK: SI_SPILL_S64_SAVE $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr32 :: (store 8 into %stack.0, align 4, addrspace 5) + ; CHECK: SI_SPILL_S64_SAVE $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5) ; CHECK: renamable $sgpr4_sgpr5 = COPY $vcc - ; CHECK: $vcc = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load 8 from %stack.0, align 4, addrspace 5) + ; CHECK: $vcc = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.0, align 4, addrspace 5) ; CHECK: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, 3, killed $sgpr4_sgpr5, implicit $exec ; CHECK: S_ENDPGM 0, implicit killed $vgpr0, implicit killed renamable $vcc %0:vgpr_32 = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/unsupported-image-a16.ll b/llvm/test/CodeGen/AMDGPU/unsupported-image-a16.ll index c8bd158cb1755..d1ab4cb03e3f7 100644 --- a/llvm/test/CodeGen/AMDGPU/unsupported-image-a16.ll +++ b/llvm/test/CodeGen/AMDGPU/unsupported-image-a16.ll @@ -5,7 +5,7 @@ ; feature, and instead generates a slection error. ; SDAG-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.image.load.1d -; GISEL-ERR: LLVM ERROR: unable to legalize instruction: %{{[0-9]+}}:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "ImageResource") (in function: load_1d) +; GISEL-ERR: LLVM ERROR: unable to legalize instruction: %{{[0-9]+}}:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(<8 x s32>), 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") (in function: load_1d) define amdgpu_ps <4 x float> @load_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { main_body: diff --git a/llvm/test/CodeGen/AMDGPU/unsupported-image-g16.ll b/llvm/test/CodeGen/AMDGPU/unsupported-image-g16.ll index cf248a35cc824..d50d9166d708c 100644 --- a/llvm/test/CodeGen/AMDGPU/unsupported-image-g16.ll +++ b/llvm/test/CodeGen/AMDGPU/unsupported-image-g16.ll @@ -8,7 +8,7 @@ ; generates a selection error. ; SDAG-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.image.sample.d.1d -; GISEL-ERR: LLVM ERROR: unable to legalize instruction: %{{[0-9]+}}:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(s32), %{{[0-9]+}}:_(<8 x s32>), %{{[0-9]+}}:_(<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "ImageResource") (in function: sample_d_1d) +; GISEL-ERR: LLVM ERROR: unable to legalize instruction: %{{[0-9]+}}:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(s32), %{{[0-9]+}}:_(<8 x s32>), %{{[0-9]+}}:_(<4 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") (in function: sample_d_1d) define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) { main_body: diff --git a/llvm/test/CodeGen/AMDGPU/verify-ds-gws-align.mir b/llvm/test/CodeGen/AMDGPU/verify-ds-gws-align.mir index c41bf27288261..bdb273cba79c6 100644 --- a/llvm/test/CodeGen/AMDGPU/verify-ds-gws-align.mir +++ b/llvm/test/CodeGen/AMDGPU/verify-ds-gws-align.mir @@ -1,37 +1,37 @@ # RUN: not --crash llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a -run-pass=machineverifier -o /dev/null %s 2>&1 | FileCheck -check-prefix=GFX90A-ERR %s # GFX90A-ERR: *** Bad machine code: Subtarget requires even aligned vector registers for DS_GWS instructions *** -# GFX90A-ERR: DS_GWS_INIT killed %0.sub1:areg_128_align2, 0, implicit $m0, implicit $exec :: (store 4 into custom "GWSResource") +# GFX90A-ERR: DS_GWS_INIT killed %0.sub1:areg_128_align2, 0, implicit $m0, implicit $exec :: (store (s32) into custom "GWSResource") # GFX90A-ERR: *** Bad machine code: Subtarget requires even aligned vector registers for DS_GWS instructions *** -# GFX90A-ERR: DS_GWS_INIT killed %0.sub3:areg_128_align2, 0, implicit $m0, implicit $exec :: (store 4 into custom "GWSResource") +# GFX90A-ERR: DS_GWS_INIT killed %0.sub3:areg_128_align2, 0, implicit $m0, implicit $exec :: (store (s32) into custom "GWSResource") # GFX90A-ERR: *** Bad machine code: Subtarget requires even aligned vector registers for DS_GWS instructions *** -# GFX90A-ERR: DS_GWS_SEMA_BR killed %1.sub1:vreg_64_align2, 0, implicit $m0, implicit $exec :: (store 4 into custom "GWSResource") +# GFX90A-ERR: DS_GWS_SEMA_BR killed %1.sub1:vreg_64_align2, 0, implicit $m0, implicit $exec :: (store (s32) into custom "GWSResource") # GFX90A-ERR: *** Bad machine code: Subtarget requires even aligned vector registers for DS_GWS instructions *** -# GFX90A-ERR: DS_GWS_BARRIER killed %2.sub0:vreg_64, 0, implicit $m0, implicit $exec :: (store 4 into custom "GWSResource") +# GFX90A-ERR: DS_GWS_BARRIER killed %2.sub0:vreg_64, 0, implicit $m0, implicit $exec :: (store (s32) into custom "GWSResource") # GFX90A-ERR: *** Bad machine code: Subtarget requires even aligned vector registers for DS_GWS instructions *** -# GFX90A-ERR: DS_GWS_INIT killed %3:vgpr_32, 0, implicit $m0, implicit $exec :: (store 4 into custom "GWSResource") +# GFX90A-ERR: DS_GWS_INIT killed %3:vgpr_32, 0, implicit $m0, implicit $exec :: (store (s32) into custom "GWSResource") # GFX90A-ERR: *** Bad machine code: Subtarget requires even aligned vector registers for DS_GWS instructions *** -# GFX90A-ERR: DS_GWS_INIT $vgpr1, 0, implicit $m0, implicit $exec :: (store 4 into custom "GWSResource") +# GFX90A-ERR: DS_GWS_INIT $vgpr1, 0, implicit $m0, implicit $exec :: (store (s32) into custom "GWSResource") # GFX90A-ERR: *** Bad machine code: Subtarget requires even aligned vector registers for DS_GWS instructions *** -# GFX90A-ERR: DS_GWS_INIT $agpr1, 0, implicit $m0, implicit $exec :: (store 4 into custom "GWSResource") +# GFX90A-ERR: DS_GWS_INIT $agpr1, 0, implicit $m0, implicit $exec :: (store (s32) into custom "GWSResource") --- name: gws_odd_vgpr body: | bb.0: %0:areg_128_align2 = IMPLICIT_DEF - DS_GWS_INIT killed %0.sub1, 0, implicit $m0, implicit $exec :: (store 4 into custom "GWSResource") + DS_GWS_INIT killed %0.sub1, 0, implicit $m0, implicit $exec :: (store (s32) into custom "GWSResource") %0:areg_128_align2 = IMPLICIT_DEF - DS_GWS_INIT killed %0.sub3, 0, implicit $m0, implicit $exec :: (store 4 into custom "GWSResource") + DS_GWS_INIT killed %0.sub3, 0, implicit $m0, implicit $exec :: (store (s32) into custom "GWSResource") %1:vreg_64_align2 = IMPLICIT_DEF - DS_GWS_SEMA_BR killed %1.sub1, 0, implicit $m0, implicit $exec :: (store 4 into custom "GWSResource") + DS_GWS_SEMA_BR killed %1.sub1, 0, implicit $m0, implicit $exec :: (store (s32) into custom "GWSResource") %2:vreg_64 = IMPLICIT_DEF - DS_GWS_BARRIER killed %2.sub0, 0, implicit $m0, implicit $exec :: (store 4 into custom "GWSResource") + DS_GWS_BARRIER killed %2.sub0, 0, implicit $m0, implicit $exec :: (store (s32) into custom "GWSResource") %3:vgpr_32 = IMPLICIT_DEF - DS_GWS_INIT killed %3, 0, implicit $m0, implicit $exec :: (store 4 into custom "GWSResource") + DS_GWS_INIT killed %3, 0, implicit $m0, implicit $exec :: (store (s32) into custom "GWSResource") $vgpr1 = IMPLICIT_DEF - DS_GWS_INIT $vgpr1, 0, implicit $m0, implicit $exec :: (store 4 into custom "GWSResource") + DS_GWS_INIT $vgpr1, 0, implicit $m0, implicit $exec :: (store (s32) into custom "GWSResource") $agpr1 = IMPLICIT_DEF - DS_GWS_INIT $agpr1, 0, implicit $m0, implicit $exec :: (store 4 into custom "GWSResource") + DS_GWS_INIT $agpr1, 0, implicit $m0, implicit $exec :: (store (s32) into custom "GWSResource") S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-dead-frame-in-dbg-value.mir b/llvm/test/CodeGen/AMDGPU/vgpr-spill-dead-frame-in-dbg-value.mir index 9606d812a40bf..76a6baf139868 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-dead-frame-in-dbg-value.mir +++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-dead-frame-in-dbg-value.mir @@ -48,9 +48,9 @@ body: | ; CHECK: S_ENDPGM 0 bb.0: $vgpr2 = IMPLICIT_DEF - SI_SPILL_V32_SAVE $vgpr2, %stack.0, $sgpr32, 0, implicit $exec :: (store 4 into %stack.0, align 4, addrspace 5) + SI_SPILL_V32_SAVE $vgpr2, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, align 4, addrspace 5) DBG_VALUE %stack.0, 0, !1, !8, debug-location !9 bb.1: - renamable $vgpr2 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5) + renamable $vgpr2 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5) S_ENDPGM 0 diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/vgpr-spill.mir index 2422fe046ebfb..ace372280b7e1 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-spill.mir +++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill.mir @@ -16,9 +16,9 @@ body: | ; CHECK-LABEL: name: spill_v32 ; CHECK: liveins: $vgpr0 - ; CHECK: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) + ; CHECK: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) ; CHECK: S_NOP 0, implicit $vgpr0 - SI_SPILL_V32_SAVE $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) + SI_SPILL_V32_SAVE $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) S_NOP 0, implicit $vgpr0 ... @@ -37,8 +37,8 @@ body: | ; CHECK-LABEL: name: spill_v32_kill ; CHECK: liveins: $vgpr0 - ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) - SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) + ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) ... --- @@ -56,10 +56,10 @@ body: | ; CHECK-LABEL: name: spill_v64 ; CHECK: liveins: $vgpr0_vgpr1 - ; CHECK: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store 4 into %stack.0, addrspace 5) - ; CHECK: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1 :: (store 4 into %stack.0 + 4, addrspace 5) + ; CHECK: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5) + ; CHECK: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0 + 4, addrspace 5) ; CHECK: S_NOP 0, implicit $vgpr0_vgpr1 - SI_SPILL_V64_SAVE $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, addrspace 5) + SI_SPILL_V64_SAVE $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) S_NOP 0, implicit $vgpr0_vgpr1 ... @@ -78,9 +78,9 @@ body: | ; CHECK-LABEL: name: spill_v64_kill ; CHECK: liveins: $vgpr0_vgpr1 - ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store 4 into %stack.0, addrspace 5) - ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store 4 into %stack.0 + 4, addrspace 5) - SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, addrspace 5) + ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5) + ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store (s32) into %stack.0 + 4, addrspace 5) + SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) ... # Make sure there's no verifier error on the undef spill component when the value is killed. @@ -100,9 +100,9 @@ body: | ; CHECK-LABEL: name: spill_v64_undef_sub1_killed ; CHECK: liveins: $vgpr0 - ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store 4 into %stack.0, addrspace 5) - ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store 4 into %stack.0 + 4, addrspace 5) - SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, addrspace 5) + ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5) + ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store (s32) into %stack.0 + 4, addrspace 5) + SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) ... --- @@ -120,9 +120,9 @@ body: | ; CHECK-LABEL: name: spill_v64_undef_sub0_killed ; CHECK: liveins: $vgpr1 - ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store 4 into %stack.0, addrspace 5) - ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store 4 into %stack.0 + 4, addrspace 5) - SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, addrspace 5) + ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5) + ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store (s32) into %stack.0 + 4, addrspace 5) + SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) ... --- @@ -140,9 +140,9 @@ body: | ; CHECK-LABEL: name: spill_v128_kill ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0, addrspace 5) - ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 4, addrspace 5) - ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 8, addrspace 5) - ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 12, addrspace 5) - SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store 16 into %stack.0, addrspace 5) + ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0, addrspace 5) + ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0 + 4, addrspace 5) + ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0 + 8, addrspace 5) + ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0 + 12, addrspace 5) + SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, addrspace 5) ... diff --git a/llvm/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir b/llvm/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir index f5eab81eab5e1..67676e6dcefc1 100644 --- a/llvm/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir +++ b/llvm/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir @@ -29,7 +29,7 @@ machineFunctionInfo: body: | bb.0: ; CHECK-LABEL: name: undef_identity_copy - ; CHECK: renamable $vgpr40_vgpr41_vgpr42_vgpr43 = FLAT_LOAD_DWORDX4 undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, addrspace 1) + ; CHECK: renamable $vgpr40_vgpr41_vgpr42_vgpr43 = FLAT_LOAD_DWORDX4 undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128), addrspace 1) ; CHECK: renamable $sgpr6_sgpr7 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @foo + 4, target-flags(amdgpu-rel32-hi) @foo + 4, implicit-def dead $scc ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95 ; CHECK: $sgpr4 = COPY $sgpr95 @@ -44,9 +44,9 @@ body: | ; CHECK: $vgpr3 = KILL undef renamable $vgpr3 ; CHECK: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr6_sgpr7, @bar, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit $vgpr0, implicit killed $vgpr1, implicit killed $vgpr2, implicit killed $vgpr3, implicit-def $vgpr0 ; CHECK: ADJCALLSTACKDOWN 0, 4, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95 - ; CHECK: FLAT_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) + ; CHECK: FLAT_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) ; CHECK: S_ENDPGM 0 - %0:vreg_128 = FLAT_LOAD_DWORDX4 undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, addrspace 1) + %0:vreg_128 = FLAT_LOAD_DWORDX4 undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128), addrspace 1) %2:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @foo + 4, target-flags(amdgpu-rel32-hi) @foo + 4, implicit-def dead $scc ADJCALLSTACKUP 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95, implicit-def $scc $sgpr4 = COPY $sgpr95 @@ -62,7 +62,7 @@ body: | dead $sgpr30_sgpr31 = SI_CALL %3, @bar, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit $vgpr0, implicit killed $vgpr1, implicit killed $vgpr2, implicit killed $vgpr3, implicit-def $vgpr0 %5:vgpr_32 = COPY $vgpr0 ADJCALLSTACKDOWN 0, 4, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95 - FLAT_STORE_DWORD undef %6:vreg_64, %5, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) + FLAT_STORE_DWORD undef %6:vreg_64, %5, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir b/llvm/test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir index 6ffedea5a9f04..cb0061515c037 100644 --- a/llvm/test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir +++ b/llvm/test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir @@ -340,6 +340,6 @@ body: | $vgpr0 = IMPLICIT_DEF $vgpr1 = IMPLICIT_DEF $vgpr2 = IMPLICIT_DEF - $vgpr3 = GLOBAL_ATOMIC_ADD_RTN $vgpr0_vgpr1, $vgpr2, 0, 1, implicit $exec :: (load store syncscope("agent") seq_cst 4, addrspace 1) + $vgpr3 = GLOBAL_ATOMIC_ADD_RTN $vgpr0_vgpr1, $vgpr2, 0, 1, implicit $exec :: (load store syncscope("agent") seq_cst (s32), addrspace 1) $exec_lo = S_MOV_B32 -1 ... diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-agpr.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-agpr.mir index 96c61fc1c6f1c..3c213ff0031fd 100644 --- a/llvm/test/CodeGen/AMDGPU/waitcnt-agpr.mir +++ b/llvm/test/CodeGen/AMDGPU/waitcnt-agpr.mir @@ -59,8 +59,8 @@ body: | ; GCN: bb.0: ; GCN: successors: %bb.1(0x80000000) ; GCN: S_WAITCNT 0 - ; GCN: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.global4, addrspace 1) - ; GCN: $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.global16, addrspace 1) + ; GCN: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %ir.global4, addrspace 1) + ; GCN: $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.global16, addrspace 1) ; GCN: S_WAITCNT 3953 ; GCN: $agpr0 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec ; GCN: S_BRANCH %bb.1 @@ -68,33 +68,33 @@ body: | ; GCN: successors: %bb.2(0x80000000) ; GCN: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr ; GCN: S_WAITCNT 3952 - ; GCN: $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.global16, addrspace 1) + ; GCN: $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.global16, addrspace 1) ; GCN: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec ; GCN: S_BRANCH %bb.2 ; GCN: bb.2: ; GCN: S_WAITCNT 49279 - ; GCN: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.flat4) + ; GCN: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %ir.flat4) ; GCN: S_WAITCNT 3952 - ; GCN: $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.flat16) + ; GCN: $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.flat16) ; GCN: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec ; GCN: S_ENDPGM 0 bb.0: successors: %bb.1 - $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.global4) - $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.global16) + $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %ir.global4) + $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.global16) $agpr0 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec S_BRANCH %bb.1 bb.1: successors: %bb.2 $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr - $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.global16) + $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.global16) $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec S_BRANCH %bb.2 bb.2: - $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.flat4) - $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.flat16) + $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %ir.flat4) + $agpr4_agpr5_agpr6_agpr7 = FLAT_LOAD_DWORDX4 $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.flat16) $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-back-edge-loop.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-back-edge-loop.mir index 7945db91c1ab3..9619808755140 100644 --- a/llvm/test/CodeGen/AMDGPU/waitcnt-back-edge-loop.mir +++ b/llvm/test/CodeGen/AMDGPU/waitcnt-back-edge-loop.mir @@ -13,8 +13,8 @@ body: | $vgpr1 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr1_vgpr2 $vgpr2 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr1_vgpr2 - $vgpr4 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1) - $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1) + $vgpr4 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load (s32) from `float addrspace(1)* null`, addrspace 1) + $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load (s32) from `float addrspace(1)* null`, addrspace 1) $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 3, killed $sgpr4, implicit $exec $vgpr3 = V_CNDMASK_B32_e64 0, -1082130432, 0, 1065353216, killed $sgpr0_sgpr1, implicit $exec $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec, implicit $exec @@ -23,7 +23,7 @@ body: | bb.3: successors: %bb.1 - $vgpr5 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1) + $vgpr5 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load (s32) from `float addrspace(1)* null`, addrspace 1) bb.1: successors: %bb.5, %bb.2 @@ -43,7 +43,7 @@ body: | bb.4: successors: %bb.3, %bb.1 - $vgpr5 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1) + $vgpr5 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load (s32) from `float addrspace(1)* null`, addrspace 1) $vgpr4 = V_CVT_I32_F32_e32 $vgpr5, implicit $mode, implicit $exec V_CMP_EQ_U32_e32 2, killed $vgpr4, implicit-def $vcc, implicit $exec $vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-no-redundant.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-no-redundant.mir index 8fc384a752009..d8555a1f15770 100644 --- a/llvm/test/CodeGen/AMDGPU/waitcnt-no-redundant.mir +++ b/llvm/test/CodeGen/AMDGPU/waitcnt-no-redundant.mir @@ -34,7 +34,7 @@ body: | name: waitcnt-no-war-wait body: | bb.0: - renamable $sgpr8 = S_BUFFER_LOAD_DWORD_IMM renamable $sgpr0_sgpr1_sgpr2_sgpr3, 276, 0 :: (dereferenceable invariant load 4) - TBUFFER_STORE_FORMAT_X_OFFEN_exact killed renamable $vgpr0, renamable $vgpr15, renamable $sgpr4_sgpr5_sgpr6_sgpr7, renamable $sgpr9, 0, 116, 1, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4) + renamable $sgpr8 = S_BUFFER_LOAD_DWORD_IMM renamable $sgpr0_sgpr1_sgpr2_sgpr3, 276, 0 :: (dereferenceable invariant load (s32)) + TBUFFER_STORE_FORMAT_X_OFFEN_exact killed renamable $vgpr0, renamable $vgpr15, renamable $sgpr4_sgpr5_sgpr6_sgpr7, renamable $sgpr9, 0, 116, 1, 0, 0, implicit $exec :: (dereferenceable store (s32) into custom "BufferResource", align 1, addrspace 4) ... diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-vmem-waw.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-vmem-waw.mir index 3b8027de6c7d0..4f403a01e1c03 100644 --- a/llvm/test/CodeGen/AMDGPU/waitcnt-vmem-waw.mir +++ b/llvm/test/CodeGen/AMDGPU/waitcnt-vmem-waw.mir @@ -44,10 +44,10 @@ body: | ; GFX9-LABEL: name: gather_gather ; GFX9: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; GFX9: S_WAITCNT 0 - ; GFX9: $vgpr10_vgpr11_vgpr12_vgpr13 = IMAGE_GATHER4_LZ_O_V4_V3 $vgpr0_vgpr1_vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16) - ; GFX9: $vgpr13_vgpr14_vgpr15_vgpr16 = IMAGE_GATHER4_LZ_O_V4_V3 $vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16) - $vgpr10_vgpr11_vgpr12_vgpr13 = IMAGE_GATHER4_LZ_O_V4_V3 $vgpr0_vgpr1_vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16) - $vgpr13_vgpr14_vgpr15_vgpr16 = IMAGE_GATHER4_LZ_O_V4_V3 $vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16) + ; GFX9: $vgpr10_vgpr11_vgpr12_vgpr13 = IMAGE_GATHER4_LZ_O_V4_V3 $vgpr0_vgpr1_vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128)) + ; GFX9: $vgpr13_vgpr14_vgpr15_vgpr16 = IMAGE_GATHER4_LZ_O_V4_V3 $vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128)) + $vgpr10_vgpr11_vgpr12_vgpr13 = IMAGE_GATHER4_LZ_O_V4_V3 $vgpr0_vgpr1_vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128)) + $vgpr13_vgpr14_vgpr15_vgpr16 = IMAGE_GATHER4_LZ_O_V4_V3 $vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128)) ... # Image load vs image sample. Waitcnt required because they are not guaranteed @@ -62,9 +62,9 @@ body: | ; GFX9-LABEL: name: nosampler_sampler ; GFX9: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX9: S_WAITCNT 0 - ; GFX9: $vgpr4 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16) + ; GFX9: $vgpr4 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128)) ; GFX9: S_WAITCNT 3952 - ; GFX9: $vgpr4 = IMAGE_SAMPLE_L_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (load 16) - $vgpr4 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16) - $vgpr4 = IMAGE_SAMPLE_L_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (load 16) + ; GFX9: $vgpr4 = IMAGE_SAMPLE_L_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (load (s128)) + $vgpr4 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128)) + $vgpr4 = IMAGE_SAMPLE_L_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 8, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (load (s128)) ... diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.mir index 24b1afeece130..80d87c7b7a6a9 100644 --- a/llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.mir +++ b/llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.mir @@ -10,8 +10,8 @@ machineFunctionInfo: body: | bb.0: liveins: $sgpr0_sgpr1 - $sgpr4 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 4, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`) + $sgpr4 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 4, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`) S_WAITCNT_VSCNT undef $sgpr_null, 0 - $vgpr0 = GLOBAL_ATOMIC_ADD_RTN $vgpr0_vgpr1, $vgpr2, 0, 1, implicit $exec :: (load store syncscope("agent") seq_cst 4, addrspace 1) + $vgpr0 = GLOBAL_ATOMIC_ADD_RTN $vgpr0_vgpr1, $vgpr2, 0, 1, implicit $exec :: (load store syncscope("agent") seq_cst (s32), addrspace 1) S_CMP_LG_U32 killed $sgpr4, 0, implicit-def $scc ... diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt.mir b/llvm/test/CodeGen/AMDGPU/waitcnt.mir index 0b79a092c678e..96bd9b3ff6f24 100644 --- a/llvm/test/CodeGen/AMDGPU/waitcnt.mir +++ b/llvm/test/CodeGen/AMDGPU/waitcnt.mir @@ -87,34 +87,34 @@ name: flat_zero_waitcnt body: | bb.0: successors: %bb.1 - $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.global4) - $vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 $vgpr7_vgpr8, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.global16) + $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %ir.global4) + $vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 $vgpr7_vgpr8, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.global16) $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec S_BRANCH %bb.1 bb.1: successors: %bb.2 $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, implicit $exec, implicit $flat_scr - $vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 $vgpr7_vgpr8, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.global16) + $vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 $vgpr7_vgpr8, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.global16) $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec S_BRANCH %bb.2 bb.2: successors: %bb.3 - $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.flat4) - $vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 $vgpr7_vgpr8, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.flat16) + $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %ir.flat4) + $vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 $vgpr7_vgpr8, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %ir.flat16) $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec S_BRANCH %bb.3 bb.3: successors: %bb.4 - $vgpr3 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.flat4) - $vgpr4 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.global4) + $vgpr3 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %ir.flat4) + $vgpr4 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %ir.global4) $vgpr0 = V_MOV_B32_e32 $vgpr3, implicit $exec S_BRANCH %bb.4 bb.4: - $vgpr5 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.flat4) + $vgpr5 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %ir.flat4) $vgpr0 = V_MOV_B32_e32 $vgpr5, implicit $exec S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/wqm.mir b/llvm/test/CodeGen/AMDGPU/wqm.mir index 78ee39de8d99e..36e7c21c1f938 100644 --- a/llvm/test/CodeGen/AMDGPU/wqm.mir +++ b/llvm/test/CodeGen/AMDGPU/wqm.mir @@ -210,14 +210,14 @@ body: | undef %7.sub0:vreg_64 = COPY %2:vgpr_32 %7.sub1:vreg_64 = COPY %3:vgpr_32 - %4:vreg_128 = IMAGE_SAMPLE_V4_V2 %7:vreg_64, %100:sgpr_256, %101:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) + %4:vreg_128 = IMAGE_SAMPLE_V4_V2 %7:vreg_64, %100:sgpr_256, %101:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) S_CMP_EQ_U32 %8:sgpr_32, 0, implicit-def $scc undef %5.sub0:vreg_64 = nsz arcp nofpexcept V_ADD_F32_e64 0, %4.sub0:vreg_128, 0, %3:vgpr_32, 1, 0, implicit $mode, implicit $exec %5.sub1:vreg_64 = nsz arcp nofpexcept V_MUL_F32_e32 %2, %3, implicit $mode, implicit $exec %6:vgpr_32 = nsz arcp nofpexcept V_ADD_F32_e64 0, %2:vgpr_32, 0, %3:vgpr_32, 1, 0, implicit $mode, implicit $exec - %9:vreg_128 = IMAGE_SAMPLE_V4_V2 %5:vreg_64, %100:sgpr_256, %101:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) + %9:vreg_128 = IMAGE_SAMPLE_V4_V2 %5:vreg_64, %100:sgpr_256, %101:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4) S_CBRANCH_SCC0 %bb.2, implicit $scc @@ -293,7 +293,7 @@ body: | bb.4: %3:sgpr_128 = IMPLICIT_DEF - %4:vreg_128 = IMAGE_SAMPLE_V4_V2 %0:vreg_64, %2:sgpr_256, %3:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource") + %4:vreg_128 = IMAGE_SAMPLE_V4_V2 %0:vreg_64, %2:sgpr_256, %3:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") $vgpr0 = COPY %4.sub0:vreg_128 $vgpr1 = COPY %4.sub1:vreg_128 SI_RETURN_TO_EPILOG $vgpr0, $vgpr1 diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir b/llvm/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir index 5fd63a573c33b..6a6da04807cf3 100644 --- a/llvm/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir +++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir @@ -255,7 +255,7 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $r0 ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY $r1 ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[COPY1]], 1, 14 /* CC::al */, $noreg, $noreg - ; CHECK: STRH [[ANDri]], [[COPY]], $noreg, 0, 14 /* CC::al */, $noreg :: (store 2) + ; CHECK: STRH [[ANDri]], [[COPY]], $noreg, 0, 14 /* CC::al */, $noreg :: (store (s16)) ; CHECK: BX_RET 14 /* CC::al */, $noreg %0(p0) = COPY $r0 @@ -265,7 +265,7 @@ body: | %3(s16) = G_ZEXT %2(s1) - G_STORE %3(s16), %0(p0) :: (store 2) + G_STORE %3(s16), %0(p0) :: (store (s16)) BX_RET 14, $noreg ... @@ -290,7 +290,7 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY $r1 ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[COPY1]], 1, 14 /* CC::al */, $noreg, $noreg ; CHECK: [[RSBri:%[0-9]+]]:gpr = RSBri [[ANDri]], 0, 14 /* CC::al */, $noreg, $noreg - ; CHECK: STRH [[RSBri]], [[COPY]], $noreg, 0, 14 /* CC::al */, $noreg :: (store 2) + ; CHECK: STRH [[RSBri]], [[COPY]], $noreg, 0, 14 /* CC::al */, $noreg :: (store (s16)) ; CHECK: BX_RET 14 /* CC::al */, $noreg %0(p0) = COPY $r0 @@ -300,7 +300,7 @@ body: | %3(s16) = G_SEXT %2(s1) - G_STORE %3(s16), %0(p0) :: (store 2) + G_STORE %3(s16), %0(p0) :: (store (s16)) BX_RET 14, $noreg ... @@ -323,7 +323,7 @@ body: | ; CHECK: liveins: $r0, $r1 ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $r0 ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY $r1 - ; CHECK: STRH [[COPY1]], [[COPY]], $noreg, 0, 14 /* CC::al */, $noreg :: (store 2) + ; CHECK: STRH [[COPY1]], [[COPY]], $noreg, 0, 14 /* CC::al */, $noreg :: (store (s16)) ; CHECK: BX_RET 14 /* CC::al */, $noreg %0(p0) = COPY $r0 @@ -333,7 +333,7 @@ body: | %3(s16) = G_ANYEXT %2(s1) - G_STORE %3(s16), %0(p0) :: (store 2) + G_STORE %3(s16), %0(p0) :: (store (s16)) BX_RET 14, $noreg ... @@ -358,7 +358,7 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY $r1 ; CHECK: [[COPY2:%[0-9]+]]:gprnopc = COPY [[COPY1]] ; CHECK: [[UXTB:%[0-9]+]]:gprnopc = UXTB [[COPY2]], 0, 14 /* CC::al */, $noreg - ; CHECK: STRH [[UXTB]], [[COPY]], $noreg, 0, 14 /* CC::al */, $noreg :: (store 2) + ; CHECK: STRH [[UXTB]], [[COPY]], $noreg, 0, 14 /* CC::al */, $noreg :: (store (s16)) ; CHECK: BX_RET 14 /* CC::al */, $noreg %0(p0) = COPY $r0 @@ -368,7 +368,7 @@ body: | %3(s16) = G_ZEXT %2(s8) - G_STORE %3(s16), %0(p0) :: (store 2) + G_STORE %3(s16), %0(p0) :: (store (s16)) BX_RET 14, $noreg ... @@ -393,7 +393,7 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY $r1 ; CHECK: [[COPY2:%[0-9]+]]:gprnopc = COPY [[COPY1]] ; CHECK: [[SXTB:%[0-9]+]]:gprnopc = SXTB [[COPY2]], 0, 14 /* CC::al */, $noreg - ; CHECK: STRH [[SXTB]], [[COPY]], $noreg, 0, 14 /* CC::al */, $noreg :: (store 2) + ; CHECK: STRH [[SXTB]], [[COPY]], $noreg, 0, 14 /* CC::al */, $noreg :: (store (s16)) ; CHECK: BX_RET 14 /* CC::al */, $noreg %0(p0) = COPY $r0 @@ -403,7 +403,7 @@ body: | %3(s16) = G_SEXT %2(s8) - G_STORE %3(s16), %0(p0) :: (store 2) + G_STORE %3(s16), %0(p0) :: (store (s16)) BX_RET 14, $noreg ... @@ -426,7 +426,7 @@ body: | ; CHECK: liveins: $r0, $r1 ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $r0 ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY $r1 - ; CHECK: STRH [[COPY1]], [[COPY]], $noreg, 0, 14 /* CC::al */, $noreg :: (store 2) + ; CHECK: STRH [[COPY1]], [[COPY]], $noreg, 0, 14 /* CC::al */, $noreg :: (store (s16)) ; CHECK: BX_RET 14 /* CC::al */, $noreg %0(p0) = COPY $r0 @@ -436,7 +436,7 @@ body: | %3(s16) = G_ANYEXT %2(s8) - G_STORE %3(s16), %0(p0) :: (store 2) + G_STORE %3(s16), %0(p0) :: (store (s16)) BX_RET 14, $noreg ... @@ -460,7 +460,7 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $r0 ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY $r1 ; CHECK: [[ANDri:%[0-9]+]]:gprnopc = ANDri [[COPY1]], 1, 14 /* CC::al */, $noreg, $noreg - ; CHECK: STRBi12 [[ANDri]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store 1) + ; CHECK: STRBi12 [[ANDri]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store (s8)) ; CHECK: BX_RET 14 /* CC::al */, $noreg %0(p0) = COPY $r0 @@ -470,7 +470,7 @@ body: | %3(s8) = G_ZEXT %2(s1) - G_STORE %3(s8), %0(p0) :: (store 1) + G_STORE %3(s8), %0(p0) :: (store (s8)) BX_RET 14, $noreg ... @@ -495,7 +495,7 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY $r1 ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[COPY1]], 1, 14 /* CC::al */, $noreg, $noreg ; CHECK: [[RSBri:%[0-9]+]]:gprnopc = RSBri [[ANDri]], 0, 14 /* CC::al */, $noreg, $noreg - ; CHECK: STRBi12 [[RSBri]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store 1) + ; CHECK: STRBi12 [[RSBri]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store (s8)) ; CHECK: BX_RET 14 /* CC::al */, $noreg %0(p0) = COPY $r0 @@ -505,7 +505,7 @@ body: | %3(s8) = G_SEXT %2(s1) - G_STORE %3(s8), %0(p0) :: (store 1) + G_STORE %3(s8), %0(p0) :: (store (s8)) BX_RET 14, $noreg ... @@ -529,7 +529,7 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $r0 ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY $r1 ; CHECK: [[COPY2:%[0-9]+]]:gprnopc = COPY [[COPY1]] - ; CHECK: STRBi12 [[COPY2]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store 1) + ; CHECK: STRBi12 [[COPY2]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store (s8)) ; CHECK: BX_RET 14 /* CC::al */, $noreg %0(p0) = COPY $r0 @@ -539,7 +539,7 @@ body: | %3(s8) = G_ANYEXT %2(s1) - G_STORE %3(s8), %0(p0) :: (store 1) + G_STORE %3(s8), %0(p0) :: (store (s8)) BX_RET 14, $noreg ... @@ -935,22 +935,22 @@ body: | ; CHECK-LABEL: name: test_load_from_stack ; CHECK: [[ADDri:%[0-9]+]]:gpr = ADDri %fixed-stack.0, 0, 14 /* CC::al */, $noreg, $noreg - ; CHECK: [[LDRi12_:%[0-9]+]]:gpr = LDRi12 [[ADDri]], 0, 14 /* CC::al */, $noreg :: (load 4) + ; CHECK: [[LDRi12_:%[0-9]+]]:gpr = LDRi12 [[ADDri]], 0, 14 /* CC::al */, $noreg :: (load (s32)) ; CHECK: $r0 = COPY [[LDRi12_]] ; CHECK: [[ADDri1:%[0-9]+]]:gpr = ADDri %fixed-stack.2, 0, 14 /* CC::al */, $noreg, $noreg - ; CHECK: [[LDRBi12_:%[0-9]+]]:gprnopc = LDRBi12 [[ADDri1]], 0, 14 /* CC::al */, $noreg :: (load 1) + ; CHECK: [[LDRBi12_:%[0-9]+]]:gprnopc = LDRBi12 [[ADDri1]], 0, 14 /* CC::al */, $noreg :: (load (s1)) ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY [[LDRBi12_]] ; CHECK: $r0 = COPY [[COPY]] ; CHECK: BX_RET 14 /* CC::al */, $noreg %0(p0) = G_FRAME_INDEX %fixed-stack.2 - %1(s32) = G_LOAD %0(p0) :: (load 4) + %1(s32) = G_LOAD %0(p0) :: (load (s32)) $r0 = COPY %1 %2(p0) = G_FRAME_INDEX %fixed-stack.0 - %3(s1) = G_LOAD %2(p0) :: (load 1) + %3(s1) = G_LOAD %2(p0) :: (load (s1)) %4(s32) = G_ANYEXT %3(s1) @@ -978,10 +978,10 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY $r1 ; CHECK: [[COPY2:%[0-9]+]]:gprnopc = COPY [[COPY1]] ; CHECK: [[ANDri:%[0-9]+]]:gprnopc = ANDri [[COPY1]], 1, 14 /* CC::al */, $noreg, $noreg - ; CHECK: STRBi12 [[ANDri]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store 1) - ; CHECK: STRBi12 [[COPY2]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store 1) - ; CHECK: STRH [[COPY1]], [[COPY]], $noreg, 0, 14 /* CC::al */, $noreg :: (store 2) - ; CHECK: STRi12 [[COPY1]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store 4) + ; CHECK: STRBi12 [[ANDri]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store (s1)) + ; CHECK: STRBi12 [[COPY2]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store (s8)) + ; CHECK: STRH [[COPY1]], [[COPY]], $noreg, 0, 14 /* CC::al */, $noreg :: (store (s16)) + ; CHECK: STRi12 [[COPY1]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store (s32)) ; CHECK: BX_RET 14 /* CC::al */, $noreg %0(p0) = COPY $r0 @@ -993,13 +993,13 @@ body: | %2(s16) = G_TRUNC %3(s32) - G_STORE %4(s1), %0(p0) :: (store 1) + G_STORE %4(s1), %0(p0) :: (store (s1)) - G_STORE %1(s8), %0(p0) :: (store 1) + G_STORE %1(s8), %0(p0) :: (store (s8)) - G_STORE %2(s16), %0(p0) :: (store 2) + G_STORE %2(s16), %0(p0) :: (store (s16)) - G_STORE %3(s32), %0(p0) :: (store 4) + G_STORE %3(s32), %0(p0) :: (store (s32)) BX_RET 14, $noreg ... @@ -1117,11 +1117,11 @@ body: | bb.0: ; CHECK-LABEL: name: test_pointer_constant_constrained ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14 /* CC::al */, $noreg, $noreg - ; CHECK: STRi12 [[MOVi]], [[MOVi]], 0, 14 /* CC::al */, $noreg :: (store 4) + ; CHECK: STRi12 [[MOVi]], [[MOVi]], 0, 14 /* CC::al */, $noreg :: (store (p0)) %0(p0) = G_CONSTANT i32 0 ; This constrains %0 before the G_CONSTANT is selected. - G_STORE %0(p0), %0(p0) :: (store 4) + G_STORE %0(p0), %0(p0) :: (store (p0)) ... --- name: test_inttoptr_s32 diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll b/llvm/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll index e0285faef70e8..8eaba740e3d2f 100644 --- a/llvm/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll +++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll @@ -146,7 +146,7 @@ define i32 @test_stack_args(i32 %p0, i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5 ; CHECK: liveins: $r0, $r1, $r2, $r3 ; CHECK: [[VREGP2:%[0-9]+]]:_(s32) = COPY $r2 ; CHECK: [[FIP5:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[P5]] -; CHECK: [[VREGP5:%[0-9]+]]:_(s32) = G_LOAD [[FIP5]]{{.*}}load 4 +; CHECK: [[VREGP5:%[0-9]+]]:_(s32) = G_LOAD [[FIP5]]{{.*}}load (s32) ; CHECK: [[SUM:%[0-9]+]]:_(s32) = G_ADD [[VREGP2]], [[VREGP5]] ; CHECK: $r0 = COPY [[SUM]] ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $r0 @@ -165,7 +165,7 @@ define i16 @test_stack_args_signext(i32 %p0, i16 %p1, i8 %p2, i1 %p3, ; CHECK: [[VREGR1:%[0-9]+]]:_(s32) = COPY $r1 ; CHECK: [[VREGP1:%[0-9]+]]:_(s16) = G_TRUNC [[VREGR1]] ; CHECK: [[FIP5:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[P5]] -; CHECK: [[VREGP5EXT:%[0-9]+]]:_(s32) = G_LOAD [[FIP5]](p0){{.*}}load 4 +; CHECK: [[VREGP5EXT:%[0-9]+]]:_(s32) = G_LOAD [[FIP5]](p0){{.*}}load (s32) ; CHECK: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[VREGP5EXT]], 16 ; CHECK: [[VREGP5:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_SEXT]] ; CHECK: [[SUM:%[0-9]+]]:_(s16) = G_ADD [[VREGP1]], [[VREGP5]] @@ -187,7 +187,7 @@ define i8 @test_stack_args_zeroext(i32 %p0, i16 %p1, i8 %p2, i1 %p3, ; CHECK: [[VREGR2:%[0-9]+]]:_(s32) = COPY $r2 ; CHECK: [[VREGP2:%[0-9]+]]:_(s8) = G_TRUNC [[VREGR2]] ; CHECK: [[FIP4:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[P4]] -; CHECK: [[VREGP4EXT:%[0-9]+]]:_(s32) = G_LOAD [[FIP4]](p0){{.*}}load 4 +; CHECK: [[VREGP4EXT:%[0-9]+]]:_(s32) = G_LOAD [[FIP4]](p0){{.*}}load (s32) ; CHECK: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[VREGP4EXT]], 8 ; CHECK: [[VREGP4:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_ZEXT]] ; CHECK: [[SUM:%[0-9]+]]:_(s8) = G_ADD [[VREGP2]], [[VREGP4]] @@ -209,7 +209,7 @@ define i8 @test_stack_args_noext(i32 %p0, i16 %p1, i8 %p2, i1 %p3, ; CHECK: [[VREGR2:%[0-9]+]]:_(s32) = COPY $r2 ; CHECK: [[VREGP2:%[0-9]+]]:_(s8) = G_TRUNC [[VREGR2]] ; CHECK: [[FIP4:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[P4]] -; CHECK: [[VREGP4:%[0-9]+]]:_(s32) = G_LOAD [[FIP4]](p0){{.*}}load 4 +; CHECK: [[VREGP4:%[0-9]+]]:_(s32) = G_LOAD [[FIP4]](p0){{.*}}load (s32) ; CHECK: [[TRUNC_VREGP4:%[0-9]+]]:_(s8) = G_TRUNC [[VREGP4]] ; CHECK: [[SUM:%[0-9]+]]:_(s8) = G_ADD [[VREGP2]], [[TRUNC_VREGP4]] ; CHECK: [[SUM_EXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SUM]] @@ -228,7 +228,7 @@ define zeroext i16 @test_stack_args_extend_the_extended(i32 %p0, i16 %p1, i8 %p2 ; CHECK-DAG: id: [[P5:[0-9]]]{{.*}}offset: 4{{.*}}size: 4, alignment: 4 ; CHECK: liveins: $r0, $r1, $r2, $r3 ; CHECK: [[FIP5:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[P5]] -; CHECK: [[VREGP5SEXT:%[0-9]+]]:_(s32) = G_LOAD [[FIP5]](p0){{.*}}load 4 +; CHECK: [[VREGP5SEXT:%[0-9]+]]:_(s32) = G_LOAD [[FIP5]](p0){{.*}}load (s32) ; CHECK: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[VREGP5SEXT]], 16 ; CHECK: [[VREGP5:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_SEXT]] ; CHECK: [[VREGP5ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[VREGP5]] @@ -242,7 +242,7 @@ define i16 @test_ptr_arg(i16* %p) { ; CHECK-LABEL: name: test_ptr_arg ; CHECK: liveins: $r0 ; CHECK: [[VREGP:%[0-9]+]]:_(p0) = COPY $r0 -; CHECK: [[VREGV:%[0-9]+]]:_(s16) = G_LOAD [[VREGP]](p0){{.*}}load 2 +; CHECK: [[VREGV:%[0-9]+]]:_(s16) = G_LOAD [[VREGP]](p0){{.*}}load (s16) entry: %v = load i16, i16* %p ret i16 %v @@ -253,7 +253,7 @@ define i32* @test_ptr_ret(i32** %p) { ; CHECK-LABEL: name: test_ptr_ret ; CHECK: liveins: $r0 ; CHECK: [[VREGP:%[0-9]+]]:_(p0) = COPY $r0 -; CHECK: [[VREGV:%[0-9]+]]:_(p0) = G_LOAD [[VREGP]](p0){{.*}}load 4 +; CHECK: [[VREGV:%[0-9]+]]:_(p0) = G_LOAD [[VREGP]](p0){{.*}}load (p0) ; CHECK: $r0 = COPY [[VREGV]] ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $r0 entry: @@ -267,8 +267,8 @@ define i32 @test_ptr_arg_on_stack(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32* %p) { ; CHECK: id: [[P:[0-9]+]]{{.*}}offset: 0{{.*}}size: 4 ; CHECK: liveins: $r0, $r1, $r2, $r3 ; CHECK: [[FIP:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[P]] -; CHECK: [[VREGP:%[0-9]+]]:_(p0) = G_LOAD [[FIP]](p0){{.*}}load 4 -; CHECK: [[VREGV:%[0-9]+]]:_(s32) = G_LOAD [[VREGP]](p0){{.*}}load 4 +; CHECK: [[VREGP:%[0-9]+]]:_(p0) = G_LOAD [[FIP]](p0){{.*}}load (s32) +; CHECK: [[VREGV:%[0-9]+]]:_(s32) = G_LOAD [[VREGP]](p0){{.*}}load (s32) ; CHECK: $r0 = COPY [[VREGV]] ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $r0 entry: @@ -285,7 +285,7 @@ define arm_aapcscc float @test_float_aapcscc(float %p0, float %p1, float %p2, ; CHECK: liveins: $r0, $r1, $r2, $r3 ; CHECK: [[VREGP1:%[0-9]+]]:_(s32) = COPY $r1 ; CHECK: [[FIP5:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[P5]] -; CHECK: [[VREGP5:%[0-9]+]]:_(s32) = G_LOAD [[FIP5]](p0){{.*}}load 4 +; CHECK: [[VREGP5:%[0-9]+]]:_(s32) = G_LOAD [[FIP5]](p0){{.*}}load (s32) ; CHECK: [[VREGV:%[0-9]+]]:_(s32) = G_FADD [[VREGP1]], [[VREGP5]] ; CHECK: $r0 = COPY [[VREGV]] ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $r0 @@ -314,7 +314,7 @@ define arm_aapcs_vfpcc float @test_float_vfpcc(float %p0, float %p1, float %p2, ; CHECK: liveins: $s0, $s1, $s2, $s3, $s4, $s5, $s6, $s7, $s8, $s9, $s10, $s11, $s12, $s13, $s14, $s15 ; CHECK: [[VREGP1:%[0-9]+]]:_(s32) = COPY $s1 ; CHECK: [[FIQ1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[Q1]] -; CHECK: [[VREGQ1:%[0-9]+]]:_(s32) = G_LOAD [[FIQ1]](p0){{.*}}load 4 +; CHECK: [[VREGQ1:%[0-9]+]]:_(s32) = G_LOAD [[FIQ1]](p0){{.*}}load (s32) ; CHECK: [[VREGV:%[0-9]+]]:_(s32) = G_FADD [[VREGP1]], [[VREGQ1]] ; CHECK: $s0 = COPY [[VREGV]] ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0 @@ -335,7 +335,7 @@ define arm_aapcs_vfpcc double @test_double_vfpcc(double %p0, double %p1, double ; CHECK: liveins: $d0, $d1, $d2, $d3, $d4, $d5, $d6, $d7 ; CHECK: [[VREGP1:%[0-9]+]]:_(s64) = COPY $d1 ; CHECK: [[FIQ1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[Q1]] -; CHECK: [[VREGQ1:%[0-9]+]]:_(s64) = G_LOAD [[FIQ1]](p0){{.*}}load 8 +; CHECK: [[VREGQ1:%[0-9]+]]:_(s64) = G_LOAD [[FIQ1]](p0){{.*}}load (s64) ; CHECK: [[VREGV:%[0-9]+]]:_(s64) = G_FADD [[VREGP1]], [[VREGQ1]] ; CHECK: $d0 = COPY [[VREGV]] ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0 @@ -358,7 +358,7 @@ define arm_aapcscc double @test_double_aapcscc(double %p0, double %p1, double %p ; LITTLE: [[VREGP1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[VREGP1LO]](s32), [[VREGP1HI]](s32) ; BIG: [[VREGP1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[VREGP1HI]](s32), [[VREGP1LO]](s32) ; CHECK: [[FIP5:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[P5]] -; CHECK: [[VREGP5:%[0-9]+]]:_(s64) = G_LOAD [[FIP5]](p0){{.*}}load 8 +; CHECK: [[VREGP5:%[0-9]+]]:_(s64) = G_LOAD [[FIP5]](p0){{.*}}load (s64) ; CHECK: [[VREGV:%[0-9]+]]:_(s64) = G_FADD [[VREGP1]], [[VREGP5]] ; LITTLE: [[VREGVLO:%[0-9]+]]:_(s32), [[VREGVHI:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[VREGV]](s64) ; BIG: [[VREGVHI:%[0-9]+]]:_(s32), [[VREGVLO:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[VREGV]](s64) @@ -383,7 +383,7 @@ define arm_aapcs_vfpcc double @test_double_gap_vfpcc(double %p0, float %filler, ; CHECK: liveins: $d0, $d2, $d3, $d4, $d5, $d6, $d7, $s2 ; CHECK: [[VREGP1:%[0-9]+]]:_(s64) = COPY $d2 ; CHECK: [[FIQ1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[Q1]] -; CHECK: [[VREGQ1:%[0-9]+]]:_(s64) = G_LOAD [[FIQ1]](p0){{.*}}load 8 +; CHECK: [[VREGQ1:%[0-9]+]]:_(s64) = G_LOAD [[FIQ1]](p0){{.*}}load (s64) ; CHECK: [[VREGV:%[0-9]+]]:_(s64) = G_FADD [[VREGP1]], [[VREGQ1]] ; CHECK: $d0 = COPY [[VREGV]] ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0 @@ -403,7 +403,7 @@ define arm_aapcscc double @test_double_gap_aapcscc(float %filler, double %p0, ; LITTLE: [[VREGP0:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[VREGP0LO]](s32), [[VREGP0HI]](s32) ; BIG: [[VREGP0:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[VREGP0HI]](s32), [[VREGP0LO]](s32) ; CHECK: [[FIP1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[P1]] -; CHECK: [[VREGP1:%[0-9]+]]:_(s64) = G_LOAD [[FIP1]](p0){{.*}}load 8 +; CHECK: [[VREGP1:%[0-9]+]]:_(s64) = G_LOAD [[FIP1]](p0){{.*}}load (s64) ; CHECK: [[VREGV:%[0-9]+]]:_(s64) = G_FADD [[VREGP0]], [[VREGP1]] ; LITTLE: [[VREGVLO:%[0-9]+]]:_(s32), [[VREGVHI:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[VREGV]](s64) ; BIG: [[VREGVHI:%[0-9]+]]:_(s32), [[VREGVLO:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[VREGV]](s64) @@ -426,7 +426,7 @@ define arm_aapcscc double @test_double_gap2_aapcscc(double %p0, float %filler, ; LITTLE: [[VREGP0:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[VREGP0LO]](s32), [[VREGP0HI]](s32) ; BIG: [[VREGP0:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[VREGP0HI]](s32), [[VREGP0LO]](s32) ; CHECK: [[FIP1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[P1]] -; CHECK: [[VREGP1:%[0-9]+]]:_(s64) = G_LOAD [[FIP1]](p0){{.*}}load 8 +; CHECK: [[VREGP1:%[0-9]+]]:_(s64) = G_LOAD [[FIP1]](p0){{.*}}load (s64) ; CHECK: [[VREGV:%[0-9]+]]:_(s64) = G_FADD [[VREGP0]], [[VREGP1]] ; LITTLE: [[VREGVLO:%[0-9]+]]:_(s32), [[VREGVHI:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[VREGV]](s64) ; BIG: [[VREGVHI:%[0-9]+]]:_(s32), [[VREGVLO:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[VREGV]](s64) @@ -559,13 +559,13 @@ define void @test_load_store_struct({i32, i32} *%addr) { ; when breaking up loads and stores of aggregates. ; CHECK-LABEL: name: test_load_store_struct ; CHECK: [[ADDR1:%[0-9]+]]:_(p0) = COPY $r0 -; CHECK-DAG: [[VAL1:%[0-9]+]]:_(s32) = G_LOAD [[ADDR1]](p0) :: (load 4 from %ir.addr) +; CHECK-DAG: [[VAL1:%[0-9]+]]:_(s32) = G_LOAD [[ADDR1]](p0) :: (load (s32) from %ir.addr) ; CHECK-DAG: [[OFFSET:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK-DAG: [[ADDR2:%[0-9]+]]:_(p0) = G_PTR_ADD [[ADDR1]], [[OFFSET]](s32) -; CHECK-DAG: [[VAL2:%[0-9]+]]:_(s32) = G_LOAD [[ADDR2]](p0) :: (load 4 from %ir.addr + 4) -; CHECK-DAG: G_STORE [[VAL1]](s32), [[ADDR1]](p0) :: (store 4 into %ir.addr) +; CHECK-DAG: [[VAL2:%[0-9]+]]:_(s32) = G_LOAD [[ADDR2]](p0) :: (load (s32) from %ir.addr + 4) +; CHECK-DAG: G_STORE [[VAL1]](s32), [[ADDR1]](p0) :: (store (s32) into %ir.addr) ; CHECK-DAG: [[ADDR3:%[0-9]+]]:_(p0) = COPY [[ADDR2]] -; CHECK-DAG: G_STORE [[VAL2]](s32), [[ADDR3]](p0) :: (store 4 into %ir.addr + 4) +; CHECK-DAG: G_STORE [[VAL2]](s32), [[ADDR3]](p0) :: (store (s32) into %ir.addr + 4) %val = load {i32, i32}, {i32, i32} *%addr, align 4 store {i32, i32} %val, {i32, i32} *%addr, align 4 ret void diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-binops.mir b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-binops.mir index 199b72a9bb84b..c8745e7bb9723 100644 --- a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-binops.mir +++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-binops.mir @@ -57,9 +57,9 @@ body: | liveins: $r0, $r1 %0(p0) = COPY $r0 - %1(s8) = G_LOAD %0 :: (load 1) + %1(s8) = G_LOAD %0 :: (load (s8)) %2(p0) = COPY $r0 - %3(s8) = G_LOAD %2 :: (load 1) + %3(s8) = G_LOAD %2 :: (load (s8)) %4(s8) = G_ADD %1, %3 ; G_ADD with s8 should widen ; CHECK-NOT: {{%[0-9]+}}:_(s8) = G_ADD {{%[0-9]+, %[0-9]+}} @@ -89,9 +89,9 @@ body: | liveins: $r0, $r1 %0(p0) = COPY $r0 - %1(s16) = G_LOAD %0 :: (load 2) + %1(s16) = G_LOAD %0 :: (load (s16)) %2(p0) = COPY $r0 - %3(s16) = G_LOAD %2 :: (load 2) + %3(s16) = G_LOAD %2 :: (load (s16)) %4(s16) = G_ADD %1, %3 ; G_ADD with s16 should widen ; CHECK-NOT: {{%[0-9]+}}:_(s16) = G_ADD {{%[0-9]+, %[0-9]+}} @@ -146,9 +146,9 @@ body: | liveins: $r0, $r1 %0(p0) = COPY $r0 - %1(s8) = G_LOAD %0 :: (load 1) + %1(s8) = G_LOAD %0 :: (load (s8)) %2(p0) = COPY $r0 - %3(s8) = G_LOAD %2 :: (load 1) + %3(s8) = G_LOAD %2 :: (load (s8)) %4(s8) = G_SUB %1, %3 ; G_SUB with s8 should widen ; CHECK-NOT: {{%[0-9]+}}:_(s8) = G_SUB {{%[0-9]+, %[0-9]+}} @@ -178,9 +178,9 @@ body: | liveins: $r0, $r1 %0(p0) = COPY $r0 - %1(s16) = G_LOAD %0 :: (load 2) + %1(s16) = G_LOAD %0 :: (load (s16)) %2(p0) = COPY $r0 - %3(s16) = G_LOAD %2 :: (load 2) + %3(s16) = G_LOAD %2 :: (load (s16)) %4(s16) = G_SUB %1, %3 ; G_SUB with s16 should widen ; CHECK-NOT: {{%[0-9]+}}:_(s16) = G_SUB {{%[0-9]+, %[0-9]+}} @@ -235,9 +235,9 @@ body: | liveins: $r0, $r1 %0(p0) = COPY $r0 - %1(s8) = G_LOAD %0 :: (load 1) + %1(s8) = G_LOAD %0 :: (load (s8)) %2(p0) = COPY $r0 - %3(s8) = G_LOAD %2 :: (load 1) + %3(s8) = G_LOAD %2 :: (load (s8)) %4(s8) = G_MUL %1, %3 ; G_MUL with s8 should widen ; CHECK-NOT: {{%[0-9]+}}:_(s8) = G_MUL {{%[0-9]+, %[0-9]+}} @@ -267,9 +267,9 @@ body: | liveins: $r0, $r1 %0(p0) = COPY $r0 - %1(s16) = G_LOAD %0 :: (load 2) + %1(s16) = G_LOAD %0 :: (load (s16)) %2(p0) = COPY $r0 - %3(s16) = G_LOAD %2 :: (load 2) + %3(s16) = G_LOAD %2 :: (load (s16)) %4(s16) = G_MUL %1, %3 ; G_MUL with s16 should widen ; CHECK-NOT: {{%[0-9]+}}:_(s16) = G_MUL {{%[0-9]+, %[0-9]+}} @@ -324,9 +324,9 @@ body: | liveins: $r0, $r1 %0(p0) = COPY $r0 - %1(s8) = G_LOAD %0 :: (load 1) + %1(s8) = G_LOAD %0 :: (load (s8)) %2(p0) = COPY $r0 - %3(s8) = G_LOAD %2 :: (load 1) + %3(s8) = G_LOAD %2 :: (load (s8)) %4(s8) = G_AND %1, %3 ; G_AND with s8 should widen ; CHECK-NOT: {{%[0-9]+}}:_(s8) = G_AND {{%[0-9]+, %[0-9]+}} @@ -356,9 +356,9 @@ body: | liveins: $r0, $r1 %0(p0) = COPY $r0 - %1(s16) = G_LOAD %0 :: (load 2) + %1(s16) = G_LOAD %0 :: (load (s16)) %2(p0) = COPY $r0 - %3(s16) = G_LOAD %2 :: (load 2) + %3(s16) = G_LOAD %2 :: (load (s16)) %4(s16) = G_AND %1, %3 ; G_AND with s16 should widen ; CHECK-NOT: {{%[0-9]+}}:_(s16) = G_AND {{%[0-9]+, %[0-9]+}} @@ -448,9 +448,9 @@ body: | liveins: $r0, $r1 %0(p0) = COPY $r0 - %1(s8) = G_LOAD %0 :: (load 1) + %1(s8) = G_LOAD %0 :: (load (s8)) %2(p0) = COPY $r0 - %3(s8) = G_LOAD %2 :: (load 1) + %3(s8) = G_LOAD %2 :: (load (s8)) %4(s8) = G_OR %1, %3 ; G_OR with s8 should widen ; CHECK-NOT: {{%[0-9]+}}:_(s8) = G_OR {{%[0-9]+, %[0-9]+}} @@ -480,9 +480,9 @@ body: | liveins: $r0, $r1 %0(p0) = COPY $r0 - %1(s16) = G_LOAD %0 :: (load 2) + %1(s16) = G_LOAD %0 :: (load (s16)) %2(p0) = COPY $r0 - %3(s16) = G_LOAD %2 :: (load 2) + %3(s16) = G_LOAD %2 :: (load (s16)) %4(s16) = G_OR %1, %3 ; G_OR with s16 should widen ; CHECK-NOT: {{%[0-9]+}}:_(s16) = G_OR {{%[0-9]+, %[0-9]+}} @@ -572,9 +572,9 @@ body: | liveins: $r0, $r1 %0(p0) = COPY $r0 - %1(s8) = G_LOAD %0 :: (load 1) + %1(s8) = G_LOAD %0 :: (load (s8)) %2(p0) = COPY $r0 - %3(s8) = G_LOAD %2 :: (load 1) + %3(s8) = G_LOAD %2 :: (load (s8)) %4(s8) = G_XOR %1, %3 ; G_XOR with s8 should widen ; CHECK-NOT: {{%[0-9]+}}:_(s8) = G_XOR {{%[0-9]+, %[0-9]+}} @@ -604,9 +604,9 @@ body: | liveins: $r0, $r1 %0(p0) = COPY $r0 - %1(s16) = G_LOAD %0 :: (load 2) + %1(s16) = G_LOAD %0 :: (load (s16)) %2(p0) = COPY $r0 - %3(s16) = G_LOAD %2 :: (load 2) + %3(s16) = G_LOAD %2 :: (load (s16)) %4(s16) = G_XOR %1, %3 ; G_XOR with s16 should widen ; CHECK-NOT: {{%[0-9]+}}:_(s16) = G_XOR {{%[0-9]+, %[0-9]+}} diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-cmp.mir b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-cmp.mir index dd06933603c2c..c03e985f09c3b 100644 --- a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-cmp.mir +++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-cmp.mir @@ -27,9 +27,9 @@ body: | liveins: $r0, $r1 %0(p0) = COPY $r0 - %1(s8) = G_LOAD %0 :: (load 1) + %1(s8) = G_LOAD %0 :: (load (s8)) %2(p0) = COPY $r1 - %3(s8) = G_LOAD %2 :: (load 1) + %3(s8) = G_LOAD %2 :: (load (s8)) %4(s1) = G_ICMP intpred(ne), %1(s8), %3 ; G_ICMP with s8 should widen ; CHECK: {{%[0-9]+}}:_(s1) = G_ICMP intpred(ne), {{%[0-9]+}}(s32), {{%[0-9]+}} @@ -58,9 +58,9 @@ body: | liveins: $r0, $r1 %0(p0) = COPY $r0 - %1(s16) = G_LOAD %0 :: (load 2) + %1(s16) = G_LOAD %0 :: (load (s16)) %2(p0) = COPY $r1 - %3(s16) = G_LOAD %2 :: (load 2) + %3(s16) = G_LOAD %2 :: (load (s16)) %4(s1) = G_ICMP intpred(slt), %1(s16), %3 ; G_ICMP with s16 should widen ; CHECK: {{%[0-9]+}}:_(s1) = G_ICMP intpred(slt), {{%[0-9]+}}(s32), {{%[0-9]+}} diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-consts.mir b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-consts.mir index 9d66209211058..e1c972855a394 100644 --- a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-consts.mir +++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-consts.mir @@ -28,28 +28,28 @@ body: | ; CHECK: {{%[0-9]+}}:_(s32) = G_CONSTANT i32 42 %1(s16) = G_CONSTANT i16 21 - G_STORE %1(s16), %4(p0) :: (store 2) + G_STORE %1(s16), %4(p0) :: (store (s16)) ; CHECK-NOT: G_CONSTANT i16 ; CHECK: [[EXT:%[0-9]+]]:_(s32) = G_CONSTANT i32 21 ; CHECK: {{%[0-9]+}}:_(s16) = G_TRUNC [[EXT]](s32) ; CHECK-NOT: G_CONSTANT i16 %2(s8) = G_CONSTANT i8 10 - G_STORE %2(s8), %4(p0) :: (store 1) + G_STORE %2(s8), %4(p0) :: (store (s8)) ; CHECK-NOT: G_CONSTANT i8 ; CHECK: [[EXT:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 ; CHECK: {{%[0-9]+}}:_(s8) = G_TRUNC [[EXT]](s32) ; CHECK-NOT: G_CONSTANT i8 %3(s1) = G_CONSTANT i1 1 - G_STORE %3(s1), %4(p0) :: (store 1) + G_STORE %3(s1), %4(p0) :: (store (s1)) ; CHECK-NOT: G_CONSTANT i1 ; CHECK: [[EXT:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK: {{%[0-9]+}}:_(s1) = G_TRUNC [[EXT]](s32) ; CHECK-NOT: G_CONSTANT i1 %5(p0) = G_CONSTANT i32 0 - G_STORE %5(p0), %4(p0) :: (store 4) + G_STORE %5(p0), %4(p0) :: (store (p0)) ; CHECK: {{%[0-9]+}}:_(p0) = G_CONSTANT i32 0 $r0 = COPY %0(s32) diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-exts.mir b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-exts.mir index 1f673b85068b7..c3fb95c995730 100644 --- a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-exts.mir +++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-exts.mir @@ -30,7 +30,7 @@ body: | liveins: $r0 %0(p0) = COPY $r0 - %1(s16) = G_LOAD %0 :: (load 2) + %1(s16) = G_LOAD %0 :: (load (s16)) %2(s32) = G_ZEXT %1 ; G_ZEXT with s16 is legal, so we should find it unchanged in the output ; CHECK: {{%[0-9]+}}:_(s32) = G_ZEXT {{%[0-9]+}} @@ -54,7 +54,7 @@ body: | liveins: $r0 %0(p0) = COPY $r0 - %1(s8) = G_LOAD %0(p0) :: (load 1) + %1(s8) = G_LOAD %0(p0) :: (load (s8)) %2(s32) = G_SEXT %1 ; G_SEXT with s8 is legal, so we should find it unchanged in the output ; CHECK: {{%[0-9]+}}:_(s32) = G_SEXT {{%[0-9]+}} @@ -78,7 +78,7 @@ body: | liveins: $r0 %0(p0) = COPY $r0 - %1(s32) = G_LOAD %0(p0) :: (load 4) + %1(s32) = G_LOAD %0(p0) :: (load (s32)) %2(s32) = G_SEXT_INREG %1, 8 ; G_SEXT_INREG should be lowered to a shift pair ; CHECK: [[T1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 @@ -104,7 +104,7 @@ body: | liveins: $r0 %0(p0) = COPY $r0 - %1(s1) = G_LOAD %0(p0) :: (load 1) + %1(s1) = G_LOAD %0(p0) :: (load (s1)) %2(s32) = G_ANYEXT %1 ; G_ANYEXT with s1 is legal, so we should find it unchanged in the output ; CHECK: {{%[0-9]+}}:_(s32) = G_ANYEXT {{%[0-9]+}} @@ -128,11 +128,11 @@ body: | liveins: $r0 %0(p0) = COPY $r0 - %1(s8) = G_LOAD %0(p0) :: (load 1) + %1(s8) = G_LOAD %0(p0) :: (load (s8)) %2(s16) = G_ZEXT %1 ; G_ZEXT from s8 to s16 is legal, so we should find it unchanged in the output ; CHECK: {{%[0-9]+}}:_(s16) = G_ZEXT {{%[0-9]+}}(s8) - G_STORE %2(s16), %0(p0) :: (store 2) + G_STORE %2(s16), %0(p0) :: (store (s16)) BX_RET 14, $noreg ... --- @@ -152,11 +152,11 @@ body: | liveins: $r0 %0(p0) = COPY $r0 - %1(s1) = G_LOAD %0(p0) :: (load 1) + %1(s1) = G_LOAD %0(p0) :: (load (s1)) %2(s16) = G_SEXT %1(s1) ; G_SEXT from s1 to s16 is legal, so we should find it unchanged in the output ; CHECK: {{%[0-9]+}}:_(s16) = G_SEXT {{%[0-9]+}}(s1) - G_STORE %2(s16), %0(p0) :: (store 2) + G_STORE %2(s16), %0(p0) :: (store (s16)) BX_RET 14, $noreg ... --- @@ -176,11 +176,11 @@ body: | liveins: $r0 %0(p0) = COPY $r0 - %1(s1) = G_LOAD %0(p0) :: (load 1) + %1(s1) = G_LOAD %0(p0) :: (load (s1)) %2(s8) = G_ANYEXT %1 ; G_ANYEXT from s1 to s8 is legal, so we should find it unchanged in the output ; CHECK: {{%[0-9]+}}:_(s8) = G_ANYEXT {{%[0-9]+}}(s1) - G_STORE %2(s8), %0(p0) :: (store 1) + G_STORE %2(s8), %0(p0) :: (store (s8)) BX_RET 14, $noreg ... --- @@ -203,7 +203,7 @@ body: | liveins: $r0 %0(p0) = COPY $r0 - %1(s8) = G_LOAD %0(p0) :: (load 1) + %1(s8) = G_LOAD %0(p0) :: (load (s8)) ; CHECK: [[V8:%[0-9]+]]:_(s8) = G_LOAD %2(s16) = G_ZEXT %1 diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-load-store.mir b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-load-store.mir index 9a63d156daa80..f2b6277a80234 100644 --- a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-load-store.mir +++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-load-store.mir @@ -45,16 +45,16 @@ body: | ; CHECK-DAG: {{%[0-9]+}}:_(s1) = G_LOAD %0(p0) ; CHECK-DAG: {{%[0-9]+}}:_(p0) = G_LOAD %0(p0) %0(p0) = COPY $r0 - %2(s32) = G_LOAD %0(p0) :: (load 4) - G_STORE %2(s32), %0(p0) :: (store 4) - %3(s16) = G_LOAD %0(p0) :: (load 2) - G_STORE %3(s16), %0(p0) :: (store 2) - %4(s8) = G_LOAD %0(p0) :: (load 1) - G_STORE %4(s8), %0(p0) :: (store 1) - %5(s1) = G_LOAD %0(p0) :: (load 1) - G_STORE %5(s1), %0(p0) :: (store 1) - %6(p0) = G_LOAD %0(p0) :: (load 4) - G_STORE %6(p0), %0(p0) :: (store 4) + %2(s32) = G_LOAD %0(p0) :: (load (s32)) + G_STORE %2(s32), %0(p0) :: (store (s32)) + %3(s16) = G_LOAD %0(p0) :: (load (s16)) + G_STORE %3(s16), %0(p0) :: (store (s16)) + %4(s8) = G_LOAD %0(p0) :: (load (s8)) + G_STORE %4(s8), %0(p0) :: (store (s8)) + %5(s1) = G_LOAD %0(p0) :: (load (s1)) + G_STORE %5(s1), %0(p0) :: (store (s1)) + %6(p0) = G_LOAD %0(p0) :: (load (p0)) + G_STORE %6(p0), %0(p0) :: (store (p0)) BX_RET 14, $noreg ... --- @@ -81,9 +81,9 @@ body: | ; This is legal, so we should find it unchanged in the output ; CHECK: [[FIVREG:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[FRAME_INDEX]] - ; CHECK: {{%[0-9]+}}:_(s32) = G_LOAD [[FIVREG]](p0) :: (load 4) + ; CHECK: {{%[0-9]+}}:_(s32) = G_LOAD [[FIVREG]](p0) :: (load (s32)) %0(p0) = G_FRAME_INDEX %fixed-stack.2 - %1(s32) = G_LOAD %0(p0) :: (load 4) + %1(s32) = G_LOAD %0(p0) :: (load (s32)) $r0 = COPY %1(s32) BX_RET 14, $noreg, implicit $r0 ... @@ -106,22 +106,22 @@ body: | ; Can't use the VFP support for unaligned operations, we need to use 32-bits ; operations instead. ; CHECK: [[ADDR1:%[0-9]+]]:_(p0) = COPY $r0 - ; CHECK-NEXT: [[V1:%[0-9]+]]:_(s32) = G_LOAD [[ADDR1]](p0) :: (load 4, align 1) + ; CHECK-NEXT: [[V1:%[0-9]+]]:_(s32) = G_LOAD [[ADDR1]](p0) :: (load (s32), align 1) ; CHECK-NEXT: [[OFF:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK-NEXT: [[ADDR2:%[0-9]+]]:_(p0) = G_PTR_ADD [[ADDR1]], [[OFF]] ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY [[ADDR2]] - ; CHECK-NEXT: [[V2:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4 from unknown-address + 4, align 1) - ; CHECK-NEXT: G_STORE [[V1]](s32), [[ADDR1]](p0) :: (store 4, align 1) - ; CHECK-NEXT: G_STORE [[V2]](s32), [[ADDR2]](p0) :: (store 4 into unknown-address + 4, align 1) + ; CHECK-NEXT: [[V2:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32) from unknown-address + 4, align 1) + ; CHECK-NEXT: G_STORE [[V1]](s32), [[ADDR1]](p0) :: (store (s32), align 1) + ; CHECK-NEXT: G_STORE [[V2]](s32), [[ADDR2]](p0) :: (store (s32) into unknown-address + 4, align 1) %0(p0) = COPY $r0 - %1(s64) = G_LOAD %0(p0) :: (load 8, align 1) - G_STORE %1(s64), %0(p0) :: (store 8, align 1) + %1(s64) = G_LOAD %0(p0) :: (load (s64), align 1) + G_STORE %1(s64), %0(p0) :: (store (s64), align 1) ; For word-aligned we can use VFP operations. - ; CHECK: [[V:%[0-9]+]]:_(s64) = G_LOAD %0(p0) :: (load 8, align 4) - ; CHECK: G_STORE [[V]](s64), %0(p0) :: (store 8, align 4) - %2(s64) = G_LOAD %0(p0) :: (load 8, align 4) - G_STORE %2(s64), %0(p0) :: (store 8, align 4) + ; CHECK: [[V:%[0-9]+]]:_(s64) = G_LOAD %0(p0) :: (load (s64), align 4) + ; CHECK: G_STORE [[V]](s64), %0(p0) :: (store (s64), align 4) + %2(s64) = G_LOAD %0(p0) :: (load (s64), align 4) + G_STORE %2(s64), %0(p0) :: (store (s64), align 4) BX_RET 14, $noreg ... @@ -143,25 +143,25 @@ body: | ; When we don't have VFP support, we need to use 32-bit operations. ; CHECK: [[ADDR1:%[0-9]+]]:_(p0) = COPY $r0 - ; CHECK-NEXT: [[V1:%[0-9]+]]:_(s32) = G_LOAD [[ADDR1]](p0) :: (load 4, align 1) + ; CHECK-NEXT: [[V1:%[0-9]+]]:_(s32) = G_LOAD [[ADDR1]](p0) :: (load (s32), align 1) ; CHECK-NEXT: [[OFF:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK-NEXT: [[ADDR2:%[0-9]+]]:_(p0) = G_PTR_ADD [[ADDR1]], [[OFF]] ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY [[ADDR2]] - ; CHECK-NEXT: [[V2:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4 from unknown-address + 4, align 1) - ; CHECK-NEXT: G_STORE [[V1]](s32), [[ADDR1]](p0) :: (store 4, align 1) + ; CHECK-NEXT: [[V2:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32) from unknown-address + 4, align 1) + ; CHECK-NEXT: G_STORE [[V1]](s32), [[ADDR1]](p0) :: (store (s32), align 1) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY [[ADDR2]] - ; CHECK-NEXT: G_STORE [[V2]](s32), [[COPY2]](p0) :: (store 4 into unknown-address + 4, align 1) + ; CHECK-NEXT: G_STORE [[V2]](s32), [[COPY2]](p0) :: (store (s32) into unknown-address + 4, align 1) %0(p0) = COPY $r0 - %1(s64) = G_LOAD %0(p0) :: (load 8, align 1) - G_STORE %1(s64), %0(p0) :: (store 8, align 1) + %1(s64) = G_LOAD %0(p0) :: (load (s64), align 1) + G_STORE %1(s64), %0(p0) :: (store (s64), align 1) - ; CHECK: [[V1:%[0-9]+]]:_(s32) = G_LOAD [[ADDR1]](p0) :: (load 4) + ; CHECK: [[V1:%[0-9]+]]:_(s32) = G_LOAD [[ADDR1]](p0) :: (load (s32)) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p0) = COPY [[ADDR2]] - ; CHECK-NEXT: [[V2:%[0-9]+]]:_(s32) = G_LOAD [[COPY3]](p0) :: (load 4 from unknown-address + 4) - ; CHECK-NEXT: G_STORE [[V1]](s32), [[ADDR1]](p0) :: (store 4) - ; CHECK-NEXT: G_STORE [[V2]](s32), [[ADDR2]](p0) :: (store 4 into unknown-address + 4) - %2(s64) = G_LOAD %0(p0) :: (load 8, align 4) - G_STORE %2(s64), %0(p0) :: (store 8, align 4) + ; CHECK-NEXT: [[V2:%[0-9]+]]:_(s32) = G_LOAD [[COPY3]](p0) :: (load (s32) from unknown-address + 4) + ; CHECK-NEXT: G_STORE [[V1]](s32), [[ADDR1]](p0) :: (store (s32)) + ; CHECK-NEXT: G_STORE [[V2]](s32), [[ADDR2]](p0) :: (store (s32) into unknown-address + 4) + %2(s64) = G_LOAD %0(p0) :: (load (s64), align 4) + G_STORE %2(s64), %0(p0) :: (store (s64), align 4) BX_RET 14, $noreg ... @@ -207,7 +207,7 @@ body: | liveins: $r0 %0(p0) = COPY $r0 - %1(s16) = G_LOAD %0(p0) :: (load 2) + %1(s16) = G_LOAD %0(p0) :: (load (s16)) ; CHECK-NOT: G_PTR_ADD {{%[0-9]+}}, {{%[0-9]+}}(s16) ; CHECK: {{%[0-9]+}}:_(p0) = G_PTR_ADD {{%[0-9]+}}, {{%[0-9]+}}(s32) diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-vfp4.mir b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-vfp4.mir index 73369ac49d6a1..1613f0ff49a61 100644 --- a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-vfp4.mir +++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-vfp4.mir @@ -98,11 +98,11 @@ body: | ; FIXME: Should avoid multiple copies from $sp ; FIXME: This ought to be align 8 but ARM's call lowering hardcodes it to 1 - ; SOFT-ABI: G_STORE [[Y0]](s32), [[FI1]](p0){{.*}}store 4 into stack, align 1) + ; SOFT-ABI: G_STORE [[Y0]](s32), [[FI1]](p0){{.*}}store (s32) into stack, align 1) ; SOFT-ABI: [[SP2:%[0-9]+]]:_(p0) = COPY $sp ; SOFT-ABI: [[OFF2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; SOFT-ABI: [[FI2:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP2]], [[OFF2]](s32) - ; SOFT-ABI: G_STORE [[Y1]](s32), [[FI2]](p0){{.*}}store 4 into stack + 4, align 1) + ; SOFT-ABI: G_STORE [[Y1]](s32), [[FI2]](p0){{.*}}store (s32) into stack + 4, align 1) ; SOFT-ABI: BL &fma, {{.*}}, implicit $r0, implicit $r1, implicit $r2, implicit $r3, implicit-def $r0, implicit-def $r1 ; SOFT-ABI-DAG: [[R0:%[0-9]+]]:_(s32) = COPY $r0 ; SOFT-ABI-DAG: [[R1:%[0-9]+]]:_(s32) = COPY $r1 diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir index f4408adce960b..b1cab3ab144bc 100644 --- a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir +++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir @@ -27,8 +27,8 @@ body: | %1(s64) = G_CONSTANT i64 17179869200 ; = 4 * 2 ^ 32 + 16 %2(s32), %3(s32) = G_UNMERGE_VALUES %1(s64) - G_STORE %2(s32), %0(p0) :: (store 4) - G_STORE %3(s32), %0(p0) :: (store 4) + G_STORE %2(s32), %0(p0) :: (store (s32)) + G_STORE %3(s32), %0(p0) :: (store (s32)) ; CHECK-DAG: {{%[0-9]+}}:_(s32) = G_CONSTANT i32 4 ; CHECK-DAG: {{%[0-9]+}}:_(s32) = G_CONSTANT i32 16 ; CHECK-NOT: G_CONSTANT i64 diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-param-lowering.ll b/llvm/test/CodeGen/ARM/GlobalISel/arm-param-lowering.ll index 5114ce03b77ad..e30f4315d9653 100644 --- a/llvm/test/CodeGen/ARM/GlobalISel/arm-param-lowering.ll +++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-param-lowering.ll @@ -37,11 +37,11 @@ define arm_aapcscc i32* @test_call_simple_stack_params(i32 *%a, i32 %b) { ; CHECK: [[SP1:%[0-9]+]]:_(p0) = COPY $sp ; CHECK: [[OFF1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[FI1:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP1]], [[OFF1]](s32) -; CHECK: G_STORE [[BVREG]](s32), [[FI1]](p0){{.*}}store 4 +; CHECK: G_STORE [[BVREG]](s32), [[FI1]](p0){{.*}}store (s32) ; CHECK: [[SP2:%[0-9]+]]:_(p0) = COPY $sp ; CHECK: [[OFF2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[FI2:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP2]], [[OFF2]](s32) -; CHECK: G_STORE [[AVREG]](p0), [[FI2]](p0){{.*}}store 4 +; CHECK: G_STORE [[AVREG]](p0), [[FI2]](p0){{.*}}store (s32) ; ARM: BL @simple_stack_params_target, csr_aapcs, implicit-def $lr, implicit $sp, implicit $r0, implicit $r1, implicit $r2, implicit $r3, implicit-def $r0 ; THUMB: tBL 14 /* CC::al */, $noreg, @simple_stack_params_target, csr_aapcs, implicit-def $lr, implicit $sp, implicit $r0, implicit $r1, implicit $r2, implicit $r3, implicit-def $r0 ; CHECK: [[RVREG:%[0-9]+]]:_(p0) = COPY $r0 @@ -77,27 +77,27 @@ define arm_aapcscc signext i16 @test_call_ext_params(i8 %a, i16 %b, i1 %c) { ; CHECK: [[SP1:%[0-9]+]]:_(p0) = COPY $sp ; CHECK: [[OFF1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[FI1:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP1]], [[OFF1]](s32) -; CHECK: G_STORE [[SEXTA2]](s32), [[FI1]](p0){{.*}}store 4 +; CHECK: G_STORE [[SEXTA2]](s32), [[FI1]](p0){{.*}}store (s32) ; CHECK: [[ZEXTA2:%[0-9]+]]:_(s32) = G_ZEXT [[AVREG]] ; CHECK: [[SP2:%[0-9]+]]:_(p0) = COPY $sp ; CHECK: [[OFF2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[FI2:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP2]], [[OFF2]](s32) -; CHECK: G_STORE [[ZEXTA2]](s32), [[FI2]](p0){{.*}}store 4 +; CHECK: G_STORE [[ZEXTA2]](s32), [[FI2]](p0){{.*}}store (s32) ; CHECK: [[SEXTB2:%[0-9]+]]:_(s32) = G_SEXT [[BVREG]] ; CHECK: [[SP3:%[0-9]+]]:_(p0) = COPY $sp ; CHECK: [[OFF3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK: [[FI3:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP3]], [[OFF3]](s32) -; CHECK: G_STORE [[SEXTB2]](s32), [[FI3]](p0){{.*}}store 4 +; CHECK: G_STORE [[SEXTB2]](s32), [[FI3]](p0){{.*}}store (s32) ; CHECK: [[ZEXTB2:%[0-9]+]]:_(s32) = G_ZEXT [[BVREG]] ; CHECK: [[SP4:%[0-9]+]]:_(p0) = COPY $sp ; CHECK: [[OFF4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK: [[FI4:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP4]], [[OFF4]](s32) -; CHECK: G_STORE [[ZEXTB2]](s32), [[FI4]](p0){{.*}}store 4 +; CHECK: G_STORE [[ZEXTB2]](s32), [[FI4]](p0){{.*}}store (s32) ; CHECK: [[ZEXTC:%[0-9]+]]:_(s32) = G_ZEXT [[CVREG]] ; CHECK: [[SP5:%[0-9]+]]:_(p0) = COPY $sp ; CHECK: [[OFF5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK: [[FI5:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP5]], [[OFF5]](s32) -; CHECK: G_STORE [[ZEXTC]](s32), [[FI5]](p0){{.*}}store 4 +; CHECK: G_STORE [[ZEXTC]](s32), [[FI5]](p0){{.*}}store (s32) ; ARM: BL @ext_target, csr_aapcs, implicit-def $lr, implicit $sp, implicit $r0, implicit $r1, implicit $r2, implicit $r3, implicit-def $r0 ; THUMB: tBL 14 /* CC::al */, $noreg, @ext_target, csr_aapcs, implicit-def $lr, implicit $sp, implicit $r0, implicit $r1, implicit $r2, implicit $r3, implicit-def $r0 ; CHECK: [[R0VREG:%[0-9]+]]:_(s32) = COPY $r0 @@ -153,11 +153,11 @@ define arm_aapcscc double @test_call_aapcs_fp_params(double %a, float %b) { ; CHECK: [[SP1:%[0-9]+]]:_(p0) = COPY $sp ; CHECK: [[OFF1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[FI1:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP1]], [[OFF1]](s32) -; CHECK: G_STORE [[BVREG]](s32), [[FI1]](p0){{.*}}store 4 +; CHECK: G_STORE [[BVREG]](s32), [[FI1]](p0){{.*}}store (s32) ; CHECK: [[SP2:%[0-9]+]]:_(p0) = COPY $sp ; CHECK: [[OFF2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK: [[FI2:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP2]], [[OFF2]](s32) -; CHECK: G_STORE [[AVREG]](s64), [[FI2]](p0){{.*}}store 8 +; CHECK: G_STORE [[AVREG]](s64), [[FI2]](p0){{.*}}store (s64) ; ARM: BL @aapcscc_fp_target, csr_aapcs, implicit-def $lr, implicit $sp, implicit $r0, implicit $r2, implicit $r3, implicit-def $r0, implicit-def $r1 ; THUMB: tBL 14 /* CC::al */, $noreg, @aapcscc_fp_target, csr_aapcs, implicit-def $lr, implicit $sp, implicit $r0, implicit $r2, implicit $r3, implicit-def $r0, implicit-def $r1 ; CHECK-DAG: [[R1:%[0-9]+]]:_(s32) = COPY $r0 @@ -264,9 +264,9 @@ define arm_aapcscc void @test_large_int_arrays([20 x i32] %arr) { ; CHECK-DAG: [[R2:%[0-9]+]]:_(s32) = COPY $r2 ; CHECK-DAG: [[R3:%[0-9]+]]:_(s32) = COPY $r3 ; CHECK: [[FIRST_STACK_ELEMENT_FI:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[FIRST_STACK_ID]] -; CHECK: [[FIRST_STACK_ELEMENT:%[0-9]+]]:_(s32) = G_LOAD [[FIRST_STACK_ELEMENT_FI]]{{.*}}load 4 from %fixed-stack.[[FIRST_STACK_ID]] +; CHECK: [[FIRST_STACK_ELEMENT:%[0-9]+]]:_(s32) = G_LOAD [[FIRST_STACK_ELEMENT_FI]]{{.*}}load (s32) from %fixed-stack.[[FIRST_STACK_ID]] ; CHECK: [[LAST_STACK_ELEMENT_FI:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[LAST_STACK_ID]] -; CHECK: [[LAST_STACK_ELEMENT:%[0-9]+]]:_(s32) = G_LOAD [[LAST_STACK_ELEMENT_FI]]{{.*}}load 4 from %fixed-stack.[[LAST_STACK_ID]] +; CHECK: [[LAST_STACK_ELEMENT:%[0-9]+]]:_(s32) = G_LOAD [[LAST_STACK_ELEMENT_FI]]{{.*}}load (s32) from %fixed-stack.[[LAST_STACK_ID]] ; CHECK: ADJCALLSTACKDOWN 64, 0, 14 /* CC::al */, $noreg, implicit-def $sp, implicit $sp ; CHECK: $r0 = COPY [[R0]] ; CHECK: $r1 = COPY [[R1]] @@ -275,13 +275,13 @@ define arm_aapcscc void @test_large_int_arrays([20 x i32] %arr) { ; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY $sp ; CHECK: [[OFF_FIRST_ELEMENT:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[FIRST_STACK_ARG_ADDR:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP]], [[OFF_FIRST_ELEMENT]](s32) -; CHECK: G_STORE [[FIRST_STACK_ELEMENT]](s32), [[FIRST_STACK_ARG_ADDR]]{{.*}}store 4 +; CHECK: G_STORE [[FIRST_STACK_ELEMENT]](s32), [[FIRST_STACK_ARG_ADDR]]{{.*}}store (s32) ; Match the second-to-last offset, so we can get the correct SP for the last element ; CHECK: G_CONSTANT i32 56 ; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY $sp ; CHECK: [[OFF_LAST_ELEMENT:%[0-9]+]]:_(s32) = G_CONSTANT i32 60 ; CHECK: [[LAST_STACK_ARG_ADDR:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP]], [[OFF_LAST_ELEMENT]](s32) -; CHECK: G_STORE [[LAST_STACK_ELEMENT]](s32), [[LAST_STACK_ARG_ADDR]]{{.*}}store 4 +; CHECK: G_STORE [[LAST_STACK_ELEMENT]](s32), [[LAST_STACK_ARG_ADDR]]{{.*}}store (s32) ; ARM: BL @large_int_arrays_target, csr_aapcs, implicit-def $lr, implicit $sp, implicit $r0, implicit $r1, implicit $r2, implicit $r3 ; THUMB: tBL 14 /* CC::al */, $noreg, @large_int_arrays_target, csr_aapcs, implicit-def $lr, implicit $sp, implicit $r0, implicit $r1, implicit $r2, implicit $r3 ; CHECK: ADJCALLSTACKUP 64, 0, 14 /* CC::al */, $noreg, implicit-def $sp, implicit $sp @@ -308,7 +308,7 @@ define arm_aapcscc [2 x float] @test_fp_arrays_aapcs([3 x double] %arr) { ; LITTLE: [[ARR1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ARR1_0]](s32), [[ARR1_1]](s32) ; BIG: [[ARR1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ARR1_1]](s32), [[ARR1_0]](s32) ; CHECK: [[ARR2_FI:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[ARR2_ID]] -; CHECK: [[ARR2:%[0-9]+]]:_(s64) = G_LOAD [[ARR2_FI]]{{.*}}load 8 from %fixed-stack.[[ARR2_ID]] +; CHECK: [[ARR2:%[0-9]+]]:_(s64) = G_LOAD [[ARR2_FI]]{{.*}}load (s64) from %fixed-stack.[[ARR2_ID]] ; CHECK: ADJCALLSTACKDOWN 8, 0, 14 /* CC::al */, $noreg, implicit-def $sp, implicit $sp ; CHECK: [[ARR0_0:%[0-9]+]]:_(s32), [[ARR0_1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ARR0]](s64) ; LITTLE: $r0 = COPY [[ARR0_0]](s32) @@ -323,7 +323,7 @@ define arm_aapcscc [2 x float] @test_fp_arrays_aapcs([3 x double] %arr) { ; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY $sp ; CHECK: [[ARR2_OFFSET:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[ARR2_ADDR:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP]], [[ARR2_OFFSET]](s32) -; CHECK: G_STORE [[ARR2]](s64), [[ARR2_ADDR]](p0){{.*}}store 8 +; CHECK: G_STORE [[ARR2]](s64), [[ARR2_ADDR]](p0){{.*}}store (s64) ; ARM: BL @fp_arrays_aapcs_target, csr_aapcs, implicit-def $lr, implicit $sp, implicit $r0, implicit $r1, implicit $r2, implicit $r3, implicit-def $r0, implicit-def $r1 ; THUMB: tBL 14 /* CC::al */, $noreg, @fp_arrays_aapcs_target, csr_aapcs, implicit-def $lr, implicit $sp, implicit $r0, implicit $r1, implicit $r2, implicit $r3, implicit-def $r0, implicit-def $r1 ; CHECK: [[R0:%[0-9]+]]:_(s32) = COPY $r0 @@ -355,13 +355,13 @@ define arm_aapcs_vfpcc [4 x float] @test_fp_arrays_aapcs_vfp([3 x double] %x, [3 ; CHECK: [[Y1:%[0-9]+]]:_(s32) = COPY $s7 ; CHECK: [[Y2:%[0-9]+]]:_(s32) = COPY $s8 ; CHECK: [[Z0_FI:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[Z0_ID]] -; CHECK: [[Z0:%[0-9]+]]:_(s64) = G_LOAD [[Z0_FI]]{{.*}}load 8 +; CHECK: [[Z0:%[0-9]+]]:_(s64) = G_LOAD [[Z0_FI]]{{.*}}load (s64) ; CHECK: [[Z1_FI:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[Z1_ID]] -; CHECK: [[Z1:%[0-9]+]]:_(s64) = G_LOAD [[Z1_FI]]{{.*}}load 8 +; CHECK: [[Z1:%[0-9]+]]:_(s64) = G_LOAD [[Z1_FI]]{{.*}}load (s64) ; CHECK: [[Z2_FI:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[Z2_ID]] -; CHECK: [[Z2:%[0-9]+]]:_(s64) = G_LOAD [[Z2_FI]]{{.*}}load 8 +; CHECK: [[Z2:%[0-9]+]]:_(s64) = G_LOAD [[Z2_FI]]{{.*}}load (s64) ; CHECK: [[Z3_FI:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[Z3_ID]] -; CHECK: [[Z3:%[0-9]+]]:_(s64) = G_LOAD [[Z3_FI]]{{.*}}load 8 +; CHECK: [[Z3:%[0-9]+]]:_(s64) = G_LOAD [[Z3_FI]]{{.*}}load (s64) ; CHECK: ADJCALLSTACKDOWN 32, 0, 14 /* CC::al */, $noreg, implicit-def $sp, implicit $sp ; CHECK: $d0 = COPY [[X0]](s64) ; CHECK: $d1 = COPY [[X1]](s64) @@ -372,19 +372,19 @@ define arm_aapcs_vfpcc [4 x float] @test_fp_arrays_aapcs_vfp([3 x double] %x, [3 ; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY $sp ; CHECK: [[Z0_OFFSET:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[Z0_ADDR:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP]], [[Z0_OFFSET]](s32) -; CHECK: G_STORE [[Z0]](s64), [[Z0_ADDR]](p0){{.*}}store 8 +; CHECK: G_STORE [[Z0]](s64), [[Z0_ADDR]](p0){{.*}}store (s64) ; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY $sp ; CHECK: [[Z1_OFFSET:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK: [[Z1_ADDR:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP]], [[Z1_OFFSET]](s32) -; CHECK: G_STORE [[Z1]](s64), [[Z1_ADDR]](p0){{.*}}store 8 +; CHECK: G_STORE [[Z1]](s64), [[Z1_ADDR]](p0){{.*}}store (s64) ; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY $sp ; CHECK: [[Z2_OFFSET:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK: [[Z2_ADDR:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP]], [[Z2_OFFSET]](s32) -; CHECK: G_STORE [[Z2]](s64), [[Z2_ADDR]](p0){{.*}}store 8 +; CHECK: G_STORE [[Z2]](s64), [[Z2_ADDR]](p0){{.*}}store (s64) ; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY $sp ; CHECK: [[Z3_OFFSET:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; CHECK: [[Z3_ADDR:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP]], [[Z3_OFFSET]](s32) -; CHECK: G_STORE [[Z3]](s64), [[Z3_ADDR]](p0){{.*}}store 8 +; CHECK: G_STORE [[Z3]](s64), [[Z3_ADDR]](p0){{.*}}store (s64) ; ARM: BL @fp_arrays_aapcs_vfp_target, csr_aapcs, implicit-def $lr, implicit $sp, implicit $d0, implicit $d1, implicit $d2, implicit $s6, implicit $s7, implicit $s8, implicit-def $s0, implicit-def $s1, implicit-def $s2, implicit-def $s3 ; THUMB: tBL 14 /* CC::al */, $noreg, @fp_arrays_aapcs_vfp_target, csr_aapcs, implicit-def $lr, implicit $sp, implicit $d0, implicit $d1, implicit $d2, implicit $s6, implicit $s7, implicit $s8, implicit-def $s0, implicit-def $s1, implicit-def $s2, implicit-def $s3 ; CHECK: [[R0:%[0-9]+]]:_(s32) = COPY $s0 @@ -418,9 +418,9 @@ define arm_aapcscc [2 x i32*] @test_tough_arrays([6 x [4 x i32]] %arr) { ; CHECK-DAG: [[R2:%[0-9]+]]:_(s32) = COPY $r2 ; CHECK-DAG: [[R3:%[0-9]+]]:_(s32) = COPY $r3 ; CHECK: [[FIRST_STACK_ELEMENT_FI:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[FIRST_STACK_ID]] -; CHECK: [[FIRST_STACK_ELEMENT:%[0-9]+]]:_(s32) = G_LOAD [[FIRST_STACK_ELEMENT_FI]]{{.*}}load 4 from %fixed-stack.[[FIRST_STACK_ID]] +; CHECK: [[FIRST_STACK_ELEMENT:%[0-9]+]]:_(s32) = G_LOAD [[FIRST_STACK_ELEMENT_FI]]{{.*}}load (s32) from %fixed-stack.[[FIRST_STACK_ID]] ; CHECK: [[LAST_STACK_ELEMENT_FI:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[LAST_STACK_ID]] -; CHECK: [[LAST_STACK_ELEMENT:%[0-9]+]]:_(s32) = G_LOAD [[LAST_STACK_ELEMENT_FI]]{{.*}}load 4 from %fixed-stack.[[LAST_STACK_ID]] +; CHECK: [[LAST_STACK_ELEMENT:%[0-9]+]]:_(s32) = G_LOAD [[LAST_STACK_ELEMENT_FI]]{{.*}}load (s32) from %fixed-stack.[[LAST_STACK_ID]] ; CHECK: ADJCALLSTACKDOWN 80, 0, 14 /* CC::al */, $noreg, implicit-def $sp, implicit $sp ; CHECK: $r0 = COPY [[R0]] ; CHECK: $r1 = COPY [[R1]] @@ -429,13 +429,13 @@ define arm_aapcscc [2 x i32*] @test_tough_arrays([6 x [4 x i32]] %arr) { ; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY $sp ; CHECK: [[OFF_FIRST_ELEMENT:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[FIRST_STACK_ARG_ADDR:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP]], [[OFF_FIRST_ELEMENT]](s32) -; CHECK: G_STORE [[FIRST_STACK_ELEMENT]](s32), [[FIRST_STACK_ARG_ADDR]]{{.*}}store 4 +; CHECK: G_STORE [[FIRST_STACK_ELEMENT]](s32), [[FIRST_STACK_ARG_ADDR]]{{.*}}store (s32) ; Match the second-to-last offset, so we can get the correct SP for the last element ; CHECK: G_CONSTANT i32 72 ; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY $sp ; CHECK: [[OFF_LAST_ELEMENT:%[0-9]+]]:_(s32) = G_CONSTANT i32 76 ; CHECK: [[LAST_STACK_ARG_ADDR:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP]], [[OFF_LAST_ELEMENT]](s32) -; CHECK: G_STORE [[LAST_STACK_ELEMENT]](s32), [[LAST_STACK_ARG_ADDR]]{{.*}}store 4 +; CHECK: G_STORE [[LAST_STACK_ELEMENT]](s32), [[LAST_STACK_ARG_ADDR]]{{.*}}store (s32) ; ARM: BL @tough_arrays_target, csr_aapcs, implicit-def $lr, implicit $sp, implicit $r0, implicit $r1, implicit $r2, implicit $r3, implicit-def $r0, implicit-def $r1 ; THUMB: tBL 14 /* CC::al */, $noreg, @tough_arrays_target, csr_aapcs, implicit-def $lr, implicit $sp, implicit $r0, implicit $r1, implicit $r2, implicit $r3, implicit-def $r0, implicit-def $r1 ; CHECK: [[R0:%[0-9]+]]:_(p0) = COPY $r0 diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir b/llvm/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir index 2b695c204a9ea..6c97952862c53 100644 --- a/llvm/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir +++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir @@ -415,12 +415,12 @@ body: | bb.0: liveins: $r0 %0(p0) = COPY $r0 - %6(s64) = G_LOAD %0 :: (load 8) - %1(s32) = G_LOAD %0 :: (load 4) - %2(s16) = G_LOAD %0 :: (load 2) - %3(s8) = G_LOAD %0 :: (load 1) - %4(s1) = G_LOAD %0 :: (load 1) - %5(p0) = G_LOAD %0 :: (load 4) + %6(s64) = G_LOAD %0 :: (load (s64)) + %1(s32) = G_LOAD %0 :: (load (s32)) + %2(s16) = G_LOAD %0 :: (load (s16)) + %3(s8) = G_LOAD %0 :: (load (s8)) + %4(s1) = G_LOAD %0 :: (load (s1)) + %5(p0) = G_LOAD %0 :: (load (p0)) BX_RET 14, $noreg, implicit $r0 ... @@ -452,17 +452,17 @@ body: | liveins: $r0, $r1, $r5, $d6 %0(p0) = COPY $r0 %1(s32) = COPY $r1 - G_STORE %1(s32), %0 :: (store 4) + G_STORE %1(s32), %0 :: (store (s32)) %2(s16) = G_TRUNC %1(s32) - G_STORE %2(s16), %0 :: (store 2) + G_STORE %2(s16), %0 :: (store (s16)) %3(s8) = G_TRUNC %1(s32) - G_STORE %3(s8), %0 :: (store 1) + G_STORE %3(s8), %0 :: (store (s8)) %4(s1) = G_TRUNC %1(s32) - G_STORE %4(s1), %0 :: (store 1) + G_STORE %4(s1), %0 :: (store (s1)) %5(p0) = COPY $r5 - G_STORE %5(p0), %0 :: (store 4) + G_STORE %5(p0), %0 :: (store (p0)) %6(s64) = COPY $d6 - G_STORE %6(s64), %0 :: (store 8) + G_STORE %6(s64), %0 :: (store (s64)) BX_RET 14, $noreg, implicit $r0 ... @@ -489,12 +489,12 @@ fixedStack: body: | bb.0: %0(p0) = G_FRAME_INDEX %fixed-stack.0 - %1(s32) = G_LOAD %0(p0) :: (load 4 from %fixed-stack.0, align 4) + %1(s32) = G_LOAD %0(p0) :: (load (s32) from %fixed-stack.0, align 4) %2(p0) = COPY $sp %3(s32) = G_CONSTANT i32 8 %4(p0) = G_PTR_ADD %2, %3(s32) - G_STORE %1(s32), %4(p0) :: (store 4) + G_STORE %1(s32), %4(p0) :: (store (s32)) BX_RET 14, $noreg @@ -682,7 +682,7 @@ body: | %0(s32) = COPY $r0 %2(p0) = COPY $r1 %1(s16) = G_TRUNC %0(s32) - G_STORE %1(s16), %2 :: (store 2) + G_STORE %1(s16), %2 :: (store (s16)) BX_RET 14, $noreg ... --- @@ -706,7 +706,7 @@ body: | %0(s64) = COPY $d0 %2(p0) = COPY $r0 %1(s32) = G_TRUNC %0(s64) - G_STORE %1(s32), %2 :: (store 4) + G_STORE %1(s32), %2 :: (store (s32)) BX_RET 14, $noreg ... --- diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-select-globals-pic.mir b/llvm/test/CodeGen/ARM/GlobalISel/arm-select-globals-pic.mir index 5d2a2297a6298..6ff883cd4a9f1 100644 --- a/llvm/test/CodeGen/ARM/GlobalISel/arm-select-globals-pic.mir +++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-select-globals-pic.mir @@ -32,8 +32,8 @@ body: | ; DARWIN-NOMOVT: [[G:%[0-9]+]]:gpr = LDRLIT_ga_pcrel target-flags(arm-nonlazy) @internal_global ; ELF: [[G:%[0-9]+]]:gpr = LDRLIT_ga_pcrel @internal_global - %1(s32) = G_LOAD %0(p0) :: (load 4 from @internal_global) - ; CHECK: [[V:%[0-9]+]]:gpr = LDRi12 [[G]], 0, 14 /* CC::al */, $noreg :: (load 4 from @internal_global) + %1(s32) = G_LOAD %0(p0) :: (load (s32) from @internal_global) + ; CHECK: [[V:%[0-9]+]]:gpr = LDRi12 [[G]], 0, 14 /* CC::al */, $noreg :: (load (s32) from @internal_global) $r0 = COPY %1(s32) ; CHECK: $r0 = COPY [[V]] @@ -54,12 +54,12 @@ registers: body: | bb.0: %0(p0) = G_GLOBAL_VALUE @external_global - ; DARWIN-MOVT: [[G:%[0-9]+]]:gpr = MOV_ga_pcrel_ldr target-flags(arm-nonlazy) @external_global :: (load 4 from got) - ; DARWIN-NOMOVT: [[G:%[0-9]+]]:gpr = LDRLIT_ga_pcrel_ldr target-flags(arm-nonlazy) @external_global :: (load 4 from got) - ; ELF: [[G:%[0-9]+]]:gpr = LDRLIT_ga_pcrel_ldr target-flags(arm-got) @external_global :: (load 4 from got) + ; DARWIN-MOVT: [[G:%[0-9]+]]:gpr = MOV_ga_pcrel_ldr target-flags(arm-nonlazy) @external_global :: (load (s32) from got) + ; DARWIN-NOMOVT: [[G:%[0-9]+]]:gpr = LDRLIT_ga_pcrel_ldr target-flags(arm-nonlazy) @external_global :: (load (s32) from got) + ; ELF: [[G:%[0-9]+]]:gpr = LDRLIT_ga_pcrel_ldr target-flags(arm-got) @external_global :: (load (s32) from got) - %1(s32) = G_LOAD %0(p0) :: (load 4 from @external_global) - ; CHECK: [[V:%[0-9]+]]:gpr = LDRi12 [[G]], 0, 14 /* CC::al */, $noreg :: (load 4 from @external_global) + %1(s32) = G_LOAD %0(p0) :: (load (s32) from @external_global) + ; CHECK: [[V:%[0-9]+]]:gpr = LDRi12 [[G]], 0, 14 /* CC::al */, $noreg :: (load (s32) from @external_global) $r0 = COPY %1(s32) ; CHECK: $r0 = COPY [[V]] @@ -84,8 +84,8 @@ body: | ; DARWIN-NOMOVT: [[G:%[0-9]+]]:gpr = LDRLIT_ga_pcrel target-flags(arm-nonlazy) @internal_constant ; ELF: [[G:%[0-9]+]]:gpr = LDRLIT_ga_pcrel @internal_constant - %1(s32) = G_LOAD %0(p0) :: (load 4 from @internal_constant) - ; CHECK: [[V:%[0-9]+]]:gpr = LDRi12 [[G]], 0, 14 /* CC::al */, $noreg :: (load 4 from @internal_constant) + %1(s32) = G_LOAD %0(p0) :: (load (s32) from @internal_constant) + ; CHECK: [[V:%[0-9]+]]:gpr = LDRi12 [[G]], 0, 14 /* CC::al */, $noreg :: (load (s32) from @internal_constant) $r0 = COPY %1(s32) ; CHECK: $r0 = COPY [[V]] @@ -106,12 +106,12 @@ registers: body: | bb.0: %0(p0) = G_GLOBAL_VALUE @external_constant - ; DARWIN-MOVT: [[G:%[0-9]+]]:gpr = MOV_ga_pcrel_ldr target-flags(arm-nonlazy) @external_constant :: (load 4 from got) - ; DARWIN-NOMOVT: [[G:%[0-9]+]]:gpr = LDRLIT_ga_pcrel_ldr target-flags(arm-nonlazy) @external_constant :: (load 4 from got) - ; ELF: [[G:%[0-9]+]]:gpr = LDRLIT_ga_pcrel_ldr target-flags(arm-got) @external_constant :: (load 4 from got) + ; DARWIN-MOVT: [[G:%[0-9]+]]:gpr = MOV_ga_pcrel_ldr target-flags(arm-nonlazy) @external_constant :: (load (s32) from got) + ; DARWIN-NOMOVT: [[G:%[0-9]+]]:gpr = LDRLIT_ga_pcrel_ldr target-flags(arm-nonlazy) @external_constant :: (load (s32) from got) + ; ELF: [[G:%[0-9]+]]:gpr = LDRLIT_ga_pcrel_ldr target-flags(arm-got) @external_constant :: (load (s32) from got) - %1(s32) = G_LOAD %0(p0) :: (load 4 from @external_constant) - ; CHECK: [[V:%[0-9]+]]:gpr = LDRi12 [[G]], 0, 14 /* CC::al */, $noreg :: (load 4 from @external_constant) + %1(s32) = G_LOAD %0(p0) :: (load (s32) from @external_constant) + ; CHECK: [[V:%[0-9]+]]:gpr = LDRi12 [[G]], 0, 14 /* CC::al */, $noreg :: (load (s32) from @external_constant) $r0 = COPY %1(s32) ; CHECK: $r0 = COPY [[V]] diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-select-globals-ropi-rwpi.mir b/llvm/test/CodeGen/ARM/GlobalISel/arm-select-globals-ropi-rwpi.mir index 0221a97c25327..ef3b53f7c9609 100644 --- a/llvm/test/CodeGen/ARM/GlobalISel/arm-select-globals-ropi-rwpi.mir +++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-select-globals-ropi-rwpi.mir @@ -37,13 +37,13 @@ body: | bb.0: %0(p0) = G_GLOBAL_VALUE @internal_global ; RW-DEFAULT-MOVT: [[G:%[0-9]+]]:gpr = MOVi32imm @internal_global - ; RW-DEFAULT-NOMOVT: [[G:%[0-9]+]]:gpr = LDRi12 %const.0, 0, 14 /* CC::al */, $noreg :: (load 4 from constant-pool) + ; RW-DEFAULT-NOMOVT: [[G:%[0-9]+]]:gpr = LDRi12 %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool) ; RWPI-MOVT: [[OFF:%[0-9]+]]:gpr = MOVi32imm target-flags(arm-sbrel) @internal_global - ; RWPI-NOMOVT: [[OFF:%[0-9]+]]:gpr = LDRi12 %const.0, 0, 14 /* CC::al */, $noreg :: (load 4 from constant-pool) + ; RWPI-NOMOVT: [[OFF:%[0-9]+]]:gpr = LDRi12 %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool) ; RWPI: [[G:%[0-9]+]]:gpr = ADDrr $r9, [[OFF]], 14 /* CC::al */, $noreg, $noreg - %1(s32) = G_LOAD %0(p0) :: (load 4 from @internal_global) - ; CHECK: [[V:%[0-9]+]]:gpr = LDRi12 [[G]], 0, 14 /* CC::al */, $noreg :: (load 4 from @internal_global) + %1(s32) = G_LOAD %0(p0) :: (load (s32) from @internal_global) + ; CHECK: [[V:%[0-9]+]]:gpr = LDRi12 [[G]], 0, 14 /* CC::al */, $noreg :: (load (s32) from @internal_global) $r0 = COPY %1(s32) ; CHECK: $r0 = COPY [[V]] @@ -71,13 +71,13 @@ body: | bb.0: %0(p0) = G_GLOBAL_VALUE @external_global ; RW-DEFAULT-MOVT: [[G:%[0-9]+]]:gpr = MOVi32imm @external_global - ; RW-DEFAULT-NOMOVT: [[G:%[0-9]+]]:gpr = LDRi12 %const.0, 0, 14 /* CC::al */, $noreg :: (load 4 from constant-pool) + ; RW-DEFAULT-NOMOVT: [[G:%[0-9]+]]:gpr = LDRi12 %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool) ; RWPI-MOVT: [[OFF:%[0-9]+]]:gpr = MOVi32imm target-flags(arm-sbrel) @external_global - ; RWPI-NOMOVT: [[OFF:%[0-9]+]]:gpr = LDRi12 %const.0, 0, 14 /* CC::al */, $noreg :: (load 4 from constant-pool) + ; RWPI-NOMOVT: [[OFF:%[0-9]+]]:gpr = LDRi12 %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool) ; RWPI: [[G:%[0-9]+]]:gpr = ADDrr $r9, [[OFF]], 14 /* CC::al */, $noreg, $noreg - %1(s32) = G_LOAD %0(p0) :: (load 4 from @external_global) - ; CHECK: [[V:%[0-9]+]]:gpr = LDRi12 [[G]], 0, 14 /* CC::al */, $noreg :: (load 4 from @external_global) + %1(s32) = G_LOAD %0(p0) :: (load (s32) from @external_global) + ; CHECK: [[V:%[0-9]+]]:gpr = LDRi12 [[G]], 0, 14 /* CC::al */, $noreg :: (load (s32) from @external_global) $r0 = COPY %1(s32) ; CHECK: $r0 = COPY [[V]] @@ -104,10 +104,10 @@ body: | ; ROPI-MOVT: [[G:%[0-9]+]]:gpr = MOV_ga_pcrel @internal_constant ; ROPI-NOMOVT: [[G:%[0-9]+]]:gpr = LDRLIT_ga_pcrel @internal_constant ; RO-DEFAULT-MOVT: [[G:%[0-9]+]]:gpr = MOVi32imm @internal_constant - ; RO-DEFAULT-NOMOVT: [[G:%[0-9]+]]:gpr = LDRi12 %const.0, 0, 14 /* CC::al */, $noreg :: (load 4 from constant-pool) + ; RO-DEFAULT-NOMOVT: [[G:%[0-9]+]]:gpr = LDRi12 %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool) - %1(s32) = G_LOAD %0(p0) :: (load 4 from @internal_constant) - ; CHECK: [[V:%[0-9]+]]:gpr = LDRi12 [[G]], 0, 14 /* CC::al */, $noreg :: (load 4 from @internal_constant) + %1(s32) = G_LOAD %0(p0) :: (load (s32) from @internal_constant) + ; CHECK: [[V:%[0-9]+]]:gpr = LDRi12 [[G]], 0, 14 /* CC::al */, $noreg :: (load (s32) from @internal_constant) $r0 = COPY %1(s32) ; CHECK: $r0 = COPY [[V]] @@ -134,10 +134,10 @@ body: | ; ROPI-MOVT: [[G:%[0-9]+]]:gpr = MOV_ga_pcrel @external_constant ; ROPI-NOMOVT: [[G:%[0-9]+]]:gpr = LDRLIT_ga_pcrel @external_constant ; RO-DEFAULT-MOVT: [[G:%[0-9]+]]:gpr = MOVi32imm @external_constant - ; RO-DEFAULT-NOMOVT: [[G:%[0-9]+]]:gpr = LDRi12 %const.0, 0, 14 /* CC::al */, $noreg :: (load 4 from constant-pool) + ; RO-DEFAULT-NOMOVT: [[G:%[0-9]+]]:gpr = LDRi12 %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool) - %1(s32) = G_LOAD %0(p0) :: (load 4 from @external_constant) - ; CHECK: [[V:%[0-9]+]]:gpr = LDRi12 [[G]], 0, 14 /* CC::al */, $noreg :: (load 4 from @external_constant) + %1(s32) = G_LOAD %0(p0) :: (load (s32) from @external_constant) + ; CHECK: [[V:%[0-9]+]]:gpr = LDRi12 [[G]], 0, 14 /* CC::al */, $noreg :: (load (s32) from @external_constant) $r0 = COPY %1(s32) ; CHECK: $r0 = COPY [[V]] diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-select-globals-static.mir b/llvm/test/CodeGen/ARM/GlobalISel/arm-select-globals-static.mir index c950c12689cc8..f794aa5beeb18 100644 --- a/llvm/test/CodeGen/ARM/GlobalISel/arm-select-globals-static.mir +++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-select-globals-static.mir @@ -26,11 +26,11 @@ body: | bb.0: %0(p0) = G_GLOBAL_VALUE @internal_global ; ELF-MOVT: [[G:%[0-9]+]]:gpr = MOVi32imm @internal_global - ; ELF-NOMOVT: [[G:%[0-9]+]]:gpr = LDRi12 %const.0, 0, 14 /* CC::al */, $noreg :: (load 4 from constant-pool) + ; ELF-NOMOVT: [[G:%[0-9]+]]:gpr = LDRi12 %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool) ; DARWIN-MOVT: [[G:%[0-9]+]]:gpr = MOVi32imm @internal_global ; DARWIN-NOMOVT: [[G:%[0-9]+]]:gpr = LDRLIT_ga_abs @internal_global - %1(s32) = G_LOAD %0(p0) :: (load 4 from @internal_global) + %1(s32) = G_LOAD %0(p0) :: (load (s32) from @internal_global) ; CHECK: [[V:%[0-9]+]]:gpr = LDRi12 [[G]], 0, 14 /* CC::al */, $noreg $r0 = COPY %1(s32) @@ -56,11 +56,11 @@ body: | bb.0: %0(p0) = G_GLOBAL_VALUE @external_global ; ELF-MOVT: [[G:%[0-9]+]]:gpr = MOVi32imm @external_global - ; ELF-NOMOVT: [[G:%[0-9]+]]:gpr = LDRi12 %const.0, 0, 14 /* CC::al */, $noreg :: (load 4 from constant-pool) + ; ELF-NOMOVT: [[G:%[0-9]+]]:gpr = LDRi12 %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool) ; DARWIN-MOVT: [[G:%[0-9]+]]:gpr = MOVi32imm @external_global ; DARWIN-NOMOVT: [[G:%[0-9]+]]:gpr = LDRLIT_ga_abs @external_global - %1(s32) = G_LOAD %0(p0) :: (load 4 from @external_global) + %1(s32) = G_LOAD %0(p0) :: (load (s32) from @external_global) ; CHECK: [[V:%[0-9]+]]:gpr = LDRi12 [[G]], 0, 14 /* CC::al */, $noreg $r0 = COPY %1(s32) diff --git a/llvm/test/CodeGen/ARM/GlobalISel/irtranslator-varargs-lowering.ll b/llvm/test/CodeGen/ARM/GlobalISel/irtranslator-varargs-lowering.ll index 62d70fe36a2a8..e0ea6d78dbaf0 100644 --- a/llvm/test/CodeGen/ARM/GlobalISel/irtranslator-varargs-lowering.ll +++ b/llvm/test/CodeGen/ARM/GlobalISel/irtranslator-varargs-lowering.ll @@ -15,11 +15,11 @@ define arm_aapcscc i32 @test_call_to_varargs_with_ints(i32 *%a, i32 %b) { ; CHECK: [[SP1:%[0-9]+]]:_(p0) = COPY $sp ; CHECK: [[OFF1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[FI1:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP1]], [[OFF1]](s32) -; CHECK: G_STORE [[BVREG]](s32), [[FI1]](p0){{.*}}store 4 +; CHECK: G_STORE [[BVREG]](s32), [[FI1]](p0){{.*}}store (s32) ; CHECK: [[SP2:%[0-9]+]]:_(p0) = COPY $sp ; CHECK: [[OFF2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[FI2:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP2]], [[OFF2]](s32) -; CHECK: G_STORE [[AVREG]](p0), [[FI2]](p0){{.*}}store 4 +; CHECK: G_STORE [[AVREG]](p0), [[FI2]](p0){{.*}}store (s32) ; ARM: BL @int_varargs_target, csr_aapcs, implicit-def $lr, implicit $sp, implicit $r0, implicit $r1, implicit $r2, implicit $r3, implicit-def $r0 ; THUMB: tBL 14 /* CC::al */, $noreg, @int_varargs_target, csr_aapcs, implicit-def $lr, implicit $sp, implicit $r0, implicit $r1, implicit $r2, implicit $r3, implicit-def $r0 ; CHECK: [[RVREG:%[0-9]+]]:_(s32) = COPY $r0 @@ -46,7 +46,7 @@ define arm_aapcs_vfpcc float @test_call_to_varargs_with_floats(float %a, double ; CHECK: [[SP1:%[0-9]+]]:_(p0) = COPY $sp ; CHECK: [[OFF1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[FI1:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP1]], [[OFF1]](s32) -; CHECK: G_STORE [[BVREG]](s64), [[FI1]](p0){{.*}}store 8 +; CHECK: G_STORE [[BVREG]](s64), [[FI1]](p0){{.*}}store (s64) ; ARM: BL @float_varargs_target, csr_aapcs, implicit-def $lr, implicit $sp, implicit $r0, implicit $r2, implicit $r3, implicit-def $r0 ; THUMB: tBL 14 /* CC::al */, $noreg, @float_varargs_target, csr_aapcs, implicit-def $lr, implicit $sp, implicit $r0, implicit $r2, implicit $r3, implicit-def $r0 ; CHECK: [[RVREG:%[0-9]+]]:_(s32) = COPY $r0 @@ -93,7 +93,7 @@ define arm_aapcs_vfpcc float @test_indirect_call_to_varargs(float (float, double ; CHECK: [[SP1:%[0-9]+]]:_(p0) = COPY $sp ; CHECK: [[OFF1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[FI1:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP1]], [[OFF1]](s32) -; CHECK: G_STORE [[BVREG]](s64), [[FI1]](p0){{.*}}store 8 +; CHECK: G_STORE [[BVREG]](s64), [[FI1]](p0){{.*}}store (s64) ; ARM: BLX [[FPTRVREG]](p0), csr_aapcs, implicit-def $lr, implicit $sp, implicit $r0, implicit $r2, implicit $r3, implicit-def $r0 ; THUMB: tBLXr 14 /* CC::al */, $noreg, [[FPTRVREG]](p0), csr_aapcs, implicit-def $lr, implicit $sp, implicit $r0, implicit $r2, implicit $r3, implicit-def $r0 ; CHECK: [[RVREG:%[0-9]+]]:_(s32) = COPY $r0 diff --git a/llvm/test/CodeGen/ARM/GlobalISel/select-fp-const.mir b/llvm/test/CodeGen/ARM/GlobalISel/select-fp-const.mir index 0421f007d10a8..cebf3f44992c9 100644 --- a/llvm/test/CodeGen/ARM/GlobalISel/select-fp-const.mir +++ b/llvm/test/CodeGen/ARM/GlobalISel/select-fp-const.mir @@ -32,9 +32,9 @@ body: | ; CHECK: [[PTR:%[0-9]+]]:gpr = COPY $r0 %1(s32) = G_FCONSTANT float 0.0 - ; CHECK: [[VREG:%[0-9]+]]:spr = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load 4 from constant-pool) + ; CHECK: [[VREG:%[0-9]+]]:spr = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool) - G_STORE %1(s32), %0 :: (store 4) + G_STORE %1(s32), %0 :: (store (s32)) ; CHECK: VSTRS [[VREG]], [[PTR]], 0, 14 /* CC::al */, $noreg BX_RET 14, $noreg @@ -63,9 +63,9 @@ body: | ; CHECK: [[PTR:%[0-9]+]]:gpr = COPY $r0 %1(s64) = G_FCONSTANT double 0.0 - ; CHECK: [[VREG:%[0-9]+]]:dpr = VLDRD %const.0, 0, 14 /* CC::al */, $noreg :: (load 8 from constant-pool) + ; CHECK: [[VREG:%[0-9]+]]:dpr = VLDRD %const.0, 0, 14 /* CC::al */, $noreg :: (load (s64) from constant-pool) - G_STORE %1(s64), %0 :: (store 8) + G_STORE %1(s64), %0 :: (store (s64)) ; CHECK: VSTRD [[VREG]], [[PTR]], 0, 14 /* CC::al */, $noreg BX_RET 14, $noreg @@ -96,9 +96,9 @@ body: | %1(s32) = G_FCONSTANT float -2.0 ; VFP3: [[VREG:%[0-9]+]]:spr = FCONSTS 128, 14 /* CC::al */, $noreg - ; VFP2: [[VREG:%[0-9]+]]:spr = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load 4 from constant-pool) + ; VFP2: [[VREG:%[0-9]+]]:spr = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool) - G_STORE %1(s32), %0 :: (store 4) + G_STORE %1(s32), %0 :: (store (s32)) ; CHECK: VSTRS [[VREG]], [[PTR]], 0, 14 /* CC::al */, $noreg BX_RET 14, $noreg @@ -129,9 +129,9 @@ body: | %1(s64) = G_FCONSTANT double 5.0e-1 ; VFP3: [[VREG:%[0-9]+]]:dpr = FCONSTD 96, 14 /* CC::al */, $noreg - ; VFP2: [[VREG:%[0-9]+]]:dpr = VLDRD %const.0, 0, 14 /* CC::al */, $noreg :: (load 8 from constant-pool) + ; VFP2: [[VREG:%[0-9]+]]:dpr = VLDRD %const.0, 0, 14 /* CC::al */, $noreg :: (load (s64) from constant-pool) - G_STORE %1(s64), %0 :: (store 8) + G_STORE %1(s64), %0 :: (store (s64)) ; CHECK: VSTRD [[VREG]], [[PTR]], 0, 14 /* CC::al */, $noreg BX_RET 14, $noreg diff --git a/llvm/test/CodeGen/ARM/GlobalISel/select-fp.mir b/llvm/test/CodeGen/ARM/GlobalISel/select-fp.mir index d0fbeb482d579..ec834f1233ace 100644 --- a/llvm/test/CodeGen/ARM/GlobalISel/select-fp.mir +++ b/llvm/test/CodeGen/ARM/GlobalISel/select-fp.mir @@ -68,7 +68,7 @@ body: | %1(s32) = G_TRUNC %0(s64) ; CHECK: [[VREGTRUNC:%[0-9]+]]:gpr, [[UNINTERESTING:%[0-9]+]]:gpr = VMOVRRD [[VREG]] - G_STORE %1(s32), %2 :: (store 4) + G_STORE %1(s32), %2 :: (store (s32)) ; CHECK: STRi12 [[VREGTRUNC]], [[PTR]], 0, 14 /* CC::al */, $noreg BX_RET 14, $noreg @@ -720,7 +720,7 @@ body: | %0(p0) = COPY $r0 ; CHECK: %[[P:[0-9]+]]:gpr = COPY $r0 - %1(s32) = G_LOAD %0(p0) :: (load 4) + %1(s32) = G_LOAD %0(p0) :: (load (s32)) ; CHECK: %[[V:[0-9]+]]:spr = VLDRS %[[P]], 0, 14 /* CC::al */, $noreg $s0 = COPY %1 @@ -746,7 +746,7 @@ body: | %0(p0) = COPY $r0 ; CHECK: %[[P:[0-9]+]]:gpr = COPY $r0 - %1(s64) = G_LOAD %0(p0) :: (load 8) + %1(s64) = G_LOAD %0(p0) :: (load (s64)) ; CHECK: %[[V:[0-9]+]]:dpr = VLDRD %[[P]], 0, 14 /* CC::al */, $noreg $d0 = COPY %1 @@ -777,10 +777,10 @@ body: | %1(s32) = COPY $s0 %2(s64) = COPY $d2 - G_STORE %1(s32), %0(p0) :: (store 4) + G_STORE %1(s32), %0(p0) :: (store (s32)) ; CHECK: VSTRS %[[F32]], %[[P]], 0, 14 /* CC::al */, $noreg - G_STORE %2(s64), %0(p0) :: (store 8) + G_STORE %2(s64), %0(p0) :: (store (s64)) ; CHECK: VSTRD %[[F64]], %[[P]], 0, 14 /* CC::al */, $noreg BX_RET 14, $noreg diff --git a/llvm/test/CodeGen/ARM/GlobalISel/thumb-select-exts.mir b/llvm/test/CodeGen/ARM/GlobalISel/thumb-select-exts.mir index 37a5f60c86897..f626e56b9c68b 100644 --- a/llvm/test/CodeGen/ARM/GlobalISel/thumb-select-exts.mir +++ b/llvm/test/CodeGen/ARM/GlobalISel/thumb-select-exts.mir @@ -320,7 +320,7 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY $r1 ; CHECK: [[COPY2:%[0-9]+]]:rgpr = COPY [[COPY1]] ; CHECK: [[t2ANDri:%[0-9]+]]:rgpr = t2ANDri [[COPY2]], 1, 14 /* CC::al */, $noreg, $noreg - ; CHECK: t2STRHi12 [[t2ANDri]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store 2) + ; CHECK: t2STRHi12 [[t2ANDri]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store (s16)) ; CHECK: BX_RET 14 /* CC::al */, $noreg %0(p0) = COPY $r0 @@ -330,7 +330,7 @@ body: | %3(s16) = G_ZEXT %2(s1) - G_STORE %3(s16), %0(p0) :: (store 2) + G_STORE %3(s16), %0(p0) :: (store (s16)) BX_RET 14, $noreg ... @@ -356,7 +356,7 @@ body: | ; CHECK: [[COPY2:%[0-9]+]]:rgpr = COPY [[COPY1]] ; CHECK: [[t2ANDri:%[0-9]+]]:rgpr = t2ANDri [[COPY2]], 1, 14 /* CC::al */, $noreg, $noreg ; CHECK: [[t2RSBri:%[0-9]+]]:rgpr = t2RSBri [[t2ANDri]], 0, 14 /* CC::al */, $noreg, $noreg - ; CHECK: t2STRHi12 [[t2RSBri]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store 2) + ; CHECK: t2STRHi12 [[t2RSBri]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store (s16)) ; CHECK: BX_RET 14 /* CC::al */, $noreg %0(p0) = COPY $r0 @@ -366,7 +366,7 @@ body: | %3(s16) = G_SEXT %2(s1) - G_STORE %3(s16), %0(p0) :: (store 2) + G_STORE %3(s16), %0(p0) :: (store (s16)) BX_RET 14, $noreg ... @@ -390,7 +390,7 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $r0 ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY $r1 ; CHECK: [[COPY2:%[0-9]+]]:rgpr = COPY [[COPY1]] - ; CHECK: t2STRHi12 [[COPY2]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store 2) + ; CHECK: t2STRHi12 [[COPY2]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store (s16)) ; CHECK: BX_RET 14 /* CC::al */, $noreg %0(p0) = COPY $r0 @@ -400,7 +400,7 @@ body: | %3(s16) = G_ANYEXT %2(s1) - G_STORE %3(s16), %0(p0) :: (store 2) + G_STORE %3(s16), %0(p0) :: (store (s16)) BX_RET 14, $noreg ... @@ -425,7 +425,7 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY $r1 ; CHECK: [[COPY2:%[0-9]+]]:rgpr = COPY [[COPY1]] ; CHECK: [[t2UXTB:%[0-9]+]]:rgpr = t2UXTB [[COPY2]], 0, 14 /* CC::al */, $noreg - ; CHECK: t2STRHi12 [[t2UXTB]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store 2) + ; CHECK: t2STRHi12 [[t2UXTB]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store (s16)) ; CHECK: BX_RET 14 /* CC::al */, $noreg %0(p0) = COPY $r0 @@ -435,7 +435,7 @@ body: | %3(s16) = G_ZEXT %2(s8) - G_STORE %3(s16), %0(p0) :: (store 2) + G_STORE %3(s16), %0(p0) :: (store (s16)) BX_RET 14, $noreg ... @@ -460,7 +460,7 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY $r1 ; CHECK: [[COPY2:%[0-9]+]]:rgpr = COPY [[COPY1]] ; CHECK: [[t2SXTB:%[0-9]+]]:rgpr = t2SXTB [[COPY2]], 0, 14 /* CC::al */, $noreg - ; CHECK: t2STRHi12 [[t2SXTB]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store 2) + ; CHECK: t2STRHi12 [[t2SXTB]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store (s16)) ; CHECK: BX_RET 14 /* CC::al */, $noreg %0(p0) = COPY $r0 @@ -470,7 +470,7 @@ body: | %3(s16) = G_SEXT %2(s8) - G_STORE %3(s16), %0(p0) :: (store 2) + G_STORE %3(s16), %0(p0) :: (store (s16)) BX_RET 14, $noreg ... @@ -494,7 +494,7 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $r0 ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY $r1 ; CHECK: [[COPY2:%[0-9]+]]:rgpr = COPY [[COPY1]] - ; CHECK: t2STRHi12 [[COPY2]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store 2) + ; CHECK: t2STRHi12 [[COPY2]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store (s16)) ; CHECK: BX_RET 14 /* CC::al */, $noreg %0(p0) = COPY $r0 @@ -504,7 +504,7 @@ body: | %3(s16) = G_ANYEXT %2(s8) - G_STORE %3(s16), %0(p0) :: (store 2) + G_STORE %3(s16), %0(p0) :: (store (s16)) BX_RET 14, $noreg ... @@ -529,7 +529,7 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY $r1 ; CHECK: [[COPY2:%[0-9]+]]:rgpr = COPY [[COPY1]] ; CHECK: [[t2ANDri:%[0-9]+]]:rgpr = t2ANDri [[COPY2]], 1, 14 /* CC::al */, $noreg, $noreg - ; CHECK: t2STRBi12 [[t2ANDri]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store 1) + ; CHECK: t2STRBi12 [[t2ANDri]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store (s8)) ; CHECK: BX_RET 14 /* CC::al */, $noreg %0(p0) = COPY $r0 @@ -539,7 +539,7 @@ body: | %3(s8) = G_ZEXT %2(s1) - G_STORE %3(s8), %0(p0) :: (store 1) + G_STORE %3(s8), %0(p0) :: (store (s8)) BX_RET 14, $noreg ... @@ -565,7 +565,7 @@ body: | ; CHECK: [[COPY2:%[0-9]+]]:rgpr = COPY [[COPY1]] ; CHECK: [[t2ANDri:%[0-9]+]]:rgpr = t2ANDri [[COPY2]], 1, 14 /* CC::al */, $noreg, $noreg ; CHECK: [[t2RSBri:%[0-9]+]]:rgpr = t2RSBri [[t2ANDri]], 0, 14 /* CC::al */, $noreg, $noreg - ; CHECK: t2STRBi12 [[t2RSBri]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store 1) + ; CHECK: t2STRBi12 [[t2RSBri]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store (s8)) ; CHECK: BX_RET 14 /* CC::al */, $noreg %0(p0) = COPY $r0 @@ -575,7 +575,7 @@ body: | %3(s8) = G_SEXT %2(s1) - G_STORE %3(s8), %0(p0) :: (store 1) + G_STORE %3(s8), %0(p0) :: (store (s8)) BX_RET 14, $noreg ... @@ -599,7 +599,7 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $r0 ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY $r1 ; CHECK: [[COPY2:%[0-9]+]]:rgpr = COPY [[COPY1]] - ; CHECK: t2STRBi12 [[COPY2]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store 1) + ; CHECK: t2STRBi12 [[COPY2]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store (s8)) ; CHECK: BX_RET 14 /* CC::al */, $noreg %0(p0) = COPY $r0 @@ -609,7 +609,7 @@ body: | %3(s8) = G_ANYEXT %2(s1) - G_STORE %3(s8), %0(p0) :: (store 1) + G_STORE %3(s8), %0(p0) :: (store (s8)) BX_RET 14, $noreg ... diff --git a/llvm/test/CodeGen/ARM/GlobalISel/thumb-select-globals-pic.mir b/llvm/test/CodeGen/ARM/GlobalISel/thumb-select-globals-pic.mir index 9b35bfa29eded..dd717a8f5950c 100644 --- a/llvm/test/CodeGen/ARM/GlobalISel/thumb-select-globals-pic.mir +++ b/llvm/test/CodeGen/ARM/GlobalISel/thumb-select-globals-pic.mir @@ -32,8 +32,8 @@ body: | ; DARWIN-NOMOVT: [[G:%[0-9]+]]:tgpr = tLDRLIT_ga_pcrel target-flags(arm-nonlazy) @internal_global ; ELF: [[G:%[0-9]+]]:tgpr = tLDRLIT_ga_pcrel @internal_global - %1(s32) = G_LOAD %0(p0) :: (load 4 from @internal_global) - ; CHECK: [[V:%[0-9]+]]:gpr = t2LDRi12 [[G]], 0, 14 /* CC::al */, $noreg :: (load 4 from @internal_global) + %1(s32) = G_LOAD %0(p0) :: (load (s32) from @internal_global) + ; CHECK: [[V:%[0-9]+]]:gpr = t2LDRi12 [[G]], 0, 14 /* CC::al */, $noreg :: (load (s32) from @internal_global) $r0 = COPY %1(s32) ; CHECK: $r0 = COPY [[V]] @@ -55,14 +55,14 @@ body: | bb.0: %0(p0) = G_GLOBAL_VALUE @external_global ; DARWIN-MOVT: [[G_GOT:%[0-9]+]]:rgpr = t2MOV_ga_pcrel target-flags(arm-nonlazy) @external_global - ; DARWIN-MOVT: [[G:%[0-9]+]]:gpr = t2LDRi12 [[G_GOT]], 0, 14 /* CC::al */, $noreg :: (load 4 from got) + ; DARWIN-MOVT: [[G:%[0-9]+]]:gpr = t2LDRi12 [[G_GOT]], 0, 14 /* CC::al */, $noreg :: (load (s32) from got) ; DARWIN-NOMOVT: [[G_GOT:%[0-9]+]]:tgpr = tLDRLIT_ga_pcrel target-flags(arm-nonlazy) @external_global - ; DARWIN-NOMOVT: [[G:%[0-9]+]]:gpr = t2LDRi12 [[G_GOT]], 0, 14 /* CC::al */, $noreg :: (load 4 from got) + ; DARWIN-NOMOVT: [[G:%[0-9]+]]:gpr = t2LDRi12 [[G_GOT]], 0, 14 /* CC::al */, $noreg :: (load (s32) from got) ; ELF: [[G_GOT:%[0-9]+]]:tgpr = tLDRLIT_ga_pcrel target-flags(arm-got) @external_global - ; ELF: [[G:%[0-9]+]]:gpr = t2LDRi12 [[G_GOT]], 0, 14 /* CC::al */, $noreg :: (load 4 from got) + ; ELF: [[G:%[0-9]+]]:gpr = t2LDRi12 [[G_GOT]], 0, 14 /* CC::al */, $noreg :: (load (s32) from got) - %1(s32) = G_LOAD %0(p0) :: (load 4 from @external_global) - ; CHECK: [[V:%[0-9]+]]:gpr = t2LDRi12 [[G]], 0, 14 /* CC::al */, $noreg :: (load 4 from @external_global) + %1(s32) = G_LOAD %0(p0) :: (load (s32) from @external_global) + ; CHECK: [[V:%[0-9]+]]:gpr = t2LDRi12 [[G]], 0, 14 /* CC::al */, $noreg :: (load (s32) from @external_global) $r0 = COPY %1(s32) ; CHECK: $r0 = COPY [[V]] @@ -87,8 +87,8 @@ body: | ; DARWIN-NOMOVT: [[G:%[0-9]+]]:tgpr = tLDRLIT_ga_pcrel target-flags(arm-nonlazy) @internal_constant ; ELF: [[G:%[0-9]+]]:tgpr = tLDRLIT_ga_pcrel @internal_constant - %1(s32) = G_LOAD %0(p0) :: (load 4 from @internal_constant) - ; CHECK: [[V:%[0-9]+]]:gpr = t2LDRi12 [[G]], 0, 14 /* CC::al */, $noreg :: (load 4 from @internal_constant) + %1(s32) = G_LOAD %0(p0) :: (load (s32) from @internal_constant) + ; CHECK: [[V:%[0-9]+]]:gpr = t2LDRi12 [[G]], 0, 14 /* CC::al */, $noreg :: (load (s32) from @internal_constant) $r0 = COPY %1(s32) ; CHECK: $r0 = COPY [[V]] @@ -110,14 +110,14 @@ body: | bb.0: %0(p0) = G_GLOBAL_VALUE @external_constant ; DARWIN-MOVT: [[G_GOT:%[0-9]+]]:rgpr = t2MOV_ga_pcrel target-flags(arm-nonlazy) @external_constant - ; DARWIN-MOVT: [[G:%[0-9]+]]:gpr = t2LDRi12 [[G_GOT]], 0, 14 /* CC::al */, $noreg :: (load 4 from got) + ; DARWIN-MOVT: [[G:%[0-9]+]]:gpr = t2LDRi12 [[G_GOT]], 0, 14 /* CC::al */, $noreg :: (load (s32) from got) ; DARWIN-NOMOVT: [[G_GOT:%[0-9]+]]:tgpr = tLDRLIT_ga_pcrel target-flags(arm-nonlazy) @external_constant - ; DARWIN-NOMOVT: [[G:%[0-9]+]]:gpr = t2LDRi12 [[G_GOT]], 0, 14 /* CC::al */, $noreg :: (load 4 from got) + ; DARWIN-NOMOVT: [[G:%[0-9]+]]:gpr = t2LDRi12 [[G_GOT]], 0, 14 /* CC::al */, $noreg :: (load (s32) from got) ; ELF: [[G_GOT:%[0-9]+]]:tgpr = tLDRLIT_ga_pcrel target-flags(arm-got) @external_constant - ; ELF: [[G:%[0-9]+]]:gpr = t2LDRi12 [[G_GOT]], 0, 14 /* CC::al */, $noreg :: (load 4 from got) + ; ELF: [[G:%[0-9]+]]:gpr = t2LDRi12 [[G_GOT]], 0, 14 /* CC::al */, $noreg :: (load (s32) from got) - %1(s32) = G_LOAD %0(p0) :: (load 4 from @external_constant) - ; CHECK: [[V:%[0-9]+]]:gpr = t2LDRi12 [[G]], 0, 14 /* CC::al */, $noreg :: (load 4 from @external_constant) + %1(s32) = G_LOAD %0(p0) :: (load (s32) from @external_constant) + ; CHECK: [[V:%[0-9]+]]:gpr = t2LDRi12 [[G]], 0, 14 /* CC::al */, $noreg :: (load (s32) from @external_constant) $r0 = COPY %1(s32) ; CHECK: $r0 = COPY [[V]] diff --git a/llvm/test/CodeGen/ARM/GlobalISel/thumb-select-globals-ropi-rwpi.mir b/llvm/test/CodeGen/ARM/GlobalISel/thumb-select-globals-ropi-rwpi.mir index ad13a282ec2d0..3277a3ddbfe3a 100644 --- a/llvm/test/CodeGen/ARM/GlobalISel/thumb-select-globals-ropi-rwpi.mir +++ b/llvm/test/CodeGen/ARM/GlobalISel/thumb-select-globals-ropi-rwpi.mir @@ -37,13 +37,13 @@ body: | bb.0: %0(p0) = G_GLOBAL_VALUE @internal_global ; RW-DEFAULT-MOVT: [[G:%[0-9]+]]:rgpr = t2MOVi32imm @internal_global - ; RW-DEFAULT-NOMOVT: [[G:%[0-9]+]]:gpr = t2LDRpci %const.0, 14 /* CC::al */, $noreg :: (load 4 from constant-pool) + ; RW-DEFAULT-NOMOVT: [[G:%[0-9]+]]:gpr = t2LDRpci %const.0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool) ; RWPI-MOVT: [[OFF:%[0-9]+]]:rgpr = t2MOVi32imm target-flags(arm-sbrel) @internal_global - ; RWPI-NOMOVT: [[OFF:%[0-9]+]]:rgpr = t2LDRpci %const.0, 14 /* CC::al */, $noreg :: (load 4 from constant-pool) + ; RWPI-NOMOVT: [[OFF:%[0-9]+]]:rgpr = t2LDRpci %const.0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool) ; RWPI: [[G:%[0-9]+]]:gprnopc = t2ADDrr $r9, [[OFF]], 14 /* CC::al */, $noreg, $noreg - %1(s32) = G_LOAD %0(p0) :: (load 4 from @internal_global) - ; CHECK: [[V:%[0-9]+]]:gpr = t2LDRi12 [[G]], 0, 14 /* CC::al */, $noreg :: (load 4 from @internal_global) + %1(s32) = G_LOAD %0(p0) :: (load (s32) from @internal_global) + ; CHECK: [[V:%[0-9]+]]:gpr = t2LDRi12 [[G]], 0, 14 /* CC::al */, $noreg :: (load (s32) from @internal_global) $r0 = COPY %1(s32) ; CHECK: $r0 = COPY [[V]] @@ -71,13 +71,13 @@ body: | bb.0: %0(p0) = G_GLOBAL_VALUE @external_global ; RW-DEFAULT-MOVT: [[G:%[0-9]+]]:rgpr = t2MOVi32imm @external_global - ; RW-DEFAULT-NOMOVT: [[G:%[0-9]+]]:gpr = t2LDRpci %const.0, 14 /* CC::al */, $noreg :: (load 4 from constant-pool) + ; RW-DEFAULT-NOMOVT: [[G:%[0-9]+]]:gpr = t2LDRpci %const.0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool) ; RWPI-MOVT: [[OFF:%[0-9]+]]:rgpr = t2MOVi32imm target-flags(arm-sbrel) @external_global - ; RWPI-NOMOVT: [[OFF:%[0-9]+]]:rgpr = t2LDRpci %const.0, 14 /* CC::al */, $noreg :: (load 4 from constant-pool) + ; RWPI-NOMOVT: [[OFF:%[0-9]+]]:rgpr = t2LDRpci %const.0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool) ; RWPI: [[G:%[0-9]+]]:gprnopc = t2ADDrr $r9, [[OFF]], 14 /* CC::al */, $noreg, $noreg - %1(s32) = G_LOAD %0(p0) :: (load 4 from @external_global) - ; CHECK: [[V:%[0-9]+]]:gpr = t2LDRi12 [[G]], 0, 14 /* CC::al */, $noreg :: (load 4 from @external_global) + %1(s32) = G_LOAD %0(p0) :: (load (s32) from @external_global) + ; CHECK: [[V:%[0-9]+]]:gpr = t2LDRi12 [[G]], 0, 14 /* CC::al */, $noreg :: (load (s32) from @external_global) $r0 = COPY %1(s32) ; CHECK: $r0 = COPY [[V]] @@ -104,10 +104,10 @@ body: | ; ROPI-MOVT: [[G:%[0-9]+]]:rgpr = t2MOV_ga_pcrel @internal_constant ; ROPI-NOMOVT: [[G:%[0-9]+]]:tgpr = tLDRLIT_ga_pcrel @internal_constant ; RO-DEFAULT-MOVT: [[G:%[0-9]+]]:rgpr = t2MOVi32imm @internal_constant - ; RO-DEFAULT-NOMOVT: [[G:%[0-9]+]]:gpr = t2LDRpci %const.0, 14 /* CC::al */, $noreg :: (load 4 from constant-pool) + ; RO-DEFAULT-NOMOVT: [[G:%[0-9]+]]:gpr = t2LDRpci %const.0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool) - %1(s32) = G_LOAD %0(p0) :: (load 4 from @internal_constant) - ; CHECK: [[V:%[0-9]+]]:gpr = t2LDRi12 [[G]], 0, 14 /* CC::al */, $noreg :: (load 4 from @internal_constant) + %1(s32) = G_LOAD %0(p0) :: (load (s32) from @internal_constant) + ; CHECK: [[V:%[0-9]+]]:gpr = t2LDRi12 [[G]], 0, 14 /* CC::al */, $noreg :: (load (s32) from @internal_constant) $r0 = COPY %1(s32) ; CHECK: $r0 = COPY [[V]] @@ -134,10 +134,10 @@ body: | ; ROPI-MOVT: [[G:%[0-9]+]]:rgpr = t2MOV_ga_pcrel @external_constant ; ROPI-NOMOVT: [[G:%[0-9]+]]:tgpr = tLDRLIT_ga_pcrel @external_constant ; RO-DEFAULT-MOVT: [[G:%[0-9]+]]:rgpr = t2MOVi32imm @external_constant - ; RO-DEFAULT-NOMOVT: [[G:%[0-9]+]]:gpr = t2LDRpci %const.0, 14 /* CC::al */, $noreg :: (load 4 from constant-pool) + ; RO-DEFAULT-NOMOVT: [[G:%[0-9]+]]:gpr = t2LDRpci %const.0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool) - %1(s32) = G_LOAD %0(p0) :: (load 4 from @external_constant) - ; CHECK: [[V:%[0-9]+]]:gpr = t2LDRi12 [[G]], 0, 14 /* CC::al */, $noreg :: (load 4 from @external_constant) + %1(s32) = G_LOAD %0(p0) :: (load (s32) from @external_constant) + ; CHECK: [[V:%[0-9]+]]:gpr = t2LDRi12 [[G]], 0, 14 /* CC::al */, $noreg :: (load (s32) from @external_constant) $r0 = COPY %1(s32) ; CHECK: $r0 = COPY [[V]] diff --git a/llvm/test/CodeGen/ARM/GlobalISel/thumb-select-globals-static.mir b/llvm/test/CodeGen/ARM/GlobalISel/thumb-select-globals-static.mir index 22ddb0a5bf37d..d2009051ce089 100644 --- a/llvm/test/CodeGen/ARM/GlobalISel/thumb-select-globals-static.mir +++ b/llvm/test/CodeGen/ARM/GlobalISel/thumb-select-globals-static.mir @@ -26,12 +26,12 @@ body: | bb.0: %0(p0) = G_GLOBAL_VALUE @internal_global ; ELF-MOVT: [[G:%[0-9]+]]:rgpr = t2MOVi32imm @internal_global - ; ELF-NOMOVT: [[G:%[0-9]+]]:gpr = t2LDRpci %const.0, 14 /* CC::al */, $noreg :: (load 4 from constant-pool) + ; ELF-NOMOVT: [[G:%[0-9]+]]:gpr = t2LDRpci %const.0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool) ; DARWIN-MOVT: [[G:%[0-9]+]]:rgpr = t2MOVi32imm @internal_global ; DARWIN-NOMOVT: [[G:%[0-9]+]]:tgpr = tLDRLIT_ga_abs @internal_global - %1(s32) = G_LOAD %0(p0) :: (load 4 from @internal_global) - ; CHECK: [[V:%[0-9]+]]:gpr = t2LDRi12 [[G]], 0, 14 /* CC::al */, $noreg :: (load 4 from @internal_global) + %1(s32) = G_LOAD %0(p0) :: (load (s32) from @internal_global) + ; CHECK: [[V:%[0-9]+]]:gpr = t2LDRi12 [[G]], 0, 14 /* CC::al */, $noreg :: (load (s32) from @internal_global) $r0 = COPY %1(s32) ; CHECK: $r0 = COPY [[V]] @@ -56,12 +56,12 @@ body: | bb.0: %0(p0) = G_GLOBAL_VALUE @external_global ; ELF-MOVT: [[G:%[0-9]+]]:rgpr = t2MOVi32imm @external_global - ; ELF-NOMOVT: [[G:%[0-9]+]]:gpr = t2LDRpci %const.0, 14 /* CC::al */, $noreg :: (load 4 from constant-pool) + ; ELF-NOMOVT: [[G:%[0-9]+]]:gpr = t2LDRpci %const.0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool) ; DARWIN-MOVT: [[G:%[0-9]+]]:rgpr = t2MOVi32imm @external_global ; DARWIN-NOMOVT: [[G:%[0-9]+]]:tgpr = tLDRLIT_ga_abs @external_global - %1(s32) = G_LOAD %0(p0) :: (load 4 from @external_global) - ; CHECK: [[V:%[0-9]+]]:gpr = t2LDRi12 [[G]], 0, 14 /* CC::al */, $noreg :: (load 4 from @external_global) + %1(s32) = G_LOAD %0(p0) :: (load (s32) from @external_global) + ; CHECK: [[V:%[0-9]+]]:gpr = t2LDRi12 [[G]], 0, 14 /* CC::al */, $noreg :: (load (s32) from @external_global) $r0 = COPY %1(s32) ; CHECK: $r0 = COPY [[V]] diff --git a/llvm/test/CodeGen/ARM/GlobalISel/thumb-select-load-store.mir b/llvm/test/CodeGen/ARM/GlobalISel/thumb-select-load-store.mir index aebd8c50aea33..4f92f411de30f 100644 --- a/llvm/test/CodeGen/ARM/GlobalISel/thumb-select-load-store.mir +++ b/llvm/test/CodeGen/ARM/GlobalISel/thumb-select-load-store.mir @@ -24,15 +24,15 @@ body: | ; CHECK-LABEL: name: test_s1 ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $r0 - ; CHECK: [[t2LDRBi12_:%[0-9]+]]:rgpr = t2LDRBi12 [[COPY]], 0, 14 /* CC::al */, $noreg :: (load 1) + ; CHECK: [[t2LDRBi12_:%[0-9]+]]:rgpr = t2LDRBi12 [[COPY]], 0, 14 /* CC::al */, $noreg :: (load (s1)) ; CHECK: [[t2ANDri:%[0-9]+]]:rgpr = t2ANDri [[t2LDRBi12_]], 1, 14 /* CC::al */, $noreg, $noreg - ; CHECK: t2STRBi12 [[t2ANDri]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store 1) + ; CHECK: t2STRBi12 [[t2ANDri]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store (s1)) ; CHECK: BX_RET 14 /* CC::al */, $noreg %0(p0) = COPY $r0 - %1(s1) = G_LOAD %0(p0) :: (load 1) + %1(s1) = G_LOAD %0(p0) :: (load (s1)) - G_STORE %1(s1), %0(p0) :: (store 1) + G_STORE %1(s1), %0(p0) :: (store (s1)) BX_RET 14, $noreg ... @@ -50,14 +50,14 @@ body: | ; CHECK-LABEL: name: test_s8 ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $r0 - ; CHECK: [[t2LDRBi12_:%[0-9]+]]:rgpr = t2LDRBi12 [[COPY]], 0, 14 /* CC::al */, $noreg :: (load 1) - ; CHECK: t2STRBi12 [[t2LDRBi12_]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store 1) + ; CHECK: [[t2LDRBi12_:%[0-9]+]]:rgpr = t2LDRBi12 [[COPY]], 0, 14 /* CC::al */, $noreg :: (load (s8)) + ; CHECK: t2STRBi12 [[t2LDRBi12_]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store (s8)) ; CHECK: BX_RET 14 /* CC::al */, $noreg %0(p0) = COPY $r0 - %1(s8) = G_LOAD %0(p0) :: (load 1) + %1(s8) = G_LOAD %0(p0) :: (load (s8)) - G_STORE %1(s8), %0(p0) :: (store 1) + G_STORE %1(s8), %0(p0) :: (store (s8)) BX_RET 14, $noreg ... @@ -75,14 +75,14 @@ body: | ; CHECK-LABEL: name: test_s16 ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $r0 - ; CHECK: [[t2LDRHi12_:%[0-9]+]]:rgpr = t2LDRHi12 [[COPY]], 0, 14 /* CC::al */, $noreg :: (load 2) - ; CHECK: t2STRHi12 [[t2LDRHi12_]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store 2) + ; CHECK: [[t2LDRHi12_:%[0-9]+]]:rgpr = t2LDRHi12 [[COPY]], 0, 14 /* CC::al */, $noreg :: (load (s16)) + ; CHECK: t2STRHi12 [[t2LDRHi12_]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store (s16)) ; CHECK: BX_RET 14 /* CC::al */, $noreg %0(p0) = COPY $r0 - %1(s16) = G_LOAD %0(p0) :: (load 2) + %1(s16) = G_LOAD %0(p0) :: (load (s16)) - G_STORE %1(s16), %0(p0) :: (store 2) + G_STORE %1(s16), %0(p0) :: (store (s16)) BX_RET 14, $noreg ... @@ -100,14 +100,14 @@ body: | ; CHECK-LABEL: name: test_s32 ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $r0 - ; CHECK: [[t2LDRi12_:%[0-9]+]]:gpr = t2LDRi12 [[COPY]], 0, 14 /* CC::al */, $noreg :: (load 4) - ; CHECK: t2STRi12 [[t2LDRi12_]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store 4) + ; CHECK: [[t2LDRi12_:%[0-9]+]]:gpr = t2LDRi12 [[COPY]], 0, 14 /* CC::al */, $noreg :: (load (s32)) + ; CHECK: t2STRi12 [[t2LDRi12_]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store (s32)) ; CHECK: BX_RET 14 /* CC::al */, $noreg %0(p0) = COPY $r0 - %1(s32) = G_LOAD %0(p0) :: (load 4) + %1(s32) = G_LOAD %0(p0) :: (load (s32)) - G_STORE %1(s32), %0(p0) :: (store 4) + G_STORE %1(s32), %0(p0) :: (store (s32)) BX_RET 14, $noreg ... @@ -161,22 +161,22 @@ body: | ; CHECK-LABEL: name: test_load_from_stack ; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = t2ADDri %fixed-stack.0, 0, 14 /* CC::al */, $noreg, $noreg - ; CHECK: [[t2LDRi12_:%[0-9]+]]:gpr = t2LDRi12 [[t2ADDri]], 0, 14 /* CC::al */, $noreg :: (load 4) + ; CHECK: [[t2LDRi12_:%[0-9]+]]:gpr = t2LDRi12 [[t2ADDri]], 0, 14 /* CC::al */, $noreg :: (load (s32)) ; CHECK: $r0 = COPY [[t2LDRi12_]] ; CHECK: [[t2ADDri1:%[0-9]+]]:rgpr = t2ADDri %fixed-stack.2, 0, 14 /* CC::al */, $noreg, $noreg - ; CHECK: [[t2LDRBi12_:%[0-9]+]]:gprnopc = t2LDRBi12 [[t2ADDri1]], 0, 14 /* CC::al */, $noreg :: (load 1) + ; CHECK: [[t2LDRBi12_:%[0-9]+]]:gprnopc = t2LDRBi12 [[t2ADDri1]], 0, 14 /* CC::al */, $noreg :: (load (s1)) ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY [[t2LDRBi12_]] ; CHECK: $r0 = COPY [[COPY]] ; CHECK: BX_RET 14 /* CC::al */, $noreg %0(p0) = G_FRAME_INDEX %fixed-stack.2 - %1(s32) = G_LOAD %0(p0) :: (load 4) + %1(s32) = G_LOAD %0(p0) :: (load (s32)) $r0 = COPY %1 %2(p0) = G_FRAME_INDEX %fixed-stack.0 - %3(s1) = G_LOAD %2(p0) :: (load 1) + %3(s1) = G_LOAD %2(p0) :: (load (s1)) %4(s32) = G_ANYEXT %3(s1) diff --git a/llvm/test/CodeGen/ARM/cmp2-peephole-thumb.mir b/llvm/test/CodeGen/ARM/cmp2-peephole-thumb.mir index ba983ba5bf242..96a02344ee79f 100644 --- a/llvm/test/CodeGen/ARM/cmp2-peephole-thumb.mir +++ b/llvm/test/CodeGen/ARM/cmp2-peephole-thumb.mir @@ -81,22 +81,22 @@ body: | %1 = COPY $r1 %0 = COPY $r0 %2, $cpsr = tMUL %0, %1, 14, $noreg - tSTRspi %2, %stack.1.mul, 0, 14, $noreg :: (store 4 into %ir.mul) + tSTRspi %2, %stack.1.mul, 0, 14, $noreg :: (store (s32) into %ir.mul) tCMPi8 %2, 0, 14, $noreg, implicit-def $cpsr tBcc %bb.2.if.end, 12, $cpsr tB %bb.1.if.then, 14, $noreg bb.1.if.then: %4, $cpsr = tMOVi8 42, 14, $noreg - tSTRspi killed %4, %stack.0.retval, 0, 14, $noreg :: (store 4 into %ir.retval) + tSTRspi killed %4, %stack.0.retval, 0, 14, $noreg :: (store (s32) into %ir.retval) tB %bb.3.return, 14, $noreg bb.2.if.end: %3, $cpsr = tMOVi8 1, 14, $noreg - tSTRspi killed %3, %stack.0.retval, 0, 14, $noreg :: (store 4 into %ir.retval) + tSTRspi killed %3, %stack.0.retval, 0, 14, $noreg :: (store (s32) into %ir.retval) bb.3.return: - %5 = tLDRspi %stack.0.retval, 0, 14, $noreg :: (dereferenceable load 4 from %ir.retval) + %5 = tLDRspi %stack.0.retval, 0, 14, $noreg :: (dereferenceable load (s32) from %ir.retval) $r0 = COPY %5 tBX_RET 14, $noreg, implicit $r0 diff --git a/llvm/test/CodeGen/ARM/cmpxchg.mir b/llvm/test/CodeGen/ARM/cmpxchg.mir index bb0e04beb8cb3..1b88dcdd7b2c4 100644 --- a/llvm/test/CodeGen/ARM/cmpxchg.mir +++ b/llvm/test/CodeGen/ARM/cmpxchg.mir @@ -23,5 +23,5 @@ body: | ; CHECK: CMPri killed $r2, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: Bcc %bb.1, 1 /* CC::ne */, killed $cpsr ; CHECK: .3: - dead early-clobber renamable $r0_r1, dead early-clobber renamable $r2 = CMP_SWAP_64 killed renamable $r3, killed renamable $r4_r5, renamable $r4_r5 :: (volatile load store monotonic monotonic 8) + dead early-clobber renamable $r0_r1, dead early-clobber renamable $r2 = CMP_SWAP_64 killed renamable $r3, killed renamable $r4_r5, renamable $r4_r5 :: (volatile load store monotonic monotonic (s64)) ... diff --git a/llvm/test/CodeGen/ARM/cmse-clear-float-bigend.mir b/llvm/test/CodeGen/ARM/cmse-clear-float-bigend.mir index 5c743d5b5a3af..a5774e49f5e3e 100644 --- a/llvm/test/CodeGen/ARM/cmse-clear-float-bigend.mir +++ b/llvm/test/CodeGen/ARM/cmse-clear-float-bigend.mir @@ -78,14 +78,14 @@ body: | frame-setup CFI_INSTRUCTION def_cfa_offset 8 frame-setup CFI_INSTRUCTION offset $lr, -4 frame-setup CFI_INSTRUCTION offset $r7, -8 - renamable $d0 = VLDRD %const.0, 0, 14, $noreg :: (load 8 from constant-pool) - renamable $d1 = VLDRD %const.1, 0, 14, $noreg :: (load 8 from constant-pool) - renamable $d2 = VLDRD %const.2, 0, 14, $noreg :: (load 8 from constant-pool) - renamable $d3 = VLDRD %const.3, 0, 14, $noreg :: (load 8 from constant-pool) - renamable $d4 = VLDRD %const.4, 0, 14, $noreg :: (load 8 from constant-pool) - renamable $d5 = VLDRD %const.5, 0, 14, $noreg :: (load 8 from constant-pool) - renamable $d6 = VLDRD %const.6, 0, 14, $noreg :: (load 8 from constant-pool) - renamable $d7 = VLDRD %const.7, 0, 14, $noreg :: (load 8 from constant-pool) + renamable $d0 = VLDRD %const.0, 0, 14, $noreg :: (load (s64) from constant-pool) + renamable $d1 = VLDRD %const.1, 0, 14, $noreg :: (load (s64) from constant-pool) + renamable $d2 = VLDRD %const.2, 0, 14, $noreg :: (load (s64) from constant-pool) + renamable $d3 = VLDRD %const.3, 0, 14, $noreg :: (load (s64) from constant-pool) + renamable $d4 = VLDRD %const.4, 0, 14, $noreg :: (load (s64) from constant-pool) + renamable $d5 = VLDRD %const.5, 0, 14, $noreg :: (load (s64) from constant-pool) + renamable $d6 = VLDRD %const.6, 0, 14, $noreg :: (load (s64) from constant-pool) + renamable $d7 = VLDRD %const.7, 0, 14, $noreg :: (load (s64) from constant-pool) renamable $r0 = t2BICri killed renamable $r0, 1, 14, $noreg, $noreg tBLXNS_CALL killed renamable $r0, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit $d0, implicit $d1, implicit $d2, implicit $d3, implicit $d4, implicit $d5, implicit $d6, implicit $d7, implicit-def $sp $sp = t2LDMIA_UPD $sp, 14, $noreg, def $r7, def $lr diff --git a/llvm/test/CodeGen/ARM/cmse-vlldm-no-reorder.mir b/llvm/test/CodeGen/ARM/cmse-vlldm-no-reorder.mir index 571ec0cec86cf..69361e8914ed0 100644 --- a/llvm/test/CodeGen/ARM/cmse-vlldm-no-reorder.mir +++ b/llvm/test/CodeGen/ARM/cmse-vlldm-no-reorder.mir @@ -77,10 +77,10 @@ body: | frame-setup CFI_INSTRUCTION offset $lr, -4 frame-setup CFI_INSTRUCTION offset $r7, -8 renamable $r0 = t2MOVi32imm @g - renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (dereferenceable load 4 from `float ()** bitcast (float (...)** @g to float ()**)`) + renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (dereferenceable load (s32) from `float ()** bitcast (float (...)** @g to float ()**)`) tBLXNS_CALL killed renamable $r0, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $s0 renamable $r0 = t2MOVi32imm @a - VSTRS killed renamable $s0, killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4 into @a) + VSTRS killed renamable $s0, killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s32) into @a) $sp = frame-destroy t2LDMIA_RET $sp, 14 /* CC::al */, $noreg, def $r7, def $pc ... diff --git a/llvm/test/CodeGen/ARM/codesize-ifcvt.mir b/llvm/test/CodeGen/ARM/codesize-ifcvt.mir index 639d81921062f..698cdea7ec8ab 100644 --- a/llvm/test/CodeGen/ARM/codesize-ifcvt.mir +++ b/llvm/test/CodeGen/ARM/codesize-ifcvt.mir @@ -179,7 +179,7 @@ body: | ; CHECK-V7: t2B %bb.4, 14 /* CC::al */, $noreg ; CHECK-V7: bb.3.b3: ; CHECK-V7: successors: %bb.4(0x80000000) - ; CHECK-V7: renamable $r0 = t2LDRi12 undef renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from `i32* undef`) + ; CHECK-V7: renamable $r0 = t2LDRi12 undef renamable $r0, 0, 14 /* CC::al */, $noreg :: (load (s32) from `i32* undef`) ; CHECK-V7: renamable $r0 = t2ANDri killed renamable $r0, 256, 14 /* CC::al */, $noreg, $noreg ; CHECK-V7: bb.4.b5: ; CHECK-V7: successors: %bb.5(0x50000000) @@ -213,7 +213,7 @@ body: | ; CHECK-V8: t2B %bb.4, 14 /* CC::al */, $noreg ; CHECK-V8: bb.3.b3: ; CHECK-V8: successors: %bb.4(0x80000000) - ; CHECK-V8: renamable $r0 = t2LDRi12 undef renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from `i32* undef`) + ; CHECK-V8: renamable $r0 = t2LDRi12 undef renamable $r0, 0, 14 /* CC::al */, $noreg :: (load (s32) from `i32* undef`) ; CHECK-V8: renamable $r0 = t2ANDri killed renamable $r0, 256, 14 /* CC::al */, $noreg, $noreg ; CHECK-V8: bb.4.b5: ; CHECK-V8: successors: %bb.5(0x30000000), %bb.6(0x50000000) @@ -256,7 +256,7 @@ body: | bb.3.b3: successors: %bb.4(0x80000000) - renamable $r0 = t2LDRi12 undef renamable $r0, 0, 14, $noreg :: (load 4 from `i32* undef`) + renamable $r0 = t2LDRi12 undef renamable $r0, 0, 14, $noreg :: (load (s32) from `i32* undef`) renamable $r0 = t2ANDri killed renamable $r0, 256, 14, $noreg, $noreg bb.4.b5: @@ -344,7 +344,7 @@ body: | ; CHECK-V7: t2B %bb.4, 14 /* CC::al */, $noreg ; CHECK-V7: bb.3.b3: ; CHECK-V7: successors: %bb.4(0x80000000) - ; CHECK-V7: renamable $r0 = t2LDRi12 undef renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from `i32* undef`) + ; CHECK-V7: renamable $r0 = t2LDRi12 undef renamable $r0, 0, 14 /* CC::al */, $noreg :: (load (s32) from `i32* undef`) ; CHECK-V7: renamable $r0 = t2ANDri killed renamable $r0, 256, 14 /* CC::al */, $noreg, $noreg ; CHECK-V7: bb.4.b5: ; CHECK-V7: successors: %bb.5(0x30000000), %bb.6(0x50000000) @@ -381,7 +381,7 @@ body: | ; CHECK-V8: t2B %bb.4, 14 /* CC::al */, $noreg ; CHECK-V8: bb.3.b3: ; CHECK-V8: successors: %bb.4(0x80000000) - ; CHECK-V8: renamable $r0 = t2LDRi12 undef renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from `i32* undef`) + ; CHECK-V8: renamable $r0 = t2LDRi12 undef renamable $r0, 0, 14 /* CC::al */, $noreg :: (load (s32) from `i32* undef`) ; CHECK-V8: renamable $r0 = t2ANDri killed renamable $r0, 256, 14 /* CC::al */, $noreg, $noreg ; CHECK-V8: bb.4.b5: ; CHECK-V8: successors: %bb.5(0x30000000), %bb.6(0x50000000) @@ -424,7 +424,7 @@ body: | bb.3.b3: successors: %bb.4(0x80000000) - renamable $r0 = t2LDRi12 undef renamable $r0, 0, 14, $noreg :: (load 4 from `i32* undef`) + renamable $r0 = t2LDRi12 undef renamable $r0, 0, 14, $noreg :: (load (s32) from `i32* undef`) renamable $r0 = t2ANDri killed renamable $r0, 256, 14, $noreg, $noreg bb.4.b5: @@ -512,7 +512,7 @@ body: | ; CHECK-V7: t2B %bb.4, 14 /* CC::al */, $noreg ; CHECK-V7: bb.3.b3: ; CHECK-V7: successors: %bb.4(0x80000000) - ; CHECK-V7: renamable $r0 = t2LDRi12 undef renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from `i32* undef`) + ; CHECK-V7: renamable $r0 = t2LDRi12 undef renamable $r0, 0, 14 /* CC::al */, $noreg :: (load (s32) from `i32* undef`) ; CHECK-V7: renamable $r0 = t2ANDri killed renamable $r0, 256, 14 /* CC::al */, $noreg, $noreg ; CHECK-V7: bb.4.b5: ; CHECK-V7: successors: %bb.5(0x30000000), %bb.6(0x50000000) @@ -549,7 +549,7 @@ body: | ; CHECK-V8: t2B %bb.4, 14 /* CC::al */, $noreg ; CHECK-V8: bb.3.b3: ; CHECK-V8: successors: %bb.4(0x80000000) - ; CHECK-V8: renamable $r0 = t2LDRi12 undef renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from `i32* undef`) + ; CHECK-V8: renamable $r0 = t2LDRi12 undef renamable $r0, 0, 14 /* CC::al */, $noreg :: (load (s32) from `i32* undef`) ; CHECK-V8: renamable $r0 = t2ANDri killed renamable $r0, 256, 14 /* CC::al */, $noreg, $noreg ; CHECK-V8: bb.4.b5: ; CHECK-V8: successors: %bb.5(0x30000000), %bb.6(0x50000000) @@ -592,7 +592,7 @@ body: | bb.3.b3: successors: %bb.4(0x80000000) - renamable $r0 = t2LDRi12 undef renamable $r0, 0, 14, $noreg :: (load 4 from `i32* undef`) + renamable $r0 = t2LDRi12 undef renamable $r0, 0, 14, $noreg :: (load (s32) from `i32* undef`) renamable $r0 = t2ANDri killed renamable $r0, 256, 14, $noreg, $noreg bb.4.b5: diff --git a/llvm/test/CodeGen/ARM/const-load-align-thumb.mir b/llvm/test/CodeGen/ARM/const-load-align-thumb.mir index daa8a91033094..7b2697d0f2195 100644 --- a/llvm/test/CodeGen/ARM/const-load-align-thumb.mir +++ b/llvm/test/CodeGen/ARM/const-load-align-thumb.mir @@ -46,9 +46,9 @@ body: | frame-setup CFI_INSTRUCTION offset $r7, -8 $sp = frame-setup tSUBspi $sp, 2, 14 /* CC::al */, $noreg frame-setup CFI_INSTRUCTION def_cfa_offset 16 - renamable $s0 = VLDRH %const.0, 0, 14, $noreg :: (load 2 from constant-pool) - VSTRH killed renamable $s0, $sp, 3, 14, $noreg :: (store 2 into %ir.P5) - renamable $r0 = t2LDRHi12 $sp, 6, 14 /* CC::al */, $noreg :: (dereferenceable load 2 from %ir.P5) + renamable $s0 = VLDRH %const.0, 0, 14, $noreg :: (load (s16) from constant-pool) + VSTRH killed renamable $s0, $sp, 3, 14, $noreg :: (store (s16) into %ir.P5) + renamable $r0 = t2LDRHi12 $sp, 6, 14 /* CC::al */, $noreg :: (dereferenceable load (s16) from %ir.P5) tBL 14 /* CC::al */, $noreg, @z_bar, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $r0, implicit-def $sp renamable $r0, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg $sp = frame-destroy tADDspi $sp, 2, 14 /* CC::al */, $noreg diff --git a/llvm/test/CodeGen/ARM/constant-island-SOImm-limit16.mir b/llvm/test/CodeGen/ARM/constant-island-SOImm-limit16.mir index 223a3b0b33b13..bc0b843584057 100644 --- a/llvm/test/CodeGen/ARM/constant-island-SOImm-limit16.mir +++ b/llvm/test/CodeGen/ARM/constant-island-SOImm-limit16.mir @@ -48,9 +48,9 @@ body: | ; 2 consecutive entries: 1 is 4-byte aligned, 1 is not 4-byte aligned. renamable $r1 = LEApcrel %const.0, 14, $noreg - renamable $r1 = LDRH killed renamable $r1, $noreg, 0, 14, $noreg :: (load 2 from constant-pool) + renamable $r1 = LDRH killed renamable $r1, $noreg, 0, 14, $noreg :: (load (s16) from constant-pool) renamable $r1 = LEApcrel %const.1, 14, $noreg - renamable $r1 = LDRH killed renamable $r1, $noreg, 0, 14, $noreg :: (load 2 from constant-pool) + renamable $r1 = LDRH killed renamable $r1, $noreg, 0, 14, $noreg :: (load (s16) from constant-pool) renamable $r0 = SPACE SPACEBYTES, undef renamable $r0 diff --git a/llvm/test/CodeGen/ARM/constant-island-movwt.mir b/llvm/test/CodeGen/ARM/constant-island-movwt.mir index 75b9919e21897..350d952fd2e71 100644 --- a/llvm/test/CodeGen/ARM/constant-island-movwt.mir +++ b/llvm/test/CodeGen/ARM/constant-island-movwt.mir @@ -414,7 +414,7 @@ body: | renamable $d27 = VSETLNi32 undef renamable $d27, killed renamable $r1, 0, 14, $noreg, implicit killed $q13, implicit-def $q13 $r1 = t2MOVi16 target-flags(arm-lo16) @.str.19, 14, $noreg $r1 = t2MOVTi16 $r1, target-flags(arm-hi16) @.str.19, 14, $noreg - renamable $s4 = VLDRS %const.0, 0, 14, $noreg, implicit killed $q1, implicit-def $q1 :: (load 4 from constant-pool) + renamable $s4 = VLDRS %const.0, 0, 14, $noreg, implicit killed $q1, implicit-def $q1 :: (load (s32) from constant-pool) renamable $d3 = VSETLNi32 undef renamable $d3, killed renamable $r1, 0, 14, $noreg, implicit killed $q1, implicit-def $q1 $r1 = t2MOVi16 target-flags(arm-lo16) @.str.61, 14, $noreg $r1 = t2MOVTi16 $r1, target-flags(arm-hi16) @.str.61, 14, $noreg @@ -474,7 +474,7 @@ body: | $r1 = t2MOVTi16 $r1, target-flags(arm-hi16) @.str.29, 14, $noreg renamable $d10 = VSETLNi32 undef renamable $d10, killed renamable $r1, 0, 14, $noreg, implicit-def $q5 renamable $r1 = t2ADDri renamable $r0, 16, 14, $noreg, $noreg - VST1q64 killed $r1, 0, killed $q3, 14, $noreg :: (store 16 into %ir.1, align 8) + VST1q64 killed $r1, 0, killed $q3, 14, $noreg :: (store (s128) into %ir.1, align 8) $r1 = t2MOVi16 target-flags(arm-lo16) @.str.39, 14, $noreg $r1 = t2MOVTi16 $r1, target-flags(arm-hi16) @.str.39, 14, $noreg renamable $d9 = VSETLNi32 undef renamable $d9, killed renamable $r1, 0, 14, $noreg, implicit killed $q4, implicit-def $q4 @@ -482,7 +482,7 @@ body: | $r1 = t2MOVTi16 $r1, target-flags(arm-hi16) @.str.69, 14, $noreg renamable $d24 = VSETLNi32 undef renamable $d24, killed renamable $r1, 0, 14, $noreg, implicit killed $q12, implicit-def $q12 renamable $r1 = t2ADDri renamable $r0, 32, 14, $noreg, $noreg - VST1q64 killed $r1, 0, killed $q2, 14, $noreg :: (store 16 into %ir.2, align 8) + VST1q64 killed $r1, 0, killed $q2, 14, $noreg :: (store (s128) into %ir.2, align 8) $r1 = t2MOVi16 target-flags(arm-lo16) @.str.31, 14, $noreg $r1 = t2MOVTi16 $r1, target-flags(arm-hi16) @.str.31, 14, $noreg renamable $d11 = VSETLNi32 undef renamable $d11, killed renamable $r1, 0, 14, $noreg, implicit killed $q5, implicit-def $q5 @@ -502,7 +502,7 @@ body: | $r1 = t2MOVTi16 $r1, target-flags(arm-hi16) @.str.67, 14, $noreg renamable $d5 = VSETLNi32 undef renamable $d5, killed renamable $r1, 0, 14, $noreg, implicit-def $q2 renamable $r1 = t2ADDri renamable $r0, 48, 14, $noreg, $noreg - VST1q64 killed $r1, 0, killed $q5, 14, $noreg :: (store 16 into %ir.3, align 8) + VST1q64 killed $r1, 0, killed $q5, 14, $noreg :: (store (s128) into %ir.3, align 8) $r1 = t2MOVi16 target-flags(arm-lo16) @.str.51, 14, $noreg $r1 = t2MOVTi16 $r1, target-flags(arm-hi16) @.str.51, 14, $noreg renamable $d31 = VSETLNi32 undef renamable $d31, killed renamable $r1, 0, 14, $noreg, implicit killed $q15, implicit-def $q15 @@ -510,7 +510,7 @@ body: | $r1 = t2MOVTi16 $r1, target-flags(arm-hi16) @.str.43, 14, $noreg renamable $d7 = VSETLNi32 undef renamable $d7, killed renamable $r1, 0, 14, $noreg, implicit-def $q3 renamable $r1 = t2ADDri renamable $r0, 80, 14, $noreg, $noreg - VST1q64 killed $r1, 0, killed $q4, 14, $noreg :: (store 16 into %ir.5, align 8) + VST1q64 killed $r1, 0, killed $q4, 14, $noreg :: (store (s128) into %ir.5, align 8) $r1 = t2MOVi16 target-flags(arm-lo16) @.str.47, 14, $noreg $r1 = t2MOVTi16 $r1, target-flags(arm-hi16) @.str.47, 14, $noreg renamable $d1 = VSETLNi32 undef renamable $d1, killed renamable $r1, 0, 14, $noreg, implicit killed $q0, implicit-def $q0 @@ -530,7 +530,7 @@ body: | $r1 = t2MOVTi16 $r1, target-flags(arm-hi16) @.str.41, 14, $noreg renamable $d6 = VSETLNi32 undef renamable $d6, killed renamable $r1, 0, 14, $noreg, implicit killed $q3, implicit-def $q3 renamable $r1 = t2ADDri renamable $r0, 112, 14, $noreg, $noreg - VST1q64 killed $r1, 0, killed $q0, 14, $noreg :: (store 16 into %ir.7, align 8) + VST1q64 killed $r1, 0, killed $q0, 14, $noreg :: (store (s128) into %ir.7, align 8) $r1 = t2MOVi16 target-flags(arm-lo16) @.str.59, 14, $noreg $r1 = t2MOVTi16 $r1, target-flags(arm-hi16) @.str.59, 14, $noreg renamable $d29 = VSETLNi32 undef renamable $d29, killed renamable $r1, 0, 14, $noreg, implicit killed $q14, implicit-def $q14 @@ -538,7 +538,7 @@ body: | $r1 = t2MOVTi16 $r1, target-flags(arm-hi16) @.str.65, 14, $noreg renamable $d4 = VSETLNi32 undef renamable $d4, killed renamable $r1, 0, 14, $noreg, implicit killed $q2, implicit-def $q2 renamable $r1 = t2ADDri renamable $r0, 128, 14, $noreg, $noreg - VST1q64 killed $r1, 0, killed $q15, 14, $noreg :: (store 16 into %ir.8, align 8) + VST1q64 killed $r1, 0, killed $q15, 14, $noreg :: (store (s128) into %ir.8, align 8) $r1 = t2MOVi16 target-flags(arm-lo16) @.str.55, 14, $noreg $r1 = t2MOVTi16 $r1, target-flags(arm-hi16) @.str.55, 14, $noreg renamable $d23 = VSETLNi32 undef renamable $d23, killed renamable $r1, 0, 14, $noreg, implicit killed $q11, implicit-def $q11 @@ -558,7 +558,7 @@ body: | $r1 = t2MOVTi16 $r1, target-flags(arm-hi16) @.str.104, 14, $noreg renamable $d31 = VSETLNi32 undef renamable $d31, killed renamable $r1, 0, 14, $noreg, implicit-def $q15 renamable $r1 = t2ADDri renamable $r0, 144, 14, $noreg, $noreg - VST1q64 killed $r1, 0, killed $q11, 14, $noreg :: (store 16 into %ir.9, align 8) + VST1q64 killed $r1, 0, killed $q11, 14, $noreg :: (store (s128) into %ir.9, align 8) $r1 = t2MOVi16 target-flags(arm-lo16) @.str.126, 14, $noreg $r1 = t2MOVTi16 $r1, target-flags(arm-hi16) @.str.126, 14, $noreg renamable $d23 = VSETLNi32 undef renamable $d23, killed renamable $r1, 0, 14, $noreg, implicit-def $q11 @@ -566,7 +566,7 @@ body: | $r1 = t2MOVTi16 $r1, target-flags(arm-hi16) @.str.98, 14, $noreg renamable $d0 = VSETLNi32 undef renamable $d0, killed renamable $r1, 0, 14, $noreg, implicit-def $q0 renamable $r1 = t2ADDri renamable $r0, 200, 14, $noreg, $noreg - VST1q64 killed $r1, 0, killed $q14, 14, $noreg :: (store 16 into %ir.11, align 8) + VST1q64 killed $r1, 0, killed $q14, 14, $noreg :: (store (s128) into %ir.11, align 8) $lr = t2MOVi16 target-flags(arm-lo16) @.str.124, 14, $noreg $lr = t2MOVTi16 $lr, target-flags(arm-hi16) @.str.124, 14, $noreg $r2 = t2MOVi16 target-flags(arm-lo16) @.str.127, 14, $noreg @@ -599,9 +599,9 @@ body: | $r3 = t2MOVTi16 $r3, target-flags(arm-hi16) @.str.121, 14, $noreg renamable $q14 = VDUP32q killed renamable $r3, 14, $noreg renamable $r3 = t2ADDri renamable $r0, 216, 14, $noreg, $noreg - VST1q64 killed $r3, 0, killed $q10, 14, $noreg :: (store 16 into %ir.12, align 8) + VST1q64 killed $r3, 0, killed $q10, 14, $noreg :: (store (s128) into %ir.12, align 8) $r3 = tMOVr $r0, 14, $noreg - renamable $r3 = VST1q32wb_register killed $r3, 0, killed $r4, killed $q1, 14, $noreg :: (store 16 into %ir.0, align 8) + renamable $r3 = VST1q32wb_register killed $r3, 0, killed $r4, killed $q1, 14, $noreg :: (store (s128) into %ir.0, align 8) $r4 = t2MOVi16 target-flags(arm-lo16) @.str.120, 14, $noreg $r4 = t2MOVTi16 $r4, target-flags(arm-hi16) @.str.120, 14, $noreg renamable $q10 = VMOVv4i32 0, 14, $noreg @@ -636,29 +636,29 @@ body: | $r4 = t2MOVTi16 $r4, target-flags(arm-hi16) @.str.88, 14, $noreg renamable $d8 = VSETLNi32 undef renamable $d8, killed renamable $r4, 0, 14, $noreg, implicit-def $q4 renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg - tSTRi renamable $r4, killed renamable $r3, 0, 14, $noreg :: (store 4 into %ir.4 + 36) + tSTRi renamable $r4, killed renamable $r3, 0, 14, $noreg :: (store (s32) into %ir.4 + 36) $r3 = tMOVr $r0, 14, $noreg t2STRDi8 $r4, $r4, $r0, 192, 14, $noreg - early-clobber renamable $r3 = t2STR_PRE renamable $r4, killed renamable $r3, 96, 14, $noreg :: (store 4 into %ir.4 + 32) - VST1q64 killed $r3, 0, killed $q3, 14, $noreg :: (store 16 into %ir.6, align 8) + early-clobber renamable $r3 = t2STR_PRE renamable $r4, killed renamable $r3, 96, 14, $noreg :: (store (s32) into %ir.4 + 32) + VST1q64 killed $r3, 0, killed $q3, 14, $noreg :: (store (s128) into %ir.6, align 8) renamable $r3 = t2ADDri renamable $r0, 64, 14, $noreg, $noreg $r5 = t2MOVi16 target-flags(arm-lo16) @.str.81, 14, $noreg $r5 = t2MOVTi16 $r5, target-flags(arm-hi16) @.str.81, 14, $noreg - VST1q32 killed $r3, 0, $q10, 14, $noreg :: (store 16 into %ir.4, align 4) + VST1q32 killed $r3, 0, $q10, 14, $noreg :: (store (s128) into %ir.4, align 4) renamable $r3 = t2ADDri renamable $r0, 176, 14, $noreg, $noreg - VST1q32 killed $r3, 0, $q10, 14, $noreg :: (store 16 into %ir.10 + 16, align 4) + VST1q32 killed $r3, 0, $q10, 14, $noreg :: (store (s128) into %ir.10 + 16, align 4) renamable $r3 = t2ADDri renamable $r0, 160, 14, $noreg, $noreg renamable $q1 = VDUP32q killed renamable $r5, 14, $noreg renamable $r5 = t2ADDri renamable $r0, 248, 14, $noreg, $noreg - VST1q32 killed $r3, 0, $q10, 14, $noreg :: (store 16 into %ir.10, align 4) + VST1q32 killed $r3, 0, $q10, 14, $noreg :: (store (s128) into %ir.10, align 4) renamable $r3 = t2ADDri renamable $r0, 232, 14, $noreg, $noreg - VST1q64 killed $r3, 0, killed $q2, 14, $noreg :: (store 16 into %ir.13, align 8) + VST1q64 killed $r3, 0, killed $q2, 14, $noreg :: (store (s128) into %ir.13, align 8) $r3 = t2MOVi16 target-flags(arm-lo16) @.str.82, 14, $noreg $r3 = t2MOVTi16 $r3, target-flags(arm-hi16) @.str.82, 14, $noreg - VST1q64 killed $r5, 0, killed $q12, 14, $noreg :: (store 16 into %ir.14, align 8) + VST1q64 killed $r5, 0, killed $q12, 14, $noreg :: (store (s128) into %ir.14, align 8) renamable $d3 = VSETLNi32 killed renamable $d3, killed renamable $r3, 1, 14, $noreg, implicit $q1, implicit-def $q1 renamable $r3 = t2ADDri renamable $r0, 264, 14, $noreg, $noreg - VST1q64 killed $r3, 0, killed $q13, 14, $noreg :: (store 16 into %ir.15, align 8) + VST1q64 killed $r3, 0, killed $q13, 14, $noreg :: (store (s128) into %ir.15, align 8) $r3 = t2MOVi16 target-flags(arm-lo16) @.str.91, 14, $noreg $r3 = t2MOVTi16 $r3, target-flags(arm-hi16) @.str.91, 14, $noreg $r5 = t2MOVi16 target-flags(arm-lo16) @.str.90, 14, $noreg @@ -676,9 +676,9 @@ body: | renamable $r4 = t2ADDri renamable $r0, 296, 14, $noreg, $noreg renamable $d5 = VSETLNi32 undef renamable $d5, killed renamable $r5, 0, 14, $noreg, implicit-def $q2 renamable $r5 = t2ADDri renamable $r0, 280, 14, $noreg, $noreg - VST1q32 $r4, 0, $q10, 14, $noreg :: (store 16 into %ir.16 + 16, align 4) - VST1q32 killed $r5, 0, $q10, 14, $noreg :: (store 16 into %ir.16, align 4) - VST1q64 killed $r4, 0, killed $q1, 14, $noreg :: (store 16 into %ir.17, align 8) + VST1q32 $r4, 0, $q10, 14, $noreg :: (store (s128) into %ir.16 + 16, align 4) + VST1q32 killed $r5, 0, $q10, 14, $noreg :: (store (s128) into %ir.16, align 4) + VST1q64 killed $r4, 0, killed $q1, 14, $noreg :: (store (s128) into %ir.17, align 8) $r5 = t2MOVi16 target-flags(arm-lo16) @.str.83, 14, $noreg $r5 = t2MOVTi16 $r5, target-flags(arm-hi16) @.str.83, 14, $noreg renamable $d9 = VSETLNi32 killed renamable $d9, renamable $r3, 1, 14, $noreg, implicit $q4, implicit-def $q4 @@ -689,7 +689,7 @@ body: | renamable $d8 = VSETLNi32 killed renamable $d8, killed renamable $r5, 1, 14, $noreg, implicit $q4, implicit-def $q4 $r5 = t2MOVi16 target-flags(arm-lo16) @.str.106, 14, $noreg $r5 = t2MOVTi16 $r5, target-flags(arm-hi16) @.str.106, 14, $noreg - VST1q64 killed $r4, 0, killed $q13, 14, $noreg :: (store 16 into %ir.18, align 8) + VST1q64 killed $r4, 0, killed $q13, 14, $noreg :: (store (s128) into %ir.18, align 8) $r4 = t2MOVi16 target-flags(arm-lo16) @.str.119, 14, $noreg $r4 = t2MOVTi16 $r4, target-flags(arm-hi16) @.str.119, 14, $noreg renamable $q1 = VDUP32q killed renamable $r5, 14, $noreg @@ -699,7 +699,7 @@ body: | $r4 = t2MOVTi16 $r4, target-flags(arm-hi16) @.str.116, 14, $noreg renamable $d27 = VSETLNi32 undef renamable $d27, killed renamable $r4, 0, 14, $noreg, implicit-def $q13 renamable $r4 = t2ADDri renamable $r0, 344, 14, $noreg, $noreg - VST1q64 killed $r4, 0, killed $q4, 14, $noreg :: (store 16 into %ir.19, align 8) + VST1q64 killed $r4, 0, killed $q4, 14, $noreg :: (store (s128) into %ir.19, align 8) $r4 = t2MOVi16 target-flags(arm-lo16) @.str.107, 14, $noreg $r4 = t2MOVTi16 $r4, target-flags(arm-hi16) @.str.107, 14, $noreg renamable $d3 = VSETLNi32 killed renamable $d3, killed renamable $r4, 1, 14, $noreg, implicit $q1, implicit-def $q1 @@ -720,7 +720,7 @@ body: | renamable $r3 = t2ADDri renamable $r0, 360, 14, $noreg, $noreg renamable $d26 = VDUP32d killed renamable $r7, 14, $noreg, implicit killed $q13, implicit-def $q13 renamable $r7 = t2ADDri renamable $r0, 504, 14, $noreg, $noreg - VST1q64 killed $r3, 0, killed $q2, 14, $noreg :: (store 16 into %ir.20, align 8) + VST1q64 killed $r3, 0, killed $q2, 14, $noreg :: (store (s128) into %ir.20, align 8) $r3 = t2MOVi16 target-flags(arm-lo16) @.str.97, 14, $noreg $r3 = t2MOVTi16 $r3, target-flags(arm-hi16) @.str.97, 14, $noreg renamable $q2 = VDUP32q killed renamable $r3, 14, $noreg @@ -728,24 +728,24 @@ body: | $r3 = t2MOVTi16 $r3, target-flags(arm-hi16) @.str.96, 14, $noreg renamable $d4 = VSETLNi32 killed renamable $d4, killed renamable $r3, 0, 14, $noreg, implicit $q2, implicit-def $q2 renamable $r3 = t2ADDri renamable $r0, 388, 14, $noreg, $noreg - VST1q32 killed $r3, 0, $q10, 14, $noreg :: (store 16 into %ir.21 + 12, align 4) + VST1q32 killed $r3, 0, $q10, 14, $noreg :: (store (s128) into %ir.21 + 12, align 4) renamable $r3 = t2ADDri renamable $r0, 376, 14, $noreg, $noreg - VST1q32 killed $r3, 0, $q10, 14, $noreg :: (store 16 into %ir.21, align 4) + VST1q32 killed $r3, 0, $q10, 14, $noreg :: (store (s128) into %ir.21, align 4) renamable $r3 = t2ADDri renamable $r0, 584, 14, $noreg, $noreg - VST1q64 killed $r4, 0, killed $q2, 14, $noreg :: (store 16 into %ir.22, align 8) + VST1q64 killed $r4, 0, killed $q2, 14, $noreg :: (store (s128) into %ir.22, align 8) $r4 = t2MOVi16 target-flags(arm-lo16) @.str.100, 14, $noreg $r4 = t2MOVTi16 $r4, target-flags(arm-hi16) @.str.100, 14, $noreg renamable $d1 = VDUP32d killed renamable $r4, 14, $noreg, implicit killed $q0, implicit-def $q0 renamable $r4 = t2ADDri renamable $r0, 408, 14, $noreg, $noreg - VST1q64 killed $r4, 0, killed $q0, 14, $noreg :: (store 16 into %ir.23, align 8) + VST1q64 killed $r4, 0, killed $q0, 14, $noreg :: (store (s128) into %ir.23, align 8) renamable $r4 = t2ADDri renamable $r0, 440, 14, $noreg, $noreg - VST1q32 killed $r5, 0, $q10, 14, $noreg :: (store 16 into %ir.24 + 12, align 4) + VST1q32 killed $r5, 0, $q10, 14, $noreg :: (store (s128) into %ir.24 + 12, align 4) $r5 = t2MOVi16 target-flags(arm-lo16) @.str.103, 14, $noreg $r5 = t2MOVTi16 $r5, target-flags(arm-hi16) @.str.103, 14, $noreg renamable $d30 = VDUP32d killed renamable $r5, 14, $noreg, implicit killed $q15, implicit-def $q15 renamable $r5 = t2ADDri renamable $r0, 424, 14, $noreg, $noreg - VST1q32 killed $r5, 0, $q10, 14, $noreg :: (store 16 into %ir.24, align 4) - VST1q64 killed $r4, 0, killed $q15, 14, $noreg :: (store 16 into %ir.25, align 8) + VST1q32 killed $r5, 0, $q10, 14, $noreg :: (store (s128) into %ir.24, align 4) + VST1q64 killed $r4, 0, killed $q15, 14, $noreg :: (store (s128) into %ir.25, align 8) $r4 = t2MOVi16 target-flags(arm-lo16) @.str.111, 14, $noreg $r4 = t2MOVTi16 $r4, target-flags(arm-hi16) @.str.111, 14, $noreg $r5 = t2MOVi16 target-flags(arm-lo16) @.str.110, 14, $noreg @@ -761,22 +761,22 @@ body: | renamable $d0 = VSETLNi32 killed renamable $d0, killed renamable $r4, 0, 14, $noreg, implicit $q0, implicit-def $q0 renamable $r4 = t2ADDri renamable $r0, 568, 14, $noreg, $noreg renamable $d31 = VDUP32d killed renamable $r12, 14, $noreg, implicit killed $q15, implicit-def $q15 - VST1q64 killed $r1, 0, killed $q1, 14, $noreg :: (store 16 into %ir.26, align 8) + VST1q64 killed $r1, 0, killed $q1, 14, $noreg :: (store (s128) into %ir.26, align 8) renamable $r1 = t2ADDri renamable $r0, 472, 14, $noreg, $noreg - VST1q64 killed $r1, 0, killed $q0, 14, $noreg :: (store 16 into %ir.27, align 8) + VST1q64 killed $r1, 0, killed $q0, 14, $noreg :: (store (s128) into %ir.27, align 8) renamable $r1 = t2ADDri renamable $r0, 552, 14, $noreg, $noreg - VST1q64 killed $r5, 0, killed $q15, 14, $noreg :: (store 16 into %ir.28, align 8) + VST1q64 killed $r5, 0, killed $q15, 14, $noreg :: (store (s128) into %ir.28, align 8) renamable $r5 = t2ADDri renamable $r0, 536, 14, $noreg, $noreg - VST1q64 killed $r7, 0, killed $q9, 14, $noreg :: (store 16 into %ir.29, align 8) + VST1q64 killed $r7, 0, killed $q9, 14, $noreg :: (store (s128) into %ir.29, align 8) renamable $r7 = t2ADDri renamable $r0, 660, 14, $noreg, $noreg - VST1q64 killed $r6, 0, killed $q13, 14, $noreg :: (store 16 into %ir.30, align 8) - VST1q64 killed $r5, 0, killed $q12, 14, $noreg :: (store 16 into %ir.31, align 8) - VST1q64 killed $r1, 0, killed $q14, 14, $noreg :: (store 16 into %ir.32, align 8) + VST1q64 killed $r6, 0, killed $q13, 14, $noreg :: (store (s128) into %ir.30, align 8) + VST1q64 killed $r5, 0, killed $q12, 14, $noreg :: (store (s128) into %ir.31, align 8) + VST1q64 killed $r1, 0, killed $q14, 14, $noreg :: (store (s128) into %ir.32, align 8) renamable $r1 = t2ADDri renamable $r0, 608, 14, $noreg, $noreg - VST1q64 killed $r4, 0, killed $q8, 14, $noreg :: (store 16 into %ir.33, align 8) - VST1q64 killed $r3, 0, killed $q11, 14, $noreg :: (store 16 into %ir.34, align 8) + VST1q64 killed $r4, 0, killed $q8, 14, $noreg :: (store (s128) into %ir.33, align 8) + VST1q64 killed $r3, 0, killed $q11, 14, $noreg :: (store (s128) into %ir.34, align 8) t2STRDi8 killed $r2, $r2, $r0, 600, 14, $noreg - VST1q32 killed $r1, 0, $q10, 14, $noreg :: (store 16 into %ir.35, align 4) + VST1q32 killed $r1, 0, $q10, 14, $noreg :: (store (s128) into %ir.35, align 4) $r12 = t2MOVi16 target-flags(arm-lo16) @.str.139, 14, $noreg $r12 = t2MOVTi16 $r12, target-flags(arm-hi16) @.str.139, 14, $noreg $r2 = t2MOVi16 target-flags(arm-lo16) @.str.151, 14, $noreg @@ -848,7 +848,7 @@ body: | $r5 = t2MOVTi16 $r5, target-flags(arm-hi16) @.str.133, 14, $noreg renamable $q2 = VDUP32q killed renamable $r5, 14, $noreg renamable $r5 = t2ADDri renamable $r0, 756, 14, $noreg, $noreg - VST1q32 killed $r1, 0, killed $q10, 14, $noreg :: (store 16 into %ir.35 + 12, align 4) + VST1q32 killed $r1, 0, killed $q10, 14, $noreg :: (store (s128) into %ir.35 + 12, align 4) $r1 = t2MOVi16 target-flags(arm-lo16) @.str.132, 14, $noreg $r1 = t2MOVTi16 $r1, target-flags(arm-hi16) @.str.132, 14, $noreg renamable $d19 = VDUP32d killed renamable $r4, 14, $noreg, implicit killed $q9, implicit-def $q9 @@ -865,19 +865,19 @@ body: | t2STRDi8 killed $r4, killed $r6, $r0, 636, 14, $noreg renamable $r4 = t2ADDri renamable $r0, 644, 14, $noreg, $noreg renamable $r6 = t2ADDri renamable $r0, 692, 14, $noreg, $noreg - VST1q64 killed $r4, 0, killed $q1, 14, $noreg :: (store 16 into %ir.36, align 8) + VST1q64 killed $r4, 0, killed $q1, 14, $noreg :: (store (s128) into %ir.36, align 8) renamable $r4 = t2ADDri renamable $r0, 724, 14, $noreg, $noreg - VST1q64 killed $r7, 0, killed $q2, 14, $noreg :: (store 16 into %ir.37, align 8) + VST1q64 killed $r7, 0, killed $q2, 14, $noreg :: (store (s128) into %ir.37, align 8) renamable $r7 = t2ADDri renamable $r0, 708, 14, $noreg, $noreg renamable $r0 = t2ADDri killed renamable $r0, 676, 14, $noreg, $noreg - VST1q64 killed $r0, 0, killed $q12, 14, $noreg :: (store 16 into %ir.38, align 8) - VST1q64 killed $r6, 0, killed $q14, 14, $noreg :: (store 16 into %ir.39, align 8) - VST1q64 killed $r7, 0, killed $q11, 14, $noreg :: (store 16 into %ir.40, align 8) - VST1q64 killed $r4, 0, killed $q15, 14, $noreg :: (store 16 into %ir.41, align 8) - VST1q64 killed $r2, 0, killed $q0, 14, $noreg :: (store 16 into %ir.42, align 8) - VST1q64 killed $r5, 0, killed $q9, 14, $noreg :: (store 16 into %ir.43, align 8) - VST1q64 killed $r1, 0, killed $q13, 14, $noreg :: (store 16 into %ir.44, align 8) - VST1q64 killed $lr, 0, killed $q8, 14, $noreg :: (store 16 into %ir.45, align 8) + VST1q64 killed $r0, 0, killed $q12, 14, $noreg :: (store (s128) into %ir.38, align 8) + VST1q64 killed $r6, 0, killed $q14, 14, $noreg :: (store (s128) into %ir.39, align 8) + VST1q64 killed $r7, 0, killed $q11, 14, $noreg :: (store (s128) into %ir.40, align 8) + VST1q64 killed $r4, 0, killed $q15, 14, $noreg :: (store (s128) into %ir.41, align 8) + VST1q64 killed $r2, 0, killed $q0, 14, $noreg :: (store (s128) into %ir.42, align 8) + VST1q64 killed $r5, 0, killed $q9, 14, $noreg :: (store (s128) into %ir.43, align 8) + VST1q64 killed $r1, 0, killed $q13, 14, $noreg :: (store (s128) into %ir.44, align 8) + VST1q64 killed $lr, 0, killed $q8, 14, $noreg :: (store (s128) into %ir.45, align 8) $sp = VLDMDIA_UPD $sp, 14, $noreg, def $d8, def $d9, def $d10, def $d11 $sp = t2LDMIA_RET $sp, 14, $noreg, def $r4, def $r5, def $r6, def $r7, def $r11, def $pc diff --git a/llvm/test/CodeGen/ARM/constant-islands-split-IT.mir b/llvm/test/CodeGen/ARM/constant-islands-split-IT.mir index 4ebcf77b9e66c..85fd2d610b1e6 100644 --- a/llvm/test/CodeGen/ARM/constant-islands-split-IT.mir +++ b/llvm/test/CodeGen/ARM/constant-islands-split-IT.mir @@ -70,7 +70,7 @@ body: | ; CHECK-LABEL: name: h ; CHECK: bb.0: ; CHECK: successors: %bb.4(0x80000000) - ; CHECK: renamable $d0 = VLDRD %const.3, 0, 14 /* CC::al */, $noreg :: (load 8 from constant-pool) + ; CHECK: renamable $d0 = VLDRD %const.3, 0, 14 /* CC::al */, $noreg :: (load (s64) from constant-pool) ; CHECK: dead renamable $r0 = SPACE 40, undef renamable $r0 ; CHECK: tB %bb.4, 14 /* CC::al */, $noreg ; CHECK: bb.1 (align 8): @@ -90,9 +90,9 @@ body: | ; CHECK: t2CMPri $r0, 32, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: renamable $r0 = SPACE 200, undef renamable $r0 ; CHECK: t2IT 0, 1, implicit-def $itstate - ; CHECK: renamable $d0 = VLDRD %const.7, 0, 0 /* CC::eq */, $cpsr, implicit $itstate :: (load 8 from constant-pool) - ; CHECK: renamable $d1 = VLDRD %const.5, 0, 0 /* CC::eq */, $cpsr, implicit $itstate :: (load 8 from constant-pool) - ; CHECK: renamable $d2 = VLDRD %const.6, 0, 0 /* CC::eq */, $cpsr, implicit $itstate :: (load 8 from constant-pool) + ; CHECK: renamable $d0 = VLDRD %const.7, 0, 0 /* CC::eq */, $cpsr, implicit $itstate :: (load (s64) from constant-pool) + ; CHECK: renamable $d1 = VLDRD %const.5, 0, 0 /* CC::eq */, $cpsr, implicit $itstate :: (load (s64) from constant-pool) + ; CHECK: renamable $d2 = VLDRD %const.6, 0, 0 /* CC::eq */, $cpsr, implicit $itstate :: (load (s64) from constant-pool) ; CHECK: $r0 = t2SUBri $r0, 12, 0 /* CC::eq */, $cpsr, $noreg, implicit killed $itstate ; CHECK: t2B %bb.7, 14 /* CC::al */, $noreg ; CHECK: bb.6 (align 8): @@ -114,7 +114,7 @@ body: | bb.0: successors: %bb.1(0x80000000) - renamable $d0 = VLDRD %const.0, 0, 14, $noreg :: (load 8 from constant-pool) + renamable $d0 = VLDRD %const.0, 0, 14, $noreg :: (load (s64) from constant-pool) dead renamable $r0 = SPACE 40, undef renamable $r0 bb.1: @@ -129,9 +129,9 @@ body: | t2CMPri $r0, 32, 14, $noreg, implicit-def $cpsr renamable $r0 = SPACE 200, undef renamable $r0 t2IT 0, 1, implicit-def $itstate - renamable $d0 = VLDRD %const.1, 0, 0, $cpsr, implicit $itstate :: (load 8 from constant-pool) - renamable $d1 = VLDRD %const.2, 0, 0, $cpsr, implicit $itstate :: (load 8 from constant-pool) - renamable $d2 = VLDRD %const.0, 0, 0, $cpsr, implicit $itstate :: (load 8 from constant-pool) + renamable $d0 = VLDRD %const.1, 0, 0, $cpsr, implicit $itstate :: (load (s64) from constant-pool) + renamable $d1 = VLDRD %const.2, 0, 0, $cpsr, implicit $itstate :: (load (s64) from constant-pool) + renamable $d2 = VLDRD %const.0, 0, 0, $cpsr, implicit $itstate :: (load (s64) from constant-pool) $r0 = t2SUBri $r0, 12, 0, $cpsr, $noreg, implicit killed $itstate t2IT 0, 4, implicit-def $itstate $sp = tMOVr $r0, 0, $cpsr, implicit $itstate diff --git a/llvm/test/CodeGen/ARM/fold-sext-sextload.ll b/llvm/test/CodeGen/ARM/fold-sext-sextload.ll index beea9e71cee82..0d6ecfc2b2128 100644 --- a/llvm/test/CodeGen/ARM/fold-sext-sextload.ll +++ b/llvm/test/CodeGen/ARM/fold-sext-sextload.ll @@ -2,7 +2,7 @@ define <4 x i8> @i(<4 x i8>*, <4 x i8>) !dbg !8 { %3 = load <4 x i8>, <4 x i8>* %0, align 4, !dbg !14 - ; CHECK: $[[reg:.*]] = VLD1LNd32 {{.*}} debug-location !14 :: (load 4 from %ir.0) + ; CHECK: $[[reg:.*]] = VLD1LNd32 {{.*}} debug-location !14 :: (load (s32) from %ir.0) ; CHECK: VMOVLsv8i16 {{.*}} $[[reg]], {{.*}} debug-location !14 ; CHECK: VMOVLsv4i32 {{.*}} $[[reg]], {{.*}} debug-location !14 %4 = sdiv <4 x i8> %1, %3, !dbg !15 diff --git a/llvm/test/CodeGen/ARM/fold-zext-zextload.ll b/llvm/test/CodeGen/ARM/fold-zext-zextload.ll index 28224de901abc..b9b08e4d6c527 100644 --- a/llvm/test/CodeGen/ARM/fold-zext-zextload.ll +++ b/llvm/test/CodeGen/ARM/fold-zext-zextload.ll @@ -2,7 +2,7 @@ define <4 x i8> @i(<4 x i8>*, <4 x i8>) !dbg !8 { %3 = load <4 x i8>, <4 x i8>* %0, align 4, !dbg !14 - ; CHECK: $[[reg:.*]] = VLD1LNd32 {{.*}} debug-location !14 :: (load 4 from %ir.0) + ; CHECK: $[[reg:.*]] = VLD1LNd32 {{.*}} debug-location !14 :: (load (s32) from %ir.0) ; CHECK-NEXT: VMOVLuv8i16 {{.*}} $[[reg]], {{.*}} debug-location !14 ; CHECK-NEXT: VMOVLuv4i32 {{.*}} $[[reg]], {{.*}} debug-location !14 %4 = udiv <4 x i8> %1, %3, !dbg !15 diff --git a/llvm/test/CodeGen/ARM/fp16-litpool-arm.mir b/llvm/test/CodeGen/ARM/fp16-litpool-arm.mir index 86457a7815c17..d1703009e219f 100644 --- a/llvm/test/CodeGen/ARM/fp16-litpool-arm.mir +++ b/llvm/test/CodeGen/ARM/fp16-litpool-arm.mir @@ -73,16 +73,16 @@ body: | $sp = frame-setup SUBri $sp, 4, 14, $noreg, $noreg frame-setup CFI_INSTRUCTION def_cfa_offset 4 renamable $s0 = FCONSTH 112, 14, $noreg - renamable $r1 = LDRcp %const.0, 0, 14, $noreg :: (load 4 from constant-pool) - renamable $r2 = LDRcp %const.1, 0, 14, $noreg :: (load 4 from constant-pool) - VSTRH killed renamable $s0, $sp, 1, 14, $noreg :: (volatile store 2 into %ir.S) - STRi12 killed renamable $r2, renamable $r0, 4, 14, $noreg :: (volatile store 4 into %ir.LL + 4) - renamable $s0 = VLDRH %const.2, 0, 14, $noreg :: (load 2 from constant-pool) - STRi12 killed renamable $r1, killed renamable $r0, 0, 14, $noreg :: (volatile store 4 into %ir.LL, align 8) + renamable $r1 = LDRcp %const.0, 0, 14, $noreg :: (load (s32) from constant-pool) + renamable $r2 = LDRcp %const.1, 0, 14, $noreg :: (load (s32) from constant-pool) + VSTRH killed renamable $s0, $sp, 1, 14, $noreg :: (volatile store (s16) into %ir.S) + STRi12 killed renamable $r2, renamable $r0, 4, 14, $noreg :: (volatile store (s32) into %ir.LL + 4) + renamable $s0 = VLDRH %const.2, 0, 14, $noreg :: (load (s16) from constant-pool) + STRi12 killed renamable $r1, killed renamable $r0, 0, 14, $noreg :: (volatile store (s32) into %ir.LL, align 8) dead renamable $r0 = SPACE 8920, undef renamable $r0 - renamable $s2 = VLDRH $sp, 1, 14, $noreg :: (volatile dereferenceable load 2 from %ir.S) + renamable $s2 = VLDRH $sp, 1, 14, $noreg :: (volatile dereferenceable load (s16) from %ir.S) renamable $s0 = VADDH killed renamable $s2, killed renamable $s0, 14, $noreg - VSTRH renamable $s0, $sp, 1, 14, $noreg :: (volatile store 2 into %ir.S) + VSTRH renamable $s0, $sp, 1, 14, $noreg :: (volatile store (s16) into %ir.S) renamable $r0 = VMOVRH killed renamable $s0, 14, $noreg dead renamable $r1 = SPACE 1350, undef renamable $r0 $sp = ADDri $sp, 4, 14, $noreg, $noreg diff --git a/llvm/test/CodeGen/ARM/fp16-litpool-thumb.mir b/llvm/test/CodeGen/ARM/fp16-litpool-thumb.mir index aca33a1e4575f..ca89912fafa0f 100644 --- a/llvm/test/CodeGen/ARM/fp16-litpool-thumb.mir +++ b/llvm/test/CodeGen/ARM/fp16-litpool-thumb.mir @@ -65,18 +65,18 @@ body: | bb.0.entry: $sp = frame-setup tSUBspi $sp, 2, 14, $noreg frame-setup CFI_INSTRUCTION def_cfa_offset 8 - renamable $r0 = tLDRpci %const.0, 14, $noreg :: (load 4 from constant-pool) + renamable $r0 = tLDRpci %const.0, 14, $noreg :: (load (s32) from constant-pool) renamable $s0 = FCONSTH 112, 14, $noreg - tSTRspi killed renamable $r0, $sp, 1, 14, $noreg :: (volatile store 4 into %ir.F) - VSTRH killed renamable $s0, $sp, 1, 14, $noreg :: (volatile store 2 into %ir.S) - renamable $s2 = VLDRH $sp, 1, 14, $noreg :: (volatile dereferenceable load 2 from %ir.S) - renamable $s0 = VLDRH %const.1, 0, 14, $noreg :: (load 2 from constant-pool) + tSTRspi killed renamable $r0, $sp, 1, 14, $noreg :: (volatile store (s32) into %ir.F) + VSTRH killed renamable $s0, $sp, 1, 14, $noreg :: (volatile store (s16) into %ir.S) + renamable $s2 = VLDRH $sp, 1, 14, $noreg :: (volatile dereferenceable load (s16) from %ir.S) + renamable $s0 = VLDRH %const.1, 0, 14, $noreg :: (load (s16) from constant-pool) dead renamable $r0 = SPACE 1230, undef renamable $r0 renamable $s0 = VADDH killed renamable $s2, killed renamable $s0, 14, $noreg - VSTRH renamable $s0, $sp, 1, 14, $noreg :: (volatile store 2 into %ir.S) + VSTRH renamable $s0, $sp, 1, 14, $noreg :: (volatile store (s16) into %ir.S) renamable $r0 = VMOVRH killed renamable $s0, 14, $noreg dead renamable $r1 = SPACE 1330, undef renamable $r0 - dead renamable $s0 = VLDRH $sp, 1, 14, $noreg :: (volatile dereferenceable load 2 from %ir.S) + dead renamable $s0 = VLDRH $sp, 1, 14, $noreg :: (volatile dereferenceable load (s16) from %ir.S) $sp = tADDspi $sp, 2, 14, $noreg tBX_RET 14, $noreg, implicit killed $r0 diff --git a/llvm/test/CodeGen/ARM/fp16-litpool2-arm.mir b/llvm/test/CodeGen/ARM/fp16-litpool2-arm.mir index 194ec839b7135..065cc3b814a14 100644 --- a/llvm/test/CodeGen/ARM/fp16-litpool2-arm.mir +++ b/llvm/test/CodeGen/ARM/fp16-litpool2-arm.mir @@ -88,9 +88,9 @@ body: | $sp = frame-setup SUBri $sp, 4, 14, $noreg, $noreg frame-setup CFI_INSTRUCTION def_cfa_offset 4 - renamable $s0 = VLDRH %const.0, 0, 14, $noreg :: (load 2 from constant-pool) + renamable $s0 = VLDRH %const.0, 0, 14, $noreg :: (load (s16) from constant-pool) VCMPZH renamable $s0, 14, $noreg, implicit-def $fpscr_nzcv - VSTRH killed renamable $s0, $sp, 1, 14, $noreg :: (store 2 into %ir.res) + VSTRH killed renamable $s0, $sp, 1, 14, $noreg :: (store (s16) into %ir.res) FMSTAT 14, $noreg, implicit-def $cpsr, implicit killed $fpscr_nzcv Bcc %bb.2, 0, killed $cpsr diff --git a/llvm/test/CodeGen/ARM/fp16-litpool3-arm.mir b/llvm/test/CodeGen/ARM/fp16-litpool3-arm.mir index abd4a6d7f631c..38348e5b67e1e 100644 --- a/llvm/test/CodeGen/ARM/fp16-litpool3-arm.mir +++ b/llvm/test/CodeGen/ARM/fp16-litpool3-arm.mir @@ -94,9 +94,9 @@ body: | $sp = frame-setup SUBri $sp, 4, 14, $noreg, $noreg frame-setup CFI_INSTRUCTION def_cfa_offset 4 - renamable $s0 = VLDRH %const.0, 0, 14, $noreg :: (load 2 from constant-pool) + renamable $s0 = VLDRH %const.0, 0, 14, $noreg :: (load (s16) from constant-pool) VCMPZH renamable $s0, 14, $noreg, implicit-def $fpscr_nzcv - VSTRH killed renamable $s0, $sp, 1, 14, $noreg :: (store 2 into %ir.res) + VSTRH killed renamable $s0, $sp, 1, 14, $noreg :: (store (s16) into %ir.res) FMSTAT 14, $noreg, implicit-def $cpsr, implicit killed $fpscr_nzcv Bcc %bb.2, 0, killed $cpsr diff --git a/llvm/test/CodeGen/ARM/fpoffset_overflow.mir b/llvm/test/CodeGen/ARM/fpoffset_overflow.mir index 56f20267a2ff8..006acf68663aa 100644 --- a/llvm/test/CodeGen/ARM/fpoffset_overflow.mir +++ b/llvm/test/CodeGen/ARM/fpoffset_overflow.mir @@ -41,10 +41,10 @@ body: | ; CHECK: $r11 = IMPLICIT_DEF ; CHECK: $r12 = IMPLICIT_DEF ; CHECK: $lr = IMPLICIT_DEF - ; CHECK: t2STRi12 killed $r0, $sp, 0, 14 /* CC::al */, $noreg :: (store 4 into %stack.0) + ; CHECK: t2STRi12 killed $r0, $sp, 0, 14 /* CC::al */, $noreg :: (store (s32) into %stack.0) ; CHECK: $r0 = t2ADDri killed $sp, 4096, 14 /* CC::al */, $noreg, $noreg - ; CHECK: $sp = t2LDRi12 killed $r0, 40, 14 /* CC::al */, $noreg :: (load 4) - ; CHECK: $r0 = t2LDRi12 $sp, 0, 14 /* CC::al */, $noreg :: (load 4 from %stack.0) + ; CHECK: $sp = t2LDRi12 killed $r0, 40, 14 /* CC::al */, $noreg :: (load (s32)) + ; CHECK: $r0 = t2LDRi12 $sp, 0, 14 /* CC::al */, $noreg :: (load (s32) from %stack.0) ; CHECK: KILL $r0 ; CHECK: KILL $r1 ; CHECK: KILL $r2 @@ -74,7 +74,7 @@ body: | $r12 = IMPLICIT_DEF $lr = IMPLICIT_DEF - $sp = t2LDRi12 %fixed-stack.0, 0, 14, $noreg :: (load 4) + $sp = t2LDRi12 %fixed-stack.0, 0, 14, $noreg :: (load (s32)) KILL $r0 KILL $r1 @@ -129,7 +129,7 @@ body: | ; CHECK: $r11 = IMPLICIT_DEF ; CHECK: $r12 = IMPLICIT_DEF ; CHECK: $lr = IMPLICIT_DEF - ; CHECK: $r11 = t2LDRi12 $sp, 4092, 14 /* CC::al */, $noreg :: (load 4) + ; CHECK: $r11 = t2LDRi12 $sp, 4092, 14 /* CC::al */, $noreg :: (load (s32)) ; CHECK: KILL $r0 ; CHECK: KILL $r1 ; CHECK: KILL $r2 @@ -157,7 +157,7 @@ body: | $r12 = IMPLICIT_DEF $lr = IMPLICIT_DEF - $r11 = t2LDRi12 %fixed-stack.0, 0, 14, $noreg :: (load 4) + $r11 = t2LDRi12 %fixed-stack.0, 0, 14, $noreg :: (load (s32)) KILL $r0 KILL $r1 diff --git a/llvm/test/CodeGen/ARM/ldrd-memoper.ll b/llvm/test/CodeGen/ARM/ldrd-memoper.ll index 78121adcfeb4c..8501ee0af19d0 100644 --- a/llvm/test/CodeGen/ARM/ldrd-memoper.ll +++ b/llvm/test/CodeGen/ARM/ldrd-memoper.ll @@ -5,7 +5,7 @@ @b = external global i64* -; CHECK: Formed {{.*}} t2LDRD{{.*}} (load 4 from %ir.0), (load 4 from %ir.0 + 4) +; CHECK: Formed {{.*}} t2LDRD{{.*}} (load (s32) from %ir.0), (load (s32) from %ir.0 + 4) define i64 @t(i64 %a) nounwind readonly { entry: %0 = load i64*, i64** @b, align 4 diff --git a/llvm/test/CodeGen/ARM/load_store_opt_clobber_cpsr.mir b/llvm/test/CodeGen/ARM/load_store_opt_clobber_cpsr.mir index c43680baea2e7..8adb3c7e48f71 100644 --- a/llvm/test/CodeGen/ARM/load_store_opt_clobber_cpsr.mir +++ b/llvm/test/CodeGen/ARM/load_store_opt_clobber_cpsr.mir @@ -5,8 +5,8 @@ # Make sure bb.1 is transformed, so the test doesn't accidentally break. # CHECK-LABEL: bb.0: -# CHECK: renamable $r0 = tLDRi renamable $r4, 0, 14 /* CC::al */, $noreg :: (load 4) -# CHECK: renamable $r1 = tLDRi renamable $r4, 1, 14 /* CC::al */, $noreg :: (load 4) +# CHECK: renamable $r0 = tLDRi renamable $r4, 0, 14 /* CC::al */, $noreg :: (load (s32)) +# CHECK: renamable $r1 = tLDRi renamable $r4, 1, 14 /* CC::al */, $noreg :: (load (s32)) # CHECK-LABEL: bb.1: # CHECK: $r4 = tLDMIA_UPD $r4, 14 /* CC::al */, $noreg, def $r0, def $r1 @@ -17,17 +17,17 @@ tracksRegLiveness: true body: | bb.0: liveins: $r2, $r4 - renamable $r0 = tLDRi renamable $r2, 4, 14, $noreg :: (load 4) + renamable $r0 = tLDRi renamable $r2, 4, 14, $noreg :: (load (s32)) dead renamable $r0, $cpsr = tADDi3 killed renamable $r0, 1, 14, $noreg - renamable $r0 = tLDRi renamable $r4, 0, 14, $noreg :: (load 4) - renamable $r1 = tLDRi renamable $r4, 1, 14, $noreg :: (load 4) + renamable $r0 = tLDRi renamable $r4, 0, 14, $noreg :: (load (s32)) + renamable $r1 = tLDRi renamable $r4, 1, 14, $noreg :: (load (s32)) tBcc %bb.1, 0, killed $cpsr bb.1: liveins: $r2, $r4 - renamable $r0 = tLDRi renamable $r2, 4, 14, $noreg :: (load 4) + renamable $r0 = tLDRi renamable $r2, 4, 14, $noreg :: (load (s32)) dead renamable $r0, $cpsr = tADDi3 killed renamable $r0, 1, 14, $noreg - renamable $r0 = tLDRi renamable $r4, 0, 14, $noreg :: (load 4) - renamable $r1 = tLDRi renamable $r4, 1, 14, $noreg :: (load 4) + renamable $r0 = tLDRi renamable $r4, 0, 14, $noreg :: (load (s32)) + renamable $r1 = tLDRi renamable $r4, 1, 14, $noreg :: (load (s32)) bb.2: liveins: $r4 tTRAP diff --git a/llvm/test/CodeGen/ARM/load_store_opt_reg_limit.mir b/llvm/test/CodeGen/ARM/load_store_opt_reg_limit.mir index cc2e5421e8101..7bc710d01ebce 100644 --- a/llvm/test/CodeGen/ARM/load_store_opt_reg_limit.mir +++ b/llvm/test/CodeGen/ARM/load_store_opt_reg_limit.mir @@ -2,39 +2,39 @@ #CHECK-MERGE: foo name: foo # CHECK-MERGE: VSTMDIA $r4, 14 /* CC::al */, $noreg, $d15, $d16, $d17, $d18, $d19, $d20, $d21, $d22, $d23, $d24, $d25, $d26, $d27, $d28, $d29, $d30 -# CHECK-MERGE-NEXT: VSTRD $d31, $r4, 32, 14 /* CC::al */, $noreg :: (store 8) +# CHECK-MERGE-NEXT: VSTRD $d31, $r4, 32, 14 /* CC::al */, $noreg :: (store (s64)) # CHECK-MERGE: VSTMDIA killed $r0, 14 /* CC::al */, $noreg, $d4, $d5, $d6, $d7, $d8, $d9, $d10, $d11, $d12, $d13, $d14 body: | bb.0: - VSTRD $d15, $r4, 0, 14, $noreg :: (store 8) - VSTRD $d16, $r4, 2, 14, $noreg :: (store 8) - VSTRD $d17, $r4, 4, 14, $noreg :: (store 8) - VSTRD $d18, $r4, 6, 14, $noreg :: (store 8) - VSTRD $d19, $r4, 8, 14, $noreg :: (store 8) - VSTRD $d20, $r4, 10, 14, $noreg :: (store 8) - VSTRD $d21, $r4, 12, 14, $noreg :: (store 8) - VSTRD $d22, $r4, 14, 14, $noreg :: (store 8) - VSTRD $d23, $r4, 16, 14, $noreg :: (store 8) - VSTRD $d24, $r4, 18, 14, $noreg :: (store 8) - VSTRD $d25, $r4, 20, 14, $noreg :: (store 8) - VSTRD $d26, $r4, 22, 14, $noreg :: (store 8) - VSTRD $d27, $r4, 24, 14, $noreg :: (store 8) - VSTRD $d28, $r4, 26, 14, $noreg :: (store 8) - VSTRD $d29, $r4, 28, 14, $noreg :: (store 8) - VSTRD $d30, $r4, 30, 14, $noreg :: (store 8) - VSTRD $d31, $r4, 32, 14, $noreg :: (store 8) - VSTRD $d0, $r4, 34, 14, $noreg :: (store 8) - VSTRD $d1, $r4, 36, 14, $noreg :: (store 8) - VSTRD $d3, $r4, 38, 14, $noreg :: (store 8) - VSTRD $d2, $r4, 40, 14, $noreg :: (store 8) - VSTRD $d4, $r4, 42, 14, $noreg :: (store 8) - VSTRD $d5, $r4, 44, 14, $noreg :: (store 8) - VSTRD $d6, $r4, 46, 14, $noreg :: (store 8) - VSTRD $d7, $r4, 48, 14, $noreg :: (store 8) - VSTRD $d8, $r4, 50, 14, $noreg :: (store 8) - VSTRD $d9, $r4, 52, 14, $noreg :: (store 8) - VSTRD $d10, $r4, 54, 14, $noreg :: (store 8) - VSTRD $d11, $r4, 56, 14, $noreg :: (store 8) - VSTRD $d12, $r4, 58, 14, $noreg :: (store 8) - VSTRD $d13, $r4, 60, 14, $noreg :: (store 8) - VSTRD $d14, $r4, 62, 14, $noreg :: (store 8) + VSTRD $d15, $r4, 0, 14, $noreg :: (store (s64)) + VSTRD $d16, $r4, 2, 14, $noreg :: (store (s64)) + VSTRD $d17, $r4, 4, 14, $noreg :: (store (s64)) + VSTRD $d18, $r4, 6, 14, $noreg :: (store (s64)) + VSTRD $d19, $r4, 8, 14, $noreg :: (store (s64)) + VSTRD $d20, $r4, 10, 14, $noreg :: (store (s64)) + VSTRD $d21, $r4, 12, 14, $noreg :: (store (s64)) + VSTRD $d22, $r4, 14, 14, $noreg :: (store (s64)) + VSTRD $d23, $r4, 16, 14, $noreg :: (store (s64)) + VSTRD $d24, $r4, 18, 14, $noreg :: (store (s64)) + VSTRD $d25, $r4, 20, 14, $noreg :: (store (s64)) + VSTRD $d26, $r4, 22, 14, $noreg :: (store (s64)) + VSTRD $d27, $r4, 24, 14, $noreg :: (store (s64)) + VSTRD $d28, $r4, 26, 14, $noreg :: (store (s64)) + VSTRD $d29, $r4, 28, 14, $noreg :: (store (s64)) + VSTRD $d30, $r4, 30, 14, $noreg :: (store (s64)) + VSTRD $d31, $r4, 32, 14, $noreg :: (store (s64)) + VSTRD $d0, $r4, 34, 14, $noreg :: (store (s64)) + VSTRD $d1, $r4, 36, 14, $noreg :: (store (s64)) + VSTRD $d3, $r4, 38, 14, $noreg :: (store (s64)) + VSTRD $d2, $r4, 40, 14, $noreg :: (store (s64)) + VSTRD $d4, $r4, 42, 14, $noreg :: (store (s64)) + VSTRD $d5, $r4, 44, 14, $noreg :: (store (s64)) + VSTRD $d6, $r4, 46, 14, $noreg :: (store (s64)) + VSTRD $d7, $r4, 48, 14, $noreg :: (store (s64)) + VSTRD $d8, $r4, 50, 14, $noreg :: (store (s64)) + VSTRD $d9, $r4, 52, 14, $noreg :: (store (s64)) + VSTRD $d10, $r4, 54, 14, $noreg :: (store (s64)) + VSTRD $d11, $r4, 56, 14, $noreg :: (store (s64)) + VSTRD $d12, $r4, 58, 14, $noreg :: (store (s64)) + VSTRD $d13, $r4, 60, 14, $noreg :: (store (s64)) + VSTRD $d14, $r4, 62, 14, $noreg :: (store (s64)) diff --git a/llvm/test/CodeGen/ARM/machine-sink-multidef.mir b/llvm/test/CodeGen/ARM/machine-sink-multidef.mir index f0de852d319c2..5952538af95d1 100644 --- a/llvm/test/CodeGen/ARM/machine-sink-multidef.mir +++ b/llvm/test/CodeGen/ARM/machine-sink-multidef.mir @@ -48,7 +48,7 @@ body: | ; CHECK: liveins: $r0, $r1 ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $r1 ; CHECK: [[COPY1:%[0-9]+]]:gprnopc = COPY $r0 - ; CHECK: [[LDR_PRE_REG:%[0-9]+]]:gpr, [[LDR_PRE_REG1:%[0-9]+]]:gpr = LDR_PRE_REG [[COPY]], killed [[COPY1]], 16387, 14 /* CC::al */, $noreg :: (load 4 from %ir.c) + ; CHECK: [[LDR_PRE_REG:%[0-9]+]]:gpr, [[LDR_PRE_REG1:%[0-9]+]]:gpr = LDR_PRE_REG [[COPY]], killed [[COPY1]], 16387, 14 /* CC::al */, $noreg :: (load (s32) from %ir.c) ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14 /* CC::al */, $noreg, $noreg ; CHECK: CMPri [[MOVi]], 0, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: Bcc %bb.1, 0 /* CC::eq */, $cpsr @@ -60,7 +60,7 @@ body: | ; CHECK: bb.2: ; CHECK: [[PHI:%[0-9]+]]:gpr = PHI [[LDR_PRE_REG]], %bb.3, [[MOVi]], %bb.1 ; CHECK: CMPri [[MOVi]], 0, 14 /* CC::al */, $noreg, implicit-def $cpsr - ; CHECK: [[LDRi12_:%[0-9]+]]:gpr = LDRi12 killed [[LDR_PRE_REG1]], 4, 14 /* CC::al */, $noreg :: (load 4 from %ir.d) + ; CHECK: [[LDRi12_:%[0-9]+]]:gpr = LDRi12 killed [[LDR_PRE_REG1]], 4, 14 /* CC::al */, $noreg :: (load (s32) from %ir.d) ; CHECK: MOVPCLR 14 /* CC::al */, $noreg bb.0: liveins: $r0, $r1 @@ -68,7 +68,7 @@ body: | %8:gpr = COPY $r1 %9:gprnopc = COPY $r0 - %0:gpr, %6:gpr = LDR_PRE_REG %8, killed %9, 16387, 14, $noreg :: (load 4 from %ir.c) + %0:gpr, %6:gpr = LDR_PRE_REG %8, killed %9, 16387, 14, $noreg :: (load (s32) from %ir.c) %7:gpr = MOVi 0, 14, $noreg, $noreg CMPri %7, 0, 14, $noreg, implicit-def $cpsr Bcc %bb.2, 1, $cpsr @@ -81,7 +81,7 @@ body: | %2:gpr = PHI %0, %bb.0, %7, %bb.1 CMPri %7, 0, 14, $noreg, implicit-def $cpsr - %1:gpr = LDRi12 killed %6, 4, 14, $noreg :: (load 4 from %ir.d) + %1:gpr = LDRi12 killed %6, 4, 14, $noreg :: (load (s32) from %ir.d) MOVPCLR 14, $noreg ... diff --git a/llvm/test/CodeGen/ARM/misched-int-basic-thumb2.mir b/llvm/test/CodeGen/ARM/misched-int-basic-thumb2.mir index 5d481aa57c807..06b34acd4e05d 100644 --- a/llvm/test/CodeGen/ARM/misched-int-basic-thumb2.mir +++ b/llvm/test/CodeGen/ARM/misched-int-basic-thumb2.mir @@ -42,7 +42,7 @@ # CHECK_SWIFT: Latency : 2 # CHECK_R52: Latency : 2 # -# CHECK: SU(3): %3:rgpr = t2LDRi12 %2:rgpr, 0, 14, $noreg :: (dereferenceable load 4 from @g1) +# CHECK: SU(3): %3:rgpr = t2LDRi12 %2:rgpr, 0, 14, $noreg :: (dereferenceable load (s32) from @g1) # CHECK_A9: Latency : 1 # CHECK_SWIFT: Latency : 3 # CHECK_R52: Latency : 4 @@ -57,7 +57,7 @@ # CHECK_SWIFT: Latency : 14 # CHECK_R52: Latency : 8 -# CHECK: SU(8): t2STRi12 %7:rgpr, %2:rgpr, 0, 14, $noreg :: (store 4 into @g1) +# CHECK: SU(8): t2STRi12 %7:rgpr, %2:rgpr, 0, 14, $noreg :: (store (s32) into @g1) # CHECK_A9: Latency : 1 # CHECK_SWIFT: Latency : 0 # CHECK_R52: Latency : 4 @@ -152,12 +152,12 @@ body: | %1 = COPY $r1 %0 = COPY $r0 %2 = t2MOVi32imm @g1 - %3 = t2LDRi12 %2, 0, 14, $noreg :: (dereferenceable load 4 from @g1) + %3 = t2LDRi12 %2, 0, 14, $noreg :: (dereferenceable load (s32) from @g1) %4 = t2MOVi32imm @g2 - %5 = t2LDRi12 %4, 0, 14, $noreg :: (dereferenceable load 4 from @g2) + %5 = t2LDRi12 %4, 0, 14, $noreg :: (dereferenceable load (s32) from @g2) %6 = t2ADDrr %3, %3, 14, $noreg, $noreg %7 = t2SDIV %6, %5, 14, $noreg - t2STRi12 %7, %2, 0, 14, $noreg :: (store 4 into @g1) + t2STRi12 %7, %2, 0, 14, $noreg :: (store (s32) into @g1) %8 = t2SMULBB %1, %1, 14, $noreg %9 = t2SMLABB %0, %0, %8, 14, $noreg %10 = t2UXTH %9, 0, 14, $noreg diff --git a/llvm/test/CodeGen/ARM/nonreserved-callframe-with-basereg.mir b/llvm/test/CodeGen/ARM/nonreserved-callframe-with-basereg.mir index a262594473ff6..1e491a248cad5 100644 --- a/llvm/test/CodeGen/ARM/nonreserved-callframe-with-basereg.mir +++ b/llvm/test/CodeGen/ARM/nonreserved-callframe-with-basereg.mir @@ -37,12 +37,12 @@ body: | liveins: $r0 ; CHECK: t2STRi12 killed $r0, $r6, [[OFFSET:[0-9]+]] - t2STRi12 killed $r0, %stack.0, 0, 14, $noreg :: (store 4 into %stack.0) + t2STRi12 killed $r0, %stack.0, 0, 14, $noreg :: (store (s32) into %stack.0) ADJCALLSTACKDOWN 2276, 0, 14, $noreg, implicit-def dead $sp, implicit $sp ; CHECK: renamable $r0 = t2LDRi12 $r6, [[OFFSET]] - renamable $r0 = t2LDRi12 %stack.0, 0, 14, $noreg, :: (load 4 from %stack.0) + renamable $r0 = t2LDRi12 %stack.0, 0, 14, $noreg, :: (load (s32) from %stack.0) renamable $r1 = IMPLICIT_DEF renamable $r2 = IMPLICIT_DEF renamable $r3 = IMPLICIT_DEF diff --git a/llvm/test/CodeGen/ARM/pei-swiftself.mir b/llvm/test/CodeGen/ARM/pei-swiftself.mir index 5228fbb0f6daa..cd75589328618 100644 --- a/llvm/test/CodeGen/ARM/pei-swiftself.mir +++ b/llvm/test/CodeGen/ARM/pei-swiftself.mir @@ -39,7 +39,7 @@ body: | ; not just use $r10 for that. ; CHECK-NOT: STRi12 %1,{{.*}}$r10 - STRi12 $r1, %stack.0, 0, 14, $noreg :: (store 4) + STRi12 $r1, %stack.0, 0, 14, $noreg :: (store (s32)) ; use the swiftself parameter value. KILL $r10 diff --git a/llvm/test/CodeGen/ARM/pr42638-VMOVRRDCombine.ll b/llvm/test/CodeGen/ARM/pr42638-VMOVRRDCombine.ll index aac5de4ce5e3c..22c73c7775d55 100644 --- a/llvm/test/CodeGen/ARM/pr42638-VMOVRRDCombine.ll +++ b/llvm/test/CodeGen/ARM/pr42638-VMOVRRDCombine.ll @@ -25,9 +25,9 @@ attributes #0 = { "target-cpu"="cortex-r52" "target-features"="-fp64" } ; 32 bits of the double to before actually storing it to memory ; CHECK: Creating new node: {{.*}} = add FrameIndex:i32<2>, Constant:i32<4> -; CHECK-NEXT: Creating new node: {{.*}} i32,ch = load<(load 4 from [[MEM:%.*]] + 4)> +; CHECK-NEXT: Creating new node: {{.*}} i32,ch = load<(load (s32) from [[MEM:%.*]] + 4)> ; CHECK: INLINEASM -; CHECK: (load 4 from [[MEM]] + 4) -; CHECK-NOT: (store 4 into [[MEM]] + 4) +; CHECK: (load (s32) from [[MEM]] + 4) +; CHECK-NOT: (store (s32) into [[MEM]] + 4) diff --git a/llvm/test/CodeGen/ARM/prera-ldst-aliasing.mir b/llvm/test/CodeGen/ARM/prera-ldst-aliasing.mir index 6da3877be23ac..0d1ea4891614c 100644 --- a/llvm/test/CodeGen/ARM/prera-ldst-aliasing.mir +++ b/llvm/test/CodeGen/ARM/prera-ldst-aliasing.mir @@ -26,10 +26,10 @@ body: | %1 : gpr = COPY $r1 %0 : gpr = COPY $r0 - %2 : gpr = t2LDRi12 %1, 0, 14, $noreg :: (load 4 from %ir.y) - t2STRi12 killed %2, %0, 0, 14, $noreg :: (store 4 into %ir.x) - %3 : gpr = t2LDRi12 %1, 4, 14, $noreg :: (load 4 from %ir.arrayidx2) - t2STRi12 killed %3, %0, 4, 14, $noreg :: (store 4 into %ir.arrayidx3) + %2 : gpr = t2LDRi12 %1, 0, 14, $noreg :: (load (s32) from %ir.y) + t2STRi12 killed %2, %0, 0, 14, $noreg :: (store (s32) into %ir.x) + %3 : gpr = t2LDRi12 %1, 4, 14, $noreg :: (load (s32) from %ir.arrayidx2) + t2STRi12 killed %3, %0, 4, 14, $noreg :: (store (s32) into %ir.arrayidx3) ; CHECK: t2LDRi12 ; CHECK-NEXT: t2LDRi12 ; CHECK-NEXT: t2STRi12 diff --git a/llvm/test/CodeGen/ARM/prera-ldst-insertpt.mir b/llvm/test/CodeGen/ARM/prera-ldst-insertpt.mir index 37d68c57764db..42a6fda35adb2 100644 --- a/llvm/test/CodeGen/ARM/prera-ldst-insertpt.mir +++ b/llvm/test/CodeGen/ARM/prera-ldst-insertpt.mir @@ -35,9 +35,9 @@ body: | %5 : rgpr = t2MOVi32imm -858993459 %6 : rgpr, %7 : rgpr = t2UMULL killed %3, %5, 14, $noreg %8 : rgpr, %9 : rgpr = t2UMULL killed %4, %5, 14, $noreg - t2STRi12 %1, %0, 0, 14, $noreg :: (store 4) + t2STRi12 %1, %0, 0, 14, $noreg :: (store (s32)) %10 : rgpr = t2LSLri %2, 1, 14, $noreg, $noreg - t2STRi12 killed %10, %0, 4, 14, $noreg :: (store 4) + t2STRi12 killed %10, %0, 4, 14, $noreg :: (store (s32)) ; Make sure we move the paired stores next to each other, and ; insert them in an appropriate location. @@ -48,9 +48,9 @@ body: | %11 : rgpr = t2MOVi 55, 14, $noreg, $noreg %12 : gprnopc = t2ADDrs %11, killed %7, 19, 14, $noreg, $noreg - t2STRi12 killed %12, %0, 16, 14, $noreg :: (store 4) + t2STRi12 killed %12, %0, 16, 14, $noreg :: (store (s32)) %13 : gprnopc = t2ADDrs %11, killed %9, 19, 14, $noreg, $noreg - t2STRi12 killed %13, %0, 20, 14, $noreg :: (store 4) + t2STRi12 killed %13, %0, 20, 14, $noreg :: (store (s32)) ; Make sure we move the paired stores next to each other. ; CHECK: t2STRi12 killed %12, @@ -73,11 +73,11 @@ body: | %2 : rgpr = COPY $r2 %1 : rgpr = COPY $r1 %0 : gpr = COPY $r0 - t2STRi12 %1, %0, 0, 14, $noreg :: (store 4) + t2STRi12 %1, %0, 0, 14, $noreg :: (store (s32)) %10 : rgpr = t2LSLri %2, 1, 14, $noreg, $noreg - t2STRi12 killed %10, %0, 4, 14, $noreg :: (store 4) + t2STRi12 killed %10, %0, 4, 14, $noreg :: (store (s32)) %3 : rgpr = t2MUL %2, %2, 14, $noreg - t2STRi12 %3, %0, 8, 14, $noreg :: (store 4) + t2STRi12 %3, %0, 8, 14, $noreg :: (store (s32)) ; Make sure we move the paired stores next to each other, and ; insert them in an appropriate location. @@ -100,9 +100,9 @@ body: | %10 : rgpr = t2LSLri %2, 1, 14, $noreg, $noreg %11 : rgpr = t2MOVi 55, 14, $noreg, $noreg %12 : gprnopc = t2ADDrs %11, killed %7, 19, 14, $noreg, $noreg - t2STRi12 killed %12, %0, 16, 14, $noreg :: (store 4) + t2STRi12 killed %12, %0, 16, 14, $noreg :: (store (s32)) %13 : gprnopc = t2ADDrs %11, killed %9, 19, 14, $noreg, $noreg - t2STRi12 killed %13, %0, 20, 14, $noreg :: (store 4) + t2STRi12 killed %13, %0, 20, 14, $noreg :: (store (s32)) ; Make sure we move the paired stores next to each other. ; CHECK: t2STRi12 {{.*}}, 16 diff --git a/llvm/test/CodeGen/ARM/register-scavenger-exceptions.mir b/llvm/test/CodeGen/ARM/register-scavenger-exceptions.mir index 43f225c2d5c3f..a563fd244643d 100644 --- a/llvm/test/CodeGen/ARM/register-scavenger-exceptions.mir +++ b/llvm/test/CodeGen/ARM/register-scavenger-exceptions.mir @@ -52,14 +52,14 @@ body: | ; CHECK: $r3 = MOVi 0, 14 /* CC::al */, $noreg, $noreg ; CHECK: $r4 = MOVi 0, 14 /* CC::al */, $noreg, $noreg ; CHECK: $r10 = SUBri killed $r11, 4096, 14 /* CC::al */, $noreg, $noreg - ; CHECK: STRi12 killed $lr, killed $r10, -916, 14 /* CC::al */, $noreg :: (store 4 into %stack.2) + ; CHECK: STRi12 killed $lr, killed $r10, -916, 14 /* CC::al */, $noreg :: (store (s32) into %stack.2) ; CHECK: BL @_Z3barv, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $r0, implicit killed $r1, implicit killed $r2, implicit killed $r3, implicit killed $r4, implicit-def $sp $r0 = MOVi 0, 14, $noreg, $noreg $r1 = MOVi 0, 14, $noreg, $noreg $r2 = MOVi 0, 14, $noreg, $noreg $r3 = MOVi 0, 14, $noreg, $noreg $r4 = MOVi 0, 14, $noreg, $noreg - STRi12 killed $lr, %stack.2, 0, 14, $noreg :: (store 4 into %stack.2) + STRi12 killed $lr, %stack.2, 0, 14, $noreg :: (store (s32) into %stack.2) BL @_Z3barv, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $r0, implicit killed $r1, implicit killed $r2, implicit killed $r3, implicit killed $r4, implicit-def $sp ... diff --git a/llvm/test/CodeGen/ARM/single-issue-r52.mir b/llvm/test/CodeGen/ARM/single-issue-r52.mir index ddfe46815cc28..af393be3a27af 100644 --- a/llvm/test/CodeGen/ARM/single-issue-r52.mir +++ b/llvm/test/CodeGen/ARM/single-issue-r52.mir @@ -20,7 +20,7 @@ # CHECK: ********** MI Scheduling ********** # CHECK: ScheduleDAGMILive::schedule starting -# CHECK: SU(1): %1:qqpr = VLD4d8Pseudo %0:gpr, 8, 14, $noreg :: (load 32 from %ir.A, align 8) +# CHECK: SU(1): %1:qqpr = VLD4d8Pseudo %0:gpr, 8, 14, $noreg :: (load (s256) from %ir.A, align 8) # CHECK: Latency : 8 # CHECK: Single Issue : true; # CHECK: SU(2): %4:dpr = VADDv8i8 %1.dsub_0:qqpr, %1.dsub_1:qqpr, 14, $noreg @@ -76,7 +76,7 @@ body: | liveins: $r0 %0 = COPY $r0 - %1 = VLD4d8Pseudo %0, 8, 14, $noreg :: (load 32 from %ir.A, align 8) + %1 = VLD4d8Pseudo %0, 8, 14, $noreg :: (load (s256) from %ir.A, align 8) %4 = VADDv8i8 %1.dsub_0, %1.dsub_1, 14, $noreg %5, %6 = VMOVRRD %4, 14, $noreg $r0 = COPY %5 diff --git a/llvm/test/CodeGen/ARM/store-prepostinc.mir b/llvm/test/CodeGen/ARM/store-prepostinc.mir index bea3d4ff68a4b..b974bc29838ca 100644 --- a/llvm/test/CodeGen/ARM/store-prepostinc.mir +++ b/llvm/test/CodeGen/ARM/store-prepostinc.mir @@ -41,10 +41,10 @@ body: | ; CHECK-LABEL: name: STR_pre4 ; CHECK: liveins: $r0, $r1 - ; CHECK: early-clobber $r0 = STR_PRE_IMM killed $r1, $r0, 4, 14 /* CC::al */, $noreg :: (store 4) + ; CHECK: early-clobber $r0 = STR_PRE_IMM killed $r1, $r0, 4, 14 /* CC::al */, $noreg :: (store (s32)) ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $r0 renamable $r0 = nuw ADDri killed renamable $r0, 4, 14 /* CC::al */, $noreg, $noreg - STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4) + STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s32)) BX_RET 14 /* CC::al */, $noreg, implicit $r0 ... @@ -62,10 +62,10 @@ body: | ; CHECK-LABEL: name: STR_pre8 ; CHECK: liveins: $r0, $r1 ; CHECK: renamable $r0 = nuw ADDri killed renamable $r0, 8, 14 /* CC::al */, $noreg, $noreg - ; CHECK: STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4) + ; CHECK: STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s32)) ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $r0 renamable $r0 = nuw ADDri killed renamable $r0, 8, 14 /* CC::al */, $noreg, $noreg - STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4) + STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s32)) BX_RET 14 /* CC::al */, $noreg, implicit $r0 ... @@ -83,10 +83,10 @@ body: | ; CHECK-LABEL: name: STR_pre255 ; CHECK: liveins: $r0, $r1 ; CHECK: renamable $r0 = nuw ADDri killed renamable $r0, 255, 14 /* CC::al */, $noreg, $noreg - ; CHECK: STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4) + ; CHECK: STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s32)) ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $r0 renamable $r0 = nuw ADDri killed renamable $r0, 255, 14 /* CC::al */, $noreg, $noreg - STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4) + STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s32)) BX_RET 14 /* CC::al */, $noreg, implicit $r0 ... @@ -104,10 +104,10 @@ body: | ; CHECK-LABEL: name: STR_pre256 ; CHECK: liveins: $r0, $r1 ; CHECK: renamable $r0 = nuw ADDri killed renamable $r0, 256, 14 /* CC::al */, $noreg, $noreg - ; CHECK: STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4) + ; CHECK: STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s32)) ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $r0 renamable $r0 = nuw ADDri killed renamable $r0, 256, 14 /* CC::al */, $noreg, $noreg - STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4) + STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s32)) BX_RET 14 /* CC::al */, $noreg, implicit $r0 ... @@ -125,10 +125,10 @@ body: | ; CHECK-LABEL: name: STR_pre1024 ; CHECK: liveins: $r0, $r1 ; CHECK: renamable $r0 = nuw ADDri killed renamable $r0, 1024, 14 /* CC::al */, $noreg, $noreg - ; CHECK: STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4) + ; CHECK: STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s32)) ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $r0 renamable $r0 = nuw ADDri killed renamable $r0, 1024, 14 /* CC::al */, $noreg, $noreg - STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4) + STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s32)) BX_RET 14 /* CC::al */, $noreg, implicit $r0 ... @@ -146,10 +146,10 @@ body: | ; CHECK-LABEL: name: STR_pre4095 ; CHECK: liveins: $r0, $r1 ; CHECK: renamable $r0 = nuw ADDri killed renamable $r0, 4095, 14 /* CC::al */, $noreg, $noreg - ; CHECK: STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4) + ; CHECK: STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s32)) ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $r0 renamable $r0 = nuw ADDri killed renamable $r0, 4095, 14 /* CC::al */, $noreg, $noreg - STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4) + STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s32)) BX_RET 14 /* CC::al */, $noreg, implicit $r0 ... @@ -167,10 +167,10 @@ body: | ; CHECK-LABEL: name: STR_pre4096 ; CHECK: liveins: $r0, $r1 ; CHECK: renamable $r0 = nuw ADDri killed renamable $r0, 4096, 14 /* CC::al */, $noreg, $noreg - ; CHECK: STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4) + ; CHECK: STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s32)) ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $r0 renamable $r0 = nuw ADDri killed renamable $r0, 4096, 14 /* CC::al */, $noreg, $noreg - STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4) + STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s32)) BX_RET 14 /* CC::al */, $noreg, implicit $r0 ... @@ -188,10 +188,10 @@ body: | ; CHECK-LABEL: name: STR_prem1024 ; CHECK: liveins: $r0, $r1 ; CHECK: renamable $r0 = nuw SUBri killed renamable $r0, 1024, 14 /* CC::al */, $noreg, $noreg - ; CHECK: STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4) + ; CHECK: STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s32)) ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $r0 renamable $r0 = nuw SUBri killed renamable $r0, 1024, 14 /* CC::al */, $noreg, $noreg - STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4) + STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s32)) BX_RET 14 /* CC::al */, $noreg, implicit $r0 ... @@ -209,10 +209,10 @@ body: | ; CHECK-LABEL: name: STR_prem4095 ; CHECK: liveins: $r0, $r1 ; CHECK: renamable $r0 = nuw SUBri killed renamable $r0, 4095, 14 /* CC::al */, $noreg, $noreg - ; CHECK: STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4) + ; CHECK: STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s32)) ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $r0 renamable $r0 = nuw SUBri killed renamable $r0, 4095, 14 /* CC::al */, $noreg, $noreg - STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4) + STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s32)) BX_RET 14 /* CC::al */, $noreg, implicit $r0 ... @@ -230,10 +230,10 @@ body: | ; CHECK-LABEL: name: STR_prem4096 ; CHECK: liveins: $r0, $r1 ; CHECK: renamable $r0 = nuw SUBri killed renamable $r0, 4096, 14 /* CC::al */, $noreg, $noreg - ; CHECK: STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4) + ; CHECK: STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s32)) ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $r0 renamable $r0 = nuw SUBri killed renamable $r0, 4096, 14 /* CC::al */, $noreg, $noreg - STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4) + STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s32)) BX_RET 14 /* CC::al */, $noreg, implicit $r0 ... @@ -251,9 +251,9 @@ body: | ; CHECK-LABEL: name: STR_post4 ; CHECK: liveins: $r0, $r1 - ; CHECK: early-clobber $r0 = STR_POST_IMM killed $r1, $r0, $noreg, 4, 14 /* CC::al */, $noreg :: (store 4) + ; CHECK: early-clobber $r0 = STR_POST_IMM killed $r1, $r0, $noreg, 4, 14 /* CC::al */, $noreg :: (store (s32)) ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $r0 - STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4) + STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s32)) renamable $r0 = nuw ADDri killed renamable $r0, 4, 14 /* CC::al */, $noreg, $noreg BX_RET 14 /* CC::al */, $noreg, implicit $r0 @@ -271,9 +271,9 @@ body: | ; CHECK-LABEL: name: STR_post8 ; CHECK: liveins: $r0, $r1 - ; CHECK: early-clobber $r0 = STR_POST_IMM killed $r1, $r0, $noreg, 8, 14 /* CC::al */, $noreg :: (store 4) + ; CHECK: early-clobber $r0 = STR_POST_IMM killed $r1, $r0, $noreg, 8, 14 /* CC::al */, $noreg :: (store (s32)) ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $r0 - STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4) + STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s32)) renamable $r0 = nuw ADDri killed renamable $r0, 8, 14 /* CC::al */, $noreg, $noreg BX_RET 14 /* CC::al */, $noreg, implicit $r0 @@ -291,9 +291,9 @@ body: | ; CHECK-LABEL: name: STR_post255 ; CHECK: liveins: $r0, $r1 - ; CHECK: early-clobber $r0 = STR_POST_IMM killed $r1, $r0, $noreg, 255, 14 /* CC::al */, $noreg :: (store 4) + ; CHECK: early-clobber $r0 = STR_POST_IMM killed $r1, $r0, $noreg, 255, 14 /* CC::al */, $noreg :: (store (s32)) ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $r0 - STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4) + STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s32)) renamable $r0 = nuw ADDri killed renamable $r0, 255, 14 /* CC::al */, $noreg, $noreg BX_RET 14 /* CC::al */, $noreg, implicit $r0 @@ -311,9 +311,9 @@ body: | ; CHECK-LABEL: name: STR_post256 ; CHECK: liveins: $r0, $r1 - ; CHECK: early-clobber $r0 = STR_POST_IMM killed $r1, $r0, $noreg, 256, 14 /* CC::al */, $noreg :: (store 4) + ; CHECK: early-clobber $r0 = STR_POST_IMM killed $r1, $r0, $noreg, 256, 14 /* CC::al */, $noreg :: (store (s32)) ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $r0 - STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4) + STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s32)) renamable $r0 = nuw ADDri killed renamable $r0, 256, 14 /* CC::al */, $noreg, $noreg BX_RET 14 /* CC::al */, $noreg, implicit $r0 @@ -331,9 +331,9 @@ body: | ; CHECK-LABEL: name: STR_post1024 ; CHECK: liveins: $r0, $r1 - ; CHECK: early-clobber $r0 = STR_POST_IMM killed $r1, $r0, $noreg, 1024, 14 /* CC::al */, $noreg :: (store 4) + ; CHECK: early-clobber $r0 = STR_POST_IMM killed $r1, $r0, $noreg, 1024, 14 /* CC::al */, $noreg :: (store (s32)) ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $r0 - STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4) + STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s32)) renamable $r0 = nuw ADDri killed renamable $r0, 1024, 14 /* CC::al */, $noreg, $noreg BX_RET 14 /* CC::al */, $noreg, implicit $r0 @@ -351,9 +351,9 @@ body: | ; CHECK-LABEL: name: STR_post4095 ; CHECK: liveins: $r0, $r1 - ; CHECK: early-clobber $r0 = STR_POST_IMM killed $r1, $r0, $noreg, 2095, 14 /* CC::al */, $noreg :: (store 4) + ; CHECK: early-clobber $r0 = STR_POST_IMM killed $r1, $r0, $noreg, 2095, 14 /* CC::al */, $noreg :: (store (s32)) ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $r0 - STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4) + STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s32)) renamable $r0 = nuw ADDri killed renamable $r0, 2095, 14 /* CC::al */, $noreg, $noreg BX_RET 14 /* CC::al */, $noreg, implicit $r0 @@ -371,10 +371,10 @@ body: | ; CHECK-LABEL: name: STR_post4096 ; CHECK: liveins: $r0, $r1 - ; CHECK: STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4) + ; CHECK: STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s32)) ; CHECK: renamable $r0 = nuw ADDri killed renamable $r0, 4096, 14 /* CC::al */, $noreg, $noreg ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $r0 - STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4) + STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s32)) renamable $r0 = nuw ADDri killed renamable $r0, 4096, 14 /* CC::al */, $noreg, $noreg BX_RET 14 /* CC::al */, $noreg, implicit $r0 @@ -392,9 +392,9 @@ body: | ; CHECK-LABEL: name: STR_postm1024 ; CHECK: liveins: $r0, $r1 - ; CHECK: early-clobber $r0 = STR_POST_IMM killed $r1, $r0, $noreg, 5120, 14 /* CC::al */, $noreg :: (store 4) + ; CHECK: early-clobber $r0 = STR_POST_IMM killed $r1, $r0, $noreg, 5120, 14 /* CC::al */, $noreg :: (store (s32)) ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $r0 - STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4) + STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s32)) renamable $r0 = nuw SUBri killed renamable $r0, 1024, 14 /* CC::al */, $noreg, $noreg BX_RET 14 /* CC::al */, $noreg, implicit $r0 @@ -412,9 +412,9 @@ body: | ; CHECK-LABEL: name: STR_postm4095 ; CHECK: liveins: $r0, $r1 - ; CHECK: early-clobber $r0 = STR_POST_IMM killed $r1, $r0, $noreg, 6191, 14 /* CC::al */, $noreg :: (store 4) + ; CHECK: early-clobber $r0 = STR_POST_IMM killed $r1, $r0, $noreg, 6191, 14 /* CC::al */, $noreg :: (store (s32)) ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $r0 - STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4) + STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s32)) renamable $r0 = nuw SUBri killed renamable $r0, 2095, 14 /* CC::al */, $noreg, $noreg BX_RET 14 /* CC::al */, $noreg, implicit $r0 @@ -432,10 +432,10 @@ body: | ; CHECK-LABEL: name: STR_postm4096 ; CHECK: liveins: $r0, $r1 - ; CHECK: STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4) + ; CHECK: STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s32)) ; CHECK: renamable $r0 = nuw SUBri killed renamable $r0, 4096, 14 /* CC::al */, $noreg, $noreg ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $r0 - STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4) + STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s32)) renamable $r0 = nuw SUBri killed renamable $r0, 4096, 14 /* CC::al */, $noreg, $noreg BX_RET 14 /* CC::al */, $noreg, implicit $r0 diff --git a/llvm/test/CodeGen/ARM/v6-jumptable-clobber.mir b/llvm/test/CodeGen/ARM/v6-jumptable-clobber.mir index a572ce5e1a076..ec6f9ef8e9819 100644 --- a/llvm/test/CodeGen/ARM/v6-jumptable-clobber.mir +++ b/llvm/test/CodeGen/ARM/v6-jumptable-clobber.mir @@ -232,7 +232,7 @@ body: | liveins: $r0, $r1 $r2 = tLDRpci %const.0, 14, $noreg - tSTRi killed $r2, killed $r1, 0, 14, $noreg :: (store 4 into %ir.addr) + tSTRi killed $r2, killed $r1, 0, 14, $noreg :: (store (s32) into %ir.addr) dead $r1 = SPACE 980, undef $r0 $r0 = tUXTB killed $r0, 14, $noreg $r1, dead $cpsr = tSUBi3 killed $r0, 1, 14, $noreg @@ -245,7 +245,7 @@ body: | $r0, dead $cpsr = tLSLri killed $r1, 2, 14, $noreg $r1 = tLEApcrelJT %jump-table.0, 14, $noreg - $r0 = tLDRr killed $r1, killed $r0, 14, $noreg :: (load 4 from jump-table) + $r0 = tLDRr killed $r1, killed $r0, 14, $noreg :: (load (s32) from jump-table) tBR_JTr killed $r0, %jump-table.0 bb.3.d2: @@ -330,7 +330,7 @@ body: | liveins: $r0, $r1 $r2 = tLDRpci %const.0, 14, $noreg - tSTRi killed $r2, killed $r1, 0, 14, $noreg :: (store 4 into %ir.addr) + tSTRi killed $r2, killed $r1, 0, 14, $noreg :: (store (s32) into %ir.addr) $r0 = tUXTB killed $r0, 14, $noreg $r1, dead $cpsr = tSUBi3 killed $r0, 1, 14, $noreg tCMPi8 $r1, 25, 14, $noreg, implicit-def $cpsr @@ -342,7 +342,7 @@ body: | $r0, dead $cpsr = tLSLri killed $r1, 2, 14, $noreg $r1 = tLEApcrelJT %jump-table.0, 14, $noreg - $r0 = tLDRr killed $r1, killed $r0, 14, $noreg :: (load 4 from jump-table) + $r0 = tLDRr killed $r1, killed $r0, 14, $noreg :: (load (s32) from jump-table) tBR_JTr killed $r0, %jump-table.0 bb.3.d2: diff --git a/llvm/test/CodeGen/ARM/vldm-liveness.mir b/llvm/test/CodeGen/ARM/vldm-liveness.mir index 675c4c8e9e565..14123ac3759d9 100644 --- a/llvm/test/CodeGen/ARM/vldm-liveness.mir +++ b/llvm/test/CodeGen/ARM/vldm-liveness.mir @@ -28,16 +28,16 @@ body: | liveins: $r0 ; CHECK-LABEL: name: foo - ; CHECK: $s3 = VLDRS $r0, 2, 14 /* CC::al */, $noreg, implicit killed undef $q0, implicit-def $q0 :: (load 4) - ; CHECK: VLDMSIA $r0, 14 /* CC::al */, $noreg, def $s0, def $s1, implicit-def $noreg :: (load 4) - ; CHECK: $s2 = VLDRS killed $r0, 4, 14 /* CC::al */, $noreg, implicit killed $q0, implicit-def $q0 :: (load 4) + ; CHECK: $s3 = VLDRS $r0, 2, 14 /* CC::al */, $noreg, implicit killed undef $q0, implicit-def $q0 :: (load (s32)) + ; CHECK: VLDMSIA $r0, 14 /* CC::al */, $noreg, def $s0, def $s1, implicit-def $noreg :: (load (s32)) + ; CHECK: $s2 = VLDRS killed $r0, 4, 14 /* CC::al */, $noreg, implicit killed $q0, implicit-def $q0 :: (load (s32)) ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $q0 - $s1 = VLDRS $r0, 1, 14, $noreg, implicit-def $q0 :: (load 4) - $s3 = VLDRS $r0, 2, 14, $noreg, implicit killed $q0, implicit-def $q0 :: (load 4) + $s1 = VLDRS $r0, 1, 14, $noreg, implicit-def $q0 :: (load (s32)) + $s3 = VLDRS $r0, 2, 14, $noreg, implicit killed $q0, implicit-def $q0 :: (load (s32)) - $s0 = VLDRS $r0, 0, 14, $noreg, implicit killed $q0, implicit-def $q0 :: (load 4) + $s0 = VLDRS $r0, 0, 14, $noreg, implicit killed $q0, implicit-def $q0 :: (load (s32)) - $s2 = VLDRS killed $r0, 4, 14, $noreg, implicit killed $q0, implicit-def $q0 :: (load 4) + $s2 = VLDRS killed $r0, 4, 14, $noreg, implicit killed $q0, implicit-def $q0 :: (load (s32)) tBX_RET 14, $noreg, implicit $q0 ... diff --git a/llvm/test/CodeGen/ARM/vldmia-sched.mir b/llvm/test/CodeGen/ARM/vldmia-sched.mir index 30b5d928cc702..1b2d9ddbff564 100644 --- a/llvm/test/CodeGen/ARM/vldmia-sched.mir +++ b/llvm/test/CodeGen/ARM/vldmia-sched.mir @@ -24,8 +24,8 @@ body: | $r0 = t2MOVTi16 internal $r0, target-flags(arm-hi16) @a, 14, $noreg } $r1 = t2ADDri $r0, 8, 14, $noreg, $noreg - VLDMDIA killed $r1, 14, $noreg, def $d23, def $d24, def $d25, def $d26, def $d27, def $d28, def $d29, def $d30, def $d31 :: (load 8 from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 2, i32 0) to <2 x float>*)`, align 4), (load 8 from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 4, i32 0) to <2 x float>*)`, align 4), (load 8 from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 6, i32 0) to <2 x float>*)`, align 4), (load 8 from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 8, i32 0) to <2 x float>*)`, align 4), (load 8 from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 10, i32 0) to <2 x float>*)`, align 4), (load 8 from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 12, i32 0) to <2 x float>*)`, align 4), (load 8 from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 14, i32 0) to <2 x float>*)`, align 4), (load 8 from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 16, i32 0) to <2 x float>*)`, align 4), (load 8 from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 18, i32 0) to <2 x float>*)`, align 4) + VLDMDIA killed $r1, 14, $noreg, def $d23, def $d24, def $d25, def $d26, def $d27, def $d28, def $d29, def $d30, def $d31 :: (load (s64) from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 2, i32 0) to <2 x float>*)`, align 4), (load (s64) from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 4, i32 0) to <2 x float>*)`, align 4), (load (s64) from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 6, i32 0) to <2 x float>*)`, align 4), (load (s64) from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 8, i32 0) to <2 x float>*)`, align 4), (load (s64) from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 10, i32 0) to <2 x float>*)`, align 4), (load (s64) from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 12, i32 0) to <2 x float>*)`, align 4), (load (s64) from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 14, i32 0) to <2 x float>*)`, align 4), (load (s64) from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 16, i32 0) to <2 x float>*)`, align 4), (load (s64) from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 18, i32 0) to <2 x float>*)`, align 4) $r0, dead $cpsr = tADDi8 killed $r0, 80, 14, $noreg - VLDMDIA killed $r0, 14, $noreg, def $d0, def $d1, def $d2, def $d3, def $d4, def $d5, def $d6 :: (load 8 from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 20, i32 0) to <2 x float>*)`, align 4), (load 8 from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 22, i32 0) to <2 x float>*)`, align 4), (load 8 from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 24, i32 0) to <2 x float>*)`, align 4), (load 8 from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 26, i32 0) to <2 x float>*)`, align 4), (load 8 from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 28, i32 0) to <2 x float>*)`, align 4), (load 8 from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 30, i32 0) to <2 x float>*)`, align 4), (load 8 from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 32, i32 0) to <2 x float>*)`, align 4), (load 8 from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 33, i32 0) to <2 x float>*)`, align 4), (load 8 from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 34, i32 0) to <2 x float>*)`, align 4), (load 8 from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 35, i32 0) to <2 x float>*)`, align 4), (load 8 from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 36, i32 0) to <2 x float>*)`, align 4), (load 8 from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 37, i32 0) to <2 x float>*)`, align 4), (load 8 from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 38, i32 0) to <2 x float>*)`, align 4), (load 8 from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 39, i32 0) to <2 x float>*)`, align 4), (load 8 from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 40, i32 0) to <2 x float>*)`, align 4), (load 8 from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 41, i32 0) to <2 x float>*)`, align 4), (load 8 from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 42, i32 0) to <2 x float>*)`, align 4) + VLDMDIA killed $r0, 14, $noreg, def $d0, def $d1, def $d2, def $d3, def $d4, def $d5, def $d6 :: (load (s64) from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 20, i32 0) to <2 x float>*)`, align 4), (load (s64) from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 22, i32 0) to <2 x float>*)`, align 4), (load (s64) from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 24, i32 0) to <2 x float>*)`, align 4), (load (s64) from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 26, i32 0) to <2 x float>*)`, align 4), (load (s64) from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 28, i32 0) to <2 x float>*)`, align 4), (load (s64) from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 30, i32 0) to <2 x float>*)`, align 4), (load (s64) from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 32, i32 0) to <2 x float>*)`, align 4), (load (s64) from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 33, i32 0) to <2 x float>*)`, align 4), (load (s64) from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 34, i32 0) to <2 x float>*)`, align 4), (load (s64) from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 35, i32 0) to <2 x float>*)`, align 4), (load (s64) from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 36, i32 0) to <2 x float>*)`, align 4), (load (s64) from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 37, i32 0) to <2 x float>*)`, align 4), (load (s64) from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 38, i32 0) to <2 x float>*)`, align 4), (load (s64) from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 39, i32 0) to <2 x float>*)`, align 4), (load (s64) from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 40, i32 0) to <2 x float>*)`, align 4), (load (s64) from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 41, i32 0) to <2 x float>*)`, align 4), (load (s64) from `<2 x float>* bitcast (float* getelementptr ([1 x float], [1 x float]* @a, i32 42, i32 0) to <2 x float>*)`, align 4) ... diff --git a/llvm/test/CodeGen/Generic/MIRDebugify/check-line-and-variables-x.mir b/llvm/test/CodeGen/Generic/MIRDebugify/check-line-and-variables-x.mir index 18006ff3681db..8614fc04ddf65 100644 --- a/llvm/test/CodeGen/Generic/MIRDebugify/check-line-and-variables-x.mir +++ b/llvm/test/CodeGen/Generic/MIRDebugify/check-line-and-variables-x.mir @@ -64,7 +64,7 @@ body: | %3:gr32 = nsw ADD32rr %2, %2, implicit-def dead $eflags, debug-location !18 DBG_VALUE %3, $noreg, !12, !DIExpression(), debug-location !18 DBG_VALUE $eflags, $noreg, !12, !DIExpression(), debug-location !18 - MOV32mr $rip, 1, $noreg, @ga, $noreg, killed %3, debug-location !DILocation(line: 5, column: 1, scope: !9) :: (store 4 into @ga, !tbaa !18) + MOV32mr $rip, 1, $noreg, @ga, $noreg, killed %3, debug-location !DILocation(line: 5, column: 1, scope: !9) :: (store (s32) into @ga, !tbaa !18) DBG_VALUE 0, $noreg, !12, !DIExpression(), debug-location !DILocation(line: 5, column: 1, scope: !9) ; Let it miss Line 6: Change "!DILocation(line: 6, ..." to "!DILocation(line: 5, ..." $eax = COPY %2, debug-location !DILocation(line: 5, column: 1, scope: !9) diff --git a/llvm/test/CodeGen/Generic/MIRDebugify/check-line-and-variables.mir b/llvm/test/CodeGen/Generic/MIRDebugify/check-line-and-variables.mir index fbaaefe5ba76f..3c412a19fecc7 100644 --- a/llvm/test/CodeGen/Generic/MIRDebugify/check-line-and-variables.mir +++ b/llvm/test/CodeGen/Generic/MIRDebugify/check-line-and-variables.mir @@ -50,17 +50,17 @@ body: | %0:gr32 = COPY $edi %1:gr32 = COPY killed %0 %3:gr32 = COPY killed %2 - MOV32mr %stack.0.a.addr, 1, $noreg, 0, $noreg, %1 :: (store 4 into %ir.a.addr) - MOV32mr %stack.1.b.addr, 1, $noreg, 0, $noreg, %3 :: (store 4 into %ir.b.addr) - %14:gr32 = MOV32rm %stack.0.a.addr, 1, $noreg, 0, $noreg :: (load 4 from %ir.a.addr) - %13:gr32 = ADD32rm killed %14, %stack.1.b.addr, 1, $noreg, 0, $noreg, implicit-def $eflags :: (load 4 from %ir.b.addr) + MOV32mr %stack.0.a.addr, 1, $noreg, 0, $noreg, %1 :: (store (s32) into %ir.a.addr) + MOV32mr %stack.1.b.addr, 1, $noreg, 0, $noreg, %3 :: (store (s32) into %ir.b.addr) + %14:gr32 = MOV32rm %stack.0.a.addr, 1, $noreg, 0, $noreg :: (load (s32) from %ir.a.addr) + %13:gr32 = ADD32rm killed %14, %stack.1.b.addr, 1, $noreg, 0, $noreg, implicit-def $eflags :: (load (s32) from %ir.b.addr) ; dead-mi-elimination will remove %15:gr32 = ... - %15:gr32 = MOV32rm %stack.0.a.addr, 1, $noreg, 0, $noreg :: (load 4 from %ir.a.addr) - MOV32mr %stack.2.c, 1, $noreg, 0, $noreg, killed %13 :: (store 4 into %ir.c) - %9:gr32 = MOV32rm %stack.2.c, 1, $noreg, 0, $noreg :: (load 4 from %ir.c) + %15:gr32 = MOV32rm %stack.0.a.addr, 1, $noreg, 0, $noreg :: (load (s32) from %ir.a.addr) + MOV32mr %stack.2.c, 1, $noreg, 0, $noreg, killed %13 :: (store (s32) into %ir.c) + %9:gr32 = MOV32rm %stack.2.c, 1, $noreg, 0, $noreg :: (load (s32) from %ir.c) %8:gr32 = SHL32ri killed %9, 1, implicit-def $eflags - MOV32mr $noreg, 1, $noreg, @ga, $noreg, killed %8 :: (store 4 into @ga) - %5:gr32 = MOV32rm %stack.2.c, 1, $noreg, 0, $noreg :: (load 4 from %ir.c) + MOV32mr $noreg, 1, $noreg, @ga, $noreg, killed %8 :: (store (s32) into @ga) + %5:gr32 = MOV32rm %stack.2.c, 1, $noreg, 0, $noreg :: (load (s32) from %ir.c) $eax = COPY %5 RETQ implicit $eax diff --git a/llvm/test/CodeGen/Hexagon/addrmode-immop.mir b/llvm/test/CodeGen/Hexagon/addrmode-immop.mir index 6aa72fd3414bf..f1928f8159dc4 100644 --- a/llvm/test/CodeGen/Hexagon/addrmode-immop.mir +++ b/llvm/test/CodeGen/Hexagon/addrmode-immop.mir @@ -33,7 +33,7 @@ tracksRegLiveness: true body: | bb.0.b0: $r2 = A2_tfrsi @g0 + 12 - $r2 = L2_loadri_io killed $r2, @f1 - 1 :: (load 4 from `i32 (%s.0*)** bitcast (i8* getelementptr (i8, i8* bitcast (i8** getelementptr inbounds ({ [3 x i8*], [3 x i8*] }, { [3 x i8*], [3 x i8*] }* @g0, i32 0, inrange i32 0, i32 3) to i8*), i32 sub (i32 ptrtoint (i32 (%s.0*)* @f1 to i32), i32 1)) to i32 (%s.0*)**)`) + $r2 = L2_loadri_io killed $r2, @f1 - 1 :: (load (s32) from `i32 (%s.0*)** bitcast (i8* getelementptr (i8, i8* bitcast (i8** getelementptr inbounds ({ [3 x i8*], [3 x i8*] }, { [3 x i8*], [3 x i8*] }* @g0, i32 0, inrange i32 0, i32 3) to i8*), i32 sub (i32 ptrtoint (i32 (%s.0*)* @f1 to i32), i32 1)) to i32 (%s.0*)**)`) ADJCALLSTACKDOWN 0, 0, implicit-def $r29, implicit-def dead $r30, implicit $r31, implicit $r30, implicit $r29 PS_callr_nr killed $r2, hexagoncsr, implicit undef $r0, implicit-def $r29, implicit-def dead $r0 ADJCALLSTACKUP 0, 0, implicit-def dead $r29, implicit-def dead $r30, implicit-def dead $r31, implicit $r29 diff --git a/llvm/test/CodeGen/Hexagon/bank-conflict-load.mir b/llvm/test/CodeGen/Hexagon/bank-conflict-load.mir index f3d698cae3e17..8a924ffc3ff41 100644 --- a/llvm/test/CodeGen/Hexagon/bank-conflict-load.mir +++ b/llvm/test/CodeGen/Hexagon/bank-conflict-load.mir @@ -21,8 +21,8 @@ body: | bb.0: liveins: $r0, $r1 - $r2 = L2_loadri_io $r0, 8 :: (load 4 from %ir.a) - $r3 = L2_loadri_io killed $r0, 12 :: (load 4 from %ir.a) - $r4 = L2_loadri_io killed $r1, 0 :: (load 4 from %ir.b) + $r2 = L2_loadri_io $r0, 8 :: (load (s32) from %ir.a) + $r3 = L2_loadri_io killed $r0, 12 :: (load (s32) from %ir.a) + $r4 = L2_loadri_io killed $r1, 0 :: (load (s32) from %ir.b) ... diff --git a/llvm/test/CodeGen/Hexagon/bank-conflict.mir b/llvm/test/CodeGen/Hexagon/bank-conflict.mir index ee055f9ac71fc..01828f0b09af4 100644 --- a/llvm/test/CodeGen/Hexagon/bank-conflict.mir +++ b/llvm/test/CodeGen/Hexagon/bank-conflict.mir @@ -112,8 +112,8 @@ body: | liveins: $lc0:0x00000004, $r2:0x00000001, $r3:0x00000001, $r4:0x00000001, $sa0:0x00000004 $r5 = M2_mpysip $r3, 1824 - $r7 = L2_loadrigp @g2, implicit $gp :: (dereferenceable load 4 from @g2) - $r8 = L2_loadrigp @g3, implicit killed $gp :: (dereferenceable load 4 from @g3, align 8) + $r7 = L2_loadrigp @g2, implicit $gp :: (dereferenceable load (s32) from @g2) + $r8 = L2_loadrigp @g3, implicit killed $gp :: (dereferenceable load (s32) from @g3, align 8) $r6 = A2_tfr $r5 $r7 = A2_and killed $r8, killed $r7 $r5 = M2_accii killed $r5, $r2, 1248 @@ -125,17 +125,17 @@ body: | liveins: $lc0:0x00000004, $r0:0x00000001, $r1:0x00000001, $r2:0x00000001, $r3:0x00000001, $r4:0x00000001, $r5:0x00000001, $r6:0x00000001, $r7:0x00000001, $sa0:0x00000004 $r8 = S2_cl0 $r7 - $r12 = L2_loadrubgp @g4, implicit $gp :: (dereferenceable load 1 from @g4) + $r12 = L2_loadrubgp @g4, implicit $gp :: (dereferenceable load (s8) from @g4) $r7 = S2_setbit_r killed $r7, $r8 - $r9 = L4_loadri_rr $r6, $r8, 2 :: (load 4 from %ir.v14) - $r13 = L4_loadri_rr $r5, killed $r8, 2 :: (load 4 from %ir.v17) + $r9 = L4_loadri_rr $r6, $r8, 2 :: (load (s32) from %ir.v14) + $r13 = L4_loadri_rr $r5, killed $r8, 2 :: (load (s32) from %ir.v17) $d4 = M2_vmpy2s_s0 killed $r9, $r9, implicit-def dead $usr_ovf $p0 = S2_tstbit_i killed $r12, 0 $d4 = M2_vmac2s_s0 killed $d4, killed $r13, $r13, implicit-def dead $usr_ovf $p1 = C2_cmpeqi $r7, 0 $d6 = A2_vaddws $d0, $d4, implicit-def dead $usr_ovf $d0 = A2_tfrpt $p0, killed $d0, implicit $d0 - S4_pstorerdf_abs $p0, @g0, $d6, implicit killed $gp :: (store 8 into @g0) + S4_pstorerdf_abs $p0, @g0, $d6, implicit killed $gp :: (store (s64) into @g0) $d0 = A2_tfrpf killed $p0, killed $d6, implicit killed $d0 J2_jumpf killed $p1, %bb.2, implicit-def dead $pc @@ -150,7 +150,7 @@ body: | bb.4: liveins: $r8:0x00000001, $r9:0x00000001 - S2_storerdgp @g1, killed $d4, implicit killed $gp :: (store 8 into @g1) + S2_storerdgp @g1, killed $d4, implicit killed $gp :: (store (s64) into @g1) PS_jmpret killed $r31, implicit-def dead $pc ... diff --git a/llvm/test/CodeGen/Hexagon/cext-opt-stack-no-rr.mir b/llvm/test/CodeGen/Hexagon/cext-opt-stack-no-rr.mir index f743ddf8e2e16..74346d0ddc210 100644 --- a/llvm/test/CodeGen/Hexagon/cext-opt-stack-no-rr.mir +++ b/llvm/test/CodeGen/Hexagon/cext-opt-stack-no-rr.mir @@ -14,10 +14,10 @@ body: | successors: %bb.1, %bb.2 %0:intregs = IMPLICIT_DEF - %1:intregs = L2_loadrub_io killed %0:intregs, 0 :: (load 1 from `i8* undef`, align 2) + %1:intregs = L2_loadrub_io killed %0:intregs, 0 :: (load (s8) from `i8* undef`, align 2) %2:predregs = C2_cmpeqi %1:intregs, 5 %3:intregs = A2_tfrsi 0 - S2_pstorerbt_io %2:predregs, %stack.0, 267, killed %3:intregs :: (store 1 into %stack.0) + S2_pstorerbt_io %2:predregs, %stack.0, 267, killed %3:intregs :: (store (s8) into %stack.0) J2_jumpt %2:predregs, %bb.2, implicit-def $pc bb.1: @@ -25,11 +25,11 @@ body: | %4:predregs = C2_cmpeqi %1:intregs, 6 %5:intregs = A2_tfrsi 2 - S2_pstorerbt_io %4:predregs, %stack.0, 267, killed %5:intregs :: (store 1 into %stack.0) + S2_pstorerbt_io %4:predregs, %stack.0, 267, killed %5:intregs :: (store (s8) into %stack.0) bb.2: %6:intregs = A2_tfrsi 32968 - S2_storerh_io %stack.0, 0, killed %6:intregs :: (store 2 into %stack.0, align 4) + S2_storerh_io %stack.0, 0, killed %6:intregs :: (store (s16) into %stack.0, align 4) PS_jmpret $r31, implicit-def dead $pc ... diff --git a/llvm/test/CodeGen/Hexagon/early-if-conv-lifetime.mir b/llvm/test/CodeGen/Hexagon/early-if-conv-lifetime.mir index 1dd6d36435cd0..0dedca4d14ff0 100644 --- a/llvm/test/CodeGen/Hexagon/early-if-conv-lifetime.mir +++ b/llvm/test/CodeGen/Hexagon/early-if-conv-lifetime.mir @@ -58,7 +58,7 @@ body: | successors: %bb.1.b1(0x40000000), %bb.2.b2(0x40000000) %1 = IMPLICIT_DEF - %0 = L2_loadrb_io killed %1, 0 :: (load 1 from `i8* undef`) + %0 = L2_loadrb_io killed %1, 0 :: (load (s8) from `i8* undef`) %2 = C2_cmpeqi killed %0, 102 %3 = COPY killed %2 J2_jumpf killed %3, %bb.2.b2, implicit-def dead $pc diff --git a/llvm/test/CodeGen/Hexagon/early-if-predicator.mir b/llvm/test/CodeGen/Hexagon/early-if-predicator.mir index 785fcd9d873c2..51fb2ab86a2d0 100644 --- a/llvm/test/CodeGen/Hexagon/early-if-predicator.mir +++ b/llvm/test/CodeGen/Hexagon/early-if-predicator.mir @@ -73,7 +73,7 @@ body: | bb.1.if: successors: %bb.2(0x80000000) - S4_storeiri_io %0, 0, 1 :: (store 4 into %ir.p) + S4_storeiri_io %0, 0, 1 :: (store (s32) into %ir.p) bb.2.endif: PS_jmpret $r31, implicit-def dead $pc diff --git a/llvm/test/CodeGen/Hexagon/ifcvt-diamond-ret.mir b/llvm/test/CodeGen/Hexagon/ifcvt-diamond-ret.mir index ecf9d3d8319ea..797efc384ba7f 100644 --- a/llvm/test/CodeGen/Hexagon/ifcvt-diamond-ret.mir +++ b/llvm/test/CodeGen/Hexagon/ifcvt-diamond-ret.mir @@ -16,10 +16,10 @@ body: | J2_jumpf killed renamable $p0, %bb.2, implicit-def dead $pc bb.1: - S4_storeiri_io undef renamable $r0, 0, 32768 :: (store 4 into `i32* undef`) + S4_storeiri_io undef renamable $r0, 0, 32768 :: (store (s32) into `i32* undef`) PS_jmpret $r31, implicit-def dead $pc bb.2: - S4_storeiri_io undef renamable $r0, 0, 32768 :: (store 4 into `i32* undef`) + S4_storeiri_io undef renamable $r0, 0, 32768 :: (store (s32) into `i32* undef`) PS_jmpret $r31, implicit-def dead $pc ... diff --git a/llvm/test/CodeGen/Hexagon/livephysregs-regmask-clobber.mir b/llvm/test/CodeGen/Hexagon/livephysregs-regmask-clobber.mir index 6a80fdf39303e..8f1cb42b96a6f 100644 --- a/llvm/test/CodeGen/Hexagon/livephysregs-regmask-clobber.mir +++ b/llvm/test/CodeGen/Hexagon/livephysregs-regmask-clobber.mir @@ -25,14 +25,14 @@ body: | bb.0: renamable $r0 = PS_fi %stack.0, 0 ADJCALLSTACKDOWN 0, 0, implicit-def $r29, implicit-def dead $r30, implicit $r31, implicit $r30, implicit $r29 - renamable $w0 = PS_vloadrw_ai %stack.2, 0 :: (load 128 from %stack.2) - V6_vS32b_ai killed renamable $r0, 0, renamable $v1 :: (store 64 into %stack.0, align 128) + renamable $w0 = PS_vloadrw_ai %stack.2, 0 :: (load (s1024) from %stack.2) + V6_vS32b_ai killed renamable $r0, 0, renamable $v1 :: (store (s512) into %stack.0, align 128) $r0 = A2_tfrsi 0 - renamable $r1 = L2_loadri_io %stack.0, 4 :: (load 4 from %stack.0 + 4) + renamable $r1 = L2_loadri_io %stack.0, 4 :: (load (s32) from %stack.0 + 4) J2_call &__hexagon_divsi3, hexagoncsr, implicit-def dead $pc, implicit-def dead $r31, implicit $r29, implicit killed $r0, implicit killed $r1, implicit-def $r29, implicit-def $r0 ADJCALLSTACKUP 0, 0, implicit-def dead $r29, implicit-def dead $r30, implicit-def dead $r31, implicit $r29 renamable $v0 = V6_lvsplatw killed renamable $r0 - PS_vstorerw_ai %stack.1, 0, killed renamable $w0 :: (store 128 into %stack.1) + PS_vstorerw_ai %stack.1, 0, killed renamable $w0 :: (store (s1024) into %stack.1) ... diff --git a/llvm/test/CodeGen/Hexagon/packetize-dccleana.mir b/llvm/test/CodeGen/Hexagon/packetize-dccleana.mir index a26fabd300765..16f58440d607c 100644 --- a/llvm/test/CodeGen/Hexagon/packetize-dccleana.mir +++ b/llvm/test/CodeGen/Hexagon/packetize-dccleana.mir @@ -12,5 +12,5 @@ body: | bb.0: liveins: $r1 Y2_dccleana killed renamable $r1 - $d8 = L2_loadrd_io killed $r29, 8 :: (load 8 from %fixed-stack.0) + $d8 = L2_loadrd_io killed $r29, 8 :: (load (s64) from %fixed-stack.0) ... diff --git a/llvm/test/CodeGen/Hexagon/packetize-load-store-aliasing.mir b/llvm/test/CodeGen/Hexagon/packetize-load-store-aliasing.mir index 1589eaf18505f..815627ab1d4cd 100644 --- a/llvm/test/CodeGen/Hexagon/packetize-load-store-aliasing.mir +++ b/llvm/test/CodeGen/Hexagon/packetize-load-store-aliasing.mir @@ -15,8 +15,8 @@ stack: body: | bb.0: liveins: $r0 - S2_storeri_io $r29, 0, $r0 :: (store 4 into %stack.0) - $r1 = L2_loadri_io $r29, 4 :: (load 4 from %stack.1) + S2_storeri_io $r29, 0, $r0 :: (store (s32) into %stack.0) + $r1 = L2_loadri_io $r29, 4 :: (load (s32) from %stack.1) ... @@ -35,7 +35,7 @@ stack: body: | bb.0: liveins: $r0 - S2_storeri_io $r29, 0, $r0 :: (store 4 into %stack.0) - $r1 = L2_loadri_io $r29, 0 :: (load 4 from %stack.0) + S2_storeri_io $r29, 0, $r0 :: (store (s32) into %stack.0) + $r1 = L2_loadri_io $r29, 0 :: (load (s32) from %stack.0) ... diff --git a/llvm/test/CodeGen/Hexagon/packetize-nvstore.mir b/llvm/test/CodeGen/Hexagon/packetize-nvstore.mir index a93efa3a8764d..e010226e7a497 100644 --- a/llvm/test/CodeGen/Hexagon/packetize-nvstore.mir +++ b/llvm/test/CodeGen/Hexagon/packetize-nvstore.mir @@ -19,7 +19,7 @@ body: | bb.0: liveins: $r0 renamable $r1 = A2_tfrsi 0 - renamable $r0 = S2_storeri_pi renamable $r0, 4, killed renamable $r1 :: (store 4 into %stack.0) - S4_storeiri_io renamable $r0, 0, 0 :: (store 4 into %stack.0 + 4) + renamable $r0 = S2_storeri_pi renamable $r0, 4, killed renamable $r1 :: (store (s32) into %stack.0) + S4_storeiri_io renamable $r0, 0, 0 :: (store (s32) into %stack.0 + 4) J2_jumpr $r31, implicit-def $pc ... diff --git a/llvm/test/CodeGen/Hexagon/packetize-update-offset.mir b/llvm/test/CodeGen/Hexagon/packetize-update-offset.mir index 7b3dff93d8d12..166e93e23cbe5 100644 --- a/llvm/test/CodeGen/Hexagon/packetize-update-offset.mir +++ b/llvm/test/CodeGen/Hexagon/packetize-update-offset.mir @@ -24,8 +24,8 @@ body: | successors: %bb.0, %bb.1 liveins: $lc0, $r0, $r27 $r1 = A2_addi $r0, 24 - $r0 = S2_storerb_pi $r0, 2, $r27 :: (store 1 into @g0, align 2) - S4_storeiri_io killed $r0, 0, 0 :: (store 4 into @g1, align 4) + $r0 = S2_storerb_pi $r0, 2, $r27 :: (store (s8) into @g0, align 2) + S4_storeiri_io killed $r0, 0, 0 :: (store (s32) into @g1, align 4) $r0 = A2_tfr killed $r1 ENDLOOP0 %bb.0, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0 diff --git a/llvm/test/CodeGen/Hexagon/pipeliner/swp-phi-start.mir b/llvm/test/CodeGen/Hexagon/pipeliner/swp-phi-start.mir index aa553dccc7de7..e03b5b7bf2772 100644 --- a/llvm/test/CodeGen/Hexagon/pipeliner/swp-phi-start.mir +++ b/llvm/test/CodeGen/Hexagon/pipeliner/swp-phi-start.mir @@ -6,8 +6,8 @@ # CHECK-NOT: Stage-3 # CHECK: J2_loop0r # CHECK: intregs = S2_addasl_rrri %{{[0-9]+}}, %{{[0-9]+}}, 1, post-instr-symbol -# CHECK: intregs = L2_loadruh_io %{{[0-9]+}}, -4, post-instr-symbol :: (load 2 from %ir.cgep2, !tbaa !0) -# CHECK: intregs = S2_storerh_pi %{{[0-9]+}}, -2, %{{[0-9]+}}, post-instr-symbol :: (store 2 into %ir.lsr.iv, !tbaa !0) +# CHECK: intregs = L2_loadruh_io %{{[0-9]+}}, -4, post-instr-symbol :: (load (s16) from %ir.cgep2, !tbaa !0) +# CHECK: intregs = S2_storerh_pi %{{[0-9]+}}, -2, %{{[0-9]+}}, post-instr-symbol :: (store (s16) into %ir.lsr.iv, !tbaa !0) # CHECK: intregs = nsw A2_addi %{{[0-9]+}}, -1, post-instr-symbol # CHECK: ENDLOOP0 %bb.{{[0-9]+}}, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0 # CHECK-NOT: Stage-0 @@ -16,19 +16,19 @@ ; ModuleID = '/google/src/cloud/jmolloy/tc/google3/third_party/llvm/llvm/test/CodeGen/Hexagon/swp-phi-start.ll' source_filename = "/google/src/cloud/jmolloy/tc/google3/third_party/llvm/llvm/test/CodeGen/Hexagon/swp-phi-start.ll" target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" - + ; Function Attrs: nounwind define void @f0(i32 %a0, i16* nocapture %a1) #0 { b0: br i1 undef, label %b1, label %b2.preheader - + b1: ; preds = %b0 br i1 undef, label %b3, label %b2.preheader - + b2.preheader: ; preds = %b0, %b1 %cgep = getelementptr i16, i16* %a1, i32 undef br label %b2 - + b2: ; preds = %b2.preheader, %b2 %lsr.iv = phi i16* [ %cgep, %b2.preheader ], [ %cgep3, %b2 ] %v1 = phi i32 [ %v7, %b2 ], [ undef, %b2.preheader ] @@ -41,13 +41,13 @@ %v8 = icmp sgt i32 %v7, 0 %cgep3 = getelementptr i16, i16* %lsr.iv, i32 -1 br i1 %v8, label %b2, label %b3 - + b3: ; preds = %b2, %b1 ret void } - + attributes #0 = { nounwind "target-cpu"="hexagonv55" } - + !0 = !{!1, !1, i64 0} !1 = !{!"short", !2, i64 0} !2 = !{!"omnipotent char", !3, i64 0} @@ -111,40 +111,40 @@ body: | bb.0.b0: successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: $r0, $r1 - + %7:intregs = COPY $r1 %6:intregs = COPY $r0 %8:predregs = IMPLICIT_DEF J2_jumpt %8, %bb.2, implicit-def dead $pc J2_jump %bb.1, implicit-def dead $pc - + bb.1.b1: successors: %bb.4(0x40000000), %bb.2(0x40000000) - + %9:predregs = IMPLICIT_DEF J2_jumpt %9, %bb.4, implicit-def dead $pc J2_jump %bb.2, implicit-def dead $pc - + bb.2.b2.preheader: successors: %bb.3(0x80000000) - + %10:intregs = IMPLICIT_DEF %14:intregs = COPY %10 J2_loop0r %bb.3, %14, implicit-def $lc0, implicit-def $sa0, implicit-def $usr - + bb.3.b2 (address-taken): successors: %bb.3(0x7c000000), %bb.4(0x04000000) - + %1:intregs = PHI %7, %bb.2, %5, %bb.3, post-instr-symbol %2:intregs = PHI %10, %bb.2, %4, %bb.3, post-instr-symbol %3:intregs = PHI %6, %bb.2, %2, %bb.3, post-instr-symbol %11:intregs = S2_addasl_rrri %7, %3, 1, post-instr-symbol - %12:intregs = L2_loadruh_io %11, -4, post-instr-symbol :: (load 2 from %ir.cgep2, !tbaa !0) - %5:intregs = S2_storerh_pi %1, -2, %12, post-instr-symbol :: (store 2 into %ir.lsr.iv, !tbaa !0) + %12:intregs = L2_loadruh_io %11, -4, post-instr-symbol :: (load (s16) from %ir.cgep2, !tbaa !0) + %5:intregs = S2_storerh_pi %1, -2, %12, post-instr-symbol :: (store (s16) into %ir.lsr.iv, !tbaa !0) %4:intregs = nsw A2_addi %2, -1, post-instr-symbol ENDLOOP0 %bb.3, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0 J2_jump %bb.4, implicit-def dead $pc - + bb.4.b3: PS_jmpret $r31, implicit-def dead $pc diff --git a/llvm/test/CodeGen/Hexagon/post-inc-aa-metadata.ll b/llvm/test/CodeGen/Hexagon/post-inc-aa-metadata.ll index 940dc8991246d..43e4070966bd7 100644 --- a/llvm/test/CodeGen/Hexagon/post-inc-aa-metadata.ll +++ b/llvm/test/CodeGen/Hexagon/post-inc-aa-metadata.ll @@ -3,7 +3,7 @@ ; Check that the generated post-increment load has TBAA information. ; CHECK-LABEL: Machine code for function fred: -; CHECK: = V6_vL32b_pi %{{[0-9]+}}{{[^,]*}}, 64 :: (load 64{{.*}}!tbaa +; CHECK: = V6_vL32b_pi %{{[0-9]+}}{{[^,]*}}, 64 :: (load (s512){{.*}}!tbaa target triple = "hexagon" diff --git a/llvm/test/CodeGen/Hexagon/postinc-baseoffset.mir b/llvm/test/CodeGen/Hexagon/postinc-baseoffset.mir index 9ab5920450e32..962df0c055247 100644 --- a/llvm/test/CodeGen/Hexagon/postinc-baseoffset.mir +++ b/llvm/test/CodeGen/Hexagon/postinc-baseoffset.mir @@ -18,5 +18,5 @@ tracksRegLiveness: true body: | bb.0: liveins: $r0 - S4_storeiri_io $r0, 0, -1 :: (store 4 into %ir.a) - $r1, $r0 = L2_loadri_pi $r0, 8 :: (load 4 from %ir.a) + S4_storeiri_io $r0, 0, -1 :: (store (s32) into %ir.a) + $r1, $r0 = L2_loadri_pi $r0, 8 :: (load (s32) from %ir.a) diff --git a/llvm/test/CodeGen/Hexagon/regalloc-bad-undef.mir b/llvm/test/CodeGen/Hexagon/regalloc-bad-undef.mir index 06f3b3a706978..093d3ad945aa5 100644 --- a/llvm/test/CodeGen/Hexagon/regalloc-bad-undef.mir +++ b/llvm/test/CodeGen/Hexagon/regalloc-bad-undef.mir @@ -186,7 +186,7 @@ body: | %21 = COPY %13 %21 = S2_lsr_i_p_and %21, %29, 9 %22 = S2_asl_i_p_and %22, %7, 42 - S2_storerd_io undef %23, 0, %22 :: (store 8 into `i64* undef`) + S2_storerd_io undef %23, 0, %22 :: (store (s64) into `i64* undef`) %25 = C2_cmpeqp %21, %51 J2_jumpt %25, %bb.3.for.end, implicit-def dead $pc J2_jump %bb.2.if.end82, implicit-def dead $pc diff --git a/llvm/test/CodeGen/Hexagon/swp-carried-dep1.mir b/llvm/test/CodeGen/Hexagon/swp-carried-dep1.mir index d9db8ec6194c8..474285644cdc5 100644 --- a/llvm/test/CodeGen/Hexagon/swp-carried-dep1.mir +++ b/llvm/test/CodeGen/Hexagon/swp-carried-dep1.mir @@ -11,21 +11,21 @@ # CHECK-NEXT: SU(6) --- | - + %struct.A = type { i16, i16 } - + define i32 @test(%struct.A* noalias nocapture %s, i16* noalias nocapture readonly %r, i32 %n) { entry: %cmp19 = icmp eq i32 %n, 2 br i1 %cmp19, label %for.end, label %for.body.preheader - + for.body.preheader: %0 = add i32 %n, -2 %cgep = getelementptr %struct.A, %struct.A* %s, i32 2, i32 1 %scevgep1 = bitcast i16* %cgep to %struct.A* %cgep9 = getelementptr i16, i16* %r, i32 2 br label %for.body - + for.body: %lsr.iv7 = phi i16* [ %cgep9, %for.body.preheader ], [ %cgep12, %for.body ] %lsr.iv2 = phi %struct.A* [ %scevgep1, %for.body.preheader ], [ %cgep11, %for.body ] @@ -46,12 +46,12 @@ %cgep11 = getelementptr %struct.A, %struct.A* %lsr.iv2, i32 1 %cgep12 = getelementptr i16, i16* %lsr.iv7, i32 1 br i1 %cmp, label %for.end, label %for.body - + for.end: %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add7, %for.body ] ret i32 %sum.0.lcssa } - + ... --- name: test @@ -61,7 +61,7 @@ body: | bb.0: successors: %bb.3, %bb.1 liveins: $r0, $r1, $r2 - + %14:intregs = COPY $r2 %13:intregs = COPY $r1 %12:intregs = COPY $r0 @@ -69,32 +69,32 @@ body: | %15:intregs = A2_tfrsi 0 J2_jumpt killed %16, %bb.3, implicit-def dead $pc J2_jump %bb.1, implicit-def dead $pc - + bb.1: successors: %bb.2 - + %0:intregs = A2_addi %14, -2 %1:intregs = A2_addi %12, 10 %2:intregs = A2_addi %13, 4 %17:intregs = A2_tfrsi 0 %23:intregs = COPY %0 J2_loop0r %bb.2, %23, implicit-def $lc0, implicit-def $sa0, implicit-def $usr - + bb.2 (address-taken): successors: %bb.3, %bb.2 - + %3:intregs = PHI %2, %bb.1, %10, %bb.2 %4:intregs = PHI %1, %bb.1, %9, %bb.2 %6:intregs = PHI %17, %bb.1, %7, %bb.2 - %18:intregs, %10:intregs = L2_loadrh_pi %3, 2 :: (load 2 from %ir.lsr.iv7) - %19:intregs = L2_loadrh_io %4, -8 :: (load 2 from %ir.cgep10) + %18:intregs, %10:intregs = L2_loadrh_pi %3, 2 :: (load (s16) from %ir.lsr.iv7) + %19:intregs = L2_loadrh_io %4, -8 :: (load (s16) from %ir.cgep10) %20:intregs = A2_addi %18, 10 - S2_storerh_io %4, 0, killed %20 :: (store 2 into %ir.lsr.iv24) + S2_storerh_io %4, 0, killed %20 :: (store (s16) into %ir.lsr.iv24) %7:intregs = M2_acci %19, %6, %18 %9:intregs = A2_addi %4, 4 ENDLOOP0 %bb.2, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0 J2_jump %bb.3, implicit-def dead $pc - + bb.3: %11:intregs = PHI %15, %bb.0, %7, %bb.2 $r0 = COPY %11 diff --git a/llvm/test/CodeGen/Hexagon/swp-carried-dep2.mir b/llvm/test/CodeGen/Hexagon/swp-carried-dep2.mir index 5271a2db7758e..f05f4dd7658b5 100644 --- a/llvm/test/CodeGen/Hexagon/swp-carried-dep2.mir +++ b/llvm/test/CodeGen/Hexagon/swp-carried-dep2.mir @@ -11,11 +11,11 @@ # CHECK: SU(5) --- | - + define void @test() { b0: br label %b3 - + b3: %lsr.iv = phi [9 x i32]* [ %0, %b3 ], [ undef, %b0 ] %v0 = phi i32 [ %v8, %b3 ], [ 7, %b0 ] @@ -31,11 +31,11 @@ %0 = bitcast i32* %cgep3 to [9 x i32]* %v9 = icmp sgt i32 %v8, 1 br i1 %v9, label %b3, label %b4 - + b4: unreachable } - + declare i32 @llvm.hexagon.A2.subsat(i32, i32) #0 declare void @llvm.stackprotector(i8*, i8**) #1 @@ -47,24 +47,24 @@ tracksRegLiveness: true body: | bb.0: successors: %bb.1 - + %10:intregs = IMPLICIT_DEF %11:intregs = IMPLICIT_DEF J2_loop0i %bb.1, 6, implicit-def $lc0, implicit-def $sa0, implicit-def $usr - + bb.1 (address-taken): successors: %bb.1, %bb.2 - + %0:intregs = PHI %11, %bb.0, %6, %bb.1 %2:intregs = PHI %10, %bb.0, %4, %bb.1 %3:intregs = PHI %10, %bb.0, %2, %bb.1 - %4:intregs = L2_loadri_io %0, -8 :: (load 4 from %ir.cgep) + %4:intregs = L2_loadri_io %0, -8 :: (load (s32) from %ir.cgep) %12:intregs = A2_subsat %3, %4, implicit-def dead $usr_ovf - S2_storeri_io %0, 0, %12 :: (store 4 into %ir.lsr.iv1) + S2_storeri_io %0, 0, %12 :: (store (s32) into %ir.lsr.iv1) %6:intregs = A2_addi %0, -4 ENDLOOP0 %bb.1, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0 J2_jump %bb.2, implicit-def dead $pc - + bb.2: ... diff --git a/llvm/test/CodeGen/Lanai/peephole-compare.mir b/llvm/test/CodeGen/Lanai/peephole-compare.mir index 61568e451af2a..63a30448198b7 100644 --- a/llvm/test/CodeGen/Lanai/peephole-compare.mir +++ b/llvm/test/CodeGen/Lanai/peephole-compare.mir @@ -632,10 +632,10 @@ body: | %1 = MOVHI target-flags(lanai-hi) @a %2 = OR_I_LO killed %1, target-flags(lanai-lo) @a - %3 = LDW_RI killed %2, 0, 0 :: (load 4 from @a, !tbaa !0) + %3 = LDW_RI killed %2, 0, 0 :: (load (s32) from @a, !tbaa !0) %4 = MOVHI target-flags(lanai-hi) @b %5 = OR_I_LO killed %4, target-flags(lanai-lo) @b - %6 = LDW_RI killed %5, 0, 0 :: (load 4 from @b, !tbaa !0) + %6 = LDW_RI killed %5, 0, 0 :: (load (s32) from @b, !tbaa !0) %0 = SUB_R killed %6, killed %3, 0 SFSUB_F_RI_LO %0, 0, implicit-def $sr BRCC %bb.3.if.end, 10, implicit $sr diff --git a/llvm/test/CodeGen/MIR/AArch64/addrspace-memoperands.mir b/llvm/test/CodeGen/MIR/AArch64/addrspace-memoperands.mir index e84ed9c368906..78b7e97d6b331 100644 --- a/llvm/test/CodeGen/MIR/AArch64/addrspace-memoperands.mir +++ b/llvm/test/CodeGen/MIR/AArch64/addrspace-memoperands.mir @@ -16,18 +16,18 @@ body: | ; CHECK-LABEL: name: addrspace_memoperands ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 8, addrspace 1) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4, align 2, addrspace 3) - ; CHECK: G_STORE [[LOAD]](s64), [[COPY]](p0) :: (store 8, addrspace 1) - ; CHECK: G_STORE [[LOAD1]](s32), [[COPY]](p0) :: (store 4, align 2, addrspace 3) - ; CHECK: G_STORE [[LOAD1]](s32), [[COPY]](p0) :: (store 4) + ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64), addrspace 1) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 2, addrspace 3) + ; CHECK: G_STORE [[LOAD]](s64), [[COPY]](p0) :: (store (s64), addrspace 1) + ; CHECK: G_STORE [[LOAD1]](s32), [[COPY]](p0) :: (store (s32), align 2, addrspace 3) + ; CHECK: G_STORE [[LOAD1]](s32), [[COPY]](p0) :: (store (s32)) ; CHECK: RET_ReallyLR %0:_(p0) = COPY $x0 - %1:_(s64) = G_LOAD %0(p0) :: (load 8, addrspace 1) - %2:_(s32) = G_LOAD %0(p0) :: (load 4, align 2, addrspace 3) - G_STORE %1(s64), %0(p0) :: (store 8, addrspace 1) - G_STORE %2(s32), %0(p0) :: (store 4, align 2, addrspace 3) + %1:_(s64) = G_LOAD %0(p0) :: (load (s64), addrspace 1) + %2:_(s32) = G_LOAD %0(p0) :: (load (s32), align 2, addrspace 3) + G_STORE %1(s64), %0(p0) :: (store (s64), addrspace 1) + G_STORE %2(s32), %0(p0) :: (store (s32), align 2, addrspace 3) ; addrspace 0 is accepted by the parser but not printed - G_STORE %2(s32), %0(p0) :: (store 4, addrspace 0) + G_STORE %2(s32), %0(p0) :: (store (s32), addrspace 0) RET_ReallyLR ... diff --git a/llvm/test/CodeGen/MIR/AArch64/atomic-memoperands.mir b/llvm/test/CodeGen/MIR/AArch64/atomic-memoperands.mir index bb9f920bedd2e..ec9c11ae1ccd0 100644 --- a/llvm/test/CodeGen/MIR/AArch64/atomic-memoperands.mir +++ b/llvm/test/CodeGen/MIR/AArch64/atomic-memoperands.mir @@ -15,19 +15,19 @@ body: | ; CHECK-LABEL: name: atomic_memoperands ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load unordered 8) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load monotonic 4) - ; CHECK: [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p0) :: (load acquire 2) - ; CHECK: G_STORE [[LOAD2]](s16), [[COPY]](p0) :: (store release 2) - ; CHECK: G_STORE [[LOAD1]](s32), [[COPY]](p0) :: (store acq_rel 4) - ; CHECK: G_STORE [[LOAD]](s64), [[COPY]](p0) :: (store syncscope("singlethread") seq_cst 8) + ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load unordered (s64)) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load monotonic (s32)) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p0) :: (load acquire (s16)) + ; CHECK: G_STORE [[LOAD2]](s16), [[COPY]](p0) :: (store release (s16)) + ; CHECK: G_STORE [[LOAD1]](s32), [[COPY]](p0) :: (store acq_rel (s32)) + ; CHECK: G_STORE [[LOAD]](s64), [[COPY]](p0) :: (store syncscope("singlethread") seq_cst (s64)) ; CHECK: RET_ReallyLR %0:_(p0) = COPY $x0 - %1:_(s64) = G_LOAD %0(p0) :: (load unordered 8) - %2:_(s32) = G_LOAD %0(p0) :: (load monotonic 4) - %3:_(s16) = G_LOAD %0(p0) :: (load acquire 2) - G_STORE %3(s16), %0(p0) :: (store release 2) - G_STORE %2(s32), %0(p0) :: (store acq_rel 4) - G_STORE %1(s64), %0(p0) :: (store syncscope("singlethread") seq_cst 8) + %1:_(s64) = G_LOAD %0(p0) :: (load unordered (s64)) + %2:_(s32) = G_LOAD %0(p0) :: (load monotonic (s32)) + %3:_(s16) = G_LOAD %0(p0) :: (load acquire (s16)) + G_STORE %3(s16), %0(p0) :: (store release (s16)) + G_STORE %2(s32), %0(p0) :: (store acq_rel (s32)) + G_STORE %1(s64), %0(p0) :: (store syncscope("singlethread") seq_cst (s64)) RET_ReallyLR ... diff --git a/llvm/test/CodeGen/MIR/AArch64/base-memoperands.mir b/llvm/test/CodeGen/MIR/AArch64/base-memoperands.mir index fdaaa79859773..ebcf0e9f280f9 100644 --- a/llvm/test/CodeGen/MIR/AArch64/base-memoperands.mir +++ b/llvm/test/CodeGen/MIR/AArch64/base-memoperands.mir @@ -1,14 +1,8 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass none -o - %s | FileCheck %s ---- | - - define void @memoperands() { - ret void - } - -... --- -name: memoperands +name: memoperands body: | bb.0: liveins: $x0, $w0 @@ -16,12 +10,28 @@ body: | ; CHECK-LABEL: name: memoperands ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w0 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]] - ; CHECK: G_STORE [[TRUNC]](s8), [[COPY]](p1) :: (store 1, addrspace 1) - ; CHECK: G_STORE [[TRUNC]](s8), [[COPY]](p1) :: (store 1 into unknown-address + 1, addrspace 1) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; CHECK: G_STORE [[TRUNC]](s8), [[COPY]](p1) :: (store (s8), addrspace 1) + ; CHECK: G_STORE [[TRUNC]](s8), [[COPY]](p1) :: (store (s8) into unknown-address + 1, addrspace 1) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[COPY1]](s32) + ; CHECK: G_STORE [[TRUNC1]](s1), [[COPY]](p1) :: (store (s1) into unknown-address + 4, addrspace 1) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s3) = G_TRUNC [[COPY1]](s32) + ; CHECK: G_STORE [[TRUNC2]](s3), [[COPY]](p1) :: (store (s3) into unknown-address + 5, addrspace 1) + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s24) = G_TRUNC [[COPY1]](s32) + ; CHECK: G_STORE [[TRUNC3]](s24), [[COPY]](p1) :: (store (s24) into unknown-address + 6, align 2, basealign 4, addrspace 1) %0:_(p1) = COPY $x0 %1:_(s32) = COPY $w0 %2:_(s8) = G_TRUNC %1 - G_STORE %2(s8), %0(p1) :: (store 1, addrspace 1) - G_STORE %2(s8), %0(p1) :: (store 1 into unknown-address + 1, addrspace 1) + G_STORE %2(s8), %0(p1) :: (store (s8), addrspace 1) + G_STORE %2(s8), %0(p1) :: (store (s8) into unknown-address + 1, addrspace 1) + + %3:_(s1) = G_TRUNC %1 + G_STORE %3, %0 :: (store (s1) into unknown-address + 4, addrspace 1) + + %4:_(s3) = G_TRUNC %1 + G_STORE %4, %0 :: (store (s3) into unknown-address + 5, addrspace 1) + + %5:_(s24) = G_TRUNC %1 + G_STORE %5, %0 :: (store (s24) into unknown-address + 6, addrspace 1) + ... diff --git a/llvm/test/CodeGen/MIR/AArch64/machine-metadata.mir b/llvm/test/CodeGen/MIR/AArch64/machine-metadata.mir index 1dfc45d6546a8..ab248b8249f2e 100644 --- a/llvm/test/CodeGen/MIR/AArch64/machine-metadata.mir +++ b/llvm/test/CodeGen/MIR/AArch64/machine-metadata.mir @@ -76,19 +76,19 @@ body: | ; CHECK-LABEL: name: test_memcpy ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x1 ; CHECK: [[COPY1:%[0-9]+]]:gpr64common = COPY $x0 - ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 1 :: (load 16 from %ir.p1, align 4, !alias.scope !5, !noalias !8) - ; CHECK: STRQui killed [[LDRQui]], [[COPY1]], 0 :: (store 16 into %ir.p0, align 4, !alias.scope !10, !noalias !11) - ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 0 :: (load 4 from %ir.q, !alias.scope !3, !noalias !0) - ; CHECK: [[LDRWui1:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 1 :: (load 4 from %ir.q1, !alias.scope !3, !noalias !0) + ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 1 :: (load (s128) from %ir.p1, align 4, !alias.scope !5, !noalias !8) + ; CHECK: STRQui killed [[LDRQui]], [[COPY1]], 0 :: (store (s128) into %ir.p0, align 4, !alias.scope !10, !noalias !11) + ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 0 :: (load (s32) from %ir.q, !alias.scope !3, !noalias !0) + ; CHECK: [[LDRWui1:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 1 :: (load (s32) from %ir.q1, !alias.scope !3, !noalias !0) ; CHECK: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr killed [[LDRWui]], killed [[LDRWui1]] ; CHECK: $w0 = COPY [[ADDWrr]] ; CHECK: RET_ReallyLR implicit $w0 %1:gpr64common = COPY $x1 %0:gpr64common = COPY $x0 - %2:fpr128 = LDRQui %0, 1 :: (load 16 from %ir.p1, align 4, !alias.scope !5, !noalias !8) - STRQui killed %2, %0, 0 :: (store 16 into %ir.p0, align 4, !alias.scope !10, !noalias !11) - %3:gpr32 = LDRWui %1, 0 :: (load 4 from %ir.q, !alias.scope !3, !noalias !0) - %4:gpr32 = LDRWui %1, 1 :: (load 4 from %ir.q1, !alias.scope !3, !noalias !0) + %2:fpr128 = LDRQui %0, 1 :: (load (s128) from %ir.p1, align 4, !alias.scope !5, !noalias !8) + STRQui killed %2, %0, 0 :: (store (s128) into %ir.p0, align 4, !alias.scope !10, !noalias !11) + %3:gpr32 = LDRWui %1, 0 :: (load (s32) from %ir.q, !alias.scope !3, !noalias !0) + %4:gpr32 = LDRWui %1, 1 :: (load (s32) from %ir.q1, !alias.scope !3, !noalias !0) %5:gpr32 = ADDWrr killed %3, killed %4 $w0 = COPY %5 RET_ReallyLR implicit $w0 @@ -111,19 +111,19 @@ body: | ; CHECK-LABEL: name: test_memcpy_inline ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x1 ; CHECK: [[COPY1:%[0-9]+]]:gpr64common = COPY $x0 - ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 1 :: (load 16 from %ir.p1, align 4, !alias.scope !5, !noalias !8) - ; CHECK: STRQui killed [[LDRQui]], [[COPY1]], 0 :: (store 16 into %ir.p0, align 4, !alias.scope !10, !noalias !11) - ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 0 :: (load 4 from %ir.q, !alias.scope !3, !noalias !0) - ; CHECK: [[LDRWui1:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 1 :: (load 4 from %ir.q1, !alias.scope !3, !noalias !0) + ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 1 :: (load (s128) from %ir.p1, align 4, !alias.scope !5, !noalias !8) + ; CHECK: STRQui killed [[LDRQui]], [[COPY1]], 0 :: (store (s128) into %ir.p0, align 4, !alias.scope !10, !noalias !11) + ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 0 :: (load (s32) from %ir.q, !alias.scope !3, !noalias !0) + ; CHECK: [[LDRWui1:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 1 :: (load (s32) from %ir.q1, !alias.scope !3, !noalias !0) ; CHECK: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr killed [[LDRWui]], killed [[LDRWui1]] ; CHECK: $w0 = COPY [[ADDWrr]] ; CHECK: RET_ReallyLR implicit $w0 %1:gpr64common = COPY $x1 %0:gpr64common = COPY $x0 - %2:fpr128 = LDRQui %0, 1 :: (load 16 from %ir.p1, align 4, !alias.scope !5, !noalias !8) - STRQui killed %2, %0, 0 :: (store 16 into %ir.p0, align 4, !alias.scope !10, !noalias !11) - %3:gpr32 = LDRWui %1, 0 :: (load 4 from %ir.q, !alias.scope !3, !noalias !0) - %4:gpr32 = LDRWui %1, 1 :: (load 4 from %ir.q1, !alias.scope !3, !noalias !0) + %2:fpr128 = LDRQui %0, 1 :: (load (s128) from %ir.p1, align 4, !alias.scope !5, !noalias !8) + STRQui killed %2, %0, 0 :: (store (s128) into %ir.p0, align 4, !alias.scope !10, !noalias !11) + %3:gpr32 = LDRWui %1, 0 :: (load (s32) from %ir.q, !alias.scope !3, !noalias !0) + %4:gpr32 = LDRWui %1, 1 :: (load (s32) from %ir.q1, !alias.scope !3, !noalias !0) %5:gpr32 = ADDWrr killed %3, killed %4 $w0 = COPY %5 RET_ReallyLR implicit $w0 @@ -146,19 +146,19 @@ body: | ; CHECK-LABEL: name: test_mempcpy ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x1 ; CHECK: [[COPY1:%[0-9]+]]:gpr64common = COPY $x0 - ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 1 :: (load 16 from %ir.p1, align 1, !alias.scope !5, !noalias !8) - ; CHECK: STRQui killed [[LDRQui]], [[COPY1]], 0 :: (store 16 into %ir.p0, align 1, !alias.scope !10, !noalias !11) - ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 0 :: (load 4 from %ir.q, !alias.scope !3, !noalias !0) - ; CHECK: [[LDRWui1:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 1 :: (load 4 from %ir.q1, !alias.scope !3, !noalias !0) + ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 1 :: (load (s128) from %ir.p1, align 1, !alias.scope !5, !noalias !8) + ; CHECK: STRQui killed [[LDRQui]], [[COPY1]], 0 :: (store (s128) into %ir.p0, align 1, !alias.scope !10, !noalias !11) + ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 0 :: (load (s32) from %ir.q, !alias.scope !3, !noalias !0) + ; CHECK: [[LDRWui1:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 1 :: (load (s32) from %ir.q1, !alias.scope !3, !noalias !0) ; CHECK: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr killed [[LDRWui]], killed [[LDRWui1]] ; CHECK: $w0 = COPY [[ADDWrr]] ; CHECK: RET_ReallyLR implicit $w0 %1:gpr64common = COPY $x1 %0:gpr64common = COPY $x0 - %2:fpr128 = LDRQui %0, 1 :: (load 16 from %ir.p1, align 1, !alias.scope !5, !noalias !8) - STRQui killed %2, %0, 0 :: (store 16 into %ir.p0, align 1, !alias.scope !10, !noalias !11) - %3:gpr32 = LDRWui %1, 0 :: (load 4 from %ir.q, !alias.scope !3, !noalias !0) - %4:gpr32 = LDRWui %1, 1 :: (load 4 from %ir.q1, !alias.scope !3, !noalias !0) + %2:fpr128 = LDRQui %0, 1 :: (load (s128) from %ir.p1, align 1, !alias.scope !5, !noalias !8) + STRQui killed %2, %0, 0 :: (store (s128) into %ir.p0, align 1, !alias.scope !10, !noalias !11) + %3:gpr32 = LDRWui %1, 0 :: (load (s32) from %ir.q, !alias.scope !3, !noalias !0) + %4:gpr32 = LDRWui %1, 1 :: (load (s32) from %ir.q1, !alias.scope !3, !noalias !0) %5:gpr32 = ADDWrr killed %3, killed %4 $w0 = COPY %5 RET_ReallyLR implicit $w0 diff --git a/llvm/test/CodeGen/MIR/AArch64/mirCanonCopyCopyProp.mir b/llvm/test/CodeGen/MIR/AArch64/mirCanonCopyCopyProp.mir index 4b03f42083dd7..21a7dddc98591 100644 --- a/llvm/test/CodeGen/MIR/AArch64/mirCanonCopyCopyProp.mir +++ b/llvm/test/CodeGen/MIR/AArch64/mirCanonCopyCopyProp.mir @@ -38,9 +38,9 @@ body: | bb.0: liveins: $x0, $x1, $d0, $d1 - %42:gpr32 = LDRWui %stack.0, 0 :: (dereferenceable load 8) + %42:gpr32 = LDRWui %stack.0, 0 :: (dereferenceable load (s64)) - ;CHECK: %bb0_{{[0-9]+}}__1:gpr32 = LDRWui %stack.0, 0 :: (dereferenceable load 8) + ;CHECK: %bb0_{{[0-9]+}}__1:gpr32 = LDRWui %stack.0, 0 :: (dereferenceable load (s64)) ;CHECK-NEXT: $w0 = COPY %bb0_ ;CHECK-NEXT: RET_ReallyLR implicit $w0 diff --git a/llvm/test/CodeGen/MIR/AArch64/mirCanonIdempotent.mir b/llvm/test/CodeGen/MIR/AArch64/mirCanonIdempotent.mir index 924a34d5ce43d..b30ca7c1c7e3c 100644 --- a/llvm/test/CodeGen/MIR/AArch64/mirCanonIdempotent.mir +++ b/llvm/test/CodeGen/MIR/AArch64/mirCanonIdempotent.mir @@ -49,20 +49,20 @@ body: | %2:fpr64 = COPY $d0 %1:gpr64 = COPY $x1 %0:gpr64common = COPY $x0 - STRXui %0, %stack.1, 0 :: (store 8) - STRXui %1, %stack.2, 0 :: (store 8) - STRDui %2, %stack.3, 0 :: (store 8) - STRDui %3, %stack.4, 0 :: (store 8) + STRXui %0, %stack.1, 0 :: (store (s64)) + STRXui %1, %stack.2, 0 :: (store (s64)) + STRDui %2, %stack.3, 0 :: (store (s64)) + STRDui %3, %stack.4, 0 :: (store (s64)) %4:fpr64 = FMOVDi 20 %5:fpr64 = FADDDrr %2, killed %4 - STRDui %5, %stack.5, 0 :: (store 8) + STRDui %5, %stack.5, 0 :: (store (s64)) %6:gpr32 = FCVTZSUWDr %5 STRDroW %3, %0, killed %6, 1, 1 - %7:gpr64common = LDRXui %stack.1, 0 :: (dereferenceable load 8) - %8:fpr64 = LDRDui %stack.5, 0 :: (dereferenceable load 8) + %7:gpr64common = LDRXui %stack.1, 0 :: (dereferenceable load (s64)) + %8:fpr64 = LDRDui %stack.5, 0 :: (dereferenceable load (s64)) %9:gpr32common = FCVTZSUWDr killed %8 %10:fpr64 = LDRDroW %7, %9, 1, 1 @@ -70,30 +70,30 @@ body: | %11:gpr32common = ADDWri %9, 1, 0 STRDroW killed %10, %7, killed %11, 1, 1 - %12:fpr64 = LDRDui %stack.5, 0 :: (dereferenceable load 8) - %13:gpr64common = LDRXui %stack.1, 0 :: (dereferenceable load 8) + %12:fpr64 = LDRDui %stack.5, 0 :: (dereferenceable load (s64)) + %13:gpr64common = LDRXui %stack.1, 0 :: (dereferenceable load (s64)) %14:gpr32common = FCVTZSUWDr %12 %15:gpr32common = ADDWri killed %14, 30, 0 STRDroW %12, killed %13, killed %15, 1, 1 - %16:fpr64 = LDRDui %stack.5, 0 :: (dereferenceable load 8) - STRDui killed %16, %stack.6, 0 :: (store 8) + %16:fpr64 = LDRDui %stack.5, 0 :: (dereferenceable load (s64)) + STRDui killed %16, %stack.6, 0 :: (store (s64)) %19:fpr64 = FMOVDi 112 %46:gpr32 = MOVi32imm 408 - %43:fpr64 = LDRDui %stack.5, 0 :: (dereferenceable load 8) - %44:gpr64 = LDRXui %stack.2, 0 :: (dereferenceable load 8) + %43:fpr64 = LDRDui %stack.5, 0 :: (dereferenceable load (s64)) + %44:gpr64 = LDRXui %stack.2, 0 :: (dereferenceable load (s64)) %45:gpr32 = FCVTZSUWDr %43 %47:gpr64common = SMADDLrrr killed %45, %46, killed %44 - %48:fpr64 = LDRDui %stack.6, 0 :: (dereferenceable load 8) + %48:fpr64 = LDRDui %stack.6, 0 :: (dereferenceable load (s64)) %49:gpr32 = FCVTZSUWDr killed %48 STRDroW %43, killed %47, killed %49, 1, 1 - %21:gpr64 = LDRXui %stack.2, 0 :: (dereferenceable load 8) - %22:fpr64 = LDRDui %stack.5, 0 :: (dereferenceable load 8) + %21:gpr64 = LDRXui %stack.2, 0 :: (dereferenceable load (s64)) + %22:fpr64 = LDRDui %stack.5, 0 :: (dereferenceable load (s64)) %23:gpr32 = FCVTZSUWDr killed %22 %24:gpr32 = MOVi32imm 408 @@ -103,15 +103,15 @@ body: | %29:fpr64 = FADDDrr killed %27, %19 STURDi killed %29, %26, -8 - %30:gpr64common = LDRXui %stack.1, 0 :: (dereferenceable load 8) - %31:fpr64 = LDRDui %stack.5, 0 :: (dereferenceable load 8) + %30:gpr64common = LDRXui %stack.1, 0 :: (dereferenceable load (s64)) + %31:fpr64 = LDRDui %stack.5, 0 :: (dereferenceable load (s64)) %32:gpr32common = FCVTZSUWDr killed %31 %34:gpr64all = IMPLICIT_DEF %33:gpr64 = INSERT_SUBREG %34, %32, %subreg.sub_32 %35:gpr64 = SBFMXri killed %33, 61, 31 %36:fpr64 = LDRDroX killed %30, %35, 0, 0 - %37:gpr64 = LDRXui %stack.2, 0 :: (dereferenceable load 8) + %37:gpr64 = LDRXui %stack.2, 0 :: (dereferenceable load (s64)) %38:gpr32common = ADDWri %32, 20, 0 %39:gpr64common = SMADDLrrr killed %38, %24, killed %37 @@ -119,7 +119,7 @@ body: | %40:gpr64 = MOVi64imm 4617315517961601024 - %42:gpr32 = LDRWui %stack.0, 0 :: (dereferenceable load 8) + %42:gpr32 = LDRWui %stack.0, 0 :: (dereferenceable load (s64)) $w0 = COPY %42 RET_ReallyLR implicit $w0 diff --git a/llvm/test/CodeGen/MIR/AArch64/mirnamer.mir b/llvm/test/CodeGen/MIR/AArch64/mirnamer.mir index ef4939e47136a..8764a41caf340 100644 --- a/llvm/test/CodeGen/MIR/AArch64/mirnamer.mir +++ b/llvm/test/CodeGen/MIR/AArch64/mirnamer.mir @@ -8,12 +8,12 @@ body: | ;CHECK-LABEL: bb.0 ;CHECK-NEXT: %bb0_{{[0-9]+}}__1:_(p0) = COPY $d0 ;CHECK-NEXT: %bb0_{{[0-9]+}}__1:_(<4 x s32>) = COPY $q0 - ;CHECK-NEXT: G_STORE %bb0_{{[0-9]+}}__1(<4 x s32>), %bb0_{{[0-9]+}}__1(p0) :: (store 16) + ;CHECK-NEXT: G_STORE %bb0_{{[0-9]+}}__1(<4 x s32>), %bb0_{{[0-9]+}}__1(p0) :: (store (<4 x s32>)) liveins: $q0, $d0 %1:fpr(p0) = COPY $d0 %0:fpr(<4 x s32>) = COPY $q0 - G_STORE %0(<4 x s32>), %1(p0) :: (store 16) + G_STORE %0(<4 x s32>), %1(p0) :: (store (<4 x s32>)) ... --- name: bar @@ -40,18 +40,18 @@ body: | ;CHECK-NEXT: %bb0_{{[0-9]+}}__5:gpr32 = LDRWui ;CHECK-NEXT: %bb0_{{[0-9]+}}__1:gpr32 = MOVi32imm 5 - %0:gpr32 = LDRWui %stack.0, 0 :: (dereferenceable load 8) + %0:gpr32 = LDRWui %stack.0, 0 :: (dereferenceable load (s64)) %1:gpr32 = MOVi32imm 1 - %2:gpr32 = LDRWui %stack.0, 0 :: (dereferenceable load 8) + %2:gpr32 = LDRWui %stack.0, 0 :: (dereferenceable load (s64)) %3:gpr32 = MOVi32imm 2 - %4:gpr32 = LDRWui %stack.0, 0 :: (dereferenceable load 8) + %4:gpr32 = LDRWui %stack.0, 0 :: (dereferenceable load (s64)) %5:gpr32 = MOVi32imm 3 %10:gpr32 = nsw ADDWrr %0:gpr32, %1:gpr32 - %6:gpr32 = LDRWui %stack.0, 0 :: (dereferenceable load 8) + %6:gpr32 = LDRWui %stack.0, 0 :: (dereferenceable load (s64)) %11:gpr32 = nsw ADDWrr %2:gpr32, %3:gpr32 %7:gpr32 = MOVi32imm 4 %12:gpr32 = nsw ADDWrr %4:gpr32, %5:gpr32 - %8:gpr32 = LDRWui %stack.0, 0 :: (dereferenceable load 8) + %8:gpr32 = LDRWui %stack.0, 0 :: (dereferenceable load (s64)) %9:gpr32 = MOVi32imm 5 %13:gpr32 = nsw ADDWrr %6:gpr32, %7:gpr32 %14:gpr32 = nsw ADDWrr %8:gpr32, %9:gpr32 @@ -80,7 +80,7 @@ body: | ;CHECK-NEXT: %bb0_{{[0-9]+}}__2:gpr32 = COPY %bb0_{{[0-9]+}}__1 ;CHECK-NEXT: $w0 = COPY %bb0_{{[0-9]+}}__2 - %0:gpr32 = LDRWui %stack.0, 0 :: (dereferenceable load 8) + %0:gpr32 = LDRWui %stack.0, 0 :: (dereferenceable load (s64)) %1:gpr32 = COPY %0 %2:gpr32 = COPY %1 %3:gpr32 = COPY %2 diff --git a/llvm/test/CodeGen/MIR/AArch64/stack-object-local-offset.mir b/llvm/test/CodeGen/MIR/AArch64/stack-object-local-offset.mir index ac5f54b5872c0..b4dc46aec2ba7 100644 --- a/llvm/test/CodeGen/MIR/AArch64/stack-object-local-offset.mir +++ b/llvm/test/CodeGen/MIR/AArch64/stack-object-local-offset.mir @@ -34,10 +34,10 @@ stack: body: | bb.0.entry: %0 = ADRP @var - %1 = LDRXui killed %0, @var :: (load 8 from @var) - STRXui killed %1, %stack.0.local_var, 0 :: (store 8 into %ir.local_var) + %1 = LDRXui killed %0, @var :: (load (s64) from @var) + STRXui killed %1, %stack.0.local_var, 0 :: (store (s64) into %ir.local_var) %2 = ADRP @local_addr %3 = ADDXri %stack.0.local_var, 0, 0 - STRXui killed %3, killed %2, @local_addr :: (store 8 into @local_addr) + STRXui killed %3, killed %2, @local_addr :: (store (s64) into @local_addr) RET_ReallyLR ... diff --git a/llvm/test/CodeGen/MIR/AArch64/swp.mir b/llvm/test/CodeGen/MIR/AArch64/swp.mir index d7555bf89384e..47a00f12efb74 100644 --- a/llvm/test/CodeGen/MIR/AArch64/swp.mir +++ b/llvm/test/CodeGen/MIR/AArch64/swp.mir @@ -24,10 +24,10 @@ body: | liveins: $x0 ; CHECK-LABEL: swp - ; CHECK: {{[0-9]+}}:gpr32 = SWPW killed %1, %0 :: (volatile load store monotonic 4 on %ir.addr) + ; CHECK: {{[0-9]+}}:gpr32 = SWPW killed %1, %0 :: (volatile load store monotonic (s32) on %ir.addr) %0:gpr64common = COPY $x0 %1:gpr32 = MOVi32imm 1 - %2:gpr32 = SWPW killed %1, %0 :: (volatile load store monotonic 4 on %ir.addr) + %2:gpr32 = SWPW killed %1, %0 :: (volatile load store monotonic (s32) on %ir.addr) $w0 = COPY %2 RET_ReallyLR implicit $w0 ... diff --git a/llvm/test/CodeGen/MIR/AArch64/target-memoperands.mir b/llvm/test/CodeGen/MIR/AArch64/target-memoperands.mir index ab79611a5c347..2aab5251895e2 100644 --- a/llvm/test/CodeGen/MIR/AArch64/target-memoperands.mir +++ b/llvm/test/CodeGen/MIR/AArch64/target-memoperands.mir @@ -15,15 +15,15 @@ body: | ; CHECK-LABEL: name: target_memoperands ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: ("aarch64-suppress-pair" load 8) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: ("aarch64-strided-access" load 4) - ; CHECK: G_STORE [[LOAD]](s64), [[COPY]](p0) :: ("aarch64-suppress-pair" store 8) - ; CHECK: G_STORE [[LOAD1]](s32), [[COPY]](p0) :: ("aarch64-strided-access" store 4) + ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: ("aarch64-suppress-pair" load (s64)) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: ("aarch64-strided-access" load (s32)) + ; CHECK: G_STORE [[LOAD]](s64), [[COPY]](p0) :: ("aarch64-suppress-pair" store (s64)) + ; CHECK: G_STORE [[LOAD1]](s32), [[COPY]](p0) :: ("aarch64-strided-access" store (s32)) ; CHECK: RET_ReallyLR %0:_(p0) = COPY $x0 - %1:_(s64) = G_LOAD %0(p0) :: ("aarch64-suppress-pair" load 8) - %2:_(s32) = G_LOAD %0(p0) :: ("aarch64-strided-access" load 4) - G_STORE %1(s64), %0(p0) :: ("aarch64-suppress-pair" store 8) - G_STORE %2(s32), %0(p0) :: ("aarch64-strided-access" store 4) + %1:_(s64) = G_LOAD %0(p0) :: ("aarch64-suppress-pair" load (s64)) + %2:_(s32) = G_LOAD %0(p0) :: ("aarch64-strided-access" load (s32)) + G_STORE %1(s64), %0(p0) :: ("aarch64-suppress-pair" store (s64)) + G_STORE %2(s32), %0(p0) :: ("aarch64-strided-access" store (s32)) RET_ReallyLR ... diff --git a/llvm/test/CodeGen/MIR/AMDGPU/custom-pseudo-source-values.ll b/llvm/test/CodeGen/MIR/AMDGPU/custom-pseudo-source-values.ll index c8442e399da89..53c32f383ac7f 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/custom-pseudo-source-values.ll +++ b/llvm/test/CodeGen/MIR/AMDGPU/custom-pseudo-source-values.ll @@ -4,9 +4,9 @@ ; Test that custom pseudo source values can be round trip serialized through MIR. ; CHECK-LABEL: {{^}}name: shader -; CHECK: %[[#]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET killed %17, %18, 4, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4, align 1, addrspace 4) -; CHECK: IMAGE_STORE_V4_V3_nsa_gfx10 killed %[[#]], %[[#]], %[[#]], %[[#]], killed %[[#]], 15, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "ImageResource") -; CHECK: DS_GWS_BARRIER %[[#]], 63, implicit $m0, implicit $exec :: (load 4 from custom "GWSResource") +; CHECK: %[[#]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET killed %17, %18, 4, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource" + 4, align 1, addrspace 4) +; CHECK: IMAGE_STORE_V4_V3_nsa_gfx10 killed %[[#]], %[[#]], %[[#]], %[[#]], killed %[[#]], 15, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "ImageResource") +; CHECK: DS_GWS_BARRIER %[[#]], 63, implicit $m0, implicit $exec :: (load (s32) from custom "GWSResource") define amdgpu_cs void @shader(i32 %arg0, i32 %arg1, <8 x i32> inreg %arg2, <4 x i32> inreg %arg3) { %bload0 = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> %arg3, i32 4, i32 0, i32 0) %bload1 = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> %arg3, i32 8, i32 0, i32 0) diff --git a/llvm/test/CodeGen/MIR/AMDGPU/load-store-opt-dlc.mir b/llvm/test/CodeGen/MIR/AMDGPU/load-store-opt-dlc.mir index 659c4dbef31ae..56aec7c0a2b7f 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/load-store-opt-dlc.mir +++ b/llvm/test/CodeGen/MIR/AMDGPU/load-store-opt-dlc.mir @@ -32,7 +32,7 @@ } ... -# CHECK: BUFFER_STORE_DWORDX2_OFFSET killed %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 0, 0, 0, implicit $exec :: (store 8 into %ir.out.gep.1, align 4, addrspace 1) +# CHECK: BUFFER_STORE_DWORDX2_OFFSET killed %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 0, 0, 0, implicit $exec :: (store (s64) into %ir.out.gep.1, align 4, addrspace 1) --- name: test1 liveins: @@ -48,7 +48,7 @@ body: | $sgpr3 = S_MOV_B32 61440 %0:sgpr_64 = COPY $sgpr0_sgpr1 - %1:sgpr_64 = S_LOAD_DWORDX2_IMM %1, 36, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, addrspace 4) + %1:sgpr_64 = S_LOAD_DWORDX2_IMM %1, 36, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, addrspace 4) %2:sgpr_32 = COPY $sgpr2 %3:sgpr_32 = COPY $sgpr3 %4:sgpr_128 = REG_SEQUENCE %1, %2, %3 @@ -56,14 +56,14 @@ body: | %5:vgpr_32 = COPY $vgpr0 %6:vgpr_32 = COPY $vgpr1 - BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1) - BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1) + BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 0, 0, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1) + BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 0, 0, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1) S_ENDPGM 0 ... -# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 4, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1) -# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 8, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1) +# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 4, 0, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1) +# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 8, 0, 0, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1) --- name: test2 liveins: @@ -79,7 +79,7 @@ body: | $sgpr3 = S_MOV_B32 61440 %0:sgpr_64 = COPY $sgpr0_sgpr1 - %1:sgpr_64 = S_LOAD_DWORDX2_IMM %1, 36, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, addrspace 4) + %1:sgpr_64 = S_LOAD_DWORDX2_IMM %1, 36, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, addrspace 4) %2:sgpr_32 = COPY $sgpr2 %3:sgpr_32 = COPY $sgpr3 %4:sgpr_128 = REG_SEQUENCE %1, %2, %3 @@ -87,14 +87,14 @@ body: | %5:vgpr_32 = COPY $vgpr0 %6:vgpr_32 = COPY $vgpr1 - BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 4, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1) - BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1) + BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 4, 0, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1) + BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 0, 0, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1) S_ENDPGM 0 ... -# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1) -# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 8, 4, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1) +# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 0, 0, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1) +# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 8, 4, 0, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1) --- name: test3 liveins: @@ -110,7 +110,7 @@ body: | $sgpr3 = S_MOV_B32 61440 %0:sgpr_64 = COPY $sgpr0_sgpr1 - %1:sgpr_64 = S_LOAD_DWORDX2_IMM %1, 36, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, addrspace 4) + %1:sgpr_64 = S_LOAD_DWORDX2_IMM %1, 36, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, addrspace 4) %2:sgpr_32 = COPY $sgpr2 %3:sgpr_32 = COPY $sgpr3 %4:sgpr_128 = REG_SEQUENCE %1, %2, %3 @@ -118,13 +118,13 @@ body: | %5:vgpr_32 = COPY $vgpr0 %6:vgpr_32 = COPY $vgpr1 - BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1) - BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 4, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1) + BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 0, 0, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1) + BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 4, 0, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1) S_ENDPGM 0 ... -# CHECK: BUFFER_STORE_DWORDX2_OFFSET killed %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 4, 0, 0, implicit $exec :: (store 8 into %ir.out.gep.1, align 4, addrspace 1) +# CHECK: BUFFER_STORE_DWORDX2_OFFSET killed %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 4, 0, 0, implicit $exec :: (store (s64) into %ir.out.gep.1, align 4, addrspace 1) --- name: test4 liveins: @@ -140,7 +140,7 @@ body: | $sgpr3 = S_MOV_B32 61440 %0:sgpr_64 = COPY $sgpr0_sgpr1 - %1:sgpr_64 = S_LOAD_DWORDX2_IMM %1, 36, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, addrspace 4) + %1:sgpr_64 = S_LOAD_DWORDX2_IMM %1, 36, 0 :: (dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`, addrspace 4) %2:sgpr_32 = COPY $sgpr2 %3:sgpr_32 = COPY $sgpr3 %4:sgpr_128 = REG_SEQUENCE %1, %2, %3 @@ -148,8 +148,8 @@ body: | %5:vgpr_32 = COPY $vgpr0 %6:vgpr_32 = COPY $vgpr1 - BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 4, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1) - BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 4, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1) + BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 4, 0, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1) + BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 4, 0, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1) S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-metadata.mir b/llvm/test/CodeGen/MIR/AMDGPU/machine-metadata.mir index 74ea99465185a..a9fbe15b244e9 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/machine-metadata.mir +++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-metadata.mir @@ -88,11 +88,11 @@ body: | ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 ; CHECK: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] - ; CHECK: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY5]], 16, 0, implicit $exec :: (load 16 from %ir.p1, align 4, !alias.scope !5, !noalias !8, addrspace 1) + ; CHECK: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY5]], 16, 0, implicit $exec :: (load (s128) from %ir.p1, align 4, !alias.scope !5, !noalias !8, addrspace 1) ; CHECK: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] - ; CHECK: GLOBAL_STORE_DWORDX4 [[COPY6]], killed [[GLOBAL_LOAD_DWORDX4_]], 0, 0, implicit $exec :: (store 16 into %ir.p0, align 4, !alias.scope !10, !noalias !11, addrspace 1) + ; CHECK: GLOBAL_STORE_DWORDX4 [[COPY6]], killed [[GLOBAL_LOAD_DWORDX4_]], 0, 0, implicit $exec :: (store (s128) into %ir.p0, align 4, !alias.scope !10, !noalias !11, addrspace 1) ; CHECK: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; CHECK: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 killed [[COPY7]], 0, 0, implicit $exec :: (load 8 from %ir.1, align 4, !alias.scope !3, !noalias !0, addrspace 1) + ; CHECK: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 killed [[COPY7]], 0, 0, implicit $exec :: (load (s64) from %ir.1, align 4, !alias.scope !3, !noalias !0, addrspace 1) ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub0 ; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub1 ; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[COPY8]], killed [[COPY9]], 0, implicit $exec @@ -108,11 +108,11 @@ body: | %17:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1 %18:vreg_64 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1 %9:vreg_64 = COPY %18 - %8:vreg_128 = GLOBAL_LOAD_DWORDX4 %9, 16, 0, implicit $exec :: (load 16 from %ir.p1, align 4, !alias.scope !5, !noalias !8, addrspace 1) + %8:vreg_128 = GLOBAL_LOAD_DWORDX4 %9, 16, 0, implicit $exec :: (load (s128) from %ir.p1, align 4, !alias.scope !5, !noalias !8, addrspace 1) %10:vreg_64 = COPY %18 - GLOBAL_STORE_DWORDX4 %10, killed %8, 0, 0, implicit $exec :: (store 16 into %ir.p0, align 4, !alias.scope !10, !noalias !11, addrspace 1) + GLOBAL_STORE_DWORDX4 %10, killed %8, 0, 0, implicit $exec :: (store (s128) into %ir.p0, align 4, !alias.scope !10, !noalias !11, addrspace 1) %12:vreg_64 = COPY %17 - %11:vreg_64 = GLOBAL_LOAD_DWORDX2 killed %12, 0, 0, implicit $exec :: (load 8 from %ir.1, align 4, !alias.scope !3, !noalias !0, addrspace 1) + %11:vreg_64 = GLOBAL_LOAD_DWORDX2 killed %12, 0, 0, implicit $exec :: (load (s64) from %ir.1, align 4, !alias.scope !3, !noalias !0, addrspace 1) %13:vgpr_32 = COPY %11.sub0 %14:vgpr_32 = COPY %11.sub1 %15:vgpr_32 = V_ADD_U32_e64 killed %13, killed %14, 0, implicit $exec @@ -145,11 +145,11 @@ body: | ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 ; CHECK: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] - ; CHECK: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY5]], 16, 0, implicit $exec :: (load 16 from %ir.p1, align 4, !alias.scope !5, !noalias !8, addrspace 1) + ; CHECK: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY5]], 16, 0, implicit $exec :: (load (s128) from %ir.p1, align 4, !alias.scope !5, !noalias !8, addrspace 1) ; CHECK: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] - ; CHECK: GLOBAL_STORE_DWORDX4 [[COPY6]], killed [[GLOBAL_LOAD_DWORDX4_]], 0, 0, implicit $exec :: (store 16 into %ir.p0, align 4, !alias.scope !10, !noalias !11, addrspace 1) + ; CHECK: GLOBAL_STORE_DWORDX4 [[COPY6]], killed [[GLOBAL_LOAD_DWORDX4_]], 0, 0, implicit $exec :: (store (s128) into %ir.p0, align 4, !alias.scope !10, !noalias !11, addrspace 1) ; CHECK: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; CHECK: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 killed [[COPY7]], 0, 0, implicit $exec :: (load 8 from %ir.1, align 4, !alias.scope !3, !noalias !0, addrspace 1) + ; CHECK: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 killed [[COPY7]], 0, 0, implicit $exec :: (load (s64) from %ir.1, align 4, !alias.scope !3, !noalias !0, addrspace 1) ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub0 ; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub1 ; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[COPY8]], killed [[COPY9]], 0, implicit $exec @@ -165,11 +165,11 @@ body: | %17:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1 %18:vreg_64 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1 %9:vreg_64 = COPY %18 - %8:vreg_128 = GLOBAL_LOAD_DWORDX4 %9, 16, 0, implicit $exec :: (load 16 from %ir.p1, align 4, !alias.scope !5, !noalias !8, addrspace 1) + %8:vreg_128 = GLOBAL_LOAD_DWORDX4 %9, 16, 0, implicit $exec :: (load (s128) from %ir.p1, align 4, !alias.scope !5, !noalias !8, addrspace 1) %10:vreg_64 = COPY %18 - GLOBAL_STORE_DWORDX4 %10, killed %8, 0, 0, implicit $exec :: (store 16 into %ir.p0, align 4, !alias.scope !10, !noalias !11, addrspace 1) + GLOBAL_STORE_DWORDX4 %10, killed %8, 0, 0, implicit $exec :: (store (s128) into %ir.p0, align 4, !alias.scope !10, !noalias !11, addrspace 1) %12:vreg_64 = COPY %17 - %11:vreg_64 = GLOBAL_LOAD_DWORDX2 killed %12, 0, 0, implicit $exec :: (load 8 from %ir.1, align 4, !alias.scope !3, !noalias !0, addrspace 1) + %11:vreg_64 = GLOBAL_LOAD_DWORDX2 killed %12, 0, 0, implicit $exec :: (load (s64) from %ir.1, align 4, !alias.scope !3, !noalias !0, addrspace 1) %13:vgpr_32 = COPY %11.sub0 %14:vgpr_32 = COPY %11.sub1 %15:vgpr_32 = V_ADD_U32_e64 killed %13, killed %14, 0, implicit $exec diff --git a/llvm/test/CodeGen/MIR/AMDGPU/mircanon-memoperands.mir b/llvm/test/CodeGen/MIR/AMDGPU/mircanon-memoperands.mir index 01828eb4f6521..e5d80e9c59fcd 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/mircanon-memoperands.mir +++ b/llvm/test/CodeGen/MIR/AMDGPU/mircanon-memoperands.mir @@ -33,11 +33,11 @@ body: | ; CHECK-NEXT: %bb0_{{[0-9]+}}__1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0 = COPY $sgpr4_sgpr5 - %1 = S_LOAD_DWORDX2_IMM %0, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) - %2 = S_LOAD_DWORDX2_IMM %0, 0, 0 :: ( dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) - %3 = S_LOAD_DWORDX2_IMM %0, 0, 0 :: ( invariant load 8 from `i64 addrspace(4)* undef`) - %4 = S_LOAD_DWORDX2_IMM %0, 0, 0 :: ( load 8 from `i64 addrspace(4)* undef`) - %5 = S_LOAD_DWORDX2_IMM %0, 0, 0 :: ( load 8 from `i64 addrspace(2)* undef`) - %6 = S_LOAD_DWORDX2_IMM %0, 0, 0 :: ( load 8 from `i64 addrspace(1)* undef`) + %1 = S_LOAD_DWORDX2_IMM %0, 0, 0 :: (non-temporal dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`) + %2 = S_LOAD_DWORDX2_IMM %0, 0, 0 :: ( dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`) + %3 = S_LOAD_DWORDX2_IMM %0, 0, 0 :: ( invariant load (s64) from `i64 addrspace(4)* undef`) + %4 = S_LOAD_DWORDX2_IMM %0, 0, 0 :: ( load (s64) from `i64 addrspace(4)* undef`) + %5 = S_LOAD_DWORDX2_IMM %0, 0, 0 :: ( load (s64) from `i64 addrspace(2)* undef`) + %6 = S_LOAD_DWORDX2_IMM %0, 0, 0 :: ( load (s64) from `i64 addrspace(1)* undef`) ... diff --git a/llvm/test/CodeGen/MIR/AMDGPU/parse-order-reserved-regs.mir b/llvm/test/CodeGen/MIR/AMDGPU/parse-order-reserved-regs.mir index be154815bbb95..3b38fdb6592a4 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/parse-order-reserved-regs.mir +++ b/llvm/test/CodeGen/MIR/AMDGPU/parse-order-reserved-regs.mir @@ -12,7 +12,7 @@ # CHECK: isEntryFunction: true # CHECK: scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' # CHECK: frameOffsetReg: '$sgpr50' -# CHECK: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr50, 4, 0, 0, 0, implicit $exec :: (load 4, addrspace 5) +# CHECK: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr50, 4, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) name: reserve_correct_register tracksRegLiveness: true machineFunctionInfo: @@ -24,6 +24,6 @@ stack: body: | bb.0: - renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr50, 4, 0, 0, 0, implicit $exec :: (load 4, addrspace 5) + renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr50, 4, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/MIR/AMDGPU/syncscopes.mir b/llvm/test/CodeGen/MIR/AMDGPU/syncscopes.mir index 44155cd765e18..8ee8decc480a7 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/syncscopes.mir +++ b/llvm/test/CodeGen/MIR/AMDGPU/syncscopes.mir @@ -42,9 +42,9 @@ !0 = !{i32 1} # GCN-LABEL: name: syncscopes -# GCN: FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("agent") seq_cst 4 into %ir.agent_out, addrspace 4) -# GCN: FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("workgroup") seq_cst 4 into %ir.workgroup_out, addrspace 4) -# GCN: FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("wavefront") seq_cst 4 into %ir.wavefront_out, addrspace 4) +# GCN: FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("agent") seq_cst (s32) into %ir.agent_out, addrspace 4) +# GCN: FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("workgroup") seq_cst (s32) into %ir.workgroup_out, addrspace 4) +# GCN: FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("wavefront") seq_cst (s32) into %ir.wavefront_out, addrspace 4) ... --- name: syncscopes @@ -74,27 +74,27 @@ body: | liveins: $sgpr4_sgpr5 S_WAITCNT 0 - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr4_sgpr5, 8, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) - $sgpr6 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`) - $sgpr2_sgpr3 = S_LOAD_DWORDX2_IMM $sgpr4_sgpr5, 24, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) - $sgpr7 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 16, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`) - $sgpr8 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 32, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr4_sgpr5, 8, 0 :: (non-temporal dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`) + $sgpr6 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 0, 0 :: (non-temporal dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`) + $sgpr2_sgpr3 = S_LOAD_DWORDX2_IMM $sgpr4_sgpr5, 24, 0 :: (non-temporal dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`) + $sgpr7 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 16, 0 :: (non-temporal dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`) + $sgpr8 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 32, 0 :: (non-temporal dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`) S_WAITCNT 127 $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 - $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed $sgpr4_sgpr5, 40, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) + $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed $sgpr4_sgpr5, 40, 0 :: (non-temporal dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`) $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit killed $sgpr0_sgpr1, implicit $sgpr0_sgpr1, implicit $exec $vgpr2 = V_MOV_B32_e32 killed $sgpr6, implicit $exec, implicit $exec - FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("agent") seq_cst 4 into %ir.agent_out) + FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("agent") seq_cst (s32) into %ir.agent_out) S_WAITCNT 112 $vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr2_sgpr3 $vgpr1 = V_MOV_B32_e32 killed $sgpr3, implicit $exec, implicit killed $sgpr2_sgpr3, implicit $sgpr2_sgpr3, implicit $exec $vgpr2 = V_MOV_B32_e32 killed $sgpr7, implicit $exec, implicit $exec - FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("workgroup") seq_cst 4 into %ir.workgroup_out) + FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("workgroup") seq_cst (s32) into %ir.workgroup_out) S_WAITCNT 112 $vgpr0 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr4_sgpr5 $vgpr1 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit killed $sgpr4_sgpr5, implicit $sgpr4_sgpr5, implicit $exec $vgpr2 = V_MOV_B32_e32 killed $sgpr8, implicit $exec, implicit $exec - FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("wavefront") seq_cst 4 into %ir.wavefront_out) + FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("wavefront") seq_cst (s32) into %ir.wavefront_out) S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/MIR/ARM/thumb2-sub-sp-t3.mir b/llvm/test/CodeGen/MIR/ARM/thumb2-sub-sp-t3.mir index 520a8fadf7a53..3f0ca25883d25 100644 --- a/llvm/test/CodeGen/MIR/ARM/thumb2-sub-sp-t3.mir +++ b/llvm/test/CodeGen/MIR/ARM/thumb2-sub-sp-t3.mir @@ -86,14 +86,14 @@ body: | ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_register $r7 ; CHECK: $sp = frame-setup t2SUBspImm12 killed $sp, 4008, 14 /* CC::al */, $noreg ; CHECK: renamable $r0 = t2ADDri $sp, 8, 14 /* CC::al */, $noreg, $noreg - ; CHECK: t2STRi12 killed renamable $r0, $sp, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.s) - ; CHECK: renamable $r0 = t2LDRi12 $sp, 4, 14 /* CC::al */, $noreg :: (dereferenceable load 4 from %ir.s) + ; CHECK: t2STRi12 killed renamable $r0, $sp, 4, 14 /* CC::al */, $noreg :: (store (s32) into %ir.s) + ; CHECK: renamable $r0 = t2LDRi12 $sp, 4, 14 /* CC::al */, $noreg :: (dereferenceable load (s32) from %ir.s) ; CHECK: tBL 14 /* CC::al */, $noreg, @bar, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit $r0, implicit-def $sp ; CHECK: $sp = frame-destroy t2ADDspImm12 killed $sp, 4008, 14 /* CC::al */, $noreg ; CHECK: $sp = frame-destroy t2LDMIA_RET $sp, 14 /* CC::al */, $noreg, def $r7, def $pc renamable $r0 = t2ADDri %stack.0.v, 0, 14, $noreg, $noreg - t2STRi12 killed renamable $r0, %stack.1.s, 0, 14, $noreg :: (store 4 into %ir.s) - renamable $r0 = t2LDRi12 %stack.1.s, 0, 14, $noreg :: (dereferenceable load 4 from %ir.s) + t2STRi12 killed renamable $r0, %stack.1.s, 0, 14, $noreg :: (store (s32) into %ir.s) + renamable $r0 = t2LDRi12 %stack.1.s, 0, 14, $noreg :: (dereferenceable load (s32) from %ir.s) ADJCALLSTACKDOWN 0, 0, 14, $noreg, implicit-def dead $sp, implicit $sp tBL 14, $noreg, @bar, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit $r0, implicit-def $sp ADJCALLSTACKUP 0, 0, 14, $noreg, implicit-def dead $sp, implicit $sp diff --git a/llvm/test/CodeGen/MIR/Hexagon/bundled-call-site-info.mir b/llvm/test/CodeGen/MIR/Hexagon/bundled-call-site-info.mir index e4100543d3c71..cdcfdae270341 100644 --- a/llvm/test/CodeGen/MIR/Hexagon/bundled-call-site-info.mir +++ b/llvm/test/CodeGen/MIR/Hexagon/bundled-call-site-info.mir @@ -37,7 +37,7 @@ callSites: body: | bb.0.entry: BUNDLE implicit-def $r29, implicit-def $r30, implicit-def dead $r0, implicit-def dead $pc, implicit-def dead $r31, implicit $r29, implicit killed $framekey, implicit killed $framelimit, implicit killed $r30, implicit killed $r31 { - $r29 = S2_allocframe $r29, 0, implicit-def $r30, implicit killed $framekey, implicit killed $framelimit, implicit killed $r30, implicit killed $r31 :: (store 4 into stack) + $r29 = S2_allocframe $r29, 0, implicit-def $r30, implicit killed $framekey, implicit killed $framelimit, implicit killed $r30, implicit killed $r31 :: (store (s32) into stack) $r0 = A2_tfrsi 12345 J2_call @callee, hexagoncsr, implicit-def dead $pc, implicit-def dead $r31, implicit internal killed $r29, implicit internal killed $r0, implicit-def $r29 } diff --git a/llvm/test/CodeGen/MIR/Mips/expected-global-value-or-symbol-after-call-entry.mir b/llvm/test/CodeGen/MIR/Mips/expected-global-value-or-symbol-after-call-entry.mir index 002b60be8b87f..4874410c6a6e9 100644 --- a/llvm/test/CodeGen/MIR/Mips/expected-global-value-or-symbol-after-call-entry.mir +++ b/llvm/test/CodeGen/MIR/Mips/expected-global-value-or-symbol-after-call-entry.mir @@ -30,8 +30,8 @@ body: | $v0, $v1 = GotPrologue16 &_gp_disp, &_gp_disp $v0 = SllX16 killed $v0, 16 $v0 = AdduRxRyRz16 killed $v1, killed $v0 - ; CHECK: [[@LINE+1]]:67: expected a global value or an external symbol after 'call-entry' - $v1 = LwRxRyOffMemX16 $v0, @foo, 0 :: (load 4 from call-entry foo) + ; CHECK: [[@LINE+1]]:71: expected a global value or an external symbol after 'call-entry' + $v1 = LwRxRyOffMemX16 $v0, @foo, 0 :: (load (s32) from call-entry foo) $t9 = COPY $v1 $gp = COPY killed $v0 JumpLinkReg16 killed $v1, csr_o32, implicit-def $ra, implicit killed $t9, implicit $a0, implicit killed $gp, implicit-def $sp, implicit-def dead $v0 diff --git a/llvm/test/CodeGen/MIR/Mips/memory-operands.mir b/llvm/test/CodeGen/MIR/Mips/memory-operands.mir index f3a813612ca27..1cb228fdf31d2 100644 --- a/llvm/test/CodeGen/MIR/Mips/memory-operands.mir +++ b/llvm/test/CodeGen/MIR/Mips/memory-operands.mir @@ -50,8 +50,8 @@ body: | $v0 = SllX16 killed $v0, 16 $v0 = AdduRxRyRz16 killed $v1, killed $v0 ; CHECK-LABEL: name: test - ; CHECK: $v1 = LwRxRyOffMemX16 $v0, @foo :: (load 4 from call-entry @foo) - $v1 = LwRxRyOffMemX16 $v0, @foo :: (load 4 from call-entry @foo) + ; CHECK: $v1 = LwRxRyOffMemX16 $v0, @foo :: (load (s32) from call-entry @foo) + $v1 = LwRxRyOffMemX16 $v0, @foo :: (load (s32) from call-entry @foo) $t9 = COPY $v1 $gp = COPY killed $v0 JumpLinkReg16 killed $v1, csr_o32, implicit-def $ra, implicit killed $t9, implicit $a0, implicit killed $gp, implicit-def $sp, implicit-def dead $v0 @@ -87,13 +87,13 @@ body: | $v0, $v1 = GotPrologue16 &_gp_disp, &_gp_disp $v0 = SllX16 killed $v0, 16 $s0 = AdduRxRyRz16 killed $v1, killed $v0 - $v0 = LwRxRyOffMemX16 $s0, @g :: (load 4 from call-entry @g) + $v0 = LwRxRyOffMemX16 $s0, @g :: (load (s32) from call-entry @g) ; CHECK-LABEL: test2 - ; CHECK: $v1 = LwRxRyOffMemX16 $s0, &__mips16_call_stub_sf_0 :: (load 4 from call-entry &__mips16_call_stub_sf_0) - $v1 = LwRxRyOffMemX16 $s0, &__mips16_call_stub_sf_0 :: (load 4 from call-entry &__mips16_call_stub_sf_0) + ; CHECK: $v1 = LwRxRyOffMemX16 $s0, &__mips16_call_stub_sf_0 :: (load (s32) from call-entry &__mips16_call_stub_sf_0) + $v1 = LwRxRyOffMemX16 $s0, &__mips16_call_stub_sf_0 :: (load (s32) from call-entry &__mips16_call_stub_sf_0) $gp = COPY $s0 JumpLinkReg16 killed $v1, csr_o32, implicit-def $ra, implicit $v0, implicit killed $gp, implicit-def $sp, implicit-def $v0 - $v1 = LwRxRyOffMemX16 $s0, @__mips16_ret_sf :: (load 4 from call-entry @__mips16_ret_sf) + $v1 = LwRxRyOffMemX16 $s0, @__mips16_ret_sf :: (load (s32) from call-entry @__mips16_ret_sf) $t9 = COPY $v1 $gp = COPY killed $s0 JumpLinkReg16 killed $v1, csr_mips16rethelper, implicit-def $ra, implicit killed $t9, implicit $v0, implicit killed $gp, implicit-def $sp diff --git a/llvm/test/CodeGen/MIR/Mips/setRegClassOrRegBank.ll b/llvm/test/CodeGen/MIR/Mips/setRegClassOrRegBank.ll index 71329fd45b5d3..9af0dc3367378 100644 --- a/llvm/test/CodeGen/MIR/Mips/setRegClassOrRegBank.ll +++ b/llvm/test/CodeGen/MIR/Mips/setRegClassOrRegBank.ll @@ -15,9 +15,9 @@ define void @add_v4i32_builtin_imm(<4 x i32>* %a, <4 x i32>* %c) { ; P5600: liveins: $a0, $a1 ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 - ; P5600: [[LOAD:%[0-9]+]]:msa128w(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) + ; P5600: [[LOAD:%[0-9]+]]:msa128w(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>) from %ir.a) ; P5600: [[ADDVI_W:%[0-9]+]]:msa128w(<4 x s32>) = ADDVI_W [[LOAD]](<4 x s32>), 25 - ; P5600: G_STORE [[ADDVI_W]](<4 x s32>), [[COPY1]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[ADDVI_W]](<4 x s32>), [[COPY1]](p0) :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA entry: %0 = load <4 x i32>, <4 x i32>* %a, align 16 diff --git a/llvm/test/CodeGen/MIR/Mips/setRegClassOrRegBank.mir b/llvm/test/CodeGen/MIR/Mips/setRegClassOrRegBank.mir index 85fda1aad0862..80af292f13f22 100644 --- a/llvm/test/CodeGen/MIR/Mips/setRegClassOrRegBank.mir +++ b/llvm/test/CodeGen/MIR/Mips/setRegClassOrRegBank.mir @@ -25,15 +25,15 @@ body: | ; P5600: liveins: $a0, $a1 ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 - ; P5600: [[LOAD:%[0-9]+]]:msa128w(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) + ; P5600: [[LOAD:%[0-9]+]]:msa128w(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>) from %ir.a) ; P5600: [[ADDVI_W:%[0-9]+]]:msa128w(<4 x s32>) = ADDVI_W [[LOAD]](<4 x s32>), 25 - ; P5600: G_STORE [[ADDVI_W]](<4 x s32>), [[COPY1]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[ADDVI_W]](<4 x s32>), [[COPY1]](p0) :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 - %2:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) + %2:_(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.mips.addvi.w), %2(<4 x s32>), 25 - G_STORE %3(<4 x s32>), %1(p0) :: (store 16 into %ir.c) + G_STORE %3(<4 x s32>), %1(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... diff --git a/llvm/test/CodeGen/MIR/PowerPC/unordered-implicit-registers.mir b/llvm/test/CodeGen/MIR/PowerPC/unordered-implicit-registers.mir index b6cee56f2bb97..406180b59ceae 100644 --- a/llvm/test/CodeGen/MIR/PowerPC/unordered-implicit-registers.mir +++ b/llvm/test/CodeGen/MIR/PowerPC/unordered-implicit-registers.mir @@ -34,7 +34,7 @@ body: | liveins: $x3 %0 = COPY $x3 - %1 = LWZ 0, %0 :: (load 4 from %ir.p) + %1 = LWZ 0, %0 :: (load (s32) from %ir.p) %2 = LI 0 %3 = RLWIMI %2, killed %1, 0, 0, 31 %4 = EXTSW_32_64 killed %3 diff --git a/llvm/test/CodeGen/MIR/X86/branch-folder-with-label.mir b/llvm/test/CodeGen/MIR/X86/branch-folder-with-label.mir index 9f4ac617cb8ac..610bbea17e2f7 100644 --- a/llvm/test/CodeGen/MIR/X86/branch-folder-with-label.mir +++ b/llvm/test/CodeGen/MIR/X86/branch-folder-with-label.mir @@ -232,7 +232,7 @@ body: | CFI_INSTRUCTION def_cfa_offset 16 CFI_INSTRUCTION offset $rbx, -16 renamable $rbx = COPY $rdi - renamable $edi = MOV32rm $rdi, 1, $noreg, 0, $noreg :: (load 4 from %ir.out) + renamable $edi = MOV32rm $rdi, 1, $noreg, 0, $noreg :: (load (s32) from %ir.out) CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit $edi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax, debug-location !9 TEST32rr renamable $eax, renamable $eax, implicit-def $eflags JCC_1 %bb.2, 9, implicit killed $eflags @@ -248,7 +248,7 @@ body: | successors: %bb.3(0x80000000) liveins: $rbx - MOV32mi killed renamable $rbx, 1, $noreg, 0, $noreg, 1 :: (store 4 into %ir.out) + MOV32mi killed renamable $rbx, 1, $noreg, 0, $noreg, 1 :: (store (s32) into %ir.out) renamable $eax = MOV32r0 implicit-def dead $eflags bb.3.cleanup: @@ -367,7 +367,7 @@ body: | successors: %bb.8(0x30000000), %bb.7(0x50000000) liveins: $rbx, $r14 - CMP32mi8 $rsp, 1, $noreg, 4, $noreg, 0, implicit-def $eflags :: (dereferenceable load 4 from %ir.idx) + CMP32mi8 $rsp, 1, $noreg, 4, $noreg, 0, implicit-def $eflags :: (dereferenceable load (s32) from %ir.idx) JCC_1 %bb.8, 8, implicit killed $eflags JMP_1 %bb.7 @@ -375,7 +375,7 @@ body: | successors: %bb.8(0x30000000), %bb.3(0x50000000) liveins: $rbx, $r14 - CMP32mi8 renamable $rbx, 1, $noreg, 0, $noreg, 0, implicit-def $eflags :: (load 4 from %ir.1) + CMP32mi8 renamable $rbx, 1, $noreg, 0, $noreg, 0, implicit-def $eflags :: (load (s32) from %ir.1) JCC_1 %bb.3, 5, implicit killed $eflags JMP_1 %bb.8 diff --git a/llvm/test/CodeGen/MIR/X86/diexpr-win32.mir b/llvm/test/CodeGen/MIR/X86/diexpr-win32.mir index c1fe8cbd7e9b3..c937c50cab9b6 100644 --- a/llvm/test/CodeGen/MIR/X86/diexpr-win32.mir +++ b/llvm/test/CodeGen/MIR/X86/diexpr-win32.mir @@ -192,16 +192,16 @@ body: | frame-setup PUSH32r killed $esi, implicit-def $esp, implicit $esp CFI_INSTRUCTION def_cfa_offset 8 CFI_INSTRUCTION offset $esi, -8 - $esi = MOV32rm $esp, 1, _, 8, _ :: (load 4 from %fixed-stack.2) + $esi = MOV32rm $esp, 1, _, 8, _ :: (load (s32) from %fixed-stack.2) DBG_VALUE $esp, 0, !26, !10, debug-location !25 DBG_VALUE $esp, 0, !23, !DIExpression(DW_OP_plus_uconst, 8, DW_OP_deref), debug-location !25 CALLpcrel32 @getString, csr_32, implicit $esp, implicit-def $esp, implicit-def $eax, debug-location !29 - $ecx = MOV32rm $eax, 1, _, 0, _, debug-location !29 :: (dereferenceable load 4 from %ir.1) - $edx = MOV32rm $eax, 1, _, 4, _, debug-location !29 :: (dereferenceable load 4 from %ir.1 + 4) - MOV32mr $esi, 1, _, 0, _, killed $ecx, debug-location !29 :: (store 4 into %ir.0) - MOV32mr $esi, 1, _, 4, _, killed $edx, debug-location !29 :: (store 4 into %ir.0 + 4) - $eax = MOV32rm killed $eax, 1, _, 8, _, debug-location !29 :: (dereferenceable load 4 from %ir.1 + 8) - MOV32mr $esi, 1, _, 8, _, killed $eax, debug-location !29 :: (store 4 into %ir.0 + 8) + $ecx = MOV32rm $eax, 1, _, 0, _, debug-location !29 :: (dereferenceable load (s32) from %ir.1) + $edx = MOV32rm $eax, 1, _, 4, _, debug-location !29 :: (dereferenceable load (s32) from %ir.1 + 4) + MOV32mr $esi, 1, _, 0, _, killed $ecx, debug-location !29 :: (store (s32) into %ir.0) + MOV32mr $esi, 1, _, 4, _, killed $edx, debug-location !29 :: (store (s32) into %ir.0 + 4) + $eax = MOV32rm killed $eax, 1, _, 8, _, debug-location !29 :: (dereferenceable load (s32) from %ir.1 + 8) + MOV32mr $esi, 1, _, 8, _, killed $eax, debug-location !29 :: (store (s32) into %ir.0 + 8) $eax = COPY killed $esi, debug-location !30 $esi = POP32r implicit-def $esp, implicit $esp, debug-location !30 RET 0, $eax, debug-location !30 @@ -243,10 +243,10 @@ stack: constants: body: | bb.0.entry: - $eax = MOV32rm $esp, 1, _, 4, _ :: (load 4 from %fixed-stack.1) - $eax = MOV32rm killed $eax, 1, _, 0, _, debug-location !34 :: (load 4 from %ir.0) + $eax = MOV32rm $esp, 1, _, 4, _ :: (load (s32) from %fixed-stack.1) + $eax = MOV32rm killed $eax, 1, _, 0, _, debug-location !34 :: (load (s32) from %ir.0) DBG_VALUE $eax, 0, !35, !DIExpression(DW_OP_constu, 4, DW_OP_minus), debug-location !34 - $eax = ADD32rm killed $eax, $esp, 1, _, 8, _, implicit-def dead $eflags, debug-location !36 :: (load 4 from %fixed-stack.0) + $eax = ADD32rm killed $eax, $esp, 1, _, 8, _, implicit-def dead $eflags, debug-location !36 :: (load (s32) from %fixed-stack.0) RET 0, $eax, debug-location !36 ... diff --git a/llvm/test/CodeGen/MIR/X86/duplicate-memory-operand-flag.mir b/llvm/test/CodeGen/MIR/X86/duplicate-memory-operand-flag.mir index f61f1e015bed9..13229dc70db74 100644 --- a/llvm/test/CodeGen/MIR/X86/duplicate-memory-operand-flag.mir +++ b/llvm/test/CodeGen/MIR/X86/duplicate-memory-operand-flag.mir @@ -20,8 +20,8 @@ body: | bb.0.entry: liveins: $rdi ; CHECK: [[@LINE+1]]:50: duplicate 'volatile' memory operand flag - $eax = MOV32rm $rdi, 1, _, 0, _ :: (volatile volatile load 4 from %ir.x) + $eax = MOV32rm $rdi, 1, _, 0, _ :: (volatile volatile load (s32) from %ir.x) $eax = INC32r killed $eax, implicit-def dead $eflags - MOV32mr killed $rdi, 1, _, 0, _, $eax :: (volatile store 4 into %ir.x) + MOV32mr killed $rdi, 1, _, 0, _, $eax :: (volatile store (s32) into %ir.x) RETQ $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/expected-align-in-memory-operand.mir b/llvm/test/CodeGen/MIR/X86/expected-align-in-memory-operand.mir index f075719fbea65..2cda984eba8c0 100644 --- a/llvm/test/CodeGen/MIR/X86/expected-align-in-memory-operand.mir +++ b/llvm/test/CodeGen/MIR/X86/expected-align-in-memory-operand.mir @@ -19,12 +19,12 @@ liveins: body: | bb.0.entry: liveins: $rdi - ; CHECK: [[@LINE+1]]:65: expected 'align' - $xmm0 = MOVAPSrm $rdi, 1, _, 0, _ :: (load 16 from %ir.vec, 32) - $xmm1 = MOVAPSrm $rdi, 1, _, 16, _ :: (load 16 from %ir.vec + 16, basealign 32) + ; CHECK: [[@LINE+1]]:69: expected 'align' + $xmm0 = MOVAPSrm $rdi, 1, _, 0, _ :: (load (s128) from %ir.vec, 32) + $xmm1 = MOVAPSrm $rdi, 1, _, 16, _ :: (load (s128) from %ir.vec + 16, basealign 32) $xmm2 = FsFLD0SS $xmm1 = MOVSSrr killed $xmm1, killed $xmm2 - MOVAPSmr $rdi, 1, _, 0, _, killed $xmm0 :: (store 16 into %ir.vec, align 32) - MOVAPSmr killed $rdi, 1, _, 16, _, killed $xmm1 :: (store 16 into %ir.vec + 16, basealign 32) + MOVAPSmr $rdi, 1, _, 0, _, killed $xmm0 :: (store (s128) into %ir.vec, align 32) + MOVAPSmr killed $rdi, 1, _, 16, _, killed $xmm1 :: (store (s128) into %ir.vec + 16, basealign 32) RETQ ... diff --git a/llvm/test/CodeGen/MIR/X86/expected-alignment-after-align-in-memory-operand.mir b/llvm/test/CodeGen/MIR/X86/expected-alignment-after-align-in-memory-operand.mir index ffed6048de2d8..db09b558fdbcf 100644 --- a/llvm/test/CodeGen/MIR/X86/expected-alignment-after-align-in-memory-operand.mir +++ b/llvm/test/CodeGen/MIR/X86/expected-alignment-after-align-in-memory-operand.mir @@ -19,12 +19,12 @@ liveins: body: | bb.0.entry: liveins: $rdi - ; CHECK: [[@LINE+1]]:70: expected an integer literal after 'align' - $xmm0 = MOVAPSrm $rdi, 1, _, 0, _ :: (load 16 from %ir.vec, align) - $xmm1 = MOVAPSrm $rdi, 1, _, 16, _ :: (load 16 from %ir.vec + 16, basealign 32) + ; CHECK: [[@LINE+1]]:74: expected an integer literal after 'align' + $xmm0 = MOVAPSrm $rdi, 1, _, 0, _ :: (load (s128) from %ir.vec, align) + $xmm1 = MOVAPSrm $rdi, 1, _, 16, _ :: (load (s128) from %ir.vec + 16, basealign 32) $xmm2 = FsFLD0SS $xmm1 = MOVSSrr killed $xmm1, killed $xmm2 - MOVAPSmr $rdi, 1, _, 0, _, killed $xmm0 :: (store 16 into %ir.vec, align 32) - MOVAPSmr killed $rdi, 1, _, 16, _, killed $xmm1 :: (store 16 into %ir.vec + 16, basealign 32) + MOVAPSmr $rdi, 1, _, 0, _, killed $xmm0 :: (store (s128) into %ir.vec, align 32) + MOVAPSmr killed $rdi, 1, _, 16, _, killed $xmm1 :: (store (s128) into %ir.vec + 16, basealign 32) RETQ ... diff --git a/llvm/test/CodeGen/MIR/X86/expected-comma-after-memory-operand.mir b/llvm/test/CodeGen/MIR/X86/expected-comma-after-memory-operand.mir index 3c49d9dd1c69d..5a32c4f58faff 100644 --- a/llvm/test/CodeGen/MIR/X86/expected-comma-after-memory-operand.mir +++ b/llvm/test/CodeGen/MIR/X86/expected-comma-after-memory-operand.mir @@ -19,7 +19,7 @@ liveins: body: | bb.0.entry2: liveins: $rdi - ; CHECK: [[@LINE+1]]:87: expected ',' before the next machine memory operand - INC32m killed $rdi, 1, _, 0, _, implicit-def dead $eflags :: (store 4 into %ir.a) (load 4 from %ir.a) + ; CHECK: [[@LINE+1]]:91: expected ',' before the next machine memory operand + INC32m killed $rdi, 1, _, 0, _, implicit-def dead $eflags :: (store (s32) into %ir.a) (load (s32) from %ir.a) RETQ ... diff --git a/llvm/test/CodeGen/MIR/X86/expected-metadata-node-in-stack-object.mir b/llvm/test/CodeGen/MIR/X86/expected-metadata-node-in-stack-object.mir index 4ffb0b28d97b9..d35bf538f871a 100644 --- a/llvm/test/CodeGen/MIR/X86/expected-metadata-node-in-stack-object.mir +++ b/llvm/test/CodeGen/MIR/X86/expected-metadata-node-in-stack-object.mir @@ -20,7 +20,7 @@ body: | bb.0.entry: liveins: $edi - MOV32mr $rsp, 1, _, -4, _, $edi :: (store 4 into %ir.xa) + MOV32mr $rsp, 1, _, -4, _, $edi :: (store (s32) into %ir.xa) $eax = COPY killed $edi RETQ killed $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/expected-pointer-value-in-memory-operand.mir b/llvm/test/CodeGen/MIR/X86/expected-pointer-value-in-memory-operand.mir index 8e19be67499b8..3951943b98f55 100644 --- a/llvm/test/CodeGen/MIR/X86/expected-pointer-value-in-memory-operand.mir +++ b/llvm/test/CodeGen/MIR/X86/expected-pointer-value-in-memory-operand.mir @@ -17,8 +17,8 @@ liveins: body: | bb.0.entry: liveins: $rdi - ; CHECK: [[@LINE+1]]:60: expected a pointer IR value - $eax = MOV32rm killed $rdi, 1, _, 0, _ :: (load 4 from %ir.b) + ; CHECK: [[@LINE+1]]:64: expected a pointer IR value + $eax = MOV32rm killed $rdi, 1, _, 0, _ :: (load (s32) from %ir.b) RETQ $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/expected-positive-alignment-after-align.mir b/llvm/test/CodeGen/MIR/X86/expected-positive-alignment-after-align.mir index da6cb61d14ae1..1596fe3813734 100644 --- a/llvm/test/CodeGen/MIR/X86/expected-positive-alignment-after-align.mir +++ b/llvm/test/CodeGen/MIR/X86/expected-positive-alignment-after-align.mir @@ -19,12 +19,12 @@ liveins: body: | bb.0.entry: liveins: $rdi - ; CHECK: [[@LINE+1]]:71: expected an integer literal after 'align' - $xmm0 = MOVAPSrm $rdi, 1, _, 0, _ :: (load 16 from %ir.vec, align -32) - $xmm1 = MOVAPSrm $rdi, 1, _, 16, _ :: (load 16 from %ir.vec + 16, basealign 32) + ; CHECK: [[@LINE+1]]:75: expected an integer literal after 'align' + $xmm0 = MOVAPSrm $rdi, 1, _, 0, _ :: (load (s128) from %ir.vec, align -32) + $xmm1 = MOVAPSrm $rdi, 1, _, 16, _ :: (load (s128) from %ir.vec + 16, basealign 32) $xmm2 = FsFLD0SS $xmm1 = MOVSSrr killed $xmm1, killed $xmm2 - MOVAPSmr $rdi, 1, _, 0, _, killed $xmm0 :: (store 16 into %ir.vec, align 32) - MOVAPSmr killed $rdi, 1, _, 16, _, killed $xmm1 :: (store 16 into %ir.vec + 16, basealign 32) + MOVAPSmr $rdi, 1, _, 0, _, killed $xmm0 :: (store (s128) into %ir.vec, align 32) + MOVAPSmr killed $rdi, 1, _, 16, _, killed $xmm1 :: (store (s128) into %ir.vec + 16, basealign 32) RETQ ... diff --git a/llvm/test/CodeGen/MIR/X86/expected-power-of-2-after-align.mir b/llvm/test/CodeGen/MIR/X86/expected-power-of-2-after-align.mir index 0842d96cac2bd..1cf57719fa876 100644 --- a/llvm/test/CodeGen/MIR/X86/expected-power-of-2-after-align.mir +++ b/llvm/test/CodeGen/MIR/X86/expected-power-of-2-after-align.mir @@ -6,7 +6,7 @@ body: | bb.0: %0:_(p0) = IMPLICIT_DEF - ; CHECK: [[@LINE+1]]:50: expected a power-of-2 literal after 'align' - %1:_(s64) = G_LOAD %0(p0) :: (load 8, align 0) + ; CHECK: [[@LINE+1]]:54: expected a power-of-2 literal after 'align' + %1:_(s64) = G_LOAD %0(p0) :: (load (s64), align 0) ... diff --git a/llvm/test/CodeGen/MIR/X86/expected-size-integer-after-memory-operation2.mir b/llvm/test/CodeGen/MIR/X86/expected-size-integer-after-memory-operation2.mir index 4a80455425dd7..b7533f266bae1 100644 --- a/llvm/test/CodeGen/MIR/X86/expected-size-integer-after-memory-operation2.mir +++ b/llvm/test/CodeGen/MIR/X86/expected-size-integer-after-memory-operation2.mir @@ -17,7 +17,7 @@ liveins: body: | bb.0.entry: liveins: $rdi - ; CHECK: [[@LINE+1]]:53: expected the size integer literal or 'unknown-size' after memory operation + ; CHECK: [[@LINE+1]]:53: expected memory LLT, the size integer literal or 'unknown-size' after memory operation $eax = MOV32rm killed $rdi, 1, _, 0, _ :: (load . from %ir.a) RETQ $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/expected-value-in-memory-operand.mir b/llvm/test/CodeGen/MIR/X86/expected-value-in-memory-operand.mir index 4beaa2477b1c6..02dd5b2337c7c 100644 --- a/llvm/test/CodeGen/MIR/X86/expected-value-in-memory-operand.mir +++ b/llvm/test/CodeGen/MIR/X86/expected-value-in-memory-operand.mir @@ -17,8 +17,8 @@ liveins: body: | bb.0.entry: liveins: $rdi - ; CHECK: [[@LINE+1]]:60: expected an IR value reference - $eax = MOV32rm killed $rdi, 1, _, 0, _ :: (load 4 from a) + ; CHECK: [[@LINE+1]]:64: expected an IR value reference + $eax = MOV32rm killed $rdi, 1, _, 0, _ :: (load (s32) from a) RETQ $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/fixed-stack-memory-operands.mir b/llvm/test/CodeGen/MIR/X86/fixed-stack-memory-operands.mir index 1c6104675734d..cb31d32d1cefd 100644 --- a/llvm/test/CodeGen/MIR/X86/fixed-stack-memory-operands.mir +++ b/llvm/test/CodeGen/MIR/X86/fixed-stack-memory-operands.mir @@ -31,9 +31,9 @@ body: | frame-setup PUSH32r undef $eax, implicit-def $esp, implicit $esp CFI_INSTRUCTION def_cfa_offset 8 ; CHECK: name: test - ; CHECK: $eax = MOV32rm $esp, 1, $noreg, 8, $noreg :: (load 4 from %fixed-stack.0, align 16) - $eax = MOV32rm $esp, 1, _, 8, _ :: (load 4 from %fixed-stack.0, align 16) - MOV32mr $esp, 1, _, 0, _, $eax :: (store 4 into %ir.b) + ; CHECK: $eax = MOV32rm $esp, 1, $noreg, 8, $noreg :: (load (s32) from %fixed-stack.0, align 16) + $eax = MOV32rm $esp, 1, _, 8, _ :: (load (s32) from %fixed-stack.0, align 16) + MOV32mr $esp, 1, _, 0, _, $eax :: (store (s32) into %ir.b) $edx = POP32r implicit-def $esp, implicit $esp RETL $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/frame-info-stack-references.mir b/llvm/test/CodeGen/MIR/X86/frame-info-stack-references.mir index 4083e889cc0de..01e619a783347 100644 --- a/llvm/test/CodeGen/MIR/X86/frame-info-stack-references.mir +++ b/llvm/test/CodeGen/MIR/X86/frame-info-stack-references.mir @@ -57,7 +57,7 @@ body: | frame-setup PUSH64r killed $rbx, implicit-def $rsp, implicit $rsp $rsp = frame-setup SUB64ri8 $rsp, 32, implicit-def dead $eflags - $rbx = LOAD_STACK_GUARD :: (invariant load 8 from @__stack_chk_guard) + $rbx = LOAD_STACK_GUARD :: (invariant load (s64) from @__stack_chk_guard) MOV64mr $rsp, 1, _, 24, _, $rbx $rsi = LEA64r $rsp, 1, _, 19, _ MOV64mr $rsp, 1, _, 8, _, $rsi diff --git a/llvm/test/CodeGen/MIR/X86/machine-metadata.mir b/llvm/test/CodeGen/MIR/X86/machine-metadata.mir index b4993fcc59634..9e4f6d04a4bc2 100644 --- a/llvm/test/CodeGen/MIR/X86/machine-metadata.mir +++ b/llvm/test/CodeGen/MIR/X86/machine-metadata.mir @@ -76,22 +76,22 @@ body: | ; CHECK-LABEL: name: test_memcpy ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi ; CHECK: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi - ; CHECK: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 16, $noreg :: (load 8 from %ir.p1, align 4, !alias.scope !5, !noalias !8) - ; CHECK: [[MOV64rm1:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 24, $noreg :: (load 8 from %ir.p1 + 8, align 4, !alias.scope !5, !noalias !8) - ; CHECK: MOV64mr [[COPY1]], 1, $noreg, 8, $noreg, killed [[MOV64rm1]] :: (store 8 into %ir.p0 + 8, align 4, !alias.scope !10, !noalias !11) - ; CHECK: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, killed [[MOV64rm]] :: (store 8 into %ir.p0, align 4, !alias.scope !10, !noalias !11) - ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load 4 from %ir.q, !alias.scope !3, !noalias !0) - ; CHECK: [[ADD32rm:%[0-9]+]]:gr32 = ADD32rm [[MOV32rm]], [[COPY]], 1, $noreg, 4, $noreg, implicit-def dead $eflags :: (load 4 from %ir.q1, !alias.scope !3, !noalias !0) + ; CHECK: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 16, $noreg :: (load (s64) from %ir.p1, align 4, !alias.scope !5, !noalias !8) + ; CHECK: [[MOV64rm1:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 24, $noreg :: (load (s64) from %ir.p1 + 8, align 4, !alias.scope !5, !noalias !8) + ; CHECK: MOV64mr [[COPY1]], 1, $noreg, 8, $noreg, killed [[MOV64rm1]] :: (store (s64) into %ir.p0 + 8, align 4, !alias.scope !10, !noalias !11) + ; CHECK: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, killed [[MOV64rm]] :: (store (s64) into %ir.p0, align 4, !alias.scope !10, !noalias !11) + ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.q, !alias.scope !3, !noalias !0) + ; CHECK: [[ADD32rm:%[0-9]+]]:gr32 = ADD32rm [[MOV32rm]], [[COPY]], 1, $noreg, 4, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.q1, !alias.scope !3, !noalias !0) ; CHECK: $eax = COPY [[ADD32rm]] ; CHECK: RET 0, $eax %1:gr64 = COPY $rsi %0:gr64 = COPY $rdi - %2:gr64 = MOV64rm %0, 1, $noreg, 16, $noreg :: (load 8 from %ir.p1, align 4, !alias.scope !5, !noalias !8) - %3:gr64 = MOV64rm %0, 1, $noreg, 24, $noreg :: (load 8 from %ir.p1 + 8, align 4, !alias.scope !5, !noalias !8) - MOV64mr %0, 1, $noreg, 8, $noreg, killed %3 :: (store 8 into %ir.p0 + 8, align 4, !alias.scope !10, !noalias !11) - MOV64mr %0, 1, $noreg, 0, $noreg, killed %2 :: (store 8 into %ir.p0, align 4, !alias.scope !10, !noalias !11) - %4:gr32 = MOV32rm %1, 1, $noreg, 0, $noreg :: (load 4 from %ir.q, !alias.scope !3, !noalias !0) - %5:gr32 = ADD32rm %4, %1, 1, $noreg, 4, $noreg, implicit-def dead $eflags :: (load 4 from %ir.q1, !alias.scope !3, !noalias !0) + %2:gr64 = MOV64rm %0, 1, $noreg, 16, $noreg :: (load (s64) from %ir.p1, align 4, !alias.scope !5, !noalias !8) + %3:gr64 = MOV64rm %0, 1, $noreg, 24, $noreg :: (load (s64) from %ir.p1 + 8, align 4, !alias.scope !5, !noalias !8) + MOV64mr %0, 1, $noreg, 8, $noreg, killed %3 :: (store (s64) into %ir.p0 + 8, align 4, !alias.scope !10, !noalias !11) + MOV64mr %0, 1, $noreg, 0, $noreg, killed %2 :: (store (s64) into %ir.p0, align 4, !alias.scope !10, !noalias !11) + %4:gr32 = MOV32rm %1, 1, $noreg, 0, $noreg :: (load (s32) from %ir.q, !alias.scope !3, !noalias !0) + %5:gr32 = ADD32rm %4, %1, 1, $noreg, 4, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.q1, !alias.scope !3, !noalias !0) $eax = COPY %5 RET 0, $eax @@ -113,22 +113,22 @@ body: | ; CHECK-LABEL: name: test_memcpy_inline ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi ; CHECK: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi - ; CHECK: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 16, $noreg :: (load 8 from %ir.p1, align 4, !alias.scope !5, !noalias !8) - ; CHECK: [[MOV64rm1:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 24, $noreg :: (load 8 from %ir.p1 + 8, align 4, !alias.scope !5, !noalias !8) - ; CHECK: MOV64mr [[COPY1]], 1, $noreg, 8, $noreg, killed [[MOV64rm1]] :: (store 8 into %ir.p0 + 8, align 4, !alias.scope !10, !noalias !11) - ; CHECK: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, killed [[MOV64rm]] :: (store 8 into %ir.p0, align 4, !alias.scope !10, !noalias !11) - ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load 4 from %ir.q, !alias.scope !3, !noalias !0) - ; CHECK: [[ADD32rm:%[0-9]+]]:gr32 = ADD32rm [[MOV32rm]], [[COPY]], 1, $noreg, 4, $noreg, implicit-def dead $eflags :: (load 4 from %ir.q1, !alias.scope !3, !noalias !0) + ; CHECK: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 16, $noreg :: (load (s64) from %ir.p1, align 4, !alias.scope !5, !noalias !8) + ; CHECK: [[MOV64rm1:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 24, $noreg :: (load (s64) from %ir.p1 + 8, align 4, !alias.scope !5, !noalias !8) + ; CHECK: MOV64mr [[COPY1]], 1, $noreg, 8, $noreg, killed [[MOV64rm1]] :: (store (s64) into %ir.p0 + 8, align 4, !alias.scope !10, !noalias !11) + ; CHECK: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, killed [[MOV64rm]] :: (store (s64) into %ir.p0, align 4, !alias.scope !10, !noalias !11) + ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.q, !alias.scope !3, !noalias !0) + ; CHECK: [[ADD32rm:%[0-9]+]]:gr32 = ADD32rm [[MOV32rm]], [[COPY]], 1, $noreg, 4, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.q1, !alias.scope !3, !noalias !0) ; CHECK: $eax = COPY [[ADD32rm]] ; CHECK: RET 0, $eax %1:gr64 = COPY $rsi %0:gr64 = COPY $rdi - %2:gr64 = MOV64rm %0, 1, $noreg, 16, $noreg :: (load 8 from %ir.p1, align 4, !alias.scope !5, !noalias !8) - %3:gr64 = MOV64rm %0, 1, $noreg, 24, $noreg :: (load 8 from %ir.p1 + 8, align 4, !alias.scope !5, !noalias !8) - MOV64mr %0, 1, $noreg, 8, $noreg, killed %3 :: (store 8 into %ir.p0 + 8, align 4, !alias.scope !10, !noalias !11) - MOV64mr %0, 1, $noreg, 0, $noreg, killed %2 :: (store 8 into %ir.p0, align 4, !alias.scope !10, !noalias !11) - %4:gr32 = MOV32rm %1, 1, $noreg, 0, $noreg :: (load 4 from %ir.q, !alias.scope !3, !noalias !0) - %5:gr32 = ADD32rm %4, %1, 1, $noreg, 4, $noreg, implicit-def dead $eflags :: (load 4 from %ir.q1, !alias.scope !3, !noalias !0) + %2:gr64 = MOV64rm %0, 1, $noreg, 16, $noreg :: (load (s64) from %ir.p1, align 4, !alias.scope !5, !noalias !8) + %3:gr64 = MOV64rm %0, 1, $noreg, 24, $noreg :: (load (s64) from %ir.p1 + 8, align 4, !alias.scope !5, !noalias !8) + MOV64mr %0, 1, $noreg, 8, $noreg, killed %3 :: (store (s64) into %ir.p0 + 8, align 4, !alias.scope !10, !noalias !11) + MOV64mr %0, 1, $noreg, 0, $noreg, killed %2 :: (store (s64) into %ir.p0, align 4, !alias.scope !10, !noalias !11) + %4:gr32 = MOV32rm %1, 1, $noreg, 0, $noreg :: (load (s32) from %ir.q, !alias.scope !3, !noalias !0) + %5:gr32 = ADD32rm %4, %1, 1, $noreg, 4, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.q1, !alias.scope !3, !noalias !0) $eax = COPY %5 RET 0, $eax @@ -150,22 +150,22 @@ body: | ; CHECK-LABEL: name: test_mempcpy ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi ; CHECK: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi - ; CHECK: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 16, $noreg :: (load 8 from %ir.p1, align 1, !alias.scope !5, !noalias !8) - ; CHECK: [[MOV64rm1:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 24, $noreg :: (load 8 from %ir.p1 + 8, align 1, !alias.scope !5, !noalias !8) - ; CHECK: MOV64mr [[COPY1]], 1, $noreg, 8, $noreg, killed [[MOV64rm1]] :: (store 8 into %ir.p0 + 8, align 1, !alias.scope !10, !noalias !11) - ; CHECK: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, killed [[MOV64rm]] :: (store 8 into %ir.p0, align 1, !alias.scope !10, !noalias !11) - ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load 4 from %ir.q, !alias.scope !3, !noalias !0) - ; CHECK: [[ADD32rm:%[0-9]+]]:gr32 = ADD32rm [[MOV32rm]], [[COPY]], 1, $noreg, 4, $noreg, implicit-def dead $eflags :: (load 4 from %ir.q1, !alias.scope !3, !noalias !0) + ; CHECK: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 16, $noreg :: (load (s64) from %ir.p1, align 1, !alias.scope !5, !noalias !8) + ; CHECK: [[MOV64rm1:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 24, $noreg :: (load (s64) from %ir.p1 + 8, align 1, !alias.scope !5, !noalias !8) + ; CHECK: MOV64mr [[COPY1]], 1, $noreg, 8, $noreg, killed [[MOV64rm1]] :: (store (s64) into %ir.p0 + 8, align 1, !alias.scope !10, !noalias !11) + ; CHECK: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, killed [[MOV64rm]] :: (store (s64) into %ir.p0, align 1, !alias.scope !10, !noalias !11) + ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.q, !alias.scope !3, !noalias !0) + ; CHECK: [[ADD32rm:%[0-9]+]]:gr32 = ADD32rm [[MOV32rm]], [[COPY]], 1, $noreg, 4, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.q1, !alias.scope !3, !noalias !0) ; CHECK: $eax = COPY [[ADD32rm]] ; CHECK: RET 0, $eax %1:gr64 = COPY $rsi %0:gr64 = COPY $rdi - %2:gr64 = MOV64rm %0, 1, $noreg, 16, $noreg :: (load 8 from %ir.p1, align 1, !alias.scope !5, !noalias !8) - %3:gr64 = MOV64rm %0, 1, $noreg, 24, $noreg :: (load 8 from %ir.p1 + 8, align 1, !alias.scope !5, !noalias !8) - MOV64mr %0, 1, $noreg, 8, $noreg, killed %3 :: (store 8 into %ir.p0 + 8, align 1, !alias.scope !10, !noalias !11) - MOV64mr %0, 1, $noreg, 0, $noreg, killed %2 :: (store 8 into %ir.p0, align 1, !alias.scope !10, !noalias !11) - %4:gr32 = MOV32rm %1, 1, $noreg, 0, $noreg :: (load 4 from %ir.q, !alias.scope !3, !noalias !0) - %5:gr32 = ADD32rm %4, %1, 1, $noreg, 4, $noreg, implicit-def dead $eflags :: (load 4 from %ir.q1, !alias.scope !3, !noalias !0) + %2:gr64 = MOV64rm %0, 1, $noreg, 16, $noreg :: (load (s64) from %ir.p1, align 1, !alias.scope !5, !noalias !8) + %3:gr64 = MOV64rm %0, 1, $noreg, 24, $noreg :: (load (s64) from %ir.p1 + 8, align 1, !alias.scope !5, !noalias !8) + MOV64mr %0, 1, $noreg, 8, $noreg, killed %3 :: (store (s64) into %ir.p0 + 8, align 1, !alias.scope !10, !noalias !11) + MOV64mr %0, 1, $noreg, 0, $noreg, killed %2 :: (store (s64) into %ir.p0, align 1, !alias.scope !10, !noalias !11) + %4:gr32 = MOV32rm %1, 1, $noreg, 0, $noreg :: (load (s32) from %ir.q, !alias.scope !3, !noalias !0) + %5:gr32 = ADD32rm %4, %1, 1, $noreg, 4, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.q1, !alias.scope !3, !noalias !0) $eax = COPY %5 RET 0, $eax diff --git a/llvm/test/CodeGen/MIR/X86/memory-operands.mir b/llvm/test/CodeGen/MIR/X86/memory-operands.mir index e4827a4f1bbad..dca4f9d693cb8 100644 --- a/llvm/test/CodeGen/MIR/X86/memory-operands.mir +++ b/llvm/test/CodeGen/MIR/X86/memory-operands.mir @@ -200,10 +200,10 @@ liveins: body: | bb.0.entry: liveins: $rdi - ; CHECK: $eax = MOV32rm $rdi, 1, $noreg, 0, $noreg :: (load 4 from %ir.a) - ; CHECK-NEXT: MOV32mi killed $rdi, 1, $noreg, 0, $noreg, 42 :: (store 4 into %ir.a) - $eax = MOV32rm $rdi, 1, _, 0, _ :: (load 4 from %ir.a) - MOV32mi killed $rdi, 1, _, 0, _, 42 :: (store 4 into %ir.a) + ; CHECK: $eax = MOV32rm $rdi, 1, $noreg, 0, $noreg :: (load (s32) from %ir.a) + ; CHECK-NEXT: MOV32mi killed $rdi, 1, $noreg, 0, $noreg, 42 :: (store (s32) into %ir.a) + $eax = MOV32rm $rdi, 1, _, 0, _ :: (load (s32) from %ir.a) + MOV32mi killed $rdi, 1, _, 0, _, 42 :: (store (s32) into %ir.a) RETQ $eax ... --- @@ -214,8 +214,8 @@ liveins: body: | bb.0.entry2: liveins: $rdi - ; CHECK: INC32m killed $rdi, 1, $noreg, 0, $noreg, implicit-def dead $eflags :: (store 4 into %ir."a value"), (load 4 from %ir."a value") - INC32m killed $rdi, 1, _, 0, _, implicit-def dead $eflags :: (store 4 into %ir."a value"), (load 4 from %ir."a value") + ; CHECK: INC32m killed $rdi, 1, $noreg, 0, $noreg, implicit-def dead $eflags :: (store (s32) into %ir."a value"), (load (s32) from %ir."a value") + INC32m killed $rdi, 1, _, 0, _, implicit-def dead $eflags :: (store (s32) into %ir."a value"), (load (s32) from %ir."a value") RETQ ... --- @@ -232,11 +232,11 @@ body: | liveins: $rdi ; Verify that the unnamed local values can be serialized. ; CHECK-LABEL: name: test3 - ; CHECK: $eax = MOV32rm killed $rdi, 1, $noreg, 0, $noreg :: (load 4 from %ir.0) - ; CHECK: MOV32mr $rsp, 1, $noreg, -4, $noreg, killed $eax :: (store 4 into %ir.1) - $eax = MOV32rm killed $rdi, 1, _, 0, _ :: (load 4 from %ir.0) + ; CHECK: $eax = MOV32rm killed $rdi, 1, $noreg, 0, $noreg :: (load (s32) from %ir.0) + ; CHECK: MOV32mr $rsp, 1, $noreg, -4, $noreg, killed $eax :: (store (s32) into %ir.1) + $eax = MOV32rm killed $rdi, 1, _, 0, _ :: (load (s32) from %ir.0) $eax = INC32r killed $eax, implicit-def dead $eflags - MOV32mr $rsp, 1, _, -4, _, killed $eax :: (store 4 into %ir.1) + MOV32mr $rsp, 1, _, -4, _, killed $eax :: (store (s32) into %ir.1) RETQ ... --- @@ -248,11 +248,11 @@ body: | bb.0.entry: liveins: $rdi ; CHECK: name: volatile_inc - ; CHECK: $eax = MOV32rm $rdi, 1, $noreg, 0, $noreg :: (volatile load 4 from %ir.x) - ; CHECK: MOV32mr killed $rdi, 1, $noreg, 0, $noreg, $eax :: (volatile store 4 into %ir.x) - $eax = MOV32rm $rdi, 1, _, 0, _ :: (volatile load 4 from %ir.x) + ; CHECK: $eax = MOV32rm $rdi, 1, $noreg, 0, $noreg :: (volatile load (s32) from %ir.x) + ; CHECK: MOV32mr killed $rdi, 1, $noreg, 0, $noreg, $eax :: (volatile store (s32) into %ir.x) + $eax = MOV32rm $rdi, 1, _, 0, _ :: (volatile load (s32) from %ir.x) $eax = INC32r killed $eax, implicit-def dead $eflags - MOV32mr killed $rdi, 1, _, 0, _, $eax :: (volatile store 4 into %ir.x) + MOV32mr killed $rdi, 1, _, 0, _, $eax :: (volatile store (s32) into %ir.x) RETQ $eax ... --- @@ -265,8 +265,8 @@ body: | bb.0.entry: liveins: $esi, $rdi ; CHECK: name: non_temporal_store - ; CHECK: MOVNTImr killed $rdi, 1, $noreg, 0, $noreg, killed $esi :: (non-temporal store 4 into %ir.a) - MOVNTImr killed $rdi, 1, _, 0, _, killed $esi :: (non-temporal store 4 into %ir.a) + ; CHECK: MOVNTImr killed $rdi, 1, $noreg, 0, $noreg, killed $esi :: (non-temporal store (s32) into %ir.a) + MOVNTImr killed $rdi, 1, _, 0, _, killed $esi :: (non-temporal store (s32) into %ir.a) RETQ ... --- @@ -278,8 +278,8 @@ body: | bb.0.entry: liveins: $rdi ; CHECK: name: invariant_load - ; CHECK: $eax = MOV32rm killed $rdi, 1, $noreg, 0, $noreg :: (invariant load 4 from %ir.x) - $eax = MOV32rm killed $rdi, 1, _, 0, _ :: (invariant load 4 from %ir.x) + ; CHECK: $eax = MOV32rm killed $rdi, 1, $noreg, 0, $noreg :: (invariant load (s32) from %ir.x) + $eax = MOV32rm killed $rdi, 1, _, 0, _ :: (invariant load (s32) from %ir.x) RETQ $eax ... --- @@ -291,16 +291,16 @@ body: | bb.0.entry: liveins: $rdi ; CHECK: name: memory_offset - ; CHECK: $xmm0 = MOVAPSrm $rdi, 1, $noreg, 0, $noreg :: (load 16 from %ir.vec) - ; CHECK-NEXT: $xmm1 = MOVAPSrm $rdi, 1, $noreg, 16, $noreg :: (load 16 from %ir.vec + 16) - ; CHECK: MOVAPSmr $rdi, 1, $noreg, 0, $noreg, killed $xmm0 :: (store 16 into %ir.vec) - ; CHECK-NEXT: MOVAPSmr killed $rdi, 1, $noreg, 16, $noreg, killed $xmm1 :: (store 16 into %ir.vec + 16) - $xmm0 = MOVAPSrm $rdi, 1, _, 0, _ :: (load 16 from %ir.vec) - $xmm1 = MOVAPSrm $rdi, 1, _, 16, _ :: (load 16 from %ir.vec + 16) + ; CHECK: $xmm0 = MOVAPSrm $rdi, 1, $noreg, 0, $noreg :: (load (s128) from %ir.vec) + ; CHECK-NEXT: $xmm1 = MOVAPSrm $rdi, 1, $noreg, 16, $noreg :: (load (s128) from %ir.vec + 16) + ; CHECK: MOVAPSmr $rdi, 1, $noreg, 0, $noreg, killed $xmm0 :: (store (s128) into %ir.vec) + ; CHECK-NEXT: MOVAPSmr killed $rdi, 1, $noreg, 16, $noreg, killed $xmm1 :: (store (s128) into %ir.vec + 16) + $xmm0 = MOVAPSrm $rdi, 1, _, 0, _ :: (load (s128) from %ir.vec) + $xmm1 = MOVAPSrm $rdi, 1, _, 16, _ :: (load (s128) from %ir.vec + 16) $xmm2 = FsFLD0SS $xmm1 = MOVSSrr killed $xmm1, killed $xmm2 - MOVAPSmr $rdi, 1, _, 0, _, killed $xmm0 :: (store 16 into %ir.vec) - MOVAPSmr killed $rdi, 1, _, 16, _, killed $xmm1 :: (store 16 into %ir.vec + 16) + MOVAPSmr $rdi, 1, _, 0, _, killed $xmm0 :: (store (s128) into %ir.vec) + MOVAPSmr killed $rdi, 1, _, 16, _, killed $xmm1 :: (store (s128) into %ir.vec + 16) RETQ ... --- @@ -312,24 +312,24 @@ body: | bb.0.entry: liveins: $rdi ; CHECK: name: memory_alignment - ; CHECK: $xmm0 = MOVAPSrm $rdi, 1, $noreg, 0, $noreg :: (load 16 from %ir.vec, align 64) - ; CHECK-NEXT: $xmm1 = MOVAPSrm $rdi, 1, $noreg, 16, $noreg :: (load 16 from %ir.vec + 16, basealign 64) - ; CHECK-NEXT: $xmm2 = MOVAPSrm $rdi, 1, $noreg, 32, $noreg :: (load 16 from %ir.vec + 32, align 32, basealign 64) - ; CHECK-NEXT: $xmm3 = MOVAPSrm $rdi, 1, $noreg, 48, $noreg :: (load 16 from %ir.vec + 48, basealign 64) - ; CHECK: MOVAPSmr $rdi, 1, $noreg, 0, $noreg, killed $xmm0 :: (store 16 into %ir.vec, align 64) - ; CHECK-NEXT: MOVAPSmr $rdi, 1, $noreg, 16, $noreg, killed $xmm1 :: (store 16 into %ir.vec + 16, basealign 64) - ; CHECK-NEXT: MOVAPSmr $rdi, 1, $noreg, 32, $noreg, killed $xmm2 :: (store 16 into %ir.vec + 32, align 32, basealign 64) - ; CHECK-NEXT: MOVAPSmr killed $rdi, 1, $noreg, 48, $noreg, killed $xmm3 :: (store 16 into %ir.vec + 48, basealign 64) - $xmm0 = MOVAPSrm $rdi, 1, _, 0, _ :: (load 16 from %ir.vec, align 64) - $xmm1 = MOVAPSrm $rdi, 1, _, 16, _ :: (load 16 from %ir.vec + 16, basealign 64) - $xmm2 = MOVAPSrm $rdi, 1, _, 32, _ :: (load 16 from %ir.vec + 32, align 32, basealign 64) - $xmm3 = MOVAPSrm $rdi, 1, _, 48, _ :: (load 16 from %ir.vec + 48, basealign 64) + ; CHECK: $xmm0 = MOVAPSrm $rdi, 1, $noreg, 0, $noreg :: (load (s128) from %ir.vec, align 64) + ; CHECK-NEXT: $xmm1 = MOVAPSrm $rdi, 1, $noreg, 16, $noreg :: (load (s128) from %ir.vec + 16, basealign 64) + ; CHECK-NEXT: $xmm2 = MOVAPSrm $rdi, 1, $noreg, 32, $noreg :: (load (s128) from %ir.vec + 32, align 32, basealign 64) + ; CHECK-NEXT: $xmm3 = MOVAPSrm $rdi, 1, $noreg, 48, $noreg :: (load (s128) from %ir.vec + 48, basealign 64) + ; CHECK: MOVAPSmr $rdi, 1, $noreg, 0, $noreg, killed $xmm0 :: (store (s128) into %ir.vec, align 64) + ; CHECK-NEXT: MOVAPSmr $rdi, 1, $noreg, 16, $noreg, killed $xmm1 :: (store (s128) into %ir.vec + 16, basealign 64) + ; CHECK-NEXT: MOVAPSmr $rdi, 1, $noreg, 32, $noreg, killed $xmm2 :: (store (s128) into %ir.vec + 32, align 32, basealign 64) + ; CHECK-NEXT: MOVAPSmr killed $rdi, 1, $noreg, 48, $noreg, killed $xmm3 :: (store (s128) into %ir.vec + 48, basealign 64) + $xmm0 = MOVAPSrm $rdi, 1, _, 0, _ :: (load (s128) from %ir.vec, align 64) + $xmm1 = MOVAPSrm $rdi, 1, _, 16, _ :: (load (s128) from %ir.vec + 16, basealign 64) + $xmm2 = MOVAPSrm $rdi, 1, _, 32, _ :: (load (s128) from %ir.vec + 32, align 32, basealign 64) + $xmm3 = MOVAPSrm $rdi, 1, _, 48, _ :: (load (s128) from %ir.vec + 48, basealign 64) $xmm4 = FsFLD0SS $xmm1 = MOVSSrr killed $xmm1, killed $xmm4 - MOVAPSmr $rdi, 1, _, 0, _, killed $xmm0 :: (store 16 into %ir.vec, align 64) - MOVAPSmr $rdi, 1, _, 16, _, killed $xmm1 :: (store 16 into %ir.vec + 16, basealign 64) - MOVAPSmr $rdi, 1, _, 32, _, killed $xmm2 :: (store 16 into %ir.vec + 32, align 32, basealign 64) - MOVAPSmr killed $rdi, 1, _, 48, _, killed $xmm3 :: (store 16 into %ir.vec + 48, basealign 64) + MOVAPSmr $rdi, 1, _, 0, _, killed $xmm0 :: (store (s128) into %ir.vec, align 64) + MOVAPSmr $rdi, 1, _, 16, _, killed $xmm1 :: (store (s128) into %ir.vec + 16, basealign 64) + MOVAPSmr $rdi, 1, _, 32, _, killed $xmm2 :: (store (s128) into %ir.vec + 32, align 32, basealign 64) + MOVAPSmr killed $rdi, 1, _, 48, _, killed $xmm3 :: (store (s128) into %ir.vec + 48, basealign 64) RETQ ... --- @@ -344,10 +344,10 @@ body: | bb.0.entry: liveins: $xmm0 ; CHECK: name: constant_pool_psv - ; CHECK: $xmm0 = ADDSDrm killed $xmm0, $rip, 1, $noreg, %const.0, $noreg, implicit $mxcsr :: (load 8 from constant-pool) - ; CHECK-NEXT: $xmm0 = ADDSDrm killed $xmm0, $rip, 1, $noreg, %const.0, $noreg, implicit $mxcsr :: (load 8 from constant-pool + 8) - $xmm0 = ADDSDrm killed $xmm0, $rip, 1, _, %const.0, _, implicit $mxcsr :: (load 8 from constant-pool) - $xmm0 = ADDSDrm killed $xmm0, $rip, 1, _, %const.0, _, implicit $mxcsr :: (load 8 from constant-pool + 8) + ; CHECK: $xmm0 = ADDSDrm killed $xmm0, $rip, 1, $noreg, %const.0, $noreg, implicit $mxcsr :: (load (s64) from constant-pool) + ; CHECK-NEXT: $xmm0 = ADDSDrm killed $xmm0, $rip, 1, $noreg, %const.0, $noreg, implicit $mxcsr :: (load (s64) from constant-pool + 8) + $xmm0 = ADDSDrm killed $xmm0, $rip, 1, _, %const.0, _, implicit $mxcsr :: (load (s64) from constant-pool) + $xmm0 = ADDSDrm killed $xmm0, $rip, 1, _, %const.0, _, implicit $mxcsr :: (load (s64) from constant-pool + 8) RETQ $xmm0 ... --- @@ -367,8 +367,8 @@ body: | CFI_INSTRUCTION def_cfa_offset 32 LD_F80m $rsp, 1, $noreg, 32, $noreg, implicit-def dead $fpsw, implicit $fpcw ; CHECK: name: stack_psv - ; CHECK: ST_FP80m $rsp, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (store 10 into stack, align 16) - ST_FP80m $rsp, 1, _, 0, _, implicit-def dead $fpsw, implicit $fpcw :: (store 10 into stack, align 16) + ; CHECK: ST_FP80m $rsp, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (store (s80) into stack, align 16) + ST_FP80m $rsp, 1, _, 0, _, implicit-def dead $fpsw, implicit $fpcw :: (store (s80) into stack, align 16) CALL64pcrel32 &cosl, csr_64, implicit $rsp, implicit-def $rsp, implicit-def $fp0 $rsp = ADD64ri8 $rsp, 24, implicit-def dead $eflags RETQ @@ -379,8 +379,8 @@ tracksRegLiveness: true body: | bb.0.entry: ; CHECK: name: got_psv - ; CHECK: $rax = MOV64rm $rip, 1, $noreg, @G, $noreg :: (load 8 from got) - $rax = MOV64rm $rip, 1, _, @G, _ :: (load 8 from got) + ; CHECK: $rax = MOV64rm $rip, 1, $noreg, @G, $noreg :: (load (s64) from got) + $rax = MOV64rm $rip, 1, _, @G, _ :: (load (s64) from got) $eax = MOV32rm killed $rax, 1, _, 0, _ $eax = INC32r killed $eax, implicit-def dead $eflags RETQ $eax @@ -392,11 +392,11 @@ body: | bb.0.entry: $rax = MOV64rm $rip, 1, _, @G, _ ; CHECK-LABEL: name: global_value - ; CHECK: $eax = MOV32rm killed $rax, 1, $noreg, 0, $noreg, implicit-def $rax :: (load 4 from @G) - ; CHECK: $ecx = MOV32rm killed $rcx, 1, $noreg, 0, $noreg, implicit-def $rcx :: (load 4 from @0) - $eax = MOV32rm killed $rax, 1, _, 0, _, implicit-def $rax :: (load 4 from @G) + ; CHECK: $eax = MOV32rm killed $rax, 1, $noreg, 0, $noreg, implicit-def $rax :: (load (s32) from @G) + ; CHECK: $ecx = MOV32rm killed $rcx, 1, $noreg, 0, $noreg, implicit-def $rcx :: (load (s32) from @0) + $eax = MOV32rm killed $rax, 1, _, 0, _, implicit-def $rax :: (load (s32) from @G) $rcx = MOV64rm $rip, 1, _, @0, _ - $ecx = MOV32rm killed $rcx, 1, _, 0, _, implicit-def $rcx :: (load 4 from @0) + $ecx = MOV32rm killed $rcx, 1, _, 0, _, implicit-def $rcx :: (load (s32) from @0) $eax = LEA64_32r killed $rax, 1, killed $rcx, 1, _ RETQ $eax ... @@ -425,8 +425,8 @@ body: | $rcx = LEA64r $rip, 1, _, %jump-table.0, _ ; CHECK: name: jumptable_psv - ; CHECK: $rax = MOVSX64rm32 $rcx, 4, killed $rax, 0, $noreg :: (load 4 from jump-table, align 8) - $rax = MOVSX64rm32 $rcx, 4, killed $rax, 0, _ :: (load 4 from jump-table, align 8) + ; CHECK: $rax = MOVSX64rm32 $rcx, 4, killed $rax, 0, $noreg :: (load (s32) from jump-table, align 8) + $rax = MOVSX64rm32 $rcx, 4, killed $rax, 0, _ :: (load (s32) from jump-table, align 8) $rax = ADD64rr killed $rax, killed $rcx, implicit-def dead $eflags JMP64r killed $rax @@ -455,12 +455,12 @@ name: tbaa_metadata tracksRegLiveness: true body: | bb.0.entry: - $rax = MOV64rm $rip, 1, _, @a, _ :: (load 8 from got) + $rax = MOV64rm $rip, 1, _, @a, _ :: (load (s64) from got) ; CHECK-LABEL: name: tbaa_metadata - ; CHECK: $eax = MOV32rm killed $rax, 1, $noreg, 0, $noreg, implicit-def $rax :: (load 4 from @a, !tbaa !2) - ; CHECK-NEXT: $eax = MOV32rm killed $rax, 1, $noreg, 0, $noreg :: (load 4 from %ir.total_len2, !tbaa !6) - $eax = MOV32rm killed $rax, 1, _, 0, _, implicit-def $rax :: (load 4 from @a, !tbaa !2) - $eax = MOV32rm killed $rax, 1, _, 0, _ :: (load 4 from %ir.total_len2, !tbaa !6) + ; CHECK: $eax = MOV32rm killed $rax, 1, $noreg, 0, $noreg, implicit-def $rax :: (load (s32) from @a, !tbaa !2) + ; CHECK-NEXT: $eax = MOV32rm killed $rax, 1, $noreg, 0, $noreg :: (load (s32) from %ir.total_len2, !tbaa !6) + $eax = MOV32rm killed $rax, 1, _, 0, _, implicit-def $rax :: (load (s32) from @a, !tbaa !2) + $eax = MOV32rm killed $rax, 1, _, 0, _ :: (load (s32) from %ir.total_len2, !tbaa !6) RETQ $eax ... --- @@ -473,12 +473,12 @@ body: | bb.0.entry: liveins: $rdi, $rsi ; CHECK-LABEL: name: aa_scope - ; CHECK: $xmm0 = MOVSSrm_alt $rsi, 1, $noreg, 0, $noreg :: (load 4 from %ir.c, !alias.scope !9) - $xmm0 = MOVSSrm_alt $rsi, 1, _, 0, _ :: (load 4 from %ir.c, !alias.scope !9) - ; CHECK-NEXT: MOVSSmr $rdi, 1, $noreg, 20, $noreg, killed $xmm0 :: (store 4 into %ir.arrayidx.i, !noalias !9) - MOVSSmr $rdi, 1, _, 20, _, killed $xmm0 :: (store 4 into %ir.arrayidx.i, !noalias !9) - $xmm0 = MOVSSrm_alt killed $rsi, 1, _, 0, _ :: (load 4 from %ir.c) - MOVSSmr killed $rdi, 1, _, 28, _, killed $xmm0 :: (store 4 into %ir.arrayidx) + ; CHECK: $xmm0 = MOVSSrm_alt $rsi, 1, $noreg, 0, $noreg :: (load (s32) from %ir.c, !alias.scope !9) + $xmm0 = MOVSSrm_alt $rsi, 1, _, 0, _ :: (load (s32) from %ir.c, !alias.scope !9) + ; CHECK-NEXT: MOVSSmr $rdi, 1, $noreg, 20, $noreg, killed $xmm0 :: (store (s32) into %ir.arrayidx.i, !noalias !9) + MOVSSmr $rdi, 1, _, 20, _, killed $xmm0 :: (store (s32) into %ir.arrayidx.i, !noalias !9) + $xmm0 = MOVSSrm_alt killed $rsi, 1, _, 0, _ :: (load (s32) from %ir.c) + MOVSSmr killed $rdi, 1, _, 28, _, killed $xmm0 :: (store (s32) into %ir.arrayidx) RETQ ... --- @@ -490,8 +490,8 @@ body: | bb.0.entry: liveins: $rdi ; CHECK-LABEL: name: range_metadata - ; CHECK: $al = MOV8rm killed $rdi, 1, $noreg, 0, $noreg :: (load 1 from %ir.x, !range !11) - $al = MOV8rm killed $rdi, 1, _, 0, _ :: (load 1 from %ir.x, !range !11) + ; CHECK: $al = MOV8rm killed $rdi, 1, $noreg, 0, $noreg :: (load (s8) from %ir.x, !range !11) + $al = MOV8rm killed $rdi, 1, _, 0, _ :: (load (s8) from %ir.x, !range !11) RETQ $al ... --- @@ -503,10 +503,10 @@ body: | bb.0.entry: liveins: $rdi - $rax = MOV64rm $rip, 1, _, @values, _ :: (load 8 from got) + $rax = MOV64rm $rip, 1, _, @values, _ :: (load (s64) from got) ; CHECK-LABEL: gep_value - ; CHECK: MOV32mr killed $rax, 1, $noreg, 0, $noreg, $edi, implicit killed $rdi :: (store 4 into `i32* getelementptr inbounds ([50 x %st], [50 x %st]* @values, i64 0, i64 0, i32 0)`, align 16) - MOV32mr killed $rax, 1, _, 0, _, $edi, implicit killed $rdi :: (store 4 into `i32* getelementptr inbounds ([50 x %st], [50 x %st]* @values, i64 0, i64 0, i32 0)`, align 16) + ; CHECK: MOV32mr killed $rax, 1, $noreg, 0, $noreg, $edi, implicit killed $rdi :: (store (s32) into `i32* getelementptr inbounds ([50 x %st], [50 x %st]* @values, i64 0, i64 0, i32 0)`, align 16) + MOV32mr killed $rax, 1, _, 0, _, $edi, implicit killed $rdi :: (store (s32) into `i32* getelementptr inbounds ([50 x %st], [50 x %st]* @values, i64 0, i64 0, i32 0)`, align 16) RETQ ... --- @@ -515,32 +515,32 @@ tracksRegLiveness: true body: | bb.0.entry: ; CHECK-LABEL: name: undef_value - ; CHECK: $rax = MOV64rm undef $rax, 1, $noreg, 0, $noreg :: (load 8 from `i8** undef`) - $rax = MOV64rm undef $rax, 1, _, 0, _ :: (load 8 from `i8** undef`) + ; CHECK: $rax = MOV64rm undef $rax, 1, $noreg, 0, $noreg :: (load (s64) from `i8** undef`) + $rax = MOV64rm undef $rax, 1, _, 0, _ :: (load (s64) from `i8** undef`) RETQ $rax ... --- # Test memory operand without associated value. # CHECK-LABEL: name: dummy0 -# CHECK: $rax = MOV64rm undef $rax, 1, $noreg, 0, $noreg :: (load 8) +# CHECK: $rax = MOV64rm undef $rax, 1, $noreg, 0, $noreg :: (load (s64)) name: dummy0 tracksRegLiveness: true body: | bb.0: - $rax = MOV64rm undef $rax, 1, _, 0, _ :: (load 8) + $rax = MOV64rm undef $rax, 1, _, 0, _ :: (load (s64)) RETQ $rax ... --- # Test parsing of stack references in machine memory operands. # CHECK-LABEL: name: dummy1 -# CHECK: $rax = MOV64rm $rsp, 1, $noreg, 0, $noreg :: (load 8 from %stack.0) +# CHECK: $rax = MOV64rm $rsp, 1, $noreg, 0, $noreg :: (load (s64) from %stack.0) name: dummy1 tracksRegLiveness: true stack: - { id: 0, size: 4, alignment: 4 } body: | bb.0: - $rax = MOV64rm $rsp, 1, _, 0, _ :: (load 8 from %stack.0) + $rax = MOV64rm $rsp, 1, _, 0, _ :: (load (s64) from %stack.0) RETQ $rax ... --- diff --git a/llvm/test/CodeGen/MIR/X86/mir-canon-hash-bb.mir b/llvm/test/CodeGen/MIR/X86/mir-canon-hash-bb.mir index a0a2f9e378efa..ebd29f917ffb6 100644 --- a/llvm/test/CodeGen/MIR/X86/mir-canon-hash-bb.mir +++ b/llvm/test/CodeGen/MIR/X86/mir-canon-hash-bb.mir @@ -33,8 +33,8 @@ body: | %tmp1:_(s32) = G_CONSTANT i32 0 %tmp5:_(p0) = G_FRAME_INDEX %stack.0.tmp %tmp6:_(p0) = G_FRAME_INDEX %stack.1.tmp1 - G_STORE %tmp0(s32), %tmp5(p0) :: (store 4 into %ir.tmp) - %tmp7:_(s32) = G_LOAD %tmp5(p0) :: (load 4 from %ir.tmp) + G_STORE %tmp0(s32), %tmp5(p0) :: (store (s32) into %ir.tmp) + %tmp7:_(s32) = G_LOAD %tmp5(p0) :: (load (s32) from %ir.tmp) %tmp8:_(s1) = G_ICMP intpred(ne), %tmp7(s32), %tmp1 G_BRCOND %tmp8(s1), %bb.1 G_BR %bb.2 @@ -43,7 +43,7 @@ body: | ; CHECK: %bb2_{{[0-9]+}}__1:_(s32) = G_CONSTANT bb.1: %tmp4:_(s32) = G_CONSTANT i32 1 - G_STORE %tmp4(s32), %tmp6(p0) :: (store 4 into %ir.tmp1) + G_STORE %tmp4(s32), %tmp6(p0) :: (store (s32) into %ir.tmp1) G_BR %bb.3 @@ -51,12 +51,12 @@ body: | ; CHECK: %bb1_{{[0-9]+}}__1:_(s32) = G_CONSTANT bb.2: %tmp3:_(s32) = G_CONSTANT i32 2 - G_STORE %tmp3(s32), %tmp6(p0) :: (store 4 into %ir.tmp1) + G_STORE %tmp3(s32), %tmp6(p0) :: (store (s32) into %ir.tmp1) ; CHECK: bb.3: ; CHECK: %bb3_{{[0-9]+}}__1:_(s32) = G_LOAD bb.3: - %tmp9:_(s32) = G_LOAD %tmp6(p0) :: (load 4 from %ir.tmp1) + %tmp9:_(s32) = G_LOAD %tmp6(p0) :: (load (s32) from %ir.tmp1) $eax = COPY %tmp9(s32) RET 0, implicit $eax diff --git a/llvm/test/CodeGen/MIR/X86/roundtrip.mir b/llvm/test/CodeGen/MIR/X86/roundtrip.mir index 94e562f5511d7..b703be0a827ee 100644 --- a/llvm/test/CodeGen/MIR/X86/roundtrip.mir +++ b/llvm/test/CodeGen/MIR/X86/roundtrip.mir @@ -8,13 +8,13 @@ # CHECK: bb.0: # CHECK: %0:gr32 = MOV32r0 implicit-def $eflags # CHECK: dead %1:gr32 = COPY %0 -# CHECK: MOV32mr undef $rcx, 1, $noreg, 0, $noreg, killed %0 :: (volatile store 4) +# CHECK: MOV32mr undef $rcx, 1, $noreg, 0, $noreg, killed %0 :: (volatile store (s32)) # CHECK: RETQ undef $eax name: func0 body: | bb.0: %0 : gr32 = MOV32r0 implicit-def $eflags dead %1 : gr32 = COPY %0 - MOV32mr undef $rcx, 1, _, 0, _, killed %0 :: (volatile store 4) + MOV32mr undef $rcx, 1, _, 0, _, killed %0 :: (volatile store (s32)) RETQ undef $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/undefined-value-in-memory-operand.mir b/llvm/test/CodeGen/MIR/X86/undefined-value-in-memory-operand.mir index 4ad8519d268aa..e56873d52d607 100644 --- a/llvm/test/CodeGen/MIR/X86/undefined-value-in-memory-operand.mir +++ b/llvm/test/CodeGen/MIR/X86/undefined-value-in-memory-operand.mir @@ -17,8 +17,8 @@ liveins: body: | bb.0.entry: liveins: $rdi - ; CHECK: [[@LINE+1]]:60: use of undefined IR value '%ir.c' - $eax = MOV32rm killed $rdi, 1, _, 0, _ :: (load 4 from %ir.c) + ; CHECK: [[@LINE+1]]:64: use of undefined IR value '%ir.c' + $eax = MOV32rm killed $rdi, 1, _, 0, _ :: (load (s32) from %ir.c) RETQ $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/unknown-metadata-keyword.mir b/llvm/test/CodeGen/MIR/X86/unknown-metadata-keyword.mir index 9c17655dd8bba..85abb2fcfdffe 100644 --- a/llvm/test/CodeGen/MIR/X86/unknown-metadata-keyword.mir +++ b/llvm/test/CodeGen/MIR/X86/unknown-metadata-keyword.mir @@ -17,9 +17,9 @@ liveins: body: | bb.0.entry: liveins: $rdi - ; CHECK: [[@LINE+1]]:60: use of unknown metadata keyword '!tba' - $eax = MOV32rm $rdi, 1, _, 0, _ :: (load 4 from %ir.x, !tba !0) + ; CHECK: [[@LINE+1]]:64: use of unknown metadata keyword '!tba' + $eax = MOV32rm $rdi, 1, _, 0, _ :: (load (s32) from %ir.x, !tba !0) $eax = INC32r killed $eax, implicit-def dead $eflags - MOV32mr killed $rdi, 1, _, 0, _, $eax :: (store 4 into %ir.x) + MOV32mr killed $rdi, 1, _, 0, _, $eax :: (store (s32) into %ir.x) RETQ $eax ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/add_vec.mir b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/add_vec.mir index 6903f01ae5ade..e578cf9746fe4 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/add_vec.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/add_vec.mir @@ -23,18 +23,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2 - ; P5600: [[LD_B:%[0-9]+]]:msa128b = LD_B [[COPY]], 0 :: (load 16 from %ir.a) - ; P5600: [[LD_B1:%[0-9]+]]:msa128b = LD_B [[COPY1]], 0 :: (load 16 from %ir.b) + ; P5600: [[LD_B:%[0-9]+]]:msa128b = LD_B [[COPY]], 0 :: (load (<16 x s8>) from %ir.a) + ; P5600: [[LD_B1:%[0-9]+]]:msa128b = LD_B [[COPY1]], 0 :: (load (<16 x s8>) from %ir.b) ; P5600: [[ADDV_B:%[0-9]+]]:msa128b = ADDV_B [[LD_B1]], [[LD_B]] - ; P5600: ST_B [[ADDV_B]], [[COPY2]], 0 :: (store 16 into %ir.c) + ; P5600: ST_B [[ADDV_B]], [[COPY2]], 0 :: (store (<16 x s8>) into %ir.c) ; P5600: RetRA %0:gprb(p0) = COPY $a0 %1:gprb(p0) = COPY $a1 %2:gprb(p0) = COPY $a2 - %3:fprb(<16 x s8>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:fprb(<16 x s8>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:fprb(<16 x s8>) = G_LOAD %0(p0) :: (load (<16 x s8>) from %ir.a) + %4:fprb(<16 x s8>) = G_LOAD %1(p0) :: (load (<16 x s8>) from %ir.b) %5:fprb(<16 x s8>) = G_ADD %4, %3 - G_STORE %5(<16 x s8>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<16 x s8>), %2(p0) :: (store (<16 x s8>) into %ir.c) RetRA ... @@ -53,18 +53,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2 - ; P5600: [[LD_H:%[0-9]+]]:msa128h = LD_H [[COPY]], 0 :: (load 16 from %ir.a) - ; P5600: [[LD_H1:%[0-9]+]]:msa128h = LD_H [[COPY1]], 0 :: (load 16 from %ir.b) + ; P5600: [[LD_H:%[0-9]+]]:msa128h = LD_H [[COPY]], 0 :: (load (<8 x s16>) from %ir.a) + ; P5600: [[LD_H1:%[0-9]+]]:msa128h = LD_H [[COPY1]], 0 :: (load (<8 x s16>) from %ir.b) ; P5600: [[ADDV_H:%[0-9]+]]:msa128h = ADDV_H [[LD_H1]], [[LD_H]] - ; P5600: ST_H [[ADDV_H]], [[COPY2]], 0 :: (store 16 into %ir.c) + ; P5600: ST_H [[ADDV_H]], [[COPY2]], 0 :: (store (<8 x s16>) into %ir.c) ; P5600: RetRA %0:gprb(p0) = COPY $a0 %1:gprb(p0) = COPY $a1 %2:gprb(p0) = COPY $a2 - %3:fprb(<8 x s16>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:fprb(<8 x s16>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:fprb(<8 x s16>) = G_LOAD %0(p0) :: (load (<8 x s16>) from %ir.a) + %4:fprb(<8 x s16>) = G_LOAD %1(p0) :: (load (<8 x s16>) from %ir.b) %5:fprb(<8 x s16>) = G_ADD %4, %3 - G_STORE %5(<8 x s16>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<8 x s16>), %2(p0) :: (store (<8 x s16>) into %ir.c) RetRA ... @@ -83,18 +83,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2 - ; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY]], 0 :: (load 16 from %ir.a) - ; P5600: [[LD_W1:%[0-9]+]]:msa128w = LD_W [[COPY1]], 0 :: (load 16 from %ir.b) + ; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY]], 0 :: (load (<4 x s32>) from %ir.a) + ; P5600: [[LD_W1:%[0-9]+]]:msa128w = LD_W [[COPY1]], 0 :: (load (<4 x s32>) from %ir.b) ; P5600: [[ADDV_W:%[0-9]+]]:msa128w = ADDV_W [[LD_W1]], [[LD_W]] - ; P5600: ST_W [[ADDV_W]], [[COPY2]], 0 :: (store 16 into %ir.c) + ; P5600: ST_W [[ADDV_W]], [[COPY2]], 0 :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:gprb(p0) = COPY $a0 %1:gprb(p0) = COPY $a1 %2:gprb(p0) = COPY $a2 - %3:fprb(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:fprb(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:fprb(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) + %4:fprb(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>) from %ir.b) %5:fprb(<4 x s32>) = G_ADD %4, %3 - G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<4 x s32>), %2(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -113,18 +113,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2 - ; P5600: [[LD_D:%[0-9]+]]:msa128d = LD_D [[COPY]], 0 :: (load 16 from %ir.a) - ; P5600: [[LD_D1:%[0-9]+]]:msa128d = LD_D [[COPY1]], 0 :: (load 16 from %ir.b) + ; P5600: [[LD_D:%[0-9]+]]:msa128d = LD_D [[COPY]], 0 :: (load (<2 x s64>) from %ir.a) + ; P5600: [[LD_D1:%[0-9]+]]:msa128d = LD_D [[COPY1]], 0 :: (load (<2 x s64>) from %ir.b) ; P5600: [[ADDV_D:%[0-9]+]]:msa128d = ADDV_D [[LD_D1]], [[LD_D]] - ; P5600: ST_D [[ADDV_D]], [[COPY2]], 0 :: (store 16 into %ir.c) + ; P5600: ST_D [[ADDV_D]], [[COPY2]], 0 :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:gprb(p0) = COPY $a0 %1:gprb(p0) = COPY $a1 %2:gprb(p0) = COPY $a2 - %3:fprb(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:fprb(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:fprb(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) + %4:fprb(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>) from %ir.b) %5:fprb(<2 x s64>) = G_ADD %4, %3 - G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<2 x s64>), %2(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/fabs_vec.mir b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/fabs_vec.mir index ed56cdc1744f5..308c18f8f8711 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/fabs_vec.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/fabs_vec.mir @@ -20,15 +20,15 @@ body: | ; P5600: liveins: $a0, $a1 ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 - ; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY]], 0 :: (load 16 from %ir.a) + ; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY]], 0 :: (load (<4 x s32>) from %ir.a) ; P5600: [[FABS_W:%[0-9]+]]:msa128w = FABS_W [[LD_W]] - ; P5600: ST_W [[FABS_W]], [[COPY1]], 0 :: (store 16 into %ir.c) + ; P5600: ST_W [[FABS_W]], [[COPY1]], 0 :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:gprb(p0) = COPY $a0 %1:gprb(p0) = COPY $a1 - %2:fprb(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) + %2:fprb(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) %3:fprb(<4 x s32>) = G_FABS %2 - G_STORE %3(<4 x s32>), %1(p0) :: (store 16 into %ir.c) + G_STORE %3(<4 x s32>), %1(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -46,15 +46,15 @@ body: | ; P5600: liveins: $a0, $a1 ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 - ; P5600: [[LD_D:%[0-9]+]]:msa128d = LD_D [[COPY]], 0 :: (load 16 from %ir.a) + ; P5600: [[LD_D:%[0-9]+]]:msa128d = LD_D [[COPY]], 0 :: (load (<2 x s64>) from %ir.a) ; P5600: [[FABS_D:%[0-9]+]]:msa128d = FABS_D [[LD_D]] - ; P5600: ST_D [[FABS_D]], [[COPY1]], 0 :: (store 16 into %ir.c) + ; P5600: ST_D [[FABS_D]], [[COPY1]], 0 :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:gprb(p0) = COPY $a0 %1:gprb(p0) = COPY $a1 - %2:fprb(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) + %2:fprb(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) %3:fprb(<2 x s64>) = G_FABS %2 - G_STORE %3(<2 x s64>), %1(p0) :: (store 16 into %ir.c) + G_STORE %3(<2 x s64>), %1(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/fence.mir b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/fence.mir index 8595c136eef15..f569ec81c2c68 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/fence.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/fence.mir @@ -18,12 +18,12 @@ body: | ; MIPS32-LABEL: name: atomic_load_i32 ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 - ; MIPS32: [[LW:%[0-9]+]]:gpr32 = LW [[COPY]], 0 :: (load monotonic 4 from %ir.ptr) + ; MIPS32: [[LW:%[0-9]+]]:gpr32 = LW [[COPY]], 0 :: (load monotonic (s32) from %ir.ptr) ; MIPS32: SYNC 0 ; MIPS32: $v0 = COPY [[LW]] ; MIPS32: RetRA implicit $v0 %0:gprb(p0) = COPY $a0 - %1:gprb(s32) = G_LOAD %0(p0) :: (load monotonic 4 from %ir.ptr) + %1:gprb(s32) = G_LOAD %0(p0) :: (load monotonic (s32) from %ir.ptr) G_FENCE 4, 1 $v0 = COPY %1(s32) RetRA implicit $v0 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/floating_point_vec_arithmetic_operations.mir b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/floating_point_vec_arithmetic_operations.mir index 4b49d6f46b530..1fcba134704a5 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/floating_point_vec_arithmetic_operations.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/floating_point_vec_arithmetic_operations.mir @@ -30,18 +30,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2 - ; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY]], 0 :: (load 16 from %ir.a) - ; P5600: [[LD_W1:%[0-9]+]]:msa128w = LD_W [[COPY1]], 0 :: (load 16 from %ir.b) + ; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY]], 0 :: (load (<4 x s32>) from %ir.a) + ; P5600: [[LD_W1:%[0-9]+]]:msa128w = LD_W [[COPY1]], 0 :: (load (<4 x s32>) from %ir.b) ; P5600: [[FADD_W:%[0-9]+]]:msa128w = FADD_W [[LD_W]], [[LD_W1]] - ; P5600: ST_W [[FADD_W]], [[COPY2]], 0 :: (store 16 into %ir.c) + ; P5600: ST_W [[FADD_W]], [[COPY2]], 0 :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:gprb(p0) = COPY $a0 %1:gprb(p0) = COPY $a1 %2:gprb(p0) = COPY $a2 - %3:fprb(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:fprb(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:fprb(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) + %4:fprb(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>) from %ir.b) %5:fprb(<4 x s32>) = G_FADD %3, %4 - G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<4 x s32>), %2(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -60,18 +60,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2 - ; P5600: [[LD_D:%[0-9]+]]:msa128d = LD_D [[COPY]], 0 :: (load 16 from %ir.a) - ; P5600: [[LD_D1:%[0-9]+]]:msa128d = LD_D [[COPY1]], 0 :: (load 16 from %ir.b) + ; P5600: [[LD_D:%[0-9]+]]:msa128d = LD_D [[COPY]], 0 :: (load (<2 x s64>) from %ir.a) + ; P5600: [[LD_D1:%[0-9]+]]:msa128d = LD_D [[COPY1]], 0 :: (load (<2 x s64>) from %ir.b) ; P5600: [[FADD_D:%[0-9]+]]:msa128d = FADD_D [[LD_D]], [[LD_D1]] - ; P5600: ST_D [[FADD_D]], [[COPY2]], 0 :: (store 16 into %ir.c) + ; P5600: ST_D [[FADD_D]], [[COPY2]], 0 :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:gprb(p0) = COPY $a0 %1:gprb(p0) = COPY $a1 %2:gprb(p0) = COPY $a2 - %3:fprb(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:fprb(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:fprb(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) + %4:fprb(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>) from %ir.b) %5:fprb(<2 x s64>) = G_FADD %3, %4 - G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<2 x s64>), %2(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... @@ -90,18 +90,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2 - ; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY]], 0 :: (load 16 from %ir.a) - ; P5600: [[LD_W1:%[0-9]+]]:msa128w = LD_W [[COPY1]], 0 :: (load 16 from %ir.b) + ; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY]], 0 :: (load (<4 x s32>) from %ir.a) + ; P5600: [[LD_W1:%[0-9]+]]:msa128w = LD_W [[COPY1]], 0 :: (load (<4 x s32>) from %ir.b) ; P5600: [[FSUB_W:%[0-9]+]]:msa128w = FSUB_W [[LD_W]], [[LD_W1]] - ; P5600: ST_W [[FSUB_W]], [[COPY2]], 0 :: (store 16 into %ir.c) + ; P5600: ST_W [[FSUB_W]], [[COPY2]], 0 :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:gprb(p0) = COPY $a0 %1:gprb(p0) = COPY $a1 %2:gprb(p0) = COPY $a2 - %3:fprb(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:fprb(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:fprb(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) + %4:fprb(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>) from %ir.b) %5:fprb(<4 x s32>) = G_FSUB %3, %4 - G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<4 x s32>), %2(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -120,18 +120,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2 - ; P5600: [[LD_D:%[0-9]+]]:msa128d = LD_D [[COPY]], 0 :: (load 16 from %ir.a) - ; P5600: [[LD_D1:%[0-9]+]]:msa128d = LD_D [[COPY1]], 0 :: (load 16 from %ir.b) + ; P5600: [[LD_D:%[0-9]+]]:msa128d = LD_D [[COPY]], 0 :: (load (<2 x s64>) from %ir.a) + ; P5600: [[LD_D1:%[0-9]+]]:msa128d = LD_D [[COPY1]], 0 :: (load (<2 x s64>) from %ir.b) ; P5600: [[FSUB_D:%[0-9]+]]:msa128d = FSUB_D [[LD_D]], [[LD_D1]] - ; P5600: ST_D [[FSUB_D]], [[COPY2]], 0 :: (store 16 into %ir.c) + ; P5600: ST_D [[FSUB_D]], [[COPY2]], 0 :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:gprb(p0) = COPY $a0 %1:gprb(p0) = COPY $a1 %2:gprb(p0) = COPY $a2 - %3:fprb(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:fprb(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:fprb(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) + %4:fprb(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>) from %ir.b) %5:fprb(<2 x s64>) = G_FSUB %3, %4 - G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<2 x s64>), %2(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... @@ -150,18 +150,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2 - ; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY]], 0 :: (load 16 from %ir.a) - ; P5600: [[LD_W1:%[0-9]+]]:msa128w = LD_W [[COPY1]], 0 :: (load 16 from %ir.b) + ; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY]], 0 :: (load (<4 x s32>) from %ir.a) + ; P5600: [[LD_W1:%[0-9]+]]:msa128w = LD_W [[COPY1]], 0 :: (load (<4 x s32>) from %ir.b) ; P5600: [[FMUL_W:%[0-9]+]]:msa128w = FMUL_W [[LD_W]], [[LD_W1]] - ; P5600: ST_W [[FMUL_W]], [[COPY2]], 0 :: (store 16 into %ir.c) + ; P5600: ST_W [[FMUL_W]], [[COPY2]], 0 :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:gprb(p0) = COPY $a0 %1:gprb(p0) = COPY $a1 %2:gprb(p0) = COPY $a2 - %3:fprb(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:fprb(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:fprb(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) + %4:fprb(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>) from %ir.b) %5:fprb(<4 x s32>) = G_FMUL %3, %4 - G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<4 x s32>), %2(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -180,18 +180,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2 - ; P5600: [[LD_D:%[0-9]+]]:msa128d = LD_D [[COPY]], 0 :: (load 16 from %ir.a) - ; P5600: [[LD_D1:%[0-9]+]]:msa128d = LD_D [[COPY1]], 0 :: (load 16 from %ir.b) + ; P5600: [[LD_D:%[0-9]+]]:msa128d = LD_D [[COPY]], 0 :: (load (<2 x s64>) from %ir.a) + ; P5600: [[LD_D1:%[0-9]+]]:msa128d = LD_D [[COPY1]], 0 :: (load (<2 x s64>) from %ir.b) ; P5600: [[FMUL_D:%[0-9]+]]:msa128d = FMUL_D [[LD_D]], [[LD_D1]] - ; P5600: ST_D [[FMUL_D]], [[COPY2]], 0 :: (store 16 into %ir.c) + ; P5600: ST_D [[FMUL_D]], [[COPY2]], 0 :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:gprb(p0) = COPY $a0 %1:gprb(p0) = COPY $a1 %2:gprb(p0) = COPY $a2 - %3:fprb(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:fprb(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:fprb(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) + %4:fprb(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>) from %ir.b) %5:fprb(<2 x s64>) = G_FMUL %3, %4 - G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<2 x s64>), %2(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... @@ -210,18 +210,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2 - ; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY]], 0 :: (load 16 from %ir.a) - ; P5600: [[LD_W1:%[0-9]+]]:msa128w = LD_W [[COPY1]], 0 :: (load 16 from %ir.b) + ; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY]], 0 :: (load (<4 x s32>) from %ir.a) + ; P5600: [[LD_W1:%[0-9]+]]:msa128w = LD_W [[COPY1]], 0 :: (load (<4 x s32>) from %ir.b) ; P5600: [[FDIV_W:%[0-9]+]]:msa128w = FDIV_W [[LD_W]], [[LD_W1]] - ; P5600: ST_W [[FDIV_W]], [[COPY2]], 0 :: (store 16 into %ir.c) + ; P5600: ST_W [[FDIV_W]], [[COPY2]], 0 :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:gprb(p0) = COPY $a0 %1:gprb(p0) = COPY $a1 %2:gprb(p0) = COPY $a2 - %3:fprb(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:fprb(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:fprb(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) + %4:fprb(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>) from %ir.b) %5:fprb(<4 x s32>) = G_FDIV %3, %4 - G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<4 x s32>), %2(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -240,18 +240,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2 - ; P5600: [[LD_D:%[0-9]+]]:msa128d = LD_D [[COPY]], 0 :: (load 16 from %ir.a) - ; P5600: [[LD_D1:%[0-9]+]]:msa128d = LD_D [[COPY1]], 0 :: (load 16 from %ir.b) + ; P5600: [[LD_D:%[0-9]+]]:msa128d = LD_D [[COPY]], 0 :: (load (<2 x s64>) from %ir.a) + ; P5600: [[LD_D1:%[0-9]+]]:msa128d = LD_D [[COPY1]], 0 :: (load (<2 x s64>) from %ir.b) ; P5600: [[FDIV_D:%[0-9]+]]:msa128d = FDIV_D [[LD_D]], [[LD_D1]] - ; P5600: ST_D [[FDIV_D]], [[COPY2]], 0 :: (store 16 into %ir.c) + ; P5600: ST_D [[FDIV_D]], [[COPY2]], 0 :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:gprb(p0) = COPY $a0 %1:gprb(p0) = COPY $a1 %2:gprb(p0) = COPY $a2 - %3:fprb(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:fprb(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:fprb(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) + %4:fprb(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>) from %ir.b) %5:fprb(<2 x s64>) = G_FDIV %3, %4 - G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<2 x s64>), %2(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/fsqrt_vec.mir b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/fsqrt_vec.mir index 5a81540f2947f..2fdae65b0a5c5 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/fsqrt_vec.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/fsqrt_vec.mir @@ -20,15 +20,15 @@ body: | ; P5600: liveins: $a0, $a1 ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 - ; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY]], 0 :: (load 16 from %ir.a) + ; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY]], 0 :: (load (<4 x s32>) from %ir.a) ; P5600: [[FSQRT_W:%[0-9]+]]:msa128w = FSQRT_W [[LD_W]] - ; P5600: ST_W [[FSQRT_W]], [[COPY1]], 0 :: (store 16 into %ir.c) + ; P5600: ST_W [[FSQRT_W]], [[COPY1]], 0 :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:gprb(p0) = COPY $a0 %1:gprb(p0) = COPY $a1 - %2:fprb(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) + %2:fprb(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) %3:fprb(<4 x s32>) = G_FSQRT %2 - G_STORE %3(<4 x s32>), %1(p0) :: (store 16 into %ir.c) + G_STORE %3(<4 x s32>), %1(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -46,15 +46,15 @@ body: | ; P5600: liveins: $a0, $a1 ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 - ; P5600: [[LD_D:%[0-9]+]]:msa128d = LD_D [[COPY]], 0 :: (load 16 from %ir.a) + ; P5600: [[LD_D:%[0-9]+]]:msa128d = LD_D [[COPY]], 0 :: (load (<2 x s64>) from %ir.a) ; P5600: [[FSQRT_D:%[0-9]+]]:msa128d = FSQRT_D [[LD_D]] - ; P5600: ST_D [[FSQRT_D]], [[COPY1]], 0 :: (store 16 into %ir.c) + ; P5600: ST_D [[FSQRT_D]], [[COPY1]], 0 :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:gprb(p0) = COPY $a0 %1:gprb(p0) = COPY $a1 - %2:fprb(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) + %2:fprb(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) %3:fprb(<2 x s64>) = G_FSQRT %2 - G_STORE %3(<2 x s64>), %1(p0) :: (store 16 into %ir.c) + G_STORE %3(<2 x s64>), %1(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/gloal_address_pic.mir b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/gloal_address_pic.mir index 0babd2bd5c29d..13f76532cb8ef 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/gloal_address_pic.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/gloal_address_pic.mir @@ -54,7 +54,7 @@ body: | ; MIPS32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 ; MIPS32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp - ; MIPS32: [[LW:%[0-9]+]]:gpr32 = LW [[ADDu]], target-flags(mips-got-call) @f :: (load 4 from got) + ; MIPS32: [[LW:%[0-9]+]]:gpr32 = LW [[ADDu]], target-flags(mips-got-call) @f :: (load (s32) from got) ; MIPS32: $a0 = COPY [[COPY]] ; MIPS32: $a1 = COPY [[COPY1]] ; MIPS32: $gp = COPY [[ADDu1]] @@ -95,7 +95,7 @@ body: | ; MIPS32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 ; MIPS32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp - ; MIPS32: [[LW:%[0-9]+]]:gpr32 = LW [[ADDu]], target-flags(mips-got) @f_with_local_linkage :: (load 4 from got) + ; MIPS32: [[LW:%[0-9]+]]:gpr32 = LW [[ADDu]], target-flags(mips-got) @f_with_local_linkage :: (load (s32) from got) ; MIPS32: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu [[LW]], target-flags(mips-abs-lo) @f_with_local_linkage ; MIPS32: $a0 = COPY [[COPY]] ; MIPS32: $a1 = COPY [[COPY1]] @@ -131,12 +131,12 @@ body: | ; MIPS32-LABEL: name: ret_global_int ; MIPS32: liveins: $t9, $v0 ; MIPS32: [[ADDu:%[0-9]+]]:gpr32 = ADDu $v0, $t9 - ; MIPS32: [[LW:%[0-9]+]]:gpr32 = LW [[ADDu]], target-flags(mips-got) @val :: (load 4 from got) - ; MIPS32: [[LW1:%[0-9]+]]:gpr32 = LW [[LW]], 0 :: (load 4 from @val) + ; MIPS32: [[LW:%[0-9]+]]:gpr32 = LW [[ADDu]], target-flags(mips-got) @val :: (load (s32) from got) + ; MIPS32: [[LW1:%[0-9]+]]:gpr32 = LW [[LW]], 0 :: (load (s32) from @val) ; MIPS32: $v0 = COPY [[LW1]] ; MIPS32: RetRA implicit $v0 %1:gprb(p0) = G_GLOBAL_VALUE @val - %0:gprb(s32) = G_LOAD %1(p0) :: (load 4 from @val) + %0:gprb(s32) = G_LOAD %1(p0) :: (load (s32) from @val) $v0 = COPY %0(s32) RetRA implicit $v0 @@ -152,13 +152,13 @@ body: | ; MIPS32-LABEL: name: ret_global_int_with_local_linkage ; MIPS32: liveins: $t9, $v0 ; MIPS32: [[ADDu:%[0-9]+]]:gpr32 = ADDu $v0, $t9 - ; MIPS32: [[LW:%[0-9]+]]:gpr32 = LW [[ADDu]], target-flags(mips-got) @val_with_local_linkage :: (load 4 from got) + ; MIPS32: [[LW:%[0-9]+]]:gpr32 = LW [[ADDu]], target-flags(mips-got) @val_with_local_linkage :: (load (s32) from got) ; MIPS32: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu [[LW]], target-flags(mips-abs-lo) @val_with_local_linkage - ; MIPS32: [[LW1:%[0-9]+]]:gpr32 = LW [[ADDiu]], 0 :: (load 4 from @val_with_local_linkage) + ; MIPS32: [[LW1:%[0-9]+]]:gpr32 = LW [[ADDiu]], 0 :: (load (s32) from @val_with_local_linkage) ; MIPS32: $v0 = COPY [[LW1]] ; MIPS32: RetRA implicit $v0 %1:gprb(p0) = G_GLOBAL_VALUE @val_with_local_linkage - %0:gprb(s32) = G_LOAD %1(p0) :: (load 4 from @val_with_local_linkage) + %0:gprb(s32) = G_LOAD %1(p0) :: (load (s32) from @val_with_local_linkage) $v0 = COPY %0(s32) RetRA implicit $v0 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/jump_table_and_brjt.mir b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/jump_table_and_brjt.mir index 34a21c371a1b7..b8450fffdb98d 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/jump_table_and_brjt.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/jump_table_and_brjt.mir @@ -92,7 +92,7 @@ body: | ; MIPS32: [[LUi:%[0-9]+]]:gpr32 = LUi target-flags(mips-abs-hi) %jump-table.0 ; MIPS32: [[SLL:%[0-9]+]]:gpr32 = SLL [[SUBu]], 2 ; MIPS32: [[ADDu:%[0-9]+]]:gpr32 = ADDu [[LUi]], [[SLL]] - ; MIPS32: [[LW:%[0-9]+]]:gpr32 = LW [[ADDu]], target-flags(mips-abs-lo) %jump-table.0 :: (load 4) + ; MIPS32: [[LW:%[0-9]+]]:gpr32 = LW [[ADDu]], target-flags(mips-abs-lo) %jump-table.0 :: (load (s32)) ; MIPS32: PseudoIndirectBranch [[LW]] ; MIPS32: bb.2.sw.bb: ; MIPS32: $v0 = COPY [[ORi4]] @@ -120,7 +120,7 @@ body: | ; MIPS32: [[LUi1:%[0-9]+]]:gpr32 = LUi target-flags(mips-abs-hi) %jump-table.1 ; MIPS32: [[SLL1:%[0-9]+]]:gpr32 = SLL [[SUBu1]], 2 ; MIPS32: [[ADDu1:%[0-9]+]]:gpr32 = ADDu [[LUi1]], [[SLL1]] - ; MIPS32: [[LW1:%[0-9]+]]:gpr32 = LW [[ADDu1]], target-flags(mips-abs-lo) %jump-table.1 :: (load 4) + ; MIPS32: [[LW1:%[0-9]+]]:gpr32 = LW [[ADDu1]], target-flags(mips-abs-lo) %jump-table.1 :: (load (s32)) ; MIPS32: PseudoIndirectBranch [[LW1]] ; MIPS32: bb.9.sw.bb4: ; MIPS32: $v0 = COPY [[ORi4]] @@ -156,10 +156,10 @@ body: | ; MIPS32_PIC: BNE [[ANDi]], $zero, %bb.6, implicit-def $at ; MIPS32_PIC: bb.1.entry: ; MIPS32_PIC: successors: %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000), %bb.5(0x20000000) - ; MIPS32_PIC: [[LW:%[0-9]+]]:gpr32 = LW [[ADDu]], target-flags(mips-got) %jump-table.0 :: (load 4 from got) + ; MIPS32_PIC: [[LW:%[0-9]+]]:gpr32 = LW [[ADDu]], target-flags(mips-got) %jump-table.0 :: (load (s32) from got) ; MIPS32_PIC: [[SLL:%[0-9]+]]:gpr32 = SLL [[SUBu]], 2 ; MIPS32_PIC: [[ADDu1:%[0-9]+]]:gpr32 = ADDu [[LW]], [[SLL]] - ; MIPS32_PIC: [[LW1:%[0-9]+]]:gpr32 = LW [[ADDu1]], target-flags(mips-abs-lo) %jump-table.0 :: (load 4) + ; MIPS32_PIC: [[LW1:%[0-9]+]]:gpr32 = LW [[ADDu1]], target-flags(mips-abs-lo) %jump-table.0 :: (load (s32)) ; MIPS32_PIC: [[ADDu2:%[0-9]+]]:gpr32 = ADDu [[LW1]], [[ADDu]] ; MIPS32_PIC: PseudoIndirectBranch [[ADDu2]] ; MIPS32_PIC: bb.2.sw.bb: @@ -185,10 +185,10 @@ body: | ; MIPS32_PIC: BNE [[ANDi1]], $zero, %bb.13, implicit-def $at ; MIPS32_PIC: bb.8.sw.epilog: ; MIPS32_PIC: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000) - ; MIPS32_PIC: [[LW2:%[0-9]+]]:gpr32 = LW [[ADDu]], target-flags(mips-got) %jump-table.1 :: (load 4 from got) + ; MIPS32_PIC: [[LW2:%[0-9]+]]:gpr32 = LW [[ADDu]], target-flags(mips-got) %jump-table.1 :: (load (s32) from got) ; MIPS32_PIC: [[SLL1:%[0-9]+]]:gpr32 = SLL [[SUBu1]], 2 ; MIPS32_PIC: [[ADDu3:%[0-9]+]]:gpr32 = ADDu [[LW2]], [[SLL1]] - ; MIPS32_PIC: [[LW3:%[0-9]+]]:gpr32 = LW [[ADDu3]], target-flags(mips-abs-lo) %jump-table.1 :: (load 4) + ; MIPS32_PIC: [[LW3:%[0-9]+]]:gpr32 = LW [[ADDu3]], target-flags(mips-abs-lo) %jump-table.1 :: (load (s32)) ; MIPS32_PIC: [[ADDu4:%[0-9]+]]:gpr32 = ADDu [[LW3]], [[ADDu]] ; MIPS32_PIC: PseudoIndirectBranch [[ADDu4]] ; MIPS32_PIC: bb.9.sw.bb4: diff --git a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/load.mir b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/load.mir index 7039b271f7345..0540d41e10b42 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/load.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/load.mir @@ -21,17 +21,17 @@ body: | ; MIPS32FP32-LABEL: name: load_i32 ; MIPS32FP32: liveins: $a0 ; MIPS32FP32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 - ; MIPS32FP32: [[LW:%[0-9]+]]:gpr32 = LW [[COPY]], 0 :: (load 4 from %ir.ptr) + ; MIPS32FP32: [[LW:%[0-9]+]]:gpr32 = LW [[COPY]], 0 :: (load (s32) from %ir.ptr) ; MIPS32FP32: $v0 = COPY [[LW]] ; MIPS32FP32: RetRA implicit $v0 ; MIPS32FP64-LABEL: name: load_i32 ; MIPS32FP64: liveins: $a0 ; MIPS32FP64: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 - ; MIPS32FP64: [[LW:%[0-9]+]]:gpr32 = LW [[COPY]], 0 :: (load 4 from %ir.ptr) + ; MIPS32FP64: [[LW:%[0-9]+]]:gpr32 = LW [[COPY]], 0 :: (load (s32) from %ir.ptr) ; MIPS32FP64: $v0 = COPY [[LW]] ; MIPS32FP64: RetRA implicit $v0 %0:gprb(p0) = COPY $a0 - %1:gprb(s32) = G_LOAD %0(p0) :: (load 4 from %ir.ptr) + %1:gprb(s32) = G_LOAD %0(p0) :: (load (s32) from %ir.ptr) $v0 = COPY %1(s32) RetRA implicit $v0 @@ -49,17 +49,17 @@ body: | ; MIPS32FP32-LABEL: name: load_float ; MIPS32FP32: liveins: $a0 ; MIPS32FP32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 - ; MIPS32FP32: [[LWC1_:%[0-9]+]]:fgr32 = LWC1 [[COPY]], 0 :: (load 4 from %ir.ptr) + ; MIPS32FP32: [[LWC1_:%[0-9]+]]:fgr32 = LWC1 [[COPY]], 0 :: (load (s32) from %ir.ptr) ; MIPS32FP32: $f0 = COPY [[LWC1_]] ; MIPS32FP32: RetRA implicit $f0 ; MIPS32FP64-LABEL: name: load_float ; MIPS32FP64: liveins: $a0 ; MIPS32FP64: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 - ; MIPS32FP64: [[LWC1_:%[0-9]+]]:fgr32 = LWC1 [[COPY]], 0 :: (load 4 from %ir.ptr) + ; MIPS32FP64: [[LWC1_:%[0-9]+]]:fgr32 = LWC1 [[COPY]], 0 :: (load (s32) from %ir.ptr) ; MIPS32FP64: $f0 = COPY [[LWC1_]] ; MIPS32FP64: RetRA implicit $f0 %0:gprb(p0) = COPY $a0 - %1:fprb(s32) = G_LOAD %0(p0) :: (load 4 from %ir.ptr) + %1:fprb(s32) = G_LOAD %0(p0) :: (load (s32) from %ir.ptr) $f0 = COPY %1(s32) RetRA implicit $f0 @@ -77,17 +77,17 @@ body: | ; MIPS32FP32-LABEL: name: load_double ; MIPS32FP32: liveins: $a0 ; MIPS32FP32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 - ; MIPS32FP32: [[LDC1_:%[0-9]+]]:afgr64 = LDC1 [[COPY]], 0 :: (load 8 from %ir.ptr) + ; MIPS32FP32: [[LDC1_:%[0-9]+]]:afgr64 = LDC1 [[COPY]], 0 :: (load (s64) from %ir.ptr) ; MIPS32FP32: $d0 = COPY [[LDC1_]] ; MIPS32FP32: RetRA implicit $d0 ; MIPS32FP64-LABEL: name: load_double ; MIPS32FP64: liveins: $a0 ; MIPS32FP64: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 - ; MIPS32FP64: [[LDC164_:%[0-9]+]]:fgr64 = LDC164 [[COPY]], 0 :: (load 8 from %ir.ptr) + ; MIPS32FP64: [[LDC164_:%[0-9]+]]:fgr64 = LDC164 [[COPY]], 0 :: (load (s64) from %ir.ptr) ; MIPS32FP64: $d0 = COPY [[LDC164_]] ; MIPS32FP64: RetRA implicit $d0 %0:gprb(p0) = COPY $a0 - %1:fprb(s64) = G_LOAD %0(p0) :: (load 8 from %ir.ptr) + %1:fprb(s64) = G_LOAD %0(p0) :: (load (s64) from %ir.ptr) $d0 = COPY %1(s64) RetRA implicit $d0 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/load_4_unaligned.mir b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/load_4_unaligned.mir index 5452d62ba9e14..78808f6cdd75b 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/load_4_unaligned.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/load_4_unaligned.mir @@ -37,12 +37,12 @@ body: | ; MIPS32: [[LUi:%[0-9]+]]:gpr32 = LUi target-flags(mips-abs-hi) @float_align1 ; MIPS32: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu [[LUi]], target-flags(mips-abs-lo) @float_align1 ; MIPS32: [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF - ; MIPS32: [[LWL:%[0-9]+]]:gpr32 = LWL [[ADDiu]], 3, [[DEF]] :: (dereferenceable load 4 from @float_align1, align 1) - ; MIPS32: [[LWR:%[0-9]+]]:gpr32 = LWR [[ADDiu]], 0, [[LWL]] :: (dereferenceable load 4 from @float_align1, align 1) + ; MIPS32: [[LWL:%[0-9]+]]:gpr32 = LWL [[ADDiu]], 3, [[DEF]] :: (dereferenceable load (s32) from @float_align1, align 1) + ; MIPS32: [[LWR:%[0-9]+]]:gpr32 = LWR [[ADDiu]], 0, [[LWL]] :: (dereferenceable load (s32) from @float_align1, align 1) ; MIPS32: $f0 = COPY [[LWR]] ; MIPS32: RetRA implicit $f0 %1:gprb(p0) = G_GLOBAL_VALUE @float_align1 - %0:gprb(s32) = G_LOAD %1(p0) :: (dereferenceable load 4 from @float_align1, align 1) + %0:gprb(s32) = G_LOAD %1(p0) :: (dereferenceable load (s32) from @float_align1, align 1) $f0 = COPY %0(s32) RetRA implicit $f0 @@ -58,11 +58,11 @@ body: | ; MIPS32-LABEL: name: load_float_align4 ; MIPS32: [[LUi:%[0-9]+]]:gpr32 = LUi target-flags(mips-abs-hi) @float_align4 ; MIPS32: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu [[LUi]], target-flags(mips-abs-lo) @float_align4 - ; MIPS32: [[LWC1_:%[0-9]+]]:fgr32 = LWC1 [[ADDiu]], 0 :: (dereferenceable load 4 from @float_align4) + ; MIPS32: [[LWC1_:%[0-9]+]]:fgr32 = LWC1 [[ADDiu]], 0 :: (dereferenceable load (s32) from @float_align4) ; MIPS32: $f0 = COPY [[LWC1_]] ; MIPS32: RetRA implicit $f0 %1:gprb(p0) = G_GLOBAL_VALUE @float_align4 - %0:fprb(s32) = G_LOAD %1(p0) :: (dereferenceable load 4 from @float_align4) + %0:fprb(s32) = G_LOAD %1(p0) :: (dereferenceable load (s32) from @float_align4) $f0 = COPY %0(s32) RetRA implicit $f0 @@ -78,11 +78,11 @@ body: | ; MIPS32-LABEL: name: load_i32_align8 ; MIPS32: [[LUi:%[0-9]+]]:gpr32 = LUi target-flags(mips-abs-hi) @i32_align8 ; MIPS32: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu [[LUi]], target-flags(mips-abs-lo) @i32_align8 - ; MIPS32: [[LW:%[0-9]+]]:gpr32 = LW [[ADDiu]], 0 :: (dereferenceable load 4 from @i32_align8, align 8) + ; MIPS32: [[LW:%[0-9]+]]:gpr32 = LW [[ADDiu]], 0 :: (dereferenceable load (s32) from @i32_align8, align 8) ; MIPS32: $v0 = COPY [[LW]] ; MIPS32: RetRA implicit $v0 %1:gprb(p0) = G_GLOBAL_VALUE @i32_align8 - %0:gprb(s32) = G_LOAD %1(p0) :: (dereferenceable load 4 from @i32_align8, align 8) + %0:gprb(s32) = G_LOAD %1(p0) :: (dereferenceable load (s32) from @i32_align8, align 8) $v0 = COPY %0(s32) RetRA implicit $v0 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/load_4_unaligned_r6.mir b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/load_4_unaligned_r6.mir index b0ec86c4219b4..3b4a40a476715 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/load_4_unaligned_r6.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/load_4_unaligned_r6.mir @@ -37,11 +37,11 @@ body: | ; MIPS32R6-LABEL: name: load_float_align1 ; MIPS32R6: [[LUi:%[0-9]+]]:gpr32 = LUi target-flags(mips-abs-hi) @float_align1 ; MIPS32R6: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu [[LUi]], target-flags(mips-abs-lo) @float_align1 - ; MIPS32R6: [[LWC1_:%[0-9]+]]:fgr32 = LWC1 [[ADDiu]], 0 :: (dereferenceable load 4 from @float_align1, align 1) + ; MIPS32R6: [[LWC1_:%[0-9]+]]:fgr32 = LWC1 [[ADDiu]], 0 :: (dereferenceable load (s32) from @float_align1, align 1) ; MIPS32R6: $f0 = COPY [[LWC1_]] ; MIPS32R6: RetRA implicit $f0 %1:gprb(p0) = G_GLOBAL_VALUE @float_align1 - %0:fprb(s32) = G_LOAD %1(p0) :: (dereferenceable load 4 from @float_align1, align 1) + %0:fprb(s32) = G_LOAD %1(p0) :: (dereferenceable load (s32) from @float_align1, align 1) $f0 = COPY %0(s32) RetRA implicit $f0 @@ -58,11 +58,11 @@ body: | ; MIPS32R6-LABEL: name: load_float_align8 ; MIPS32R6: [[LUi:%[0-9]+]]:gpr32 = LUi target-flags(mips-abs-hi) @float_align8 ; MIPS32R6: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu [[LUi]], target-flags(mips-abs-lo) @float_align8 - ; MIPS32R6: [[LWC1_:%[0-9]+]]:fgr32 = LWC1 [[ADDiu]], 0 :: (dereferenceable load 4 from @float_align8, align 8) + ; MIPS32R6: [[LWC1_:%[0-9]+]]:fgr32 = LWC1 [[ADDiu]], 0 :: (dereferenceable load (s32) from @float_align8, align 8) ; MIPS32R6: $f0 = COPY [[LWC1_]] ; MIPS32R6: RetRA implicit $f0 %1:gprb(p0) = G_GLOBAL_VALUE @float_align8 - %0:fprb(s32) = G_LOAD %1(p0) :: (dereferenceable load 4 from @float_align8, align 8) + %0:fprb(s32) = G_LOAD %1(p0) :: (dereferenceable load (s32) from @float_align8, align 8) $f0 = COPY %0(s32) RetRA implicit $f0 @@ -79,11 +79,11 @@ body: | ; MIPS32R6-LABEL: name: load_i32_align2 ; MIPS32R6: [[LUi:%[0-9]+]]:gpr32 = LUi target-flags(mips-abs-hi) @i32_align2 ; MIPS32R6: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu [[LUi]], target-flags(mips-abs-lo) @i32_align2 - ; MIPS32R6: [[LW:%[0-9]+]]:gpr32 = LW [[ADDiu]], 0 :: (dereferenceable load 4 from @i32_align2, align 2) + ; MIPS32R6: [[LW:%[0-9]+]]:gpr32 = LW [[ADDiu]], 0 :: (dereferenceable load (s32) from @i32_align2, align 2) ; MIPS32R6: $v0 = COPY [[LW]] ; MIPS32R6: RetRA implicit $v0 %1:gprb(p0) = G_GLOBAL_VALUE @i32_align2 - %0:gprb(s32) = G_LOAD %1(p0) :: (dereferenceable load 4 from @i32_align2, align 2) + %0:gprb(s32) = G_LOAD %1(p0) :: (dereferenceable load (s32) from @i32_align2, align 2) $v0 = COPY %0(s32) RetRA implicit $v0 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/load_store_fold.mir b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/load_store_fold.mir index de487f9aea58b..43ddd4c429baa 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/load_store_fold.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/load_store_fold.mir @@ -25,13 +25,13 @@ body: | ; MIPS32-LABEL: name: _16_bit_positive_offset ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 - ; MIPS32: [[LBu:%[0-9]+]]:gpr32 = LBu [[COPY]], 32767 :: (load 1) + ; MIPS32: [[LBu:%[0-9]+]]:gpr32 = LBu [[COPY]], 32767 :: (load (s8)) ; MIPS32: $v0 = COPY [[LBu]] ; MIPS32: RetRA implicit $v0 %0:gprb(p0) = COPY $a0 %1:gprb(s32) = G_CONSTANT i32 32767 %2:gprb(p0) = G_PTR_ADD %0, %1(s32) - %4:gprb(s32) = G_ZEXTLOAD %2(p0) :: (load 1) + %4:gprb(s32) = G_ZEXTLOAD %2(p0) :: (load (s8)) $v0 = COPY %4(s32) RetRA implicit $v0 @@ -50,14 +50,14 @@ body: | ; MIPS32: liveins: $a0, $a1 ; MIPS32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 - ; MIPS32: SB [[COPY]], [[COPY1]], -32768 :: (store 1) + ; MIPS32: SB [[COPY]], [[COPY1]], -32768 :: (store (s8)) ; MIPS32: RetRA %2:gprb(s32) = COPY $a0 %1:gprb(p0) = COPY $a1 %3:gprb(s32) = G_CONSTANT i32 -32768 %4:gprb(p0) = G_PTR_ADD %1, %3(s32) %5:gprb(s32) = COPY %2(s32) - G_STORE %5(s32), %4(p0) :: (store 1) + G_STORE %5(s32), %4(p0) :: (store (s8)) RetRA ... @@ -77,14 +77,14 @@ body: | ; MIPS32: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 ; MIPS32: [[ORi:%[0-9]+]]:gpr32 = ORi $zero, 32768 ; MIPS32: [[ADDu:%[0-9]+]]:gpr32 = ADDu [[COPY1]], [[ORi]] - ; MIPS32: SB [[COPY]], [[ADDu]], 0 :: (store 1) + ; MIPS32: SB [[COPY]], [[ADDu]], 0 :: (store (s8)) ; MIPS32: RetRA %2:gprb(s32) = COPY $a0 %1:gprb(p0) = COPY $a1 %3:gprb(s32) = G_CONSTANT i32 32768 %4:gprb(p0) = G_PTR_ADD %1, %3(s32) %5:gprb(s32) = COPY %2(s32) - G_STORE %5(s32), %4(p0) :: (store 1) + G_STORE %5(s32), %4(p0) :: (store (s8)) RetRA ... @@ -104,13 +104,13 @@ body: | ; MIPS32: [[LUi:%[0-9]+]]:gpr32 = LUi 65535 ; MIPS32: [[ORi:%[0-9]+]]:gpr32 = ORi [[LUi]], 32767 ; MIPS32: [[ADDu:%[0-9]+]]:gpr32 = ADDu [[COPY]], [[ORi]] - ; MIPS32: [[LB:%[0-9]+]]:gpr32 = LB [[ADDu]], 0 :: (load 1) + ; MIPS32: [[LB:%[0-9]+]]:gpr32 = LB [[ADDu]], 0 :: (load (s8)) ; MIPS32: $v0 = COPY [[LB]] ; MIPS32: RetRA implicit $v0 %0:gprb(p0) = COPY $a0 %1:gprb(s32) = G_CONSTANT i32 -32769 %2:gprb(p0) = G_PTR_ADD %0, %1(s32) - %4:gprb(s32) = G_SEXTLOAD %2(p0) :: (load 1) + %4:gprb(s32) = G_SEXTLOAD %2(p0) :: (load (s8)) $v0 = COPY %4(s32) RetRA implicit $v0 @@ -128,13 +128,13 @@ body: | ; MIPS32-LABEL: name: fold_f32_load ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 - ; MIPS32: [[LWC1_:%[0-9]+]]:fgr32 = LWC1 [[COPY]], 40 :: (load 4) + ; MIPS32: [[LWC1_:%[0-9]+]]:fgr32 = LWC1 [[COPY]], 40 :: (load (s32)) ; MIPS32: $f0 = COPY [[LWC1_]] ; MIPS32: RetRA implicit $f0 %0:gprb(p0) = COPY $a0 %1:gprb(s32) = G_CONSTANT i32 40 %2:gprb(p0) = G_PTR_ADD %0, %1(s32) - %3:fprb(s32) = G_LOAD %2(p0) :: (load 4) + %3:fprb(s32) = G_LOAD %2(p0) :: (load (s32)) $f0 = COPY %3(s32) RetRA implicit $f0 @@ -153,13 +153,13 @@ body: | ; MIPS32: liveins: $a2, $d6 ; MIPS32: [[COPY:%[0-9]+]]:afgr64 = COPY $d6 ; MIPS32: [[COPY1:%[0-9]+]]:gpr32 = COPY $a2 - ; MIPS32: SDC1 [[COPY]], [[COPY1]], -80 :: (store 8) + ; MIPS32: SDC1 [[COPY]], [[COPY1]], -80 :: (store (s64)) ; MIPS32: RetRA %0:fprb(s64) = COPY $d6 %1:gprb(p0) = COPY $a2 %2:gprb(s32) = G_CONSTANT i32 -80 %3:gprb(p0) = G_PTR_ADD %1, %2(s32) - G_STORE %0(s64), %3(p0) :: (store 8) + G_STORE %0(s64), %3(p0) :: (store (s64)) RetRA ... @@ -176,13 +176,13 @@ body: | ; MIPS32-LABEL: name: fold_i16_load ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 - ; MIPS32: [[LHu:%[0-9]+]]:gpr32 = LHu [[COPY]], -20 :: (load 2) + ; MIPS32: [[LHu:%[0-9]+]]:gpr32 = LHu [[COPY]], -20 :: (load (s16)) ; MIPS32: $v0 = COPY [[LHu]] ; MIPS32: RetRA implicit $v0 %0:gprb(p0) = COPY $a0 %1:gprb(s32) = G_CONSTANT i32 -20 %2:gprb(p0) = G_PTR_ADD %0, %1(s32) - %4:gprb(s32) = G_LOAD %2(p0) :: (load 2) + %4:gprb(s32) = G_LOAD %2(p0) :: (load (s16)) $v0 = COPY %4(s32) RetRA implicit $v0 @@ -201,13 +201,13 @@ body: | ; MIPS32: liveins: $a0, $a1 ; MIPS32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 - ; MIPS32: SW [[COPY]], [[COPY1]], 40 :: (store 4) + ; MIPS32: SW [[COPY]], [[COPY1]], 40 :: (store (s32)) ; MIPS32: RetRA %0:gprb(s32) = COPY $a0 %1:gprb(p0) = COPY $a1 %2:gprb(s32) = G_CONSTANT i32 40 %3:gprb(p0) = G_PTR_ADD %1, %2(s32) - G_STORE %0(s32), %3(p0) :: (store 4) + G_STORE %0(s32), %3(p0) :: (store (s32)) RetRA ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/load_store_vec.mir b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/load_store_vec.mir index b44a92fd3b9c7..18a884b21cfd6 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/load_store_vec.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/load_store_vec.mir @@ -24,13 +24,13 @@ body: | ; P5600: liveins: $a0, $a1 ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 - ; P5600: [[LD_B:%[0-9]+]]:msa128b = LD_B [[COPY1]], 0 :: (load 16 from %ir.b) - ; P5600: ST_B [[LD_B]], [[COPY]], 0 :: (store 16 into %ir.a) + ; P5600: [[LD_B:%[0-9]+]]:msa128b = LD_B [[COPY1]], 0 :: (load (<16 x s8>) from %ir.b) + ; P5600: ST_B [[LD_B]], [[COPY]], 0 :: (store (<16 x s8>) into %ir.a) ; P5600: RetRA %0:gprb(p0) = COPY $a0 %1:gprb(p0) = COPY $a1 - %2:fprb(<16 x s8>) = G_LOAD %1(p0) :: (load 16 from %ir.b) - G_STORE %2(<16 x s8>), %0(p0) :: (store 16 into %ir.a) + %2:fprb(<16 x s8>) = G_LOAD %1(p0) :: (load (<16 x s8>) from %ir.b) + G_STORE %2(<16 x s8>), %0(p0) :: (store (<16 x s8>) into %ir.a) RetRA ... @@ -48,13 +48,13 @@ body: | ; P5600: liveins: $a0, $a1 ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 - ; P5600: [[LD_H:%[0-9]+]]:msa128h = LD_H [[COPY1]], 0 :: (load 16 from %ir.b) - ; P5600: ST_H [[LD_H]], [[COPY]], 0 :: (store 16 into %ir.a) + ; P5600: [[LD_H:%[0-9]+]]:msa128h = LD_H [[COPY1]], 0 :: (load (<8 x s16>) from %ir.b) + ; P5600: ST_H [[LD_H]], [[COPY]], 0 :: (store (<8 x s16>) into %ir.a) ; P5600: RetRA %0:gprb(p0) = COPY $a0 %1:gprb(p0) = COPY $a1 - %2:fprb(<8 x s16>) = G_LOAD %1(p0) :: (load 16 from %ir.b) - G_STORE %2(<8 x s16>), %0(p0) :: (store 16 into %ir.a) + %2:fprb(<8 x s16>) = G_LOAD %1(p0) :: (load (<8 x s16>) from %ir.b) + G_STORE %2(<8 x s16>), %0(p0) :: (store (<8 x s16>) into %ir.a) RetRA ... @@ -72,13 +72,13 @@ body: | ; P5600: liveins: $a0, $a1 ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 - ; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY1]], 0 :: (load 16 from %ir.b) - ; P5600: ST_W [[LD_W]], [[COPY]], 0 :: (store 16 into %ir.a) + ; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY1]], 0 :: (load (<4 x s32>) from %ir.b) + ; P5600: ST_W [[LD_W]], [[COPY]], 0 :: (store (<4 x s32>) into %ir.a) ; P5600: RetRA %0:gprb(p0) = COPY $a0 %1:gprb(p0) = COPY $a1 - %2:fprb(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b) - G_STORE %2(<4 x s32>), %0(p0) :: (store 16 into %ir.a) + %2:fprb(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>) from %ir.b) + G_STORE %2(<4 x s32>), %0(p0) :: (store (<4 x s32>) into %ir.a) RetRA ... @@ -96,13 +96,13 @@ body: | ; P5600: liveins: $a0, $a1 ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 - ; P5600: [[LD_D:%[0-9]+]]:msa128d = LD_D [[COPY1]], 0 :: (load 16 from %ir.b) - ; P5600: ST_D [[LD_D]], [[COPY]], 0 :: (store 16 into %ir.a) + ; P5600: [[LD_D:%[0-9]+]]:msa128d = LD_D [[COPY1]], 0 :: (load (<2 x s64>) from %ir.b) + ; P5600: ST_D [[LD_D]], [[COPY]], 0 :: (store (<2 x s64>) into %ir.a) ; P5600: RetRA %0:gprb(p0) = COPY $a0 %1:gprb(p0) = COPY $a1 - %2:fprb(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b) - G_STORE %2(<2 x s64>), %0(p0) :: (store 16 into %ir.a) + %2:fprb(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>) from %ir.b) + G_STORE %2(<2 x s64>), %0(p0) :: (store (<2 x s64>) into %ir.a) RetRA ... @@ -120,13 +120,13 @@ body: | ; P5600: liveins: $a0, $a1 ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 - ; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY1]], 0 :: (load 16 from %ir.b) - ; P5600: ST_W [[LD_W]], [[COPY]], 0 :: (store 16 into %ir.a) + ; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY1]], 0 :: (load (<4 x s32>) from %ir.b) + ; P5600: ST_W [[LD_W]], [[COPY]], 0 :: (store (<4 x s32>) into %ir.a) ; P5600: RetRA %0:gprb(p0) = COPY $a0 %1:gprb(p0) = COPY $a1 - %2:fprb(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b) - G_STORE %2(<4 x s32>), %0(p0) :: (store 16 into %ir.a) + %2:fprb(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>) from %ir.b) + G_STORE %2(<4 x s32>), %0(p0) :: (store (<4 x s32>) into %ir.a) RetRA ... @@ -144,13 +144,13 @@ body: | ; P5600: liveins: $a0, $a1 ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 - ; P5600: [[LD_D:%[0-9]+]]:msa128d = LD_D [[COPY1]], 0 :: (load 16 from %ir.b) - ; P5600: ST_D [[LD_D]], [[COPY]], 0 :: (store 16 into %ir.a) + ; P5600: [[LD_D:%[0-9]+]]:msa128d = LD_D [[COPY1]], 0 :: (load (<2 x s64>) from %ir.b) + ; P5600: ST_D [[LD_D]], [[COPY]], 0 :: (store (<2 x s64>) into %ir.a) ; P5600: RetRA %0:gprb(p0) = COPY $a0 %1:gprb(p0) = COPY $a1 - %2:fprb(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b) - G_STORE %2(<2 x s64>), %0(p0) :: (store 16 into %ir.a) + %2:fprb(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>) from %ir.b) + G_STORE %2(<2 x s64>), %0(p0) :: (store (<2 x s64>) into %ir.a) RetRA ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/mul.mir b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/mul.mir index a77f60f686208..7a6a684f3db1e 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/mul.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/mul.mir @@ -51,8 +51,8 @@ body: | ; MIPS32: [[PseudoMFHI:%[0-9]+]]:gpr32 = PseudoMFHI [[PseudoMULTu]] ; MIPS32: [[SLTu:%[0-9]+]]:gpr32 = SLTu $zero, [[PseudoMFHI]] ; MIPS32: [[ANDi:%[0-9]+]]:gpr32 = ANDi [[SLTu]], 1 - ; MIPS32: SB [[ANDi]], [[COPY3]], 0 :: (store 1 into %ir.pcarry_flag) - ; MIPS32: SW [[MUL]], [[COPY2]], 0 :: (store 4 into %ir.pmul) + ; MIPS32: SB [[ANDi]], [[COPY3]], 0 :: (store (s8) into %ir.pcarry_flag) + ; MIPS32: SW [[MUL]], [[COPY2]], 0 :: (store (s32) into %ir.pmul) ; MIPS32: RetRA %0:gprb(s32) = COPY $a0 %1:gprb(s32) = COPY $a1 @@ -65,8 +65,8 @@ body: | %10:gprb(s32) = G_CONSTANT i32 1 %11:gprb(s32) = COPY %9(s32) %6:gprb(s32) = G_AND %11, %10 - G_STORE %6(s32), %3(p0) :: (store 1 into %ir.pcarry_flag) - G_STORE %4(s32), %2(p0) :: (store 4 into %ir.pmul) + G_STORE %6(s32), %3(p0) :: (store (s8) into %ir.pcarry_flag) + G_STORE %4(s32), %2(p0) :: (store (s32) into %ir.pmul) RetRA ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/mul_vec.mir b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/mul_vec.mir index b69f8c3abe6de..14ad6761ebd10 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/mul_vec.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/mul_vec.mir @@ -23,18 +23,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2 - ; P5600: [[LD_B:%[0-9]+]]:msa128b = LD_B [[COPY]], 0 :: (load 16 from %ir.a) - ; P5600: [[LD_B1:%[0-9]+]]:msa128b = LD_B [[COPY1]], 0 :: (load 16 from %ir.b) + ; P5600: [[LD_B:%[0-9]+]]:msa128b = LD_B [[COPY]], 0 :: (load (<16 x s8>) from %ir.a) + ; P5600: [[LD_B1:%[0-9]+]]:msa128b = LD_B [[COPY1]], 0 :: (load (<16 x s8>) from %ir.b) ; P5600: [[MULV_B:%[0-9]+]]:msa128b = MULV_B [[LD_B1]], [[LD_B]] - ; P5600: ST_B [[MULV_B]], [[COPY2]], 0 :: (store 16 into %ir.c) + ; P5600: ST_B [[MULV_B]], [[COPY2]], 0 :: (store (<16 x s8>) into %ir.c) ; P5600: RetRA %0:gprb(p0) = COPY $a0 %1:gprb(p0) = COPY $a1 %2:gprb(p0) = COPY $a2 - %3:fprb(<16 x s8>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:fprb(<16 x s8>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:fprb(<16 x s8>) = G_LOAD %0(p0) :: (load (<16 x s8>) from %ir.a) + %4:fprb(<16 x s8>) = G_LOAD %1(p0) :: (load (<16 x s8>) from %ir.b) %5:fprb(<16 x s8>) = G_MUL %4, %3 - G_STORE %5(<16 x s8>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<16 x s8>), %2(p0) :: (store (<16 x s8>) into %ir.c) RetRA ... @@ -53,18 +53,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2 - ; P5600: [[LD_H:%[0-9]+]]:msa128h = LD_H [[COPY]], 0 :: (load 16 from %ir.a) - ; P5600: [[LD_H1:%[0-9]+]]:msa128h = LD_H [[COPY1]], 0 :: (load 16 from %ir.b) + ; P5600: [[LD_H:%[0-9]+]]:msa128h = LD_H [[COPY]], 0 :: (load (<8 x s16>) from %ir.a) + ; P5600: [[LD_H1:%[0-9]+]]:msa128h = LD_H [[COPY1]], 0 :: (load (<8 x s16>) from %ir.b) ; P5600: [[MULV_H:%[0-9]+]]:msa128h = MULV_H [[LD_H1]], [[LD_H]] - ; P5600: ST_H [[MULV_H]], [[COPY2]], 0 :: (store 16 into %ir.c) + ; P5600: ST_H [[MULV_H]], [[COPY2]], 0 :: (store (<8 x s16>) into %ir.c) ; P5600: RetRA %0:gprb(p0) = COPY $a0 %1:gprb(p0) = COPY $a1 %2:gprb(p0) = COPY $a2 - %3:fprb(<8 x s16>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:fprb(<8 x s16>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:fprb(<8 x s16>) = G_LOAD %0(p0) :: (load (<8 x s16>) from %ir.a) + %4:fprb(<8 x s16>) = G_LOAD %1(p0) :: (load (<8 x s16>) from %ir.b) %5:fprb(<8 x s16>) = G_MUL %4, %3 - G_STORE %5(<8 x s16>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<8 x s16>), %2(p0) :: (store (<8 x s16>) into %ir.c) RetRA ... @@ -83,18 +83,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2 - ; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY]], 0 :: (load 16 from %ir.a) - ; P5600: [[LD_W1:%[0-9]+]]:msa128w = LD_W [[COPY1]], 0 :: (load 16 from %ir.b) + ; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY]], 0 :: (load (<4 x s32>) from %ir.a) + ; P5600: [[LD_W1:%[0-9]+]]:msa128w = LD_W [[COPY1]], 0 :: (load (<4 x s32>) from %ir.b) ; P5600: [[MULV_W:%[0-9]+]]:msa128w = MULV_W [[LD_W1]], [[LD_W]] - ; P5600: ST_W [[MULV_W]], [[COPY2]], 0 :: (store 16 into %ir.c) + ; P5600: ST_W [[MULV_W]], [[COPY2]], 0 :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:gprb(p0) = COPY $a0 %1:gprb(p0) = COPY $a1 %2:gprb(p0) = COPY $a2 - %3:fprb(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:fprb(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:fprb(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) + %4:fprb(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>) from %ir.b) %5:fprb(<4 x s32>) = G_MUL %4, %3 - G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<4 x s32>), %2(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -113,18 +113,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2 - ; P5600: [[LD_D:%[0-9]+]]:msa128d = LD_D [[COPY]], 0 :: (load 16 from %ir.a) - ; P5600: [[LD_D1:%[0-9]+]]:msa128d = LD_D [[COPY1]], 0 :: (load 16 from %ir.b) + ; P5600: [[LD_D:%[0-9]+]]:msa128d = LD_D [[COPY]], 0 :: (load (<2 x s64>) from %ir.a) + ; P5600: [[LD_D1:%[0-9]+]]:msa128d = LD_D [[COPY1]], 0 :: (load (<2 x s64>) from %ir.b) ; P5600: [[MULV_D:%[0-9]+]]:msa128d = MULV_D [[LD_D1]], [[LD_D]] - ; P5600: ST_D [[MULV_D]], [[COPY2]], 0 :: (store 16 into %ir.c) + ; P5600: ST_D [[MULV_D]], [[COPY2]], 0 :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:gprb(p0) = COPY $a0 %1:gprb(p0) = COPY $a1 %2:gprb(p0) = COPY $a2 - %3:fprb(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:fprb(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:fprb(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) + %4:fprb(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>) from %ir.b) %5:fprb(<2 x s64>) = G_MUL %4, %3 - G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<2 x s64>), %2(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/phi.mir b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/phi.mir index 2be2092d0e670..dd203253ceac4 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/phi.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/phi.mir @@ -150,9 +150,9 @@ body: | ; MIPS32FP32: [[COPY1:%[0-9]+]]:gpr32 = COPY $a2 ; MIPS32FP32: [[COPY2:%[0-9]+]]:gpr32 = COPY $a3 ; MIPS32FP32: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu %fixed-stack.0, 0 - ; MIPS32FP32: [[LW:%[0-9]+]]:gpr32 = LW [[ADDiu]], 0 :: (load 4 from %fixed-stack.0, align 8) + ; MIPS32FP32: [[LW:%[0-9]+]]:gpr32 = LW [[ADDiu]], 0 :: (load (s32) from %fixed-stack.0, align 8) ; MIPS32FP32: [[ADDiu1:%[0-9]+]]:gpr32 = ADDiu %fixed-stack.1, 0 - ; MIPS32FP32: [[LW1:%[0-9]+]]:gpr32 = LW [[ADDiu1]], 0 :: (load 4 from %fixed-stack.1) + ; MIPS32FP32: [[LW1:%[0-9]+]]:gpr32 = LW [[ADDiu1]], 0 :: (load (s32) from %fixed-stack.1) ; MIPS32FP32: [[ANDi:%[0-9]+]]:gpr32 = ANDi [[COPY]], 1 ; MIPS32FP32: BNE [[ANDi]], $zero, %bb.1, implicit-def $at ; MIPS32FP32: J %bb.2, implicit-def $at @@ -175,9 +175,9 @@ body: | ; MIPS32FP64: [[COPY1:%[0-9]+]]:gpr32 = COPY $a2 ; MIPS32FP64: [[COPY2:%[0-9]+]]:gpr32 = COPY $a3 ; MIPS32FP64: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu %fixed-stack.0, 0 - ; MIPS32FP64: [[LW:%[0-9]+]]:gpr32 = LW [[ADDiu]], 0 :: (load 4 from %fixed-stack.0, align 8) + ; MIPS32FP64: [[LW:%[0-9]+]]:gpr32 = LW [[ADDiu]], 0 :: (load (s32) from %fixed-stack.0, align 8) ; MIPS32FP64: [[ADDiu1:%[0-9]+]]:gpr32 = ADDiu %fixed-stack.1, 0 - ; MIPS32FP64: [[LW1:%[0-9]+]]:gpr32 = LW [[ADDiu1]], 0 :: (load 4 from %fixed-stack.1) + ; MIPS32FP64: [[LW1:%[0-9]+]]:gpr32 = LW [[ADDiu1]], 0 :: (load (s32) from %fixed-stack.1) ; MIPS32FP64: [[ANDi:%[0-9]+]]:gpr32 = ANDi [[COPY]], 1 ; MIPS32FP64: BNE [[ANDi]], $zero, %bb.1, implicit-def $at ; MIPS32FP64: J %bb.2, implicit-def $at @@ -199,9 +199,9 @@ body: | %4:gprb(s32) = COPY $a2 %5:gprb(s32) = COPY $a3 %8:gprb(p0) = G_FRAME_INDEX %fixed-stack.1 - %6:gprb(s32) = G_LOAD %8(p0) :: (load 4 from %fixed-stack.1, align 8) + %6:gprb(s32) = G_LOAD %8(p0) :: (load (s32) from %fixed-stack.1, align 8) %9:gprb(p0) = G_FRAME_INDEX %fixed-stack.0 - %7:gprb(s32) = G_LOAD %9(p0) :: (load 4 from %fixed-stack.0) + %7:gprb(s32) = G_LOAD %9(p0) :: (load (s32) from %fixed-stack.0) %14:gprb(s32) = G_CONSTANT i32 1 %15:gprb(s32) = COPY %3(s32) %13:gprb(s32) = G_AND %15, %14 @@ -305,7 +305,7 @@ body: | ; MIPS32FP32: [[COPY:%[0-9]+]]:afgr64 = COPY $d6 ; MIPS32FP32: [[COPY1:%[0-9]+]]:afgr64 = COPY $d7 ; MIPS32FP32: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu %fixed-stack.0, 0 - ; MIPS32FP32: [[LW:%[0-9]+]]:gpr32 = LW [[ADDiu]], 0 :: (load 4 from %fixed-stack.0, align 8) + ; MIPS32FP32: [[LW:%[0-9]+]]:gpr32 = LW [[ADDiu]], 0 :: (load (s32) from %fixed-stack.0, align 8) ; MIPS32FP32: [[ANDi:%[0-9]+]]:gpr32 = ANDi [[LW]], 1 ; MIPS32FP32: BNE [[ANDi]], $zero, %bb.1, implicit-def $at ; MIPS32FP32: J %bb.2, implicit-def $at @@ -325,7 +325,7 @@ body: | ; MIPS32FP64: [[COPY:%[0-9]+]]:fgr64 = COPY $d6 ; MIPS32FP64: [[COPY1:%[0-9]+]]:fgr64 = COPY $d7 ; MIPS32FP64: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu %fixed-stack.0, 0 - ; MIPS32FP64: [[LW:%[0-9]+]]:gpr32 = LW [[ADDiu]], 0 :: (load 4 from %fixed-stack.0, align 8) + ; MIPS32FP64: [[LW:%[0-9]+]]:gpr32 = LW [[ADDiu]], 0 :: (load (s32) from %fixed-stack.0, align 8) ; MIPS32FP64: [[ANDi:%[0-9]+]]:gpr32 = ANDi [[LW]], 1 ; MIPS32FP64: BNE [[ANDi]], $zero, %bb.1, implicit-def $at ; MIPS32FP64: J %bb.2, implicit-def $at @@ -344,7 +344,7 @@ body: | %0:fprb(s64) = COPY $d6 %1:fprb(s64) = COPY $d7 %4:gprb(p0) = G_FRAME_INDEX %fixed-stack.0 - %3:gprb(s32) = G_LOAD %4(p0) :: (load 4 from %fixed-stack.0, align 8) + %3:gprb(s32) = G_LOAD %4(p0) :: (load (s32) from %fixed-stack.0, align 8) %7:gprb(s32) = G_CONSTANT i32 1 %8:gprb(s32) = COPY %3(s32) %6:gprb(s32) = G_AND %8, %7 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/pointers.mir b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/pointers.mir index ef214f11efa04..2d5dea55e1c7b 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/pointers.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/pointers.mir @@ -20,11 +20,11 @@ body: | ; MIPS32-LABEL: name: ptr_arg_in_regs ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 - ; MIPS32: [[LW:%[0-9]+]]:gpr32 = LW [[COPY]], 0 :: (load 4 from %ir.p) + ; MIPS32: [[LW:%[0-9]+]]:gpr32 = LW [[COPY]], 0 :: (load (s32) from %ir.p) ; MIPS32: $v0 = COPY [[LW]] ; MIPS32: RetRA implicit $v0 %0:gprb(p0) = COPY $a0 - %1:gprb(s32) = G_LOAD %0(p0) :: (load 4 from %ir.p) + %1:gprb(s32) = G_LOAD %0(p0) :: (load (s32) from %ir.p) $v0 = COPY %1(s32) RetRA implicit $v0 @@ -44,8 +44,8 @@ body: | ; MIPS32-LABEL: name: ptr_arg_on_stack ; MIPS32: liveins: $a0, $a1, $a2, $a3 ; MIPS32: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu %fixed-stack.0, 0 - ; MIPS32: [[LW:%[0-9]+]]:gpr32 = LW [[ADDiu]], 0 :: (load 4 from %fixed-stack.0, align 8) - ; MIPS32: [[LW1:%[0-9]+]]:gpr32 = LW [[LW]], 0 :: (load 4 from %ir.p) + ; MIPS32: [[LW:%[0-9]+]]:gpr32 = LW [[ADDiu]], 0 :: (load (p0) from %fixed-stack.0, align 8) + ; MIPS32: [[LW1:%[0-9]+]]:gpr32 = LW [[LW]], 0 :: (load (s32) from %ir.p) ; MIPS32: $v0 = COPY [[LW1]] ; MIPS32: RetRA implicit $v0 %0:gprb(s32) = COPY $a0 @@ -53,8 +53,8 @@ body: | %2:gprb(s32) = COPY $a2 %3:gprb(s32) = COPY $a3 %5:gprb(p0) = G_FRAME_INDEX %fixed-stack.0 - %4:gprb(p0) = G_LOAD %5(p0) :: (load 4 from %fixed-stack.0, align 8) - %6:gprb(s32) = G_LOAD %4(p0) :: (load 4 from %ir.p) + %4:gprb(p0) = G_LOAD %5(p0) :: (load (p0) from %fixed-stack.0, align 8) + %6:gprb(s32) = G_LOAD %4(p0) :: (load (s32) from %ir.p) $v0 = COPY %6(s32) RetRA implicit $v0 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/rem_and_div_vec.mir b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/rem_and_div_vec.mir index 14abf0af763d8..c15fcbe363f81 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/rem_and_div_vec.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/rem_and_div_vec.mir @@ -38,18 +38,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2 - ; P5600: [[LD_B:%[0-9]+]]:msa128b = LD_B [[COPY]], 0 :: (load 16 from %ir.a) - ; P5600: [[LD_B1:%[0-9]+]]:msa128b = LD_B [[COPY1]], 0 :: (load 16 from %ir.b) + ; P5600: [[LD_B:%[0-9]+]]:msa128b = LD_B [[COPY]], 0 :: (load (<16 x s8>) from %ir.a) + ; P5600: [[LD_B1:%[0-9]+]]:msa128b = LD_B [[COPY1]], 0 :: (load (<16 x s8>) from %ir.b) ; P5600: [[DIV_S_B:%[0-9]+]]:msa128b = DIV_S_B [[LD_B]], [[LD_B1]] - ; P5600: ST_B [[DIV_S_B]], [[COPY2]], 0 :: (store 16 into %ir.c) + ; P5600: ST_B [[DIV_S_B]], [[COPY2]], 0 :: (store (<16 x s8>) into %ir.c) ; P5600: RetRA %0:gprb(p0) = COPY $a0 %1:gprb(p0) = COPY $a1 %2:gprb(p0) = COPY $a2 - %3:fprb(<16 x s8>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:fprb(<16 x s8>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:fprb(<16 x s8>) = G_LOAD %0(p0) :: (load (<16 x s8>) from %ir.a) + %4:fprb(<16 x s8>) = G_LOAD %1(p0) :: (load (<16 x s8>) from %ir.b) %5:fprb(<16 x s8>) = G_SDIV %3, %4 - G_STORE %5(<16 x s8>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<16 x s8>), %2(p0) :: (store (<16 x s8>) into %ir.c) RetRA ... @@ -68,18 +68,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2 - ; P5600: [[LD_H:%[0-9]+]]:msa128h = LD_H [[COPY]], 0 :: (load 16 from %ir.a) - ; P5600: [[LD_H1:%[0-9]+]]:msa128h = LD_H [[COPY1]], 0 :: (load 16 from %ir.b) + ; P5600: [[LD_H:%[0-9]+]]:msa128h = LD_H [[COPY]], 0 :: (load (<8 x s16>) from %ir.a) + ; P5600: [[LD_H1:%[0-9]+]]:msa128h = LD_H [[COPY1]], 0 :: (load (<8 x s16>) from %ir.b) ; P5600: [[DIV_S_H:%[0-9]+]]:msa128h = DIV_S_H [[LD_H]], [[LD_H1]] - ; P5600: ST_H [[DIV_S_H]], [[COPY2]], 0 :: (store 16 into %ir.c) + ; P5600: ST_H [[DIV_S_H]], [[COPY2]], 0 :: (store (<8 x s16>) into %ir.c) ; P5600: RetRA %0:gprb(p0) = COPY $a0 %1:gprb(p0) = COPY $a1 %2:gprb(p0) = COPY $a2 - %3:fprb(<8 x s16>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:fprb(<8 x s16>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:fprb(<8 x s16>) = G_LOAD %0(p0) :: (load (<8 x s16>) from %ir.a) + %4:fprb(<8 x s16>) = G_LOAD %1(p0) :: (load (<8 x s16>) from %ir.b) %5:fprb(<8 x s16>) = G_SDIV %3, %4 - G_STORE %5(<8 x s16>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<8 x s16>), %2(p0) :: (store (<8 x s16>) into %ir.c) RetRA ... @@ -98,18 +98,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2 - ; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY]], 0 :: (load 16 from %ir.a) - ; P5600: [[LD_W1:%[0-9]+]]:msa128w = LD_W [[COPY1]], 0 :: (load 16 from %ir.b) + ; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY]], 0 :: (load (<4 x s32>) from %ir.a) + ; P5600: [[LD_W1:%[0-9]+]]:msa128w = LD_W [[COPY1]], 0 :: (load (<4 x s32>) from %ir.b) ; P5600: [[DIV_S_W:%[0-9]+]]:msa128w = DIV_S_W [[LD_W]], [[LD_W1]] - ; P5600: ST_W [[DIV_S_W]], [[COPY2]], 0 :: (store 16 into %ir.c) + ; P5600: ST_W [[DIV_S_W]], [[COPY2]], 0 :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:gprb(p0) = COPY $a0 %1:gprb(p0) = COPY $a1 %2:gprb(p0) = COPY $a2 - %3:fprb(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:fprb(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:fprb(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) + %4:fprb(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>) from %ir.b) %5:fprb(<4 x s32>) = G_SDIV %3, %4 - G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<4 x s32>), %2(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -128,18 +128,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2 - ; P5600: [[LD_D:%[0-9]+]]:msa128d = LD_D [[COPY]], 0 :: (load 16 from %ir.a) - ; P5600: [[LD_D1:%[0-9]+]]:msa128d = LD_D [[COPY1]], 0 :: (load 16 from %ir.b) + ; P5600: [[LD_D:%[0-9]+]]:msa128d = LD_D [[COPY]], 0 :: (load (<2 x s64>) from %ir.a) + ; P5600: [[LD_D1:%[0-9]+]]:msa128d = LD_D [[COPY1]], 0 :: (load (<2 x s64>) from %ir.b) ; P5600: [[DIV_S_D:%[0-9]+]]:msa128d = DIV_S_D [[LD_D]], [[LD_D1]] - ; P5600: ST_D [[DIV_S_D]], [[COPY2]], 0 :: (store 16 into %ir.c) + ; P5600: ST_D [[DIV_S_D]], [[COPY2]], 0 :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:gprb(p0) = COPY $a0 %1:gprb(p0) = COPY $a1 %2:gprb(p0) = COPY $a2 - %3:fprb(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:fprb(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:fprb(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) + %4:fprb(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>) from %ir.b) %5:fprb(<2 x s64>) = G_SDIV %3, %4 - G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<2 x s64>), %2(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... @@ -158,18 +158,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2 - ; P5600: [[LD_B:%[0-9]+]]:msa128b = LD_B [[COPY]], 0 :: (load 16 from %ir.a) - ; P5600: [[LD_B1:%[0-9]+]]:msa128b = LD_B [[COPY1]], 0 :: (load 16 from %ir.b) + ; P5600: [[LD_B:%[0-9]+]]:msa128b = LD_B [[COPY]], 0 :: (load (<16 x s8>) from %ir.a) + ; P5600: [[LD_B1:%[0-9]+]]:msa128b = LD_B [[COPY1]], 0 :: (load (<16 x s8>) from %ir.b) ; P5600: [[MOD_S_B:%[0-9]+]]:msa128b = MOD_S_B [[LD_B]], [[LD_B1]] - ; P5600: ST_B [[MOD_S_B]], [[COPY2]], 0 :: (store 16 into %ir.c) + ; P5600: ST_B [[MOD_S_B]], [[COPY2]], 0 :: (store (<16 x s8>) into %ir.c) ; P5600: RetRA %0:gprb(p0) = COPY $a0 %1:gprb(p0) = COPY $a1 %2:gprb(p0) = COPY $a2 - %3:fprb(<16 x s8>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:fprb(<16 x s8>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:fprb(<16 x s8>) = G_LOAD %0(p0) :: (load (<16 x s8>) from %ir.a) + %4:fprb(<16 x s8>) = G_LOAD %1(p0) :: (load (<16 x s8>) from %ir.b) %5:fprb(<16 x s8>) = G_SREM %3, %4 - G_STORE %5(<16 x s8>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<16 x s8>), %2(p0) :: (store (<16 x s8>) into %ir.c) RetRA ... @@ -188,18 +188,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2 - ; P5600: [[LD_H:%[0-9]+]]:msa128h = LD_H [[COPY]], 0 :: (load 16 from %ir.a) - ; P5600: [[LD_H1:%[0-9]+]]:msa128h = LD_H [[COPY1]], 0 :: (load 16 from %ir.b) + ; P5600: [[LD_H:%[0-9]+]]:msa128h = LD_H [[COPY]], 0 :: (load (<8 x s16>) from %ir.a) + ; P5600: [[LD_H1:%[0-9]+]]:msa128h = LD_H [[COPY1]], 0 :: (load (<8 x s16>) from %ir.b) ; P5600: [[MOD_S_H:%[0-9]+]]:msa128h = MOD_S_H [[LD_H]], [[LD_H1]] - ; P5600: ST_H [[MOD_S_H]], [[COPY2]], 0 :: (store 16 into %ir.c) + ; P5600: ST_H [[MOD_S_H]], [[COPY2]], 0 :: (store (<8 x s16>) into %ir.c) ; P5600: RetRA %0:gprb(p0) = COPY $a0 %1:gprb(p0) = COPY $a1 %2:gprb(p0) = COPY $a2 - %3:fprb(<8 x s16>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:fprb(<8 x s16>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:fprb(<8 x s16>) = G_LOAD %0(p0) :: (load (<8 x s16>) from %ir.a) + %4:fprb(<8 x s16>) = G_LOAD %1(p0) :: (load (<8 x s16>) from %ir.b) %5:fprb(<8 x s16>) = G_SREM %3, %4 - G_STORE %5(<8 x s16>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<8 x s16>), %2(p0) :: (store (<8 x s16>) into %ir.c) RetRA ... @@ -218,18 +218,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2 - ; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY]], 0 :: (load 16 from %ir.a) - ; P5600: [[LD_W1:%[0-9]+]]:msa128w = LD_W [[COPY1]], 0 :: (load 16 from %ir.b) + ; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY]], 0 :: (load (<4 x s32>) from %ir.a) + ; P5600: [[LD_W1:%[0-9]+]]:msa128w = LD_W [[COPY1]], 0 :: (load (<4 x s32>) from %ir.b) ; P5600: [[MOD_S_W:%[0-9]+]]:msa128w = MOD_S_W [[LD_W]], [[LD_W1]] - ; P5600: ST_W [[MOD_S_W]], [[COPY2]], 0 :: (store 16 into %ir.c) + ; P5600: ST_W [[MOD_S_W]], [[COPY2]], 0 :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:gprb(p0) = COPY $a0 %1:gprb(p0) = COPY $a1 %2:gprb(p0) = COPY $a2 - %3:fprb(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:fprb(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:fprb(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) + %4:fprb(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>) from %ir.b) %5:fprb(<4 x s32>) = G_SREM %3, %4 - G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<4 x s32>), %2(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -248,18 +248,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2 - ; P5600: [[LD_D:%[0-9]+]]:msa128d = LD_D [[COPY]], 0 :: (load 16 from %ir.a) - ; P5600: [[LD_D1:%[0-9]+]]:msa128d = LD_D [[COPY1]], 0 :: (load 16 from %ir.b) + ; P5600: [[LD_D:%[0-9]+]]:msa128d = LD_D [[COPY]], 0 :: (load (<2 x s64>) from %ir.a) + ; P5600: [[LD_D1:%[0-9]+]]:msa128d = LD_D [[COPY1]], 0 :: (load (<2 x s64>) from %ir.b) ; P5600: [[MOD_S_D:%[0-9]+]]:msa128d = MOD_S_D [[LD_D]], [[LD_D1]] - ; P5600: ST_D [[MOD_S_D]], [[COPY2]], 0 :: (store 16 into %ir.c) + ; P5600: ST_D [[MOD_S_D]], [[COPY2]], 0 :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:gprb(p0) = COPY $a0 %1:gprb(p0) = COPY $a1 %2:gprb(p0) = COPY $a2 - %3:fprb(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:fprb(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:fprb(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) + %4:fprb(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>) from %ir.b) %5:fprb(<2 x s64>) = G_SREM %3, %4 - G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<2 x s64>), %2(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... @@ -278,18 +278,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2 - ; P5600: [[LD_B:%[0-9]+]]:msa128b = LD_B [[COPY]], 0 :: (load 16 from %ir.a) - ; P5600: [[LD_B1:%[0-9]+]]:msa128b = LD_B [[COPY1]], 0 :: (load 16 from %ir.b) + ; P5600: [[LD_B:%[0-9]+]]:msa128b = LD_B [[COPY]], 0 :: (load (<16 x s8>) from %ir.a) + ; P5600: [[LD_B1:%[0-9]+]]:msa128b = LD_B [[COPY1]], 0 :: (load (<16 x s8>) from %ir.b) ; P5600: [[DIV_U_B:%[0-9]+]]:msa128b = DIV_U_B [[LD_B]], [[LD_B1]] - ; P5600: ST_B [[DIV_U_B]], [[COPY2]], 0 :: (store 16 into %ir.c) + ; P5600: ST_B [[DIV_U_B]], [[COPY2]], 0 :: (store (<16 x s8>) into %ir.c) ; P5600: RetRA %0:gprb(p0) = COPY $a0 %1:gprb(p0) = COPY $a1 %2:gprb(p0) = COPY $a2 - %3:fprb(<16 x s8>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:fprb(<16 x s8>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:fprb(<16 x s8>) = G_LOAD %0(p0) :: (load (<16 x s8>) from %ir.a) + %4:fprb(<16 x s8>) = G_LOAD %1(p0) :: (load (<16 x s8>) from %ir.b) %5:fprb(<16 x s8>) = G_UDIV %3, %4 - G_STORE %5(<16 x s8>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<16 x s8>), %2(p0) :: (store (<16 x s8>) into %ir.c) RetRA ... @@ -308,18 +308,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2 - ; P5600: [[LD_H:%[0-9]+]]:msa128h = LD_H [[COPY]], 0 :: (load 16 from %ir.a) - ; P5600: [[LD_H1:%[0-9]+]]:msa128h = LD_H [[COPY1]], 0 :: (load 16 from %ir.b) + ; P5600: [[LD_H:%[0-9]+]]:msa128h = LD_H [[COPY]], 0 :: (load (<8 x s16>) from %ir.a) + ; P5600: [[LD_H1:%[0-9]+]]:msa128h = LD_H [[COPY1]], 0 :: (load (<8 x s16>) from %ir.b) ; P5600: [[DIV_U_H:%[0-9]+]]:msa128h = DIV_U_H [[LD_H]], [[LD_H1]] - ; P5600: ST_H [[DIV_U_H]], [[COPY2]], 0 :: (store 16 into %ir.c) + ; P5600: ST_H [[DIV_U_H]], [[COPY2]], 0 :: (store (<8 x s16>) into %ir.c) ; P5600: RetRA %0:gprb(p0) = COPY $a0 %1:gprb(p0) = COPY $a1 %2:gprb(p0) = COPY $a2 - %3:fprb(<8 x s16>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:fprb(<8 x s16>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:fprb(<8 x s16>) = G_LOAD %0(p0) :: (load (<8 x s16>) from %ir.a) + %4:fprb(<8 x s16>) = G_LOAD %1(p0) :: (load (<8 x s16>) from %ir.b) %5:fprb(<8 x s16>) = G_UDIV %3, %4 - G_STORE %5(<8 x s16>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<8 x s16>), %2(p0) :: (store (<8 x s16>) into %ir.c) RetRA ... @@ -338,18 +338,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2 - ; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY]], 0 :: (load 16 from %ir.a) - ; P5600: [[LD_W1:%[0-9]+]]:msa128w = LD_W [[COPY1]], 0 :: (load 16 from %ir.b) + ; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY]], 0 :: (load (<4 x s32>) from %ir.a) + ; P5600: [[LD_W1:%[0-9]+]]:msa128w = LD_W [[COPY1]], 0 :: (load (<4 x s32>) from %ir.b) ; P5600: [[DIV_U_W:%[0-9]+]]:msa128w = DIV_U_W [[LD_W]], [[LD_W1]] - ; P5600: ST_W [[DIV_U_W]], [[COPY2]], 0 :: (store 16 into %ir.c) + ; P5600: ST_W [[DIV_U_W]], [[COPY2]], 0 :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:gprb(p0) = COPY $a0 %1:gprb(p0) = COPY $a1 %2:gprb(p0) = COPY $a2 - %3:fprb(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:fprb(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:fprb(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) + %4:fprb(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>) from %ir.b) %5:fprb(<4 x s32>) = G_UDIV %3, %4 - G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<4 x s32>), %2(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -368,18 +368,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2 - ; P5600: [[LD_D:%[0-9]+]]:msa128d = LD_D [[COPY]], 0 :: (load 16 from %ir.a) - ; P5600: [[LD_D1:%[0-9]+]]:msa128d = LD_D [[COPY1]], 0 :: (load 16 from %ir.b) + ; P5600: [[LD_D:%[0-9]+]]:msa128d = LD_D [[COPY]], 0 :: (load (<2 x s64>) from %ir.a) + ; P5600: [[LD_D1:%[0-9]+]]:msa128d = LD_D [[COPY1]], 0 :: (load (<2 x s64>) from %ir.b) ; P5600: [[DIV_U_D:%[0-9]+]]:msa128d = DIV_U_D [[LD_D]], [[LD_D1]] - ; P5600: ST_D [[DIV_U_D]], [[COPY2]], 0 :: (store 16 into %ir.c) + ; P5600: ST_D [[DIV_U_D]], [[COPY2]], 0 :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:gprb(p0) = COPY $a0 %1:gprb(p0) = COPY $a1 %2:gprb(p0) = COPY $a2 - %3:fprb(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:fprb(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:fprb(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) + %4:fprb(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>) from %ir.b) %5:fprb(<2 x s64>) = G_UDIV %3, %4 - G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<2 x s64>), %2(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... @@ -398,18 +398,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2 - ; P5600: [[LD_B:%[0-9]+]]:msa128b = LD_B [[COPY]], 0 :: (load 16 from %ir.a) - ; P5600: [[LD_B1:%[0-9]+]]:msa128b = LD_B [[COPY1]], 0 :: (load 16 from %ir.b) + ; P5600: [[LD_B:%[0-9]+]]:msa128b = LD_B [[COPY]], 0 :: (load (<16 x s8>) from %ir.a) + ; P5600: [[LD_B1:%[0-9]+]]:msa128b = LD_B [[COPY1]], 0 :: (load (<16 x s8>) from %ir.b) ; P5600: [[MOD_U_B:%[0-9]+]]:msa128b = MOD_U_B [[LD_B]], [[LD_B1]] - ; P5600: ST_B [[MOD_U_B]], [[COPY2]], 0 :: (store 16 into %ir.c) + ; P5600: ST_B [[MOD_U_B]], [[COPY2]], 0 :: (store (<16 x s8>) into %ir.c) ; P5600: RetRA %0:gprb(p0) = COPY $a0 %1:gprb(p0) = COPY $a1 %2:gprb(p0) = COPY $a2 - %3:fprb(<16 x s8>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:fprb(<16 x s8>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:fprb(<16 x s8>) = G_LOAD %0(p0) :: (load (<16 x s8>) from %ir.a) + %4:fprb(<16 x s8>) = G_LOAD %1(p0) :: (load (<16 x s8>) from %ir.b) %5:fprb(<16 x s8>) = G_UREM %3, %4 - G_STORE %5(<16 x s8>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<16 x s8>), %2(p0) :: (store (<16 x s8>) into %ir.c) RetRA ... @@ -428,18 +428,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2 - ; P5600: [[LD_H:%[0-9]+]]:msa128h = LD_H [[COPY]], 0 :: (load 16 from %ir.a) - ; P5600: [[LD_H1:%[0-9]+]]:msa128h = LD_H [[COPY1]], 0 :: (load 16 from %ir.b) + ; P5600: [[LD_H:%[0-9]+]]:msa128h = LD_H [[COPY]], 0 :: (load (<8 x s16>) from %ir.a) + ; P5600: [[LD_H1:%[0-9]+]]:msa128h = LD_H [[COPY1]], 0 :: (load (<8 x s16>) from %ir.b) ; P5600: [[MOD_U_H:%[0-9]+]]:msa128h = MOD_U_H [[LD_H]], [[LD_H1]] - ; P5600: ST_H [[MOD_U_H]], [[COPY2]], 0 :: (store 16 into %ir.c) + ; P5600: ST_H [[MOD_U_H]], [[COPY2]], 0 :: (store (<8 x s16>) into %ir.c) ; P5600: RetRA %0:gprb(p0) = COPY $a0 %1:gprb(p0) = COPY $a1 %2:gprb(p0) = COPY $a2 - %3:fprb(<8 x s16>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:fprb(<8 x s16>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:fprb(<8 x s16>) = G_LOAD %0(p0) :: (load (<8 x s16>) from %ir.a) + %4:fprb(<8 x s16>) = G_LOAD %1(p0) :: (load (<8 x s16>) from %ir.b) %5:fprb(<8 x s16>) = G_UREM %3, %4 - G_STORE %5(<8 x s16>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<8 x s16>), %2(p0) :: (store (<8 x s16>) into %ir.c) RetRA ... @@ -458,18 +458,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2 - ; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY]], 0 :: (load 16 from %ir.a) - ; P5600: [[LD_W1:%[0-9]+]]:msa128w = LD_W [[COPY1]], 0 :: (load 16 from %ir.b) + ; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY]], 0 :: (load (<4 x s32>) from %ir.a) + ; P5600: [[LD_W1:%[0-9]+]]:msa128w = LD_W [[COPY1]], 0 :: (load (<4 x s32>) from %ir.b) ; P5600: [[MOD_U_W:%[0-9]+]]:msa128w = MOD_U_W [[LD_W]], [[LD_W1]] - ; P5600: ST_W [[MOD_U_W]], [[COPY2]], 0 :: (store 16 into %ir.c) + ; P5600: ST_W [[MOD_U_W]], [[COPY2]], 0 :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:gprb(p0) = COPY $a0 %1:gprb(p0) = COPY $a1 %2:gprb(p0) = COPY $a2 - %3:fprb(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:fprb(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:fprb(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) + %4:fprb(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>) from %ir.b) %5:fprb(<4 x s32>) = G_UREM %3, %4 - G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<4 x s32>), %2(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -488,18 +488,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2 - ; P5600: [[LD_D:%[0-9]+]]:msa128d = LD_D [[COPY]], 0 :: (load 16 from %ir.a) - ; P5600: [[LD_D1:%[0-9]+]]:msa128d = LD_D [[COPY1]], 0 :: (load 16 from %ir.b) + ; P5600: [[LD_D:%[0-9]+]]:msa128d = LD_D [[COPY]], 0 :: (load (<2 x s64>) from %ir.a) + ; P5600: [[LD_D1:%[0-9]+]]:msa128d = LD_D [[COPY1]], 0 :: (load (<2 x s64>) from %ir.b) ; P5600: [[MOD_U_D:%[0-9]+]]:msa128d = MOD_U_D [[LD_D]], [[LD_D1]] - ; P5600: ST_D [[MOD_U_D]], [[COPY2]], 0 :: (store 16 into %ir.c) + ; P5600: ST_D [[MOD_U_D]], [[COPY2]], 0 :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:gprb(p0) = COPY $a0 %1:gprb(p0) = COPY $a1 %2:gprb(p0) = COPY $a2 - %3:fprb(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:fprb(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:fprb(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) + %4:fprb(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>) from %ir.b) %5:fprb(<2 x s64>) = G_UREM %3, %4 - G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<2 x s64>), %2(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/select.mir b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/select.mir index fa4426f496a70..db558df87c762 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/select.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/select.mir @@ -149,7 +149,7 @@ body: | ; MIPS32FP32: [[COPY:%[0-9]+]]:afgr64 = COPY $d6 ; MIPS32FP32: [[COPY1:%[0-9]+]]:afgr64 = COPY $d7 ; MIPS32FP32: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu %fixed-stack.0, 0 - ; MIPS32FP32: [[LW:%[0-9]+]]:gpr32 = LW [[ADDiu]], 0 :: (load 4 from %fixed-stack.0, align 8) + ; MIPS32FP32: [[LW:%[0-9]+]]:gpr32 = LW [[ADDiu]], 0 :: (load (s32) from %fixed-stack.0, align 8) ; MIPS32FP32: [[ANDi:%[0-9]+]]:gpr32 = ANDi [[LW]], 1 ; MIPS32FP32: [[MOVN_I_D32_:%[0-9]+]]:afgr64 = MOVN_I_D32 [[COPY]], [[ANDi]], [[COPY1]] ; MIPS32FP32: $d0 = COPY [[MOVN_I_D32_]] @@ -159,7 +159,7 @@ body: | ; MIPS32FP64: [[COPY:%[0-9]+]]:fgr64 = COPY $d6 ; MIPS32FP64: [[COPY1:%[0-9]+]]:fgr64 = COPY $d7 ; MIPS32FP64: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu %fixed-stack.0, 0 - ; MIPS32FP64: [[LW:%[0-9]+]]:gpr32 = LW [[ADDiu]], 0 :: (load 4 from %fixed-stack.0, align 8) + ; MIPS32FP64: [[LW:%[0-9]+]]:gpr32 = LW [[ADDiu]], 0 :: (load (s32) from %fixed-stack.0, align 8) ; MIPS32FP64: [[ANDi:%[0-9]+]]:gpr32 = ANDi [[LW]], 1 ; MIPS32FP64: [[MOVN_I_D64_:%[0-9]+]]:fgr64 = MOVN_I_D64 [[COPY]], [[ANDi]], [[COPY1]] ; MIPS32FP64: $d0 = COPY [[MOVN_I_D64_]] @@ -167,7 +167,7 @@ body: | %0:fprb(s64) = COPY $d6 %1:fprb(s64) = COPY $d7 %4:gprb(p0) = G_FRAME_INDEX %fixed-stack.0 - %3:gprb(s32) = G_LOAD %4(p0) :: (load 4 from %fixed-stack.0, align 8) + %3:gprb(s32) = G_LOAD %4(p0) :: (load (s32) from %fixed-stack.0, align 8) %7:gprb(s32) = G_CONSTANT i32 1 %8:gprb(s32) = COPY %3(s32) %6:gprb(s32) = G_AND %8, %7 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/stack_args.mir b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/stack_args.mir index cd9074d6d2126..b43f69e45c2bd 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/stack_args.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/stack_args.mir @@ -25,14 +25,14 @@ body: | ; MIPS32: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2 ; MIPS32: [[COPY3:%[0-9]+]]:gpr32 = COPY $a3 ; MIPS32: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu %fixed-stack.0, 0 - ; MIPS32: [[LW:%[0-9]+]]:gpr32 = LW [[ADDiu]], 0 :: (load 4 from %fixed-stack.0, align 8) + ; MIPS32: [[LW:%[0-9]+]]:gpr32 = LW [[ADDiu]], 0 :: (load (s32) from %fixed-stack.0, align 8) ; MIPS32: ADJCALLSTACKDOWN 24, 0, implicit-def $sp, implicit $sp ; MIPS32: $a0 = COPY [[COPY]] ; MIPS32: $a1 = COPY [[COPY1]] ; MIPS32: $a2 = COPY [[COPY2]] ; MIPS32: $a3 = COPY [[COPY3]] ; MIPS32: [[COPY4:%[0-9]+]]:gpr32 = COPY $sp - ; MIPS32: SW [[LW]], [[COPY4]], 16 :: (store 4 into stack + 16) + ; MIPS32: SW [[LW]], [[COPY4]], 16 :: (store (s32) into stack + 16) ; MIPS32: JAL @f, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit $a2, implicit $a3, implicit-def $v0 ; MIPS32: [[COPY5:%[0-9]+]]:gpr32 = COPY $v0 ; MIPS32: ADJCALLSTACKUP 24, 0, implicit-def $sp, implicit $sp @@ -43,7 +43,7 @@ body: | %2:gprb(s32) = COPY $a2 %3:gprb(s32) = COPY $a3 %5:gprb(p0) = G_FRAME_INDEX %fixed-stack.0 - %4:gprb(s32) = G_LOAD %5(p0) :: (load 4 from %fixed-stack.0, align 8) + %4:gprb(s32) = G_LOAD %5(p0) :: (load (s32) from %fixed-stack.0, align 8) ADJCALLSTACKDOWN 24, 0, implicit-def $sp, implicit $sp $a0 = COPY %0(s32) $a1 = COPY %1(s32) @@ -52,7 +52,7 @@ body: | %7:gprb(p0) = COPY $sp %8:gprb(s32) = G_CONSTANT i32 16 %9:gprb(p0) = G_PTR_ADD %7, %8(s32) - G_STORE %4(s32), %9(p0) :: (store 4 into stack + 16, align 4) + G_STORE %4(s32), %9(p0) :: (store (s32) into stack + 16, align 4) JAL @f, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit $a2, implicit $a3, implicit-def $v0 %6:gprb(s32) = COPY $v0 ADJCALLSTACKUP 24, 0, implicit-def $sp, implicit $sp diff --git a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/store.mir b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/store.mir index bf9f946211b02..075ee35c0db73 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/store.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/store.mir @@ -22,17 +22,17 @@ body: | ; MIPS32FP32: liveins: $a0, $a1 ; MIPS32FP32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; MIPS32FP32: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 - ; MIPS32FP32: SW [[COPY]], [[COPY1]], 0 :: (store 4 into %ir.ptr) + ; MIPS32FP32: SW [[COPY]], [[COPY1]], 0 :: (store (s32) into %ir.ptr) ; MIPS32FP32: RetRA ; MIPS32FP64-LABEL: name: store_i32 ; MIPS32FP64: liveins: $a0, $a1 ; MIPS32FP64: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; MIPS32FP64: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 - ; MIPS32FP64: SW [[COPY]], [[COPY1]], 0 :: (store 4 into %ir.ptr) + ; MIPS32FP64: SW [[COPY]], [[COPY1]], 0 :: (store (s32) into %ir.ptr) ; MIPS32FP64: RetRA %0:gprb(s32) = COPY $a0 %1:gprb(p0) = COPY $a1 - G_STORE %0(s32), %1(p0) :: (store 4 into %ir.ptr) + G_STORE %0(s32), %1(p0) :: (store (s32) into %ir.ptr) RetRA ... @@ -50,17 +50,17 @@ body: | ; MIPS32FP32: liveins: $a1, $f12 ; MIPS32FP32: [[COPY:%[0-9]+]]:fgr32 = COPY $f12 ; MIPS32FP32: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 - ; MIPS32FP32: SWC1 [[COPY]], [[COPY1]], 0 :: (store 4 into %ir.ptr) + ; MIPS32FP32: SWC1 [[COPY]], [[COPY1]], 0 :: (store (s32) into %ir.ptr) ; MIPS32FP32: RetRA ; MIPS32FP64-LABEL: name: store_float ; MIPS32FP64: liveins: $a1, $f12 ; MIPS32FP64: [[COPY:%[0-9]+]]:fgr32 = COPY $f12 ; MIPS32FP64: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 - ; MIPS32FP64: SWC1 [[COPY]], [[COPY1]], 0 :: (store 4 into %ir.ptr) + ; MIPS32FP64: SWC1 [[COPY]], [[COPY1]], 0 :: (store (s32) into %ir.ptr) ; MIPS32FP64: RetRA %0:fprb(s32) = COPY $f12 %1:gprb(p0) = COPY $a1 - G_STORE %0(s32), %1(p0) :: (store 4 into %ir.ptr) + G_STORE %0(s32), %1(p0) :: (store (s32) into %ir.ptr) RetRA ... @@ -78,17 +78,17 @@ body: | ; MIPS32FP32: liveins: $a2, $d6 ; MIPS32FP32: [[COPY:%[0-9]+]]:afgr64 = COPY $d6 ; MIPS32FP32: [[COPY1:%[0-9]+]]:gpr32 = COPY $a2 - ; MIPS32FP32: SDC1 [[COPY]], [[COPY1]], 0 :: (store 8 into %ir.ptr) + ; MIPS32FP32: SDC1 [[COPY]], [[COPY1]], 0 :: (store (s64) into %ir.ptr) ; MIPS32FP32: RetRA ; MIPS32FP64-LABEL: name: store_double ; MIPS32FP64: liveins: $a2, $d6 ; MIPS32FP64: [[COPY:%[0-9]+]]:fgr64 = COPY $d6 ; MIPS32FP64: [[COPY1:%[0-9]+]]:gpr32 = COPY $a2 - ; MIPS32FP64: SDC164 [[COPY]], [[COPY1]], 0 :: (store 8 into %ir.ptr) + ; MIPS32FP64: SDC164 [[COPY]], [[COPY1]], 0 :: (store (s64) into %ir.ptr) ; MIPS32FP64: RetRA %0:fprb(s64) = COPY $d6 %1:gprb(p0) = COPY $a2 - G_STORE %0(s64), %1(p0) :: (store 8 into %ir.ptr) + G_STORE %0(s64), %1(p0) :: (store (s64) into %ir.ptr) RetRA ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/store_4_unaligned.mir b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/store_4_unaligned.mir index 2406393a6b3a4..0d296683a7541 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/store_4_unaligned.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/store_4_unaligned.mir @@ -41,13 +41,13 @@ body: | ; MIPS32: [[LUi:%[0-9]+]]:gpr32 = LUi target-flags(mips-abs-hi) @float_align1 ; MIPS32: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu [[LUi]], target-flags(mips-abs-lo) @float_align1 ; MIPS32: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]] - ; MIPS32: SWL [[COPY1]], [[ADDiu]], 3 :: (store 4 into @float_align1, align 1) - ; MIPS32: SWR [[COPY1]], [[ADDiu]], 0 :: (store 4 into @float_align1, align 1) + ; MIPS32: SWL [[COPY1]], [[ADDiu]], 3 :: (store (s32) into @float_align1, align 1) + ; MIPS32: SWR [[COPY1]], [[ADDiu]], 0 :: (store (s32) into @float_align1, align 1) ; MIPS32: RetRA %0:fprb(s32) = COPY $f12 %1:gprb(p0) = G_GLOBAL_VALUE @float_align1 %2:gprb(s32) = COPY %0(s32) - G_STORE %2(s32), %1(p0) :: (store 4 into @float_align1, align 1) + G_STORE %2(s32), %1(p0) :: (store (s32) into @float_align1, align 1) RetRA ... @@ -66,11 +66,11 @@ body: | ; MIPS32: [[COPY:%[0-9]+]]:fgr32 = COPY $f12 ; MIPS32: [[LUi:%[0-9]+]]:gpr32 = LUi target-flags(mips-abs-hi) @float_align4 ; MIPS32: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu [[LUi]], target-flags(mips-abs-lo) @float_align4 - ; MIPS32: SWC1 [[COPY]], [[ADDiu]], 0 :: (store 4 into @float_align4) + ; MIPS32: SWC1 [[COPY]], [[ADDiu]], 0 :: (store (s32) into @float_align4) ; MIPS32: RetRA %0:fprb(s32) = COPY $f12 %1:gprb(p0) = G_GLOBAL_VALUE @float_align4 - G_STORE %0(s32), %1(p0) :: (store 4 into @float_align4) + G_STORE %0(s32), %1(p0) :: (store (s32) into @float_align4) RetRA ... @@ -89,11 +89,11 @@ body: | ; MIPS32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; MIPS32: [[LUi:%[0-9]+]]:gpr32 = LUi target-flags(mips-abs-hi) @i32_align8 ; MIPS32: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu [[LUi]], target-flags(mips-abs-lo) @i32_align8 - ; MIPS32: SW [[COPY]], [[ADDiu]], 0 :: (store 4 into @i32_align8, align 8) + ; MIPS32: SW [[COPY]], [[ADDiu]], 0 :: (store (s32) into @i32_align8, align 8) ; MIPS32: RetRA %0:gprb(s32) = COPY $a0 %1:gprb(p0) = G_GLOBAL_VALUE @i32_align8 - G_STORE %0(s32), %1(p0) :: (store 4 into @i32_align8, align 8) + G_STORE %0(s32), %1(p0) :: (store (s32) into @i32_align8, align 8) RetRA ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/store_4_unaligned_r6.mir b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/store_4_unaligned_r6.mir index 79228007fee13..c776f2aa3d3d0 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/store_4_unaligned_r6.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/store_4_unaligned_r6.mir @@ -40,11 +40,11 @@ body: | ; MIPS32R6: [[COPY:%[0-9]+]]:fgr32 = COPY $f12 ; MIPS32R6: [[LUi:%[0-9]+]]:gpr32 = LUi target-flags(mips-abs-hi) @float_align1 ; MIPS32R6: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu [[LUi]], target-flags(mips-abs-lo) @float_align1 - ; MIPS32R6: SWC1 [[COPY]], [[ADDiu]], 0 :: (store 4 into @float_align1, align 1) + ; MIPS32R6: SWC1 [[COPY]], [[ADDiu]], 0 :: (store (s32) into @float_align1, align 1) ; MIPS32R6: RetRA %0:fprb(s32) = COPY $f12 %1:gprb(p0) = G_GLOBAL_VALUE @float_align1 - G_STORE %0(s32), %1(p0) :: (store 4 into @float_align1, align 1) + G_STORE %0(s32), %1(p0) :: (store (s32) into @float_align1, align 1) RetRA ... @@ -63,11 +63,11 @@ body: | ; MIPS32R6: [[COPY:%[0-9]+]]:fgr32 = COPY $f12 ; MIPS32R6: [[LUi:%[0-9]+]]:gpr32 = LUi target-flags(mips-abs-hi) @float_align8 ; MIPS32R6: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu [[LUi]], target-flags(mips-abs-lo) @float_align8 - ; MIPS32R6: SWC1 [[COPY]], [[ADDiu]], 0 :: (store 4 into @float_align8, align 8) + ; MIPS32R6: SWC1 [[COPY]], [[ADDiu]], 0 :: (store (s32) into @float_align8, align 8) ; MIPS32R6: RetRA %0:fprb(s32) = COPY $f12 %1:gprb(p0) = G_GLOBAL_VALUE @float_align8 - G_STORE %0(s32), %1(p0) :: (store 4 into @float_align8, align 8) + G_STORE %0(s32), %1(p0) :: (store (s32) into @float_align8, align 8) RetRA ... @@ -86,11 +86,11 @@ body: | ; MIPS32R6: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; MIPS32R6: [[LUi:%[0-9]+]]:gpr32 = LUi target-flags(mips-abs-hi) @i32_align2 ; MIPS32R6: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu [[LUi]], target-flags(mips-abs-lo) @i32_align2 - ; MIPS32R6: SW [[COPY]], [[ADDiu]], 0 :: (store 4 into @i32_align2, align 2) + ; MIPS32R6: SW [[COPY]], [[ADDiu]], 0 :: (store (s32) into @i32_align2, align 2) ; MIPS32R6: RetRA %0:gprb(s32) = COPY $a0 %1:gprb(p0) = G_GLOBAL_VALUE @i32_align2 - G_STORE %0(s32), %1(p0) :: (store 4 into @i32_align2, align 2) + G_STORE %0(s32), %1(p0) :: (store (s32) into @i32_align2, align 2) RetRA ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/sub_vec.mir b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/sub_vec.mir index 78f7604fa5e2b..974e089c671ee 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/sub_vec.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/sub_vec.mir @@ -23,18 +23,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2 - ; P5600: [[LD_B:%[0-9]+]]:msa128b = LD_B [[COPY]], 0 :: (load 16 from %ir.a) - ; P5600: [[LD_B1:%[0-9]+]]:msa128b = LD_B [[COPY1]], 0 :: (load 16 from %ir.b) + ; P5600: [[LD_B:%[0-9]+]]:msa128b = LD_B [[COPY]], 0 :: (load (<16 x s8>) from %ir.a) + ; P5600: [[LD_B1:%[0-9]+]]:msa128b = LD_B [[COPY1]], 0 :: (load (<16 x s8>) from %ir.b) ; P5600: [[SUBV_B:%[0-9]+]]:msa128b = SUBV_B [[LD_B1]], [[LD_B]] - ; P5600: ST_B [[SUBV_B]], [[COPY2]], 0 :: (store 16 into %ir.c) + ; P5600: ST_B [[SUBV_B]], [[COPY2]], 0 :: (store (<16 x s8>) into %ir.c) ; P5600: RetRA %0:gprb(p0) = COPY $a0 %1:gprb(p0) = COPY $a1 %2:gprb(p0) = COPY $a2 - %3:fprb(<16 x s8>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:fprb(<16 x s8>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:fprb(<16 x s8>) = G_LOAD %0(p0) :: (load (<16 x s8>) from %ir.a) + %4:fprb(<16 x s8>) = G_LOAD %1(p0) :: (load (<16 x s8>) from %ir.b) %5:fprb(<16 x s8>) = G_SUB %4, %3 - G_STORE %5(<16 x s8>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<16 x s8>), %2(p0) :: (store (<16 x s8>) into %ir.c) RetRA ... @@ -53,18 +53,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2 - ; P5600: [[LD_H:%[0-9]+]]:msa128h = LD_H [[COPY]], 0 :: (load 16 from %ir.a) - ; P5600: [[LD_H1:%[0-9]+]]:msa128h = LD_H [[COPY1]], 0 :: (load 16 from %ir.b) + ; P5600: [[LD_H:%[0-9]+]]:msa128h = LD_H [[COPY]], 0 :: (load (<8 x s16>) from %ir.a) + ; P5600: [[LD_H1:%[0-9]+]]:msa128h = LD_H [[COPY1]], 0 :: (load (<8 x s16>) from %ir.b) ; P5600: [[SUBV_H:%[0-9]+]]:msa128h = SUBV_H [[LD_H1]], [[LD_H]] - ; P5600: ST_H [[SUBV_H]], [[COPY2]], 0 :: (store 16 into %ir.c) + ; P5600: ST_H [[SUBV_H]], [[COPY2]], 0 :: (store (<8 x s16>) into %ir.c) ; P5600: RetRA %0:gprb(p0) = COPY $a0 %1:gprb(p0) = COPY $a1 %2:gprb(p0) = COPY $a2 - %3:fprb(<8 x s16>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:fprb(<8 x s16>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:fprb(<8 x s16>) = G_LOAD %0(p0) :: (load (<8 x s16>) from %ir.a) + %4:fprb(<8 x s16>) = G_LOAD %1(p0) :: (load (<8 x s16>) from %ir.b) %5:fprb(<8 x s16>) = G_SUB %4, %3 - G_STORE %5(<8 x s16>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<8 x s16>), %2(p0) :: (store (<8 x s16>) into %ir.c) RetRA ... @@ -83,18 +83,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2 - ; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY]], 0 :: (load 16 from %ir.a) - ; P5600: [[LD_W1:%[0-9]+]]:msa128w = LD_W [[COPY1]], 0 :: (load 16 from %ir.b) + ; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY]], 0 :: (load (<4 x s32>) from %ir.a) + ; P5600: [[LD_W1:%[0-9]+]]:msa128w = LD_W [[COPY1]], 0 :: (load (<4 x s32>) from %ir.b) ; P5600: [[SUBV_W:%[0-9]+]]:msa128w = SUBV_W [[LD_W1]], [[LD_W]] - ; P5600: ST_W [[SUBV_W]], [[COPY2]], 0 :: (store 16 into %ir.c) + ; P5600: ST_W [[SUBV_W]], [[COPY2]], 0 :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:gprb(p0) = COPY $a0 %1:gprb(p0) = COPY $a1 %2:gprb(p0) = COPY $a2 - %3:fprb(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:fprb(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:fprb(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) + %4:fprb(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>) from %ir.b) %5:fprb(<4 x s32>) = G_SUB %4, %3 - G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<4 x s32>), %2(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -113,18 +113,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2 - ; P5600: [[LD_D:%[0-9]+]]:msa128d = LD_D [[COPY]], 0 :: (load 16 from %ir.a) - ; P5600: [[LD_D1:%[0-9]+]]:msa128d = LD_D [[COPY1]], 0 :: (load 16 from %ir.b) + ; P5600: [[LD_D:%[0-9]+]]:msa128d = LD_D [[COPY]], 0 :: (load (<2 x s64>) from %ir.a) + ; P5600: [[LD_D1:%[0-9]+]]:msa128d = LD_D [[COPY1]], 0 :: (load (<2 x s64>) from %ir.b) ; P5600: [[SUBV_D:%[0-9]+]]:msa128d = SUBV_D [[LD_D1]], [[LD_D]] - ; P5600: ST_D [[SUBV_D]], [[COPY2]], 0 :: (store 16 into %ir.c) + ; P5600: ST_D [[SUBV_D]], [[COPY2]], 0 :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:gprb(p0) = COPY $a0 %1:gprb(p0) = COPY $a1 %2:gprb(p0) = COPY $a2 - %3:fprb(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:fprb(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:fprb(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) + %4:fprb(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>) from %ir.b) %5:fprb(<2 x s64>) = G_SUB %4, %3 - G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<2 x s64>), %2(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/truncStore_and_aExtLoad.mir b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/truncStore_and_aExtLoad.mir index 0d94ef27276e9..655d472d2f783 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/truncStore_and_aExtLoad.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/truncStore_and_aExtLoad.mir @@ -21,14 +21,14 @@ body: | ; MIPS32: liveins: $a0, $a1 ; MIPS32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 - ; MIPS32: [[LBu:%[0-9]+]]:gpr32 = LBu [[COPY1]], 0 :: (load 1 from %ir.py) - ; MIPS32: SB [[LBu]], [[COPY]], 0 :: (store 1 into %ir.px) + ; MIPS32: [[LBu:%[0-9]+]]:gpr32 = LBu [[COPY1]], 0 :: (load (s8) from %ir.py) + ; MIPS32: SB [[LBu]], [[COPY]], 0 :: (store (s8) into %ir.px) ; MIPS32: RetRA %0:gprb(p0) = COPY $a0 %1:gprb(p0) = COPY $a1 - %4:gprb(s32) = G_LOAD %1(p0) :: (load 1 from %ir.py) + %4:gprb(s32) = G_LOAD %1(p0) :: (load (s8) from %ir.py) %3:gprb(s32) = COPY %4(s32) - G_STORE %3(s32), %0(p0) :: (store 1 into %ir.px) + G_STORE %3(s32), %0(p0) :: (store (s8) into %ir.px) RetRA ... @@ -46,14 +46,14 @@ body: | ; MIPS32: liveins: $a0, $a1 ; MIPS32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 - ; MIPS32: [[LHu:%[0-9]+]]:gpr32 = LHu [[COPY1]], 0 :: (load 2 from %ir.py) - ; MIPS32: SH [[LHu]], [[COPY]], 0 :: (store 2 into %ir.px) + ; MIPS32: [[LHu:%[0-9]+]]:gpr32 = LHu [[COPY1]], 0 :: (load (s16) from %ir.py) + ; MIPS32: SH [[LHu]], [[COPY]], 0 :: (store (s16) into %ir.px) ; MIPS32: RetRA %0:gprb(p0) = COPY $a0 %1:gprb(p0) = COPY $a1 - %4:gprb(s32) = G_LOAD %1(p0) :: (load 2 from %ir.py) + %4:gprb(s32) = G_LOAD %1(p0) :: (load (s16) from %ir.py) %3:gprb(s32) = COPY %4(s32) - G_STORE %3(s32), %0(p0) :: (store 2 into %ir.px) + G_STORE %3(s32), %0(p0) :: (store (s16) into %ir.px) RetRA ... @@ -71,13 +71,13 @@ body: | ; MIPS32: liveins: $a0, $a1 ; MIPS32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 - ; MIPS32: [[LW:%[0-9]+]]:gpr32 = LW [[COPY1]], 0 :: (load 4 from %ir.py) - ; MIPS32: SW [[LW]], [[COPY]], 0 :: (store 4 into %ir.px) + ; MIPS32: [[LW:%[0-9]+]]:gpr32 = LW [[COPY1]], 0 :: (load (s32) from %ir.py) + ; MIPS32: SW [[LW]], [[COPY]], 0 :: (store (s32) into %ir.px) ; MIPS32: RetRA %0:gprb(p0) = COPY $a0 %1:gprb(p0) = COPY $a1 - %2:gprb(s32) = G_LOAD %1(p0) :: (load 4 from %ir.py) - G_STORE %2(s32), %0(p0) :: (store 4 into %ir.px) + %2:gprb(s32) = G_LOAD %1(p0) :: (load (s32) from %ir.py) + G_STORE %2(s32), %0(p0) :: (store (s32) into %ir.px) RetRA ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/var_arg.mir b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/var_arg.mir index a84137835be5d..bd3f03d87e75a 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/var_arg.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/var_arg.mir @@ -59,31 +59,31 @@ body: | ; MIPS32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 ; MIPS32: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu %fixed-stack.1, 0 - ; MIPS32: SW [[COPY1]], [[ADDiu]], 0 :: (store 4 into %fixed-stack.1) + ; MIPS32: SW [[COPY1]], [[ADDiu]], 0 :: (store (s32) into %fixed-stack.1) ; MIPS32: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2 ; MIPS32: [[ADDiu1:%[0-9]+]]:gpr32 = ADDiu %fixed-stack.2, 0 - ; MIPS32: SW [[COPY2]], [[ADDiu1]], 0 :: (store 4 into %fixed-stack.2) + ; MIPS32: SW [[COPY2]], [[ADDiu1]], 0 :: (store (s32) into %fixed-stack.2) ; MIPS32: [[COPY3:%[0-9]+]]:gpr32 = COPY $a3 ; MIPS32: [[ADDiu2:%[0-9]+]]:gpr32 = ADDiu %fixed-stack.3, 0 - ; MIPS32: SW [[COPY3]], [[ADDiu2]], 0 :: (store 4 into %fixed-stack.3) + ; MIPS32: SW [[COPY3]], [[ADDiu2]], 0 :: (store (s32) into %fixed-stack.3) ; MIPS32: [[LUi:%[0-9]+]]:gpr32 = LUi target-flags(mips-abs-hi) @.str ; MIPS32: [[ADDiu3:%[0-9]+]]:gpr32 = ADDiu [[LUi]], target-flags(mips-abs-lo) @.str ; MIPS32: [[ADDiu4:%[0-9]+]]:gpr32 = ADDiu %stack.0.fmt.addr, 0 ; MIPS32: [[ADDiu5:%[0-9]+]]:gpr32 = ADDiu %stack.1.ap, 0 ; MIPS32: [[ADDiu6:%[0-9]+]]:gpr32 = ADDiu %stack.2.aq, 0 ; MIPS32: [[ADDiu7:%[0-9]+]]:gpr32 = ADDiu %stack.3.s, 0 - ; MIPS32: SW [[COPY]], [[ADDiu4]], 0 :: (store 4 into %ir.fmt.addr) + ; MIPS32: SW [[COPY]], [[ADDiu4]], 0 :: (store (p0) into %ir.fmt.addr) ; MIPS32: [[LEA_ADDiu:%[0-9]+]]:gpr32 = LEA_ADDiu %stack.0.fmt.addr, 0 ; MIPS32: SW [[LEA_ADDiu]], [[ADDiu5]], 0 - ; MIPS32: [[LW:%[0-9]+]]:gpr32 = LW [[ADDiu5]], 0 :: (load 4) - ; MIPS32: SW [[LW]], [[ADDiu6]], 0 :: (store 4) - ; MIPS32: [[LW1:%[0-9]+]]:gpr32 = LW [[ADDiu6]], 0 :: (load 4 from %ir.aq) + ; MIPS32: [[LW:%[0-9]+]]:gpr32 = LW [[ADDiu5]], 0 :: (load (s32)) + ; MIPS32: SW [[LW]], [[ADDiu6]], 0 :: (store (s32)) + ; MIPS32: [[LW1:%[0-9]+]]:gpr32 = LW [[ADDiu6]], 0 :: (load (p0) from %ir.aq) ; MIPS32: [[ORi:%[0-9]+]]:gpr32 = ORi $zero, 4 ; MIPS32: [[ADDu:%[0-9]+]]:gpr32 = ADDu [[LW1]], [[ORi]] - ; MIPS32: SW [[ADDu]], [[ADDiu6]], 0 :: (store 4 into %ir.aq) - ; MIPS32: [[LW2:%[0-9]+]]:gpr32 = LW [[LW1]], 0 :: (load 4 from %ir.2) - ; MIPS32: SW [[LW2]], [[ADDiu7]], 0 :: (store 4 into %ir.s) - ; MIPS32: [[LW3:%[0-9]+]]:gpr32 = LW [[ADDiu7]], 0 :: (load 4 from %ir.s) + ; MIPS32: SW [[ADDu]], [[ADDiu6]], 0 :: (store (p0) into %ir.aq) + ; MIPS32: [[LW2:%[0-9]+]]:gpr32 = LW [[LW1]], 0 :: (load (p0) from %ir.2) + ; MIPS32: SW [[LW2]], [[ADDiu7]], 0 :: (store (p0) into %ir.s) + ; MIPS32: [[LW3:%[0-9]+]]:gpr32 = LW [[ADDiu7]], 0 :: (load (p0) from %ir.s) ; MIPS32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp ; MIPS32: $a0 = COPY [[ADDiu3]] ; MIPS32: $a1 = COPY [[LW3]] @@ -93,30 +93,30 @@ body: | %0:gprb(p0) = COPY $a0 %1:gprb(s32) = COPY $a1 %2:gprb(p0) = G_FRAME_INDEX %fixed-stack.2 - G_STORE %1(s32), %2(p0) :: (store 4 into %fixed-stack.2) + G_STORE %1(s32), %2(p0) :: (store (s32) into %fixed-stack.2) %3:gprb(s32) = COPY $a2 %4:gprb(p0) = G_FRAME_INDEX %fixed-stack.1 - G_STORE %3(s32), %4(p0) :: (store 4 into %fixed-stack.1) + G_STORE %3(s32), %4(p0) :: (store (s32) into %fixed-stack.1) %5:gprb(s32) = COPY $a3 %6:gprb(p0) = G_FRAME_INDEX %fixed-stack.0 - G_STORE %5(s32), %6(p0) :: (store 4 into %fixed-stack.0) + G_STORE %5(s32), %6(p0) :: (store (s32) into %fixed-stack.0) %18:gprb(p0) = G_GLOBAL_VALUE @.str %17:gprb(p0) = COPY %18(p0) %7:gprb(p0) = G_FRAME_INDEX %stack.0.fmt.addr %8:gpr32(p0) = G_FRAME_INDEX %stack.1.ap %9:gpr32(p0) = G_FRAME_INDEX %stack.2.aq %10:gprb(p0) = G_FRAME_INDEX %stack.3.s - G_STORE %0(p0), %7(p0) :: (store 4 into %ir.fmt.addr) - G_VASTART %8(p0) :: (store 4 into %ir.ap1, align 1) - %19:gpr32 = LW %8(p0), 0 :: (load 4) - SW %19, %9(p0), 0 :: (store 4) - %11:gprb(p0) = G_LOAD %9(p0) :: (load 4 from %ir.aq) + G_STORE %0(p0), %7(p0) :: (store (p0) into %ir.fmt.addr) + G_VASTART %8(p0) :: (store (p0) into %ir.ap1, align 1) + %19:gpr32 = LW %8(p0), 0 :: (load (s32)) + SW %19, %9(p0), 0 :: (store (s32)) + %11:gprb(p0) = G_LOAD %9(p0) :: (load (p0) from %ir.aq) %12:gprb(s32) = G_CONSTANT i32 4 %13:gprb(p0) = G_PTR_ADD %11, %12(s32) - G_STORE %13(p0), %9(p0) :: (store 4 into %ir.aq) - %14:gprb(p0) = G_LOAD %11(p0) :: (load 4 from %ir.2) - G_STORE %14(p0), %10(p0) :: (store 4 into %ir.s) - %15:gprb(p0) = G_LOAD %10(p0) :: (load 4 from %ir.s) + G_STORE %13(p0), %9(p0) :: (store (p0) into %ir.aq) + %14:gprb(p0) = G_LOAD %11(p0) :: (load (p0) from %ir.2) + G_STORE %14(p0), %10(p0) :: (store (p0) into %ir.s) + %15:gprb(p0) = G_LOAD %10(p0) :: (load (p0) from %ir.s) ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp $a0 = COPY %17(p0) $a1 = COPY %15(p0) diff --git a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/zextLoad_and_sextLoad.mir b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/zextLoad_and_sextLoad.mir index f6a6598a76a01..25f316057b111 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/zextLoad_and_sextLoad.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/zextLoad_and_sextLoad.mir @@ -21,11 +21,11 @@ body: | ; MIPS32-LABEL: name: load1_s8_to_zextLoad1_s32 ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 - ; MIPS32: [[LBu:%[0-9]+]]:gpr32 = LBu [[COPY]], 0 :: (load 1 from %ir.px) + ; MIPS32: [[LBu:%[0-9]+]]:gpr32 = LBu [[COPY]], 0 :: (load (s8) from %ir.px) ; MIPS32: $v0 = COPY [[LBu]] ; MIPS32: RetRA implicit $v0 %0:gprb(p0) = COPY $a0 - %2:gprb(s32) = G_ZEXTLOAD %0(p0) :: (load 1 from %ir.px) + %2:gprb(s32) = G_ZEXTLOAD %0(p0) :: (load (s8) from %ir.px) $v0 = COPY %2(s32) RetRA implicit $v0 @@ -43,11 +43,11 @@ body: | ; MIPS32-LABEL: name: load2_s16_to_zextLoad2_s32 ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 - ; MIPS32: [[LHu:%[0-9]+]]:gpr32 = LHu [[COPY]], 0 :: (load 2 from %ir.px) + ; MIPS32: [[LHu:%[0-9]+]]:gpr32 = LHu [[COPY]], 0 :: (load (s16) from %ir.px) ; MIPS32: $v0 = COPY [[LHu]] ; MIPS32: RetRA implicit $v0 %0:gprb(p0) = COPY $a0 - %2:gprb(s32) = G_ZEXTLOAD %0(p0) :: (load 2 from %ir.px) + %2:gprb(s32) = G_ZEXTLOAD %0(p0) :: (load (s16) from %ir.px) $v0 = COPY %2(s32) RetRA implicit $v0 @@ -65,11 +65,11 @@ body: | ; MIPS32-LABEL: name: load1_s8_to_sextLoad1_s32 ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 - ; MIPS32: [[LB:%[0-9]+]]:gpr32 = LB [[COPY]], 0 :: (load 1 from %ir.px) + ; MIPS32: [[LB:%[0-9]+]]:gpr32 = LB [[COPY]], 0 :: (load (s8) from %ir.px) ; MIPS32: $v0 = COPY [[LB]] ; MIPS32: RetRA implicit $v0 %0:gprb(p0) = COPY $a0 - %2:gprb(s32) = G_SEXTLOAD %0(p0) :: (load 1 from %ir.px) + %2:gprb(s32) = G_SEXTLOAD %0(p0) :: (load (s8) from %ir.px) $v0 = COPY %2(s32) RetRA implicit $v0 @@ -87,11 +87,11 @@ body: | ; MIPS32-LABEL: name: load2_s16_to_sextLoad2_s32 ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 - ; MIPS32: [[LH:%[0-9]+]]:gpr32 = LH [[COPY]], 0 :: (load 2 from %ir.px) + ; MIPS32: [[LH:%[0-9]+]]:gpr32 = LH [[COPY]], 0 :: (load (s16) from %ir.px) ; MIPS32: $v0 = COPY [[LH]] ; MIPS32: RetRA implicit $v0 %0:gprb(p0) = COPY $a0 - %2:gprb(s32) = G_SEXTLOAD %0(p0) :: (load 2 from %ir.px) + %2:gprb(s32) = G_SEXTLOAD %0(p0) :: (load (s16) from %ir.px) $v0 = COPY %2(s32) RetRA implicit $v0 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/aggregate_struct_return.ll b/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/aggregate_struct_return.ll index f6b034ea7f8ed..d2000899b9c7d 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/aggregate_struct_return.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/aggregate_struct_return.ll @@ -9,14 +9,14 @@ define { float, float } @add_complex_float({ float, float }* %a, { float, float ; MIPS32: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; MIPS32: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; MIPS32: [[COPY2:%[0-9]+]]:_(p0) = COPY [[COPY]](p0) - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY2]](p0) :: (load 4 from %ir..realp) + ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY2]](p0) :: (load (s32) from %ir..realp) ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; MIPS32: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) - ; MIPS32: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p0) :: (load 4 from %ir..imagp) + ; MIPS32: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) + ; MIPS32: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from %ir..imagp) ; MIPS32: [[COPY3:%[0-9]+]]:_(p0) = COPY [[COPY1]](p0) - ; MIPS32: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[COPY3]](p0) :: (load 4 from %ir..realp1) - ; MIPS32: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s32) - ; MIPS32: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p0) :: (load 4 from %ir..imagp3) + ; MIPS32: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[COPY3]](p0) :: (load (s32) from %ir..realp1) + ; MIPS32: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s32) + ; MIPS32: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from %ir..imagp3) ; MIPS32: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[LOAD]], [[LOAD2]] ; MIPS32: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[LOAD1]], [[LOAD3]] ; MIPS32: $f0 = COPY [[FADD]](s32) @@ -46,14 +46,14 @@ define { double, double } @add_complex_double({ double, double }* %a, { double, ; MIPS32: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; MIPS32: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; MIPS32: [[COPY2:%[0-9]+]]:_(p0) = COPY [[COPY]](p0) - ; MIPS32: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY2]](p0) :: (load 8 from %ir..realp) + ; MIPS32: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY2]](p0) :: (load (s64) from %ir..realp) ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; MIPS32: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) - ; MIPS32: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[GEP]](p0) :: (load 8 from %ir..imagp) + ; MIPS32: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) + ; MIPS32: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from %ir..imagp) ; MIPS32: [[COPY3:%[0-9]+]]:_(p0) = COPY [[COPY1]](p0) - ; MIPS32: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[COPY3]](p0) :: (load 8 from %ir..realp1) - ; MIPS32: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s32) - ; MIPS32: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[GEP1]](p0) :: (load 8 from %ir..imagp3) + ; MIPS32: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[COPY3]](p0) :: (load (s64) from %ir..realp1) + ; MIPS32: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s32) + ; MIPS32: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p0) :: (load (s64) from %ir..imagp3) ; MIPS32: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[LOAD]], [[LOAD2]] ; MIPS32: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[LOAD1]], [[LOAD3]] ; MIPS32: $d0 = COPY [[FADD]](s64) @@ -88,9 +88,9 @@ define void @call_ret_complex_float({ float, float }* %z) { ; MIPS32: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp ; MIPS32: [[COPY3:%[0-9]+]]:_(p0) = COPY [[COPY]](p0) ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; MIPS32: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) - ; MIPS32: G_STORE [[COPY1]](s32), [[COPY3]](p0) :: (store 4 into %ir..realp) - ; MIPS32: G_STORE [[COPY2]](s32), [[GEP]](p0) :: (store 4 into %ir..imagp) + ; MIPS32: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) + ; MIPS32: G_STORE [[COPY1]](s32), [[COPY3]](p0) :: (store (s32) into %ir..realp) + ; MIPS32: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store (s32) into %ir..imagp) ; MIPS32: RetRA entry: %call = call { float, float } @ret_complex_float() @@ -116,9 +116,9 @@ define void @call_ret_complex_double({ double, double }* %z) { ; MIPS32: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp ; MIPS32: [[COPY3:%[0-9]+]]:_(p0) = COPY [[COPY]](p0) ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; MIPS32: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) - ; MIPS32: G_STORE [[COPY1]](s64), [[COPY3]](p0) :: (store 8 into %ir..realp) - ; MIPS32: G_STORE [[COPY2]](s64), [[GEP]](p0) :: (store 8 into %ir..imagp) + ; MIPS32: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) + ; MIPS32: G_STORE [[COPY1]](s64), [[COPY3]](p0) :: (store (s64) into %ir..realp) + ; MIPS32: G_STORE [[COPY2]](s64), [[PTR_ADD]](p0) :: (store (s64) into %ir..imagp) ; MIPS32: RetRA entry: %call = call { double, double } @ret_complex_double() diff --git a/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/call.ll b/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/call.ll index 67265c95b64bc..a020c25d9707d 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/call.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/call.ll @@ -153,7 +153,7 @@ define void @call_symbol(i8* nocapture readonly %src, i8* nocapture %dest, i32 s ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 - ; MIPS32: G_MEMCPY [[COPY1]](p0), [[COPY]](p0), [[COPY2]](s32), 0 :: (store 1 into %ir.dest), (load 1 from %ir.src) + ; MIPS32: G_MEMCPY [[COPY1]](p0), [[COPY]](p0), [[COPY2]](s32), 0 :: (store (s8) into %ir.dest), (load (s8) from %ir.src) ; MIPS32: RetRA ; MIPS32_PIC-LABEL: name: call_symbol ; MIPS32_PIC: bb.1.entry: @@ -161,7 +161,7 @@ define void @call_symbol(i8* nocapture readonly %src, i8* nocapture %dest, i32 s ; MIPS32_PIC: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; MIPS32_PIC: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; MIPS32_PIC: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 - ; MIPS32_PIC: G_MEMCPY [[COPY1]](p0), [[COPY]](p0), [[COPY2]](s32), 0 :: (store 1 into %ir.dest), (load 1 from %ir.src) + ; MIPS32_PIC: G_MEMCPY [[COPY1]](p0), [[COPY]](p0), [[COPY2]](s32), 0 :: (store (s8) into %ir.dest), (load (s8) from %ir.src) ; MIPS32_PIC: RetRA entry: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 %length, i1 false) diff --git a/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/extend_args.ll b/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/extend_args.ll index 2d0a26eff6593..e54b8b832f57e 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/extend_args.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/extend_args.ll @@ -54,7 +54,7 @@ define signext i8 @call_sext_stack_arg_i8(i32 %x1, i32 %x2, i32 %x3, i32 %x4, i8 ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 ; MIPS32: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load 4 from %fixed-stack.0, align 8) + ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8) ; MIPS32: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD]](s32) ; MIPS32: ADJCALLSTACKDOWN 24, 0, implicit-def $sp, implicit $sp ; MIPS32: $a0 = COPY [[COPY]](s32) @@ -63,9 +63,9 @@ define signext i8 @call_sext_stack_arg_i8(i32 %x1, i32 %x2, i32 %x3, i32 %x4, i8 ; MIPS32: $a3 = COPY [[COPY3]](s32) ; MIPS32: [[COPY4:%[0-9]+]]:_(p0) = COPY $sp ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; MIPS32: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C]](s32) + ; MIPS32: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C]](s32) ; MIPS32: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s8) - ; MIPS32: G_STORE [[SEXT]](s32), [[GEP]](p0) :: (store 4 into stack + 16, align 8) + ; MIPS32: G_STORE [[SEXT]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack + 16, align 8) ; MIPS32: JAL @sext_stack_arg_i8, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit $a2, implicit $a3, implicit-def $v0 ; MIPS32: [[COPY5:%[0-9]+]]:_(s32) = COPY $v0 ; MIPS32: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY5]](s32) @@ -87,7 +87,7 @@ define zeroext i8 @call_zext_stack_arg_i8(i32 %x1, i32 %x2, i32 %x3, i32 %x4, i8 ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 ; MIPS32: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load 4 from %fixed-stack.0, align 8) + ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8) ; MIPS32: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD]](s32) ; MIPS32: ADJCALLSTACKDOWN 24, 0, implicit-def $sp, implicit $sp ; MIPS32: $a0 = COPY [[COPY]](s32) @@ -96,9 +96,9 @@ define zeroext i8 @call_zext_stack_arg_i8(i32 %x1, i32 %x2, i32 %x3, i32 %x4, i8 ; MIPS32: $a3 = COPY [[COPY3]](s32) ; MIPS32: [[COPY4:%[0-9]+]]:_(p0) = COPY $sp ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; MIPS32: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C]](s32) + ; MIPS32: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C]](s32) ; MIPS32: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s8) - ; MIPS32: G_STORE [[ZEXT]](s32), [[GEP]](p0) :: (store 4 into stack + 16, align 8) + ; MIPS32: G_STORE [[ZEXT]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack + 16, align 8) ; MIPS32: JAL @zext_stack_arg_i8, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit $a2, implicit $a3, implicit-def $v0 ; MIPS32: [[COPY5:%[0-9]+]]:_(s32) = COPY $v0 ; MIPS32: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY5]](s32) @@ -120,7 +120,7 @@ define i8 @call_aext_stack_arg_i8(i32 %x1, i32 %x2, i32 %x3, i32 %x4, i8 %a) { ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 ; MIPS32: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load 4 from %fixed-stack.0, align 8) + ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8) ; MIPS32: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD]](s32) ; MIPS32: ADJCALLSTACKDOWN 24, 0, implicit-def $sp, implicit $sp ; MIPS32: $a0 = COPY [[COPY]](s32) @@ -129,9 +129,9 @@ define i8 @call_aext_stack_arg_i8(i32 %x1, i32 %x2, i32 %x3, i32 %x4, i8 %a) { ; MIPS32: $a3 = COPY [[COPY3]](s32) ; MIPS32: [[COPY4:%[0-9]+]]:_(p0) = COPY $sp ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; MIPS32: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C]](s32) + ; MIPS32: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C]](s32) ; MIPS32: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s8) - ; MIPS32: G_STORE [[ANYEXT]](s32), [[GEP]](p0) :: (store 4 into stack + 16, align 8) + ; MIPS32: G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack + 16, align 8) ; MIPS32: JAL @aext_stack_arg_i8, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit $a2, implicit $a3, implicit-def $v0 ; MIPS32: [[COPY5:%[0-9]+]]:_(s32) = COPY $v0 ; MIPS32: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY5]](s32) @@ -197,7 +197,7 @@ define signext i16 @call_sext_stack_arg_i16(i32 %x1, i32 %x2, i32 %x3, i32 %x4, ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 ; MIPS32: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load 4 from %fixed-stack.0, align 8) + ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8) ; MIPS32: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; MIPS32: ADJCALLSTACKDOWN 24, 0, implicit-def $sp, implicit $sp ; MIPS32: $a0 = COPY [[COPY]](s32) @@ -206,9 +206,9 @@ define signext i16 @call_sext_stack_arg_i16(i32 %x1, i32 %x2, i32 %x3, i32 %x4, ; MIPS32: $a3 = COPY [[COPY3]](s32) ; MIPS32: [[COPY4:%[0-9]+]]:_(p0) = COPY $sp ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; MIPS32: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C]](s32) + ; MIPS32: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C]](s32) ; MIPS32: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16) - ; MIPS32: G_STORE [[SEXT]](s32), [[GEP]](p0) :: (store 4 into stack + 16, align 8) + ; MIPS32: G_STORE [[SEXT]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack + 16, align 8) ; MIPS32: JAL @sext_stack_arg_i16, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit $a2, implicit $a3, implicit-def $v0 ; MIPS32: [[COPY5:%[0-9]+]]:_(s32) = COPY $v0 ; MIPS32: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) @@ -230,7 +230,7 @@ define zeroext i16 @call_zext_stack_arg_i16(i32 %x1, i32 %x2, i32 %x3, i32 %x4, ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 ; MIPS32: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load 4 from %fixed-stack.0, align 8) + ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8) ; MIPS32: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; MIPS32: ADJCALLSTACKDOWN 24, 0, implicit-def $sp, implicit $sp ; MIPS32: $a0 = COPY [[COPY]](s32) @@ -239,9 +239,9 @@ define zeroext i16 @call_zext_stack_arg_i16(i32 %x1, i32 %x2, i32 %x3, i32 %x4, ; MIPS32: $a3 = COPY [[COPY3]](s32) ; MIPS32: [[COPY4:%[0-9]+]]:_(p0) = COPY $sp ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; MIPS32: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C]](s32) + ; MIPS32: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C]](s32) ; MIPS32: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s16) - ; MIPS32: G_STORE [[ZEXT]](s32), [[GEP]](p0) :: (store 4 into stack + 16, align 8) + ; MIPS32: G_STORE [[ZEXT]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack + 16, align 8) ; MIPS32: JAL @zext_stack_arg_i16, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit $a2, implicit $a3, implicit-def $v0 ; MIPS32: [[COPY5:%[0-9]+]]:_(s32) = COPY $v0 ; MIPS32: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) @@ -263,7 +263,7 @@ define i16 @call_aext_stack_arg_i16(i32 %x1, i32 %x2, i32 %x3, i32 %x4, i16 %a) ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 ; MIPS32: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load 4 from %fixed-stack.0, align 8) + ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8) ; MIPS32: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) ; MIPS32: ADJCALLSTACKDOWN 24, 0, implicit-def $sp, implicit $sp ; MIPS32: $a0 = COPY [[COPY]](s32) @@ -272,9 +272,9 @@ define i16 @call_aext_stack_arg_i16(i32 %x1, i32 %x2, i32 %x3, i32 %x4, i16 %a) ; MIPS32: $a3 = COPY [[COPY3]](s32) ; MIPS32: [[COPY4:%[0-9]+]]:_(p0) = COPY $sp ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; MIPS32: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C]](s32) + ; MIPS32: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C]](s32) ; MIPS32: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) - ; MIPS32: G_STORE [[ANYEXT]](s32), [[GEP]](p0) :: (store 4 into stack + 16, align 8) + ; MIPS32: G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack + 16, align 8) ; MIPS32: JAL @aext_stack_arg_i16, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit $a2, implicit $a3, implicit-def $v0 ; MIPS32: [[COPY5:%[0-9]+]]:_(s32) = COPY $v0 ; MIPS32: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) diff --git a/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/pointers.ll b/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/pointers.ll index 0163182589924..9782a7b777442 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/pointers.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/pointers.ll @@ -7,7 +7,7 @@ define i32 @ptr_arg_in_regs(i32* %p) { ; MIPS32: bb.1.entry: ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4 from %ir.p) + ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32) from %ir.p) ; MIPS32: $v0 = COPY [[LOAD]](s32) ; MIPS32: RetRA implicit $v0 entry: @@ -24,8 +24,8 @@ define i32 @ptr_arg_on_stack(i32 %x1, i32 %x2, i32 %x3, i32 %x4, i32* %p) { ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 ; MIPS32: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; MIPS32: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (load 4 from %fixed-stack.0, align 8) - ; MIPS32: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p0) :: (load 4 from %ir.p) + ; MIPS32: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8) + ; MIPS32: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p0) :: (load (s32) from %ir.p) ; MIPS32: $v0 = COPY [[LOAD1]](s32) ; MIPS32: RetRA implicit $v0 entry: diff --git a/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/split_args.ll b/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/split_args.ll index 803de0087a602..5a2c8f05b2137 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/split_args.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/split_args.ll @@ -27,9 +27,9 @@ define i64 @i64_stack(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i64 %a) { ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 ; MIPS32: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load 4 from %fixed-stack.[[STACK1]], align 8) + ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.[[STACK1]], align 8) ; MIPS32: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; MIPS32: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (load 4 from %fixed-stack.[[STACK0]]) + ; MIPS32: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (load (s32) from %fixed-stack.[[STACK0]]) ; MIPS32: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; MIPS32: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64) ; MIPS32: $v0 = COPY [[UV]](s32) @@ -68,11 +68,11 @@ define i64 @i64_stack_allign(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %s16, i64 % ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 ; MIPS32: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load 4 from %fixed-stack.[[STACK2]], align 8) + ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.[[STACK2]], align 8) ; MIPS32: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 - ; MIPS32: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (load 4 from %fixed-stack.[[STACK1]], align 8) + ; MIPS32: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (load (s32) from %fixed-stack.[[STACK1]], align 8) ; MIPS32: [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; MIPS32: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p0) :: (load 4 from %fixed-stack.[[STACK0]]) + ; MIPS32: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p0) :: (load (s32) from %fixed-stack.[[STACK0]]) ; MIPS32: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD1]](s32), [[LOAD2]](s32) ; MIPS32: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64) ; MIPS32: $v0 = COPY [[UV]](s32) @@ -93,9 +93,9 @@ define i64 @i64_reg_stack(i32 %a0, i32 %a1, i32 %a2, i64 %a) { ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 ; MIPS32: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load 4 from %fixed-stack.[[STACK1]], align 8) + ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.[[STACK1]], align 8) ; MIPS32: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; MIPS32: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (load 4 from %fixed-stack.[[STACK0]]) + ; MIPS32: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (load (s32) from %fixed-stack.[[STACK0]]) ; MIPS32: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; MIPS32: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64) ; MIPS32: $v0 = COPY [[UV]](s32) diff --git a/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/sret_pointer.ll b/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/sret_pointer.ll index f9487d325b029..28e44fd70a0c7 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/sret_pointer.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/sret_pointer.ll @@ -10,10 +10,10 @@ define void @ZeroInit(%struct.S* noalias sret(%struct.S) %agg.result) { ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; MIPS32: [[COPY1:%[0-9]+]]:_(p0) = COPY [[COPY]](p0) - ; MIPS32: G_STORE [[C]](s32), [[COPY1]](p0) :: (store 4 into %ir.x) + ; MIPS32: G_STORE [[C]](s32), [[COPY1]](p0) :: (store (s32) into %ir.x) ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; MIPS32: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; MIPS32: G_STORE [[C]](s32), [[GEP]](p0) :: (store 4 into %ir.y) + ; MIPS32: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; MIPS32: G_STORE [[C]](s32), [[PTR_ADD]](p0) :: (store (s32) into %ir.y) ; MIPS32: RetRA entry: %x = getelementptr inbounds %struct.S, %struct.S* %agg.result, i32 0, i32 0 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/stack_args.ll b/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/stack_args.ll index e33991c8454e5..9375be547fae6 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/stack_args.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/stack_args.ll @@ -12,7 +12,7 @@ define i32 @g(i32 %x1, i32 %x2, i32 %x3, i32 %x4, i32 %x5){ ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 ; MIPS32: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load 4 from %fixed-stack.0, align 8) + ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8) ; MIPS32: ADJCALLSTACKDOWN 24, 0, implicit-def $sp, implicit $sp ; MIPS32: $a0 = COPY [[COPY]](s32) ; MIPS32: $a1 = COPY [[COPY1]](s32) @@ -20,8 +20,8 @@ define i32 @g(i32 %x1, i32 %x2, i32 %x3, i32 %x4, i32 %x5){ ; MIPS32: $a3 = COPY [[COPY3]](s32) ; MIPS32: [[COPY4:%[0-9]+]]:_(p0) = COPY $sp ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; MIPS32: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C]](s32) - ; MIPS32: G_STORE [[LOAD]](s32), [[GEP]](p0) :: (store 4 into stack + 16, align 8) + ; MIPS32: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C]](s32) + ; MIPS32: G_STORE [[LOAD]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack + 16, align 8) ; MIPS32: JAL @f, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit $a2, implicit $a3, implicit-def $v0 ; MIPS32: [[COPY5:%[0-9]+]]:_(s32) = COPY $v0 ; MIPS32: ADJCALLSTACKUP 24, 0, implicit-def $sp, implicit $sp diff --git a/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/var_arg.ll b/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/var_arg.ll index 64b06b44e2c88..f89d55912b862 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/var_arg.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/var_arg.ll @@ -13,29 +13,29 @@ define void @testVaCopyArg(i8* %fmt, ...) { ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 ; MIPS32: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2 - ; MIPS32: G_STORE [[COPY1]](s32), [[FRAME_INDEX]](p0) :: (store 4 into %fixed-stack.2) + ; MIPS32: G_STORE [[COPY1]](s32), [[FRAME_INDEX]](p0) :: (store (s32) into %fixed-stack.2) ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 ; MIPS32: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 - ; MIPS32: G_STORE [[COPY2]](s32), [[FRAME_INDEX1]](p0) :: (store 4 into %fixed-stack.1) + ; MIPS32: G_STORE [[COPY2]](s32), [[FRAME_INDEX1]](p0) :: (store (s32) into %fixed-stack.1) ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 ; MIPS32: [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; MIPS32: G_STORE [[COPY3]](s32), [[FRAME_INDEX2]](p0) :: (store 4 into %fixed-stack.0) + ; MIPS32: G_STORE [[COPY3]](s32), [[FRAME_INDEX2]](p0) :: (store (s32) into %fixed-stack.0) ; MIPS32: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @.str ; MIPS32: [[COPY4:%[0-9]+]]:_(p0) = COPY [[GV]](p0) ; MIPS32: [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.fmt.addr ; MIPS32: [[FRAME_INDEX4:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.ap ; MIPS32: [[FRAME_INDEX5:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.2.aq ; MIPS32: [[FRAME_INDEX6:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.3.s - ; MIPS32: G_STORE [[COPY]](p0), [[FRAME_INDEX3]](p0) :: (store 4 into %ir.fmt.addr) - ; MIPS32: G_VASTART [[FRAME_INDEX4]](p0) :: (store 4 into %ir.ap1, align 1) + ; MIPS32: G_STORE [[COPY]](p0), [[FRAME_INDEX3]](p0) :: (store (p0) into %ir.fmt.addr) + ; MIPS32: G_VASTART [[FRAME_INDEX4]](p0) :: (store (s32) into %ir.ap1, align 1) ; MIPS32: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.va_copy), [[FRAME_INDEX5]](p0), [[FRAME_INDEX4]](p0) - ; MIPS32: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX5]](p0) :: (dereferenceable load 4 from %ir.aq) + ; MIPS32: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX5]](p0) :: (dereferenceable load (p0) from %ir.aq) ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; MIPS32: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[LOAD]], [[C]](s32) - ; MIPS32: G_STORE [[PTR_ADD]](p0), [[FRAME_INDEX5]](p0) :: (store 4 into %ir.aq) - ; MIPS32: [[LOAD1:%[0-9]+]]:_(p0) = G_LOAD [[LOAD]](p0) :: (load 4 from %ir.2) - ; MIPS32: G_STORE [[LOAD1]](p0), [[FRAME_INDEX6]](p0) :: (store 4 into %ir.s) - ; MIPS32: [[LOAD2:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX6]](p0) :: (dereferenceable load 4 from %ir.s) + ; MIPS32: G_STORE [[PTR_ADD]](p0), [[FRAME_INDEX5]](p0) :: (store (p0) into %ir.aq) + ; MIPS32: [[LOAD1:%[0-9]+]]:_(p0) = G_LOAD [[LOAD]](p0) :: (load (p0) from %ir.2) + ; MIPS32: G_STORE [[LOAD1]](p0), [[FRAME_INDEX6]](p0) :: (store (p0) into %ir.s) + ; MIPS32: [[LOAD2:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX6]](p0) :: (dereferenceable load (p0) from %ir.s) ; MIPS32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp ; MIPS32: $a0 = COPY [[COPY4]](p0) ; MIPS32: $a1 = COPY [[LOAD2]](p0) diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/add.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/add.mir index 6ee5f2a4b185c..c60ca3672c9ad 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/add.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/add.mir @@ -271,13 +271,13 @@ body: | ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 ; MIPS32: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load 4 from %fixed-stack.0) + ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0) ; MIPS32: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 - ; MIPS32: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (load 4 from %fixed-stack.1) + ; MIPS32: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (load (s32) from %fixed-stack.1) ; MIPS32: [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2 - ; MIPS32: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p0) :: (load 4 from %fixed-stack.2) + ; MIPS32: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p0) :: (load (s32) from %fixed-stack.2) ; MIPS32: [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.3 - ; MIPS32: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p0) :: (load 4 from %fixed-stack.3) + ; MIPS32: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p0) :: (load (s32) from %fixed-stack.3) ; MIPS32: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD]], [[COPY]] ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD]](s32), [[COPY]] ; MIPS32: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LOAD1]], [[COPY1]] @@ -306,13 +306,13 @@ body: | %5:_(s32) = COPY $a3 %0:_(s128) = G_MERGE_VALUES %2(s32), %3(s32), %4(s32), %5(s32) %10:_(p0) = G_FRAME_INDEX %fixed-stack.3 - %6:_(s32) = G_LOAD %10(p0) :: (load 4 from %fixed-stack.3, align 4) + %6:_(s32) = G_LOAD %10(p0) :: (load (s32) from %fixed-stack.3, align 4) %11:_(p0) = G_FRAME_INDEX %fixed-stack.2 - %7:_(s32) = G_LOAD %11(p0) :: (load 4 from %fixed-stack.2, align 4) + %7:_(s32) = G_LOAD %11(p0) :: (load (s32) from %fixed-stack.2, align 4) %12:_(p0) = G_FRAME_INDEX %fixed-stack.1 - %8:_(s32) = G_LOAD %12(p0) :: (load 4 from %fixed-stack.1, align 4) + %8:_(s32) = G_LOAD %12(p0) :: (load (s32) from %fixed-stack.1, align 4) %13:_(p0) = G_FRAME_INDEX %fixed-stack.0 - %9:_(s32) = G_LOAD %13(p0) :: (load 4 from %fixed-stack.0, align 4) + %9:_(s32) = G_LOAD %13(p0) :: (load (s32) from %fixed-stack.0, align 4) %1:_(s128) = G_MERGE_VALUES %6(s32), %7(s32), %8(s32), %9(s32) %14:_(s128) = G_ADD %1, %0 %15:_(s32), %16:_(s32), %17:_(s32), %18:_(s32) = G_UNMERGE_VALUES %14(s128) @@ -342,16 +342,16 @@ body: | ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; MIPS32: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32) ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] - ; MIPS32: G_STORE [[AND]](s32), [[COPY3]](p0) :: (store 1 into %ir.pcarry_flag) - ; MIPS32: G_STORE [[ADD]](s32), [[COPY2]](p0) :: (store 4 into %ir.padd) + ; MIPS32: G_STORE [[AND]](s32), [[COPY3]](p0) :: (store (s1) into %ir.pcarry_flag) + ; MIPS32: G_STORE [[ADD]](s32), [[COPY2]](p0) :: (store (s32) into %ir.padd) ; MIPS32: RetRA %0:_(s32) = COPY $a0 %1:_(s32) = COPY $a1 %2:_(p0) = COPY $a2 %3:_(p0) = COPY $a3 %4:_(s32), %5:_(s1) = G_UADDO %0, %1 - G_STORE %5(s1), %3(p0) :: (store 1 into %ir.pcarry_flag) - G_STORE %4(s32), %2(p0) :: (store 4 into %ir.padd) + G_STORE %5(s1), %3(p0) :: (store (s1) into %ir.pcarry_flag) + G_STORE %4(s32), %2(p0) :: (store (s32) into %ir.padd) RetRA ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/add_vec.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/add_vec.mir index d0950ea638f81..3b30544693a62 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/add_vec.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/add_vec.mir @@ -21,18 +21,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load (<16 x s8>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY1]](p0) :: (load (<16 x s8>) from %ir.b) ; P5600: [[ADD:%[0-9]+]]:_(<16 x s8>) = G_ADD [[LOAD1]], [[LOAD]] - ; P5600: G_STORE [[ADD]](<16 x s8>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[ADD]](<16 x s8>), [[COPY2]](p0) :: (store (<16 x s8>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<16 x s8>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<16 x s8>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<16 x s8>) = G_LOAD %0(p0) :: (load (<16 x s8>) from %ir.a) + %4:_(<16 x s8>) = G_LOAD %1(p0) :: (load (<16 x s8>) from %ir.b) %5:_(<16 x s8>) = G_ADD %4, %3 - G_STORE %5(<16 x s8>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<16 x s8>), %2(p0) :: (store (<16 x s8>) into %ir.c) RetRA ... @@ -49,18 +49,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load (<8 x s16>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY1]](p0) :: (load (<8 x s16>) from %ir.b) ; P5600: [[ADD:%[0-9]+]]:_(<8 x s16>) = G_ADD [[LOAD1]], [[LOAD]] - ; P5600: G_STORE [[ADD]](<8 x s16>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[ADD]](<8 x s16>), [[COPY2]](p0) :: (store (<8 x s16>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<8 x s16>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<8 x s16>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<8 x s16>) = G_LOAD %0(p0) :: (load (<8 x s16>) from %ir.a) + %4:_(<8 x s16>) = G_LOAD %1(p0) :: (load (<8 x s16>) from %ir.b) %5:_(<8 x s16>) = G_ADD %4, %3 - G_STORE %5(<8 x s16>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<8 x s16>), %2(p0) :: (store (<8 x s16>) into %ir.c) RetRA ... @@ -77,18 +77,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load (<4 x s32>) from %ir.b) ; P5600: [[ADD:%[0-9]+]]:_(<4 x s32>) = G_ADD [[LOAD1]], [[LOAD]] - ; P5600: G_STORE [[ADD]](<4 x s32>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[ADD]](<4 x s32>), [[COPY2]](p0) :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) + %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>) from %ir.b) %5:_(<4 x s32>) = G_ADD %4, %3 - G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<4 x s32>), %2(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -105,18 +105,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load (<2 x s64>) from %ir.b) ; P5600: [[ADD:%[0-9]+]]:_(<2 x s64>) = G_ADD [[LOAD1]], [[LOAD]] - ; P5600: G_STORE [[ADD]](<2 x s64>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[ADD]](<2 x s64>), [[COPY2]](p0) :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) + %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>) from %ir.b) %5:_(<2 x s64>) = G_ADD %4, %3 - G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<2 x s64>), %2(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/add_vec_builtin.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/add_vec_builtin.mir index e43a38e2c65cb..ad865007f3d0c 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/add_vec_builtin.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/add_vec_builtin.mir @@ -40,18 +40,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load (<16 x s8>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY1]](p0) :: (load (<16 x s8>) from %ir.b) ; P5600: [[ADD:%[0-9]+]]:_(<16 x s8>) = G_ADD [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[ADD]](<16 x s8>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[ADD]](<16 x s8>), [[COPY2]](p0) :: (store (<16 x s8>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<16 x s8>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<16 x s8>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<16 x s8>) = G_LOAD %0(p0) :: (load (<16 x s8>) from %ir.a) + %4:_(<16 x s8>) = G_LOAD %1(p0) :: (load (<16 x s8>) from %ir.b) %5:_(<16 x s8>) = G_INTRINSIC intrinsic(@llvm.mips.addv.b), %3(<16 x s8>), %4(<16 x s8>) - G_STORE %5(<16 x s8>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<16 x s8>), %2(p0) :: (store (<16 x s8>) into %ir.c) RetRA ... @@ -68,18 +68,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load (<8 x s16>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY1]](p0) :: (load (<8 x s16>) from %ir.b) ; P5600: [[ADD:%[0-9]+]]:_(<8 x s16>) = G_ADD [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[ADD]](<8 x s16>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[ADD]](<8 x s16>), [[COPY2]](p0) :: (store (<8 x s16>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<8 x s16>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<8 x s16>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<8 x s16>) = G_LOAD %0(p0) :: (load (<8 x s16>) from %ir.a) + %4:_(<8 x s16>) = G_LOAD %1(p0) :: (load (<8 x s16>) from %ir.b) %5:_(<8 x s16>) = G_INTRINSIC intrinsic(@llvm.mips.addv.h), %3(<8 x s16>), %4(<8 x s16>) - G_STORE %5(<8 x s16>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<8 x s16>), %2(p0) :: (store (<8 x s16>) into %ir.c) RetRA ... @@ -96,18 +96,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load (<4 x s32>) from %ir.b) ; P5600: [[ADD:%[0-9]+]]:_(<4 x s32>) = G_ADD [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[ADD]](<4 x s32>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[ADD]](<4 x s32>), [[COPY2]](p0) :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) + %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>) from %ir.b) %5:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.mips.addv.w), %3(<4 x s32>), %4(<4 x s32>) - G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<4 x s32>), %2(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -124,18 +124,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load (<2 x s64>) from %ir.b) ; P5600: [[ADD:%[0-9]+]]:_(<2 x s64>) = G_ADD [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[ADD]](<2 x s64>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[ADD]](<2 x s64>), [[COPY2]](p0) :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) + %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>) from %ir.b) %5:_(<2 x s64>) = G_INTRINSIC intrinsic(@llvm.mips.addv.d), %3(<2 x s64>), %4(<2 x s64>) - G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<2 x s64>), %2(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... @@ -151,15 +151,15 @@ body: | ; P5600: liveins: $a0, $a1 ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 - ; P5600: [[LOAD:%[0-9]+]]:msa128b(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) + ; P5600: [[LOAD:%[0-9]+]]:msa128b(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load (<16 x s8>) from %ir.a) ; P5600: [[ADDVI_B:%[0-9]+]]:msa128b(<16 x s8>) = ADDVI_B [[LOAD]](<16 x s8>), 3 - ; P5600: G_STORE [[ADDVI_B]](<16 x s8>), [[COPY1]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[ADDVI_B]](<16 x s8>), [[COPY1]](p0) :: (store (<16 x s8>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 - %2:_(<16 x s8>) = G_LOAD %0(p0) :: (load 16 from %ir.a) + %2:_(<16 x s8>) = G_LOAD %0(p0) :: (load (<16 x s8>) from %ir.a) %3:_(<16 x s8>) = G_INTRINSIC intrinsic(@llvm.mips.addvi.b), %2(<16 x s8>), 3 - G_STORE %3(<16 x s8>), %1(p0) :: (store 16 into %ir.c) + G_STORE %3(<16 x s8>), %1(p0) :: (store (<16 x s8>) into %ir.c) RetRA ... @@ -175,15 +175,15 @@ body: | ; P5600: liveins: $a0, $a1 ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 - ; P5600: [[LOAD:%[0-9]+]]:msa128h(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) + ; P5600: [[LOAD:%[0-9]+]]:msa128h(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load (<8 x s16>) from %ir.a) ; P5600: [[ADDVI_H:%[0-9]+]]:msa128h(<8 x s16>) = ADDVI_H [[LOAD]](<8 x s16>), 18 - ; P5600: G_STORE [[ADDVI_H]](<8 x s16>), [[COPY1]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[ADDVI_H]](<8 x s16>), [[COPY1]](p0) :: (store (<8 x s16>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 - %2:_(<8 x s16>) = G_LOAD %0(p0) :: (load 16 from %ir.a) + %2:_(<8 x s16>) = G_LOAD %0(p0) :: (load (<8 x s16>) from %ir.a) %3:_(<8 x s16>) = G_INTRINSIC intrinsic(@llvm.mips.addvi.h), %2(<8 x s16>), 18 - G_STORE %3(<8 x s16>), %1(p0) :: (store 16 into %ir.c) + G_STORE %3(<8 x s16>), %1(p0) :: (store (<8 x s16>) into %ir.c) RetRA ... @@ -199,15 +199,15 @@ body: | ; P5600: liveins: $a0, $a1 ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 - ; P5600: [[LOAD:%[0-9]+]]:msa128w(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) + ; P5600: [[LOAD:%[0-9]+]]:msa128w(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>) from %ir.a) ; P5600: [[ADDVI_W:%[0-9]+]]:msa128w(<4 x s32>) = ADDVI_W [[LOAD]](<4 x s32>), 25 - ; P5600: G_STORE [[ADDVI_W]](<4 x s32>), [[COPY1]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[ADDVI_W]](<4 x s32>), [[COPY1]](p0) :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 - %2:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) + %2:_(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.mips.addvi.w), %2(<4 x s32>), 25 - G_STORE %3(<4 x s32>), %1(p0) :: (store 16 into %ir.c) + G_STORE %3(<4 x s32>), %1(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -223,15 +223,15 @@ body: | ; P5600: liveins: $a0, $a1 ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 - ; P5600: [[LOAD:%[0-9]+]]:msa128d(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) + ; P5600: [[LOAD:%[0-9]+]]:msa128d(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>) from %ir.a) ; P5600: [[ADDVI_D:%[0-9]+]]:msa128d(<2 x s64>) = ADDVI_D [[LOAD]](<2 x s64>), 31 - ; P5600: G_STORE [[ADDVI_D]](<2 x s64>), [[COPY1]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[ADDVI_D]](<2 x s64>), [[COPY1]](p0) :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 - %2:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) + %2:_(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) %3:_(<2 x s64>) = G_INTRINSIC intrinsic(@llvm.mips.addvi.d), %2(<2 x s64>), 31 - G_STORE %3(<2 x s64>), %1(p0) :: (store 16 into %ir.c) + G_STORE %3(<2 x s64>), %1(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/dyn_stackalloc.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/dyn_stackalloc.mir index 7f9f561c4b411..2f7d043afcc00 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/dyn_stackalloc.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/dyn_stackalloc.mir @@ -52,10 +52,10 @@ body: | ; MIPS32: $a2 = COPY [[COPY1]](s32) ; MIPS32: JAL &memset, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit $a2 ; MIPS32: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp - ; MIPS32: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY3]], [[COPY1]](s32) - ; MIPS32: [[COPY5:%[0-9]+]]:_(p0) = COPY [[GEP]](p0) + ; MIPS32: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY3]], [[COPY1]](s32) + ; MIPS32: [[COPY5:%[0-9]+]]:_(p0) = COPY [[PTR_ADD]](p0) ; MIPS32: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; MIPS32: G_STORE [[COPY6]](s32), [[COPY5]](p0) :: (store 1 into %ir.arrayidx) + ; MIPS32: G_STORE [[COPY6]](s32), [[COPY5]](p0) :: (store (s8) into %ir.arrayidx) ; MIPS32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp ; MIPS32: $a0 = COPY [[COPY3]](p0) ; MIPS32: JAL @puts, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit-def $v0 @@ -73,10 +73,10 @@ body: | %8:_(s32) = G_CONSTANT i32 -8 %9:_(s32) = G_AND %7, %8 %10:_(p0) = G_DYN_STACKALLOC %9(s32), 0 - G_MEMSET %10(p0), %0(s8), %1(s32), 0 :: (store 1 into %ir.vla) + G_MEMSET %10(p0), %0(s8), %1(s32), 0 :: (store (s8) into %ir.vla) %11:_(p0) = G_PTR_ADD %10, %1(s32) %12:_(p0) = COPY %11(p0) - G_STORE %13(s8), %12(p0) :: (store 1 into %ir.arrayidx) + G_STORE %13(s8), %12(p0) :: (store (s8) into %ir.arrayidx) ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp $a0 = COPY %10(p0) JAL @puts, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit-def $v0 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/fabs_vec.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/fabs_vec.mir index f886cc21a4ffc..1190eb0c30a12 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/fabs_vec.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/fabs_vec.mir @@ -18,15 +18,15 @@ body: | ; P5600: liveins: $a0, $a1 ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 - ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) + ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>) from %ir.a) ; P5600: [[FABS:%[0-9]+]]:_(<4 x s32>) = G_FABS [[LOAD]] - ; P5600: G_STORE [[FABS]](<4 x s32>), [[COPY1]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[FABS]](<4 x s32>), [[COPY1]](p0) :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 - %2:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) + %2:_(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) %3:_(<4 x s32>) = G_FABS %2 - G_STORE %3(<4 x s32>), %1(p0) :: (store 16 into %ir.c) + G_STORE %3(<4 x s32>), %1(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -42,15 +42,15 @@ body: | ; P5600: liveins: $a0, $a1 ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 - ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) + ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>) from %ir.a) ; P5600: [[FABS:%[0-9]+]]:_(<2 x s64>) = G_FABS [[LOAD]] - ; P5600: G_STORE [[FABS]](<2 x s64>), [[COPY1]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[FABS]](<2 x s64>), [[COPY1]](p0) :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 - %2:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) + %2:_(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) %3:_(<2 x s64>) = G_FABS %2 - G_STORE %3(<2 x s64>), %1(p0) :: (store 16 into %ir.c) + G_STORE %3(<2 x s64>), %1(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/fabs_vec_builtin.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/fabs_vec_builtin.mir index fba7bc8a53195..afab80f00afdd 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/fabs_vec_builtin.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/fabs_vec_builtin.mir @@ -21,15 +21,15 @@ body: | ; P5600: liveins: $a0, $a1 ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 - ; P5600: [[LOAD:%[0-9]+]]:msa128w(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) + ; P5600: [[LOAD:%[0-9]+]]:msa128w(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>) from %ir.a) ; P5600: [[FMAX_A_W:%[0-9]+]]:msa128w(<4 x s32>) = FMAX_A_W [[LOAD]](<4 x s32>), [[LOAD]](<4 x s32>) - ; P5600: G_STORE [[FMAX_A_W]](<4 x s32>), [[COPY1]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[FMAX_A_W]](<4 x s32>), [[COPY1]](p0) :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 - %2:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) + %2:_(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.mips.fmax.a.w), %2(<4 x s32>), %2(<4 x s32>) - G_STORE %3(<4 x s32>), %1(p0) :: (store 16 into %ir.c) + G_STORE %3(<4 x s32>), %1(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -45,15 +45,15 @@ body: | ; P5600: liveins: $a0, $a1 ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 - ; P5600: [[LOAD:%[0-9]+]]:msa128d(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) + ; P5600: [[LOAD:%[0-9]+]]:msa128d(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>) from %ir.a) ; P5600: [[FMAX_A_D:%[0-9]+]]:msa128d(<2 x s64>) = FMAX_A_D [[LOAD]](<2 x s64>), [[LOAD]](<2 x s64>) - ; P5600: G_STORE [[FMAX_A_D]](<2 x s64>), [[COPY1]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[FMAX_A_D]](<2 x s64>), [[COPY1]](p0) :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 - %2:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) + %2:_(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) %3:_(<2 x s64>) = G_INTRINSIC intrinsic(@llvm.mips.fmax.a.d), %2(<2 x s64>), %2(<2 x s64>) - G_STORE %3(<2 x s64>), %1(p0) :: (store 16 into %ir.c) + G_STORE %3(<2 x s64>), %1(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/fence.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/fence.mir index 3a5e42bbb0199..43b9bdfbfe9f1 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/fence.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/fence.mir @@ -16,12 +16,12 @@ body: | ; MIPS32-LABEL: name: atomic_load_i32 ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load monotonic 4 from %ir.ptr) + ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load monotonic (s32) from %ir.ptr) ; MIPS32: G_FENCE 4, 1 ; MIPS32: $v0 = COPY [[LOAD]](s32) ; MIPS32: RetRA implicit $v0 %0:_(p0) = COPY $a0 - %1:_(s32) = G_LOAD %0(p0) :: (load monotonic 4 from %ir.ptr) + %1:_(s32) = G_LOAD %0(p0) :: (load monotonic (s32) from %ir.ptr) G_FENCE 4, 1 $v0 = COPY %1(s32) RetRA implicit $v0 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/floating_point_vec_arithmetic_operations.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/floating_point_vec_arithmetic_operations.mir index e26310b1a81b9..4060c17690218 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/floating_point_vec_arithmetic_operations.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/floating_point_vec_arithmetic_operations.mir @@ -28,18 +28,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load (<4 x s32>) from %ir.b) ; P5600: [[FADD:%[0-9]+]]:_(<4 x s32>) = G_FADD [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[FADD]](<4 x s32>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[FADD]](<4 x s32>), [[COPY2]](p0) :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) + %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>) from %ir.b) %5:_(<4 x s32>) = G_FADD %3, %4 - G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<4 x s32>), %2(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -56,18 +56,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load (<2 x s64>) from %ir.b) ; P5600: [[FADD:%[0-9]+]]:_(<2 x s64>) = G_FADD [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[FADD]](<2 x s64>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[FADD]](<2 x s64>), [[COPY2]](p0) :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) + %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>) from %ir.b) %5:_(<2 x s64>) = G_FADD %3, %4 - G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<2 x s64>), %2(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... @@ -84,18 +84,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load (<4 x s32>) from %ir.b) ; P5600: [[FSUB:%[0-9]+]]:_(<4 x s32>) = G_FSUB [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[FSUB]](<4 x s32>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[FSUB]](<4 x s32>), [[COPY2]](p0) :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) + %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>) from %ir.b) %5:_(<4 x s32>) = G_FSUB %3, %4 - G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<4 x s32>), %2(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -112,18 +112,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load (<2 x s64>) from %ir.b) ; P5600: [[FSUB:%[0-9]+]]:_(<2 x s64>) = G_FSUB [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[FSUB]](<2 x s64>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[FSUB]](<2 x s64>), [[COPY2]](p0) :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) + %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>) from %ir.b) %5:_(<2 x s64>) = G_FSUB %3, %4 - G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<2 x s64>), %2(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... @@ -140,18 +140,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load (<4 x s32>) from %ir.b) ; P5600: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[FMUL]](<4 x s32>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[FMUL]](<4 x s32>), [[COPY2]](p0) :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) + %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>) from %ir.b) %5:_(<4 x s32>) = G_FMUL %3, %4 - G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<4 x s32>), %2(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -168,18 +168,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load (<2 x s64>) from %ir.b) ; P5600: [[FMUL:%[0-9]+]]:_(<2 x s64>) = G_FMUL [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[FMUL]](<2 x s64>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[FMUL]](<2 x s64>), [[COPY2]](p0) :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) + %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>) from %ir.b) %5:_(<2 x s64>) = G_FMUL %3, %4 - G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<2 x s64>), %2(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... @@ -196,18 +196,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load (<4 x s32>) from %ir.b) ; P5600: [[FDIV:%[0-9]+]]:_(<4 x s32>) = G_FDIV [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[FDIV]](<4 x s32>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[FDIV]](<4 x s32>), [[COPY2]](p0) :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) + %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>) from %ir.b) %5:_(<4 x s32>) = G_FDIV %3, %4 - G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<4 x s32>), %2(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -224,18 +224,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load (<2 x s64>) from %ir.b) ; P5600: [[FDIV:%[0-9]+]]:_(<2 x s64>) = G_FDIV [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[FDIV]](<2 x s64>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[FDIV]](<2 x s64>), [[COPY2]](p0) :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) + %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>) from %ir.b) %5:_(<2 x s64>) = G_FDIV %3, %4 - G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<2 x s64>), %2(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/floating_point_vec_arithmetic_operations_builtin.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/floating_point_vec_arithmetic_operations_builtin.mir index b874df19e13cd..b1fdeeac53628 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/floating_point_vec_arithmetic_operations_builtin.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/floating_point_vec_arithmetic_operations_builtin.mir @@ -40,18 +40,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load (<4 x s32>) from %ir.b) ; P5600: [[FADD:%[0-9]+]]:_(<4 x s32>) = G_FADD [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[FADD]](<4 x s32>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[FADD]](<4 x s32>), [[COPY2]](p0) :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) + %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>) from %ir.b) %5:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.mips.fadd.w), %3(<4 x s32>), %4(<4 x s32>) - G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<4 x s32>), %2(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -68,18 +68,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load (<2 x s64>) from %ir.b) ; P5600: [[FADD:%[0-9]+]]:_(<2 x s64>) = G_FADD [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[FADD]](<2 x s64>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[FADD]](<2 x s64>), [[COPY2]](p0) :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) + %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>) from %ir.b) %5:_(<2 x s64>) = G_INTRINSIC intrinsic(@llvm.mips.fadd.d), %3(<2 x s64>), %4(<2 x s64>) - G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<2 x s64>), %2(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... @@ -96,18 +96,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load (<4 x s32>) from %ir.b) ; P5600: [[FSUB:%[0-9]+]]:_(<4 x s32>) = G_FSUB [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[FSUB]](<4 x s32>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[FSUB]](<4 x s32>), [[COPY2]](p0) :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) + %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>) from %ir.b) %5:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.mips.fsub.w), %3(<4 x s32>), %4(<4 x s32>) - G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<4 x s32>), %2(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -124,18 +124,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load (<2 x s64>) from %ir.b) ; P5600: [[FSUB:%[0-9]+]]:_(<2 x s64>) = G_FSUB [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[FSUB]](<2 x s64>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[FSUB]](<2 x s64>), [[COPY2]](p0) :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) + %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>) from %ir.b) %5:_(<2 x s64>) = G_INTRINSIC intrinsic(@llvm.mips.fsub.d), %3(<2 x s64>), %4(<2 x s64>) - G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<2 x s64>), %2(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... @@ -152,18 +152,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load (<4 x s32>) from %ir.b) ; P5600: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[FMUL]](<4 x s32>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[FMUL]](<4 x s32>), [[COPY2]](p0) :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) + %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>) from %ir.b) %5:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.mips.fmul.w), %3(<4 x s32>), %4(<4 x s32>) - G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<4 x s32>), %2(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -180,18 +180,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load (<2 x s64>) from %ir.b) ; P5600: [[FMUL:%[0-9]+]]:_(<2 x s64>) = G_FMUL [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[FMUL]](<2 x s64>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[FMUL]](<2 x s64>), [[COPY2]](p0) :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) + %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>) from %ir.b) %5:_(<2 x s64>) = G_INTRINSIC intrinsic(@llvm.mips.fmul.d), %3(<2 x s64>), %4(<2 x s64>) - G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<2 x s64>), %2(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... @@ -208,18 +208,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load (<4 x s32>) from %ir.b) ; P5600: [[FDIV:%[0-9]+]]:_(<4 x s32>) = G_FDIV [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[FDIV]](<4 x s32>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[FDIV]](<4 x s32>), [[COPY2]](p0) :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) + %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>) from %ir.b) %5:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.mips.fdiv.w), %3(<4 x s32>), %4(<4 x s32>) - G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<4 x s32>), %2(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -236,18 +236,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load (<2 x s64>) from %ir.b) ; P5600: [[FDIV:%[0-9]+]]:_(<2 x s64>) = G_FDIV [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[FDIV]](<2 x s64>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[FDIV]](<2 x s64>), [[COPY2]](p0) :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) + %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>) from %ir.b) %5:_(<2 x s64>) = G_INTRINSIC intrinsic(@llvm.mips.fdiv.d), %3(<2 x s64>), %4(<2 x s64>) - G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<2 x s64>), %2(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/fsqrt_vec.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/fsqrt_vec.mir index a5994b6e88fd4..599c8c0c8da9d 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/fsqrt_vec.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/fsqrt_vec.mir @@ -18,15 +18,15 @@ body: | ; P5600: liveins: $a0, $a1 ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 - ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) + ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>) from %ir.a) ; P5600: [[FSQRT:%[0-9]+]]:_(<4 x s32>) = G_FSQRT [[LOAD]] - ; P5600: G_STORE [[FSQRT]](<4 x s32>), [[COPY1]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[FSQRT]](<4 x s32>), [[COPY1]](p0) :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 - %2:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) + %2:_(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) %3:_(<4 x s32>) = G_FSQRT %2 - G_STORE %3(<4 x s32>), %1(p0) :: (store 16 into %ir.c) + G_STORE %3(<4 x s32>), %1(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -42,15 +42,15 @@ body: | ; P5600: liveins: $a0, $a1 ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 - ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) + ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>) from %ir.a) ; P5600: [[FSQRT:%[0-9]+]]:_(<2 x s64>) = G_FSQRT [[LOAD]] - ; P5600: G_STORE [[FSQRT]](<2 x s64>), [[COPY1]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[FSQRT]](<2 x s64>), [[COPY1]](p0) :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 - %2:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) + %2:_(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) %3:_(<2 x s64>) = G_FSQRT %2 - G_STORE %3(<2 x s64>), %1(p0) :: (store 16 into %ir.c) + G_STORE %3(<2 x s64>), %1(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/fsqrt_vec_builtin.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/fsqrt_vec_builtin.mir index e6d31789a4865..36dfdbbdeaf03 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/fsqrt_vec_builtin.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/fsqrt_vec_builtin.mir @@ -21,15 +21,15 @@ body: | ; P5600: liveins: $a0, $a1 ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 - ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) + ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>) from %ir.a) ; P5600: [[FSQRT:%[0-9]+]]:_(<4 x s32>) = G_FSQRT [[LOAD]] - ; P5600: G_STORE [[FSQRT]](<4 x s32>), [[COPY1]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[FSQRT]](<4 x s32>), [[COPY1]](p0) :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 - %2:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) + %2:_(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.mips.fsqrt.w), %2(<4 x s32>) - G_STORE %3(<4 x s32>), %1(p0) :: (store 16 into %ir.c) + G_STORE %3(<4 x s32>), %1(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -45,15 +45,15 @@ body: | ; P5600: liveins: $a0, $a1 ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 - ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) + ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>) from %ir.a) ; P5600: [[FSQRT:%[0-9]+]]:_(<2 x s64>) = G_FSQRT [[LOAD]] - ; P5600: G_STORE [[FSQRT]](<2 x s64>), [[COPY1]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[FSQRT]](<2 x s64>), [[COPY1]](p0) :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 - %2:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) + %2:_(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) %3:_(<2 x s64>) = G_INTRINSIC intrinsic(@llvm.mips.fsqrt.d), %2(<2 x s64>) - G_STORE %3(<2 x s64>), %1(p0) :: (store 16 into %ir.c) + G_STORE %3(<2 x s64>), %1(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/load.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/load.mir index b0365f7c87db3..340fda34e95c7 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/load.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/load.mir @@ -19,11 +19,11 @@ body: | ; MIPS32-LABEL: name: load_i32 ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4 from %ir.ptr) + ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32) from %ir.ptr) ; MIPS32: $v0 = COPY [[LOAD]](s32) ; MIPS32: RetRA implicit $v0 %0:_(p0) = COPY $a0 - %1:_(s32) = G_LOAD %0(p0) :: (load 4 from %ir.ptr) + %1:_(s32) = G_LOAD %0(p0) :: (load (s32) from %ir.ptr) $v0 = COPY %1(s32) RetRA implicit $v0 @@ -39,13 +39,13 @@ body: | ; MIPS32-LABEL: name: load_i64 ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 8 from %ir.ptr) + ; MIPS32: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64) from %ir.ptr) ; MIPS32: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64) ; MIPS32: $v0 = COPY [[UV]](s32) ; MIPS32: $v1 = COPY [[UV1]](s32) ; MIPS32: RetRA implicit $v0, implicit $v1 %0:_(p0) = COPY $a0 - %1:_(s64) = G_LOAD %0(p0) :: (load 8 from %ir.ptr) + %1:_(s64) = G_LOAD %0(p0) :: (load (s64) from %ir.ptr) %2:_(s32), %3:_(s32) = G_UNMERGE_VALUES %1(s64) $v0 = COPY %2(s32) $v1 = COPY %3(s32) @@ -63,11 +63,11 @@ body: | ; MIPS32-LABEL: name: load_float ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4 from %ir.ptr) + ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32) from %ir.ptr) ; MIPS32: $f0 = COPY [[LOAD]](s32) ; MIPS32: RetRA implicit $f0 %0:_(p0) = COPY $a0 - %1:_(s32) = G_LOAD %0(p0) :: (load 4 from %ir.ptr) + %1:_(s32) = G_LOAD %0(p0) :: (load (s32) from %ir.ptr) $f0 = COPY %1(s32) RetRA implicit $f0 @@ -83,11 +83,11 @@ body: | ; MIPS32-LABEL: name: load_double ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 8 from %ir.ptr) + ; MIPS32: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64) from %ir.ptr) ; MIPS32: $d0 = COPY [[LOAD]](s64) ; MIPS32: RetRA implicit $d0 %0:_(p0) = COPY $a0 - %1:_(s64) = G_LOAD %0(p0) :: (load 8 from %ir.ptr) + %1:_(s64) = G_LOAD %0(p0) :: (load (s64) from %ir.ptr) $d0 = COPY %1(s64) RetRA implicit $d0 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/load_4_unaligned.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/load_4_unaligned.mir index e6e46e7e1af20..dd76044a41617 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/load_4_unaligned.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/load_4_unaligned.mir @@ -69,16 +69,16 @@ body: | bb.1.entry: ; MIPS32-LABEL: name: load_float_align1 ; MIPS32: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @float_align1 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load 4 from @float_align1, align 1) + ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load (s32) from @float_align1, align 1) ; MIPS32: $f0 = COPY [[LOAD]](s32) ; MIPS32: RetRA implicit $f0 ; MIPS32R6-LABEL: name: load_float_align1 ; MIPS32R6: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @float_align1 - ; MIPS32R6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load 4 from @float_align1, align 1) + ; MIPS32R6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load (s32) from @float_align1, align 1) ; MIPS32R6: $f0 = COPY [[LOAD]](s32) ; MIPS32R6: RetRA implicit $f0 %1:_(p0) = G_GLOBAL_VALUE @float_align1 - %0:_(s32) = G_LOAD %1(p0) :: (dereferenceable load 4 from @float_align1, align 1) + %0:_(s32) = G_LOAD %1(p0) :: (dereferenceable load (s32) from @float_align1, align 1) $f0 = COPY %0(s32) RetRA implicit $f0 @@ -91,16 +91,16 @@ body: | bb.1.entry: ; MIPS32-LABEL: name: load_float_align2 ; MIPS32: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @float_align2 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load 4 from @float_align2, align 2) + ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load (s32) from @float_align2, align 2) ; MIPS32: $f0 = COPY [[LOAD]](s32) ; MIPS32: RetRA implicit $f0 ; MIPS32R6-LABEL: name: load_float_align2 ; MIPS32R6: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @float_align2 - ; MIPS32R6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load 4 from @float_align2, align 2) + ; MIPS32R6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load (s32) from @float_align2, align 2) ; MIPS32R6: $f0 = COPY [[LOAD]](s32) ; MIPS32R6: RetRA implicit $f0 %1:_(p0) = G_GLOBAL_VALUE @float_align2 - %0:_(s32) = G_LOAD %1(p0) :: (dereferenceable load 4 from @float_align2, align 2) + %0:_(s32) = G_LOAD %1(p0) :: (dereferenceable load (s32) from @float_align2, align 2) $f0 = COPY %0(s32) RetRA implicit $f0 @@ -113,16 +113,16 @@ body: | bb.1.entry: ; MIPS32-LABEL: name: load_float_align4 ; MIPS32: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @float_align4 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load 4 from @float_align4) + ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load (s32) from @float_align4) ; MIPS32: $f0 = COPY [[LOAD]](s32) ; MIPS32: RetRA implicit $f0 ; MIPS32R6-LABEL: name: load_float_align4 ; MIPS32R6: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @float_align4 - ; MIPS32R6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load 4 from @float_align4) + ; MIPS32R6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load (s32) from @float_align4) ; MIPS32R6: $f0 = COPY [[LOAD]](s32) ; MIPS32R6: RetRA implicit $f0 %1:_(p0) = G_GLOBAL_VALUE @float_align4 - %0:_(s32) = G_LOAD %1(p0) :: (dereferenceable load 4 from @float_align4) + %0:_(s32) = G_LOAD %1(p0) :: (dereferenceable load (s32) from @float_align4) $f0 = COPY %0(s32) RetRA implicit $f0 @@ -135,16 +135,16 @@ body: | bb.1.entry: ; MIPS32-LABEL: name: load_float_align8 ; MIPS32: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @float_align8 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load 4 from @float_align8, align 8) + ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load (s32) from @float_align8, align 8) ; MIPS32: $f0 = COPY [[LOAD]](s32) ; MIPS32: RetRA implicit $f0 ; MIPS32R6-LABEL: name: load_float_align8 ; MIPS32R6: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @float_align8 - ; MIPS32R6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load 4 from @float_align8, align 8) + ; MIPS32R6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load (s32) from @float_align8, align 8) ; MIPS32R6: $f0 = COPY [[LOAD]](s32) ; MIPS32R6: RetRA implicit $f0 %1:_(p0) = G_GLOBAL_VALUE @float_align8 - %0:_(s32) = G_LOAD %1(p0) :: (dereferenceable load 4 from @float_align8, align 8) + %0:_(s32) = G_LOAD %1(p0) :: (dereferenceable load (s32) from @float_align8, align 8) $f0 = COPY %0(s32) RetRA implicit $f0 @@ -157,16 +157,16 @@ body: | bb.1.entry: ; MIPS32-LABEL: name: load_i32_align1 ; MIPS32: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i32_align1 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load 4 from @i32_align1, align 1) + ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load (s32) from @i32_align1, align 1) ; MIPS32: $v0 = COPY [[LOAD]](s32) ; MIPS32: RetRA implicit $v0 ; MIPS32R6-LABEL: name: load_i32_align1 ; MIPS32R6: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i32_align1 - ; MIPS32R6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load 4 from @i32_align1, align 1) + ; MIPS32R6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load (s32) from @i32_align1, align 1) ; MIPS32R6: $v0 = COPY [[LOAD]](s32) ; MIPS32R6: RetRA implicit $v0 %1:_(p0) = G_GLOBAL_VALUE @i32_align1 - %0:_(s32) = G_LOAD %1(p0) :: (dereferenceable load 4 from @i32_align1, align 1) + %0:_(s32) = G_LOAD %1(p0) :: (dereferenceable load (s32) from @i32_align1, align 1) $v0 = COPY %0(s32) RetRA implicit $v0 @@ -179,16 +179,16 @@ body: | bb.1.entry: ; MIPS32-LABEL: name: load_i32_align2 ; MIPS32: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i32_align2 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load 4 from @i32_align2, align 2) + ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load (s32) from @i32_align2, align 2) ; MIPS32: $v0 = COPY [[LOAD]](s32) ; MIPS32: RetRA implicit $v0 ; MIPS32R6-LABEL: name: load_i32_align2 ; MIPS32R6: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i32_align2 - ; MIPS32R6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load 4 from @i32_align2, align 2) + ; MIPS32R6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load (s32) from @i32_align2, align 2) ; MIPS32R6: $v0 = COPY [[LOAD]](s32) ; MIPS32R6: RetRA implicit $v0 %1:_(p0) = G_GLOBAL_VALUE @i32_align2 - %0:_(s32) = G_LOAD %1(p0) :: (dereferenceable load 4 from @i32_align2, align 2) + %0:_(s32) = G_LOAD %1(p0) :: (dereferenceable load (s32) from @i32_align2, align 2) $v0 = COPY %0(s32) RetRA implicit $v0 @@ -201,16 +201,16 @@ body: | bb.1.entry: ; MIPS32-LABEL: name: load_i32_align4 ; MIPS32: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i32_align4 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load 4 from @i32_align4) + ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load (s32) from @i32_align4) ; MIPS32: $v0 = COPY [[LOAD]](s32) ; MIPS32: RetRA implicit $v0 ; MIPS32R6-LABEL: name: load_i32_align4 ; MIPS32R6: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i32_align4 - ; MIPS32R6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load 4 from @i32_align4) + ; MIPS32R6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load (s32) from @i32_align4) ; MIPS32R6: $v0 = COPY [[LOAD]](s32) ; MIPS32R6: RetRA implicit $v0 %1:_(p0) = G_GLOBAL_VALUE @i32_align4 - %0:_(s32) = G_LOAD %1(p0) :: (dereferenceable load 4 from @i32_align4) + %0:_(s32) = G_LOAD %1(p0) :: (dereferenceable load (s32) from @i32_align4) $v0 = COPY %0(s32) RetRA implicit $v0 @@ -223,16 +223,16 @@ body: | bb.1.entry: ; MIPS32-LABEL: name: load_i32_align8 ; MIPS32: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i32_align8 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load 4 from @i32_align8, align 8) + ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load (s32) from @i32_align8, align 8) ; MIPS32: $v0 = COPY [[LOAD]](s32) ; MIPS32: RetRA implicit $v0 ; MIPS32R6-LABEL: name: load_i32_align8 ; MIPS32R6: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i32_align8 - ; MIPS32R6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load 4 from @i32_align8, align 8) + ; MIPS32R6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load (s32) from @i32_align8, align 8) ; MIPS32R6: $v0 = COPY [[LOAD]](s32) ; MIPS32R6: RetRA implicit $v0 %1:_(p0) = G_GLOBAL_VALUE @i32_align8 - %0:_(s32) = G_LOAD %1(p0) :: (dereferenceable load 4 from @i32_align8, align 8) + %0:_(s32) = G_LOAD %1(p0) :: (dereferenceable load (s32) from @i32_align8, align 8) $v0 = COPY %0(s32) RetRA implicit $v0 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/load_store_vec.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/load_store_vec.mir index d191de0ab46aa..cd83d2f797951 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/load_store_vec.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/load_store_vec.mir @@ -22,13 +22,13 @@ body: | ; P5600: liveins: $a0, $a1 ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 - ; P5600: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) - ; P5600: G_STORE [[LOAD]](<16 x s8>), [[COPY]](p0) :: (store 16 into %ir.a) + ; P5600: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY1]](p0) :: (load (<16 x s8>) from %ir.b) + ; P5600: G_STORE [[LOAD]](<16 x s8>), [[COPY]](p0) :: (store (<16 x s8>) into %ir.a) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 - %2:_(<16 x s8>) = G_LOAD %1(p0) :: (load 16 from %ir.b) - G_STORE %2(<16 x s8>), %0(p0) :: (store 16 into %ir.a) + %2:_(<16 x s8>) = G_LOAD %1(p0) :: (load (<16 x s8>) from %ir.b) + G_STORE %2(<16 x s8>), %0(p0) :: (store (<16 x s8>) into %ir.a) RetRA ... @@ -44,13 +44,13 @@ body: | ; P5600: liveins: $a0, $a1 ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 - ; P5600: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) - ; P5600: G_STORE [[LOAD]](<8 x s16>), [[COPY]](p0) :: (store 16 into %ir.a) + ; P5600: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY1]](p0) :: (load (<8 x s16>) from %ir.b) + ; P5600: G_STORE [[LOAD]](<8 x s16>), [[COPY]](p0) :: (store (<8 x s16>) into %ir.a) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 - %2:_(<8 x s16>) = G_LOAD %1(p0) :: (load 16 from %ir.b) - G_STORE %2(<8 x s16>), %0(p0) :: (store 16 into %ir.a) + %2:_(<8 x s16>) = G_LOAD %1(p0) :: (load (<8 x s16>) from %ir.b) + G_STORE %2(<8 x s16>), %0(p0) :: (store (<8 x s16>) into %ir.a) RetRA ... @@ -66,13 +66,13 @@ body: | ; P5600: liveins: $a0, $a1 ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 - ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) - ; P5600: G_STORE [[LOAD]](<4 x s32>), [[COPY]](p0) :: (store 16 into %ir.a) + ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load (<4 x s32>) from %ir.b) + ; P5600: G_STORE [[LOAD]](<4 x s32>), [[COPY]](p0) :: (store (<4 x s32>) into %ir.a) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 - %2:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b) - G_STORE %2(<4 x s32>), %0(p0) :: (store 16 into %ir.a) + %2:_(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>) from %ir.b) + G_STORE %2(<4 x s32>), %0(p0) :: (store (<4 x s32>) into %ir.a) RetRA ... @@ -88,13 +88,13 @@ body: | ; P5600: liveins: $a0, $a1 ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 - ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) - ; P5600: G_STORE [[LOAD]](<2 x s64>), [[COPY]](p0) :: (store 16 into %ir.a) + ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load (<2 x s64>) from %ir.b) + ; P5600: G_STORE [[LOAD]](<2 x s64>), [[COPY]](p0) :: (store (<2 x s64>) into %ir.a) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 - %2:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b) - G_STORE %2(<2 x s64>), %0(p0) :: (store 16 into %ir.a) + %2:_(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>) from %ir.b) + G_STORE %2(<2 x s64>), %0(p0) :: (store (<2 x s64>) into %ir.a) RetRA ... @@ -110,13 +110,13 @@ body: | ; P5600: liveins: $a0, $a1 ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 - ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) - ; P5600: G_STORE [[LOAD]](<4 x s32>), [[COPY]](p0) :: (store 16 into %ir.a) + ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load (<4 x s32>) from %ir.b) + ; P5600: G_STORE [[LOAD]](<4 x s32>), [[COPY]](p0) :: (store (<4 x s32>) into %ir.a) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 - %2:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b) - G_STORE %2(<4 x s32>), %0(p0) :: (store 16 into %ir.a) + %2:_(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>) from %ir.b) + G_STORE %2(<4 x s32>), %0(p0) :: (store (<4 x s32>) into %ir.a) RetRA ... @@ -132,13 +132,13 @@ body: | ; P5600: liveins: $a0, $a1 ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 - ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) - ; P5600: G_STORE [[LOAD]](<2 x s64>), [[COPY]](p0) :: (store 16 into %ir.a) + ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load (<2 x s64>) from %ir.b) + ; P5600: G_STORE [[LOAD]](<2 x s64>), [[COPY]](p0) :: (store (<2 x s64>) into %ir.a) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 - %2:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b) - G_STORE %2(<2 x s64>), %0(p0) :: (store 16 into %ir.a) + %2:_(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>) from %ir.b) + G_STORE %2(<2 x s64>), %0(p0) :: (store (<2 x s64>) into %ir.a) RetRA ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir index b146aa5ff13d5..c5a35cb1f8808 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir @@ -271,13 +271,13 @@ body: | ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 ; MIPS32: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load 4 from %fixed-stack.0, align 8) + ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8) ; MIPS32: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 - ; MIPS32: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (load 4 from %fixed-stack.1) + ; MIPS32: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (load (s32) from %fixed-stack.1) ; MIPS32: [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2 - ; MIPS32: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p0) :: (load 4 from %fixed-stack.2, align 8) + ; MIPS32: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p0) :: (load (s32) from %fixed-stack.2, align 8) ; MIPS32: [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.3 - ; MIPS32: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p0) :: (load 4 from %fixed-stack.3) + ; MIPS32: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p0) :: (load (s32) from %fixed-stack.3) ; MIPS32: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[LOAD]], [[COPY]] ; MIPS32: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[LOAD1]], [[COPY]] ; MIPS32: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[LOAD]], [[COPY1]] @@ -346,13 +346,13 @@ body: | %5:_(s32) = COPY $a3 %0:_(s128) = G_MERGE_VALUES %2(s32), %3(s32), %4(s32), %5(s32) %10:_(p0) = G_FRAME_INDEX %fixed-stack.3 - %6:_(s32) = G_LOAD %10(p0) :: (load 4 from %fixed-stack.3, align 8) + %6:_(s32) = G_LOAD %10(p0) :: (load (s32) from %fixed-stack.3, align 8) %11:_(p0) = G_FRAME_INDEX %fixed-stack.2 - %7:_(s32) = G_LOAD %11(p0) :: (load 4 from %fixed-stack.2) + %7:_(s32) = G_LOAD %11(p0) :: (load (s32) from %fixed-stack.2) %12:_(p0) = G_FRAME_INDEX %fixed-stack.1 - %8:_(s32) = G_LOAD %12(p0) :: (load 4 from %fixed-stack.1, align 8) + %8:_(s32) = G_LOAD %12(p0) :: (load (s32) from %fixed-stack.1, align 8) %13:_(p0) = G_FRAME_INDEX %fixed-stack.0 - %9:_(s32) = G_LOAD %13(p0) :: (load 4 from %fixed-stack.0) + %9:_(s32) = G_LOAD %13(p0) :: (load (s32) from %fixed-stack.0) %1:_(s128) = G_MERGE_VALUES %6(s32), %7(s32), %8(s32), %9(s32) %14:_(s128) = G_MUL %1, %0 %15:_(s32), %16:_(s32), %17:_(s32), %18:_(s32) = G_UNMERGE_VALUES %14(s128) @@ -446,16 +446,16 @@ body: | ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; MIPS32: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32) ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] - ; MIPS32: G_STORE [[AND]](s32), [[COPY3]](p0) :: (store 1 into %ir.pcarry_flag) - ; MIPS32: G_STORE [[MUL]](s32), [[COPY2]](p0) :: (store 4 into %ir.pmul) + ; MIPS32: G_STORE [[AND]](s32), [[COPY3]](p0) :: (store (s1) into %ir.pcarry_flag) + ; MIPS32: G_STORE [[MUL]](s32), [[COPY2]](p0) :: (store (s32) into %ir.pmul) ; MIPS32: RetRA %0:_(s32) = COPY $a0 %1:_(s32) = COPY $a1 %2:_(p0) = COPY $a2 %3:_(p0) = COPY $a3 %4:_(s32), %5:_(s1) = G_UMULO %0, %1 - G_STORE %5(s1), %3(p0) :: (store 1 into %ir.pcarry_flag) - G_STORE %4(s32), %2(p0) :: (store 4 into %ir.pmul) + G_STORE %5(s1), %3(p0) :: (store (s1) into %ir.pcarry_flag) + G_STORE %4(s32), %2(p0) :: (store (s32) into %ir.pmul) RetRA ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul_vec.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul_vec.mir index 9e9149363cfdc..a0934a4328d27 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul_vec.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul_vec.mir @@ -21,18 +21,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load (<16 x s8>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY1]](p0) :: (load (<16 x s8>) from %ir.b) ; P5600: [[MUL:%[0-9]+]]:_(<16 x s8>) = G_MUL [[LOAD1]], [[LOAD]] - ; P5600: G_STORE [[MUL]](<16 x s8>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[MUL]](<16 x s8>), [[COPY2]](p0) :: (store (<16 x s8>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<16 x s8>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<16 x s8>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<16 x s8>) = G_LOAD %0(p0) :: (load (<16 x s8>) from %ir.a) + %4:_(<16 x s8>) = G_LOAD %1(p0) :: (load (<16 x s8>) from %ir.b) %5:_(<16 x s8>) = G_MUL %4, %3 - G_STORE %5(<16 x s8>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<16 x s8>), %2(p0) :: (store (<16 x s8>) into %ir.c) RetRA ... @@ -49,18 +49,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load (<8 x s16>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY1]](p0) :: (load (<8 x s16>) from %ir.b) ; P5600: [[MUL:%[0-9]+]]:_(<8 x s16>) = G_MUL [[LOAD1]], [[LOAD]] - ; P5600: G_STORE [[MUL]](<8 x s16>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[MUL]](<8 x s16>), [[COPY2]](p0) :: (store (<8 x s16>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<8 x s16>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<8 x s16>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<8 x s16>) = G_LOAD %0(p0) :: (load (<8 x s16>) from %ir.a) + %4:_(<8 x s16>) = G_LOAD %1(p0) :: (load (<8 x s16>) from %ir.b) %5:_(<8 x s16>) = G_MUL %4, %3 - G_STORE %5(<8 x s16>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<8 x s16>), %2(p0) :: (store (<8 x s16>) into %ir.c) RetRA ... @@ -77,18 +77,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load (<4 x s32>) from %ir.b) ; P5600: [[MUL:%[0-9]+]]:_(<4 x s32>) = G_MUL [[LOAD1]], [[LOAD]] - ; P5600: G_STORE [[MUL]](<4 x s32>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[MUL]](<4 x s32>), [[COPY2]](p0) :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) + %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>) from %ir.b) %5:_(<4 x s32>) = G_MUL %4, %3 - G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<4 x s32>), %2(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -105,18 +105,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load (<2 x s64>) from %ir.b) ; P5600: [[MUL:%[0-9]+]]:_(<2 x s64>) = G_MUL [[LOAD1]], [[LOAD]] - ; P5600: G_STORE [[MUL]](<2 x s64>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[MUL]](<2 x s64>), [[COPY2]](p0) :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) + %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>) from %ir.b) %5:_(<2 x s64>) = G_MUL %4, %3 - G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<2 x s64>), %2(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul_vec_builtin.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul_vec_builtin.mir index 6019a62d07443..30fb1cd1866a8 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul_vec_builtin.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul_vec_builtin.mir @@ -28,18 +28,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load (<16 x s8>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY1]](p0) :: (load (<16 x s8>) from %ir.b) ; P5600: [[MUL:%[0-9]+]]:_(<16 x s8>) = G_MUL [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[MUL]](<16 x s8>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[MUL]](<16 x s8>), [[COPY2]](p0) :: (store (<16 x s8>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<16 x s8>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<16 x s8>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<16 x s8>) = G_LOAD %0(p0) :: (load (<16 x s8>) from %ir.a) + %4:_(<16 x s8>) = G_LOAD %1(p0) :: (load (<16 x s8>) from %ir.b) %5:_(<16 x s8>) = G_INTRINSIC intrinsic(@llvm.mips.mulv.b), %3(<16 x s8>), %4(<16 x s8>) - G_STORE %5(<16 x s8>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<16 x s8>), %2(p0) :: (store (<16 x s8>) into %ir.c) RetRA ... @@ -56,18 +56,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load (<8 x s16>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY1]](p0) :: (load (<8 x s16>) from %ir.b) ; P5600: [[MUL:%[0-9]+]]:_(<8 x s16>) = G_MUL [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[MUL]](<8 x s16>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[MUL]](<8 x s16>), [[COPY2]](p0) :: (store (<8 x s16>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<8 x s16>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<8 x s16>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<8 x s16>) = G_LOAD %0(p0) :: (load (<8 x s16>) from %ir.a) + %4:_(<8 x s16>) = G_LOAD %1(p0) :: (load (<8 x s16>) from %ir.b) %5:_(<8 x s16>) = G_INTRINSIC intrinsic(@llvm.mips.mulv.h), %3(<8 x s16>), %4(<8 x s16>) - G_STORE %5(<8 x s16>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<8 x s16>), %2(p0) :: (store (<8 x s16>) into %ir.c) RetRA ... @@ -84,18 +84,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load (<4 x s32>) from %ir.b) ; P5600: [[MUL:%[0-9]+]]:_(<4 x s32>) = G_MUL [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[MUL]](<4 x s32>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[MUL]](<4 x s32>), [[COPY2]](p0) :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) + %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>) from %ir.b) %5:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.mips.mulv.w), %3(<4 x s32>), %4(<4 x s32>) - G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<4 x s32>), %2(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -112,18 +112,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load (<2 x s64>) from %ir.b) ; P5600: [[MUL:%[0-9]+]]:_(<2 x s64>) = G_MUL [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[MUL]](<2 x s64>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[MUL]](<2 x s64>), [[COPY2]](p0) :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) + %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>) from %ir.b) %5:_(<2 x s64>) = G_INTRINSIC intrinsic(@llvm.mips.mulv.d), %3(<2 x s64>), %4(<2 x s64>) - G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<2 x s64>), %2(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/phi.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/phi.mir index e14c3a16b6b62..8ebe509ad3351 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/phi.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/phi.mir @@ -367,9 +367,9 @@ body: | ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a3 ; MIPS32: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) ; MIPS32: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load 4 from %fixed-stack.0, align 8) + ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8) ; MIPS32: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 - ; MIPS32: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (load 4 from %fixed-stack.1) + ; MIPS32: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (load (s32) from %fixed-stack.1) ; MIPS32: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) @@ -396,9 +396,9 @@ body: | %5:_(s32) = COPY $a3 %1:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) %8:_(p0) = G_FRAME_INDEX %fixed-stack.1 - %6:_(s32) = G_LOAD %8(p0) :: (load 4 from %fixed-stack.1, align 8) + %6:_(s32) = G_LOAD %8(p0) :: (load (s32) from %fixed-stack.1, align 8) %9:_(p0) = G_FRAME_INDEX %fixed-stack.0 - %7:_(s32) = G_LOAD %9(p0) :: (load 4 from %fixed-stack.0) + %7:_(s32) = G_LOAD %9(p0) :: (load (s32) from %fixed-stack.0) %2:_(s64) = G_MERGE_VALUES %6(s32), %7(s32) G_BRCOND %0(s1), %bb.2 G_BR %bb.3 @@ -429,8 +429,8 @@ body: | ; MIPS32: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; MIPS32: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 ; MIPS32: [[COPY3:%[0-9]+]]:_(p0) = COPY $a3 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY1]](p0) :: (load 8 from %ir.i64_ptr_a) - ; MIPS32: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[COPY2]](p0) :: (load 8 from %ir.i64_ptr_b) + ; MIPS32: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY1]](p0) :: (load (s64) from %ir.i64_ptr_a) + ; MIPS32: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[COPY2]](p0) :: (load (s64) from %ir.i64_ptr_b) ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; MIPS32: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] @@ -443,7 +443,7 @@ body: | ; MIPS32: successors: %bb.3(0x80000000) ; MIPS32: bb.3.cond.end: ; MIPS32: [[PHI:%[0-9]+]]:_(s64) = G_PHI [[LOAD]](s64), %bb.1, [[LOAD1]](s64), %bb.2 - ; MIPS32: G_STORE [[PHI]](s64), [[COPY3]](p0) :: (store 8 into %ir.i64_ptr_c) + ; MIPS32: G_STORE [[PHI]](s64), [[COPY3]](p0) :: (store (s64) into %ir.i64_ptr_c) ; MIPS32: RetRA bb.1.entry: liveins: $a0, $a1, $a2, $a3 @@ -453,8 +453,8 @@ body: | %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 %3:_(p0) = COPY $a3 - %5:_(s64) = G_LOAD %1(p0) :: (load 8 from %ir.i64_ptr_a) - %6:_(s64) = G_LOAD %2(p0) :: (load 8 from %ir.i64_ptr_b) + %5:_(s64) = G_LOAD %1(p0) :: (load (s64) from %ir.i64_ptr_a) + %6:_(s64) = G_LOAD %2(p0) :: (load (s64) from %ir.i64_ptr_b) G_BRCOND %0(s1), %bb.2 G_BR %bb.3 @@ -465,7 +465,7 @@ body: | bb.4.cond.end: %7:_(s64) = G_PHI %5(s64), %bb.2, %6(s64), %bb.3 - G_STORE %7(s64), %3(p0) :: (store 8 into %ir.i64_ptr_c) + G_STORE %7(s64), %3(p0) :: (store (s64) into %ir.i64_ptr_c) RetRA ... @@ -529,8 +529,8 @@ body: | ; MIPS32: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; MIPS32: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 ; MIPS32: [[COPY3:%[0-9]+]]:_(p0) = COPY $a3 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p0) :: (load 4 from %ir.f32_ptr_a) - ; MIPS32: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY2]](p0) :: (load 4 from %ir.f32_ptr_b) + ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p0) :: (load (s32) from %ir.f32_ptr_a) + ; MIPS32: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY2]](p0) :: (load (s32) from %ir.f32_ptr_b) ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; MIPS32: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] @@ -543,7 +543,7 @@ body: | ; MIPS32: successors: %bb.3(0x80000000) ; MIPS32: bb.3.cond.end: ; MIPS32: [[PHI:%[0-9]+]]:_(s32) = G_PHI [[LOAD]](s32), %bb.1, [[LOAD1]](s32), %bb.2 - ; MIPS32: G_STORE [[PHI]](s32), [[COPY3]](p0) :: (store 4 into %ir.f32_ptr_c) + ; MIPS32: G_STORE [[PHI]](s32), [[COPY3]](p0) :: (store (s32) into %ir.f32_ptr_c) ; MIPS32: RetRA bb.1.entry: liveins: $a0, $a1, $a2, $a3 @@ -553,8 +553,8 @@ body: | %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 %3:_(p0) = COPY $a3 - %5:_(s32) = G_LOAD %1(p0) :: (load 4 from %ir.f32_ptr_a) - %6:_(s32) = G_LOAD %2(p0) :: (load 4 from %ir.f32_ptr_b) + %5:_(s32) = G_LOAD %1(p0) :: (load (s32) from %ir.f32_ptr_a) + %6:_(s32) = G_LOAD %2(p0) :: (load (s32) from %ir.f32_ptr_b) G_BRCOND %0(s1), %bb.2 G_BR %bb.3 @@ -565,7 +565,7 @@ body: | bb.4.cond.end: %7:_(s32) = G_PHI %5(s32), %bb.2, %6(s32), %bb.3 - G_STORE %7(s32), %3(p0) :: (store 4 into %ir.f32_ptr_c) + G_STORE %7(s32), %3(p0) :: (store (s32) into %ir.f32_ptr_c) RetRA ... @@ -583,7 +583,7 @@ body: | ; MIPS32: [[COPY:%[0-9]+]]:_(s64) = COPY $d6 ; MIPS32: [[COPY1:%[0-9]+]]:_(s64) = COPY $d7 ; MIPS32: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load 4 from %fixed-stack.0, align 8) + ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8) ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] @@ -604,7 +604,7 @@ body: | %0:_(s64) = COPY $d6 %1:_(s64) = COPY $d7 %4:_(p0) = G_FRAME_INDEX %fixed-stack.0 - %3:_(s32) = G_LOAD %4(p0) :: (load 4 from %fixed-stack.0, align 8) + %3:_(s32) = G_LOAD %4(p0) :: (load (s32) from %fixed-stack.0, align 8) %2:_(s1) = G_TRUNC %3(s32) G_BRCOND %2(s1), %bb.2 G_BR %bb.3 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/pointers.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/pointers.mir index 1176a25bc20f9..805298ebd76ea 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/pointers.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/pointers.mir @@ -18,11 +18,11 @@ body: | ; MIPS32-LABEL: name: ptr_arg_in_regs ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4 from %ir.p) + ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32) from %ir.p) ; MIPS32: $v0 = COPY [[LOAD]](s32) ; MIPS32: RetRA implicit $v0 %0:_(p0) = COPY $a0 - %1:_(s32) = G_LOAD %0(p0) :: (load 4 from %ir.p) + %1:_(s32) = G_LOAD %0(p0) :: (load (s32) from %ir.p) $v0 = COPY %1(s32) RetRA implicit $v0 @@ -44,8 +44,8 @@ body: | ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 ; MIPS32: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; MIPS32: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (load 4 from %fixed-stack.0) - ; MIPS32: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p0) :: (load 4 from %ir.p) + ; MIPS32: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (load (p0) from %fixed-stack.0) + ; MIPS32: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p0) :: (load (s32) from %ir.p) ; MIPS32: $v0 = COPY [[LOAD1]](s32) ; MIPS32: RetRA implicit $v0 %0:_(s32) = COPY $a0 @@ -53,8 +53,8 @@ body: | %2:_(s32) = COPY $a2 %3:_(s32) = COPY $a3 %5:_(p0) = G_FRAME_INDEX %fixed-stack.0 - %4:_(p0) = G_LOAD %5(p0) :: (load 4 from %fixed-stack.0, align 4) - %6:_(s32) = G_LOAD %4(p0) :: (load 4 from %ir.p) + %4:_(p0) = G_LOAD %5(p0) :: (load (p0) from %fixed-stack.0, align 4) + %6:_(s32) = G_LOAD %4(p0) :: (load (s32) from %ir.p) $v0 = COPY %6(s32) RetRA implicit $v0 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/rem_and_div_vec.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/rem_and_div_vec.mir index c66f567fc79fe..06be78b4533bc 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/rem_and_div_vec.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/rem_and_div_vec.mir @@ -36,18 +36,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load (<16 x s8>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY1]](p0) :: (load (<16 x s8>) from %ir.b) ; P5600: [[SDIV:%[0-9]+]]:_(<16 x s8>) = G_SDIV [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[SDIV]](<16 x s8>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[SDIV]](<16 x s8>), [[COPY2]](p0) :: (store (<16 x s8>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<16 x s8>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<16 x s8>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<16 x s8>) = G_LOAD %0(p0) :: (load (<16 x s8>) from %ir.a) + %4:_(<16 x s8>) = G_LOAD %1(p0) :: (load (<16 x s8>) from %ir.b) %5:_(<16 x s8>) = G_SDIV %3, %4 - G_STORE %5(<16 x s8>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<16 x s8>), %2(p0) :: (store (<16 x s8>) into %ir.c) RetRA ... @@ -64,18 +64,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load (<8 x s16>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY1]](p0) :: (load (<8 x s16>) from %ir.b) ; P5600: [[SDIV:%[0-9]+]]:_(<8 x s16>) = G_SDIV [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[SDIV]](<8 x s16>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[SDIV]](<8 x s16>), [[COPY2]](p0) :: (store (<8 x s16>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<8 x s16>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<8 x s16>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<8 x s16>) = G_LOAD %0(p0) :: (load (<8 x s16>) from %ir.a) + %4:_(<8 x s16>) = G_LOAD %1(p0) :: (load (<8 x s16>) from %ir.b) %5:_(<8 x s16>) = G_SDIV %3, %4 - G_STORE %5(<8 x s16>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<8 x s16>), %2(p0) :: (store (<8 x s16>) into %ir.c) RetRA ... @@ -92,18 +92,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load (<4 x s32>) from %ir.b) ; P5600: [[SDIV:%[0-9]+]]:_(<4 x s32>) = G_SDIV [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[SDIV]](<4 x s32>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[SDIV]](<4 x s32>), [[COPY2]](p0) :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) + %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>) from %ir.b) %5:_(<4 x s32>) = G_SDIV %3, %4 - G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<4 x s32>), %2(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -120,18 +120,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load (<2 x s64>) from %ir.b) ; P5600: [[SDIV:%[0-9]+]]:_(<2 x s64>) = G_SDIV [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[SDIV]](<2 x s64>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[SDIV]](<2 x s64>), [[COPY2]](p0) :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) + %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>) from %ir.b) %5:_(<2 x s64>) = G_SDIV %3, %4 - G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<2 x s64>), %2(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... @@ -148,18 +148,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load (<16 x s8>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY1]](p0) :: (load (<16 x s8>) from %ir.b) ; P5600: [[SREM:%[0-9]+]]:_(<16 x s8>) = G_SREM [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[SREM]](<16 x s8>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[SREM]](<16 x s8>), [[COPY2]](p0) :: (store (<16 x s8>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<16 x s8>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<16 x s8>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<16 x s8>) = G_LOAD %0(p0) :: (load (<16 x s8>) from %ir.a) + %4:_(<16 x s8>) = G_LOAD %1(p0) :: (load (<16 x s8>) from %ir.b) %5:_(<16 x s8>) = G_SREM %3, %4 - G_STORE %5(<16 x s8>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<16 x s8>), %2(p0) :: (store (<16 x s8>) into %ir.c) RetRA ... @@ -176,18 +176,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load (<8 x s16>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY1]](p0) :: (load (<8 x s16>) from %ir.b) ; P5600: [[SREM:%[0-9]+]]:_(<8 x s16>) = G_SREM [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[SREM]](<8 x s16>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[SREM]](<8 x s16>), [[COPY2]](p0) :: (store (<8 x s16>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<8 x s16>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<8 x s16>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<8 x s16>) = G_LOAD %0(p0) :: (load (<8 x s16>) from %ir.a) + %4:_(<8 x s16>) = G_LOAD %1(p0) :: (load (<8 x s16>) from %ir.b) %5:_(<8 x s16>) = G_SREM %3, %4 - G_STORE %5(<8 x s16>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<8 x s16>), %2(p0) :: (store (<8 x s16>) into %ir.c) RetRA ... @@ -204,18 +204,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load (<4 x s32>) from %ir.b) ; P5600: [[SREM:%[0-9]+]]:_(<4 x s32>) = G_SREM [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[SREM]](<4 x s32>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[SREM]](<4 x s32>), [[COPY2]](p0) :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) + %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>) from %ir.b) %5:_(<4 x s32>) = G_SREM %3, %4 - G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<4 x s32>), %2(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -232,18 +232,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load (<2 x s64>) from %ir.b) ; P5600: [[SREM:%[0-9]+]]:_(<2 x s64>) = G_SREM [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[SREM]](<2 x s64>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[SREM]](<2 x s64>), [[COPY2]](p0) :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) + %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>) from %ir.b) %5:_(<2 x s64>) = G_SREM %3, %4 - G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<2 x s64>), %2(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... @@ -260,18 +260,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load (<16 x s8>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY1]](p0) :: (load (<16 x s8>) from %ir.b) ; P5600: [[UDIV:%[0-9]+]]:_(<16 x s8>) = G_UDIV [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[UDIV]](<16 x s8>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[UDIV]](<16 x s8>), [[COPY2]](p0) :: (store (<16 x s8>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<16 x s8>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<16 x s8>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<16 x s8>) = G_LOAD %0(p0) :: (load (<16 x s8>) from %ir.a) + %4:_(<16 x s8>) = G_LOAD %1(p0) :: (load (<16 x s8>) from %ir.b) %5:_(<16 x s8>) = G_UDIV %3, %4 - G_STORE %5(<16 x s8>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<16 x s8>), %2(p0) :: (store (<16 x s8>) into %ir.c) RetRA ... @@ -288,18 +288,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load (<8 x s16>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY1]](p0) :: (load (<8 x s16>) from %ir.b) ; P5600: [[UDIV:%[0-9]+]]:_(<8 x s16>) = G_UDIV [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[UDIV]](<8 x s16>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[UDIV]](<8 x s16>), [[COPY2]](p0) :: (store (<8 x s16>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<8 x s16>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<8 x s16>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<8 x s16>) = G_LOAD %0(p0) :: (load (<8 x s16>) from %ir.a) + %4:_(<8 x s16>) = G_LOAD %1(p0) :: (load (<8 x s16>) from %ir.b) %5:_(<8 x s16>) = G_UDIV %3, %4 - G_STORE %5(<8 x s16>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<8 x s16>), %2(p0) :: (store (<8 x s16>) into %ir.c) RetRA ... @@ -316,18 +316,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load (<4 x s32>) from %ir.b) ; P5600: [[UDIV:%[0-9]+]]:_(<4 x s32>) = G_UDIV [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[UDIV]](<4 x s32>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[UDIV]](<4 x s32>), [[COPY2]](p0) :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) + %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>) from %ir.b) %5:_(<4 x s32>) = G_UDIV %3, %4 - G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<4 x s32>), %2(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -344,18 +344,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load (<2 x s64>) from %ir.b) ; P5600: [[UDIV:%[0-9]+]]:_(<2 x s64>) = G_UDIV [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[UDIV]](<2 x s64>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[UDIV]](<2 x s64>), [[COPY2]](p0) :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) + %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>) from %ir.b) %5:_(<2 x s64>) = G_UDIV %3, %4 - G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<2 x s64>), %2(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... @@ -372,18 +372,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load (<16 x s8>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY1]](p0) :: (load (<16 x s8>) from %ir.b) ; P5600: [[UREM:%[0-9]+]]:_(<16 x s8>) = G_UREM [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[UREM]](<16 x s8>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[UREM]](<16 x s8>), [[COPY2]](p0) :: (store (<16 x s8>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<16 x s8>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<16 x s8>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<16 x s8>) = G_LOAD %0(p0) :: (load (<16 x s8>) from %ir.a) + %4:_(<16 x s8>) = G_LOAD %1(p0) :: (load (<16 x s8>) from %ir.b) %5:_(<16 x s8>) = G_UREM %3, %4 - G_STORE %5(<16 x s8>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<16 x s8>), %2(p0) :: (store (<16 x s8>) into %ir.c) RetRA ... @@ -400,18 +400,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load (<8 x s16>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY1]](p0) :: (load (<8 x s16>) from %ir.b) ; P5600: [[UREM:%[0-9]+]]:_(<8 x s16>) = G_UREM [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[UREM]](<8 x s16>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[UREM]](<8 x s16>), [[COPY2]](p0) :: (store (<8 x s16>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<8 x s16>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<8 x s16>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<8 x s16>) = G_LOAD %0(p0) :: (load (<8 x s16>) from %ir.a) + %4:_(<8 x s16>) = G_LOAD %1(p0) :: (load (<8 x s16>) from %ir.b) %5:_(<8 x s16>) = G_UREM %3, %4 - G_STORE %5(<8 x s16>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<8 x s16>), %2(p0) :: (store (<8 x s16>) into %ir.c) RetRA ... @@ -428,18 +428,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load (<4 x s32>) from %ir.b) ; P5600: [[UREM:%[0-9]+]]:_(<4 x s32>) = G_UREM [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[UREM]](<4 x s32>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[UREM]](<4 x s32>), [[COPY2]](p0) :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) + %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>) from %ir.b) %5:_(<4 x s32>) = G_UREM %3, %4 - G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<4 x s32>), %2(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -456,18 +456,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load (<2 x s64>) from %ir.b) ; P5600: [[UREM:%[0-9]+]]:_(<2 x s64>) = G_UREM [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[UREM]](<2 x s64>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[UREM]](<2 x s64>), [[COPY2]](p0) :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) + %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>) from %ir.b) %5:_(<2 x s64>) = G_UREM %3, %4 - G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<2 x s64>), %2(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/rem_and_div_vec_builtin.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/rem_and_div_vec_builtin.mir index 402313976b4fe..cd2dfc4630807 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/rem_and_div_vec_builtin.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/rem_and_div_vec_builtin.mir @@ -64,18 +64,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load (<16 x s8>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY1]](p0) :: (load (<16 x s8>) from %ir.b) ; P5600: [[SDIV:%[0-9]+]]:_(<16 x s8>) = G_SDIV [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[SDIV]](<16 x s8>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[SDIV]](<16 x s8>), [[COPY2]](p0) :: (store (<16 x s8>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<16 x s8>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<16 x s8>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<16 x s8>) = G_LOAD %0(p0) :: (load (<16 x s8>) from %ir.a) + %4:_(<16 x s8>) = G_LOAD %1(p0) :: (load (<16 x s8>) from %ir.b) %5:_(<16 x s8>) = G_INTRINSIC intrinsic(@llvm.mips.div.s.b), %3(<16 x s8>), %4(<16 x s8>) - G_STORE %5(<16 x s8>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<16 x s8>), %2(p0) :: (store (<16 x s8>) into %ir.c) RetRA ... @@ -92,18 +92,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load (<8 x s16>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY1]](p0) :: (load (<8 x s16>) from %ir.b) ; P5600: [[SDIV:%[0-9]+]]:_(<8 x s16>) = G_SDIV [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[SDIV]](<8 x s16>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[SDIV]](<8 x s16>), [[COPY2]](p0) :: (store (<8 x s16>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<8 x s16>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<8 x s16>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<8 x s16>) = G_LOAD %0(p0) :: (load (<8 x s16>) from %ir.a) + %4:_(<8 x s16>) = G_LOAD %1(p0) :: (load (<8 x s16>) from %ir.b) %5:_(<8 x s16>) = G_INTRINSIC intrinsic(@llvm.mips.div.s.h), %3(<8 x s16>), %4(<8 x s16>) - G_STORE %5(<8 x s16>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<8 x s16>), %2(p0) :: (store (<8 x s16>) into %ir.c) RetRA ... @@ -120,18 +120,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load (<4 x s32>) from %ir.b) ; P5600: [[SDIV:%[0-9]+]]:_(<4 x s32>) = G_SDIV [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[SDIV]](<4 x s32>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[SDIV]](<4 x s32>), [[COPY2]](p0) :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) + %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>) from %ir.b) %5:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.mips.div.s.w), %3(<4 x s32>), %4(<4 x s32>) - G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<4 x s32>), %2(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -148,18 +148,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load (<2 x s64>) from %ir.b) ; P5600: [[SDIV:%[0-9]+]]:_(<2 x s64>) = G_SDIV [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[SDIV]](<2 x s64>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[SDIV]](<2 x s64>), [[COPY2]](p0) :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) + %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>) from %ir.b) %5:_(<2 x s64>) = G_INTRINSIC intrinsic(@llvm.mips.div.s.d), %3(<2 x s64>), %4(<2 x s64>) - G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<2 x s64>), %2(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... @@ -176,18 +176,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load (<16 x s8>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY1]](p0) :: (load (<16 x s8>) from %ir.b) ; P5600: [[SREM:%[0-9]+]]:_(<16 x s8>) = G_SREM [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[SREM]](<16 x s8>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[SREM]](<16 x s8>), [[COPY2]](p0) :: (store (<16 x s8>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<16 x s8>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<16 x s8>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<16 x s8>) = G_LOAD %0(p0) :: (load (<16 x s8>) from %ir.a) + %4:_(<16 x s8>) = G_LOAD %1(p0) :: (load (<16 x s8>) from %ir.b) %5:_(<16 x s8>) = G_INTRINSIC intrinsic(@llvm.mips.mod.s.b), %3(<16 x s8>), %4(<16 x s8>) - G_STORE %5(<16 x s8>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<16 x s8>), %2(p0) :: (store (<16 x s8>) into %ir.c) RetRA ... @@ -204,18 +204,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load (<8 x s16>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY1]](p0) :: (load (<8 x s16>) from %ir.b) ; P5600: [[SREM:%[0-9]+]]:_(<8 x s16>) = G_SREM [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[SREM]](<8 x s16>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[SREM]](<8 x s16>), [[COPY2]](p0) :: (store (<8 x s16>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<8 x s16>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<8 x s16>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<8 x s16>) = G_LOAD %0(p0) :: (load (<8 x s16>) from %ir.a) + %4:_(<8 x s16>) = G_LOAD %1(p0) :: (load (<8 x s16>) from %ir.b) %5:_(<8 x s16>) = G_INTRINSIC intrinsic(@llvm.mips.mod.s.h), %3(<8 x s16>), %4(<8 x s16>) - G_STORE %5(<8 x s16>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<8 x s16>), %2(p0) :: (store (<8 x s16>) into %ir.c) RetRA ... @@ -232,18 +232,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load (<4 x s32>) from %ir.b) ; P5600: [[SREM:%[0-9]+]]:_(<4 x s32>) = G_SREM [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[SREM]](<4 x s32>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[SREM]](<4 x s32>), [[COPY2]](p0) :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) + %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>) from %ir.b) %5:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.mips.mod.s.w), %3(<4 x s32>), %4(<4 x s32>) - G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<4 x s32>), %2(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -260,18 +260,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load (<2 x s64>) from %ir.b) ; P5600: [[SREM:%[0-9]+]]:_(<2 x s64>) = G_SREM [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[SREM]](<2 x s64>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[SREM]](<2 x s64>), [[COPY2]](p0) :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) + %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>) from %ir.b) %5:_(<2 x s64>) = G_INTRINSIC intrinsic(@llvm.mips.mod.s.d), %3(<2 x s64>), %4(<2 x s64>) - G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<2 x s64>), %2(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... @@ -288,18 +288,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load (<16 x s8>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY1]](p0) :: (load (<16 x s8>) from %ir.b) ; P5600: [[UDIV:%[0-9]+]]:_(<16 x s8>) = G_UDIV [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[UDIV]](<16 x s8>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[UDIV]](<16 x s8>), [[COPY2]](p0) :: (store (<16 x s8>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<16 x s8>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<16 x s8>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<16 x s8>) = G_LOAD %0(p0) :: (load (<16 x s8>) from %ir.a) + %4:_(<16 x s8>) = G_LOAD %1(p0) :: (load (<16 x s8>) from %ir.b) %5:_(<16 x s8>) = G_INTRINSIC intrinsic(@llvm.mips.div.u.b), %3(<16 x s8>), %4(<16 x s8>) - G_STORE %5(<16 x s8>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<16 x s8>), %2(p0) :: (store (<16 x s8>) into %ir.c) RetRA ... @@ -316,18 +316,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load (<8 x s16>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY1]](p0) :: (load (<8 x s16>) from %ir.b) ; P5600: [[UDIV:%[0-9]+]]:_(<8 x s16>) = G_UDIV [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[UDIV]](<8 x s16>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[UDIV]](<8 x s16>), [[COPY2]](p0) :: (store (<8 x s16>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<8 x s16>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<8 x s16>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<8 x s16>) = G_LOAD %0(p0) :: (load (<8 x s16>) from %ir.a) + %4:_(<8 x s16>) = G_LOAD %1(p0) :: (load (<8 x s16>) from %ir.b) %5:_(<8 x s16>) = G_INTRINSIC intrinsic(@llvm.mips.div.u.h), %3(<8 x s16>), %4(<8 x s16>) - G_STORE %5(<8 x s16>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<8 x s16>), %2(p0) :: (store (<8 x s16>) into %ir.c) RetRA ... @@ -344,18 +344,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load (<4 x s32>) from %ir.b) ; P5600: [[UDIV:%[0-9]+]]:_(<4 x s32>) = G_UDIV [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[UDIV]](<4 x s32>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[UDIV]](<4 x s32>), [[COPY2]](p0) :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) + %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>) from %ir.b) %5:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.mips.div.u.w), %3(<4 x s32>), %4(<4 x s32>) - G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<4 x s32>), %2(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -372,18 +372,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load (<2 x s64>) from %ir.b) ; P5600: [[UDIV:%[0-9]+]]:_(<2 x s64>) = G_UDIV [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[UDIV]](<2 x s64>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[UDIV]](<2 x s64>), [[COPY2]](p0) :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) + %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>) from %ir.b) %5:_(<2 x s64>) = G_INTRINSIC intrinsic(@llvm.mips.div.u.d), %3(<2 x s64>), %4(<2 x s64>) - G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<2 x s64>), %2(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... @@ -400,18 +400,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load (<16 x s8>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY1]](p0) :: (load (<16 x s8>) from %ir.b) ; P5600: [[UREM:%[0-9]+]]:_(<16 x s8>) = G_UREM [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[UREM]](<16 x s8>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[UREM]](<16 x s8>), [[COPY2]](p0) :: (store (<16 x s8>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<16 x s8>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<16 x s8>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<16 x s8>) = G_LOAD %0(p0) :: (load (<16 x s8>) from %ir.a) + %4:_(<16 x s8>) = G_LOAD %1(p0) :: (load (<16 x s8>) from %ir.b) %5:_(<16 x s8>) = G_INTRINSIC intrinsic(@llvm.mips.mod.u.b), %3(<16 x s8>), %4(<16 x s8>) - G_STORE %5(<16 x s8>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<16 x s8>), %2(p0) :: (store (<16 x s8>) into %ir.c) RetRA ... @@ -428,18 +428,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load (<8 x s16>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY1]](p0) :: (load (<8 x s16>) from %ir.b) ; P5600: [[UREM:%[0-9]+]]:_(<8 x s16>) = G_UREM [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[UREM]](<8 x s16>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[UREM]](<8 x s16>), [[COPY2]](p0) :: (store (<8 x s16>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<8 x s16>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<8 x s16>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<8 x s16>) = G_LOAD %0(p0) :: (load (<8 x s16>) from %ir.a) + %4:_(<8 x s16>) = G_LOAD %1(p0) :: (load (<8 x s16>) from %ir.b) %5:_(<8 x s16>) = G_INTRINSIC intrinsic(@llvm.mips.mod.u.h), %3(<8 x s16>), %4(<8 x s16>) - G_STORE %5(<8 x s16>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<8 x s16>), %2(p0) :: (store (<8 x s16>) into %ir.c) RetRA ... @@ -456,18 +456,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load (<4 x s32>) from %ir.b) ; P5600: [[UREM:%[0-9]+]]:_(<4 x s32>) = G_UREM [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[UREM]](<4 x s32>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[UREM]](<4 x s32>), [[COPY2]](p0) :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) + %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>) from %ir.b) %5:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.mips.mod.u.w), %3(<4 x s32>), %4(<4 x s32>) - G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<4 x s32>), %2(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -484,18 +484,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load (<2 x s64>) from %ir.b) ; P5600: [[UREM:%[0-9]+]]:_(<2 x s64>) = G_UREM [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[UREM]](<2 x s64>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[UREM]](<2 x s64>), [[COPY2]](p0) :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) + %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>) from %ir.b) %5:_(<2 x s64>) = G_INTRINSIC intrinsic(@llvm.mips.mod.u.d), %3(<2 x s64>), %4(<2 x s64>) - G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<2 x s64>), %2(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/select.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/select.mir index 6f46f151dc219..d67706c1c1f43 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/select.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/select.mir @@ -190,9 +190,9 @@ body: | ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a3 ; MIPS32: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) ; MIPS32: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load 4 from %fixed-stack.0, align 8) + ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8) ; MIPS32: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 - ; MIPS32: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (load 4 from %fixed-stack.1) + ; MIPS32: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (load (s32) from %fixed-stack.1) ; MIPS32: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) @@ -208,9 +208,9 @@ body: | %5:_(s32) = COPY $a3 %1:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) %8:_(p0) = G_FRAME_INDEX %fixed-stack.1 - %6:_(s32) = G_LOAD %8(p0) :: (load 4 from %fixed-stack.1, align 8) + %6:_(s32) = G_LOAD %8(p0) :: (load (s32) from %fixed-stack.1, align 8) %9:_(p0) = G_FRAME_INDEX %fixed-stack.0 - %7:_(s32) = G_LOAD %9(p0) :: (load 4 from %fixed-stack.0) + %7:_(s32) = G_LOAD %9(p0) :: (load (s32) from %fixed-stack.0) %2:_(s64) = G_MERGE_VALUES %6(s32), %7(s32) %10:_(s64) = G_SELECT %0(s1), %1, %2 %11:_(s32), %12:_(s32) = G_UNMERGE_VALUES %10(s64) @@ -262,7 +262,7 @@ body: | ; MIPS32: [[COPY:%[0-9]+]]:_(s64) = COPY $d6 ; MIPS32: [[COPY1:%[0-9]+]]:_(s64) = COPY $d7 ; MIPS32: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load 4 from %fixed-stack.0, align 8) + ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8) ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] @@ -272,7 +272,7 @@ body: | %0:_(s64) = COPY $d6 %1:_(s64) = COPY $d7 %4:_(p0) = G_FRAME_INDEX %fixed-stack.0 - %3:_(s32) = G_LOAD %4(p0) :: (load 4 from %fixed-stack.0, align 8) + %3:_(s32) = G_LOAD %4(p0) :: (load (s32) from %fixed-stack.0, align 8) %2:_(s1) = G_TRUNC %3(s32) %5:_(s64) = G_SELECT %2(s1), %0, %1 $d0 = COPY %5(s64) diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/stack_args.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/stack_args.mir index f389bbc552170..a8080ac65bf67 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/stack_args.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/stack_args.mir @@ -23,7 +23,7 @@ body: | ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 ; MIPS32: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load 4 from %fixed-stack.0, align 8) + ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8) ; MIPS32: ADJCALLSTACKDOWN 24, 0, implicit-def $sp, implicit $sp ; MIPS32: $a0 = COPY [[COPY]](s32) ; MIPS32: $a1 = COPY [[COPY1]](s32) @@ -32,7 +32,7 @@ body: | ; MIPS32: [[COPY4:%[0-9]+]]:_(p0) = COPY $sp ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; MIPS32: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C]](s32) - ; MIPS32: G_STORE [[LOAD]](s32), [[GEP]](p0) :: (store 4 into stack + 16) + ; MIPS32: G_STORE [[LOAD]](s32), [[GEP]](p0) :: (store (s32) into stack + 16) ; MIPS32: JAL @f, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit $a2, implicit $a3, implicit-def $v0 ; MIPS32: [[COPY5:%[0-9]+]]:_(s32) = COPY $v0 ; MIPS32: ADJCALLSTACKUP 24, 0, implicit-def $sp, implicit $sp @@ -43,7 +43,7 @@ body: | %2:_(s32) = COPY $a2 %3:_(s32) = COPY $a3 %5:_(p0) = G_FRAME_INDEX %fixed-stack.0 - %4:_(s32) = G_LOAD %5(p0) :: (load 4 from %fixed-stack.0, align 8) + %4:_(s32) = G_LOAD %5(p0) :: (load (s32) from %fixed-stack.0, align 8) ADJCALLSTACKDOWN 24, 0, implicit-def $sp, implicit $sp $a0 = COPY %0(s32) $a1 = COPY %1(s32) @@ -52,7 +52,7 @@ body: | %7:_(p0) = COPY $sp %8:_(s32) = G_CONSTANT i32 16 %9:_(p0) = G_PTR_ADD %7, %8(s32) - G_STORE %4(s32), %9(p0) :: (store 4 into stack + 16, align 4) + G_STORE %4(s32), %9(p0) :: (store (s32) into stack + 16, align 4) JAL @f, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit $a2, implicit $a3, implicit-def $v0 %6:_(s32) = COPY $v0 ADJCALLSTACKUP 24, 0, implicit-def $sp, implicit $sp diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/store.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/store.mir index e6f45e9b5c172..8b9980234b841 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/store.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/store.mir @@ -21,11 +21,11 @@ body: | ; MIPS32: liveins: $a0, $a1 ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 - ; MIPS32: G_STORE [[COPY]](s32), [[COPY1]](p0) :: (store 4 into %ir.ptr) + ; MIPS32: G_STORE [[COPY]](s32), [[COPY1]](p0) :: (store (s32) into %ir.ptr) ; MIPS32: RetRA %0:_(s32) = COPY $a0 %1:_(p0) = COPY $a1 - G_STORE %0(s32), %1(p0) :: (store 4 into %ir.ptr) + G_STORE %0(s32), %1(p0) :: (store (s32) into %ir.ptr) RetRA ... @@ -44,13 +44,13 @@ body: | ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 ; MIPS32: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; MIPS32: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; MIPS32: G_STORE [[MV]](s64), [[COPY2]](p0) :: (store 8 into %ir.ptr) + ; MIPS32: G_STORE [[MV]](s64), [[COPY2]](p0) :: (store (s64) into %ir.ptr) ; MIPS32: RetRA %2:_(s32) = COPY $a0 %3:_(s32) = COPY $a1 %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) %1:_(p0) = COPY $a2 - G_STORE %0(s64), %1(p0) :: (store 8 into %ir.ptr) + G_STORE %0(s64), %1(p0) :: (store (s64) into %ir.ptr) RetRA ... @@ -67,11 +67,11 @@ body: | ; MIPS32: liveins: $a1, $f12 ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 ; MIPS32: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 - ; MIPS32: G_STORE [[COPY]](s32), [[COPY1]](p0) :: (store 4 into %ir.ptr) + ; MIPS32: G_STORE [[COPY]](s32), [[COPY1]](p0) :: (store (s32) into %ir.ptr) ; MIPS32: RetRA %0:_(s32) = COPY $f12 %1:_(p0) = COPY $a1 - G_STORE %0(s32), %1(p0) :: (store 4 into %ir.ptr) + G_STORE %0(s32), %1(p0) :: (store (s32) into %ir.ptr) RetRA ... @@ -88,11 +88,11 @@ body: | ; MIPS32: liveins: $a2, $d6 ; MIPS32: [[COPY:%[0-9]+]]:_(s64) = COPY $d6 ; MIPS32: [[COPY1:%[0-9]+]]:_(p0) = COPY $a2 - ; MIPS32: G_STORE [[COPY]](s64), [[COPY1]](p0) :: (store 8 into %ir.ptr) + ; MIPS32: G_STORE [[COPY]](s64), [[COPY1]](p0) :: (store (s64) into %ir.ptr) ; MIPS32: RetRA %0:_(s64) = COPY $d6 %1:_(p0) = COPY $a2 - G_STORE %0(s64), %1(p0) :: (store 8 into %ir.ptr) + G_STORE %0(s64), %1(p0) :: (store (s64) into %ir.ptr) RetRA ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/store_4_unaligned.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/store_4_unaligned.mir index ad6eaf4ed9582..23d28a3789c71 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/store_4_unaligned.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/store_4_unaligned.mir @@ -73,17 +73,17 @@ body: | ; MIPS32: liveins: $f12 ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 ; MIPS32: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @float_align1 - ; MIPS32: G_STORE [[COPY]](s32), [[GV]](p0) :: (store 4 into @float_align1, align 1) + ; MIPS32: G_STORE [[COPY]](s32), [[GV]](p0) :: (store (s32) into @float_align1, align 1) ; MIPS32: RetRA ; MIPS32R6-LABEL: name: store_float_align1 ; MIPS32R6: liveins: $f12 ; MIPS32R6: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 ; MIPS32R6: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @float_align1 - ; MIPS32R6: G_STORE [[COPY]](s32), [[GV]](p0) :: (store 4 into @float_align1, align 1) + ; MIPS32R6: G_STORE [[COPY]](s32), [[GV]](p0) :: (store (s32) into @float_align1, align 1) ; MIPS32R6: RetRA %0:_(s32) = COPY $f12 %1:_(p0) = G_GLOBAL_VALUE @float_align1 - G_STORE %0(s32), %1(p0) :: (store 4 into @float_align1, align 1) + G_STORE %0(s32), %1(p0) :: (store (s32) into @float_align1, align 1) RetRA ... @@ -99,17 +99,17 @@ body: | ; MIPS32: liveins: $f12 ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 ; MIPS32: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @float_align2 - ; MIPS32: G_STORE [[COPY]](s32), [[GV]](p0) :: (store 4 into @float_align2, align 2) + ; MIPS32: G_STORE [[COPY]](s32), [[GV]](p0) :: (store (s32) into @float_align2, align 2) ; MIPS32: RetRA ; MIPS32R6-LABEL: name: store_float_align2 ; MIPS32R6: liveins: $f12 ; MIPS32R6: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 ; MIPS32R6: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @float_align2 - ; MIPS32R6: G_STORE [[COPY]](s32), [[GV]](p0) :: (store 4 into @float_align2, align 2) + ; MIPS32R6: G_STORE [[COPY]](s32), [[GV]](p0) :: (store (s32) into @float_align2, align 2) ; MIPS32R6: RetRA %0:_(s32) = COPY $f12 %1:_(p0) = G_GLOBAL_VALUE @float_align2 - G_STORE %0(s32), %1(p0) :: (store 4 into @float_align2, align 2) + G_STORE %0(s32), %1(p0) :: (store (s32) into @float_align2, align 2) RetRA ... @@ -125,17 +125,17 @@ body: | ; MIPS32: liveins: $f12 ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 ; MIPS32: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @float_align4 - ; MIPS32: G_STORE [[COPY]](s32), [[GV]](p0) :: (store 4 into @float_align4) + ; MIPS32: G_STORE [[COPY]](s32), [[GV]](p0) :: (store (s32) into @float_align4) ; MIPS32: RetRA ; MIPS32R6-LABEL: name: store_float_align4 ; MIPS32R6: liveins: $f12 ; MIPS32R6: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 ; MIPS32R6: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @float_align4 - ; MIPS32R6: G_STORE [[COPY]](s32), [[GV]](p0) :: (store 4 into @float_align4) + ; MIPS32R6: G_STORE [[COPY]](s32), [[GV]](p0) :: (store (s32) into @float_align4) ; MIPS32R6: RetRA %0:_(s32) = COPY $f12 %1:_(p0) = G_GLOBAL_VALUE @float_align4 - G_STORE %0(s32), %1(p0) :: (store 4 into @float_align4) + G_STORE %0(s32), %1(p0) :: (store (s32) into @float_align4) RetRA ... @@ -151,17 +151,17 @@ body: | ; MIPS32: liveins: $f12 ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 ; MIPS32: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @float_align8 - ; MIPS32: G_STORE [[COPY]](s32), [[GV]](p0) :: (store 4 into @float_align8, align 8) + ; MIPS32: G_STORE [[COPY]](s32), [[GV]](p0) :: (store (s32) into @float_align8, align 8) ; MIPS32: RetRA ; MIPS32R6-LABEL: name: store_float_align8 ; MIPS32R6: liveins: $f12 ; MIPS32R6: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 ; MIPS32R6: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @float_align8 - ; MIPS32R6: G_STORE [[COPY]](s32), [[GV]](p0) :: (store 4 into @float_align8, align 8) + ; MIPS32R6: G_STORE [[COPY]](s32), [[GV]](p0) :: (store (s32) into @float_align8, align 8) ; MIPS32R6: RetRA %0:_(s32) = COPY $f12 %1:_(p0) = G_GLOBAL_VALUE @float_align8 - G_STORE %0(s32), %1(p0) :: (store 4 into @float_align8, align 8) + G_STORE %0(s32), %1(p0) :: (store (s32) into @float_align8, align 8) RetRA ... @@ -177,17 +177,17 @@ body: | ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 ; MIPS32: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i32_align1 - ; MIPS32: G_STORE [[COPY]](s32), [[GV]](p0) :: (store 4 into @i32_align1, align 1) + ; MIPS32: G_STORE [[COPY]](s32), [[GV]](p0) :: (store (s32) into @i32_align1, align 1) ; MIPS32: RetRA ; MIPS32R6-LABEL: name: store_i32_align1 ; MIPS32R6: liveins: $a0 ; MIPS32R6: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 ; MIPS32R6: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i32_align1 - ; MIPS32R6: G_STORE [[COPY]](s32), [[GV]](p0) :: (store 4 into @i32_align1, align 1) + ; MIPS32R6: G_STORE [[COPY]](s32), [[GV]](p0) :: (store (s32) into @i32_align1, align 1) ; MIPS32R6: RetRA %0:_(s32) = COPY $a0 %1:_(p0) = G_GLOBAL_VALUE @i32_align1 - G_STORE %0(s32), %1(p0) :: (store 4 into @i32_align1, align 1) + G_STORE %0(s32), %1(p0) :: (store (s32) into @i32_align1, align 1) RetRA ... @@ -203,17 +203,17 @@ body: | ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 ; MIPS32: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i32_align2 - ; MIPS32: G_STORE [[COPY]](s32), [[GV]](p0) :: (store 4 into @i32_align2, align 2) + ; MIPS32: G_STORE [[COPY]](s32), [[GV]](p0) :: (store (s32) into @i32_align2, align 2) ; MIPS32: RetRA ; MIPS32R6-LABEL: name: store_i32_align2 ; MIPS32R6: liveins: $a0 ; MIPS32R6: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 ; MIPS32R6: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i32_align2 - ; MIPS32R6: G_STORE [[COPY]](s32), [[GV]](p0) :: (store 4 into @i32_align2, align 2) + ; MIPS32R6: G_STORE [[COPY]](s32), [[GV]](p0) :: (store (s32) into @i32_align2, align 2) ; MIPS32R6: RetRA %0:_(s32) = COPY $a0 %1:_(p0) = G_GLOBAL_VALUE @i32_align2 - G_STORE %0(s32), %1(p0) :: (store 4 into @i32_align2, align 2) + G_STORE %0(s32), %1(p0) :: (store (s32) into @i32_align2, align 2) RetRA ... @@ -229,17 +229,17 @@ body: | ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 ; MIPS32: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i32_align4 - ; MIPS32: G_STORE [[COPY]](s32), [[GV]](p0) :: (store 4 into @i32_align4) + ; MIPS32: G_STORE [[COPY]](s32), [[GV]](p0) :: (store (s32) into @i32_align4) ; MIPS32: RetRA ; MIPS32R6-LABEL: name: store_i32_align4 ; MIPS32R6: liveins: $a0 ; MIPS32R6: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 ; MIPS32R6: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i32_align4 - ; MIPS32R6: G_STORE [[COPY]](s32), [[GV]](p0) :: (store 4 into @i32_align4) + ; MIPS32R6: G_STORE [[COPY]](s32), [[GV]](p0) :: (store (s32) into @i32_align4) ; MIPS32R6: RetRA %0:_(s32) = COPY $a0 %1:_(p0) = G_GLOBAL_VALUE @i32_align4 - G_STORE %0(s32), %1(p0) :: (store 4 into @i32_align4) + G_STORE %0(s32), %1(p0) :: (store (s32) into @i32_align4) RetRA ... @@ -255,17 +255,17 @@ body: | ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 ; MIPS32: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i32_align8 - ; MIPS32: G_STORE [[COPY]](s32), [[GV]](p0) :: (store 4 into @i32_align8, align 8) + ; MIPS32: G_STORE [[COPY]](s32), [[GV]](p0) :: (store (s32) into @i32_align8, align 8) ; MIPS32: RetRA ; MIPS32R6-LABEL: name: store_i32_align8 ; MIPS32R6: liveins: $a0 ; MIPS32R6: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 ; MIPS32R6: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i32_align8 - ; MIPS32R6: G_STORE [[COPY]](s32), [[GV]](p0) :: (store 4 into @i32_align8, align 8) + ; MIPS32R6: G_STORE [[COPY]](s32), [[GV]](p0) :: (store (s32) into @i32_align8, align 8) ; MIPS32R6: RetRA %0:_(s32) = COPY $a0 %1:_(p0) = G_GLOBAL_VALUE @i32_align8 - G_STORE %0(s32), %1(p0) :: (store 4 into @i32_align8, align 8) + G_STORE %0(s32), %1(p0) :: (store (s32) into @i32_align8, align 8) RetRA ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/store_split_because_of_memsize_or_align.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/store_split_because_of_memsize_or_align.mir index 9e33c94a78a4d..fb5a53cc5c344 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/store_split_because_of_memsize_or_align.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/store_split_because_of_memsize_or_align.mir @@ -223,13 +223,13 @@ body: | ; MIPS32: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; MIPS32: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; MIPS32: G_STORE [[COPY2]](s32), [[COPY]](p0) :: (store 1 into %ir.0) + ; MIPS32: G_STORE [[COPY2]](s32), [[COPY]](p0) :: (store (s8) into %ir.0) ; MIPS32: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; MIPS32: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) - ; MIPS32: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p0) :: (store 1 into %ir.0 + 1) + ; MIPS32: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p0) :: (store (s8) into %ir.0 + 1) ; MIPS32: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; MIPS32: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C3]](s32) - ; MIPS32: G_STORE [[LSHR1]](s32), [[PTR_ADD]](p0) :: (store 1 into %ir.0 + 2) + ; MIPS32: G_STORE [[LSHR1]](s32), [[PTR_ADD]](p0) :: (store (s8) into %ir.0 + 2) ; MIPS32: RetRA ; MIPS32R6-LABEL: name: store3align1 ; MIPS32R6: liveins: $a0, $a1 @@ -238,15 +238,15 @@ body: | ; MIPS32R6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; MIPS32R6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; MIPS32R6: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) - ; MIPS32R6: G_STORE [[COPY2]](s32), [[COPY]](p0) :: (store 2 into %ir.0, align 1) + ; MIPS32R6: G_STORE [[COPY2]](s32), [[COPY]](p0) :: (store (s16) into %ir.0, align 1) ; MIPS32R6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; MIPS32R6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C1]](s32) - ; MIPS32R6: G_STORE [[LSHR]](s32), [[PTR_ADD]](p0) :: (store 1 into %ir.0 + 2) + ; MIPS32R6: G_STORE [[LSHR]](s32), [[PTR_ADD]](p0) :: (store (s8) into %ir.0 + 2) ; MIPS32R6: RetRA %0:_(p0) = COPY $a0 %1:_(s32) = COPY $a1 %2:_(s24) = G_TRUNC %1(s32) - G_STORE %2(s24), %0(p0) :: (store 3 into %ir.0, align 1) + G_STORE %2(s24), %0(p0) :: (store (s24) into %ir.0, align 1) RetRA ... @@ -265,10 +265,10 @@ body: | ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; MIPS32: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) - ; MIPS32: G_STORE [[COPY2]](s32), [[COPY]](p0) :: (store 2 into %ir.0) + ; MIPS32: G_STORE [[COPY2]](s32), [[COPY]](p0) :: (store (s16) into %ir.0) ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; MIPS32: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C1]](s32) - ; MIPS32: G_STORE [[LSHR]](s32), [[PTR_ADD]](p0) :: (store 1 into %ir.0 + 2, align 2) + ; MIPS32: G_STORE [[LSHR]](s32), [[PTR_ADD]](p0) :: (store (s8) into %ir.0 + 2, align 2) ; MIPS32: RetRA ; MIPS32R6-LABEL: name: store3align2 ; MIPS32R6: liveins: $a0, $a1 @@ -277,15 +277,15 @@ body: | ; MIPS32R6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; MIPS32R6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; MIPS32R6: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) - ; MIPS32R6: G_STORE [[COPY2]](s32), [[COPY]](p0) :: (store 2 into %ir.0) + ; MIPS32R6: G_STORE [[COPY2]](s32), [[COPY]](p0) :: (store (s16) into %ir.0) ; MIPS32R6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; MIPS32R6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C1]](s32) - ; MIPS32R6: G_STORE [[LSHR]](s32), [[PTR_ADD]](p0) :: (store 1 into %ir.0 + 2, align 2) + ; MIPS32R6: G_STORE [[LSHR]](s32), [[PTR_ADD]](p0) :: (store (s8) into %ir.0 + 2, align 2) ; MIPS32R6: RetRA %0:_(p0) = COPY $a0 %1:_(s32) = COPY $a1 %2:_(s24) = G_TRUNC %1(s32) - G_STORE %2(s24), %0(p0) :: (store 3 into %ir.0, align 2) + G_STORE %2(s24), %0(p0) :: (store (s24) into %ir.0, align 2) RetRA ... @@ -304,10 +304,10 @@ body: | ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; MIPS32: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) - ; MIPS32: G_STORE [[COPY2]](s32), [[COPY]](p0) :: (store 2 into %ir.0, align 4) + ; MIPS32: G_STORE [[COPY2]](s32), [[COPY]](p0) :: (store (s16) into %ir.0, align 4) ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; MIPS32: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C1]](s32) - ; MIPS32: G_STORE [[LSHR]](s32), [[PTR_ADD]](p0) :: (store 1 into %ir.0 + 2, align 2, basealign 4) + ; MIPS32: G_STORE [[LSHR]](s32), [[PTR_ADD]](p0) :: (store (s8) into %ir.0 + 2, align 2, basealign 4) ; MIPS32: RetRA ; MIPS32R6-LABEL: name: store3align4 ; MIPS32R6: liveins: $a0, $a1 @@ -316,15 +316,15 @@ body: | ; MIPS32R6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; MIPS32R6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; MIPS32R6: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) - ; MIPS32R6: G_STORE [[COPY2]](s32), [[COPY]](p0) :: (store 2 into %ir.0, align 4) + ; MIPS32R6: G_STORE [[COPY2]](s32), [[COPY]](p0) :: (store (s16) into %ir.0, align 4) ; MIPS32R6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; MIPS32R6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C1]](s32) - ; MIPS32R6: G_STORE [[LSHR]](s32), [[PTR_ADD]](p0) :: (store 1 into %ir.0 + 2, align 2, basealign 4) + ; MIPS32R6: G_STORE [[LSHR]](s32), [[PTR_ADD]](p0) :: (store (s8) into %ir.0 + 2, align 2, basealign 4) ; MIPS32R6: RetRA %0:_(p0) = COPY $a0 %1:_(s32) = COPY $a1 %2:_(s24) = G_TRUNC %1(s32) - G_STORE %2(s24), %0(p0) :: (store 3 into %ir.0, align 4) + G_STORE %2(s24), %0(p0) :: (store (s24) into %ir.0, align 4) RetRA ... @@ -343,10 +343,10 @@ body: | ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; MIPS32: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) - ; MIPS32: G_STORE [[COPY2]](s32), [[COPY]](p0) :: (store 2 into %ir.0, align 8) + ; MIPS32: G_STORE [[COPY2]](s32), [[COPY]](p0) :: (store (s16) into %ir.0, align 8) ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; MIPS32: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C1]](s32) - ; MIPS32: G_STORE [[LSHR]](s32), [[PTR_ADD]](p0) :: (store 1 into %ir.0 + 2, align 2, basealign 8) + ; MIPS32: G_STORE [[LSHR]](s32), [[PTR_ADD]](p0) :: (store (s8) into %ir.0 + 2, align 2, basealign 8) ; MIPS32: RetRA ; MIPS32R6-LABEL: name: store3align8 ; MIPS32R6: liveins: $a0, $a1 @@ -355,15 +355,15 @@ body: | ; MIPS32R6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; MIPS32R6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; MIPS32R6: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) - ; MIPS32R6: G_STORE [[COPY2]](s32), [[COPY]](p0) :: (store 2 into %ir.0, align 8) + ; MIPS32R6: G_STORE [[COPY2]](s32), [[COPY]](p0) :: (store (s16) into %ir.0, align 8) ; MIPS32R6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; MIPS32R6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C1]](s32) - ; MIPS32R6: G_STORE [[LSHR]](s32), [[PTR_ADD]](p0) :: (store 1 into %ir.0 + 2, align 2, basealign 8) + ; MIPS32R6: G_STORE [[LSHR]](s32), [[PTR_ADD]](p0) :: (store (s8) into %ir.0 + 2, align 2, basealign 8) ; MIPS32R6: RetRA %0:_(p0) = COPY $a0 %1:_(s32) = COPY $a1 %2:_(s24) = G_TRUNC %1(s32) - G_STORE %2(s24), %0(p0) :: (store 3 into %ir.0, align 8) + G_STORE %2(s24), %0(p0) :: (store (s24) into %ir.0, align 8) RetRA ... @@ -382,8 +382,8 @@ body: | ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a3 ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; MIPS32: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) - ; MIPS32: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store 4 into %ir.0, align 1) - ; MIPS32: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store 1 into %ir.0 + 4) + ; MIPS32: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store (s32) into %ir.0, align 1) + ; MIPS32: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store (s8) into %ir.0 + 4) ; MIPS32: RetRA ; MIPS32R6-LABEL: name: store5align1 ; MIPS32R6: liveins: $a0, $a2, $a3 @@ -392,15 +392,15 @@ body: | ; MIPS32R6: [[COPY2:%[0-9]+]]:_(s32) = COPY $a3 ; MIPS32R6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; MIPS32R6: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) - ; MIPS32R6: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store 4 into %ir.0, align 1) - ; MIPS32R6: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store 1 into %ir.0 + 4) + ; MIPS32R6: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store (s32) into %ir.0, align 1) + ; MIPS32R6: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store (s8) into %ir.0 + 4) ; MIPS32R6: RetRA %0:_(p0) = COPY $a0 %2:_(s32) = COPY $a2 %3:_(s32) = COPY $a3 %1:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) %4:_(s40) = G_TRUNC %1(s64) - G_STORE %4(s40), %0(p0) :: (store 5 into %ir.0, align 1) + G_STORE %4(s40), %0(p0) :: (store (s40) into %ir.0, align 1) RetRA ... @@ -419,8 +419,8 @@ body: | ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a3 ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; MIPS32: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) - ; MIPS32: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store 4 into %ir.0, align 2) - ; MIPS32: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store 1 into %ir.0 + 4, align 2) + ; MIPS32: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store (s32) into %ir.0, align 2) + ; MIPS32: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store (s8) into %ir.0 + 4, align 2) ; MIPS32: RetRA ; MIPS32R6-LABEL: name: store5align2 ; MIPS32R6: liveins: $a0, $a2, $a3 @@ -429,15 +429,15 @@ body: | ; MIPS32R6: [[COPY2:%[0-9]+]]:_(s32) = COPY $a3 ; MIPS32R6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; MIPS32R6: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) - ; MIPS32R6: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store 4 into %ir.0, align 2) - ; MIPS32R6: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store 1 into %ir.0 + 4, align 2) + ; MIPS32R6: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store (s32) into %ir.0, align 2) + ; MIPS32R6: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store (s8) into %ir.0 + 4, align 2) ; MIPS32R6: RetRA %0:_(p0) = COPY $a0 %2:_(s32) = COPY $a2 %3:_(s32) = COPY $a3 %1:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) %4:_(s40) = G_TRUNC %1(s64) - G_STORE %4(s40), %0(p0) :: (store 5 into %ir.0, align 2) + G_STORE %4(s40), %0(p0) :: (store (s40) into %ir.0, align 2) RetRA ... @@ -456,8 +456,8 @@ body: | ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a3 ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; MIPS32: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) - ; MIPS32: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store 4 into %ir.0) - ; MIPS32: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store 1 into %ir.0 + 4, align 4) + ; MIPS32: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store (s32) into %ir.0) + ; MIPS32: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store (s8) into %ir.0 + 4, align 4) ; MIPS32: RetRA ; MIPS32R6-LABEL: name: store5align4 ; MIPS32R6: liveins: $a0, $a2, $a3 @@ -466,15 +466,15 @@ body: | ; MIPS32R6: [[COPY2:%[0-9]+]]:_(s32) = COPY $a3 ; MIPS32R6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; MIPS32R6: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) - ; MIPS32R6: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store 4 into %ir.0) - ; MIPS32R6: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store 1 into %ir.0 + 4, align 4) + ; MIPS32R6: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store (s32) into %ir.0) + ; MIPS32R6: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store (s8) into %ir.0 + 4, align 4) ; MIPS32R6: RetRA %0:_(p0) = COPY $a0 %2:_(s32) = COPY $a2 %3:_(s32) = COPY $a3 %1:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) %4:_(s40) = G_TRUNC %1(s64) - G_STORE %4(s40), %0(p0) :: (store 5 into %ir.0, align 4) + G_STORE %4(s40), %0(p0) :: (store (s40) into %ir.0, align 4) RetRA ... @@ -493,8 +493,8 @@ body: | ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a3 ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; MIPS32: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) - ; MIPS32: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store 4 into %ir.0, align 8) - ; MIPS32: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store 1 into %ir.0 + 4, align 4, basealign 8) + ; MIPS32: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store (s32) into %ir.0, align 8) + ; MIPS32: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store (s8) into %ir.0 + 4, align 4, basealign 8) ; MIPS32: RetRA ; MIPS32R6-LABEL: name: store5align8 ; MIPS32R6: liveins: $a0, $a2, $a3 @@ -503,15 +503,15 @@ body: | ; MIPS32R6: [[COPY2:%[0-9]+]]:_(s32) = COPY $a3 ; MIPS32R6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; MIPS32R6: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) - ; MIPS32R6: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store 4 into %ir.0, align 8) - ; MIPS32R6: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store 1 into %ir.0 + 4, align 4, basealign 8) + ; MIPS32R6: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store (s32) into %ir.0, align 8) + ; MIPS32R6: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store (s8) into %ir.0 + 4, align 4, basealign 8) ; MIPS32R6: RetRA %0:_(p0) = COPY $a0 %2:_(s32) = COPY $a2 %3:_(s32) = COPY $a3 %1:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) %4:_(s40) = G_TRUNC %1(s64) - G_STORE %4(s40), %0(p0) :: (store 5 into %ir.0, align 8) + G_STORE %4(s40), %0(p0) :: (store (s40) into %ir.0, align 8) RetRA ... @@ -530,13 +530,13 @@ body: | ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a3 ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; MIPS32: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) - ; MIPS32: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store 4 into %ir.0, align 1) + ; MIPS32: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store (s32) into %ir.0, align 1) ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; MIPS32: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C1]](s32) - ; MIPS32: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store 1 into %ir.0 + 4) + ; MIPS32: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store (s8) into %ir.0 + 4) ; MIPS32: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; MIPS32: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) - ; MIPS32: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p0) :: (store 1 into %ir.0 + 5) + ; MIPS32: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p0) :: (store (s8) into %ir.0 + 5) ; MIPS32: RetRA ; MIPS32R6-LABEL: name: store6align1 ; MIPS32R6: liveins: $a0, $a2, $a3 @@ -545,15 +545,15 @@ body: | ; MIPS32R6: [[COPY2:%[0-9]+]]:_(s32) = COPY $a3 ; MIPS32R6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; MIPS32R6: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) - ; MIPS32R6: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store 4 into %ir.0, align 1) - ; MIPS32R6: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store 2 into %ir.0 + 4, align 1) + ; MIPS32R6: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store (s32) into %ir.0, align 1) + ; MIPS32R6: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store (s16) into %ir.0 + 4, align 1) ; MIPS32R6: RetRA %0:_(p0) = COPY $a0 %2:_(s32) = COPY $a2 %3:_(s32) = COPY $a3 %1:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) %4:_(s48) = G_TRUNC %1(s64) - G_STORE %4(s48), %0(p0) :: (store 6 into %ir.0, align 1) + G_STORE %4(s48), %0(p0) :: (store (s48) into %ir.0, align 1) RetRA ... @@ -572,8 +572,8 @@ body: | ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a3 ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; MIPS32: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) - ; MIPS32: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store 4 into %ir.0, align 2) - ; MIPS32: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store 2 into %ir.0 + 4) + ; MIPS32: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store (s32) into %ir.0, align 2) + ; MIPS32: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store (s16) into %ir.0 + 4) ; MIPS32: RetRA ; MIPS32R6-LABEL: name: store6align2 ; MIPS32R6: liveins: $a0, $a2, $a3 @@ -582,15 +582,15 @@ body: | ; MIPS32R6: [[COPY2:%[0-9]+]]:_(s32) = COPY $a3 ; MIPS32R6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; MIPS32R6: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) - ; MIPS32R6: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store 4 into %ir.0, align 2) - ; MIPS32R6: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store 2 into %ir.0 + 4) + ; MIPS32R6: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store (s32) into %ir.0, align 2) + ; MIPS32R6: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store (s16) into %ir.0 + 4) ; MIPS32R6: RetRA %0:_(p0) = COPY $a0 %2:_(s32) = COPY $a2 %3:_(s32) = COPY $a3 %1:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) %4:_(s48) = G_TRUNC %1(s64) - G_STORE %4(s48), %0(p0) :: (store 6 into %ir.0, align 2) + G_STORE %4(s48), %0(p0) :: (store (s48) into %ir.0, align 2) RetRA ... @@ -609,8 +609,8 @@ body: | ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a3 ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; MIPS32: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) - ; MIPS32: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store 4 into %ir.0) - ; MIPS32: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store 2 into %ir.0 + 4, align 4) + ; MIPS32: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store (s32) into %ir.0) + ; MIPS32: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store (s16) into %ir.0 + 4, align 4) ; MIPS32: RetRA ; MIPS32R6-LABEL: name: store6align4 ; MIPS32R6: liveins: $a0, $a2, $a3 @@ -619,15 +619,15 @@ body: | ; MIPS32R6: [[COPY2:%[0-9]+]]:_(s32) = COPY $a3 ; MIPS32R6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; MIPS32R6: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) - ; MIPS32R6: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store 4 into %ir.0) - ; MIPS32R6: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store 2 into %ir.0 + 4, align 4) + ; MIPS32R6: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store (s32) into %ir.0) + ; MIPS32R6: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store (s16) into %ir.0 + 4, align 4) ; MIPS32R6: RetRA %0:_(p0) = COPY $a0 %2:_(s32) = COPY $a2 %3:_(s32) = COPY $a3 %1:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) %4:_(s48) = G_TRUNC %1(s64) - G_STORE %4(s48), %0(p0) :: (store 6 into %ir.0, align 4) + G_STORE %4(s48), %0(p0) :: (store (s48) into %ir.0, align 4) RetRA ... @@ -646,8 +646,8 @@ body: | ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a3 ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; MIPS32: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) - ; MIPS32: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store 4 into %ir.0, align 8) - ; MIPS32: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store 2 into %ir.0 + 4, align 4, basealign 8) + ; MIPS32: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store (s32) into %ir.0, align 8) + ; MIPS32: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store (s16) into %ir.0 + 4, align 4, basealign 8) ; MIPS32: RetRA ; MIPS32R6-LABEL: name: store6align8 ; MIPS32R6: liveins: $a0, $a2, $a3 @@ -656,15 +656,15 @@ body: | ; MIPS32R6: [[COPY2:%[0-9]+]]:_(s32) = COPY $a3 ; MIPS32R6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; MIPS32R6: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) - ; MIPS32R6: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store 4 into %ir.0, align 8) - ; MIPS32R6: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store 2 into %ir.0 + 4, align 4, basealign 8) + ; MIPS32R6: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store (s32) into %ir.0, align 8) + ; MIPS32R6: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store (s16) into %ir.0 + 4, align 4, basealign 8) ; MIPS32R6: RetRA %0:_(p0) = COPY $a0 %2:_(s32) = COPY $a2 %3:_(s32) = COPY $a3 %1:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) %4:_(s48) = G_TRUNC %1(s64) - G_STORE %4(s48), %0(p0) :: (store 6 into %ir.0, align 8) + G_STORE %4(s48), %0(p0) :: (store (s48) into %ir.0, align 8) RetRA ... @@ -683,18 +683,13 @@ body: | ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a3 ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; MIPS32: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) - ; MIPS32: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store 4 into %ir.0, align 1) - ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; MIPS32: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store (s32) into %ir.0, align 1) + ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; MIPS32: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C1]](s32) - ; MIPS32: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C2]](s32) - ; MIPS32: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store 1 into %ir.0 + 4) - ; MIPS32: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; MIPS32: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C3]](s32) - ; MIPS32: G_STORE [[LSHR]](s32), [[PTR_ADD2]](p0) :: (store 1 into %ir.0 + 5) - ; MIPS32: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; MIPS32: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C4]](s32) - ; MIPS32: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p0) :: (store 1 into %ir.0 + 6) + ; MIPS32: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store (s8) into %ir.0 + 4) + ; MIPS32: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; MIPS32: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) + ; MIPS32: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p0) :: (store (s8) into %ir.0 + 5) ; MIPS32: RetRA ; MIPS32R6-LABEL: name: store7align1 ; MIPS32R6: liveins: $a0, $a2, $a3 @@ -703,20 +698,15 @@ body: | ; MIPS32R6: [[COPY2:%[0-9]+]]:_(s32) = COPY $a3 ; MIPS32R6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; MIPS32R6: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) - ; MIPS32R6: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store 4 into %ir.0, align 1) - ; MIPS32R6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; MIPS32R6: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C1]](s32) - ; MIPS32R6: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store 2 into %ir.0 + 4, align 1) - ; MIPS32R6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; MIPS32R6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) - ; MIPS32R6: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p0) :: (store 1 into %ir.0 + 6) + ; MIPS32R6: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store (s32) into %ir.0, align 1) + ; MIPS32R6: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store (s16) into %ir.0 + 4, align 1) ; MIPS32R6: RetRA %0:_(p0) = COPY $a0 %2:_(s32) = COPY $a2 %3:_(s32) = COPY $a3 %1:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) %4:_(s56) = G_TRUNC %1(s64) - G_STORE %4(s56), %0(p0) :: (store 7 into %ir.0, align 1) + G_STORE %4(s56), %0(p0) :: (store (s42) into %ir.0, align 1) RetRA ... @@ -735,13 +725,8 @@ body: | ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a3 ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; MIPS32: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) - ; MIPS32: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store 4 into %ir.0, align 2) - ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; MIPS32: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C1]](s32) - ; MIPS32: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store 2 into %ir.0 + 4) - ; MIPS32: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; MIPS32: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) - ; MIPS32: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p0) :: (store 1 into %ir.0 + 6, align 2) + ; MIPS32: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store (s32) into %ir.0, align 2) + ; MIPS32: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store (s16) into %ir.0 + 4) ; MIPS32: RetRA ; MIPS32R6-LABEL: name: store7align2 ; MIPS32R6: liveins: $a0, $a2, $a3 @@ -750,20 +735,15 @@ body: | ; MIPS32R6: [[COPY2:%[0-9]+]]:_(s32) = COPY $a3 ; MIPS32R6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; MIPS32R6: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) - ; MIPS32R6: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store 4 into %ir.0, align 2) - ; MIPS32R6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; MIPS32R6: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C1]](s32) - ; MIPS32R6: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store 2 into %ir.0 + 4) - ; MIPS32R6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; MIPS32R6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) - ; MIPS32R6: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p0) :: (store 1 into %ir.0 + 6, align 2) + ; MIPS32R6: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store (s32) into %ir.0, align 2) + ; MIPS32R6: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store (s16) into %ir.0 + 4) ; MIPS32R6: RetRA %0:_(p0) = COPY $a0 %2:_(s32) = COPY $a2 %3:_(s32) = COPY $a3 %1:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) %4:_(s56) = G_TRUNC %1(s64) - G_STORE %4(s56), %0(p0) :: (store 7 into %ir.0, align 2) + G_STORE %4(s56), %0(p0) :: (store (s42) into %ir.0, align 2) RetRA ... @@ -782,13 +762,8 @@ body: | ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a3 ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; MIPS32: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) - ; MIPS32: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store 4 into %ir.0) - ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; MIPS32: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C1]](s32) - ; MIPS32: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store 2 into %ir.0 + 4, align 4) - ; MIPS32: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; MIPS32: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) - ; MIPS32: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p0) :: (store 1 into %ir.0 + 6, align 2, basealign 4) + ; MIPS32: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store (s32) into %ir.0) + ; MIPS32: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store (s16) into %ir.0 + 4, align 4) ; MIPS32: RetRA ; MIPS32R6-LABEL: name: store7align4 ; MIPS32R6: liveins: $a0, $a2, $a3 @@ -797,20 +772,15 @@ body: | ; MIPS32R6: [[COPY2:%[0-9]+]]:_(s32) = COPY $a3 ; MIPS32R6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; MIPS32R6: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) - ; MIPS32R6: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store 4 into %ir.0) - ; MIPS32R6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; MIPS32R6: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C1]](s32) - ; MIPS32R6: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store 2 into %ir.0 + 4, align 4) - ; MIPS32R6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; MIPS32R6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) - ; MIPS32R6: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p0) :: (store 1 into %ir.0 + 6, align 2, basealign 4) + ; MIPS32R6: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store (s32) into %ir.0) + ; MIPS32R6: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store (s16) into %ir.0 + 4, align 4) ; MIPS32R6: RetRA %0:_(p0) = COPY $a0 %2:_(s32) = COPY $a2 %3:_(s32) = COPY $a3 %1:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) %4:_(s56) = G_TRUNC %1(s64) - G_STORE %4(s56), %0(p0) :: (store 7 into %ir.0, align 4) + G_STORE %4(s56), %0(p0) :: (store (s42) into %ir.0, align 4) RetRA ... @@ -829,13 +799,8 @@ body: | ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a3 ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; MIPS32: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) - ; MIPS32: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store 4 into %ir.0, align 8) - ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; MIPS32: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C1]](s32) - ; MIPS32: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store 2 into %ir.0 + 4, align 4, basealign 8) - ; MIPS32: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; MIPS32: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) - ; MIPS32: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p0) :: (store 1 into %ir.0 + 6, align 2, basealign 8) + ; MIPS32: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store (s32) into %ir.0, align 8) + ; MIPS32: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store (s16) into %ir.0 + 4, align 4, basealign 8) ; MIPS32: RetRA ; MIPS32R6-LABEL: name: store7align8 ; MIPS32R6: liveins: $a0, $a2, $a3 @@ -844,20 +809,15 @@ body: | ; MIPS32R6: [[COPY2:%[0-9]+]]:_(s32) = COPY $a3 ; MIPS32R6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; MIPS32R6: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) - ; MIPS32R6: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store 4 into %ir.0, align 8) - ; MIPS32R6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; MIPS32R6: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C1]](s32) - ; MIPS32R6: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store 2 into %ir.0 + 4, align 4, basealign 8) - ; MIPS32R6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; MIPS32R6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32) - ; MIPS32R6: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p0) :: (store 1 into %ir.0 + 6, align 2, basealign 8) + ; MIPS32R6: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store (s32) into %ir.0, align 8) + ; MIPS32R6: G_STORE [[COPY2]](s32), [[PTR_ADD]](p0) :: (store (s16) into %ir.0 + 4, align 4, basealign 8) ; MIPS32R6: RetRA %0:_(p0) = COPY $a0 %2:_(s32) = COPY $a2 %3:_(s32) = COPY $a3 %1:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) %4:_(s56) = G_TRUNC %1(s64) - G_STORE %4(s56), %0(p0) :: (store 7 into %ir.0, align 8) + G_STORE %4(s56), %0(p0) :: (store (s42) into %ir.0, align 8) RetRA ... @@ -876,18 +836,18 @@ body: | ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; MIPS32: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[GV]], [[C]](s32) ; MIPS32: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; MIPS32: G_STORE [[UV]](s32), [[GV]](p0) :: (store 4 into @double_align1, align 1) - ; MIPS32: G_STORE [[UV1]](s32), [[PTR_ADD]](p0) :: (store 4 into @double_align1 + 4, align 1) + ; MIPS32: G_STORE [[UV]](s32), [[GV]](p0) :: (store (s32) into @double_align1, align 1) + ; MIPS32: G_STORE [[UV1]](s32), [[PTR_ADD]](p0) :: (store (s32) into @double_align1 + 4, align 1) ; MIPS32: RetRA ; MIPS32R6-LABEL: name: store_double_align1 ; MIPS32R6: liveins: $d6 ; MIPS32R6: [[COPY:%[0-9]+]]:_(s64) = COPY $d6 ; MIPS32R6: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @double_align1 - ; MIPS32R6: G_STORE [[COPY]](s64), [[GV]](p0) :: (store 8 into @double_align1, align 1) + ; MIPS32R6: G_STORE [[COPY]](s64), [[GV]](p0) :: (store (s64) into @double_align1, align 1) ; MIPS32R6: RetRA %0:_(s64) = COPY $d6 %1:_(p0) = G_GLOBAL_VALUE @double_align1 - G_STORE %0(s64), %1(p0) :: (store 8 into @double_align1, align 1) + G_STORE %0(s64), %1(p0) :: (store (s64) into @double_align1, align 1) RetRA ... @@ -906,18 +866,18 @@ body: | ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; MIPS32: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[GV]], [[C]](s32) ; MIPS32: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; MIPS32: G_STORE [[UV]](s32), [[GV]](p0) :: (store 4 into @double_align2, align 2) - ; MIPS32: G_STORE [[UV1]](s32), [[PTR_ADD]](p0) :: (store 4 into @double_align2 + 4, align 2) + ; MIPS32: G_STORE [[UV]](s32), [[GV]](p0) :: (store (s32) into @double_align2, align 2) + ; MIPS32: G_STORE [[UV1]](s32), [[PTR_ADD]](p0) :: (store (s32) into @double_align2 + 4, align 2) ; MIPS32: RetRA ; MIPS32R6-LABEL: name: store_double_align2 ; MIPS32R6: liveins: $d6 ; MIPS32R6: [[COPY:%[0-9]+]]:_(s64) = COPY $d6 ; MIPS32R6: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @double_align2 - ; MIPS32R6: G_STORE [[COPY]](s64), [[GV]](p0) :: (store 8 into @double_align2, align 2) + ; MIPS32R6: G_STORE [[COPY]](s64), [[GV]](p0) :: (store (s64) into @double_align2, align 2) ; MIPS32R6: RetRA %0:_(s64) = COPY $d6 %1:_(p0) = G_GLOBAL_VALUE @double_align2 - G_STORE %0(s64), %1(p0) :: (store 8 into @double_align2, align 2) + G_STORE %0(s64), %1(p0) :: (store (s64) into @double_align2, align 2) RetRA ... @@ -936,18 +896,18 @@ body: | ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; MIPS32: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[GV]], [[C]](s32) ; MIPS32: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; MIPS32: G_STORE [[UV]](s32), [[GV]](p0) :: (store 4 into @double_align4) - ; MIPS32: G_STORE [[UV1]](s32), [[PTR_ADD]](p0) :: (store 4 into @double_align4 + 4) + ; MIPS32: G_STORE [[UV]](s32), [[GV]](p0) :: (store (s32) into @double_align4) + ; MIPS32: G_STORE [[UV1]](s32), [[PTR_ADD]](p0) :: (store (s32) into @double_align4 + 4) ; MIPS32: RetRA ; MIPS32R6-LABEL: name: store_double_align4 ; MIPS32R6: liveins: $d6 ; MIPS32R6: [[COPY:%[0-9]+]]:_(s64) = COPY $d6 ; MIPS32R6: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @double_align4 - ; MIPS32R6: G_STORE [[COPY]](s64), [[GV]](p0) :: (store 8 into @double_align4, align 4) + ; MIPS32R6: G_STORE [[COPY]](s64), [[GV]](p0) :: (store (s64) into @double_align4, align 4) ; MIPS32R6: RetRA %0:_(s64) = COPY $d6 %1:_(p0) = G_GLOBAL_VALUE @double_align4 - G_STORE %0(s64), %1(p0) :: (store 8 into @double_align4, align 4) + G_STORE %0(s64), %1(p0) :: (store (s64) into @double_align4, align 4) RetRA ... @@ -963,17 +923,17 @@ body: | ; MIPS32: liveins: $d6 ; MIPS32: [[COPY:%[0-9]+]]:_(s64) = COPY $d6 ; MIPS32: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @double_align8 - ; MIPS32: G_STORE [[COPY]](s64), [[GV]](p0) :: (store 8 into @double_align8) + ; MIPS32: G_STORE [[COPY]](s64), [[GV]](p0) :: (store (s64) into @double_align8) ; MIPS32: RetRA ; MIPS32R6-LABEL: name: store_double_align8 ; MIPS32R6: liveins: $d6 ; MIPS32R6: [[COPY:%[0-9]+]]:_(s64) = COPY $d6 ; MIPS32R6: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @double_align8 - ; MIPS32R6: G_STORE [[COPY]](s64), [[GV]](p0) :: (store 8 into @double_align8) + ; MIPS32R6: G_STORE [[COPY]](s64), [[GV]](p0) :: (store (s64) into @double_align8) ; MIPS32R6: RetRA %0:_(s64) = COPY $d6 %1:_(p0) = G_GLOBAL_VALUE @double_align8 - G_STORE %0(s64), %1(p0) :: (store 8 into @double_align8) + G_STORE %0(s64), %1(p0) :: (store (s64) into @double_align8) RetRA ... @@ -992,8 +952,8 @@ body: | ; MIPS32: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i64_align1 ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; MIPS32: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[GV]], [[C]](s32) - ; MIPS32: G_STORE [[COPY]](s32), [[GV]](p0) :: (store 4 into @i64_align1, align 1) - ; MIPS32: G_STORE [[COPY1]](s32), [[PTR_ADD]](p0) :: (store 4 into @i64_align1 + 4, align 1) + ; MIPS32: G_STORE [[COPY]](s32), [[GV]](p0) :: (store (s32) into @i64_align1, align 1) + ; MIPS32: G_STORE [[COPY1]](s32), [[PTR_ADD]](p0) :: (store (s32) into @i64_align1 + 4, align 1) ; MIPS32: RetRA ; MIPS32R6-LABEL: name: store_i64_align1 ; MIPS32R6: liveins: $a0, $a1 @@ -1001,13 +961,13 @@ body: | ; MIPS32R6: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 ; MIPS32R6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; MIPS32R6: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i64_align1 - ; MIPS32R6: G_STORE [[MV]](s64), [[GV]](p0) :: (store 8 into @i64_align1, align 1) + ; MIPS32R6: G_STORE [[MV]](s64), [[GV]](p0) :: (store (s64) into @i64_align1, align 1) ; MIPS32R6: RetRA %1:_(s32) = COPY $a0 %2:_(s32) = COPY $a1 %0:_(s64) = G_MERGE_VALUES %1(s32), %2(s32) %3:_(p0) = G_GLOBAL_VALUE @i64_align1 - G_STORE %0(s64), %3(p0) :: (store 8 into @i64_align1, align 1) + G_STORE %0(s64), %3(p0) :: (store (s64) into @i64_align1, align 1) RetRA ... @@ -1026,8 +986,8 @@ body: | ; MIPS32: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i64_align2 ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; MIPS32: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[GV]], [[C]](s32) - ; MIPS32: G_STORE [[COPY]](s32), [[GV]](p0) :: (store 4 into @i64_align2, align 2) - ; MIPS32: G_STORE [[COPY1]](s32), [[PTR_ADD]](p0) :: (store 4 into @i64_align2 + 4, align 2) + ; MIPS32: G_STORE [[COPY]](s32), [[GV]](p0) :: (store (s32) into @i64_align2, align 2) + ; MIPS32: G_STORE [[COPY1]](s32), [[PTR_ADD]](p0) :: (store (s32) into @i64_align2 + 4, align 2) ; MIPS32: RetRA ; MIPS32R6-LABEL: name: store_i64_align2 ; MIPS32R6: liveins: $a0, $a1 @@ -1035,13 +995,13 @@ body: | ; MIPS32R6: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 ; MIPS32R6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; MIPS32R6: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i64_align2 - ; MIPS32R6: G_STORE [[MV]](s64), [[GV]](p0) :: (store 8 into @i64_align2, align 2) + ; MIPS32R6: G_STORE [[MV]](s64), [[GV]](p0) :: (store (s64) into @i64_align2, align 2) ; MIPS32R6: RetRA %1:_(s32) = COPY $a0 %2:_(s32) = COPY $a1 %0:_(s64) = G_MERGE_VALUES %1(s32), %2(s32) %3:_(p0) = G_GLOBAL_VALUE @i64_align2 - G_STORE %0(s64), %3(p0) :: (store 8 into @i64_align2, align 2) + G_STORE %0(s64), %3(p0) :: (store (s64) into @i64_align2, align 2) RetRA ... @@ -1060,8 +1020,8 @@ body: | ; MIPS32: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i64_align4 ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; MIPS32: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[GV]], [[C]](s32) - ; MIPS32: G_STORE [[COPY]](s32), [[GV]](p0) :: (store 4 into @i64_align4) - ; MIPS32: G_STORE [[COPY1]](s32), [[PTR_ADD]](p0) :: (store 4 into @i64_align4 + 4) + ; MIPS32: G_STORE [[COPY]](s32), [[GV]](p0) :: (store (s32) into @i64_align4) + ; MIPS32: G_STORE [[COPY1]](s32), [[PTR_ADD]](p0) :: (store (s32) into @i64_align4 + 4) ; MIPS32: RetRA ; MIPS32R6-LABEL: name: store_i64_align4 ; MIPS32R6: liveins: $a0, $a1 @@ -1069,13 +1029,13 @@ body: | ; MIPS32R6: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 ; MIPS32R6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; MIPS32R6: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i64_align4 - ; MIPS32R6: G_STORE [[MV]](s64), [[GV]](p0) :: (store 8 into @i64_align4, align 4) + ; MIPS32R6: G_STORE [[MV]](s64), [[GV]](p0) :: (store (s64) into @i64_align4, align 4) ; MIPS32R6: RetRA %1:_(s32) = COPY $a0 %2:_(s32) = COPY $a1 %0:_(s64) = G_MERGE_VALUES %1(s32), %2(s32) %3:_(p0) = G_GLOBAL_VALUE @i64_align4 - G_STORE %0(s64), %3(p0) :: (store 8 into @i64_align4, align 4) + G_STORE %0(s64), %3(p0) :: (store (s64) into @i64_align4, align 4) RetRA ... @@ -1093,7 +1053,7 @@ body: | ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 ; MIPS32: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; MIPS32: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i64_align8 - ; MIPS32: G_STORE [[MV]](s64), [[GV]](p0) :: (store 8 into @i64_align8) + ; MIPS32: G_STORE [[MV]](s64), [[GV]](p0) :: (store (s64) into @i64_align8) ; MIPS32: RetRA ; MIPS32R6-LABEL: name: store_i64_align8 ; MIPS32R6: liveins: $a0, $a1 @@ -1101,13 +1061,13 @@ body: | ; MIPS32R6: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 ; MIPS32R6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; MIPS32R6: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i64_align8 - ; MIPS32R6: G_STORE [[MV]](s64), [[GV]](p0) :: (store 8 into @i64_align8) + ; MIPS32R6: G_STORE [[MV]](s64), [[GV]](p0) :: (store (s64) into @i64_align8) ; MIPS32R6: RetRA %1:_(s32) = COPY $a0 %2:_(s32) = COPY $a1 %0:_(s64) = G_MERGE_VALUES %1(s32), %2(s32) %3:_(p0) = G_GLOBAL_VALUE @i64_align8 - G_STORE %0(s64), %3(p0) :: (store 8 into @i64_align8) + G_STORE %0(s64), %3(p0) :: (store (s64) into @i64_align8) RetRA ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/sub.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/sub.mir index 3f018e8fce1fe..e921f5c544039 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/sub.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/sub.mir @@ -270,13 +270,13 @@ body: | ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 ; MIPS32: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load 4 from %fixed-stack.0, align 8) + ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8) ; MIPS32: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 - ; MIPS32: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (load 4 from %fixed-stack.1) + ; MIPS32: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (load (s32) from %fixed-stack.1) ; MIPS32: [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2 - ; MIPS32: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p0) :: (load 4 from %fixed-stack.2, align 8) + ; MIPS32: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p0) :: (load (s32) from %fixed-stack.2, align 8) ; MIPS32: [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.3 - ; MIPS32: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p0) :: (load 4 from %fixed-stack.3) + ; MIPS32: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p0) :: (load (s32) from %fixed-stack.3) ; MIPS32: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[LOAD]], [[COPY]] ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[LOAD]](s32), [[COPY]] ; MIPS32: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[LOAD1]], [[COPY1]] @@ -317,13 +317,13 @@ body: | %5:_(s32) = COPY $a3 %0:_(s128) = G_MERGE_VALUES %2(s32), %3(s32), %4(s32), %5(s32) %10:_(p0) = G_FRAME_INDEX %fixed-stack.3 - %6:_(s32) = G_LOAD %10(p0) :: (load 4 from %fixed-stack.3, align 8) + %6:_(s32) = G_LOAD %10(p0) :: (load (s32) from %fixed-stack.3, align 8) %11:_(p0) = G_FRAME_INDEX %fixed-stack.2 - %7:_(s32) = G_LOAD %11(p0) :: (load 4 from %fixed-stack.2, align 4) + %7:_(s32) = G_LOAD %11(p0) :: (load (s32) from %fixed-stack.2, align 4) %12:_(p0) = G_FRAME_INDEX %fixed-stack.1 - %8:_(s32) = G_LOAD %12(p0) :: (load 4 from %fixed-stack.1, align 8) + %8:_(s32) = G_LOAD %12(p0) :: (load (s32) from %fixed-stack.1, align 8) %13:_(p0) = G_FRAME_INDEX %fixed-stack.0 - %9:_(s32) = G_LOAD %13(p0) :: (load 4 from %fixed-stack.0, align 4) + %9:_(s32) = G_LOAD %13(p0) :: (load (s32) from %fixed-stack.0, align 4) %1:_(s128) = G_MERGE_VALUES %6(s32), %7(s32), %8(s32), %9(s32) %14:_(s128) = G_SUB %1, %0 %15:_(s32), %16:_(s32), %17:_(s32), %18:_(s32) = G_UNMERGE_VALUES %14(s128) diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/sub_vec.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/sub_vec.mir index b4e8a30c6de44..9b8d977b39113 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/sub_vec.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/sub_vec.mir @@ -21,18 +21,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load (<16 x s8>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY1]](p0) :: (load (<16 x s8>) from %ir.b) ; P5600: [[SUB:%[0-9]+]]:_(<16 x s8>) = G_SUB [[LOAD1]], [[LOAD]] - ; P5600: G_STORE [[SUB]](<16 x s8>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[SUB]](<16 x s8>), [[COPY2]](p0) :: (store (<16 x s8>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<16 x s8>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<16 x s8>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<16 x s8>) = G_LOAD %0(p0) :: (load (<16 x s8>) from %ir.a) + %4:_(<16 x s8>) = G_LOAD %1(p0) :: (load (<16 x s8>) from %ir.b) %5:_(<16 x s8>) = G_SUB %4, %3 - G_STORE %5(<16 x s8>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<16 x s8>), %2(p0) :: (store (<16 x s8>) into %ir.c) RetRA ... @@ -49,18 +49,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load (<8 x s16>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY1]](p0) :: (load (<8 x s16>) from %ir.b) ; P5600: [[SUB:%[0-9]+]]:_(<8 x s16>) = G_SUB [[LOAD1]], [[LOAD]] - ; P5600: G_STORE [[SUB]](<8 x s16>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[SUB]](<8 x s16>), [[COPY2]](p0) :: (store (<8 x s16>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<8 x s16>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<8 x s16>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<8 x s16>) = G_LOAD %0(p0) :: (load (<8 x s16>) from %ir.a) + %4:_(<8 x s16>) = G_LOAD %1(p0) :: (load (<8 x s16>) from %ir.b) %5:_(<8 x s16>) = G_SUB %4, %3 - G_STORE %5(<8 x s16>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<8 x s16>), %2(p0) :: (store (<8 x s16>) into %ir.c) RetRA ... @@ -77,18 +77,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load (<4 x s32>) from %ir.b) ; P5600: [[SUB:%[0-9]+]]:_(<4 x s32>) = G_SUB [[LOAD1]], [[LOAD]] - ; P5600: G_STORE [[SUB]](<4 x s32>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[SUB]](<4 x s32>), [[COPY2]](p0) :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) + %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>) from %ir.b) %5:_(<4 x s32>) = G_SUB %4, %3 - G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<4 x s32>), %2(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -105,18 +105,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load (<2 x s64>) from %ir.b) ; P5600: [[SUB:%[0-9]+]]:_(<2 x s64>) = G_SUB [[LOAD1]], [[LOAD]] - ; P5600: G_STORE [[SUB]](<2 x s64>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[SUB]](<2 x s64>), [[COPY2]](p0) :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) + %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>) from %ir.b) %5:_(<2 x s64>) = G_SUB %4, %3 - G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<2 x s64>), %2(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/sub_vec_builtin.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/sub_vec_builtin.mir index bda2a1c15c5cd..efa9f473a9253 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/sub_vec_builtin.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/sub_vec_builtin.mir @@ -40,18 +40,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load (<16 x s8>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY1]](p0) :: (load (<16 x s8>) from %ir.b) ; P5600: [[SUB:%[0-9]+]]:_(<16 x s8>) = G_SUB [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[SUB]](<16 x s8>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[SUB]](<16 x s8>), [[COPY2]](p0) :: (store (<16 x s8>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<16 x s8>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<16 x s8>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<16 x s8>) = G_LOAD %0(p0) :: (load (<16 x s8>) from %ir.a) + %4:_(<16 x s8>) = G_LOAD %1(p0) :: (load (<16 x s8>) from %ir.b) %5:_(<16 x s8>) = G_INTRINSIC intrinsic(@llvm.mips.subv.b), %3(<16 x s8>), %4(<16 x s8>) - G_STORE %5(<16 x s8>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<16 x s8>), %2(p0) :: (store (<16 x s8>) into %ir.c) RetRA ... @@ -68,18 +68,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load (<8 x s16>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY1]](p0) :: (load (<8 x s16>) from %ir.b) ; P5600: [[SUB:%[0-9]+]]:_(<8 x s16>) = G_SUB [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[SUB]](<8 x s16>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[SUB]](<8 x s16>), [[COPY2]](p0) :: (store (<8 x s16>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<8 x s16>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<8 x s16>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<8 x s16>) = G_LOAD %0(p0) :: (load (<8 x s16>) from %ir.a) + %4:_(<8 x s16>) = G_LOAD %1(p0) :: (load (<8 x s16>) from %ir.b) %5:_(<8 x s16>) = G_INTRINSIC intrinsic(@llvm.mips.subv.h), %3(<8 x s16>), %4(<8 x s16>) - G_STORE %5(<8 x s16>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<8 x s16>), %2(p0) :: (store (<8 x s16>) into %ir.c) RetRA ... @@ -96,18 +96,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load (<4 x s32>) from %ir.b) ; P5600: [[SUB:%[0-9]+]]:_(<4 x s32>) = G_SUB [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[SUB]](<4 x s32>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[SUB]](<4 x s32>), [[COPY2]](p0) :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) + %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>) from %ir.b) %5:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.mips.subv.w), %3(<4 x s32>), %4(<4 x s32>) - G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<4 x s32>), %2(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -124,18 +124,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:_(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load (<2 x s64>) from %ir.b) ; P5600: [[SUB:%[0-9]+]]:_(<2 x s64>) = G_SUB [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[SUB]](<2 x s64>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[SUB]](<2 x s64>), [[COPY2]](p0) :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) + %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>) from %ir.b) %5:_(<2 x s64>) = G_INTRINSIC intrinsic(@llvm.mips.subv.d), %3(<2 x s64>), %4(<2 x s64>) - G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<2 x s64>), %2(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... @@ -151,15 +151,15 @@ body: | ; P5600: liveins: $a0, $a1 ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 - ; P5600: [[LOAD:%[0-9]+]]:msa128b(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) + ; P5600: [[LOAD:%[0-9]+]]:msa128b(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load (<16 x s8>) from %ir.a) ; P5600: [[SUBVI_B:%[0-9]+]]:msa128b(<16 x s8>) = SUBVI_B [[LOAD]](<16 x s8>), 3 - ; P5600: G_STORE [[SUBVI_B]](<16 x s8>), [[COPY1]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[SUBVI_B]](<16 x s8>), [[COPY1]](p0) :: (store (<16 x s8>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 - %2:_(<16 x s8>) = G_LOAD %0(p0) :: (load 16 from %ir.a) + %2:_(<16 x s8>) = G_LOAD %0(p0) :: (load (<16 x s8>) from %ir.a) %3:_(<16 x s8>) = G_INTRINSIC intrinsic(@llvm.mips.subvi.b), %2(<16 x s8>), 3 - G_STORE %3(<16 x s8>), %1(p0) :: (store 16 into %ir.c) + G_STORE %3(<16 x s8>), %1(p0) :: (store (<16 x s8>) into %ir.c) RetRA ... @@ -175,15 +175,15 @@ body: | ; P5600: liveins: $a0, $a1 ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 - ; P5600: [[LOAD:%[0-9]+]]:msa128h(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) + ; P5600: [[LOAD:%[0-9]+]]:msa128h(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load (<8 x s16>) from %ir.a) ; P5600: [[SUBVI_H:%[0-9]+]]:msa128h(<8 x s16>) = SUBVI_H [[LOAD]](<8 x s16>), 18 - ; P5600: G_STORE [[SUBVI_H]](<8 x s16>), [[COPY1]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[SUBVI_H]](<8 x s16>), [[COPY1]](p0) :: (store (<8 x s16>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 - %2:_(<8 x s16>) = G_LOAD %0(p0) :: (load 16 from %ir.a) + %2:_(<8 x s16>) = G_LOAD %0(p0) :: (load (<8 x s16>) from %ir.a) %3:_(<8 x s16>) = G_INTRINSIC intrinsic(@llvm.mips.subvi.h), %2(<8 x s16>), 18 - G_STORE %3(<8 x s16>), %1(p0) :: (store 16 into %ir.c) + G_STORE %3(<8 x s16>), %1(p0) :: (store (<8 x s16>) into %ir.c) RetRA ... @@ -199,15 +199,15 @@ body: | ; P5600: liveins: $a0, $a1 ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 - ; P5600: [[LOAD:%[0-9]+]]:msa128w(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) + ; P5600: [[LOAD:%[0-9]+]]:msa128w(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>) from %ir.a) ; P5600: [[SUBVI_W:%[0-9]+]]:msa128w(<4 x s32>) = SUBVI_W [[LOAD]](<4 x s32>), 25 - ; P5600: G_STORE [[SUBVI_W]](<4 x s32>), [[COPY1]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[SUBVI_W]](<4 x s32>), [[COPY1]](p0) :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 - %2:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) + %2:_(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.mips.subvi.w), %2(<4 x s32>), 25 - G_STORE %3(<4 x s32>), %1(p0) :: (store 16 into %ir.c) + G_STORE %3(<4 x s32>), %1(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -223,15 +223,15 @@ body: | ; P5600: liveins: $a0, $a1 ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 - ; P5600: [[LOAD:%[0-9]+]]:msa128d(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) + ; P5600: [[LOAD:%[0-9]+]]:msa128d(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>) from %ir.a) ; P5600: [[SUBVI_D:%[0-9]+]]:msa128d(<2 x s64>) = SUBVI_D [[LOAD]](<2 x s64>), 31 - ; P5600: G_STORE [[SUBVI_D]](<2 x s64>), [[COPY1]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[SUBVI_D]](<2 x s64>), [[COPY1]](p0) :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 - %2:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) + %2:_(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) %3:_(<2 x s64>) = G_INTRINSIC intrinsic(@llvm.mips.subvi.d), %2(<2 x s64>), 31 - G_STORE %3(<2 x s64>), %1(p0) :: (store 16 into %ir.c) + G_STORE %3(<2 x s64>), %1(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/truncStore_and_aExtLoad.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/truncStore_and_aExtLoad.mir index 740652574c6a7..02d01c18791be 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/truncStore_and_aExtLoad.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/truncStore_and_aExtLoad.mir @@ -21,11 +21,11 @@ body: | ; MIPS32-LABEL: name: load1_s8_to_load1_s32 ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1 from %ir.px) + ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8) from %ir.px) ; MIPS32: $v0 = COPY [[LOAD]](s32) ; MIPS32: RetRA implicit $v0 %0:_(p0) = COPY $a0 - %2:_(s32) = G_LOAD %0(p0) :: (load 1 from %ir.px) + %2:_(s32) = G_LOAD %0(p0) :: (load (s8) from %ir.px) $v0 = COPY %2(s32) RetRA implicit $v0 @@ -41,11 +41,11 @@ body: | ; MIPS32-LABEL: name: load2_s16_to_load2_s32 ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2 from %ir.px) + ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16) from %ir.px) ; MIPS32: $v0 = COPY [[LOAD]](s32) ; MIPS32: RetRA implicit $v0 %0:_(p0) = COPY $a0 - %2:_(s32) = G_LOAD %0(p0) :: (load 2 from %ir.px) + %2:_(s32) = G_LOAD %0(p0) :: (load (s16) from %ir.px) $v0 = COPY %2(s32) RetRA implicit $v0 @@ -62,16 +62,16 @@ body: | ; MIPS32: liveins: $a0, $a1 ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p0) :: (load 1 from %ir.py) + ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p0) :: (load (s1) from %ir.py) ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; MIPS32: G_STORE [[AND]](s32), [[COPY]](p0) :: (store 1 into %ir.px) + ; MIPS32: G_STORE [[AND]](s32), [[COPY]](p0) :: (store (s1) into %ir.px) ; MIPS32: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 - %2:_(s1) = G_LOAD %1(p0) :: (load 1 from %ir.py) - G_STORE %2(s1), %0(p0) :: (store 1 into %ir.px) + %2:_(s1) = G_LOAD %1(p0) :: (load (s1) from %ir.py) + G_STORE %2(s1), %0(p0) :: (store (s1) into %ir.px) RetRA ... @@ -87,14 +87,14 @@ body: | ; MIPS32: liveins: $a0, $a1 ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p0) :: (load 1 from %ir.py) + ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p0) :: (load (s8) from %ir.py) ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; MIPS32: G_STORE [[COPY2]](s32), [[COPY]](p0) :: (store 1 into %ir.px) + ; MIPS32: G_STORE [[COPY2]](s32), [[COPY]](p0) :: (store (s8) into %ir.px) ; MIPS32: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 - %2:_(s8) = G_LOAD %1(p0) :: (load 1 from %ir.py) - G_STORE %2(s8), %0(p0) :: (store 1 into %ir.px) + %2:_(s8) = G_LOAD %1(p0) :: (load (s8) from %ir.py) + G_STORE %2(s8), %0(p0) :: (store (s8) into %ir.px) RetRA ... @@ -110,14 +110,14 @@ body: | ; MIPS32: liveins: $a0, $a1 ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p0) :: (load 2 from %ir.py) + ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p0) :: (load (s16) from %ir.py) ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; MIPS32: G_STORE [[COPY2]](s32), [[COPY]](p0) :: (store 2 into %ir.px) + ; MIPS32: G_STORE [[COPY2]](s32), [[COPY]](p0) :: (store (s16) into %ir.px) ; MIPS32: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 - %2:_(s16) = G_LOAD %1(p0) :: (load 2 from %ir.py) - G_STORE %2(s16), %0(p0) :: (store 2 into %ir.px) + %2:_(s16) = G_LOAD %1(p0) :: (load (s16) from %ir.py) + G_STORE %2(s16), %0(p0) :: (store (s16) into %ir.px) RetRA ... @@ -133,13 +133,13 @@ body: | ; MIPS32: liveins: $a0, $a1 ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p0) :: (load 4 from %ir.py) - ; MIPS32: G_STORE [[LOAD]](s32), [[COPY]](p0) :: (store 4 into %ir.px) + ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p0) :: (load (s32) from %ir.py) + ; MIPS32: G_STORE [[LOAD]](s32), [[COPY]](p0) :: (store (s32) into %ir.px) ; MIPS32: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 - %2:_(s32) = G_LOAD %1(p0) :: (load 4 from %ir.py) - G_STORE %2(s32), %0(p0) :: (store 4 into %ir.px) + %2:_(s32) = G_LOAD %1(p0) :: (load (s32) from %ir.py) + G_STORE %2(s32), %0(p0) :: (store (s32) into %ir.px) RetRA ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/var_arg.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/var_arg.mir index 871e9ce673f33..3924d914fc62f 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/var_arg.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/var_arg.mir @@ -57,30 +57,30 @@ body: | ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 ; MIPS32: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 - ; MIPS32: G_STORE [[COPY1]](s32), [[FRAME_INDEX]](p0) :: (store 4 into %fixed-stack.1) + ; MIPS32: G_STORE [[COPY1]](s32), [[FRAME_INDEX]](p0) :: (store (s32) into %fixed-stack.1) ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 ; MIPS32: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2 - ; MIPS32: G_STORE [[COPY2]](s32), [[FRAME_INDEX1]](p0) :: (store 4 into %fixed-stack.2) + ; MIPS32: G_STORE [[COPY2]](s32), [[FRAME_INDEX1]](p0) :: (store (s32) into %fixed-stack.2) ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 ; MIPS32: [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.3 - ; MIPS32: G_STORE [[COPY3]](s32), [[FRAME_INDEX2]](p0) :: (store 4 into %fixed-stack.3) + ; MIPS32: G_STORE [[COPY3]](s32), [[FRAME_INDEX2]](p0) :: (store (s32) into %fixed-stack.3) ; MIPS32: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @.str ; MIPS32: [[COPY4:%[0-9]+]]:_(p0) = COPY [[GV]](p0) ; MIPS32: [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.fmt.addr ; MIPS32: [[FRAME_INDEX4:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.ap ; MIPS32: [[FRAME_INDEX5:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.2.aq ; MIPS32: [[FRAME_INDEX6:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.3.s - ; MIPS32: G_STORE [[COPY]](p0), [[FRAME_INDEX3]](p0) :: (store 4 into %ir.fmt.addr) - ; MIPS32: G_VASTART [[FRAME_INDEX4]](p0) :: (store 4 into %ir.ap1, align 1) - ; MIPS32: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX4]](p0) :: (load 4) - ; MIPS32: G_STORE [[LOAD]](p0), [[FRAME_INDEX5]](p0) :: (store 4) - ; MIPS32: [[LOAD1:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX5]](p0) :: (load 4 from %ir.aq) + ; MIPS32: G_STORE [[COPY]](p0), [[FRAME_INDEX3]](p0) :: (store (p0) into %ir.fmt.addr) + ; MIPS32: G_VASTART [[FRAME_INDEX4]](p0) :: (store (p0) into %ir.ap1, align 1) + ; MIPS32: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX4]](p0) :: (load (s32)) + ; MIPS32: G_STORE [[LOAD]](p0), [[FRAME_INDEX5]](p0) :: (store (s32)) + ; MIPS32: [[LOAD1:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX5]](p0) :: (load (p0) from %ir.aq) ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; MIPS32: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[LOAD1]], [[C]](s32) - ; MIPS32: G_STORE [[GEP]](p0), [[FRAME_INDEX5]](p0) :: (store 4 into %ir.aq) - ; MIPS32: [[LOAD2:%[0-9]+]]:_(p0) = G_LOAD [[LOAD1]](p0) :: (load 4 from %ir.2) - ; MIPS32: G_STORE [[LOAD2]](p0), [[FRAME_INDEX6]](p0) :: (store 4 into %ir.s) - ; MIPS32: [[LOAD3:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX6]](p0) :: (load 4 from %ir.s) + ; MIPS32: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[LOAD1]], [[C]](s32) + ; MIPS32: G_STORE [[PTR_ADD]](p0), [[FRAME_INDEX5]](p0) :: (store (p0) into %ir.aq) + ; MIPS32: [[LOAD2:%[0-9]+]]:_(p0) = G_LOAD [[LOAD1]](p0) :: (load (p0) from %ir.2) + ; MIPS32: G_STORE [[LOAD2]](p0), [[FRAME_INDEX6]](p0) :: (store (p0) into %ir.s) + ; MIPS32: [[LOAD3:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX6]](p0) :: (load (p0) from %ir.s) ; MIPS32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp ; MIPS32: $a0 = COPY [[COPY4]](p0) ; MIPS32: $a1 = COPY [[LOAD3]](p0) @@ -90,29 +90,29 @@ body: | %0:_(p0) = COPY $a0 %1:_(s32) = COPY $a1 %2:_(p0) = G_FRAME_INDEX %fixed-stack.2 - G_STORE %1(s32), %2(p0) :: (store 4 into %fixed-stack.2) + G_STORE %1(s32), %2(p0) :: (store (s32) into %fixed-stack.2) %3:_(s32) = COPY $a2 %4:_(p0) = G_FRAME_INDEX %fixed-stack.1 - G_STORE %3(s32), %4(p0) :: (store 4 into %fixed-stack.1) + G_STORE %3(s32), %4(p0) :: (store (s32) into %fixed-stack.1) %5:_(s32) = COPY $a3 %6:_(p0) = G_FRAME_INDEX %fixed-stack.0 - G_STORE %5(s32), %6(p0) :: (store 4 into %fixed-stack.0) + G_STORE %5(s32), %6(p0) :: (store (s32) into %fixed-stack.0) %18:_(p0) = G_GLOBAL_VALUE @.str %17:_(p0) = COPY %18(p0) %7:_(p0) = G_FRAME_INDEX %stack.0.fmt.addr %8:_(p0) = G_FRAME_INDEX %stack.1.ap %9:_(p0) = G_FRAME_INDEX %stack.2.aq %10:_(p0) = G_FRAME_INDEX %stack.3.s - G_STORE %0(p0), %7(p0) :: (store 4 into %ir.fmt.addr) - G_VASTART %8(p0) :: (store 4 into %ir.ap1, align 1) + G_STORE %0(p0), %7(p0) :: (store (p0) into %ir.fmt.addr) + G_VASTART %8(p0) :: (store (p0) into %ir.ap1, align 1) G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.va_copy), %9(p0), %8(p0) - %11:_(p0) = G_LOAD %9(p0) :: (load 4 from %ir.aq) + %11:_(p0) = G_LOAD %9(p0) :: (load (p0) from %ir.aq) %12:_(s32) = G_CONSTANT i32 4 %13:_(p0) = G_PTR_ADD %11, %12(s32) - G_STORE %13(p0), %9(p0) :: (store 4 into %ir.aq) - %14:_(p0) = G_LOAD %11(p0) :: (load 4 from %ir.2) - G_STORE %14(p0), %10(p0) :: (store 4 into %ir.s) - %15:_(p0) = G_LOAD %10(p0) :: (load 4 from %ir.s) + G_STORE %13(p0), %9(p0) :: (store (p0) into %ir.aq) + %14:_(p0) = G_LOAD %11(p0) :: (load (p0) from %ir.2) + G_STORE %14(p0), %10(p0) :: (store (p0) into %ir.s) + %15:_(p0) = G_LOAD %10(p0) :: (load (p0) from %ir.s) ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp $a0 = COPY %17(p0) $a1 = COPY %15(p0) diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/zextLoad_and_sextLoad.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/zextLoad_and_sextLoad.mir index d0c084d2c122b..fbc8c4115d0f2 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/zextLoad_and_sextLoad.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/zextLoad_and_sextLoad.mir @@ -25,11 +25,11 @@ body: | ; MIPS32-LABEL: name: load1_s8_to_zextLoad1_s32 ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 - ; MIPS32: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load 1 from %ir.px) + ; MIPS32: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8) from %ir.px) ; MIPS32: $v0 = COPY [[ZEXTLOAD]](s32) ; MIPS32: RetRA implicit $v0 %0:_(p0) = COPY $a0 - %2:_(s32) = G_ZEXTLOAD %0(p0) :: (load 1 from %ir.px) + %2:_(s32) = G_ZEXTLOAD %0(p0) :: (load (s8) from %ir.px) $v0 = COPY %2(s32) RetRA implicit $v0 @@ -45,11 +45,11 @@ body: | ; MIPS32-LABEL: name: load2_s16_to_zextLoad2_s32 ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 - ; MIPS32: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load 2 from %ir.px) + ; MIPS32: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16) from %ir.px) ; MIPS32: $v0 = COPY [[ZEXTLOAD]](s32) ; MIPS32: RetRA implicit $v0 %0:_(p0) = COPY $a0 - %2:_(s32) = G_ZEXTLOAD %0(p0) :: (load 2 from %ir.px) + %2:_(s32) = G_ZEXTLOAD %0(p0) :: (load (s16) from %ir.px) $v0 = COPY %2(s32) RetRA implicit $v0 @@ -65,12 +65,12 @@ body: | ; MIPS32-LABEL: name: load1_s8_to_zextLoad1_s16 ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 - ; MIPS32: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load 1 from %ir.px) + ; MIPS32: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8) from %ir.px) ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY [[ZEXTLOAD]](s32) ; MIPS32: $v0 = COPY [[COPY1]](s32) ; MIPS32: RetRA implicit $v0 %0:_(p0) = COPY $a0 - %2:_(s16) = G_ZEXTLOAD %0(p0) :: (load 1 from %ir.px) + %2:_(s16) = G_ZEXTLOAD %0(p0) :: (load (s8) from %ir.px) %3:_(s32) = G_ANYEXT %2(s16) $v0 = COPY %3(s32) RetRA implicit $v0 @@ -87,11 +87,11 @@ body: | ; MIPS32-LABEL: name: load1_s8_to_zextLoad1_s16_to_zextLoad1_s32 ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 - ; MIPS32: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load 1 from %ir.px) + ; MIPS32: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8) from %ir.px) ; MIPS32: $v0 = COPY [[ZEXTLOAD]](s32) ; MIPS32: RetRA implicit $v0 %0:_(p0) = COPY $a0 - %3:_(s32) = G_ZEXTLOAD %0(p0) :: (load 1 from %ir.px) + %3:_(s32) = G_ZEXTLOAD %0(p0) :: (load (s8) from %ir.px) $v0 = COPY %3(s32) RetRA implicit $v0 @@ -107,13 +107,13 @@ body: | ; MIPS32-LABEL: name: load4_s32_to_zextLoad4_s64 ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4 from %ir.px) + ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32) from %ir.px) ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; MIPS32: $v0 = COPY [[LOAD]](s32) ; MIPS32: $v1 = COPY [[C]](s32) ; MIPS32: RetRA implicit $v0, implicit $v1 %0:_(p0) = COPY $a0 - %2:_(s64) = G_ZEXTLOAD %0(p0) :: (load 4 from %ir.px) + %2:_(s64) = G_ZEXTLOAD %0(p0) :: (load (s32) from %ir.px) %3:_(s32), %4:_(s32) = G_UNMERGE_VALUES %2(s64) $v0 = COPY %3(s32) $v1 = COPY %4(s32) @@ -131,11 +131,11 @@ body: | ; MIPS32-LABEL: name: load1_s8_to_sextLoad1_s32 ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 - ; MIPS32: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load 1 from %ir.px) + ; MIPS32: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s8) from %ir.px) ; MIPS32: $v0 = COPY [[SEXTLOAD]](s32) ; MIPS32: RetRA implicit $v0 %0:_(p0) = COPY $a0 - %2:_(s32) = G_SEXTLOAD %0(p0) :: (load 1 from %ir.px) + %2:_(s32) = G_SEXTLOAD %0(p0) :: (load (s8) from %ir.px) $v0 = COPY %2(s32) RetRA implicit $v0 @@ -151,11 +151,11 @@ body: | ; MIPS32-LABEL: name: load2_s16_to_sextLoad2_s32 ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 - ; MIPS32: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load 2 from %ir.px) + ; MIPS32: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s16) from %ir.px) ; MIPS32: $v0 = COPY [[SEXTLOAD]](s32) ; MIPS32: RetRA implicit $v0 %0:_(p0) = COPY $a0 - %2:_(s32) = G_SEXTLOAD %0(p0) :: (load 2 from %ir.px) + %2:_(s32) = G_SEXTLOAD %0(p0) :: (load (s16) from %ir.px) $v0 = COPY %2(s32) RetRA implicit $v0 @@ -171,12 +171,12 @@ body: | ; MIPS32-LABEL: name: load1_s8_to_sextLoad1_s16 ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 - ; MIPS32: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load 1 from %ir.px) + ; MIPS32: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s8) from %ir.px) ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SEXTLOAD]](s32) ; MIPS32: $v0 = COPY [[COPY1]](s32) ; MIPS32: RetRA implicit $v0 %0:_(p0) = COPY $a0 - %2:_(s16) = G_SEXTLOAD %0(p0) :: (load 1 from %ir.px) + %2:_(s16) = G_SEXTLOAD %0(p0) :: (load (s8) from %ir.px) %3:_(s32) = G_ANYEXT %2(s16) $v0 = COPY %3(s32) RetRA implicit $v0 @@ -193,11 +193,11 @@ body: | ; MIPS32-LABEL: name: load1_s8_to_sextLoad1_s16_to_sextLoad1_s32 ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 - ; MIPS32: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load 1 from %ir.px) + ; MIPS32: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s8) from %ir.px) ; MIPS32: $v0 = COPY [[SEXTLOAD]](s32) ; MIPS32: RetRA implicit $v0 %0:_(p0) = COPY $a0 - %3:_(s32) = G_SEXTLOAD %0(p0) :: (load 1 from %ir.px) + %3:_(s32) = G_SEXTLOAD %0(p0) :: (load (s8) from %ir.px) $v0 = COPY %3(s32) RetRA implicit $v0 @@ -213,7 +213,7 @@ body: | ; MIPS32-LABEL: name: load4_s32_to_sextLoad4_s64 ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4 from %ir.px) + ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32) from %ir.px) ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; MIPS32: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[LOAD]], [[C]](s32) @@ -221,7 +221,7 @@ body: | ; MIPS32: $v1 = COPY [[ASHR]](s32) ; MIPS32: RetRA implicit $v0, implicit $v1 %0:_(p0) = COPY $a0 - %2:_(s64) = G_SEXTLOAD %0(p0) :: (load 4 from %ir.px) + %2:_(s64) = G_SEXTLOAD %0(p0) :: (load (s32) from %ir.px) %3:_(s32), %4:_(s32) = G_UNMERGE_VALUES %2(s64) $v0 = COPY %3(s32) $v1 = COPY %4(s32) diff --git a/llvm/test/CodeGen/Mips/GlobalISel/mips-prelegalizer-combiner/inline-memcpy.mir b/llvm/test/CodeGen/Mips/GlobalISel/mips-prelegalizer-combiner/inline-memcpy.mir index 8fc9ce00a977c..d131d8c703292 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/mips-prelegalizer-combiner/inline-memcpy.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/mips-prelegalizer-combiner/inline-memcpy.mir @@ -42,19 +42,19 @@ body: | ; MIPS32: liveins: $a0, $a1 ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY1]](p0) :: (load 1 from %ir.1, align 4) - ; MIPS32: G_STORE [[LOAD]](s8), [[COPY]](p0) :: (store 1 into %ir.0, align 4) + ; MIPS32: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY1]](p0) :: (load (s8) from %ir.1, align 4) + ; MIPS32: G_STORE [[LOAD]](s8), [[COPY]](p0) :: (store (s8) into %ir.0, align 4) ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; MIPS32: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s32) - ; MIPS32: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from %ir.1 + 1, basealign 4) + ; MIPS32: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from %ir.1 + 1, basealign 4) ; MIPS32: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) - ; MIPS32: G_STORE [[LOAD1]](s8), [[PTR_ADD1]](p0) :: (store 1 into %ir.0 + 1, basealign 4) + ; MIPS32: G_STORE [[LOAD1]](s8), [[PTR_ADD1]](p0) :: (store (s8) into %ir.0 + 1, basealign 4) ; MIPS32: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(s64) = G_CONSTANT i64 2 %3:_(s32) = G_TRUNC %2(s64) - G_MEMCPY_INLINE %0(p0), %1(p0), %3(s32) :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + G_MEMCPY_INLINE %0(p0), %1(p0), %3(s32) :: (store (s8) into %ir.0, align 4), (load (s8) from %ir.1, align 4) RetRA ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/mips-prelegalizer-combiner/truncStore_and_aExtLoad.mir b/llvm/test/CodeGen/Mips/GlobalISel/mips-prelegalizer-combiner/truncStore_and_aExtLoad.mir index a8338fa24355e..5289d88259f08 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/mips-prelegalizer-combiner/truncStore_and_aExtLoad.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/mips-prelegalizer-combiner/truncStore_and_aExtLoad.mir @@ -17,11 +17,11 @@ body: | ; MIPS32-LABEL: name: load1_s8_to_load1_s32 ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1 from %ir.px) + ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8) from %ir.px) ; MIPS32: $v0 = COPY [[LOAD]](s32) ; MIPS32: RetRA implicit $v0 %0:_(p0) = COPY $a0 - %1:_(s8) = G_LOAD %0(p0) :: (load 1 from %ir.px) + %1:_(s8) = G_LOAD %0(p0) :: (load (s8) from %ir.px) %2:_(s32) = G_ANYEXT %1(s8) $v0 = COPY %2(s32) RetRA implicit $v0 @@ -38,11 +38,11 @@ body: | ; MIPS32-LABEL: name: load2_s16_to_load2_s32 ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2 from %ir.px) + ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16) from %ir.px) ; MIPS32: $v0 = COPY [[LOAD]](s32) ; MIPS32: RetRA implicit $v0 %0:_(p0) = COPY $a0 - %1:_(s16) = G_LOAD %0(p0) :: (load 2 from %ir.px) + %1:_(s16) = G_LOAD %0(p0) :: (load (s16) from %ir.px) %2:_(s32) = G_ANYEXT %1(s16) $v0 = COPY %2(s32) RetRA implicit $v0 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/mips-prelegalizer-combiner/zextLoad_and_sextLoad.mir b/llvm/test/CodeGen/Mips/GlobalISel/mips-prelegalizer-combiner/zextLoad_and_sextLoad.mir index addd57c330c78..6390ad42e23e8 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/mips-prelegalizer-combiner/zextLoad_and_sextLoad.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/mips-prelegalizer-combiner/zextLoad_and_sextLoad.mir @@ -25,11 +25,11 @@ body: | ; MIPS32-LABEL: name: load1_s8_to_zextLoad1_s32 ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 - ; MIPS32: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load 1 from %ir.px) + ; MIPS32: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8) from %ir.px) ; MIPS32: $v0 = COPY [[ZEXTLOAD]](s32) ; MIPS32: RetRA implicit $v0 %0:_(p0) = COPY $a0 - %1:_(s8) = G_LOAD %0(p0) :: (load 1 from %ir.px) + %1:_(s8) = G_LOAD %0(p0) :: (load (s8) from %ir.px) %2:_(s32) = G_ZEXT %1(s8) $v0 = COPY %2(s32) RetRA implicit $v0 @@ -46,11 +46,11 @@ body: | ; MIPS32-LABEL: name: load2_s16_to_zextLoad2_s32 ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 - ; MIPS32: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load 2 from %ir.px) + ; MIPS32: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16) from %ir.px) ; MIPS32: $v0 = COPY [[ZEXTLOAD]](s32) ; MIPS32: RetRA implicit $v0 %0:_(p0) = COPY $a0 - %1:_(s16) = G_LOAD %0(p0) :: (load 2 from %ir.px) + %1:_(s16) = G_LOAD %0(p0) :: (load (s16) from %ir.px) %2:_(s32) = G_ZEXT %1(s16) $v0 = COPY %2(s32) RetRA implicit $v0 @@ -67,12 +67,12 @@ body: | ; MIPS32-LABEL: name: load1_s8_to_zextLoad1_s16 ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 - ; MIPS32: [[ZEXTLOAD:%[0-9]+]]:_(s16) = G_ZEXTLOAD [[COPY]](p0) :: (load 1 from %ir.px) + ; MIPS32: [[ZEXTLOAD:%[0-9]+]]:_(s16) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8) from %ir.px) ; MIPS32: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ZEXTLOAD]](s16) ; MIPS32: $v0 = COPY [[ANYEXT]](s32) ; MIPS32: RetRA implicit $v0 %0:_(p0) = COPY $a0 - %1:_(s8) = G_LOAD %0(p0) :: (load 1 from %ir.px) + %1:_(s8) = G_LOAD %0(p0) :: (load (s8) from %ir.px) %2:_(s16) = G_ZEXT %1(s8) %3:_(s32) = G_ANYEXT %2(s16) $v0 = COPY %3(s32) @@ -90,11 +90,11 @@ body: | ; MIPS32-LABEL: name: load1_s8_to_zextLoad1_s16_to_zextLoad1_s32 ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 - ; MIPS32: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load 1 from %ir.px) + ; MIPS32: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8) from %ir.px) ; MIPS32: $v0 = COPY [[ZEXTLOAD]](s32) ; MIPS32: RetRA implicit $v0 %0:_(p0) = COPY $a0 - %1:_(s8) = G_LOAD %0(p0) :: (load 1 from %ir.px) + %1:_(s8) = G_LOAD %0(p0) :: (load (s8) from %ir.px) %2:_(s16) = G_ZEXT %1(s8) %3:_(s32) = G_ZEXT %2(s16) $v0 = COPY %3(s32) @@ -112,13 +112,13 @@ body: | ; MIPS32-LABEL: name: load4_s32_to_zextLoad4_s64 ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 - ; MIPS32: [[ZEXTLOAD:%[0-9]+]]:_(s64) = G_ZEXTLOAD [[COPY]](p0) :: (load 4 from %ir.px) + ; MIPS32: [[ZEXTLOAD:%[0-9]+]]:_(s64) = G_ZEXTLOAD [[COPY]](p0) :: (load (s32) from %ir.px) ; MIPS32: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXTLOAD]](s64) ; MIPS32: $v0 = COPY [[UV]](s32) ; MIPS32: $v1 = COPY [[UV1]](s32) ; MIPS32: RetRA implicit $v0, implicit $v1 %0:_(p0) = COPY $a0 - %1:_(s32) = G_LOAD %0(p0) :: (load 4 from %ir.px) + %1:_(s32) = G_LOAD %0(p0) :: (load (s32) from %ir.px) %2:_(s64) = G_ZEXT %1(s32) %3:_(s32), %4:_(s32) = G_UNMERGE_VALUES %2(s64) $v0 = COPY %3(s32) @@ -137,11 +137,11 @@ body: | ; MIPS32-LABEL: name: load1_s8_to_sextLoad1_s32 ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 - ; MIPS32: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load 1 from %ir.px) + ; MIPS32: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s8) from %ir.px) ; MIPS32: $v0 = COPY [[SEXTLOAD]](s32) ; MIPS32: RetRA implicit $v0 %0:_(p0) = COPY $a0 - %1:_(s8) = G_LOAD %0(p0) :: (load 1 from %ir.px) + %1:_(s8) = G_LOAD %0(p0) :: (load (s8) from %ir.px) %2:_(s32) = G_SEXT %1(s8) $v0 = COPY %2(s32) RetRA implicit $v0 @@ -158,11 +158,11 @@ body: | ; MIPS32-LABEL: name: load2_s16_to_sextLoad2_s32 ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 - ; MIPS32: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load 2 from %ir.px) + ; MIPS32: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s16) from %ir.px) ; MIPS32: $v0 = COPY [[SEXTLOAD]](s32) ; MIPS32: RetRA implicit $v0 %0:_(p0) = COPY $a0 - %1:_(s16) = G_LOAD %0(p0) :: (load 2 from %ir.px) + %1:_(s16) = G_LOAD %0(p0) :: (load (s16) from %ir.px) %2:_(s32) = G_SEXT %1(s16) $v0 = COPY %2(s32) RetRA implicit $v0 @@ -179,12 +179,12 @@ body: | ; MIPS32-LABEL: name: load1_s8_to_sextLoad1_s16 ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 - ; MIPS32: [[SEXTLOAD:%[0-9]+]]:_(s16) = G_SEXTLOAD [[COPY]](p0) :: (load 1 from %ir.px) + ; MIPS32: [[SEXTLOAD:%[0-9]+]]:_(s16) = G_SEXTLOAD [[COPY]](p0) :: (load (s8) from %ir.px) ; MIPS32: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SEXTLOAD]](s16) ; MIPS32: $v0 = COPY [[ANYEXT]](s32) ; MIPS32: RetRA implicit $v0 %0:_(p0) = COPY $a0 - %1:_(s8) = G_LOAD %0(p0) :: (load 1 from %ir.px) + %1:_(s8) = G_LOAD %0(p0) :: (load (s8) from %ir.px) %2:_(s16) = G_SEXT %1(s8) %3:_(s32) = G_ANYEXT %2(s16) $v0 = COPY %3(s32) @@ -202,11 +202,11 @@ body: | ; MIPS32-LABEL: name: load1_s8_to_sextLoad1_s16_to_sextLoad1_s32 ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 - ; MIPS32: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load 1 from %ir.px) + ; MIPS32: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s8) from %ir.px) ; MIPS32: $v0 = COPY [[SEXTLOAD]](s32) ; MIPS32: RetRA implicit $v0 %0:_(p0) = COPY $a0 - %1:_(s8) = G_LOAD %0(p0) :: (load 1 from %ir.px) + %1:_(s8) = G_LOAD %0(p0) :: (load (s8) from %ir.px) %2:_(s16) = G_SEXT %1(s8) %3:_(s32) = G_SEXT %2(s16) $v0 = COPY %3(s32) @@ -224,13 +224,13 @@ body: | ; MIPS32-LABEL: name: load4_s32_to_sextLoad4_s64 ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 - ; MIPS32: [[SEXTLOAD:%[0-9]+]]:_(s64) = G_SEXTLOAD [[COPY]](p0) :: (load 4 from %ir.px) + ; MIPS32: [[SEXTLOAD:%[0-9]+]]:_(s64) = G_SEXTLOAD [[COPY]](p0) :: (load (s32) from %ir.px) ; MIPS32: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXTLOAD]](s64) ; MIPS32: $v0 = COPY [[UV]](s32) ; MIPS32: $v1 = COPY [[UV1]](s32) ; MIPS32: RetRA implicit $v0, implicit $v1 %0:_(p0) = COPY $a0 - %1:_(s32) = G_LOAD %0(p0) :: (load 4 from %ir.px) + %1:_(s32) = G_LOAD %0(p0) :: (load (s32) from %ir.px) %2:_(s64) = G_SEXT %1(s32) %3:_(s32), %4:_(s32) = G_UNMERGE_VALUES %2(s64) $v0 = COPY %3(s32) diff --git a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/TypeInfoforMF_skipCopies.mir b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/TypeInfoforMF_skipCopies.mir index bb2a93048c82e..fcc34602d8642 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/TypeInfoforMF_skipCopies.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/TypeInfoforMF_skipCopies.mir @@ -20,21 +20,21 @@ body: | ; MIPS32: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 ; MIPS32: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2 - ; MIPS32: [[LOAD:%[0-9]+]]:fprb(s32) = G_LOAD [[COPY]](p0) :: (load 4 from %ir.ptr_a) + ; MIPS32: [[LOAD:%[0-9]+]]:fprb(s32) = G_LOAD [[COPY]](p0) :: (load (s32) from %ir.ptr_a) ; MIPS32: [[COPY3:%[0-9]+]]:fprb(s32) = COPY [[LOAD]](s32) - ; MIPS32: G_STORE [[COPY3]](s32), [[COPY1]](p0) :: (store 4 into %ir.ptr_b) + ; MIPS32: G_STORE [[COPY3]](s32), [[COPY1]](p0) :: (store (s32) into %ir.ptr_b) ; MIPS32: [[COPY4:%[0-9]+]]:fprb(s32) = COPY [[COPY3]](s32) - ; MIPS32: G_STORE [[COPY4]](s32), [[COPY2]](p0) :: (store 4 into %ir.ptr_c) + ; MIPS32: G_STORE [[COPY4]](s32), [[COPY2]](p0) :: (store (s32) into %ir.ptr_c) ; MIPS32: $f0 = COPY [[COPY4]](s32) ; MIPS32: RetRA implicit $f0 %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(s32) = G_LOAD %0(p0) :: (load 4 from %ir.ptr_a) + %3:_(s32) = G_LOAD %0(p0) :: (load (s32) from %ir.ptr_a) %4:_(s32) = COPY %3(s32) - G_STORE %4(s32), %1(p0) :: (store 4 into %ir.ptr_b) + G_STORE %4(s32), %1(p0) :: (store (s32) into %ir.ptr_b) %5:_(s32) = COPY %4(s32) - G_STORE %5(s32), %2(p0) :: (store 4 into %ir.ptr_c) + G_STORE %5(s32), %2(p0) :: (store (s32) into %ir.ptr_c) $f0 = COPY %5(s32) RetRA implicit $f0 @@ -54,7 +54,7 @@ body: | ; MIPS32: [[COPY1:%[0-9]+]]:fprb(s32) = COPY $f14 ; MIPS32: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2 ; MIPS32: [[COPY3:%[0-9]+]]:gprb(s32) = COPY $a3 - ; MIPS32: [[LOAD:%[0-9]+]]:fprb(s32) = G_LOAD [[COPY2]](p0) :: (load 4 from %ir.float_ptr) + ; MIPS32: [[LOAD:%[0-9]+]]:fprb(s32) = G_LOAD [[COPY2]](p0) :: (load (s32) from %ir.float_ptr) ; MIPS32: [[FADD:%[0-9]+]]:fprb(s32) = G_FADD [[COPY1]], [[COPY]] ; MIPS32: [[COPY4:%[0-9]+]]:fprb(s32) = COPY [[FADD]](s32) ; MIPS32: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 1 @@ -68,7 +68,7 @@ body: | %1:_(s32) = COPY $f14 %2:_(p0) = COPY $a2 %4:_(s32) = COPY $a3 - %5:_(s32) = G_LOAD %2(p0) :: (load 4 from %ir.float_ptr) + %5:_(s32) = G_LOAD %2(p0) :: (load (s32) from %ir.float_ptr) %6:_(s32) = G_FADD %1, %0 %11:_(s32) = COPY %6(s32) %9:_(s32) = G_CONSTANT i32 1 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/add_vec.mir b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/add_vec.mir index 59fa2a89bf3dd..4236c15333e22 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/add_vec.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/add_vec.mir @@ -22,18 +22,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:fprb(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:fprb(<16 x s8>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:fprb(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load (<16 x s8>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:fprb(<16 x s8>) = G_LOAD [[COPY1]](p0) :: (load (<16 x s8>) from %ir.b) ; P5600: [[ADD:%[0-9]+]]:fprb(<16 x s8>) = G_ADD [[LOAD1]], [[LOAD]] - ; P5600: G_STORE [[ADD]](<16 x s8>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[ADD]](<16 x s8>), [[COPY2]](p0) :: (store (<16 x s8>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<16 x s8>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<16 x s8>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<16 x s8>) = G_LOAD %0(p0) :: (load (<16 x s8>) from %ir.a) + %4:_(<16 x s8>) = G_LOAD %1(p0) :: (load (<16 x s8>) from %ir.b) %5:_(<16 x s8>) = G_ADD %4, %3 - G_STORE %5(<16 x s8>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<16 x s8>), %2(p0) :: (store (<16 x s8>) into %ir.c) RetRA ... @@ -51,18 +51,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:fprb(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:fprb(<8 x s16>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:fprb(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load (<8 x s16>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:fprb(<8 x s16>) = G_LOAD [[COPY1]](p0) :: (load (<8 x s16>) from %ir.b) ; P5600: [[ADD:%[0-9]+]]:fprb(<8 x s16>) = G_ADD [[LOAD1]], [[LOAD]] - ; P5600: G_STORE [[ADD]](<8 x s16>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[ADD]](<8 x s16>), [[COPY2]](p0) :: (store (<8 x s16>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<8 x s16>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<8 x s16>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<8 x s16>) = G_LOAD %0(p0) :: (load (<8 x s16>) from %ir.a) + %4:_(<8 x s16>) = G_LOAD %1(p0) :: (load (<8 x s16>) from %ir.b) %5:_(<8 x s16>) = G_ADD %4, %3 - G_STORE %5(<8 x s16>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<8 x s16>), %2(p0) :: (store (<8 x s16>) into %ir.c) RetRA ... @@ -80,18 +80,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load (<4 x s32>) from %ir.b) ; P5600: [[ADD:%[0-9]+]]:fprb(<4 x s32>) = G_ADD [[LOAD1]], [[LOAD]] - ; P5600: G_STORE [[ADD]](<4 x s32>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[ADD]](<4 x s32>), [[COPY2]](p0) :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) + %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>) from %ir.b) %5:_(<4 x s32>) = G_ADD %4, %3 - G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<4 x s32>), %2(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -109,18 +109,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load (<2 x s64>) from %ir.b) ; P5600: [[ADD:%[0-9]+]]:fprb(<2 x s64>) = G_ADD [[LOAD1]], [[LOAD]] - ; P5600: G_STORE [[ADD]](<2 x s64>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[ADD]](<2 x s64>), [[COPY2]](p0) :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) + %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>) from %ir.b) %5:_(<2 x s64>) = G_ADD %4, %3 - G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<2 x s64>), %2(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/fabs_vec.mir b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/fabs_vec.mir index ae2472f031cb9..56dc87b966818 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/fabs_vec.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/fabs_vec.mir @@ -19,15 +19,15 @@ body: | ; P5600: liveins: $a0, $a1 ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 - ; P5600: [[LOAD:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) + ; P5600: [[LOAD:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>) from %ir.a) ; P5600: [[FABS:%[0-9]+]]:fprb(<4 x s32>) = G_FABS [[LOAD]] - ; P5600: G_STORE [[FABS]](<4 x s32>), [[COPY1]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[FABS]](<4 x s32>), [[COPY1]](p0) :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 - %2:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) + %2:_(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) %3:_(<4 x s32>) = G_FABS %2 - G_STORE %3(<4 x s32>), %1(p0) :: (store 16 into %ir.c) + G_STORE %3(<4 x s32>), %1(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -44,15 +44,15 @@ body: | ; P5600: liveins: $a0, $a1 ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 - ; P5600: [[LOAD:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) + ; P5600: [[LOAD:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>) from %ir.a) ; P5600: [[FABS:%[0-9]+]]:fprb(<2 x s64>) = G_FABS [[LOAD]] - ; P5600: G_STORE [[FABS]](<2 x s64>), [[COPY1]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[FABS]](<2 x s64>), [[COPY1]](p0) :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 - %2:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) + %2:_(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) %3:_(<2 x s64>) = G_FABS %2 - G_STORE %3(<2 x s64>), %1(p0) :: (store 16 into %ir.c) + G_STORE %3(<2 x s64>), %1(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/fence.mir b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/fence.mir index 9fdf17d8a8de1..e9051c3ad7f06 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/fence.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/fence.mir @@ -17,12 +17,12 @@ body: | ; MIPS32-LABEL: name: atomic_load_i32 ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 - ; MIPS32: [[LOAD:%[0-9]+]]:gprb(s32) = G_LOAD [[COPY]](p0) :: (load monotonic 4 from %ir.ptr) + ; MIPS32: [[LOAD:%[0-9]+]]:gprb(s32) = G_LOAD [[COPY]](p0) :: (load monotonic (s32) from %ir.ptr) ; MIPS32: G_FENCE 4, 1 ; MIPS32: $v0 = COPY [[LOAD]](s32) ; MIPS32: RetRA implicit $v0 %0:_(p0) = COPY $a0 - %1:_(s32) = G_LOAD %0(p0) :: (load monotonic 4 from %ir.ptr) + %1:_(s32) = G_LOAD %0(p0) :: (load monotonic (s32) from %ir.ptr) G_FENCE 4, 1 $v0 = COPY %1(s32) RetRA implicit $v0 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/floating_point_vec_arithmetic_operations.mir b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/floating_point_vec_arithmetic_operations.mir index 6cdadb0f48777..11acce60ce1ba 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/floating_point_vec_arithmetic_operations.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/floating_point_vec_arithmetic_operations.mir @@ -29,18 +29,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load (<4 x s32>) from %ir.b) ; P5600: [[FADD:%[0-9]+]]:fprb(<4 x s32>) = G_FADD [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[FADD]](<4 x s32>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[FADD]](<4 x s32>), [[COPY2]](p0) :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) + %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>) from %ir.b) %5:_(<4 x s32>) = G_FADD %3, %4 - G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<4 x s32>), %2(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -58,18 +58,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load (<2 x s64>) from %ir.b) ; P5600: [[FADD:%[0-9]+]]:fprb(<2 x s64>) = G_FADD [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[FADD]](<2 x s64>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[FADD]](<2 x s64>), [[COPY2]](p0) :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) + %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>) from %ir.b) %5:_(<2 x s64>) = G_FADD %3, %4 - G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<2 x s64>), %2(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... @@ -87,18 +87,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load (<4 x s32>) from %ir.b) ; P5600: [[FSUB:%[0-9]+]]:fprb(<4 x s32>) = G_FSUB [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[FSUB]](<4 x s32>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[FSUB]](<4 x s32>), [[COPY2]](p0) :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) + %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>) from %ir.b) %5:_(<4 x s32>) = G_FSUB %3, %4 - G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<4 x s32>), %2(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -116,18 +116,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load (<2 x s64>) from %ir.b) ; P5600: [[FSUB:%[0-9]+]]:fprb(<2 x s64>) = G_FSUB [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[FSUB]](<2 x s64>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[FSUB]](<2 x s64>), [[COPY2]](p0) :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) + %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>) from %ir.b) %5:_(<2 x s64>) = G_FSUB %3, %4 - G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<2 x s64>), %2(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... @@ -145,18 +145,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load (<4 x s32>) from %ir.b) ; P5600: [[FMUL:%[0-9]+]]:fprb(<4 x s32>) = G_FMUL [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[FMUL]](<4 x s32>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[FMUL]](<4 x s32>), [[COPY2]](p0) :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) + %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>) from %ir.b) %5:_(<4 x s32>) = G_FMUL %3, %4 - G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<4 x s32>), %2(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -174,18 +174,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load (<2 x s64>) from %ir.b) ; P5600: [[FMUL:%[0-9]+]]:fprb(<2 x s64>) = G_FMUL [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[FMUL]](<2 x s64>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[FMUL]](<2 x s64>), [[COPY2]](p0) :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) + %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>) from %ir.b) %5:_(<2 x s64>) = G_FMUL %3, %4 - G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<2 x s64>), %2(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... @@ -203,18 +203,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load (<4 x s32>) from %ir.b) ; P5600: [[FDIV:%[0-9]+]]:fprb(<4 x s32>) = G_FDIV [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[FDIV]](<4 x s32>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[FDIV]](<4 x s32>), [[COPY2]](p0) :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) + %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>) from %ir.b) %5:_(<4 x s32>) = G_FDIV %3, %4 - G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<4 x s32>), %2(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -232,18 +232,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load (<2 x s64>) from %ir.b) ; P5600: [[FDIV:%[0-9]+]]:fprb(<2 x s64>) = G_FDIV [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[FDIV]](<2 x s64>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[FDIV]](<2 x s64>), [[COPY2]](p0) :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) + %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>) from %ir.b) %5:_(<2 x s64>) = G_FDIV %3, %4 - G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<2 x s64>), %2(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/fsqrt_vec.mir b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/fsqrt_vec.mir index d36a0e5197788..8b15f07997d89 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/fsqrt_vec.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/fsqrt_vec.mir @@ -19,15 +19,15 @@ body: | ; P5600: liveins: $a0, $a1 ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 - ; P5600: [[LOAD:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) + ; P5600: [[LOAD:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>) from %ir.a) ; P5600: [[FSQRT:%[0-9]+]]:fprb(<4 x s32>) = G_FSQRT [[LOAD]] - ; P5600: G_STORE [[FSQRT]](<4 x s32>), [[COPY1]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[FSQRT]](<4 x s32>), [[COPY1]](p0) :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 - %2:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) + %2:_(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) %3:_(<4 x s32>) = G_FSQRT %2 - G_STORE %3(<4 x s32>), %1(p0) :: (store 16 into %ir.c) + G_STORE %3(<4 x s32>), %1(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -44,15 +44,15 @@ body: | ; P5600: liveins: $a0, $a1 ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 - ; P5600: [[LOAD:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) + ; P5600: [[LOAD:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>) from %ir.a) ; P5600: [[FSQRT:%[0-9]+]]:fprb(<2 x s64>) = G_FSQRT [[LOAD]] - ; P5600: G_STORE [[FSQRT]](<2 x s64>), [[COPY1]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[FSQRT]](<2 x s64>), [[COPY1]](p0) :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 - %2:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) + %2:_(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) %3:_(<2 x s64>) = G_FSQRT %2 - G_STORE %3(<2 x s64>), %1(p0) :: (store 16 into %ir.c) + G_STORE %3(<2 x s64>), %1(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/load.mir b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/load.mir index bf2535c36d5ae..cc1f9aa028fd8 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/load.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/load.mir @@ -22,11 +22,11 @@ body: | ; MIPS32-LABEL: name: load_i32 ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 - ; MIPS32: [[LOAD:%[0-9]+]]:gprb(s32) = G_LOAD [[COPY]](p0) :: (load 4 from %ir.ptr) + ; MIPS32: [[LOAD:%[0-9]+]]:gprb(s32) = G_LOAD [[COPY]](p0) :: (load (s32) from %ir.ptr) ; MIPS32: $v0 = COPY [[LOAD]](s32) ; MIPS32: RetRA implicit $v0 %0:_(p0) = COPY $a0 - %1:_(s32) = G_LOAD %0(p0) :: (load 4 from %ir.ptr) + %1:_(s32) = G_LOAD %0(p0) :: (load (s32) from %ir.ptr) $v0 = COPY %1(s32) RetRA implicit $v0 @@ -43,15 +43,15 @@ body: | ; MIPS32-LABEL: name: load_i64 ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 - ; MIPS32: [[LOAD:%[0-9]+]]:gprb(s32) = G_LOAD [[COPY]](p0) :: (load 4 from %ir.ptr, align 8) + ; MIPS32: [[LOAD:%[0-9]+]]:gprb(s32) = G_LOAD [[COPY]](p0) :: (load (s32) from %ir.ptr, align 8) ; MIPS32: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 4 - ; MIPS32: [[GEP:%[0-9]+]]:gprb(p0) = G_PTR_ADD [[COPY]], [[C]](s32) - ; MIPS32: [[LOAD1:%[0-9]+]]:gprb(s32) = G_LOAD [[GEP]](p0) :: (load 4 from %ir.ptr + 4, basealign 8) + ; MIPS32: [[PTR_ADD:%[0-9]+]]:gprb(p0) = G_PTR_ADD [[COPY]], [[C]](s32) + ; MIPS32: [[LOAD1:%[0-9]+]]:gprb(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from %ir.ptr + 4, basealign 8) ; MIPS32: $v0 = COPY [[LOAD]](s32) ; MIPS32: $v1 = COPY [[LOAD1]](s32) ; MIPS32: RetRA implicit $v0, implicit $v1 %0:_(p0) = COPY $a0 - %1:_(s64) = G_LOAD %0(p0) :: (load 8 from %ir.ptr) + %1:_(s64) = G_LOAD %0(p0) :: (load (s64) from %ir.ptr) %2:_(s32), %3:_(s32) = G_UNMERGE_VALUES %1(s64) $v0 = COPY %2(s32) $v1 = COPY %3(s32) @@ -71,13 +71,13 @@ body: | ; MIPS32: liveins: $a0, $a1 ; MIPS32: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 - ; MIPS32: [[LOAD:%[0-9]+]]:fprb(s64) = G_LOAD [[COPY]](p0) :: (load 8 from %ir.i64_ptr_a) - ; MIPS32: G_STORE [[LOAD]](s64), [[COPY1]](p0) :: (store 8 into %ir.i64_ptr_b) + ; MIPS32: [[LOAD:%[0-9]+]]:fprb(s64) = G_LOAD [[COPY]](p0) :: (load (s64) from %ir.i64_ptr_a) + ; MIPS32: G_STORE [[LOAD]](s64), [[COPY1]](p0) :: (store (s64) into %ir.i64_ptr_b) ; MIPS32: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 - %2:_(s64) = G_LOAD %0(p0) :: (load 8 from %ir.i64_ptr_a) - G_STORE %2(s64), %1(p0) :: (store 8 into %ir.i64_ptr_b) + %2:_(s64) = G_LOAD %0(p0) :: (load (s64) from %ir.i64_ptr_a) + G_STORE %2(s64), %1(p0) :: (store (s64) into %ir.i64_ptr_b) RetRA ... @@ -93,11 +93,11 @@ body: | ; MIPS32-LABEL: name: load_float ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 - ; MIPS32: [[LOAD:%[0-9]+]]:fprb(s32) = G_LOAD [[COPY]](p0) :: (load 4 from %ir.ptr) + ; MIPS32: [[LOAD:%[0-9]+]]:fprb(s32) = G_LOAD [[COPY]](p0) :: (load (s32) from %ir.ptr) ; MIPS32: $f0 = COPY [[LOAD]](s32) ; MIPS32: RetRA implicit $f0 %0:_(p0) = COPY $a0 - %1:_(s32) = G_LOAD %0(p0) :: (load 4 from %ir.ptr) + %1:_(s32) = G_LOAD %0(p0) :: (load (s32) from %ir.ptr) $f0 = COPY %1(s32) RetRA implicit $f0 @@ -115,13 +115,13 @@ body: | ; MIPS32: liveins: $a0, $a1 ; MIPS32: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 - ; MIPS32: [[LOAD:%[0-9]+]]:gprb(s32) = G_LOAD [[COPY]](p0) :: (load 4 from %ir.float_ptr_a) - ; MIPS32: G_STORE [[LOAD]](s32), [[COPY1]](p0) :: (store 4 into %ir.float_ptr_b) + ; MIPS32: [[LOAD:%[0-9]+]]:gprb(s32) = G_LOAD [[COPY]](p0) :: (load (s32) from %ir.float_ptr_a) + ; MIPS32: G_STORE [[LOAD]](s32), [[COPY1]](p0) :: (store (s32) into %ir.float_ptr_b) ; MIPS32: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 - %2:_(s32) = G_LOAD %0(p0) :: (load 4 from %ir.float_ptr_a) - G_STORE %2(s32), %1(p0) :: (store 4 into %ir.float_ptr_b) + %2:_(s32) = G_LOAD %0(p0) :: (load (s32) from %ir.float_ptr_a) + G_STORE %2(s32), %1(p0) :: (store (s32) into %ir.float_ptr_b) RetRA ... @@ -137,11 +137,11 @@ body: | ; MIPS32-LABEL: name: load_double ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 - ; MIPS32: [[LOAD:%[0-9]+]]:fprb(s64) = G_LOAD [[COPY]](p0) :: (load 8 from %ir.ptr) + ; MIPS32: [[LOAD:%[0-9]+]]:fprb(s64) = G_LOAD [[COPY]](p0) :: (load (s64) from %ir.ptr) ; MIPS32: $d0 = COPY [[LOAD]](s64) ; MIPS32: RetRA implicit $d0 %0:_(p0) = COPY $a0 - %1:_(s64) = G_LOAD %0(p0) :: (load 8 from %ir.ptr) + %1:_(s64) = G_LOAD %0(p0) :: (load (s64) from %ir.ptr) $d0 = COPY %1(s64) RetRA implicit $d0 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/load_4_unaligned.mir b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/load_4_unaligned.mir index a1eeca392ba12..863c26fe80892 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/load_4_unaligned.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/load_4_unaligned.mir @@ -35,16 +35,16 @@ body: | bb.1.entry: ; MIPS32-LABEL: name: load_float_align1 ; MIPS32: [[GV:%[0-9]+]]:gprb(p0) = G_GLOBAL_VALUE @float_align1 - ; MIPS32: [[LOAD:%[0-9]+]]:gprb(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load 4 from @float_align1, align 1) + ; MIPS32: [[LOAD:%[0-9]+]]:gprb(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load (s32) from @float_align1, align 1) ; MIPS32: $f0 = COPY [[LOAD]](s32) ; MIPS32: RetRA implicit $f0 ; MIPS32R6-LABEL: name: load_float_align1 ; MIPS32R6: [[GV:%[0-9]+]]:gprb(p0) = G_GLOBAL_VALUE @float_align1 - ; MIPS32R6: [[LOAD:%[0-9]+]]:fprb(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load 4 from @float_align1, align 1) + ; MIPS32R6: [[LOAD:%[0-9]+]]:fprb(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load (s32) from @float_align1, align 1) ; MIPS32R6: $f0 = COPY [[LOAD]](s32) ; MIPS32R6: RetRA implicit $f0 %1:_(p0) = G_GLOBAL_VALUE @float_align1 - %0:_(s32) = G_LOAD %1(p0) :: (dereferenceable load 4 from @float_align1, align 1) + %0:_(s32) = G_LOAD %1(p0) :: (dereferenceable load (s32) from @float_align1, align 1) $f0 = COPY %0(s32) RetRA implicit $f0 @@ -58,16 +58,16 @@ body: | bb.1.entry: ; MIPS32-LABEL: name: load_float_align4 ; MIPS32: [[GV:%[0-9]+]]:gprb(p0) = G_GLOBAL_VALUE @float_align4 - ; MIPS32: [[LOAD:%[0-9]+]]:fprb(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load 4 from @float_align4) + ; MIPS32: [[LOAD:%[0-9]+]]:fprb(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load (s32) from @float_align4) ; MIPS32: $f0 = COPY [[LOAD]](s32) ; MIPS32: RetRA implicit $f0 ; MIPS32R6-LABEL: name: load_float_align4 ; MIPS32R6: [[GV:%[0-9]+]]:gprb(p0) = G_GLOBAL_VALUE @float_align4 - ; MIPS32R6: [[LOAD:%[0-9]+]]:fprb(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load 4 from @float_align4) + ; MIPS32R6: [[LOAD:%[0-9]+]]:fprb(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load (s32) from @float_align4) ; MIPS32R6: $f0 = COPY [[LOAD]](s32) ; MIPS32R6: RetRA implicit $f0 %1:_(p0) = G_GLOBAL_VALUE @float_align4 - %0:_(s32) = G_LOAD %1(p0) :: (dereferenceable load 4 from @float_align4) + %0:_(s32) = G_LOAD %1(p0) :: (dereferenceable load (s32) from @float_align4) $f0 = COPY %0(s32) RetRA implicit $f0 @@ -81,16 +81,16 @@ body: | bb.1.entry: ; MIPS32-LABEL: name: load_i32_align8 ; MIPS32: [[GV:%[0-9]+]]:gprb(p0) = G_GLOBAL_VALUE @i32_align8 - ; MIPS32: [[LOAD:%[0-9]+]]:gprb(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load 4 from @i32_align8, align 8) + ; MIPS32: [[LOAD:%[0-9]+]]:gprb(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load (s32) from @i32_align8, align 8) ; MIPS32: $v0 = COPY [[LOAD]](s32) ; MIPS32: RetRA implicit $v0 ; MIPS32R6-LABEL: name: load_i32_align8 ; MIPS32R6: [[GV:%[0-9]+]]:gprb(p0) = G_GLOBAL_VALUE @i32_align8 - ; MIPS32R6: [[LOAD:%[0-9]+]]:gprb(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load 4 from @i32_align8, align 8) + ; MIPS32R6: [[LOAD:%[0-9]+]]:gprb(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load (s32) from @i32_align8, align 8) ; MIPS32R6: $v0 = COPY [[LOAD]](s32) ; MIPS32R6: RetRA implicit $v0 %1:_(p0) = G_GLOBAL_VALUE @i32_align8 - %0:_(s32) = G_LOAD %1(p0) :: (dereferenceable load 4 from @i32_align8, align 8) + %0:_(s32) = G_LOAD %1(p0) :: (dereferenceable load (s32) from @i32_align8, align 8) $v0 = COPY %0(s32) RetRA implicit $v0 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/load_store_vec.mir b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/load_store_vec.mir index 7b42ca0be0238..71ac16162f872 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/load_store_vec.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/load_store_vec.mir @@ -23,13 +23,13 @@ body: | ; P5600: liveins: $a0, $a1 ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 - ; P5600: [[LOAD:%[0-9]+]]:fprb(<16 x s8>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) - ; P5600: G_STORE [[LOAD]](<16 x s8>), [[COPY]](p0) :: (store 16 into %ir.a) + ; P5600: [[LOAD:%[0-9]+]]:fprb(<16 x s8>) = G_LOAD [[COPY1]](p0) :: (load (<16 x s8>) from %ir.b) + ; P5600: G_STORE [[LOAD]](<16 x s8>), [[COPY]](p0) :: (store (<16 x s8>) into %ir.a) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 - %2:_(<16 x s8>) = G_LOAD %1(p0) :: (load 16 from %ir.b) - G_STORE %2(<16 x s8>), %0(p0) :: (store 16 into %ir.a) + %2:_(<16 x s8>) = G_LOAD %1(p0) :: (load (<16 x s8>) from %ir.b) + G_STORE %2(<16 x s8>), %0(p0) :: (store (<16 x s8>) into %ir.a) RetRA ... @@ -46,13 +46,13 @@ body: | ; P5600: liveins: $a0, $a1 ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 - ; P5600: [[LOAD:%[0-9]+]]:fprb(<8 x s16>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) - ; P5600: G_STORE [[LOAD]](<8 x s16>), [[COPY]](p0) :: (store 16 into %ir.a) + ; P5600: [[LOAD:%[0-9]+]]:fprb(<8 x s16>) = G_LOAD [[COPY1]](p0) :: (load (<8 x s16>) from %ir.b) + ; P5600: G_STORE [[LOAD]](<8 x s16>), [[COPY]](p0) :: (store (<8 x s16>) into %ir.a) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 - %2:_(<8 x s16>) = G_LOAD %1(p0) :: (load 16 from %ir.b) - G_STORE %2(<8 x s16>), %0(p0) :: (store 16 into %ir.a) + %2:_(<8 x s16>) = G_LOAD %1(p0) :: (load (<8 x s16>) from %ir.b) + G_STORE %2(<8 x s16>), %0(p0) :: (store (<8 x s16>) into %ir.a) RetRA ... @@ -69,13 +69,13 @@ body: | ; P5600: liveins: $a0, $a1 ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 - ; P5600: [[LOAD:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) - ; P5600: G_STORE [[LOAD]](<4 x s32>), [[COPY]](p0) :: (store 16 into %ir.a) + ; P5600: [[LOAD:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load (<4 x s32>) from %ir.b) + ; P5600: G_STORE [[LOAD]](<4 x s32>), [[COPY]](p0) :: (store (<4 x s32>) into %ir.a) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 - %2:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b) - G_STORE %2(<4 x s32>), %0(p0) :: (store 16 into %ir.a) + %2:_(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>) from %ir.b) + G_STORE %2(<4 x s32>), %0(p0) :: (store (<4 x s32>) into %ir.a) RetRA ... @@ -92,13 +92,13 @@ body: | ; P5600: liveins: $a0, $a1 ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 - ; P5600: [[LOAD:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) - ; P5600: G_STORE [[LOAD]](<2 x s64>), [[COPY]](p0) :: (store 16 into %ir.a) + ; P5600: [[LOAD:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load (<2 x s64>) from %ir.b) + ; P5600: G_STORE [[LOAD]](<2 x s64>), [[COPY]](p0) :: (store (<2 x s64>) into %ir.a) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 - %2:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b) - G_STORE %2(<2 x s64>), %0(p0) :: (store 16 into %ir.a) + %2:_(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>) from %ir.b) + G_STORE %2(<2 x s64>), %0(p0) :: (store (<2 x s64>) into %ir.a) RetRA ... @@ -115,13 +115,13 @@ body: | ; P5600: liveins: $a0, $a1 ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 - ; P5600: [[LOAD:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) - ; P5600: G_STORE [[LOAD]](<4 x s32>), [[COPY]](p0) :: (store 16 into %ir.a) + ; P5600: [[LOAD:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load (<4 x s32>) from %ir.b) + ; P5600: G_STORE [[LOAD]](<4 x s32>), [[COPY]](p0) :: (store (<4 x s32>) into %ir.a) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 - %2:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b) - G_STORE %2(<4 x s32>), %0(p0) :: (store 16 into %ir.a) + %2:_(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>) from %ir.b) + G_STORE %2(<4 x s32>), %0(p0) :: (store (<4 x s32>) into %ir.a) RetRA ... @@ -138,13 +138,13 @@ body: | ; P5600: liveins: $a0, $a1 ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 - ; P5600: [[LOAD:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) - ; P5600: G_STORE [[LOAD]](<2 x s64>), [[COPY]](p0) :: (store 16 into %ir.a) + ; P5600: [[LOAD:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load (<2 x s64>) from %ir.b) + ; P5600: G_STORE [[LOAD]](<2 x s64>), [[COPY]](p0) :: (store (<2 x s64>) into %ir.a) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 - %2:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b) - G_STORE %2(<2 x s64>), %0(p0) :: (store 16 into %ir.a) + %2:_(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>) from %ir.b) + G_STORE %2(<2 x s64>), %0(p0) :: (store (<2 x s64>) into %ir.a) RetRA ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/long_ambiguous_chain_s32.mir b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/long_ambiguous_chain_s32.mir index 21d2936a5a256..b8e5e2d22fe89 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/long_ambiguous_chain_s32.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/long_ambiguous_chain_s32.mir @@ -258,11 +258,11 @@ body: | ; MIPS32: [[COPY2:%[0-9]+]]:gprb(s32) = COPY $a2 ; MIPS32: [[COPY3:%[0-9]+]]:gprb(p0) = COPY $a3 ; MIPS32: [[FRAME_INDEX:%[0-9]+]]:gprb(p0) = G_FRAME_INDEX %fixed-stack.0 - ; MIPS32: [[LOAD:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (load 4 from %fixed-stack.0, align 8) + ; MIPS32: [[LOAD:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (load (p0) from %fixed-stack.0, align 8) ; MIPS32: [[FRAME_INDEX1:%[0-9]+]]:gprb(p0) = G_FRAME_INDEX %fixed-stack.1 - ; MIPS32: [[LOAD1:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (load 4 from %fixed-stack.1) + ; MIPS32: [[LOAD1:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (load (p0) from %fixed-stack.1) ; MIPS32: [[FRAME_INDEX2:%[0-9]+]]:gprb(p0) = G_FRAME_INDEX %fixed-stack.2 - ; MIPS32: [[LOAD2:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX2]](p0) :: (load 4 from %fixed-stack.2, align 8) + ; MIPS32: [[LOAD2:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX2]](p0) :: (load (p0) from %fixed-stack.2, align 8) ; MIPS32: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 1 ; MIPS32: [[COPY4:%[0-9]+]]:gprb(s32) = COPY [[COPY]](s32) ; MIPS32: [[AND:%[0-9]+]]:gprb(s32) = G_AND [[COPY4]], [[C]] @@ -281,15 +281,15 @@ body: | ; MIPS32: G_BRCOND [[AND2]](s32), %bb.5 ; MIPS32: bb.3.b.PHI.1.0: ; MIPS32: successors: %bb.6(0x80000000) - ; MIPS32: [[LOAD3:%[0-9]+]]:fprb(s64) = G_LOAD [[COPY3]](p0) :: (load 8 from %ir.a) + ; MIPS32: [[LOAD3:%[0-9]+]]:fprb(s64) = G_LOAD [[COPY3]](p0) :: (load (s64) from %ir.a) ; MIPS32: G_BR %bb.6 ; MIPS32: bb.4.b.PHI.1.1: ; MIPS32: successors: %bb.6(0x80000000) - ; MIPS32: [[LOAD4:%[0-9]+]]:fprb(s64) = G_LOAD [[LOAD]](p0) :: (load 8 from %ir.b) + ; MIPS32: [[LOAD4:%[0-9]+]]:fprb(s64) = G_LOAD [[LOAD]](p0) :: (load (s64) from %ir.b) ; MIPS32: G_BR %bb.6 ; MIPS32: bb.5.b.PHI.1.2: ; MIPS32: successors: %bb.6(0x80000000) - ; MIPS32: [[LOAD5:%[0-9]+]]:fprb(s64) = G_LOAD [[LOAD1]](p0) :: (load 8 from %ir.c) + ; MIPS32: [[LOAD5:%[0-9]+]]:fprb(s64) = G_LOAD [[LOAD1]](p0) :: (load (s64) from %ir.c) ; MIPS32: bb.6.b.PHI.1: ; MIPS32: successors: %bb.7(0x40000000), %bb.13(0x40000000) ; MIPS32: [[PHI:%[0-9]+]]:fprb(s64) = G_PHI [[LOAD3]](s64), %bb.3, [[LOAD4]](s64), %bb.4, [[LOAD5]](s64), %bb.5 @@ -299,7 +299,7 @@ body: | ; MIPS32: G_BRCOND [[AND3]](s32), %bb.7 ; MIPS32: G_BR %bb.13 ; MIPS32: bb.7.b.PHI.1.end: - ; MIPS32: G_STORE [[PHI]](s64), [[LOAD2]](p0) :: (store 8 into %ir.result) + ; MIPS32: G_STORE [[PHI]](s64), [[LOAD2]](p0) :: (store (s64) into %ir.result) ; MIPS32: RetRA ; MIPS32: bb.8.pre.PHI.2: ; MIPS32: successors: %bb.9(0x40000000), %bb.10(0x40000000) @@ -310,11 +310,11 @@ body: | ; MIPS32: G_BR %bb.10 ; MIPS32: bb.9.b.PHI.2.0: ; MIPS32: successors: %bb.11(0x80000000) - ; MIPS32: [[LOAD6:%[0-9]+]]:fprb(s64) = G_LOAD [[COPY3]](p0) :: (load 8 from %ir.a) + ; MIPS32: [[LOAD6:%[0-9]+]]:fprb(s64) = G_LOAD [[COPY3]](p0) :: (load (s64) from %ir.a) ; MIPS32: G_BR %bb.11 ; MIPS32: bb.10.b.PHI.2.1: ; MIPS32: successors: %bb.11(0x80000000) - ; MIPS32: [[LOAD7:%[0-9]+]]:fprb(s64) = G_LOAD [[LOAD]](p0) :: (load 8 from %ir.b) + ; MIPS32: [[LOAD7:%[0-9]+]]:fprb(s64) = G_LOAD [[LOAD]](p0) :: (load (s64) from %ir.b) ; MIPS32: bb.11.b.PHI.2: ; MIPS32: successors: %bb.13(0x40000000), %bb.12(0x40000000) ; MIPS32: [[PHI1:%[0-9]+]]:fprb(s64) = G_PHI [[LOAD6]](s64), %bb.9, [[LOAD7]](s64), %bb.10 @@ -323,7 +323,7 @@ body: | ; MIPS32: [[AND5:%[0-9]+]]:gprb(s32) = G_AND [[COPY9]], [[C5]] ; MIPS32: G_BRCOND [[AND5]](s32), %bb.13 ; MIPS32: bb.12.b.PHI.2.end: - ; MIPS32: G_STORE [[PHI1]](s64), [[LOAD2]](p0) :: (store 8 into %ir.result) + ; MIPS32: G_STORE [[PHI1]](s64), [[LOAD2]](p0) :: (store (s64) into %ir.result) ; MIPS32: RetRA ; MIPS32: bb.13.b.PHI.3: ; MIPS32: [[PHI2:%[0-9]+]]:fprb(s64) = G_PHI [[PHI1]](s64), %bb.11, [[PHI]](s64), %bb.6 @@ -335,8 +335,8 @@ body: | ; MIPS32: [[COPY11:%[0-9]+]]:gprb(s32) = COPY [[COPY1]](s32) ; MIPS32: [[AND7:%[0-9]+]]:gprb(s32) = G_AND [[COPY11]], [[C6]] ; MIPS32: [[SELECT1:%[0-9]+]]:fprb(s64) = G_SELECT [[AND7]](s32), [[SELECT]], [[PHI2]] - ; MIPS32: G_STORE [[SELECT1]](s64), [[LOAD2]](p0) :: (store 8 into %ir.result) - ; MIPS32: G_STORE [[PHI2]](s64), [[LOAD2]](p0) :: (store 8 into %ir.result) + ; MIPS32: G_STORE [[SELECT1]](s64), [[LOAD2]](p0) :: (store (s64) into %ir.result) + ; MIPS32: G_STORE [[PHI2]](s64), [[LOAD2]](p0) :: (store (s64) into %ir.result) ; MIPS32: RetRA bb.1.entry: liveins: $a0, $a1, $a2, $a3 @@ -346,11 +346,11 @@ body: | %9:_(s32) = COPY $a2 %3:_(p0) = COPY $a3 %10:_(p0) = G_FRAME_INDEX %fixed-stack.2 - %4:_(p0) = G_LOAD %10(p0) :: (load 4 from %fixed-stack.2, align 8) + %4:_(p0) = G_LOAD %10(p0) :: (load (p0) from %fixed-stack.2, align 8) %11:_(p0) = G_FRAME_INDEX %fixed-stack.1 - %5:_(p0) = G_LOAD %11(p0) :: (load 4 from %fixed-stack.1) + %5:_(p0) = G_LOAD %11(p0) :: (load (p0) from %fixed-stack.1) %12:_(p0) = G_FRAME_INDEX %fixed-stack.0 - %6:_(p0) = G_LOAD %12(p0) :: (load 4 from %fixed-stack.0, align 8) + %6:_(p0) = G_LOAD %12(p0) :: (load (p0) from %fixed-stack.0, align 8) %32:_(s32) = G_CONSTANT i32 1 %33:_(s32) = COPY %7(s32) %31:_(s32) = G_AND %33, %32 @@ -369,15 +369,15 @@ body: | G_BRCOND %29(s32), %bb.6 bb.4.b.PHI.1.0: - %13:_(s64) = G_LOAD %3(p0) :: (load 8 from %ir.a) + %13:_(s64) = G_LOAD %3(p0) :: (load (s64) from %ir.a) G_BR %bb.7 bb.5.b.PHI.1.1: - %15:_(s64) = G_LOAD %4(p0) :: (load 8 from %ir.b) + %15:_(s64) = G_LOAD %4(p0) :: (load (s64) from %ir.b) G_BR %bb.7 bb.6.b.PHI.1.2: - %14:_(s64) = G_LOAD %5(p0) :: (load 8 from %ir.c) + %14:_(s64) = G_LOAD %5(p0) :: (load (s64) from %ir.c) bb.7.b.PHI.1: %16:_(s64) = G_PHI %13(s64), %bb.4, %15(s64), %bb.5, %14(s64), %bb.6 @@ -388,7 +388,7 @@ body: | G_BR %bb.14 bb.8.b.PHI.1.end: - G_STORE %16(s64), %6(p0) :: (store 8 into %ir.result) + G_STORE %16(s64), %6(p0) :: (store (s64) into %ir.result) RetRA bb.9.pre.PHI.2: @@ -399,11 +399,11 @@ body: | G_BR %bb.11 bb.10.b.PHI.2.0: - %18:_(s64) = G_LOAD %3(p0) :: (load 8 from %ir.a) + %18:_(s64) = G_LOAD %3(p0) :: (load (s64) from %ir.a) G_BR %bb.12 bb.11.b.PHI.2.1: - %17:_(s64) = G_LOAD %4(p0) :: (load 8 from %ir.b) + %17:_(s64) = G_LOAD %4(p0) :: (load (s64) from %ir.b) bb.12.b.PHI.2: %19:_(s64) = G_PHI %18(s64), %bb.10, %17(s64), %bb.11 @@ -413,7 +413,7 @@ body: | G_BRCOND %26(s32), %bb.14 bb.13.b.PHI.2.end: - G_STORE %19(s64), %6(p0) :: (store 8 into %ir.result) + G_STORE %19(s64), %6(p0) :: (store (s64) into %ir.result) RetRA bb.14.b.PHI.3: @@ -426,8 +426,8 @@ body: | %46:_(s32) = COPY %8(s32) %24:_(s32) = G_AND %46, %44 %23:_(s64) = G_SELECT %24(s32), %22, %20 - G_STORE %23(s64), %6(p0) :: (store 8 into %ir.result) - G_STORE %20(s64), %6(p0) :: (store 8 into %ir.result) + G_STORE %23(s64), %6(p0) :: (store (s64) into %ir.result) + G_STORE %20(s64), %6(p0) :: (store (s64) into %ir.result) RetRA ... @@ -450,11 +450,11 @@ body: | ; MIPS32: [[COPY2:%[0-9]+]]:gprb(s32) = COPY $a2 ; MIPS32: [[COPY3:%[0-9]+]]:gprb(p0) = COPY $a3 ; MIPS32: [[FRAME_INDEX:%[0-9]+]]:gprb(p0) = G_FRAME_INDEX %fixed-stack.0 - ; MIPS32: [[LOAD:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (load 4 from %fixed-stack.0, align 8) + ; MIPS32: [[LOAD:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (load (p0) from %fixed-stack.0, align 8) ; MIPS32: [[FRAME_INDEX1:%[0-9]+]]:gprb(p0) = G_FRAME_INDEX %fixed-stack.1 - ; MIPS32: [[LOAD1:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (load 4 from %fixed-stack.1) + ; MIPS32: [[LOAD1:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (load (p0) from %fixed-stack.1) ; MIPS32: [[FRAME_INDEX2:%[0-9]+]]:gprb(p0) = G_FRAME_INDEX %fixed-stack.2 - ; MIPS32: [[LOAD2:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX2]](p0) :: (load 4 from %fixed-stack.2, align 8) + ; MIPS32: [[LOAD2:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX2]](p0) :: (load (p0) from %fixed-stack.2, align 8) ; MIPS32: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 0 ; MIPS32: [[C1:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 1 ; MIPS32: [[COPY4:%[0-9]+]]:gprb(s32) = COPY [[COPY]](s32) @@ -474,24 +474,24 @@ body: | ; MIPS32: G_BRCOND [[AND2]](s32), %bb.5 ; MIPS32: bb.3.b.PHI.1.0: ; MIPS32: successors: %bb.6(0x80000000) - ; MIPS32: [[LOAD3:%[0-9]+]]:gprb(s32) = G_LOAD [[COPY3]](p0) :: (load 4 from %ir.a, align 8) + ; MIPS32: [[LOAD3:%[0-9]+]]:gprb(s32) = G_LOAD [[COPY3]](p0) :: (load (s32) from %ir.a, align 8) ; MIPS32: [[C4:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 4 - ; MIPS32: [[GEP:%[0-9]+]]:gprb(p0) = G_PTR_ADD [[COPY3]], [[C4]](s32) - ; MIPS32: [[LOAD4:%[0-9]+]]:gprb(s32) = G_LOAD [[GEP]](p0) :: (load 4 from %ir.a + 4, basealign 8) + ; MIPS32: [[PTR_ADD:%[0-9]+]]:gprb(p0) = G_PTR_ADD [[COPY3]], [[C4]](s32) + ; MIPS32: [[LOAD4:%[0-9]+]]:gprb(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from %ir.a + 4, basealign 8) ; MIPS32: G_BR %bb.6 ; MIPS32: bb.4.b.PHI.1.1: ; MIPS32: successors: %bb.6(0x80000000) - ; MIPS32: [[LOAD5:%[0-9]+]]:gprb(s32) = G_LOAD [[LOAD]](p0) :: (load 4 from %ir.b, align 8) + ; MIPS32: [[LOAD5:%[0-9]+]]:gprb(s32) = G_LOAD [[LOAD]](p0) :: (load (s32) from %ir.b, align 8) ; MIPS32: [[C5:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 4 - ; MIPS32: [[GEP1:%[0-9]+]]:gprb(p0) = G_PTR_ADD [[LOAD]], [[C5]](s32) - ; MIPS32: [[LOAD6:%[0-9]+]]:gprb(s32) = G_LOAD [[GEP1]](p0) :: (load 4 from %ir.b + 4, basealign 8) + ; MIPS32: [[PTR_ADD1:%[0-9]+]]:gprb(p0) = G_PTR_ADD [[LOAD]], [[C5]](s32) + ; MIPS32: [[LOAD6:%[0-9]+]]:gprb(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from %ir.b + 4, basealign 8) ; MIPS32: G_BR %bb.6 ; MIPS32: bb.5.b.PHI.1.2: ; MIPS32: successors: %bb.6(0x80000000) - ; MIPS32: [[LOAD7:%[0-9]+]]:gprb(s32) = G_LOAD [[LOAD1]](p0) :: (load 4 from %ir.c, align 8) + ; MIPS32: [[LOAD7:%[0-9]+]]:gprb(s32) = G_LOAD [[LOAD1]](p0) :: (load (s32) from %ir.c, align 8) ; MIPS32: [[C6:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 4 - ; MIPS32: [[GEP2:%[0-9]+]]:gprb(p0) = G_PTR_ADD [[LOAD1]], [[C6]](s32) - ; MIPS32: [[LOAD8:%[0-9]+]]:gprb(s32) = G_LOAD [[GEP2]](p0) :: (load 4 from %ir.c + 4, basealign 8) + ; MIPS32: [[PTR_ADD2:%[0-9]+]]:gprb(p0) = G_PTR_ADD [[LOAD1]], [[C6]](s32) + ; MIPS32: [[LOAD8:%[0-9]+]]:gprb(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from %ir.c + 4, basealign 8) ; MIPS32: bb.6.b.PHI.1: ; MIPS32: successors: %bb.7(0x40000000), %bb.13(0x40000000) ; MIPS32: [[PHI:%[0-9]+]]:gprb(s32) = G_PHI [[LOAD3]](s32), %bb.3, [[LOAD5]](s32), %bb.4, [[LOAD7]](s32), %bb.5 @@ -502,10 +502,10 @@ body: | ; MIPS32: G_BRCOND [[AND3]](s32), %bb.7 ; MIPS32: G_BR %bb.13 ; MIPS32: bb.7.b.PHI.1.end: - ; MIPS32: G_STORE [[PHI]](s32), [[LOAD2]](p0) :: (store 4 into %ir.result, align 8) + ; MIPS32: G_STORE [[PHI]](s32), [[LOAD2]](p0) :: (store (s32) into %ir.result, align 8) ; MIPS32: [[C8:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 4 - ; MIPS32: [[GEP3:%[0-9]+]]:gprb(p0) = G_PTR_ADD [[LOAD2]], [[C8]](s32) - ; MIPS32: G_STORE [[PHI1]](s32), [[GEP3]](p0) :: (store 4 into %ir.result + 4, basealign 8) + ; MIPS32: [[PTR_ADD3:%[0-9]+]]:gprb(p0) = G_PTR_ADD [[LOAD2]], [[C8]](s32) + ; MIPS32: G_STORE [[PHI1]](s32), [[PTR_ADD3]](p0) :: (store (s32) into %ir.result + 4, basealign 8) ; MIPS32: RetRA ; MIPS32: bb.8.pre.PHI.2: ; MIPS32: successors: %bb.9(0x40000000), %bb.10(0x40000000) @@ -516,17 +516,17 @@ body: | ; MIPS32: G_BR %bb.10 ; MIPS32: bb.9.b.PHI.2.0: ; MIPS32: successors: %bb.11(0x80000000) - ; MIPS32: [[LOAD9:%[0-9]+]]:gprb(s32) = G_LOAD [[COPY3]](p0) :: (load 4 from %ir.a, align 8) + ; MIPS32: [[LOAD9:%[0-9]+]]:gprb(s32) = G_LOAD [[COPY3]](p0) :: (load (s32) from %ir.a, align 8) ; MIPS32: [[C10:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 4 - ; MIPS32: [[GEP4:%[0-9]+]]:gprb(p0) = G_PTR_ADD [[COPY3]], [[C10]](s32) - ; MIPS32: [[LOAD10:%[0-9]+]]:gprb(s32) = G_LOAD [[GEP4]](p0) :: (load 4 from %ir.a + 4, basealign 8) + ; MIPS32: [[PTR_ADD4:%[0-9]+]]:gprb(p0) = G_PTR_ADD [[COPY3]], [[C10]](s32) + ; MIPS32: [[LOAD10:%[0-9]+]]:gprb(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s32) from %ir.a + 4, basealign 8) ; MIPS32: G_BR %bb.11 ; MIPS32: bb.10.b.PHI.2.1: ; MIPS32: successors: %bb.11(0x80000000) - ; MIPS32: [[LOAD11:%[0-9]+]]:gprb(s32) = G_LOAD [[LOAD]](p0) :: (load 4 from %ir.b, align 8) + ; MIPS32: [[LOAD11:%[0-9]+]]:gprb(s32) = G_LOAD [[LOAD]](p0) :: (load (s32) from %ir.b, align 8) ; MIPS32: [[C11:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 4 - ; MIPS32: [[GEP5:%[0-9]+]]:gprb(p0) = G_PTR_ADD [[LOAD]], [[C11]](s32) - ; MIPS32: [[LOAD12:%[0-9]+]]:gprb(s32) = G_LOAD [[GEP5]](p0) :: (load 4 from %ir.b + 4, basealign 8) + ; MIPS32: [[PTR_ADD5:%[0-9]+]]:gprb(p0) = G_PTR_ADD [[LOAD]], [[C11]](s32) + ; MIPS32: [[LOAD12:%[0-9]+]]:gprb(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s32) from %ir.b + 4, basealign 8) ; MIPS32: bb.11.b.PHI.2: ; MIPS32: successors: %bb.13(0x40000000), %bb.12(0x40000000) ; MIPS32: [[PHI2:%[0-9]+]]:gprb(s32) = G_PHI [[LOAD9]](s32), %bb.9, [[LOAD11]](s32), %bb.10 @@ -536,10 +536,10 @@ body: | ; MIPS32: [[AND5:%[0-9]+]]:gprb(s32) = G_AND [[COPY9]], [[C12]] ; MIPS32: G_BRCOND [[AND5]](s32), %bb.13 ; MIPS32: bb.12.b.PHI.2.end: - ; MIPS32: G_STORE [[PHI2]](s32), [[LOAD2]](p0) :: (store 4 into %ir.result, align 8) + ; MIPS32: G_STORE [[PHI2]](s32), [[LOAD2]](p0) :: (store (s32) into %ir.result, align 8) ; MIPS32: [[C13:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 4 - ; MIPS32: [[GEP6:%[0-9]+]]:gprb(p0) = G_PTR_ADD [[LOAD2]], [[C13]](s32) - ; MIPS32: G_STORE [[PHI3]](s32), [[GEP6]](p0) :: (store 4 into %ir.result + 4, basealign 8) + ; MIPS32: [[PTR_ADD6:%[0-9]+]]:gprb(p0) = G_PTR_ADD [[LOAD2]], [[C13]](s32) + ; MIPS32: G_STORE [[PHI3]](s32), [[PTR_ADD6]](p0) :: (store (s32) into %ir.result + 4, basealign 8) ; MIPS32: RetRA ; MIPS32: bb.13.b.PHI.3: ; MIPS32: [[PHI4:%[0-9]+]]:gprb(s32) = G_PHI [[PHI2]](s32), %bb.11, [[PHI]](s32), %bb.6 @@ -555,14 +555,14 @@ body: | ; MIPS32: [[AND7:%[0-9]+]]:gprb(s32) = G_AND [[COPY11]], [[C14]] ; MIPS32: [[SELECT2:%[0-9]+]]:gprb(s32) = G_SELECT [[AND7]](s32), [[SELECT]], [[PHI4]] ; MIPS32: [[SELECT3:%[0-9]+]]:gprb(s32) = G_SELECT [[AND7]](s32), [[SELECT1]], [[PHI5]] - ; MIPS32: G_STORE [[SELECT2]](s32), [[LOAD2]](p0) :: (store 4 into %ir.result, align 8) + ; MIPS32: G_STORE [[SELECT2]](s32), [[LOAD2]](p0) :: (store (s32) into %ir.result, align 8) ; MIPS32: [[C15:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 4 - ; MIPS32: [[GEP7:%[0-9]+]]:gprb(p0) = G_PTR_ADD [[LOAD2]], [[C15]](s32) - ; MIPS32: G_STORE [[SELECT3]](s32), [[GEP7]](p0) :: (store 4 into %ir.result + 4, basealign 8) - ; MIPS32: G_STORE [[PHI4]](s32), [[LOAD2]](p0) :: (store 4 into %ir.result, align 8) + ; MIPS32: [[PTR_ADD7:%[0-9]+]]:gprb(p0) = G_PTR_ADD [[LOAD2]], [[C15]](s32) + ; MIPS32: G_STORE [[SELECT3]](s32), [[PTR_ADD7]](p0) :: (store (s32) into %ir.result + 4, basealign 8) + ; MIPS32: G_STORE [[PHI4]](s32), [[LOAD2]](p0) :: (store (s32) into %ir.result, align 8) ; MIPS32: [[C16:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 4 - ; MIPS32: [[GEP8:%[0-9]+]]:gprb(p0) = G_PTR_ADD [[LOAD2]], [[C16]](s32) - ; MIPS32: G_STORE [[PHI5]](s32), [[GEP8]](p0) :: (store 4 into %ir.result + 4, basealign 8) + ; MIPS32: [[PTR_ADD8:%[0-9]+]]:gprb(p0) = G_PTR_ADD [[LOAD2]], [[C16]](s32) + ; MIPS32: G_STORE [[PHI5]](s32), [[PTR_ADD8]](p0) :: (store (s32) into %ir.result + 4, basealign 8) ; MIPS32: RetRA bb.1.entry: liveins: $a0, $a1, $a2, $a3 @@ -572,11 +572,11 @@ body: | %9:_(s32) = COPY $a2 %3:_(p0) = COPY $a3 %10:_(p0) = G_FRAME_INDEX %fixed-stack.2 - %4:_(p0) = G_LOAD %10(p0) :: (load 4 from %fixed-stack.2, align 8) + %4:_(p0) = G_LOAD %10(p0) :: (load (p0) from %fixed-stack.2, align 8) %11:_(p0) = G_FRAME_INDEX %fixed-stack.1 - %5:_(p0) = G_LOAD %11(p0) :: (load 4 from %fixed-stack.1) + %5:_(p0) = G_LOAD %11(p0) :: (load (p0) from %fixed-stack.1) %12:_(p0) = G_FRAME_INDEX %fixed-stack.0 - %6:_(p0) = G_LOAD %12(p0) :: (load 4 from %fixed-stack.0, align 8) + %6:_(p0) = G_LOAD %12(p0) :: (load (p0) from %fixed-stack.0, align 8) %33:_(s32) = G_CONSTANT i32 0 %24:_(s64) = G_MERGE_VALUES %33(s32), %33(s32) %34:_(s32) = G_CONSTANT i32 1 @@ -597,15 +597,15 @@ body: | G_BRCOND %30(s32), %bb.6 bb.4.b.PHI.1.0: - %13:_(s64) = G_LOAD %3(p0) :: (load 8 from %ir.a) + %13:_(s64) = G_LOAD %3(p0) :: (load (s64) from %ir.a) G_BR %bb.7 bb.5.b.PHI.1.1: - %15:_(s64) = G_LOAD %4(p0) :: (load 8 from %ir.b) + %15:_(s64) = G_LOAD %4(p0) :: (load (s64) from %ir.b) G_BR %bb.7 bb.6.b.PHI.1.2: - %14:_(s64) = G_LOAD %5(p0) :: (load 8 from %ir.c) + %14:_(s64) = G_LOAD %5(p0) :: (load (s64) from %ir.c) bb.7.b.PHI.1: %16:_(s64) = G_PHI %13(s64), %bb.4, %15(s64), %bb.5, %14(s64), %bb.6 @@ -616,7 +616,7 @@ body: | G_BR %bb.14 bb.8.b.PHI.1.end: - G_STORE %16(s64), %6(p0) :: (store 8 into %ir.result) + G_STORE %16(s64), %6(p0) :: (store (s64) into %ir.result) RetRA bb.9.pre.PHI.2: @@ -627,11 +627,11 @@ body: | G_BR %bb.11 bb.10.b.PHI.2.0: - %18:_(s64) = G_LOAD %3(p0) :: (load 8 from %ir.a) + %18:_(s64) = G_LOAD %3(p0) :: (load (s64) from %ir.a) G_BR %bb.12 bb.11.b.PHI.2.1: - %17:_(s64) = G_LOAD %4(p0) :: (load 8 from %ir.b) + %17:_(s64) = G_LOAD %4(p0) :: (load (s64) from %ir.b) bb.12.b.PHI.2: %19:_(s64) = G_PHI %18(s64), %bb.10, %17(s64), %bb.11 @@ -641,7 +641,7 @@ body: | G_BRCOND %27(s32), %bb.14 bb.13.b.PHI.2.end: - G_STORE %19(s64), %6(p0) :: (store 8 into %ir.result) + G_STORE %19(s64), %6(p0) :: (store (s64) into %ir.result) RetRA bb.14.b.PHI.3: @@ -654,8 +654,8 @@ body: | %48:_(s32) = COPY %8(s32) %25:_(s32) = G_AND %48, %46 %23:_(s64) = G_SELECT %25(s32), %22, %20 - G_STORE %23(s64), %6(p0) :: (store 8 into %ir.result) - G_STORE %20(s64), %6(p0) :: (store 8 into %ir.result) + G_STORE %23(s64), %6(p0) :: (store (s64) into %ir.result) + G_STORE %20(s64), %6(p0) :: (store (s64) into %ir.result) RetRA ... @@ -678,11 +678,11 @@ body: | ; MIPS32: [[COPY2:%[0-9]+]]:gprb(s32) = COPY $a2 ; MIPS32: [[COPY3:%[0-9]+]]:gprb(p0) = COPY $a3 ; MIPS32: [[FRAME_INDEX:%[0-9]+]]:gprb(p0) = G_FRAME_INDEX %fixed-stack.0 - ; MIPS32: [[LOAD:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (load 4 from %fixed-stack.0, align 8) + ; MIPS32: [[LOAD:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (load (p0) from %fixed-stack.0, align 8) ; MIPS32: [[FRAME_INDEX1:%[0-9]+]]:gprb(p0) = G_FRAME_INDEX %fixed-stack.1 - ; MIPS32: [[LOAD1:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (load 4 from %fixed-stack.1) + ; MIPS32: [[LOAD1:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (load (p0) from %fixed-stack.1) ; MIPS32: [[FRAME_INDEX2:%[0-9]+]]:gprb(p0) = G_FRAME_INDEX %fixed-stack.2 - ; MIPS32: [[LOAD2:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX2]](p0) :: (load 4 from %fixed-stack.2, align 8) + ; MIPS32: [[LOAD2:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX2]](p0) :: (load (p0) from %fixed-stack.2, align 8) ; MIPS32: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 1 ; MIPS32: [[COPY4:%[0-9]+]]:gprb(s32) = COPY [[COPY]](s32) ; MIPS32: [[AND:%[0-9]+]]:gprb(s32) = G_AND [[COPY4]], [[C]] @@ -701,15 +701,15 @@ body: | ; MIPS32: G_BRCOND [[AND2]](s32), %bb.5 ; MIPS32: bb.3.b.PHI.1.0: ; MIPS32: successors: %bb.6(0x80000000) - ; MIPS32: [[LOAD3:%[0-9]+]]:fprb(s64) = G_LOAD [[COPY3]](p0) :: (load 8 from %ir.a) + ; MIPS32: [[LOAD3:%[0-9]+]]:fprb(s64) = G_LOAD [[COPY3]](p0) :: (load (s64) from %ir.a) ; MIPS32: G_BR %bb.6 ; MIPS32: bb.4.b.PHI.1.1: ; MIPS32: successors: %bb.6(0x80000000) - ; MIPS32: [[LOAD4:%[0-9]+]]:fprb(s64) = G_LOAD [[LOAD]](p0) :: (load 8 from %ir.b) + ; MIPS32: [[LOAD4:%[0-9]+]]:fprb(s64) = G_LOAD [[LOAD]](p0) :: (load (s64) from %ir.b) ; MIPS32: G_BR %bb.6 ; MIPS32: bb.5.b.PHI.1.2: ; MIPS32: successors: %bb.6(0x80000000) - ; MIPS32: [[LOAD5:%[0-9]+]]:fprb(s64) = G_LOAD [[LOAD1]](p0) :: (load 8 from %ir.c) + ; MIPS32: [[LOAD5:%[0-9]+]]:fprb(s64) = G_LOAD [[LOAD1]](p0) :: (load (s64) from %ir.c) ; MIPS32: bb.6.b.PHI.1: ; MIPS32: successors: %bb.7(0x40000000), %bb.13(0x40000000) ; MIPS32: [[PHI:%[0-9]+]]:fprb(s64) = G_PHI [[LOAD3]](s64), %bb.3, [[LOAD4]](s64), %bb.4, [[LOAD5]](s64), %bb.5 @@ -719,7 +719,7 @@ body: | ; MIPS32: G_BRCOND [[AND3]](s32), %bb.7 ; MIPS32: G_BR %bb.13 ; MIPS32: bb.7.b.PHI.1.end: - ; MIPS32: G_STORE [[PHI]](s64), [[LOAD2]](p0) :: (store 8 into %ir.result) + ; MIPS32: G_STORE [[PHI]](s64), [[LOAD2]](p0) :: (store (s64) into %ir.result) ; MIPS32: RetRA ; MIPS32: bb.8.pre.PHI.2: ; MIPS32: successors: %bb.9(0x40000000), %bb.10(0x40000000) @@ -730,11 +730,11 @@ body: | ; MIPS32: G_BR %bb.10 ; MIPS32: bb.9.b.PHI.2.0: ; MIPS32: successors: %bb.11(0x80000000) - ; MIPS32: [[LOAD6:%[0-9]+]]:fprb(s64) = G_LOAD [[COPY3]](p0) :: (load 8 from %ir.a) + ; MIPS32: [[LOAD6:%[0-9]+]]:fprb(s64) = G_LOAD [[COPY3]](p0) :: (load (s64) from %ir.a) ; MIPS32: G_BR %bb.11 ; MIPS32: bb.10.b.PHI.2.1: ; MIPS32: successors: %bb.11(0x80000000) - ; MIPS32: [[LOAD7:%[0-9]+]]:fprb(s64) = G_LOAD [[LOAD]](p0) :: (load 8 from %ir.b) + ; MIPS32: [[LOAD7:%[0-9]+]]:fprb(s64) = G_LOAD [[LOAD]](p0) :: (load (s64) from %ir.b) ; MIPS32: bb.11.b.PHI.2: ; MIPS32: successors: %bb.13(0x40000000), %bb.12(0x40000000) ; MIPS32: [[PHI1:%[0-9]+]]:fprb(s64) = G_PHI [[LOAD6]](s64), %bb.9, [[LOAD7]](s64), %bb.10 @@ -743,7 +743,7 @@ body: | ; MIPS32: [[AND5:%[0-9]+]]:gprb(s32) = G_AND [[COPY9]], [[C5]] ; MIPS32: G_BRCOND [[AND5]](s32), %bb.13 ; MIPS32: bb.12.b.PHI.2.end: - ; MIPS32: G_STORE [[PHI1]](s64), [[LOAD2]](p0) :: (store 8 into %ir.result) + ; MIPS32: G_STORE [[PHI1]](s64), [[LOAD2]](p0) :: (store (s64) into %ir.result) ; MIPS32: RetRA ; MIPS32: bb.13.b.PHI.3: ; MIPS32: [[PHI2:%[0-9]+]]:fprb(s64) = G_PHI [[PHI1]](s64), %bb.11, [[PHI]](s64), %bb.6 @@ -755,8 +755,8 @@ body: | ; MIPS32: [[COPY11:%[0-9]+]]:gprb(s32) = COPY [[COPY1]](s32) ; MIPS32: [[AND7:%[0-9]+]]:gprb(s32) = G_AND [[COPY11]], [[C6]] ; MIPS32: [[SELECT1:%[0-9]+]]:fprb(s64) = G_SELECT [[AND7]](s32), [[SELECT]], [[PHI2]] - ; MIPS32: G_STORE [[SELECT1]](s64), [[LOAD2]](p0) :: (store 8 into %ir.result) - ; MIPS32: G_STORE [[PHI2]](s64), [[LOAD2]](p0) :: (store 8 into %ir.result) + ; MIPS32: G_STORE [[SELECT1]](s64), [[LOAD2]](p0) :: (store (s64) into %ir.result) + ; MIPS32: G_STORE [[PHI2]](s64), [[LOAD2]](p0) :: (store (s64) into %ir.result) ; MIPS32: RetRA bb.1.entry: liveins: $a0, $a1, $a2, $a3 @@ -766,11 +766,11 @@ body: | %9:_(s32) = COPY $a2 %3:_(p0) = COPY $a3 %10:_(p0) = G_FRAME_INDEX %fixed-stack.2 - %4:_(p0) = G_LOAD %10(p0) :: (load 4 from %fixed-stack.2, align 8) + %4:_(p0) = G_LOAD %10(p0) :: (load (p0) from %fixed-stack.2, align 8) %11:_(p0) = G_FRAME_INDEX %fixed-stack.1 - %5:_(p0) = G_LOAD %11(p0) :: (load 4 from %fixed-stack.1) + %5:_(p0) = G_LOAD %11(p0) :: (load (p0) from %fixed-stack.1) %12:_(p0) = G_FRAME_INDEX %fixed-stack.0 - %6:_(p0) = G_LOAD %12(p0) :: (load 4 from %fixed-stack.0, align 8) + %6:_(p0) = G_LOAD %12(p0) :: (load (p0) from %fixed-stack.0, align 8) %32:_(s32) = G_CONSTANT i32 1 %33:_(s32) = COPY %7(s32) %31:_(s32) = G_AND %33, %32 @@ -789,15 +789,15 @@ body: | G_BRCOND %29(s32), %bb.6 bb.4.b.PHI.1.0: - %13:_(s64) = G_LOAD %3(p0) :: (load 8 from %ir.a) + %13:_(s64) = G_LOAD %3(p0) :: (load (s64) from %ir.a) G_BR %bb.7 bb.5.b.PHI.1.1: - %15:_(s64) = G_LOAD %4(p0) :: (load 8 from %ir.b) + %15:_(s64) = G_LOAD %4(p0) :: (load (s64) from %ir.b) G_BR %bb.7 bb.6.b.PHI.1.2: - %14:_(s64) = G_LOAD %5(p0) :: (load 8 from %ir.c) + %14:_(s64) = G_LOAD %5(p0) :: (load (s64) from %ir.c) bb.7.b.PHI.1: %16:_(s64) = G_PHI %13(s64), %bb.4, %15(s64), %bb.5, %14(s64), %bb.6 @@ -808,7 +808,7 @@ body: | G_BR %bb.14 bb.8.b.PHI.1.end: - G_STORE %16(s64), %6(p0) :: (store 8 into %ir.result) + G_STORE %16(s64), %6(p0) :: (store (s64) into %ir.result) RetRA bb.9.pre.PHI.2: @@ -819,11 +819,11 @@ body: | G_BR %bb.11 bb.10.b.PHI.2.0: - %18:_(s64) = G_LOAD %3(p0) :: (load 8 from %ir.a) + %18:_(s64) = G_LOAD %3(p0) :: (load (s64) from %ir.a) G_BR %bb.12 bb.11.b.PHI.2.1: - %17:_(s64) = G_LOAD %4(p0) :: (load 8 from %ir.b) + %17:_(s64) = G_LOAD %4(p0) :: (load (s64) from %ir.b) bb.12.b.PHI.2: %19:_(s64) = G_PHI %18(s64), %bb.10, %17(s64), %bb.11 @@ -833,7 +833,7 @@ body: | G_BRCOND %26(s32), %bb.14 bb.13.b.PHI.2.end: - G_STORE %19(s64), %6(p0) :: (store 8 into %ir.result) + G_STORE %19(s64), %6(p0) :: (store (s64) into %ir.result) RetRA bb.14.b.PHI.3: @@ -846,8 +846,8 @@ body: | %46:_(s32) = COPY %8(s32) %24:_(s32) = G_AND %46, %44 %23:_(s64) = G_SELECT %24(s32), %22, %20 - G_STORE %23(s64), %6(p0) :: (store 8 into %ir.result) - G_STORE %20(s64), %6(p0) :: (store 8 into %ir.result) + G_STORE %23(s64), %6(p0) :: (store (s64) into %ir.result) + G_STORE %20(s64), %6(p0) :: (store (s64) into %ir.result) RetRA ... @@ -870,11 +870,11 @@ body: | ; MIPS32: [[COPY2:%[0-9]+]]:gprb(s32) = COPY $a2 ; MIPS32: [[COPY3:%[0-9]+]]:gprb(p0) = COPY $a3 ; MIPS32: [[FRAME_INDEX:%[0-9]+]]:gprb(p0) = G_FRAME_INDEX %fixed-stack.0 - ; MIPS32: [[LOAD:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (load 4 from %fixed-stack.0, align 8) + ; MIPS32: [[LOAD:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (load (p0) from %fixed-stack.0, align 8) ; MIPS32: [[FRAME_INDEX1:%[0-9]+]]:gprb(p0) = G_FRAME_INDEX %fixed-stack.1 - ; MIPS32: [[LOAD1:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (load 4 from %fixed-stack.1) + ; MIPS32: [[LOAD1:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (load (p0) from %fixed-stack.1) ; MIPS32: [[FRAME_INDEX2:%[0-9]+]]:gprb(p0) = G_FRAME_INDEX %fixed-stack.2 - ; MIPS32: [[LOAD2:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX2]](p0) :: (load 4 from %fixed-stack.2, align 8) + ; MIPS32: [[LOAD2:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX2]](p0) :: (load (p0) from %fixed-stack.2, align 8) ; MIPS32: [[C:%[0-9]+]]:fprb(s64) = G_FCONSTANT double 0.000000e+00 ; MIPS32: [[C1:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 1 ; MIPS32: [[COPY4:%[0-9]+]]:gprb(s32) = COPY [[COPY]](s32) @@ -894,15 +894,15 @@ body: | ; MIPS32: G_BRCOND [[AND2]](s32), %bb.5 ; MIPS32: bb.3.b.PHI.1.0: ; MIPS32: successors: %bb.6(0x80000000) - ; MIPS32: [[LOAD3:%[0-9]+]]:fprb(s64) = G_LOAD [[COPY3]](p0) :: (load 8 from %ir.a) + ; MIPS32: [[LOAD3:%[0-9]+]]:fprb(s64) = G_LOAD [[COPY3]](p0) :: (load (s64) from %ir.a) ; MIPS32: G_BR %bb.6 ; MIPS32: bb.4.b.PHI.1.1: ; MIPS32: successors: %bb.6(0x80000000) - ; MIPS32: [[LOAD4:%[0-9]+]]:fprb(s64) = G_LOAD [[LOAD]](p0) :: (load 8 from %ir.b) + ; MIPS32: [[LOAD4:%[0-9]+]]:fprb(s64) = G_LOAD [[LOAD]](p0) :: (load (s64) from %ir.b) ; MIPS32: G_BR %bb.6 ; MIPS32: bb.5.b.PHI.1.2: ; MIPS32: successors: %bb.6(0x80000000) - ; MIPS32: [[LOAD5:%[0-9]+]]:fprb(s64) = G_LOAD [[LOAD1]](p0) :: (load 8 from %ir.c) + ; MIPS32: [[LOAD5:%[0-9]+]]:fprb(s64) = G_LOAD [[LOAD1]](p0) :: (load (s64) from %ir.c) ; MIPS32: bb.6.b.PHI.1: ; MIPS32: successors: %bb.7(0x40000000), %bb.13(0x40000000) ; MIPS32: [[PHI:%[0-9]+]]:fprb(s64) = G_PHI [[LOAD3]](s64), %bb.3, [[LOAD4]](s64), %bb.4, [[LOAD5]](s64), %bb.5 @@ -912,7 +912,7 @@ body: | ; MIPS32: G_BRCOND [[AND3]](s32), %bb.7 ; MIPS32: G_BR %bb.13 ; MIPS32: bb.7.b.PHI.1.end: - ; MIPS32: G_STORE [[PHI]](s64), [[LOAD2]](p0) :: (store 8 into %ir.result) + ; MIPS32: G_STORE [[PHI]](s64), [[LOAD2]](p0) :: (store (s64) into %ir.result) ; MIPS32: RetRA ; MIPS32: bb.8.pre.PHI.2: ; MIPS32: successors: %bb.9(0x40000000), %bb.10(0x40000000) @@ -923,11 +923,11 @@ body: | ; MIPS32: G_BR %bb.10 ; MIPS32: bb.9.b.PHI.2.0: ; MIPS32: successors: %bb.11(0x80000000) - ; MIPS32: [[LOAD6:%[0-9]+]]:fprb(s64) = G_LOAD [[COPY3]](p0) :: (load 8 from %ir.a) + ; MIPS32: [[LOAD6:%[0-9]+]]:fprb(s64) = G_LOAD [[COPY3]](p0) :: (load (s64) from %ir.a) ; MIPS32: G_BR %bb.11 ; MIPS32: bb.10.b.PHI.2.1: ; MIPS32: successors: %bb.11(0x80000000) - ; MIPS32: [[LOAD7:%[0-9]+]]:fprb(s64) = G_LOAD [[LOAD]](p0) :: (load 8 from %ir.b) + ; MIPS32: [[LOAD7:%[0-9]+]]:fprb(s64) = G_LOAD [[LOAD]](p0) :: (load (s64) from %ir.b) ; MIPS32: bb.11.b.PHI.2: ; MIPS32: successors: %bb.13(0x40000000), %bb.12(0x40000000) ; MIPS32: [[PHI1:%[0-9]+]]:fprb(s64) = G_PHI [[LOAD6]](s64), %bb.9, [[LOAD7]](s64), %bb.10 @@ -936,7 +936,7 @@ body: | ; MIPS32: [[AND5:%[0-9]+]]:gprb(s32) = G_AND [[COPY9]], [[C6]] ; MIPS32: G_BRCOND [[AND5]](s32), %bb.13 ; MIPS32: bb.12.b.PHI.2.end: - ; MIPS32: G_STORE [[PHI1]](s64), [[LOAD2]](p0) :: (store 8 into %ir.result) + ; MIPS32: G_STORE [[PHI1]](s64), [[LOAD2]](p0) :: (store (s64) into %ir.result) ; MIPS32: RetRA ; MIPS32: bb.13.b.PHI.3: ; MIPS32: [[PHI2:%[0-9]+]]:fprb(s64) = G_PHI [[PHI1]](s64), %bb.11, [[PHI]](s64), %bb.6 @@ -948,8 +948,8 @@ body: | ; MIPS32: [[COPY11:%[0-9]+]]:gprb(s32) = COPY [[COPY1]](s32) ; MIPS32: [[AND7:%[0-9]+]]:gprb(s32) = G_AND [[COPY11]], [[C7]] ; MIPS32: [[SELECT1:%[0-9]+]]:fprb(s64) = G_SELECT [[AND7]](s32), [[SELECT]], [[PHI2]] - ; MIPS32: G_STORE [[SELECT1]](s64), [[LOAD2]](p0) :: (store 8 into %ir.result) - ; MIPS32: G_STORE [[PHI2]](s64), [[LOAD2]](p0) :: (store 8 into %ir.result) + ; MIPS32: G_STORE [[SELECT1]](s64), [[LOAD2]](p0) :: (store (s64) into %ir.result) + ; MIPS32: G_STORE [[PHI2]](s64), [[LOAD2]](p0) :: (store (s64) into %ir.result) ; MIPS32: RetRA bb.1.entry: liveins: $a0, $a1, $a2, $a3 @@ -959,11 +959,11 @@ body: | %9:_(s32) = COPY $a2 %3:_(p0) = COPY $a3 %10:_(p0) = G_FRAME_INDEX %fixed-stack.2 - %4:_(p0) = G_LOAD %10(p0) :: (load 4 from %fixed-stack.2, align 8) + %4:_(p0) = G_LOAD %10(p0) :: (load (p0) from %fixed-stack.2, align 8) %11:_(p0) = G_FRAME_INDEX %fixed-stack.1 - %5:_(p0) = G_LOAD %11(p0) :: (load 4 from %fixed-stack.1) + %5:_(p0) = G_LOAD %11(p0) :: (load (p0) from %fixed-stack.1) %12:_(p0) = G_FRAME_INDEX %fixed-stack.0 - %6:_(p0) = G_LOAD %12(p0) :: (load 4 from %fixed-stack.0, align 8) + %6:_(p0) = G_LOAD %12(p0) :: (load (p0) from %fixed-stack.0, align 8) %24:_(s64) = G_FCONSTANT double 0.000000e+00 %33:_(s32) = G_CONSTANT i32 1 %34:_(s32) = COPY %7(s32) @@ -983,15 +983,15 @@ body: | G_BRCOND %30(s32), %bb.6 bb.4.b.PHI.1.0: - %13:_(s64) = G_LOAD %3(p0) :: (load 8 from %ir.a) + %13:_(s64) = G_LOAD %3(p0) :: (load (s64) from %ir.a) G_BR %bb.7 bb.5.b.PHI.1.1: - %15:_(s64) = G_LOAD %4(p0) :: (load 8 from %ir.b) + %15:_(s64) = G_LOAD %4(p0) :: (load (s64) from %ir.b) G_BR %bb.7 bb.6.b.PHI.1.2: - %14:_(s64) = G_LOAD %5(p0) :: (load 8 from %ir.c) + %14:_(s64) = G_LOAD %5(p0) :: (load (s64) from %ir.c) bb.7.b.PHI.1: %16:_(s64) = G_PHI %13(s64), %bb.4, %15(s64), %bb.5, %14(s64), %bb.6 @@ -1002,7 +1002,7 @@ body: | G_BR %bb.14 bb.8.b.PHI.1.end: - G_STORE %16(s64), %6(p0) :: (store 8 into %ir.result) + G_STORE %16(s64), %6(p0) :: (store (s64) into %ir.result) RetRA bb.9.pre.PHI.2: @@ -1013,11 +1013,11 @@ body: | G_BR %bb.11 bb.10.b.PHI.2.0: - %18:_(s64) = G_LOAD %3(p0) :: (load 8 from %ir.a) + %18:_(s64) = G_LOAD %3(p0) :: (load (s64) from %ir.a) G_BR %bb.12 bb.11.b.PHI.2.1: - %17:_(s64) = G_LOAD %4(p0) :: (load 8 from %ir.b) + %17:_(s64) = G_LOAD %4(p0) :: (load (s64) from %ir.b) bb.12.b.PHI.2: %19:_(s64) = G_PHI %18(s64), %bb.10, %17(s64), %bb.11 @@ -1027,7 +1027,7 @@ body: | G_BRCOND %27(s32), %bb.14 bb.13.b.PHI.2.end: - G_STORE %19(s64), %6(p0) :: (store 8 into %ir.result) + G_STORE %19(s64), %6(p0) :: (store (s64) into %ir.result) RetRA bb.14.b.PHI.3: @@ -1040,8 +1040,8 @@ body: | %47:_(s32) = COPY %8(s32) %25:_(s32) = G_AND %47, %45 %23:_(s64) = G_SELECT %25(s32), %22, %20 - G_STORE %23(s64), %6(p0) :: (store 8 into %ir.result) - G_STORE %20(s64), %6(p0) :: (store 8 into %ir.result) + G_STORE %23(s64), %6(p0) :: (store (s64) into %ir.result) + G_STORE %20(s64), %6(p0) :: (store (s64) into %ir.result) RetRA ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/long_ambiguous_chain_s64.mir b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/long_ambiguous_chain_s64.mir index 21d2936a5a256..b8e5e2d22fe89 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/long_ambiguous_chain_s64.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/long_ambiguous_chain_s64.mir @@ -258,11 +258,11 @@ body: | ; MIPS32: [[COPY2:%[0-9]+]]:gprb(s32) = COPY $a2 ; MIPS32: [[COPY3:%[0-9]+]]:gprb(p0) = COPY $a3 ; MIPS32: [[FRAME_INDEX:%[0-9]+]]:gprb(p0) = G_FRAME_INDEX %fixed-stack.0 - ; MIPS32: [[LOAD:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (load 4 from %fixed-stack.0, align 8) + ; MIPS32: [[LOAD:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (load (p0) from %fixed-stack.0, align 8) ; MIPS32: [[FRAME_INDEX1:%[0-9]+]]:gprb(p0) = G_FRAME_INDEX %fixed-stack.1 - ; MIPS32: [[LOAD1:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (load 4 from %fixed-stack.1) + ; MIPS32: [[LOAD1:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (load (p0) from %fixed-stack.1) ; MIPS32: [[FRAME_INDEX2:%[0-9]+]]:gprb(p0) = G_FRAME_INDEX %fixed-stack.2 - ; MIPS32: [[LOAD2:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX2]](p0) :: (load 4 from %fixed-stack.2, align 8) + ; MIPS32: [[LOAD2:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX2]](p0) :: (load (p0) from %fixed-stack.2, align 8) ; MIPS32: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 1 ; MIPS32: [[COPY4:%[0-9]+]]:gprb(s32) = COPY [[COPY]](s32) ; MIPS32: [[AND:%[0-9]+]]:gprb(s32) = G_AND [[COPY4]], [[C]] @@ -281,15 +281,15 @@ body: | ; MIPS32: G_BRCOND [[AND2]](s32), %bb.5 ; MIPS32: bb.3.b.PHI.1.0: ; MIPS32: successors: %bb.6(0x80000000) - ; MIPS32: [[LOAD3:%[0-9]+]]:fprb(s64) = G_LOAD [[COPY3]](p0) :: (load 8 from %ir.a) + ; MIPS32: [[LOAD3:%[0-9]+]]:fprb(s64) = G_LOAD [[COPY3]](p0) :: (load (s64) from %ir.a) ; MIPS32: G_BR %bb.6 ; MIPS32: bb.4.b.PHI.1.1: ; MIPS32: successors: %bb.6(0x80000000) - ; MIPS32: [[LOAD4:%[0-9]+]]:fprb(s64) = G_LOAD [[LOAD]](p0) :: (load 8 from %ir.b) + ; MIPS32: [[LOAD4:%[0-9]+]]:fprb(s64) = G_LOAD [[LOAD]](p0) :: (load (s64) from %ir.b) ; MIPS32: G_BR %bb.6 ; MIPS32: bb.5.b.PHI.1.2: ; MIPS32: successors: %bb.6(0x80000000) - ; MIPS32: [[LOAD5:%[0-9]+]]:fprb(s64) = G_LOAD [[LOAD1]](p0) :: (load 8 from %ir.c) + ; MIPS32: [[LOAD5:%[0-9]+]]:fprb(s64) = G_LOAD [[LOAD1]](p0) :: (load (s64) from %ir.c) ; MIPS32: bb.6.b.PHI.1: ; MIPS32: successors: %bb.7(0x40000000), %bb.13(0x40000000) ; MIPS32: [[PHI:%[0-9]+]]:fprb(s64) = G_PHI [[LOAD3]](s64), %bb.3, [[LOAD4]](s64), %bb.4, [[LOAD5]](s64), %bb.5 @@ -299,7 +299,7 @@ body: | ; MIPS32: G_BRCOND [[AND3]](s32), %bb.7 ; MIPS32: G_BR %bb.13 ; MIPS32: bb.7.b.PHI.1.end: - ; MIPS32: G_STORE [[PHI]](s64), [[LOAD2]](p0) :: (store 8 into %ir.result) + ; MIPS32: G_STORE [[PHI]](s64), [[LOAD2]](p0) :: (store (s64) into %ir.result) ; MIPS32: RetRA ; MIPS32: bb.8.pre.PHI.2: ; MIPS32: successors: %bb.9(0x40000000), %bb.10(0x40000000) @@ -310,11 +310,11 @@ body: | ; MIPS32: G_BR %bb.10 ; MIPS32: bb.9.b.PHI.2.0: ; MIPS32: successors: %bb.11(0x80000000) - ; MIPS32: [[LOAD6:%[0-9]+]]:fprb(s64) = G_LOAD [[COPY3]](p0) :: (load 8 from %ir.a) + ; MIPS32: [[LOAD6:%[0-9]+]]:fprb(s64) = G_LOAD [[COPY3]](p0) :: (load (s64) from %ir.a) ; MIPS32: G_BR %bb.11 ; MIPS32: bb.10.b.PHI.2.1: ; MIPS32: successors: %bb.11(0x80000000) - ; MIPS32: [[LOAD7:%[0-9]+]]:fprb(s64) = G_LOAD [[LOAD]](p0) :: (load 8 from %ir.b) + ; MIPS32: [[LOAD7:%[0-9]+]]:fprb(s64) = G_LOAD [[LOAD]](p0) :: (load (s64) from %ir.b) ; MIPS32: bb.11.b.PHI.2: ; MIPS32: successors: %bb.13(0x40000000), %bb.12(0x40000000) ; MIPS32: [[PHI1:%[0-9]+]]:fprb(s64) = G_PHI [[LOAD6]](s64), %bb.9, [[LOAD7]](s64), %bb.10 @@ -323,7 +323,7 @@ body: | ; MIPS32: [[AND5:%[0-9]+]]:gprb(s32) = G_AND [[COPY9]], [[C5]] ; MIPS32: G_BRCOND [[AND5]](s32), %bb.13 ; MIPS32: bb.12.b.PHI.2.end: - ; MIPS32: G_STORE [[PHI1]](s64), [[LOAD2]](p0) :: (store 8 into %ir.result) + ; MIPS32: G_STORE [[PHI1]](s64), [[LOAD2]](p0) :: (store (s64) into %ir.result) ; MIPS32: RetRA ; MIPS32: bb.13.b.PHI.3: ; MIPS32: [[PHI2:%[0-9]+]]:fprb(s64) = G_PHI [[PHI1]](s64), %bb.11, [[PHI]](s64), %bb.6 @@ -335,8 +335,8 @@ body: | ; MIPS32: [[COPY11:%[0-9]+]]:gprb(s32) = COPY [[COPY1]](s32) ; MIPS32: [[AND7:%[0-9]+]]:gprb(s32) = G_AND [[COPY11]], [[C6]] ; MIPS32: [[SELECT1:%[0-9]+]]:fprb(s64) = G_SELECT [[AND7]](s32), [[SELECT]], [[PHI2]] - ; MIPS32: G_STORE [[SELECT1]](s64), [[LOAD2]](p0) :: (store 8 into %ir.result) - ; MIPS32: G_STORE [[PHI2]](s64), [[LOAD2]](p0) :: (store 8 into %ir.result) + ; MIPS32: G_STORE [[SELECT1]](s64), [[LOAD2]](p0) :: (store (s64) into %ir.result) + ; MIPS32: G_STORE [[PHI2]](s64), [[LOAD2]](p0) :: (store (s64) into %ir.result) ; MIPS32: RetRA bb.1.entry: liveins: $a0, $a1, $a2, $a3 @@ -346,11 +346,11 @@ body: | %9:_(s32) = COPY $a2 %3:_(p0) = COPY $a3 %10:_(p0) = G_FRAME_INDEX %fixed-stack.2 - %4:_(p0) = G_LOAD %10(p0) :: (load 4 from %fixed-stack.2, align 8) + %4:_(p0) = G_LOAD %10(p0) :: (load (p0) from %fixed-stack.2, align 8) %11:_(p0) = G_FRAME_INDEX %fixed-stack.1 - %5:_(p0) = G_LOAD %11(p0) :: (load 4 from %fixed-stack.1) + %5:_(p0) = G_LOAD %11(p0) :: (load (p0) from %fixed-stack.1) %12:_(p0) = G_FRAME_INDEX %fixed-stack.0 - %6:_(p0) = G_LOAD %12(p0) :: (load 4 from %fixed-stack.0, align 8) + %6:_(p0) = G_LOAD %12(p0) :: (load (p0) from %fixed-stack.0, align 8) %32:_(s32) = G_CONSTANT i32 1 %33:_(s32) = COPY %7(s32) %31:_(s32) = G_AND %33, %32 @@ -369,15 +369,15 @@ body: | G_BRCOND %29(s32), %bb.6 bb.4.b.PHI.1.0: - %13:_(s64) = G_LOAD %3(p0) :: (load 8 from %ir.a) + %13:_(s64) = G_LOAD %3(p0) :: (load (s64) from %ir.a) G_BR %bb.7 bb.5.b.PHI.1.1: - %15:_(s64) = G_LOAD %4(p0) :: (load 8 from %ir.b) + %15:_(s64) = G_LOAD %4(p0) :: (load (s64) from %ir.b) G_BR %bb.7 bb.6.b.PHI.1.2: - %14:_(s64) = G_LOAD %5(p0) :: (load 8 from %ir.c) + %14:_(s64) = G_LOAD %5(p0) :: (load (s64) from %ir.c) bb.7.b.PHI.1: %16:_(s64) = G_PHI %13(s64), %bb.4, %15(s64), %bb.5, %14(s64), %bb.6 @@ -388,7 +388,7 @@ body: | G_BR %bb.14 bb.8.b.PHI.1.end: - G_STORE %16(s64), %6(p0) :: (store 8 into %ir.result) + G_STORE %16(s64), %6(p0) :: (store (s64) into %ir.result) RetRA bb.9.pre.PHI.2: @@ -399,11 +399,11 @@ body: | G_BR %bb.11 bb.10.b.PHI.2.0: - %18:_(s64) = G_LOAD %3(p0) :: (load 8 from %ir.a) + %18:_(s64) = G_LOAD %3(p0) :: (load (s64) from %ir.a) G_BR %bb.12 bb.11.b.PHI.2.1: - %17:_(s64) = G_LOAD %4(p0) :: (load 8 from %ir.b) + %17:_(s64) = G_LOAD %4(p0) :: (load (s64) from %ir.b) bb.12.b.PHI.2: %19:_(s64) = G_PHI %18(s64), %bb.10, %17(s64), %bb.11 @@ -413,7 +413,7 @@ body: | G_BRCOND %26(s32), %bb.14 bb.13.b.PHI.2.end: - G_STORE %19(s64), %6(p0) :: (store 8 into %ir.result) + G_STORE %19(s64), %6(p0) :: (store (s64) into %ir.result) RetRA bb.14.b.PHI.3: @@ -426,8 +426,8 @@ body: | %46:_(s32) = COPY %8(s32) %24:_(s32) = G_AND %46, %44 %23:_(s64) = G_SELECT %24(s32), %22, %20 - G_STORE %23(s64), %6(p0) :: (store 8 into %ir.result) - G_STORE %20(s64), %6(p0) :: (store 8 into %ir.result) + G_STORE %23(s64), %6(p0) :: (store (s64) into %ir.result) + G_STORE %20(s64), %6(p0) :: (store (s64) into %ir.result) RetRA ... @@ -450,11 +450,11 @@ body: | ; MIPS32: [[COPY2:%[0-9]+]]:gprb(s32) = COPY $a2 ; MIPS32: [[COPY3:%[0-9]+]]:gprb(p0) = COPY $a3 ; MIPS32: [[FRAME_INDEX:%[0-9]+]]:gprb(p0) = G_FRAME_INDEX %fixed-stack.0 - ; MIPS32: [[LOAD:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (load 4 from %fixed-stack.0, align 8) + ; MIPS32: [[LOAD:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (load (p0) from %fixed-stack.0, align 8) ; MIPS32: [[FRAME_INDEX1:%[0-9]+]]:gprb(p0) = G_FRAME_INDEX %fixed-stack.1 - ; MIPS32: [[LOAD1:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (load 4 from %fixed-stack.1) + ; MIPS32: [[LOAD1:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (load (p0) from %fixed-stack.1) ; MIPS32: [[FRAME_INDEX2:%[0-9]+]]:gprb(p0) = G_FRAME_INDEX %fixed-stack.2 - ; MIPS32: [[LOAD2:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX2]](p0) :: (load 4 from %fixed-stack.2, align 8) + ; MIPS32: [[LOAD2:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX2]](p0) :: (load (p0) from %fixed-stack.2, align 8) ; MIPS32: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 0 ; MIPS32: [[C1:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 1 ; MIPS32: [[COPY4:%[0-9]+]]:gprb(s32) = COPY [[COPY]](s32) @@ -474,24 +474,24 @@ body: | ; MIPS32: G_BRCOND [[AND2]](s32), %bb.5 ; MIPS32: bb.3.b.PHI.1.0: ; MIPS32: successors: %bb.6(0x80000000) - ; MIPS32: [[LOAD3:%[0-9]+]]:gprb(s32) = G_LOAD [[COPY3]](p0) :: (load 4 from %ir.a, align 8) + ; MIPS32: [[LOAD3:%[0-9]+]]:gprb(s32) = G_LOAD [[COPY3]](p0) :: (load (s32) from %ir.a, align 8) ; MIPS32: [[C4:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 4 - ; MIPS32: [[GEP:%[0-9]+]]:gprb(p0) = G_PTR_ADD [[COPY3]], [[C4]](s32) - ; MIPS32: [[LOAD4:%[0-9]+]]:gprb(s32) = G_LOAD [[GEP]](p0) :: (load 4 from %ir.a + 4, basealign 8) + ; MIPS32: [[PTR_ADD:%[0-9]+]]:gprb(p0) = G_PTR_ADD [[COPY3]], [[C4]](s32) + ; MIPS32: [[LOAD4:%[0-9]+]]:gprb(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from %ir.a + 4, basealign 8) ; MIPS32: G_BR %bb.6 ; MIPS32: bb.4.b.PHI.1.1: ; MIPS32: successors: %bb.6(0x80000000) - ; MIPS32: [[LOAD5:%[0-9]+]]:gprb(s32) = G_LOAD [[LOAD]](p0) :: (load 4 from %ir.b, align 8) + ; MIPS32: [[LOAD5:%[0-9]+]]:gprb(s32) = G_LOAD [[LOAD]](p0) :: (load (s32) from %ir.b, align 8) ; MIPS32: [[C5:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 4 - ; MIPS32: [[GEP1:%[0-9]+]]:gprb(p0) = G_PTR_ADD [[LOAD]], [[C5]](s32) - ; MIPS32: [[LOAD6:%[0-9]+]]:gprb(s32) = G_LOAD [[GEP1]](p0) :: (load 4 from %ir.b + 4, basealign 8) + ; MIPS32: [[PTR_ADD1:%[0-9]+]]:gprb(p0) = G_PTR_ADD [[LOAD]], [[C5]](s32) + ; MIPS32: [[LOAD6:%[0-9]+]]:gprb(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from %ir.b + 4, basealign 8) ; MIPS32: G_BR %bb.6 ; MIPS32: bb.5.b.PHI.1.2: ; MIPS32: successors: %bb.6(0x80000000) - ; MIPS32: [[LOAD7:%[0-9]+]]:gprb(s32) = G_LOAD [[LOAD1]](p0) :: (load 4 from %ir.c, align 8) + ; MIPS32: [[LOAD7:%[0-9]+]]:gprb(s32) = G_LOAD [[LOAD1]](p0) :: (load (s32) from %ir.c, align 8) ; MIPS32: [[C6:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 4 - ; MIPS32: [[GEP2:%[0-9]+]]:gprb(p0) = G_PTR_ADD [[LOAD1]], [[C6]](s32) - ; MIPS32: [[LOAD8:%[0-9]+]]:gprb(s32) = G_LOAD [[GEP2]](p0) :: (load 4 from %ir.c + 4, basealign 8) + ; MIPS32: [[PTR_ADD2:%[0-9]+]]:gprb(p0) = G_PTR_ADD [[LOAD1]], [[C6]](s32) + ; MIPS32: [[LOAD8:%[0-9]+]]:gprb(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from %ir.c + 4, basealign 8) ; MIPS32: bb.6.b.PHI.1: ; MIPS32: successors: %bb.7(0x40000000), %bb.13(0x40000000) ; MIPS32: [[PHI:%[0-9]+]]:gprb(s32) = G_PHI [[LOAD3]](s32), %bb.3, [[LOAD5]](s32), %bb.4, [[LOAD7]](s32), %bb.5 @@ -502,10 +502,10 @@ body: | ; MIPS32: G_BRCOND [[AND3]](s32), %bb.7 ; MIPS32: G_BR %bb.13 ; MIPS32: bb.7.b.PHI.1.end: - ; MIPS32: G_STORE [[PHI]](s32), [[LOAD2]](p0) :: (store 4 into %ir.result, align 8) + ; MIPS32: G_STORE [[PHI]](s32), [[LOAD2]](p0) :: (store (s32) into %ir.result, align 8) ; MIPS32: [[C8:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 4 - ; MIPS32: [[GEP3:%[0-9]+]]:gprb(p0) = G_PTR_ADD [[LOAD2]], [[C8]](s32) - ; MIPS32: G_STORE [[PHI1]](s32), [[GEP3]](p0) :: (store 4 into %ir.result + 4, basealign 8) + ; MIPS32: [[PTR_ADD3:%[0-9]+]]:gprb(p0) = G_PTR_ADD [[LOAD2]], [[C8]](s32) + ; MIPS32: G_STORE [[PHI1]](s32), [[PTR_ADD3]](p0) :: (store (s32) into %ir.result + 4, basealign 8) ; MIPS32: RetRA ; MIPS32: bb.8.pre.PHI.2: ; MIPS32: successors: %bb.9(0x40000000), %bb.10(0x40000000) @@ -516,17 +516,17 @@ body: | ; MIPS32: G_BR %bb.10 ; MIPS32: bb.9.b.PHI.2.0: ; MIPS32: successors: %bb.11(0x80000000) - ; MIPS32: [[LOAD9:%[0-9]+]]:gprb(s32) = G_LOAD [[COPY3]](p0) :: (load 4 from %ir.a, align 8) + ; MIPS32: [[LOAD9:%[0-9]+]]:gprb(s32) = G_LOAD [[COPY3]](p0) :: (load (s32) from %ir.a, align 8) ; MIPS32: [[C10:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 4 - ; MIPS32: [[GEP4:%[0-9]+]]:gprb(p0) = G_PTR_ADD [[COPY3]], [[C10]](s32) - ; MIPS32: [[LOAD10:%[0-9]+]]:gprb(s32) = G_LOAD [[GEP4]](p0) :: (load 4 from %ir.a + 4, basealign 8) + ; MIPS32: [[PTR_ADD4:%[0-9]+]]:gprb(p0) = G_PTR_ADD [[COPY3]], [[C10]](s32) + ; MIPS32: [[LOAD10:%[0-9]+]]:gprb(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s32) from %ir.a + 4, basealign 8) ; MIPS32: G_BR %bb.11 ; MIPS32: bb.10.b.PHI.2.1: ; MIPS32: successors: %bb.11(0x80000000) - ; MIPS32: [[LOAD11:%[0-9]+]]:gprb(s32) = G_LOAD [[LOAD]](p0) :: (load 4 from %ir.b, align 8) + ; MIPS32: [[LOAD11:%[0-9]+]]:gprb(s32) = G_LOAD [[LOAD]](p0) :: (load (s32) from %ir.b, align 8) ; MIPS32: [[C11:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 4 - ; MIPS32: [[GEP5:%[0-9]+]]:gprb(p0) = G_PTR_ADD [[LOAD]], [[C11]](s32) - ; MIPS32: [[LOAD12:%[0-9]+]]:gprb(s32) = G_LOAD [[GEP5]](p0) :: (load 4 from %ir.b + 4, basealign 8) + ; MIPS32: [[PTR_ADD5:%[0-9]+]]:gprb(p0) = G_PTR_ADD [[LOAD]], [[C11]](s32) + ; MIPS32: [[LOAD12:%[0-9]+]]:gprb(s32) = G_LOAD [[PTR_ADD5]](p0) :: (load (s32) from %ir.b + 4, basealign 8) ; MIPS32: bb.11.b.PHI.2: ; MIPS32: successors: %bb.13(0x40000000), %bb.12(0x40000000) ; MIPS32: [[PHI2:%[0-9]+]]:gprb(s32) = G_PHI [[LOAD9]](s32), %bb.9, [[LOAD11]](s32), %bb.10 @@ -536,10 +536,10 @@ body: | ; MIPS32: [[AND5:%[0-9]+]]:gprb(s32) = G_AND [[COPY9]], [[C12]] ; MIPS32: G_BRCOND [[AND5]](s32), %bb.13 ; MIPS32: bb.12.b.PHI.2.end: - ; MIPS32: G_STORE [[PHI2]](s32), [[LOAD2]](p0) :: (store 4 into %ir.result, align 8) + ; MIPS32: G_STORE [[PHI2]](s32), [[LOAD2]](p0) :: (store (s32) into %ir.result, align 8) ; MIPS32: [[C13:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 4 - ; MIPS32: [[GEP6:%[0-9]+]]:gprb(p0) = G_PTR_ADD [[LOAD2]], [[C13]](s32) - ; MIPS32: G_STORE [[PHI3]](s32), [[GEP6]](p0) :: (store 4 into %ir.result + 4, basealign 8) + ; MIPS32: [[PTR_ADD6:%[0-9]+]]:gprb(p0) = G_PTR_ADD [[LOAD2]], [[C13]](s32) + ; MIPS32: G_STORE [[PHI3]](s32), [[PTR_ADD6]](p0) :: (store (s32) into %ir.result + 4, basealign 8) ; MIPS32: RetRA ; MIPS32: bb.13.b.PHI.3: ; MIPS32: [[PHI4:%[0-9]+]]:gprb(s32) = G_PHI [[PHI2]](s32), %bb.11, [[PHI]](s32), %bb.6 @@ -555,14 +555,14 @@ body: | ; MIPS32: [[AND7:%[0-9]+]]:gprb(s32) = G_AND [[COPY11]], [[C14]] ; MIPS32: [[SELECT2:%[0-9]+]]:gprb(s32) = G_SELECT [[AND7]](s32), [[SELECT]], [[PHI4]] ; MIPS32: [[SELECT3:%[0-9]+]]:gprb(s32) = G_SELECT [[AND7]](s32), [[SELECT1]], [[PHI5]] - ; MIPS32: G_STORE [[SELECT2]](s32), [[LOAD2]](p0) :: (store 4 into %ir.result, align 8) + ; MIPS32: G_STORE [[SELECT2]](s32), [[LOAD2]](p0) :: (store (s32) into %ir.result, align 8) ; MIPS32: [[C15:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 4 - ; MIPS32: [[GEP7:%[0-9]+]]:gprb(p0) = G_PTR_ADD [[LOAD2]], [[C15]](s32) - ; MIPS32: G_STORE [[SELECT3]](s32), [[GEP7]](p0) :: (store 4 into %ir.result + 4, basealign 8) - ; MIPS32: G_STORE [[PHI4]](s32), [[LOAD2]](p0) :: (store 4 into %ir.result, align 8) + ; MIPS32: [[PTR_ADD7:%[0-9]+]]:gprb(p0) = G_PTR_ADD [[LOAD2]], [[C15]](s32) + ; MIPS32: G_STORE [[SELECT3]](s32), [[PTR_ADD7]](p0) :: (store (s32) into %ir.result + 4, basealign 8) + ; MIPS32: G_STORE [[PHI4]](s32), [[LOAD2]](p0) :: (store (s32) into %ir.result, align 8) ; MIPS32: [[C16:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 4 - ; MIPS32: [[GEP8:%[0-9]+]]:gprb(p0) = G_PTR_ADD [[LOAD2]], [[C16]](s32) - ; MIPS32: G_STORE [[PHI5]](s32), [[GEP8]](p0) :: (store 4 into %ir.result + 4, basealign 8) + ; MIPS32: [[PTR_ADD8:%[0-9]+]]:gprb(p0) = G_PTR_ADD [[LOAD2]], [[C16]](s32) + ; MIPS32: G_STORE [[PHI5]](s32), [[PTR_ADD8]](p0) :: (store (s32) into %ir.result + 4, basealign 8) ; MIPS32: RetRA bb.1.entry: liveins: $a0, $a1, $a2, $a3 @@ -572,11 +572,11 @@ body: | %9:_(s32) = COPY $a2 %3:_(p0) = COPY $a3 %10:_(p0) = G_FRAME_INDEX %fixed-stack.2 - %4:_(p0) = G_LOAD %10(p0) :: (load 4 from %fixed-stack.2, align 8) + %4:_(p0) = G_LOAD %10(p0) :: (load (p0) from %fixed-stack.2, align 8) %11:_(p0) = G_FRAME_INDEX %fixed-stack.1 - %5:_(p0) = G_LOAD %11(p0) :: (load 4 from %fixed-stack.1) + %5:_(p0) = G_LOAD %11(p0) :: (load (p0) from %fixed-stack.1) %12:_(p0) = G_FRAME_INDEX %fixed-stack.0 - %6:_(p0) = G_LOAD %12(p0) :: (load 4 from %fixed-stack.0, align 8) + %6:_(p0) = G_LOAD %12(p0) :: (load (p0) from %fixed-stack.0, align 8) %33:_(s32) = G_CONSTANT i32 0 %24:_(s64) = G_MERGE_VALUES %33(s32), %33(s32) %34:_(s32) = G_CONSTANT i32 1 @@ -597,15 +597,15 @@ body: | G_BRCOND %30(s32), %bb.6 bb.4.b.PHI.1.0: - %13:_(s64) = G_LOAD %3(p0) :: (load 8 from %ir.a) + %13:_(s64) = G_LOAD %3(p0) :: (load (s64) from %ir.a) G_BR %bb.7 bb.5.b.PHI.1.1: - %15:_(s64) = G_LOAD %4(p0) :: (load 8 from %ir.b) + %15:_(s64) = G_LOAD %4(p0) :: (load (s64) from %ir.b) G_BR %bb.7 bb.6.b.PHI.1.2: - %14:_(s64) = G_LOAD %5(p0) :: (load 8 from %ir.c) + %14:_(s64) = G_LOAD %5(p0) :: (load (s64) from %ir.c) bb.7.b.PHI.1: %16:_(s64) = G_PHI %13(s64), %bb.4, %15(s64), %bb.5, %14(s64), %bb.6 @@ -616,7 +616,7 @@ body: | G_BR %bb.14 bb.8.b.PHI.1.end: - G_STORE %16(s64), %6(p0) :: (store 8 into %ir.result) + G_STORE %16(s64), %6(p0) :: (store (s64) into %ir.result) RetRA bb.9.pre.PHI.2: @@ -627,11 +627,11 @@ body: | G_BR %bb.11 bb.10.b.PHI.2.0: - %18:_(s64) = G_LOAD %3(p0) :: (load 8 from %ir.a) + %18:_(s64) = G_LOAD %3(p0) :: (load (s64) from %ir.a) G_BR %bb.12 bb.11.b.PHI.2.1: - %17:_(s64) = G_LOAD %4(p0) :: (load 8 from %ir.b) + %17:_(s64) = G_LOAD %4(p0) :: (load (s64) from %ir.b) bb.12.b.PHI.2: %19:_(s64) = G_PHI %18(s64), %bb.10, %17(s64), %bb.11 @@ -641,7 +641,7 @@ body: | G_BRCOND %27(s32), %bb.14 bb.13.b.PHI.2.end: - G_STORE %19(s64), %6(p0) :: (store 8 into %ir.result) + G_STORE %19(s64), %6(p0) :: (store (s64) into %ir.result) RetRA bb.14.b.PHI.3: @@ -654,8 +654,8 @@ body: | %48:_(s32) = COPY %8(s32) %25:_(s32) = G_AND %48, %46 %23:_(s64) = G_SELECT %25(s32), %22, %20 - G_STORE %23(s64), %6(p0) :: (store 8 into %ir.result) - G_STORE %20(s64), %6(p0) :: (store 8 into %ir.result) + G_STORE %23(s64), %6(p0) :: (store (s64) into %ir.result) + G_STORE %20(s64), %6(p0) :: (store (s64) into %ir.result) RetRA ... @@ -678,11 +678,11 @@ body: | ; MIPS32: [[COPY2:%[0-9]+]]:gprb(s32) = COPY $a2 ; MIPS32: [[COPY3:%[0-9]+]]:gprb(p0) = COPY $a3 ; MIPS32: [[FRAME_INDEX:%[0-9]+]]:gprb(p0) = G_FRAME_INDEX %fixed-stack.0 - ; MIPS32: [[LOAD:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (load 4 from %fixed-stack.0, align 8) + ; MIPS32: [[LOAD:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (load (p0) from %fixed-stack.0, align 8) ; MIPS32: [[FRAME_INDEX1:%[0-9]+]]:gprb(p0) = G_FRAME_INDEX %fixed-stack.1 - ; MIPS32: [[LOAD1:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (load 4 from %fixed-stack.1) + ; MIPS32: [[LOAD1:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (load (p0) from %fixed-stack.1) ; MIPS32: [[FRAME_INDEX2:%[0-9]+]]:gprb(p0) = G_FRAME_INDEX %fixed-stack.2 - ; MIPS32: [[LOAD2:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX2]](p0) :: (load 4 from %fixed-stack.2, align 8) + ; MIPS32: [[LOAD2:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX2]](p0) :: (load (p0) from %fixed-stack.2, align 8) ; MIPS32: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 1 ; MIPS32: [[COPY4:%[0-9]+]]:gprb(s32) = COPY [[COPY]](s32) ; MIPS32: [[AND:%[0-9]+]]:gprb(s32) = G_AND [[COPY4]], [[C]] @@ -701,15 +701,15 @@ body: | ; MIPS32: G_BRCOND [[AND2]](s32), %bb.5 ; MIPS32: bb.3.b.PHI.1.0: ; MIPS32: successors: %bb.6(0x80000000) - ; MIPS32: [[LOAD3:%[0-9]+]]:fprb(s64) = G_LOAD [[COPY3]](p0) :: (load 8 from %ir.a) + ; MIPS32: [[LOAD3:%[0-9]+]]:fprb(s64) = G_LOAD [[COPY3]](p0) :: (load (s64) from %ir.a) ; MIPS32: G_BR %bb.6 ; MIPS32: bb.4.b.PHI.1.1: ; MIPS32: successors: %bb.6(0x80000000) - ; MIPS32: [[LOAD4:%[0-9]+]]:fprb(s64) = G_LOAD [[LOAD]](p0) :: (load 8 from %ir.b) + ; MIPS32: [[LOAD4:%[0-9]+]]:fprb(s64) = G_LOAD [[LOAD]](p0) :: (load (s64) from %ir.b) ; MIPS32: G_BR %bb.6 ; MIPS32: bb.5.b.PHI.1.2: ; MIPS32: successors: %bb.6(0x80000000) - ; MIPS32: [[LOAD5:%[0-9]+]]:fprb(s64) = G_LOAD [[LOAD1]](p0) :: (load 8 from %ir.c) + ; MIPS32: [[LOAD5:%[0-9]+]]:fprb(s64) = G_LOAD [[LOAD1]](p0) :: (load (s64) from %ir.c) ; MIPS32: bb.6.b.PHI.1: ; MIPS32: successors: %bb.7(0x40000000), %bb.13(0x40000000) ; MIPS32: [[PHI:%[0-9]+]]:fprb(s64) = G_PHI [[LOAD3]](s64), %bb.3, [[LOAD4]](s64), %bb.4, [[LOAD5]](s64), %bb.5 @@ -719,7 +719,7 @@ body: | ; MIPS32: G_BRCOND [[AND3]](s32), %bb.7 ; MIPS32: G_BR %bb.13 ; MIPS32: bb.7.b.PHI.1.end: - ; MIPS32: G_STORE [[PHI]](s64), [[LOAD2]](p0) :: (store 8 into %ir.result) + ; MIPS32: G_STORE [[PHI]](s64), [[LOAD2]](p0) :: (store (s64) into %ir.result) ; MIPS32: RetRA ; MIPS32: bb.8.pre.PHI.2: ; MIPS32: successors: %bb.9(0x40000000), %bb.10(0x40000000) @@ -730,11 +730,11 @@ body: | ; MIPS32: G_BR %bb.10 ; MIPS32: bb.9.b.PHI.2.0: ; MIPS32: successors: %bb.11(0x80000000) - ; MIPS32: [[LOAD6:%[0-9]+]]:fprb(s64) = G_LOAD [[COPY3]](p0) :: (load 8 from %ir.a) + ; MIPS32: [[LOAD6:%[0-9]+]]:fprb(s64) = G_LOAD [[COPY3]](p0) :: (load (s64) from %ir.a) ; MIPS32: G_BR %bb.11 ; MIPS32: bb.10.b.PHI.2.1: ; MIPS32: successors: %bb.11(0x80000000) - ; MIPS32: [[LOAD7:%[0-9]+]]:fprb(s64) = G_LOAD [[LOAD]](p0) :: (load 8 from %ir.b) + ; MIPS32: [[LOAD7:%[0-9]+]]:fprb(s64) = G_LOAD [[LOAD]](p0) :: (load (s64) from %ir.b) ; MIPS32: bb.11.b.PHI.2: ; MIPS32: successors: %bb.13(0x40000000), %bb.12(0x40000000) ; MIPS32: [[PHI1:%[0-9]+]]:fprb(s64) = G_PHI [[LOAD6]](s64), %bb.9, [[LOAD7]](s64), %bb.10 @@ -743,7 +743,7 @@ body: | ; MIPS32: [[AND5:%[0-9]+]]:gprb(s32) = G_AND [[COPY9]], [[C5]] ; MIPS32: G_BRCOND [[AND5]](s32), %bb.13 ; MIPS32: bb.12.b.PHI.2.end: - ; MIPS32: G_STORE [[PHI1]](s64), [[LOAD2]](p0) :: (store 8 into %ir.result) + ; MIPS32: G_STORE [[PHI1]](s64), [[LOAD2]](p0) :: (store (s64) into %ir.result) ; MIPS32: RetRA ; MIPS32: bb.13.b.PHI.3: ; MIPS32: [[PHI2:%[0-9]+]]:fprb(s64) = G_PHI [[PHI1]](s64), %bb.11, [[PHI]](s64), %bb.6 @@ -755,8 +755,8 @@ body: | ; MIPS32: [[COPY11:%[0-9]+]]:gprb(s32) = COPY [[COPY1]](s32) ; MIPS32: [[AND7:%[0-9]+]]:gprb(s32) = G_AND [[COPY11]], [[C6]] ; MIPS32: [[SELECT1:%[0-9]+]]:fprb(s64) = G_SELECT [[AND7]](s32), [[SELECT]], [[PHI2]] - ; MIPS32: G_STORE [[SELECT1]](s64), [[LOAD2]](p0) :: (store 8 into %ir.result) - ; MIPS32: G_STORE [[PHI2]](s64), [[LOAD2]](p0) :: (store 8 into %ir.result) + ; MIPS32: G_STORE [[SELECT1]](s64), [[LOAD2]](p0) :: (store (s64) into %ir.result) + ; MIPS32: G_STORE [[PHI2]](s64), [[LOAD2]](p0) :: (store (s64) into %ir.result) ; MIPS32: RetRA bb.1.entry: liveins: $a0, $a1, $a2, $a3 @@ -766,11 +766,11 @@ body: | %9:_(s32) = COPY $a2 %3:_(p0) = COPY $a3 %10:_(p0) = G_FRAME_INDEX %fixed-stack.2 - %4:_(p0) = G_LOAD %10(p0) :: (load 4 from %fixed-stack.2, align 8) + %4:_(p0) = G_LOAD %10(p0) :: (load (p0) from %fixed-stack.2, align 8) %11:_(p0) = G_FRAME_INDEX %fixed-stack.1 - %5:_(p0) = G_LOAD %11(p0) :: (load 4 from %fixed-stack.1) + %5:_(p0) = G_LOAD %11(p0) :: (load (p0) from %fixed-stack.1) %12:_(p0) = G_FRAME_INDEX %fixed-stack.0 - %6:_(p0) = G_LOAD %12(p0) :: (load 4 from %fixed-stack.0, align 8) + %6:_(p0) = G_LOAD %12(p0) :: (load (p0) from %fixed-stack.0, align 8) %32:_(s32) = G_CONSTANT i32 1 %33:_(s32) = COPY %7(s32) %31:_(s32) = G_AND %33, %32 @@ -789,15 +789,15 @@ body: | G_BRCOND %29(s32), %bb.6 bb.4.b.PHI.1.0: - %13:_(s64) = G_LOAD %3(p0) :: (load 8 from %ir.a) + %13:_(s64) = G_LOAD %3(p0) :: (load (s64) from %ir.a) G_BR %bb.7 bb.5.b.PHI.1.1: - %15:_(s64) = G_LOAD %4(p0) :: (load 8 from %ir.b) + %15:_(s64) = G_LOAD %4(p0) :: (load (s64) from %ir.b) G_BR %bb.7 bb.6.b.PHI.1.2: - %14:_(s64) = G_LOAD %5(p0) :: (load 8 from %ir.c) + %14:_(s64) = G_LOAD %5(p0) :: (load (s64) from %ir.c) bb.7.b.PHI.1: %16:_(s64) = G_PHI %13(s64), %bb.4, %15(s64), %bb.5, %14(s64), %bb.6 @@ -808,7 +808,7 @@ body: | G_BR %bb.14 bb.8.b.PHI.1.end: - G_STORE %16(s64), %6(p0) :: (store 8 into %ir.result) + G_STORE %16(s64), %6(p0) :: (store (s64) into %ir.result) RetRA bb.9.pre.PHI.2: @@ -819,11 +819,11 @@ body: | G_BR %bb.11 bb.10.b.PHI.2.0: - %18:_(s64) = G_LOAD %3(p0) :: (load 8 from %ir.a) + %18:_(s64) = G_LOAD %3(p0) :: (load (s64) from %ir.a) G_BR %bb.12 bb.11.b.PHI.2.1: - %17:_(s64) = G_LOAD %4(p0) :: (load 8 from %ir.b) + %17:_(s64) = G_LOAD %4(p0) :: (load (s64) from %ir.b) bb.12.b.PHI.2: %19:_(s64) = G_PHI %18(s64), %bb.10, %17(s64), %bb.11 @@ -833,7 +833,7 @@ body: | G_BRCOND %26(s32), %bb.14 bb.13.b.PHI.2.end: - G_STORE %19(s64), %6(p0) :: (store 8 into %ir.result) + G_STORE %19(s64), %6(p0) :: (store (s64) into %ir.result) RetRA bb.14.b.PHI.3: @@ -846,8 +846,8 @@ body: | %46:_(s32) = COPY %8(s32) %24:_(s32) = G_AND %46, %44 %23:_(s64) = G_SELECT %24(s32), %22, %20 - G_STORE %23(s64), %6(p0) :: (store 8 into %ir.result) - G_STORE %20(s64), %6(p0) :: (store 8 into %ir.result) + G_STORE %23(s64), %6(p0) :: (store (s64) into %ir.result) + G_STORE %20(s64), %6(p0) :: (store (s64) into %ir.result) RetRA ... @@ -870,11 +870,11 @@ body: | ; MIPS32: [[COPY2:%[0-9]+]]:gprb(s32) = COPY $a2 ; MIPS32: [[COPY3:%[0-9]+]]:gprb(p0) = COPY $a3 ; MIPS32: [[FRAME_INDEX:%[0-9]+]]:gprb(p0) = G_FRAME_INDEX %fixed-stack.0 - ; MIPS32: [[LOAD:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (load 4 from %fixed-stack.0, align 8) + ; MIPS32: [[LOAD:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (load (p0) from %fixed-stack.0, align 8) ; MIPS32: [[FRAME_INDEX1:%[0-9]+]]:gprb(p0) = G_FRAME_INDEX %fixed-stack.1 - ; MIPS32: [[LOAD1:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (load 4 from %fixed-stack.1) + ; MIPS32: [[LOAD1:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (load (p0) from %fixed-stack.1) ; MIPS32: [[FRAME_INDEX2:%[0-9]+]]:gprb(p0) = G_FRAME_INDEX %fixed-stack.2 - ; MIPS32: [[LOAD2:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX2]](p0) :: (load 4 from %fixed-stack.2, align 8) + ; MIPS32: [[LOAD2:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX2]](p0) :: (load (p0) from %fixed-stack.2, align 8) ; MIPS32: [[C:%[0-9]+]]:fprb(s64) = G_FCONSTANT double 0.000000e+00 ; MIPS32: [[C1:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 1 ; MIPS32: [[COPY4:%[0-9]+]]:gprb(s32) = COPY [[COPY]](s32) @@ -894,15 +894,15 @@ body: | ; MIPS32: G_BRCOND [[AND2]](s32), %bb.5 ; MIPS32: bb.3.b.PHI.1.0: ; MIPS32: successors: %bb.6(0x80000000) - ; MIPS32: [[LOAD3:%[0-9]+]]:fprb(s64) = G_LOAD [[COPY3]](p0) :: (load 8 from %ir.a) + ; MIPS32: [[LOAD3:%[0-9]+]]:fprb(s64) = G_LOAD [[COPY3]](p0) :: (load (s64) from %ir.a) ; MIPS32: G_BR %bb.6 ; MIPS32: bb.4.b.PHI.1.1: ; MIPS32: successors: %bb.6(0x80000000) - ; MIPS32: [[LOAD4:%[0-9]+]]:fprb(s64) = G_LOAD [[LOAD]](p0) :: (load 8 from %ir.b) + ; MIPS32: [[LOAD4:%[0-9]+]]:fprb(s64) = G_LOAD [[LOAD]](p0) :: (load (s64) from %ir.b) ; MIPS32: G_BR %bb.6 ; MIPS32: bb.5.b.PHI.1.2: ; MIPS32: successors: %bb.6(0x80000000) - ; MIPS32: [[LOAD5:%[0-9]+]]:fprb(s64) = G_LOAD [[LOAD1]](p0) :: (load 8 from %ir.c) + ; MIPS32: [[LOAD5:%[0-9]+]]:fprb(s64) = G_LOAD [[LOAD1]](p0) :: (load (s64) from %ir.c) ; MIPS32: bb.6.b.PHI.1: ; MIPS32: successors: %bb.7(0x40000000), %bb.13(0x40000000) ; MIPS32: [[PHI:%[0-9]+]]:fprb(s64) = G_PHI [[LOAD3]](s64), %bb.3, [[LOAD4]](s64), %bb.4, [[LOAD5]](s64), %bb.5 @@ -912,7 +912,7 @@ body: | ; MIPS32: G_BRCOND [[AND3]](s32), %bb.7 ; MIPS32: G_BR %bb.13 ; MIPS32: bb.7.b.PHI.1.end: - ; MIPS32: G_STORE [[PHI]](s64), [[LOAD2]](p0) :: (store 8 into %ir.result) + ; MIPS32: G_STORE [[PHI]](s64), [[LOAD2]](p0) :: (store (s64) into %ir.result) ; MIPS32: RetRA ; MIPS32: bb.8.pre.PHI.2: ; MIPS32: successors: %bb.9(0x40000000), %bb.10(0x40000000) @@ -923,11 +923,11 @@ body: | ; MIPS32: G_BR %bb.10 ; MIPS32: bb.9.b.PHI.2.0: ; MIPS32: successors: %bb.11(0x80000000) - ; MIPS32: [[LOAD6:%[0-9]+]]:fprb(s64) = G_LOAD [[COPY3]](p0) :: (load 8 from %ir.a) + ; MIPS32: [[LOAD6:%[0-9]+]]:fprb(s64) = G_LOAD [[COPY3]](p0) :: (load (s64) from %ir.a) ; MIPS32: G_BR %bb.11 ; MIPS32: bb.10.b.PHI.2.1: ; MIPS32: successors: %bb.11(0x80000000) - ; MIPS32: [[LOAD7:%[0-9]+]]:fprb(s64) = G_LOAD [[LOAD]](p0) :: (load 8 from %ir.b) + ; MIPS32: [[LOAD7:%[0-9]+]]:fprb(s64) = G_LOAD [[LOAD]](p0) :: (load (s64) from %ir.b) ; MIPS32: bb.11.b.PHI.2: ; MIPS32: successors: %bb.13(0x40000000), %bb.12(0x40000000) ; MIPS32: [[PHI1:%[0-9]+]]:fprb(s64) = G_PHI [[LOAD6]](s64), %bb.9, [[LOAD7]](s64), %bb.10 @@ -936,7 +936,7 @@ body: | ; MIPS32: [[AND5:%[0-9]+]]:gprb(s32) = G_AND [[COPY9]], [[C6]] ; MIPS32: G_BRCOND [[AND5]](s32), %bb.13 ; MIPS32: bb.12.b.PHI.2.end: - ; MIPS32: G_STORE [[PHI1]](s64), [[LOAD2]](p0) :: (store 8 into %ir.result) + ; MIPS32: G_STORE [[PHI1]](s64), [[LOAD2]](p0) :: (store (s64) into %ir.result) ; MIPS32: RetRA ; MIPS32: bb.13.b.PHI.3: ; MIPS32: [[PHI2:%[0-9]+]]:fprb(s64) = G_PHI [[PHI1]](s64), %bb.11, [[PHI]](s64), %bb.6 @@ -948,8 +948,8 @@ body: | ; MIPS32: [[COPY11:%[0-9]+]]:gprb(s32) = COPY [[COPY1]](s32) ; MIPS32: [[AND7:%[0-9]+]]:gprb(s32) = G_AND [[COPY11]], [[C7]] ; MIPS32: [[SELECT1:%[0-9]+]]:fprb(s64) = G_SELECT [[AND7]](s32), [[SELECT]], [[PHI2]] - ; MIPS32: G_STORE [[SELECT1]](s64), [[LOAD2]](p0) :: (store 8 into %ir.result) - ; MIPS32: G_STORE [[PHI2]](s64), [[LOAD2]](p0) :: (store 8 into %ir.result) + ; MIPS32: G_STORE [[SELECT1]](s64), [[LOAD2]](p0) :: (store (s64) into %ir.result) + ; MIPS32: G_STORE [[PHI2]](s64), [[LOAD2]](p0) :: (store (s64) into %ir.result) ; MIPS32: RetRA bb.1.entry: liveins: $a0, $a1, $a2, $a3 @@ -959,11 +959,11 @@ body: | %9:_(s32) = COPY $a2 %3:_(p0) = COPY $a3 %10:_(p0) = G_FRAME_INDEX %fixed-stack.2 - %4:_(p0) = G_LOAD %10(p0) :: (load 4 from %fixed-stack.2, align 8) + %4:_(p0) = G_LOAD %10(p0) :: (load (p0) from %fixed-stack.2, align 8) %11:_(p0) = G_FRAME_INDEX %fixed-stack.1 - %5:_(p0) = G_LOAD %11(p0) :: (load 4 from %fixed-stack.1) + %5:_(p0) = G_LOAD %11(p0) :: (load (p0) from %fixed-stack.1) %12:_(p0) = G_FRAME_INDEX %fixed-stack.0 - %6:_(p0) = G_LOAD %12(p0) :: (load 4 from %fixed-stack.0, align 8) + %6:_(p0) = G_LOAD %12(p0) :: (load (p0) from %fixed-stack.0, align 8) %24:_(s64) = G_FCONSTANT double 0.000000e+00 %33:_(s32) = G_CONSTANT i32 1 %34:_(s32) = COPY %7(s32) @@ -983,15 +983,15 @@ body: | G_BRCOND %30(s32), %bb.6 bb.4.b.PHI.1.0: - %13:_(s64) = G_LOAD %3(p0) :: (load 8 from %ir.a) + %13:_(s64) = G_LOAD %3(p0) :: (load (s64) from %ir.a) G_BR %bb.7 bb.5.b.PHI.1.1: - %15:_(s64) = G_LOAD %4(p0) :: (load 8 from %ir.b) + %15:_(s64) = G_LOAD %4(p0) :: (load (s64) from %ir.b) G_BR %bb.7 bb.6.b.PHI.1.2: - %14:_(s64) = G_LOAD %5(p0) :: (load 8 from %ir.c) + %14:_(s64) = G_LOAD %5(p0) :: (load (s64) from %ir.c) bb.7.b.PHI.1: %16:_(s64) = G_PHI %13(s64), %bb.4, %15(s64), %bb.5, %14(s64), %bb.6 @@ -1002,7 +1002,7 @@ body: | G_BR %bb.14 bb.8.b.PHI.1.end: - G_STORE %16(s64), %6(p0) :: (store 8 into %ir.result) + G_STORE %16(s64), %6(p0) :: (store (s64) into %ir.result) RetRA bb.9.pre.PHI.2: @@ -1013,11 +1013,11 @@ body: | G_BR %bb.11 bb.10.b.PHI.2.0: - %18:_(s64) = G_LOAD %3(p0) :: (load 8 from %ir.a) + %18:_(s64) = G_LOAD %3(p0) :: (load (s64) from %ir.a) G_BR %bb.12 bb.11.b.PHI.2.1: - %17:_(s64) = G_LOAD %4(p0) :: (load 8 from %ir.b) + %17:_(s64) = G_LOAD %4(p0) :: (load (s64) from %ir.b) bb.12.b.PHI.2: %19:_(s64) = G_PHI %18(s64), %bb.10, %17(s64), %bb.11 @@ -1027,7 +1027,7 @@ body: | G_BRCOND %27(s32), %bb.14 bb.13.b.PHI.2.end: - G_STORE %19(s64), %6(p0) :: (store 8 into %ir.result) + G_STORE %19(s64), %6(p0) :: (store (s64) into %ir.result) RetRA bb.14.b.PHI.3: @@ -1040,8 +1040,8 @@ body: | %47:_(s32) = COPY %8(s32) %25:_(s32) = G_AND %47, %45 %23:_(s64) = G_SELECT %25(s32), %22, %20 - G_STORE %23(s64), %6(p0) :: (store 8 into %ir.result) - G_STORE %20(s64), %6(p0) :: (store 8 into %ir.result) + G_STORE %23(s64), %6(p0) :: (store (s64) into %ir.result) + G_STORE %20(s64), %6(p0) :: (store (s64) into %ir.result) RetRA ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/mul.mir b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/mul.mir index 3dea77ea2d56b..056cfdc4cfb14 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/mul.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/mul.mir @@ -51,8 +51,8 @@ body: | ; MIPS32: [[C1:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 1 ; MIPS32: [[COPY4:%[0-9]+]]:gprb(s32) = COPY [[ICMP]](s32) ; MIPS32: [[AND:%[0-9]+]]:gprb(s32) = G_AND [[COPY4]], [[C1]] - ; MIPS32: G_STORE [[AND]](s32), [[COPY3]](p0) :: (store 1 into %ir.pcarry_flag) - ; MIPS32: G_STORE [[MUL]](s32), [[COPY2]](p0) :: (store 4 into %ir.pmul) + ; MIPS32: G_STORE [[AND]](s32), [[COPY3]](p0) :: (store (s8) into %ir.pcarry_flag) + ; MIPS32: G_STORE [[MUL]](s32), [[COPY2]](p0) :: (store (s32) into %ir.pmul) ; MIPS32: RetRA %0:_(s32) = COPY $a0 %1:_(s32) = COPY $a1 @@ -65,8 +65,8 @@ body: | %10:_(s32) = G_CONSTANT i32 1 %11:_(s32) = COPY %9(s32) %6:_(s32) = G_AND %11, %10 - G_STORE %6(s32), %3(p0) :: (store 1 into %ir.pcarry_flag) - G_STORE %4(s32), %2(p0) :: (store 4 into %ir.pmul) + G_STORE %6(s32), %3(p0) :: (store (s8) into %ir.pcarry_flag) + G_STORE %4(s32), %2(p0) :: (store (s32) into %ir.pmul) RetRA ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/mul_vec.mir b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/mul_vec.mir index 5daa4a782be95..874f3e560ed8a 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/mul_vec.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/mul_vec.mir @@ -22,18 +22,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:fprb(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:fprb(<16 x s8>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:fprb(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load (<16 x s8>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:fprb(<16 x s8>) = G_LOAD [[COPY1]](p0) :: (load (<16 x s8>) from %ir.b) ; P5600: [[MUL:%[0-9]+]]:fprb(<16 x s8>) = G_MUL [[LOAD1]], [[LOAD]] - ; P5600: G_STORE [[MUL]](<16 x s8>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[MUL]](<16 x s8>), [[COPY2]](p0) :: (store (<16 x s8>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<16 x s8>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<16 x s8>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<16 x s8>) = G_LOAD %0(p0) :: (load (<16 x s8>) from %ir.a) + %4:_(<16 x s8>) = G_LOAD %1(p0) :: (load (<16 x s8>) from %ir.b) %5:_(<16 x s8>) = G_MUL %4, %3 - G_STORE %5(<16 x s8>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<16 x s8>), %2(p0) :: (store (<16 x s8>) into %ir.c) RetRA ... @@ -51,18 +51,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:fprb(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:fprb(<8 x s16>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:fprb(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load (<8 x s16>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:fprb(<8 x s16>) = G_LOAD [[COPY1]](p0) :: (load (<8 x s16>) from %ir.b) ; P5600: [[MUL:%[0-9]+]]:fprb(<8 x s16>) = G_MUL [[LOAD1]], [[LOAD]] - ; P5600: G_STORE [[MUL]](<8 x s16>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[MUL]](<8 x s16>), [[COPY2]](p0) :: (store (<8 x s16>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<8 x s16>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<8 x s16>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<8 x s16>) = G_LOAD %0(p0) :: (load (<8 x s16>) from %ir.a) + %4:_(<8 x s16>) = G_LOAD %1(p0) :: (load (<8 x s16>) from %ir.b) %5:_(<8 x s16>) = G_MUL %4, %3 - G_STORE %5(<8 x s16>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<8 x s16>), %2(p0) :: (store (<8 x s16>) into %ir.c) RetRA ... @@ -80,18 +80,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load (<4 x s32>) from %ir.b) ; P5600: [[MUL:%[0-9]+]]:fprb(<4 x s32>) = G_MUL [[LOAD1]], [[LOAD]] - ; P5600: G_STORE [[MUL]](<4 x s32>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[MUL]](<4 x s32>), [[COPY2]](p0) :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) + %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>) from %ir.b) %5:_(<4 x s32>) = G_MUL %4, %3 - G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<4 x s32>), %2(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -109,18 +109,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load (<2 x s64>) from %ir.b) ; P5600: [[MUL:%[0-9]+]]:fprb(<2 x s64>) = G_MUL [[LOAD1]], [[LOAD]] - ; P5600: G_STORE [[MUL]](<2 x s64>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[MUL]](<2 x s64>), [[COPY2]](p0) :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) + %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>) from %ir.b) %5:_(<2 x s64>) = G_MUL %4, %3 - G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<2 x s64>), %2(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/phi.mir b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/phi.mir index 26fc6d1d01844..4744e36daa360 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/phi.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/phi.mir @@ -166,9 +166,9 @@ body: | ; MIPS32: [[COPY1:%[0-9]+]]:gprb(s32) = COPY $a2 ; MIPS32: [[COPY2:%[0-9]+]]:gprb(s32) = COPY $a3 ; MIPS32: [[FRAME_INDEX:%[0-9]+]]:gprb(p0) = G_FRAME_INDEX %fixed-stack.0 - ; MIPS32: [[LOAD:%[0-9]+]]:gprb(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load 4 from %fixed-stack.0, align 8) + ; MIPS32: [[LOAD:%[0-9]+]]:gprb(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8) ; MIPS32: [[FRAME_INDEX1:%[0-9]+]]:gprb(p0) = G_FRAME_INDEX %fixed-stack.1 - ; MIPS32: [[LOAD1:%[0-9]+]]:gprb(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (load 4 from %fixed-stack.1) + ; MIPS32: [[LOAD1:%[0-9]+]]:gprb(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (load (s32) from %fixed-stack.1) ; MIPS32: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 1 ; MIPS32: [[COPY3:%[0-9]+]]:gprb(s32) = COPY [[COPY]](s32) ; MIPS32: [[AND:%[0-9]+]]:gprb(s32) = G_AND [[COPY3]], [[C]] @@ -193,9 +193,9 @@ body: | %5:_(s32) = COPY $a3 %1:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) %8:_(p0) = G_FRAME_INDEX %fixed-stack.1 - %6:_(s32) = G_LOAD %8(p0) :: (load 4 from %fixed-stack.1, align 8) + %6:_(s32) = G_LOAD %8(p0) :: (load (s32) from %fixed-stack.1, align 8) %9:_(p0) = G_FRAME_INDEX %fixed-stack.0 - %7:_(s32) = G_LOAD %9(p0) :: (load 4 from %fixed-stack.0) + %7:_(s32) = G_LOAD %9(p0) :: (load (s32) from %fixed-stack.0) %2:_(s64) = G_MERGE_VALUES %6(s32), %7(s32) %14:_(s32) = G_CONSTANT i32 1 %15:_(s32) = COPY %3(s32) @@ -230,8 +230,8 @@ body: | ; MIPS32: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 ; MIPS32: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2 ; MIPS32: [[COPY3:%[0-9]+]]:gprb(p0) = COPY $a3 - ; MIPS32: [[LOAD:%[0-9]+]]:fprb(s64) = G_LOAD [[COPY1]](p0) :: (load 8 from %ir.i64_ptr_a) - ; MIPS32: [[LOAD1:%[0-9]+]]:fprb(s64) = G_LOAD [[COPY2]](p0) :: (load 8 from %ir.i64_ptr_b) + ; MIPS32: [[LOAD:%[0-9]+]]:fprb(s64) = G_LOAD [[COPY1]](p0) :: (load (s64) from %ir.i64_ptr_a) + ; MIPS32: [[LOAD1:%[0-9]+]]:fprb(s64) = G_LOAD [[COPY2]](p0) :: (load (s64) from %ir.i64_ptr_b) ; MIPS32: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 1 ; MIPS32: [[COPY4:%[0-9]+]]:gprb(s32) = COPY [[COPY]](s32) ; MIPS32: [[AND:%[0-9]+]]:gprb(s32) = G_AND [[COPY4]], [[C]] @@ -244,7 +244,7 @@ body: | ; MIPS32: successors: %bb.3(0x80000000) ; MIPS32: bb.3.cond.end: ; MIPS32: [[PHI:%[0-9]+]]:fprb(s64) = G_PHI [[LOAD]](s64), %bb.1, [[LOAD1]](s64), %bb.2 - ; MIPS32: G_STORE [[PHI]](s64), [[COPY3]](p0) :: (store 8 into %ir.i64_ptr_c) + ; MIPS32: G_STORE [[PHI]](s64), [[COPY3]](p0) :: (store (s64) into %ir.i64_ptr_c) ; MIPS32: RetRA bb.1.entry: liveins: $a0, $a1, $a2, $a3 @@ -253,8 +253,8 @@ body: | %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 %3:_(p0) = COPY $a3 - %5:_(s64) = G_LOAD %1(p0) :: (load 8 from %ir.i64_ptr_a) - %6:_(s64) = G_LOAD %2(p0) :: (load 8 from %ir.i64_ptr_b) + %5:_(s64) = G_LOAD %1(p0) :: (load (s64) from %ir.i64_ptr_a) + %6:_(s64) = G_LOAD %2(p0) :: (load (s64) from %ir.i64_ptr_b) %9:_(s32) = G_CONSTANT i32 1 %10:_(s32) = COPY %4(s32) %8:_(s32) = G_AND %10, %9 @@ -268,7 +268,7 @@ body: | bb.4.cond.end: %7:_(s64) = G_PHI %5(s64), %bb.2, %6(s64), %bb.3 - G_STORE %7(s64), %3(p0) :: (store 8 into %ir.i64_ptr_c) + G_STORE %7(s64), %3(p0) :: (store (s64) into %ir.i64_ptr_c) RetRA ... @@ -336,8 +336,8 @@ body: | ; MIPS32: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 ; MIPS32: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2 ; MIPS32: [[COPY3:%[0-9]+]]:gprb(p0) = COPY $a3 - ; MIPS32: [[LOAD:%[0-9]+]]:gprb(s32) = G_LOAD [[COPY1]](p0) :: (load 4 from %ir.f32_ptr_a) - ; MIPS32: [[LOAD1:%[0-9]+]]:gprb(s32) = G_LOAD [[COPY2]](p0) :: (load 4 from %ir.f32_ptr_b) + ; MIPS32: [[LOAD:%[0-9]+]]:gprb(s32) = G_LOAD [[COPY1]](p0) :: (load (s32) from %ir.f32_ptr_a) + ; MIPS32: [[LOAD1:%[0-9]+]]:gprb(s32) = G_LOAD [[COPY2]](p0) :: (load (s32) from %ir.f32_ptr_b) ; MIPS32: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 1 ; MIPS32: [[COPY4:%[0-9]+]]:gprb(s32) = COPY [[COPY]](s32) ; MIPS32: [[AND:%[0-9]+]]:gprb(s32) = G_AND [[COPY4]], [[C]] @@ -350,7 +350,7 @@ body: | ; MIPS32: successors: %bb.3(0x80000000) ; MIPS32: bb.3.cond.end: ; MIPS32: [[PHI:%[0-9]+]]:gprb(s32) = G_PHI [[LOAD]](s32), %bb.1, [[LOAD1]](s32), %bb.2 - ; MIPS32: G_STORE [[PHI]](s32), [[COPY3]](p0) :: (store 4 into %ir.f32_ptr_c) + ; MIPS32: G_STORE [[PHI]](s32), [[COPY3]](p0) :: (store (s32) into %ir.f32_ptr_c) ; MIPS32: RetRA bb.1.entry: liveins: $a0, $a1, $a2, $a3 @@ -359,8 +359,8 @@ body: | %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 %3:_(p0) = COPY $a3 - %5:_(s32) = G_LOAD %1(p0) :: (load 4 from %ir.f32_ptr_a) - %6:_(s32) = G_LOAD %2(p0) :: (load 4 from %ir.f32_ptr_b) + %5:_(s32) = G_LOAD %1(p0) :: (load (s32) from %ir.f32_ptr_a) + %6:_(s32) = G_LOAD %2(p0) :: (load (s32) from %ir.f32_ptr_b) %9:_(s32) = G_CONSTANT i32 1 %10:_(s32) = COPY %4(s32) %8:_(s32) = G_AND %10, %9 @@ -374,7 +374,7 @@ body: | bb.4.cond.end: %7:_(s32) = G_PHI %5(s32), %bb.2, %6(s32), %bb.3 - G_STORE %7(s32), %3(p0) :: (store 4 into %ir.f32_ptr_c) + G_STORE %7(s32), %3(p0) :: (store (s32) into %ir.f32_ptr_c) RetRA ... @@ -393,7 +393,7 @@ body: | ; MIPS32: [[COPY:%[0-9]+]]:fprb(s64) = COPY $d6 ; MIPS32: [[COPY1:%[0-9]+]]:fprb(s64) = COPY $d7 ; MIPS32: [[FRAME_INDEX:%[0-9]+]]:gprb(p0) = G_FRAME_INDEX %fixed-stack.0 - ; MIPS32: [[LOAD:%[0-9]+]]:gprb(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load 4 from %fixed-stack.0, align 8) + ; MIPS32: [[LOAD:%[0-9]+]]:gprb(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8) ; MIPS32: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 1 ; MIPS32: [[COPY2:%[0-9]+]]:gprb(s32) = COPY [[LOAD]](s32) ; MIPS32: [[AND:%[0-9]+]]:gprb(s32) = G_AND [[COPY2]], [[C]] @@ -414,7 +414,7 @@ body: | %0:_(s64) = COPY $d6 %1:_(s64) = COPY $d7 %4:_(p0) = G_FRAME_INDEX %fixed-stack.0 - %3:_(s32) = G_LOAD %4(p0) :: (load 4 from %fixed-stack.0, align 8) + %3:_(s32) = G_LOAD %4(p0) :: (load (s32) from %fixed-stack.0, align 8) %7:_(s32) = G_CONSTANT i32 1 %8:_(s32) = COPY %3(s32) %6:_(s32) = G_AND %8, %7 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/pointers.mir b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/pointers.mir index 38d94f90311f9..b5c6efce6a10d 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/pointers.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/pointers.mir @@ -19,11 +19,11 @@ body: | ; MIPS32-LABEL: name: ptr_arg_in_regs ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 - ; MIPS32: [[LOAD:%[0-9]+]]:gprb(s32) = G_LOAD [[COPY]](p0) :: (load 4 from %ir.p) + ; MIPS32: [[LOAD:%[0-9]+]]:gprb(s32) = G_LOAD [[COPY]](p0) :: (load (s32) from %ir.p) ; MIPS32: $v0 = COPY [[LOAD]](s32) ; MIPS32: RetRA implicit $v0 %0:_(p0) = COPY $a0 - %1:_(s32) = G_LOAD %0(p0) :: (load 4 from %ir.p) + %1:_(s32) = G_LOAD %0(p0) :: (load (s32) from %ir.p) $v0 = COPY %1(s32) RetRA implicit $v0 @@ -46,8 +46,8 @@ body: | ; MIPS32: [[COPY2:%[0-9]+]]:gprb(s32) = COPY $a2 ; MIPS32: [[COPY3:%[0-9]+]]:gprb(s32) = COPY $a3 ; MIPS32: [[FRAME_INDEX:%[0-9]+]]:gprb(p0) = G_FRAME_INDEX %fixed-stack.0 - ; MIPS32: [[LOAD:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (load 4 from %fixed-stack.0, align 8) - ; MIPS32: [[LOAD1:%[0-9]+]]:gprb(s32) = G_LOAD [[LOAD]](p0) :: (load 4 from %ir.p) + ; MIPS32: [[LOAD:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (load (p0) from %fixed-stack.0, align 8) + ; MIPS32: [[LOAD1:%[0-9]+]]:gprb(s32) = G_LOAD [[LOAD]](p0) :: (load (s32) from %ir.p) ; MIPS32: $v0 = COPY [[LOAD1]](s32) ; MIPS32: RetRA implicit $v0 %0:_(s32) = COPY $a0 @@ -55,8 +55,8 @@ body: | %2:_(s32) = COPY $a2 %3:_(s32) = COPY $a3 %5:_(p0) = G_FRAME_INDEX %fixed-stack.0 - %4:_(p0) = G_LOAD %5(p0) :: (load 4 from %fixed-stack.0, align 8) - %6:_(s32) = G_LOAD %4(p0) :: (load 4 from %ir.p) + %4:_(p0) = G_LOAD %5(p0) :: (load (p0) from %fixed-stack.0, align 8) + %6:_(s32) = G_LOAD %4(p0) :: (load (s32) from %ir.p) $v0 = COPY %6(s32) RetRA implicit $v0 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/rem_and_div_vec.mir b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/rem_and_div_vec.mir index 249204c3b1b20..0863b09355a5d 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/rem_and_div_vec.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/rem_and_div_vec.mir @@ -37,18 +37,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:fprb(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:fprb(<16 x s8>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:fprb(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load (<16 x s8>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:fprb(<16 x s8>) = G_LOAD [[COPY1]](p0) :: (load (<16 x s8>) from %ir.b) ; P5600: [[SDIV:%[0-9]+]]:fprb(<16 x s8>) = G_SDIV [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[SDIV]](<16 x s8>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[SDIV]](<16 x s8>), [[COPY2]](p0) :: (store (<16 x s8>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<16 x s8>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<16 x s8>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<16 x s8>) = G_LOAD %0(p0) :: (load (<16 x s8>) from %ir.a) + %4:_(<16 x s8>) = G_LOAD %1(p0) :: (load (<16 x s8>) from %ir.b) %5:_(<16 x s8>) = G_SDIV %3, %4 - G_STORE %5(<16 x s8>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<16 x s8>), %2(p0) :: (store (<16 x s8>) into %ir.c) RetRA ... @@ -66,18 +66,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:fprb(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:fprb(<8 x s16>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:fprb(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load (<8 x s16>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:fprb(<8 x s16>) = G_LOAD [[COPY1]](p0) :: (load (<8 x s16>) from %ir.b) ; P5600: [[SDIV:%[0-9]+]]:fprb(<8 x s16>) = G_SDIV [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[SDIV]](<8 x s16>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[SDIV]](<8 x s16>), [[COPY2]](p0) :: (store (<8 x s16>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<8 x s16>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<8 x s16>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<8 x s16>) = G_LOAD %0(p0) :: (load (<8 x s16>) from %ir.a) + %4:_(<8 x s16>) = G_LOAD %1(p0) :: (load (<8 x s16>) from %ir.b) %5:_(<8 x s16>) = G_SDIV %3, %4 - G_STORE %5(<8 x s16>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<8 x s16>), %2(p0) :: (store (<8 x s16>) into %ir.c) RetRA ... @@ -95,18 +95,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load (<4 x s32>) from %ir.b) ; P5600: [[SDIV:%[0-9]+]]:fprb(<4 x s32>) = G_SDIV [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[SDIV]](<4 x s32>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[SDIV]](<4 x s32>), [[COPY2]](p0) :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) + %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>) from %ir.b) %5:_(<4 x s32>) = G_SDIV %3, %4 - G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<4 x s32>), %2(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -124,18 +124,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load (<2 x s64>) from %ir.b) ; P5600: [[SDIV:%[0-9]+]]:fprb(<2 x s64>) = G_SDIV [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[SDIV]](<2 x s64>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[SDIV]](<2 x s64>), [[COPY2]](p0) :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) + %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>) from %ir.b) %5:_(<2 x s64>) = G_SDIV %3, %4 - G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<2 x s64>), %2(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... @@ -153,18 +153,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:fprb(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:fprb(<16 x s8>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:fprb(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load (<16 x s8>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:fprb(<16 x s8>) = G_LOAD [[COPY1]](p0) :: (load (<16 x s8>) from %ir.b) ; P5600: [[SREM:%[0-9]+]]:fprb(<16 x s8>) = G_SREM [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[SREM]](<16 x s8>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[SREM]](<16 x s8>), [[COPY2]](p0) :: (store (<16 x s8>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<16 x s8>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<16 x s8>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<16 x s8>) = G_LOAD %0(p0) :: (load (<16 x s8>) from %ir.a) + %4:_(<16 x s8>) = G_LOAD %1(p0) :: (load (<16 x s8>) from %ir.b) %5:_(<16 x s8>) = G_SREM %3, %4 - G_STORE %5(<16 x s8>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<16 x s8>), %2(p0) :: (store (<16 x s8>) into %ir.c) RetRA ... @@ -182,18 +182,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:fprb(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:fprb(<8 x s16>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:fprb(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load (<8 x s16>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:fprb(<8 x s16>) = G_LOAD [[COPY1]](p0) :: (load (<8 x s16>) from %ir.b) ; P5600: [[SREM:%[0-9]+]]:fprb(<8 x s16>) = G_SREM [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[SREM]](<8 x s16>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[SREM]](<8 x s16>), [[COPY2]](p0) :: (store (<8 x s16>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<8 x s16>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<8 x s16>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<8 x s16>) = G_LOAD %0(p0) :: (load (<8 x s16>) from %ir.a) + %4:_(<8 x s16>) = G_LOAD %1(p0) :: (load (<8 x s16>) from %ir.b) %5:_(<8 x s16>) = G_SREM %3, %4 - G_STORE %5(<8 x s16>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<8 x s16>), %2(p0) :: (store (<8 x s16>) into %ir.c) RetRA ... @@ -211,18 +211,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load (<4 x s32>) from %ir.b) ; P5600: [[SREM:%[0-9]+]]:fprb(<4 x s32>) = G_SREM [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[SREM]](<4 x s32>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[SREM]](<4 x s32>), [[COPY2]](p0) :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) + %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>) from %ir.b) %5:_(<4 x s32>) = G_SREM %3, %4 - G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<4 x s32>), %2(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -240,18 +240,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load (<2 x s64>) from %ir.b) ; P5600: [[SREM:%[0-9]+]]:fprb(<2 x s64>) = G_SREM [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[SREM]](<2 x s64>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[SREM]](<2 x s64>), [[COPY2]](p0) :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) + %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>) from %ir.b) %5:_(<2 x s64>) = G_SREM %3, %4 - G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<2 x s64>), %2(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... @@ -269,18 +269,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:fprb(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:fprb(<16 x s8>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:fprb(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load (<16 x s8>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:fprb(<16 x s8>) = G_LOAD [[COPY1]](p0) :: (load (<16 x s8>) from %ir.b) ; P5600: [[UDIV:%[0-9]+]]:fprb(<16 x s8>) = G_UDIV [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[UDIV]](<16 x s8>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[UDIV]](<16 x s8>), [[COPY2]](p0) :: (store (<16 x s8>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<16 x s8>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<16 x s8>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<16 x s8>) = G_LOAD %0(p0) :: (load (<16 x s8>) from %ir.a) + %4:_(<16 x s8>) = G_LOAD %1(p0) :: (load (<16 x s8>) from %ir.b) %5:_(<16 x s8>) = G_UDIV %3, %4 - G_STORE %5(<16 x s8>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<16 x s8>), %2(p0) :: (store (<16 x s8>) into %ir.c) RetRA ... @@ -298,18 +298,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:fprb(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:fprb(<8 x s16>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:fprb(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load (<8 x s16>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:fprb(<8 x s16>) = G_LOAD [[COPY1]](p0) :: (load (<8 x s16>) from %ir.b) ; P5600: [[UDIV:%[0-9]+]]:fprb(<8 x s16>) = G_UDIV [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[UDIV]](<8 x s16>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[UDIV]](<8 x s16>), [[COPY2]](p0) :: (store (<8 x s16>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<8 x s16>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<8 x s16>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<8 x s16>) = G_LOAD %0(p0) :: (load (<8 x s16>) from %ir.a) + %4:_(<8 x s16>) = G_LOAD %1(p0) :: (load (<8 x s16>) from %ir.b) %5:_(<8 x s16>) = G_UDIV %3, %4 - G_STORE %5(<8 x s16>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<8 x s16>), %2(p0) :: (store (<8 x s16>) into %ir.c) RetRA ... @@ -327,18 +327,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load (<4 x s32>) from %ir.b) ; P5600: [[UDIV:%[0-9]+]]:fprb(<4 x s32>) = G_UDIV [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[UDIV]](<4 x s32>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[UDIV]](<4 x s32>), [[COPY2]](p0) :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) + %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>) from %ir.b) %5:_(<4 x s32>) = G_UDIV %3, %4 - G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<4 x s32>), %2(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -356,18 +356,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load (<2 x s64>) from %ir.b) ; P5600: [[UDIV:%[0-9]+]]:fprb(<2 x s64>) = G_UDIV [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[UDIV]](<2 x s64>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[UDIV]](<2 x s64>), [[COPY2]](p0) :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) + %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>) from %ir.b) %5:_(<2 x s64>) = G_UDIV %3, %4 - G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<2 x s64>), %2(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... @@ -385,18 +385,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:fprb(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:fprb(<16 x s8>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:fprb(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load (<16 x s8>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:fprb(<16 x s8>) = G_LOAD [[COPY1]](p0) :: (load (<16 x s8>) from %ir.b) ; P5600: [[UREM:%[0-9]+]]:fprb(<16 x s8>) = G_UREM [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[UREM]](<16 x s8>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[UREM]](<16 x s8>), [[COPY2]](p0) :: (store (<16 x s8>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<16 x s8>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<16 x s8>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<16 x s8>) = G_LOAD %0(p0) :: (load (<16 x s8>) from %ir.a) + %4:_(<16 x s8>) = G_LOAD %1(p0) :: (load (<16 x s8>) from %ir.b) %5:_(<16 x s8>) = G_UREM %3, %4 - G_STORE %5(<16 x s8>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<16 x s8>), %2(p0) :: (store (<16 x s8>) into %ir.c) RetRA ... @@ -414,18 +414,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:fprb(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:fprb(<8 x s16>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:fprb(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load (<8 x s16>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:fprb(<8 x s16>) = G_LOAD [[COPY1]](p0) :: (load (<8 x s16>) from %ir.b) ; P5600: [[UREM:%[0-9]+]]:fprb(<8 x s16>) = G_UREM [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[UREM]](<8 x s16>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[UREM]](<8 x s16>), [[COPY2]](p0) :: (store (<8 x s16>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<8 x s16>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<8 x s16>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<8 x s16>) = G_LOAD %0(p0) :: (load (<8 x s16>) from %ir.a) + %4:_(<8 x s16>) = G_LOAD %1(p0) :: (load (<8 x s16>) from %ir.b) %5:_(<8 x s16>) = G_UREM %3, %4 - G_STORE %5(<8 x s16>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<8 x s16>), %2(p0) :: (store (<8 x s16>) into %ir.c) RetRA ... @@ -443,18 +443,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load (<4 x s32>) from %ir.b) ; P5600: [[UREM:%[0-9]+]]:fprb(<4 x s32>) = G_UREM [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[UREM]](<4 x s32>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[UREM]](<4 x s32>), [[COPY2]](p0) :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) + %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>) from %ir.b) %5:_(<4 x s32>) = G_UREM %3, %4 - G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<4 x s32>), %2(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -472,18 +472,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load (<2 x s64>) from %ir.b) ; P5600: [[UREM:%[0-9]+]]:fprb(<2 x s64>) = G_UREM [[LOAD]], [[LOAD1]] - ; P5600: G_STORE [[UREM]](<2 x s64>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[UREM]](<2 x s64>), [[COPY2]](p0) :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) + %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>) from %ir.b) %5:_(<2 x s64>) = G_UREM %3, %4 - G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<2 x s64>), %2(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/select.mir b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/select.mir index a9554968aaed4..93e6b72b87611 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/select.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/select.mir @@ -91,9 +91,9 @@ body: | ; MIPS32: [[COPY1:%[0-9]+]]:gprb(s32) = COPY $a2 ; MIPS32: [[COPY2:%[0-9]+]]:gprb(s32) = COPY $a3 ; MIPS32: [[FRAME_INDEX:%[0-9]+]]:gprb(p0) = G_FRAME_INDEX %fixed-stack.0 - ; MIPS32: [[LOAD:%[0-9]+]]:gprb(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load 4 from %fixed-stack.0, align 8) + ; MIPS32: [[LOAD:%[0-9]+]]:gprb(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8) ; MIPS32: [[FRAME_INDEX1:%[0-9]+]]:gprb(p0) = G_FRAME_INDEX %fixed-stack.1 - ; MIPS32: [[LOAD1:%[0-9]+]]:gprb(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (load 4 from %fixed-stack.1) + ; MIPS32: [[LOAD1:%[0-9]+]]:gprb(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (load (s32) from %fixed-stack.1) ; MIPS32: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 1 ; MIPS32: [[COPY3:%[0-9]+]]:gprb(s32) = COPY [[COPY]](s32) ; MIPS32: [[AND:%[0-9]+]]:gprb(s32) = G_AND [[COPY3]], [[C]] @@ -107,9 +107,9 @@ body: | %5:_(s32) = COPY $a3 %1:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) %8:_(p0) = G_FRAME_INDEX %fixed-stack.1 - %6:_(s32) = G_LOAD %8(p0) :: (load 4 from %fixed-stack.1, align 8) + %6:_(s32) = G_LOAD %8(p0) :: (load (s32) from %fixed-stack.1, align 8) %9:_(p0) = G_FRAME_INDEX %fixed-stack.0 - %7:_(s32) = G_LOAD %9(p0) :: (load 4 from %fixed-stack.0) + %7:_(s32) = G_LOAD %9(p0) :: (load (s32) from %fixed-stack.0) %2:_(s64) = G_MERGE_VALUES %6(s32), %7(s32) %14:_(s32) = G_CONSTANT i32 1 %15:_(s32) = COPY %3(s32) @@ -136,25 +136,25 @@ body: | ; MIPS32: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 ; MIPS32: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2 ; MIPS32: [[COPY3:%[0-9]+]]:gprb(p0) = COPY $a3 - ; MIPS32: [[LOAD:%[0-9]+]]:fprb(s64) = G_LOAD [[COPY1]](p0) :: (load 8 from %ir.i64_ptr_a) - ; MIPS32: [[LOAD1:%[0-9]+]]:fprb(s64) = G_LOAD [[COPY2]](p0) :: (load 8 from %ir.i64_ptr_b) + ; MIPS32: [[LOAD:%[0-9]+]]:fprb(s64) = G_LOAD [[COPY1]](p0) :: (load (s64) from %ir.i64_ptr_a) + ; MIPS32: [[LOAD1:%[0-9]+]]:fprb(s64) = G_LOAD [[COPY2]](p0) :: (load (s64) from %ir.i64_ptr_b) ; MIPS32: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 1 ; MIPS32: [[COPY4:%[0-9]+]]:gprb(s32) = COPY [[COPY]](s32) ; MIPS32: [[AND:%[0-9]+]]:gprb(s32) = G_AND [[COPY4]], [[C]] ; MIPS32: [[SELECT:%[0-9]+]]:fprb(s64) = G_SELECT [[AND]](s32), [[LOAD]], [[LOAD1]] - ; MIPS32: G_STORE [[SELECT]](s64), [[COPY3]](p0) :: (store 8 into %ir.i64_ptr_c) + ; MIPS32: G_STORE [[SELECT]](s64), [[COPY3]](p0) :: (store (s64) into %ir.i64_ptr_c) ; MIPS32: RetRA %4:_(s32) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 %3:_(p0) = COPY $a3 - %5:_(s64) = G_LOAD %1(p0) :: (load 8 from %ir.i64_ptr_a) - %6:_(s64) = G_LOAD %2(p0) :: (load 8 from %ir.i64_ptr_b) + %5:_(s64) = G_LOAD %1(p0) :: (load (s64) from %ir.i64_ptr_a) + %6:_(s64) = G_LOAD %2(p0) :: (load (s64) from %ir.i64_ptr_b) %9:_(s32) = G_CONSTANT i32 1 %10:_(s32) = COPY %4(s32) %8:_(s32) = G_AND %10, %9 %7:_(s64) = G_SELECT %8(s32), %5, %6 - G_STORE %7(s64), %3(p0) :: (store 8 into %ir.i64_ptr_c) + G_STORE %7(s64), %3(p0) :: (store (s64) into %ir.i64_ptr_c) RetRA ... @@ -206,25 +206,25 @@ body: | ; MIPS32: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 ; MIPS32: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2 ; MIPS32: [[COPY3:%[0-9]+]]:gprb(p0) = COPY $a3 - ; MIPS32: [[LOAD:%[0-9]+]]:gprb(s32) = G_LOAD [[COPY1]](p0) :: (load 4 from %ir.f32_ptr_a) - ; MIPS32: [[LOAD1:%[0-9]+]]:gprb(s32) = G_LOAD [[COPY2]](p0) :: (load 4 from %ir.f32_ptr_b) + ; MIPS32: [[LOAD:%[0-9]+]]:gprb(s32) = G_LOAD [[COPY1]](p0) :: (load (s32) from %ir.f32_ptr_a) + ; MIPS32: [[LOAD1:%[0-9]+]]:gprb(s32) = G_LOAD [[COPY2]](p0) :: (load (s32) from %ir.f32_ptr_b) ; MIPS32: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 1 ; MIPS32: [[COPY4:%[0-9]+]]:gprb(s32) = COPY [[COPY]](s32) ; MIPS32: [[AND:%[0-9]+]]:gprb(s32) = G_AND [[COPY4]], [[C]] ; MIPS32: [[SELECT:%[0-9]+]]:gprb(s32) = G_SELECT [[AND]](s32), [[LOAD]], [[LOAD1]] - ; MIPS32: G_STORE [[SELECT]](s32), [[COPY3]](p0) :: (store 4 into %ir.f32_ptr_c) + ; MIPS32: G_STORE [[SELECT]](s32), [[COPY3]](p0) :: (store (s32) into %ir.f32_ptr_c) ; MIPS32: RetRA %4:_(s32) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 %3:_(p0) = COPY $a3 - %5:_(s32) = G_LOAD %1(p0) :: (load 4 from %ir.f32_ptr_a) - %6:_(s32) = G_LOAD %2(p0) :: (load 4 from %ir.f32_ptr_b) + %5:_(s32) = G_LOAD %1(p0) :: (load (s32) from %ir.f32_ptr_a) + %6:_(s32) = G_LOAD %2(p0) :: (load (s32) from %ir.f32_ptr_b) %9:_(s32) = G_CONSTANT i32 1 %10:_(s32) = COPY %4(s32) %8:_(s32) = G_AND %10, %9 %7:_(s32) = G_SELECT %8(s32), %5, %6 - G_STORE %7(s32), %3(p0) :: (store 4 into %ir.f32_ptr_c) + G_STORE %7(s32), %3(p0) :: (store (s32) into %ir.f32_ptr_c) RetRA ... @@ -244,7 +244,7 @@ body: | ; MIPS32: [[COPY:%[0-9]+]]:fprb(s64) = COPY $d6 ; MIPS32: [[COPY1:%[0-9]+]]:fprb(s64) = COPY $d7 ; MIPS32: [[FRAME_INDEX:%[0-9]+]]:gprb(p0) = G_FRAME_INDEX %fixed-stack.0 - ; MIPS32: [[LOAD:%[0-9]+]]:gprb(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load 4 from %fixed-stack.0, align 8) + ; MIPS32: [[LOAD:%[0-9]+]]:gprb(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8) ; MIPS32: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 1 ; MIPS32: [[COPY2:%[0-9]+]]:gprb(s32) = COPY [[LOAD]](s32) ; MIPS32: [[AND:%[0-9]+]]:gprb(s32) = G_AND [[COPY2]], [[C]] @@ -254,7 +254,7 @@ body: | %0:_(s64) = COPY $d6 %1:_(s64) = COPY $d7 %4:_(p0) = G_FRAME_INDEX %fixed-stack.0 - %3:_(s32) = G_LOAD %4(p0) :: (load 4 from %fixed-stack.0, align 8) + %3:_(s32) = G_LOAD %4(p0) :: (load (s32) from %fixed-stack.0, align 8) %7:_(s32) = G_CONSTANT i32 1 %8:_(s32) = COPY %3(s32) %6:_(s32) = G_AND %8, %7 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/stack_args.mir b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/stack_args.mir index f8ba638aac614..be2284f3df752 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/stack_args.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/stack_args.mir @@ -24,7 +24,7 @@ body: | ; MIPS32: [[COPY2:%[0-9]+]]:gprb(s32) = COPY $a2 ; MIPS32: [[COPY3:%[0-9]+]]:gprb(s32) = COPY $a3 ; MIPS32: [[FRAME_INDEX:%[0-9]+]]:gprb(p0) = G_FRAME_INDEX %fixed-stack.0 - ; MIPS32: [[LOAD:%[0-9]+]]:gprb(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load 4 from %fixed-stack.0, align 8) + ; MIPS32: [[LOAD:%[0-9]+]]:gprb(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8) ; MIPS32: ADJCALLSTACKDOWN 24, 0, implicit-def $sp, implicit $sp ; MIPS32: $a0 = COPY [[COPY]](s32) ; MIPS32: $a1 = COPY [[COPY1]](s32) @@ -33,7 +33,7 @@ body: | ; MIPS32: [[COPY4:%[0-9]+]]:gprb(p0) = COPY $sp ; MIPS32: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 16 ; MIPS32: [[GEP:%[0-9]+]]:gprb(p0) = G_PTR_ADD [[COPY4]], [[C]](s32) - ; MIPS32: G_STORE [[LOAD]](s32), [[GEP]](p0) :: (store 4 into stack + 16) + ; MIPS32: G_STORE [[LOAD]](s32), [[GEP]](p0) :: (store (s32) into stack + 16) ; MIPS32: JAL @f, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit $a2, implicit $a3, implicit-def $v0 ; MIPS32: [[COPY5:%[0-9]+]]:gprb(s32) = COPY $v0 ; MIPS32: ADJCALLSTACKUP 24, 0, implicit-def $sp, implicit $sp @@ -44,7 +44,7 @@ body: | %2:_(s32) = COPY $a2 %3:_(s32) = COPY $a3 %5:_(p0) = G_FRAME_INDEX %fixed-stack.0 - %4:_(s32) = G_LOAD %5(p0) :: (load 4 from %fixed-stack.0, align 8) + %4:_(s32) = G_LOAD %5(p0) :: (load (s32) from %fixed-stack.0, align 8) ADJCALLSTACKDOWN 24, 0, implicit-def $sp, implicit $sp $a0 = COPY %0(s32) $a1 = COPY %1(s32) @@ -53,7 +53,7 @@ body: | %7:_(p0) = COPY $sp %8:_(s32) = G_CONSTANT i32 16 %9:_(p0) = G_PTR_ADD %7, %8(s32) - G_STORE %4(s32), %9(p0) :: (store 4 into stack + 16, align 4) + G_STORE %4(s32), %9(p0) :: (store (s32) into stack + 16, align 4) JAL @f, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit $a2, implicit $a3, implicit-def $v0 %6:_(s32) = COPY $v0 ADJCALLSTACKUP 24, 0, implicit-def $sp, implicit $sp diff --git a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/store.mir b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/store.mir index 3ae52adc952e1..fbc32e880d4c2 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/store.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/store.mir @@ -21,11 +21,11 @@ body: | ; MIPS32: liveins: $a0, $a1 ; MIPS32: [[COPY:%[0-9]+]]:gprb(s32) = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 - ; MIPS32: G_STORE [[COPY]](s32), [[COPY1]](p0) :: (store 4 into %ir.ptr) + ; MIPS32: G_STORE [[COPY]](s32), [[COPY1]](p0) :: (store (s32) into %ir.ptr) ; MIPS32: RetRA %0:_(s32) = COPY $a0 %1:_(p0) = COPY $a1 - G_STORE %0(s32), %1(p0) :: (store 4 into %ir.ptr) + G_STORE %0(s32), %1(p0) :: (store (s32) into %ir.ptr) RetRA ... @@ -43,16 +43,16 @@ body: | ; MIPS32: [[COPY:%[0-9]+]]:gprb(s32) = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:gprb(s32) = COPY $a1 ; MIPS32: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2 - ; MIPS32: G_STORE [[COPY]](s32), [[COPY2]](p0) :: (store 4 into %ir.ptr, align 8) + ; MIPS32: G_STORE [[COPY]](s32), [[COPY2]](p0) :: (store (s32) into %ir.ptr, align 8) ; MIPS32: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 4 - ; MIPS32: [[GEP:%[0-9]+]]:gprb(p0) = G_PTR_ADD [[COPY2]], [[C]](s32) - ; MIPS32: G_STORE [[COPY1]](s32), [[GEP]](p0) :: (store 4 into %ir.ptr + 4, basealign 8) + ; MIPS32: [[PTR_ADD:%[0-9]+]]:gprb(p0) = G_PTR_ADD [[COPY2]], [[C]](s32) + ; MIPS32: G_STORE [[COPY1]](s32), [[PTR_ADD]](p0) :: (store (s32) into %ir.ptr + 4, basealign 8) ; MIPS32: RetRA %2:_(s32) = COPY $a0 %3:_(s32) = COPY $a1 %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) %1:_(p0) = COPY $a2 - G_STORE %0(s64), %1(p0) :: (store 8 into %ir.ptr) + G_STORE %0(s64), %1(p0) :: (store (s64) into %ir.ptr) RetRA ... @@ -69,11 +69,11 @@ body: | ; MIPS32: liveins: $a1, $f12 ; MIPS32: [[COPY:%[0-9]+]]:fprb(s32) = COPY $f12 ; MIPS32: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 - ; MIPS32: G_STORE [[COPY]](s32), [[COPY1]](p0) :: (store 4 into %ir.ptr) + ; MIPS32: G_STORE [[COPY]](s32), [[COPY1]](p0) :: (store (s32) into %ir.ptr) ; MIPS32: RetRA %0:_(s32) = COPY $f12 %1:_(p0) = COPY $a1 - G_STORE %0(s32), %1(p0) :: (store 4 into %ir.ptr) + G_STORE %0(s32), %1(p0) :: (store (s32) into %ir.ptr) RetRA ... @@ -90,11 +90,11 @@ body: | ; MIPS32: liveins: $a2, $d6 ; MIPS32: [[COPY:%[0-9]+]]:fprb(s64) = COPY $d6 ; MIPS32: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a2 - ; MIPS32: G_STORE [[COPY]](s64), [[COPY1]](p0) :: (store 8 into %ir.ptr) + ; MIPS32: G_STORE [[COPY]](s64), [[COPY1]](p0) :: (store (s64) into %ir.ptr) ; MIPS32: RetRA %0:_(s64) = COPY $d6 %1:_(p0) = COPY $a2 - G_STORE %0(s64), %1(p0) :: (store 8 into %ir.ptr) + G_STORE %0(s64), %1(p0) :: (store (s64) into %ir.ptr) RetRA ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/store_4_unaligned.mir b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/store_4_unaligned.mir index de20ea20d032d..b57b161d5c6e1 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/store_4_unaligned.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/store_4_unaligned.mir @@ -40,17 +40,17 @@ body: | ; MIPS32: [[COPY:%[0-9]+]]:fprb(s32) = COPY $f12 ; MIPS32: [[GV:%[0-9]+]]:gprb(p0) = G_GLOBAL_VALUE @float_align1 ; MIPS32: [[COPY1:%[0-9]+]]:gprb(s32) = COPY [[COPY]](s32) - ; MIPS32: G_STORE [[COPY1]](s32), [[GV]](p0) :: (store 4 into @float_align1, align 1) + ; MIPS32: G_STORE [[COPY1]](s32), [[GV]](p0) :: (store (s32) into @float_align1, align 1) ; MIPS32: RetRA ; MIPS32R6-LABEL: name: store_float_align1 ; MIPS32R6: liveins: $f12 ; MIPS32R6: [[COPY:%[0-9]+]]:fprb(s32) = COPY $f12 ; MIPS32R6: [[GV:%[0-9]+]]:gprb(p0) = G_GLOBAL_VALUE @float_align1 - ; MIPS32R6: G_STORE [[COPY]](s32), [[GV]](p0) :: (store 4 into @float_align1, align 1) + ; MIPS32R6: G_STORE [[COPY]](s32), [[GV]](p0) :: (store (s32) into @float_align1, align 1) ; MIPS32R6: RetRA %0:_(s32) = COPY $f12 %1:_(p0) = G_GLOBAL_VALUE @float_align1 - G_STORE %0(s32), %1(p0) :: (store 4 into @float_align1, align 1) + G_STORE %0(s32), %1(p0) :: (store (s32) into @float_align1, align 1) RetRA ... @@ -67,17 +67,17 @@ body: | ; MIPS32: liveins: $f12 ; MIPS32: [[COPY:%[0-9]+]]:fprb(s32) = COPY $f12 ; MIPS32: [[GV:%[0-9]+]]:gprb(p0) = G_GLOBAL_VALUE @float_align4 - ; MIPS32: G_STORE [[COPY]](s32), [[GV]](p0) :: (store 4 into @float_align4) + ; MIPS32: G_STORE [[COPY]](s32), [[GV]](p0) :: (store (s32) into @float_align4) ; MIPS32: RetRA ; MIPS32R6-LABEL: name: store_float_align4 ; MIPS32R6: liveins: $f12 ; MIPS32R6: [[COPY:%[0-9]+]]:fprb(s32) = COPY $f12 ; MIPS32R6: [[GV:%[0-9]+]]:gprb(p0) = G_GLOBAL_VALUE @float_align4 - ; MIPS32R6: G_STORE [[COPY]](s32), [[GV]](p0) :: (store 4 into @float_align4) + ; MIPS32R6: G_STORE [[COPY]](s32), [[GV]](p0) :: (store (s32) into @float_align4) ; MIPS32R6: RetRA %0:_(s32) = COPY $f12 %1:_(p0) = G_GLOBAL_VALUE @float_align4 - G_STORE %0(s32), %1(p0) :: (store 4 into @float_align4) + G_STORE %0(s32), %1(p0) :: (store (s32) into @float_align4) RetRA ... @@ -94,17 +94,17 @@ body: | ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:gprb(s32) = COPY $a0 ; MIPS32: [[GV:%[0-9]+]]:gprb(p0) = G_GLOBAL_VALUE @i32_align8 - ; MIPS32: G_STORE [[COPY]](s32), [[GV]](p0) :: (store 4 into @i32_align8, align 8) + ; MIPS32: G_STORE [[COPY]](s32), [[GV]](p0) :: (store (s32) into @i32_align8, align 8) ; MIPS32: RetRA ; MIPS32R6-LABEL: name: store_i32_align8 ; MIPS32R6: liveins: $a0 ; MIPS32R6: [[COPY:%[0-9]+]]:gprb(s32) = COPY $a0 ; MIPS32R6: [[GV:%[0-9]+]]:gprb(p0) = G_GLOBAL_VALUE @i32_align8 - ; MIPS32R6: G_STORE [[COPY]](s32), [[GV]](p0) :: (store 4 into @i32_align8, align 8) + ; MIPS32R6: G_STORE [[COPY]](s32), [[GV]](p0) :: (store (s32) into @i32_align8, align 8) ; MIPS32R6: RetRA %0:_(s32) = COPY $a0 %1:_(p0) = G_GLOBAL_VALUE @i32_align8 - G_STORE %0(s32), %1(p0) :: (store 4 into @i32_align8, align 8) + G_STORE %0(s32), %1(p0) :: (store (s32) into @i32_align8, align 8) RetRA ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/sub_vec.mir b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/sub_vec.mir index 42a83feb6ef30..cc66a985a1abd 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/sub_vec.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/sub_vec.mir @@ -22,18 +22,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:fprb(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:fprb(<16 x s8>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:fprb(<16 x s8>) = G_LOAD [[COPY]](p0) :: (load (<16 x s8>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:fprb(<16 x s8>) = G_LOAD [[COPY1]](p0) :: (load (<16 x s8>) from %ir.b) ; P5600: [[SUB:%[0-9]+]]:fprb(<16 x s8>) = G_SUB [[LOAD1]], [[LOAD]] - ; P5600: G_STORE [[SUB]](<16 x s8>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[SUB]](<16 x s8>), [[COPY2]](p0) :: (store (<16 x s8>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<16 x s8>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<16 x s8>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<16 x s8>) = G_LOAD %0(p0) :: (load (<16 x s8>) from %ir.a) + %4:_(<16 x s8>) = G_LOAD %1(p0) :: (load (<16 x s8>) from %ir.b) %5:_(<16 x s8>) = G_SUB %4, %3 - G_STORE %5(<16 x s8>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<16 x s8>), %2(p0) :: (store (<16 x s8>) into %ir.c) RetRA ... @@ -51,18 +51,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:fprb(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:fprb(<8 x s16>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:fprb(<8 x s16>) = G_LOAD [[COPY]](p0) :: (load (<8 x s16>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:fprb(<8 x s16>) = G_LOAD [[COPY1]](p0) :: (load (<8 x s16>) from %ir.b) ; P5600: [[SUB:%[0-9]+]]:fprb(<8 x s16>) = G_SUB [[LOAD1]], [[LOAD]] - ; P5600: G_STORE [[SUB]](<8 x s16>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[SUB]](<8 x s16>), [[COPY2]](p0) :: (store (<8 x s16>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<8 x s16>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<8 x s16>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<8 x s16>) = G_LOAD %0(p0) :: (load (<8 x s16>) from %ir.a) + %4:_(<8 x s16>) = G_LOAD %1(p0) :: (load (<8 x s16>) from %ir.b) %5:_(<8 x s16>) = G_SUB %4, %3 - G_STORE %5(<8 x s16>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<8 x s16>), %2(p0) :: (store (<8 x s16>) into %ir.c) RetRA ... @@ -80,18 +80,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load (<4 x s32>) from %ir.b) ; P5600: [[SUB:%[0-9]+]]:fprb(<4 x s32>) = G_SUB [[LOAD1]], [[LOAD]] - ; P5600: G_STORE [[SUB]](<4 x s32>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[SUB]](<4 x s32>), [[COPY2]](p0) :: (store (<4 x s32>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.a) + %4:_(<4 x s32>) = G_LOAD %1(p0) :: (load (<4 x s32>) from %ir.b) %5:_(<4 x s32>) = G_SUB %4, %3 - G_STORE %5(<4 x s32>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<4 x s32>), %2(p0) :: (store (<4 x s32>) into %ir.c) RetRA ... @@ -109,18 +109,18 @@ body: | ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 ; P5600: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2 - ; P5600: [[LOAD:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) - ; P5600: [[LOAD1:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.b) + ; P5600: [[LOAD:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>) from %ir.a) + ; P5600: [[LOAD1:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load (<2 x s64>) from %ir.b) ; P5600: [[SUB:%[0-9]+]]:fprb(<2 x s64>) = G_SUB [[LOAD1]], [[LOAD]] - ; P5600: G_STORE [[SUB]](<2 x s64>), [[COPY2]](p0) :: (store 16 into %ir.c) + ; P5600: G_STORE [[SUB]](<2 x s64>), [[COPY2]](p0) :: (store (<2 x s64>) into %ir.c) ; P5600: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 %2:_(p0) = COPY $a2 - %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) - %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.b) + %3:_(<2 x s64>) = G_LOAD %0(p0) :: (load (<2 x s64>) from %ir.a) + %4:_(<2 x s64>) = G_LOAD %1(p0) :: (load (<2 x s64>) from %ir.b) %5:_(<2 x s64>) = G_SUB %4, %3 - G_STORE %5(<2 x s64>), %2(p0) :: (store 16 into %ir.c) + G_STORE %5(<2 x s64>), %2(p0) :: (store (<2 x s64>) into %ir.c) RetRA ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/test_TypeInfoforMF.mir b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/test_TypeInfoforMF.mir index 234a5607112b5..c56572d094a9f 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/test_TypeInfoforMF.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/test_TypeInfoforMF.mir @@ -24,11 +24,11 @@ body: | ; MIPS32-LABEL: name: outgoing_gpr ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 - ; MIPS32: [[LOAD:%[0-9]+]]:gprb(s32) = G_LOAD [[COPY]](p0) :: (load 4 from %ir.i32_ptr) + ; MIPS32: [[LOAD:%[0-9]+]]:gprb(s32) = G_LOAD [[COPY]](p0) :: (load (s32) from %ir.i32_ptr) ; MIPS32: $v0 = COPY [[LOAD]](s32) ; MIPS32: RetRA implicit $v0 %0:_(p0) = COPY $a0 - %1:_(s32) = G_LOAD %0(p0) :: (load 4 from %ir.i32_ptr) + %1:_(s32) = G_LOAD %0(p0) :: (load (s32) from %ir.i32_ptr) $v0 = COPY %1(s32) RetRA implicit $v0 @@ -45,11 +45,11 @@ body: | ; MIPS32-LABEL: name: outgoing_fpr ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 - ; MIPS32: [[LOAD:%[0-9]+]]:fprb(s32) = G_LOAD [[COPY]](p0) :: (load 4 from %ir.float_ptr) + ; MIPS32: [[LOAD:%[0-9]+]]:fprb(s32) = G_LOAD [[COPY]](p0) :: (load (s32) from %ir.float_ptr) ; MIPS32: $f0 = COPY [[LOAD]](s32) ; MIPS32: RetRA implicit $f0 %0:_(p0) = COPY $a0 - %1:_(s32) = G_LOAD %0(p0) :: (load 4 from %ir.float_ptr) + %1:_(s32) = G_LOAD %0(p0) :: (load (s32) from %ir.float_ptr) $f0 = COPY %1(s32) RetRA implicit $f0 @@ -67,15 +67,15 @@ body: | ; MIPS32: liveins: $a0, $a1 ; MIPS32: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 - ; MIPS32: [[LOAD:%[0-9]+]]:gprb(s32) = G_LOAD [[COPY]](p0) :: (load 4 from %ir.i32_ptr1) - ; MIPS32: [[LOAD1:%[0-9]+]]:gprb(s32) = G_LOAD [[COPY1]](p0) :: (load 4 from %ir.i32_ptr2) + ; MIPS32: [[LOAD:%[0-9]+]]:gprb(s32) = G_LOAD [[COPY]](p0) :: (load (s32) from %ir.i32_ptr1) + ; MIPS32: [[LOAD1:%[0-9]+]]:gprb(s32) = G_LOAD [[COPY1]](p0) :: (load (s32) from %ir.i32_ptr2) ; MIPS32: [[ADD:%[0-9]+]]:gprb(s32) = G_ADD [[LOAD1]], [[LOAD]] ; MIPS32: $v0 = COPY [[ADD]](s32) ; MIPS32: RetRA implicit $v0 %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 - %2:_(s32) = G_LOAD %0(p0) :: (load 4 from %ir.i32_ptr1) - %3:_(s32) = G_LOAD %1(p0) :: (load 4 from %ir.i32_ptr2) + %2:_(s32) = G_LOAD %0(p0) :: (load (s32) from %ir.i32_ptr1) + %3:_(s32) = G_LOAD %1(p0) :: (load (s32) from %ir.i32_ptr2) %4:_(s32) = G_ADD %3, %2 $v0 = COPY %4(s32) RetRA implicit $v0 @@ -94,15 +94,15 @@ body: | ; MIPS32: liveins: $a0, $a1 ; MIPS32: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 - ; MIPS32: [[LOAD:%[0-9]+]]:fprb(s32) = G_LOAD [[COPY]](p0) :: (load 4 from %ir.float_ptr1) - ; MIPS32: [[LOAD1:%[0-9]+]]:fprb(s32) = G_LOAD [[COPY1]](p0) :: (load 4 from %ir.float_ptr2) + ; MIPS32: [[LOAD:%[0-9]+]]:fprb(s32) = G_LOAD [[COPY]](p0) :: (load (s32) from %ir.float_ptr1) + ; MIPS32: [[LOAD1:%[0-9]+]]:fprb(s32) = G_LOAD [[COPY1]](p0) :: (load (s32) from %ir.float_ptr2) ; MIPS32: [[FADD:%[0-9]+]]:fprb(s32) = G_FADD [[LOAD]], [[LOAD1]] ; MIPS32: $f0 = COPY [[FADD]](s32) ; MIPS32: RetRA implicit $f0 %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 - %2:_(s32) = G_LOAD %0(p0) :: (load 4 from %ir.float_ptr1) - %3:_(s32) = G_LOAD %1(p0) :: (load 4 from %ir.float_ptr2) + %2:_(s32) = G_LOAD %0(p0) :: (load (s32) from %ir.float_ptr1) + %3:_(s32) = G_LOAD %1(p0) :: (load (s32) from %ir.float_ptr2) %4:_(s32) = G_FADD %2, %3 $f0 = COPY %4(s32) RetRA implicit $f0 @@ -122,7 +122,7 @@ body: | ; MIPS32: [[COPY:%[0-9]+]]:gprb(s32) = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:gprb(s32) = COPY $a1 ; MIPS32: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2 - ; MIPS32: [[LOAD:%[0-9]+]]:gprb(s32) = G_LOAD [[COPY2]](p0) :: (load 4 from %ir.a) + ; MIPS32: [[LOAD:%[0-9]+]]:gprb(s32) = G_LOAD [[COPY2]](p0) :: (load (s32) from %ir.a) ; MIPS32: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 1 ; MIPS32: [[COPY3:%[0-9]+]]:gprb(s32) = COPY [[COPY1]](s32) ; MIPS32: [[AND:%[0-9]+]]:gprb(s32) = G_AND [[COPY3]], [[C]] @@ -132,7 +132,7 @@ body: | %0:_(s32) = COPY $a0 %3:_(s32) = COPY $a1 %2:_(p0) = COPY $a2 - %4:_(s32) = G_LOAD %2(p0) :: (load 4 from %ir.a) + %4:_(s32) = G_LOAD %2(p0) :: (load (s32) from %ir.a) %7:_(s32) = G_CONSTANT i32 1 %8:_(s32) = COPY %3(s32) %6:_(s32) = G_AND %8, %7 @@ -155,7 +155,7 @@ body: | ; MIPS32: [[COPY:%[0-9]+]]:fprb(s32) = COPY $f12 ; MIPS32: [[COPY1:%[0-9]+]]:gprb(s32) = COPY $a1 ; MIPS32: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2 - ; MIPS32: [[LOAD:%[0-9]+]]:fprb(s32) = G_LOAD [[COPY2]](p0) :: (load 4 from %ir.a) + ; MIPS32: [[LOAD:%[0-9]+]]:fprb(s32) = G_LOAD [[COPY2]](p0) :: (load (s32) from %ir.a) ; MIPS32: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 1 ; MIPS32: [[COPY3:%[0-9]+]]:gprb(s32) = COPY [[COPY1]](s32) ; MIPS32: [[AND:%[0-9]+]]:gprb(s32) = G_AND [[COPY3]], [[C]] @@ -165,7 +165,7 @@ body: | %0:_(s32) = COPY $f12 %3:_(s32) = COPY $a1 %2:_(p0) = COPY $a2 - %4:_(s32) = G_LOAD %2(p0) :: (load 4 from %ir.a) + %4:_(s32) = G_LOAD %2(p0) :: (load (s32) from %ir.a) %7:_(s32) = G_CONSTANT i32 1 %8:_(s32) = COPY %3(s32) %6:_(s32) = G_AND %8, %7 @@ -189,7 +189,7 @@ body: | ; MIPS32: [[COPY1:%[0-9]+]]:gprb(s32) = COPY $a1 ; MIPS32: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2 ; MIPS32: [[COPY3:%[0-9]+]]:gprb(s32) = COPY $a3 - ; MIPS32: [[LOAD:%[0-9]+]]:gprb(s32) = G_LOAD [[COPY2]](p0) :: (load 4 from %ir.i32_ptr) + ; MIPS32: [[LOAD:%[0-9]+]]:gprb(s32) = G_LOAD [[COPY2]](p0) :: (load (s32) from %ir.i32_ptr) ; MIPS32: [[ADD:%[0-9]+]]:gprb(s32) = G_ADD [[COPY1]], [[COPY]] ; MIPS32: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 1 ; MIPS32: [[COPY4:%[0-9]+]]:gprb(s32) = COPY [[COPY3]](s32) @@ -201,7 +201,7 @@ body: | %1:_(s32) = COPY $a1 %2:_(p0) = COPY $a2 %4:_(s32) = COPY $a3 - %5:_(s32) = G_LOAD %2(p0) :: (load 4 from %ir.i32_ptr) + %5:_(s32) = G_LOAD %2(p0) :: (load (s32) from %ir.i32_ptr) %6:_(s32) = G_ADD %1, %0 %9:_(s32) = G_CONSTANT i32 1 %10:_(s32) = COPY %4(s32) @@ -226,7 +226,7 @@ body: | ; MIPS32: [[COPY1:%[0-9]+]]:fprb(s32) = COPY $f14 ; MIPS32: [[COPY2:%[0-9]+]]:gprb(p0) = COPY $a2 ; MIPS32: [[COPY3:%[0-9]+]]:gprb(s32) = COPY $a3 - ; MIPS32: [[LOAD:%[0-9]+]]:fprb(s32) = G_LOAD [[COPY2]](p0) :: (load 4 from %ir.float_ptr) + ; MIPS32: [[LOAD:%[0-9]+]]:fprb(s32) = G_LOAD [[COPY2]](p0) :: (load (s32) from %ir.float_ptr) ; MIPS32: [[FADD:%[0-9]+]]:fprb(s32) = G_FADD [[COPY1]], [[COPY]] ; MIPS32: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 1 ; MIPS32: [[COPY4:%[0-9]+]]:gprb(s32) = COPY [[COPY3]](s32) @@ -238,7 +238,7 @@ body: | %1:_(s32) = COPY $f14 %2:_(p0) = COPY $a2 %4:_(s32) = COPY $a3 - %5:_(s32) = G_LOAD %2(p0) :: (load 4 from %ir.float_ptr) + %5:_(s32) = G_LOAD %2(p0) :: (load (s32) from %ir.float_ptr) %6:_(s32) = G_FADD %1, %0 %9:_(s32) = G_CONSTANT i32 1 %10:_(s32) = COPY %4(s32) diff --git a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/truncStore_and_aExtLoad.mir b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/truncStore_and_aExtLoad.mir index 049cae33c41e6..d7f549fd92220 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/truncStore_and_aExtLoad.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/truncStore_and_aExtLoad.mir @@ -20,15 +20,15 @@ body: | ; MIPS32: liveins: $a0, $a1 ; MIPS32: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 - ; MIPS32: [[LOAD:%[0-9]+]]:gprb(s32) = G_LOAD [[COPY1]](p0) :: (load 1 from %ir.py) + ; MIPS32: [[LOAD:%[0-9]+]]:gprb(s32) = G_LOAD [[COPY1]](p0) :: (load (s8) from %ir.py) ; MIPS32: [[COPY2:%[0-9]+]]:gprb(s32) = COPY [[LOAD]](s32) - ; MIPS32: G_STORE [[COPY2]](s32), [[COPY]](p0) :: (store 1 into %ir.px) + ; MIPS32: G_STORE [[COPY2]](s32), [[COPY]](p0) :: (store (s8) into %ir.px) ; MIPS32: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 - %4:_(s32) = G_LOAD %1(p0) :: (load 1 from %ir.py) + %4:_(s32) = G_LOAD %1(p0) :: (load (s8) from %ir.py) %3:_(s32) = COPY %4(s32) - G_STORE %3(s32), %0(p0) :: (store 1 into %ir.px) + G_STORE %3(s32), %0(p0) :: (store (s8) into %ir.px) RetRA ... @@ -45,15 +45,15 @@ body: | ; MIPS32: liveins: $a0, $a1 ; MIPS32: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 - ; MIPS32: [[LOAD:%[0-9]+]]:gprb(s32) = G_LOAD [[COPY1]](p0) :: (load 2 from %ir.py) + ; MIPS32: [[LOAD:%[0-9]+]]:gprb(s32) = G_LOAD [[COPY1]](p0) :: (load (s16) from %ir.py) ; MIPS32: [[COPY2:%[0-9]+]]:gprb(s32) = COPY [[LOAD]](s32) - ; MIPS32: G_STORE [[COPY2]](s32), [[COPY]](p0) :: (store 2 into %ir.px) + ; MIPS32: G_STORE [[COPY2]](s32), [[COPY]](p0) :: (store (s16) into %ir.px) ; MIPS32: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 - %4:_(s32) = G_LOAD %1(p0) :: (load 2 from %ir.py) + %4:_(s32) = G_LOAD %1(p0) :: (load (s16) from %ir.py) %3:_(s32) = COPY %4(s32) - G_STORE %3(s32), %0(p0) :: (store 2 into %ir.px) + G_STORE %3(s32), %0(p0) :: (store (s16) into %ir.px) RetRA ... @@ -70,13 +70,13 @@ body: | ; MIPS32: liveins: $a0, $a1 ; MIPS32: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 - ; MIPS32: [[LOAD:%[0-9]+]]:gprb(s32) = G_LOAD [[COPY1]](p0) :: (load 4 from %ir.py) - ; MIPS32: G_STORE [[LOAD]](s32), [[COPY]](p0) :: (store 4 into %ir.px) + ; MIPS32: [[LOAD:%[0-9]+]]:gprb(s32) = G_LOAD [[COPY1]](p0) :: (load (s32) from %ir.py) + ; MIPS32: G_STORE [[LOAD]](s32), [[COPY]](p0) :: (store (s32) into %ir.px) ; MIPS32: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 - %2:_(s32) = G_LOAD %1(p0) :: (load 4 from %ir.py) - G_STORE %2(s32), %0(p0) :: (store 4 into %ir.px) + %2:_(s32) = G_LOAD %1(p0) :: (load (s32) from %ir.py) + G_STORE %2(s32), %0(p0) :: (store (s32) into %ir.px) RetRA ... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/var_arg.mir b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/var_arg.mir index 903a755b47377..ccd1b6c78c618 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/var_arg.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/var_arg.mir @@ -58,30 +58,30 @@ body: | ; MIPS32: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:gprb(s32) = COPY $a1 ; MIPS32: [[FRAME_INDEX:%[0-9]+]]:gprb(p0) = G_FRAME_INDEX %fixed-stack.1 - ; MIPS32: G_STORE [[COPY1]](s32), [[FRAME_INDEX]](p0) :: (store 4 into %fixed-stack.1) + ; MIPS32: G_STORE [[COPY1]](s32), [[FRAME_INDEX]](p0) :: (store (s32) into %fixed-stack.1) ; MIPS32: [[COPY2:%[0-9]+]]:gprb(s32) = COPY $a2 ; MIPS32: [[FRAME_INDEX1:%[0-9]+]]:gprb(p0) = G_FRAME_INDEX %fixed-stack.2 - ; MIPS32: G_STORE [[COPY2]](s32), [[FRAME_INDEX1]](p0) :: (store 4 into %fixed-stack.2) + ; MIPS32: G_STORE [[COPY2]](s32), [[FRAME_INDEX1]](p0) :: (store (s32) into %fixed-stack.2) ; MIPS32: [[COPY3:%[0-9]+]]:gprb(s32) = COPY $a3 ; MIPS32: [[FRAME_INDEX2:%[0-9]+]]:gprb(p0) = G_FRAME_INDEX %fixed-stack.3 - ; MIPS32: G_STORE [[COPY3]](s32), [[FRAME_INDEX2]](p0) :: (store 4 into %fixed-stack.3) + ; MIPS32: G_STORE [[COPY3]](s32), [[FRAME_INDEX2]](p0) :: (store (s32) into %fixed-stack.3) ; MIPS32: [[GV:%[0-9]+]]:gprb(p0) = G_GLOBAL_VALUE @.str ; MIPS32: [[COPY4:%[0-9]+]]:gprb(p0) = COPY [[GV]](p0) ; MIPS32: [[FRAME_INDEX3:%[0-9]+]]:gprb(p0) = G_FRAME_INDEX %stack.0.fmt.addr ; MIPS32: [[FRAME_INDEX4:%[0-9]+]]:gpr32(p0) = G_FRAME_INDEX %stack.1.ap ; MIPS32: [[FRAME_INDEX5:%[0-9]+]]:gpr32(p0) = G_FRAME_INDEX %stack.2.aq ; MIPS32: [[FRAME_INDEX6:%[0-9]+]]:gprb(p0) = G_FRAME_INDEX %stack.3.s - ; MIPS32: G_STORE [[COPY]](p0), [[FRAME_INDEX3]](p0) :: (store 4 into %ir.fmt.addr) - ; MIPS32: G_VASTART [[FRAME_INDEX4]](p0) :: (store 4 into %ir.ap1, align 1) - ; MIPS32: [[LW:%[0-9]+]]:gpr32 = LW [[FRAME_INDEX4]](p0), 0 :: (load 4) - ; MIPS32: SW [[LW]], [[FRAME_INDEX5]](p0), 0 :: (store 4) - ; MIPS32: [[LOAD:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX5]](p0) :: (load 4 from %ir.aq) + ; MIPS32: G_STORE [[COPY]](p0), [[FRAME_INDEX3]](p0) :: (store (p0) into %ir.fmt.addr) + ; MIPS32: G_VASTART [[FRAME_INDEX4]](p0) :: (store (s32) into %ir.ap1, align 1) + ; MIPS32: [[LW:%[0-9]+]]:gpr32 = LW [[FRAME_INDEX4]](p0), 0 :: (load (s32)) + ; MIPS32: SW [[LW]], [[FRAME_INDEX5]](p0), 0 :: (store (s32)) + ; MIPS32: [[LOAD:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX5]](p0) :: (load (p0) from %ir.aq) ; MIPS32: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 4 ; MIPS32: [[GEP:%[0-9]+]]:gprb(p0) = G_PTR_ADD [[LOAD]], [[C]](s32) - ; MIPS32: G_STORE [[GEP]](p0), [[FRAME_INDEX5]](p0) :: (store 4 into %ir.aq) - ; MIPS32: [[LOAD1:%[0-9]+]]:gprb(p0) = G_LOAD [[LOAD]](p0) :: (load 4 from %ir.2) - ; MIPS32: G_STORE [[LOAD1]](p0), [[FRAME_INDEX6]](p0) :: (store 4 into %ir.s) - ; MIPS32: [[LOAD2:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX6]](p0) :: (load 4 from %ir.s) + ; MIPS32: G_STORE [[GEP]](p0), [[FRAME_INDEX5]](p0) :: (store (p0) into %ir.aq) + ; MIPS32: [[LOAD1:%[0-9]+]]:gprb(p0) = G_LOAD [[LOAD]](p0) :: (load (p0) from %ir.2) + ; MIPS32: G_STORE [[LOAD1]](p0), [[FRAME_INDEX6]](p0) :: (store (p0) into %ir.s) + ; MIPS32: [[LOAD2:%[0-9]+]]:gprb(p0) = G_LOAD [[FRAME_INDEX6]](p0) :: (load (p0) from %ir.s) ; MIPS32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp ; MIPS32: $a0 = COPY [[COPY4]](p0) ; MIPS32: $a1 = COPY [[LOAD2]](p0) @@ -91,30 +91,30 @@ body: | %0:_(p0) = COPY $a0 %1:_(s32) = COPY $a1 %2:_(p0) = G_FRAME_INDEX %fixed-stack.2 - G_STORE %1(s32), %2(p0) :: (store 4 into %fixed-stack.2) + G_STORE %1(s32), %2(p0) :: (store (s32) into %fixed-stack.2) %3:_(s32) = COPY $a2 %4:_(p0) = G_FRAME_INDEX %fixed-stack.1 - G_STORE %3(s32), %4(p0) :: (store 4 into %fixed-stack.1) + G_STORE %3(s32), %4(p0) :: (store (s32) into %fixed-stack.1) %5:_(s32) = COPY $a3 %6:_(p0) = G_FRAME_INDEX %fixed-stack.0 - G_STORE %5(s32), %6(p0) :: (store 4 into %fixed-stack.0) + G_STORE %5(s32), %6(p0) :: (store (s32) into %fixed-stack.0) %18:_(p0) = G_GLOBAL_VALUE @.str %17:_(p0) = COPY %18(p0) %7:_(p0) = G_FRAME_INDEX %stack.0.fmt.addr %8:gpr32(p0) = G_FRAME_INDEX %stack.1.ap %9:gpr32(p0) = G_FRAME_INDEX %stack.2.aq %10:_(p0) = G_FRAME_INDEX %stack.3.s - G_STORE %0(p0), %7(p0) :: (store 4 into %ir.fmt.addr) - G_VASTART %8(p0) :: (store 4 into %ir.ap1, align 1) - %19:gpr32 = LW %8(p0), 0 :: (load 4) - SW %19, %9(p0), 0 :: (store 4) - %11:_(p0) = G_LOAD %9(p0) :: (load 4 from %ir.aq) + G_STORE %0(p0), %7(p0) :: (store (p0) into %ir.fmt.addr) + G_VASTART %8(p0) :: (store (s32) into %ir.ap1, align 1) + %19:gpr32 = LW %8(p0), 0 :: (load (s32)) + SW %19, %9(p0), 0 :: (store (s32)) + %11:_(p0) = G_LOAD %9(p0) :: (load (p0) from %ir.aq) %12:_(s32) = G_CONSTANT i32 4 %13:_(p0) = G_PTR_ADD %11, %12(s32) - G_STORE %13(p0), %9(p0) :: (store 4 into %ir.aq) - %14:_(p0) = G_LOAD %11(p0) :: (load 4 from %ir.2) - G_STORE %14(p0), %10(p0) :: (store 4 into %ir.s) - %15:_(p0) = G_LOAD %10(p0) :: (load 4 from %ir.s) + G_STORE %13(p0), %9(p0) :: (store (p0) into %ir.aq) + %14:_(p0) = G_LOAD %11(p0) :: (load (p0) from %ir.2) + G_STORE %14(p0), %10(p0) :: (store (p0) into %ir.s) + %15:_(p0) = G_LOAD %10(p0) :: (load (p0) from %ir.s) ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp $a0 = COPY %17(p0) $a1 = COPY %15(p0) diff --git a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/zextLoad_and_sextLoad.mir b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/zextLoad_and_sextLoad.mir index 057cf93aba18a..44baa0d07e6d6 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/zextLoad_and_sextLoad.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/zextLoad_and_sextLoad.mir @@ -22,11 +22,11 @@ body: | ; MIPS32-LABEL: name: load1_s8_to_zextLoad1_s32 ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 - ; MIPS32: [[ZEXTLOAD:%[0-9]+]]:gprb(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load 1 from %ir.px) + ; MIPS32: [[ZEXTLOAD:%[0-9]+]]:gprb(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8) from %ir.px) ; MIPS32: $v0 = COPY [[ZEXTLOAD]](s32) ; MIPS32: RetRA implicit $v0 %0:_(p0) = COPY $a0 - %2:_(s32) = G_ZEXTLOAD %0(p0) :: (load 1 from %ir.px) + %2:_(s32) = G_ZEXTLOAD %0(p0) :: (load (s8) from %ir.px) $v0 = COPY %2(s32) RetRA implicit $v0 @@ -43,11 +43,11 @@ body: | ; MIPS32-LABEL: name: load2_s16_to_zextLoad2_s32 ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 - ; MIPS32: [[ZEXTLOAD:%[0-9]+]]:gprb(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load 2 from %ir.px) + ; MIPS32: [[ZEXTLOAD:%[0-9]+]]:gprb(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16) from %ir.px) ; MIPS32: $v0 = COPY [[ZEXTLOAD]](s32) ; MIPS32: RetRA implicit $v0 %0:_(p0) = COPY $a0 - %2:_(s32) = G_ZEXTLOAD %0(p0) :: (load 2 from %ir.px) + %2:_(s32) = G_ZEXTLOAD %0(p0) :: (load (s16) from %ir.px) $v0 = COPY %2(s32) RetRA implicit $v0 @@ -64,13 +64,13 @@ body: | ; MIPS32-LABEL: name: load4_s32_to_zextLoad4_s64 ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 - ; MIPS32: [[LOAD:%[0-9]+]]:gprb(s32) = G_LOAD [[COPY]](p0) :: (load 4 from %ir.px) + ; MIPS32: [[LOAD:%[0-9]+]]:gprb(s32) = G_LOAD [[COPY]](p0) :: (load (s32) from %ir.px) ; MIPS32: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 0 ; MIPS32: $v0 = COPY [[LOAD]](s32) ; MIPS32: $v1 = COPY [[C]](s32) ; MIPS32: RetRA implicit $v0, implicit $v1 %0:_(p0) = COPY $a0 - %5:_(s32) = G_LOAD %0(p0) :: (load 4 from %ir.px) + %5:_(s32) = G_LOAD %0(p0) :: (load (s32) from %ir.px) %6:_(s32) = G_CONSTANT i32 0 %2:_(s64) = G_MERGE_VALUES %5(s32), %6(s32) %3:_(s32), %4:_(s32) = G_UNMERGE_VALUES %2(s64) @@ -91,11 +91,11 @@ body: | ; MIPS32-LABEL: name: load1_s8_to_sextLoad1_s32 ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 - ; MIPS32: [[SEXTLOAD:%[0-9]+]]:gprb(s32) = G_SEXTLOAD [[COPY]](p0) :: (load 1 from %ir.px) + ; MIPS32: [[SEXTLOAD:%[0-9]+]]:gprb(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s8) from %ir.px) ; MIPS32: $v0 = COPY [[SEXTLOAD]](s32) ; MIPS32: RetRA implicit $v0 %0:_(p0) = COPY $a0 - %2:_(s32) = G_SEXTLOAD %0(p0) :: (load 1 from %ir.px) + %2:_(s32) = G_SEXTLOAD %0(p0) :: (load (s8) from %ir.px) $v0 = COPY %2(s32) RetRA implicit $v0 @@ -112,11 +112,11 @@ body: | ; MIPS32-LABEL: name: load2_s16_to_sextLoad2_s32 ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 - ; MIPS32: [[SEXTLOAD:%[0-9]+]]:gprb(s32) = G_SEXTLOAD [[COPY]](p0) :: (load 2 from %ir.px) + ; MIPS32: [[SEXTLOAD:%[0-9]+]]:gprb(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s16) from %ir.px) ; MIPS32: $v0 = COPY [[SEXTLOAD]](s32) ; MIPS32: RetRA implicit $v0 %0:_(p0) = COPY $a0 - %2:_(s32) = G_SEXTLOAD %0(p0) :: (load 2 from %ir.px) + %2:_(s32) = G_SEXTLOAD %0(p0) :: (load (s16) from %ir.px) $v0 = COPY %2(s32) RetRA implicit $v0 @@ -133,7 +133,7 @@ body: | ; MIPS32-LABEL: name: load4_s32_to_sextLoad4_s64 ; MIPS32: liveins: $a0 ; MIPS32: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 - ; MIPS32: [[LOAD:%[0-9]+]]:gprb(s32) = G_LOAD [[COPY]](p0) :: (load 4 from %ir.px) + ; MIPS32: [[LOAD:%[0-9]+]]:gprb(s32) = G_LOAD [[COPY]](p0) :: (load (s32) from %ir.px) ; MIPS32: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 31 ; MIPS32: [[C1:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 0 ; MIPS32: [[COPY1:%[0-9]+]]:gprb(s32) = COPY [[C]](s32) @@ -142,7 +142,7 @@ body: | ; MIPS32: $v1 = COPY [[ASHR]](s32) ; MIPS32: RetRA implicit $v0, implicit $v1 %0:_(p0) = COPY $a0 - %5:_(s32) = G_LOAD %0(p0) :: (load 4 from %ir.px) + %5:_(s32) = G_LOAD %0(p0) :: (load (s32) from %ir.px) %9:_(s32) = G_CONSTANT i32 31 %10:_(s32) = G_CONSTANT i32 0 %8:_(s32) = COPY %9(s32) diff --git a/llvm/test/CodeGen/Mips/address-selection.ll b/llvm/test/CodeGen/Mips/address-selection.ll index 1e0a9a42e127b..5a1a97fdb3528 100644 --- a/llvm/test/CodeGen/Mips/address-selection.ll +++ b/llvm/test/CodeGen/Mips/address-selection.ll @@ -28,7 +28,7 @@ entry: ; MIPS-XGOT-LABEL: ===== Instruction selection ends: ; MIPS-XGOT: t[[B:[0-9]+]]: i32 = LUi TargetGlobalAddress:i32 0 [TF=20] ; MIPS-XGOT: t[[C:[0-9]+]]: i32 = ADDu t[[B]], Register:i32 %0 -; MIPS-XGOT: t{{.*}}: i32,ch = LW t[[C]], TargetGlobalAddress:i32 0 [TF=21], t{{.*}} +; MIPS-XGOT: t{{.*}}: i32,ch = LW t[[C]], TargetGlobalAddress:i32 0 [TF=21], t{{.*}} ; MM-LABEL: ===== Instruction selection ends: ; MM: t[[A:[0-9]+]]: i32 = LUi_MM TargetGlobalAddress:i32 0 [TF=4] @@ -37,4 +37,4 @@ entry: ; MM-XGOT-LABEL: ===== Instruction selection ends: ; MM-XGOT: t[[B:[0-9]+]]: i32 = LUi_MM TargetGlobalAddress:i32 0 [TF=20] ; MM-XGOT: t[[C:[0-9]+]]: i32 = ADDU16_MM t[[B]], Register:i32 %0 -; MM-XGOT: t{{.*}}: i32,ch = LW_MM t[[C]], TargetGlobalAddress:i32 0 [TF=21], t0 +; MM-XGOT: t{{.*}}: i32,ch = LW_MM t[[C]], TargetGlobalAddress:i32 0 [TF=21], t0 diff --git a/llvm/test/CodeGen/Mips/compactbranches/compact-branch-implicit-def.mir b/llvm/test/CodeGen/Mips/compactbranches/compact-branch-implicit-def.mir index 3ae01117f9bac..c964988b7ea06 100644 --- a/llvm/test/CodeGen/Mips/compactbranches/compact-branch-implicit-def.mir +++ b/llvm/test/CodeGen/Mips/compactbranches/compact-branch-implicit-def.mir @@ -100,16 +100,16 @@ body: | $sp_64 = DADDiu $sp_64, -32 CFI_INSTRUCTION def_cfa_offset 32 - SD killed $ra_64, $sp_64, 24 :: (store 8 into %stack.2) - SD killed $fp_64, $sp_64, 16 :: (store 8 into %stack.3) - SD killed $gp_64, $sp_64, 8 :: (store 8 into %stack.4) + SD killed $ra_64, $sp_64, 24 :: (store (s64) into %stack.2) + SD killed $fp_64, $sp_64, 16 :: (store (s64) into %stack.3) + SD killed $gp_64, $sp_64, 8 :: (store (s64) into %stack.4) CFI_INSTRUCTION offset $ra_64, -8 CFI_INSTRUCTION offset $fp_64, -16 CFI_INSTRUCTION offset $gp_64, -24 CFI_INSTRUCTION def_cfa_register $fp_64 $at_64 = LUi64 @f $v0_64 = DADDu killed $at_64, $t9_64 - SW $a0, $sp_64, 0 :: (store 4 into %ir.a.addr) + SW $a0, $sp_64, 0 :: (store (s32) into %ir.a.addr) BGTZC $a0, %bb.5.if.else, implicit-def $at bb.1.if.then: @@ -132,9 +132,9 @@ body: | successors: %bb.6.return(0x80000000) liveins: $t8 - $at = LW $sp_64, 0 :: (dereferenceable load 4 from %ir.a.addr) + $at = LW $sp_64, 0 :: (dereferenceable load (s32) from %ir.a.addr) $at = ADDu killed $at, $t8 - SW killed $at, $sp_64, 4 :: (store 4 into %ir.retval) + SW killed $at, $sp_64, 4 :: (store (s32) into %ir.retval) J %bb.6.return, implicit-def dead $at bb.5.if.else: @@ -142,16 +142,16 @@ body: | liveins: $v0_64 $gp_64 = DADDiu killed $v0_64, @f - $a0_64 = LW64 $sp_64, 0 :: (dereferenceable load 4 from %ir.a.addr) - $t9_64 = LD $gp_64, @g :: (load 8 from call-entry @g) + $a0_64 = LW64 $sp_64, 0 :: (dereferenceable load (s32) from %ir.a.addr) + $t9_64 = LD $gp_64, @g :: (load (s64) from call-entry @g) JALR64Pseudo $t9_64, csr_n64, implicit-def dead $ra, implicit $a0_64, implicit $gp_64, implicit-def $sp, implicit-def $v0 - SW killed $v0, $sp_64, 4 :: (store 4 into %ir.retval) + SW killed $v0, $sp_64, 4 :: (store (s32) into %ir.retval) bb.6.return: - $v0 = LW $sp_64, 4 :: (dereferenceable load 4 from %ir.retval) - $gp_64 = LD $sp_64, 8 :: (load 8 from %stack.4) - $fp_64 = LD $sp_64, 16 :: (load 8 from %stack.3) - $ra_64 = LD $sp_64, 24 :: (load 8 from %stack.2) + $v0 = LW $sp_64, 4 :: (dereferenceable load (s32) from %ir.retval) + $gp_64 = LD $sp_64, 8 :: (load (s64) from %stack.4) + $fp_64 = LD $sp_64, 16 :: (load (s64) from %stack.3) + $ra_64 = LD $sp_64, 24 :: (load (s64) from %stack.2) $sp_64 = DADDiu $sp_64, 32 PseudoReturn64 $ra_64 diff --git a/llvm/test/CodeGen/Mips/compactbranches/empty-block.mir b/llvm/test/CodeGen/Mips/compactbranches/empty-block.mir index 5e9eb680faa5c..a2caf5b67aab3 100644 --- a/llvm/test/CodeGen/Mips/compactbranches/empty-block.mir +++ b/llvm/test/CodeGen/Mips/compactbranches/empty-block.mir @@ -66,7 +66,7 @@ body: | $sp = ADDiu $sp, -24 CFI_INSTRUCTION def_cfa_offset 24 - SW killed $ra, $sp, 20 :: (store 4 into %stack.0) + SW killed $ra, $sp, 20 :: (store (s32) into %stack.0) CFI_INSTRUCTION offset $ra_64, -4 JAL @k, csr_o32_fp64, implicit-def dead $ra, implicit-def $sp, implicit-def $v0 BLEZ $v0, %bb.4.if.end, implicit-def $at @@ -84,7 +84,7 @@ body: | JAL @f, csr_o32_fp64, implicit-def dead $ra, implicit killed $a0, implicit-def $sp bb.4.if.end: - $ra = LW $sp, 20 :: (load 4 from %stack.0) + $ra = LW $sp, 20 :: (load (s32) from %stack.0) $sp = ADDiu $sp, 24 PseudoReturn undef $ra diff --git a/llvm/test/CodeGen/Mips/copy-fp64.ll b/llvm/test/CodeGen/Mips/copy-fp64.ll index eb096e3787c2c..77a18f2844185 100644 --- a/llvm/test/CodeGen/Mips/copy-fp64.ll +++ b/llvm/test/CodeGen/Mips/copy-fp64.ll @@ -16,9 +16,9 @@ define double @foo(double %self) { ; CHECK: renamable $t9 = LW killed renamable $at, target-flags(mips-got) @bar ; CHECK: dead $ra = JALR killed $t9, csr_o32_fp64, target-flags(mips-jalr) , implicit-def dead $ra, implicit killed $d6_64, implicit-def $d0_64 ; CHECK: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp - ; CHECK: SDC164 killed $d0_64, %stack.0, 0 :: (store 8 into %stack.0) + ; CHECK: SDC164 killed $d0_64, %stack.0, 0 :: (store (s64) into %stack.0) ; CHECK: bb.1.bb1: - ; CHECK: $d0_64 = LDC164 %stack.0, 0 :: (load 8 from %stack.0) + ; CHECK: $d0_64 = LDC164 %stack.0, 0 :: (load (s64) from %stack.0) ; CHECK: RetRA implicit killed $d0_64 start: %0 = call double @bar(double %self) diff --git a/llvm/test/CodeGen/Mips/delay-slot-filler-bundled-insts.mir b/llvm/test/CodeGen/Mips/delay-slot-filler-bundled-insts.mir index 1539bb5f73e91..11c3fb6c56a93 100644 --- a/llvm/test/CodeGen/Mips/delay-slot-filler-bundled-insts.mir +++ b/llvm/test/CodeGen/Mips/delay-slot-filler-bundled-insts.mir @@ -88,7 +88,7 @@ body: | ; CHECK: successors: %bb.2(0x30000000), %bb.1(0x50000000) ; CHECK: $sp_64 = DADDiu $sp_64, -16 ; CHECK: CFI_INSTRUCTION def_cfa_offset 16 - ; CHECK: SD killed $ra_64, $sp_64, 8 :: (store 8 into %stack.0) + ; CHECK: SD killed $ra_64, $sp_64, 8 :: (store (s64) into %stack.0) ; CHECK: CFI_INSTRUCTION offset $ra_64, -8 ; CHECK: BUNDLE { ; CHECK: $sp_64 = DADDiu $sp_64, -16 @@ -111,7 +111,7 @@ body: | ; CHECK: $a0_64 = DADDiu $zero_64, 0 ; CHECK: } ; CHECK: bb.3.return: - ; CHECK: $ra_64 = LD $sp_64, 8 :: (load 8 from %stack.0) + ; CHECK: $ra_64 = LD $sp_64, 8 :: (load (s64) from %stack.0) ; CHECK: PseudoReturn64 undef $ra_64, implicit $v0_64 { ; CHECK: $sp_64 = DADDiu $sp_64, 16 ; CHECK: } @@ -121,7 +121,7 @@ body: | $sp_64 = DADDiu $sp_64, -16 CFI_INSTRUCTION def_cfa_offset 16 - SD killed $ra_64, $sp_64, 8 :: (store 8 into %stack.0) + SD killed $ra_64, $sp_64, 8 :: (store (s64) into %stack.0) CFI_INSTRUCTION offset $ra_64, -8 ; This BUNDLE instruction must not be split by the delay slot filler: BUNDLE { @@ -146,7 +146,7 @@ body: | bb.3.return: liveins: $v0_64 - $ra_64 = LD $sp_64, 8 :: (load 8 from %stack.0) + $ra_64 = LD $sp_64, 8 :: (load (s64) from %stack.0) $sp_64 = DADDiu $sp_64, 16 PseudoReturn64 undef $ra_64, implicit $v0_64 diff --git a/llvm/test/CodeGen/Mips/micromips-eva.mir b/llvm/test/CodeGen/Mips/micromips-eva.mir index c4d05cf6985e9..b5753d182ed94 100644 --- a/llvm/test/CodeGen/Mips/micromips-eva.mir +++ b/llvm/test/CodeGen/Mips/micromips-eva.mir @@ -100,25 +100,25 @@ body: | bb.0.entry: %0:gpr32 = LUi target-flags(mips-abs-hi) @bArray %1:gpr32 = ADDiu killed %0, target-flags(mips-abs-lo) @bArray - %2:gpr32 = LBuE %1, 5 :: (dereferenceable load 1 from `i8* getelementptr inbounds ([13 x i8], [13 x i8]* @bArray, i32 0, i32 5)`) + %2:gpr32 = LBuE %1, 5 :: (dereferenceable load (s8) from `i8* getelementptr inbounds ([13 x i8], [13 x i8]* @bArray, i32 0, i32 5)`) %3:gpr32 = ADDiu killed %2, -7 - SBE killed %3, %1, 3 :: (store 1 into `i8* getelementptr inbounds ([13 x i8], [13 x i8]* @bArray, i32 0, i32 3)`) - %4:gpr32 = LBE %1, 5 :: (dereferenceable load 1 from `i8* getelementptr inbounds ([13 x i8], [13 x i8]* @bArray, i32 0, i32 5)`) + SBE killed %3, %1, 3 :: (store (s8) into `i8* getelementptr inbounds ([13 x i8], [13 x i8]* @bArray, i32 0, i32 3)`) + %4:gpr32 = LBE %1, 5 :: (dereferenceable load (s8) from `i8* getelementptr inbounds ([13 x i8], [13 x i8]* @bArray, i32 0, i32 5)`) %5:gpr32 = ADDiu killed %4, -7 - SBE killed %5, %1, 3 :: (store 1 into `i8* getelementptr inbounds ([13 x i8], [13 x i8]* @bArray, i32 0, i32 3)`) + SBE killed %5, %1, 3 :: (store (s8) into `i8* getelementptr inbounds ([13 x i8], [13 x i8]* @bArray, i32 0, i32 3)`) %6:gpr32 = LUi target-flags(mips-abs-hi) @hArray %7:gpr32 = ADDiu killed %6, target-flags(mips-abs-lo) @hArray - %8:gpr32 = LHuE %7, 10 :: (dereferenceable load 2 from `i16* getelementptr inbounds ([13 x i16], [13 x i16]* @hArray, i32 0, i32 5)`) + %8:gpr32 = LHuE %7, 10 :: (dereferenceable load (s16) from `i16* getelementptr inbounds ([13 x i16], [13 x i16]* @hArray, i32 0, i32 5)`) %9:gpr32 = ADDiu killed %8, -7 - SHE killed %9, %7, 6 :: (store 2 into `i16* getelementptr inbounds ([13 x i16], [13 x i16]* @hArray, i32 0, i32 3)`) - %10:gpr32 = LHE %7, 10 :: (dereferenceable load 2 from `i16* getelementptr inbounds ([13 x i16], [13 x i16]* @hArray, i32 0, i32 5)`) + SHE killed %9, %7, 6 :: (store (s16) into `i16* getelementptr inbounds ([13 x i16], [13 x i16]* @hArray, i32 0, i32 3)`) + %10:gpr32 = LHE %7, 10 :: (dereferenceable load (s16) from `i16* getelementptr inbounds ([13 x i16], [13 x i16]* @hArray, i32 0, i32 5)`) %11:gpr32 = ADDiu killed %10, -7 - SHE killed %11, %7, 6 :: (store 2 into `i16* getelementptr inbounds ([13 x i16], [13 x i16]* @hArray, i32 0, i32 3)`) + SHE killed %11, %7, 6 :: (store (s16) into `i16* getelementptr inbounds ([13 x i16], [13 x i16]* @hArray, i32 0, i32 3)`) %12:gpr32 = LUi target-flags(mips-abs-hi) @wArray %13:gpr32 = ADDiu killed %12, target-flags(mips-abs-lo) @wArray - %14:gpr32 = LWE %13, 20 :: (dereferenceable load 4 from `i32* getelementptr inbounds ([13 x i32], [13 x i32]* @wArray, i32 0, i32 5)`) + %14:gpr32 = LWE %13, 20 :: (dereferenceable load (s32) from `i32* getelementptr inbounds ([13 x i32], [13 x i32]* @wArray, i32 0, i32 5)`) %15:gpr32 = ADDiu killed %14, -7 - SWE killed %15, %13, 12 :: (store 4 into `i32* getelementptr inbounds ([13 x i32], [13 x i32]* @wArray, i32 0, i32 3)`) + SWE killed %15, %13, 12 :: (store (s32) into `i32* getelementptr inbounds ([13 x i32], [13 x i32]* @wArray, i32 0, i32 3)`) RetRA ... @@ -173,8 +173,8 @@ body: | %0:gpr32 = COPY $a0 %1:gpr32 = COPY %0 - SW %0, %stack.0.z.addr, 0 :: (store 4 into %ir.z.addr) - %2:gpr32 = LW %stack.0.z.addr, 0 :: (dereferenceable load 4 from %ir.z.addr) + SW %0, %stack.0.z.addr, 0 :: (store (s32) into %ir.z.addr) + %2:gpr32 = LW %stack.0.z.addr, 0 :: (dereferenceable load (s32) from %ir.z.addr) SYNC 0 %3:gpr32 = ADDiu $zero, 42 diff --git a/llvm/test/CodeGen/Mips/micromips-short-delay-slot.mir b/llvm/test/CodeGen/Mips/micromips-short-delay-slot.mir index f242ea5bd8694..216fec6a7f6ef 100644 --- a/llvm/test/CodeGen/Mips/micromips-short-delay-slot.mir +++ b/llvm/test/CodeGen/Mips/micromips-short-delay-slot.mir @@ -58,12 +58,12 @@ body: | $sp = ADDiu $sp, -24 CFI_INSTRUCTION def_cfa_offset 24 - SW killed $ra, $sp, 20 :: (store 4 into %stack.0) + SW killed $ra, $sp, 20 :: (store (s32) into %stack.0) CFI_INSTRUCTION offset $ra_64, -4 $a0 = LI16_MM 1 $a1 = LI16_MM 2 JAL_MM @callee13, csr_o32, implicit-def dead $ra, implicit killed $a0, implicit killed $a1, implicit-def $sp, implicit-def $v0 - $ra = LW $sp, 20 :: (load 4 from %stack.0) + $ra = LW $sp, 20 :: (load (s32) from %stack.0) $sp = ADDiu $sp, 24 PseudoReturn undef $ra, implicit $v0 diff --git a/llvm/test/CodeGen/Mips/micromips-sizereduction/micromips-lwp-swp.mir b/llvm/test/CodeGen/Mips/micromips-sizereduction/micromips-lwp-swp.mir index bc5dc6cd7e882..feab7ecb671af 100644 --- a/llvm/test/CodeGen/Mips/micromips-sizereduction/micromips-lwp-swp.mir +++ b/llvm/test/CodeGen/Mips/micromips-sizereduction/micromips-lwp-swp.mir @@ -64,20 +64,20 @@ body: | $sp = ADDiu $sp, -32 CFI_INSTRUCTION def_cfa_offset 32 - SW killed $ra, $sp, 28 :: (store 4 into %stack.0) - SW killed $s1, $sp, 24 :: (store 4 into %stack.1) - SW killed $s0, $sp, 20 :: (store 4 into %stack.2) + SW killed $ra, $sp, 28 :: (store (s32) into %stack.0) + SW killed $s1, $sp, 24 :: (store (s32) into %stack.1) + SW killed $s0, $sp, 20 :: (store (s32) into %stack.2) CFI_INSTRUCTION offset $ra_64, -4 CFI_INSTRUCTION offset $s1_64, -8 CFI_INSTRUCTION offset $s0_64, -12 $s1 = MOVE16_MM $a1 $s0 = MOVE16_MM $a0 JAL @f, csr_o32, implicit-def dead $ra, implicit-def $sp, implicit-def dead $v0 - SW16_MM killed renamable $s1, killed renamable $s0, 0 :: (store 4 into %ir.adr) + SW16_MM killed renamable $s1, killed renamable $s0, 0 :: (store (s32) into %ir.adr) $v0 = LI16_MM 0 - $s0 = LW $sp, 20 :: (load 4 from %stack.2) - $s1 = LW $sp, 24 :: (load 4 from %stack.1) - $ra = LW $sp, 28 :: (load 4 from %stack.0) + $s0 = LW $sp, 20 :: (load (s32) from %stack.2) + $s1 = LW $sp, 24 :: (load (s32) from %stack.1) + $ra = LW $sp, 28 :: (load (s32) from %stack.0) $sp = ADDiu $sp, 32 PseudoReturn undef $ra, implicit killed $v0 @@ -132,20 +132,20 @@ body: | $sp = ADDiu $sp, -32 CFI_INSTRUCTION def_cfa_offset 32 - SW killed $ra, $sp, 28 :: (store 4 into %stack.0) - SW_MM killed $s1, $sp, 24 :: (store 4 into %stack.1) - SW_MM killed $s0, $sp, 20 :: (store 4 into %stack.2) + SW killed $ra, $sp, 28 :: (store (s32) into %stack.0) + SW_MM killed $s1, $sp, 24 :: (store (s32) into %stack.1) + SW_MM killed $s0, $sp, 20 :: (store (s32) into %stack.2) CFI_INSTRUCTION offset $ra_64, -4 CFI_INSTRUCTION offset $s1_64, -8 CFI_INSTRUCTION offset $s0_64, -12 $s1 = MOVE16_MM $a1 $s0 = MOVE16_MM $a0 JAL @f, csr_o32, implicit-def dead $ra, implicit-def $sp, implicit-def dead $v0 - SW16_MM killed renamable $s1, killed renamable $s0, 0 :: (store 4 into %ir.adr) + SW16_MM killed renamable $s1, killed renamable $s0, 0 :: (store (s32) into %ir.adr) $v0 = LI16_MM 0 - $s0 = LW_MM $sp, 20 :: (load 4 from %stack.2) - $s1 = LW_MM $sp, 24 :: (load 4 from %stack.1) - $ra = LW $sp, 28 :: (load 4 from %stack.0) + $s0 = LW_MM $sp, 20 :: (load (s32) from %stack.2) + $s1 = LW_MM $sp, 24 :: (load (s32) from %stack.1) + $ra = LW $sp, 28 :: (load (s32) from %stack.0) $sp = ADDiu $sp, 32 PseudoReturn undef $ra, implicit killed $v0 @@ -200,20 +200,20 @@ body: | $sp = ADDiu $sp, -32 CFI_INSTRUCTION def_cfa_offset 32 - SW killed $ra, $sp, 28 :: (store 4 into %stack.0) - SW_MM killed $s1, $sp, 24 :: (store 4 into %stack.1) - SW killed $s0, $sp, 20 :: (store 4 into %stack.2) + SW killed $ra, $sp, 28 :: (store (s32) into %stack.0) + SW_MM killed $s1, $sp, 24 :: (store (s32) into %stack.1) + SW killed $s0, $sp, 20 :: (store (s32) into %stack.2) CFI_INSTRUCTION offset $ra_64, -4 CFI_INSTRUCTION offset $s1_64, -8 CFI_INSTRUCTION offset $s0_64, -12 $s1 = MOVE16_MM $a1 $s0 = MOVE16_MM $a0 JAL @f, csr_o32, implicit-def dead $ra, implicit-def $sp, implicit-def dead $v0 - SW16_MM killed renamable $s1, killed renamable $s0, 0 :: (store 4 into %ir.adr) + SW16_MM killed renamable $s1, killed renamable $s0, 0 :: (store (s32) into %ir.adr) $v0 = LI16_MM 0 - $s0 = LW_MM $sp, 20 :: (load 4 from %stack.2) - $s1 = LW $sp, 24 :: (load 4 from %stack.1) - $ra = LW $sp, 28 :: (load 4 from %stack.0) + $s0 = LW_MM $sp, 20 :: (load (s32) from %stack.2) + $s1 = LW $sp, 24 :: (load (s32) from %stack.1) + $ra = LW $sp, 28 :: (load (s32) from %stack.0) $sp = ADDiu $sp, 32 PseudoReturn undef $ra, implicit killed $v0 @@ -268,20 +268,20 @@ body: | $sp = ADDiu $sp, -32 CFI_INSTRUCTION def_cfa_offset 32 - SW killed $ra, $sp, 28 :: (store 4 into %stack.0) - SW killed $s1, $sp, 24 :: (store 4 into %stack.1) - SW_MM killed $s0, $sp, 20 :: (store 4 into %stack.2) + SW killed $ra, $sp, 28 :: (store (s32) into %stack.0) + SW killed $s1, $sp, 24 :: (store (s32) into %stack.1) + SW_MM killed $s0, $sp, 20 :: (store (s32) into %stack.2) CFI_INSTRUCTION offset $ra_64, -4 CFI_INSTRUCTION offset $s1_64, -8 CFI_INSTRUCTION offset $s0_64, -12 $s1 = MOVE16_MM $a1 $s0 = MOVE16_MM $a0 JAL @f, csr_o32, implicit-def dead $ra, implicit-def $sp, implicit-def dead $v0 - SW16_MM killed renamable $s1, killed renamable $s0, 0 :: (store 4 into %ir.adr) + SW16_MM killed renamable $s1, killed renamable $s0, 0 :: (store (s32) into %ir.adr) $v0 = LI16_MM 0 - $s0 = LW $sp, 20 :: (load 4 from %stack.2) - $s1 = LW_MM $sp, 24 :: (load 4 from %stack.1) - $ra = LW $sp, 28 :: (load 4 from %stack.0) + $s0 = LW $sp, 20 :: (load (s32) from %stack.2) + $s1 = LW_MM $sp, 24 :: (load (s32) from %stack.1) + $ra = LW $sp, 28 :: (load (s32) from %stack.0) $sp = ADDiu $sp, 32 PseudoReturn undef $ra, implicit killed $v0 diff --git a/llvm/test/CodeGen/Mips/micromips-sizereduction/micromips-movep.mir b/llvm/test/CodeGen/Mips/micromips-sizereduction/micromips-movep.mir index bd068f75ac166..37bf0a798157d 100644 --- a/llvm/test/CodeGen/Mips/micromips-sizereduction/micromips-movep.mir +++ b/llvm/test/CodeGen/Mips/micromips-sizereduction/micromips-movep.mir @@ -25,23 +25,23 @@ body: | ; CHECK-LABEL: name: move1 ; CHECK: ADDIUSP_MM -24 ; CHECK: CFI_INSTRUCTION def_cfa_offset 24 - ; CHECK: SWSP_MM killed $ra, $sp, 20 :: (store 4 into %stack.0) + ; CHECK: SWSP_MM killed $ra, $sp, 20 :: (store (s32) into %stack.0) ; CHECK: CFI_INSTRUCTION offset $ra_64, -4 ; CHECK: JAL_MM @g, csr_o32, implicit-def dead $ra, implicit-def $sp, implicit-def $v0, implicit-def $v1 ; CHECK: $a0, $a1 = MOVEP_MM $v0, $v1 ; CHECK: JAL_MM @f, csr_o32, implicit-def dead $ra, implicit $a0, implicit $a1, implicit-def $sp, implicit-def $v0, implicit-def $v1 - ; CHECK: $ra = LWSP_MM $sp, 20 :: (load 4 from %stack.0) + ; CHECK: $ra = LWSP_MM $sp, 20 :: (load (s32) from %stack.0) ; CHECK: ADDIUSP_MM 24 ; CHECK: PseudoReturn undef $ra, implicit $v0, implicit $v1 $sp = ADDiu $sp, -24 CFI_INSTRUCTION def_cfa_offset 24 - SW killed $ra, $sp, 20 :: (store 4 into %stack.0) + SW killed $ra, $sp, 20 :: (store (s32) into %stack.0) CFI_INSTRUCTION offset $ra_64, -4 JAL_MM @g, csr_o32, implicit-def dead $ra, implicit-def $sp, implicit-def $v0, implicit-def $v1 $a0 = MOVE16_MM $v0 $a1 = MOVE16_MM $v1 JAL_MM @f, csr_o32, implicit-def dead $ra, implicit $a0, implicit $a1, implicit-def $sp, implicit-def $v0, implicit-def $v1 - $ra = LW $sp, 20 :: (load 4 from %stack.0) + $ra = LW $sp, 20 :: (load (s32) from %stack.0) $sp = ADDiu $sp, 24 PseudoReturn undef $ra, implicit $v0, implicit $v1 @@ -60,23 +60,23 @@ body: | ; CHECK-LABEL: name: move2 ; CHECK: ADDIUSP_MM -24 ; CHECK: CFI_INSTRUCTION def_cfa_offset 24 - ; CHECK: SWSP_MM killed $ra, $sp, 20 :: (store 4 into %stack.0) + ; CHECK: SWSP_MM killed $ra, $sp, 20 :: (store (s32) into %stack.0) ; CHECK: CFI_INSTRUCTION offset $ra_64, -4 ; CHECK: JAL_MM @g, csr_o32, implicit-def dead $ra, implicit-def $sp, implicit-def $v0, implicit-def $v1 ; CHECK: $a0, $a1 = MOVEP_MM $v0, $v1 ; CHECK: JAL_MM @f, csr_o32, implicit-def dead $ra, implicit $a0, implicit $a1, implicit-def $sp, implicit-def $v0, implicit-def $v1 - ; CHECK: $ra = LWSP_MM $sp, 20 :: (load 4 from %stack.0) + ; CHECK: $ra = LWSP_MM $sp, 20 :: (load (s32) from %stack.0) ; CHECK: ADDIUSP_MM 24 ; CHECK: PseudoReturn undef $ra, implicit $v0, implicit $v1 $sp = ADDiu $sp, -24 CFI_INSTRUCTION def_cfa_offset 24 - SW killed $ra, $sp, 20 :: (store 4 into %stack.0) + SW killed $ra, $sp, 20 :: (store (s32) into %stack.0) CFI_INSTRUCTION offset $ra_64, -4 JAL_MM @g, csr_o32, implicit-def dead $ra, implicit-def $sp, implicit-def $v0, implicit-def $v1 $a1 = MOVE16_MM $v1 $a0 = MOVE16_MM $v0 JAL_MM @f, csr_o32, implicit-def dead $ra, implicit $a0, implicit $a1, implicit-def $sp, implicit-def $v0, implicit-def $v1 - $ra = LW $sp, 20 :: (load 4 from %stack.0) + $ra = LW $sp, 20 :: (load (s32) from %stack.0) $sp = ADDiu $sp, 24 PseudoReturn undef $ra, implicit $v0, implicit $v1 diff --git a/llvm/test/CodeGen/Mips/micromips-sizereduction/micromips-no-lwp-swp.mir b/llvm/test/CodeGen/Mips/micromips-sizereduction/micromips-no-lwp-swp.mir index 880c89ec31550..c9044f306b922 100644 --- a/llvm/test/CodeGen/Mips/micromips-sizereduction/micromips-no-lwp-swp.mir +++ b/llvm/test/CodeGen/Mips/micromips-sizereduction/micromips-no-lwp-swp.mir @@ -59,15 +59,15 @@ body: | $sp = ADDiu $sp, -24 CFI_INSTRUCTION def_cfa_offset 24 - SW killed $ra, $sp, 20 :: (store 4 into %stack.0) - SW killed $s0, $sp, 16 :: (store 4 into %stack.1) + SW killed $ra, $sp, 20 :: (store (s32) into %stack.0) + SW killed $s0, $sp, 16 :: (store (s32) into %stack.1) CFI_INSTRUCTION offset $ra_64, -4 CFI_INSTRUCTION offset $s0_64, -8 $s0 = MOVE16_MM $a1 JAL @f, csr_o32, implicit-def dead $ra, implicit-def $sp, implicit-def dead $v0 $v0 = MOVE16_MM killed $s0 - $s0 = LW $sp, 16 :: (load 4 from %stack.1) - $ra = LW $sp, 20 :: (load 4 from %stack.0) + $s0 = LW $sp, 16 :: (load (s32) from %stack.1) + $ra = LW $sp, 20 :: (load (s32) from %stack.0) $sp = ADDiu $sp, 24 PseudoReturn undef $ra, implicit killed $v0 @@ -118,15 +118,15 @@ body: | $sp = ADDiu $sp, -24 CFI_INSTRUCTION def_cfa_offset 24 - SW_MM killed $ra, $sp, 20 :: (store 4 into %stack.0) - SW_MM killed $s0, $sp, 16 :: (store 4 into %stack.1) + SW_MM killed $ra, $sp, 20 :: (store (s32) into %stack.0) + SW_MM killed $s0, $sp, 16 :: (store (s32) into %stack.1) CFI_INSTRUCTION offset $ra_64, -4 CFI_INSTRUCTION offset $s0_64, -8 $s0 = MOVE16_MM $a1 JAL @f, csr_o32, implicit-def dead $ra, implicit-def $sp, implicit-def dead $v0 $v0 = MOVE16_MM killed $s0 - $s0 = LW_MM $sp, 16 :: (load 4 from %stack.1) - $ra = LW_MM $sp, 20 :: (load 4 from %stack.0) + $s0 = LW_MM $sp, 16 :: (load (s32) from %stack.1) + $ra = LW_MM $sp, 20 :: (load (s32) from %stack.0) $sp = ADDiu $sp, 24 PseudoReturn undef $ra, implicit killed $v0 @@ -177,15 +177,15 @@ body: | $sp = ADDiu $sp, -24 CFI_INSTRUCTION def_cfa_offset 24 - SW_MM killed $ra, $sp, 20 :: (store 4 into %stack.0) - SW killed $s0, $sp, 16 :: (store 4 into %stack.1) + SW_MM killed $ra, $sp, 20 :: (store (s32) into %stack.0) + SW killed $s0, $sp, 16 :: (store (s32) into %stack.1) CFI_INSTRUCTION offset $ra_64, -4 CFI_INSTRUCTION offset $s0_64, -8 $s0 = MOVE16_MM $a1 JAL @f, csr_o32, implicit-def dead $ra, implicit-def $sp, implicit-def dead $v0 $v0 = MOVE16_MM killed $s0 - $s0 = LW_MM $sp, 16 :: (load 4 from %stack.1) - $ra = LW $sp, 20 :: (load 4 from %stack.0) + $s0 = LW_MM $sp, 16 :: (load (s32) from %stack.1) + $ra = LW $sp, 20 :: (load (s32) from %stack.0) $sp = ADDiu $sp, 24 PseudoReturn undef $ra, implicit killed $v0 @@ -236,15 +236,15 @@ body: | $sp = ADDiu $sp, -24 CFI_INSTRUCTION def_cfa_offset 24 - SW killed $ra, $sp, 20 :: (store 4 into %stack.0) - SW_MM killed $s0, $sp, 16 :: (store 4 into %stack.1) + SW killed $ra, $sp, 20 :: (store (s32) into %stack.0) + SW_MM killed $s0, $sp, 16 :: (store (s32) into %stack.1) CFI_INSTRUCTION offset $ra_64, -4 CFI_INSTRUCTION offset $s0_64, -8 $s0 = MOVE16_MM $a1 JAL @f, csr_o32, implicit-def dead $ra, implicit-def $sp, implicit-def dead $v0 $v0 = MOVE16_MM killed $s0 - $s0 = LW $sp, 16 :: (load 4 from %stack.1) - $ra = LW_MM $sp, 20 :: (load 4 from %stack.0) + $s0 = LW $sp, 16 :: (load (s32) from %stack.1) + $ra = LW_MM $sp, 20 :: (load (s32) from %stack.0) $sp = ADDiu $sp, 24 PseudoReturn undef $ra, implicit killed $v0 diff --git a/llvm/test/CodeGen/Mips/mirparser/target-flags-pic-mxgot-tls.mir b/llvm/test/CodeGen/Mips/mirparser/target-flags-pic-mxgot-tls.mir index 5b72917a48179..00b40898549ff 100644 --- a/llvm/test/CodeGen/Mips/mirparser/target-flags-pic-mxgot-tls.mir +++ b/llvm/test/CodeGen/Mips/mirparser/target-flags-pic-mxgot-tls.mir @@ -152,7 +152,7 @@ body: | ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp %7 = LUi64 target-flags(mips-call-hi16) @_Z1gi %8 = DADDu killed %7, %6 - %9 = LD killed %8, target-flags(mips-call-lo16) @_Z1gi :: (load 8 from call-entry @_Z1gi) + %9 = LD killed %8, target-flags(mips-call-lo16) @_Z1gi :: (load (s64) from call-entry @_Z1gi) $a0_64 = COPY %5 $gp_64 = COPY %6 JALR64Pseudo killed %9, csr_n64, implicit-def dead $ra, implicit $a0_64, implicit $gp_64, implicit-def $sp, implicit-def $v0 @@ -162,13 +162,13 @@ body: | %12 = ADDu %10, killed %11 %13 = LUi64 target-flags(mips-got-hi16) @v %14 = DADDu killed %13, %6 - %15 = LD killed %14, target-flags(mips-got-lo16) @v :: (load 8 from got) - %16 = LW killed %15, 0 :: (dereferenceable load 4 from @v) + %15 = LD killed %14, target-flags(mips-got-lo16) @v :: (load (s64) from got) + %16 = LW killed %15, 0 :: (dereferenceable load (s32) from @v) %0 = ADDu killed %12, killed %16 ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp %17 = LUi64 target-flags(mips-call-hi16) &__tls_get_addr %18 = DADDu killed %17, %6 - %19 = LD killed %18, target-flags(mips-call-lo16) &__tls_get_addr :: (load 8 from call-entry &__tls_get_addr) + %19 = LD killed %18, target-flags(mips-call-lo16) &__tls_get_addr :: (load (s64) from call-entry &__tls_get_addr) %20 = DADDiu %6, target-flags(mips-tlsldm) @__tls_guard $a0_64 = COPY %20 $gp_64 = COPY %6 @@ -176,7 +176,7 @@ body: | ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp %21 = COPY $v0_64 %22 = DADDiu %21, target-flags(mips-dtprel-hi) @__tls_guard - %23 = LBu killed %22, target-flags(mips-dtprel-lo) @__tls_guard :: (dereferenceable load 1 from @__tls_guard) + %23 = LBu killed %22, target-flags(mips-dtprel-lo) @__tls_guard :: (dereferenceable load (s8) from @__tls_guard) BEQ killed %23, $zero, %bb.2.init.i.i, implicit-def dead $at B %bb.1.entry._ZTW1k.exit_crit_edge, implicit-def dead $at @@ -186,14 +186,14 @@ body: | ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp %39 = LUi64 target-flags(mips-call-hi16) &__tls_get_addr %40 = DADDu killed %39, %6 - %41 = LD killed %40, target-flags(mips-call-lo16) &__tls_get_addr :: (load 8 from call-entry &__tls_get_addr) + %41 = LD killed %40, target-flags(mips-call-lo16) &__tls_get_addr :: (load (s64) from call-entry &__tls_get_addr) %42 = DADDiu %6, target-flags(mips-tlsgd) @k $a0_64 = COPY %42 $gp_64 = COPY %6 JALR64Pseudo killed %41, csr_n64, implicit-def dead $ra, implicit $a0_64, implicit $gp_64, implicit-def $sp, implicit-def $v0_64 ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp %43 = COPY $v0_64 - %1 = LW %43, 0 :: (dereferenceable load 4 from @k) + %1 = LW %43, 0 :: (dereferenceable load (s32) from @k) B %bb.3._ZTW1k.exit, implicit-def dead $at bb.2.init.i.i: @@ -202,7 +202,7 @@ body: | ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp %24 = LUi64 target-flags(mips-call-hi16) &__tls_get_addr %25 = DADDu killed %24, %6 - %26 = LD %25, target-flags(mips-call-lo16) &__tls_get_addr :: (load 8 from call-entry &__tls_get_addr) + %26 = LD %25, target-flags(mips-call-lo16) &__tls_get_addr :: (load (s64) from call-entry &__tls_get_addr) %27 = DADDiu %6, target-flags(mips-tlsldm) @__tls_guard $a0_64 = COPY %27 $gp_64 = COPY %6 @@ -211,26 +211,26 @@ body: | %28 = COPY $v0_64 %29 = DADDiu %28, target-flags(mips-dtprel-hi) @__tls_guard %30 = ADDiu $zero, 1 - SB killed %30, killed %29, target-flags(mips-dtprel-lo) @__tls_guard :: (store 1 into @__tls_guard) + SB killed %30, killed %29, target-flags(mips-dtprel-lo) @__tls_guard :: (store (s8) into @__tls_guard) ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp %31 = LUi64 target-flags(mips-call-hi16) @_Z1gi %32 = DADDu killed %31, %6 %33 = DADDiu $zero_64, 3 - %34 = LD killed %32, target-flags(mips-call-lo16) @_Z1gi :: (load 8 from call-entry @_Z1gi) + %34 = LD killed %32, target-flags(mips-call-lo16) @_Z1gi :: (load (s64) from call-entry @_Z1gi) $a0_64 = COPY %33 $gp_64 = COPY %6 JALR64Pseudo killed %34, csr_n64, implicit-def dead $ra, implicit $a0_64, implicit $gp_64, implicit-def $sp, implicit-def $v0 ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp %35 = COPY $v0 ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp - %36 = LD %25, target-flags(mips-call-lo16) &__tls_get_addr :: (load 8 from call-entry &__tls_get_addr) + %36 = LD %25, target-flags(mips-call-lo16) &__tls_get_addr :: (load (s64) from call-entry &__tls_get_addr) %37 = DADDiu %6, target-flags(mips-tlsgd) @k $a0_64 = COPY %37 $gp_64 = COPY %6 JALR64Pseudo killed %36, csr_n64, implicit-def dead $ra, implicit $a0_64, implicit $gp_64, implicit-def $sp, implicit-def $v0_64 ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp %38 = COPY $v0_64 - SW %35, %38, 0 :: (store 4 into @k) + SW %35, %38, 0 :: (store (s32) into @k) %2 = COPY %35 bb.3._ZTW1k.exit: @@ -240,7 +240,7 @@ body: | %4 = ADDu %0, %3 %44 = LUi64 target-flags(mips-got-hi16) @_ZTH1j %45 = DADDu killed %44, %6 - %46 = LD killed %45, target-flags(mips-got-lo16) @_ZTH1j :: (load 8 from got) + %46 = LD killed %45, target-flags(mips-got-lo16) @_ZTH1j :: (load (s64) from got) BEQ64 killed %46, $zero_64, %bb.5._ZTW1j.exit, implicit-def dead $at B %bb.4, implicit-def dead $at @@ -250,7 +250,7 @@ body: | ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp %47 = LUi64 target-flags(mips-call-hi16) @_ZTH1j %48 = DADDu killed %47, %6 - %49 = LD killed %48, target-flags(mips-call-lo16) @_ZTH1j :: (load 8 from call-entry @_ZTH1j) + %49 = LD killed %48, target-flags(mips-call-lo16) @_ZTH1j :: (load (s64) from call-entry @_ZTH1j) $gp_64 = COPY %6 JALR64Pseudo killed %49, csr_n64, implicit-def dead $ra, implicit $gp_64, implicit-def $sp ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp @@ -259,14 +259,14 @@ body: | ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp %50 = LUi64 target-flags(mips-call-hi16) &__tls_get_addr %51 = DADDu killed %50, %6 - %52 = LD killed %51, target-flags(mips-call-lo16) &__tls_get_addr :: (load 8 from call-entry &__tls_get_addr) + %52 = LD killed %51, target-flags(mips-call-lo16) &__tls_get_addr :: (load (s64) from call-entry &__tls_get_addr) %53 = DADDiu %6, target-flags(mips-tlsgd) @j $a0_64 = COPY %53 $gp_64 = COPY %6 JALR64Pseudo killed %52, csr_n64, implicit-def dead $ra, implicit $a0_64, implicit $gp_64, implicit-def $sp, implicit-def $v0_64 ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp %54 = COPY $v0_64 - %55 = LW %54, 0 :: (dereferenceable load 4 from @j) + %55 = LW %54, 0 :: (dereferenceable load (s32) from @j) %56 = ADDu %4, killed %55 $v0 = COPY %56 RetRA implicit $v0 diff --git a/llvm/test/CodeGen/Mips/mirparser/target-flags-pic-o32.mir b/llvm/test/CodeGen/Mips/mirparser/target-flags-pic-o32.mir index 5e10a84ddc080..1e646f77b2b23 100644 --- a/llvm/test/CodeGen/Mips/mirparser/target-flags-pic-o32.mir +++ b/llvm/test/CodeGen/Mips/mirparser/target-flags-pic-o32.mir @@ -75,18 +75,18 @@ body: | %1 = ADDu $v0, $t9 %0 = COPY $a0 ADJCALLSTACKDOWN 16, 0, implicit-def dead $sp, implicit $sp - %2 = LW %1, target-flags(mips-got-call) @_Z1gi :: (load 4 from call-entry @_Z1gi) + %2 = LW %1, target-flags(mips-got-call) @_Z1gi :: (load (s32) from call-entry @_Z1gi) $a0 = COPY %0 $gp = COPY %1 JALRPseudo killed %2, csr_o32_fpxx, implicit-def dead $ra, implicit $a0, implicit $gp, implicit-def $sp, implicit-def $v0 ADJCALLSTACKUP 16, 0, implicit-def dead $sp, implicit $sp %3 = COPY $v0 %4 = ADDu %3, %0 - %5 = LW %1, target-flags(mips-got) @v :: (load 4 from got) - %6 = LW killed %5, 0 :: (dereferenceable load 4 from @v) + %5 = LW %1, target-flags(mips-got) @v :: (load (s32) from got) + %6 = LW killed %5, 0 :: (dereferenceable load (s32) from @v) %7 = ADDu killed %4, killed %6 - %8 = LW %1, target-flags(mips-got) @j :: (load 4 from got) - %9 = LW killed %8, 0 :: (dereferenceable load 4 from @j) + %8 = LW %1, target-flags(mips-got) @j :: (load (s32) from got) + %9 = LW killed %8, 0 :: (dereferenceable load (s32) from @j) %10 = ADDu killed %7, killed %9 $v0 = COPY %10 RetRA implicit $v0 diff --git a/llvm/test/CodeGen/Mips/mirparser/target-flags-pic.mir b/llvm/test/CodeGen/Mips/mirparser/target-flags-pic.mir index 7592ef9f66bb9..7dd9f299e25fc 100644 --- a/llvm/test/CodeGen/Mips/mirparser/target-flags-pic.mir +++ b/llvm/test/CodeGen/Mips/mirparser/target-flags-pic.mir @@ -77,7 +77,7 @@ body: | %1 = DADDiu %13, target-flags(mips-gpoff-lo) @_Z2k1i %0 = COPY $a0_64 ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp - %2 = LD %1, target-flags(mips-got-call) @_Z1gi :: (load 8 from call-entry @_Z1gi) + %2 = LD %1, target-flags(mips-got-call) @_Z1gi :: (load (s64) from call-entry @_Z1gi) $a0_64 = COPY %0 $gp_64 = COPY %1 JALR64Pseudo killed %2, csr_n64, implicit-def dead $ra, implicit $a0_64, implicit $gp_64, implicit-def $sp, implicit-def $v0 @@ -85,11 +85,11 @@ body: | %3 = COPY $v0 %4 = COPY %0.sub_32 %5 = ADDu %3, killed %4 - %6 = LD %1, target-flags(mips-got-disp) @v :: (load 8 from got) - %7 = LW killed %6, 0 :: (dereferenceable load 4 from @v) + %6 = LD %1, target-flags(mips-got-disp) @v :: (load (s64) from got) + %7 = LW killed %6, 0 :: (dereferenceable load (s32) from @v) %8 = ADDu killed %5, killed %7 - %9 = LD %1, target-flags(mips-got-disp) @j :: (load 8 from got) - %10 = LW killed %9, 0 :: (dereferenceable load 4 from @j) + %9 = LD %1, target-flags(mips-got-disp) @j :: (load (s64) from got) + %10 = LW killed %9, 0 :: (dereferenceable load (s32) from @j) %11 = ADDu killed %8, killed %10 $v0 = COPY %11 RetRA implicit $v0 diff --git a/llvm/test/CodeGen/Mips/mirparser/target-flags-static-tls.mir b/llvm/test/CodeGen/Mips/mirparser/target-flags-static-tls.mir index b35dde33fd062..4b88009ef110f 100644 --- a/llvm/test/CodeGen/Mips/mirparser/target-flags-static-tls.mir +++ b/llvm/test/CodeGen/Mips/mirparser/target-flags-static-tls.mir @@ -153,7 +153,7 @@ body: | %11 = DSLL killed %10, 16 %12 = DADDiu killed %11, target-flags(mips-abs-hi) @v %13 = DSLL killed %12, 16 - %14 = LW killed %13, target-flags(mips-abs-lo) @v :: (dereferenceable load 4 from @v) + %14 = LW killed %13, target-flags(mips-abs-lo) @v :: (dereferenceable load (s32) from @v) %0 = ADDu killed %8, killed %14 %15 = LUi64 target-flags(mips-tprel-hi) @__tls_guard %16 = DADDiu killed %15, target-flags(mips-tprel-lo) @__tls_guard @@ -161,7 +161,7 @@ body: | $v1_64 = COPY %17 %18 = COPY $v1_64 %19 = DADDu %18, killed %16 - %20 = LBu killed %19, 0 :: (dereferenceable load 1 from @__tls_guard) + %20 = LBu killed %19, 0 :: (dereferenceable load (s8) from @__tls_guard) BEQ killed %20, $zero, %bb.2.init.i.i, implicit-def dead $at J %bb.1.entry._ZTW1k.exit_crit_edge, implicit-def dead $at @@ -174,7 +174,7 @@ body: | $v1_64 = COPY %34 %35 = COPY $v1_64 %36 = DADDu %35, killed %33 - %1 = LW killed %36, 0 :: (dereferenceable load 4 from @k) + %1 = LW killed %36, 0 :: (dereferenceable load (s32) from @k) J %bb.3._ZTW1k.exit, implicit-def dead $at bb.2.init.i.i: @@ -187,7 +187,7 @@ body: | %24 = COPY $v1_64 %25 = DADDu %24, killed %22 %26 = ADDiu $zero, 1 - SB killed %26, killed %25, 0 :: (store 1 into @__tls_guard) + SB killed %26, killed %25, 0 :: (store (s8) into @__tls_guard) %27 = LUi64 target-flags(mips-tprel-hi) @k %28 = DADDiu killed %27, target-flags(mips-tprel-lo) @k %29 = DADDu %24, killed %28 @@ -197,7 +197,7 @@ body: | JAL @_Z1gi, csr_n64, implicit-def dead $ra, implicit $a0_64, implicit-def $sp, implicit-def $v0 ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp %31 = COPY $v0 - SW %31, killed %29, 0 :: (store 4 into @k) + SW %31, killed %29, 0 :: (store (s32) into @k) %2 = COPY %31 bb.3._ZTW1k.exit: @@ -224,10 +224,10 @@ body: | bb.5._ZTW1j.exit: %44 = RDHWR64 $hwr29, 0 $v1_64 = COPY %44 - %45 = LD %43, target-flags(mips-gottprel) @j :: (load 8) + %45 = LD %43, target-flags(mips-gottprel) @j :: (load (s64)) %46 = COPY $v1_64 %47 = DADDu %46, killed %45 - %48 = LW killed %47, 0 :: (dereferenceable load 4 from @j) + %48 = LW killed %47, 0 :: (dereferenceable load (s32) from @j) %49 = ADDu %4, killed %48 $v0 = COPY %49 RetRA implicit $v0 diff --git a/llvm/test/CodeGen/Mips/msa/emergency-spill.mir b/llvm/test/CodeGen/Mips/msa/emergency-spill.mir index 9cb4a6364b1fa..10d993220de3d 100644 --- a/llvm/test/CodeGen/Mips/msa/emergency-spill.mir +++ b/llvm/test/CodeGen/Mips/msa/emergency-spill.mir @@ -133,89 +133,89 @@ body: | bb.0.entry: liveins: $a0_64, $a1_64, $a2_64, $a3_64, $t0_64 - SD killed $a0_64, %stack.1.a, 0 :: (store 8 into %ir.1, align 16) - SD killed $a1_64, %stack.1.a, 8 :: (store 8 into %ir.2) - $w0 = LD_B %stack.1.a, 0 :: (dereferenceable load 16 from %ir.a) - SD killed $a2_64, %stack.2.b, 0 :: (store 8 into %ir.4, align 16) - SD killed $a3_64, %stack.2.b, 8 :: (store 8 into %ir.5) - $w1 = LD_B %stack.2.b, 0 :: (dereferenceable load 16 from %ir.b) - ST_B killed $w0, %stack.3.a.addr, 0 :: (store 16 into %ir.a.addr) - ST_B killed $w1, %stack.4.b.addr, 0 :: (store 16 into %ir.b.addr) - SW $t0, %stack.5.c.addr, 0, implicit killed $t0_64 :: (store 4 into %ir.c.addr) + SD killed $a0_64, %stack.1.a, 0 :: (store (s64) into %ir.1, align 16) + SD killed $a1_64, %stack.1.a, 8 :: (store (s64) into %ir.2) + $w0 = LD_B %stack.1.a, 0 :: (dereferenceable load (s128) from %ir.a) + SD killed $a2_64, %stack.2.b, 0 :: (store (s64) into %ir.4, align 16) + SD killed $a3_64, %stack.2.b, 8 :: (store (s64) into %ir.5) + $w1 = LD_B %stack.2.b, 0 :: (dereferenceable load (s128) from %ir.b) + ST_B killed $w0, %stack.3.a.addr, 0 :: (store (s128) into %ir.a.addr) + ST_B killed $w1, %stack.4.b.addr, 0 :: (store (s128) into %ir.b.addr) + SW $t0, %stack.5.c.addr, 0, implicit killed $t0_64 :: (store (s32) into %ir.c.addr) $at_64 = LEA_ADDiu64 %stack.8, 0 - SD killed $at_64, %stack.6.g, 0 :: (store 8 into %ir.g) - $a1_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g) + SD killed $at_64, %stack.6.g, 0 :: (store (s64) into %ir.g) + $a1_64 = LD %stack.6.g, 0 :: (dereferenceable load (s64) from %ir.g) ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp $a0_64 = LEA_ADDiu64 %stack.4.b.addr, 0 JAL @h, csr_n64, implicit-def dead $ra, implicit $a0_64, implicit $a1_64, implicit-def $sp ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp - $at_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g) - $v0_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g) - $v1_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g) - $a0_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g) - $a1_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g) - $a2_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g) - $a3_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g) - $t0_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g) - $t1_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g) - $t2_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g) - $t3_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g) - $t4_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g) - $t5_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g) - $t6_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g) - $t7_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g) - $s0_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g) - $s1_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g) - $s2_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g) - $s3_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g) - $s4_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g) - $s5_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g) - $s6_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g) - $s7_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g) - $t8_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g) - $t9_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g) - $ra_64 = LD %stack.6.g, 0 :: (dereferenceable load 8 from %ir.g) - $w0 = LD_B %stack.3.a.addr, 0 :: (dereferenceable load 16 from %ir.a.addr) - SD $at_64, %stack.7.d, 0 :: (store 8 into %ir.d) - SD $v0_64, %stack.7.d, 0 :: (store 8 into %ir.d) - SD $v1_64, %stack.7.d, 0 :: (store 8 into %ir.d) - SD $a0_64, %stack.7.d, 0 :: (store 8 into %ir.d) - SD $a1_64, %stack.7.d, 0 :: (store 8 into %ir.d) - SD $a2_64, %stack.7.d, 0 :: (store 8 into %ir.d) - SD $a3_64, %stack.7.d, 0 :: (store 8 into %ir.d) - SD $t0_64, %stack.7.d, 0 :: (store 8 into %ir.d) - SD $t1_64, %stack.7.d, 0 :: (store 8 into %ir.d) - SD $t2_64, %stack.7.d, 0 :: (store 8 into %ir.d) - SD $t3_64, %stack.7.d, 0 :: (store 8 into %ir.d) - SD $t4_64, %stack.7.d, 0 :: (store 8 into %ir.d) - SD $t5_64, %stack.7.d, 0 :: (store 8 into %ir.d) - SD $t6_64, %stack.7.d, 0 :: (store 8 into %ir.d) - SD $t7_64, %stack.7.d, 0 :: (store 8 into %ir.d) - SD $s0_64, %stack.7.d, 0 :: (store 8 into %ir.d) - SD $s1_64, %stack.7.d, 0 :: (store 8 into %ir.d) - SD $s2_64, %stack.7.d, 0 :: (store 8 into %ir.d) - SD $s3_64, %stack.7.d, 0 :: (store 8 into %ir.d) - SD $s4_64, %stack.7.d, 0 :: (store 8 into %ir.d) - SD $s5_64, %stack.7.d, 0 :: (store 8 into %ir.d) - SD $s6_64, %stack.7.d, 0 :: (store 8 into %ir.d) - SD $s7_64, %stack.7.d, 0 :: (store 8 into %ir.d) - SD $t8_64, %stack.7.d, 0 :: (store 8 into %ir.d) - SD $t9_64, %stack.7.d, 0 :: (store 8 into %ir.d) - SD $ra_64, %stack.7.d, 0 :: (store 8 into %ir.d) - $at_64 = LD %stack.7.d, 0 :: (dereferenceable load 8 from %ir.d) - $v0 = LB $at_64, 0 :: (load 1 from %ir.arrayidx) + $at_64 = LD %stack.6.g, 0 :: (dereferenceable load (s64) from %ir.g) + $v0_64 = LD %stack.6.g, 0 :: (dereferenceable load (s64) from %ir.g) + $v1_64 = LD %stack.6.g, 0 :: (dereferenceable load (s64) from %ir.g) + $a0_64 = LD %stack.6.g, 0 :: (dereferenceable load (s64) from %ir.g) + $a1_64 = LD %stack.6.g, 0 :: (dereferenceable load (s64) from %ir.g) + $a2_64 = LD %stack.6.g, 0 :: (dereferenceable load (s64) from %ir.g) + $a3_64 = LD %stack.6.g, 0 :: (dereferenceable load (s64) from %ir.g) + $t0_64 = LD %stack.6.g, 0 :: (dereferenceable load (s64) from %ir.g) + $t1_64 = LD %stack.6.g, 0 :: (dereferenceable load (s64) from %ir.g) + $t2_64 = LD %stack.6.g, 0 :: (dereferenceable load (s64) from %ir.g) + $t3_64 = LD %stack.6.g, 0 :: (dereferenceable load (s64) from %ir.g) + $t4_64 = LD %stack.6.g, 0 :: (dereferenceable load (s64) from %ir.g) + $t5_64 = LD %stack.6.g, 0 :: (dereferenceable load (s64) from %ir.g) + $t6_64 = LD %stack.6.g, 0 :: (dereferenceable load (s64) from %ir.g) + $t7_64 = LD %stack.6.g, 0 :: (dereferenceable load (s64) from %ir.g) + $s0_64 = LD %stack.6.g, 0 :: (dereferenceable load (s64) from %ir.g) + $s1_64 = LD %stack.6.g, 0 :: (dereferenceable load (s64) from %ir.g) + $s2_64 = LD %stack.6.g, 0 :: (dereferenceable load (s64) from %ir.g) + $s3_64 = LD %stack.6.g, 0 :: (dereferenceable load (s64) from %ir.g) + $s4_64 = LD %stack.6.g, 0 :: (dereferenceable load (s64) from %ir.g) + $s5_64 = LD %stack.6.g, 0 :: (dereferenceable load (s64) from %ir.g) + $s6_64 = LD %stack.6.g, 0 :: (dereferenceable load (s64) from %ir.g) + $s7_64 = LD %stack.6.g, 0 :: (dereferenceable load (s64) from %ir.g) + $t8_64 = LD %stack.6.g, 0 :: (dereferenceable load (s64) from %ir.g) + $t9_64 = LD %stack.6.g, 0 :: (dereferenceable load (s64) from %ir.g) + $ra_64 = LD %stack.6.g, 0 :: (dereferenceable load (s64) from %ir.g) + $w0 = LD_B %stack.3.a.addr, 0 :: (dereferenceable load (s128) from %ir.a.addr) + SD $at_64, %stack.7.d, 0 :: (store (s64) into %ir.d) + SD $v0_64, %stack.7.d, 0 :: (store (s64) into %ir.d) + SD $v1_64, %stack.7.d, 0 :: (store (s64) into %ir.d) + SD $a0_64, %stack.7.d, 0 :: (store (s64) into %ir.d) + SD $a1_64, %stack.7.d, 0 :: (store (s64) into %ir.d) + SD $a2_64, %stack.7.d, 0 :: (store (s64) into %ir.d) + SD $a3_64, %stack.7.d, 0 :: (store (s64) into %ir.d) + SD $t0_64, %stack.7.d, 0 :: (store (s64) into %ir.d) + SD $t1_64, %stack.7.d, 0 :: (store (s64) into %ir.d) + SD $t2_64, %stack.7.d, 0 :: (store (s64) into %ir.d) + SD $t3_64, %stack.7.d, 0 :: (store (s64) into %ir.d) + SD $t4_64, %stack.7.d, 0 :: (store (s64) into %ir.d) + SD $t5_64, %stack.7.d, 0 :: (store (s64) into %ir.d) + SD $t6_64, %stack.7.d, 0 :: (store (s64) into %ir.d) + SD $t7_64, %stack.7.d, 0 :: (store (s64) into %ir.d) + SD $s0_64, %stack.7.d, 0 :: (store (s64) into %ir.d) + SD $s1_64, %stack.7.d, 0 :: (store (s64) into %ir.d) + SD $s2_64, %stack.7.d, 0 :: (store (s64) into %ir.d) + SD $s3_64, %stack.7.d, 0 :: (store (s64) into %ir.d) + SD $s4_64, %stack.7.d, 0 :: (store (s64) into %ir.d) + SD $s5_64, %stack.7.d, 0 :: (store (s64) into %ir.d) + SD $s6_64, %stack.7.d, 0 :: (store (s64) into %ir.d) + SD $s7_64, %stack.7.d, 0 :: (store (s64) into %ir.d) + SD $t8_64, %stack.7.d, 0 :: (store (s64) into %ir.d) + SD $t9_64, %stack.7.d, 0 :: (store (s64) into %ir.d) + SD $ra_64, %stack.7.d, 0 :: (store (s64) into %ir.d) + $at_64 = LD %stack.7.d, 0 :: (dereferenceable load (s64) from %ir.d) + $v0 = LB $at_64, 0 :: (load (s8) from %ir.arrayidx) $w1 = FILL_B killed $v0 $w0 = ADDV_B killed $w0, killed $w1 - $at = LB killed $at_64, 1 :: (load 1 from %ir.arrayidx3) + $at = LB killed $at_64, 1 :: (load (s8) from %ir.arrayidx3) $w1 = FILL_B killed $at $w0 = ADDV_B killed $w0, killed $w1 - $w1 = LD_B %stack.4.b.addr, 0 :: (dereferenceable load 16 from %ir.b.addr) + $w1 = LD_B %stack.4.b.addr, 0 :: (dereferenceable load (s128) from %ir.b.addr) $w0 = ADDV_B killed $w1, killed $w0 - ST_B killed $w0, %stack.4.b.addr, 0 :: (store 16 into %ir.b.addr) - $w0 = LD_B %stack.4.b.addr, 0 :: (dereferenceable load 16 from %ir.b.addr) - ST_B killed $w0, %stack.0.retval, 0 :: (store 16 into %ir.retval) - $v0_64 = LD %stack.0.retval, 0 :: (dereferenceable load 8 from %ir.20, align 16) - $v1_64 = LD %stack.0.retval, 8 :: (dereferenceable load 8 from %ir.20 + 8, align 16) + ST_B killed $w0, %stack.4.b.addr, 0 :: (store (s128) into %ir.b.addr) + $w0 = LD_B %stack.4.b.addr, 0 :: (dereferenceable load (s128) from %ir.b.addr) + ST_B killed $w0, %stack.0.retval, 0 :: (store (s128) into %ir.retval) + $v0_64 = LD %stack.0.retval, 0 :: (dereferenceable load (s64) from %ir.20, align 16) + $v1_64 = LD %stack.0.retval, 8 :: (dereferenceable load (s64) from %ir.20 + 8, align 16) RetRA implicit $v0_64, implicit $v1_64 ... diff --git a/llvm/test/CodeGen/Mips/unaligned-memops-mapping.mir b/llvm/test/CodeGen/Mips/unaligned-memops-mapping.mir index 47e9a3205e09e..c378bbd4dc02b 100644 --- a/llvm/test/CodeGen/Mips/unaligned-memops-mapping.mir +++ b/llvm/test/CodeGen/Mips/unaligned-memops-mapping.mir @@ -57,10 +57,10 @@ body: | %1:gpr32 = COPY $a1 %0:gpr32 = COPY $a0 %3:gpr32 = IMPLICIT_DEF - %2:gpr32 = LWL %0, 0, %3 :: (load 4 from %ir.a, align 1) - %4:gpr32 = LWR %0, 3, %2 :: (load 4 from %ir.a, align 1) - SWL %4, %1, 0 :: (store 4 into %ir.b, align 1) - SWR %4, %1, 3 :: (store 4 into %ir.b, align 1) + %2:gpr32 = LWL %0, 0, %3 :: (load (s32) from %ir.a, align 1) + %4:gpr32 = LWR %0, 3, %2 :: (load (s32) from %ir.a, align 1) + SWL %4, %1, 0 :: (store (s32) into %ir.b, align 1) + SWR %4, %1, 3 :: (store (s32) into %ir.b, align 1) RetRA ... @@ -103,10 +103,10 @@ body: | %1:gpr32 = COPY $a1 %0:gpr32 = COPY $a0 %3:gpr32 = IMPLICIT_DEF - %2:gpr32 = LWLE %0, 0, %3 :: (load 4 from %ir.a, align 1) - %4:gpr32 = LWRE %0, 3, %2 :: (load 4 from %ir.a, align 1) - SWLE %4, %1, 0 :: (store 4 into %ir.b, align 1) - SWRE %4, %1, 3 :: (store 4 into %ir.b, align 1) + %2:gpr32 = LWLE %0, 0, %3 :: (load (s32) from %ir.a, align 1) + %4:gpr32 = LWRE %0, 3, %2 :: (load (s32) from %ir.a, align 1) + SWLE %4, %1, 0 :: (store (s32) into %ir.b, align 1) + SWRE %4, %1, 3 :: (store (s32) into %ir.b, align 1) RetRA ... diff --git a/llvm/test/CodeGen/Mips/unaligned-memops.ll b/llvm/test/CodeGen/Mips/unaligned-memops.ll index a9595fea71b8c..19fdbd7bb7d69 100644 --- a/llvm/test/CodeGen/Mips/unaligned-memops.ll +++ b/llvm/test/CodeGen/Mips/unaligned-memops.ll @@ -12,10 +12,10 @@ define void @g2(i32* %a, i32* %b) { ; MIPS: [[COPY:%[0-9]+]]:gpr32 = COPY $a1 ; MIPS: [[COPY1:%[0-9]+]]:gpr32 = COPY $a0 ; MIPS: [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF - ; MIPS: [[LWL:%[0-9]+]]:gpr32 = LWL [[COPY1]], 0, [[DEF]] :: (load 4 from %ir.a, align 1) - ; MIPS: [[LWR:%[0-9]+]]:gpr32 = LWR [[COPY1]], 3, [[LWL]] :: (load 4 from %ir.a, align 1) - ; MIPS: SWL [[LWR]], [[COPY]], 0 :: (store 4 into %ir.b, align 1) - ; MIPS: SWR [[LWR]], [[COPY]], 3 :: (store 4 into %ir.b, align 1) + ; MIPS: [[LWL:%[0-9]+]]:gpr32 = LWL [[COPY1]], 0, [[DEF]] :: (load (s32) from %ir.a, align 1) + ; MIPS: [[LWR:%[0-9]+]]:gpr32 = LWR [[COPY1]], 3, [[LWL]] :: (load (s32) from %ir.a, align 1) + ; MIPS: SWL [[LWR]], [[COPY]], 0 :: (store (s32) into %ir.b, align 1) + ; MIPS: SWR [[LWR]], [[COPY]], 3 :: (store (s32) into %ir.b, align 1) ; MIPS: RetRA ; MICROMIPS-LABEL: name: g2 ; MICROMIPS: bb.0.entry: @@ -23,10 +23,10 @@ define void @g2(i32* %a, i32* %b) { ; MICROMIPS: [[COPY:%[0-9]+]]:gpr32 = COPY $a1 ; MICROMIPS: [[COPY1:%[0-9]+]]:gpr32 = COPY $a0 ; MICROMIPS: [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF - ; MICROMIPS: [[LWL_MM:%[0-9]+]]:gpr32 = LWL_MM [[COPY1]], 0, [[DEF]] :: (load 4 from %ir.a, align 1) - ; MICROMIPS: [[LWR_MM:%[0-9]+]]:gpr32 = LWR_MM [[COPY1]], 3, [[LWL_MM]] :: (load 4 from %ir.a, align 1) - ; MICROMIPS: SWL_MM [[LWR_MM]], [[COPY]], 0 :: (store 4 into %ir.b, align 1) - ; MICROMIPS: SWR_MM [[LWR_MM]], [[COPY]], 3 :: (store 4 into %ir.b, align 1) + ; MICROMIPS: [[LWL_MM:%[0-9]+]]:gpr32 = LWL_MM [[COPY1]], 0, [[DEF]] :: (load (s32) from %ir.a, align 1) + ; MICROMIPS: [[LWR_MM:%[0-9]+]]:gpr32 = LWR_MM [[COPY1]], 3, [[LWL_MM]] :: (load (s32) from %ir.a, align 1) + ; MICROMIPS: SWL_MM [[LWR_MM]], [[COPY]], 0 :: (store (s32) into %ir.b, align 1) + ; MICROMIPS: SWR_MM [[LWR_MM]], [[COPY]], 3 :: (store (s32) into %ir.b, align 1) ; MICROMIPS: RetRA entry: %0 = load i32, i32* %a, align 1 diff --git a/llvm/test/CodeGen/PowerPC/2013-07-01-PHIElimBug.mir b/llvm/test/CodeGen/PowerPC/2013-07-01-PHIElimBug.mir index af5fb48b22dec..9e7c63a76ceda 100644 --- a/llvm/test/CodeGen/PowerPC/2013-07-01-PHIElimBug.mir +++ b/llvm/test/CodeGen/PowerPC/2013-07-01-PHIElimBug.mir @@ -27,7 +27,7 @@ body: | liveins: $x2 %0:g8rc_and_g8rc_nox0 = ADDIStocHA8 $x2, @g_51 - %1:g8rc_and_g8rc_nox0 = LDtocL @g_51, killed %0, implicit $x2 :: (load 8) + %1:g8rc_and_g8rc_nox0 = LDtocL @g_51, killed %0, implicit $x2 :: (load (s64)) %2:gprc = LI 0 %3:crrc = CMPLWI killed %2, 0 BCC 76, killed %3, %bb.2 @@ -50,7 +50,7 @@ body: | bb.4: %12:g8rc_and_g8rc_nox0 = PHI %4, %bb.1, %9, %bb.3 %13:g8rc = LI8 0 - STW8 killed %13, 0, killed %12 :: (store 4) + STW8 killed %13, 0, killed %12 :: (store (s32)) BLR8 implicit $lr8, implicit $rm ... @@ -63,7 +63,7 @@ body: | # # CHECK: bb.0: # CHECK: %0:g8rc_and_g8rc_nox0 = ADDIStocHA8 $x2, @g_51 -# CHECK-NEXT: %1:g8rc_and_g8rc_nox0 = LDtocL @g_51, killed %0, implicit killed $x2 :: (load 8) +# CHECK-NEXT: %1:g8rc_and_g8rc_nox0 = LDtocL @g_51, killed %0, implicit killed $x2 :: (load (s64)) # CHECK-NEXT: %2:gprc = LI 0 # CHECK-NEXT: %3:crrc = CMPLWI killed %2, 0 # CHECK-NEXT: BCC 76, killed %3, %bb.2 @@ -88,5 +88,5 @@ body: | # CHECK: bb.4: # CHECK: %12:g8rc_and_g8rc_nox0 = COPY killed %16 # CHECK-NEXT: %13:g8rc = LI8 0 -# CHECK-NEXT: STW8 killed %13, 0, killed %12 :: (store 4) +# CHECK-NEXT: STW8 killed %13, 0, killed %12 :: (store (s32)) # CHECK-NEXT: BLR8 implicit $lr8, implicit $rm diff --git a/llvm/test/CodeGen/PowerPC/DisableHoistingDueToBlockHotnessNoProfileData.mir b/llvm/test/CodeGen/PowerPC/DisableHoistingDueToBlockHotnessNoProfileData.mir index 35a07280a9d9a..8eef233e44fca 100644 --- a/llvm/test/CodeGen/PowerPC/DisableHoistingDueToBlockHotnessNoProfileData.mir +++ b/llvm/test/CodeGen/PowerPC/DisableHoistingDueToBlockHotnessNoProfileData.mir @@ -145,9 +145,9 @@ body: | successors: %bb.4(0x00000002), %bb.5(0x7ffffffe) %1:gprc_and_gprc_nor0 = PHI %10, %bb.1, %2, %bb.5 - %12:gprc = LWZ 0, %6 :: (load 4 from %ir.Ptr) + %12:gprc = LWZ 0, %6 :: (load (s32) from %ir.Ptr) %13:gprc = ADD4 %1, killed %12 - STW killed %13, 0, %6 :: (store 4 into %ir.Ptr) + STW killed %13, 0, %6 :: (store (s32) into %ir.Ptr) BCn %0, %bb.5 B %bb.4 @@ -157,7 +157,7 @@ body: | INLINEASM &"#NOTHING", 1, 12, implicit-def early-clobber $r2 ADJCALLSTACKDOWN 32, 0, implicit-def dead $r1, implicit $r1 %14:g8rc = COPY $x2 - STD %14, 24, $x1 :: (store 8 into stack + 24) + STD %14, 24, $x1 :: (store (s64) into stack + 24) %15:g8rc = EXTSW_32_64 %7 $x3 = COPY %15 $x12 = COPY %3 @@ -179,13 +179,13 @@ body: | #CHECK-LABEL: test #CHECK-HOIST: bb.1.for.body.lr.ph: #CHECK-HOIST: %14:g8rc = COPY $x2 -#CHECK-HOIST: STD %14, 24, $x1 :: (store 8 into stack + 24) +#CHECK-HOIST: STD %14, 24, $x1 :: (store (s64) into stack + 24) #CHECK-HOIST: %15:g8rc = EXTSW_32_64 %7 #CHECK-HOIST: B %bb.3 #CHECK-HOIST: bb.4.if.then: #CHECK-HOIST-NOT: %14:g8rc = COPY $x2 -#CHECK-HOIST-NOT: STD %14, 24, $x1 :: (store 8 into stack + 24) +#CHECK-HOIST-NOT: STD %14, 24, $x1 :: (store (s64) into stack + 24) #CHECK-HOIST-NOT: %15:g8rc = EXTSW_32_64 %7 #CHECK-HOIST: bb.5.for.inc: @@ -193,12 +193,12 @@ body: | #CHECK-LABEL: test #CHECK-NO-HOIST: bb.1.for.body.lr.ph: #CHECK-NO-HOIST-NOT: %14:g8rc = COPY $x2 -#CHECK-NO-HOIST-NOT: STD %14, 24, $x1 :: (store 8 into stack + 24) +#CHECK-NO-HOIST-NOT: STD %14, 24, $x1 :: (store (s64) into stack + 24) #CHECK-NO-HOIST-NOT: %15:g8rc = EXTSW_32_64 %7 #CHECK-NO-HOIST: B %bb.3 #CHECK-NO-HOIST: bb.4.if.then: #CHECK-NO-HOIST: %14:g8rc = COPY $x2 -#CHECK-NO-HOIST: STD %14, 24, $x1 :: (store 8 into stack + 24) +#CHECK-NO-HOIST: STD %14, 24, $x1 :: (store (s64) into stack + 24) #CHECK-NO-HOIST: %15:g8rc = EXTSW_32_64 %7 #CHECK-NO-HOIST: bb.5.for.inc: diff --git a/llvm/test/CodeGen/PowerPC/DisableHoistingDueToBlockHotnessProfileData.mir b/llvm/test/CodeGen/PowerPC/DisableHoistingDueToBlockHotnessProfileData.mir index a0139879f8c91..ad16daa7783ab 100644 --- a/llvm/test/CodeGen/PowerPC/DisableHoistingDueToBlockHotnessProfileData.mir +++ b/llvm/test/CodeGen/PowerPC/DisableHoistingDueToBlockHotnessProfileData.mir @@ -191,9 +191,9 @@ body: | successors: %bb.4(0x00000002), %bb.5(0x7ffffffe) %1:gprc_and_gprc_nor0 = PHI %10, %bb.1, %2, %bb.5 - %12:gprc = LWZ 0, %6 :: (load 4 from %ir.Ptr, !tbaa !33) + %12:gprc = LWZ 0, %6 :: (load (s32) from %ir.Ptr, !tbaa !33) %13:gprc = ADD4 %1, killed %12 - STW killed %13, 0, %6 :: (store 4 into %ir.Ptr, !tbaa !33) + STW killed %13, 0, %6 :: (store (s32) into %ir.Ptr, !tbaa !33) BCn %0, %bb.5 B %bb.4 @@ -203,7 +203,7 @@ body: | INLINEASM &"#NOTHING", 1, 12, implicit-def early-clobber $r2, !31 ADJCALLSTACKDOWN 32, 0, implicit-def dead $r1, implicit $r1 %14:g8rc = COPY $x2 - STD %14, 24, $x1 :: (store 8 into stack + 24) + STD %14, 24, $x1 :: (store (s64) into stack + 24) %15:g8rc = EXTSW_32_64 %7 $x3 = COPY %15 $x12 = COPY %3 @@ -225,13 +225,13 @@ body: | #CHECK-LABEL: test #CHECK-HOIST: bb.1.for.body.lr.ph: #CHECK-HOIST: %14:g8rc = COPY $x2 -#CHECK-HOIST: STD %14, 24, $x1 :: (store 8 into stack + 24) +#CHECK-HOIST: STD %14, 24, $x1 :: (store (s64) into stack + 24) #CHECK-HOIST: %15:g8rc = EXTSW_32_64 %7 #CHECK-HOIST: B %bb.3 #CHECK-HOIST: bb.4.if.then: #CHECK-HOIST-NOT: %14:g8rc = COPY $x2 -#CHECK-HOIST-NOT: STD %14, 24, $x1 :: (store 8 into stack + 24) +#CHECK-HOIST-NOT: STD %14, 24, $x1 :: (store (s64) into stack + 24) #CHECK-HOIST-NOT: %15:g8rc = EXTSW_32_64 %7 #CHECK-HOIST: bb.5.for.inc: @@ -239,13 +239,13 @@ body: | #CHECK-LABEL: test #CHECK-NO-HOIST: bb.1.for.body.lr.ph: #CHECK-NO-HOIST-NOT: %14:g8rc = COPY $x2 -#CHECK-NO-HOIST-NOT: STD %14, 24, $x1 :: (store 8 into stack + 24) +#CHECK-NO-HOIST-NOT: STD %14, 24, $x1 :: (store (s64) into stack + 24) #CHECK-NO-HOIST-NOT: %15:g8rc = EXTSW_32_64 %7 #CHECK-NO-HOIST: B %bb.3 #CHECK-NO-HOIST: bb.4.if.then: #CHECK-NO-HOIST: %14:g8rc = COPY $x2 -#CHECK-NO-HOIST: STD %14, 24, $x1 :: (store 8 into stack + 24) +#CHECK-NO-HOIST: STD %14, 24, $x1 :: (store (s64) into stack + 24) #CHECK-NO-HOIST: %15:g8rc = EXTSW_32_64 %7 #CHECK-NO-HOIST: bb.5.for.inc: diff --git a/llvm/test/CodeGen/PowerPC/NoCRFieldRedefWhenSpillingCRBIT.mir b/llvm/test/CodeGen/PowerPC/NoCRFieldRedefWhenSpillingCRBIT.mir index 01ce79995512a..96a0b4123aa46 100644 --- a/llvm/test/CodeGen/PowerPC/NoCRFieldRedefWhenSpillingCRBIT.mir +++ b/llvm/test/CodeGen/PowerPC/NoCRFieldRedefWhenSpillingCRBIT.mir @@ -95,18 +95,18 @@ body: | renamable $cr5gt = COPY renamable $cr1gt, implicit $cr1 ; CHECK: crnor 4*cr5+lt, lt, 4*cr1+gt ; CHECK: crmove 4*cr5+gt, 4*cr1+gt - SPILL_CRBIT killed renamable $cr5lt, 0, %stack.0 :: (store 4 into %stack.0) + SPILL_CRBIT killed renamable $cr5lt, 0, %stack.0 :: (store (s32) into %stack.0) renamable $cr1 = CMPW renamable $r4, renamable $r5, implicit killed $x5, implicit killed $x4 - SPILL_CRBIT killed renamable $cr5gt, 0, %stack.1 :: (store 4 into %stack.1) - SPILL_CRBIT killed renamable $cr1gt, 0, %stack.2 :: (store 4 into %stack.2) + SPILL_CRBIT killed renamable $cr5gt, 0, %stack.1 :: (store (s32) into %stack.1) + SPILL_CRBIT killed renamable $cr1gt, 0, %stack.2 :: (store (s32) into %stack.2) INLINEASM &"# nothing", 25, 12, implicit-def dead early-clobber $cr0, 12, implicit-def dead early-clobber $cr1, 12, implicit-def dead early-clobber $cr2, 12, implicit-def dead early-clobber $cr3, 12, implicit-def dead early-clobber $cr4, 12, implicit-def dead early-clobber $cr5, 12, implicit-def dead early-clobber $cr6, 12, implicit-def dead early-clobber $cr7, !1 BLR8 implicit $lr8, implicit $rm bb.1.if.end326: successors: %bb.2(0x00000001), %bb.3(0x7fffffff) - renamable $cr5lt = RESTORE_CRBIT 0, %stack.0 :: (load 4 from %stack.0) - renamable $cr5gt = RESTORE_CRBIT 0, %stack.1 :: (load 4 from %stack.1) + renamable $cr5lt = RESTORE_CRBIT 0, %stack.0 :: (load (s32) from %stack.0) + renamable $cr5gt = RESTORE_CRBIT 0, %stack.1 :: (load (s32) from %stack.1) renamable $cr5lt = CROR killed renamable $cr5lt, killed renamable $cr5gt BCn killed renamable $cr5lt, %bb.3 B %bb.2 diff --git a/llvm/test/CodeGen/PowerPC/aantidep-def-ec.mir b/llvm/test/CodeGen/PowerPC/aantidep-def-ec.mir index 39ffbeec10f4e..56ce8cc8ff0df 100644 --- a/llvm/test/CodeGen/PowerPC/aantidep-def-ec.mir +++ b/llvm/test/CodeGen/PowerPC/aantidep-def-ec.mir @@ -72,24 +72,24 @@ body: | $x0 = MFLR8 implicit $lr8 STD $x0, 16, $x1 $x1 = STDU $x1, -144, $x1 - STD killed $x29, 120, $x1 :: (store 8 into %fixed-stack.1) - STD killed $x30, 128, $x1 :: (store 8 into %fixed-stack.0, align 16) + STD killed $x29, 120, $x1 :: (store (s64) into %fixed-stack.1) + STD killed $x30, 128, $x1 :: (store (s64) into %fixed-stack.0, align 16) $x30 = OR8 $x4, $x4 - $x3 = LD 0, killed $x3 :: (load 8 from %ir.p1) + $x3 = LD 0, killed $x3 :: (load (s64) from %ir.p1) $x29 = ADDI8 killed $x3, -48 $x3 = ADDIStocHA8 $x2, @tasklist_lock - $x3 = LDtocL @tasklist_lock, killed $x3, implicit $x2 :: (load 8 from got) + $x3 = LDtocL @tasklist_lock, killed $x3, implicit $x2 :: (load (s64) from got) BL8_NOP @__raw_read_unlock, csr_ppc64_altivec, implicit-def $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1, implicit-def dead $x3 $r3 = LI 0 - STW killed $r3, 0, killed $x30 :: (volatile store 4 into %ir.p2) + STW killed $r3, 0, killed $x30 :: (volatile store (s32) into %ir.p2) INLINEASM &"#compiler barrier", 25 INLINEASM &"\0Alwsync \0A1:\09lwarx\09$0,0,$1\09\09# atomic_dec_return\0A\09addic\09$0,$0,-1\0A\09stwcx.\09$0,0,$1\0A\09bne-\091b\0Async \0A", 25, 131083, def early-clobber $r3, 851977, killed $x29, 12, implicit-def dead early-clobber $cr0 ; CHECK-LABEL: @mm_update_next_owner ; CHECK-NOT: lwarx 29, 0, 29 ; CHECK-NOT: stwcx. 29, 0, 29 $cr0 = CMPLWI killed $r3, 0 - $x30 = LD 128, $x1 :: (load 8 from %fixed-stack.0, align 16) - $x29 = LD 120, $x1 :: (load 8 from %fixed-stack.1) + $x30 = LD 128, $x1 :: (load (s64) from %fixed-stack.0, align 16) + $x29 = LD 120, $x1 :: (load (s64) from %fixed-stack.1) $x1 = ADDI8 $x1, 144 $x0 = LD 16, $x1 MTLR8 $x0, implicit-def $lr8 diff --git a/llvm/test/CodeGen/PowerPC/addisdtprelha-nonr3.mir b/llvm/test/CodeGen/PowerPC/addisdtprelha-nonr3.mir index 94441161d1500..c52a55f17b303 100644 --- a/llvm/test/CodeGen/PowerPC/addisdtprelha-nonr3.mir +++ b/llvm/test/CodeGen/PowerPC/addisdtprelha-nonr3.mir @@ -55,7 +55,7 @@ body: | $x3 = ADDIStlsldHA $x2, @x $x31 = OR8 $x1, $x1 $x3 = ADDItlsldL killed $x3, @x - STD killed $x30, 48, $x31 :: (store 8 into %fixed-stack.0, align 16) + STD killed $x30, 48, $x31 :: (store (s64) into %fixed-stack.0, align 16) $x3 = GETtlsldADDR killed $x3, @x, implicit-def dead $x0, implicit-def dead $x4, implicit-def dead $x5, implicit-def dead $x6, implicit-def dead $x7, implicit-def dead $x8, implicit-def dead $x9, implicit-def dead $x10, implicit-def dead $x11, implicit-def dead $x12, implicit-def $lr8, implicit-def $ctr8, implicit-def dead $cr0, implicit-def dead $cr1, implicit-def dead $cr5, implicit-def dead $cr6, implicit-def dead $cr7 $x12 = ADDIStlsgdHA $x2, @y $x30 = OR8 killed $x3, $x3 @@ -65,9 +65,9 @@ body: | ; CHECK: addis 4, 30, x@dtprel@ha $x5 = LI8 1 $r6 = LI 20 - $x30 = LD 48, $x31 :: (load 8 from %fixed-stack.0, align 16) - STB8 killed $x5, target-flags(ppc-dtprel-lo) @x, killed $x4 :: (store 1 into @x) - STW killed $r6, 0, killed $x3 :: (store 4 into @y) + $x30 = LD 48, $x31 :: (load (s64) from %fixed-stack.0, align 16) + STB8 killed $x5, target-flags(ppc-dtprel-lo) @x, killed $x4 :: (store (s8) into @x) + STW killed $r6, 0, killed $x3 :: (store (s32) into @y) $x1 = ADDI8 $x1, 64 $x0 = LD 16, $x1 $x31 = LD -8, $x1 diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll index 6aa176ec68929..eb095a75daf31 100644 --- a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll +++ b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll @@ -141,7 +141,7 @@ define void @test_i1(i1 %b) { ; 32BIT-NEXT: bb.0.entry: ; 32BIT-NEXT: liveins: $r3 ; 32BIT: renamable $r3 = RLWINM killed renamable $r3, 0, 31, 31 -; 32BIT-NEXT: STB killed renamable $r3, 0, killed renamable $r4 :: (store 1 into @global_i1) +; 32BIT-NEXT: STB killed renamable $r3, 0, killed renamable $r4 :: (store (s8) into @global_i1) ; 64BIT: liveins: ; 64BIT-NEXT: - { reg: '$x3', virtual-reg: '' } @@ -149,7 +149,7 @@ define void @test_i1(i1 %b) { ; 64BIT-NEXT: bb.0.entry: ; 64BIT-NEXT: liveins: $x3 ; 64BIT: renamable $r[[REG1:[0-9]+]] = RLWINM renamable $r[[REG1]], 0, 31, 31, implicit killed $x3 -; 64BIT-NEXT: STB killed renamable $r[[REG1]], 0, killed renamable $x4 :: (store 1 into @global_i1) +; 64BIT-NEXT: STB killed renamable $r[[REG1]], 0, killed renamable $x4 :: (store (s8) into @global_i1) define void @call_test_i1() { entry: @@ -181,7 +181,7 @@ define void @test_i1zext(i1 zeroext %b) { ; 32BIT-NEXT: bb.0.entry: ; 32BIT-NEXT: liveins: $r3 ; CHECK-NOT: RLWINM -; 32BIT: STB killed renamable $r3, 0, killed renamable $r4 :: (store 1 into @global_i1) +; 32BIT: STB killed renamable $r3, 0, killed renamable $r4 :: (store (s8) into @global_i1) ; 64BIT: liveins: ; 64BIT-NEXT: - { reg: '$x3', virtual-reg: '' } @@ -189,7 +189,7 @@ define void @test_i1zext(i1 zeroext %b) { ; 64BIT-NEXT: bb.0.entry: ; 64BIT-NEXT: liveins: $x3 ; CHECK-NOT: RLWINM -; 64BIT: STB8 killed renamable $x3, 0, killed renamable $x4 :: (store 1 into @global_i1) +; 64BIT: STB8 killed renamable $x3, 0, killed renamable $x4 :: (store (s8) into @global_i1) define i32 @test_ints(i32 signext %a, i32 zeroext %b, i32 zeroext %c, i32 signext %d, i32 signext %e, i32 signext %f, i32 signext %g, i32 signext %h) { entry: @@ -346,14 +346,14 @@ entry: ; 32BIT: body: | ; 32BIT-NEXT: bb.0.entry: ; 32BIT-NEXT: liveins: $r3 -; 32BIT: STW killed renamable $r3, 0, %stack.0.a.addr :: (store 4 into %ir.a.addr, align 8) +; 32BIT: STW killed renamable $r3, 0, %stack.0.a.addr :: (store (s32) into %ir.a.addr, align 8) ; 64BIT: liveins: ; 64BIT-NEXT: - { reg: '$x3', virtual-reg: '' } ; 64BIT: body: | ; 64BIT-NEXT: bb.0.entry: ; 64BIT-NEXT: liveins: $x3 -; 64BIT: STD killed renamable $x3, 0, %stack.0.a.addr :: (store 8 into %ir.a.addr) +; 64BIT: STD killed renamable $x3, 0, %stack.0.a.addr :: (store (s64) into %ir.a.addr) define i32 @caller(i32 %i) { @@ -405,16 +405,16 @@ entry: ; CHECK-LABEL: name: call_test_floats{{.*}} -; 32BIT: renamable $r3 = LWZtoc @f1, $r2 :: (load 4 from got) -; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r3 :: (dereferenceable load 4 from @f1) +; 32BIT: renamable $r3 = LWZtoc @f1, $r2 :: (load (s32) from got) +; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r3 :: (dereferenceable load (s32) from @f1) ; 32BIT-NEXT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 ; 32BIT-NEXT: $f2 = COPY renamable $f1 ; 32BIT-NEXT: $f3 = COPY renamable $f1 ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $f1, implicit killed $f2, implicit killed $f3, implicit $r2, implicit-def $r1 ; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 -; 64BIT: renamable $x3 = LDtoc @f1, $x2 :: (load 8 from got) -; 64BIT-NEXT: renamable $f1 = LFS 0, killed renamable $x3 :: (dereferenceable load 4 from @f1) +; 64BIT: renamable $x3 = LDtoc @f1, $x2 :: (load (s64) from got) +; 64BIT-NEXT: renamable $f1 = LFS 0, killed renamable $x3 :: (dereferenceable load (s32) from @f1) ; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 ; 64BIT-NEXT: $f2 = COPY renamable $f1 ; 64BIT-NEXT: $f3 = COPY renamable $f1 @@ -447,18 +447,18 @@ entry: ; CHECK-LABEL: name: call_test_fpr_max{{.*}} -; 32BIT: renamable $r[[REG:[0-9]+]] = LWZtoc @d1, $r2 :: (load 4 from got) -; 32BIT-NEXT: renamable $f1 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load 8 from @d1) +; 32BIT: renamable $r[[REG:[0-9]+]] = LWZtoc @d1, $r2 :: (load (s32) from got) +; 32BIT-NEXT: renamable $f1 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load (s64) from @d1) ; 32BIT-NEXT: ADJCALLSTACKDOWN 128, 0, implicit-def dead $r1, implicit $r1 -; 32BIT-DAG: STFD renamable $f1, 56, $r1 :: (store 8) -; 32BIT-DAG: STFD renamable $f1, 64, $r1 :: (store 8) -; 32BIT-DAG: STFD renamable $f1, 72, $r1 :: (store 8) -; 32BIT-DAG: STFD renamable $f1, 80, $r1 :: (store 8) -; 32BIT-DAG: STFD renamable $f1, 88, $r1 :: (store 8) -; 32BIT-DAG: STFD renamable $f1, 96, $r1 :: (store 8) -; 32BIT-DAG: STFD renamable $f1, 104, $r1 :: (store 8) -; 32BIT-DAG: STFD renamable $f1, 112, $r1 :: (store 8) -; 32BIT-DAG: STFD renamable $f1, 120, $r1 :: (store 8) +; 32BIT-DAG: STFD renamable $f1, 56, $r1 :: (store (s64)) +; 32BIT-DAG: STFD renamable $f1, 64, $r1 :: (store (s64)) +; 32BIT-DAG: STFD renamable $f1, 72, $r1 :: (store (s64)) +; 32BIT-DAG: STFD renamable $f1, 80, $r1 :: (store (s64)) +; 32BIT-DAG: STFD renamable $f1, 88, $r1 :: (store (s64)) +; 32BIT-DAG: STFD renamable $f1, 96, $r1 :: (store (s64)) +; 32BIT-DAG: STFD renamable $f1, 104, $r1 :: (store (s64)) +; 32BIT-DAG: STFD renamable $f1, 112, $r1 :: (store (s64)) +; 32BIT-DAG: STFD renamable $f1, 120, $r1 :: (store (s64)) ; 32BIT-DAG: $f2 = COPY renamable $f1 ; 32BIT-DAG: $f3 = COPY renamable $f1 ; 32BIT-DAG: $f4 = COPY renamable $f1 @@ -504,14 +504,14 @@ entry: ; ASM32PWR4-NEXT: nop ; ASM32PWR4-NEXT: addi 1, 1, 128 -; 64BIT: renamable $x[[REGD1ADDR:[0-9]+]] = LDtoc @d1, $x2 :: (load 8 from got) -; 64BIT-NEXT: renamable $f1 = LFD 0, killed renamable $x[[REGD1ADDR:[0-9]+]] :: (dereferenceable load 8 from @d1) +; 64BIT: renamable $x[[REGD1ADDR:[0-9]+]] = LDtoc @d1, $x2 :: (load (s64) from got) +; 64BIT-NEXT: renamable $f1 = LFD 0, killed renamable $x[[REGD1ADDR:[0-9]+]] :: (dereferenceable load (s64) from @d1) ; 64BIT-NEXT: ADJCALLSTACKDOWN 152, 0, implicit-def dead $r1, implicit $r1 -; 64BIT-DAG: STFD renamable $f1, 112, $x1 :: (store 8) -; 64BIT-DAG: STFD renamable $f1, 120, $x1 :: (store 8) -; 64BIT-DAG: STFD renamable $f1, 128, $x1 :: (store 8) -; 64BIT-DAG: STFD renamable $f1, 136, $x1 :: (store 8) -; 64BIT-DAG: STFD renamable $f1, 144, $x1 :: (store 8) +; 64BIT-DAG: STFD renamable $f1, 112, $x1 :: (store (s64)) +; 64BIT-DAG: STFD renamable $f1, 120, $x1 :: (store (s64)) +; 64BIT-DAG: STFD renamable $f1, 128, $x1 :: (store (s64)) +; 64BIT-DAG: STFD renamable $f1, 136, $x1 :: (store (s64)) +; 64BIT-DAG: STFD renamable $f1, 144, $x1 :: (store (s64)) ; 64BIT-DAG: $f2 = COPY renamable $f1 ; 64BIT-DAG: $f3 = COPY renamable $f1 ; 64BIT-DAG: $f4 = COPY renamable $f1 @@ -598,20 +598,20 @@ entry: ; CHECK-LABEL: name: call_test_mix{{.*}} -; 32BIT: renamable $r[[REG1:[0-9]+]] = LWZtoc @f1, $r2 :: (load 4 from got) -; 32BIT-NEXT: renamable $r[[REG2:[0-9]+]] = LWZtoc @d1, $r2 :: (load 4 from got) -; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r[[REG1]] :: (dereferenceable load 4 from @f1) -; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG2]] :: (dereferenceable load 8 from @d1) +; 32BIT: renamable $r[[REG1:[0-9]+]] = LWZtoc @f1, $r2 :: (load (s32) from got) +; 32BIT-NEXT: renamable $r[[REG2:[0-9]+]] = LWZtoc @d1, $r2 :: (load (s32) from got) +; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r[[REG1]] :: (dereferenceable load (s32) from @f1) +; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG2]] :: (dereferenceable load (s64) from @d1) ; 32BIT-NEXT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 ; 32BIT-NEXT: $r4 = LI 1 ; 32BIT-NEXT: $r7 = LI 97 ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $f1, implicit $r4, implicit $f2, implicit killed $r7, implicit $r2, implicit-def $r1 ; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 -; 64BIT: renamable $x[[REG1:[0-9]+]] = LDtoc @f1, $x2 :: (load 8 from got) -; 64BIT-NEXT: renamable $x[[REG2:[0-9]+]] = LDtoc @d1, $x2 :: (load 8 from got) -; 64BIT-NEXT: renamable $f1 = LFS 0, killed renamable $x[[REG1]] :: (dereferenceable load 4 from @f1) -; 64BIT-NEXT: renamable $f2 = LFD 0, killed renamable $x[[REG2]] :: (dereferenceable load 8 from @d1) +; 64BIT: renamable $x[[REG1:[0-9]+]] = LDtoc @f1, $x2 :: (load (s64) from got) +; 64BIT-NEXT: renamable $x[[REG2:[0-9]+]] = LDtoc @d1, $x2 :: (load (s64) from got) +; 64BIT-NEXT: renamable $f1 = LFS 0, killed renamable $x[[REG1]] :: (dereferenceable load (s32) from @f1) +; 64BIT-NEXT: renamable $f2 = LFD 0, killed renamable $x[[REG2]] :: (dereferenceable load (s64) from @d1) ; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 ; 64BIT-NEXT: $x4 = LI8 1 ; 64BIT-NEXT: $x6 = LI8 97 @@ -700,16 +700,16 @@ declare void @test_vararg(i32, ...) ; CHECK-LABEL: name: call_test_vararg -; 32BIT: renamable $r[[REG:[0-9]+]] = LWZtoc @f1, $r2 :: (load 4 from got) -; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r[[REG]] :: (dereferenceable load 4 from @f1) -; 32BIT-NEXT: renamable $r[[REG:[0-9]+]] = LWZtoc @d1, $r2 :: (load 4 from got) -; 32BIT-NEXT: STFD renamable $f1, 0, %stack.[[SLOT1:[0-9]+]] :: (store 8 into %stack.[[SLOT1]]) -; 32BIT-NEXT: renamable $r4 = LWZ 0, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]], align 8) -; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load 8 from @d1) -; 32BIT-NEXT: renamable $r5 = LWZ 4, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]] + 4) -; 32BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store 8 into %stack.[[SLOT2]]) -; 32BIT-NEXT: renamable $r6 = LWZ 0, %stack.[[SLOT2]] :: (load 4 from %stack.[[SLOT2]], align 8) -; 32BIT-NEXT: renamable $r7 = LWZ 4, %stack.[[SLOT2]] :: (load 4 from %stack.[[SLOT2]] + 4) +; 32BIT: renamable $r[[REG:[0-9]+]] = LWZtoc @f1, $r2 :: (load (s32) from got) +; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r[[REG]] :: (dereferenceable load (s32) from @f1) +; 32BIT-NEXT: renamable $r[[REG:[0-9]+]] = LWZtoc @d1, $r2 :: (load (s32) from got) +; 32BIT-NEXT: STFD renamable $f1, 0, %stack.[[SLOT1:[0-9]+]] :: (store (s64) into %stack.[[SLOT1]]) +; 32BIT-NEXT: renamable $r4 = LWZ 0, %stack.[[SLOT1]] :: (load (s32) from %stack.[[SLOT1]], align 8) +; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load (s64) from @d1) +; 32BIT-NEXT: renamable $r5 = LWZ 4, %stack.[[SLOT1]] :: (load (s32) from %stack.[[SLOT1]] + 4) +; 32BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store (s64) into %stack.[[SLOT2]]) +; 32BIT-NEXT: renamable $r6 = LWZ 0, %stack.[[SLOT2]] :: (load (s32) from %stack.[[SLOT2]], align 8) +; 32BIT-NEXT: renamable $r7 = LWZ 4, %stack.[[SLOT2]] :: (load (s32) from %stack.[[SLOT2]] + 4) ; 32BIT-NEXT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 ; 32BIT-NEXT: $r3 = LI 42 ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $f1, implicit $r4, implicit $r5, implicit $f2, implicit $r6, implicit $r7, implicit $r2, implicit-def $r1 @@ -732,14 +732,14 @@ declare void @test_vararg(i32, ...) ; ASM32PWR4-NEXT: bl .test_vararg[PR] ; ASM32PWR4-NEXT: nop -; 64BIT: renamable $x[[REG:[0-9]+]] = LDtoc @f1, $x2 :: (load 8 from got) -; 64BIT-NEXT: renamable $f1 = LFS 0, killed renamable $x[[REG]] :: (dereferenceable load 4 from @f1) -; 64BIT-NEXT: renamable $x[[REG:[0-9]+]] = LDtoc @d1, $x2 :: (load 8 from got) -; 64BIT-NEXT: STFD renamable $f1, 0, %stack.[[SLOT1:[0-9]+]] :: (store 8 into %stack.[[SLOT1]]) -; 64BIT-NEXT: renamable $f2 = LFD 0, killed renamable $x[[REG]] :: (dereferenceable load 8 from @d1) -; 64BIT-NEXT: renamable $x4 = LD 0, %stack.[[SLOT1]] :: (load 8 from %stack.[[SLOT1]]) -; 64BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store 8 into %stack.[[SLOT2]]) -; 64BIT-NEXT: renamable $x5 = LD 0, %stack.[[SLOT2]] :: (load 8 from %stack.[[SLOT2]]) +; 64BIT: renamable $x[[REG:[0-9]+]] = LDtoc @f1, $x2 :: (load (s64) from got) +; 64BIT-NEXT: renamable $f1 = LFS 0, killed renamable $x[[REG]] :: (dereferenceable load (s32) from @f1) +; 64BIT-NEXT: renamable $x[[REG:[0-9]+]] = LDtoc @d1, $x2 :: (load (s64) from got) +; 64BIT-NEXT: STFD renamable $f1, 0, %stack.[[SLOT1:[0-9]+]] :: (store (s64) into %stack.[[SLOT1]]) +; 64BIT-NEXT: renamable $f2 = LFD 0, killed renamable $x[[REG]] :: (dereferenceable load (s64) from @d1) +; 64BIT-NEXT: renamable $x4 = LD 0, %stack.[[SLOT1]] :: (load (s64) from %stack.[[SLOT1]]) +; 64BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store (s64) into %stack.[[SLOT2]]) +; 64BIT-NEXT: renamable $x5 = LD 0, %stack.[[SLOT2]] :: (load (s64) from %stack.[[SLOT2]]) ; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 ; 64BIT-NEXT: $x3 = LI8 42 ; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $f1, implicit $x4, implicit $f2, implicit $x5, implicit $x2, implicit-def $r1 @@ -769,16 +769,16 @@ entry: ; CHECK-LABEL: name: call_test_vararg2 -; 32BIT: renamable $r[[REG:[0-9]+]] = LWZtoc @f1, $r2 :: (load 4 from got) -; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r[[REG]] :: (dereferenceable load 4 from @f1) -; 32BIT-NEXT: renamable $r[[REG:[0-9]+]] = LWZtoc @d1, $r2 :: (load 4 from got) -; 32BIT-NEXT: STFD renamable $f1, 0, %stack.[[SLOT1:[0-9]+]] :: (store 8 into %stack.[[SLOT1]]) -; 32BIT-NEXT: renamable $r4 = LWZ 0, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]], align 8) -; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load 8 from @d1) -; 32BIT-NEXT: renamable $r5 = LWZ 4, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]] + 4) -; 32BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store 8 into %stack.[[SLOT2]]) -; 32BIT-NEXT: renamable $r7 = LWZ 0, %stack.[[SLOT2]] :: (load 4 from %stack.[[SLOT2]], align 8) -; 32BIT-NEXT: renamable $r8 = LWZ 4, %stack.[[SLOT2]] :: (load 4 from %stack.[[SLOT2]] + 4) +; 32BIT: renamable $r[[REG:[0-9]+]] = LWZtoc @f1, $r2 :: (load (s32) from got) +; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r[[REG]] :: (dereferenceable load (s32) from @f1) +; 32BIT-NEXT: renamable $r[[REG:[0-9]+]] = LWZtoc @d1, $r2 :: (load (s32) from got) +; 32BIT-NEXT: STFD renamable $f1, 0, %stack.[[SLOT1:[0-9]+]] :: (store (s64) into %stack.[[SLOT1]]) +; 32BIT-NEXT: renamable $r4 = LWZ 0, %stack.[[SLOT1]] :: (load (s32) from %stack.[[SLOT1]], align 8) +; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load (s64) from @d1) +; 32BIT-NEXT: renamable $r5 = LWZ 4, %stack.[[SLOT1]] :: (load (s32) from %stack.[[SLOT1]] + 4) +; 32BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store (s64) into %stack.[[SLOT2]]) +; 32BIT-NEXT: renamable $r7 = LWZ 0, %stack.[[SLOT2]] :: (load (s32) from %stack.[[SLOT2]], align 8) +; 32BIT-NEXT: renamable $r8 = LWZ 4, %stack.[[SLOT2]] :: (load (s32) from %stack.[[SLOT2]] + 4) ; 32BIT-NEXT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 ; 32BIT-NEXT: $r3 = LI 42 ; 32BIT-NEXT: $r6 = LI 42 @@ -801,14 +801,14 @@ entry: ; ASM32PWR4-NEXT: bl .test_vararg[PR] ; ASM32PWR4-NEXT: nop -; 64BIT: renamable $x[[REG:[0-9]+]] = LDtoc @f1, $x2 :: (load 8 from got) -; 64BIT-NEXT: renamable $f1 = LFS 0, killed renamable $x[[REG]] :: (dereferenceable load 4 from @f1) -; 64BIT-NEXT: renamable $x[[REG:[0-9]+]] = LDtoc @d1, $x2 :: (load 8 from got) -; 64BIT-NEXT: STFD renamable $f1, 0, %stack.[[SLOT1:[0-9]+]] :: (store 8 into %stack.[[SLOT1]]) -; 64BIT-NEXT: renamable $f2 = LFD 0, killed renamable $x[[REG]] :: (dereferenceable load 8 from @d1) -; 64BIT-NEXT: renamable $x4 = LD 0, %stack.[[SLOT1]] :: (load 8 from %stack.[[SLOT1]]) -; 64BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store 8 into %stack.[[SLOT2]]) -; 64BIT-NEXT: renamable $x6 = LD 0, %stack.[[SLOT2]] :: (load 8 from %stack.[[SLOT2]]) +; 64BIT: renamable $x[[REG:[0-9]+]] = LDtoc @f1, $x2 :: (load (s64) from got) +; 64BIT-NEXT: renamable $f1 = LFS 0, killed renamable $x[[REG]] :: (dereferenceable load (s32) from @f1) +; 64BIT-NEXT: renamable $x[[REG:[0-9]+]] = LDtoc @d1, $x2 :: (load (s64) from got) +; 64BIT-NEXT: STFD renamable $f1, 0, %stack.[[SLOT1:[0-9]+]] :: (store (s64) into %stack.[[SLOT1]]) +; 64BIT-NEXT: renamable $f2 = LFD 0, killed renamable $x[[REG]] :: (dereferenceable load (s64) from @d1) +; 64BIT-NEXT: renamable $x4 = LD 0, %stack.[[SLOT1]] :: (load (s64) from %stack.[[SLOT1]]) +; 64BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store (s64) into %stack.[[SLOT2]]) +; 64BIT-NEXT: renamable $x6 = LD 0, %stack.[[SLOT2]] :: (load (s64) from %stack.[[SLOT2]]) ; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 ; 64BIT-NEXT: $x3 = LI8 42 ; 64BIT-NEXT: $x5 = LI8 42 @@ -840,16 +840,16 @@ entry: ; CHECK-LABEL: name: call_test_vararg3 -; 32BIT: renamable $r[[REG:[0-9]+]] = LWZtoc @f1, $r2 :: (load 4 from got) -; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r[[REG]] :: (dereferenceable load 4 from @f1) -; 32BIT-NEXT: renamable $r[[REG:[0-9]+]] = LWZtoc @d1, $r2 :: (load 4 from got) -; 32BIT-NEXT: STFD renamable $f1, 0, %stack.[[SLOT1:[0-9]+]] :: (store 8 into %stack.[[SLOT1]]) -; 32BIT-NEXT: renamable $r4 = LWZ 0, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]], align 8) -; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load 8 from @d1) -; 32BIT-NEXT: renamable $r5 = LWZ 4, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]] + 4) -; 32BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store 8 into %stack.[[SLOT2]]) -; 32BIT-NEXT: renamable $r8 = LWZ 0, %stack.[[SLOT2]] :: (load 4 from %stack.[[SLOT2]], align 8) -; 32BIT-NEXT: renamable $r9 = LWZ 4, %stack.[[SLOT2]] :: (load 4 from %stack.[[SLOT2]] + 4) +; 32BIT: renamable $r[[REG:[0-9]+]] = LWZtoc @f1, $r2 :: (load (s32) from got) +; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r[[REG]] :: (dereferenceable load (s32) from @f1) +; 32BIT-NEXT: renamable $r[[REG:[0-9]+]] = LWZtoc @d1, $r2 :: (load (s32) from got) +; 32BIT-NEXT: STFD renamable $f1, 0, %stack.[[SLOT1:[0-9]+]] :: (store (s64) into %stack.[[SLOT1]]) +; 32BIT-NEXT: renamable $r4 = LWZ 0, %stack.[[SLOT1]] :: (load (s32) from %stack.[[SLOT1]], align 8) +; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load (s64) from @d1) +; 32BIT-NEXT: renamable $r5 = LWZ 4, %stack.[[SLOT1]] :: (load (s32) from %stack.[[SLOT1]] + 4) +; 32BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store (s64) into %stack.[[SLOT2]]) +; 32BIT-NEXT: renamable $r8 = LWZ 0, %stack.[[SLOT2]] :: (load (s32) from %stack.[[SLOT2]], align 8) +; 32BIT-NEXT: renamable $r9 = LWZ 4, %stack.[[SLOT2]] :: (load (s32) from %stack.[[SLOT2]] + 4) ; 32BIT-NEXT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 ; 32BIT-NEXT: $r3 = LI 42 ; 32BIT-NEXT: $r6 = LI 0 @@ -874,14 +874,14 @@ entry: ; ASM32PWR4-NEXT: bl .test_vararg[PR] ; ASM32PWR4-NEXT: nop -; 64BIT: renamable $x[[REG:[0-9]+]] = LDtoc @f1, $x2 :: (load 8 from got) -; 64BIT-NEXT: renamable $f1 = LFS 0, killed renamable $x[[REG]] :: (dereferenceable load 4 from @f1) -; 64BIT-NEXT: renamable $x[[REG:[0-9]+]] = LDtoc @d1, $x2 :: (load 8 from got) -; 64BIT-NEXT: STFD renamable $f1, 0, %stack.[[SLOT1:[0-9]+]] :: (store 8 into %stack.[[SLOT1]]) -; 64BIT-NEXT: renamable $f2 = LFD 0, killed renamable $x[[REG]] :: (dereferenceable load 8 from @d1) -; 64BIT-NEXT: renamable $x4 = LD 0, %stack.[[SLOT1]] :: (load 8 from %stack.[[SLOT1]]) -; 64BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store 8 into %stack.[[SLOT2]]) -; 64BIT-NEXT: renamable $x6 = LD 0, %stack.[[SLOT2]] :: (load 8 from %stack.[[SLOT2]]) +; 64BIT: renamable $x[[REG:[0-9]+]] = LDtoc @f1, $x2 :: (load (s64) from got) +; 64BIT-NEXT: renamable $f1 = LFS 0, killed renamable $x[[REG]] :: (dereferenceable load (s32) from @f1) +; 64BIT-NEXT: renamable $x[[REG:[0-9]+]] = LDtoc @d1, $x2 :: (load (s64) from got) +; 64BIT-NEXT: STFD renamable $f1, 0, %stack.[[SLOT1:[0-9]+]] :: (store (s64) into %stack.[[SLOT1]]) +; 64BIT-NEXT: renamable $f2 = LFD 0, killed renamable $x[[REG]] :: (dereferenceable load (s64) from @d1) +; 64BIT-NEXT: renamable $x4 = LD 0, %stack.[[SLOT1]] :: (load (s64) from %stack.[[SLOT1]]) +; 64BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store (s64) into %stack.[[SLOT2]]) +; 64BIT-NEXT: renamable $x6 = LD 0, %stack.[[SLOT2]] :: (load (s64) from %stack.[[SLOT2]]) ; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 ; 64BIT-NEXT: $x3 = LI8 42 ; 64BIT-NEXT: $x5 = LI8 42 @@ -911,10 +911,10 @@ entry: ; CHECK-LABEL: name: call_test_vararg4 -; 32BIT: renamable $r[[REG:[0-9]+]] = LWZtoc @f1, $r2 :: (load 4 from got) -; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r[[REG]] :: (dereferenceable load 4 from @f1) -; 32BIT-NEXT: STFS renamable $f1, 0, %stack.[[SLOT:[0-9]+]] :: (store 4 into %stack.[[SLOT]]) -; 32BIT-NEXT: renamable $r4 = LWZ 0, %stack.[[SLOT]] :: (load 4 from %stack.[[SLOT]]) +; 32BIT: renamable $r[[REG:[0-9]+]] = LWZtoc @f1, $r2 :: (load (s32) from got) +; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r[[REG]] :: (dereferenceable load (s32) from @f1) +; 32BIT-NEXT: STFS renamable $f1, 0, %stack.[[SLOT:[0-9]+]] :: (store (s32) into %stack.[[SLOT]]) +; 32BIT-NEXT: renamable $r4 = LWZ 0, %stack.[[SLOT]] :: (load (s32) from %stack.[[SLOT]]) ; 32BIT-NEXT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 ; 32BIT-NEXT: $r3 = LI 42 ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $f1, implicit $r4, implicit $r2, implicit-def $r1 @@ -929,10 +929,10 @@ entry: ; ASM32PWR4-NEXT: bl .test_vararg[PR] ; ASM32PWR4-NEXT: nop -; 64BIT: renamable $x[[REG:[0-9]+]] = LDtoc @f1, $x2 :: (load 8 from got) -; 64BIT-NEXT: renamable $f1 = LFS 0, killed renamable $x[[REG]] :: (dereferenceable load 4 from @f1) -; 64BIT-NEXT: STFS renamable $f1, 0, %stack.[[SLOT:[0-9]+]] :: (store 4 into %stack.[[SLOT]]) -; 64BIT-NEXT: renamable $x4 = LWZ8 0, %stack.[[SLOT]] :: (load 4 from %stack.[[SLOT]]) +; 64BIT: renamable $x[[REG:[0-9]+]] = LDtoc @f1, $x2 :: (load (s64) from got) +; 64BIT-NEXT: renamable $f1 = LFS 0, killed renamable $x[[REG]] :: (dereferenceable load (s32) from @f1) +; 64BIT-NEXT: STFS renamable $f1, 0, %stack.[[SLOT:[0-9]+]] :: (store (s32) into %stack.[[SLOT]]) +; 64BIT-NEXT: renamable $x4 = LWZ8 0, %stack.[[SLOT]] :: (load (s32) from %stack.[[SLOT]]) ; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 ; 64BIT-NEXT: $x3 = LI8 42 ; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $f1, implicit $x4, implicit $x2, implicit-def $r1 @@ -980,21 +980,21 @@ declare void @test_stackarg_int(i32, i32, i32, i32, i32, i32, i32, i32, i8 zeroe ; 32BIT-DAG: $r8 = LI 6 ; 32BIT-DAG: $r9 = LI 7 ; 32BIT-DAG: $r10 = LI 8 -; 32BIT-DAG: renamable $r[[REGCADDR:[0-9]+]] = LWZtoc @c, $r2 :: (load 4 from got) -; 32BIT-DAG: renamable $r[[REGC:[0-9]+]] = LBZ 0, killed renamable $r[[REGCADDR]] :: (dereferenceable load 1 from @c) -; 32BIT-DAG: STW killed renamable $r[[REGC]], 56, $r1 :: (store 4) -; 32BIT-DAG: renamable $r[[REGSIADDR:[0-9]+]] = LWZtoc @si, $r2 :: (load 4 from got) -; 32BIT-DAG: renamable $r[[REGSI:[0-9]+]] = LHA 0, killed renamable $r[[REGSIADDR]] :: (dereferenceable load 2 from @si) -; 32BIT-DAG: STW killed renamable $r[[REGSI]], 60, $r1 :: (store 4) -; 32BIT-DAG: renamable $r[[REGIADDR:[0-9]+]] = LWZtoc @i, $r2 :: (load 4 from got) -; 32BIT-DAG: renamable $r[[REGI:[0-9]+]] = LWZ 0, killed renamable $r[[REGIADDR]] :: (dereferenceable load 4 from @i) -; 32BIT-DAG: STW killed renamable $r[[REGI]], 64, $r1 :: (store 4) -; 32BIT-DAG: renamable $r[[REGLLIADDR:[0-9]+]] = LWZtoc @lli, $r2 :: (load 4 from got) -; 32BIT-DAG: renamable $r[[REGLLI1:[0-9]+]] = LWZ 0, renamable $r[[REGLLIADDR]] :: (dereferenceable load 4 from @lli, align 8) -; 32BIT-DAG: STW killed renamable $r[[REGLLI1]], 68, $r1 :: (store 4) -; 32BIT-DAG: renamable $r[[REGLLI2:[0-9]+]] = LWZ 4, killed renamable $r[[REGLLIADDR]] :: (dereferenceable load 4 from @lli + 4, basealign 8) -; 32BIT-DAG: STW killed renamable $r[[REGLLI2]], 72, $r1 :: (store 4) -; 32BIT-DAG: STW renamable $r[[REGI]], 76, $r1 :: (store 4) +; 32BIT-DAG: renamable $r[[REGCADDR:[0-9]+]] = LWZtoc @c, $r2 :: (load (s32) from got) +; 32BIT-DAG: renamable $r[[REGC:[0-9]+]] = LBZ 0, killed renamable $r[[REGCADDR]] :: (dereferenceable load (s8) from @c) +; 32BIT-DAG: STW killed renamable $r[[REGC]], 56, $r1 :: (store (s32)) +; 32BIT-DAG: renamable $r[[REGSIADDR:[0-9]+]] = LWZtoc @si, $r2 :: (load (s32) from got) +; 32BIT-DAG: renamable $r[[REGSI:[0-9]+]] = LHA 0, killed renamable $r[[REGSIADDR]] :: (dereferenceable load (s16) from @si) +; 32BIT-DAG: STW killed renamable $r[[REGSI]], 60, $r1 :: (store (s32)) +; 32BIT-DAG: renamable $r[[REGIADDR:[0-9]+]] = LWZtoc @i, $r2 :: (load (s32) from got) +; 32BIT-DAG: renamable $r[[REGI:[0-9]+]] = LWZ 0, killed renamable $r[[REGIADDR]] :: (dereferenceable load (s32) from @i) +; 32BIT-DAG: STW killed renamable $r[[REGI]], 64, $r1 :: (store (s32)) +; 32BIT-DAG: renamable $r[[REGLLIADDR:[0-9]+]] = LWZtoc @lli, $r2 :: (load (s32) from got) +; 32BIT-DAG: renamable $r[[REGLLI1:[0-9]+]] = LWZ 0, renamable $r[[REGLLIADDR]] :: (dereferenceable load (s32) from @lli, align 8) +; 32BIT-DAG: STW killed renamable $r[[REGLLI1]], 68, $r1 :: (store (s32)) +; 32BIT-DAG: renamable $r[[REGLLI2:[0-9]+]] = LWZ 4, killed renamable $r[[REGLLIADDR]] :: (dereferenceable load (s32) from @lli + 4, basealign 8) +; 32BIT-DAG: STW killed renamable $r[[REGLLI2]], 72, $r1 :: (store (s32)) +; 32BIT-DAG: STW renamable $r[[REGI]], 76, $r1 :: (store (s32)) ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r5, implicit $r6, implicit $r7, implicit $r8, implicit $r9, implicit $r10, implicit $r2, implicit-def $r1 ; 32BIT-NEXT: ADJCALLSTACKUP 80, 0, implicit-def dead $r1, implicit $r1 @@ -1038,19 +1038,19 @@ declare void @test_stackarg_int(i32, i32, i32, i32, i32, i32, i32, i32, i8 zeroe ; 64BIT-DAG: $x8 = LI8 6 ; 64BIT-DAG: $x9 = LI8 7 ; 64BIT-DAG: $x10 = LI8 8 -; 64BIT-DAG: renamable $x[[REGCADDR:[0-9]+]] = LDtoc @c, $x2 :: (load 8 from got) -; 64BIT-DAG: renamable $x[[REGC:[0-9]+]] = LBZ8 0, killed renamable $x[[REGCADDR]] :: (dereferenceable load 1 from @c) -; 64BIT-DAG: STD killed renamable $x[[REGC]], 112, $x1 :: (store 8) -; 64BIT-DAG: renamable $x[[REGSIADDR:[0-9]+]] = LDtoc @si, $x2 :: (load 8 from got) -; 64BIT-DAG: renamable $x[[REGSI:[0-9]+]] = LHA8 0, killed renamable $x[[REGSIADDR]] :: (dereferenceable load 2 from @si) -; 64BIT-DAG: STD killed renamable $x[[REGSI]], 120, $x1 :: (store 8) -; 64BIT-DAG: renamable $x[[REGIADDR:[0-9]+]] = LDtoc @i, $x2 :: (load 8 from got) -; 64BIT-DAG: renamable $x[[REGI:[0-9]+]] = LWZ8 0, killed renamable $x[[REGIADDR]] :: (dereferenceable load 4 from @i) -; 64BIT-DAG: STD killed renamable $x[[REGI]], 128, $x1 :: (store 8) -; 64BIT-DAG: renamable $x[[REGLLIADDR:[0-9]+]] = LDtoc @lli, $x2 :: (load 8 from got) -; 64BIT-DAG: renamable $x[[REGLLI:[0-9]+]] = LD 0, killed renamable $x[[REGLLIADDR]] :: (dereferenceable load 8 from @lli) -; 64BIT-DAG: STD killed renamable $x[[REGLLI]], 136, $x1 :: (store 8) -; 64BIT-DAG: STD renamable $x[[REGI]], 144, $x1 :: (store 8) +; 64BIT-DAG: renamable $x[[REGCADDR:[0-9]+]] = LDtoc @c, $x2 :: (load (s64) from got) +; 64BIT-DAG: renamable $x[[REGC:[0-9]+]] = LBZ8 0, killed renamable $x[[REGCADDR]] :: (dereferenceable load (s8) from @c) +; 64BIT-DAG: STD killed renamable $x[[REGC]], 112, $x1 :: (store (s64)) +; 64BIT-DAG: renamable $x[[REGSIADDR:[0-9]+]] = LDtoc @si, $x2 :: (load (s64) from got) +; 64BIT-DAG: renamable $x[[REGSI:[0-9]+]] = LHA8 0, killed renamable $x[[REGSIADDR]] :: (dereferenceable load (s16) from @si) +; 64BIT-DAG: STD killed renamable $x[[REGSI]], 120, $x1 :: (store (s64)) +; 64BIT-DAG: renamable $x[[REGIADDR:[0-9]+]] = LDtoc @i, $x2 :: (load (s64) from got) +; 64BIT-DAG: renamable $x[[REGI:[0-9]+]] = LWZ8 0, killed renamable $x[[REGIADDR]] :: (dereferenceable load (s32) from @i) +; 64BIT-DAG: STD killed renamable $x[[REGI]], 128, $x1 :: (store (s64)) +; 64BIT-DAG: renamable $x[[REGLLIADDR:[0-9]+]] = LDtoc @lli, $x2 :: (load (s64) from got) +; 64BIT-DAG: renamable $x[[REGLLI:[0-9]+]] = LD 0, killed renamable $x[[REGLLIADDR]] :: (dereferenceable load (s64) from @lli) +; 64BIT-DAG: STD killed renamable $x[[REGLLI]], 136, $x1 :: (store (s64)) +; 64BIT-DAG: STD renamable $x[[REGI]], 144, $x1 :: (store (s64)) ; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit $x8, implicit $x9, implicit $x10, implicit $x2, implicit-def $r1 ; 64BIT-NEXT: ADJCALLSTACKUP 152, 0, implicit-def dead $r1, implicit $r1 @@ -1104,12 +1104,12 @@ declare void @test_stackarg_float(i32, i32, i32, i32, i32, i32, i32, i32, float, ; 32BIT-DAG: $r8 = LI 6 ; 32BIT-DAG: $r9 = LI 7 ; 32BIT-DAG: $r10 = LI 8 -; 32BIT-DAG: renamable $r[[REGF:[0-9]+]] = LWZtoc @f, $r2 :: (load 4 from got) -; 32BIT-DAG: renamable $f1 = LFS 0, killed renamable $r[[REGF]] :: (dereferenceable load 4 from @f) -; 32BIT-DAG: renamable $r[[REGD:[0-9]+]] = LWZtoc @d, $r2 :: (load 4 from got) -; 32BIT-DAG: renamable $f2 = LFD 0, killed renamable $r[[REGD]] :: (dereferenceable load 8 from @d) -; 32BIT-DAG: STFS renamable $f1, 56, $r1 :: (store 4) -; 32BIT-DAG: STFD renamable $f2, 60, $r1 :: (store 8) +; 32BIT-DAG: renamable $r[[REGF:[0-9]+]] = LWZtoc @f, $r2 :: (load (s32) from got) +; 32BIT-DAG: renamable $f1 = LFS 0, killed renamable $r[[REGF]] :: (dereferenceable load (s32) from @f) +; 32BIT-DAG: renamable $r[[REGD:[0-9]+]] = LWZtoc @d, $r2 :: (load (s32) from got) +; 32BIT-DAG: renamable $f2 = LFD 0, killed renamable $r[[REGD]] :: (dereferenceable load (s64) from @d) +; 32BIT-DAG: STFS renamable $f1, 56, $r1 :: (store (s32)) +; 32BIT-DAG: STFD renamable $f2, 60, $r1 :: (store (s64)) ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit killed $r5, implicit killed $r6, implicit killed $r7, implicit killed $r8, implicit killed $r9, implicit killed $r10, implicit $f1, implicit $f2, implicit $r2, implicit-def $r1 ; 32BIT-NEXT: ADJCALLSTACKUP 68, 0, implicit-def dead $r1, implicit $r1 @@ -1145,12 +1145,12 @@ declare void @test_stackarg_float(i32, i32, i32, i32, i32, i32, i32, i32, float, ; 64BIT-DAG: $x8 = LI8 6 ; 64BIT-DAG: $x9 = LI8 7 ; 64BIT-DAG: $x10 = LI8 8 -; 64BIT-DAG: renamable $x[[REGF:[0-9]+]] = LDtoc @f, $x2 :: (load 8 from got) -; 64BIT-DAG: renamable $f1 = LFS 0, killed renamable $x[[REGF]] :: (dereferenceable load 4 from @f) -; 64BIT-DAG: renamable $x[[REGD:[0-9]+]] = LDtoc @d, $x2 :: (load 8 from got) -; 64BIT-DAG: renamable $f2 = LFD 0, killed renamable $x[[REGD]] :: (dereferenceable load 8 from @d) -; 64BIT-DAG: STFS renamable $f1, 112, $x1 :: (store 4) -; 64BIT-DAG: STFD renamable $f2, 120, $x1 :: (store 8) +; 64BIT-DAG: renamable $x[[REGF:[0-9]+]] = LDtoc @f, $x2 :: (load (s64) from got) +; 64BIT-DAG: renamable $f1 = LFS 0, killed renamable $x[[REGF]] :: (dereferenceable load (s32) from @f) +; 64BIT-DAG: renamable $x[[REGD:[0-9]+]] = LDtoc @d, $x2 :: (load (s64) from got) +; 64BIT-DAG: renamable $f2 = LFD 0, killed renamable $x[[REGD]] :: (dereferenceable load (s64) from @d) +; 64BIT-DAG: STFS renamable $f1, 112, $x1 :: (store (s32)) +; 64BIT-DAG: STFD renamable $f2, 120, $x1 :: (store (s64)) ; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit killed $x5, implicit killed $x6, implicit killed $x7, implicit killed $x8, implicit killed $x9, implicit killed $x10, implicit $f1, implicit $f2, implicit $x2, implicit-def $r1 ; 64BIT-NEXT: ADJCALLSTACKUP 128, 0, implicit-def dead $r1, implicit $r1 @@ -1193,11 +1193,11 @@ declare void @test_stackarg_float2(i32, i32, i32, i32, i32, i32, ...) ; 32BIT-DAG: $r6 = LI 4 ; 32BIT-DAG: $r7 = LI 5 ; 32BIT-DAG: $r8 = LI 6 -; 32BIT-DAG: renamable $r[[REG:[0-9]+]] = LWZtoc @d, $r2 :: (load 4 from got) -; 32BIT-DAG: renamable $f1 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load 8 from @d) -; 32BIT-DAG: STFD renamable $f1, 0, %stack.0 :: (store 8 into %stack.0) -; 32BIT-DAG: renamable $r9 = LWZ 0, %stack.0 :: (load 4 from %stack.0, align 8) -; 32BIT-DAG: renamable $r10 = LWZ 4, %stack.0 :: (load 4 from %stack.0 + 4) +; 32BIT-DAG: renamable $r[[REG:[0-9]+]] = LWZtoc @d, $r2 :: (load (s32) from got) +; 32BIT-DAG: renamable $f1 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load (s64) from @d) +; 32BIT-DAG: STFD renamable $f1, 0, %stack.0 :: (store (s64) into %stack.0) +; 32BIT-DAG: renamable $r9 = LWZ 0, %stack.0 :: (load (s32) from %stack.0, align 8) +; 32BIT-DAG: renamable $r10 = LWZ 4, %stack.0 :: (load (s32) from %stack.0 + 4) ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit killed $r4, implicit killed $r5, implicit killed $r6, implicit killed $r7, implicit killed $r8, implicit $f1, implicit $r9, implicit $r10, implicit $r2, implicit-def $r1 ; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 @@ -1228,10 +1228,10 @@ declare void @test_stackarg_float2(i32, i32, i32, i32, i32, i32, ...) ; 64BIT-DAG: $x6 = LI8 4 ; 64BIT-DAG: $x7 = LI8 5 ; 64BIT-DAG: $x8 = LI8 6 -; 64BIT-DAG: renamable $x[[REG:[0-9]+]] = LDtoc @d, $x2 :: (load 8 from got) -; 64BIT-DAG: renamable $f1 = LFD 0, killed renamable $x[[REG]] :: (dereferenceable load 8 from @d) -; 64BIT-DAG: STFD renamable $f1, 0, %stack.0 :: (store 8 into %stack.0) -; 64BIT-DAG: renamable $x9 = LD 0, %stack.0 :: (load 8 from %stack.0) +; 64BIT-DAG: renamable $x[[REG:[0-9]+]] = LDtoc @d, $x2 :: (load (s64) from got) +; 64BIT-DAG: renamable $f1 = LFD 0, killed renamable $x[[REG]] :: (dereferenceable load (s64) from @d) +; 64BIT-DAG: STFD renamable $f1, 0, %stack.0 :: (store (s64) into %stack.0) +; 64BIT-DAG: renamable $x9 = LD 0, %stack.0 :: (load (s64) from %stack.0) ; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit killed $x4, implicit killed $x5, implicit killed $x6, implicit killed $x7, implicit killed $x8, implicit $f1, implicit $x9, implicit $x2, implicit-def $r1 ; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 @@ -1274,14 +1274,14 @@ declare void @test_stackarg_float3(i32, i32, i32, i32, i32, i32, i32, ...) ; 32BIT-DAG: $r7 = LI 5 ; 32BIT-DAG: $r8 = LI 6 ; 32BIT-DAG: $r9 = LI 7 -; 32BIT-DAG: renamable $r[[REGD:[0-9]+]] = LWZtoc @d, $r2 :: (load 4 from got) -; 32BIT-DAG: renamable $f1 = LFD 0, killed renamable $r[[REGD]] :: (dereferenceable load 8 from @d) -; 32BIT-DAG: renamable $r[[REGF:[0-9]+]] = LWZtoc @f, $r2 :: (load 4 from got) -; 32BIT-DAG: renamable $f2 = LFS 0, killed renamable $r[[REGF]] :: (dereferenceable load 4 from @f) -; 32BIT-DAG: STFD renamable $f1, 52, $r1 :: (store 8) -; 32BIT-DAG: STFS renamable $f2, 60, $r1 :: (store 4) -; 32BIT-DAG: STFD renamable $f1, 0, %stack.0 :: (store 8 into %stack.0) -; 32BIT-DAG: renamable $r10 = LWZ 0, %stack.0 :: (load 4 from %stack.0, align 8) +; 32BIT-DAG: renamable $r[[REGD:[0-9]+]] = LWZtoc @d, $r2 :: (load (s32) from got) +; 32BIT-DAG: renamable $f1 = LFD 0, killed renamable $r[[REGD]] :: (dereferenceable load (s64) from @d) +; 32BIT-DAG: renamable $r[[REGF:[0-9]+]] = LWZtoc @f, $r2 :: (load (s32) from got) +; 32BIT-DAG: renamable $f2 = LFS 0, killed renamable $r[[REGF]] :: (dereferenceable load (s32) from @f) +; 32BIT-DAG: STFD renamable $f1, 52, $r1 :: (store (s64)) +; 32BIT-DAG: STFS renamable $f2, 60, $r1 :: (store (s32)) +; 32BIT-DAG: STFD renamable $f1, 0, %stack.0 :: (store (s64) into %stack.0) +; 32BIT-DAG: renamable $r10 = LWZ 0, %stack.0 :: (load (s32) from %stack.0, align 8) ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit killed $r4, implicit killed $r5, implicit killed $r6, implicit killed $r7, implicit killed $r8, implicit killed $r9, implicit $f1, implicit $r10, implicit $f2, implicit $r2, implicit-def $r1 ; 32BIT-NEXT: ADJCALLSTACKUP 64, 0, implicit-def dead $r1, implicit $r1 @@ -1318,13 +1318,13 @@ declare void @test_stackarg_float3(i32, i32, i32, i32, i32, i32, i32, ...) ; 64BIT-DAG: $x7 = LI8 5 ; 64BIT-DAG: $x8 = LI8 6 ; 64BIT-DAG: $x9 = LI8 7 -; 64BIT-DAG: renamable $x[[REGD:[0-9]+]] = LDtoc @d, $x2 :: (load 8 from got) -; 64BIT-DAG: renamable $f1 = LFD 0, killed renamable $x[[REGD]] :: (dereferenceable load 8 from @d) -; 64BIT-DAG: renamable $x[[REGF:[0-9]+]] = LDtoc @f, $x2 :: (load 8 from got) -; 64BIT-DAG: renamable $f2 = LFS 0, killed renamable $x[[REGF]] :: (dereferenceable load 4 from @f) -; 64BIT-DAG: STFS renamable $f2, 112, $x1 :: (store 4) -; 64BIT-DAG: STFD renamable $f1, 0, %stack.0 :: (store 8 into %stack.0) -; 64BIT-DAG: renamable $x10 = LD 0, %stack.0 :: (load 8 from %stack.0) +; 64BIT-DAG: renamable $x[[REGD:[0-9]+]] = LDtoc @d, $x2 :: (load (s64) from got) +; 64BIT-DAG: renamable $f1 = LFD 0, killed renamable $x[[REGD]] :: (dereferenceable load (s64) from @d) +; 64BIT-DAG: renamable $x[[REGF:[0-9]+]] = LDtoc @f, $x2 :: (load (s64) from got) +; 64BIT-DAG: renamable $f2 = LFS 0, killed renamable $x[[REGF]] :: (dereferenceable load (s32) from @f) +; 64BIT-DAG: STFS renamable $f2, 112, $x1 :: (store (s32)) +; 64BIT-DAG: STFD renamable $f1, 0, %stack.0 :: (store (s64) into %stack.0) +; 64BIT-DAG: renamable $x10 = LD 0, %stack.0 :: (load (s64) from %stack.0) ; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit killed $x4, implicit killed $x5, implicit killed $x6, implicit killed $x7, implicit killed $x8, implicit killed $x9, implicit $f1, implicit $x10, implicit $f2, implicit $x2, implicit-def $r1 ; 64BIT-NEXT: ADJCALLSTACKUP 120, 0, implicit-def dead $r1, implicit $r1 @@ -1482,34 +1482,34 @@ entry: ; 32BIT-DAG: $r8 = LI 6 ; 32BIT-DAG: $r9 = LI 7 ; 32BIT-DAG: $r10 = LI 8 -; 32BIT-DAG: renamable $r[[REGLL1ADDR:[0-9]+]] = LWZtoc @ll1, $r2 :: (load 4 from got) -; 32BIT-DAG: renamable $r[[REGLL1A:[0-9]+]] = LWZ 0, renamable $r[[REGLL1ADDR]] :: (dereferenceable load 4 from @ll1, align 8) -; 32BIT-DAG: renamable $r[[REGLL1B:[0-9]+]] = LWZ 4, killed renamable $r[[REGLL1ADDR]] :: (dereferenceable load 4 from @ll1 + 4, basealign 8) -; 32BIT-DAG: STW killed renamable $r[[REGLL1A]], 56, $r1 :: (store 4) -; 32BIT-DAG: STW killed renamable $r[[REGLL1B]], 60, $r1 :: (store 4) -; 32BIT-DAG: renamable $r[[REGSIADDR:[0-9]+]] = LWZtoc @si1, $r2 :: (load 4 from got) -; 32BIT-DAG: renamable $r[[REGSI:[0-9]+]] = LHA 0, killed renamable $r[[REGSIADDR]] :: (dereferenceable load 2 from @si1) -; 32BIT-DAG: STW killed renamable $r[[REGSI]], 64, $r1 :: (store 4) -; 32BIT-DAG: renamable $r[[REGCHADDR:[0-9]+]] = LWZtoc @ch, $r2 :: (load 4 from got) -; 32BIT-DAG: renamable $r[[REGCH:[0-9]+]] = LBZ 0, killed renamable $r[[REGCHADDR]] :: (dereferenceable load 1 from @ch) -; 32BIT-DAG: STW killed renamable $r[[REGCH]], 68, $r1 :: (store 4) -; 32BIT-DAG: renamable $r[[REGUIADDR:[0-9]+]] = LWZtoc @ui, $r2 :: (load 4 from got) -; 32BIT-DAG: renamable $r[[REGUI:[0-9]+]] = LWZ 0, killed renamable $r[[REGUIADDR]] :: (dereferenceable load 4 from @ui) -; 32BIT-DAG: STW killed renamable $r[[REGUI]], 72, $r1 :: (store 4) -; 32BIT-DAG: renamable $r[[REGSIADDR:[0-9]+]] = LWZtoc @sint, $r2 :: (load 4 from got) -; 32BIT-DAG: renamable $r[[REGSI:[0-9]+]] = LWZ 0, killed renamable $r[[REGSIADDR]] :: (dereferenceable load 4 from @sint) -; 32BIT-DAG: STW killed renamable $r[[REGSI]], 76, $r1 :: (store 4) -; 32BIT-DAG: renamable $r[[REGLL2ADDR:[0-9]+]] = LWZtoc @ll2, $r2 :: (load 4 from got) -; 32BIT-DAG: renamable $r[[REGLL2A:[0-9]+]] = LWZ 0, renamable $r[[REGLL2ADDR]] :: (dereferenceable load 4 from @ll2, align 8) -; 32BIT-DAG: renamable $r[[REGLL2B:[0-9]+]] = LWZ 4, killed renamable $r[[REGLL2ADDR]] :: (dereferenceable load 4 from @ll2 + 4, basealign 8) -; 32BIT-DAG: STW killed renamable $r[[REGLL2A]], 80, $r1 :: (store 4) -; 32BIT-DAG: STW killed renamable $r[[REGLL2B]], 84, $r1 :: (store 4) -; 32BIT-DAG: renamable $r[[REGUCADDR:[0-9]+]] = LWZtoc @uc1, $r2 :: (load 4 from got) -; 32BIT-DAG: renamable $r[[REGUC:[0-9]+]] = LBZ 0, killed renamable $r[[REGUCADDR]] :: (dereferenceable load 1 from @uc1) -; 32BIT-DAG: STW killed renamable $r[[REGUC]], 88, $r1 :: (store 4) -; 32BIT-DAG: renamable $r[[REGIADDR:[0-9]+]] = LWZtoc @i1, $r2 :: (load 4 from got) -; 32BIT-DAG: renamable $r[[REGI:[0-9]+]] = LWZ 0, killed renamable $r[[REGIADDR]] :: (dereferenceable load 4 from @i1) -; 32BIT-DAG: STW killed renamable $r[[REGI]], 92, $r1 :: (store 4) +; 32BIT-DAG: renamable $r[[REGLL1ADDR:[0-9]+]] = LWZtoc @ll1, $r2 :: (load (s32) from got) +; 32BIT-DAG: renamable $r[[REGLL1A:[0-9]+]] = LWZ 0, renamable $r[[REGLL1ADDR]] :: (dereferenceable load (s32) from @ll1, align 8) +; 32BIT-DAG: renamable $r[[REGLL1B:[0-9]+]] = LWZ 4, killed renamable $r[[REGLL1ADDR]] :: (dereferenceable load (s32) from @ll1 + 4, basealign 8) +; 32BIT-DAG: STW killed renamable $r[[REGLL1A]], 56, $r1 :: (store (s32)) +; 32BIT-DAG: STW killed renamable $r[[REGLL1B]], 60, $r1 :: (store (s32)) +; 32BIT-DAG: renamable $r[[REGSIADDR:[0-9]+]] = LWZtoc @si1, $r2 :: (load (s32) from got) +; 32BIT-DAG: renamable $r[[REGSI:[0-9]+]] = LHA 0, killed renamable $r[[REGSIADDR]] :: (dereferenceable load (s16) from @si1) +; 32BIT-DAG: STW killed renamable $r[[REGSI]], 64, $r1 :: (store (s32)) +; 32BIT-DAG: renamable $r[[REGCHADDR:[0-9]+]] = LWZtoc @ch, $r2 :: (load (s32) from got) +; 32BIT-DAG: renamable $r[[REGCH:[0-9]+]] = LBZ 0, killed renamable $r[[REGCHADDR]] :: (dereferenceable load (s8) from @ch) +; 32BIT-DAG: STW killed renamable $r[[REGCH]], 68, $r1 :: (store (s32)) +; 32BIT-DAG: renamable $r[[REGUIADDR:[0-9]+]] = LWZtoc @ui, $r2 :: (load (s32) from got) +; 32BIT-DAG: renamable $r[[REGUI:[0-9]+]] = LWZ 0, killed renamable $r[[REGUIADDR]] :: (dereferenceable load (s32) from @ui) +; 32BIT-DAG: STW killed renamable $r[[REGUI]], 72, $r1 :: (store (s32)) +; 32BIT-DAG: renamable $r[[REGSIADDR:[0-9]+]] = LWZtoc @sint, $r2 :: (load (s32) from got) +; 32BIT-DAG: renamable $r[[REGSI:[0-9]+]] = LWZ 0, killed renamable $r[[REGSIADDR]] :: (dereferenceable load (s32) from @sint) +; 32BIT-DAG: STW killed renamable $r[[REGSI]], 76, $r1 :: (store (s32)) +; 32BIT-DAG: renamable $r[[REGLL2ADDR:[0-9]+]] = LWZtoc @ll2, $r2 :: (load (s32) from got) +; 32BIT-DAG: renamable $r[[REGLL2A:[0-9]+]] = LWZ 0, renamable $r[[REGLL2ADDR]] :: (dereferenceable load (s32) from @ll2, align 8) +; 32BIT-DAG: renamable $r[[REGLL2B:[0-9]+]] = LWZ 4, killed renamable $r[[REGLL2ADDR]] :: (dereferenceable load (s32) from @ll2 + 4, basealign 8) +; 32BIT-DAG: STW killed renamable $r[[REGLL2A]], 80, $r1 :: (store (s32)) +; 32BIT-DAG: STW killed renamable $r[[REGLL2B]], 84, $r1 :: (store (s32)) +; 32BIT-DAG: renamable $r[[REGUCADDR:[0-9]+]] = LWZtoc @uc1, $r2 :: (load (s32) from got) +; 32BIT-DAG: renamable $r[[REGUC:[0-9]+]] = LBZ 0, killed renamable $r[[REGUCADDR]] :: (dereferenceable load (s8) from @uc1) +; 32BIT-DAG: STW killed renamable $r[[REGUC]], 88, $r1 :: (store (s32)) +; 32BIT-DAG: renamable $r[[REGIADDR:[0-9]+]] = LWZtoc @i1, $r2 :: (load (s32) from got) +; 32BIT-DAG: renamable $r[[REGI:[0-9]+]] = LWZ 0, killed renamable $r[[REGIADDR]] :: (dereferenceable load (s32) from @i1) +; 32BIT-DAG: STW killed renamable $r[[REGI]], 92, $r1 :: (store (s32)) ; 32BIT-DAG: ADJCALLSTACKDOWN 96, 0, implicit-def dead $r1, implicit $r1 ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r5, implicit $r6, implicit $r7, implicit $r8, implicit $r9, implicit $r10, implicit $r2, implicit-def $r1, implicit-def dead $r3 ; 32BIT-NEXT: ADJCALLSTACKUP 96, 0, implicit-def dead $r1, implicit $r1 @@ -1522,30 +1522,30 @@ entry: ; 64BIT-DAG: $x8 = LI8 6 ; 64BIT-DAG: $x9 = LI8 7 ; 64BIT-DAG: $x10 = LI8 8 -; 64BIT-DAG: renamable $x[[REGLL1ADDR:[0-9]+]] = LDtoc @ll1, $x2 :: (load 8 from got) -; 64BIT-DAG: renamable $x[[REGLL1:[0-9]+]] = LD 0, killed renamable $x[[REGLL1ADDR]] :: (dereferenceable load 8 from @ll1) -; 64BIT-DAG: STD killed renamable $x[[REGLL1]], 112, $x1 :: (store 8) -; 64BIT-DAG: renamable $x[[REGSIADDR:[0-9]+]] = LDtoc @si1, $x2 :: (load 8 from got) -; 64BIT-DAG: renamable $x[[REGSI:[0-9]+]] = LHA8 0, killed renamable $x[[REGSIADDR]] :: (dereferenceable load 2 from @si1) -; 64BIT-DAG: STD killed renamable $x[[REGSI]], 120, $x1 :: (store 8) -; 64BIT-DAG: renamable $x[[REGCHADDR:[0-9]+]] = LDtoc @ch, $x2 :: (load 8 from got) -; 64BIT-DAG: renamable $x[[REGCH:[0-9]+]] = LBZ8 0, killed renamable $x[[REGCHADDR]] :: (dereferenceable load 1 from @ch) -; 64BIT-DAG: STD killed renamable $x[[REGCH]], 128, $x1 :: (store 8) -; 64BIT-DAG: renamable $x[[REGUIADDR:[0-9]+]] = LDtoc @ui, $x2 :: (load 8 from got) -; 64BIT-DAG: renamable $x[[REGUI:[0-9]+]] = LWZ8 0, killed renamable $x[[REGUIADDR]] :: (dereferenceable load 4 from @ui) -; 64BIT-DAG: STD killed renamable $x[[REGUI]], 136, $x1 :: (store 8) -; 64BIT-DAG: renamable $x[[REGSIADDR:[0-9]+]] = LDtoc @sint, $x2 :: (load 8 from got) -; 64BIT-DAG: renamable $x[[REGSI:[0-9]+]] = LWZ8 0, killed renamable $x[[REGSIADDR]] :: (dereferenceable load 4 from @sint) -; 64BIT-DAG: STD killed renamable $x[[REGSI]], 144, $x1 :: (store 8) -; 64BIT-DAG: renamable $x[[REGLL2ADDR:[0-9]+]] = LDtoc @ll2, $x2 :: (load 8 from got) -; 64BIT-DAG: renamable $x[[REGLL2:[0-9]+]] = LD 0, killed renamable $x[[REGLL2ADDR]] :: (dereferenceable load 8 from @ll2) -; 64BIT-DAG: STD killed renamable $x[[REGLL2]], 152, $x1 :: (store 8) -; 64BIT-DAG: renamable $x[[REGUCADDR:[0-9]+]] = LDtoc @uc1, $x2 :: (load 8 from got) -; 64BIT-DAG: renamable $x[[REGUC:[0-9]+]] = LBZ8 0, killed renamable $x[[REGUCADDR]] :: (dereferenceable load 1 from @uc1) -; 64BIT-DAG: STD killed renamable $x[[REGUC]], 160, $x1 :: (store 8) -; 64BIT-DAG: renamable $x[[REGIADDR:[0-9]+]] = LDtoc @i1, $x2 :: (load 8 from got) -; 64BIT-DAG: renamable $x[[REGI:[0-9]+]] = LWZ8 0, killed renamable $x[[REGIADDR]] :: (dereferenceable load 4 from @i1) -; 64BIT-DAG: STD killed renamable $x[[REGI]], 168, $x1 :: (store 8) +; 64BIT-DAG: renamable $x[[REGLL1ADDR:[0-9]+]] = LDtoc @ll1, $x2 :: (load (s64) from got) +; 64BIT-DAG: renamable $x[[REGLL1:[0-9]+]] = LD 0, killed renamable $x[[REGLL1ADDR]] :: (dereferenceable load (s64) from @ll1) +; 64BIT-DAG: STD killed renamable $x[[REGLL1]], 112, $x1 :: (store (s64)) +; 64BIT-DAG: renamable $x[[REGSIADDR:[0-9]+]] = LDtoc @si1, $x2 :: (load (s64) from got) +; 64BIT-DAG: renamable $x[[REGSI:[0-9]+]] = LHA8 0, killed renamable $x[[REGSIADDR]] :: (dereferenceable load (s16) from @si1) +; 64BIT-DAG: STD killed renamable $x[[REGSI]], 120, $x1 :: (store (s64)) +; 64BIT-DAG: renamable $x[[REGCHADDR:[0-9]+]] = LDtoc @ch, $x2 :: (load (s64) from got) +; 64BIT-DAG: renamable $x[[REGCH:[0-9]+]] = LBZ8 0, killed renamable $x[[REGCHADDR]] :: (dereferenceable load (s8) from @ch) +; 64BIT-DAG: STD killed renamable $x[[REGCH]], 128, $x1 :: (store (s64)) +; 64BIT-DAG: renamable $x[[REGUIADDR:[0-9]+]] = LDtoc @ui, $x2 :: (load (s64) from got) +; 64BIT-DAG: renamable $x[[REGUI:[0-9]+]] = LWZ8 0, killed renamable $x[[REGUIADDR]] :: (dereferenceable load (s32) from @ui) +; 64BIT-DAG: STD killed renamable $x[[REGUI]], 136, $x1 :: (store (s64)) +; 64BIT-DAG: renamable $x[[REGSIADDR:[0-9]+]] = LDtoc @sint, $x2 :: (load (s64) from got) +; 64BIT-DAG: renamable $x[[REGSI:[0-9]+]] = LWZ8 0, killed renamable $x[[REGSIADDR]] :: (dereferenceable load (s32) from @sint) +; 64BIT-DAG: STD killed renamable $x[[REGSI]], 144, $x1 :: (store (s64)) +; 64BIT-DAG: renamable $x[[REGLL2ADDR:[0-9]+]] = LDtoc @ll2, $x2 :: (load (s64) from got) +; 64BIT-DAG: renamable $x[[REGLL2:[0-9]+]] = LD 0, killed renamable $x[[REGLL2ADDR]] :: (dereferenceable load (s64) from @ll2) +; 64BIT-DAG: STD killed renamable $x[[REGLL2]], 152, $x1 :: (store (s64)) +; 64BIT-DAG: renamable $x[[REGUCADDR:[0-9]+]] = LDtoc @uc1, $x2 :: (load (s64) from got) +; 64BIT-DAG: renamable $x[[REGUC:[0-9]+]] = LBZ8 0, killed renamable $x[[REGUCADDR]] :: (dereferenceable load (s8) from @uc1) +; 64BIT-DAG: STD killed renamable $x[[REGUC]], 160, $x1 :: (store (s64)) +; 64BIT-DAG: renamable $x[[REGIADDR:[0-9]+]] = LDtoc @i1, $x2 :: (load (s64) from got) +; 64BIT-DAG: renamable $x[[REGI:[0-9]+]] = LWZ8 0, killed renamable $x[[REGIADDR]] :: (dereferenceable load (s32) from @i1) +; 64BIT-DAG: STD killed renamable $x[[REGI]], 168, $x1 :: (store (s64)) ; 64BIT-DAG: ADJCALLSTACKDOWN 176, 0, implicit-def dead $r1, implicit $r1 ; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit $x8, implicit $x9, implicit $x10, implicit $x2, implicit-def $r1, implicit-def dead $x3 ; 64BIT-NEXT: ADJCALLSTACKUP 176, 0, implicit-def dead $r1, implicit $r1 @@ -1656,17 +1656,17 @@ define void @test_i1_stack(i32 %a, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 % ; 32BIT-DAG: - { id: 0, type: default, offset: 59, size: 1 ; 32BIT-DAG: body: | ; 32BIT-DAG: bb.0.entry: -; 32BIT-DAG: renamable $r[[REGB:[0-9]+]] = LBZ 0, %fixed-stack.0 :: (load 1 from %fixed-stack.0) -; 32BIT-DAG: renamable $r[[REGBTOC:[0-9]+]] = LWZtoc @globali1, $r2 :: (load 4 from got) -; 32BIT-DAG: STB killed renamable $r[[REGB]], 0, killed renamable $r[[REGBTOC]] :: (store 1 into @globali1) +; 32BIT-DAG: renamable $r[[REGB:[0-9]+]] = LBZ 0, %fixed-stack.0 :: (load (s8) from %fixed-stack.0) +; 32BIT-DAG: renamable $r[[REGBTOC:[0-9]+]] = LWZtoc @globali1, $r2 :: (load (s32) from got) +; 32BIT-DAG: STB killed renamable $r[[REGB]], 0, killed renamable $r[[REGBTOC]] :: (store (s8) into @globali1) ; 64BIT-LABEL: fixedStack: ; 64BIT-DAG: - { id: 0, type: default, offset: 119, size: 1 ; 64BIT-DAG: body: | ; 64BIT-DAG: bb.0.entry: -; 64BIT-DAG: renamable $r[[REGB:[0-9]+]] = LBZ 0, %fixed-stack.0 :: (load 1 from %fixed-stack.0) -; 64BIT-DAG: renamable $x[[REGBTOC:[0-9]+]] = LDtoc @globali1, $x2 :: (load 8 from got) -; 64BIT-DAG: STB killed renamable $r[[SCRATCHREG:[0-9]+]], 0, killed renamable $x[[REGBTOC]] :: (store 1 into @globali1) +; 64BIT-DAG: renamable $r[[REGB:[0-9]+]] = LBZ 0, %fixed-stack.0 :: (load (s8) from %fixed-stack.0) +; 64BIT-DAG: renamable $x[[REGBTOC:[0-9]+]] = LDtoc @globali1, $x2 :: (load (s64) from got) +; 64BIT-DAG: STB killed renamable $r[[SCRATCHREG:[0-9]+]], 0, killed renamable $x[[REGBTOC]] :: (store (s8) into @globali1) ; 64BIT-DAG: BLR8 implicit $lr8, implicit $rm ; CHECKASM-LABEL: test_i1_stack: @@ -1699,7 +1699,7 @@ define void @call_test_i1_stack() { ; 32BIT-DAG: $r9 = LI 7 ; 32BIT-DAG: $r10 = LI 8 ; 32BIT-DAG: renamable $r[[REGBOOLADDR:[0-9]+]] = LI 1 -; 32BIT-DAG: STW killed renamable $r[[REGBOOLADDR]], 56, $r1 :: (store 4) +; 32BIT-DAG: STW killed renamable $r[[REGBOOLADDR]], 56, $r1 :: (store (s32)) ; 32BIT-DAG: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r5, implicit $r6, implicit $r7, implicit $r8, implicit $r9, implicit $r10, implicit $r2, implicit-def $r1 ; 32BIT-DAG: ADJCALLSTACKUP 60, 0, implicit-def dead $r1, implicit $r1 @@ -1713,7 +1713,7 @@ define void @call_test_i1_stack() { ; 64BIT-DAG: $x9 = LI8 7 ; 64BIT-DAG: $x10 = LI8 8 ; 64BIT-DAG: renamable $x[[REGBOOLADDR:[0-9]+]] = LI8 1 -; 64BIT-DAG: STD killed renamable $x[[REGBOOLADDR]], 112, $x1 :: (store 8) +; 64BIT-DAG: STD killed renamable $x[[REGBOOLADDR]], 112, $x1 :: (store (s64)) ; 64BIT-DAG: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit $x8, implicit $x9, implicit $x10, implicit $x2, implicit-def $r1 ; 64BIT-DAG: ADJCALLSTACKUP 120, 0, implicit-def dead $r1, implicit $r1 @@ -1829,92 +1829,92 @@ entry: ; CHECK-LABEL: caller_fpr_stack -; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZtoc %const.0, $r2 :: (load 4 from got) -; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZtoc %const.1, $r2 :: (load 4 from got) -; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZtoc %const.2, $r2 :: (load 4 from got) -; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZtoc %const.3, $r2 :: (load 4 from got) -; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZtoc %const.4, $r2 :: (load 4 from got) -; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZtoc %const.5, $r2 :: (load 4 from got) -; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZtoc %const.6, $r2 :: (load 4 from got) -; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZtoc %const.7, $r2 :: (load 4 from got) -; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZtoc %const.8, $r2 :: (load 4 from got) -; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZtoc %const.9, $r2 :: (load 4 from got) -; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZtoc %const.10, $r2 :: (load 4 from got) -; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZtoc %const.11, $r2 :: (load 4 from got) -; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 56, $r1 :: (store 4, align 8) -; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 60, $r1 :: (store 4 into unknown-address + 4, basealign 8) -; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 64, $r1 :: (store 4, align 8) -; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 68, $r1 :: (store 4 into unknown-address + 4, basealign 8) -; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 72, $r1 :: (store 4, align 8) -; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 76, $r1 :: (store 4 into unknown-address + 4, basealign 8) -; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 80, $r1 :: (store 4, align 8) -; 32BIT-DAG: STW renamable $r[[SCRATCHREG:[0-9]+]], 84, $r1 :: (store 4 into unknown-address + 4, basealign 8) -; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 88, $r1 :: (store 4, align 8) -; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 92, $r1 :: (store 4 into unknown-address + 4, basealign 8) -; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 96, $r1 :: (store 4, align 8) -; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 100, $r1 :: (store 4 into unknown-address + 4, basealign 8) -; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 104, $r1 :: (store 4, align 8) -; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 108, $r1 :: (store 4 into unknown-address + 4, basealign 8) -; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 112, $r1 :: (store 4, align 8) -; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 116, $r1 :: (store 4 into unknown-address + 4, basealign 8) -; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 120, $r1 :: (store 4, align 8) -; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 124, $r1 :: (store 4 into unknown-address + 4, basealign 8) -; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 128, $r1 :: (store 4) -; 32BIT-DAG: renamable $r[[REGF1:[0-9]+]] = LWZtoc @f14, $r2 :: (load 4 from got) -; 32BIT-DAG: renamable $r3 = LWZ 0, killed renamable $r[[REGF1]] :: (load 4 from @f14) -; 32BIT-DAG: STFD killed renamable $f0, 132, $r1 :: (store 8) -; 32BIT-DAG: renamable $r[[REGD:[0-9]+]] = LWZtoc @d15, $r2 :: (load 4 from got) -; 32BIT-DAG: renamable $f0 = LFD 0, killed renamable $r[[REGD]] :: (dereferenceable load 8 from @d15) -; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 140, $r1 :: (store 4) -; 32BIT-DAG: renamable $r[[REGF2:[0-9]+]] = LWZtoc @f16, $r2 :: (load 4 from got) -; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZ 0, killed renamable $r[[REGF2]] :: (load 4 from @f16) -; 32BIT-DAG: renamable $f1 = LFD 0, killed renamable $r[[SCRATCHREG:[0-9]+]] :: (load 8 from constant-pool) -; 32BIT-DAG: renamable $f2 = LFD 0, killed renamable $r[[SCRATCHREG:[0-9]+]] :: (load 8 from constant-pool) -; 32BIT-DAG: renamable $f3 = LFD 0, killed renamable $r[[SCRATCHREG:[0-9]+]] :: (load 8 from constant-pool) -; 32BIT-DAG: renamable $f4 = LFD 0, killed renamable $r[[SCRATCHREG:[0-9]+]] :: (load 8 from constant-pool) -; 32BIT-DAG: renamable $f5 = LFS 0, killed renamable $r[[SCRATCHREG:[0-9]+]] :: (load 4 from constant-pool) -; 32BIT-DAG: renamable $f6 = LFD 0, killed renamable $r[[SCRATCHREG:[0-9]+]] :: (load 8 from constant-pool) -; 32BIT-DAG: renamable $f7 = LFD 0, killed renamable $r[[SCRATCHREG:[0-9]+]] :: (load 8 from constant-pool) -; 32BIT-DAG: renamable $f8 = LFD 0, killed renamable $r[[SCRATCHREG:[0-9]+]] :: (load 8 from constant-pool) -; 32BIT-DAG: renamable $f9 = LFD 0, killed renamable $r[[SCRATCHREG:[0-9]+]] :: (load 8 from constant-pool) +; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZtoc %const.0, $r2 :: (load (s32) from got) +; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZtoc %const.1, $r2 :: (load (s32) from got) +; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZtoc %const.2, $r2 :: (load (s32) from got) +; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZtoc %const.3, $r2 :: (load (s32) from got) +; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZtoc %const.4, $r2 :: (load (s32) from got) +; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZtoc %const.5, $r2 :: (load (s32) from got) +; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZtoc %const.6, $r2 :: (load (s32) from got) +; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZtoc %const.7, $r2 :: (load (s32) from got) +; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZtoc %const.8, $r2 :: (load (s32) from got) +; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZtoc %const.9, $r2 :: (load (s32) from got) +; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZtoc %const.10, $r2 :: (load (s32) from got) +; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZtoc %const.11, $r2 :: (load (s32) from got) +; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 56, $r1 :: (store (s32), align 8) +; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 60, $r1 :: (store (s32) into unknown-address + 4, basealign 8) +; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 64, $r1 :: (store (s32), align 8) +; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 68, $r1 :: (store (s32) into unknown-address + 4, basealign 8) +; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 72, $r1 :: (store (s32), align 8) +; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 76, $r1 :: (store (s32) into unknown-address + 4, basealign 8) +; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 80, $r1 :: (store (s32), align 8) +; 32BIT-DAG: STW renamable $r[[SCRATCHREG:[0-9]+]], 84, $r1 :: (store (s32) into unknown-address + 4, basealign 8) +; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 88, $r1 :: (store (s32), align 8) +; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 92, $r1 :: (store (s32) into unknown-address + 4, basealign 8) +; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 96, $r1 :: (store (s32), align 8) +; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 100, $r1 :: (store (s32) into unknown-address + 4, basealign 8) +; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 104, $r1 :: (store (s32), align 8) +; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 108, $r1 :: (store (s32) into unknown-address + 4, basealign 8) +; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 112, $r1 :: (store (s32), align 8) +; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 116, $r1 :: (store (s32) into unknown-address + 4, basealign 8) +; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 120, $r1 :: (store (s32), align 8) +; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 124, $r1 :: (store (s32) into unknown-address + 4, basealign 8) +; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 128, $r1 :: (store (s32)) +; 32BIT-DAG: renamable $r[[REGF1:[0-9]+]] = LWZtoc @f14, $r2 :: (load (s32) from got) +; 32BIT-DAG: renamable $r3 = LWZ 0, killed renamable $r[[REGF1]] :: (load (s32) from @f14) +; 32BIT-DAG: STFD killed renamable $f0, 132, $r1 :: (store (s64)) +; 32BIT-DAG: renamable $r[[REGD:[0-9]+]] = LWZtoc @d15, $r2 :: (load (s32) from got) +; 32BIT-DAG: renamable $f0 = LFD 0, killed renamable $r[[REGD]] :: (dereferenceable load (s64) from @d15) +; 32BIT-DAG: STW killed renamable $r[[SCRATCHREG:[0-9]+]], 140, $r1 :: (store (s32)) +; 32BIT-DAG: renamable $r[[REGF2:[0-9]+]] = LWZtoc @f16, $r2 :: (load (s32) from got) +; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZ 0, killed renamable $r[[REGF2]] :: (load (s32) from @f16) +; 32BIT-DAG: renamable $f1 = LFD 0, killed renamable $r[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) +; 32BIT-DAG: renamable $f2 = LFD 0, killed renamable $r[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) +; 32BIT-DAG: renamable $f3 = LFD 0, killed renamable $r[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) +; 32BIT-DAG: renamable $f4 = LFD 0, killed renamable $r[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) +; 32BIT-DAG: renamable $f5 = LFS 0, killed renamable $r[[SCRATCHREG:[0-9]+]] :: (load (s32) from constant-pool) +; 32BIT-DAG: renamable $f6 = LFD 0, killed renamable $r[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) +; 32BIT-DAG: renamable $f7 = LFD 0, killed renamable $r[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) +; 32BIT-DAG: renamable $f8 = LFD 0, killed renamable $r[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) +; 32BIT-DAG: renamable $f9 = LFD 0, killed renamable $r[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) ; 32BIT-DAG: $f10 = COPY renamable $f1 -; 32BIT-DAG: renamable $f11 = LFD 0, killed renamable $r[[SCRATCHREG:[0-9]+]] :: (load 8 from constant-pool) -; 32BIT-DAG: renamable $f12 = LFD 0, killed renamable $r[[SCRATCHREG:[0-9]+]] :: (load 8 from constant-pool) -; 32BIT-DAG: renamable $f13 = LFD 0, killed renamable $r[[SCRATCHREG:[0-9]+]] :: (load 8 from constant-pool) +; 32BIT-DAG: renamable $f11 = LFD 0, killed renamable $r[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) +; 32BIT-DAG: renamable $f12 = LFD 0, killed renamable $r[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) +; 32BIT-DAG: renamable $f13 = LFD 0, killed renamable $r[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $f1, implicit $f2, implicit $f3, implicit $f4, implicit $f5, implicit $f6, implicit $f7, implicit $f8, implicit $f9, implicit killed $f10, implicit $f11, implicit $f12, implicit $f13, implicit $r2, implicit-def $r1, implicit-def dead $f1 ; 32BIT-NEXT: ADJCALLSTACKUP 144, 0, implicit-def dead $r1, implicit $r1 -; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LDtocCPT %const.0, $x2 :: (load 8 from got) -; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LDtocCPT %const.1, $x2 :: (load 8 from got) -; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LDtocCPT %const.2, $x2 :: (load 8 from got) -; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LDtocCPT %const.[[SCRATCHREG:[0-9]+]], $x2 :: (load 8 from got) -; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LDtocCPT %const.4, $x2 :: (load 8 from got) -; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LDtocCPT %const.5, $x2 :: (load 8 from got) -; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LDtocCPT %const.6, $x2 :: (load 8 from got) -; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LDtocCPT %const.7, $x2 :: (load 8 from got) -; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LDtocCPT %const.8, $x2 :: (load 8 from got) -; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LDtocCPT %const.9, $x2 :: (load 8 from got) -; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LDtocCPT %const.10, $x2 :: (load 8 from got) -; 64BIT-DAG: renamable $x[[REGF1:[0-9]+]] = LDtoc @f14, $x2 :: (load 8 from got) -; 64BIT-DAG: renamable $r3 = LWZ 0, killed renamable $x[[REGF1]] :: (load 4 from @f14) -; 64BIT-DAG: renamable $x[[REGF2:[0-9]+]] = LDtoc @f16, $x2 :: (load 8 from got) -; 64BIT-DAG: renamable $r5 = LWZ 0, killed renamable $x[[REGF2]] :: (load 4 from @f16) -; 64BIT-DAG: renamable $x[[REGD:[0-9]+]] = LDtoc @d15, $x2 :: (load 8 from got) -; 64BIT-DAG: renamable $x4 = LD 0, killed renamable $x[[REGD]] :: (load 8 from @d15) +; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LDtocCPT %const.0, $x2 :: (load (s64) from got) +; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LDtocCPT %const.1, $x2 :: (load (s64) from got) +; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LDtocCPT %const.2, $x2 :: (load (s64) from got) +; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LDtocCPT %const.[[SCRATCHREG:[0-9]+]], $x2 :: (load (s64) from got) +; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LDtocCPT %const.4, $x2 :: (load (s64) from got) +; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LDtocCPT %const.5, $x2 :: (load (s64) from got) +; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LDtocCPT %const.6, $x2 :: (load (s64) from got) +; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LDtocCPT %const.7, $x2 :: (load (s64) from got) +; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LDtocCPT %const.8, $x2 :: (load (s64) from got) +; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LDtocCPT %const.9, $x2 :: (load (s64) from got) +; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LDtocCPT %const.10, $x2 :: (load (s64) from got) +; 64BIT-DAG: renamable $x[[REGF1:[0-9]+]] = LDtoc @f14, $x2 :: (load (s64) from got) +; 64BIT-DAG: renamable $r3 = LWZ 0, killed renamable $x[[REGF1]] :: (load (s32) from @f14) +; 64BIT-DAG: renamable $x[[REGF2:[0-9]+]] = LDtoc @f16, $x2 :: (load (s64) from got) +; 64BIT-DAG: renamable $r5 = LWZ 0, killed renamable $x[[REGF2]] :: (load (s32) from @f16) +; 64BIT-DAG: renamable $x[[REGD:[0-9]+]] = LDtoc @d15, $x2 :: (load (s64) from got) +; 64BIT-DAG: renamable $x4 = LD 0, killed renamable $x[[REGD]] :: (load (s64) from @d15) ; 64BIT-DAG: ADJCALLSTACKDOWN 176, 0, implicit-def dead $r1, implicit $r1 -; 64BIT-DAG: renamable $f1 = LFD 0, killed renamable $x[[SCRATCHREG:[0-9]+]] :: (load 8 from constant-pool) -; 64BIT-DAG: renamable $f2 = LFD 0, killed renamable $x[[SCRATCHREG:[0-9]+]] :: (load 8 from constant-pool) -; 64BIT-DAG: renamable $f[[SCRATCHREG:[0-9]+]] = LFD 0, killed renamable $x[[SCRATCHREG:[0-9]+]] :: (load 8 from constant-pool) -; 64BIT-DAG: renamable $f4 = LFD 0, killed renamable $x[[SCRATCHREG:[0-9]+]] :: (load 8 from constant-pool) -; 64BIT-DAG: renamable $f5 = LFS 0, killed renamable $x[[SCRATCHREG:[0-9]+]] :: (load 4 from constant-pool) -; 64BIT-DAG: renamable $f6 = LFD 0, killed renamable $x[[SCRATCHREG:[0-9]+]] :: (load 8 from constant-pool) -; 64BIT-DAG: renamable $f7 = LFD 0, killed renamable $x[[SCRATCHREG:[0-9]+]] :: (load 8 from constant-pool) -; 64BIT-DAG: renamable $f8 = LFD 0, killed renamable $x[[SCRATCHREG:[0-9]+]] :: (load 8 from constant-pool) -; 64BIT-DAG: renamable $f9 = LFD 0, killed renamable $x[[SCRATCHREG:[0-9]+]] :: (load 8 from constant-pool) +; 64BIT-DAG: renamable $f1 = LFD 0, killed renamable $x[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) +; 64BIT-DAG: renamable $f2 = LFD 0, killed renamable $x[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) +; 64BIT-DAG: renamable $f[[SCRATCHREG:[0-9]+]] = LFD 0, killed renamable $x[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) +; 64BIT-DAG: renamable $f4 = LFD 0, killed renamable $x[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) +; 64BIT-DAG: renamable $f5 = LFS 0, killed renamable $x[[SCRATCHREG:[0-9]+]] :: (load (s32) from constant-pool) +; 64BIT-DAG: renamable $f6 = LFD 0, killed renamable $x[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) +; 64BIT-DAG: renamable $f7 = LFD 0, killed renamable $x[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) +; 64BIT-DAG: renamable $f8 = LFD 0, killed renamable $x[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) +; 64BIT-DAG: renamable $f9 = LFD 0, killed renamable $x[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) ; 64BIT-DAG: $f10 = COPY renamable $f1 -; 64BIT-DAG: renamable $f11 = LFD 0, killed renamable $x[[SCRATCHREG:[0-9]+]] :: (load 8 from constant-pool) -; 64BIT-DAG: renamable $f12 = LFD 0, killed renamable $x[[SCRATCHREG:[0-9]+]] :: (load 8 from constant-pool) -; 64BIT-DAG: renamable $f13 = LFD 0, killed renamable $x[[SCRATCHREG:[0-9]+]] :: (load 8 from constant-pool) +; 64BIT-DAG: renamable $f11 = LFD 0, killed renamable $x[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) +; 64BIT-DAG: renamable $f12 = LFD 0, killed renamable $x[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) +; 64BIT-DAG: renamable $f13 = LFD 0, killed renamable $x[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) ; 64BIT-DAG: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $f1, implicit $f2, implicit $f3, implicit $f4, implicit $f5, implicit $f6, implicit $f7, implicit $f8, implicit $f9, implicit killed $f10, implicit $f11, implicit $f12, implicit $f13, implicit $x2, implicit-def $r1, implicit-def dead $f1 ; 64BIT-NEXT: ADJCALLSTACKUP 176, 0, implicit-def dead $r1, implicit $r1 ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm @@ -2059,14 +2059,14 @@ define void @caller_mix() { ; CHECK-LABEL: name: caller_mix ; 32BIT-DAG: ADJCALLSTACKDOWN 84, 0, implicit-def dead $r1, implicit $r1 -; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZtoc %const.0, $r2 :: (load 4 from got) -; 32BIT-DAG: renamable $f1 = LFD 0, killed renamable $r[[SCRATCHREG:[0-9]+]] :: (load 8 from constant-pool) -; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZtoc %const.1, $r2 :: (load 4 from got) -; 32BIT-DAG: renamable $f2 = LFD 0, killed renamable $r[[SCRATCHREG:[0-9]+]] :: (load 8 from constant-pool) -; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZtoc %const.2, $r2 :: (load 4 from got) -; 32BIT-DAG: renamable $f3 = LFD 0, killed renamable $r[[SCRATCHREG:[0-9]+]] :: (load 8 from constant-pool) -; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZtoc %const.3, $r2 :: (load 4 from got) -; 32BIT-DAG: renamable $f4 = LFD 0, killed renamable $r[[SCRATCHREG:[0-9]+]] :: (load 8 from constant-pool) +; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZtoc %const.0, $r2 :: (load (s32) from got) +; 32BIT-DAG: renamable $f1 = LFD 0, killed renamable $r[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) +; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZtoc %const.1, $r2 :: (load (s32) from got) +; 32BIT-DAG: renamable $f2 = LFD 0, killed renamable $r[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) +; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZtoc %const.2, $r2 :: (load (s32) from got) +; 32BIT-DAG: renamable $f3 = LFD 0, killed renamable $r[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) +; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LWZtoc %const.3, $r2 :: (load (s32) from got) +; 32BIT-DAG: renamable $f4 = LFD 0, killed renamable $r[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) ; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LI 1 ; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LI 2 ; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LIS 457 @@ -2074,34 +2074,34 @@ define void @caller_mix() { ; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LI 40 ; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LI 50 ; 32BIT-DAG: renamable $r[[SCRATCHREG:[0-9]+]] = LI 60 -; 32BIT-DAG: STW killed renamable $r[[REG1:[0-9]+]], 56, $r1 :: (store 4) -; 32BIT-DAG: STW killed renamable $r[[REG2:[0-9]+]], 60, $r1 :: (store 4) -; 32BIT-DAG: STW killed renamable $r[[REG3:[0-9]+]], 64, $r1 :: (store 4) -; 32BIT-DAG: STW killed renamable $r[[REG4:[0-9]+]], 68, $r1 :: (store 4) -; 32BIT-DAG: STW killed renamable $r[[REG5:[0-9]+]], 72, $r1 :: (store 4) -; 32BIT-DAG: STW killed renamable $r[[REG6:[0-9]+]], 76, $r1 :: (store 4) -; 32BIT-DAG: STW killed renamable $r[[REG7:[0-9]+]], 80, $r1 :: (store 4) +; 32BIT-DAG: STW killed renamable $r[[REG1:[0-9]+]], 56, $r1 :: (store (s32)) +; 32BIT-DAG: STW killed renamable $r[[REG2:[0-9]+]], 60, $r1 :: (store (s32)) +; 32BIT-DAG: STW killed renamable $r[[REG3:[0-9]+]], 64, $r1 :: (store (s32)) +; 32BIT-DAG: STW killed renamable $r[[REG4:[0-9]+]], 68, $r1 :: (store (s32)) +; 32BIT-DAG: STW killed renamable $r[[REG5:[0-9]+]], 72, $r1 :: (store (s32)) +; 32BIT-DAG: STW killed renamable $r[[REG6:[0-9]+]], 76, $r1 :: (store (s32)) +; 32BIT-DAG: STW killed renamable $r[[REG7:[0-9]+]], 80, $r1 :: (store (s32)) ; 32BIT-DAG: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $f1, implicit $f2, implicit $f3, implicit $f4, implicit $r2, implicit-def $r1, implicit-def dead $r3 ; 32BIT-DAG: ADJCALLSTACKUP 84, 0, implicit-def dead $r1, implicit $r1 ; 32BIT-NEXT: BLR implicit $lr, implicit $rm ; 64BIT-DAG: ADJCALLSTACKDOWN 128, 0, implicit-def dead $r1, implicit $r1 -; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LDtocCPT %const.0, $x2 :: (load 8 from got) -; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LDtocCPT %const.1, $x2 :: (load 8 from got) -; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LDtocCPT %const.2, $x2 :: (load 8 from got) -; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LDtocCPT %const.3, $x2 :: (load 8 from got) -; 64BIT-DAG: renamable $f1 = LFD 0, killed renamable $x[[SCRATCHREG:[0-9]+]] :: (load 8 from constant-pool) -; 64BIT-DAG: renamable $f2 = LFD 0, killed renamable $x[[SCRATCHREG:[0-9]+]] :: (load 8 from constant-pool) -; 64BIT-DAG: renamable $f3 = LFD 0, killed renamable $x[[SCRATCHREG:[0-9]+]] :: (load 8 from constant-pool) -; 64BIT-DAG: renamable $f4 = LFD 0, killed renamable $x[[SCRATCHREG:[0-9]+]] :: (load 8 from constant-pool) +; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LDtocCPT %const.0, $x2 :: (load (s64) from got) +; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LDtocCPT %const.1, $x2 :: (load (s64) from got) +; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LDtocCPT %const.2, $x2 :: (load (s64) from got) +; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LDtocCPT %const.3, $x2 :: (load (s64) from got) +; 64BIT-DAG: renamable $f1 = LFD 0, killed renamable $x[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) +; 64BIT-DAG: renamable $f2 = LFD 0, killed renamable $x[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) +; 64BIT-DAG: renamable $f3 = LFD 0, killed renamable $x[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) +; 64BIT-DAG: renamable $f4 = LFD 0, killed renamable $x[[SCRATCHREG:[0-9]+]] :: (load (s64) from constant-pool) ; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LI8 50 ; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LI8 60 ; 64BIT-DAG: renamable $x[[SCRATCHREG:[0-9]+]] = LIS8 457 ; 64BIT-DAG: $x7 = LI8 1 ; 64BIT-DAG: $x8 = LI8 2 ; 64BIT-DAG: $x10 = LI8 40 -; 64BIT-DAG: STD killed renamable $x[[REG1:[0-9]+]], 112, $x1 :: (store 8) -; 64BIT-DAG: STD killed renamable $x[[REG2:[0-9]+]], 120, $x1 :: (store 8) +; 64BIT-DAG: STD killed renamable $x[[REG1:[0-9]+]], 112, $x1 :: (store (s64)) +; 64BIT-DAG: STD killed renamable $x[[REG2:[0-9]+]], 120, $x1 :: (store (s64)) ; 64BIT: ADJCALLSTACKUP 128, 0, implicit-def dead $r1, implicit $r1 ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm @@ -2242,34 +2242,34 @@ define void @caller_mix() { ; 32BIT-DAG: $r8 = LI 6 ; 32BIT-DAG: $r9 = LI 7 ; 32BIT-DAG: $r10 = LI 8 -; 32BIT-DAG: STW killed renamable $r[[REG1:[0-9]+]], 56, $r1 :: (store 4, align 8) -; 32BIT-DAG: STW renamable $r[[REG2:[0-9]+]], 60, $r1 :: (store 4 into unknown-address + 4, basealign 8) -; 32BIT-DAG: STW killed renamable $r[[REG3:[0-9]+]], 64, $r1 :: (store 4, align 8) -; 32BIT-DAG: STW renamable $r[[REG4:[0-9]+]], 68, $r1 :: (store 4 into unknown-address + 4, basealign 8) -; 32BIT-DAG: STW killed renamable $r[[REG5:[0-9]+]], 72, $r1 :: (store 4, align 8) -; 32BIT-DAG: STW renamable $r[[REG6:[0-9]+]], 76, $r1 :: (store 4 into unknown-address + 4, basealign 8) -; 32BIT-DAG: STW killed renamable $r[[REG7:[0-9]+]], 80, $r1 :: (store 4, align 8) -; 32BIT-DAG: STW renamable $r[[REG8:[0-9]+]], 84, $r1 :: (store 4 into unknown-address + 4, basealign 8) -; 32BIT-DAG: STW killed renamable $r[[REG9:[0-9]+]], 88, $r1 :: (store 4, align 8) -; 32BIT-DAG: STW renamable $r[[REG10:[0-9]+]], 92, $r1 :: (store 4 into unknown-address + 4, basealign 8) -; 32BIT-DAG: STW killed renamable $r[[REG11:[0-9]+]], 96, $r1 :: (store 4, align 8) -; 32BIT-DAG: STW renamable $r[[REG12:[0-9]+]], 100, $r1 :: (store 4 into unknown-address + 4, basealign 8) -; 32BIT-DAG: STW killed renamable $r[[REG13:[0-9]+]], 104, $r1 :: (store 4, align 8) -; 32BIT-DAG: STW renamable $r[[REG14:[0-9]+]], 108, $r1 :: (store 4 into unknown-address + 4, basealign 8) -; 32BIT-DAG: STW killed renamable $r[[REG15:[0-9]+]], 112, $r1 :: (store 4, align 8) -; 32BIT-DAG: STW renamable $r[[REG16:[0-9]+]], 116, $r1 :: (store 4 into unknown-address + 4, basealign 8) -; 32BIT-DAG: STW killed renamable $r[[REG17:[0-9]+]], 120, $r1 :: (store 4, align 8) -; 32BIT-DAG: STW killed renamable $r[[REG18:[0-9]+]], 128, $r1 :: (store 4, align 8) -; 32BIT-DAG: STW renamable $r[[REG19:[0-9]+]], 124, $r1 :: (store 4 into unknown-address + 4, basealign 8) -; 32BIT-DAG: STW killed renamable $r[[REG20:[0-9]+]], 132, $r1 :: (store 4 into unknown-address + 4, basealign 8) -; 32BIT-DAG: STW killed renamable $r[[REG21:[0-9]+]], 136, $r1 :: (store 4, align 8) -; 32BIT-DAG: STW killed renamable $r[[REG22:[0-9]+]], 140, $r1 :: (store 4 into unknown-address + 4, basealign 8) -; 32BIT-DAG: STW killed renamable $r[[REG23:[0-9]+]], 144, $r1 :: (store 4, align 8) -; 32BIT-DAG: STW killed renamable $r[[REG24:[0-9]+]], 148, $r1 :: (store 4 into unknown-address + 4, basealign 8) -; 32BIT-DAG: STW killed renamable $r[[REG25:[0-9]+]], 152, $r1 :: (store 4, align 8) -; 32BIT-DAG: STW killed renamable $r[[REG26:[0-9]+]], 156, $r1 :: (store 4 into unknown-address + 4, basealign 8) -; 32BIT-DAG: STW killed renamable $r[[REG27:[0-9]+]], 160, $r1 :: (store 4, align 8) -; 32BIT-DAG: STW killed renamable $r[[REG28:[0-9]+]], 164, $r1 :: (store 4 into unknown-address + 4, basealign 8) +; 32BIT-DAG: STW killed renamable $r[[REG1:[0-9]+]], 56, $r1 :: (store (s32), align 8) +; 32BIT-DAG: STW renamable $r[[REG2:[0-9]+]], 60, $r1 :: (store (s32) into unknown-address + 4, basealign 8) +; 32BIT-DAG: STW killed renamable $r[[REG3:[0-9]+]], 64, $r1 :: (store (s32), align 8) +; 32BIT-DAG: STW renamable $r[[REG4:[0-9]+]], 68, $r1 :: (store (s32) into unknown-address + 4, basealign 8) +; 32BIT-DAG: STW killed renamable $r[[REG5:[0-9]+]], 72, $r1 :: (store (s32), align 8) +; 32BIT-DAG: STW renamable $r[[REG6:[0-9]+]], 76, $r1 :: (store (s32) into unknown-address + 4, basealign 8) +; 32BIT-DAG: STW killed renamable $r[[REG7:[0-9]+]], 80, $r1 :: (store (s32), align 8) +; 32BIT-DAG: STW renamable $r[[REG8:[0-9]+]], 84, $r1 :: (store (s32) into unknown-address + 4, basealign 8) +; 32BIT-DAG: STW killed renamable $r[[REG9:[0-9]+]], 88, $r1 :: (store (s32), align 8) +; 32BIT-DAG: STW renamable $r[[REG10:[0-9]+]], 92, $r1 :: (store (s32) into unknown-address + 4, basealign 8) +; 32BIT-DAG: STW killed renamable $r[[REG11:[0-9]+]], 96, $r1 :: (store (s32), align 8) +; 32BIT-DAG: STW renamable $r[[REG12:[0-9]+]], 100, $r1 :: (store (s32) into unknown-address + 4, basealign 8) +; 32BIT-DAG: STW killed renamable $r[[REG13:[0-9]+]], 104, $r1 :: (store (s32), align 8) +; 32BIT-DAG: STW renamable $r[[REG14:[0-9]+]], 108, $r1 :: (store (s32) into unknown-address + 4, basealign 8) +; 32BIT-DAG: STW killed renamable $r[[REG15:[0-9]+]], 112, $r1 :: (store (s32), align 8) +; 32BIT-DAG: STW renamable $r[[REG16:[0-9]+]], 116, $r1 :: (store (s32) into unknown-address + 4, basealign 8) +; 32BIT-DAG: STW killed renamable $r[[REG17:[0-9]+]], 120, $r1 :: (store (s32), align 8) +; 32BIT-DAG: STW killed renamable $r[[REG18:[0-9]+]], 128, $r1 :: (store (s32), align 8) +; 32BIT-DAG: STW renamable $r[[REG19:[0-9]+]], 124, $r1 :: (store (s32) into unknown-address + 4, basealign 8) +; 32BIT-DAG: STW killed renamable $r[[REG20:[0-9]+]], 132, $r1 :: (store (s32) into unknown-address + 4, basealign 8) +; 32BIT-DAG: STW killed renamable $r[[REG21:[0-9]+]], 136, $r1 :: (store (s32), align 8) +; 32BIT-DAG: STW killed renamable $r[[REG22:[0-9]+]], 140, $r1 :: (store (s32) into unknown-address + 4, basealign 8) +; 32BIT-DAG: STW killed renamable $r[[REG23:[0-9]+]], 144, $r1 :: (store (s32), align 8) +; 32BIT-DAG: STW killed renamable $r[[REG24:[0-9]+]], 148, $r1 :: (store (s32) into unknown-address + 4, basealign 8) +; 32BIT-DAG: STW killed renamable $r[[REG25:[0-9]+]], 152, $r1 :: (store (s32), align 8) +; 32BIT-DAG: STW killed renamable $r[[REG26:[0-9]+]], 156, $r1 :: (store (s32) into unknown-address + 4, basealign 8) +; 32BIT-DAG: STW killed renamable $r[[REG27:[0-9]+]], 160, $r1 :: (store (s32), align 8) +; 32BIT-DAG: STW killed renamable $r[[REG28:[0-9]+]], 164, $r1 :: (store (s32) into unknown-address + 4, basealign 8) ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r5, implicit $r6, implicit $r7, implicit $r8, implicit $r9, implicit $r10, implicit $f1, implicit $f2, implicit $f3, implicit $f4, implicit $f5, implicit $f6, implicit $f7, implicit $f8, implicit $f9, implicit $f10, implicit $f11, implicit $f12, implicit $f13, implicit $r2, implicit-def $r1, implicit-def dead $r3 ; 32BIT-NEXT: ADJCALLSTACKUP 168, 0, implicit-def dead $r1, implicit $r1 @@ -2283,20 +2283,20 @@ define void @caller_mix() { ; 64BIT-DAG: $x8 = LI8 6 ; 64BIT-DAG: $x9 = LI8 7 ; 64BIT-DAG: $x10 = LI8 8 -; 64BIT-DAG: STD killed renamable $x[[REG1:[0-9]+]], 112, $x1 :: (store 8) -; 64BIT-DAG: STD killed renamable $x[[REG2:[0-9]+]], 120, $x1 :: (store 8) -; 64BIT-DAG: STD killed renamable $x[[REG3:[0-9]+]], 128, $x1 :: (store 8) -; 64BIT-DAG: STD killed renamable $x[[REG4:[0-9]+]], 136, $x1 :: (store 8) -; 64BIT-DAG: STD killed renamable $x[[REG5:[0-9]+]], 144, $x1 :: (store 8) -; 64BIT-DAG: STD killed renamable $x[[REG6:[0-9]+]], 152, $x1 :: (store 8) -; 64BIT-DAG: STD killed renamable $x[[REG7:[0-9]+]], 160, $x1 :: (store 8) -; 64BIT-DAG: STD killed renamable $x[[REG8:[0-9]+]], 168, $x1 :: (store 8) -; 64BIT-DAG: STD killed renamable $x[[REG9:[0-9]+]], 176, $x1 :: (store 8) -; 64BIT-DAG: STD killed renamable $x[[REG10:[0-9]+]], 184, $x1 :: (store 8) -; 64BIT-DAG: STD killed renamable $x[[REG12:[0-9]+]], 192, $x1 :: (store 8) -; 64BIT-DAG: STD killed renamable $x[[REG13:[0-9]+]], 200, $x1 :: (store 8) -; 64BIT-DAG: STD killed renamable $x[[REG14:[0-9]+]], 208, $x1 :: (store 8) -; 64BIT-DAG: STD killed renamable $x[[REG15:[0-9]+]], 216, $x1 :: (store 8) +; 64BIT-DAG: STD killed renamable $x[[REG1:[0-9]+]], 112, $x1 :: (store (s64)) +; 64BIT-DAG: STD killed renamable $x[[REG2:[0-9]+]], 120, $x1 :: (store (s64)) +; 64BIT-DAG: STD killed renamable $x[[REG3:[0-9]+]], 128, $x1 :: (store (s64)) +; 64BIT-DAG: STD killed renamable $x[[REG4:[0-9]+]], 136, $x1 :: (store (s64)) +; 64BIT-DAG: STD killed renamable $x[[REG5:[0-9]+]], 144, $x1 :: (store (s64)) +; 64BIT-DAG: STD killed renamable $x[[REG6:[0-9]+]], 152, $x1 :: (store (s64)) +; 64BIT-DAG: STD killed renamable $x[[REG7:[0-9]+]], 160, $x1 :: (store (s64)) +; 64BIT-DAG: STD killed renamable $x[[REG8:[0-9]+]], 168, $x1 :: (store (s64)) +; 64BIT-DAG: STD killed renamable $x[[REG9:[0-9]+]], 176, $x1 :: (store (s64)) +; 64BIT-DAG: STD killed renamable $x[[REG10:[0-9]+]], 184, $x1 :: (store (s64)) +; 64BIT-DAG: STD killed renamable $x[[REG12:[0-9]+]], 192, $x1 :: (store (s64)) +; 64BIT-DAG: STD killed renamable $x[[REG13:[0-9]+]], 200, $x1 :: (store (s64)) +; 64BIT-DAG: STD killed renamable $x[[REG14:[0-9]+]], 208, $x1 :: (store (s64)) +; 64BIT-DAG: STD killed renamable $x[[REG15:[0-9]+]], 216, $x1 :: (store (s64)) ; 64BIT-DAG: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit $x8, implicit $x9, implicit $x10, implicit $f1, implicit $f2, implicit $f3, implicit $f4, implicit $f5, implicit $f6, implicit $f7, implicit $f8, implicit $f9, implicit $f10, implicit $f11, implicit $f12, implicit $f13, implicit $x2, implicit-def $r1, implicit-def dead $x3 ; 64BIT-NEXT: ADJCALLSTACKUP 224, 0, implicit-def dead $r1, implicit $r1 diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-byval-mem.ll b/llvm/test/CodeGen/PowerPC/aix-cc-byval-mem.ll index 3f30bfd0e8370..ac6c25de82d88 100644 --- a/llvm/test/CodeGen/PowerPC/aix-cc-byval-mem.ll +++ b/llvm/test/CodeGen/PowerPC/aix-cc-byval-mem.ll @@ -83,7 +83,7 @@ entry: ; Confirm the expected memcpy call is independent of the call to test_byval_mem2. ; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 ; 32BIT-NEXT: %0:gprc = nuw ADDI $r1, 56 -; 32BIT-NEXT: %1:gprc = LWZtoc @gS256, $r2 :: (load 4 from got) +; 32BIT-NEXT: %1:gprc = LWZtoc @gS256, $r2 :: (load (s32) from got) ; 32BIT-NEXT: %2:gprc = LI 256 ; 32BIT-DAG: $r3 = COPY %0 ; 32BIT-DAG: $r4 = COPY %1 @@ -115,7 +115,7 @@ entry: ; Confirm the expected memcpy call is independent of the call to test_byval_mem2. ; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 ; 64BIT-NEXT: %0:g8rc = nuw ADDI8 $x1, 112 -; 64BIT-NEXT: %1:g8rc = LDtoc @gS256, $x2 :: (load 8 from got) +; 64BIT-NEXT: %1:g8rc = LDtoc @gS256, $x2 :: (load (s64) from got) ; 64BIT-NEXT: %2:g8rc = LI8 256 ; 64BIT-DAG: $x3 = COPY %0 ; 64BIT-DAG: $x4 = COPY %1 @@ -180,7 +180,7 @@ entry: ; Confirm the expected memcpy call is independent of the call to test_byval_mem3. ; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 -; 32BIT-NEXT: %0:gprc_and_gprc_nor0 = LWZtoc @gS57, $r2 :: (load 4 from got) +; 32BIT-NEXT: %0:gprc_and_gprc_nor0 = LWZtoc @gS57, $r2 :: (load (s32) from got) ; 32BIT-NEXT: %1:gprc = nuw ADDI %0, 24 ; 32BIT-NEXT: %2:gprc = nuw ADDI $r1, 56 ; 32BIT-NEXT: %3:gprc = LI 33 @@ -254,12 +254,12 @@ entry: ; 32BIT-DAG: %5:gprc = COPY $r8 ; 32BIT-DAG: %6:gprc = COPY $r9 ; 32BIT-DAG: %7:gprc = COPY $r10 -; 32BIT-NEXT: STW %2, 0, %fixed-stack.0 :: (store 4 into %fixed-stack.0 -; 32BIT-DAG: STW %3, 4, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 4 -; 32BIT-DAG: STW %4, 8, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 8 -; 32BIT-DAG: STW %5, 12, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 12 -; 32BIT-DAG: STW %6, 16, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 16 -; 32BIT-DAG: STW %7, 20, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 20 +; 32BIT-NEXT: STW %2, 0, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 +; 32BIT-DAG: STW %3, 4, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 4 +; 32BIT-DAG: STW %4, 8, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 8 +; 32BIT-DAG: STW %5, 12, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 12 +; 32BIT-DAG: STW %6, 16, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 16 +; 32BIT-DAG: STW %7, 20, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 20 ; 32BIT-NEXT: BLR implicit $lr, implicit $rm ; 64BIT: fixedStack: @@ -274,12 +274,12 @@ entry: ; 64BIT-DAG: %5:g8rc = COPY $x8 ; 64BIT-DAG: %6:g8rc = COPY $x9 ; 64BIT-DAG: %7:g8rc = COPY $x10 -; 64BIT-NEXT: STD %2, 0, %fixed-stack.0 :: (store 8 into %fixed-stack.0, align 16) -; 64BIT-DAG: STD %3, 8, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 8) -; 64BIT-DAG: STD %4, 16, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 16, align 16) -; 64BIT-DAG: STD %5, 24, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 24) -; 64BIT-DAG: STD %6, 32, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 32, align 16) -; 64BIT-DAG: STD %7, 40, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 40) +; 64BIT-NEXT: STD %2, 0, %fixed-stack.0 :: (store (s64) into %fixed-stack.0, align 16) +; 64BIT-DAG: STD %3, 8, %fixed-stack.0 :: (store (s64) into %fixed-stack.0 + 8) +; 64BIT-DAG: STD %4, 16, %fixed-stack.0 :: (store (s64) into %fixed-stack.0 + 16, align 16) +; 64BIT-DAG: STD %5, 24, %fixed-stack.0 :: (store (s64) into %fixed-stack.0 + 24) +; 64BIT-DAG: STD %6, 32, %fixed-stack.0 :: (store (s64) into %fixed-stack.0 + 32, align 16) +; 64BIT-DAG: STD %7, 40, %fixed-stack.0 :: (store (s64) into %fixed-stack.0 + 40) ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm %struct_S31 = type { [31 x i8] } @@ -300,7 +300,7 @@ entry: ; Confirm the expected memcpy call is independent of the call to test_byval_mem4. ; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 ; 32BIT-NEXT: %3:gprc = nuw ADDI $r1, 60 -; 32BIT-NEXT: %4:gprc = LWZtoc @gS256, $r2 :: (load 4 from got) +; 32BIT-NEXT: %4:gprc = LWZtoc @gS256, $r2 :: (load (s32) from got) ; 32BIT-NEXT: %5:gprc = LI 256 ; 32BIT-DAG: $r3 = COPY %3 ; 32BIT-DAG: $r4 = COPY %4 @@ -342,7 +342,7 @@ entry: ; Confirm the expected memcpy call is independent of the call to test_byval_mem4. ; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT-NEXT: %0:g8rc_and_g8rc_nox0 = LDtoc @gS256, $x2 :: (load 8 from got) +; 64BIT-NEXT: %0:g8rc_and_g8rc_nox0 = LDtoc @gS256, $x2 :: (load (s64) from got) ; 64BIT-NEXT: %1:g8rc = nuw ADDI8 %0, 24 ; 64BIT-NEXT: %2:g8rc = nuw ADDI8 $x1, 112 ; 64BIT-NEXT: %3:g8rc = LI8 232 @@ -407,13 +407,13 @@ entry: ; 32BIT-DAG: %5:gprc = COPY $r8 ; 32BIT-DAG: %6:gprc = COPY $r9 ; 32BIT-DAG: %7:gprc = COPY $r10 -; 32BIT-NEXT: STW %1, 0, %fixed-stack.1 :: (store 4 into %fixed-stack.1 -; 32BIT-DAG: STW %2, 4, %fixed-stack.1 :: (store 4 into %fixed-stack.1 + 4 -; 32BIT-DAG: STW %3, 8, %fixed-stack.1 :: (store 4 into %fixed-stack.1 + 8 -; 32BIT-DAG: STW %4, 12, %fixed-stack.1 :: (store 4 into %fixed-stack.1 + 12 -; 32BIT-DAG: STW %5, 16, %fixed-stack.1 :: (store 4 into %fixed-stack.1 + 16 -; 32BIT-DAG: STW %6, 20, %fixed-stack.1 :: (store 4 into %fixed-stack.1 + 20 -; 32BIT-DAG: STW %7, 24, %fixed-stack.1 :: (store 4 into %fixed-stack.1 + 24 +; 32BIT-NEXT: STW %1, 0, %fixed-stack.1 :: (store (s32) into %fixed-stack.1 +; 32BIT-DAG: STW %2, 4, %fixed-stack.1 :: (store (s32) into %fixed-stack.1 + 4 +; 32BIT-DAG: STW %3, 8, %fixed-stack.1 :: (store (s32) into %fixed-stack.1 + 8 +; 32BIT-DAG: STW %4, 12, %fixed-stack.1 :: (store (s32) into %fixed-stack.1 + 12 +; 32BIT-DAG: STW %5, 16, %fixed-stack.1 :: (store (s32) into %fixed-stack.1 + 16 +; 32BIT-DAG: STW %6, 20, %fixed-stack.1 :: (store (s32) into %fixed-stack.1 + 20 +; 32BIT-DAG: STW %7, 24, %fixed-stack.1 :: (store (s32) into %fixed-stack.1 + 24 ; 32BIT-NEXT: BLR implicit $lr, implicit $rm ; 64BIT: fixedStack: @@ -431,11 +431,11 @@ entry: ; 64BIT-DAG: %5:g8rc = COPY $x8 ; 64BIT-DAG: %6:g8rc = COPY $x9 ; 64BIT-DAG: %7:g8rc = COPY $x10 -; 64BIT-NEXT: STD %1, 0, %fixed-stack.1 :: (store 8 into %fixed-stack.1 -; 64BIT-DAG: STD %2, 8, %fixed-stack.1 :: (store 8 into %fixed-stack.1 + 8 -; 64BIT-DAG: STD %3, 16, %fixed-stack.1 :: (store 8 into %fixed-stack.1 + 16 -; 64BIT-DAG: STD %4, 24, %fixed-stack.1 :: (store 8 into %fixed-stack.1 + 24 -; 64BIT-DAG: STD %5, 0, %fixed-stack.0 :: (store 8 into %fixed-stack.0 -; 64BIT-DAG: STD %6, 8, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 8 -; 64BIT-DAG: STD %7, 16, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 16 +; 64BIT-NEXT: STD %1, 0, %fixed-stack.1 :: (store (s64) into %fixed-stack.1 +; 64BIT-DAG: STD %2, 8, %fixed-stack.1 :: (store (s64) into %fixed-stack.1 + 8 +; 64BIT-DAG: STD %3, 16, %fixed-stack.1 :: (store (s64) into %fixed-stack.1 + 16 +; 64BIT-DAG: STD %4, 24, %fixed-stack.1 :: (store (s64) into %fixed-stack.1 + 24 +; 64BIT-DAG: STD %5, 0, %fixed-stack.0 :: (store (s64) into %fixed-stack.0 +; 64BIT-DAG: STD %6, 8, %fixed-stack.0 :: (store (s64) into %fixed-stack.0 + 8 +; 64BIT-DAG: STD %7, 16, %fixed-stack.0 :: (store (s64) into %fixed-stack.0 + 16 ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-byval-split.ll b/llvm/test/CodeGen/PowerPC/aix-cc-byval-split.ll index 6372f9ef17d6c..bcefacfee3ba7 100644 --- a/llvm/test/CodeGen/PowerPC/aix-cc-byval-split.ll +++ b/llvm/test/CodeGen/PowerPC/aix-cc-byval-split.ll @@ -37,17 +37,17 @@ entry: ; CHECK32-NEXT: liveins: $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10 ; CHECK32: renamable $r[[REG1:[0-9]+]] = LWZ 84, %fixed-stack.0 -; CHECK32-DAG: STW killed renamable $r3, 0, %fixed-stack.0 :: (store 4 into %fixed-stack.0 -; CHECK32-DAG: STW killed renamable $r4, 4, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 4 +; CHECK32-DAG: STW killed renamable $r3, 0, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 +; CHECK32-DAG: STW killed renamable $r4, 4, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 4 ; CHECK32: renamable $r[[REG2:[0-9]+]] = LWZ 80, %fixed-stack.0 -; CHECK32-DAG: STW killed renamable $r5, 8, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 8 -; CHECK32-DAG: STW killed renamable $r6, 12, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 12 -; CHECK32-DAG: STW renamable $r7, 16, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 16 -; CHECK32-DAG: STW renamable $r8, 20, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 20 -; CHECK32-DAG: STW killed renamable $r9, 24, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 24 +; CHECK32-DAG: STW killed renamable $r5, 8, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 8 +; CHECK32-DAG: STW killed renamable $r6, 12, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 12 +; CHECK32-DAG: STW renamable $r7, 16, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 16 +; CHECK32-DAG: STW renamable $r8, 20, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 20 +; CHECK32-DAG: STW killed renamable $r9, 24, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 24 ; CHECK32: renamable $r4 = ADDC killed renamable $r8, killed renamable $r[[REG1]], implicit-def $carry ; CHECK32: renamable $r3 = ADDE killed renamable $r7, killed renamable $r[[REG2]], implicit-def dead $carry, implicit killed $carry -; CHECK32 STW killed renamable $r10, 28, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 28 +; CHECK32 STW killed renamable $r10, 28, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 28 ; CHECK32: BLR implicit $lr, implicit $rm, implicit $r3, implicit $r4 @@ -69,13 +69,13 @@ entry: ; CHECK64: liveins: $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10 ; CHECK64: renamable $x[[REG1:[0-9]+]] = LD 80, %fixed-stack.0 -; CHECK64: STD killed renamable $x3, 0, %fixed-stack.0 :: (store 8 into %fixed-stack.0 -; CHECK64: STD killed renamable $x4, 8, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 8 -; CHECK64: STD renamable $x5, 16, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 16 -; CHECK64: STD killed renamable $x6, 24, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 24 -; CHECK64: STD killed renamable $x7, 32, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 32 -; CHECK64: STD killed renamable $x8, 40, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 40 -; CHECK64: STD killed renamable $x9, 48, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 48 +; CHECK64: STD killed renamable $x3, 0, %fixed-stack.0 :: (store (s64) into %fixed-stack.0 +; CHECK64: STD killed renamable $x4, 8, %fixed-stack.0 :: (store (s64) into %fixed-stack.0 + 8 +; CHECK64: STD renamable $x5, 16, %fixed-stack.0 :: (store (s64) into %fixed-stack.0 + 16 +; CHECK64: STD killed renamable $x6, 24, %fixed-stack.0 :: (store (s64) into %fixed-stack.0 + 24 +; CHECK64: STD killed renamable $x7, 32, %fixed-stack.0 :: (store (s64) into %fixed-stack.0 + 32 +; CHECK64: STD killed renamable $x8, 40, %fixed-stack.0 :: (store (s64) into %fixed-stack.0 + 40 +; CHECK64: STD killed renamable $x9, 48, %fixed-stack.0 :: (store (s64) into %fixed-stack.0 + 48 ; CHECK64: renamable $x3 = ADD8 killed renamable $x5, killed renamable $x[[REG1]] -; CHECK64: STD killed renamable $x10, 56, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 56 +; CHECK64: STD killed renamable $x10, 56, %fixed-stack.0 :: (store (s64) into %fixed-stack.0 + 56 ; CHECK64: BLR8 implicit $lr8, implicit $rm, implicit $x3 diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-byval.ll b/llvm/test/CodeGen/PowerPC/aix-cc-byval.ll index e495bb04dba88..b33ec0be95f80 100644 --- a/llvm/test/CodeGen/PowerPC/aix-cc-byval.ll +++ b/llvm/test/CodeGen/PowerPC/aix-cc-byval.ll @@ -30,8 +30,8 @@ entry: ; CHECK-LABEL: name: call_test_byval_1Byte{{.*}} ; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 -; 32BIT-NEXT: renamable $r[[REG:[0-9]+]] = LWZtoc @gS1, $r2 :: (load 4 from got) -; 32BIT-NEXT: renamable $r3 = LBZ 0, killed renamable $r[[REG]] :: (load 1) +; 32BIT-NEXT: renamable $r[[REG:[0-9]+]] = LWZtoc @gS1, $r2 :: (load (s32) from got) +; 32BIT-NEXT: renamable $r3 = LBZ 0, killed renamable $r[[REG]] :: (load (s8)) ; 32BIT-NEXT: renamable $r3 = RLWINM killed renamable $r3, 24, 0, 7 ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r2, implicit-def $r1 ; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 @@ -47,8 +47,8 @@ entry: ; ASM32-NEXT: addi 1, 1, 64 ; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT-NEXT: renamable $x[[REG:[0-9]+]] = LDtoc @gS1, $x2 :: (load 8 from got) -; 64BIT-NEXT: renamable $x3 = LBZ8 0, killed renamable $x[[REG]] :: (load 1) +; 64BIT-NEXT: renamable $x[[REG:[0-9]+]] = LDtoc @gS1, $x2 :: (load (s64) from got) +; 64BIT-NEXT: renamable $x3 = LBZ8 0, killed renamable $x[[REG]] :: (load (s8)) ; 64BIT-NEXT: renamable $x3 = RLDICR killed renamable $x3, 56, 7 ; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1 ; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 @@ -80,8 +80,8 @@ entry: ; 32BIT: bb.0.entry: ; 32BIT-NEXT: liveins: $r3 -; 32BIT: STW killed renamable $r3, 0, %fixed-stack.0 :: (store 4 into %fixed-stack.0, align 8) -; 32BIT-NEXT: renamable $r3 = LBZ 0, %fixed-stack.0 :: (dereferenceable load 1 +; 32BIT: STW killed renamable $r3, 0, %fixed-stack.0 :: (store (s32) into %fixed-stack.0, align 8) +; 32BIT-NEXT: renamable $r3 = LBZ 0, %fixed-stack.0 :: (dereferenceable load (s8) ; 32BIT-NEXT: BLR ; 64BIT: fixedStack: @@ -92,8 +92,8 @@ entry: ; 64BIT: bb.0.entry: ; 64BIT-NEXT: liveins: $x3 -; 64BIT: STD killed renamable $x3, 0, %fixed-stack.0 :: (store 8 into %fixed-stack.0, align 16) -; 64BIT-NEXT: renamable $x3 = LBZ8 0, %fixed-stack.0 :: (dereferenceable load 1 +; 64BIT: STD killed renamable $x3, 0, %fixed-stack.0 :: (store (s64) into %fixed-stack.0, align 16) +; 64BIT-NEXT: renamable $x3 = LBZ8 0, %fixed-stack.0 :: (dereferenceable load (s8) ; CHECKASM-LABEL: .test_byval_1Byte: @@ -122,12 +122,12 @@ entry: ; CHECK-LABEL: name: call_test_byval_2Byte{{.*}} ; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings. -; 32BIT: renamable $r[[REG1:[0-9]+]] = LWZtoc @f, $r2 :: (load 4 from got) -; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r[[REG1]] :: (dereferenceable load 4 from @f) +; 32BIT: renamable $r[[REG1:[0-9]+]] = LWZtoc @f, $r2 :: (load (s32) from got) +; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r[[REG1]] :: (dereferenceable load (s32) from @f) ; 32BIT-NEXT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 ; 32BIT-DAG: $r3 = LI 42 -; 32BIT-DAG: renamable $r[[REG2:[0-9]+]] = LWZtoc @gS2, $r2 :: (load 4 from got) -; 32BIT-DAG: renamable $r[[REG3:[0-9]+]] = LHZ 0, killed renamable $r[[REG2]] :: (load 2) +; 32BIT-DAG: renamable $r[[REG2:[0-9]+]] = LWZtoc @gS2, $r2 :: (load (s32) from got) +; 32BIT-DAG: renamable $r[[REG3:[0-9]+]] = LHZ 0, killed renamable $r[[REG2]] :: (load (s16)) ; 32BIT-DAG: renamable $r5 = RLWINM killed renamable $r[[REG3]], 16, 0, 15 ; 32BIT-DAG: $f2 = COPY renamable $f1 ; 32BIT-DAG: $r7 = LI 43 @@ -151,12 +151,12 @@ entry: ; ASM32-NEXT: addi 1, 1, 64 ; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings. -; 64BIT: renamable $x[[REG1:[0-9]+]] = LDtoc @f, $x2 :: (load 8 from got) -; 64BIT-NEXT: renamable $f1 = LFS 0, killed renamable $x[[REG1]] :: (dereferenceable load 4 from @f) +; 64BIT: renamable $x[[REG1:[0-9]+]] = LDtoc @f, $x2 :: (load (s64) from got) +; 64BIT-NEXT: renamable $f1 = LFS 0, killed renamable $x[[REG1]] :: (dereferenceable load (s32) from @f) ; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 ; 64BIT-DAG: $x3 = LI8 42 -; 64BIT-DAG: renamable $x[[REG2:[0-9]+]] = LDtoc @gS2, $x2 :: (load 8 from got) -; 64BIT-DAG: renamable $x[[REG3:[0-9]+]] = LHZ8 0, killed renamable $x[[REG2]] :: (load 2) +; 64BIT-DAG: renamable $x[[REG2:[0-9]+]] = LDtoc @gS2, $x2 :: (load (s64) from got) +; 64BIT-DAG: renamable $x[[REG3:[0-9]+]] = LHZ8 0, killed renamable $x[[REG2]] :: (load (s16)) ; 64BIT-DAG: renamable $x5 = RLDICR killed renamable $x[[REG3]], 48, 15 ; 64BIT-DAG: $f2 = COPY renamable $f1 ; 64BIT-DAG: $x7 = LI8 43 @@ -191,16 +191,16 @@ entry: ; 32BIT: bb.0.entry: ; 32BIT-NEXT: liveins: $r5 -; 32BIT: STW killed renamable $r5, 0, %fixed-stack.0 :: (store 4 into %fixed-stack.0, align 16) -; 32BIT-NEXT: renamable $r3 = LBZ 1, %fixed-stack.0 :: (dereferenceable load 1 +; 32BIT: STW killed renamable $r5, 0, %fixed-stack.0 :: (store (s32) into %fixed-stack.0, align 16) +; 32BIT-NEXT: renamable $r3 = LBZ 1, %fixed-stack.0 :: (dereferenceable load (s8) ; 64BIT: fixedStack: ; 64BIT-NEXT: - { id: 0, type: default, offset: 64, size: 8, alignment: 16, stack-id: default, ; 64BIT: bb.0.entry: ; 64BIT-NEXT: liveins: $x5 -; 64BIT: STD killed renamable $x5, 0, %fixed-stack.0 :: (store 8 into %fixed-stack.0, align 16) -; 64BIT-NEXT: renamable $x3 = LBZ8 1, %fixed-stack.0 :: (dereferenceable load 1 +; 64BIT: STD killed renamable $x5, 0, %fixed-stack.0 :: (store (s64) into %fixed-stack.0, align 16) +; 64BIT-NEXT: renamable $x3 = LBZ8 1, %fixed-stack.0 :: (dereferenceable load (s8) ; CHECKASM-LABEL: .test_byval_2Byte: @@ -233,13 +233,13 @@ entry: ; 32BIT-DAG: $r7 = LI 5 ; 32BIT-DAG: $r8 = LI 6 ; 32BIT-DAG: $r9 = LI 7 -; 32BIT-DAG: renamable $r[[REGADDR:[0-9]+]] = LWZtoc @gS3, $r2 :: (load 4 from got) -; 32BIT-DAG: renamable $r[[REG1:[0-9]+]] = LHZ 0, killed renamable $r[[REGADDR]] :: (load 2) -; 32BIT-DAG: renamable $r[[REG2:[0-9]+]] = LBZ 2, renamable $r[[REGADDR]] :: (load 1) +; 32BIT-DAG: renamable $r[[REGADDR:[0-9]+]] = LWZtoc @gS3, $r2 :: (load (s32) from got) +; 32BIT-DAG: renamable $r[[REG1:[0-9]+]] = LHZ 0, killed renamable $r[[REGADDR]] :: (load (s16)) +; 32BIT-DAG: renamable $r[[REG2:[0-9]+]] = LBZ 2, renamable $r[[REGADDR]] :: (load (s8)) ; 32BIT-DAG: renamable $r10 = RLWINM killed renamable $r[[REG2]], 8, 16, 23 ; 32BIT-DAG: renamable $r10 = RLWIMI killed renamable $r10, killed renamable $r[[REG1]], 16, 0, 15 ; 32BIT-DAG: renamable $r[[REG3:[0-9]+]] = LI 42 -; 32BIT-DAG: STW killed renamable $r[[REG3]], 56, $r1 :: (store 4) +; 32BIT-DAG: STW killed renamable $r[[REG3]], 56, $r1 :: (store (s32)) ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit killed $r5, implicit killed $r6, implicit killed $r7, implicit killed $r8, implicit killed $r9, implicit $r10, implicit $r2, implicit-def $r1 ; 32BIT-NEXT: ADJCALLSTACKUP 60, 0, implicit-def dead $r1, implicit $r1 @@ -273,13 +273,13 @@ entry: ; 64BIT-DAG: $x7 = LI8 5 ; 64BIT-DAG: $x8 = LI8 6 ; 64BIT-DAG: $x9 = LI8 7 -; 64BIT-DAG: renamable $x[[REGADDR:[0-9]+]] = LDtoc @gS3, $x2 :: (load 8 from got) -; 64BIT-DAG: renamable $x[[REG1:[0-9]+]] = LHZ8 0, killed renamable $x[[REGADDR]] :: (load 2) -; 64BIT-DAG: renamable $x[[REG2:[0-9]+]] = LBZ8 2, renamable $x[[REGADDR]] :: (load 1) +; 64BIT-DAG: renamable $x[[REGADDR:[0-9]+]] = LDtoc @gS3, $x2 :: (load (s64) from got) +; 64BIT-DAG: renamable $x[[REG1:[0-9]+]] = LHZ8 0, killed renamable $x[[REGADDR]] :: (load (s16)) +; 64BIT-DAG: renamable $x[[REG2:[0-9]+]] = LBZ8 2, renamable $x[[REGADDR]] :: (load (s8)) ; 64BIT-DAG: renamable $x10 = RLDIC killed renamable $x[[REG2]], 40, 16 ; 64BIT-DAG: renamable $x10 = RLDIMI killed renamable $x10, killed renamable $x[[REG1]], 48, 0 ; 64BIT-DAG: $x[[REG3:[0-9]+]] = LI8 42 -; 64BIT-DAG: STD killed renamable $x[[REG3]], 112, $x1 :: (store 8) +; 64BIT-DAG: STD killed renamable $x[[REG3]], 112, $x1 :: (store (s64)) ; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit killed $x5, implicit killed $x6, implicit killed $x7, implicit killed $x8, implicit killed $x9, implicit $x10, implicit $x2, implicit-def $r1 ; 64BIT-NEXT: ADJCALLSTACKUP 120, 0, implicit-def dead $r1, implicit $r1 @@ -318,8 +318,8 @@ entry: ; 32BIT-LABEL: bb.0.entry: ; 32BIT-NEXT: liveins: $r10 -; 32BIT: STW killed renamable $r10, 0, %fixed-stack.1 :: (store 4 into %fixed-stack.1) -; 32BIT-NEXT: renamable $r3 = LHZ 1, %fixed-stack.1 :: (dereferenceable load 2 +; 32BIT: STW killed renamable $r10, 0, %fixed-stack.1 :: (store (s32) into %fixed-stack.1) +; 32BIT-NEXT: renamable $r3 = LHZ 1, %fixed-stack.1 :: (dereferenceable load (s16) ; 64BIT: fixedStack: ; 64BIT-NEXT: - { id: 0, type: default, offset: 116, size: 4, alignment: 4, stack-id: default, @@ -327,8 +327,8 @@ entry: ; 64BIT-LABEL: bb.0.entry: ; 64BIT-NEXT: liveins: $x10 -; 64BIT: STD killed renamable $x10, 0, %fixed-stack.1 :: (store 8 into %fixed-stack.1) -; 64BIT-NEXT: renamable $x3 = LHZ8 1, %fixed-stack.1 :: (dereferenceable load 2 +; 64BIT: STD killed renamable $x10, 0, %fixed-stack.1 :: (store (s64) into %fixed-stack.1) +; 64BIT-NEXT: renamable $x3 = LHZ8 1, %fixed-stack.1 :: (dereferenceable load (s16) ; CHECKASM-LABEL: .test_byval_3Byte: @@ -357,9 +357,9 @@ entry: ; CHECK-LABEL: name: call_test_byval_4Byte{{.*}} ; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 -; 32BIT-NEXT: renamable $r[[REG:[0-9]+]] = LWZtoc @gS4, $r2 :: (load 4 from got) -; 32BIT-DAG: renamable $r3 = LWZ 0, killed renamable $r[[REG]] :: (load 4) -; 32BIT-DAG: renamable $r4 = LWZ 0, %stack.1.s4a :: (load 4 from %stack.1.s4a, align 8) +; 32BIT-NEXT: renamable $r[[REG:[0-9]+]] = LWZtoc @gS4, $r2 :: (load (s32) from got) +; 32BIT-DAG: renamable $r3 = LWZ 0, killed renamable $r[[REG]] :: (load (s32)) +; 32BIT-DAG: renamable $r4 = LWZ 0, %stack.1.s4a :: (load (s32) from %stack.1.s4a, align 8) ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r2, implicit-def $r1 ; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 @@ -374,9 +374,9 @@ entry: ; ASM32-NEXT: addi 1, 1, 80 ; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT-NEXT: renamable $x[[REGADDR:[0-9]+]] = LDtoc @gS4, $x2 :: (load 8 from got) -; 64BIT-DAG: renamable $x[[LD1:[0-9]+]] = LWZ8 0, killed renamable $x[[REGADDR]] :: (load 4) -; 64BIT-DAG: renamable $x[[LD2:[0-9]+]] = LWZ8 0, %stack.1.s4a :: (load 4 from %stack.1.s4a, align 8) +; 64BIT-NEXT: renamable $x[[REGADDR:[0-9]+]] = LDtoc @gS4, $x2 :: (load (s64) from got) +; 64BIT-DAG: renamable $x[[LD1:[0-9]+]] = LWZ8 0, killed renamable $x[[REGADDR]] :: (load (s32)) +; 64BIT-DAG: renamable $x[[LD2:[0-9]+]] = LWZ8 0, %stack.1.s4a :: (load (s32) from %stack.1.s4a, align 8) ; 64BIT-DAG: renamable $x3 = RLDICR killed renamable $x[[LD1]], 32, 31 ; 64BIT-DAG: renamable $x4 = RLDICR killed renamable $x[[LD2]], 32, 31 ; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit $x2, implicit-def $r1 @@ -416,8 +416,8 @@ entry: ; 32BIT: bb.0.entry: ; 32BIT-NEXT: liveins: $r3 -; 32BIT: STW renamable $r3, 0, %fixed-stack.2 :: (store 4 into %fixed-stack.2, align 8) -; 32BIT-DAG: STW killed renamable $r4, 0, %fixed-stack.0 :: (store 4 into %fixed-stack.0) +; 32BIT: STW renamable $r3, 0, %fixed-stack.2 :: (store (s32) into %fixed-stack.2, align 8) +; 32BIT-DAG: STW killed renamable $r4, 0, %fixed-stack.0 :: (store (s32) into %fixed-stack.0) ; 32BIT-DAG: renamable $r[[SCRATCH:[0-9]+]] = RLWINM killed renamable $r3, 0, 24, 31 ; 32BIT-DAG: renamable $r3 = nsw ADD4 renamable $r4, killed renamable $r[[SCRATCH]] ; 32BIT: BLR @@ -432,10 +432,10 @@ entry: ; 64BIT: bb.0.entry: ; 64BIT-NEXT: liveins: $x3 -; 64BIT: STD killed renamable $x3, 0, %fixed-stack.2 :: (store 8 into %fixed-stack.2, align 16) -; 64BIT-NEXT: STD killed renamable $x4, 0, %fixed-stack.0 :: (store 8 into %fixed-stack.0) -; 64BIT-DAG: renamable $r[[SCRATCH1:[0-9]+]] = LBZ 3, %fixed-stack.2 :: (dereferenceable load 1 -; 64BIT-DAG: renamable $r[[SCRATCH2:[0-9]+]] = LWZ 0, %fixed-stack.0 :: (dereferenceable load 4 +; 64BIT: STD killed renamable $x3, 0, %fixed-stack.2 :: (store (s64) into %fixed-stack.2, align 16) +; 64BIT-NEXT: STD killed renamable $x4, 0, %fixed-stack.0 :: (store (s64) into %fixed-stack.0) +; 64BIT-DAG: renamable $r[[SCRATCH1:[0-9]+]] = LBZ 3, %fixed-stack.2 :: (dereferenceable load (s8) +; 64BIT-DAG: renamable $r[[SCRATCH2:[0-9]+]] = LWZ 0, %fixed-stack.0 :: (dereferenceable load (s32) ; 64BIT-NEXT: renamable $r[[SCRATCH3:[0-9]+]] = nsw ADD4 killed renamable $r[[SCRATCH2]], killed renamable $r[[SCRATCH1]] ; 64BIT-NEXT: renamable $x3 = EXTSW_32_64 killed renamable $r[[SCRATCH3]] ; 64BIT-NEXT: BLR8 @@ -475,9 +475,9 @@ declare zeroext i8 @test_byval_5Byte(%struct.S5* byval(%struct.S5) align 1) ; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings. ; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 -; 32BIT-NEXT: renamable $r[[REGADDR:[0-9]+]] = LWZtoc @gS5, $r2 :: (load 4 from got) -; 32BIT-DAG: renamable $r[[REG1:[0-9]+]] = LBZ 4, renamable $r[[REGADDR]] :: (load 1) -; 32BIT-DAG: renamable $r3 = LWZ 0, killed renamable $r[[REGADDR]] :: (load 4) +; 32BIT-NEXT: renamable $r[[REGADDR:[0-9]+]] = LWZtoc @gS5, $r2 :: (load (s32) from got) +; 32BIT-DAG: renamable $r[[REG1:[0-9]+]] = LBZ 4, renamable $r[[REGADDR]] :: (load (s8)) +; 32BIT-DAG: renamable $r3 = LWZ 0, killed renamable $r[[REGADDR]] :: (load (s32)) ; 32BIT-DAG: renamable $r4 = RLWINM killed renamable $r[[REG1]], 24, 0, 7 ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r2, implicit-def $r1 ; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 @@ -495,9 +495,9 @@ declare zeroext i8 @test_byval_5Byte(%struct.S5* byval(%struct.S5) align 1) ; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings. ; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT-NEXT: renamable $x[[REGADDR:[0-9]+]] = LDtoc @gS5, $x2 :: (load 8 from got) -; 64BIT-DAG: renamable $x[[REG1:[0-9]+]] = LWZ8 0, killed renamable $x[[REGADDR]] :: (load 4) -; 64BIT-DAG: renamable $x[[REG2:[0-9]+]] = LBZ8 4, renamable $x[[REGADDR]] :: (load 1) +; 64BIT-NEXT: renamable $x[[REGADDR:[0-9]+]] = LDtoc @gS5, $x2 :: (load (s64) from got) +; 64BIT-DAG: renamable $x[[REG1:[0-9]+]] = LWZ8 0, killed renamable $x[[REGADDR]] :: (load (s32)) +; 64BIT-DAG: renamable $x[[REG2:[0-9]+]] = LBZ8 4, renamable $x[[REGADDR]] :: (load (s8)) ; 64BIT-DAG: renamable $x3 = RLWINM8 killed renamable $x[[REG2]], 24, 0, 7 ; 64BIT-DAG: renamable $x3 = RLDIMI killed renamable $x3, killed renamable $x[[REG1]], 32, 0 ; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1 @@ -530,9 +530,9 @@ declare zeroext i8 @test_byval_6Byte(%struct.S6* byval(%struct.S6) align 1) ; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings. ; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 -; 32BIT-NEXT: renamable $r[[REGADDR:[0-9]+]] = LWZtoc @gS6, $r2 :: (load 4 from got) -; 32BIT-DAG: renamable $r[[REG1:[0-9]+]] = LHZ 4, renamable $r[[REGADDR]] :: (load 2) -; 32BIT-DAG: renamable $r3 = LWZ 0, killed renamable $r[[REGADDR]] :: (load 4) +; 32BIT-NEXT: renamable $r[[REGADDR:[0-9]+]] = LWZtoc @gS6, $r2 :: (load (s32) from got) +; 32BIT-DAG: renamable $r[[REG1:[0-9]+]] = LHZ 4, renamable $r[[REGADDR]] :: (load (s16)) +; 32BIT-DAG: renamable $r3 = LWZ 0, killed renamable $r[[REGADDR]] :: (load (s32)) ; 32BIT-DAG: renamable $r4 = RLWINM killed renamable $r[[REG1]], 16, 0, 15 ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r2, implicit-def $r1 ; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 @@ -550,9 +550,9 @@ declare zeroext i8 @test_byval_6Byte(%struct.S6* byval(%struct.S6) align 1) ; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings. ; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT-NEXT: renamable $x[[REGADDR:[0-9]+]] = LDtoc @gS6, $x2 :: (load 8 from got) -; 64BIT-DAG: renamable $x[[REG1:[0-9]+]] = LWZ8 0, killed renamable $x[[REGADDR]] :: (load 4) -; 64BIT-DAG: renamable $x[[REG2:[0-9]+]] = LHZ8 4, renamable $x[[REGADDR]] :: (load 2) +; 64BIT-NEXT: renamable $x[[REGADDR:[0-9]+]] = LDtoc @gS6, $x2 :: (load (s64) from got) +; 64BIT-DAG: renamable $x[[REG1:[0-9]+]] = LWZ8 0, killed renamable $x[[REGADDR]] :: (load (s32)) +; 64BIT-DAG: renamable $x[[REG2:[0-9]+]] = LHZ8 4, renamable $x[[REGADDR]] :: (load (s16)) ; 64BIT-DAG: renamable $x3 = RLWINM8 killed renamable $x[[REG2]], 16, 0, 15 ; 64BIT-DAG: renamable $x3 = RLDIMI killed renamable $x3, killed renamable $x[[REG1]], 32, 0 ; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1 @@ -585,10 +585,10 @@ declare zeroext i8 @test_byval_7Byte(%struct.S7* byval(%struct.S7) align 1) ; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings. ; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 -; 32BIT-NEXT: renamable $r[[REGADDR:[0-9]+]] = LWZtoc @gS7, $r2 :: (load 4 from got) -; 32BIT-DAG: renamable $r3 = LWZ 0, killed renamable $r[[REGADDR]] :: (load 4) -; 32BIT-DAG: renamable $r[[REG1:[0-9]+]] = LHZ 4, renamable $r[[REGADDR]] :: (load 2) -; 32BIT-DAG: renamable $r[[REG2:[0-9]+]] = LBZ 6, renamable $r[[REGADDR]] :: (load 1) +; 32BIT-NEXT: renamable $r[[REGADDR:[0-9]+]] = LWZtoc @gS7, $r2 :: (load (s32) from got) +; 32BIT-DAG: renamable $r3 = LWZ 0, killed renamable $r[[REGADDR]] :: (load (s32)) +; 32BIT-DAG: renamable $r[[REG1:[0-9]+]] = LHZ 4, renamable $r[[REGADDR]] :: (load (s16)) +; 32BIT-DAG: renamable $r[[REG2:[0-9]+]] = LBZ 6, renamable $r[[REGADDR]] :: (load (s8)) ; 32BIT-DAG: renamable $r4 = RLWINM killed renamable $r[[REG2]], 8, 16, 23 ; 32BIT-DAG: renamable $r4 = RLWIMI killed renamable $r4, killed renamable $r[[REG1]], 16, 0, 15 ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r2, implicit-def $r1 @@ -609,10 +609,10 @@ declare zeroext i8 @test_byval_7Byte(%struct.S7* byval(%struct.S7) align 1) ; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings. ; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT-NEXT: renamable $x[[REGADDR:[0-9]+]] = LDtoc @gS7, $x2 :: (load 8 from got) -; 64BIT-DAG: renamable $x[[REG1:[0-9]+]] = LWZ8 0, killed renamable $x[[REGADDR]] :: (load 4) -; 64BIT-DAG: renamable $x[[REG2:[0-9]+]] = LHZ8 4, renamable $x[[REGADDR]] :: (load 2) -; 64BIT-DAG: renamable $x[[REG3:[0-9]+]] = LBZ8 6, renamable $x[[REGADDR]] :: (load 1) +; 64BIT-NEXT: renamable $x[[REGADDR:[0-9]+]] = LDtoc @gS7, $x2 :: (load (s64) from got) +; 64BIT-DAG: renamable $x[[REG1:[0-9]+]] = LWZ8 0, killed renamable $x[[REGADDR]] :: (load (s32)) +; 64BIT-DAG: renamable $x[[REG2:[0-9]+]] = LHZ8 4, renamable $x[[REGADDR]] :: (load (s16)) +; 64BIT-DAG: renamable $x[[REG3:[0-9]+]] = LBZ8 6, renamable $x[[REGADDR]] :: (load (s8)) ; 64BIT-DAG: renamable $x3 = RLWINM8 killed renamable $x[[REG3]], 8, 16, 23 ; 64BIT-DAG: renamable $x3 = RLWIMI8 killed renamable $x3, killed renamable $x[[REG2]], 16, 0, 15 ; 64BIT-DAG: renamable $x3 = RLDIMI killed renamable $x3, killed renamable $x[[REG1]], 32, 0 @@ -648,9 +648,9 @@ declare zeroext i8 @test_byval_8Byte(%struct.S8* byval(%struct.S8) align 1) ; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings. ; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 -; 32BIT-NEXT: renamable $r[[REGADDR:[0-9]+]] = LWZtoc @gS8, $r2 :: (load 4 from got) -; 32BIT-DAG: renamable $r3 = LWZ 0, killed renamable $r[[REGADDR]] :: (load 4) -; 32BIT-DAG: renamable $r4 = LWZ 4, renamable $r[[REGADDR]] :: (load 4) +; 32BIT-NEXT: renamable $r[[REGADDR:[0-9]+]] = LWZtoc @gS8, $r2 :: (load (s32) from got) +; 32BIT-DAG: renamable $r3 = LWZ 0, killed renamable $r[[REGADDR]] :: (load (s32)) +; 32BIT-DAG: renamable $r4 = LWZ 4, renamable $r[[REGADDR]] :: (load (s32)) ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r2, implicit-def $r1 ; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 @@ -665,8 +665,8 @@ declare zeroext i8 @test_byval_8Byte(%struct.S8* byval(%struct.S8) align 1) ; ASM32-NEXT: nop ; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT-NEXT: renamable $x[[REGADDR:[0-9]+]] = LDtoc @gS8, $x2 :: (load 8 from got) -; 64BIT-NEXT: renamable $x3 = LD 0, killed renamable $x[[REGADDR]] :: (load 8) +; 64BIT-NEXT: renamable $x[[REGADDR:[0-9]+]] = LDtoc @gS8, $x2 :: (load (s64) from got) +; 64BIT-NEXT: renamable $x3 = LD 0, killed renamable $x[[REGADDR]] :: (load (s64)) ; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1 ; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 @@ -691,15 +691,15 @@ entry: ; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings. ; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 -; 32BIT-NEXT: renamable $r[[REGADDR:[0-9]+]] = LWZtoc @gS32, $r2 :: (load 4 from got) -; 32BIT-DAG: renamable $r3 = LWZ 0, killed renamable $r[[REGADDR]] :: (load 4) -; 32BIT-DAG: renamable $r4 = LWZ 4, renamable $r[[REGADDR]] :: (load 4) -; 32BIT-DAG: renamable $r5 = LWZ 8, renamable $r[[REGADDR]] :: (load 4) -; 32BIT-DAG: renamable $r6 = LWZ 12, renamable $r[[REGADDR]] :: (load 4) -; 32BIT-DAG: renamable $r7 = LWZ 16, renamable $r[[REGADDR]] :: (load 4) -; 32BIT-DAG: renamable $r8 = LWZ 20, renamable $r[[REGADDR]] :: (load 4) -; 32BIT-DAG: renamable $r9 = LWZ 24, renamable $r[[REGADDR]] :: (load 4) -; 32BIT-DAG: renamable $r10 = LWZ 28, renamable $r[[REGADDR]] :: (load 4) +; 32BIT-NEXT: renamable $r[[REGADDR:[0-9]+]] = LWZtoc @gS32, $r2 :: (load (s32) from got) +; 32BIT-DAG: renamable $r3 = LWZ 0, killed renamable $r[[REGADDR]] :: (load (s32)) +; 32BIT-DAG: renamable $r4 = LWZ 4, renamable $r[[REGADDR]] :: (load (s32)) +; 32BIT-DAG: renamable $r5 = LWZ 8, renamable $r[[REGADDR]] :: (load (s32)) +; 32BIT-DAG: renamable $r6 = LWZ 12, renamable $r[[REGADDR]] :: (load (s32)) +; 32BIT-DAG: renamable $r7 = LWZ 16, renamable $r[[REGADDR]] :: (load (s32)) +; 32BIT-DAG: renamable $r8 = LWZ 20, renamable $r[[REGADDR]] :: (load (s32)) +; 32BIT-DAG: renamable $r9 = LWZ 24, renamable $r[[REGADDR]] :: (load (s32)) +; 32BIT-DAG: renamable $r10 = LWZ 28, renamable $r[[REGADDR]] :: (load (s32)) ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r5, implicit $r6, implicit $r7, implicit $r8, implicit $r9, implicit $r10, implicit $r2, implicit-def $r1 ; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 @@ -721,11 +721,11 @@ entry: ; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings. ; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT-NEXT: renamable $x[[REGADDR:[0-9]+]] = LDtoc @gS32, $x2 :: (load 8 from got) -; 64BIT-DAG: renamable $x3 = LD 0, killed renamable $x[[REGADDR]] :: (load 8) -; 64BIT-DAG: renamable $x4 = LD 8, renamable $x[[REGADDR]] :: (load 8) -; 64BIT-DAG: renamable $x5 = LD 16, renamable $x[[REGADDR]] :: (load 8) -; 64BIT-DAG: renamable $x6 = LD 24, renamable $x[[REGADDR]] :: (load 8) +; 64BIT-NEXT: renamable $x[[REGADDR:[0-9]+]] = LDtoc @gS32, $x2 :: (load (s64) from got) +; 64BIT-DAG: renamable $x3 = LD 0, killed renamable $x[[REGADDR]] :: (load (s64)) +; 64BIT-DAG: renamable $x4 = LD 8, renamable $x[[REGADDR]] :: (load (s64)) +; 64BIT-DAG: renamable $x5 = LD 16, renamable $x[[REGADDR]] :: (load (s64)) +; 64BIT-DAG: renamable $x6 = LD 24, renamable $x[[REGADDR]] :: (load (s64)) ; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x2, implicit-def $r1 ; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 @@ -756,15 +756,15 @@ entry: ; 32BIT: bb.0.entry: ; 32BIT-NEXT: liveins: $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10 -; 32BIT: STW killed renamable $r8, 20, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 20 -; 32BIT-DAG: STW killed renamable $r3, 0, %fixed-stack.0 :: (store 4 into %fixed-stack.0 -; 32BIT-DAG: STW killed renamable $r4, 4, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 4 -; 32BIT-DAG: STW killed renamable $r5, 8, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 8 -; 32BIT-DAG: STW killed renamable $r6, 12, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 12 -; 32BIT-DAG: STW killed renamable $r7, 16, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 16 -; 32BIT: renamable $r3 = LBZ 21, %fixed-stack.0 :: (dereferenceable load 1 -; 32BIT-DAG: STW killed renamable $r9, 24, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 24 -; 32BIT-DAG: STW killed renamable $r10, 28, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 28 +; 32BIT: STW killed renamable $r8, 20, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 20 +; 32BIT-DAG: STW killed renamable $r3, 0, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 +; 32BIT-DAG: STW killed renamable $r4, 4, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 4 +; 32BIT-DAG: STW killed renamable $r5, 8, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 8 +; 32BIT-DAG: STW killed renamable $r6, 12, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 12 +; 32BIT-DAG: STW killed renamable $r7, 16, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 16 +; 32BIT: renamable $r3 = LBZ 21, %fixed-stack.0 :: (dereferenceable load (s8) +; 32BIT-DAG: STW killed renamable $r9, 24, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 24 +; 32BIT-DAG: STW killed renamable $r10, 28, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 28 ; 32BIT: BLR ; 64BIT: fixedStack: @@ -773,11 +773,11 @@ entry: ; 64BIT: bb.0.entry: ; 64BIT-NEXT: liveins: $x3, $x4, $x5, $x6 -; 64BIT: STD killed renamable $x5, 16, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 16 -; 64BIT-DAG: STD killed renamable $x3, 0, %fixed-stack.0 :: (store 8 into %fixed-stack.0 -; 64BIT-NEXT: renamable $x3 = LBZ8 21, %fixed-stack.0 :: (dereferenceable load 1 -; 64BIT-DAG: STD killed renamable $x4, 8, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 8 -; 64BIT-DAG: STD killed renamable $x6, 24, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 24 +; 64BIT: STD killed renamable $x5, 16, %fixed-stack.0 :: (store (s64) into %fixed-stack.0 + 16 +; 64BIT-DAG: STD killed renamable $x3, 0, %fixed-stack.0 :: (store (s64) into %fixed-stack.0 +; 64BIT-NEXT: renamable $x3 = LBZ8 21, %fixed-stack.0 :: (dereferenceable load (s8) +; 64BIT-DAG: STD killed renamable $x4, 8, %fixed-stack.0 :: (store (s64) into %fixed-stack.0 + 8 +; 64BIT-DAG: STD killed renamable $x6, 24, %fixed-stack.0 :: (store (s64) into %fixed-stack.0 + 24 ; 64BIT-NEXT: BLR8 ; ASM-LABEL: .test_byval_32Byte: @@ -815,16 +815,16 @@ entry: ; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings. ; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 -; 32BIT-NEXT: renamable $r[[REGADDR:[0-9]+]] = LWZtoc @gS31, $r2 :: (load 4 from got) -; 32BIT-DAG: renamable $r3 = LWZ 0, killed renamable $r[[REGADDR]] :: (load 4) -; 32BIT-DAG: renamable $r4 = LWZ 4, renamable $r[[REGADDR]] :: (load 4) -; 32BIT-DAG: renamable $r5 = LWZ 8, renamable $r[[REGADDR]] :: (load 4) -; 32BIT-DAG: renamable $r6 = LWZ 12, renamable $r[[REGADDR]] :: (load 4) -; 32BIT-DAG: renamable $r7 = LWZ 16, renamable $r[[REGADDR]] :: (load 4) -; 32BIT-DAG: renamable $r8 = LWZ 20, renamable $r[[REGADDR]] :: (load 4) -; 32BIT-DAG: renamable $r9 = LWZ 24, renamable $r[[REGADDR]] :: (load 4) -; 32BIT-DAG: renamable $r[[REG:[0-9]+]] = LHZ 28, renamable $r[[REGADDR]] :: (load 2) -; 32BIT-DAG: renamable $r10 = LBZ 30, renamable $r[[REGADDR]] :: (load 1) +; 32BIT-NEXT: renamable $r[[REGADDR:[0-9]+]] = LWZtoc @gS31, $r2 :: (load (s32) from got) +; 32BIT-DAG: renamable $r3 = LWZ 0, killed renamable $r[[REGADDR]] :: (load (s32)) +; 32BIT-DAG: renamable $r4 = LWZ 4, renamable $r[[REGADDR]] :: (load (s32)) +; 32BIT-DAG: renamable $r5 = LWZ 8, renamable $r[[REGADDR]] :: (load (s32)) +; 32BIT-DAG: renamable $r6 = LWZ 12, renamable $r[[REGADDR]] :: (load (s32)) +; 32BIT-DAG: renamable $r7 = LWZ 16, renamable $r[[REGADDR]] :: (load (s32)) +; 32BIT-DAG: renamable $r8 = LWZ 20, renamable $r[[REGADDR]] :: (load (s32)) +; 32BIT-DAG: renamable $r9 = LWZ 24, renamable $r[[REGADDR]] :: (load (s32)) +; 32BIT-DAG: renamable $r[[REG:[0-9]+]] = LHZ 28, renamable $r[[REGADDR]] :: (load (s16)) +; 32BIT-DAG: renamable $r10 = LBZ 30, renamable $r[[REGADDR]] :: (load (s8)) ; 32BIT-DAG: renamable $r10 = RLWINM killed renamable $r10, 8, 16, 23 ; 32BIT-DAG: renamable $r10 = RLWIMI killed renamable $r10, killed renamable $r[[REG]], 16, 0, 15 ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r5, implicit $r6, implicit $r7, implicit $r8, implicit $r9, implicit $r10, implicit $r2, implicit-def $r1 @@ -851,13 +851,13 @@ entry: ; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings. ; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT-NEXT: renamable $x[[REGADDR:[0-9]+]] = LDtoc @gS31, $x2 :: (load 8 from got) -; 64BIT-DAG: renamable $x3 = LD 0, killed renamable $x[[REGADDR]] :: (load 8) -; 64BIT-DAG: renamable $x4 = LD 8, renamable $x[[REGADDR]] :: (load 8) -; 64BIT-DAG: renamable $x5 = LD 16, renamable $x[[REGADDR]] :: (load 8) -; 64BIT-DAG: renamable $x[[REG1:[0-9]+]] = LWZ8 24, renamable $x[[REGADDR]] :: (load 4) -; 64BIT-DAG: renamable $x[[REG2:[0-9]+]] = LHZ8 28, renamable $x[[REGADDR]] :: (load 2) -; 64BIT-DAG: renamable $x[[REG3:[0-9]+]] = LBZ8 30, renamable $x[[REGADDR]] :: (load 1) +; 64BIT-NEXT: renamable $x[[REGADDR:[0-9]+]] = LDtoc @gS31, $x2 :: (load (s64) from got) +; 64BIT-DAG: renamable $x3 = LD 0, killed renamable $x[[REGADDR]] :: (load (s64)) +; 64BIT-DAG: renamable $x4 = LD 8, renamable $x[[REGADDR]] :: (load (s64)) +; 64BIT-DAG: renamable $x5 = LD 16, renamable $x[[REGADDR]] :: (load (s64)) +; 64BIT-DAG: renamable $x[[REG1:[0-9]+]] = LWZ8 24, renamable $x[[REGADDR]] :: (load (s32)) +; 64BIT-DAG: renamable $x[[REG2:[0-9]+]] = LHZ8 28, renamable $x[[REGADDR]] :: (load (s16)) +; 64BIT-DAG: renamable $x[[REG3:[0-9]+]] = LBZ8 30, renamable $x[[REGADDR]] :: (load (s8)) ; 64BIT-DAG: renamable $x6 = RLWINM8 killed renamable $x[[REG3]], 8, 16, 23 ; 64BIT-DAG: renamable $x6 = RLWIMI8 killed renamable $x6, killed renamable $x[[REG2]], 16, 0, 15 ; 64BIT-DAG: renamable $x6 = RLDIMI killed renamable $x6, killed renamable $x[[REG1]], 32, 0 @@ -895,15 +895,15 @@ entry: ; 32BIT: bb.0.entry: ; 32BIT-NEXT: liveins: $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10 -; 32BIT-DAG: STW killed renamable $r3, 0, %fixed-stack.0 :: (store 4 into %fixed-stack.0 -; 32BIT-DAG: STW killed renamable $r4, 4, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 4 -; 32BIT-DAG: STW killed renamable $r5, 8, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 8 -; 32BIT-DAG: STW killed renamable $r6, 12, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 12 -; 32BIT-DAG: STW killed renamable $r7, 16, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 16 -; 32BIT-DAG: STW killed renamable $r8, 20, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 20 -; 32BIT-NEXT: renamable $f1 = LFD 16, %fixed-stack.0 :: (dereferenceable load 8 -; 32BIT-DAG: STW killed renamable $r9, 24, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 24 -; 32BIT-DAG: STW killed renamable $r10, 28, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 28 +; 32BIT-DAG: STW killed renamable $r3, 0, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 +; 32BIT-DAG: STW killed renamable $r4, 4, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 4 +; 32BIT-DAG: STW killed renamable $r5, 8, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 8 +; 32BIT-DAG: STW killed renamable $r6, 12, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 12 +; 32BIT-DAG: STW killed renamable $r7, 16, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 16 +; 32BIT-DAG: STW killed renamable $r8, 20, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 20 +; 32BIT-NEXT: renamable $f1 = LFD 16, %fixed-stack.0 :: (dereferenceable load (s64) +; 32BIT-DAG: STW killed renamable $r9, 24, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 24 +; 32BIT-DAG: STW killed renamable $r10, 28, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 28 ; 32BIT-NEXT: BLR ; 64BIT: fixedStack: @@ -912,11 +912,11 @@ entry: ; 64BIT: bb.0.entry: ; 64BIT-NEXT: liveins: $x3, $x4, $x5, $x6 -; 64BIT-DAG: STD killed renamable $x3, 0, %fixed-stack.0 :: (store 8 into %fixed-stack.0 -; 64BIT-DAG: STD killed renamable $x5, 16, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 16 -; 64BIT-NEXT: renamable $f1 = LFD 16, %fixed-stack.0 :: (dereferenceable load 8 -; 64BIT-DAG: STD killed renamable $x4, 8, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 8 -; 64BIT-DAG: STD killed renamable $x6, 24, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 24 +; 64BIT-DAG: STD killed renamable $x3, 0, %fixed-stack.0 :: (store (s64) into %fixed-stack.0 +; 64BIT-DAG: STD killed renamable $x5, 16, %fixed-stack.0 :: (store (s64) into %fixed-stack.0 + 16 +; 64BIT-NEXT: renamable $f1 = LFD 16, %fixed-stack.0 :: (dereferenceable load (s64) +; 64BIT-DAG: STD killed renamable $x4, 8, %fixed-stack.0 :: (store (s64) into %fixed-stack.0 + 8 +; 64BIT-DAG: STD killed renamable $x6, 24, %fixed-stack.0 :: (store (s64) into %fixed-stack.0 + 24 ; 64BIT-NEXT: BLR8 ; ASM32-LABEL: .test_byval_31Byte: @@ -957,9 +957,9 @@ declare i32 @test_byval_homogeneous_float_struct(%struct.F* byval(%struct.F) ali ; CHECK-LABEL: name: call_test_byval_homogeneous_float_struct{{.*}} ; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 -; 32BIT-DAG: renamable $r3 = LWZ 0, %stack.0.s :: (load 4 from %stack.0.s, align 8) -; 32BIT-DAG: renamable $r4 = LWZ 4, %stack.0.s :: (load 4 from %stack.0.s + 4) -; 32BIT-DAG: renamable $r5 = LWZ 8, %stack.0.s :: (load 4 from %stack.0.s + 8, align 8) +; 32BIT-DAG: renamable $r3 = LWZ 0, %stack.0.s :: (load (s32) from %stack.0.s, align 8) +; 32BIT-DAG: renamable $r4 = LWZ 4, %stack.0.s :: (load (s32) from %stack.0.s + 4) +; 32BIT-DAG: renamable $r5 = LWZ 8, %stack.0.s :: (load (s32) from %stack.0.s + 8, align 8) ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r5, implicit $r2, implicit-def $r1, implicit-def $r3 ; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 @@ -974,8 +974,8 @@ declare i32 @test_byval_homogeneous_float_struct(%struct.F* byval(%struct.F) ali ; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings. ; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT-DAG: renamable $x3 = LD 0, %stack.0.s :: (load 8 from %stack.0.s) -; 64BIT-DAG: renamable $x4 = LWZ8 8, %stack.0.s :: (load 4 from %stack.0.s + 8, align 8) +; 64BIT-DAG: renamable $x3 = LD 0, %stack.0.s :: (load (s64) from %stack.0.s) +; 64BIT-DAG: renamable $x4 = LWZ8 8, %stack.0.s :: (load (s32) from %stack.0.s + 8, align 8) ; 64BIT-DAG: renamable $x4 = RLDICR killed renamable $x4, 32, 31 ; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit $x2, implicit-def $r1, implicit-def $x3 ; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 diff --git a/llvm/test/CodeGen/PowerPC/aix-csr-vector-extabi.ll b/llvm/test/CodeGen/PowerPC/aix-csr-vector-extabi.ll index 744ec2f63acb7..0fd811f82b818 100644 --- a/llvm/test/CodeGen/PowerPC/aix-csr-vector-extabi.ll +++ b/llvm/test/CodeGen/PowerPC/aix-csr-vector-extabi.ll @@ -37,15 +37,15 @@ entry: ; MIR32: liveins: $v20, $v26, $v31 -; MIR32-DAG: STXVD2X killed $v20, $r1, killed $r{{[0-9]+}} :: (store 16 into %fixed-stack.2) -; MIR32-DAG: STXVD2X killed $v26, $r1, killed $r{{[0-9]+}} :: (store 16 into %fixed-stack.1) -; MIR32-DAG: STXVD2X killed $v31, $r1, killed $r{{[0-9]+}} :: (store 16 into %fixed-stack.0) +; MIR32-DAG: STXVD2X killed $v20, $r1, killed $r{{[0-9]+}} :: (store (s128) into %fixed-stack.2) +; MIR32-DAG: STXVD2X killed $v26, $r1, killed $r{{[0-9]+}} :: (store (s128) into %fixed-stack.1) +; MIR32-DAG: STXVD2X killed $v31, $r1, killed $r{{[0-9]+}} :: (store (s128) into %fixed-stack.0) ; MIR32: INLINEASM -; MIR32-DAG: $v20 = LXVD2X $r1, killed $r{{[0-9]+}} :: (load 16 from %fixed-stack.2) -; MIR32-DAG: $v26 = LXVD2X $r1, killed $r{{[0-9]+}} :: (load 16 from %fixed-stack.1) -; MIR32-DAG: $v31 = LXVD2X $r1, killed $r{{[0-9]+}} :: (load 16 from %fixed-stack.0) +; MIR32-DAG: $v20 = LXVD2X $r1, killed $r{{[0-9]+}} :: (load (s128) from %fixed-stack.2) +; MIR32-DAG: $v26 = LXVD2X $r1, killed $r{{[0-9]+}} :: (load (s128) from %fixed-stack.1) +; MIR32-DAG: $v31 = LXVD2X $r1, killed $r{{[0-9]+}} :: (load (s128) from %fixed-stack.0) ; MIR32: BLR implicit $lr, implicit $rm ; MIR64: name: vec_regs @@ -64,15 +64,15 @@ entry: ; MIR64: liveins: $v20, $v26, $v31 -; MIR64-DAG: STXVD2X killed $v20, $x1, killed $x{{[0-9]+}} :: (store 16 into %fixed-stack.2) -; MIR64-DAG: STXVD2X killed $v26, $x1, killed $x{{[0-9]+}} :: (store 16 into %fixed-stack.1) -; MIR64-DAG: STXVD2X killed $v31, $x1, killed $x{{[0-9]+}} :: (store 16 into %fixed-stack.0) +; MIR64-DAG: STXVD2X killed $v20, $x1, killed $x{{[0-9]+}} :: (store (s128) into %fixed-stack.2) +; MIR64-DAG: STXVD2X killed $v26, $x1, killed $x{{[0-9]+}} :: (store (s128) into %fixed-stack.1) +; MIR64-DAG: STXVD2X killed $v31, $x1, killed $x{{[0-9]+}} :: (store (s128) into %fixed-stack.0) ; MIR64: INLINEASM -; MIR64-DAG: $v20 = LXVD2X $x1, killed $x{{[0-9]+}} :: (load 16 from %fixed-stack.2) -; MIR64-DAG: $v26 = LXVD2X $x1, killed $x{{[0-9]+}} :: (load 16 from %fixed-stack.1) -; MIR64-DAG: $v31 = LXVD2X $x1, killed $x{{[0-9]+}} :: (load 16 from %fixed-stack.0) +; MIR64-DAG: $v20 = LXVD2X $x1, killed $x{{[0-9]+}} :: (load (s128) from %fixed-stack.2) +; MIR64-DAG: $v26 = LXVD2X $x1, killed $x{{[0-9]+}} :: (load (s128) from %fixed-stack.1) +; MIR64-DAG: $v31 = LXVD2X $x1, killed $x{{[0-9]+}} :: (load (s128) from %fixed-stack.0) ; MIR64: BLR8 implicit $lr8, implicit $rm @@ -149,33 +149,33 @@ define dso_local void @fprs_gprs_vecregs() { ; MIR32: liveins: $r14, $r25, $r31, $f14, $f21, $f31, $v20, $v26, $v31 -; MIR32-DAG: STW killed $r14, 232, $r1 :: (store 4 into %fixed-stack.8, align 8) -; MIR32-DAG: STW killed $r25, 276, $r1 :: (store 4 into %fixed-stack.7) -; MIR32-DAG: STW killed $r31, 300, $r1 :: (store 4 into %fixed-stack.6) -; MIR32-DAG: STFD killed $f14, 304, $r1 :: (store 8 into %fixed-stack.5, align 16) -; MIR32-DAG: STFD killed $f21, 360, $r1 :: (store 8 into %fixed-stack.4) -; MIR32-DAG: STFD killed $f31, 440, $r1 :: (store 8 into %fixed-stack.3) +; MIR32-DAG: STW killed $r14, 232, $r1 :: (store (s32) into %fixed-stack.8, align 8) +; MIR32-DAG: STW killed $r25, 276, $r1 :: (store (s32) into %fixed-stack.7) +; MIR32-DAG: STW killed $r31, 300, $r1 :: (store (s32) into %fixed-stack.6) +; MIR32-DAG: STFD killed $f14, 304, $r1 :: (store (s64) into %fixed-stack.5, align 16) +; MIR32-DAG: STFD killed $f21, 360, $r1 :: (store (s64) into %fixed-stack.4) +; MIR32-DAG: STFD killed $f31, 440, $r1 :: (store (s64) into %fixed-stack.3) ; MIR32-DAG: $r{{[0-9]+}} = LI 32 -; MIR32-DAG: STXVD2X killed $v20, $r1, killed $r{{[0-9]+}} :: (store 16 into %fixed-stack.2) +; MIR32-DAG: STXVD2X killed $v20, $r1, killed $r{{[0-9]+}} :: (store (s128) into %fixed-stack.2) ; MIR32-DAG: $r{{[0-9]+}} = LI 128 -; MIR32-DAG: STXVD2X killed $v26, $r1, killed $r{{[0-9]+}} :: (store 16 into %fixed-stack.1) +; MIR32-DAG: STXVD2X killed $v26, $r1, killed $r{{[0-9]+}} :: (store (s128) into %fixed-stack.1) ; MIR32-DAG: $r{{[0-9]+}} = LI 208 -; MIR32-DAG: STXVD2X killed $v31, $r1, killed $r{{[0-9]+}} :: (store 16 into %fixed-stack.0) +; MIR32-DAG: STXVD2X killed $v31, $r1, killed $r{{[0-9]+}} :: (store (s128) into %fixed-stack.0) ; MIR32-DAG: $r1 = STWU $r1, -448, $r1 ; MIR32: INLINEASM -; MIR32-DAG: $r14 = LWZ 232, $r1 :: (load 4 from %fixed-stack.8, align 8) -; MIR32-DAG: $r25 = LWZ 276, $r1 :: (load 4 from %fixed-stack.7) -; MIR32-DAG: $r31 = LWZ 300, $r1 :: (load 4 from %fixed-stack.6) -; MIR32-DAG: $f14 = LFD 304, $r1 :: (load 8 from %fixed-stack.5, align 16) -; MIR32-DAG: $f21 = LFD 360, $r1 :: (load 8 from %fixed-stack.4) -; MIR32-DAG: $f31 = LFD 440, $r1 :: (load 8 from %fixed-stack.3) -; MIR32-DAG: $v20 = LXVD2X $r1, killed $r{{[0-9]+}} :: (load 16 from %fixed-stack.2) +; MIR32-DAG: $r14 = LWZ 232, $r1 :: (load (s32) from %fixed-stack.8, align 8) +; MIR32-DAG: $r25 = LWZ 276, $r1 :: (load (s32) from %fixed-stack.7) +; MIR32-DAG: $r31 = LWZ 300, $r1 :: (load (s32) from %fixed-stack.6) +; MIR32-DAG: $f14 = LFD 304, $r1 :: (load (s64) from %fixed-stack.5, align 16) +; MIR32-DAG: $f21 = LFD 360, $r1 :: (load (s64) from %fixed-stack.4) +; MIR32-DAG: $f31 = LFD 440, $r1 :: (load (s64) from %fixed-stack.3) +; MIR32-DAG: $v20 = LXVD2X $r1, killed $r{{[0-9]+}} :: (load (s128) from %fixed-stack.2) ; MIR32-DAG: $r{{[0-9]+}} = LI 32 -; MIR32-DAG: $v26 = LXVD2X $r1, killed $r{{[0-9]+}} :: (load 16 from %fixed-stack.1) +; MIR32-DAG: $v26 = LXVD2X $r1, killed $r{{[0-9]+}} :: (load (s128) from %fixed-stack.1) ; MIR32-DAG: $r{{[0-9]+}} = LI 128 -; MIR32-DAG: $v31 = LXVD2X $r1, killed $r{{[0-9]+}} :: (load 16 from %fixed-stack.0) +; MIR32-DAG: $v31 = LXVD2X $r1, killed $r{{[0-9]+}} :: (load (s128) from %fixed-stack.0) ; MIR32-DAG: $r{{[0-9]+}} = LI 208 ; MIR32-DAG: $r1 = ADDI $r1, 448 ; MIR32-DAG: BLR implicit $lr, implicit $rm @@ -215,32 +215,32 @@ define dso_local void @fprs_gprs_vecregs() { ; MIR64: liveins: $x14, $x25, $x31, $f14, $f21, $f31, $v20, $v26, $v31 ; MIR64-DAG: $x1 = STDU $x1, -544, $x1 -; MIR64-DAG: STD killed $x14, 256, $x1 :: (store 8 into %fixed-stack.8, align 16) -; MIR64-DAG: STD killed $x25, 344, $x1 :: (store 8 into %fixed-stack.7) -; MIR64-DAG: STD killed $x31, 392, $x1 :: (store 8 into %fixed-stack.6) -; MIR64-DAG: STFD killed $f14, 400, $x1 :: (store 8 into %fixed-stack.5, align 16) -; MIR64-DAG: STFD killed $f21, 456, $x1 :: (store 8 into %fixed-stack.4) -; MIR64-DAG: STFD killed $f31, 536, $x1 :: (store 8 into %fixed-stack.3) +; MIR64-DAG: STD killed $x14, 256, $x1 :: (store (s64) into %fixed-stack.8, align 16) +; MIR64-DAG: STD killed $x25, 344, $x1 :: (store (s64) into %fixed-stack.7) +; MIR64-DAG: STD killed $x31, 392, $x1 :: (store (s64) into %fixed-stack.6) +; MIR64-DAG: STFD killed $f14, 400, $x1 :: (store (s64) into %fixed-stack.5, align 16) +; MIR64-DAG: STFD killed $f21, 456, $x1 :: (store (s64) into %fixed-stack.4) +; MIR64-DAG: STFD killed $f31, 536, $x1 :: (store (s64) into %fixed-stack.3) ; MIR64-DAG: $x{{[0-9]+}} = LI8 64 -; MIR64-DAG: STXVD2X killed $v20, $x1, killed $x{{[0-9]+}} :: (store 16 into %fixed-stack.2) +; MIR64-DAG: STXVD2X killed $v20, $x1, killed $x{{[0-9]+}} :: (store (s128) into %fixed-stack.2) ; MIR64-DAG: $x{{[0-9]+}} = LI8 160 -; MIR64-DAG: STXVD2X killed $v26, $x1, killed $x{{[0-9]+}} :: (store 16 into %fixed-stack.1) +; MIR64-DAG: STXVD2X killed $v26, $x1, killed $x{{[0-9]+}} :: (store (s128) into %fixed-stack.1) ; MIR64-DAG: $x{{[0-9]+}} = LI8 240 -; MIR64-DAG: STXVD2X killed $v31, $x1, killed $x{{[0-9]+}} :: (store 16 into %fixed-stack.0) +; MIR64-DAG: STXVD2X killed $v31, $x1, killed $x{{[0-9]+}} :: (store (s128) into %fixed-stack.0) ; MIR64: INLINEASM -; MIR64-DAG: $x14 = LD 256, $x1 :: (load 8 from %fixed-stack.8, align 16) -; MIR64-DAG: $x25 = LD 344, $x1 :: (load 8 from %fixed-stack.7) -; MIR64-DAG: $x31 = LD 392, $x1 :: (load 8 from %fixed-stack.6) -; MIR64-DAG: $f14 = LFD 400, $x1 :: (load 8 from %fixed-stack.5, align 16) -; MIR64-DAG: $f21 = LFD 456, $x1 :: (load 8 from %fixed-stack.4) -; MIR64-DAG: $f31 = LFD 536, $x1 :: (load 8 from %fixed-stack.3) -; MIR64-DAG: $v20 = LXVD2X $x1, killed $x{{[0-9]+}} :: (load 16 from %fixed-stack.2) +; MIR64-DAG: $x14 = LD 256, $x1 :: (load (s64) from %fixed-stack.8, align 16) +; MIR64-DAG: $x25 = LD 344, $x1 :: (load (s64) from %fixed-stack.7) +; MIR64-DAG: $x31 = LD 392, $x1 :: (load (s64) from %fixed-stack.6) +; MIR64-DAG: $f14 = LFD 400, $x1 :: (load (s64) from %fixed-stack.5, align 16) +; MIR64-DAG: $f21 = LFD 456, $x1 :: (load (s64) from %fixed-stack.4) +; MIR64-DAG: $f31 = LFD 536, $x1 :: (load (s64) from %fixed-stack.3) +; MIR64-DAG: $v20 = LXVD2X $x1, killed $x{{[0-9]+}} :: (load (s128) from %fixed-stack.2) ; MIR64-DAG: $x{{[0-9]+}} = LI8 64 -; MIR64-DAG: $v26 = LXVD2X $x1, killed $x{{[0-9]+}} :: (load 16 from %fixed-stack.1) +; MIR64-DAG: $v26 = LXVD2X $x1, killed $x{{[0-9]+}} :: (load (s128) from %fixed-stack.1) ; MIR64-DAG: $x{{[0-9]+}} = LI8 160 -; MIR64-DAG: $v31 = LXVD2X $x1, killed $x{{[0-9]+}} :: (load 16 from %fixed-stack.0) +; MIR64-DAG: $v31 = LXVD2X $x1, killed $x{{[0-9]+}} :: (load (s128) from %fixed-stack.0) ; MIR64-DAG: $x{{[0-9]+}} = LI8 240 ; MIR64-DAG: $x1 = ADDI8 $x1, 544 ; MIR64-DAG: BLR8 implicit $lr8, implicit $rm diff --git a/llvm/test/CodeGen/PowerPC/aix-csr-vector.ll b/llvm/test/CodeGen/PowerPC/aix-csr-vector.ll index 05b8d31bb6a5c..47210628e2692 100644 --- a/llvm/test/CodeGen/PowerPC/aix-csr-vector.ll +++ b/llvm/test/CodeGen/PowerPC/aix-csr-vector.ll @@ -76,24 +76,24 @@ define dso_local void @fprs_gprs_vecregs() { ; MIR32-NOT: STXVD2X killed $v20 ; MIR32-NOT: STXVD2X killed $v26 ; MIR32-NOT: STXVD2X killed $v31 -; MIR32-DAG: STW killed $r14, -216, $r1 :: (store 4 into %fixed-stack.5, align 8) -; MIR32-DAG: STW killed $r25, -172, $r1 :: (store 4 into %fixed-stack.4) -; MIR32-DAG: STW killed $r31, -148, $r1 :: (store 4 into %fixed-stack.3) -; MIR32-DAG: STFD killed $f14, -144, $r1 :: (store 8 into %fixed-stack.2, align 16) -; MIR32-DAG: STFD killed $f21, -88, $r1 :: (store 8 into %fixed-stack.1) -; MIR32-DAG: STFD killed $f31, -8, $r1 :: (store 8 into %fixed-stack.0) +; MIR32-DAG: STW killed $r14, -216, $r1 :: (store (s32) into %fixed-stack.5, align 8) +; MIR32-DAG: STW killed $r25, -172, $r1 :: (store (s32) into %fixed-stack.4) +; MIR32-DAG: STW killed $r31, -148, $r1 :: (store (s32) into %fixed-stack.3) +; MIR32-DAG: STFD killed $f14, -144, $r1 :: (store (s64) into %fixed-stack.2, align 16) +; MIR32-DAG: STFD killed $f21, -88, $r1 :: (store (s64) into %fixed-stack.1) +; MIR32-DAG: STFD killed $f31, -8, $r1 :: (store (s64) into %fixed-stack.0) ; MIR32-LABEL: INLINEASM ; MIR32-NOT: $v20 = LXVD2X ; MIR32-NOT: $v26 = LXVD2X ; MIR32-NOT: $v31 = LXVD2X -; MIR32-DAG: $r14 = LWZ -216, $r1 :: (load 4 from %fixed-stack.5, align 8) -; MIR32-DAG: $r25 = LWZ -172, $r1 :: (load 4 from %fixed-stack.4) -; MIR32-DAG: $r31 = LWZ -148, $r1 :: (load 4 from %fixed-stack.3) -; MIR32-DAG: $f14 = LFD -144, $r1 :: (load 8 from %fixed-stack.2, align 16) -; MIR32-DAG: $f21 = LFD -88, $r1 :: (load 8 from %fixed-stack.1) -; MIR32-DAG: $f31 = LFD -8, $r1 :: (load 8 from %fixed-stack.0) +; MIR32-DAG: $r14 = LWZ -216, $r1 :: (load (s32) from %fixed-stack.5, align 8) +; MIR32-DAG: $r25 = LWZ -172, $r1 :: (load (s32) from %fixed-stack.4) +; MIR32-DAG: $r31 = LWZ -148, $r1 :: (load (s32) from %fixed-stack.3) +; MIR32-DAG: $f14 = LFD -144, $r1 :: (load (s64) from %fixed-stack.2, align 16) +; MIR32-DAG: $f21 = LFD -88, $r1 :: (load (s64) from %fixed-stack.1) +; MIR32-DAG: $f31 = LFD -8, $r1 :: (load (s64) from %fixed-stack.0) ; MIR32-DAG: BLR implicit $lr, implicit $rm ; MIR64-LABEL: name: fprs_gprs_vecregs @@ -105,24 +105,24 @@ define dso_local void @fprs_gprs_vecregs() { ; MIR64-NOT: STXVD2X killed $v20 ; MIR64-NOT: STXVD2X killed $v26 ; MIR64-NOT: STXVD2X killed $v31 -; MIR64-DAG: STD killed $x14, -288, $x1 :: (store 8 into %fixed-stack.5, align 16) -; MIR64-DAG: STD killed $x25, -200, $x1 :: (store 8 into %fixed-stack.4) -; MIR64-DAG: STD killed $x31, -152, $x1 :: (store 8 into %fixed-stack.3) -; MIR64-DAG: STFD killed $f14, -144, $x1 :: (store 8 into %fixed-stack.2, align 16) -; MIR64-DAG: STFD killed $f21, -88, $x1 :: (store 8 into %fixed-stack.1) -; MIR64-DAG: STFD killed $f31, -8, $x1 :: (store 8 into %fixed-stack.0) +; MIR64-DAG: STD killed $x14, -288, $x1 :: (store (s64) into %fixed-stack.5, align 16) +; MIR64-DAG: STD killed $x25, -200, $x1 :: (store (s64) into %fixed-stack.4) +; MIR64-DAG: STD killed $x31, -152, $x1 :: (store (s64) into %fixed-stack.3) +; MIR64-DAG: STFD killed $f14, -144, $x1 :: (store (s64) into %fixed-stack.2, align 16) +; MIR64-DAG: STFD killed $f21, -88, $x1 :: (store (s64) into %fixed-stack.1) +; MIR64-DAG: STFD killed $f31, -8, $x1 :: (store (s64) into %fixed-stack.0) ; MIR64-LABEL: INLINEASM ; MIR64-NOT: $v20 = LXVD2X ; MIR64-NOT: $v26 = LXVD2X ; MIR64-NOT: $v31 = LXVD2X -; MIR64-DAG: $x14 = LD -288, $x1 :: (load 8 from %fixed-stack.5, align 16) -; MIR64-DAG: $x25 = LD -200, $x1 :: (load 8 from %fixed-stack.4) -; MIR64-DAG: $x31 = LD -152, $x1 :: (load 8 from %fixed-stack.3) -; MIR64-DAG: $f14 = LFD -144, $x1 :: (load 8 from %fixed-stack.2, align 16) -; MIR64-DAG: $f21 = LFD -88, $x1 :: (load 8 from %fixed-stack.1) -; MIR64-DAG: $f31 = LFD -8, $x1 :: (load 8 from %fixed-stack.0) +; MIR64-DAG: $x14 = LD -288, $x1 :: (load (s64) from %fixed-stack.5, align 16) +; MIR64-DAG: $x25 = LD -200, $x1 :: (load (s64) from %fixed-stack.4) +; MIR64-DAG: $x31 = LD -152, $x1 :: (load (s64) from %fixed-stack.3) +; MIR64-DAG: $f14 = LFD -144, $x1 :: (load (s64) from %fixed-stack.2, align 16) +; MIR64-DAG: $f21 = LFD -88, $x1 :: (load (s64) from %fixed-stack.1) +; MIR64-DAG: $f31 = LFD -8, $x1 :: (load (s64) from %fixed-stack.0) ; MIR64: BLR8 implicit $lr8, implicit $rm ;; We don't have -ppc-full-reg-names on AIX so can't reliably check-not for diff --git a/llvm/test/CodeGen/PowerPC/aix-csr.ll b/llvm/test/CodeGen/PowerPC/aix-csr.ll index 74da61b0ad67a..7b89bdad9b0e2 100644 --- a/llvm/test/CodeGen/PowerPC/aix-csr.ll +++ b/llvm/test/CodeGen/PowerPC/aix-csr.ll @@ -47,29 +47,29 @@ entry: ; MIR64: liveins: $x3, $x16, $x22, $x30 -; MIR64-DAG: STD killed $x16, -128, $x1 :: (store 8 into %fixed-stack.2, align 16) -; MIR64-DAG: STD killed $x22, -80, $x1 :: (store 8 into %fixed-stack.1, align 16) -; MIR64-DAG: STD killed $x30, -16, $x1 :: (store 8 into %fixed-stack.0, align 16) +; MIR64-DAG: STD killed $x16, -128, $x1 :: (store (s64) into %fixed-stack.2, align 16) +; MIR64-DAG: STD killed $x22, -80, $x1 :: (store (s64) into %fixed-stack.1, align 16) +; MIR64-DAG: STD killed $x30, -16, $x1 :: (store (s64) into %fixed-stack.0, align 16) ; MIR64: INLINEASM -; MIR64-DAG: $x30 = LD -16, $x1 :: (load 8 from %fixed-stack.0, align 16) -; MIR64-DAG: $x22 = LD -80, $x1 :: (load 8 from %fixed-stack.1, align 16) -; MIR64-DAG: $x16 = LD -128, $x1 :: (load 8 from %fixed-stack.2, align 16) +; MIR64-DAG: $x30 = LD -16, $x1 :: (load (s64) from %fixed-stack.0, align 16) +; MIR64-DAG: $x22 = LD -80, $x1 :: (load (s64) from %fixed-stack.1, align 16) +; MIR64-DAG: $x16 = LD -128, $x1 :: (load (s64) from %fixed-stack.2, align 16) ; MIR64: BLR8 implicit $lr8, implicit $rm, implicit $x3 ; MIR32: liveins: $r3, $r16, $r22, $r30 -; MIR32-DAG: STW killed $r16, -64, $r1 :: (store 4 into %fixed-stack.2, align 16) -; MIR32-DAG: STW killed $r22, -40, $r1 :: (store 4 into %fixed-stack.1, align 8) -; MIR32-DAG: STW killed $r30, -8, $r1 :: (store 4 into %fixed-stack.0, align 8) +; MIR32-DAG: STW killed $r16, -64, $r1 :: (store (s32) into %fixed-stack.2, align 16) +; MIR32-DAG: STW killed $r22, -40, $r1 :: (store (s32) into %fixed-stack.1, align 8) +; MIR32-DAG: STW killed $r30, -8, $r1 :: (store (s32) into %fixed-stack.0, align 8) ; MIR32: INLINEASM -; MIR32-DAG: $r30 = LWZ -8, $r1 :: (load 4 from %fixed-stack.0, align 8) -; MIR32-DAG: $r22 = LWZ -40, $r1 :: (load 4 from %fixed-stack.1, align 8) -; MIR32-DAG: $r16 = LWZ -64, $r1 :: (load 4 from %fixed-stack.2, align 16) +; MIR32-DAG: $r30 = LWZ -8, $r1 :: (load (s32) from %fixed-stack.0, align 8) +; MIR32-DAG: $r22 = LWZ -40, $r1 :: (load (s32) from %fixed-stack.1, align 8) +; MIR32-DAG: $r16 = LWZ -64, $r1 :: (load (s32) from %fixed-stack.2, align 16) ; MIR32: BLR implicit $lr, implicit $rm, implicit $r3 @@ -161,24 +161,24 @@ define dso_local double @fprs_and_gprs(i32 signext %i) { ; MIR64: $x0 = MFLR8 implicit $lr8 ; MIR64-NEXT: STD killed $x0, 16, $x1 ; MIR64-NEXT: $x1 = STDU $x1, -400, $x1 -; MIR64-DAG: STD killed $x14, 112, $x1 :: (store 8 into %fixed-stack.6, align 16) -; MIR64-DAG: STD killed $x25, 200, $x1 :: (store 8 into %fixed-stack.5) -; MIR64-DAG: STD killed $x31, 248, $x1 :: (store 8 into %fixed-stack.4) -; MIR64-DAG: STFD killed $f14, 256, $x1 :: (store 8 into %fixed-stack.3, align 16) -; MIR64-DAG: STFD killed $f19, 296, $x1 :: (store 8 into %fixed-stack.2) -; MIR64-DAG: STFD killed $f21, 312, $x1 :: (store 8 into %fixed-stack.1) -; MIR64-DAG: STFD killed $f31, 392, $x1 :: (store 8 into %fixed-stack.0) +; MIR64-DAG: STD killed $x14, 112, $x1 :: (store (s64) into %fixed-stack.6, align 16) +; MIR64-DAG: STD killed $x25, 200, $x1 :: (store (s64) into %fixed-stack.5) +; MIR64-DAG: STD killed $x31, 248, $x1 :: (store (s64) into %fixed-stack.4) +; MIR64-DAG: STFD killed $f14, 256, $x1 :: (store (s64) into %fixed-stack.3, align 16) +; MIR64-DAG: STFD killed $f19, 296, $x1 :: (store (s64) into %fixed-stack.2) +; MIR64-DAG: STFD killed $f21, 312, $x1 :: (store (s64) into %fixed-stack.1) +; MIR64-DAG: STFD killed $f31, 392, $x1 :: (store (s64) into %fixed-stack.0) ; MIR64: INLINEASM ; MIR64-NEXT: BL8_NOP -; MIR64-DAG: $f31 = LFD 392, $x1 :: (load 8 from %fixed-stack.0) -; MIR64-DAG: $f21 = LFD 312, $x1 :: (load 8 from %fixed-stack.1) -; MIR64-DAG: $f19 = LFD 296, $x1 :: (load 8 from %fixed-stack.2) -; MIR64-DAG: $f14 = LFD 256, $x1 :: (load 8 from %fixed-stack.3, align 16) -; MIR64-DAG: $x31 = LD 248, $x1 :: (load 8 from %fixed-stack.4) -; MIR64-DAG: $x25 = LD 200, $x1 :: (load 8 from %fixed-stack.5) -; MIR64-DAG: $x14 = LD 112, $x1 :: (load 8 from %fixed-stack.6, align 16) +; MIR64-DAG: $f31 = LFD 392, $x1 :: (load (s64) from %fixed-stack.0) +; MIR64-DAG: $f21 = LFD 312, $x1 :: (load (s64) from %fixed-stack.1) +; MIR64-DAG: $f19 = LFD 296, $x1 :: (load (s64) from %fixed-stack.2) +; MIR64-DAG: $f14 = LFD 256, $x1 :: (load (s64) from %fixed-stack.3, align 16) +; MIR64-DAG: $x31 = LD 248, $x1 :: (load (s64) from %fixed-stack.4) +; MIR64-DAG: $x25 = LD 200, $x1 :: (load (s64) from %fixed-stack.5) +; MIR64-DAG: $x14 = LD 112, $x1 :: (load (s64) from %fixed-stack.6, align 16) ; MIR64: $x1 = ADDI8 $x1, 400 ; MIR64-NEXT: $x0 = LD 16, $x1 ; MIR64-NEXT: MTLR8 $x0, implicit-def $lr8 @@ -190,26 +190,26 @@ define dso_local double @fprs_and_gprs(i32 signext %i) { ; MIR32: $r0 = MFLR implicit $lr ; MIR32-NEXT: STW killed $r0, 8, $r1 ; MIR32-NEXT: $r1 = STWU $r1, -288, $r1 -; MIR32-DAG: STW killed $r13, 68, $r1 :: (store 4 into %fixed-stack.7) -; MIR32-DAG: STW killed $r14, 72, $r1 :: (store 4 into %fixed-stack.6, align 8) -; MIR32-DAG: STW killed $r25, 116, $r1 :: (store 4 into %fixed-stack.5) -; MIR32-DAG: STW killed $r31, 140, $r1 :: (store 4 into %fixed-stack.4) -; MIR32-DAG: STFD killed $f14, 144, $r1 :: (store 8 into %fixed-stack.3, align 16) -; MIR32-DAG: STFD killed $f19, 184, $r1 :: (store 8 into %fixed-stack.2) -; MIR32-DAG: STFD killed $f21, 200, $r1 :: (store 8 into %fixed-stack.1) -; MIR32-DAG: STFD killed $f31, 280, $r1 :: (store 8 into %fixed-stack.0) +; MIR32-DAG: STW killed $r13, 68, $r1 :: (store (s32) into %fixed-stack.7) +; MIR32-DAG: STW killed $r14, 72, $r1 :: (store (s32) into %fixed-stack.6, align 8) +; MIR32-DAG: STW killed $r25, 116, $r1 :: (store (s32) into %fixed-stack.5) +; MIR32-DAG: STW killed $r31, 140, $r1 :: (store (s32) into %fixed-stack.4) +; MIR32-DAG: STFD killed $f14, 144, $r1 :: (store (s64) into %fixed-stack.3, align 16) +; MIR32-DAG: STFD killed $f19, 184, $r1 :: (store (s64) into %fixed-stack.2) +; MIR32-DAG: STFD killed $f21, 200, $r1 :: (store (s64) into %fixed-stack.1) +; MIR32-DAG: STFD killed $f31, 280, $r1 :: (store (s64) into %fixed-stack.0) ; MIR32: INLINEASM ; MIR32: BL_NOP -; MIR32-DAG: $f31 = LFD 280, $r1 :: (load 8 from %fixed-stack.0) -; MIR32-DAG: $f21 = LFD 200, $r1 :: (load 8 from %fixed-stack.1) -; MIR32-DAG: $f19 = LFD 184, $r1 :: (load 8 from %fixed-stack.2) -; MIR32-DAG: $f14 = LFD 144, $r1 :: (load 8 from %fixed-stack.3, align 16) -; MIR32-DAG: $r31 = LWZ 140, $r1 :: (load 4 from %fixed-stack.4) -; MIR32-DAG: $r25 = LWZ 116, $r1 :: (load 4 from %fixed-stack.5) -; MIR32-DAG: $r14 = LWZ 72, $r1 :: (load 4 from %fixed-stack.6, align 8) -; MIR32-DAG: $r13 = LWZ 68, $r1 :: (load 4 from %fixed-stack.7) +; MIR32-DAG: $f31 = LFD 280, $r1 :: (load (s64) from %fixed-stack.0) +; MIR32-DAG: $f21 = LFD 200, $r1 :: (load (s64) from %fixed-stack.1) +; MIR32-DAG: $f19 = LFD 184, $r1 :: (load (s64) from %fixed-stack.2) +; MIR32-DAG: $f14 = LFD 144, $r1 :: (load (s64) from %fixed-stack.3, align 16) +; MIR32-DAG: $r31 = LWZ 140, $r1 :: (load (s32) from %fixed-stack.4) +; MIR32-DAG: $r25 = LWZ 116, $r1 :: (load (s32) from %fixed-stack.5) +; MIR32-DAG: $r14 = LWZ 72, $r1 :: (load (s32) from %fixed-stack.6, align 8) +; MIR32-DAG: $r13 = LWZ 68, $r1 :: (load (s32) from %fixed-stack.7) ; MIR32: $r1 = ADDI $r1, 288 ; MIR32-NEXT: $r0 = LWZ 8, $r1 ; MIR32-NEXT: MTLR $r0, implicit-def $lr diff --git a/llvm/test/CodeGen/PowerPC/aix-dfltabi-rsrvd-reg.ll b/llvm/test/CodeGen/PowerPC/aix-dfltabi-rsrvd-reg.ll index 5285d752f5e73..8a07feb5f6be5 100644 --- a/llvm/test/CodeGen/PowerPC/aix-dfltabi-rsrvd-reg.ll +++ b/llvm/test/CodeGen/PowerPC/aix-dfltabi-rsrvd-reg.ll @@ -85,15 +85,15 @@ entry: ; EXTABI: body: | ; EXTABI: bb.0.entry: ; EXTABI: liveins: $f1, $x4 -; EXTABI-DAG: renamable $f0 = XFLOADf64 $zero8, renamable $x4 :: (volatile load 8 from %ir.b, align 4) +; EXTABI-DAG: renamable $f0 = XFLOADf64 $zero8, renamable $x4 :: (volatile load (s64) from %ir.b, align 4) ; EXTABI-DAG: renamable $f0 = nofpexcept XSADDDP killed renamable $f0, renamable $f1, implicit $rm ; EXTABI-DAG: renamable $vf31 = nofpexcept XSMULDP killed renamable $f1, renamable $f1, implicit $rm -; EXTABI: XFSTOREf64 killed renamable $f0, $zero8, renamable $x4 :: (volatile store 8 into %ir.b, align 4) +; EXTABI: XFSTOREf64 killed renamable $f0, $zero8, renamable $x4 :: (volatile store (s64) into %ir.b, align 4) ; EXTABI-LABEL: INLINEASM -; EXTABI-DAG: renamable $f0 = XFLOADf64 $zero8, renamable $x4 :: (volatile load 8 from %ir.b, align 4) +; EXTABI-DAG: renamable $f0 = XFLOADf64 $zero8, renamable $x4 :: (volatile load (s64) from %ir.b, align 4) ; EXTABI-DAG: renamable $f0 = nofpexcept XSADDDP killed renamable $vf31, killed renamable $f0, implicit $rm -; EXTABI-DAG: XFSTOREf64 killed renamable $f0, $zero8, renamable $x4 :: (volatile store 8 into %ir.b, align 4) -; EXTABI: renamable $f1 = XFLOADf64 $zero8, killed renamable $x4 :: (volatile load 8 from %ir.b, align 4) +; EXTABI-DAG: XFSTOREf64 killed renamable $f0, $zero8, renamable $x4 :: (volatile store (s64) into %ir.b, align 4) +; EXTABI: renamable $f1 = XFLOADf64 $zero8, killed renamable $x4 :: (volatile load (s64) from %ir.b, align 4) ; DFLABI-LABEL: vec_test @@ -144,12 +144,12 @@ entry: ; EXTABI: body: | ; EXTABI-DAG: bb.0.entry: ; EXTABI-DAG: liveins: $v2, $x3 -; EXTABI-DAG: renamable $v3 = LXVW4X $zero8, renamable $x3 :: (volatile load 16 from %ir.b, align 4) +; EXTABI-DAG: renamable $v3 = LXVW4X $zero8, renamable $x3 :: (volatile load (s128) from %ir.b, align 4) ; EXTABI-DAG: renamable $v31 = COPY $v2 ; EXTABI-DAG: renamable $v2 = VADDUWM killed renamable $v3, $v2 ; EXTABI-LABEL: INLINEASM -; EXTABI-DAG: renamable $v2 = LXVW4X $zero8, renamable $x3 :: (volatile load 16 from %ir.b, align 4) +; EXTABI-DAG: renamable $v2 = LXVW4X $zero8, renamable $x3 :: (volatile load (s128) from %ir.b, align 4) ; EXTABI-DAG: renamable $v3 = VMULUWM killed renamable $v31, renamable $v31 ; EXTABI-DAG: renamable $v2 = VADDUWM killed renamable $v3, killed renamable $v2 -; EXTABI-DAG: STXVW4X killed renamable $v2, $zero8, renamable $x3 :: (volatile store 16 into %ir.b, align 4) -; EXTABI: renamable $v2 = LXVW4X $zero8, killed renamable $x3 :: (volatile load 16 from %ir.b, align 4) +; EXTABI-DAG: STXVW4X killed renamable $v2, $zero8, renamable $x3 :: (volatile store (s128) into %ir.b, align 4) +; EXTABI: renamable $v2 = LXVW4X $zero8, killed renamable $x3 :: (volatile load (s128) from %ir.b, align 4) diff --git a/llvm/test/CodeGen/PowerPC/aix-indirect-call.ll b/llvm/test/CodeGen/PowerPC/aix-indirect-call.ll index 2a0a6dfe21866..0e7844b6ba7b1 100644 --- a/llvm/test/CodeGen/PowerPC/aix-indirect-call.ll +++ b/llvm/test/CodeGen/PowerPC/aix-indirect-call.ll @@ -29,19 +29,19 @@ define signext i32 @callThroughPtr(i32 ()* nocapture) { ; MIR32: liveins: $r3 ; MIR32: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 ; MIR32-DAG: STW $r2, 20, $r1 -; MIR32-DAG: renamable $r11 = LWZ 8, renamable $r3 :: (dereferenceable invariant load 4 from %ir.0 + 8) -; MIR32-DAG: renamable $[[REG:r[0-9]+]] = LWZ 0, renamable $r3 :: (dereferenceable invariant load 4 from %ir.0) -; MIR32-DAG: $r2 = LWZ 4, killed renamable $r3 :: (dereferenceable invariant load 4 from %ir.0 + 4) +; MIR32-DAG: renamable $r11 = LWZ 8, renamable $r3 :: (dereferenceable invariant load (s32) from %ir.0 + 8) +; MIR32-DAG: renamable $[[REG:r[0-9]+]] = LWZ 0, renamable $r3 :: (dereferenceable invariant load (s32) from %ir.0) +; MIR32-DAG: $r2 = LWZ 4, killed renamable $r3 :: (dereferenceable invariant load (s32) from %ir.0 + 4) ; MIR32-DAG: MTCTR killed renamable $[[REG]], implicit-def $ctr ; MIR32-NEXT: BCTRL_LWZinto_toc 20, $r1, csr_aix32, implicit-def dead $lr, implicit-def dead $r2, implicit $ctr, implicit $rm, implicit $r11, implicit $r2, implicit-def $r1, implicit-def $r3 ; MIR32-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 ; MIR64: liveins: $x3 ; MIR64: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; MIR64-DAG: STD $x2, 40, $x1 :: (store 8 into stack + 40) -; MIR64-DAG: renamable $x11 = LD 16, renamable $x3 :: (dereferenceable invariant load 8 from %ir.0 + 16) -; MIR64-DAG: renamable $[[REG:x[0-9]+]] = LD 0, renamable $x3 :: (dereferenceable invariant load 8 from %ir.0) -; MIR64-DAG: $x2 = LD 8, killed renamable $x3 :: (dereferenceable invariant load 8 from %ir.0 + 8) +; MIR64-DAG: STD $x2, 40, $x1 :: (store (s64) into stack + 40) +; MIR64-DAG: renamable $x11 = LD 16, renamable $x3 :: (dereferenceable invariant load (s64) from %ir.0 + 16) +; MIR64-DAG: renamable $[[REG:x[0-9]+]] = LD 0, renamable $x3 :: (dereferenceable invariant load (s64) from %ir.0) +; MIR64-DAG: $x2 = LD 8, killed renamable $x3 :: (dereferenceable invariant load (s64) from %ir.0 + 8) ; MIR64-DAG: MTCTR8 killed renamable $[[REG]], implicit-def $ctr8 ; MIR64-NEXT: BCTRL8_LDinto_toc 40, $x1, csr_ppc64, implicit-def dead $lr8, implicit-def dead $x2, implicit $ctr8, implicit $rm, implicit $x11, implicit $x2, implicit-def $r1, implicit-def $x3 ; MIR64-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 @@ -88,11 +88,11 @@ define void @callThroughPtrWithArgs(void (i32, i16, i64)* nocapture) { ; MIR32: liveins: $r3 ; MIR32: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 -; MIR32-DAG: renamable $[[REG:r[0-9]+]] = LWZ 0, renamable $r3 :: (dereferenceable invariant load 4 from %ir.0) +; MIR32-DAG: renamable $[[REG:r[0-9]+]] = LWZ 0, renamable $r3 :: (dereferenceable invariant load (s32) from %ir.0) ; MIR32-DAG: MTCTR killed renamable $[[REG]], implicit-def $ctr -; MIR32-DAG: STW $r2, 20, $r1 :: (store 4 into stack + 20) -; MIR32-DAG: renamable $r11 = LWZ 8, renamable $r3 :: (dereferenceable invariant load 4 from %ir.0 + 8) -; MIR32-DAG: $r2 = LWZ 4, killed renamable $r3 :: (dereferenceable invariant load 4 from %ir.0 + 4) +; MIR32-DAG: STW $r2, 20, $r1 :: (store (s32) into stack + 20) +; MIR32-DAG: renamable $r11 = LWZ 8, renamable $r3 :: (dereferenceable invariant load (s32) from %ir.0 + 8) +; MIR32-DAG: $r2 = LWZ 4, killed renamable $r3 :: (dereferenceable invariant load (s32) from %ir.0 + 4) ; MIR32-DAG: $r3 = LI 1 ; MIR32-DAG: $r4 = LI 2 ; MIR32-DAG: $r5 = LI 0 @@ -102,11 +102,11 @@ define void @callThroughPtrWithArgs(void (i32, i16, i64)* nocapture) { ; MIR64: liveins: $x3 ; MIR64: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; MIR64-DAG: renamable $[[REG:x[0-9]+]] = LD 0, renamable $x3 :: (dereferenceable invariant load 8 from %ir.0) +; MIR64-DAG: renamable $[[REG:x[0-9]+]] = LD 0, renamable $x3 :: (dereferenceable invariant load (s64) from %ir.0) ; MIR64-DAG: MTCTR8 killed renamable $[[REG]], implicit-def $ctr8 -; MIR64-DAG: STD $x2, 40, $x1 :: (store 8 into stack + 40) -; MIR64-DAG: renamable $x11 = LD 16, renamable $x3 :: (dereferenceable invariant load 8 from %ir.0 + 16) -; MIR64-DAG: $x2 = LD 8, killed renamable $x3 :: (dereferenceable invariant load 8 from %ir.0 + 8) +; MIR64-DAG: STD $x2, 40, $x1 :: (store (s64) into stack + 40) +; MIR64-DAG: renamable $x11 = LD 16, renamable $x3 :: (dereferenceable invariant load (s64) from %ir.0 + 16) +; MIR64-DAG: $x2 = LD 8, killed renamable $x3 :: (dereferenceable invariant load (s64) from %ir.0 + 8) ; MIR64-DAG: $x3 = LI8 1 ; MIR64-DAG: $x4 = LI8 2 ; MIR64-DAG: $x5 = LI8 3 diff --git a/llvm/test/CodeGen/PowerPC/aix-lower-block-address.ll b/llvm/test/CodeGen/PowerPC/aix-lower-block-address.ll index 73452d51d4ad9..a5af9ee18a107 100644 --- a/llvm/test/CodeGen/PowerPC/aix-lower-block-address.ll +++ b/llvm/test/CodeGen/PowerPC/aix-lower-block-address.ll @@ -36,15 +36,15 @@ __here: ret void } -; 32SMALL-MIR: renamable $r[[REG1:[0-9]+]] = LWZtoc blockaddress(@foo, %ir-block.__here), $r2 :: (load 4 from got) +; 32SMALL-MIR: renamable $r[[REG1:[0-9]+]] = LWZtoc blockaddress(@foo, %ir-block.__here), $r2 :: (load (s32) from got) ; 32LARGE-MIR: renamable $r[[REG1:[0-9]+]] = ADDIStocHA $r2, blockaddress(@foo, %ir-block.__here) -; 32LARGE-MIR: renamable $r[[REG2:[0-9]+]] = LWZtocL blockaddress(@foo, %ir-block.__here), killed renamable $r[[REG1]], implicit $r2 :: (load 4 from got) +; 32LARGE-MIR: renamable $r[[REG2:[0-9]+]] = LWZtocL blockaddress(@foo, %ir-block.__here), killed renamable $r[[REG1]], implicit $r2 :: (load (s32) from got) -; 64SMALL-MIR: renamable $x[[REG1:[0-9]+]] = LDtocBA blockaddress(@foo, %ir-block.__here), $x2 :: (load 8 from got) +; 64SMALL-MIR: renamable $x[[REG1:[0-9]+]] = LDtocBA blockaddress(@foo, %ir-block.__here), $x2 :: (load (s64) from got) ; 64LARGE-MIR: renamable $x[[REG1:[0-9]+]] = ADDIStocHA8 $x2, blockaddress(@foo, %ir-block.__here) -; 64LARGE-MIR: renamable $x[[REG2:[0-9]+]] = LDtocL blockaddress(@foo, %ir-block.__here), killed renamable $x[[REG1]], implicit $x2 :: (load 8 from got) +; 64LARGE-MIR: renamable $x[[REG2:[0-9]+]] = LDtocL blockaddress(@foo, %ir-block.__here), killed renamable $x[[REG1]], implicit $x2 :: (load (s64) from got) ; 32SMALL-ASM-LABEL: foo ; 32SMALL-ASM: .foo: diff --git a/llvm/test/CodeGen/PowerPC/aix-lower-constant-pool-index.ll b/llvm/test/CodeGen/PowerPC/aix-lower-constant-pool-index.ll index c774412e6b55f..9580d4d8392fd 100644 --- a/llvm/test/CodeGen/PowerPC/aix-lower-constant-pool-index.ll +++ b/llvm/test/CodeGen/PowerPC/aix-lower-constant-pool-index.ll @@ -31,19 +31,19 @@ entry: ret float 5.500000e+00 } -; 32SMALL-MIR: renamable $r[[REG1:[0-9]+]] = LWZtoc %const.0, $r2 :: (load 4 from got) -; 32SMALL-MIR: renamable $f[[REG2:[0-9]+]] = LFS 0, killed renamable $r[[REG1]] :: (load 4 from constant-pool) +; 32SMALL-MIR: renamable $r[[REG1:[0-9]+]] = LWZtoc %const.0, $r2 :: (load (s32) from got) +; 32SMALL-MIR: renamable $f[[REG2:[0-9]+]] = LFS 0, killed renamable $r[[REG1]] :: (load (s32) from constant-pool) ; 32LARGE-MIR: renamable $r[[REG1:[0-9]+]] = ADDIStocHA $r2, %const.0 -; 32LARGE-MIR: renamable $r[[REG2:[0-9]+]] = LWZtocL %const.0, killed renamable $r[[REG1]], implicit $r2 :: (load 4 from got) -; 32LARGE-MIR: renamable $f[[REG3:[0-9]+]] = LFS 0, killed renamable $r[[REG2]] :: (load 4 from constant-pool) +; 32LARGE-MIR: renamable $r[[REG2:[0-9]+]] = LWZtocL %const.0, killed renamable $r[[REG1]], implicit $r2 :: (load (s32) from got) +; 32LARGE-MIR: renamable $f[[REG3:[0-9]+]] = LFS 0, killed renamable $r[[REG2]] :: (load (s32) from constant-pool) -; 64SMALL-MIR: renamable $x[[REG1:[0-9]+]] = LDtocCPT %const.0, $x2 :: (load 8 from got) -; 64SMALL-MIR: renamable $f[[REG2:[0-9]+]] = LFS 0, killed renamable $x[[REG1]] :: (load 4 from constant-pool) +; 64SMALL-MIR: renamable $x[[REG1:[0-9]+]] = LDtocCPT %const.0, $x2 :: (load (s64) from got) +; 64SMALL-MIR: renamable $f[[REG2:[0-9]+]] = LFS 0, killed renamable $x[[REG1]] :: (load (s32) from constant-pool) ; 64LARGE-MIR: renamable $x[[REG1:[0-9]+]] = ADDIStocHA8 $x2, %const.0 -; 64LARGE-MIR: renamable $x[[REG2:[0-9]+]] = LDtocL %const.0, killed renamable $x[[REG1]], implicit $x2 :: (load 8 from got) -; 64LARGE-MIR: renamable $f[[REG3:[0-9]+]] = LFS 0, killed renamable $x[[REG2]] :: (load 4 from constant-pool) +; 64LARGE-MIR: renamable $x[[REG2:[0-9]+]] = LDtocL %const.0, killed renamable $x[[REG1]], implicit $x2 :: (load (s64) from got) +; 64LARGE-MIR: renamable $f[[REG3:[0-9]+]] = LFS 0, killed renamable $x[[REG2]] :: (load (s32) from constant-pool) ; 32SMALL-ASM: .csect .rodata[RO],2 ; 32SMALL-ASM: .align 2 diff --git a/llvm/test/CodeGen/PowerPC/aix-lower-jump-table.ll b/llvm/test/CodeGen/PowerPC/aix-lower-jump-table.ll index 4201b7450a6a0..24803e91a6d17 100644 --- a/llvm/test/CodeGen/PowerPC/aix-lower-jump-table.ll +++ b/llvm/test/CodeGen/PowerPC/aix-lower-jump-table.ll @@ -62,26 +62,26 @@ } -; 32SMALL-MIR: renamable $r[[REG1:[0-9]+]] = LWZtoc %jump-table.0, $r2 :: (load 4 from got) +; 32SMALL-MIR: renamable $r[[REG1:[0-9]+]] = LWZtoc %jump-table.0, $r2 :: (load (s32) from got) ; 32SMALL-MIR: renamable $r[[REG3:[0-9]+]] = RLWINM killed renamable $r[[REG2:[0-9]+]], 2, 0, 29 -; 32SMALL-MIR: renamable $r[[REG4:[0-9]+]] = LWZX killed renamable $r[[REG3]], renamable $r[[REG1]] :: (load 4 from jump-table) +; 32SMALL-MIR: renamable $r[[REG4:[0-9]+]] = LWZX killed renamable $r[[REG3]], renamable $r[[REG1]] :: (load (s32) from jump-table) ; 32SMALL-MIR: renamable $r[[REG5:[0-9]+]] = ADD4 killed renamable $r[[REG4]], killed renamable $r[[REG1]] ; 32LARGE-MIR: renamable $r[[REG1:[0-9]+]] = ADDIStocHA $r2, %jump-table.0 -; 32LARGE-MIR: renamable $r[[REG2:[0-9]+]] = LWZtocL %jump-table.0, killed renamable $r[[REG1]], implicit $r2 :: (load 4 from got) +; 32LARGE-MIR: renamable $r[[REG2:[0-9]+]] = LWZtocL %jump-table.0, killed renamable $r[[REG1]], implicit $r2 :: (load (s32) from got) ; 32LARGE-MIR: renamable $r[[REG4:[0-9]+]] = RLWINM killed renamable $r[[REG3:[0-9]+]], 2, 0, 29 -; 32LARGE-MIR: renamable $r[[REG5:[0-9]+]] = LWZX killed renamable $r[[REG4]], renamable $r[[REG2]] :: (load 4 from jump-table) +; 32LARGE-MIR: renamable $r[[REG5:[0-9]+]] = LWZX killed renamable $r[[REG4]], renamable $r[[REG2]] :: (load (s32) from jump-table) ; 32LARGE-MIR: renamable $r[[REG6:[0-9]+]] = ADD4 killed renamable $r[[REG5]], killed renamable $r[[REG2]] -; 64SMALL-MIR: renamable $x[[REG1:[0-9]+]] = LDtocJTI %jump-table.0, $x2 :: (load 8 from got) +; 64SMALL-MIR: renamable $x[[REG1:[0-9]+]] = LDtocJTI %jump-table.0, $x2 :: (load (s64) from got) ; 64SMALL-MIR: renamable $x[[REG3:[0-9]+]] = RLDIC killed renamable $x[[REG2:[0-9]+]], 2, 30 -; 64SMALL-MIR: renamable $x[[REG4:[0-9]+]] = LWAX killed renamable $x[[REG3]], renamable $x[[REG1]] :: (load 4 from jump-table) +; 64SMALL-MIR: renamable $x[[REG4:[0-9]+]] = LWAX killed renamable $x[[REG3]], renamable $x[[REG1]] :: (load (s32) from jump-table) ; 64SMALL-MIR: renamable $x[[REG6:[0-9]+]] = ADD8 killed renamable $x[[REG4]], killed renamable $x[[REG1]] ; 64LARGE-MIR: renamable $x[[REG1:[0-9]+]] = ADDIStocHA8 $x2, %jump-table.0 -; 64LARGE-MIR: renamable $x[[REG2:[0-9]+]] = LDtocL %jump-table.0, killed renamable $x[[REG1]], implicit $x2 :: (load 8 from got) +; 64LARGE-MIR: renamable $x[[REG2:[0-9]+]] = LDtocL %jump-table.0, killed renamable $x[[REG1]], implicit $x2 :: (load (s64) from got) ; 64LARGE-MIR: renamable $x[[REG4:[0-9]+]] = RLDIC killed renamable $x[[REG3:[0-9]+]], 2, 30 -; 64LARGE-MIR: renamable $x[[REG5:[0-9]+]] = LWAX killed renamable $x[[REG4]], renamable $x[[REG2]] :: (load 4 from jump-table) +; 64LARGE-MIR: renamable $x[[REG5:[0-9]+]] = LWAX killed renamable $x[[REG4]], renamable $x[[REG2]] :: (load (s32) from jump-table) ; 64LARGE-MIR: renamable $x[[REG6:[0-9]+]] = ADD8 killed renamable $x[[REG5]], killed renamable $x[[REG2]] ; 32SMALL-ASM-LABEL: jump_table diff --git a/llvm/test/CodeGen/PowerPC/aix-vec-arg-spills-mir.ll b/llvm/test/CodeGen/PowerPC/aix-vec-arg-spills-mir.ll index f3e95b54be7c2..12462d65eb3cc 100644 --- a/llvm/test/CodeGen/PowerPC/aix-vec-arg-spills-mir.ll +++ b/llvm/test/CodeGen/PowerPC/aix-vec-arg-spills-mir.ll @@ -14,7 +14,7 @@ define double @caller() { ; MIR32-LABEL: name: caller ; MIR32: bb.0.entry: -; MIR32: renamable $r3 = LWZtoc @__const.caller.t, $r2 :: (load 4 from got) +; MIR32: renamable $r3 = LWZtoc @__const.caller.t, $r2 :: (load (s32) from got) ; MIR32: renamable $r4 = LI 31 ; MIR32: renamable $v2 = LVX renamable $r3, killed renamable $r4 ; MIR32: renamable $r4 = LI 16 @@ -22,11 +22,11 @@ define double @caller() { ; MIR32: renamable $v4 = LVSL $zero, renamable $r3 ; MIR32: renamable $v2 = VPERM renamable $v3, killed renamable $v2, renamable $v4 ; MIR32: renamable $r4 = LI 172 -; MIR32: STXVW4X killed renamable $v2, $r1, killed renamable $r4 :: (store 16 into unknown-address + 16, align 4) +; MIR32: STXVW4X killed renamable $v2, $r1, killed renamable $r4 :: (store (s128) into unknown-address + 16, align 4) ; MIR32: renamable $v2 = LVX $zero, killed renamable $r3 ; MIR32: renamable $v2 = VPERM killed renamable $v2, killed renamable $v3, killed renamable $v4 ; MIR32: renamable $r3 = LI 156 -; MIR32: STXVW4X killed renamable $v2, $r1, killed renamable $r3 :: (store 16, align 4) +; MIR32: STXVW4X killed renamable $v2, $r1, killed renamable $r3 :: (store (s128), align 4) ; MIR32: ADJCALLSTACKDOWN 188, 0, implicit-def dead $r1, implicit $r1 ; MIR32: renamable $vsl0 = XXLXORz ; MIR32: $f1 = XXLXORdpz @@ -51,41 +51,41 @@ define double @caller() { ; MIR32: renamable $r3 = LI 136 ; MIR32: $f8 = XXLXORdpz ; MIR32: renamable $r4 = LI 120 -; MIR32: renamable $r5 = LWZtoc %const.0, $r2 :: (load 4 from got) -; MIR32: STXVW4X renamable $vsl0, $r1, killed renamable $r3 :: (store 16, align 8) +; MIR32: renamable $r5 = LWZtoc %const.0, $r2 :: (load (s32) from got) +; MIR32: STXVW4X renamable $vsl0, $r1, killed renamable $r3 :: (store (s128), align 8) ; MIR32: $f9 = XXLXORdpz ; MIR32: renamable $r3 = LI 104 -; MIR32: STXVW4X renamable $vsl0, $r1, killed renamable $r4 :: (store 16, align 8) +; MIR32: STXVW4X renamable $vsl0, $r1, killed renamable $r4 :: (store (s128), align 8) ; MIR32: $f10 = XXLXORdpz -; MIR32: STXVW4X renamable $vsl0, $r1, killed renamable $r3 :: (store 16, align 8) +; MIR32: STXVW4X renamable $vsl0, $r1, killed renamable $r3 :: (store (s128), align 8) ; MIR32: renamable $r3 = LI 88 ; MIR32: $f11 = XXLXORdpz -; MIR32: STXVW4X renamable $vsl0, $r1, killed renamable $r3 :: (store 16, align 8) +; MIR32: STXVW4X renamable $vsl0, $r1, killed renamable $r3 :: (store (s128), align 8) ; MIR32: renamable $r3 = LI 72 -; MIR32: renamable $v0 = LXVD2X $zero, killed renamable $r5 :: (load 16 from constant-pool) +; MIR32: renamable $v0 = LXVD2X $zero, killed renamable $r5 :: (load (s128) from constant-pool) ; MIR32: $f12 = XXLXORdpz -; MIR32: STXVW4X killed renamable $vsl0, $r1, killed renamable $r3 :: (store 16, align 8) +; MIR32: STXVW4X killed renamable $vsl0, $r1, killed renamable $r3 :: (store (s128), align 8) ; MIR32: $f13 = XXLXORdpz ; MIR32: renamable $r5 = LI 48 ; MIR32: renamable $r6 = LI 512 ; MIR32: $r3 = LI 128 ; MIR32: $r4 = LI 256 -; MIR32: STXVD2X killed renamable $v0, $r1, killed renamable $r5 :: (store 16) -; MIR32: STW killed renamable $r6, 152, $r1 :: (store 4) +; MIR32: STXVD2X killed renamable $v0, $r1, killed renamable $r5 :: (store (s128)) +; MIR32: STW killed renamable $r6, 152, $r1 :: (store (s32)) ; MIR32: BL_NOP , csr_aix32_altivec, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $f1, implicit $f2, implicit $v2, implicit $v3, implicit $v4, implicit $v5, implicit killed $v6, implicit killed $v7, implicit killed $v8, implicit killed $v9, implicit killed $v10, implicit killed $v11, implicit killed $v12, implicit killed $v13, implicit $f3, implicit $f4, implicit $f5, implicit $f6, implicit $f7, implicit $f8, implicit $f9, implicit $f10, implicit $f11, implicit $f12, implicit $f13, implicit $r2, implicit-def $r1, implicit-def $f1 ; MIR32: ADJCALLSTACKUP 188, 0, implicit-def dead $r1, implicit $r1 ; MIR32: BLR implicit $lr, implicit $rm, implicit $f1 ; MIR64-LABEL: name: caller ; MIR64: bb.0.entry: -; MIR64: renamable $x3 = LDtoc @__const.caller.t, $x2 :: (load 8 from got) +; MIR64: renamable $x3 = LDtoc @__const.caller.t, $x2 :: (load (s64) from got) ; MIR64: renamable $x4 = LI8 16 -; MIR64: renamable $vsl0 = LXVD2X renamable $x3, killed renamable $x4 :: (load 16 from unknown-address + 16, align 8) +; MIR64: renamable $vsl0 = LXVD2X renamable $x3, killed renamable $x4 :: (load (s128) from unknown-address + 16, align 8) ; MIR64: renamable $x4 = LI8 208 -; MIR64: STXVD2X killed renamable $vsl0, $x1, killed renamable $x4 :: (store 16 into unknown-address + 16, align 4) -; MIR64: renamable $vsl0 = LXVD2X $zero8, killed renamable $x3 :: (load 16, align 8) +; MIR64: STXVD2X killed renamable $vsl0, $x1, killed renamable $x4 :: (store (s128) into unknown-address + 16, align 4) +; MIR64: renamable $vsl0 = LXVD2X $zero8, killed renamable $x3 :: (load (s128), align 8) ; MIR64: renamable $x3 = LI8 192 -; MIR64: STXVD2X killed renamable $vsl0, $x1, killed renamable $x3 :: (store 16, align 4) +; MIR64: STXVD2X killed renamable $vsl0, $x1, killed renamable $x3 :: (store (s128), align 4) ; MIR64: ADJCALLSTACKDOWN 224, 0, implicit-def dead $r1, implicit $r1 ; MIR64: $f1 = XXLXORdpz ; MIR64: $f2 = XXLXORdpz @@ -102,34 +102,34 @@ define double @caller() { ; MIR64: $v12 = XXLXORz ; MIR64: $v13 = XXLXORz ; MIR64: $f3 = XXLXORdpz -; MIR64: renamable $x3 = LDtocCPT %const.0, $x2 :: (load 8 from got) +; MIR64: renamable $x3 = LDtocCPT %const.0, $x2 :: (load (s64) from got) ; MIR64: $f4 = XXLXORdpz ; MIR64: $f5 = XXLXORdpz ; MIR64: $f6 = XXLXORdpz -; MIR64: renamable $x4 = LDtocCPT %const.1, $x2 :: (load 8 from got) -; MIR64: renamable $vsl0 = LXVD2X $zero8, killed renamable $x3 :: (load 16 from constant-pool) +; MIR64: renamable $x4 = LDtocCPT %const.1, $x2 :: (load (s64) from got) +; MIR64: renamable $vsl0 = LXVD2X $zero8, killed renamable $x3 :: (load (s128) from constant-pool) ; MIR64: $f7 = XXLXORdpz ; MIR64: $f8 = XXLXORdpz ; MIR64: renamable $x3 = LI8 160 ; MIR64: $f9 = XXLXORdpz ; MIR64: renamable $x5 = LI8 144 -; MIR64: renamable $vsl13 = LXVD2X $zero8, killed renamable $x4 :: (load 16 from constant-pool) -; MIR64: STXVD2X renamable $vsl0, $x1, killed renamable $x3 :: (store 16, align 8) +; MIR64: renamable $vsl13 = LXVD2X $zero8, killed renamable $x4 :: (load (s128) from constant-pool) +; MIR64: STXVD2X renamable $vsl0, $x1, killed renamable $x3 :: (store (s128), align 8) ; MIR64: $f10 = XXLXORdpz ; MIR64: renamable $x3 = LI8 128 -; MIR64: STXVD2X renamable $vsl0, $x1, killed renamable $x5 :: (store 16, align 8) +; MIR64: STXVD2X renamable $vsl0, $x1, killed renamable $x5 :: (store (s128), align 8) ; MIR64: $f11 = XXLXORdpz ; MIR64: renamable $x4 = LI8 80 -; MIR64: STXVD2X killed renamable $vsl0, $x1, killed renamable $x3 :: (store 16, align 8) +; MIR64: STXVD2X killed renamable $vsl0, $x1, killed renamable $x3 :: (store (s128), align 8) ; MIR64: $f12 = XXLXORdpz -; MIR64: STXVD2X killed renamable $vsl13, $x1, killed renamable $x4 :: (store 16) +; MIR64: STXVD2X killed renamable $vsl13, $x1, killed renamable $x4 :: (store (s128)) ; MIR64: $f13 = XXLXORdpz ; MIR64: renamable $x5 = LI8 512 ; MIR64: renamable $x6 = LI8 0 ; MIR64: $x3 = LI8 128 ; MIR64: $x4 = LI8 256 -; MIR64: STD killed renamable $x5, 184, $x1 :: (store 8) -; MIR64: STD killed renamable $x6, 176, $x1 :: (store 8) +; MIR64: STD killed renamable $x5, 184, $x1 :: (store (s64)) +; MIR64: STD killed renamable $x6, 176, $x1 :: (store (s64)) ; MIR64: BL8_NOP , csr_ppc64_altivec, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit $f1, implicit $f2, implicit killed $v2, implicit killed $v3, implicit killed $v4, implicit killed $v5, implicit killed $v6, implicit killed $v7, implicit killed $v8, implicit killed $v9, implicit killed $v10, implicit killed $v11, implicit killed $v12, implicit killed $v13, implicit $f3, implicit $f4, implicit $f5, implicit $f6, implicit $f7, implicit $f8, implicit $f9, implicit $f10, implicit $f11, implicit $f12, implicit $f13, implicit $x2, implicit-def $r1, implicit-def $f1 ; MIR64: ADJCALLSTACKUP 224, 0, implicit-def dead $r1, implicit $r1 ; MIR64: BLR8 implicit $lr8, implicit $rm, implicit $f1 diff --git a/llvm/test/CodeGen/PowerPC/aix-vector-vararg-caller.ll b/llvm/test/CodeGen/PowerPC/aix-vector-vararg-caller.ll index 9f8ab9332987f..472be4fa63643 100644 --- a/llvm/test/CodeGen/PowerPC/aix-vector-vararg-caller.ll +++ b/llvm/test/CodeGen/PowerPC/aix-vector-vararg-caller.ll @@ -11,48 +11,48 @@ define <4 x i32> @caller() { ; 32BIT-LABEL: name: caller ; 32BIT: bb.0.entry: ; 32BIT: ADJCALLSTACKDOWN 176, 0, implicit-def dead $r1, implicit $r1 - ; 32BIT: [[LWZtoc:%[0-9]+]]:gprc = LWZtoc %const.0, $r2 :: (load 4 from got) - ; 32BIT: [[LXVW4X:%[0-9]+]]:vsrc = LXVW4X $zero, killed [[LWZtoc]] :: (load 16 from constant-pool) + ; 32BIT: [[LWZtoc:%[0-9]+]]:gprc = LWZtoc %const.0, $r2 :: (load (s32) from got) + ; 32BIT: [[LXVW4X:%[0-9]+]]:vsrc = LXVW4X $zero, killed [[LWZtoc]] :: (load (s128) from constant-pool) ; 32BIT: [[LI:%[0-9]+]]:gprc = LI 48 - ; 32BIT: STXVW4X killed [[LXVW4X]], $r1, killed [[LI]] :: (store 16) - ; 32BIT: [[LWZtoc1:%[0-9]+]]:gprc = LWZtoc %const.1, $r2 :: (load 4 from got) - ; 32BIT: [[LXVW4X1:%[0-9]+]]:vsrc = LXVW4X $zero, killed [[LWZtoc1]] :: (load 16 from constant-pool) + ; 32BIT: STXVW4X killed [[LXVW4X]], $r1, killed [[LI]] :: (store (s128)) + ; 32BIT: [[LWZtoc1:%[0-9]+]]:gprc = LWZtoc %const.1, $r2 :: (load (s32) from got) + ; 32BIT: [[LXVW4X1:%[0-9]+]]:vsrc = LXVW4X $zero, killed [[LWZtoc1]] :: (load (s128) from constant-pool) ; 32BIT: [[LI1:%[0-9]+]]:gprc = LI 32 - ; 32BIT: STXVW4X killed [[LXVW4X1]], $r1, killed [[LI1]] :: (store 16) - ; 32BIT: [[LWZtoc2:%[0-9]+]]:gprc = LWZtoc %const.2, $r2 :: (load 4 from got) - ; 32BIT: [[LXVW4X2:%[0-9]+]]:vsrc = LXVW4X $zero, killed [[LWZtoc2]] :: (load 16 from constant-pool) + ; 32BIT: STXVW4X killed [[LXVW4X1]], $r1, killed [[LI1]] :: (store (s128)) + ; 32BIT: [[LWZtoc2:%[0-9]+]]:gprc = LWZtoc %const.2, $r2 :: (load (s32) from got) + ; 32BIT: [[LXVW4X2:%[0-9]+]]:vsrc = LXVW4X $zero, killed [[LWZtoc2]] :: (load (s128) from constant-pool) ; 32BIT: [[LI2:%[0-9]+]]:gprc = LI 160 - ; 32BIT: STXVW4X killed [[LXVW4X2]], $r1, killed [[LI2]] :: (store 16) - ; 32BIT: [[LWZtoc3:%[0-9]+]]:gprc = LWZtoc %const.3, $r2 :: (load 4 from got) - ; 32BIT: [[LXVW4X3:%[0-9]+]]:vsrc = LXVW4X $zero, killed [[LWZtoc3]] :: (load 16 from constant-pool) + ; 32BIT: STXVW4X killed [[LXVW4X2]], $r1, killed [[LI2]] :: (store (s128)) + ; 32BIT: [[LWZtoc3:%[0-9]+]]:gprc = LWZtoc %const.3, $r2 :: (load (s32) from got) + ; 32BIT: [[LXVW4X3:%[0-9]+]]:vsrc = LXVW4X $zero, killed [[LWZtoc3]] :: (load (s128) from constant-pool) ; 32BIT: [[LI3:%[0-9]+]]:gprc = LI 144 - ; 32BIT: STXVW4X killed [[LXVW4X3]], $r1, killed [[LI3]] :: (store 16) - ; 32BIT: [[LWZtoc4:%[0-9]+]]:gprc = LWZtoc %const.4, $r2 :: (load 4 from got) - ; 32BIT: [[LXVW4X4:%[0-9]+]]:vsrc = LXVW4X $zero, killed [[LWZtoc4]] :: (load 16 from constant-pool) + ; 32BIT: STXVW4X killed [[LXVW4X3]], $r1, killed [[LI3]] :: (store (s128)) + ; 32BIT: [[LWZtoc4:%[0-9]+]]:gprc = LWZtoc %const.4, $r2 :: (load (s32) from got) + ; 32BIT: [[LXVW4X4:%[0-9]+]]:vsrc = LXVW4X $zero, killed [[LWZtoc4]] :: (load (s128) from constant-pool) ; 32BIT: [[LI4:%[0-9]+]]:gprc = LI 128 - ; 32BIT: STXVW4X killed [[LXVW4X4]], $r1, killed [[LI4]] :: (store 16) - ; 32BIT: [[LWZtoc5:%[0-9]+]]:gprc = LWZtoc %const.5, $r2 :: (load 4 from got) - ; 32BIT: [[LXVW4X5:%[0-9]+]]:vsrc = LXVW4X $zero, killed [[LWZtoc5]] :: (load 16 from constant-pool) + ; 32BIT: STXVW4X killed [[LXVW4X4]], $r1, killed [[LI4]] :: (store (s128)) + ; 32BIT: [[LWZtoc5:%[0-9]+]]:gprc = LWZtoc %const.5, $r2 :: (load (s32) from got) + ; 32BIT: [[LXVW4X5:%[0-9]+]]:vsrc = LXVW4X $zero, killed [[LWZtoc5]] :: (load (s128) from constant-pool) ; 32BIT: [[LI5:%[0-9]+]]:gprc = LI 112 - ; 32BIT: STXVW4X killed [[LXVW4X5]], $r1, killed [[LI5]] :: (store 16) - ; 32BIT: [[LWZtoc6:%[0-9]+]]:gprc = LWZtoc %const.6, $r2 :: (load 4 from got) - ; 32BIT: [[LXVW4X6:%[0-9]+]]:vsrc = LXVW4X $zero, killed [[LWZtoc6]] :: (load 16 from constant-pool) + ; 32BIT: STXVW4X killed [[LXVW4X5]], $r1, killed [[LI5]] :: (store (s128)) + ; 32BIT: [[LWZtoc6:%[0-9]+]]:gprc = LWZtoc %const.6, $r2 :: (load (s32) from got) + ; 32BIT: [[LXVW4X6:%[0-9]+]]:vsrc = LXVW4X $zero, killed [[LWZtoc6]] :: (load (s128) from constant-pool) ; 32BIT: [[LI6:%[0-9]+]]:gprc = LI 96 - ; 32BIT: STXVW4X killed [[LXVW4X6]], $r1, killed [[LI6]] :: (store 16) - ; 32BIT: [[LWZtoc7:%[0-9]+]]:gprc = LWZtoc %const.7, $r2 :: (load 4 from got) - ; 32BIT: [[LXVW4X7:%[0-9]+]]:vsrc = LXVW4X $zero, killed [[LWZtoc7]] :: (load 16 from constant-pool) + ; 32BIT: STXVW4X killed [[LXVW4X6]], $r1, killed [[LI6]] :: (store (s128)) + ; 32BIT: [[LWZtoc7:%[0-9]+]]:gprc = LWZtoc %const.7, $r2 :: (load (s32) from got) + ; 32BIT: [[LXVW4X7:%[0-9]+]]:vsrc = LXVW4X $zero, killed [[LWZtoc7]] :: (load (s128) from constant-pool) ; 32BIT: [[LI7:%[0-9]+]]:gprc = LI 80 - ; 32BIT: STXVW4X killed [[LXVW4X7]], $r1, killed [[LI7]] :: (store 16) - ; 32BIT: [[LWZtoc8:%[0-9]+]]:gprc = LWZtoc %const.8, $r2 :: (load 4 from got) - ; 32BIT: [[LXVW4X8:%[0-9]+]]:vsrc = LXVW4X $zero, killed [[LWZtoc8]] :: (load 16 from constant-pool) + ; 32BIT: STXVW4X killed [[LXVW4X7]], $r1, killed [[LI7]] :: (store (s128)) + ; 32BIT: [[LWZtoc8:%[0-9]+]]:gprc = LWZtoc %const.8, $r2 :: (load (s32) from got) + ; 32BIT: [[LXVW4X8:%[0-9]+]]:vsrc = LXVW4X $zero, killed [[LWZtoc8]] :: (load (s128) from constant-pool) ; 32BIT: [[LI8:%[0-9]+]]:gprc = LI 64 - ; 32BIT: STXVW4X killed [[LXVW4X8]], $r1, killed [[LI8]] :: (store 16) - ; 32BIT: [[LWZ:%[0-9]+]]:gprc = LWZ 52, $r1 :: (load 4) - ; 32BIT: [[LWZ1:%[0-9]+]]:gprc = LWZ 48, $r1 :: (load 4) - ; 32BIT: [[LWZ2:%[0-9]+]]:gprc = LWZ 44, $r1 :: (load 4) - ; 32BIT: [[LWZ3:%[0-9]+]]:gprc = LWZ 40, $r1 :: (load 4) - ; 32BIT: [[LWZ4:%[0-9]+]]:gprc = LWZ 36, $r1 :: (load 4) - ; 32BIT: [[LWZ5:%[0-9]+]]:gprc = LWZ 32, $r1 :: (load 4) + ; 32BIT: STXVW4X killed [[LXVW4X8]], $r1, killed [[LI8]] :: (store (s128)) + ; 32BIT: [[LWZ:%[0-9]+]]:gprc = LWZ 52, $r1 :: (load (s32)) + ; 32BIT: [[LWZ1:%[0-9]+]]:gprc = LWZ 48, $r1 :: (load (s32)) + ; 32BIT: [[LWZ2:%[0-9]+]]:gprc = LWZ 44, $r1 :: (load (s32)) + ; 32BIT: [[LWZ3:%[0-9]+]]:gprc = LWZ 40, $r1 :: (load (s32)) + ; 32BIT: [[LWZ4:%[0-9]+]]:gprc = LWZ 36, $r1 :: (load (s32)) + ; 32BIT: [[LWZ5:%[0-9]+]]:gprc = LWZ 32, $r1 :: (load (s32)) ; 32BIT: [[LI9:%[0-9]+]]:gprc = LI 9 ; 32BIT: $r3 = COPY [[LI9]] ; 32BIT: $r5 = COPY [[LWZ5]] @@ -70,48 +70,48 @@ define <4 x i32> @caller() { ; 64BIT-LABEL: name: caller ; 64BIT: bb.0.entry: ; 64BIT: ADJCALLSTACKDOWN 208, 0, implicit-def dead $r1, implicit $r1 - ; 64BIT: [[LDtocCPT:%[0-9]+]]:g8rc = LDtocCPT %const.0, $x2 :: (load 8 from got) - ; 64BIT: [[LXVW4X:%[0-9]+]]:vsrc = LXVW4X $zero8, killed [[LDtocCPT]] :: (load 16 from constant-pool) + ; 64BIT: [[LDtocCPT:%[0-9]+]]:g8rc = LDtocCPT %const.0, $x2 :: (load (s64) from got) + ; 64BIT: [[LXVW4X:%[0-9]+]]:vsrc = LXVW4X $zero8, killed [[LDtocCPT]] :: (load (s128) from constant-pool) ; 64BIT: [[LI8_:%[0-9]+]]:g8rc = LI8 96 - ; 64BIT: STXVW4X killed [[LXVW4X]], $x1, killed [[LI8_]] :: (store 16) - ; 64BIT: [[LDtocCPT1:%[0-9]+]]:g8rc = LDtocCPT %const.1, $x2 :: (load 8 from got) - ; 64BIT: [[LXVW4X1:%[0-9]+]]:vsrc = LXVW4X $zero8, killed [[LDtocCPT1]] :: (load 16 from constant-pool) + ; 64BIT: STXVW4X killed [[LXVW4X]], $x1, killed [[LI8_]] :: (store (s128)) + ; 64BIT: [[LDtocCPT1:%[0-9]+]]:g8rc = LDtocCPT %const.1, $x2 :: (load (s64) from got) + ; 64BIT: [[LXVW4X1:%[0-9]+]]:vsrc = LXVW4X $zero8, killed [[LDtocCPT1]] :: (load (s128) from constant-pool) ; 64BIT: [[LI8_1:%[0-9]+]]:g8rc = LI8 80 - ; 64BIT: STXVW4X killed [[LXVW4X1]], $x1, killed [[LI8_1]] :: (store 16) - ; 64BIT: [[LDtocCPT2:%[0-9]+]]:g8rc = LDtocCPT %const.2, $x2 :: (load 8 from got) - ; 64BIT: [[LXVW4X2:%[0-9]+]]:vsrc = LXVW4X $zero8, killed [[LDtocCPT2]] :: (load 16 from constant-pool) + ; 64BIT: STXVW4X killed [[LXVW4X1]], $x1, killed [[LI8_1]] :: (store (s128)) + ; 64BIT: [[LDtocCPT2:%[0-9]+]]:g8rc = LDtocCPT %const.2, $x2 :: (load (s64) from got) + ; 64BIT: [[LXVW4X2:%[0-9]+]]:vsrc = LXVW4X $zero8, killed [[LDtocCPT2]] :: (load (s128) from constant-pool) ; 64BIT: [[LI8_2:%[0-9]+]]:g8rc = LI8 64 - ; 64BIT: STXVW4X killed [[LXVW4X2]], $x1, killed [[LI8_2]] :: (store 16) - ; 64BIT: [[LDtocCPT3:%[0-9]+]]:g8rc = LDtocCPT %const.3, $x2 :: (load 8 from got) - ; 64BIT: [[LXVW4X3:%[0-9]+]]:vsrc = LXVW4X $zero8, killed [[LDtocCPT3]] :: (load 16 from constant-pool) + ; 64BIT: STXVW4X killed [[LXVW4X2]], $x1, killed [[LI8_2]] :: (store (s128)) + ; 64BIT: [[LDtocCPT3:%[0-9]+]]:g8rc = LDtocCPT %const.3, $x2 :: (load (s64) from got) + ; 64BIT: [[LXVW4X3:%[0-9]+]]:vsrc = LXVW4X $zero8, killed [[LDtocCPT3]] :: (load (s128) from constant-pool) ; 64BIT: [[LI8_3:%[0-9]+]]:g8rc = LI8 192 - ; 64BIT: STXVW4X killed [[LXVW4X3]], $x1, killed [[LI8_3]] :: (store 16) - ; 64BIT: [[LDtocCPT4:%[0-9]+]]:g8rc = LDtocCPT %const.4, $x2 :: (load 8 from got) - ; 64BIT: [[LXVW4X4:%[0-9]+]]:vsrc = LXVW4X $zero8, killed [[LDtocCPT4]] :: (load 16 from constant-pool) + ; 64BIT: STXVW4X killed [[LXVW4X3]], $x1, killed [[LI8_3]] :: (store (s128)) + ; 64BIT: [[LDtocCPT4:%[0-9]+]]:g8rc = LDtocCPT %const.4, $x2 :: (load (s64) from got) + ; 64BIT: [[LXVW4X4:%[0-9]+]]:vsrc = LXVW4X $zero8, killed [[LDtocCPT4]] :: (load (s128) from constant-pool) ; 64BIT: [[LI8_4:%[0-9]+]]:g8rc = LI8 176 - ; 64BIT: STXVW4X killed [[LXVW4X4]], $x1, killed [[LI8_4]] :: (store 16) - ; 64BIT: [[LDtocCPT5:%[0-9]+]]:g8rc = LDtocCPT %const.5, $x2 :: (load 8 from got) - ; 64BIT: [[LXVW4X5:%[0-9]+]]:vsrc = LXVW4X $zero8, killed [[LDtocCPT5]] :: (load 16 from constant-pool) + ; 64BIT: STXVW4X killed [[LXVW4X4]], $x1, killed [[LI8_4]] :: (store (s128)) + ; 64BIT: [[LDtocCPT5:%[0-9]+]]:g8rc = LDtocCPT %const.5, $x2 :: (load (s64) from got) + ; 64BIT: [[LXVW4X5:%[0-9]+]]:vsrc = LXVW4X $zero8, killed [[LDtocCPT5]] :: (load (s128) from constant-pool) ; 64BIT: [[LI8_5:%[0-9]+]]:g8rc = LI8 160 - ; 64BIT: STXVW4X killed [[LXVW4X5]], $x1, killed [[LI8_5]] :: (store 16) - ; 64BIT: [[LDtocCPT6:%[0-9]+]]:g8rc = LDtocCPT %const.6, $x2 :: (load 8 from got) - ; 64BIT: [[LXVW4X6:%[0-9]+]]:vsrc = LXVW4X $zero8, killed [[LDtocCPT6]] :: (load 16 from constant-pool) + ; 64BIT: STXVW4X killed [[LXVW4X5]], $x1, killed [[LI8_5]] :: (store (s128)) + ; 64BIT: [[LDtocCPT6:%[0-9]+]]:g8rc = LDtocCPT %const.6, $x2 :: (load (s64) from got) + ; 64BIT: [[LXVW4X6:%[0-9]+]]:vsrc = LXVW4X $zero8, killed [[LDtocCPT6]] :: (load (s128) from constant-pool) ; 64BIT: [[LI8_6:%[0-9]+]]:g8rc = LI8 144 - ; 64BIT: STXVW4X killed [[LXVW4X6]], $x1, killed [[LI8_6]] :: (store 16) - ; 64BIT: [[LDtocCPT7:%[0-9]+]]:g8rc = LDtocCPT %const.7, $x2 :: (load 8 from got) - ; 64BIT: [[LXVW4X7:%[0-9]+]]:vsrc = LXVW4X $zero8, killed [[LDtocCPT7]] :: (load 16 from constant-pool) + ; 64BIT: STXVW4X killed [[LXVW4X6]], $x1, killed [[LI8_6]] :: (store (s128)) + ; 64BIT: [[LDtocCPT7:%[0-9]+]]:g8rc = LDtocCPT %const.7, $x2 :: (load (s64) from got) + ; 64BIT: [[LXVW4X7:%[0-9]+]]:vsrc = LXVW4X $zero8, killed [[LDtocCPT7]] :: (load (s128) from constant-pool) ; 64BIT: [[LI8_7:%[0-9]+]]:g8rc = LI8 128 - ; 64BIT: STXVW4X killed [[LXVW4X7]], $x1, killed [[LI8_7]] :: (store 16) - ; 64BIT: [[LDtocCPT8:%[0-9]+]]:g8rc = LDtocCPT %const.8, $x2 :: (load 8 from got) - ; 64BIT: [[LXVW4X8:%[0-9]+]]:vsrc = LXVW4X $zero8, killed [[LDtocCPT8]] :: (load 16 from constant-pool) + ; 64BIT: STXVW4X killed [[LXVW4X7]], $x1, killed [[LI8_7]] :: (store (s128)) + ; 64BIT: [[LDtocCPT8:%[0-9]+]]:g8rc = LDtocCPT %const.8, $x2 :: (load (s64) from got) + ; 64BIT: [[LXVW4X8:%[0-9]+]]:vsrc = LXVW4X $zero8, killed [[LDtocCPT8]] :: (load (s128) from constant-pool) ; 64BIT: [[LI8_8:%[0-9]+]]:g8rc = LI8 112 - ; 64BIT: STXVW4X killed [[LXVW4X8]], $x1, killed [[LI8_8]] :: (store 16) - ; 64BIT: [[LD:%[0-9]+]]:g8rc = LD 104, $x1 :: (load 8) - ; 64BIT: [[LD1:%[0-9]+]]:g8rc = LD 96, $x1 :: (load 8) - ; 64BIT: [[LD2:%[0-9]+]]:g8rc = LD 88, $x1 :: (load 8) - ; 64BIT: [[LD3:%[0-9]+]]:g8rc = LD 80, $x1 :: (load 8) - ; 64BIT: [[LD4:%[0-9]+]]:g8rc = LD 72, $x1 :: (load 8) - ; 64BIT: [[LD5:%[0-9]+]]:g8rc = LD 64, $x1 :: (load 8) + ; 64BIT: STXVW4X killed [[LXVW4X8]], $x1, killed [[LI8_8]] :: (store (s128)) + ; 64BIT: [[LD:%[0-9]+]]:g8rc = LD 104, $x1 :: (load (s64)) + ; 64BIT: [[LD1:%[0-9]+]]:g8rc = LD 96, $x1 :: (load (s64)) + ; 64BIT: [[LD2:%[0-9]+]]:g8rc = LD 88, $x1 :: (load (s64)) + ; 64BIT: [[LD3:%[0-9]+]]:g8rc = LD 80, $x1 :: (load (s64)) + ; 64BIT: [[LD4:%[0-9]+]]:g8rc = LD 72, $x1 :: (load (s64)) + ; 64BIT: [[LD5:%[0-9]+]]:g8rc = LD 64, $x1 :: (load (s64)) ; 64BIT: [[LI8_9:%[0-9]+]]:g8rc = LI8 9 ; 64BIT: $x3 = COPY [[LI8_9]] ; 64BIT: $x5 = COPY [[LD5]] diff --git a/llvm/test/CodeGen/PowerPC/aix-vector-vararg-fixed-caller.ll b/llvm/test/CodeGen/PowerPC/aix-vector-vararg-fixed-caller.ll index b3f79cba52487..d958c64533151 100644 --- a/llvm/test/CodeGen/PowerPC/aix-vector-vararg-fixed-caller.ll +++ b/llvm/test/CodeGen/PowerPC/aix-vector-vararg-fixed-caller.ll @@ -11,26 +11,26 @@ define void @caller() { ; 32BIT-LABEL: name: caller ; 32BIT: bb.0.entry: ; 32BIT: ADJCALLSTACKDOWN 88, 0, implicit-def dead $r1, implicit $r1 - ; 32BIT: [[LWZtoc:%[0-9]+]]:gprc = LWZtoc %const.0, $r2 :: (load 4 from got) - ; 32BIT: [[LXVW4X:%[0-9]+]]:vsrc = LXVW4X $zero, killed [[LWZtoc]] :: (load 16 from constant-pool) + ; 32BIT: [[LWZtoc:%[0-9]+]]:gprc = LWZtoc %const.0, $r2 :: (load (s32) from got) + ; 32BIT: [[LXVW4X:%[0-9]+]]:vsrc = LXVW4X $zero, killed [[LWZtoc]] :: (load (s128) from constant-pool) ; 32BIT: [[LI:%[0-9]+]]:gprc = LI 64 - ; 32BIT: STXVW4X killed [[LXVW4X]], $r1, killed [[LI]] :: (store 16) + ; 32BIT: STXVW4X killed [[LXVW4X]], $r1, killed [[LI]] :: (store (s128)) ; 32BIT: [[LIS:%[0-9]+]]:gprc = LIS 38314 ; 32BIT: [[ORI:%[0-9]+]]:gprc = ORI killed [[LIS]], 63376 - ; 32BIT: STW killed [[ORI]], 84, $r1 :: (store 4 into unknown-address + 4, basealign 8) + ; 32BIT: STW killed [[ORI]], 84, $r1 :: (store (s32) into unknown-address + 4, basealign 8) ; 32BIT: [[LIS1:%[0-9]+]]:gprc = LIS 16389 ; 32BIT: [[ORI1:%[0-9]+]]:gprc = ORI killed [[LIS1]], 48905 - ; 32BIT: STW killed [[ORI1]], 80, $r1 :: (store 4, align 8) - ; 32BIT: [[LWZtoc1:%[0-9]+]]:gprc = LWZtoc %const.1, $r2 :: (load 4 from got) - ; 32BIT: [[LXVW4X1:%[0-9]+]]:vsrc = LXVW4X $zero, killed [[LWZtoc1]] :: (load 16 from constant-pool) - ; 32BIT: [[LWZtoc2:%[0-9]+]]:gprc = LWZtoc %const.2, $r2 :: (load 4 from got) - ; 32BIT: [[XFLOADf64_:%[0-9]+]]:vsfrc = XFLOADf64 $zero, killed [[LWZtoc2]] :: (load 8 from constant-pool) + ; 32BIT: STW killed [[ORI1]], 80, $r1 :: (store (s32), align 8) + ; 32BIT: [[LWZtoc1:%[0-9]+]]:gprc = LWZtoc %const.1, $r2 :: (load (s32) from got) + ; 32BIT: [[LXVW4X1:%[0-9]+]]:vsrc = LXVW4X $zero, killed [[LWZtoc1]] :: (load (s128) from constant-pool) + ; 32BIT: [[LWZtoc2:%[0-9]+]]:gprc = LWZtoc %const.2, $r2 :: (load (s32) from got) + ; 32BIT: [[XFLOADf64_:%[0-9]+]]:vsfrc = XFLOADf64 $zero, killed [[LWZtoc2]] :: (load (s64) from constant-pool) ; 32BIT: [[LIS2:%[0-9]+]]:gprc = LIS 16393 ; 32BIT: [[ORI2:%[0-9]+]]:gprc = ORI killed [[LIS2]], 8697 ; 32BIT: [[LIS3:%[0-9]+]]:gprc = LIS 61467 ; 32BIT: [[ORI3:%[0-9]+]]:gprc = ORI killed [[LIS3]], 34414 - ; 32BIT: [[LWZtoc3:%[0-9]+]]:gprc = LWZtoc %const.3, $r2 :: (load 4 from got) - ; 32BIT: [[XFLOADf64_1:%[0-9]+]]:vsfrc = XFLOADf64 $zero, killed [[LWZtoc3]] :: (load 8 from constant-pool) + ; 32BIT: [[LWZtoc3:%[0-9]+]]:gprc = LWZtoc %const.3, $r2 :: (load (s32) from got) + ; 32BIT: [[XFLOADf64_1:%[0-9]+]]:vsfrc = XFLOADf64 $zero, killed [[LWZtoc3]] :: (load (s64) from constant-pool) ; 32BIT: [[LI1:%[0-9]+]]:gprc = LI 55 ; 32BIT: $r3 = COPY [[LI1]] ; 32BIT: $v2 = COPY [[LXVW4X1]] @@ -46,24 +46,24 @@ define void @caller() { ; 64BIT-LABEL: name: caller ; 64BIT: bb.0.entry: ; 64BIT: ADJCALLSTACKDOWN 120, 0, implicit-def dead $r1, implicit $r1 - ; 64BIT: [[LDtocCPT:%[0-9]+]]:g8rc = LDtocCPT %const.0, $x2 :: (load 8 from got) - ; 64BIT: [[LXVW4X:%[0-9]+]]:vsrc = LXVW4X $zero8, killed [[LDtocCPT]] :: (load 16 from constant-pool) + ; 64BIT: [[LDtocCPT:%[0-9]+]]:g8rc = LDtocCPT %const.0, $x2 :: (load (s64) from got) + ; 64BIT: [[LXVW4X:%[0-9]+]]:vsrc = LXVW4X $zero8, killed [[LDtocCPT]] :: (load (s128) from constant-pool) ; 64BIT: [[LI8_:%[0-9]+]]:g8rc = LI8 96 - ; 64BIT: STXVW4X killed [[LXVW4X]], $x1, killed [[LI8_]] :: (store 16) + ; 64BIT: STXVW4X killed [[LXVW4X]], $x1, killed [[LI8_]] :: (store (s128)) ; 64BIT: [[LIS8_:%[0-9]+]]:g8rc = LIS8 16389 ; 64BIT: [[ORI8_:%[0-9]+]]:g8rc = ORI8 killed [[LIS8_]], 48905 ; 64BIT: [[RLDIC:%[0-9]+]]:g8rc = RLDIC killed [[ORI8_]], 32, 1 ; 64BIT: [[ORIS8_:%[0-9]+]]:g8rc = ORIS8 killed [[RLDIC]], 38314 ; 64BIT: [[ORI8_1:%[0-9]+]]:g8rc = ORI8 killed [[ORIS8_]], 63376 - ; 64BIT: STD killed [[ORI8_1]], 112, $x1 :: (store 8) - ; 64BIT: [[LDtocCPT1:%[0-9]+]]:g8rc = LDtocCPT %const.1, $x2 :: (load 8 from got) - ; 64BIT: [[LXVW4X1:%[0-9]+]]:vsrc = LXVW4X $zero8, killed [[LDtocCPT1]] :: (load 16 from constant-pool) - ; 64BIT: [[LD:%[0-9]+]]:g8rc = LD 104, $x1 :: (load 8) - ; 64BIT: [[LD1:%[0-9]+]]:g8rc = LD 96, $x1 :: (load 8) - ; 64BIT: [[LDtocCPT2:%[0-9]+]]:g8rc = LDtocCPT %const.2, $x2 :: (load 8 from got) - ; 64BIT: [[XFLOADf64_:%[0-9]+]]:vsfrc = XFLOADf64 $zero8, killed [[LDtocCPT2]] :: (load 8 from constant-pool) - ; 64BIT: [[LDtocCPT3:%[0-9]+]]:g8rc = LDtocCPT %const.3, $x2 :: (load 8 from got) - ; 64BIT: [[XFLOADf64_1:%[0-9]+]]:vsfrc = XFLOADf64 $zero8, killed [[LDtocCPT3]] :: (load 8 from constant-pool) + ; 64BIT: STD killed [[ORI8_1]], 112, $x1 :: (store (s64)) + ; 64BIT: [[LDtocCPT1:%[0-9]+]]:g8rc = LDtocCPT %const.1, $x2 :: (load (s64) from got) + ; 64BIT: [[LXVW4X1:%[0-9]+]]:vsrc = LXVW4X $zero8, killed [[LDtocCPT1]] :: (load (s128) from constant-pool) + ; 64BIT: [[LD:%[0-9]+]]:g8rc = LD 104, $x1 :: (load (s64)) + ; 64BIT: [[LD1:%[0-9]+]]:g8rc = LD 96, $x1 :: (load (s64)) + ; 64BIT: [[LDtocCPT2:%[0-9]+]]:g8rc = LDtocCPT %const.2, $x2 :: (load (s64) from got) + ; 64BIT: [[XFLOADf64_:%[0-9]+]]:vsfrc = XFLOADf64 $zero8, killed [[LDtocCPT2]] :: (load (s64) from constant-pool) + ; 64BIT: [[LDtocCPT3:%[0-9]+]]:g8rc = LDtocCPT %const.3, $x2 :: (load (s64) from got) + ; 64BIT: [[XFLOADf64_1:%[0-9]+]]:vsfrc = XFLOADf64 $zero8, killed [[LDtocCPT3]] :: (load (s64) from constant-pool) ; 64BIT: [[LIS8_1:%[0-9]+]]:g8rc = LIS8 16393 ; 64BIT: [[ORI8_2:%[0-9]+]]:g8rc = ORI8 killed [[LIS8_1]], 8697 ; 64BIT: [[RLDIC1:%[0-9]+]]:g8rc = RLDIC killed [[ORI8_2]], 32, 1 diff --git a/llvm/test/CodeGen/PowerPC/aix-xcoff-reloc-symb.mir b/llvm/test/CodeGen/PowerPC/aix-xcoff-reloc-symb.mir index c8ee10f7ac247..f650168d5877d 100644 --- a/llvm/test/CodeGen/PowerPC/aix-xcoff-reloc-symb.mir +++ b/llvm/test/CodeGen/PowerPC/aix-xcoff-reloc-symb.mir @@ -16,7 +16,7 @@ body: | bb.0: successors: %bb.0(0x20000000) liveins: $r2 - renamable $r3 = LWZtoc %jump-table.0, $r2 :: (load 4 from got) + renamable $r3 = LWZtoc %jump-table.0, $r2 :: (load (s32) from got) BLR implicit $lr, implicit $rm, implicit killed $r3 ... diff --git a/llvm/test/CodeGen/PowerPC/aix32-cc-abi-vaarg.ll b/llvm/test/CodeGen/PowerPC/aix32-cc-abi-vaarg.ll index e8821cb915770..909eadc35ef7c 100644 --- a/llvm/test/CodeGen/PowerPC/aix32-cc-abi-vaarg.ll +++ b/llvm/test/CodeGen/PowerPC/aix32-cc-abi-vaarg.ll @@ -61,21 +61,21 @@ ; 32BIT-LABEL: body: | ; 32BIT-DAG: liveins: $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10 -; 32BIT-DAG: STW killed renamable $r4, 0, %fixed-stack.0 :: (store 4 into %fixed-stack.0) -; 32BIT-DAG: STW killed renamable $r5, 4, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 4) -; 32BIT-DAG: STW killed renamable $r6, 8, %fixed-stack.0 :: (store 4) -; 32BIT-DAG: STW killed renamable $r7, 12, %fixed-stack.0 :: (store 4) -; 32BIT-DAG: STW killed renamable $r8, 16, %fixed-stack.0 :: (store 4) -; 32BIT-DAG: STW killed renamable $r9, 20, %fixed-stack.0 :: (store 4) -; 32BIT-DAG: STW killed renamable $r10, 24, %fixed-stack.0 :: (store 4) -; 32BIT-DAG: STW killed renamable $r4, 0, %stack.1.arg2 :: (store 4 into %ir.arg2) +; 32BIT-DAG: STW killed renamable $r4, 0, %fixed-stack.0 :: (store (s32) into %fixed-stack.0) +; 32BIT-DAG: STW killed renamable $r5, 4, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 4) +; 32BIT-DAG: STW killed renamable $r6, 8, %fixed-stack.0 :: (store (s32)) +; 32BIT-DAG: STW killed renamable $r7, 12, %fixed-stack.0 :: (store (s32)) +; 32BIT-DAG: STW killed renamable $r8, 16, %fixed-stack.0 :: (store (s32)) +; 32BIT-DAG: STW killed renamable $r9, 20, %fixed-stack.0 :: (store (s32)) +; 32BIT-DAG: STW killed renamable $r10, 24, %fixed-stack.0 :: (store (s32)) +; 32BIT-DAG: STW killed renamable $r4, 0, %stack.1.arg2 :: (store (s32) into %ir.arg2) ; 32BIT-DAG: renamable $r4 = ADDI %fixed-stack.0, 4 -; 32BIT-DAG: STW killed renamable $r11, 0, %stack.1.arg2 :: (store 4 into %ir.1) +; 32BIT-DAG: STW killed renamable $r11, 0, %stack.1.arg2 :: (store (s32) into %ir.1) ; 32BIT-DAG: renamable $r11 = ADDI %fixed-stack.0, 0 -; 32BIT-DAG: STW renamable $r11, 0, %stack.0.arg1 :: (store 4 into %ir.0) -; 32BIT-DAG: STW renamable $r4, 0, %stack.0.arg1 :: (store 4 into %ir.arg1) -; 32BIT-DAG: renamable $r6 = LWZ 0, %fixed-stack.0 :: (load 4 from %ir.2) -; 32BIT-DAG: renamable $r4 = LWZ 0, %fixed-stack.0 :: (load 4 from %ir.4) +; 32BIT-DAG: STW renamable $r11, 0, %stack.0.arg1 :: (store (s32) into %ir.0) +; 32BIT-DAG: STW renamable $r4, 0, %stack.0.arg1 :: (store (s32) into %ir.arg1) +; 32BIT-DAG: renamable $r6 = LWZ 0, %fixed-stack.0 :: (load (s32) from %ir.2) +; 32BIT-DAG: renamable $r4 = LWZ 0, %fixed-stack.0 :: (load (s32) from %ir.4) ; 32BIT-DAG: renamable $r3 = nsw ADD4 killed renamable $r6, killed renamable $r3 ; 32BIT-DAG: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r4 ; 32BIT-DAG: BLR implicit $lr, implicit $rm, implicit $r3 @@ -161,9 +161,9 @@ ; 32BIT-DAG: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r10 ; 32BIT-DAG: renamable $r3 = nsw ADD4 killed renamable $r4, killed renamable $r3 ; 32BIT-DAG: renamable $r4 = ADDI %fixed-stack.0, 0 -; 32BIT-DAG: STW killed renamable $r4, 0, %stack.0.arg1 :: (store 4 into %ir.arg1) +; 32BIT-DAG: STW killed renamable $r4, 0, %stack.0.arg1 :: (store (s32) into %ir.arg1) ; 32BIT-DAG: renamable $r3 = nsw ADD4 killed renamable $r3, renamable $r4 -; 32BIT-DAG: renamable $r4 = LWZ 0, %fixed-stack.0 :: (load 4 from %ir.4, align 8) +; 32BIT-DAG: renamable $r4 = LWZ 0, %fixed-stack.0 :: (load (s32) from %ir.4, align 8) ; 32BIT-DAG: renamable $r11 = LI 4 ; 32BIT-DAG: BLR implicit $lr, implicit $rm, implicit $r3 @@ -231,14 +231,14 @@ ; 32BIT-LABEL: body: | ; 32BIT-DAG: liveins: $f1, $r5, $r6, $r7, $r8, $r9, $r10 ; 32BIT-DAG: renamable $r3 = ADDI %fixed-stack.0, 0 -; 32BIT-DAG: STW renamable $r5, 0, %fixed-stack.0 :: (store 4 into %fixed-stack.0, align 16) -; 32BIT-DAG: STW renamable $r6, 4, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 4) -; 32BIT-DAG: STW killed renamable $r7, 8, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 8, align 8) -; 32BIT-DAG: STW killed renamable $r8, 12, %fixed-stack.0 :: (store 4) -; 32BIT-DAG: STW killed renamable $r9, 16, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 16, align 16) -; 32BIT-DAG: STW killed renamable $r10, 20, %fixed-stack.0 :: (store 4) -; 32BIT-DAG: STW renamable $r3, 0, %stack.0.arg1 :: (store 4 into %ir.0) -; 32BIT-DAG: STW killed renamable $r3, 0, %stack.1.arg2 :: (store 4 into %ir.1) +; 32BIT-DAG: STW renamable $r5, 0, %fixed-stack.0 :: (store (s32) into %fixed-stack.0, align 16) +; 32BIT-DAG: STW renamable $r6, 4, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 4) +; 32BIT-DAG: STW killed renamable $r7, 8, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 8, align 8) +; 32BIT-DAG: STW killed renamable $r8, 12, %fixed-stack.0 :: (store (s32)) +; 32BIT-DAG: STW killed renamable $r9, 16, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 16, align 16) +; 32BIT-DAG: STW killed renamable $r10, 20, %fixed-stack.0 :: (store (s32)) +; 32BIT-DAG: STW renamable $r3, 0, %stack.0.arg1 :: (store (s32) into %ir.0) +; 32BIT-DAG: STW killed renamable $r3, 0, %stack.1.arg2 :: (store (s32) into %ir.1) ; 32BIT-DAG: BLR implicit $lr, implicit $rm, implicit $f1 ; ASM32-LABEL: .double_va_arg: @@ -323,8 +323,8 @@ ; 32BIT-LABEL: body: | ; 32BIT-DAG: liveins: $f1, $f2, $f3, $f4, $f5, $f6, $f7, $f8, $f9, $f10, $f11, $f12, $f13 ; 32BIT-DAG: renamable $r3 = ADDI %fixed-stack.0, 0 -; 32BIT-DAG: STW killed renamable $r3, 0, %stack.0.arg1 :: (store 4 into %ir.0) -; 32BIT-DAG: renamable $r3 = LWZ 0, %fixed-stack.0 :: (load 4 from %ir.argp.cur142, align 16) +; 32BIT-DAG: STW killed renamable $r3, 0, %stack.0.arg1 :: (store (s32) into %ir.0) +; 32BIT-DAG: renamable $r3 = LWZ 0, %fixed-stack.0 :: (load (s32) from %ir.argp.cur142, align 16) ; 32BIT-DAG: renamable $f1 = nofpexcept FADD killed renamable $f0, killed renamable $f1, implicit $rm ; 32BIT-DAG: renamable $f0 = nofpexcept FADD killed renamable $f1, killed renamable $f2, implicit $rm ; 32BIT-DAG: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f3, implicit $rm @@ -338,14 +338,14 @@ ; 32BIT-DAG: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f11, implicit $rm ; 32BIT-DAG: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f12, implicit $rm ; 32BIT-DAG: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f13, implicit $rm -; 32BIT-DAG: renamable $r4 = LWZ 4, %fixed-stack.0 :: (load 4 from %ir.argp.cur1 + 4) -; 32BIT-DAG: STW renamable $r4, 4, %stack.2 :: (store 4 into %stack.2 + 4) -; 32BIT-DAG: renamable $f1 = LFD 0, %stack.2 :: (load 8 from %stack.2) -; 32BIT-DAG: STW killed renamable $r3, 0, %stack.3 :: (store 4 into %stack.3, align 8) -; 32BIT-DAG: STW killed renamable $r4, 4, %stack.3 :: (store 4 into %stack.3 + 4) -; 32BIT-DAG: renamable $f2 = LFD 0, %stack.3 :: (load 8 from %stack.3) +; 32BIT-DAG: renamable $r4 = LWZ 4, %fixed-stack.0 :: (load (s32) from %ir.argp.cur1 + 4) +; 32BIT-DAG: STW renamable $r4, 4, %stack.2 :: (store (s32) into %stack.2 + 4) +; 32BIT-DAG: renamable $f1 = LFD 0, %stack.2 :: (load (s64) from %stack.2) +; 32BIT-DAG: STW killed renamable $r3, 0, %stack.3 :: (store (s32) into %stack.3, align 8) +; 32BIT-DAG: STW killed renamable $r4, 4, %stack.3 :: (store (s32) into %stack.3 + 4) +; 32BIT-DAG: renamable $f2 = LFD 0, %stack.3 :: (load (s64) from %stack.3) ; 32BIT-DAG: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f1, implicit $rm -; 32BIT-DAG: STW renamable $r3, 0, %stack.2 :: (store 4 into %stack.2, align 8) +; 32BIT-DAG: STW renamable $r3, 0, %stack.2 :: (store (s32) into %stack.2, align 8) ; 32BIT-DAG: renamable $f1 = nofpexcept FADD killed renamable $f2, renamable $f2, implicit $rm ; 32BIT-DAG: BLR implicit $lr, implicit $rm, implicit $f1 diff --git a/llvm/test/CodeGen/PowerPC/aix32-crsave.mir b/llvm/test/CodeGen/PowerPC/aix32-crsave.mir index 5622c963d933f..cf51f79c7e989 100644 --- a/llvm/test/CodeGen/PowerPC/aix32-crsave.mir +++ b/llvm/test/CodeGen/PowerPC/aix32-crsave.mir @@ -32,9 +32,9 @@ body: | ; CHECK: $r12 = MFCR implicit killed $cr2, implicit killed $cr4 ; CHECK-NEXT: STW killed $r12, 4, $r1 - ; CHECK-NEXT: STW killed $r29, -12, $r1 :: (store 4 into %fixed-stack.0) + ; CHECK-NEXT: STW killed $r29, -12, $r1 :: (store (s32) into %fixed-stack.0) - ; CHECK: $r29 = LWZ -12, $r1 :: (load 4 from %fixed-stack.0) + ; CHECK: $r29 = LWZ -12, $r1 :: (load (s32) from %fixed-stack.0) ; CHECK-NEXT: $r12 = LWZ 4, $r1 ; CHECK-NEXT: $cr2 = MTOCRF $r12 ; CHECK-NEXT: $cr4 = MTOCRF killed $r12 @@ -69,8 +69,8 @@ body: | ; CHECK: $r12 = MFCR implicit killed $cr3 ; CHECK-NEXT: STW killed $r12, 4, $r1 - ; CHECK-NEXT: STW killed $r14, -72, $r1 :: (store 4 into %fixed-stack.0, align 8) + ; CHECK-NEXT: STW killed $r14, -72, $r1 :: (store (s32) into %fixed-stack.0, align 8) - ; CHECK: $r14 = LWZ -72, $r1 :: (load 4 from %fixed-stack.0, align 8) + ; CHECK: $r14 = LWZ -72, $r1 :: (load (s32) from %fixed-stack.0, align 8) ; CHECK-NEXT: $r12 = LWZ 4, $r1 ; CHECK-NEXT: $cr3 = MTOCRF killed $r12 diff --git a/llvm/test/CodeGen/PowerPC/aix32-vector-vararg-callee-split.ll b/llvm/test/CodeGen/PowerPC/aix32-vector-vararg-callee-split.ll index bd388e1a8a4f8..c60329ee81e5f 100644 --- a/llvm/test/CodeGen/PowerPC/aix32-vector-vararg-callee-split.ll +++ b/llvm/test/CodeGen/PowerPC/aix32-vector-vararg-callee-split.ll @@ -13,11 +13,11 @@ define <4 x i32> @split_spill(double %d1, double %d2, double %d3, ...) { ; CHECK: liveins: $r9, $r10 ; CHECK: [[COPY:%[0-9]+]]:gprc = COPY $r10 ; CHECK: [[COPY1:%[0-9]+]]:gprc = COPY $r9 - ; CHECK: STW [[COPY1]], 0, %fixed-stack.0 :: (store 4 into %fixed-stack.0, align 16) - ; CHECK: STW [[COPY]], 4, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 4) + ; CHECK: STW [[COPY1]], 0, %fixed-stack.0 :: (store (s32) into %fixed-stack.0, align 16) + ; CHECK: STW [[COPY]], 4, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 4) ; CHECK: LIFETIME_START %stack.0.arg_list ; CHECK: [[ADDI:%[0-9]+]]:gprc = ADDI %fixed-stack.0, 0 - ; CHECK: [[LXVW4X:%[0-9]+]]:vsrc = LXVW4X $zero, killed [[ADDI]] :: (load 16 from %ir.4) + ; CHECK: [[LXVW4X:%[0-9]+]]:vsrc = LXVW4X $zero, killed [[ADDI]] :: (load (s128) from %ir.4) ; CHECK: LIFETIME_END %stack.0.arg_list ; CHECK: $v2 = COPY [[LXVW4X]] ; CHECK: BLR implicit $lr, implicit $rm, implicit $v2 diff --git a/llvm/test/CodeGen/PowerPC/aix32-vector-vararg-callee.ll b/llvm/test/CodeGen/PowerPC/aix32-vector-vararg-callee.ll index aac50e524bb49..33a4836e6c6c0 100644 --- a/llvm/test/CodeGen/PowerPC/aix32-vector-vararg-callee.ll +++ b/llvm/test/CodeGen/PowerPC/aix32-vector-vararg-callee.ll @@ -15,19 +15,19 @@ define <4 x i32> @callee(i32 %count, ...) { ; CHECK: [[COPY4:%[0-9]+]]:gprc = COPY $r6 ; CHECK: [[COPY5:%[0-9]+]]:gprc = COPY $r5 ; CHECK: [[COPY6:%[0-9]+]]:gprc = COPY $r4 - ; CHECK: STW [[COPY6]], 0, %fixed-stack.0 :: (store 4 into %fixed-stack.0) - ; CHECK: STW [[COPY5]], 4, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 4) - ; CHECK: STW [[COPY4]], 8, %fixed-stack.0 :: (store 4) - ; CHECK: STW [[COPY3]], 12, %fixed-stack.0 :: (store 4) - ; CHECK: STW [[COPY2]], 16, %fixed-stack.0 :: (store 4) - ; CHECK: STW [[COPY1]], 20, %fixed-stack.0 :: (store 4) - ; CHECK: STW [[COPY]], 24, %fixed-stack.0 :: (store 4) + ; CHECK: STW [[COPY6]], 0, %fixed-stack.0 :: (store (s32) into %fixed-stack.0) + ; CHECK: STW [[COPY5]], 4, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 4) + ; CHECK: STW [[COPY4]], 8, %fixed-stack.0 :: (store (s32)) + ; CHECK: STW [[COPY3]], 12, %fixed-stack.0 :: (store (s32)) + ; CHECK: STW [[COPY2]], 16, %fixed-stack.0 :: (store (s32)) + ; CHECK: STW [[COPY1]], 20, %fixed-stack.0 :: (store (s32)) + ; CHECK: STW [[COPY]], 24, %fixed-stack.0 :: (store (s32)) ; CHECK: LIFETIME_START %stack.0.arg_list ; CHECK: [[ADDI:%[0-9]+]]:gprc = ADDI %fixed-stack.0, 0 - ; CHECK: STW killed [[ADDI]], 0, %stack.0.arg_list :: (store 4 into %ir.0) + ; CHECK: STW killed [[ADDI]], 0, %stack.0.arg_list :: (store (s32) into %ir.0) ; CHECK: [[ADDI1:%[0-9]+]]:gprc = ADDI %fixed-stack.0, 15 ; CHECK: [[RLWINM:%[0-9]+]]:gprc = RLWINM killed [[ADDI1]], 0, 0, 27 - ; CHECK: [[LXVW4X:%[0-9]+]]:vsrc = LXVW4X $zero, killed [[RLWINM]] :: (load 16 from %ir.4) + ; CHECK: [[LXVW4X:%[0-9]+]]:vsrc = LXVW4X $zero, killed [[RLWINM]] :: (load (s128) from %ir.4) ; CHECK: LIFETIME_END %stack.0.arg_list ; CHECK: $v2 = COPY [[LXVW4X]] ; CHECK: BLR implicit $lr, implicit $rm, implicit $v2 diff --git a/llvm/test/CodeGen/PowerPC/aix32-vector-vararg-caller-split.ll b/llvm/test/CodeGen/PowerPC/aix32-vector-vararg-caller-split.ll index 96fe50825c473..a15e7bb061134 100644 --- a/llvm/test/CodeGen/PowerPC/aix32-vector-vararg-caller-split.ll +++ b/llvm/test/CodeGen/PowerPC/aix32-vector-vararg-caller-split.ll @@ -11,17 +11,17 @@ entry: declare <4 x i32> @split_spill(double, double, double, ...) ; CHECK: ADJCALLSTACKDOWN 64, 0, implicit-def dead $r1, implicit $r1 -; CHECK: [[VECCONSTADDR:%[0-9]+]]:gprc = LWZtoc %const.0, $r2 :: (load 4 from got) -; CHECK: [[VECCONST:%[0-9]+]]:vsrc = LXVW4X $zero, killed [[VECCONSTADDR]] :: (load 16 from constant-pool) +; CHECK: [[VECCONSTADDR:%[0-9]+]]:gprc = LWZtoc %const.0, $r2 :: (load (s32) from got) +; CHECK: [[VECCONST:%[0-9]+]]:vsrc = LXVW4X $zero, killed [[VECCONSTADDR]] :: (load (s128) from constant-pool) ; CHECK: [[STACKOFFSET:%[0-9]+]]:gprc = LI 48 -; CHECK: STXVW4X killed [[VECCONST]], $r1, killed [[STACKOFFSET]] :: (store 16) -; CHECK-DAG: [[ELEMENT1:%[0-9]+]]:gprc = LWZ 48, $r1 :: (load 4) -; CHECK-DAG: [[ELEMENT2:%[0-9]+]]:gprc = LWZ 52, $r1 :: (load 4) -; CHECK: [[FLOAT1ADDR:%[0-9]+]]:gprc_and_gprc_nor0 = LWZtoc %const.1, $r2 :: (load 4 from got) -; CHECK: [[FLOAT1:%[0-9]+]]:f4rc = LFS 0, killed [[FLOAT1ADDR]] :: (load 4 from constant-pool) +; CHECK: STXVW4X killed [[VECCONST]], $r1, killed [[STACKOFFSET]] :: (store (s128)) +; CHECK-DAG: [[ELEMENT1:%[0-9]+]]:gprc = LWZ 48, $r1 :: (load (s32)) +; CHECK-DAG: [[ELEMENT2:%[0-9]+]]:gprc = LWZ 52, $r1 :: (load (s32)) +; CHECK: [[FLOAT1ADDR:%[0-9]+]]:gprc_and_gprc_nor0 = LWZtoc %const.1, $r2 :: (load (s32) from got) +; CHECK: [[FLOAT1:%[0-9]+]]:f4rc = LFS 0, killed [[FLOAT1ADDR]] :: (load (s32) from constant-pool) ; CHECK: [[DOUBLE1:%[0-9]+]]:f8rc = COPY [[FLOAT1]] -; CHECK: [[FLOAT2ADDR:%[0-9]+]]:gprc_and_gprc_nor0 = LWZtoc %const.2, $r2 :: (load 4 from got) -; CHECK: [[FLOAT2:%[0-9]+]]:f4rc = LFS 0, killed [[FLOAT2ADDR]] :: (load 4 from constant-pool) +; CHECK: [[FLOAT2ADDR:%[0-9]+]]:gprc_and_gprc_nor0 = LWZtoc %const.2, $r2 :: (load (s32) from got) +; CHECK: [[FLOAT2:%[0-9]+]]:f4rc = LFS 0, killed [[FLOAT2ADDR]] :: (load (s32) from constant-pool) ; CHECK: [[DOUBLE2:%[0-9]+]]:f8rc = COPY [[FLOAT2]] ; CHECK: [[DZERO:%[0-9]+]]:vsfrc = XXLXORdpz diff --git a/llvm/test/CodeGen/PowerPC/aix32-vector-vararg-fixed-callee.ll b/llvm/test/CodeGen/PowerPC/aix32-vector-vararg-fixed-callee.ll index aad4177753365..dcf5f6b5e82b6 100644 --- a/llvm/test/CodeGen/PowerPC/aix32-vector-vararg-fixed-callee.ll +++ b/llvm/test/CodeGen/PowerPC/aix32-vector-vararg-fixed-callee.ll @@ -11,11 +11,11 @@ define double @callee(i32 %count, <4 x i32> %vsi, double %next, ...) { ; CHECK: bb.0.entry: ; CHECK: LIFETIME_START %stack.0.arg_list ; CHECK: [[ADDI:%[0-9]+]]:gprc = ADDI %fixed-stack.0, 0 - ; CHECK: STW killed [[ADDI]], 0, %stack.0.arg_list :: (store 4 into %ir.0) + ; CHECK: STW killed [[ADDI]], 0, %stack.0.arg_list :: (store (s32) into %ir.0) ; CHECK: [[ADDI1:%[0-9]+]]:gprc = ADDI %fixed-stack.0, 15 ; CHECK: [[RLWINM:%[0-9]+]]:gprc_and_gprc_nor0 = RLWINM killed [[ADDI1]], 0, 0, 27 ; CHECK: [[ADDI2:%[0-9]+]]:gprc = nuw ADDI killed [[RLWINM]], 16 - ; CHECK: [[XFLOADf64_:%[0-9]+]]:vsfrc = XFLOADf64 $zero, killed [[ADDI2]] :: (load 8 from %ir.4, align 16) + ; CHECK: [[XFLOADf64_:%[0-9]+]]:vsfrc = XFLOADf64 $zero, killed [[ADDI2]] :: (load (s64) from %ir.4, align 16) ; CHECK: LIFETIME_END %stack.0.arg_list ; CHECK: $f1 = COPY [[XFLOADf64_]] ; CHECK: BLR implicit $lr, implicit $rm, implicit $f1 diff --git a/llvm/test/CodeGen/PowerPC/aix64-cc-abi-vaarg.ll b/llvm/test/CodeGen/PowerPC/aix64-cc-abi-vaarg.ll index 4c2daf3b4ecfc..96d2826add10c 100644 --- a/llvm/test/CodeGen/PowerPC/aix64-cc-abi-vaarg.ll +++ b/llvm/test/CodeGen/PowerPC/aix64-cc-abi-vaarg.ll @@ -54,23 +54,23 @@ ; 64BIT-LABEL: body: | ; 64BIT-DAG: bb.0.entry: ; 64BIT-DAG: liveins: $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10 -; 64BIT-DAG: STD killed renamable $x4, 0, %fixed-stack.0 :: (store 8 into %fixed-stack.0) -; 64BIT-DAG: STD killed renamable $x5, 8, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 8) -; 64BIT-DAG: STD killed renamable $x6, 16, %fixed-stack.0 :: (store 8) -; 64BIT-DAG: STD killed renamable $x7, 24, %fixed-stack.0 :: (store 8) -; 64BIT-DAG: STD killed renamable $x8, 32, %fixed-stack.0 :: (store 8) -; 64BIT-DAG: STD killed renamable $x9, 40, %fixed-stack.0 :: (store 8) -; 64BIT-DAG: STD killed renamable $x10, 48, %fixed-stack.0 :: (store 8) +; 64BIT-DAG: STD killed renamable $x4, 0, %fixed-stack.0 :: (store (s64) into %fixed-stack.0) +; 64BIT-DAG: STD killed renamable $x5, 8, %fixed-stack.0 :: (store (s64) into %fixed-stack.0 + 8) +; 64BIT-DAG: STD killed renamable $x6, 16, %fixed-stack.0 :: (store (s64)) +; 64BIT-DAG: STD killed renamable $x7, 24, %fixed-stack.0 :: (store (s64)) +; 64BIT-DAG: STD killed renamable $x8, 32, %fixed-stack.0 :: (store (s64)) +; 64BIT-DAG: STD killed renamable $x9, 40, %fixed-stack.0 :: (store (s64)) +; 64BIT-DAG: STD killed renamable $x10, 48, %fixed-stack.0 :: (store (s64)) ; 64BIT-DAG: renamable $x11 = ADDI8 %fixed-stack.0, 0 -; 64BIT-DAG: STD renamable $x11, 0, %stack.1.arg2 :: (store 8 into %ir.1) -; 64BIT-DAG: renamable $x6 = LD 0, %stack.1.arg2 :: (load 8 from %ir.arg2) +; 64BIT-DAG: STD renamable $x11, 0, %stack.1.arg2 :: (store (s64) into %ir.1) +; 64BIT-DAG: renamable $x6 = LD 0, %stack.1.arg2 :: (load (s64) from %ir.arg2) ; 64BIT-DAG: renamable $x9 = ADDI8 renamable $x6, 4 ; 64BIT-DAG: renamable $x7 = ADDI8 %fixed-stack.0, 4 -; 64BIT-DAG: renamable $r8 = LWZ 0, %fixed-stack.0 :: (load 4 from %fixed-stack.0, align 8) -; 64BIT-DAG: STD killed renamable $x11, 0, %stack.0.arg1 :: (store 8 into %ir.0) -; 64BIT-DAG: STD killed renamable $x7, 0, %stack.0.arg1 :: (store 8 into %ir.arg1) -; 64BIT-DAG: STD killed renamable $x9, 0, %stack.1.arg2 :: (store 8 into %ir.arg2) -; 64BIT-DAG: renamable $r4 = LWZ 0, killed renamable $x6 :: (load 4) +; 64BIT-DAG: renamable $r8 = LWZ 0, %fixed-stack.0 :: (load (s32) from %fixed-stack.0, align 8) +; 64BIT-DAG: STD killed renamable $x11, 0, %stack.0.arg1 :: (store (s64) into %ir.0) +; 64BIT-DAG: STD killed renamable $x7, 0, %stack.0.arg1 :: (store (s64) into %ir.arg1) +; 64BIT-DAG: STD killed renamable $x9, 0, %stack.1.arg2 :: (store (s64) into %ir.arg2) +; 64BIT-DAG: renamable $r4 = LWZ 0, killed renamable $x6 :: (load (s32)) ; 64BIT-DAG: renamable $r3 = nsw ADD4 killed renamable $r8, renamable $r3, implicit killed $x3 ; 64BIT-DAG: renamable $r4 = RLWINM killed renamable $r4, 1, 0, 30 ; 64BIT-DAG: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r4, implicit-def $x3 @@ -148,7 +148,7 @@ ; 64BIT-LABEL: body: | ; 64BIT-DAG: liveins: $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10 -; 64BIT-DAG: renamable $r11 = LWZ 0, %fixed-stack.0 :: (load 4 from %fixed-stack.0, align 16) +; 64BIT-DAG: renamable $r11 = LWZ 0, %fixed-stack.0 :: (load (s32) from %fixed-stack.0, align 16) ; 64BIT-DAG: renamable $r3 = nsw ADD4 renamable $r4, renamable $r3, implicit killed $x3, implicit killed $x4 ; 64BIT-DAG: renamable $r3 = nsw ADD4 killed renamable $r3, renamable $r5, implicit killed $x5 ; 64BIT-DAG: renamable $r3 = nsw ADD4 killed renamable $r3, renamable $r6, implicit killed $x6 @@ -217,22 +217,22 @@ ; 64BIT-LABEL: body: | ; 64BIT-DAG: liveins: $f1, $x4, $x5, $x6, $x7, $x8, $x9, $x10 ; 64BIT-DAG: renamable $x3 = ADDI8 %fixed-stack.0, 0 -; 64BIT-DAG: STD killed renamable $x4, 0, %fixed-stack.0 :: (store 8 into %fixed-stack.0) -; 64BIT-DAG: STD killed renamable $x5, 8, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 8) -; 64BIT-DAG: STD killed renamable $x6, 16, %fixed-stack.0 :: (store 8) -; 64BIT-DAG: STD killed renamable $x7, 24, %fixed-stack.0 :: (store 8) -; 64BIT-DAG: STD killed renamable $x8, 32, %fixed-stack.0 :: (store 8) -; 64BIT-DAG: STD killed renamable $x9, 40, %fixed-stack.0 :: (store 8) -; 64BIT-DAG: STD killed renamable $x10, 48, %fixed-stack.0 :: (store 8) -; 64BIT-DAG: STD renamable $x3, 0, %stack.1.arg2 :: (store 8 into %ir.1) -; 64BIT-DAG: renamable $x6 = LD 0, %stack.1.arg2 :: (load 8 from %ir.arg2) +; 64BIT-DAG: STD killed renamable $x4, 0, %fixed-stack.0 :: (store (s64) into %fixed-stack.0) +; 64BIT-DAG: STD killed renamable $x5, 8, %fixed-stack.0 :: (store (s64) into %fixed-stack.0 + 8) +; 64BIT-DAG: STD killed renamable $x6, 16, %fixed-stack.0 :: (store (s64)) +; 64BIT-DAG: STD killed renamable $x7, 24, %fixed-stack.0 :: (store (s64)) +; 64BIT-DAG: STD killed renamable $x8, 32, %fixed-stack.0 :: (store (s64)) +; 64BIT-DAG: STD killed renamable $x9, 40, %fixed-stack.0 :: (store (s64)) +; 64BIT-DAG: STD killed renamable $x10, 48, %fixed-stack.0 :: (store (s64)) +; 64BIT-DAG: STD renamable $x3, 0, %stack.1.arg2 :: (store (s64) into %ir.1) +; 64BIT-DAG: renamable $x6 = LD 0, %stack.1.arg2 :: (load (s64) from %ir.arg2) ; 64BIT-DAG: renamable $x7 = ADDI8 %fixed-stack.0, 8 -; 64BIT-DAG: STD killed renamable $x3, 0, %stack.0.arg1 :: (store 8 into %ir.0) -; 64BIT-DAG: STD killed renamable $x7, 0, %stack.0.arg1 :: (store 8 into %ir.arg1) -; 64BIT-DAG: renamable $f0 = LFD 0, %fixed-stack.0 :: (load 8) +; 64BIT-DAG: STD killed renamable $x3, 0, %stack.0.arg1 :: (store (s64) into %ir.0) +; 64BIT-DAG: STD killed renamable $x7, 0, %stack.0.arg1 :: (store (s64) into %ir.arg1) +; 64BIT-DAG: renamable $f0 = LFD 0, %fixed-stack.0 :: (load (s64)) ; 64BIT-DAG: renamable $x3 = ADDI8 renamable $x6, 8 -; 64BIT-DAG: STD killed renamable $x3, 0, %stack.1.arg2 :: (store 8 into %ir.arg2) -; 64BIT-DAG: renamable $f2 = LFD 0, killed renamable $x6 :: (load 8) +; 64BIT-DAG: STD killed renamable $x3, 0, %stack.1.arg2 :: (store (s64) into %ir.arg2) +; 64BIT-DAG: renamable $f2 = LFD 0, killed renamable $x6 :: (load (s64)) ; 64BIT-DAG: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f1, implicit $rm ; 64BIT-DAG: renamable $f1 = nofpexcept FADD killed renamable $f2, renamable $f2, implicit $rm ; 64BIT-DAG: renamable $f1 = nofpexcept FADD killed renamable $f0, killed renamable $f1, implicit $rm @@ -319,7 +319,7 @@ ; 64BIT-LABEL: body: | ; 64BIT-DAG: liveins: $f1, $f2, $f3, $f4, $f5, $f6, $f7, $f8, $f9, $f10, $f11, $f12, $f13 -; 64BIT-DAG: renamable $f0 = LFD 0, %fixed-stack.0 :: (load 8) +; 64BIT-DAG: renamable $f0 = LFD 0, %fixed-stack.0 :: (load (s64)) ; 64BIT-DAG: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f2, implicit $rm ; 64BIT-DAG: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f3, implicit $rm ; 64BIT-DAG: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f4, implicit $rm diff --git a/llvm/test/CodeGen/PowerPC/aix64-cc-byval.ll b/llvm/test/CodeGen/PowerPC/aix64-cc-byval.ll index 97c1490e7ebb4..7c79796880e6b 100644 --- a/llvm/test/CodeGen/PowerPC/aix64-cc-byval.ll +++ b/llvm/test/CodeGen/PowerPC/aix64-cc-byval.ll @@ -21,8 +21,8 @@ entry: ; CHECK-NEXT: - { id: 0, type: default, offset: 48, size: 8, alignment: 16, ; CHECK: bb.0.entry: ; CHECK-NEXT: liveins: $x3 -; CHECK: STD killed renamable $x3, 0, %fixed-stack.0 :: (store 8 into %fixed-stack.0, align 16) -; CHECK-NEXT: renamable $x3 = LBZ8 4, %fixed-stack.0 :: (dereferenceable load 1 +; CHECK: STD killed renamable $x3, 0, %fixed-stack.0 :: (store (s64) into %fixed-stack.0, align 16) +; CHECK-NEXT: renamable $x3 = LBZ8 4, %fixed-stack.0 :: (dereferenceable load (s8) ; CHECKASM-LABEL: .test_byval_5Byte: @@ -46,8 +46,8 @@ entry: ; CHECK-NEXT: - { id: 0, type: default, offset: 48, size: 8, alignment: 16, ; CHECK: bb.0.entry: ; CHECK-NEXT: liveins: $x3 -; CHECK: STD killed renamable $x3, 0, %fixed-stack.0 :: (store 8 into %fixed-stack.0, align 16) -; CHECK-NEXT: renamable $x3 = LBZ8 5, %fixed-stack.0 :: (dereferenceable load 1 +; CHECK: STD killed renamable $x3, 0, %fixed-stack.0 :: (store (s64) into %fixed-stack.0, align 16) +; CHECK-NEXT: renamable $x3 = LBZ8 5, %fixed-stack.0 :: (dereferenceable load (s8) ; CHECKASM-LABEL: .test_byval_6Byte: @@ -71,8 +71,8 @@ entry: ; CHECK-NEXT: - { id: 0, type: default, offset: 48, size: 8, alignment: 16, ; CHECK: bb.0.entry: ; CHECK-NEXT: liveins: $x3 -; CHECK: STD killed renamable $x3, 0, %fixed-stack.0 :: (store 8 into %fixed-stack.0, align 16) -; CHECK-NEXT: renamable $x3 = LBZ8 6, %fixed-stack.0 :: (dereferenceable load 1 +; CHECK: STD killed renamable $x3, 0, %fixed-stack.0 :: (store (s64) into %fixed-stack.0, align 16) +; CHECK-NEXT: renamable $x3 = LBZ8 6, %fixed-stack.0 :: (dereferenceable load (s8) ; CHECKASM-LABEL: .test_byval_7Byte: @@ -98,7 +98,7 @@ entry: ; CHECK-NEXT: liveins: $x3 ; CHECK: renamable $x[[SCRATCH:[0-9]+]] = COPY $x3 ; CHECK-DAG: renamable $x3 = RLDICL $x3, 0, 56 -; CHECK-DAG: STD killed renamable $x[[SCRATCH]], 0, %fixed-stack.0 :: (store 8 into %fixed-stack.0, align 16) +; CHECK-DAG: STD killed renamable $x[[SCRATCH]], 0, %fixed-stack.0 :: (store (s64) into %fixed-stack.0, align 16) ; CHECKASM-LABEL: .test_byval_8Byte: @@ -125,15 +125,15 @@ declare void @test_byval_64Byte(%struct.S64* byval(%struct.S64) align 1) ; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings. ; CHECK: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; CHECK-NEXT: renamable $x[[REGADDR:[0-9]+]] = LDtoc @gS64, $x2 :: (load 8 from got) -; CHECK-DAG: renamable $x3 = LD 0, killed renamable $x[[REGADDR]] :: (load 8) -; CHECK-DAG: renamable $x4 = LD 8, renamable $x[[REGADDR]] :: (load 8) -; CHECK-DAG: renamable $x5 = LD 16, renamable $x[[REGADDR]] :: (load 8) -; CHECK-DAG: renamable $x6 = LD 24, renamable $x[[REGADDR]] :: (load 8) -; CHECK-DAG: renamable $x7 = LD 32, renamable $x[[REGADDR]] :: (load 8) -; CHECK-DAG: renamable $x8 = LD 40, renamable $x[[REGADDR]] :: (load 8) -; CHECK-DAG: renamable $x9 = LD 48, renamable $x[[REGADDR]] :: (load 8) -; CHECK-DAG: renamable $x10 = LD 56, renamable $x[[REGADDR]] :: (load 8) +; CHECK-NEXT: renamable $x[[REGADDR:[0-9]+]] = LDtoc @gS64, $x2 :: (load (s64) from got) +; CHECK-DAG: renamable $x3 = LD 0, killed renamable $x[[REGADDR]] :: (load (s64)) +; CHECK-DAG: renamable $x4 = LD 8, renamable $x[[REGADDR]] :: (load (s64)) +; CHECK-DAG: renamable $x5 = LD 16, renamable $x[[REGADDR]] :: (load (s64)) +; CHECK-DAG: renamable $x6 = LD 24, renamable $x[[REGADDR]] :: (load (s64)) +; CHECK-DAG: renamable $x7 = LD 32, renamable $x[[REGADDR]] :: (load (s64)) +; CHECK-DAG: renamable $x8 = LD 40, renamable $x[[REGADDR]] :: (load (s64)) +; CHECK-DAG: renamable $x9 = LD 48, renamable $x[[REGADDR]] :: (load (s64)) +; CHECK-DAG: renamable $x10 = LD 56, renamable $x[[REGADDR]] :: (load (s64)) ; CHECK-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit $x8, implicit $x9, implicit $x10, implicit $x2, implicit-def $r1 ; CHECK-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 diff --git a/llvm/test/CodeGen/PowerPC/aix64-vector-vararg-callee.ll b/llvm/test/CodeGen/PowerPC/aix64-vector-vararg-callee.ll index 056bb90c68134..b0181c2e7bd7d 100644 --- a/llvm/test/CodeGen/PowerPC/aix64-vector-vararg-callee.ll +++ b/llvm/test/CodeGen/PowerPC/aix64-vector-vararg-callee.ll @@ -15,19 +15,19 @@ define <4 x i32> @callee(i32 signext %count, ...) { ; CHECK: [[COPY4:%[0-9]+]]:g8rc = COPY $x6 ; CHECK: [[COPY5:%[0-9]+]]:g8rc = COPY $x5 ; CHECK: [[COPY6:%[0-9]+]]:g8rc = COPY $x4 - ; CHECK: STD [[COPY6]], 0, %fixed-stack.0 :: (store 8 into %fixed-stack.0) - ; CHECK: STD [[COPY5]], 8, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 8) - ; CHECK: STD [[COPY4]], 16, %fixed-stack.0 :: (store 8) - ; CHECK: STD [[COPY3]], 24, %fixed-stack.0 :: (store 8) - ; CHECK: STD [[COPY2]], 32, %fixed-stack.0 :: (store 8) - ; CHECK: STD [[COPY1]], 40, %fixed-stack.0 :: (store 8) - ; CHECK: STD [[COPY]], 48, %fixed-stack.0 :: (store 8) + ; CHECK: STD [[COPY6]], 0, %fixed-stack.0 :: (store (s64) into %fixed-stack.0) + ; CHECK: STD [[COPY5]], 8, %fixed-stack.0 :: (store (s64) into %fixed-stack.0 + 8) + ; CHECK: STD [[COPY4]], 16, %fixed-stack.0 :: (store (s64)) + ; CHECK: STD [[COPY3]], 24, %fixed-stack.0 :: (store (s64)) + ; CHECK: STD [[COPY2]], 32, %fixed-stack.0 :: (store (s64)) + ; CHECK: STD [[COPY1]], 40, %fixed-stack.0 :: (store (s64)) + ; CHECK: STD [[COPY]], 48, %fixed-stack.0 :: (store (s64)) ; CHECK: LIFETIME_START %stack.0.arg_list ; CHECK: [[ADDI8_:%[0-9]+]]:g8rc = ADDI8 %fixed-stack.0, 0 - ; CHECK: STD killed [[ADDI8_]], 0, %stack.0.arg_list :: (store 8 into %ir.0) + ; CHECK: STD killed [[ADDI8_]], 0, %stack.0.arg_list :: (store (s64) into %ir.0) ; CHECK: [[ADDI8_1:%[0-9]+]]:g8rc = ADDI8 %fixed-stack.0, 15 ; CHECK: [[RLDICR:%[0-9]+]]:g8rc = RLDICR killed [[ADDI8_1]], 0, 59 - ; CHECK: [[LXVW4X:%[0-9]+]]:vsrc = LXVW4X $zero8, killed [[RLDICR]] :: (load 16 from %ir.4) + ; CHECK: [[LXVW4X:%[0-9]+]]:vsrc = LXVW4X $zero8, killed [[RLDICR]] :: (load (s128) from %ir.4) ; CHECK: LIFETIME_END %stack.0.arg_list ; CHECK: $v2 = COPY [[LXVW4X]] ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit $v2 diff --git a/llvm/test/CodeGen/PowerPC/aix64-vector-vararg-fixed-callee.ll b/llvm/test/CodeGen/PowerPC/aix64-vector-vararg-fixed-callee.ll index a1da0b099340f..98eafff6068a0 100644 --- a/llvm/test/CodeGen/PowerPC/aix64-vector-vararg-fixed-callee.ll +++ b/llvm/test/CodeGen/PowerPC/aix64-vector-vararg-fixed-callee.ll @@ -13,16 +13,16 @@ define double @callee(i32 signext %count, <4 x i32> %vsi, double %next, ...) { ; CHECK: [[COPY:%[0-9]+]]:g8rc = COPY $x10 ; CHECK: [[COPY1:%[0-9]+]]:g8rc = COPY $x9 ; CHECK: [[COPY2:%[0-9]+]]:g8rc = COPY $x8 - ; CHECK: STD [[COPY2]], 0, %fixed-stack.0 :: (store 8 into %fixed-stack.0) - ; CHECK: STD [[COPY1]], 8, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 8) - ; CHECK: STD [[COPY]], 16, %fixed-stack.0 :: (store 8) + ; CHECK: STD [[COPY2]], 0, %fixed-stack.0 :: (store (s64) into %fixed-stack.0) + ; CHECK: STD [[COPY1]], 8, %fixed-stack.0 :: (store (s64) into %fixed-stack.0 + 8) + ; CHECK: STD [[COPY]], 16, %fixed-stack.0 :: (store (s64)) ; CHECK: LIFETIME_START %stack.0.arg_list ; CHECK: [[ADDI8_:%[0-9]+]]:g8rc = ADDI8 %fixed-stack.0, 0 - ; CHECK: STD killed [[ADDI8_]], 0, %stack.0.arg_list :: (store 8 into %ir.0) + ; CHECK: STD killed [[ADDI8_]], 0, %stack.0.arg_list :: (store (s64) into %ir.0) ; CHECK: [[ADDI8_1:%[0-9]+]]:g8rc = ADDI8 %fixed-stack.0, 15 ; CHECK: [[RLDICR:%[0-9]+]]:g8rc_and_g8rc_nox0 = RLDICR killed [[ADDI8_1]], 0, 59 ; CHECK: [[LI8_:%[0-9]+]]:g8rc = LI8 16 - ; CHECK: [[XFLOADf64_:%[0-9]+]]:vsfrc = XFLOADf64 killed [[RLDICR]], killed [[LI8_]] :: (load 8 from %ir.4, align 16) + ; CHECK: [[XFLOADf64_:%[0-9]+]]:vsfrc = XFLOADf64 killed [[RLDICR]], killed [[LI8_]] :: (load (s64) from %ir.4, align 16) ; CHECK: LIFETIME_END %stack.0.arg_list ; CHECK: $f1 = COPY [[XFLOADf64_]] ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit $f1 diff --git a/llvm/test/CodeGen/PowerPC/block-placement-1.mir b/llvm/test/CodeGen/PowerPC/block-placement-1.mir index 26109354f6565..cff7f33d9de17 100644 --- a/llvm/test/CodeGen/PowerPC/block-placement-1.mir +++ b/llvm/test/CodeGen/PowerPC/block-placement-1.mir @@ -209,8 +209,8 @@ body: | CFI_INSTRUCTION offset $lr8, 16 CFI_INSTRUCTION offset $x29, -24 CFI_INSTRUCTION offset $x30, -16 - STD killed $x29, -24, $x1 :: (store 8 into %fixed-stack.1) - STD killed $x30, -16, $x1 :: (store 8 into %fixed-stack.0, align 16) + STD killed $x29, -24, $x1 :: (store (s64) into %fixed-stack.1) + STD killed $x30, -16, $x1 :: (store (s64) into %fixed-stack.0, align 16) STD killed $x0, 16, $x1 $x1 = STDU $x1, -64, $x1 renamable $r29 = LI 10 @@ -284,8 +284,8 @@ body: | $x1 = ADDI8 $x1, 64 $x0 = LD 16, $x1 MTLR8 killed $x0, implicit-def $lr8 - $x30 = LD -16, $x1 :: (load 8 from %fixed-stack.0, align 16) - $x29 = LD -24, $x1 :: (load 8 from %fixed-stack.1) + $x30 = LD -16, $x1 :: (load (s64) from %fixed-stack.0, align 16) + $x29 = LD -24, $x1 :: (load (s64) from %fixed-stack.1) BLR8 implicit $lr8, implicit $rm bb.10.ehcleanup: diff --git a/llvm/test/CodeGen/PowerPC/block-placement.mir b/llvm/test/CodeGen/PowerPC/block-placement.mir index cb6ceb4066f7a..fa32064ffc65d 100644 --- a/llvm/test/CodeGen/PowerPC/block-placement.mir +++ b/llvm/test/CodeGen/PowerPC/block-placement.mir @@ -5,17 +5,17 @@ source_filename = "block-placement.ll" target datalayout = "e-m:e-i64:64-n32:64" target triple = "powerpc64le-unknown-linux-gnu" - + %"class.xercesc_2_7::HashXMLCh" = type { %"class.xercesc_2_7::HashBase" } %"class.xercesc_2_7::HashBase" = type { i32 (...)** } - + define dso_local zeroext i1 @_ZN11xercesc_2_79HashXMLCh6equalsEPKvS2_(%"class.xercesc_2_7::HashXMLCh"* nocapture readnone %this, i8* readonly %key1, i8* readonly %key2) unnamed_addr #0 { entry: %cmp.i = icmp eq i8* %key1, null %cmp1.i = icmp eq i8* %key2, null %or.cond.i = or i1 %cmp.i, %cmp1.i br i1 %or.cond.i, label %if.then.i, label %while.cond.preheader.i - + while.cond.preheader.i: ; preds = %entry %0 = bitcast i8* %key2 to i16* %1 = bitcast i8* %key1 to i16* @@ -23,33 +23,33 @@ %3 = load i16, i16* %0, align 2 %cmp926.i = icmp eq i16 %2, %3 br i1 %cmp926.i, label %while.body.i.preheader, label %_ZN11xercesc_2_79XMLString6equalsEPKtS2_.exit - + while.body.i.preheader: ; preds = %while.cond.preheader.i %scevgep = getelementptr i8, i8* %key2, i64 2 %scevgep4 = getelementptr i8, i8* %key1, i64 2 br label %while.body.i - + if.then.i: ; preds = %entry br i1 %cmp.i, label %lor.lhs.false3.i, label %land.lhs.true.i - + land.lhs.true.i: ; preds = %if.then.i %4 = bitcast i8* %key1 to i16* %5 = load i16, i16* %4, align 2 %tobool.i = icmp eq i16 %5, 0 br i1 %tobool.i, label %lor.lhs.false3.i, label %_ZN11xercesc_2_79XMLString6equalsEPKtS2_.exit - + lor.lhs.false3.i: ; preds = %land.lhs.true.i, %if.then.i br i1 %cmp1.i, label %if.else.i, label %land.lhs.true5.i - + land.lhs.true5.i: ; preds = %lor.lhs.false3.i %6 = bitcast i8* %key2 to i16* %7 = load i16, i16* %6, align 2 %tobool6.i = icmp eq i16 %7, 0 br i1 %tobool6.i, label %if.else.i, label %_ZN11xercesc_2_79XMLString6equalsEPKtS2_.exit - + if.else.i: ; preds = %land.lhs.true5.i, %lor.lhs.false3.i br label %_ZN11xercesc_2_79XMLString6equalsEPKtS2_.exit - + while.body.i: ; preds = %while.body.i.preheader, %if.end12.i %lsr.iv5 = phi i8* [ %scevgep4, %while.body.i.preheader ], [ %scevgep6, %if.end12.i ] %lsr.iv = phi i8* [ %scevgep, %while.body.i.preheader ], [ %scevgep2, %if.end12.i ] @@ -62,7 +62,7 @@ %14 = bitcast i8* %13 to i16* %tobool10.i = icmp eq i16 %8, 0 br i1 %tobool10.i, label %_ZN11xercesc_2_79XMLString6equalsEPKtS2_.exit, label %if.end12.i - + if.end12.i: ; preds = %while.body.i %15 = load i16, i16* %14, align 2 %16 = load i16, i16* %12, align 2 @@ -70,13 +70,13 @@ %scevgep2 = getelementptr i8, i8* %lsr.iv, i64 2 %scevgep6 = getelementptr i8, i8* %lsr.iv5, i64 2 br i1 %cmp9.i, label %while.body.i, label %_ZN11xercesc_2_79XMLString6equalsEPKtS2_.exit - + _ZN11xercesc_2_79XMLString6equalsEPKtS2_.exit: ; preds = %if.end12.i, %while.body.i, %if.else.i, %land.lhs.true5.i, %land.lhs.true.i, %while.cond.preheader.i %retval.0.i1 = phi i64 [ 1, %if.else.i ], [ 0, %land.lhs.true.i ], [ 0, %land.lhs.true5.i ], [ 0, %while.cond.preheader.i ], [ 0, %if.end12.i ], [ 1, %while.body.i ] %backToBool = trunc i64 %retval.0.i1 to i1 ret i1 %backToBool } - + attributes #0 = { "target-cpu"="pwr9" } ... @@ -122,90 +122,90 @@ body: | bb.0.entry: successors: %bb.5(0x40000000), %bb.1(0x40000000) liveins: $x4, $x5 - + renamable $cr1 = CMPDI renamable $x4, 0 renamable $cr0 = CMPDI renamable $x5, 0 renamable $cr5lt = CROR renamable $cr1eq, renamable $cr0eq BC killed renamable $cr5lt, %bb.5 - + bb.1.while.cond.preheader.i: successors: %bb.2(0x40000000), %bb.11(0x40000000) liveins: $x4, $x5 - - renamable $r8 = LHZ 0, renamable $x4 :: (load 2 from %ir.1) - renamable $r6 = LHZ 0, renamable $x5 :: (load 2 from %ir.0) + + renamable $r8 = LHZ 0, renamable $x4 :: (load (s16) from %ir.1) + renamable $r6 = LHZ 0, renamable $x5 :: (load (s16) from %ir.0) renamable $x3 = LI8 0 renamable $cr0 = CMPLW renamable $r8, killed renamable $r6 BCC 68, killed renamable $cr0, %bb.11 - + bb.2.while.body.i.preheader: successors: %bb.3(0x80000000) liveins: $r8, $x3, $x4, $x5 - + renamable $x6 = ADDI8 renamable $x5, 2 renamable $x7 = ADDI8 renamable $x4, 2 - + bb.3.while.body.i: successors: %bb.4(0x04000000), %bb.10(0x7c000000) liveins: $r8, $x3, $x4, $x5, $x6, $x7 - + dead renamable $r8 = ANDI_rec killed renamable $r8, 65535, implicit-def $cr0 BCC 68, killed renamable $cr0, %bb.10 - + bb.4: renamable $x3 = LI8 1 BLR8 implicit $lr8, implicit $rm, implicit killed $x3 - + bb.5.if.then.i: successors: %bb.7(0x30000000), %bb.6(0x50000000) liveins: $cr0, $cr1, $x4, $x5 - + BC killed renamable $cr1eq, %bb.7 - + bb.6.land.lhs.true.i: successors: %bb.7(0x30000000), %bb.11(0x50000000) liveins: $cr0, $x4, $x5 - - renamable $r4 = LHZ 0, killed renamable $x4 :: (load 2 from %ir.4) + + renamable $r4 = LHZ 0, killed renamable $x4 :: (load (s16) from %ir.4) renamable $x3 = LI8 0 renamable $cr1 = CMPLWI killed renamable $r4, 0 BCC 68, killed renamable $cr1, %bb.11 - + bb.7.lor.lhs.false3.i: successors: %bb.9(0x30000000), %bb.8(0x50000000) liveins: $cr0, $x5 - + BC killed renamable $cr0eq, %bb.9 - + bb.8.land.lhs.true5.i: successors: %bb.9(0x80000000) liveins: $x5 - - renamable $r4 = LHZ 0, killed renamable $x5 :: (load 2 from %ir.6) + + renamable $r4 = LHZ 0, killed renamable $x5 :: (load (s16) from %ir.6) renamable $x3 = LI8 0 renamable $cr0 = CMPLWI killed renamable $r4, 0 BCCLR 68, killed renamable $cr0, implicit $lr, implicit $rm, implicit killed $x3 - + bb.9.if.else.i: renamable $x3 = LI8 1 BLR8 implicit $lr8, implicit $rm, implicit killed $x3 - + bb.10.if.end12.i: successors: %bb.3(0x7c000000), %bb.11(0x04000000) liveins: $x3, $x4, $x5, $x6, $x7 - + renamable $x5 = ADDI8 killed renamable $x5, 2 renamable $x4 = ADDI8 killed renamable $x4, 2 - renamable $r8 = LHZ 0, renamable $x4 :: (load 2 from %ir.14) - renamable $r9 = LHZ 0, renamable $x5 :: (load 2 from %ir.12) + renamable $r8 = LHZ 0, renamable $x4 :: (load (s16) from %ir.14) + renamable $r9 = LHZ 0, renamable $x5 :: (load (s16) from %ir.12) renamable $x6 = ADDI8 killed renamable $x6, 2 renamable $x7 = ADDI8 killed renamable $x7, 2 renamable $cr0 = CMPLW renamable $r8, killed renamable $r9 BCC 76, killed renamable $cr0, %bb.3 - + bb.11._ZN11xercesc_2_79XMLString6equalsEPKtS2_.exit: liveins: $x3 - + BLR8 implicit $lr8, implicit $rm, implicit killed $x3 ; CHECK: bb.5.if.else.i: diff --git a/llvm/test/CodeGen/PowerPC/botheightreduce.mir b/llvm/test/CodeGen/PowerPC/botheightreduce.mir index 7a2220cda31c8..709cf08c8382c 100644 --- a/llvm/test/CodeGen/PowerPC/botheightreduce.mir +++ b/llvm/test/CodeGen/PowerPC/botheightreduce.mir @@ -26,16 +26,16 @@ body: | ; CHECK: [[LI8_6:%[0-9]+]]:g8rc = LI8 7 ; CHECK: bb.1: ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: [[LD:%[0-9]+]]:g8rc = LD 0, [[ADDI8_]] :: (load 8) - ; CHECK: [[LDX:%[0-9]+]]:g8rc = LDX [[ADDI8_]], [[LI8_]] :: (load 8) - ; CHECK: [[LDX1:%[0-9]+]]:g8rc = LDX [[ADDI8_]], [[LI8_3]] :: (load 8) - ; CHECK: [[LD1:%[0-9]+]]:g8rc = LD 4, [[ADDI8_]] :: (load 8) - ; CHECK: [[LDX2:%[0-9]+]]:g8rc = LDX [[ADDI8_]], [[LI8_4]] :: (load 8) - ; CHECK: [[LDX3:%[0-9]+]]:g8rc = LDX [[ADDI8_]], [[LI8_5]] :: (load 8) - ; CHECK: [[LDX4:%[0-9]+]]:g8rc = LDX [[ADDI8_]], [[LI8_6]] :: (load 8) - ; CHECK: [[LD2:%[0-9]+]]:g8rc = LD 8, [[ADDI8_]] :: (load 8) + ; CHECK: [[LD:%[0-9]+]]:g8rc = LD 0, [[ADDI8_]] :: (load (s64)) + ; CHECK: [[LDX:%[0-9]+]]:g8rc = LDX [[ADDI8_]], [[LI8_]] :: (load (s64)) + ; CHECK: [[LDX1:%[0-9]+]]:g8rc = LDX [[ADDI8_]], [[LI8_3]] :: (load (s64)) + ; CHECK: [[LD1:%[0-9]+]]:g8rc = LD 4, [[ADDI8_]] :: (load (s64)) + ; CHECK: [[LDX2:%[0-9]+]]:g8rc = LDX [[ADDI8_]], [[LI8_4]] :: (load (s64)) + ; CHECK: [[LDX3:%[0-9]+]]:g8rc = LDX [[ADDI8_]], [[LI8_5]] :: (load (s64)) + ; CHECK: [[LDX4:%[0-9]+]]:g8rc = LDX [[ADDI8_]], [[LI8_6]] :: (load (s64)) + ; CHECK: [[LD2:%[0-9]+]]:g8rc = LD 8, [[ADDI8_]] :: (load (s64)) ; CHECK: [[MULLD:%[0-9]+]]:g8rc = MULLD [[LDX]], [[LD]] - ; CHECK: [[LDX5:%[0-9]+]]:g8rc = LDX [[ADDI8_]], [[LI8_2]] :: (load 8) + ; CHECK: [[LDX5:%[0-9]+]]:g8rc = LDX [[ADDI8_]], [[LI8_2]] :: (load (s64)) ; CHECK: [[ADDI8_1:%[0-9]+]]:g8rc = ADDI8 [[ADDI8_]], 1 ; CHECK: [[MULLD1:%[0-9]+]]:g8rc = MULLD [[MULLD]], [[LDX5]] ; CHECK: [[MULLD2:%[0-9]+]]:g8rc = MULLD [[MULLD1]], [[LDX1]] @@ -67,16 +67,16 @@ body: | bb.1: %12:g8rc = ADDI8 %2, 1 - %13:g8rc = LD 0, %2 :: (load 8) - %14:g8rc = LDX %2, %4 :: (load 8) - %16:g8rc = LDX %2, %8 :: (load 8) - %17:g8rc = LD 4, %2 :: (load 8) - %18:g8rc = LDX %2, %9 :: (load 8) - %19:g8rc = LDX %2, %10 :: (load 8) - %20:g8rc = LDX %2, %11 :: (load 8) - %21:g8rc = LD 8, %2 :: (load 8) + %13:g8rc = LD 0, %2 :: (load (s64)) + %14:g8rc = LDX %2, %4 :: (load (s64)) + %16:g8rc = LDX %2, %8 :: (load (s64)) + %17:g8rc = LD 4, %2 :: (load (s64)) + %18:g8rc = LDX %2, %9 :: (load (s64)) + %19:g8rc = LDX %2, %10 :: (load (s64)) + %20:g8rc = LDX %2, %11 :: (load (s64)) + %21:g8rc = LD 8, %2 :: (load (s64)) %22:g8rc = MULLD %14, %13 - %15:g8rc = LDX %2, %7 :: (load 8) + %15:g8rc = LDX %2, %7 :: (load (s64)) %23:g8rc = MULLD %22, %15 %24:g8rc = MULLD %23, %16 %25:g8rc = MULLD %24, %17 diff --git a/llvm/test/CodeGen/PowerPC/byval-agg-info.ll b/llvm/test/CodeGen/PowerPC/byval-agg-info.ll index d78ea09d37e14..d4088a7d2b840 100644 --- a/llvm/test/CodeGen/PowerPC/byval-agg-info.ll +++ b/llvm/test/CodeGen/PowerPC/byval-agg-info.ll @@ -12,6 +12,6 @@ entry: } ; Make sure that the MMO on the store has no offset from the byval -; variable itself (we used to have (store 8 into %ir.v + 64)). -; CHECK: STD killed renamable $x5, 176, $x1 :: (store 8 into %ir.v, align 16) +; variable itself (we used to have (store (s64) into %ir.v + 64)). +; CHECK: STD killed renamable $x5, 176, $x1 :: (store (s64) into %ir.v, align 16) diff --git a/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-R0-special-handling.mir b/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-R0-special-handling.mir index 904210ee13477..fe2b392f59da2 100644 --- a/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-R0-special-handling.mir +++ b/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-R0-special-handling.mir @@ -333,7 +333,7 @@ body: | %1:g8rc = COPY $x4 %0:g8rc_and_g8rc_nox0 = LI8 44 - %2:g8rc = LDX %0, $x0 :: (load 8 from %ir.1, !tbaa !3) + %2:g8rc = LDX %0, $x0 :: (load (s64) from %ir.1, !tbaa !3) ; CHECK: li 3, 44 ; CHECK: ldx 3, 3, 0 $x3 = COPY %2 @@ -381,7 +381,7 @@ body: | %1:g8rc = LI8 44 %0:g8rc_and_g8rc_nox0 = LI8 44 - %2:g8rc = LDX $zero8, %1 :: (load 8 from %ir.1, !tbaa !3) + %2:g8rc = LDX $zero8, %1 :: (load (s64) from %ir.1, !tbaa !3) ; CHECK: ld 3, 44(0) $x3 = COPY %2 BLR8 implicit $lr8, implicit $rm, implicit $x3 @@ -428,7 +428,7 @@ body: | $x0 = LI8 44 %0:g8rc_and_g8rc_nox0 = COPY $x3 - %2:g8rc = LDX %0, $x0 :: (load 8 from %ir.1, !tbaa !3) + %2:g8rc = LDX %0, $x0 :: (load (s64) from %ir.1, !tbaa !3) ; CHECK: ld 3, 44(3) $x3 = COPY %2 BLR8 implicit $lr8, implicit $rm, implicit $x3 diff --git a/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir b/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir index ba950dc3d3ae9..eb93a51887222 100644 --- a/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir +++ b/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir @@ -6,7 +6,7 @@ source_filename = "convert-rr-to-ri-instrs.c" target datalayout = "e-m:e-i64:64-n32:64" target triple = "powerpc64le-unknown-linux-gnu" - + ; Function Attrs: norecurse nounwind readnone define signext i32 @testADD4(i32 signext %a, i32 signext %b) local_unnamed_addr #0 { entry: @@ -14,7 +14,7 @@ %add1 = add nsw i32 %add, %b ret i32 %add1 } - + ; Function Attrs: norecurse nounwind readnone define i64 @testADD8(i64 %a, i64 %b) local_unnamed_addr #0 { entry: @@ -22,21 +22,21 @@ %add1 = add nsw i64 %add, %b ret i64 %add1 } - + ; Function Attrs: norecurse nounwind readnone define i128 @testADDC(i128 %a, i128 %b) local_unnamed_addr #0 { entry: %add = add nsw i128 %b, %a ret i128 %add } - + ; Function Attrs: norecurse nounwind readnone define i128 @testADDC8(i128 %a, i128 %b) local_unnamed_addr #0 { entry: %add = add nsw i128 %b, %a ret i128 %add } - + ; Function Attrs: norecurse nounwind readnone define i64 @testADDC_rec(i64 %a, i64 %b) local_unnamed_addr #0 { entry: @@ -46,21 +46,21 @@ %retval.0 = xor i64 %add, %neg ret i64 %retval.0 } - + ; Function Attrs: norecurse nounwind readnone define signext i32 @testADDI(i32 signext %a) local_unnamed_addr #0 { entry: %add = add nsw i32 %a, 44 ret i32 %add } - + ; Function Attrs: norecurse nounwind readnone define signext i32 @testADDI8(i32 signext %a) local_unnamed_addr #0 { entry: %add = add nsw i32 %a, 44 ret i32 %add } - + ; Function Attrs: norecurse nounwind readnone define signext i32 @testAND_rec(i64 %a, i64 %b) local_unnamed_addr #0 { entry: @@ -70,7 +70,7 @@ %conv = trunc i64 %cond to i32 ret i32 %conv } - + ; Function Attrs: norecurse nounwind readnone define i64 @testAND8_rec(i64 %a, i64 %b) local_unnamed_addr #0 { entry: @@ -79,7 +79,7 @@ %cond = select i1 %tobool, i64 %b, i64 %a ret i64 %cond } - + ; Function Attrs: norecurse nounwind readnone define i64 @testCMPD(i64 %a, i64 %b) local_unnamed_addr #0 { entry: @@ -88,7 +88,7 @@ %cond = add nsw i64 %add, %b ret i64 %cond } - + ; Function Attrs: norecurse nounwind readnone define i64 @testCMPDI(i64 %a, i64 %b) local_unnamed_addr #0 { entry: @@ -97,7 +97,7 @@ %cond = add nsw i64 %add, %b ret i64 %cond } - + ; Function Attrs: norecurse nounwind readnone define i64 @testCMPDI_F(i64 %a, i64 %b) local_unnamed_addr #0 { entry: @@ -106,7 +106,7 @@ %cond = add nsw i64 %add, %b ret i64 %cond } - + ; Function Attrs: norecurse nounwind readnone define i64 @testCMPLD(i64 %a, i64 %b) local_unnamed_addr #0 { entry: @@ -115,7 +115,7 @@ %cond = add i64 %add, %b ret i64 %cond } - + ; Function Attrs: norecurse nounwind readnone define i64 @testCMPLDI(i64 %a, i64 %b) local_unnamed_addr #0 { entry: @@ -124,7 +124,7 @@ %cond = add i64 %add, %b ret i64 %cond } - + ; Function Attrs: norecurse nounwind readnone define signext i32 @testCMPW(i32 signext %a, i32 signext %b) local_unnamed_addr #0 { entry: @@ -133,7 +133,7 @@ %cond = add nsw i32 %add, %b ret i32 %cond } - + ; Function Attrs: norecurse nounwind readnone define signext i32 @testCMPWI(i32 signext %a, i32 signext %b) local_unnamed_addr #0 { entry: @@ -142,7 +142,7 @@ %cond = add nsw i32 %add, %b ret i32 %cond } - + ; Function Attrs: norecurse nounwind readnone define zeroext i32 @testCMPLW(i32 zeroext %a, i32 zeroext %b) local_unnamed_addr #0 { entry: @@ -151,7 +151,7 @@ %cond = add i32 %add, %b ret i32 %cond } - + ; Function Attrs: norecurse nounwind readnone define zeroext i32 @testCMPLWI(i32 zeroext %a, i32 zeroext %b) local_unnamed_addr #0 { entry: @@ -160,7 +160,7 @@ %cond = add i32 %add, %b ret i32 %cond } - + ; Function Attrs: norecurse nounwind readonly define zeroext i8 @testLBZUX(i8* nocapture readonly %ptr, i32 zeroext %idx) local_unnamed_addr #1 { entry: @@ -178,7 +178,7 @@ %conv6 = trunc i32 %add5 to i8 ret i8 %conv6 } - + ; Function Attrs: norecurse nounwind readonly define zeroext i8 @testLBZX(i8* nocapture readonly %ptr, i32 zeroext %idx) local_unnamed_addr #1 { entry: @@ -196,7 +196,7 @@ %conv6 = trunc i32 %add5 to i8 ret i8 %conv6 } - + ; Function Attrs: norecurse nounwind readonly define zeroext i16 @testLHZUX(i16* nocapture readonly %ptr, i32 zeroext %idx) local_unnamed_addr #1 { entry: @@ -214,7 +214,7 @@ %conv6 = trunc i32 %add5 to i16 ret i16 %conv6 } - + ; Function Attrs: norecurse nounwind readonly define zeroext i16 @testLHZX(i16* nocapture readonly %ptr, i32 zeroext %idx) local_unnamed_addr #1 { entry: @@ -232,7 +232,7 @@ %conv6 = trunc i32 %add5 to i16 ret i16 %conv6 } - + ; Function Attrs: norecurse nounwind readonly define signext i16 @testLHAUX(i16* nocapture readonly %ptr, i32 zeroext %idx) local_unnamed_addr #1 { entry: @@ -250,7 +250,7 @@ %conv6 = trunc i32 %add5 to i16 ret i16 %conv6 } - + ; Function Attrs: norecurse nounwind readonly define signext i16 @testLHAX(i16* nocapture readonly %ptr, i32 zeroext %idx) local_unnamed_addr #1 { entry: @@ -268,7 +268,7 @@ %conv6 = trunc i32 %add5 to i16 ret i16 %conv6 } - + ; Function Attrs: norecurse nounwind readonly define zeroext i32 @testLWZUX(i32* nocapture readonly %ptr, i32 zeroext %idx) local_unnamed_addr #1 { entry: @@ -283,7 +283,7 @@ %add4 = add i32 %1, %0 ret i32 %add4 } - + ; Function Attrs: norecurse nounwind readonly define zeroext i32 @testLWZX(i32* nocapture readonly %ptr, i32 zeroext %idx) local_unnamed_addr #1 { entry: @@ -298,7 +298,7 @@ %add4 = add i32 %1, %0 ret i32 %add4 } - + ; Function Attrs: norecurse nounwind readonly define i64 @testLWAX(i32* nocapture readonly %ptr, i32 zeroext %idx) local_unnamed_addr #1 { entry: @@ -315,7 +315,7 @@ %add5 = add nsw i64 %conv4, %conv ret i64 %add5 } - + ; Function Attrs: norecurse nounwind readonly define i64 @testLDUX(i64* nocapture readonly %ptr, i32 zeroext %idx) local_unnamed_addr #1 { entry: @@ -330,7 +330,7 @@ %add4 = add i64 %1, %0 ret i64 %add4 } - + ; Function Attrs: norecurse nounwind readonly define i64 @testLDX(i64* nocapture readonly %ptr, i32 zeroext %idx) local_unnamed_addr #1 { entry: @@ -345,7 +345,7 @@ %add4 = add i64 %1, %0 ret i64 %add4 } - + ; Function Attrs: norecurse nounwind readonly define double @testLFDUX(double* nocapture readonly %ptr, i32 zeroext %idx) local_unnamed_addr #2 { entry: @@ -360,7 +360,7 @@ %add4 = fadd double %0, %1 ret double %add4 } - + ; Function Attrs: norecurse nounwind readonly define double @testLFDX(double* nocapture readonly %ptr, i32 zeroext %idx) local_unnamed_addr #2 { entry: @@ -375,7 +375,7 @@ %add4 = fadd double %0, %1 ret double %add4 } - + ; Function Attrs: norecurse nounwind readonly define <4 x float> @testLFSUX(float* nocapture readonly %ptr, i32 signext %idx) local_unnamed_addr #2 { entry: @@ -405,7 +405,7 @@ %9 = bitcast <4 x i32> %vecinit14 to <4 x float> ret <4 x float> %9 } - + ; Function Attrs: norecurse nounwind readonly define float @testLFSX(float* nocapture readonly %ptr, i32 zeroext %idx) local_unnamed_addr #2 { entry: @@ -420,7 +420,7 @@ %add4 = fadd float %0, %1 ret float %add4 } - + ; Function Attrs: norecurse nounwind readonly define double @testLXSDX(double* nocapture readonly %ptr, i32 zeroext %idx) local_unnamed_addr #1 { entry: @@ -435,7 +435,7 @@ %add4 = fadd double %0, %1 ret double %add4 } - + ; Function Attrs: norecurse nounwind readonly define float @testLXSSPX(float* nocapture readonly %ptr, i32 zeroext %idx) local_unnamed_addr #1 { entry: @@ -450,7 +450,7 @@ %add4 = fadd float %0, %1 ret float %add4 } - + ; Function Attrs: norecurse nounwind readonly define <4 x i32> @testLXVX(<4 x i32>* nocapture readonly %ptr, i32 zeroext %idx) local_unnamed_addr #1 { entry: @@ -465,35 +465,35 @@ %add4 = add <4 x i32> %1, %0 ret <4 x i32> %add4 } - + ; Function Attrs: norecurse nounwind readnone define signext i32 @testOR(i32 signext %a, i32 signext %b) local_unnamed_addr #0 { entry: %or = or i32 %b, %a ret i32 %or } - + ; Function Attrs: norecurse nounwind readnone define i64 @testOR8(i64 %a, i64 %b) local_unnamed_addr #0 { entry: %or = or i64 %b, %a ret i64 %or } - + ; Function Attrs: norecurse nounwind readnone define signext i32 @testORI(i32 signext %a) local_unnamed_addr #0 { entry: %or = or i32 %a, 88 ret i32 %or } - + ; Function Attrs: norecurse nounwind readnone define i64 @testORI8(i64 %a) local_unnamed_addr #0 { entry: %or = or i64 %a, 99 ret i64 %or } - + ; Function Attrs: norecurse nounwind readnone define i64 @testRLDCL(i64 %a, i64 %b) local_unnamed_addr #0 { entry: @@ -504,7 +504,7 @@ %or = or i64 %shr, %shl ret i64 %or } - + ; Function Attrs: norecurse nounwind readnone define i64 @testRLDCL_rec(i64 %a, i64 %b) local_unnamed_addr #0 { entry: @@ -517,7 +517,7 @@ %cond = select i1 %tobool, i64 %and, i64 %a ret i64 %cond } - + ; Function Attrs: norecurse nounwind readnone define i64 @testRLDCR(i64 %a, i64 %b) local_unnamed_addr #0 { entry: @@ -528,7 +528,7 @@ %or = or i64 %shr, %shl ret i64 %or } - + ; Function Attrs: norecurse nounwind readnone define i64 @testRLDCR_rec(i64 %a, i64 %b) local_unnamed_addr #0 { entry: @@ -541,7 +541,7 @@ %cond = select i1 %tobool, i64 %and, i64 %a ret i64 %cond } - + ; Function Attrs: norecurse nounwind readnone define i64 @testRLDICL(i64 %a) local_unnamed_addr #0 { entry: @@ -549,7 +549,7 @@ %and = and i64 %shr, 16777215 ret i64 %and } - + ; Function Attrs: norecurse nounwind readnone define i64 @testRLDICL_MB0(i64 %a) local_unnamed_addr #0 { entry: @@ -557,7 +557,7 @@ %and = and i64 %shr, 16777215 ret i64 %and } - + ; Function Attrs: norecurse nounwind readnone define i64 @testRLDICL_rec(i64 %a, i64 %b) local_unnamed_addr #0 { entry: @@ -567,7 +567,7 @@ %cond = select i1 %tobool, i64 %b, i64 %and ret i64 %cond } - + ; Function Attrs: norecurse nounwind readnone define i64 @testRLDICL_rec2(i64 %a, i64 %b) local_unnamed_addr #0 { entry: @@ -587,7 +587,7 @@ %cond = select i1 %tobool, i64 %b, i64 %and ret i64 %cond } - + ; Function Attrs: norecurse nounwind readnone define zeroext i32 @testRLWINM(i32 zeroext %a) local_unnamed_addr #0 { entry: @@ -595,7 +595,7 @@ %and = and i32 %shl, 4080 ret i32 %and } - + ; Function Attrs: norecurse nounwind readnone define zeroext i32 @testRLWINMFullReg(i32 zeroext %a) local_unnamed_addr #0 { entry: @@ -603,7 +603,7 @@ %and = and i32 %shl, 4080 ret i32 %and } - + ; Function Attrs: norecurse nounwind readnone define zeroext i32 @testRLWINMFullRegOutOfRange(i32 zeroext %a) local_unnamed_addr #0 { entry: @@ -611,7 +611,7 @@ %and = and i32 %shl, 4080 ret i32 %and } - + ; Function Attrs: norecurse nounwind readnone define i64 @testRLWINM8(i64 %a) local_unnamed_addr #0 { entry: @@ -619,7 +619,7 @@ %and = and i64 %shl, 4080 ret i64 %and } - + ; Function Attrs: norecurse nounwind readnone define zeroext i32 @testRLWINM_rec(i32 zeroext %a, i32 zeroext %b) local_unnamed_addr #0 { entry: @@ -628,7 +628,7 @@ %cond = select i1 %tobool, i32 %b, i32 %a ret i32 %cond } - + ; Function Attrs: norecurse nounwind readnone define zeroext i32 @testRLWINM_rec2(i32 zeroext %a, i32 zeroext %b) local_unnamed_addr #0 { entry: @@ -649,14 +649,14 @@ %cond = select i1 %tobool, i64 %b, i64 %conv1 ret i64 %cond } - + ; Function Attrs: norecurse nounwind readnone define i64 @testSLD(i64 %a, i64 %b) local_unnamed_addr #0 { entry: %shl = shl i64 %a, %b ret i64 %shl } - + ; Function Attrs: norecurse nounwind readnone define i64 @testSLD_rec(i64 %a, i64 %b) local_unnamed_addr #0 { entry: @@ -665,14 +665,14 @@ %cond = select i1 %tobool, i64 %b, i64 %a ret i64 %cond } - + ; Function Attrs: norecurse nounwind readnone define i64 @testSRD(i64 %a, i64 %b) local_unnamed_addr #0 { entry: %shr = lshr i64 %a, %b ret i64 %shr } - + ; Function Attrs: norecurse nounwind readnone define i64 @testSRD_rec(i64 %a, i64 %b) local_unnamed_addr #0 { entry: @@ -681,14 +681,14 @@ %cond = select i1 %tobool, i64 %b, i64 %a ret i64 %cond } - + ; Function Attrs: norecurse nounwind readnone define zeroext i32 @testSLW(i32 zeroext %a, i32 zeroext %b) local_unnamed_addr #0 { entry: %shl = shl i32 %a, %b ret i32 %shl } - + ; Function Attrs: norecurse nounwind readnone define zeroext i32 @testSLW_rec(i32 zeroext %a, i32 zeroext %b) local_unnamed_addr #0 { entry: @@ -697,14 +697,14 @@ %cond = select i1 %tobool, i32 %b, i32 %a ret i32 %cond } - + ; Function Attrs: norecurse nounwind readnone define zeroext i32 @testSRW(i32 zeroext %a, i32 zeroext %b) local_unnamed_addr #0 { entry: %shr = lshr i32 %a, %b ret i32 %shr } - + ; Function Attrs: norecurse nounwind readnone define zeroext i32 @testSRW_rec(i32 zeroext %a, i32 zeroext %b) local_unnamed_addr #0 { entry: @@ -713,14 +713,14 @@ %cond = select i1 %tobool, i32 %b, i32 %a ret i32 %cond } - + ; Function Attrs: norecurse nounwind readnone define signext i32 @testSRAW(i32 signext %a, i32 signext %b) local_unnamed_addr #0 { entry: %shr = ashr i32 %a, %b ret i32 %shr } - + ; Function Attrs: norecurse nounwind readnone define signext i32 @testSRAW_rec(i32 signext %a, i32 signext %b) local_unnamed_addr #0 { entry: @@ -729,14 +729,14 @@ %cond = select i1 %tobool, i32 %b, i32 %shr ret i32 %cond } - + ; Function Attrs: norecurse nounwind readnone define i64 @testSRAD(i64 %a, i64 %b) local_unnamed_addr #0 { entry: %shr = ashr i64 %a, %b ret i64 %shr } - + ; Function Attrs: norecurse nounwind readnone define i64 @testSRAD_rec(i64 %a, i64 %b) local_unnamed_addr #0 { entry: @@ -745,7 +745,7 @@ %cond = select i1 %tobool, i64 %b, i64 %shr ret i64 %cond } - + ; Function Attrs: norecurse nounwind define void @testSTBUX(i8* nocapture %ptr, i8 zeroext %a, i32 zeroext %idx) local_unnamed_addr #3 { entry: @@ -759,7 +759,7 @@ store i8 %a, i8* %arrayidx3, align 1, !tbaa !3 ret void } - + ; Function Attrs: norecurse nounwind define void @testSTBX(i8* nocapture %ptr, i8 zeroext %a, i32 zeroext %idx) local_unnamed_addr #3 { entry: @@ -773,7 +773,7 @@ store i8 %a, i8* %arrayidx3, align 1, !tbaa !3 ret void } - + ; Function Attrs: norecurse nounwind define void @testSTHUX(i16* nocapture %ptr, i16 zeroext %a, i32 zeroext %idx) local_unnamed_addr #3 { entry: @@ -787,7 +787,7 @@ store i16 %a, i16* %arrayidx3, align 2, !tbaa !6 ret void } - + ; Function Attrs: norecurse nounwind define void @testSTHX(i16* nocapture %ptr, i16 zeroext %a, i32 zeroext %idx) local_unnamed_addr #3 { entry: @@ -801,7 +801,7 @@ store i16 %a, i16* %arrayidx3, align 1, !tbaa !3 ret void } - + ; Function Attrs: norecurse nounwind define void @testSTWUX(i32* nocapture %ptr, i32 zeroext %a, i32 zeroext %idx) local_unnamed_addr #3 { entry: @@ -815,7 +815,7 @@ store i32 %a, i32* %arrayidx3, align 4, !tbaa !8 ret void } - + ; Function Attrs: norecurse nounwind define void @testSTWX(i32* nocapture %ptr, i32 zeroext %a, i32 zeroext %idx) local_unnamed_addr #3 { entry: @@ -829,7 +829,7 @@ store i32 %a, i32* %arrayidx3, align 4, !tbaa !8 ret void } - + ; Function Attrs: norecurse nounwind define void @testSTDUX(i64* nocapture %ptr, i64 %a, i32 zeroext %idx) local_unnamed_addr #3 { entry: @@ -843,7 +843,7 @@ store i64 %a, i64* %arrayidx3, align 8, !tbaa !10 ret void } - + ; Function Attrs: norecurse nounwind define void @testSTDX(i64* nocapture %ptr, i64 %a, i32 zeroext %idx) local_unnamed_addr #3 { entry: @@ -857,7 +857,7 @@ store i64 %a, i64* %arrayidx3, align 8, !tbaa !10 ret void } - + ; Function Attrs: norecurse nounwind readonly define void @testSTFSX(float* nocapture %ptr, float %a, i32 zeroext %idx) local_unnamed_addr #2 { entry: @@ -871,7 +871,7 @@ store float %a, float* %arrayidx3, align 4, !tbaa !14 ret void } - + ; Function Attrs: norecurse nounwind readonly define void @testSTFSUX(float* nocapture %ptr, float %a, i32 zeroext %idx) local_unnamed_addr #2 { entry: @@ -885,7 +885,7 @@ store float %a, float* %arrayidx3, align 4, !tbaa !14 ret void } - + ; Function Attrs: norecurse nounwind readonly define void @testSTFDX(double* nocapture %ptr, double %a, i32 zeroext %idx) local_unnamed_addr #2 { entry: @@ -899,7 +899,7 @@ store double %a, double* %arrayidx3, align 8, !tbaa !12 ret void } - + ; Function Attrs: norecurse nounwind readonly define void @testSTFDUX(double* nocapture %ptr, double %a, i32 zeroext %idx) local_unnamed_addr #2 { entry: @@ -913,7 +913,7 @@ store double %a, double* %arrayidx3, align 8, !tbaa !12 ret void } - + ; Function Attrs: norecurse nounwind define void @testSTXSSPX(float* nocapture %ptr, float %a, i32 zeroext %idx) local_unnamed_addr #3 { entry: @@ -922,7 +922,7 @@ store float %a, float* %arrayidx, align 4, !tbaa !14 ret void } - + ; Function Attrs: norecurse nounwind define void @testSTXSDX(double* nocapture %ptr, double %a, i32 zeroext %idx) local_unnamed_addr #3 { entry: @@ -931,7 +931,7 @@ store double %a, double* %arrayidx, align 8, !tbaa !12 ret void } - + ; Function Attrs: norecurse nounwind define void @testSTXVX(<4 x i32>* nocapture %ptr, <4 x i32> %a, i32 zeroext %idx) local_unnamed_addr #3 { entry: @@ -940,57 +940,57 @@ store <4 x i32> %a, <4 x i32>* %arrayidx, align 16, !tbaa !3 ret void } - + ; Function Attrs: norecurse nounwind readnone define i128 @testSUBFC(i128 %a, i128 %b) local_unnamed_addr #0 { entry: %sub = sub nsw i128 %a, %b ret i128 %sub } - + ; Function Attrs: norecurse nounwind readnone define i128 @testSUBFC8(i128 %a, i128 %b) local_unnamed_addr #0 { entry: %sub = sub nsw i128 %a, %b ret i128 %sub } - + ; Function Attrs: norecurse nounwind readnone define signext i32 @testXOR(i32 signext %a, i32 signext %b) local_unnamed_addr #0 { entry: %xor = xor i32 %b, %a ret i32 %xor } - + ; Function Attrs: norecurse nounwind readnone define i64 @testXOR8(i64 %a, i64 %b) local_unnamed_addr #0 { entry: %xor = xor i64 %b, %a ret i64 %xor } - + ; Function Attrs: norecurse nounwind readnone define signext i32 @testXORI(i32 signext %a) local_unnamed_addr #0 { entry: %xor = xor i32 %a, 17 ret i32 %xor } - + ; Function Attrs: norecurse nounwind readnone define i64 @testXOR8I(i64 %a) local_unnamed_addr #0 { entry: %xor = xor i64 %a, 17 ret i64 %xor } - + attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #2 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,-vsx" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #3 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+power9-vector,+vsx" "unsafe-fp-math"="false" "use-soft-float"="false" } - + !llvm.module.flags = !{!0, !1} !llvm.ident = !{!2} - + !0 = !{i32 1, !"wchar_size", i32 4} !1 = !{i32 7, !"PIC Level", i32 2} !2 = !{!"clang version 6.0.0 (trunk 316067)"} @@ -1018,7 +1018,7 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: gprc, preferred-register: '' } @@ -1026,10 +1026,10 @@ registers: - { id: 4, class: gprc_and_gprc_nor0, preferred-register: '' } - { id: 5, class: gprc, preferred-register: '' } - { id: 6, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -1046,13 +1046,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = COPY $x3 %2 = LI 33 @@ -1077,15 +1077,15 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 3, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -1102,13 +1102,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = LI8 33 %0 = COPY $x3 %2 = ADD8 %0, %1 @@ -1130,7 +1130,7 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: g8rc, preferred-register: '' } @@ -1140,12 +1140,12 @@ registers: - { id: 6, class: gprc, preferred-register: '' } - { id: 7, class: g8rc, preferred-register: '' } - { id: 8, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } - { reg: '$x5', virtual-reg: '%2' } - { reg: '$x6', virtual-reg: '%3' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -1162,13 +1162,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4, $x5, $x6 - + %3 = COPY $x6 %2 = COPY $x5 %1 = COPY $x4 @@ -1194,19 +1194,19 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: g8rc, preferred-register: '' } - { id: 3, class: g8rc, preferred-register: '' } - { id: 4, class: g8rc, preferred-register: '' } - { id: 5, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } - { reg: '$x5', virtual-reg: '%2' } - { reg: '$x6', virtual-reg: '%3' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -1223,13 +1223,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4, $x5, $x6 - + %3 = COPY $x6 %2 = COPY $x5 %1 = COPY $x4 @@ -1252,7 +1252,7 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc, preferred-register: '' } - { id: 1, class: gprc, preferred-register: '' } - { id: 2, class: gprc, preferred-register: '' } @@ -1262,10 +1262,10 @@ registers: - { id: 6, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 7, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 8, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -1282,13 +1282,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = LI 433 %0 = COPY $x3 %2 = COPY %0.sub_32 @@ -1313,14 +1313,14 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc, preferred-register: '' } - { id: 1, class: gprc_and_gprc_nor0, preferred-register: '' } - { id: 2, class: gprc, preferred-register: '' } - { id: 3, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -1337,13 +1337,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3 - + %0 = COPY $x3 %1 = LI 77 %2 = ADDI killed %1, 44 @@ -1363,14 +1363,14 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc, preferred-register: '' } - { id: 1, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 2, class: g8rc, preferred-register: '' } - { id: 3, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -1387,13 +1387,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3 - + %0 = COPY $x3 %1 = LI8 333 %2 = ADDI8 killed %1, 44 @@ -1413,7 +1413,7 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 1, class: gprc, preferred-register: '' } - { id: 2, class: gprc_and_gprc_nor0, preferred-register: '' } @@ -1421,10 +1421,10 @@ registers: - { id: 4, class: crrc, preferred-register: '' } - { id: 5, class: gprc, preferred-register: '' } - { id: 6, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -1441,13 +1441,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = LI 78 %0 = COPY $x3 %2 = COPY %0.sub_32 @@ -1470,16 +1470,16 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 1, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 2, class: g8rc, preferred-register: '' } - { id: 3, class: crrc, preferred-register: '' } - { id: 4, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -1496,13 +1496,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = LI8 321 %0 = COPY $x3 %2 = AND8_rec %1, %0, implicit-def $cr0 @@ -1523,17 +1523,17 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: crrc, preferred-register: '' } - { id: 3, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 4, class: g8rc, preferred-register: '' } - { id: 5, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -1550,13 +1550,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = LI8 65533 %0 = COPY $x3 %2 = CMPD %0, %1 @@ -1577,17 +1577,17 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: crrc, preferred-register: '' } - { id: 3, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 4, class: g8rc, preferred-register: '' } - { id: 5, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -1604,13 +1604,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = LI8 89 %2 = CMPDI %0, 87 @@ -1630,17 +1630,17 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: crrc, preferred-register: '' } - { id: 3, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 4, class: g8rc, preferred-register: '' } - { id: 5, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -1657,13 +1657,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = LI8 87 %2 = CMPDI %0, 87 @@ -1683,17 +1683,17 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: crrc, preferred-register: '' } - { id: 3, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 4, class: g8rc, preferred-register: '' } - { id: 5, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -1710,13 +1710,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = LI8 99 %0 = COPY $x3 %2 = CMPLD %0, %1 @@ -1737,17 +1737,17 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: crrc, preferred-register: '' } - { id: 3, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 4, class: g8rc, preferred-register: '' } - { id: 5, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -1764,13 +1764,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = LI8 65534 %2 = CMPLDI %0, 65535 @@ -1790,7 +1790,7 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: gprc, preferred-register: '' } @@ -1800,10 +1800,10 @@ registers: - { id: 6, class: gprc, preferred-register: '' } - { id: 7, class: gprc, preferred-register: '' } - { id: 8, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -1820,13 +1820,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = COPY $x3 %2 = LI -1 @@ -1849,7 +1849,7 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: gprc, preferred-register: '' } @@ -1859,10 +1859,10 @@ registers: - { id: 6, class: gprc, preferred-register: '' } - { id: 7, class: gprc, preferred-register: '' } - { id: 8, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -1879,13 +1879,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = COPY $x3 %2 = COPY %1.sub_32 @@ -1908,7 +1908,7 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: gprc, preferred-register: '' } @@ -1920,10 +1920,10 @@ registers: - { id: 8, class: g8rc, preferred-register: '' } - { id: 9, class: g8rc, preferred-register: '' } - { id: 10, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -1940,13 +1940,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = COPY $x3 %2 = LI 32767 @@ -1972,7 +1972,7 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: gprc, preferred-register: '' } @@ -1984,10 +1984,10 @@ registers: - { id: 8, class: g8rc, preferred-register: '' } - { id: 9, class: g8rc, preferred-register: '' } - { id: 10, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -2004,13 +2004,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = COPY $x3 %2 = COPY %1.sub_32 @@ -2035,7 +2035,7 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: gprc_and_gprc_nor0, preferred-register: '' } @@ -2054,10 +2054,10 @@ registers: - { id: 15, class: g8rc, preferred-register: '' } - { id: 16, class: g8rc, preferred-register: '' } - { id: 17, class: g8rc_and_g8rc_nox0, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -2074,13 +2074,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = COPY $x3 %2 = COPY %1.sub_32 @@ -2088,12 +2088,12 @@ body: | %5 = IMPLICIT_DEF %4 = INSERT_SUBREG %5, killed %3, 1 %6 = RLDICL killed %4, 0, 32 - %7 = LBZX %0, killed %6 :: (load 1 from %ir.arrayidx, !tbaa !3) + %7 = LBZX %0, killed %6 :: (load (s8) from %ir.arrayidx, !tbaa !3) %8 = ADDI %2, 2 %10 = IMPLICIT_DEF %9 = INSERT_SUBREG %10, killed %8, 1 %11 = LI8 -15 - %12,%17 = LBZUX %0, killed %11 :: (load 1 from %ir.arrayidx3, !tbaa !3) + %12,%17 = LBZUX %0, killed %11 :: (load (s8) from %ir.arrayidx3, !tbaa !3) ; CHECK: LBZU -15, %0 ; CHECK-LATE: lbzu 5, -15(3) %13 = ADD4 killed %12, killed %7 @@ -2113,7 +2113,7 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: gprc_and_gprc_nor0, preferred-register: '' } @@ -2131,10 +2131,10 @@ registers: - { id: 14, class: g8rc, preferred-register: '' } - { id: 15, class: g8rc, preferred-register: '' } - { id: 16, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -2151,13 +2151,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = LI8 45 %2 = COPY %1.sub_32 @@ -2165,14 +2165,14 @@ body: | %5 = IMPLICIT_DEF %4 = INSERT_SUBREG %5, killed %3, 1 %6 = RLDICL killed %4, 0, 32 - %7 = LBZX %0, killed %6 :: (load 1 from %ir.arrayidx, !tbaa !3) + %7 = LBZX %0, killed %6 :: (load (s8) from %ir.arrayidx, !tbaa !3) ; CHECK: LBZ 45, killed %6 ; CHECK-LATE: lbz 5, 45(5) %8 = ADDI %2, 2 %10 = IMPLICIT_DEF %9 = INSERT_SUBREG %10, killed %8, 1 %11 = RLDICL killed %9, 0, 32 - %12 = LBZX %0, killed %11 :: (load 1 from %ir.arrayidx3, !tbaa !3) + %12 = LBZX %0, killed %11 :: (load (s8) from %ir.arrayidx3, !tbaa !3) ; CHECK: LBZ 45, killed %11 ; CHECK-LATE: lbz 3, 45(4) %13 = ADD4 killed %12, killed %7 @@ -2192,7 +2192,7 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: gprc_and_gprc_nor0, preferred-register: '' } @@ -2211,10 +2211,10 @@ registers: - { id: 15, class: g8rc, preferred-register: '' } - { id: 16, class: g8rc, preferred-register: '' } - { id: 17, class: g8rc_and_g8rc_nox0, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -2231,13 +2231,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = COPY $x3 %2 = COPY %1.sub_32 @@ -2245,12 +2245,12 @@ body: | %5 = IMPLICIT_DEF %4 = INSERT_SUBREG %5, killed %3, 1 %6 = RLDIC killed %4, 1, 31 - %7 = LHZX %0, killed %6 :: (load 2 from %ir.arrayidx, !tbaa !6) + %7 = LHZX %0, killed %6 :: (load (s16) from %ir.arrayidx, !tbaa !6) %8 = ADDI %2, 2 %10 = IMPLICIT_DEF %9 = INSERT_SUBREG %10, killed %8, 1 %11 = LI8 31440 - %12,%17 = LHZUX %0, killed %11 :: (load 2 from %ir.arrayidx3, !tbaa !6) + %12,%17 = LHZUX %0, killed %11 :: (load (s16) from %ir.arrayidx3, !tbaa !6) ; CHECK: LHZU 31440, %0 ; CHECK-LATE: lhzu 5, 31440(3) %13 = ADD4 killed %12, killed %7 @@ -2270,7 +2270,7 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: gprc_and_gprc_nor0, preferred-register: '' } @@ -2288,10 +2288,10 @@ registers: - { id: 14, class: g8rc, preferred-register: '' } - { id: 15, class: g8rc, preferred-register: '' } - { id: 16, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -2308,13 +2308,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = COPY $x3 %2 = COPY %1.sub_32 @@ -2322,12 +2322,12 @@ body: | %5 = IMPLICIT_DEF %4 = INSERT_SUBREG %5, killed %3, 1 %6 = RLDIC killed %4, 1, 31 - %7 = LHZX %0, killed %6 :: (load 2 from %ir.arrayidx, !tbaa !6) + %7 = LHZX %0, killed %6 :: (load (s16) from %ir.arrayidx, !tbaa !6) %8 = ADDI %2, 2 %10 = IMPLICIT_DEF %9 = INSERT_SUBREG %10, killed %8, 1 %11 = LI8 882 - %12 = LHZX %0, killed %11 :: (load 2 from %ir.arrayidx3, !tbaa !6) + %12 = LHZX %0, killed %11 :: (load (s16) from %ir.arrayidx3, !tbaa !6) ; CHECK: LHZ 882, %0 ; CHECK-LATE: lhz 3, 882(3) %13 = ADD4 killed %12, killed %7 @@ -2347,7 +2347,7 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: gprc_and_gprc_nor0, preferred-register: '' } @@ -2366,10 +2366,10 @@ registers: - { id: 15, class: g8rc, preferred-register: '' } - { id: 16, class: g8rc, preferred-register: '' } - { id: 17, class: g8rc_and_g8rc_nox0, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -2386,13 +2386,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = COPY $x3 %2 = COPY %1.sub_32 @@ -2400,12 +2400,12 @@ body: | %5 = IMPLICIT_DEF %4 = INSERT_SUBREG %5, killed %3, 1 %6 = RLDIC %4, 1, 31 - %7 = LHZX %0, killed %6 :: (load 2 from %ir.arrayidx, !tbaa !6) + %7 = LHZX %0, killed %6 :: (load (s16) from %ir.arrayidx, !tbaa !6) %8 = ADDI %2, 2 %10 = IMPLICIT_DEF %9 = INSERT_SUBREG %10, killed %8, 1 %11 = LI8 400 - %12,%17 = LHAUX %0, killed %11 :: (load 2 from %ir.arrayidx3, !tbaa !6) + %12,%17 = LHAUX %0, killed %11 :: (load (s16) from %ir.arrayidx3, !tbaa !6) ; CHECK: LHAU 400, %0 ; CHECK-LATE: lhau 5, 400(3) %13 = ADD4 killed %12, killed %7 @@ -2425,7 +2425,7 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: gprc_and_gprc_nor0, preferred-register: '' } @@ -2443,10 +2443,10 @@ registers: - { id: 14, class: g8rc, preferred-register: '' } - { id: 15, class: g8rc, preferred-register: '' } - { id: 16, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -2463,13 +2463,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = COPY $x3 %2 = COPY %1.sub_32 @@ -2477,15 +2477,15 @@ body: | %5 = IMPLICIT_DEF %4 = INSERT_SUBREG %5, killed %3, 1 %6 = LI8 -999 - %7 = LHAX %0, killed %6 :: (load 2 from %ir.arrayidx, !tbaa !6) + %7 = LHAX %0, killed %6 :: (load (s16) from %ir.arrayidx, !tbaa !6) ; CHECK: LHA -999, %0 ; CHECK-LATE: lha 4, -999(3) %8 = ADDI %2, 2 %10 = IMPLICIT_DEF %9 = INSERT_SUBREG %10, killed %8, 1 %11 = LI8 999 - %12 = LHAX %0, killed %11 :: (load 2 from %ir.arrayidx3, !tbaa !6) - ; CHECK: LHA 999, %0 + %12 = LHAX %0, killed %11 :: (load (s16) from %ir.arrayidx3, !tbaa !6) + ; CHECK: LHA 999, %0 ; CHECK-LATE: lha 3, 999(3) %13 = ADD4 killed %12, killed %7 %15 = IMPLICIT_DEF @@ -2504,7 +2504,7 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: gprc_and_gprc_nor0, preferred-register: '' } @@ -2524,10 +2524,10 @@ registers: - { id: 16, class: g8rc, preferred-register: '' } - { id: 17, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 18, class: g8rc_and_g8rc_nox0, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -2544,13 +2544,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = COPY $x3 %2 = COPY %1.sub_32 @@ -2558,14 +2558,14 @@ body: | %5 = IMPLICIT_DEF %4 = INSERT_SUBREG %5, killed %3, 1 %6 = LI8 889 - %7,%17 = LWZUX %0, killed %6 :: (load 4 from %ir.arrayidx, !tbaa !8) + %7,%17 = LWZUX %0, killed %6 :: (load (s32) from %ir.arrayidx, !tbaa !8) ; CHECK: LWZU 889, %0 ; CHECK-LATE: lwzu {{[0-9]+}}, 889({{[0-9]+}}) %8 = ADDI %2, 2 %10 = IMPLICIT_DEF %9 = INSERT_SUBREG %10, killed %8, 1 %11 = LI8 -2 - %12,%18 = LWZUX %0, killed %11 :: (load 4 from %ir.arrayidx3, !tbaa !8) + %12,%18 = LWZUX %0, killed %11 :: (load (s32) from %ir.arrayidx3, !tbaa !8) ; CHECK: LWZU -2, %0 ; CHECK-LATE: lwzu {{[0-9]+}}, -2({{[0-9]+}}) %13 = ADD4 killed %12, killed %7 @@ -2585,7 +2585,7 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: gprc_and_gprc_nor0, preferred-register: '' } @@ -2603,10 +2603,10 @@ registers: - { id: 14, class: g8rc, preferred-register: '' } - { id: 15, class: g8rc, preferred-register: '' } - { id: 16, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -2623,13 +2623,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = LI8 1000 %2 = COPY %1.sub_32 @@ -2637,14 +2637,14 @@ body: | %5 = IMPLICIT_DEF %4 = INSERT_SUBREG %5, killed %3, 1 %6 = RLDIC %4, 2, 30 - %7 = LWZX %0, killed %6 :: (load 4 from %ir.arrayidx, !tbaa !8) + %7 = LWZX %0, killed %6 :: (load (s32) from %ir.arrayidx, !tbaa !8) ; CHECK: LWZ 1000, killed %6 ; CHECK-LATE: lwz 5, 1000(5) %8 = ADDI %2, 2 %10 = IMPLICIT_DEF %9 = INSERT_SUBREG %10, killed %8, 1 %11 = RLDIC %9, 2, 30 - %12 = LWZX %0, killed %11 :: (load 4 from %ir.arrayidx3, !tbaa !8) + %12 = LWZX %0, killed %11 :: (load (s32) from %ir.arrayidx3, !tbaa !8) ; CHECK: LWZ 1000, killed %11 ; CHECK-LATE: lwz 3, 1000(4) %13 = ADD4 killed %12, killed %7 @@ -2664,7 +2664,7 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: gprc_and_gprc_nor0, preferred-register: '' } @@ -2679,10 +2679,10 @@ registers: - { id: 11, class: g8rc, preferred-register: '' } - { id: 12, class: g8rc, preferred-register: '' } - { id: 13, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -2699,13 +2699,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = LI8 444 %2 = COPY %1.sub_32 @@ -2713,14 +2713,14 @@ body: | %5 = IMPLICIT_DEF %4 = INSERT_SUBREG %5, killed %3, 1 %6 = RLDIC %4, 2, 30 - %7 = LWAX %0, killed %6 :: (load 4 from %ir.arrayidx, !tbaa !8) + %7 = LWAX %0, killed %6 :: (load (s32) from %ir.arrayidx, !tbaa !8) ; CHECK: LWA 444, killed %6 ; CHECK-LATE: lwa 5, 444(5) %8 = ADDI %2, 2 %10 = IMPLICIT_DEF %9 = INSERT_SUBREG %10, killed %8, 1 %11 = RLDIC %9, 2, 30 - %12 = LWAX %0, killed %11 :: (load 4 from %ir.arrayidx3, !tbaa !8) + %12 = LWAX %0, killed %11 :: (load (s32) from %ir.arrayidx3, !tbaa !8) ; CHECK: LWA 444, killed %11 ; CHECK-LATE: lwa 3, 444(4) %13 = ADD8 killed %12, killed %7 @@ -2737,7 +2737,7 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: gprc_and_gprc_nor0, preferred-register: '' } @@ -2754,10 +2754,10 @@ registers: - { id: 13, class: g8rc, preferred-register: '' } - { id: 14, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 15, class: g8rc_and_g8rc_nox0, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -2774,13 +2774,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = COPY $x3 %2 = COPY %1.sub_32 @@ -2788,14 +2788,14 @@ body: | %5 = IMPLICIT_DEF %4 = INSERT_SUBREG %5, killed %3, 1 %6 = LI8 100 - %7,%14 = LDUX %0, killed %6 :: (load 8 from %ir.arrayidx, !tbaa !10) + %7,%14 = LDUX %0, killed %6 :: (load (s64) from %ir.arrayidx, !tbaa !10) ; CHECK: LDU 100, %0 ; CHECK-LATE: ldu {{[0-9]+}}, 100({{[0-9]+}}) %8 = ADDI %2, 2 %10 = IMPLICIT_DEF %9 = INSERT_SUBREG %10, killed %8, 1 %11 = LI8 200 - %12,%15 = LDUX %0, killed %11 :: (load 8 from %ir.arrayidx3, !tbaa !10) + %12,%15 = LDUX %0, killed %11 :: (load (s64) from %ir.arrayidx3, !tbaa !10) ; CHECK: LDU 200, %0 ; CHECK-LATE: ldu {{[0-9]+}}, 200({{[0-9]+}}) %13 = ADD8 killed %12, killed %7 @@ -2812,7 +2812,7 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: gprc_and_gprc_nor0, preferred-register: '' } @@ -2827,10 +2827,10 @@ registers: - { id: 11, class: g8rc, preferred-register: '' } - { id: 12, class: g8rc, preferred-register: '' } - { id: 13, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -2847,13 +2847,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = COPY $x3 %2 = COPY %1.sub_32 @@ -2861,14 +2861,14 @@ body: | %5 = IMPLICIT_DEF %4 = INSERT_SUBREG %5, killed %3, 1 %6 = LI8 120 - %7 = LDX %0, killed %6 :: (load 8 from %ir.arrayidx, !tbaa !10) + %7 = LDX %0, killed %6 :: (load (s64) from %ir.arrayidx, !tbaa !10) ; CHECK: LD 120, %0 ; CHECK-LATE: ld 4, 120(3) %8 = ADDI %2, 2 %10 = IMPLICIT_DEF %9 = INSERT_SUBREG %10, killed %8, 1 %11 = LI8 280 - %12 = LDX %0, killed %11 :: (load 8 from %ir.arrayidx3, !tbaa !10) + %12 = LDX %0, killed %11 :: (load (s64) from %ir.arrayidx3, !tbaa !10) ; CHECK: LD 280, %0 ; CHECK-LATE: ld 3, 280(3) %13 = ADD8 killed %12, killed %7 @@ -2885,7 +2885,7 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: gprc_and_gprc_nor0, preferred-register: '' } @@ -2902,10 +2902,10 @@ registers: - { id: 13, class: f8rc, preferred-register: '' } - { id: 14, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 15, class: g8rc_and_g8rc_nox0, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -2922,13 +2922,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = COPY $x3 %2 = COPY %1.sub_32 @@ -2936,14 +2936,14 @@ body: | %5 = IMPLICIT_DEF %4 = INSERT_SUBREG %5, killed %3, 1 %6 = LI8 440 - %7,%14 = LFDUX %0, killed %6 :: (load 8 from %ir.arrayidx, !tbaa !12) + %7,%14 = LFDUX %0, killed %6 :: (load (s64) from %ir.arrayidx, !tbaa !12) ; CHECK: LFDU 440, %0 ; CHECK-LATE: lfdu {{[0-9]+}}, 440({{[0-9]+}}) %8 = ADDI %2, 2 %10 = IMPLICIT_DEF %9 = INSERT_SUBREG %10, killed %8, 1 %11 = LI8 16 - %12,%15 = LFDUX %0, killed %11 :: (load 8 from %ir.arrayidx3, !tbaa !12) + %12,%15 = LFDUX %0, killed %11 :: (load (s64) from %ir.arrayidx3, !tbaa !12) ; CHECK: LFDU 16, %0 ; CHECK-LATE: lfdu {{[0-9]+}}, 16({{[0-9]+}}) %13 = FADD killed %7, killed %12, implicit $rm @@ -2960,7 +2960,7 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: gprc_and_gprc_nor0, preferred-register: '' } @@ -2975,10 +2975,10 @@ registers: - { id: 11, class: g8rc, preferred-register: '' } - { id: 12, class: f8rc, preferred-register: '' } - { id: 13, class: f8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -2995,13 +2995,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = LI8 -20 %2 = COPY %1.sub_32 @@ -3009,14 +3009,14 @@ body: | %5 = IMPLICIT_DEF %4 = INSERT_SUBREG %5, killed %3, 1 %6 = RLDIC %4, 3, 29 - %7 = LFDX %0, killed %6 :: (load 8 from %ir.arrayidx, !tbaa !12) + %7 = LFDX %0, killed %6 :: (load (s64) from %ir.arrayidx, !tbaa !12) ; CHECK: LFD -20, killed %6 ; CHECK-LATE: lfd {{[0-9]+}}, -20({{[0-9]+}}) %8 = ADDI %2, 2 %10 = IMPLICIT_DEF %9 = INSERT_SUBREG %10, killed %8, 1 %11 = RLDIC %9, 3, 29 - %12 = LFDX %0, killed %11 :: (load 8 from %ir.arrayidx3, !tbaa !12) + %12 = LFDX %0, killed %11 :: (load (s64) from %ir.arrayidx3, !tbaa !12) ; CHECK: LFD -20, killed %11 ; CHECK-LATE: lfd {{[0-9]+}}, -20({{[0-9]+}}) %13 = FADD killed %7, killed %12, implicit $rm @@ -3033,7 +3033,7 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: g8rc, preferred-register: '' } @@ -3059,10 +3059,10 @@ registers: - { id: 22, class: gprc, preferred-register: '' } - { id: 23, class: g8rc, preferred-register: '' } - { id: 24, class: vrrc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -3079,64 +3079,64 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: - - { id: 0, name: '', type: default, offset: 0, size: 16, alignment: 16, +fixedStack: +stack: + - { id: 0, name: '', type: default, offset: 0, size: 16, alignment: 16, stack-id: default, callee-saved-register: '', callee-saved-restored: true, local-offset: -16, debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 1, name: '', type: default, offset: 0, size: 4, alignment: 4, + - { id: 1, name: '', type: default, offset: 0, size: 4, alignment: 4, stack-id: default, callee-saved-register: '', callee-saved-restored: true, local-offset: -20, debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 2, name: '', type: default, offset: 0, size: 4, alignment: 4, + - { id: 2, name: '', type: default, offset: 0, size: 4, alignment: 4, stack-id: default, callee-saved-register: '', callee-saved-restored: true, local-offset: -24, debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 3, name: '', type: default, offset: 0, size: 4, alignment: 4, + - { id: 3, name: '', type: default, offset: 0, size: 4, alignment: 4, stack-id: default, callee-saved-register: '', callee-saved-restored: true, local-offset: -28, debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 4, name: '', type: default, offset: 0, size: 4, alignment: 4, + - { id: 4, name: '', type: default, offset: 0, size: 4, alignment: 4, stack-id: default, callee-saved-register: '', callee-saved-restored: true, local-offset: -32, debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -constants: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = COPY $x3 %2 = LI8 72 - %3, %4 = LFSUX %0, killed %2 :: (load 4 from %ir.arrayidx, !tbaa !14) + %3, %4 = LFSUX %0, killed %2 :: (load (s32) from %ir.arrayidx, !tbaa !14) ; CHECK: LFSU 72, %0 ; CHECK-LATE: lfsu 0, 72(3) %5 = FCTIWUZ killed %3, implicit $rm %6 = ADDI8 %stack.4, 0 STFIWX killed %5, $zero8, killed %6 - %7 = LWZ 0, %stack.4 :: (load 4 from %stack.4) - %8 = LFS 4, %4 :: (load 4 from %ir.3, !tbaa !14) + %7 = LWZ 0, %stack.4 :: (load (s32) from %stack.4) + %8 = LFS 4, %4 :: (load (s32) from %ir.3, !tbaa !14) %10 = FCTIWUZ %8, implicit $rm %11 = ADDI8 %stack.1, 0 STFIWX killed %10, $zero8, killed %11 - %12 = LWZ 0, %stack.1 :: (load 4 from %stack.1) - %13 = LFS 8, %4 :: (load 4 from %ir.5, !tbaa !14) + %12 = LWZ 0, %stack.1 :: (load (s32) from %stack.1) + %13 = LFS 8, %4 :: (load (s32) from %ir.5, !tbaa !14) %15 = FCTIWUZ %13, implicit $rm %16 = ADDI8 %stack.2, 0 STFIWX killed %15, $zero8, killed %16 - %17 = LWZ 0, %stack.2 :: (load 4 from %stack.2) - %18 = LFS 12, %4 :: (load 4 from %ir.7, !tbaa !14) + %17 = LWZ 0, %stack.2 :: (load (s32) from %stack.2) + %18 = LFS 12, %4 :: (load (s32) from %ir.7, !tbaa !14) %20 = FCTIWUZ %18, implicit $rm %21 = ADDI8 %stack.3, 0 STFIWX killed %20, $zero8, killed %21 - %22 = LWZ 0, %stack.3 :: (load 4 from %stack.3) - STW killed %7, 0, %stack.0 :: (store 4 into %stack.0, align 16) - STW killed %22, 12, %stack.0 :: (store 4 into %stack.0 + 12) - STW killed %17, 8, %stack.0 :: (store 4 into %stack.0 + 8, align 8) - STW killed %12, 4, %stack.0 :: (store 4 into %stack.0 + 4) + %22 = LWZ 0, %stack.3 :: (load (s32) from %stack.3) + STW killed %7, 0, %stack.0 :: (store (s32) into %stack.0, align 16) + STW killed %22, 12, %stack.0 :: (store (s32) into %stack.0 + 12) + STW killed %17, 8, %stack.0 :: (store (s32) into %stack.0 + 8, align 8) + STW killed %12, 4, %stack.0 :: (store (s32) into %stack.0 + 4) %23 = ADDI8 %stack.0, 0 - %24 = LVX $zero8, killed %23 :: (load 16 from %stack.0) + %24 = LVX $zero8, killed %23 :: (load (s128) from %stack.0) $v2 = COPY %24 BLR8 implicit $lr8, implicit $rm, implicit $v2 @@ -3150,7 +3150,7 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: gprc_and_gprc_nor0, preferred-register: '' } @@ -3165,10 +3165,10 @@ registers: - { id: 11, class: g8rc, preferred-register: '' } - { id: 12, class: f4rc, preferred-register: '' } - { id: 13, class: f4rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -3185,13 +3185,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = COPY $x3 %2 = COPY %1.sub_32 @@ -3199,14 +3199,14 @@ body: | %5 = IMPLICIT_DEF %4 = INSERT_SUBREG %5, killed %3, 1 %6 = LI8 88 - %7 = LFSX %0, killed %6 :: (load 4 from %ir.arrayidx, !tbaa !14) + %7 = LFSX %0, killed %6 :: (load (s32) from %ir.arrayidx, !tbaa !14) ; CHECK: LFS 88, %0 ; CHECK-LATE: lfs 0, 88(3) %8 = ADDI %2, 2 %10 = IMPLICIT_DEF %9 = INSERT_SUBREG %10, killed %8, 1 %11 = LI8 -88 - %12 = LFSX %0, killed %11 :: (load 4 from %ir.arrayidx3, !tbaa !14) + %12 = LFSX %0, killed %11 :: (load (s32) from %ir.arrayidx3, !tbaa !14) ; CHECK: LFS -88, %0 ; CHECK-LATE: lfs 1, -88(3) %13 = FADDS killed %7, killed %12, implicit $rm @@ -3223,7 +3223,7 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: gprc_and_gprc_nor0, preferred-register: '' } @@ -3238,10 +3238,10 @@ registers: - { id: 11, class: g8rc, preferred-register: '' } - { id: 12, class: vsfrc, preferred-register: '' } - { id: 13, class: vsfrc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -3258,13 +3258,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = COPY $x3 %2 = COPY %1.sub_32 @@ -3272,14 +3272,14 @@ body: | %5 = IMPLICIT_DEF %4 = INSERT_SUBREG %5, killed %3, 1 %6 = LI8 100 - %7 = LXSDX %0, killed %6, implicit $rm :: (load 8 from %ir.arrayidx, !tbaa !12) + %7 = LXSDX %0, killed %6, implicit $rm :: (load (s64) from %ir.arrayidx, !tbaa !12) ; CHECK: DFLOADf64 100, %0 ; CHECK-LATE: lfd 0, 100(3) %8 = ADDI %2, 2 %10 = IMPLICIT_DEF %9 = INSERT_SUBREG %10, killed %8, 1 %11 = LI8 -120 - %12 = LXSDX %0, killed %11, implicit $rm :: (load 8 from %ir.arrayidx3, !tbaa !12) + %12 = LXSDX %0, killed %11, implicit $rm :: (load (s64) from %ir.arrayidx3, !tbaa !12) ; CHECK: DFLOADf64 -120, %0 ; CHECK-LATE: lfd 1, -120(3) %13 = XSADDDP killed %7, killed %12, implicit $rm @@ -3296,7 +3296,7 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: gprc_and_gprc_nor0, preferred-register: '' } @@ -3311,10 +3311,10 @@ registers: - { id: 11, class: g8rc, preferred-register: '' } - { id: 12, class: vssrc, preferred-register: '' } - { id: 13, class: vssrc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -3331,13 +3331,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = COPY $x3 %2 = COPY %1.sub_32 @@ -3345,14 +3345,14 @@ body: | %5 = IMPLICIT_DEF %4 = INSERT_SUBREG %5, killed %3, 1 %6 = LI8 96 - %7 = LXSSPX %0, killed %6 :: (load 4 from %ir.arrayidx, !tbaa !14) + %7 = LXSSPX %0, killed %6 :: (load (s32) from %ir.arrayidx, !tbaa !14) ; CHECK: DFLOADf32 96, %0 ; CHECK-LATE: lfs 0, 96(3) %8 = ADDI %2, 2 %10 = IMPLICIT_DEF %9 = INSERT_SUBREG %10, killed %8, 1 %11 = LI8 -92 - %12 = LXSSPX %0, killed %11 :: (load 4 from %ir.arrayidx3, !tbaa !14) + %12 = LXSSPX %0, killed %11 :: (load (s32) from %ir.arrayidx3, !tbaa !14) ; CHECK: DFLOADf32 -92, %0 ; CHECK-LATE: lfs 1, -92(3) %13 = XSADDSP killed %7, killed %12 @@ -3369,7 +3369,7 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: gprc_and_gprc_nor0, preferred-register: '' } @@ -3384,10 +3384,10 @@ registers: - { id: 11, class: g8rc, preferred-register: '' } - { id: 12, class: vrrc, preferred-register: '' } - { id: 13, class: vrrc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -3404,13 +3404,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = COPY $x3 %2 = COPY %1.sub_32 @@ -3418,14 +3418,14 @@ body: | %5 = IMPLICIT_DEF %4 = INSERT_SUBREG %5, killed %3, 1 %6 = LI8 32 - %7 = LXVX %0, killed %6 :: (load 16 from %ir.arrayidx, !tbaa !3) + %7 = LXVX %0, killed %6 :: (load (s128) from %ir.arrayidx, !tbaa !3) ; CHECK: LXV 32, %0 ; CHECK-LATE: lxv 34, 32(3) %8 = ADDI %2, 2 %10 = IMPLICIT_DEF %9 = INSERT_SUBREG %10, killed %8, 1 %11 = LI8 -16 - %12 = LXVX %0, killed %11 :: (load 16 from %ir.arrayidx3, !tbaa !3) + %12 = LXVX %0, killed %11 :: (load (s128) from %ir.arrayidx3, !tbaa !3) ; CHECK: LXV -16, %0 ; CHECK-LATE: lxv 35, -16(3) %13 = VADDUWM killed %12, killed %7 @@ -3442,15 +3442,15 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: gprc, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: gprc, preferred-register: '' } - { id: 3, class: gprc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -3467,13 +3467,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = LI 99 %3 = COPY %1.sub_32 @@ -3493,14 +3493,14 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -3517,13 +3517,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = LI8 777 %2 = OR8 %1, %0 @@ -3542,12 +3542,12 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: gprc, preferred-register: '' } - { id: 1, class: gprc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -3564,13 +3564,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3 - + %0 = LI 777 %1 = ORI %0, 88 ; CHECK: LI 857 @@ -3588,12 +3588,12 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -3610,13 +3610,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3 - + %0 = LI8 8721 %1 = ORI8 %0, 99 ; CHECK: LI8 8819 @@ -3634,16 +3634,16 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: gprc, preferred-register: '' } - { id: 3, class: gprc, preferred-register: '' } - { id: 4, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -3660,13 +3660,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = COPY $x3 %2 = COPY %1.sub_32 @@ -3687,7 +3687,7 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: g8rc_and_g8rc_nox0, preferred-register: '' } @@ -3695,10 +3695,10 @@ registers: - { id: 4, class: g8rc, preferred-register: '' } - { id: 5, class: crrc, preferred-register: '' } - { id: 6, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -3715,13 +3715,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = COPY $x3 %2 = RLDICL %1, 0, 58 @@ -3744,16 +3744,16 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: gprc, preferred-register: '' } - { id: 3, class: gprc, preferred-register: '' } - { id: 4, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -3770,13 +3770,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = COPY $x3 %2 = COPY %1.sub_32 @@ -3797,7 +3797,7 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: g8rc_and_g8rc_nox0, preferred-register: '' } @@ -3805,10 +3805,10 @@ registers: - { id: 4, class: g8rc, preferred-register: '' } - { id: 5, class: crrc, preferred-register: '' } - { id: 6, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -3825,13 +3825,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = COPY $x3 %2 = RLDICL %1, 0, 58 @@ -3854,12 +3854,12 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -3876,13 +3876,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3 - + %0 = LI8 -1 %1 = RLDICL %0, 53, 49 ; CHECK: LI8 32767 @@ -3900,12 +3900,12 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -3922,13 +3922,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3 - + %0 = LI8 32 %1 = RLDICL %0, 60, 0 ; CHECK: LI8 2 @@ -3946,16 +3946,16 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc, preferred-register: '' } - { id: 1, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 2, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 3, class: crrc, preferred-register: '' } - { id: 4, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -3972,13 +3972,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = LI8 -1 %2 = RLDICL_rec %0, 53, 48, implicit-def $cr0 @@ -4000,16 +4000,16 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc, preferred-register: '' } - { id: 1, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 2, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 3, class: crrc, preferred-register: '' } - { id: 4, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -4026,13 +4026,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = LI8 200 %2 = RLDICL_rec %0, 61, 3, implicit-def $cr0 @@ -4054,16 +4054,16 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc, preferred-register: '' } - { id: 1, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 2, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 3, class: crrc, preferred-register: '' } - { id: 4, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -4080,13 +4080,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = LI8 2 %2 = RLDICL_rec %0, 32, 32, implicit-def $cr0 @@ -4108,15 +4108,15 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc, preferred-register: '' } - { id: 1, class: gprc, preferred-register: '' } - { id: 2, class: gprc, preferred-register: '' } - { id: 3, class: g8rc, preferred-register: '' } - { id: 4, class: gprc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -4133,13 +4133,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3 - + %0 = COPY $x3 %1 = COPY %0.sub_32 %3 = IMPLICIT_DEF @@ -4160,15 +4160,15 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc, preferred-register: '' } - { id: 1, class: gprc, preferred-register: '' } - { id: 2, class: gprc, preferred-register: '' } - { id: 3, class: g8rc, preferred-register: '' } - { id: 4, class: gprc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -4185,13 +4185,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3 - + %0 = COPY $x3 %1 = COPY %0.sub_32 %3 = IMPLICIT_DEF @@ -4212,15 +4212,15 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc, preferred-register: '' } - { id: 1, class: gprc, preferred-register: '' } - { id: 2, class: gprc, preferred-register: '' } - { id: 3, class: g8rc, preferred-register: '' } - { id: 4, class: gprc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -4237,13 +4237,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3 - + %0 = COPY $x3 %1 = COPY %0.sub_32 %3 = IMPLICIT_DEF @@ -4264,12 +4264,12 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -4286,13 +4286,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3 - + %0 = LI8 234 %1 = RLWINM8 %0, 4, 20, 27 ; CHECK: LI8 3744 @@ -4310,7 +4310,7 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: gprc_and_gprc_nor0, preferred-register: '' } @@ -4321,10 +4321,10 @@ registers: - { id: 7, class: g8rc, preferred-register: '' } - { id: 8, class: g8rc, preferred-register: '' } - { id: 9, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -4341,13 +4341,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = COPY $x3 %2 = COPY %1.sub_32 @@ -4375,7 +4375,7 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: gprc_and_gprc_nor0, preferred-register: '' } @@ -4386,10 +4386,10 @@ registers: - { id: 7, class: g8rc, preferred-register: '' } - { id: 8, class: g8rc, preferred-register: '' } - { id: 9, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -4406,13 +4406,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = COPY $x3 %2 = COPY %1.sub_32 @@ -4439,7 +4439,7 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc, preferred-register: '' } - { id: 1, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 2, class: g8rc, preferred-register: '' } @@ -4449,10 +4449,10 @@ registers: - { id: 6, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 7, class: crrc, preferred-register: '' } - { id: 8, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -4469,13 +4469,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = COPY $x3 %2 = LI8 -18 @@ -4499,15 +4499,15 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: gprc, preferred-register: '' } - { id: 3, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -4524,13 +4524,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = COPY $x3 %2 = LI 13 @@ -4550,17 +4550,17 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 1, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 2, class: gprc, preferred-register: '' } - { id: 3, class: g8rc, preferred-register: '' } - { id: 4, class: crrc, preferred-register: '' } - { id: 5, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -4577,13 +4577,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = COPY $x3 %2 = LI 17 @@ -4605,15 +4605,15 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: gprc, preferred-register: '' } - { id: 3, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -4630,13 +4630,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = COPY $x3 %2 = LI 4 @@ -4656,17 +4656,17 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 1, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 2, class: gprc, preferred-register: '' } - { id: 3, class: g8rc, preferred-register: '' } - { id: 4, class: crrc, preferred-register: '' } - { id: 5, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -4683,13 +4683,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = COPY $x3 %2 = LI 17 @@ -4711,7 +4711,7 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: gprc, preferred-register: '' } @@ -4721,10 +4721,10 @@ registers: - { id: 6, class: g8rc, preferred-register: '' } - { id: 7, class: g8rc, preferred-register: '' } - { id: 8, class: gprc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -4741,13 +4741,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = COPY $x3 %2 = COPY %1.sub_32 @@ -4768,7 +4768,7 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: gprc_and_gprc_nor0, preferred-register: '' } @@ -4779,10 +4779,10 @@ registers: - { id: 7, class: g8rc, preferred-register: '' } - { id: 8, class: g8rc, preferred-register: '' } - { id: 9, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -4799,13 +4799,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = COPY $x3 %2 = LI 11 @@ -4831,7 +4831,7 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: gprc, preferred-register: '' } @@ -4841,10 +4841,10 @@ registers: - { id: 6, class: g8rc, preferred-register: '' } - { id: 7, class: g8rc, preferred-register: '' } - { id: 8, class: gprc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -4861,13 +4861,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = COPY $x3 %2 = LI 8 @@ -4888,7 +4888,7 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: gprc_and_gprc_nor0, preferred-register: '' } @@ -4899,10 +4899,10 @@ registers: - { id: 7, class: g8rc, preferred-register: '' } - { id: 8, class: g8rc, preferred-register: '' } - { id: 9, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -4919,13 +4919,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = COPY $x3 %2 = LI 7 @@ -4951,17 +4951,17 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: gprc, preferred-register: '' } - { id: 3, class: gprc, preferred-register: '' } - { id: 4, class: gprc, preferred-register: '' } - { id: 5, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -4978,13 +4978,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = COPY $x3 %2 = LI 15 @@ -5006,7 +5006,7 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: gprc_and_gprc_nor0, preferred-register: '' } @@ -5015,10 +5015,10 @@ registers: - { id: 5, class: crrc, preferred-register: '' } - { id: 6, class: gprc, preferred-register: '' } - { id: 7, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -5035,13 +5035,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = COPY $x3 %2 = LI 8 @@ -5065,15 +5065,15 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: gprc, preferred-register: '' } - { id: 3, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -5090,13 +5090,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = COPY $x3 %2 = LI 44 @@ -5116,17 +5116,17 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc, preferred-register: '' } - { id: 1, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 2, class: gprc, preferred-register: '' } - { id: 3, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 4, class: crrc, preferred-register: '' } - { id: 5, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -5143,13 +5143,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = COPY $x3 %2 = LI 61 @@ -5171,7 +5171,7 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: g8rc, preferred-register: '' } @@ -5187,11 +5187,11 @@ registers: - { id: 12, class: g8rc, preferred-register: '' } - { id: 13, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 14, class: g8rc_and_g8rc_nox0, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } - { reg: '$x5', virtual-reg: '%2' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -5208,13 +5208,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4, $x5 - + %2 = COPY $x5 %1 = COPY $x4 %0 = COPY $x3 @@ -5224,14 +5224,14 @@ body: | %7 = IMPLICIT_DEF %6 = INSERT_SUBREG %7, killed %5, 1 %8 = LI8 966 - %13 = STBUX %3, %0, killed %8 :: (store 1 into %ir.arrayidx, !tbaa !3) + %13 = STBUX %3, %0, killed %8 :: (store (s8) into %ir.arrayidx, !tbaa !3) ; CHECK: STBU %3, 966, %0 ; CHECK-LATE: {{[0-9]+}}, 966({{[0-9]+}}) %9 = ADDI %4, 2 %11 = IMPLICIT_DEF %10 = INSERT_SUBREG %11, killed %9, 1 %12 = LI8 777 - %14 = STBUX %3, %0, killed %12 :: (store 1 into %ir.arrayidx3, !tbaa !3) + %14 = STBUX %3, %0, killed %12 :: (store (s8) into %ir.arrayidx3, !tbaa !3) ; CHECK: STBU %3, 777, %0 ; CHECK-LATE: {{[0-9]+}}, 777({{[0-9]+}}) BLR8 implicit $lr8, implicit $rm @@ -5246,7 +5246,7 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: g8rc, preferred-register: '' } @@ -5260,11 +5260,11 @@ registers: - { id: 10, class: g8rc, preferred-register: '' } - { id: 11, class: g8rc, preferred-register: '' } - { id: 12, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } - { reg: '$x5', virtual-reg: '%2' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -5281,13 +5281,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4, $x5 - + %2 = COPY $x5 %1 = COPY $x4 %0 = LI8 975 @@ -5297,14 +5297,14 @@ body: | %7 = IMPLICIT_DEF %6 = INSERT_SUBREG %7, killed %5, 1 %8 = RLDICL killed %6, 0, 32 - STBX %3, %0, killed %8 :: (store 1 into %ir.arrayidx, !tbaa !3) + STBX %3, %0, killed %8 :: (store (s8) into %ir.arrayidx, !tbaa !3) ; CHECK: STB %3, 975, killed %8 ; CHECK-LATE: stb 4, 975(6) %9 = ADDI %4, 2 %11 = IMPLICIT_DEF %10 = INSERT_SUBREG %11, killed %9, 1 %12 = RLDICL killed %10, 0, 32 - STBX %3, %0, killed %12 :: (store 1 into %ir.arrayidx3, !tbaa !3) + STBX %3, %0, killed %12 :: (store (s8) into %ir.arrayidx3, !tbaa !3) ; CHECK: STB %3, 975, killed %12 ; CHECK-LATE: stb 4, 975(5) BLR8 implicit $lr8, implicit $rm @@ -5319,7 +5319,7 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: g8rc, preferred-register: '' } @@ -5335,11 +5335,11 @@ registers: - { id: 12, class: g8rc, preferred-register: '' } - { id: 13, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 14, class: g8rc_and_g8rc_nox0, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } - { reg: '$x5', virtual-reg: '%2' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -5356,13 +5356,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4, $x5 - + %2 = COPY $x5 %1 = COPY $x4 %0 = COPY $x3 @@ -5372,14 +5372,14 @@ body: | %7 = IMPLICIT_DEF %6 = INSERT_SUBREG %7, killed %5, 1 %8 = LI8 32000 - %13 = STHUX %3, %0, killed %8 :: (store 2 into %ir.arrayidx, !tbaa !6) + %13 = STHUX %3, %0, killed %8 :: (store (s16) into %ir.arrayidx, !tbaa !6) ; CHECK: STHU %3, 32000, %0 ; CHECK-LATE: sthu {{[0-9]+}}, 32000({{[0-9]+}}) %9 = ADDI %4, 2 %11 = IMPLICIT_DEF %10 = INSERT_SUBREG %11, killed %9, 1 %12 = LI8 -761 - %14 = STHUX %3, %0, killed %12 :: (store 2 into %ir.arrayidx3, !tbaa !6) + %14 = STHUX %3, %0, killed %12 :: (store (s16) into %ir.arrayidx3, !tbaa !6) ; CHECK: STHU %3, -761, %0 ; CHECK-LATE: sthu {{[0-9]+}}, -761({{[0-9]+}}) BLR8 implicit $lr8, implicit $rm @@ -5394,7 +5394,7 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: g8rc, preferred-register: '' } @@ -5408,11 +5408,11 @@ registers: - { id: 10, class: g8rc, preferred-register: '' } - { id: 11, class: g8rc, preferred-register: '' } - { id: 12, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } - { reg: '$x5', virtual-reg: '%2' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -5429,13 +5429,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4, $x5 - + %2 = COPY $x5 %1 = COPY $x4 %0 = COPY $x3 @@ -5445,14 +5445,14 @@ body: | %7 = IMPLICIT_DEF %6 = INSERT_SUBREG %7, killed %5, 1 %8 = LI8 900 - STHX %3, %0, killed %8 :: (store 1 into %ir.arrayidx, !tbaa !3) + STHX %3, %0, killed %8 :: (store (s8) into %ir.arrayidx, !tbaa !3) ; CHECK: STH %3, 900, %0 ; CHECK-LATE: sth {{[0-9]+}}, 900({{[0-9]+}}) %9 = ADDI %4, 2 %11 = IMPLICIT_DEF %10 = INSERT_SUBREG %11, killed %9, 1 %12 = LI8 -900 - STHX %3, %0, killed %12 :: (store 1 into %ir.arrayidx3, !tbaa !3) + STHX %3, %0, killed %12 :: (store (s8) into %ir.arrayidx3, !tbaa !3) ; CHECK: STH %3, -900, %0 ; CHECK-LATE: sth {{[0-9]+}}, -900({{[0-9]+}}) BLR8 implicit $lr8, implicit $rm @@ -5467,7 +5467,7 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: g8rc, preferred-register: '' } @@ -5483,11 +5483,11 @@ registers: - { id: 12, class: g8rc, preferred-register: '' } - { id: 13, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 14, class: g8rc_and_g8rc_nox0, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } - { reg: '$x5', virtual-reg: '%2' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -5504,13 +5504,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4, $x5 - + %2 = COPY $x5 %1 = COPY $x4 %0 = COPY $x3 @@ -5520,14 +5520,14 @@ body: | %7 = IMPLICIT_DEF %6 = INSERT_SUBREG %7, killed %5, 1 %8 = LI8 111 - %13 = STWUX %3, %0, killed %8 :: (store 4 into %ir.arrayidx, !tbaa !8) + %13 = STWUX %3, %0, killed %8 :: (store (s32) into %ir.arrayidx, !tbaa !8) ; CHECK: STWU %3, 111, %0 ; CHECK-LATE: stwu {{[0-9]+}}, 111({{[0-9]+}}) %9 = ADDI %4, 2 %11 = IMPLICIT_DEF %10 = INSERT_SUBREG %11, killed %9, 1 %12 = LI8 0 - %14 = STWUX %3, %0, killed %12 :: (store 4 into %ir.arrayidx3, !tbaa !8) + %14 = STWUX %3, %0, killed %12 :: (store (s32) into %ir.arrayidx3, !tbaa !8) ; CHECK: STWU %3, 0, %0 ; CHECK-LATE: stwu {{[0-9]+}}, 0({{[0-9]+}}) BLR8 implicit $lr8, implicit $rm @@ -5542,7 +5542,7 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: g8rc, preferred-register: '' } @@ -5556,11 +5556,11 @@ registers: - { id: 10, class: g8rc, preferred-register: '' } - { id: 11, class: g8rc, preferred-register: '' } - { id: 12, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } - { reg: '$x5', virtual-reg: '%2' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -5577,13 +5577,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4, $x5 - + %2 = COPY $x5 %1 = COPY $x4 %0 = COPY $x3 @@ -5593,14 +5593,14 @@ body: | %7 = IMPLICIT_DEF %6 = INSERT_SUBREG %7, killed %5, 1 %8 = LI8 2 - STWX %3, %0, killed %8 :: (store 4 into %ir.arrayidx, !tbaa !8) + STWX %3, %0, killed %8 :: (store (s32) into %ir.arrayidx, !tbaa !8) ; CHECK: STW %3, 2, %0 ; CHECK-LATE: stw 4, 2(3) %9 = ADDI %4, 2 %11 = IMPLICIT_DEF %10 = INSERT_SUBREG %11, killed %9, 1 %12 = LI8 99 - STWX %3, %0, killed %12 :: (store 4 into %ir.arrayidx3, !tbaa !8) + STWX %3, %0, killed %12 :: (store (s32) into %ir.arrayidx3, !tbaa !8) ; CHECK: STW %3, 99, %0 ; CHECK-LATE: stw 4, 99(3) BLR8 implicit $lr8, implicit $rm @@ -5615,7 +5615,7 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: g8rc, preferred-register: '' } @@ -5630,11 +5630,11 @@ registers: - { id: 11, class: g8rc, preferred-register: '' } - { id: 12, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 13, class: g8rc_and_g8rc_nox0, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } - { reg: '$x5', virtual-reg: '%2' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -5651,13 +5651,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4, $x5 - + %2 = COPY $x5 %1 = COPY $x4 %0 = COPY $x3 @@ -5666,14 +5666,14 @@ body: | %6 = IMPLICIT_DEF %5 = INSERT_SUBREG %6, killed %4, 1 %7 = LI8 444 - %12 = STDUX %1, %0, killed %7 :: (store 8 into %ir.arrayidx, !tbaa !10) + %12 = STDUX %1, %0, killed %7 :: (store (s64) into %ir.arrayidx, !tbaa !10) ; CHECK: STDU %1, 444, %0 ; CHECK-LATE: stdu {{[0-9]+}}, 444({{[0-9]+}}) %8 = ADDI %3, 2 %10 = IMPLICIT_DEF %9 = INSERT_SUBREG %10, killed %8, 1 %11 = LI8 -8 - %13 = STDUX %1, %0, killed %11 :: (store 8 into %ir.arrayidx3, !tbaa !10) + %13 = STDUX %1, %0, killed %11 :: (store (s64) into %ir.arrayidx3, !tbaa !10) ; CHECK: STDU %1, -8, %0 ; CHECK-LATE: stdu {{[0-9]+}}, -8({{[0-9]+}}) BLR8 implicit $lr8, implicit $rm @@ -5688,7 +5688,7 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: g8rc, preferred-register: '' } @@ -5701,11 +5701,11 @@ registers: - { id: 9, class: g8rc, preferred-register: '' } - { id: 10, class: g8rc, preferred-register: '' } - { id: 11, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } - { reg: '$x5', virtual-reg: '%2' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -5722,13 +5722,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4, $x5 - + %2 = COPY $x5 %1 = COPY $x4 %0 = LI8 1000 @@ -5737,14 +5737,14 @@ body: | %6 = IMPLICIT_DEF %5 = INSERT_SUBREG %6, killed %4, 1 %7 = LI8 900 - STDX %1, %0, killed %7 :: (store 8 into %ir.arrayidx, !tbaa !10) + STDX %1, %0, killed %7 :: (store (s64) into %ir.arrayidx, !tbaa !10) ; CHECK: STD %1, 1000, killed %7 ; CHECK-LATE: {{[0-9]+}}, 1000({{[0-9]+}}) %8 = ADDI %3, 2 %10 = IMPLICIT_DEF %9 = INSERT_SUBREG %10, killed %8, 1 %11 = LI8 -900 - STDX %1, %0, killed %11 :: (store 8 into %ir.arrayidx3, !tbaa !10) + STDX %1, %0, killed %11 :: (store (s64) into %ir.arrayidx3, !tbaa !10) ; CHECK: STD %1, 1000, killed %11 ; CHECK-LATE: {{[0-9]+}}, 1000({{[0-9]+}}) BLR8 implicit $lr8, implicit $rm @@ -5759,7 +5759,7 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 1, class: f4rc, preferred-register: '' } - { id: 2, class: g8rc, preferred-register: '' } @@ -5772,11 +5772,11 @@ registers: - { id: 9, class: g8rc, preferred-register: '' } - { id: 10, class: g8rc, preferred-register: '' } - { id: 11, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$f1', virtual-reg: '%1' } - { reg: '$x5', virtual-reg: '%2' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -5793,13 +5793,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $f1, $x5 - + %2 = COPY $x5 %1 = COPY $f1 %0 = COPY $x3 @@ -5808,14 +5808,14 @@ body: | %6 = IMPLICIT_DEF %5 = INSERT_SUBREG %6, killed %4, 1 %7 = LI8 400 - STFSX %1, %0, killed %7 :: (store 4 into %ir.arrayidx, !tbaa !14) + STFSX %1, %0, killed %7 :: (store (s32) into %ir.arrayidx, !tbaa !14) ; CHECK: STFS %1, 400, %0 ; CHECK-LATE: stfs 1, 400(3) %8 = ADDI %3, 2 %10 = IMPLICIT_DEF %9 = INSERT_SUBREG %10, killed %8, 1 %11 = LI8 -401 - STFSX %1, %0, killed %11 :: (store 4 into %ir.arrayidx3, !tbaa !14) + STFSX %1, %0, killed %11 :: (store (s32) into %ir.arrayidx3, !tbaa !14) ; CHECK: STFS %1, -401, %0 ; CHECK-LATE: stfs 1, -401(3) BLR8 implicit $lr8, implicit $rm @@ -5830,7 +5830,7 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 1, class: f4rc, preferred-register: '' } - { id: 2, class: g8rc, preferred-register: '' } @@ -5845,11 +5845,11 @@ registers: - { id: 11, class: g8rc, preferred-register: '' } - { id: 12, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 13, class: g8rc_and_g8rc_nox0, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$f1', virtual-reg: '%1' } - { reg: '$x5', virtual-reg: '%2' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -5866,13 +5866,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $f1, $x5 - + %2 = COPY $x5 %1 = COPY $f1 %0 = COPY $x3 @@ -5881,14 +5881,14 @@ body: | %6 = IMPLICIT_DEF %5 = INSERT_SUBREG %6, killed %4, 1 %7 = LI8 111 - %12 = STFSUX %1, %0, killed %7 :: (store 4 into %ir.arrayidx, !tbaa !14) + %12 = STFSUX %1, %0, killed %7 :: (store (s32) into %ir.arrayidx, !tbaa !14) ; CHECK: STFSU %1, 111, %0 ; CHECK-LATE: stfsu {{[0-9]+}}, 111({{[0-9]+}}) %8 = ADDI %3, 2 %10 = IMPLICIT_DEF %9 = INSERT_SUBREG %10, killed %8, 1 %11 = LI8 987 - %13 = STFSUX %1, %0, killed %11 :: (store 4 into %ir.arrayidx3, !tbaa !14) + %13 = STFSUX %1, %0, killed %11 :: (store (s32) into %ir.arrayidx3, !tbaa !14) ; CHECK: STFSU %1, 987, %0 ; CHECK-LATE: stfsu {{[0-9]+}}, 987({{[0-9]+}}) BLR8 implicit $lr8, implicit $rm @@ -5903,7 +5903,7 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 1, class: f8rc, preferred-register: '' } - { id: 2, class: g8rc, preferred-register: '' } @@ -5916,11 +5916,11 @@ registers: - { id: 9, class: g8rc, preferred-register: '' } - { id: 10, class: g8rc, preferred-register: '' } - { id: 11, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$f1', virtual-reg: '%1' } - { reg: '$x5', virtual-reg: '%2' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -5937,13 +5937,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $f1, $x5 - + %2 = COPY $x5 %1 = COPY $f1 %0 = COPY $x3 @@ -5952,14 +5952,14 @@ body: | %6 = IMPLICIT_DEF %5 = INSERT_SUBREG %6, killed %4, 1 %7 = LI8 876 - STFDX %1, %0, killed %7 :: (store 8 into %ir.arrayidx, !tbaa !12) + STFDX %1, %0, killed %7 :: (store (s64) into %ir.arrayidx, !tbaa !12) ; CHECK: STFD %1, 876, %0 ; CHECK-LATE: stfd 1, 876(3) %8 = ADDI %3, 2 %10 = IMPLICIT_DEF %9 = INSERT_SUBREG %10, killed %8, 1 %11 = LI8 -873 - STFDX %1, %0, killed %11 :: (store 8 into %ir.arrayidx3, !tbaa !12) + STFDX %1, %0, killed %11 :: (store (s64) into %ir.arrayidx3, !tbaa !12) ; CHECK: STFD %1, -873, %0 ; CHECK-LATE: stfd 1, -873(3) BLR8 implicit $lr8, implicit $rm @@ -5974,7 +5974,7 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 1, class: f8rc, preferred-register: '' } - { id: 2, class: g8rc, preferred-register: '' } @@ -5989,11 +5989,11 @@ registers: - { id: 11, class: g8rc, preferred-register: '' } - { id: 12, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 13, class: g8rc_and_g8rc_nox0, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$f1', virtual-reg: '%1' } - { reg: '$x5', virtual-reg: '%2' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -6010,13 +6010,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $f1, $x5 - + %2 = COPY $x5 %1 = COPY $f1 %0 = COPY $x3 @@ -6025,14 +6025,14 @@ body: | %6 = IMPLICIT_DEF %5 = INSERT_SUBREG %6, killed %4, 1 %7 = LI8 -9038 - %12 = STFDUX %1, %0, killed %7 :: (store 8 into %ir.arrayidx, !tbaa !12) + %12 = STFDUX %1, %0, killed %7 :: (store (s64) into %ir.arrayidx, !tbaa !12) ; CHECK: STFDU %1, -9038, %0 ; CHECK-LATE: stfdu {{[0-9]+}}, -9038({{[0-9]+}}) %8 = ADDI %3, 2 %10 = IMPLICIT_DEF %9 = INSERT_SUBREG %10, killed %8, 1 %11 = LI8 6477 - %13 = STFDUX %1, %0, killed %11 :: (store 8 into %ir.arrayidx3, !tbaa !12) + %13 = STFDUX %1, %0, killed %11 :: (store (s64) into %ir.arrayidx3, !tbaa !12) ; CHECK: STFDU %1, 6477, %0 ; CHECK-LATE: stfdu {{[0-9]+}}, 6477({{[0-9]+}}) BLR8 implicit $lr8, implicit $rm @@ -6047,16 +6047,16 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 1, class: vssrc, preferred-register: '' } - { id: 2, class: g8rc, preferred-register: '' } - { id: 3, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$f1', virtual-reg: '%1' } - { reg: '$x5', virtual-reg: '%2' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -6073,18 +6073,18 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $f1, $x5 - + %2 = COPY $x5 %1 = COPY $f1 %0 = COPY $x3 %3 = LI8 444 - STXSSPX %1, %0, killed %3 :: (store 4 into %ir.arrayidx, !tbaa !14) + STXSSPX %1, %0, killed %3 :: (store (s32) into %ir.arrayidx, !tbaa !14) ; CHECK: DFSTOREf32 %1, 444, %0 ; CHECK-LATE: stfs 1, 444(3) BLR8 implicit $lr8, implicit $rm @@ -6099,16 +6099,16 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 1, class: vsfrc, preferred-register: '' } - { id: 2, class: g8rc, preferred-register: '' } - { id: 3, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$f1', virtual-reg: '%1' } - { reg: '$x5', virtual-reg: '%2' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -6125,18 +6125,18 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $f1, $x5 - + %2 = COPY $x5 %1 = COPY $f1 %0 = COPY $x3 %3 = LI8 4 - STXSDX %1, %0, killed %3, implicit $rm :: (store 8 into %ir.arrayidx, !tbaa !12) + STXSDX %1, %0, killed %3, implicit $rm :: (store (s64) into %ir.arrayidx, !tbaa !12) ; CHECK: DFSTOREf64 %1, 4, %0 ; CHECK-LATE: stfd 1, 4(3) BLR8 implicit $lr8, implicit $rm @@ -6151,16 +6151,16 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc_and_g8rc_nox0, preferred-register: '' } - { id: 1, class: vrrc, preferred-register: '' } - { id: 2, class: g8rc, preferred-register: '' } - { id: 3, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$v2', virtual-reg: '%1' } - { reg: '$x7', virtual-reg: '%2' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -6177,18 +6177,18 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $v2, $x7 - + %2 = COPY $x7 %1 = COPY $v2 %0 = LI8 16 %3 = RLDICR %2, 4, 59 - STXVX %1, %0, killed %3 :: (store 16 into %ir.arrayidx, !tbaa !3) + STXVX %1, %0, killed %3 :: (store (s128) into %ir.arrayidx, !tbaa !3) ; CHECK: STXV %1, 16, killed %3 ; CHECK-LATE: stxv 34, 16(4) BLR8 implicit $lr8, implicit $rm @@ -6203,7 +6203,7 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: gprc, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: g8rc, preferred-register: '' } @@ -6213,12 +6213,12 @@ registers: - { id: 6, class: gprc, preferred-register: '' } - { id: 7, class: gprc, preferred-register: '' } - { id: 8, class: gprc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } - { reg: '$x5', virtual-reg: '%2' } - { reg: '$x6', virtual-reg: '%3' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -6235,13 +6235,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4, $x5, $x6 - + %3 = COPY $x6 %2 = COPY $x5 %1 = COPY $x4 @@ -6267,19 +6267,19 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: g8rc, preferred-register: '' } - { id: 3, class: g8rc, preferred-register: '' } - { id: 4, class: g8rc, preferred-register: '' } - { id: 5, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } - { reg: '$x5', virtual-reg: '%2' } - { reg: '$x6', virtual-reg: '%3' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -6296,13 +6296,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4, $x5, $x6 - + %3 = COPY $x6 %2 = COPY $x5 %1 = COPY $x4 @@ -6325,15 +6325,15 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc, preferred-register: '' } - { id: 1, class: gprc, preferred-register: '' } - { id: 2, class: gprc, preferred-register: '' } - { id: 3, class: gprc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -6350,13 +6350,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = LI 10101 %0 = COPY $x3 %3 = COPY %0.sub_32 @@ -6376,14 +6376,14 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } - { id: 2, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } - { reg: '$x4', virtual-reg: '%1' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -6400,13 +6400,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3, $x4 - + %1 = COPY $x4 %0 = LI8 5535 %2 = XOR8 %1, %0 @@ -6425,12 +6425,12 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: gprc, preferred-register: '' } - { id: 1, class: gprc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -6447,13 +6447,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3 - + %0 = LI 871 %1 = XORI %0, 17 ; CHECK: LI 886 @@ -6471,12 +6471,12 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: g8rc, preferred-register: '' } - { id: 1, class: g8rc, preferred-register: '' } -liveins: +liveins: - { reg: '$x3', virtual-reg: '%0' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -6493,13 +6493,13 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: liveins: $x3 - + %0 = LI8 453 %1 = XORI8 %0, 17 ; CHECK: LI8 468 diff --git a/llvm/test/CodeGen/PowerPC/kernel-fp-round.ll b/llvm/test/CodeGen/PowerPC/kernel-fp-round.ll index 4b0b82a6c2711..ca6f0afcd371a 100644 --- a/llvm/test/CodeGen/PowerPC/kernel-fp-round.ll +++ b/llvm/test/CodeGen/PowerPC/kernel-fp-round.ll @@ -12,7 +12,7 @@ define float @test(float %a) { ; CHECK-NEXT: - { id: 0, size: 4, alignment: 4 } ; CHECK: %2:f8rc = nofpexcept FCTIWZ killed %1, implicit $rm ; CHECK: STFIWX killed %2, $zero8, %3 -; CHECK-NEXT: %4:f8rc = LFIWAX $zero8, %3 :: (load 4 from %stack.0) +; CHECK-NEXT: %4:f8rc = LFIWAX $zero8, %3 :: (load (s32) from %stack.0) ; CHECK-NEXT: %5:f4rc = nofpexcept FCFIDS killed %4, implicit $rm ; CHECK-NEXT: $f1 = COPY %5 ; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $f1 @@ -21,7 +21,7 @@ define float @test(float %a) { ; CHECK-P6-NEXT: - { id: 0, size: 4, alignment: 4 } ; CHECK-P6: %2:f8rc = nofpexcept FCTIWZ killed %1, implicit $rm ; CHECK-P6: STFIWX killed %2, $zero, %3 -; CHECK-P6-NEXT: %4:f8rc = LFIWAX $zero, %3 :: (load 4 from %stack.0) +; CHECK-P6-NEXT: %4:f8rc = LFIWAX $zero, %3 :: (load (s32) from %stack.0) ; CHECK-P6-NEXT: %5:f8rc = nofpexcept FCFID killed %4, implicit $rm ; CHECK-P6-NEXT: %6:f4rc = nofpexcept FRSP killed %5, implicit $rm ; CHECK-P6-NEXT: $f1 = COPY %6 @@ -31,7 +31,7 @@ define float @test(float %a) { ; CHECK-P6-64-NEXT: - { id: 0, size: 4, alignment: 4 } ; CHECK-P6-64: %2:f8rc = nofpexcept FCTIWZ killed %1, implicit $rm ; CHECK-P6-64: STFIWX killed %2, $zero8, %3 -; CHECK-P6-64-NEXT: %4:f8rc = LFIWAX $zero8, %3 :: (load 4 from %stack.0) +; CHECK-P6-64-NEXT: %4:f8rc = LFIWAX $zero8, %3 :: (load (s32) from %stack.0) ; CHECK-P6-64-NEXT: %5:f8rc = nofpexcept FCFID killed %4, implicit $rm ; CHECK-P6-64-NEXT: %6:f4rc = nofpexcept FRSP killed %5, implicit $rm ; CHECK-P6-64-NEXT: $f1 = COPY %6 diff --git a/llvm/test/CodeGen/PowerPC/ldst-16-byte.mir b/llvm/test/CodeGen/PowerPC/ldst-16-byte.mir index d6efa5bcd3f5a..00c083e63c16c 100644 --- a/llvm/test/CodeGen/PowerPC/ldst-16-byte.mir +++ b/llvm/test/CodeGen/PowerPC/ldst-16-byte.mir @@ -71,24 +71,24 @@ body: | liveins: $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12 ; CHECK-LABEL: name: spill_g8prc ; CHECK: liveins: $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x14, $x15, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $x29, $x30, $x31 - ; CHECK: STD killed $x14, -144, $x1 :: (store 8 into %fixed-stack.17, align 16) - ; CHECK: STD killed $x15, -136, $x1 :: (store 8 into %fixed-stack.16) - ; CHECK: STD killed $x16, -128, $x1 :: (store 8 into %fixed-stack.15, align 16) - ; CHECK: STD killed $x17, -120, $x1 :: (store 8 into %fixed-stack.14) - ; CHECK: STD killed $x18, -112, $x1 :: (store 8 into %fixed-stack.13, align 16) - ; CHECK: STD killed $x19, -104, $x1 :: (store 8 into %fixed-stack.12) - ; CHECK: STD killed $x20, -96, $x1 :: (store 8 into %fixed-stack.11, align 16) - ; CHECK: STD killed $x21, -88, $x1 :: (store 8 into %fixed-stack.10) - ; CHECK: STD killed $x22, -80, $x1 :: (store 8 into %fixed-stack.9, align 16) - ; CHECK: STD killed $x23, -72, $x1 :: (store 8 into %fixed-stack.8) - ; CHECK: STD killed $x24, -64, $x1 :: (store 8 into %fixed-stack.7, align 16) - ; CHECK: STD killed $x25, -56, $x1 :: (store 8 into %fixed-stack.6) - ; CHECK: STD killed $x26, -48, $x1 :: (store 8 into %fixed-stack.5, align 16) - ; CHECK: STD killed $x27, -40, $x1 :: (store 8 into %fixed-stack.4) - ; CHECK: STD killed $x28, -32, $x1 :: (store 8 into %fixed-stack.3, align 16) - ; CHECK: STD killed $x29, -24, $x1 :: (store 8 into %fixed-stack.2) - ; CHECK: STD killed $x30, -16, $x1 :: (store 8 into %fixed-stack.1, align 16) - ; CHECK: STD killed $x31, -8, $x1 :: (store 8 into %fixed-stack.0) + ; CHECK: STD killed $x14, -144, $x1 :: (store (s64) into %fixed-stack.17, align 16) + ; CHECK: STD killed $x15, -136, $x1 :: (store (s64) into %fixed-stack.16) + ; CHECK: STD killed $x16, -128, $x1 :: (store (s64) into %fixed-stack.15, align 16) + ; CHECK: STD killed $x17, -120, $x1 :: (store (s64) into %fixed-stack.14) + ; CHECK: STD killed $x18, -112, $x1 :: (store (s64) into %fixed-stack.13, align 16) + ; CHECK: STD killed $x19, -104, $x1 :: (store (s64) into %fixed-stack.12) + ; CHECK: STD killed $x20, -96, $x1 :: (store (s64) into %fixed-stack.11, align 16) + ; CHECK: STD killed $x21, -88, $x1 :: (store (s64) into %fixed-stack.10) + ; CHECK: STD killed $x22, -80, $x1 :: (store (s64) into %fixed-stack.9, align 16) + ; CHECK: STD killed $x23, -72, $x1 :: (store (s64) into %fixed-stack.8) + ; CHECK: STD killed $x24, -64, $x1 :: (store (s64) into %fixed-stack.7, align 16) + ; CHECK: STD killed $x25, -56, $x1 :: (store (s64) into %fixed-stack.6) + ; CHECK: STD killed $x26, -48, $x1 :: (store (s64) into %fixed-stack.5, align 16) + ; CHECK: STD killed $x27, -40, $x1 :: (store (s64) into %fixed-stack.4) + ; CHECK: STD killed $x28, -32, $x1 :: (store (s64) into %fixed-stack.3, align 16) + ; CHECK: STD killed $x29, -24, $x1 :: (store (s64) into %fixed-stack.2) + ; CHECK: STD killed $x30, -16, $x1 :: (store (s64) into %fixed-stack.1, align 16) + ; CHECK: STD killed $x31, -8, $x1 :: (store (s64) into %fixed-stack.0) ; CHECK: $x7 = OR8 $x3, $x3 ; CHECK: renamable $g8p4 = LQARX $x5, $x6 ; CHECK: STD killed $x8, -160, $x1 @@ -143,24 +143,24 @@ body: | ; CHECK: $x8 = LD -160, $x1 ; CHECK: $x9 = LD -152, $x1 ; CHECK: STQCX killed renamable $g8p4, $x5, $x6, implicit-def dead $cr0 - ; CHECK: $x31 = LD -8, $x1 :: (load 8 from %fixed-stack.0) - ; CHECK: $x30 = LD -16, $x1 :: (load 8 from %fixed-stack.1, align 16) - ; CHECK: $x29 = LD -24, $x1 :: (load 8 from %fixed-stack.2) - ; CHECK: $x28 = LD -32, $x1 :: (load 8 from %fixed-stack.3, align 16) - ; CHECK: $x27 = LD -40, $x1 :: (load 8 from %fixed-stack.4) - ; CHECK: $x26 = LD -48, $x1 :: (load 8 from %fixed-stack.5, align 16) - ; CHECK: $x25 = LD -56, $x1 :: (load 8 from %fixed-stack.6) - ; CHECK: $x24 = LD -64, $x1 :: (load 8 from %fixed-stack.7, align 16) - ; CHECK: $x23 = LD -72, $x1 :: (load 8 from %fixed-stack.8) - ; CHECK: $x22 = LD -80, $x1 :: (load 8 from %fixed-stack.9, align 16) - ; CHECK: $x21 = LD -88, $x1 :: (load 8 from %fixed-stack.10) - ; CHECK: $x20 = LD -96, $x1 :: (load 8 from %fixed-stack.11, align 16) - ; CHECK: $x19 = LD -104, $x1 :: (load 8 from %fixed-stack.12) - ; CHECK: $x18 = LD -112, $x1 :: (load 8 from %fixed-stack.13, align 16) - ; CHECK: $x17 = LD -120, $x1 :: (load 8 from %fixed-stack.14) - ; CHECK: $x16 = LD -128, $x1 :: (load 8 from %fixed-stack.15, align 16) - ; CHECK: $x15 = LD -136, $x1 :: (load 8 from %fixed-stack.16) - ; CHECK: $x14 = LD -144, $x1 :: (load 8 from %fixed-stack.17, align 16) + ; CHECK: $x31 = LD -8, $x1 :: (load (s64) from %fixed-stack.0) + ; CHECK: $x30 = LD -16, $x1 :: (load (s64) from %fixed-stack.1, align 16) + ; CHECK: $x29 = LD -24, $x1 :: (load (s64) from %fixed-stack.2) + ; CHECK: $x28 = LD -32, $x1 :: (load (s64) from %fixed-stack.3, align 16) + ; CHECK: $x27 = LD -40, $x1 :: (load (s64) from %fixed-stack.4) + ; CHECK: $x26 = LD -48, $x1 :: (load (s64) from %fixed-stack.5, align 16) + ; CHECK: $x25 = LD -56, $x1 :: (load (s64) from %fixed-stack.6) + ; CHECK: $x24 = LD -64, $x1 :: (load (s64) from %fixed-stack.7, align 16) + ; CHECK: $x23 = LD -72, $x1 :: (load (s64) from %fixed-stack.8) + ; CHECK: $x22 = LD -80, $x1 :: (load (s64) from %fixed-stack.9, align 16) + ; CHECK: $x21 = LD -88, $x1 :: (load (s64) from %fixed-stack.10) + ; CHECK: $x20 = LD -96, $x1 :: (load (s64) from %fixed-stack.11, align 16) + ; CHECK: $x19 = LD -104, $x1 :: (load (s64) from %fixed-stack.12) + ; CHECK: $x18 = LD -112, $x1 :: (load (s64) from %fixed-stack.13, align 16) + ; CHECK: $x17 = LD -120, $x1 :: (load (s64) from %fixed-stack.14) + ; CHECK: $x16 = LD -128, $x1 :: (load (s64) from %fixed-stack.15, align 16) + ; CHECK: $x15 = LD -136, $x1 :: (load (s64) from %fixed-stack.16) + ; CHECK: $x14 = LD -144, $x1 :: (load (s64) from %fixed-stack.17, align 16) ; CHECK: BLR8 implicit $lr8, implicit undef $rm, implicit $x3 %addr0:g8rc_nox0 = COPY $x3 %addr1:g8rc = COPY $x4 diff --git a/llvm/test/CodeGen/PowerPC/ldst-align.ll b/llvm/test/CodeGen/PowerPC/ldst-align.ll index 1cc625ea17d39..6c0dd09f85f42 100644 --- a/llvm/test/CodeGen/PowerPC/ldst-align.ll +++ b/llvm/test/CodeGen/PowerPC/ldst-align.ll @@ -6,7 +6,7 @@ define i64 @load(i64* %p) { ; CHECK: bb.0.entry: ; CHECK: liveins: $x3 ; CHECK: [[COPY:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY $x3 - ; CHECK: [[LD:%[0-9]+]]:g8rc = LD 24, [[COPY]] :: (load 8 from %ir.arrayidx, align 2) + ; CHECK: [[LD:%[0-9]+]]:g8rc = LD 24, [[COPY]] :: (load (s64) from %ir.arrayidx, align 2) ; CHECK: $x3 = COPY [[LD]] ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit $x3 entry: @@ -21,7 +21,7 @@ define void @store(i64* %p) { ; CHECK: liveins: $x3 ; CHECK: [[COPY:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY $x3 ; CHECK: [[LI8_:%[0-9]+]]:g8rc = LI8 9 - ; CHECK: STD killed [[LI8_]], 16, [[COPY]] :: (store 8 into %ir.arrayidx, align 1) + ; CHECK: STD killed [[LI8_]], 16, [[COPY]] :: (store (s64) into %ir.arrayidx, align 1) ; CHECK: BLR8 implicit $lr8, implicit $rm entry: %arrayidx = getelementptr inbounds i64, i64* %p, i64 2 @@ -35,7 +35,7 @@ define void @store_aligned(i64* %p) { ; CHECK: liveins: $x3 ; CHECK: [[COPY:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY $x3 ; CHECK: [[LI8_:%[0-9]+]]:g8rc = LI8 9 - ; CHECK: STD killed [[LI8_]], 16, [[COPY]] :: (store 8 into %ir.arrayidx, align 4) + ; CHECK: STD killed [[LI8_]], 16, [[COPY]] :: (store (s64) into %ir.arrayidx, align 4) ; CHECK: BLR8 implicit $lr8, implicit $rm entry: %arrayidx = getelementptr inbounds i64, i64* %p, i64 2 diff --git a/llvm/test/CodeGen/PowerPC/licm-tocReg.ll b/llvm/test/CodeGen/PowerPC/licm-tocReg.ll index 2772e925ca25e..b8f99ec08a294 100644 --- a/llvm/test/CodeGen/PowerPC/licm-tocReg.ll +++ b/llvm/test/CodeGen/PowerPC/licm-tocReg.ll @@ -19,12 +19,12 @@ ; ; %4 = COPY %x3 ; %5 = ADDIStocHA8 %x2, @ga -; %6 = LDtocL @ga, killed %5 :: (load 8 from got) -; %7 = LWZ 0, %6 :: (volatile dereferenceable load 4 from @ga) +; %6 = LDtocL @ga, killed %5 :: (load (s64) from got) +; %7 = LWZ 0, %6 :: (volatile dereferenceable load (s32) from @ga) ; %8 = ADDIStocHA8 %x2, @gb -; %9 = LDtocL @gb, killed %8 :: (load 8 from got) -; %10 = LWZ 0, killed %9 :: (volatile dereferenceable load 4 from @gb) -; %0 = LWZ 0, %6 :: (volatile dereferenceable load 4 from @ga) +; %9 = LDtocL @gb, killed %8 :: (load (s64) from got) +; %10 = LWZ 0, killed %9 :: (volatile dereferenceable load (s32) from @gb) +; %0 = LWZ 0, %6 :: (volatile dereferenceable load (s32) from @ga) ; %11 = CMPW killed %7, killed %10 ; BCC 44, killed %11, %bb.2.if.then ; B %bb.3.if.end @@ -33,7 +33,7 @@ ; %1 = PHI %0, %bb.0.entry, %3, %bb.3.if.end ; ADJCALLSTACKDOWN 32, 0, implicit-def dead %r1, implicit %r1 ; %20 = COPY %x2 -; STD %20, 24, %x1 :: (store 8 into stack + 24) +; STD %20, 24, %x1 :: (store (s64) into stack + 24) ; %21 = EXTSW_32_64 %1 ; %x3 = COPY %21 ; %x12 = COPY %4 @@ -50,13 +50,13 @@ ; %2 = PHI %0, %bb.0.entry, %3, %bb.3.if.end ; %12 = ADDI %2, 1 ; %13 = ADDIStocHA8 %x2, @ga -; %14 = LDtocL @ga, killed %13 :: (load 8 from got) -; STW killed %12, 0, %14 :: (volatile store 4 into @ga) -; %15 = LWZ 0, %14 :: (volatile dereferenceable load 4 from @ga) +; %14 = LDtocL @ga, killed %13 :: (load (s64) from got) +; STW killed %12, 0, %14 :: (volatile store (s32) into @ga) +; %15 = LWZ 0, %14 :: (volatile dereferenceable load (s32) from @ga) ; %16 = ADDIStocHA8 %x2, @gb -; %17 = LDtocL @gb, killed %16 :: (load 8 from got) -; %18 = LWZ 0, killed %17 :: (volatile dereferenceable load 4 from @gb) -; %3 = LWZ 0, %14 :: (volatile dereferenceable load 4 from @ga) +; %17 = LDtocL @gb, killed %16 :: (load (s64) from got) +; %18 = LWZ 0, killed %17 :: (volatile dereferenceable load (s32) from @gb) +; %3 = LWZ 0, %14 :: (volatile dereferenceable load (s32) from @ga) ; %19 = CMPW killed %15, killed %18 ; BCC 44, killed %19, %bb.2.if.then ; B %bb.3.if.end diff --git a/llvm/test/CodeGen/PowerPC/livephysregs.mir b/llvm/test/CodeGen/PowerPC/livephysregs.mir index dbadf576771af..de3fb6138ca0a 100644 --- a/llvm/test/CodeGen/PowerPC/livephysregs.mir +++ b/llvm/test/CodeGen/PowerPC/livephysregs.mir @@ -38,13 +38,13 @@ body: | bb.6: liveins: $x3, $x6, $x29, $x30 - STD killed $x29, -24, $x1 :: (store 8 into %fixed-stack.1) - STD killed $x30, -16, $x1 :: (store 8 into %fixed-stack.0, align 16) + STD killed $x29, -24, $x1 :: (store (s64) into %fixed-stack.1) + STD killed $x30, -16, $x1 :: (store (s64) into %fixed-stack.0, align 16) NOP implicit-def dead $x29 NOP implicit-def dead $x30 - $x30 = LD -16, $x1 :: (load 8 from %fixed-stack.0, align 16) - $x29 = LD -24, $x1 :: (load 8 from %fixed-stack.1) + $x30 = LD -16, $x1 :: (load (s64) from %fixed-stack.0, align 16) + $x29 = LD -24, $x1 :: (load (s64) from %fixed-stack.1) $x4 = RLDICR killed $x6, 16, 47 $x3 = OR8 killed $x4, killed $x3 diff --git a/llvm/test/CodeGen/PowerPC/livevars-crash1.mir b/llvm/test/CodeGen/PowerPC/livevars-crash1.mir index 6457a3d242362..8bb9ad1b44f30 100644 --- a/llvm/test/CodeGen/PowerPC/livevars-crash1.mir +++ b/llvm/test/CodeGen/PowerPC/livevars-crash1.mir @@ -52,18 +52,18 @@ body: | liveins: $x3 %4:g8rc_and_g8rc_nox0 = COPY killed $x3 - %0:g8rc = LD 0, %4 :: (dereferenceable load 8 from %ir.p) + %0:g8rc = LD 0, %4 :: (dereferenceable load (s64) from %ir.p) bb.1.loop: successors: %bb.1(0x20000000), %bb.2(0x60000000) %1:g8rc_and_g8rc_nox0 = PHI %0, %bb.0, %2, %bb.1, %3, %bb.3, %2, %bb.2 - %5:gprc = LBZ 0, %1 :: (load 1 from %ir.0) + %5:gprc = LBZ 0, %1 :: (load (s8) from %ir.0) %6:crrc = CMPWI killed %5, 0 %7:crbitrc = COPY killed %6.sub_eq %2:g8rc = nuw ADDI8 %1, 1 - STD %2, 0, %4 :: (store 8 into %ir.p) - %8:gprc = LBZ 1, %1 :: (load 1 from %ir.incdec.ptr) + STD %2, 0, %4 :: (store (s64) into %ir.p) + %8:gprc = LBZ 1, %1 :: (load (s8) from %ir.incdec.ptr) BCn killed %7, %bb.1 B %bb.2 @@ -79,7 +79,7 @@ body: | successors: %bb.1(0x80000000) %3:g8rc = nuw ADDI8 killed %1, 2 - STD %3, 0, %4 :: (store 8 into %ir.p) + STD %3, 0, %4 :: (store (s64) into %ir.p) B %bb.1 ; CHECK-LABEL: name: zext_free @@ -88,19 +88,19 @@ body: | ; CHECK: liveins: $x3 ; CHECK: %4:g8rc_and_g8rc_nox0 = COPY killed $x3 - ; CHECK: %0:g8rc = LD 0, %4 :: (dereferenceable load 8 from %ir.p) + ; CHECK: %0:g8rc = LD 0, %4 :: (dereferenceable load (s64) from %ir.p) ; CHECK: %12:g8rc_and_g8rc_nox0 = COPY killed %0 ; CHECK: bb.1.loop: ; CHECK: successors: %bb.1(0x20000000), %bb.2(0x60000000) ; CHECK: %1:g8rc_and_g8rc_nox0 = COPY killed %12 - ; CHECK: %5:gprc = LBZ 0, %1 :: (load 1 from %ir.0) + ; CHECK: %5:gprc = LBZ 0, %1 :: (load (s8) from %ir.0) ; CHECK: %6:crrc = CMPWI killed %5, 0 ; CHEXK: %7:crbitrc = COPY killed %6.sub_eq ; CHECK: %2:g8rc = nuw ADDI8 %1, 1 - ; CHECK: STD %2, 0, %4 :: (store 8 into %ir.p) - ; CHECK: %8:gprc = LBZ 1, %1 :: (load 1 from %ir.incdec.ptr) + ; CHECK: STD %2, 0, %4 :: (store (s64) into %ir.p) + ; CHECK: %8:gprc = LBZ 1, %1 :: (load (s8) from %ir.incdec.ptr) ; CHECK: %12:g8rc_and_g8rc_nox0 = COPY %2 ; CHECK: BCn killed %7, %bb.1 ; CHECK: B %bb.2 @@ -118,7 +118,7 @@ body: | ; CHECK: successors: %bb.1(0x80000000) ; CHECK: %3:g8rc = nuw ADDI8 killed %1, 2 - ; CHECK: STD %3, 0, %4 :: (store 8 into %ir.p) + ; CHECK: STD %3, 0, %4 :: (store (s64) into %ir.p) ; CHECK: %12:g8rc_and_g8rc_nox0 = COPY killed %3 ; CHECK: B %bb.1 diff --git a/llvm/test/CodeGen/PowerPC/livevars-crash2.mir b/llvm/test/CodeGen/PowerPC/livevars-crash2.mir index 1bdf0419a8f59..2f1022f13a4b1 100644 --- a/llvm/test/CodeGen/PowerPC/livevars-crash2.mir +++ b/llvm/test/CodeGen/PowerPC/livevars-crash2.mir @@ -146,18 +146,18 @@ body: | liveins: $x3 %4:g8rc_and_g8rc_nox0 = COPY killed $x3 - %0:g8rc = LD 0, %4 :: (dereferenceable load 8 from %ir.p) + %0:g8rc = LD 0, %4 :: (dereferenceable load (s64) from %ir.p) bb.1.loop: successors: %bb.1(0x20000000), %bb.2(0x60000000) %1:g8rc_and_g8rc_nox0 = PHI %0, %bb.0, %2, %bb.1, %3, %bb.3, %2, %bb.2 - %5:gprc = LBZ 0, %1 :: (load 1 from %ir.0) + %5:gprc = LBZ 0, %1 :: (load (s8) from %ir.0) %6:crrc = CMPWI killed %5, 0 %7:crbitrc = COPY killed %6.sub_eq %2:g8rc = nuw ADDI8 %1, 1 - STD %2, 0, %4 :: (store 8 into %ir.p) - %8:gprc = LBZ 1, %1 :: (load 1 from %ir.incdec.ptr) + STD %2, 0, %4 :: (store (s64) into %ir.p) + %8:gprc = LBZ 1, %1 :: (load (s8) from %ir.incdec.ptr) BCn killed %7, %bb.1 B %bb.2 @@ -173,7 +173,7 @@ body: | successors: %bb.1(0x80000000) %3:g8rc = nuw ADDI8 killed %1, 2 - STD %3, 0, %4 :: (store 8 into %ir.p) + STD %3, 0, %4 :: (store (s64) into %ir.p) B %bb.1 ; CHECK-LABEL: name: testfloatslt @@ -182,19 +182,19 @@ body: | ; CHECK: liveins: $x3 ; CHECK: %4:g8rc_and_g8rc_nox0 = COPY killed $x3 - ; CHECK: %0:g8rc = LD 0, %4 :: (dereferenceable load 8 from %ir.p) + ; CHECK: %0:g8rc = LD 0, %4 :: (dereferenceable load (s64) from %ir.p) ; CHECK: %12:g8rc_and_g8rc_nox0 = COPY killed %0 ; CHECK: bb.1.loop: ; CHECK: successors: %bb.1(0x20000000), %bb.2(0x60000000) ; CHECK: %1:g8rc_and_g8rc_nox0 = COPY killed %12 - ; CHECK: %5:gprc = LBZ 0, %1 :: (load 1 from %ir.0) + ; CHECK: %5:gprc = LBZ 0, %1 :: (load (s8) from %ir.0) ; CHECK: %6:crrc = CMPWI killed %5, 0 ; CEHCK: %7:crbitrc = COPY killed %6.sub_eq ; CHECK: %2:g8rc = nuw ADDI8 %1, 1 - ; CHECK: STD %2, 0, %4 :: (store 8 into %ir.p) - ; CHECK: %8:gprc = LBZ 1, %1 :: (load 1 from %ir.incdec.ptr) + ; CHECK: STD %2, 0, %4 :: (store (s64) into %ir.p) + ; CHECK: %8:gprc = LBZ 1, %1 :: (load (s8) from %ir.incdec.ptr) ; CHECK: %12:g8rc_and_g8rc_nox0 = COPY %2 ; CHECK: BCn killed %7, %bb.1 ; CHECK: B %bb.2 @@ -212,7 +212,7 @@ body: | ; CHECK: successors: %bb.1(0x80000000) ; CHECK: %3:g8rc = nuw ADDI8 killed %1, 2 - ; CHECK: STD %3, 0, %4 :: (store 8 into %ir.p) + ; CHECK: STD %3, 0, %4 :: (store (s64) into %ir.p) ; CHECK: %12:g8rc_and_g8rc_nox0 = COPY killed %3 ; CHECK: B %bb.1 diff --git a/llvm/test/CodeGen/PowerPC/lower-globaladdr32-aix.ll b/llvm/test/CodeGen/PowerPC/lower-globaladdr32-aix.ll index 78cf10ebe688b..6956b5cdeae09 100644 --- a/llvm/test/CodeGen/PowerPC/lower-globaladdr32-aix.ll +++ b/llvm/test/CodeGen/PowerPC/lower-globaladdr32-aix.ll @@ -18,19 +18,19 @@ define void @foo() { entry: -; SMALL: %0:gprc_and_gprc_nor0 = LWZtoc @msg, $r2 :: (load 4 from got) -; SMALL: %1:gprc = LWZ 0, %0:gprc_and_gprc_nor0 :: (dereferenceable load 4 from @msg) -; SMALL: %2:gprc_and_gprc_nor0 = LWZtoc @ptr, $r2 :: (load 4 from got) -; SMALL: STW %1:gprc, 0, %2:gprc_and_gprc_nor0 :: (store 4 into @ptr) +; SMALL: %0:gprc_and_gprc_nor0 = LWZtoc @msg, $r2 :: (load (s32) from got) +; SMALL: %1:gprc = LWZ 0, %0:gprc_and_gprc_nor0 :: (dereferenceable load (s32) from @msg) +; SMALL: %2:gprc_and_gprc_nor0 = LWZtoc @ptr, $r2 :: (load (s32) from got) +; SMALL: STW %1:gprc, 0, %2:gprc_and_gprc_nor0 :: (store (s32) into @ptr) ; MEDIUM: Medium code model is not supported on AIX. ; LARGE: %0:gprc_and_gprc_nor0 = ADDIStocHA $r2, @msg -; LARGE: %1:gprc_and_gprc_nor0 = LWZtocL @msg, %0:gprc_and_gprc_nor0, implicit $r2 :: (load 4 from got) -; LARGE: %2:gprc = LWZ 0, %1:gprc_and_gprc_nor0 :: (dereferenceable load 4 from @msg) +; LARGE: %1:gprc_and_gprc_nor0 = LWZtocL @msg, %0:gprc_and_gprc_nor0, implicit $r2 :: (load (s32) from got) +; LARGE: %2:gprc = LWZ 0, %1:gprc_and_gprc_nor0 :: (dereferenceable load (s32) from @msg) ; LARGE: %3:gprc_and_gprc_nor0 = ADDIStocHA $r2, @ptr -; LARGE: %4:gprc_and_gprc_nor0 = LWZtocL @ptr, %3:gprc_and_gprc_nor0, implicit $r2 :: (load 4 from got) -; LARGE: STW %2:gprc, 0, %4:gprc_and_gprc_nor0 :: (store 4 into @ptr) +; LARGE: %4:gprc_and_gprc_nor0 = LWZtocL @ptr, %3:gprc_and_gprc_nor0, implicit $r2 :: (load (s32) from got) +; LARGE: STW %2:gprc, 0, %4:gprc_and_gprc_nor0 :: (store (s32) into @ptr) %0 = load i8*, i8** @msg, align 4 store i8* %0, i8** @ptr, align 4 diff --git a/llvm/test/CodeGen/PowerPC/lower-globaladdr64-aix.ll b/llvm/test/CodeGen/PowerPC/lower-globaladdr64-aix.ll index 29119f3257ad0..ff93fa9246a52 100644 --- a/llvm/test/CodeGen/PowerPC/lower-globaladdr64-aix.ll +++ b/llvm/test/CodeGen/PowerPC/lower-globaladdr64-aix.ll @@ -18,19 +18,19 @@ define void @foo() { entry: -; SMALL: %0:g8rc_and_g8rc_nox0 = LDtoc @msg, $x2 :: (load 8 from got) -; SMALL: %1:g8rc = LD 0, %0:g8rc_and_g8rc_nox0 :: (dereferenceable load 8 from @msg) -; SMALL: %2:g8rc_and_g8rc_nox0 = LDtoc @ptr, $x2 :: (load 8 from got) -; SMALL: STD %1:g8rc, 0, %2:g8rc_and_g8rc_nox0 :: (store 8 into @ptr) +; SMALL: %0:g8rc_and_g8rc_nox0 = LDtoc @msg, $x2 :: (load (s64) from got) +; SMALL: %1:g8rc = LD 0, %0:g8rc_and_g8rc_nox0 :: (dereferenceable load (s64) from @msg) +; SMALL: %2:g8rc_and_g8rc_nox0 = LDtoc @ptr, $x2 :: (load (s64) from got) +; SMALL: STD %1:g8rc, 0, %2:g8rc_and_g8rc_nox0 :: (store (s64) into @ptr) ; MEDIUM: Medium code model is not supported on AIX. ; LARGE: %0:g8rc_and_g8rc_nox0 = ADDIStocHA8 $x2, @msg -; LARGE: %1:g8rc_and_g8rc_nox0 = LDtocL @msg, %0:g8rc_and_g8rc_nox0, implicit $x2 :: (load 8 from got) -; LARGE: %2:g8rc = LD 0, %1:g8rc_and_g8rc_nox0 :: (dereferenceable load 8 from @msg) +; LARGE: %1:g8rc_and_g8rc_nox0 = LDtocL @msg, %0:g8rc_and_g8rc_nox0, implicit $x2 :: (load (s64) from got) +; LARGE: %2:g8rc = LD 0, %1:g8rc_and_g8rc_nox0 :: (dereferenceable load (s64) from @msg) ; LARGE: %3:g8rc_and_g8rc_nox0 = ADDIStocHA8 $x2, @ptr -; LARGE: %4:g8rc_and_g8rc_nox0 = LDtocL @ptr, %3:g8rc_and_g8rc_nox0, implicit $x2 :: (load 8 from got) -; LARGE: STD %2:g8rc, 0, %4:g8rc_and_g8rc_nox0 :: (store 8 into @ptr) +; LARGE: %4:g8rc_and_g8rc_nox0 = LDtocL @ptr, %3:g8rc_and_g8rc_nox0, implicit $x2 :: (load (s64) from got) +; LARGE: STD %2:g8rc, 0, %4:g8rc_and_g8rc_nox0 :: (store (s64) into @ptr) %0 = load i8*, i8** @msg, align 8 store i8* %0, i8** @ptr, align 8 diff --git a/llvm/test/CodeGen/PowerPC/no-rlwimi-trivial-commute.mir b/llvm/test/CodeGen/PowerPC/no-rlwimi-trivial-commute.mir index f55a3161aae45..14eb65bda25e6 100644 --- a/llvm/test/CodeGen/PowerPC/no-rlwimi-trivial-commute.mir +++ b/llvm/test/CodeGen/PowerPC/no-rlwimi-trivial-commute.mir @@ -71,8 +71,8 @@ body: | liveins: $x2 %0 = ADDIStocHA8 $x2, @b - %1 = LD target-flags(ppc-toc-lo) @b, killed %0 :: (load 8 from @b) - %2 = LWZ 0, %1 :: (load 4 from %ir.0) + %1 = LD target-flags(ppc-toc-lo) @b, killed %0 :: (load (s64) from @b) + %2 = LWZ 0, %1 :: (load (s32) from %ir.0) %3 = LI 0 %4 = RLWIMI %3, killed %2, 0, 0, 31 ; CHECK-LABEL: name: main @@ -80,9 +80,9 @@ body: | ; CHECK: %[[REG2:[0-9]+]]:gprc = COPY %[[REG1]] ; CHECK: %[[REG2]]:gprc = RLWIMI %[[REG2]], killed %2, 0, 0, 31 %8 = RLWIMI %3, %4, 0, 0, 31 - STW %4, 0, %1 :: (store 4 into %ir.0) + STW %4, 0, %1 :: (store (s32) into %ir.0) %10 = EXTSW_32_64 %8 - STW %8, 0, %1 :: (store 4 into %ir.0) + STW %8, 0, %1 :: (store (s32) into %ir.0) $x3 = COPY %10 BLR8 implicit $x3, implicit $lr8, implicit $rm diff --git a/llvm/test/CodeGen/PowerPC/nofpexcept.ll b/llvm/test/CodeGen/PowerPC/nofpexcept.ll index 43335e963b9a1..66fcf32a08d03 100644 --- a/llvm/test/CodeGen/PowerPC/nofpexcept.ll +++ b/llvm/test/CodeGen/PowerPC/nofpexcept.ll @@ -79,18 +79,18 @@ define void @fptoint_nofpexcept(ppc_fp128 %p, fp128 %m, i32* %addr1, i64* %addr2 ; CHECK: [[COPY5:%[0-9]+]]:vslrc = COPY %5 ; CHECK: [[COPY6:%[0-9]+]]:vfrc = COPY [[COPY5]].sub_64 ; CHECK: [[MFVSRWZ:%[0-9]+]]:gprc = MFVSRWZ killed [[COPY6]] - ; CHECK: STW killed [[MFVSRWZ]], 0, [[COPY1]] :: (volatile store 4 into %ir.addr1) + ; CHECK: STW killed [[MFVSRWZ]], 0, [[COPY1]] :: (volatile store (s32) into %ir.addr1) ; CHECK: %8:vrrc = nofpexcept XSCVQPUWZ [[COPY2]] ; CHECK: [[COPY7:%[0-9]+]]:vslrc = COPY %8 ; CHECK: [[COPY8:%[0-9]+]]:vfrc = COPY [[COPY7]].sub_64 ; CHECK: [[MFVSRWZ1:%[0-9]+]]:gprc = MFVSRWZ killed [[COPY8]] - ; CHECK: STW killed [[MFVSRWZ1]], 0, [[COPY1]] :: (volatile store 4 into %ir.addr1) + ; CHECK: STW killed [[MFVSRWZ1]], 0, [[COPY1]] :: (volatile store (s32) into %ir.addr1) ; CHECK: %11:vrrc = nofpexcept XSCVQPSDZ [[COPY2]] ; CHECK: %12:g8rc = nofpexcept MFVRD killed %11 - ; CHECK: STD killed %12, 0, [[COPY]] :: (volatile store 8 into %ir.addr2) + ; CHECK: STD killed %12, 0, [[COPY]] :: (volatile store (s64) into %ir.addr2) ; CHECK: %13:vrrc = nofpexcept XSCVQPUDZ [[COPY2]] ; CHECK: %14:g8rc = nofpexcept MFVRD killed %13 - ; CHECK: STD killed %14, 0, [[COPY]] :: (volatile store 8 into %ir.addr2) + ; CHECK: STD killed %14, 0, [[COPY]] :: (volatile store (s64) into %ir.addr2) ; CHECK: [[MFFS:%[0-9]+]]:f8rc = MFFS implicit $rm ; CHECK: MTFSB1 31, implicit-def $rm ; CHECK: MTFSB0 30, implicit-def $rm @@ -98,9 +98,9 @@ define void @fptoint_nofpexcept(ppc_fp128 %p, fp128 %m, i32* %addr1, i64* %addr2 ; CHECK: MTFSFb 1, [[MFFS]], implicit-def $rm ; CHECK: %16:vsfrc = nofpexcept XSCVDPSXWS killed %15, implicit $rm ; CHECK: [[MFVSRWZ2:%[0-9]+]]:gprc = MFVSRWZ killed %16 - ; CHECK: STW killed [[MFVSRWZ2]], 0, [[COPY1]] :: (volatile store 4 into %ir.addr1) + ; CHECK: STW killed [[MFVSRWZ2]], 0, [[COPY1]] :: (volatile store (s32) into %ir.addr1) ; CHECK: [[ADDIStocHA8_:%[0-9]+]]:g8rc_and_g8rc_nox0 = ADDIStocHA8 $x2, %const.0 - ; CHECK: [[DFLOADf32_:%[0-9]+]]:vssrc = DFLOADf32 target-flags(ppc-toc-lo) %const.0, killed [[ADDIStocHA8_]] :: (load 4 from constant-pool) + ; CHECK: [[DFLOADf32_:%[0-9]+]]:vssrc = DFLOADf32 target-flags(ppc-toc-lo) %const.0, killed [[ADDIStocHA8_]] :: (load (s32) from constant-pool) ; CHECK: [[COPY9:%[0-9]+]]:f8rc = COPY [[DFLOADf32_]] ; CHECK: [[FCMPOD:%[0-9]+]]:crrc = FCMPOD [[COPY4]], [[COPY9]] ; CHECK: [[COPY10:%[0-9]+]]:crbitrc = COPY [[FCMPOD]].sub_eq @@ -137,7 +137,7 @@ define void @fptoint_nofpexcept(ppc_fp128 %p, fp128 %m, i32* %addr1, i64* %addr2 ; CHECK: %38:vsfrc = nofpexcept XSCVDPSXWS killed %37, implicit $rm ; CHECK: [[MFVSRWZ3:%[0-9]+]]:gprc = MFVSRWZ killed %38 ; CHECK: [[XOR:%[0-9]+]]:gprc = XOR killed [[MFVSRWZ3]], killed [[ISEL]] - ; CHECK: STW killed [[XOR]], 0, [[COPY1]] :: (volatile store 4 into %ir.addr1) + ; CHECK: STW killed [[XOR]], 0, [[COPY1]] :: (volatile store (s32) into %ir.addr1) ; CHECK: BLR8 implicit $lr8, implicit $rm entry: %conv1 = tail call i32 @llvm.experimental.constrained.fptosi.i32.f128(fp128 %m, metadata !"fpexcept.ignore") #0 diff --git a/llvm/test/CodeGen/PowerPC/peephole-phi-acc.mir b/llvm/test/CodeGen/PowerPC/peephole-phi-acc.mir index 5ebd8980d65b1..893070844b567 100644 --- a/llvm/test/CodeGen/PowerPC/peephole-phi-acc.mir +++ b/llvm/test/CodeGen/PowerPC/peephole-phi-acc.mir @@ -269,14 +269,14 @@ body: | %15:vsrprc = INSERT_SUBREG %16, killed %14, %subreg.sub_vsx1 %17:vsrc = COPY %12.sub_vsx0 %18:vsrprc = INSERT_SUBREG %15, killed %17, %subreg.sub_vsx0 - STXVP killed %18, 32, %5 :: (store 32 into %ir.ptr + 32) + STXVP killed %18, 32, %5 :: (store (s256) into %ir.ptr + 32) %19:vsrprc = COPY %12.sub_pair1 %20:vsrc = COPY %19.sub_vsx1 %22:vsrprc = IMPLICIT_DEF %21:vsrprc = INSERT_SUBREG %22, killed %20, %subreg.sub_vsx1 %23:vsrc = COPY %19.sub_vsx0 %24:vsrprc = INSERT_SUBREG %21, killed %23, %subreg.sub_vsx0 - STXVP killed %24, 0, %5 :: (store 32 into %ir.ptr, align 64) + STXVP killed %24, 0, %5 :: (store (s256) into %ir.ptr, align 64) BLR8 implicit $lr8, implicit $rm ... @@ -387,14 +387,14 @@ body: | %15:vsrprc = INSERT_SUBREG %16, killed %14, %subreg.sub_vsx1 %17:vsrc = COPY %12.sub_vsx0 %18:vsrprc = INSERT_SUBREG %15, killed %17, %subreg.sub_vsx0 - STXVP killed %18, 32, %4 :: (store 32 into %ir.ptr + 32) + STXVP killed %18, 32, %4 :: (store (s256) into %ir.ptr + 32) %19:vsrprc = COPY %12.sub_pair1 %20:vsrc = COPY %19.sub_vsx1 %22:vsrprc = IMPLICIT_DEF %21:vsrprc = INSERT_SUBREG %22, killed %20, %subreg.sub_vsx1 %23:vsrc = COPY %19.sub_vsx0 %24:vsrprc = INSERT_SUBREG %21, killed %23, %subreg.sub_vsx0 - STXVP killed %24, 0, %4 :: (store 32 into %ir.ptr, align 64) + STXVP killed %24, 0, %4 :: (store (s256) into %ir.ptr, align 64) BLR8 implicit $lr8, implicit $rm ... @@ -579,14 +579,14 @@ body: | %45:vsrprc = INSERT_SUBREG %46, killed %44, %subreg.sub_vsx1 %47:vsrc = COPY %42.sub_vsx0 %48:vsrprc = INSERT_SUBREG %45, killed %47, %subreg.sub_vsx0 - STXVP killed %48, 96, %9 :: (store 32 into %ir.add.ptr + 32) + STXVP killed %48, 96, %9 :: (store (s256) into %ir.add.ptr + 32) %49:vsrprc = COPY %42.sub_pair1 %50:vsrc = COPY %49.sub_vsx1 %52:vsrprc = IMPLICIT_DEF %51:vsrprc = INSERT_SUBREG %52, killed %50, %subreg.sub_vsx1 %53:vsrc = COPY %49.sub_vsx0 %54:vsrprc = INSERT_SUBREG %51, killed %53, %subreg.sub_vsx0 - STXVP killed %54, 64, %9 :: (store 32 into %ir.add.ptr, align 64) + STXVP killed %54, 64, %9 :: (store (s256) into %ir.add.ptr, align 64) BLR8 implicit $lr8, implicit $rm bb.7.for.body: @@ -794,14 +794,14 @@ body: | %45:vsrprc = INSERT_SUBREG %46, killed %44, %subreg.sub_vsx1 %47:vsrc = COPY %42.sub_vsx0 %48:vsrprc = INSERT_SUBREG %45, killed %47, %subreg.sub_vsx0 - STXVP killed %48, 96, %9 :: (store 32 into %ir.add.ptr + 32) + STXVP killed %48, 96, %9 :: (store (s256) into %ir.add.ptr + 32) %49:vsrprc = COPY %42.sub_pair1 %50:vsrc = COPY %49.sub_vsx1 %52:vsrprc = IMPLICIT_DEF %51:vsrprc = INSERT_SUBREG %52, killed %50, %subreg.sub_vsx1 %53:vsrc = COPY %49.sub_vsx0 %54:vsrprc = INSERT_SUBREG %51, killed %53, %subreg.sub_vsx0 - STXVP killed %54, 64, %9 :: (store 32 into %ir.add.ptr, align 64) + STXVP killed %54, 64, %9 :: (store (s256) into %ir.add.ptr, align 64) BLR8 implicit $lr8, implicit $rm bb.7.for.body: diff --git a/llvm/test/CodeGen/PowerPC/phi-eliminate.mir b/llvm/test/CodeGen/PowerPC/phi-eliminate.mir index e74ca4ca0e5d9..a8eacc201a365 100644 --- a/llvm/test/CodeGen/PowerPC/phi-eliminate.mir +++ b/llvm/test/CodeGen/PowerPC/phi-eliminate.mir @@ -159,7 +159,7 @@ body: | ; CHECK: %22:crrc = CMPLWI %21, 10 ; CHECK: %23:gprc = ISEL %15, %14, killed %22.sub_lt ; CEHCK: %24:gprc = ADD4 killed %23, killed %21 - ; CHECK: %25:g8rc_and_g8rc_nox0 = STBU killed %24, -1, undef %0:g8rc_and_g8rc_nox0 :: (store 1 into %ir.7) + ; CHECK: %25:g8rc_and_g8rc_nox0 = STBU killed %24, -1, undef %0:g8rc_and_g8rc_nox0 :: (store (s8) into %ir.7) ; CHECK: %26:gprc = DIVW %19, %9 ; CHECK: %57:gprc = COPY killed %26 ; CHECK: %58:gprc = COPY %19 @@ -243,7 +243,7 @@ body: | %22:crrc = CMPLWI %21, 10 %23:gprc = ISEL %15, %14, killed %22.sub_lt %24:gprc = ADD4 killed %23, killed %21 - %25:g8rc_and_g8rc_nox0 = STBU killed %24, -1, undef %0:g8rc_and_g8rc_nox0 :: (store 1 into %ir.7) + %25:g8rc_and_g8rc_nox0 = STBU killed %24, -1, undef %0:g8rc_and_g8rc_nox0 :: (store (s8) into %ir.7) %26:gprc = DIVW %19, %9 BDZ8 %bb.4, implicit-def $ctr8, implicit $ctr8 B %bb.3 diff --git a/llvm/test/CodeGen/PowerPC/ppc64-crsave.mir b/llvm/test/CodeGen/PowerPC/ppc64-crsave.mir index e6a29a79524b4..f4af2ad21a567 100644 --- a/llvm/test/CodeGen/PowerPC/ppc64-crsave.mir +++ b/llvm/test/CodeGen/PowerPC/ppc64-crsave.mir @@ -40,10 +40,10 @@ body: | ; CHECK: liveins: $x3, $x29, $cr2, $cr4 ; CHECK: $x12 = MFCR8 implicit killed $cr2, implicit killed $cr4 - ; CHECK-DAG: STD killed $x29, -24, $x1 :: (store 8 into %fixed-stack.0) + ; CHECK-DAG: STD killed $x29, -24, $x1 :: (store (s64) into %fixed-stack.0) ; CHECK-DAG: STW8 killed $x12, 8, $x1 - ; CHECK: $x29 = LD -24, $x1 :: (load 8 from %fixed-stack.0) + ; CHECK: $x29 = LD -24, $x1 :: (load (s64) from %fixed-stack.0) ; CHECK: $x12 = LWZ8 8, $x1 ; CHECK: $cr2 = MTOCRF8 $x12 ; CHECK: $cr4 = MTOCRF8 killed $x12 @@ -83,10 +83,10 @@ body: | ; SAVEONE: $x12 = MFOCRF8 killed $cr2 ; SAVEALL: $x12 = MFCR8 implicit killed $cr2 - ; CHECK-DAG: STD killed $x14, -144, $x1 :: (store 8 into %fixed-stack.0, align 16) + ; CHECK-DAG: STD killed $x14, -144, $x1 :: (store (s64) into %fixed-stack.0, align 16) ; CHECK-DAG: STW8 killed $x12, 8, $x1 - ; CHECK: $x14 = LD -144, $x1 :: (load 8 from %fixed-stack.0, align 16) + ; CHECK: $x14 = LD -144, $x1 :: (load (s64) from %fixed-stack.0, align 16) ; CHECK: $x12 = LWZ8 8, $x1 ; CHECK: $cr2 = MTOCRF8 killed $x12 diff --git a/llvm/test/CodeGen/PowerPC/remove-copy-crunsetcrbit.mir b/llvm/test/CodeGen/PowerPC/remove-copy-crunsetcrbit.mir index e5e94a321c11b..403c7e4ceb94a 100644 --- a/llvm/test/CodeGen/PowerPC/remove-copy-crunsetcrbit.mir +++ b/llvm/test/CodeGen/PowerPC/remove-copy-crunsetcrbit.mir @@ -111,7 +111,7 @@ body: | liveins: $x2 %3:g8rc_and_g8rc_nox0 = ADDIStocHA8 $x2, @b - %4:gprc = LWZ target-flags(ppc-toc-lo) @b, killed %3, implicit $x2 :: (dereferenceable load 4 from @b) + %4:gprc = LWZ target-flags(ppc-toc-lo) @b, killed %3, implicit $x2 :: (dereferenceable load (s32) from @b) %5:crrc = CMPLWI killed %4, 0 BCC 76, killed %5, %bb.5 B %bb.1 @@ -121,7 +121,7 @@ body: | liveins: $x2 %6:g8rc_and_g8rc_nox0 = ADDIStocHA8 $x2, @d - %7:gprc = LWZ target-flags(ppc-toc-lo) @d, killed %6, implicit $x2 :: (dereferenceable load 4 from @d) + %7:gprc = LWZ target-flags(ppc-toc-lo) @d, killed %6, implicit $x2 :: (dereferenceable load (s32) from @d) %8:crrc = CMPWI killed %7, 0 %0:crbitrc = COPY killed %8.sub_eq %9:crbitrc = CRUNSET @@ -144,10 +144,10 @@ body: | successors: %bb.4(0x80000000) liveins: $x2 - %11:g8rc_and_g8rc_nox0 = LD target-flags(ppc-toc-lo) @e, %10, implicit $x2 :: (dereferenceable load 8 from @e) - %12:g8rc = LWA 0, killed %11 :: (load 4 from %ir.1) + %11:g8rc_and_g8rc_nox0 = LD target-flags(ppc-toc-lo) @e, %10, implicit $x2 :: (dereferenceable load (s64) from @e) + %12:g8rc = LWA 0, killed %11 :: (load (s32) from %ir.1) %15:g8rc = RLDICR killed %12, 2, 61 - %16:gprc = LWZX %14, killed %15 :: (load 4 from %ir.arrayidx) + %16:gprc = LWZX %14, killed %15 :: (load (s32) from %ir.arrayidx) %17:crrc = CMPWI killed %16, 0 %18:crbitrc = COPY killed %17.sub_eq %1:crbitrc = CRNOR killed %18, %18 @@ -159,7 +159,7 @@ body: | %2:crbitrc = COPY killed %24 %21:gprc = ISEL %20, %19, killed %2 - STW killed %21, target-flags(ppc-toc-lo) @c, %22, implicit $x2 :: (store 4 into @c) + STW killed %21, target-flags(ppc-toc-lo) @c, %22, implicit $x2 :: (store (s32) into @c) B %bb.2 bb.5.while.end: diff --git a/llvm/test/CodeGen/PowerPC/remove-redundant-li-skip-imp-kill.mir b/llvm/test/CodeGen/PowerPC/remove-redundant-li-skip-imp-kill.mir index 78091d027cec8..f0a45465f465d 100644 --- a/llvm/test/CodeGen/PowerPC/remove-redundant-li-skip-imp-kill.mir +++ b/llvm/test/CodeGen/PowerPC/remove-redundant-li-skip-imp-kill.mir @@ -75,39 +75,39 @@ body: | ; CHECK-LABEL: name: b ; CHECK: liveins: $x3, $x4, $x29, $x30 ; CHECK: $x0 = MFLR8 implicit $lr8 - ; CHECK: STD killed $x29, -24, $x1 :: (store 8 into %fixed-stack.0) - ; CHECK: STD killed $x30, -16, $x1 :: (store 8 into %fixed-stack.1, align 16) + ; CHECK: STD killed $x29, -24, $x1 :: (store (s64) into %fixed-stack.0) + ; CHECK: STD killed $x30, -16, $x1 :: (store (s64) into %fixed-stack.1, align 16) ; CHECK: STD killed $x0, 16, $x1 ; CHECK: $x1 = STDU $x1, -64, $x1 ; CHECK: $x30 = OR8 killed $x4, $x4 ; CHECK: dead $r4 = LI 10, implicit-def $x4 ; CHECK: $x29 = OR8 $x3, $x3 ; CHECK: BL8_NOP @g, csr_ppc64_r2_altivec, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit killed $x4, implicit $x2, implicit-def $r1, implicit-def $x3 - ; CHECK: STW8 killed renamable $x3, 0, killed renamable $x30 :: (store 4 into %ir.b) + ; CHECK: STW8 killed renamable $x3, 0, killed renamable $x30 :: (store (s32) into %ir.b) ; CHECK: $x3 = OR8 killed $x29, $x29 ; CHECK: BL8_NOP @g, csr_ppc64_r2_altivec, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit $x2, implicit-def $r1, implicit-def $x3 ; CHECK: $x1 = ADDI8 $x1, 64 ; CHECK: $x0 = LD 16, $x1 - ; CHECK: $x30 = LD -16, $x1 :: (load 8 from %fixed-stack.1, align 16) - ; CHECK: $x29 = LD -24, $x1 :: (load 8 from %fixed-stack.0) + ; CHECK: $x30 = LD -16, $x1 :: (load (s64) from %fixed-stack.1, align 16) + ; CHECK: $x29 = LD -24, $x1 :: (load (s64) from %fixed-stack.0) ; CHECK: MTLR8 killed $x0, implicit-def $lr8 ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $x3 $x0 = MFLR8 implicit $lr8 - STD killed $x29, -24, $x1 :: (store 8 into %fixed-stack.1) - STD killed $x30, -16, $x1 :: (store 8 into %fixed-stack.0, align 16) + STD killed $x29, -24, $x1 :: (store (s64) into %fixed-stack.1) + STD killed $x30, -16, $x1 :: (store (s64) into %fixed-stack.0, align 16) STD killed $x0, 16, $x1 $x1 = STDU $x1, -64, $x1 $x30 = OR8 killed $x4, $x4 dead $r4 = LI 10, implicit-def $x4 $x29 = OR8 $x3, $x3 BL8_NOP @g, csr_ppc64_r2_altivec, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit killed $x4, implicit $x2, implicit-def $r1, implicit-def $x3 - STW8 killed renamable $x3, 0, killed renamable $x30 :: (store 4 into %ir.b) + STW8 killed renamable $x3, 0, killed renamable $x30 :: (store (s32) into %ir.b) $x3 = OR8 killed $x29, $x29 BL8_NOP @g, csr_ppc64_r2_altivec, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit $x2, implicit-def $r1, implicit-def $x3 $x1 = ADDI8 $x1, 64 $x0 = LD 16, $x1 - $x30 = LD -16, $x1 :: (load 8 from %fixed-stack.0, align 16) - $x29 = LD -24, $x1 :: (load 8 from %fixed-stack.1) + $x30 = LD -16, $x1 :: (load (s64) from %fixed-stack.0, align 16) + $x29 = LD -24, $x1 :: (load (s64) from %fixed-stack.1) MTLR8 killed $x0, implicit-def $lr8 BLR8 implicit $lr8, implicit $rm, implicit killed $x3 diff --git a/llvm/test/CodeGen/PowerPC/scavenging.mir b/llvm/test/CodeGen/PowerPC/scavenging.mir index e9cf4c67c15af..c7bd59134041d 100644 --- a/llvm/test/CodeGen/PowerPC/scavenging.mir +++ b/llvm/test/CodeGen/PowerPC/scavenging.mir @@ -153,11 +153,11 @@ body: | # CHECK-LABEL: name: spill_at_begin # CHECK: bb.0: # CHECK: liveins: -# CHECK: STD killed [[REG:\$x[0-9]+]]{{.*}}(store 8 into %stack.{{[0-9]+}}) +# CHECK: STD killed [[REG:\$x[0-9]+]]{{.*}}(store (s64) into %stack.{{[0-9]+}}) # CHECK: [[REG]] = LIS8 0 # CHECK: [[REG]] = ORI8 killed [[REG]], 48 # CHECK: NOP implicit killed [[REG]] -# CHECK: [[REG]] = LD{{.*}}(load 8 from %stack.{{[0-9]+}}) +# CHECK: [[REG]] = LD{{.*}}(load (s64) from %stack.{{[0-9]+}}) name: spill_at_begin tracksRegLiveness: true stack: diff --git a/llvm/test/CodeGen/PowerPC/schedule-addi-load.mir b/llvm/test/CodeGen/PowerPC/schedule-addi-load.mir index 56212872b8362..c992f4dd7bcad 100644 --- a/llvm/test/CodeGen/PowerPC/schedule-addi-load.mir +++ b/llvm/test/CodeGen/PowerPC/schedule-addi-load.mir @@ -88,8 +88,8 @@ body: | %0:g8rc = COPY %9 %5:g8rc_and_g8rc_nox0 = RLDICL %0, 0, 32 - %6:gprc = LBZX %2, %5 :: (load 1 from %ir.arrayidx) - %7:gprc = LBZX %3, %5 :: (load 1 from %ir.arrayidx4) + %6:gprc = LBZX %2, %5 :: (load (s8) from %ir.arrayidx) + %7:gprc = LBZX %3, %5 :: (load (s8) from %ir.arrayidx4) %9:g8rc = ADDI8 %5, 1 %8:crrc = CMPLW %6, %7 BCC 76, %8, %bb.1 @@ -97,21 +97,21 @@ body: | ; CHECK-LABEL: foo ; CHECK: %5:g8rc_and_g8rc_nox0 = RLDICL %0, 0, 32 ; CHECK-NEXT: %9:g8rc = ADDI8 %5, 1 - ; CHECK-NEXT: %6:gprc = LBZX %2, %5 :: (load 1 from %ir.arrayidx) - ; CHECK-NEXT: %7:gprc = LBZX %3, %5 :: (load 1 from %ir.arrayidx4) + ; CHECK-NEXT: %6:gprc = LBZX %2, %5 :: (load (s8) from %ir.arrayidx) + ; CHECK-NEXT: %7:gprc = LBZX %3, %5 :: (load (s8) from %ir.arrayidx4) ; CHECK-NEXT: %8:crrc = CMPLW %6, %7 ; CHECK-NEXT: BCC 76, %8 ; CHECK-DISABLE-LABEL: foo ; CHECK-DISABLE: %5:g8rc_and_g8rc_nox0 = RLDICL %0, 0, 32 - ; CHECK-DISABLE-NEXT: %6:gprc = LBZX %2, %5 :: (load 1 from %ir.arrayidx) - ; CHECK-DISABLE-NEXT: %7:gprc = LBZX %3, %5 :: (load 1 from %ir.arrayidx4) + ; CHECK-DISABLE-NEXT: %6:gprc = LBZX %2, %5 :: (load (s8) from %ir.arrayidx) + ; CHECK-DISABLE-NEXT: %7:gprc = LBZX %3, %5 :: (load (s8) from %ir.arrayidx4) ; CHECK-DISABLE-NEXT: %9:g8rc = ADDI8 %5, 1 ; CHECK-DISABLE-NEXT: %8:crrc = CMPLW %6, %7 ; CHECK-DISABLE-NEXT: BCC 76, %8 ; CHECK-P8-LABEL: foo ; CHECK-P8: %5:g8rc_and_g8rc_nox0 = RLDICL %0, 0, 32 - ; CHECK-P8-NEXT: %6:gprc = LBZX %2, %5 :: (load 1 from %ir.arrayidx) - ; CHECK-P8-NEXT: %7:gprc = LBZX %3, %5 :: (load 1 from %ir.arrayidx4) + ; CHECK-P8-NEXT: %6:gprc = LBZX %2, %5 :: (load (s8) from %ir.arrayidx) + ; CHECK-P8-NEXT: %7:gprc = LBZX %3, %5 :: (load (s8) from %ir.arrayidx4) ; CHECK-P8-NEXT: %8:crrc = CMPLW %6, %7 ; CHECK-P8-NEXT: %9:g8rc = ADDI8 %5, 1 ; CHECK-P8-NEXT: BCC 76, %8 diff --git a/llvm/test/CodeGen/PowerPC/setcr_bc.mir b/llvm/test/CodeGen/PowerPC/setcr_bc.mir index 564ee7d45957b..bc8bb5582137f 100644 --- a/llvm/test/CodeGen/PowerPC/setcr_bc.mir +++ b/llvm/test/CodeGen/PowerPC/setcr_bc.mir @@ -88,7 +88,7 @@ body: | $x0 = MFLR8 implicit $lr8 STD killed $x0, 16, $x1 $x1 = STDU $x1, -48, $x1 - STD killed $x30, 32, $x1 :: (store 8 into %fixed-stack.0, align 16) + STD killed $x30, 32, $x1 :: (store (s64) into %fixed-stack.0, align 16) $x30 = OR8 $x3, $x3 BL8_NOP @callee, csr_ppc64_altivec, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit $x2, implicit-def $r1, implicit-def $x3 renamable $cr0 = CMPLWI renamable $r3, 0 @@ -115,7 +115,7 @@ body: | liveins: $x30 renamable $x3 = EXTSW_32_64 killed renamable $r30, implicit $x30 - $x30 = LD 32, $x1 :: (load 8 from %fixed-stack.0, align 16) + $x30 = LD 32, $x1 :: (load (s64) from %fixed-stack.0, align 16) $x1 = ADDI8 $x1, 48 $x0 = LD 16, $x1 MTLR8 killed $x0, implicit-def $lr8 diff --git a/llvm/test/CodeGen/PowerPC/setcr_bc2.mir b/llvm/test/CodeGen/PowerPC/setcr_bc2.mir index 513cb85e1580a..5986c885f189e 100644 --- a/llvm/test/CodeGen/PowerPC/setcr_bc2.mir +++ b/llvm/test/CodeGen/PowerPC/setcr_bc2.mir @@ -88,7 +88,7 @@ body: | $x0 = MFLR8 implicit $lr8 STD killed $x0, 16, $x1 $x1 = STDU $x1, -48, $x1 - STD killed $x30, 32, $x1 :: (store 8 into %fixed-stack.0, align 16) + STD killed $x30, 32, $x1 :: (store (s64) into %fixed-stack.0, align 16) $x30 = OR8 $x3, $x3 BL8_NOP @callee, csr_ppc64_altivec, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit $x2, implicit-def $r1, implicit-def $x3 renamable $cr0 = CMPLWI renamable $r3, 0 @@ -115,7 +115,7 @@ body: | liveins: $x30 renamable $x3 = EXTSW_32_64 killed renamable $r30, implicit $x30 - $x30 = LD 32, $x1 :: (load 8 from %fixed-stack.0, align 16) + $x30 = LD 32, $x1 :: (load (s64) from %fixed-stack.0, align 16) $x1 = ADDI8 $x1, 48 $x0 = LD 16, $x1 MTLR8 killed $x0, implicit-def $lr8 diff --git a/llvm/test/CodeGen/PowerPC/setcr_bc3.mir b/llvm/test/CodeGen/PowerPC/setcr_bc3.mir index bed2932fd9507..2d037d03bb6b7 100644 --- a/llvm/test/CodeGen/PowerPC/setcr_bc3.mir +++ b/llvm/test/CodeGen/PowerPC/setcr_bc3.mir @@ -62,7 +62,7 @@ body: | $x0 = MFLR8 implicit $lr8 STD killed $x0, 16, $x1 $x1 = STDU $x1, -48, $x1 - STD killed $x30, 32, $x1 :: (store 8 into %fixed-stack.0, align 16) + STD killed $x30, 32, $x1 :: (store (s64) into %fixed-stack.0, align 16) $x30 = OR8 $x3, $x3 BL8_NOP @callee, csr_ppc64_altivec, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit $x2, implicit-def $r1, implicit-def $x3 renamable $cr0 = CMPLWI renamable $r3, 0 @@ -91,7 +91,7 @@ body: | liveins: $x30 renamable $x3 = EXTSW_32_64 killed renamable $r30, implicit $x30 - $x30 = LD 32, $x1 :: (load 8 from %fixed-stack.0, align 16) + $x30 = LD 32, $x1 :: (load (s64) from %fixed-stack.0, align 16) $x1 = ADDI8 $x1, 48 $x0 = LD 16, $x1 MTLR8 killed $x0, implicit-def $lr8 diff --git a/llvm/test/CodeGen/PowerPC/sink-down-more-instructions-1.mir b/llvm/test/CodeGen/PowerPC/sink-down-more-instructions-1.mir index e44f096db6457..0ca0b08ca98fb 100644 --- a/llvm/test/CodeGen/PowerPC/sink-down-more-instructions-1.mir +++ b/llvm/test/CodeGen/PowerPC/sink-down-more-instructions-1.mir @@ -337,7 +337,7 @@ body: | ; CHECK: bb.5 (%ir-block.23): ; CHECK: successors: %bb.7(0x2aaaaaab), %bb.6(0x55555555) ; CHECK: [[RLDICR:%[0-9]+]]:g8rc = RLDICR [[PHI]], 2, 61 - ; CHECK: [[LWZX:%[0-9]+]]:gprc = LWZX [[COPY2]], [[RLDICR]] :: (load 4 from %ir.24, !tbaa !2) + ; CHECK: [[LWZX:%[0-9]+]]:gprc = LWZX [[COPY2]], [[RLDICR]] :: (load (s32) from %ir.24, !tbaa !2) ; CHECK: [[ADD4_:%[0-9]+]]:gprc = nsw ADD4 killed [[LWZX]], [[PHI2]] ; CHECK: BCC 76, [[CMPLWI1]], %bb.7 ; CHECK: B %bb.6 @@ -359,7 +359,7 @@ body: | ; CHECK: successors: %bb.8(0x80000000) ; CHECK: [[PHI3:%[0-9]+]]:gprc = PHI [[PHI1]], %bb.5, [[ISEL]], %bb.6 ; CHECK: [[ADD4_1:%[0-9]+]]:gprc = nsw ADD4 [[PHI3]], [[ADD4_]] - ; CHECK: STWX killed [[ADD4_1]], [[COPY1]], [[RLDICR]] :: (store 4 into %ir.36, !tbaa !2) + ; CHECK: STWX killed [[ADD4_1]], [[COPY1]], [[RLDICR]] :: (store (s32) into %ir.36, !tbaa !2) ; CHECK: bb.8 (%ir-block.37): ; CHECK: [[LI8_2:%[0-9]+]]:g8rc = LI8 0 ; CHECK: $x3 = COPY [[LI8_2]] @@ -394,11 +394,11 @@ body: | ; CHECK: successors: %bb.15(0x2aaaaaab), %bb.13(0x55555555) ; CHECK: [[PHI8:%[0-9]+]]:gprc = PHI [[ADDI2]], %bb.11, [[ISEL1]], %bb.10 ; CHECK: [[ADDI8_4:%[0-9]+]]:g8rc_and_g8rc_nox0 = ADDI8 [[PHI7]], 8 - ; CHECK: [[LWZU:%[0-9]+]]:gprc, [[LWZU1:%[0-9]+]]:g8rc_and_g8rc_nox0 = LWZU 8, [[PHI6]] :: (load 4 from %ir.46, !tbaa !2) + ; CHECK: [[LWZU:%[0-9]+]]:gprc, [[LWZU1:%[0-9]+]]:g8rc_and_g8rc_nox0 = LWZU 8, [[PHI6]] :: (load (s32) from %ir.46, !tbaa !2) ; CHECK: [[ADD4_2:%[0-9]+]]:gprc = nsw ADD4 [[LWZU]], [[PHI5]] ; CHECK: [[ADD4_3:%[0-9]+]]:gprc = nsw ADD4 [[PHI8]], [[ADD4_2]] - ; CHECK: STW killed [[ADD4_3]], 0, [[ADDI8_4]] :: (store 4 into %ir.44, !tbaa !2) - ; CHECK: [[LWZ:%[0-9]+]]:gprc = LWZ 4, [[LWZU1]] :: (load 4 from %ir.uglygep1112.cast, !tbaa !2) + ; CHECK: STW killed [[ADD4_3]], 0, [[ADDI8_4]] :: (store (s32) into %ir.44, !tbaa !2) + ; CHECK: [[LWZ:%[0-9]+]]:gprc = LWZ 4, [[LWZU1]] :: (load (s32) from %ir.uglygep1112.cast, !tbaa !2) ; CHECK: BCC 76, [[CMPLWI2]], %bb.15 ; CHECK: B %bb.13 ; CHECK: bb.13 (%ir-block.60): @@ -426,7 +426,7 @@ body: | ; CHECK: [[COPY15:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY [[ADDI8_4]] ; CHECK: [[ADD4_4:%[0-9]+]]:gprc = nsw ADD4 [[LWZ]], [[ADD4_2]] ; CHECK: [[ADD4_5:%[0-9]+]]:gprc = nsw ADD4 [[PHI9]], [[ADD4_4]] - ; CHECK: STW killed [[ADD4_5]], 4, [[COPY15]] :: (store 4 into %ir.uglygep78.cast, !tbaa !2) + ; CHECK: STW killed [[ADD4_5]], 4, [[COPY15]] :: (store (s32) into %ir.uglygep78.cast, !tbaa !2) ; CHECK: [[ADDI8_5:%[0-9]+]]:g8rc = nuw nsw ADDI8 [[PHI4]], 2 ; CHECK: BDNZ8 %bb.9, implicit-def dead $ctr8, implicit $ctr8 ; CHECK: B %bb.3 @@ -488,7 +488,7 @@ body: | successors: %bb.8(0x2aaaaaab), %bb.21(0x55555555) %76:g8rc = RLDICR %4, 2, 61 - %77:gprc = LWZX %36, %76 :: (load 4 from %ir.24, !tbaa !2) + %77:gprc = LWZX %36, %76 :: (load (s32) from %ir.24, !tbaa !2) %7:gprc = nsw ADD4 killed %77, %6 BCC 76, %95, %bb.8 B %bb.21 @@ -510,7 +510,7 @@ body: | bb.8 (%ir-block.33): %10:gprc = PHI %5, %bb.5, %91, %bb.21 %87:gprc = nsw ADD4 %10, %7 - STWX killed %87, %37, %76 :: (store 4 into %ir.36, !tbaa !2) + STWX killed %87, %37, %76 :: (store (s32) into %ir.36, !tbaa !2) bb.9 (%ir-block.37): %89:g8rc = LI8 0 @@ -526,7 +526,7 @@ body: | %14:g8rc_and_g8rc_nox0 = PHI %2, %bb.2, %15, %bb.18 %16:g8rc_and_g8rc_nox0 = ADDI8 %14, 8 %15:g8rc_and_g8rc_nox0 = COPY %16 - %54:gprc, %55:g8rc_and_g8rc_nox0 = LWZU 8, %13 :: (load 4 from %ir.46, !tbaa !2) + %54:gprc, %55:g8rc_and_g8rc_nox0 = LWZU 8, %13 :: (load (s32) from %ir.46, !tbaa !2) %17:g8rc_and_g8rc_nox0 = COPY %55 %18:gprc_and_gprc_nor0 = COPY %11.sub_32 %58:gprc = MULHWU %18, %57 @@ -556,9 +556,9 @@ body: | %25:gprc = PHI %23, %bb.12, %93, %bb.19 %67:gprc = nsw ADD4 %25, %21 - STW killed %67, 0, %16 :: (store 4 into %ir.44, !tbaa !2) + STW killed %67, 0, %16 :: (store (s32) into %ir.44, !tbaa !2) %26:g8rc = ORI8 %11, 1 - %68:gprc = LWZ 4, %17 :: (load 4 from %ir.uglygep1112.cast, !tbaa !2) + %68:gprc = LWZ 4, %17 :: (load (s32) from %ir.uglygep1112.cast, !tbaa !2) %27:gprc = nsw ADD4 killed %68, %21 BCC 76, %69, %bb.16 B %bb.20 @@ -587,7 +587,7 @@ body: | %31:gprc = PHI %28, %bb.15, %29, %bb.16, %30, %bb.17 %73:gprc = nsw ADD4 %31, %27 - STW killed %73, 4, %15 :: (store 4 into %ir.uglygep78.cast, !tbaa !2) + STW killed %73, 4, %15 :: (store (s32) into %ir.uglygep78.cast, !tbaa !2) %32:g8rc = nuw nsw ADDI8 %11, 2 %74:gprc_and_gprc_nor0 = COPY %32.sub_32 %33:gprc_and_gprc_nor0 = ADDI killed %74, -2 diff --git a/llvm/test/CodeGen/PowerPC/sink-down-more-instructions-regpressure-high.mir b/llvm/test/CodeGen/PowerPC/sink-down-more-instructions-regpressure-high.mir index c16de17443838..f1a3f58531d5c 100644 --- a/llvm/test/CodeGen/PowerPC/sink-down-more-instructions-regpressure-high.mir +++ b/llvm/test/CodeGen/PowerPC/sink-down-more-instructions-regpressure-high.mir @@ -461,35 +461,35 @@ body: | ; CHECK: bb.1 (%ir-block.39): ; CHECK: successors: %bb.3(0x80000000) ; CHECK: [[COPY8:%[0-9]+]]:gprc = COPY [[COPY6]].sub_32 - ; CHECK: [[LD:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.28 :: (load 8 from %fixed-stack.28, align 16) - ; CHECK: [[LD1:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.27 :: (load 8 from %fixed-stack.27) - ; CHECK: [[LD2:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.26 :: (load 8 from %fixed-stack.26, align 16) - ; CHECK: [[LD3:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.25 :: (load 8 from %fixed-stack.25) - ; CHECK: [[LD4:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.24 :: (load 8 from %fixed-stack.24, align 16) - ; CHECK: [[LD5:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.23 :: (load 8 from %fixed-stack.23) - ; CHECK: [[LD6:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.22 :: (load 8 from %fixed-stack.22, align 16) - ; CHECK: [[LD7:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.21 :: (load 8 from %fixed-stack.21) - ; CHECK: [[LD8:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.20 :: (load 8 from %fixed-stack.20, align 16) - ; CHECK: [[LD9:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.19 :: (load 8 from %fixed-stack.19) - ; CHECK: [[LD10:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.18 :: (load 8 from %fixed-stack.18, align 16) - ; CHECK: [[LD11:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.17 :: (load 8 from %fixed-stack.17) - ; CHECK: [[LD12:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.16 :: (load 8 from %fixed-stack.16, align 16) - ; CHECK: [[LD13:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.15 :: (load 8 from %fixed-stack.15) - ; CHECK: [[LD14:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.14 :: (load 8 from %fixed-stack.14, align 16) - ; CHECK: [[LD15:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.13 :: (load 8 from %fixed-stack.13) - ; CHECK: [[LD16:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.12 :: (load 8 from %fixed-stack.12, align 16) - ; CHECK: [[LD17:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.11 :: (load 8 from %fixed-stack.11) - ; CHECK: [[LD18:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.10 :: (load 8 from %fixed-stack.10, align 16) - ; CHECK: [[LD19:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.9 :: (load 8 from %fixed-stack.9) - ; CHECK: [[LD20:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.8 :: (load 8 from %fixed-stack.8, align 16) - ; CHECK: [[LD21:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.7 :: (load 8 from %fixed-stack.7) - ; CHECK: [[LD22:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.6 :: (load 8 from %fixed-stack.6, align 16) - ; CHECK: [[LD23:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.5 :: (load 8 from %fixed-stack.5) - ; CHECK: [[LD24:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.4 :: (load 8 from %fixed-stack.4, align 16) - ; CHECK: [[LD25:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.3 :: (load 8 from %fixed-stack.3) - ; CHECK: [[LD26:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.2 :: (load 8 from %fixed-stack.2, align 16) - ; CHECK: [[LD27:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.1 :: (load 8 from %fixed-stack.1) - ; CHECK: [[LD28:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.0 :: (load 8 from %fixed-stack.0, align 16) + ; CHECK: [[LD:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.28 :: (load (s64) from %fixed-stack.28, align 16) + ; CHECK: [[LD1:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.27 :: (load (s64) from %fixed-stack.27) + ; CHECK: [[LD2:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.26 :: (load (s64) from %fixed-stack.26, align 16) + ; CHECK: [[LD3:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.25 :: (load (s64) from %fixed-stack.25) + ; CHECK: [[LD4:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.24 :: (load (s64) from %fixed-stack.24, align 16) + ; CHECK: [[LD5:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.23 :: (load (s64) from %fixed-stack.23) + ; CHECK: [[LD6:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.22 :: (load (s64) from %fixed-stack.22, align 16) + ; CHECK: [[LD7:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.21 :: (load (s64) from %fixed-stack.21) + ; CHECK: [[LD8:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.20 :: (load (s64) from %fixed-stack.20, align 16) + ; CHECK: [[LD9:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.19 :: (load (s64) from %fixed-stack.19) + ; CHECK: [[LD10:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.18 :: (load (s64) from %fixed-stack.18, align 16) + ; CHECK: [[LD11:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.17 :: (load (s64) from %fixed-stack.17) + ; CHECK: [[LD12:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.16 :: (load (s64) from %fixed-stack.16, align 16) + ; CHECK: [[LD13:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.15 :: (load (s64) from %fixed-stack.15) + ; CHECK: [[LD14:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.14 :: (load (s64) from %fixed-stack.14, align 16) + ; CHECK: [[LD15:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.13 :: (load (s64) from %fixed-stack.13) + ; CHECK: [[LD16:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.12 :: (load (s64) from %fixed-stack.12, align 16) + ; CHECK: [[LD17:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.11 :: (load (s64) from %fixed-stack.11) + ; CHECK: [[LD18:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.10 :: (load (s64) from %fixed-stack.10, align 16) + ; CHECK: [[LD19:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.9 :: (load (s64) from %fixed-stack.9) + ; CHECK: [[LD20:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.8 :: (load (s64) from %fixed-stack.8, align 16) + ; CHECK: [[LD21:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.7 :: (load (s64) from %fixed-stack.7) + ; CHECK: [[LD22:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.6 :: (load (s64) from %fixed-stack.6, align 16) + ; CHECK: [[LD23:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.5 :: (load (s64) from %fixed-stack.5) + ; CHECK: [[LD24:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.4 :: (load (s64) from %fixed-stack.4, align 16) + ; CHECK: [[LD25:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.3 :: (load (s64) from %fixed-stack.3) + ; CHECK: [[LD26:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.2 :: (load (s64) from %fixed-stack.2, align 16) + ; CHECK: [[LD27:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.1 :: (load (s64) from %fixed-stack.1) + ; CHECK: [[LD28:%[0-9]+]]:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.0 :: (load (s64) from %fixed-stack.0, align 16) ; CHECK: [[DEF:%[0-9]+]]:g8rc = IMPLICIT_DEF ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:g8rc = INSERT_SUBREG [[DEF]], [[COPY7]], %subreg.sub_32 ; CHECK: [[RLDICL:%[0-9]+]]:g8rc = RLDICL killed [[INSERT_SUBREG]], 0, 32 @@ -516,7 +516,7 @@ body: | ; CHECK: [[PHI3:%[0-9]+]]:g8rc_and_g8rc_nox0 = PHI [[ADDI8_]], %bb.1, %13, %bb.8 ; CHECK: [[PHI4:%[0-9]+]]:g8rc_and_g8rc_nox0 = PHI [[ADDI8_1]], %bb.1, %11, %bb.8 ; CHECK: [[PHI5:%[0-9]+]]:g8rc_and_g8rc_nox0 = PHI [[ADDI8_2]], %bb.1, %9, %bb.8 - ; CHECK: [[LWZU:%[0-9]+]]:gprc, [[LWZU1:%[0-9]+]]:g8rc_and_g8rc_nox0 = LWZU 4, [[PHI3]] :: (load 4 from %ir.53, !tbaa !2) + ; CHECK: [[LWZU:%[0-9]+]]:gprc, [[LWZU1:%[0-9]+]]:g8rc_and_g8rc_nox0 = LWZU 4, [[PHI3]] :: (load (s32) from %ir.53, !tbaa !2) ; CHECK: [[COPY9:%[0-9]+]]:gprc_and_gprc_nor0 = COPY [[PHI1]].sub_32 ; CHECK: [[ADD4_:%[0-9]+]]:gprc = nsw ADD4 killed [[LWZU]], [[PHI2]] ; CHECK: BCC 76, [[CMPLWI]], %bb.6 @@ -549,71 +549,71 @@ body: | ; CHECK: [[COPY12:%[0-9]+]]:g8rc = COPY [[ADDI8_4]] ; CHECK: [[COPY13:%[0-9]+]]:g8rc = COPY [[LWZU1]] ; CHECK: [[ADD4_1:%[0-9]+]]:gprc = nsw ADD4 [[PHI6]], [[ADD4_]] - ; CHECK: [[LWZ:%[0-9]+]]:gprc = LWZ 0, [[ADDI8_4]] :: (load 4 from %ir.51, !tbaa !2) + ; CHECK: [[LWZ:%[0-9]+]]:gprc = LWZ 0, [[ADDI8_4]] :: (load (s32) from %ir.51, !tbaa !2) ; CHECK: [[ADD4_2:%[0-9]+]]:gprc = nsw ADD4 killed [[ADD4_1]], killed [[LWZ]] - ; CHECK: [[LWZ1:%[0-9]+]]:gprc = LWZ 0, [[ADDI8_3]] :: (load 4 from %ir.49, !tbaa !2) + ; CHECK: [[LWZ1:%[0-9]+]]:gprc = LWZ 0, [[ADDI8_3]] :: (load (s32) from %ir.49, !tbaa !2) ; CHECK: [[ADD4_3:%[0-9]+]]:gprc = nsw ADD4 killed [[ADD4_2]], killed [[LWZ1]] - ; CHECK: [[LWZX:%[0-9]+]]:gprc = LWZX [[COPY]], [[PHI]] :: (load 4 from %ir.uglygep6061, !tbaa !2) + ; CHECK: [[LWZX:%[0-9]+]]:gprc = LWZX [[COPY]], [[PHI]] :: (load (s32) from %ir.uglygep6061, !tbaa !2) ; CHECK: [[ADD4_4:%[0-9]+]]:gprc = nsw ADD4 killed [[ADD4_3]], killed [[LWZX]] - ; CHECK: [[LWZX1:%[0-9]+]]:gprc = LWZX [[LD28]], [[PHI]] :: (load 4 from %ir.uglygep5859, !tbaa !2) + ; CHECK: [[LWZX1:%[0-9]+]]:gprc = LWZX [[LD28]], [[PHI]] :: (load (s32) from %ir.uglygep5859, !tbaa !2) ; CHECK: [[ADD4_5:%[0-9]+]]:gprc = nsw ADD4 killed [[ADD4_4]], killed [[LWZX1]] - ; CHECK: [[LWZX2:%[0-9]+]]:gprc = LWZX [[LD27]], [[PHI]] :: (load 4 from %ir.uglygep5657, !tbaa !2) + ; CHECK: [[LWZX2:%[0-9]+]]:gprc = LWZX [[LD27]], [[PHI]] :: (load (s32) from %ir.uglygep5657, !tbaa !2) ; CHECK: [[ADD4_6:%[0-9]+]]:gprc = nsw ADD4 killed [[ADD4_5]], killed [[LWZX2]] - ; CHECK: [[LWZX3:%[0-9]+]]:gprc = LWZX [[LD26]], [[PHI]] :: (load 4 from %ir.uglygep5455, !tbaa !2) + ; CHECK: [[LWZX3:%[0-9]+]]:gprc = LWZX [[LD26]], [[PHI]] :: (load (s32) from %ir.uglygep5455, !tbaa !2) ; CHECK: [[ADD4_7:%[0-9]+]]:gprc = nsw ADD4 killed [[ADD4_6]], killed [[LWZX3]] - ; CHECK: [[LWZX4:%[0-9]+]]:gprc = LWZX [[LD25]], [[PHI]] :: (load 4 from %ir.uglygep5253, !tbaa !2) + ; CHECK: [[LWZX4:%[0-9]+]]:gprc = LWZX [[LD25]], [[PHI]] :: (load (s32) from %ir.uglygep5253, !tbaa !2) ; CHECK: [[ADD4_8:%[0-9]+]]:gprc = nsw ADD4 killed [[ADD4_7]], killed [[LWZX4]] - ; CHECK: [[LWZX5:%[0-9]+]]:gprc = LWZX [[LD24]], [[PHI]] :: (load 4 from %ir.uglygep5051, !tbaa !2) + ; CHECK: [[LWZX5:%[0-9]+]]:gprc = LWZX [[LD24]], [[PHI]] :: (load (s32) from %ir.uglygep5051, !tbaa !2) ; CHECK: [[ADD4_9:%[0-9]+]]:gprc = nsw ADD4 killed [[ADD4_8]], killed [[LWZX5]] - ; CHECK: [[LWZX6:%[0-9]+]]:gprc = LWZX [[LD23]], [[PHI]] :: (load 4 from %ir.uglygep4849, !tbaa !2) + ; CHECK: [[LWZX6:%[0-9]+]]:gprc = LWZX [[LD23]], [[PHI]] :: (load (s32) from %ir.uglygep4849, !tbaa !2) ; CHECK: [[ADD4_10:%[0-9]+]]:gprc = nsw ADD4 killed [[ADD4_9]], killed [[LWZX6]] - ; CHECK: [[LWZX7:%[0-9]+]]:gprc = LWZX [[LD22]], [[PHI]] :: (load 4 from %ir.uglygep4647, !tbaa !2) + ; CHECK: [[LWZX7:%[0-9]+]]:gprc = LWZX [[LD22]], [[PHI]] :: (load (s32) from %ir.uglygep4647, !tbaa !2) ; CHECK: [[ADD4_11:%[0-9]+]]:gprc = nsw ADD4 killed [[ADD4_10]], killed [[LWZX7]] - ; CHECK: [[LWZX8:%[0-9]+]]:gprc = LWZX [[LD21]], [[PHI]] :: (load 4 from %ir.uglygep4445, !tbaa !2) + ; CHECK: [[LWZX8:%[0-9]+]]:gprc = LWZX [[LD21]], [[PHI]] :: (load (s32) from %ir.uglygep4445, !tbaa !2) ; CHECK: [[ADD4_12:%[0-9]+]]:gprc = nsw ADD4 killed [[ADD4_11]], killed [[LWZX8]] - ; CHECK: [[LWZX9:%[0-9]+]]:gprc = LWZX [[LD20]], [[PHI]] :: (load 4 from %ir.uglygep4243, !tbaa !2) + ; CHECK: [[LWZX9:%[0-9]+]]:gprc = LWZX [[LD20]], [[PHI]] :: (load (s32) from %ir.uglygep4243, !tbaa !2) ; CHECK: [[ADD4_13:%[0-9]+]]:gprc = nsw ADD4 killed [[ADD4_12]], killed [[LWZX9]] - ; CHECK: [[LWZX10:%[0-9]+]]:gprc = LWZX [[LD19]], [[PHI]] :: (load 4 from %ir.uglygep4041, !tbaa !2) + ; CHECK: [[LWZX10:%[0-9]+]]:gprc = LWZX [[LD19]], [[PHI]] :: (load (s32) from %ir.uglygep4041, !tbaa !2) ; CHECK: [[ADD4_14:%[0-9]+]]:gprc = nsw ADD4 killed [[ADD4_13]], killed [[LWZX10]] - ; CHECK: [[LWZX11:%[0-9]+]]:gprc = LWZX [[LD18]], [[PHI]] :: (load 4 from %ir.uglygep3839, !tbaa !2) + ; CHECK: [[LWZX11:%[0-9]+]]:gprc = LWZX [[LD18]], [[PHI]] :: (load (s32) from %ir.uglygep3839, !tbaa !2) ; CHECK: [[ADD4_15:%[0-9]+]]:gprc = nsw ADD4 killed [[ADD4_14]], killed [[LWZX11]] - ; CHECK: [[LWZX12:%[0-9]+]]:gprc = LWZX [[LD17]], [[PHI]] :: (load 4 from %ir.uglygep3637, !tbaa !2) + ; CHECK: [[LWZX12:%[0-9]+]]:gprc = LWZX [[LD17]], [[PHI]] :: (load (s32) from %ir.uglygep3637, !tbaa !2) ; CHECK: [[ADD4_16:%[0-9]+]]:gprc = nsw ADD4 killed [[ADD4_15]], killed [[LWZX12]] - ; CHECK: [[LWZX13:%[0-9]+]]:gprc = LWZX [[LD16]], [[PHI]] :: (load 4 from %ir.uglygep3435, !tbaa !2) + ; CHECK: [[LWZX13:%[0-9]+]]:gprc = LWZX [[LD16]], [[PHI]] :: (load (s32) from %ir.uglygep3435, !tbaa !2) ; CHECK: [[ADD4_17:%[0-9]+]]:gprc = nsw ADD4 killed [[ADD4_16]], killed [[LWZX13]] - ; CHECK: [[LWZX14:%[0-9]+]]:gprc = LWZX [[LD15]], [[PHI]] :: (load 4 from %ir.uglygep3233, !tbaa !2) + ; CHECK: [[LWZX14:%[0-9]+]]:gprc = LWZX [[LD15]], [[PHI]] :: (load (s32) from %ir.uglygep3233, !tbaa !2) ; CHECK: [[ADD4_18:%[0-9]+]]:gprc = nsw ADD4 killed [[ADD4_17]], killed [[LWZX14]] - ; CHECK: [[LWZX15:%[0-9]+]]:gprc = LWZX [[LD14]], [[PHI]] :: (load 4 from %ir.uglygep3031, !tbaa !2) + ; CHECK: [[LWZX15:%[0-9]+]]:gprc = LWZX [[LD14]], [[PHI]] :: (load (s32) from %ir.uglygep3031, !tbaa !2) ; CHECK: [[ADD4_19:%[0-9]+]]:gprc = nsw ADD4 killed [[ADD4_18]], killed [[LWZX15]] - ; CHECK: [[LWZX16:%[0-9]+]]:gprc = LWZX [[LD13]], [[PHI]] :: (load 4 from %ir.uglygep2829, !tbaa !2) + ; CHECK: [[LWZX16:%[0-9]+]]:gprc = LWZX [[LD13]], [[PHI]] :: (load (s32) from %ir.uglygep2829, !tbaa !2) ; CHECK: [[ADD4_20:%[0-9]+]]:gprc = nsw ADD4 killed [[ADD4_19]], killed [[LWZX16]] - ; CHECK: [[LWZX17:%[0-9]+]]:gprc = LWZX [[LD12]], [[PHI]] :: (load 4 from %ir.uglygep2627, !tbaa !2) + ; CHECK: [[LWZX17:%[0-9]+]]:gprc = LWZX [[LD12]], [[PHI]] :: (load (s32) from %ir.uglygep2627, !tbaa !2) ; CHECK: [[ADD4_21:%[0-9]+]]:gprc = nsw ADD4 killed [[ADD4_20]], killed [[LWZX17]] - ; CHECK: [[LWZX18:%[0-9]+]]:gprc = LWZX [[LD11]], [[PHI]] :: (load 4 from %ir.uglygep2425, !tbaa !2) + ; CHECK: [[LWZX18:%[0-9]+]]:gprc = LWZX [[LD11]], [[PHI]] :: (load (s32) from %ir.uglygep2425, !tbaa !2) ; CHECK: [[ADD4_22:%[0-9]+]]:gprc = nsw ADD4 killed [[ADD4_21]], killed [[LWZX18]] - ; CHECK: [[LWZX19:%[0-9]+]]:gprc = LWZX [[LD10]], [[PHI]] :: (load 4 from %ir.uglygep2223, !tbaa !2) + ; CHECK: [[LWZX19:%[0-9]+]]:gprc = LWZX [[LD10]], [[PHI]] :: (load (s32) from %ir.uglygep2223, !tbaa !2) ; CHECK: [[ADD4_23:%[0-9]+]]:gprc = nsw ADD4 killed [[ADD4_22]], killed [[LWZX19]] - ; CHECK: [[LWZX20:%[0-9]+]]:gprc = LWZX [[LD9]], [[PHI]] :: (load 4 from %ir.uglygep2021, !tbaa !2) + ; CHECK: [[LWZX20:%[0-9]+]]:gprc = LWZX [[LD9]], [[PHI]] :: (load (s32) from %ir.uglygep2021, !tbaa !2) ; CHECK: [[ADD4_24:%[0-9]+]]:gprc = nsw ADD4 killed [[ADD4_23]], killed [[LWZX20]] - ; CHECK: [[LWZX21:%[0-9]+]]:gprc = LWZX [[LD8]], [[PHI]] :: (load 4 from %ir.uglygep1819, !tbaa !2) + ; CHECK: [[LWZX21:%[0-9]+]]:gprc = LWZX [[LD8]], [[PHI]] :: (load (s32) from %ir.uglygep1819, !tbaa !2) ; CHECK: [[ADD4_25:%[0-9]+]]:gprc = nsw ADD4 killed [[ADD4_24]], killed [[LWZX21]] - ; CHECK: [[LWZX22:%[0-9]+]]:gprc = LWZX [[LD7]], [[PHI]] :: (load 4 from %ir.uglygep1617, !tbaa !2) + ; CHECK: [[LWZX22:%[0-9]+]]:gprc = LWZX [[LD7]], [[PHI]] :: (load (s32) from %ir.uglygep1617, !tbaa !2) ; CHECK: [[ADD4_26:%[0-9]+]]:gprc = nsw ADD4 killed [[ADD4_25]], killed [[LWZX22]] - ; CHECK: [[LWZX23:%[0-9]+]]:gprc = LWZX [[LD6]], [[PHI]] :: (load 4 from %ir.uglygep1415, !tbaa !2) + ; CHECK: [[LWZX23:%[0-9]+]]:gprc = LWZX [[LD6]], [[PHI]] :: (load (s32) from %ir.uglygep1415, !tbaa !2) ; CHECK: [[ADD4_27:%[0-9]+]]:gprc = nsw ADD4 killed [[ADD4_26]], killed [[LWZX23]] - ; CHECK: [[LWZX24:%[0-9]+]]:gprc = LWZX [[LD5]], [[PHI]] :: (load 4 from %ir.uglygep1213, !tbaa !2) + ; CHECK: [[LWZX24:%[0-9]+]]:gprc = LWZX [[LD5]], [[PHI]] :: (load (s32) from %ir.uglygep1213, !tbaa !2) ; CHECK: [[ADD4_28:%[0-9]+]]:gprc = nsw ADD4 killed [[ADD4_27]], killed [[LWZX24]] - ; CHECK: [[LWZX25:%[0-9]+]]:gprc = LWZX [[LD4]], [[PHI]] :: (load 4 from %ir.uglygep1011, !tbaa !2) + ; CHECK: [[LWZX25:%[0-9]+]]:gprc = LWZX [[LD4]], [[PHI]] :: (load (s32) from %ir.uglygep1011, !tbaa !2) ; CHECK: [[ADD4_29:%[0-9]+]]:gprc = nsw ADD4 killed [[ADD4_28]], killed [[LWZX25]] - ; CHECK: [[LWZX26:%[0-9]+]]:gprc = LWZX [[LD3]], [[PHI]] :: (load 4 from %ir.uglygep89, !tbaa !2) + ; CHECK: [[LWZX26:%[0-9]+]]:gprc = LWZX [[LD3]], [[PHI]] :: (load (s32) from %ir.uglygep89, !tbaa !2) ; CHECK: [[ADD4_30:%[0-9]+]]:gprc = nsw ADD4 killed [[ADD4_29]], killed [[LWZX26]] - ; CHECK: [[LWZX27:%[0-9]+]]:gprc = LWZX [[LD2]], [[PHI]] :: (load 4 from %ir.uglygep67, !tbaa !2) + ; CHECK: [[LWZX27:%[0-9]+]]:gprc = LWZX [[LD2]], [[PHI]] :: (load (s32) from %ir.uglygep67, !tbaa !2) ; CHECK: [[ADD4_31:%[0-9]+]]:gprc = nsw ADD4 killed [[ADD4_30]], killed [[LWZX27]] - ; CHECK: [[LWZX28:%[0-9]+]]:gprc = LWZX [[LD1]], [[PHI]] :: (load 4 from %ir.uglygep45, !tbaa !2) + ; CHECK: [[LWZX28:%[0-9]+]]:gprc = LWZX [[LD1]], [[PHI]] :: (load (s32) from %ir.uglygep45, !tbaa !2) ; CHECK: [[ADD4_32:%[0-9]+]]:gprc = nsw ADD4 killed [[ADD4_31]], killed [[LWZX28]] - ; CHECK: [[LWZX29:%[0-9]+]]:gprc = LWZX [[LD]], [[PHI]] :: (load 4 from %ir.uglygep23, !tbaa !2) + ; CHECK: [[LWZX29:%[0-9]+]]:gprc = LWZX [[LD]], [[PHI]] :: (load (s32) from %ir.uglygep23, !tbaa !2) ; CHECK: [[ADD4_33:%[0-9]+]]:gprc = nsw ADD4 killed [[ADD4_32]], killed [[LWZX29]] - ; CHECK: STWX killed [[ADD4_33]], [[COPY4]], [[PHI]] :: (store 4 into %ir.uglygep1, !tbaa !2) + ; CHECK: STWX killed [[ADD4_33]], [[COPY4]], [[PHI]] :: (store (s32) into %ir.uglygep1, !tbaa !2) ; CHECK: [[ADDI8_5:%[0-9]+]]:g8rc = nuw nsw ADDI8 [[PHI1]], 1 ; CHECK: [[ADDI8_6:%[0-9]+]]:g8rc = nuw nsw ADDI8 [[PHI]], 4 ; CHECK: BDNZ8 %bb.3, implicit-def dead $ctr8, implicit $ctr8 @@ -631,35 +631,35 @@ body: | %22:g8rc = COPY $x3 %30:gprc = COPY %22.sub_32 %31:gprc = COPY %26.sub_32 - %60:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.0 :: (load 8 from %fixed-stack.0, align 16) - %59:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.1 :: (load 8 from %fixed-stack.1) - %58:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.2 :: (load 8 from %fixed-stack.2, align 16) - %57:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.3 :: (load 8 from %fixed-stack.3) - %56:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.4 :: (load 8 from %fixed-stack.4, align 16) - %55:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.5 :: (load 8 from %fixed-stack.5) - %54:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.6 :: (load 8 from %fixed-stack.6, align 16) - %53:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.7 :: (load 8 from %fixed-stack.7) - %52:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.8 :: (load 8 from %fixed-stack.8, align 16) - %51:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.9 :: (load 8 from %fixed-stack.9) - %50:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.10 :: (load 8 from %fixed-stack.10, align 16) - %49:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.11 :: (load 8 from %fixed-stack.11) - %48:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.12 :: (load 8 from %fixed-stack.12, align 16) - %47:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.13 :: (load 8 from %fixed-stack.13) - %46:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.14 :: (load 8 from %fixed-stack.14, align 16) - %45:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.15 :: (load 8 from %fixed-stack.15) - %44:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.16 :: (load 8 from %fixed-stack.16, align 16) - %43:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.17 :: (load 8 from %fixed-stack.17) - %42:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.18 :: (load 8 from %fixed-stack.18, align 16) - %41:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.19 :: (load 8 from %fixed-stack.19) - %40:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.20 :: (load 8 from %fixed-stack.20, align 16) - %39:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.21 :: (load 8 from %fixed-stack.21) - %38:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.22 :: (load 8 from %fixed-stack.22, align 16) - %37:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.23 :: (load 8 from %fixed-stack.23) - %36:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.24 :: (load 8 from %fixed-stack.24, align 16) - %35:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.25 :: (load 8 from %fixed-stack.25) - %34:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.26 :: (load 8 from %fixed-stack.26, align 16) - %33:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.27 :: (load 8 from %fixed-stack.27) - %32:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.28 :: (load 8 from %fixed-stack.28, align 16) + %60:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.0 :: (load (s64) from %fixed-stack.0, align 16) + %59:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.1 :: (load (s64) from %fixed-stack.1) + %58:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.2 :: (load (s64) from %fixed-stack.2, align 16) + %57:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.3 :: (load (s64) from %fixed-stack.3) + %56:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.4 :: (load (s64) from %fixed-stack.4, align 16) + %55:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.5 :: (load (s64) from %fixed-stack.5) + %54:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.6 :: (load (s64) from %fixed-stack.6, align 16) + %53:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.7 :: (load (s64) from %fixed-stack.7) + %52:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.8 :: (load (s64) from %fixed-stack.8, align 16) + %51:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.9 :: (load (s64) from %fixed-stack.9) + %50:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.10 :: (load (s64) from %fixed-stack.10, align 16) + %49:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.11 :: (load (s64) from %fixed-stack.11) + %48:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.12 :: (load (s64) from %fixed-stack.12, align 16) + %47:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.13 :: (load (s64) from %fixed-stack.13) + %46:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.14 :: (load (s64) from %fixed-stack.14, align 16) + %45:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.15 :: (load (s64) from %fixed-stack.15) + %44:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.16 :: (load (s64) from %fixed-stack.16, align 16) + %43:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.17 :: (load (s64) from %fixed-stack.17) + %42:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.18 :: (load (s64) from %fixed-stack.18, align 16) + %41:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.19 :: (load (s64) from %fixed-stack.19) + %40:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.20 :: (load (s64) from %fixed-stack.20, align 16) + %39:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.21 :: (load (s64) from %fixed-stack.21) + %38:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.22 :: (load (s64) from %fixed-stack.22, align 16) + %37:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.23 :: (load (s64) from %fixed-stack.23) + %36:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.24 :: (load (s64) from %fixed-stack.24, align 16) + %35:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.25 :: (load (s64) from %fixed-stack.25) + %34:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.26 :: (load (s64) from %fixed-stack.26, align 16) + %33:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.27 :: (load (s64) from %fixed-stack.27) + %32:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.28 :: (load (s64) from %fixed-stack.28, align 16) %61:crrc = CMPWI %31, 1 BCC 12, killed %61, %bb.2 B %bb.1 @@ -698,7 +698,7 @@ body: | %9:g8rc = COPY %10 %12:g8rc_and_g8rc_nox0 = ADDI8 %7, 4 %11:g8rc = COPY %12 - %67:gprc, %68:g8rc_and_g8rc_nox0 = LWZU 4, %6 :: (load 4 from %ir.53, !tbaa !2) + %67:gprc, %68:g8rc_and_g8rc_nox0 = LWZU 4, %6 :: (load (s32) from %ir.53, !tbaa !2) %13:g8rc = COPY %68 %14:gprc_and_gprc_nor0 = COPY %4.sub_32 %71:gprc = MULHWU %14, %70 @@ -731,71 +731,71 @@ body: | %19:gprc = PHI %18, %bb.5, %17, %bb.4, %15, %bb.6 %77:gprc = nsw ADD4 %19, %16 - %78:gprc = LWZ 0, %12 :: (load 4 from %ir.51, !tbaa !2) + %78:gprc = LWZ 0, %12 :: (load (s32) from %ir.51, !tbaa !2) %79:gprc = nsw ADD4 killed %77, killed %78 - %80:gprc = LWZ 0, %10 :: (load 4 from %ir.49, !tbaa !2) + %80:gprc = LWZ 0, %10 :: (load (s32) from %ir.49, !tbaa !2) %81:gprc = nsw ADD4 killed %79, killed %80 - %82:gprc = LWZX %29, %3 :: (load 4 from %ir.uglygep6061, !tbaa !2) + %82:gprc = LWZX %29, %3 :: (load (s32) from %ir.uglygep6061, !tbaa !2) %83:gprc = nsw ADD4 killed %81, killed %82 - %84:gprc = LWZX %32, %3 :: (load 4 from %ir.uglygep5859, !tbaa !2) + %84:gprc = LWZX %32, %3 :: (load (s32) from %ir.uglygep5859, !tbaa !2) %85:gprc = nsw ADD4 killed %83, killed %84 - %86:gprc = LWZX %33, %3 :: (load 4 from %ir.uglygep5657, !tbaa !2) + %86:gprc = LWZX %33, %3 :: (load (s32) from %ir.uglygep5657, !tbaa !2) %87:gprc = nsw ADD4 killed %85, killed %86 - %88:gprc = LWZX %34, %3 :: (load 4 from %ir.uglygep5455, !tbaa !2) + %88:gprc = LWZX %34, %3 :: (load (s32) from %ir.uglygep5455, !tbaa !2) %89:gprc = nsw ADD4 killed %87, killed %88 - %90:gprc = LWZX %35, %3 :: (load 4 from %ir.uglygep5253, !tbaa !2) + %90:gprc = LWZX %35, %3 :: (load (s32) from %ir.uglygep5253, !tbaa !2) %91:gprc = nsw ADD4 killed %89, killed %90 - %92:gprc = LWZX %36, %3 :: (load 4 from %ir.uglygep5051, !tbaa !2) + %92:gprc = LWZX %36, %3 :: (load (s32) from %ir.uglygep5051, !tbaa !2) %93:gprc = nsw ADD4 killed %91, killed %92 - %94:gprc = LWZX %37, %3 :: (load 4 from %ir.uglygep4849, !tbaa !2) + %94:gprc = LWZX %37, %3 :: (load (s32) from %ir.uglygep4849, !tbaa !2) %95:gprc = nsw ADD4 killed %93, killed %94 - %96:gprc = LWZX %38, %3 :: (load 4 from %ir.uglygep4647, !tbaa !2) + %96:gprc = LWZX %38, %3 :: (load (s32) from %ir.uglygep4647, !tbaa !2) %97:gprc = nsw ADD4 killed %95, killed %96 - %98:gprc = LWZX %39, %3 :: (load 4 from %ir.uglygep4445, !tbaa !2) + %98:gprc = LWZX %39, %3 :: (load (s32) from %ir.uglygep4445, !tbaa !2) %99:gprc = nsw ADD4 killed %97, killed %98 - %100:gprc = LWZX %40, %3 :: (load 4 from %ir.uglygep4243, !tbaa !2) + %100:gprc = LWZX %40, %3 :: (load (s32) from %ir.uglygep4243, !tbaa !2) %101:gprc = nsw ADD4 killed %99, killed %100 - %102:gprc = LWZX %41, %3 :: (load 4 from %ir.uglygep4041, !tbaa !2) + %102:gprc = LWZX %41, %3 :: (load (s32) from %ir.uglygep4041, !tbaa !2) %103:gprc = nsw ADD4 killed %101, killed %102 - %104:gprc = LWZX %42, %3 :: (load 4 from %ir.uglygep3839, !tbaa !2) + %104:gprc = LWZX %42, %3 :: (load (s32) from %ir.uglygep3839, !tbaa !2) %105:gprc = nsw ADD4 killed %103, killed %104 - %106:gprc = LWZX %43, %3 :: (load 4 from %ir.uglygep3637, !tbaa !2) + %106:gprc = LWZX %43, %3 :: (load (s32) from %ir.uglygep3637, !tbaa !2) %107:gprc = nsw ADD4 killed %105, killed %106 - %108:gprc = LWZX %44, %3 :: (load 4 from %ir.uglygep3435, !tbaa !2) + %108:gprc = LWZX %44, %3 :: (load (s32) from %ir.uglygep3435, !tbaa !2) %109:gprc = nsw ADD4 killed %107, killed %108 - %110:gprc = LWZX %45, %3 :: (load 4 from %ir.uglygep3233, !tbaa !2) + %110:gprc = LWZX %45, %3 :: (load (s32) from %ir.uglygep3233, !tbaa !2) %111:gprc = nsw ADD4 killed %109, killed %110 - %112:gprc = LWZX %46, %3 :: (load 4 from %ir.uglygep3031, !tbaa !2) + %112:gprc = LWZX %46, %3 :: (load (s32) from %ir.uglygep3031, !tbaa !2) %113:gprc = nsw ADD4 killed %111, killed %112 - %114:gprc = LWZX %47, %3 :: (load 4 from %ir.uglygep2829, !tbaa !2) + %114:gprc = LWZX %47, %3 :: (load (s32) from %ir.uglygep2829, !tbaa !2) %115:gprc = nsw ADD4 killed %113, killed %114 - %116:gprc = LWZX %48, %3 :: (load 4 from %ir.uglygep2627, !tbaa !2) + %116:gprc = LWZX %48, %3 :: (load (s32) from %ir.uglygep2627, !tbaa !2) %117:gprc = nsw ADD4 killed %115, killed %116 - %118:gprc = LWZX %49, %3 :: (load 4 from %ir.uglygep2425, !tbaa !2) + %118:gprc = LWZX %49, %3 :: (load (s32) from %ir.uglygep2425, !tbaa !2) %119:gprc = nsw ADD4 killed %117, killed %118 - %120:gprc = LWZX %50, %3 :: (load 4 from %ir.uglygep2223, !tbaa !2) + %120:gprc = LWZX %50, %3 :: (load (s32) from %ir.uglygep2223, !tbaa !2) %121:gprc = nsw ADD4 killed %119, killed %120 - %122:gprc = LWZX %51, %3 :: (load 4 from %ir.uglygep2021, !tbaa !2) + %122:gprc = LWZX %51, %3 :: (load (s32) from %ir.uglygep2021, !tbaa !2) %123:gprc = nsw ADD4 killed %121, killed %122 - %124:gprc = LWZX %52, %3 :: (load 4 from %ir.uglygep1819, !tbaa !2) + %124:gprc = LWZX %52, %3 :: (load (s32) from %ir.uglygep1819, !tbaa !2) %125:gprc = nsw ADD4 killed %123, killed %124 - %126:gprc = LWZX %53, %3 :: (load 4 from %ir.uglygep1617, !tbaa !2) + %126:gprc = LWZX %53, %3 :: (load (s32) from %ir.uglygep1617, !tbaa !2) %127:gprc = nsw ADD4 killed %125, killed %126 - %128:gprc = LWZX %54, %3 :: (load 4 from %ir.uglygep1415, !tbaa !2) + %128:gprc = LWZX %54, %3 :: (load (s32) from %ir.uglygep1415, !tbaa !2) %129:gprc = nsw ADD4 killed %127, killed %128 - %130:gprc = LWZX %55, %3 :: (load 4 from %ir.uglygep1213, !tbaa !2) + %130:gprc = LWZX %55, %3 :: (load (s32) from %ir.uglygep1213, !tbaa !2) %131:gprc = nsw ADD4 killed %129, killed %130 - %132:gprc = LWZX %56, %3 :: (load 4 from %ir.uglygep1011, !tbaa !2) + %132:gprc = LWZX %56, %3 :: (load (s32) from %ir.uglygep1011, !tbaa !2) %133:gprc = nsw ADD4 killed %131, killed %132 - %134:gprc = LWZX %57, %3 :: (load 4 from %ir.uglygep89, !tbaa !2) + %134:gprc = LWZX %57, %3 :: (load (s32) from %ir.uglygep89, !tbaa !2) %135:gprc = nsw ADD4 killed %133, killed %134 - %136:gprc = LWZX %58, %3 :: (load 4 from %ir.uglygep67, !tbaa !2) + %136:gprc = LWZX %58, %3 :: (load (s32) from %ir.uglygep67, !tbaa !2) %137:gprc = nsw ADD4 killed %135, killed %136 - %138:gprc = LWZX %59, %3 :: (load 4 from %ir.uglygep45, !tbaa !2) + %138:gprc = LWZX %59, %3 :: (load (s32) from %ir.uglygep45, !tbaa !2) %139:gprc = nsw ADD4 killed %137, killed %138 - %140:gprc = LWZX %60, %3 :: (load 4 from %ir.uglygep23, !tbaa !2) + %140:gprc = LWZX %60, %3 :: (load (s32) from %ir.uglygep23, !tbaa !2) %141:gprc = nsw ADD4 killed %139, killed %140 - STWX killed %141, %25, %3 :: (store 4 into %ir.uglygep1, !tbaa !2) + STWX killed %141, %25, %3 :: (store (s32) into %ir.uglygep1, !tbaa !2) %20:g8rc = nuw nsw ADDI8 %4, 1 %21:g8rc = nuw nsw ADDI8 %3, 4 BDNZ8 %bb.3, implicit-def dead $ctr8, implicit $ctr8 diff --git a/llvm/test/CodeGen/PowerPC/stack-coloring-vararg.mir b/llvm/test/CodeGen/PowerPC/stack-coloring-vararg.mir index 0085369e19782..9cf3473a29ae5 100644 --- a/llvm/test/CodeGen/PowerPC/stack-coloring-vararg.mir +++ b/llvm/test/CodeGen/PowerPC/stack-coloring-vararg.mir @@ -129,34 +129,34 @@ body: | %2:gprc = COPY $r5 %1:gprc = COPY $r4 %0:gprc = COPY $r3 - STW %0, 0, %stack.2 :: (store 4 into %stack.2, align 8) - STW %1, 4, %stack.2 :: (store 4 into %stack.2 + 4) - STW %2, 8, %stack.2 :: (store 4 into %stack.2 + 8, align 8) - STW %3, 12, %stack.2 :: (store 4) - STW %4, 16, %stack.2 :: (store 4 into %stack.2 + 16, align 8) - STW %5, 20, %stack.2 :: (store 4) - STW %6, 24, %stack.2 :: (store 4 into %stack.2 + 24, align 8) - STW %7, 28, %stack.2 :: (store 4) - STFD %8, 32, %stack.2 :: (store 8) - STFD %9, 40, %stack.2 :: (store 8) - STFD %10, 48, %stack.2 :: (store 8) - STFD %11, 56, %stack.2 :: (store 8) - STFD %12, 64, %stack.2 :: (store 8) - STFD %13, 72, %stack.2 :: (store 8) - STFD %14, 80, %stack.2 :: (store 8) - STFD %15, 88, %stack.2 :: (store 8) + STW %0, 0, %stack.2 :: (store (s32) into %stack.2, align 8) + STW %1, 4, %stack.2 :: (store (s32) into %stack.2 + 4) + STW %2, 8, %stack.2 :: (store (s32) into %stack.2 + 8, align 8) + STW %3, 12, %stack.2 :: (store (s32)) + STW %4, 16, %stack.2 :: (store (s32) into %stack.2 + 16, align 8) + STW %5, 20, %stack.2 :: (store (s32)) + STW %6, 24, %stack.2 :: (store (s32) into %stack.2 + 24, align 8) + STW %7, 28, %stack.2 :: (store (s32)) + STFD %8, 32, %stack.2 :: (store (s64)) + STFD %9, 40, %stack.2 :: (store (s64)) + STFD %10, 48, %stack.2 :: (store (s64)) + STFD %11, 56, %stack.2 :: (store (s64)) + STFD %12, 64, %stack.2 :: (store (s64)) + STFD %13, 72, %stack.2 :: (store (s64)) + STFD %14, 80, %stack.2 :: (store (s64)) + STFD %15, 88, %stack.2 :: (store (s64)) %16:crbitrc = IMPLICIT_DEF BC killed %16, %bb.2 B %bb.1 bb.1.if.then6.i.i: LIFETIME_START %stack.1.ap2.i.i - %17:gprc = LWZ 8, $zero :: (load 4, align 8) - STW killed %17, 8, %stack.1.ap2.i.i :: (store 4 into %stack.1.ap2.i.i + 8, align 8) - %18:gprc = LWZ 4, $zero :: (load 4) - STW killed %18, 4, %stack.1.ap2.i.i :: (store 4 into %stack.1.ap2.i.i + 4, align 8) - %19:gprc = LWZ 0, $zero :: (load 4, align 8) - STW killed %19, 0, %stack.1.ap2.i.i :: (store 4 into %stack.1.ap2.i.i, align 8) + %17:gprc = LWZ 8, $zero :: (load (s32), align 8) + STW killed %17, 8, %stack.1.ap2.i.i :: (store (s32) into %stack.1.ap2.i.i + 8, align 8) + %18:gprc = LWZ 4, $zero :: (load (s32)) + STW killed %18, 4, %stack.1.ap2.i.i :: (store (s32) into %stack.1.ap2.i.i + 4, align 8) + %19:gprc = LWZ 0, $zero :: (load (s32), align 8) + STW killed %19, 0, %stack.1.ap2.i.i :: (store (s32) into %stack.1.ap2.i.i, align 8) BLR implicit $lr, implicit $rm bb.2.format_reason_ap.exit.i: diff --git a/llvm/test/CodeGen/PowerPC/tls_get_addr_fence1.mir b/llvm/test/CodeGen/PowerPC/tls_get_addr_fence1.mir index b26e50f5be69d..9ea4b90086226 100644 --- a/llvm/test/CodeGen/PowerPC/tls_get_addr_fence1.mir +++ b/llvm/test/CodeGen/PowerPC/tls_get_addr_fence1.mir @@ -53,7 +53,7 @@ body: | liveins: $x2 %0 = ADDIStlsgdHA $x2, @tls_var %1 = ADDItlsgdLADDR killed %0, @tls_var, @tls_var, implicit-def dead $x0, implicit-def dead $x3, implicit-def dead $x4, implicit-def dead $x5, implicit-def dead $x6, implicit-def dead $x7, implicit-def dead $x8, implicit-def dead $x9, implicit-def dead $x10, implicit-def dead $x11, implicit-def dead $x12, implicit-def dead $lr8, implicit-def dead $ctr8, implicit-def dead $cr0, implicit-def dead $cr1, implicit-def dead $cr5, implicit-def dead $cr6, implicit-def dead $cr7 - %2 = LWZ8 0, killed %1 :: (dereferenceable load 4 from @tls_var) + %2 = LWZ8 0, killed %1 :: (dereferenceable load (s32) from @tls_var) $x3 = COPY %2 BLR8 implicit $lr8, implicit $rm, implicit $x3 ; CHECK-LABEL: bb.0.entry diff --git a/llvm/test/CodeGen/PowerPC/tls_get_addr_fence2.mir b/llvm/test/CodeGen/PowerPC/tls_get_addr_fence2.mir index d25a5b07a331c..467f866923897 100644 --- a/llvm/test/CodeGen/PowerPC/tls_get_addr_fence2.mir +++ b/llvm/test/CodeGen/PowerPC/tls_get_addr_fence2.mir @@ -54,7 +54,7 @@ body: | ADJCALLSTACKDOWN 32, 0, implicit-def $r1, implicit $r1 %0 = ADDIStlsgdHA $x2, @tls_var %1 = ADDItlsgdLADDR killed %0, @tls_var, @tls_var, implicit-def dead $x0, implicit-def dead $x3, implicit-def dead $x4, implicit-def dead $x5, implicit-def dead $x6, implicit-def dead $x7, implicit-def dead $x8, implicit-def dead $x9, implicit-def dead $x10, implicit-def dead $x11, implicit-def dead $x12, implicit-def dead $lr8, implicit-def dead $ctr8, implicit-def dead $cr0, implicit-def dead $cr1, implicit-def dead $cr5, implicit-def dead $cr6, implicit-def dead $cr7 - %2 = LWZ8 0, killed %1 :: (dereferenceable load 4 from @tls_var) + %2 = LWZ8 0, killed %1 :: (dereferenceable load (s32) from @tls_var) $x3 = COPY %2 ADJCALLSTACKUP 32, 0, implicit-def $r1, implicit $r1 BLR8 implicit $lr8, implicit $rm, implicit $x3 diff --git a/llvm/test/CodeGen/PowerPC/toc-data.ll b/llvm/test/CodeGen/PowerPC/toc-data.ll index 05583cb345811..7557d803db7c3 100644 --- a/llvm/test/CodeGen/PowerPC/toc-data.ll +++ b/llvm/test/CodeGen/PowerPC/toc-data.ll @@ -16,7 +16,7 @@ define dso_local void @write_int(i32 signext %in) { } ; CHECK: name: write_int ; CHECK: %[[SCRATCH:[0-9]+]]:gprc_and_gprc_nor0 = ADDItoc @i, $r2 -; CHECK-NEXT: STW %{{[0-9]+}}, 0, killed %[[SCRATCH]] :: (store 4 into @i) +; CHECK-NEXT: STW %{{[0-9]+}}, 0, killed %[[SCRATCH]] :: (store (s32) into @i) ; TEST: .write_int: ; TEST: la 4, i[TD](2) @@ -28,7 +28,7 @@ define dso_local i64 @read_ll() { ret i64 %0 } ; CHECK: name: read_ll -; CHECK: LWZtoc @ll, $r2 :: (load 4 from got) +; CHECK: LWZtoc @ll, $r2 :: (load (s32) from got) ; TEST: .read_ll: ; TEST: lwz 4, L..C0(2) @@ -42,7 +42,7 @@ define dso_local float @read_float() { } ; CHECK: name: read_float ; CHECK: %[[SCRATCH:[0-9]+]]:gprc_and_gprc_nor0 = ADDItoc @f, $r2 -; CHECK: %{{[0-9]+}}:f4rc = LFS 0, killed %[[SCRATCH]] :: (dereferenceable load 4 from @f) +; CHECK: %{{[0-9]+}}:f4rc = LFS 0, killed %[[SCRATCH]] :: (dereferenceable load (s32) from @f) ; TEST: .read_float: ; TEST: la 3, f[TD](2) @@ -54,7 +54,7 @@ define dso_local void @write_double(double %in) { ret void } ; CHECK: name: write_double -; CHECK: LWZtoc @d, $r2 :: (load 4 from got) +; CHECK: LWZtoc @d, $r2 :: (load (s32) from got) ; TEST: .write_double ; TEST: lwz 3, L..C1(2) diff --git a/llvm/test/CodeGen/PowerPC/topdepthreduce-postra.mir b/llvm/test/CodeGen/PowerPC/topdepthreduce-postra.mir index 49211342d1306..627e553475480 100644 --- a/llvm/test/CodeGen/PowerPC/topdepthreduce-postra.mir +++ b/llvm/test/CodeGen/PowerPC/topdepthreduce-postra.mir @@ -7,12 +7,12 @@ name: test body: | bb.0: ; CHECK-LABEL: name: test - ; CHECK: renamable $x5 = LD 0, killed renamable $x5 :: (load 8) - ; CHECK: renamable $x4 = LD 0, killed renamable $x4 :: (load 8) + ; CHECK: renamable $x5 = LD 0, killed renamable $x5 :: (load (s64)) + ; CHECK: renamable $x4 = LD 0, killed renamable $x4 :: (load (s64)) ; CHECK: renamable $x5 = MULLD killed renamable $x5, renamable $x3 ; CHECK: renamable $x3 = MADDLD8 killed renamable $x4, killed renamable $x3, killed renamable $x5 - renamable $x5 = LD 0, killed renamable $x5 :: (load 8) + renamable $x5 = LD 0, killed renamable $x5 :: (load (s64)) renamable $x5 = MULLD killed renamable $x5, renamable $x3 - renamable $x4 = LD 0, killed renamable $x4 :: (load 8) + renamable $x4 = LD 0, killed renamable $x4 :: (load (s64)) renamable $x3 = MADDLD8 killed renamable $x4, killed renamable $x3, killed renamable $x5 ... diff --git a/llvm/test/CodeGen/PowerPC/two-address-crash.mir b/llvm/test/CodeGen/PowerPC/two-address-crash.mir index d35fcb36e7ecf..a05ace4de2646 100644 --- a/llvm/test/CodeGen/PowerPC/two-address-crash.mir +++ b/llvm/test/CodeGen/PowerPC/two-address-crash.mir @@ -71,7 +71,7 @@ machineFunctionInfo: {} body: | bb.0 (%ir-block.0): liveins: $r3, $r4, $r5, $r6 - + %3:gprc_and_gprc_nor0 = COPY killed $r6 %2:gprc = COPY killed $r5 %1:gprc = COPY killed $r4 @@ -84,7 +84,7 @@ body: | %9:gprc = ISEL killed %8, killed %7, killed %6 %10:gprc = RLWIMI killed %9, killed %0, 1, 0, 30 %11:gprc = XORI killed %10, 1 - STH killed %11, 0, killed %3 :: (store 2 into %ir.P) + STH killed %11, 0, killed %3 :: (store (s16) into %ir.P) BLR implicit $lr, implicit $rm ... diff --git a/llvm/test/CodeGen/PowerPC/unal-vec-negarith.ll b/llvm/test/CodeGen/PowerPC/unal-vec-negarith.ll index 625b9b4b41d7e..7b2a4528c0b33 100644 --- a/llvm/test/CodeGen/PowerPC/unal-vec-negarith.ll +++ b/llvm/test/CodeGen/PowerPC/unal-vec-negarith.ll @@ -9,8 +9,8 @@ entry: %r = load <16 x i8>, <16 x i8>* %p, align 1 ret <16 x i8> %r -; CHECK-NOT: v4i32,ch = llvm.ppc.altivec.lvx{{.*}}<(load 31 from %ir.p + 4294967281, align 1)> -; CHECK: v4i32,ch = llvm.ppc.altivec.lvx{{.*}}<(load 31 from %ir.p - 15, align 1)> +; CHECK-NOT: v4i32,ch = llvm.ppc.altivec.lvx{{.*}}<(load (s248) from %ir.p + 4294967281, align 1)> +; CHECK: v4i32,ch = llvm.ppc.altivec.lvx{{.*}}<(load (s248) from %ir.p - 15, align 1)> } attributes #0 = { nounwind "target-cpu"="pwr7" } diff --git a/llvm/test/CodeGen/RISCV/copy-frameindex.mir b/llvm/test/CodeGen/RISCV/copy-frameindex.mir index 27d52c65839d8..be86a1af8b508 100644 --- a/llvm/test/CodeGen/RISCV/copy-frameindex.mir +++ b/llvm/test/CodeGen/RISCV/copy-frameindex.mir @@ -45,7 +45,7 @@ body: | ; CHECK: bb.1: ; CHECK: successors: %bb.2(0x80000000) ; CHECK: [[ADDI:%[0-9]+]]:gpr = ADDI %stack.0, 0 - ; CHECK: SW $x0, killed [[ADDI]], 0 :: (volatile store 4 into %stack.0) + ; CHECK: SW $x0, killed [[ADDI]], 0 :: (volatile store (s32) into %stack.0) ; CHECK: bb.2: ; CHECK: PseudoRET bb.0: @@ -56,6 +56,6 @@ body: | DBG_VALUE %1, $noreg, !1, !DIExpression(DW_OP_plus_uconst, 0, DW_OP_stack_value), debug-location !3 BEQ killed %0:gpr, $x0, %bb.2 bb.1: - SW $x0, killed %1:gpr, 0 :: (volatile store 4 into %stack.0, align 4) + SW $x0, killed %1:gpr, 0 :: (volatile store (s32) into %stack.0, align 4) bb.2: PseudoRET diff --git a/llvm/test/CodeGen/RISCV/disjoint.ll b/llvm/test/CodeGen/RISCV/disjoint.ll index 5977c61137d7d..926fbeed534c6 100644 --- a/llvm/test/CodeGen/RISCV/disjoint.ll +++ b/llvm/test/CodeGen/RISCV/disjoint.ll @@ -13,9 +13,9 @@ define i32 @test_disjoint(i32* %P, i32 %v) { entry: ; CHECK: ********** MI Scheduling ********** ; CHECK-LABEL: test_disjoint:%bb.0 -; CHECK:SU(2): SW %1:gpr, %0:gpr, 12 :: (store 4 into %ir.arrayidx) +; CHECK:SU(2): SW %1:gpr, %0:gpr, 12 :: (store (s32) into %ir.arrayidx) ; CHECK-NOT: Successors: -; CHECK:SU(3): SW %1:gpr, %0:gpr, 8 :: (store 4 into %ir.arrayidx1) +; CHECK:SU(3): SW %1:gpr, %0:gpr, 8 :: (store (s32) into %ir.arrayidx1) ; CHECK: Predecessors: ; CHECK-NOT: SU(2): Ord Latency=0 Memory %arrayidx = getelementptr inbounds i32, i32* %P, i32 3 diff --git a/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir b/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir index 14513b5698ebf..7ef47daf6120f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir +++ b/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir @@ -29,8 +29,8 @@ body: | ; CHECK: liveins: $x10, $x11, $x1 ; CHECK: $x2 = frame-setup ADDI $x2, -2032 ; CHECK: CFI_INSTRUCTION def_cfa_offset 2032 - ; CHECK: SD killed $x1, $x2, 2024 :: (store 8 into %stack.3) - ; CHECK: SD killed $x8, $x2, 2016 :: (store 8 into %stack.4) + ; CHECK: SD killed $x1, $x2, 2024 :: (store (s64) into %stack.3) + ; CHECK: SD killed $x8, $x2, 2016 :: (store (s64) into %stack.4) ; CHECK: CFI_INSTRUCTION offset $x1, -8 ; CHECK: CFI_INSTRUCTION offset $x8, -16 ; CHECK: $x8 = frame-setup ADDI $x2, 2032 @@ -49,8 +49,8 @@ body: | ; CHECK: $x10 = PseudoReadVLENB ; CHECK: $x2 = ADD $x2, killed $x10 ; CHECK: $x2 = frame-destroy ADDI $x2, 240 - ; CHECK: $x8 = LD $x2, 2016 :: (load 8 from %stack.4) - ; CHECK: $x1 = LD $x2, 2024 :: (load 8 from %stack.3) + ; CHECK: $x8 = LD $x2, 2016 :: (load (s64) from %stack.4) + ; CHECK: $x1 = LD $x2, 2024 :: (load (s64) from %stack.3) ; CHECK: $x2 = frame-destroy ADDI $x2, 2032 ; CHECK: PseudoRET %1:gpr = COPY $x11 diff --git a/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir b/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir index 44fc7c3719347..b51019f36449d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir +++ b/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir @@ -54,19 +54,19 @@ body: | ; CHECK: liveins: $x12, $x1, $x9, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27 ; CHECK: $x2 = frame-setup ADDI $x2, -2032 ; CHECK: CFI_INSTRUCTION def_cfa_offset 2032 - ; CHECK: SD killed $x1, $x2, 2024 :: (store 8 into %stack.3) - ; CHECK: SD killed $x8, $x2, 2016 :: (store 8 into %stack.4) - ; CHECK: SD killed $x9, $x2, 2008 :: (store 8 into %stack.5) - ; CHECK: SD killed $x18, $x2, 2000 :: (store 8 into %stack.6) - ; CHECK: SD killed $x19, $x2, 1992 :: (store 8 into %stack.7) - ; CHECK: SD killed $x20, $x2, 1984 :: (store 8 into %stack.8) - ; CHECK: SD killed $x21, $x2, 1976 :: (store 8 into %stack.9) - ; CHECK: SD killed $x22, $x2, 1968 :: (store 8 into %stack.10) - ; CHECK: SD killed $x23, $x2, 1960 :: (store 8 into %stack.11) - ; CHECK: SD killed $x24, $x2, 1952 :: (store 8 into %stack.12) - ; CHECK: SD killed $x25, $x2, 1944 :: (store 8 into %stack.13) - ; CHECK: SD killed $x26, $x2, 1936 :: (store 8 into %stack.14) - ; CHECK: SD killed $x27, $x2, 1928 :: (store 8 into %stack.15) + ; CHECK: SD killed $x1, $x2, 2024 :: (store (s64) into %stack.3) + ; CHECK: SD killed $x8, $x2, 2016 :: (store (s64) into %stack.4) + ; CHECK: SD killed $x9, $x2, 2008 :: (store (s64) into %stack.5) + ; CHECK: SD killed $x18, $x2, 2000 :: (store (s64) into %stack.6) + ; CHECK: SD killed $x19, $x2, 1992 :: (store (s64) into %stack.7) + ; CHECK: SD killed $x20, $x2, 1984 :: (store (s64) into %stack.8) + ; CHECK: SD killed $x21, $x2, 1976 :: (store (s64) into %stack.9) + ; CHECK: SD killed $x22, $x2, 1968 :: (store (s64) into %stack.10) + ; CHECK: SD killed $x23, $x2, 1960 :: (store (s64) into %stack.11) + ; CHECK: SD killed $x24, $x2, 1952 :: (store (s64) into %stack.12) + ; CHECK: SD killed $x25, $x2, 1944 :: (store (s64) into %stack.13) + ; CHECK: SD killed $x26, $x2, 1936 :: (store (s64) into %stack.14) + ; CHECK: SD killed $x27, $x2, 1928 :: (store (s64) into %stack.15) ; CHECK: CFI_INSTRUCTION offset $x1, -8 ; CHECK: CFI_INSTRUCTION offset $x8, -16 ; CHECK: CFI_INSTRUCTION offset $x9, -24 @@ -112,8 +112,8 @@ body: | ; CHECK: renamable $x21 = ADDI $x2, 1664 ; CHECK: renamable $x22 = ADDI $x2, 1792 ; CHECK: renamable $x23 = ADDI $x2, 1920 - ; CHECK: SD killed $x1, $x2, 8 :: (store 8 into %stack.16) - ; CHECK: SD killed $x5, $x2, 0 :: (store 8 into %stack.17) + ; CHECK: SD killed $x1, $x2, 8 :: (store (s64) into %stack.16) + ; CHECK: SD killed $x5, $x2, 0 :: (store (s64) into %stack.17) ; CHECK: $x11 = LUI 1 ; CHECK: $x11 = ADDIW killed $x11, -2048 ; CHECK: $x24 = ADD $x2, killed $x11 @@ -128,7 +128,7 @@ body: | ; CHECK: renamable $x11 = ANDI renamable $x15, 255 ; CHECK: renamable $x13 = SLLI renamable $x11, 3 ; CHECK: renamable $x13 = ADD renamable $x26, killed renamable $x13 - ; CHECK: renamable $x13 = LD killed renamable $x13, 0 :: (load 8) + ; CHECK: renamable $x13 = LD killed renamable $x13, 0 :: (load (s64)) ; CHECK: renamable $x9 = SRAI renamable $x13, 63 ; CHECK: renamable $x9 = SRLI killed renamable $x9, 62 ; CHECK: renamable $x9 = ADD renamable $x13, killed renamable $x9 @@ -143,9 +143,9 @@ body: | ; CHECK: $x1 = ADDIW killed $x1, -1896 ; CHECK: $x1 = ADD $x2, killed $x1 ; CHECK: $x1 = ADD killed $x1, killed $x5 - ; CHECK: $x5 = LD $x2, 0 :: (load 8 from %stack.17) + ; CHECK: $x5 = LD $x2, 0 :: (load (s64) from %stack.17) ; CHECK: renamable $v0 = PseudoVRELOAD_M1 killed $x1 :: (load unknown-size from %stack.1, align 8) - ; CHECK: $x1 = LD $x2, 8 :: (load 8 from %stack.16) + ; CHECK: $x1 = LD $x2, 8 :: (load (s64) from %stack.16) ; CHECK: renamable $v0 = PseudoVSLIDEDOWN_VX_M1 undef renamable $v0, killed renamable $v0, killed renamable $x13, $noreg, 8, implicit $vl, implicit $vtype ; CHECK: renamable $x13 = PseudoVMV_X_S_M1 killed renamable $v0, 8, implicit $vl, implicit $vtype ; CHECK: BLT killed renamable $x16, renamable $x27, %bb.2 @@ -159,19 +159,19 @@ body: | ; CHECK: $x10 = frame-destroy ADDIW killed $x10, -1792 ; CHECK: $x2 = frame-destroy SUB $x8, killed $x10 ; CHECK: $x2 = frame-destroy ADDI $x2, 272 - ; CHECK: $x27 = LD $x2, 1928 :: (load 8 from %stack.15) - ; CHECK: $x26 = LD $x2, 1936 :: (load 8 from %stack.14) - ; CHECK: $x25 = LD $x2, 1944 :: (load 8 from %stack.13) - ; CHECK: $x24 = LD $x2, 1952 :: (load 8 from %stack.12) - ; CHECK: $x23 = LD $x2, 1960 :: (load 8 from %stack.11) - ; CHECK: $x22 = LD $x2, 1968 :: (load 8 from %stack.10) - ; CHECK: $x21 = LD $x2, 1976 :: (load 8 from %stack.9) - ; CHECK: $x20 = LD $x2, 1984 :: (load 8 from %stack.8) - ; CHECK: $x19 = LD $x2, 1992 :: (load 8 from %stack.7) - ; CHECK: $x18 = LD $x2, 2000 :: (load 8 from %stack.6) - ; CHECK: $x9 = LD $x2, 2008 :: (load 8 from %stack.5) - ; CHECK: $x8 = LD $x2, 2016 :: (load 8 from %stack.4) - ; CHECK: $x1 = LD $x2, 2024 :: (load 8 from %stack.3) + ; CHECK: $x27 = LD $x2, 1928 :: (load (s64) from %stack.15) + ; CHECK: $x26 = LD $x2, 1936 :: (load (s64) from %stack.14) + ; CHECK: $x25 = LD $x2, 1944 :: (load (s64) from %stack.13) + ; CHECK: $x24 = LD $x2, 1952 :: (load (s64) from %stack.12) + ; CHECK: $x23 = LD $x2, 1960 :: (load (s64) from %stack.11) + ; CHECK: $x22 = LD $x2, 1968 :: (load (s64) from %stack.10) + ; CHECK: $x21 = LD $x2, 1976 :: (load (s64) from %stack.9) + ; CHECK: $x20 = LD $x2, 1984 :: (load (s64) from %stack.8) + ; CHECK: $x19 = LD $x2, 1992 :: (load (s64) from %stack.7) + ; CHECK: $x18 = LD $x2, 2000 :: (load (s64) from %stack.6) + ; CHECK: $x9 = LD $x2, 2008 :: (load (s64) from %stack.5) + ; CHECK: $x8 = LD $x2, 2016 :: (load (s64) from %stack.4) + ; CHECK: $x1 = LD $x2, 2024 :: (load (s64) from %stack.3) ; CHECK: $x2 = frame-destroy ADDI $x2, 2032 ; CHECK: PseudoRET bb.0: @@ -207,7 +207,7 @@ body: | renamable $x11 = ANDI renamable $x15, 255 renamable $x13 = SLLI renamable $x11, 3 renamable $x13 = ADD renamable $x26, killed renamable $x13 - renamable $x13 = LD killed renamable $x13, 0 :: (load 8) + renamable $x13 = LD killed renamable $x13, 0 :: (load (s64)) renamable $x9 = SRAI renamable $x13, 63 renamable $x9 = SRLI killed renamable $x9, 62 renamable $x9 = ADD renamable $x13, killed renamable $x9 diff --git a/llvm/test/CodeGen/RISCV/rvv/frameindex-addr.ll b/llvm/test/CodeGen/RISCV/rvv/frameindex-addr.ll index 206c570196e59..3895cbfba97d4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/frameindex-addr.ll +++ b/llvm/test/CodeGen/RISCV/rvv/frameindex-addr.ll @@ -16,7 +16,7 @@ define i64 @test( %0) nounwind { ; CHECK: liveins: $v8 ; CHECK: [[COPY:%[0-9]+]]:vr = COPY $v8 ; CHECK: PseudoVSE64_V_M1 [[COPY]], %stack.0.a, 1, 6 - ; CHECK: [[LD:%[0-9]+]]:gpr = LD %stack.0.a, 0 :: (dereferenceable load 8 from %ir.a) + ; CHECK: [[LD:%[0-9]+]]:gpr = LD %stack.0.a, 0 :: (dereferenceable load (s64) from %ir.a) ; CHECK: $x10 = COPY [[LD]] ; CHECK: PseudoRET implicit $x10 entry: diff --git a/llvm/test/CodeGen/RISCV/rvv/tail-agnostic-impdef-copy.mir b/llvm/test/CodeGen/RISCV/rvv/tail-agnostic-impdef-copy.mir index b971bf6d64f51..446186a9e8cc1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/tail-agnostic-impdef-copy.mir +++ b/llvm/test/CodeGen/RISCV/rvv/tail-agnostic-impdef-copy.mir @@ -52,7 +52,7 @@ body: | ; CHECK: $v0 = COPY [[COPY]] ; CHECK: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF ; CHECK: [[COPY2:%[0-9]+]]:vrm8nov0 = COPY [[DEF]] - ; CHECK: [[PseudoVLE64_V_M8_MASK:%[0-9]+]]:vrm8nov0 = PseudoVLE64_V_M8_MASK [[COPY2]], [[COPY1]], $v0, $x0, 6 :: (load 64 from %ir.a, align 8) + ; CHECK: [[PseudoVLE64_V_M8_MASK:%[0-9]+]]:vrm8nov0 = PseudoVLE64_V_M8_MASK [[COPY2]], [[COPY1]], $v0, $x0, 6 :: (load (s512) from %ir.a, align 8) ; CHECK: $v8m8 = COPY [[PseudoVLE64_V_M8_MASK]] ; CHECK: PseudoRET implicit $v8m8 %1:vr = COPY $v0 @@ -60,7 +60,7 @@ body: | $v0 = COPY %1 %3:vrm8 = IMPLICIT_DEF %4:vrm8nov0 = COPY %3 - %2:vrm8nov0 = PseudoVLE64_V_M8_MASK %4, %0, $v0, $x0, 6 :: (load 64 from %ir.a, align 8) + %2:vrm8nov0 = PseudoVLE64_V_M8_MASK %4, %0, $v0, $x0, 6 :: (load (s512) from %ir.a, align 8) $v8m8 = COPY %2 PseudoRET implicit $v8m8 diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir index 9eaa8b56282ff..07975e585e88f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir @@ -259,17 +259,17 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x11 ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY $x10 ; CHECK: dead $x0 = PseudoVSETIVLI 2, 88, implicit-def $vl, implicit-def $vtype - ; CHECK: [[PseudoVLE64_V_M1_:%[0-9]+]]:vr = PseudoVLE64_V_M1 [[COPY1]], 2, 6, implicit $vl, implicit $vtype :: (load 16 from %ir.x) - ; CHECK: [[PseudoVLE64_V_M1_1:%[0-9]+]]:vr = PseudoVLE64_V_M1 [[COPY]], 2, 6, implicit $vl, implicit $vtype :: (load 16 from %ir.y) + ; CHECK: [[PseudoVLE64_V_M1_:%[0-9]+]]:vr = PseudoVLE64_V_M1 [[COPY1]], 2, 6, implicit $vl, implicit $vtype :: (load (s128) from %ir.x) + ; CHECK: [[PseudoVLE64_V_M1_1:%[0-9]+]]:vr = PseudoVLE64_V_M1 [[COPY]], 2, 6, implicit $vl, implicit $vtype :: (load (s128) from %ir.y) ; CHECK: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 killed [[PseudoVLE64_V_M1_]], killed [[PseudoVLE64_V_M1_1]], 2, 6, implicit $vl, implicit $vtype - ; CHECK: PseudoVSE64_V_M1 killed [[PseudoVADD_VV_M1_]], [[COPY1]], 2, 6, implicit $vl, implicit $vtype :: (store 16 into %ir.x) + ; CHECK: PseudoVSE64_V_M1 killed [[PseudoVADD_VV_M1_]], [[COPY1]], 2, 6, implicit $vl, implicit $vtype :: (store (s128) into %ir.x) ; CHECK: PseudoRET %1:gpr = COPY $x11 %0:gpr = COPY $x10 - %2:vr = PseudoVLE64_V_M1 %0, 2, 6 :: (load 16 from %ir.x) - %3:vr = PseudoVLE64_V_M1 %1, 2, 6 :: (load 16 from %ir.y) + %2:vr = PseudoVLE64_V_M1 %0, 2, 6 :: (load (s128) from %ir.x) + %3:vr = PseudoVLE64_V_M1 %1, 2, 6 :: (load (s128) from %ir.y) %4:vr = PseudoVADD_VV_M1 killed %2, killed %3, 2, 6 - PseudoVSE64_V_M1 killed %4, %0, 2, 6 :: (store 16 into %ir.x) + PseudoVSE64_V_M1 killed %4, %0, 2, 6 :: (store (s128) into %ir.x) PseudoRET ... @@ -297,7 +297,7 @@ body: | ; CHECK: liveins: $x10 ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x10 ; CHECK: dead $x0 = PseudoVSETIVLI 2, 88, implicit-def $vl, implicit-def $vtype - ; CHECK: [[PseudoVLE64_V_M1_:%[0-9]+]]:vr = PseudoVLE64_V_M1 [[COPY]], 2, 6, implicit $vl, implicit $vtype :: (load 16 from %ir.x) + ; CHECK: [[PseudoVLE64_V_M1_:%[0-9]+]]:vr = PseudoVLE64_V_M1 [[COPY]], 2, 6, implicit $vl, implicit $vtype :: (load (s128) from %ir.x) ; CHECK: dead %6:gpr = PseudoVSETVLI $x0, 88, implicit-def $vl, implicit-def $vtype ; CHECK: [[PseudoVMV_V_I_M1_:%[0-9]+]]:vr = PseudoVMV_V_I_M1 0, $noreg, 6, implicit $vl, implicit $vtype ; CHECK: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF @@ -307,7 +307,7 @@ body: | ; CHECK: $x10 = COPY [[PseudoVMV_X_S_M1_]] ; CHECK: PseudoRET implicit $x10 %0:gpr = COPY $x10 - %1:vr = PseudoVLE64_V_M1 %0, 2, 6 :: (load 16 from %ir.x) + %1:vr = PseudoVLE64_V_M1 %0, 2, 6 :: (load (s128) from %ir.x) %2:vr = PseudoVMV_V_I_M1 0, $x0, 6 %4:vr = IMPLICIT_DEF %3:vr = PseudoVREDSUM_VS_M1 %4, killed %1, killed %2, 2, 6 diff --git a/llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv32.mir b/llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv32.mir index a307e870f496a..08a3df1f0b44b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv32.mir +++ b/llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv32.mir @@ -43,7 +43,7 @@ body: | liveins: $x10, $v30m2 $x25 = COPY $x10 - SW renamable $x25, %stack.0, 0 :: (store 4 into %stack.0) + SW renamable $x25, %stack.0, 0 :: (store (s32) into %stack.0) PseudoVSPILL_M2 renamable $v30m2, %stack.1 :: (store unknown-size into %stack.1, align 8) PseudoRET diff --git a/llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv64.mir b/llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv64.mir index 1a374c2c148da..ce0497adaa39f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv64.mir +++ b/llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv64.mir @@ -44,7 +44,7 @@ body: | liveins: $x10, $v30m2 $x25 = COPY $x10 - SD renamable $x25, %stack.0, 0 :: (store 8 into %stack.0) + SD renamable $x25, %stack.0, 0 :: (store (s64) into %stack.0) PseudoVSPILL_M2 renamable $v30m2, %stack.1 :: (store unknown-size into %stack.1, align 8) PseudoRET diff --git a/llvm/test/CodeGen/RISCV/vector-abi.ll b/llvm/test/CodeGen/RISCV/vector-abi.ll index e97f75394bb01..ad371a447438f 100644 --- a/llvm/test/CodeGen/RISCV/vector-abi.ll +++ b/llvm/test/CodeGen/RISCV/vector-abi.ll @@ -12,13 +12,13 @@ define void @caller() { ; RV32: bb.0 (%ir-block.0): ; RV32: ADJCALLSTACKDOWN 0, 0, implicit-def dead $x2, implicit $x2 ; RV32: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 7 - ; RV32: SW killed [[ADDI]], %stack.0, 12 :: (store 4 into %stack.0) + ; RV32: SW killed [[ADDI]], %stack.0, 12 :: (store (s32) into %stack.0) ; RV32: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 6 - ; RV32: SW killed [[ADDI1]], %stack.0, 8 :: (store 4 into %stack.0) + ; RV32: SW killed [[ADDI1]], %stack.0, 8 :: (store (s32) into %stack.0) ; RV32: [[ADDI2:%[0-9]+]]:gpr = ADDI $x0, 5 - ; RV32: SW killed [[ADDI2]], %stack.0, 4 :: (store 4 into %stack.0) + ; RV32: SW killed [[ADDI2]], %stack.0, 4 :: (store (s32) into %stack.0) ; RV32: [[ADDI3:%[0-9]+]]:gpr = ADDI $x0, 4 - ; RV32: SW killed [[ADDI3]], %stack.0, 0 :: (store 4 into %stack.0) + ; RV32: SW killed [[ADDI3]], %stack.0, 0 :: (store (s32) into %stack.0) ; RV32: [[ADDI4:%[0-9]+]]:gpr = ADDI %stack.0, 0 ; RV32: $x10 = COPY [[ADDI4]] ; RV32: PseudoCALL target-flags(riscv-plt) @callee, csr_ilp32_lp64, implicit-def dead $x1, implicit $x10, implicit-def $x2 @@ -32,13 +32,13 @@ define void @caller() { ; RV64: bb.0 (%ir-block.0): ; RV64: ADJCALLSTACKDOWN 0, 0, implicit-def dead $x2, implicit $x2 ; RV64: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 7 - ; RV64: SD killed [[ADDI]], %stack.0, 24 :: (store 8 into %stack.0) + ; RV64: SD killed [[ADDI]], %stack.0, 24 :: (store (s64) into %stack.0) ; RV64: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 6 - ; RV64: SD killed [[ADDI1]], %stack.0, 16 :: (store 8 into %stack.0) + ; RV64: SD killed [[ADDI1]], %stack.0, 16 :: (store (s64) into %stack.0) ; RV64: [[ADDI2:%[0-9]+]]:gpr = ADDI $x0, 5 - ; RV64: SD killed [[ADDI2]], %stack.0, 8 :: (store 8 into %stack.0) + ; RV64: SD killed [[ADDI2]], %stack.0, 8 :: (store (s64) into %stack.0) ; RV64: [[ADDI3:%[0-9]+]]:gpr = ADDI $x0, 4 - ; RV64: SD killed [[ADDI3]], %stack.0, 0 :: (store 8 into %stack.0) + ; RV64: SD killed [[ADDI3]], %stack.0, 0 :: (store (s64) into %stack.0) ; RV64: [[ADDI4:%[0-9]+]]:gpr = ADDI %stack.0, 0 ; RV64: $x10 = COPY [[ADDI4]] ; RV64: PseudoCALL target-flags(riscv-plt) @callee, csr_ilp32_lp64, implicit-def dead $x1, implicit $x10, implicit-def $x2 diff --git a/llvm/test/CodeGen/SPARC/fp128-split.ll b/llvm/test/CodeGen/SPARC/fp128-split.ll index 6a241ea11bb48..f9ac4b681a6c0 100644 --- a/llvm/test/CodeGen/SPARC/fp128-split.ll +++ b/llvm/test/CodeGen/SPARC/fp128-split.ll @@ -13,11 +13,11 @@ define fp128 @testcase(fp128 %0) { ; CHECK: [[COPY1:%[0-9]+]]:dfpregs = COPY [[COPY]].sub_odd64 ; CHECK: [[LEAX_ADDri:%[0-9]+]]:i64regs = LEAX_ADDri %stack.0, 0 ; CHECK: [[ORXri:%[0-9]+]]:i64regs = ORXri killed [[LEAX_ADDri]], 8 - ; CHECK: STDFrr [[ORXri]], $g0, killed [[COPY1]] :: (store 8 into %stack.0 + 8) + ; CHECK: STDFrr [[ORXri]], $g0, killed [[COPY1]] :: (store (s64) into %stack.0 + 8) ; CHECK: [[COPY2:%[0-9]+]]:dfpregs = COPY [[COPY]].sub_even64 - ; CHECK: STDFri %stack.0, 0, killed [[COPY2]] :: (store 8 into %stack.0, align 16) - ; CHECK: [[LDXrr:%[0-9]+]]:i64regs = LDXrr [[ORXri]], $g0 :: (load 8 from %stack.0 + 8) - ; CHECK: [[LDXri:%[0-9]+]]:i64regs = LDXri %stack.0, 0 :: (load 8 from %stack.0, align 16) + ; CHECK: STDFri %stack.0, 0, killed [[COPY2]] :: (store (s64) into %stack.0, align 16) + ; CHECK: [[LDXrr:%[0-9]+]]:i64regs = LDXrr [[ORXri]], $g0 :: (load (s64) from %stack.0 + 8) + ; CHECK: [[LDXri:%[0-9]+]]:i64regs = LDXri %stack.0, 0 :: (load (s64) from %stack.0, align 16) ; CHECK: [[COPY3:%[0-9]+]]:intregs = COPY [[LDXrr]] ; CHECK: [[COPY4:%[0-9]+]]:intregs = COPY [[LDXri]] ; CHECK: [[SRLXri:%[0-9]+]]:i64regs = SRLXri [[LDXrr]], 32 @@ -34,16 +34,16 @@ define fp128 @testcase(fp128 %0) { ; CHECK: [[ORXrr:%[0-9]+]]:i64regs = ORXrr killed [[SLLXri]], killed [[SRLri]] ; CHECK: [[LEAX_ADDri1:%[0-9]+]]:i64regs = LEAX_ADDri %stack.1, 0 ; CHECK: [[ORXri1:%[0-9]+]]:i64regs = ORXri killed [[LEAX_ADDri1]], 8 - ; CHECK: STXrr [[ORXri1]], $g0, killed [[ORXrr]] :: (store 8 into %stack.1 + 8, basealign 16) + ; CHECK: STXrr [[ORXri1]], $g0, killed [[ORXrr]] :: (store (s64) into %stack.1 + 8, basealign 16) ; CHECK: [[SRLri1:%[0-9]+]]:i64regs = SRLri killed [[ADDEri1]], 0 ; CHECK: [[COPY8:%[0-9]+]]:i64regs = COPY [[ADDEri2]] ; CHECK: [[SLLXri1:%[0-9]+]]:i64regs = SLLXri killed [[COPY8]], 32 ; CHECK: [[ORXrr1:%[0-9]+]]:i64regs = ORXrr killed [[SLLXri1]], killed [[SRLri1]] - ; CHECK: STXri %stack.1, 0, killed [[ORXrr1]] :: (store 8 into %stack.1, align 16) - ; CHECK: [[LDDFri:%[0-9]+]]:dfpregs = LDDFri %stack.1, 0 :: (load 8 from %stack.1, align 16) + ; CHECK: STXri %stack.1, 0, killed [[ORXrr1]] :: (store (s64) into %stack.1, align 16) + ; CHECK: [[LDDFri:%[0-9]+]]:dfpregs = LDDFri %stack.1, 0 :: (load (s64) from %stack.1, align 16) ; CHECK: [[DEF:%[0-9]+]]:qfpregs = IMPLICIT_DEF ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:qfpregs = INSERT_SUBREG [[DEF]], killed [[LDDFri]], %subreg.sub_even64 - ; CHECK: [[LDDFrr:%[0-9]+]]:dfpregs = LDDFrr [[ORXri1]], $g0 :: (load 8 from %stack.1 + 8) + ; CHECK: [[LDDFrr:%[0-9]+]]:dfpregs = LDDFrr [[ORXri1]], $g0 :: (load (s64) from %stack.1 + 8) ; CHECK: [[INSERT_SUBREG1:%[0-9]+]]:qfpregs = INSERT_SUBREG [[INSERT_SUBREG]], killed [[LDDFrr]], %subreg.sub_odd64 ; CHECK: $q0 = COPY [[INSERT_SUBREG1]] ; CHECK: RETL 8, implicit $q0 diff --git a/llvm/test/CodeGen/SystemZ/RAbasic-invalid-LR-update.mir b/llvm/test/CodeGen/SystemZ/RAbasic-invalid-LR-update.mir index 3dfc1a7ec5498..0c02c26fe5b12 100644 --- a/llvm/test/CodeGen/SystemZ/RAbasic-invalid-LR-update.mir +++ b/llvm/test/CodeGen/SystemZ/RAbasic-invalid-LR-update.mir @@ -149,7 +149,7 @@ body: | %11 = VGBM 0 %43 = LHIMux 0 %44 = LARL %const.0 - %45 = VL64 %44, 0, $noreg :: (load 8 from constant-pool) + %45 = VL64 %44, 0, $noreg :: (load (s64) from constant-pool) bb.1: ADJCALLSTACKDOWN 0, 0 @@ -194,7 +194,7 @@ body: | %36 = VLVGH %36, %20.subreg_l32, $noreg, 0 %36 = VLVGH %36, %34.subreg_l32, $noreg, 1 dead %36 = VLVGH %36, %40.subreg_l32, $noreg, 2 - %4 = LG undef %42, 0, $noreg :: (load 8 from `i64* undef`) + %4 = LG undef %42, 0, $noreg :: (load (s64) from `i64* undef`) undef %57.subreg_h64 = LLILL 0 undef %66.subreg_h64 = LLILL 0 undef %79.subreg_h64 = LLILL 0 diff --git a/llvm/test/CodeGen/SystemZ/branch-folder-hoist-livein.mir b/llvm/test/CodeGen/SystemZ/branch-folder-hoist-livein.mir index 96a44768c6c30..5e100b88ead30 100644 --- a/llvm/test/CodeGen/SystemZ/branch-folder-hoist-livein.mir +++ b/llvm/test/CodeGen/SystemZ/branch-folder-hoist-livein.mir @@ -19,10 +19,10 @@ body: | successors: %bb.2(0x7fffffff), %bb.1(0x00000001) liveins: - renamable $r1d = LGRL @b :: (load 4 from got, align 8) - renamable $r1l = LH killed renamable $r1d, 0, $noreg, implicit-def $r1d :: (dereferenceable load 1 from @b) + renamable $r1d = LGRL @b :: (load (s32) from got, align 8) + renamable $r1l = LH killed renamable $r1d, 0, $noreg, implicit-def $r1d :: (dereferenceable load (s8) from @b) renamable $r2l = LHI 0 - renamable $r3d = LGRL @d :: (load 4 from got, align 8) + renamable $r3d = LGRL @d :: (load (s32) from got, align 8) renamable $r4d = LLILL 0, implicit-def $r4q CHI killed renamable $r2l, 0, implicit-def $cc @@ -34,13 +34,13 @@ body: | liveins: $r1l, $r3d, $r4q renamable $r4d = COPY killed renamable $r4d, implicit killed $r4q - STH renamable $r1l, killed renamable $r3d, 0, $noreg, implicit killed $r4d :: (store 1 into @d) + STH renamable $r1l, killed renamable $r3d, 0, $noreg, implicit killed $r4d :: (store (s8) into @d) bb.2: liveins: $r1l, $r3d, $r4q renamable $r4d = COPY killed renamable $r4d, implicit killed $r4q - STH renamable $r1l, killed renamable $r3d, 0, $noreg, implicit killed $r4d :: (store 1 into @d) + STH renamable $r1l, killed renamable $r3d, 0, $noreg, implicit killed $r4d :: (store (s8) into @d) Return ... diff --git a/llvm/test/CodeGen/SystemZ/clear-liverange-spillreg.mir b/llvm/test/CodeGen/SystemZ/clear-liverange-spillreg.mir index 0bc1af0c1db10..13110579bc0f3 100644 --- a/llvm/test/CodeGen/SystemZ/clear-liverange-spillreg.mir +++ b/llvm/test/CodeGen/SystemZ/clear-liverange-spillreg.mir @@ -24,7 +24,7 @@ name: encode_one_macroblock alignment: 4 tracksRegLiveness: true -registers: +registers: - { id: 0, class: addr64bit } - { id: 1, class: addr64bit } - { id: 2, class: grx32bit } @@ -156,71 +156,71 @@ registers: - { id: 128, class: grx32bit } - { id: 129, class: grx32bit } - { id: 130, class: fp64bit } -frameInfo: +frameInfo: hasCalls: true body: | bb.0: successors: %bb.2(0x00000001), %bb.1(0x7fffffff) - + CHIMux undef %20, 3, implicit-def $cc BRC 14, 8, %bb.2, implicit killed $cc J %bb.1 - + bb.1: successors: %bb.2(0x00000001), %bb.3(0x7fffffff) - + CHIMux undef %21, 0, implicit-def $cc BRC 14, 6, %bb.3, implicit killed $cc J %bb.2 - + bb.2: - + bb.3: successors: %bb.6(0x00000001), %bb.4(0x7fffffff) - + CHIMux undef %23, 2, implicit-def $cc BRC 14, 8, %bb.6, implicit killed $cc J %bb.4 - + bb.4: successors: %bb.5(0x00000001), %bb.7(0x7fffffff) - + CHIMux undef %24, 1, implicit-def $cc BRC 14, 6, %bb.7, implicit killed $cc J %bb.5 - + bb.5: - + bb.6: - + bb.7: successors: %bb.47(0x00000001), %bb.8(0x7fffffff) - + CHIMux undef %25, 1, implicit-def $cc BRC 14, 8, %bb.47, implicit killed $cc J %bb.8 - + bb.8: successors: %bb.46(0x00000001), %bb.48(0x7fffffff) - + CHIMux undef %26, 2, implicit-def $cc BRC 14, 8, %bb.46, implicit killed $cc J %bb.48 - + bb.9: successors: %bb.36(0x00000001), %bb.10(0x7fffffff) - + CHIMux undef %31, 1, implicit-def $cc BRC 14, 8, %bb.36, implicit killed $cc J %bb.10 - + bb.10: successors: %bb.35(0x00000001), %bb.37(0x7fffffff) - + CHIMux undef %32, 2, implicit-def $cc BRC 14, 8, %bb.35, implicit killed $cc J %bb.37 - + bb.11: %4 = COPY %60 %6 = SLLG %120, $noreg, 1 @@ -232,26 +232,26 @@ body: | %47 = SRK %120.subreg_l32, %45, implicit-def dead $cc %47 = SLL %47, $noreg, 3 %81 = LGFR %47 - + bb.12: successors: %bb.56, %bb.13 - + CHIMux %38, 0, implicit-def $cc BRC 14, 8, %bb.13, implicit killed $cc - + bb.56: J %bb.16 - + bb.13: successors: %bb.14(0x7fffffff), %bb.15(0x00000001) - + ADJCALLSTACKDOWN 0, 0 %49 = LGFR %120.subreg_l32 $r2d = COPY %49 CallBRASL @Get_Direct_Cost8x8, killed $r2d, undef $r3d, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit-def $r2d ADJCALLSTACKUP 0, 0 %51 = COPY killed $r2d - MVHHI %7, 0, 0 :: (store 2) + MVHHI %7, 0, 0 :: (store (s16)) %12 = ARK %51.subreg_l32, %125, implicit-def dead $cc CFIMux %51.subreg_l32, 2147483647, implicit-def $cc %12 = LOCRMux %12, %126, 14, 8, implicit killed $cc @@ -260,7 +260,7 @@ body: | CHIMux undef %56, 0, implicit-def $cc BRC 14, 6, %bb.15, implicit killed $cc J %bb.14 - + bb.14: %124 = AHIMux %124, 1, implicit-def dead $cc ADJCALLSTACKDOWN 0, 0 @@ -268,72 +268,72 @@ body: | ADJCALLSTACKUP 0, 0 %125 = COPY %12 J %bb.16 - + bb.15: - + bb.16: successors: %bb.12(0x7c000000), %bb.17(0x04000000) - + CLGFI undef %59, 4, implicit-def $cc BRC 14, 4, %bb.12, implicit killed $cc J %bb.17 - + bb.17: successors: %bb.18, %bb.19 - - MVHI %0, 332, 2 :: (store 4) + + MVHI %0, 332, 2 :: (store (s32)) %60 = COPY %126 %60 = AR %60, %4, implicit-def dead $cc - %18 = LHMux %6, 0, $noreg :: (load 2) + %18 = LHMux %6, 0, $noreg :: (load (s16)) CHIMux %38, 0, implicit-def $cc BRC 14, 6, %bb.19, implicit killed $cc J %bb.18 - + bb.18: %62 = SLLG %81, $noreg, 1 %64 = LA %62, 0, %63 - %65 = LG undef %66, 0, $noreg :: (load 8) - %67 = LGF undef %68, 0, $noreg :: (load 4) - MVC undef %69, 0, 2, %64, 0 :: (store 2), (load 2) + %65 = LG undef %66, 0, $noreg :: (load (s64)) + %67 = LGF undef %68, 0, $noreg :: (load (s32)) + MVC undef %69, 0, 2, %64, 0 :: (store (s16)), (load (s16)) %70 = COPY %81 %70 = OILL64 %70, 3, implicit-def dead $cc %71 = LA %70, 2, $noreg %72 = SLLG %71, $noreg, 1 - %73 = LHMux %72, 0, %63 :: (load 2) + %73 = LHMux %72, 0, %63 :: (load (s16)) %74 = LA %70, 2, %67 %75 = SLLG %74, $noreg, 1 - %76 = LG %65, 0, $noreg :: (load 8) - STHMux %73, %76, 0, %75 :: (store 2) - %77 = LG undef %78, 0, $noreg :: (load 8) - %79 = LHRL @rec_mbY8x8 :: (load 2) - STHMux %79, %77, 0, $noreg :: (store 2) - %80 = LHMux %72, 0, %63 :: (load 2) - STHMux %80, %77, 0, %75 :: (store 2) + %76 = LG %65, 0, $noreg :: (load (s64)) + STHMux %73, %76, 0, %75 :: (store (s16)) + %77 = LG undef %78, 0, $noreg :: (load (s64)) + %79 = LHRL @rec_mbY8x8 :: (load (s16)) + STHMux %79, %77, 0, $noreg :: (store (s16)) + %80 = LHMux %72, 0, %63 :: (load (s16)) + STHMux %80, %77, 0, %75 :: (store (s16)) %81 = OILL64 %81, 7, implicit-def dead $cc %82 = SLLG %81, $noreg, 1 - %83 = LHMux %82, 0, %63 :: (load 2) - STHMux %83, %77, 0, $noreg :: (store 2) + %83 = LHMux %82, 0, %63 :: (load (s16)) + STHMux %83, %77, 0, $noreg :: (store (s16)) %84 = LA %62, 64, %63 - MVC undef %85, 0, 2, %84, 0 :: (store 2), (load 2) + MVC undef %85, 0, 2, %84, 0 :: (store (s16)), (load (s16)) %86 = SLLG %70, $noreg, 1 - %87 = LHMux %86, 64, %63 :: (load 2) + %87 = LHMux %86, 64, %63 :: (load (s16)) %88 = SLLG %67, $noreg, 3 - %89 = LG %65, 16, %88 :: (load 8) + %89 = LG %65, 16, %88 :: (load (s64)) %90 = LA %70, 0, %67 %91 = SLLG %90, $noreg, 1 - STHMux %87, %89, 0, %91 :: (store 2) + STHMux %87, %89, 0, %91 :: (store (s16)) %92 = LA %72, 64, %63 - MVC undef %93, 0, 2, %92, 0 :: (store 2), (load 2) + MVC undef %93, 0, 2, %92, 0 :: (store (s16)), (load (s16)) %94 = LA %86, 6, %63 - MVC undef %95, 0, 2, %94, 0 :: (store 2), (load 2) + MVC undef %95, 0, 2, %94, 0 :: (store (s16)), (load (s16)) %96 = LA %82, 0, %63 - MVC undef %97, 0, 2, %96, 0 :: (store 2), (load 2) - + MVC undef %97, 0, 2, %96, 0 :: (store (s16)), (load (s16)) + bb.19: successors: %bb.20(0x04000000), %bb.11(0x7c000000) - - %98 = LGH %7, 0, $noreg :: (load 2) - %99 = LGH undef %100, 0, $noreg :: (load 2) + + %98 = LGH %7, 0, $noreg :: (load (s16)) + %99 = LGH undef %100, 0, $noreg :: (load (s16)) ADJCALLSTACKDOWN 0, 0 %101 = LGFR %120.subreg_l32 %102 = LGFR %18 @@ -351,27 +351,27 @@ body: | CGHI %120, 4, implicit-def $cc BRC 14, 6, %bb.11, implicit killed $cc J %bb.20 - + bb.20: successors: %bb.22(0x00000001), %bb.21(0x7fffffff) - - MVHI undef %105, 0, 0 :: (store 4) + + MVHI undef %105, 0, 0 :: (store (s32)) CHIMux undef %106, 3, implicit-def $cc BRC 14, 8, %bb.22, implicit killed $cc J %bb.21 - + bb.21: successors: %bb.22(0x00000001), %bb.23(0x7fffffff) - + CHIMux undef %107, 0, implicit-def $cc BRC 14, 6, %bb.23, implicit killed $cc J %bb.22 - + bb.22: - + bb.23: successors: %bb.26(0x00000001), %bb.24(0x7fffffff) - + ADJCALLSTACKDOWN 0, 0 CallBRASL @Get_Direct_CostMB, undef $f0d, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit-def dead $r2d ADJCALLSTACKUP 0, 0 @@ -382,97 +382,97 @@ body: | CHIMux undef %111, 13, implicit-def $cc BRC 14, 8, %bb.26, implicit killed $cc J %bb.24 - + bb.24: successors: %bb.25(0x00000001), %bb.27(0x7fffffff) - + CHIMux undef %112, 8, implicit-def $cc BRC 14, 6, %bb.27, implicit killed $cc J %bb.25 - + bb.25: - + bb.26: - + bb.27: successors: %bb.28, %bb.29 - + CHIMux undef %114, 0, implicit-def $cc BRC 14, 6, %bb.29, implicit killed $cc - + bb.28: %130 = CDFBR %60, implicit $fpc J %bb.30 - + bb.29: %130 = IMPLICIT_DEF - + bb.30: successors: %bb.33(0x00000001), %bb.31(0x7fffffff) - - VST64 %130, undef %117, 0, $noreg :: (store 8) + + VST64 %130, undef %117, 0, $noreg :: (store (s64)) CHIMux undef %118, 2, implicit-def $cc BRC 14, 8, %bb.33, implicit killed $cc J %bb.31 - + bb.31: successors: %bb.32(0x00000001), %bb.34(0x7fffffff) - + CHIMux undef %119, 1, implicit-def $cc BRC 14, 6, %bb.34, implicit killed $cc J %bb.32 - + bb.32: - + bb.33: - + bb.34: Return - + bb.35: - + bb.36: - + bb.37: successors: %bb.40(0x00000001), %bb.38(0x7fffffff) - + CHIMux undef %33, 1, implicit-def $cc BRC 14, 8, %bb.40, implicit killed $cc J %bb.38 - + bb.38: successors: %bb.39(0x00000001), %bb.41(0x7fffffff) - + CHIMux undef %34, 2, implicit-def $cc BRC 14, 6, %bb.41, implicit killed $cc J %bb.39 - + bb.39: - + bb.40: - + bb.41: successors: %bb.44(0x00000001), %bb.42(0x7fffffff) - + CHIMux undef %35, 1, implicit-def $cc BRC 14, 8, %bb.44, implicit killed $cc J %bb.42 - + bb.42: successors: %bb.43(0x00000001), %bb.45(0x7fffffff) - + CHIMux undef %36, 2, implicit-def $cc BRC 14, 6, %bb.45, implicit killed $cc J %bb.43 - + bb.43: - + bb.44: - + bb.45: - %0 = LG undef %22, 0, $noreg :: (load 8) + %0 = LG undef %22, 0, $noreg :: (load (s64)) %38 = LHIMux 0 - STRL %38, @bi_pred_me :: (store 4) + STRL %38, @bi_pred_me :: (store (s32)) %120 = LGHI 0 %41 = LARL @best8x8fwref %42 = LARL @best8x8mode @@ -482,45 +482,45 @@ body: | %125 = LHIMux 0 %60 = LHIMux 0 J %bb.11 - + bb.46: - + bb.47: - + bb.48: successors: %bb.51(0x00000001), %bb.49(0x7fffffff) - + CHIMux undef %27, 1, implicit-def $cc BRC 14, 8, %bb.51, implicit killed $cc J %bb.49 - + bb.49: successors: %bb.50(0x00000001), %bb.52(0x7fffffff) - + CHIMux undef %28, 2, implicit-def $cc BRC 14, 6, %bb.52, implicit killed $cc J %bb.50 - + bb.50: - + bb.51: - + bb.52: successors: %bb.55(0x00000001), %bb.53(0x7fffffff) - + CHIMux undef %29, 1, implicit-def $cc BRC 14, 8, %bb.55, implicit killed $cc J %bb.53 - + bb.53: successors: %bb.54(0x00000001), %bb.9(0x7fffffff) - + CHIMux undef %30, 2, implicit-def $cc BRC 14, 6, %bb.9, implicit killed $cc J %bb.54 - + bb.54: - + bb.55: ... diff --git a/llvm/test/CodeGen/SystemZ/combine_loads_from_build_pair.ll b/llvm/test/CodeGen/SystemZ/combine_loads_from_build_pair.ll index 4e76ea6caaec4..4258848ac4270 100644 --- a/llvm/test/CodeGen/SystemZ/combine_loads_from_build_pair.ll +++ b/llvm/test/CodeGen/SystemZ/combine_loads_from_build_pair.ll @@ -8,13 +8,13 @@ define i128 @func1({ i128, i8* } %struct) { ; so we expect the LD8 to load from the address used in the original HIBITS ; load. ; CHECK-LABEL: Initial selection DAG: -; CHECK: [[LOBITS:t[0-9]+]]: i64,ch = load<(load 8)> -; CHECK: [[HIBITS:t[0-9]+]]: i64,ch = load<(load 8)> +; CHECK: [[LOBITS:t[0-9]+]]: i64,ch = load<(load (s64))> +; CHECK: [[HIBITS:t[0-9]+]]: i64,ch = load<(load (s64))> ; CHECK: Combining: t{{[0-9]+}}: i128 = build_pair [[LOBITS]], [[HIBITS]] ; CHECK-NEXT: Creating new node -; CHECK-SAME: load<(load 16, align 8)> +; CHECK-SAME: load<(load (s128), align 8)> ; CHECK-NEXT: into -; CHECK-SAME: load<(load 16, align 8)> +; CHECK-SAME: load<(load (s128), align 8)> ; CHECK-LABEL: Optimized lowered selection DAG: %result = extractvalue {i128, i8* } %struct, 0 ret i128 %result diff --git a/llvm/test/CodeGen/SystemZ/cond-move-05.mir b/llvm/test/CodeGen/SystemZ/cond-move-05.mir index 09bb855de2693..3ce98de94fe4a 100644 --- a/llvm/test/CodeGen/SystemZ/cond-move-05.mir +++ b/llvm/test/CodeGen/SystemZ/cond-move-05.mir @@ -63,7 +63,7 @@ registers: - { id: 10, class: gr64bit } body: | bb.0.entry: - %0:gr64bit = LGFRL @g_74 :: (dereferenceable load 4 from @g_74) + %0:gr64bit = LGFRL @g_74 :: (dereferenceable load (s32) from @g_74) undef %3.subreg_l64:gr128bit = LGHI 1 %3.subreg_h64:gr128bit = LLILL 0 %3:gr128bit = DLGR %3, %0 diff --git a/llvm/test/CodeGen/SystemZ/cond-move-08.mir b/llvm/test/CodeGen/SystemZ/cond-move-08.mir index 66edbac416e03..067b90f938e44 100644 --- a/llvm/test/CodeGen/SystemZ/cond-move-08.mir +++ b/llvm/test/CodeGen/SystemZ/cond-move-08.mir @@ -129,12 +129,12 @@ body: | bb.2.bb7: successors: %bb.3(0x04000000), %bb.2(0x7c000000) - %14:gr32bit = LMux %26, 0, $noreg :: (load 4 from %ir.lsr.iv12) + %14:gr32bit = LMux %26, 0, $noreg :: (load (s32) from %ir.lsr.iv12) CR %14, undef %15:gr32bit, implicit-def $cc %16:grx32bit = COPY %28.subreg_l32 %16:grx32bit = LOCHIMux %16, 0, 14, 12, implicit $cc %17:grx32bit = SELRMux %27, %28.subreg_l32, 14, 2, implicit killed $cc - %18:gr32bit = LMux undef %19:addr64bit, 0, $noreg :: (load 4 from `i32* undef`) + %18:gr32bit = LMux undef %19:addr64bit, 0, $noreg :: (load (s32) from `i32* undef`) %20:grx32bit = COPY %28.subreg_l32 %20:grx32bit = OILMux %20, 3, implicit-def dead $cc CR undef %21:gr32bit, %18, implicit-def $cc @@ -157,7 +157,7 @@ body: | ADJCALLSTACKDOWN 0, 0 CallBRASL @fun, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc ADJCALLSTACKUP 0, 0 - STRL %4, @globvar :: (store 4 into @globvar) + STRL %4, @globvar :: (store (s32) into @globvar) CLFIMux undef %23:grx32bit, 1, implicit-def $cc %25:grx32bit = LHIMux 0 %25:grx32bit = LOCHIMux %25, 1, 14, 2, implicit killed $cc diff --git a/llvm/test/CodeGen/SystemZ/cond-move-regalloc-hints-02.mir b/llvm/test/CodeGen/SystemZ/cond-move-regalloc-hints-02.mir index 8ea5d605ef564..556d2d70d4434 100644 --- a/llvm/test/CodeGen/SystemZ/cond-move-regalloc-hints-02.mir +++ b/llvm/test/CodeGen/SystemZ/cond-move-regalloc-hints-02.mir @@ -39,7 +39,7 @@ body: | %9:grx32bit = LHIMux 66 bb.1: - %6:grx32bit = LLCMux undef %7:addr64bit, 0, $noreg :: (load 1 from `i8* undef`) + %6:grx32bit = LLCMux undef %7:addr64bit, 0, $noreg :: (load (s8) from `i8* undef`) CHIMux %6, 1, implicit-def $cc %11:gr32bit = SELRMux %8, %9:grx32bit, 14, 6, implicit killed $cc CHIMux %6, 2, implicit-def $cc diff --git a/llvm/test/CodeGen/SystemZ/cond-move-regalloc-hints.mir b/llvm/test/CodeGen/SystemZ/cond-move-regalloc-hints.mir index 05d52e27e134c..686bbc396a5d9 100644 --- a/llvm/test/CodeGen/SystemZ/cond-move-regalloc-hints.mir +++ b/llvm/test/CodeGen/SystemZ/cond-move-regalloc-hints.mir @@ -229,13 +229,13 @@ body: | %63:addr64bit = LCGR %43, implicit-def dead $cc %45:addr64bit = SLLG %10, $noreg, 3 %64:addr64bit = SLLG %10, $noreg, 2 - %64:addr64bit = AG %64, %1, 0, %45, implicit-def dead $cc :: (load 8 from %ir.tmp21) + %64:addr64bit = AG %64, %1, 0, %45, implicit-def dead $cc :: (load (s64) from %ir.tmp21) %65:grx32bit = COPY %59 bb.3.bb25: successors: %bb.4(0x04000000), %bb.3(0x7c000000) - %47:fp32bit = VL32 %64, 4, $noreg :: (load 4 from %ir.scevgep5) + %47:fp32bit = VL32 %64, 4, $noreg :: (load (s32) from %ir.scevgep5) %25:gr64bit = LA %64, 4, $noreg CEBR %47, undef %48:fp32bit, implicit-def $cc, implicit $fpc %62:grx32bit = LOCRMux %62, %65, 15, 4, implicit $cc @@ -259,11 +259,11 @@ body: | bb.5.bb42: successors: %bb.6(0x30000000), %bb.7(0x50000000) - %50:gr32bit = LMux %2, 0, $noreg :: (load 4 from %ir.tmp43) + %50:gr32bit = LMux %2, 0, $noreg :: (load (s32) from %ir.tmp43) %50:gr32bit = nsw SR %50, %0.subreg_l32, implicit-def dead $cc %52:addr64bit = LGFR %50 %52:addr64bit = MGHI %52, 40 - MVC undef %53:addr64bit, 0, 4, %52, 32 :: (store 4 into `i32* undef`), (load 4 from %ir.tmp47) + MVC undef %53:addr64bit, 0, 4, %52, 32 :: (store (s32) into `i32* undef`), (load (s32) from %ir.tmp47) CHIMux %32.subreg_l32, 0, implicit-def $cc BRC 14, 6, %bb.7, implicit killed $cc J %bb.6 @@ -271,7 +271,7 @@ body: | bb.6.bb52: bb.7.bb54: - STMux %0.subreg_l32, %2, 0, $noreg :: (store 4 into %ir.tmp56) + STMux %0.subreg_l32, %2, 0, $noreg :: (store (s32) into %ir.tmp56) %56:addr64bit = LA %56, 1, $noreg %55:gr32bit = AHIMux %55, 3, implicit-def dead $cc %57:gr64bit = LGHI 0 diff --git a/llvm/test/CodeGen/SystemZ/debuginstr-01.mir b/llvm/test/CodeGen/SystemZ/debuginstr-01.mir index 162bc6df7571b..ac86889d3caff 100644 --- a/llvm/test/CodeGen/SystemZ/debuginstr-01.mir +++ b/llvm/test/CodeGen/SystemZ/debuginstr-01.mir @@ -61,7 +61,7 @@ body: | successors: %bb.1(0x00000001), %bb.2(0x7fffffff) liveins: $r2d - renamable $r2l = LMux killed renamable $r2d, 0, $noreg :: (load 4 from %ir.ptr) + renamable $r2l = LMux killed renamable $r2d, 0, $noreg :: (load (s32) from %ir.ptr) DBG_VALUE $noreg, $r2l, !7, !DIExpression(), debug-location !9 CHIMux renamable $r2l, 0, implicit-def $cc DBG_VALUE $noreg, $r2l, !7, !DIExpression(), debug-location !9 diff --git a/llvm/test/CodeGen/SystemZ/foldmemop-imm-02.mir b/llvm/test/CodeGen/SystemZ/foldmemop-imm-02.mir index bf3347eb5c45d..8dbb88bacb628 100644 --- a/llvm/test/CodeGen/SystemZ/foldmemop-imm-02.mir +++ b/llvm/test/CodeGen/SystemZ/foldmemop-imm-02.mir @@ -51,7 +51,7 @@ body: | J %bb.1 bb.1: - %8:grx32bit = LMux %2, 0, $noreg :: (load 4 from %ir.src) + %8:grx32bit = LMux %2, 0, $noreg :: (load (s32) from %ir.src) INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def early-clobber $r15d bb.2: @@ -105,7 +105,7 @@ body: | J %bb.1 bb.1: - %8:gr64bit = LG %2, 0, $noreg :: (load 8 from %ir.src) + %8:gr64bit = LG %2, 0, $noreg :: (load (s64) from %ir.src) INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def early-clobber $r15d bb.2: @@ -162,7 +162,7 @@ body: | J %bb.1 bb.1: - %8:grx32bit = LMux %2, 0, $noreg :: (load 4 from %ir.src) + %8:grx32bit = LMux %2, 0, $noreg :: (load (s32) from %ir.src) INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def early-clobber $r15d bb.2: @@ -219,7 +219,7 @@ body: | J %bb.1 bb.1: - %8:gr64bit = LG %2, 0, $noreg :: (load 8 from %ir.src) + %8:gr64bit = LG %2, 0, $noreg :: (load (s64) from %ir.src) INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def early-clobber $r15d bb.2: diff --git a/llvm/test/CodeGen/SystemZ/foldmemop-msc.mir b/llvm/test/CodeGen/SystemZ/foldmemop-msc.mir index 4541605182212..b7efb2d193f69 100644 --- a/llvm/test/CodeGen/SystemZ/foldmemop-msc.mir +++ b/llvm/test/CodeGen/SystemZ/foldmemop-msc.mir @@ -49,7 +49,7 @@ body: | J %bb.1 bb.1: - %8:gr32bit = LMux %2, 0, $noreg :: (load 4 from %ir.src) + %8:gr32bit = LMux %2, 0, $noreg :: (load (s32) from %ir.src) INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def early-clobber $r15d bb.2: @@ -101,7 +101,7 @@ body: | J %bb.1 bb.1: - %8:gr32bit = LMux %2, 0, $noreg :: (load 4 from %ir.src) + %8:gr32bit = LMux %2, 0, $noreg :: (load (s32) from %ir.src) INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def early-clobber $r15d bb.2: @@ -153,7 +153,7 @@ body: | J %bb.1 bb.1: - %8:gr64bit = LG %2, 0, $noreg :: (load 8 from %ir.src) + %8:gr64bit = LG %2, 0, $noreg :: (load (s64) from %ir.src) INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def early-clobber $r15d bb.2: @@ -205,7 +205,7 @@ body: | J %bb.1 bb.1: - %8:gr64bit = LG %2, 0, $noreg :: (load 8 from %ir.src) + %8:gr64bit = LG %2, 0, $noreg :: (load (s64) from %ir.src) INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def early-clobber $r15d bb.2: diff --git a/llvm/test/CodeGen/SystemZ/foldmemop-vec-binops.mir b/llvm/test/CodeGen/SystemZ/foldmemop-vec-binops.mir index c0faf27ef401e..6ef9de775fb01 100644 --- a/llvm/test/CodeGen/SystemZ/foldmemop-vec-binops.mir +++ b/llvm/test/CodeGen/SystemZ/foldmemop-vec-binops.mir @@ -60,7 +60,7 @@ body: | %0:fp64bit = COPY $f0d INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def dead early-clobber $f1d, 12, implicit-def dead early-clobber $f2d, 12, implicit-def dead early-clobber $f3d, 12, implicit-def dead early-clobber $f4d, 12, implicit-def dead early-clobber $f5d, 12, implicit-def dead early-clobber $f6d, 12, implicit-def dead early-clobber $f7d, 12, implicit-def dead early-clobber $f8d, 12, implicit-def dead early-clobber $f9d, 12, implicit-def dead early-clobber $f10d, 12, implicit-def dead early-clobber $f11d, 12, implicit-def dead early-clobber $f12d, 12, implicit-def dead early-clobber $f13d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f15d %3:vr64bit = nofpexcept WFADB %0, %1, implicit $fpc - VST64 %3, %2, 0, $noreg :: (store 8 into %ir.Dst) + VST64 %3, %2, 0, $noreg :: (store (s64) into %ir.Dst) Return ... @@ -93,7 +93,7 @@ body: | %0:fp64bit = COPY $f0d INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def dead early-clobber $f1d, 12, implicit-def dead early-clobber $f2d, 12, implicit-def dead early-clobber $f3d, 12, implicit-def dead early-clobber $f4d, 12, implicit-def dead early-clobber $f5d, 12, implicit-def dead early-clobber $f6d, 12, implicit-def dead early-clobber $f7d, 12, implicit-def dead early-clobber $f8d, 12, implicit-def dead early-clobber $f9d, 12, implicit-def dead early-clobber $f10d, 12, implicit-def dead early-clobber $f11d, 12, implicit-def dead early-clobber $f12d, 12, implicit-def dead early-clobber $f13d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f15d %3:vr64bit = nofpexcept WFADB %1, %0, implicit $fpc - VST64 %3, %2, 0, $noreg :: (store 8 into %ir.Dst) + VST64 %3, %2, 0, $noreg :: (store (s64) into %ir.Dst) Return ... @@ -126,7 +126,7 @@ body: | %0:fp64bit = COPY $f0d INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def dead early-clobber $f1d, 12, implicit-def dead early-clobber $f2d, 12, implicit-def dead early-clobber $f3d, 12, implicit-def dead early-clobber $f4d, 12, implicit-def dead early-clobber $f5d, 12, implicit-def dead early-clobber $f6d, 12, implicit-def dead early-clobber $f7d, 12, implicit-def dead early-clobber $f8d, 12, implicit-def dead early-clobber $f9d, 12, implicit-def dead early-clobber $f10d, 12, implicit-def dead early-clobber $f11d, 12, implicit-def dead early-clobber $f12d, 12, implicit-def dead early-clobber $f13d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f15d %3:vr64bit = nofpexcept WFADB %1, %0, implicit $fpc - VST64 %3, %2, 0, $noreg :: (store 8 into %ir.Dst) + VST64 %3, %2, 0, $noreg :: (store (s64) into %ir.Dst) Return ... @@ -159,7 +159,7 @@ body: | %0:fp32bit = COPY $f0s INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def dead early-clobber $f1d, 12, implicit-def dead early-clobber $f2d, 12, implicit-def dead early-clobber $f3d, 12, implicit-def dead early-clobber $f4d, 12, implicit-def dead early-clobber $f5d, 12, implicit-def dead early-clobber $f6d, 12, implicit-def dead early-clobber $f7d, 12, implicit-def dead early-clobber $f8d, 12, implicit-def dead early-clobber $f9d, 12, implicit-def dead early-clobber $f10d, 12, implicit-def dead early-clobber $f11d, 12, implicit-def dead early-clobber $f12d, 12, implicit-def dead early-clobber $f13d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f15d %3:vr32bit = nofpexcept WFASB %0, %1, implicit $fpc - VST32 %3, %2, 0, $noreg :: (store 4 into %ir.Dst) + VST32 %3, %2, 0, $noreg :: (store (s32) into %ir.Dst) Return ... @@ -192,7 +192,7 @@ body: | %0:fp32bit = COPY $f0s INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def dead early-clobber $f1d, 12, implicit-def dead early-clobber $f2d, 12, implicit-def dead early-clobber $f3d, 12, implicit-def dead early-clobber $f4d, 12, implicit-def dead early-clobber $f5d, 12, implicit-def dead early-clobber $f6d, 12, implicit-def dead early-clobber $f7d, 12, implicit-def dead early-clobber $f8d, 12, implicit-def dead early-clobber $f9d, 12, implicit-def dead early-clobber $f10d, 12, implicit-def dead early-clobber $f11d, 12, implicit-def dead early-clobber $f12d, 12, implicit-def dead early-clobber $f13d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f15d %3:vr32bit = nofpexcept WFASB %1, %0, implicit $fpc - VST32 %3, %2, 0, $noreg :: (store 4 into %ir.Dst) + VST32 %3, %2, 0, $noreg :: (store (s32) into %ir.Dst) Return ... @@ -226,7 +226,7 @@ body: | %0:fp32bit = COPY $f0s INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def dead early-clobber $f1d, 12, implicit-def dead early-clobber $f2d, 12, implicit-def dead early-clobber $f3d, 12, implicit-def dead early-clobber $f4d, 12, implicit-def dead early-clobber $f5d, 12, implicit-def dead early-clobber $f6d, 12, implicit-def dead early-clobber $f7d, 12, implicit-def dead early-clobber $f8d, 12, implicit-def dead early-clobber $f9d, 12, implicit-def dead early-clobber $f10d, 12, implicit-def dead early-clobber $f11d, 12, implicit-def dead early-clobber $f12d, 12, implicit-def dead early-clobber $f13d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f15d %3:vr32bit = nofpexcept WFASB %1, %0, implicit $fpc - VST32 %3, %2, 0, $noreg :: (store 4 into %ir.Dst) + VST32 %3, %2, 0, $noreg :: (store (s32) into %ir.Dst) Return ... @@ -259,7 +259,7 @@ body: | %0:fp64bit = COPY $f0d INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def dead early-clobber $f1d, 12, implicit-def dead early-clobber $f2d, 12, implicit-def dead early-clobber $f3d, 12, implicit-def dead early-clobber $f4d, 12, implicit-def dead early-clobber $f5d, 12, implicit-def dead early-clobber $f6d, 12, implicit-def dead early-clobber $f7d, 12, implicit-def dead early-clobber $f8d, 12, implicit-def dead early-clobber $f9d, 12, implicit-def dead early-clobber $f10d, 12, implicit-def dead early-clobber $f11d, 12, implicit-def dead early-clobber $f12d, 12, implicit-def dead early-clobber $f13d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f15d %3:vr64bit = nofpexcept WFSDB %0, %1, implicit $fpc - VST64 %3, %2, 0, $noreg :: (store 8 into %ir.Dst) + VST64 %3, %2, 0, $noreg :: (store (s64) into %ir.Dst) Return ... @@ -292,7 +292,7 @@ body: | %0:fp64bit = COPY $f0d INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def dead early-clobber $f1d, 12, implicit-def dead early-clobber $f2d, 12, implicit-def dead early-clobber $f3d, 12, implicit-def dead early-clobber $f4d, 12, implicit-def dead early-clobber $f5d, 12, implicit-def dead early-clobber $f6d, 12, implicit-def dead early-clobber $f7d, 12, implicit-def dead early-clobber $f8d, 12, implicit-def dead early-clobber $f9d, 12, implicit-def dead early-clobber $f10d, 12, implicit-def dead early-clobber $f11d, 12, implicit-def dead early-clobber $f12d, 12, implicit-def dead early-clobber $f13d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f15d %3:vr64bit = nofpexcept WFSDB %1, %0, implicit $fpc - VST64 %3, %2, 0, $noreg :: (store 8 into %ir.Dst) + VST64 %3, %2, 0, $noreg :: (store (s64) into %ir.Dst) Return ... @@ -325,7 +325,7 @@ body: | %0:fp64bit = COPY $f0d INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def dead early-clobber $f1d, 12, implicit-def dead early-clobber $f2d, 12, implicit-def dead early-clobber $f3d, 12, implicit-def dead early-clobber $f4d, 12, implicit-def dead early-clobber $f5d, 12, implicit-def dead early-clobber $f6d, 12, implicit-def dead early-clobber $f7d, 12, implicit-def dead early-clobber $f8d, 12, implicit-def dead early-clobber $f9d, 12, implicit-def dead early-clobber $f10d, 12, implicit-def dead early-clobber $f11d, 12, implicit-def dead early-clobber $f12d, 12, implicit-def dead early-clobber $f13d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f15d %3:vr64bit = nofpexcept WFSDB %0, %1, implicit $fpc - VST64 %3, %2, 0, $noreg :: (store 8 into %ir.Dst) + VST64 %3, %2, 0, $noreg :: (store (s64) into %ir.Dst) Return ... @@ -358,7 +358,7 @@ body: | %0:fp32bit = COPY $f0s INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def dead early-clobber $f1d, 12, implicit-def dead early-clobber $f2d, 12, implicit-def dead early-clobber $f3d, 12, implicit-def dead early-clobber $f4d, 12, implicit-def dead early-clobber $f5d, 12, implicit-def dead early-clobber $f6d, 12, implicit-def dead early-clobber $f7d, 12, implicit-def dead early-clobber $f8d, 12, implicit-def dead early-clobber $f9d, 12, implicit-def dead early-clobber $f10d, 12, implicit-def dead early-clobber $f11d, 12, implicit-def dead early-clobber $f12d, 12, implicit-def dead early-clobber $f13d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f15d %3:vr32bit = nofpexcept WFSSB %0, %1, implicit $fpc - VST32 %3, %2, 0, $noreg :: (store 4 into %ir.Dst) + VST32 %3, %2, 0, $noreg :: (store (s32) into %ir.Dst) Return ... @@ -391,7 +391,7 @@ body: | %0:fp32bit = COPY $f0s INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def dead early-clobber $f1d, 12, implicit-def dead early-clobber $f2d, 12, implicit-def dead early-clobber $f3d, 12, implicit-def dead early-clobber $f4d, 12, implicit-def dead early-clobber $f5d, 12, implicit-def dead early-clobber $f6d, 12, implicit-def dead early-clobber $f7d, 12, implicit-def dead early-clobber $f8d, 12, implicit-def dead early-clobber $f9d, 12, implicit-def dead early-clobber $f10d, 12, implicit-def dead early-clobber $f11d, 12, implicit-def dead early-clobber $f12d, 12, implicit-def dead early-clobber $f13d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f15d %3:vr32bit = nofpexcept WFSSB %1, %0, implicit $fpc - VST32 %3, %2, 0, $noreg :: (store 4 into %ir.Dst) + VST32 %3, %2, 0, $noreg :: (store (s32) into %ir.Dst) Return ... @@ -425,7 +425,7 @@ body: | %0:fp32bit = COPY $f0s INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def dead early-clobber $f1d, 12, implicit-def dead early-clobber $f2d, 12, implicit-def dead early-clobber $f3d, 12, implicit-def dead early-clobber $f4d, 12, implicit-def dead early-clobber $f5d, 12, implicit-def dead early-clobber $f6d, 12, implicit-def dead early-clobber $f7d, 12, implicit-def dead early-clobber $f8d, 12, implicit-def dead early-clobber $f9d, 12, implicit-def dead early-clobber $f10d, 12, implicit-def dead early-clobber $f11d, 12, implicit-def dead early-clobber $f12d, 12, implicit-def dead early-clobber $f13d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f15d %3:vr32bit = nofpexcept WFSSB %0, %1, implicit $fpc - VST32 %3, %2, 0, $noreg :: (store 4 into %ir.Dst) + VST32 %3, %2, 0, $noreg :: (store (s32) into %ir.Dst) Return ... @@ -457,7 +457,7 @@ body: | %0:fp64bit = COPY $f0d INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def dead early-clobber $f1d, 12, implicit-def dead early-clobber $f2d, 12, implicit-def dead early-clobber $f3d, 12, implicit-def dead early-clobber $f4d, 12, implicit-def dead early-clobber $f5d, 12, implicit-def dead early-clobber $f6d, 12, implicit-def dead early-clobber $f7d, 12, implicit-def dead early-clobber $f8d, 12, implicit-def dead early-clobber $f9d, 12, implicit-def dead early-clobber $f10d, 12, implicit-def dead early-clobber $f11d, 12, implicit-def dead early-clobber $f12d, 12, implicit-def dead early-clobber $f13d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f15d %3:vr64bit = nofpexcept WFDDB %0, %1, implicit $fpc - VST64 %3, %2, 0, $noreg :: (store 8 into %ir.Dst) + VST64 %3, %2, 0, $noreg :: (store (s64) into %ir.Dst) Return ... @@ -490,7 +490,7 @@ body: | %0:fp64bit = COPY $f0d INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def dead early-clobber $f1d, 12, implicit-def dead early-clobber $f2d, 12, implicit-def dead early-clobber $f3d, 12, implicit-def dead early-clobber $f4d, 12, implicit-def dead early-clobber $f5d, 12, implicit-def dead early-clobber $f6d, 12, implicit-def dead early-clobber $f7d, 12, implicit-def dead early-clobber $f8d, 12, implicit-def dead early-clobber $f9d, 12, implicit-def dead early-clobber $f10d, 12, implicit-def dead early-clobber $f11d, 12, implicit-def dead early-clobber $f12d, 12, implicit-def dead early-clobber $f13d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f15d %3:vr64bit = nofpexcept WFDDB %1, %0, implicit $fpc - VST64 %3, %2, 0, $noreg :: (store 8 into %ir.Dst) + VST64 %3, %2, 0, $noreg :: (store (s64) into %ir.Dst) Return ... @@ -523,7 +523,7 @@ body: | %0:fp64bit = COPY $f0d INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def dead early-clobber $f1d, 12, implicit-def dead early-clobber $f2d, 12, implicit-def dead early-clobber $f3d, 12, implicit-def dead early-clobber $f4d, 12, implicit-def dead early-clobber $f5d, 12, implicit-def dead early-clobber $f6d, 12, implicit-def dead early-clobber $f7d, 12, implicit-def dead early-clobber $f8d, 12, implicit-def dead early-clobber $f9d, 12, implicit-def dead early-clobber $f10d, 12, implicit-def dead early-clobber $f11d, 12, implicit-def dead early-clobber $f12d, 12, implicit-def dead early-clobber $f13d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f15d %3:vr64bit = nofpexcept WFDDB %0, %1, implicit $fpc - VST64 %3, %2, 0, $noreg :: (store 8 into %ir.Dst) + VST64 %3, %2, 0, $noreg :: (store (s64) into %ir.Dst) Return ... @@ -556,7 +556,7 @@ body: | %0:fp32bit = COPY $f0s INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def dead early-clobber $f1d, 12, implicit-def dead early-clobber $f2d, 12, implicit-def dead early-clobber $f3d, 12, implicit-def dead early-clobber $f4d, 12, implicit-def dead early-clobber $f5d, 12, implicit-def dead early-clobber $f6d, 12, implicit-def dead early-clobber $f7d, 12, implicit-def dead early-clobber $f8d, 12, implicit-def dead early-clobber $f9d, 12, implicit-def dead early-clobber $f10d, 12, implicit-def dead early-clobber $f11d, 12, implicit-def dead early-clobber $f12d, 12, implicit-def dead early-clobber $f13d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f15d %3:vr32bit = nofpexcept WFDSB %0, %1, implicit $fpc - VST32 %3, %2, 0, $noreg :: (store 4 into %ir.Dst) + VST32 %3, %2, 0, $noreg :: (store (s32) into %ir.Dst) Return ... @@ -589,7 +589,7 @@ body: | %0:fp32bit = COPY $f0s INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def dead early-clobber $f1d, 12, implicit-def dead early-clobber $f2d, 12, implicit-def dead early-clobber $f3d, 12, implicit-def dead early-clobber $f4d, 12, implicit-def dead early-clobber $f5d, 12, implicit-def dead early-clobber $f6d, 12, implicit-def dead early-clobber $f7d, 12, implicit-def dead early-clobber $f8d, 12, implicit-def dead early-clobber $f9d, 12, implicit-def dead early-clobber $f10d, 12, implicit-def dead early-clobber $f11d, 12, implicit-def dead early-clobber $f12d, 12, implicit-def dead early-clobber $f13d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f15d %3:vr32bit = nofpexcept WFDSB %1, %0, implicit $fpc - VST32 %3, %2, 0, $noreg :: (store 4 into %ir.Dst) + VST32 %3, %2, 0, $noreg :: (store (s32) into %ir.Dst) Return ... @@ -623,7 +623,7 @@ body: | %0:fp32bit = COPY $f0s INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def dead early-clobber $f1d, 12, implicit-def dead early-clobber $f2d, 12, implicit-def dead early-clobber $f3d, 12, implicit-def dead early-clobber $f4d, 12, implicit-def dead early-clobber $f5d, 12, implicit-def dead early-clobber $f6d, 12, implicit-def dead early-clobber $f7d, 12, implicit-def dead early-clobber $f8d, 12, implicit-def dead early-clobber $f9d, 12, implicit-def dead early-clobber $f10d, 12, implicit-def dead early-clobber $f11d, 12, implicit-def dead early-clobber $f12d, 12, implicit-def dead early-clobber $f13d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f15d %3:vr32bit = nofpexcept WFDSB %0, %1, implicit $fpc - VST32 %3, %2, 0, $noreg :: (store 4 into %ir.Dst) + VST32 %3, %2, 0, $noreg :: (store (s32) into %ir.Dst) Return ... @@ -656,7 +656,7 @@ body: | %0:fp64bit = COPY $f0d INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def dead early-clobber $f1d, 12, implicit-def dead early-clobber $f2d, 12, implicit-def dead early-clobber $f3d, 12, implicit-def dead early-clobber $f4d, 12, implicit-def dead early-clobber $f5d, 12, implicit-def dead early-clobber $f6d, 12, implicit-def dead early-clobber $f7d, 12, implicit-def dead early-clobber $f8d, 12, implicit-def dead early-clobber $f9d, 12, implicit-def dead early-clobber $f10d, 12, implicit-def dead early-clobber $f11d, 12, implicit-def dead early-clobber $f12d, 12, implicit-def dead early-clobber $f13d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f15d %3:vr64bit = nofpexcept WFMDB %0, %1, implicit $fpc - VST64 %3, %2, 0, $noreg :: (store 8 into %ir.Dst) + VST64 %3, %2, 0, $noreg :: (store (s64) into %ir.Dst) Return ... @@ -689,7 +689,7 @@ body: | %0:fp64bit = COPY $f0d INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def dead early-clobber $f1d, 12, implicit-def dead early-clobber $f2d, 12, implicit-def dead early-clobber $f3d, 12, implicit-def dead early-clobber $f4d, 12, implicit-def dead early-clobber $f5d, 12, implicit-def dead early-clobber $f6d, 12, implicit-def dead early-clobber $f7d, 12, implicit-def dead early-clobber $f8d, 12, implicit-def dead early-clobber $f9d, 12, implicit-def dead early-clobber $f10d, 12, implicit-def dead early-clobber $f11d, 12, implicit-def dead early-clobber $f12d, 12, implicit-def dead early-clobber $f13d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f15d %3:vr64bit = nofpexcept WFMDB %1, %0, implicit $fpc - VST64 %3, %2, 0, $noreg :: (store 8 into %ir.Dst) + VST64 %3, %2, 0, $noreg :: (store (s64) into %ir.Dst) Return ... @@ -722,7 +722,7 @@ body: | %0:fp64bit = COPY $f0d INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def dead early-clobber $f1d, 12, implicit-def dead early-clobber $f2d, 12, implicit-def dead early-clobber $f3d, 12, implicit-def dead early-clobber $f4d, 12, implicit-def dead early-clobber $f5d, 12, implicit-def dead early-clobber $f6d, 12, implicit-def dead early-clobber $f7d, 12, implicit-def dead early-clobber $f8d, 12, implicit-def dead early-clobber $f9d, 12, implicit-def dead early-clobber $f10d, 12, implicit-def dead early-clobber $f11d, 12, implicit-def dead early-clobber $f12d, 12, implicit-def dead early-clobber $f13d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f15d %3:vr64bit = nofpexcept WFMDB %1, %0, implicit $fpc - VST64 %3, %2, 0, $noreg :: (store 8 into %ir.Dst) + VST64 %3, %2, 0, $noreg :: (store (s64) into %ir.Dst) Return ... @@ -755,7 +755,7 @@ body: | %0:fp32bit = COPY $f0s INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def dead early-clobber $f1d, 12, implicit-def dead early-clobber $f2d, 12, implicit-def dead early-clobber $f3d, 12, implicit-def dead early-clobber $f4d, 12, implicit-def dead early-clobber $f5d, 12, implicit-def dead early-clobber $f6d, 12, implicit-def dead early-clobber $f7d, 12, implicit-def dead early-clobber $f8d, 12, implicit-def dead early-clobber $f9d, 12, implicit-def dead early-clobber $f10d, 12, implicit-def dead early-clobber $f11d, 12, implicit-def dead early-clobber $f12d, 12, implicit-def dead early-clobber $f13d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f15d %3:vr32bit = nofpexcept WFMSB %0, %1, implicit $fpc - VST32 %3, %2, 0, $noreg :: (store 4 into %ir.Dst) + VST32 %3, %2, 0, $noreg :: (store (s32) into %ir.Dst) Return ... @@ -788,7 +788,7 @@ body: | %0:fp32bit = COPY $f0s INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def dead early-clobber $f1d, 12, implicit-def dead early-clobber $f2d, 12, implicit-def dead early-clobber $f3d, 12, implicit-def dead early-clobber $f4d, 12, implicit-def dead early-clobber $f5d, 12, implicit-def dead early-clobber $f6d, 12, implicit-def dead early-clobber $f7d, 12, implicit-def dead early-clobber $f8d, 12, implicit-def dead early-clobber $f9d, 12, implicit-def dead early-clobber $f10d, 12, implicit-def dead early-clobber $f11d, 12, implicit-def dead early-clobber $f12d, 12, implicit-def dead early-clobber $f13d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f15d %3:vr32bit = nofpexcept WFMSB %1, %0, implicit $fpc - VST32 %3, %2, 0, $noreg :: (store 4 into %ir.Dst) + VST32 %3, %2, 0, $noreg :: (store (s32) into %ir.Dst) Return ... @@ -822,7 +822,7 @@ body: | %0:fp32bit = COPY $f0s INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def dead early-clobber $f1d, 12, implicit-def dead early-clobber $f2d, 12, implicit-def dead early-clobber $f3d, 12, implicit-def dead early-clobber $f4d, 12, implicit-def dead early-clobber $f5d, 12, implicit-def dead early-clobber $f6d, 12, implicit-def dead early-clobber $f7d, 12, implicit-def dead early-clobber $f8d, 12, implicit-def dead early-clobber $f9d, 12, implicit-def dead early-clobber $f10d, 12, implicit-def dead early-clobber $f11d, 12, implicit-def dead early-clobber $f12d, 12, implicit-def dead early-clobber $f13d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f15d %3:vr32bit = nofpexcept WFMSB %1, %0, implicit $fpc - VST32 %3, %2, 0, $noreg :: (store 4 into %ir.Dst) + VST32 %3, %2, 0, $noreg :: (store (s32) into %ir.Dst) Return ... diff --git a/llvm/test/CodeGen/SystemZ/foldmemop-vec-cc.mir b/llvm/test/CodeGen/SystemZ/foldmemop-vec-cc.mir index 8f6139148304c..41716991dddf4 100644 --- a/llvm/test/CodeGen/SystemZ/foldmemop-vec-cc.mir +++ b/llvm/test/CodeGen/SystemZ/foldmemop-vec-cc.mir @@ -40,7 +40,7 @@ body: | %3:vr64bit = nofpexcept WFADB %0, %1, implicit $fpc %4:gr64bit = LGHI 0 %4:gr64bit = LOCGHI %4, 1, 14, 8, implicit killed $cc - VST64 %3, %2, 0, $noreg :: (store 8 into %ir.Dst) + VST64 %3, %2, 0, $noreg :: (store (s64) into %ir.Dst) Return ... diff --git a/llvm/test/CodeGen/SystemZ/foldmemop-vec-cmp.mir b/llvm/test/CodeGen/SystemZ/foldmemop-vec-cmp.mir index c6dee4ad031cc..1f66c4204e859 100644 --- a/llvm/test/CodeGen/SystemZ/foldmemop-vec-cmp.mir +++ b/llvm/test/CodeGen/SystemZ/foldmemop-vec-cmp.mir @@ -50,7 +50,7 @@ body: | nofpexcept WFCDB %0, %1, implicit-def $cc, implicit $fpc %4:gr64bit = LGHI 0 %4:gr64bit = LOCGHI %4, 1, 15, 8, implicit killed $cc - STG %4, %2, 0, $noreg :: (store 8 into %ir.Dst) + STG %4, %2, 0, $noreg :: (store (s64) into %ir.Dst) Return ... @@ -86,7 +86,7 @@ body: | nofpexcept WFCDB %1, %0, implicit-def $cc, implicit $fpc %4:gr64bit = LGHI 0 %4:gr64bit = LOCGHI %4, 1, 15, 8, implicit killed $cc - STG %4, %2, 0, $noreg :: (store 8 into %ir.Dst) + STG %4, %2, 0, $noreg :: (store (s64) into %ir.Dst) Return ... @@ -124,7 +124,7 @@ body: | nofpexcept WFCDB %1, %0, implicit-def $cc, implicit $fpc %4:gr64bit = LGHI 0 %4:gr64bit = LOCGHI %4, 1, 15, 8, implicit killed $cc - STG %4, %2, 0, $noreg :: (store 8 into %ir.Dst) + STG %4, %2, 0, $noreg :: (store (s64) into %ir.Dst) Return ... @@ -160,7 +160,7 @@ body: | nofpexcept WFCSB %0, %1, implicit-def $cc, implicit $fpc %4:gr64bit = LGHI 0 %4:gr64bit = LOCGHI %4, 1, 15, 8, implicit killed $cc - STG %4, %2, 0, $noreg :: (store 8 into %ir.Dst) + STG %4, %2, 0, $noreg :: (store (s64) into %ir.Dst) Return ... @@ -196,7 +196,7 @@ body: | nofpexcept WFCSB %1, %0, implicit-def $cc, implicit $fpc %4:gr64bit = LGHI 0 %4:gr64bit = LOCGHI %4, 1, 15, 8, implicit killed $cc - STG %4, %2, 0, $noreg :: (store 8 into %ir.Dst) + STG %4, %2, 0, $noreg :: (store (s64) into %ir.Dst) Return ... @@ -234,7 +234,7 @@ body: | nofpexcept WFCSB %1, %0, implicit-def $cc, implicit $fpc %4:gr64bit = LGHI 0 %4:gr64bit = LOCGHI %4, 1, 15, 8, implicit killed $cc - STG %4, %2, 0, $noreg :: (store 8 into %ir.Dst) + STG %4, %2, 0, $noreg :: (store (s64) into %ir.Dst) Return ... @@ -270,7 +270,7 @@ body: | nofpexcept WFKDB %0, %1, implicit-def $cc, implicit $fpc %4:gr64bit = LGHI 0 %4:gr64bit = LOCGHI %4, 1, 15, 8, implicit killed $cc - STG %4, %2, 0, $noreg :: (store 8 into %ir.Dst) + STG %4, %2, 0, $noreg :: (store (s64) into %ir.Dst) Return ... @@ -306,7 +306,7 @@ body: | nofpexcept WFKDB %1, %0, implicit-def $cc, implicit $fpc %4:gr64bit = LGHI 0 %4:gr64bit = LOCGHI %4, 1, 15, 8, implicit killed $cc - STG %4, %2, 0, $noreg :: (store 8 into %ir.Dst) + STG %4, %2, 0, $noreg :: (store (s64) into %ir.Dst) Return ... @@ -344,7 +344,7 @@ body: | nofpexcept WFKDB %1, %0, implicit-def $cc, implicit $fpc %4:gr64bit = LGHI 0 %4:gr64bit = LOCGHI %4, 1, 15, 8, implicit killed $cc - STG %4, %2, 0, $noreg :: (store 8 into %ir.Dst) + STG %4, %2, 0, $noreg :: (store (s64) into %ir.Dst) Return ... @@ -380,7 +380,7 @@ body: | nofpexcept WFKSB %0, %1, implicit-def $cc, implicit $fpc %4:gr64bit = LGHI 0 %4:gr64bit = LOCGHI %4, 1, 15, 8, implicit killed $cc - STG %4, %2, 0, $noreg :: (store 8 into %ir.Dst) + STG %4, %2, 0, $noreg :: (store (s64) into %ir.Dst) Return ... @@ -416,7 +416,7 @@ body: | nofpexcept WFKSB %1, %0, implicit-def $cc, implicit $fpc %4:gr64bit = LGHI 0 %4:gr64bit = LOCGHI %4, 1, 15, 8, implicit killed $cc - STG %4, %2, 0, $noreg :: (store 8 into %ir.Dst) + STG %4, %2, 0, $noreg :: (store (s64) into %ir.Dst) Return ... @@ -454,7 +454,7 @@ body: | nofpexcept WFKSB %1, %0, implicit-def $cc, implicit $fpc %4:gr64bit = LGHI 0 %4:gr64bit = LOCGHI %4, 1, 15, 8, implicit killed $cc - STG %4, %2, 0, $noreg :: (store 8 into %ir.Dst) + STG %4, %2, 0, $noreg :: (store (s64) into %ir.Dst) Return ... @@ -492,7 +492,7 @@ body: | nofpexcept WFCSB %0, %1.subreg_h32:vr128bit, implicit-def $cc, implicit $fpc %4:gr64bit = LGHI 0 %4:gr64bit = LOCGHI %4, 1, 15, 8, implicit killed $cc - STG %4, %2, 0, $noreg :: (store 8 into %ir.Dst) + STG %4, %2, 0, $noreg :: (store (s64) into %ir.Dst) Return ... diff --git a/llvm/test/CodeGen/SystemZ/foldmemop-vec-fusedfp.mir b/llvm/test/CodeGen/SystemZ/foldmemop-vec-fusedfp.mir index 81c2a815aea19..e71d26830b9c5 100644 --- a/llvm/test/CodeGen/SystemZ/foldmemop-vec-fusedfp.mir +++ b/llvm/test/CodeGen/SystemZ/foldmemop-vec-fusedfp.mir @@ -52,7 +52,7 @@ body: | %4:fp64bit = COPY $f2d INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def dead early-clobber $f1d, 12, implicit-def dead early-clobber $f2d, 12, implicit-def dead early-clobber $f3d, 12, implicit-def dead early-clobber $f4d, 12, implicit-def dead early-clobber $f5d, 12, implicit-def dead early-clobber $f6d, 12, implicit-def dead early-clobber $f7d, 12, implicit-def dead early-clobber $f8d, 12, implicit-def dead early-clobber $f9d, 12, implicit-def dead early-clobber $f10d, 12, implicit-def dead early-clobber $f11d, 12, implicit-def dead early-clobber $f12d, 12, implicit-def dead early-clobber $f13d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f15d %3:vr64bit = nofpexcept WFMADB %0, %1, %4, implicit $fpc - VST64 %3, %2, 0, $noreg :: (store 8 into %ir.Dst) + VST64 %3, %2, 0, $noreg :: (store (s64) into %ir.Dst) Return ... @@ -87,7 +87,7 @@ body: | %4:fp64bit = COPY $f2d INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def dead early-clobber $f1d, 12, implicit-def dead early-clobber $f2d, 12, implicit-def dead early-clobber $f3d, 12, implicit-def dead early-clobber $f4d, 12, implicit-def dead early-clobber $f5d, 12, implicit-def dead early-clobber $f6d, 12, implicit-def dead early-clobber $f7d, 12, implicit-def dead early-clobber $f8d, 12, implicit-def dead early-clobber $f9d, 12, implicit-def dead early-clobber $f10d, 12, implicit-def dead early-clobber $f11d, 12, implicit-def dead early-clobber $f12d, 12, implicit-def dead early-clobber $f13d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f15d %3:vr64bit = nofpexcept WFMADB %1, %0, %4, implicit $fpc - VST64 %3, %2, 0, $noreg :: (store 8 into %ir.Dst) + VST64 %3, %2, 0, $noreg :: (store (s64) into %ir.Dst) Return ... @@ -122,7 +122,7 @@ body: | %4:fp64bit = COPY $f2d INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def dead early-clobber $f1d, 12, implicit-def dead early-clobber $f2d, 12, implicit-def dead early-clobber $f3d, 12, implicit-def dead early-clobber $f4d, 12, implicit-def dead early-clobber $f5d, 12, implicit-def dead early-clobber $f6d, 12, implicit-def dead early-clobber $f7d, 12, implicit-def dead early-clobber $f8d, 12, implicit-def dead early-clobber $f9d, 12, implicit-def dead early-clobber $f10d, 12, implicit-def dead early-clobber $f11d, 12, implicit-def dead early-clobber $f12d, 12, implicit-def dead early-clobber $f13d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f15d %3:vr64bit = nofpexcept WFMADB %1, %0, %4, implicit $fpc - VST64 %3, %2, 0, $noreg :: (store 8 into %ir.Dst) + VST64 %3, %2, 0, $noreg :: (store (s64) into %ir.Dst) Return ... @@ -158,7 +158,7 @@ body: | %4:fp64bit = COPY $f2d INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def dead early-clobber $f1d, 12, implicit-def dead early-clobber $f2d, 12, implicit-def dead early-clobber $f3d, 12, implicit-def dead early-clobber $f4d, 12, implicit-def dead early-clobber $f5d, 12, implicit-def dead early-clobber $f6d, 12, implicit-def dead early-clobber $f7d, 12, implicit-def dead early-clobber $f8d, 12, implicit-def dead early-clobber $f9d, 12, implicit-def dead early-clobber $f10d, 12, implicit-def dead early-clobber $f11d, 12, implicit-def dead early-clobber $f12d, 12, implicit-def dead early-clobber $f13d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f15d %3:vr64bit = nofpexcept WFMADB %4, %1, %0, implicit $fpc - VST64 %3, %2, 0, $noreg :: (store 8 into %ir.Dst) + VST64 %3, %2, 0, $noreg :: (store (s64) into %ir.Dst) Return ... @@ -193,7 +193,7 @@ body: | %4:fp32bit = COPY $f2s INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def dead early-clobber $f1d, 12, implicit-def dead early-clobber $f2d, 12, implicit-def dead early-clobber $f3d, 12, implicit-def dead early-clobber $f4d, 12, implicit-def dead early-clobber $f5d, 12, implicit-def dead early-clobber $f6d, 12, implicit-def dead early-clobber $f7d, 12, implicit-def dead early-clobber $f8d, 12, implicit-def dead early-clobber $f9d, 12, implicit-def dead early-clobber $f10d, 12, implicit-def dead early-clobber $f11d, 12, implicit-def dead early-clobber $f12d, 12, implicit-def dead early-clobber $f13d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f15d %3:vr32bit = nofpexcept WFMASB %0, %1, %4, implicit $fpc - VST32 %3, %2, 0, $noreg :: (store 8 into %ir.Dst) + VST32 %3, %2, 0, $noreg :: (store (s64) into %ir.Dst) Return ... @@ -228,7 +228,7 @@ body: | %4:fp32bit = COPY $f2s INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def dead early-clobber $f1d, 12, implicit-def dead early-clobber $f2d, 12, implicit-def dead early-clobber $f3d, 12, implicit-def dead early-clobber $f4d, 12, implicit-def dead early-clobber $f5d, 12, implicit-def dead early-clobber $f6d, 12, implicit-def dead early-clobber $f7d, 12, implicit-def dead early-clobber $f8d, 12, implicit-def dead early-clobber $f9d, 12, implicit-def dead early-clobber $f10d, 12, implicit-def dead early-clobber $f11d, 12, implicit-def dead early-clobber $f12d, 12, implicit-def dead early-clobber $f13d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f15d %3:vr32bit = nofpexcept WFMASB %1, %0, %4, implicit $fpc - VST32 %3, %2, 0, $noreg :: (store 8 into %ir.Dst) + VST32 %3, %2, 0, $noreg :: (store (s64) into %ir.Dst) Return ... @@ -264,7 +264,7 @@ body: | %4:fp32bit = COPY $f2s INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def dead early-clobber $f1d, 12, implicit-def dead early-clobber $f2d, 12, implicit-def dead early-clobber $f3d, 12, implicit-def dead early-clobber $f4d, 12, implicit-def dead early-clobber $f5d, 12, implicit-def dead early-clobber $f6d, 12, implicit-def dead early-clobber $f7d, 12, implicit-def dead early-clobber $f8d, 12, implicit-def dead early-clobber $f9d, 12, implicit-def dead early-clobber $f10d, 12, implicit-def dead early-clobber $f11d, 12, implicit-def dead early-clobber $f12d, 12, implicit-def dead early-clobber $f13d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f15d %3:vr32bit = nofpexcept WFMASB %1, %0, %4, implicit $fpc - VST32 %3, %2, 0, $noreg :: (store 8 into %ir.Dst) + VST32 %3, %2, 0, $noreg :: (store (s64) into %ir.Dst) Return ... @@ -300,7 +300,7 @@ body: | %4:fp32bit = COPY $f2s INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def dead early-clobber $f1d, 12, implicit-def dead early-clobber $f2d, 12, implicit-def dead early-clobber $f3d, 12, implicit-def dead early-clobber $f4d, 12, implicit-def dead early-clobber $f5d, 12, implicit-def dead early-clobber $f6d, 12, implicit-def dead early-clobber $f7d, 12, implicit-def dead early-clobber $f8d, 12, implicit-def dead early-clobber $f9d, 12, implicit-def dead early-clobber $f10d, 12, implicit-def dead early-clobber $f11d, 12, implicit-def dead early-clobber $f12d, 12, implicit-def dead early-clobber $f13d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f15d %3:vr32bit = nofpexcept WFMASB %4, %1, %0, implicit $fpc - VST32 %3, %2, 0, $noreg :: (store 8 into %ir.Dst) + VST32 %3, %2, 0, $noreg :: (store (s64) into %ir.Dst) Return ... @@ -335,7 +335,7 @@ body: | %4:fp64bit = COPY $f2d INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def dead early-clobber $f1d, 12, implicit-def dead early-clobber $f2d, 12, implicit-def dead early-clobber $f3d, 12, implicit-def dead early-clobber $f4d, 12, implicit-def dead early-clobber $f5d, 12, implicit-def dead early-clobber $f6d, 12, implicit-def dead early-clobber $f7d, 12, implicit-def dead early-clobber $f8d, 12, implicit-def dead early-clobber $f9d, 12, implicit-def dead early-clobber $f10d, 12, implicit-def dead early-clobber $f11d, 12, implicit-def dead early-clobber $f12d, 12, implicit-def dead early-clobber $f13d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f15d %3:vr64bit = nofpexcept WFMSDB %0, %1, %4, implicit $fpc - VST64 %3, %2, 0, $noreg :: (store 8 into %ir.Dst) + VST64 %3, %2, 0, $noreg :: (store (s64) into %ir.Dst) Return ... @@ -370,7 +370,7 @@ body: | %4:fp64bit = COPY $f2d INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def dead early-clobber $f1d, 12, implicit-def dead early-clobber $f2d, 12, implicit-def dead early-clobber $f3d, 12, implicit-def dead early-clobber $f4d, 12, implicit-def dead early-clobber $f5d, 12, implicit-def dead early-clobber $f6d, 12, implicit-def dead early-clobber $f7d, 12, implicit-def dead early-clobber $f8d, 12, implicit-def dead early-clobber $f9d, 12, implicit-def dead early-clobber $f10d, 12, implicit-def dead early-clobber $f11d, 12, implicit-def dead early-clobber $f12d, 12, implicit-def dead early-clobber $f13d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f15d %3:vr64bit = nofpexcept WFMSDB %1, %0, %4, implicit $fpc - VST64 %3, %2, 0, $noreg :: (store 8 into %ir.Dst) + VST64 %3, %2, 0, $noreg :: (store (s64) into %ir.Dst) Return ... @@ -405,7 +405,7 @@ body: | %4:fp64bit = COPY $f2d INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def dead early-clobber $f1d, 12, implicit-def dead early-clobber $f2d, 12, implicit-def dead early-clobber $f3d, 12, implicit-def dead early-clobber $f4d, 12, implicit-def dead early-clobber $f5d, 12, implicit-def dead early-clobber $f6d, 12, implicit-def dead early-clobber $f7d, 12, implicit-def dead early-clobber $f8d, 12, implicit-def dead early-clobber $f9d, 12, implicit-def dead early-clobber $f10d, 12, implicit-def dead early-clobber $f11d, 12, implicit-def dead early-clobber $f12d, 12, implicit-def dead early-clobber $f13d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f15d %3:vr64bit = nofpexcept WFMSDB %1, %0, %4, implicit $fpc - VST64 %3, %2, 0, $noreg :: (store 8 into %ir.Dst) + VST64 %3, %2, 0, $noreg :: (store (s64) into %ir.Dst) Return ... @@ -441,7 +441,7 @@ body: | %4:fp64bit = COPY $f2d INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def dead early-clobber $f1d, 12, implicit-def dead early-clobber $f2d, 12, implicit-def dead early-clobber $f3d, 12, implicit-def dead early-clobber $f4d, 12, implicit-def dead early-clobber $f5d, 12, implicit-def dead early-clobber $f6d, 12, implicit-def dead early-clobber $f7d, 12, implicit-def dead early-clobber $f8d, 12, implicit-def dead early-clobber $f9d, 12, implicit-def dead early-clobber $f10d, 12, implicit-def dead early-clobber $f11d, 12, implicit-def dead early-clobber $f12d, 12, implicit-def dead early-clobber $f13d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f15d %3:vr64bit = nofpexcept WFMSDB %4, %1, %0, implicit $fpc - VST64 %3, %2, 0, $noreg :: (store 8 into %ir.Dst) + VST64 %3, %2, 0, $noreg :: (store (s64) into %ir.Dst) Return ... @@ -476,7 +476,7 @@ body: | %4:fp32bit = COPY $f2s INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def dead early-clobber $f1d, 12, implicit-def dead early-clobber $f2d, 12, implicit-def dead early-clobber $f3d, 12, implicit-def dead early-clobber $f4d, 12, implicit-def dead early-clobber $f5d, 12, implicit-def dead early-clobber $f6d, 12, implicit-def dead early-clobber $f7d, 12, implicit-def dead early-clobber $f8d, 12, implicit-def dead early-clobber $f9d, 12, implicit-def dead early-clobber $f10d, 12, implicit-def dead early-clobber $f11d, 12, implicit-def dead early-clobber $f12d, 12, implicit-def dead early-clobber $f13d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f15d %3:vr32bit = nofpexcept WFMSSB %0, %1, %4, implicit $fpc - VST32 %3, %2, 0, $noreg :: (store 8 into %ir.Dst) + VST32 %3, %2, 0, $noreg :: (store (s64) into %ir.Dst) Return ... @@ -511,7 +511,7 @@ body: | %4:fp32bit = COPY $f2s INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def dead early-clobber $f1d, 12, implicit-def dead early-clobber $f2d, 12, implicit-def dead early-clobber $f3d, 12, implicit-def dead early-clobber $f4d, 12, implicit-def dead early-clobber $f5d, 12, implicit-def dead early-clobber $f6d, 12, implicit-def dead early-clobber $f7d, 12, implicit-def dead early-clobber $f8d, 12, implicit-def dead early-clobber $f9d, 12, implicit-def dead early-clobber $f10d, 12, implicit-def dead early-clobber $f11d, 12, implicit-def dead early-clobber $f12d, 12, implicit-def dead early-clobber $f13d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f15d %3:vr32bit = nofpexcept WFMSSB %1, %0, %4, implicit $fpc - VST32 %3, %2, 0, $noreg :: (store 8 into %ir.Dst) + VST32 %3, %2, 0, $noreg :: (store (s64) into %ir.Dst) Return ... @@ -547,7 +547,7 @@ body: | %4:fp32bit = COPY $f2s INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def dead early-clobber $f1d, 12, implicit-def dead early-clobber $f2d, 12, implicit-def dead early-clobber $f3d, 12, implicit-def dead early-clobber $f4d, 12, implicit-def dead early-clobber $f5d, 12, implicit-def dead early-clobber $f6d, 12, implicit-def dead early-clobber $f7d, 12, implicit-def dead early-clobber $f8d, 12, implicit-def dead early-clobber $f9d, 12, implicit-def dead early-clobber $f10d, 12, implicit-def dead early-clobber $f11d, 12, implicit-def dead early-clobber $f12d, 12, implicit-def dead early-clobber $f13d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f15d %3:vr32bit = nofpexcept WFMSSB %1, %0, %4, implicit $fpc - VST32 %3, %2, 0, $noreg :: (store 8 into %ir.Dst) + VST32 %3, %2, 0, $noreg :: (store (s64) into %ir.Dst) Return ... @@ -583,7 +583,7 @@ body: | %4:fp32bit = COPY $f2s INLINEASM &"", 1, 12, implicit-def dead early-clobber $r0d, 12, implicit-def dead early-clobber $r1d, 12, implicit-def dead early-clobber $r2d, 12, implicit-def dead early-clobber $r3d, 12, implicit-def dead early-clobber $r4d, 12, implicit-def dead early-clobber $r5d, 12, implicit-def dead early-clobber $r6d, 12, implicit-def dead early-clobber $r7d, 12, implicit-def dead early-clobber $r8d, 12, implicit-def dead early-clobber $r9d, 12, implicit-def dead early-clobber $r10d, 12, implicit-def dead early-clobber $r11d, 12, implicit-def dead early-clobber $r12d, 12, implicit-def dead early-clobber $r13d, 12, implicit-def dead early-clobber $r14d, 12, implicit-def dead early-clobber $f1d, 12, implicit-def dead early-clobber $f2d, 12, implicit-def dead early-clobber $f3d, 12, implicit-def dead early-clobber $f4d, 12, implicit-def dead early-clobber $f5d, 12, implicit-def dead early-clobber $f6d, 12, implicit-def dead early-clobber $f7d, 12, implicit-def dead early-clobber $f8d, 12, implicit-def dead early-clobber $f9d, 12, implicit-def dead early-clobber $f10d, 12, implicit-def dead early-clobber $f11d, 12, implicit-def dead early-clobber $f12d, 12, implicit-def dead early-clobber $f13d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f14d, 12, implicit-def dead early-clobber $f15d %3:vr32bit = nofpexcept WFMSSB %4, %1, %0, implicit $fpc - VST32 %3, %2, 0, $noreg :: (store 8 into %ir.Dst) + VST32 %3, %2, 0, $noreg :: (store (s64) into %ir.Dst) Return ... diff --git a/llvm/test/CodeGen/SystemZ/fp-cmp-07.mir b/llvm/test/CodeGen/SystemZ/fp-cmp-07.mir index e8a2efeaa850a..63b9a3a4c9f02 100644 --- a/llvm/test/CodeGen/SystemZ/fp-cmp-07.mir +++ b/llvm/test/CodeGen/SystemZ/fp-cmp-07.mir @@ -38,7 +38,7 @@ body: | bb.1.store: liveins: $f0s, $r2d - STE $f0s, killed $r2d, 0, $noreg :: (store 4 into %ir.dest) + STE $f0s, killed $r2d, 0, $noreg :: (store (s32) into %ir.dest) Return implicit $f0s ... diff --git a/llvm/test/CodeGen/SystemZ/fp-conv-17.mir b/llvm/test/CodeGen/SystemZ/fp-conv-17.mir index 42c5e99a230a1..d9ed5303b35ae 100644 --- a/llvm/test/CodeGen/SystemZ/fp-conv-17.mir +++ b/llvm/test/CodeGen/SystemZ/fp-conv-17.mir @@ -129,74 +129,74 @@ body: | %1 = COPY $r3d %0 = COPY $r2d - %2 = LE %1, 0, $noreg :: (volatile load 4 from %ir.ptr2) - %3 = LE %1, 0, $noreg :: (volatile load 4 from %ir.ptr2) - %4 = LE %1, 0, $noreg :: (volatile load 4 from %ir.ptr2) - %5 = LE %1, 0, $noreg :: (volatile load 4 from %ir.ptr2) - %6 = LE %1, 0, $noreg :: (volatile load 4 from %ir.ptr2) - %7 = LE %1, 0, $noreg :: (volatile load 4 from %ir.ptr2) - %8 = LE %1, 0, $noreg :: (volatile load 4 from %ir.ptr2) - %9 = LE %1, 0, $noreg :: (volatile load 4 from %ir.ptr2) - %10 = LE %1, 0, $noreg :: (volatile load 4 from %ir.ptr2) - %11 = LE %1, 0, $noreg :: (volatile load 4 from %ir.ptr2) - %12 = LE %1, 0, $noreg :: (volatile load 4 from %ir.ptr2) - %13 = LE %1, 0, $noreg :: (volatile load 4 from %ir.ptr2) - %14 = LE %1, 0, $noreg :: (volatile load 4 from %ir.ptr2) - %15 = LE %1, 0, $noreg :: (volatile load 4 from %ir.ptr2) - %16 = LE %1, 0, $noreg :: (volatile load 4 from %ir.ptr2) - %17 = LE %1, 0, $noreg :: (volatile load 4 from %ir.ptr2) - %18 = LE %1, 0, $noreg :: (volatile load 4 from %ir.ptr2) - STE %2, %1, 0, $noreg :: (volatile store 4 into %ir.ptr2) - STE %3, %1, 0, $noreg :: (volatile store 4 into %ir.ptr2) - STE %4, %1, 0, $noreg :: (volatile store 4 into %ir.ptr2) - STE %5, %1, 0, $noreg :: (volatile store 4 into %ir.ptr2) - STE %6, %1, 0, $noreg :: (volatile store 4 into %ir.ptr2) - STE %7, %1, 0, $noreg :: (volatile store 4 into %ir.ptr2) - STE %8, %1, 0, $noreg :: (volatile store 4 into %ir.ptr2) - STE %9, %1, 0, $noreg :: (volatile store 4 into %ir.ptr2) - STE %10, %1, 0, $noreg :: (volatile store 4 into %ir.ptr2) - STE %11, %1, 0, $noreg :: (volatile store 4 into %ir.ptr2) - STE %12, %1, 0, $noreg :: (volatile store 4 into %ir.ptr2) - STE %13, %1, 0, $noreg :: (volatile store 4 into %ir.ptr2) - STE %14, %1, 0, $noreg :: (volatile store 4 into %ir.ptr2) - STE %15, %1, 0, $noreg :: (volatile store 4 into %ir.ptr2) - STE %16, %1, 0, $noreg :: (volatile store 4 into %ir.ptr2) - STE %17, %1, 0, $noreg :: (volatile store 4 into %ir.ptr2) - STE %18, %1, 0, $noreg :: (volatile store 4 into %ir.ptr2) + %2 = LE %1, 0, $noreg :: (volatile load (s32) from %ir.ptr2) + %3 = LE %1, 0, $noreg :: (volatile load (s32) from %ir.ptr2) + %4 = LE %1, 0, $noreg :: (volatile load (s32) from %ir.ptr2) + %5 = LE %1, 0, $noreg :: (volatile load (s32) from %ir.ptr2) + %6 = LE %1, 0, $noreg :: (volatile load (s32) from %ir.ptr2) + %7 = LE %1, 0, $noreg :: (volatile load (s32) from %ir.ptr2) + %8 = LE %1, 0, $noreg :: (volatile load (s32) from %ir.ptr2) + %9 = LE %1, 0, $noreg :: (volatile load (s32) from %ir.ptr2) + %10 = LE %1, 0, $noreg :: (volatile load (s32) from %ir.ptr2) + %11 = LE %1, 0, $noreg :: (volatile load (s32) from %ir.ptr2) + %12 = LE %1, 0, $noreg :: (volatile load (s32) from %ir.ptr2) + %13 = LE %1, 0, $noreg :: (volatile load (s32) from %ir.ptr2) + %14 = LE %1, 0, $noreg :: (volatile load (s32) from %ir.ptr2) + %15 = LE %1, 0, $noreg :: (volatile load (s32) from %ir.ptr2) + %16 = LE %1, 0, $noreg :: (volatile load (s32) from %ir.ptr2) + %17 = LE %1, 0, $noreg :: (volatile load (s32) from %ir.ptr2) + %18 = LE %1, 0, $noreg :: (volatile load (s32) from %ir.ptr2) + STE %2, %1, 0, $noreg :: (volatile store (s32) into %ir.ptr2) + STE %3, %1, 0, $noreg :: (volatile store (s32) into %ir.ptr2) + STE %4, %1, 0, $noreg :: (volatile store (s32) into %ir.ptr2) + STE %5, %1, 0, $noreg :: (volatile store (s32) into %ir.ptr2) + STE %6, %1, 0, $noreg :: (volatile store (s32) into %ir.ptr2) + STE %7, %1, 0, $noreg :: (volatile store (s32) into %ir.ptr2) + STE %8, %1, 0, $noreg :: (volatile store (s32) into %ir.ptr2) + STE %9, %1, 0, $noreg :: (volatile store (s32) into %ir.ptr2) + STE %10, %1, 0, $noreg :: (volatile store (s32) into %ir.ptr2) + STE %11, %1, 0, $noreg :: (volatile store (s32) into %ir.ptr2) + STE %12, %1, 0, $noreg :: (volatile store (s32) into %ir.ptr2) + STE %13, %1, 0, $noreg :: (volatile store (s32) into %ir.ptr2) + STE %14, %1, 0, $noreg :: (volatile store (s32) into %ir.ptr2) + STE %15, %1, 0, $noreg :: (volatile store (s32) into %ir.ptr2) + STE %16, %1, 0, $noreg :: (volatile store (s32) into %ir.ptr2) + STE %17, %1, 0, $noreg :: (volatile store (s32) into %ir.ptr2) + STE %18, %1, 0, $noreg :: (volatile store (s32) into %ir.ptr2) %19 = LDEBR %2, implicit $fpc - STD %19, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1) + STD %19, %0, 0, $noreg :: (volatile store (s64) into %ir.ptr1) %20 = LDEBR %3, implicit $fpc - STD %20, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1) + STD %20, %0, 0, $noreg :: (volatile store (s64) into %ir.ptr1) %21 = LDEBR %4, implicit $fpc - STD %21, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1) + STD %21, %0, 0, $noreg :: (volatile store (s64) into %ir.ptr1) %22 = LDEBR %5, implicit $fpc - STD %22, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1) + STD %22, %0, 0, $noreg :: (volatile store (s64) into %ir.ptr1) %23 = LDEBR %6, implicit $fpc - STD %23, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1) + STD %23, %0, 0, $noreg :: (volatile store (s64) into %ir.ptr1) %24 = LDEBR %7, implicit $fpc - STD %24, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1) + STD %24, %0, 0, $noreg :: (volatile store (s64) into %ir.ptr1) %25 = LDEBR %8, implicit $fpc - STD %25, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1) + STD %25, %0, 0, $noreg :: (volatile store (s64) into %ir.ptr1) %26 = LDEBR %9, implicit $fpc - STD %26, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1) + STD %26, %0, 0, $noreg :: (volatile store (s64) into %ir.ptr1) %27 = LDEBR %10, implicit $fpc - STD %27, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1) + STD %27, %0, 0, $noreg :: (volatile store (s64) into %ir.ptr1) %28 = LDEBR %11, implicit $fpc - STD %28, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1) + STD %28, %0, 0, $noreg :: (volatile store (s64) into %ir.ptr1) %29 = LDEBR %12, implicit $fpc - STD %29, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1) + STD %29, %0, 0, $noreg :: (volatile store (s64) into %ir.ptr1) %30 = LDEBR %13, implicit $fpc - STD %30, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1) + STD %30, %0, 0, $noreg :: (volatile store (s64) into %ir.ptr1) %31 = LDEBR %14, implicit $fpc - STD %31, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1) + STD %31, %0, 0, $noreg :: (volatile store (s64) into %ir.ptr1) %32 = LDEBR %15, implicit $fpc - STD %32, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1) + STD %32, %0, 0, $noreg :: (volatile store (s64) into %ir.ptr1) %33 = LDEBR %16, implicit $fpc - STD %33, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1) + STD %33, %0, 0, $noreg :: (volatile store (s64) into %ir.ptr1) %34 = LDEBR %17, implicit $fpc - STD %34, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1) + STD %34, %0, 0, $noreg :: (volatile store (s64) into %ir.ptr1) %35 = LDEBR %18, implicit $fpc - STD %35, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1) + STD %35, %0, 0, $noreg :: (volatile store (s64) into %ir.ptr1) Return ... diff --git a/llvm/test/CodeGen/SystemZ/frame-26.mir b/llvm/test/CodeGen/SystemZ/frame-26.mir index ce438a2919db0..1831253becf64 100644 --- a/llvm/test/CodeGen/SystemZ/frame-26.mir +++ b/llvm/test/CodeGen/SystemZ/frame-26.mir @@ -75,10 +75,10 @@ body: | bb.0: liveins: $f0s, $f2s, $r6d - STG killed renamable $r6d, undef renamable $r1d, 0, $noreg :: (store 8 into `i32** undef`) + STG killed renamable $r6d, undef renamable $r1d, 0, $noreg :: (store (s64) into `i32** undef`) renamable $r0d = LARL @g_181 nofpexcept CEBR renamable $f0s, renamable $f2s, implicit-def $cc, implicit $fpc - STG renamable $r0d, undef renamable $r1d, 0, $noreg :: (store 8 into `i32** undef`) + STG renamable $r0d, undef renamable $r1d, 0, $noreg :: (store (s64) into `i32** undef`) BRC 15, 4, %bb.2, implicit killed $cc bb.1: @@ -89,36 +89,36 @@ body: | bb.2: liveins: $f0s, $r0d - STE killed renamable $f0s, undef renamable $r1d, 0, $noreg :: (volatile store 4 into `float* undef`) + STE killed renamable $f0s, undef renamable $r1d, 0, $noreg :: (volatile store (s32) into `float* undef`) renamable $r1d = nuw LA %stack.0, 16, $noreg renamable $r2d = nuw LA %stack.0, 24, $noreg renamable $r3d = LA %stack.0, 40, $noreg renamable $r4d = LARL @g_1390 - STG renamable $r4d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) + STG renamable $r4d, undef renamable $r1d, 0, $noreg :: (store (s64) into `i64*** undef`) renamable $r5d = nuw LA %stack.0, 48, $noreg renamable $r14d = LA %stack.0, 72, $noreg renamable $r13d = LA %stack.0, 80, $noreg renamable $r12d = LA %stack.0, 56, $noreg renamable $r10d = LA %stack.0, 0, $noreg - STG renamable $r10d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) + STG renamable $r10d, undef renamable $r1d, 0, $noreg :: (store (s64) into `i64*** undef`) renamable $r9d = LA %stack.0, 64, $noreg renamable $r8d = LA %stack.0, 88, $noreg renamable $r7d = nuw LA %stack.0, 8, $noreg MVGHI %stack.1, 904, 0 - STG killed renamable $r9d, $noreg, 0, $noreg :: (store 8 into `i64*** null`) - STG killed renamable $r3d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) - STG killed renamable $r14d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) - STG killed renamable $r7d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) - STG killed renamable $r1d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) - STG killed renamable $r4d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) - STG killed renamable $r2d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) - STG killed renamable $r5d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) - STG killed renamable $r8d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) - STG killed renamable $r12d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) - STG killed renamable $r13d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) - STG killed renamable $r10d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) + STG killed renamable $r9d, $noreg, 0, $noreg :: (store (s64) into `i64*** null`) + STG killed renamable $r3d, undef renamable $r1d, 0, $noreg :: (store (s64) into `i64*** undef`) + STG killed renamable $r14d, undef renamable $r1d, 0, $noreg :: (store (s64) into `i64*** undef`) + STG killed renamable $r7d, undef renamable $r1d, 0, $noreg :: (store (s64) into `i64*** undef`) + STG killed renamable $r1d, undef renamable $r1d, 0, $noreg :: (store (s64) into `i64*** undef`) + STG killed renamable $r4d, undef renamable $r1d, 0, $noreg :: (store (s64) into `i64*** undef`) + STG killed renamable $r2d, undef renamable $r1d, 0, $noreg :: (store (s64) into `i64*** undef`) + STG killed renamable $r5d, undef renamable $r1d, 0, $noreg :: (store (s64) into `i64*** undef`) + STG killed renamable $r8d, undef renamable $r1d, 0, $noreg :: (store (s64) into `i64*** undef`) + STG killed renamable $r12d, undef renamable $r1d, 0, $noreg :: (store (s64) into `i64*** undef`) + STG killed renamable $r13d, undef renamable $r1d, 0, $noreg :: (store (s64) into `i64*** undef`) + STG killed renamable $r10d, undef renamable $r1d, 0, $noreg :: (store (s64) into `i64*** undef`) $r2l = LHI 0 - STG killed renamable $r0d, undef renamable $r1d, 0, $noreg :: (store 8 into `i32** undef`) + STG killed renamable $r0d, undef renamable $r1d, 0, $noreg :: (store (s64) into `i32** undef`) Return implicit $r2l ... @@ -162,38 +162,38 @@ body: | bb.0: liveins: $r6d - STG killed renamable $r6d, undef renamable $r1d, 0, $noreg :: (store 8 into `i32** undef`) + STG killed renamable $r6d, undef renamable $r1d, 0, $noreg :: (store (s64) into `i32** undef`) renamable $r0d = LARL @g_181 - STG renamable $r0d, undef renamable $r1d, 0, $noreg :: (store 8 into `i32** undef`) + STG renamable $r0d, undef renamable $r1d, 0, $noreg :: (store (s64) into `i32** undef`) renamable $r1d = nuw LA %stack.0, 16, $noreg renamable $r2d = nuw LA %stack.0, 24, $noreg renamable $r3d = LA %stack.0, 40, $noreg renamable $r4d = LARL @g_1390 - STG renamable $r4d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) + STG renamable $r4d, undef renamable $r1d, 0, $noreg :: (store (s64) into `i64*** undef`) renamable $r5d = nuw LA %stack.0, 48, $noreg renamable $r14d = LA %stack.0, 72, $noreg renamable $r13d = LA %stack.0, 80, $noreg renamable $r12d = LA %stack.0, 56, $noreg renamable $r10d = LA %stack.0, 0, $noreg - STG renamable $r10d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) + STG renamable $r10d, undef renamable $r1d, 0, $noreg :: (store (s64) into `i64*** undef`) renamable $r9d = LA %stack.0, 64, $noreg renamable $r8d = LA %stack.0, 88, $noreg renamable $r7d = nuw LA %stack.0, 8, $noreg MVGHI %stack.1, 904, 0 - STG killed renamable $r9d, $noreg, 0, $noreg :: (store 8 into `i64*** null`) - STG killed renamable $r3d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) - STG killed renamable $r14d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) - STG killed renamable $r7d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) - STG killed renamable $r1d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) - STG killed renamable $r4d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) - STG killed renamable $r2d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) - STG killed renamable $r5d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) - STG killed renamable $r8d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) - STG killed renamable $r12d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) - STG killed renamable $r13d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) - STG killed renamable $r10d, undef renamable $r1d, 0, $noreg :: (store 8 into `i64*** undef`) + STG killed renamable $r9d, $noreg, 0, $noreg :: (store (s64) into `i64*** null`) + STG killed renamable $r3d, undef renamable $r1d, 0, $noreg :: (store (s64) into `i64*** undef`) + STG killed renamable $r14d, undef renamable $r1d, 0, $noreg :: (store (s64) into `i64*** undef`) + STG killed renamable $r7d, undef renamable $r1d, 0, $noreg :: (store (s64) into `i64*** undef`) + STG killed renamable $r1d, undef renamable $r1d, 0, $noreg :: (store (s64) into `i64*** undef`) + STG killed renamable $r4d, undef renamable $r1d, 0, $noreg :: (store (s64) into `i64*** undef`) + STG killed renamable $r2d, undef renamable $r1d, 0, $noreg :: (store (s64) into `i64*** undef`) + STG killed renamable $r5d, undef renamable $r1d, 0, $noreg :: (store (s64) into `i64*** undef`) + STG killed renamable $r8d, undef renamable $r1d, 0, $noreg :: (store (s64) into `i64*** undef`) + STG killed renamable $r12d, undef renamable $r1d, 0, $noreg :: (store (s64) into `i64*** undef`) + STG killed renamable $r13d, undef renamable $r1d, 0, $noreg :: (store (s64) into `i64*** undef`) + STG killed renamable $r10d, undef renamable $r1d, 0, $noreg :: (store (s64) into `i64*** undef`) $r2l = LHI 0 - STG killed renamable $r0d, undef renamable $r1d, 0, $noreg :: (store 8 into `i32** undef`) + STG killed renamable $r0d, undef renamable $r1d, 0, $noreg :: (store (s64) into `i32** undef`) Return implicit $r2l ... diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-56.mir b/llvm/test/CodeGen/SystemZ/int-cmp-56.mir index fdd2d5e8b3930..8038544f4e670 100644 --- a/llvm/test/CodeGen/SystemZ/int-cmp-56.mir +++ b/llvm/test/CodeGen/SystemZ/int-cmp-56.mir @@ -17,7 +17,7 @@ # Test CGR -> CG # CHECK: name: fun1 -# CHECK: CG %10, %stack.0, 0, $noreg, implicit-def $cc :: (load 8 from %stack.0) +# CHECK: CG %10, %stack.0, 0, $noreg, implicit-def $cc :: (load (s64) from %stack.0) # CHECK-NEXT: %12:gr64bit = LOCGHI %12, 8, 14, 12, implicit $cc --- name: fun1 @@ -53,7 +53,7 @@ machineFunctionInfo: {} body: | bb.0: liveins: $r2d - + %0:addr64bit = COPY $r2d %1:gr64bit = LG %0, 0, $noreg %2:gr64bit = LG %0, 16, $noreg @@ -94,7 +94,7 @@ body: | # Test CLGR -> CLG # CHECK: name: fun2 -# CHECK: CLG %10, %stack.0, 0, $noreg, implicit-def $cc :: (load 8 from %stack.0) +# CHECK: CLG %10, %stack.0, 0, $noreg, implicit-def $cc :: (load (s64) from %stack.0) # CHECK-NEXT: %12:gr64bit = LOCGHI %12, 8, 14, 12, implicit $cc --- name: fun2 @@ -130,7 +130,7 @@ machineFunctionInfo: {} body: | bb.0: liveins: $r2d - + %0:addr64bit = COPY $r2d %1:gr64bit = LG %0, 0, $noreg %2:gr64bit = LG %0, 16, $noreg @@ -171,7 +171,7 @@ body: | # Test CR -> C # CHECK: name: fun3 -# CHECK: C %10, %stack.0, 0, $noreg, implicit-def $cc :: (load 4 from %stack.0) +# CHECK: C %10, %stack.0, 0, $noreg, implicit-def $cc :: (load (s32) from %stack.0) # CHECK: %12:gr32bit = LOCHIMux %12, 8, 14, 12, implicit $cc --- name: fun3 @@ -207,7 +207,7 @@ machineFunctionInfo: {} body: | bb.0: liveins: $r2d - + %0:addr64bit = COPY $r2d %1:gr32bit = LMux %0, 0, $noreg %2:gr32bit = LMux %0, 8, $noreg @@ -248,7 +248,7 @@ body: | # Test CLR -> CL # CHECK: name: fun4 -# CHECK: CL %10, %stack.0, 0, $noreg, implicit-def $cc :: (load 4 from %stack.0) +# CHECK: CL %10, %stack.0, 0, $noreg, implicit-def $cc :: (load (s32) from %stack.0) # CHECK: %12:gr32bit = LOCHIMux %12, 8, 14, 12, implicit $cc --- name: fun4 @@ -284,7 +284,7 @@ machineFunctionInfo: {} body: | bb.0: liveins: $r2d - + %0:addr64bit = COPY $r2d %1:gr32bit = LMux %0, 0, $noreg %2:gr32bit = LMux %0, 8, $noreg diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-59.ll b/llvm/test/CodeGen/SystemZ/int-cmp-59.ll index 85f4a68473108..7d00d12dd831c 100644 --- a/llvm/test/CodeGen/SystemZ/int-cmp-59.ll +++ b/llvm/test/CodeGen/SystemZ/int-cmp-59.ll @@ -9,7 +9,7 @@ define i64 @main() { ; CHECK-LABEL: bb.0.entry: ; CHECK: %1:addr64bit = LARL @c -; CHECK: %2:gr64bit = LLGC %1, 0, $noreg :: (dereferenceable load 1 from @c, align 4) +; CHECK: %2:gr64bit = LLGC %1, 0, $noreg :: (dereferenceable load (s8) from @c, align 4) ; CHECK-NEXT: %4:gr64bit = IMPLICIT_DEF ; CHECK-NEXT: %3:gr64bit = RISBGN %4, killed %2, 63, 191, 0 ; CHECK-NEXT: %5:gr64bit = LCGR killed %3, implicit-def dead $cc diff --git a/llvm/test/CodeGen/SystemZ/isel-debug.ll b/llvm/test/CodeGen/SystemZ/isel-debug.ll index 0e48210e9b698..b867dac692879 100644 --- a/llvm/test/CodeGen/SystemZ/isel-debug.ll +++ b/llvm/test/CodeGen/SystemZ/isel-debug.ll @@ -5,7 +5,7 @@ ; ; Check that some debug output is printed without problems. ; CHECK: SystemZAddressingMode -; CHECK: Base t5: i64,ch = load<(load 8 from %ir.0)> +; CHECK: Base t5: i64,ch = load<(load (s64) from %ir.0)> ; CHECK: Index ; CHECK: Disp diff --git a/llvm/test/CodeGen/SystemZ/load-and-test-RA-hints.mir b/llvm/test/CodeGen/SystemZ/load-and-test-RA-hints.mir index f36216e61a293..3c10bb5983733 100644 --- a/llvm/test/CodeGen/SystemZ/load-and-test-RA-hints.mir +++ b/llvm/test/CodeGen/SystemZ/load-and-test-RA-hints.mir @@ -140,8 +140,8 @@ body: | bb.3.bb1: successors: %bb.7(0x30000000), %bb.4(0x50000000) - %5:grx32bit = LMux %21, 0, $noreg :: (load 4 from %ir.lsr.iv35, !tbaa !1) - %6:grx32bit = LMux undef %19:addr64bit, 0, $noreg :: (load 4 from `i32* undef`, !tbaa !1) + %5:grx32bit = LMux %21, 0, $noreg :: (load (s32) from %ir.lsr.iv35, !tbaa !1) + %6:grx32bit = LMux undef %19:addr64bit, 0, $noreg :: (load (s32) from `i32* undef`, !tbaa !1) CHIMux %6, 0, implicit-def $cc BRC 14, 6, %bb.4, implicit killed $cc @@ -149,7 +149,7 @@ body: | J %bb.5 bb.4.bb8: - %20:grx32bit = LMux %22, 0, $noreg :: (load 4 from %ir.0, !tbaa !1) + %20:grx32bit = LMux %22, 0, $noreg :: (load (s32) from %ir.0, !tbaa !1) CHIMux %20, 0, implicit-def $cc %25:grx32bit = LOCRMux %25, %5, 14, 8, implicit $cc %24:grx32bit = LOCRMux %24, %23, 14, 8, implicit killed $cc diff --git a/llvm/test/CodeGen/SystemZ/multiselect-02.mir b/llvm/test/CodeGen/SystemZ/multiselect-02.mir index 67f4f04e8d8ed..ba23ee5361aad 100644 --- a/llvm/test/CodeGen/SystemZ/multiselect-02.mir +++ b/llvm/test/CodeGen/SystemZ/multiselect-02.mir @@ -17,7 +17,7 @@ tracksRegLiveness: true body: | bb.0.entry: %1:addr64bit = IMPLICIT_DEF - %0:gr32bit = LLC %1, 0, $noreg :: (load 1 from `i8* undef`) + %0:gr32bit = LLC %1, 0, $noreg :: (load (s8) from `i8* undef`) CHI killed %0, 0, implicit-def $cc %2:gr32bit = LHI 2 %3:gr32bit = LHI 8 @@ -27,7 +27,7 @@ body: | %6:gr64bit = INSERT_SUBREG %7, killed %5, %subreg.subreg_l32 %8:gr128bit = ZEXT128 killed %6 %10:addr64bit = IMPLICIT_DEF - %9:gr128bit = DL %8, %10, 0, $noreg :: (load 4 from `i64* undef` + 4) + %9:gr128bit = DL %8, %10, 0, $noreg :: (load (s32) from `i64* undef` + 4) %11:gr32bit = COPY %9.subreg_l32 %12:gr64bit = LGHI 2 %13:gr64bit = LGHI 8 diff --git a/llvm/test/CodeGen/SystemZ/postra-sched-expandedops.mir b/llvm/test/CodeGen/SystemZ/postra-sched-expandedops.mir index c3ebec184241f..b093715457d8e 100644 --- a/llvm/test/CodeGen/SystemZ/postra-sched-expandedops.mir +++ b/llvm/test/CodeGen/SystemZ/postra-sched-expandedops.mir @@ -70,21 +70,21 @@ body: | $r1d = LGR $r3d, implicit-def $r0q renamable $r3d = LARL @TTSize renamable $r0d = LLILL 0, implicit killed $r0q, implicit-def $r0q - renamable $r0q = DL killed renamable $r0q, killed renamable $r3d, 0, $noreg :: (dereferenceable load 4 from @TTSize) - renamable $r3d = LGRL @AS_TTable :: (dereferenceable load 8 from @AS_TTable) + renamable $r0q = DL killed renamable $r0q, killed renamable $r3d, 0, $noreg :: (dereferenceable load (s32) from @TTSize) + renamable $r3d = LGRL @AS_TTable :: (dereferenceable load (s64) from @AS_TTable) renamable $r1d = LLGFR renamable $r0l, implicit killed $r0q renamable $r5d = LARL @Variant renamable $r0l = LHI -3 - renamable $r0l = A killed renamable $r0l, killed renamable $r5d, 0, $noreg, implicit-def dead $cc :: (dereferenceable load 4 from @Variant) + renamable $r0l = A killed renamable $r0l, killed renamable $r5d, 0, $noreg, implicit-def dead $cc :: (dereferenceable load (s32) from @Variant) CLFI killed renamable $r0l, 1, implicit-def $cc renamable $r0l = LHI 1 renamable $r0l = LOCHI killed renamable $r0l, 3, 14, 2, implicit killed $cc - STC killed renamable $r0l, undef renamable $r1d, 0, $noreg :: (store 1 into `i8* undef`) - ST renamable $r2l, undef renamable $r1d, 0, $noreg, implicit killed $r2d :: (store 4 into `i32* undef`) - STC renamable $r4l, $noreg, 0, $noreg, implicit killed $r4d :: (store 1 into `i8* null`) + STC killed renamable $r0l, undef renamable $r1d, 0, $noreg :: (store (s8) into `i8* undef`) + ST renamable $r2l, undef renamable $r1d, 0, $noreg, implicit killed $r2d :: (store (s32) into `i32* undef`) + STC renamable $r4l, $noreg, 0, $noreg, implicit killed $r4d :: (store (s8) into `i8* null`) renamable $r1d = MGHI killed renamable $r1d, 20 renamable $r0l = LHI 0 - STC killed renamable $r0l, killed renamable $r3d, 2, killed renamable $r1d :: (store 1 into %ir.tmp11, align 2) + STC killed renamable $r0l, killed renamable $r3d, 2, killed renamable $r1d :: (store (s8) into %ir.tmp11, align 2) Return ... diff --git a/llvm/test/CodeGen/SystemZ/regalloc-GR128-02.mir b/llvm/test/CodeGen/SystemZ/regalloc-GR128-02.mir index e6a1e7e50cbed..1a71c8f5fb7d6 100644 --- a/llvm/test/CodeGen/SystemZ/regalloc-GR128-02.mir +++ b/llvm/test/CodeGen/SystemZ/regalloc-GR128-02.mir @@ -47,7 +47,7 @@ body: | %1:addr64bit = LARL @g_193 %2:gr32bit = IIFMux 2899813578 %3:gr32bit = COPY killed %2 - %3:gr32bit = O %3, killed %1, 0, $noreg, implicit-def dead $cc :: (dereferenceable load 4 from @g_193) + %3:gr32bit = O %3, killed %1, 0, $noreg, implicit-def dead $cc :: (dereferenceable load (s32) from @g_193) %4:gr64bit = LGFI -1395153718 undef %5.subreg_l64:gr128bit = COPY killed %4 %6:gr128bit = COPY killed %5 @@ -59,7 +59,7 @@ body: | bb.1 (%ir-block.4): %8:gr32bit = LHIMux -9 - STRL killed %8, @g_74 :: (store 4 into @g_74) + STRL killed %8, @g_74 :: (store (s32) into @g_74) Return bb.2 (%ir-block.5): diff --git a/llvm/test/CodeGen/SystemZ/regcoal-undef-lane-4-rm-cp-commuting-def.mir b/llvm/test/CodeGen/SystemZ/regcoal-undef-lane-4-rm-cp-commuting-def.mir index 86e22ad36447c..0bae4e5ae24a7 100644 --- a/llvm/test/CodeGen/SystemZ/regcoal-undef-lane-4-rm-cp-commuting-def.mir +++ b/llvm/test/CodeGen/SystemZ/regcoal-undef-lane-4-rm-cp-commuting-def.mir @@ -35,7 +35,7 @@ body: | ; CHECK: J %bb.4 ; CHECK: bb.2: ; CHECK: successors: - ; CHECK: STMux %20.subreg_l32, undef %8:addr64bit, 0, $noreg :: (store 4 into `i32* undef`) + ; CHECK: STMux %20.subreg_l32, undef %8:addr64bit, 0, $noreg :: (store (s32) into `i32* undef`) ; CHECK: bb.3: ; CHECK: successors: ; CHECK: bb.4: @@ -84,7 +84,7 @@ body: | bb.2: successors: - STMux killed %4, undef %22:addr64bit, 0, $noreg :: (store 4 into `i32* undef`) + STMux killed %4, undef %22:addr64bit, 0, $noreg :: (store (s32) into `i32* undef`) bb.3: successors: diff --git a/llvm/test/CodeGen/SystemZ/subregliveness-06.mir b/llvm/test/CodeGen/SystemZ/subregliveness-06.mir index 55af0a1a717ef..61f690112ed92 100644 --- a/llvm/test/CodeGen/SystemZ/subregliveness-06.mir +++ b/llvm/test/CodeGen/SystemZ/subregliveness-06.mir @@ -188,47 +188,47 @@ body: | undef %15.subreg_l32:gr64bit = COPY %2 %17:addr64bit = LGBR killed %15 %5:addr64bit = LA %17, 1, %18 - dead %19:grx32bit = LBMux killed %17, 1, %18 :: (volatile load 1 from %ir.tmp12, !tbaa !1) + dead %19:grx32bit = LBMux killed %17, 1, %18 :: (volatile load (s8) from %ir.tmp12, !tbaa !1) CHIMux %14, 0, implicit-def $cc BRC 14, 6, %bb.7, implicit killed $cc J %bb.2 bb.2.bb14: - %21:addr64bit = LGRL @g_195 :: (dereferenceable load 8 from @g_195, !tbaa !4) - dead %22:grx32bit = LBMux %5, 0, $noreg :: (volatile load 1 from %ir.tmp12, !tbaa !1) - MVHI %stack.0.tmp, 0, 7 :: (store 4 into %ir.tmp, !tbaa !6) - STRL %23, @g_69 :: (store 4 into @g_69, !tbaa !6) + %21:addr64bit = LGRL @g_195 :: (dereferenceable load (s64) from @g_195, !tbaa !4) + dead %22:grx32bit = LBMux %5, 0, $noreg :: (volatile load (s8) from %ir.tmp12, !tbaa !1) + MVHI %stack.0.tmp, 0, 7 :: (store (s32) into %ir.tmp, !tbaa !6) + STRL %23, @g_69 :: (store (s32) into @g_69, !tbaa !6) undef %24.subreg_l32:gr64bit = COPY %4 - %27:gr64bit = LLGC %26, 7, $noreg :: (dereferenceable load 1 from @g_352 + 7, !tbaa !8) + %27:gr64bit = LLGC %26, 7, $noreg :: (dereferenceable load (s8) from @g_352 + 7, !tbaa !8) %28:gr64bit = COPY killed %27 %28:gr64bit = RNSBG %28, killed %24, 0, 63, 0, implicit-def dead $cc CGHI killed %28, 1, implicit-def $cc %30:gr64bit = COPY %29 %30:gr64bit = LOCGHI %30, 1, 14, 6, implicit killed $cc - %31:addr64bit = LG killed %21, 0, $noreg :: (load 8 from %ir.tmp15) - STG killed %30, killed %31, 0, $noreg :: (store 8 into %ir.tmp22) + %31:addr64bit = LG killed %21, 0, $noreg :: (load (s64) from %ir.tmp15) + STG killed %30, killed %31, 0, $noreg :: (store (s64) into %ir.tmp22) %34:gr32bit = COPY %33 - %34:gr32bit = X %34, %32, 0, $noreg, implicit-def dead $cc :: (dereferenceable load 4 from @g_334, !tbaa !6) - STRL killed %34, @g_334 :: (store 4 into @g_334, !tbaa !6) + %34:gr32bit = X %34, %32, 0, $noreg, implicit-def dead $cc :: (dereferenceable load (s32) from @g_334, !tbaa !6) + STRL killed %34, @g_334 :: (store (s32) into @g_334, !tbaa !6) %35:gr32bit = LLCRMux killed %4 %36:gr32bit = COPY killed %35 %36:gr32bit = MHI %36, 26036 - %7:addr64bit = LG %1, 0, $noreg :: (dereferenceable load 8 from %ir.tmp5) + %7:addr64bit = LG %1, 0, $noreg :: (dereferenceable load (s64) from %ir.tmp5) bb.3.bb28: %37:gr32bit = COPY killed %36 %37:gr32bit = MSR %37, killed %3 %38:gr32bit = LLHRMux killed %37 - STRL killed %38, @g_69 :: (store 4 into @g_69, !tbaa !6) - MVI %39, 0, 0 :: (store 1 into @g_226, align 2, !tbaa !1) + STRL killed %38, @g_69 :: (store (s32) into @g_69, !tbaa !6) + MVI %39, 0, 0 :: (store (s8) into @g_226, align 2, !tbaa !1) J %bb.4 bb.4.bb32: - STHRL %33, @g_1055 :: (store 2 into @g_1055, !tbaa !10) - STGRL %29, @g_352 :: (store 8 into @g_352, !tbaa !8) - STG %32, undef %43:addr64bit, 0, $noreg :: (store 8 into `i32** undef`) - STGRL %44, @g_352 :: (store 8 into @g_352, !tbaa !8) - STG %32, $noreg, 0, $noreg :: (store 8 into `i32** null`) + STHRL %33, @g_1055 :: (store (s16) into @g_1055, !tbaa !10) + STGRL %29, @g_352 :: (store (s64) into @g_352, !tbaa !8) + STG %32, undef %43:addr64bit, 0, $noreg :: (store (s64) into `i32** undef`) + STGRL %44, @g_352 :: (store (s64) into @g_352, !tbaa !8) + STG %32, $noreg, 0, $noreg :: (store (s64) into `i32** null`) bb.5.bb34: successors: %bb.4(0x7c000000), %bb.6(0x04000000) @@ -238,16 +238,16 @@ body: | J %bb.6 bb.6.bb35: - STG %46, undef %47:addr64bit, 0, $noreg :: (store 8 into `i32** undef`) - MVI %48, 0, 0 :: (store 1 into @0, align 2, !tbaa !1) - STHRL %49, @g_189 :: (store 2 into @g_189, !tbaa !10) - MVI %50, 0, 1 :: (store 1 into @g_54, align 2, !tbaa !1) - MVI %51, 0, 1 :: (store 1 into @g_747, align 2) - MVGHI undef %52:addr64bit, 0, 0 :: (store 8 into `i64* undef`) - %53:gr64bit = LG $noreg, 0, $noreg :: (load 8 from %ir.tmp36) - %54:addr64bit = LG killed %7, 0, $noreg :: (load 8 from %ir.tmp27) - STG killed %53, killed %54, 0, $noreg :: (store 8 into %ir.tmp38) - STHRL %14, @g_189 :: (store 2 into @g_189, !tbaa !10) + STG %46, undef %47:addr64bit, 0, $noreg :: (store (s64) into `i32** undef`) + MVI %48, 0, 0 :: (store (s8) into @0, align 2, !tbaa !1) + STHRL %49, @g_189 :: (store (s16) into @g_189, !tbaa !10) + MVI %50, 0, 1 :: (store (s8) into @g_54, align 2, !tbaa !1) + MVI %51, 0, 1 :: (store (s8) into @g_747, align 2) + MVGHI undef %52:addr64bit, 0, 0 :: (store (s64) into `i64* undef`) + %53:gr64bit = LG $noreg, 0, $noreg :: (load (s64) from %ir.tmp36) + %54:addr64bit = LG killed %7, 0, $noreg :: (load (s64) from %ir.tmp27) + STG killed %53, killed %54, 0, $noreg :: (store (s64) into %ir.tmp38) + STHRL %14, @g_189 :: (store (s16) into @g_189, !tbaa !10) %60:grx32bit = LHIMux 0 %64:grx32bit = COPY killed %60 J %bb.8 @@ -259,7 +259,7 @@ body: | successors: %bb.1(0x7fffffff), %bb.9(0x00000001) %8:grx32bit = COPY killed %64 - dead %59:grx32bit = LBMux killed %5, 0, $noreg :: (volatile load 1 from %ir.tmp12, !tbaa !1) + dead %59:grx32bit = LBMux killed %5, 0, $noreg :: (volatile load (s8) from %ir.tmp12, !tbaa !1) %9:grx32bit = COPY killed %2 %9:grx32bit = AHIMux %9, 1, implicit-def dead $cc %58:grx32bit = LHIMux 0 diff --git a/llvm/test/CodeGen/SystemZ/subregliveness-07.mir b/llvm/test/CodeGen/SystemZ/subregliveness-07.mir index e8f951ea2a159..f3e190392e0be 100644 --- a/llvm/test/CodeGen/SystemZ/subregliveness-07.mir +++ b/llvm/test/CodeGen/SystemZ/subregliveness-07.mir @@ -65,7 +65,7 @@ body: | J %bb.6 bb.6: - %6:grx32bit = LBMux %17, 0, $noreg :: (dereferenceable load 1 from @g_105, align 2) + %6:grx32bit = LBMux %17, 0, $noreg :: (dereferenceable load (s8) from @g_105, align 2) %19:grx32bit = COPY killed %6 %20:gr32bit = COPY killed %4 %21:gr32bit = IMPLICIT_DEF @@ -73,6 +73,6 @@ body: | bb.7: %18:addr64bit = LARL @g_149 - STCMux killed %4, killed %18, 0, $noreg :: (store 1 into @g_149, align 2) + STCMux killed %4, killed %18, 0, $noreg :: (store (s8) into @g_149, align 2) ... diff --git a/llvm/test/CodeGen/Thumb/PR36658.mir b/llvm/test/CodeGen/Thumb/PR36658.mir index 3c988597b932f..26721d68cc937 100644 --- a/llvm/test/CodeGen/Thumb/PR36658.mir +++ b/llvm/test/CodeGen/Thumb/PR36658.mir @@ -226,7 +226,7 @@ body: | renamable $r0, dead $cpsr = tLSLri killed renamable $r2, 2, 14, $noreg renamable $r1 = tLEApcrelJT %jump-table.1, 14, $noreg - renamable $r0 = tLDRr killed renamable $r1, killed renamable $r0, 14, $noreg :: (load 4 from jump-table) + renamable $r0 = tLDRr killed renamable $r1, killed renamable $r0, 14, $noreg :: (load (s32) from jump-table) tBR_JTr killed renamable $r0, %jump-table.1 bb.19.sw.bb93: @@ -244,7 +244,7 @@ body: | renamable $r1, dead $cpsr = tLSLri killed renamable $r2, 2, 14, $noreg renamable $r2 = tLEApcrelJT %jump-table.0, 14, $noreg - renamable $r2 = tLDRr killed renamable $r2, killed renamable $r1, 14, $noreg :: (load 4 from jump-table) + renamable $r2 = tLDRr killed renamable $r2, killed renamable $r1, 14, $noreg :: (load (s32) from jump-table) $r1 = tMOVSr $r0, implicit-def dead $cpsr tBR_JTr killed renamable $r2, %jump-table.0 @@ -284,7 +284,7 @@ body: | liveins: $r4 renamable $r0, dead $cpsr = tMOVi8 1, 14, $noreg - tSTRi killed renamable $r0, killed renamable $r4, 0, 14, $noreg :: (store 4 into %ir.ignore_ptr) + tSTRi killed renamable $r0, killed renamable $r4, 0, 14, $noreg :: (store (s32) into %ir.ignore_ptr) renamable $r1, dead $cpsr = tMOVi8 0, 14, $noreg tB %bb.28, 14, $noreg diff --git a/llvm/test/CodeGen/Thumb/machine-cse-deadreg.mir b/llvm/test/CodeGen/Thumb/machine-cse-deadreg.mir index 30ca5513a456d..e4db7abeea354 100644 --- a/llvm/test/CodeGen/Thumb/machine-cse-deadreg.mir +++ b/llvm/test/CodeGen/Thumb/machine-cse-deadreg.mir @@ -38,9 +38,9 @@ body: | %1:tgpr = COPY $r1 %0:tgpr = COPY $r0 - %2:tgpr = tLDRpci %const.0, 14, $noreg :: (load 4 from constant-pool) + %2:tgpr = tLDRpci %const.0, 14, $noreg :: (load (s32) from constant-pool) %3:tgpr, dead $cpsr = tADDrr %0, %2, 14, $noreg - %4:tgpr = tLDRpci %const.1, 14, $noreg :: (load 4 from constant-pool) + %4:tgpr = tLDRpci %const.1, 14, $noreg :: (load (s32) from constant-pool) %5:tgpr, $cpsr = tADDrr %0, %2, 14, $noreg %6:tgpr, $cpsr = tADC %1, killed %4, 14, $noreg, implicit $cpsr tBcc %bb.2, 3, $cpsr @@ -81,9 +81,9 @@ body: | %1:tgpr = COPY $r1 %0:tgpr = COPY $r0 - %2:tgpr = tLDRpci %const.0, 14, $noreg :: (load 4 from constant-pool) + %2:tgpr = tLDRpci %const.0, 14, $noreg :: (load (s32) from constant-pool) %3:tgpr, dead $cpsr = tADDrr %0, %2, 14, $noreg - %4:tgpr = tLDRpci %const.1, 14, $noreg :: (load 4 from constant-pool) + %4:tgpr = tLDRpci %const.1, 14, $noreg :: (load (s32) from constant-pool) %5:tgpr, dead $cpsr = tADDrr %0, %2, 14, $noreg %6:tgpr, $cpsr = tADDrr %1, killed %4, 14, $noreg tBcc %bb.2, 3, $cpsr diff --git a/llvm/test/CodeGen/Thumb/tbb-reuse.mir b/llvm/test/CodeGen/Thumb/tbb-reuse.mir index 5eb40dc09831b..b18a68528c596 100644 --- a/llvm/test/CodeGen/Thumb/tbb-reuse.mir +++ b/llvm/test/CodeGen/Thumb/tbb-reuse.mir @@ -5,17 +5,17 @@ source_filename = "" target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "thumbv6m--none-eabi" - + declare void @exit0() - + declare void @exit1(i32) - + declare void @exit2() - + declare void @exit3() - + declare void @exit4() - + define void @jump_table(i32 %val, i32 %arg2, i32 %arg3, i32 %arg4) { entry: switch i32 %val, label %default [ @@ -24,33 +24,33 @@ i32 3, label %lab3 i32 4, label %lab4 ] - + default: ; preds = %entry tail call void @exit0() ret void - + lab1: ; preds = %entry %b = sub i32 %val, 1 %a = shl i32 %b, 2 tail call void @exit1(i32 %a) ret void - + lab2: ; preds = %entry tail call void @exit2() ret void - + lab3: ; preds = %entry tail call void @exit3() ret void - + lab4: ; preds = %entry tail call void @exit4() ret void } - + ; Function Attrs: nounwind declare void @llvm.stackprotector(i8*, i8**) #0 - + attributes #0 = { nounwind } ... @@ -62,23 +62,23 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -liveins: +liveins: - { reg: '$r0' } -calleeSavedRegisters: [ '$lr', '$d8', '$d9', '$d10', '$d11', '$d12', '$d13', - '$d14', '$d15', '$q4', '$q5', '$q6', '$q7', '$r4', - '$r5', '$r6', '$r7', '$r8', '$r9', '$r10', '$r11', - '$s16', '$s17', '$s18', '$s19', '$s20', '$s21', - '$s22', '$s23', '$s24', '$s25', '$s26', '$s27', - '$s28', '$s29', '$s30', '$s31', '$d8_d10', '$d9_d11', - '$d10_d12', '$d11_d13', '$d12_d14', '$d13_d15', - '$q4_q5', '$q5_q6', '$q6_q7', '$q4_q5_q6_q7', '$r4_r5', - '$r6_r7', '$r8_r9', '$r10_r11', '$d8_d9_d10', '$d9_d10_d11', - '$d10_d11_d12', '$d11_d12_d13', '$d12_d13_d14', - '$d13_d14_d15', '$d8_d10_d12', '$d9_d11_d13', '$d10_d12_d14', - '$d11_d13_d15', '$d8_d10_d12_d14', '$d9_d11_d13_d15', - '$d9_d10', '$d11_d12', '$d13_d14', '$d9_d10_d11_d12', +calleeSavedRegisters: [ '$lr', '$d8', '$d9', '$d10', '$d11', '$d12', '$d13', + '$d14', '$d15', '$q4', '$q5', '$q6', '$q7', '$r4', + '$r5', '$r6', '$r7', '$r8', '$r9', '$r10', '$r11', + '$s16', '$s17', '$s18', '$s19', '$s20', '$s21', + '$s22', '$s23', '$s24', '$s25', '$s26', '$s27', + '$s28', '$s29', '$s30', '$s31', '$d8_d10', '$d9_d11', + '$d10_d12', '$d11_d13', '$d12_d14', '$d13_d15', + '$q4_q5', '$q5_q6', '$q6_q7', '$q4_q5_q6_q7', '$r4_r5', + '$r6_r7', '$r8_r9', '$r10_r11', '$d8_d9_d10', '$d9_d10_d11', + '$d10_d11_d12', '$d11_d12_d13', '$d12_d13_d14', + '$d13_d14_d15', '$d8_d10_d12', '$d9_d11_d13', '$d10_d12_d14', + '$d11_d13_d15', '$d8_d10_d12_d14', '$d9_d11_d13_d15', + '$d9_d10', '$d11_d12', '$d13_d14', '$d9_d10_d11_d12', '$d11_d12_d13_d14' ] -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -92,22 +92,22 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false -stack: +stack: - { id: 0, type: spill-slot, offset: -4, size: 4, alignment: 4, callee-saved-register: '$lr', callee-saved-restored: false } - { id: 1, type: spill-slot, offset: -8, size: 4, alignment: 4, callee-saved-register: '$r7' } -jumpTable: +jumpTable: kind: inline - entries: + entries: - id: 0 blocks: [ '%bb.3.lab1', '%bb.4.lab2', '%bb.5.lab3', '%bb.6.lab4' ] # r1 is redefined in the middle of the recognizable jump sequence - it shouldn't be clobbered! -# CHECK-NOT: tTBB_JT +# CHECK-NOT: tTBB_JT body: | bb.0.entry: successors: %bb.2.default(0x19999998), %bb.1.entry(0x66666668) liveins: $r0, $r7, $lr - + frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp frame-setup CFI_INSTRUCTION def_cfa_offset 8 frame-setup CFI_INSTRUCTION offset $lr, -4 @@ -115,35 +115,35 @@ body: | $r1, dead $cpsr = tSUBi3 $r0, 1, 14, $noreg tCMPi8 $r1, 3, 14, $noreg, implicit-def $cpsr tBcc %bb.2.default, 8, killed $cpsr - + bb.1.entry: successors: %bb.3.lab1(0x20000000), %bb.4.lab2(0x20000000), %bb.5.lab3(0x20000000), %bb.6.lab4(0x20000000) liveins: $r0, $r1 - + $r1, dead $cpsr = tLSLri killed $r1, 2, 14, $noreg $r2 = tLEApcrelJT %jump-table.0, 14, $noreg - $r2 = tLDRr killed $r1, killed $r2, 14, $noreg :: (load 4 from jump-table) + $r2 = tLDRr killed $r1, killed $r2, 14, $noreg :: (load (s32) from jump-table) $r1, dead $cpsr = tLSLri $r2, 2, 14, $noreg tBR_JTr killed $r2, %jump-table.0 - + bb.2.default: tBL 14, $noreg, @exit0, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp tPOP_RET 14, $noreg, def $r7, def $pc, implicit-def $sp, implicit $sp - + bb.3.lab1: liveins: $r0,$r1 - + tBL 14, $noreg, @exit1, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit $r0, implicit-def $sp tPOP_RET 14, $noreg, def $r7, def $pc, implicit-def $sp, implicit $sp - + bb.4.lab2: tBL 14, $noreg, @exit2, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp tPOP_RET 14, $noreg, def $r7, def $pc, implicit-def $sp, implicit $sp - + bb.5.lab3: tBL 14, $noreg, @exit3, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp tPOP_RET 14, $noreg, def $r7, def $pc, implicit-def $sp, implicit $sp - + bb.6.lab4: tBL 14, $noreg, @exit4, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp tPOP_RET 14, $noreg, def $r7, def $pc, implicit-def $sp, implicit $sp diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/add_reduce.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/add_reduce.mir index c130d500bcc24..4b39743fc79da 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/add_reduce.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/add_reduce.mir @@ -157,16 +157,16 @@ body: | ; CHECK: frame-setup CFI_INSTRUCTION offset $r6, -16 ; CHECK: frame-setup CFI_INSTRUCTION offset $r5, -20 ; CHECK: frame-setup CFI_INSTRUCTION offset $r4, -24 - ; CHECK: renamable $r12 = t2LDRi12 $sp, 48, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.6, align 8) + ; CHECK: renamable $r12 = t2LDRi12 $sp, 48, 14 /* CC::al */, $noreg :: (load (s32) from %fixed-stack.6, align 8) ; CHECK: renamable $r5 = t2ADDri renamable $r12, 3, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r7, dead $cpsr = tLSRri killed renamable $r5, 2, 14 /* CC::al */, $noreg ; CHECK: dead $lr = t2WLS renamable $r7, %bb.3 ; CHECK: bb.1.for.body.lr.ph: ; CHECK: successors: %bb.2(0x80000000) ; CHECK: liveins: $r0, $r1, $r2, $r3, $r7, $r12 - ; CHECK: $r6, $r5 = t2LDRDi8 $sp, 40, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.4, align 8), (load 4 from %fixed-stack.5) + ; CHECK: $r6, $r5 = t2LDRDi8 $sp, 40, 14 /* CC::al */, $noreg :: (load (s32) from %fixed-stack.4, align 8), (load (s32) from %fixed-stack.5) ; CHECK: $r4 = tMOVr killed $r7, 14 /* CC::al */, $noreg - ; CHECK: $r7, $r8 = t2LDRDi8 $sp, 24, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8), (load 4 from %fixed-stack.1) + ; CHECK: $r7, $r8 = t2LDRDi8 $sp, 24, 14 /* CC::al */, $noreg :: (load (s32) from %fixed-stack.0, align 8), (load (s32) from %fixed-stack.1) ; CHECK: renamable $q0 = MVE_VDUP32 killed renamable $r5, 0, $noreg, undef renamable $q0 ; CHECK: renamable $q1 = MVE_VDUP32 killed renamable $r6, 0, $noreg, undef renamable $q1 ; CHECK: renamable $r5, dead $cpsr = tSUBi3 killed renamable $r7, 4, 14 /* CC::al */, $noreg @@ -175,9 +175,9 @@ body: | ; CHECK: liveins: $q0, $q1, $r0, $r1, $r2, $r3, $r4, $r5, $r8, $r12 ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r12, 0, $noreg ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRWU32_post killed renamable $r1, 4, 1, renamable $vpr :: (load 16 from %ir.input_2_cast, align 4) + ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRWU32_post killed renamable $r1, 4, 1, renamable $vpr :: (load (s128) from %ir.input_2_cast, align 4) ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $r0, renamable $q3 = MVE_VLDRWU32_post killed renamable $r0, 4, 1, renamable $vpr :: (load 16 from %ir.input_1_cast, align 4) + ; CHECK: renamable $r0, renamable $q3 = MVE_VLDRWU32_post killed renamable $r0, 4, 1, renamable $vpr :: (load (s128) from %ir.input_1_cast, align 4) ; CHECK: renamable $q2 = MVE_VADD_qr_i32 killed renamable $q2, renamable $r3, 0, $noreg, undef renamable $q2 ; CHECK: renamable $q3 = MVE_VADD_qr_i32 killed renamable $q3, renamable $r2, 0, $noreg, undef renamable $q3 ; CHECK: $lr = tMOVr $r4, 14 /* CC::al */, $noreg @@ -189,7 +189,7 @@ body: | ; CHECK: renamable $q2 = MVE_VMAXu32 killed renamable $q2, renamable $q1, 1, renamable $vpr, undef renamable $q2 ; CHECK: renamable $q2 = MVE_VMINu32 killed renamable $q2, renamable $q0, 1, killed renamable $vpr, undef renamable $q2 ; CHECK: renamable $r6 = MVE_VADDVu32no_acc killed renamable $q2, 0, $noreg - ; CHECK: early-clobber renamable $r5 = t2STR_PRE killed renamable $r6, killed renamable $r5, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.scevgep2) + ; CHECK: early-clobber renamable $r5 = t2STR_PRE killed renamable $r6, killed renamable $r5, 4, 14 /* CC::al */, $noreg :: (store (s32) into %ir.scevgep2) ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.2 ; CHECK: bb.3.for.cond.cleanup: ; CHECK: $r0, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg @@ -206,7 +206,7 @@ body: | frame-setup CFI_INSTRUCTION offset $r6, -16 frame-setup CFI_INSTRUCTION offset $r5, -20 frame-setup CFI_INSTRUCTION offset $r4, -24 - renamable $r12 = t2LDRi12 $sp, 48, 14, $noreg :: (load 4 from %fixed-stack.0, align 8) + renamable $r12 = t2LDRi12 $sp, 48, 14, $noreg :: (load (s32) from %fixed-stack.0, align 8) renamable $r5 = t2ADDri renamable $r12, 3, 14, $noreg, $noreg renamable $r7, dead $cpsr = tLSRri killed renamable $r5, 2, 14, $noreg $lr = t2WhileLoopStartLR renamable $r7, %bb.3, implicit-def dead $cpsr @@ -216,9 +216,9 @@ body: | successors: %bb.2(0x80000000) liveins: $r0, $r1, $r2, $r3, $r7, $r12 - $r6, $r5 = t2LDRDi8 $sp, 40, 14, $noreg :: (load 4 from %fixed-stack.2, align 8), (load 4 from %fixed-stack.1) + $r6, $r5 = t2LDRDi8 $sp, 40, 14, $noreg :: (load (s32) from %fixed-stack.2, align 8), (load (s32) from %fixed-stack.1) $r4 = tMOVr killed $r7, 14, $noreg - $r7, $r8 = t2LDRDi8 $sp, 24, 14, $noreg :: (load 4 from %fixed-stack.6, align 8), (load 4 from %fixed-stack.5) + $r7, $r8 = t2LDRDi8 $sp, 24, 14, $noreg :: (load (s32) from %fixed-stack.6, align 8), (load (s32) from %fixed-stack.5) renamable $q0 = MVE_VDUP32 killed renamable $r5, 0, $noreg, undef renamable $q0 renamable $q1 = MVE_VDUP32 killed renamable $r6, 0, $noreg, undef renamable $q1 renamable $r5, dead $cpsr = tSUBi3 killed renamable $r7, 4, 14, $noreg @@ -229,9 +229,9 @@ body: | renamable $vpr = MVE_VCTP32 renamable $r12, 0, $noreg MVE_VPST 8, implicit $vpr - renamable $r1, renamable $q2 = MVE_VLDRWU32_post killed renamable $r1, 4, 1, renamable $vpr :: (load 16 from %ir.input_2_cast, align 4) + renamable $r1, renamable $q2 = MVE_VLDRWU32_post killed renamable $r1, 4, 1, renamable $vpr :: (load (s128) from %ir.input_2_cast, align 4) MVE_VPST 8, implicit $vpr - renamable $r0, renamable $q3 = MVE_VLDRWU32_post killed renamable $r0, 4, 1, renamable $vpr :: (load 16 from %ir.input_1_cast, align 4) + renamable $r0, renamable $q3 = MVE_VLDRWU32_post killed renamable $r0, 4, 1, renamable $vpr :: (load (s128) from %ir.input_1_cast, align 4) renamable $q2 = MVE_VADD_qr_i32 killed renamable $q2, renamable $r3, 0, $noreg, undef renamable $q2 renamable $q3 = MVE_VADD_qr_i32 killed renamable $q3, renamable $r2, 0, $noreg, undef renamable $q3 $lr = tMOVr $r4, 14, $noreg @@ -243,7 +243,7 @@ body: | renamable $q2 = MVE_VMAXu32 killed renamable $q2, renamable $q1, 1, renamable $vpr, undef renamable $q2 renamable $q2 = MVE_VMINu32 killed renamable $q2, renamable $q0, 1, killed renamable $vpr, undef renamable $q2 renamable $r6 = MVE_VADDVu32no_acc killed renamable $q2, 0, $noreg - early-clobber renamable $r5 = t2STR_PRE killed renamable $r6, killed renamable $r5, 4, 14, $noreg :: (store 4 into %ir.scevgep2) + early-clobber renamable $r5 = t2STR_PRE killed renamable $r6, killed renamable $r5, 4, 14, $noreg :: (store (s32) into %ir.scevgep2) renamable $lr = t2LoopDec killed renamable $lr, 1 t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr tB %bb.3, 14, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/begin-vpt-without-inst.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/begin-vpt-without-inst.mir index 1930acad3ec49..3f8f9eb6d91eb 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/begin-vpt-without-inst.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/begin-vpt-without-inst.mir @@ -65,7 +65,7 @@ body: | ; CHECK: liveins: $r0 ; CHECK: renamable $r1 = tLEApcrel %const.0, 14 /* CC::al */, $noreg ; CHECK: renamable $q0 = MVE_VMOVimmi32 3, 0, $noreg, undef renamable $q0 - ; CHECK: renamable $q1 = MVE_VLDRWU32 killed renamable $r1, 0, 0, $noreg :: (load 16 from constant-pool, align 8) + ; CHECK: renamable $q1 = MVE_VLDRWU32 killed renamable $r1, 0, 0, $noreg :: (load (s128) from constant-pool, align 8) ; CHECK: $r1 = t2MOVi16 target-flags(arm-lo16) @arr, 14 /* CC::al */, $noreg ; CHECK: $r1 = t2MOVTi16 killed $r1, target-flags(arm-hi16) @arr, 14 /* CC::al */, $noreg ; CHECK: renamable $vpr = MVE_VCMPu32 killed renamable $q0, killed renamable $q1, 8, 0, $noreg @@ -75,7 +75,7 @@ body: | ; CHECK: liveins: $vpr, $q0, $r0, $r1 ; CHECK: renamable $r0, $cpsr = tADDi8 killed renamable $r0, 1, 14 /* CC::al */, $noreg ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: MVE_VSTRWU32 renamable $q0, renamable $r1, 0, 1, renamable $vpr :: (store 16 into `<4 x i32>* bitcast ([0 x i32]* @arr to <4 x i32>*)`, align 4) + ; CHECK: MVE_VSTRWU32 renamable $q0, renamable $r1, 0, 1, renamable $vpr :: (store (s128) into `<4 x i32>* bitcast ([0 x i32]* @arr to <4 x i32>*)`, align 4) ; CHECK: tBcc %bb.2, 3 /* CC::lo */, killed $cpsr ; CHECK: bb.3.for.end5: ; CHECK: tBX_RET 14 /* CC::al */, $noreg @@ -93,7 +93,7 @@ body: | renamable $r1 = tLEApcrel %const.0, 14 /* CC::al */, $noreg renamable $q0 = MVE_VMOVimmi32 3, 0, $noreg, undef renamable $q0 - renamable $q1 = MVE_VLDRWU32 killed renamable $r1, 0, 0, $noreg :: (load 16 from constant-pool, align 8) + renamable $q1 = MVE_VLDRWU32 killed renamable $r1, 0, 0, $noreg :: (load (s128) from constant-pool, align 8) $r1 = t2MOVi16 target-flags(arm-lo16) @arr, 14 /* CC::al */, $noreg $r1 = t2MOVTi16 killed $r1, target-flags(arm-hi16) @arr, 14 /* CC::al */, $noreg renamable $vpr = MVE_VCMPu32 killed renamable $q0, killed renamable $q1, 8, 0, $noreg @@ -105,7 +105,7 @@ body: | renamable $r0, $cpsr = tADDi8 killed renamable $r0, 1, 14 /* CC::al */, $noreg MVE_VPST 8, implicit $vpr - MVE_VSTRWU32 renamable $q0, renamable $r1, 0, 1, renamable $vpr :: (store 16 into `<4 x i32>* bitcast ([0 x i32]* @arr to <4 x i32>*)`, align 4) + MVE_VSTRWU32 renamable $q0, renamable $r1, 0, 1, renamable $vpr :: (store (s128) into `<4 x i32>* bitcast ([0 x i32]* @arr to <4 x i32>*)`, align 4) tBcc %bb.2, 3 /* CC::lo */, killed $cpsr bb.3.for.end5: diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-default.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-default.mir index f523af1a7d43d..ffacf041e954a 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-default.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-default.mir @@ -215,20 +215,20 @@ body: | ; CHECK: frame-setup CFI_INSTRUCTION offset $r4, -36 ; CHECK: $sp = frame-setup tSUBspi $sp, 10, 14 /* CC::al */, $noreg ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 76 - ; CHECK: $r6, $r5 = t2LDRDi8 $r0, 8, 14 /* CC::al */, $noreg :: (load 4 from %ir.i7), (load 4 from %ir.i10) + ; CHECK: $r6, $r5 = t2LDRDi8 $r0, 8, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i7), (load (s32) from %ir.i10) ; CHECK: $r8 = tMOVr killed $r3, 14 /* CC::al */, $noreg - ; CHECK: $r3, $r7 = t2LDRDi8 killed $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.i), (load 4 from %ir.i5) + ; CHECK: $r3, $r7 = t2LDRDi8 killed $r0, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i), (load (s32) from %ir.i5) ; CHECK: renamable $r0 = t2RSBri killed renamable $r6, 31, 14 /* CC::al */, $noreg, $noreg - ; CHECK: t2STMIA $sp, 14 /* CC::al */, $noreg, killed $r0, $r2, $r8 :: (store 4 into %stack.9), (store 4 into %stack.8), (store 4 into %stack.7) + ; CHECK: t2STMIA $sp, 14 /* CC::al */, $noreg, killed $r0, $r2, $r8 :: (store (s32) into %stack.9), (store (s32) into %stack.8), (store (s32) into %stack.7) ; CHECK: $r12 = tMOVr killed $r2, 14 /* CC::al */, $noreg - ; CHECK: renamable $r2 = tLDRspi $sp, 0, 14 /* CC::al */, $noreg :: (load 4 from %stack.9) + ; CHECK: renamable $r2 = tLDRspi $sp, 0, 14 /* CC::al */, $noreg :: (load (s32) from %stack.9) ; CHECK: tB %bb.2, 14 /* CC::al */, $noreg ; CHECK: bb.1.bb74 (align 4): ; CHECK: successors: %bb.6(0x04000000), %bb.2(0x7c000000) ; CHECK: liveins: $r0, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r12, $r2 ; CHECK: renamable $r7, dead $cpsr = nuw tADDi8 killed renamable $r7, 20, 14 /* CC::al */, $noreg - ; CHECK: t2STRDi8 killed $r9, killed $r4, $r3, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.i14), (store 4 into %ir.i81) - ; CHECK: t2STRDi8 killed $r6, killed $r0, $r3, 8, 14 /* CC::al */, $noreg :: (store 4 into %ir.i84), (store 4 into %ir.i88) + ; CHECK: t2STRDi8 killed $r9, killed $r4, $r3, 0, 14 /* CC::al */, $noreg :: (store (s32) into %ir.i14), (store (s32) into %ir.i81) + ; CHECK: t2STRDi8 killed $r6, killed $r0, $r3, 8, 14 /* CC::al */, $noreg :: (store (s32) into %ir.i84), (store (s32) into %ir.i88) ; CHECK: renamable $r3, dead $cpsr = nuw tADDi8 killed renamable $r3, 16, 14 /* CC::al */, $noreg ; CHECK: renamable $r5, $cpsr = tSUBi8 killed renamable $r5, 1, 14 /* CC::al */, $noreg ; CHECK: $r1 = tMOVr $r12, 14 /* CC::al */, $noreg @@ -236,42 +236,42 @@ body: | ; CHECK: bb.2.bb12: ; CHECK: successors: %bb.3(0x40000000), %bb.1(0x40000000) ; CHECK: liveins: $r1, $r2, $r3, $r5, $r7, $r8, $r12 - ; CHECK: $r9, $r4 = t2LDRDi8 $r3, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.i14), (load 4 from %ir.i20) - ; CHECK: $r6, $r0 = t2LDRDi8 $r3, 8, 14 /* CC::al */, $noreg :: (load 4 from %ir.i22), (load 4 from %ir.i24) + ; CHECK: $r9, $r4 = t2LDRDi8 $r3, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i14), (load (s32) from %ir.i20) + ; CHECK: $r6, $r0 = t2LDRDi8 $r3, 8, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i22), (load (s32) from %ir.i24) ; CHECK: dead $lr = t2SUBri renamable $r8, 0, 14 /* CC::al */, $noreg, def $cpsr ; CHECK: tBcc %bb.1, 0 /* CC::eq */, killed $cpsr ; CHECK: tB %bb.3, 14 /* CC::al */, $noreg ; CHECK: bb.3.bb27: ; CHECK: successors: %bb.4(0x80000000) ; CHECK: liveins: $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r12 - ; CHECK: t2STRDi8 killed $r3, killed $r5, $sp, 12, 14 /* CC::al */, $noreg :: (store 4 into %stack.6), (store 4 into %stack.5) - ; CHECK: renamable $r3 = tLDRi renamable $r7, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.i13) - ; CHECK: tSTRspi killed renamable $r3, $sp, 9, 14 /* CC::al */, $noreg :: (store 4 into %stack.0) - ; CHECK: renamable $r3 = tLDRi renamable $r7, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.i34) - ; CHECK: tSTRspi killed renamable $r3, $sp, 8, 14 /* CC::al */, $noreg :: (store 4 into %stack.1) - ; CHECK: renamable $r3 = tLDRi renamable $r7, 2, 14 /* CC::al */, $noreg :: (load 4 from %ir.i32) - ; CHECK: tSTRspi killed renamable $r3, $sp, 7, 14 /* CC::al */, $noreg :: (store 4 into %stack.2) - ; CHECK: renamable $r3 = tLDRi renamable $r7, 3, 14 /* CC::al */, $noreg :: (load 4 from %ir.i30) - ; CHECK: t2STRDi8 $r7, killed $r3, $sp, 20, 14 /* CC::al */, $noreg :: (store 4 into %stack.4), (store 4 into %stack.3) - ; CHECK: renamable $r10 = t2LDRi12 killed renamable $r7, 16, 14 /* CC::al */, $noreg :: (load 4 from %ir.i28) + ; CHECK: t2STRDi8 killed $r3, killed $r5, $sp, 12, 14 /* CC::al */, $noreg :: (store (s32) into %stack.6), (store (s32) into %stack.5) + ; CHECK: renamable $r3 = tLDRi renamable $r7, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i13) + ; CHECK: tSTRspi killed renamable $r3, $sp, 9, 14 /* CC::al */, $noreg :: (store (s32) into %stack.0) + ; CHECK: renamable $r3 = tLDRi renamable $r7, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i34) + ; CHECK: tSTRspi killed renamable $r3, $sp, 8, 14 /* CC::al */, $noreg :: (store (s32) into %stack.1) + ; CHECK: renamable $r3 = tLDRi renamable $r7, 2, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i32) + ; CHECK: tSTRspi killed renamable $r3, $sp, 7, 14 /* CC::al */, $noreg :: (store (s32) into %stack.2) + ; CHECK: renamable $r3 = tLDRi renamable $r7, 3, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i30) + ; CHECK: t2STRDi8 $r7, killed $r3, $sp, 20, 14 /* CC::al */, $noreg :: (store (s32) into %stack.4), (store (s32) into %stack.3) + ; CHECK: renamable $r10 = t2LDRi12 killed renamable $r7, 16, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i28) ; CHECK: bb.4.bb37 (align 4): ; CHECK: successors: %bb.4(0x7c000000), %bb.5(0x04000000) ; CHECK: liveins: $r0, $r1, $r2, $r4, $r6, $r8, $r9, $r10, $r12 ; CHECK: $r7 = tMOVr killed $r6, 14 /* CC::al */, $noreg - ; CHECK: renamable $r6 = tLDRspi $sp, 8, 14 /* CC::al */, $noreg :: (load 4 from %stack.1) - ; CHECK: renamable $r3 = tLDRspi $sp, 7, 14 /* CC::al */, $noreg :: (load 4 from %stack.2) + ; CHECK: renamable $r6 = tLDRspi $sp, 8, 14 /* CC::al */, $noreg :: (load (s32) from %stack.1) + ; CHECK: renamable $r3 = tLDRspi $sp, 7, 14 /* CC::al */, $noreg :: (load (s32) from %stack.2) ; CHECK: renamable $r6, renamable $r11 = t2SMULL $r9, killed renamable $r6, 14 /* CC::al */, $noreg ; CHECK: renamable $r6, renamable $r11 = t2SMLAL killed renamable $r4, killed renamable $r3, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg - ; CHECK: renamable $r3 = tLDRspi $sp, 6, 14 /* CC::al */, $noreg :: (load 4 from %stack.3) + ; CHECK: renamable $r3 = tLDRspi $sp, 6, 14 /* CC::al */, $noreg :: (load (s32) from %stack.3) ; CHECK: $r5 = tMOVr killed $r9, 14 /* CC::al */, $noreg ; CHECK: renamable $r6, renamable $r11 = t2SMLAL renamable $r7, killed renamable $r3, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg - ; CHECK: renamable $r9, renamable $r1 = t2LDR_POST killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (load 4 from %ir.i38) + ; CHECK: renamable $r9, renamable $r1 = t2LDR_POST killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i38) ; CHECK: renamable $r6, renamable $r11 = t2SMLAL killed renamable $r0, renamable $r10, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg - ; CHECK: renamable $r0 = tLDRspi $sp, 9, 14 /* CC::al */, $noreg :: (load 4 from %stack.0) + ; CHECK: renamable $r0 = tLDRspi $sp, 9, 14 /* CC::al */, $noreg :: (load (s32) from %stack.0) ; CHECK: $lr = tMOVr $r8, 14 /* CC::al */, $noreg ; CHECK: renamable $r6, renamable $r11 = t2SMLAL renamable $r9, killed renamable $r0, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg ; CHECK: early-clobber renamable $r6, dead early-clobber renamable $r11 = MVE_ASRLr killed renamable $r6, killed renamable $r11, renamable $r2, 14 /* CC::al */, $noreg - ; CHECK: early-clobber renamable $r12 = t2STR_POST renamable $r6, killed renamable $r12, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.i39) + ; CHECK: early-clobber renamable $r12 = t2STR_POST renamable $r6, killed renamable $r12, 4, 14 /* CC::al */, $noreg :: (store (s32) into %ir.i39) ; CHECK: dead renamable $lr = t2SUBri killed renamable $lr, 1, 14 /* CC::al */, $noreg, def $cpsr ; CHECK: renamable $r8 = t2SUBri killed renamable $r8, 1, 14 /* CC::al */, $noreg, $noreg ; CHECK: $r0 = tMOVr $r7, 14 /* CC::al */, $noreg @@ -284,8 +284,8 @@ body: | ; CHECK: $r0 = tMOVr killed $r7, 14 /* CC::al */, $noreg ; CHECK: $r7 = tADDrSPi $sp, 3, 14 /* CC::al */, $noreg ; CHECK: $r4 = tMOVr killed $r5, 14 /* CC::al */, $noreg - ; CHECK: $r12, $r8 = t2LDRDi8 $sp, 4, 14 /* CC::al */, $noreg :: (load 4 from %stack.8), (load 4 from %stack.7) - ; CHECK: tLDMIA killed $r7, 14 /* CC::al */, $noreg, def $r3, def $r5, def $r7 :: (load 4 from %stack.6), (load 4 from %stack.5), (load 4 from %stack.4) + ; CHECK: $r12, $r8 = t2LDRDi8 $sp, 4, 14 /* CC::al */, $noreg :: (load (s32) from %stack.8), (load (s32) from %stack.7) + ; CHECK: tLDMIA killed $r7, 14 /* CC::al */, $noreg, def $r3, def $r5, def $r7 :: (load (s32) from %stack.6), (load (s32) from %stack.5), (load (s32) from %stack.4) ; CHECK: tB %bb.1, 14 /* CC::al */, $noreg ; CHECK: bb.6.bb91: ; CHECK: $sp = frame-destroy tADDspi $sp, 10, 14 /* CC::al */, $noreg @@ -307,13 +307,13 @@ body: | frame-setup CFI_INSTRUCTION offset $r4, -36 $sp = frame-setup tSUBspi $sp, 10, 14 /* CC::al */, $noreg frame-setup CFI_INSTRUCTION def_cfa_offset 76 - $r6, $r5 = t2LDRDi8 $r0, 8, 14 /* CC::al */, $noreg :: (load 4 from %ir.i7), (load 4 from %ir.i10) + $r6, $r5 = t2LDRDi8 $r0, 8, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i7), (load (s32) from %ir.i10) $r8 = tMOVr killed $r3, 14 /* CC::al */, $noreg - $r3, $r7 = t2LDRDi8 killed $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.i), (load 4 from %ir.i5) + $r3, $r7 = t2LDRDi8 killed $r0, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i), (load (s32) from %ir.i5) renamable $r0 = t2RSBri killed renamable $r6, 31, 14 /* CC::al */, $noreg, $noreg - t2STMIA $sp, 14 /* CC::al */, $noreg, killed $r0, $r2, $r8 :: (store 4 into %stack.9), (store 4 into %stack.8), (store 4 into %stack.7) + t2STMIA $sp, 14 /* CC::al */, $noreg, killed $r0, $r2, $r8 :: (store (s32) into %stack.9), (store (s32) into %stack.8), (store (s32) into %stack.7) $r12 = tMOVr killed $r2, 14 /* CC::al */, $noreg - renamable $r2 = tLDRspi $sp, 0, 14 /* CC::al */, $noreg :: (load 4 from %stack.9) + renamable $r2 = tLDRspi $sp, 0, 14 /* CC::al */, $noreg :: (load (s32) from %stack.9) tB %bb.2, 14 /* CC::al */, $noreg bb.1.bb74 (align 4): @@ -321,8 +321,8 @@ body: | liveins: $r0, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r12, $r2 renamable $r7, dead $cpsr = nuw tADDi8 killed renamable $r7, 20, 14 /* CC::al */, $noreg - t2STRDi8 killed $r9, killed $r4, $r3, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.i14), (store 4 into %ir.i81) - t2STRDi8 killed $r6, killed $r0, $r3, 8, 14 /* CC::al */, $noreg :: (store 4 into %ir.i84), (store 4 into %ir.i88) + t2STRDi8 killed $r9, killed $r4, $r3, 0, 14 /* CC::al */, $noreg :: (store (s32) into %ir.i14), (store (s32) into %ir.i81) + t2STRDi8 killed $r6, killed $r0, $r3, 8, 14 /* CC::al */, $noreg :: (store (s32) into %ir.i84), (store (s32) into %ir.i88) renamable $r3, dead $cpsr = nuw tADDi8 killed renamable $r3, 16, 14 /* CC::al */, $noreg renamable $r5, $cpsr = tSUBi8 killed renamable $r5, 1, 14 /* CC::al */, $noreg $r1 = tMOVr $r12, 14 /* CC::al */, $noreg @@ -332,8 +332,8 @@ body: | successors: %bb.3(0x40000000), %bb.1(0x40000000) liveins: $r1, $r3, $r5, $r7, $r8, $r12, $r2 - $r9, $r4 = t2LDRDi8 $r3, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.i14), (load 4 from %ir.i20) - $r6, $r0 = t2LDRDi8 $r3, 8, 14 /* CC::al */, $noreg :: (load 4 from %ir.i22), (load 4 from %ir.i24) + $r9, $r4 = t2LDRDi8 $r3, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i14), (load (s32) from %ir.i20) + $r6, $r0 = t2LDRDi8 $r3, 8, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i22), (load (s32) from %ir.i24) $lr = t2WhileLoopStartLR renamable $r8, %bb.1, implicit-def dead $cpsr tB %bb.3, 14 /* CC::al */, $noreg @@ -341,36 +341,36 @@ body: | successors: %bb.4(0x80000000) liveins: $r0, $r1, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r12, $r2 - t2STRDi8 killed $r3, killed $r5, $sp, 12, 14 /* CC::al */, $noreg :: (store 4 into %stack.6), (store 4 into %stack.5) - renamable $r3 = tLDRi renamable $r7, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.i13) - tSTRspi killed renamable $r3, $sp, 9, 14 /* CC::al */, $noreg :: (store 4 into %stack.0) - renamable $r3 = tLDRi renamable $r7, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.i34) - tSTRspi killed renamable $r3, $sp, 8, 14 /* CC::al */, $noreg :: (store 4 into %stack.1) - renamable $r3 = tLDRi renamable $r7, 2, 14 /* CC::al */, $noreg :: (load 4 from %ir.i32) - tSTRspi killed renamable $r3, $sp, 7, 14 /* CC::al */, $noreg :: (store 4 into %stack.2) - renamable $r3 = tLDRi renamable $r7, 3, 14 /* CC::al */, $noreg :: (load 4 from %ir.i30) - t2STRDi8 $r7, killed $r3, $sp, 20, 14 /* CC::al */, $noreg :: (store 4 into %stack.4), (store 4 into %stack.3) - renamable $r10 = t2LDRi12 killed renamable $r7, 16, 14 /* CC::al */, $noreg :: (load 4 from %ir.i28) + t2STRDi8 killed $r3, killed $r5, $sp, 12, 14 /* CC::al */, $noreg :: (store (s32) into %stack.6), (store (s32) into %stack.5) + renamable $r3 = tLDRi renamable $r7, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i13) + tSTRspi killed renamable $r3, $sp, 9, 14 /* CC::al */, $noreg :: (store (s32) into %stack.0) + renamable $r3 = tLDRi renamable $r7, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i34) + tSTRspi killed renamable $r3, $sp, 8, 14 /* CC::al */, $noreg :: (store (s32) into %stack.1) + renamable $r3 = tLDRi renamable $r7, 2, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i32) + tSTRspi killed renamable $r3, $sp, 7, 14 /* CC::al */, $noreg :: (store (s32) into %stack.2) + renamable $r3 = tLDRi renamable $r7, 3, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i30) + t2STRDi8 $r7, killed $r3, $sp, 20, 14 /* CC::al */, $noreg :: (store (s32) into %stack.4), (store (s32) into %stack.3) + renamable $r10 = t2LDRi12 killed renamable $r7, 16, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i28) bb.4.bb37 (align 4): successors: %bb.4(0x7c000000), %bb.5(0x04000000) liveins: $r0, $r1, $r2, $r4, $r6, $r8, $r9, $r10, $r12 $r7 = tMOVr killed $r6, 14 /* CC::al */, $noreg - renamable $r6 = tLDRspi $sp, 8, 14 /* CC::al */, $noreg :: (load 4 from %stack.1) - renamable $r3 = tLDRspi $sp, 7, 14 /* CC::al */, $noreg :: (load 4 from %stack.2) + renamable $r6 = tLDRspi $sp, 8, 14 /* CC::al */, $noreg :: (load (s32) from %stack.1) + renamable $r3 = tLDRspi $sp, 7, 14 /* CC::al */, $noreg :: (load (s32) from %stack.2) renamable $r6, renamable $r11 = t2SMULL $r9, killed renamable $r6, 14 /* CC::al */, $noreg renamable $r6, renamable $r11 = t2SMLAL killed renamable $r4, killed renamable $r3, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg - renamable $r3 = tLDRspi $sp, 6, 14 /* CC::al */, $noreg :: (load 4 from %stack.3) + renamable $r3 = tLDRspi $sp, 6, 14 /* CC::al */, $noreg :: (load (s32) from %stack.3) $r5 = tMOVr killed $r9, 14 /* CC::al */, $noreg renamable $r6, renamable $r11 = t2SMLAL renamable $r7, killed renamable $r3, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg - renamable $r9, renamable $r1 = t2LDR_POST killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (load 4 from %ir.i38) + renamable $r9, renamable $r1 = t2LDR_POST killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i38) renamable $r6, renamable $r11 = t2SMLAL killed renamable $r0, renamable $r10, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg - renamable $r0 = tLDRspi $sp, 9, 14 /* CC::al */, $noreg :: (load 4 from %stack.0) + renamable $r0 = tLDRspi $sp, 9, 14 /* CC::al */, $noreg :: (load (s32) from %stack.0) $lr = tMOVr $r8, 14 /* CC::al */, $noreg renamable $r6, renamable $r11 = t2SMLAL renamable $r9, killed renamable $r0, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg early-clobber renamable $r6, dead early-clobber renamable $r11 = MVE_ASRLr killed renamable $r6, killed renamable $r11, renamable $r2, 14 /* CC::al */, $noreg - early-clobber renamable $r12 = t2STR_POST renamable $r6, killed renamable $r12, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.i39) + early-clobber renamable $r12 = t2STR_POST renamable $r6, killed renamable $r12, 4, 14 /* CC::al */, $noreg :: (store (s32) into %ir.i39) renamable $lr = t2LoopDec killed renamable $lr, 1 renamable $r8 = t2SUBri killed renamable $r8, 1, 14 /* CC::al */, $noreg, $noreg $r0 = tMOVr $r7, 14 /* CC::al */, $noreg @@ -385,8 +385,8 @@ body: | $r0 = tMOVr killed $r7, 14 /* CC::al */, $noreg $r7 = tADDrSPi $sp, 3, 14 /* CC::al */, $noreg $r4 = tMOVr killed $r5, 14 /* CC::al */, $noreg - $r12, $r8 = t2LDRDi8 $sp, 4, 14 /* CC::al */, $noreg :: (load 4 from %stack.8), (load 4 from %stack.7) - tLDMIA killed $r7, 14 /* CC::al */, $noreg, def $r3, def $r5, def $r7 :: (load 4 from %stack.6), (load 4 from %stack.5), (load 4 from %stack.4) + $r12, $r8 = t2LDRDi8 $sp, 4, 14 /* CC::al */, $noreg :: (load (s32) from %stack.8), (load (s32) from %stack.7) + tLDMIA killed $r7, 14 /* CC::al */, $noreg, def $r3, def $r5, def $r7 :: (load (s32) from %stack.6), (load (s32) from %stack.5), (load (s32) from %stack.4) tB %bb.1, 14 /* CC::al */, $noreg bb.6.bb91: diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-optsize-strd-lr.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-optsize-strd-lr.mir index 6f35f122129b1..10574ba7320e6 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-optsize-strd-lr.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-optsize-strd-lr.mir @@ -213,44 +213,44 @@ body: | ; CHECK: frame-setup CFI_INSTRUCTION offset $r4, -36 ; CHECK: $sp = frame-setup tSUBspi $sp, 8, 14 /* CC::al */, $noreg ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 68 - ; CHECK: $r6, $r4 = t2LDRDi8 $r0, 8, 14 /* CC::al */, $noreg :: (load 4 from %ir.i7), (load 4 from %ir.i10) - ; CHECK: $r7, $r5 = t2LDRDi8 killed $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.i), (load 4 from %ir.i5) + ; CHECK: $r6, $r4 = t2LDRDi8 $r0, 8, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i7), (load (s32) from %ir.i10) + ; CHECK: $r7, $r5 = t2LDRDi8 killed $r0, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i), (load (s32) from %ir.i5) ; CHECK: renamable $r0 = t2RSBri killed renamable $r6, 31, 14 /* CC::al */, $noreg, $noreg - ; CHECK: t2STMIA $sp, 14 /* CC::al */, $noreg, killed $r0, $r2, $r3 :: (store 4 into %stack.7), (store 4 into %stack.6), (store 4 into %stack.5) + ; CHECK: t2STMIA $sp, 14 /* CC::al */, $noreg, killed $r0, $r2, $r3 :: (store (s32) into %stack.7), (store (s32) into %stack.6), (store (s32) into %stack.5) ; CHECK: $r12 = tMOVr killed $r2, 14 /* CC::al */, $noreg - ; CHECK: renamable $r2 = tLDRspi $sp, 0, 14 /* CC::al */, $noreg :: (load 4 from %stack.7) + ; CHECK: renamable $r2 = tLDRspi $sp, 0, 14 /* CC::al */, $noreg :: (load (s32) from %stack.7) ; CHECK: bb.1.bb12 (align 4): ; CHECK: successors: %bb.2(0x40000000), %bb.5(0x40000000) ; CHECK: liveins: $r1, $r2, $r3, $r4, $r5, $r7, $r12 - ; CHECK: $r10, $r0 = t2LDRDi8 $r7, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.i14), (load 4 from %ir.i20) - ; CHECK: $r6, $r8 = t2LDRDi8 $r7, 8, 14 /* CC::al */, $noreg :: (load 4 from %ir.i22), (load 4 from %ir.i24) + ; CHECK: $r10, $r0 = t2LDRDi8 $r7, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i14), (load (s32) from %ir.i20) + ; CHECK: $r6, $r8 = t2LDRDi8 $r7, 8, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i22), (load (s32) from %ir.i24) ; CHECK: $lr = t2WLS renamable $r3, %bb.5 ; CHECK: bb.2.bb27: ; CHECK: successors: %bb.3(0x80000000) ; CHECK: liveins: $lr, $r0, $r1, $r2, $r4, $r5, $r6, $r7, $r8, $r10, $r12 - ; CHECK: renamable $r3 = tLDRi renamable $r5, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.i13) - ; CHECK: t2STRDi8 killed $r7, killed $r4, $sp, 12, 14 /* CC::al */, $noreg :: (store 4 into %stack.4), (store 4 into %stack.3) - ; CHECK: tSTRspi killed renamable $r3, $sp, 7, 14 /* CC::al */, $noreg :: (store 4 into %stack.0) - ; CHECK: renamable $r3 = tLDRi renamable $r5, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.i34) - ; CHECK: renamable $r4 = tLDRi renamable $r5, 4, 14 /* CC::al */, $noreg :: (load 4 from %ir.i28) - ; CHECK: tSTRspi killed renamable $r3, $sp, 6, 14 /* CC::al */, $noreg :: (store 4 into %stack.1) - ; CHECK: $r9, $r3 = t2LDRDi8 $r5, 8, 14 /* CC::al */, $noreg :: (load 4 from %ir.i32), (load 4 from %ir.i30) - ; CHECK: tSTRspi killed renamable $r5, $sp, 5, 14 /* CC::al */, $noreg :: (store 4 into %stack.2) + ; CHECK: renamable $r3 = tLDRi renamable $r5, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i13) + ; CHECK: t2STRDi8 killed $r7, killed $r4, $sp, 12, 14 /* CC::al */, $noreg :: (store (s32) into %stack.4), (store (s32) into %stack.3) + ; CHECK: tSTRspi killed renamable $r3, $sp, 7, 14 /* CC::al */, $noreg :: (store (s32) into %stack.0) + ; CHECK: renamable $r3 = tLDRi renamable $r5, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i34) + ; CHECK: renamable $r4 = tLDRi renamable $r5, 4, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i28) + ; CHECK: tSTRspi killed renamable $r3, $sp, 6, 14 /* CC::al */, $noreg :: (store (s32) into %stack.1) + ; CHECK: $r9, $r3 = t2LDRDi8 $r5, 8, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i32), (load (s32) from %ir.i30) + ; CHECK: tSTRspi killed renamable $r5, $sp, 5, 14 /* CC::al */, $noreg :: (store (s32) into %stack.2) ; CHECK: bb.3.bb37 (align 4): ; CHECK: successors: %bb.3(0x7c000000), %bb.4(0x04000000) ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3, $r4, $r6, $r8, $r9, $r10, $r12 ; CHECK: $r7 = tMOVr killed $r6, 14 /* CC::al */, $noreg - ; CHECK: renamable $r6 = tLDRspi $sp, 6, 14 /* CC::al */, $noreg :: (load 4 from %stack.1) + ; CHECK: renamable $r6 = tLDRspi $sp, 6, 14 /* CC::al */, $noreg :: (load (s32) from %stack.1) ; CHECK: $r5 = tMOVr $r10, 14 /* CC::al */, $noreg ; CHECK: renamable $r6, renamable $r11 = t2SMULL killed $r10, killed renamable $r6, 14 /* CC::al */, $noreg ; CHECK: renamable $r6, renamable $r11 = t2SMLAL killed renamable $r0, renamable $r9, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg - ; CHECK: renamable $r10, renamable $r1 = t2LDR_POST killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (load 4 from %ir.i38) + ; CHECK: renamable $r10, renamable $r1 = t2LDR_POST killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i38) ; CHECK: renamable $r6, renamable $r11 = t2SMLAL renamable $r7, renamable $r3, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg - ; CHECK: renamable $r0 = tLDRspi $sp, 7, 14 /* CC::al */, $noreg :: (load 4 from %stack.0) + ; CHECK: renamable $r0 = tLDRspi $sp, 7, 14 /* CC::al */, $noreg :: (load (s32) from %stack.0) ; CHECK: renamable $r6, renamable $r11 = t2SMLAL killed renamable $r8, renamable $r4, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg ; CHECK: renamable $r6, renamable $r11 = t2SMLAL renamable $r10, killed renamable $r0, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg ; CHECK: early-clobber renamable $r6, dead early-clobber renamable $r11 = MVE_ASRLr killed renamable $r6, killed renamable $r11, renamable $r2, 14 /* CC::al */, $noreg - ; CHECK: early-clobber renamable $r12 = t2STR_POST renamable $r6, killed renamable $r12, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.i39) + ; CHECK: early-clobber renamable $r12 = t2STR_POST renamable $r6, killed renamable $r12, 4, 14 /* CC::al */, $noreg :: (store (s32) into %ir.i39) ; CHECK: $r8 = tMOVr $r7, 14 /* CC::al */, $noreg ; CHECK: $r0 = tMOVr $r5, 14 /* CC::al */, $noreg ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.3 @@ -259,15 +259,15 @@ body: | ; CHECK: liveins: $r2, $r5, $r6, $r7, $r10 ; CHECK: $r0 = tMOVr killed $r5, 14 /* CC::al */, $noreg ; CHECK: $r8 = tMOVr killed $r7, 14 /* CC::al */, $noreg - ; CHECK: $r12, $r3 = t2LDRDi8 $sp, 4, 14 /* CC::al */, $noreg :: (load 4 from %stack.6), (load 4 from %stack.5) - ; CHECK: renamable $r5 = tLDRspi $sp, 5, 14 /* CC::al */, $noreg :: (load 4 from %stack.2) - ; CHECK: $r7, $r4 = t2LDRDi8 $sp, 12, 14 /* CC::al */, $noreg :: (load 4 from %stack.4), (load 4 from %stack.3) + ; CHECK: $r12, $r3 = t2LDRDi8 $sp, 4, 14 /* CC::al */, $noreg :: (load (s32) from %stack.6), (load (s32) from %stack.5) + ; CHECK: renamable $r5 = tLDRspi $sp, 5, 14 /* CC::al */, $noreg :: (load (s32) from %stack.2) + ; CHECK: $r7, $r4 = t2LDRDi8 $sp, 12, 14 /* CC::al */, $noreg :: (load (s32) from %stack.4), (load (s32) from %stack.3) ; CHECK: bb.5.bb74: ; CHECK: successors: %bb.6(0x04000000), %bb.1(0x7c000000) ; CHECK: liveins: $r0, $r3, $r4, $r5, $r6, $r7, $r8, $r10, $r12, $r2 ; CHECK: renamable $r5, dead $cpsr = nuw tADDi8 killed renamable $r5, 20, 14 /* CC::al */, $noreg - ; CHECK: t2STRDi8 killed $r10, killed $r0, $r7, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.i14), (store 4 into %ir.i81) - ; CHECK: t2STRDi8 killed $r6, killed $r8, $r7, 8, 14 /* CC::al */, $noreg :: (store 4 into %ir.i84), (store 4 into %ir.i88) + ; CHECK: t2STRDi8 killed $r10, killed $r0, $r7, 0, 14 /* CC::al */, $noreg :: (store (s32) into %ir.i14), (store (s32) into %ir.i81) + ; CHECK: t2STRDi8 killed $r6, killed $r8, $r7, 8, 14 /* CC::al */, $noreg :: (store (s32) into %ir.i84), (store (s32) into %ir.i88) ; CHECK: renamable $r7, dead $cpsr = nuw tADDi8 killed renamable $r7, 16, 14 /* CC::al */, $noreg ; CHECK: renamable $r4, $cpsr = tSUBi8 killed renamable $r4, 1, 14 /* CC::al */, $noreg ; CHECK: $r1 = tMOVr $r12, 14 /* CC::al */, $noreg @@ -292,19 +292,19 @@ body: | frame-setup CFI_INSTRUCTION offset $r4, -36 $sp = frame-setup tSUBspi $sp, 8, 14 /* CC::al */, $noreg frame-setup CFI_INSTRUCTION def_cfa_offset 68 - $r6, $r4 = t2LDRDi8 $r0, 8, 14 /* CC::al */, $noreg :: (load 4 from %ir.i7), (load 4 from %ir.i10) - $r7, $r5 = t2LDRDi8 killed $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.i), (load 4 from %ir.i5) + $r6, $r4 = t2LDRDi8 $r0, 8, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i7), (load (s32) from %ir.i10) + $r7, $r5 = t2LDRDi8 killed $r0, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i), (load (s32) from %ir.i5) renamable $r0 = t2RSBri killed renamable $r6, 31, 14 /* CC::al */, $noreg, $noreg - t2STMIA $sp, 14 /* CC::al */, $noreg, killed $r0, $r2, $r3 :: (store 4 into %stack.7), (store 4 into %stack.6), (store 4 into %stack.5) + t2STMIA $sp, 14 /* CC::al */, $noreg, killed $r0, $r2, $r3 :: (store (s32) into %stack.7), (store (s32) into %stack.6), (store (s32) into %stack.5) $r12 = tMOVr killed $r2, 14 /* CC::al */, $noreg - renamable $r2 = tLDRspi $sp, 0, 14 /* CC::al */, $noreg :: (load 4 from %stack.7) + renamable $r2 = tLDRspi $sp, 0, 14 /* CC::al */, $noreg :: (load (s32) from %stack.7) bb.1.bb12 (align 4): successors: %bb.2(0x40000000), %bb.5(0x40000000) liveins: $r1, $r3, $r4, $r5, $r7, $r12, $r2 - $r10, $r0 = t2LDRDi8 $r7, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.i14), (load 4 from %ir.i20) - $r6, $r8 = t2LDRDi8 $r7, 8, 14 /* CC::al */, $noreg :: (load 4 from %ir.i22), (load 4 from %ir.i24) + $r10, $r0 = t2LDRDi8 $r7, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i14), (load (s32) from %ir.i20) + $r6, $r8 = t2LDRDi8 $r7, 8, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i22), (load (s32) from %ir.i24) renamable $lr = t2WhileLoopStartLR renamable $r3, %bb.5, implicit-def dead $cpsr tB %bb.2, 14 /* CC::al */, $noreg @@ -312,31 +312,31 @@ body: | successors: %bb.3(0x80000000) liveins: $lr, $r0, $r1, $r4, $r5, $r6, $r7, $r8, $r10, $r12, $r2 - renamable $r3 = tLDRi renamable $r5, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.i13) - t2STRDi8 killed $r7, killed $r4, $sp, 12, 14 /* CC::al */, $noreg :: (store 4 into %stack.4), (store 4 into %stack.3) - tSTRspi killed renamable $r3, $sp, 7, 14 /* CC::al */, $noreg :: (store 4 into %stack.0) - renamable $r3 = tLDRi renamable $r5, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.i34) - renamable $r4 = tLDRi renamable $r5, 4, 14 /* CC::al */, $noreg :: (load 4 from %ir.i28) - tSTRspi killed renamable $r3, $sp, 6, 14 /* CC::al */, $noreg :: (store 4 into %stack.1) - $r9, $r3 = t2LDRDi8 $r5, 8, 14 /* CC::al */, $noreg :: (load 4 from %ir.i32), (load 4 from %ir.i30) - tSTRspi killed renamable $r5, $sp, 5, 14 /* CC::al */, $noreg :: (store 4 into %stack.2) + renamable $r3 = tLDRi renamable $r5, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i13) + t2STRDi8 killed $r7, killed $r4, $sp, 12, 14 /* CC::al */, $noreg :: (store (s32) into %stack.4), (store (s32) into %stack.3) + tSTRspi killed renamable $r3, $sp, 7, 14 /* CC::al */, $noreg :: (store (s32) into %stack.0) + renamable $r3 = tLDRi renamable $r5, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i34) + renamable $r4 = tLDRi renamable $r5, 4, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i28) + tSTRspi killed renamable $r3, $sp, 6, 14 /* CC::al */, $noreg :: (store (s32) into %stack.1) + $r9, $r3 = t2LDRDi8 $r5, 8, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i32), (load (s32) from %ir.i30) + tSTRspi killed renamable $r5, $sp, 5, 14 /* CC::al */, $noreg :: (store (s32) into %stack.2) bb.3.bb37 (align 4): successors: %bb.3(0x7c000000), %bb.4(0x04000000) liveins: $lr, $r0, $r1, $r2, $r3, $r4, $r6, $r8, $r9, $r10, $r12 $r7 = tMOVr killed $r6, 14 /* CC::al */, $noreg - renamable $r6 = tLDRspi $sp, 6, 14 /* CC::al */, $noreg :: (load 4 from %stack.1) + renamable $r6 = tLDRspi $sp, 6, 14 /* CC::al */, $noreg :: (load (s32) from %stack.1) $r5 = tMOVr $r10, 14 /* CC::al */, $noreg renamable $r6, renamable $r11 = t2SMULL killed $r10, killed renamable $r6, 14 /* CC::al */, $noreg renamable $r6, renamable $r11 = t2SMLAL killed renamable $r0, renamable $r9, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg - renamable $r10, renamable $r1 = t2LDR_POST killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (load 4 from %ir.i38) + renamable $r10, renamable $r1 = t2LDR_POST killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i38) renamable $r6, renamable $r11 = t2SMLAL renamable $r7, renamable $r3, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg - renamable $r0 = tLDRspi $sp, 7, 14 /* CC::al */, $noreg :: (load 4 from %stack.0) + renamable $r0 = tLDRspi $sp, 7, 14 /* CC::al */, $noreg :: (load (s32) from %stack.0) renamable $r6, renamable $r11 = t2SMLAL killed renamable $r8, renamable $r4, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg renamable $r6, renamable $r11 = t2SMLAL renamable $r10, killed renamable $r0, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg early-clobber renamable $r6, dead early-clobber renamable $r11 = MVE_ASRLr killed renamable $r6, killed renamable $r11, renamable $r2, 14 /* CC::al */, $noreg - early-clobber renamable $r12 = t2STR_POST renamable $r6, killed renamable $r12, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.i39) + early-clobber renamable $r12 = t2STR_POST renamable $r6, killed renamable $r12, 4, 14 /* CC::al */, $noreg :: (store (s32) into %ir.i39) $r8 = tMOVr $r7, 14 /* CC::al */, $noreg $r0 = tMOVr $r5, 14 /* CC::al */, $noreg renamable $lr = t2LoopEndDec killed renamable $lr, %bb.3, implicit-def dead $cpsr @@ -348,17 +348,17 @@ body: | $r0 = tMOVr killed $r5, 14 /* CC::al */, $noreg $r8 = tMOVr killed $r7, 14 /* CC::al */, $noreg - $r12, $r3 = t2LDRDi8 $sp, 4, 14 /* CC::al */, $noreg :: (load 4 from %stack.6), (load 4 from %stack.5) - renamable $r5 = tLDRspi $sp, 5, 14 /* CC::al */, $noreg :: (load 4 from %stack.2) - $r7, $r4 = t2LDRDi8 $sp, 12, 14 /* CC::al */, $noreg :: (load 4 from %stack.4), (load 4 from %stack.3) + $r12, $r3 = t2LDRDi8 $sp, 4, 14 /* CC::al */, $noreg :: (load (s32) from %stack.6), (load (s32) from %stack.5) + renamable $r5 = tLDRspi $sp, 5, 14 /* CC::al */, $noreg :: (load (s32) from %stack.2) + $r7, $r4 = t2LDRDi8 $sp, 12, 14 /* CC::al */, $noreg :: (load (s32) from %stack.4), (load (s32) from %stack.3) bb.5.bb74: successors: %bb.6(0x04000000), %bb.1(0x7c000000) liveins: $r0, $r3, $r4, $r5, $r6, $r7, $r8, $r10, $r12, $r2 renamable $r5, dead $cpsr = nuw tADDi8 killed renamable $r5, 20, 14 /* CC::al */, $noreg - t2STRDi8 killed $r10, killed $r0, $r7, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.i14), (store 4 into %ir.i81) - t2STRDi8 killed $r6, killed $r8, $r7, 8, 14 /* CC::al */, $noreg :: (store 4 into %ir.i84), (store 4 into %ir.i88) + t2STRDi8 killed $r10, killed $r0, $r7, 0, 14 /* CC::al */, $noreg :: (store (s32) into %ir.i14), (store (s32) into %ir.i81) + t2STRDi8 killed $r6, killed $r8, $r7, 8, 14 /* CC::al */, $noreg :: (store (s32) into %ir.i84), (store (s32) into %ir.i88) renamable $r7, dead $cpsr = nuw tADDi8 killed renamable $r7, 16, 14 /* CC::al */, $noreg renamable $r4, $cpsr = tSUBi8 killed renamable $r4, 1, 14 /* CC::al */, $noreg $r1 = tMOVr $r12, 14 /* CC::al */, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-optsize.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-optsize.mir index e3a1585364778..62a266e3468b3 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-optsize.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-optsize.mir @@ -223,50 +223,50 @@ body: | ; CHECK: frame-setup CFI_INSTRUCTION offset $r4, -36 ; CHECK: $sp = frame-setup tSUBspi $sp, 10, 14 /* CC::al */, $noreg ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 76 - ; CHECK: $r6, $r5 = t2LDRDi8 $r0, 8, 14 /* CC::al */, $noreg :: (load 4 from %ir.i7), (load 4 from %ir.i10) + ; CHECK: $r6, $r5 = t2LDRDi8 $r0, 8, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i7), (load (s32) from %ir.i10) ; CHECK: $r8 = tMOVr killed $r3, 14 /* CC::al */, $noreg - ; CHECK: $r3, $r7 = t2LDRDi8 killed $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.i), (load 4 from %ir.i5) + ; CHECK: $r3, $r7 = t2LDRDi8 killed $r0, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i), (load (s32) from %ir.i5) ; CHECK: renamable $r0 = t2RSBri killed renamable $r6, 31, 14 /* CC::al */, $noreg, $noreg - ; CHECK: t2STMIA $sp, 14 /* CC::al */, $noreg, killed $r0, $r2, $r8 :: (store 4 into %stack.9), (store 4 into %stack.8), (store 4 into %stack.7) + ; CHECK: t2STMIA $sp, 14 /* CC::al */, $noreg, killed $r0, $r2, $r8 :: (store (s32) into %stack.9), (store (s32) into %stack.8), (store (s32) into %stack.7) ; CHECK: $r12 = tMOVr killed $r2, 14 /* CC::al */, $noreg - ; CHECK: renamable $r2 = tLDRspi $sp, 0, 14 /* CC::al */, $noreg :: (load 4 from %stack.9) + ; CHECK: renamable $r2 = tLDRspi $sp, 0, 14 /* CC::al */, $noreg :: (load (s32) from %stack.9) ; CHECK: bb.1.bb12 (align 4): ; CHECK: successors: %bb.2(0x40000000), %bb.5(0x40000000) ; CHECK: liveins: $r1, $r2, $r3, $r5, $r7, $r8, $r12 - ; CHECK: $r9, $r4 = t2LDRDi8 $r3, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.i14), (load 4 from %ir.i20) - ; CHECK: $r6, $r0 = t2LDRDi8 $r3, 8, 14 /* CC::al */, $noreg :: (load 4 from %ir.i22), (load 4 from %ir.i24) + ; CHECK: $r9, $r4 = t2LDRDi8 $r3, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i14), (load (s32) from %ir.i20) + ; CHECK: $r6, $r0 = t2LDRDi8 $r3, 8, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i22), (load (s32) from %ir.i24) ; CHECK: dead $lr = t2WLS renamable $r8, %bb.5 ; CHECK: bb.2.bb27: ; CHECK: successors: %bb.3(0x80000000) ; CHECK: liveins: $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r12 - ; CHECK: t2STRDi8 killed $r3, killed $r5, $sp, 12, 14 /* CC::al */, $noreg :: (store 4 into %stack.6), (store 4 into %stack.5) - ; CHECK: renamable $r3 = tLDRi renamable $r7, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.i13) - ; CHECK: tSTRspi killed renamable $r3, $sp, 9, 14 /* CC::al */, $noreg :: (store 4 into %stack.0) - ; CHECK: renamable $r3 = tLDRi renamable $r7, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.i34) - ; CHECK: tSTRspi killed renamable $r3, $sp, 8, 14 /* CC::al */, $noreg :: (store 4 into %stack.1) - ; CHECK: renamable $r3 = tLDRi renamable $r7, 2, 14 /* CC::al */, $noreg :: (load 4 from %ir.i32) - ; CHECK: tSTRspi killed renamable $r3, $sp, 7, 14 /* CC::al */, $noreg :: (store 4 into %stack.2) - ; CHECK: renamable $r3 = tLDRi renamable $r7, 3, 14 /* CC::al */, $noreg :: (load 4 from %ir.i30) - ; CHECK: t2STRDi8 $r7, killed $r3, $sp, 20, 14 /* CC::al */, $noreg :: (store 4 into %stack.4), (store 4 into %stack.3) - ; CHECK: renamable $r10 = t2LDRi12 killed renamable $r7, 16, 14 /* CC::al */, $noreg :: (load 4 from %ir.i28) + ; CHECK: t2STRDi8 killed $r3, killed $r5, $sp, 12, 14 /* CC::al */, $noreg :: (store (s32) into %stack.6), (store (s32) into %stack.5) + ; CHECK: renamable $r3 = tLDRi renamable $r7, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i13) + ; CHECK: tSTRspi killed renamable $r3, $sp, 9, 14 /* CC::al */, $noreg :: (store (s32) into %stack.0) + ; CHECK: renamable $r3 = tLDRi renamable $r7, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i34) + ; CHECK: tSTRspi killed renamable $r3, $sp, 8, 14 /* CC::al */, $noreg :: (store (s32) into %stack.1) + ; CHECK: renamable $r3 = tLDRi renamable $r7, 2, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i32) + ; CHECK: tSTRspi killed renamable $r3, $sp, 7, 14 /* CC::al */, $noreg :: (store (s32) into %stack.2) + ; CHECK: renamable $r3 = tLDRi renamable $r7, 3, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i30) + ; CHECK: t2STRDi8 $r7, killed $r3, $sp, 20, 14 /* CC::al */, $noreg :: (store (s32) into %stack.4), (store (s32) into %stack.3) + ; CHECK: renamable $r10 = t2LDRi12 killed renamable $r7, 16, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i28) ; CHECK: bb.3.bb37 (align 4): ; CHECK: successors: %bb.3(0x7c000000), %bb.4(0x04000000) ; CHECK: liveins: $r0, $r1, $r2, $r4, $r6, $r8, $r9, $r10, $r12 ; CHECK: $r7 = tMOVr killed $r6, 14 /* CC::al */, $noreg - ; CHECK: renamable $r6 = tLDRspi $sp, 8, 14 /* CC::al */, $noreg :: (load 4 from %stack.1) - ; CHECK: renamable $r3 = tLDRspi $sp, 7, 14 /* CC::al */, $noreg :: (load 4 from %stack.2) + ; CHECK: renamable $r6 = tLDRspi $sp, 8, 14 /* CC::al */, $noreg :: (load (s32) from %stack.1) + ; CHECK: renamable $r3 = tLDRspi $sp, 7, 14 /* CC::al */, $noreg :: (load (s32) from %stack.2) ; CHECK: renamable $r6, renamable $r11 = t2SMULL $r9, killed renamable $r6, 14 /* CC::al */, $noreg ; CHECK: renamable $r6, renamable $r11 = t2SMLAL killed renamable $r4, killed renamable $r3, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg - ; CHECK: renamable $r3 = tLDRspi $sp, 6, 14 /* CC::al */, $noreg :: (load 4 from %stack.3) + ; CHECK: renamable $r3 = tLDRspi $sp, 6, 14 /* CC::al */, $noreg :: (load (s32) from %stack.3) ; CHECK: $r5 = tMOVr killed $r9, 14 /* CC::al */, $noreg ; CHECK: renamable $r6, renamable $r11 = t2SMLAL renamable $r7, killed renamable $r3, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg - ; CHECK: renamable $r9, renamable $r1 = t2LDR_POST killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (load 4 from %ir.i38) + ; CHECK: renamable $r9, renamable $r1 = t2LDR_POST killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i38) ; CHECK: renamable $r6, renamable $r11 = t2SMLAL killed renamable $r0, renamable $r10, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg - ; CHECK: renamable $r0 = tLDRspi $sp, 9, 14 /* CC::al */, $noreg :: (load 4 from %stack.0) + ; CHECK: renamable $r0 = tLDRspi $sp, 9, 14 /* CC::al */, $noreg :: (load (s32) from %stack.0) ; CHECK: $lr = tMOVr $r8, 14 /* CC::al */, $noreg ; CHECK: renamable $r6, renamable $r11 = t2SMLAL renamable $r9, killed renamable $r0, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg ; CHECK: early-clobber renamable $r6, dead early-clobber renamable $r11 = MVE_ASRLr killed renamable $r6, killed renamable $r11, renamable $r2, 14 /* CC::al */, $noreg - ; CHECK: early-clobber renamable $r12 = t2STR_POST renamable $r6, killed renamable $r12, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.i39) + ; CHECK: early-clobber renamable $r12 = t2STR_POST renamable $r6, killed renamable $r12, 4, 14 /* CC::al */, $noreg :: (store (s32) into %ir.i39) ; CHECK: renamable $r8 = t2SUBri killed renamable $r8, 1, 14 /* CC::al */, $noreg, $noreg ; CHECK: $r0 = tMOVr $r7, 14 /* CC::al */, $noreg ; CHECK: $r4 = tMOVr $r5, 14 /* CC::al */, $noreg @@ -277,14 +277,14 @@ body: | ; CHECK: $r0 = tMOVr killed $r7, 14 /* CC::al */, $noreg ; CHECK: $r7 = tADDrSPi $sp, 3, 14 /* CC::al */, $noreg ; CHECK: $r4 = tMOVr killed $r5, 14 /* CC::al */, $noreg - ; CHECK: $r12, $r8 = t2LDRDi8 $sp, 4, 14 /* CC::al */, $noreg :: (load 4 from %stack.8), (load 4 from %stack.7) - ; CHECK: tLDMIA killed $r7, 14 /* CC::al */, $noreg, def $r3, def $r5, def $r7 :: (load 4 from %stack.6), (load 4 from %stack.5), (load 4 from %stack.4) + ; CHECK: $r12, $r8 = t2LDRDi8 $sp, 4, 14 /* CC::al */, $noreg :: (load (s32) from %stack.8), (load (s32) from %stack.7) + ; CHECK: tLDMIA killed $r7, 14 /* CC::al */, $noreg, def $r3, def $r5, def $r7 :: (load (s32) from %stack.6), (load (s32) from %stack.5), (load (s32) from %stack.4) ; CHECK: bb.5.bb74: ; CHECK: successors: %bb.6(0x04000000), %bb.1(0x7c000000) ; CHECK: liveins: $r0, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r12, $r2 ; CHECK: renamable $r7, dead $cpsr = nuw tADDi8 killed renamable $r7, 20, 14 /* CC::al */, $noreg - ; CHECK: t2STRDi8 killed $r9, killed $r4, $r3, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.i14), (store 4 into %ir.i81) - ; CHECK: t2STRDi8 killed $r6, killed $r0, $r3, 8, 14 /* CC::al */, $noreg :: (store 4 into %ir.i84), (store 4 into %ir.i88) + ; CHECK: t2STRDi8 killed $r9, killed $r4, $r3, 0, 14 /* CC::al */, $noreg :: (store (s32) into %ir.i14), (store (s32) into %ir.i81) + ; CHECK: t2STRDi8 killed $r6, killed $r0, $r3, 8, 14 /* CC::al */, $noreg :: (store (s32) into %ir.i84), (store (s32) into %ir.i88) ; CHECK: renamable $r3, dead $cpsr = nuw tADDi8 killed renamable $r3, 16, 14 /* CC::al */, $noreg ; CHECK: renamable $r5, $cpsr = tSUBi8 killed renamable $r5, 1, 14 /* CC::al */, $noreg ; CHECK: $r1 = tMOVr $r12, 14 /* CC::al */, $noreg @@ -309,20 +309,20 @@ body: | frame-setup CFI_INSTRUCTION offset $r4, -36 $sp = frame-setup tSUBspi $sp, 10, 14 /* CC::al */, $noreg frame-setup CFI_INSTRUCTION def_cfa_offset 76 - $r6, $r5 = t2LDRDi8 $r0, 8, 14 /* CC::al */, $noreg :: (load 4 from %ir.i7), (load 4 from %ir.i10) + $r6, $r5 = t2LDRDi8 $r0, 8, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i7), (load (s32) from %ir.i10) $r8 = tMOVr killed $r3, 14 /* CC::al */, $noreg - $r3, $r7 = t2LDRDi8 killed $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.i), (load 4 from %ir.i5) + $r3, $r7 = t2LDRDi8 killed $r0, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i), (load (s32) from %ir.i5) renamable $r0 = t2RSBri killed renamable $r6, 31, 14 /* CC::al */, $noreg, $noreg - t2STMIA $sp, 14 /* CC::al */, $noreg, killed $r0, $r2, $r8 :: (store 4 into %stack.9), (store 4 into %stack.8), (store 4 into %stack.7) + t2STMIA $sp, 14 /* CC::al */, $noreg, killed $r0, $r2, $r8 :: (store (s32) into %stack.9), (store (s32) into %stack.8), (store (s32) into %stack.7) $r12 = tMOVr killed $r2, 14 /* CC::al */, $noreg - renamable $r2 = tLDRspi $sp, 0, 14 /* CC::al */, $noreg :: (load 4 from %stack.9) + renamable $r2 = tLDRspi $sp, 0, 14 /* CC::al */, $noreg :: (load (s32) from %stack.9) bb.1.bb12 (align 4): successors: %bb.2(0x40000000), %bb.5(0x40000000) liveins: $r1, $r3, $r5, $r7, $r8, $r12, $r2 - $r9, $r4 = t2LDRDi8 $r3, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.i14), (load 4 from %ir.i20) - $r6, $r0 = t2LDRDi8 $r3, 8, 14 /* CC::al */, $noreg :: (load 4 from %ir.i22), (load 4 from %ir.i24) + $r9, $r4 = t2LDRDi8 $r3, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i14), (load (s32) from %ir.i20) + $r6, $r0 = t2LDRDi8 $r3, 8, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i22), (load (s32) from %ir.i24) $lr = t2WhileLoopStartLR renamable $r8, %bb.5, implicit-def dead $cpsr tB %bb.2, 14 /* CC::al */, $noreg @@ -330,36 +330,36 @@ body: | successors: %bb.3(0x80000000) liveins: $r0, $r1, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r12, $r2 - t2STRDi8 killed $r3, killed $r5, $sp, 12, 14 /* CC::al */, $noreg :: (store 4 into %stack.6), (store 4 into %stack.5) - renamable $r3 = tLDRi renamable $r7, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.i13) - tSTRspi killed renamable $r3, $sp, 9, 14 /* CC::al */, $noreg :: (store 4 into %stack.0) - renamable $r3 = tLDRi renamable $r7, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.i34) - tSTRspi killed renamable $r3, $sp, 8, 14 /* CC::al */, $noreg :: (store 4 into %stack.1) - renamable $r3 = tLDRi renamable $r7, 2, 14 /* CC::al */, $noreg :: (load 4 from %ir.i32) - tSTRspi killed renamable $r3, $sp, 7, 14 /* CC::al */, $noreg :: (store 4 into %stack.2) - renamable $r3 = tLDRi renamable $r7, 3, 14 /* CC::al */, $noreg :: (load 4 from %ir.i30) - t2STRDi8 $r7, killed $r3, $sp, 20, 14 /* CC::al */, $noreg :: (store 4 into %stack.4), (store 4 into %stack.3) - renamable $r10 = t2LDRi12 killed renamable $r7, 16, 14 /* CC::al */, $noreg :: (load 4 from %ir.i28) + t2STRDi8 killed $r3, killed $r5, $sp, 12, 14 /* CC::al */, $noreg :: (store (s32) into %stack.6), (store (s32) into %stack.5) + renamable $r3 = tLDRi renamable $r7, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i13) + tSTRspi killed renamable $r3, $sp, 9, 14 /* CC::al */, $noreg :: (store (s32) into %stack.0) + renamable $r3 = tLDRi renamable $r7, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i34) + tSTRspi killed renamable $r3, $sp, 8, 14 /* CC::al */, $noreg :: (store (s32) into %stack.1) + renamable $r3 = tLDRi renamable $r7, 2, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i32) + tSTRspi killed renamable $r3, $sp, 7, 14 /* CC::al */, $noreg :: (store (s32) into %stack.2) + renamable $r3 = tLDRi renamable $r7, 3, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i30) + t2STRDi8 $r7, killed $r3, $sp, 20, 14 /* CC::al */, $noreg :: (store (s32) into %stack.4), (store (s32) into %stack.3) + renamable $r10 = t2LDRi12 killed renamable $r7, 16, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i28) bb.3.bb37 (align 4): successors: %bb.3(0x7c000000), %bb.4(0x04000000) liveins: $r0, $r1, $r2, $r4, $r6, $r8, $r9, $r10, $r12 $r7 = tMOVr killed $r6, 14 /* CC::al */, $noreg - renamable $r6 = tLDRspi $sp, 8, 14 /* CC::al */, $noreg :: (load 4 from %stack.1) - renamable $r3 = tLDRspi $sp, 7, 14 /* CC::al */, $noreg :: (load 4 from %stack.2) + renamable $r6 = tLDRspi $sp, 8, 14 /* CC::al */, $noreg :: (load (s32) from %stack.1) + renamable $r3 = tLDRspi $sp, 7, 14 /* CC::al */, $noreg :: (load (s32) from %stack.2) renamable $r6, renamable $r11 = t2SMULL $r9, killed renamable $r6, 14 /* CC::al */, $noreg renamable $r6, renamable $r11 = t2SMLAL killed renamable $r4, killed renamable $r3, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg - renamable $r3 = tLDRspi $sp, 6, 14 /* CC::al */, $noreg :: (load 4 from %stack.3) + renamable $r3 = tLDRspi $sp, 6, 14 /* CC::al */, $noreg :: (load (s32) from %stack.3) $r5 = tMOVr killed $r9, 14 /* CC::al */, $noreg renamable $r6, renamable $r11 = t2SMLAL renamable $r7, killed renamable $r3, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg - renamable $r9, renamable $r1 = t2LDR_POST killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (load 4 from %ir.i38) + renamable $r9, renamable $r1 = t2LDR_POST killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (load (s32) from %ir.i38) renamable $r6, renamable $r11 = t2SMLAL killed renamable $r0, renamable $r10, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg - renamable $r0 = tLDRspi $sp, 9, 14 /* CC::al */, $noreg :: (load 4 from %stack.0) + renamable $r0 = tLDRspi $sp, 9, 14 /* CC::al */, $noreg :: (load (s32) from %stack.0) $lr = tMOVr $r8, 14 /* CC::al */, $noreg renamable $r6, renamable $r11 = t2SMLAL renamable $r9, killed renamable $r0, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg early-clobber renamable $r6, dead early-clobber renamable $r11 = MVE_ASRLr killed renamable $r6, killed renamable $r11, renamable $r2, 14 /* CC::al */, $noreg - early-clobber renamable $r12 = t2STR_POST renamable $r6, killed renamable $r12, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.i39) + early-clobber renamable $r12 = t2STR_POST renamable $r6, killed renamable $r12, 4, 14 /* CC::al */, $noreg :: (store (s32) into %ir.i39) renamable $lr = t2LoopDec killed renamable $lr, 1 renamable $r8 = t2SUBri killed renamable $r8, 1, 14 /* CC::al */, $noreg, $noreg $r0 = tMOVr $r7, 14 /* CC::al */, $noreg @@ -374,16 +374,16 @@ body: | $r0 = tMOVr killed $r7, 14 /* CC::al */, $noreg $r7 = tADDrSPi $sp, 3, 14 /* CC::al */, $noreg $r4 = tMOVr killed $r5, 14 /* CC::al */, $noreg - $r12, $r8 = t2LDRDi8 $sp, 4, 14 /* CC::al */, $noreg :: (load 4 from %stack.8), (load 4 from %stack.7) - tLDMIA killed $r7, 14 /* CC::al */, $noreg, def $r3, def $r5, def $r7 :: (load 4 from %stack.6), (load 4 from %stack.5), (load 4 from %stack.4) + $r12, $r8 = t2LDRDi8 $sp, 4, 14 /* CC::al */, $noreg :: (load (s32) from %stack.8), (load (s32) from %stack.7) + tLDMIA killed $r7, 14 /* CC::al */, $noreg, def $r3, def $r5, def $r7 :: (load (s32) from %stack.6), (load (s32) from %stack.5), (load (s32) from %stack.4) bb.5.bb74: successors: %bb.6(0x04000000), %bb.1(0x7c000000) liveins: $r0, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r12, $r2 renamable $r7, dead $cpsr = nuw tADDi8 killed renamable $r7, 20, 14 /* CC::al */, $noreg - t2STRDi8 killed $r9, killed $r4, $r3, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.i14), (store 4 into %ir.i81) - t2STRDi8 killed $r6, killed $r0, $r3, 8, 14 /* CC::al */, $noreg :: (store 4 into %ir.i84), (store 4 into %ir.i88) + t2STRDi8 killed $r9, killed $r4, $r3, 0, 14 /* CC::al */, $noreg :: (store (s32) into %ir.i14), (store (s32) into %ir.i81) + t2STRDi8 killed $r6, killed $r0, $r3, 8, 14 /* CC::al */, $noreg :: (store (s32) into %ir.i84), (store (s32) into %ir.i88) renamable $r3, dead $cpsr = nuw tADDi8 killed renamable $r3, 16, 14 /* CC::al */, $noreg renamable $r5, $cpsr = tSUBi8 killed renamable $r5, 1, 14 /* CC::al */, $noreg $r1 = tMOVr $r12, 14 /* CC::al */, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-mov.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-mov.mir index fa439a6ca44b1..4421f1444c386 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-mov.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-mov.mir @@ -98,8 +98,8 @@ body: | ; CHECK: bb.1.while.body: ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK: liveins: $lr, $r0, $r1 - ; CHECK: renamable $r2, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep6) - ; CHECK: early-clobber renamable $r0 = t2STR_PRE killed renamable $r2, killed renamable $r0, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.scevgep2) + ; CHECK: renamable $r2, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep6) + ; CHECK: early-clobber renamable $r0 = t2STR_PRE killed renamable $r2, killed renamable $r0, 4, 14 /* CC::al */, $noreg :: (store (s32) into %ir.scevgep2) ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.1 ; CHECK: bb.2.while.end: ; CHECK: $r0, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg @@ -121,8 +121,8 @@ body: | successors: %bb.1(0x7c000000), %bb.2(0x04000000) liveins: $lr, $r0, $r1 - renamable $r2, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep6) - early-clobber renamable $r0 = t2STR_PRE killed renamable $r2, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep2) + renamable $r2, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load (s32) from %ir.scevgep6) + early-clobber renamable $r0 = t2STR_PRE killed renamable $r2, killed renamable $r0, 4, 14, $noreg :: (store (s32) into %ir.scevgep2) renamable $lr = t2LoopDec killed renamable $lr, 1 t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr tB %bb.2, 14, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/count_dominates_start.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/count_dominates_start.mir index 88c469c410d14..939d3978b8601 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/count_dominates_start.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/count_dominates_start.mir @@ -137,8 +137,8 @@ body: | ; CHECK: [[MVE_VCTP16_:%[0-9]+]]:vccr = MVE_VCTP16 [[PHI4]], 0, $noreg ; CHECK: [[t2SUBri1:%[0-9]+]]:rgpr = t2SUBri [[PHI4]], 8, 14 /* CC::al */, $noreg, $noreg ; CHECK: [[COPY7:%[0-9]+]]:gpr = COPY [[t2SUBri1]] - ; CHECK: [[MVE_VLDRHU16_post:%[0-9]+]]:rgpr, [[MVE_VLDRHU16_post1:%[0-9]+]]:mqpr = MVE_VLDRHU16_post [[PHI]], 16, 1, [[MVE_VCTP16_]] :: (load 16 from %ir.lsr.iv35, align 2) - ; CHECK: [[MVE_VLDRHU16_post2:%[0-9]+]]:rgpr, [[MVE_VLDRHU16_post3:%[0-9]+]]:mqpr = MVE_VLDRHU16_post [[PHI1]], 16, 1, [[MVE_VCTP16_]] :: (load 16 from %ir.lsr.iv12, align 2) + ; CHECK: [[MVE_VLDRHU16_post:%[0-9]+]]:rgpr, [[MVE_VLDRHU16_post1:%[0-9]+]]:mqpr = MVE_VLDRHU16_post [[PHI]], 16, 1, [[MVE_VCTP16_]] :: (load (s128) from %ir.lsr.iv35, align 2) + ; CHECK: [[MVE_VLDRHU16_post2:%[0-9]+]]:rgpr, [[MVE_VLDRHU16_post3:%[0-9]+]]:mqpr = MVE_VLDRHU16_post [[PHI1]], 16, 1, [[MVE_VCTP16_]] :: (load (s128) from %ir.lsr.iv12, align 2) ; CHECK: [[MVE_VMLADAVas16_:%[0-9]+]]:tgpreven = MVE_VMLADAVas16 [[PHI2]], killed [[MVE_VLDRHU16_post3]], killed [[MVE_VLDRHU16_post1]], 1, [[MVE_VCTP16_]] ; CHECK: [[COPY8:%[0-9]+]]:gpr = COPY [[MVE_VMLADAVas16_]] ; CHECK: [[COPY9:%[0-9]+]]:gpr = COPY [[MVE_VLDRHU16_post2]] @@ -192,8 +192,8 @@ body: | %26:vccr = MVE_VCTP16 %6, 0, $noreg %27:rgpr = t2SUBri %6, 8, 14 /* CC::al */, $noreg, $noreg %7:gpr = COPY %27 - %28:rgpr, %29:mqpr = MVE_VLDRHU16_post %2, 16, 1, %26 :: (load 16 from %ir.lsr.iv35, align 2) - %30:rgpr, %31:mqpr = MVE_VLDRHU16_post %3, 16, 1, %26 :: (load 16 from %ir.lsr.iv12, align 2) + %28:rgpr, %29:mqpr = MVE_VLDRHU16_post %2, 16, 1, %26 :: (load (s128) from %ir.lsr.iv35, align 2) + %30:rgpr, %31:mqpr = MVE_VLDRHU16_post %3, 16, 1, %26 :: (load (s128) from %ir.lsr.iv12, align 2) %32:tgpreven = MVE_VMLADAVas16 %4, killed %31, killed %29, 1, %26 %8:gpr = COPY %32 %9:gpr = COPY %30 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/ctlz-non-zeros.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/ctlz-non-zeros.mir index ed07bbb625229..afc2916ddb916 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/ctlz-non-zeros.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/ctlz-non-zeros.mir @@ -170,7 +170,7 @@ body: | ; CHECK: bb.1.loop.ph: ; CHECK: successors: %bb.2(0x80000000) ; CHECK: liveins: $r0, $r1, $r2, $r3 - ; CHECK: renamable $lr = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8) + ; CHECK: renamable $lr = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg :: (load (s32) from %fixed-stack.0, align 8) ; CHECK: $r4 = tMOVr killed $lr, 14 /* CC::al */, $noreg ; CHECK: bb.2.loop.body: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) @@ -178,15 +178,15 @@ body: | ; CHECK: $lr = tMOVr $r4, 14 /* CC::al */, $noreg ; CHECK: renamable $vpr = MVE_VCTP16 renamable $r3, 0, $noreg ; CHECK: MVE_VPST 4, implicit $vpr - ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRHU16_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.addr.b, align 2) - ; CHECK: renamable $q1 = MVE_VLDRHU16 killed renamable $r0, 0, 1, renamable $vpr :: (load 16 from %ir.addr.a, align 2) + ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRHU16_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.addr.b, align 2) + ; CHECK: renamable $q1 = MVE_VLDRHU16 killed renamable $r0, 0, 1, renamable $vpr :: (load (s128) from %ir.addr.a, align 2) ; CHECK: renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 8, 14 /* CC::al */, $noreg ; CHECK: renamable $r4, dead $cpsr = tSUBi8 killed $r4, 1, 14 /* CC::al */, $noreg ; CHECK: renamable $q1 = MVE_VCLZs8 killed renamable $q1, 0, $noreg, undef renamable $q1 ; CHECK: $r0 = tMOVr $r1, 14 /* CC::al */, $noreg ; CHECK: renamable $q1 = MVE_VQSHRUNs16th killed renamable $q1, killed renamable $q0, 1, 0, $noreg ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $r2 = MVE_VSTRHU16_post killed renamable $q1, killed renamable $r2, 16, 1, killed renamable $vpr :: (store 16 into %ir.addr.c, align 2) + ; CHECK: renamable $r2 = MVE_VSTRHU16_post killed renamable $q1, killed renamable $r2, 16, 1, killed renamable $vpr :: (store (s128) into %ir.addr.c, align 2) ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.2 ; CHECK: bb.3.exit: ; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc @@ -206,7 +206,7 @@ body: | successors: %bb.2(0x80000000) liveins: $r0, $r1, $r2, $r3 - renamable $lr = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8) + renamable $lr = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg :: (load (s32) from %fixed-stack.0, align 8) renamable $lr = t2DoLoopStart killed renamable $lr $r4 = tMOVr killed $lr, 14 /* CC::al */, $noreg @@ -217,8 +217,8 @@ body: | $lr = tMOVr $r4, 14 /* CC::al */, $noreg renamable $vpr = MVE_VCTP16 renamable $r3, 0, $noreg MVE_VPST 4, implicit $vpr - renamable $r1, renamable $q0 = MVE_VLDRHU16_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.addr.b, align 2) - renamable $q1 = MVE_VLDRHU16 killed renamable $r0, 0, 1, renamable $vpr :: (load 16 from %ir.addr.a, align 2) + renamable $r1, renamable $q0 = MVE_VLDRHU16_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.addr.b, align 2) + renamable $q1 = MVE_VLDRHU16 killed renamable $r0, 0, 1, renamable $vpr :: (load (s128) from %ir.addr.a, align 2) renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 8, 14 /* CC::al */, $noreg renamable $r4, dead $cpsr = tSUBi8 killed $r4, 1, 14 /* CC::al */, $noreg renamable $q1 = MVE_VCLZs8 killed renamable $q1, 0, $noreg, undef renamable $q1 @@ -226,7 +226,7 @@ body: | $r0 = tMOVr $r1, 14 /* CC::al */, $noreg renamable $q1 = MVE_VQSHRUNs16th killed renamable $q1, killed renamable $q0, 1, 0, $noreg MVE_VPST 8, implicit $vpr - renamable $r2 = MVE_VSTRHU16_post killed renamable $q1, killed renamable $r2, 16, 1, killed renamable $vpr :: (store 16 into %ir.addr.c, align 2) + renamable $r2 = MVE_VSTRHU16_post killed renamable $q1, killed renamable $r2, 16, 1, killed renamable $vpr :: (store (s128) into %ir.addr.c, align 2) t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr tB %bb.3, 14 /* CC::al */, $noreg @@ -277,7 +277,7 @@ body: | ; CHECK: bb.1.loop.ph: ; CHECK: successors: %bb.2(0x80000000) ; CHECK: liveins: $r0, $r1, $r2, $r3, $r4 - ; CHECK: renamable $lr = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8) + ; CHECK: renamable $lr = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg :: (load (s32) from %fixed-stack.0, align 8) ; CHECK: $r12 = tMOVr killed $lr, 14 /* CC::al */, $noreg ; CHECK: bb.2.loop.body: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) @@ -285,14 +285,14 @@ body: | ; CHECK: $lr = tMOVr $r12, 14 /* CC::al */, $noreg ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg ; CHECK: MVE_VPST 4, implicit $vpr - ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.addr.b, align 4) - ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.addr.a, align 4) + ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.addr.b, align 4) + ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load (s128) from %ir.addr.a, align 4) ; CHECK: renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg ; CHECK: renamable $r12 = t2SUBri killed $r12, 1, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $q1 = MVE_VCLZs16 killed renamable $q1, 0, $noreg, undef renamable $q1 ; CHECK: renamable $q1 = MVE_VQSHRUNs32th killed renamable $q1, killed renamable $q0, 3, 0, $noreg ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $r2 = MVE_VSTRWU32_post killed renamable $q1, killed renamable $r2, 16, 1, killed renamable $vpr :: (store 16 into %ir.addr.c, align 4) + ; CHECK: renamable $r2 = MVE_VSTRWU32_post killed renamable $q1, killed renamable $r2, 16, 1, killed renamable $vpr :: (store (s128) into %ir.addr.c, align 4) ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.2 ; CHECK: bb.3.exit: ; CHECK: liveins: $r4 @@ -313,7 +313,7 @@ body: | successors: %bb.2(0x80000000) liveins: $r0, $r1, $r2, $r3 - renamable $lr = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8) + renamable $lr = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg :: (load (s32) from %fixed-stack.0, align 8) renamable $lr = t2DoLoopStart killed renamable $lr $r12 = tMOVr killed $lr, 14 /* CC::al */, $noreg @@ -324,15 +324,15 @@ body: | $lr = tMOVr $r12, 14 /* CC::al */, $noreg renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg MVE_VPST 4, implicit $vpr - renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.addr.b, align 4) - renamable $r0, renamable $q1 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.addr.a, align 4) + renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.addr.b, align 4) + renamable $r0, renamable $q1 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load (s128) from %ir.addr.a, align 4) renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg renamable $r12 = t2SUBri killed $r12, 1, 14 /* CC::al */, $noreg, $noreg renamable $q1 = MVE_VCLZs16 killed renamable $q1, 0, $noreg, undef renamable $q1 renamable $lr = t2LoopDec killed renamable $lr, 1 renamable $q1 = MVE_VQSHRUNs32th killed renamable $q1, killed renamable $q0, 3, 0, $noreg MVE_VPST 8, implicit $vpr - renamable $r2 = MVE_VSTRWU32_post killed renamable $q1, killed renamable $r2, 16, 1, killed renamable $vpr :: (store 16 into %ir.addr.c, align 4) + renamable $r2 = MVE_VSTRWU32_post killed renamable $q1, killed renamable $r2, 16, 1, killed renamable $vpr :: (store (s128) into %ir.addr.c, align 4) t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr tB %bb.3, 14 /* CC::al */, $noreg @@ -383,7 +383,7 @@ body: | ; CHECK: bb.1.loop.ph: ; CHECK: successors: %bb.2(0x80000000) ; CHECK: liveins: $r0, $r1, $r2, $r3, $r4 - ; CHECK: renamable $lr = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8) + ; CHECK: renamable $lr = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg :: (load (s32) from %fixed-stack.0, align 8) ; CHECK: $r12 = tMOVr killed $lr, 14 /* CC::al */, $noreg ; CHECK: bb.2.loop.body: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) @@ -391,14 +391,14 @@ body: | ; CHECK: $lr = tMOVr $r12, 14 /* CC::al */, $noreg ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg ; CHECK: MVE_VPST 4, implicit $vpr - ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.addr.a, align 4) - ; CHECK: renamable $r1, renamable $q1 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.addr.b, align 4) + ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load (s128) from %ir.addr.a, align 4) + ; CHECK: renamable $r1, renamable $q1 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.addr.b, align 4) ; CHECK: renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg ; CHECK: renamable $r12 = t2SUBri killed $r12, 1, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $q1 = MVE_VCLZs32 killed renamable $q1, 0, $noreg, undef renamable $q1 ; CHECK: renamable $q0 = MVE_VQSHRUNs32th killed renamable $q0, killed renamable $q1, 3, 0, $noreg ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $r2 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r2, 16, 1, killed renamable $vpr :: (store 16 into %ir.addr.c, align 4) + ; CHECK: renamable $r2 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r2, 16, 1, killed renamable $vpr :: (store (s128) into %ir.addr.c, align 4) ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.2 ; CHECK: bb.3.exit: ; CHECK: liveins: $r4 @@ -419,7 +419,7 @@ body: | successors: %bb.2(0x80000000) liveins: $r0, $r1, $r2, $r3 - renamable $lr = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8) + renamable $lr = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg :: (load (s32) from %fixed-stack.0, align 8) renamable $lr = t2DoLoopStart killed renamable $lr $r12 = tMOVr killed $lr, 14 /* CC::al */, $noreg @@ -430,15 +430,15 @@ body: | $lr = tMOVr $r12, 14 /* CC::al */, $noreg renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg MVE_VPST 4, implicit $vpr - renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.addr.a, align 4) - renamable $r1, renamable $q1 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.addr.b, align 4) + renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load (s128) from %ir.addr.a, align 4) + renamable $r1, renamable $q1 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.addr.b, align 4) renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg renamable $r12 = t2SUBri killed $r12, 1, 14 /* CC::al */, $noreg, $noreg renamable $q1 = MVE_VCLZs32 killed renamable $q1, 0, $noreg, undef renamable $q1 renamable $lr = t2LoopDec killed renamable $lr, 1 renamable $q0 = MVE_VQSHRUNs32th killed renamable $q0, killed renamable $q1, 3, 0, $noreg MVE_VPST 8, implicit $vpr - renamable $r2 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r2, 16, 1, killed renamable $vpr :: (store 16 into %ir.addr.c, align 4) + renamable $r2 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r2, 16, 1, killed renamable $vpr :: (store (s128) into %ir.addr.c, align 4) t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr tB %bb.3, 14 /* CC::al */, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/disjoint-vcmp.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/disjoint-vcmp.mir index 805e2ba6c5ac9..266e8c7e3c358 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/disjoint-vcmp.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/disjoint-vcmp.mir @@ -138,28 +138,28 @@ body: | ; CHECK: renamable $r4, dead $cpsr = tADDi3 renamable $r2, 3, 14 /* CC::al */, $noreg ; CHECK: $r12 = t2MOVTi16 killed $r12, target-flags(arm-hi16) @mask, 14 /* CC::al */, $noreg ; CHECK: renamable $r4 = t2BICri killed renamable $r4, 3, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $r5 = t2LDRHi12 killed renamable $r12, 0, 14 /* CC::al */, $noreg :: (dereferenceable load 2 from %ir.mask.gep9) + ; CHECK: renamable $r5 = t2LDRHi12 killed renamable $r12, 0, 14 /* CC::al */, $noreg :: (dereferenceable load (s16) from %ir.mask.gep9) ; CHECK: renamable $r12 = t2SUBri killed renamable $r4, 4, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r4, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg ; CHECK: $vpr = VMSR_P0 $r5, 14 /* CC::al */, $noreg ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $r4, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r12 = t2SUBri killed renamable $r3, 16, 14 /* CC::al */, $noreg, $noreg - ; CHECK: VSTR_P0_off killed renamable $vpr, $sp, 0, 14 /* CC::al */, $noreg :: (store 4 into %stack.0) + ; CHECK: VSTR_P0_off killed renamable $vpr, $sp, 0, 14 /* CC::al */, $noreg :: (store (s32) into %stack.0) ; CHECK: renamable $q0 = MVE_VDUP32 killed renamable $r5, 0, $noreg, undef renamable $q0 ; CHECK: $r3 = tMOVr $r0, 14 /* CC::al */, $noreg ; CHECK: bb.2.bb9: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $q0, $r0, $r1, $r2, $r3, $r12 - ; CHECK: renamable $vpr = VLDR_P0_off $sp, 0, 14 /* CC::al */, $noreg :: (load 4 from %stack.0) + ; CHECK: renamable $vpr = VLDR_P0_off $sp, 0, 14 /* CC::al */, $noreg :: (load (s32) from %stack.0) ; CHECK: MVE_VPST 2, implicit $vpr ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r2, 1, killed renamable $vpr - ; CHECK: renamable $r1, renamable $q1 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv24, align 4) - ; CHECK: renamable $r3, renamable $q2 = MVE_VLDRWU32_post killed renamable $r3, 16, 1, killed renamable $vpr :: (load 16 from %ir.lsr.iv1, align 4) + ; CHECK: renamable $r1, renamable $q1 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv24, align 4) + ; CHECK: renamable $r3, renamable $q2 = MVE_VLDRWU32_post killed renamable $r3, 16, 1, killed renamable $vpr :: (load (s128) from %ir.lsr.iv1, align 4) ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg ; CHECK: renamable $q1 = nsw MVE_VMULi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 - ; CHECK: renamable $r12, renamable $q2 = MVE_VLDRWU32_pre killed renamable $r12, 16, 0, $noreg :: (load 16 from %ir.scevgep2, align 8) + ; CHECK: renamable $r12, renamable $q2 = MVE_VLDRWU32_pre killed renamable $r12, 16, 0, $noreg :: (load (s128) from %ir.scevgep2, align 8) ; CHECK: MVE_VPTv4u32 8, renamable $q0, killed renamable $q2, 2, implicit-def $vpr - ; CHECK: MVE_VSTRWU32 killed renamable $q1, killed renamable $r0, 0, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1, align 4) + ; CHECK: MVE_VSTRWU32 killed renamable $q1, killed renamable $r0, 0, 1, killed renamable $vpr :: (store (s128) into %ir.lsr.iv1, align 4) ; CHECK: $r0 = tMOVr $r3, 14 /* CC::al */, $noreg ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2 ; CHECK: bb.3.bb27: @@ -187,13 +187,13 @@ body: | renamable $r4, dead $cpsr = tADDi3 renamable $r2, 3, 14, $noreg $r12 = t2MOVTi16 killed $r12, target-flags(arm-hi16) @mask, 14, $noreg renamable $r4 = t2BICri killed renamable $r4, 3, 14, $noreg, $noreg - renamable $r5 = t2LDRHi12 killed renamable $r12, 0, 14, $noreg :: (dereferenceable load 2 from %ir.mask.gep9) + renamable $r5 = t2LDRHi12 killed renamable $r12, 0, 14, $noreg :: (dereferenceable load (s16) from %ir.mask.gep9) renamable $r12 = t2SUBri killed renamable $r4, 4, 14, $noreg, $noreg renamable $r4, dead $cpsr = tMOVi8 1, 14, $noreg $vpr = VMSR_P0 $r5, 14, $noreg renamable $lr = nuw nsw t2ADDrs killed renamable $r4, killed renamable $r12, 19, 14, $noreg, $noreg renamable $r12 = t2SUBri killed renamable $r3, 16, 14, $noreg, $noreg - VSTR_P0_off killed renamable $vpr, $sp, 0, 14, $noreg :: (store 4 into %stack.0) + VSTR_P0_off killed renamable $vpr, $sp, 0, 14, $noreg :: (store (s32) into %stack.0) renamable $q0 = MVE_VDUP32 killed renamable $r5, 0, $noreg, undef renamable $q0 $r3 = tMOVr $r0, 14, $noreg $lr = t2DoLoopStart renamable $lr @@ -202,16 +202,16 @@ body: | successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $lr, $q0, $r0, $r1, $r2, $r3, $r12 - renamable $vpr = VLDR_P0_off $sp, 0, 14, $noreg :: (load 4 from %stack.0) + renamable $vpr = VLDR_P0_off $sp, 0, 14, $noreg :: (load (s32) from %stack.0) MVE_VPST 2, implicit $vpr renamable $vpr = MVE_VCTP32 renamable $r2, 1, killed renamable $vpr - renamable $r1, renamable $q1 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv24, align 4) - renamable $r3, renamable $q2 = MVE_VLDRWU32_post killed renamable $r3, 16, 1, killed renamable $vpr :: (load 16 from %ir.lsr.iv1, align 4) + renamable $r1, renamable $q1 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv24, align 4) + renamable $r3, renamable $q2 = MVE_VLDRWU32_post killed renamable $r3, 16, 1, killed renamable $vpr :: (load (s128) from %ir.lsr.iv1, align 4) renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14, $noreg renamable $q1 = nsw MVE_VMULi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 - renamable $r12, renamable $q2 = MVE_VLDRWU32_pre killed renamable $r12, 16, 0, $noreg :: (load 16 from %ir.scevgep2, align 8) + renamable $r12, renamable $q2 = MVE_VLDRWU32_pre killed renamable $r12, 16, 0, $noreg :: (load (s128) from %ir.scevgep2, align 8) MVE_VPTv4u32 8, renamable $q0, killed renamable $q2, 2, implicit-def $vpr - MVE_VSTRWU32 killed renamable $q1, killed renamable $r0, 0, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1, align 4) + MVE_VSTRWU32 killed renamable $q1, killed renamable $r0, 0, 1, killed renamable $vpr :: (store (s128) into %ir.lsr.iv1, align 4) renamable $lr = t2LoopDec killed renamable $lr, 1 $r0 = tMOVr $r3, 14, $noreg t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-ignore-vctp.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-ignore-vctp.mir index c4d864248bd99..737bc008e7621 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-ignore-vctp.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-ignore-vctp.mir @@ -102,7 +102,7 @@ body: | ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8 ; CHECK: renamable $r3, dead $cpsr = tLSLri killed renamable $r2, 1, 14 /* CC::al */, $noreg ; CHECK: renamable $r2 = tLEApcrel %const.0, 14 /* CC::al */, $noreg - ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r2, 0, 0, $noreg :: (load 16 from constant-pool) + ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r2, 0, 0, $noreg :: (load (s128) from constant-pool) ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r3 ; CHECK: bb.1.do.body (align 4): ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) @@ -135,7 +135,7 @@ body: | renamable $r2, dead $cpsr = tMOVi8 1, 14, $noreg renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $r12, 19, 14, $noreg, $noreg renamable $r2 = tLEApcrel %const.0, 14, $noreg - renamable $q0 = MVE_VLDRWU32 killed renamable $r2, 0, 0, $noreg :: (load 16 from constant-pool) + renamable $q0 = MVE_VLDRWU32 killed renamable $r2, 0, 0, $noreg :: (load (s128) from constant-pool) $lr = t2DoLoopStart renamable $lr bb.1.do.body (align 4): diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-remove-loop-update.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-remove-loop-update.mir index 05cb95ba7841e..b8301ab46d186 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-remove-loop-update.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-remove-loop-update.mir @@ -128,12 +128,12 @@ body: | ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3 ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg ; CHECK: MVE_VPST 4, implicit $vpr - ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv13, align 4) - ; CHECK: renamable $r2, renamable $q1 = MVE_VLDRWU32_post killed renamable $r2, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv1416, align 4) + ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv13, align 4) + ; CHECK: renamable $r2, renamable $q1 = MVE_VLDRWU32_post killed renamable $r2, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv1416, align 4) ; CHECK: renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg ; CHECK: renamable $q0 = nsw MVE_VADDi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1719, align 4) + ; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store (s128) into %ir.lsr.iv1719, align 4) ; CHECK: renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2 ; CHECK: bb.3.for.cond.cleanup: @@ -169,12 +169,12 @@ body: | renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg MVE_VPST 4, implicit $vpr - renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv13, align 4) - renamable $r2, renamable $q1 = MVE_VLDRWU32_post killed renamable $r2, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv1416, align 4) + renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv13, align 4) + renamable $r2, renamable $q1 = MVE_VLDRWU32_post killed renamable $r2, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv1416, align 4) renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14, $noreg renamable $q0 = nsw MVE_VADDi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 MVE_VPST 8, implicit $vpr - renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1719, align 4) + renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store (s128) into %ir.lsr.iv1719, align 4) renamable $lr = t2LoopDec killed renamable $lr, 1 renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14, $noreg t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/emptyblock.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/emptyblock.mir index 8fee094ee4643..48e82fdec3786 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/emptyblock.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/emptyblock.mir @@ -399,9 +399,9 @@ body: | frame-setup CFI_INSTRUCTION offset $d8, -72 $sp = frame-setup tSUBspi $sp, 10, 14 /* CC::al */, $noreg frame-setup CFI_INSTRUCTION def_cfa_offset 112 - renamable $r4 = tLDRi renamable $r0, 2, 14 /* CC::al */, $noreg :: (load 4 from %ir.NumInputs) + renamable $r4 = tLDRi renamable $r0, 2, 14 /* CC::al */, $noreg :: (load (s32) from %ir.NumInputs) $r5 = tMOVr killed $r1, 14 /* CC::al */, $noreg - renamable $r11 = t2LDRi12 renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.pDCTCoefs34) + renamable $r11 = t2LDRi12 renamable $r0, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.pDCTCoefs34) renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg $r1 = tMOVr $r4, 14 /* CC::al */, $noreg tCMPi8 renamable $r4, 4, 14 /* CC::al */, $noreg, implicit-def $cpsr @@ -410,7 +410,7 @@ body: | renamable $r1, dead $cpsr = tSUBrr renamable $r4, killed renamable $r1, 14 /* CC::al */, $noreg renamable $r1, dead $cpsr = tADDi8 killed renamable $r1, 3, 14 /* CC::al */, $noreg renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r1, 19, 14 /* CC::al */, $noreg, $noreg - renamable $r3 = tLDRi killed renamable $r0, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.NumFilters) + renamable $r3 = tLDRi killed renamable $r0, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.NumFilters) $r0 = tMOVr $r4, 14 /* CC::al */, $noreg renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 $r1 = tMOVr $r5, 14 /* CC::al */, $noreg @@ -422,7 +422,7 @@ body: | renamable $vpr = MVE_VCTP32 renamable $r0, 0, $noreg MVE_VPST 4, implicit $vpr - renamable $r1, renamable $q1 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.pInT.033, align 4) + renamable $r1, renamable $q1 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.pInT.033, align 4) renamable $q0 = MVE_VADDf32 killed renamable $q0, killed renamable $q1, 1, killed renamable $vpr, undef renamable $q0 renamable $r0, dead $cpsr = tSUBi8 killed renamable $r0, 4, 14 /* CC::al */, $noreg renamable $lr = t2LoopDec killed renamable $lr, 1 @@ -435,15 +435,15 @@ body: | renamable $s4 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s0, renamable $s1, 14 /* CC::al */, $noreg renamable $r0, dead $cpsr = tSUBi3 renamable $r3, 4, 14 /* CC::al */, $noreg - tSTRspi killed renamable $r3, $sp, 1, 14 /* CC::al */, $noreg :: (store 4 into %stack.8) + tSTRspi killed renamable $r3, $sp, 1, 14 /* CC::al */, $noreg :: (store (s32) into %stack.8) renamable $s4 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s4, renamable $s2, 14 /* CC::al */, $noreg - tSTRspi renamable $r0, $sp, 8, 14 /* CC::al */, $noreg :: (store 4 into %stack.1) + tSTRspi renamable $r0, $sp, 8, 14 /* CC::al */, $noreg :: (store (s32) into %stack.1) renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s4, killed renamable $s3, 14 /* CC::al */, $noreg, implicit $q0 - renamable $s2 = VLDRS renamable $r11, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.2) + renamable $s2 = VLDRS renamable $r11, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.2) tCMPi8 killed renamable $r0, 2, 14 /* CC::al */, $noreg, implicit-def $cpsr renamable $r0 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg renamable $s0 = nnan ninf nsz arcp contract afn reassoc VMULS killed renamable $s2, killed renamable $s0, 14 /* CC::al */, $noreg - VSTRS killed renamable $s0, renamable $r2, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.pOut) + VSTRS killed renamable $s0, renamable $r2, 0, 14 /* CC::al */, $noreg :: (store (s32) into %ir.pOut) t2Bcc %bb.7, 3 /* CC::lo */, killed $cpsr bb.3.for.body.lr.ph: @@ -452,10 +452,10 @@ body: | renamable $r6 = t2ADDri renamable $r5, 16, 14 /* CC::al */, $noreg, $noreg renamable $r1, dead $cpsr = tSUBi3 renamable $r4, 4, 14 /* CC::al */, $noreg - tSTRspi killed renamable $r6, $sp, 4, 14 /* CC::al */, $noreg :: (store 4 into %stack.5) + tSTRspi killed renamable $r6, $sp, 4, 14 /* CC::al */, $noreg :: (store (s32) into %stack.5) renamable $r6, dead $cpsr = tLSLri renamable $r4, 4, 14 /* CC::al */, $noreg tCMPi8 renamable $r1, 4, 14 /* CC::al */, $noreg, implicit-def $cpsr - tSTRspi killed renamable $r6, $sp, 3, 14 /* CC::al */, $noreg :: (store 4 into %stack.6) + tSTRspi killed renamable $r6, $sp, 3, 14 /* CC::al */, $noreg :: (store (s32) into %stack.6) t2IT 10, 8, implicit-def $itstate renamable $r1 = tMOVi8 $noreg, 4, 10 /* CC::ge */, killed $cpsr, implicit killed renamable $r1, implicit killed $itstate renamable $r7 = t2ADDrs renamable $r4, renamable $r4, 10, 14 /* CC::al */, $noreg, $noreg @@ -468,11 +468,11 @@ body: | renamable $r1 = nuw nsw t2ADDrs renamable $r0, killed renamable $r1, 19, 14 /* CC::al */, $noreg, $noreg renamable $r6 = t2ADDri killed renamable $r12, 16, 14 /* CC::al */, $noreg, $noreg renamable $r12 = t2ADDri killed renamable $r3, 16, 14 /* CC::al */, $noreg, $noreg - tSTRspi killed renamable $r1, $sp, 2, 14 /* CC::al */, $noreg :: (store 4 into %stack.7) + tSTRspi killed renamable $r1, $sp, 2, 14 /* CC::al */, $noreg :: (store (s32) into %stack.7) renamable $r1 = t2ADDri killed renamable $lr, 16, 14 /* CC::al */, $noreg, $noreg renamable $r10 = t2ADDri killed renamable $r7, 16, 14 /* CC::al */, $noreg, $noreg - tSTRspi renamable $r4, $sp, 7, 14 /* CC::al */, $noreg :: (store 4 into %stack.2) - t2STRDi8 $r11, $r5, $sp, 20, 14 /* CC::al */, $noreg :: (store 4 into %stack.4), (store 4 into %stack.3) + tSTRspi renamable $r4, $sp, 7, 14 /* CC::al */, $noreg :: (store (s32) into %stack.2) + t2STRDi8 $r11, $r5, $sp, 20, 14 /* CC::al */, $noreg :: (store (s32) into %stack.4), (store (s32) into %stack.3) bb.4.for.body (align 4): successors: %bb.5(0x80000000) @@ -481,44 +481,44 @@ body: | renamable $r3 = t2MUL renamable $r0, renamable $r4, 14 /* CC::al */, $noreg renamable $r7, dead $cpsr = nuw nsw tADDi3 renamable $r0, 1, 14 /* CC::al */, $noreg renamable $r8 = nuw nsw t2ADDri renamable $r0, 2, 14 /* CC::al */, $noreg, $noreg - tSTRspi renamable $r7, $sp, 9, 14 /* CC::al */, $noreg :: (store 4 into %stack.0) + tSTRspi renamable $r7, $sp, 9, 14 /* CC::al */, $noreg :: (store (s32) into %stack.0) renamable $r9 = t2ADDri renamable $r0, 3, 14 /* CC::al */, $noreg, $noreg renamable $r7, dead $cpsr = tMUL renamable $r4, killed renamable $r7, 14 /* CC::al */, $noreg - renamable $q0 = MVE_VLDRWU32 killed renamable $r5, 0, 0, $noreg :: (load 16 from %ir.39, align 4) + renamable $q0 = MVE_VLDRWU32 killed renamable $r5, 0, 0, $noreg :: (load (s128) from %ir.39, align 4) renamable $r3 = t2ADDrs renamable $r11, killed renamable $r3, 18, 14 /* CC::al */, $noreg, $noreg renamable $r5 = t2MUL renamable $r8, renamable $r4, 14 /* CC::al */, $noreg renamable $r4 = t2MUL renamable $r9, killed renamable $r4, 14 /* CC::al */, $noreg renamable $r7 = t2ADDrs renamable $r11, killed renamable $r7, 18, 14 /* CC::al */, $noreg, $noreg renamable $r5 = t2ADDrs renamable $r11, killed renamable $r5, 18, 14 /* CC::al */, $noreg, $noreg renamable $r4 = t2ADDrs killed renamable $r11, killed renamable $r4, 18, 14 /* CC::al */, $noreg, $noreg - renamable $q1 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load 16 from %ir.41, align 4) + renamable $q1 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load (s128) from %ir.41, align 4) renamable $q3 = nnan ninf nsz arcp contract afn reassoc MVE_VMULf32 killed renamable $q1, renamable $q0, 0, $noreg, undef renamable $q3 - renamable $q1 = MVE_VLDRWU32 killed renamable $r7, 0, 0, $noreg :: (load 16 from %ir.44, align 4) + renamable $q1 = MVE_VLDRWU32 killed renamable $r7, 0, 0, $noreg :: (load (s128) from %ir.44, align 4) renamable $q2 = nnan ninf nsz arcp contract afn reassoc MVE_VMULf32 killed renamable $q1, renamable $q0, 0, $noreg, undef renamable $q2 - renamable $q1 = MVE_VLDRWU32 killed renamable $r5, 0, 0, $noreg :: (load 16 from %ir.47, align 4) + renamable $q1 = MVE_VLDRWU32 killed renamable $r5, 0, 0, $noreg :: (load (s128) from %ir.47, align 4) renamable $q1 = nnan ninf nsz arcp contract afn reassoc MVE_VMULf32 killed renamable $q1, renamable $q0, 0, $noreg, undef renamable $q1 - renamable $q4 = MVE_VLDRWU32 killed renamable $r4, 0, 0, $noreg :: (load 16 from %ir.50, align 4) + renamable $q4 = MVE_VLDRWU32 killed renamable $r4, 0, 0, $noreg :: (load (s128) from %ir.50, align 4) renamable $q0 = nnan ninf nsz arcp contract afn reassoc MVE_VMULf32 killed renamable $q4, killed renamable $q0, 0, $noreg, undef renamable $q0 - renamable $lr = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg :: (load 4 from %stack.7) + renamable $lr = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg :: (load (s32) from %stack.7) $r3 = tMOVr $r10, 14 /* CC::al */, $noreg $r5 = tMOVr $r1, 14 /* CC::al */, $noreg $r4 = tMOVr $r12, 14 /* CC::al */, $noreg $lr = t2DoLoopStart renamable $lr $r7 = tMOVr $r6, 14 /* CC::al */, $noreg - renamable $r11 = t2LDRi12 $sp, 16, 14 /* CC::al */, $noreg :: (load 4 from %stack.5) + renamable $r11 = t2LDRi12 $sp, 16, 14 /* CC::al */, $noreg :: (load (s32) from %stack.5) bb.5.do.body24 (align 4): successors: %bb.5(0x7c000000), %bb.6(0x04000000) liveins: $lr, $q0, $q1, $q2, $q3, $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10, $r11, $r12 - renamable $r11, renamable $q4 = MVE_VLDRWU32_post killed renamable $r11, 16, 0, $noreg :: (load 16 from %ir.lsr.iv4, align 4) - renamable $r7, renamable $q5 = MVE_VLDRWU32_post killed renamable $r7, 16, 0, $noreg :: (load 16 from %ir.lsr.iv911, align 4) + renamable $r11, renamable $q4 = MVE_VLDRWU32_post killed renamable $r11, 16, 0, $noreg :: (load (s128) from %ir.lsr.iv4, align 4) + renamable $r7, renamable $q5 = MVE_VLDRWU32_post killed renamable $r7, 16, 0, $noreg :: (load (s128) from %ir.lsr.iv911, align 4) renamable $q3 = nnan ninf nsz arcp contract afn reassoc MVE_VFMAf32 killed renamable $q3, renamable $q4, killed renamable $q5, 0, $noreg - renamable $r4, renamable $q5 = MVE_VLDRWU32_post killed renamable $r4, 16, 0, $noreg :: (load 16 from %ir.lsr.iv1618, align 4) + renamable $r4, renamable $q5 = MVE_VLDRWU32_post killed renamable $r4, 16, 0, $noreg :: (load (s128) from %ir.lsr.iv1618, align 4) renamable $q2 = nnan ninf nsz arcp contract afn reassoc MVE_VFMAf32 killed renamable $q2, renamable $q4, killed renamable $q5, 0, $noreg - renamable $r5, renamable $q5 = MVE_VLDRWU32_post killed renamable $r5, 16, 0, $noreg :: (load 16 from %ir.lsr.iv2325, align 4) + renamable $r5, renamable $q5 = MVE_VLDRWU32_post killed renamable $r5, 16, 0, $noreg :: (load (s128) from %ir.lsr.iv2325, align 4) renamable $q1 = nnan ninf nsz arcp contract afn reassoc MVE_VFMAf32 killed renamable $q1, renamable $q4, killed renamable $q5, 0, $noreg - renamable $r3, renamable $q5 = MVE_VLDRWU32_post killed renamable $r3, 16, 0, $noreg :: (load 16 from %ir.lsr.iv3032, align 4) + renamable $r3, renamable $q5 = MVE_VLDRWU32_post killed renamable $r3, 16, 0, $noreg :: (load (s128) from %ir.lsr.iv3032, align 4) renamable $q0 = nnan ninf nsz arcp contract afn reassoc MVE_VFMAf32 killed renamable $q0, killed renamable $q4, killed renamable $q5, 0, $noreg renamable $lr = t2LoopDec killed renamable $lr, 1 t2LoopEnd renamable $lr, %bb.5, implicit-def dead $cpsr @@ -536,24 +536,24 @@ body: | renamable $s8 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s18, killed renamable $s11, 14 /* CC::al */, $noreg, implicit $q2 renamable $s10 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s4, renamable $s5, 14 /* CC::al */, $noreg renamable $s14 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s0, renamable $s1, 14 /* CC::al */, $noreg - renamable $r7 = tLDRspi $sp, 9, 14 /* CC::al */, $noreg :: (load 4 from %stack.0) + renamable $r7 = tLDRspi $sp, 9, 14 /* CC::al */, $noreg :: (load (s32) from %stack.0) renamable $s10 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s10, renamable $s6, 14 /* CC::al */, $noreg renamable $s14 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s14, renamable $s2, 14 /* CC::al */, $noreg renamable $r3 = t2ADDrs renamable $r2, renamable $r0, 18, 14 /* CC::al */, $noreg, $noreg renamable $r7 = t2ADDrs renamable $r2, killed renamable $r7, 18, 14 /* CC::al */, $noreg, $noreg renamable $s4 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s10, killed renamable $s7, 14 /* CC::al */, $noreg, implicit $q1 renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s14, killed renamable $s3, 14 /* CC::al */, $noreg, implicit $q0 - VSTRS killed renamable $s12, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.arrayidx37) - VSTRS killed renamable $s8, killed renamable $r7, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.arrayidx42) + VSTRS killed renamable $s12, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store (s32) into %ir.arrayidx37) + VSTRS killed renamable $s8, killed renamable $r7, 0, 14 /* CC::al */, $noreg :: (store (s32) into %ir.arrayidx42) renamable $r3 = t2ADDrs renamable $r2, killed renamable $r8, 18, 14 /* CC::al */, $noreg, $noreg renamable $r7 = t2ADDrs renamable $r2, killed renamable $r9, 18, 14 /* CC::al */, $noreg, $noreg - VSTRS killed renamable $s4, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.arrayidx47) - VSTRS killed renamable $s0, killed renamable $r7, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.arrayidx52) - $r11, $r5 = t2LDRDi8 $sp, 20, 14 /* CC::al */, $noreg :: (load 4 from %stack.4), (load 4 from %stack.3) + VSTRS killed renamable $s4, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store (s32) into %ir.arrayidx47) + VSTRS killed renamable $s0, killed renamable $r7, 0, 14 /* CC::al */, $noreg :: (store (s32) into %ir.arrayidx52) + $r11, $r5 = t2LDRDi8 $sp, 20, 14 /* CC::al */, $noreg :: (load (s32) from %stack.4), (load (s32) from %stack.3) renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 4, 14 /* CC::al */, $noreg - renamable $r7 = tLDRspi $sp, 3, 14 /* CC::al */, $noreg :: (load 4 from %stack.6) - renamable $r3 = tLDRspi $sp, 8, 14 /* CC::al */, $noreg :: (load 4 from %stack.1) - renamable $r4 = tLDRspi $sp, 7, 14 /* CC::al */, $noreg :: (load 4 from %stack.2) + renamable $r7 = tLDRspi $sp, 3, 14 /* CC::al */, $noreg :: (load (s32) from %stack.6) + renamable $r3 = tLDRspi $sp, 8, 14 /* CC::al */, $noreg :: (load (s32) from %stack.1) + renamable $r4 = tLDRspi $sp, 7, 14 /* CC::al */, $noreg :: (load (s32) from %stack.2) renamable $r6 = tADDhirr killed renamable $r6, renamable $r7, 14 /* CC::al */, $noreg renamable $r12 = tADDhirr killed renamable $r12, renamable $r7, 14 /* CC::al */, $noreg renamable $r1 = tADDhirr killed renamable $r1, renamable $r7, 14 /* CC::al */, $noreg @@ -565,7 +565,7 @@ body: | successors: %bb.8(0x40000000), %bb.12(0x40000000) liveins: $r0, $r2, $r4, $r5, $r11 - renamable $r12 = t2LDRi12 $sp, 4, 14 /* CC::al */, $noreg :: (load 4 from %stack.8) + renamable $r12 = t2LDRi12 $sp, 4, 14 /* CC::al */, $noreg :: (load (s32) from %stack.8) tCMPhir renamable $r0, renamable $r12, 14 /* CC::al */, $noreg, implicit-def $cpsr tBcc %bb.12, 2 /* CC::hs */, killed $cpsr @@ -604,8 +604,8 @@ body: | renamable $vpr = MVE_VCTP32 renamable $r6, 0, $noreg MVE_VPST 2, implicit $vpr - renamable $r7, renamable $q1 = MVE_VLDRWU32_post killed renamable $r7, 16, 1, renamable $vpr :: (load 16 from %ir.pInT.21, align 4) - renamable $r1, renamable $q2 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.pCos0.12, align 4) + renamable $r7, renamable $q1 = MVE_VLDRWU32_post killed renamable $r7, 16, 1, renamable $vpr :: (load (s128) from %ir.pInT.21, align 4) + renamable $r1, renamable $q2 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.pCos0.12, align 4) renamable $q0 = MVE_VFMAf32 killed renamable $q0, killed renamable $q1, killed renamable $q2, 1, killed renamable $vpr renamable $r6, dead $cpsr = tSUBi8 killed renamable $r6, 4, 14 /* CC::al */, $noreg renamable $lr = t2LoopDec killed renamable $lr, 1 @@ -622,7 +622,7 @@ body: | renamable $r0, dead $cpsr = nuw tADDi8 killed renamable $r0, 1, 14 /* CC::al */, $noreg renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s4, killed renamable $s3, 14 /* CC::al */, $noreg, implicit $q0 tCMPhir renamable $r0, renamable $r12, 14 /* CC::al */, $noreg, implicit-def $cpsr - VSTRS killed renamable $s0, killed renamable $r1, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.arrayidx70) + VSTRS killed renamable $s0, killed renamable $r1, 0, 14 /* CC::al */, $noreg :: (store (s32) into %ir.arrayidx70) tBcc %bb.9, 1 /* CC::ne */, killed $cpsr bb.12.for.end72: diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/end-positive-offset.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/end-positive-offset.mir index 086119f320ad1..d4781d27306a5 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/end-positive-offset.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/end-positive-offset.mir @@ -137,28 +137,28 @@ body: | ; CHECK: renamable $r0, dead $cpsr = tSUBi8 killed renamable $r0, 4, 14 /* CC::al */, $noreg ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg ; CHECK: renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg - ; CHECK: tSTRspi killed $r1, $sp, 7, 14 /* CC::al */, $noreg :: (store 4 into %stack.0) - ; CHECK: tSTRspi killed $r2, $sp, 6, 14 /* CC::al */, $noreg :: (store 4 into %stack.1) - ; CHECK: tSTRspi killed $r0, $sp, 5, 14 /* CC::al */, $noreg :: (store 4 into %stack.2) - ; CHECK: tSTRspi killed $r3, $sp, 4, 14 /* CC::al */, $noreg :: (store 4 into %stack.3) + ; CHECK: tSTRspi killed $r1, $sp, 7, 14 /* CC::al */, $noreg :: (store (s32) into %stack.0) + ; CHECK: tSTRspi killed $r2, $sp, 6, 14 /* CC::al */, $noreg :: (store (s32) into %stack.1) + ; CHECK: tSTRspi killed $r0, $sp, 5, 14 /* CC::al */, $noreg :: (store (s32) into %stack.2) + ; CHECK: tSTRspi killed $r3, $sp, 4, 14 /* CC::al */, $noreg :: (store (s32) into %stack.3) ; CHECK: tB %bb.3, 14 /* CC::al */, $noreg ; CHECK: bb.1.for.body: ; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000) - ; CHECK: $r0 = tLDRspi $sp, 3, 14 /* CC::al */, $noreg :: (load 4 from %stack.4) - ; CHECK: renamable $r1, renamable $r0 = t2LDR_PRE killed renamable $r0, 4, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep11) - ; CHECK: $r2 = tLDRspi $sp, 2, 14 /* CC::al */, $noreg :: (load 4 from %stack.5) - ; CHECK: renamable $r3, renamable $r2 = t2LDR_PRE killed renamable $r2, 4, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep7) + ; CHECK: $r0 = tLDRspi $sp, 3, 14 /* CC::al */, $noreg :: (load (s32) from %stack.4) + ; CHECK: renamable $r1, renamable $r0 = t2LDR_PRE killed renamable $r0, 4, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep11) + ; CHECK: $r2 = tLDRspi $sp, 2, 14 /* CC::al */, $noreg :: (load (s32) from %stack.5) + ; CHECK: renamable $r3, renamable $r2 = t2LDR_PRE killed renamable $r2, 4, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7) ; CHECK: renamable $r1, dead $cpsr = nsw tMUL killed renamable $r3, killed renamable $r1, 14 /* CC::al */, $noreg - ; CHECK: $r3 = tLDRspi $sp, 1, 14 /* CC::al */, $noreg :: (load 4 from %stack.6) - ; CHECK: early-clobber renamable $r3 = t2STR_PRE killed renamable $r1, killed renamable $r3, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.scevgep3) - ; CHECK: $r1 = tLDRspi $sp, 0, 14 /* CC::al */, $noreg :: (load 4 from %stack.7) + ; CHECK: $r3 = tLDRspi $sp, 1, 14 /* CC::al */, $noreg :: (load (s32) from %stack.6) + ; CHECK: early-clobber renamable $r3 = t2STR_PRE killed renamable $r1, killed renamable $r3, 4, 14 /* CC::al */, $noreg :: (store (s32) into %ir.scevgep3) + ; CHECK: $r1 = tLDRspi $sp, 0, 14 /* CC::al */, $noreg :: (load (s32) from %stack.7) ; CHECK: $lr = tMOVr killed $r1, 14 /* CC::al */, $noreg ; CHECK: renamable $lr = t2SUBri killed renamable $lr, 1, 14 /* CC::al */, $noreg, def $cpsr ; CHECK: $r12 = tMOVr killed $lr, 14 /* CC::al */, $noreg - ; CHECK: tSTRspi killed $r0, $sp, 7, 14 /* CC::al */, $noreg :: (store 4 into %stack.0) - ; CHECK: tSTRspi killed $r2, $sp, 6, 14 /* CC::al */, $noreg :: (store 4 into %stack.1) - ; CHECK: tSTRspi killed $r3, $sp, 5, 14 /* CC::al */, $noreg :: (store 4 into %stack.2) - ; CHECK: t2STRi12 killed $r12, $sp, 16, 14 /* CC::al */, $noreg :: (store 4 into %stack.3) + ; CHECK: tSTRspi killed $r0, $sp, 7, 14 /* CC::al */, $noreg :: (store (s32) into %stack.0) + ; CHECK: tSTRspi killed $r2, $sp, 6, 14 /* CC::al */, $noreg :: (store (s32) into %stack.1) + ; CHECK: tSTRspi killed $r3, $sp, 5, 14 /* CC::al */, $noreg :: (store (s32) into %stack.2) + ; CHECK: t2STRi12 killed $r12, $sp, 16, 14 /* CC::al */, $noreg :: (store (s32) into %stack.3) ; CHECK: tBcc %bb.3, 1 /* CC::ne */, killed $cpsr ; CHECK: tB %bb.2, 14 /* CC::al */, $noreg ; CHECK: bb.2.for.cond.cleanup: @@ -166,14 +166,14 @@ body: | ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc ; CHECK: bb.3.for.header: ; CHECK: successors: %bb.1(0x80000000) - ; CHECK: $r0 = tLDRspi $sp, 4, 14 /* CC::al */, $noreg :: (load 4 from %stack.3) - ; CHECK: $r1 = tLDRspi $sp, 5, 14 /* CC::al */, $noreg :: (load 4 from %stack.2) - ; CHECK: $r2 = tLDRspi $sp, 6, 14 /* CC::al */, $noreg :: (load 4 from %stack.1) - ; CHECK: $r3 = tLDRspi $sp, 7, 14 /* CC::al */, $noreg :: (load 4 from %stack.0) - ; CHECK: tSTRspi killed $r0, $sp, 0, 14 /* CC::al */, $noreg :: (store 4 into %stack.7) - ; CHECK: tSTRspi killed $r1, $sp, 1, 14 /* CC::al */, $noreg :: (store 4 into %stack.6) - ; CHECK: tSTRspi killed $r2, $sp, 2, 14 /* CC::al */, $noreg :: (store 4 into %stack.5) - ; CHECK: tSTRspi killed $r3, $sp, 3, 14 /* CC::al */, $noreg :: (store 4 into %stack.4) + ; CHECK: $r0 = tLDRspi $sp, 4, 14 /* CC::al */, $noreg :: (load (s32) from %stack.3) + ; CHECK: $r1 = tLDRspi $sp, 5, 14 /* CC::al */, $noreg :: (load (s32) from %stack.2) + ; CHECK: $r2 = tLDRspi $sp, 6, 14 /* CC::al */, $noreg :: (load (s32) from %stack.1) + ; CHECK: $r3 = tLDRspi $sp, 7, 14 /* CC::al */, $noreg :: (load (s32) from %stack.0) + ; CHECK: tSTRspi killed $r0, $sp, 0, 14 /* CC::al */, $noreg :: (store (s32) into %stack.7) + ; CHECK: tSTRspi killed $r1, $sp, 1, 14 /* CC::al */, $noreg :: (store (s32) into %stack.6) + ; CHECK: tSTRspi killed $r2, $sp, 2, 14 /* CC::al */, $noreg :: (store (s32) into %stack.5) + ; CHECK: tSTRspi killed $r3, $sp, 3, 14 /* CC::al */, $noreg :: (store (s32) into %stack.4) ; CHECK: tB %bb.1, 14 /* CC::al */, $noreg bb.0.entry: successors: %bb.3(0x80000000) @@ -189,30 +189,30 @@ body: | renamable $r0, dead $cpsr = tSUBi8 killed renamable $r0, 4, 14, $noreg renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14, $noreg renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14, $noreg - tSTRspi killed $r1, $sp, 7, 14, $noreg :: (store 4 into %stack.0) - tSTRspi killed $r2, $sp, 6, 14, $noreg :: (store 4 into %stack.1) - tSTRspi killed $r0, $sp, 5, 14, $noreg :: (store 4 into %stack.2) - tSTRspi killed $r3, $sp, 4, 14, $noreg :: (store 4 into %stack.3) + tSTRspi killed $r1, $sp, 7, 14, $noreg :: (store (s32) into %stack.0) + tSTRspi killed $r2, $sp, 6, 14, $noreg :: (store (s32) into %stack.1) + tSTRspi killed $r0, $sp, 5, 14, $noreg :: (store (s32) into %stack.2) + tSTRspi killed $r3, $sp, 4, 14, $noreg :: (store (s32) into %stack.3) tB %bb.3, 14, $noreg bb.1.for.body: successors: %bb.3(0x40000000), %bb.2(0x40000000) - $r0 = tLDRspi $sp, 3, 14, $noreg :: (load 4 from %stack.4) - renamable $r1, renamable $r0 = t2LDR_PRE renamable $r0, 4, 14, $noreg :: (load 4 from %ir.scevgep11) - $r2 = tLDRspi $sp, 2, 14, $noreg :: (load 4 from %stack.5) - renamable $r3, renamable $r2 = t2LDR_PRE renamable $r2, 4, 14, $noreg :: (load 4 from %ir.scevgep7) + $r0 = tLDRspi $sp, 3, 14, $noreg :: (load (s32) from %stack.4) + renamable $r1, renamable $r0 = t2LDR_PRE renamable $r0, 4, 14, $noreg :: (load (s32) from %ir.scevgep11) + $r2 = tLDRspi $sp, 2, 14, $noreg :: (load (s32) from %stack.5) + renamable $r3, renamable $r2 = t2LDR_PRE renamable $r2, 4, 14, $noreg :: (load (s32) from %ir.scevgep7) renamable $r1, dead $cpsr = nsw tMUL killed renamable $r3, killed renamable $r1, 14, $noreg - $r3 = tLDRspi $sp, 1, 14, $noreg :: (load 4 from %stack.6) - early-clobber renamable $r3 = t2STR_PRE killed renamable $r1, renamable $r3, 4, 14, $noreg :: (store 4 into %ir.scevgep3) - $r1 = tLDRspi $sp, 0, 14, $noreg :: (load 4 from %stack.7) + $r3 = tLDRspi $sp, 1, 14, $noreg :: (load (s32) from %stack.6) + early-clobber renamable $r3 = t2STR_PRE killed renamable $r1, renamable $r3, 4, 14, $noreg :: (store (s32) into %ir.scevgep3) + $r1 = tLDRspi $sp, 0, 14, $noreg :: (load (s32) from %stack.7) $lr = tMOVr killed $r1, 14, $noreg renamable $lr = t2LoopDec killed renamable $lr, 1 $r12 = tMOVr $lr, 14, $noreg - tSTRspi killed $r0, $sp, 7, 14, $noreg :: (store 4 into %stack.0) - tSTRspi killed $r2, $sp, 6, 14, $noreg :: (store 4 into %stack.1) - tSTRspi killed $r3, $sp, 5, 14, $noreg :: (store 4 into %stack.2) - t2STRi12 killed $r12, $sp, 16, 14, $noreg :: (store 4 into %stack.3) + tSTRspi killed $r0, $sp, 7, 14, $noreg :: (store (s32) into %stack.0) + tSTRspi killed $r2, $sp, 6, 14, $noreg :: (store (s32) into %stack.1) + tSTRspi killed $r3, $sp, 5, 14, $noreg :: (store (s32) into %stack.2) + t2STRi12 killed $r12, $sp, 16, 14, $noreg :: (store (s32) into %stack.3) t2LoopEnd killed renamable $lr, %bb.3, implicit-def dead $cpsr tB %bb.2, 14, $noreg @@ -223,14 +223,14 @@ body: | bb.3.for.header: successors: %bb.1(0x80000000) - $r0 = tLDRspi $sp, 4, 14, $noreg :: (load 4 from %stack.3) - $r1 = tLDRspi $sp, 5, 14, $noreg :: (load 4 from %stack.2) - $r2 = tLDRspi $sp, 6, 14, $noreg :: (load 4 from %stack.1) - $r3 = tLDRspi $sp, 7, 14, $noreg :: (load 4 from %stack.0) - tSTRspi killed $r0, $sp, 0, 14, $noreg :: (store 4 into %stack.7) - tSTRspi killed $r1, $sp, 1, 14, $noreg :: (store 4 into %stack.6) - tSTRspi killed $r2, $sp, 2, 14, $noreg :: (store 4 into %stack.5) - tSTRspi killed $r3, $sp, 3, 14, $noreg :: (store 4 into %stack.4) + $r0 = tLDRspi $sp, 4, 14, $noreg :: (load (s32) from %stack.3) + $r1 = tLDRspi $sp, 5, 14, $noreg :: (load (s32) from %stack.2) + $r2 = tLDRspi $sp, 6, 14, $noreg :: (load (s32) from %stack.1) + $r3 = tLDRspi $sp, 7, 14, $noreg :: (load (s32) from %stack.0) + tSTRspi killed $r0, $sp, 0, 14, $noreg :: (store (s32) into %stack.7) + tSTRspi killed $r1, $sp, 1, 14, $noreg :: (store (s32) into %stack.6) + tSTRspi killed $r2, $sp, 2, 14, $noreg :: (store (s32) into %stack.5) + tSTRspi killed $r3, $sp, 3, 14, $noreg :: (store (s32) into %stack.4) tB %bb.1, 14, $noreg ... diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/extract-element.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/extract-element.mir index 29055d3489d18..5a328b961f9b6 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/extract-element.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/extract-element.mir @@ -123,8 +123,8 @@ body: | ; CHECK: bb.2.vector.body: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $q0, $r0, $r1 - ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 0, $noreg :: (load 8 from %ir.lsr.iv17, align 2) - ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRHS32_post killed renamable $r1, 8, 0, killed $noreg :: (load 8 from %ir.lsr.iv1820, align 2) + ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 0, $noreg :: (load (s64) from %ir.lsr.iv17, align 2) + ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRHS32_post killed renamable $r1, 8, 0, killed $noreg :: (load (s64) from %ir.lsr.iv1820, align 2) ; CHECK: renamable $q1 = nsw MVE_VMULi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 ; CHECK: renamable $q0 = MVE_VADDi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 @@ -164,8 +164,8 @@ body: | renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg MVE_VPST 4, implicit $vpr - renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2) - renamable $r1, renamable $q2 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, killed renamable $vpr :: (load 8 from %ir.lsr.iv1820, align 2) + renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv17, align 2) + renamable $r1, renamable $q2 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, killed renamable $vpr :: (load (s64) from %ir.lsr.iv1820, align 2) $lr = tMOVr $r3, 14, $noreg renamable $q1 = nsw MVE_VMULi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 renamable $r3, dead $cpsr = nsw tSUBi8 killed $r3, 1, 14, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-16.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-16.mir index e930de51dc24c..d3547f6f84875 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-16.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-16.mir @@ -118,12 +118,12 @@ body: | ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3 ; CHECK: renamable $vpr = MVE_VCTP16 renamable $r3, 0, $noreg ; CHECK: MVE_VPST 4, implicit $vpr - ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRHU16_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv13, align 4) - ; CHECK: renamable $r2, renamable $q1 = MVE_VLDRHU16_post killed renamable $r2, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv1416, align 4) + ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRHU16_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv13, align 4) + ; CHECK: renamable $r2, renamable $q1 = MVE_VLDRHU16_post killed renamable $r2, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv1416, align 4) ; CHECK: renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 7, 14 /* CC::al */, $noreg ; CHECK: renamable $q0 = nsw MVE_VADDi16 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $r0 = MVE_VSTRHU16_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1719, align 4) + ; CHECK: renamable $r0 = MVE_VSTRHU16_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store (s128) into %ir.lsr.iv1719, align 4) ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2 ; CHECK: bb.3.for.cond.cleanup: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc @@ -156,12 +156,12 @@ body: | renamable $vpr = MVE_VCTP16 renamable $r3, 0, $noreg MVE_VPST 4, implicit $vpr - renamable $r1, renamable $q0 = MVE_VLDRHU16_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv13, align 4) - renamable $r2, renamable $q1 = MVE_VLDRHU16_post killed renamable $r2, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv1416, align 4) + renamable $r1, renamable $q0 = MVE_VLDRHU16_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv13, align 4) + renamable $r2, renamable $q1 = MVE_VLDRHU16_post killed renamable $r2, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv1416, align 4) renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 7, 14, $noreg renamable $q0 = nsw MVE_VADDi16 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 MVE_VPST 8, implicit $vpr - renamable $r0 = MVE_VSTRHU16_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1719, align 4) + renamable $r0 = MVE_VSTRHU16_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store (s128) into %ir.lsr.iv1719, align 4) renamable $lr = t2LoopDec killed renamable $lr, 1 t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr tB %bb.3, 14, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-32.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-32.mir index e2002ddee0c92..d011ca8dc9764 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-32.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-32.mir @@ -126,12 +126,12 @@ body: | ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3 ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg ; CHECK: MVE_VPST 4, implicit $vpr - ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv13, align 4) - ; CHECK: renamable $r2, renamable $q1 = MVE_VLDRWU32_post killed renamable $r2, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv1416, align 4) + ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv13, align 4) + ; CHECK: renamable $r2, renamable $q1 = MVE_VLDRWU32_post killed renamable $r2, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv1416, align 4) ; CHECK: renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 5, 14 /* CC::al */, $noreg ; CHECK: renamable $q0 = nsw MVE_VADDi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1719, align 4) + ; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store (s128) into %ir.lsr.iv1719, align 4) ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2 ; CHECK: bb.3.for.cond.cleanup: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc @@ -164,12 +164,12 @@ body: | renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg MVE_VPST 4, implicit $vpr - renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv13, align 4) - renamable $r2, renamable $q1 = MVE_VLDRWU32_post killed renamable $r2, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv1416, align 4) + renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv13, align 4) + renamable $r2, renamable $q1 = MVE_VLDRWU32_post killed renamable $r2, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv1416, align 4) renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 5, 14, $noreg renamable $q0 = nsw MVE_VADDi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 MVE_VPST 8, implicit $vpr - renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1719, align 4) + renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store (s128) into %ir.lsr.iv1719, align 4) renamable $lr = t2LoopDec killed renamable $lr, 1 t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr tB %bb.3, 14, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-8.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-8.mir index 2d4a40e53b965..8bbe08efdbeea 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-8.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-8.mir @@ -119,12 +119,12 @@ body: | ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3 ; CHECK: renamable $vpr = MVE_VCTP8 renamable $r3, 0, $noreg ; CHECK: MVE_VPST 4, implicit $vpr - ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRBU8_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv13, align 4) - ; CHECK: renamable $r2, renamable $q1 = MVE_VLDRBU8_post killed renamable $r2, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv1416, align 4) + ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRBU8_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv13, align 4) + ; CHECK: renamable $r2, renamable $q1 = MVE_VLDRBU8_post killed renamable $r2, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv1416, align 4) ; CHECK: renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 15, 14 /* CC::al */, $noreg ; CHECK: renamable $q0 = nsw MVE_VADDi8 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $r0 = MVE_VSTRBU8_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1719, align 4) + ; CHECK: renamable $r0 = MVE_VSTRBU8_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store (s128) into %ir.lsr.iv1719, align 4) ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2 ; CHECK: bb.3.for.cond.cleanup: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc @@ -157,12 +157,12 @@ body: | renamable $vpr = MVE_VCTP8 renamable $r3, 0, $noreg MVE_VPST 4, implicit $vpr - renamable $r1, renamable $q0 = MVE_VLDRBU8_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv13, align 4) - renamable $r2, renamable $q1 = MVE_VLDRBU8_post killed renamable $r2, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv1416, align 4) + renamable $r1, renamable $q0 = MVE_VLDRBU8_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv13, align 4) + renamable $r2, renamable $q1 = MVE_VLDRBU8_post killed renamable $r2, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv1416, align 4) renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 15, 14, $noreg renamable $q0 = nsw MVE_VADDi8 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 MVE_VPST 8, implicit $vpr - renamable $r0 = MVE_VSTRBU8_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1719, align 4) + renamable $r0 = MVE_VSTRBU8_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store (s128) into %ir.lsr.iv1719, align 4) renamable $lr = t2LoopDec killed renamable $lr, 1 t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr tB %bb.3, 14, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpnot-1.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpnot-1.mir index 0fd351bca1053..e5dd18ace1c0f 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpnot-1.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpnot-1.mir @@ -138,7 +138,7 @@ body: | ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8 ; CHECK: frame-setup CFI_INSTRUCTION offset $r5, -12 ; CHECK: frame-setup CFI_INSTRUCTION offset $r4, -16 - ; CHECK: renamable $r12 = t2LDRi12 $sp, 20, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.1) + ; CHECK: renamable $r12 = t2LDRi12 $sp, 20, 14 /* CC::al */, $noreg :: (load (s32) from %fixed-stack.1) ; CHECK: t2CMPri renamable $r12, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: tBcc %bb.3, 0 /* CC::eq */, killed $cpsr ; CHECK: bb.1.vector.ph: @@ -147,7 +147,7 @@ body: | ; CHECK: renamable $lr = t2ADDri renamable $r12, 3, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r4, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg ; CHECK: renamable $lr = t2BICri killed renamable $lr, 3, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $r5 = tLDRspi $sp, 4, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8) + ; CHECK: renamable $r5 = tLDRspi $sp, 4, 14 /* CC::al */, $noreg :: (load (s32) from %fixed-stack.0, align 8) ; CHECK: renamable $lr = t2SUBri killed renamable $lr, 4, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $r4, killed renamable $lr, 19, 14 /* CC::al */, $noreg, $noreg @@ -157,12 +157,12 @@ body: | ; CHECK: liveins: $q0, $r0, $r1, $r2, $r3, $r4, $r5, $r12 ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r12, 0, $noreg ; CHECK: MVE_VPST 4, implicit $vpr - ; CHECK: renamable $r3, renamable $q1 = MVE_VLDRHS32_post killed renamable $r3, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv17.d, align 2) - ; CHECK: renamable $r2, renamable $q2 = MVE_VLDRHS32_post killed renamable $r2, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv1820.c, align 2) + ; CHECK: renamable $r3, renamable $q1 = MVE_VLDRHS32_post killed renamable $r3, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv17.d, align 2) + ; CHECK: renamable $r2, renamable $q2 = MVE_VLDRHS32_post killed renamable $r2, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv1820.c, align 2) ; CHECK: renamable $q1 = nsw MVE_VMULi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 ; CHECK: MVE_VPST 4, implicit $vpr - ; CHECK: renamable $r0, renamable $q2 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2) - ; CHECK: renamable $r1, renamable $q3 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv1820, align 2) + ; CHECK: renamable $r0, renamable $q2 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv17, align 2) + ; CHECK: renamable $r1, renamable $q3 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv1820, align 2) ; CHECK: renamable $q2 = nsw MVE_VMULi32 killed renamable $q3, killed renamable $q2, 0, $noreg, undef renamable $q2 ; CHECK: $lr = tMOVr $r4, 14 /* CC::al */, $noreg ; CHECK: renamable $q1 = MVE_VADDi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 @@ -171,7 +171,7 @@ body: | ; CHECK: renamable $q0 = MVE_VADDi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 ; CHECK: renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $r5 = MVE_VSTRWU32_post renamable $q0, killed renamable $r5, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.cast.e, align 4) + ; CHECK: renamable $r5 = MVE_VSTRWU32_post renamable $q0, killed renamable $r5, 16, 1, killed renamable $vpr :: (store (s128) into %ir.lsr.cast.e, align 4) ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.2 ; CHECK: bb.3.for.cond.cleanup: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $r5, def $r7, def $pc @@ -185,7 +185,7 @@ body: | frame-setup CFI_INSTRUCTION offset $r7, -8 frame-setup CFI_INSTRUCTION offset $r5, -12 frame-setup CFI_INSTRUCTION offset $r4, -16 - renamable $r12 = t2LDRi12 $sp, 20, 14, $noreg :: (load 4 from %fixed-stack.0) + renamable $r12 = t2LDRi12 $sp, 20, 14, $noreg :: (load (s32) from %fixed-stack.0) t2CMPri renamable $r12, 0, 14, $noreg, implicit-def $cpsr tBcc %bb.3, 0, killed $cpsr @@ -196,7 +196,7 @@ body: | renamable $lr = t2ADDri renamable $r12, 3, 14, $noreg, $noreg renamable $r4, dead $cpsr = tMOVi8 1, 14, $noreg renamable $lr = t2BICri killed renamable $lr, 3, 14, $noreg, $noreg - renamable $r5 = tLDRspi $sp, 4, 14, $noreg :: (load 4 from %fixed-stack.1, align 8) + renamable $r5 = tLDRspi $sp, 4, 14, $noreg :: (load (s32) from %fixed-stack.1, align 8) renamable $lr = t2SUBri killed renamable $lr, 4, 14, $noreg, $noreg renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 renamable $lr = nuw nsw t2ADDrs killed renamable $r4, killed renamable $lr, 19, 14, $noreg, $noreg @@ -209,12 +209,12 @@ body: | renamable $vpr = MVE_VCTP32 renamable $r12, 0, $noreg MVE_VPST 4, implicit $vpr - renamable $r3, renamable $q1 = MVE_VLDRHS32_post killed renamable $r3, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv17.d, align 2) - renamable $r2, renamable $q2 = MVE_VLDRHS32_post killed renamable $r2, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv1820.c, align 2) + renamable $r3, renamable $q1 = MVE_VLDRHS32_post killed renamable $r3, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv17.d, align 2) + renamable $r2, renamable $q2 = MVE_VLDRHS32_post killed renamable $r2, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv1820.c, align 2) renamable $q1 = nsw MVE_VMULi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 MVE_VPST 4, implicit $vpr - renamable $r0, renamable $q2 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2) - renamable $r1, renamable $q3 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv1820, align 2) + renamable $r0, renamable $q2 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv17, align 2) + renamable $r1, renamable $q3 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv1820, align 2) renamable $q2 = nsw MVE_VMULi32 killed renamable $q3, killed renamable $q2, 0, $noreg, undef renamable $q2 $lr = tMOVr $r4, 14, $noreg renamable $q1 = MVE_VADDi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 @@ -223,7 +223,7 @@ body: | renamable $q0 = MVE_VADDi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg MVE_VPST 8, implicit $vpr - renamable $r5 = MVE_VSTRWU32_post renamable $q0, killed renamable $r5, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.cast.e, align 4) + renamable $r5 = MVE_VSTRWU32_post renamable $q0, killed renamable $r5, 16, 1, killed renamable $vpr :: (store (s128) into %ir.lsr.cast.e, align 4) renamable $lr = t2LoopDec killed renamable $lr, 1 t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr tB %bb.3, 14, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpnot-2.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpnot-2.mir index 3b17afaeba758..d4694eefb3323 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpnot-2.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpnot-2.mir @@ -138,7 +138,7 @@ body: | ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8 ; CHECK: frame-setup CFI_INSTRUCTION offset $r5, -12 ; CHECK: frame-setup CFI_INSTRUCTION offset $r4, -16 - ; CHECK: renamable $r12 = t2LDRi12 $sp, 20, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.1) + ; CHECK: renamable $r12 = t2LDRi12 $sp, 20, 14 /* CC::al */, $noreg :: (load (s32) from %fixed-stack.1) ; CHECK: t2CMPri renamable $r12, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: tBcc %bb.3, 0 /* CC::eq */, killed $cpsr ; CHECK: bb.1.vector.ph: @@ -147,7 +147,7 @@ body: | ; CHECK: renamable $lr = t2ADDri renamable $r12, 3, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r4, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg ; CHECK: renamable $lr = t2BICri killed renamable $lr, 3, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $r5 = tLDRspi $sp, 4, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8) + ; CHECK: renamable $r5 = tLDRspi $sp, 4, 14 /* CC::al */, $noreg :: (load (s32) from %fixed-stack.0, align 8) ; CHECK: renamable $lr = t2SUBri killed renamable $lr, 4, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $r4, killed renamable $lr, 19, 14 /* CC::al */, $noreg, $noreg @@ -157,12 +157,12 @@ body: | ; CHECK: liveins: $q0, $r0, $r1, $r2, $r3, $r4, $r5, $r12 ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r12, 0, $noreg ; CHECK: MVE_VPST 4, implicit $vpr - ; CHECK: renamable $r3, renamable $q1 = MVE_VLDRHS32_post killed renamable $r3, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv17.d, align 2) - ; CHECK: renamable $r2, renamable $q2 = MVE_VLDRHS32_post killed renamable $r2, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv1820.c, align 2) + ; CHECK: renamable $r3, renamable $q1 = MVE_VLDRHS32_post killed renamable $r3, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv17.d, align 2) + ; CHECK: renamable $r2, renamable $q2 = MVE_VLDRHS32_post killed renamable $r2, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv1820.c, align 2) ; CHECK: renamable $q1 = nsw MVE_VMULi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 ; CHECK: MVE_VPST 4, implicit $vpr - ; CHECK: renamable $r0, renamable $q2 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2) - ; CHECK: renamable $r1, renamable $q3 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv1820, align 2) + ; CHECK: renamable $r0, renamable $q2 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv17, align 2) + ; CHECK: renamable $r1, renamable $q3 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv1820, align 2) ; CHECK: renamable $q2 = nsw MVE_VMULi32 killed renamable $q3, killed renamable $q2, 0, $noreg, undef renamable $q2 ; CHECK: $lr = tMOVr $r4, 14 /* CC::al */, $noreg ; CHECK: renamable $q1 = MVE_VADDi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 @@ -171,7 +171,7 @@ body: | ; CHECK: renamable $q0 = MVE_VADDi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 ; CHECK: MVE_VPST 4, implicit $vpr ; CHECK: renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, killed renamable $vpr - ; CHECK: renamable $r5 = MVE_VSTRWU32_post renamable $q0, killed renamable $r5, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.cast.e, align 4) + ; CHECK: renamable $r5 = MVE_VSTRWU32_post renamable $q0, killed renamable $r5, 16, 1, killed renamable $vpr :: (store (s128) into %ir.lsr.cast.e, align 4) ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.2 ; CHECK: bb.3.for.cond.cleanup: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $r5, def $r7, def $pc @@ -185,7 +185,7 @@ body: | frame-setup CFI_INSTRUCTION offset $r7, -8 frame-setup CFI_INSTRUCTION offset $r5, -12 frame-setup CFI_INSTRUCTION offset $r4, -16 - renamable $r12 = t2LDRi12 $sp, 20, 14, $noreg :: (load 4 from %fixed-stack.0) + renamable $r12 = t2LDRi12 $sp, 20, 14, $noreg :: (load (s32) from %fixed-stack.0) t2CMPri renamable $r12, 0, 14, $noreg, implicit-def $cpsr tBcc %bb.3, 0, killed $cpsr @@ -196,7 +196,7 @@ body: | renamable $lr = t2ADDri renamable $r12, 3, 14, $noreg, $noreg renamable $r4, dead $cpsr = tMOVi8 1, 14, $noreg renamable $lr = t2BICri killed renamable $lr, 3, 14, $noreg, $noreg - renamable $r5 = tLDRspi $sp, 4, 14, $noreg :: (load 4 from %fixed-stack.1, align 8) + renamable $r5 = tLDRspi $sp, 4, 14, $noreg :: (load (s32) from %fixed-stack.1, align 8) renamable $lr = t2SUBri killed renamable $lr, 4, 14, $noreg, $noreg renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 renamable $lr = nuw nsw t2ADDrs killed renamable $r4, killed renamable $lr, 19, 14, $noreg, $noreg @@ -209,12 +209,12 @@ body: | renamable $vpr = MVE_VCTP32 renamable $r12, 0, $noreg MVE_VPST 4, implicit $vpr - renamable $r3, renamable $q1 = MVE_VLDRHS32_post killed renamable $r3, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv17.d, align 2) - renamable $r2, renamable $q2 = MVE_VLDRHS32_post killed renamable $r2, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv1820.c, align 2) + renamable $r3, renamable $q1 = MVE_VLDRHS32_post killed renamable $r3, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv17.d, align 2) + renamable $r2, renamable $q2 = MVE_VLDRHS32_post killed renamable $r2, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv1820.c, align 2) renamable $q1 = nsw MVE_VMULi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 MVE_VPST 4, implicit $vpr - renamable $r0, renamable $q2 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2) - renamable $r1, renamable $q3 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv1820, align 2) + renamable $r0, renamable $q2 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv17, align 2) + renamable $r1, renamable $q3 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv1820, align 2) renamable $q2 = nsw MVE_VMULi32 killed renamable $q3, killed renamable $q2, 0, $noreg, undef renamable $q2 $lr = tMOVr $r4, 14, $noreg renamable $q1 = MVE_VADDi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 @@ -223,7 +223,7 @@ body: | renamable $q0 = MVE_VADDi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 MVE_VPST 4, implicit $vpr renamable $vpr = MVE_VPNOT renamable $vpr, 0, killed renamable $vpr - renamable $r5 = MVE_VSTRWU32_post renamable $q0, killed renamable $r5, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.cast.e, align 4) + renamable $r5 = MVE_VSTRWU32_post renamable $q0, killed renamable $r5, 16, 1, killed renamable $vpr :: (store (s128) into %ir.lsr.cast.e, align 4) renamable $lr = t2LoopDec killed renamable $lr, 1 t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr tB %bb.3, 14, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpnot-3.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpnot-3.mir index b57fad5480fac..102f5b9d6ce00 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpnot-3.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpnot-3.mir @@ -138,7 +138,7 @@ body: | ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8 ; CHECK: frame-setup CFI_INSTRUCTION offset $r5, -12 ; CHECK: frame-setup CFI_INSTRUCTION offset $r4, -16 - ; CHECK: renamable $r12 = t2LDRi12 $sp, 20, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.1) + ; CHECK: renamable $r12 = t2LDRi12 $sp, 20, 14 /* CC::al */, $noreg :: (load (s32) from %fixed-stack.1) ; CHECK: t2CMPri renamable $r12, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: tBcc %bb.3, 0 /* CC::eq */, killed $cpsr ; CHECK: bb.1.vector.ph: @@ -147,7 +147,7 @@ body: | ; CHECK: renamable $lr = t2ADDri renamable $r12, 3, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r4, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg ; CHECK: renamable $lr = t2BICri killed renamable $lr, 3, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $r5 = tLDRspi $sp, 4, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8) + ; CHECK: renamable $r5 = tLDRspi $sp, 4, 14 /* CC::al */, $noreg :: (load (s32) from %fixed-stack.0, align 8) ; CHECK: renamable $lr = t2SUBri killed renamable $lr, 4, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $r4, killed renamable $lr, 19, 14 /* CC::al */, $noreg, $noreg @@ -157,12 +157,12 @@ body: | ; CHECK: liveins: $q0, $r0, $r1, $r2, $r3, $r4, $r5, $r12 ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r12, 0, $noreg ; CHECK: MVE_VPST 4, implicit $vpr - ; CHECK: renamable $r3, renamable $q1 = MVE_VLDRHS32_post killed renamable $r3, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv17.d, align 2) - ; CHECK: renamable $r2, renamable $q2 = MVE_VLDRHS32_post killed renamable $r2, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv1820.c, align 2) + ; CHECK: renamable $r3, renamable $q1 = MVE_VLDRHS32_post killed renamable $r3, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv17.d, align 2) + ; CHECK: renamable $r2, renamable $q2 = MVE_VLDRHS32_post killed renamable $r2, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv1820.c, align 2) ; CHECK: renamable $q1 = nsw MVE_VMULi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 ; CHECK: MVE_VPST 4, implicit $vpr - ; CHECK: renamable $r0, renamable $q2 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2) - ; CHECK: renamable $r1, renamable $q3 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv1820, align 2) + ; CHECK: renamable $r0, renamable $q2 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv17, align 2) + ; CHECK: renamable $r1, renamable $q3 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv1820, align 2) ; CHECK: $lr = tMOVr $r4, 14 /* CC::al */, $noreg ; CHECK: renamable $r4, dead $cpsr = nsw tSUBi8 killed $r4, 1, 14 /* CC::al */, $noreg ; CHECK: renamable $r12 = t2SUBri killed renamable $r12, 4, 14 /* CC::al */, $noreg, $noreg @@ -170,7 +170,7 @@ body: | ; CHECK: renamable $q2 = nsw MVE_VMULi32 killed renamable $q3, killed renamable $q2, 0, $noreg, undef renamable $q2 ; CHECK: renamable $q1 = MVE_VADDi32 killed renamable $q2, killed renamable $q1, 0, renamable $vpr, undef renamable $q1 ; CHECK: renamable $q0 = MVE_VADDi32 killed renamable $q1, killed renamable $q0, 0, renamable $vpr, undef renamable $q0 - ; CHECK: renamable $r5 = MVE_VSTRWU32_post renamable $q0, killed renamable $r5, 16, 1, renamable $vpr :: (store 16 into %ir.lsr.cast.e, align 4) + ; CHECK: renamable $r5 = MVE_VSTRWU32_post renamable $q0, killed renamable $r5, 16, 1, renamable $vpr :: (store (s128) into %ir.lsr.cast.e, align 4) ; CHECK: dead renamable $vpr = MVE_VPNOT killed renamable $vpr, 0, $noreg ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.2 ; CHECK: bb.3.for.cond.cleanup: @@ -185,7 +185,7 @@ body: | frame-setup CFI_INSTRUCTION offset $r7, -8 frame-setup CFI_INSTRUCTION offset $r5, -12 frame-setup CFI_INSTRUCTION offset $r4, -16 - renamable $r12 = t2LDRi12 $sp, 20, 14, $noreg :: (load 4 from %fixed-stack.0) + renamable $r12 = t2LDRi12 $sp, 20, 14, $noreg :: (load (s32) from %fixed-stack.0) t2CMPri renamable $r12, 0, 14, $noreg, implicit-def $cpsr tBcc %bb.3, 0, killed $cpsr @@ -196,7 +196,7 @@ body: | renamable $lr = t2ADDri renamable $r12, 3, 14, $noreg, $noreg renamable $r4, dead $cpsr = tMOVi8 1, 14, $noreg renamable $lr = t2BICri killed renamable $lr, 3, 14, $noreg, $noreg - renamable $r5 = tLDRspi $sp, 4, 14, $noreg :: (load 4 from %fixed-stack.1, align 8) + renamable $r5 = tLDRspi $sp, 4, 14, $noreg :: (load (s32) from %fixed-stack.1, align 8) renamable $lr = t2SUBri killed renamable $lr, 4, 14, $noreg, $noreg renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 renamable $lr = nuw nsw t2ADDrs killed renamable $r4, killed renamable $lr, 19, 14, $noreg, $noreg @@ -209,12 +209,12 @@ body: | renamable $vpr = MVE_VCTP32 renamable $r12, 0, $noreg MVE_VPST 4, implicit $vpr - renamable $r3, renamable $q1 = MVE_VLDRHS32_post killed renamable $r3, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv17.d, align 2) - renamable $r2, renamable $q2 = MVE_VLDRHS32_post killed renamable $r2, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv1820.c, align 2) + renamable $r3, renamable $q1 = MVE_VLDRHS32_post killed renamable $r3, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv17.d, align 2) + renamable $r2, renamable $q2 = MVE_VLDRHS32_post killed renamable $r2, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv1820.c, align 2) renamable $q1 = nsw MVE_VMULi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 MVE_VPST 4, implicit $vpr - renamable $r0, renamable $q2 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2) - renamable $r1, renamable $q3 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv1820, align 2) + renamable $r0, renamable $q2 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv17, align 2) + renamable $r1, renamable $q3 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv1820, align 2) $lr = tMOVr $r4, 14, $noreg renamable $r4, dead $cpsr = nsw tSUBi8 killed $r4, 1, 14, $noreg renamable $r12 = t2SUBri killed renamable $r12, 4, 14, $noreg, $noreg @@ -222,7 +222,7 @@ body: | renamable $q2 = nsw MVE_VMULi32 killed renamable $q3, killed renamable $q2, 0, $noreg, undef renamable $q2 renamable $q1 = MVE_VADDi32 killed renamable $q2, killed renamable $q1, 0, renamable $vpr, undef renamable $q1 renamable $q0 = MVE_VADDi32 killed renamable $q1, killed renamable $q0, 0, renamable $vpr, undef renamable $q0 - renamable $r5 = MVE_VSTRWU32_post renamable $q0, killed renamable $r5, 16, 1, renamable $vpr :: (store 16 into %ir.lsr.cast.e, align 4) + renamable $r5 = MVE_VSTRWU32_post renamable $q0, killed renamable $r5, 16, 1, renamable $vpr :: (store (s128) into %ir.lsr.cast.e, align 4) renamable $vpr = MVE_VPNOT renamable $vpr, 0, $noreg renamable $lr = t2LoopDec killed renamable $lr, 1 t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpsel-1.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpsel-1.mir index 7b9caf8bc1989..6df65dab7bd45 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpsel-1.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpsel-1.mir @@ -136,7 +136,7 @@ body: | ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8 ; CHECK: frame-setup CFI_INSTRUCTION offset $r5, -12 ; CHECK: frame-setup CFI_INSTRUCTION offset $r4, -16 - ; CHECK: renamable $r12 = t2LDRi12 $sp, 16, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8) + ; CHECK: renamable $r12 = t2LDRi12 $sp, 16, 14 /* CC::al */, $noreg :: (load (s32) from %fixed-stack.0, align 8) ; CHECK: t2CMPri renamable $r12, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: tBcc %bb.4, 0 /* CC::eq */, killed $cpsr ; CHECK: bb.1.vector.ph: @@ -155,12 +155,12 @@ body: | ; CHECK: liveins: $q0, $r0, $r1, $r2, $r3, $r4, $r12 ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r12, 0, $noreg ; CHECK: MVE_VPST 4, implicit $vpr - ; CHECK: renamable $r3, renamable $q1 = MVE_VLDRHS32_post killed renamable $r3, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv17.d, align 2) - ; CHECK: renamable $r2, renamable $q2 = MVE_VLDRHS32_post killed renamable $r2, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv1820.c, align 2) + ; CHECK: renamable $r3, renamable $q1 = MVE_VLDRHS32_post killed renamable $r3, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv17.d, align 2) + ; CHECK: renamable $r2, renamable $q2 = MVE_VLDRHS32_post killed renamable $r2, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv1820.c, align 2) ; CHECK: renamable $q1 = nsw MVE_VMULi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 ; CHECK: MVE_VPST 4, implicit $vpr - ; CHECK: renamable $r0, renamable $q2 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2) - ; CHECK: renamable $r1, renamable $q3 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv1820, align 2) + ; CHECK: renamable $r0, renamable $q2 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv17, align 2) + ; CHECK: renamable $r1, renamable $q3 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv1820, align 2) ; CHECK: renamable $q2 = nsw MVE_VMULi32 killed renamable $q3, killed renamable $q2, 0, $noreg, undef renamable $q2 ; CHECK: $lr = tMOVr $r4, 14 /* CC::al */, $noreg ; CHECK: renamable $q1 = MVE_VADDi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 @@ -186,7 +186,7 @@ body: | frame-setup CFI_INSTRUCTION offset $r7, -8 frame-setup CFI_INSTRUCTION offset $r5, -12 frame-setup CFI_INSTRUCTION offset $r4, -16 - renamable $r12 = t2LDRi12 $sp, 16, 14, $noreg :: (load 4 from %fixed-stack.0, align 8) + renamable $r12 = t2LDRi12 $sp, 16, 14, $noreg :: (load (s32) from %fixed-stack.0, align 8) t2CMPri renamable $r12, 0, 14, $noreg, implicit-def $cpsr tBcc %bb.4, 0, killed $cpsr @@ -209,12 +209,12 @@ body: | renamable $vpr = MVE_VCTP32 renamable $r12, 0, $noreg MVE_VPST 4, implicit $vpr - renamable $r3, renamable $q1 = MVE_VLDRHS32_post killed renamable $r3, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv17.d, align 2) - renamable $r2, renamable $q2 = MVE_VLDRHS32_post killed renamable $r2, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv1820.c, align 2) + renamable $r3, renamable $q1 = MVE_VLDRHS32_post killed renamable $r3, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv17.d, align 2) + renamable $r2, renamable $q2 = MVE_VLDRHS32_post killed renamable $r2, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv1820.c, align 2) renamable $q1 = nsw MVE_VMULi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 MVE_VPST 4, implicit $vpr - renamable $r0, renamable $q2 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2) - renamable $r1, renamable $q3 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv1820, align 2) + renamable $r0, renamable $q2 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv17, align 2) + renamable $r1, renamable $q3 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv1820, align 2) renamable $q2 = nsw MVE_VMULi32 killed renamable $q3, killed renamable $q2, 0, $noreg, undef renamable $q2 $lr = tMOVr $r4, 14, $noreg renamable $q1 = MVE_VADDi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpsel-2.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpsel-2.mir index 2f9216eef6f5a..84bc9f184a81e 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpsel-2.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpsel-2.mir @@ -138,7 +138,7 @@ body: | ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8 ; CHECK: frame-setup CFI_INSTRUCTION offset $r5, -12 ; CHECK: frame-setup CFI_INSTRUCTION offset $r4, -16 - ; CHECK: renamable $r12 = t2LDRi12 $sp, 16, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8) + ; CHECK: renamable $r12 = t2LDRi12 $sp, 16, 14 /* CC::al */, $noreg :: (load (s32) from %fixed-stack.0, align 8) ; CHECK: t2CMPri renamable $r12, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: tBcc %bb.4, 0 /* CC::eq */, killed $cpsr ; CHECK: bb.1.vector.ph: @@ -157,10 +157,10 @@ body: | ; CHECK: liveins: $q0, $r0, $r1, $r2, $r3, $r4, $r12 ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r12, 0, $noreg ; CHECK: MVE_VPST 2, implicit $vpr - ; CHECK: renamable $r3, renamable $q1 = MVE_VLDRHS32_post killed renamable $r3, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv17.d, align 2) - ; CHECK: renamable $r2, renamable $q2 = MVE_VLDRHS32_post killed renamable $r2, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv1820.c, align 2) - ; CHECK: renamable $r1, renamable $q3 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv1820, align 2) - ; CHECK: renamable $r0, renamable $q4 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2) + ; CHECK: renamable $r3, renamable $q1 = MVE_VLDRHS32_post killed renamable $r3, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv17.d, align 2) + ; CHECK: renamable $r2, renamable $q2 = MVE_VLDRHS32_post killed renamable $r2, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv1820.c, align 2) + ; CHECK: renamable $r1, renamable $q3 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv1820, align 2) + ; CHECK: renamable $r0, renamable $q4 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv17, align 2) ; CHECK: renamable $q1 = nsw MVE_VMULi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 ; CHECK: renamable $q2 = nsw MVE_VMULi32 killed renamable $q3, killed renamable $q4, 0, $noreg, undef renamable $q2 ; CHECK: $lr = tMOVr $r4, 14 /* CC::al */, $noreg @@ -187,7 +187,7 @@ body: | frame-setup CFI_INSTRUCTION offset $r7, -8 frame-setup CFI_INSTRUCTION offset $r5, -12 frame-setup CFI_INSTRUCTION offset $r4, -16 - renamable $r12 = t2LDRi12 $sp, 16, 14, $noreg :: (load 4 from %fixed-stack.0, align 8) + renamable $r12 = t2LDRi12 $sp, 16, 14, $noreg :: (load (s32) from %fixed-stack.0, align 8) t2CMPri renamable $r12, 0, 14, $noreg, implicit-def $cpsr tBcc %bb.4, 0, killed $cpsr @@ -210,10 +210,10 @@ body: | renamable $vpr = MVE_VCTP32 renamable $r12, 0, $noreg MVE_VPST 2, implicit $vpr - renamable $r3, renamable $q1 = MVE_VLDRHS32_post killed renamable $r3, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv17.d, align 2) - renamable $r2, renamable $q2 = MVE_VLDRHS32_post killed renamable $r2, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv1820.c, align 2) - renamable $r1, renamable $q3 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv1820, align 2) - renamable $r0, renamable $q4 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2) + renamable $r3, renamable $q1 = MVE_VLDRHS32_post killed renamable $r3, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv17.d, align 2) + renamable $r2, renamable $q2 = MVE_VLDRHS32_post killed renamable $r2, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv1820.c, align 2) + renamable $r1, renamable $q3 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv1820, align 2) + renamable $r0, renamable $q4 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv17, align 2) renamable $q1 = nsw MVE_VMULi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 renamable $q2 = nsw MVE_VMULi32 killed renamable $q3, killed renamable $q4, 0, $noreg, undef renamable $q2 $lr = tMOVr $r4, 14, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/invariant-qreg.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/invariant-qreg.mir index 76a3752a34a20..502f9ed24e9a8 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/invariant-qreg.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/invariant-qreg.mir @@ -162,7 +162,7 @@ body: | ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8 ; CHECK: renamable $r3 = tADDrSPi $sp, 2, 14 /* CC::al */, $noreg - ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load 16 from %fixed-stack.0, align 8) + ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load (s128) from %fixed-stack.0, align 8) ; CHECK: tCBZ $r2, %bb.3 ; CHECK: bb.1.vector.ph: ; CHECK: successors: %bb.2(0x80000000) @@ -172,10 +172,10 @@ body: | ; CHECK: bb.2.vector.body: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $q0, $q1, $r0, $r1 - ; CHECK: renamable $r0, renamable $q2 = MVE_VLDRHS32_post killed renamable $r0, 8, 0, $noreg :: (load 8 from %ir.lsr.iv17, align 2) + ; CHECK: renamable $r0, renamable $q2 = MVE_VLDRHS32_post killed renamable $r0, 8, 0, $noreg :: (load (s64) from %ir.lsr.iv17, align 2) ; CHECK: renamable $q2 = nsw MVE_VMULi32 renamable $q0, killed renamable $q2, 0, $noreg, undef renamable $q2 ; CHECK: renamable $q1 = MVE_VADDi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 - ; CHECK: renamable $r1 = MVE_VSTRWU32_post renamable $q1, killed renamable $r1, 16, 0, killed $noreg :: (store 16 into %ir.lsr.store, align 4) + ; CHECK: renamable $r1 = MVE_VSTRWU32_post renamable $q1, killed renamable $r1, 16, 0, killed $noreg :: (store (s128) into %ir.lsr.store, align 4) ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 ; CHECK: bb.3.exit: ; CHECK: liveins: $q0 @@ -191,7 +191,7 @@ body: | frame-setup CFI_INSTRUCTION offset $lr, -4 frame-setup CFI_INSTRUCTION offset $r7, -8 renamable $r3 = tADDrSPi $sp, 2, 14 /* CC::al */, $noreg - renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load 16 from %fixed-stack.0, align 8) + renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load (s128) from %fixed-stack.0, align 8) tCBZ $r2, %bb.3 bb.1.vector.ph: @@ -213,7 +213,7 @@ body: | renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg MVE_VPST 8, implicit $vpr - renamable $r0, renamable $q2 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2) + renamable $r0, renamable $q2 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv17, align 2) $lr = tMOVr $r3, 14 /* CC::al */, $noreg renamable $q2 = nsw MVE_VMULi32 renamable $q0, killed renamable $q2, 0, $noreg, undef renamable $q2 renamable $r3, dead $cpsr = nsw tSUBi8 killed $r3, 1, 14 /* CC::al */, $noreg @@ -221,7 +221,7 @@ body: | renamable $q1 = MVE_VADDi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 renamable $lr = t2LoopDec killed renamable $lr, 1 MVE_VPST 8, implicit $vpr - renamable $r1 = MVE_VSTRWU32_post renamable $q1, killed renamable $r1, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.store, align 4) + renamable $r1 = MVE_VSTRWU32_post renamable $q1, killed renamable $r1, 16, 1, killed renamable $vpr :: (store (s128) into %ir.lsr.store, align 4) t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr tB %bb.3, 14 /* CC::al */, $noreg @@ -278,7 +278,7 @@ body: | ; CHECK: renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg ; CHECK: renamable $r3 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r1, 19, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r1 = tADDrSPi $sp, 2, 14 /* CC::al */, $noreg - ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r1, 0, 0, $noreg :: (load 16 from %fixed-stack.0, align 8) + ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r1, 0, 0, $noreg :: (load (s128) from %fixed-stack.0, align 8) ; CHECK: dead $lr = t2DLS renamable $r3 ; CHECK: $r1 = tMOVr killed $r3, 14 /* CC::al */, $noreg ; CHECK: bb.2.vector.body: @@ -289,7 +289,7 @@ body: | ; CHECK: renamable $r1, dead $cpsr = nsw tSUBi8 killed $r1, 1, 14 /* CC::al */, $noreg ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, killed renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2) + ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, killed renamable $vpr :: (load (s64) from %ir.lsr.iv17, align 2) ; CHECK: renamable $r12 = MVE_VMLADAVu32 renamable $q0, killed renamable $q1, 0, $noreg ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.2 ; CHECK: bb.3.exit: @@ -320,7 +320,7 @@ body: | renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg renamable $r3 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r1, 19, 14 /* CC::al */, $noreg, $noreg renamable $r1 = tADDrSPi $sp, 2, 14 /* CC::al */, $noreg - renamable $q0 = MVE_VLDRWU32 killed renamable $r1, 0, 0, $noreg :: (load 16 from %fixed-stack.0, align 8) + renamable $q0 = MVE_VLDRWU32 killed renamable $r1, 0, 0, $noreg :: (load (s128) from %fixed-stack.0, align 8) $lr = t2DoLoopStart renamable $r3 $r1 = tMOVr killed $r3, 14 /* CC::al */, $noreg @@ -333,7 +333,7 @@ body: | renamable $r1, dead $cpsr = nsw tSUBi8 killed $r1, 1, 14 /* CC::al */, $noreg renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg MVE_VPST 8, implicit $vpr - renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, killed renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2) + renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, killed renamable $vpr :: (load (s64) from %ir.lsr.iv17, align 2) renamable $r12 = MVE_VMLADAVu32 renamable $q0, killed renamable $q1, 0, $noreg renamable $lr = t2LoopDec killed renamable $lr, 1 t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr @@ -396,7 +396,7 @@ body: | ; CHECK: renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg ; CHECK: renamable $r3 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r1, 19, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r1 = tADDrSPi $sp, 2, 14 /* CC::al */, $noreg - ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r1, 0, 0, $noreg :: (load 16 from %fixed-stack.0, align 8) + ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r1, 0, 0, $noreg :: (load (s128) from %fixed-stack.0, align 8) ; CHECK: dead $lr = t2DLS renamable $r3 ; CHECK: $r1 = tMOVr killed $r3, 14 /* CC::al */, $noreg ; CHECK: bb.2.vector.body: @@ -404,7 +404,7 @@ body: | ; CHECK: liveins: $q0, $r0, $r1, $r2 ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, killed renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2) + ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, killed renamable $vpr :: (load (s64) from %ir.lsr.iv17, align 2) ; CHECK: $lr = tMOVr $r1, 14 /* CC::al */, $noreg ; CHECK: renamable $q1 = nsw MVE_VADDi32 renamable $q0, killed renamable $q1, 0, $noreg, undef renamable $q1 ; CHECK: renamable $r1, dead $cpsr = nsw tSUBi8 killed $r1, 1, 14 /* CC::al */, $noreg @@ -439,7 +439,7 @@ body: | renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg renamable $r3 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r1, 19, 14 /* CC::al */, $noreg, $noreg renamable $r1 = tADDrSPi $sp, 2, 14 /* CC::al */, $noreg - renamable $q0 = MVE_VLDRWU32 killed renamable $r1, 0, 0, $noreg :: (load 16 from %fixed-stack.0, align 8) + renamable $q0 = MVE_VLDRWU32 killed renamable $r1, 0, 0, $noreg :: (load (s128) from %fixed-stack.0, align 8) $lr = t2DoLoopStart renamable $r3 $r1 = tMOVr killed $r3, 14 /* CC::al */, $noreg @@ -449,7 +449,7 @@ body: | renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg MVE_VPST 8, implicit $vpr - renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, killed renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2) + renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, killed renamable $vpr :: (load (s64) from %ir.lsr.iv17, align 2) $lr = tMOVr $r1, 14 /* CC::al */, $noreg renamable $q1 = nsw MVE_VADDi32 renamable $q0, killed renamable $q1, 0, $noreg, undef renamable $q1 renamable $r1, dead $cpsr = nsw tSUBi8 killed $r1, 1, 14 /* CC::al */, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain-store.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain-store.mir index deec9a66a1afa..b7a48480ac312 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain-store.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain-store.mir @@ -142,7 +142,7 @@ body: | ; CHECK: renamable $lr = t2ADDri killed renamable $r2, 3, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r2, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $lr, 19, 14 /* CC::al */, $noreg, $noreg - ; CHECK: t2STRi12 killed renamable $lr, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.iter.addr) + ; CHECK: t2STRi12 killed renamable $lr, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store (s32) into %ir.iter.addr) ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r12 ; CHECK: $r2 = tMOVr killed $lr, 14 /* CC::al */, $noreg ; CHECK: bb.1.do.body: @@ -150,9 +150,9 @@ body: | ; CHECK: liveins: $r0, $r1, $r2 ; CHECK: $lr = tMOVr $r2, 14 /* CC::al */, $noreg ; CHECK: renamable $r2, dead $cpsr = nsw tSUBi8 killed $r2, 1, 14 /* CC::al */, $noreg - ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg :: (load 16 from %ir.pSrc.addr.02, align 4) + ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg :: (load (s128) from %ir.pSrc.addr.02, align 4) ; CHECK: renamable $q0 = MVE_VMULf32 killed renamable $q0, killed renamable $q0, 0, $noreg, undef renamable $q0 - ; CHECK: renamable $r1 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r1, 16, 0, killed $noreg :: (store 16 into %ir.pDst.addr.01, align 4) + ; CHECK: renamable $r1 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r1, 16, 0, killed $noreg :: (store (s128) into %ir.pDst.addr.01, align 4) ; CHECK: dead $lr = MVE_LETP killed renamable $lr, %bb.1 ; CHECK: bb.2.do.end: ; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc @@ -173,7 +173,7 @@ body: | renamable $lr = t2ADDri killed renamable $r2, 3, 14 /* CC::al */, $noreg, $noreg renamable $r2, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $lr, 19, 14 /* CC::al */, $noreg, $noreg - t2STRi12 renamable $lr, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.iter.addr) + t2STRi12 renamable $lr, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store (s32) into %ir.iter.addr) $lr = t2DoLoopStart renamable $lr $r2 = tMOVr killed $lr, 14 /* CC::al */, $noreg @@ -186,11 +186,11 @@ body: | renamable $r2, dead $cpsr = nsw tSUBi8 killed $r2, 1, 14 /* CC::al */, $noreg renamable $r12 = nsw t2SUBri killed renamable $r12, 4, 14 /* CC::al */, $noreg, $noreg MVE_VPST 8, implicit $vpr - renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.pSrc.addr.02, align 4) + renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load (s128) from %ir.pSrc.addr.02, align 4) renamable $lr = t2LoopDec killed renamable $lr, 1 renamable $q0 = MVE_VMULf32 killed renamable $q0, renamable $q0, 0, $noreg, undef renamable $q0 MVE_VPST 8, implicit $vpr - renamable $r1 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r1, 16, 1, killed renamable $vpr :: (store 16 into %ir.pDst.addr.01, align 4) + renamable $r1 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r1, 16, 1, killed renamable $vpr :: (store (s128) into %ir.pDst.addr.01, align 4) t2LoopEnd killed renamable $lr, %bb.1, implicit-def dead $cpsr tB %bb.2, 14 /* CC::al */, $noreg @@ -243,7 +243,7 @@ body: | ; CHECK: renamable $lr = t2ADDri killed renamable $r2, 3, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r2, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $lr, 19, 14 /* CC::al */, $noreg, $noreg - ; CHECK: t2STRi12 killed renamable $lr, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.iter.addr) + ; CHECK: t2STRi12 killed renamable $lr, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store (s32) into %ir.iter.addr) ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r12 ; CHECK: $r2 = tMOVr killed $lr, 14 /* CC::al */, $noreg ; CHECK: bb.1.do.body: @@ -251,9 +251,9 @@ body: | ; CHECK: liveins: $r0, $r1, $r2 ; CHECK: $lr = tMOVr $r2, 14 /* CC::al */, $noreg ; CHECK: renamable $r2, dead $cpsr = nsw tSUBi8 killed $r2, 1, 14 /* CC::al */, $noreg - ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg :: (load 16 from %ir.pSrc.addr.02, align 4) + ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg :: (load (s128) from %ir.pSrc.addr.02, align 4) ; CHECK: renamable $q0 = MVE_VMULf32 killed renamable $q0, killed renamable $q0, 0, $noreg, undef renamable $q0 - ; CHECK: renamable $r1 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r1, 16, 0, killed $noreg :: (store 16 into %ir.pDst.addr.01, align 4) + ; CHECK: renamable $r1 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r1, 16, 0, killed $noreg :: (store (s128) into %ir.pDst.addr.01, align 4) ; CHECK: dead $lr = MVE_LETP killed renamable $lr, %bb.1 ; CHECK: bb.2.do.end: ; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc @@ -274,7 +274,7 @@ body: | renamable $lr = t2ADDri killed renamable $r2, 3, 14 /* CC::al */, $noreg, $noreg renamable $r2, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $lr, 19, 14 /* CC::al */, $noreg, $noreg - t2STRi12 renamable $lr, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.iter.addr) + t2STRi12 renamable $lr, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store (s32) into %ir.iter.addr) $lr = t2DoLoopStart renamable $lr $r2 = tMOVr killed $lr, 14 /* CC::al */, $noreg @@ -287,11 +287,11 @@ body: | renamable $r2, dead $cpsr = nsw tSUBi8 killed $r2, 1, 14 /* CC::al */, $noreg renamable $r12 = nsw t2SUBri killed renamable $r12, 4, 14 /* CC::al */, $noreg, $noreg MVE_VPST 8, implicit $vpr - renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.pSrc.addr.02, align 4) + renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load (s128) from %ir.pSrc.addr.02, align 4) renamable $lr = t2LoopDec killed renamable $lr, 1 renamable $q0 = MVE_VMULf32 killed renamable $q0, renamable $q0, 0, $noreg, undef renamable $q0 MVE_VPST 8, implicit $vpr - renamable $r1 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r1, 16, 1, killed renamable $vpr :: (store 16 into %ir.pDst.addr.01, align 4) + renamable $r1 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r1, 16, 1, killed renamable $vpr :: (store (s128) into %ir.pDst.addr.01, align 4) t2LoopEnd killed renamable $lr, %bb.1, implicit-def dead $cpsr tB %bb.2, 14 /* CC::al */, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain.mir index a3f176e5af52e..ed0685befdabc 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain.mir @@ -113,7 +113,7 @@ body: | ; CHECK: dead renamable $r12 = t2ADDri killed renamable $r2, 3, 14 /* CC::al */, $noreg, $noreg ; CHECK: dead renamable $r2, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg ; CHECK: renamable $r2 = tLEApcrel %const.0, 14 /* CC::al */, $noreg - ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r2, 0, 0, $noreg :: (load 16 from constant-pool) + ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r2, 0, 0, $noreg :: (load (s128) from constant-pool) ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r3 ; CHECK: bb.1.do.body (align 4): ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) @@ -148,7 +148,7 @@ body: | renamable $r2, dead $cpsr = tMOVi8 1, 14, $noreg renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $r12, 19, 14, $noreg, $noreg renamable $r2 = tLEApcrel %const.0, 14, $noreg - renamable $q0 = MVE_VLDRWU32 killed renamable $r2, 0, 0, $noreg :: (load 16 from constant-pool) + renamable $q0 = MVE_VLDRWU32 killed renamable $r2, 0, 0, $noreg :: (load (s128) from constant-pool) $lr = t2DoLoopStart renamable $lr bb.1.do.body (align 4): diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-itercount.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-itercount.mir index dc2dc8a80b744..94019b3464473 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-itercount.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-itercount.mir @@ -105,7 +105,7 @@ body: | ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8 ; CHECK: renamable $r3, dead $cpsr = tLSLri killed renamable $r2, 1, 14 /* CC::al */, $noreg ; CHECK: renamable $r2 = tLEApcrel %const.0, 14 /* CC::al */, $noreg - ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r2, 0, 0, $noreg :: (load 16 from constant-pool) + ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r2, 0, 0, $noreg :: (load (s128) from constant-pool) ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r3 ; CHECK: bb.1.do.body (align 4): ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) @@ -139,7 +139,7 @@ body: | renamable $r2, dead $cpsr = tMOVi8 1, 14, $noreg renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $r12, 19, 14, $noreg, $noreg renamable $r2 = tLEApcrel %const.0, 14, $noreg - renamable $q0 = MVE_VLDRWU32 killed renamable $r2, 0, 0, $noreg :: (load 16 from constant-pool) + renamable $q0 = MVE_VLDRWU32 killed renamable $r2, 0, 0, $noreg :: (load (s128) from constant-pool) $lr = t2DoLoopStart renamable $lr bb.1.do.body (align 4): diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-random.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-random.mir index 285b2bad8c061..9c7b7c8d36a06 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-random.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-random.mir @@ -114,7 +114,7 @@ body: | ; CHECK: dead renamable $r12 = t2ADDri killed renamable $r2, 3, 14 /* CC::al */, $noreg, $noreg ; CHECK: dead renamable $r2, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg ; CHECK: renamable $r2 = tLEApcrel %const.0, 14 /* CC::al */, $noreg - ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r2, 0, 0, $noreg :: (load 16 from constant-pool) + ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r2, 0, 0, $noreg :: (load (s128) from constant-pool) ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r3 ; CHECK: bb.1.do.body (align 4): ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) @@ -148,7 +148,7 @@ body: | renamable $r2, dead $cpsr = tMOVi8 1, 14, $noreg renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $r12, 19, 14, $noreg, $noreg renamable $r2 = tLEApcrel %const.0, 14, $noreg - renamable $q0 = MVE_VLDRWU32 killed renamable $r2, 0, 0, $noreg :: (load 16 from constant-pool) + renamable $q0 = MVE_VLDRWU32 killed renamable $r2, 0, 0, $noreg :: (load (s128) from constant-pool) $lr = t2DoLoopStart renamable $lr bb.1.do.body (align 4): diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-two-vcmp-reordered.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-two-vcmp-reordered.mir index f4b64f4f56ec4..d746b84956583 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-two-vcmp-reordered.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-two-vcmp-reordered.mir @@ -121,7 +121,7 @@ body: | ; CHECK: dead $lr = t2DLS renamable $r3 ; CHECK: $r4 = tMOVr killed $r3, 14 /* CC::al */, $noreg ; CHECK: renamable $r3 = tLEApcrel %const.0, 14 /* CC::al */, $noreg - ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load 16 from constant-pool) + ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load (s128) from constant-pool) ; CHECK: renamable $r3, dead $cpsr = tLSRri renamable $r2, 1, 14 /* CC::al */, $noreg ; CHECK: renamable $q1 = MVE_VDUP32 killed renamable $r3, 0, $noreg, undef renamable $q1 ; CHECK: bb.2.vector.body: @@ -134,8 +134,8 @@ body: | ; CHECK: MVE_VPST 1, implicit $vpr ; CHECK: renamable $vpr = MVE_VCMPu32 renamable $q0, renamable $q2, 2, 1, killed renamable $vpr ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r2, 1, killed renamable $vpr - ; CHECK: renamable $r1, renamable $q4 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv35, align 4) - ; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q4, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv12, align 4) + ; CHECK: renamable $r1, renamable $q4 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv35, align 4) + ; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q4, killed renamable $r0, 16, 1, killed renamable $vpr :: (store (s128) into %ir.lsr.iv12, align 4) ; CHECK: renamable $q0 = MVE_VADDi32 killed renamable $q0, renamable $q3, 0, $noreg, undef renamable $q0 ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.2 ; CHECK: bb.3.for.cond.cleanup: @@ -171,7 +171,7 @@ body: | $lr = t2DoLoopStart renamable $r3 $r4 = tMOVr killed $r3, 14 /* CC::al */, $noreg renamable $r3 = tLEApcrel %const.0, 14 /* CC::al */, $noreg - renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load 16 from constant-pool) + renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load (s128) from constant-pool) renamable $r3, dead $cpsr = tLSRri renamable $r2, 1, 14 /* CC::al */, $noreg renamable $q1 = MVE_VDUP32 killed renamable $r3, 0, $noreg, undef renamable $q1 @@ -186,8 +186,8 @@ body: | MVE_VPST 1, implicit $vpr renamable $vpr = MVE_VCMPu32 renamable $q0, renamable $q2, 2, 1, killed renamable $vpr renamable $vpr = MVE_VCTP32 renamable $r2, 1, killed renamable $vpr - renamable $r1, renamable $q4 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv35, align 4) - renamable $r0 = MVE_VSTRWU32_post killed renamable $q4, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv12, align 4) + renamable $r1, renamable $q4 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv35, align 4) + renamable $r0 = MVE_VSTRWU32_post killed renamable $q4, killed renamable $r0, 16, 1, killed renamable $vpr :: (store (s128) into %ir.lsr.iv12, align 4) renamable $q0 = MVE_VADDi32 killed renamable $q0, renamable $q3, 0, $noreg, undef renamable $q0 renamable $lr = t2LoopDec killed renamable $lr, 1 t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-two-vcmp.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-two-vcmp.mir index 0c1f7e41f1b4c..d01777c4441d4 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-two-vcmp.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-two-vcmp.mir @@ -118,7 +118,7 @@ body: | ; CHECK: dead $lr = t2DLS renamable $r3 ; CHECK: $r4 = tMOVr killed $r3, 14 /* CC::al */, $noreg ; CHECK: renamable $r3 = tLEApcrel %const.0, 14 /* CC::al */, $noreg - ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load 16 from constant-pool) + ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load (s128) from constant-pool) ; CHECK: renamable $r3, dead $cpsr = tLSRri renamable $r2, 1, 14 /* CC::al */, $noreg ; CHECK: renamable $q1 = MVE_VDUP32 killed renamable $r3, 0, $noreg, undef renamable $q1 ; CHECK: bb.2.vector.body: @@ -131,8 +131,8 @@ body: | ; CHECK: MVE_VPST 1, implicit $vpr ; CHECK: renamable $vpr = MVE_VCMPu32 renamable $q1, renamable $q0, 8, 1, killed renamable $vpr ; CHECK: renamable $vpr = MVE_VCMPu32 renamable $q0, renamable $q2, 2, 1, killed renamable $vpr - ; CHECK: renamable $r1, renamable $q4 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv35, align 4) - ; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q4, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv12, align 4) + ; CHECK: renamable $r1, renamable $q4 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv35, align 4) + ; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q4, killed renamable $r0, 16, 1, killed renamable $vpr :: (store (s128) into %ir.lsr.iv12, align 4) ; CHECK: renamable $q0 = MVE_VADDi32 killed renamable $q0, renamable $q3, 0, $noreg, undef renamable $q0 ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.2 ; CHECK: bb.3.for.cond.cleanup: @@ -168,7 +168,7 @@ body: | $lr = t2DoLoopStart renamable $r3 $r4 = tMOVr killed $r3, 14 /* CC::al */, $noreg renamable $r3 = tLEApcrel %const.0, 14 /* CC::al */, $noreg - renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load 16 from constant-pool) + renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load (s128) from constant-pool) renamable $r3, dead $cpsr = tLSRri renamable $r2, 1, 14 /* CC::al */, $noreg renamable $q1 = MVE_VDUP32 killed renamable $r3, 0, $noreg, undef renamable $q1 @@ -183,8 +183,8 @@ body: | MVE_VPST 1, implicit $vpr renamable $vpr = MVE_VCMPu32 renamable $q1, renamable $q0, 8, 1, killed renamable $vpr renamable $vpr = MVE_VCMPu32 renamable $q0, renamable $q2, 2, 1, killed renamable $vpr - renamable $r1, renamable $q4 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv35, align 4) - renamable $r0 = MVE_VSTRWU32_post killed renamable $q4, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv12, align 4) + renamable $r1, renamable $q4 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv35, align 4) + renamable $r0 = MVE_VSTRWU32_post killed renamable $q4, killed renamable $r0, 16, 1, killed renamable $vpr :: (store (s128) into %ir.lsr.iv12, align 4) renamable $q0 = MVE_VADDi32 killed renamable $q0, renamable $q3, 0, $noreg, undef renamable $q0 renamable $lr = t2LoopDec killed renamable $lr, 1 t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-vcmp.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-vcmp.mir index 084a0c925f97f..f8622b1ac8627 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-vcmp.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-vcmp.mir @@ -103,7 +103,7 @@ body: | ; CHECK: liveins: $r0, $r1, $r2 ; CHECK: renamable $q2 = MVE_VMOVimmi32 4, 0, $noreg, undef renamable $q2 ; CHECK: renamable $r3 = tLEApcrel %const.0, 14 /* CC::al */, $noreg - ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load 16 from constant-pool) + ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load (s128) from constant-pool) ; CHECK: renamable $r3, dead $cpsr = tLSRri renamable $r2, 1, 14 /* CC::al */, $noreg ; CHECK: renamable $q1 = MVE_VDUP32 killed renamable $r3, 0, $noreg, undef renamable $q1 ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r2 @@ -111,8 +111,8 @@ body: | ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $q0, $q1, $q2, $r0, $r1 ; CHECK: MVE_VPTv4u32 4, renamable $q1, renamable $q0, 8, implicit-def $vpr - ; CHECK: renamable $r1, renamable $q3 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv35, align 4) - ; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q3, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv12, align 4) + ; CHECK: renamable $r1, renamable $q3 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv35, align 4) + ; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q3, killed renamable $r0, 16, 1, killed renamable $vpr :: (store (s128) into %ir.lsr.iv12, align 4) ; CHECK: renamable $q0 = MVE_VADDi32 killed renamable $q0, renamable $q2, 0, $noreg, undef renamable $q0 ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 ; CHECK: bb.3.for.cond.cleanup: @@ -144,7 +144,7 @@ body: | renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg renamable $r3 = tLEApcrel %const.0, 14 /* CC::al */, $noreg - renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load 16 from constant-pool) + renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load (s128) from constant-pool) renamable $r3, dead $cpsr = tLSRri renamable $r2, 1, 14 /* CC::al */, $noreg renamable $q1 = MVE_VDUP32 killed renamable $r3, 0, $noreg, undef renamable $q1 $lr = t2DoLoopStart renamable $lr @@ -157,8 +157,8 @@ body: | renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg MVE_VPST 2, implicit $vpr renamable $vpr = MVE_VCMPu32 renamable $q1, renamable $q0, 8, 1, killed renamable $vpr - renamable $r1, renamable $q3 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv35, align 4) - renamable $r0 = MVE_VSTRWU32_post killed renamable $q3, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv12, align 4) + renamable $r1, renamable $q3 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv35, align 4) + renamable $r0 = MVE_VSTRWU32_post killed renamable $q3, killed renamable $r0, 16, 1, killed renamable $vpr :: (store (s128) into %ir.lsr.iv12, align 4) renamable $q0 = MVE_VADDi32 killed renamable $q0, renamable $q2, 0, $noreg, undef renamable $q0 renamable $lr = t2LoopDec killed renamable $lr, 1 t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/livereg-no-loop-def.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/livereg-no-loop-def.mir index d86195c98b116..f4633a3ee9cc6 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/livereg-no-loop-def.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/livereg-no-loop-def.mir @@ -93,7 +93,7 @@ body: | ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 ; CHECK: frame-setup CFI_INSTRUCTION offset $r4, -8 ; CHECK: renamable $r12 = t2ADDri $sp, 8, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r12, 0, 0, $noreg :: (load 16 from %fixed-stack.0, align 8) + ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r12, 0, 0, $noreg :: (load (s128) from %fixed-stack.0, align 8) ; CHECK: tCBZ $r3, %bb.3 ; CHECK: bb.1.vector.ph: ; CHECK: successors: %bb.2(0x80000000) @@ -103,11 +103,11 @@ body: | ; CHECK: bb.2.vector.body: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $q0, $q1, $r0, $r1, $r2 - ; CHECK: renamable $r0, renamable $q2 = MVE_VLDRHS32_post killed renamable $r0, 8, 0, $noreg :: (load 8 from %ir.lsr.iv17, align 2) - ; CHECK: renamable $r1, renamable $q3 = MVE_VLDRHS32_post killed renamable $r1, 8, 0, $noreg :: (load 8 from %ir.lsr.iv1820, align 2) + ; CHECK: renamable $r0, renamable $q2 = MVE_VLDRHS32_post killed renamable $r0, 8, 0, $noreg :: (load (s64) from %ir.lsr.iv17, align 2) + ; CHECK: renamable $r1, renamable $q3 = MVE_VLDRHS32_post killed renamable $r1, 8, 0, $noreg :: (load (s64) from %ir.lsr.iv1820, align 2) ; CHECK: renamable $q2 = nsw MVE_VMULi32 killed renamable $q3, killed renamable $q2, 0, $noreg, undef renamable $q2 ; CHECK: renamable $q1 = MVE_VADDi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 - ; CHECK: renamable $r2 = MVE_VSTRWU32_post renamable $q1, killed renamable $r2, 16, 0, killed $noreg :: (store 16 into %ir.lsr.store, align 4) + ; CHECK: renamable $r2 = MVE_VSTRWU32_post renamable $q1, killed renamable $r2, 16, 0, killed $noreg :: (store (s128) into %ir.lsr.store, align 4) ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 ; CHECK: bb.3.exit: ; CHECK: liveins: $q0 @@ -123,7 +123,7 @@ body: | frame-setup CFI_INSTRUCTION offset $lr, -4 frame-setup CFI_INSTRUCTION offset $r4, -8 renamable $r12 = t2ADDri $sp, 8, 14, $noreg, $noreg - renamable $q0 = MVE_VLDRWU32 killed renamable $r12, 0, 0, $noreg :: (load 16 from %fixed-stack.0, align 8) + renamable $q0 = MVE_VLDRWU32 killed renamable $r12, 0, 0, $noreg :: (load (s128) from %fixed-stack.0, align 8) tCBZ $r3, %bb.3 bb.1.vector.ph: @@ -145,8 +145,8 @@ body: | renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg MVE_VPST 4, implicit $vpr - renamable $r0, renamable $q2 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2) - renamable $r1, renamable $q3 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv1820, align 2) + renamable $r0, renamable $q2 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv17, align 2) + renamable $r1, renamable $q3 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv1820, align 2) $lr = tMOVr $r12, 14, $noreg renamable $q2 = nsw MVE_VMULi32 killed renamable $q3, killed renamable $q2, 0, $noreg, undef renamable $q2 renamable $r12 = nsw t2SUBri killed $r12, 1, 14, $noreg, $noreg @@ -154,7 +154,7 @@ body: | renamable $q1 = MVE_VADDi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 renamable $lr = t2LoopDec killed renamable $lr, 1 MVE_VPST 8, implicit $vpr - renamable $r2 = MVE_VSTRWU32_post renamable $q1, killed renamable $r2, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.store, align 4) + renamable $r2 = MVE_VSTRWU32_post renamable $q1, killed renamable $r2, 16, 1, killed renamable $vpr :: (store (s128) into %ir.lsr.store, align 4) t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr tB %bb.3, 14, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-copy-chain.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-copy-chain.mir index bcb901188c8bd..34c8a251e98d2 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-copy-chain.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-copy-chain.mir @@ -245,7 +245,7 @@ body: | ; CHECK: renamable $r4, dead $cpsr = tSUBi3 renamable $r3, 1, 14 /* CC::al */, $noreg ; CHECK: renamable $r7 = t2ANDri renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg ; CHECK: tCMPi8 killed renamable $r4, 3, 14 /* CC::al */, $noreg, implicit-def $cpsr - ; CHECK: tSTRspi killed renamable $r7, $sp, 0, 14 /* CC::al */, $noreg :: (store 4 into %stack.0) + ; CHECK: tSTRspi killed renamable $r7, $sp, 0, 14 /* CC::al */, $noreg :: (store (s32) into %stack.0) ; CHECK: tBcc %bb.3, 2 /* CC::hs */, killed $cpsr ; CHECK: bb.2: ; CHECK: successors: %bb.5(0x80000000) @@ -266,67 +266,67 @@ body: | ; CHECK: bb.4.bb28: ; CHECK: successors: %bb.4(0x7c000000), %bb.5(0x04000000) ; CHECK: liveins: $r0, $r1, $r2, $r3, $r8, $r12 - ; CHECK: renamable $r5 = tLDRr renamable $r1, $r3, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep617) + ; CHECK: renamable $r5 = tLDRr renamable $r1, $r3, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep617) ; CHECK: renamable $r7, dead $cpsr = tADDrr renamable $r2, renamable $r3, 14 /* CC::al */, $noreg - ; CHECK: renamable $r6 = tLDRr renamable $r2, $r3, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep418) + ; CHECK: renamable $r6 = tLDRr renamable $r2, $r3, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep418) ; CHECK: $lr = tMOVr killed $r12, 14 /* CC::al */, $noreg ; CHECK: renamable $r8 = nuw t2ADDri killed renamable $r8, 4, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r5, dead $cpsr = tEOR killed renamable $r5, killed renamable $r6, 14 /* CC::al */, $noreg - ; CHECK: renamable $r6 = tLDRr renamable $r0, $r3, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep219) + ; CHECK: renamable $r6 = tLDRr renamable $r0, $r3, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep219) ; CHECK: renamable $lr = t2SUBri killed renamable $lr, 1, 14 /* CC::al */, $noreg, $noreg ; CHECK: $r12 = tMOVr $lr, 14 /* CC::al */, $noreg ; CHECK: renamable $r5 = nsw tADDhirr killed renamable $r5, killed renamable $r6, 14 /* CC::al */, $noreg - ; CHECK: tSTRr killed renamable $r5, renamable $r0, $r3, 14 /* CC::al */, $noreg :: (store 4 into %ir.scevgep219) + ; CHECK: tSTRr killed renamable $r5, renamable $r0, $r3, 14 /* CC::al */, $noreg :: (store (s32) into %ir.scevgep219) ; CHECK: renamable $r5, dead $cpsr = tADDrr renamable $r1, renamable $r3, 14 /* CC::al */, $noreg - ; CHECK: renamable $r4 = tLDRi renamable $r7, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep11) - ; CHECK: renamable $r6 = tLDRi renamable $r5, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep14) + ; CHECK: renamable $r4 = tLDRi renamable $r7, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep11) + ; CHECK: renamable $r6 = tLDRi renamable $r5, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep14) ; CHECK: renamable $r9 = t2EORrr killed renamable $r4, killed renamable $r6, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r6, dead $cpsr = tADDrr renamable $r0, renamable $r3, 14 /* CC::al */, $noreg ; CHECK: $r11 = t2ADDri $r6, 4, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r3, dead $cpsr = tADDi8 killed renamable $r3, 16, 14 /* CC::al */, $noreg - ; CHECK: t2LDMIA killed $r11, 14 /* CC::al */, $noreg, def $r4, def $r10, def $r11 :: (load 4 from %ir.scevgep9), (load 4 from %ir.scevgep8), (load 4 from %ir.scevgep1) + ; CHECK: t2LDMIA killed $r11, 14 /* CC::al */, $noreg, def $r4, def $r10, def $r11 :: (load (s32) from %ir.scevgep9), (load (s32) from %ir.scevgep8), (load (s32) from %ir.scevgep1) ; CHECK: renamable $r4 = nsw tADDhirr killed renamable $r4, killed renamable $r9, 14 /* CC::al */, $noreg - ; CHECK: tSTRi killed renamable $r4, renamable $r6, 1, 14 /* CC::al */, $noreg :: (store 4 into %ir.scevgep9) - ; CHECK: renamable $r9 = t2LDRi12 renamable $r5, 8, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep12) - ; CHECK: renamable $r4 = tLDRi renamable $r7, 2, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep10) + ; CHECK: tSTRi killed renamable $r4, renamable $r6, 1, 14 /* CC::al */, $noreg :: (store (s32) into %ir.scevgep9) + ; CHECK: renamable $r9 = t2LDRi12 renamable $r5, 8, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep12) + ; CHECK: renamable $r4 = tLDRi renamable $r7, 2, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep10) ; CHECK: renamable $r4 = t2EORrr killed renamable $r4, killed renamable $r9, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r4 = nsw tADDhirr killed renamable $r4, killed renamable $r10, 14 /* CC::al */, $noreg - ; CHECK: tSTRi killed renamable $r4, renamable $r6, 2, 14 /* CC::al */, $noreg :: (store 4 into %ir.scevgep8) - ; CHECK: renamable $r4 = tLDRi killed renamable $r5, 3, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep5) - ; CHECK: renamable $r5 = tLDRi killed renamable $r7, 3, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep3) + ; CHECK: tSTRi killed renamable $r4, renamable $r6, 2, 14 /* CC::al */, $noreg :: (store (s32) into %ir.scevgep8) + ; CHECK: renamable $r4 = tLDRi killed renamable $r5, 3, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep5) + ; CHECK: renamable $r5 = tLDRi killed renamable $r7, 3, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3) ; CHECK: renamable $r4, dead $cpsr = tEOR killed renamable $r4, killed renamable $r5, 14 /* CC::al */, $noreg ; CHECK: renamable $r4 = nsw tADDhirr killed renamable $r4, killed renamable $r11, 14 /* CC::al */, $noreg - ; CHECK: tSTRi killed renamable $r4, killed renamable $r6, 3, 14 /* CC::al */, $noreg :: (store 4 into %ir.scevgep1) + ; CHECK: tSTRi killed renamable $r4, killed renamable $r6, 3, 14 /* CC::al */, $noreg :: (store (s32) into %ir.scevgep1) ; CHECK: t2CMPri killed renamable $lr, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: tBcc %bb.4, 1 /* CC::ne */, killed $cpsr ; CHECK: tB %bb.5, 14 /* CC::al */, $noreg ; CHECK: bb.5.bb13: ; CHECK: successors: %bb.8(0x30000000), %bb.6(0x50000000) ; CHECK: liveins: $r0, $r1, $r2, $r8 - ; CHECK: renamable $r5 = tLDRspi $sp, 0, 14 /* CC::al */, $noreg :: (load 4 from %stack.0) + ; CHECK: renamable $r5 = tLDRspi $sp, 0, 14 /* CC::al */, $noreg :: (load (s32) from %stack.0) ; CHECK: tCBZ $r5, %bb.8 ; CHECK: bb.6.bb16: ; CHECK: successors: %bb.8(0x40000000), %bb.7(0x40000000) ; CHECK: liveins: $r0, $r1, $r2, $r5, $r8 - ; CHECK: renamable $lr = t2LDRs renamable $r1, renamable $r8, 2, 14 /* CC::al */, $noreg :: (load 4 from %ir.tmp17) + ; CHECK: renamable $lr = t2LDRs renamable $r1, renamable $r8, 2, 14 /* CC::al */, $noreg :: (load (s32) from %ir.tmp17) ; CHECK: tCMPi8 renamable $r5, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr - ; CHECK: renamable $r3 = t2LDRs renamable $r2, renamable $r8, 2, 14 /* CC::al */, $noreg :: (load 4 from %ir.tmp19) + ; CHECK: renamable $r3 = t2LDRs renamable $r2, renamable $r8, 2, 14 /* CC::al */, $noreg :: (load (s32) from %ir.tmp19) ; CHECK: renamable $lr = t2EORrr killed renamable $lr, killed renamable $r3, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $r3 = t2LDRs renamable $r0, renamable $r8, 2, 14 /* CC::al */, $noreg :: (load 4 from %ir.tmp22) + ; CHECK: renamable $r3 = t2LDRs renamable $r0, renamable $r8, 2, 14 /* CC::al */, $noreg :: (load (s32) from %ir.tmp22) ; CHECK: renamable $r3 = nsw tADDhirr killed renamable $r3, killed renamable $lr, 14 /* CC::al */, $noreg - ; CHECK: t2STRs killed renamable $r3, renamable $r0, renamable $r8, 2, 14 /* CC::al */, $noreg :: (store 4 into %ir.tmp22) + ; CHECK: t2STRs killed renamable $r3, renamable $r0, renamable $r8, 2, 14 /* CC::al */, $noreg :: (store (s32) into %ir.tmp22) ; CHECK: tBcc %bb.8, 0 /* CC::eq */, killed $cpsr ; CHECK: bb.7.bb57: ; CHECK: successors: %bb.8(0x40000000), %bb.9(0x40000000) ; CHECK: liveins: $r0, $r1, $r2, $r5, $r8 ; CHECK: renamable $r3 = nuw t2ADDri renamable $r8, 1, 14 /* CC::al */, $noreg, $noreg ; CHECK: tCMPi8 killed renamable $r5, 2, 14 /* CC::al */, $noreg, implicit-def $cpsr - ; CHECK: renamable $r7 = t2LDRs renamable $r1, renamable $r3, 2, 14 /* CC::al */, $noreg :: (load 4 from %ir.tmp58) - ; CHECK: renamable $r6 = t2LDRs renamable $r2, renamable $r3, 2, 14 /* CC::al */, $noreg :: (load 4 from %ir.tmp60) + ; CHECK: renamable $r7 = t2LDRs renamable $r1, renamable $r3, 2, 14 /* CC::al */, $noreg :: (load (s32) from %ir.tmp58) + ; CHECK: renamable $r6 = t2LDRs renamable $r2, renamable $r3, 2, 14 /* CC::al */, $noreg :: (load (s32) from %ir.tmp60) ; CHECK: renamable $r7 = t2EORrr killed renamable $r7, killed renamable $r6, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $r6 = t2LDRs renamable $r0, renamable $r3, 2, 14 /* CC::al */, $noreg :: (load 4 from %ir.tmp63) + ; CHECK: renamable $r6 = t2LDRs renamable $r0, renamable $r3, 2, 14 /* CC::al */, $noreg :: (load (s32) from %ir.tmp63) ; CHECK: renamable $r7 = nsw tADDhirr killed renamable $r7, killed renamable $r6, 14 /* CC::al */, $noreg - ; CHECK: t2STRs killed renamable $r7, renamable $r0, killed renamable $r3, 2, 14 /* CC::al */, $noreg :: (store 4 into %ir.tmp63) + ; CHECK: t2STRs killed renamable $r7, renamable $r0, killed renamable $r3, 2, 14 /* CC::al */, $noreg :: (store (s32) into %ir.tmp63) ; CHECK: tBcc %bb.9, 1 /* CC::ne */, killed $cpsr ; CHECK: bb.8.bb27: ; CHECK: $sp = tADDspi $sp, 1, 14 /* CC::al */, $noreg @@ -334,12 +334,12 @@ body: | ; CHECK: bb.9.bb68: ; CHECK: liveins: $r0, $r1, $r2, $r8 ; CHECK: renamable $r3 = nuw t2ADDri killed renamable $r8, 2, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $r1 = t2LDRs killed renamable $r1, renamable $r3, 2, 14 /* CC::al */, $noreg :: (load 4 from %ir.tmp69) - ; CHECK: renamable $r2 = t2LDRs killed renamable $r2, renamable $r3, 2, 14 /* CC::al */, $noreg :: (load 4 from %ir.tmp71) + ; CHECK: renamable $r1 = t2LDRs killed renamable $r1, renamable $r3, 2, 14 /* CC::al */, $noreg :: (load (s32) from %ir.tmp69) + ; CHECK: renamable $r2 = t2LDRs killed renamable $r2, renamable $r3, 2, 14 /* CC::al */, $noreg :: (load (s32) from %ir.tmp71) ; CHECK: renamable $r1, dead $cpsr = tEOR killed renamable $r1, killed renamable $r2, 14 /* CC::al */, $noreg - ; CHECK: renamable $r2 = t2LDRs renamable $r0, renamable $r3, 2, 14 /* CC::al */, $noreg :: (load 4 from %ir.tmp74) + ; CHECK: renamable $r2 = t2LDRs renamable $r0, renamable $r3, 2, 14 /* CC::al */, $noreg :: (load (s32) from %ir.tmp74) ; CHECK: renamable $r1 = nsw tADDhirr killed renamable $r1, killed renamable $r2, 14 /* CC::al */, $noreg - ; CHECK: t2STRs killed renamable $r1, killed renamable $r0, killed renamable $r3, 2, 14 /* CC::al */, $noreg :: (store 4 into %ir.tmp74) + ; CHECK: t2STRs killed renamable $r1, killed renamable $r0, killed renamable $r3, 2, 14 /* CC::al */, $noreg :: (store (s32) into %ir.tmp74) ; CHECK: $sp = tADDspi $sp, 1, 14 /* CC::al */, $noreg ; CHECK: $sp = t2LDMIA_RET $sp, 14 /* CC::al */, $noreg, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $r11, def $pc bb.0.bb: @@ -369,7 +369,7 @@ body: | renamable $r4, dead $cpsr = tSUBi3 renamable $r3, 1, 14, $noreg renamable $r7 = t2ANDri renamable $r3, 3, 14, $noreg, $noreg tCMPi8 killed renamable $r4, 3, 14, $noreg, implicit-def $cpsr - tSTRspi killed renamable $r7, $sp, 0, 14, $noreg :: (store 4 into %stack.0) + tSTRspi killed renamable $r7, $sp, 0, 14, $noreg :: (store (s32) into %stack.0) tBcc %bb.3, 2, killed $cpsr bb.2: @@ -396,37 +396,37 @@ body: | successors: %bb.4(0x7c000000), %bb.5(0x04000000) liveins: $r0, $r1, $r2, $r3, $r8, $r12 - renamable $r5 = tLDRr renamable $r1, $r3, 14, $noreg :: (load 4 from %ir.scevgep617) + renamable $r5 = tLDRr renamable $r1, $r3, 14, $noreg :: (load (s32) from %ir.scevgep617) renamable $r7, dead $cpsr = tADDrr renamable $r2, renamable $r3, 14, $noreg - renamable $r6 = tLDRr renamable $r2, $r3, 14, $noreg :: (load 4 from %ir.scevgep418) + renamable $r6 = tLDRr renamable $r2, $r3, 14, $noreg :: (load (s32) from %ir.scevgep418) $lr = tMOVr $r12, 14, $noreg renamable $r8 = nuw t2ADDri killed renamable $r8, 4, 14, $noreg, $noreg renamable $r5, dead $cpsr = tEOR killed renamable $r5, killed renamable $r6, 14, $noreg - renamable $r6 = tLDRr renamable $r0, $r3, 14, $noreg :: (load 4 from %ir.scevgep219) + renamable $r6 = tLDRr renamable $r0, $r3, 14, $noreg :: (load (s32) from %ir.scevgep219) renamable $lr = t2LoopDec killed renamable $lr, 1 $r12 = tMOVr $lr, 14, $noreg renamable $r5 = nsw tADDhirr killed renamable $r5, killed renamable $r6, 14, $noreg - tSTRr killed renamable $r5, renamable $r0, $r3, 14, $noreg :: (store 4 into %ir.scevgep219) + tSTRr killed renamable $r5, renamable $r0, $r3, 14, $noreg :: (store (s32) into %ir.scevgep219) renamable $r5, dead $cpsr = tADDrr renamable $r1, renamable $r3, 14, $noreg - renamable $r4 = tLDRi renamable $r7, 1, 14, $noreg :: (load 4 from %ir.scevgep11) - renamable $r6 = tLDRi renamable $r5, 1, 14, $noreg :: (load 4 from %ir.scevgep14) + renamable $r4 = tLDRi renamable $r7, 1, 14, $noreg :: (load (s32) from %ir.scevgep11) + renamable $r6 = tLDRi renamable $r5, 1, 14, $noreg :: (load (s32) from %ir.scevgep14) renamable $r9 = t2EORrr killed renamable $r4, killed renamable $r6, 14, $noreg, $noreg renamable $r6, dead $cpsr = tADDrr renamable $r0, renamable $r3, 14, $noreg $r11 = t2ADDri $r6, 4, 14, $noreg, $noreg renamable $r3, dead $cpsr = tADDi8 killed renamable $r3, 16, 14, $noreg - t2LDMIA killed $r11, 14, $noreg, def $r4, def $r10, def $r11 :: (load 4 from %ir.scevgep9), (load 4 from %ir.scevgep8), (load 4 from %ir.scevgep1) + t2LDMIA killed $r11, 14, $noreg, def $r4, def $r10, def $r11 :: (load (s32) from %ir.scevgep9), (load (s32) from %ir.scevgep8), (load (s32) from %ir.scevgep1) renamable $r4 = nsw tADDhirr killed renamable $r4, killed renamable $r9, 14, $noreg - tSTRi killed renamable $r4, renamable $r6, 1, 14, $noreg :: (store 4 into %ir.scevgep9) - renamable $r9 = t2LDRi12 renamable $r5, 8, 14, $noreg :: (load 4 from %ir.scevgep12) - renamable $r4 = tLDRi renamable $r7, 2, 14, $noreg :: (load 4 from %ir.scevgep10) + tSTRi killed renamable $r4, renamable $r6, 1, 14, $noreg :: (store (s32) into %ir.scevgep9) + renamable $r9 = t2LDRi12 renamable $r5, 8, 14, $noreg :: (load (s32) from %ir.scevgep12) + renamable $r4 = tLDRi renamable $r7, 2, 14, $noreg :: (load (s32) from %ir.scevgep10) renamable $r4 = t2EORrr killed renamable $r4, killed renamable $r9, 14, $noreg, $noreg renamable $r4 = nsw tADDhirr killed renamable $r4, killed renamable $r10, 14, $noreg - tSTRi killed renamable $r4, renamable $r6, 2, 14, $noreg :: (store 4 into %ir.scevgep8) - renamable $r4 = tLDRi killed renamable $r5, 3, 14, $noreg :: (load 4 from %ir.scevgep5) - renamable $r5 = tLDRi killed renamable $r7, 3, 14, $noreg :: (load 4 from %ir.scevgep3) + tSTRi killed renamable $r4, renamable $r6, 2, 14, $noreg :: (store (s32) into %ir.scevgep8) + renamable $r4 = tLDRi killed renamable $r5, 3, 14, $noreg :: (load (s32) from %ir.scevgep5) + renamable $r5 = tLDRi killed renamable $r7, 3, 14, $noreg :: (load (s32) from %ir.scevgep3) renamable $r4, dead $cpsr = tEOR killed renamable $r4, killed renamable $r5, 14, $noreg renamable $r4 = nsw tADDhirr killed renamable $r4, killed renamable $r11, 14, $noreg - tSTRi killed renamable $r4, killed renamable $r6, 3, 14, $noreg :: (store 4 into %ir.scevgep1) + tSTRi killed renamable $r4, killed renamable $r6, 3, 14, $noreg :: (store (s32) into %ir.scevgep1) t2LoopEnd killed renamable $lr, %bb.4, implicit-def dead $cpsr tB %bb.5, 14, $noreg @@ -434,20 +434,20 @@ body: | successors: %bb.8(0x30000000), %bb.6(0x50000000) liveins: $r0, $r1, $r2, $r8 - renamable $r5 = tLDRspi $sp, 0, 14, $noreg :: (load 4 from %stack.0) + renamable $r5 = tLDRspi $sp, 0, 14, $noreg :: (load (s32) from %stack.0) tCBZ $r5, %bb.8 bb.6.bb16: successors: %bb.8(0x40000000), %bb.7(0x40000000) liveins: $r0, $r1, $r2, $r5, $r8 - renamable $lr = t2LDRs renamable $r1, renamable $r8, 2, 14, $noreg :: (load 4 from %ir.tmp17) + renamable $lr = t2LDRs renamable $r1, renamable $r8, 2, 14, $noreg :: (load (s32) from %ir.tmp17) tCMPi8 renamable $r5, 1, 14, $noreg, implicit-def $cpsr - renamable $r3 = t2LDRs renamable $r2, renamable $r8, 2, 14, $noreg :: (load 4 from %ir.tmp19) + renamable $r3 = t2LDRs renamable $r2, renamable $r8, 2, 14, $noreg :: (load (s32) from %ir.tmp19) renamable $lr = t2EORrr killed renamable $lr, killed renamable $r3, 14, $noreg, $noreg - renamable $r3 = t2LDRs renamable $r0, renamable $r8, 2, 14, $noreg :: (load 4 from %ir.tmp22) + renamable $r3 = t2LDRs renamable $r0, renamable $r8, 2, 14, $noreg :: (load (s32) from %ir.tmp22) renamable $r3 = nsw tADDhirr killed renamable $r3, killed renamable $lr, 14, $noreg - t2STRs killed renamable $r3, renamable $r0, renamable $r8, 2, 14, $noreg :: (store 4 into %ir.tmp22) + t2STRs killed renamable $r3, renamable $r0, renamable $r8, 2, 14, $noreg :: (store (s32) into %ir.tmp22) tBcc %bb.8, 0, killed $cpsr bb.7.bb57: @@ -456,12 +456,12 @@ body: | renamable $r3 = nuw t2ADDri renamable $r8, 1, 14, $noreg, $noreg tCMPi8 killed renamable $r5, 2, 14, $noreg, implicit-def $cpsr - renamable $r7 = t2LDRs renamable $r1, renamable $r3, 2, 14, $noreg :: (load 4 from %ir.tmp58) - renamable $r6 = t2LDRs renamable $r2, renamable $r3, 2, 14, $noreg :: (load 4 from %ir.tmp60) + renamable $r7 = t2LDRs renamable $r1, renamable $r3, 2, 14, $noreg :: (load (s32) from %ir.tmp58) + renamable $r6 = t2LDRs renamable $r2, renamable $r3, 2, 14, $noreg :: (load (s32) from %ir.tmp60) renamable $r7 = t2EORrr killed renamable $r7, killed renamable $r6, 14, $noreg, $noreg - renamable $r6 = t2LDRs renamable $r0, renamable $r3, 2, 14, $noreg :: (load 4 from %ir.tmp63) + renamable $r6 = t2LDRs renamable $r0, renamable $r3, 2, 14, $noreg :: (load (s32) from %ir.tmp63) renamable $r7 = nsw tADDhirr killed renamable $r7, killed renamable $r6, 14, $noreg - t2STRs killed renamable $r7, renamable $r0, killed renamable $r3, 2, 14, $noreg :: (store 4 into %ir.tmp63) + t2STRs killed renamable $r7, renamable $r0, killed renamable $r3, 2, 14, $noreg :: (store (s32) into %ir.tmp63) tBcc %bb.9, 1, killed $cpsr bb.8.bb27: @@ -472,12 +472,12 @@ body: | liveins: $r0, $r1, $r2, $r8 renamable $r3 = nuw t2ADDri killed renamable $r8, 2, 14, $noreg, $noreg - renamable $r1 = t2LDRs killed renamable $r1, renamable $r3, 2, 14, $noreg :: (load 4 from %ir.tmp69) - renamable $r2 = t2LDRs killed renamable $r2, renamable $r3, 2, 14, $noreg :: (load 4 from %ir.tmp71) + renamable $r1 = t2LDRs killed renamable $r1, renamable $r3, 2, 14, $noreg :: (load (s32) from %ir.tmp69) + renamable $r2 = t2LDRs killed renamable $r2, renamable $r3, 2, 14, $noreg :: (load (s32) from %ir.tmp71) renamable $r1, dead $cpsr = tEOR killed renamable $r1, killed renamable $r2, 14, $noreg - renamable $r2 = t2LDRs renamable $r0, renamable $r3, 2, 14, $noreg :: (load 4 from %ir.tmp74) + renamable $r2 = t2LDRs renamable $r0, renamable $r3, 2, 14, $noreg :: (load (s32) from %ir.tmp74) renamable $r1 = nsw tADDhirr killed renamable $r1, killed renamable $r2, 14, $noreg - t2STRs killed renamable $r1, killed renamable $r0, killed renamable $r3, 2, 14, $noreg :: (store 4 into %ir.tmp74) + t2STRs killed renamable $r1, killed renamable $r0, killed renamable $r3, 2, 14, $noreg :: (store (s32) into %ir.tmp74) $sp = tADDspi $sp, 1, 14, $noreg $sp = t2LDMIA_RET $sp, 14, $noreg, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $r11, def $pc diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-copy-prev-iteration.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-copy-prev-iteration.mir index 1f66911983d22..214eb488a7677 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-copy-prev-iteration.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-copy-prev-iteration.mir @@ -245,7 +245,7 @@ body: | ; CHECK: renamable $r4, dead $cpsr = tSUBi3 renamable $r3, 1, 14 /* CC::al */, $noreg ; CHECK: renamable $r7 = t2ANDri renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg ; CHECK: tCMPi8 killed renamable $r4, 3, 14 /* CC::al */, $noreg, implicit-def $cpsr - ; CHECK: tSTRspi killed renamable $r7, $sp, 0, 14 /* CC::al */, $noreg :: (store 4 into %stack.0) + ; CHECK: tSTRspi killed renamable $r7, $sp, 0, 14 /* CC::al */, $noreg :: (store (s32) into %stack.0) ; CHECK: tBcc %bb.4, 2 /* CC::hs */, killed $cpsr ; CHECK: bb.2: ; CHECK: successors: %bb.6(0x80000000) @@ -271,51 +271,51 @@ body: | ; CHECK: bb.5.bb28: ; CHECK: successors: %bb.5(0x7c000000), %bb.6(0x04000000) ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3, $r8 - ; CHECK: renamable $r5 = tLDRr renamable $r1, $r3, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep617) + ; CHECK: renamable $r5 = tLDRr renamable $r1, $r3, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep617) ; CHECK: renamable $r7, dead $cpsr = tADDrr renamable $r2, renamable $r3, 14 /* CC::al */, $noreg - ; CHECK: renamable $r6 = tLDRr renamable $r2, $r3, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep418) + ; CHECK: renamable $r6 = tLDRr renamable $r2, $r3, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep418) ; CHECK: dead $r12 = tMOVr $lr, 14 /* CC::al */, $noreg ; CHECK: renamable $r8 = nuw t2ADDri killed renamable $r8, 4, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r5, dead $cpsr = tEOR killed renamable $r5, killed renamable $r6, 14 /* CC::al */, $noreg - ; CHECK: renamable $r6 = tLDRr renamable $r0, $r3, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep219) + ; CHECK: renamable $r6 = tLDRr renamable $r0, $r3, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep219) ; CHECK: renamable $r5 = nsw tADDhirr killed renamable $r5, killed renamable $r6, 14 /* CC::al */, $noreg - ; CHECK: tSTRr killed renamable $r5, renamable $r0, $r3, 14 /* CC::al */, $noreg :: (store 4 into %ir.scevgep219) + ; CHECK: tSTRr killed renamable $r5, renamable $r0, $r3, 14 /* CC::al */, $noreg :: (store (s32) into %ir.scevgep219) ; CHECK: renamable $r5, dead $cpsr = tADDrr renamable $r1, renamable $r3, 14 /* CC::al */, $noreg - ; CHECK: renamable $r4 = tLDRi renamable $r7, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep11) - ; CHECK: renamable $r6 = tLDRi renamable $r5, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep14) + ; CHECK: renamable $r4 = tLDRi renamable $r7, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep11) + ; CHECK: renamable $r6 = tLDRi renamable $r5, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep14) ; CHECK: renamable $r9 = t2EORrr killed renamable $r4, killed renamable $r6, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r6, dead $cpsr = tADDrr renamable $r0, renamable $r3, 14 /* CC::al */, $noreg ; CHECK: $r11 = t2ADDri $r6, 4, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r3, dead $cpsr = tADDi8 killed renamable $r3, 16, 14 /* CC::al */, $noreg - ; CHECK: t2LDMIA killed $r11, 14 /* CC::al */, $noreg, def $r4, def $r10, def $r11 :: (load 4 from %ir.scevgep9), (load 4 from %ir.scevgep8), (load 4 from %ir.scevgep1) + ; CHECK: t2LDMIA killed $r11, 14 /* CC::al */, $noreg, def $r4, def $r10, def $r11 :: (load (s32) from %ir.scevgep9), (load (s32) from %ir.scevgep8), (load (s32) from %ir.scevgep1) ; CHECK: renamable $r4 = nsw tADDhirr killed renamable $r4, killed renamable $r9, 14 /* CC::al */, $noreg - ; CHECK: tSTRi killed renamable $r4, renamable $r6, 1, 14 /* CC::al */, $noreg :: (store 4 into %ir.scevgep9) - ; CHECK: renamable $r9 = t2LDRi12 renamable $r5, 8, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep12) - ; CHECK: renamable $r4 = tLDRi renamable $r7, 2, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep10) + ; CHECK: tSTRi killed renamable $r4, renamable $r6, 1, 14 /* CC::al */, $noreg :: (store (s32) into %ir.scevgep9) + ; CHECK: renamable $r9 = t2LDRi12 renamable $r5, 8, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep12) + ; CHECK: renamable $r4 = tLDRi renamable $r7, 2, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep10) ; CHECK: renamable $r4 = t2EORrr killed renamable $r4, killed renamable $r9, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r4 = nsw tADDhirr killed renamable $r4, killed renamable $r10, 14 /* CC::al */, $noreg - ; CHECK: tSTRi killed renamable $r4, renamable $r6, 2, 14 /* CC::al */, $noreg :: (store 4 into %ir.scevgep8) - ; CHECK: renamable $r4 = tLDRi killed renamable $r5, 3, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep5) - ; CHECK: renamable $r5 = tLDRi killed renamable $r7, 3, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep3) + ; CHECK: tSTRi killed renamable $r4, renamable $r6, 2, 14 /* CC::al */, $noreg :: (store (s32) into %ir.scevgep8) + ; CHECK: renamable $r4 = tLDRi killed renamable $r5, 3, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep5) + ; CHECK: renamable $r5 = tLDRi killed renamable $r7, 3, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3) ; CHECK: renamable $r4, dead $cpsr = tEOR killed renamable $r4, killed renamable $r5, 14 /* CC::al */, $noreg ; CHECK: renamable $r4 = nsw tADDhirr killed renamable $r4, killed renamable $r11, 14 /* CC::al */, $noreg - ; CHECK: tSTRi killed renamable $r4, killed renamable $r6, 3, 14 /* CC::al */, $noreg :: (store 4 into %ir.scevgep1) + ; CHECK: tSTRi killed renamable $r4, killed renamable $r6, 3, 14 /* CC::al */, $noreg :: (store (s32) into %ir.scevgep1) ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.5 ; CHECK: bb.6.bb13: ; CHECK: successors: %bb.12(0x30000000), %bb.7(0x50000000) ; CHECK: liveins: $lr, $r0, $r1, $r2, $r8 - ; CHECK: renamable $r5 = tLDRspi $sp, 0, 14 /* CC::al */, $noreg :: (load 4 from %stack.0) + ; CHECK: renamable $r5 = tLDRspi $sp, 0, 14 /* CC::al */, $noreg :: (load (s32) from %stack.0) ; CHECK: tCBZ $r5, %bb.12 ; CHECK: bb.7.bb16: ; CHECK: successors: %bb.8(0x40000000), %bb.9(0x40000000) ; CHECK: liveins: $r0, $r1, $r2, $r5, $r8 - ; CHECK: renamable $lr = t2LDRs renamable $r1, renamable $r8, 2, 14 /* CC::al */, $noreg :: (load 4 from %ir.tmp17) + ; CHECK: renamable $lr = t2LDRs renamable $r1, renamable $r8, 2, 14 /* CC::al */, $noreg :: (load (s32) from %ir.tmp17) ; CHECK: tCMPi8 renamable $r5, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr - ; CHECK: renamable $r3 = t2LDRs renamable $r2, renamable $r8, 2, 14 /* CC::al */, $noreg :: (load 4 from %ir.tmp19) + ; CHECK: renamable $r3 = t2LDRs renamable $r2, renamable $r8, 2, 14 /* CC::al */, $noreg :: (load (s32) from %ir.tmp19) ; CHECK: renamable $lr = t2EORrr killed renamable $lr, killed renamable $r3, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $r3 = t2LDRs renamable $r0, renamable $r8, 2, 14 /* CC::al */, $noreg :: (load 4 from %ir.tmp22) + ; CHECK: renamable $r3 = t2LDRs renamable $r0, renamable $r8, 2, 14 /* CC::al */, $noreg :: (load (s32) from %ir.tmp22) ; CHECK: renamable $r3 = nsw tADDhirr killed renamable $r3, killed renamable $lr, 14 /* CC::al */, $noreg - ; CHECK: t2STRs killed renamable $r3, renamable $r0, renamable $r8, 2, 14 /* CC::al */, $noreg :: (store 4 into %ir.tmp22) + ; CHECK: t2STRs killed renamable $r3, renamable $r0, renamable $r8, 2, 14 /* CC::al */, $noreg :: (store (s32) into %ir.tmp22) ; CHECK: tBcc %bb.9, 1 /* CC::ne */, killed $cpsr ; CHECK: bb.8: ; CHECK: successors: %bb.12(0x80000000) @@ -326,12 +326,12 @@ body: | ; CHECK: liveins: $r0, $r1, $r2, $r5, $r8 ; CHECK: renamable $r3 = nuw t2ADDri renamable $r8, 1, 14 /* CC::al */, $noreg, $noreg ; CHECK: tCMPi8 killed renamable $r5, 2, 14 /* CC::al */, $noreg, implicit-def $cpsr - ; CHECK: renamable $r7 = t2LDRs renamable $r1, renamable $r3, 2, 14 /* CC::al */, $noreg :: (load 4 from %ir.tmp58) - ; CHECK: renamable $r6 = t2LDRs renamable $r2, renamable $r3, 2, 14 /* CC::al */, $noreg :: (load 4 from %ir.tmp60) + ; CHECK: renamable $r7 = t2LDRs renamable $r1, renamable $r3, 2, 14 /* CC::al */, $noreg :: (load (s32) from %ir.tmp58) + ; CHECK: renamable $r6 = t2LDRs renamable $r2, renamable $r3, 2, 14 /* CC::al */, $noreg :: (load (s32) from %ir.tmp60) ; CHECK: renamable $r7 = t2EORrr killed renamable $r7, killed renamable $r6, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $r6 = t2LDRs renamable $r0, renamable $r3, 2, 14 /* CC::al */, $noreg :: (load 4 from %ir.tmp63) + ; CHECK: renamable $r6 = t2LDRs renamable $r0, renamable $r3, 2, 14 /* CC::al */, $noreg :: (load (s32) from %ir.tmp63) ; CHECK: renamable $r7 = nsw tADDhirr killed renamable $r7, killed renamable $r6, 14 /* CC::al */, $noreg - ; CHECK: t2STRs killed renamable $r7, renamable $r0, killed renamable $r3, 2, 14 /* CC::al */, $noreg :: (store 4 into %ir.tmp63) + ; CHECK: t2STRs killed renamable $r7, renamable $r0, killed renamable $r3, 2, 14 /* CC::al */, $noreg :: (store (s32) into %ir.tmp63) ; CHECK: tBcc %bb.11, 1 /* CC::ne */, killed $cpsr ; CHECK: bb.10: ; CHECK: successors: %bb.12(0x80000000) @@ -342,12 +342,12 @@ body: | ; CHECK: liveins: $r0, $r1, $r2, $r8 ; CHECK: renamable $r3 = nuw t2ADDri killed renamable $r8, 2, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $lr = t2MOVi 3, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $r1 = t2LDRs killed renamable $r1, renamable $r3, 2, 14 /* CC::al */, $noreg :: (load 4 from %ir.tmp69) - ; CHECK: renamable $r2 = t2LDRs killed renamable $r2, renamable $r3, 2, 14 /* CC::al */, $noreg :: (load 4 from %ir.tmp71) + ; CHECK: renamable $r1 = t2LDRs killed renamable $r1, renamable $r3, 2, 14 /* CC::al */, $noreg :: (load (s32) from %ir.tmp69) + ; CHECK: renamable $r2 = t2LDRs killed renamable $r2, renamable $r3, 2, 14 /* CC::al */, $noreg :: (load (s32) from %ir.tmp71) ; CHECK: renamable $r1, dead $cpsr = tEOR killed renamable $r1, killed renamable $r2, 14 /* CC::al */, $noreg - ; CHECK: renamable $r2 = t2LDRs renamable $r0, renamable $r3, 2, 14 /* CC::al */, $noreg :: (load 4 from %ir.tmp74) + ; CHECK: renamable $r2 = t2LDRs renamable $r0, renamable $r3, 2, 14 /* CC::al */, $noreg :: (load (s32) from %ir.tmp74) ; CHECK: renamable $r1 = nsw tADDhirr killed renamable $r1, killed renamable $r2, 14 /* CC::al */, $noreg - ; CHECK: t2STRs killed renamable $r1, killed renamable $r0, killed renamable $r3, 2, 14 /* CC::al */, $noreg :: (store 4 into %ir.tmp74) + ; CHECK: t2STRs killed renamable $r1, killed renamable $r0, killed renamable $r3, 2, 14 /* CC::al */, $noreg :: (store (s32) into %ir.tmp74) ; CHECK: bb.12.bb27: ; CHECK: liveins: $lr ; CHECK: $r0 = tMOVr killed $lr, 14 /* CC::al */, $noreg @@ -379,7 +379,7 @@ body: | renamable $r4, dead $cpsr = tSUBi3 renamable $r3, 1, 14, $noreg renamable $r7 = t2ANDri renamable $r3, 3, 14, $noreg, $noreg tCMPi8 killed renamable $r4, 3, 14, $noreg, implicit-def $cpsr - tSTRspi killed renamable $r7, $sp, 0, 14, $noreg :: (store 4 into %stack.0) + tSTRspi killed renamable $r7, $sp, 0, 14, $noreg :: (store (s32) into %stack.0) tBcc %bb.4, 2, killed $cpsr bb.2: @@ -413,36 +413,36 @@ body: | successors: %bb.5(0x7c000000), %bb.6(0x04000000) liveins: $r0, $r1, $r2, $r3, $r8, $lr - renamable $r5 = tLDRr renamable $r1, $r3, 14, $noreg :: (load 4 from %ir.scevgep617) + renamable $r5 = tLDRr renamable $r1, $r3, 14, $noreg :: (load (s32) from %ir.scevgep617) renamable $r7, dead $cpsr = tADDrr renamable $r2, renamable $r3, 14, $noreg - renamable $r6 = tLDRr renamable $r2, $r3, 14, $noreg :: (load 4 from %ir.scevgep418) + renamable $r6 = tLDRr renamable $r2, $r3, 14, $noreg :: (load (s32) from %ir.scevgep418) $r12 = tMOVr $lr, 14, $noreg renamable $r8 = nuw t2ADDri killed renamable $r8, 4, 14, $noreg, $noreg renamable $r5, dead $cpsr = tEOR killed renamable $r5, killed renamable $r6, 14, $noreg - renamable $r6 = tLDRr renamable $r0, $r3, 14, $noreg :: (load 4 from %ir.scevgep219) + renamable $r6 = tLDRr renamable $r0, $r3, 14, $noreg :: (load (s32) from %ir.scevgep219) renamable $lr = t2LoopDec killed renamable $lr, 1 renamable $r5 = nsw tADDhirr killed renamable $r5, killed renamable $r6, 14, $noreg - tSTRr killed renamable $r5, renamable $r0, $r3, 14, $noreg :: (store 4 into %ir.scevgep219) + tSTRr killed renamable $r5, renamable $r0, $r3, 14, $noreg :: (store (s32) into %ir.scevgep219) renamable $r5, dead $cpsr = tADDrr renamable $r1, renamable $r3, 14, $noreg - renamable $r4 = tLDRi renamable $r7, 1, 14, $noreg :: (load 4 from %ir.scevgep11) - renamable $r6 = tLDRi renamable $r5, 1, 14, $noreg :: (load 4 from %ir.scevgep14) + renamable $r4 = tLDRi renamable $r7, 1, 14, $noreg :: (load (s32) from %ir.scevgep11) + renamable $r6 = tLDRi renamable $r5, 1, 14, $noreg :: (load (s32) from %ir.scevgep14) renamable $r9 = t2EORrr killed renamable $r4, killed renamable $r6, 14, $noreg, $noreg renamable $r6, dead $cpsr = tADDrr renamable $r0, renamable $r3, 14, $noreg $r11 = t2ADDri $r6, 4, 14, $noreg, $noreg renamable $r3, dead $cpsr = tADDi8 killed renamable $r3, 16, 14, $noreg - t2LDMIA killed $r11, 14, $noreg, def $r4, def $r10, def $r11 :: (load 4 from %ir.scevgep9), (load 4 from %ir.scevgep8), (load 4 from %ir.scevgep1) + t2LDMIA killed $r11, 14, $noreg, def $r4, def $r10, def $r11 :: (load (s32) from %ir.scevgep9), (load (s32) from %ir.scevgep8), (load (s32) from %ir.scevgep1) renamable $r4 = nsw tADDhirr killed renamable $r4, killed renamable $r9, 14, $noreg - tSTRi killed renamable $r4, renamable $r6, 1, 14, $noreg :: (store 4 into %ir.scevgep9) - renamable $r9 = t2LDRi12 renamable $r5, 8, 14, $noreg :: (load 4 from %ir.scevgep12) - renamable $r4 = tLDRi renamable $r7, 2, 14, $noreg :: (load 4 from %ir.scevgep10) + tSTRi killed renamable $r4, renamable $r6, 1, 14, $noreg :: (store (s32) into %ir.scevgep9) + renamable $r9 = t2LDRi12 renamable $r5, 8, 14, $noreg :: (load (s32) from %ir.scevgep12) + renamable $r4 = tLDRi renamable $r7, 2, 14, $noreg :: (load (s32) from %ir.scevgep10) renamable $r4 = t2EORrr killed renamable $r4, killed renamable $r9, 14, $noreg, $noreg renamable $r4 = nsw tADDhirr killed renamable $r4, killed renamable $r10, 14, $noreg - tSTRi killed renamable $r4, renamable $r6, 2, 14, $noreg :: (store 4 into %ir.scevgep8) - renamable $r4 = tLDRi killed renamable $r5, 3, 14, $noreg :: (load 4 from %ir.scevgep5) - renamable $r5 = tLDRi killed renamable $r7, 3, 14, $noreg :: (load 4 from %ir.scevgep3) + tSTRi killed renamable $r4, renamable $r6, 2, 14, $noreg :: (store (s32) into %ir.scevgep8) + renamable $r4 = tLDRi killed renamable $r5, 3, 14, $noreg :: (load (s32) from %ir.scevgep5) + renamable $r5 = tLDRi killed renamable $r7, 3, 14, $noreg :: (load (s32) from %ir.scevgep3) renamable $r4, dead $cpsr = tEOR killed renamable $r4, killed renamable $r5, 14, $noreg renamable $r4 = nsw tADDhirr killed renamable $r4, killed renamable $r11, 14, $noreg - tSTRi killed renamable $r4, killed renamable $r6, 3, 14, $noreg :: (store 4 into %ir.scevgep1) + tSTRi killed renamable $r4, killed renamable $r6, 3, 14, $noreg :: (store (s32) into %ir.scevgep1) t2LoopEnd renamable $lr, %bb.5, implicit-def dead $cpsr tB %bb.6, 14, $noreg @@ -450,20 +450,20 @@ body: | successors: %bb.12(0x30000000), %bb.7(0x50000000) liveins: $lr, $r0, $r1, $r2, $r8 - renamable $r5 = tLDRspi $sp, 0, 14, $noreg :: (load 4 from %stack.0) + renamable $r5 = tLDRspi $sp, 0, 14, $noreg :: (load (s32) from %stack.0) tCBZ $r5, %bb.12 bb.7.bb16: successors: %bb.8(0x40000000), %bb.9(0x40000000) liveins: $r0, $r1, $r2, $r5, $r8 - renamable $lr = t2LDRs renamable $r1, renamable $r8, 2, 14, $noreg :: (load 4 from %ir.tmp17) + renamable $lr = t2LDRs renamable $r1, renamable $r8, 2, 14, $noreg :: (load (s32) from %ir.tmp17) tCMPi8 renamable $r5, 1, 14, $noreg, implicit-def $cpsr - renamable $r3 = t2LDRs renamable $r2, renamable $r8, 2, 14, $noreg :: (load 4 from %ir.tmp19) + renamable $r3 = t2LDRs renamable $r2, renamable $r8, 2, 14, $noreg :: (load (s32) from %ir.tmp19) renamable $lr = t2EORrr killed renamable $lr, killed renamable $r3, 14, $noreg, $noreg - renamable $r3 = t2LDRs renamable $r0, renamable $r8, 2, 14, $noreg :: (load 4 from %ir.tmp22) + renamable $r3 = t2LDRs renamable $r0, renamable $r8, 2, 14, $noreg :: (load (s32) from %ir.tmp22) renamable $r3 = nsw tADDhirr killed renamable $r3, killed renamable $lr, 14, $noreg - t2STRs killed renamable $r3, renamable $r0, renamable $r8, 2, 14, $noreg :: (store 4 into %ir.tmp22) + t2STRs killed renamable $r3, renamable $r0, renamable $r8, 2, 14, $noreg :: (store (s32) into %ir.tmp22) tBcc %bb.9, 1, killed $cpsr bb.8: @@ -478,12 +478,12 @@ body: | renamable $r3 = nuw t2ADDri renamable $r8, 1, 14, $noreg, $noreg tCMPi8 killed renamable $r5, 2, 14, $noreg, implicit-def $cpsr - renamable $r7 = t2LDRs renamable $r1, renamable $r3, 2, 14, $noreg :: (load 4 from %ir.tmp58) - renamable $r6 = t2LDRs renamable $r2, renamable $r3, 2, 14, $noreg :: (load 4 from %ir.tmp60) + renamable $r7 = t2LDRs renamable $r1, renamable $r3, 2, 14, $noreg :: (load (s32) from %ir.tmp58) + renamable $r6 = t2LDRs renamable $r2, renamable $r3, 2, 14, $noreg :: (load (s32) from %ir.tmp60) renamable $r7 = t2EORrr killed renamable $r7, killed renamable $r6, 14, $noreg, $noreg - renamable $r6 = t2LDRs renamable $r0, renamable $r3, 2, 14, $noreg :: (load 4 from %ir.tmp63) + renamable $r6 = t2LDRs renamable $r0, renamable $r3, 2, 14, $noreg :: (load (s32) from %ir.tmp63) renamable $r7 = nsw tADDhirr killed renamable $r7, killed renamable $r6, 14, $noreg - t2STRs killed renamable $r7, renamable $r0, killed renamable $r3, 2, 14, $noreg :: (store 4 into %ir.tmp63) + t2STRs killed renamable $r7, renamable $r0, killed renamable $r3, 2, 14, $noreg :: (store (s32) into %ir.tmp63) tBcc %bb.11, 1, killed $cpsr bb.10: @@ -498,12 +498,12 @@ body: | renamable $r3 = nuw t2ADDri killed renamable $r8, 2, 14, $noreg, $noreg renamable $lr = t2MOVi 3, 14, $noreg, $noreg - renamable $r1 = t2LDRs killed renamable $r1, renamable $r3, 2, 14, $noreg :: (load 4 from %ir.tmp69) - renamable $r2 = t2LDRs killed renamable $r2, renamable $r3, 2, 14, $noreg :: (load 4 from %ir.tmp71) + renamable $r1 = t2LDRs killed renamable $r1, renamable $r3, 2, 14, $noreg :: (load (s32) from %ir.tmp69) + renamable $r2 = t2LDRs killed renamable $r2, renamable $r3, 2, 14, $noreg :: (load (s32) from %ir.tmp71) renamable $r1, dead $cpsr = tEOR killed renamable $r1, killed renamable $r2, 14, $noreg - renamable $r2 = t2LDRs renamable $r0, renamable $r3, 2, 14, $noreg :: (load 4 from %ir.tmp74) + renamable $r2 = t2LDRs renamable $r0, renamable $r3, 2, 14, $noreg :: (load (s32) from %ir.tmp74) renamable $r1 = nsw tADDhirr killed renamable $r1, killed renamable $r2, 14, $noreg - t2STRs killed renamable $r1, killed renamable $r0, killed renamable $r3, 2, 14, $noreg :: (store 4 into %ir.tmp74) + t2STRs killed renamable $r1, killed renamable $r0, killed renamable $r3, 2, 14, $noreg :: (store (s32) into %ir.tmp74) bb.12.bb27: liveins: $lr diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-liveout.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-liveout.mir index 06015ba2d69c3..8a1a3ed66c793 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-liveout.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-liveout.mir @@ -245,7 +245,7 @@ body: | ; CHECK: renamable $r4, dead $cpsr = tSUBi3 renamable $r3, 1, 14 /* CC::al */, $noreg ; CHECK: renamable $r7 = t2ANDri renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg ; CHECK: tCMPi8 killed renamable $r4, 3, 14 /* CC::al */, $noreg, implicit-def $cpsr - ; CHECK: tSTRspi killed renamable $r7, $sp, 0, 14 /* CC::al */, $noreg :: (store 4 into %stack.0) + ; CHECK: tSTRspi killed renamable $r7, $sp, 0, 14 /* CC::al */, $noreg :: (store (s32) into %stack.0) ; CHECK: tBcc %bb.4, 2 /* CC::hs */, killed $cpsr ; CHECK: bb.2: ; CHECK: successors: %bb.6(0x80000000) @@ -271,50 +271,50 @@ body: | ; CHECK: bb.5.bb28: ; CHECK: successors: %bb.5(0x7c000000), %bb.6(0x04000000) ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3, $r8 - ; CHECK: renamable $r5 = tLDRr renamable $r1, $r3, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep617) + ; CHECK: renamable $r5 = tLDRr renamable $r1, $r3, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep617) ; CHECK: renamable $r7, dead $cpsr = tADDrr renamable $r2, renamable $r3, 14 /* CC::al */, $noreg - ; CHECK: renamable $r6 = tLDRr renamable $r2, $r3, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep418) + ; CHECK: renamable $r6 = tLDRr renamable $r2, $r3, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep418) ; CHECK: renamable $r8 = nuw t2ADDri killed renamable $r8, 4, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r5, dead $cpsr = tEOR killed renamable $r5, killed renamable $r6, 14 /* CC::al */, $noreg - ; CHECK: renamable $r6 = tLDRr renamable $r0, $r3, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep219) + ; CHECK: renamable $r6 = tLDRr renamable $r0, $r3, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep219) ; CHECK: renamable $r5 = nsw tADDhirr killed renamable $r5, killed renamable $r6, 14 /* CC::al */, $noreg - ; CHECK: tSTRr killed renamable $r5, renamable $r0, $r3, 14 /* CC::al */, $noreg :: (store 4 into %ir.scevgep219) + ; CHECK: tSTRr killed renamable $r5, renamable $r0, $r3, 14 /* CC::al */, $noreg :: (store (s32) into %ir.scevgep219) ; CHECK: renamable $r5, dead $cpsr = tADDrr renamable $r1, renamable $r3, 14 /* CC::al */, $noreg - ; CHECK: renamable $r4 = tLDRi renamable $r7, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep11) - ; CHECK: renamable $r6 = tLDRi renamable $r5, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep14) + ; CHECK: renamable $r4 = tLDRi renamable $r7, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep11) + ; CHECK: renamable $r6 = tLDRi renamable $r5, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep14) ; CHECK: renamable $r9 = t2EORrr killed renamable $r4, killed renamable $r6, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r6, dead $cpsr = tADDrr renamable $r0, renamable $r3, 14 /* CC::al */, $noreg ; CHECK: $r11 = t2ADDri $r6, 4, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r3, dead $cpsr = tADDi8 killed renamable $r3, 16, 14 /* CC::al */, $noreg - ; CHECK: t2LDMIA killed $r11, 14 /* CC::al */, $noreg, def $r4, def $r10, def $r11 :: (load 4 from %ir.scevgep9), (load 4 from %ir.scevgep8), (load 4 from %ir.scevgep1) + ; CHECK: t2LDMIA killed $r11, 14 /* CC::al */, $noreg, def $r4, def $r10, def $r11 :: (load (s32) from %ir.scevgep9), (load (s32) from %ir.scevgep8), (load (s32) from %ir.scevgep1) ; CHECK: renamable $r4 = nsw tADDhirr killed renamable $r4, killed renamable $r9, 14 /* CC::al */, $noreg - ; CHECK: tSTRi killed renamable $r4, renamable $r6, 1, 14 /* CC::al */, $noreg :: (store 4 into %ir.scevgep9) - ; CHECK: renamable $r9 = t2LDRi12 renamable $r5, 8, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep12) - ; CHECK: renamable $r4 = tLDRi renamable $r7, 2, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep10) + ; CHECK: tSTRi killed renamable $r4, renamable $r6, 1, 14 /* CC::al */, $noreg :: (store (s32) into %ir.scevgep9) + ; CHECK: renamable $r9 = t2LDRi12 renamable $r5, 8, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep12) + ; CHECK: renamable $r4 = tLDRi renamable $r7, 2, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep10) ; CHECK: renamable $r4 = t2EORrr killed renamable $r4, killed renamable $r9, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r4 = nsw tADDhirr killed renamable $r4, killed renamable $r10, 14 /* CC::al */, $noreg - ; CHECK: tSTRi killed renamable $r4, renamable $r6, 2, 14 /* CC::al */, $noreg :: (store 4 into %ir.scevgep8) - ; CHECK: renamable $r4 = tLDRi killed renamable $r5, 3, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep5) - ; CHECK: renamable $r5 = tLDRi killed renamable $r7, 3, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep3) + ; CHECK: tSTRi killed renamable $r4, renamable $r6, 2, 14 /* CC::al */, $noreg :: (store (s32) into %ir.scevgep8) + ; CHECK: renamable $r4 = tLDRi killed renamable $r5, 3, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep5) + ; CHECK: renamable $r5 = tLDRi killed renamable $r7, 3, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3) ; CHECK: renamable $r4, dead $cpsr = tEOR killed renamable $r4, killed renamable $r5, 14 /* CC::al */, $noreg ; CHECK: renamable $r4 = nsw tADDhirr killed renamable $r4, killed renamable $r11, 14 /* CC::al */, $noreg - ; CHECK: tSTRi killed renamable $r4, killed renamable $r6, 3, 14 /* CC::al */, $noreg :: (store 4 into %ir.scevgep1) + ; CHECK: tSTRi killed renamable $r4, killed renamable $r6, 3, 14 /* CC::al */, $noreg :: (store (s32) into %ir.scevgep1) ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.5 ; CHECK: bb.6.bb13: ; CHECK: successors: %bb.12(0x30000000), %bb.7(0x50000000) ; CHECK: liveins: $lr, $r0, $r1, $r2, $r8 - ; CHECK: renamable $r5 = tLDRspi $sp, 0, 14 /* CC::al */, $noreg :: (load 4 from %stack.0) + ; CHECK: renamable $r5 = tLDRspi $sp, 0, 14 /* CC::al */, $noreg :: (load (s32) from %stack.0) ; CHECK: tCBZ $r5, %bb.12 ; CHECK: bb.7.bb16: ; CHECK: successors: %bb.8(0x40000000), %bb.9(0x40000000) ; CHECK: liveins: $lr, $r0, $r1, $r2, $r5, $r8 - ; CHECK: renamable $lr = t2LDRs renamable $r1, renamable $r8, 2, 14 /* CC::al */, $noreg :: (load 4 from %ir.tmp17) + ; CHECK: renamable $lr = t2LDRs renamable $r1, renamable $r8, 2, 14 /* CC::al */, $noreg :: (load (s32) from %ir.tmp17) ; CHECK: tCMPi8 renamable $r5, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr - ; CHECK: renamable $r3 = t2LDRs renamable $r2, renamable $r8, 2, 14 /* CC::al */, $noreg :: (load 4 from %ir.tmp19) + ; CHECK: renamable $r3 = t2LDRs renamable $r2, renamable $r8, 2, 14 /* CC::al */, $noreg :: (load (s32) from %ir.tmp19) ; CHECK: renamable $lr = t2EORrr killed renamable $lr, killed renamable $r3, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $r3 = t2LDRs renamable $r0, renamable $r8, 2, 14 /* CC::al */, $noreg :: (load 4 from %ir.tmp22) + ; CHECK: renamable $r3 = t2LDRs renamable $r0, renamable $r8, 2, 14 /* CC::al */, $noreg :: (load (s32) from %ir.tmp22) ; CHECK: renamable $r3 = nsw tADDhirr killed renamable $r3, killed renamable $lr, 14 /* CC::al */, $noreg - ; CHECK: t2STRs killed renamable $r3, renamable $r0, renamable $r8, 2, 14 /* CC::al */, $noreg :: (store 4 into %ir.tmp22) + ; CHECK: t2STRs killed renamable $r3, renamable $r0, renamable $r8, 2, 14 /* CC::al */, $noreg :: (store (s32) into %ir.tmp22) ; CHECK: tBcc %bb.9, 1 /* CC::ne */, killed $cpsr ; CHECK: bb.8: ; CHECK: successors: %bb.12(0x80000000) @@ -325,12 +325,12 @@ body: | ; CHECK: liveins: $r0, $r1, $r2, $r5, $r8 ; CHECK: renamable $r3 = nuw t2ADDri renamable $r8, 1, 14 /* CC::al */, $noreg, $noreg ; CHECK: tCMPi8 killed renamable $r5, 2, 14 /* CC::al */, $noreg, implicit-def $cpsr - ; CHECK: renamable $r7 = t2LDRs renamable $r1, renamable $r3, 2, 14 /* CC::al */, $noreg :: (load 4 from %ir.tmp58) - ; CHECK: renamable $r6 = t2LDRs renamable $r2, renamable $r3, 2, 14 /* CC::al */, $noreg :: (load 4 from %ir.tmp60) + ; CHECK: renamable $r7 = t2LDRs renamable $r1, renamable $r3, 2, 14 /* CC::al */, $noreg :: (load (s32) from %ir.tmp58) + ; CHECK: renamable $r6 = t2LDRs renamable $r2, renamable $r3, 2, 14 /* CC::al */, $noreg :: (load (s32) from %ir.tmp60) ; CHECK: renamable $r7 = t2EORrr killed renamable $r7, killed renamable $r6, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $r6 = t2LDRs renamable $r0, renamable $r3, 2, 14 /* CC::al */, $noreg :: (load 4 from %ir.tmp63) + ; CHECK: renamable $r6 = t2LDRs renamable $r0, renamable $r3, 2, 14 /* CC::al */, $noreg :: (load (s32) from %ir.tmp63) ; CHECK: renamable $r7 = nsw tADDhirr killed renamable $r7, killed renamable $r6, 14 /* CC::al */, $noreg - ; CHECK: t2STRs killed renamable $r7, renamable $r0, killed renamable $r3, 2, 14 /* CC::al */, $noreg :: (store 4 into %ir.tmp63) + ; CHECK: t2STRs killed renamable $r7, renamable $r0, killed renamable $r3, 2, 14 /* CC::al */, $noreg :: (store (s32) into %ir.tmp63) ; CHECK: tBcc %bb.11, 1 /* CC::ne */, killed $cpsr ; CHECK: bb.10: ; CHECK: successors: %bb.12(0x80000000) @@ -341,12 +341,12 @@ body: | ; CHECK: liveins: $r0, $r1, $r2, $r8 ; CHECK: renamable $r3 = nuw t2ADDri killed renamable $r8, 2, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $lr = t2MOVi 3, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $r1 = t2LDRs killed renamable $r1, renamable $r3, 2, 14 /* CC::al */, $noreg :: (load 4 from %ir.tmp69) - ; CHECK: renamable $r2 = t2LDRs killed renamable $r2, renamable $r3, 2, 14 /* CC::al */, $noreg :: (load 4 from %ir.tmp71) + ; CHECK: renamable $r1 = t2LDRs killed renamable $r1, renamable $r3, 2, 14 /* CC::al */, $noreg :: (load (s32) from %ir.tmp69) + ; CHECK: renamable $r2 = t2LDRs killed renamable $r2, renamable $r3, 2, 14 /* CC::al */, $noreg :: (load (s32) from %ir.tmp71) ; CHECK: renamable $r1, dead $cpsr = tEOR killed renamable $r1, killed renamable $r2, 14 /* CC::al */, $noreg - ; CHECK: renamable $r2 = t2LDRs renamable $r0, renamable $r3, 2, 14 /* CC::al */, $noreg :: (load 4 from %ir.tmp74) + ; CHECK: renamable $r2 = t2LDRs renamable $r0, renamable $r3, 2, 14 /* CC::al */, $noreg :: (load (s32) from %ir.tmp74) ; CHECK: renamable $r1 = nsw tADDhirr killed renamable $r1, killed renamable $r2, 14 /* CC::al */, $noreg - ; CHECK: t2STRs killed renamable $r1, killed renamable $r0, killed renamable $r3, 2, 14 /* CC::al */, $noreg :: (store 4 into %ir.tmp74) + ; CHECK: t2STRs killed renamable $r1, killed renamable $r0, killed renamable $r3, 2, 14 /* CC::al */, $noreg :: (store (s32) into %ir.tmp74) ; CHECK: bb.12.bb27: ; CHECK: liveins: $lr ; CHECK: $r0 = tMOVr killed $lr, 14 /* CC::al */, $noreg @@ -378,7 +378,7 @@ body: | renamable $r4, dead $cpsr = tSUBi3 renamable $r3, 1, 14, $noreg renamable $r7 = t2ANDri renamable $r3, 3, 14, $noreg, $noreg tCMPi8 killed renamable $r4, 3, 14, $noreg, implicit-def $cpsr - tSTRspi killed renamable $r7, $sp, 0, 14, $noreg :: (store 4 into %stack.0) + tSTRspi killed renamable $r7, $sp, 0, 14, $noreg :: (store (s32) into %stack.0) tBcc %bb.4, 2, killed $cpsr bb.2: @@ -412,35 +412,35 @@ body: | successors: %bb.5(0x7c000000), %bb.6(0x04000000) liveins: $r0, $r1, $r2, $r3, $r8, $lr - renamable $r5 = tLDRr renamable $r1, $r3, 14, $noreg :: (load 4 from %ir.scevgep617) + renamable $r5 = tLDRr renamable $r1, $r3, 14, $noreg :: (load (s32) from %ir.scevgep617) renamable $r7, dead $cpsr = tADDrr renamable $r2, renamable $r3, 14, $noreg - renamable $r6 = tLDRr renamable $r2, $r3, 14, $noreg :: (load 4 from %ir.scevgep418) + renamable $r6 = tLDRr renamable $r2, $r3, 14, $noreg :: (load (s32) from %ir.scevgep418) renamable $r8 = nuw t2ADDri killed renamable $r8, 4, 14, $noreg, $noreg renamable $r5, dead $cpsr = tEOR killed renamable $r5, killed renamable $r6, 14, $noreg - renamable $r6 = tLDRr renamable $r0, $r3, 14, $noreg :: (load 4 from %ir.scevgep219) + renamable $r6 = tLDRr renamable $r0, $r3, 14, $noreg :: (load (s32) from %ir.scevgep219) renamable $lr = t2LoopDec killed renamable $lr, 1 renamable $r5 = nsw tADDhirr killed renamable $r5, killed renamable $r6, 14, $noreg - tSTRr killed renamable $r5, renamable $r0, $r3, 14, $noreg :: (store 4 into %ir.scevgep219) + tSTRr killed renamable $r5, renamable $r0, $r3, 14, $noreg :: (store (s32) into %ir.scevgep219) renamable $r5, dead $cpsr = tADDrr renamable $r1, renamable $r3, 14, $noreg - renamable $r4 = tLDRi renamable $r7, 1, 14, $noreg :: (load 4 from %ir.scevgep11) - renamable $r6 = tLDRi renamable $r5, 1, 14, $noreg :: (load 4 from %ir.scevgep14) + renamable $r4 = tLDRi renamable $r7, 1, 14, $noreg :: (load (s32) from %ir.scevgep11) + renamable $r6 = tLDRi renamable $r5, 1, 14, $noreg :: (load (s32) from %ir.scevgep14) renamable $r9 = t2EORrr killed renamable $r4, killed renamable $r6, 14, $noreg, $noreg renamable $r6, dead $cpsr = tADDrr renamable $r0, renamable $r3, 14, $noreg $r11 = t2ADDri $r6, 4, 14, $noreg, $noreg renamable $r3, dead $cpsr = tADDi8 killed renamable $r3, 16, 14, $noreg - t2LDMIA killed $r11, 14, $noreg, def $r4, def $r10, def $r11 :: (load 4 from %ir.scevgep9), (load 4 from %ir.scevgep8), (load 4 from %ir.scevgep1) + t2LDMIA killed $r11, 14, $noreg, def $r4, def $r10, def $r11 :: (load (s32) from %ir.scevgep9), (load (s32) from %ir.scevgep8), (load (s32) from %ir.scevgep1) renamable $r4 = nsw tADDhirr killed renamable $r4, killed renamable $r9, 14, $noreg - tSTRi killed renamable $r4, renamable $r6, 1, 14, $noreg :: (store 4 into %ir.scevgep9) - renamable $r9 = t2LDRi12 renamable $r5, 8, 14, $noreg :: (load 4 from %ir.scevgep12) - renamable $r4 = tLDRi renamable $r7, 2, 14, $noreg :: (load 4 from %ir.scevgep10) + tSTRi killed renamable $r4, renamable $r6, 1, 14, $noreg :: (store (s32) into %ir.scevgep9) + renamable $r9 = t2LDRi12 renamable $r5, 8, 14, $noreg :: (load (s32) from %ir.scevgep12) + renamable $r4 = tLDRi renamable $r7, 2, 14, $noreg :: (load (s32) from %ir.scevgep10) renamable $r4 = t2EORrr killed renamable $r4, killed renamable $r9, 14, $noreg, $noreg renamable $r4 = nsw tADDhirr killed renamable $r4, killed renamable $r10, 14, $noreg - tSTRi killed renamable $r4, renamable $r6, 2, 14, $noreg :: (store 4 into %ir.scevgep8) - renamable $r4 = tLDRi killed renamable $r5, 3, 14, $noreg :: (load 4 from %ir.scevgep5) - renamable $r5 = tLDRi killed renamable $r7, 3, 14, $noreg :: (load 4 from %ir.scevgep3) + tSTRi killed renamable $r4, renamable $r6, 2, 14, $noreg :: (store (s32) into %ir.scevgep8) + renamable $r4 = tLDRi killed renamable $r5, 3, 14, $noreg :: (load (s32) from %ir.scevgep5) + renamable $r5 = tLDRi killed renamable $r7, 3, 14, $noreg :: (load (s32) from %ir.scevgep3) renamable $r4, dead $cpsr = tEOR killed renamable $r4, killed renamable $r5, 14, $noreg renamable $r4 = nsw tADDhirr killed renamable $r4, killed renamable $r11, 14, $noreg - tSTRi killed renamable $r4, killed renamable $r6, 3, 14, $noreg :: (store 4 into %ir.scevgep1) + tSTRi killed renamable $r4, killed renamable $r6, 3, 14, $noreg :: (store (s32) into %ir.scevgep1) t2LoopEnd renamable $lr, %bb.5, implicit-def dead $cpsr tB %bb.6, 14, $noreg @@ -448,20 +448,20 @@ body: | successors: %bb.12(0x30000000), %bb.7(0x50000000) liveins: $lr, $r0, $r1, $r2, $r8 - renamable $r5 = tLDRspi $sp, 0, 14, $noreg :: (load 4 from %stack.0) + renamable $r5 = tLDRspi $sp, 0, 14, $noreg :: (load (s32) from %stack.0) tCBZ $r5, %bb.12 bb.7.bb16: successors: %bb.8(0x40000000), %bb.9(0x40000000) liveins: $lr, $r0, $r1, $r2, $r5, $r8 - renamable $lr = t2LDRs renamable $r1, renamable $r8, 2, 14, $noreg :: (load 4 from %ir.tmp17) + renamable $lr = t2LDRs renamable $r1, renamable $r8, 2, 14, $noreg :: (load (s32) from %ir.tmp17) tCMPi8 renamable $r5, 1, 14, $noreg, implicit-def $cpsr - renamable $r3 = t2LDRs renamable $r2, renamable $r8, 2, 14, $noreg :: (load 4 from %ir.tmp19) + renamable $r3 = t2LDRs renamable $r2, renamable $r8, 2, 14, $noreg :: (load (s32) from %ir.tmp19) renamable $lr = t2EORrr killed renamable $lr, killed renamable $r3, 14, $noreg, $noreg - renamable $r3 = t2LDRs renamable $r0, renamable $r8, 2, 14, $noreg :: (load 4 from %ir.tmp22) + renamable $r3 = t2LDRs renamable $r0, renamable $r8, 2, 14, $noreg :: (load (s32) from %ir.tmp22) renamable $r3 = nsw tADDhirr killed renamable $r3, killed renamable $lr, 14, $noreg - t2STRs killed renamable $r3, renamable $r0, renamable $r8, 2, 14, $noreg :: (store 4 into %ir.tmp22) + t2STRs killed renamable $r3, renamable $r0, renamable $r8, 2, 14, $noreg :: (store (s32) into %ir.tmp22) tBcc %bb.9, 1, killed $cpsr bb.8: @@ -476,12 +476,12 @@ body: | renamable $r3 = nuw t2ADDri renamable $r8, 1, 14, $noreg, $noreg tCMPi8 killed renamable $r5, 2, 14, $noreg, implicit-def $cpsr - renamable $r7 = t2LDRs renamable $r1, renamable $r3, 2, 14, $noreg :: (load 4 from %ir.tmp58) - renamable $r6 = t2LDRs renamable $r2, renamable $r3, 2, 14, $noreg :: (load 4 from %ir.tmp60) + renamable $r7 = t2LDRs renamable $r1, renamable $r3, 2, 14, $noreg :: (load (s32) from %ir.tmp58) + renamable $r6 = t2LDRs renamable $r2, renamable $r3, 2, 14, $noreg :: (load (s32) from %ir.tmp60) renamable $r7 = t2EORrr killed renamable $r7, killed renamable $r6, 14, $noreg, $noreg - renamable $r6 = t2LDRs renamable $r0, renamable $r3, 2, 14, $noreg :: (load 4 from %ir.tmp63) + renamable $r6 = t2LDRs renamable $r0, renamable $r3, 2, 14, $noreg :: (load (s32) from %ir.tmp63) renamable $r7 = nsw tADDhirr killed renamable $r7, killed renamable $r6, 14, $noreg - t2STRs killed renamable $r7, renamable $r0, killed renamable $r3, 2, 14, $noreg :: (store 4 into %ir.tmp63) + t2STRs killed renamable $r7, renamable $r0, killed renamable $r3, 2, 14, $noreg :: (store (s32) into %ir.tmp63) tBcc %bb.11, 1, killed $cpsr bb.10: @@ -496,12 +496,12 @@ body: | renamable $r3 = nuw t2ADDri killed renamable $r8, 2, 14, $noreg, $noreg renamable $lr = t2MOVi 3, 14, $noreg, $noreg - renamable $r1 = t2LDRs killed renamable $r1, renamable $r3, 2, 14, $noreg :: (load 4 from %ir.tmp69) - renamable $r2 = t2LDRs killed renamable $r2, renamable $r3, 2, 14, $noreg :: (load 4 from %ir.tmp71) + renamable $r1 = t2LDRs killed renamable $r1, renamable $r3, 2, 14, $noreg :: (load (s32) from %ir.tmp69) + renamable $r2 = t2LDRs killed renamable $r2, renamable $r3, 2, 14, $noreg :: (load (s32) from %ir.tmp71) renamable $r1, dead $cpsr = tEOR killed renamable $r1, killed renamable $r2, 14, $noreg - renamable $r2 = t2LDRs renamable $r0, renamable $r3, 2, 14, $noreg :: (load 4 from %ir.tmp74) + renamable $r2 = t2LDRs renamable $r0, renamable $r3, 2, 14, $noreg :: (load (s32) from %ir.tmp74) renamable $r1 = nsw tADDhirr killed renamable $r1, killed renamable $r2, 14, $noreg - t2STRs killed renamable $r1, killed renamable $r0, killed renamable $r3, 2, 14, $noreg :: (store 4 into %ir.tmp74) + t2STRs killed renamable $r1, killed renamable $r0, killed renamable $r3, 2, 14, $noreg :: (store (s32) into %ir.tmp74) bb.12.bb27: liveins: $lr diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lstp-insertion-position.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lstp-insertion-position.mir index 0a051a0a3bfdd..33ea6fd9d4686 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lstp-insertion-position.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lstp-insertion-position.mir @@ -158,7 +158,7 @@ body: | ; CHECK: renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $r3 = tLDRpci %const.0, 14 /* CC::al */, $noreg :: (load 4 from constant-pool) + ; CHECK: renamable $r3 = tLDRpci %const.0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool) ; CHECK: renamable $q1 = MVE_VDUP32 killed renamable $r3, 0, $noreg, undef renamable $q1 ; CHECK: $s4 = VMOVS killed $s0, 14 /* CC::al */, $noreg, implicit killed $q1, implicit-def $q1 ; CHECK: bb.2.vector.body: @@ -167,8 +167,8 @@ body: | ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg ; CHECK: MVE_VPST 2, implicit $vpr - ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv12, align 4) - ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv1315, align 4) + ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv12, align 4) + ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv1315, align 4) ; CHECK: renamable $q1 = MVE_VFMAf32 killed renamable $q1, killed renamable $q2, killed renamable $q0, 1, killed renamable $vpr ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2 ; CHECK: bb.3.middle.block: @@ -203,7 +203,7 @@ body: | renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg - renamable $r3 = tLDRpci %const.0, 14 /* CC::al */, $noreg :: (load 4 from constant-pool) + renamable $r3 = tLDRpci %const.0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool) $lr = t2DoLoopStart renamable $lr renamable $q1 = MVE_VDUP32 killed renamable $r3, 0, $noreg, undef renamable $q1 $s4 = VMOVS killed $s0, 14 /* CC::al */, $noreg, implicit killed $q1, implicit-def $q1 @@ -216,8 +216,8 @@ body: | renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg renamable $lr = t2LoopDec killed renamable $lr, 1 MVE_VPST 2, implicit $vpr - renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv12, align 4) - renamable $r1, renamable $q2 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv1315, align 4) + renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv12, align 4) + renamable $r1, renamable $q2 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv1315, align 4) renamable $q1 = MVE_VFMAf32 killed renamable $q1, killed renamable $q2, killed renamable $q0, 1, killed renamable $vpr t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr tB %bb.3, 14 /* CC::al */, $noreg @@ -290,7 +290,7 @@ body: | ; CHECK: renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg ; CHECK: renamable $lr = t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $r3 = tLDRpci %const.0, 14 /* CC::al */, $noreg :: (load 4 from constant-pool) + ; CHECK: renamable $r3 = tLDRpci %const.0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool) ; CHECK: renamable $q1 = MVE_VDUP32 killed renamable $r3, 0, $noreg, undef renamable $q1 ; CHECK: renamable $r2, dead $cpsr = tLSRri killed renamable $r2, 2, 14 /* CC::al */, $noreg ; CHECK: $s4 = VMOVS killed $s0, 14 /* CC::al */, $noreg, implicit killed $q1, implicit-def $q1 @@ -300,8 +300,8 @@ body: | ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg ; CHECK: MVE_VPST 2, implicit $vpr - ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv13, align 4) - ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv1416, align 4) + ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv13, align 4) + ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv1416, align 4) ; CHECK: renamable $q1 = MVE_VFMAf32 killed renamable $q1, killed renamable $q2, killed renamable $q0, 1, killed renamable $vpr ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2 ; CHECK: bb.3.middle.block: @@ -338,7 +338,7 @@ body: | renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg renamable $lr = t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg - renamable $r3 = tLDRpci %const.0, 14 /* CC::al */, $noreg :: (load 4 from constant-pool) + renamable $r3 = tLDRpci %const.0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool) $lr = t2DoLoopStart renamable $lr renamable $q1 = MVE_VDUP32 killed renamable $r3, 0, $noreg, undef renamable $q1 renamable $r2, dead $cpsr = tLSRri killed renamable $r2, 2, 14 /* CC::al */, $noreg @@ -352,8 +352,8 @@ body: | renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg renamable $lr = t2LoopDec killed renamable $lr, 1 MVE_VPST 2, implicit $vpr - renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv13, align 4) - renamable $r1, renamable $q2 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv1416, align 4) + renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv13, align 4) + renamable $r1, renamable $q2 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv1416, align 4) renamable $q1 = MVE_VFMAf32 killed renamable $q1, killed renamable $q2, killed renamable $q0, 1, killed renamable $vpr t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr tB %bb.3, 14 /* CC::al */, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/massive.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/massive.mir index 94bcca05a3ad7..3a926e7d556c6 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/massive.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/massive.mir @@ -129,10 +129,10 @@ body: | ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $r0, $r1, $r2 ; CHECK: dead renamable $r3 = SPACE 4096, undef renamable $r0 - ; CHECK: renamable $r12, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep3) - ; CHECK: renamable $r3, renamable $r2 = t2LDR_PRE killed renamable $r2, 4, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep7) + ; CHECK: renamable $r12, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3) + ; CHECK: renamable $r3, renamable $r2 = t2LDR_PRE killed renamable $r2, 4, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7) ; CHECK: renamable $r3 = nsw t2MUL killed renamable $r3, killed renamable $r12, 14 /* CC::al */, $noreg - ; CHECK: early-clobber renamable $r0 = t2STR_PRE killed renamable $r3, killed renamable $r0, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.scevgep11) + ; CHECK: early-clobber renamable $r0 = t2STR_PRE killed renamable $r3, killed renamable $r0, 4, 14 /* CC::al */, $noreg :: (store (s32) into %ir.scevgep11) ; CHECK: renamable $lr = t2SUBri killed renamable $lr, 1, 14 /* CC::al */, $noreg, def $cpsr ; CHECK: t2Bcc %bb.2, 1 /* CC::ne */, killed $cpsr ; CHECK: tB %bb.3, 14 /* CC::al */, $noreg @@ -165,10 +165,10 @@ body: | liveins: $lr, $r0, $r1, $r2 dead renamable $r3 = SPACE 4096, undef renamable $r0 - renamable $r12, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep3) - renamable $r3, renamable $r2 = t2LDR_PRE killed renamable $r2, 4, 14, $noreg :: (load 4 from %ir.scevgep7) + renamable $r12, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load (s32) from %ir.scevgep3) + renamable $r3, renamable $r2 = t2LDR_PRE killed renamable $r2, 4, 14, $noreg :: (load (s32) from %ir.scevgep7) renamable $r3 = nsw t2MUL killed renamable $r3, killed renamable $r12, 14, $noreg - early-clobber renamable $r0 = t2STR_PRE killed renamable $r3, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep11) + early-clobber renamable $r0 = t2STR_PRE killed renamable $r3, killed renamable $r0, 4, 14, $noreg :: (store (s32) into %ir.scevgep11) renamable $lr = t2LoopDec killed renamable $lr, 1 t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr tB %bb.3, 14, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/matrix-debug.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/matrix-debug.mir index df150cb321123..55223268e6250 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/matrix-debug.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/matrix-debug.mir @@ -246,7 +246,7 @@ body: | ; CHECK: liveins: $r1, $r5, $r8, $r9, $r10, $r12 ; CHECK: DBG_VALUE $r1, $noreg, !30, !DIExpression(), debug-location !32 ; CHECK: DBG_VALUE 0, $noreg, !31, !DIExpression(), debug-location !32 - ; CHECK: renamable $r2 = t2LDRs renamable $r9, renamable $r1, 2, 14 /* CC::al */, $noreg, debug-location !41 :: (load 4 from %ir.arrayidx7.us) + ; CHECK: renamable $r2 = t2LDRs renamable $r9, renamable $r1, 2, 14 /* CC::al */, $noreg, debug-location !41 :: (load (s32) from %ir.arrayidx7.us) ; CHECK: $r3 = tMOVr $r5, 14 /* CC::al */, $noreg, debug-location !32 ; CHECK: $r0 = tMOVr $r8, 14 /* CC::al */, $noreg, debug-location !32 ; CHECK: dead $lr = tMOVr $r10, 14 /* CC::al */, $noreg, debug-location !32 @@ -255,15 +255,15 @@ body: | ; CHECK: successors: %bb.3(0x7c000000), %bb.4(0x04000000) ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3, $r5, $r8, $r9, $r10, $r12 ; CHECK: DBG_VALUE $noreg, $noreg, !31, !DIExpression(), debug-location !32 - ; CHECK: renamable $r6, renamable $r3 = t2LDRSH_POST killed renamable $r3, 2, 14 /* CC::al */, $noreg, debug-location !43 :: (load 2 from %ir.lsr.iv5) - ; CHECK: renamable $r4, renamable $r0 = t2LDRSH_POST killed renamable $r0, 2, 14 /* CC::al */, $noreg, debug-location !44 :: (load 2 from %ir.lsr.iv1) + ; CHECK: renamable $r6, renamable $r3 = t2LDRSH_POST killed renamable $r3, 2, 14 /* CC::al */, $noreg, debug-location !43 :: (load (s16) from %ir.lsr.iv5) + ; CHECK: renamable $r4, renamable $r0 = t2LDRSH_POST killed renamable $r0, 2, 14 /* CC::al */, $noreg, debug-location !44 :: (load (s16) from %ir.lsr.iv1) ; CHECK: renamable $r2 = nsw t2SMLABB killed renamable $r4, killed renamable $r6, killed renamable $r2, 14 /* CC::al */, $noreg, debug-location !41 ; CHECK: DBG_VALUE $noreg, $noreg, !31, !DIExpression(DW_OP_plus_uconst, 1, DW_OP_stack_value), debug-location !32 ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.3, debug-location !42 ; CHECK: bb.4.for.cond1.for.inc9_crit_edge.us: ; CHECK: successors: %bb.5(0x04000000), %bb.2(0x7c000000) ; CHECK: liveins: $r1, $r2, $r5, $r8, $r9, $r10, $r12 - ; CHECK: t2STRs killed renamable $r2, renamable $r9, renamable $r1, 2, 14 /* CC::al */, $noreg, debug-location !41 :: (store 4 into %ir.8) + ; CHECK: t2STRs killed renamable $r2, renamable $r9, renamable $r1, 2, 14 /* CC::al */, $noreg, debug-location !41 :: (store (s32) into %ir.8) ; CHECK: renamable $r1, dead $cpsr = nuw nsw tADDi8 killed renamable $r1, 1, 14 /* CC::al */, $noreg, debug-location !49 ; CHECK: DBG_VALUE $r1, $noreg, !30, !DIExpression(), debug-location !32 ; CHECK: renamable $r5 = tADDhirr killed renamable $r5, renamable $r12, 14 /* CC::al */, $noreg, debug-location !37 @@ -322,7 +322,7 @@ body: | DBG_VALUE $r1, $noreg, !30, !DIExpression(), debug-location !32 DBG_VALUE 0, $noreg, !31, !DIExpression(), debug-location !32 - renamable $r2 = t2LDRs renamable $r9, renamable $r1, 2, 14, $noreg, debug-location !41 :: (load 4 from %ir.arrayidx7.us) + renamable $r2 = t2LDRs renamable $r9, renamable $r1, 2, 14, $noreg, debug-location !41 :: (load (s32) from %ir.arrayidx7.us) $r3 = tMOVr $r5, 14, $noreg, debug-location !32 $r0 = tMOVr $r8, 14, $noreg, debug-location !32 $lr = tMOVr $r10, 14, $noreg, debug-location !32 @@ -333,9 +333,9 @@ body: | liveins: $lr, $r0, $r1, $r2, $r3, $r5, $r8, $r9, $r10, $r12 DBG_VALUE $noreg, $noreg, !31, !DIExpression(), debug-location !32 - renamable $r6, renamable $r3 = t2LDRSH_POST killed renamable $r3, 2, 14, $noreg, debug-location !47 :: (load 2 from %ir.lsr.iv5) + renamable $r6, renamable $r3 = t2LDRSH_POST killed renamable $r3, 2, 14, $noreg, debug-location !47 :: (load (s16) from %ir.lsr.iv5) renamable $lr = t2LoopDec killed renamable $lr, 1, debug-location !46 - renamable $r4, renamable $r0 = t2LDRSH_POST killed renamable $r0, 2, 14, $noreg, debug-location !50 :: (load 2 from %ir.lsr.iv1) + renamable $r4, renamable $r0 = t2LDRSH_POST killed renamable $r0, 2, 14, $noreg, debug-location !50 :: (load (s16) from %ir.lsr.iv1) renamable $r2 = nsw t2SMLABB killed renamable $r4, killed renamable $r6, killed renamable $r2, 14, $noreg, debug-location !41 DBG_VALUE $noreg, $noreg, !31, !DIExpression(DW_OP_plus_uconst, 1, DW_OP_stack_value), debug-location !32 t2LoopEnd renamable $lr, %bb.3, implicit-def dead $cpsr, debug-location !46 @@ -345,7 +345,7 @@ body: | successors: %bb.5(0x04000000), %bb.2(0x7c000000) liveins: $r1, $r2, $r5, $r8, $r9, $r10, $r12 - t2STRs killed renamable $r2, renamable $r9, renamable $r1, 2, 14, $noreg, debug-location !41 :: (store 4 into %ir.8) + t2STRs killed renamable $r2, renamable $r9, renamable $r1, 2, 14, $noreg, debug-location !41 :: (store (s32) into %ir.8) renamable $r1, dead $cpsr = nuw nsw tADDi8 killed renamable $r1, 1, 14, $noreg, debug-location !55 DBG_VALUE $r1, $noreg, !30, !DIExpression(), debug-location !32 renamable $r5 = tADDhirr killed renamable $r5, renamable $r12, 14, $noreg, debug-location !37 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/matrix.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/matrix.mir index 8bdf3b37b0aab..df691b2a4357d 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/matrix.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/matrix.mir @@ -242,7 +242,7 @@ body: | ; CHECK: bb.2.for.body.i: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $r1, $r2, $r4, $r5, $r6, $r8, $r10 - ; CHECK: renamable $r3, renamable $r1 = t2LDR_POST killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (load 4 from %ir.lsr.iv15) + ; CHECK: renamable $r3, renamable $r1 = t2LDR_POST killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (load (s32) from %ir.lsr.iv15) ; CHECK: renamable $r2 = nsw tADDhirr killed renamable $r2, renamable $r3, 14 /* CC::al */, $noreg ; CHECK: tCMPi8 renamable $r2, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: renamable $r7 = t2CSINC $zr, $zr, 13, implicit killed $cpsr @@ -279,7 +279,7 @@ body: | ; CHECK: bb.5.for.cond4.preheader.us: ; CHECK: successors: %bb.6(0x80000000) ; CHECK: liveins: $lr, $q0, $r0, $r3, $r4, $r5, $r7, $r8, $r9, $r10, $r12 - ; CHECK: renamable $r1 = t2LDRs renamable $r4, renamable $r7, 2, 14 /* CC::al */, $noreg :: (load 4 from %ir.arrayidx12.us) + ; CHECK: renamable $r1 = t2LDRs renamable $r4, renamable $r7, 2, 14 /* CC::al */, $noreg :: (load (s32) from %ir.arrayidx12.us) ; CHECK: $q1 = MVE_VORR $q0, $q0, 0, $noreg, undef $q1 ; CHECK: $r2 = tMOVr killed $lr, 14 /* CC::al */, $noreg ; CHECK: renamable $q1 = MVE_VMOV_to_lane_32 killed renamable $q1, killed renamable $r1, 0, 14 /* CC::al */, $noreg @@ -293,8 +293,8 @@ body: | ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg ; CHECK: $q2 = MVE_VORR killed $q1, killed $q1, 0, $noreg, undef $q2 ; CHECK: MVE_VPST 4, implicit $vpr - ; CHECK: renamable $r6, renamable $q1 = MVE_VLDRHS32_post killed renamable $r6, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv1012, align 2) - ; CHECK: renamable $r1, renamable $q3 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, killed renamable $vpr :: (load 8 from %ir.lsr.iv46, align 2) + ; CHECK: renamable $r6, renamable $q1 = MVE_VLDRHS32_post killed renamable $r6, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv1012, align 2) + ; CHECK: renamable $r1, renamable $q3 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, killed renamable $vpr :: (load (s64) from %ir.lsr.iv46, align 2) ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg ; CHECK: renamable $q1 = nsw MVE_VMULi32 killed renamable $q3, killed renamable $q1, 0, $noreg, undef renamable $q1 ; CHECK: renamable $q1 = MVE_VADDi32 killed renamable $q1, renamable $q2, 0, $noreg, undef renamable $q1 @@ -307,7 +307,7 @@ body: | ; CHECK: renamable $q1 = MVE_VPSEL killed renamable $q1, killed renamable $q2, 0, killed renamable $vpr ; CHECK: $lr = tMOVr $r10, 14 /* CC::al */, $noreg ; CHECK: renamable $r2 = MVE_VADDVu32no_acc killed renamable $q1, 0, $noreg - ; CHECK: t2STRs killed renamable $r2, renamable $r4, renamable $r7, 2, 14 /* CC::al */, $noreg :: (store 4 into %ir.27) + ; CHECK: t2STRs killed renamable $r2, renamable $r4, renamable $r7, 2, 14 /* CC::al */, $noreg :: (store (s32) into %ir.27) ; CHECK: renamable $r7, dead $cpsr = nuw nsw tADDi8 killed renamable $r7, 1, 14 /* CC::al */, $noreg ; CHECK: tCMPhir renamable $r7, $r10, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: tBcc %bb.5, 1 /* CC::ne */, killed $cpsr @@ -324,7 +324,7 @@ body: | ; CHECK: bb.10.for.body.i57: ; CHECK: successors: %bb.10(0x7c000000), %bb.11(0x04000000) ; CHECK: liveins: $lr, $r0, $r1, $r4, $r12 - ; CHECK: renamable $r2, renamable $r4 = t2LDR_POST killed renamable $r4, 4, 14 /* CC::al */, $noreg :: (load 4 from %ir.lsr.iv1) + ; CHECK: renamable $r2, renamable $r4 = t2LDR_POST killed renamable $r4, 4, 14 /* CC::al */, $noreg :: (load (s32) from %ir.lsr.iv1) ; CHECK: renamable $r1 = nsw tADDhirr killed renamable $r1, renamable $r2, 14 /* CC::al */, $noreg ; CHECK: tCMPi8 renamable $r1, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: renamable $r3 = t2CSINC $zr, $zr, 13, implicit killed $cpsr @@ -392,7 +392,7 @@ body: | successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $lr, $r1, $r2, $r4, $r5, $r6, $r8, $r10 - renamable $r3, renamable $r1 = t2LDR_POST killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.lsr.iv15) + renamable $r3, renamable $r1 = t2LDR_POST killed renamable $r1, 4, 14, $noreg :: (load (s32) from %ir.lsr.iv15) renamable $lr = t2LoopDec killed renamable $lr, 1 renamable $r2 = nsw tADDhirr killed renamable $r2, renamable $r3, 14, $noreg tCMPi8 renamable $r2, 0, 14, $noreg, implicit-def $cpsr @@ -437,7 +437,7 @@ body: | successors: %bb.6(0x80000000) liveins: $lr, $q0, $r0, $r3, $r4, $r5, $r7, $r8, $r9, $r10, $r12 - renamable $r1 = t2LDRs renamable $r4, renamable $r7, 2, 14, $noreg :: (load 4 from %ir.arrayidx12.us) + renamable $r1 = t2LDRs renamable $r4, renamable $r7, 2, 14, $noreg :: (load (s32) from %ir.arrayidx12.us) $q1 = MVE_VORR $q0, $q0, 0, $noreg, undef $q1 $r2 = tMOVr killed $lr, 14, $noreg renamable $q1 = MVE_VMOV_to_lane_32 killed renamable $q1, killed renamable $r1, 0, 14, $noreg @@ -453,8 +453,8 @@ body: | renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg $q2 = MVE_VORR killed $q1, $q1, 0, $noreg, undef $q2 MVE_VPST 4, implicit $vpr - renamable $r6, renamable $q1 = MVE_VLDRHS32_post killed renamable $r6, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv1012, align 2) - renamable $r1, renamable $q3 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, killed renamable $vpr :: (load 8 from %ir.lsr.iv46, align 2) + renamable $r6, renamable $q1 = MVE_VLDRHS32_post killed renamable $r6, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv1012, align 2) + renamable $r1, renamable $q3 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, killed renamable $vpr :: (load (s64) from %ir.lsr.iv46, align 2) renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14, $noreg renamable $q1 = nsw MVE_VMULi32 killed renamable $q3, killed renamable $q1, 0, $noreg, undef renamable $q1 renamable $lr = t2LoopDec killed renamable $lr, 1 @@ -471,7 +471,7 @@ body: | renamable $q1 = MVE_VPSEL killed renamable $q1, killed renamable $q2, 0, killed renamable $vpr $lr = tMOVr $r10, 14, $noreg renamable $r2 = MVE_VADDVu32no_acc killed renamable $q1, 0, $noreg - t2STRs killed renamable $r2, renamable $r4, renamable $r7, 2, 14, $noreg :: (store 4 into %ir.27) + t2STRs killed renamable $r2, renamable $r4, renamable $r7, 2, 14, $noreg :: (store (s32) into %ir.27) renamable $r7, dead $cpsr = nuw nsw tADDi8 killed renamable $r7, 1, 14, $noreg tCMPhir renamable $r7, $r10, 14, $noreg, implicit-def $cpsr tBcc %bb.5, 1, killed $cpsr @@ -495,7 +495,7 @@ body: | successors: %bb.10(0x7c000000), %bb.11(0x04000000) liveins: $lr, $r0, $r1, $r4, $r12 - renamable $r2, renamable $r4 = t2LDR_POST killed renamable $r4, 4, 14, $noreg :: (load 4 from %ir.lsr.iv1) + renamable $r2, renamable $r4 = t2LDR_POST killed renamable $r4, 4, 14, $noreg :: (load (s32) from %ir.lsr.iv1) renamable $lr = t2LoopDec killed renamable $lr, 1 renamable $r1 = nsw tADDhirr killed renamable $r1, renamable $r2, 14, $noreg tCMPi8 renamable $r1, 0, 14, $noreg, implicit-def $cpsr diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dls.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dls.mir index bdd81cbf12d7e..d3d333de00ca2 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dls.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dls.mir @@ -98,8 +98,8 @@ body: | ; CHECK: bb.1.while.body: ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK: liveins: $lr, $r0, $r1 - ; CHECK: renamable $r2, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep6) - ; CHECK: early-clobber renamable $r0 = t2STR_PRE killed renamable $r2, killed renamable $r0, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.scevgep2) + ; CHECK: renamable $r2, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep6) + ; CHECK: early-clobber renamable $r0 = t2STR_PRE killed renamable $r2, killed renamable $r0, 4, 14 /* CC::al */, $noreg :: (store (s32) into %ir.scevgep2) ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.1 ; CHECK: bb.2.while.end: ; CHECK: $r0, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg @@ -121,8 +121,8 @@ body: | successors: %bb.1(0x7c000000), %bb.2(0x04000000) liveins: $lr, $r0, $r1 - renamable $r2, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep6) - early-clobber renamable $r0 = t2STR_PRE killed renamable $r2, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep2) + renamable $r2, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load (s32) from %ir.scevgep6) + early-clobber renamable $r0 = t2STR_PRE killed renamable $r2, killed renamable $r0, 4, 14, $noreg :: (store (s32) into %ir.scevgep2) renamable $lr = t2LoopDec killed renamable $lr, 1 t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr tB %bb.2, 14, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir index 22e53267b619c..4504ecc0c14b9 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir @@ -162,7 +162,7 @@ body: | ; CHECK: bb.1.do.body.i: ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK: liveins: $lr, $q0, $r0, $r1, $r2, $r4, $r12 - ; CHECK: renamable $r12, renamable $q1 = MVE_VLDRWU32_post killed renamable $r12, 16, 0, $noreg :: (load 16 from %ir.pSrc.addr.0.i2, align 4) + ; CHECK: renamable $r12, renamable $q1 = MVE_VLDRWU32_post killed renamable $r12, 16, 0, $noreg :: (load (s128) from %ir.pSrc.addr.0.i2, align 4) ; CHECK: renamable $q0 = nnan ninf nsz arcp contract afn reassoc MVE_VADDf32 killed renamable $q0, killed renamable $q1, 0, killed $noreg, killed renamable $q0 ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.1 ; CHECK: bb.2.arm_mean_f32_mve.exit: @@ -184,7 +184,7 @@ body: | ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg ; CHECK: renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg ; CHECK: MVE_VPST 2, implicit $vpr - ; CHECK: renamable $r0, renamable $q2 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.pSrc.addr.01, align 4) + ; CHECK: renamable $r0, renamable $q2 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load (s128) from %ir.pSrc.addr.01, align 4) ; CHECK: renamable $q2 = nnan ninf nsz arcp contract afn reassoc MVE_VSUBf32 killed renamable $q2, renamable $q1, 1, renamable $vpr, undef renamable $q2 ; CHECK: renamable $q0 = nnan ninf nsz arcp contract afn reassoc MVE_VFMAf32 killed renamable $q0, killed renamable $q2, killed renamable $q2, 1, killed renamable $vpr ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.3 @@ -195,7 +195,7 @@ body: | ; CHECK: $s2 = VMOVSR killed $r0, 14 /* CC::al */, $noreg ; CHECK: renamable $s2 = VUITOS killed renamable $s2, 14 /* CC::al */, $noreg ; CHECK: renamable $s0 = nnan ninf nsz arcp contract afn reassoc VDIVS killed renamable $s0, killed renamable $s2, 14 /* CC::al */, $noreg - ; CHECK: VSTRS killed renamable $s0, killed renamable $r2, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.pResult) + ; CHECK: VSTRS killed renamable $s0, killed renamable $r2, 0, 14 /* CC::al */, $noreg :: (store (s32) into %ir.pResult) ; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc bb.0.entry: successors: %bb.1(0x80000000) @@ -227,7 +227,7 @@ body: | renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg renamable $lr = t2LoopDec killed renamable $lr, 1 MVE_VPST 4, implicit $vpr - renamable $r12, renamable $q1 = MVE_VLDRWU32_post killed renamable $r12, 16, 1, renamable $vpr :: (load 16 from %ir.pSrc.addr.0.i2, align 4) + renamable $r12, renamable $q1 = MVE_VLDRWU32_post killed renamable $r12, 16, 1, renamable $vpr :: (load (s128) from %ir.pSrc.addr.0.i2, align 4) renamable $q0 = nnan ninf nsz arcp contract afn reassoc MVE_VADDf32 killed renamable $q0, killed renamable $q1, 1, killed renamable $vpr, renamable $q0 t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr tB %bb.2, 14 /* CC::al */, $noreg @@ -255,7 +255,7 @@ body: | renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg renamable $lr = t2LoopDec killed renamable $lr, 1 MVE_VPST 2, implicit $vpr - renamable $r0, renamable $q2 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.pSrc.addr.01, align 4) + renamable $r0, renamable $q2 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load (s128) from %ir.pSrc.addr.01, align 4) renamable $q2 = nnan ninf nsz arcp contract afn reassoc MVE_VSUBf32 killed renamable $q2, renamable $q1, 1, renamable $vpr, undef renamable $q2 renamable $q0 = nnan ninf nsz arcp contract afn reassoc MVE_VFMAf32 killed renamable $q0, killed renamable $q2, renamable $q2, 1, killed renamable $vpr t2LoopEnd renamable $lr, %bb.3, implicit-def dead $cpsr @@ -269,7 +269,7 @@ body: | $s2 = VMOVSR killed $r0, 14 /* CC::al */, $noreg renamable $s2 = VUITOS killed renamable $s2, 14 /* CC::al */, $noreg renamable $s0 = nnan ninf nsz arcp contract afn reassoc VDIVS killed renamable $s0, killed renamable $s2, 14 /* CC::al */, $noreg - VSTRS killed renamable $s0, killed renamable $r2, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.pResult) + VSTRS killed renamable $s0, killed renamable $r2, 0, 14 /* CC::al */, $noreg :: (store (s32) into %ir.pResult) frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc ... diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-lr-terminator.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-lr-terminator.mir index 8d247fdad805e..11d71103fec38 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-lr-terminator.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-lr-terminator.mir @@ -119,12 +119,12 @@ body: | ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3 ; CHECK: renamable $r4, dead $cpsr = tADDrr renamable $r1, renamable $r3, 14 /* CC::al */, $noreg - ; CHECK: renamable $q0 = MVE_VLDRBU32 killed renamable $r4, 0, 0, $noreg :: (load 4 from %ir.scevgep45, align 1) + ; CHECK: renamable $q0 = MVE_VLDRBU32 killed renamable $r4, 0, 0, $noreg :: (load (s32) from %ir.scevgep45, align 1) ; CHECK: renamable $r4, dead $cpsr = tADDrr renamable $r2, renamable $r3, 14 /* CC::al */, $noreg ; CHECK: renamable $r3, dead $cpsr = tADDi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg - ; CHECK: renamable $q1 = MVE_VLDRBU32 killed renamable $r4, 0, 0, $noreg :: (load 4 from %ir.scevgep23, align 1) + ; CHECK: renamable $q1 = MVE_VLDRBU32 killed renamable $r4, 0, 0, $noreg :: (load (s32) from %ir.scevgep23, align 1) ; CHECK: renamable $q0 = nuw nsw MVE_VMULi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 - ; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 0, killed $noreg :: (store 16 into %ir.lsr.iv1, align 4) + ; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 0, killed $noreg :: (store (s128) into %ir.lsr.iv1, align 4) ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 ; CHECK: bb.3.for.cond.cleanup: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc @@ -163,15 +163,15 @@ body: | renamable $r4, dead $cpsr = tADDrr renamable $r1, renamable $r3, 14, $noreg renamable $vpr = MVE_VCTP32 renamable $r12, 0, $noreg MVE_VPST 8, implicit $vpr - renamable $q0 = MVE_VLDRBU32 killed renamable $r4, 0, 1, renamable $vpr :: (load 4 from %ir.scevgep45, align 1) + renamable $q0 = MVE_VLDRBU32 killed renamable $r4, 0, 1, renamable $vpr :: (load (s32) from %ir.scevgep45, align 1) renamable $r4, dead $cpsr = tADDrr renamable $r2, renamable $r3, 14, $noreg renamable $r3, dead $cpsr = tADDi8 killed renamable $r3, 4, 14, $noreg renamable $r12 = t2SUBri killed renamable $r12, 4, 14, $noreg, $noreg MVE_VPST 8, implicit $vpr - renamable $q1 = MVE_VLDRBU32 killed renamable $r4, 0, 1, renamable $vpr :: (load 4 from %ir.scevgep23, align 1) + renamable $q1 = MVE_VLDRBU32 killed renamable $r4, 0, 1, renamable $vpr :: (load (s32) from %ir.scevgep23, align 1) renamable $q0 = nuw nsw MVE_VMULi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 MVE_VPST 8, implicit $vpr - renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1, align 4) + renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store (s128) into %ir.lsr.iv1, align 4) renamable $lr = t2LoopDec killed renamable $lr, 1 t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr tB %bb.3, 14, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-def-before-start.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-def-before-start.mir index ef158f399d643..af8412ab2fbd2 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-def-before-start.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-def-before-start.mir @@ -132,15 +132,15 @@ body: | ; CHECK: renamable $r4, dead $cpsr = tADDrr renamable $r1, renamable $r3, 14 /* CC::al */, $noreg ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r12, 0, $noreg ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $q0 = MVE_VLDRBU32 killed renamable $r4, 0, 1, renamable $vpr :: (load 4 from %ir.scevgep45, align 1) + ; CHECK: renamable $q0 = MVE_VLDRBU32 killed renamable $r4, 0, 1, renamable $vpr :: (load (s32) from %ir.scevgep45, align 1) ; CHECK: renamable $r4, dead $cpsr = tADDrr renamable $r2, renamable $r3, 14 /* CC::al */, $noreg ; CHECK: renamable $r3, dead $cpsr = tADDi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg ; CHECK: renamable $r12 = t2SUBri killed renamable $r12, 4, 14 /* CC::al */, $noreg, $noreg ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $q1 = MVE_VLDRBU32 killed renamable $r4, 0, 1, renamable $vpr :: (load 4 from %ir.scevgep23, align 1) + ; CHECK: renamable $q1 = MVE_VLDRBU32 killed renamable $r4, 0, 1, renamable $vpr :: (load (s32) from %ir.scevgep23, align 1) ; CHECK: renamable $q0 = nuw nsw MVE_VMULi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1, align 4) + ; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store (s128) into %ir.lsr.iv1, align 4) ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2 ; CHECK: bb.3.for.cond.cleanup: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc @@ -179,15 +179,15 @@ body: | renamable $r4, dead $cpsr = tADDrr renamable $r1, renamable $r3, 14, $noreg renamable $vpr = MVE_VCTP32 renamable $r12, 0, $noreg MVE_VPST 8, implicit $vpr - renamable $q0 = MVE_VLDRBU32 killed renamable $r4, 0, 1, renamable $vpr :: (load 4 from %ir.scevgep45, align 1) + renamable $q0 = MVE_VLDRBU32 killed renamable $r4, 0, 1, renamable $vpr :: (load (s32) from %ir.scevgep45, align 1) renamable $r4, dead $cpsr = tADDrr renamable $r2, renamable $r3, 14, $noreg renamable $r3, dead $cpsr = tADDi8 killed renamable $r3, 4, 14, $noreg renamable $r12 = t2SUBri killed renamable $r12, 4, 14, $noreg, $noreg MVE_VPST 8, implicit $vpr - renamable $q1 = MVE_VLDRBU32 killed renamable $r4, 0, 1, renamable $vpr :: (load 4 from %ir.scevgep23, align 1) + renamable $q1 = MVE_VLDRBU32 killed renamable $r4, 0, 1, renamable $vpr :: (load (s32) from %ir.scevgep23, align 1) renamable $q0 = nuw nsw MVE_VMULi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 MVE_VPST 8, implicit $vpr - renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1, align 4) + renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store (s128) into %ir.lsr.iv1, align 4) renamable $lr = t2LoopDec killed renamable $lr, 1 t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr tB %bb.3, 14, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-start-after-def.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-start-after-def.mir index 4882275446056..df79e5b9045d6 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-start-after-def.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-start-after-def.mir @@ -132,15 +132,15 @@ body: | ; CHECK: renamable $r4, dead $cpsr = tADDrr renamable $r1, renamable $r3, 14 /* CC::al */, $noreg ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r12, 0, $noreg ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $q0 = MVE_VLDRBU32 killed renamable $r4, 0, 1, renamable $vpr :: (load 4 from %ir.scevgep45, align 1) + ; CHECK: renamable $q0 = MVE_VLDRBU32 killed renamable $r4, 0, 1, renamable $vpr :: (load (s32) from %ir.scevgep45, align 1) ; CHECK: renamable $r4, dead $cpsr = tADDrr renamable $r2, renamable $r3, 14 /* CC::al */, $noreg ; CHECK: renamable $r3, dead $cpsr = tADDi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg ; CHECK: renamable $r12 = t2SUBri killed renamable $r12, 4, 14 /* CC::al */, $noreg, $noreg ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $q1 = MVE_VLDRBU32 killed renamable $r4, 0, 1, renamable $vpr :: (load 4 from %ir.scevgep23, align 1) + ; CHECK: renamable $q1 = MVE_VLDRBU32 killed renamable $r4, 0, 1, renamable $vpr :: (load (s32) from %ir.scevgep23, align 1) ; CHECK: renamable $q0 = nuw nsw MVE_VMULi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1, align 4) + ; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store (s128) into %ir.lsr.iv1, align 4) ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2 ; CHECK: bb.3.for.cond.cleanup: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc @@ -179,15 +179,15 @@ body: | renamable $r4, dead $cpsr = tADDrr renamable $r1, renamable $r3, 14, $noreg renamable $vpr = MVE_VCTP32 renamable $r12, 0, $noreg MVE_VPST 8, implicit $vpr - renamable $q0 = MVE_VLDRBU32 killed renamable $r4, 0, 1, renamable $vpr :: (load 4 from %ir.scevgep45, align 1) + renamable $q0 = MVE_VLDRBU32 killed renamable $r4, 0, 1, renamable $vpr :: (load (s32) from %ir.scevgep45, align 1) renamable $r4, dead $cpsr = tADDrr renamable $r2, renamable $r3, 14, $noreg renamable $r3, dead $cpsr = tADDi8 killed renamable $r3, 4, 14, $noreg renamable $r12 = t2SUBri killed renamable $r12, 4, 14, $noreg, $noreg MVE_VPST 8, implicit $vpr - renamable $q1 = MVE_VLDRBU32 killed renamable $r4, 0, 1, renamable $vpr :: (load 4 from %ir.scevgep23, align 1) + renamable $q1 = MVE_VLDRBU32 killed renamable $r4, 0, 1, renamable $vpr :: (load (s32) from %ir.scevgep23, align 1) renamable $q0 = nuw nsw MVE_VMULi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 MVE_VPST 8, implicit $vpr - renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1, align 4) + renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store (s128) into %ir.lsr.iv1, align 4) renamable $lr = t2LoopDec killed renamable $lr, 1 t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr tB %bb.3, 14, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/multiblock-massive.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/multiblock-massive.mir index 0c0bf0234be37..7af07abb1c6f0 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/multiblock-massive.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/multiblock-massive.mir @@ -135,17 +135,17 @@ body: | ; CHECK: successors: %bb.4(0x50000000), %bb.2(0x30000000) ; CHECK: liveins: $lr, $r0, $r1, $r2 ; CHECK: dead renamable $r3 = SPACE 3072, undef renamable $r0 - ; CHECK: renamable $r3 = tLDRi renamable $r1, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.lsr.iv4) - ; CHECK: renamable $r12 = t2LDRi12 renamable $r2, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.lsr.iv2) + ; CHECK: renamable $r3 = tLDRi renamable $r1, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.lsr.iv4) + ; CHECK: renamable $r12 = t2LDRi12 renamable $r2, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.lsr.iv2) ; CHECK: tCMPi8 renamable $r3, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: renamable $r4 = nsw t2MUL renamable $r12, renamable $r3, 14 /* CC::al */, $noreg - ; CHECK: tSTRi killed renamable $r4, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.lsr.iv1) + ; CHECK: tSTRi killed renamable $r4, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s32) into %ir.lsr.iv1) ; CHECK: t2Bcc %bb.2, 0 /* CC::eq */, killed $cpsr ; CHECK: bb.4.middle.block: ; CHECK: successors: %bb.2(0x80000000) ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3, $r12 ; CHECK: renamable $r3 = t2UDIV killed renamable $r12, killed renamable $r3, 14 /* CC::al */, $noreg - ; CHECK: tSTRi killed renamable $r3, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.lsr.iv1) + ; CHECK: tSTRi killed renamable $r3, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s32) into %ir.lsr.iv1) ; CHECK: dead renamable $r3 = SPACE 1024, undef renamable $r0 ; CHECK: t2B %bb.2, 14 /* CC::al */, $noreg ; CHECK: bb.5.for.cond.cleanup: @@ -186,11 +186,11 @@ body: | liveins: $lr, $r0, $r1, $r2 dead renamable $r3 = SPACE 3072, undef renamable $r0 - renamable $r3 = tLDRi renamable $r1, 0, 14, $noreg :: (load 4 from %ir.lsr.iv4) - renamable $r12 = t2LDRi12 renamable $r2, 0, 14, $noreg :: (load 4 from %ir.lsr.iv2) + renamable $r3 = tLDRi renamable $r1, 0, 14, $noreg :: (load (s32) from %ir.lsr.iv4) + renamable $r12 = t2LDRi12 renamable $r2, 0, 14, $noreg :: (load (s32) from %ir.lsr.iv2) tCMPi8 renamable $r3, 0, 14, $noreg, implicit-def $cpsr renamable $r4 = nsw t2MUL renamable $r12, renamable $r3, 14, $noreg - tSTRi killed renamable $r4, renamable $r0, 0, 14, $noreg :: (store 4 into %ir.lsr.iv1) + tSTRi killed renamable $r4, renamable $r0, 0, 14, $noreg :: (store (s32) into %ir.lsr.iv1) t2Bcc %bb.2, 0, killed $cpsr bb.4.middle.block: @@ -198,7 +198,7 @@ body: | liveins: $lr, $r0, $r1, $r2, $r3, $r12 renamable $r3 = t2UDIV killed renamable $r12, killed renamable $r3, 14, $noreg - tSTRi killed renamable $r3, renamable $r0, 0, 14, $noreg :: (store 4 into %ir.lsr.iv1) + tSTRi killed renamable $r3, renamable $r0, 0, 14, $noreg :: (store (s32) into %ir.lsr.iv1) dead renamable $r3 = SPACE 1024, undef renamable $r0 t2B %bb.2, 14, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/multiple-do-loops.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/multiple-do-loops.mir index 17731f72200c3..4864bcfe1d133 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/multiple-do-loops.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/multiple-do-loops.mir @@ -372,10 +372,10 @@ body: | ; CHECK: bb.2.vector.body: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3, $r5, $r6, $r8 - ; CHECK: renamable $r6, renamable $q0 = MVE_VLDRWU32_post killed renamable $r6, 16, 0, $noreg :: (load 16 from %ir.lsr.iv6264, align 4) - ; CHECK: renamable $r5, renamable $q1 = MVE_VLDRWU32_post killed renamable $r5, 16, 0, $noreg :: (load 16 from %ir.lsr.iv6567, align 4) + ; CHECK: renamable $r6, renamable $q0 = MVE_VLDRWU32_post killed renamable $r6, 16, 0, $noreg :: (load (s128) from %ir.lsr.iv6264, align 4) + ; CHECK: renamable $r5, renamable $q1 = MVE_VLDRWU32_post killed renamable $r5, 16, 0, $noreg :: (load (s128) from %ir.lsr.iv6567, align 4) ; CHECK: renamable $q0 = nsw MVE_VMULi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 - ; CHECK: renamable $r8 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r8, 16, 0, killed $noreg :: (store 16 into %ir.lsr.iv6870, align 4) + ; CHECK: renamable $r8 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r8, 16, 0, killed $noreg :: (store (s128) into %ir.lsr.iv6870, align 4) ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 ; CHECK: bb.3.for.cond4.preheader: ; CHECK: successors: %bb.6(0x30000000), %bb.4(0x50000000) @@ -389,12 +389,12 @@ body: | ; CHECK: bb.5.vector.body38: ; CHECK: successors: %bb.5(0x7c000000), %bb.6(0x04000000) ; CHECK: liveins: $lr, $r0, $r1, $r2, $r12 - ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 0, $noreg :: (load 16 from %ir.lsr.iv55, align 4) - ; CHECK: renamable $r2, renamable $q1 = MVE_VLDRWU32_post killed renamable $r2, 16, 0, $noreg :: (load 16 from %ir.lsr.iv5658, align 4) - ; CHECK: renamable $r12, renamable $q2 = MVE_VLDRWU32_post killed renamable $r12, 16, 0, $noreg :: (load 16 from %ir.lsr.iv5961, align 4) + ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 0, $noreg :: (load (s128) from %ir.lsr.iv55, align 4) + ; CHECK: renamable $r2, renamable $q1 = MVE_VLDRWU32_post killed renamable $r2, 16, 0, $noreg :: (load (s128) from %ir.lsr.iv5658, align 4) + ; CHECK: renamable $r12, renamable $q2 = MVE_VLDRWU32_post killed renamable $r12, 16, 0, $noreg :: (load (s128) from %ir.lsr.iv5961, align 4) ; CHECK: renamable $q0 = MVE_VEOR killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 ; CHECK: renamable $q0 = nsw MVE_VADDi32 killed renamable $q2, killed renamable $q0, 0, $noreg, undef renamable $q0 - ; CHECK: MVE_VSTRWU32 killed renamable $q0, killed renamable $r0, 0, 0, killed $noreg :: (store 16 into %ir.lsr.iv5961, align 4) + ; CHECK: MVE_VSTRWU32 killed renamable $q0, killed renamable $r0, 0, 0, killed $noreg :: (store (s128) into %ir.lsr.iv5961, align 4) ; CHECK: $r0 = tMOVr $r12, 14 /* CC::al */, $noreg ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.5 ; CHECK: bb.6.for.cond.cleanup6: @@ -439,12 +439,12 @@ body: | renamable $vpr = MVE_VCTP32 renamable $r4, 0, $noreg MVE_VPST 4, implicit $vpr - renamable $r6, renamable $q0 = MVE_VLDRWU32_post killed renamable $r6, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv6264, align 4) - renamable $r5, renamable $q1 = MVE_VLDRWU32_post killed renamable $r5, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv6567, align 4) + renamable $r6, renamable $q0 = MVE_VLDRWU32_post killed renamable $r6, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv6264, align 4) + renamable $r5, renamable $q1 = MVE_VLDRWU32_post killed renamable $r5, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv6567, align 4) renamable $r4, dead $cpsr = tSUBi8 killed renamable $r4, 4, 14, $noreg renamable $q0 = nsw MVE_VMULi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 MVE_VPST 8, implicit $vpr - renamable $r8 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r8, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv6870, align 4) + renamable $r8 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r8, 16, 1, killed renamable $vpr :: (store (s128) into %ir.lsr.iv6870, align 4) renamable $lr = t2LoopDec killed renamable $lr, 1 t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr tB %bb.3, 14, $noreg @@ -470,14 +470,14 @@ body: | renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg MVE_VPST 2, implicit $vpr - renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv55, align 4) - renamable $r2, renamable $q1 = MVE_VLDRWU32_post killed renamable $r2, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv5658, align 4) - renamable $r12, renamable $q2 = MVE_VLDRWU32_post killed renamable $r12, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv5961, align 4) + renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv55, align 4) + renamable $r2, renamable $q1 = MVE_VLDRWU32_post killed renamable $r2, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv5658, align 4) + renamable $r12, renamable $q2 = MVE_VLDRWU32_post killed renamable $r12, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv5961, align 4) renamable $q0 = MVE_VEOR killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14, $noreg renamable $q0 = nsw MVE_VADDi32 killed renamable $q2, killed renamable $q0, 0, $noreg, undef renamable $q0 MVE_VPST 8, implicit $vpr - MVE_VSTRWU32 killed renamable $q0, killed renamable $r0, 0, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv5961, align 4) + MVE_VSTRWU32 killed renamable $q0, killed renamable $r0, 0, 1, killed renamable $vpr :: (store (s128) into %ir.lsr.iv5961, align 4) renamable $lr = t2LoopDec killed renamable $lr, 1 $r0 = tMOVr $r12, 14, $noreg t2LoopEnd renamable $lr, %bb.5, implicit-def dead $cpsr @@ -576,10 +576,10 @@ body: | ; CHECK: bb.2.vector.body: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3, $r5, $r6, $r8 - ; CHECK: renamable $r5, renamable $q0 = MVE_VLDRWU32_post killed renamable $r5, 16, 0, $noreg :: (load 16 from %ir.lsr.iv6264, align 4) - ; CHECK: renamable $r6, renamable $q1 = MVE_VLDRWU32_post killed renamable $r6, 16, 0, $noreg :: (load 16 from %ir.lsr.iv6567, align 4) + ; CHECK: renamable $r5, renamable $q0 = MVE_VLDRWU32_post killed renamable $r5, 16, 0, $noreg :: (load (s128) from %ir.lsr.iv6264, align 4) + ; CHECK: renamable $r6, renamable $q1 = MVE_VLDRWU32_post killed renamable $r6, 16, 0, $noreg :: (load (s128) from %ir.lsr.iv6567, align 4) ; CHECK: renamable $q0 = nsw MVE_VMULi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 - ; CHECK: renamable $r8 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r8, 16, 0, killed $noreg :: (store 16 into %ir.lsr.iv6870, align 4) + ; CHECK: renamable $r8 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r8, 16, 0, killed $noreg :: (store (s128) into %ir.lsr.iv6870, align 4) ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 ; CHECK: bb.3.for.cond4.preheader: ; CHECK: successors: %bb.6(0x30000000), %bb.4(0x50000000) @@ -593,12 +593,12 @@ body: | ; CHECK: bb.5.vector.body38: ; CHECK: successors: %bb.5(0x7c000000), %bb.6(0x04000000) ; CHECK: liveins: $lr, $r0, $r1, $r2, $r4 - ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 0, $noreg :: (load 16 from %ir.lsr.iv55, align 4) - ; CHECK: renamable $r2, renamable $q1 = MVE_VLDRWU32_post killed renamable $r2, 16, 0, $noreg :: (load 16 from %ir.lsr.iv5658, align 4) - ; CHECK: renamable $r4, renamable $q2 = MVE_VLDRWU32_post killed renamable $r4, 16, 0, $noreg :: (load 16 from %ir.lsr.iv5961, align 4) + ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 0, $noreg :: (load (s128) from %ir.lsr.iv55, align 4) + ; CHECK: renamable $r2, renamable $q1 = MVE_VLDRWU32_post killed renamable $r2, 16, 0, $noreg :: (load (s128) from %ir.lsr.iv5658, align 4) + ; CHECK: renamable $r4, renamable $q2 = MVE_VLDRWU32_post killed renamable $r4, 16, 0, $noreg :: (load (s128) from %ir.lsr.iv5961, align 4) ; CHECK: renamable $q0 = MVE_VEOR killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 ; CHECK: renamable $q0 = nsw MVE_VADDi32 killed renamable $q2, killed renamable $q0, 0, $noreg, undef renamable $q0 - ; CHECK: MVE_VSTRWU32 killed renamable $q0, killed renamable $r0, 0, 0, killed $noreg :: (store 16 into %ir.lsr.iv5961, align 4) + ; CHECK: MVE_VSTRWU32 killed renamable $q0, killed renamable $r0, 0, 0, killed $noreg :: (store (s128) into %ir.lsr.iv5961, align 4) ; CHECK: $r0 = tMOVr $r4, 14 /* CC::al */, $noreg ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.5 ; CHECK: bb.6.for.cond.cleanup6: @@ -645,12 +645,12 @@ body: | renamable $vpr = MVE_VCTP32 renamable $r4, 0, $noreg MVE_VPST 4, implicit $vpr - renamable $r5, renamable $q0 = MVE_VLDRWU32_post killed renamable $r5, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv6264, align 4) - renamable $r6, renamable $q1 = MVE_VLDRWU32_post killed renamable $r6, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv6567, align 4) + renamable $r5, renamable $q0 = MVE_VLDRWU32_post killed renamable $r5, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv6264, align 4) + renamable $r6, renamable $q1 = MVE_VLDRWU32_post killed renamable $r6, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv6567, align 4) renamable $r4, dead $cpsr = tSUBi8 killed renamable $r4, 4, 14, $noreg renamable $q0 = nsw MVE_VMULi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 MVE_VPST 8, implicit $vpr - renamable $r8 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r8, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv6870, align 4) + renamable $r8 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r8, 16, 1, killed renamable $vpr :: (store (s128) into %ir.lsr.iv6870, align 4) renamable $lr = t2LoopDec killed renamable $lr, 1 t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr tB %bb.3, 14, $noreg @@ -678,14 +678,14 @@ body: | renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg MVE_VPST 2, implicit $vpr - renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv55, align 4) - renamable $r2, renamable $q1 = MVE_VLDRWU32_post killed renamable $r2, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv5658, align 4) - renamable $r4, renamable $q2 = MVE_VLDRWU32_post killed renamable $r4, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv5961, align 4) + renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv55, align 4) + renamable $r2, renamable $q1 = MVE_VLDRWU32_post killed renamable $r2, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv5658, align 4) + renamable $r4, renamable $q2 = MVE_VLDRWU32_post killed renamable $r4, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv5961, align 4) renamable $q0 = MVE_VEOR killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14, $noreg renamable $q0 = nsw MVE_VADDi32 killed renamable $q2, killed renamable $q0, 0, $noreg, undef renamable $q0 MVE_VPST 8, implicit $vpr - MVE_VSTRWU32 killed renamable $q0, killed renamable $r0, 0, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv5961, align 4) + MVE_VSTRWU32 killed renamable $q0, killed renamable $r0, 0, 1, killed renamable $vpr :: (store (s128) into %ir.lsr.iv5961, align 4) renamable $lr = t2LoopDec killed renamable $lr, 1 $r0 = tMOVr $r4, 14, $noreg t2LoopEnd renamable $lr, %bb.5, implicit-def dead $cpsr @@ -791,10 +791,10 @@ body: | ; CHECK: bb.2.vector.body: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3, $r5, $r6, $r8 - ; CHECK: renamable $r6, renamable $q0 = MVE_VLDRWU32_post killed renamable $r6, 16, 0, $noreg :: (load 16 from %ir.lsr.iv117119, align 4) - ; CHECK: renamable $r5, renamable $q1 = MVE_VLDRWU32_post killed renamable $r5, 16, 0, $noreg :: (load 16 from %ir.lsr.iv120122, align 4) + ; CHECK: renamable $r6, renamable $q0 = MVE_VLDRWU32_post killed renamable $r6, 16, 0, $noreg :: (load (s128) from %ir.lsr.iv117119, align 4) + ; CHECK: renamable $r5, renamable $q1 = MVE_VLDRWU32_post killed renamable $r5, 16, 0, $noreg :: (load (s128) from %ir.lsr.iv120122, align 4) ; CHECK: renamable $q0 = nsw MVE_VMULi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 - ; CHECK: renamable $r8 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r8, 16, 0, killed $noreg :: (store 16 into %ir.lsr.iv123125, align 4) + ; CHECK: renamable $r8 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r8, 16, 0, killed $noreg :: (store (s128) into %ir.lsr.iv123125, align 4) ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 ; CHECK: bb.3.for.cond4.preheader: ; CHECK: successors: %bb.6(0x30000000), %bb.4(0x50000000) @@ -814,12 +814,12 @@ body: | ; CHECK: bb.5.vector.body65: ; CHECK: successors: %bb.5(0x7c000000), %bb.6(0x04000000) ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3, $r4, $r6, $r9, $r10 - ; CHECK: renamable $r4, renamable $q0 = MVE_VLDRWU32_post killed renamable $r4, 16, 0, $noreg :: (load 16 from %ir.lsr.iv108110, align 4) - ; CHECK: renamable $r9, renamable $q1 = MVE_VLDRWU32_post killed renamable $r9, 16, 0, $noreg :: (load 16 from %ir.lsr.iv111113, align 4) - ; CHECK: renamable $r6, renamable $q2 = MVE_VLDRWU32_post killed renamable $r6, 16, 0, $noreg :: (load 16 from %ir.lsr.iv114116, align 4) + ; CHECK: renamable $r4, renamable $q0 = MVE_VLDRWU32_post killed renamable $r4, 16, 0, $noreg :: (load (s128) from %ir.lsr.iv108110, align 4) + ; CHECK: renamable $r9, renamable $q1 = MVE_VLDRWU32_post killed renamable $r9, 16, 0, $noreg :: (load (s128) from %ir.lsr.iv111113, align 4) + ; CHECK: renamable $r6, renamable $q2 = MVE_VLDRWU32_post killed renamable $r6, 16, 0, $noreg :: (load (s128) from %ir.lsr.iv114116, align 4) ; CHECK: renamable $q0 = MVE_VEOR killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 ; CHECK: renamable $q0 = nsw MVE_VADDi32 killed renamable $q2, killed renamable $q0, 0, $noreg, undef renamable $q0 - ; CHECK: MVE_VSTRWU32 killed renamable $q0, killed renamable $r10, 0, 0, killed $noreg :: (store 16 into %ir.lsr.iv114116, align 4) + ; CHECK: MVE_VSTRWU32 killed renamable $q0, killed renamable $r10, 0, 0, killed $noreg :: (store (s128) into %ir.lsr.iv114116, align 4) ; CHECK: $r10 = tMOVr $r6, 14 /* CC::al */, $noreg ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.5 ; CHECK: bb.6.for.cond15.preheader: @@ -834,12 +834,12 @@ body: | ; CHECK: bb.8.vector.body84: ; CHECK: successors: %bb.8(0x7c000000), %bb.9(0x04000000) ; CHECK: liveins: $lr, $r0, $r1, $r2, $r5 - ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 0, $noreg :: (load 16 from %ir.lsr.iv101, align 4) - ; CHECK: renamable $r2, renamable $q1 = MVE_VLDRWU32_post killed renamable $r2, 16, 0, $noreg :: (load 16 from %ir.lsr.iv102104, align 4) - ; CHECK: renamable $r5, renamable $q2 = MVE_VLDRWU32_post killed renamable $r5, 16, 0, $noreg :: (load 16 from %ir.lsr.iv105107, align 4) + ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 0, $noreg :: (load (s128) from %ir.lsr.iv101, align 4) + ; CHECK: renamable $r2, renamable $q1 = MVE_VLDRWU32_post killed renamable $r2, 16, 0, $noreg :: (load (s128) from %ir.lsr.iv102104, align 4) + ; CHECK: renamable $r5, renamable $q2 = MVE_VLDRWU32_post killed renamable $r5, 16, 0, $noreg :: (load (s128) from %ir.lsr.iv105107, align 4) ; CHECK: renamable $q0 = MVE_VADDi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 ; CHECK: renamable $q0 = MVE_VSUBi32 killed renamable $q2, killed renamable $q0, 0, $noreg, undef renamable $q0 - ; CHECK: MVE_VSTRWU32 killed renamable $q0, killed renamable $r0, 0, 0, killed $noreg :: (store 16 into %ir.lsr.iv105107, align 4) + ; CHECK: MVE_VSTRWU32 killed renamable $q0, killed renamable $r0, 0, 0, killed $noreg :: (store (s128) into %ir.lsr.iv105107, align 4) ; CHECK: $r0 = tMOVr $r5, 14 /* CC::al */, $noreg ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.8 ; CHECK: bb.9.for.cond.cleanup17: @@ -886,12 +886,12 @@ body: | renamable $vpr = MVE_VCTP32 renamable $r4, 0, $noreg MVE_VPST 4, implicit $vpr - renamable $r6, renamable $q0 = MVE_VLDRWU32_post killed renamable $r6, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv117119, align 4) - renamable $r5, renamable $q1 = MVE_VLDRWU32_post killed renamable $r5, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv120122, align 4) + renamable $r6, renamable $q0 = MVE_VLDRWU32_post killed renamable $r6, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv117119, align 4) + renamable $r5, renamable $q1 = MVE_VLDRWU32_post killed renamable $r5, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv120122, align 4) renamable $r4, dead $cpsr = tSUBi8 killed renamable $r4, 4, 14, $noreg renamable $q0 = nsw MVE_VMULi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 MVE_VPST 8, implicit $vpr - renamable $r8 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r8, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv123125, align 4) + renamable $r8 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r8, 16, 1, killed renamable $vpr :: (store (s128) into %ir.lsr.iv123125, align 4) renamable $lr = t2LoopDec killed renamable $lr, 1 t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr tB %bb.3, 14, $noreg @@ -927,14 +927,14 @@ body: | renamable $vpr = MVE_VCTP32 renamable $r5, 0, $noreg MVE_VPST 2, implicit $vpr - renamable $r4, renamable $q0 = MVE_VLDRWU32_post killed renamable $r4, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv108110, align 4) - renamable $r9, renamable $q1 = MVE_VLDRWU32_post killed renamable $r9, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv111113, align 4) - renamable $r6, renamable $q2 = MVE_VLDRWU32_post killed renamable $r6, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv114116, align 4) + renamable $r4, renamable $q0 = MVE_VLDRWU32_post killed renamable $r4, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv108110, align 4) + renamable $r9, renamable $q1 = MVE_VLDRWU32_post killed renamable $r9, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv111113, align 4) + renamable $r6, renamable $q2 = MVE_VLDRWU32_post killed renamable $r6, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv114116, align 4) renamable $q0 = MVE_VEOR killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 renamable $r5, dead $cpsr = tSUBi8 killed renamable $r5, 4, 14, $noreg renamable $q0 = nsw MVE_VADDi32 killed renamable $q2, killed renamable $q0, 0, $noreg, undef renamable $q0 MVE_VPST 8, implicit $vpr - MVE_VSTRWU32 killed renamable $q0, killed renamable $r10, 0, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv114116, align 4) + MVE_VSTRWU32 killed renamable $q0, killed renamable $r10, 0, 1, killed renamable $vpr :: (store (s128) into %ir.lsr.iv114116, align 4) renamable $lr = t2LoopDec killed renamable $lr, 1 $r10 = tMOVr $r6, 14, $noreg t2LoopEnd renamable $lr, %bb.5, implicit-def dead $cpsr @@ -960,14 +960,14 @@ body: | renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg MVE_VPST 2, implicit $vpr - renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv101, align 4) - renamable $r2, renamable $q1 = MVE_VLDRWU32_post killed renamable $r2, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv102104, align 4) - renamable $r5, renamable $q2 = MVE_VLDRWU32_post killed renamable $r5, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv105107, align 4) + renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv101, align 4) + renamable $r2, renamable $q1 = MVE_VLDRWU32_post killed renamable $r2, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv102104, align 4) + renamable $r5, renamable $q2 = MVE_VLDRWU32_post killed renamable $r5, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv105107, align 4) renamable $q0 = MVE_VADDi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14, $noreg renamable $q0 = MVE_VSUBi32 killed renamable $q2, killed renamable $q0, 0, $noreg, undef renamable $q0 MVE_VPST 8, implicit $vpr - MVE_VSTRWU32 killed renamable $q0, killed renamable $r0, 0, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv105107, align 4) + MVE_VSTRWU32 killed renamable $q0, killed renamable $r0, 0, 1, killed renamable $vpr :: (store (s128) into %ir.lsr.iv105107, align 4) renamable $lr = t2LoopDec killed renamable $lr, 1 $r0 = tMOVr $r5, 14, $noreg t2LoopEnd renamable $lr, %bb.8, implicit-def dead $cpsr diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-cbnz.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-cbnz.mir index 57bb85d821edb..b599829a0cfc6 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-cbnz.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-cbnz.mir @@ -111,7 +111,7 @@ body: | ; CHECK-LOB: bb.0.entry: ; CHECK-LOB: successors: %bb.1(0x50000000), %bb.5(0x30000000) ; CHECK-LOB: liveins: $r0, $r1 - ; CHECK-LOB: renamable $r2 = t2LDRSHi12 renamable $r1, 2, 14 /* CC::al */, $noreg :: (load 2 from %ir.idx) + ; CHECK-LOB: renamable $r2 = t2LDRSHi12 renamable $r1, 2, 14 /* CC::al */, $noreg :: (load (s16) from %ir.idx) ; CHECK-LOB: t2CMPri renamable $r2, -1, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK-LOB: tBcc %bb.5, 13 /* CC::le */, killed $cpsr ; CHECK-LOB: bb.1.while.cond.preheader: @@ -125,15 +125,15 @@ body: | ; CHECK-LOB: bb.3.land.rhs: ; CHECK-LOB: successors: %bb.4(0x80000000) ; CHECK-LOB: liveins: $r0, $r1 - ; CHECK-LOB: renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.info2) - ; CHECK-LOB: renamable $r2 = tLDRHi killed renamable $r2, 1, 14 /* CC::al */, $noreg :: (load 2 from %ir.idx3) + ; CHECK-LOB: renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.info2) + ; CHECK-LOB: renamable $r2 = tLDRHi killed renamable $r2, 1, 14 /* CC::al */, $noreg :: (load (s16) from %ir.idx3) ; CHECK-LOB: tCMPr killed renamable $r2, renamable $r1, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK-LOB: t2IT 0, 8, implicit-def $itstate ; CHECK-LOB: tBX_RET 0 /* CC::eq */, killed $cpsr, implicit $r0, implicit killed $itstate ; CHECK-LOB: bb.4.while.body: ; CHECK-LOB: successors: %bb.9(0x04000000), %bb.3(0x7c000000) ; CHECK-LOB: liveins: $r0, $r1 - ; CHECK-LOB: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.next4) + ; CHECK-LOB: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.next4) ; CHECK-LOB: tCBNZ renamable $r0, %bb.9 ; CHECK-LOB: t2LE %bb.3 ; CHECK-LOB: bb.5.while.cond9.preheader: @@ -143,18 +143,18 @@ body: | ; CHECK-LOB: bb.6.land.rhs11.lr.ph: ; CHECK-LOB: successors: %bb.7(0x80000000) ; CHECK-LOB: liveins: $r0, $r1 - ; CHECK-LOB: renamable $r1 = t2LDRSHi12 killed renamable $r1, 0, 14 /* CC::al */, $noreg :: (load 2 from %ir.data16143) + ; CHECK-LOB: renamable $r1 = t2LDRSHi12 killed renamable $r1, 0, 14 /* CC::al */, $noreg :: (load (s16) from %ir.data16143) ; CHECK-LOB: bb.7.land.rhs11: ; CHECK-LOB: successors: %bb.10(0x04000000), %bb.8(0x7c000000) ; CHECK-LOB: liveins: $r0, $r1 - ; CHECK-LOB: renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.info12) - ; CHECK-LOB: renamable $r2 = tLDRBi killed renamable $r2, 0, 14 /* CC::al */, $noreg :: (load 1 from %ir.data165, align 2) + ; CHECK-LOB: renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.info12) + ; CHECK-LOB: renamable $r2 = tLDRBi killed renamable $r2, 0, 14 /* CC::al */, $noreg :: (load (s8) from %ir.data165, align 2) ; CHECK-LOB: tCMPr killed renamable $r2, renamable $r1, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK-LOB: tBcc %bb.10, 0 /* CC::eq */, killed $cpsr ; CHECK-LOB: bb.8.while.body19: ; CHECK-LOB: successors: %bb.9(0x04000000), %bb.7(0x7c000000) ; CHECK-LOB: liveins: $r0, $r1 - ; CHECK-LOB: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.next206) + ; CHECK-LOB: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.next206) ; CHECK-LOB: tCBZ renamable $r0, %bb.9 ; CHECK-LOB: t2LE %bb.7 ; CHECK-LOB: bb.9: @@ -167,7 +167,7 @@ body: | ; CHECK-NOLOB: bb.0.entry: ; CHECK-NOLOB: successors: %bb.1(0x50000000), %bb.5(0x30000000) ; CHECK-NOLOB: liveins: $r0, $r1 - ; CHECK-NOLOB: renamable $r2 = t2LDRSHi12 renamable $r1, 2, 14 /* CC::al */, $noreg :: (load 2 from %ir.idx) + ; CHECK-NOLOB: renamable $r2 = t2LDRSHi12 renamable $r1, 2, 14 /* CC::al */, $noreg :: (load (s16) from %ir.idx) ; CHECK-NOLOB: t2CMPri renamable $r2, -1, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK-NOLOB: tBcc %bb.5, 13 /* CC::le */, killed $cpsr ; CHECK-NOLOB: bb.1.while.cond.preheader: @@ -181,15 +181,15 @@ body: | ; CHECK-NOLOB: bb.3.land.rhs: ; CHECK-NOLOB: successors: %bb.4(0x80000000) ; CHECK-NOLOB: liveins: $r0, $r1 - ; CHECK-NOLOB: renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.info2) - ; CHECK-NOLOB: renamable $r2 = tLDRHi killed renamable $r2, 1, 14 /* CC::al */, $noreg :: (load 2 from %ir.idx3) + ; CHECK-NOLOB: renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.info2) + ; CHECK-NOLOB: renamable $r2 = tLDRHi killed renamable $r2, 1, 14 /* CC::al */, $noreg :: (load (s16) from %ir.idx3) ; CHECK-NOLOB: tCMPr killed renamable $r2, renamable $r1, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK-NOLOB: t2IT 0, 8, implicit-def $itstate ; CHECK-NOLOB: tBX_RET 0 /* CC::eq */, killed $cpsr, implicit $r0, implicit killed $itstate ; CHECK-NOLOB: bb.4.while.body: ; CHECK-NOLOB: successors: %bb.9(0x04000000), %bb.3(0x7c000000) ; CHECK-NOLOB: liveins: $r0, $r1 - ; CHECK-NOLOB: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.next4) + ; CHECK-NOLOB: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.next4) ; CHECK-NOLOB: tCMPi8 renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK-NOLOB: tBcc %bb.3, 0 /* CC::eq */, killed $cpsr ; CHECK-NOLOB: tB %bb.9, 14 /* CC::al */, $noreg @@ -200,18 +200,18 @@ body: | ; CHECK-NOLOB: bb.6.land.rhs11.lr.ph: ; CHECK-NOLOB: successors: %bb.7(0x80000000) ; CHECK-NOLOB: liveins: $r0, $r1 - ; CHECK-NOLOB: renamable $r1 = t2LDRSHi12 killed renamable $r1, 0, 14 /* CC::al */, $noreg :: (load 2 from %ir.data16143) + ; CHECK-NOLOB: renamable $r1 = t2LDRSHi12 killed renamable $r1, 0, 14 /* CC::al */, $noreg :: (load (s16) from %ir.data16143) ; CHECK-NOLOB: bb.7.land.rhs11: ; CHECK-NOLOB: successors: %bb.10(0x04000000), %bb.8(0x7c000000) ; CHECK-NOLOB: liveins: $r0, $r1 - ; CHECK-NOLOB: renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.info12) - ; CHECK-NOLOB: renamable $r2 = tLDRBi killed renamable $r2, 0, 14 /* CC::al */, $noreg :: (load 1 from %ir.data165, align 2) + ; CHECK-NOLOB: renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.info12) + ; CHECK-NOLOB: renamable $r2 = tLDRBi killed renamable $r2, 0, 14 /* CC::al */, $noreg :: (load (s8) from %ir.data165, align 2) ; CHECK-NOLOB: tCMPr killed renamable $r2, renamable $r1, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK-NOLOB: tBcc %bb.10, 0 /* CC::eq */, killed $cpsr ; CHECK-NOLOB: bb.8.while.body19: ; CHECK-NOLOB: successors: %bb.9(0x04000000), %bb.7(0x7c000000) ; CHECK-NOLOB: liveins: $r0, $r1 - ; CHECK-NOLOB: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.next206) + ; CHECK-NOLOB: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.next206) ; CHECK-NOLOB: tCMPi8 renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK-NOLOB: tBcc %bb.7, 1 /* CC::ne */, killed $cpsr ; CHECK-NOLOB: bb.9: @@ -224,7 +224,7 @@ body: | successors: %bb.5(0x50000000), %bb.1(0x30000000) liveins: $r0, $r1 - renamable $r2 = t2LDRSHi12 renamable $r1, 2, 14 /* CC::al */, $noreg :: (load 2 from %ir.idx) + renamable $r2 = t2LDRSHi12 renamable $r1, 2, 14 /* CC::al */, $noreg :: (load (s16) from %ir.idx) t2CMPri renamable $r2, -1, 14 /* CC::al */, $noreg, implicit-def $cpsr t2Bcc %bb.1, 13 /* CC::le */, killed $cpsr @@ -245,8 +245,8 @@ body: | successors: %bb.8(0x80000000) liveins: $r0, $r1 - renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.info2) - renamable $r2 = tLDRHi killed renamable $r2, 1, 14 /* CC::al */, $noreg :: (load 2 from %ir.idx3) + renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.info2) + renamable $r2 = tLDRHi killed renamable $r2, 1, 14 /* CC::al */, $noreg :: (load (s16) from %ir.idx3) tCMPr killed renamable $r2, renamable $r1, 14 /* CC::al */, $noreg, implicit-def $cpsr t2IT 0, 8, implicit-def $itstate tBX_RET 0 /* CC::eq */, killed $cpsr, implicit $r0, implicit killed $itstate @@ -255,7 +255,7 @@ body: | successors: %bb.9(0x04000000), %bb.7(0x7c000000) liveins: $r0, $r1 - renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.next4) + renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.next4) tCMPi8 renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr t2Bcc %bb.7, 0 /* CC::eq */, killed $cpsr t2B %bb.9, 14 /* CC::al */, $noreg @@ -271,14 +271,14 @@ body: | successors: %bb.3(0x80000000) liveins: $r0, $r1 - renamable $r1 = t2LDRSHi12 killed renamable $r1, 0, 14 /* CC::al */, $noreg :: (load 2 from %ir.data16143) + renamable $r1 = t2LDRSHi12 killed renamable $r1, 0, 14 /* CC::al */, $noreg :: (load (s16) from %ir.data16143) bb.3.land.rhs11: successors: %bb.10(0x04000000), %bb.4(0x7c000000) liveins: $r0, $r1 - renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.info12) - renamable $r2 = tLDRBi killed renamable $r2, 0, 14 /* CC::al */, $noreg :: (load 1 from %ir.data165, align 2) + renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.info12) + renamable $r2 = tLDRBi killed renamable $r2, 0, 14 /* CC::al */, $noreg :: (load (s8) from %ir.data165, align 2) tCMPr killed renamable $r2, renamable $r1, 14 /* CC::al */, $noreg, implicit-def $cpsr t2Bcc %bb.10, 0 /* CC::eq */, killed $cpsr @@ -286,7 +286,7 @@ body: | successors: %bb.9(0x04000000), %bb.3(0x7c000000) liveins: $r0, $r1 - renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.next206) + renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.next206) tCMPi8 renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr t2Bcc %bb.3, 1 /* CC::ne */, killed $cpsr diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-reorder.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-reorder.mir index e2f61b73f4b78..465d080b3af6b 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-reorder.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-reorder.mir @@ -110,7 +110,7 @@ body: | ; CHECK: bb.0.entry: ; CHECK: successors: %bb.1(0x50000000), %bb.6(0x30000000) ; CHECK: liveins: $r0, $r1 - ; CHECK: renamable $r2 = t2LDRSHi12 renamable $r1, 2, 14 /* CC::al */, $noreg :: (load 2 from %ir.idx) + ; CHECK: renamable $r2 = t2LDRSHi12 renamable $r1, 2, 14 /* CC::al */, $noreg :: (load (s16) from %ir.idx) ; CHECK: t2CMPri renamable $r2, -1, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: tBcc %bb.6, 13 /* CC::le */, killed $cpsr ; CHECK: bb.1.while.cond.preheader: @@ -128,14 +128,14 @@ body: | ; CHECK: bb.3.land.rhs: ; CHECK: successors: %bb.5(0x04000000), %bb.4(0x7c000000) ; CHECK: liveins: $r0, $r1 - ; CHECK: renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.info2) - ; CHECK: renamable $r2 = tLDRHi killed renamable $r2, 1, 14 /* CC::al */, $noreg :: (load 2 from %ir.idx3) + ; CHECK: renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.info2) + ; CHECK: renamable $r2 = tLDRHi killed renamable $r2, 1, 14 /* CC::al */, $noreg :: (load (s16) from %ir.idx3) ; CHECK: tCMPr killed renamable $r2, renamable $r1, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: tBcc %bb.5, 0 /* CC::eq */, killed $cpsr ; CHECK: bb.4.while.body: ; CHECK: successors: %bb.3(0x80000000) ; CHECK: liveins: $r0, $r1 - ; CHECK: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.next4) + ; CHECK: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.next4) ; CHECK: tCMPi8 renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: t2IT 0, 4, implicit-def $itstate ; CHECK: renamable $r0 = tMOVi8 $noreg, 0, 0 /* CC::eq */, $cpsr, implicit killed $r0, implicit $itstate @@ -155,12 +155,12 @@ body: | ; CHECK: bb.7.land.rhs11.lr.ph: ; CHECK: successors: %bb.8(0x80000000) ; CHECK: liveins: $r0, $r1 - ; CHECK: renamable $r1 = t2LDRSHi12 killed renamable $r1, 0, 14 /* CC::al */, $noreg :: (load 2 from %ir.data16143) + ; CHECK: renamable $r1 = t2LDRSHi12 killed renamable $r1, 0, 14 /* CC::al */, $noreg :: (load (s16) from %ir.data16143) ; CHECK: bb.8.land.rhs11: ; CHECK: successors: %bb.9(0x80000000) ; CHECK: liveins: $r0, $r1 - ; CHECK: renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.info12) - ; CHECK: renamable $r2 = tLDRBi killed renamable $r2, 0, 14 /* CC::al */, $noreg :: (load 1 from %ir.data166, align 2) + ; CHECK: renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.info12) + ; CHECK: renamable $r2 = tLDRBi killed renamable $r2, 0, 14 /* CC::al */, $noreg :: (load (s8) from %ir.data166, align 2) ; CHECK: tCMPr killed renamable $r2, renamable $r1, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: t2IT 0, 8, implicit-def $itstate ; CHECK: tBX_RET 0 /* CC::eq */, killed $cpsr, implicit $r0, implicit killed $itstate @@ -168,7 +168,7 @@ body: | ; CHECK: bb.9.while.body19: ; CHECK: successors: %bb.8(0x80000000) ; CHECK: liveins: $r0, $r1 - ; CHECK: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.next205) + ; CHECK: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.next205) ; CHECK: tCMPi8 renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: t2IT 0, 4, implicit-def $itstate ; CHECK: renamable $r0 = tMOVi8 $noreg, 0, 0 /* CC::eq */, $cpsr, implicit killed $r0, implicit $itstate @@ -178,7 +178,7 @@ body: | successors: %bb.2(0x50000000), %bb.1(0x30000000) liveins: $r0, $r1 - renamable $r2 = t2LDRSHi12 renamable $r1, 2, 14 /* CC::al */, $noreg :: (load 2 from %ir.idx) + renamable $r2 = t2LDRSHi12 renamable $r1, 2, 14 /* CC::al */, $noreg :: (load (s16) from %ir.idx) t2CMPri renamable $r2, -1, 14 /* CC::al */, $noreg, implicit-def $cpsr t2Bcc %bb.1, 13 /* CC::le */, killed $cpsr @@ -202,8 +202,8 @@ body: | successors: %bb.9(0x04000000), %bb.5(0x7c000000) liveins: $r0, $r1 - renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.info2) - renamable $r2 = tLDRHi killed renamable $r2, 1, 14 /* CC::al */, $noreg :: (load 2 from %ir.idx3) + renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.info2) + renamable $r2 = tLDRHi killed renamable $r2, 1, 14 /* CC::al */, $noreg :: (load (s16) from %ir.idx3) tCMPr killed renamable $r2, renamable $r1, 14 /* CC::al */, $noreg, implicit-def $cpsr t2Bcc %bb.9, 0 /* CC::eq */, killed $cpsr @@ -211,7 +211,7 @@ body: | successors: %bb.4(0x7c000000) liveins: $r0, $r1 - renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.next4) + renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.next4) tCMPi8 renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr t2IT 0, 4, implicit-def $itstate renamable $r0 = tMOVi8 $noreg, 0, 0 /* CC::eq */, $cpsr, implicit killed $r0, implicit $itstate @@ -237,14 +237,14 @@ body: | successors: %bb.8(0x80000000) liveins: $r0, $r1 - renamable $r1 = t2LDRSHi12 killed renamable $r1, 0, 14 /* CC::al */, $noreg :: (load 2 from %ir.data16143) + renamable $r1 = t2LDRSHi12 killed renamable $r1, 0, 14 /* CC::al */, $noreg :: (load (s16) from %ir.data16143) bb.8.land.rhs11: successors: %bb.6(0x80000000) liveins: $r0, $r1 - renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.info12) - renamable $r2 = tLDRBi killed renamable $r2, 0, 14 /* CC::al */, $noreg :: (load 1 from %ir.data166, align 2) + renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.info12) + renamable $r2 = tLDRBi killed renamable $r2, 0, 14 /* CC::al */, $noreg :: (load (s8) from %ir.data166, align 2) tCMPr killed renamable $r2, renamable $r1, 14 /* CC::al */, $noreg, implicit-def $cpsr t2IT 0, 8, implicit-def $itstate tBX_RET 0 /* CC::eq */, killed $cpsr, implicit $r0, implicit killed $itstate @@ -254,7 +254,7 @@ body: | successors: %bb.8(0x7c000000) liveins: $r0, $r1 - renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.next205) + renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.next205) tCMPi8 renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr t2IT 0, 4, implicit-def $itstate renamable $r0 = tMOVi8 $noreg, 0, 0 /* CC::eq */, $cpsr, implicit killed $r0, implicit $itstate diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec.mir index a4c977ac0a68b..3d53b0acf8b98 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec.mir @@ -115,7 +115,7 @@ body: | ; CHECK-LOB: bb.0.entry: ; CHECK-LOB: successors: %bb.1(0x50000000), %bb.5(0x30000000) ; CHECK-LOB: liveins: $r0, $r1 - ; CHECK-LOB: renamable $r2 = t2LDRSHi12 renamable $r1, 2, 14 /* CC::al */, $noreg :: (load 2 from %ir.idx) + ; CHECK-LOB: renamable $r2 = t2LDRSHi12 renamable $r1, 2, 14 /* CC::al */, $noreg :: (load (s16) from %ir.idx) ; CHECK-LOB: t2CMPri renamable $r2, -1, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK-LOB: tBcc %bb.5, 13 /* CC::le */, killed $cpsr ; CHECK-LOB: bb.1.while.cond.preheader: @@ -129,15 +129,15 @@ body: | ; CHECK-LOB: bb.3.land.rhs: ; CHECK-LOB: successors: %bb.4(0x80000000) ; CHECK-LOB: liveins: $r0, $r1 - ; CHECK-LOB: renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.info2) - ; CHECK-LOB: renamable $r2 = tLDRHi killed renamable $r2, 1, 14 /* CC::al */, $noreg :: (load 2 from %ir.idx3) + ; CHECK-LOB: renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.info2) + ; CHECK-LOB: renamable $r2 = tLDRHi killed renamable $r2, 1, 14 /* CC::al */, $noreg :: (load (s16) from %ir.idx3) ; CHECK-LOB: tCMPr killed renamable $r2, renamable $r1, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK-LOB: t2IT 0, 8, implicit-def $itstate ; CHECK-LOB: tBX_RET 0 /* CC::eq */, killed $cpsr, implicit $r0, implicit killed $itstate ; CHECK-LOB: bb.4.while.body: ; CHECK-LOB: successors: %bb.9(0x04000000), %bb.3(0x7c000000) ; CHECK-LOB: liveins: $r0, $r1 - ; CHECK-LOB: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.next4) + ; CHECK-LOB: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.next4) ; CHECK-LOB: tCBZ renamable $r0, %bb.9 ; CHECK-LOB: t2LE %bb.3 ; CHECK-LOB: bb.5.while.cond9.preheader: @@ -147,18 +147,18 @@ body: | ; CHECK-LOB: bb.6.land.rhs11.lr.ph: ; CHECK-LOB: successors: %bb.7(0x80000000) ; CHECK-LOB: liveins: $r0, $r1 - ; CHECK-LOB: renamable $r1 = t2LDRSHi12 killed renamable $r1, 0, 14 /* CC::al */, $noreg :: (load 2 from %ir.data16143) + ; CHECK-LOB: renamable $r1 = t2LDRSHi12 killed renamable $r1, 0, 14 /* CC::al */, $noreg :: (load (s16) from %ir.data16143) ; CHECK-LOB: bb.7.land.rhs11: ; CHECK-LOB: successors: %bb.10(0x04000000), %bb.8(0x7c000000) ; CHECK-LOB: liveins: $r0, $r1 - ; CHECK-LOB: renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.info12) - ; CHECK-LOB: renamable $r2 = tLDRBi killed renamable $r2, 0, 14 /* CC::al */, $noreg :: (load 1 from %ir.data165, align 2) + ; CHECK-LOB: renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.info12) + ; CHECK-LOB: renamable $r2 = tLDRBi killed renamable $r2, 0, 14 /* CC::al */, $noreg :: (load (s8) from %ir.data165, align 2) ; CHECK-LOB: tCMPr killed renamable $r2, renamable $r1, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK-LOB: tBcc %bb.10, 0 /* CC::eq */, killed $cpsr ; CHECK-LOB: bb.8.while.body19: ; CHECK-LOB: successors: %bb.9(0x04000000), %bb.7(0x7c000000) ; CHECK-LOB: liveins: $r0, $r1 - ; CHECK-LOB: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.next206) + ; CHECK-LOB: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.next206) ; CHECK-LOB: tCBZ renamable $r0, %bb.9 ; CHECK-LOB: t2LE %bb.7 ; CHECK-LOB: bb.9: @@ -171,7 +171,7 @@ body: | ; CHECK-NOLOB: bb.0.entry: ; CHECK-NOLOB: successors: %bb.1(0x50000000), %bb.5(0x30000000) ; CHECK-NOLOB: liveins: $r0, $r1 - ; CHECK-NOLOB: renamable $r2 = t2LDRSHi12 renamable $r1, 2, 14 /* CC::al */, $noreg :: (load 2 from %ir.idx) + ; CHECK-NOLOB: renamable $r2 = t2LDRSHi12 renamable $r1, 2, 14 /* CC::al */, $noreg :: (load (s16) from %ir.idx) ; CHECK-NOLOB: t2CMPri renamable $r2, -1, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK-NOLOB: tBcc %bb.5, 13 /* CC::le */, killed $cpsr ; CHECK-NOLOB: bb.1.while.cond.preheader: @@ -185,15 +185,15 @@ body: | ; CHECK-NOLOB: bb.3.land.rhs: ; CHECK-NOLOB: successors: %bb.4(0x80000000) ; CHECK-NOLOB: liveins: $r0, $r1 - ; CHECK-NOLOB: renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.info2) - ; CHECK-NOLOB: renamable $r2 = tLDRHi killed renamable $r2, 1, 14 /* CC::al */, $noreg :: (load 2 from %ir.idx3) + ; CHECK-NOLOB: renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.info2) + ; CHECK-NOLOB: renamable $r2 = tLDRHi killed renamable $r2, 1, 14 /* CC::al */, $noreg :: (load (s16) from %ir.idx3) ; CHECK-NOLOB: tCMPr killed renamable $r2, renamable $r1, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK-NOLOB: t2IT 0, 8, implicit-def $itstate ; CHECK-NOLOB: tBX_RET 0 /* CC::eq */, killed $cpsr, implicit $r0, implicit killed $itstate ; CHECK-NOLOB: bb.4.while.body: ; CHECK-NOLOB: successors: %bb.9(0x04000000), %bb.3(0x7c000000) ; CHECK-NOLOB: liveins: $r0, $r1 - ; CHECK-NOLOB: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.next4) + ; CHECK-NOLOB: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.next4) ; CHECK-NOLOB: tCMPi8 renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK-NOLOB: tBcc %bb.3, 1 /* CC::ne */, killed $cpsr ; CHECK-NOLOB: tB %bb.9, 14 /* CC::al */, $noreg @@ -204,18 +204,18 @@ body: | ; CHECK-NOLOB: bb.6.land.rhs11.lr.ph: ; CHECK-NOLOB: successors: %bb.7(0x80000000) ; CHECK-NOLOB: liveins: $r0, $r1 - ; CHECK-NOLOB: renamable $r1 = t2LDRSHi12 killed renamable $r1, 0, 14 /* CC::al */, $noreg :: (load 2 from %ir.data16143) + ; CHECK-NOLOB: renamable $r1 = t2LDRSHi12 killed renamable $r1, 0, 14 /* CC::al */, $noreg :: (load (s16) from %ir.data16143) ; CHECK-NOLOB: bb.7.land.rhs11: ; CHECK-NOLOB: successors: %bb.10(0x04000000), %bb.8(0x7c000000) ; CHECK-NOLOB: liveins: $r0, $r1 - ; CHECK-NOLOB: renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.info12) - ; CHECK-NOLOB: renamable $r2 = tLDRBi killed renamable $r2, 0, 14 /* CC::al */, $noreg :: (load 1 from %ir.data165, align 2) + ; CHECK-NOLOB: renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.info12) + ; CHECK-NOLOB: renamable $r2 = tLDRBi killed renamable $r2, 0, 14 /* CC::al */, $noreg :: (load (s8) from %ir.data165, align 2) ; CHECK-NOLOB: tCMPr killed renamable $r2, renamable $r1, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK-NOLOB: tBcc %bb.10, 0 /* CC::eq */, killed $cpsr ; CHECK-NOLOB: bb.8.while.body19: ; CHECK-NOLOB: successors: %bb.9(0x04000000), %bb.7(0x7c000000) ; CHECK-NOLOB: liveins: $r0, $r1 - ; CHECK-NOLOB: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.next206) + ; CHECK-NOLOB: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.next206) ; CHECK-NOLOB: tCMPi8 renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK-NOLOB: tBcc %bb.7, 1 /* CC::ne */, killed $cpsr ; CHECK-NOLOB: bb.9: @@ -228,7 +228,7 @@ body: | successors: %bb.5(0x50000000), %bb.1(0x30000000) liveins: $r0, $r1 - renamable $r2 = t2LDRSHi12 renamable $r1, 2, 14, $noreg :: (load 2 from %ir.idx) + renamable $r2 = t2LDRSHi12 renamable $r1, 2, 14, $noreg :: (load (s16) from %ir.idx) t2CMPri renamable $r2, -1, 14, $noreg, implicit-def $cpsr t2Bcc %bb.1, 13, killed $cpsr @@ -249,8 +249,8 @@ body: | successors: %bb.10(0x80000000) liveins: $r0, $r1 - renamable $r2 = tLDRi renamable $r0, 1, 14, $noreg :: (load 4 from %ir.info2) - renamable $r2 = tLDRHi killed renamable $r2, 1, 14, $noreg :: (load 2 from %ir.idx3) + renamable $r2 = tLDRi renamable $r0, 1, 14, $noreg :: (load (s32) from %ir.info2) + renamable $r2 = tLDRHi killed renamable $r2, 1, 14, $noreg :: (load (s16) from %ir.idx3) tCMPr killed renamable $r2, renamable $r1, 14, $noreg, implicit-def $cpsr t2IT 0, 8, implicit-def $itstate tBX_RET 0, killed $cpsr, implicit $r0, implicit killed $itstate @@ -259,7 +259,7 @@ body: | successors: %bb.8(0x04000000), %bb.7(0x7c000000) liveins: $r0, $r1 - renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (load 4 from %ir.next4) + renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (load (s32) from %ir.next4) tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr t2Bcc %bb.7, 1, killed $cpsr t2B %bb.8, 14, $noreg @@ -275,14 +275,14 @@ body: | successors: %bb.3(0x80000000) liveins: $r0, $r1 - renamable $r1 = t2LDRSHi12 killed renamable $r1, 0, 14, $noreg :: (load 2 from %ir.data16143) + renamable $r1 = t2LDRSHi12 killed renamable $r1, 0, 14, $noreg :: (load (s16) from %ir.data16143) bb.3.land.rhs11: successors: %bb.9(0x04000000), %bb.4(0x7c000000) liveins: $r0, $r1 - renamable $r2 = tLDRi renamable $r0, 1, 14, $noreg :: (load 4 from %ir.info12) - renamable $r2 = tLDRBi killed renamable $r2, 0, 14, $noreg :: (load 1 from %ir.data165, align 2) + renamable $r2 = tLDRi renamable $r0, 1, 14, $noreg :: (load (s32) from %ir.info12) + renamable $r2 = tLDRBi killed renamable $r2, 0, 14, $noreg :: (load (s8) from %ir.data165, align 2) tCMPr killed renamable $r2, renamable $r1, 14, $noreg, implicit-def $cpsr t2Bcc %bb.9, 0, killed $cpsr @@ -290,7 +290,7 @@ body: | successors: %bb.8(0x04000000), %bb.3(0x7c000000) liveins: $r0, $r1 - renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (load 4 from %ir.next206) + renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (load (s32) from %ir.next206) tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr t2Bcc %bb.3, 1, killed $cpsr diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-vpsel-liveout.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-vpsel-liveout.mir index 950702dd8b439..ef2e85bdefec1 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-vpsel-liveout.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-vpsel-liveout.mir @@ -120,8 +120,8 @@ body: | ; CHECK: bb.2.vector.body: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $q0, $r0, $r1 - ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 0, $noreg :: (load 8 from %ir.lsr.iv17, align 2) - ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRHS32_post killed renamable $r1, 8, 0, killed $noreg :: (load 8 from %ir.lsr.iv1820, align 2) + ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 0, $noreg :: (load (s64) from %ir.lsr.iv17, align 2) + ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRHS32_post killed renamable $r1, 8, 0, killed $noreg :: (load (s64) from %ir.lsr.iv1820, align 2) ; CHECK: renamable $q1 = nsw MVE_VMULi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 ; CHECK: renamable $q0 = MVE_VADDi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 @@ -161,8 +161,8 @@ body: | renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg MVE_VPST 4, implicit $vpr - renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2) - renamable $r1, renamable $q2 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, killed renamable $vpr :: (load 8 from %ir.lsr.iv1820, align 2) + renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv17, align 2) + renamable $r1, renamable $q2 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, killed renamable $vpr :: (load (s64) from %ir.lsr.iv1820, align 2) $lr = tMOVr $r3, 14, $noreg renamable $q1 = nsw MVE_VMULi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 renamable $r3, dead $cpsr = nsw tSUBi8 killed $r3, 1, 14, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/non-masked-load.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/non-masked-load.mir index 994d1e1e31883..2c363760b0600 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/non-masked-load.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/non-masked-load.mir @@ -137,8 +137,8 @@ body: | ; CHECK: renamable $vpr = MVE_VCTP8 renamable $r2, 0, $noreg ; CHECK: $q1 = MVE_VORR killed $q0, killed $q0, 0, $noreg, undef $q1 ; CHECK: MVE_VPST 2, implicit $vpr - ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRBU8_post killed renamable $r1, 16, 1, killed renamable $vpr :: (load 16 from %ir.lsr.iv2022, align 1) - ; CHECK: renamable $r0, renamable $q2 = MVE_VLDRBU8_post killed renamable $r0, 16, 1, $noreg :: (load 16 from %ir.lsr.iv19, align 1) + ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRBU8_post killed renamable $r1, 16, 1, killed renamable $vpr :: (load (s128) from %ir.lsr.iv2022, align 1) + ; CHECK: renamable $r0, renamable $q2 = MVE_VLDRBU8_post killed renamable $r0, 16, 1, $noreg :: (load (s128) from %ir.lsr.iv19, align 1) ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 16, 14 /* CC::al */, $noreg ; CHECK: renamable $q2 = MVE_VADDi8 killed renamable $q2, renamable $q1, 0, $noreg, undef renamable $q2 ; CHECK: renamable $q0 = MVE_VADDi8 killed renamable $q2, killed renamable $q0, 0, $noreg, undef renamable $q0 @@ -188,8 +188,8 @@ body: | renamable $vpr = MVE_VCTP8 renamable $r2, 0, $noreg $q1 = MVE_VORR killed $q0, $q0, 0, $noreg, undef $q1 MVE_VPST 2, implicit $vpr - renamable $r1, renamable $q0 = MVE_VLDRBU8_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv2022, align 1) - renamable $r0, renamable $q2 = MVE_VLDRBU8_post killed renamable $r0, 16, 1, $noreg :: (load 16 from %ir.lsr.iv19, align 1) + renamable $r1, renamable $q0 = MVE_VLDRBU8_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv2022, align 1) + renamable $r0, renamable $q2 = MVE_VLDRBU8_post killed renamable $r0, 16, 1, $noreg :: (load (s128) from %ir.lsr.iv19, align 1) renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 16, 14, $noreg renamable $q2 = MVE_VADDi8 killed renamable $q2, renamable $q1, 0, $noreg, undef renamable $q2 renamable $lr = t2LoopDec killed renamable $lr, 1 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/non-masked-store.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/non-masked-store.mir index da500cf7c88b5..06e700ba45b3f 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/non-masked-store.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/non-masked-store.mir @@ -124,10 +124,10 @@ body: | ; CHECK: renamable $vpr = MVE_VCTP8 renamable $r3, 0, $noreg ; CHECK: renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 16, 14 /* CC::al */, $noreg ; CHECK: MVE_VPST 4, implicit $vpr - ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRBU8_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv15, align 1) - ; CHECK: renamable $r2, renamable $q1 = MVE_VLDRBU8_post killed renamable $r2, 16, 1, killed renamable $vpr :: (load 16 from %ir.lsr.iv1618, align 1) + ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRBU8_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv15, align 1) + ; CHECK: renamable $r2, renamable $q1 = MVE_VLDRBU8_post killed renamable $r2, 16, 1, killed renamable $vpr :: (load (s128) from %ir.lsr.iv1618, align 1) ; CHECK: renamable $q0 = MVE_VADDi8 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 - ; CHECK: renamable $r0 = MVE_VSTRBU8_post killed renamable $q0, killed renamable $r0, 16, 1, $noreg :: (store 16 into %ir.lsr.iv1921, align 1) + ; CHECK: renamable $r0 = MVE_VSTRBU8_post killed renamable $q0, killed renamable $r0, 16, 1, $noreg :: (store (s128) into %ir.lsr.iv1921, align 1) ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2 ; CHECK: bb.3.for.cond.cleanup: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc @@ -163,11 +163,11 @@ body: | renamable $vpr = MVE_VCTP8 renamable $r3, 0, $noreg renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 16, 14, $noreg MVE_VPST 4, implicit $vpr - renamable $r1, renamable $q0 = MVE_VLDRBU8_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv15, align 1) - renamable $r2, renamable $q1 = MVE_VLDRBU8_post killed renamable $r2, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv1618, align 1) + renamable $r1, renamable $q0 = MVE_VLDRBU8_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv15, align 1) + renamable $r2, renamable $q1 = MVE_VLDRBU8_post killed renamable $r2, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv1618, align 1) renamable $lr = t2LoopDec killed renamable $lr, 1 renamable $q0 = MVE_VADDi8 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 - renamable $r0 = MVE_VSTRBU8_post killed renamable $q0, killed renamable $r0, 16, 1, $noreg :: (store 16 into %ir.lsr.iv1921, align 1) + renamable $r0 = MVE_VSTRBU8_post killed renamable $q0, killed renamable $r0, 16, 1, $noreg :: (store (s128) into %ir.lsr.iv1921, align 1) t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr tB %bb.3, 14, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/out-of-range-cbz.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/out-of-range-cbz.mir index d28bc9e7027ca..c136cfa74bb44 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/out-of-range-cbz.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/out-of-range-cbz.mir @@ -182,10 +182,10 @@ body: | ; CHECK: $r1 = t2MOVi16 target-flags(arm-lo16) @d, 14 /* CC::al */, $noreg ; CHECK: renamable $r0 = t2CSINC $zr, $zr, 10, implicit killed $cpsr ; CHECK: $r1 = t2MOVTi16 killed $r1, target-flags(arm-hi16) @d, 14 /* CC::al */, $noreg - ; CHECK: renamable $r2 = tLDRi killed renamable $r1, 0, 14 /* CC::al */, $noreg :: (dereferenceable load 4 from @d) + ; CHECK: renamable $r2 = tLDRi killed renamable $r1, 0, 14 /* CC::al */, $noreg :: (dereferenceable load (s32) from @d) ; CHECK: $r1 = t2MOVi16 target-flags(arm-lo16) @e, 14 /* CC::al */, $noreg ; CHECK: $r1 = t2MOVTi16 killed $r1, target-flags(arm-hi16) @e, 14 /* CC::al */, $noreg - ; CHECK: renamable $r3 = tLDRi renamable $r1, 0, 14 /* CC::al */, $noreg :: (dereferenceable load 4 from @e) + ; CHECK: renamable $r3 = tLDRi renamable $r1, 0, 14 /* CC::al */, $noreg :: (dereferenceable load (s32) from @e) ; CHECK: bb.3.j (align 4): ; CHECK: successors: %bb.4(0x04000000), %bb.3(0x7c000000) ; CHECK: liveins: $r0, $r1, $r2, $r3 @@ -194,7 +194,7 @@ body: | ; CHECK: t2LE %bb.3 ; CHECK: bb.4.if.end: ; CHECK: liveins: $r1, $r3 - ; CHECK: tSTRi killed renamable $r3, killed renamable $r1, 0, 14 /* CC::al */, $noreg :: (store 4 into @e) + ; CHECK: tSTRi killed renamable $r3, killed renamable $r1, 0, 14 /* CC::al */, $noreg :: (store (s32) into @e) ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */ ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc ; CHECK: bb.5.j.us.us.preheader: @@ -203,66 +203,66 @@ body: | ; CHECK: $lr = t2MOVi16 target-flags(arm-lo16) @a, 14 /* CC::al */, $noreg ; CHECK: $r12 = t2MOVTi16 killed $r12, target-flags(arm-hi16) @d, 14 /* CC::al */, $noreg ; CHECK: $r2 = t2MOVi16 target-flags(arm-lo16) @e, 14 /* CC::al */, $noreg - ; CHECK: renamable $r3 = t2LDRi12 renamable $r12, 0, 14 /* CC::al */, $noreg :: (dereferenceable load 4 from @d) + ; CHECK: renamable $r3 = t2LDRi12 renamable $r12, 0, 14 /* CC::al */, $noreg :: (dereferenceable load (s32) from @d) ; CHECK: $lr = t2MOVTi16 killed $lr, target-flags(arm-hi16) @a, 14 /* CC::al */, $noreg ; CHECK: $r2 = t2MOVTi16 killed $r2, target-flags(arm-hi16) @e, 14 /* CC::al */, $noreg ; CHECK: bb.6.j.us.us (align 4): ; CHECK: successors: %bb.7(0x40000000), %bb.6(0x40000000) ; CHECK: liveins: $lr, $r2, $r3, $r12 ; CHECK: tCMPhir renamable $r3, renamable $lr, 14 /* CC::al */, $noreg, implicit-def $cpsr - ; CHECK: renamable $r1 = tLDRi renamable $r2, 0, 14 /* CC::al */, $noreg :: (dereferenceable load 4 from @e) + ; CHECK: renamable $r1 = tLDRi renamable $r2, 0, 14 /* CC::al */, $noreg :: (dereferenceable load (s32) from @e) ; CHECK: renamable $r0 = t2CSINC $zr, $zr, 10, implicit killed $cpsr ; CHECK: renamable $r0 = t2ANDrr killed renamable $r0, killed renamable $r1, 14 /* CC::al */, $noreg, $noreg - ; CHECK: tSTRi killed renamable $r0, renamable $r2, 0, 14 /* CC::al */, $noreg :: (store 4 into @e) + ; CHECK: tSTRi killed renamable $r0, renamable $r2, 0, 14 /* CC::al */, $noreg :: (store (s32) into @e) ; CHECK: tCBZ renamable $r3, %bb.7 ; CHECK: t2LE %bb.6 ; CHECK: bb.7.if.end.us.us.us: ; CHECK: successors: %bb.8(0x40000000), %bb.6(0x40000000) ; CHECK: liveins: $lr, $r2, $r12 ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */ - ; CHECK: renamable $r3 = t2LDRi12 renamable $r12, 0, 14 /* CC::al */, $noreg :: (dereferenceable load 4 from @d) + ; CHECK: renamable $r3 = t2LDRi12 renamable $r12, 0, 14 /* CC::al */, $noreg :: (dereferenceable load (s32) from @d) ; CHECK: tCBZ renamable $r3, %bb.8 ; CHECK: t2LE %bb.6 ; CHECK: bb.8.if.end.us.us.us.1: ; CHECK: successors: %bb.9(0x40000000), %bb.6(0x40000000) ; CHECK: liveins: $lr, $r2, $r12 ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */ - ; CHECK: renamable $r3 = t2LDRi12 renamable $r12, 0, 14 /* CC::al */, $noreg :: (dereferenceable load 4 from @d) + ; CHECK: renamable $r3 = t2LDRi12 renamable $r12, 0, 14 /* CC::al */, $noreg :: (dereferenceable load (s32) from @d) ; CHECK: tCBZ renamable $r3, %bb.9 ; CHECK: t2LE %bb.6 ; CHECK: bb.9.if.end.us.us.us.2: ; CHECK: successors: %bb.10(0x40000000), %bb.6(0x40000000) ; CHECK: liveins: $lr, $r2, $r12 ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */ - ; CHECK: renamable $r3 = t2LDRi12 renamable $r12, 0, 14 /* CC::al */, $noreg :: (dereferenceable load 4 from @d) + ; CHECK: renamable $r3 = t2LDRi12 renamable $r12, 0, 14 /* CC::al */, $noreg :: (dereferenceable load (s32) from @d) ; CHECK: tCBZ renamable $r3, %bb.10 ; CHECK: t2LE %bb.6 ; CHECK: bb.10.if.end.us.us.us.3: ; CHECK: successors: %bb.11(0x40000000), %bb.6(0x40000000) ; CHECK: liveins: $lr, $r2, $r12 ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */ - ; CHECK: renamable $r3 = t2LDRi12 renamable $r12, 0, 14 /* CC::al */, $noreg :: (dereferenceable load 4 from @d) + ; CHECK: renamable $r3 = t2LDRi12 renamable $r12, 0, 14 /* CC::al */, $noreg :: (dereferenceable load (s32) from @d) ; CHECK: tCBZ renamable $r3, %bb.11 ; CHECK: t2LE %bb.6 ; CHECK: bb.11.if.end.us.us.us.4: ; CHECK: successors: %bb.12(0x40000000), %bb.6(0x40000000) ; CHECK: liveins: $lr, $r2, $r12 ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */ - ; CHECK: renamable $r3 = t2LDRi12 renamable $r12, 0, 14 /* CC::al */, $noreg :: (dereferenceable load 4 from @d) + ; CHECK: renamable $r3 = t2LDRi12 renamable $r12, 0, 14 /* CC::al */, $noreg :: (dereferenceable load (s32) from @d) ; CHECK: tCBZ renamable $r3, %bb.12 ; CHECK: t2LE %bb.6 ; CHECK: bb.12.if.end.us.us.us.5: ; CHECK: successors: %bb.13(0x40000000), %bb.6(0x40000000) ; CHECK: liveins: $lr, $r2, $r12 ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */ - ; CHECK: renamable $r3 = t2LDRi12 renamable $r12, 0, 14 /* CC::al */, $noreg :: (dereferenceable load 4 from @d) + ; CHECK: renamable $r3 = t2LDRi12 renamable $r12, 0, 14 /* CC::al */, $noreg :: (dereferenceable load (s32) from @d) ; CHECK: tCBZ renamable $r3, %bb.13 ; CHECK: t2LE %bb.6 ; CHECK: bb.13.if.end.us.us.us.6: ; CHECK: successors: %bb.14(0x04000000), %bb.6(0x7c000000) ; CHECK: liveins: $lr, $r2, $r12 ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */ - ; CHECK: renamable $r3 = t2LDRi12 renamable $r12, 0, 14 /* CC::al */, $noreg :: (dereferenceable load 4 from @d) + ; CHECK: renamable $r3 = t2LDRi12 renamable $r12, 0, 14 /* CC::al */, $noreg :: (dereferenceable load (s32) from @d) ; CHECK: tCBZ renamable $r3, %bb.14 ; CHECK: t2LE %bb.6 ; CHECK: bb.14.if.end.us.us.us.7: @@ -274,12 +274,12 @@ body: | ; CHECK: $r1 = t2MOVi16 target-flags(arm-lo16) @a, 14 /* CC::al */, $noreg ; CHECK: $r0 = t2MOVTi16 killed $r0, target-flags(arm-hi16) @d, 14 /* CC::al */, $noreg ; CHECK: $r1 = t2MOVTi16 killed $r1, target-flags(arm-hi16) @a, 14 /* CC::al */, $noreg - ; CHECK: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (dereferenceable load 4 from @d) + ; CHECK: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (dereferenceable load (s32) from @d) ; CHECK: tCMPr renamable $r0, killed renamable $r1, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: $r1 = t2MOVi16 target-flags(arm-lo16) @e, 14 /* CC::al */, $noreg ; CHECK: $r1 = t2MOVTi16 killed $r1, target-flags(arm-hi16) @e, 14 /* CC::al */, $noreg ; CHECK: renamable $r2 = t2CSINC $zr, $zr, 10, implicit killed $cpsr - ; CHECK: renamable $r3 = tLDRi renamable $r1, 0, 14 /* CC::al */, $noreg :: (dereferenceable load 4 from @e) + ; CHECK: renamable $r3 = tLDRi renamable $r1, 0, 14 /* CC::al */, $noreg :: (dereferenceable load (s32) from @e) ; CHECK: bb.16.j.us27 (align 4): ; CHECK: successors: %bb.17(0x04000000), %bb.16(0x7c000000) ; CHECK: liveins: $r0, $r1, $r2, $r3 @@ -288,7 +288,7 @@ body: | ; CHECK: t2LE %bb.16 ; CHECK: bb.17.if.end.us38: ; CHECK: liveins: $r1, $r3 - ; CHECK: tSTRi killed renamable $r3, killed renamable $r1, 0, 14 /* CC::al */, $noreg :: (store 4 into @e) + ; CHECK: tSTRi killed renamable $r3, killed renamable $r1, 0, 14 /* CC::al */, $noreg :: (store (s32) into @e) ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */ ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc bb.0.entry: @@ -319,10 +319,10 @@ body: | $r1 = t2MOVi16 target-flags(arm-lo16) @d, 14, $noreg renamable $r0 = t2CSINC $zr, $zr, 10, implicit killed $cpsr $r1 = t2MOVTi16 killed $r1, target-flags(arm-hi16) @d, 14, $noreg - renamable $r2 = tLDRi killed renamable $r1, 0, 14, $noreg :: (dereferenceable load 4 from @d) + renamable $r2 = tLDRi killed renamable $r1, 0, 14, $noreg :: (dereferenceable load (s32) from @d) $r1 = t2MOVi16 target-flags(arm-lo16) @e, 14, $noreg $r1 = t2MOVTi16 killed $r1, target-flags(arm-hi16) @e, 14, $noreg - renamable $r3 = tLDRi renamable $r1, 0, 14, $noreg :: (dereferenceable load 4 from @e) + renamable $r3 = tLDRi renamable $r1, 0, 14, $noreg :: (dereferenceable load (s32) from @e) bb.13.j (align 4): successors: %bb.14(0x04000000), %bb.13(0x7c000000) @@ -335,7 +335,7 @@ body: | bb.14.if.end: liveins: $r1, $r3 - tSTRi killed renamable $r3, killed renamable $r1, 0, 14, $noreg :: (store 4 into @e) + tSTRi killed renamable $r3, killed renamable $r1, 0, 14, $noreg :: (store (s32) into @e) INLINEASM &"", 1 tPOP_RET 14, $noreg, def $r7, def $pc @@ -346,7 +346,7 @@ body: | $lr = t2MOVi16 target-flags(arm-lo16) @a, 14, $noreg $r12 = t2MOVTi16 killed $r12, target-flags(arm-hi16) @d, 14, $noreg $r2 = t2MOVi16 target-flags(arm-lo16) @e, 14, $noreg - renamable $r3 = t2LDRi12 renamable $r12, 0, 14, $noreg :: (dereferenceable load 4 from @d) + renamable $r3 = t2LDRi12 renamable $r12, 0, 14, $noreg :: (dereferenceable load (s32) from @d) $lr = t2MOVTi16 killed $lr, target-flags(arm-hi16) @a, 14, $noreg $r2 = t2MOVTi16 killed $r2, target-flags(arm-hi16) @e, 14, $noreg @@ -355,11 +355,11 @@ body: | liveins: $lr, $r2, $r3, $r12 tCMPhir renamable $r3, renamable $lr, 14, $noreg, implicit-def $cpsr - renamable $r1 = tLDRi renamable $r2, 0, 14, $noreg :: (dereferenceable load 4 from @e) + renamable $r1 = tLDRi renamable $r2, 0, 14, $noreg :: (dereferenceable load (s32) from @e) renamable $r0 = t2CSINC $zr, $zr, 10, implicit killed $cpsr tCMPi8 renamable $r3, 0, 14, $noreg, implicit-def $cpsr renamable $r0 = t2ANDrr killed renamable $r0, killed renamable $r1, 14, $noreg, $noreg - tSTRi killed renamable $r0, renamable $r2, 0, 14, $noreg :: (store 4 into @e) + tSTRi killed renamable $r0, renamable $r2, 0, 14, $noreg :: (store (s32) into @e) t2Bcc %bb.2, 1, killed $cpsr bb.3.if.end.us.us.us: @@ -367,7 +367,7 @@ body: | liveins: $lr, $r2, $r12 INLINEASM &"", 1 - renamable $r3 = t2LDRi12 renamable $r12, 0, 14, $noreg :: (dereferenceable load 4 from @d) + renamable $r3 = t2LDRi12 renamable $r12, 0, 14, $noreg :: (dereferenceable load (s32) from @d) tCMPi8 renamable $r3, 0, 14, $noreg, implicit-def $cpsr t2Bcc %bb.2, 1, killed $cpsr @@ -376,7 +376,7 @@ body: | liveins: $lr, $r2, $r12 INLINEASM &"", 1 - renamable $r3 = t2LDRi12 renamable $r12, 0, 14, $noreg :: (dereferenceable load 4 from @d) + renamable $r3 = t2LDRi12 renamable $r12, 0, 14, $noreg :: (dereferenceable load (s32) from @d) tCMPi8 renamable $r3, 0, 14, $noreg, implicit-def $cpsr t2Bcc %bb.2, 1, killed $cpsr @@ -385,7 +385,7 @@ body: | liveins: $lr, $r2, $r12 INLINEASM &"", 1 - renamable $r3 = t2LDRi12 renamable $r12, 0, 14, $noreg :: (dereferenceable load 4 from @d) + renamable $r3 = t2LDRi12 renamable $r12, 0, 14, $noreg :: (dereferenceable load (s32) from @d) tCMPi8 renamable $r3, 0, 14, $noreg, implicit-def $cpsr t2Bcc %bb.2, 1, killed $cpsr @@ -394,7 +394,7 @@ body: | liveins: $lr, $r2, $r12 INLINEASM &"", 1 - renamable $r3 = t2LDRi12 renamable $r12, 0, 14, $noreg :: (dereferenceable load 4 from @d) + renamable $r3 = t2LDRi12 renamable $r12, 0, 14, $noreg :: (dereferenceable load (s32) from @d) tCMPi8 renamable $r3, 0, 14, $noreg, implicit-def $cpsr t2Bcc %bb.2, 1, killed $cpsr @@ -403,7 +403,7 @@ body: | liveins: $lr, $r2, $r12 INLINEASM &"", 1 - renamable $r3 = t2LDRi12 renamable $r12, 0, 14, $noreg :: (dereferenceable load 4 from @d) + renamable $r3 = t2LDRi12 renamable $r12, 0, 14, $noreg :: (dereferenceable load (s32) from @d) tCMPi8 renamable $r3, 0, 14, $noreg, implicit-def $cpsr t2Bcc %bb.2, 1, killed $cpsr @@ -412,7 +412,7 @@ body: | liveins: $lr, $r2, $r12 INLINEASM &"", 1 - renamable $r3 = t2LDRi12 renamable $r12, 0, 14, $noreg :: (dereferenceable load 4 from @d) + renamable $r3 = t2LDRi12 renamable $r12, 0, 14, $noreg :: (dereferenceable load (s32) from @d) tCMPi8 renamable $r3, 0, 14, $noreg, implicit-def $cpsr t2Bcc %bb.2, 1, killed $cpsr @@ -421,7 +421,7 @@ body: | liveins: $lr, $r2, $r12 INLINEASM &"", 1 - renamable $r3 = t2LDRi12 renamable $r12, 0, 14, $noreg :: (dereferenceable load 4 from @d) + renamable $r3 = t2LDRi12 renamable $r12, 0, 14, $noreg :: (dereferenceable load (s32) from @d) tCMPi8 renamable $r3, 0, 14, $noreg, implicit-def $cpsr t2Bcc %bb.2, 1, killed $cpsr @@ -436,12 +436,12 @@ body: | $r1 = t2MOVi16 target-flags(arm-lo16) @a, 14, $noreg $r0 = t2MOVTi16 killed $r0, target-flags(arm-hi16) @d, 14, $noreg $r1 = t2MOVTi16 killed $r1, target-flags(arm-hi16) @a, 14, $noreg - renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (dereferenceable load 4 from @d) + renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (dereferenceable load (s32) from @d) tCMPr renamable $r0, killed renamable $r1, 14, $noreg, implicit-def $cpsr $r1 = t2MOVi16 target-flags(arm-lo16) @e, 14, $noreg $r1 = t2MOVTi16 killed $r1, target-flags(arm-hi16) @e, 14, $noreg renamable $r2 = t2CSINC $zr, $zr, 10, implicit killed $cpsr - renamable $r3 = tLDRi renamable $r1, 0, 14, $noreg :: (dereferenceable load 4 from @e) + renamable $r3 = tLDRi renamable $r1, 0, 14, $noreg :: (dereferenceable load (s32) from @e) bb.16.j.us27 (align 4): successors: %bb.17(0x04000000), %bb.16(0x7c000000) @@ -454,7 +454,7 @@ body: | bb.17.if.end.us38: liveins: $r1, $r3 - tSTRi killed renamable $r3, killed renamable $r1, 0, 14, $noreg :: (store 4 into @e) + tSTRi killed renamable $r3, killed renamable $r1, 0, 14, $noreg :: (store (s32) into @e) INLINEASM &"", 1 tPOP_RET 14, $noreg, def $r7, def $pc diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-invariant.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-invariant.mir index ef4c13ea8e888..86634f2cf750c 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-invariant.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-invariant.mir @@ -85,12 +85,12 @@ body: | ; CHECK: successors: %bb.2(0x80000000) ; CHECK: liveins: $r0, $r2 ; CHECK: renamable $r1 = tADDrSPi $sp, 2, 14 /* CC::al */, $noreg - ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r1, 0, 0, $noreg :: (load 16 from %fixed-stack.0, align 8) + ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r1, 0, 0, $noreg :: (load (s128) from %fixed-stack.0, align 8) ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r2 ; CHECK: bb.2.vector.body: ; CHECK: successors: %bb.2(0x7c000000), %bb.4(0x04000000) ; CHECK: liveins: $lr, $q0, $r0 - ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 0, $noreg :: (load 8 from %ir.lsr.iv17, align 2) + ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 0, $noreg :: (load (s64) from %ir.lsr.iv17, align 2) ; CHECK: renamable $q1 = MVE_VADDi32 renamable $q0, killed renamable $q1, 0, killed $noreg, undef renamable $q1 ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 ; CHECK: tB %bb.4, 14 /* CC::al */, $noreg @@ -122,7 +122,7 @@ body: | renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg renamable $r3 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r1, 19, 14 /* CC::al */, $noreg, $noreg renamable $r1 = tADDrSPi $sp, 2, 14 /* CC::al */, $noreg - renamable $q0 = MVE_VLDRWU32 killed renamable $r1, 0, 0, $noreg :: (load 16 from %fixed-stack.0, align 8) + renamable $q0 = MVE_VLDRWU32 killed renamable $r1, 0, 0, $noreg :: (load (s128) from %fixed-stack.0, align 8) $lr = t2DoLoopStart renamable $r3 $r1 = tMOVr killed $r3, 14 /* CC::al */, $noreg @@ -136,7 +136,7 @@ body: | renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg renamable $lr = t2LoopDec killed renamable $lr, 1 MVE_VPST 4, implicit $vpr - renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2) + renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv17, align 2) renamable $q1 = MVE_VADDi32 renamable $q0, killed renamable $q1, 1, killed renamable $vpr, undef renamable $q1 t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr tB %bb.4, 14 /* CC::al */, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout.mir index 8d0c21c5b6125..cb3cf0668350c 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout.mir @@ -94,8 +94,8 @@ body: | ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $q0, $r0, $r1, $r3 ; CHECK: renamable $r3, dead $cpsr = tSUBi8 killed $r3, 1, 14 /* CC::al */, $noreg - ; CHECK: renamable $r1, renamable $q1 = MVE_VLDRBU16_post killed renamable $r1, 8, 0, $noreg :: (load 8 from %ir.input_2_cast, align 1) - ; CHECK: renamable $r0, renamable $q2 = MVE_VLDRBU16_post killed renamable $r0, 8, 0, $noreg :: (load 8 from %ir.input_1_cast, align 1) + ; CHECK: renamable $r1, renamable $q1 = MVE_VLDRBU16_post killed renamable $r1, 8, 0, $noreg :: (load (s64) from %ir.input_2_cast, align 1) + ; CHECK: renamable $r0, renamable $q2 = MVE_VLDRBU16_post killed renamable $r0, 8, 0, $noreg :: (load (s64) from %ir.input_1_cast, align 1) ; CHECK: renamable $q1 = MVE_VADDi16 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 ; CHECK: renamable $q0 = MVE_VADDi16 killed renamable $q1, killed renamable $q0, 0, killed $noreg, undef renamable $q0 ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 @@ -136,8 +136,8 @@ body: | renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 8, 14, $noreg renamable $lr = t2LoopDec killed renamable $lr, 1 MVE_VPST 1, implicit $vpr - renamable $r1, renamable $q1 = MVE_VLDRBU16_post killed renamable $r1, 8, 1, renamable $vpr :: (load 8 from %ir.input_2_cast, align 1) - renamable $r0, renamable $q2 = MVE_VLDRBU16_post killed renamable $r0, 8, 1, renamable $vpr :: (load 8 from %ir.input_1_cast, align 1) + renamable $r1, renamable $q1 = MVE_VLDRBU16_post killed renamable $r1, 8, 1, renamable $vpr :: (load (s64) from %ir.input_2_cast, align 1) + renamable $r0, renamable $q2 = MVE_VLDRBU16_post killed renamable $r0, 8, 1, renamable $vpr :: (load (s64) from %ir.input_1_cast, align 1) renamable $q1 = MVE_VADDi16 killed renamable $q2, killed renamable $q1, 1, renamable $vpr, undef renamable $q1 renamable $q0 = MVE_VADDi16 killed renamable $q1, killed renamable $q0, 1, killed renamable $vpr, undef renamable $q0 t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions-vpt-liveout.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions-vpt-liveout.mir index eb0b41f5dac28..80875fca9d738 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions-vpt-liveout.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions-vpt-liveout.mir @@ -338,8 +338,8 @@ body: | ; CHECK: bb.2.vector.body (align 4): ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $q0, $r0, $r1 - ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRBU32_post killed renamable $r0, 4, 0, $noreg :: (load 4 from %ir.lsr.iv13, align 1) - ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRBU32_post killed renamable $r1, 4, 0, $noreg :: (load 4 from %ir.lsr.iv1416, align 1) + ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRBU32_post killed renamable $r0, 4, 0, $noreg :: (load (s32) from %ir.lsr.iv13, align 1) + ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRBU32_post killed renamable $r1, 4, 0, $noreg :: (load (s32) from %ir.lsr.iv1416, align 1) ; CHECK: renamable $q1 = nuw nsw MVE_VMULi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 ; CHECK: renamable $q0 = MVE_VADDi32 killed renamable $q0, killed renamable $q1, 0, killed $noreg, killed renamable $q0 ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 @@ -380,8 +380,8 @@ body: | renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg MVE_VPST 4, implicit $vpr - renamable $r0, renamable $q1 = MVE_VLDRBU32_post killed renamable $r0, 4, 1, renamable $vpr :: (load 4 from %ir.lsr.iv13, align 1) - renamable $r1, renamable $q2 = MVE_VLDRBU32_post killed renamable $r1, 4, 1, renamable $vpr :: (load 4 from %ir.lsr.iv1416, align 1) + renamable $r0, renamable $q1 = MVE_VLDRBU32_post killed renamable $r0, 4, 1, renamable $vpr :: (load (s32) from %ir.lsr.iv13, align 1) + renamable $r1, renamable $q2 = MVE_VLDRBU32_post killed renamable $r1, 4, 1, renamable $vpr :: (load (s32) from %ir.lsr.iv1416, align 1) renamable $lr = t2LoopDec killed renamable $lr, 1 renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg renamable $q1 = nuw nsw MVE_VMULi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 @@ -444,8 +444,8 @@ body: | ; CHECK: bb.2.vector.body (align 4): ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $q0, $r0, $r1 - ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRBU32_post killed renamable $r0, 4, 0, $noreg :: (load 4 from %ir.lsr.iv14, align 1) - ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRBU32_post killed renamable $r1, 4, 0, $noreg :: (load 4 from %ir.lsr.iv1517, align 1) + ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRBU32_post killed renamable $r0, 4, 0, $noreg :: (load (s32) from %ir.lsr.iv14, align 1) + ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRBU32_post killed renamable $r1, 4, 0, $noreg :: (load (s32) from %ir.lsr.iv1517, align 1) ; CHECK: renamable $q1 = MVE_VADDi32 renamable $q0, killed renamable $q1, 0, $noreg, undef renamable $q1 ; CHECK: renamable $q0 = MVE_VADDi32 killed renamable $q1, killed renamable $q2, 0, killed $noreg, killed renamable $q0 ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 @@ -486,8 +486,8 @@ body: | renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg MVE_VPST 4, implicit $vpr - renamable $r0, renamable $q1 = MVE_VLDRBU32_post killed renamable $r0, 4, 1, renamable $vpr :: (load 4 from %ir.lsr.iv14, align 1) - renamable $r1, renamable $q2 = MVE_VLDRBU32_post killed renamable $r1, 4, 1, renamable $vpr :: (load 4 from %ir.lsr.iv1517, align 1) + renamable $r0, renamable $q1 = MVE_VLDRBU32_post killed renamable $r0, 4, 1, renamable $vpr :: (load (s32) from %ir.lsr.iv14, align 1) + renamable $r1, renamable $q2 = MVE_VLDRBU32_post killed renamable $r1, 4, 1, renamable $vpr :: (load (s32) from %ir.lsr.iv1517, align 1) renamable $lr = t2LoopDec killed renamable $lr, 1 renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg renamable $q1 = MVE_VADDi32 renamable $q0, killed renamable $q1, 0, $noreg, undef renamable $q1 @@ -551,8 +551,8 @@ body: | ; CHECK: bb.2.vector.body (align 4): ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $q0, $r0, $r1 - ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 0, $noreg :: (load 8 from %ir.lsr.iv13, align 2) - ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRHS32_post killed renamable $r1, 8, 0, $noreg :: (load 8 from %ir.lsr.iv1416, align 2) + ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 0, $noreg :: (load (s64) from %ir.lsr.iv13, align 2) + ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRHS32_post killed renamable $r1, 8, 0, $noreg :: (load (s64) from %ir.lsr.iv1416, align 2) ; CHECK: renamable $q1 = nsw MVE_VMULi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 ; CHECK: renamable $q0 = MVE_VADDi32 killed renamable $q0, killed renamable $q1, 0, killed $noreg, killed renamable $q0 ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 @@ -593,8 +593,8 @@ body: | renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg MVE_VPST 4, implicit $vpr - renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv13, align 2) - renamable $r1, renamable $q2 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv1416, align 2) + renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv13, align 2) + renamable $r1, renamable $q2 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv1416, align 2) renamable $lr = t2LoopDec killed renamable $lr, 1 renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg renamable $q1 = nsw MVE_VMULi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 @@ -657,8 +657,8 @@ body: | ; CHECK: bb.2.vector.body (align 4): ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $q0, $r0, $r1 - ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 0, $noreg :: (load 8 from %ir.lsr.iv14, align 2) - ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRHS32_post killed renamable $r1, 8, 0, $noreg :: (load 8 from %ir.lsr.iv1517, align 2) + ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 0, $noreg :: (load (s64) from %ir.lsr.iv14, align 2) + ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRHS32_post killed renamable $r1, 8, 0, $noreg :: (load (s64) from %ir.lsr.iv1517, align 2) ; CHECK: renamable $q1 = MVE_VADDi32 renamable $q0, killed renamable $q1, 0, $noreg, undef renamable $q1 ; CHECK: renamable $q0 = MVE_VADDi32 killed renamable $q1, killed renamable $q2, 0, killed $noreg, killed renamable $q0 ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 @@ -699,8 +699,8 @@ body: | renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg MVE_VPST 4, implicit $vpr - renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv14, align 2) - renamable $r1, renamable $q2 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv1517, align 2) + renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv14, align 2) + renamable $r1, renamable $q2 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv1517, align 2) renamable $lr = t2LoopDec killed renamable $lr, 1 renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg renamable $q1 = MVE_VADDi32 renamable $q0, killed renamable $q1, 0, $noreg, undef renamable $q1 @@ -763,8 +763,8 @@ body: | ; CHECK: bb.2.vector.body (align 4): ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $q0, $r0, $r1 - ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg :: (load 16 from %ir.lsr.iv12, align 4) - ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRWU32_post killed renamable $r1, 16, 0, $noreg :: (load 16 from %ir.lsr.iv1315, align 4) + ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg :: (load (s128) from %ir.lsr.iv12, align 4) + ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRWU32_post killed renamable $r1, 16, 0, $noreg :: (load (s128) from %ir.lsr.iv1315, align 4) ; CHECK: renamable $q1 = nsw MVE_VMULi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 ; CHECK: renamable $q0 = MVE_VADDi32 killed renamable $q0, killed renamable $q1, 0, killed $noreg, killed renamable $q0 ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 @@ -805,8 +805,8 @@ body: | renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg MVE_VPST 4, implicit $vpr - renamable $r0, renamable $q1 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv12, align 4) - renamable $r1, renamable $q2 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv1315, align 4) + renamable $r0, renamable $q1 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv12, align 4) + renamable $r1, renamable $q2 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv1315, align 4) renamable $lr = t2LoopDec killed renamable $lr, 1 renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg renamable $q1 = nsw MVE_VMULi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 @@ -869,8 +869,8 @@ body: | ; CHECK: bb.2.vector.body (align 4): ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $q0, $r0, $r1 - ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg :: (load 16 from %ir.lsr.iv13, align 4) - ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRWU32_post killed renamable $r1, 16, 0, $noreg :: (load 16 from %ir.lsr.iv1416, align 4) + ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg :: (load (s128) from %ir.lsr.iv13, align 4) + ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRWU32_post killed renamable $r1, 16, 0, $noreg :: (load (s128) from %ir.lsr.iv1416, align 4) ; CHECK: renamable $q1 = MVE_VADDi32 killed renamable $q1, renamable $q0, 0, $noreg, undef renamable $q1 ; CHECK: renamable $q0 = MVE_VADDi32 killed renamable $q1, killed renamable $q2, 0, killed $noreg, killed renamable $q0 ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 @@ -911,8 +911,8 @@ body: | renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg MVE_VPST 4, implicit $vpr - renamable $r0, renamable $q1 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv13, align 4) - renamable $r1, renamable $q2 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv1416, align 4) + renamable $r0, renamable $q1 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv13, align 4) + renamable $r1, renamable $q2 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv1416, align 4) renamable $lr = t2LoopDec killed renamable $lr, 1 renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg renamable $q1 = MVE_VADDi32 killed renamable $q1, renamable $q0, 0, $noreg, undef renamable $q1 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/remove-elem-moves.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/remove-elem-moves.mir index d16b8679488a0..bf94eabeb7511 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/remove-elem-moves.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/remove-elem-moves.mir @@ -183,10 +183,10 @@ body: | ; CHECK: bb.4.vector.body: ; CHECK: successors: %bb.4(0x7c000000), %bb.5(0x04000000) ; CHECK: liveins: $r0, $r1, $r2, $r3, $r4, $r5, $r7, $r12 - ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRWU32_pre killed renamable $r0, 16, 0, $noreg :: (load 16 from %ir.scevgep18, align 4) + ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRWU32_pre killed renamable $r0, 16, 0, $noreg :: (load (s128) from %ir.scevgep18, align 4) ; CHECK: $lr = tMOVr killed $r5, 14 /* CC::al */, $noreg ; CHECK: renamable $q0 = nnan ninf nsz arcp contract afn reassoc MVE_VABSf32 killed renamable $q0, 0, $noreg, undef renamable $q0 - ; CHECK: renamable $r1 = MVE_VSTRBU8_pre killed renamable $q0, killed renamable $r1, 16, 0, $noreg :: (store 16 into %ir.scevgep13, align 4) + ; CHECK: renamable $r1 = MVE_VSTRBU8_pre killed renamable $q0, killed renamable $r1, 16, 0, $noreg :: (store (s128) into %ir.scevgep13, align 4) ; CHECK: renamable $lr = t2SUBri killed renamable $lr, 1, 14 /* CC::al */, $noreg, def $cpsr ; CHECK: $r5 = tMOVr killed $lr, 14 /* CC::al */, $noreg ; CHECK: tBcc %bb.4, 1 /* CC::ne */, killed $cpsr @@ -213,10 +213,10 @@ body: | ; CHECK: bb.8.while.body: ; CHECK: successors: %bb.8(0x7c000000), %bb.9(0x04000000) ; CHECK: liveins: $lr, $r0, $r1 - ; CHECK: renamable $s0 = VLDRS renamable $r1, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep3) + ; CHECK: renamable $s0 = VLDRS renamable $r1, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3) ; CHECK: renamable $r1, dead $cpsr = tADDi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg ; CHECK: renamable $s0 = nnan ninf nsz arcp contract afn reassoc VABSS killed renamable $s0, 14 /* CC::al */, $noreg - ; CHECK: VSTRS killed renamable $s0, renamable $r0, 1, 14 /* CC::al */, $noreg :: (store 4 into %ir.scevgep7) + ; CHECK: VSTRS killed renamable $s0, renamable $r0, 1, 14 /* CC::al */, $noreg :: (store (s32) into %ir.scevgep7) ; CHECK: renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 4, 14 /* CC::al */, $noreg ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.8 ; CHECK: bb.9.while.end: @@ -272,10 +272,10 @@ body: | successors: %bb.4(0x7c000000), %bb.5(0x04000000) liveins: $r0, $r1, $r2, $r3, $r4, $r5, $r7, $r12 - renamable $r0, renamable $q0 = MVE_VLDRWU32_pre killed renamable $r0, 16, 0, $noreg :: (load 16 from %ir.scevgep18, align 4) + renamable $r0, renamable $q0 = MVE_VLDRWU32_pre killed renamable $r0, 16, 0, $noreg :: (load (s128) from %ir.scevgep18, align 4) $lr = tMOVr killed $r5, 14, $noreg renamable $q0 = nnan ninf nsz arcp contract afn reassoc MVE_VABSf32 killed renamable $q0, 0, $noreg, undef renamable $q0 - renamable $r1 = MVE_VSTRBU8_pre killed renamable $q0, killed renamable $r1, 16, 0, $noreg :: (store 16 into %ir.scevgep13, align 4) + renamable $r1 = MVE_VSTRBU8_pre killed renamable $q0, killed renamable $r1, 16, 0, $noreg :: (store (s128) into %ir.scevgep13, align 4) renamable $lr = t2LoopDec killed renamable $lr, 1 $r5 = tMOVr $lr, 14, $noreg t2LoopEnd killed renamable $lr, %bb.4, implicit-def dead $cpsr @@ -311,10 +311,10 @@ body: | successors: %bb.8(0x7c000000), %bb.9(0x04000000) liveins: $lr, $r0, $r1 - renamable $s0 = VLDRS renamable $r1, 1, 14, $noreg :: (load 4 from %ir.scevgep3) + renamable $s0 = VLDRS renamable $r1, 1, 14, $noreg :: (load (s32) from %ir.scevgep3) renamable $r1, dead $cpsr = tADDi8 killed renamable $r1, 4, 14, $noreg renamable $s0 = nnan ninf nsz arcp contract afn reassoc VABSS killed renamable $s0, 14, $noreg - VSTRS killed renamable $s0, renamable $r0, 1, 14, $noreg :: (store 4 into %ir.scevgep7) + VSTRS killed renamable $s0, renamable $r0, 1, 14, $noreg :: (store (s32) into %ir.scevgep7) renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 4, 14, $noreg renamable $lr = t2LoopDec killed renamable $lr, 1 t2LoopEnd renamable $lr, %bb.8, implicit-def dead $cpsr diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/revert-non-header.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/revert-non-header.mir index d1a4f421474d9..616c16bda2fd4 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/revert-non-header.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/revert-non-header.mir @@ -250,12 +250,12 @@ body: | successors: %bb.5(0x07e00000), %bb.8(0x78200000) liveins: $lr, $r0, $r1, $r3, $r5, $r6, $r7, $r8, $r9, $r10, $r12 - renamable $r4 = tLDRr renamable $r3, $r6, 14, $noreg :: (load 4 from %ir.uglygep12) - renamable $r2 = tLDRr renamable $r5, $r6, 14, $noreg :: (load 4 from %ir.uglygep34) + renamable $r4 = tLDRr renamable $r3, $r6, 14, $noreg :: (load (s32) from %ir.uglygep12) + renamable $r2 = tLDRr renamable $r5, $r6, 14, $noreg :: (load (s32) from %ir.uglygep34) tCMPr renamable $r2, renamable $r4, 14, $noreg, implicit-def $cpsr t2IT 12, 1, implicit-def $itstate - tSTRr killed renamable $r4, renamable $r5, $r6, 12, $cpsr, implicit $itstate :: (store 4 into %ir.5) - tSTRr killed renamable $r2, renamable $r3, $r6, 12, $cpsr, implicit $itstate :: (store 4 into %ir.uglygep6) + tSTRr killed renamable $r4, renamable $r5, $r6, 12, $cpsr, implicit $itstate :: (store (s32) into %ir.5) + tSTRr killed renamable $r2, renamable $r3, $r6, 12, $cpsr, implicit $itstate :: (store (s32) into %ir.uglygep6) renamable $r6 = tADDhirr killed renamable $r6, renamable $r10, 12, $cpsr, implicit $r6, implicit $itstate renamable $r7 = nsw t2SUBrr killed renamable $r7, renamable $r9, 12, $cpsr, $noreg, implicit $r7, implicit killed $itstate t2IT 12, 8, implicit-def $itstate diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/revert-non-loop.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/revert-non-loop.mir index 5c8639eaa76e0..d8a1dcc3bba49 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/revert-non-loop.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/revert-non-loop.mir @@ -133,8 +133,8 @@ body: | successors: %bb.3(0x7c000000), %bb.4(0x04000000) liveins: $lr, $r0, $r1 - renamable $r2, renamable $r1 = t2LDRH_PRE killed renamable $r1, 2, 14, $noreg :: (load 2 from %ir.scevgep4) - early-clobber renamable $r0 = t2STRH_PRE killed renamable $r2, killed renamable $r0, 2, 14, $noreg :: (store 2 into %ir.scevgep7) + renamable $r2, renamable $r1 = t2LDRH_PRE killed renamable $r1, 2, 14, $noreg :: (load (s16) from %ir.scevgep4) + early-clobber renamable $r0 = t2STRH_PRE killed renamable $r2, killed renamable $r0, 2, 14, $noreg :: (store (s16) into %ir.scevgep7) renamable $lr = t2LoopDec killed renamable $lr, 1 t2LoopEnd renamable $lr, %bb.3, implicit-def dead $cpsr tB %bb.4, 14, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/revert-while.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/revert-while.mir index d4fa91bf3718b..a6458ce1a530f 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/revert-while.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/revert-while.mir @@ -115,8 +115,8 @@ body: | ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $r0, $r1 ; CHECK: dead renamable $r2 = SPACE 4096, undef renamable $r0 - ; CHECK: renamable $r2, renamable $r0 = t2LDR_PRE killed renamable $r0, 4, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep) - ; CHECK: early-clobber renamable $r1 = t2STR_PRE killed renamable $r2, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.scevgep1) + ; CHECK: renamable $r2, renamable $r0 = t2LDR_PRE killed renamable $r0, 4, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep) + ; CHECK: early-clobber renamable $r1 = t2STR_PRE killed renamable $r2, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store (s32) into %ir.scevgep1) ; CHECK: renamable $lr = t2SUBri killed renamable $lr, 1, 14 /* CC::al */, $noreg, def $cpsr ; CHECK: t2Bcc %bb.2, 1 /* CC::ne */, killed $cpsr ; CHECK: tB %bb.3, 14 /* CC::al */, $noreg @@ -146,8 +146,8 @@ body: | liveins: $lr, $r0, $r1 dead renamable $r2 = SPACE 4096, undef renamable $r0 - renamable $r2, renamable $r0 = t2LDR_PRE killed renamable $r0, 4, 14, $noreg :: (load 4 from %ir.scevgep) - early-clobber renamable $r1 = t2STR_PRE killed renamable $r2, killed renamable $r1, 4, 14, $noreg :: (store 4 into %ir.scevgep1) + renamable $r2, renamable $r0 = t2LDR_PRE killed renamable $r0, 4, 14, $noreg :: (load (s32) from %ir.scevgep) + early-clobber renamable $r1 = t2STR_PRE killed renamable $r2, killed renamable $r1, 4, 14, $noreg :: (store (s32) into %ir.scevgep1) renamable $lr = t2LoopDec killed renamable $lr, 1 t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr tB %bb.3, 14, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/revertcallearly.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/revertcallearly.mir index 2b0ca26454866..dff79b15ef8dd 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/revertcallearly.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/revertcallearly.mir @@ -67,7 +67,7 @@ body: | ; CHECK: bb.0.entry: ; CHECK: successors: %bb.1(0x50000000), %bb.4(0x30000000) ; CHECK: [[t2MOVi32imm:%[0-9]+]]:rgpr = t2MOVi32imm @d - ; CHECK: [[t2LDRi12_:%[0-9]+]]:gprnopc = t2LDRi12 [[t2MOVi32imm]], 0, 14 /* CC::al */, $noreg :: (dereferenceable load 4 from @d) + ; CHECK: [[t2LDRi12_:%[0-9]+]]:gprnopc = t2LDRi12 [[t2MOVi32imm]], 0, 14 /* CC::al */, $noreg :: (dereferenceable load (s32) from @d) ; CHECK: t2CMPri [[t2LDRi12_]], 0, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: t2Bcc %bb.4, 4 /* CC::mi */, $cpsr ; CHECK: t2B %bb.1, 14 /* CC::al */, $noreg @@ -94,7 +94,7 @@ body: | ; CHECK: bb.3.for.cond.for.end9_crit_edge: ; CHECK: successors: %bb.4(0x80000000) ; CHECK: [[t2MOVi1:%[0-9]+]]:rgpr = t2MOVi -1, 14 /* CC::al */, $noreg, $noreg - ; CHECK: t2STRi12 killed [[t2MOVi1]], [[t2MOVi32imm]], 0, 14 /* CC::al */, $noreg :: (store 4 into @d) + ; CHECK: t2STRi12 killed [[t2MOVi1]], [[t2MOVi32imm]], 0, 14 /* CC::al */, $noreg :: (store (s32) into @d) ; CHECK: bb.4.for.end9: ; CHECK: [[DEF:%[0-9]+]]:gpr = IMPLICIT_DEF ; CHECK: $r0 = COPY [[DEF]] @@ -103,7 +103,7 @@ body: | successors: %bb.1(0x50000000), %bb.4(0x30000000) %4:rgpr = t2MOVi32imm @d - %0:gprnopc = t2LDRi12 %4, 0, 14 /* CC::al */, $noreg :: (dereferenceable load 4 from @d) + %0:gprnopc = t2LDRi12 %4, 0, 14 /* CC::al */, $noreg :: (dereferenceable load (s32) from @d) t2CMPri %0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr t2Bcc %bb.4, 4 /* CC::mi */, $cpsr t2B %bb.1, 14 /* CC::al */, $noreg @@ -135,7 +135,7 @@ body: | successors: %bb.4(0x80000000) %12:rgpr = t2MOVi -1, 14 /* CC::al */, $noreg, $noreg - t2STRi12 killed %12, %4, 0, 14 /* CC::al */, $noreg :: (store 4 into @d) + t2STRi12 killed %12, %4, 0, 14 /* CC::al */, $noreg :: (store (s32) into @d) bb.4.for.end9: %13:gpr = IMPLICIT_DEF diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/safe-def-no-mov.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/safe-def-no-mov.mir index e596da82915bc..065e0e20ce377 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/safe-def-no-mov.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/safe-def-no-mov.mir @@ -106,8 +106,8 @@ body: | ; CHECK: bb.2.while.body: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $r0, $r1 - ; CHECK: renamable $r2, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep6) - ; CHECK: early-clobber renamable $r0 = t2STR_PRE killed renamable $r2, killed renamable $r0, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.scevgep2) + ; CHECK: renamable $r2, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep6) + ; CHECK: early-clobber renamable $r0 = t2STR_PRE killed renamable $r2, killed renamable $r0, 4, 14 /* CC::al */, $noreg :: (store (s32) into %ir.scevgep2) ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2 ; CHECK: bb.3.while.end: ; CHECK: $r0, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg @@ -133,8 +133,8 @@ body: | successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $lr, $r0, $r1 - renamable $r2, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep6) - early-clobber renamable $r0 = t2STR_PRE killed renamable $r2, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep2) + renamable $r2, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load (s32) from %ir.scevgep6) + early-clobber renamable $r0 = t2STR_PRE killed renamable $r2, killed renamable $r0, 4, 14, $noreg :: (store (s32) into %ir.scevgep2) renamable $lr = t2LoopDec killed renamable $lr, 1 t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr tB %bb.3, 14, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/safe-retaining.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/safe-retaining.mir index 7cc240353e87a..acd867b18aedf 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/safe-retaining.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/safe-retaining.mir @@ -128,7 +128,7 @@ body: | ; CHECK: bb.1.loop.ph: ; CHECK: successors: %bb.2(0x80000000) ; CHECK: liveins: $r0, $r1, $r2, $r3 - ; CHECK: renamable $r4 = tLDRspi $sp, 2, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8) + ; CHECK: renamable $r4 = tLDRspi $sp, 2, 14 /* CC::al */, $noreg :: (load (s32) from %fixed-stack.0, align 8) ; CHECK: dead $lr = MVE_DLSTP_32 killed renamable $r3 ; CHECK: $r12 = tMOVr killed $r4, 14 /* CC::al */, $noreg ; CHECK: bb.2.loop.body: @@ -136,10 +136,10 @@ body: | ; CHECK: liveins: $r0, $r1, $r2, $r12 ; CHECK: $lr = tMOVr $r12, 14 /* CC::al */, $noreg ; CHECK: renamable $r12 = t2SUBri killed $r12, 1, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 0, $noreg :: (load 16 from %ir.addr.b, align 4) - ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg :: (load 16 from %ir.addr.a, align 4) + ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 0, $noreg :: (load (s128) from %ir.addr.b, align 4) + ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg :: (load (s128) from %ir.addr.a, align 4) ; CHECK: renamable $q1 = MVE_VQSHRUNs32th killed renamable $q1, killed renamable $q0, 3, 0, $noreg - ; CHECK: renamable $r2 = MVE_VSTRWU32_post killed renamable $q1, killed renamable $r2, 16, 0, killed $noreg :: (store 16 into %ir.addr.c, align 4) + ; CHECK: renamable $r2 = MVE_VSTRWU32_post killed renamable $q1, killed renamable $r2, 16, 0, killed $noreg :: (store (s128) into %ir.addr.c, align 4) ; CHECK: dead $lr = MVE_LETP killed renamable $lr, %bb.2 ; CHECK: bb.3.exit: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc @@ -159,7 +159,7 @@ body: | successors: %bb.2(0x80000000) liveins: $r0, $r1, $r2, $r3, $r4, $lr - renamable $r4 = tLDRspi $sp, 2, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8) + renamable $r4 = tLDRspi $sp, 2, 14 /* CC::al */, $noreg :: (load (s32) from %fixed-stack.0, align 8) $lr = t2DoLoopStart renamable $r4 $r12 = tMOVr killed $r4, 14 /* CC::al */, $noreg @@ -172,12 +172,12 @@ body: | renamable $r12 = t2SUBri killed $r12, 1, 14 /* CC::al */, $noreg, $noreg renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg MVE_VPST 4, implicit $vpr - renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.addr.b, align 4) - renamable $r0, renamable $q1 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.addr.a, align 4) + renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.addr.b, align 4) + renamable $r0, renamable $q1 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load (s128) from %ir.addr.a, align 4) renamable $lr = t2LoopDec killed renamable $lr, 1 renamable $q1 = MVE_VQSHRUNs32th killed renamable $q1, killed renamable $q0, 3, 0, $noreg MVE_VPST 8, implicit $vpr - renamable $r2 = MVE_VSTRWU32_post killed renamable $q1, killed renamable $r2, 16, 1, killed renamable $vpr :: (store 16 into %ir.addr.c, align 4) + renamable $r2 = MVE_VSTRWU32_post killed renamable $q1, killed renamable $r2, 16, 1, killed renamable $vpr :: (store (s128) into %ir.addr.c, align 4) t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr tB %bb.3, 14 /* CC::al */, $noreg @@ -228,7 +228,7 @@ body: | ; CHECK: bb.1.loop.ph: ; CHECK: successors: %bb.2(0x80000000) ; CHECK: liveins: $r0, $r1, $r2, $r3 - ; CHECK: renamable $r12 = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8) + ; CHECK: renamable $r12 = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg :: (load (s32) from %fixed-stack.0, align 8) ; CHECK: dead $lr = MVE_DLSTP_16 killed renamable $r3 ; CHECK: $r4 = tMOVr killed $r12, 14 /* CC::al */, $noreg ; CHECK: bb.2.loop.body: @@ -236,11 +236,11 @@ body: | ; CHECK: liveins: $r0, $r1, $r2, $r4 ; CHECK: $lr = tMOVr $r4, 14 /* CC::al */, $noreg ; CHECK: renamable $r4, dead $cpsr = tSUBi8 killed $r4, 1, 14 /* CC::al */, $noreg - ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRHU16_post killed renamable $r1, 16, 0, $noreg :: (load 16 from %ir.addr.b, align 2) - ; CHECK: renamable $q1 = MVE_VLDRHU16 killed renamable $r0, 0, 0, $noreg :: (load 16 from %ir.addr.a, align 2) + ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRHU16_post killed renamable $r1, 16, 0, $noreg :: (load (s128) from %ir.addr.b, align 2) + ; CHECK: renamable $q1 = MVE_VLDRHU16 killed renamable $r0, 0, 0, $noreg :: (load (s128) from %ir.addr.a, align 2) ; CHECK: $r0 = tMOVr $r1, 14 /* CC::al */, $noreg ; CHECK: renamable $q1 = MVE_VQSHRUNs16th killed renamable $q1, killed renamable $q0, 1, 0, $noreg - ; CHECK: renamable $r2 = MVE_VSTRHU16_post killed renamable $q1, killed renamable $r2, 16, 0, killed $noreg :: (store 16 into %ir.addr.c, align 2) + ; CHECK: renamable $r2 = MVE_VSTRHU16_post killed renamable $q1, killed renamable $r2, 16, 0, killed $noreg :: (store (s128) into %ir.addr.c, align 2) ; CHECK: dead $lr = MVE_LETP killed renamable $lr, %bb.2 ; CHECK: bb.3.exit: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc @@ -260,7 +260,7 @@ body: | successors: %bb.2(0x80000000) liveins: $r0, $r1, $r2, $r3, $r4, $lr - renamable $r12 = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8) + renamable $r12 = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg :: (load (s32) from %fixed-stack.0, align 8) $lr = t2DoLoopStart renamable $r12 $r4 = tMOVr killed $r12, 14 /* CC::al */, $noreg @@ -273,13 +273,13 @@ body: | renamable $r4, dead $cpsr = tSUBi8 killed $r4, 1, 14 /* CC::al */, $noreg renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 8, 14 /* CC::al */, $noreg MVE_VPST 4, implicit $vpr - renamable $r1, renamable $q0 = MVE_VLDRHU16_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.addr.b, align 2) - renamable $q1 = MVE_VLDRHU16 killed renamable $r0, 0, 1, renamable $vpr :: (load 16 from %ir.addr.a, align 2) + renamable $r1, renamable $q0 = MVE_VLDRHU16_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.addr.b, align 2) + renamable $q1 = MVE_VLDRHU16 killed renamable $r0, 0, 1, renamable $vpr :: (load (s128) from %ir.addr.a, align 2) renamable $lr = t2LoopDec killed renamable $lr, 1 $r0 = tMOVr $r1, 14 /* CC::al */, $noreg renamable $q1 = MVE_VQSHRUNs16th killed renamable $q1, killed renamable $q0, 1, 0, $noreg MVE_VPST 8, implicit $vpr - renamable $r2 = MVE_VSTRHU16_post killed renamable $q1, killed renamable $r2, 16, 1, killed renamable $vpr :: (store 16 into %ir.addr.c, align 2) + renamable $r2 = MVE_VSTRHU16_post killed renamable $q1, killed renamable $r2, 16, 1, killed renamable $vpr :: (store (s128) into %ir.addr.c, align 2) t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr tB %bb.3, 14 /* CC::al */, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/size-limit.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/size-limit.mir index 1d3ce980b9f52..b069ed9927e68 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/size-limit.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/size-limit.mir @@ -129,10 +129,10 @@ body: | ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $r0, $r1, $r2 ; CHECK: dead renamable $r3 = SPACE 4070, undef renamable $r0 - ; CHECK: renamable $r12, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep3) - ; CHECK: renamable $r3, renamable $r2 = t2LDR_PRE killed renamable $r2, 4, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep7) + ; CHECK: renamable $r12, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3) + ; CHECK: renamable $r3, renamable $r2 = t2LDR_PRE killed renamable $r2, 4, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7) ; CHECK: renamable $r3 = nsw t2MUL killed renamable $r3, killed renamable $r12, 14 /* CC::al */, $noreg - ; CHECK: early-clobber renamable $r0 = t2STR_PRE killed renamable $r3, killed renamable $r0, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.scevgep11) + ; CHECK: early-clobber renamable $r0 = t2STR_PRE killed renamable $r3, killed renamable $r0, 4, 14 /* CC::al */, $noreg :: (store (s32) into %ir.scevgep11) ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2 ; CHECK: bb.3.for.cond.cleanup: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc @@ -163,10 +163,10 @@ body: | liveins: $lr, $r0, $r1, $r2 dead renamable $r3 = SPACE 4070, undef renamable $r0 - renamable $r12, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep3) - renamable $r3, renamable $r2 = t2LDR_PRE killed renamable $r2, 4, 14, $noreg :: (load 4 from %ir.scevgep7) + renamable $r12, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load (s32) from %ir.scevgep3) + renamable $r3, renamable $r2 = t2LDR_PRE killed renamable $r2, 4, 14, $noreg :: (load (s32) from %ir.scevgep7) renamable $r3 = nsw t2MUL killed renamable $r3, killed renamable $r12, 14, $noreg - early-clobber renamable $r0 = t2STR_PRE killed renamable $r3, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep11) + early-clobber renamable $r0 = t2STR_PRE killed renamable $r3, killed renamable $r0, 4, 14, $noreg :: (store (s32) into %ir.scevgep11) renamable $lr = t2LoopDec killed renamable $lr, 1 t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr tB %bb.3, 14, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/skip-debug.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/skip-debug.mir index ad4e483c5f89b..b448afb9c77e9 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/skip-debug.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/skip-debug.mir @@ -183,7 +183,7 @@ body: | ; CHECK: frame-setup CFI_INSTRUCTION offset $r4, -16 ; CHECK: dead $r7 = frame-setup tADDrSPi $sp, 2, 14 /* CC::al */, $noreg ; CHECK: frame-setup CFI_INSTRUCTION def_cfa $r7, 8 - ; CHECK: renamable $r12 = t2LDRi12 renamable $r0, 0, 14 /* CC::al */, $noreg, debug-location !24 :: (load 4 from %ir.a) + ; CHECK: renamable $r12 = t2LDRi12 renamable $r0, 0, 14 /* CC::al */, $noreg, debug-location !24 :: (load (s32) from %ir.a) ; CHECK: DBG_VALUE 0, $noreg, !21, !DIExpression(), debug-location !25 ; CHECK: DBG_VALUE $r12, $noreg, !20, !DIExpression(), debug-location !23 ; CHECK: tCBZ $r2, %bb.4, debug-location !28 @@ -207,7 +207,7 @@ body: | ; CHECK: DBG_VALUE $vpr, $noreg, !17, !DIExpression(), debug-location !30 ; CHECK: $q1 = MVE_VORR killed $q0, killed $q0, 0, $noreg, undef $q1 ; CHECK: MVE_VPST 8, implicit $vpr, debug-location !30 - ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRHU32_post killed renamable $r1, 8, 1, killed renamable $vpr, debug-location !30 :: (load 8 from %ir.lsr.iv14, align 2) + ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRHU32_post killed renamable $r1, 8, 1, killed renamable $vpr, debug-location !30 :: (load (s64) from %ir.lsr.iv14, align 2) ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg, debug-location !30 ; CHECK: renamable $q0 = MVE_VMOVLs16bh killed renamable $q0, 0, $noreg, undef renamable $q0, debug-location !30 ; CHECK: renamable $q0 = MVE_VSUBi32 renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0, debug-location !32 @@ -221,7 +221,7 @@ body: | ; CHECK: bb.4.for.cond.cleanup: ; CHECK: liveins: $r0, $r12 ; CHECK: DBG_VALUE $r12, $noreg, !20, !DIExpression(), debug-location !23 - ; CHECK: t2STRi12 killed renamable $r12, killed renamable $r0, 0, 14 /* CC::al */, $noreg, debug-location !33 :: (store 4 into %ir.a) + ; CHECK: t2STRi12 killed renamable $r12, killed renamable $r0, 0, 14 /* CC::al */, $noreg, debug-location !33 :: (store (s32) into %ir.a) ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $r6, def $r7, def $pc, debug-location !34 bb.0.entry: successors: %bb.4(0x30000000), %bb.1(0x50000000) @@ -241,7 +241,7 @@ body: | frame-setup CFI_INSTRUCTION offset $r4, -16 $r7 = frame-setup tADDrSPi $sp, 2, 14, $noreg frame-setup CFI_INSTRUCTION def_cfa $r7, 8 - renamable $r12 = t2LDRi12 renamable $r0, 0, 14, $noreg, debug-location !24 :: (load 4 from %ir.a) + renamable $r12 = t2LDRi12 renamable $r0, 0, 14, $noreg, debug-location !24 :: (load (s32) from %ir.a) DBG_VALUE 0, $noreg, !21, !DIExpression(), debug-location !29 DBG_VALUE $r12, $noreg, !20, !DIExpression(), debug-location !23 tCBZ $r2, %bb.4, debug-location !32 @@ -270,7 +270,7 @@ body: | DBG_VALUE $vpr, $noreg, !17, !DIExpression(), debug-location !34 $q1 = MVE_VORR killed $q0, $q0, 0, $noreg, undef $q1 MVE_VPST 8, implicit $vpr, debug-location !34 - renamable $r1, renamable $q0 = MVE_VLDRHU32_post killed renamable $r1, 8, 1, killed renamable $vpr, debug-location !34 :: (load 8 from %ir.lsr.iv14, align 2) + renamable $r1, renamable $q0 = MVE_VLDRHU32_post killed renamable $r1, 8, 1, killed renamable $vpr, debug-location !34 :: (load (s64) from %ir.lsr.iv14, align 2) renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14, $noreg, debug-location !34 renamable $q0 = MVE_VMOVLs16bh killed renamable $q0, 0, $noreg, undef renamable $q0, debug-location !34 renamable $lr = t2LoopDec killed renamable $lr, 1, debug-location !33 @@ -290,7 +290,7 @@ body: | liveins: $r0, $r12 DBG_VALUE $r12, $noreg, !20, !DIExpression(), debug-location !23 - t2STRi12 killed renamable $r12, killed renamable $r0, 0, 14, $noreg, debug-location !42 :: (store 4 into %ir.a) + t2STRi12 killed renamable $r12, killed renamable $r0, 0, 14, $noreg, debug-location !42 :: (store (s32) into %ir.a) tPOP_RET 14, $noreg, def $r4, def $r6, def $r7, def $pc, debug-location !43 ... diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/skip-vpt-debug.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/skip-vpt-debug.mir index 8637ab3f18565..a395a28992a1e 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/skip-vpt-debug.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/skip-vpt-debug.mir @@ -214,7 +214,7 @@ body: | ; CHECK: liveins: $lr, $q0, $r0, $r2 ; CHECK: DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29 ; CHECK: DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29 - ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg, debug-location !32 :: (load 16 from %ir.lsr.iv12, align 4, !tbaa !34) + ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg, debug-location !32 :: (load (s128) from %ir.lsr.iv12, align 4, !tbaa !34) ; CHECK: DBG_VALUE $r0, $noreg, !24, !DIExpression(DW_OP_LLVM_entry_value, 1), debug-location !29 ; CHECK: MVE_VPTv4f32 8, renamable $q1, renamable $q0, 12, implicit-def $vpr, debug-location !40 ; CHECK: renamable $q0 = MVE_VORR killed renamable $q1, killed renamable $q1, 1, killed renamable $vpr, killed renamable $q0, debug-location !40 @@ -236,12 +236,12 @@ body: | ; CHECK: DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29 ; CHECK: DBG_VALUE $r1, $noreg, !25, !DIExpression(), debug-location !29 ; CHECK: DBG_VALUE $r0, $noreg, !24, !DIExpression(), debug-location !29 - ; CHECK: renamable $s0 = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load 4 from constant-pool) + ; CHECK: renamable $s0 = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool) ; CHECK: bb.5.while.end: ; CHECK: liveins: $r2, $s0 ; CHECK: DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29 ; CHECK: DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29 - ; CHECK: VSTRS killed renamable $s0, killed renamable $r2, 0, 14 /* CC::al */, $noreg, debug-location !45 :: (store 4 into %ir.pResult, !tbaa !34) + ; CHECK: VSTRS killed renamable $s0, killed renamable $r2, 0, 14 /* CC::al */, $noreg, debug-location !45 :: (store (s32) into %ir.pResult, !tbaa !34) ; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc, debug-location !46 ; CHECK: bb.6 (align 4): ; CHECK: CONSTPOOL_ENTRY 0, %const.0, 4 @@ -286,7 +286,7 @@ body: | DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29 renamable $vpr = MVE_VCTP32 renamable $r1, 0, $noreg MVE_VPST 2, implicit $vpr, debug-location !32 - renamable $r0, renamable $q1 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr, debug-location !32 :: (load 16 from %ir.lsr.iv12, align 4, !tbaa !34) + renamable $r0, renamable $q1 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr, debug-location !32 :: (load (s128) from %ir.lsr.iv12, align 4, !tbaa !34) DBG_VALUE $r0, $noreg, !24, !DIExpression(DW_OP_LLVM_entry_value, 1), debug-location !29 renamable $vpr = MVE_VCMPf32 renamable $q1, renamable $q0, 12, 1, killed renamable $vpr, debug-location !40 renamable $q0 = MVE_VORR killed renamable $q1, renamable $q1, 1, killed renamable $vpr, killed renamable $q0, debug-location !40 @@ -314,14 +314,14 @@ body: | DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29 DBG_VALUE $r1, $noreg, !25, !DIExpression(), debug-location !29 DBG_VALUE $r0, $noreg, !24, !DIExpression(), debug-location !29 - renamable $s0 = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load 4 from constant-pool) + renamable $s0 = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool) bb.5.while.end: liveins: $r2, $s0 DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29 DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29 - VSTRS killed renamable $s0, killed renamable $r2, 0, 14 /* CC::al */, $noreg, debug-location !45 :: (store 4 into %ir.pResult, !tbaa !34) + VSTRS killed renamable $s0, killed renamable $r2, 0, 14 /* CC::al */, $noreg, debug-location !45 :: (store (s32) into %ir.pResult, !tbaa !34) frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc, debug-location !46 bb.6 (align 4): diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/switch.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/switch.mir index f30ebf459bd60..54baaa12fbdf1 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/switch.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/switch.mir @@ -157,7 +157,7 @@ body: | successors: %bb.2(0x26666665), %bb.5(0x5999999b) liveins: $lr, $r0, $r1, $r2, $r12 - renamable $r3 = tLDRBi renamable $r0, 0, 14, $noreg :: (load 1 from %ir.lsr.iv1) + renamable $r3 = tLDRBi renamable $r0, 0, 14, $noreg :: (load (s8) from %ir.lsr.iv1) renamable $r4 = t2SUBri renamable $r3, 108, 14, $noreg, $noreg tCMPi8 renamable $r4, 4, 14, $noreg, implicit-def $cpsr tBcc %bb.2, 8, killed $cpsr diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredicated-max.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredicated-max.mir index cd2d311a5eacc..5fd899ebfd1fc 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredicated-max.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredicated-max.mir @@ -100,10 +100,10 @@ body: | ; CHECK: $r3 = tMOVr $r12, 14 /* CC::al */, $noreg ; CHECK: renamable $vpr = MVE_VCTP16 renamable $r2, 0, $noreg ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRHU16_post killed renamable $r0, 16, 1, killed renamable $vpr :: (load 16 from %ir.lsr.iv17, align 2) + ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRHU16_post killed renamable $r0, 16, 1, killed renamable $vpr :: (load (s128) from %ir.lsr.iv17, align 2) ; CHECK: renamable $r3 = MVE_VMAXVs16 killed renamable $r3, killed renamable $q0, 0, $noreg ; CHECK: $lr = tMOVr $r5, 14 /* CC::al */, $noreg - ; CHECK: early-clobber renamable $r1 = t2STRH_POST killed renamable $r3, killed renamable $r1, 2, 14 /* CC::al */, $noreg :: (store 2 into %ir.lsr.iv.2) + ; CHECK: early-clobber renamable $r1 = t2STRH_POST killed renamable $r3, killed renamable $r1, 2, 14 /* CC::al */, $noreg :: (store (s16) into %ir.lsr.iv.2) ; CHECK: renamable $r5, dead $cpsr = nsw tSUBi8 killed $r5, 1, 14 /* CC::al */, $noreg ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 8, 14 /* CC::al */, $noreg ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.2 @@ -142,10 +142,10 @@ body: | $r3 = tMOVr $r12, 14 /* CC::al */, $noreg renamable $vpr = MVE_VCTP16 renamable $r2, 0, $noreg MVE_VPST 8, implicit $vpr - renamable $r0, renamable $q0 = MVE_VLDRHU16_post killed renamable $r0, 16, 1, killed renamable $vpr :: (load 16 from %ir.lsr.iv17, align 2) + renamable $r0, renamable $q0 = MVE_VLDRHU16_post killed renamable $r0, 16, 1, killed renamable $vpr :: (load (s128) from %ir.lsr.iv17, align 2) renamable $r3 = MVE_VMAXVs16 killed renamable $r3, killed renamable $q0, 0, $noreg $lr = tMOVr $r5, 14 /* CC::al */, $noreg - early-clobber renamable $r1 = t2STRH_POST killed renamable $r3, killed renamable $r1, 2, 14 /* CC::al */, $noreg :: (store 2 into %ir.lsr.iv.2) + early-clobber renamable $r1 = t2STRH_POST killed renamable $r3, killed renamable $r1, 2, 14 /* CC::al */, $noreg :: (store (s16) into %ir.lsr.iv.2) renamable $r5, dead $cpsr = nsw tSUBi8 killed $r5, 1, 14 /* CC::al */, $noreg renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 8, 14 /* CC::al */, $noreg renamable $lr = t2LoopDec killed renamable $lr, 1 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unrolled-and-vector.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unrolled-and-vector.mir index 6f60f35c6cac0..d4e1913c0af1a 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unrolled-and-vector.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unrolled-and-vector.mir @@ -288,10 +288,10 @@ body: | ; CHECK: bb.5.vector.body: ; CHECK: successors: %bb.5(0x7c000000), %bb.11(0x04000000) ; CHECK: liveins: $lr, $r0, $r1, $r2 - ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRBU8_post killed renamable $r1, 16, 0, $noreg :: (load 16 from %ir.lsr.iv46, align 1) - ; CHECK: renamable $r2, renamable $q1 = MVE_VLDRBU8_post killed renamable $r2, 16, 0, $noreg :: (load 16 from %ir.lsr.iv4749, align 1) + ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRBU8_post killed renamable $r1, 16, 0, $noreg :: (load (s128) from %ir.lsr.iv46, align 1) + ; CHECK: renamable $r2, renamable $q1 = MVE_VLDRBU8_post killed renamable $r2, 16, 0, $noreg :: (load (s128) from %ir.lsr.iv4749, align 1) ; CHECK: renamable $q0 = MVE_VADDi8 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 - ; CHECK: renamable $r0 = MVE_VSTRBU8_post killed renamable $q0, killed renamable $r0, 16, 0, killed $noreg :: (store 16 into %ir.lsr.iv5052, align 1) + ; CHECK: renamable $r0 = MVE_VSTRBU8_post killed renamable $q0, killed renamable $r0, 16, 0, killed $noreg :: (store (s128) into %ir.lsr.iv5052, align 1) ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.5 ; CHECK: tB %bb.11, 14 /* CC::al */, $noreg ; CHECK: bb.6.for.body.preheader.new: @@ -304,26 +304,26 @@ body: | ; CHECK: bb.7.for.body: ; CHECK: successors: %bb.7(0x7c000000), %bb.8(0x04000000) ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3, $r12 - ; CHECK: renamable $r4 = tLDRBr renamable $r1, $r3, 14 /* CC::al */, $noreg :: (load 1 from %ir.scevgep2453) + ; CHECK: renamable $r4 = tLDRBr renamable $r1, $r3, 14 /* CC::al */, $noreg :: (load (s8) from %ir.scevgep2453) ; CHECK: renamable $r9 = t2ADDrr renamable $r1, renamable $r3, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $r5 = tLDRBr renamable $r2, $r3, 14 /* CC::al */, $noreg :: (load 1 from %ir.scevgep2854) + ; CHECK: renamable $r5 = tLDRBr renamable $r2, $r3, 14 /* CC::al */, $noreg :: (load (s8) from %ir.scevgep2854) ; CHECK: renamable $r6, dead $cpsr = tADDrr renamable $r2, renamable $r3, 14 /* CC::al */, $noreg ; CHECK: renamable $r4 = tADDhirr killed renamable $r4, killed renamable $r5, 14 /* CC::al */, $noreg - ; CHECK: tSTRBr killed renamable $r4, renamable $r0, $r3, 14 /* CC::al */, $noreg :: (store 1 into %ir.scevgep3255) - ; CHECK: renamable $r8 = t2LDRBi12 renamable $r9, 1, 14 /* CC::al */, $noreg :: (load 1 from %ir.scevgep40) - ; CHECK: renamable $r5 = tLDRBi renamable $r6, 1, 14 /* CC::al */, $noreg :: (load 1 from %ir.scevgep42) + ; CHECK: tSTRBr killed renamable $r4, renamable $r0, $r3, 14 /* CC::al */, $noreg :: (store (s8) into %ir.scevgep3255) + ; CHECK: renamable $r8 = t2LDRBi12 renamable $r9, 1, 14 /* CC::al */, $noreg :: (load (s8) from %ir.scevgep40) + ; CHECK: renamable $r5 = tLDRBi renamable $r6, 1, 14 /* CC::al */, $noreg :: (load (s8) from %ir.scevgep42) ; CHECK: renamable $r8 = tADDhirr killed renamable $r8, killed renamable $r5, 14 /* CC::al */, $noreg ; CHECK: renamable $r5, dead $cpsr = tADDrr renamable $r0, renamable $r3, 14 /* CC::al */, $noreg ; CHECK: renamable $r3, dead $cpsr = nuw tADDi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg - ; CHECK: t2STRBi12 killed renamable $r8, renamable $r5, 1, 14 /* CC::al */, $noreg :: (store 1 into %ir.scevgep44) - ; CHECK: renamable $r8 = t2LDRBi12 renamable $r9, 2, 14 /* CC::al */, $noreg :: (load 1 from %ir.scevgep34) - ; CHECK: renamable $r4 = tLDRBi renamable $r6, 2, 14 /* CC::al */, $noreg :: (load 1 from %ir.scevgep36) + ; CHECK: t2STRBi12 killed renamable $r8, renamable $r5, 1, 14 /* CC::al */, $noreg :: (store (s8) into %ir.scevgep44) + ; CHECK: renamable $r8 = t2LDRBi12 renamable $r9, 2, 14 /* CC::al */, $noreg :: (load (s8) from %ir.scevgep34) + ; CHECK: renamable $r4 = tLDRBi renamable $r6, 2, 14 /* CC::al */, $noreg :: (load (s8) from %ir.scevgep36) ; CHECK: renamable $r4 = tADDhirr killed renamable $r4, killed renamable $r8, 14 /* CC::al */, $noreg - ; CHECK: tSTRBi killed renamable $r4, renamable $r5, 2, 14 /* CC::al */, $noreg :: (store 1 into %ir.scevgep38) - ; CHECK: renamable $r4 = t2LDRBi12 killed renamable $r9, 3, 14 /* CC::al */, $noreg :: (load 1 from %ir.scevgep22) - ; CHECK: renamable $r6 = tLDRBi killed renamable $r6, 3, 14 /* CC::al */, $noreg :: (load 1 from %ir.scevgep26) + ; CHECK: tSTRBi killed renamable $r4, renamable $r5, 2, 14 /* CC::al */, $noreg :: (store (s8) into %ir.scevgep38) + ; CHECK: renamable $r4 = t2LDRBi12 killed renamable $r9, 3, 14 /* CC::al */, $noreg :: (load (s8) from %ir.scevgep22) + ; CHECK: renamable $r6 = tLDRBi killed renamable $r6, 3, 14 /* CC::al */, $noreg :: (load (s8) from %ir.scevgep26) ; CHECK: renamable $r4 = tADDhirr killed renamable $r4, killed renamable $r6, 14 /* CC::al */, $noreg - ; CHECK: tSTRBi killed renamable $r4, killed renamable $r5, 3, 14 /* CC::al */, $noreg :: (store 1 into %ir.scevgep30) + ; CHECK: tSTRBi killed renamable $r4, killed renamable $r5, 3, 14 /* CC::al */, $noreg :: (store (s8) into %ir.scevgep30) ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.7 ; CHECK: bb.8.for.cond.cleanup.loopexit.unr-lcssa: ; CHECK: successors: %bb.11(0x30000000), %bb.9(0x50000000) @@ -333,21 +333,21 @@ body: | ; CHECK: bb.9.for.body.epil: ; CHECK: successors: %bb.11(0x40000000), %bb.10(0x40000000) ; CHECK: liveins: $r0, $r1, $r2, $r3, $r12 - ; CHECK: renamable $r6 = tLDRBr renamable $r1, $r3, 14 /* CC::al */, $noreg :: (load 1 from %ir.arrayidx.epil) + ; CHECK: renamable $r6 = tLDRBr renamable $r1, $r3, 14 /* CC::al */, $noreg :: (load (s8) from %ir.arrayidx.epil) ; CHECK: t2CMPri renamable $r12, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr - ; CHECK: renamable $r5 = tLDRBr renamable $r2, $r3, 14 /* CC::al */, $noreg :: (load 1 from %ir.arrayidx1.epil) + ; CHECK: renamable $r5 = tLDRBr renamable $r2, $r3, 14 /* CC::al */, $noreg :: (load (s8) from %ir.arrayidx1.epil) ; CHECK: renamable $r6 = tADDhirr killed renamable $r6, killed renamable $r5, 14 /* CC::al */, $noreg - ; CHECK: tSTRBr killed renamable $r6, renamable $r0, $r3, 14 /* CC::al */, $noreg :: (store 1 into %ir.arrayidx4.epil) + ; CHECK: tSTRBr killed renamable $r6, renamable $r0, $r3, 14 /* CC::al */, $noreg :: (store (s8) into %ir.arrayidx4.epil) ; CHECK: tBcc %bb.11, 0 /* CC::eq */, killed $cpsr ; CHECK: bb.10.for.body.epil.1: ; CHECK: successors: %bb.11(0x40000000), %bb.12(0x40000000) ; CHECK: liveins: $r0, $r1, $r2, $r3, $r12 ; CHECK: renamable $r6, dead $cpsr = nuw tADDi3 renamable $r3, 1, 14 /* CC::al */, $noreg ; CHECK: t2CMPri killed renamable $r12, 2, 14 /* CC::al */, $noreg, implicit-def $cpsr - ; CHECK: renamable $r5 = tLDRBr renamable $r1, $r6, 14 /* CC::al */, $noreg :: (load 1 from %ir.arrayidx.epil.1) - ; CHECK: renamable $r4 = tLDRBr renamable $r2, $r6, 14 /* CC::al */, $noreg :: (load 1 from %ir.arrayidx1.epil.1) + ; CHECK: renamable $r5 = tLDRBr renamable $r1, $r6, 14 /* CC::al */, $noreg :: (load (s8) from %ir.arrayidx.epil.1) + ; CHECK: renamable $r4 = tLDRBr renamable $r2, $r6, 14 /* CC::al */, $noreg :: (load (s8) from %ir.arrayidx1.epil.1) ; CHECK: renamable $r5 = tADDhirr killed renamable $r5, killed renamable $r4, 14 /* CC::al */, $noreg - ; CHECK: tSTRBr killed renamable $r5, renamable $r0, killed $r6, 14 /* CC::al */, $noreg :: (store 1 into %ir.arrayidx4.epil.1) + ; CHECK: tSTRBr killed renamable $r5, renamable $r0, killed $r6, 14 /* CC::al */, $noreg :: (store (s8) into %ir.arrayidx4.epil.1) ; CHECK: tBcc %bb.12, 1 /* CC::ne */, killed $cpsr ; CHECK: bb.11.for.cond.cleanup: ; CHECK: $sp = t2LDMIA_UPD $sp, 14 /* CC::al */, $noreg, def $r8, def $r9, def $r11 @@ -355,10 +355,10 @@ body: | ; CHECK: bb.12.for.body.epil.2: ; CHECK: liveins: $r0, $r1, $r2, $r3 ; CHECK: renamable $r3, dead $cpsr = nuw tADDi8 killed renamable $r3, 2, 14 /* CC::al */, $noreg - ; CHECK: renamable $r1 = tLDRBr killed renamable $r1, $r3, 14 /* CC::al */, $noreg :: (load 1 from %ir.arrayidx.epil.2) - ; CHECK: renamable $r2 = tLDRBr killed renamable $r2, $r3, 14 /* CC::al */, $noreg :: (load 1 from %ir.arrayidx1.epil.2) + ; CHECK: renamable $r1 = tLDRBr killed renamable $r1, $r3, 14 /* CC::al */, $noreg :: (load (s8) from %ir.arrayidx.epil.2) + ; CHECK: renamable $r2 = tLDRBr killed renamable $r2, $r3, 14 /* CC::al */, $noreg :: (load (s8) from %ir.arrayidx1.epil.2) ; CHECK: renamable $r1 = tADDhirr killed renamable $r1, killed renamable $r2, 14 /* CC::al */, $noreg - ; CHECK: tSTRBr killed renamable $r1, killed renamable $r0, killed $r3, 14 /* CC::al */, $noreg :: (store 1 into %ir.arrayidx4.epil.2) + ; CHECK: tSTRBr killed renamable $r1, killed renamable $r0, killed $r3, 14 /* CC::al */, $noreg :: (store (s8) into %ir.arrayidx4.epil.2) ; CHECK: $sp = t2LDMIA_UPD $sp, 14 /* CC::al */, $noreg, def $r8, def $r9, def $r11 ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $r5, def $r6, def $r7, def $pc bb.0.entry: @@ -437,12 +437,12 @@ body: | renamable $vpr = MVE_VCTP8 renamable $r3, 0, $noreg renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 16, 14, $noreg MVE_VPST 4, implicit $vpr - renamable $r1, renamable $q0 = MVE_VLDRBU8_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv46, align 1) - renamable $r2, renamable $q1 = MVE_VLDRBU8_post killed renamable $r2, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv4749, align 1) + renamable $r1, renamable $q0 = MVE_VLDRBU8_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv46, align 1) + renamable $r2, renamable $q1 = MVE_VLDRBU8_post killed renamable $r2, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv4749, align 1) renamable $lr = t2LoopDec killed renamable $lr, 1 renamable $q0 = MVE_VADDi8 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 MVE_VPST 8, implicit $vpr - renamable $r0 = MVE_VSTRBU8_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv5052, align 1) + renamable $r0 = MVE_VSTRBU8_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store (s128) into %ir.lsr.iv5052, align 1) t2LoopEnd renamable $lr, %bb.5, implicit-def dead $cpsr tB %bb.11, 14, $noreg @@ -460,27 +460,27 @@ body: | successors: %bb.7(0x7c000000), %bb.8(0x04000000) liveins: $lr, $r0, $r1, $r2, $r3, $r12 - renamable $r4 = tLDRBr renamable $r1, $r3, 14, $noreg :: (load 1 from %ir.scevgep2453) + renamable $r4 = tLDRBr renamable $r1, $r3, 14, $noreg :: (load (s8) from %ir.scevgep2453) renamable $r9 = t2ADDrr renamable $r1, renamable $r3, 14, $noreg, $noreg - renamable $r5 = tLDRBr renamable $r2, $r3, 14, $noreg :: (load 1 from %ir.scevgep2854) + renamable $r5 = tLDRBr renamable $r2, $r3, 14, $noreg :: (load (s8) from %ir.scevgep2854) renamable $r6, dead $cpsr = tADDrr renamable $r2, renamable $r3, 14, $noreg renamable $lr = t2LoopDec killed renamable $lr, 1 renamable $r4 = tADDhirr killed renamable $r4, killed renamable $r5, 14, $noreg - tSTRBr killed renamable $r4, renamable $r0, $r3, 14, $noreg :: (store 1 into %ir.scevgep3255) - renamable $r8 = t2LDRBi12 renamable $r9, 1, 14, $noreg :: (load 1 from %ir.scevgep40) - renamable $r5 = tLDRBi renamable $r6, 1, 14, $noreg :: (load 1 from %ir.scevgep42) + tSTRBr killed renamable $r4, renamable $r0, $r3, 14, $noreg :: (store (s8) into %ir.scevgep3255) + renamable $r8 = t2LDRBi12 renamable $r9, 1, 14, $noreg :: (load (s8) from %ir.scevgep40) + renamable $r5 = tLDRBi renamable $r6, 1, 14, $noreg :: (load (s8) from %ir.scevgep42) renamable $r8 = tADDhirr killed renamable $r8, killed renamable $r5, 14, $noreg renamable $r5, dead $cpsr = tADDrr renamable $r0, renamable $r3, 14, $noreg renamable $r3, dead $cpsr = nuw tADDi8 killed renamable $r3, 4, 14, $noreg - t2STRBi12 killed renamable $r8, renamable $r5, 1, 14, $noreg :: (store 1 into %ir.scevgep44) - renamable $r8 = t2LDRBi12 renamable $r9, 2, 14, $noreg :: (load 1 from %ir.scevgep34) - renamable $r4 = tLDRBi renamable $r6, 2, 14, $noreg :: (load 1 from %ir.scevgep36) + t2STRBi12 killed renamable $r8, renamable $r5, 1, 14, $noreg :: (store (s8) into %ir.scevgep44) + renamable $r8 = t2LDRBi12 renamable $r9, 2, 14, $noreg :: (load (s8) from %ir.scevgep34) + renamable $r4 = tLDRBi renamable $r6, 2, 14, $noreg :: (load (s8) from %ir.scevgep36) renamable $r4 = tADDhirr killed renamable $r4, killed renamable $r8, 14, $noreg - tSTRBi killed renamable $r4, renamable $r5, 2, 14, $noreg :: (store 1 into %ir.scevgep38) - renamable $r4 = t2LDRBi12 killed renamable $r9, 3, 14, $noreg :: (load 1 from %ir.scevgep22) - renamable $r6 = tLDRBi killed renamable $r6, 3, 14, $noreg :: (load 1 from %ir.scevgep26) + tSTRBi killed renamable $r4, renamable $r5, 2, 14, $noreg :: (store (s8) into %ir.scevgep38) + renamable $r4 = t2LDRBi12 killed renamable $r9, 3, 14, $noreg :: (load (s8) from %ir.scevgep22) + renamable $r6 = tLDRBi killed renamable $r6, 3, 14, $noreg :: (load (s8) from %ir.scevgep26) renamable $r4 = tADDhirr killed renamable $r4, killed renamable $r6, 14, $noreg - tSTRBi killed renamable $r4, killed renamable $r5, 3, 14, $noreg :: (store 1 into %ir.scevgep30) + tSTRBi killed renamable $r4, killed renamable $r5, 3, 14, $noreg :: (store (s8) into %ir.scevgep30) t2LoopEnd renamable $lr, %bb.7, implicit-def dead $cpsr tB %bb.8, 14, $noreg @@ -495,11 +495,11 @@ body: | successors: %bb.11(0x40000000), %bb.10(0x40000000) liveins: $r0, $r1, $r2, $r3, $r12 - renamable $r6 = tLDRBr renamable $r1, $r3, 14, $noreg :: (load 1 from %ir.arrayidx.epil) + renamable $r6 = tLDRBr renamable $r1, $r3, 14, $noreg :: (load (s8) from %ir.arrayidx.epil) t2CMPri renamable $r12, 1, 14, $noreg, implicit-def $cpsr - renamable $r5 = tLDRBr renamable $r2, $r3, 14, $noreg :: (load 1 from %ir.arrayidx1.epil) + renamable $r5 = tLDRBr renamable $r2, $r3, 14, $noreg :: (load (s8) from %ir.arrayidx1.epil) renamable $r6 = tADDhirr killed renamable $r6, killed renamable $r5, 14, $noreg - tSTRBr killed renamable $r6, renamable $r0, $r3, 14, $noreg :: (store 1 into %ir.arrayidx4.epil) + tSTRBr killed renamable $r6, renamable $r0, $r3, 14, $noreg :: (store (s8) into %ir.arrayidx4.epil) tBcc %bb.11, 0, killed $cpsr bb.10.for.body.epil.1: @@ -508,10 +508,10 @@ body: | renamable $r6, dead $cpsr = nuw tADDi3 renamable $r3, 1, 14, $noreg t2CMPri killed renamable $r12, 2, 14, $noreg, implicit-def $cpsr - renamable $r5 = tLDRBr renamable $r1, $r6, 14, $noreg :: (load 1 from %ir.arrayidx.epil.1) - renamable $r4 = tLDRBr renamable $r2, $r6, 14, $noreg :: (load 1 from %ir.arrayidx1.epil.1) + renamable $r5 = tLDRBr renamable $r1, $r6, 14, $noreg :: (load (s8) from %ir.arrayidx.epil.1) + renamable $r4 = tLDRBr renamable $r2, $r6, 14, $noreg :: (load (s8) from %ir.arrayidx1.epil.1) renamable $r5 = tADDhirr killed renamable $r5, killed renamable $r4, 14, $noreg - tSTRBr killed renamable $r5, renamable $r0, killed $r6, 14, $noreg :: (store 1 into %ir.arrayidx4.epil.1) + tSTRBr killed renamable $r5, renamable $r0, killed $r6, 14, $noreg :: (store (s8) into %ir.arrayidx4.epil.1) tBcc %bb.12, 1, killed $cpsr bb.11.for.cond.cleanup: @@ -522,10 +522,10 @@ body: | liveins: $r0, $r1, $r2, $r3 renamable $r3, dead $cpsr = nuw tADDi8 killed renamable $r3, 2, 14, $noreg - renamable $r1 = tLDRBr killed renamable $r1, $r3, 14, $noreg :: (load 1 from %ir.arrayidx.epil.2) - renamable $r2 = tLDRBr killed renamable $r2, $r3, 14, $noreg :: (load 1 from %ir.arrayidx1.epil.2) + renamable $r1 = tLDRBr killed renamable $r1, $r3, 14, $noreg :: (load (s8) from %ir.arrayidx.epil.2) + renamable $r2 = tLDRBr killed renamable $r2, $r3, 14, $noreg :: (load (s8) from %ir.arrayidx1.epil.2) renamable $r1 = tADDhirr killed renamable $r1, killed renamable $r2, 14, $noreg - tSTRBr killed renamable $r1, killed renamable $r0, killed $r3, 14, $noreg :: (store 1 into %ir.arrayidx4.epil.2) + tSTRBr killed renamable $r1, killed renamable $r0, killed $r3, 14, $noreg :: (store (s8) into %ir.arrayidx4.epil.2) $sp = t2LDMIA_UPD $sp, 14, $noreg, def $r8, def $r9, def $r11 tPOP_RET 14, $noreg, def $r4, def $r5, def $r6, def $r7, def $pc diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-cpsr-loop-def.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-cpsr-loop-def.mir index 6cc34fee6ff30..863d1f9e03242 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-cpsr-loop-def.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-cpsr-loop-def.mir @@ -107,12 +107,12 @@ body: | ; CHECK: bb.1.while.body: ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK: liveins: $lr, $r0, $r1, $r2 - ; CHECK: renamable $r3, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep7) + ; CHECK: renamable $r3, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7) ; CHECK: tCMPhir renamable $lr, renamable $r2, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: renamable $lr = t2SUBri killed renamable $lr, 1, 14 /* CC::al */, $noreg, $noreg ; CHECK: t2IT 2, 8, implicit-def $itstate ; CHECK: renamable $r3 = tLSRri $noreg, killed renamable $r3, 1, 2 /* CC::hs */, killed $cpsr, implicit renamable $r3, implicit killed $itstate - ; CHECK: early-clobber renamable $r0 = t2STR_PRE killed renamable $r3, killed renamable $r0, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.scevgep4) + ; CHECK: early-clobber renamable $r0 = t2STR_PRE killed renamable $r3, killed renamable $r0, 4, 14 /* CC::al */, $noreg :: (store (s32) into %ir.scevgep4) ; CHECK: t2CMPri renamable $lr, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: tBcc %bb.1, 4 /* CC::mi */, killed $cpsr ; CHECK: tB %bb.2, 14 /* CC::al */, $noreg @@ -137,12 +137,12 @@ body: | successors: %bb.1(0x7c000000), %bb.2(0x04000000) liveins: $lr, $r0, $r1, $r2 - renamable $r3, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep7) + renamable $r3, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load (s32) from %ir.scevgep7) tCMPhir renamable $lr, renamable $r2, 14, $noreg, implicit-def $cpsr renamable $lr = t2LoopDec killed renamable $lr, 1 t2IT 2, 8, implicit-def $itstate renamable $r3 = tLSRri $noreg, killed renamable $r3, 1, 2, killed $cpsr, implicit renamable $r3, implicit killed $itstate - early-clobber renamable $r0 = t2STR_PRE killed renamable $r3, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep4) + early-clobber renamable $r0 = t2STR_PRE killed renamable $r3, killed renamable $r0, 4, 14, $noreg :: (store (s32) into %ir.scevgep4) t2CMPri renamable $lr, 0, 14, $noreg, implicit-def $cpsr tBcc %bb.1, 4, killed $cpsr tB %bb.2, 14, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-cpsr-loop-use.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-cpsr-loop-use.mir index 3356fa84a348f..b873c52a75062 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-cpsr-loop-use.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-cpsr-loop-use.mir @@ -107,12 +107,12 @@ body: | ; CHECK: bb.1.while.body: ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) ; CHECK: liveins: $lr, $r0, $r1, $r2 - ; CHECK: renamable $r3, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep7) + ; CHECK: renamable $r3, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7) ; CHECK: tCMPhir renamable $lr, renamable $r2, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: renamable $lr = t2SUBri killed renamable $lr, 1, 14 /* CC::al */, $noreg, $noreg ; CHECK: t2IT 2, 8, implicit-def $itstate ; CHECK: renamable $r3 = tLSRri $noreg, killed renamable $r3, 1, 2 /* CC::hs */, killed $cpsr, implicit killed renamable $r3, implicit killed $itstate - ; CHECK: early-clobber renamable $r0 = t2STR_PRE killed renamable $r3, killed renamable $r0, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.scevgep4) + ; CHECK: early-clobber renamable $r0 = t2STR_PRE killed renamable $r3, killed renamable $r0, 4, 14 /* CC::al */, $noreg :: (store (s32) into %ir.scevgep4) ; CHECK: renamable $lr = tMOVr killed $lr, 14 /* CC::al */, $noreg ; CHECK: t2CMPri renamable $lr, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: tBcc %bb.1, 1 /* CC::ne */, killed $cpsr @@ -138,12 +138,12 @@ body: | successors: %bb.1(0x7c000000), %bb.2(0x04000000) liveins: $lr, $r0, $r1, $r2 - renamable $r3, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep7) + renamable $r3, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load (s32) from %ir.scevgep7) tCMPhir renamable $lr, renamable $r2, 14, $noreg, implicit-def $cpsr renamable $lr = t2LoopDec killed renamable $lr, 1 t2IT 2, 8, implicit-def $itstate renamable $r3 = tLSRri $noreg, killed renamable $r3, 1, 2, killed $cpsr, implicit renamable $r3, implicit killed $itstate - early-clobber renamable $r0 = t2STR_PRE killed renamable $r3, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep4) + early-clobber renamable $r0 = t2STR_PRE killed renamable $r3, killed renamable $r0, 4, 14, $noreg :: (store (s32) into %ir.scevgep4) renamable $lr = tMOVr $lr, 14, $noreg t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr tB %bb.2, 14, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-retaining.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-retaining.mir index 34b7cf1e72230..7e60d32dc8b8c 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-retaining.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-retaining.mir @@ -126,7 +126,7 @@ body: | ; CHECK: bb.1.loop.ph: ; CHECK: successors: %bb.2(0x80000000) ; CHECK: liveins: $r0, $r1, $r2, $r3 - ; CHECK: renamable $r4 = tLDRspi $sp, 2, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8) + ; CHECK: renamable $r4 = tLDRspi $sp, 2, 14 /* CC::al */, $noreg :: (load (s32) from %fixed-stack.0, align 8) ; CHECK: dead $lr = t2DLS renamable $r4 ; CHECK: $r12 = tMOVr killed $r4, 14 /* CC::al */, $noreg ; CHECK: bb.2.loop.body: @@ -135,14 +135,14 @@ body: | ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg ; CHECK: $lr = tMOVr $r12, 14 /* CC::al */, $noreg ; CHECK: MVE_VPST 4, implicit $vpr - ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.addr.a, align 4) - ; CHECK: renamable $r1, renamable $q1 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.addr.b, align 4) + ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load (s128) from %ir.addr.a, align 4) + ; CHECK: renamable $r1, renamable $q1 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.addr.b, align 4) ; CHECK: renamable $r12 = t2SUBri killed $r12, 1, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg ; CHECK: renamable $q1 = MVE_VMVN killed renamable $q1, 0, $noreg, undef renamable $q1 ; CHECK: renamable $q0 = MVE_VQSHRNbhs32 killed renamable $q0, killed renamable $q1, 15, 0, $noreg ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $r2 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r2, 16, 1, killed renamable $vpr :: (store 16 into %ir.addr.c, align 4) + ; CHECK: renamable $r2 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r2, 16, 1, killed renamable $vpr :: (store (s128) into %ir.addr.c, align 4) ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.2 ; CHECK: bb.3.exit: ; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc @@ -162,7 +162,7 @@ body: | successors: %bb.2(0x80000000) liveins: $r0, $r1, $r2, $r3, $r4, $lr - renamable $r4 = tLDRspi $sp, 2, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8) + renamable $r4 = tLDRspi $sp, 2, 14 /* CC::al */, $noreg :: (load (s32) from %fixed-stack.0, align 8) $lr = t2DoLoopStart renamable $r4 $r12 = tMOVr killed $r4, 14 /* CC::al */, $noreg @@ -173,15 +173,15 @@ body: | renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg $lr = tMOVr $r12, 14 /* CC::al */, $noreg MVE_VPST 4, implicit $vpr - renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.addr.a, align 4) - renamable $r1, renamable $q1 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.addr.b, align 4) + renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load (s128) from %ir.addr.a, align 4) + renamable $r1, renamable $q1 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.addr.b, align 4) renamable $r12 = t2SUBri killed $r12, 1, 14 /* CC::al */, $noreg, $noreg renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg renamable $q1 = MVE_VMVN killed renamable $q1, 0, $noreg, undef renamable $q1 renamable $lr = t2LoopDec killed renamable $lr, 1 renamable $q0 = MVE_VQSHRNbhs32 killed renamable $q0, killed renamable $q1, 15, 0, $noreg MVE_VPST 8, implicit $vpr - renamable $r2 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r2, 16, 1, killed renamable $vpr :: (store 16 into %ir.addr.c, align 4) + renamable $r2 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r2, 16, 1, killed renamable $vpr :: (store (s128) into %ir.addr.c, align 4) t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr tB %bb.3, 14 /* CC::al */, $noreg @@ -232,7 +232,7 @@ body: | ; CHECK: bb.1.loop.ph: ; CHECK: successors: %bb.2(0x80000000) ; CHECK: liveins: $r0, $r1, $r2, $r3 - ; CHECK: renamable $r4 = tLDRspi $sp, 2, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8) + ; CHECK: renamable $r4 = tLDRspi $sp, 2, 14 /* CC::al */, $noreg :: (load (s32) from %fixed-stack.0, align 8) ; CHECK: dead $lr = t2DLS renamable $r4 ; CHECK: $r12 = tMOVr killed $r4, 14 /* CC::al */, $noreg ; CHECK: bb.2.loop.body: @@ -240,15 +240,15 @@ body: | ; CHECK: liveins: $r0, $r1, $r2, $r3, $r12 ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg ; CHECK: MVE_VPST 4, implicit $vpr - ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.addr.b, align 4) - ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.addr.a, align 4) + ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.addr.b, align 4) + ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load (s128) from %ir.addr.a, align 4) ; CHECK: $lr = tMOVr $r12, 14 /* CC::al */, $noreg ; CHECK: renamable $r12 = t2SUBri killed $r12, 1, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg ; CHECK: renamable $q0 = MVE_VORN renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 ; CHECK: renamable $q1 = MVE_VQSHRUNs32th killed renamable $q1, killed renamable $q0, 3, 0, $noreg ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $r2 = MVE_VSTRWU32_post killed renamable $q1, killed renamable $r2, 16, 1, killed renamable $vpr :: (store 16 into %ir.addr.c, align 4) + ; CHECK: renamable $r2 = MVE_VSTRWU32_post killed renamable $q1, killed renamable $r2, 16, 1, killed renamable $vpr :: (store (s128) into %ir.addr.c, align 4) ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.2 ; CHECK: bb.3.exit: ; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc @@ -268,7 +268,7 @@ body: | successors: %bb.2(0x80000000) liveins: $r0, $r1, $r2, $r3, $r4, $lr - renamable $r4 = tLDRspi $sp, 2, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8) + renamable $r4 = tLDRspi $sp, 2, 14 /* CC::al */, $noreg :: (load (s32) from %fixed-stack.0, align 8) $lr = t2DoLoopStart renamable $r4 $r12 = tMOVr killed $r4, 14 /* CC::al */, $noreg @@ -278,8 +278,8 @@ body: | renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg MVE_VPST 4, implicit $vpr - renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.addr.b, align 4) - renamable $r0, renamable $q1 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.addr.a, align 4) + renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.addr.b, align 4) + renamable $r0, renamable $q1 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load (s128) from %ir.addr.a, align 4) $lr = tMOVr $r12, 14 /* CC::al */, $noreg renamable $r12 = t2SUBri killed $r12, 1, 14 /* CC::al */, $noreg, $noreg renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg @@ -287,7 +287,7 @@ body: | renamable $lr = t2LoopDec killed renamable $lr, 1 renamable $q1 = MVE_VQSHRUNs32th killed renamable $q1, killed renamable $q0, 3, 0, $noreg MVE_VPST 8, implicit $vpr - renamable $r2 = MVE_VSTRWU32_post killed renamable $q1, killed renamable $r2, 16, 1, killed renamable $vpr :: (store 16 into %ir.addr.c, align 4) + renamable $r2 = MVE_VSTRWU32_post killed renamable $q1, killed renamable $r2, 16, 1, killed renamable $vpr :: (store (s128) into %ir.addr.c, align 4) t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr tB %bb.3, 14 /* CC::al */, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-use-after.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-use-after.mir index 87cc5c5704d7f..3a85ab5af293b 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-use-after.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-use-after.mir @@ -104,8 +104,8 @@ body: | ; CHECK: bb.2.while.body: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $r0, $r1 - ; CHECK: renamable $r2, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep6) - ; CHECK: early-clobber renamable $r0 = t2STR_PRE killed renamable $r2, killed renamable $r0, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.scevgep2) + ; CHECK: renamable $r2, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep6) + ; CHECK: early-clobber renamable $r0 = t2STR_PRE killed renamable $r2, killed renamable $r0, 4, 14 /* CC::al */, $noreg :: (store (s32) into %ir.scevgep2) ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2 ; CHECK: bb.3.while.end: ; CHECK: $r0, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg @@ -131,8 +131,8 @@ body: | successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $lr, $r0, $r1 - renamable $r2, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep6) - early-clobber renamable $r0 = t2STR_PRE killed renamable $r2, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep2) + renamable $r2, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load (s32) from %ir.scevgep6) + early-clobber renamable $r0 = t2STR_PRE killed renamable $r2, killed renamable $r0, 4, 14, $noreg :: (store (s32) into %ir.scevgep2) renamable $lr = t2LoopDec killed renamable $lr, 1 t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr tB %bb.3, 14, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vaddv.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vaddv.mir index dc709ed2493f3..4ccc56c130729 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vaddv.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vaddv.mir @@ -860,9 +860,9 @@ body: | ; CHECK: bb.2.vector.body: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $r0, $r1 - ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRHS32_post killed renamable $r0, 8, 0, killed $noreg :: (load 8 from %ir.lsr.iv17, align 2) + ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRHS32_post killed renamable $r0, 8, 0, killed $noreg :: (load (s64) from %ir.lsr.iv17, align 2) ; CHECK: renamable $r12 = MVE_VADDVu32no_acc killed renamable $q0, 0, $noreg - ; CHECK: early-clobber renamable $r1 = t2STR_POST killed renamable $r12, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.store.addr) + ; CHECK: early-clobber renamable $r1 = t2STR_POST killed renamable $r12, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store (s32) into %ir.store.addr) ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 ; CHECK: bb.3.exit: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc @@ -896,10 +896,10 @@ body: | renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg MVE_VPST 8, implicit $vpr - renamable $r0, renamable $q0 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, killed renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2) + renamable $r0, renamable $q0 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, killed renamable $vpr :: (load (s64) from %ir.lsr.iv17, align 2) renamable $r12 = MVE_VADDVu32no_acc killed renamable $q0, 0, $noreg $lr = tMOVr $r3, 14 /* CC::al */, $noreg - early-clobber renamable $r1 = t2STR_POST killed renamable $r12, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.store.addr) + early-clobber renamable $r1 = t2STR_POST killed renamable $r12, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store (s32) into %ir.store.addr) renamable $r3, dead $cpsr = nsw tSUBi8 killed $r3, 1, 14 /* CC::al */, $noreg renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg renamable $lr = t2LoopDec killed renamable $lr, 1 @@ -959,9 +959,9 @@ body: | ; CHECK: bb.2.vector.body: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $r0, $r1 - ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRHU16_post killed renamable $r0, 16, 0, killed $noreg :: (load 16 from %ir.lsr.iv17, align 2) + ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRHU16_post killed renamable $r0, 16, 0, killed $noreg :: (load (s128) from %ir.lsr.iv17, align 2) ; CHECK: renamable $r12 = MVE_VADDVs16no_acc killed renamable $q0, 0, $noreg - ; CHECK: early-clobber renamable $r1 = t2STR_POST killed renamable $r12, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.store.addr) + ; CHECK: early-clobber renamable $r1 = t2STR_POST killed renamable $r12, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store (s32) into %ir.store.addr) ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 ; CHECK: bb.3.exit: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc @@ -995,10 +995,10 @@ body: | renamable $vpr = MVE_VCTP16 renamable $r2, 0, $noreg MVE_VPST 8, implicit $vpr - renamable $r0, renamable $q0 = MVE_VLDRHU16_post killed renamable $r0, 16, 1, killed renamable $vpr :: (load 16 from %ir.lsr.iv17, align 2) + renamable $r0, renamable $q0 = MVE_VLDRHU16_post killed renamable $r0, 16, 1, killed renamable $vpr :: (load (s128) from %ir.lsr.iv17, align 2) renamable $r12 = MVE_VADDVs16no_acc killed renamable $q0, 0, $noreg $lr = tMOVr $r3, 14 /* CC::al */, $noreg - early-clobber renamable $r1 = t2STR_POST killed renamable $r12, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.store.addr) + early-clobber renamable $r1 = t2STR_POST killed renamable $r12, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store (s32) into %ir.store.addr) renamable $r3, dead $cpsr = nsw tSUBi8 killed $r3, 1, 14 /* CC::al */, $noreg renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 8, 14 /* CC::al */, $noreg renamable $lr = t2LoopDec killed renamable $lr, 1 @@ -1058,9 +1058,9 @@ body: | ; CHECK: bb.2.vector.body: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $r0, $r1 - ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRBU8_post killed renamable $r0, 16, 0, killed $noreg :: (load 16 from %ir.lsr.iv17, align 1) + ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRBU8_post killed renamable $r0, 16, 0, killed $noreg :: (load (s128) from %ir.lsr.iv17, align 1) ; CHECK: renamable $r12 = MVE_VADDVs8no_acc killed renamable $q0, 0, $noreg - ; CHECK: early-clobber renamable $r1 = t2STR_POST killed renamable $r12, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.store.addr) + ; CHECK: early-clobber renamable $r1 = t2STR_POST killed renamable $r12, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store (s32) into %ir.store.addr) ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 ; CHECK: bb.3.exit: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc @@ -1094,10 +1094,10 @@ body: | renamable $vpr = MVE_VCTP8 renamable $r2, 0, $noreg MVE_VPST 8, implicit $vpr - renamable $r0, renamable $q0 = MVE_VLDRBU8_post killed renamable $r0, 16, 1, killed renamable $vpr :: (load 16 from %ir.lsr.iv17, align 1) + renamable $r0, renamable $q0 = MVE_VLDRBU8_post killed renamable $r0, 16, 1, killed renamable $vpr :: (load (s128) from %ir.lsr.iv17, align 1) renamable $r12 = MVE_VADDVs8no_acc killed renamable $q0, 0, $noreg $lr = tMOVr $r3, 14 /* CC::al */, $noreg - early-clobber renamable $r1 = t2STR_POST killed renamable $r12, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.store.addr) + early-clobber renamable $r1 = t2STR_POST killed renamable $r12, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store (s32) into %ir.store.addr) renamable $r3, dead $cpsr = nsw tSUBi8 killed $r3, 1, 14 /* CC::al */, $noreg renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 16, 14 /* CC::al */, $noreg renamable $lr = t2LoopDec killed renamable $lr, 1 @@ -1155,7 +1155,7 @@ body: | ; CHECK: bb.2.vector.body: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $r0, $r2 - ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRHS32_post killed renamable $r0, 8, 0, killed $noreg :: (load 8 from %ir.lsr.iv17, align 2) + ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRHS32_post killed renamable $r0, 8, 0, killed $noreg :: (load (s64) from %ir.lsr.iv17, align 2) ; CHECK: renamable $r2 = MVE_VADDVu32acc killed renamable $r2, killed renamable $q0, 0, $noreg ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 ; CHECK: bb.3.exit: @@ -1198,7 +1198,7 @@ body: | renamable $r3, dead $cpsr = nsw tSUBi8 killed $r3, 1, 14 /* CC::al */, $noreg renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg MVE_VPST 8, implicit $vpr - renamable $r0, renamable $q0 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, killed renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2) + renamable $r0, renamable $q0 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, killed renamable $vpr :: (load (s64) from %ir.lsr.iv17, align 2) renamable $r2 = MVE_VADDVu32acc killed renamable $r2, killed renamable $q0, 0, $noreg renamable $lr = t2LoopDec killed renamable $lr, 1 t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr @@ -1273,12 +1273,12 @@ body: | ; CHECK: liveins: $r0, $r1, $r2, $r3 ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, killed renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2) + ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, killed renamable $vpr :: (load (s64) from %ir.lsr.iv17, align 2) ; CHECK: renamable $q0 = MVE_VMVN killed renamable $q0, 0, $noreg, undef renamable $q0 ; CHECK: $lr = tMOVr $r3, 14 /* CC::al */, $noreg ; CHECK: renamable $r12 = MVE_VADDVu32no_acc killed renamable $q0, 0, $noreg ; CHECK: renamable $r3, dead $cpsr = nsw tSUBi8 killed $r3, 1, 14 /* CC::al */, $noreg - ; CHECK: early-clobber renamable $r1 = t2STR_POST killed renamable $r12, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.store.addr) + ; CHECK: early-clobber renamable $r1 = t2STR_POST killed renamable $r12, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store (s32) into %ir.store.addr) ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.2 ; CHECK: bb.3.exit: @@ -1313,12 +1313,12 @@ body: | renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg MVE_VPST 8, implicit $vpr - renamable $r0, renamable $q0 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, killed renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2) + renamable $r0, renamable $q0 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, killed renamable $vpr :: (load (s64) from %ir.lsr.iv17, align 2) renamable $q0 = MVE_VMVN killed renamable $q0, 0, $noreg, undef renamable $q0 $lr = tMOVr $r3, 14 /* CC::al */, $noreg renamable $r12 = MVE_VADDVu32no_acc killed renamable $q0, 0, $noreg renamable $r3, dead $cpsr = nsw tSUBi8 killed $r3, 1, 14 /* CC::al */, $noreg - early-clobber renamable $r1 = t2STR_POST killed renamable $r12, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.store.addr) + early-clobber renamable $r1 = t2STR_POST killed renamable $r12, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store (s32) into %ir.store.addr) renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg renamable $lr = t2LoopDec killed renamable $lr, 1 t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr @@ -1383,7 +1383,7 @@ body: | ; CHECK: liveins: $r0, $r1, $r2, $r3 ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r1, 0, $noreg ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, killed renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2) + ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, killed renamable $vpr :: (load (s64) from %ir.lsr.iv17, align 2) ; CHECK: $lr = tMOVr $r3, 14 /* CC::al */, $noreg ; CHECK: renamable $q0 = MVE_VMVN killed renamable $q0, 0, $noreg, undef renamable $q0 ; CHECK: renamable $r3, dead $cpsr = nsw tSUBi8 killed $r3, 1, 14 /* CC::al */, $noreg @@ -1427,7 +1427,7 @@ body: | renamable $vpr = MVE_VCTP32 renamable $r1, 0, $noreg MVE_VPST 8, implicit $vpr - renamable $r0, renamable $q0 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, killed renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2) + renamable $r0, renamable $q0 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, killed renamable $vpr :: (load (s64) from %ir.lsr.iv17, align 2) $lr = tMOVr $r3, 14 /* CC::al */, $noreg renamable $q0 = MVE_VMVN killed renamable $q0, 0, $noreg, undef renamable $q0 renamable $r3, dead $cpsr = nsw tSUBi8 killed $r3, 1, 14 /* CC::al */, $noreg @@ -1506,12 +1506,12 @@ body: | ; CHECK: liveins: $r0, $r1, $r2, $r3 ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRHU32_post killed renamable $r0, 8, 1, killed renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2) + ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRHU32_post killed renamable $r0, 8, 1, killed renamable $vpr :: (load (s64) from %ir.lsr.iv17, align 2) ; CHECK: renamable $q0 = MVE_VMVN killed renamable $q0, 0, $noreg, undef renamable $q0 ; CHECK: $lr = tMOVr $r3, 14 /* CC::al */, $noreg ; CHECK: renamable $r12 = MVE_VADDVu32no_acc killed renamable $q0, 0, $noreg ; CHECK: renamable $r3, dead $cpsr = nsw tSUBi8 killed $r3, 1, 14 /* CC::al */, $noreg - ; CHECK: early-clobber renamable $r1 = t2STR_POST killed renamable $r12, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.store.addr) + ; CHECK: early-clobber renamable $r1 = t2STR_POST killed renamable $r12, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store (s32) into %ir.store.addr) ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.2 ; CHECK: bb.3.exit: @@ -1546,12 +1546,12 @@ body: | renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg MVE_VPST 8, implicit $vpr - renamable $r0, renamable $q0 = MVE_VLDRHU32_post killed renamable $r0, 8, 1, killed renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2) + renamable $r0, renamable $q0 = MVE_VLDRHU32_post killed renamable $r0, 8, 1, killed renamable $vpr :: (load (s64) from %ir.lsr.iv17, align 2) renamable $q0 = MVE_VMVN killed renamable $q0, 0, $noreg, undef renamable $q0 $lr = tMOVr $r3, 14 /* CC::al */, $noreg renamable $r12 = MVE_VADDVu32no_acc killed renamable $q0, 0, $noreg renamable $r3, dead $cpsr = nsw tSUBi8 killed $r3, 1, 14 /* CC::al */, $noreg - early-clobber renamable $r1 = t2STR_POST killed renamable $r12, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.store.addr) + early-clobber renamable $r1 = t2STR_POST killed renamable $r12, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store (s32) into %ir.store.addr) renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg renamable $lr = t2LoopDec killed renamable $lr, 1 t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr @@ -1616,7 +1616,7 @@ body: | ; CHECK: liveins: $r0, $r1, $r2, $r3 ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r1, 0, $noreg ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRHU32_post killed renamable $r0, 8, 1, killed renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2) + ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRHU32_post killed renamable $r0, 8, 1, killed renamable $vpr :: (load (s64) from %ir.lsr.iv17, align 2) ; CHECK: $lr = tMOVr $r3, 14 /* CC::al */, $noreg ; CHECK: renamable $q0 = MVE_VMVN killed renamable $q0, 0, $noreg, undef renamable $q0 ; CHECK: renamable $r3, dead $cpsr = nsw tSUBi8 killed $r3, 1, 14 /* CC::al */, $noreg @@ -1660,7 +1660,7 @@ body: | renamable $vpr = MVE_VCTP32 renamable $r1, 0, $noreg MVE_VPST 8, implicit $vpr - renamable $r0, renamable $q0 = MVE_VLDRHU32_post killed renamable $r0, 8, 1, killed renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2) + renamable $r0, renamable $q0 = MVE_VLDRHU32_post killed renamable $r0, 8, 1, killed renamable $vpr :: (load (s64) from %ir.lsr.iv17, align 2) $lr = tMOVr $r3, 14 /* CC::al */, $noreg renamable $q0 = MVE_VMVN killed renamable $q0, 0, $noreg, undef renamable $q0 renamable $r3, dead $cpsr = nsw tSUBi8 killed $r3, 1, 14 /* CC::al */, $noreg @@ -1736,7 +1736,7 @@ body: | ; CHECK: renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg ; CHECK: renamable $r12 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r3 = tADDrSPi $sp, 2, 14 /* CC::al */, $noreg - ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load 16 from %fixed-stack.0, align 8) + ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load (s128) from %fixed-stack.0, align 8) ; CHECK: dead $lr = t2DLS renamable $r12 ; CHECK: $r4 = tMOVr killed $r12, 14 /* CC::al */, $noreg ; CHECK: bb.2.vector.body: @@ -1744,14 +1744,14 @@ body: | ; CHECK: liveins: $q0, $r0, $r1, $r2, $r4 ; CHECK: renamable $vpr = MVE_VCTP16 renamable $r2, 0, $noreg ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRBS16_post killed renamable $r0, 8, 1, killed renamable $vpr :: (load 8 from %ir.lsr.iv17, align 1) + ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRBS16_post killed renamable $r0, 8, 1, killed renamable $vpr :: (load (s64) from %ir.lsr.iv17, align 1) ; CHECK: renamable $q1 = MVE_VSUBi16 killed renamable $q1, renamable $q0, 0, $noreg, undef renamable $q1 ; CHECK: $lr = tMOVr $r4, 14 /* CC::al */, $noreg ; CHECK: renamable $r12 = MVE_VADDVu16no_acc killed renamable $q1, 0, $noreg ; CHECK: renamable $r4, dead $cpsr = nsw tSUBi8 killed $r4, 1, 14 /* CC::al */, $noreg ; CHECK: renamable $r3 = t2SXTH killed renamable $r12, 0, 14 /* CC::al */, $noreg ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 8, 14 /* CC::al */, $noreg - ; CHECK: early-clobber renamable $r1 = t2STR_POST killed renamable $r3, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.store.addr) + ; CHECK: early-clobber renamable $r1 = t2STR_POST killed renamable $r3, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store (s32) into %ir.store.addr) ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.2 ; CHECK: bb.3.exit: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc @@ -1778,7 +1778,7 @@ body: | renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg renamable $r12 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg renamable $r3 = tADDrSPi $sp, 2, 14 /* CC::al */, $noreg - renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load 16 from %fixed-stack.0, align 8) + renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load (s128) from %fixed-stack.0, align 8) $lr = t2DoLoopStart renamable $r12 $r4 = tMOVr killed $r12, 14 /* CC::al */, $noreg @@ -1788,14 +1788,14 @@ body: | renamable $vpr = MVE_VCTP16 renamable $r2, 0, $noreg MVE_VPST 8, implicit $vpr - renamable $r0, renamable $q1 = MVE_VLDRBS16_post killed renamable $r0, 8, 1, killed renamable $vpr :: (load 8 from %ir.lsr.iv17, align 1) + renamable $r0, renamable $q1 = MVE_VLDRBS16_post killed renamable $r0, 8, 1, killed renamable $vpr :: (load (s64) from %ir.lsr.iv17, align 1) renamable $q1 = MVE_VSUBi16 killed renamable $q1, renamable $q0, 0, $noreg, undef renamable $q1 $lr = tMOVr $r4, 14 /* CC::al */, $noreg renamable $r12 = MVE_VADDVu16no_acc killed renamable $q1, 0, $noreg renamable $r4, dead $cpsr = nsw tSUBi8 killed $r4, 1, 14 /* CC::al */, $noreg renamable $r3 = t2SXTH killed renamable $r12, 0, 14 /* CC::al */, $noreg renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 8, 14 /* CC::al */, $noreg - early-clobber renamable $r1 = t2STR_POST killed renamable $r3, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.store.addr) + early-clobber renamable $r1 = t2STR_POST killed renamable $r3, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store (s32) into %ir.store.addr) renamable $lr = t2LoopDec killed renamable $lr, 1 t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr tB %bb.3, 14 /* CC::al */, $noreg @@ -1856,7 +1856,7 @@ body: | ; CHECK: renamable $r2 = t2BICri killed renamable $r2, 3, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg - ; CHECK: renamable $d1 = VLDRD $sp, 2, 14 /* CC::al */, $noreg, implicit killed $q0, implicit-def $q0 :: (load 8 from %fixed-stack.0) + ; CHECK: renamable $d1 = VLDRD $sp, 2, 14 /* CC::al */, $noreg, implicit killed $q0, implicit-def $q0 :: (load (s64) from %fixed-stack.0) ; CHECK: renamable $r2 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r2, 19, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg ; CHECK: dead $lr = t2DLS renamable $r2 @@ -1866,7 +1866,7 @@ body: | ; CHECK: liveins: $q0, $r0, $r1, $r3, $r4 ; CHECK: renamable $vpr = MVE_VCTP16 renamable $r1, 0, $noreg ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRBS16_post killed renamable $r0, 8, 1, killed renamable $vpr :: (load 8 from %ir.lsr.iv17, align 1) + ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRBS16_post killed renamable $r0, 8, 1, killed renamable $vpr :: (load (s64) from %ir.lsr.iv17, align 1) ; CHECK: renamable $q1 = MVE_VSUBi16 killed renamable $q1, renamable $q0, 0, $noreg, undef renamable $q1 ; CHECK: $lr = tMOVr $r4, 14 /* CC::al */, $noreg ; CHECK: renamable $r2 = MVE_VADDVu16no_acc killed renamable $q1, 0, $noreg @@ -1901,7 +1901,7 @@ body: | renamable $r2 = t2BICri killed renamable $r2, 3, 14 /* CC::al */, $noreg, $noreg renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg - renamable $d1 = VLDRD $sp, 2, 14 /* CC::al */, $noreg, implicit killed $q0, implicit-def $q0 :: (load 8 from %fixed-stack.0) + renamable $d1 = VLDRD $sp, 2, 14 /* CC::al */, $noreg, implicit killed $q0, implicit-def $q0 :: (load (s64) from %fixed-stack.0) renamable $r2 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r2, 19, 14 /* CC::al */, $noreg, $noreg renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg $lr = t2DoLoopStart renamable $r2 @@ -1913,7 +1913,7 @@ body: | renamable $vpr = MVE_VCTP16 renamable $r1, 0, $noreg MVE_VPST 8, implicit $vpr - renamable $r0, renamable $q1 = MVE_VLDRBS16_post killed renamable $r0, 8, 1, killed renamable $vpr :: (load 8 from %ir.lsr.iv17, align 1) + renamable $r0, renamable $q1 = MVE_VLDRBS16_post killed renamable $r0, 8, 1, killed renamable $vpr :: (load (s64) from %ir.lsr.iv17, align 1) renamable $q1 = MVE_VSUBi16 killed renamable $q1, renamable $q0, 0, $noreg, undef renamable $q1 $lr = tMOVr $r4, 14 /* CC::al */, $noreg renamable $r2 = MVE_VADDVu16no_acc killed renamable $q1, 0, $noreg @@ -1990,7 +1990,7 @@ body: | ; CHECK: renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg ; CHECK: renamable $r12 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r3 = tADDrSPi $sp, 2, 14 /* CC::al */, $noreg - ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load 16 from %fixed-stack.0, align 8) + ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load (s128) from %fixed-stack.0, align 8) ; CHECK: dead $lr = t2DLS renamable $r12 ; CHECK: $r4 = tMOVr killed $r12, 14 /* CC::al */, $noreg ; CHECK: bb.2.vector.body: @@ -1998,14 +1998,14 @@ body: | ; CHECK: liveins: $q0, $r0, $r1, $r2, $r4 ; CHECK: renamable $vpr = MVE_VCTP16 renamable $r2, 0, $noreg ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRHU16_post killed renamable $r0, 16, 1, killed renamable $vpr :: (load 16 from %ir.lsr.iv17, align 2) + ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRHU16_post killed renamable $r0, 16, 1, killed renamable $vpr :: (load (s128) from %ir.lsr.iv17, align 2) ; CHECK: renamable $q1 = MVE_VSUBi16 killed renamable $q1, renamable $q0, 0, $noreg, undef renamable $q1 ; CHECK: $lr = tMOVr $r4, 14 /* CC::al */, $noreg ; CHECK: renamable $r12 = MVE_VADDVu16no_acc killed renamable $q1, 0, $noreg ; CHECK: renamable $r4, dead $cpsr = nsw tSUBi8 killed $r4, 1, 14 /* CC::al */, $noreg ; CHECK: renamable $r3 = t2UXTH killed renamable $r12, 0, 14 /* CC::al */, $noreg ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 8, 14 /* CC::al */, $noreg - ; CHECK: early-clobber renamable $r1 = t2STR_POST killed renamable $r3, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.store.addr) + ; CHECK: early-clobber renamable $r1 = t2STR_POST killed renamable $r3, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store (s32) into %ir.store.addr) ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.2 ; CHECK: bb.3.exit: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc @@ -2031,7 +2031,7 @@ body: | renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg renamable $r12 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg renamable $r3 = tADDrSPi $sp, 2, 14 /* CC::al */, $noreg - renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load 16 from %fixed-stack.0, align 8) + renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load (s128) from %fixed-stack.0, align 8) $lr = t2DoLoopStart renamable $r12 $r4 = tMOVr killed $r12, 14 /* CC::al */, $noreg @@ -2041,14 +2041,14 @@ body: | renamable $vpr = MVE_VCTP16 renamable $r2, 0, $noreg MVE_VPST 8, implicit $vpr - renamable $r0, renamable $q1 = MVE_VLDRHU16_post killed renamable $r0, 16, 1, killed renamable $vpr :: (load 16 from %ir.lsr.iv17, align 2) + renamable $r0, renamable $q1 = MVE_VLDRHU16_post killed renamable $r0, 16, 1, killed renamable $vpr :: (load (s128) from %ir.lsr.iv17, align 2) renamable $q1 = MVE_VSUBi16 killed renamable $q1, renamable $q0, 0, $noreg, undef renamable $q1 $lr = tMOVr $r4, 14 /* CC::al */, $noreg renamable $r12 = MVE_VADDVu16no_acc killed renamable $q1, 0, $noreg renamable $r4, dead $cpsr = nsw tSUBi8 killed $r4, 1, 14 /* CC::al */, $noreg renamable $r3 = t2UXTH killed renamable $r12, 0, 14 /* CC::al */, $noreg renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 8, 14 /* CC::al */, $noreg - early-clobber renamable $r1 = t2STR_POST killed renamable $r3, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.store.addr) + early-clobber renamable $r1 = t2STR_POST killed renamable $r3, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store (s32) into %ir.store.addr) renamable $lr = t2LoopDec killed renamable $lr, 1 t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr tB %bb.3, 14 /* CC::al */, $noreg @@ -2109,7 +2109,7 @@ body: | ; CHECK: renamable $r2 = t2BICri killed renamable $r2, 3, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg - ; CHECK: renamable $d1 = VLDRD $sp, 2, 14 /* CC::al */, $noreg, implicit killed $q0, implicit-def $q0 :: (load 8 from %fixed-stack.0) + ; CHECK: renamable $d1 = VLDRD $sp, 2, 14 /* CC::al */, $noreg, implicit killed $q0, implicit-def $q0 :: (load (s64) from %fixed-stack.0) ; CHECK: renamable $r2 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r2, 19, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg ; CHECK: dead $lr = t2DLS renamable $r2 @@ -2119,7 +2119,7 @@ body: | ; CHECK: liveins: $q0, $r0, $r1, $r3, $r4 ; CHECK: renamable $vpr = MVE_VCTP16 renamable $r1, 0, $noreg ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRHU16_post killed renamable $r0, 16, 1, killed renamable $vpr :: (load 16 from %ir.lsr.iv17, align 2) + ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRHU16_post killed renamable $r0, 16, 1, killed renamable $vpr :: (load (s128) from %ir.lsr.iv17, align 2) ; CHECK: renamable $q1 = MVE_VSUBi16 killed renamable $q1, renamable $q0, 0, $noreg, undef renamable $q1 ; CHECK: $lr = tMOVr $r4, 14 /* CC::al */, $noreg ; CHECK: renamable $r2 = MVE_VADDVu16no_acc killed renamable $q1, 0, $noreg @@ -2154,7 +2154,7 @@ body: | renamable $r2 = t2BICri killed renamable $r2, 3, 14 /* CC::al */, $noreg, $noreg renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg - renamable $d1 = VLDRD $sp, 2, 14 /* CC::al */, $noreg, implicit killed $q0, implicit-def $q0 :: (load 8 from %fixed-stack.0) + renamable $d1 = VLDRD $sp, 2, 14 /* CC::al */, $noreg, implicit killed $q0, implicit-def $q0 :: (load (s64) from %fixed-stack.0) renamable $r2 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r2, 19, 14 /* CC::al */, $noreg, $noreg renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg $lr = t2DoLoopStart renamable $r2 @@ -2166,7 +2166,7 @@ body: | renamable $vpr = MVE_VCTP16 renamable $r1, 0, $noreg MVE_VPST 8, implicit $vpr - renamable $r0, renamable $q1 = MVE_VLDRHU16_post killed renamable $r0, 16, 1, killed renamable $vpr :: (load 16 from %ir.lsr.iv17, align 2) + renamable $r0, renamable $q1 = MVE_VLDRHU16_post killed renamable $r0, 16, 1, killed renamable $vpr :: (load (s128) from %ir.lsr.iv17, align 2) renamable $q1 = MVE_VSUBi16 killed renamable $q1, renamable $q0, 0, $noreg, undef renamable $q1 $lr = tMOVr $r4, 14 /* CC::al */, $noreg renamable $r2 = MVE_VADDVu16no_acc killed renamable $q1, 0, $noreg @@ -2243,7 +2243,7 @@ body: | ; CHECK: renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg ; CHECK: renamable $r12 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 27, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r3 = tADDrSPi $sp, 2, 14 /* CC::al */, $noreg - ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load 16 from %fixed-stack.0, align 8) + ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load (s128) from %fixed-stack.0, align 8) ; CHECK: dead $lr = t2DLS renamable $r12 ; CHECK: $r4 = tMOVr killed $r12, 14 /* CC::al */, $noreg ; CHECK: bb.2.vector.body: @@ -2251,14 +2251,14 @@ body: | ; CHECK: liveins: $q0, $r0, $r1, $r2, $r4 ; CHECK: renamable $vpr = MVE_VCTP8 renamable $r2, 0, $noreg ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRBU8_post killed renamable $r0, 16, 1, killed renamable $vpr :: (load 16 from %ir.lsr.iv17, align 1) + ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRBU8_post killed renamable $r0, 16, 1, killed renamable $vpr :: (load (s128) from %ir.lsr.iv17, align 1) ; CHECK: renamable $q1 = MVE_VEOR killed renamable $q1, renamable $q0, 0, $noreg, undef renamable $q1 ; CHECK: $lr = tMOVr $r4, 14 /* CC::al */, $noreg ; CHECK: renamable $r12 = MVE_VADDVu8no_acc killed renamable $q1, 0, $noreg ; CHECK: renamable $r4, dead $cpsr = nsw tSUBi8 killed $r4, 1, 14 /* CC::al */, $noreg ; CHECK: renamable $r3 = t2SXTB killed renamable $r12, 0, 14 /* CC::al */, $noreg ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 16, 14 /* CC::al */, $noreg - ; CHECK: early-clobber renamable $r1 = t2STR_POST killed renamable $r3, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.store.addr) + ; CHECK: early-clobber renamable $r1 = t2STR_POST killed renamable $r3, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store (s32) into %ir.store.addr) ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.2 ; CHECK: bb.3.exit: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc @@ -2284,7 +2284,7 @@ body: | renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg renamable $r12 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 27, 14 /* CC::al */, $noreg, $noreg renamable $r3 = tADDrSPi $sp, 2, 14 /* CC::al */, $noreg - renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load 16 from %fixed-stack.0, align 8) + renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load (s128) from %fixed-stack.0, align 8) $lr = t2DoLoopStart renamable $r12 $r4 = tMOVr killed $r12, 14 /* CC::al */, $noreg @@ -2294,14 +2294,14 @@ body: | renamable $vpr = MVE_VCTP8 renamable $r2, 0, $noreg MVE_VPST 8, implicit $vpr - renamable $r0, renamable $q1 = MVE_VLDRBU8_post killed renamable $r0, 16, 1, killed renamable $vpr :: (load 16 from %ir.lsr.iv17, align 1) + renamable $r0, renamable $q1 = MVE_VLDRBU8_post killed renamable $r0, 16, 1, killed renamable $vpr :: (load (s128) from %ir.lsr.iv17, align 1) renamable $q1 = MVE_VEOR killed renamable $q1, renamable $q0, 0, $noreg, undef renamable $q1 $lr = tMOVr $r4, 14 /* CC::al */, $noreg renamable $r12 = MVE_VADDVu8no_acc killed renamable $q1, 0, $noreg renamable $r4, dead $cpsr = nsw tSUBi8 killed $r4, 1, 14 /* CC::al */, $noreg renamable $r3 = t2SXTB killed renamable $r12, 0, 14 /* CC::al */, $noreg renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 16, 14 /* CC::al */, $noreg - early-clobber renamable $r1 = t2STR_POST killed renamable $r3, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.store.addr) + early-clobber renamable $r1 = t2STR_POST killed renamable $r3, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store (s32) into %ir.store.addr) renamable $lr = t2LoopDec killed renamable $lr, 1 t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr tB %bb.3, 14 /* CC::al */, $noreg @@ -2362,7 +2362,7 @@ body: | ; CHECK: renamable $r2 = t2BICri killed renamable $r2, 7, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 7, 14 /* CC::al */, $noreg - ; CHECK: renamable $d1 = VLDRD $sp, 2, 14 /* CC::al */, $noreg, implicit killed $q0, implicit-def $q0 :: (load 8 from %fixed-stack.0) + ; CHECK: renamable $d1 = VLDRD $sp, 2, 14 /* CC::al */, $noreg, implicit killed $q0, implicit-def $q0 :: (load (s64) from %fixed-stack.0) ; CHECK: renamable $r2 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r2, 27, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg ; CHECK: dead $lr = t2DLS renamable $r2 @@ -2372,7 +2372,7 @@ body: | ; CHECK: liveins: $q0, $r0, $r1, $r3, $r4 ; CHECK: renamable $vpr = MVE_VCTP8 renamable $r1, 0, $noreg ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRBU8_post killed renamable $r0, 16, 1, killed renamable $vpr :: (load 16 from %ir.lsr.iv17, align 1) + ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRBU8_post killed renamable $r0, 16, 1, killed renamable $vpr :: (load (s128) from %ir.lsr.iv17, align 1) ; CHECK: renamable $q1 = MVE_VEOR killed renamable $q1, renamable $q0, 0, $noreg, undef renamable $q1 ; CHECK: $lr = tMOVr $r4, 14 /* CC::al */, $noreg ; CHECK: renamable $r2 = MVE_VADDVu8no_acc killed renamable $q1, 0, $noreg @@ -2407,7 +2407,7 @@ body: | renamable $r2 = t2BICri killed renamable $r2, 7, 14 /* CC::al */, $noreg, $noreg renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 7, 14 /* CC::al */, $noreg - renamable $d1 = VLDRD $sp, 2, 14 /* CC::al */, $noreg, implicit killed $q0, implicit-def $q0 :: (load 8 from %fixed-stack.0) + renamable $d1 = VLDRD $sp, 2, 14 /* CC::al */, $noreg, implicit killed $q0, implicit-def $q0 :: (load (s64) from %fixed-stack.0) renamable $r2 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r2, 27, 14 /* CC::al */, $noreg, $noreg renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg $lr = t2DoLoopStart renamable $r2 @@ -2419,7 +2419,7 @@ body: | renamable $vpr = MVE_VCTP8 renamable $r1, 0, $noreg MVE_VPST 8, implicit $vpr - renamable $r0, renamable $q1 = MVE_VLDRBU8_post killed renamable $r0, 16, 1, killed renamable $vpr :: (load 16 from %ir.lsr.iv17, align 1) + renamable $r0, renamable $q1 = MVE_VLDRBU8_post killed renamable $r0, 16, 1, killed renamable $vpr :: (load (s128) from %ir.lsr.iv17, align 1) renamable $q1 = MVE_VEOR killed renamable $q1, renamable $q0, 0, $noreg, undef renamable $q1 $lr = tMOVr $r4, 14 /* CC::al */, $noreg renamable $r2 = MVE_VADDVu8no_acc killed renamable $q1, 0, $noreg @@ -2496,7 +2496,7 @@ body: | ; CHECK: renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg ; CHECK: renamable $r12 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 27, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r3 = tADDrSPi $sp, 2, 14 /* CC::al */, $noreg - ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load 16 from %fixed-stack.0, align 8) + ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load (s128) from %fixed-stack.0, align 8) ; CHECK: dead $lr = t2DLS renamable $r12 ; CHECK: $r4 = tMOVr killed $r12, 14 /* CC::al */, $noreg ; CHECK: bb.2.vector.body: @@ -2504,14 +2504,14 @@ body: | ; CHECK: liveins: $q0, $r0, $r1, $r2, $r4 ; CHECK: renamable $vpr = MVE_VCTP8 renamable $r2, 0, $noreg ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRBU8_post killed renamable $r0, 16, 1, killed renamable $vpr :: (load 16 from %ir.lsr.iv17, align 1) + ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRBU8_post killed renamable $r0, 16, 1, killed renamable $vpr :: (load (s128) from %ir.lsr.iv17, align 1) ; CHECK: renamable $q1 = MVE_VEOR killed renamable $q1, renamable $q0, 0, $noreg, undef renamable $q1 ; CHECK: $lr = tMOVr $r4, 14 /* CC::al */, $noreg ; CHECK: renamable $r12 = MVE_VADDVu8no_acc killed renamable $q1, 0, $noreg ; CHECK: renamable $r4, dead $cpsr = nsw tSUBi8 killed $r4, 1, 14 /* CC::al */, $noreg ; CHECK: renamable $r3 = t2UXTB killed renamable $r12, 0, 14 /* CC::al */, $noreg ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 16, 14 /* CC::al */, $noreg - ; CHECK: early-clobber renamable $r1 = t2STR_POST killed renamable $r3, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.store.addr) + ; CHECK: early-clobber renamable $r1 = t2STR_POST killed renamable $r3, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store (s32) into %ir.store.addr) ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.2 ; CHECK: bb.3.exit: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc @@ -2537,7 +2537,7 @@ body: | renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg renamable $r12 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 27, 14 /* CC::al */, $noreg, $noreg renamable $r3 = tADDrSPi $sp, 2, 14 /* CC::al */, $noreg - renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load 16 from %fixed-stack.0, align 8) + renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load (s128) from %fixed-stack.0, align 8) $lr = t2DoLoopStart renamable $r12 $r4 = tMOVr killed $r12, 14 /* CC::al */, $noreg @@ -2547,14 +2547,14 @@ body: | renamable $vpr = MVE_VCTP8 renamable $r2, 0, $noreg MVE_VPST 8, implicit $vpr - renamable $r0, renamable $q1 = MVE_VLDRBU8_post killed renamable $r0, 16, 1, killed renamable $vpr :: (load 16 from %ir.lsr.iv17, align 1) + renamable $r0, renamable $q1 = MVE_VLDRBU8_post killed renamable $r0, 16, 1, killed renamable $vpr :: (load (s128) from %ir.lsr.iv17, align 1) renamable $q1 = MVE_VEOR killed renamable $q1, renamable $q0, 0, $noreg, undef renamable $q1 $lr = tMOVr $r4, 14 /* CC::al */, $noreg renamable $r12 = MVE_VADDVu8no_acc killed renamable $q1, 0, $noreg renamable $r4, dead $cpsr = nsw tSUBi8 killed $r4, 1, 14 /* CC::al */, $noreg renamable $r3 = t2UXTB killed renamable $r12, 0, 14 /* CC::al */, $noreg renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 16, 14 /* CC::al */, $noreg - early-clobber renamable $r1 = t2STR_POST killed renamable $r3, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.store.addr) + early-clobber renamable $r1 = t2STR_POST killed renamable $r3, killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (store (s32) into %ir.store.addr) renamable $lr = t2LoopDec killed renamable $lr, 1 t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr tB %bb.3, 14 /* CC::al */, $noreg @@ -2615,7 +2615,7 @@ body: | ; CHECK: renamable $r2 = t2BICri killed renamable $r2, 7, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 7, 14 /* CC::al */, $noreg - ; CHECK: renamable $d1 = VLDRD $sp, 2, 14 /* CC::al */, $noreg, implicit killed $q0, implicit-def $q0 :: (load 8 from %fixed-stack.0) + ; CHECK: renamable $d1 = VLDRD $sp, 2, 14 /* CC::al */, $noreg, implicit killed $q0, implicit-def $q0 :: (load (s64) from %fixed-stack.0) ; CHECK: renamable $r2 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r2, 27, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg ; CHECK: dead $lr = t2DLS renamable $r2 @@ -2625,7 +2625,7 @@ body: | ; CHECK: liveins: $q0, $r0, $r1, $r3, $r4 ; CHECK: renamable $vpr = MVE_VCTP8 renamable $r1, 0, $noreg ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRBU8_post killed renamable $r0, 16, 1, killed renamable $vpr :: (load 16 from %ir.lsr.iv17, align 1) + ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRBU8_post killed renamable $r0, 16, 1, killed renamable $vpr :: (load (s128) from %ir.lsr.iv17, align 1) ; CHECK: renamable $q1 = MVE_VEOR killed renamable $q1, renamable $q0, 0, $noreg, undef renamable $q1 ; CHECK: $lr = tMOVr $r4, 14 /* CC::al */, $noreg ; CHECK: renamable $r2 = MVE_VADDVu8no_acc killed renamable $q1, 0, $noreg @@ -2660,7 +2660,7 @@ body: | renamable $r2 = t2BICri killed renamable $r2, 7, 14 /* CC::al */, $noreg, $noreg renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 7, 14 /* CC::al */, $noreg - renamable $d1 = VLDRD $sp, 2, 14 /* CC::al */, $noreg, implicit killed $q0, implicit-def $q0 :: (load 8 from %fixed-stack.0) + renamable $d1 = VLDRD $sp, 2, 14 /* CC::al */, $noreg, implicit killed $q0, implicit-def $q0 :: (load (s64) from %fixed-stack.0) renamable $r2 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r2, 27, 14 /* CC::al */, $noreg, $noreg renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg $lr = t2DoLoopStart renamable $r2 @@ -2672,7 +2672,7 @@ body: | renamable $vpr = MVE_VCTP8 renamable $r1, 0, $noreg MVE_VPST 8, implicit $vpr - renamable $r0, renamable $q1 = MVE_VLDRBU8_post killed renamable $r0, 16, 1, killed renamable $vpr :: (load 16 from %ir.lsr.iv17, align 1) + renamable $r0, renamable $q1 = MVE_VLDRBU8_post killed renamable $r0, 16, 1, killed renamable $vpr :: (load (s128) from %ir.lsr.iv17, align 1) renamable $q1 = MVE_VEOR killed renamable $q1, renamable $q0, 0, $noreg, undef renamable $q1 $lr = tMOVr $r4, 14 /* CC::al */, $noreg renamable $r2 = MVE_VADDVu8no_acc killed renamable $q1, 0, $noreg @@ -2744,8 +2744,8 @@ body: | ; CHECK: bb.2.while.body: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $r0, $r1, $r12 - ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRHU32_post killed renamable $r1, 8, 0, $noreg :: (load 8 from %ir.tmp3, align 2) - ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRHU32_post killed renamable $r0, 8, 0, killed $noreg :: (load 8 from %ir.tmp1, align 2) + ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRHU32_post killed renamable $r1, 8, 0, $noreg :: (load (s64) from %ir.tmp3, align 2) + ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRHU32_post killed renamable $r0, 8, 0, killed $noreg :: (load (s64) from %ir.tmp1, align 2) ; CHECK: renamable $q0 = MVE_VORR killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 ; CHECK: renamable $r12 = MVE_VADDVu32acc killed renamable $r12, killed renamable $q0, 0, $noreg ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 @@ -2789,8 +2789,8 @@ body: | renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg MVE_VPST 4, implicit $vpr - renamable $r1, renamable $q0 = MVE_VLDRHU32_post killed renamable $r1, 8, 1, renamable $vpr :: (load 8 from %ir.tmp3, align 2) - renamable $r0, renamable $q1 = MVE_VLDRHU32_post killed renamable $r0, 8, 1, killed renamable $vpr :: (load 8 from %ir.tmp1, align 2) + renamable $r1, renamable $q0 = MVE_VLDRHU32_post killed renamable $r1, 8, 1, renamable $vpr :: (load (s64) from %ir.tmp3, align 2) + renamable $r0, renamable $q1 = MVE_VLDRHU32_post killed renamable $r0, 8, 1, killed renamable $vpr :: (load (s64) from %ir.tmp1, align 2) renamable $q0 = MVE_VORR killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 renamable $r2, dead $cpsr = nsw tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg renamable $r12 = MVE_VADDVu32acc killed renamable $r12, killed renamable $q0, 0, $noreg @@ -2859,8 +2859,8 @@ body: | ; CHECK: bb.2.while.body: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $r0, $r1, $r3 - ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRHU16_post killed renamable $r1, 16, 0, $noreg :: (load 16 from %ir.tmp3, align 2) - ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRHU16_post killed renamable $r0, 16, 0, killed $noreg :: (load 16 from %ir.tmp1, align 2) + ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRHU16_post killed renamable $r1, 16, 0, $noreg :: (load (s128) from %ir.tmp3, align 2) + ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRHU16_post killed renamable $r0, 16, 0, killed $noreg :: (load (s128) from %ir.tmp1, align 2) ; CHECK: renamable $q0 = MVE_VORR killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 ; CHECK: renamable $r12 = MVE_VADDVu16no_acc killed renamable $q0, 0, $noreg ; CHECK: renamable $r3 = t2UXTAH killed renamable $r3, killed renamable $r12, 0, 14 /* CC::al */, $noreg @@ -2905,8 +2905,8 @@ body: | renamable $vpr = MVE_VCTP16 renamable $r2, 0, $noreg MVE_VPST 4, implicit $vpr - renamable $r1, renamable $q0 = MVE_VLDRHU16_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.tmp3, align 2) - renamable $r0, renamable $q1 = MVE_VLDRHU16_post killed renamable $r0, 16, 1, killed renamable $vpr :: (load 16 from %ir.tmp1, align 2) + renamable $r1, renamable $q0 = MVE_VLDRHU16_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.tmp3, align 2) + renamable $r0, renamable $q1 = MVE_VLDRHU16_post killed renamable $r0, 16, 1, killed renamable $vpr :: (load (s128) from %ir.tmp1, align 2) renamable $q0 = MVE_VORR killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 renamable $r2, dead $cpsr = nsw tSUBi8 killed renamable $r2, 8, 14 /* CC::al */, $noreg renamable $r12 = MVE_VADDVu16no_acc killed renamable $q0, 0, $noreg @@ -2985,8 +2985,8 @@ body: | ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3 ; CHECK: renamable $vpr = MVE_VCTP16 renamable $r3, 0, $noreg ; CHECK: MVE_VPST 4, implicit $vpr - ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRHU16_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.tmp3, align 2) - ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRHU16_post killed renamable $r0, 16, 1, killed renamable $vpr :: (load 16 from %ir.tmp1, align 2) + ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRHU16_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.tmp3, align 2) + ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRHU16_post killed renamable $r0, 16, 1, killed renamable $vpr :: (load (s128) from %ir.tmp1, align 2) ; CHECK: renamable $q2 = MVE_VMULLBs16 renamable $q1, renamable $q0, 0, $noreg, undef renamable $q2 ; CHECK: renamable $q0 = MVE_VMULLTs16 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 ; CHECK: renamable $q0 = MVE_VADDi32 killed renamable $q0, killed renamable $q2, 0, $noreg, undef renamable $q0 @@ -3033,8 +3033,8 @@ body: | renamable $vpr = MVE_VCTP16 renamable $r3, 0, $noreg MVE_VPST 4, implicit $vpr - renamable $r1, renamable $q0 = MVE_VLDRHU16_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.tmp3, align 2) - renamable $r0, renamable $q1 = MVE_VLDRHU16_post killed renamable $r0, 16, 1, killed renamable $vpr :: (load 16 from %ir.tmp1, align 2) + renamable $r1, renamable $q0 = MVE_VLDRHU16_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.tmp3, align 2) + renamable $r0, renamable $q1 = MVE_VLDRHU16_post killed renamable $r0, 16, 1, killed renamable $vpr :: (load (s128) from %ir.tmp1, align 2) renamable $q2 = MVE_VMULLBs16 renamable $q1, renamable $q0, 0, $noreg, undef renamable $q2 renamable $q0 = MVE_VMULLTs16 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 renamable $q0 = MVE_VADDi32 killed renamable $q0, killed renamable $q2, 0, $noreg, undef renamable $q0 @@ -3110,8 +3110,8 @@ body: | ; CHECK: liveins: $r0, $r1, $r2, $r3, $r12 ; CHECK: renamable $vpr = MVE_VCTP16 renamable $r3, 0, $noreg ; CHECK: MVE_VPST 4, implicit $vpr - ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRHU16_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.tmp3, align 2) - ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRHU16_post killed renamable $r0, 16, 1, killed renamable $vpr :: (load 16 from %ir.tmp1, align 2) + ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRHU16_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.tmp3, align 2) + ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRHU16_post killed renamable $r0, 16, 1, killed renamable $vpr :: (load (s128) from %ir.tmp1, align 2) ; CHECK: $lr = tMOVr $r12, 14 /* CC::al */, $noreg ; CHECK: renamable $q0 = MVE_VMULLTs16 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 ; CHECK: renamable $r12 = nsw t2SUBri killed $r12, 1, 14 /* CC::al */, $noreg, $noreg @@ -3159,8 +3159,8 @@ body: | renamable $vpr = MVE_VCTP16 renamable $r3, 0, $noreg MVE_VPST 4, implicit $vpr - renamable $r1, renamable $q0 = MVE_VLDRHU16_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.tmp3, align 2) - renamable $r0, renamable $q1 = MVE_VLDRHU16_post killed renamable $r0, 16, 1, killed renamable $vpr :: (load 16 from %ir.tmp1, align 2) + renamable $r1, renamable $q0 = MVE_VLDRHU16_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.tmp3, align 2) + renamable $r0, renamable $q1 = MVE_VLDRHU16_post killed renamable $r0, 16, 1, killed renamable $vpr :: (load (s128) from %ir.tmp1, align 2) $lr = tMOVr $r12, 14 /* CC::al */, $noreg renamable $q0 = MVE_VMULLTs16 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 renamable $r12 = nsw t2SUBri killed $r12, 1, 14 /* CC::al */, $noreg, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-add-operand-liveout.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-add-operand-liveout.mir index b9ce1bfb4d706..86481962f3ce2 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-add-operand-liveout.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-add-operand-liveout.mir @@ -136,8 +136,8 @@ body: | ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg ; CHECK: $q0 = MVE_VORR killed $q1, killed $q1, 0, $noreg, undef $q0 ; CHECK: MVE_VPST 4, implicit $vpr - ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2) - ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, killed renamable $vpr :: (load 8 from %ir.lsr.iv1820, align 2) + ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv17, align 2) + ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, killed renamable $vpr :: (load (s64) from %ir.lsr.iv1820, align 2) ; CHECK: $lr = tMOVr $r3, 14 /* CC::al */, $noreg ; CHECK: renamable $q1 = nsw MVE_VMULi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 ; CHECK: renamable $r3, dead $cpsr = nsw tSUBi8 killed $r3, 1, 14 /* CC::al */, $noreg @@ -184,8 +184,8 @@ body: | renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg $q0 = MVE_VORR killed $q1, $q1, 0, $noreg, undef $q0 MVE_VPST 4, implicit $vpr - renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2) - renamable $r1, renamable $q2 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, killed renamable $vpr :: (load 8 from %ir.lsr.iv1820, align 2) + renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv17, align 2) + renamable $r1, renamable $q2 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, killed renamable $vpr :: (load (s64) from %ir.lsr.iv1820, align 2) $lr = tMOVr $r3, 14, $noreg renamable $q1 = nsw MVE_VMULi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 renamable $r3, dead $cpsr = nsw tSUBi8 killed $r3, 1, 14, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-in-vpt-2.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-in-vpt-2.mir index f0c2d9e873d72..989a58e8a1162 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-in-vpt-2.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-in-vpt-2.mir @@ -119,20 +119,20 @@ body: | ; CHECK: successors: %bb.2(0x80000000) ; CHECK: liveins: $r0, $r1, $r2, $r3 ; CHECK: $vpr = VMSR_P0 killed $r3, 14 /* CC::al */, $noreg - ; CHECK: VSTR_P0_off killed renamable $vpr, $sp, 0, 14 /* CC::al */, $noreg :: (store 4 into %stack.0) + ; CHECK: VSTR_P0_off killed renamable $vpr, $sp, 0, 14 /* CC::al */, $noreg :: (store (s32) into %stack.0) ; CHECK: $r3 = tMOVr $r0, 14 /* CC::al */, $noreg ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r2 ; CHECK: bb.2.bb9: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $r0, $r1, $r3 - ; CHECK: renamable $vpr = VLDR_P0_off $sp, 0, 14 /* CC::al */, $noreg :: (load 4 from %stack.0) + ; CHECK: renamable $vpr = VLDR_P0_off $sp, 0, 14 /* CC::al */, $noreg :: (load (s32) from %stack.0) ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, killed renamable $vpr :: (load 16 from %ir.lsr.iv24, align 4) + ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, killed renamable $vpr :: (load (s128) from %ir.lsr.iv24, align 4) ; CHECK: MVE_VPTv4i32r 8, renamable $q0, $zr, 1, implicit-def $vpr - ; CHECK: renamable $r3, renamable $q1 = MVE_VLDRWU32_post killed renamable $r3, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv1, align 4) + ; CHECK: renamable $r3, renamable $q1 = MVE_VLDRWU32_post killed renamable $r3, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv1, align 4) ; CHECK: renamable $q0 = nsw MVE_VMULi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: MVE_VSTRWU32 killed renamable $q0, killed renamable $r0, 0, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1, align 4) + ; CHECK: MVE_VSTRWU32 killed renamable $q0, killed renamable $r0, 0, 1, killed renamable $vpr :: (store (s128) into %ir.lsr.iv1, align 4) ; CHECK: $r0 = tMOVr $r3, 14 /* CC::al */, $noreg ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 ; CHECK: bb.3.bb27: @@ -160,7 +160,7 @@ body: | renamable $r12 = t2BICri killed renamable $r12, 3, 14, $noreg, $noreg $vpr = VMSR_P0 killed $r3, 14, $noreg renamable $r12 = t2SUBri killed renamable $r12, 4, 14, $noreg, $noreg - VSTR_P0_off killed renamable $vpr, $sp, 0, 14, $noreg :: (store 4 into %stack.0) + VSTR_P0_off killed renamable $vpr, $sp, 0, 14, $noreg :: (store (s32) into %stack.0) $r3 = tMOVr $r0, 14, $noreg renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 19, 14, $noreg, $noreg $lr = t2DoLoopStart renamable $lr @@ -169,18 +169,18 @@ body: | successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $lr, $r0, $r1, $r2, $r3 - renamable $vpr = VLDR_P0_off $sp, 0, 14, $noreg :: (load 4 from %stack.0) + renamable $vpr = VLDR_P0_off $sp, 0, 14, $noreg :: (load (s32) from %stack.0) MVE_VPST 4, implicit $vpr renamable $vpr = MVE_VCTP32 renamable $r2, 1, killed renamable $vpr - renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, killed renamable $vpr :: (load 16 from %ir.lsr.iv24, align 4) + renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, killed renamable $vpr :: (load (s128) from %ir.lsr.iv24, align 4) renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14, $noreg MVE_VPST 4, implicit $vpr renamable $vpr = MVE_VCMPi32r renamable $q0, $zr, 1, 1, killed renamable $vpr - renamable $r3, renamable $q1 = MVE_VLDRWU32_post killed renamable $r3, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv1, align 4) + renamable $r3, renamable $q1 = MVE_VLDRWU32_post killed renamable $r3, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv1, align 4) renamable $q0 = nsw MVE_VMULi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 MVE_VPST 8, implicit $vpr - MVE_VSTRWU32 killed renamable $q0, killed renamable $r0, 0, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1, align 4) + MVE_VSTRWU32 killed renamable $q0, killed renamable $r0, 0, 1, killed renamable $vpr :: (store (s128) into %ir.lsr.iv1, align 4) renamable $lr = t2LoopDec killed renamable $lr, 1 $r0 = tMOVr $r3, 14, $noreg t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-in-vpt.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-in-vpt.mir index 04df42dc89b83..5be38d4f740c5 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-in-vpt.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-in-vpt.mir @@ -150,19 +150,19 @@ body: | ; CHECK: successors: %bb.2(0x80000000) ; CHECK: liveins: $r0, $r1, $r2, $r3 ; CHECK: $vpr = VMSR_P0 killed $r3, 14 /* CC::al */, $noreg - ; CHECK: VSTR_P0_off killed renamable $vpr, $sp, 0, 14 /* CC::al */, $noreg :: (store 4 into %stack.0) + ; CHECK: VSTR_P0_off killed renamable $vpr, $sp, 0, 14 /* CC::al */, $noreg :: (store (s32) into %stack.0) ; CHECK: $r3 = tMOVr $r0, 14 /* CC::al */, $noreg ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r2 ; CHECK: bb.2.bb9: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $r0, $r1, $r3 - ; CHECK: renamable $vpr = VLDR_P0_off $sp, 0, 14 /* CC::al */, $noreg :: (load 4 from %stack.0) + ; CHECK: renamable $vpr = VLDR_P0_off $sp, 0, 14 /* CC::al */, $noreg :: (load (s32) from %stack.0) ; CHECK: MVE_VPST 4, implicit $vpr - ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv24, align 4) - ; CHECK: renamable $r3, renamable $q1 = MVE_VLDRWU32_post killed renamable $r3, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv1, align 4) + ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv24, align 4) + ; CHECK: renamable $r3, renamable $q1 = MVE_VLDRWU32_post killed renamable $r3, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv1, align 4) ; CHECK: renamable $q0 = nsw MVE_VMULi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: MVE_VSTRWU32 killed renamable $q0, killed renamable $r0, 0, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1, align 4) + ; CHECK: MVE_VSTRWU32 killed renamable $q0, killed renamable $r0, 0, 1, killed renamable $vpr :: (store (s128) into %ir.lsr.iv1, align 4) ; CHECK: $r0 = tMOVr $r3, 14 /* CC::al */, $noreg ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 ; CHECK: bb.3.bb27: @@ -190,7 +190,7 @@ body: | renamable $r12 = t2BICri killed renamable $r12, 3, 14, $noreg, $noreg $vpr = VMSR_P0 killed $r3, 14, $noreg renamable $r12 = t2SUBri killed renamable $r12, 4, 14, $noreg, $noreg - VSTR_P0_off killed renamable $vpr, $sp, 0, 14, $noreg :: (store 4 into %stack.0) + VSTR_P0_off killed renamable $vpr, $sp, 0, 14, $noreg :: (store (s32) into %stack.0) $r3 = tMOVr $r0, 14, $noreg renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 19, 14, $noreg, $noreg $lr = t2DoLoopStart renamable $lr @@ -199,15 +199,15 @@ body: | successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $lr, $r0, $r1, $r2, $r3 - renamable $vpr = VLDR_P0_off $sp, 0, 14, $noreg :: (load 4 from %stack.0) + renamable $vpr = VLDR_P0_off $sp, 0, 14, $noreg :: (load (s32) from %stack.0) MVE_VPST 2, implicit $vpr renamable $vpr = MVE_VCTP32 renamable $r2, 1, killed renamable $vpr - renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv24, align 4) - renamable $r3, renamable $q1 = MVE_VLDRWU32_post killed renamable $r3, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv1, align 4) + renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv24, align 4) + renamable $r3, renamable $q1 = MVE_VLDRWU32_post killed renamable $r3, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv1, align 4) renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14, $noreg renamable $q0 = nsw MVE_VMULi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 MVE_VPST 8, implicit $vpr - MVE_VSTRWU32 killed renamable $q0, killed renamable $r0, 0, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1, align 4) + MVE_VSTRWU32 killed renamable $q0, killed renamable $r0, 0, 1, killed renamable $vpr :: (store (s128) into %ir.lsr.iv1, align 4) renamable $lr = t2LoopDec killed renamable $lr, 1 $r0 = tMOVr $r3, 14, $noreg t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr @@ -288,18 +288,18 @@ body: | ; CHECK: renamable $r12 = t2BICri killed renamable $r12, 3, 14 /* CC::al */, $noreg, $noreg ; CHECK: $vpr = VMSR_P0 killed $r3, 14 /* CC::al */, $noreg ; CHECK: renamable $r12 = t2SUBri killed renamable $r12, 4, 14 /* CC::al */, $noreg, $noreg - ; CHECK: VSTR_P0_off killed renamable $vpr, $sp, 0, 14 /* CC::al */, $noreg :: (store 4 into %stack.0) + ; CHECK: VSTR_P0_off killed renamable $vpr, $sp, 0, 14 /* CC::al */, $noreg :: (store (s32) into %stack.0) ; CHECK: $r3 = tMOVr $r0, 14 /* CC::al */, $noreg ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg ; CHECK: bb.2.bb9: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3 - ; CHECK: renamable $vpr = VLDR_P0_off $sp, 0, 14 /* CC::al */, $noreg :: (load 4 from %stack.0) + ; CHECK: renamable $vpr = VLDR_P0_off $sp, 0, 14 /* CC::al */, $noreg :: (load (s32) from %stack.0) ; CHECK: MVE_VPST 2, implicit $vpr ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r2, 1, killed renamable $vpr ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr ; CHECK: renamable $r3, renamable $q1 = MVE_VLDRWU32_post killed renamable $r3, 16, 1, renamable $vpr - ; CHECK: VSTR_P0_off renamable $vpr, $sp, 0, 14 /* CC::al */, $noreg :: (store 4 into %stack.0) + ; CHECK: VSTR_P0_off renamable $vpr, $sp, 0, 14 /* CC::al */, $noreg :: (store (s32) into %stack.0) ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg ; CHECK: renamable $q0 = nsw MVE_VMULi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 ; CHECK: MVE_VPST 8, implicit $vpr @@ -331,7 +331,7 @@ body: | renamable $r12 = t2BICri killed renamable $r12, 3, 14, $noreg, $noreg $vpr = VMSR_P0 killed $r3, 14, $noreg renamable $r12 = t2SUBri killed renamable $r12, 4, 14, $noreg, $noreg - VSTR_P0_off killed renamable $vpr, $sp, 0, 14, $noreg :: (store 4 into %stack.0) + VSTR_P0_off killed renamable $vpr, $sp, 0, 14, $noreg :: (store (s32) into %stack.0) $r3 = tMOVr $r0, 14, $noreg renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 19, 14, $noreg, $noreg $lr = t2DoLoopStart renamable $lr @@ -340,12 +340,12 @@ body: | successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $lr, $r0, $r1, $r2, $r3 - renamable $vpr = VLDR_P0_off $sp, 0, 14, $noreg :: (load 4 from %stack.0) + renamable $vpr = VLDR_P0_off $sp, 0, 14, $noreg :: (load (s32) from %stack.0) MVE_VPST 2, implicit $vpr renamable $vpr = MVE_VCTP32 renamable $r2, 1, killed renamable $vpr renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr renamable $r3, renamable $q1 = MVE_VLDRWU32_post killed renamable $r3, 16, 1, renamable $vpr - VSTR_P0_off renamable $vpr, $sp, 0, 14, $noreg :: (store 4 into %stack.0) + VSTR_P0_off renamable $vpr, $sp, 0, 14, $noreg :: (store (s32) into %stack.0) renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14, $noreg renamable $q0 = nsw MVE_VMULi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 MVE_VPST 8, implicit $vpr @@ -430,13 +430,13 @@ body: | ; CHECK: renamable $r12 = t2BICri killed renamable $r12, 3, 14 /* CC::al */, $noreg, $noreg ; CHECK: $vpr = VMSR_P0 killed $r3, 14 /* CC::al */, $noreg ; CHECK: renamable $r12 = t2SUBri killed renamable $r12, 4, 14 /* CC::al */, $noreg, $noreg - ; CHECK: VSTR_P0_off killed renamable $vpr, $sp, 0, 14 /* CC::al */, $noreg :: (store 4 into %stack.0) + ; CHECK: VSTR_P0_off killed renamable $vpr, $sp, 0, 14 /* CC::al */, $noreg :: (store (s32) into %stack.0) ; CHECK: $r3 = tMOVr $r0, 14 /* CC::al */, $noreg ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg ; CHECK: bb.2.bb9: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3 - ; CHECK: renamable $vpr = VLDR_P0_off $sp, 0, 14 /* CC::al */, $noreg :: (load 4 from %stack.0) + ; CHECK: renamable $vpr = VLDR_P0_off $sp, 0, 14 /* CC::al */, $noreg :: (load (s32) from %stack.0) ; CHECK: MVE_VPST 2, implicit $vpr ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r2, 1, killed renamable $vpr ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr @@ -473,7 +473,7 @@ body: | renamable $r12 = t2BICri killed renamable $r12, 3, 14, $noreg, $noreg $vpr = VMSR_P0 killed $r3, 14, $noreg renamable $r12 = t2SUBri killed renamable $r12, 4, 14, $noreg, $noreg - VSTR_P0_off killed renamable $vpr, $sp, 0, 14, $noreg :: (store 4 into %stack.0) + VSTR_P0_off killed renamable $vpr, $sp, 0, 14, $noreg :: (store (s32) into %stack.0) $r3 = tMOVr $r0, 14, $noreg renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 19, 14, $noreg, $noreg $lr = t2DoLoopStart renamable $lr @@ -482,7 +482,7 @@ body: | successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $lr, $r0, $r1, $r2, $r3 - renamable $vpr = VLDR_P0_off $sp, 0, 14, $noreg :: (load 4 from %stack.0) + renamable $vpr = VLDR_P0_off $sp, 0, 14, $noreg :: (load (s32) from %stack.0) MVE_VPST 2, implicit $vpr renamable $vpr = MVE_VCTP32 renamable $r2, 1, killed renamable $vpr renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr @@ -572,13 +572,13 @@ body: | ; CHECK: renamable $r12 = t2BICri killed renamable $r12, 3, 14 /* CC::al */, $noreg, $noreg ; CHECK: $vpr = VMSR_P0 killed $r3, 14 /* CC::al */, $noreg ; CHECK: renamable $r12 = t2SUBri killed renamable $r12, 4, 14 /* CC::al */, $noreg, $noreg - ; CHECK: VSTR_P0_off killed renamable $vpr, $sp, 0, 14 /* CC::al */, $noreg :: (store 4 into %stack.0) + ; CHECK: VSTR_P0_off killed renamable $vpr, $sp, 0, 14 /* CC::al */, $noreg :: (store (s32) into %stack.0) ; CHECK: $r3 = tMOVr $r0, 14 /* CC::al */, $noreg ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg ; CHECK: bb.2.bb9: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3 - ; CHECK: renamable $vpr = VLDR_P0_off $sp, 0, 14 /* CC::al */, $noreg :: (load 4 from %stack.0) + ; CHECK: renamable $vpr = VLDR_P0_off $sp, 0, 14 /* CC::al */, $noreg :: (load (s32) from %stack.0) ; CHECK: MVE_VPST 2, implicit $vpr ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r2, 1, killed renamable $vpr ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr @@ -615,7 +615,7 @@ body: | renamable $r12 = t2BICri killed renamable $r12, 3, 14, $noreg, $noreg $vpr = VMSR_P0 killed $r3, 14, $noreg renamable $r12 = t2SUBri killed renamable $r12, 4, 14, $noreg, $noreg - VSTR_P0_off killed renamable $vpr, $sp, 0, 14, $noreg :: (store 4 into %stack.0) + VSTR_P0_off killed renamable $vpr, $sp, 0, 14, $noreg :: (store (s32) into %stack.0) $r3 = tMOVr $r0, 14, $noreg renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 19, 14, $noreg, $noreg $lr = t2DoLoopStart renamable $lr @@ -624,7 +624,7 @@ body: | successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $lr, $r0, $r1, $r2, $r3 - renamable $vpr = VLDR_P0_off $sp, 0, 14, $noreg :: (load 4 from %stack.0) + renamable $vpr = VLDR_P0_off $sp, 0, 14, $noreg :: (load (s32) from %stack.0) MVE_VPST 2, implicit $vpr renamable $vpr = MVE_VCTP32 renamable $r2, 1, killed renamable $vpr renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subi3.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subi3.mir index 285cb46465f3c..a70c6f13239f1 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subi3.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subi3.mir @@ -115,10 +115,10 @@ body: | ; CHECK: bb.2.vector.body: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $r0, $r1, $r2 - ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 0, $noreg :: (load 16 from %ir.lsr.iv13, align 4) - ; CHECK: renamable $r2, renamable $q1 = MVE_VLDRWU32_post killed renamable $r2, 16, 0, $noreg :: (load 16 from %ir.lsr.iv1416, align 4) + ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 0, $noreg :: (load (s128) from %ir.lsr.iv13, align 4) + ; CHECK: renamable $r2, renamable $q1 = MVE_VLDRWU32_post killed renamable $r2, 16, 0, $noreg :: (load (s128) from %ir.lsr.iv1416, align 4) ; CHECK: renamable $q0 = nsw MVE_VADDi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 - ; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 0, killed $noreg :: (store 16 into %ir.lsr.iv1719, align 4) + ; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 0, killed $noreg :: (store (s128) into %ir.lsr.iv1719, align 4) ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 ; CHECK: bb.3.for.cond.cleanup: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc @@ -151,12 +151,12 @@ body: | renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg MVE_VPST 4, implicit $vpr - renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv13, align 4) - renamable $r2, renamable $q1 = MVE_VLDRWU32_post killed renamable $r2, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv1416, align 4) + renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv13, align 4) + renamable $r2, renamable $q1 = MVE_VLDRWU32_post killed renamable $r2, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv1416, align 4) renamable $r3, dead $cpsr = tSUBi3 killed renamable $r3, 4, 14, $noreg renamable $q0 = nsw MVE_VADDi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 MVE_VPST 8, implicit $vpr - renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1719, align 4) + renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store (s128) into %ir.lsr.iv1719, align 4) renamable $lr = t2LoopDec killed renamable $lr, 1 t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr tB %bb.3, 14, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subri.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subri.mir index 6a5d7496d21e2..639dc610b75bb 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subri.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subri.mir @@ -114,10 +114,10 @@ body: | ; CHECK: bb.2.vector.body: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $r0, $r1, $r2 - ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 0, $noreg :: (load 16 from %ir.lsr.iv13, align 4) - ; CHECK: renamable $r2, renamable $q1 = MVE_VLDRWU32_post killed renamable $r2, 16, 0, $noreg :: (load 16 from %ir.lsr.iv1416, align 4) + ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 0, $noreg :: (load (s128) from %ir.lsr.iv13, align 4) + ; CHECK: renamable $r2, renamable $q1 = MVE_VLDRWU32_post killed renamable $r2, 16, 0, $noreg :: (load (s128) from %ir.lsr.iv1416, align 4) ; CHECK: renamable $q0 = nsw MVE_VADDi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 - ; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 0, killed $noreg :: (store 16 into %ir.lsr.iv1719, align 4) + ; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 0, killed $noreg :: (store (s128) into %ir.lsr.iv1719, align 4) ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 ; CHECK: bb.3.for.cond.cleanup: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc @@ -150,12 +150,12 @@ body: | renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg MVE_VPST 4, implicit $vpr - renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv13, align 4) - renamable $r2, renamable $q1 = MVE_VLDRWU32_post killed renamable $r2, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv1416, align 4) + renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv13, align 4) + renamable $r2, renamable $q1 = MVE_VLDRWU32_post killed renamable $r2, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv1416, align 4) renamable $r3 = t2SUBri killed renamable $r3, 4, 14, $noreg, $noreg renamable $q0 = nsw MVE_VADDi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 MVE_VPST 8, implicit $vpr - renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1719, align 4) + renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store (s128) into %ir.lsr.iv1719, align 4) renamable $lr = t2LoopDec killed renamable $lr, 1 t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr tB %bb.3, 14, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subri12.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subri12.mir index ef702514d5700..9d6ea1cebd9b3 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subri12.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subri12.mir @@ -114,10 +114,10 @@ body: | ; CHECK: bb.2.vector.body: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $r0, $r1, $r2 - ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 0, $noreg :: (load 16 from %ir.lsr.iv13, align 4) - ; CHECK: renamable $r2, renamable $q1 = MVE_VLDRWU32_post killed renamable $r2, 16, 0, $noreg :: (load 16 from %ir.lsr.iv1416, align 4) + ; CHECK: renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 0, $noreg :: (load (s128) from %ir.lsr.iv13, align 4) + ; CHECK: renamable $r2, renamable $q1 = MVE_VLDRWU32_post killed renamable $r2, 16, 0, $noreg :: (load (s128) from %ir.lsr.iv1416, align 4) ; CHECK: renamable $q0 = nsw MVE_VADDi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 - ; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 0, killed $noreg :: (store 16 into %ir.lsr.iv1719, align 4) + ; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 0, killed $noreg :: (store (s128) into %ir.lsr.iv1719, align 4) ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 ; CHECK: bb.3.for.cond.cleanup: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc @@ -150,12 +150,12 @@ body: | renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg MVE_VPST 4, implicit $vpr - renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv13, align 4) - renamable $r2, renamable $q1 = MVE_VLDRWU32_post killed renamable $r2, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv1416, align 4) + renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv13, align 4) + renamable $r2, renamable $q1 = MVE_VLDRWU32_post killed renamable $r2, 16, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv1416, align 4) renamable $r3 = t2SUBri12 killed renamable $r3, 4, 14, $noreg renamable $q0 = nsw MVE_VADDi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 MVE_VPST 8, implicit $vpr - renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1719, align 4) + renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store (s128) into %ir.lsr.iv1719, align 4) renamable $lr = t2LoopDec killed renamable $lr, 1 t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr tB %bb.3, 14, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp16-reduce.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp16-reduce.mir index 09545db306209..11a9eb62574c1 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp16-reduce.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp16-reduce.mir @@ -135,7 +135,7 @@ body: | ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $r3, renamable $r12, 27, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r3 = tLEApcrel %const.0, 14 /* CC::al */, $noreg ; CHECK: renamable $r12 = t2LSRri killed renamable $r12, 3, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load 16 from constant-pool) + ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load (s128) from constant-pool) ; CHECK: renamable $r3 = t2SUBrs renamable $r2, killed renamable $r12, 26, 14 /* CC::al */, $noreg, $noreg ; CHECK: bb.2.vector.body: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) @@ -143,8 +143,8 @@ body: | ; CHECK: renamable $vpr = MVE_VCTP16 renamable $r2, 0, $noreg ; CHECK: $q1 = MVE_VORR killed $q0, killed $q0, 0, $noreg, undef $q1 ; CHECK: MVE_VPST 4, implicit $vpr - ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRBU16_post killed renamable $r0, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv19, align 1) - ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRBU16_post killed renamable $r1, 8, 1, killed renamable $vpr :: (load 8 from %ir.lsr.iv2022, align 1) + ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRBU16_post killed renamable $r0, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv19, align 1) + ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRBU16_post killed renamable $r1, 8, 1, killed renamable $vpr :: (load (s64) from %ir.lsr.iv2022, align 1) ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 8, 14 /* CC::al */, $noreg ; CHECK: renamable $q0 = nuw MVE_VMULi16 killed renamable $q2, killed renamable $q0, 0, $noreg, undef renamable $q0 ; CHECK: renamable $q0 = MVE_VSUBi16 renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 @@ -186,7 +186,7 @@ body: | renamable $lr = nuw nsw t2ADDrs killed renamable $r3, renamable $r12, 27, 14, $noreg, $noreg renamable $r3 = tLEApcrel %const.0, 14, $noreg renamable $r12 = t2LSRri killed renamable $r12, 3, 14, $noreg, $noreg - renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load 16 from constant-pool) + renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load (s128) from constant-pool) renamable $r3 = t2SUBrs renamable $r2, killed renamable $r12, 26, 14, $noreg, $noreg $lr = t2DoLoopStart renamable $lr @@ -197,8 +197,8 @@ body: | renamable $vpr = MVE_VCTP16 renamable $r2, 0, $noreg $q1 = MVE_VORR killed $q0, $q0, 0, $noreg, undef $q1 MVE_VPST 4, implicit $vpr - renamable $r0, renamable $q0 = MVE_VLDRBU16_post killed renamable $r0, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv19, align 1) - renamable $r1, renamable $q2 = MVE_VLDRBU16_post killed renamable $r1, 8, 1, killed renamable $vpr :: (load 8 from %ir.lsr.iv2022, align 1) + renamable $r0, renamable $q0 = MVE_VLDRBU16_post killed renamable $r0, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv19, align 1) + renamable $r1, renamable $q2 = MVE_VLDRBU16_post killed renamable $r1, 8, 1, killed renamable $vpr :: (load (s64) from %ir.lsr.iv2022, align 1) renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 8, 14, $noreg renamable $q0 = nuw MVE_VMULi16 killed renamable $q2, killed renamable $q0, 0, $noreg, undef renamable $q0 renamable $lr = t2LoopDec killed renamable $lr, 1 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vmaxmin_vpred_r.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vmaxmin_vpred_r.mir index 9fe20c5ed9116..d7634a767fd81 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vmaxmin_vpred_r.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vmaxmin_vpred_r.mir @@ -149,27 +149,27 @@ body: | ; CHECK: frame-setup CFI_INSTRUCTION offset $r6, -12 ; CHECK: frame-setup CFI_INSTRUCTION offset $r5, -16 ; CHECK: frame-setup CFI_INSTRUCTION offset $r4, -20 - ; CHECK: renamable $r12 = t2LDRi12 $sp, 44, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.6, align 8) + ; CHECK: renamable $r12 = t2LDRi12 $sp, 44, 14 /* CC::al */, $noreg :: (load (s32) from %fixed-stack.6, align 8) ; CHECK: $lr = MVE_WLSTP_32 killed renamable $r12, %bb.3 ; CHECK: bb.1.for.body.lr.ph: ; CHECK: successors: %bb.2(0x80000000) ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3 - ; CHECK: $r7, $r6 = t2LDRDi8 $sp, 36, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.4, align 8), (load 4 from %fixed-stack.5) - ; CHECK: $r5, $r4 = t2LDRDi8 $sp, 20, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8), (load 4 from %fixed-stack.1) + ; CHECK: $r7, $r6 = t2LDRDi8 $sp, 36, 14 /* CC::al */, $noreg :: (load (s32) from %fixed-stack.4, align 8), (load (s32) from %fixed-stack.5) + ; CHECK: $r5, $r4 = t2LDRDi8 $sp, 20, 14 /* CC::al */, $noreg :: (load (s32) from %fixed-stack.0, align 8), (load (s32) from %fixed-stack.1) ; CHECK: renamable $q0 = MVE_VDUP32 killed renamable $r6, 0, $noreg, undef renamable $q0 ; CHECK: renamable $q1 = MVE_VDUP32 killed renamable $r7, 0, $noreg, undef renamable $q1 ; CHECK: bb.2.for.body: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $q0, $q1, $r0, $r1, $r2, $r3, $r4, $r5 - ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRWU32_post killed renamable $r1, 4, 0, $noreg :: (load 16 from %ir.input_2_cast, align 4) - ; CHECK: renamable $r0, renamable $q3 = MVE_VLDRWU32_post killed renamable $r0, 4, 0, $noreg :: (load 16 from %ir.input_1_cast, align 4) + ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRWU32_post killed renamable $r1, 4, 0, $noreg :: (load (s128) from %ir.input_2_cast, align 4) + ; CHECK: renamable $r0, renamable $q3 = MVE_VLDRWU32_post killed renamable $r0, 4, 0, $noreg :: (load (s128) from %ir.input_1_cast, align 4) ; CHECK: renamable $q2 = MVE_VADD_qr_i32 killed renamable $q2, renamable $r3, 0, $noreg, undef renamable $q2 ; CHECK: renamable $q3 = MVE_VADD_qr_i32 killed renamable $q3, renamable $r2, 0, $noreg, undef renamable $q3 ; CHECK: renamable $q2 = MVE_VMULi32 killed renamable $q3, killed renamable $q2, 0, $noreg, undef renamable $q2 ; CHECK: renamable $q2 = MVE_VADD_qr_i32 killed renamable $q2, renamable $r4, 0, $noreg, undef renamable $q2 ; CHECK: renamable $q2 = MVE_VMAXu32 killed renamable $q2, renamable $q1, 0, $noreg, undef renamable $q2 ; CHECK: renamable $q2 = MVE_VMINu32 killed renamable $q2, renamable $q0, 0, $noreg, undef renamable $q2 - ; CHECK: renamable $r5 = MVE_VSTRWU32_post killed renamable $q2, killed renamable $r5, 4, 0, killed $noreg :: (store 16 into %ir.output_cast, align 4) + ; CHECK: renamable $r5 = MVE_VSTRWU32_post killed renamable $q2, killed renamable $r5, 4, 0, killed $noreg :: (store (s128) into %ir.output_cast, align 4) ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 ; CHECK: bb.3.for.cond.cleanup: ; CHECK: $r0, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg @@ -185,7 +185,7 @@ body: | frame-setup CFI_INSTRUCTION offset $r6, -12 frame-setup CFI_INSTRUCTION offset $r5, -16 frame-setup CFI_INSTRUCTION offset $r4, -20 - renamable $r12 = t2LDRi12 $sp, 44, 14, $noreg :: (load 4 from %fixed-stack.0, align 8) + renamable $r12 = t2LDRi12 $sp, 44, 14, $noreg :: (load (s32) from %fixed-stack.0, align 8) renamable $r5 = t2ADDri renamable $r12, 3, 14, $noreg, $noreg renamable $lr = t2LSRri killed renamable $r5, 2, 14, $noreg, $noreg $lr = t2WhileLoopStartLR renamable $lr, %bb.3, implicit-def dead $cpsr @@ -195,8 +195,8 @@ body: | successors: %bb.2(0x80000000) liveins: $lr, $r0, $r1, $r2, $r3, $r12 - $r7, $r6 = t2LDRDi8 $sp, 36, 14, $noreg :: (load 4 from %fixed-stack.2, align 8), (load 4 from %fixed-stack.1) - $r5, $r4 = t2LDRDi8 $sp, 20, 14, $noreg :: (load 4 from %fixed-stack.6, align 8), (load 4 from %fixed-stack.5) + $r7, $r6 = t2LDRDi8 $sp, 36, 14, $noreg :: (load (s32) from %fixed-stack.2, align 8), (load (s32) from %fixed-stack.1) + $r5, $r4 = t2LDRDi8 $sp, 20, 14, $noreg :: (load (s32) from %fixed-stack.6, align 8), (load (s32) from %fixed-stack.5) renamable $q0 = MVE_VDUP32 killed renamable $r6, 0, $noreg, undef renamable $q0 renamable $q1 = MVE_VDUP32 killed renamable $r7, 0, $noreg, undef renamable $q1 @@ -206,9 +206,9 @@ body: | renamable $vpr = MVE_VCTP32 renamable $r12, 0, $noreg MVE_VPST 8, implicit $vpr - renamable $r1, renamable $q2 = MVE_VLDRWU32_post killed renamable $r1, 4, 1, renamable $vpr :: (load 16 from %ir.input_2_cast, align 4) + renamable $r1, renamable $q2 = MVE_VLDRWU32_post killed renamable $r1, 4, 1, renamable $vpr :: (load (s128) from %ir.input_2_cast, align 4) MVE_VPST 8, implicit $vpr - renamable $r0, renamable $q3 = MVE_VLDRWU32_post killed renamable $r0, 4, 1, renamable $vpr :: (load 16 from %ir.input_1_cast, align 4) + renamable $r0, renamable $q3 = MVE_VLDRWU32_post killed renamable $r0, 4, 1, renamable $vpr :: (load (s128) from %ir.input_1_cast, align 4) renamable $q2 = MVE_VADD_qr_i32 killed renamable $q2, renamable $r3, 0, $noreg, undef renamable $q2 renamable $q3 = MVE_VADD_qr_i32 killed renamable $q3, renamable $r2, 0, $noreg, undef renamable $q3 renamable $r12 = t2SUBri killed renamable $r12, 4, 14, $noreg, $noreg @@ -217,7 +217,7 @@ body: | MVE_VPST 2, implicit $vpr renamable $q2 = MVE_VMAXu32 killed renamable $q2, renamable $q1, 1, renamable $vpr, undef renamable $q2 renamable $q2 = MVE_VMINu32 killed renamable $q2, renamable $q0, 1, renamable $vpr, undef renamable $q2 - renamable $r5 = MVE_VSTRWU32_post killed renamable $q2, killed renamable $r5, 4, 1, killed renamable $vpr :: (store 16 into %ir.output_cast, align 4) + renamable $r5 = MVE_VSTRWU32_post killed renamable $q2, killed renamable $r5, 4, 1, killed renamable $vpr :: (store (s128) into %ir.output_cast, align 4) renamable $lr = t2LoopDec killed renamable $lr, 1 t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr tB %bb.3, 14, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vmldava_in_vpt.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vmldava_in_vpt.mir index 78d4fcbc80919..974929b7ddc90 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vmldava_in_vpt.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vmldava_in_vpt.mir @@ -144,22 +144,22 @@ body: | ; CHECK: frame-setup CFI_INSTRUCTION offset $r6, -8 ; CHECK: frame-setup CFI_INSTRUCTION offset $r5, -12 ; CHECK: frame-setup CFI_INSTRUCTION offset $r4, -16 - ; CHECK: renamable $r4 = tLDRspi $sp, 9, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.5) + ; CHECK: renamable $r4 = tLDRspi $sp, 9, 14 /* CC::al */, $noreg :: (load (s32) from %fixed-stack.5) ; CHECK: renamable $r12 = t2MOVi 0, 14 /* CC::al */, $noreg, $noreg ; CHECK: $lr = MVE_WLSTP_32 killed renamable $r4, %bb.3 ; CHECK: bb.1.for.body.lr.ph: ; CHECK: successors: %bb.2(0x80000000) ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3 - ; CHECK: renamable $r5 = tLDRspi $sp, 4, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8) - ; CHECK: $r6, $r12 = t2LDRDi8 $sp, 28, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.3), (load 4 from %fixed-stack.4, align 8) + ; CHECK: renamable $r5 = tLDRspi $sp, 4, 14 /* CC::al */, $noreg :: (load (s32) from %fixed-stack.0, align 8) + ; CHECK: $r6, $r12 = t2LDRDi8 $sp, 28, 14 /* CC::al */, $noreg :: (load (s32) from %fixed-stack.3), (load (s32) from %fixed-stack.4, align 8) ; CHECK: renamable $q0 = MVE_VDUP32 killed renamable $r12, 0, $noreg, undef renamable $q0 ; CHECK: renamable $q1 = MVE_VDUP32 killed renamable $r6, 0, $noreg, undef renamable $q1 ; CHECK: renamable $r12 = t2MOVi 0, 14 /* CC::al */, $noreg, $noreg ; CHECK: bb.2.for.body: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $q0, $q1, $r0, $r1, $r2, $r3, $r5, $r12 - ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRWU32_post killed renamable $r1, 4, 0, $noreg :: (load 16 from %ir.input_2_cast, align 4) - ; CHECK: renamable $r0, renamable $q3 = MVE_VLDRWU32_post killed renamable $r0, 4, 0, $noreg :: (load 16 from %ir.input_1_cast, align 4) + ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRWU32_post killed renamable $r1, 4, 0, $noreg :: (load (s128) from %ir.input_2_cast, align 4) + ; CHECK: renamable $r0, renamable $q3 = MVE_VLDRWU32_post killed renamable $r0, 4, 0, $noreg :: (load (s128) from %ir.input_1_cast, align 4) ; CHECK: renamable $q2 = MVE_VADD_qr_i32 killed renamable $q2, renamable $r3, 0, $noreg, undef renamable $q2 ; CHECK: renamable $q3 = MVE_VADD_qr_i32 killed renamable $q3, renamable $r2, 0, $noreg, undef renamable $q3 ; CHECK: renamable $q3 = MVE_VMLAS_qr_u32 killed renamable $q3, killed renamable $q2, renamable $r5, 0, $noreg @@ -181,7 +181,7 @@ body: | frame-setup CFI_INSTRUCTION offset $r6, -8 frame-setup CFI_INSTRUCTION offset $r5, -12 frame-setup CFI_INSTRUCTION offset $r4, -16 - renamable $r4 = tLDRspi $sp, 9, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0) + renamable $r4 = tLDRspi $sp, 9, 14 /* CC::al */, $noreg :: (load (s32) from %fixed-stack.0) renamable $r12 = t2MOVi 0, 14 /* CC::al */, $noreg, $noreg renamable $r5, dead $cpsr = tADDi3 renamable $r4, 3, 14 /* CC::al */, $noreg renamable $r5, dead $cpsr = tLSRri killed renamable $r5, 2, 14 /* CC::al */, $noreg @@ -192,8 +192,8 @@ body: | successors: %bb.2(0x80000000) liveins: $lr, $r0, $r1, $r2, $r3, $r4 - renamable $r5 = tLDRspi $sp, 4, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.5, align 8) - $r6, $r12 = t2LDRDi8 $sp, 28, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.2), (load 4 from %fixed-stack.1, align 8) + renamable $r5 = tLDRspi $sp, 4, 14 /* CC::al */, $noreg :: (load (s32) from %fixed-stack.5, align 8) + $r6, $r12 = t2LDRDi8 $sp, 28, 14 /* CC::al */, $noreg :: (load (s32) from %fixed-stack.2), (load (s32) from %fixed-stack.1, align 8) renamable $q0 = MVE_VDUP32 killed renamable $r12, 0, $noreg, undef renamable $q0 renamable $q1 = MVE_VDUP32 killed renamable $r6, 0, $noreg, undef renamable $q1 renamable $r12 = t2MOVi 0, 14 /* CC::al */, $noreg, $noreg @@ -204,9 +204,9 @@ body: | renamable $vpr = MVE_VCTP32 renamable $r4, 0, $noreg MVE_VPST 8, implicit $vpr - renamable $r1, renamable $q2 = MVE_VLDRWU32_post killed renamable $r1, 4, 1, renamable $vpr :: (load 16 from %ir.input_2_cast, align 4) + renamable $r1, renamable $q2 = MVE_VLDRWU32_post killed renamable $r1, 4, 1, renamable $vpr :: (load (s128) from %ir.input_2_cast, align 4) MVE_VPST 8, implicit $vpr - renamable $r0, renamable $q3 = MVE_VLDRWU32_post killed renamable $r0, 4, 1, renamable $vpr :: (load 16 from %ir.input_1_cast, align 4) + renamable $r0, renamable $q3 = MVE_VLDRWU32_post killed renamable $r0, 4, 1, renamable $vpr :: (load (s128) from %ir.input_1_cast, align 4) renamable $q2 = MVE_VADD_qr_i32 killed renamable $q2, renamable $r3, 0, $noreg, undef renamable $q2 renamable $q3 = MVE_VADD_qr_i32 killed renamable $q3, renamable $r2, 0, $noreg, undef renamable $q3 renamable $r4, dead $cpsr = tSUBi8 killed renamable $r4, 4, 14 /* CC::al */, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vpt-blocks.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vpt-blocks.mir index cffef76169077..b11a55384952e 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vpt-blocks.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vpt-blocks.mir @@ -1047,14 +1047,14 @@ body: | ; CHECK: renamable $r12 = t2ANDri killed renamable $r2, 1, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r2 = t2RSBri killed renamable $r12, 0, 14 /* CC::al */, $noreg, $noreg ; CHECK: $vpr = VMSR_P0 killed $r2, 14 /* CC::al */, $noreg - ; CHECK: VSTR_P0_off killed renamable $vpr, $sp, 0, 14 /* CC::al */, $noreg :: (store 4 into %stack.0) + ; CHECK: VSTR_P0_off killed renamable $vpr, $sp, 0, 14 /* CC::al */, $noreg :: (store (s32) into %stack.0) ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r0 ; CHECK: bb.2 (align 4): ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $q0, $r1 - ; CHECK: renamable $vpr = VLDR_P0_off $sp, 0, 14 /* CC::al */, $noreg :: (load 4 from %stack.0) + ; CHECK: renamable $vpr = VLDR_P0_off $sp, 0, 14 /* CC::al */, $noreg :: (load (s32) from %stack.0) ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $r1 = MVE_VSTRWU32_post renamable $q0, killed renamable $r1, 16, 1, killed renamable $vpr :: (store 16, align 4) + ; CHECK: renamable $r1 = MVE_VSTRWU32_post renamable $q0, killed renamable $r1, 16, 1, killed renamable $vpr :: (store (s128), align 4) ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 ; CHECK: bb.3: ; CHECK: $sp = frame-destroy tADDspi $sp, 1, 14 /* CC::al */, $noreg @@ -1087,19 +1087,19 @@ body: | renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $r3, 19, 14 /* CC::al */, $noreg, $noreg renamable $r2 = t2RSBri killed renamable $r12, 0, 14 /* CC::al */, $noreg, $noreg $vpr = VMSR_P0 killed $r2, 14 /* CC::al */, $noreg - VSTR_P0_off killed renamable $vpr, $sp, 0, 14 /* CC::al */, $noreg :: (store 4 into %stack.0) + VSTR_P0_off killed renamable $vpr, $sp, 0, 14 /* CC::al */, $noreg :: (store (s32) into %stack.0) renamable $lr = t2DoLoopStartTP killed renamable $lr, renamable $r0 bb.2 (align 4): successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $lr, $q0, $r0, $r1 - renamable $vpr = VLDR_P0_off $sp, 0, 14 /* CC::al */, $noreg :: (load 4 from %stack.0) + renamable $vpr = VLDR_P0_off $sp, 0, 14 /* CC::al */, $noreg :: (load (s32) from %stack.0) MVE_VPST 8, implicit $vpr renamable $vpr = MVE_VCTP32 renamable $r0, 1, killed renamable $vpr renamable $r0, dead $cpsr = tSUBi8 killed renamable $r0, 4, 14 /* CC::al */, $noreg MVE_VPST 8, implicit $vpr - renamable $r1 = MVE_VSTRWU32_post renamable $q0, killed renamable $r1, 16, 1, killed renamable $vpr :: (store 16, align 4) + renamable $r1 = MVE_VSTRWU32_post renamable $q0, killed renamable $r1, 16, 1, killed renamable $vpr :: (store (s128), align 4) renamable $lr = t2LoopDec killed renamable $lr, 1 t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr tB %bb.3, 14 /* CC::al */, $noreg @@ -1145,14 +1145,14 @@ body: | ; CHECK: liveins: $r0, $r1, $r2 ; CHECK: renamable $r12 = t2LEApcrel %const.0, 14 /* CC::al */, $noreg ; CHECK: renamable $q1 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q1 - ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r12, 0, 0, $noreg :: (load 16 from constant-pool, align 8) + ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r12, 0, 0, $noreg :: (load (s128) from constant-pool, align 8) ; CHECK: renamable $q2 = MVE_VMOVimmi32 4, 0, $noreg, undef renamable $q2 ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r2 ; CHECK: bb.2 (align 4): ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $q0, $q1, $q2, $r0, $r1 ; CHECK: MVE_VPTv4s32r 8, renamable $q0, renamable $r1, 11, implicit-def $vpr - ; CHECK: renamable $r0 = MVE_VSTRWU32_post renamable $q1, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16, align 4) + ; CHECK: renamable $r0 = MVE_VSTRWU32_post renamable $q1, killed renamable $r0, 16, 1, killed renamable $vpr :: (store (s128), align 4) ; CHECK: renamable $q0 = MVE_VADDi32 killed renamable $q0, renamable $q2, 0, $noreg, undef renamable $q0 ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 ; CHECK: bb.3: @@ -1179,7 +1179,7 @@ body: | renamable $r3, dead $cpsr = nuw tADDi3 renamable $r2, 3, 14 /* CC::al */, $noreg renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg renamable $q1 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q1 - renamable $q0 = MVE_VLDRWU32 killed renamable $r12, 0, 0, $noreg :: (load 16 from constant-pool, align 8) + renamable $q0 = MVE_VLDRWU32 killed renamable $r12, 0, 0, $noreg :: (load (s128) from constant-pool, align 8) renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg @@ -1193,7 +1193,7 @@ body: | renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg MVE_VPST 4, implicit $vpr renamable $vpr = MVE_VCMPs32r renamable $q0, renamable $r1, 11, 1, killed renamable $vpr - renamable $r0 = MVE_VSTRWU32_post renamable $q1, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16, align 4) + renamable $r0 = MVE_VSTRWU32_post renamable $q1, killed renamable $r0, 16, 1, killed renamable $vpr :: (store (s128), align 4) renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg renamable $q0 = MVE_VADDi32 killed renamable $q0, renamable $q2, 0, $noreg, undef renamable $q0 renamable $lr = t2LoopDec killed renamable $lr, 1, implicit-def dead $cpsr @@ -1246,7 +1246,7 @@ body: | ; CHECK: renamable $r3, dead $cpsr = nuw tADDi3 renamable $r2, 3, 14 /* CC::al */, $noreg ; CHECK: renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $q1 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q1 - ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r12, 0, 0, $noreg :: (load 16 from constant-pool, align 8) + ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r12, 0, 0, $noreg :: (load (s128) from constant-pool, align 8) ; CHECK: renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg @@ -1257,7 +1257,7 @@ body: | ; CHECK: MVE_VPTv4s32r 8, renamable $q0, renamable $r1, 11, implicit-def $vpr ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r2, 1, killed $vpr ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $r0 = MVE_VSTRWU32_post renamable $q1, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16, align 4) + ; CHECK: renamable $r0 = MVE_VSTRWU32_post renamable $q1, killed renamable $r0, 16, 1, killed renamable $vpr :: (store (s128), align 4) ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg ; CHECK: renamable $q0 = MVE_VADDi32 killed renamable $q0, renamable $q2, 0, $noreg, undef renamable $q0 ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2 @@ -1285,7 +1285,7 @@ body: | renamable $r3, dead $cpsr = nuw tADDi3 renamable $r2, 3, 14 /* CC::al */, $noreg renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg renamable $q1 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q1 - renamable $q0 = MVE_VLDRWU32 killed renamable $r12, 0, 0, $noreg :: (load 16 from constant-pool, align 8) + renamable $q0 = MVE_VLDRWU32 killed renamable $r12, 0, 0, $noreg :: (load (s128) from constant-pool, align 8) renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg @@ -1299,7 +1299,7 @@ body: | MVE_VPTv4s32r 8, renamable $q0, renamable $r1, 11, implicit-def $vpr renamable $vpr = MVE_VCTP32 renamable $r2, 1, $vpr MVE_VPST 8, implicit $vpr - renamable $r0 = MVE_VSTRWU32_post renamable $q1, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16, align 4) + renamable $r0 = MVE_VSTRWU32_post renamable $q1, killed renamable $r0, 16, 1, killed renamable $vpr :: (store (s128), align 4) renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg renamable $q0 = MVE_VADDi32 killed renamable $q0, renamable $q2, 0, $noreg, undef renamable $q0 renamable $lr = t2LoopDec killed renamable $lr, 1, implicit-def dead $cpsr diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-negative-offset.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-negative-offset.mir index e8a2239066bee..2690ca170f1a4 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-negative-offset.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-negative-offset.mir @@ -138,10 +138,10 @@ body: | frame-setup CFI_INSTRUCTION offset $r4, -8 $sp = frame-setup tSUBspi $sp, 8, 14, $noreg frame-setup CFI_INSTRUCTION def_cfa_offset 40 - tSTRspi killed $r3, $sp, 7, 14, $noreg :: (store 4 into %stack.0) - tSTRspi killed $r2, $sp, 6, 14, $noreg :: (store 4 into %stack.1) - tSTRspi killed $r1, $sp, 5, 14, $noreg :: (store 4 into %stack.2) - tSTRspi killed $r0, $sp, 4, 14, $noreg :: (store 4 into %stack.3) + tSTRspi killed $r3, $sp, 7, 14, $noreg :: (store (s32) into %stack.0) + tSTRspi killed $r2, $sp, 6, 14, $noreg :: (store (s32) into %stack.1) + tSTRspi killed $r1, $sp, 5, 14, $noreg :: (store (s32) into %stack.2) + tSTRspi killed $r0, $sp, 4, 14, $noreg :: (store (s32) into %stack.3) tB %bb.4, 14, $noreg bb.1.for.cond.cleanup: @@ -151,44 +151,44 @@ body: | bb.2.for.body.preheader: successors: %bb.3(0x80000000) - $r0 = tLDRspi $sp, 4, 14, $noreg :: (load 4 from %stack.3) + $r0 = tLDRspi $sp, 4, 14, $noreg :: (load (s32) from %stack.3) renamable $r1, dead $cpsr = tSUBi3 killed renamable $r0, 4, 14, $noreg - $r2 = tLDRspi $sp, 6, 14, $noreg :: (load 4 from %stack.1) + $r2 = tLDRspi $sp, 6, 14, $noreg :: (load (s32) from %stack.1) renamable $r3, dead $cpsr = tSUBi3 killed renamable $r2, 4, 14, $noreg - $r12 = t2LDRi12 $sp, 20, 14, $noreg :: (load 4 from %stack.2) + $r12 = t2LDRi12 $sp, 20, 14, $noreg :: (load (s32) from %stack.2) renamable $lr = t2SUBri killed renamable $r12, 4, 14, $noreg, $noreg - $r4 = tLDRspi $sp, 7, 14, $noreg :: (load 4 from %stack.0) - t2STRi12 killed $lr, $sp, 12, 14, $noreg :: (store 4 into %stack.4) - tSTRspi killed $r3, $sp, 2, 14, $noreg :: (store 4 into %stack.5) - tSTRspi killed $r1, $sp, 1, 14, $noreg :: (store 4 into %stack.6) - tSTRspi killed $r4, $sp, 0, 14, $noreg :: (store 4 into %stack.7) + $r4 = tLDRspi $sp, 7, 14, $noreg :: (load (s32) from %stack.0) + t2STRi12 killed $lr, $sp, 12, 14, $noreg :: (store (s32) into %stack.4) + tSTRspi killed $r3, $sp, 2, 14, $noreg :: (store (s32) into %stack.5) + tSTRspi killed $r1, $sp, 1, 14, $noreg :: (store (s32) into %stack.6) + tSTRspi killed $r4, $sp, 0, 14, $noreg :: (store (s32) into %stack.7) tB %bb.3, 14, $noreg bb.3.for.body: successors: %bb.3(0x40000000), %bb.1(0x40000000) - $r0 = tLDRspi $sp, 0, 14, $noreg :: (load 4 from %stack.7) - $r1 = tLDRspi $sp, 1, 14, $noreg :: (load 4 from %stack.6) - $r2 = tLDRspi $sp, 2, 14, $noreg :: (load 4 from %stack.5) - $r3 = tLDRspi $sp, 3, 14, $noreg :: (load 4 from %stack.4) - renamable $r12, renamable $r3 = t2LDR_PRE renamable $r3, 4, 14, $noreg :: (load 4 from %ir.scevgep11) - renamable $lr, renamable $r2 = t2LDR_PRE renamable $r2, 4, 14, $noreg :: (load 4 from %ir.scevgep7) + $r0 = tLDRspi $sp, 0, 14, $noreg :: (load (s32) from %stack.7) + $r1 = tLDRspi $sp, 1, 14, $noreg :: (load (s32) from %stack.6) + $r2 = tLDRspi $sp, 2, 14, $noreg :: (load (s32) from %stack.5) + $r3 = tLDRspi $sp, 3, 14, $noreg :: (load (s32) from %stack.4) + renamable $r12, renamable $r3 = t2LDR_PRE renamable $r3, 4, 14, $noreg :: (load (s32) from %ir.scevgep11) + renamable $lr, renamable $r2 = t2LDR_PRE renamable $r2, 4, 14, $noreg :: (load (s32) from %ir.scevgep7) renamable $r12 = nsw t2MUL killed renamable $lr, killed renamable $r12, 14, $noreg - early-clobber renamable $r1 = t2STR_PRE killed renamable $r12, renamable $r1, 4, 14, $noreg :: (store 4 into %ir.scevgep3) + early-clobber renamable $r1 = t2STR_PRE killed renamable $r12, renamable $r1, 4, 14, $noreg :: (store (s32) into %ir.scevgep3) $lr = tMOVr killed $r0, 14, $noreg renamable $lr = t2LoopDec killed renamable $lr, 1 $r0 = tMOVr $lr, 14, $noreg - tSTRspi killed $r0, $sp, 0, 14, $noreg :: (store 4 into %stack.7) - tSTRspi killed $r1, $sp, 1, 14, $noreg :: (store 4 into %stack.6) - tSTRspi killed $r2, $sp, 2, 14, $noreg :: (store 4 into %stack.5) - tSTRspi killed $r3, $sp, 3, 14, $noreg :: (store 4 into %stack.4) + tSTRspi killed $r0, $sp, 0, 14, $noreg :: (store (s32) into %stack.7) + tSTRspi killed $r1, $sp, 1, 14, $noreg :: (store (s32) into %stack.6) + tSTRspi killed $r2, $sp, 2, 14, $noreg :: (store (s32) into %stack.5) + tSTRspi killed $r3, $sp, 3, 14, $noreg :: (store (s32) into %stack.4) t2LoopEnd killed renamable $lr, %bb.3, implicit-def dead $cpsr tB %bb.1, 14, $noreg bb.4.while: successors: %bb.2(0x40000000), %bb.1(0x40000000) - $r0 = tLDRspi $sp, 7, 14, $noreg :: (load 4 from %stack.0) + $r0 = tLDRspi $sp, 7, 14, $noreg :: (load (s32) from %stack.0) $lr = t2WhileLoopStartLR killed renamable $r0, %bb.1, implicit-def dead $cpsr tB %bb.2, 14, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while.mir index f8b215072052c..590917607a731 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while.mir @@ -106,8 +106,8 @@ body: | ; CHECK: bb.2.while.body: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: liveins: $lr, $r0, $r1 - ; CHECK: renamable $r2, renamable $r1 = t2LDRH_PRE killed renamable $r1, 2, 14 /* CC::al */, $noreg :: (load 2 from %ir.scevgep4) - ; CHECK: early-clobber renamable $r0 = t2STRH_PRE killed renamable $r2, killed renamable $r0, 2, 14 /* CC::al */, $noreg :: (store 2 into %ir.scevgep7) + ; CHECK: renamable $r2, renamable $r1 = t2LDRH_PRE killed renamable $r1, 2, 14 /* CC::al */, $noreg :: (load (s16) from %ir.scevgep4) + ; CHECK: early-clobber renamable $r0 = t2STRH_PRE killed renamable $r2, killed renamable $r0, 2, 14 /* CC::al */, $noreg :: (store (s16) into %ir.scevgep7) ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2 ; CHECK: bb.3.while.end: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc @@ -134,8 +134,8 @@ body: | successors: %bb.2(0x7c000000), %bb.3(0x04000000) liveins: $lr, $r0, $r1 - renamable $r2, renamable $r1 = t2LDRH_PRE killed renamable $r1, 2, 14, $noreg :: (load 2 from %ir.scevgep4) - early-clobber renamable $r0 = t2STRH_PRE killed renamable $r2, killed renamable $r0, 2, 14, $noreg :: (store 2 into %ir.scevgep7) + renamable $r2, renamable $r1 = t2LDRH_PRE killed renamable $r1, 2, 14, $noreg :: (load (s16) from %ir.scevgep4) + early-clobber renamable $r0 = t2STRH_PRE killed renamable $r2, killed renamable $r0, 2, 14, $noreg :: (store (s16) into %ir.scevgep7) renamable $lr = t2LoopDec killed renamable $lr, 1 t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr tB %bb.3, 14, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wls-search-killed.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wls-search-killed.mir index 1b4523ef17dcd..754abfa9436ba 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wls-search-killed.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wls-search-killed.mir @@ -90,7 +90,7 @@ body: | ; CHECK: successors: %bb.2(0x80000000) ; CHECK: [[COPY4:%[0-9]+]]:gpr = COPY [[COPY]] ; CHECK: [[COPY5:%[0-9]+]]:gpr = COPY [[COPY1]] - ; CHECK: [[t2LDRi12_:%[0-9]+]]:rgpr = t2LDRi12 %fixed-stack.0, 0, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8) + ; CHECK: [[t2LDRi12_:%[0-9]+]]:rgpr = t2LDRi12 %fixed-stack.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from %fixed-stack.0, align 8) ; CHECK: [[t2ASRri:%[0-9]+]]:rgpr = t2ASRri [[t2LDRi12_]], 31, 14 /* CC::al */, $noreg, $noreg ; CHECK: ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp ; CHECK: $r0 = COPY [[COPY5]] @@ -110,8 +110,8 @@ body: | ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) ; CHECK: [[PHI:%[0-9]+]]:gprnopc = PHI [[COPY10]], %bb.1, %6, %bb.2 ; CHECK: [[PHI1:%[0-9]+]]:gprlr = PHI [[t2DoLoopStart]], %bb.1, %21, %bb.2 - ; CHECK: t2STRi12 [[COPY9]], [[PHI]], 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.lsr.iv, align 8) - ; CHECK: t2STRi12 [[COPY8]], [[PHI]], 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.lsr.iv + 4, basealign 8) + ; CHECK: t2STRi12 [[COPY9]], [[PHI]], 0, 14 /* CC::al */, $noreg :: (store (s32) into %ir.lsr.iv, align 8) + ; CHECK: t2STRi12 [[COPY8]], [[PHI]], 4, 14 /* CC::al */, $noreg :: (store (s32) into %ir.lsr.iv + 4, basealign 8) ; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = t2ADDri [[PHI]], 8, 14 /* CC::al */, $noreg, $noreg ; CHECK: [[COPY11:%[0-9]+]]:gpr = COPY [[t2ADDri]] ; CHECK: [[t2LoopEndDec:%[0-9]+]]:gprlr = t2LoopEndDec [[PHI1]], %bb.2, implicit-def $cpsr @@ -137,7 +137,7 @@ body: | %13:gpr = COPY %11 %12:gpr = COPY %10 - %14:rgpr = t2LDRi12 %fixed-stack.0, 0, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8) + %14:rgpr = t2LDRi12 %fixed-stack.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from %fixed-stack.0, align 8) %0:gpr = COPY %15 %16:rgpr = t2ASRri %14, 31, 14 /* CC::al */, $noreg, $noreg ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp @@ -159,8 +159,8 @@ body: | %4:gprnopc = PHI %3, %bb.1, %6, %bb.2 %5:gprlr = PHI %0, %bb.1, %7, %bb.2 - t2STRi12 %1, %4, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.lsr.iv, align 8) - t2STRi12 %2, %4, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.lsr.iv + 4, basealign 8) + t2STRi12 %1, %4, 0, 14 /* CC::al */, $noreg :: (store (s32) into %ir.lsr.iv, align 8) + t2STRi12 %2, %4, 4, 14 /* CC::al */, $noreg :: (store (s32) into %ir.lsr.iv + 4, basealign 8) %20:rgpr = t2ADDri %4, 8, 14 /* CC::al */, $noreg, $noreg %6:gpr = COPY %20 %21:gprlr = t2LoopDec %5, 1 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wls-search-pred.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wls-search-pred.mir index e94af93d8cfe9..833abbb50b5dd 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wls-search-pred.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wls-search-pred.mir @@ -83,8 +83,8 @@ body: | ; CHECK: [[PHI3:%[0-9]+]]:gpr = PHI [[COPY2]], %bb.3, %19, %bb.4 ; CHECK: [[PHI4:%[0-9]+]]:gpr = PHI [[COPY1]], %bb.3, %21, %bb.4 ; CHECK: [[PHI5:%[0-9]+]]:gprlr = PHI [[t2DoLoopStart]], %bb.3, %26, %bb.4 - ; CHECK: [[t2LDRB_POST:%[0-9]+]]:rgpr, [[t2LDRB_POST1:%[0-9]+]]:gpr = t2LDRB_POST [[PHI3]], 1, 14 /* CC::al */, $noreg :: (load 1 from %ir.x.addr.08) - ; CHECK: early-clobber %25:gprnopc = t2STRB_POST killed [[t2LDRB_POST]], [[PHI4]], 1, 14 /* CC::al */, $noreg :: (store 1 into %ir.y.addr.07) + ; CHECK: [[t2LDRB_POST:%[0-9]+]]:rgpr, [[t2LDRB_POST1:%[0-9]+]]:gpr = t2LDRB_POST [[PHI3]], 1, 14 /* CC::al */, $noreg :: (load (s8) from %ir.x.addr.08) + ; CHECK: early-clobber %25:gprnopc = t2STRB_POST killed [[t2LDRB_POST]], [[PHI4]], 1, 14 /* CC::al */, $noreg :: (store (s8) into %ir.y.addr.07) ; CHECK: [[COPY3:%[0-9]+]]:gpr = COPY %25 ; CHECK: [[t2LoopEndDec1:%[0-9]+]]:gprlr = t2LoopEndDec [[PHI5]], %bb.4, implicit-def $cpsr ; CHECK: t2B %bb.5, 14 /* CC::al */, $noreg @@ -137,8 +137,8 @@ body: | %1:gpr = PHI %7, %bb.5, %4, %bb.2 %2:gpr = PHI %8, %bb.5, %5, %bb.2 %3:gprlr = PHI %0, %bb.5, %6, %bb.2 - %13:rgpr, %4:gpr = t2LDRB_POST %1, 1, 14 /* CC::al */, $noreg :: (load 1 from %ir.x.addr.08) - early-clobber %14:gprnopc = t2STRB_POST killed %13, %2, 1, 14 /* CC::al */, $noreg :: (store 1 into %ir.y.addr.07) + %13:rgpr, %4:gpr = t2LDRB_POST %1, 1, 14 /* CC::al */, $noreg :: (load (s8) from %ir.x.addr.08) + early-clobber %14:gprnopc = t2STRB_POST killed %13, %2, 1, 14 /* CC::al */, $noreg :: (store (s8) into %ir.y.addr.07) %15:gprlr = t2LoopDec %3, 1 %5:gpr = COPY %14 %6:gpr = COPY %15 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wlstp.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wlstp.mir index 390d510a3f942..128710c0462d8 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wlstp.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wlstp.mir @@ -205,13 +205,13 @@ body: | ; CHECK: successors: %bb.3(0x04000000), %bb.2(0x7c000000) ; CHECK: liveins: $lr, $r0, $r1, $r2, $r12 ; CHECK: renamable $r4 = t2ADDrr renamable $r1, renamable $r12, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $q0 = MVE_VLDRBU8 killed renamable $r4, 0, 0, $noreg :: (load 16 from %ir.scevgep45, align 1) + ; CHECK: renamable $q0 = MVE_VLDRBU8 killed renamable $r4, 0, 0, $noreg :: (load (s128) from %ir.scevgep45, align 1) ; CHECK: renamable $r4 = t2ADDrr renamable $r2, renamable $r12, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $q1 = MVE_VLDRBU8 killed renamable $r4, 0, 0, $noreg :: (load 16 from %ir.scevgep23, align 1) + ; CHECK: renamable $q1 = MVE_VLDRBU8 killed renamable $r4, 0, 0, $noreg :: (load (s128) from %ir.scevgep23, align 1) ; CHECK: renamable $r4 = t2ADDrr renamable $r0, renamable $r12, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r12 = t2ADDri killed renamable $r12, 16, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $q0 = MVE_VMULi8 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 - ; CHECK: MVE_VSTRBU8 killed renamable $q0, killed renamable $r4, 0, 0, killed $noreg :: (store 16 into %ir.scevgep1, align 1) + ; CHECK: MVE_VSTRBU8 killed renamable $q0, killed renamable $r4, 0, 0, killed $noreg :: (store (s128) into %ir.scevgep1, align 1) ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 ; CHECK: bb.3.for.cond.cleanup: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc @@ -244,16 +244,16 @@ body: | renamable $r4 = t2ADDrr renamable $r1, renamable $r12, 14, $noreg, $noreg renamable $vpr = MVE_VCTP8 renamable $r3, 0, $noreg MVE_VPST 8, implicit $vpr - renamable $q0 = MVE_VLDRBU8 killed renamable $r4, 0, 1, renamable $vpr :: (load 16 from %ir.scevgep45, align 1) + renamable $q0 = MVE_VLDRBU8 killed renamable $r4, 0, 1, renamable $vpr :: (load (s128) from %ir.scevgep45, align 1) renamable $r4 = t2ADDrr renamable $r2, renamable $r12, 14, $noreg, $noreg MVE_VPST 8, implicit $vpr - renamable $q1 = MVE_VLDRBU8 killed renamable $r4, 0, 1, renamable $vpr :: (load 16 from %ir.scevgep23, align 1) + renamable $q1 = MVE_VLDRBU8 killed renamable $r4, 0, 1, renamable $vpr :: (load (s128) from %ir.scevgep23, align 1) renamable $r4 = t2ADDrr renamable $r0, renamable $r12, 14, $noreg, $noreg renamable $r12 = t2ADDri killed renamable $r12, 16, 14, $noreg, $noreg renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 16, 14, $noreg renamable $q0 = MVE_VMULi8 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 MVE_VPST 8, implicit $vpr - MVE_VSTRBU8 killed renamable $q0, killed renamable $r4, 0, 1, killed renamable $vpr :: (store 16 into %ir.scevgep1, align 1) + MVE_VSTRBU8 killed renamable $q0, killed renamable $r4, 0, 1, killed renamable $vpr :: (store (s128) into %ir.scevgep1, align 1) renamable $lr = t2LoopDec killed renamable $lr, 1 t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr tB %bb.3, 14, $noreg @@ -322,10 +322,10 @@ body: | ; CHECK: bb.1.vector.body: ; CHECK: successors: %bb.2(0x04000000), %bb.1(0x7c000000) ; CHECK: liveins: $lr, $r0, $r1, $r2 - ; CHECK: renamable $q0 = MVE_VLDRHU16 renamable $r1, 0, 0, $noreg :: (load 16 from %ir.lsr.iv57, align 2) - ; CHECK: renamable $q1 = MVE_VLDRHU16 renamable $r2, 0, 0, $noreg :: (load 16 from %ir.lsr.iv24, align 2) + ; CHECK: renamable $q0 = MVE_VLDRHU16 renamable $r1, 0, 0, $noreg :: (load (s128) from %ir.lsr.iv57, align 2) + ; CHECK: renamable $q1 = MVE_VLDRHU16 renamable $r2, 0, 0, $noreg :: (load (s128) from %ir.lsr.iv24, align 2) ; CHECK: renamable $q0 = MVE_VMULi16 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 - ; CHECK: MVE_VSTRHU16 killed renamable $q0, renamable $r0, 0, 0, killed $noreg :: (store 16 into %ir.lsr.iv1, align 2) + ; CHECK: MVE_VSTRHU16 killed renamable $q0, renamable $r0, 0, 0, killed $noreg :: (store (s128) into %ir.lsr.iv1, align 2) ; CHECK: renamable $r1, dead $cpsr = tADDi8 killed renamable $r1, 16, 14 /* CC::al */, $noreg ; CHECK: renamable $r2, dead $cpsr = tADDi8 killed renamable $r2, 16, 14 /* CC::al */, $noreg ; CHECK: renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 16, 14 /* CC::al */, $noreg @@ -354,11 +354,11 @@ body: | renamable $vpr = MVE_VCTP16 renamable $r3, 0, $noreg MVE_VPST 4, implicit $vpr - renamable $q0 = MVE_VLDRHU16 renamable $r1, 0, 1, renamable $vpr :: (load 16 from %ir.lsr.iv57, align 2) - renamable $q1 = MVE_VLDRHU16 renamable $r2, 0, 1, renamable $vpr :: (load 16 from %ir.lsr.iv24, align 2) + renamable $q0 = MVE_VLDRHU16 renamable $r1, 0, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv57, align 2) + renamable $q1 = MVE_VLDRHU16 renamable $r2, 0, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv24, align 2) renamable $q0 = MVE_VMULi16 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 MVE_VPST 8, implicit $vpr - MVE_VSTRHU16 killed renamable $q0, renamable $r0, 0, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1, align 2) + MVE_VSTRHU16 killed renamable $q0, renamable $r0, 0, 1, killed renamable $vpr :: (store (s128) into %ir.lsr.iv1, align 2) renamable $r1, dead $cpsr = tADDi8 killed renamable $r1, 16, 14, $noreg renamable $r2, dead $cpsr = tADDi8 killed renamable $r2, 16, 14, $noreg renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 16, 14, $noreg @@ -443,8 +443,8 @@ body: | ; CHECK: $q0 = MVE_VORR killed $q1, killed $q1, 0, $noreg, undef $q0 ; CHECK: renamable $vpr = MVE_VCTP32 $r2, 0, $noreg ; CHECK: MVE_VPST 4, implicit $vpr - ; CHECK: renamable $q1 = MVE_VLDRWU32 renamable $r0, 0, 1, renamable $vpr :: (load 16 from %ir.lsr.iv24, align 4) - ; CHECK: renamable $q2 = MVE_VLDRWU32 renamable $r1, 0, 1, renamable $vpr :: (load 16 from %ir.lsr.iv1, align 4) + ; CHECK: renamable $q1 = MVE_VLDRWU32 renamable $r0, 0, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv24, align 4) + ; CHECK: renamable $q2 = MVE_VLDRWU32 renamable $r1, 0, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv1, align 4) ; CHECK: $r3 = tMOVr $r2, 14 /* CC::al */, $noreg ; CHECK: renamable $q1 = nsw MVE_VMULi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 ; CHECK: renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 16, 14 /* CC::al */, $noreg @@ -493,8 +493,8 @@ body: | $q0 = MVE_VORR killed $q1, $q1, 0, $noreg, undef $q0 renamable $vpr = MVE_VCTP32 $r2, 0, $noreg MVE_VPST 4, implicit $vpr - renamable $q1 = MVE_VLDRWU32 renamable $r0, 0, 1, renamable $vpr :: (load 16 from %ir.lsr.iv24, align 4) - renamable $q2 = MVE_VLDRWU32 renamable $r1, 0, 1, renamable $vpr :: (load 16 from %ir.lsr.iv1, align 4) + renamable $q1 = MVE_VLDRWU32 renamable $r0, 0, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv24, align 4) + renamable $q2 = MVE_VLDRWU32 renamable $r1, 0, 1, renamable $vpr :: (load (s128) from %ir.lsr.iv1, align 4) $r3 = tMOVr $r2, 14, $noreg renamable $q1 = nsw MVE_VMULi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 16, 14, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-liveout-lsr-shift.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-liveout-lsr-shift.mir index 076482029b281..33368fe14b1cb 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-liveout-lsr-shift.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-liveout-lsr-shift.mir @@ -135,7 +135,7 @@ body: | ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $r3, renamable $r12, 27, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r3 = tLEApcrel %const.0, 14 /* CC::al */, $noreg ; CHECK: renamable $r12 = t2LSRri killed renamable $r12, 2, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load 16 from constant-pool) + ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load (s128) from constant-pool) ; CHECK: renamable $r3 = t2SUBrs renamable $r2, killed renamable $r12, 26, 14 /* CC::al */, $noreg, $noreg ; CHECK: bb.2.vector.body: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) @@ -143,8 +143,8 @@ body: | ; CHECK: renamable $vpr = MVE_VCTP16 renamable $r2, 0, $noreg ; CHECK: $q1 = MVE_VORR killed $q0, killed $q0, 0, $noreg, undef $q1 ; CHECK: MVE_VPST 4, implicit $vpr - ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRBU16_post killed renamable $r0, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv19, align 1) - ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRBU16_post killed renamable $r1, 8, 1, killed renamable $vpr :: (load 8 from %ir.lsr.iv2022, align 1) + ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRBU16_post killed renamable $r0, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv19, align 1) + ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRBU16_post killed renamable $r1, 8, 1, killed renamable $vpr :: (load (s64) from %ir.lsr.iv2022, align 1) ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 8, 14 /* CC::al */, $noreg ; CHECK: renamable $q0 = nuw MVE_VMULi16 killed renamable $q2, killed renamable $q0, 0, $noreg, undef renamable $q0 ; CHECK: renamable $q0 = MVE_VSUBi16 renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 @@ -186,7 +186,7 @@ body: | renamable $lr = nuw nsw t2ADDrs killed renamable $r3, renamable $r12, 27, 14, $noreg, $noreg renamable $r3 = tLEApcrel %const.0, 14, $noreg renamable $r12 = t2LSRri killed renamable $r12, 2, 14, $noreg, $noreg - renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load 16 from constant-pool) + renamable $q0 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg :: (load (s128) from constant-pool) renamable $r3 = t2SUBrs renamable $r2, killed renamable $r12, 26, 14, $noreg, $noreg $lr = t2DoLoopStart renamable $lr @@ -197,8 +197,8 @@ body: | renamable $vpr = MVE_VCTP16 renamable $r2, 0, $noreg $q1 = MVE_VORR killed $q0, $q0, 0, $noreg, undef $q1 MVE_VPST 4, implicit $vpr - renamable $r0, renamable $q0 = MVE_VLDRBU16_post killed renamable $r0, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv19, align 1) - renamable $r1, renamable $q2 = MVE_VLDRBU16_post killed renamable $r1, 8, 1, killed renamable $vpr :: (load 8 from %ir.lsr.iv2022, align 1) + renamable $r0, renamable $q0 = MVE_VLDRBU16_post killed renamable $r0, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv19, align 1) + renamable $r1, renamable $q2 = MVE_VLDRBU16_post killed renamable $r1, 8, 1, killed renamable $vpr :: (load (s64) from %ir.lsr.iv2022, align 1) renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 8, 14, $noreg renamable $q0 = nuw MVE_VMULi16 killed renamable $q2, killed renamable $q0, 0, $noreg, undef renamable $q0 renamable $lr = t2LoopDec killed renamable $lr, 1 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-opcode-liveout.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-opcode-liveout.mir index 75df351ac0e85..c1a08f98f71bc 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-opcode-liveout.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-opcode-liveout.mir @@ -142,8 +142,8 @@ body: | ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg ; CHECK: $q0 = MVE_VORR killed $q1, killed $q1, 0, $noreg, undef $q0 ; CHECK: MVE_VPST 4, implicit $vpr - ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2) - ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, killed renamable $vpr :: (load 8 from %ir.lsr.iv1820, align 2) + ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv17, align 2) + ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, killed renamable $vpr :: (load (s64) from %ir.lsr.iv1820, align 2) ; CHECK: $lr = tMOVr $r12, 14 /* CC::al */, $noreg ; CHECK: renamable $q1 = nsw MVE_VMULi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 ; CHECK: renamable $r12 = nsw t2SUBri killed $r12, 1, 14 /* CC::al */, $noreg, $noreg @@ -193,8 +193,8 @@ body: | renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg $q0 = MVE_VORR killed $q1, $q1, 0, $noreg, undef $q0 MVE_VPST 4, implicit $vpr - renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2) - renamable $r1, renamable $q2 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, killed renamable $vpr :: (load 8 from %ir.lsr.iv1820, align 2) + renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv17, align 2) + renamable $r1, renamable $q2 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, killed renamable $vpr :: (load (s64) from %ir.lsr.iv1820, align 2) $lr = tMOVr $r12, 14, $noreg renamable $q1 = nsw MVE_VMULi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 renamable $r12 = nsw t2SUBri killed $r12, 1, 14, $noreg, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-operand-liveout.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-operand-liveout.mir index 018a2dc7f6211..95d48f2eecab5 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-operand-liveout.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-operand-liveout.mir @@ -133,8 +133,8 @@ body: | ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg ; CHECK: $q0 = MVE_VORR killed $q1, killed $q1, 0, $noreg, undef $q0 ; CHECK: MVE_VPST 4, implicit $vpr - ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2) - ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, killed renamable $vpr :: (load 8 from %ir.lsr.iv1820, align 2) + ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv17, align 2) + ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, killed renamable $vpr :: (load (s64) from %ir.lsr.iv1820, align 2) ; CHECK: $lr = tMOVr $r3, 14 /* CC::al */, $noreg ; CHECK: renamable $q1 = nsw MVE_VMULi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 ; CHECK: renamable $r3, dead $cpsr = nsw tSUBi8 killed $r3, 1, 14 /* CC::al */, $noreg @@ -180,8 +180,8 @@ body: | renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg $q0 = MVE_VORR killed $q1, $q1, 0, $noreg, undef $q0 MVE_VPST 4, implicit $vpr - renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load 8 from %ir.lsr.iv17, align 2) - renamable $r1, renamable $q2 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, killed renamable $vpr :: (load 8 from %ir.lsr.iv1820, align 2) + renamable $r0, renamable $q1 = MVE_VLDRHS32_post killed renamable $r0, 8, 1, renamable $vpr :: (load (s64) from %ir.lsr.iv17, align 2) + renamable $r1, renamable $q2 = MVE_VLDRHS32_post killed renamable $r1, 8, 1, killed renamable $vpr :: (load (s64) from %ir.lsr.iv1820, align 2) $lr = tMOVr $r3, 14, $noreg renamable $q1 = nsw MVE_VMULi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 renamable $r3, dead $cpsr = nsw tSUBi8 killed $r3, 1, 14, $noreg diff --git a/llvm/test/CodeGen/Thumb2/block-placement.mir b/llvm/test/CodeGen/Thumb2/block-placement.mir index bf44e4e0265c4..8a15b41905415 100644 --- a/llvm/test/CodeGen/Thumb2/block-placement.mir +++ b/llvm/test/CodeGen/Thumb2/block-placement.mir @@ -347,7 +347,7 @@ body: | ; CHECK: bb.1: ; CHECK: successors: %bb.3(0x80000000) ; CHECK: liveins: $r0, $r1, $r2, $r3 - ; CHECK: renamable $r12 = t2LDRi12 $sp, 32, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8) + ; CHECK: renamable $r12 = t2LDRi12 $sp, 32, 14 /* CC::al */, $noreg :: (load (s32) from %fixed-stack.0, align 8) ; CHECK: $r9 = tMOVr killed $r2, 14 /* CC::al */, $noreg ; CHECK: renamable $r8 = t2MOVi 0, 14 /* CC::al */, $noreg, $noreg ; CHECK: t2B %bb.3, 14 /* CC::al */, $noreg @@ -420,7 +420,7 @@ body: | bb.1: liveins: $r0, $r1, $r2, $r3 - renamable $r12 = t2LDRi12 $sp, 32, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8) + renamable $r12 = t2LDRi12 $sp, 32, 14 /* CC::al */, $noreg :: (load (s32) from %fixed-stack.0, align 8) $r9 = tMOVr killed $r2, 14 /* CC::al */, $noreg renamable $r8 = t2MOVi 0, 14 /* CC::al */, $noreg, $noreg t2B %bb.2, 14 /* CC::al */, $noreg diff --git a/llvm/test/CodeGen/Thumb2/constant-islands-cbz.mir b/llvm/test/CodeGen/Thumb2/constant-islands-cbz.mir index aba4073ee50a0..082c00858b8ec 100644 --- a/llvm/test/CodeGen/Thumb2/constant-islands-cbz.mir +++ b/llvm/test/CodeGen/Thumb2/constant-islands-cbz.mir @@ -27,7 +27,7 @@ body: | ; CHECK: tCBZ renamable $r0, %bb.2 ; CHECK: bb.1: ; CHECK: liveins: $r0 - ; CHECK: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.x) + ; CHECK: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.x) ; CHECK: tTAILJMPdND @c, 14 /* CC::al */, $noreg, implicit $sp, implicit $sp, implicit killed $r0 ; CHECK: bb.2: ; CHECK: $r0, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg @@ -42,7 +42,7 @@ body: | bb.2: liveins: $r0 - renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (load 4 from %ir.x) + renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (load (s32) from %ir.x) tTAILJMPdND @c, 14, $noreg, implicit $sp, implicit $sp, implicit killed $r0 bb.1: @@ -66,7 +66,7 @@ body: | ; CHECK: tCBZ renamable $r0, %bb.2 ; CHECK: bb.1: ; CHECK: liveins: $r0 - ; CHECK: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.x) + ; CHECK: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.x) ; CHECK: tTAILJMPdND @c, 14 /* CC::al */, $noreg, implicit $sp, implicit $sp, implicit killed $r0 ; CHECK: bb.2: ; CHECK: $r0, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg @@ -83,7 +83,7 @@ body: | bb.2: liveins: $r0 - renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (load 4 from %ir.x) + renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (load (s32) from %ir.x) tTAILJMPdND @c, 14, $noreg, implicit $sp, implicit $sp, implicit killed $r0 bb.1: @@ -107,7 +107,7 @@ body: | ; CHECK: tBcc %bb.2, 0 /* CC::eq */, killed $cpsr ; CHECK: bb.1: ; CHECK: liveins: $r0 - ; CHECK: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.x) + ; CHECK: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.x) ; CHECK: tTAILJMPdND @c, 14 /* CC::al */, $noreg, implicit $sp, implicit $sp, implicit killed $r0 ; CHECK: bb.2: ; CHECK: $r0, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg @@ -123,7 +123,7 @@ body: | bb.2: liveins: $r0 - renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (load 4 from %ir.x) + renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (load (s32) from %ir.x) tTAILJMPdND @c, 14, $noreg, implicit $sp, implicit $sp, implicit killed $r0 bb.1: @@ -146,7 +146,7 @@ body: | ; CHECK: tCBZ renamable $r0, %bb.2 ; CHECK: bb.1: ; CHECK: liveins: $r0 - ; CHECK: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.x) + ; CHECK: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.x) ; CHECK: tTAILJMPdND @c, 14 /* CC::al */, $noreg, implicit $sp, implicit $sp, implicit killed $r0 ; CHECK: bb.2: ; CHECK: $r0, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg @@ -162,7 +162,7 @@ body: | bb.2: liveins: $r0 - renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (load 4 from %ir.x) + renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (load (s32) from %ir.x) tTAILJMPdND @c, 14, $noreg, implicit $sp, implicit $sp, implicit killed $r0 bb.1: @@ -186,7 +186,7 @@ body: | ; CHECK: tBcc %bb.2, 0 /* CC::eq */, killed $cpsr ; CHECK: bb.1: ; CHECK: liveins: $r0 - ; CHECK: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.x) + ; CHECK: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.x) ; CHECK: tTAILJMPdND @c, 14 /* CC::al */, $noreg, implicit $sp, implicit $sp, implicit killed $r0 ; CHECK: bb.2: ; CHECK: $r0, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg @@ -202,7 +202,7 @@ body: | bb.2: liveins: $r0 - renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (load 4 from %ir.x) + renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (load (s32) from %ir.x) tTAILJMPdND @c, 14, $noreg, implicit $sp, implicit $sp, implicit killed $r0 bb.1: @@ -225,7 +225,7 @@ body: | ; CHECK: tCBZ killed renamable $r1, %bb.2 ; CHECK: bb.1: ; CHECK: liveins: $r0 - ; CHECK: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.x) + ; CHECK: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.x) ; CHECK: tTAILJMPdND @c, 14 /* CC::al */, $noreg, implicit $sp, implicit $sp, implicit killed $r0 ; CHECK: bb.2: ; CHECK: $r0, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg @@ -241,7 +241,7 @@ body: | bb.2: liveins: $r0 - renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (load 4 from %ir.x) + renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (load (s32) from %ir.x) tTAILJMPdND @c, 14, $noreg, implicit $sp, implicit $sp, implicit killed $r0 bb.1: @@ -264,7 +264,7 @@ body: | ; CHECK: tCBZ killed renamable $r1, %bb.2 ; CHECK: bb.1: ; CHECK: liveins: $r0 - ; CHECK: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.x) + ; CHECK: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.x) ; CHECK: tTAILJMPdND @c, 14 /* CC::al */, $noreg, implicit $sp, implicit $sp, implicit killed $r0 ; CHECK: bb.2: ; CHECK: $r0, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg @@ -280,7 +280,7 @@ body: | bb.2: liveins: $r0 - renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (load 4 from %ir.x) + renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (load (s32) from %ir.x) tTAILJMPdND @c, 14, $noreg, implicit $sp, implicit $sp, implicit killed $r0 bb.1: @@ -305,7 +305,7 @@ body: | ; CHECK: tBcc %bb.2, 0 /* CC::eq */, killed $cpsr ; CHECK: bb.1: ; CHECK: liveins: $r0 - ; CHECK: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.x) + ; CHECK: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.x) ; CHECK: tTAILJMPdND @c, 14 /* CC::al */, $noreg, implicit $sp, implicit $sp, implicit killed $r0 ; CHECK: bb.2: ; CHECK: $r0, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg @@ -322,7 +322,7 @@ body: | bb.2: liveins: $r0 - renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (load 4 from %ir.x) + renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (load (s32) from %ir.x) tTAILJMPdND @c, 14, $noreg, implicit $sp, implicit $sp, implicit killed $r0 bb.1: diff --git a/llvm/test/CodeGen/Thumb2/constant-islands-cbzundef.mir b/llvm/test/CodeGen/Thumb2/constant-islands-cbzundef.mir index 611df39803473..81cc6c684d278 100644 --- a/llvm/test/CodeGen/Thumb2/constant-islands-cbzundef.mir +++ b/llvm/test/CodeGen/Thumb2/constant-islands-cbzundef.mir @@ -43,9 +43,9 @@ body: | ; CHECK: successors: %bb.3(0x80000000) ; CHECK: $r0 = t2MOVi16 target-flags(arm-lo16) @b, 14 /* CC::al */, $noreg ; CHECK: $r0 = t2MOVTi16 killed $r0, target-flags(arm-hi16) @b, 14 /* CC::al */, $noreg - ; CHECK: renamable $r1 = tLDRi renamable $r0, 0, 14 /* CC::al */, $noreg :: (dereferenceable load 4 from @b) + ; CHECK: renamable $r1 = tLDRi renamable $r0, 0, 14 /* CC::al */, $noreg :: (dereferenceable load (s32) from @b) ; CHECK: renamable $r1, dead $cpsr = nsw tADDi8 killed renamable $r1, 1, 14 /* CC::al */, $noreg - ; CHECK: tSTRi killed renamable $r1, killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4 into @b) + ; CHECK: tSTRi killed renamable $r1, killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s32) into @b) ; CHECK: bb.3.if.end: ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit undef $r0 bb.0.entry: @@ -66,9 +66,9 @@ body: | $r0 = t2MOVi16 target-flags(arm-lo16) @b, 14 /* CC::al */, $noreg $r0 = t2MOVTi16 killed $r0, target-flags(arm-hi16) @b, 14 /* CC::al */, $noreg - renamable $r1 = tLDRi renamable $r0, 0, 14 /* CC::al */, $noreg :: (dereferenceable load 4 from @b) + renamable $r1 = tLDRi renamable $r0, 0, 14 /* CC::al */, $noreg :: (dereferenceable load (s32) from @b) renamable $r1, dead $cpsr = nsw tADDi8 killed renamable $r1, 1, 14 /* CC::al */, $noreg - tSTRi killed renamable $r1, killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4 into @b) + tSTRi killed renamable $r1, killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s32) into @b) bb.3.if.end: tBX_RET 14 /* CC::al */, $noreg, implicit undef $r0 diff --git a/llvm/test/CodeGen/Thumb2/constant-islands-ldrsb.mir b/llvm/test/CodeGen/Thumb2/constant-islands-ldrsb.mir index 776f0f705dd23..ea9d7d79e15cd 100644 --- a/llvm/test/CodeGen/Thumb2/constant-islands-ldrsb.mir +++ b/llvm/test/CodeGen/Thumb2/constant-islands-ldrsb.mir @@ -23,13 +23,13 @@ body: | bb.0: $sp = frame-setup tSUBspi $sp, 3, 14 /* CC::al */, $noreg frame-setup CFI_INSTRUCTION def_cfa_offset 12 - renamable $r0 = t2LDRSBpci %const.0, 14 /* CC::al */, $noreg :: (dereferenceable load 1, align 4) + renamable $r0 = t2LDRSBpci %const.0, 14 /* CC::al */, $noreg :: (dereferenceable load (s8), align 4) renamable $r1 = tMOVr $sp, 14 /* CC::al */, $noreg tCMPr killed renamable $r1, killed renamable $r0, 14 /* CC::al */, $noreg, implicit-def $cpsr $r1 = t2MOVi16 target-flags(arm-lo16) @t2LDRSBpci, 14 /* CC::al */, $noreg renamable $r0 = t2CSINC $zr, $zr, 3, implicit killed $cpsr $r1 = t2MOVTi16 killed $r1, target-flags(arm-hi16) @t2LDRSBpci, 14 /* CC::al */, $noreg - tSTRi killed renamable $r0, killed renamable $r1, 0, 14 /* CC::al */, $noreg :: (store 4) + tSTRi killed renamable $r0, killed renamable $r1, 0, 14 /* CC::al */, $noreg :: (store (s32)) $sp = frame-destroy tADDspi $sp, 3, 14 /* CC::al */, $noreg tBX_RET 14 /* CC::al */, $noreg @@ -46,13 +46,13 @@ body: | bb.0: $sp = frame-setup tSUBspi $sp, 3, 14 /* CC::al */, $noreg frame-setup CFI_INSTRUCTION def_cfa_offset 12 - renamable $r0 = t2LDRSHpci %const.0, 14 /* CC::al */, $noreg :: (dereferenceable load 1, align 4) + renamable $r0 = t2LDRSHpci %const.0, 14 /* CC::al */, $noreg :: (dereferenceable load (s8), align 4) renamable $r1 = tMOVr $sp, 14 /* CC::al */, $noreg tCMPr killed renamable $r1, killed renamable $r0, 14 /* CC::al */, $noreg, implicit-def $cpsr $r1 = t2MOVi16 target-flags(arm-lo16) @t2LDRSHpci, 14 /* CC::al */, $noreg renamable $r0 = t2CSINC $zr, $zr, 3, implicit killed $cpsr $r1 = t2MOVTi16 killed $r1, target-flags(arm-hi16) @t2LDRSHpci, 14 /* CC::al */, $noreg - tSTRi killed renamable $r0, killed renamable $r1, 0, 14 /* CC::al */, $noreg :: (store 4) + tSTRi killed renamable $r0, killed renamable $r1, 0, 14 /* CC::al */, $noreg :: (store (s32)) $sp = frame-destroy tADDspi $sp, 3, 14 /* CC::al */, $noreg tBX_RET 14 /* CC::al */, $noreg diff --git a/llvm/test/CodeGen/Thumb2/fp16-stacksplot.mir b/llvm/test/CodeGen/Thumb2/fp16-stacksplot.mir index 687c64d9ebffe..f55f2b2e919dc 100644 --- a/llvm/test/CodeGen/Thumb2/fp16-stacksplot.mir +++ b/llvm/test/CodeGen/Thumb2/fp16-stacksplot.mir @@ -43,10 +43,10 @@ body: | ; CHECK: $r11 = IMPLICIT_DEF ; CHECK: $r12 = IMPLICIT_DEF ; CHECK: $lr = IMPLICIT_DEF - ; CHECK: t2STRi12 killed $r0, $sp, 0, 14 /* CC::al */, $noreg :: (store 4 into %stack.2) + ; CHECK: t2STRi12 killed $r0, $sp, 0, 14 /* CC::al */, $noreg :: (store (s32) into %stack.2) ; CHECK: $r0 = t2ADDri killed $sp, 1024, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $s4 = VLDRH killed $r0, 91, 14, $noreg :: (dereferenceable load 2 from %stack.0) - ; CHECK: $r0 = t2LDRi12 $sp, 0, 14 /* CC::al */, $noreg :: (load 4 from %stack.2) + ; CHECK: renamable $s4 = VLDRH killed $r0, 91, 14, $noreg :: (dereferenceable load (s16) from %stack.0) + ; CHECK: $r0 = t2LDRi12 $sp, 0, 14 /* CC::al */, $noreg :: (load (s32) from %stack.2) ; CHECK: KILL $r0 ; CHECK: KILL $r1 ; CHECK: KILL $r2 @@ -76,7 +76,7 @@ body: | $r12 = IMPLICIT_DEF $lr = IMPLICIT_DEF - renamable $s4 = VLDRH %stack.0, 0, 14, $noreg :: (dereferenceable load 2 from %stack.0) + renamable $s4 = VLDRH %stack.0, 0, 14, $noreg :: (dereferenceable load (s16) from %stack.0) KILL $r0 KILL $r1 diff --git a/llvm/test/CodeGen/Thumb2/frame-index-addrmode-t2i8s4.mir b/llvm/test/CodeGen/Thumb2/frame-index-addrmode-t2i8s4.mir index 0d246ffba7228..6fc2e2ee03f8f 100644 --- a/llvm/test/CodeGen/Thumb2/frame-index-addrmode-t2i8s4.mir +++ b/llvm/test/CodeGen/Thumb2/frame-index-addrmode-t2i8s4.mir @@ -31,14 +31,14 @@ body: | ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 80 ; CHECK: renamable $r0 = t2MOVi 0, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r1 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg - ; CHECK: t2STRDi8 killed $r1, killed $r0, $sp, 8, 14 /* CC::al */, $noreg :: (volatile store 8 into %ir.arrayidx) - ; CHECK: $r0, $r1 = t2LDRDi8 $sp, 8, 14 /* CC::al */, $noreg :: (volatile dereferenceable load 8 from %ir.arrayidx1) + ; CHECK: t2STRDi8 killed $r1, killed $r0, $sp, 8, 14 /* CC::al */, $noreg :: (volatile store (s64) into %ir.arrayidx) + ; CHECK: $r0, $r1 = t2LDRDi8 $sp, 8, 14 /* CC::al */, $noreg :: (volatile dereferenceable load (s64) from %ir.arrayidx1) ; CHECK: $sp = frame-destroy tADDspi $sp, 20, 14 /* CC::al */, $noreg ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0, implicit $r1 renamable $r0 = t2MOVi 0, 14 /* CC::al */, $noreg, $noreg renamable $r1 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg - t2STRDi8 killed $r1, killed $r0, %stack.0.a, 8, 14 /* CC::al */, $noreg :: (volatile store 8 into %ir.arrayidx) - $r0, $r1 = t2LDRDi8 %stack.0.a, 8, 14 /* CC::al */, $noreg :: (volatile dereferenceable load 8 from %ir.arrayidx1) + t2STRDi8 killed $r1, killed $r0, %stack.0.a, 8, 14 /* CC::al */, $noreg :: (volatile store (s64) into %ir.arrayidx) + $r0, $r1 = t2LDRDi8 %stack.0.a, 8, 14 /* CC::al */, $noreg :: (volatile dereferenceable load (s64) from %ir.arrayidx1) tBX_RET 14 /* CC::al */, $noreg, implicit $r0, implicit $r1 ... diff --git a/llvm/test/CodeGen/Thumb2/high-reg-spill.mir b/llvm/test/CodeGen/Thumb2/high-reg-spill.mir index 4e2197c0f0f02..1cfb4b5cdb9bb 100644 --- a/llvm/test/CodeGen/Thumb2/high-reg-spill.mir +++ b/llvm/test/CodeGen/Thumb2/high-reg-spill.mir @@ -37,11 +37,11 @@ stack: body: | bb.0.entry: ; CHECK-LABEL: name: constraint_h - ; CHECK: renamable $r0 = tLDRspi %stack.0.i, 0, 14 /* CC::al */, $noreg :: (dereferenceable load 4 from %ir.i) + ; CHECK: renamable $r0 = tLDRspi %stack.0.i, 0, 14 /* CC::al */, $noreg :: (dereferenceable load (s32) from %ir.i) ; CHECK: renamable $r8 = COPY killed renamable $r0 ; CHECK: INLINEASM &"@ $0", 1 /* sideeffect attdialect */, 589833 /* reguse:GPRnopc */, killed renamable $r8, 12 /* clobber */, implicit-def dead early-clobber $r12 ; CHECK: tBX_RET 14 /* CC::al */, $noreg - %1:tgpr = tLDRspi %stack.0.i, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i) + %1:tgpr = tLDRspi %stack.0.i, 0, 14, $noreg :: (dereferenceable load (s32) from %ir.i) %0:hgpr = COPY %1 INLINEASM &"@ $0", 1, 589833, %0, 12, implicit-def early-clobber $r12 tBX_RET 14, $noreg diff --git a/llvm/test/CodeGen/Thumb2/ifcvt-dead-predicate.mir b/llvm/test/CodeGen/Thumb2/ifcvt-dead-predicate.mir index c03d8682b924e..afba351695db9 100644 --- a/llvm/test/CodeGen/Thumb2/ifcvt-dead-predicate.mir +++ b/llvm/test/CodeGen/Thumb2/ifcvt-dead-predicate.mir @@ -47,7 +47,7 @@ body: | ; CHECK: bb.0.entry: ; CHECK: successors: %bb.1 ; CHECK: liveins: $r0, $r1, $r2 - ; CHECK: renamable $r0 = tLDRBi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 1 from %ir.command_set, align 4) + ; CHECK: renamable $r0 = tLDRBi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load (s8) from %ir.command_set, align 4) ; CHECK: dead renamable $r0, $cpsr = tLSLri killed renamable $r0, 24, 14 /* CC::al */, $noreg ; CHECK: $r0, dead $noreg = tMOVi8 0, 5 /* CC::pl */, $cpsr ; CHECK: tBX_RET 5 /* CC::pl */, killed $cpsr, implicit killed $r0 @@ -60,7 +60,7 @@ body: | successors: %bb.1, %bb.2 liveins: $r0, $r1, $r2 - renamable $r0 = tLDRBi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 1 from %ir.command_set, align 4) + renamable $r0 = tLDRBi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load (s8) from %ir.command_set, align 4) dead renamable $r0, $cpsr = tLSLri killed renamable $r0, 24, 14 /* CC::al */, $noreg t2Bcc %bb.2, 4 /* CC::mi */, killed $cpsr diff --git a/llvm/test/CodeGen/Thumb2/m4-sched-ldr.mir b/llvm/test/CodeGen/Thumb2/m4-sched-ldr.mir index 8212db6d4e1bb..8bb3ec94785d3 100644 --- a/llvm/test/CodeGen/Thumb2/m4-sched-ldr.mir +++ b/llvm/test/CodeGen/Thumb2/m4-sched-ldr.mir @@ -49,9 +49,9 @@ body: | %1:gpr = COPY $r1 %0:gpr = COPY $r0 - %2:gprnopc = t2LDRi12 %0, 0, 14, $noreg :: (load 4 from %ir.a) + %2:gprnopc = t2LDRi12 %0, 0, 14, $noreg :: (load (s32) from %ir.a) %3:rgpr = nsw t2ADDri %2, 10, 14, $noreg, $noreg - %4:gprnopc = t2LDRi12 %1, 0, 14, $noreg :: (load 4 from %ir.b) + %4:gprnopc = t2LDRi12 %1, 0, 14, $noreg :: (load (s32) from %ir.b) %5:rgpr = nsw t2ADDri %4, 20, 14, $noreg, $noreg %6:rgpr = nsw t2MUL %5, %3, 14, $noreg $r0 = COPY %6 diff --git a/llvm/test/CodeGen/Thumb2/mve-gatherscatter-mmo.ll b/llvm/test/CodeGen/Thumb2/mve-gatherscatter-mmo.ll index 8d82d900186f3..61bb94365991f 100644 --- a/llvm/test/CodeGen/Thumb2/mve-gatherscatter-mmo.ll +++ b/llvm/test/CodeGen/Thumb2/mve-gatherscatter-mmo.ll @@ -2,7 +2,7 @@ define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_gather_offset_s16(i8* %base, <8 x i16> %offset) { ; CHECK-LABEL: name: test_vldrbq_gather_offset_s16 -; CHECK: early-clobber %2:mqpr = MVE_VLDRBS16_rq %0, %1, 0, $noreg :: (load 8, align 1) +; CHECK: early-clobber %2:mqpr = MVE_VLDRBS16_rq %0, %1, 0, $noreg :: (load (s64), align 1) entry: %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i8.v8i16(i8* %base, <8 x i16> %offset, i32 8, i32 0, i32 0) ret <8 x i16> %0 @@ -10,7 +10,7 @@ entry: define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_gather_offset_z_s32(i8* %base, <4 x i32> %offset, i16 zeroext %p) { ; CHECK-LABEL: name: test_vldrbq_gather_offset_z_s32 -; CHECK: early-clobber %4:mqpr = MVE_VLDRBS32_rq %0, %1, 1, killed %3 :: (load 4, align 1) +; CHECK: early-clobber %4:mqpr = MVE_VLDRBS32_rq %0, %1, 1, killed %3 :: (load (s32), align 1) entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) @@ -20,7 +20,7 @@ entry: define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_s64(<2 x i64> %addr) { ; CHECK-LABEL: name: test_vldrdq_gather_base_s64 -; CHECK: early-clobber %1:mqpr = MVE_VLDRDU64_qi %0, 616, 0, $noreg :: (load 16, align 1) +; CHECK: early-clobber %1:mqpr = MVE_VLDRDU64_qi %0, 616, 0, $noreg :: (load (s128), align 1) entry: %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.base.v2i64.v2i64(<2 x i64> %addr, i32 616) ret <2 x i64> %0 @@ -28,7 +28,7 @@ entry: define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_base_z_f32(<4 x i32> %addr, i16 zeroext %p) { ; CHECK-LABEL: name: test_vldrwq_gather_base_z_f32 -; CHECK: early-clobber %3:mqpr = MVE_VLDRWU32_qi %0, -300, 1, killed %2 :: (load 16, align 1) +; CHECK: early-clobber %3:mqpr = MVE_VLDRWU32_qi %0, -300, 1, killed %2 :: (load (s128), align 1) entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) @@ -38,7 +38,7 @@ entry: define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_s64(<2 x i64>* %addr) { ; CHECK-LABEL: name: test_vldrdq_gather_base_wb_s64 -; CHECK: %2:mqpr, early-clobber %3:mqpr = MVE_VLDRDU64_qi_pre %1, 576, 0, $noreg :: (load 16, align 1) +; CHECK: %2:mqpr, early-clobber %3:mqpr = MVE_VLDRDU64_qi_pre %1, 576, 0, $noreg :: (load (s128), align 1) entry: %0 = load <2 x i64>, <2 x i64>* %addr, align 8 %1 = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.v2i64.v2i64(<2 x i64> %0, i32 576) @@ -50,7 +50,7 @@ entry: define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_base_wb_z_f32(<4 x i32>* %addr, i16 zeroext %p) { ; CHECK-LABEL: name: test_vldrwq_gather_base_wb_z_f32 -; CHECK: %4:mqpr, early-clobber %5:mqpr = MVE_VLDRWU32_qi_pre %3, -352, 1, killed %2 :: (load 16, align 1) +; CHECK: %4:mqpr, early-clobber %5:mqpr = MVE_VLDRWU32_qi_pre %3, -352, 1, killed %2 :: (load (s128), align 1) entry: %0 = load <4 x i32>, <4 x i32>* %addr, align 8 %1 = zext i16 %p to i32 @@ -65,7 +65,7 @@ entry: define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_s32(i8* %base, <4 x i32> %offset, <4 x i32> %value) { ; CHECK-LABEL: name: test_vstrbq_scatter_offset_s32 -; CHECK: MVE_VSTRB32_rq %2, %0, %1, 0, $noreg :: (store 4, align 1) +; CHECK: MVE_VSTRB32_rq %2, %0, %1, 0, $noreg :: (store (s32), align 1) entry: call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v4i32.v4i32(i8* %base, <4 x i32> %offset, <4 x i32> %value, i32 8, i32 0) ret void @@ -73,7 +73,7 @@ entry: define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_s8(i8* %base, <16 x i8> %offset, <16 x i8> %value, i16 zeroext %p) { ; CHECK-LABEL: name: test_vstrbq_scatter_offset_p_s8 -; CHECK: MVE_VSTRB8_rq %2, %0, %1, 1, killed %4 :: (store 16, align 1) +; CHECK: MVE_VSTRB8_rq %2, %0, %1, 1, killed %4 :: (store (s128), align 1) entry: %0 = zext i16 %p to i32 %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) @@ -83,7 +83,7 @@ entry: define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_u64(<2 x i64> %addr, <2 x i64> %value) { ; CHECK-LABEL: name: test_vstrdq_scatter_base_u64 -; CHECK: MVE_VSTRD64_qi %1, %0, -472, 0, $noreg :: (store 16, align 1) +; CHECK: MVE_VSTRD64_qi %1, %0, -472, 0, $noreg :: (store (s128), align 1) entry: call void @llvm.arm.mve.vstr.scatter.base.v2i64.v2i64(<2 x i64> %addr, i32 -472, <2 x i64> %value) ret void @@ -91,7 +91,7 @@ entry: define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_p_s64(<2 x i64> %addr, <2 x i64> %value, i16 zeroext %p) { ; CHECK-LABEL: name: test_vstrdq_scatter_base_p_s64 -; CHECK: MVE_VSTRD64_qi %1, %0, 888, 1, killed %3 :: (store 16, align 1) +; CHECK: MVE_VSTRD64_qi %1, %0, 888, 1, killed %3 :: (store (s128), align 1) entry: %0 = zext i16 %p to i32 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) @@ -101,7 +101,7 @@ entry: define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_s64(<2 x i64>* %addr, <2 x i64> %value) { ; CHECK-LABEL: name: test_vstrdq_scatter_base_wb_s64 -; CHECK: %3:mqpr = MVE_VSTRD64_qi_pre %1, %2, 208, 0, $noreg :: (store 16, align 1) +; CHECK: %3:mqpr = MVE_VSTRD64_qi_pre %1, %2, 208, 0, $noreg :: (store (s128), align 1) entry: %0 = load <2 x i64>, <2 x i64>* %addr, align 8 %1 = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.v2i64.v2i64(<2 x i64> %0, i32 208, <2 x i64> %value) @@ -111,7 +111,7 @@ entry: define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_p_s64(<2 x i64>* %addr, <2 x i64> %value, i16 zeroext %p) { ; CHECK-LABEL: name: test_vstrdq_scatter_base_wb_p_s64 -; CHECK: %5:mqpr = MVE_VSTRD64_qi_pre %1, %3, 248, 1, killed %4 :: (store 16, align 1) +; CHECK: %5:mqpr = MVE_VSTRD64_qi_pre %1, %3, 248, 1, killed %4 :: (store (s128), align 1) entry: %0 = load <2 x i64>, <2 x i64>* %addr, align 8 %1 = zext i16 %p to i32 diff --git a/llvm/test/CodeGen/Thumb2/mve-postinc-distribute.mir b/llvm/test/CodeGen/Thumb2/mve-postinc-distribute.mir index 738df788acc4d..ae488c9f3fb66 100644 --- a/llvm/test/CodeGen/Thumb2/mve-postinc-distribute.mir +++ b/llvm/test/CodeGen/Thumb2/mve-postinc-distribute.mir @@ -88,11 +88,11 @@ body: | ; CHECK-LABEL: name: MVE_VLDRWU32 ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:rgpr = COPY $r0 - ; CHECK: [[MVE_VLDRWU32_post:%[0-9]+]]:rgpr, [[MVE_VLDRWU32_post1:%[0-9]+]]:mqpr = MVE_VLDRWU32_post [[COPY]], 32, 0, $noreg :: (load 16, align 8) + ; CHECK: [[MVE_VLDRWU32_post:%[0-9]+]]:rgpr, [[MVE_VLDRWU32_post1:%[0-9]+]]:mqpr = MVE_VLDRWU32_post [[COPY]], 32, 0, $noreg :: (load (s128), align 8) ; CHECK: $r0 = COPY [[MVE_VLDRWU32_post]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:gprnopc = COPY $r0 - %1:mqpr = MVE_VLDRWU32 %0, 0, 0, $noreg :: (load 16, align 8) + %1:mqpr = MVE_VLDRWU32 %0, 0, 0, $noreg :: (load (s128), align 8) %2:rgpr = nuw t2ADDri %0, 32, 14, $noreg, $noreg $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -115,11 +115,11 @@ body: | ; CHECK-LABEL: name: MVE_VLDRHU16 ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:rgpr = COPY $r0 - ; CHECK: [[MVE_VLDRHU16_post:%[0-9]+]]:rgpr, [[MVE_VLDRHU16_post1:%[0-9]+]]:mqpr = MVE_VLDRHU16_post [[COPY]], 32, 0, $noreg :: (load 16, align 8) + ; CHECK: [[MVE_VLDRHU16_post:%[0-9]+]]:rgpr, [[MVE_VLDRHU16_post1:%[0-9]+]]:mqpr = MVE_VLDRHU16_post [[COPY]], 32, 0, $noreg :: (load (s128), align 8) ; CHECK: $r0 = COPY [[MVE_VLDRHU16_post]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:gprnopc = COPY $r0 - %1:mqpr = MVE_VLDRHU16 %0, 0, 0, $noreg :: (load 16, align 8) + %1:mqpr = MVE_VLDRHU16 %0, 0, 0, $noreg :: (load (s128), align 8) %2:rgpr = nuw t2ADDri %0, 32, 14, $noreg, $noreg $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -142,11 +142,11 @@ body: | ; CHECK-LABEL: name: MVE_VLDRBU8 ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:rgpr = COPY $r0 - ; CHECK: [[MVE_VLDRBU8_post:%[0-9]+]]:rgpr, [[MVE_VLDRBU8_post1:%[0-9]+]]:mqpr = MVE_VLDRBU8_post [[COPY]], 32, 0, $noreg :: (load 16, align 8) + ; CHECK: [[MVE_VLDRBU8_post:%[0-9]+]]:rgpr, [[MVE_VLDRBU8_post1:%[0-9]+]]:mqpr = MVE_VLDRBU8_post [[COPY]], 32, 0, $noreg :: (load (s128), align 8) ; CHECK: $r0 = COPY [[MVE_VLDRBU8_post]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:gprnopc = COPY $r0 - %1:mqpr = MVE_VLDRBU8 %0, 0, 0, $noreg :: (load 16, align 8) + %1:mqpr = MVE_VLDRBU8 %0, 0, 0, $noreg :: (load (s128), align 8) %2:rgpr = nuw t2ADDri %0, 32, 14, $noreg, $noreg $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -169,11 +169,11 @@ body: | ; CHECK-LABEL: name: MVE_VLDRBS32 ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0 - ; CHECK: [[MVE_VLDRBS32_post:%[0-9]+]]:tgpr, [[MVE_VLDRBS32_post1:%[0-9]+]]:mqpr = MVE_VLDRBS32_post [[COPY]], 32, 0, $noreg :: (load 4, align 8) + ; CHECK: [[MVE_VLDRBS32_post:%[0-9]+]]:tgpr, [[MVE_VLDRBS32_post1:%[0-9]+]]:mqpr = MVE_VLDRBS32_post [[COPY]], 32, 0, $noreg :: (load (s32), align 8) ; CHECK: $r0 = COPY [[MVE_VLDRBS32_post]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:tgpr = COPY $r0 - %1:mqpr = MVE_VLDRBS32 %0, 0, 0, $noreg :: (load 4, align 8) + %1:mqpr = MVE_VLDRBS32 %0, 0, 0, $noreg :: (load (s32), align 8) %2:rgpr = nuw t2ADDri %0, 32, 14, $noreg, $noreg $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -196,11 +196,11 @@ body: | ; CHECK-LABEL: name: MVE_VLDRBU32 ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0 - ; CHECK: [[MVE_VLDRBU32_post:%[0-9]+]]:tgpr, [[MVE_VLDRBU32_post1:%[0-9]+]]:mqpr = MVE_VLDRBU32_post [[COPY]], 32, 0, $noreg :: (load 4, align 8) + ; CHECK: [[MVE_VLDRBU32_post:%[0-9]+]]:tgpr, [[MVE_VLDRBU32_post1:%[0-9]+]]:mqpr = MVE_VLDRBU32_post [[COPY]], 32, 0, $noreg :: (load (s32), align 8) ; CHECK: $r0 = COPY [[MVE_VLDRBU32_post]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:tgpr = COPY $r0 - %1:mqpr = MVE_VLDRBU32 %0, 0, 0, $noreg :: (load 4, align 8) + %1:mqpr = MVE_VLDRBU32 %0, 0, 0, $noreg :: (load (s32), align 8) %2:rgpr = nuw t2ADDri %0, 32, 14, $noreg, $noreg $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -223,11 +223,11 @@ body: | ; CHECK-LABEL: name: MVE_VLDRHS32 ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0 - ; CHECK: [[MVE_VLDRHS32_post:%[0-9]+]]:tgpr, [[MVE_VLDRHS32_post1:%[0-9]+]]:mqpr = MVE_VLDRHS32_post [[COPY]], 32, 0, $noreg :: (load 8) + ; CHECK: [[MVE_VLDRHS32_post:%[0-9]+]]:tgpr, [[MVE_VLDRHS32_post1:%[0-9]+]]:mqpr = MVE_VLDRHS32_post [[COPY]], 32, 0, $noreg :: (load (s64)) ; CHECK: $r0 = COPY [[MVE_VLDRHS32_post]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:tgpr = COPY $r0 - %1:mqpr = MVE_VLDRHS32 %0, 0, 0, $noreg :: (load 8, align 8) + %1:mqpr = MVE_VLDRHS32 %0, 0, 0, $noreg :: (load (s64), align 8) %2:rgpr = nuw t2ADDri %0, 32, 14, $noreg, $noreg $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -250,11 +250,11 @@ body: | ; CHECK-LABEL: name: MVE_VLDRHU32 ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0 - ; CHECK: [[MVE_VLDRHU32_post:%[0-9]+]]:tgpr, [[MVE_VLDRHU32_post1:%[0-9]+]]:mqpr = MVE_VLDRHU32_post [[COPY]], 32, 0, $noreg :: (load 8) + ; CHECK: [[MVE_VLDRHU32_post:%[0-9]+]]:tgpr, [[MVE_VLDRHU32_post1:%[0-9]+]]:mqpr = MVE_VLDRHU32_post [[COPY]], 32, 0, $noreg :: (load (s64)) ; CHECK: $r0 = COPY [[MVE_VLDRHU32_post]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:tgpr = COPY $r0 - %1:mqpr = MVE_VLDRHU32 %0, 0, 0, $noreg :: (load 8, align 8) + %1:mqpr = MVE_VLDRHU32 %0, 0, 0, $noreg :: (load (s64), align 8) %2:rgpr = nuw t2ADDri %0, 32, 14, $noreg, $noreg $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -277,11 +277,11 @@ body: | ; CHECK-LABEL: name: MVE_VLDRBS16 ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0 - ; CHECK: [[MVE_VLDRBS16_post:%[0-9]+]]:tgpr, [[MVE_VLDRBS16_post1:%[0-9]+]]:mqpr = MVE_VLDRBS16_post [[COPY]], 32, 0, $noreg :: (load 8) + ; CHECK: [[MVE_VLDRBS16_post:%[0-9]+]]:tgpr, [[MVE_VLDRBS16_post1:%[0-9]+]]:mqpr = MVE_VLDRBS16_post [[COPY]], 32, 0, $noreg :: (load (s64)) ; CHECK: $r0 = COPY [[MVE_VLDRBS16_post]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:tgpr = COPY $r0 - %1:mqpr = MVE_VLDRBS16 %0, 0, 0, $noreg :: (load 8, align 8) + %1:mqpr = MVE_VLDRBS16 %0, 0, 0, $noreg :: (load (s64), align 8) %2:rgpr = nuw t2ADDri %0, 32, 14, $noreg, $noreg $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -304,11 +304,11 @@ body: | ; CHECK-LABEL: name: MVE_VLDRBU16 ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0 - ; CHECK: [[MVE_VLDRBU16_post:%[0-9]+]]:tgpr, [[MVE_VLDRBU16_post1:%[0-9]+]]:mqpr = MVE_VLDRBU16_post [[COPY]], 32, 0, $noreg :: (load 8) + ; CHECK: [[MVE_VLDRBU16_post:%[0-9]+]]:tgpr, [[MVE_VLDRBU16_post1:%[0-9]+]]:mqpr = MVE_VLDRBU16_post [[COPY]], 32, 0, $noreg :: (load (s64)) ; CHECK: $r0 = COPY [[MVE_VLDRBU16_post]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:tgpr = COPY $r0 - %1:mqpr = MVE_VLDRBU16 %0, 0, 0, $noreg :: (load 8, align 8) + %1:mqpr = MVE_VLDRBU16 %0, 0, 0, $noreg :: (load (s64), align 8) %2:rgpr = nuw t2ADDri %0, 32, 14, $noreg, $noreg $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -332,12 +332,12 @@ body: | ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:mqpr = COPY $q0 ; CHECK: [[COPY1:%[0-9]+]]:rgpr = COPY $r0 - ; CHECK: [[MVE_VSTRWU32_post:%[0-9]+]]:rgpr = MVE_VSTRWU32_post [[COPY]], [[COPY1]], 32, 0, $noreg :: (store 16, align 8) + ; CHECK: [[MVE_VSTRWU32_post:%[0-9]+]]:rgpr = MVE_VSTRWU32_post [[COPY]], [[COPY1]], 32, 0, $noreg :: (store (s128), align 8) ; CHECK: $r0 = COPY [[MVE_VSTRWU32_post]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %1:mqpr = COPY $q0 %0:gprnopc = COPY $r0 - MVE_VSTRWU32 %1, %0, 0, 0, $noreg :: (store 16, align 8) + MVE_VSTRWU32 %1, %0, 0, 0, $noreg :: (store (s128), align 8) %2:rgpr = nuw t2ADDri %0, 32, 14, $noreg, $noreg $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -361,12 +361,12 @@ body: | ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:mqpr = COPY $q0 ; CHECK: [[COPY1:%[0-9]+]]:rgpr = COPY $r0 - ; CHECK: [[MVE_VSTRHU16_post:%[0-9]+]]:rgpr = MVE_VSTRHU16_post [[COPY]], [[COPY1]], 32, 0, $noreg :: (store 16, align 8) + ; CHECK: [[MVE_VSTRHU16_post:%[0-9]+]]:rgpr = MVE_VSTRHU16_post [[COPY]], [[COPY1]], 32, 0, $noreg :: (store (s128), align 8) ; CHECK: $r0 = COPY [[MVE_VSTRHU16_post]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %1:mqpr = COPY $q0 %0:gprnopc = COPY $r0 - MVE_VSTRHU16 %1, %0, 0, 0, $noreg :: (store 16, align 8) + MVE_VSTRHU16 %1, %0, 0, 0, $noreg :: (store (s128), align 8) %2:rgpr = nuw t2ADDri %0, 32, 14, $noreg, $noreg $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -390,12 +390,12 @@ body: | ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:mqpr = COPY $q0 ; CHECK: [[COPY1:%[0-9]+]]:rgpr = COPY $r0 - ; CHECK: [[MVE_VSTRBU8_post:%[0-9]+]]:rgpr = MVE_VSTRBU8_post [[COPY]], [[COPY1]], 32, 0, $noreg :: (store 16, align 8) + ; CHECK: [[MVE_VSTRBU8_post:%[0-9]+]]:rgpr = MVE_VSTRBU8_post [[COPY]], [[COPY1]], 32, 0, $noreg :: (store (s128), align 8) ; CHECK: $r0 = COPY [[MVE_VSTRBU8_post]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %1:mqpr = COPY $q0 %0:gprnopc = COPY $r0 - MVE_VSTRBU8 %1, %0, 0, 0, $noreg :: (store 16, align 8) + MVE_VSTRBU8 %1, %0, 0, 0, $noreg :: (store (s128), align 8) %2:rgpr = nuw t2ADDri %0, 32, 14, $noreg, $noreg $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -419,12 +419,12 @@ body: | ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:mqpr = COPY $q0 ; CHECK: [[COPY1:%[0-9]+]]:tgpr = COPY $r0 - ; CHECK: [[MVE_VSTRH32_post:%[0-9]+]]:tgpr = MVE_VSTRH32_post [[COPY]], [[COPY1]], 32, 0, $noreg :: (store 8) + ; CHECK: [[MVE_VSTRH32_post:%[0-9]+]]:tgpr = MVE_VSTRH32_post [[COPY]], [[COPY1]], 32, 0, $noreg :: (store (s64)) ; CHECK: $r0 = COPY [[MVE_VSTRH32_post]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %1:mqpr = COPY $q0 %0:tgpr = COPY $r0 - MVE_VSTRH32 %1, %0, 0, 0, $noreg :: (store 8, align 8) + MVE_VSTRH32 %1, %0, 0, 0, $noreg :: (store (s64), align 8) %2:rgpr = nuw t2ADDri %0, 32, 14, $noreg, $noreg $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -448,12 +448,12 @@ body: | ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:mqpr = COPY $q0 ; CHECK: [[COPY1:%[0-9]+]]:tgpr = COPY $r0 - ; CHECK: [[MVE_VSTRB32_post:%[0-9]+]]:tgpr = MVE_VSTRB32_post [[COPY]], [[COPY1]], 32, 0, $noreg :: (store 4, align 8) + ; CHECK: [[MVE_VSTRB32_post:%[0-9]+]]:tgpr = MVE_VSTRB32_post [[COPY]], [[COPY1]], 32, 0, $noreg :: (store (s32), align 8) ; CHECK: $r0 = COPY [[MVE_VSTRB32_post]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %1:mqpr = COPY $q0 %0:tgpr = COPY $r0 - MVE_VSTRB32 %1, %0, 0, 0, $noreg :: (store 4, align 8) + MVE_VSTRB32 %1, %0, 0, 0, $noreg :: (store (s32), align 8) %2:rgpr = nuw t2ADDri %0, 32, 14, $noreg, $noreg $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -477,12 +477,12 @@ body: | ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:mqpr = COPY $q0 ; CHECK: [[COPY1:%[0-9]+]]:tgpr = COPY $r0 - ; CHECK: [[MVE_VSTRB16_post:%[0-9]+]]:tgpr = MVE_VSTRB16_post [[COPY]], [[COPY1]], 32, 0, $noreg :: (store 8) + ; CHECK: [[MVE_VSTRB16_post:%[0-9]+]]:tgpr = MVE_VSTRB16_post [[COPY]], [[COPY1]], 32, 0, $noreg :: (store (s64)) ; CHECK: $r0 = COPY [[MVE_VSTRB16_post]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %1:mqpr = COPY $q0 %0:tgpr = COPY $r0 - MVE_VSTRB16 %1, %0, 0, 0, $noreg :: (store 8, align 8) + MVE_VSTRB16 %1, %0, 0, 0, $noreg :: (store (s64), align 8) %2:rgpr = nuw t2ADDri %0, 32, 14, $noreg, $noreg $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -506,14 +506,14 @@ body: | ; CHECK-LABEL: name: ld0ld4 ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:rgpr = COPY $r0 - ; CHECK: [[MVE_VLDRWU32_post:%[0-9]+]]:rgpr, [[MVE_VLDRWU32_post1:%[0-9]+]]:mqpr = MVE_VLDRWU32_post [[COPY]], 32, 0, $noreg :: (load 16, align 8) - ; CHECK: [[MVE_VLDRWU32_:%[0-9]+]]:mqpr = MVE_VLDRWU32 [[MVE_VLDRWU32_post]], -28, 0, $noreg :: (load 16, align 8) + ; CHECK: [[MVE_VLDRWU32_post:%[0-9]+]]:rgpr, [[MVE_VLDRWU32_post1:%[0-9]+]]:mqpr = MVE_VLDRWU32_post [[COPY]], 32, 0, $noreg :: (load (s128), align 8) + ; CHECK: [[MVE_VLDRWU32_:%[0-9]+]]:mqpr = MVE_VLDRWU32 [[MVE_VLDRWU32_post]], -28, 0, $noreg :: (load (s128), align 8) ; CHECK: $r0 = COPY [[MVE_VLDRWU32_post]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:gprnopc = COPY $r0 - %1:mqpr = MVE_VLDRWU32 %0, 0, 0, $noreg :: (load 16, align 8) + %1:mqpr = MVE_VLDRWU32 %0, 0, 0, $noreg :: (load (s128), align 8) %2:rgpr = nuw t2ADDri %0, 32, 14, $noreg, $noreg - %3:mqpr = MVE_VLDRWU32 %0, 4, 0, $noreg :: (load 16, align 8) + %3:mqpr = MVE_VLDRWU32 %0, 4, 0, $noreg :: (load (s128), align 8) $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -536,14 +536,14 @@ body: | ; CHECK-LABEL: name: ld4ld0 ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:rgpr = COPY $r0 - ; CHECK: [[MVE_VLDRWU32_:%[0-9]+]]:mqpr = MVE_VLDRWU32 [[COPY]], 4, 0, $noreg :: (load 16, align 8) - ; CHECK: [[MVE_VLDRWU32_post:%[0-9]+]]:rgpr, [[MVE_VLDRWU32_post1:%[0-9]+]]:mqpr = MVE_VLDRWU32_post [[COPY]], 32, 0, $noreg :: (load 16, align 8) + ; CHECK: [[MVE_VLDRWU32_:%[0-9]+]]:mqpr = MVE_VLDRWU32 [[COPY]], 4, 0, $noreg :: (load (s128), align 8) + ; CHECK: [[MVE_VLDRWU32_post:%[0-9]+]]:rgpr, [[MVE_VLDRWU32_post1:%[0-9]+]]:mqpr = MVE_VLDRWU32_post [[COPY]], 32, 0, $noreg :: (load (s128), align 8) ; CHECK: $r0 = COPY [[MVE_VLDRWU32_post]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:gprnopc = COPY $r0 - %1:mqpr = MVE_VLDRWU32 %0, 4, 0, $noreg :: (load 16, align 8) + %1:mqpr = MVE_VLDRWU32 %0, 4, 0, $noreg :: (load (s128), align 8) %2:rgpr = nuw t2ADDri %0, 32, 14, $noreg, $noreg - %3:mqpr = MVE_VLDRWU32 %0, 0, 0, $noreg :: (load 16, align 8) + %3:mqpr = MVE_VLDRWU32 %0, 0, 0, $noreg :: (load (s128), align 8) $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -567,16 +567,16 @@ body: | ; CHECK-LABEL: name: ld0ld4ld0 ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:rgpr = COPY $r0 - ; CHECK: [[MVE_VLDRWU32_:%[0-9]+]]:mqpr = MVE_VLDRWU32 [[COPY]], 0, 0, $noreg :: (load 16, align 8) - ; CHECK: [[MVE_VLDRWU32_1:%[0-9]+]]:mqpr = MVE_VLDRWU32 [[COPY]], 4, 0, $noreg :: (load 16, align 8) - ; CHECK: [[MVE_VLDRWU32_post:%[0-9]+]]:rgpr, [[MVE_VLDRWU32_post1:%[0-9]+]]:mqpr = MVE_VLDRWU32_post [[COPY]], 32, 0, $noreg :: (load 16, align 8) + ; CHECK: [[MVE_VLDRWU32_:%[0-9]+]]:mqpr = MVE_VLDRWU32 [[COPY]], 0, 0, $noreg :: (load (s128), align 8) + ; CHECK: [[MVE_VLDRWU32_1:%[0-9]+]]:mqpr = MVE_VLDRWU32 [[COPY]], 4, 0, $noreg :: (load (s128), align 8) + ; CHECK: [[MVE_VLDRWU32_post:%[0-9]+]]:rgpr, [[MVE_VLDRWU32_post1:%[0-9]+]]:mqpr = MVE_VLDRWU32_post [[COPY]], 32, 0, $noreg :: (load (s128), align 8) ; CHECK: $r0 = COPY [[MVE_VLDRWU32_post]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:gprnopc = COPY $r0 - %1:mqpr = MVE_VLDRWU32 %0, 0, 0, $noreg :: (load 16, align 8) + %1:mqpr = MVE_VLDRWU32 %0, 0, 0, $noreg :: (load (s128), align 8) %2:rgpr = nuw t2ADDri %0, 32, 14, $noreg, $noreg - %3:mqpr = MVE_VLDRWU32 %0, 4, 0, $noreg :: (load 16, align 8) - %4:mqpr = MVE_VLDRWU32 %0, 0, 0, $noreg :: (load 16, align 8) + %3:mqpr = MVE_VLDRWU32 %0, 4, 0, $noreg :: (load (s128), align 8) + %4:mqpr = MVE_VLDRWU32 %0, 0, 0, $noreg :: (load (s128), align 8) $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -600,16 +600,16 @@ body: | ; CHECK-LABEL: name: ld4ld0ld4 ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:rgpr = COPY $r0 - ; CHECK: [[MVE_VLDRWU32_:%[0-9]+]]:mqpr = MVE_VLDRWU32 [[COPY]], 4, 0, $noreg :: (load 16, align 8) - ; CHECK: [[MVE_VLDRWU32_post:%[0-9]+]]:rgpr, [[MVE_VLDRWU32_post1:%[0-9]+]]:mqpr = MVE_VLDRWU32_post [[COPY]], 32, 0, $noreg :: (load 16, align 8) - ; CHECK: [[MVE_VLDRWU32_1:%[0-9]+]]:mqpr = MVE_VLDRWU32 [[MVE_VLDRWU32_post]], -28, 0, $noreg :: (load 16, align 8) + ; CHECK: [[MVE_VLDRWU32_:%[0-9]+]]:mqpr = MVE_VLDRWU32 [[COPY]], 4, 0, $noreg :: (load (s128), align 8) + ; CHECK: [[MVE_VLDRWU32_post:%[0-9]+]]:rgpr, [[MVE_VLDRWU32_post1:%[0-9]+]]:mqpr = MVE_VLDRWU32_post [[COPY]], 32, 0, $noreg :: (load (s128), align 8) + ; CHECK: [[MVE_VLDRWU32_1:%[0-9]+]]:mqpr = MVE_VLDRWU32 [[MVE_VLDRWU32_post]], -28, 0, $noreg :: (load (s128), align 8) ; CHECK: $r0 = COPY [[MVE_VLDRWU32_post]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:gprnopc = COPY $r0 - %1:mqpr = MVE_VLDRWU32 %0, 4, 0, $noreg :: (load 16, align 8) + %1:mqpr = MVE_VLDRWU32 %0, 4, 0, $noreg :: (load (s128), align 8) %2:rgpr = nuw t2ADDri %0, 32, 14, $noreg, $noreg - %3:mqpr = MVE_VLDRWU32 %0, 0, 0, $noreg :: (load 16, align 8) - %4:mqpr = MVE_VLDRWU32 %0, 4, 0, $noreg :: (load 16, align 8) + %3:mqpr = MVE_VLDRWU32 %0, 0, 0, $noreg :: (load (s128), align 8) + %4:mqpr = MVE_VLDRWU32 %0, 4, 0, $noreg :: (load (s128), align 8) $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -632,14 +632,14 @@ body: | ; CHECK-LABEL: name: addload ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:rgpr = COPY $r0 - ; CHECK: [[MVE_VLDRWU32_post:%[0-9]+]]:rgpr, [[MVE_VLDRWU32_post1:%[0-9]+]]:mqpr = MVE_VLDRWU32_post [[COPY]], 32, 0, $noreg :: (load 16, align 8) - ; CHECK: [[MVE_VLDRWU32_:%[0-9]+]]:mqpr = MVE_VLDRWU32 [[MVE_VLDRWU32_post]], -28, 0, $noreg :: (load 16, align 8) + ; CHECK: [[MVE_VLDRWU32_post:%[0-9]+]]:rgpr, [[MVE_VLDRWU32_post1:%[0-9]+]]:mqpr = MVE_VLDRWU32_post [[COPY]], 32, 0, $noreg :: (load (s128), align 8) + ; CHECK: [[MVE_VLDRWU32_:%[0-9]+]]:mqpr = MVE_VLDRWU32 [[MVE_VLDRWU32_post]], -28, 0, $noreg :: (load (s128), align 8) ; CHECK: $r0 = COPY [[MVE_VLDRWU32_post]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:gprnopc = COPY $r0 %2:rgpr = nuw t2ADDri %0, 32, 14, $noreg, $noreg - %1:mqpr = MVE_VLDRWU32 %0, 0, 0, $noreg :: (load 16, align 8) - %3:mqpr = MVE_VLDRWU32 %0, 4, 0, $noreg :: (load 16, align 8) + %1:mqpr = MVE_VLDRWU32 %0, 0, 0, $noreg :: (load (s128), align 8) + %3:mqpr = MVE_VLDRWU32 %0, 4, 0, $noreg :: (load (s128), align 8) $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -662,14 +662,14 @@ body: | ; CHECK-LABEL: name: sub ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:rgpr = COPY $r0 - ; CHECK: [[MVE_VLDRWU32_post:%[0-9]+]]:rgpr, [[MVE_VLDRWU32_post1:%[0-9]+]]:mqpr = MVE_VLDRWU32_post [[COPY]], -32, 0, $noreg :: (load 16, align 8) - ; CHECK: [[MVE_VLDRWU32_:%[0-9]+]]:mqpr = MVE_VLDRWU32 [[MVE_VLDRWU32_post]], 36, 0, $noreg :: (load 16, align 8) + ; CHECK: [[MVE_VLDRWU32_post:%[0-9]+]]:rgpr, [[MVE_VLDRWU32_post1:%[0-9]+]]:mqpr = MVE_VLDRWU32_post [[COPY]], -32, 0, $noreg :: (load (s128), align 8) + ; CHECK: [[MVE_VLDRWU32_:%[0-9]+]]:mqpr = MVE_VLDRWU32 [[MVE_VLDRWU32_post]], 36, 0, $noreg :: (load (s128), align 8) ; CHECK: $r0 = COPY [[MVE_VLDRWU32_post]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:gprnopc = COPY $r0 - %1:mqpr = MVE_VLDRWU32 %0, 0, 0, $noreg :: (load 16, align 8) + %1:mqpr = MVE_VLDRWU32 %0, 0, 0, $noreg :: (load (s128), align 8) %2:rgpr = nuw t2SUBri %0, 32, 14, $noreg, $noreg - %3:mqpr = MVE_VLDRWU32 %0, 4, 0, $noreg :: (load 16, align 8) + %3:mqpr = MVE_VLDRWU32 %0, 4, 0, $noreg :: (load (s128), align 8) $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -693,15 +693,15 @@ body: | ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0 ; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = nuw t2ADDri [[COPY]], 32, 14 /* CC::al */, $noreg, $noreg - ; CHECK: [[MVE_VLDRWU32_:%[0-9]+]]:mqpr = MVE_VLDRWU32 [[COPY]], 0, 0, $noreg :: (load 16, align 8) - ; CHECK: [[MVE_VLDRWU32_1:%[0-9]+]]:mqpr = MVE_VLDRWU32 [[COPY]], 4, 0, $noreg :: (load 16, align 8) + ; CHECK: [[MVE_VLDRWU32_:%[0-9]+]]:mqpr = MVE_VLDRWU32 [[COPY]], 0, 0, $noreg :: (load (s128), align 8) + ; CHECK: [[MVE_VLDRWU32_1:%[0-9]+]]:mqpr = MVE_VLDRWU32 [[COPY]], 4, 0, $noreg :: (load (s128), align 8) ; CHECK: $r0 = COPY [[COPY]] ; CHECK: $r0 = COPY [[t2ADDri]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:gprnopc = COPY $r0 %2:rgpr = nuw t2ADDri %0, 32, 14, $noreg, $noreg - %1:mqpr = MVE_VLDRWU32 %0, 0, 0, $noreg :: (load 16, align 8) - %3:mqpr = MVE_VLDRWU32 %0, 4, 0, $noreg :: (load 16, align 8) + %1:mqpr = MVE_VLDRWU32 %0, 0, 0, $noreg :: (load (s128), align 8) + %3:mqpr = MVE_VLDRWU32 %0, 4, 0, $noreg :: (load (s128), align 8) $r0 = COPY %0 $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -727,14 +727,14 @@ body: | ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:rgpr = COPY $r0 ; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = nuw t2ADDri [[COPY]], 32, 14 /* CC::al */, $noreg, $noreg - ; CHECK: [[MVE_VLDRWU32_:%[0-9]+]]:mqpr = MVE_VLDRWU32 [[COPY]], 0, 0, $noreg :: (load 16, align 8) - ; CHECK: [[MVE_VLDRWU32_post:%[0-9]+]]:rgpr, [[MVE_VLDRWU32_post1:%[0-9]+]]:mqpr = MVE_VLDRWU32_post [[COPY]], 4, 0, $noreg :: (load 16, align 8) + ; CHECK: [[MVE_VLDRWU32_:%[0-9]+]]:mqpr = MVE_VLDRWU32 [[COPY]], 0, 0, $noreg :: (load (s128), align 8) + ; CHECK: [[MVE_VLDRWU32_post:%[0-9]+]]:rgpr, [[MVE_VLDRWU32_post1:%[0-9]+]]:mqpr = MVE_VLDRWU32_post [[COPY]], 4, 0, $noreg :: (load (s128), align 8) ; CHECK: $r0 = COPY [[t2ADDri]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:rgpr = COPY $r0 %2:rgpr = nuw t2ADDri %0, 32, 14, $noreg, $noreg - %1:mqpr = MVE_VLDRWU32 %0, 0, 0, $noreg :: (load 16, align 8) - %4:rgpr, %3:mqpr = MVE_VLDRWU32_post %0, 4, 0, $noreg :: (load 16, align 8) + %1:mqpr = MVE_VLDRWU32 %0, 0, 0, $noreg :: (load (s128), align 8) + %4:rgpr, %3:mqpr = MVE_VLDRWU32_post %0, 4, 0, $noreg :: (load (s128), align 8) $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -757,15 +757,15 @@ body: | ; CHECK-LABEL: name: badScale ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0 - ; CHECK: [[MVE_VLDRWU32_:%[0-9]+]]:mqpr = MVE_VLDRWU32 [[COPY]], 0, 0, $noreg :: (load 16, align 8) + ; CHECK: [[MVE_VLDRWU32_:%[0-9]+]]:mqpr = MVE_VLDRWU32 [[COPY]], 0, 0, $noreg :: (load (s128), align 8) ; CHECK: [[t2SUBri:%[0-9]+]]:rgpr = nuw t2SUBri [[COPY]], 3, 14 /* CC::al */, $noreg, $noreg - ; CHECK: [[MVE_VLDRWU32_1:%[0-9]+]]:mqpr = MVE_VLDRWU32 [[COPY]], 4, 0, $noreg :: (load 16, align 8) + ; CHECK: [[MVE_VLDRWU32_1:%[0-9]+]]:mqpr = MVE_VLDRWU32 [[COPY]], 4, 0, $noreg :: (load (s128), align 8) ; CHECK: $r0 = COPY [[t2SUBri]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:gprnopc = COPY $r0 - %1:mqpr = MVE_VLDRWU32 %0, 0, 0, $noreg :: (load 16, align 8) + %1:mqpr = MVE_VLDRWU32 %0, 0, 0, $noreg :: (load (s128), align 8) %2:rgpr = nuw t2SUBri %0, 3, 14, $noreg, $noreg - %3:mqpr = MVE_VLDRWU32 %0, 4, 0, $noreg :: (load 16, align 8) + %3:mqpr = MVE_VLDRWU32 %0, 4, 0, $noreg :: (load (s128), align 8) $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -788,15 +788,15 @@ body: | ; CHECK-LABEL: name: badRange ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0 - ; CHECK: [[MVE_VLDRWU32_:%[0-9]+]]:mqpr = MVE_VLDRWU32 [[COPY]], 0, 0, $noreg :: (load 16, align 8) + ; CHECK: [[MVE_VLDRWU32_:%[0-9]+]]:mqpr = MVE_VLDRWU32 [[COPY]], 0, 0, $noreg :: (load (s128), align 8) ; CHECK: [[t2SUBri:%[0-9]+]]:rgpr = nuw t2SUBri [[COPY]], -300, 14 /* CC::al */, $noreg, $noreg - ; CHECK: [[MVE_VLDRWU32_1:%[0-9]+]]:mqpr = MVE_VLDRWU32 [[COPY]], -300, 0, $noreg :: (load 16, align 8) + ; CHECK: [[MVE_VLDRWU32_1:%[0-9]+]]:mqpr = MVE_VLDRWU32 [[COPY]], -300, 0, $noreg :: (load (s128), align 8) ; CHECK: $r0 = COPY [[t2SUBri]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:gprnopc = COPY $r0 - %1:mqpr = MVE_VLDRWU32 %0, 0, 0, $noreg :: (load 16, align 8) + %1:mqpr = MVE_VLDRWU32 %0, 0, 0, $noreg :: (load (s128), align 8) %2:rgpr = nuw t2SUBri %0, -300, 14, $noreg, $noreg - %3:mqpr = MVE_VLDRWU32 %0, -300, 0, $noreg :: (load 16, align 8) + %3:mqpr = MVE_VLDRWU32 %0, -300, 0, $noreg :: (load (s128), align 8) $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -820,15 +820,15 @@ body: | ; CHECK-LABEL: name: addUseOK ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:rgpr = COPY $r0 - ; CHECK: [[MVE_VLDRWU32_post:%[0-9]+]]:rgpr, [[MVE_VLDRWU32_post1:%[0-9]+]]:mqpr = MVE_VLDRWU32_post [[COPY]], -32, 0, $noreg :: (load 16, align 8) - ; CHECK: [[MVE_VLDRWU32_:%[0-9]+]]:mqpr = MVE_VLDRWU32 [[MVE_VLDRWU32_post]], 36, 0, $noreg :: (load 16, align 8) + ; CHECK: [[MVE_VLDRWU32_post:%[0-9]+]]:rgpr, [[MVE_VLDRWU32_post1:%[0-9]+]]:mqpr = MVE_VLDRWU32_post [[COPY]], -32, 0, $noreg :: (load (s128), align 8) + ; CHECK: [[MVE_VLDRWU32_:%[0-9]+]]:mqpr = MVE_VLDRWU32 [[MVE_VLDRWU32_post]], 36, 0, $noreg :: (load (s128), align 8) ; CHECK: [[t2LSRri:%[0-9]+]]:rgpr = nuw t2LSRri [[MVE_VLDRWU32_post]], 2, 14 /* CC::al */, $noreg, $noreg ; CHECK: $r0 = COPY [[t2LSRri]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:gprnopc = COPY $r0 - %1:mqpr = MVE_VLDRWU32 %0, 0, 0, $noreg :: (load 16, align 8) + %1:mqpr = MVE_VLDRWU32 %0, 0, 0, $noreg :: (load (s128), align 8) %2:rgpr = nuw t2SUBri %0, 32, 14, $noreg, $noreg - %3:mqpr = MVE_VLDRWU32 %0, 4, 0, $noreg :: (load 16, align 8) + %3:mqpr = MVE_VLDRWU32 %0, 4, 0, $noreg :: (load (s128), align 8) %4:rgpr = nuw t2LSRri %2, 2, 14, $noreg, $noreg $r0 = COPY %4 tBX_RET 14, $noreg, implicit $r0 @@ -855,15 +855,15 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0 ; CHECK: [[t2SUBri:%[0-9]+]]:rgpr = nuw t2SUBri [[COPY]], 32, 14 /* CC::al */, $noreg, $noreg ; CHECK: [[t2LSRri:%[0-9]+]]:rgpr = nuw t2LSRri [[t2SUBri]], 2, 14 /* CC::al */, $noreg, $noreg - ; CHECK: [[MVE_VLDRWU32_:%[0-9]+]]:mqpr = MVE_VLDRWU32 [[COPY]], 0, 0, $noreg :: (load 16, align 8) - ; CHECK: [[MVE_VLDRWU32_1:%[0-9]+]]:mqpr = MVE_VLDRWU32 [[COPY]], 4, 0, $noreg :: (load 16, align 8) + ; CHECK: [[MVE_VLDRWU32_:%[0-9]+]]:mqpr = MVE_VLDRWU32 [[COPY]], 0, 0, $noreg :: (load (s128), align 8) + ; CHECK: [[MVE_VLDRWU32_1:%[0-9]+]]:mqpr = MVE_VLDRWU32 [[COPY]], 4, 0, $noreg :: (load (s128), align 8) ; CHECK: $r0 = COPY [[t2LSRri]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:gprnopc = COPY $r0 %2:rgpr = nuw t2SUBri %0, 32, 14, $noreg, $noreg %4:rgpr = nuw t2LSRri %2, 2, 14, $noreg, $noreg - %1:mqpr = MVE_VLDRWU32 %0, 0, 0, $noreg :: (load 16, align 8) - %3:mqpr = MVE_VLDRWU32 %0, 4, 0, $noreg :: (load 16, align 8) + %1:mqpr = MVE_VLDRWU32 %0, 0, 0, $noreg :: (load (s128), align 8) + %3:mqpr = MVE_VLDRWU32 %0, 4, 0, $noreg :: (load (s128), align 8) $r0 = COPY %4 tBX_RET 14, $noreg, implicit $r0 @@ -887,16 +887,16 @@ body: | ; CHECK-LABEL: name: addUseKilled ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:rgpr = COPY $r0 - ; CHECK: [[MVE_VLDRWU32_post:%[0-9]+]]:rgpr, [[MVE_VLDRWU32_post1:%[0-9]+]]:mqpr = MVE_VLDRWU32_post [[COPY]], -32, 0, $noreg :: (load 16, align 8) + ; CHECK: [[MVE_VLDRWU32_post:%[0-9]+]]:rgpr, [[MVE_VLDRWU32_post1:%[0-9]+]]:mqpr = MVE_VLDRWU32_post [[COPY]], -32, 0, $noreg :: (load (s128), align 8) ; CHECK: [[t2LSRri:%[0-9]+]]:rgpr = nuw t2LSRri [[MVE_VLDRWU32_post]], 2, 14 /* CC::al */, $noreg, $noreg - ; CHECK: [[MVE_VLDRWU32_:%[0-9]+]]:mqpr = MVE_VLDRWU32 [[MVE_VLDRWU32_post]], 36, 0, $noreg :: (load 16, align 8) + ; CHECK: [[MVE_VLDRWU32_:%[0-9]+]]:mqpr = MVE_VLDRWU32 [[MVE_VLDRWU32_post]], 36, 0, $noreg :: (load (s128), align 8) ; CHECK: $r0 = COPY [[t2LSRri]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:gprnopc = COPY $r0 - %1:mqpr = MVE_VLDRWU32 %0, 0, 0, $noreg :: (load 16, align 8) + %1:mqpr = MVE_VLDRWU32 %0, 0, 0, $noreg :: (load (s128), align 8) %2:rgpr = nuw t2SUBri %0, 32, 14, $noreg, $noreg %4:rgpr = nuw t2LSRri killed %2, 2, 14, $noreg, $noreg - %3:mqpr = MVE_VLDRWU32 %0, 4, 0, $noreg :: (load 16, align 8) + %3:mqpr = MVE_VLDRWU32 %0, 4, 0, $noreg :: (load (s128), align 8) $r0 = COPY %4 tBX_RET 14, $noreg, implicit $r0 @@ -918,13 +918,13 @@ body: | ; CHECK-LABEL: name: MVE_VLDRWU32_post ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:rgpr = COPY $r0 - ; CHECK: [[MVE_VLDRWU32_post:%[0-9]+]]:rgpr, [[MVE_VLDRWU32_post1:%[0-9]+]]:mqpr = MVE_VLDRWU32_post [[COPY]], 32, 0, $noreg :: (load 16, align 8) - ; CHECK: [[MVE_VLDRWU32_:%[0-9]+]]:mqpr = MVE_VLDRWU32 [[MVE_VLDRWU32_post]], -16, 0, $noreg :: (load 16, align 8) + ; CHECK: [[MVE_VLDRWU32_post:%[0-9]+]]:rgpr, [[MVE_VLDRWU32_post1:%[0-9]+]]:mqpr = MVE_VLDRWU32_post [[COPY]], 32, 0, $noreg :: (load (s128), align 8) + ; CHECK: [[MVE_VLDRWU32_:%[0-9]+]]:mqpr = MVE_VLDRWU32 [[MVE_VLDRWU32_post]], -16, 0, $noreg :: (load (s128), align 8) ; CHECK: $r0 = COPY [[MVE_VLDRWU32_post]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:rgpr = COPY $r0 - %2:rgpr, %1:mqpr = MVE_VLDRWU32_post %0, 32, 0, $noreg :: (load 16, align 8) - %1:mqpr = MVE_VLDRWU32 %0, 16, 0, $noreg :: (load 16, align 8) + %2:rgpr, %1:mqpr = MVE_VLDRWU32_post %0, 32, 0, $noreg :: (load (s128), align 8) + %1:mqpr = MVE_VLDRWU32 %0, 16, 0, $noreg :: (load (s128), align 8) $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -946,13 +946,13 @@ body: | ; CHECK-LABEL: name: MVE_VLDRHU16_post ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:rgpr = COPY $r0 - ; CHECK: [[MVE_VLDRHU16_post:%[0-9]+]]:rgpr, [[MVE_VLDRHU16_post1:%[0-9]+]]:mqpr = MVE_VLDRHU16_post [[COPY]], 32, 0, $noreg :: (load 16, align 8) - ; CHECK: [[MVE_VLDRHU16_:%[0-9]+]]:mqpr = MVE_VLDRHU16 [[MVE_VLDRHU16_post]], -16, 0, $noreg :: (load 16, align 8) + ; CHECK: [[MVE_VLDRHU16_post:%[0-9]+]]:rgpr, [[MVE_VLDRHU16_post1:%[0-9]+]]:mqpr = MVE_VLDRHU16_post [[COPY]], 32, 0, $noreg :: (load (s128), align 8) + ; CHECK: [[MVE_VLDRHU16_:%[0-9]+]]:mqpr = MVE_VLDRHU16 [[MVE_VLDRHU16_post]], -16, 0, $noreg :: (load (s128), align 8) ; CHECK: $r0 = COPY [[MVE_VLDRHU16_post]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:rgpr = COPY $r0 - %2:rgpr, %1:mqpr = MVE_VLDRHU16_post %0, 32, 0, $noreg :: (load 16, align 8) - %1:mqpr = MVE_VLDRHU16 %0, 16, 0, $noreg :: (load 16, align 8) + %2:rgpr, %1:mqpr = MVE_VLDRHU16_post %0, 32, 0, $noreg :: (load (s128), align 8) + %1:mqpr = MVE_VLDRHU16 %0, 16, 0, $noreg :: (load (s128), align 8) $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -974,13 +974,13 @@ body: | ; CHECK-LABEL: name: MVE_VLDRBU8_post ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:rgpr = COPY $r0 - ; CHECK: [[MVE_VLDRBU8_post:%[0-9]+]]:rgpr, [[MVE_VLDRBU8_post1:%[0-9]+]]:mqpr = MVE_VLDRBU8_post [[COPY]], 32, 0, $noreg :: (load 16, align 8) - ; CHECK: [[MVE_VLDRBU8_:%[0-9]+]]:mqpr = MVE_VLDRBU8 [[MVE_VLDRBU8_post]], -16, 0, $noreg :: (load 16, align 8) + ; CHECK: [[MVE_VLDRBU8_post:%[0-9]+]]:rgpr, [[MVE_VLDRBU8_post1:%[0-9]+]]:mqpr = MVE_VLDRBU8_post [[COPY]], 32, 0, $noreg :: (load (s128), align 8) + ; CHECK: [[MVE_VLDRBU8_:%[0-9]+]]:mqpr = MVE_VLDRBU8 [[MVE_VLDRBU8_post]], -16, 0, $noreg :: (load (s128), align 8) ; CHECK: $r0 = COPY [[MVE_VLDRBU8_post]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:rgpr = COPY $r0 - %2:rgpr, %1:mqpr = MVE_VLDRBU8_post %0, 32, 0, $noreg :: (load 16, align 8) - %1:mqpr = MVE_VLDRBU8 %0, 16, 0, $noreg :: (load 16, align 8) + %2:rgpr, %1:mqpr = MVE_VLDRBU8_post %0, 32, 0, $noreg :: (load (s128), align 8) + %1:mqpr = MVE_VLDRBU8 %0, 16, 0, $noreg :: (load (s128), align 8) $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -1002,13 +1002,13 @@ body: | ; CHECK-LABEL: name: MVE_VLDRBS32_post ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0 - ; CHECK: [[MVE_VLDRBS32_post:%[0-9]+]]:tgpr, [[MVE_VLDRBS32_post1:%[0-9]+]]:mqpr = MVE_VLDRBS32_post [[COPY]], 32, 0, $noreg :: (load 16, align 8) - ; CHECK: [[MVE_VLDRBS32_:%[0-9]+]]:mqpr = MVE_VLDRBS32 [[MVE_VLDRBS32_post]], -16, 0, $noreg :: (load 16, align 8) + ; CHECK: [[MVE_VLDRBS32_post:%[0-9]+]]:tgpr, [[MVE_VLDRBS32_post1:%[0-9]+]]:mqpr = MVE_VLDRBS32_post [[COPY]], 32, 0, $noreg :: (load (s128), align 8) + ; CHECK: [[MVE_VLDRBS32_:%[0-9]+]]:mqpr = MVE_VLDRBS32 [[MVE_VLDRBS32_post]], -16, 0, $noreg :: (load (s128), align 8) ; CHECK: $r0 = COPY [[MVE_VLDRBS32_post]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:tgpr = COPY $r0 - %2:tgpr, %1:mqpr = MVE_VLDRBS32_post %0, 32, 0, $noreg :: (load 16, align 8) - %1:mqpr = MVE_VLDRBS32 %0, 16, 0, $noreg :: (load 16, align 8) + %2:tgpr, %1:mqpr = MVE_VLDRBS32_post %0, 32, 0, $noreg :: (load (s128), align 8) + %1:mqpr = MVE_VLDRBS32 %0, 16, 0, $noreg :: (load (s128), align 8) $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -1030,13 +1030,13 @@ body: | ; CHECK-LABEL: name: MVE_VLDRBU32_post ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0 - ; CHECK: [[MVE_VLDRBU32_post:%[0-9]+]]:tgpr, [[MVE_VLDRBU32_post1:%[0-9]+]]:mqpr = MVE_VLDRBU32_post [[COPY]], 32, 0, $noreg :: (load 16, align 8) - ; CHECK: [[MVE_VLDRBU32_:%[0-9]+]]:mqpr = MVE_VLDRBU32 [[MVE_VLDRBU32_post]], -16, 0, $noreg :: (load 16, align 8) + ; CHECK: [[MVE_VLDRBU32_post:%[0-9]+]]:tgpr, [[MVE_VLDRBU32_post1:%[0-9]+]]:mqpr = MVE_VLDRBU32_post [[COPY]], 32, 0, $noreg :: (load (s128), align 8) + ; CHECK: [[MVE_VLDRBU32_:%[0-9]+]]:mqpr = MVE_VLDRBU32 [[MVE_VLDRBU32_post]], -16, 0, $noreg :: (load (s128), align 8) ; CHECK: $r0 = COPY [[MVE_VLDRBU32_post]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:tgpr = COPY $r0 - %2:tgpr, %1:mqpr = MVE_VLDRBU32_post %0, 32, 0, $noreg :: (load 16, align 8) - %1:mqpr = MVE_VLDRBU32 %0, 16, 0, $noreg :: (load 16, align 8) + %2:tgpr, %1:mqpr = MVE_VLDRBU32_post %0, 32, 0, $noreg :: (load (s128), align 8) + %1:mqpr = MVE_VLDRBU32 %0, 16, 0, $noreg :: (load (s128), align 8) $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -1058,13 +1058,13 @@ body: | ; CHECK-LABEL: name: MVE_VLDRHS32_post ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0 - ; CHECK: [[MVE_VLDRHS32_post:%[0-9]+]]:tgpr, [[MVE_VLDRHS32_post1:%[0-9]+]]:mqpr = MVE_VLDRHS32_post [[COPY]], 32, 0, $noreg :: (load 16, align 8) - ; CHECK: [[MVE_VLDRHS32_:%[0-9]+]]:mqpr = MVE_VLDRHS32 [[MVE_VLDRHS32_post]], -16, 0, $noreg :: (load 16, align 8) + ; CHECK: [[MVE_VLDRHS32_post:%[0-9]+]]:tgpr, [[MVE_VLDRHS32_post1:%[0-9]+]]:mqpr = MVE_VLDRHS32_post [[COPY]], 32, 0, $noreg :: (load (s128), align 8) + ; CHECK: [[MVE_VLDRHS32_:%[0-9]+]]:mqpr = MVE_VLDRHS32 [[MVE_VLDRHS32_post]], -16, 0, $noreg :: (load (s128), align 8) ; CHECK: $r0 = COPY [[MVE_VLDRHS32_post]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:tgpr = COPY $r0 - %2:tgpr, %1:mqpr = MVE_VLDRHS32_post %0, 32, 0, $noreg :: (load 16, align 8) - %1:mqpr = MVE_VLDRHS32 %0, 16, 0, $noreg :: (load 16, align 8) + %2:tgpr, %1:mqpr = MVE_VLDRHS32_post %0, 32, 0, $noreg :: (load (s128), align 8) + %1:mqpr = MVE_VLDRHS32 %0, 16, 0, $noreg :: (load (s128), align 8) $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -1086,13 +1086,13 @@ body: | ; CHECK-LABEL: name: MVE_VLDRHU32_post ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0 - ; CHECK: [[MVE_VLDRHU32_post:%[0-9]+]]:tgpr, [[MVE_VLDRHU32_post1:%[0-9]+]]:mqpr = MVE_VLDRHU32_post [[COPY]], 32, 0, $noreg :: (load 16, align 8) - ; CHECK: [[MVE_VLDRHU32_:%[0-9]+]]:mqpr = MVE_VLDRHU32 [[MVE_VLDRHU32_post]], -16, 0, $noreg :: (load 16, align 8) + ; CHECK: [[MVE_VLDRHU32_post:%[0-9]+]]:tgpr, [[MVE_VLDRHU32_post1:%[0-9]+]]:mqpr = MVE_VLDRHU32_post [[COPY]], 32, 0, $noreg :: (load (s128), align 8) + ; CHECK: [[MVE_VLDRHU32_:%[0-9]+]]:mqpr = MVE_VLDRHU32 [[MVE_VLDRHU32_post]], -16, 0, $noreg :: (load (s128), align 8) ; CHECK: $r0 = COPY [[MVE_VLDRHU32_post]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:tgpr = COPY $r0 - %2:tgpr, %1:mqpr = MVE_VLDRHU32_post %0, 32, 0, $noreg :: (load 16, align 8) - %1:mqpr = MVE_VLDRHU32 %0, 16, 0, $noreg :: (load 16, align 8) + %2:tgpr, %1:mqpr = MVE_VLDRHU32_post %0, 32, 0, $noreg :: (load (s128), align 8) + %1:mqpr = MVE_VLDRHU32 %0, 16, 0, $noreg :: (load (s128), align 8) $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -1114,13 +1114,13 @@ body: | ; CHECK-LABEL: name: MVE_VLDRBS16_post ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0 - ; CHECK: [[MVE_VLDRBS16_post:%[0-9]+]]:tgpr, [[MVE_VLDRBS16_post1:%[0-9]+]]:mqpr = MVE_VLDRBS16_post [[COPY]], 32, 0, $noreg :: (load 16, align 8) - ; CHECK: [[MVE_VLDRBS16_:%[0-9]+]]:mqpr = MVE_VLDRBS16 [[MVE_VLDRBS16_post]], -16, 0, $noreg :: (load 16, align 8) + ; CHECK: [[MVE_VLDRBS16_post:%[0-9]+]]:tgpr, [[MVE_VLDRBS16_post1:%[0-9]+]]:mqpr = MVE_VLDRBS16_post [[COPY]], 32, 0, $noreg :: (load (s128), align 8) + ; CHECK: [[MVE_VLDRBS16_:%[0-9]+]]:mqpr = MVE_VLDRBS16 [[MVE_VLDRBS16_post]], -16, 0, $noreg :: (load (s128), align 8) ; CHECK: $r0 = COPY [[MVE_VLDRBS16_post]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:tgpr = COPY $r0 - %2:tgpr, %1:mqpr = MVE_VLDRBS16_post %0, 32, 0, $noreg :: (load 16, align 8) - %1:mqpr = MVE_VLDRBS16 %0, 16, 0, $noreg :: (load 16, align 8) + %2:tgpr, %1:mqpr = MVE_VLDRBS16_post %0, 32, 0, $noreg :: (load (s128), align 8) + %1:mqpr = MVE_VLDRBS16 %0, 16, 0, $noreg :: (load (s128), align 8) $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -1142,13 +1142,13 @@ body: | ; CHECK-LABEL: name: MVE_VLDRBU16_post ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0 - ; CHECK: [[MVE_VLDRBU16_post:%[0-9]+]]:tgpr, [[MVE_VLDRBU16_post1:%[0-9]+]]:mqpr = MVE_VLDRBU16_post [[COPY]], 32, 0, $noreg :: (load 16, align 8) - ; CHECK: [[MVE_VLDRBU16_:%[0-9]+]]:mqpr = MVE_VLDRBU16 [[MVE_VLDRBU16_post]], -16, 0, $noreg :: (load 16, align 8) + ; CHECK: [[MVE_VLDRBU16_post:%[0-9]+]]:tgpr, [[MVE_VLDRBU16_post1:%[0-9]+]]:mqpr = MVE_VLDRBU16_post [[COPY]], 32, 0, $noreg :: (load (s128), align 8) + ; CHECK: [[MVE_VLDRBU16_:%[0-9]+]]:mqpr = MVE_VLDRBU16 [[MVE_VLDRBU16_post]], -16, 0, $noreg :: (load (s128), align 8) ; CHECK: $r0 = COPY [[MVE_VLDRBU16_post]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:tgpr = COPY $r0 - %2:tgpr, %1:mqpr = MVE_VLDRBU16_post %0, 32, 0, $noreg :: (load 16, align 8) - %1:mqpr = MVE_VLDRBU16 %0, 16, 0, $noreg :: (load 16, align 8) + %2:tgpr, %1:mqpr = MVE_VLDRBU16_post %0, 32, 0, $noreg :: (load (s128), align 8) + %1:mqpr = MVE_VLDRBU16 %0, 16, 0, $noreg :: (load (s128), align 8) $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -1171,14 +1171,14 @@ body: | ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:mqpr = COPY $q0 ; CHECK: [[COPY1:%[0-9]+]]:rgpr = COPY $r0 - ; CHECK: [[MVE_VSTRWU32_post:%[0-9]+]]:rgpr = MVE_VSTRWU32_post [[COPY]], [[COPY1]], 32, 0, $noreg :: (store 16, align 8) - ; CHECK: MVE_VSTRWU32 [[COPY]], [[MVE_VSTRWU32_post]], -16, 0, $noreg :: (store 16, align 8) + ; CHECK: [[MVE_VSTRWU32_post:%[0-9]+]]:rgpr = MVE_VSTRWU32_post [[COPY]], [[COPY1]], 32, 0, $noreg :: (store (s128), align 8) + ; CHECK: MVE_VSTRWU32 [[COPY]], [[MVE_VSTRWU32_post]], -16, 0, $noreg :: (store (s128), align 8) ; CHECK: $r0 = COPY [[MVE_VSTRWU32_post]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %1:mqpr = COPY $q0 %0:rgpr = COPY $r0 - %2:rgpr = MVE_VSTRWU32_post %1, %0, 32, 0, $noreg :: (store 16, align 8) - MVE_VSTRWU32 %1, %0, 16, 0, $noreg :: (store 16, align 8) + %2:rgpr = MVE_VSTRWU32_post %1, %0, 32, 0, $noreg :: (store (s128), align 8) + MVE_VSTRWU32 %1, %0, 16, 0, $noreg :: (store (s128), align 8) $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -1201,14 +1201,14 @@ body: | ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:mqpr = COPY $q0 ; CHECK: [[COPY1:%[0-9]+]]:rgpr = COPY $r0 - ; CHECK: [[MVE_VSTRHU16_post:%[0-9]+]]:rgpr = MVE_VSTRHU16_post [[COPY]], [[COPY1]], 32, 0, $noreg :: (store 16, align 8) - ; CHECK: MVE_VSTRHU16 [[COPY]], [[MVE_VSTRHU16_post]], -16, 0, $noreg :: (store 16, align 8) + ; CHECK: [[MVE_VSTRHU16_post:%[0-9]+]]:rgpr = MVE_VSTRHU16_post [[COPY]], [[COPY1]], 32, 0, $noreg :: (store (s128), align 8) + ; CHECK: MVE_VSTRHU16 [[COPY]], [[MVE_VSTRHU16_post]], -16, 0, $noreg :: (store (s128), align 8) ; CHECK: $r0 = COPY [[MVE_VSTRHU16_post]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %1:mqpr = COPY $q0 %0:rgpr = COPY $r0 - %2:rgpr = MVE_VSTRHU16_post %1, %0, 32, 0, $noreg :: (store 16, align 8) - MVE_VSTRHU16 %1, %0, 16, 0, $noreg :: (store 16, align 8) + %2:rgpr = MVE_VSTRHU16_post %1, %0, 32, 0, $noreg :: (store (s128), align 8) + MVE_VSTRHU16 %1, %0, 16, 0, $noreg :: (store (s128), align 8) $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -1231,14 +1231,14 @@ body: | ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:mqpr = COPY $q0 ; CHECK: [[COPY1:%[0-9]+]]:rgpr = COPY $r0 - ; CHECK: [[MVE_VSTRBU8_post:%[0-9]+]]:rgpr = MVE_VSTRBU8_post [[COPY]], [[COPY1]], 32, 0, $noreg :: (store 16, align 8) - ; CHECK: MVE_VSTRBU8 [[COPY]], [[MVE_VSTRBU8_post]], -16, 0, $noreg :: (store 16, align 8) + ; CHECK: [[MVE_VSTRBU8_post:%[0-9]+]]:rgpr = MVE_VSTRBU8_post [[COPY]], [[COPY1]], 32, 0, $noreg :: (store (s128), align 8) + ; CHECK: MVE_VSTRBU8 [[COPY]], [[MVE_VSTRBU8_post]], -16, 0, $noreg :: (store (s128), align 8) ; CHECK: $r0 = COPY [[MVE_VSTRBU8_post]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %1:mqpr = COPY $q0 %0:rgpr = COPY $r0 - %2:rgpr = MVE_VSTRBU8_post %1, %0, 32, 0, $noreg :: (store 16, align 8) - MVE_VSTRBU8 %1, %0, 16, 0, $noreg :: (store 16, align 8) + %2:rgpr = MVE_VSTRBU8_post %1, %0, 32, 0, $noreg :: (store (s128), align 8) + MVE_VSTRBU8 %1, %0, 16, 0, $noreg :: (store (s128), align 8) $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -1261,14 +1261,14 @@ body: | ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:mqpr = COPY $q0 ; CHECK: [[COPY1:%[0-9]+]]:tgpr = COPY $r0 - ; CHECK: [[MVE_VSTRH32_post:%[0-9]+]]:tgpr = MVE_VSTRH32_post [[COPY]], [[COPY1]], 32, 0, $noreg :: (store 16, align 8) - ; CHECK: MVE_VSTRH32 [[COPY]], [[MVE_VSTRH32_post]], -16, 0, $noreg :: (store 16, align 8) + ; CHECK: [[MVE_VSTRH32_post:%[0-9]+]]:tgpr = MVE_VSTRH32_post [[COPY]], [[COPY1]], 32, 0, $noreg :: (store (s128), align 8) + ; CHECK: MVE_VSTRH32 [[COPY]], [[MVE_VSTRH32_post]], -16, 0, $noreg :: (store (s128), align 8) ; CHECK: $r0 = COPY [[MVE_VSTRH32_post]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %1:mqpr = COPY $q0 %0:tgpr = COPY $r0 - %2:tgpr = MVE_VSTRH32_post %1, %0, 32, 0, $noreg :: (store 16, align 8) - MVE_VSTRH32 %1, %0, 16, 0, $noreg :: (store 16, align 8) + %2:tgpr = MVE_VSTRH32_post %1, %0, 32, 0, $noreg :: (store (s128), align 8) + MVE_VSTRH32 %1, %0, 16, 0, $noreg :: (store (s128), align 8) $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -1291,14 +1291,14 @@ body: | ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:mqpr = COPY $q0 ; CHECK: [[COPY1:%[0-9]+]]:tgpr = COPY $r0 - ; CHECK: [[MVE_VSTRB32_post:%[0-9]+]]:tgpr = MVE_VSTRB32_post [[COPY]], [[COPY1]], 32, 0, $noreg :: (store 16, align 8) - ; CHECK: MVE_VSTRB32 [[COPY]], [[MVE_VSTRB32_post]], -16, 0, $noreg :: (store 16, align 8) + ; CHECK: [[MVE_VSTRB32_post:%[0-9]+]]:tgpr = MVE_VSTRB32_post [[COPY]], [[COPY1]], 32, 0, $noreg :: (store (s128), align 8) + ; CHECK: MVE_VSTRB32 [[COPY]], [[MVE_VSTRB32_post]], -16, 0, $noreg :: (store (s128), align 8) ; CHECK: $r0 = COPY [[MVE_VSTRB32_post]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %1:mqpr = COPY $q0 %0:tgpr = COPY $r0 - %2:tgpr = MVE_VSTRB32_post %1, %0, 32, 0, $noreg :: (store 16, align 8) - MVE_VSTRB32 %1, %0, 16, 0, $noreg :: (store 16, align 8) + %2:tgpr = MVE_VSTRB32_post %1, %0, 32, 0, $noreg :: (store (s128), align 8) + MVE_VSTRB32 %1, %0, 16, 0, $noreg :: (store (s128), align 8) $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -1321,14 +1321,14 @@ body: | ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:mqpr = COPY $q0 ; CHECK: [[COPY1:%[0-9]+]]:tgpr = COPY $r0 - ; CHECK: [[MVE_VSTRB16_post:%[0-9]+]]:tgpr = MVE_VSTRB16_post [[COPY]], [[COPY1]], 32, 0, $noreg :: (store 16, align 8) - ; CHECK: MVE_VSTRB16 [[COPY]], [[MVE_VSTRB16_post]], -16, 0, $noreg :: (store 16, align 8) + ; CHECK: [[MVE_VSTRB16_post:%[0-9]+]]:tgpr = MVE_VSTRB16_post [[COPY]], [[COPY1]], 32, 0, $noreg :: (store (s128), align 8) + ; CHECK: MVE_VSTRB16 [[COPY]], [[MVE_VSTRB16_post]], -16, 0, $noreg :: (store (s128), align 8) ; CHECK: $r0 = COPY [[MVE_VSTRB16_post]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %1:mqpr = COPY $q0 %0:tgpr = COPY $r0 - %2:tgpr = MVE_VSTRB16_post %1, %0, 32, 0, $noreg :: (store 16, align 8) - MVE_VSTRB16 %1, %0, 16, 0, $noreg :: (store 16, align 8) + %2:tgpr = MVE_VSTRB16_post %1, %0, 32, 0, $noreg :: (store (s128), align 8) + MVE_VSTRB16 %1, %0, 16, 0, $noreg :: (store (s128), align 8) $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -1350,13 +1350,13 @@ body: | ; CHECK-LABEL: name: MVE_VLDRWU32_pre ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:rgpr = COPY $r0 - ; CHECK: [[MVE_VLDRWU32_pre:%[0-9]+]]:rgpr, [[MVE_VLDRWU32_pre1:%[0-9]+]]:mqpr = MVE_VLDRWU32_pre [[COPY]], 32, 0, $noreg :: (load 16, align 8) - ; CHECK: [[MVE_VLDRWU32_:%[0-9]+]]:mqpr = MVE_VLDRWU32 [[MVE_VLDRWU32_pre]], -16, 0, $noreg :: (load 16, align 8) + ; CHECK: [[MVE_VLDRWU32_pre:%[0-9]+]]:rgpr, [[MVE_VLDRWU32_pre1:%[0-9]+]]:mqpr = MVE_VLDRWU32_pre [[COPY]], 32, 0, $noreg :: (load (s128), align 8) + ; CHECK: [[MVE_VLDRWU32_:%[0-9]+]]:mqpr = MVE_VLDRWU32 [[MVE_VLDRWU32_pre]], -16, 0, $noreg :: (load (s128), align 8) ; CHECK: $r0 = COPY [[MVE_VLDRWU32_pre]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:rgpr = COPY $r0 - %2:rgpr, %1:mqpr = MVE_VLDRWU32_pre %0, 32, 0, $noreg :: (load 16, align 8) - %1:mqpr = MVE_VLDRWU32 %0, 16, 0, $noreg :: (load 16, align 8) + %2:rgpr, %1:mqpr = MVE_VLDRWU32_pre %0, 32, 0, $noreg :: (load (s128), align 8) + %1:mqpr = MVE_VLDRWU32 %0, 16, 0, $noreg :: (load (s128), align 8) $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -1378,13 +1378,13 @@ body: | ; CHECK-LABEL: name: MVE_VLDRHU16_pre ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:rgpr = COPY $r0 - ; CHECK: [[MVE_VLDRHU16_pre:%[0-9]+]]:rgpr, [[MVE_VLDRHU16_pre1:%[0-9]+]]:mqpr = MVE_VLDRHU16_pre [[COPY]], 32, 0, $noreg :: (load 16, align 8) - ; CHECK: [[MVE_VLDRHU16_:%[0-9]+]]:mqpr = MVE_VLDRHU16 [[MVE_VLDRHU16_pre]], -16, 0, $noreg :: (load 16, align 8) + ; CHECK: [[MVE_VLDRHU16_pre:%[0-9]+]]:rgpr, [[MVE_VLDRHU16_pre1:%[0-9]+]]:mqpr = MVE_VLDRHU16_pre [[COPY]], 32, 0, $noreg :: (load (s128), align 8) + ; CHECK: [[MVE_VLDRHU16_:%[0-9]+]]:mqpr = MVE_VLDRHU16 [[MVE_VLDRHU16_pre]], -16, 0, $noreg :: (load (s128), align 8) ; CHECK: $r0 = COPY [[MVE_VLDRHU16_pre]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:rgpr = COPY $r0 - %2:rgpr, %1:mqpr = MVE_VLDRHU16_pre %0, 32, 0, $noreg :: (load 16, align 8) - %1:mqpr = MVE_VLDRHU16 %0, 16, 0, $noreg :: (load 16, align 8) + %2:rgpr, %1:mqpr = MVE_VLDRHU16_pre %0, 32, 0, $noreg :: (load (s128), align 8) + %1:mqpr = MVE_VLDRHU16 %0, 16, 0, $noreg :: (load (s128), align 8) $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -1406,13 +1406,13 @@ body: | ; CHECK-LABEL: name: MVE_VLDRBU8_pre ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:rgpr = COPY $r0 - ; CHECK: [[MVE_VLDRBU8_pre:%[0-9]+]]:rgpr, [[MVE_VLDRBU8_pre1:%[0-9]+]]:mqpr = MVE_VLDRBU8_pre [[COPY]], 32, 0, $noreg :: (load 16, align 8) - ; CHECK: [[MVE_VLDRBU8_:%[0-9]+]]:mqpr = MVE_VLDRBU8 [[MVE_VLDRBU8_pre]], -16, 0, $noreg :: (load 16, align 8) + ; CHECK: [[MVE_VLDRBU8_pre:%[0-9]+]]:rgpr, [[MVE_VLDRBU8_pre1:%[0-9]+]]:mqpr = MVE_VLDRBU8_pre [[COPY]], 32, 0, $noreg :: (load (s128), align 8) + ; CHECK: [[MVE_VLDRBU8_:%[0-9]+]]:mqpr = MVE_VLDRBU8 [[MVE_VLDRBU8_pre]], -16, 0, $noreg :: (load (s128), align 8) ; CHECK: $r0 = COPY [[MVE_VLDRBU8_pre]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:rgpr = COPY $r0 - %2:rgpr, %1:mqpr = MVE_VLDRBU8_pre %0, 32, 0, $noreg :: (load 16, align 8) - %1:mqpr = MVE_VLDRBU8 %0, 16, 0, $noreg :: (load 16, align 8) + %2:rgpr, %1:mqpr = MVE_VLDRBU8_pre %0, 32, 0, $noreg :: (load (s128), align 8) + %1:mqpr = MVE_VLDRBU8 %0, 16, 0, $noreg :: (load (s128), align 8) $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -1434,13 +1434,13 @@ body: | ; CHECK-LABEL: name: MVE_VLDRBS32_pre ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0 - ; CHECK: [[MVE_VLDRBS32_pre:%[0-9]+]]:tgpr, [[MVE_VLDRBS32_pre1:%[0-9]+]]:mqpr = MVE_VLDRBS32_pre [[COPY]], 32, 0, $noreg :: (load 16, align 8) - ; CHECK: [[MVE_VLDRBS32_:%[0-9]+]]:mqpr = MVE_VLDRBS32 [[MVE_VLDRBS32_pre]], -16, 0, $noreg :: (load 16, align 8) + ; CHECK: [[MVE_VLDRBS32_pre:%[0-9]+]]:tgpr, [[MVE_VLDRBS32_pre1:%[0-9]+]]:mqpr = MVE_VLDRBS32_pre [[COPY]], 32, 0, $noreg :: (load (s128), align 8) + ; CHECK: [[MVE_VLDRBS32_:%[0-9]+]]:mqpr = MVE_VLDRBS32 [[MVE_VLDRBS32_pre]], -16, 0, $noreg :: (load (s128), align 8) ; CHECK: $r0 = COPY [[MVE_VLDRBS32_pre]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:tgpr = COPY $r0 - %2:tgpr, %1:mqpr = MVE_VLDRBS32_pre %0, 32, 0, $noreg :: (load 16, align 8) - %1:mqpr = MVE_VLDRBS32 %0, 16, 0, $noreg :: (load 16, align 8) + %2:tgpr, %1:mqpr = MVE_VLDRBS32_pre %0, 32, 0, $noreg :: (load (s128), align 8) + %1:mqpr = MVE_VLDRBS32 %0, 16, 0, $noreg :: (load (s128), align 8) $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -1462,13 +1462,13 @@ body: | ; CHECK-LABEL: name: MVE_VLDRBU32_pre ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0 - ; CHECK: [[MVE_VLDRBU32_pre:%[0-9]+]]:tgpr, [[MVE_VLDRBU32_pre1:%[0-9]+]]:mqpr = MVE_VLDRBU32_pre [[COPY]], 32, 0, $noreg :: (load 16, align 8) - ; CHECK: [[MVE_VLDRBU32_:%[0-9]+]]:mqpr = MVE_VLDRBU32 [[MVE_VLDRBU32_pre]], -16, 0, $noreg :: (load 16, align 8) + ; CHECK: [[MVE_VLDRBU32_pre:%[0-9]+]]:tgpr, [[MVE_VLDRBU32_pre1:%[0-9]+]]:mqpr = MVE_VLDRBU32_pre [[COPY]], 32, 0, $noreg :: (load (s128), align 8) + ; CHECK: [[MVE_VLDRBU32_:%[0-9]+]]:mqpr = MVE_VLDRBU32 [[MVE_VLDRBU32_pre]], -16, 0, $noreg :: (load (s128), align 8) ; CHECK: $r0 = COPY [[MVE_VLDRBU32_pre]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:tgpr = COPY $r0 - %2:tgpr, %1:mqpr = MVE_VLDRBU32_pre %0, 32, 0, $noreg :: (load 16, align 8) - %1:mqpr = MVE_VLDRBU32 %0, 16, 0, $noreg :: (load 16, align 8) + %2:tgpr, %1:mqpr = MVE_VLDRBU32_pre %0, 32, 0, $noreg :: (load (s128), align 8) + %1:mqpr = MVE_VLDRBU32 %0, 16, 0, $noreg :: (load (s128), align 8) $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -1490,13 +1490,13 @@ body: | ; CHECK-LABEL: name: MVE_VLDRHS32_pre ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0 - ; CHECK: [[MVE_VLDRHS32_pre:%[0-9]+]]:tgpr, [[MVE_VLDRHS32_pre1:%[0-9]+]]:mqpr = MVE_VLDRHS32_pre [[COPY]], 32, 0, $noreg :: (load 16, align 8) - ; CHECK: [[MVE_VLDRHS32_:%[0-9]+]]:mqpr = MVE_VLDRHS32 [[MVE_VLDRHS32_pre]], -16, 0, $noreg :: (load 16, align 8) + ; CHECK: [[MVE_VLDRHS32_pre:%[0-9]+]]:tgpr, [[MVE_VLDRHS32_pre1:%[0-9]+]]:mqpr = MVE_VLDRHS32_pre [[COPY]], 32, 0, $noreg :: (load (s128), align 8) + ; CHECK: [[MVE_VLDRHS32_:%[0-9]+]]:mqpr = MVE_VLDRHS32 [[MVE_VLDRHS32_pre]], -16, 0, $noreg :: (load (s128), align 8) ; CHECK: $r0 = COPY [[MVE_VLDRHS32_pre]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:tgpr = COPY $r0 - %2:tgpr, %1:mqpr = MVE_VLDRHS32_pre %0, 32, 0, $noreg :: (load 16, align 8) - %1:mqpr = MVE_VLDRHS32 %0, 16, 0, $noreg :: (load 16, align 8) + %2:tgpr, %1:mqpr = MVE_VLDRHS32_pre %0, 32, 0, $noreg :: (load (s128), align 8) + %1:mqpr = MVE_VLDRHS32 %0, 16, 0, $noreg :: (load (s128), align 8) $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -1518,13 +1518,13 @@ body: | ; CHECK-LABEL: name: MVE_VLDRHU32_pre ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0 - ; CHECK: [[MVE_VLDRHU32_pre:%[0-9]+]]:tgpr, [[MVE_VLDRHU32_pre1:%[0-9]+]]:mqpr = MVE_VLDRHU32_pre [[COPY]], 32, 0, $noreg :: (load 16, align 8) - ; CHECK: [[MVE_VLDRHU32_:%[0-9]+]]:mqpr = MVE_VLDRHU32 [[MVE_VLDRHU32_pre]], -16, 0, $noreg :: (load 16, align 8) + ; CHECK: [[MVE_VLDRHU32_pre:%[0-9]+]]:tgpr, [[MVE_VLDRHU32_pre1:%[0-9]+]]:mqpr = MVE_VLDRHU32_pre [[COPY]], 32, 0, $noreg :: (load (s128), align 8) + ; CHECK: [[MVE_VLDRHU32_:%[0-9]+]]:mqpr = MVE_VLDRHU32 [[MVE_VLDRHU32_pre]], -16, 0, $noreg :: (load (s128), align 8) ; CHECK: $r0 = COPY [[MVE_VLDRHU32_pre]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:tgpr = COPY $r0 - %2:tgpr, %1:mqpr = MVE_VLDRHU32_pre %0, 32, 0, $noreg :: (load 16, align 8) - %1:mqpr = MVE_VLDRHU32 %0, 16, 0, $noreg :: (load 16, align 8) + %2:tgpr, %1:mqpr = MVE_VLDRHU32_pre %0, 32, 0, $noreg :: (load (s128), align 8) + %1:mqpr = MVE_VLDRHU32 %0, 16, 0, $noreg :: (load (s128), align 8) $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -1546,13 +1546,13 @@ body: | ; CHECK-LABEL: name: MVE_VLDRBS16_pre ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0 - ; CHECK: [[MVE_VLDRBS16_pre:%[0-9]+]]:tgpr, [[MVE_VLDRBS16_pre1:%[0-9]+]]:mqpr = MVE_VLDRBS16_pre [[COPY]], 32, 0, $noreg :: (load 16, align 8) - ; CHECK: [[MVE_VLDRBS16_:%[0-9]+]]:mqpr = MVE_VLDRBS16 [[MVE_VLDRBS16_pre]], -16, 0, $noreg :: (load 16, align 8) + ; CHECK: [[MVE_VLDRBS16_pre:%[0-9]+]]:tgpr, [[MVE_VLDRBS16_pre1:%[0-9]+]]:mqpr = MVE_VLDRBS16_pre [[COPY]], 32, 0, $noreg :: (load (s128), align 8) + ; CHECK: [[MVE_VLDRBS16_:%[0-9]+]]:mqpr = MVE_VLDRBS16 [[MVE_VLDRBS16_pre]], -16, 0, $noreg :: (load (s128), align 8) ; CHECK: $r0 = COPY [[MVE_VLDRBS16_pre]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:tgpr = COPY $r0 - %2:tgpr, %1:mqpr = MVE_VLDRBS16_pre %0, 32, 0, $noreg :: (load 16, align 8) - %1:mqpr = MVE_VLDRBS16 %0, 16, 0, $noreg :: (load 16, align 8) + %2:tgpr, %1:mqpr = MVE_VLDRBS16_pre %0, 32, 0, $noreg :: (load (s128), align 8) + %1:mqpr = MVE_VLDRBS16 %0, 16, 0, $noreg :: (load (s128), align 8) $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -1574,13 +1574,13 @@ body: | ; CHECK-LABEL: name: MVE_VLDRBU16_pre ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0 - ; CHECK: [[MVE_VLDRBU16_pre:%[0-9]+]]:tgpr, [[MVE_VLDRBU16_pre1:%[0-9]+]]:mqpr = MVE_VLDRBU16_pre [[COPY]], 32, 0, $noreg :: (load 16, align 8) - ; CHECK: [[MVE_VLDRBU16_:%[0-9]+]]:mqpr = MVE_VLDRBU16 [[MVE_VLDRBU16_pre]], -16, 0, $noreg :: (load 16, align 8) + ; CHECK: [[MVE_VLDRBU16_pre:%[0-9]+]]:tgpr, [[MVE_VLDRBU16_pre1:%[0-9]+]]:mqpr = MVE_VLDRBU16_pre [[COPY]], 32, 0, $noreg :: (load (s128), align 8) + ; CHECK: [[MVE_VLDRBU16_:%[0-9]+]]:mqpr = MVE_VLDRBU16 [[MVE_VLDRBU16_pre]], -16, 0, $noreg :: (load (s128), align 8) ; CHECK: $r0 = COPY [[MVE_VLDRBU16_pre]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:tgpr = COPY $r0 - %2:tgpr, %1:mqpr = MVE_VLDRBU16_pre %0, 32, 0, $noreg :: (load 16, align 8) - %1:mqpr = MVE_VLDRBU16 %0, 16, 0, $noreg :: (load 16, align 8) + %2:tgpr, %1:mqpr = MVE_VLDRBU16_pre %0, 32, 0, $noreg :: (load (s128), align 8) + %1:mqpr = MVE_VLDRBU16 %0, 16, 0, $noreg :: (load (s128), align 8) $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -1603,14 +1603,14 @@ body: | ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:mqpr = COPY $q0 ; CHECK: [[COPY1:%[0-9]+]]:rgpr = COPY $r0 - ; CHECK: [[MVE_VSTRWU32_pre:%[0-9]+]]:rgpr = MVE_VSTRWU32_pre [[COPY]], [[COPY1]], 32, 0, $noreg :: (store 16, align 8) - ; CHECK: MVE_VSTRWU32 [[COPY]], [[MVE_VSTRWU32_pre]], -16, 0, $noreg :: (store 16, align 8) + ; CHECK: [[MVE_VSTRWU32_pre:%[0-9]+]]:rgpr = MVE_VSTRWU32_pre [[COPY]], [[COPY1]], 32, 0, $noreg :: (store (s128), align 8) + ; CHECK: MVE_VSTRWU32 [[COPY]], [[MVE_VSTRWU32_pre]], -16, 0, $noreg :: (store (s128), align 8) ; CHECK: $r0 = COPY [[MVE_VSTRWU32_pre]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %1:mqpr = COPY $q0 %0:rgpr = COPY $r0 - %2:rgpr = MVE_VSTRWU32_pre %1, %0, 32, 0, $noreg :: (store 16, align 8) - MVE_VSTRWU32 %1, %0, 16, 0, $noreg :: (store 16, align 8) + %2:rgpr = MVE_VSTRWU32_pre %1, %0, 32, 0, $noreg :: (store (s128), align 8) + MVE_VSTRWU32 %1, %0, 16, 0, $noreg :: (store (s128), align 8) $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -1633,14 +1633,14 @@ body: | ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:mqpr = COPY $q0 ; CHECK: [[COPY1:%[0-9]+]]:rgpr = COPY $r0 - ; CHECK: [[MVE_VSTRHU16_pre:%[0-9]+]]:rgpr = MVE_VSTRHU16_pre [[COPY]], [[COPY1]], 32, 0, $noreg :: (store 16, align 8) - ; CHECK: MVE_VSTRHU16 [[COPY]], [[MVE_VSTRHU16_pre]], -16, 0, $noreg :: (store 16, align 8) + ; CHECK: [[MVE_VSTRHU16_pre:%[0-9]+]]:rgpr = MVE_VSTRHU16_pre [[COPY]], [[COPY1]], 32, 0, $noreg :: (store (s128), align 8) + ; CHECK: MVE_VSTRHU16 [[COPY]], [[MVE_VSTRHU16_pre]], -16, 0, $noreg :: (store (s128), align 8) ; CHECK: $r0 = COPY [[MVE_VSTRHU16_pre]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %1:mqpr = COPY $q0 %0:rgpr = COPY $r0 - %2:rgpr = MVE_VSTRHU16_pre %1, %0, 32, 0, $noreg :: (store 16, align 8) - MVE_VSTRHU16 %1, %0, 16, 0, $noreg :: (store 16, align 8) + %2:rgpr = MVE_VSTRHU16_pre %1, %0, 32, 0, $noreg :: (store (s128), align 8) + MVE_VSTRHU16 %1, %0, 16, 0, $noreg :: (store (s128), align 8) $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -1663,14 +1663,14 @@ body: | ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:mqpr = COPY $q0 ; CHECK: [[COPY1:%[0-9]+]]:rgpr = COPY $r0 - ; CHECK: [[MVE_VSTRBU8_pre:%[0-9]+]]:rgpr = MVE_VSTRBU8_pre [[COPY]], [[COPY1]], 32, 0, $noreg :: (store 16, align 8) - ; CHECK: MVE_VSTRBU8 [[COPY]], [[MVE_VSTRBU8_pre]], -16, 0, $noreg :: (store 16, align 8) + ; CHECK: [[MVE_VSTRBU8_pre:%[0-9]+]]:rgpr = MVE_VSTRBU8_pre [[COPY]], [[COPY1]], 32, 0, $noreg :: (store (s128), align 8) + ; CHECK: MVE_VSTRBU8 [[COPY]], [[MVE_VSTRBU8_pre]], -16, 0, $noreg :: (store (s128), align 8) ; CHECK: $r0 = COPY [[MVE_VSTRBU8_pre]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %1:mqpr = COPY $q0 %0:rgpr = COPY $r0 - %2:rgpr = MVE_VSTRBU8_pre %1, %0, 32, 0, $noreg :: (store 16, align 8) - MVE_VSTRBU8 %1, %0, 16, 0, $noreg :: (store 16, align 8) + %2:rgpr = MVE_VSTRBU8_pre %1, %0, 32, 0, $noreg :: (store (s128), align 8) + MVE_VSTRBU8 %1, %0, 16, 0, $noreg :: (store (s128), align 8) $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -1693,14 +1693,14 @@ body: | ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:mqpr = COPY $q0 ; CHECK: [[COPY1:%[0-9]+]]:tgpr = COPY $r0 - ; CHECK: [[MVE_VSTRH32_pre:%[0-9]+]]:tgpr = MVE_VSTRH32_pre [[COPY]], [[COPY1]], 32, 0, $noreg :: (store 16, align 8) - ; CHECK: MVE_VSTRH32 [[COPY]], [[MVE_VSTRH32_pre]], -16, 0, $noreg :: (store 16, align 8) + ; CHECK: [[MVE_VSTRH32_pre:%[0-9]+]]:tgpr = MVE_VSTRH32_pre [[COPY]], [[COPY1]], 32, 0, $noreg :: (store (s128), align 8) + ; CHECK: MVE_VSTRH32 [[COPY]], [[MVE_VSTRH32_pre]], -16, 0, $noreg :: (store (s128), align 8) ; CHECK: $r0 = COPY [[MVE_VSTRH32_pre]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %1:mqpr = COPY $q0 %0:tgpr = COPY $r0 - %2:tgpr = MVE_VSTRH32_pre %1, %0, 32, 0, $noreg :: (store 16, align 8) - MVE_VSTRH32 %1, %0, 16, 0, $noreg :: (store 16, align 8) + %2:tgpr = MVE_VSTRH32_pre %1, %0, 32, 0, $noreg :: (store (s128), align 8) + MVE_VSTRH32 %1, %0, 16, 0, $noreg :: (store (s128), align 8) $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -1723,14 +1723,14 @@ body: | ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:mqpr = COPY $q0 ; CHECK: [[COPY1:%[0-9]+]]:tgpr = COPY $r0 - ; CHECK: [[MVE_VSTRB32_pre:%[0-9]+]]:tgpr = MVE_VSTRB32_pre [[COPY]], [[COPY1]], 32, 0, $noreg :: (store 16, align 8) - ; CHECK: MVE_VSTRB32 [[COPY]], [[MVE_VSTRB32_pre]], -16, 0, $noreg :: (store 16, align 8) + ; CHECK: [[MVE_VSTRB32_pre:%[0-9]+]]:tgpr = MVE_VSTRB32_pre [[COPY]], [[COPY1]], 32, 0, $noreg :: (store (s128), align 8) + ; CHECK: MVE_VSTRB32 [[COPY]], [[MVE_VSTRB32_pre]], -16, 0, $noreg :: (store (s128), align 8) ; CHECK: $r0 = COPY [[MVE_VSTRB32_pre]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %1:mqpr = COPY $q0 %0:tgpr = COPY $r0 - %2:tgpr = MVE_VSTRB32_pre %1, %0, 32, 0, $noreg :: (store 16, align 8) - MVE_VSTRB32 %1, %0, 16, 0, $noreg :: (store 16, align 8) + %2:tgpr = MVE_VSTRB32_pre %1, %0, 32, 0, $noreg :: (store (s128), align 8) + MVE_VSTRB32 %1, %0, 16, 0, $noreg :: (store (s128), align 8) $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -1753,14 +1753,14 @@ body: | ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:mqpr = COPY $q0 ; CHECK: [[COPY1:%[0-9]+]]:tgpr = COPY $r0 - ; CHECK: [[MVE_VSTRB16_pre:%[0-9]+]]:tgpr = MVE_VSTRB16_pre [[COPY]], [[COPY1]], 32, 0, $noreg :: (store 16, align 8) - ; CHECK: MVE_VSTRB16 [[COPY]], [[MVE_VSTRB16_pre]], -16, 0, $noreg :: (store 16, align 8) + ; CHECK: [[MVE_VSTRB16_pre:%[0-9]+]]:tgpr = MVE_VSTRB16_pre [[COPY]], [[COPY1]], 32, 0, $noreg :: (store (s128), align 8) + ; CHECK: MVE_VSTRB16 [[COPY]], [[MVE_VSTRB16_pre]], -16, 0, $noreg :: (store (s128), align 8) ; CHECK: $r0 = COPY [[MVE_VSTRB16_pre]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %1:mqpr = COPY $q0 %0:tgpr = COPY $r0 - %2:tgpr = MVE_VSTRB16_pre %1, %0, 32, 0, $noreg :: (store 16, align 8) - MVE_VSTRB16 %1, %0, 16, 0, $noreg :: (store 16, align 8) + %2:tgpr = MVE_VSTRB16_pre %1, %0, 32, 0, $noreg :: (store (s128), align 8) + MVE_VSTRB16 %1, %0, 16, 0, $noreg :: (store (s128), align 8) $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -1783,18 +1783,18 @@ body: | ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:mqpr = COPY $q0 ; CHECK: [[COPY1:%[0-9]+]]:tgpr = COPY $r0 - ; CHECK: [[MVE_VSTRB16_pre:%[0-9]+]]:tgpr = MVE_VSTRB16_pre [[COPY]], [[COPY1]], 32, 0, $noreg :: (store 16, align 8) - ; CHECK: MVE_VSTRB16 [[COPY]], [[MVE_VSTRB16_pre]], -16, 0, $noreg :: (store 16, align 8) - ; CHECK: MVE_VSTRB16 [[COPY]], [[MVE_VSTRB16_pre]], -48, 0, $noreg :: (store 16, align 8) - ; CHECK: MVE_VSTRB16 [[COPY]], [[MVE_VSTRB16_pre]], 2, 0, $noreg :: (store 16, align 8) + ; CHECK: [[MVE_VSTRB16_pre:%[0-9]+]]:tgpr = MVE_VSTRB16_pre [[COPY]], [[COPY1]], 32, 0, $noreg :: (store (s128), align 8) + ; CHECK: MVE_VSTRB16 [[COPY]], [[MVE_VSTRB16_pre]], -16, 0, $noreg :: (store (s128), align 8) + ; CHECK: MVE_VSTRB16 [[COPY]], [[MVE_VSTRB16_pre]], -48, 0, $noreg :: (store (s128), align 8) + ; CHECK: MVE_VSTRB16 [[COPY]], [[MVE_VSTRB16_pre]], 2, 0, $noreg :: (store (s128), align 8) ; CHECK: $r0 = COPY [[MVE_VSTRB16_pre]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %1:mqpr = COPY $q0 %0:tgpr = COPY $r0 - %2:tgpr = MVE_VSTRB16_pre %1, %0, 32, 0, $noreg :: (store 16, align 8) - MVE_VSTRB16 %1, %0, 16, 0, $noreg :: (store 16, align 8) - MVE_VSTRB16 %1, %0, -16, 0, $noreg :: (store 16, align 8) - MVE_VSTRB16 %1, %0, 34, 0, $noreg :: (store 16, align 8) + %2:tgpr = MVE_VSTRB16_pre %1, %0, 32, 0, $noreg :: (store (s128), align 8) + MVE_VSTRB16 %1, %0, 16, 0, $noreg :: (store (s128), align 8) + MVE_VSTRB16 %1, %0, -16, 0, $noreg :: (store (s128), align 8) + MVE_VSTRB16 %1, %0, 34, 0, $noreg :: (store (s128), align 8) $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -1818,16 +1818,16 @@ body: | ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:mqpr = COPY $q0 ; CHECK: [[COPY1:%[0-9]+]]:tgpr = COPY $r0 - ; CHECK: [[MVE_VSTRB16_pre:%[0-9]+]]:tgpr = MVE_VSTRB16_pre [[COPY]], [[COPY1]], 32, 0, $noreg :: (store 16, align 8) - ; CHECK: [[MVE_VSTRB16_pre1:%[0-9]+]]:tgpr = MVE_VSTRB16_pre [[COPY]], [[COPY1]], 64, 0, $noreg :: (store 16, align 8) - ; CHECK: MVE_VSTRB16 [[COPY]], [[MVE_VSTRB16_pre1]], -48, 0, $noreg :: (store 16, align 8) + ; CHECK: [[MVE_VSTRB16_pre:%[0-9]+]]:tgpr = MVE_VSTRB16_pre [[COPY]], [[COPY1]], 32, 0, $noreg :: (store (s128), align 8) + ; CHECK: [[MVE_VSTRB16_pre1:%[0-9]+]]:tgpr = MVE_VSTRB16_pre [[COPY]], [[COPY1]], 64, 0, $noreg :: (store (s128), align 8) + ; CHECK: MVE_VSTRB16 [[COPY]], [[MVE_VSTRB16_pre1]], -48, 0, $noreg :: (store (s128), align 8) ; CHECK: $r0 = COPY [[MVE_VSTRB16_pre1]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %1:mqpr = COPY $q0 %0:tgpr = COPY $r0 - %2:tgpr = MVE_VSTRB16_pre %1, %0, 32, 0, $noreg :: (store 16, align 8) - %3:tgpr = MVE_VSTRB16_pre %1, %0, 64, 0, $noreg :: (store 16, align 8) - MVE_VSTRB16 %1, %0, 16, 0, $noreg :: (store 16, align 8) + %2:tgpr = MVE_VSTRB16_pre %1, %0, 32, 0, $noreg :: (store (s128), align 8) + %3:tgpr = MVE_VSTRB16_pre %1, %0, 64, 0, $noreg :: (store (s128), align 8) + MVE_VSTRB16 %1, %0, 16, 0, $noreg :: (store (s128), align 8) $r0 = COPY %3 tBX_RET 14, $noreg, implicit $r0 @@ -1851,15 +1851,15 @@ body: | ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:mqpr = COPY $q0 ; CHECK: [[COPY1:%[0-9]+]]:tgpr = COPY $r0 - ; CHECK: [[MVE_VSTRB16_pre:%[0-9]+]]:tgpr = MVE_VSTRB16_pre [[COPY]], [[COPY1]], 32, 0, $noreg :: (store 16, align 8) - ; CHECK: MVE_VSTRB16 [[COPY]], [[COPY1]], 0, 0, $noreg :: (store 16, align 8) + ; CHECK: [[MVE_VSTRB16_pre:%[0-9]+]]:tgpr = MVE_VSTRB16_pre [[COPY]], [[COPY1]], 32, 0, $noreg :: (store (s128), align 8) + ; CHECK: MVE_VSTRB16 [[COPY]], [[COPY1]], 0, 0, $noreg :: (store (s128), align 8) ; CHECK: [[t2ADDri:%[0-9]+]]:tgpr = nuw t2ADDri [[COPY1]], 32, 14 /* CC::al */, $noreg, $noreg ; CHECK: $r0 = COPY [[t2ADDri]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %1:mqpr = COPY $q0 %0:tgpr = COPY $r0 - %2:tgpr = MVE_VSTRB16_pre %1, %0, 32, 0, $noreg :: (store 16, align 8) - MVE_VSTRB16 %1, %0, 0, 0, $noreg :: (store 16, align 8) + %2:tgpr = MVE_VSTRB16_pre %1, %0, 32, 0, $noreg :: (store (s128), align 8) + MVE_VSTRB16 %1, %0, 0, 0, $noreg :: (store (s128), align 8) %3:tgpr = nuw t2ADDri %0, 32, 14, $noreg, $noreg $r0 = COPY %3 tBX_RET 14, $noreg, implicit $r0 @@ -1883,14 +1883,14 @@ body: | ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:mqpr = COPY $q0 ; CHECK: [[COPY1:%[0-9]+]]:tgpr = COPY $r0 - ; CHECK: [[MVE_VSTRBU8_pre:%[0-9]+]]:tgpr = MVE_VSTRBU8_pre [[COPY]], [[COPY1]], 33, 0, $noreg :: (store 16, align 8) - ; CHECK: MVE_VSTRWU32 [[COPY]], [[COPY1]], 0, 0, $noreg :: (store 16, align 8) + ; CHECK: [[MVE_VSTRBU8_pre:%[0-9]+]]:tgpr = MVE_VSTRBU8_pre [[COPY]], [[COPY1]], 33, 0, $noreg :: (store (s128), align 8) + ; CHECK: MVE_VSTRWU32 [[COPY]], [[COPY1]], 0, 0, $noreg :: (store (s128), align 8) ; CHECK: $r0 = COPY [[MVE_VSTRBU8_pre]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %1:mqpr = COPY $q0 %0:tgpr = COPY $r0 - %2:tgpr = MVE_VSTRBU8_pre %1, %0, 33, 0, $noreg :: (store 16, align 8) - MVE_VSTRWU32 %1, %0, 0, 0, $noreg :: (store 16, align 8) + %2:tgpr = MVE_VSTRBU8_pre %1, %0, 33, 0, $noreg :: (store (s128), align 8) + MVE_VSTRWU32 %1, %0, 0, 0, $noreg :: (store (s128), align 8) $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -1913,14 +1913,14 @@ body: | ; CHECK: liveins: $r0, $q0 ; CHECK: [[COPY:%[0-9]+]]:mqpr = COPY $q0 ; CHECK: [[COPY1:%[0-9]+]]:tgpr = COPY $r0 - ; CHECK: [[MVE_VSTRB16_pre:%[0-9]+]]:tgpr = MVE_VSTRB16_pre [[COPY]], [[COPY1]], 100, 0, $noreg :: (store 16, align 8) - ; CHECK: MVE_VSTRB16 [[COPY]], [[COPY1]], -100, 0, $noreg :: (store 16, align 8) + ; CHECK: [[MVE_VSTRB16_pre:%[0-9]+]]:tgpr = MVE_VSTRB16_pre [[COPY]], [[COPY1]], 100, 0, $noreg :: (store (s128), align 8) + ; CHECK: MVE_VSTRB16 [[COPY]], [[COPY1]], -100, 0, $noreg :: (store (s128), align 8) ; CHECK: $r0 = COPY [[MVE_VSTRB16_pre]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %1:mqpr = COPY $q0 %0:tgpr = COPY $r0 - %2:tgpr = MVE_VSTRB16_pre %1, %0, 100, 0, $noreg :: (store 16, align 8) - MVE_VSTRB16 %1, %0, -100, 0, $noreg :: (store 16, align 8) + %2:tgpr = MVE_VSTRB16_pre %1, %0, 100, 0, $noreg :: (store (s128), align 8) + MVE_VSTRB16 %1, %0, -100, 0, $noreg :: (store (s128), align 8) $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 diff --git a/llvm/test/CodeGen/Thumb2/mve-stacksplot.mir b/llvm/test/CodeGen/Thumb2/mve-stacksplot.mir index a2c3bdc894eb1..4fd4c01b1b2ca 100644 --- a/llvm/test/CodeGen/Thumb2/mve-stacksplot.mir +++ b/llvm/test/CodeGen/Thumb2/mve-stacksplot.mir @@ -39,10 +39,10 @@ body: | ; CHECK: $r11 = IMPLICIT_DEF ; CHECK: $r12 = IMPLICIT_DEF ; CHECK: $lr = IMPLICIT_DEF - ; CHECK: t2STRi12 killed $r0, $sp, 0, 14 /* CC::al */, $noreg :: (store 4 into %stack.1) + ; CHECK: t2STRi12 killed $r0, $sp, 0, 14 /* CC::al */, $noreg :: (store (s32) into %stack.1) ; CHECK: $r0 = tMOVr killed $sp, 14 /* CC::al */, $noreg - ; CHECK: renamable $q2 = MVE_VLDRBU32 killed $r0, 16, 0, $noreg :: (load 4 from %stack.0 + 12) - ; CHECK: $r0 = t2LDRi12 $sp, 0, 14 /* CC::al */, $noreg :: (load 4 from %stack.1) + ; CHECK: renamable $q2 = MVE_VLDRBU32 killed $r0, 16, 0, $noreg :: (load (s32) from %stack.0 + 12) + ; CHECK: $r0 = t2LDRi12 $sp, 0, 14 /* CC::al */, $noreg :: (load (s32) from %stack.1) ; CHECK: KILL $r0 ; CHECK: KILL $r1 ; CHECK: KILL $r2 @@ -72,7 +72,7 @@ body: | $r12 = IMPLICIT_DEF $lr = IMPLICIT_DEF - renamable $q2 = MVE_VLDRBU32 %stack.0, 12, 0, $noreg :: (load 4 from %stack.0 + 12) + renamable $q2 = MVE_VLDRBU32 %stack.0, 12, 0, $noreg :: (load (s32) from %stack.0 + 12) KILL $r0 KILL $r1 @@ -132,10 +132,10 @@ body: | ; CHECK: $r11 = IMPLICIT_DEF ; CHECK: $r12 = IMPLICIT_DEF ; CHECK: $lr = IMPLICIT_DEF - ; CHECK: t2STRi12 killed $r0, $sp, 0, 14 /* CC::al */, $noreg :: (store 4 into %stack.2) + ; CHECK: t2STRi12 killed $r0, $sp, 0, 14 /* CC::al */, $noreg :: (store (s32) into %stack.2) ; CHECK: $r0 = t2ADDri killed $sp, 1152, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $q2 = MVE_VLDRBU8 killed $r0, 52, 0, $noreg :: (load 4 from %stack.0) - ; CHECK: $r0 = t2LDRi12 $sp, 0, 14 /* CC::al */, $noreg :: (load 4 from %stack.2) + ; CHECK: renamable $q2 = MVE_VLDRBU8 killed $r0, 52, 0, $noreg :: (load (s32) from %stack.0) + ; CHECK: $r0 = t2LDRi12 $sp, 0, 14 /* CC::al */, $noreg :: (load (s32) from %stack.2) ; CHECK: KILL $r0 ; CHECK: KILL $r1 ; CHECK: KILL $r2 @@ -165,7 +165,7 @@ body: | $r12 = IMPLICIT_DEF $lr = IMPLICIT_DEF - renamable $q2 = MVE_VLDRBU8 %stack.0, 0, 0, $noreg :: (load 4 from %stack.0) + renamable $q2 = MVE_VLDRBU8 %stack.0, 0, 0, $noreg :: (load (s32) from %stack.0) KILL $r0 KILL $r1 diff --git a/llvm/test/CodeGen/Thumb2/mve-vpt-block-fold-vcmp.mir b/llvm/test/CodeGen/Thumb2/mve-vpt-block-fold-vcmp.mir index bed0ac8ef6424..57be46bb2755c 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vpt-block-fold-vcmp.mir +++ b/llvm/test/CodeGen/Thumb2/mve-vpt-block-fold-vcmp.mir @@ -104,20 +104,20 @@ body: | ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8 ; CHECK: $r7 = frame-setup tMOVr killed $sp, 14 /* CC::al */, $noreg ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_register $r7 - ; CHECK: renamable $r12 = t2LDRi12 $r7, 16, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.2) - ; CHECK: renamable $lr = t2LDRi12 $r7, 12, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.1) - ; CHECK: renamable $r3 = t2LDRi12 $r7, 8, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0) + ; CHECK: renamable $r12 = t2LDRi12 $r7, 16, 14 /* CC::al */, $noreg :: (load (s32) from %fixed-stack.2) + ; CHECK: renamable $lr = t2LDRi12 $r7, 12, 14 /* CC::al */, $noreg :: (load (s32) from %fixed-stack.1) + ; CHECK: renamable $r3 = t2LDRi12 $r7, 8, 14 /* CC::al */, $noreg :: (load (s32) from %fixed-stack.0) ; CHECK: BUNDLE implicit-def $vpr, implicit-def dead $q0, implicit-def $d0, implicit-def $s0, implicit-def $s1, implicit-def $d1, implicit-def $s2, implicit-def $s3, implicit $q0, implicit $zr, implicit killed $r0, implicit killed $r3, implicit killed $r1, implicit killed $lr { ; CHECK: MVE_VPTv4f32r 1, renamable $q0, $zr, 10, implicit-def $vpr - ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r0, 0, 1, internal renamable $vpr :: (load 16 from %ir.src, align 4) - ; CHECK: MVE_VSTRWU32 internal killed renamable $q0, killed renamable $r3, 0, 1, internal renamable $vpr :: (store 16 into %ir.dest, align 4) - ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r1, 0, 1, internal renamable $vpr :: (load 16 from %ir.src2, align 4) - ; CHECK: MVE_VSTRWU32 internal killed renamable $q0, killed renamable $lr, 0, 1, internal renamable $vpr :: (store 16 into %ir.dest2, align 4) + ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r0, 0, 1, internal renamable $vpr :: (load (s128) from %ir.src, align 4) + ; CHECK: MVE_VSTRWU32 internal killed renamable $q0, killed renamable $r3, 0, 1, internal renamable $vpr :: (store (s128) into %ir.dest, align 4) + ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r1, 0, 1, internal renamable $vpr :: (load (s128) from %ir.src2, align 4) + ; CHECK: MVE_VSTRWU32 internal killed renamable $q0, killed renamable $lr, 0, 1, internal renamable $vpr :: (store (s128) into %ir.dest2, align 4) ; CHECK: } ; CHECK: BUNDLE implicit-def $q0, implicit-def $d0, implicit-def $s0, implicit-def $s1, implicit-def $d1, implicit-def $s2, implicit-def $s3, implicit killed $vpr, implicit killed $r2, implicit killed $r12 { ; CHECK: MVE_VPST 4, implicit $vpr - ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r2, 0, 1, renamable $vpr :: (load 16 from %ir.src3, align 4) - ; CHECK: MVE_VSTRWU32 internal renamable $q0, killed renamable $r12, 0, 1, killed renamable $vpr :: (store 16 into %ir.dest3, align 4) + ; CHECK: renamable $q0 = MVE_VLDRWU32 killed renamable $r2, 0, 1, renamable $vpr :: (load (s128) from %ir.src3, align 4) + ; CHECK: MVE_VSTRWU32 internal renamable $q0, killed renamable $r12, 0, 1, killed renamable $vpr :: (store (s128) into %ir.dest3, align 4) ; CHECK: } ; CHECK: $sp = t2LDMIA_RET $sp, 14 /* CC::al */, $noreg, def $r7, def $pc, implicit $q0 $sp = frame-setup t2STMDB_UPD $sp, 14, $noreg, killed $r7, killed $lr @@ -126,16 +126,16 @@ body: | frame-setup CFI_INSTRUCTION offset $r7, -8 $r7 = frame-setup tMOVr killed $sp, 14, $noreg frame-setup CFI_INSTRUCTION def_cfa_register $r7 - renamable $r12 = t2LDRi12 $r7, 16, 14, $noreg :: (load 4 from %fixed-stack.1) - renamable $lr = t2LDRi12 $r7, 12, 14, $noreg :: (load 4 from %fixed-stack.2) - renamable $r3 = t2LDRi12 $r7, 8, 14, $noreg :: (load 4 from %fixed-stack.3) + renamable $r12 = t2LDRi12 $r7, 16, 14, $noreg :: (load (s32) from %fixed-stack.1) + renamable $lr = t2LDRi12 $r7, 12, 14, $noreg :: (load (s32) from %fixed-stack.2) + renamable $r3 = t2LDRi12 $r7, 8, 14, $noreg :: (load (s32) from %fixed-stack.3) renamable $vpr = MVE_VCMPf32r renamable $q0, $zr, 10, 0, $noreg - renamable $q0 = MVE_VLDRWU32 killed renamable $r0, 0, 1, renamable $vpr :: (load 16 from %ir.src, align 4) - MVE_VSTRWU32 killed renamable $q0, killed renamable $r3, 0, 1, renamable $vpr :: (store 16 into %ir.dest, align 4) - renamable $q0 = MVE_VLDRWU32 killed renamable $r1, 0, 1, renamable $vpr :: (load 16 from %ir.src2, align 4) - MVE_VSTRWU32 killed renamable $q0, killed renamable $lr, 0, 1, renamable $vpr :: (store 16 into %ir.dest2, align 4) - renamable $q0 = MVE_VLDRWU32 killed renamable $r2, 0, 1, renamable $vpr :: (load 16 from %ir.src3, align 4) - MVE_VSTRWU32 renamable $q0, killed renamable $r12, 0, 1, killed renamable $vpr :: (store 16 into %ir.dest3, align 4) + renamable $q0 = MVE_VLDRWU32 killed renamable $r0, 0, 1, renamable $vpr :: (load (s128) from %ir.src, align 4) + MVE_VSTRWU32 killed renamable $q0, killed renamable $r3, 0, 1, renamable $vpr :: (store (s128) into %ir.dest, align 4) + renamable $q0 = MVE_VLDRWU32 killed renamable $r1, 0, 1, renamable $vpr :: (load (s128) from %ir.src2, align 4) + MVE_VSTRWU32 killed renamable $q0, killed renamable $lr, 0, 1, renamable $vpr :: (store (s128) into %ir.dest2, align 4) + renamable $q0 = MVE_VLDRWU32 killed renamable $r2, 0, 1, renamable $vpr :: (load (s128) from %ir.src3, align 4) + MVE_VSTRWU32 renamable $q0, killed renamable $r12, 0, 1, killed renamable $vpr :: (store (s128) into %ir.dest3, align 4) $sp = t2LDMIA_RET $sp, 14, $noreg, def $r7, def $pc, implicit $q0 ... diff --git a/llvm/test/CodeGen/Thumb2/postinc-distribute.mir b/llvm/test/CodeGen/Thumb2/postinc-distribute.mir index d08ac4754c703..e5a0b7bf3a025 100644 --- a/llvm/test/CodeGen/Thumb2/postinc-distribute.mir +++ b/llvm/test/CodeGen/Thumb2/postinc-distribute.mir @@ -32,12 +32,12 @@ body: | ; CHECK-LABEL: name: t2LDRi12 ; CHECK: liveins: $r0 ; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0 - ; CHECK: [[t2LDRi12_:%[0-9]+]]:rgpr = t2LDRi12 [[COPY]], 0, 14 /* CC::al */, $noreg :: (load 4) + ; CHECK: [[t2LDRi12_:%[0-9]+]]:rgpr = t2LDRi12 [[COPY]], 0, 14 /* CC::al */, $noreg :: (load (s32)) ; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = nuw t2ADDri [[COPY]], 32, 14 /* CC::al */, $noreg, $noreg ; CHECK: $r0 = COPY [[t2ADDri]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:gprnopc = COPY $r0 - %1:rgpr = t2LDRi12 %0, 0, 14, $noreg :: (load 4, align 4) + %1:rgpr = t2LDRi12 %0, 0, 14, $noreg :: (load (s32), align 4) %2:rgpr = nuw t2ADDri %0, 32, 14, $noreg, $noreg $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -59,11 +59,11 @@ body: | ; CHECK-LABEL: name: t2LDRHi12 ; CHECK: liveins: $r0 ; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0 - ; CHECK: [[t2LDRH_POST:%[0-9]+]]:rgpr, [[t2LDRH_POST1:%[0-9]+]]:rgpr = t2LDRH_POST [[COPY]], 32, 14 /* CC::al */, $noreg :: (load 4) + ; CHECK: [[t2LDRH_POST:%[0-9]+]]:rgpr, [[t2LDRH_POST1:%[0-9]+]]:rgpr = t2LDRH_POST [[COPY]], 32, 14 /* CC::al */, $noreg :: (load (s32)) ; CHECK: $r0 = COPY [[t2LDRH_POST1]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:gprnopc = COPY $r0 - %1:rgpr = t2LDRHi12 %0, 0, 14, $noreg :: (load 4, align 4) + %1:rgpr = t2LDRHi12 %0, 0, 14, $noreg :: (load (s32), align 4) %2:rgpr = nuw t2ADDri %0, 32, 14, $noreg, $noreg $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -85,11 +85,11 @@ body: | ; CHECK-LABEL: name: t2LDRSHi12 ; CHECK: liveins: $r0 ; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0 - ; CHECK: [[t2LDRSH_POST:%[0-9]+]]:rgpr, [[t2LDRSH_POST1:%[0-9]+]]:rgpr = t2LDRSH_POST [[COPY]], 32, 14 /* CC::al */, $noreg :: (load 4) + ; CHECK: [[t2LDRSH_POST:%[0-9]+]]:rgpr, [[t2LDRSH_POST1:%[0-9]+]]:rgpr = t2LDRSH_POST [[COPY]], 32, 14 /* CC::al */, $noreg :: (load (s32)) ; CHECK: $r0 = COPY [[t2LDRSH_POST1]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:gprnopc = COPY $r0 - %1:rgpr = t2LDRSHi12 %0, 0, 14, $noreg :: (load 4, align 4) + %1:rgpr = t2LDRSHi12 %0, 0, 14, $noreg :: (load (s32), align 4) %2:rgpr = nuw t2ADDri %0, 32, 14, $noreg, $noreg $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -111,11 +111,11 @@ body: | ; CHECK-LABEL: name: t2LDRBi12 ; CHECK: liveins: $r0 ; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0 - ; CHECK: [[t2LDRB_POST:%[0-9]+]]:rgpr, [[t2LDRB_POST1:%[0-9]+]]:rgpr = t2LDRB_POST [[COPY]], 32, 14 /* CC::al */, $noreg :: (load 4) + ; CHECK: [[t2LDRB_POST:%[0-9]+]]:rgpr, [[t2LDRB_POST1:%[0-9]+]]:rgpr = t2LDRB_POST [[COPY]], 32, 14 /* CC::al */, $noreg :: (load (s32)) ; CHECK: $r0 = COPY [[t2LDRB_POST1]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:gprnopc = COPY $r0 - %1:rgpr = t2LDRBi12 %0, 0, 14, $noreg :: (load 4, align 4) + %1:rgpr = t2LDRBi12 %0, 0, 14, $noreg :: (load (s32), align 4) %2:rgpr = nuw t2ADDri %0, 32, 14, $noreg, $noreg $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -137,11 +137,11 @@ body: | ; CHECK-LABEL: name: t2LDRSBi12 ; CHECK: liveins: $r0 ; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0 - ; CHECK: [[t2LDRSB_POST:%[0-9]+]]:rgpr, [[t2LDRSB_POST1:%[0-9]+]]:rgpr = t2LDRSB_POST [[COPY]], 32, 14 /* CC::al */, $noreg :: (load 4) + ; CHECK: [[t2LDRSB_POST:%[0-9]+]]:rgpr, [[t2LDRSB_POST1:%[0-9]+]]:rgpr = t2LDRSB_POST [[COPY]], 32, 14 /* CC::al */, $noreg :: (load (s32)) ; CHECK: $r0 = COPY [[t2LDRSB_POST1]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:gprnopc = COPY $r0 - %1:rgpr = t2LDRSBi12 %0, 0, 14, $noreg :: (load 4, align 4) + %1:rgpr = t2LDRSBi12 %0, 0, 14, $noreg :: (load (s32), align 4) %2:rgpr = nuw t2ADDri %0, 32, 14, $noreg, $noreg $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -165,13 +165,13 @@ body: | ; CHECK: liveins: $r0, $r1 ; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0 ; CHECK: [[COPY1:%[0-9]+]]:rgpr = COPY $r1 - ; CHECK: t2STRi12 [[COPY1]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store 4) + ; CHECK: t2STRi12 [[COPY1]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store (s32)) ; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = nuw t2ADDri [[COPY]], 32, 14 /* CC::al */, $noreg, $noreg ; CHECK: $r0 = COPY [[t2ADDri]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:gprnopc = COPY $r0 %1:rgpr = COPY $r1 - t2STRi12 %1:rgpr, %0, 0, 14, $noreg :: (store 4, align 4) + t2STRi12 %1:rgpr, %0, 0, 14, $noreg :: (store (s32), align 4) %2:rgpr = nuw t2ADDri %0, 32, 14, $noreg, $noreg $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -195,12 +195,12 @@ body: | ; CHECK: liveins: $r0, $r1 ; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0 ; CHECK: [[COPY1:%[0-9]+]]:rgpr = COPY $r1 - ; CHECK: early-clobber %2:rgpr = t2STRH_POST [[COPY1]], [[COPY]], 32, 14 /* CC::al */, $noreg :: (store 4) + ; CHECK: early-clobber %2:rgpr = t2STRH_POST [[COPY1]], [[COPY]], 32, 14 /* CC::al */, $noreg :: (store (s32)) ; CHECK: $r0 = COPY %2 ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:gprnopc = COPY $r0 %1:rgpr = COPY $r1 - t2STRHi12 %1:rgpr, %0, 0, 14, $noreg :: (store 4, align 4) + t2STRHi12 %1:rgpr, %0, 0, 14, $noreg :: (store (s32), align 4) %2:rgpr = nuw t2ADDri %0, 32, 14, $noreg, $noreg $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -224,12 +224,12 @@ body: | ; CHECK: liveins: $r0, $r1 ; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0 ; CHECK: [[COPY1:%[0-9]+]]:rgpr = COPY $r1 - ; CHECK: early-clobber %2:rgpr = t2STRB_POST [[COPY1]], [[COPY]], 32, 14 /* CC::al */, $noreg :: (store 4) + ; CHECK: early-clobber %2:rgpr = t2STRB_POST [[COPY1]], [[COPY]], 32, 14 /* CC::al */, $noreg :: (store (s32)) ; CHECK: $r0 = COPY %2 ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:gprnopc = COPY $r0 %1:rgpr = COPY $r1 - t2STRBi12 %1:rgpr, %0, 0, 14, $noreg :: (store 4, align 4) + t2STRBi12 %1:rgpr, %0, 0, 14, $noreg :: (store (s32), align 4) %2:rgpr = nuw t2ADDri %0, 32, 14, $noreg, $noreg $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -251,12 +251,12 @@ body: | ; CHECK: liveins: $r0 ; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0 ; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = nuw t2ADDri [[COPY]], 32, 14 /* CC::al */, $noreg, $noreg - ; CHECK: t2STRi12 [[t2ADDri]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store 4) + ; CHECK: t2STRi12 [[t2ADDri]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store (s32)) ; CHECK: $r0 = COPY [[t2ADDri]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:gprnopc = COPY $r0 %1:rgpr = nuw t2ADDri %0, 32, 14, $noreg, $noreg - t2STRi12 %1, %0, 0, 14, $noreg :: (store 4, align 4) + t2STRi12 %1, %0, 0, 14, $noreg :: (store (s32), align 4) $r0 = COPY %1 tBX_RET 14, $noreg, implicit $r0 @@ -278,13 +278,13 @@ body: | ; CHECK-LABEL: name: minsize2 ; CHECK: liveins: $r0 ; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0 - ; CHECK: [[t2LDRB_POST:%[0-9]+]]:rgpr, [[t2LDRB_POST1:%[0-9]+]]:rgpr = t2LDRB_POST [[COPY]], 32, 14 /* CC::al */, $noreg :: (load 4) - ; CHECK: [[t2LDRBi8_:%[0-9]+]]:rgpr = t2LDRBi8 [[t2LDRB_POST1]], -30, 14 /* CC::al */, $noreg :: (load 4) + ; CHECK: [[t2LDRB_POST:%[0-9]+]]:rgpr, [[t2LDRB_POST1:%[0-9]+]]:rgpr = t2LDRB_POST [[COPY]], 32, 14 /* CC::al */, $noreg :: (load (s32)) + ; CHECK: [[t2LDRBi8_:%[0-9]+]]:rgpr = t2LDRBi8 [[t2LDRB_POST1]], -30, 14 /* CC::al */, $noreg :: (load (s32)) ; CHECK: $r0 = COPY [[t2LDRB_POST1]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:gprnopc = COPY $r0 - %1:rgpr = t2LDRBi12 %0, 0, 14, $noreg :: (load 4, align 4) - %3:rgpr = t2LDRBi12 %0, 2, 14, $noreg :: (load 4, align 4) + %1:rgpr = t2LDRBi12 %0, 0, 14, $noreg :: (load (s32), align 4) + %3:rgpr = t2LDRBi12 %0, 2, 14, $noreg :: (load (s32), align 4) %2:rgpr = nuw t2ADDri %0, 32, 14, $noreg, $noreg $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 @@ -308,16 +308,16 @@ body: | ; CHECK-LABEL: name: minsize3 ; CHECK: liveins: $r0 ; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0 - ; CHECK: [[t2LDRBi12_:%[0-9]+]]:rgpr = t2LDRBi12 [[COPY]], 0, 14 /* CC::al */, $noreg :: (load 4) - ; CHECK: [[t2LDRBi12_1:%[0-9]+]]:rgpr = t2LDRBi12 [[COPY]], 2, 14 /* CC::al */, $noreg :: (load 4) - ; CHECK: [[t2LDRBi12_2:%[0-9]+]]:rgpr = t2LDRBi12 [[COPY]], 4, 14 /* CC::al */, $noreg :: (load 4) + ; CHECK: [[t2LDRBi12_:%[0-9]+]]:rgpr = t2LDRBi12 [[COPY]], 0, 14 /* CC::al */, $noreg :: (load (s32)) + ; CHECK: [[t2LDRBi12_1:%[0-9]+]]:rgpr = t2LDRBi12 [[COPY]], 2, 14 /* CC::al */, $noreg :: (load (s32)) + ; CHECK: [[t2LDRBi12_2:%[0-9]+]]:rgpr = t2LDRBi12 [[COPY]], 4, 14 /* CC::al */, $noreg :: (load (s32)) ; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = nuw t2ADDri [[COPY]], 32, 14 /* CC::al */, $noreg, $noreg ; CHECK: $r0 = COPY [[t2ADDri]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:gprnopc = COPY $r0 - %1:rgpr = t2LDRBi12 %0, 0, 14, $noreg :: (load 4, align 4) - %3:rgpr = t2LDRBi12 %0, 2, 14, $noreg :: (load 4, align 4) - %4:rgpr = t2LDRBi12 %0, 4, 14, $noreg :: (load 4, align 4) + %1:rgpr = t2LDRBi12 %0, 0, 14, $noreg :: (load (s32), align 4) + %3:rgpr = t2LDRBi12 %0, 2, 14, $noreg :: (load (s32), align 4) + %4:rgpr = t2LDRBi12 %0, 4, 14, $noreg :: (load (s32), align 4) %2:rgpr = nuw t2ADDri %0, 32, 14, $noreg, $noreg $r0 = COPY %2 tBX_RET 14, $noreg, implicit $r0 diff --git a/llvm/test/CodeGen/Thumb2/scavenge-lr.mir b/llvm/test/CodeGen/Thumb2/scavenge-lr.mir index a2296c12eb607..68b2cf85be386 100644 --- a/llvm/test/CodeGen/Thumb2/scavenge-lr.mir +++ b/llvm/test/CodeGen/Thumb2/scavenge-lr.mir @@ -127,95 +127,95 @@ body: | bb.0.entry: liveins: $r0 $r2 = t2SUBri $r0, 128, 14 /* CC::al */, $noreg, $noreg - $q8 = VLD1q64 $r2, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.www..sroa_cast238, align 32) - VSTMQIA $q8, %stack.0, 14 /* CC::al */, $noreg :: (store 16 into %stack.0) + $q8 = VLD1q64 $r2, 16, 14 /* CC::al */, $noreg :: (load (s128) from %ir.www..sroa_cast238, align 32) + VSTMQIA $q8, %stack.0, 14 /* CC::al */, $noreg :: (store (s128) into %stack.0) $r12 = t2SUBri $r0, 256, 14 /* CC::al */, $noreg, $noreg - $q12 = VLD1q64 $r12, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.ppp..sroa_cast248, align 32) + $q12 = VLD1q64 $r12, 16, 14 /* CC::al */, $noreg :: (load (s128) from %ir.ppp..sroa_cast248, align 32) $q1 = VMULfq $q12, killed $q8, 14 /* CC::al */, $noreg $r3 = nuw t2ADDri $r0, 32, 14 /* CC::al */, $noreg, $noreg - $q10 = VLD1q64 killed $r3, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.uuu..sroa_cast235, align 32) + $q10 = VLD1q64 killed $r3, 16, 14 /* CC::al */, $noreg :: (load (s128) from %ir.uuu..sroa_cast235, align 32) $r5 = t2SUBri $r0, 160, 14 /* CC::al */, $noreg, $noreg - $q15 = VLD1q64 $r5, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.xxx..sroa_cast248, align 32) + $q15 = VLD1q64 $r5, 16, 14 /* CC::al */, $noreg :: (load (s128) from %ir.xxx..sroa_cast248, align 32) $q14 = VMULfq $q15, $q10, 14 /* CC::al */, $noreg $r6 = t2SUBri $r0, 192, 14 /* CC::al */, $noreg, $noreg - $q13 = VLD1q64 $r6, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.vvv..sroa_cast230, align 32) + $q13 = VLD1q64 $r6, 16, 14 /* CC::al */, $noreg :: (load (s128) from %ir.vvv..sroa_cast230, align 32) $q8 = VMULfq $q14, $q13, 14 /* CC::al */, $noreg $r4 = t2SUBri $r0, 96, 14 /* CC::al */, $noreg, $noreg - $q6 = VLD1q64 $r4, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.ttt..sroa_cast226, align 32) + $q6 = VLD1q64 $r4, 16, 14 /* CC::al */, $noreg :: (load (s128) from %ir.ttt..sroa_cast226, align 32) $q8 = VMULfq killed $q8, $q6, 14 /* CC::al */, $noreg $r3 = t2SUBri $r0, 224, 14 /* CC::al */, $noreg, $noreg - $q5 = VLD1q64 $r3, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.zzz..sroa_cast241, align 32) + $q5 = VLD1q64 $r3, 16, 14 /* CC::al */, $noreg :: (load (s128) from %ir.zzz..sroa_cast241, align 32) $q1 = VMLAfq killed $q1, $q5, killed $q8, 14 /* CC::al */, $noreg - $s8 = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load 4 from constant-pool) + $s8 = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool) $s3 = VDIVS $s8, $s7, 14 /* CC::al */, $noreg, implicit-def $q0 $s2 = VDIVS $s8, $s6, 14 /* CC::al */, $noreg, implicit killed $q0, implicit-def $q0 $s1 = VDIVS $s8, $s5, 14 /* CC::al */, $noreg, implicit killed $q0, implicit-def $q0 $s0 = VDIVS $s8, $s4, 14 /* CC::al */, $noreg, implicit killed $q1, implicit killed $q0, implicit-def $q0 $r7 = t2SUBri $r0, 64, 14 /* CC::al */, $noreg, $noreg - $q8 = VLD1q64 $r7, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.yyy..sroa_cast244, align 32) - VSTMQIA $q8, %stack.1, 14 /* CC::al */, $noreg :: (store 16 into %stack.1) + $q8 = VLD1q64 $r7, 16, 14 /* CC::al */, $noreg :: (load (s128) from %ir.yyy..sroa_cast244, align 32) + VSTMQIA $q8, %stack.1, 14 /* CC::al */, $noreg :: (store (s128) into %stack.1) $q8 = VMULfq killed $q8, $q13, 14 /* CC::al */, $noreg $r1 = t2ADDri $r0, 48, 14 /* CC::al */, $noreg, $noreg - $q9, $r0 = VLD1q32wb_fixed killed $r0, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.sss..sroa_cast223, align 32) + $q9, $r0 = VLD1q32wb_fixed killed $r0, 16, 14 /* CC::al */, $noreg :: (load (s128) from %ir.sss..sroa_cast223, align 32) $q11 = COPY $q9 $q11 = VMLSfq killed $q11, killed $q8, $q0, 14 /* CC::al */, $noreg - $r2 = VST1q32wb_fixed killed $r2, 16, killed $q11, 14 /* CC::al */, $noreg :: (store 16 into %ir.www..sroa_cast238, align 32) - $q8 = VLD1q64 $r2, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.www..sroa_cast238 + 16, basealign 32) - VSTMQIA $q8, %stack.3, 14 /* CC::al */, $noreg :: (store 16 into %stack.3) + $r2 = VST1q32wb_fixed killed $r2, 16, killed $q11, 14 /* CC::al */, $noreg :: (store (s128) into %ir.www..sroa_cast238, align 32) + $q8 = VLD1q64 $r2, 16, 14 /* CC::al */, $noreg :: (load (s128) from %ir.www..sroa_cast238 + 16, basealign 32) + VSTMQIA $q8, %stack.3, 14 /* CC::al */, $noreg :: (store (s128) into %stack.3) $q11 = VMULfq $q10, $q0, 14 /* CC::al */, $noreg - $r12 = VST1q32wb_fixed killed $r12, 16, killed $q11, 14 /* CC::al */, $noreg :: (store 16 into %ir.ppp..sroa_cast248, align 32) - $q11 = VLD1q64 $r12, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.ppp..sroa_cast248 + 16, basealign 32) - VSTMQIA $q11, %stack.2, 14 /* CC::al */, $noreg :: (store 16 into %stack.2) + $r12 = VST1q32wb_fixed killed $r12, 16, killed $q11, 14 /* CC::al */, $noreg :: (store (s128) into %ir.ppp..sroa_cast248, align 32) + $q11 = VLD1q64 $r12, 16, 14 /* CC::al */, $noreg :: (load (s128) from %ir.ppp..sroa_cast248 + 16, basealign 32) + VSTMQIA $q11, %stack.2, 14 /* CC::al */, $noreg :: (store (s128) into %stack.2) $q1 = VMULfq killed $q11, killed $q8, 14 /* CC::al */, $noreg - $r5 = VST1q32wb_fixed killed $r5, 16, $q0, 14 /* CC::al */, $noreg :: (store 16 into %ir.xxx..sroa_cast248, align 32) - $q4 = VLD1q64 $r5, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.xxx..sroa_cast248 + 16, basealign 32) - $q11 = VLD1q64 killed $r1, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.uuu..sroa_cast235 + 16, basealign 32) + $r5 = VST1q32wb_fixed killed $r5, 16, $q0, 14 /* CC::al */, $noreg :: (store (s128) into %ir.xxx..sroa_cast248, align 32) + $q4 = VLD1q64 $r5, 16, 14 /* CC::al */, $noreg :: (load (s128) from %ir.xxx..sroa_cast248 + 16, basealign 32) + $q11 = VLD1q64 killed $r1, 16, 14 /* CC::al */, $noreg :: (load (s128) from %ir.uuu..sroa_cast235 + 16, basealign 32) $q7 = VMULfq $q4, $q11, 14 /* CC::al */, $noreg - $r6 = VST1q32wb_fixed killed $r6, 16, $q0, 14 /* CC::al */, $noreg :: (store 16 into %ir.vvv..sroa_cast230, align 32) - $q3 = VLD1q64 $r6, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.vvv..sroa_cast230 + 16, basealign 32) + $r6 = VST1q32wb_fixed killed $r6, 16, $q0, 14 /* CC::al */, $noreg :: (store (s128) into %ir.vvv..sroa_cast230, align 32) + $q3 = VLD1q64 $r6, 16, 14 /* CC::al */, $noreg :: (load (s128) from %ir.vvv..sroa_cast230 + 16, basealign 32) $q8 = VMULfq $q7, $q3, 14 /* CC::al */, $noreg $q12 = VMULfq killed $q12, killed $q6, 14 /* CC::al */, $noreg $q15 = VMLSfq killed $q15, killed $q12, $q0, 14 /* CC::al */, $noreg - $r4 = VST1q32wb_fixed killed $r4, 16, killed $q15, 14 /* CC::al */, $noreg :: (store 16 into %ir.ttt..sroa_cast226, align 32) - $q12 = VLD1q64 $r4, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.ttt..sroa_cast226 + 16, basealign 32) + $r4 = VST1q32wb_fixed killed $r4, 16, killed $q15, 14 /* CC::al */, $noreg :: (store (s128) into %ir.ttt..sroa_cast226, align 32) + $q12 = VLD1q64 $r4, 16, 14 /* CC::al */, $noreg :: (load (s128) from %ir.ttt..sroa_cast226 + 16, basealign 32) $q8 = VMULfq killed $q8, $q12, 14 /* CC::al */, $noreg $q9 = VMULfq killed $q5, killed $q9, 14 /* CC::al */, $noreg $q10 = VMULfq killed $q10, killed $q13, 14 /* CC::al */, $noreg $q10 = VMLSfq killed $q10, killed $q9, $q0, 14 /* CC::al */, $noreg - $r3 = VST1q32wb_fixed killed $r3, 16, killed $q10, 14 /* CC::al */, $noreg :: (store 16 into %ir.zzz..sroa_cast241, align 32) - $q10 = VLD1q64 $r3, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.zzz..sroa_cast241 + 16, basealign 32) + $r3 = VST1q32wb_fixed killed $r3, 16, killed $q10, 14 /* CC::al */, $noreg :: (store (s128) into %ir.zzz..sroa_cast241, align 32) + $q10 = VLD1q64 $r3, 16, 14 /* CC::al */, $noreg :: (load (s128) from %ir.zzz..sroa_cast241 + 16, basealign 32) $q1 = VMLAfq killed $q1, $q10, killed $q8, 14 /* CC::al */, $noreg $s23 = VDIVS $s8, $s7, 14 /* CC::al */, $noreg, implicit-def $q5 $s22 = VDIVS $s8, $s6, 14 /* CC::al */, $noreg, implicit killed $q5, implicit-def $q5 $s21 = VDIVS $s8, $s5, 14 /* CC::al */, $noreg, implicit killed $q5, implicit-def $q5 $s20 = VDIVS killed $s8, $s4, 14 /* CC::al */, $noreg, implicit killed $q1, implicit killed $q5, implicit-def $q5 - VST1q64 killed $r5, 16, $q5, 14 /* CC::al */, $noreg :: (store 16 into %ir.xxx..sroa_cast248 + 16, basealign 32) - VST1q64 killed $r6, 16, $q5, 14 /* CC::al */, $noreg :: (store 16 into %ir.vvv..sroa_cast230 + 16, basealign 32) - $q8 = VLDMQIA %stack.0, 14 /* CC::al */, $noreg :: (load 16 from %stack.0) - $q9 = VLDMQIA %stack.1, 14 /* CC::al */, $noreg :: (load 16 from %stack.1) + VST1q64 killed $r5, 16, $q5, 14 /* CC::al */, $noreg :: (store (s128) into %ir.xxx..sroa_cast248 + 16, basealign 32) + VST1q64 killed $r6, 16, $q5, 14 /* CC::al */, $noreg :: (store (s128) into %ir.vvv..sroa_cast230 + 16, basealign 32) + $q8 = VLDMQIA %stack.0, 14 /* CC::al */, $noreg :: (load (s128) from %stack.0) + $q9 = VLDMQIA %stack.1, 14 /* CC::al */, $noreg :: (load (s128) from %stack.1) $q8 = VMULfq killed $q9, killed $q8, 14 /* CC::al */, $noreg $q14 = VMLSfq killed $q14, killed $q8, killed $q0, 14 /* CC::al */, $noreg - $r7 = VST1q32wb_fixed killed $r7, 16, killed $q14, 14 /* CC::al */, $noreg :: (store 16 into %ir.yyy..sroa_cast244, align 32) - $q8 = VLD1q64 $r7, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.yyy..sroa_cast244 + 16, basealign 32) - $q9 = VLDMQIA %stack.3, 14 /* CC::al */, $noreg :: (load 16 from %stack.3) + $r7 = VST1q32wb_fixed killed $r7, 16, killed $q14, 14 /* CC::al */, $noreg :: (store (s128) into %ir.yyy..sroa_cast244, align 32) + $q8 = VLD1q64 $r7, 16, 14 /* CC::al */, $noreg :: (load (s128) from %ir.yyy..sroa_cast244 + 16, basealign 32) + $q9 = VLDMQIA %stack.3, 14 /* CC::al */, $noreg :: (load (s128) from %stack.3) $q9 = VMULfq $q8, killed $q9, 14 /* CC::al */, $noreg $q7 = VMLSfq killed $q7, killed $q9, $q5, 14 /* CC::al */, $noreg - VST1q64 killed $r7, 16, killed $q7, 14 /* CC::al */, $noreg :: (store 16 into %ir.yyy..sroa_cast244 + 16, basealign 32) - $q9 = VLDMQIA %stack.2, 14 /* CC::al */, $noreg :: (load 16 from %stack.2) + VST1q64 killed $r7, 16, killed $q7, 14 /* CC::al */, $noreg :: (store (s128) into %ir.yyy..sroa_cast244 + 16, basealign 32) + $q9 = VLDMQIA %stack.2, 14 /* CC::al */, $noreg :: (load (s128) from %stack.2) $q9 = VMULfq killed $q9, killed $q12, 14 /* CC::al */, $noreg $q4 = VMLSfq killed $q4, killed $q9, $q5, 14 /* CC::al */, $noreg - VST1q64 killed $r4, 16, killed $q4, 14 /* CC::al */, $noreg :: (store 16 into %ir.ttt..sroa_cast226 + 16, basealign 32) + VST1q64 killed $r4, 16, killed $q4, 14 /* CC::al */, $noreg :: (store (s128) into %ir.ttt..sroa_cast226 + 16, basealign 32) $q8 = VMULfq killed $q8, $q3, 14 /* CC::al */, $noreg - $q9 = VLD1q64 killed $r0, 16, 14 /* CC::al */, $noreg :: (load 16 from %ir.sss..sroa_cast223 + 16, basealign 32) + $q9 = VLD1q64 killed $r0, 16, 14 /* CC::al */, $noreg :: (load (s128) from %ir.sss..sroa_cast223 + 16, basealign 32) $q12 = COPY $q9 $q12 = VMLSfq killed $q12, killed $q8, $q5, 14 /* CC::al */, $noreg - VST1q64 killed $r2, 16, killed $q12, 14 /* CC::al */, $noreg :: (store 16 into %ir.www..sroa_cast238 + 16, basealign 32) + VST1q64 killed $r2, 16, killed $q12, 14 /* CC::al */, $noreg :: (store (s128) into %ir.www..sroa_cast238 + 16, basealign 32) $q8 = VMULfq $q11, killed $q3, 14 /* CC::al */, $noreg $q9 = VMULfq killed $q10, killed $q9, 14 /* CC::al */, $noreg $q8 = VMLSfq killed $q8, killed $q9, $q5, 14 /* CC::al */, $noreg - VST1q64 killed $r3, 16, killed $q8, 14 /* CC::al */, $noreg :: (store 16 into %ir.zzz..sroa_cast241 + 16, basealign 32) + VST1q64 killed $r3, 16, killed $q8, 14 /* CC::al */, $noreg :: (store (s128) into %ir.zzz..sroa_cast241 + 16, basealign 32) $q8 = VMULfq killed $q11, killed $q5, 14 /* CC::al */, $noreg - VST1q64 killed $r12, 16, killed $q8, 14 /* CC::al */, $noreg :: (store 16 into %ir.ppp..sroa_cast248 + 16, basealign 32) + VST1q64 killed $r12, 16, killed $q8, 14 /* CC::al */, $noreg :: (store (s128) into %ir.ppp..sroa_cast248 + 16, basealign 32) tBX_RET 14 /* CC::al */, $noreg ... diff --git a/llvm/test/CodeGen/Thumb2/store-prepostinc.mir b/llvm/test/CodeGen/Thumb2/store-prepostinc.mir index b01983eec4669..35b2fd9526332 100644 --- a/llvm/test/CodeGen/Thumb2/store-prepostinc.mir +++ b/llvm/test/CodeGen/Thumb2/store-prepostinc.mir @@ -55,10 +55,10 @@ body: | ; CHECK-LABEL: name: STR_pre4 ; CHECK: liveins: $r0, $r1 - ; CHECK: early-clobber $r0 = t2STR_PRE killed $r1, $r0, 4, 14 /* CC::al */, $noreg :: (store 4) + ; CHECK: early-clobber $r0 = t2STR_PRE killed $r1, $r0, 4, 14 /* CC::al */, $noreg :: (store (s32)) ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 renamable $r0 = nuw t2ADDri killed renamable $r0, 4, 14 /* CC::al */, $noreg, $noreg - t2STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4) + t2STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s32)) tBX_RET 14 /* CC::al */, $noreg, implicit $r0 ... @@ -76,10 +76,10 @@ body: | ; CHECK-LABEL: name: STR_pre8 ; CHECK: liveins: $r0, $r1 ; CHECK: renamable $r0 = nuw t2ADDri killed renamable $r0, 8, 14 /* CC::al */, $noreg, $noreg - ; CHECK: t2STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4) + ; CHECK: t2STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s32)) ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 renamable $r0 = nuw t2ADDri killed renamable $r0, 8, 14 /* CC::al */, $noreg, $noreg - t2STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4) + t2STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s32)) tBX_RET 14 /* CC::al */, $noreg, implicit $r0 ... @@ -97,10 +97,10 @@ body: | ; CHECK-LABEL: name: STR_pre255 ; CHECK: liveins: $r0, $r1 ; CHECK: renamable $r0 = nuw t2ADDri killed renamable $r0, 255, 14 /* CC::al */, $noreg, $noreg - ; CHECK: t2STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4) + ; CHECK: t2STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s32)) ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 renamable $r0 = nuw t2ADDri killed renamable $r0, 255, 14 /* CC::al */, $noreg, $noreg - t2STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4) + t2STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s32)) tBX_RET 14 /* CC::al */, $noreg, implicit $r0 ... @@ -118,10 +118,10 @@ body: | ; CHECK-LABEL: name: STR_pre256 ; CHECK: liveins: $r0, $r1 ; CHECK: renamable $r0 = nuw t2ADDri killed renamable $r0, 256, 14 /* CC::al */, $noreg, $noreg - ; CHECK: t2STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4) + ; CHECK: t2STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s32)) ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 renamable $r0 = nuw t2ADDri killed renamable $r0, 256, 14 /* CC::al */, $noreg, $noreg - t2STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4) + t2STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s32)) tBX_RET 14 /* CC::al */, $noreg, implicit $r0 ... @@ -140,10 +140,10 @@ body: | ; CHECK-LABEL: name: STRD_pre4 ; CHECK: liveins: $r0, $r1, $r2 ; CHECK: renamable $r0 = nuw t2ADDri killed renamable $r0, 4, 14 /* CC::al */, $noreg, $noreg - ; CHECK: t2STRDi8 killed $r1, killed $r2, $r0, 0, 14 /* CC::al */, $noreg :: (store 8) + ; CHECK: t2STRDi8 killed $r1, killed $r2, $r0, 0, 14 /* CC::al */, $noreg :: (store (s64)) ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 renamable $r0 = nuw t2ADDri killed renamable $r0, 4, 14 /* CC::al */, $noreg, $noreg - t2STRDi8 killed renamable $r1, killed renamable $r2, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 8) + t2STRDi8 killed renamable $r1, killed renamable $r2, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s64)) tBX_RET 14 /* CC::al */, $noreg, implicit $r0 ... @@ -161,10 +161,10 @@ body: | ; CHECK-LABEL: name: STRD_pre8 ; CHECK: liveins: $r0, $r1, $r2 - ; CHECK: $r0 = t2STRD_PRE killed renamable $r1, killed renamable $r2, killed $r0, 8, 14 /* CC::al */, $noreg :: (store 8) + ; CHECK: $r0 = t2STRD_PRE killed renamable $r1, killed renamable $r2, killed $r0, 8, 14 /* CC::al */, $noreg :: (store (s64)) ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 renamable $r0 = nuw t2ADDri killed renamable $r0, 8, 14 /* CC::al */, $noreg, $noreg - t2STRDi8 killed renamable $r1, killed renamable $r2, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 8) + t2STRDi8 killed renamable $r1, killed renamable $r2, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s64)) tBX_RET 14 /* CC::al */, $noreg, implicit $r0 ... @@ -183,10 +183,10 @@ body: | ; CHECK-LABEL: name: STRD_pre255 ; CHECK: liveins: $r0, $r1, $r2 ; CHECK: renamable $r0 = nuw t2ADDri killed renamable $r0, 255, 14 /* CC::al */, $noreg, $noreg - ; CHECK: t2STRDi8 killed $r1, killed $r2, $r0, 0, 14 /* CC::al */, $noreg :: (store 8) + ; CHECK: t2STRDi8 killed $r1, killed $r2, $r0, 0, 14 /* CC::al */, $noreg :: (store (s64)) ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 renamable $r0 = nuw t2ADDri killed renamable $r0, 255, 14 /* CC::al */, $noreg, $noreg - t2STRDi8 killed renamable $r1, killed renamable $r2, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 8) + t2STRDi8 killed renamable $r1, killed renamable $r2, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s64)) tBX_RET 14 /* CC::al */, $noreg, implicit $r0 ... @@ -205,10 +205,10 @@ body: | ; CHECK-LABEL: name: STRD_pre256 ; CHECK: liveins: $r0, $r1, $r2 ; CHECK: renamable $r0 = nuw t2ADDri killed renamable $r0, 256, 14 /* CC::al */, $noreg, $noreg - ; CHECK: t2STRDi8 killed $r1, killed $r2, $r0, 0, 14 /* CC::al */, $noreg :: (store 8) + ; CHECK: t2STRDi8 killed $r1, killed $r2, $r0, 0, 14 /* CC::al */, $noreg :: (store (s64)) ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 renamable $r0 = nuw t2ADDri killed renamable $r0, 256, 14 /* CC::al */, $noreg, $noreg - t2STRDi8 killed renamable $r1, killed renamable $r2, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 8) + t2STRDi8 killed renamable $r1, killed renamable $r2, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s64)) tBX_RET 14 /* CC::al */, $noreg, implicit $r0 ... @@ -227,10 +227,10 @@ body: | ; CHECK-LABEL: name: STRD_pre1020 ; CHECK: liveins: $r0, $r1, $r2 ; CHECK: renamable $r0 = nuw t2ADDri killed renamable $r0, 1020, 14 /* CC::al */, $noreg, $noreg - ; CHECK: t2STRDi8 killed $r1, killed $r2, $r0, 0, 14 /* CC::al */, $noreg :: (store 8) + ; CHECK: t2STRDi8 killed $r1, killed $r2, $r0, 0, 14 /* CC::al */, $noreg :: (store (s64)) ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 renamable $r0 = nuw t2ADDri killed renamable $r0, 1020, 14 /* CC::al */, $noreg, $noreg - t2STRDi8 killed renamable $r1, killed renamable $r2, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 8) + t2STRDi8 killed renamable $r1, killed renamable $r2, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s64)) tBX_RET 14 /* CC::al */, $noreg, implicit $r0 ... @@ -249,10 +249,10 @@ body: | ; CHECK-LABEL: name: STRD_pre1024 ; CHECK: liveins: $r0, $r1, $r2 ; CHECK: renamable $r0 = nuw t2ADDri killed renamable $r0, 1024, 14 /* CC::al */, $noreg, $noreg - ; CHECK: t2STRDi8 killed $r1, killed $r2, $r0, 0, 14 /* CC::al */, $noreg :: (store 8) + ; CHECK: t2STRDi8 killed $r1, killed $r2, $r0, 0, 14 /* CC::al */, $noreg :: (store (s64)) ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 renamable $r0 = nuw t2ADDri killed renamable $r0, 1024, 14 /* CC::al */, $noreg, $noreg - t2STRDi8 killed renamable $r1, killed renamable $r2, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 8) + t2STRDi8 killed renamable $r1, killed renamable $r2, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s64)) tBX_RET 14 /* CC::al */, $noreg, implicit $r0 ... @@ -271,10 +271,10 @@ body: | ; CHECK-LABEL: name: STRD_prem4 ; CHECK: liveins: $r0, $r1, $r2 ; CHECK: renamable $r0 = nuw t2SUBri killed renamable $r0, 4, 14 /* CC::al */, $noreg, $noreg - ; CHECK: t2STRDi8 killed $r1, killed $r2, $r0, 0, 14 /* CC::al */, $noreg :: (store 8) + ; CHECK: t2STRDi8 killed $r1, killed $r2, $r0, 0, 14 /* CC::al */, $noreg :: (store (s64)) ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 renamable $r0 = nuw t2SUBri killed renamable $r0, 4, 14 /* CC::al */, $noreg, $noreg - t2STRDi8 killed renamable $r1, killed renamable $r2, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 8) + t2STRDi8 killed renamable $r1, killed renamable $r2, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s64)) tBX_RET 14 /* CC::al */, $noreg, implicit $r0 ... @@ -292,10 +292,10 @@ body: | ; CHECK-LABEL: name: STRD_prem8 ; CHECK: liveins: $r0, $r1, $r2 - ; CHECK: $r0 = t2STRD_PRE killed renamable $r1, killed renamable $r2, killed $r0, -8, 14 /* CC::al */, $noreg :: (store 8) + ; CHECK: $r0 = t2STRD_PRE killed renamable $r1, killed renamable $r2, killed $r0, -8, 14 /* CC::al */, $noreg :: (store (s64)) ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 renamable $r0 = nuw t2SUBri killed renamable $r0, 8, 14 /* CC::al */, $noreg, $noreg - t2STRDi8 killed renamable $r1, killed renamable $r2, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 8) + t2STRDi8 killed renamable $r1, killed renamable $r2, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s64)) tBX_RET 14 /* CC::al */, $noreg, implicit $r0 ... @@ -314,10 +314,10 @@ body: | ; CHECK-LABEL: name: STRD_prem255 ; CHECK: liveins: $r0, $r1, $r2 ; CHECK: renamable $r0 = nuw t2SUBri killed renamable $r0, 255, 14 /* CC::al */, $noreg, $noreg - ; CHECK: t2STRDi8 killed $r1, killed $r2, $r0, 0, 14 /* CC::al */, $noreg :: (store 8) + ; CHECK: t2STRDi8 killed $r1, killed $r2, $r0, 0, 14 /* CC::al */, $noreg :: (store (s64)) ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 renamable $r0 = nuw t2SUBri killed renamable $r0, 255, 14 /* CC::al */, $noreg, $noreg - t2STRDi8 killed renamable $r1, killed renamable $r2, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 8) + t2STRDi8 killed renamable $r1, killed renamable $r2, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s64)) tBX_RET 14 /* CC::al */, $noreg, implicit $r0 ... @@ -336,10 +336,10 @@ body: | ; CHECK-LABEL: name: STRD_prem256 ; CHECK: liveins: $r0, $r1, $r2 ; CHECK: renamable $r0 = nuw t2SUBri killed renamable $r0, 256, 14 /* CC::al */, $noreg, $noreg - ; CHECK: t2STRDi8 killed $r1, killed $r2, $r0, 0, 14 /* CC::al */, $noreg :: (store 8) + ; CHECK: t2STRDi8 killed $r1, killed $r2, $r0, 0, 14 /* CC::al */, $noreg :: (store (s64)) ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 renamable $r0 = nuw t2SUBri killed renamable $r0, 256, 14 /* CC::al */, $noreg, $noreg - t2STRDi8 killed renamable $r1, killed renamable $r2, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 8) + t2STRDi8 killed renamable $r1, killed renamable $r2, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s64)) tBX_RET 14 /* CC::al */, $noreg, implicit $r0 ... @@ -358,10 +358,10 @@ body: | ; CHECK-LABEL: name: STRD_prem1020 ; CHECK: liveins: $r0, $r1, $r2 ; CHECK: renamable $r0 = nuw t2SUBri killed renamable $r0, 1020, 14 /* CC::al */, $noreg, $noreg - ; CHECK: t2STRDi8 killed $r1, killed $r2, $r0, 0, 14 /* CC::al */, $noreg :: (store 8) + ; CHECK: t2STRDi8 killed $r1, killed $r2, $r0, 0, 14 /* CC::al */, $noreg :: (store (s64)) ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 renamable $r0 = nuw t2SUBri killed renamable $r0, 1020, 14 /* CC::al */, $noreg, $noreg - t2STRDi8 killed renamable $r1, killed renamable $r2, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 8) + t2STRDi8 killed renamable $r1, killed renamable $r2, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s64)) tBX_RET 14 /* CC::al */, $noreg, implicit $r0 ... @@ -380,10 +380,10 @@ body: | ; CHECK-LABEL: name: STRD_prem1024 ; CHECK: liveins: $r0, $r1, $r2 ; CHECK: renamable $r0 = nuw t2SUBri killed renamable $r0, 1024, 14 /* CC::al */, $noreg, $noreg - ; CHECK: t2STRDi8 killed $r1, killed $r2, $r0, 0, 14 /* CC::al */, $noreg :: (store 8) + ; CHECK: t2STRDi8 killed $r1, killed $r2, $r0, 0, 14 /* CC::al */, $noreg :: (store (s64)) ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 renamable $r0 = nuw t2SUBri killed renamable $r0, 1024, 14 /* CC::al */, $noreg, $noreg - t2STRDi8 killed renamable $r1, killed renamable $r2, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 8) + t2STRDi8 killed renamable $r1, killed renamable $r2, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s64)) tBX_RET 14 /* CC::al */, $noreg, implicit $r0 ... @@ -401,9 +401,9 @@ body: | ; CHECK-LABEL: name: STR_post4 ; CHECK: liveins: $r0, $r1 - ; CHECK: early-clobber $r0 = t2STR_POST killed $r1, $r0, 4, 14 /* CC::al */, $noreg :: (store 4) + ; CHECK: early-clobber $r0 = t2STR_POST killed $r1, $r0, 4, 14 /* CC::al */, $noreg :: (store (s32)) ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 - t2STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4) + t2STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s32)) renamable $r0 = nuw t2ADDri killed renamable $r0, 4, 14 /* CC::al */, $noreg, $noreg tBX_RET 14 /* CC::al */, $noreg, implicit $r0 @@ -421,9 +421,9 @@ body: | ; CHECK-LABEL: name: STR_post8 ; CHECK: liveins: $r0, $r1 - ; CHECK: early-clobber $r0 = t2STR_POST killed $r1, $r0, 8, 14 /* CC::al */, $noreg :: (store 4) + ; CHECK: early-clobber $r0 = t2STR_POST killed $r1, $r0, 8, 14 /* CC::al */, $noreg :: (store (s32)) ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 - t2STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4) + t2STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s32)) renamable $r0 = nuw t2ADDri killed renamable $r0, 8, 14 /* CC::al */, $noreg, $noreg tBX_RET 14 /* CC::al */, $noreg, implicit $r0 @@ -441,10 +441,10 @@ body: | ; CHECK-LABEL: name: STR_post255 ; CHECK: liveins: $r0, $r1 - ; CHECK: t2STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4) + ; CHECK: t2STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s32)) ; CHECK: renamable $r0 = nuw t2ADDri killed renamable $r0, 255, 14 /* CC::al */, $noreg, $noreg ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 - t2STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4) + t2STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s32)) renamable $r0 = nuw t2ADDri killed renamable $r0, 255, 14 /* CC::al */, $noreg, $noreg tBX_RET 14 /* CC::al */, $noreg, implicit $r0 @@ -462,10 +462,10 @@ body: | ; CHECK-LABEL: name: STR_post256 ; CHECK: liveins: $r0, $r1 - ; CHECK: t2STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4) + ; CHECK: t2STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s32)) ; CHECK: renamable $r0 = nuw t2ADDri killed renamable $r0, 256, 14 /* CC::al */, $noreg, $noreg ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 - t2STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4) + t2STRi12 killed renamable $r1, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s32)) renamable $r0 = nuw t2ADDri killed renamable $r0, 256, 14 /* CC::al */, $noreg, $noreg tBX_RET 14 /* CC::al */, $noreg, implicit $r0 @@ -484,9 +484,9 @@ body: | ; CHECK-LABEL: name: STRD_post4 ; CHECK: liveins: $r0, $r1, $r2 - ; CHECK: $r0 = t2STRD_POST killed renamable $r1, killed renamable $r2, killed $r0, 4, 14 /* CC::al */, $noreg :: (store 8) + ; CHECK: $r0 = t2STRD_POST killed renamable $r1, killed renamable $r2, killed $r0, 4, 14 /* CC::al */, $noreg :: (store (s64)) ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 - t2STRDi8 killed renamable $r1, killed renamable $r2, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 8) + t2STRDi8 killed renamable $r1, killed renamable $r2, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s64)) renamable $r0 = nuw t2ADDri killed renamable $r0, 4, 14 /* CC::al */, $noreg, $noreg tBX_RET 14 /* CC::al */, $noreg, implicit $r0 @@ -505,9 +505,9 @@ body: | ; CHECK-LABEL: name: STRD_post8 ; CHECK: liveins: $r0, $r1, $r2 - ; CHECK: $r0 = t2STRD_POST killed renamable $r1, killed renamable $r2, killed $r0, 8, 14 /* CC::al */, $noreg :: (store 8) + ; CHECK: $r0 = t2STRD_POST killed renamable $r1, killed renamable $r2, killed $r0, 8, 14 /* CC::al */, $noreg :: (store (s64)) ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 - t2STRDi8 killed renamable $r1, killed renamable $r2, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 8) + t2STRDi8 killed renamable $r1, killed renamable $r2, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s64)) renamable $r0 = nuw t2ADDri killed renamable $r0, 8, 14 /* CC::al */, $noreg, $noreg tBX_RET 14 /* CC::al */, $noreg, implicit $r0 @@ -526,10 +526,10 @@ body: | ; CHECK-LABEL: name: STRD_post255 ; CHECK: liveins: $r0, $r1, $r2 - ; CHECK: t2STRDi8 killed $r1, killed $r2, $r0, 0, 14 /* CC::al */, $noreg :: (store 8) + ; CHECK: t2STRDi8 killed $r1, killed $r2, $r0, 0, 14 /* CC::al */, $noreg :: (store (s64)) ; CHECK: renamable $r0 = nuw t2ADDri killed renamable $r0, 255, 14 /* CC::al */, $noreg, $noreg ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 - t2STRDi8 killed renamable $r1, killed renamable $r2, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 8) + t2STRDi8 killed renamable $r1, killed renamable $r2, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s64)) renamable $r0 = nuw t2ADDri killed renamable $r0, 255, 14 /* CC::al */, $noreg, $noreg tBX_RET 14 /* CC::al */, $noreg, implicit $r0 @@ -548,9 +548,9 @@ body: | ; CHECK-LABEL: name: STRD_post256 ; CHECK: liveins: $r0, $r1, $r2 - ; CHECK: $r0 = t2STRD_POST killed renamable $r1, killed renamable $r2, killed $r0, 256, 14 /* CC::al */, $noreg :: (store 8) + ; CHECK: $r0 = t2STRD_POST killed renamable $r1, killed renamable $r2, killed $r0, 256, 14 /* CC::al */, $noreg :: (store (s64)) ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 - t2STRDi8 killed renamable $r1, killed renamable $r2, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 8) + t2STRDi8 killed renamable $r1, killed renamable $r2, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s64)) renamable $r0 = nuw t2ADDri killed renamable $r0, 256, 14 /* CC::al */, $noreg, $noreg tBX_RET 14 /* CC::al */, $noreg, implicit $r0 @@ -569,9 +569,9 @@ body: | ; CHECK-LABEL: name: STRD_post1020 ; CHECK: liveins: $r0, $r1, $r2 - ; CHECK: $r0 = t2STRD_POST killed renamable $r1, killed renamable $r2, killed $r0, 1020, 14 /* CC::al */, $noreg :: (store 8) + ; CHECK: $r0 = t2STRD_POST killed renamable $r1, killed renamable $r2, killed $r0, 1020, 14 /* CC::al */, $noreg :: (store (s64)) ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 - t2STRDi8 killed renamable $r1, killed renamable $r2, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 8) + t2STRDi8 killed renamable $r1, killed renamable $r2, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s64)) renamable $r0 = nuw t2ADDri killed renamable $r0, 1020, 14 /* CC::al */, $noreg, $noreg tBX_RET 14 /* CC::al */, $noreg, implicit $r0 @@ -590,10 +590,10 @@ body: | ; CHECK-LABEL: name: STRD_post1024 ; CHECK: liveins: $r0, $r1, $r2 - ; CHECK: t2STRDi8 killed $r1, killed $r2, $r0, 0, 14 /* CC::al */, $noreg :: (store 8) + ; CHECK: t2STRDi8 killed $r1, killed $r2, $r0, 0, 14 /* CC::al */, $noreg :: (store (s64)) ; CHECK: renamable $r0 = nuw t2ADDri killed renamable $r0, 1024, 14 /* CC::al */, $noreg, $noreg ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 - t2STRDi8 killed renamable $r1, killed renamable $r2, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 8) + t2STRDi8 killed renamable $r1, killed renamable $r2, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s64)) renamable $r0 = nuw t2ADDri killed renamable $r0, 1024, 14 /* CC::al */, $noreg, $noreg tBX_RET 14 /* CC::al */, $noreg, implicit $r0 @@ -612,9 +612,9 @@ body: | ; CHECK-LABEL: name: STRD_postm4 ; CHECK: liveins: $r0, $r1, $r2 - ; CHECK: $r0 = t2STRD_POST killed renamable $r1, killed renamable $r2, killed $r0, -4, 14 /* CC::al */, $noreg :: (store 8) + ; CHECK: $r0 = t2STRD_POST killed renamable $r1, killed renamable $r2, killed $r0, -4, 14 /* CC::al */, $noreg :: (store (s64)) ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 - t2STRDi8 killed renamable $r1, killed renamable $r2, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 8) + t2STRDi8 killed renamable $r1, killed renamable $r2, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s64)) renamable $r0 = nuw t2SUBri killed renamable $r0, 4, 14 /* CC::al */, $noreg, $noreg tBX_RET 14 /* CC::al */, $noreg, implicit $r0 @@ -633,9 +633,9 @@ body: | ; CHECK-LABEL: name: STRD_postm8 ; CHECK: liveins: $r0, $r1, $r2 - ; CHECK: $r0 = t2STRD_POST killed renamable $r1, killed renamable $r2, killed $r0, -8, 14 /* CC::al */, $noreg :: (store 8) + ; CHECK: $r0 = t2STRD_POST killed renamable $r1, killed renamable $r2, killed $r0, -8, 14 /* CC::al */, $noreg :: (store (s64)) ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 - t2STRDi8 killed renamable $r1, killed renamable $r2, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 8) + t2STRDi8 killed renamable $r1, killed renamable $r2, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s64)) renamable $r0 = nuw t2SUBri killed renamable $r0, 8, 14 /* CC::al */, $noreg, $noreg tBX_RET 14 /* CC::al */, $noreg, implicit $r0 @@ -654,10 +654,10 @@ body: | ; CHECK-LABEL: name: STRD_postm255 ; CHECK: liveins: $r0, $r1, $r2 - ; CHECK: t2STRDi8 killed $r1, killed $r2, $r0, 0, 14 /* CC::al */, $noreg :: (store 8) + ; CHECK: t2STRDi8 killed $r1, killed $r2, $r0, 0, 14 /* CC::al */, $noreg :: (store (s64)) ; CHECK: renamable $r0 = nuw t2SUBri killed renamable $r0, 255, 14 /* CC::al */, $noreg, $noreg ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 - t2STRDi8 killed renamable $r1, killed renamable $r2, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 8) + t2STRDi8 killed renamable $r1, killed renamable $r2, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s64)) renamable $r0 = nuw t2SUBri killed renamable $r0, 255, 14 /* CC::al */, $noreg, $noreg tBX_RET 14 /* CC::al */, $noreg, implicit $r0 @@ -676,9 +676,9 @@ body: | ; CHECK-LABEL: name: STRD_postm256 ; CHECK: liveins: $r0, $r1, $r2 - ; CHECK: $r0 = t2STRD_POST killed renamable $r1, killed renamable $r2, killed $r0, 256, 14 /* CC::al */, $noreg :: (store 8) + ; CHECK: $r0 = t2STRD_POST killed renamable $r1, killed renamable $r2, killed $r0, 256, 14 /* CC::al */, $noreg :: (store (s64)) ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 - t2STRDi8 killed renamable $r1, killed renamable $r2, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 8) + t2STRDi8 killed renamable $r1, killed renamable $r2, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s64)) renamable $r0 = nuw t2ADDri killed renamable $r0, 256, 14 /* CC::al */, $noreg, $noreg tBX_RET 14 /* CC::al */, $noreg, implicit $r0 @@ -697,9 +697,9 @@ body: | ; CHECK-LABEL: name: STRD_postm1020 ; CHECK: liveins: $r0, $r1, $r2 - ; CHECK: $r0 = t2STRD_POST killed renamable $r1, killed renamable $r2, killed $r0, -1020, 14 /* CC::al */, $noreg :: (store 8) + ; CHECK: $r0 = t2STRD_POST killed renamable $r1, killed renamable $r2, killed $r0, -1020, 14 /* CC::al */, $noreg :: (store (s64)) ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 - t2STRDi8 killed renamable $r1, killed renamable $r2, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 8) + t2STRDi8 killed renamable $r1, killed renamable $r2, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s64)) renamable $r0 = nuw t2SUBri killed renamable $r0, 1020, 14 /* CC::al */, $noreg, $noreg tBX_RET 14 /* CC::al */, $noreg, implicit $r0 @@ -718,10 +718,10 @@ body: | ; CHECK-LABEL: name: STRD_postm1024 ; CHECK: liveins: $r0, $r1, $r2 - ; CHECK: t2STRDi8 killed $r1, killed $r2, $r0, 0, 14 /* CC::al */, $noreg :: (store 8) + ; CHECK: t2STRDi8 killed $r1, killed $r2, $r0, 0, 14 /* CC::al */, $noreg :: (store (s64)) ; CHECK: renamable $r0 = nuw t2SUBri killed renamable $r0, 1024, 14 /* CC::al */, $noreg, $noreg ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 - t2STRDi8 killed renamable $r1, killed renamable $r2, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 8) + t2STRDi8 killed renamable $r1, killed renamable $r2, renamable $r0, 0, 14 /* CC::al */, $noreg :: (store (s64)) renamable $r0 = nuw t2SUBri killed renamable $r0, 1024, 14 /* CC::al */, $noreg, $noreg tBX_RET 14 /* CC::al */, $noreg, implicit $r0 diff --git a/llvm/test/CodeGen/Thumb2/t2-teq-reduce.mir b/llvm/test/CodeGen/Thumb2/t2-teq-reduce.mir index 1729a59819217..e3b4ec256d285 100644 --- a/llvm/test/CodeGen/Thumb2/t2-teq-reduce.mir +++ b/llvm/test/CodeGen/Thumb2/t2-teq-reduce.mir @@ -104,7 +104,7 @@ body: | ; CHECK: bb.1.while.body.end: ; CHECK: successors: %bb.2(0x80000000) ; CHECK: liveins: $r0, $r1 - ; CHECK: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.next.i.14) + ; CHECK: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.next.i.14) ; CHECK: tCMPi8 renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: BUNDLE implicit-def dead $itstate, implicit killed $cpsr, implicit $r0 { ; CHECK: t2IT 0, 8, implicit-def $itstate @@ -113,19 +113,19 @@ body: | ; CHECK: bb.2.while.begin: ; CHECK: successors: %bb.4(0x04000000), %bb.3(0x7c000000) ; CHECK: liveins: $r0, $r1 - ; CHECK: renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.info.i) - ; CHECK: renamable $r2 = tLDRHi killed renamable $r2, 0, 14 /* CC::al */, $noreg :: (load 2 from %ir.data16.i1) + ; CHECK: renamable $r2 = tLDRi renamable $r0, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.info.i) + ; CHECK: renamable $r2 = tLDRHi killed renamable $r2, 0, 14 /* CC::al */, $noreg :: (load (s16) from %ir.data16.i1) ; CHECK: dead renamable $r2, $cpsr = tEOR killed renamable $r2, renamable $r1, 14 /* CC::al */, $noreg ; CHECK: t2Bcc %bb.4, 0 /* CC::eq */, killed $cpsr ; CHECK: bb.3.while.body.a: ; CHECK: successors: %bb.4(0x4207fef8), %bb.1(0x3df80108) ; CHECK: liveins: $r0, $r1 - ; CHECK: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.next.i2) + ; CHECK: renamable $r0 = tLDRi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.next.i2) ; CHECK: tCMPi8 renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: BUNDLE implicit-def dead $itstate, implicit-def dead $r2, implicit-def $cpsr, implicit $r0, implicit killed $cpsr, implicit $r1 { ; CHECK: t2IT 1, 30, implicit-def $itstate - ; CHECK: renamable $r2 = tLDRi renamable $r0, 1, 1 /* CC::ne */, $cpsr, implicit internal $itstate :: (load 4 from %ir.info.i.1) - ; CHECK: renamable $r2 = tLDRHi internal killed renamable $r2, 0, 1 /* CC::ne */, $cpsr, implicit internal killed $r2, implicit internal $itstate :: (load 2 from %ir.data16.i.13) + ; CHECK: renamable $r2 = tLDRi renamable $r0, 1, 1 /* CC::ne */, $cpsr, implicit internal $itstate :: (load (s32) from %ir.info.i.1) + ; CHECK: renamable $r2 = tLDRHi internal killed renamable $r2, 0, 1 /* CC::ne */, $cpsr, implicit internal killed $r2, implicit internal $itstate :: (load (s16) from %ir.data16.i.13) ; CHECK: t2TEQrr internal killed renamable $r2, renamable $r1, 1 /* CC::ne */, killed $cpsr, implicit-def $cpsr, implicit internal killed $itstate ; CHECK: } ; CHECK: t2Bcc %bb.1, 1 /* CC::ne */, killed $cpsr @@ -142,7 +142,7 @@ body: | successors: %bb.1(0x80000000) liveins: $r0, $r1 - renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (load 4 from %ir.next.i.14) + renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (load (s32) from %ir.next.i.14) tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr BUNDLE implicit-def dead $itstate, implicit killed $cpsr, implicit $r0 { t2IT 0, 8, implicit-def $itstate @@ -153,8 +153,8 @@ body: | successors: %bb.4(0x04000000), %bb.2(0x7c000000) liveins: $r0, $r1 - renamable $r2 = tLDRi renamable $r0, 1, 14, $noreg :: (load 4 from %ir.info.i) - renamable $r2 = tLDRHi killed renamable $r2, 0, 14, $noreg :: (load 2 from %ir.data16.i1) + renamable $r2 = tLDRi renamable $r0, 1, 14, $noreg :: (load (s32) from %ir.info.i) + renamable $r2 = tLDRHi killed renamable $r2, 0, 14, $noreg :: (load (s16) from %ir.data16.i1) dead renamable $r2, $cpsr = tEOR killed renamable $r2, renamable $r1, 14, $noreg t2Bcc %bb.4, 0, killed $cpsr @@ -162,12 +162,12 @@ body: | successors: %bb.4(0x80000000), %bb.3(0x78200000) liveins: $r0, $r1 - renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (load 4 from %ir.next.i2) + renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (load (s32) from %ir.next.i2) tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr BUNDLE implicit-def dead $itstate, implicit-def dead $r2, implicit-def $cpsr, implicit $r0, implicit killed $cpsr, implicit $r1 { t2IT 1, 30, implicit-def $itstate - renamable $r2 = tLDRi renamable $r0, 1, 1, $cpsr, implicit internal $itstate :: (load 4 from %ir.info.i.1) - renamable $r2 = tLDRHi internal killed renamable $r2, 0, 1, $cpsr, implicit internal killed $r2, implicit internal $itstate :: (load 2 from %ir.data16.i.13) + renamable $r2 = tLDRi renamable $r0, 1, 1, $cpsr, implicit internal $itstate :: (load (s32) from %ir.info.i.1) + renamable $r2 = tLDRHi internal killed renamable $r2, 0, 1, $cpsr, implicit internal killed $r2, implicit internal $itstate :: (load (s16) from %ir.data16.i.13) t2TEQrr internal killed renamable $r2, renamable $r1, 1, killed $cpsr, implicit-def $cpsr, implicit internal killed $itstate } t2Bcc %bb.3, 1, killed $cpsr @@ -193,27 +193,27 @@ body: | ; CHECK: bb.1.while.begin: ; CHECK: successors: %bb.5(0x04000000), %bb.2(0x7c000000) ; CHECK: liveins: $r1, $r2 - ; CHECK: renamable $r0 = tLDRi renamable $r2, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.info.i) - ; CHECK: renamable $r0 = tLDRHi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 2 from %ir.data16.i1) + ; CHECK: renamable $r0 = tLDRi renamable $r2, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.info.i) + ; CHECK: renamable $r0 = tLDRHi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load (s16) from %ir.data16.i1) ; CHECK: t2TEQrr renamable $r0, renamable $r1, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: t2Bcc %bb.5, 0 /* CC::eq */, killed $cpsr ; CHECK: bb.2.while.body.a: ; CHECK: successors: %bb.5(0x04000000), %bb.3(0x7c000000) ; CHECK: liveins: $r0, $r1, $r2 - ; CHECK: renamable $r2 = tLDRi killed renamable $r2, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.next.i2) + ; CHECK: renamable $r2 = tLDRi killed renamable $r2, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.next.i2) ; CHECK: tCMPi8 renamable $r2, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: t2Bcc %bb.5, 0 /* CC::eq */, killed $cpsr ; CHECK: bb.3.it.block: ; CHECK: successors: %bb.5(0x04000000), %bb.4(0x7c000000) ; CHECK: liveins: $r1, $r2 - ; CHECK: renamable $r0 = tLDRi renamable $r2, 1, 14 /* CC::al */, $noreg :: (load 4 from %ir.info.i.1) - ; CHECK: renamable $r0 = tLDRHi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load 2 from %ir.data16.i.13) + ; CHECK: renamable $r0 = tLDRi renamable $r2, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.info.i.1) + ; CHECK: renamable $r0 = tLDRHi killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (load (s16) from %ir.data16.i.13) ; CHECK: t2TEQrr renamable $r0, renamable $r1, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: t2Bcc %bb.5, 0 /* CC::eq */, killed $cpsr ; CHECK: bb.4.while.body.end: ; CHECK: successors: %bb.5(0x04000000), %bb.1(0x7c000000) ; CHECK: liveins: $r0, $r1, $r2 - ; CHECK: renamable $r2 = tLDRi killed renamable $r2, 0, 14 /* CC::al */, $noreg :: (load 4 from %ir.next.i.14) + ; CHECK: renamable $r2 = tLDRi killed renamable $r2, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.next.i.14) ; CHECK: tCMPi8 renamable $r2, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK: t2Bcc %bb.1, 1 /* CC::ne */, killed $cpsr ; CHECK: bb.5.exit: @@ -229,8 +229,8 @@ body: | successors: %bb.5(0x04000000), %bb.2(0x7c000000) liveins: $r1, $r2 - renamable $r0 = t2LDRi12 renamable $r2, 4, 14, $noreg :: (load 4 from %ir.info.i) - renamable $r0 = t2LDRHi12 killed renamable $r0, 0, 14, $noreg :: (load 2 from %ir.data16.i1) + renamable $r0 = t2LDRi12 renamable $r2, 4, 14, $noreg :: (load (s32) from %ir.info.i) + renamable $r0 = t2LDRHi12 killed renamable $r0, 0, 14, $noreg :: (load (s16) from %ir.data16.i1) t2TEQrr renamable $r0, renamable $r1, 14, $noreg, implicit-def $cpsr t2Bcc %bb.5, 0, killed $cpsr @@ -238,7 +238,7 @@ body: | successors: %bb.5(0x04000000), %bb.3(0x7c000000) liveins: $r0, $r1, $r2 - renamable $r2 = t2LDRi12 killed renamable $r2, 0, 14, $noreg :: (load 4 from %ir.next.i2) + renamable $r2 = t2LDRi12 killed renamable $r2, 0, 14, $noreg :: (load (s32) from %ir.next.i2) t2CMPri renamable $r2, 0, 14, $noreg, implicit-def $cpsr t2Bcc %bb.5, 0, killed $cpsr @@ -246,8 +246,8 @@ body: | successors: %bb.5(0x04000000), %bb.4(0x7c000000) liveins: $r1, $r2 - renamable $r0 = t2LDRi12 renamable $r2, 4, 14, $noreg :: (load 4 from %ir.info.i.1) - renamable $r0 = t2LDRHi12 killed renamable $r0, 0, 14, $noreg :: (load 2 from %ir.data16.i.13) + renamable $r0 = t2LDRi12 renamable $r2, 4, 14, $noreg :: (load (s32) from %ir.info.i.1) + renamable $r0 = t2LDRHi12 killed renamable $r0, 0, 14, $noreg :: (load (s16) from %ir.data16.i.13) t2TEQrr renamable $r0, renamable $r1, 14, $noreg, implicit-def $cpsr t2Bcc %bb.5, 0, killed $cpsr @@ -255,7 +255,7 @@ body: | successors: %bb.5(0x04000000), %bb.1(0x7c000000) liveins: $r0, $r1, $r2 - renamable $r2 = t2LDRi12 killed renamable $r2, 0, 14, $noreg :: (load 4 from %ir.next.i.14) + renamable $r2 = t2LDRi12 killed renamable $r2, 0, 14, $noreg :: (load (s32) from %ir.next.i.14) t2CMPri renamable $r2, 0, 14, $noreg, implicit-def $cpsr t2Bcc %bb.1, 1, killed $cpsr diff --git a/llvm/test/CodeGen/Thumb2/tbb-removeadd.mir b/llvm/test/CodeGen/Thumb2/tbb-removeadd.mir index e49559fffc6b2..44fd45d8967ac 100644 --- a/llvm/test/CodeGen/Thumb2/tbb-removeadd.mir +++ b/llvm/test/CodeGen/Thumb2/tbb-removeadd.mir @@ -102,7 +102,7 @@ body: | $r0, dead $cpsr = tMOVi8 0, 14, $noreg $r2, dead $cpsr = tMOVi8 1, 14, $noreg - tSTRi killed $r0, $r1, 0, 14, $noreg :: (store 4 into %ir.p) + tSTRi killed $r0, $r1, 0, 14, $noreg :: (store (s32) into %ir.p) t2B %bb.5.sw.epilog.sink.split, 14, $noreg bb.4.sw.bb3: @@ -115,7 +115,7 @@ body: | successors: %bb.6.sw.epilog(0x80000000) liveins: $r1, $r2 - tSTRi killed $r2, killed $r1, 0, 14, $noreg :: (store 4 into %ir.p) + tSTRi killed $r2, killed $r1, 0, 14, $noreg :: (store (s32) into %ir.p) bb.6.sw.epilog: tBX_RET 14, $noreg diff --git a/llvm/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll b/llvm/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll index 71e24b640af71..1ae5136bd4f81 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll +++ b/llvm/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll @@ -10,35 +10,35 @@ define i8 @test_i8_args_8(i8 %arg1, i8 %arg2, i8 %arg3, i8 %arg4, i8 %arg5, i8 % ; X86-LABEL: name: test_i8_args_8 ; X86: bb.1.entry: ; X86: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.7 - ; X86: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 1 from %fixed-stack.7, align 16) + ; X86: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s8) from %fixed-stack.7, align 16) ; X86: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD]](s32) ; X86: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.6 - ; X86: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load 1 from %fixed-stack.6, align 4) + ; X86: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s8) from %fixed-stack.6, align 4) ; X86: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD1]](s32) ; X86: [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.5 - ; X86: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p0) :: (invariant load 1 from %fixed-stack.5, align 8) + ; X86: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p0) :: (invariant load (s8) from %fixed-stack.5, align 8) ; X86: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD2]](s32) ; X86: [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.4 - ; X86: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p0) :: (invariant load 1 from %fixed-stack.4, align 4) + ; X86: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p0) :: (invariant load (s8) from %fixed-stack.4, align 4) ; X86: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD3]](s32) ; X86: [[FRAME_INDEX4:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.3 - ; X86: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p0) :: (invariant load 1 from %fixed-stack.3, align 16) + ; X86: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p0) :: (invariant load (s8) from %fixed-stack.3, align 16) ; X86: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD4]](s32) ; X86: [[FRAME_INDEX5:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2 - ; X86: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p0) :: (invariant load 1 from %fixed-stack.2, align 4) + ; X86: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p0) :: (invariant load (s8) from %fixed-stack.2, align 4) ; X86: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD5]](s32) ; X86: [[FRAME_INDEX6:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 - ; X86: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p0) :: (invariant load 1 from %fixed-stack.1, align 8) + ; X86: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p0) :: (invariant load (s8) from %fixed-stack.1, align 8) ; X86: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD6]](s32) ; X86: [[FRAME_INDEX7:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; X86: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p0) :: (invariant load 1 from %fixed-stack.0, align 4) + ; X86: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p0) :: (invariant load (s8) from %fixed-stack.0, align 4) ; X86: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD7]](s32) ; X86: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a1_8bit ; X86: [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a7_8bit ; X86: [[GV2:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a8_8bit - ; X86: G_STORE [[TRUNC]](s8), [[GV]](p0) :: (store 1 into @a1_8bit) - ; X86: G_STORE [[TRUNC6]](s8), [[GV1]](p0) :: (store 1 into @a7_8bit) - ; X86: G_STORE [[TRUNC7]](s8), [[GV2]](p0) :: (store 1 into @a8_8bit) + ; X86: G_STORE [[TRUNC]](s8), [[GV]](p0) :: (store (s8) into @a1_8bit) + ; X86: G_STORE [[TRUNC6]](s8), [[GV1]](p0) :: (store (s8) into @a7_8bit) + ; X86: G_STORE [[TRUNC7]](s8), [[GV2]](p0) :: (store (s8) into @a8_8bit) ; X86: $al = COPY [[TRUNC]](s8) ; X86: RET 0, implicit $al ; X64-LABEL: name: test_i8_args_8 @@ -57,17 +57,17 @@ define i8 @test_i8_args_8(i8 %arg1, i8 %arg2, i8 %arg3, i8 %arg4, i8 %arg5, i8 % ; X64: [[COPY5:%[0-9]+]]:_(s32) = COPY $r9d ; X64: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[COPY5]](s32) ; X64: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 - ; X64: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 1 from %fixed-stack.1, align 16) + ; X64: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s8) from %fixed-stack.1, align 16) ; X64: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD]](s32) ; X64: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; X64: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load 1 from %fixed-stack.0, align 8) + ; X64: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s8) from %fixed-stack.0, align 8) ; X64: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD1]](s32) ; X64: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a1_8bit ; X64: [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a7_8bit ; X64: [[GV2:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a8_8bit - ; X64: G_STORE [[TRUNC]](s8), [[GV]](p0) :: (store 1 into @a1_8bit) - ; X64: G_STORE [[TRUNC6]](s8), [[GV1]](p0) :: (store 1 into @a7_8bit) - ; X64: G_STORE [[TRUNC7]](s8), [[GV2]](p0) :: (store 1 into @a8_8bit) + ; X64: G_STORE [[TRUNC]](s8), [[GV]](p0) :: (store (s8) into @a1_8bit) + ; X64: G_STORE [[TRUNC6]](s8), [[GV1]](p0) :: (store (s8) into @a7_8bit) + ; X64: G_STORE [[TRUNC7]](s8), [[GV2]](p0) :: (store (s8) into @a8_8bit) ; X64: $al = COPY [[TRUNC]](s8) ; X64: RET 0, implicit $al entry: @@ -85,27 +85,27 @@ define i32 @test_i32_args_8(i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg ; X86-LABEL: name: test_i32_args_8 ; X86: bb.1.entry: ; X86: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.7 - ; X86: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 4 from %fixed-stack.7, align 16) + ; X86: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s32) from %fixed-stack.7, align 16) ; X86: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.6 - ; X86: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load 4 from %fixed-stack.6) + ; X86: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s32) from %fixed-stack.6) ; X86: [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.5 - ; X86: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p0) :: (invariant load 4 from %fixed-stack.5, align 8) + ; X86: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p0) :: (invariant load (s32) from %fixed-stack.5, align 8) ; X86: [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.4 - ; X86: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p0) :: (invariant load 4 from %fixed-stack.4) + ; X86: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p0) :: (invariant load (s32) from %fixed-stack.4) ; X86: [[FRAME_INDEX4:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.3 - ; X86: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p0) :: (invariant load 4 from %fixed-stack.3, align 16) + ; X86: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p0) :: (invariant load (s32) from %fixed-stack.3, align 16) ; X86: [[FRAME_INDEX5:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2 - ; X86: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p0) :: (invariant load 4 from %fixed-stack.2) + ; X86: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p0) :: (invariant load (s32) from %fixed-stack.2) ; X86: [[FRAME_INDEX6:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 - ; X86: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p0) :: (invariant load 4 from %fixed-stack.1, align 8) + ; X86: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p0) :: (invariant load (s32) from %fixed-stack.1, align 8) ; X86: [[FRAME_INDEX7:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; X86: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p0) :: (invariant load 4 from %fixed-stack.0) + ; X86: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p0) :: (invariant load (s32) from %fixed-stack.0) ; X86: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a1_32bit ; X86: [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a7_32bit ; X86: [[GV2:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a8_32bit - ; X86: G_STORE [[LOAD]](s32), [[GV]](p0) :: (store 4 into @a1_32bit) - ; X86: G_STORE [[LOAD6]](s32), [[GV1]](p0) :: (store 4 into @a7_32bit) - ; X86: G_STORE [[LOAD7]](s32), [[GV2]](p0) :: (store 4 into @a8_32bit) + ; X86: G_STORE [[LOAD]](s32), [[GV]](p0) :: (store (s32) into @a1_32bit) + ; X86: G_STORE [[LOAD6]](s32), [[GV1]](p0) :: (store (s32) into @a7_32bit) + ; X86: G_STORE [[LOAD7]](s32), [[GV2]](p0) :: (store (s32) into @a8_32bit) ; X86: $eax = COPY [[LOAD]](s32) ; X86: RET 0, implicit $eax ; X64-LABEL: name: test_i32_args_8 @@ -118,15 +118,15 @@ define i32 @test_i32_args_8(i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg ; X64: [[COPY4:%[0-9]+]]:_(s32) = COPY $r8d ; X64: [[COPY5:%[0-9]+]]:_(s32) = COPY $r9d ; X64: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 - ; X64: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 4 from %fixed-stack.1, align 16) + ; X64: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s32) from %fixed-stack.1, align 16) ; X64: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; X64: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load 4 from %fixed-stack.0, align 8) + ; X64: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s32) from %fixed-stack.0, align 8) ; X64: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a1_32bit ; X64: [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a7_32bit ; X64: [[GV2:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a8_32bit - ; X64: G_STORE [[COPY]](s32), [[GV]](p0) :: (store 4 into @a1_32bit) - ; X64: G_STORE [[LOAD]](s32), [[GV1]](p0) :: (store 4 into @a7_32bit) - ; X64: G_STORE [[LOAD1]](s32), [[GV2]](p0) :: (store 4 into @a8_32bit) + ; X64: G_STORE [[COPY]](s32), [[GV]](p0) :: (store (s32) into @a1_32bit) + ; X64: G_STORE [[LOAD]](s32), [[GV1]](p0) :: (store (s32) into @a7_32bit) + ; X64: G_STORE [[LOAD1]](s32), [[GV2]](p0) :: (store (s32) into @a8_32bit) ; X64: $eax = COPY [[COPY]](s32) ; X64: RET 0, implicit $eax entry: @@ -145,51 +145,51 @@ define i64 @test_i64_args_8(i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %arg ; X86-LABEL: name: test_i64_args_8 ; X86: bb.1.entry: ; X86: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.15 - ; X86: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 4 from %fixed-stack.15, align 16) + ; X86: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s32) from %fixed-stack.15, align 16) ; X86: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.14 - ; X86: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load 4 from %fixed-stack.14) + ; X86: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s32) from %fixed-stack.14) ; X86: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32) ; X86: [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.13 - ; X86: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p0) :: (invariant load 4 from %fixed-stack.13, align 8) + ; X86: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p0) :: (invariant load (s32) from %fixed-stack.13, align 8) ; X86: [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.12 - ; X86: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p0) :: (invariant load 4 from %fixed-stack.12) + ; X86: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p0) :: (invariant load (s32) from %fixed-stack.12) ; X86: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) ; X86: [[FRAME_INDEX4:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.11 - ; X86: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p0) :: (invariant load 4 from %fixed-stack.11, align 16) + ; X86: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p0) :: (invariant load (s32) from %fixed-stack.11, align 16) ; X86: [[FRAME_INDEX5:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.10 - ; X86: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p0) :: (invariant load 4 from %fixed-stack.10) + ; X86: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p0) :: (invariant load (s32) from %fixed-stack.10) ; X86: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) ; X86: [[FRAME_INDEX6:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.9 - ; X86: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p0) :: (invariant load 4 from %fixed-stack.9, align 8) + ; X86: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p0) :: (invariant load (s32) from %fixed-stack.9, align 8) ; X86: [[FRAME_INDEX7:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.8 - ; X86: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p0) :: (invariant load 4 from %fixed-stack.8) + ; X86: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p0) :: (invariant load (s32) from %fixed-stack.8) ; X86: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32) ; X86: [[FRAME_INDEX8:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.7 - ; X86: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p0) :: (invariant load 4 from %fixed-stack.7, align 16) + ; X86: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p0) :: (invariant load (s32) from %fixed-stack.7, align 16) ; X86: [[FRAME_INDEX9:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.6 - ; X86: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX9]](p0) :: (invariant load 4 from %fixed-stack.6) + ; X86: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX9]](p0) :: (invariant load (s32) from %fixed-stack.6) ; X86: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD8]](s32), [[LOAD9]](s32) ; X86: [[FRAME_INDEX10:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.5 - ; X86: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX10]](p0) :: (invariant load 4 from %fixed-stack.5, align 8) + ; X86: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX10]](p0) :: (invariant load (s32) from %fixed-stack.5, align 8) ; X86: [[FRAME_INDEX11:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.4 - ; X86: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX11]](p0) :: (invariant load 4 from %fixed-stack.4) + ; X86: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX11]](p0) :: (invariant load (s32) from %fixed-stack.4) ; X86: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD10]](s32), [[LOAD11]](s32) ; X86: [[FRAME_INDEX12:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.3 - ; X86: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX12]](p0) :: (invariant load 4 from %fixed-stack.3, align 16) + ; X86: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX12]](p0) :: (invariant load (s32) from %fixed-stack.3, align 16) ; X86: [[FRAME_INDEX13:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2 - ; X86: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX13]](p0) :: (invariant load 4 from %fixed-stack.2) + ; X86: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX13]](p0) :: (invariant load (s32) from %fixed-stack.2) ; X86: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD12]](s32), [[LOAD13]](s32) ; X86: [[FRAME_INDEX14:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 - ; X86: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p0) :: (invariant load 4 from %fixed-stack.1, align 8) + ; X86: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p0) :: (invariant load (s32) from %fixed-stack.1, align 8) ; X86: [[FRAME_INDEX15:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; X86: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX15]](p0) :: (invariant load 4 from %fixed-stack.0) + ; X86: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX15]](p0) :: (invariant load (s32) from %fixed-stack.0) ; X86: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD14]](s32), [[LOAD15]](s32) ; X86: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a1_64bit ; X86: [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a7_64bit ; X86: [[GV2:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a8_64bit - ; X86: G_STORE [[MV]](s64), [[GV]](p0) :: (store 8 into @a1_64bit, align 4) - ; X86: G_STORE [[MV6]](s64), [[GV1]](p0) :: (store 8 into @a7_64bit, align 4) - ; X86: G_STORE [[MV7]](s64), [[GV2]](p0) :: (store 8 into @a8_64bit, align 4) + ; X86: G_STORE [[MV]](s64), [[GV]](p0) :: (store (s64) into @a1_64bit, align 4) + ; X86: G_STORE [[MV6]](s64), [[GV1]](p0) :: (store (s64) into @a7_64bit, align 4) + ; X86: G_STORE [[MV7]](s64), [[GV2]](p0) :: (store (s64) into @a8_64bit, align 4) ; X86: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64) ; X86: $eax = COPY [[UV]](s32) ; X86: $edx = COPY [[UV1]](s32) @@ -204,15 +204,15 @@ define i64 @test_i64_args_8(i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %arg ; X64: [[COPY4:%[0-9]+]]:_(s64) = COPY $r8 ; X64: [[COPY5:%[0-9]+]]:_(s64) = COPY $r9 ; X64: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 - ; X64: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 8 from %fixed-stack.1, align 16) + ; X64: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s64) from %fixed-stack.1, align 16) ; X64: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; X64: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load 8 from %fixed-stack.0) + ; X64: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s64) from %fixed-stack.0) ; X64: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a1_64bit ; X64: [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a7_64bit ; X64: [[GV2:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a8_64bit - ; X64: G_STORE [[COPY]](s64), [[GV]](p0) :: (store 8 into @a1_64bit) - ; X64: G_STORE [[LOAD]](s64), [[GV1]](p0) :: (store 8 into @a7_64bit) - ; X64: G_STORE [[LOAD1]](s64), [[GV2]](p0) :: (store 8 into @a8_64bit) + ; X64: G_STORE [[COPY]](s64), [[GV]](p0) :: (store (s64) into @a1_64bit) + ; X64: G_STORE [[LOAD]](s64), [[GV1]](p0) :: (store (s64) into @a7_64bit) + ; X64: G_STORE [[LOAD1]](s64), [[GV2]](p0) :: (store (s64) into @a8_64bit) ; X64: $rax = COPY [[COPY]](s64) ; X64: RET 0, implicit $rax entry: @@ -226,9 +226,9 @@ define float @test_float_args(float %arg1, float %arg2) { ; X86-LABEL: name: test_float_args ; X86: bb.1 (%ir-block.0): ; X86: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 - ; X86: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 4 from %fixed-stack.1, align 16) + ; X86: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s32) from %fixed-stack.1, align 16) ; X86: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; X86: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load 4 from %fixed-stack.0) + ; X86: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s32) from %fixed-stack.0) ; X86: $fp0 = COPY [[LOAD1]](s32) ; X86: RET 0, implicit $fp0 ; X64-LABEL: name: test_float_args @@ -245,9 +245,9 @@ define double @test_double_args(double %arg1, double %arg2) { ; X86-LABEL: name: test_double_args ; X86: bb.1 (%ir-block.0): ; X86: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 - ; X86: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 8 from %fixed-stack.1, align 16) + ; X86: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s64) from %fixed-stack.1, align 16) ; X86: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; X86: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load 8 from %fixed-stack.0) + ; X86: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s64) from %fixed-stack.0) ; X86: $fp0 = COPY [[LOAD1]](s64) ; X86: RET 0, implicit $fp0 ; X64-LABEL: name: test_double_args @@ -317,7 +317,7 @@ define i32 * @test_memop_i32(i32 * %p1) { ; X86-LABEL: name: test_memop_i32 ; X86: bb.1 (%ir-block.0): ; X86: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; X86: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 4 from %fixed-stack.0, align 16) + ; X86: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s32) from %fixed-stack.0, align 16) ; X86: $eax = COPY [[LOAD]](p0) ; X86: RET 0, implicit $eax ; X64-LABEL: name: test_memop_i32 @@ -352,18 +352,18 @@ define void @test_simple_arg(i32 %in0, i32 %in1) { ; X86-LABEL: name: test_simple_arg ; X86: bb.1 (%ir-block.0): ; X86: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 - ; X86: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 4 from %fixed-stack.1, align 16) + ; X86: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s32) from %fixed-stack.1, align 16) ; X86: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; X86: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load 4 from %fixed-stack.0) + ; X86: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s32) from %fixed-stack.0) ; X86: ADJCALLSTACKDOWN32 8, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp ; X86: [[COPY:%[0-9]+]]:_(p0) = COPY $esp ; X86: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; X86: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) - ; X86: G_STORE [[LOAD1]](s32), [[PTR_ADD]](p0) :: (store 4 into stack, align 1) + ; X86: G_STORE [[LOAD1]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack, align 1) ; X86: [[COPY1:%[0-9]+]]:_(p0) = COPY $esp ; X86: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; X86: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s32) - ; X86: G_STORE [[LOAD]](s32), [[PTR_ADD1]](p0) :: (store 4 into stack + 4, align 1) + ; X86: G_STORE [[LOAD]](s32), [[PTR_ADD1]](p0) :: (store (s32) into stack + 4, align 1) ; X86: CALLpcrel32 @simple_arg_callee, csr_32, implicit $esp, implicit $ssp ; X86: ADJCALLSTACKUP32 8, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp ; X86: RET 0 @@ -387,40 +387,40 @@ define void @test_simple_arg8_call(i32 %in0) { ; X86-LABEL: name: test_simple_arg8_call ; X86: bb.1 (%ir-block.0): ; X86: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; X86: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 4 from %fixed-stack.0, align 16) + ; X86: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s32) from %fixed-stack.0, align 16) ; X86: ADJCALLSTACKDOWN32 32, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp ; X86: [[COPY:%[0-9]+]]:_(p0) = COPY $esp ; X86: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; X86: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) - ; X86: G_STORE [[LOAD]](s32), [[PTR_ADD]](p0) :: (store 4 into stack, align 1) + ; X86: G_STORE [[LOAD]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack, align 1) ; X86: [[COPY1:%[0-9]+]]:_(p0) = COPY $esp ; X86: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; X86: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s32) - ; X86: G_STORE [[LOAD]](s32), [[PTR_ADD1]](p0) :: (store 4 into stack + 4, align 1) + ; X86: G_STORE [[LOAD]](s32), [[PTR_ADD1]](p0) :: (store (s32) into stack + 4, align 1) ; X86: [[COPY2:%[0-9]+]]:_(p0) = COPY $esp ; X86: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; X86: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY2]], [[C2]](s32) - ; X86: G_STORE [[LOAD]](s32), [[PTR_ADD2]](p0) :: (store 4 into stack + 8, align 1) + ; X86: G_STORE [[LOAD]](s32), [[PTR_ADD2]](p0) :: (store (s32) into stack + 8, align 1) ; X86: [[COPY3:%[0-9]+]]:_(p0) = COPY $esp ; X86: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; X86: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY3]], [[C3]](s32) - ; X86: G_STORE [[LOAD]](s32), [[PTR_ADD3]](p0) :: (store 4 into stack + 12, align 1) + ; X86: G_STORE [[LOAD]](s32), [[PTR_ADD3]](p0) :: (store (s32) into stack + 12, align 1) ; X86: [[COPY4:%[0-9]+]]:_(p0) = COPY $esp ; X86: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; X86: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C4]](s32) - ; X86: G_STORE [[LOAD]](s32), [[PTR_ADD4]](p0) :: (store 4 into stack + 16, align 1) + ; X86: G_STORE [[LOAD]](s32), [[PTR_ADD4]](p0) :: (store (s32) into stack + 16, align 1) ; X86: [[COPY5:%[0-9]+]]:_(p0) = COPY $esp ; X86: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; X86: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY5]], [[C5]](s32) - ; X86: G_STORE [[LOAD]](s32), [[PTR_ADD5]](p0) :: (store 4 into stack + 20, align 1) + ; X86: G_STORE [[LOAD]](s32), [[PTR_ADD5]](p0) :: (store (s32) into stack + 20, align 1) ; X86: [[COPY6:%[0-9]+]]:_(p0) = COPY $esp ; X86: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; X86: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY6]], [[C6]](s32) - ; X86: G_STORE [[LOAD]](s32), [[PTR_ADD6]](p0) :: (store 4 into stack + 24, align 1) + ; X86: G_STORE [[LOAD]](s32), [[PTR_ADD6]](p0) :: (store (s32) into stack + 24, align 1) ; X86: [[COPY7:%[0-9]+]]:_(p0) = COPY $esp ; X86: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 ; X86: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY7]], [[C7]](s32) - ; X86: G_STORE [[LOAD]](s32), [[PTR_ADD7]](p0) :: (store 4 into stack + 28, align 1) + ; X86: G_STORE [[LOAD]](s32), [[PTR_ADD7]](p0) :: (store (s32) into stack + 28, align 1) ; X86: CALLpcrel32 @simple_arg8_callee, csr_32, implicit $esp, implicit $ssp ; X86: ADJCALLSTACKUP32 32, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp ; X86: RET 0 @@ -438,11 +438,11 @@ define void @test_simple_arg8_call(i32 %in0) { ; X64: [[COPY1:%[0-9]+]]:_(p0) = COPY $rsp ; X64: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; X64: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64) - ; X64: G_STORE [[COPY]](s32), [[PTR_ADD]](p0) :: (store 4 into stack, align 1) + ; X64: G_STORE [[COPY]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack, align 1) ; X64: [[COPY2:%[0-9]+]]:_(p0) = COPY $rsp ; X64: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; X64: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY2]], [[C1]](s64) - ; X64: G_STORE [[COPY]](s32), [[PTR_ADD1]](p0) :: (store 4 into stack + 8, align 1) + ; X64: G_STORE [[COPY]](s32), [[PTR_ADD1]](p0) :: (store (s32) into stack + 8, align 1) ; X64: CALL64pcrel32 @simple_arg8_callee, csr_64, implicit $rsp, implicit $ssp, implicit $edi, implicit $esi, implicit $edx, implicit $ecx, implicit $r8d, implicit $r9d ; X64: ADJCALLSTACKUP64 16, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp ; X64: RET 0 @@ -459,7 +459,7 @@ define i32 @test_simple_return_callee() { ; X86: [[COPY:%[0-9]+]]:_(p0) = COPY $esp ; X86: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; X86: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; X86: G_STORE [[C]](s32), [[PTR_ADD]](p0) :: (store 4 into stack, align 1) + ; X86: G_STORE [[C]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack, align 1) ; X86: CALLpcrel32 @simple_return_callee, csr_32, implicit $esp, implicit $ssp, implicit-def $eax ; X86: [[COPY1:%[0-9]+]]:_(s32) = COPY $eax ; X86: ADJCALLSTACKUP32 4, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp @@ -492,7 +492,7 @@ define <8 x i32> @test_split_return_callee(<8 x i32> %arg1, <8 x i32> %arg2) { ; X86: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY]](<4 x s32>), [[COPY1]](<4 x s32>) ; X86: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $xmm2 ; X86: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; X86: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 16 from %fixed-stack.0) + ; X86: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s128) from %fixed-stack.0) ; X86: [[CONCAT_VECTORS1:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY2]](<4 x s32>), [[LOAD]](<4 x s32>) ; X86: ADJCALLSTACKDOWN32 0, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp ; X86: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<8 x s32>) @@ -540,7 +540,7 @@ define void @test_indirect_call(void()* %func) { ; X86-LABEL: name: test_indirect_call ; X86: bb.1 (%ir-block.0): ; X86: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; X86: [[LOAD:%[0-9]+]]:gr32(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 4 from %fixed-stack.0, align 16) + ; X86: [[LOAD:%[0-9]+]]:gr32(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s32) from %fixed-stack.0, align 16) ; X86: ADJCALLSTACKDOWN32 0, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp ; X86: CALL32r [[LOAD]](p0), csr_32, implicit $esp, implicit $ssp ; X86: ADJCALLSTACKUP32 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp @@ -563,28 +563,28 @@ define void @test_abi_exts_call(i8* %addr) { ; X86-LABEL: name: test_abi_exts_call ; X86: bb.1 (%ir-block.0): ; X86: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; X86: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 4 from %fixed-stack.0, align 16) - ; X86: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p0) :: (load 1 from %ir.addr) + ; X86: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s32) from %fixed-stack.0, align 16) + ; X86: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p0) :: (load (s8) from %ir.addr) ; X86: ADJCALLSTACKDOWN32 4, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp ; X86: [[COPY:%[0-9]+]]:_(p0) = COPY $esp ; X86: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; X86: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) ; X86: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD1]](s8) - ; X86: G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p0) :: (store 4 into stack, align 1) + ; X86: G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack, align 1) ; X86: CALLpcrel32 @take_char, csr_32, implicit $esp, implicit $ssp ; X86: ADJCALLSTACKUP32 4, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp ; X86: ADJCALLSTACKDOWN32 4, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp ; X86: [[COPY1:%[0-9]+]]:_(p0) = COPY $esp ; X86: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s32) ; X86: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s8) - ; X86: G_STORE [[SEXT]](s32), [[PTR_ADD1]](p0) :: (store 4 into stack, align 1) + ; X86: G_STORE [[SEXT]](s32), [[PTR_ADD1]](p0) :: (store (s32) into stack, align 1) ; X86: CALLpcrel32 @take_char, csr_32, implicit $esp, implicit $ssp ; X86: ADJCALLSTACKUP32 4, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp ; X86: ADJCALLSTACKDOWN32 4, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp ; X86: [[COPY2:%[0-9]+]]:_(p0) = COPY $esp ; X86: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY2]], [[C]](s32) ; X86: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8) - ; X86: G_STORE [[ZEXT]](s32), [[PTR_ADD2]](p0) :: (store 4 into stack, align 1) + ; X86: G_STORE [[ZEXT]](s32), [[PTR_ADD2]](p0) :: (store (s32) into stack, align 1) ; X86: CALLpcrel32 @take_char, csr_32, implicit $esp, implicit $ssp ; X86: ADJCALLSTACKUP32 4, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp ; X86: RET 0 @@ -592,7 +592,7 @@ define void @test_abi_exts_call(i8* %addr) { ; X64: bb.1 (%ir-block.0): ; X64: liveins: $rdi ; X64: [[COPY:%[0-9]+]]:_(p0) = COPY $rdi - ; X64: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0) :: (load 1 from %ir.addr) + ; X64: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0) :: (load (s8) from %ir.addr) ; X64: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp ; X64: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s8) ; X64: $edi = COPY [[ANYEXT]](s32) @@ -621,20 +621,20 @@ define void @test_variadic_call_1(i8** %addr_ptr, i32* %val_ptr) { ; X86-LABEL: name: test_variadic_call_1 ; X86: bb.1 (%ir-block.0): ; X86: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 - ; X86: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 4 from %fixed-stack.1, align 16) + ; X86: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s32) from %fixed-stack.1, align 16) ; X86: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; X86: [[LOAD1:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load 4 from %fixed-stack.0) - ; X86: [[LOAD2:%[0-9]+]]:_(p0) = G_LOAD [[LOAD]](p0) :: (load 4 from %ir.addr_ptr) - ; X86: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[LOAD1]](p0) :: (load 4 from %ir.val_ptr) + ; X86: [[LOAD1:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s32) from %fixed-stack.0) + ; X86: [[LOAD2:%[0-9]+]]:_(p0) = G_LOAD [[LOAD]](p0) :: (load (p0) from %ir.addr_ptr) + ; X86: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[LOAD1]](p0) :: (load (s32) from %ir.val_ptr) ; X86: ADJCALLSTACKDOWN32 8, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp ; X86: [[COPY:%[0-9]+]]:_(p0) = COPY $esp ; X86: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; X86: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) - ; X86: G_STORE [[LOAD2]](p0), [[PTR_ADD]](p0) :: (store 4 into stack, align 1) + ; X86: G_STORE [[LOAD2]](p0), [[PTR_ADD]](p0) :: (store (s32) into stack, align 1) ; X86: [[COPY1:%[0-9]+]]:_(p0) = COPY $esp ; X86: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; X86: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s32) - ; X86: G_STORE [[LOAD3]](s32), [[PTR_ADD1]](p0) :: (store 4 into stack + 4, align 1) + ; X86: G_STORE [[LOAD3]](s32), [[PTR_ADD1]](p0) :: (store (s32) into stack + 4, align 1) ; X86: CALLpcrel32 @variadic_callee, csr_32, implicit $esp, implicit $ssp ; X86: ADJCALLSTACKUP32 8, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp ; X86: RET 0 @@ -643,8 +643,8 @@ define void @test_variadic_call_1(i8** %addr_ptr, i32* %val_ptr) { ; X64: liveins: $rdi, $rsi ; X64: [[COPY:%[0-9]+]]:_(p0) = COPY $rdi ; X64: [[COPY1:%[0-9]+]]:_(p0) = COPY $rsi - ; X64: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[COPY]](p0) :: (load 8 from %ir.addr_ptr) - ; X64: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p0) :: (load 4 from %ir.val_ptr) + ; X64: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[COPY]](p0) :: (load (p0) from %ir.addr_ptr) + ; X64: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p0) :: (load (s32) from %ir.val_ptr) ; X64: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp ; X64: $rdi = COPY [[LOAD]](p0) ; X64: $esi = COPY [[LOAD1]](s32) @@ -662,20 +662,20 @@ define void @test_variadic_call_2(i8** %addr_ptr, double* %val_ptr) { ; X86-LABEL: name: test_variadic_call_2 ; X86: bb.1 (%ir-block.0): ; X86: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 - ; X86: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 4 from %fixed-stack.1, align 16) + ; X86: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s32) from %fixed-stack.1, align 16) ; X86: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; X86: [[LOAD1:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load 4 from %fixed-stack.0) - ; X86: [[LOAD2:%[0-9]+]]:_(p0) = G_LOAD [[LOAD]](p0) :: (load 4 from %ir.addr_ptr) - ; X86: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[LOAD1]](p0) :: (load 8 from %ir.val_ptr, align 4) + ; X86: [[LOAD1:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s32) from %fixed-stack.0) + ; X86: [[LOAD2:%[0-9]+]]:_(p0) = G_LOAD [[LOAD]](p0) :: (load (p0) from %ir.addr_ptr) + ; X86: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[LOAD1]](p0) :: (load (s64) from %ir.val_ptr, align 4) ; X86: ADJCALLSTACKDOWN32 12, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp ; X86: [[COPY:%[0-9]+]]:_(p0) = COPY $esp ; X86: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; X86: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) - ; X86: G_STORE [[LOAD2]](p0), [[PTR_ADD]](p0) :: (store 4 into stack, align 1) + ; X86: G_STORE [[LOAD2]](p0), [[PTR_ADD]](p0) :: (store (s32) into stack, align 1) ; X86: [[COPY1:%[0-9]+]]:_(p0) = COPY $esp ; X86: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; X86: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s32) - ; X86: G_STORE [[LOAD3]](s64), [[PTR_ADD1]](p0) :: (store 8 into stack + 4, align 1) + ; X86: G_STORE [[LOAD3]](s64), [[PTR_ADD1]](p0) :: (store (s64) into stack + 4, align 1) ; X86: CALLpcrel32 @variadic_callee, csr_32, implicit $esp, implicit $ssp ; X86: ADJCALLSTACKUP32 12, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp ; X86: RET 0 @@ -684,8 +684,8 @@ define void @test_variadic_call_2(i8** %addr_ptr, double* %val_ptr) { ; X64: liveins: $rdi, $rsi ; X64: [[COPY:%[0-9]+]]:_(p0) = COPY $rdi ; X64: [[COPY1:%[0-9]+]]:_(p0) = COPY $rsi - ; X64: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[COPY]](p0) :: (load 8 from %ir.addr_ptr) - ; X64: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[COPY1]](p0) :: (load 8 from %ir.val_ptr) + ; X64: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[COPY]](p0) :: (load (p0) from %ir.addr_ptr) + ; X64: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[COPY1]](p0) :: (load (s64) from %ir.val_ptr) ; X64: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp ; X64: $rdi = COPY [[LOAD]](p0) ; X64: $xmm0 = COPY [[LOAD1]](s64) diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-memop-scalar-32.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-memop-scalar-32.mir index 16c129b89c548..71d63ffec178a 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-memop-scalar-32.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-memop-scalar-32.mir @@ -10,31 +10,31 @@ body: | bb.0: ; X32-LABEL: name: test_memop_s8tos32 ; X32: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF - ; X32: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p0) :: (load 1) - ; X32: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p0) :: (load 1) - ; X32: [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p0) :: (load 2) - ; X32: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p0) :: (load 4) - ; X32: [[LOAD4:%[0-9]+]]:_(p0) = G_LOAD [[DEF]](p0) :: (load 4) + ; X32: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p0) :: (load (s1)) + ; X32: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p0) :: (load (s8)) + ; X32: [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p0) :: (load (s16)) + ; X32: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p0) :: (load (s32)) + ; X32: [[LOAD4:%[0-9]+]]:_(p0) = G_LOAD [[DEF]](p0) :: (load (p0)) ; X32: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1 ; X32: [[COPY:%[0-9]+]]:_(s8) = COPY [[LOAD]](s8) ; X32: [[AND:%[0-9]+]]:_(s8) = G_AND [[COPY]], [[C]] - ; X32: G_STORE [[AND]](s8), [[DEF]](p0) :: (store 1) - ; X32: G_STORE [[LOAD1]](s8), [[DEF]](p0) :: (store 1) - ; X32: G_STORE [[LOAD2]](s16), [[DEF]](p0) :: (store 2) - ; X32: G_STORE [[LOAD3]](s32), [[DEF]](p0) :: (store 4) - ; X32: G_STORE [[LOAD4]](p0), [[DEF]](p0) :: (store 4) + ; X32: G_STORE [[AND]](s8), [[DEF]](p0) :: (store (s1)) + ; X32: G_STORE [[LOAD1]](s8), [[DEF]](p0) :: (store (s8)) + ; X32: G_STORE [[LOAD2]](s16), [[DEF]](p0) :: (store (s16)) + ; X32: G_STORE [[LOAD3]](s32), [[DEF]](p0) :: (store (s32)) + ; X32: G_STORE [[LOAD4]](p0), [[DEF]](p0) :: (store (p0)) %0:_(p0) = IMPLICIT_DEF - %9:_(s1) = G_LOAD %0 :: (load 1) - %1:_(s8) = G_LOAD %0 :: (load 1) - %2:_(s16) = G_LOAD %0 :: (load 2) - %3:_(s32) = G_LOAD %0 :: (load 4) - %4:_(p0) = G_LOAD %0 :: (load 4) + %9:_(s1) = G_LOAD %0 :: (load (s1)) + %1:_(s8) = G_LOAD %0 :: (load (s8)) + %2:_(s16) = G_LOAD %0 :: (load (s16)) + %3:_(s32) = G_LOAD %0 :: (load (s32)) + %4:_(p0) = G_LOAD %0 :: (load (p0)) - G_STORE %9, %0 :: (store 1) - G_STORE %1, %0 :: (store 1) - G_STORE %2, %0 :: (store 2) - G_STORE %3, %0 :: (store 4) - G_STORE %4, %0 :: (store 4) + G_STORE %9, %0 :: (store (s1)) + G_STORE %1, %0 :: (store (s8)) + G_STORE %2, %0 :: (store (s16)) + G_STORE %3, %0 :: (store (s32)) + G_STORE %4, %0 :: (store (p0)) ... --- name: test_memop_s64 @@ -47,16 +47,16 @@ body: | ; X32-LABEL: name: test_memop_s64 ; X32: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF - ; X32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p0) :: (load 4, align 8) + ; X32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p0) :: (load (s32), align 8) ; X32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; X32: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[C]](s32) - ; X32: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p0) :: (load 4 from unknown-address + 4) - ; X32: G_STORE [[LOAD]](s32), [[DEF]](p0) :: (store 4, align 8) - ; X32: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[C]](s32) - ; X32: G_STORE [[LOAD1]](s32), [[GEP1]](p0) :: (store 4 into unknown-address + 4) + ; X32: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[C]](s32) + ; X32: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4) + ; X32: G_STORE [[LOAD]](s32), [[DEF]](p0) :: (store (s32), align 8) + ; X32: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[C]](s32) + ; X32: G_STORE [[LOAD1]](s32), [[PTR_ADD1]](p0) :: (store (s32) into unknown-address + 4) %0:_(p0) = IMPLICIT_DEF - %1:_(s64) = G_LOAD %0 :: (load 8) + %1:_(s64) = G_LOAD %0 :: (load (s64)) - G_STORE %1, %0 :: (store 8) + G_STORE %1, %0 :: (store (s64)) ... diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-memop-scalar-64.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-memop-scalar-64.mir index a550a82383108..05a6eaea2df7c 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-memop-scalar-64.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-memop-scalar-64.mir @@ -10,31 +10,31 @@ body: | bb.0: ; X64-LABEL: name: test_memop_s8tos32 ; X64: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF - ; X64: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p0) :: (load 1) - ; X64: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p0) :: (load 1) - ; X64: [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p0) :: (load 2) - ; X64: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p0) :: (load 4) - ; X64: [[LOAD4:%[0-9]+]]:_(p0) = G_LOAD [[DEF]](p0) :: (load 4) + ; X64: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p0) :: (load (s1)) + ; X64: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p0) :: (load (s8)) + ; X64: [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p0) :: (load (s16)) + ; X64: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p0) :: (load (s32)) + ; X64: [[LOAD4:%[0-9]+]]:_(p0) = G_LOAD [[DEF]](p0) :: (load (p0)) ; X64: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1 ; X64: [[COPY:%[0-9]+]]:_(s8) = COPY [[LOAD]](s8) ; X64: [[AND:%[0-9]+]]:_(s8) = G_AND [[COPY]], [[C]] - ; X64: G_STORE [[AND]](s8), [[DEF]](p0) :: (store 1) - ; X64: G_STORE [[LOAD1]](s8), [[DEF]](p0) :: (store 1) - ; X64: G_STORE [[LOAD2]](s16), [[DEF]](p0) :: (store 2) - ; X64: G_STORE [[LOAD3]](s32), [[DEF]](p0) :: (store 4) - ; X64: G_STORE [[LOAD4]](p0), [[DEF]](p0) :: (store 4) + ; X64: G_STORE [[AND]](s8), [[DEF]](p0) :: (store (s1)) + ; X64: G_STORE [[LOAD1]](s8), [[DEF]](p0) :: (store (s8)) + ; X64: G_STORE [[LOAD2]](s16), [[DEF]](p0) :: (store (s16)) + ; X64: G_STORE [[LOAD3]](s32), [[DEF]](p0) :: (store (s32)) + ; X64: G_STORE [[LOAD4]](p0), [[DEF]](p0) :: (store (p0)) %0:_(p0) = IMPLICIT_DEF - %9:_(s1) = G_LOAD %0(p0) :: (load 1) - %1:_(s8) = G_LOAD %0(p0) :: (load 1) - %2:_(s16) = G_LOAD %0(p0) :: (load 2) - %3:_(s32) = G_LOAD %0(p0) :: (load 4) - %4:_(p0) = G_LOAD %0(p0) :: (load 4) + %9:_(s1) = G_LOAD %0(p0) :: (load (s1)) + %1:_(s8) = G_LOAD %0(p0) :: (load (s8)) + %2:_(s16) = G_LOAD %0(p0) :: (load (s16)) + %3:_(s32) = G_LOAD %0(p0) :: (load (s32)) + %4:_(p0) = G_LOAD %0(p0) :: (load (p0)) - G_STORE %9, %0 :: (store 1) - G_STORE %1, %0 :: (store 1) - G_STORE %2, %0 :: (store 2) - G_STORE %3, %0 :: (store 4) - G_STORE %4, %0 :: (store 4) + G_STORE %9, %0 :: (store (s1)) + G_STORE %1, %0 :: (store (s8)) + G_STORE %2, %0 :: (store (s16)) + G_STORE %3, %0 :: (store (s32)) + G_STORE %4, %0 :: (store (p0)) ... --- name: test_memop_s64 @@ -47,11 +47,11 @@ body: | ; X64-LABEL: name: test_memop_s64 ; X64: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF - ; X64: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[DEF]](p0) :: (load 8) - ; X64: G_STORE [[LOAD]](s64), [[DEF]](p0) :: (store 8) + ; X64: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[DEF]](p0) :: (load (s64)) + ; X64: G_STORE [[LOAD]](s64), [[DEF]](p0) :: (store (s64)) %0:_(p0) = IMPLICIT_DEF - %1:_(s64) = G_LOAD %0 :: (load 8) + %1:_(s64) = G_LOAD %0 :: (load (s64)) - G_STORE %1, %0 :: (store 8) + G_STORE %1, %0 :: (store (s64)) ... diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-mul-scalar.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-mul-scalar.mir index d4c753435b404..c82c40dfef1df 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-mul-scalar.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-mul-scalar.mir @@ -41,13 +41,13 @@ body: | ; CHECK: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1 ; CHECK: [[COPY1:%[0-9]+]]:_(s8) = COPY [[MUL]](s8) ; CHECK: [[AND:%[0-9]+]]:_(s8) = G_AND [[COPY1]], [[C]] - ; CHECK: G_STORE [[AND]](s8), [[DEF]](p0) :: (store 1) + ; CHECK: G_STORE [[AND]](s8), [[DEF]](p0) :: (store (s1)) ; CHECK: RET 0 %0(s32) = COPY $edx %1(s1) = G_TRUNC %0(s32) %2(s1) = G_MUL %1, %1 %3:_(p0) = G_IMPLICIT_DEF - G_STORE %2, %3 :: (store 1) + G_STORE %2, %3 :: (store (s1)) RET 0 ... --- diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-or-scalar.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-or-scalar.mir index ee571d3430cea..fe432f72ec768 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-or-scalar.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-or-scalar.mir @@ -49,13 +49,13 @@ body: | ; CHECK: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1 ; CHECK: [[COPY1:%[0-9]+]]:_(s8) = COPY [[OR]](s8) ; CHECK: [[AND:%[0-9]+]]:_(s8) = G_AND [[COPY1]], [[C]] - ; CHECK: G_STORE [[AND]](s8), [[DEF]](p0) :: (store 1) + ; CHECK: G_STORE [[AND]](s8), [[DEF]](p0) :: (store (s1)) ; CHECK: RET 0 %0(s32) = COPY $edx %1(s1) = G_TRUNC %0(s32) %2(s1) = G_OR %1, %1 %3:_(p0) = G_IMPLICIT_DEF - G_STORE %2, %3 :: (store 1) + G_STORE %2, %3 :: (store (s1)) RET 0 ... --- diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-ptr-add.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-ptr-add.mir index db4e58a2b85b4..2a080117342dc 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-ptr-add.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-ptr-add.mir @@ -35,12 +35,12 @@ body: | ; CHECK: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[C]](s32) - ; CHECK: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store 1 into %ir.addr) + ; CHECK: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr) ; CHECK: RET 0 %0(p0) = IMPLICIT_DEF %1(s8) = G_CONSTANT i8 20 %2(p0) = G_PTR_ADD %0, %1(s8) - G_STORE %2, %0 :: (store 1 into %ir.addr) + G_STORE %2, %0 :: (store (p0) into %ir.addr) RET 0 ... --- @@ -56,12 +56,12 @@ body: | ; CHECK: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[C]](s32) - ; CHECK: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store 1 into %ir.addr) + ; CHECK: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr) ; CHECK: RET 0 %0(p0) = IMPLICIT_DEF %1(s16) = G_CONSTANT i16 20 %2(p0) = G_PTR_ADD %0, %1(s16) - G_STORE %2, %0 :: (store 1 into %ir.addr) + G_STORE %2, %0 :: (store (p0) into %ir.addr) RET 0 ... --- @@ -77,12 +77,12 @@ body: | ; CHECK: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[C]](s32) - ; CHECK: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store 1 into %ir.addr) + ; CHECK: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr) ; CHECK: RET 0 %0(p0) = IMPLICIT_DEF %1(s32) = G_CONSTANT i32 20 %2(p0) = G_PTR_ADD %0, %1(s32) - G_STORE %2, %0 :: (store 1 into %ir.addr) + G_STORE %2, %0 :: (store (p0) into %ir.addr) RET 0 ... --- @@ -98,11 +98,11 @@ body: | ; CHECK: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 20 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[C]](s64) - ; CHECK: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store 1 into %ir.addr) + ; CHECK: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr) ; CHECK: RET 0 %0(p0) = IMPLICIT_DEF %1(s64) = G_CONSTANT i64 20 %2(p0) = G_PTR_ADD %0, %1(s64) - G_STORE %2, %0 :: (store 1 into %ir.addr) + G_STORE %2, %0 :: (store (p0) into %ir.addr) RET 0 ... diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-sub.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-sub.mir index 74289897b6aab..958f2061c7d81 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-sub.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-sub.mir @@ -32,13 +32,13 @@ body: | ; CHECK: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1 ; CHECK: [[COPY1:%[0-9]+]]:_(s8) = COPY [[SUB]](s8) ; CHECK: [[AND:%[0-9]+]]:_(s8) = G_AND [[COPY1]], [[C]] - ; CHECK: G_STORE [[AND]](s8), [[DEF]](p0) :: (store 1) + ; CHECK: G_STORE [[AND]](s8), [[DEF]](p0) :: (store (s1)) ; CHECK: RET 0 %0(s32) = COPY $edx %1(s1) = G_TRUNC %0(s32) %2(s1) = G_SUB %1, %1 %3:_(p0) = G_IMPLICIT_DEF - G_STORE %2, %3 :: (store 1) + G_STORE %2, %3 :: (store (s1)) RET 0 ... --- diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-trunc.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-trunc.mir index 1d5469127dd5c..e0fcce270e274 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-trunc.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-trunc.mir @@ -22,11 +22,11 @@ body: | ; X32: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1 ; X32: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[DEF]](s32) ; X32: [[AND:%[0-9]+]]:_(s8) = G_AND [[TRUNC]], [[C]] - ; X32: G_STORE [[AND]](s8), [[DEF1]](p0) :: (store 1) + ; X32: G_STORE [[AND]](s8), [[DEF1]](p0) :: (store (s1)) ; X32: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[DEF]](s32) - ; X32: G_STORE [[TRUNC1]](s8), [[DEF1]](p0) :: (store 1) + ; X32: G_STORE [[TRUNC1]](s8), [[DEF1]](p0) :: (store (s8)) ; X32: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; X32: G_STORE [[TRUNC2]](s16), [[DEF1]](p0) :: (store 2) + ; X32: G_STORE [[TRUNC2]](s16), [[DEF1]](p0) :: (store (s16)) ; X32: RET 0 ; X64-LABEL: name: trunc_check ; X64: [[DEF:%[0-9]+]]:_(s32) = IMPLICIT_DEF @@ -34,22 +34,22 @@ body: | ; X64: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1 ; X64: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[DEF]](s32) ; X64: [[AND:%[0-9]+]]:_(s8) = G_AND [[TRUNC]], [[C]] - ; X64: G_STORE [[AND]](s8), [[DEF1]](p0) :: (store 1) + ; X64: G_STORE [[AND]](s8), [[DEF1]](p0) :: (store (s1)) ; X64: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[DEF]](s32) - ; X64: G_STORE [[TRUNC1]](s8), [[DEF1]](p0) :: (store 1) + ; X64: G_STORE [[TRUNC1]](s8), [[DEF1]](p0) :: (store (s8)) ; X64: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; X64: G_STORE [[TRUNC2]](s16), [[DEF1]](p0) :: (store 2) + ; X64: G_STORE [[TRUNC2]](s16), [[DEF1]](p0) :: (store (s16)) ; X64: RET 0 %0(s32) = IMPLICIT_DEF %1(s1) = G_TRUNC %0(s32) %4:_(p0) = G_IMPLICIT_DEF - G_STORE %1, %4 :: (store 1) + G_STORE %1, %4 :: (store (s1)) %2(s8) = G_TRUNC %0(s32) - G_STORE %2, %4 :: (store 1) + G_STORE %2, %4 :: (store (s8)) %3(s16) = G_TRUNC %0(s32) - G_STORE %3, %4 :: (store 2) + G_STORE %3, %4 :: (store (s16)) RET 0 ... diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-undef.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-undef.mir index aa40f3b952118..2ae54a2093072 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-undef.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-undef.mir @@ -12,38 +12,38 @@ body: | ; X64-LABEL: name: test_implicit_def ; X64: [[DEF:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF ; X64: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0 - ; X64: G_STORE [[C]](s8), [[DEF]](p0) :: (store 1) + ; X64: G_STORE [[C]](s8), [[DEF]](p0) :: (store (s1)) ; X64: [[DEF1:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF - ; X64: G_STORE [[DEF1]](s8), [[DEF]](p0) :: (store 1) + ; X64: G_STORE [[DEF1]](s8), [[DEF]](p0) :: (store (s8)) ; X64: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; X64: G_STORE [[DEF2]](s16), [[DEF]](p0) :: (store 2) + ; X64: G_STORE [[DEF2]](s16), [[DEF]](p0) :: (store (s16)) ; X64: [[DEF3:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; X64: G_STORE [[DEF3]](s32), [[DEF]](p0) :: (store 4) + ; X64: G_STORE [[DEF3]](s32), [[DEF]](p0) :: (store (s32)) ; X64: [[DEF4:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; X64: G_STORE [[DEF4]](s64), [[DEF]](p0) :: (store 8) + ; X64: G_STORE [[DEF4]](s64), [[DEF]](p0) :: (store (s64)) ; X32-LABEL: name: test_implicit_def ; X32: [[DEF:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF ; X32: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0 - ; X32: G_STORE [[C]](s8), [[DEF]](p0) :: (store 1) + ; X32: G_STORE [[C]](s8), [[DEF]](p0) :: (store (s1)) ; X32: [[DEF1:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF - ; X32: G_STORE [[DEF1]](s8), [[DEF]](p0) :: (store 1) + ; X32: G_STORE [[DEF1]](s8), [[DEF]](p0) :: (store (s8)) ; X32: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; X32: G_STORE [[DEF2]](s16), [[DEF]](p0) :: (store 2) + ; X32: G_STORE [[DEF2]](s16), [[DEF]](p0) :: (store (s16)) ; X32: [[DEF3:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; X32: G_STORE [[DEF3]](s32), [[DEF]](p0) :: (store 4) - ; X32: G_STORE [[DEF3]](s32), [[DEF]](p0) :: (store 4, align 8) + ; X32: G_STORE [[DEF3]](s32), [[DEF]](p0) :: (store (s32)) + ; X32: G_STORE [[DEF3]](s32), [[DEF]](p0) :: (store (s32), align 8) ; X32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; X32: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[C1]](s32) - ; X32: G_STORE [[DEF3]](s32), [[PTR_ADD]](p0) :: (store 4 into unknown-address + 4) + ; X32: G_STORE [[DEF3]](s32), [[PTR_ADD]](p0) :: (store (s32) into unknown-address + 4) %5:_(p0) = G_IMPLICIT_DEF %0:_(s1) = G_IMPLICIT_DEF - G_STORE %0, %5 ::(store 1) + G_STORE %0, %5 ::(store (s1)) %1:_(s8) = G_IMPLICIT_DEF - G_STORE %1, %5 ::(store 1) + G_STORE %1, %5 ::(store (s8)) %2:_(s16) = G_IMPLICIT_DEF - G_STORE %2, %5 ::(store 2) + G_STORE %2, %5 ::(store (s16)) %3:_(s32) = G_IMPLICIT_DEF - G_STORE %3, %5 ::(store 4) + G_STORE %3, %5 ::(store (s32)) %4:_(s64) = G_IMPLICIT_DEF - G_STORE %4, %5 ::(store 8) + G_STORE %4, %5 ::(store (s64)) ... diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-xor-scalar.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-xor-scalar.mir index 754402492b8b9..d2adaaa37d64f 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-xor-scalar.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-xor-scalar.mir @@ -50,7 +50,7 @@ body: | %1(s1) = G_TRUNC %0(s32) %2(s1) = G_XOR %1, %1 %3:_(p0) = G_IMPLICIT_DEF - G_STORE %2, %3 ::(store 1) + G_STORE %2, %3 ::(store (s1)) RET 0 ... --- diff --git a/llvm/test/CodeGen/X86/GlobalISel/regbankselect-AVX2.mir b/llvm/test/CodeGen/X86/GlobalISel/regbankselect-AVX2.mir index d743b28325cd7..5b00e48453f9c 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/regbankselect-AVX2.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/regbankselect-AVX2.mir @@ -110,7 +110,7 @@ body: | liveins: $rdi %0(p0) = COPY $rdi - %1(<8 x s32>) = G_LOAD %0(p0) :: (load 32 from %ir.p1, align 1) + %1(<8 x s32>) = G_LOAD %0(p0) :: (load (<8 x s32>) from %ir.p1, align 1) $ymm0 = COPY %1(<8 x s32>) RET 0, implicit $ymm0 @@ -133,7 +133,7 @@ body: | %0(<8 x s32>) = COPY $ymm0 %1(p0) = COPY $rdi - G_STORE %0(<8 x s32>), %1(p0) :: (store 32 into %ir.p1, align 1) + G_STORE %0(<8 x s32>), %1(p0) :: (store (<8 x s32>) into %ir.p1, align 1) RET 0 ... diff --git a/llvm/test/CodeGen/X86/GlobalISel/regbankselect-AVX512.mir b/llvm/test/CodeGen/X86/GlobalISel/regbankselect-AVX512.mir index e8cd6ae9308c4..6fd49d06203a2 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/regbankselect-AVX512.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/regbankselect-AVX512.mir @@ -103,7 +103,7 @@ body: | liveins: $rdi %0(p0) = COPY $rdi - %1(<16 x s32>) = G_LOAD %0(p0) :: (load 64 from %ir.p1, align 1) + %1(<16 x s32>) = G_LOAD %0(p0) :: (load (<16 x s32>) from %ir.p1, align 1) $zmm0 = COPY %1(<16 x s32>) RET 0, implicit $zmm0 @@ -126,7 +126,7 @@ body: | %0(<16 x s32>) = COPY $zmm0 %1(p0) = COPY $rdi - G_STORE %0(<16 x s32>), %1(p0) :: (store 64 into %ir.p1, align 1) + G_STORE %0(<16 x s32>), %1(p0) :: (store (<16 x s32>) into %ir.p1, align 1) RET 0 ... diff --git a/llvm/test/CodeGen/X86/GlobalISel/regbankselect-X86_64.mir b/llvm/test/CodeGen/X86/GlobalISel/regbankselect-X86_64.mir index a5c5d1c1f4c7a..cbc3b57fc7746 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/regbankselect-X86_64.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/regbankselect-X86_64.mir @@ -966,16 +966,16 @@ body: | ; FAST-LABEL: name: test_load_i8 ; FAST: [[COPY:%[0-9]+]]:gpr(p0) = COPY $rdi - ; FAST: [[LOAD:%[0-9]+]]:gpr(s8) = G_LOAD [[COPY]](p0) :: (load 1 from %ir.p1) + ; FAST: [[LOAD:%[0-9]+]]:gpr(s8) = G_LOAD [[COPY]](p0) :: (load (s8) from %ir.p1) ; FAST: $al = COPY [[LOAD]](s8) ; FAST: RET 0, implicit $al ; GREEDY-LABEL: name: test_load_i8 ; GREEDY: [[COPY:%[0-9]+]]:gpr(p0) = COPY $rdi - ; GREEDY: [[LOAD:%[0-9]+]]:gpr(s8) = G_LOAD [[COPY]](p0) :: (load 1 from %ir.p1) + ; GREEDY: [[LOAD:%[0-9]+]]:gpr(s8) = G_LOAD [[COPY]](p0) :: (load (s8) from %ir.p1) ; GREEDY: $al = COPY [[LOAD]](s8) ; GREEDY: RET 0, implicit $al %0(p0) = COPY $rdi - %1(s8) = G_LOAD %0(p0) :: (load 1 from %ir.p1) + %1(s8) = G_LOAD %0(p0) :: (load (s8) from %ir.p1) $al = COPY %1(s8) RET 0, implicit $al @@ -995,16 +995,16 @@ body: | ; FAST-LABEL: name: test_load_i16 ; FAST: [[COPY:%[0-9]+]]:gpr(p0) = COPY $rdi - ; FAST: [[LOAD:%[0-9]+]]:gpr(s16) = G_LOAD [[COPY]](p0) :: (load 2 from %ir.p1) + ; FAST: [[LOAD:%[0-9]+]]:gpr(s16) = G_LOAD [[COPY]](p0) :: (load (s16) from %ir.p1) ; FAST: $ax = COPY [[LOAD]](s16) ; FAST: RET 0, implicit $ax ; GREEDY-LABEL: name: test_load_i16 ; GREEDY: [[COPY:%[0-9]+]]:gpr(p0) = COPY $rdi - ; GREEDY: [[LOAD:%[0-9]+]]:gpr(s16) = G_LOAD [[COPY]](p0) :: (load 2 from %ir.p1) + ; GREEDY: [[LOAD:%[0-9]+]]:gpr(s16) = G_LOAD [[COPY]](p0) :: (load (s16) from %ir.p1) ; GREEDY: $ax = COPY [[LOAD]](s16) ; GREEDY: RET 0, implicit $ax %0(p0) = COPY $rdi - %1(s16) = G_LOAD %0(p0) :: (load 2 from %ir.p1) + %1(s16) = G_LOAD %0(p0) :: (load (s16) from %ir.p1) $ax = COPY %1(s16) RET 0, implicit $ax @@ -1024,16 +1024,16 @@ body: | ; FAST-LABEL: name: test_load_i32 ; FAST: [[COPY:%[0-9]+]]:gpr(p0) = COPY $rdi - ; FAST: [[LOAD:%[0-9]+]]:gpr(s32) = G_LOAD [[COPY]](p0) :: (load 4 from %ir.p1) + ; FAST: [[LOAD:%[0-9]+]]:gpr(s32) = G_LOAD [[COPY]](p0) :: (load (s32) from %ir.p1) ; FAST: $eax = COPY [[LOAD]](s32) ; FAST: RET 0, implicit $eax ; GREEDY-LABEL: name: test_load_i32 ; GREEDY: [[COPY:%[0-9]+]]:gpr(p0) = COPY $rdi - ; GREEDY: [[LOAD:%[0-9]+]]:gpr(s32) = G_LOAD [[COPY]](p0) :: (load 4 from %ir.p1) + ; GREEDY: [[LOAD:%[0-9]+]]:gpr(s32) = G_LOAD [[COPY]](p0) :: (load (s32) from %ir.p1) ; GREEDY: $eax = COPY [[LOAD]](s32) ; GREEDY: RET 0, implicit $eax %0(p0) = COPY $rdi - %1(s32) = G_LOAD %0(p0) :: (load 4 from %ir.p1) + %1(s32) = G_LOAD %0(p0) :: (load (s32) from %ir.p1) $eax = COPY %1(s32) RET 0, implicit $eax @@ -1054,16 +1054,16 @@ body: | ; FAST-LABEL: name: test_load_i64 ; FAST: [[COPY:%[0-9]+]]:gpr(p0) = COPY $rdi - ; FAST: [[LOAD:%[0-9]+]]:gpr(s64) = G_LOAD [[COPY]](p0) :: (load 8 from %ir.p1) + ; FAST: [[LOAD:%[0-9]+]]:gpr(s64) = G_LOAD [[COPY]](p0) :: (load (s64) from %ir.p1) ; FAST: $rax = COPY [[LOAD]](s64) ; FAST: RET 0, implicit $rax ; GREEDY-LABEL: name: test_load_i64 ; GREEDY: [[COPY:%[0-9]+]]:gpr(p0) = COPY $rdi - ; GREEDY: [[LOAD:%[0-9]+]]:gpr(s64) = G_LOAD [[COPY]](p0) :: (load 8 from %ir.p1) + ; GREEDY: [[LOAD:%[0-9]+]]:gpr(s64) = G_LOAD [[COPY]](p0) :: (load (s64) from %ir.p1) ; GREEDY: $rax = COPY [[LOAD]](s64) ; GREEDY: RET 0, implicit $rax %0(p0) = COPY $rdi - %1(s64) = G_LOAD %0(p0) :: (load 8 from %ir.p1) + %1(s64) = G_LOAD %0(p0) :: (load (s64) from %ir.p1) $rax = COPY %1(s64) RET 0, implicit $rax @@ -1083,20 +1083,20 @@ body: | ; FAST-LABEL: name: test_load_float ; FAST: [[COPY:%[0-9]+]]:gpr(p0) = COPY $rdi - ; FAST: [[LOAD:%[0-9]+]]:gpr(s32) = G_LOAD [[COPY]](p0) :: (load 4 from %ir.p1) + ; FAST: [[LOAD:%[0-9]+]]:gpr(s32) = G_LOAD [[COPY]](p0) :: (load (s32) from %ir.p1) ; FAST: [[COPY1:%[0-9]+]]:vecr(s32) = COPY [[LOAD]](s32) ; FAST: [[ANYEXT:%[0-9]+]]:vecr(s128) = G_ANYEXT [[COPY1]](s32) ; FAST: $xmm0 = COPY [[ANYEXT]](s128) ; FAST: RET 0, implicit $xmm0 ; GREEDY-LABEL: name: test_load_float ; GREEDY: [[COPY:%[0-9]+]]:gpr(p0) = COPY $rdi - ; GREEDY: [[LOAD:%[0-9]+]]:gpr(s32) = G_LOAD [[COPY]](p0) :: (load 4 from %ir.p1) + ; GREEDY: [[LOAD:%[0-9]+]]:gpr(s32) = G_LOAD [[COPY]](p0) :: (load (s32) from %ir.p1) ; GREEDY: [[COPY1:%[0-9]+]]:vecr(s32) = COPY [[LOAD]](s32) ; GREEDY: [[ANYEXT:%[0-9]+]]:vecr(s128) = G_ANYEXT [[COPY1]](s32) ; GREEDY: $xmm0 = COPY [[ANYEXT]](s128) ; GREEDY: RET 0, implicit $xmm0 %0:_(p0) = COPY $rdi - %1:_(s32) = G_LOAD %0(p0) :: (load 4 from %ir.p1) + %1:_(s32) = G_LOAD %0(p0) :: (load (s32) from %ir.p1) %2:_(s128) = G_ANYEXT %1(s32) $xmm0 = COPY %2(s128) RET 0, implicit $xmm0 @@ -1117,20 +1117,20 @@ body: | ; FAST-LABEL: name: test_load_double ; FAST: [[COPY:%[0-9]+]]:gpr(p0) = COPY $rdi - ; FAST: [[LOAD:%[0-9]+]]:gpr(s64) = G_LOAD [[COPY]](p0) :: (load 8 from %ir.p1) + ; FAST: [[LOAD:%[0-9]+]]:gpr(s64) = G_LOAD [[COPY]](p0) :: (load (s64) from %ir.p1) ; FAST: [[COPY1:%[0-9]+]]:vecr(s64) = COPY [[LOAD]](s64) ; FAST: [[ANYEXT:%[0-9]+]]:vecr(s128) = G_ANYEXT [[COPY1]](s64) ; FAST: $xmm0 = COPY [[ANYEXT]](s128) ; FAST: RET 0, implicit $xmm0 ; GREEDY-LABEL: name: test_load_double ; GREEDY: [[COPY:%[0-9]+]]:gpr(p0) = COPY $rdi - ; GREEDY: [[LOAD:%[0-9]+]]:gpr(s64) = G_LOAD [[COPY]](p0) :: (load 8 from %ir.p1) + ; GREEDY: [[LOAD:%[0-9]+]]:gpr(s64) = G_LOAD [[COPY]](p0) :: (load (s64) from %ir.p1) ; GREEDY: [[COPY1:%[0-9]+]]:vecr(s64) = COPY [[LOAD]](s64) ; GREEDY: [[ANYEXT:%[0-9]+]]:vecr(s128) = G_ANYEXT [[COPY1]](s64) ; GREEDY: $xmm0 = COPY [[ANYEXT]](s128) ; GREEDY: RET 0, implicit $xmm0 %0:_(p0) = COPY $rdi - %1:_(s64) = G_LOAD %0(p0) :: (load 8 from %ir.p1) + %1:_(s64) = G_LOAD %0(p0) :: (load (s64) from %ir.p1) %2:_(s128) = G_ANYEXT %1(s64) $xmm0 = COPY %2(s128) RET 0, implicit $xmm0 @@ -1151,16 +1151,16 @@ body: | ; FAST-LABEL: name: test_load_v4i32 ; FAST: [[COPY:%[0-9]+]]:gpr(p0) = COPY $rdi - ; FAST: [[LOAD:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.p1, align 1) + ; FAST: [[LOAD:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>) from %ir.p1, align 1) ; FAST: $xmm0 = COPY [[LOAD]](<4 x s32>) ; FAST: RET 0, implicit $xmm0 ; GREEDY-LABEL: name: test_load_v4i32 ; GREEDY: [[COPY:%[0-9]+]]:gpr(p0) = COPY $rdi - ; GREEDY: [[LOAD:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.p1, align 1) + ; GREEDY: [[LOAD:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>) from %ir.p1, align 1) ; GREEDY: $xmm0 = COPY [[LOAD]](<4 x s32>) ; GREEDY: RET 0, implicit $xmm0 %0(p0) = COPY $rdi - %1(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.p1, align 1) + %1(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.p1, align 1) $xmm0 = COPY %1(<4 x s32>) RET 0, implicit $xmm0 @@ -1181,18 +1181,18 @@ body: | ; FAST-LABEL: name: test_store_i32 ; FAST: [[COPY:%[0-9]+]]:gpr(s32) = COPY $edi ; FAST: [[COPY1:%[0-9]+]]:gpr(p0) = COPY $rsi - ; FAST: G_STORE [[COPY]](s32), [[COPY1]](p0) :: (store 4 into %ir.p1) + ; FAST: G_STORE [[COPY]](s32), [[COPY1]](p0) :: (store (s32) into %ir.p1) ; FAST: $rax = COPY [[COPY1]](p0) ; FAST: RET 0, implicit $rax ; GREEDY-LABEL: name: test_store_i32 ; GREEDY: [[COPY:%[0-9]+]]:gpr(s32) = COPY $edi ; GREEDY: [[COPY1:%[0-9]+]]:gpr(p0) = COPY $rsi - ; GREEDY: G_STORE [[COPY]](s32), [[COPY1]](p0) :: (store 4 into %ir.p1) + ; GREEDY: G_STORE [[COPY]](s32), [[COPY1]](p0) :: (store (s32) into %ir.p1) ; GREEDY: $rax = COPY [[COPY1]](p0) ; GREEDY: RET 0, implicit $rax %0(s32) = COPY $edi %1(p0) = COPY $rsi - G_STORE %0(s32), %1(p0) :: (store 4 into %ir.p1) + G_STORE %0(s32), %1(p0) :: (store (s32) into %ir.p1) $rax = COPY %1(p0) RET 0, implicit $rax @@ -1213,18 +1213,18 @@ body: | ; FAST-LABEL: name: test_store_i64 ; FAST: [[COPY:%[0-9]+]]:gpr(s64) = COPY $rdi ; FAST: [[COPY1:%[0-9]+]]:gpr(p0) = COPY $rsi - ; FAST: G_STORE [[COPY]](s64), [[COPY1]](p0) :: (store 8 into %ir.p1) + ; FAST: G_STORE [[COPY]](s64), [[COPY1]](p0) :: (store (s64) into %ir.p1) ; FAST: $rax = COPY [[COPY1]](p0) ; FAST: RET 0, implicit $rax ; GREEDY-LABEL: name: test_store_i64 ; GREEDY: [[COPY:%[0-9]+]]:gpr(s64) = COPY $rdi ; GREEDY: [[COPY1:%[0-9]+]]:gpr(p0) = COPY $rsi - ; GREEDY: G_STORE [[COPY]](s64), [[COPY1]](p0) :: (store 8 into %ir.p1) + ; GREEDY: G_STORE [[COPY]](s64), [[COPY1]](p0) :: (store (s64) into %ir.p1) ; GREEDY: $rax = COPY [[COPY1]](p0) ; GREEDY: RET 0, implicit $rax %0(s64) = COPY $rdi %1(p0) = COPY $rsi - G_STORE %0(s64), %1(p0) :: (store 8 into %ir.p1) + G_STORE %0(s64), %1(p0) :: (store (s64) into %ir.p1) $rax = COPY %1(p0) RET 0, implicit $rax @@ -1250,20 +1250,20 @@ body: | ; FAST: [[TRUNC:%[0-9]+]]:vecr(s32) = G_TRUNC [[COPY]](s128) ; FAST: [[COPY1:%[0-9]+]]:gpr(p0) = COPY $rdi ; FAST: [[COPY2:%[0-9]+]]:gpr(s32) = COPY [[TRUNC]](s32) - ; FAST: G_STORE [[COPY2]](s32), [[COPY1]](p0) :: (store 4 into %ir.p1) + ; FAST: G_STORE [[COPY2]](s32), [[COPY1]](p0) :: (store (s32) into %ir.p1) ; FAST: $rax = COPY [[COPY1]](p0) ; FAST: RET 0, implicit $rax ; GREEDY-LABEL: name: test_store_float ; GREEDY: [[COPY:%[0-9]+]]:vecr(s128) = COPY $xmm0 ; GREEDY: [[TRUNC:%[0-9]+]]:vecr(s32) = G_TRUNC [[COPY]](s128) ; GREEDY: [[COPY1:%[0-9]+]]:gpr(p0) = COPY $rdi - ; GREEDY: G_STORE [[TRUNC]](s32), [[COPY1]](p0) :: (store 4 into %ir.p1) + ; GREEDY: G_STORE [[TRUNC]](s32), [[COPY1]](p0) :: (store (s32) into %ir.p1) ; GREEDY: $rax = COPY [[COPY1]](p0) ; GREEDY: RET 0, implicit $rax %2:_(s128) = COPY $xmm0 %0:_(s32) = G_TRUNC %2(s128) %1:_(p0) = COPY $rdi - G_STORE %0(s32), %1(p0) :: (store 4 into %ir.p1) + G_STORE %0(s32), %1(p0) :: (store (s32) into %ir.p1) $rax = COPY %1(p0) RET 0, implicit $rax @@ -1289,20 +1289,20 @@ body: | ; FAST: [[TRUNC:%[0-9]+]]:vecr(s64) = G_TRUNC [[COPY]](s128) ; FAST: [[COPY1:%[0-9]+]]:gpr(p0) = COPY $rdi ; FAST: [[COPY2:%[0-9]+]]:gpr(s64) = COPY [[TRUNC]](s64) - ; FAST: G_STORE [[COPY2]](s64), [[COPY1]](p0) :: (store 8 into %ir.p1) + ; FAST: G_STORE [[COPY2]](s64), [[COPY1]](p0) :: (store (s64) into %ir.p1) ; FAST: $rax = COPY [[COPY1]](p0) ; FAST: RET 0, implicit $rax ; GREEDY-LABEL: name: test_store_double ; GREEDY: [[COPY:%[0-9]+]]:vecr(s128) = COPY $xmm0 ; GREEDY: [[TRUNC:%[0-9]+]]:vecr(s64) = G_TRUNC [[COPY]](s128) ; GREEDY: [[COPY1:%[0-9]+]]:gpr(p0) = COPY $rdi - ; GREEDY: G_STORE [[TRUNC]](s64), [[COPY1]](p0) :: (store 8 into %ir.p1) + ; GREEDY: G_STORE [[TRUNC]](s64), [[COPY1]](p0) :: (store (s64) into %ir.p1) ; GREEDY: $rax = COPY [[COPY1]](p0) ; GREEDY: RET 0, implicit $rax %2:_(s128) = COPY $xmm0 %0:_(s64) = G_TRUNC %2(s128) %1:_(p0) = COPY $rdi - G_STORE %0(s64), %1(p0) :: (store 8 into %ir.p1) + G_STORE %0(s64), %1(p0) :: (store (s64) into %ir.p1) $rax = COPY %1(p0) RET 0, implicit $rax diff --git a/llvm/test/CodeGen/X86/GlobalISel/select-GV-32.mir b/llvm/test/CodeGen/X86/GlobalISel/select-GV-32.mir index 348d1cd40a1bc..e28051057104a 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/select-GV-32.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/select-GV-32.mir @@ -36,12 +36,12 @@ registers: - { id: 1, class: gpr, preferred-register: '' } # X32: %0:gr32 = IMPLICIT_DEF # X32-NEXT: %1:gr32 = LEA32r $noreg, 1, $noreg, @g_int, $noreg -# X32-NEXT: MOV32mr %0, 1, $noreg, 0, $noreg, %1 :: (store 4 into `i32** undef`) +# X32-NEXT: MOV32mr %0, 1, $noreg, 0, $noreg, %1 :: (store (p0) into `i32** undef`) # X32-NEXT: RET 0 # # X32ABI: %0:low32_addr_access = IMPLICIT_DEF # X32ABI-NEXT: %1:gr32 = LEA64_32r $noreg, 1, $noreg, @g_int, $noreg -# X32ABI-NEXT: MOV32mr %0, 1, $noreg, 0, $noreg, %1 :: (store 4 into `i32** undef`) +# X32ABI-NEXT: MOV32mr %0, 1, $noreg, 0, $noreg, %1 :: (store (p0) into `i32** undef`) # X32ABI-NEXT: RET 0 body: | bb.1.entry: @@ -49,7 +49,7 @@ body: | %0(p0) = IMPLICIT_DEF %1(p0) = G_GLOBAL_VALUE @g_int - G_STORE %1(p0), %0(p0) :: (store 4 into `i32** undef`) + G_STORE %1(p0), %0(p0) :: (store (p0) into `i32** undef`) RET 0 ... @@ -66,18 +66,18 @@ registers: - { id: 0, class: gpr, preferred-register: '' } - { id: 1, class: gpr, preferred-register: '' } # X32: %1:gr32 = LEA32r $noreg, 1, $noreg, @g_int, $noreg -# X32-NEXT: %0:gr32 = MOV32rm %1, 1, $noreg, 0, $noreg :: (load 4 from @g_int) +# X32-NEXT: %0:gr32 = MOV32rm %1, 1, $noreg, 0, $noreg :: (load (s32) from @g_int) # X32-NEXT: $eax = COPY %0 # X32-NEXT: RET 0, implicit $eax # # X32ABI: %1:gr32 = LEA64_32r $noreg, 1, $noreg, @g_int, $noreg -# X32ABI-NEXT: %0:gr32 = MOV32rm %1, 1, $noreg, 0, $noreg :: (load 4 from @g_int) +# X32ABI-NEXT: %0:gr32 = MOV32rm %1, 1, $noreg, 0, $noreg :: (load (s32) from @g_int) # X32ABI-NEXT: $eax = COPY %0 # X32ABI-NEXT: RET 0, implicit $eax body: | bb.1.entry: %1(p0) = G_GLOBAL_VALUE @g_int - %0(s32) = G_LOAD %1(p0) :: (load 4 from @g_int) + %0(s32) = G_LOAD %1(p0) :: (load (s32) from @g_int) $eax = COPY %0(s32) RET 0, implicit $eax diff --git a/llvm/test/CodeGen/X86/GlobalISel/select-GV-64.mir b/llvm/test/CodeGen/X86/GlobalISel/select-GV-64.mir index 48c98d72325af..aab28d08ce162 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/select-GV-64.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/select-GV-64.mir @@ -33,12 +33,12 @@ registers: - { id: 1, class: gpr, preferred-register: '' } # X64: %0:gr64 = IMPLICIT_DEF # X64-NEXT: %1:gr64 = LEA64r $noreg, 1, $noreg, @g_int, $noreg -# X64-NEXT: MOV64mr %0, 1, $noreg, 0, $noreg, %1 :: (store 8 into `i32** undef`) +# X64-NEXT: MOV64mr %0, 1, $noreg, 0, $noreg, %1 :: (store (p0) into `i32** undef`) # X64-NEXT: RET 0 # # X64_DARWIN_PIC: %0:gr64 = IMPLICIT_DEF # X64_DARWIN_PIC-NEXT: %1:gr64 = LEA64r $rip, 1, $noreg, @g_int, $noreg -# X64_DARWIN_PIC-NEXT: MOV64mr %0, 1, $noreg, 0, $noreg, %1 :: (store 8 into `i32** undef`) +# X64_DARWIN_PIC-NEXT: MOV64mr %0, 1, $noreg, 0, $noreg, %1 :: (store (p0) into `i32** undef`) # X64_DARWIN_PIC-NEXT: RET 0 # body: | @@ -47,7 +47,7 @@ body: | %0(p0) = IMPLICIT_DEF %1(p0) = G_GLOBAL_VALUE @g_int - G_STORE %1(p0), %0(p0) :: (store 8 into `i32** undef`) + G_STORE %1(p0), %0(p0) :: (store (p0) into `i32** undef`) RET 0 ... @@ -65,19 +65,19 @@ registers: - { id: 0, class: gpr, preferred-register: '' } - { id: 1, class: gpr, preferred-register: '' } # X64: %1:gr64 = LEA64r $noreg, 1, $noreg, @g_int, $noreg -# X64-NEXT: %0:gr32 = MOV32rm %1, 1, $noreg, 0, $noreg :: (load 4 from @g_int) +# X64-NEXT: %0:gr32 = MOV32rm %1, 1, $noreg, 0, $noreg :: (load (s32) from @g_int) # X64-NEXT: $eax = COPY %0 # X64-NEXT: RET 0, implicit $eax # # X64_DARWIN_PIC: %1:gr64 = LEA64r $rip, 1, $noreg, @g_int, $noreg -# X64_DARWIN_PIC-NEXT: %0:gr32 = MOV32rm %1, 1, $noreg, 0, $noreg :: (load 4 from @g_int) +# X64_DARWIN_PIC-NEXT: %0:gr32 = MOV32rm %1, 1, $noreg, 0, $noreg :: (load (s32) from @g_int) # X64_DARWIN_PIC-NEXT: $eax = COPY %0 # X64_DARWIN_PIC-NEXT: RET 0, implicit $eax # body: | bb.1.entry: %1(p0) = G_GLOBAL_VALUE @g_int - %0(s32) = G_LOAD %1(p0) :: (load 4 from @g_int) + %0(s32) = G_LOAD %1(p0) :: (load (s32) from @g_int) $eax = COPY %0(s32) RET 0, implicit $eax diff --git a/llvm/test/CodeGen/X86/GlobalISel/select-constant.mir b/llvm/test/CodeGen/X86/GlobalISel/select-constant.mir index 2a5bd6017f389..349e107a6379c 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/select-constant.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/select-constant.mir @@ -177,11 +177,11 @@ body: | ; CHECK-LABEL: name: main ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rdi ; CHECK: [[MOV64ri32_:%[0-9]+]]:gr64 = MOV64ri32 0 - ; CHECK: MOV64mr [[COPY]], 1, $noreg, 0, $noreg, [[MOV64ri32_]] :: (store 8 into %ir.data) + ; CHECK: MOV64mr [[COPY]], 1, $noreg, 0, $noreg, [[MOV64ri32_]] :: (store (p0) into %ir.data) ; CHECK: RET 0 %0(p0) = COPY $rdi %1(p0) = G_CONSTANT i64 0 - G_STORE %1(p0), %0(p0) :: (store 8 into %ir.data) + G_STORE %1(p0), %0(p0) :: (store (p0) into %ir.data) RET 0 ... diff --git a/llvm/test/CodeGen/X86/GlobalISel/select-fconstant.mir b/llvm/test/CodeGen/X86/GlobalISel/select-fconstant.mir index dbe2f00964b7d..dbb2476b47816 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/select-fconstant.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/select-fconstant.mir @@ -35,7 +35,7 @@ body: | ; CHECK_NOPIC64: RET 0, implicit $xmm0 ; CHECK_LARGE64-LABEL: name: test_float ; CHECK_LARGE64: [[MOV64ri:%[0-9]+]]:gr64 = MOV64ri %const.0 - ; CHECK_LARGE64: [[MOVSSrm_alt:%[0-9]+]]:fr32 = MOVSSrm_alt [[MOV64ri]], 1, $noreg, 0, $noreg :: (load 8 from constant-pool, align 4) + ; CHECK_LARGE64: [[MOVSSrm_alt:%[0-9]+]]:fr32 = MOVSSrm_alt [[MOV64ri]], 1, $noreg, 0, $noreg :: (load (s64) from constant-pool, align 4) ; CHECK_LARGE64: [[COPY:%[0-9]+]]:vr128 = COPY [[MOVSSrm_alt]] ; CHECK_LARGE64: $xmm0 = COPY [[COPY]] ; CHECK_LARGE64: RET 0, implicit $xmm0 @@ -82,7 +82,7 @@ body: | ; CHECK_NOPIC64: RET 0, implicit $xmm0 ; CHECK_LARGE64-LABEL: name: test_double ; CHECK_LARGE64: [[MOV64ri:%[0-9]+]]:gr64 = MOV64ri %const.0 - ; CHECK_LARGE64: [[MOVSDrm_alt:%[0-9]+]]:fr64 = MOVSDrm_alt [[MOV64ri]], 1, $noreg, 0, $noreg :: (load 8 from constant-pool) + ; CHECK_LARGE64: [[MOVSDrm_alt:%[0-9]+]]:fr64 = MOVSDrm_alt [[MOV64ri]], 1, $noreg, 0, $noreg :: (load (s64) from constant-pool) ; CHECK_LARGE64: [[COPY:%[0-9]+]]:vr128 = COPY [[MOVSDrm_alt]] ; CHECK_LARGE64: $xmm0 = COPY [[COPY]] ; CHECK_LARGE64: RET 0, implicit $xmm0 diff --git a/llvm/test/CodeGen/X86/GlobalISel/select-memop-scalar-unordered.mir b/llvm/test/CodeGen/X86/GlobalISel/select-memop-scalar-unordered.mir index ca535326b9f32..9a06796429570 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/select-memop-scalar-unordered.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/select-memop-scalar-unordered.mir @@ -113,26 +113,26 @@ body: | ; SSE-LABEL: name: test_load_i8 ; SSE: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; SSE: [[MOV8rm:%[0-9]+]]:gr8 = MOV8rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered 1 from %ir.p1) + ; SSE: [[MOV8rm:%[0-9]+]]:gr8 = MOV8rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered (s8) from %ir.p1) ; SSE: $al = COPY [[MOV8rm]] ; SSE: RET 0, implicit $al ; AVX-LABEL: name: test_load_i8 ; AVX: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX: [[MOV8rm:%[0-9]+]]:gr8 = MOV8rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered 1 from %ir.p1) + ; AVX: [[MOV8rm:%[0-9]+]]:gr8 = MOV8rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered (s8) from %ir.p1) ; AVX: $al = COPY [[MOV8rm]] ; AVX: RET 0, implicit $al ; AVX512F-LABEL: name: test_load_i8 ; AVX512F: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX512F: [[MOV8rm:%[0-9]+]]:gr8 = MOV8rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered 1 from %ir.p1) + ; AVX512F: [[MOV8rm:%[0-9]+]]:gr8 = MOV8rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered (s8) from %ir.p1) ; AVX512F: $al = COPY [[MOV8rm]] ; AVX512F: RET 0, implicit $al ; AVX512VL-LABEL: name: test_load_i8 ; AVX512VL: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX512VL: [[MOV8rm:%[0-9]+]]:gr8 = MOV8rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered 1 from %ir.p1) + ; AVX512VL: [[MOV8rm:%[0-9]+]]:gr8 = MOV8rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered (s8) from %ir.p1) ; AVX512VL: $al = COPY [[MOV8rm]] ; AVX512VL: RET 0, implicit $al %0(p0) = COPY $rdi - %1(s8) = G_LOAD %0(p0) :: (load unordered 1 from %ir.p1) + %1(s8) = G_LOAD %0(p0) :: (load unordered (s8) from %ir.p1) $al = COPY %1(s8) RET 0, implicit $al @@ -151,26 +151,26 @@ body: | ; SSE-LABEL: name: test_load_i16 ; SSE: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; SSE: [[MOV16rm:%[0-9]+]]:gr16 = MOV16rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered 2 from %ir.p1) + ; SSE: [[MOV16rm:%[0-9]+]]:gr16 = MOV16rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered (s16) from %ir.p1) ; SSE: $ax = COPY [[MOV16rm]] ; SSE: RET 0, implicit $ax ; AVX-LABEL: name: test_load_i16 ; AVX: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX: [[MOV16rm:%[0-9]+]]:gr16 = MOV16rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered 2 from %ir.p1) + ; AVX: [[MOV16rm:%[0-9]+]]:gr16 = MOV16rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered (s16) from %ir.p1) ; AVX: $ax = COPY [[MOV16rm]] ; AVX: RET 0, implicit $ax ; AVX512F-LABEL: name: test_load_i16 ; AVX512F: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX512F: [[MOV16rm:%[0-9]+]]:gr16 = MOV16rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered 2 from %ir.p1) + ; AVX512F: [[MOV16rm:%[0-9]+]]:gr16 = MOV16rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered (s16) from %ir.p1) ; AVX512F: $ax = COPY [[MOV16rm]] ; AVX512F: RET 0, implicit $ax ; AVX512VL-LABEL: name: test_load_i16 ; AVX512VL: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX512VL: [[MOV16rm:%[0-9]+]]:gr16 = MOV16rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered 2 from %ir.p1) + ; AVX512VL: [[MOV16rm:%[0-9]+]]:gr16 = MOV16rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered (s16) from %ir.p1) ; AVX512VL: $ax = COPY [[MOV16rm]] ; AVX512VL: RET 0, implicit $ax %0(p0) = COPY $rdi - %1(s16) = G_LOAD %0(p0) :: (load unordered 2 from %ir.p1) + %1(s16) = G_LOAD %0(p0) :: (load unordered (s16) from %ir.p1) $ax = COPY %1(s16) RET 0, implicit $ax @@ -189,26 +189,26 @@ body: | ; SSE-LABEL: name: test_load_i32 ; SSE: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; SSE: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered 4 from %ir.p1) + ; SSE: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered (s32) from %ir.p1) ; SSE: $eax = COPY [[MOV32rm]] ; SSE: RET 0, implicit $eax ; AVX-LABEL: name: test_load_i32 ; AVX: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered 4 from %ir.p1) + ; AVX: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered (s32) from %ir.p1) ; AVX: $eax = COPY [[MOV32rm]] ; AVX: RET 0, implicit $eax ; AVX512F-LABEL: name: test_load_i32 ; AVX512F: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX512F: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered 4 from %ir.p1) + ; AVX512F: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered (s32) from %ir.p1) ; AVX512F: $eax = COPY [[MOV32rm]] ; AVX512F: RET 0, implicit $eax ; AVX512VL-LABEL: name: test_load_i32 ; AVX512VL: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX512VL: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered 4 from %ir.p1) + ; AVX512VL: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered (s32) from %ir.p1) ; AVX512VL: $eax = COPY [[MOV32rm]] ; AVX512VL: RET 0, implicit $eax %0(p0) = COPY $rdi - %1(s32) = G_LOAD %0(p0) :: (load unordered 4 from %ir.p1) + %1(s32) = G_LOAD %0(p0) :: (load unordered (s32) from %ir.p1) $eax = COPY %1(s32) RET 0, implicit $eax @@ -227,26 +227,26 @@ body: | ; SSE-LABEL: name: test_load_i64 ; SSE: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; SSE: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered 8 from %ir.p1) + ; SSE: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered (s64) from %ir.p1) ; SSE: $rax = COPY [[MOV64rm]] ; SSE: RET 0, implicit $rax ; AVX-LABEL: name: test_load_i64 ; AVX: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered 8 from %ir.p1) + ; AVX: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered (s64) from %ir.p1) ; AVX: $rax = COPY [[MOV64rm]] ; AVX: RET 0, implicit $rax ; AVX512F-LABEL: name: test_load_i64 ; AVX512F: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX512F: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered 8 from %ir.p1) + ; AVX512F: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered (s64) from %ir.p1) ; AVX512F: $rax = COPY [[MOV64rm]] ; AVX512F: RET 0, implicit $rax ; AVX512VL-LABEL: name: test_load_i64 ; AVX512VL: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX512VL: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered 8 from %ir.p1) + ; AVX512VL: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered (s64) from %ir.p1) ; AVX512VL: $rax = COPY [[MOV64rm]] ; AVX512VL: RET 0, implicit $rax %0(p0) = COPY $rdi - %1(s64) = G_LOAD %0(p0) :: (load unordered 8 from %ir.p1) + %1(s64) = G_LOAD %0(p0) :: (load unordered (s64) from %ir.p1) $rax = COPY %1(s64) RET 0, implicit $rax @@ -267,34 +267,34 @@ body: | ; SSE-LABEL: name: test_load_float ; SSE: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; SSE: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered 4 from %ir.p1) + ; SSE: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered (s32) from %ir.p1) ; SSE: [[COPY1:%[0-9]+]]:fr32 = COPY [[MOV32rm]] ; SSE: [[COPY2:%[0-9]+]]:vr128 = COPY [[COPY1]] ; SSE: $xmm0 = COPY [[COPY2]] ; SSE: RET 0, implicit $xmm0 ; AVX-LABEL: name: test_load_float ; AVX: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered 4 from %ir.p1) + ; AVX: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered (s32) from %ir.p1) ; AVX: [[COPY1:%[0-9]+]]:fr32 = COPY [[MOV32rm]] ; AVX: [[COPY2:%[0-9]+]]:vr128 = COPY [[COPY1]] ; AVX: $xmm0 = COPY [[COPY2]] ; AVX: RET 0, implicit $xmm0 ; AVX512F-LABEL: name: test_load_float ; AVX512F: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX512F: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered 4 from %ir.p1) + ; AVX512F: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered (s32) from %ir.p1) ; AVX512F: [[COPY1:%[0-9]+]]:fr32x = COPY [[MOV32rm]] ; AVX512F: [[COPY2:%[0-9]+]]:vr128x = COPY [[COPY1]] ; AVX512F: $xmm0 = COPY [[COPY2]] ; AVX512F: RET 0, implicit $xmm0 ; AVX512VL-LABEL: name: test_load_float ; AVX512VL: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX512VL: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered 4 from %ir.p1) + ; AVX512VL: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered (s32) from %ir.p1) ; AVX512VL: [[COPY1:%[0-9]+]]:fr32x = COPY [[MOV32rm]] ; AVX512VL: [[COPY2:%[0-9]+]]:vr128x = COPY [[COPY1]] ; AVX512VL: $xmm0 = COPY [[COPY2]] ; AVX512VL: RET 0, implicit $xmm0 %0:gpr(p0) = COPY $rdi - %1:gpr(s32) = G_LOAD %0(p0) :: (load unordered 4 from %ir.p1) + %1:gpr(s32) = G_LOAD %0(p0) :: (load unordered (s32) from %ir.p1) %3:vecr(s32) = COPY %1(s32) %2:vecr(s128) = G_ANYEXT %3(s32) $xmm0 = COPY %2(s128) @@ -317,34 +317,34 @@ body: | ; SSE-LABEL: name: test_load_float_vecreg ; SSE: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; SSE: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered 4 from %ir.p1) + ; SSE: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered (s32) from %ir.p1) ; SSE: [[COPY1:%[0-9]+]]:fr32 = COPY [[MOV32rm]] ; SSE: [[COPY2:%[0-9]+]]:vr128 = COPY [[COPY1]] ; SSE: $xmm0 = COPY [[COPY2]] ; SSE: RET 0, implicit $xmm0 ; AVX-LABEL: name: test_load_float_vecreg ; AVX: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered 4 from %ir.p1) + ; AVX: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered (s32) from %ir.p1) ; AVX: [[COPY1:%[0-9]+]]:fr32 = COPY [[MOV32rm]] ; AVX: [[COPY2:%[0-9]+]]:vr128 = COPY [[COPY1]] ; AVX: $xmm0 = COPY [[COPY2]] ; AVX: RET 0, implicit $xmm0 ; AVX512F-LABEL: name: test_load_float_vecreg ; AVX512F: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX512F: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered 4 from %ir.p1) + ; AVX512F: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered (s32) from %ir.p1) ; AVX512F: [[COPY1:%[0-9]+]]:fr32x = COPY [[MOV32rm]] ; AVX512F: [[COPY2:%[0-9]+]]:vr128x = COPY [[COPY1]] ; AVX512F: $xmm0 = COPY [[COPY2]] ; AVX512F: RET 0, implicit $xmm0 ; AVX512VL-LABEL: name: test_load_float_vecreg ; AVX512VL: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX512VL: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered 4 from %ir.p1) + ; AVX512VL: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered (s32) from %ir.p1) ; AVX512VL: [[COPY1:%[0-9]+]]:fr32x = COPY [[MOV32rm]] ; AVX512VL: [[COPY2:%[0-9]+]]:vr128x = COPY [[COPY1]] ; AVX512VL: $xmm0 = COPY [[COPY2]] ; AVX512VL: RET 0, implicit $xmm0 %0:gpr(p0) = COPY $rdi - %1:gpr(s32) = G_LOAD %0(p0) :: (load unordered 4 from %ir.p1) + %1:gpr(s32) = G_LOAD %0(p0) :: (load unordered (s32) from %ir.p1) %3:vecr(s32) = COPY %1(s32) %2:vecr(s128) = G_ANYEXT %3(s32) $xmm0 = COPY %2(s128) @@ -367,34 +367,34 @@ body: | ; SSE-LABEL: name: test_load_double ; SSE: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; SSE: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered 8 from %ir.p1) + ; SSE: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered (s64) from %ir.p1) ; SSE: [[COPY1:%[0-9]+]]:fr64 = COPY [[MOV64rm]] ; SSE: [[COPY2:%[0-9]+]]:vr128 = COPY [[COPY1]] ; SSE: $xmm0 = COPY [[COPY2]] ; SSE: RET 0, implicit $xmm0 ; AVX-LABEL: name: test_load_double ; AVX: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered 8 from %ir.p1) + ; AVX: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered (s64) from %ir.p1) ; AVX: [[COPY1:%[0-9]+]]:fr64 = COPY [[MOV64rm]] ; AVX: [[COPY2:%[0-9]+]]:vr128 = COPY [[COPY1]] ; AVX: $xmm0 = COPY [[COPY2]] ; AVX: RET 0, implicit $xmm0 ; AVX512F-LABEL: name: test_load_double ; AVX512F: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX512F: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered 8 from %ir.p1) + ; AVX512F: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered (s64) from %ir.p1) ; AVX512F: [[COPY1:%[0-9]+]]:fr64x = COPY [[MOV64rm]] ; AVX512F: [[COPY2:%[0-9]+]]:vr128x = COPY [[COPY1]] ; AVX512F: $xmm0 = COPY [[COPY2]] ; AVX512F: RET 0, implicit $xmm0 ; AVX512VL-LABEL: name: test_load_double ; AVX512VL: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX512VL: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered 8 from %ir.p1) + ; AVX512VL: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered (s64) from %ir.p1) ; AVX512VL: [[COPY1:%[0-9]+]]:fr64x = COPY [[MOV64rm]] ; AVX512VL: [[COPY2:%[0-9]+]]:vr128x = COPY [[COPY1]] ; AVX512VL: $xmm0 = COPY [[COPY2]] ; AVX512VL: RET 0, implicit $xmm0 %0:gpr(p0) = COPY $rdi - %1:gpr(s64) = G_LOAD %0(p0) :: (load unordered 8 from %ir.p1) + %1:gpr(s64) = G_LOAD %0(p0) :: (load unordered (s64) from %ir.p1) %3:vecr(s64) = COPY %1(s64) %2:vecr(s128) = G_ANYEXT %3(s64) $xmm0 = COPY %2(s128) @@ -417,34 +417,34 @@ body: | ; SSE-LABEL: name: test_load_double_vecreg ; SSE: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; SSE: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered 8 from %ir.p1) + ; SSE: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered (s64) from %ir.p1) ; SSE: [[COPY1:%[0-9]+]]:fr64 = COPY [[MOV64rm]] ; SSE: [[COPY2:%[0-9]+]]:vr128 = COPY [[COPY1]] ; SSE: $xmm0 = COPY [[COPY2]] ; SSE: RET 0, implicit $xmm0 ; AVX-LABEL: name: test_load_double_vecreg ; AVX: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered 8 from %ir.p1) + ; AVX: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered (s64) from %ir.p1) ; AVX: [[COPY1:%[0-9]+]]:fr64 = COPY [[MOV64rm]] ; AVX: [[COPY2:%[0-9]+]]:vr128 = COPY [[COPY1]] ; AVX: $xmm0 = COPY [[COPY2]] ; AVX: RET 0, implicit $xmm0 ; AVX512F-LABEL: name: test_load_double_vecreg ; AVX512F: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX512F: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered 8 from %ir.p1) + ; AVX512F: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered (s64) from %ir.p1) ; AVX512F: [[COPY1:%[0-9]+]]:fr64x = COPY [[MOV64rm]] ; AVX512F: [[COPY2:%[0-9]+]]:vr128x = COPY [[COPY1]] ; AVX512F: $xmm0 = COPY [[COPY2]] ; AVX512F: RET 0, implicit $xmm0 ; AVX512VL-LABEL: name: test_load_double_vecreg ; AVX512VL: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX512VL: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered 8 from %ir.p1) + ; AVX512VL: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered (s64) from %ir.p1) ; AVX512VL: [[COPY1:%[0-9]+]]:fr64x = COPY [[MOV64rm]] ; AVX512VL: [[COPY2:%[0-9]+]]:vr128x = COPY [[COPY1]] ; AVX512VL: $xmm0 = COPY [[COPY2]] ; AVX512VL: RET 0, implicit $xmm0 %0:gpr(p0) = COPY $rdi - %1:gpr(s64) = G_LOAD %0(p0) :: (load unordered 8 from %ir.p1) + %1:gpr(s64) = G_LOAD %0(p0) :: (load unordered (s64) from %ir.p1) %3:vecr(s64) = COPY %1(s64) %2:vecr(s128) = G_ANYEXT %3(s64) $xmm0 = COPY %2(s128) @@ -466,30 +466,30 @@ body: | ; SSE-LABEL: name: test_store_i32 ; SSE: [[COPY:%[0-9]+]]:gr32 = COPY $edi ; SSE: [[COPY1:%[0-9]+]]:gr64 = COPY $rsi - ; SSE: MOV32mr [[COPY1]], 1, $noreg, 0, $noreg, [[COPY]] :: (store unordered 4 into %ir.p1) + ; SSE: MOV32mr [[COPY1]], 1, $noreg, 0, $noreg, [[COPY]] :: (store unordered (s32) into %ir.p1) ; SSE: $rax = COPY [[COPY1]] ; SSE: RET 0, implicit $rax ; AVX-LABEL: name: test_store_i32 ; AVX: [[COPY:%[0-9]+]]:gr32 = COPY $edi ; AVX: [[COPY1:%[0-9]+]]:gr64 = COPY $rsi - ; AVX: MOV32mr [[COPY1]], 1, $noreg, 0, $noreg, [[COPY]] :: (store unordered 4 into %ir.p1) + ; AVX: MOV32mr [[COPY1]], 1, $noreg, 0, $noreg, [[COPY]] :: (store unordered (s32) into %ir.p1) ; AVX: $rax = COPY [[COPY1]] ; AVX: RET 0, implicit $rax ; AVX512F-LABEL: name: test_store_i32 ; AVX512F: [[COPY:%[0-9]+]]:gr32 = COPY $edi ; AVX512F: [[COPY1:%[0-9]+]]:gr64 = COPY $rsi - ; AVX512F: MOV32mr [[COPY1]], 1, $noreg, 0, $noreg, [[COPY]] :: (store unordered 4 into %ir.p1) + ; AVX512F: MOV32mr [[COPY1]], 1, $noreg, 0, $noreg, [[COPY]] :: (store unordered (s32) into %ir.p1) ; AVX512F: $rax = COPY [[COPY1]] ; AVX512F: RET 0, implicit $rax ; AVX512VL-LABEL: name: test_store_i32 ; AVX512VL: [[COPY:%[0-9]+]]:gr32 = COPY $edi ; AVX512VL: [[COPY1:%[0-9]+]]:gr64 = COPY $rsi - ; AVX512VL: MOV32mr [[COPY1]], 1, $noreg, 0, $noreg, [[COPY]] :: (store unordered 4 into %ir.p1) + ; AVX512VL: MOV32mr [[COPY1]], 1, $noreg, 0, $noreg, [[COPY]] :: (store unordered (s32) into %ir.p1) ; AVX512VL: $rax = COPY [[COPY1]] ; AVX512VL: RET 0, implicit $rax %0(s32) = COPY $edi %1(p0) = COPY $rsi - G_STORE %0(s32), %1(p0) :: (store unordered 4 into %ir.p1) + G_STORE %0(s32), %1(p0) :: (store unordered (s32) into %ir.p1) $rax = COPY %1(p0) RET 0, implicit $rax @@ -509,30 +509,30 @@ body: | ; SSE-LABEL: name: test_store_i64 ; SSE: [[COPY:%[0-9]+]]:gr64 = COPY $rdi ; SSE: [[COPY1:%[0-9]+]]:gr64 = COPY $rsi - ; SSE: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, [[COPY]] :: (store unordered 8 into %ir.p1) + ; SSE: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, [[COPY]] :: (store unordered (s64) into %ir.p1) ; SSE: $rax = COPY [[COPY1]] ; SSE: RET 0, implicit $rax ; AVX-LABEL: name: test_store_i64 ; AVX: [[COPY:%[0-9]+]]:gr64 = COPY $rdi ; AVX: [[COPY1:%[0-9]+]]:gr64 = COPY $rsi - ; AVX: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, [[COPY]] :: (store unordered 8 into %ir.p1) + ; AVX: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, [[COPY]] :: (store unordered (s64) into %ir.p1) ; AVX: $rax = COPY [[COPY1]] ; AVX: RET 0, implicit $rax ; AVX512F-LABEL: name: test_store_i64 ; AVX512F: [[COPY:%[0-9]+]]:gr64 = COPY $rdi ; AVX512F: [[COPY1:%[0-9]+]]:gr64 = COPY $rsi - ; AVX512F: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, [[COPY]] :: (store unordered 8 into %ir.p1) + ; AVX512F: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, [[COPY]] :: (store unordered (s64) into %ir.p1) ; AVX512F: $rax = COPY [[COPY1]] ; AVX512F: RET 0, implicit $rax ; AVX512VL-LABEL: name: test_store_i64 ; AVX512VL: [[COPY:%[0-9]+]]:gr64 = COPY $rdi ; AVX512VL: [[COPY1:%[0-9]+]]:gr64 = COPY $rsi - ; AVX512VL: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, [[COPY]] :: (store unordered 8 into %ir.p1) + ; AVX512VL: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, [[COPY]] :: (store unordered (s64) into %ir.p1) ; AVX512VL: $rax = COPY [[COPY1]] ; AVX512VL: RET 0, implicit $rax %0(s64) = COPY $rdi %1(p0) = COPY $rsi - G_STORE %0(s64), %1(p0) :: (store unordered 8 into %ir.p1) + G_STORE %0(s64), %1(p0) :: (store unordered (s64) into %ir.p1) $rax = COPY %1(p0) RET 0, implicit $rax @@ -556,7 +556,7 @@ body: | ; SSE: [[COPY1:%[0-9]+]]:fr32 = COPY [[COPY]] ; SSE: [[COPY2:%[0-9]+]]:gr64 = COPY $rdi ; SSE: [[COPY3:%[0-9]+]]:gr32 = COPY [[COPY1]] - ; SSE: MOV32mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store unordered 4 into %ir.p1) + ; SSE: MOV32mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store unordered (s32) into %ir.p1) ; SSE: $rax = COPY [[COPY2]] ; SSE: RET 0, implicit $rax ; AVX-LABEL: name: test_store_float @@ -564,7 +564,7 @@ body: | ; AVX: [[COPY1:%[0-9]+]]:fr32 = COPY [[COPY]] ; AVX: [[COPY2:%[0-9]+]]:gr64 = COPY $rdi ; AVX: [[COPY3:%[0-9]+]]:gr32 = COPY [[COPY1]] - ; AVX: MOV32mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store unordered 4 into %ir.p1) + ; AVX: MOV32mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store unordered (s32) into %ir.p1) ; AVX: $rax = COPY [[COPY2]] ; AVX: RET 0, implicit $rax ; AVX512F-LABEL: name: test_store_float @@ -572,7 +572,7 @@ body: | ; AVX512F: [[COPY1:%[0-9]+]]:fr32x = COPY [[COPY]] ; AVX512F: [[COPY2:%[0-9]+]]:gr64 = COPY $rdi ; AVX512F: [[COPY3:%[0-9]+]]:gr32 = COPY [[COPY1]] - ; AVX512F: MOV32mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store unordered 4 into %ir.p1) + ; AVX512F: MOV32mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store unordered (s32) into %ir.p1) ; AVX512F: $rax = COPY [[COPY2]] ; AVX512F: RET 0, implicit $rax ; AVX512VL-LABEL: name: test_store_float @@ -580,14 +580,14 @@ body: | ; AVX512VL: [[COPY1:%[0-9]+]]:fr32x = COPY [[COPY]] ; AVX512VL: [[COPY2:%[0-9]+]]:gr64 = COPY $rdi ; AVX512VL: [[COPY3:%[0-9]+]]:gr32 = COPY [[COPY1]] - ; AVX512VL: MOV32mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store unordered 4 into %ir.p1) + ; AVX512VL: MOV32mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store unordered (s32) into %ir.p1) ; AVX512VL: $rax = COPY [[COPY2]] ; AVX512VL: RET 0, implicit $rax %2:vecr(s128) = COPY $xmm0 %0:vecr(s32) = G_TRUNC %2(s128) %1:gpr(p0) = COPY $rdi %3:gpr(s32) = COPY %0(s32) - G_STORE %3(s32), %1(p0) :: (store unordered 4 into %ir.p1) + G_STORE %3(s32), %1(p0) :: (store unordered (s32) into %ir.p1) $rax = COPY %1(p0) RET 0, implicit $rax @@ -611,7 +611,7 @@ body: | ; SSE: [[COPY1:%[0-9]+]]:fr32 = COPY [[COPY]] ; SSE: [[COPY2:%[0-9]+]]:gr64 = COPY $rdi ; SSE: [[COPY3:%[0-9]+]]:gr32 = COPY [[COPY1]] - ; SSE: MOV32mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store unordered 4 into %ir.p1) + ; SSE: MOV32mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store unordered (s32) into %ir.p1) ; SSE: $rax = COPY [[COPY2]] ; SSE: RET 0, implicit $rax ; AVX-LABEL: name: test_store_float_vec @@ -619,7 +619,7 @@ body: | ; AVX: [[COPY1:%[0-9]+]]:fr32 = COPY [[COPY]] ; AVX: [[COPY2:%[0-9]+]]:gr64 = COPY $rdi ; AVX: [[COPY3:%[0-9]+]]:gr32 = COPY [[COPY1]] - ; AVX: MOV32mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store unordered 4 into %ir.p1) + ; AVX: MOV32mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store unordered (s32) into %ir.p1) ; AVX: $rax = COPY [[COPY2]] ; AVX: RET 0, implicit $rax ; AVX512F-LABEL: name: test_store_float_vec @@ -627,7 +627,7 @@ body: | ; AVX512F: [[COPY1:%[0-9]+]]:fr32x = COPY [[COPY]] ; AVX512F: [[COPY2:%[0-9]+]]:gr64 = COPY $rdi ; AVX512F: [[COPY3:%[0-9]+]]:gr32 = COPY [[COPY1]] - ; AVX512F: MOV32mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store unordered 4 into %ir.p1) + ; AVX512F: MOV32mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store unordered (s32) into %ir.p1) ; AVX512F: $rax = COPY [[COPY2]] ; AVX512F: RET 0, implicit $rax ; AVX512VL-LABEL: name: test_store_float_vec @@ -635,14 +635,14 @@ body: | ; AVX512VL: [[COPY1:%[0-9]+]]:fr32x = COPY [[COPY]] ; AVX512VL: [[COPY2:%[0-9]+]]:gr64 = COPY $rdi ; AVX512VL: [[COPY3:%[0-9]+]]:gr32 = COPY [[COPY1]] - ; AVX512VL: MOV32mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store unordered 4 into %ir.p1) + ; AVX512VL: MOV32mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store unordered (s32) into %ir.p1) ; AVX512VL: $rax = COPY [[COPY2]] ; AVX512VL: RET 0, implicit $rax %2:vecr(s128) = COPY $xmm0 %0:vecr(s32) = G_TRUNC %2(s128) %1:gpr(p0) = COPY $rdi %3:gpr(s32) = COPY %0(s32) - G_STORE %3(s32), %1(p0) :: (store unordered 4 into %ir.p1) + G_STORE %3(s32), %1(p0) :: (store unordered (s32) into %ir.p1) $rax = COPY %1(p0) RET 0, implicit $rax @@ -667,7 +667,7 @@ body: | ; SSE: [[COPY1:%[0-9]+]]:fr64 = COPY [[COPY]] ; SSE: [[COPY2:%[0-9]+]]:gr64 = COPY $rdi ; SSE: [[COPY3:%[0-9]+]]:gr64 = COPY [[COPY1]] - ; SSE: MOV64mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store unordered 8 into %ir.p1) + ; SSE: MOV64mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store unordered (s64) into %ir.p1) ; SSE: $rax = COPY [[COPY2]] ; SSE: RET 0, implicit $rax ; AVX-LABEL: name: test_store_double @@ -675,7 +675,7 @@ body: | ; AVX: [[COPY1:%[0-9]+]]:fr64 = COPY [[COPY]] ; AVX: [[COPY2:%[0-9]+]]:gr64 = COPY $rdi ; AVX: [[COPY3:%[0-9]+]]:gr64 = COPY [[COPY1]] - ; AVX: MOV64mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store unordered 8 into %ir.p1) + ; AVX: MOV64mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store unordered (s64) into %ir.p1) ; AVX: $rax = COPY [[COPY2]] ; AVX: RET 0, implicit $rax ; AVX512F-LABEL: name: test_store_double @@ -683,7 +683,7 @@ body: | ; AVX512F: [[COPY1:%[0-9]+]]:fr64x = COPY [[COPY]] ; AVX512F: [[COPY2:%[0-9]+]]:gr64 = COPY $rdi ; AVX512F: [[COPY3:%[0-9]+]]:gr64 = COPY [[COPY1]] - ; AVX512F: MOV64mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store unordered 8 into %ir.p1) + ; AVX512F: MOV64mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store unordered (s64) into %ir.p1) ; AVX512F: $rax = COPY [[COPY2]] ; AVX512F: RET 0, implicit $rax ; AVX512VL-LABEL: name: test_store_double @@ -691,14 +691,14 @@ body: | ; AVX512VL: [[COPY1:%[0-9]+]]:fr64x = COPY [[COPY]] ; AVX512VL: [[COPY2:%[0-9]+]]:gr64 = COPY $rdi ; AVX512VL: [[COPY3:%[0-9]+]]:gr64 = COPY [[COPY1]] - ; AVX512VL: MOV64mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store unordered 8 into %ir.p1) + ; AVX512VL: MOV64mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store unordered (s64) into %ir.p1) ; AVX512VL: $rax = COPY [[COPY2]] ; AVX512VL: RET 0, implicit $rax %2:vecr(s128) = COPY $xmm0 %0:vecr(s64) = G_TRUNC %2(s128) %1:gpr(p0) = COPY $rdi %3:gpr(s64) = COPY %0(s64) - G_STORE %3(s64), %1(p0) :: (store unordered 8 into %ir.p1) + G_STORE %3(s64), %1(p0) :: (store unordered (s64) into %ir.p1) $rax = COPY %1(p0) RET 0, implicit $rax @@ -722,7 +722,7 @@ body: | ; SSE: [[COPY1:%[0-9]+]]:fr64 = COPY [[COPY]] ; SSE: [[COPY2:%[0-9]+]]:gr64 = COPY $rdi ; SSE: [[COPY3:%[0-9]+]]:gr64 = COPY [[COPY1]] - ; SSE: MOV64mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store unordered 8 into %ir.p1) + ; SSE: MOV64mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store unordered (s64) into %ir.p1) ; SSE: $rax = COPY [[COPY2]] ; SSE: RET 0, implicit $rax ; AVX-LABEL: name: test_store_double_vec @@ -730,7 +730,7 @@ body: | ; AVX: [[COPY1:%[0-9]+]]:fr64 = COPY [[COPY]] ; AVX: [[COPY2:%[0-9]+]]:gr64 = COPY $rdi ; AVX: [[COPY3:%[0-9]+]]:gr64 = COPY [[COPY1]] - ; AVX: MOV64mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store unordered 8 into %ir.p1) + ; AVX: MOV64mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store unordered (s64) into %ir.p1) ; AVX: $rax = COPY [[COPY2]] ; AVX: RET 0, implicit $rax ; AVX512F-LABEL: name: test_store_double_vec @@ -738,7 +738,7 @@ body: | ; AVX512F: [[COPY1:%[0-9]+]]:fr64x = COPY [[COPY]] ; AVX512F: [[COPY2:%[0-9]+]]:gr64 = COPY $rdi ; AVX512F: [[COPY3:%[0-9]+]]:gr64 = COPY [[COPY1]] - ; AVX512F: MOV64mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store unordered 8 into %ir.p1) + ; AVX512F: MOV64mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store unordered (s64) into %ir.p1) ; AVX512F: $rax = COPY [[COPY2]] ; AVX512F: RET 0, implicit $rax ; AVX512VL-LABEL: name: test_store_double_vec @@ -746,14 +746,14 @@ body: | ; AVX512VL: [[COPY1:%[0-9]+]]:fr64x = COPY [[COPY]] ; AVX512VL: [[COPY2:%[0-9]+]]:gr64 = COPY $rdi ; AVX512VL: [[COPY3:%[0-9]+]]:gr64 = COPY [[COPY1]] - ; AVX512VL: MOV64mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store unordered 8 into %ir.p1) + ; AVX512VL: MOV64mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store unordered (s64) into %ir.p1) ; AVX512VL: $rax = COPY [[COPY2]] ; AVX512VL: RET 0, implicit $rax %2:vecr(s128) = COPY $xmm0 %0:vecr(s64) = G_TRUNC %2(s128) %1:gpr(p0) = COPY $rdi %3:gpr(s64) = COPY %0(s64) - G_STORE %3(s64), %1(p0) :: (store unordered 8 into %ir.p1) + G_STORE %3(s64), %1(p0) :: (store unordered (s64) into %ir.p1) $rax = COPY %1(p0) RET 0, implicit $rax @@ -773,26 +773,26 @@ body: | ; SSE-LABEL: name: test_load_ptr ; SSE: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; SSE: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered 8 from %ir.ptr1) + ; SSE: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered (p0) from %ir.ptr1) ; SSE: $rax = COPY [[MOV64rm]] ; SSE: RET 0, implicit $rax ; AVX-LABEL: name: test_load_ptr ; AVX: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered 8 from %ir.ptr1) + ; AVX: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered (p0) from %ir.ptr1) ; AVX: $rax = COPY [[MOV64rm]] ; AVX: RET 0, implicit $rax ; AVX512F-LABEL: name: test_load_ptr ; AVX512F: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX512F: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered 8 from %ir.ptr1) + ; AVX512F: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered (p0) from %ir.ptr1) ; AVX512F: $rax = COPY [[MOV64rm]] ; AVX512F: RET 0, implicit $rax ; AVX512VL-LABEL: name: test_load_ptr ; AVX512VL: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX512VL: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered 8 from %ir.ptr1) + ; AVX512VL: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load unordered (p0) from %ir.ptr1) ; AVX512VL: $rax = COPY [[MOV64rm]] ; AVX512VL: RET 0, implicit $rax %0(p0) = COPY $rdi - %1(p0) = G_LOAD %0(p0) :: (load unordered 8 from %ir.ptr1) + %1(p0) = G_LOAD %0(p0) :: (load unordered (p0) from %ir.ptr1) $rax = COPY %1(p0) RET 0, implicit $rax @@ -813,26 +813,26 @@ body: | ; SSE-LABEL: name: test_store_ptr ; SSE: [[COPY:%[0-9]+]]:gr64 = COPY $rdi ; SSE: [[COPY1:%[0-9]+]]:gr64 = COPY $rsi - ; SSE: MOV64mr [[COPY]], 1, $noreg, 0, $noreg, [[COPY1]] :: (store unordered 8 into %ir.ptr1) + ; SSE: MOV64mr [[COPY]], 1, $noreg, 0, $noreg, [[COPY1]] :: (store unordered (p0) into %ir.ptr1) ; SSE: RET 0 ; AVX-LABEL: name: test_store_ptr ; AVX: [[COPY:%[0-9]+]]:gr64 = COPY $rdi ; AVX: [[COPY1:%[0-9]+]]:gr64 = COPY $rsi - ; AVX: MOV64mr [[COPY]], 1, $noreg, 0, $noreg, [[COPY1]] :: (store unordered 8 into %ir.ptr1) + ; AVX: MOV64mr [[COPY]], 1, $noreg, 0, $noreg, [[COPY1]] :: (store unordered (p0) into %ir.ptr1) ; AVX: RET 0 ; AVX512F-LABEL: name: test_store_ptr ; AVX512F: [[COPY:%[0-9]+]]:gr64 = COPY $rdi ; AVX512F: [[COPY1:%[0-9]+]]:gr64 = COPY $rsi - ; AVX512F: MOV64mr [[COPY]], 1, $noreg, 0, $noreg, [[COPY1]] :: (store unordered 8 into %ir.ptr1) + ; AVX512F: MOV64mr [[COPY]], 1, $noreg, 0, $noreg, [[COPY1]] :: (store unordered (p0) into %ir.ptr1) ; AVX512F: RET 0 ; AVX512VL-LABEL: name: test_store_ptr ; AVX512VL: [[COPY:%[0-9]+]]:gr64 = COPY $rdi ; AVX512VL: [[COPY1:%[0-9]+]]:gr64 = COPY $rsi - ; AVX512VL: MOV64mr [[COPY]], 1, $noreg, 0, $noreg, [[COPY1]] :: (store unordered 8 into %ir.ptr1) + ; AVX512VL: MOV64mr [[COPY]], 1, $noreg, 0, $noreg, [[COPY1]] :: (store unordered (p0) into %ir.ptr1) ; AVX512VL: RET 0 %0(p0) = COPY $rdi %1(p0) = COPY $rsi - G_STORE %1(p0), %0(p0) :: (store unordered 8 into %ir.ptr1) + G_STORE %1(p0), %0(p0) :: (store unordered (p0) into %ir.ptr1) RET 0 ... @@ -854,37 +854,37 @@ body: | ; SSE-LABEL: name: test_gep_folding ; SSE: [[COPY:%[0-9]+]]:gr64 = COPY $rdi ; SSE: [[COPY1:%[0-9]+]]:gr32 = COPY $esi - ; SSE: MOV32mr [[COPY]], 1, $noreg, 20, $noreg, [[COPY1]] :: (store unordered 4 into %ir.arrayidx) - ; SSE: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 20, $noreg :: (load unordered 4 from %ir.arrayidx) + ; SSE: MOV32mr [[COPY]], 1, $noreg, 20, $noreg, [[COPY1]] :: (store unordered (s32) into %ir.arrayidx) + ; SSE: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 20, $noreg :: (load unordered (s32) from %ir.arrayidx) ; SSE: $eax = COPY [[MOV32rm]] ; SSE: RET 0, implicit $eax ; AVX-LABEL: name: test_gep_folding ; AVX: [[COPY:%[0-9]+]]:gr64 = COPY $rdi ; AVX: [[COPY1:%[0-9]+]]:gr32 = COPY $esi - ; AVX: MOV32mr [[COPY]], 1, $noreg, 20, $noreg, [[COPY1]] :: (store unordered 4 into %ir.arrayidx) - ; AVX: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 20, $noreg :: (load unordered 4 from %ir.arrayidx) + ; AVX: MOV32mr [[COPY]], 1, $noreg, 20, $noreg, [[COPY1]] :: (store unordered (s32) into %ir.arrayidx) + ; AVX: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 20, $noreg :: (load unordered (s32) from %ir.arrayidx) ; AVX: $eax = COPY [[MOV32rm]] ; AVX: RET 0, implicit $eax ; AVX512F-LABEL: name: test_gep_folding ; AVX512F: [[COPY:%[0-9]+]]:gr64 = COPY $rdi ; AVX512F: [[COPY1:%[0-9]+]]:gr32 = COPY $esi - ; AVX512F: MOV32mr [[COPY]], 1, $noreg, 20, $noreg, [[COPY1]] :: (store unordered 4 into %ir.arrayidx) - ; AVX512F: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 20, $noreg :: (load unordered 4 from %ir.arrayidx) + ; AVX512F: MOV32mr [[COPY]], 1, $noreg, 20, $noreg, [[COPY1]] :: (store unordered (s32) into %ir.arrayidx) + ; AVX512F: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 20, $noreg :: (load unordered (s32) from %ir.arrayidx) ; AVX512F: $eax = COPY [[MOV32rm]] ; AVX512F: RET 0, implicit $eax ; AVX512VL-LABEL: name: test_gep_folding ; AVX512VL: [[COPY:%[0-9]+]]:gr64 = COPY $rdi ; AVX512VL: [[COPY1:%[0-9]+]]:gr32 = COPY $esi - ; AVX512VL: MOV32mr [[COPY]], 1, $noreg, 20, $noreg, [[COPY1]] :: (store unordered 4 into %ir.arrayidx) - ; AVX512VL: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 20, $noreg :: (load unordered 4 from %ir.arrayidx) + ; AVX512VL: MOV32mr [[COPY]], 1, $noreg, 20, $noreg, [[COPY1]] :: (store unordered (s32) into %ir.arrayidx) + ; AVX512VL: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 20, $noreg :: (load unordered (s32) from %ir.arrayidx) ; AVX512VL: $eax = COPY [[MOV32rm]] ; AVX512VL: RET 0, implicit $eax %0(p0) = COPY $rdi %1(s32) = COPY $esi %2(s64) = G_CONSTANT i64 20 %3(p0) = G_PTR_ADD %0, %2(s64) - G_STORE %1(s32), %3(p0) :: (store unordered 4 into %ir.arrayidx) - %4(s32) = G_LOAD %3(p0) :: (load unordered 4 from %ir.arrayidx) + G_STORE %1(s32), %3(p0) :: (store unordered (s32) into %ir.arrayidx) + %4(s32) = G_LOAD %3(p0) :: (load unordered (s32) from %ir.arrayidx) $eax = COPY %4(s32) RET 0, implicit $eax @@ -909,8 +909,8 @@ body: | ; SSE: [[COPY1:%[0-9]+]]:gr32 = COPY $esi ; SSE: [[MOV64ri:%[0-9]+]]:gr64_nosp = MOV64ri 228719476720 ; SSE: [[LEA64r:%[0-9]+]]:gr64 = LEA64r [[COPY]], 1, [[MOV64ri]], 0, $noreg - ; SSE: MOV32mr [[LEA64r]], 1, $noreg, 0, $noreg, [[COPY1]] :: (store unordered 4 into %ir.arrayidx) - ; SSE: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[LEA64r]], 1, $noreg, 0, $noreg :: (load unordered 4 from %ir.arrayidx) + ; SSE: MOV32mr [[LEA64r]], 1, $noreg, 0, $noreg, [[COPY1]] :: (store unordered (s32) into %ir.arrayidx) + ; SSE: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[LEA64r]], 1, $noreg, 0, $noreg :: (load unordered (s32) from %ir.arrayidx) ; SSE: $eax = COPY [[MOV32rm]] ; SSE: RET 0, implicit $eax ; AVX-LABEL: name: test_gep_folding_largeGepIndex @@ -918,8 +918,8 @@ body: | ; AVX: [[COPY1:%[0-9]+]]:gr32 = COPY $esi ; AVX: [[MOV64ri:%[0-9]+]]:gr64_nosp = MOV64ri 228719476720 ; AVX: [[LEA64r:%[0-9]+]]:gr64 = LEA64r [[COPY]], 1, [[MOV64ri]], 0, $noreg - ; AVX: MOV32mr [[LEA64r]], 1, $noreg, 0, $noreg, [[COPY1]] :: (store unordered 4 into %ir.arrayidx) - ; AVX: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[LEA64r]], 1, $noreg, 0, $noreg :: (load unordered 4 from %ir.arrayidx) + ; AVX: MOV32mr [[LEA64r]], 1, $noreg, 0, $noreg, [[COPY1]] :: (store unordered (s32) into %ir.arrayidx) + ; AVX: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[LEA64r]], 1, $noreg, 0, $noreg :: (load unordered (s32) from %ir.arrayidx) ; AVX: $eax = COPY [[MOV32rm]] ; AVX: RET 0, implicit $eax ; AVX512F-LABEL: name: test_gep_folding_largeGepIndex @@ -927,8 +927,8 @@ body: | ; AVX512F: [[COPY1:%[0-9]+]]:gr32 = COPY $esi ; AVX512F: [[MOV64ri:%[0-9]+]]:gr64_nosp = MOV64ri 228719476720 ; AVX512F: [[LEA64r:%[0-9]+]]:gr64 = LEA64r [[COPY]], 1, [[MOV64ri]], 0, $noreg - ; AVX512F: MOV32mr [[LEA64r]], 1, $noreg, 0, $noreg, [[COPY1]] :: (store unordered 4 into %ir.arrayidx) - ; AVX512F: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[LEA64r]], 1, $noreg, 0, $noreg :: (load unordered 4 from %ir.arrayidx) + ; AVX512F: MOV32mr [[LEA64r]], 1, $noreg, 0, $noreg, [[COPY1]] :: (store unordered (s32) into %ir.arrayidx) + ; AVX512F: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[LEA64r]], 1, $noreg, 0, $noreg :: (load unordered (s32) from %ir.arrayidx) ; AVX512F: $eax = COPY [[MOV32rm]] ; AVX512F: RET 0, implicit $eax ; AVX512VL-LABEL: name: test_gep_folding_largeGepIndex @@ -936,16 +936,16 @@ body: | ; AVX512VL: [[COPY1:%[0-9]+]]:gr32 = COPY $esi ; AVX512VL: [[MOV64ri:%[0-9]+]]:gr64_nosp = MOV64ri 228719476720 ; AVX512VL: [[LEA64r:%[0-9]+]]:gr64 = LEA64r [[COPY]], 1, [[MOV64ri]], 0, $noreg - ; AVX512VL: MOV32mr [[LEA64r]], 1, $noreg, 0, $noreg, [[COPY1]] :: (store unordered 4 into %ir.arrayidx) - ; AVX512VL: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[LEA64r]], 1, $noreg, 0, $noreg :: (load unordered 4 from %ir.arrayidx) + ; AVX512VL: MOV32mr [[LEA64r]], 1, $noreg, 0, $noreg, [[COPY1]] :: (store unordered (s32) into %ir.arrayidx) + ; AVX512VL: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[LEA64r]], 1, $noreg, 0, $noreg :: (load unordered (s32) from %ir.arrayidx) ; AVX512VL: $eax = COPY [[MOV32rm]] ; AVX512VL: RET 0, implicit $eax %0(p0) = COPY $rdi %1(s32) = COPY $esi %2(s64) = G_CONSTANT i64 228719476720 %3(p0) = G_PTR_ADD %0, %2(s64) - G_STORE %1(s32), %3(p0) :: (store unordered 4 into %ir.arrayidx) - %4(s32) = G_LOAD %3(p0) :: (load unordered 4 from %ir.arrayidx) + G_STORE %1(s32), %3(p0) :: (store unordered (s32) into %ir.arrayidx) + %4(s32) = G_LOAD %3(p0) :: (load unordered (s32) from %ir.arrayidx) $eax = COPY %4(s32) RET 0, implicit $eax diff --git a/llvm/test/CodeGen/X86/GlobalISel/select-memop-scalar-x32.mir b/llvm/test/CodeGen/X86/GlobalISel/select-memop-scalar-x32.mir index 78be9d573ea2d..ea7bf09b06674 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/select-memop-scalar-x32.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/select-memop-scalar-x32.mir @@ -57,13 +57,13 @@ fixedStack: body: | bb.1 (%ir-block.0): ; ALL-LABEL: name: test_load_i8 - ; ALL: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load 4 from %fixed-stack.0, align 16) - ; ALL: [[MOV8rm:%[0-9]+]]:gr8 = MOV8rm [[MOV32rm]], 1, $noreg, 0, $noreg :: (load 1 from %ir.p1) + ; ALL: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load (p0) from %fixed-stack.0, align 16) + ; ALL: [[MOV8rm:%[0-9]+]]:gr8 = MOV8rm [[MOV32rm]], 1, $noreg, 0, $noreg :: (load (s8) from %ir.p1) ; ALL: $al = COPY [[MOV8rm]] ; ALL: RET 0, implicit $al %1(p0) = G_FRAME_INDEX %fixed-stack.0 - %0(p0) = G_LOAD %1(p0) :: (invariant load 4 from %fixed-stack.0, align 16) - %2(s8) = G_LOAD %0(p0) :: (load 1 from %ir.p1) + %0(p0) = G_LOAD %1(p0) :: (invariant load (p0) from %fixed-stack.0, align 16) + %2(s8) = G_LOAD %0(p0) :: (load (s8) from %ir.p1) $al = COPY %2(s8) RET 0, implicit $al @@ -82,13 +82,13 @@ fixedStack: body: | bb.1 (%ir-block.0): ; ALL-LABEL: name: test_load_i16 - ; ALL: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load 4 from %fixed-stack.0, align 16) - ; ALL: [[MOV16rm:%[0-9]+]]:gr16 = MOV16rm [[MOV32rm]], 1, $noreg, 0, $noreg :: (load 2 from %ir.p1) + ; ALL: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load (p0) from %fixed-stack.0, align 16) + ; ALL: [[MOV16rm:%[0-9]+]]:gr16 = MOV16rm [[MOV32rm]], 1, $noreg, 0, $noreg :: (load (s16) from %ir.p1) ; ALL: $ax = COPY [[MOV16rm]] ; ALL: RET 0, implicit $ax %1(p0) = G_FRAME_INDEX %fixed-stack.0 - %0(p0) = G_LOAD %1(p0) :: (invariant load 4 from %fixed-stack.0, align 16) - %2(s16) = G_LOAD %0(p0) :: (load 2 from %ir.p1) + %0(p0) = G_LOAD %1(p0) :: (invariant load (p0) from %fixed-stack.0, align 16) + %2(s16) = G_LOAD %0(p0) :: (load (s16) from %ir.p1) $ax = COPY %2(s16) RET 0, implicit $ax @@ -107,13 +107,13 @@ fixedStack: body: | bb.1 (%ir-block.0): ; ALL-LABEL: name: test_load_i32 - ; ALL: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load 4 from %fixed-stack.0, align 16) - ; ALL: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm [[MOV32rm]], 1, $noreg, 0, $noreg :: (load 4 from %ir.p1) + ; ALL: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load (p0) from %fixed-stack.0, align 16) + ; ALL: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm [[MOV32rm]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.p1) ; ALL: $eax = COPY [[MOV32rm1]] ; ALL: RET 0, implicit $eax %1(p0) = G_FRAME_INDEX %fixed-stack.0 - %0(p0) = G_LOAD %1(p0) :: (invariant load 4 from %fixed-stack.0, align 16) - %2(s32) = G_LOAD %0(p0) :: (load 4 from %ir.p1) + %0(p0) = G_LOAD %1(p0) :: (invariant load (p0) from %fixed-stack.0, align 16) + %2(s32) = G_LOAD %0(p0) :: (load (s32) from %ir.p1) $eax = COPY %2(s32) RET 0, implicit $eax @@ -134,16 +134,16 @@ fixedStack: body: | bb.1 (%ir-block.0): ; ALL-LABEL: name: test_store_i8 - ; ALL: [[MOV8rm:%[0-9]+]]:gr8 = MOV8rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load 1 from %fixed-stack.0, align 16) - ; ALL: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (invariant load 4 from %fixed-stack.1) - ; ALL: MOV8mr [[MOV32rm]], 1, $noreg, 0, $noreg, [[MOV8rm]] :: (store 1 into %ir.p1) + ; ALL: [[MOV8rm:%[0-9]+]]:gr8 = MOV8rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load (s8) from %fixed-stack.0, align 16) + ; ALL: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (invariant load (p0) from %fixed-stack.1) + ; ALL: MOV8mr [[MOV32rm]], 1, $noreg, 0, $noreg, [[MOV8rm]] :: (store (s8) into %ir.p1) ; ALL: $eax = COPY [[MOV32rm]] ; ALL: RET 0, implicit $eax %2(p0) = G_FRAME_INDEX %fixed-stack.1 - %0(s8) = G_LOAD %2(p0) :: (invariant load 1 from %fixed-stack.1, align 16) + %0(s8) = G_LOAD %2(p0) :: (invariant load (s8) from %fixed-stack.1, align 16) %3(p0) = G_FRAME_INDEX %fixed-stack.0 - %1(p0) = G_LOAD %3(p0) :: (invariant load 4 from %fixed-stack.0, align 4) - G_STORE %0(s8), %1(p0) :: (store 1 into %ir.p1) + %1(p0) = G_LOAD %3(p0) :: (invariant load (p0) from %fixed-stack.0, align 4) + G_STORE %0(s8), %1(p0) :: (store (s8) into %ir.p1) $eax = COPY %1(p0) RET 0, implicit $eax @@ -164,16 +164,16 @@ fixedStack: body: | bb.1 (%ir-block.0): ; ALL-LABEL: name: test_store_i16 - ; ALL: [[MOV16rm:%[0-9]+]]:gr16 = MOV16rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load 2 from %fixed-stack.0, align 16) - ; ALL: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (invariant load 4 from %fixed-stack.1) - ; ALL: MOV16mr [[MOV32rm]], 1, $noreg, 0, $noreg, [[MOV16rm]] :: (store 2 into %ir.p1) + ; ALL: [[MOV16rm:%[0-9]+]]:gr16 = MOV16rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load (s16) from %fixed-stack.0, align 16) + ; ALL: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (invariant load (p0) from %fixed-stack.1) + ; ALL: MOV16mr [[MOV32rm]], 1, $noreg, 0, $noreg, [[MOV16rm]] :: (store (s16) into %ir.p1) ; ALL: $eax = COPY [[MOV32rm]] ; ALL: RET 0, implicit $eax %2(p0) = G_FRAME_INDEX %fixed-stack.1 - %0(s16) = G_LOAD %2(p0) :: (invariant load 2 from %fixed-stack.1, align 16) + %0(s16) = G_LOAD %2(p0) :: (invariant load (s16) from %fixed-stack.1, align 16) %3(p0) = G_FRAME_INDEX %fixed-stack.0 - %1(p0) = G_LOAD %3(p0) :: (invariant load 4 from %fixed-stack.0, align 4) - G_STORE %0(s16), %1(p0) :: (store 2 into %ir.p1) + %1(p0) = G_LOAD %3(p0) :: (invariant load (p0) from %fixed-stack.0, align 4) + G_STORE %0(s16), %1(p0) :: (store (s16) into %ir.p1) $eax = COPY %1(p0) RET 0, implicit $eax @@ -194,16 +194,16 @@ fixedStack: body: | bb.1 (%ir-block.0): ; ALL-LABEL: name: test_store_i32 - ; ALL: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load 4 from %fixed-stack.0, align 16) - ; ALL: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (invariant load 4 from %fixed-stack.1) - ; ALL: MOV32mr [[MOV32rm1]], 1, $noreg, 0, $noreg, [[MOV32rm]] :: (store 4 into %ir.p1) + ; ALL: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load (s32) from %fixed-stack.0, align 16) + ; ALL: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (invariant load (p0) from %fixed-stack.1) + ; ALL: MOV32mr [[MOV32rm1]], 1, $noreg, 0, $noreg, [[MOV32rm]] :: (store (s32) into %ir.p1) ; ALL: $eax = COPY [[MOV32rm1]] ; ALL: RET 0, implicit $eax %2(p0) = G_FRAME_INDEX %fixed-stack.1 - %0(s32) = G_LOAD %2(p0) :: (invariant load 4 from %fixed-stack.1, align 16) + %0(s32) = G_LOAD %2(p0) :: (invariant load (s32) from %fixed-stack.1, align 16) %3(p0) = G_FRAME_INDEX %fixed-stack.0 - %1(p0) = G_LOAD %3(p0) :: (invariant load 4 from %fixed-stack.0, align 4) - G_STORE %0(s32), %1(p0) :: (store 4 into %ir.p1) + %1(p0) = G_LOAD %3(p0) :: (invariant load (p0) from %fixed-stack.0, align 4) + G_STORE %0(s32), %1(p0) :: (store (s32) into %ir.p1) $eax = COPY %1(p0) RET 0, implicit $eax @@ -222,13 +222,13 @@ fixedStack: body: | bb.1 (%ir-block.0): ; ALL-LABEL: name: test_load_ptr - ; ALL: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load 4 from %fixed-stack.0, align 16) - ; ALL: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm [[MOV32rm]], 1, $noreg, 0, $noreg :: (load 4 from %ir.ptr1) + ; ALL: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load (p0) from %fixed-stack.0, align 16) + ; ALL: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm [[MOV32rm]], 1, $noreg, 0, $noreg :: (load (p0) from %ir.ptr1) ; ALL: $eax = COPY [[MOV32rm1]] ; ALL: RET 0, implicit $eax %1(p0) = G_FRAME_INDEX %fixed-stack.0 - %0(p0) = G_LOAD %1(p0) :: (invariant load 4 from %fixed-stack.0, align 16) - %2(p0) = G_LOAD %0(p0) :: (load 4 from %ir.ptr1) + %0(p0) = G_LOAD %1(p0) :: (invariant load (p0) from %fixed-stack.0, align 16) + %2(p0) = G_LOAD %0(p0) :: (load (p0) from %ir.ptr1) $eax = COPY %2(p0) RET 0, implicit $eax @@ -249,15 +249,15 @@ fixedStack: body: | bb.1 (%ir-block.0): ; ALL-LABEL: name: test_store_ptr - ; ALL: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load 4 from %fixed-stack.0, align 16) - ; ALL: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (invariant load 4 from %fixed-stack.1) - ; ALL: MOV32mr [[MOV32rm]], 1, $noreg, 0, $noreg, [[MOV32rm1]] :: (store 4 into %ir.ptr1) + ; ALL: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load (p0) from %fixed-stack.0, align 16) + ; ALL: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (invariant load (p0) from %fixed-stack.1) + ; ALL: MOV32mr [[MOV32rm]], 1, $noreg, 0, $noreg, [[MOV32rm1]] :: (store (p0) into %ir.ptr1) ; ALL: RET 0 %2(p0) = G_FRAME_INDEX %fixed-stack.1 - %0(p0) = G_LOAD %2(p0) :: (invariant load 4 from %fixed-stack.1, align 16) + %0(p0) = G_LOAD %2(p0) :: (invariant load (p0) from %fixed-stack.1, align 16) %3(p0) = G_FRAME_INDEX %fixed-stack.0 - %1(p0) = G_LOAD %3(p0) :: (invariant load 4 from %fixed-stack.0, align 4) - G_STORE %1(p0), %0(p0) :: (store 4 into %ir.ptr1) + %1(p0) = G_LOAD %3(p0) :: (invariant load (p0) from %fixed-stack.0, align 4) + G_STORE %1(p0), %0(p0) :: (store (p0) into %ir.ptr1) RET 0 ... diff --git a/llvm/test/CodeGen/X86/GlobalISel/select-memop-scalar.mir b/llvm/test/CodeGen/X86/GlobalISel/select-memop-scalar.mir index 8fcd411402ba6..576bcadf163f0 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/select-memop-scalar.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/select-memop-scalar.mir @@ -113,26 +113,26 @@ body: | ; SSE-LABEL: name: test_load_i8 ; SSE: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; SSE: [[MOV8rm:%[0-9]+]]:gr8 = MOV8rm [[COPY]], 1, $noreg, 0, $noreg :: (load 1 from %ir.p1) + ; SSE: [[MOV8rm:%[0-9]+]]:gr8 = MOV8rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s8) from %ir.p1) ; SSE: $al = COPY [[MOV8rm]] ; SSE: RET 0, implicit $al ; AVX-LABEL: name: test_load_i8 ; AVX: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX: [[MOV8rm:%[0-9]+]]:gr8 = MOV8rm [[COPY]], 1, $noreg, 0, $noreg :: (load 1 from %ir.p1) + ; AVX: [[MOV8rm:%[0-9]+]]:gr8 = MOV8rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s8) from %ir.p1) ; AVX: $al = COPY [[MOV8rm]] ; AVX: RET 0, implicit $al ; AVX512F-LABEL: name: test_load_i8 ; AVX512F: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX512F: [[MOV8rm:%[0-9]+]]:gr8 = MOV8rm [[COPY]], 1, $noreg, 0, $noreg :: (load 1 from %ir.p1) + ; AVX512F: [[MOV8rm:%[0-9]+]]:gr8 = MOV8rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s8) from %ir.p1) ; AVX512F: $al = COPY [[MOV8rm]] ; AVX512F: RET 0, implicit $al ; AVX512VL-LABEL: name: test_load_i8 ; AVX512VL: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX512VL: [[MOV8rm:%[0-9]+]]:gr8 = MOV8rm [[COPY]], 1, $noreg, 0, $noreg :: (load 1 from %ir.p1) + ; AVX512VL: [[MOV8rm:%[0-9]+]]:gr8 = MOV8rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s8) from %ir.p1) ; AVX512VL: $al = COPY [[MOV8rm]] ; AVX512VL: RET 0, implicit $al %0(p0) = COPY $rdi - %1(s8) = G_LOAD %0(p0) :: (load 1 from %ir.p1) + %1(s8) = G_LOAD %0(p0) :: (load (s8) from %ir.p1) $al = COPY %1(s8) RET 0, implicit $al @@ -151,26 +151,26 @@ body: | ; SSE-LABEL: name: test_load_i16 ; SSE: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; SSE: [[MOV16rm:%[0-9]+]]:gr16 = MOV16rm [[COPY]], 1, $noreg, 0, $noreg :: (load 2 from %ir.p1) + ; SSE: [[MOV16rm:%[0-9]+]]:gr16 = MOV16rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s16) from %ir.p1) ; SSE: $ax = COPY [[MOV16rm]] ; SSE: RET 0, implicit $ax ; AVX-LABEL: name: test_load_i16 ; AVX: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX: [[MOV16rm:%[0-9]+]]:gr16 = MOV16rm [[COPY]], 1, $noreg, 0, $noreg :: (load 2 from %ir.p1) + ; AVX: [[MOV16rm:%[0-9]+]]:gr16 = MOV16rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s16) from %ir.p1) ; AVX: $ax = COPY [[MOV16rm]] ; AVX: RET 0, implicit $ax ; AVX512F-LABEL: name: test_load_i16 ; AVX512F: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX512F: [[MOV16rm:%[0-9]+]]:gr16 = MOV16rm [[COPY]], 1, $noreg, 0, $noreg :: (load 2 from %ir.p1) + ; AVX512F: [[MOV16rm:%[0-9]+]]:gr16 = MOV16rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s16) from %ir.p1) ; AVX512F: $ax = COPY [[MOV16rm]] ; AVX512F: RET 0, implicit $ax ; AVX512VL-LABEL: name: test_load_i16 ; AVX512VL: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX512VL: [[MOV16rm:%[0-9]+]]:gr16 = MOV16rm [[COPY]], 1, $noreg, 0, $noreg :: (load 2 from %ir.p1) + ; AVX512VL: [[MOV16rm:%[0-9]+]]:gr16 = MOV16rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s16) from %ir.p1) ; AVX512VL: $ax = COPY [[MOV16rm]] ; AVX512VL: RET 0, implicit $ax %0(p0) = COPY $rdi - %1(s16) = G_LOAD %0(p0) :: (load 2 from %ir.p1) + %1(s16) = G_LOAD %0(p0) :: (load (s16) from %ir.p1) $ax = COPY %1(s16) RET 0, implicit $ax @@ -189,26 +189,26 @@ body: | ; SSE-LABEL: name: test_load_i32 ; SSE: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; SSE: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load 4 from %ir.p1) + ; SSE: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.p1) ; SSE: $eax = COPY [[MOV32rm]] ; SSE: RET 0, implicit $eax ; AVX-LABEL: name: test_load_i32 ; AVX: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load 4 from %ir.p1) + ; AVX: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.p1) ; AVX: $eax = COPY [[MOV32rm]] ; AVX: RET 0, implicit $eax ; AVX512F-LABEL: name: test_load_i32 ; AVX512F: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX512F: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load 4 from %ir.p1) + ; AVX512F: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.p1) ; AVX512F: $eax = COPY [[MOV32rm]] ; AVX512F: RET 0, implicit $eax ; AVX512VL-LABEL: name: test_load_i32 ; AVX512VL: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX512VL: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load 4 from %ir.p1) + ; AVX512VL: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.p1) ; AVX512VL: $eax = COPY [[MOV32rm]] ; AVX512VL: RET 0, implicit $eax %0(p0) = COPY $rdi - %1(s32) = G_LOAD %0(p0) :: (load 4 from %ir.p1) + %1(s32) = G_LOAD %0(p0) :: (load (s32) from %ir.p1) $eax = COPY %1(s32) RET 0, implicit $eax @@ -227,26 +227,26 @@ body: | ; SSE-LABEL: name: test_load_i64 ; SSE: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; SSE: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load 8 from %ir.p1) + ; SSE: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s64) from %ir.p1) ; SSE: $rax = COPY [[MOV64rm]] ; SSE: RET 0, implicit $rax ; AVX-LABEL: name: test_load_i64 ; AVX: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load 8 from %ir.p1) + ; AVX: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s64) from %ir.p1) ; AVX: $rax = COPY [[MOV64rm]] ; AVX: RET 0, implicit $rax ; AVX512F-LABEL: name: test_load_i64 ; AVX512F: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX512F: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load 8 from %ir.p1) + ; AVX512F: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s64) from %ir.p1) ; AVX512F: $rax = COPY [[MOV64rm]] ; AVX512F: RET 0, implicit $rax ; AVX512VL-LABEL: name: test_load_i64 ; AVX512VL: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX512VL: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load 8 from %ir.p1) + ; AVX512VL: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s64) from %ir.p1) ; AVX512VL: $rax = COPY [[MOV64rm]] ; AVX512VL: RET 0, implicit $rax %0(p0) = COPY $rdi - %1(s64) = G_LOAD %0(p0) :: (load 8 from %ir.p1) + %1(s64) = G_LOAD %0(p0) :: (load (s64) from %ir.p1) $rax = COPY %1(s64) RET 0, implicit $rax @@ -267,34 +267,34 @@ body: | ; SSE-LABEL: name: test_load_float ; SSE: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; SSE: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load 4 from %ir.p1) + ; SSE: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.p1) ; SSE: [[COPY1:%[0-9]+]]:fr32 = COPY [[MOV32rm]] ; SSE: [[COPY2:%[0-9]+]]:vr128 = COPY [[COPY1]] ; SSE: $xmm0 = COPY [[COPY2]] ; SSE: RET 0, implicit $xmm0 ; AVX-LABEL: name: test_load_float ; AVX: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load 4 from %ir.p1) + ; AVX: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.p1) ; AVX: [[COPY1:%[0-9]+]]:fr32 = COPY [[MOV32rm]] ; AVX: [[COPY2:%[0-9]+]]:vr128 = COPY [[COPY1]] ; AVX: $xmm0 = COPY [[COPY2]] ; AVX: RET 0, implicit $xmm0 ; AVX512F-LABEL: name: test_load_float ; AVX512F: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX512F: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load 4 from %ir.p1) + ; AVX512F: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.p1) ; AVX512F: [[COPY1:%[0-9]+]]:fr32x = COPY [[MOV32rm]] ; AVX512F: [[COPY2:%[0-9]+]]:vr128x = COPY [[COPY1]] ; AVX512F: $xmm0 = COPY [[COPY2]] ; AVX512F: RET 0, implicit $xmm0 ; AVX512VL-LABEL: name: test_load_float ; AVX512VL: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX512VL: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load 4 from %ir.p1) + ; AVX512VL: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.p1) ; AVX512VL: [[COPY1:%[0-9]+]]:fr32x = COPY [[MOV32rm]] ; AVX512VL: [[COPY2:%[0-9]+]]:vr128x = COPY [[COPY1]] ; AVX512VL: $xmm0 = COPY [[COPY2]] ; AVX512VL: RET 0, implicit $xmm0 %0:gpr(p0) = COPY $rdi - %1:gpr(s32) = G_LOAD %0(p0) :: (load 4 from %ir.p1) + %1:gpr(s32) = G_LOAD %0(p0) :: (load (s32) from %ir.p1) %3:vecr(s32) = COPY %1(s32) %2:vecr(s128) = G_ANYEXT %3(s32) $xmm0 = COPY %2(s128) @@ -317,34 +317,34 @@ body: | ; SSE-LABEL: name: test_load_float_vecreg ; SSE: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; SSE: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load 4 from %ir.p1) + ; SSE: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.p1) ; SSE: [[COPY1:%[0-9]+]]:fr32 = COPY [[MOV32rm]] ; SSE: [[COPY2:%[0-9]+]]:vr128 = COPY [[COPY1]] ; SSE: $xmm0 = COPY [[COPY2]] ; SSE: RET 0, implicit $xmm0 ; AVX-LABEL: name: test_load_float_vecreg ; AVX: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load 4 from %ir.p1) + ; AVX: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.p1) ; AVX: [[COPY1:%[0-9]+]]:fr32 = COPY [[MOV32rm]] ; AVX: [[COPY2:%[0-9]+]]:vr128 = COPY [[COPY1]] ; AVX: $xmm0 = COPY [[COPY2]] ; AVX: RET 0, implicit $xmm0 ; AVX512F-LABEL: name: test_load_float_vecreg ; AVX512F: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX512F: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load 4 from %ir.p1) + ; AVX512F: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.p1) ; AVX512F: [[COPY1:%[0-9]+]]:fr32x = COPY [[MOV32rm]] ; AVX512F: [[COPY2:%[0-9]+]]:vr128x = COPY [[COPY1]] ; AVX512F: $xmm0 = COPY [[COPY2]] ; AVX512F: RET 0, implicit $xmm0 ; AVX512VL-LABEL: name: test_load_float_vecreg ; AVX512VL: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX512VL: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load 4 from %ir.p1) + ; AVX512VL: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.p1) ; AVX512VL: [[COPY1:%[0-9]+]]:fr32x = COPY [[MOV32rm]] ; AVX512VL: [[COPY2:%[0-9]+]]:vr128x = COPY [[COPY1]] ; AVX512VL: $xmm0 = COPY [[COPY2]] ; AVX512VL: RET 0, implicit $xmm0 %0:gpr(p0) = COPY $rdi - %1:gpr(s32) = G_LOAD %0(p0) :: (load 4 from %ir.p1) + %1:gpr(s32) = G_LOAD %0(p0) :: (load (s32) from %ir.p1) %3:vecr(s32) = COPY %1(s32) %2:vecr(s128) = G_ANYEXT %3(s32) $xmm0 = COPY %2(s128) @@ -367,34 +367,34 @@ body: | ; SSE-LABEL: name: test_load_double ; SSE: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; SSE: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load 8 from %ir.p1) + ; SSE: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s64) from %ir.p1) ; SSE: [[COPY1:%[0-9]+]]:fr64 = COPY [[MOV64rm]] ; SSE: [[COPY2:%[0-9]+]]:vr128 = COPY [[COPY1]] ; SSE: $xmm0 = COPY [[COPY2]] ; SSE: RET 0, implicit $xmm0 ; AVX-LABEL: name: test_load_double ; AVX: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load 8 from %ir.p1) + ; AVX: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s64) from %ir.p1) ; AVX: [[COPY1:%[0-9]+]]:fr64 = COPY [[MOV64rm]] ; AVX: [[COPY2:%[0-9]+]]:vr128 = COPY [[COPY1]] ; AVX: $xmm0 = COPY [[COPY2]] ; AVX: RET 0, implicit $xmm0 ; AVX512F-LABEL: name: test_load_double ; AVX512F: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX512F: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load 8 from %ir.p1) + ; AVX512F: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s64) from %ir.p1) ; AVX512F: [[COPY1:%[0-9]+]]:fr64x = COPY [[MOV64rm]] ; AVX512F: [[COPY2:%[0-9]+]]:vr128x = COPY [[COPY1]] ; AVX512F: $xmm0 = COPY [[COPY2]] ; AVX512F: RET 0, implicit $xmm0 ; AVX512VL-LABEL: name: test_load_double ; AVX512VL: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX512VL: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load 8 from %ir.p1) + ; AVX512VL: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s64) from %ir.p1) ; AVX512VL: [[COPY1:%[0-9]+]]:fr64x = COPY [[MOV64rm]] ; AVX512VL: [[COPY2:%[0-9]+]]:vr128x = COPY [[COPY1]] ; AVX512VL: $xmm0 = COPY [[COPY2]] ; AVX512VL: RET 0, implicit $xmm0 %0:gpr(p0) = COPY $rdi - %1:gpr(s64) = G_LOAD %0(p0) :: (load 8 from %ir.p1) + %1:gpr(s64) = G_LOAD %0(p0) :: (load (s64) from %ir.p1) %3:vecr(s64) = COPY %1(s64) %2:vecr(s128) = G_ANYEXT %3(s64) $xmm0 = COPY %2(s128) @@ -417,34 +417,34 @@ body: | ; SSE-LABEL: name: test_load_double_vecreg ; SSE: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; SSE: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load 8 from %ir.p1) + ; SSE: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s64) from %ir.p1) ; SSE: [[COPY1:%[0-9]+]]:fr64 = COPY [[MOV64rm]] ; SSE: [[COPY2:%[0-9]+]]:vr128 = COPY [[COPY1]] ; SSE: $xmm0 = COPY [[COPY2]] ; SSE: RET 0, implicit $xmm0 ; AVX-LABEL: name: test_load_double_vecreg ; AVX: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load 8 from %ir.p1) + ; AVX: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s64) from %ir.p1) ; AVX: [[COPY1:%[0-9]+]]:fr64 = COPY [[MOV64rm]] ; AVX: [[COPY2:%[0-9]+]]:vr128 = COPY [[COPY1]] ; AVX: $xmm0 = COPY [[COPY2]] ; AVX: RET 0, implicit $xmm0 ; AVX512F-LABEL: name: test_load_double_vecreg ; AVX512F: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX512F: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load 8 from %ir.p1) + ; AVX512F: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s64) from %ir.p1) ; AVX512F: [[COPY1:%[0-9]+]]:fr64x = COPY [[MOV64rm]] ; AVX512F: [[COPY2:%[0-9]+]]:vr128x = COPY [[COPY1]] ; AVX512F: $xmm0 = COPY [[COPY2]] ; AVX512F: RET 0, implicit $xmm0 ; AVX512VL-LABEL: name: test_load_double_vecreg ; AVX512VL: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX512VL: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load 8 from %ir.p1) + ; AVX512VL: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s64) from %ir.p1) ; AVX512VL: [[COPY1:%[0-9]+]]:fr64x = COPY [[MOV64rm]] ; AVX512VL: [[COPY2:%[0-9]+]]:vr128x = COPY [[COPY1]] ; AVX512VL: $xmm0 = COPY [[COPY2]] ; AVX512VL: RET 0, implicit $xmm0 %0:gpr(p0) = COPY $rdi - %1:gpr(s64) = G_LOAD %0(p0) :: (load 8 from %ir.p1) + %1:gpr(s64) = G_LOAD %0(p0) :: (load (s64) from %ir.p1) %3:vecr(s64) = COPY %1(s64) %2:vecr(s128) = G_ANYEXT %3(s64) $xmm0 = COPY %2(s128) @@ -466,30 +466,30 @@ body: | ; SSE-LABEL: name: test_store_i32 ; SSE: [[COPY:%[0-9]+]]:gr32 = COPY $edi ; SSE: [[COPY1:%[0-9]+]]:gr64 = COPY $rsi - ; SSE: MOV32mr [[COPY1]], 1, $noreg, 0, $noreg, [[COPY]] :: (store 4 into %ir.p1) + ; SSE: MOV32mr [[COPY1]], 1, $noreg, 0, $noreg, [[COPY]] :: (store (s32) into %ir.p1) ; SSE: $rax = COPY [[COPY1]] ; SSE: RET 0, implicit $rax ; AVX-LABEL: name: test_store_i32 ; AVX: [[COPY:%[0-9]+]]:gr32 = COPY $edi ; AVX: [[COPY1:%[0-9]+]]:gr64 = COPY $rsi - ; AVX: MOV32mr [[COPY1]], 1, $noreg, 0, $noreg, [[COPY]] :: (store 4 into %ir.p1) + ; AVX: MOV32mr [[COPY1]], 1, $noreg, 0, $noreg, [[COPY]] :: (store (s32) into %ir.p1) ; AVX: $rax = COPY [[COPY1]] ; AVX: RET 0, implicit $rax ; AVX512F-LABEL: name: test_store_i32 ; AVX512F: [[COPY:%[0-9]+]]:gr32 = COPY $edi ; AVX512F: [[COPY1:%[0-9]+]]:gr64 = COPY $rsi - ; AVX512F: MOV32mr [[COPY1]], 1, $noreg, 0, $noreg, [[COPY]] :: (store 4 into %ir.p1) + ; AVX512F: MOV32mr [[COPY1]], 1, $noreg, 0, $noreg, [[COPY]] :: (store (s32) into %ir.p1) ; AVX512F: $rax = COPY [[COPY1]] ; AVX512F: RET 0, implicit $rax ; AVX512VL-LABEL: name: test_store_i32 ; AVX512VL: [[COPY:%[0-9]+]]:gr32 = COPY $edi ; AVX512VL: [[COPY1:%[0-9]+]]:gr64 = COPY $rsi - ; AVX512VL: MOV32mr [[COPY1]], 1, $noreg, 0, $noreg, [[COPY]] :: (store 4 into %ir.p1) + ; AVX512VL: MOV32mr [[COPY1]], 1, $noreg, 0, $noreg, [[COPY]] :: (store (s32) into %ir.p1) ; AVX512VL: $rax = COPY [[COPY1]] ; AVX512VL: RET 0, implicit $rax %0(s32) = COPY $edi %1(p0) = COPY $rsi - G_STORE %0(s32), %1(p0) :: (store 4 into %ir.p1) + G_STORE %0(s32), %1(p0) :: (store (s32) into %ir.p1) $rax = COPY %1(p0) RET 0, implicit $rax @@ -509,30 +509,30 @@ body: | ; SSE-LABEL: name: test_store_i64 ; SSE: [[COPY:%[0-9]+]]:gr64 = COPY $rdi ; SSE: [[COPY1:%[0-9]+]]:gr64 = COPY $rsi - ; SSE: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, [[COPY]] :: (store 8 into %ir.p1) + ; SSE: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, [[COPY]] :: (store (s64) into %ir.p1) ; SSE: $rax = COPY [[COPY1]] ; SSE: RET 0, implicit $rax ; AVX-LABEL: name: test_store_i64 ; AVX: [[COPY:%[0-9]+]]:gr64 = COPY $rdi ; AVX: [[COPY1:%[0-9]+]]:gr64 = COPY $rsi - ; AVX: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, [[COPY]] :: (store 8 into %ir.p1) + ; AVX: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, [[COPY]] :: (store (s64) into %ir.p1) ; AVX: $rax = COPY [[COPY1]] ; AVX: RET 0, implicit $rax ; AVX512F-LABEL: name: test_store_i64 ; AVX512F: [[COPY:%[0-9]+]]:gr64 = COPY $rdi ; AVX512F: [[COPY1:%[0-9]+]]:gr64 = COPY $rsi - ; AVX512F: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, [[COPY]] :: (store 8 into %ir.p1) + ; AVX512F: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, [[COPY]] :: (store (s64) into %ir.p1) ; AVX512F: $rax = COPY [[COPY1]] ; AVX512F: RET 0, implicit $rax ; AVX512VL-LABEL: name: test_store_i64 ; AVX512VL: [[COPY:%[0-9]+]]:gr64 = COPY $rdi ; AVX512VL: [[COPY1:%[0-9]+]]:gr64 = COPY $rsi - ; AVX512VL: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, [[COPY]] :: (store 8 into %ir.p1) + ; AVX512VL: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, [[COPY]] :: (store (s64) into %ir.p1) ; AVX512VL: $rax = COPY [[COPY1]] ; AVX512VL: RET 0, implicit $rax %0(s64) = COPY $rdi %1(p0) = COPY $rsi - G_STORE %0(s64), %1(p0) :: (store 8 into %ir.p1) + G_STORE %0(s64), %1(p0) :: (store (s64) into %ir.p1) $rax = COPY %1(p0) RET 0, implicit $rax @@ -556,7 +556,7 @@ body: | ; SSE: [[COPY1:%[0-9]+]]:fr32 = COPY [[COPY]] ; SSE: [[COPY2:%[0-9]+]]:gr64 = COPY $rdi ; SSE: [[COPY3:%[0-9]+]]:gr32 = COPY [[COPY1]] - ; SSE: MOV32mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store 4 into %ir.p1) + ; SSE: MOV32mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store (s32) into %ir.p1) ; SSE: $rax = COPY [[COPY2]] ; SSE: RET 0, implicit $rax ; AVX-LABEL: name: test_store_float @@ -564,7 +564,7 @@ body: | ; AVX: [[COPY1:%[0-9]+]]:fr32 = COPY [[COPY]] ; AVX: [[COPY2:%[0-9]+]]:gr64 = COPY $rdi ; AVX: [[COPY3:%[0-9]+]]:gr32 = COPY [[COPY1]] - ; AVX: MOV32mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store 4 into %ir.p1) + ; AVX: MOV32mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store (s32) into %ir.p1) ; AVX: $rax = COPY [[COPY2]] ; AVX: RET 0, implicit $rax ; AVX512F-LABEL: name: test_store_float @@ -572,7 +572,7 @@ body: | ; AVX512F: [[COPY1:%[0-9]+]]:fr32x = COPY [[COPY]] ; AVX512F: [[COPY2:%[0-9]+]]:gr64 = COPY $rdi ; AVX512F: [[COPY3:%[0-9]+]]:gr32 = COPY [[COPY1]] - ; AVX512F: MOV32mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store 4 into %ir.p1) + ; AVX512F: MOV32mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store (s32) into %ir.p1) ; AVX512F: $rax = COPY [[COPY2]] ; AVX512F: RET 0, implicit $rax ; AVX512VL-LABEL: name: test_store_float @@ -580,14 +580,14 @@ body: | ; AVX512VL: [[COPY1:%[0-9]+]]:fr32x = COPY [[COPY]] ; AVX512VL: [[COPY2:%[0-9]+]]:gr64 = COPY $rdi ; AVX512VL: [[COPY3:%[0-9]+]]:gr32 = COPY [[COPY1]] - ; AVX512VL: MOV32mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store 4 into %ir.p1) + ; AVX512VL: MOV32mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store (s32) into %ir.p1) ; AVX512VL: $rax = COPY [[COPY2]] ; AVX512VL: RET 0, implicit $rax %2:vecr(s128) = COPY $xmm0 %0:vecr(s32) = G_TRUNC %2(s128) %1:gpr(p0) = COPY $rdi %3:gpr(s32) = COPY %0(s32) - G_STORE %3(s32), %1(p0) :: (store 4 into %ir.p1) + G_STORE %3(s32), %1(p0) :: (store (s32) into %ir.p1) $rax = COPY %1(p0) RET 0, implicit $rax @@ -611,7 +611,7 @@ body: | ; SSE: [[COPY1:%[0-9]+]]:fr32 = COPY [[COPY]] ; SSE: [[COPY2:%[0-9]+]]:gr64 = COPY $rdi ; SSE: [[COPY3:%[0-9]+]]:gr32 = COPY [[COPY1]] - ; SSE: MOV32mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store 4 into %ir.p1) + ; SSE: MOV32mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store (s32) into %ir.p1) ; SSE: $rax = COPY [[COPY2]] ; SSE: RET 0, implicit $rax ; AVX-LABEL: name: test_store_float_vec @@ -619,7 +619,7 @@ body: | ; AVX: [[COPY1:%[0-9]+]]:fr32 = COPY [[COPY]] ; AVX: [[COPY2:%[0-9]+]]:gr64 = COPY $rdi ; AVX: [[COPY3:%[0-9]+]]:gr32 = COPY [[COPY1]] - ; AVX: MOV32mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store 4 into %ir.p1) + ; AVX: MOV32mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store (s32) into %ir.p1) ; AVX: $rax = COPY [[COPY2]] ; AVX: RET 0, implicit $rax ; AVX512F-LABEL: name: test_store_float_vec @@ -627,7 +627,7 @@ body: | ; AVX512F: [[COPY1:%[0-9]+]]:fr32x = COPY [[COPY]] ; AVX512F: [[COPY2:%[0-9]+]]:gr64 = COPY $rdi ; AVX512F: [[COPY3:%[0-9]+]]:gr32 = COPY [[COPY1]] - ; AVX512F: MOV32mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store 4 into %ir.p1) + ; AVX512F: MOV32mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store (s32) into %ir.p1) ; AVX512F: $rax = COPY [[COPY2]] ; AVX512F: RET 0, implicit $rax ; AVX512VL-LABEL: name: test_store_float_vec @@ -635,14 +635,14 @@ body: | ; AVX512VL: [[COPY1:%[0-9]+]]:fr32x = COPY [[COPY]] ; AVX512VL: [[COPY2:%[0-9]+]]:gr64 = COPY $rdi ; AVX512VL: [[COPY3:%[0-9]+]]:gr32 = COPY [[COPY1]] - ; AVX512VL: MOV32mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store 4 into %ir.p1) + ; AVX512VL: MOV32mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store (s32) into %ir.p1) ; AVX512VL: $rax = COPY [[COPY2]] ; AVX512VL: RET 0, implicit $rax %2:vecr(s128) = COPY $xmm0 %0:vecr(s32) = G_TRUNC %2(s128) %1:gpr(p0) = COPY $rdi %3:gpr(s32) = COPY %0(s32) - G_STORE %3(s32), %1(p0) :: (store 4 into %ir.p1) + G_STORE %3(s32), %1(p0) :: (store (s32) into %ir.p1) $rax = COPY %1(p0) RET 0, implicit $rax @@ -667,7 +667,7 @@ body: | ; SSE: [[COPY1:%[0-9]+]]:fr64 = COPY [[COPY]] ; SSE: [[COPY2:%[0-9]+]]:gr64 = COPY $rdi ; SSE: [[COPY3:%[0-9]+]]:gr64 = COPY [[COPY1]] - ; SSE: MOV64mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store 8 into %ir.p1) + ; SSE: MOV64mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store (s64) into %ir.p1) ; SSE: $rax = COPY [[COPY2]] ; SSE: RET 0, implicit $rax ; AVX-LABEL: name: test_store_double @@ -675,7 +675,7 @@ body: | ; AVX: [[COPY1:%[0-9]+]]:fr64 = COPY [[COPY]] ; AVX: [[COPY2:%[0-9]+]]:gr64 = COPY $rdi ; AVX: [[COPY3:%[0-9]+]]:gr64 = COPY [[COPY1]] - ; AVX: MOV64mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store 8 into %ir.p1) + ; AVX: MOV64mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store (s64) into %ir.p1) ; AVX: $rax = COPY [[COPY2]] ; AVX: RET 0, implicit $rax ; AVX512F-LABEL: name: test_store_double @@ -683,7 +683,7 @@ body: | ; AVX512F: [[COPY1:%[0-9]+]]:fr64x = COPY [[COPY]] ; AVX512F: [[COPY2:%[0-9]+]]:gr64 = COPY $rdi ; AVX512F: [[COPY3:%[0-9]+]]:gr64 = COPY [[COPY1]] - ; AVX512F: MOV64mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store 8 into %ir.p1) + ; AVX512F: MOV64mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store (s64) into %ir.p1) ; AVX512F: $rax = COPY [[COPY2]] ; AVX512F: RET 0, implicit $rax ; AVX512VL-LABEL: name: test_store_double @@ -691,14 +691,14 @@ body: | ; AVX512VL: [[COPY1:%[0-9]+]]:fr64x = COPY [[COPY]] ; AVX512VL: [[COPY2:%[0-9]+]]:gr64 = COPY $rdi ; AVX512VL: [[COPY3:%[0-9]+]]:gr64 = COPY [[COPY1]] - ; AVX512VL: MOV64mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store 8 into %ir.p1) + ; AVX512VL: MOV64mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store (s64) into %ir.p1) ; AVX512VL: $rax = COPY [[COPY2]] ; AVX512VL: RET 0, implicit $rax %2:vecr(s128) = COPY $xmm0 %0:vecr(s64) = G_TRUNC %2(s128) %1:gpr(p0) = COPY $rdi %3:gpr(s64) = COPY %0(s64) - G_STORE %3(s64), %1(p0) :: (store 8 into %ir.p1) + G_STORE %3(s64), %1(p0) :: (store (s64) into %ir.p1) $rax = COPY %1(p0) RET 0, implicit $rax @@ -722,7 +722,7 @@ body: | ; SSE: [[COPY1:%[0-9]+]]:fr64 = COPY [[COPY]] ; SSE: [[COPY2:%[0-9]+]]:gr64 = COPY $rdi ; SSE: [[COPY3:%[0-9]+]]:gr64 = COPY [[COPY1]] - ; SSE: MOV64mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store 8 into %ir.p1) + ; SSE: MOV64mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store (s64) into %ir.p1) ; SSE: $rax = COPY [[COPY2]] ; SSE: RET 0, implicit $rax ; AVX-LABEL: name: test_store_double_vec @@ -730,7 +730,7 @@ body: | ; AVX: [[COPY1:%[0-9]+]]:fr64 = COPY [[COPY]] ; AVX: [[COPY2:%[0-9]+]]:gr64 = COPY $rdi ; AVX: [[COPY3:%[0-9]+]]:gr64 = COPY [[COPY1]] - ; AVX: MOV64mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store 8 into %ir.p1) + ; AVX: MOV64mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store (s64) into %ir.p1) ; AVX: $rax = COPY [[COPY2]] ; AVX: RET 0, implicit $rax ; AVX512F-LABEL: name: test_store_double_vec @@ -738,7 +738,7 @@ body: | ; AVX512F: [[COPY1:%[0-9]+]]:fr64x = COPY [[COPY]] ; AVX512F: [[COPY2:%[0-9]+]]:gr64 = COPY $rdi ; AVX512F: [[COPY3:%[0-9]+]]:gr64 = COPY [[COPY1]] - ; AVX512F: MOV64mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store 8 into %ir.p1) + ; AVX512F: MOV64mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store (s64) into %ir.p1) ; AVX512F: $rax = COPY [[COPY2]] ; AVX512F: RET 0, implicit $rax ; AVX512VL-LABEL: name: test_store_double_vec @@ -746,14 +746,14 @@ body: | ; AVX512VL: [[COPY1:%[0-9]+]]:fr64x = COPY [[COPY]] ; AVX512VL: [[COPY2:%[0-9]+]]:gr64 = COPY $rdi ; AVX512VL: [[COPY3:%[0-9]+]]:gr64 = COPY [[COPY1]] - ; AVX512VL: MOV64mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store 8 into %ir.p1) + ; AVX512VL: MOV64mr [[COPY2]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store (s64) into %ir.p1) ; AVX512VL: $rax = COPY [[COPY2]] ; AVX512VL: RET 0, implicit $rax %2:vecr(s128) = COPY $xmm0 %0:vecr(s64) = G_TRUNC %2(s128) %1:gpr(p0) = COPY $rdi %3:gpr(s64) = COPY %0(s64) - G_STORE %3(s64), %1(p0) :: (store 8 into %ir.p1) + G_STORE %3(s64), %1(p0) :: (store (s64) into %ir.p1) $rax = COPY %1(p0) RET 0, implicit $rax @@ -773,26 +773,26 @@ body: | ; SSE-LABEL: name: test_load_ptr ; SSE: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; SSE: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load 8 from %ir.ptr1) + ; SSE: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load (p0) from %ir.ptr1) ; SSE: $rax = COPY [[MOV64rm]] ; SSE: RET 0, implicit $rax ; AVX-LABEL: name: test_load_ptr ; AVX: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load 8 from %ir.ptr1) + ; AVX: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load (p0) from %ir.ptr1) ; AVX: $rax = COPY [[MOV64rm]] ; AVX: RET 0, implicit $rax ; AVX512F-LABEL: name: test_load_ptr ; AVX512F: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX512F: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load 8 from %ir.ptr1) + ; AVX512F: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load (p0) from %ir.ptr1) ; AVX512F: $rax = COPY [[MOV64rm]] ; AVX512F: RET 0, implicit $rax ; AVX512VL-LABEL: name: test_load_ptr ; AVX512VL: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX512VL: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load 8 from %ir.ptr1) + ; AVX512VL: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load (p0) from %ir.ptr1) ; AVX512VL: $rax = COPY [[MOV64rm]] ; AVX512VL: RET 0, implicit $rax %0(p0) = COPY $rdi - %1(p0) = G_LOAD %0(p0) :: (load 8 from %ir.ptr1) + %1(p0) = G_LOAD %0(p0) :: (load (p0) from %ir.ptr1) $rax = COPY %1(p0) RET 0, implicit $rax @@ -813,26 +813,26 @@ body: | ; SSE-LABEL: name: test_store_ptr ; SSE: [[COPY:%[0-9]+]]:gr64 = COPY $rdi ; SSE: [[COPY1:%[0-9]+]]:gr64 = COPY $rsi - ; SSE: MOV64mr [[COPY]], 1, $noreg, 0, $noreg, [[COPY1]] :: (store 8 into %ir.ptr1) + ; SSE: MOV64mr [[COPY]], 1, $noreg, 0, $noreg, [[COPY1]] :: (store (p0) into %ir.ptr1) ; SSE: RET 0 ; AVX-LABEL: name: test_store_ptr ; AVX: [[COPY:%[0-9]+]]:gr64 = COPY $rdi ; AVX: [[COPY1:%[0-9]+]]:gr64 = COPY $rsi - ; AVX: MOV64mr [[COPY]], 1, $noreg, 0, $noreg, [[COPY1]] :: (store 8 into %ir.ptr1) + ; AVX: MOV64mr [[COPY]], 1, $noreg, 0, $noreg, [[COPY1]] :: (store (p0) into %ir.ptr1) ; AVX: RET 0 ; AVX512F-LABEL: name: test_store_ptr ; AVX512F: [[COPY:%[0-9]+]]:gr64 = COPY $rdi ; AVX512F: [[COPY1:%[0-9]+]]:gr64 = COPY $rsi - ; AVX512F: MOV64mr [[COPY]], 1, $noreg, 0, $noreg, [[COPY1]] :: (store 8 into %ir.ptr1) + ; AVX512F: MOV64mr [[COPY]], 1, $noreg, 0, $noreg, [[COPY1]] :: (store (p0) into %ir.ptr1) ; AVX512F: RET 0 ; AVX512VL-LABEL: name: test_store_ptr ; AVX512VL: [[COPY:%[0-9]+]]:gr64 = COPY $rdi ; AVX512VL: [[COPY1:%[0-9]+]]:gr64 = COPY $rsi - ; AVX512VL: MOV64mr [[COPY]], 1, $noreg, 0, $noreg, [[COPY1]] :: (store 8 into %ir.ptr1) + ; AVX512VL: MOV64mr [[COPY]], 1, $noreg, 0, $noreg, [[COPY1]] :: (store (p0) into %ir.ptr1) ; AVX512VL: RET 0 %0(p0) = COPY $rdi %1(p0) = COPY $rsi - G_STORE %1(p0), %0(p0) :: (store 8 into %ir.ptr1) + G_STORE %1(p0), %0(p0) :: (store (p0) into %ir.ptr1) RET 0 ... @@ -854,37 +854,37 @@ body: | ; SSE-LABEL: name: test_gep_folding ; SSE: [[COPY:%[0-9]+]]:gr64 = COPY $rdi ; SSE: [[COPY1:%[0-9]+]]:gr32 = COPY $esi - ; SSE: MOV32mr [[COPY]], 1, $noreg, 20, $noreg, [[COPY1]] :: (store 4 into %ir.arrayidx) - ; SSE: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 20, $noreg :: (load 4 from %ir.arrayidx) + ; SSE: MOV32mr [[COPY]], 1, $noreg, 20, $noreg, [[COPY1]] :: (store (s32) into %ir.arrayidx) + ; SSE: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 20, $noreg :: (load (s32) from %ir.arrayidx) ; SSE: $eax = COPY [[MOV32rm]] ; SSE: RET 0, implicit $eax ; AVX-LABEL: name: test_gep_folding ; AVX: [[COPY:%[0-9]+]]:gr64 = COPY $rdi ; AVX: [[COPY1:%[0-9]+]]:gr32 = COPY $esi - ; AVX: MOV32mr [[COPY]], 1, $noreg, 20, $noreg, [[COPY1]] :: (store 4 into %ir.arrayidx) - ; AVX: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 20, $noreg :: (load 4 from %ir.arrayidx) + ; AVX: MOV32mr [[COPY]], 1, $noreg, 20, $noreg, [[COPY1]] :: (store (s32) into %ir.arrayidx) + ; AVX: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 20, $noreg :: (load (s32) from %ir.arrayidx) ; AVX: $eax = COPY [[MOV32rm]] ; AVX: RET 0, implicit $eax ; AVX512F-LABEL: name: test_gep_folding ; AVX512F: [[COPY:%[0-9]+]]:gr64 = COPY $rdi ; AVX512F: [[COPY1:%[0-9]+]]:gr32 = COPY $esi - ; AVX512F: MOV32mr [[COPY]], 1, $noreg, 20, $noreg, [[COPY1]] :: (store 4 into %ir.arrayidx) - ; AVX512F: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 20, $noreg :: (load 4 from %ir.arrayidx) + ; AVX512F: MOV32mr [[COPY]], 1, $noreg, 20, $noreg, [[COPY1]] :: (store (s32) into %ir.arrayidx) + ; AVX512F: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 20, $noreg :: (load (s32) from %ir.arrayidx) ; AVX512F: $eax = COPY [[MOV32rm]] ; AVX512F: RET 0, implicit $eax ; AVX512VL-LABEL: name: test_gep_folding ; AVX512VL: [[COPY:%[0-9]+]]:gr64 = COPY $rdi ; AVX512VL: [[COPY1:%[0-9]+]]:gr32 = COPY $esi - ; AVX512VL: MOV32mr [[COPY]], 1, $noreg, 20, $noreg, [[COPY1]] :: (store 4 into %ir.arrayidx) - ; AVX512VL: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 20, $noreg :: (load 4 from %ir.arrayidx) + ; AVX512VL: MOV32mr [[COPY]], 1, $noreg, 20, $noreg, [[COPY1]] :: (store (s32) into %ir.arrayidx) + ; AVX512VL: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 20, $noreg :: (load (s32) from %ir.arrayidx) ; AVX512VL: $eax = COPY [[MOV32rm]] ; AVX512VL: RET 0, implicit $eax %0(p0) = COPY $rdi %1(s32) = COPY $esi %2(s64) = G_CONSTANT i64 20 %3(p0) = G_PTR_ADD %0, %2(s64) - G_STORE %1(s32), %3(p0) :: (store 4 into %ir.arrayidx) - %4(s32) = G_LOAD %3(p0) :: (load 4 from %ir.arrayidx) + G_STORE %1(s32), %3(p0) :: (store (s32) into %ir.arrayidx) + %4(s32) = G_LOAD %3(p0) :: (load (s32) from %ir.arrayidx) $eax = COPY %4(s32) RET 0, implicit $eax @@ -909,8 +909,8 @@ body: | ; SSE: [[COPY1:%[0-9]+]]:gr32 = COPY $esi ; SSE: [[MOV64ri:%[0-9]+]]:gr64_nosp = MOV64ri 228719476720 ; SSE: [[LEA64r:%[0-9]+]]:gr64 = LEA64r [[COPY]], 1, [[MOV64ri]], 0, $noreg - ; SSE: MOV32mr [[LEA64r]], 1, $noreg, 0, $noreg, [[COPY1]] :: (store 4 into %ir.arrayidx) - ; SSE: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[LEA64r]], 1, $noreg, 0, $noreg :: (load 4 from %ir.arrayidx) + ; SSE: MOV32mr [[LEA64r]], 1, $noreg, 0, $noreg, [[COPY1]] :: (store (s32) into %ir.arrayidx) + ; SSE: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[LEA64r]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.arrayidx) ; SSE: $eax = COPY [[MOV32rm]] ; SSE: RET 0, implicit $eax ; AVX-LABEL: name: test_gep_folding_largeGepIndex @@ -918,8 +918,8 @@ body: | ; AVX: [[COPY1:%[0-9]+]]:gr32 = COPY $esi ; AVX: [[MOV64ri:%[0-9]+]]:gr64_nosp = MOV64ri 228719476720 ; AVX: [[LEA64r:%[0-9]+]]:gr64 = LEA64r [[COPY]], 1, [[MOV64ri]], 0, $noreg - ; AVX: MOV32mr [[LEA64r]], 1, $noreg, 0, $noreg, [[COPY1]] :: (store 4 into %ir.arrayidx) - ; AVX: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[LEA64r]], 1, $noreg, 0, $noreg :: (load 4 from %ir.arrayidx) + ; AVX: MOV32mr [[LEA64r]], 1, $noreg, 0, $noreg, [[COPY1]] :: (store (s32) into %ir.arrayidx) + ; AVX: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[LEA64r]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.arrayidx) ; AVX: $eax = COPY [[MOV32rm]] ; AVX: RET 0, implicit $eax ; AVX512F-LABEL: name: test_gep_folding_largeGepIndex @@ -927,8 +927,8 @@ body: | ; AVX512F: [[COPY1:%[0-9]+]]:gr32 = COPY $esi ; AVX512F: [[MOV64ri:%[0-9]+]]:gr64_nosp = MOV64ri 228719476720 ; AVX512F: [[LEA64r:%[0-9]+]]:gr64 = LEA64r [[COPY]], 1, [[MOV64ri]], 0, $noreg - ; AVX512F: MOV32mr [[LEA64r]], 1, $noreg, 0, $noreg, [[COPY1]] :: (store 4 into %ir.arrayidx) - ; AVX512F: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[LEA64r]], 1, $noreg, 0, $noreg :: (load 4 from %ir.arrayidx) + ; AVX512F: MOV32mr [[LEA64r]], 1, $noreg, 0, $noreg, [[COPY1]] :: (store (s32) into %ir.arrayidx) + ; AVX512F: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[LEA64r]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.arrayidx) ; AVX512F: $eax = COPY [[MOV32rm]] ; AVX512F: RET 0, implicit $eax ; AVX512VL-LABEL: name: test_gep_folding_largeGepIndex @@ -936,16 +936,16 @@ body: | ; AVX512VL: [[COPY1:%[0-9]+]]:gr32 = COPY $esi ; AVX512VL: [[MOV64ri:%[0-9]+]]:gr64_nosp = MOV64ri 228719476720 ; AVX512VL: [[LEA64r:%[0-9]+]]:gr64 = LEA64r [[COPY]], 1, [[MOV64ri]], 0, $noreg - ; AVX512VL: MOV32mr [[LEA64r]], 1, $noreg, 0, $noreg, [[COPY1]] :: (store 4 into %ir.arrayidx) - ; AVX512VL: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[LEA64r]], 1, $noreg, 0, $noreg :: (load 4 from %ir.arrayidx) + ; AVX512VL: MOV32mr [[LEA64r]], 1, $noreg, 0, $noreg, [[COPY1]] :: (store (s32) into %ir.arrayidx) + ; AVX512VL: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[LEA64r]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.arrayidx) ; AVX512VL: $eax = COPY [[MOV32rm]] ; AVX512VL: RET 0, implicit $eax %0(p0) = COPY $rdi %1(s32) = COPY $esi %2(s64) = G_CONSTANT i64 228719476720 %3(p0) = G_PTR_ADD %0, %2(s64) - G_STORE %1(s32), %3(p0) :: (store 4 into %ir.arrayidx) - %4(s32) = G_LOAD %3(p0) :: (load 4 from %ir.arrayidx) + G_STORE %1(s32), %3(p0) :: (store (s32) into %ir.arrayidx) + %4(s32) = G_LOAD %3(p0) :: (load (s32) from %ir.arrayidx) $eax = COPY %4(s32) RET 0, implicit $eax diff --git a/llvm/test/CodeGen/X86/GlobalISel/select-memop-v128.mir b/llvm/test/CodeGen/X86/GlobalISel/select-memop-v128.mir index 18b2a752c8043..8dafc3cbb7223 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/select-memop-v128.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/select-memop-v128.mir @@ -35,17 +35,17 @@ registers: - { id: 0, class: gpr } - { id: 1, class: vecr } # ALL: %0:gr64 = COPY $rdi -# SSE: %1:vr128 = MOVUPSrm %0, 1, $noreg, 0, $noreg :: (load 16 from %ir.p1, align 1) -# AVX: %1:vr128 = VMOVUPSrm %0, 1, $noreg, 0, $noreg :: (load 16 from %ir.p1, align 1) -# AVX512F: %1:vr128x = VMOVUPSZ128rm_NOVLX %0, 1, $noreg, 0, $noreg :: (load 16 from %ir.p1, align 1) -# AVX512VL: %1:vr128x = VMOVUPSZ128rm %0, 1, $noreg, 0, $noreg :: (load 16 from %ir.p1, align 1) +# SSE: %1:vr128 = MOVUPSrm %0, 1, $noreg, 0, $noreg :: (load (<4 x s32>) from %ir.p1, align 1) +# AVX: %1:vr128 = VMOVUPSrm %0, 1, $noreg, 0, $noreg :: (load (<4 x s32>) from %ir.p1, align 1) +# AVX512F: %1:vr128x = VMOVUPSZ128rm_NOVLX %0, 1, $noreg, 0, $noreg :: (load (<4 x s32>) from %ir.p1, align 1) +# AVX512VL: %1:vr128x = VMOVUPSZ128rm %0, 1, $noreg, 0, $noreg :: (load (<4 x s32>) from %ir.p1, align 1) # ALL: $xmm0 = COPY %1 body: | bb.1 (%ir-block.0): liveins: $rdi %0(p0) = COPY $rdi - %1(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.p1, align 1) + %1(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.p1, align 1) $xmm0 = COPY %1(<4 x s32>) RET 0, implicit $xmm0 @@ -60,17 +60,17 @@ registers: - { id: 0, class: gpr } - { id: 1, class: vecr } # ALL: %0:gr64 = COPY $rdi -# SSE: %1:vr128 = MOVAPSrm %0, 1, $noreg, 0, $noreg :: (load 16 from %ir.p1) -# AVX: %1:vr128 = VMOVAPSrm %0, 1, $noreg, 0, $noreg :: (load 16 from %ir.p1) -# AVX512F: %1:vr128x = VMOVAPSZ128rm_NOVLX %0, 1, $noreg, 0, $noreg :: (load 16 from %ir.p1) -# AVX512VL: %1:vr128x = VMOVAPSZ128rm %0, 1, $noreg, 0, $noreg :: (load 16 from %ir.p1) +# SSE: %1:vr128 = MOVAPSrm %0, 1, $noreg, 0, $noreg :: (load (<4 x s32>) from %ir.p1) +# AVX: %1:vr128 = VMOVAPSrm %0, 1, $noreg, 0, $noreg :: (load (<4 x s32>) from %ir.p1) +# AVX512F: %1:vr128x = VMOVAPSZ128rm_NOVLX %0, 1, $noreg, 0, $noreg :: (load (<4 x s32>) from %ir.p1) +# AVX512VL: %1:vr128x = VMOVAPSZ128rm %0, 1, $noreg, 0, $noreg :: (load (<4 x s32>) from %ir.p1) # ALL: $xmm0 = COPY %1 body: | bb.1 (%ir-block.0): liveins: $rdi %0(p0) = COPY $rdi - %1(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.p1) + %1(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>) from %ir.p1) $xmm0 = COPY %1(<4 x s32>) RET 0, implicit $xmm0 @@ -87,10 +87,10 @@ registers: # NO_AVX512F: %0:vr128 = COPY $xmm0 # AVX512ALL: %0:vr128x = COPY $xmm0 # ALL: %1:gr64 = COPY $rdi -# SSE: MOVAPSmr %1, 1, $noreg, 0, $noreg, %0 :: (store 16 into %ir.p1) -# AVX: VMOVAPSmr %1, 1, $noreg, 0, $noreg, %0 :: (store 16 into %ir.p1) -# AVX512F: VMOVAPSZ128mr_NOVLX %1, 1, $noreg, 0, $noreg, %0 :: (store 16 into %ir.p1) -# AVX512VL: VMOVAPSZ128mr %1, 1, $noreg, 0, $noreg, %0 :: (store 16 into %ir.p1) +# SSE: MOVAPSmr %1, 1, $noreg, 0, $noreg, %0 :: (store (<4 x s32>) into %ir.p1) +# AVX: VMOVAPSmr %1, 1, $noreg, 0, $noreg, %0 :: (store (<4 x s32>) into %ir.p1) +# AVX512F: VMOVAPSZ128mr_NOVLX %1, 1, $noreg, 0, $noreg, %0 :: (store (<4 x s32>) into %ir.p1) +# AVX512VL: VMOVAPSZ128mr %1, 1, $noreg, 0, $noreg, %0 :: (store (<4 x s32>) into %ir.p1) # ALL: $rax = COPY %1 body: | bb.1 (%ir-block.0): @@ -98,7 +98,7 @@ body: | %0(<4 x s32>) = COPY $xmm0 %1(p0) = COPY $rdi - G_STORE %0(<4 x s32>), %1(p0) :: (store 16 into %ir.p1, align 16) + G_STORE %0(<4 x s32>), %1(p0) :: (store (<4 x s32>) into %ir.p1, align 16) $rax = COPY %1(p0) RET 0, implicit $rax @@ -115,10 +115,10 @@ registers: # NO_AVX512F: %0:vr128 = COPY $xmm0 # AVX512ALL: %0:vr128x = COPY $xmm0 # ALL: %1:gr64 = COPY $rdi -# SSE: MOVUPSmr %1, 1, $noreg, 0, $noreg, %0 :: (store 16 into %ir.p1, align 1) -# AVX: VMOVUPSmr %1, 1, $noreg, 0, $noreg, %0 :: (store 16 into %ir.p1, align 1) -# AVX512F: VMOVUPSZ128mr_NOVLX %1, 1, $noreg, 0, $noreg, %0 :: (store 16 into %ir.p1, align 1) -# AVX512VL: VMOVUPSZ128mr %1, 1, $noreg, 0, $noreg, %0 :: (store 16 into %ir.p1, align 1) +# SSE: MOVUPSmr %1, 1, $noreg, 0, $noreg, %0 :: (store (<4 x s32>) into %ir.p1, align 1) +# AVX: VMOVUPSmr %1, 1, $noreg, 0, $noreg, %0 :: (store (<4 x s32>) into %ir.p1, align 1) +# AVX512F: VMOVUPSZ128mr_NOVLX %1, 1, $noreg, 0, $noreg, %0 :: (store (<4 x s32>) into %ir.p1, align 1) +# AVX512VL: VMOVUPSZ128mr %1, 1, $noreg, 0, $noreg, %0 :: (store (<4 x s32>) into %ir.p1, align 1) # ALL: $rax = COPY %1 body: | bb.1 (%ir-block.0): @@ -126,7 +126,7 @@ body: | %0(<4 x s32>) = COPY $xmm0 %1(p0) = COPY $rdi - G_STORE %0(<4 x s32>), %1(p0) :: (store 16 into %ir.p1, align 1) + G_STORE %0(<4 x s32>), %1(p0) :: (store (<4 x s32>) into %ir.p1, align 1) $rax = COPY %1(p0) RET 0, implicit $rax diff --git a/llvm/test/CodeGen/X86/GlobalISel/select-memop-v256.mir b/llvm/test/CodeGen/X86/GlobalISel/select-memop-v256.mir index 4d9a39cc89253..af839a5d247dd 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/select-memop-v256.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/select-memop-v256.mir @@ -43,17 +43,17 @@ registers: - { id: 0, class: gpr } - { id: 1, class: vecr } # NO_AVX512F: %0:gr64 = COPY $rdi -# NO_AVX512F-NEXT: %1:vr256 = VMOVUPSYrm %0, 1, $noreg, 0, $noreg :: (load 32 from %ir.p1, align 1) +# NO_AVX512F-NEXT: %1:vr256 = VMOVUPSYrm %0, 1, $noreg, 0, $noreg :: (load (<8 x s32>) from %ir.p1, align 1) # NO_AVX512F-NEXT: $ymm0 = COPY %1 # NO_AVX512F-NEXT: RET 0, implicit $ymm0 # # AVX512F: %0:gr64 = COPY $rdi -# AVX512F-NEXT: %1:vr256x = VMOVUPSZ256rm_NOVLX %0, 1, $noreg, 0, $noreg :: (load 32 from %ir.p1, align 1) +# AVX512F-NEXT: %1:vr256x = VMOVUPSZ256rm_NOVLX %0, 1, $noreg, 0, $noreg :: (load (<8 x s32>) from %ir.p1, align 1) # AVX512F-NEXT: $ymm0 = COPY %1 # AVX512F-NEXT: RET 0, implicit $ymm0 # # AVX512VL: %0:gr64 = COPY $rdi -# AVX512VL-NEXT: %1:vr256x = VMOVUPSZ256rm %0, 1, $noreg, 0, $noreg :: (load 32 from %ir.p1, align 1) +# AVX512VL-NEXT: %1:vr256x = VMOVUPSZ256rm %0, 1, $noreg, 0, $noreg :: (load (<8 x s32>) from %ir.p1, align 1) # AVX512VL-NEXT: $ymm0 = COPY %1 # AVX512VL-NEXT: RET 0, implicit $ymm0 body: | @@ -61,7 +61,7 @@ body: | liveins: $rdi %0(p0) = COPY $rdi - %1(<8 x s32>) = G_LOAD %0(p0) :: (load 32 from %ir.p1, align 1) + %1(<8 x s32>) = G_LOAD %0(p0) :: (load (<8 x s32>) from %ir.p1, align 1) $ymm0 = COPY %1(<8 x s32>) RET 0, implicit $ymm0 @@ -76,17 +76,17 @@ registers: - { id: 0, class: gpr } - { id: 1, class: vecr } # NO_AVX512F: %0:gr64 = COPY $rdi -# NO_AVX512F-NEXT: %1:vr256 = VMOVAPSYrm %0, 1, $noreg, 0, $noreg :: (load 32 from %ir.p1) +# NO_AVX512F-NEXT: %1:vr256 = VMOVAPSYrm %0, 1, $noreg, 0, $noreg :: (load (<8 x s32>) from %ir.p1) # NO_AVX512F-NEXT: $ymm0 = COPY %1 # NO_AVX512F-NEXT: RET 0, implicit $ymm0 # # AVX512F: %0:gr64 = COPY $rdi -# AVX512F-NEXT: %1:vr256x = VMOVAPSZ256rm_NOVLX %0, 1, $noreg, 0, $noreg :: (load 32 from %ir.p1) +# AVX512F-NEXT: %1:vr256x = VMOVAPSZ256rm_NOVLX %0, 1, $noreg, 0, $noreg :: (load (<8 x s32>) from %ir.p1) # AVX512F-NEXT: $ymm0 = COPY %1 # AVX512F-NEXT: RET 0, implicit $ymm0 # # AVX512VL: %0:gr64 = COPY $rdi -# AVX512VL-NEXT: %1:vr256x = VMOVAPSZ256rm %0, 1, $noreg, 0, $noreg :: (load 32 from %ir.p1) +# AVX512VL-NEXT: %1:vr256x = VMOVAPSZ256rm %0, 1, $noreg, 0, $noreg :: (load (<8 x s32>) from %ir.p1) # AVX512VL-NEXT: $ymm0 = COPY %1 # AVX512VL-NEXT: RET 0, implicit $ymm0 body: | @@ -94,7 +94,7 @@ body: | liveins: $rdi %0(p0) = COPY $rdi - %1(<8 x s32>) = G_LOAD %0(p0) :: (load 32 from %ir.p1) + %1(<8 x s32>) = G_LOAD %0(p0) :: (load (<8 x s32>) from %ir.p1) $ymm0 = COPY %1(<8 x s32>) RET 0, implicit $ymm0 @@ -117,17 +117,17 @@ registers: - { id: 1, class: gpr } # NO_AVX512F: %0:vr256 = COPY $ymm0 # NO_AVX512F-NEXT: %1:gr64 = COPY $rdi -# NO_AVX512F-NEXT: VMOVUPSYmr %1, 1, $noreg, 0, $noreg, %0 :: (store 32 into %ir.p1, align 1) +# NO_AVX512F-NEXT: VMOVUPSYmr %1, 1, $noreg, 0, $noreg, %0 :: (store (<8 x s32>) into %ir.p1, align 1) # NO_AVX512F-NEXT: RET 0 # # AVX512F: %0:vr256x = COPY $ymm0 # AVX512F-NEXT: %1:gr64 = COPY $rdi -# AVX512F-NEXT: VMOVUPSZ256mr_NOVLX %1, 1, $noreg, 0, $noreg, %0 :: (store 32 into %ir.p1, align 1) +# AVX512F-NEXT: VMOVUPSZ256mr_NOVLX %1, 1, $noreg, 0, $noreg, %0 :: (store (<8 x s32>) into %ir.p1, align 1) # AVX512F-NEXT: RET 0 # # AVX512VL: %0:vr256x = COPY $ymm0 # AVX512VL-NEXT: %1:gr64 = COPY $rdi -# AVX512VL-NEXT: VMOVUPSZ256mr %1, 1, $noreg, 0, $noreg, %0 :: (store 32 into %ir.p1, align 1) +# AVX512VL-NEXT: VMOVUPSZ256mr %1, 1, $noreg, 0, $noreg, %0 :: (store (<8 x s32>) into %ir.p1, align 1) # AVX512VL-NEXT: RET 0 body: | bb.1 (%ir-block.0): @@ -135,7 +135,7 @@ body: | %0(<8 x s32>) = COPY $ymm0 %1(p0) = COPY $rdi - G_STORE %0(<8 x s32>), %1(p0) :: (store 32 into %ir.p1, align 1) + G_STORE %0(<8 x s32>), %1(p0) :: (store (<8 x s32>) into %ir.p1, align 1) RET 0 ... @@ -157,17 +157,17 @@ registers: - { id: 1, class: gpr } # NO_AVX512F: %0:vr256 = COPY $ymm0 # NO_AVX512F-NEXT: %1:gr64 = COPY $rdi -# NO_AVX512F-NEXT: VMOVAPSYmr %1, 1, $noreg, 0, $noreg, %0 :: (store 32 into %ir.p1) +# NO_AVX512F-NEXT: VMOVAPSYmr %1, 1, $noreg, 0, $noreg, %0 :: (store (<8 x s32>) into %ir.p1) # NO_AVX512F-NEXT: RET 0 # # AVX512F: %0:vr256x = COPY $ymm0 # AVX512F-NEXT: %1:gr64 = COPY $rdi -# AVX512F-NEXT: VMOVAPSZ256mr_NOVLX %1, 1, $noreg, 0, $noreg, %0 :: (store 32 into %ir.p1) +# AVX512F-NEXT: VMOVAPSZ256mr_NOVLX %1, 1, $noreg, 0, $noreg, %0 :: (store (<8 x s32>) into %ir.p1) # AVX512F-NEXT: RET 0 # # AVX512VL: %0:vr256x = COPY $ymm0 # AVX512VL-NEXT: %1:gr64 = COPY $rdi -# AVX512VL-NEXT: VMOVAPSZ256mr %1, 1, $noreg, 0, $noreg, %0 :: (store 32 into %ir.p1) +# AVX512VL-NEXT: VMOVAPSZ256mr %1, 1, $noreg, 0, $noreg, %0 :: (store (<8 x s32>) into %ir.p1) # AVX512VL-NEXT: RET 0 body: | bb.1 (%ir-block.0): @@ -175,7 +175,7 @@ body: | %0(<8 x s32>) = COPY $ymm0 %1(p0) = COPY $rdi - G_STORE %0(<8 x s32>), %1(p0) :: (store 32 into %ir.p1) + G_STORE %0(<8 x s32>), %1(p0) :: (store (<8 x s32>) into %ir.p1) RET 0 ... diff --git a/llvm/test/CodeGen/X86/GlobalISel/select-memop-v512.mir b/llvm/test/CodeGen/X86/GlobalISel/select-memop-v512.mir index a9175592261f5..6aea213b9e7e3 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/select-memop-v512.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/select-memop-v512.mir @@ -36,11 +36,11 @@ body: | ; AVX512F-LABEL: name: test_load_v16i32_noalign ; AVX512F: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX512F: [[VMOVUPSZrm:%[0-9]+]]:vr512 = VMOVUPSZrm [[COPY]], 1, $noreg, 0, $noreg :: (load 64 from %ir.p1, align 1) + ; AVX512F: [[VMOVUPSZrm:%[0-9]+]]:vr512 = VMOVUPSZrm [[COPY]], 1, $noreg, 0, $noreg :: (load (<16 x s32>) from %ir.p1, align 1) ; AVX512F: $zmm0 = COPY [[VMOVUPSZrm]] ; AVX512F: RET 0, implicit $zmm0 %0(p0) = COPY $rdi - %1(<16 x s32>) = G_LOAD %0(p0) :: (load 64 from %ir.p1, align 1) + %1(<16 x s32>) = G_LOAD %0(p0) :: (load (<16 x s32>) from %ir.p1, align 1) $zmm0 = COPY %1(<16 x s32>) RET 0, implicit $zmm0 @@ -59,11 +59,11 @@ body: | ; AVX512F-LABEL: name: test_load_v16i32_align ; AVX512F: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; AVX512F: [[VMOVUPSZrm:%[0-9]+]]:vr512 = VMOVUPSZrm [[COPY]], 1, $noreg, 0, $noreg :: (load 64 from %ir.p1, align 32) + ; AVX512F: [[VMOVUPSZrm:%[0-9]+]]:vr512 = VMOVUPSZrm [[COPY]], 1, $noreg, 0, $noreg :: (load (<16 x s32>) from %ir.p1, align 32) ; AVX512F: $zmm0 = COPY [[VMOVUPSZrm]] ; AVX512F: RET 0, implicit $zmm0 %0(p0) = COPY $rdi - %1(<16 x s32>) = G_LOAD %0(p0) :: (load 64 from %ir.p1, align 32) + %1(<16 x s32>) = G_LOAD %0(p0) :: (load (<16 x s32>) from %ir.p1, align 32) $zmm0 = COPY %1(<16 x s32>) RET 0, implicit $zmm0 @@ -83,11 +83,11 @@ body: | ; AVX512F-LABEL: name: test_store_v16i32_noalign ; AVX512F: [[COPY:%[0-9]+]]:vr512 = COPY $zmm0 ; AVX512F: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi - ; AVX512F: VMOVUPSZmr [[COPY1]], 1, $noreg, 0, $noreg, [[COPY]] :: (store 64 into %ir.p1, align 1) + ; AVX512F: VMOVUPSZmr [[COPY1]], 1, $noreg, 0, $noreg, [[COPY]] :: (store (<16 x s32>) into %ir.p1, align 1) ; AVX512F: RET 0 %0(<16 x s32>) = COPY $zmm0 %1(p0) = COPY $rdi - G_STORE %0(<16 x s32>), %1(p0) :: (store 64 into %ir.p1, align 1) + G_STORE %0(<16 x s32>), %1(p0) :: (store (<16 x s32>) into %ir.p1, align 1) RET 0 ... @@ -106,11 +106,11 @@ body: | ; AVX512F-LABEL: name: test_store_v16i32_align ; AVX512F: [[COPY:%[0-9]+]]:vr512 = COPY $zmm0 ; AVX512F: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi - ; AVX512F: VMOVUPSZmr [[COPY1]], 1, $noreg, 0, $noreg, [[COPY]] :: (store 64 into %ir.p1, align 32) + ; AVX512F: VMOVUPSZmr [[COPY1]], 1, $noreg, 0, $noreg, [[COPY]] :: (store (<16 x s32>) into %ir.p1, align 32) ; AVX512F: RET 0 %0(<16 x s32>) = COPY $zmm0 %1(p0) = COPY $rdi - G_STORE %0(<16 x s32>), %1(p0) :: (store 64 into %ir.p1, align 32) + G_STORE %0(<16 x s32>), %1(p0) :: (store (<16 x s32>) into %ir.p1, align 32) RET 0 ... diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86-legalize-inttoptr.mir b/llvm/test/CodeGen/X86/GlobalISel/x86-legalize-inttoptr.mir index 67ed879723b69..5bc6beedbbe7f 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/x86-legalize-inttoptr.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/x86-legalize-inttoptr.mir @@ -26,12 +26,12 @@ body: | bb.1.entry: ; CHECK-LABEL: name: inttoptr_p0_s32 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 4 from %fixed-stack.0, align 16) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s32) from %fixed-stack.0, align 16) ; CHECK: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[LOAD]](s32) ; CHECK: $eax = COPY [[INTTOPTR]](p0) ; CHECK: RET 0, implicit $eax %1:_(p0) = G_FRAME_INDEX %fixed-stack.0 - %0:_(s32) = G_LOAD %1(p0) :: (invariant load 4 from %fixed-stack.0, align 16) + %0:_(s32) = G_LOAD %1(p0) :: (invariant load (s32) from %fixed-stack.0, align 16) %2:_(p0) = G_INTTOPTR %0(s32) $eax = COPY %2(p0) RET 0, implicit $eax diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86-legalize-ptrtoint.mir b/llvm/test/CodeGen/X86/GlobalISel/x86-legalize-ptrtoint.mir index 86879866f10a2..feecc2effa20a 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/x86-legalize-ptrtoint.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/x86-legalize-ptrtoint.mir @@ -45,13 +45,13 @@ body: | bb.1.entry: ; CHECK-LABEL: name: ptrtoint_s1_p0 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 4 from %fixed-stack.0, align 16) + ; CHECK: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.0, align 16) ; CHECK: [[PTRTOINT:%[0-9]+]]:_(s1) = G_PTRTOINT [[LOAD]](p0) ; CHECK: [[ANYEXT:%[0-9]+]]:_(s8) = G_ANYEXT [[PTRTOINT]](s1) ; CHECK: $al = COPY [[ANYEXT]](s8) ; CHECK: RET 0, implicit $al %1:_(p0) = G_FRAME_INDEX %fixed-stack.0 - %0:_(p0) = G_LOAD %1(p0) :: (invariant load 4 from %fixed-stack.0, align 16) + %0:_(p0) = G_LOAD %1(p0) :: (invariant load (p0) from %fixed-stack.0, align 16) %2:_(s1) = G_PTRTOINT %0(p0) %3:_(s8) = G_ANYEXT %2(s1) $al = COPY %3(s8) @@ -74,12 +74,12 @@ body: | bb.1.entry: ; CHECK-LABEL: name: ptrtoint_s8_p0 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 4 from %fixed-stack.0, align 16) + ; CHECK: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.0, align 16) ; CHECK: [[PTRTOINT:%[0-9]+]]:_(s8) = G_PTRTOINT [[LOAD]](p0) ; CHECK: $al = COPY [[PTRTOINT]](s8) ; CHECK: RET 0, implicit $al %1:_(p0) = G_FRAME_INDEX %fixed-stack.0 - %0:_(p0) = G_LOAD %1(p0) :: (invariant load 4 from %fixed-stack.0, align 16) + %0:_(p0) = G_LOAD %1(p0) :: (invariant load (p0) from %fixed-stack.0, align 16) %2:_(s8) = G_PTRTOINT %0(p0) $al = COPY %2(s8) RET 0, implicit $al @@ -101,12 +101,12 @@ body: | bb.1.entry: ; CHECK-LABEL: name: ptrtoint_s16_p0 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 4 from %fixed-stack.0, align 16) + ; CHECK: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.0, align 16) ; CHECK: [[PTRTOINT:%[0-9]+]]:_(s16) = G_PTRTOINT [[LOAD]](p0) ; CHECK: $ax = COPY [[PTRTOINT]](s16) ; CHECK: RET 0, implicit $ax %1:_(p0) = G_FRAME_INDEX %fixed-stack.0 - %0:_(p0) = G_LOAD %1(p0) :: (invariant load 4 from %fixed-stack.0, align 16) + %0:_(p0) = G_LOAD %1(p0) :: (invariant load (p0) from %fixed-stack.0, align 16) %2:_(s16) = G_PTRTOINT %0(p0) $ax = COPY %2(s16) RET 0, implicit $ax @@ -128,12 +128,12 @@ body: | bb.1.entry: ; CHECK-LABEL: name: ptrtoint_s32_p0 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 4 from %fixed-stack.0, align 16) + ; CHECK: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.0, align 16) ; CHECK: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[LOAD]](p0) ; CHECK: $eax = COPY [[PTRTOINT]](s32) ; CHECK: RET 0, implicit $eax %1:_(p0) = G_FRAME_INDEX %fixed-stack.0 - %0:_(p0) = G_LOAD %1(p0) :: (invariant load 4 from %fixed-stack.0, align 16) + %0:_(p0) = G_LOAD %1(p0) :: (invariant load (p0) from %fixed-stack.0, align 16) %2:_(s32) = G_PTRTOINT %0(p0) $eax = COPY %2(s32) RET 0, implicit $eax diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86-legalize-srem.mir b/llvm/test/CodeGen/X86/GlobalISel/x86-legalize-srem.mir index 466990bf7f8ed..965bf635d6feb 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/x86-legalize-srem.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/x86-legalize-srem.mir @@ -70,16 +70,16 @@ body: | bb.1 (%ir-block.0): ; CHECK-LABEL: name: test_srem_i8 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 1 from %fixed-stack.0, align 16) + ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s8) from %fixed-stack.0, align 16) ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load 1 from %fixed-stack.1, align 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s8) from %fixed-stack.1, align 4) ; CHECK: [[SREM:%[0-9]+]]:_(s8) = G_SREM [[LOAD]], [[LOAD1]] ; CHECK: $al = COPY [[SREM]](s8) ; CHECK: RET 0, implicit $al %2:_(p0) = G_FRAME_INDEX %fixed-stack.1 - %0:_(s8) = G_LOAD %2(p0) :: (invariant load 1 from %fixed-stack.1, align 16) + %0:_(s8) = G_LOAD %2(p0) :: (invariant load (s8) from %fixed-stack.1, align 16) %3:_(p0) = G_FRAME_INDEX %fixed-stack.0 - %1:_(s8) = G_LOAD %3(p0) :: (invariant load 1 from %fixed-stack.0, align 4) + %1:_(s8) = G_LOAD %3(p0) :: (invariant load (s8) from %fixed-stack.0, align 4) %4:_(s8) = G_SREM %0, %1 $al = COPY %4(s8) RET 0, implicit $al @@ -132,16 +132,16 @@ body: | bb.1 (%ir-block.0): ; CHECK-LABEL: name: test_srem_i16 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 2 from %fixed-stack.0, align 16) + ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s16) from %fixed-stack.0, align 16) ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load 2 from %fixed-stack.1, align 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s16) from %fixed-stack.1, align 4) ; CHECK: [[SREM:%[0-9]+]]:_(s16) = G_SREM [[LOAD]], [[LOAD1]] ; CHECK: $ax = COPY [[SREM]](s16) ; CHECK: RET 0, implicit $ax %2:_(p0) = G_FRAME_INDEX %fixed-stack.1 - %0:_(s16) = G_LOAD %2(p0) :: (invariant load 2 from %fixed-stack.1, align 16) + %0:_(s16) = G_LOAD %2(p0) :: (invariant load (s16) from %fixed-stack.1, align 16) %3:_(p0) = G_FRAME_INDEX %fixed-stack.0 - %1:_(s16) = G_LOAD %3(p0) :: (invariant load 2 from %fixed-stack.0, align 4) + %1:_(s16) = G_LOAD %3(p0) :: (invariant load (s16) from %fixed-stack.0, align 4) %4:_(s16) = G_SREM %0, %1 $ax = COPY %4(s16) RET 0, implicit $ax @@ -194,16 +194,16 @@ body: | bb.1 (%ir-block.0): ; CHECK-LABEL: name: test_srem_i32 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 4 from %fixed-stack.0, align 16) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s32) from %fixed-stack.0, align 16) ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load 4 from %fixed-stack.1) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s32) from %fixed-stack.1) ; CHECK: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[LOAD]], [[LOAD1]] ; CHECK: $eax = COPY [[SREM]](s32) ; CHECK: RET 0, implicit $eax %2:_(p0) = G_FRAME_INDEX %fixed-stack.1 - %0:_(s32) = G_LOAD %2(p0) :: (invariant load 4 from %fixed-stack.1, align 16) + %0:_(s32) = G_LOAD %2(p0) :: (invariant load (s32) from %fixed-stack.1, align 16) %3:_(p0) = G_FRAME_INDEX %fixed-stack.0 - %1:_(s32) = G_LOAD %3(p0) :: (invariant load 4 from %fixed-stack.0, align 4) + %1:_(s32) = G_LOAD %3(p0) :: (invariant load (s32) from %fixed-stack.0, align 4) %4:_(s32) = G_SREM %0, %1 $eax = COPY %4(s32) RET 0, implicit $eax diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86-legalize-urem.mir b/llvm/test/CodeGen/X86/GlobalISel/x86-legalize-urem.mir index 77ff45293fbdd..b6496216ac56d 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/x86-legalize-urem.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/x86-legalize-urem.mir @@ -70,16 +70,16 @@ body: | bb.1 (%ir-block.0): ; CHECK-LABEL: name: test_urem_i8 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 1 from %fixed-stack.0, align 16) + ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s8) from %fixed-stack.0, align 16) ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load 1 from %fixed-stack.1, align 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s8) from %fixed-stack.1, align 4) ; CHECK: [[UREM:%[0-9]+]]:_(s8) = G_UREM [[LOAD]], [[LOAD1]] ; CHECK: $al = COPY [[UREM]](s8) ; CHECK: RET 0, implicit $al %2:_(p0) = G_FRAME_INDEX %fixed-stack.1 - %0:_(s8) = G_LOAD %2(p0) :: (invariant load 1 from %fixed-stack.1, align 16) + %0:_(s8) = G_LOAD %2(p0) :: (invariant load (s8) from %fixed-stack.1, align 16) %3:_(p0) = G_FRAME_INDEX %fixed-stack.0 - %1:_(s8) = G_LOAD %3(p0) :: (invariant load 1 from %fixed-stack.0, align 4) + %1:_(s8) = G_LOAD %3(p0) :: (invariant load (s8) from %fixed-stack.0, align 4) %4:_(s8) = G_UREM %0, %1 $al = COPY %4(s8) RET 0, implicit $al @@ -132,16 +132,16 @@ body: | bb.1 (%ir-block.0): ; CHECK-LABEL: name: test_urem_i16 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 2 from %fixed-stack.0, align 16) + ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s16) from %fixed-stack.0, align 16) ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load 2 from %fixed-stack.1, align 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s16) from %fixed-stack.1, align 4) ; CHECK: [[UREM:%[0-9]+]]:_(s16) = G_UREM [[LOAD]], [[LOAD1]] ; CHECK: $ax = COPY [[UREM]](s16) ; CHECK: RET 0, implicit $ax %2:_(p0) = G_FRAME_INDEX %fixed-stack.1 - %0:_(s16) = G_LOAD %2(p0) :: (invariant load 2 from %fixed-stack.1, align 16) + %0:_(s16) = G_LOAD %2(p0) :: (invariant load (s16) from %fixed-stack.1, align 16) %3:_(p0) = G_FRAME_INDEX %fixed-stack.0 - %1:_(s16) = G_LOAD %3(p0) :: (invariant load 2 from %fixed-stack.0, align 4) + %1:_(s16) = G_LOAD %3(p0) :: (invariant load (s16) from %fixed-stack.0, align 4) %4:_(s16) = G_UREM %0, %1 $ax = COPY %4(s16) RET 0, implicit $ax @@ -194,16 +194,16 @@ body: | bb.1 (%ir-block.0): ; CHECK-LABEL: name: test_urem_i32 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 4 from %fixed-stack.0, align 16) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s32) from %fixed-stack.0, align 16) ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load 4 from %fixed-stack.1) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s32) from %fixed-stack.1) ; CHECK: [[UREM:%[0-9]+]]:_(s32) = G_UREM [[LOAD]], [[LOAD1]] ; CHECK: $eax = COPY [[UREM]](s32) ; CHECK: RET 0, implicit $eax %2:_(p0) = G_FRAME_INDEX %fixed-stack.1 - %0:_(s32) = G_LOAD %2(p0) :: (invariant load 4 from %fixed-stack.1, align 16) + %0:_(s32) = G_LOAD %2(p0) :: (invariant load (s32) from %fixed-stack.1, align 16) %3:_(p0) = G_FRAME_INDEX %fixed-stack.0 - %1:_(s32) = G_LOAD %3(p0) :: (invariant load 4 from %fixed-stack.0, align 4) + %1:_(s32) = G_LOAD %3(p0) :: (invariant load (s32) from %fixed-stack.0, align 4) %4:_(s32) = G_UREM %0, %1 $eax = COPY %4(s32) RET 0, implicit $eax diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86-select-inttoptr.mir b/llvm/test/CodeGen/X86/GlobalISel/x86-select-inttoptr.mir index 6b740a18c1478..32ff65102c1b9 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/x86-select-inttoptr.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/x86-select-inttoptr.mir @@ -27,11 +27,11 @@ fixedStack: body: | bb.1.entry: ; CHECK-LABEL: name: inttoptr_p0_s32 - ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load 4 from %fixed-stack.0, align 16) + ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load (s32) from %fixed-stack.0, align 16) ; CHECK: $eax = COPY [[MOV32rm]] ; CHECK: RET 0, implicit $eax %1:gpr(p0) = G_FRAME_INDEX %fixed-stack.0 - %0:gpr(s32) = G_LOAD %1(p0) :: (invariant load 4 from %fixed-stack.0, align 16) + %0:gpr(s32) = G_LOAD %1(p0) :: (invariant load (s32) from %fixed-stack.0, align 16) %2:gpr(p0) = G_INTTOPTR %0(s32) $eax = COPY %2(p0) RET 0, implicit $eax diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86-select-ptrtoint.mir b/llvm/test/CodeGen/X86/GlobalISel/x86-select-ptrtoint.mir index 38ce2160b5835..8da74d0f49f9d 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/x86-select-ptrtoint.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/x86-select-ptrtoint.mir @@ -46,12 +46,12 @@ fixedStack: body: | bb.1.entry: ; CHECK-LABEL: name: ptrtoint_s1_p0 - ; CHECK: [[MOV32rm:%[0-9]+]]:gr32_abcd = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load 4 from %fixed-stack.0, align 16) + ; CHECK: [[MOV32rm:%[0-9]+]]:gr32_abcd = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load (p0) from %fixed-stack.0, align 16) ; CHECK: [[COPY:%[0-9]+]]:gr8 = COPY [[MOV32rm]].sub_8bit ; CHECK: $al = COPY [[COPY]] ; CHECK: RET 0, implicit $al %1:gpr(p0) = G_FRAME_INDEX %fixed-stack.0 - %0:gpr(p0) = G_LOAD %1(p0) :: (invariant load 4 from %fixed-stack.0, align 16) + %0:gpr(p0) = G_LOAD %1(p0) :: (invariant load (p0) from %fixed-stack.0, align 16) %2:gpr(s1) = G_PTRTOINT %0(p0) %3:gpr(s8) = G_ANYEXT %2(s1) $al = COPY %3(s8) @@ -75,12 +75,12 @@ fixedStack: body: | bb.1.entry: ; CHECK-LABEL: name: ptrtoint_s8_p0 - ; CHECK: [[MOV32rm:%[0-9]+]]:gr32_abcd = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load 4 from %fixed-stack.0, align 16) + ; CHECK: [[MOV32rm:%[0-9]+]]:gr32_abcd = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load (p0) from %fixed-stack.0, align 16) ; CHECK: [[COPY:%[0-9]+]]:gr8 = COPY [[MOV32rm]].sub_8bit ; CHECK: $al = COPY [[COPY]] ; CHECK: RET 0, implicit $al %1:gpr(p0) = G_FRAME_INDEX %fixed-stack.0 - %0:gpr(p0) = G_LOAD %1(p0) :: (invariant load 4 from %fixed-stack.0, align 16) + %0:gpr(p0) = G_LOAD %1(p0) :: (invariant load (p0) from %fixed-stack.0, align 16) %2:gpr(s8) = G_PTRTOINT %0(p0) $al = COPY %2(s8) RET 0, implicit $al @@ -103,12 +103,12 @@ fixedStack: body: | bb.1.entry: ; CHECK-LABEL: name: ptrtoint_s16_p0 - ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load 4 from %fixed-stack.0, align 16) + ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load (p0) from %fixed-stack.0, align 16) ; CHECK: [[COPY:%[0-9]+]]:gr16 = COPY [[MOV32rm]].sub_16bit ; CHECK: $ax = COPY [[COPY]] ; CHECK: RET 0, implicit $ax %1:gpr(p0) = G_FRAME_INDEX %fixed-stack.0 - %0:gpr(p0) = G_LOAD %1(p0) :: (invariant load 4 from %fixed-stack.0, align 16) + %0:gpr(p0) = G_LOAD %1(p0) :: (invariant load (p0) from %fixed-stack.0, align 16) %2:gpr(s16) = G_PTRTOINT %0(p0) $ax = COPY %2(s16) RET 0, implicit $ax @@ -131,11 +131,11 @@ fixedStack: body: | bb.1.entry: ; CHECK-LABEL: name: ptrtoint_s32_p0 - ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load 4 from %fixed-stack.0, align 16) + ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load (p0) from %fixed-stack.0, align 16) ; CHECK: $eax = COPY [[MOV32rm]] ; CHECK: RET 0, implicit $eax %1:gpr(p0) = G_FRAME_INDEX %fixed-stack.0 - %0:gpr(p0) = G_LOAD %1(p0) :: (invariant load 4 from %fixed-stack.0, align 16) + %0:gpr(p0) = G_LOAD %1(p0) :: (invariant load (p0) from %fixed-stack.0, align 16) %2:gpr(s32) = G_PTRTOINT %0(p0) $eax = COPY %2(s32) RET 0, implicit $eax diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86-select-srem.mir b/llvm/test/CodeGen/X86/GlobalISel/x86-select-srem.mir index 93bab680ee9b4..a7f5badcdef06 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/x86-select-srem.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/x86-select-srem.mir @@ -69,17 +69,17 @@ constants: body: | bb.1 (%ir-block.0): ; CHECK-LABEL: name: test_srem_i8 - ; CHECK: [[MOV8rm:%[0-9]+]]:gr8 = MOV8rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load 1 from %fixed-stack.0, align 16) - ; CHECK: [[MOV8rm1:%[0-9]+]]:gr8 = MOV8rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (invariant load 1 from %fixed-stack.1, align 4) + ; CHECK: [[MOV8rm:%[0-9]+]]:gr8 = MOV8rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load (s8) from %fixed-stack.0, align 16) + ; CHECK: [[MOV8rm1:%[0-9]+]]:gr8 = MOV8rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (invariant load (s8) from %fixed-stack.1, align 4) ; CHECK: $ax = MOVSX16rr8 [[MOV8rm]] ; CHECK: IDIV8r [[MOV8rm1]], implicit-def $al, implicit-def $ah, implicit-def $eflags, implicit $ax ; CHECK: [[COPY:%[0-9]+]]:gr8 = COPY $ah ; CHECK: $al = COPY [[COPY]] ; CHECK: RET 0, implicit $al %2:gpr(p0) = G_FRAME_INDEX %fixed-stack.1 - %0:gpr(s8) = G_LOAD %2(p0) :: (invariant load 1 from %fixed-stack.1, align 16) + %0:gpr(s8) = G_LOAD %2(p0) :: (invariant load (s8) from %fixed-stack.1, align 16) %3:gpr(p0) = G_FRAME_INDEX %fixed-stack.0 - %1:gpr(s8) = G_LOAD %3(p0) :: (invariant load 1 from %fixed-stack.0, align 4) + %1:gpr(s8) = G_LOAD %3(p0) :: (invariant load (s8) from %fixed-stack.0, align 4) %4:gpr(s8) = G_SREM %0, %1 $al = COPY %4(s8) RET 0, implicit $al @@ -131,8 +131,8 @@ constants: body: | bb.1 (%ir-block.0): ; CHECK-LABEL: name: test_srem_i16 - ; CHECK: [[MOV16rm:%[0-9]+]]:gr16 = MOV16rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load 2 from %fixed-stack.0, align 16) - ; CHECK: [[MOV16rm1:%[0-9]+]]:gr16 = MOV16rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (invariant load 2 from %fixed-stack.1, align 4) + ; CHECK: [[MOV16rm:%[0-9]+]]:gr16 = MOV16rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load (s16) from %fixed-stack.0, align 16) + ; CHECK: [[MOV16rm1:%[0-9]+]]:gr16 = MOV16rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (invariant load (s16) from %fixed-stack.1, align 4) ; CHECK: $ax = COPY [[MOV16rm]] ; CHECK: CWD implicit-def $ax, implicit-def $dx, implicit $ax ; CHECK: IDIV16r [[MOV16rm1]], implicit-def $ax, implicit-def $dx, implicit-def $eflags, implicit $ax, implicit $dx @@ -140,9 +140,9 @@ body: | ; CHECK: $ax = COPY [[COPY]] ; CHECK: RET 0, implicit $ax %2:gpr(p0) = G_FRAME_INDEX %fixed-stack.1 - %0:gpr(s16) = G_LOAD %2(p0) :: (invariant load 2 from %fixed-stack.1, align 16) + %0:gpr(s16) = G_LOAD %2(p0) :: (invariant load (s16) from %fixed-stack.1, align 16) %3:gpr(p0) = G_FRAME_INDEX %fixed-stack.0 - %1:gpr(s16) = G_LOAD %3(p0) :: (invariant load 2 from %fixed-stack.0, align 4) + %1:gpr(s16) = G_LOAD %3(p0) :: (invariant load (s16) from %fixed-stack.0, align 4) %4:gpr(s16) = G_SREM %0, %1 $ax = COPY %4(s16) RET 0, implicit $ax @@ -194,8 +194,8 @@ constants: body: | bb.1 (%ir-block.0): ; CHECK-LABEL: name: test_srem_i32 - ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load 4 from %fixed-stack.0, align 16) - ; CHECK: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (invariant load 4 from %fixed-stack.1) + ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load (s32) from %fixed-stack.0, align 16) + ; CHECK: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (invariant load (s32) from %fixed-stack.1) ; CHECK: $eax = COPY [[MOV32rm]] ; CHECK: CDQ implicit-def $eax, implicit-def $edx, implicit $eax ; CHECK: IDIV32r [[MOV32rm1]], implicit-def $eax, implicit-def $edx, implicit-def $eflags, implicit $eax, implicit $edx @@ -203,9 +203,9 @@ body: | ; CHECK: $eax = COPY [[COPY]] ; CHECK: RET 0, implicit $eax %2:gpr(p0) = G_FRAME_INDEX %fixed-stack.1 - %0:gpr(s32) = G_LOAD %2(p0) :: (invariant load 4 from %fixed-stack.1, align 16) + %0:gpr(s32) = G_LOAD %2(p0) :: (invariant load (s32) from %fixed-stack.1, align 16) %3:gpr(p0) = G_FRAME_INDEX %fixed-stack.0 - %1:gpr(s32) = G_LOAD %3(p0) :: (invariant load 4 from %fixed-stack.0, align 4) + %1:gpr(s32) = G_LOAD %3(p0) :: (invariant load (s32) from %fixed-stack.0, align 4) %4:gpr(s32) = G_SREM %0, %1 $eax = COPY %4(s32) RET 0, implicit $eax diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86-select-udiv.mir b/llvm/test/CodeGen/X86/GlobalISel/x86-select-udiv.mir index b36c17fe8b3ad..1a960f9ad9e2c 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/x86-select-udiv.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/x86-select-udiv.mir @@ -69,17 +69,17 @@ constants: body: | bb.1 (%ir-block.0): ; CHECK-LABEL: name: test_udiv_i8 - ; CHECK: [[MOV8rm:%[0-9]+]]:gr8 = MOV8rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load 1 from %fixed-stack.0, align 16) - ; CHECK: [[MOV8rm1:%[0-9]+]]:gr8 = MOV8rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (invariant load 1 from %fixed-stack.1, align 4) + ; CHECK: [[MOV8rm:%[0-9]+]]:gr8 = MOV8rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load (s8) from %fixed-stack.0, align 16) + ; CHECK: [[MOV8rm1:%[0-9]+]]:gr8 = MOV8rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (invariant load (s8) from %fixed-stack.1, align 4) ; CHECK: $ax = MOVZX16rr8 [[MOV8rm]] ; CHECK: DIV8r [[MOV8rm1]], implicit-def $al, implicit-def $ah, implicit-def $eflags, implicit $ax ; CHECK: [[COPY:%[0-9]+]]:gr8 = COPY $al ; CHECK: $al = COPY [[COPY]] ; CHECK: RET 0, implicit $al %2:gpr(p0) = G_FRAME_INDEX %fixed-stack.1 - %0:gpr(s8) = G_LOAD %2(p0) :: (invariant load 1 from %fixed-stack.1, align 16) + %0:gpr(s8) = G_LOAD %2(p0) :: (invariant load (s8) from %fixed-stack.1, align 16) %3:gpr(p0) = G_FRAME_INDEX %fixed-stack.0 - %1:gpr(s8) = G_LOAD %3(p0) :: (invariant load 1 from %fixed-stack.0, align 4) + %1:gpr(s8) = G_LOAD %3(p0) :: (invariant load (s8) from %fixed-stack.0, align 4) %4:gpr(s8) = G_UDIV %0, %1 $al = COPY %4(s8) RET 0, implicit $al @@ -131,8 +131,8 @@ constants: body: | bb.1 (%ir-block.0): ; CHECK-LABEL: name: test_udiv_i16 - ; CHECK: [[MOV16rm:%[0-9]+]]:gr16 = MOV16rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load 2 from %fixed-stack.0, align 16) - ; CHECK: [[MOV16rm1:%[0-9]+]]:gr16 = MOV16rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (invariant load 2 from %fixed-stack.1, align 4) + ; CHECK: [[MOV16rm:%[0-9]+]]:gr16 = MOV16rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load (s16) from %fixed-stack.0, align 16) + ; CHECK: [[MOV16rm1:%[0-9]+]]:gr16 = MOV16rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (invariant load (s16) from %fixed-stack.1, align 4) ; CHECK: $ax = COPY [[MOV16rm]] ; CHECK: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def $eflags ; CHECK: $dx = COPY [[MOV32r0_]].sub_16bit @@ -141,9 +141,9 @@ body: | ; CHECK: $ax = COPY [[COPY]] ; CHECK: RET 0, implicit $ax %2:gpr(p0) = G_FRAME_INDEX %fixed-stack.1 - %0:gpr(s16) = G_LOAD %2(p0) :: (invariant load 2 from %fixed-stack.1, align 16) + %0:gpr(s16) = G_LOAD %2(p0) :: (invariant load (s16) from %fixed-stack.1, align 16) %3:gpr(p0) = G_FRAME_INDEX %fixed-stack.0 - %1:gpr(s16) = G_LOAD %3(p0) :: (invariant load 2 from %fixed-stack.0, align 4) + %1:gpr(s16) = G_LOAD %3(p0) :: (invariant load (s16) from %fixed-stack.0, align 4) %4:gpr(s16) = G_UDIV %0, %1 $ax = COPY %4(s16) RET 0, implicit $ax @@ -195,8 +195,8 @@ constants: body: | bb.1 (%ir-block.0): ; CHECK-LABEL: name: test_udiv_i32 - ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load 4 from %fixed-stack.0) - ; CHECK: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (invariant load 4 from %fixed-stack.1, align 16) + ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load (s32) from %fixed-stack.0) + ; CHECK: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (invariant load (s32) from %fixed-stack.1, align 16) ; CHECK: $eax = COPY [[MOV32rm]] ; CHECK: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def $eflags ; CHECK: $edx = COPY [[MOV32r0_]] @@ -205,9 +205,9 @@ body: | ; CHECK: $eax = COPY [[COPY]] ; CHECK: RET 0, implicit $eax %2:gpr(p0) = G_FRAME_INDEX %fixed-stack.1 - %0:gpr(s32) = G_LOAD %2(p0) :: (invariant load 4 from %fixed-stack.1, align 4) + %0:gpr(s32) = G_LOAD %2(p0) :: (invariant load (s32) from %fixed-stack.1, align 4) %3:gpr(p0) = G_FRAME_INDEX %fixed-stack.0 - %1:gpr(s32) = G_LOAD %3(p0) :: (invariant load 4 from %fixed-stack.0, align 16) + %1:gpr(s32) = G_LOAD %3(p0) :: (invariant load (s32) from %fixed-stack.0, align 16) %4:gpr(s32) = G_UDIV %0, %1 $eax = COPY %4(s32) RET 0, implicit $eax diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86-select-urem.mir b/llvm/test/CodeGen/X86/GlobalISel/x86-select-urem.mir index fe97a2bcb39c9..23d2892ad9110 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/x86-select-urem.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/x86-select-urem.mir @@ -69,17 +69,17 @@ constants: body: | bb.1 (%ir-block.0): ; CHECK-LABEL: name: test_urem_i8 - ; CHECK: [[MOV8rm:%[0-9]+]]:gr8 = MOV8rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load 1 from %fixed-stack.0, align 16) - ; CHECK: [[MOV8rm1:%[0-9]+]]:gr8 = MOV8rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (invariant load 1 from %fixed-stack.1, align 4) + ; CHECK: [[MOV8rm:%[0-9]+]]:gr8 = MOV8rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load (s8) from %fixed-stack.0, align 16) + ; CHECK: [[MOV8rm1:%[0-9]+]]:gr8 = MOV8rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (invariant load (s8) from %fixed-stack.1, align 4) ; CHECK: $ax = MOVZX16rr8 [[MOV8rm]] ; CHECK: DIV8r [[MOV8rm1]], implicit-def $al, implicit-def $ah, implicit-def $eflags, implicit $ax ; CHECK: [[COPY:%[0-9]+]]:gr8 = COPY $ah ; CHECK: $al = COPY [[COPY]] ; CHECK: RET 0, implicit $al %2:gpr(p0) = G_FRAME_INDEX %fixed-stack.1 - %0:gpr(s8) = G_LOAD %2(p0) :: (invariant load 1 from %fixed-stack.1, align 16) + %0:gpr(s8) = G_LOAD %2(p0) :: (invariant load (s8) from %fixed-stack.1, align 16) %3:gpr(p0) = G_FRAME_INDEX %fixed-stack.0 - %1:gpr(s8) = G_LOAD %3(p0) :: (invariant load 1 from %fixed-stack.0, align 4) + %1:gpr(s8) = G_LOAD %3(p0) :: (invariant load (s8) from %fixed-stack.0, align 4) %4:gpr(s8) = G_UREM %0, %1 $al = COPY %4(s8) RET 0, implicit $al @@ -131,8 +131,8 @@ constants: body: | bb.1 (%ir-block.0): ; CHECK-LABEL: name: test_urem_i16 - ; CHECK: [[MOV16rm:%[0-9]+]]:gr16 = MOV16rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load 2 from %fixed-stack.0, align 16) - ; CHECK: [[MOV16rm1:%[0-9]+]]:gr16 = MOV16rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (invariant load 2 from %fixed-stack.1, align 4) + ; CHECK: [[MOV16rm:%[0-9]+]]:gr16 = MOV16rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load (s16) from %fixed-stack.0, align 16) + ; CHECK: [[MOV16rm1:%[0-9]+]]:gr16 = MOV16rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (invariant load (s16) from %fixed-stack.1, align 4) ; CHECK: $ax = COPY [[MOV16rm]] ; CHECK: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def $eflags ; CHECK: $dx = COPY [[MOV32r0_]].sub_16bit @@ -141,9 +141,9 @@ body: | ; CHECK: $ax = COPY [[COPY]] ; CHECK: RET 0, implicit $ax %2:gpr(p0) = G_FRAME_INDEX %fixed-stack.1 - %0:gpr(s16) = G_LOAD %2(p0) :: (invariant load 2 from %fixed-stack.1, align 16) + %0:gpr(s16) = G_LOAD %2(p0) :: (invariant load (s16) from %fixed-stack.1, align 16) %3:gpr(p0) = G_FRAME_INDEX %fixed-stack.0 - %1:gpr(s16) = G_LOAD %3(p0) :: (invariant load 2 from %fixed-stack.0, align 4) + %1:gpr(s16) = G_LOAD %3(p0) :: (invariant load (s16) from %fixed-stack.0, align 4) %4:gpr(s16) = G_UREM %0, %1 $ax = COPY %4(s16) RET 0, implicit $ax @@ -195,8 +195,8 @@ constants: body: | bb.1 (%ir-block.0): ; CHECK-LABEL: name: test_urem_i32 - ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load 4 from %fixed-stack.0, align 16) - ; CHECK: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (invariant load 4 from %fixed-stack.1) + ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (invariant load (s32) from %fixed-stack.0, align 16) + ; CHECK: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (invariant load (s32) from %fixed-stack.1) ; CHECK: $eax = COPY [[MOV32rm]] ; CHECK: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def $eflags ; CHECK: $edx = COPY [[MOV32r0_]] @@ -205,9 +205,9 @@ body: | ; CHECK: $eax = COPY [[COPY]] ; CHECK: RET 0, implicit $eax %2:gpr(p0) = G_FRAME_INDEX %fixed-stack.1 - %0:gpr(s32) = G_LOAD %2(p0) :: (invariant load 4 from %fixed-stack.1, align 16) + %0:gpr(s32) = G_LOAD %2(p0) :: (invariant load (s32) from %fixed-stack.1, align 16) %3:gpr(p0) = G_FRAME_INDEX %fixed-stack.0 - %1:gpr(s32) = G_LOAD %3(p0) :: (invariant load 4 from %fixed-stack.0, align 4) + %1:gpr(s32) = G_LOAD %3(p0) :: (invariant load (s32) from %fixed-stack.0, align 4) %4:gpr(s32) = G_UREM %0, %1 $eax = COPY %4(s32) RET 0, implicit $eax diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86_64-irtranslator-struct-return.ll b/llvm/test/CodeGen/X86/GlobalISel/x86_64-irtranslator-struct-return.ll index b6758c8cca0e4..8decd9397c36a 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/x86_64-irtranslator-struct-return.ll +++ b/llvm/test/CodeGen/X86/GlobalISel/x86_64-irtranslator-struct-return.ll @@ -17,9 +17,9 @@ define float @test_return_f1(float %f.coerce) { ; ALL: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; ALL: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.retval ; ALL: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.f - ; ALL: G_STORE [[COPY]](s32), [[FRAME_INDEX1]](p0) :: (store 4 into %ir.coerce.dive2) - ; ALL: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) - ; ALL: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load 4 from %ir.coerce.dive13) + ; ALL: G_STORE [[COPY]](s32), [[FRAME_INDEX1]](p0) :: (store (s32) into %ir.coerce.dive2) + ; ALL: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.0, align 4), (load (s8) from %ir.1, align 4) + ; ALL: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s32) from %ir.coerce.dive13) ; ALL: $xmm0 = COPY [[LOAD]](s32) ; ALL: RET 0, implicit $xmm0 entry: @@ -45,9 +45,9 @@ define double @test_return_d1(double %d.coerce) { ; ALL: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; ALL: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.retval ; ALL: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.d - ; ALL: G_STORE [[COPY]](s64), [[FRAME_INDEX1]](p0) :: (store 8 into %ir.coerce.dive2) - ; ALL: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store 1 into %ir.0, align 8), (load 1 from %ir.1, align 8) - ; ALL: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load 8 from %ir.coerce.dive13) + ; ALL: G_STORE [[COPY]](s64), [[FRAME_INDEX1]](p0) :: (store (s64) into %ir.coerce.dive2) + ; ALL: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.0, align 8), (load (s8) from %ir.1, align 8) + ; ALL: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s64) from %ir.coerce.dive13) ; ALL: $xmm0 = COPY [[LOAD]](s64) ; ALL: RET 0, implicit $xmm0 entry: @@ -72,14 +72,14 @@ define { double, double } @test_return_d2(double %d.coerce0, double %d.coerce1) ; ALL: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; ALL: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.retval ; ALL: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.d - ; ALL: G_STORE [[COPY]](s64), [[FRAME_INDEX1]](p0) :: (store 8 into %ir.1) + ; ALL: G_STORE [[COPY]](s64), [[FRAME_INDEX1]](p0) :: (store (s64) into %ir.1) ; ALL: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; ALL: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX1]], [[C1]](s64) - ; ALL: G_STORE [[COPY1]](s64), [[PTR_ADD]](p0) :: (store 8 into %ir.2) - ; ALL: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store 1 into %ir.3, align 8), (load 1 from %ir.4, align 8) - ; ALL: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load 8 from %ir.5) + ; ALL: G_STORE [[COPY1]](s64), [[PTR_ADD]](p0) :: (store (s64) into %ir.2) + ; ALL: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.3, align 8), (load (s8) from %ir.4, align 8) + ; ALL: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s64) from %ir.5) ; ALL: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s64) - ; ALL: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p0) :: (dereferenceable load 8 from %ir.5 + 8) + ; ALL: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p0) :: (dereferenceable load (s64) from %ir.5 + 8) ; ALL: $xmm0 = COPY [[LOAD]](s64) ; ALL: $xmm1 = COPY [[LOAD1]](s64) ; ALL: RET 0, implicit $xmm0, implicit $xmm1 @@ -107,9 +107,9 @@ define i32 @test_return_i1(i32 %i.coerce) { ; ALL: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; ALL: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.retval ; ALL: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.i - ; ALL: G_STORE [[COPY]](s32), [[FRAME_INDEX1]](p0) :: (store 4 into %ir.coerce.dive2) - ; ALL: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) - ; ALL: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load 4 from %ir.coerce.dive13) + ; ALL: G_STORE [[COPY]](s32), [[FRAME_INDEX1]](p0) :: (store (s32) into %ir.coerce.dive2) + ; ALL: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.0, align 4), (load (s8) from %ir.1, align 4) + ; ALL: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s32) from %ir.coerce.dive13) ; ALL: $eax = COPY [[LOAD]](s32) ; ALL: RET 0, implicit $eax entry: @@ -133,9 +133,9 @@ define i64 @test_return_i2(i64 %i.coerce) { ; ALL: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; ALL: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.retval ; ALL: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.i - ; ALL: G_STORE [[COPY]](s64), [[FRAME_INDEX1]](p0) :: (store 8 into %ir.0, align 4) - ; ALL: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store 1 into %ir.1, align 4), (load 1 from %ir.2, align 4) - ; ALL: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load 8 from %ir.3, align 4) + ; ALL: G_STORE [[COPY]](s64), [[FRAME_INDEX1]](p0) :: (store (s64) into %ir.0, align 4) + ; ALL: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.1, align 4), (load (s8) from %ir.2, align 4) + ; ALL: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s64) from %ir.3, align 4) ; ALL: $rax = COPY [[LOAD]](s64) ; ALL: RET 0, implicit $rax entry: @@ -162,16 +162,16 @@ define { i64, i32 } @test_return_i3(i64 %i.coerce0, i32 %i.coerce1) { ; ALL: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.i ; ALL: [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.2.coerce ; ALL: [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.3.tmp - ; ALL: G_STORE [[COPY]](s64), [[FRAME_INDEX2]](p0) :: (store 8 into %ir.0, align 4) + ; ALL: G_STORE [[COPY]](s64), [[FRAME_INDEX2]](p0) :: (store (s64) into %ir.0, align 4) ; ALL: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; ALL: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX2]], [[C1]](s64) - ; ALL: G_STORE [[COPY1]](s32), [[PTR_ADD]](p0) :: (store 4 into %ir.1) - ; ALL: G_MEMCPY [[FRAME_INDEX1]](p0), [[FRAME_INDEX2]](p0), [[C]](s64), 0 :: (store 1 into %ir.2, align 4), (load 1 from %ir.3, align 4) - ; ALL: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store 1 into %ir.4, align 4), (load 1 from %ir.5, align 4) - ; ALL: G_MEMCPY [[FRAME_INDEX3]](p0), [[FRAME_INDEX]](p0), [[C]](s64), 0 :: (store 1 into %ir.6, align 8), (load 1 from %ir.7, align 4) - ; ALL: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX3]](p0) :: (dereferenceable load 8 from %ir.tmp) + ; ALL: G_STORE [[COPY1]](s32), [[PTR_ADD]](p0) :: (store (s32) into %ir.1) + ; ALL: G_MEMCPY [[FRAME_INDEX1]](p0), [[FRAME_INDEX2]](p0), [[C]](s64), 0 :: (store (s8) into %ir.2, align 4), (load (s8) from %ir.3, align 4) + ; ALL: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.4, align 4), (load (s8) from %ir.5, align 4) + ; ALL: G_MEMCPY [[FRAME_INDEX3]](p0), [[FRAME_INDEX]](p0), [[C]](s64), 0 :: (store (s8) into %ir.6, align 8), (load (s8) from %ir.7, align 4) + ; ALL: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX3]](p0) :: (dereferenceable load (s64) from %ir.tmp) ; ALL: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX3]], [[C1]](s64) - ; ALL: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (dereferenceable load 4 from %ir.tmp + 8, align 8) + ; ALL: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (dereferenceable load (s32) from %ir.tmp + 8, align 8) ; ALL: $rax = COPY [[LOAD]](s64) ; ALL: $edx = COPY [[LOAD1]](s32) ; ALL: RET 0, implicit $rax, implicit $edx @@ -206,14 +206,14 @@ define { i64, i64 } @test_return_i4(i64 %i.coerce0, i64 %i.coerce1) { ; ALL: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; ALL: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.retval ; ALL: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.i - ; ALL: G_STORE [[COPY]](s64), [[FRAME_INDEX1]](p0) :: (store 8 into %ir.1, align 4) + ; ALL: G_STORE [[COPY]](s64), [[FRAME_INDEX1]](p0) :: (store (s64) into %ir.1, align 4) ; ALL: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; ALL: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX1]], [[C1]](s64) - ; ALL: G_STORE [[COPY1]](s64), [[PTR_ADD]](p0) :: (store 8 into %ir.2, align 4) - ; ALL: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store 1 into %ir.3, align 4), (load 1 from %ir.4, align 4) - ; ALL: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load 8 from %ir.5, align 4) + ; ALL: G_STORE [[COPY1]](s64), [[PTR_ADD]](p0) :: (store (s64) into %ir.2, align 4) + ; ALL: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.3, align 4), (load (s8) from %ir.4, align 4) + ; ALL: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s64) from %ir.5, align 4) ; ALL: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s64) - ; ALL: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p0) :: (dereferenceable load 8 from %ir.5 + 8, align 4) + ; ALL: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p0) :: (dereferenceable load (s64) from %ir.5 + 8, align 4) ; ALL: $rax = COPY [[LOAD]](s64) ; ALL: $rdx = COPY [[LOAD1]](s64) ; ALL: RET 0, implicit $rax, implicit $rdx diff --git a/llvm/test/CodeGen/X86/StackColoring-use-between-allocas.mir b/llvm/test/CodeGen/X86/StackColoring-use-between-allocas.mir index bf63e2b038c19..15d90d6355f4a 100644 --- a/llvm/test/CodeGen/X86/StackColoring-use-between-allocas.mir +++ b/llvm/test/CodeGen/X86/StackColoring-use-between-allocas.mir @@ -148,8 +148,8 @@ machineFunctionInfo: {} body: | bb.0.entry: ; CHECK-LABEL: name: _Z1gv - ; CHECK: [[MOVSDrm:%[0-9]+]]:vr128 = MOVSDrm $rip, 1, $noreg, @i, $noreg :: (dereferenceable load 8 from `<2 x float>* bitcast (%class.d* @i to <2 x float>*)`) - ; CHECK: [[MOVSDrm1:%[0-9]+]]:vr128 = MOVSDrm $rip, 1, $noreg, @i + 8, $noreg :: (dereferenceable load 8 from `<2 x float>* bitcast (float* getelementptr inbounds (%class.d, %class.d* @i, i64 0, i32 0, i64 2) to <2 x float>*)`) + ; CHECK: [[MOVSDrm:%[0-9]+]]:vr128 = MOVSDrm $rip, 1, $noreg, @i, $noreg :: (dereferenceable load (s64) from `<2 x float>* bitcast (%class.d* @i to <2 x float>*)`) + ; CHECK: [[MOVSDrm1:%[0-9]+]]:vr128 = MOVSDrm $rip, 1, $noreg, @i + 8, $noreg :: (dereferenceable load (s64) from `<2 x float>* bitcast (float* getelementptr inbounds (%class.d, %class.d* @i, i64 0, i32 0, i64 2) to <2 x float>*)`) ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ; CHECK: $xmm0 = COPY [[MOVSDrm]] ; CHECK: $xmm1 = COPY [[MOVSDrm1]] @@ -157,8 +157,8 @@ body: | ; CHECK: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ; CHECK: [[COPY:%[0-9]+]]:vr128 = COPY $xmm0 ; CHECK: [[COPY1:%[0-9]+]]:vr128 = COPY $xmm1 - ; CHECK: MOVLPDmr %stack.3.tmp, 1, $noreg, 0, $noreg, [[COPY]] :: (store 8 into %ir.3) - ; CHECK: MOVLPDmr %stack.3.tmp, 1, $noreg, 8, $noreg, [[COPY1]] :: (store 8 into %ir.5) + ; CHECK: MOVLPDmr %stack.3.tmp, 1, $noreg, 0, $noreg, [[COPY]] :: (store (s64) into %ir.3) + ; CHECK: MOVLPDmr %stack.3.tmp, 1, $noreg, 8, $noreg, [[COPY1]] :: (store (s64) into %ir.5) ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ; CHECK: [[LEA64r:%[0-9]+]]:gr64 = LEA64r %stack.0.a, 1, $noreg, 0, $noreg ; CHECK: [[LEA64r1:%[0-9]+]]:gr64 = LEA64r %stack.3.tmp, 1, $noreg, 0, $noreg @@ -176,8 +176,8 @@ body: | ; CHECK: RET 0 LIFETIME_START %stack.0.a LIFETIME_START %stack.1.ref.tmp - %0:vr128 = MOVSDrm $rip, 1, $noreg, @i, $noreg :: (dereferenceable load 8 from `<2 x float>* bitcast (%class.d* @i to <2 x float>*)`) - %1:vr128 = MOVSDrm $rip, 1, $noreg, @i + 8, $noreg :: (dereferenceable load 8 from `<2 x float>* bitcast (float* getelementptr inbounds (%class.d, %class.d* @i, i64 0, i32 0, i64 2) to <2 x float>*)`) + %0:vr128 = MOVSDrm $rip, 1, $noreg, @i, $noreg :: (dereferenceable load (s64) from `<2 x float>* bitcast (%class.d* @i to <2 x float>*)`) + %1:vr128 = MOVSDrm $rip, 1, $noreg, @i + 8, $noreg :: (dereferenceable load (s64) from `<2 x float>* bitcast (float* getelementptr inbounds (%class.d, %class.d* @i, i64 0, i32 0, i64 2) to <2 x float>*)`) ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp $xmm0 = COPY %0 $xmm1 = COPY %1 @@ -185,8 +185,8 @@ body: | ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp %2:vr128 = COPY $xmm0 %3:vr128 = COPY $xmm1 - MOVLPDmr %stack.1.ref.tmp, 1, $noreg, 0, $noreg, %2 :: (store 8 into %ir.2) - MOVLPDmr %stack.1.ref.tmp, 1, $noreg, 8, $noreg, %3 :: (store 8 into %ir.4) + MOVLPDmr %stack.1.ref.tmp, 1, $noreg, 0, $noreg, %2 :: (store (s64) into %ir.2) + MOVLPDmr %stack.1.ref.tmp, 1, $noreg, 8, $noreg, %3 :: (store (s64) into %ir.4) ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp %4:gr64 = LEA64r %stack.0.a, 1, $noreg, 0, $noreg %5:gr64 = LEA64r %stack.1.ref.tmp, 1, $noreg, 0, $noreg diff --git a/llvm/test/CodeGen/X86/adx-commute.mir b/llvm/test/CodeGen/X86/adx-commute.mir index e2023c12293b9..1f74abeb64a4b 100644 --- a/llvm/test/CodeGen/X86/adx-commute.mir +++ b/llvm/test/CodeGen/X86/adx-commute.mir @@ -83,7 +83,7 @@ body: | ; CHECK: dead [[COPY3]].sub_8bit:gr32 = ADD8ri [[COPY3]].sub_8bit, -1, implicit-def $eflags ; CHECK: [[ADCX32rr:%[0-9]+]]:gr32 = ADCX32rr [[ADCX32rr]], [[COPY2]], implicit-def dead $eflags, implicit killed $eflags ; CHECK: [[IMUL32rr:%[0-9]+]]:gr32 = IMUL32rr [[IMUL32rr]], [[COPY2]], implicit-def dead $eflags - ; CHECK: MOV32mr [[COPY]], 1, $noreg, 0, $noreg, [[IMUL32rr]] :: (store 4 into %ir.res) + ; CHECK: MOV32mr [[COPY]], 1, $noreg, 0, $noreg, [[IMUL32rr]] :: (store (s32) into %ir.res) ; CHECK: RET 0 %3:gr64 = COPY killed $rcx %2:gr32 = COPY killed $edx @@ -93,7 +93,7 @@ body: | dead %5:gr8 = ADD8ri killed %4, -1, implicit-def $eflags %6:gr32 = ADCX32rr %1, killed %2, implicit-def dead $eflags, implicit killed $eflags %7:gr32 = IMUL32rr killed %1, killed %6, implicit-def dead $eflags - MOV32mr killed %3, 1, $noreg, 0, $noreg, killed %7 :: (store 4 into %ir.res) + MOV32mr killed %3, 1, $noreg, 0, $noreg, killed %7 :: (store (s32) into %ir.res) RET 0 ... @@ -128,7 +128,7 @@ body: | ; CHECK: dead [[COPY3]].sub_8bit:gr32 = ADD8ri [[COPY3]].sub_8bit, -1, implicit-def $eflags ; CHECK: [[ADCX64rr:%[0-9]+]]:gr64 = ADCX64rr [[ADCX64rr]], [[COPY2]], implicit-def dead $eflags, implicit killed $eflags ; CHECK: [[IMUL64rr:%[0-9]+]]:gr64 = IMUL64rr [[IMUL64rr]], [[COPY2]], implicit-def dead $eflags - ; CHECK: MOV64mr [[COPY]], 1, $noreg, 0, $noreg, [[IMUL64rr]] :: (store 8 into %ir.res) + ; CHECK: MOV64mr [[COPY]], 1, $noreg, 0, $noreg, [[IMUL64rr]] :: (store (s64) into %ir.res) ; CHECK: RET 0 %3:gr64 = COPY killed $rcx %2:gr64 = COPY killed $rdx @@ -138,7 +138,7 @@ body: | dead %5:gr8 = ADD8ri killed %4, -1, implicit-def $eflags %6:gr64 = ADCX64rr %1, killed %2, implicit-def dead $eflags, implicit killed $eflags %7:gr64 = IMUL64rr killed %1, killed %6, implicit-def dead $eflags - MOV64mr killed %3, 1, $noreg, 0, $noreg, killed %7 :: (store 8 into %ir.res) + MOV64mr killed %3, 1, $noreg, 0, $noreg, killed %7 :: (store (s64) into %ir.res) RET 0 ... @@ -173,7 +173,7 @@ body: | ; CHECK: dead [[COPY3]].sub_8bit:gr32 = ADD8ri [[COPY3]].sub_8bit, 127, implicit-def $eflags ; CHECK: [[ADOX32rr:%[0-9]+]]:gr32 = ADOX32rr [[ADOX32rr]], [[COPY2]], implicit-def dead $eflags, implicit killed $eflags ; CHECK: [[IMUL32rr:%[0-9]+]]:gr32 = IMUL32rr [[IMUL32rr]], [[COPY2]], implicit-def dead $eflags - ; CHECK: MOV32mr [[COPY]], 1, $noreg, 0, $noreg, [[IMUL32rr]] :: (store 4 into %ir.res) + ; CHECK: MOV32mr [[COPY]], 1, $noreg, 0, $noreg, [[IMUL32rr]] :: (store (s32) into %ir.res) ; CHECK: RET 0 %3:gr64 = COPY killed $rcx %2:gr32 = COPY killed $edx @@ -183,7 +183,7 @@ body: | dead %5:gr8 = ADD8ri killed %4, 127, implicit-def $eflags %6:gr32 = ADOX32rr %1, killed %2, implicit-def dead $eflags, implicit killed $eflags %7:gr32 = IMUL32rr killed %1, killed %6, implicit-def dead $eflags - MOV32mr killed %3, 1, $noreg, 0, $noreg, killed %7 :: (store 4 into %ir.res) + MOV32mr killed %3, 1, $noreg, 0, $noreg, killed %7 :: (store (s32) into %ir.res) RET 0 ... @@ -218,7 +218,7 @@ body: | ; CHECK: dead [[COPY3]].sub_8bit:gr32 = ADD8ri [[COPY3]].sub_8bit, 127, implicit-def $eflags ; CHECK: [[ADOX64rr:%[0-9]+]]:gr64 = ADOX64rr [[ADOX64rr]], [[COPY2]], implicit-def dead $eflags, implicit killed $eflags ; CHECK: [[IMUL64rr:%[0-9]+]]:gr64 = IMUL64rr [[IMUL64rr]], [[COPY2]], implicit-def dead $eflags - ; CHECK: MOV64mr [[COPY]], 1, $noreg, 0, $noreg, [[IMUL64rr]] :: (store 8 into %ir.res) + ; CHECK: MOV64mr [[COPY]], 1, $noreg, 0, $noreg, [[IMUL64rr]] :: (store (s64) into %ir.res) ; CHECK: RET 0 %3:gr64 = COPY killed $rcx %2:gr64 = COPY killed $rdx @@ -228,7 +228,7 @@ body: | dead %5:gr8 = ADD8ri killed %4, 127, implicit-def $eflags %6:gr64 = ADOX64rr %1, killed %2, implicit-def dead $eflags, implicit killed $eflags %7:gr64 = IMUL64rr killed %1, killed %6, implicit-def dead $eflags - MOV64mr killed %3, 1, $noreg, 0, $noreg, killed %7 :: (store 8 into %ir.res) + MOV64mr killed %3, 1, $noreg, 0, $noreg, killed %7 :: (store (s64) into %ir.res) RET 0 ... diff --git a/llvm/test/CodeGen/X86/avoid-sfb-g-no-change.mir b/llvm/test/CodeGen/X86/avoid-sfb-g-no-change.mir index a18edd2c5d248..fee6de870e3d6 100644 --- a/llvm/test/CodeGen/X86/avoid-sfb-g-no-change.mir +++ b/llvm/test/CodeGen/X86/avoid-sfb-g-no-change.mir @@ -144,10 +144,10 @@ body: | %0:gr64 = COPY $rdi DBG_VALUE %0, $noreg, !21, !DIExpression(), debug-location !25 DBG_VALUE %0, $noreg, !23, !DIExpression(), debug-location !25 - MOV8mi %0, 1, $noreg, 0, $noreg, 0, debug-location !27 :: (store 1 into %ir.0) - %2:vr128 = MOVAPSrm %0, 1, $noreg, 0, $noreg, debug-location !28 :: (load 16 from %ir.p1) + MOV8mi %0, 1, $noreg, 0, $noreg, 0, debug-location !27 :: (store (s8) into %ir.0) + %2:vr128 = MOVAPSrm %0, 1, $noreg, 0, $noreg, debug-location !28 :: (load (s128) from %ir.p1) DBG_VALUE %2, $noreg, !24, !DIExpression(), debug-location !25 - MOVAPSmr %1, 1, $noreg, 0, $noreg, killed %2, debug-location !29 :: (store 16 into %ir.p2) + MOVAPSmr %1, 1, $noreg, 0, $noreg, killed %2, debug-location !29 :: (store (s128) into %ir.p2) RET 0, debug-location !30 ... @@ -197,9 +197,9 @@ body: | %1:gr64 = COPY $rsi %0:gr64 = COPY $rdi - MOV8mi %0, 1, $noreg, 0, $noreg, 0 :: (store 1 into %ir.0) - %2:vr128 = MOVAPSrm %0, 1, $noreg, 0, $noreg :: (load 16 from %ir.p1) - MOVAPSmr %1, 1, $noreg, 0, $noreg, killed %2 :: (store 16 into %ir.p2) + MOV8mi %0, 1, $noreg, 0, $noreg, 0 :: (store (s8) into %ir.0) + %2:vr128 = MOVAPSrm %0, 1, $noreg, 0, $noreg :: (load (s128) from %ir.p1) + MOVAPSmr %1, 1, $noreg, 0, $noreg, killed %2 :: (store (s128) into %ir.p2) RET 0 ; DEBUG-LABEL: name: debug diff --git a/llvm/test/CodeGen/X86/avoid-sfb-g-no-change2.mir b/llvm/test/CodeGen/X86/avoid-sfb-g-no-change2.mir index a7e1fd71ef45a..4b54a0e1b09ee 100644 --- a/llvm/test/CodeGen/X86/avoid-sfb-g-no-change2.mir +++ b/llvm/test/CodeGen/X86/avoid-sfb-g-no-change2.mir @@ -155,7 +155,7 @@ body: | %0:gr64 = COPY $rdi DBG_VALUE %0, $noreg, !21, !DIExpression(), debug-location !25 DBG_VALUE %0, $noreg, !23, !DIExpression(), debug-location !25 - MOV8mi %0, 1, $noreg, 0, $noreg, 0, debug-location !27 :: (store 1 into %ir.0) + MOV8mi %0, 1, $noreg, 0, $noreg, 0, debug-location !27 :: (store (s8) into %ir.0) CFI_INSTRUCTION offset $r13, -123 DBG_VALUE %0, $noreg, !21, !DIExpression(), debug-location !25 DBG_VALUE %0, $noreg, !23, !DIExpression(), debug-location !25 @@ -175,9 +175,9 @@ body: | DBG_VALUE %0, $noreg, !23, !DIExpression(), debug-location !25 DBG_VALUE %0, $noreg, !21, !DIExpression(), debug-location !25 DBG_VALUE %0, $noreg, !23, !DIExpression(), debug-location !25 - %2:vr128 = MOVAPSrm %0, 1, $noreg, 0, $noreg, debug-location !28 :: (load 16 from %ir.p1) + %2:vr128 = MOVAPSrm %0, 1, $noreg, 0, $noreg, debug-location !28 :: (load (s128) from %ir.p1) DBG_VALUE %2, $noreg, !24, !DIExpression(), debug-location !25 - MOVAPSmr %1, 1, $noreg, 0, $noreg, killed %2, debug-location !29 :: (store 16 into %ir.p2) + MOVAPSmr %1, 1, $noreg, 0, $noreg, killed %2, debug-location !29 :: (store (s128) into %ir.p2) RET 0, debug-location !30 ; CHECK-LABEL: name: debug diff --git a/llvm/test/CodeGen/X86/avoid-sfb-g-no-change3.mir b/llvm/test/CodeGen/X86/avoid-sfb-g-no-change3.mir index aca00630b5150..7cd54abb7b3bb 100644 --- a/llvm/test/CodeGen/X86/avoid-sfb-g-no-change3.mir +++ b/llvm/test/CodeGen/X86/avoid-sfb-g-no-change3.mir @@ -175,7 +175,7 @@ body: | DBG_VALUE %0, $noreg, !23, !DIExpression(), debug-location !27 TEST64rr %0, %0, implicit-def $eflags, debug-location !28 DBG_VALUE %0, $noreg, !23, !DIExpression(), debug-location !27 - MOV8mi %0, 1, $noreg, 0, $noreg, 0, debug-location !30 :: (store 1 into %ir.0) + MOV8mi %0, 1, $noreg, 0, $noreg, 0, debug-location !30 :: (store (s8) into %ir.0) CFI_INSTRUCTION offset $r13, -123 DBG_VALUE %0, $noreg, !21, !DIExpression(), debug-location !27 DBG_VALUE %0, $noreg, !23, !DIExpression(), debug-location !27 @@ -201,9 +201,9 @@ body: | bb.1.if.then: successors: %bb.2(0x80000000) - %2:vr128 = MOVAPSrm %0, 1, $noreg, 0, $noreg, debug-location !32 :: (load 16 from %ir.p1) + %2:vr128 = MOVAPSrm %0, 1, $noreg, 0, $noreg, debug-location !32 :: (load (s128) from %ir.p1) DBG_VALUE %2, $noreg, !24, !DIExpression(), debug-location !33 - MOVAPSmr %1, 1, $noreg, 0, $noreg, killed %2, debug-location !34 :: (store 16 into %ir.p2) + MOVAPSmr %1, 1, $noreg, 0, $noreg, killed %2, debug-location !34 :: (store (s128) into %ir.p2) bb.2.if.end: RET 0, debug-location !36 diff --git a/llvm/test/CodeGen/X86/avoid-sfb-kill-flags.mir b/llvm/test/CodeGen/X86/avoid-sfb-kill-flags.mir index 9ae885c94c6e9..14466ec271373 100644 --- a/llvm/test/CodeGen/X86/avoid-sfb-kill-flags.mir +++ b/llvm/test/CodeGen/X86/avoid-sfb-kill-flags.mir @@ -43,22 +43,22 @@ liveins: body: | bb.0.entry: liveins: $rdi, $rsi, $rcx - ; CHECK: MOV32mi %0, 1, $noreg, 0, $noreg, 0 :: (store 4 into %ir.a2) - ; CHECK-NEXT: MOV32mi %3, 1, $noreg, 0, $noreg, 1 :: (store 4 into %ir.a13) - ; CHECK-NEXT: %5:gr32 = MOV32rm %0, 1, $noreg, 0, $noreg :: (load 4 from %ir.1) - ; CHECK-NEXT: MOV32mr %1, 1, $noreg, 0, $noreg, killed %5 :: (store 4 into %ir.0) - ; CHECK-NEXT: %6:gr64 = MOV64rm %0, 1, $noreg, 4, $noreg :: (load 8 from %ir.1 + 4, align 4) - ; CHECK-NEXT: MOV64mr %1, 1, $noreg, 4, $noreg, killed %6 :: (store 8 into %ir.0 + 4, align 4) - ; CHECK-NEXT: %7:gr32 = MOV32rm killed %0, 1, $noreg, 12, $noreg :: (load 4 from %ir.1 + 12) - ; CHECK-NEXT: MOV32mr killed %1, 1, $noreg, 12, $noreg, killed %7 :: (store 4 into %ir.0 + 12) + ; CHECK: MOV32mi %0, 1, $noreg, 0, $noreg, 0 :: (store (s32) into %ir.a2) + ; CHECK-NEXT: MOV32mi %3, 1, $noreg, 0, $noreg, 1 :: (store (s32) into %ir.a13) + ; CHECK-NEXT: %5:gr32 = MOV32rm %0, 1, $noreg, 0, $noreg :: (load (s32) from %ir.1) + ; CHECK-NEXT: MOV32mr %1, 1, $noreg, 0, $noreg, killed %5 :: (store (s32) into %ir.0) + ; CHECK-NEXT: %6:gr64 = MOV64rm %0, 1, $noreg, 4, $noreg :: (load (s64) from %ir.1 + 4, align 4) + ; CHECK-NEXT: MOV64mr %1, 1, $noreg, 4, $noreg, killed %6 :: (store (s64) into %ir.0 + 4, align 4) + ; CHECK-NEXT: %7:gr32 = MOV32rm killed %0, 1, $noreg, 12, $noreg :: (load (s32) from %ir.1 + 12) + ; CHECK-NEXT: MOV32mr killed %1, 1, $noreg, 12, $noreg, killed %7 :: (store (s32) into %ir.0 + 12) %3:gr64 = COPY $rcx %1:gr64 = COPY $rsi %0:gr64 = COPY $rdi - MOV32mi %0, 1, $noreg, 0, $noreg, 0 :: (store 4 into %ir.a2) - MOV32mi %3, 1, $noreg, 0, $noreg, 1 :: (store 4 into %ir.a13) - %4:vr128 = MOVUPSrm killed %0, 1, $noreg, 0, $noreg :: (load 16 from %ir.1, align 4) - MOVUPSmr killed %1, 1, $noreg, 0, $noreg, killed %4 :: (store 16 into %ir.0, align 4) + MOV32mi %0, 1, $noreg, 0, $noreg, 0 :: (store (s32) into %ir.a2) + MOV32mi %3, 1, $noreg, 0, $noreg, 1 :: (store (s32) into %ir.a13) + %4:vr128 = MOVUPSrm killed %0, 1, $noreg, 0, $noreg :: (load (s128) from %ir.1, align 4) + MOVUPSmr killed %1, 1, $noreg, 0, $noreg, killed %4 :: (store (s128) into %ir.0, align 4) RET 0 ... diff --git a/llvm/test/CodeGen/X86/avoid-sfb-offset.mir b/llvm/test/CodeGen/X86/avoid-sfb-offset.mir index 314c1e9bf9399..6fd3f45d2942d 100644 --- a/llvm/test/CodeGen/X86/avoid-sfb-offset.mir +++ b/llvm/test/CodeGen/X86/avoid-sfb-offset.mir @@ -4,9 +4,9 @@ source_filename = "nice.c" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" - + @.str = private unnamed_addr constant [3 x i8] c"%u\00", align 1 - + define i32 @test_offset() #0 { entry: %a = alloca [36 x i32], align 16 @@ -21,13 +21,13 @@ call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 16 %0, i8* nonnull align 4 %scevgep40, i64 136, i1 false) ret i32 %1 } - + ; Function Attrs: argmemonly nounwind declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #1 - + ; Function Attrs: nounwind declare void @llvm.stackprotector(i8*, i8**) #2 - + attributes #0 = { "target-cpu"="core-avx2" } attributes #1 = { argmemonly nounwind "target-cpu"="core-avx2" } attributes #2 = { nounwind } @@ -42,7 +42,7 @@ regBankSelected: false selected: false failedISel: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: gr32, preferred-register: '' } - { id: 1, class: gr32, preferred-register: '' } - { id: 2, class: vr256, preferred-register: '' } @@ -50,8 +50,8 @@ registers: - { id: 4, class: vr256, preferred-register: '' } - { id: 5, class: gr64, preferred-register: '' } - { id: 6, class: vr256, preferred-register: '' } -liveins: -frameInfo: +liveins: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -69,38 +69,38 @@ frameInfo: localFrameSize: 0 savePoint: '' restorePoint: '' -fixedStack: -stack: - - { id: 0, name: a, type: default, offset: 0, size: 144, alignment: 16, +fixedStack: +stack: + - { id: 0, name: a, type: default, offset: 0, size: 144, alignment: 16, stack-id: default, callee-saved-register: '', callee-saved-restored: true, debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 1, name: z, type: default, offset: 0, size: 144, alignment: 16, + - { id: 1, name: z, type: default, offset: 0, size: 144, alignment: 16, stack-id: default, callee-saved-register: '', callee-saved-restored: true, debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -constants: +constants: body: | bb.0.entry: - %0:gr32 = MOV32rm %stack.0.a, 1, $noreg, 36, $noreg :: (dereferenceable load 4 from %ir.arrayidx.9) + %0:gr32 = MOV32rm %stack.0.a, 1, $noreg, 36, $noreg :: (dereferenceable load (s32) from %ir.arrayidx.9) %1:gr32 = ADD32ri8 %0, 9, implicit-def dead $eflags - MOV32mr %stack.0.a, 1, $noreg, 36, $noreg, killed %1 :: (store 4 into %ir.arrayidx.9) - %2:vr256 = VMOVUPSYrm %stack.0.a, 1, $noreg, 4, $noreg :: (dereferenceable load 32 from %ir.scevgep40, align 4) - VMOVUPSYmr %stack.1.z, 1, $noreg, 0, $noreg, killed %2 :: (store 32 into %ir.0, align 16) - %3:vr256 = VMOVUPSYrm %stack.0.a, 1, $noreg, 68, $noreg :: (dereferenceable load 32 from %ir.scevgep40 + 64, align 4) - VMOVUPSYmr %stack.1.z, 1, $noreg, 64, $noreg, killed %3 :: (store 32 into %ir.0 + 64, align 16) - %4:vr256 = VMOVUPSYrm %stack.0.a, 1, $noreg, 100, $noreg :: (dereferenceable load 32 from %ir.scevgep40 + 96, align 4) - VMOVUPSYmr %stack.1.z, 1, $noreg, 96, $noreg, killed %4 :: (store 32 into %ir.0 + 96, align 16) - %5:gr64 = MOV64rm %stack.0.a, 1, $noreg, 132, $noreg :: (dereferenceable load 8 from %ir.scevgep40 + 128, align 4) - MOV64mr %stack.1.z, 1, $noreg, 128, $noreg, killed %5 :: (store 8 into %ir.0 + 128, align 16) - ; CHECK: gr32 = MOV32rm %stack.0.a, 1, $noreg, 36, $noreg :: (dereferenceable load 4 from %ir.scevgep40 + 32) - ; CHECK-NEXT: MOV32mr %stack.1.z, 1, $noreg, 32, $noreg, killed %7 :: (store 4 into %ir.0 + 32, align 16) - ; CHECK-NEXT: %8:vr128 = VMOVUPSrm %stack.0.a, 1, $noreg, 40, $noreg :: (dereferenceable load 16 from %ir.scevgep40 + 36, align 4) - ; CHECK-NEXT: VMOVUPSmr %stack.1.z, 1, $noreg, 36, $noreg, killed %8 :: (store 16 into %ir.0 + 36, align 4, basealign 16) - ; CHECK-NEXT: %9:gr64 = MOV64rm %stack.0.a, 1, $noreg, 56, $noreg :: (dereferenceable load 8 from %ir.scevgep40 + 52, align 4) - ; CHECK-NEXT: MOV64mr %stack.1.z, 1, $noreg, 52, $noreg, killed %9 :: (store 8 into %ir.0 + 52, align 4, basealign 16) - ; CHECK-NEXT: %10:gr32 = MOV32rm %stack.0.a, 1, $noreg, 64, $noreg :: (dereferenceable load 4 from %ir.scevgep40 + 60) - ; CHECK-NEXT: MOV32mr %stack.1.z, 1, $noreg, 60, $noreg, killed %10 :: (store 4 into %ir.0 + 60, basealign 16) - %6:vr256 = VMOVUPSYrm %stack.0.a, 1, $noreg, 36, $noreg :: (dereferenceable load 32 from %ir.scevgep40 + 32, align 4) - VMOVUPSYmr %stack.1.z, 1, $noreg, 32, $noreg, killed %6 :: (store 32 into %ir.0 + 32, align 16) + MOV32mr %stack.0.a, 1, $noreg, 36, $noreg, killed %1 :: (store (s32) into %ir.arrayidx.9) + %2:vr256 = VMOVUPSYrm %stack.0.a, 1, $noreg, 4, $noreg :: (dereferenceable load (s256) from %ir.scevgep40, align 4) + VMOVUPSYmr %stack.1.z, 1, $noreg, 0, $noreg, killed %2 :: (store (s256) into %ir.0, align 16) + %3:vr256 = VMOVUPSYrm %stack.0.a, 1, $noreg, 68, $noreg :: (dereferenceable load (s256) from %ir.scevgep40 + 64, align 4) + VMOVUPSYmr %stack.1.z, 1, $noreg, 64, $noreg, killed %3 :: (store (s256) into %ir.0 + 64, align 16) + %4:vr256 = VMOVUPSYrm %stack.0.a, 1, $noreg, 100, $noreg :: (dereferenceable load (s256) from %ir.scevgep40 + 96, align 4) + VMOVUPSYmr %stack.1.z, 1, $noreg, 96, $noreg, killed %4 :: (store (s256) into %ir.0 + 96, align 16) + %5:gr64 = MOV64rm %stack.0.a, 1, $noreg, 132, $noreg :: (dereferenceable load (s64) from %ir.scevgep40 + 128, align 4) + MOV64mr %stack.1.z, 1, $noreg, 128, $noreg, killed %5 :: (store (s64) into %ir.0 + 128, align 16) + ; CHECK: gr32 = MOV32rm %stack.0.a, 1, $noreg, 36, $noreg :: (dereferenceable load (s32) from %ir.scevgep40 + 32) + ; CHECK-NEXT: MOV32mr %stack.1.z, 1, $noreg, 32, $noreg, killed %7 :: (store (s32) into %ir.0 + 32, align 16) + ; CHECK-NEXT: %8:vr128 = VMOVUPSrm %stack.0.a, 1, $noreg, 40, $noreg :: (dereferenceable load (s128) from %ir.scevgep40 + 36, align 4) + ; CHECK-NEXT: VMOVUPSmr %stack.1.z, 1, $noreg, 36, $noreg, killed %8 :: (store (s128) into %ir.0 + 36, align 4, basealign 16) + ; CHECK-NEXT: %9:gr64 = MOV64rm %stack.0.a, 1, $noreg, 56, $noreg :: (dereferenceable load (s64) from %ir.scevgep40 + 52, align 4) + ; CHECK-NEXT: MOV64mr %stack.1.z, 1, $noreg, 52, $noreg, killed %9 :: (store (s64) into %ir.0 + 52, align 4, basealign 16) + ; CHECK-NEXT: %10:gr32 = MOV32rm %stack.0.a, 1, $noreg, 64, $noreg :: (dereferenceable load (s32) from %ir.scevgep40 + 60) + ; CHECK-NEXT: MOV32mr %stack.1.z, 1, $noreg, 60, $noreg, killed %10 :: (store (s32) into %ir.0 + 60, basealign 16) + %6:vr256 = VMOVUPSYrm %stack.0.a, 1, $noreg, 36, $noreg :: (dereferenceable load (s256) from %ir.scevgep40 + 32, align 4) + VMOVUPSYmr %stack.1.z, 1, $noreg, 32, $noreg, killed %6 :: (store (s256) into %ir.0 + 32, align 16) $eax = COPY %0 RET 0, $eax diff --git a/llvm/test/CodeGen/X86/avx512f-256-set0.mir b/llvm/test/CodeGen/X86/avx512f-256-set0.mir index de240a6f37676..48a355c20a7ad 100644 --- a/llvm/test/CodeGen/X86/avx512f-256-set0.mir +++ b/llvm/test/CodeGen/X86/avx512f-256-set0.mir @@ -62,10 +62,10 @@ body: | bb.0.bb0: ; CHECK-LABEL: name: main ; CHECK: $zmm16 = VPXORDZrr undef $zmm16, undef $zmm16 - ; CHECK: VMOVAPSZmr $rip, 1, $noreg, @tst_, $noreg, killed renamable $zmm16 :: (store 32 into %ir.lsr.iv1, align 64) + ; CHECK: VMOVAPSZmr $rip, 1, $noreg, @tst_, $noreg, killed renamable $zmm16 :: (store (s256) into %ir.lsr.iv1, align 64) ; CHECK: RET 0 renamable $ymm16 = AVX512_256_SET0 - VMOVAPSZmr $rip, 1, $noreg, @tst_, $noreg, killed renamable $zmm16 :: (store 32 into %ir.lsr.iv1, align 64) + VMOVAPSZmr $rip, 1, $noreg, @tst_, $noreg, killed renamable $zmm16 :: (store (s256) into %ir.lsr.iv1, align 64) RET 0 ... diff --git a/llvm/test/CodeGen/X86/bad-tls-fold.mir b/llvm/test/CodeGen/X86/bad-tls-fold.mir index 597a83d893890..fbf4b30d9f4e3 100644 --- a/llvm/test/CodeGen/X86/bad-tls-fold.mir +++ b/llvm/test/CodeGen/X86/bad-tls-fold.mir @@ -28,11 +28,11 @@ registers: - { id: 4, class: gr32 } body: | bb.0.entry: - %0:gr64 = MOV64rm $rip, 1, $noreg, @x, $noreg :: (load 8) + %0:gr64 = MOV64rm $rip, 1, $noreg, @x, $noreg :: (load (s64)) %1:gr64 = OR64ri8 %0, 7, implicit-def dead $eflags - %2:gr64 = MOV64rm $rip, 1, $noreg, target-flags(x86-gottpoff) @i, $noreg :: (load 8) + %2:gr64 = MOV64rm $rip, 1, $noreg, target-flags(x86-gottpoff) @i, $noreg :: (load (s64)) %3:gr64 = OR64rr %2, %1, implicit-def dead $eflags - %4:gr32 = MOV32rm killed %3, 1, $noreg, 0, $fs :: (load 4) + %4:gr32 = MOV32rm killed %3, 1, $noreg, 0, $fs :: (load (s32)) ; CHECK-NOT: orq {{.*}}GOTTPOFF{{.*}} ; ; What we actually expect: @@ -58,11 +58,11 @@ registers: - { id: 4, class: gr32 } body: | bb.0.entry: - %0:gr64 = MOV64rm $rip, 1, $noreg, @x, $noreg :: (load 8) + %0:gr64 = MOV64rm $rip, 1, $noreg, @x, $noreg :: (load (s64)) %1:gr64 = OR64ri8 %0, 7, implicit-def dead $eflags - %2:gr64 = MOV64rm $rip, 1, $noreg, target-flags(x86-gottpoff) @i, $noreg :: (load 8) + %2:gr64 = MOV64rm $rip, 1, $noreg, target-flags(x86-gottpoff) @i, $noreg :: (load (s64)) %3:gr64 = AND64rr %2, %1, implicit-def dead $eflags - %4:gr32 = MOV32rm killed %3, 1, $noreg, 0, $fs :: (load 4) + %4:gr32 = MOV32rm killed %3, 1, $noreg, 0, $fs :: (load (s32)) ; CHECK-NOT: andq {{.*}}GOTTPOFF{{.*}} ; ; What we actually expect: diff --git a/llvm/test/CodeGen/X86/basic-block-sections-mir-parse.mir b/llvm/test/CodeGen/X86/basic-block-sections-mir-parse.mir index ae67f16e85f29..3006647f6bae5 100644 --- a/llvm/test/CodeGen/X86/basic-block-sections-mir-parse.mir +++ b/llvm/test/CodeGen/X86/basic-block-sections-mir-parse.mir @@ -97,25 +97,25 @@ body: | $rbp = frame-setup MOV64rr $rsp CFI_INSTRUCTION def_cfa_register $rbp renamable $dil = AND8ri renamable $dil, 1, implicit-def dead $eflags, implicit killed $edi, implicit-def $edi - MOV8mr $rbp, 1, $noreg, -1, $noreg, renamable $dil, implicit killed $edi :: (store 1 into %ir.3) - TEST8mi $rbp, 1, $noreg, -1, $noreg, 1, implicit-def $eflags :: (load 1 from %ir.3) + MOV8mr $rbp, 1, $noreg, -1, $noreg, renamable $dil, implicit killed $edi :: (store (s8) into %ir.3) + TEST8mi $rbp, 1, $noreg, -1, $noreg, 1, implicit-def $eflags :: (load (s8) from %ir.3) JCC_1 %bb.2, 4, implicit killed $eflags JMP_1 %bb.1 bb.1 (%ir-block.7, bbsections 1): successors: %bb.3(0x80000000) - MOV32mi $rbp, 1, $noreg, -8, $noreg, 1 :: (store 4 into %ir.2) + MOV32mi $rbp, 1, $noreg, -8, $noreg, 1 :: (store (s32) into %ir.2) JMP_1 %bb.3 bb.2 (%ir-block.8, bbsections 2): successors: %bb.3(0x80000000) - MOV32mi $rbp, 1, $noreg, -8, $noreg, 0 :: (store 4 into %ir.2) + MOV32mi $rbp, 1, $noreg, -8, $noreg, 0 :: (store (s32) into %ir.2) JMP_1 %bb.3 bb.3 (%ir-block.9, bbsections 3): - renamable $eax = MOV32rm $rbp, 1, $noreg, -8, $noreg :: (load 4 from %ir.2) + renamable $eax = MOV32rm $rbp, 1, $noreg, -8, $noreg :: (load (s32) from %ir.2) $rbp = frame-destroy POP64r implicit-def $rsp, implicit $rsp CFI_INSTRUCTION def_cfa $rsp, 8 RETQ implicit $eax diff --git a/llvm/test/CodeGen/X86/block-placement.mir b/llvm/test/CodeGen/X86/block-placement.mir index 1d661687d2551..315669403c51f 100644 --- a/llvm/test/CodeGen/X86/block-placement.mir +++ b/llvm/test/CodeGen/X86/block-placement.mir @@ -46,7 +46,7 @@ liveins: - { reg: '$rdi' } - { reg: '$esi' } -# CHECK: $eax = FAULTING_OP 1, %bb.3, 1684, killed $rdi, 1, $noreg, 0, $noreg :: (load 4 from %ir.ptr) +# CHECK: $eax = FAULTING_OP 1, %bb.3, 1684, killed $rdi, 1, $noreg, 0, $noreg :: (load (s32) from %ir.ptr) # CHECK-NEXT: JMP_1 %bb.2 # CHECK: bb.3.null: # CHECK: bb.4.right: @@ -66,7 +66,7 @@ body: | successors: %bb.2(0x7ffff800), %bb.4(0x00000800) liveins: $rdi - $eax = FAULTING_OP 1, %bb.2, 1684, killed $rdi, 1, $noreg, 0, $noreg :: (load 4 from %ir.ptr) + $eax = FAULTING_OP 1, %bb.2, 1684, killed $rdi, 1, $noreg, 0, $noreg :: (load (s32) from %ir.ptr) JMP_1 %bb.4 bb.4.not_null: diff --git a/llvm/test/CodeGen/X86/bug47278.mir b/llvm/test/CodeGen/X86/bug47278.mir index 4e8b0a69c0d2c..c7387c915972c 100644 --- a/llvm/test/CodeGen/X86/bug47278.mir +++ b/llvm/test/CodeGen/X86/bug47278.mir @@ -11,9 +11,9 @@ body: | bb.0: ; CHECK-LABEL: name: foo ; CHECK: renamable $eax = IMPLICIT_DEF - ; CHECK: renamable $edx = MOVZX32rm8 renamable $eax, 1, $noreg, 0, $noreg :: (load 1 from `i168* undef` + 20, align 4, basealign 16) - ; CHECK: dead renamable $ecx = MOV32rm renamable $eax, 1, $noreg, 0, $noreg :: (load 4 from `i168* undef` + 12, basealign 16) - ; CHECK: renamable $al = MOV8rm killed renamable $eax, 1, $noreg, 0, $noreg :: (load 1 from `i32* undef`, align 4) + ; CHECK: renamable $edx = MOVZX32rm8 renamable $eax, 1, $noreg, 0, $noreg :: (load (s8) from `i168* undef` + 20, align 4, basealign 16) + ; CHECK: dead renamable $ecx = MOV32rm renamable $eax, 1, $noreg, 0, $noreg :: (load (s32) from `i168* undef` + 12, basealign 16) + ; CHECK: renamable $al = MOV8rm killed renamable $eax, 1, $noreg, 0, $noreg :: (load (s8) from `i32* undef`, align 4) ; CHECK: dead renamable $ecx = COPY renamable $edx ; CHECK: dead renamable $ecx = COPY renamable $edx ; CHECK: dead renamable $ecx = COPY renamable $edx @@ -26,9 +26,9 @@ body: | ; CHECK: dead renamable $eax = SHRD32rrCL renamable $eax, killed renamable $edx, implicit-def dead $eflags, implicit killed $cl ; CHECK: RETL %0:gr32 = IMPLICIT_DEF - %1:gr32 = MOVZX32rm8 %0, 1, $noreg, 0, $noreg :: (load 1 from `i168* undef` + 20, align 4, basealign 16) - %2:gr32 = MOV32rm %0, 1, $noreg, 0, $noreg :: (load 4 from `i168* undef` + 12, basealign 16) - %3:gr8 = MOV8rm %0, 1, $noreg, 0, $noreg :: (load 1 from `i32* undef`, align 4) + %1:gr32 = MOVZX32rm8 %0, 1, $noreg, 0, $noreg :: (load (s8) from `i168* undef` + 20, align 4, basealign 16) + %2:gr32 = MOV32rm %0, 1, $noreg, 0, $noreg :: (load (s32) from `i168* undef` + 12, basealign 16) + %3:gr8 = MOV8rm %0, 1, $noreg, 0, $noreg :: (load (s8) from `i32* undef`, align 4) %4:gr32 = COPY %1 %5:gr32 = COPY %1 %6:gr32 = COPY %1 diff --git a/llvm/test/CodeGen/X86/cf-opt-memops.mir b/llvm/test/CodeGen/X86/cf-opt-memops.mir index 40737a92d570e..7b63cb2fdc984 100644 --- a/llvm/test/CodeGen/X86/cf-opt-memops.mir +++ b/llvm/test/CodeGen/X86/cf-opt-memops.mir @@ -79,9 +79,9 @@ body: | bb.0.entry: ADJCALLSTACKDOWN64 24, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp %0:gr64 = COPY $rsp - MOV64mi32 %0, 1, $noreg, 16, $noreg, @.str.16 :: (store 8 into stack + 16) - MOV64mi32 %0, 1, $noreg, 8, $noreg, @.str.15 :: (store 8 into stack + 8) - MOV64mi32 %0, 1, $noreg, 0, $noreg, @.str.14 :: (store 8 into stack) + MOV64mi32 %0, 1, $noreg, 16, $noreg, @.str.16 :: (store (s64) into stack + 16) + MOV64mi32 %0, 1, $noreg, 8, $noreg, @.str.15 :: (store (s64) into stack + 8) + MOV64mi32 %0, 1, $noreg, 0, $noreg, @.str.14 :: (store (s64) into stack) %1:gr64 = MOV32ri64 @.str.8 %2:gr64 = MOV32ri64 @.str.9 %3:gr64 = MOV32ri64 @.str.10 @@ -102,8 +102,8 @@ body: | RET 0 # Call frame optimization should propagate memory operands -# CHECK: PUSH64i32 @{{.*}} :: (store 8 into stack + 16) -# CHECK: PUSH64i32 @{{.*}} :: (store 8 into stack + 8) -# CHECK: PUSH64i32 @{{.*}} :: (store 8 into stack) +# CHECK: PUSH64i32 @{{.*}} :: (store (s64) into stack + 16) +# CHECK: PUSH64i32 @{{.*}} :: (store (s64) into stack + 8) +# CHECK: PUSH64i32 @{{.*}} :: (store (s64) into stack) ... diff --git a/llvm/test/CodeGen/X86/conditional-tailcall-samedest.mir b/llvm/test/CodeGen/X86/conditional-tailcall-samedest.mir index aa6d317acc376..e2f5b67f3114b 100644 --- a/llvm/test/CodeGen/X86/conditional-tailcall-samedest.mir +++ b/llvm/test/CodeGen/X86/conditional-tailcall-samedest.mir @@ -22,9 +22,9 @@ source_filename = "t.ll" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64--linux" - + @static_local_guard = external global i64, align 8 - + ; Function Attrs: optsize define void @f(i32 %arg) #0 { entry: @@ -33,38 +33,38 @@ i32 1, label %sw.bb i32 2, label %sw.bb2 ] - + sw.bb: ; preds = %entry, %entry %tmp = load atomic i8, i8* bitcast (i64* @static_local_guard to i8*) acquire, align 8 %guard.uninitialized.i = icmp eq i8 %tmp, 0 br i1 %guard.uninitialized.i, label %init.check.i, label %return, !prof !0 - + init.check.i: ; preds = %sw.bb tail call void @initialize_static_local(i64* nonnull @static_local_guard) ret void - + sw.bb2: ; preds = %entry tail call void @mergeable_conditional_tailcall() ret void - + sw.epilog: ; preds = %entry tail call void @mergeable_conditional_tailcall() ret void - + return: ; preds = %sw.bb ret void } - + declare void @mergeable_conditional_tailcall() - + declare void @initialize_static_local(i64*) - + ; Function Attrs: nounwind declare void @llvm.stackprotector(i8*, i8**) #1 - + attributes #0 = { optsize } attributes #1 = { nounwind } - + !0 = !{!"branch_weights", i32 1, i32 1048575} ... @@ -76,10 +76,10 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: -liveins: +registers: +liveins: - { reg: '$edi', virtual-reg: '' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -96,43 +96,43 @@ frameInfo: hasMustTailInVarArgFunc: false savePoint: '' restorePoint: '' -fixedStack: -stack: -constants: +fixedStack: +stack: +constants: body: | bb.0.entry: successors: %bb.2(0x40000000), %bb.1(0x40000000) liveins: $edi - + CMP32ri8 killed $edi, 2, implicit-def $eflags JCC_1 %bb.2, 2, implicit $eflags JMP_1 %bb.1 - + bb.1.entry: successors: %bb.4(0x40000000), %bb.5(0x40000000) liveins: $eflags - + JCC_1 %bb.4, 4, implicit killed $eflags JMP_1 %bb.5 - + bb.2.sw.bb: successors: %bb.3(0x00000800), %bb.6(0x7ffff800) - - $al = MOV8rm $rip, 1, $noreg, @static_local_guard, $noreg :: (volatile load acquire 1 from `i8* bitcast (i64* @static_local_guard to i8*)`, align 8) + + $al = MOV8rm $rip, 1, $noreg, @static_local_guard, $noreg :: (volatile load acquire (s8) from `i8* bitcast (i64* @static_local_guard to i8*)`, align 8) TEST8rr killed $al, $al, implicit-def $eflags JCC_1 %bb.6, 5, implicit killed $eflags JMP_1 %bb.3 - + bb.3.init.check.i: dead $edi = MOV32ri @static_local_guard, implicit-def $rdi TCRETURNdi64 @initialize_static_local, 0, csr_64, implicit $rsp, implicit $rdi - + bb.4.sw.bb2: TCRETURNdi64 @mergeable_conditional_tailcall, 0, csr_64, implicit $rsp - + bb.5.sw.epilog: TCRETURNdi64 @mergeable_conditional_tailcall, 0, csr_64, implicit $rsp - + bb.6.return: RET 0 diff --git a/llvm/test/CodeGen/X86/copy-eflags-liveinlists.mir b/llvm/test/CodeGen/X86/copy-eflags-liveinlists.mir index 54454fe0017f1..35ac60453c287 100644 --- a/llvm/test/CodeGen/X86/copy-eflags-liveinlists.mir +++ b/llvm/test/CodeGen/X86/copy-eflags-liveinlists.mir @@ -46,11 +46,11 @@ fixedStack: machineFunctionInfo: {} body: | bb.0: - %4:gr32 = MOV32rm %fixed-stack.3, 1, $noreg, 0, $noreg :: (load 4 from %fixed-stack.3) - %3:gr32 = MOV32rm %fixed-stack.4, 1, $noreg, 0, $noreg :: (load 4 from %fixed-stack.4) - %7:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load 4 from %fixed-stack.0) - %6:gr32 = MOV32rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (load 4 from %fixed-stack.1) - %5:gr8 = MOV8rm %fixed-stack.2, 1, $noreg, 0, $noreg :: (load 1 from %fixed-stack.2, align 4) + %4:gr32 = MOV32rm %fixed-stack.3, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.3) + %3:gr32 = MOV32rm %fixed-stack.4, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.4) + %7:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.0) + %6:gr32 = MOV32rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.1) + %5:gr8 = MOV8rm %fixed-stack.2, 1, $noreg, 0, $noreg :: (load (s8) from %fixed-stack.2, align 4) %9:gr32 = IMPLICIT_DEF %11:gr32 = IMPLICIT_DEF @@ -77,7 +77,7 @@ body: | liveins: $eflags %18:gr8 = PHI %5, %bb.2, %17, %bb.1 - MOV8mr %6, 1, $noreg, 0, $noreg, killed %18 :: (volatile store 1 into %ir.arg3) + MOV8mr %6, 1, $noreg, 0, $noreg, killed %18 :: (volatile store (s8) into %ir.arg3) JCC_1 %bb.5, 12, implicit $eflags bb.4: diff --git a/llvm/test/CodeGen/X86/domain-reassignment.mir b/llvm/test/CodeGen/X86/domain-reassignment.mir index e24a5ded09a00..192c2c11d19d3 100644 --- a/llvm/test/CodeGen/X86/domain-reassignment.mir +++ b/llvm/test/CodeGen/X86/domain-reassignment.mir @@ -145,7 +145,7 @@ body: | ; CHECK: [[DEF1:%[0-9]+]]:vr128 = IMPLICIT_DEF ; CHECK: [[VMOVSSZrrk:%[0-9]+]]:vr128 = VMOVSSZrrk [[COPY15]], killed [[COPY14]], killed [[DEF1]], [[COPY5]] ; CHECK: [[COPY16:%[0-9]+]]:fr32x = COPY [[VMOVSSZrrk]] - ; CHECK: VMOVSSZmr [[COPY6]], 1, $noreg, 0, $noreg, killed [[COPY16]] :: (store 4 into %ir.fptr) + ; CHECK: VMOVSSZmr [[COPY6]], 1, $noreg, 0, $noreg, killed [[COPY16]] :: (store (s32) into %ir.fptr) ; CHECK: RET 0 bb.0.entry: successors: %bb.1(0x40000000), %bb.2(0x40000000) @@ -196,7 +196,7 @@ body: | %21 = IMPLICIT_DEF %20 = VMOVSSZrrk %19, killed %18, killed %21, %5 %22 = COPY %20 - VMOVSSZmr %4, 1, $noreg, 0, $noreg, killed %22 :: (store 4 into %ir.fptr) + VMOVSSZmr %4, 1, $noreg, 0, $noreg, killed %22 :: (store (s32) into %ir.fptr) RET 0 ... diff --git a/llvm/test/CodeGen/X86/extend-set-cc-uses-dbg.ll b/llvm/test/CodeGen/X86/extend-set-cc-uses-dbg.ll index a66b74a19066b..2ba7fbeffc40c 100644 --- a/llvm/test/CodeGen/X86/extend-set-cc-uses-dbg.ll +++ b/llvm/test/CodeGen/X86/extend-set-cc-uses-dbg.ll @@ -6,9 +6,9 @@ target triple = "x86_64-apple-macosx10.13.0" define void @foo(i32* %p) !dbg !4 { bb: %tmp = load i32, i32* %p, align 4, !dbg !7 - ; CHECK: $eax = MOV32rm killed {{.*}} $rdi, {{.*}} debug-location !7 :: (load 4 from %ir.p) + ; CHECK: $eax = MOV32rm killed {{.*}} $rdi, {{.*}} debug-location !7 :: (load (s32) from %ir.p) ; CHECK-NEXT: $rax = KILL killed renamable $eax, debug-location !7 - ; CHECK-NEXT: MOV64mr $rsp, 1, $noreg, -8, $noreg, $rax :: (store 8 into %stack.0) + ; CHECK-NEXT: MOV64mr $rsp, 1, $noreg, -8, $noreg, $rax :: (store (s64) into %stack.0) ; CHECK-NEXT: SUB64ri8 renamable $rax, 3, implicit-def $eflags, debug-location !7 switch i32 %tmp, label %bb7 [ diff --git a/llvm/test/CodeGen/X86/fast-regalloc-live-out-debug-values.mir b/llvm/test/CodeGen/X86/fast-regalloc-live-out-debug-values.mir index 2b39ee1c91317..2b2192a409e5e 100644 --- a/llvm/test/CodeGen/X86/fast-regalloc-live-out-debug-values.mir +++ b/llvm/test/CodeGen/X86/fast-regalloc-live-out-debug-values.mir @@ -141,29 +141,29 @@ body: | ; CHECK: frame-setup PUSH64r killed $rbx, implicit-def $rsp, implicit $rsp, debug-location !13 ; CHECK: $rsp = frame-setup SUB64ri8 $rsp, 40, implicit-def dead $eflags ; CHECK: CFI_INSTRUCTION offset $rbx, -24 - ; CHECK: renamable $eax = MOV32rm $rbp, 1, $noreg, -12, $noreg, debug-location !13 :: (dereferenceable load 4 from %ir.a.addr) + ; CHECK: renamable $eax = MOV32rm $rbp, 1, $noreg, -12, $noreg, debug-location !13 :: (dereferenceable load (s32) from %ir.a.addr) ; CHECK: renamable $rax = KILL killed renamable $eax, debug-location !13 ; CHECK: $rcx = MOV64rr $rsp, debug-location !14 - ; CHECK: MOV64mr $rbp, 1, $noreg, -40, $noreg, $rcx :: (store 8 into %stack.4) + ; CHECK: MOV64mr $rbp, 1, $noreg, -40, $noreg, $rcx :: (store (s64) into %stack.4) ; CHECK: DBG_VALUE $rbp, 0, !18, !DIExpression(DW_OP_constu, 40, DW_OP_minus, DW_OP_deref), debug-location !22 ; CHECK: $rsp = MOV64rr $rcx, debug-location !14 - ; CHECK: MOV64mr $rbp, 1, $noreg, -24, $noreg, killed renamable $rax, debug-location !14 :: (store 8 into %ir.__vla_expr0) + ; CHECK: MOV64mr $rbp, 1, $noreg, -24, $noreg, killed renamable $rax, debug-location !14 :: (store (s64) into %ir.__vla_expr0) ; CHECK: DBG_VALUE renamable $rcx, 0, !18, !DIExpression(), debug-location !22 - ; CHECK: MOV32mi $rbp, 1, $noreg, -28, $noreg, 0, debug-location !25 :: (store 4 into %ir.i) + ; CHECK: MOV32mi $rbp, 1, $noreg, -28, $noreg, 0, debug-location !25 :: (store (s32) into %ir.i) ; CHECK: DBG_VALUE $rbp, 0, !18, !DIExpression(DW_OP_constu, 40, DW_OP_minus, DW_OP_deref), debug-location !22 ; CHECK: bb.1.for.cond: ; CHECK: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK: DBG_VALUE $rbp, 0, !18, !DIExpression(DW_OP_constu, 40, DW_OP_minus, DW_OP_deref), debug-location !22 - ; CHECK: renamable $eax = MOV32rm $rbp, 1, $noreg, -28, $noreg, debug-location !27 :: (load 4 from %ir.i) - ; CHECK: CMP32rm killed renamable $eax, $rbp, 1, $noreg, -12, $noreg, implicit-def $eflags, debug-location !30 :: (load 4 from %ir.a.addr) + ; CHECK: renamable $eax = MOV32rm $rbp, 1, $noreg, -28, $noreg, debug-location !27 :: (load (s32) from %ir.i) + ; CHECK: CMP32rm killed renamable $eax, $rbp, 1, $noreg, -12, $noreg, implicit-def $eflags, debug-location !30 :: (load (s32) from %ir.a.addr) ; CHECK: JCC_1 %bb.4, 13, implicit killed $eflags, debug-location !31 ; CHECK: bb.2.for.body: ; CHECK: successors: %bb.3(0x80000000) ; CHECK: DBG_VALUE $rbp, 0, !18, !DIExpression(DW_OP_constu, 40, DW_OP_minus, DW_OP_deref), debug-location !22 - ; CHECK: $rax = MOV64rm $rbp, 1, $noreg, -40, $noreg :: (load 8 from %stack.4) - ; CHECK: renamable $edx = MOV32rm $rbp, 1, $noreg, -12, $noreg, debug-location !32 :: (load 4 from %ir.a.addr) - ; CHECK: renamable $rcx = MOVSX64rm32 $rbp, 1, $noreg, -28, $noreg, debug-location !36 :: (load 4 from %ir.i) - ; CHECK: MOV32mr renamable $rax, 4, killed renamable $rcx, 0, $noreg, killed renamable $edx, debug-location !37 :: (store 4 into %ir.arrayidx) + ; CHECK: $rax = MOV64rm $rbp, 1, $noreg, -40, $noreg :: (load (s64) from %stack.4) + ; CHECK: renamable $edx = MOV32rm $rbp, 1, $noreg, -12, $noreg, debug-location !32 :: (load (s32) from %ir.a.addr) + ; CHECK: renamable $rcx = MOVSX64rm32 $rbp, 1, $noreg, -28, $noreg, debug-location !36 :: (load (s32) from %ir.i) + ; CHECK: MOV32mr renamable $rax, 4, killed renamable $rcx, 0, $noreg, killed renamable $edx, debug-location !37 :: (store (s32) into %ir.arrayidx) ; CHECK: bb.3.for.inc: ; CHECK: successors: %bb.1(0x80000000) ; CHECK: DBG_VALUE $rbp, 0, !18, !DIExpression(DW_OP_constu, 40, DW_OP_minus, DW_OP_deref), debug-location !22 @@ -171,12 +171,12 @@ body: | ; CHECK: bb.4.for.end: ; CHECK: DBG_VALUE $rbp, 0, !18, !DIExpression(DW_OP_constu, 40, DW_OP_minus, DW_OP_deref), debug-location !22 ; CHECK: $rax = IMPLICIT_DEF - ; CHECK: $rax = MOV64rm $rbp, 1, $noreg, -40, $noreg :: (load 8 from %stack.4) + ; CHECK: $rax = MOV64rm $rbp, 1, $noreg, -40, $noreg :: (load (s64) from %stack.4) ; CHECK: dead $rbx = IMPLICIT_DEF ; CHECK: dead $rcx = IMPLICIT_DEF ; CHECK: dead $rdx = IMPLICIT_DEF ; CHECK: renamable $rcx = IMPLICIT_DEF - ; CHECK: renamable $eax = MOV32rm killed renamable $rax, 4, killed renamable $rcx, 0, $noreg, debug-location !44 :: (load 4 from %ir.arrayidx3) + ; CHECK: renamable $eax = MOV32rm killed renamable $rax, 4, killed renamable $rcx, 0, $noreg, debug-location !44 :: (load (s32) from %ir.arrayidx3) ; CHECK: $rsp = LEA64r $rbp, 1, $noreg, -8, $noreg, debug-location !45 ; CHECK: $rbx = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !45 ; CHECK: $rbp = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !45 @@ -186,25 +186,25 @@ body: | liveins: $edi %0:gr32 = COPY $edi - %1:gr32 = MOV32rm %stack.0.a.addr, 1, $noreg, 0, $noreg, debug-location !13 :: (dereferenceable load 4 from %ir.a.addr) + %1:gr32 = MOV32rm %stack.0.a.addr, 1, $noreg, 0, $noreg, debug-location !13 :: (dereferenceable load (s32) from %ir.a.addr) %2:gr64_nosp = SUBREG_TO_REG 0, killed %1, %subreg.sub_32bit, debug-location !13 ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp, debug-location !14 %3:gr64 = COPY $rsp, debug-location !14 $rsp = COPY %3, debug-location !14 ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp, debug-location !14 - MOV64mr %stack.1.__vla_expr0, 1, $noreg, 0, $noreg, %2, debug-location !14 :: (store 8 into %ir.__vla_expr0) + MOV64mr %stack.1.__vla_expr0, 1, $noreg, 0, $noreg, %2, debug-location !14 :: (store (s64) into %ir.__vla_expr0) DBG_VALUE %3, 0, !18, !DIExpression(), debug-location !22 - MOV32mi %stack.2.i, 1, $noreg, 0, $noreg, 0, debug-location !25 :: (store 4 into %ir.i) + MOV32mi %stack.2.i, 1, $noreg, 0, $noreg, 0, debug-location !25 :: (store (s32) into %ir.i) bb.1.for.cond: - %4:gr32 = MOV32rm %stack.2.i, 1, $noreg, 0, $noreg, debug-location !27 :: (load 4 from %ir.i) - CMP32rm %4, %stack.0.a.addr, 1, $noreg, 0, $noreg, implicit-def $eflags, debug-location !30 :: (load 4 from %ir.a.addr) + %4:gr32 = MOV32rm %stack.2.i, 1, $noreg, 0, $noreg, debug-location !27 :: (load (s32) from %ir.i) + CMP32rm %4, %stack.0.a.addr, 1, $noreg, 0, $noreg, implicit-def $eflags, debug-location !30 :: (load (s32) from %ir.a.addr) JCC_1 %bb.4, 13, implicit $eflags, debug-location !31 bb.2.for.body: - %5:gr32 = MOV32rm %stack.0.a.addr, 1, $noreg, 0, $noreg, debug-location !32 :: (load 4 from %ir.a.addr) - %6:gr64_nosp = MOVSX64rm32 %stack.2.i, 1, $noreg, 0, $noreg, debug-location !36 :: (load 4 from %ir.i) - MOV32mr %3, 4, %6, 0, $noreg, killed %5, debug-location !37 :: (store 4 into %ir.arrayidx) + %5:gr32 = MOV32rm %stack.0.a.addr, 1, $noreg, 0, $noreg, debug-location !32 :: (load (s32) from %ir.a.addr) + %6:gr64_nosp = MOVSX64rm32 %stack.2.i, 1, $noreg, 0, $noreg, debug-location !36 :: (load (s32) from %ir.i) + MOV32mr %3, 4, %6, 0, $noreg, killed %5, debug-location !37 :: (store (s32) into %ir.arrayidx) bb.3.for.inc: JMP_1 %bb.1, debug-location !39 @@ -215,7 +215,7 @@ body: | $rcx = IMPLICIT_DEF $rdx = IMPLICIT_DEF %7:gr64_nosp = IMPLICIT_DEF - %8:gr32 = MOV32rm %3, 4, %7, 0, $noreg, debug-location !44 :: (load 4 from %ir.arrayidx3) + %8:gr32 = MOV32rm %3, 4, %7, 0, $noreg, debug-location !44 :: (load (s32) from %ir.arrayidx3) $eax = COPY %8, debug-location !45 RETQ implicit $eax, debug-location !45 diff --git a/llvm/test/CodeGen/X86/fixup-bw-inst.mir b/llvm/test/CodeGen/X86/fixup-bw-inst.mir index 0f0b454579139..83cd81ca7151c 100644 --- a/llvm/test/CodeGen/X86/fixup-bw-inst.mir +++ b/llvm/test/CodeGen/X86/fixup-bw-inst.mir @@ -102,8 +102,8 @@ body: | bb.2.if.then: liveins: $rdi - $ax = MOV16rm killed $rdi, 1, $noreg, 0, $noreg, implicit-def $eax :: (load 2 from %ir.p) - ; CHECK: $eax = MOVZX32rm16 killed $rdi, 1, $noreg, 0, $noreg, implicit-def $eax :: (load 2 from %ir.p) + $ax = MOV16rm killed $rdi, 1, $noreg, 0, $noreg, implicit-def $eax :: (load (s16) from %ir.p) + ; CHECK: $eax = MOVZX32rm16 killed $rdi, 1, $noreg, 0, $noreg, implicit-def $eax :: (load (s16) from %ir.p) $ax = KILL $ax, implicit killed $eax RETQ $ax diff --git a/llvm/test/CodeGen/X86/fold-sext-trunc.ll b/llvm/test/CodeGen/X86/fold-sext-trunc.ll index a3a4ee88b6485..0f1745e6e0a82 100644 --- a/llvm/test/CodeGen/X86/fold-sext-trunc.ll +++ b/llvm/test/CodeGen/X86/fold-sext-trunc.ll @@ -14,7 +14,7 @@ define void @int322(i32 %foo) !dbg !5 { entry: %val = load i64, i64* getelementptr (%0, %0* bitcast (%struct.S1* @g_10 to %0*), i32 0, i32 0), !dbg !16 %0 = load i32, i32* getelementptr inbounds (%struct.S1, %struct.S1* @g_10, i32 0, i32 1), align 4, !dbg !17 -; MIR: renamable {{\$r[a-z]+}} = MOVSX64rm32 {{.*}}, @g_10 + 4,{{.*}} debug-location !17 :: (dereferenceable load 4 from `i64* getelementptr (%0, %0* bitcast (%struct.S1* @g_10 to %0*), i32 0, i32 0)` + 4) +; MIR: renamable {{\$r[a-z]+}} = MOVSX64rm32 {{.*}}, @g_10 + 4,{{.*}} debug-location !17 :: (dereferenceable load (s32) from `i64* getelementptr (%0, %0* bitcast (%struct.S1* @g_10 to %0*), i32 0, i32 0)` + 4) %1 = sext i32 %0 to i64, !dbg !18 %tmp4.i = lshr i64 %val, 32, !dbg !19 %tmp5.i = trunc i64 %tmp4.i to i32, !dbg !20 diff --git a/llvm/test/CodeGen/X86/fp-intrinsics-flags.ll b/llvm/test/CodeGen/X86/fp-intrinsics-flags.ll index 52278a6369eb3..5e361fbe99af1 100644 --- a/llvm/test/CodeGen/X86/fp-intrinsics-flags.ll +++ b/llvm/test/CodeGen/X86/fp-intrinsics-flags.ll @@ -3,10 +3,10 @@ define double @sifdb(i8 %x) #0 { entry: ; CHECK-LABEL: name: sifdb -; CHECK: [[MOVSX32rm8_:%[0-9]+]]:gr32 = MOVSX32rm8 %fixed-stack.0, 1, $noreg, 0, $noreg :: (load 1 from %fixed-stack.0, align 16) +; CHECK: [[MOVSX32rm8_:%[0-9]+]]:gr32 = MOVSX32rm8 %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s8) from %fixed-stack.0, align 16) ; CHECK: [[CVTSI2SDrr:%[0-9]+]]:fr64 = CVTSI2SDrr killed [[MOVSX32rm8_]] -; CHECK: MOVSDmr %stack.0, 1, $noreg, 0, $noreg, killed [[CVTSI2SDrr]] :: (store 8 into %stack.0, align 4) -; CHECK: [[LD_Fp64m80_:%[0-9]+]]:rfp80 = nofpexcept LD_Fp64m80 %stack.0, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load 8 from %stack.0, align 4) +; CHECK: MOVSDmr %stack.0, 1, $noreg, 0, $noreg, killed [[CVTSI2SDrr]] :: (store (s64) into %stack.0, align 4) +; CHECK: [[LD_Fp64m80_:%[0-9]+]]:rfp80 = nofpexcept LD_Fp64m80 %stack.0, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load (s64) from %stack.0, align 4) ; CHECK: RET 0, killed [[LD_Fp64m80_]] %result = call double @llvm.experimental.constrained.sitofp.f64.i8(i8 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 ret double %result @@ -15,10 +15,10 @@ entry: define double @sifdw(i16 %x) #0 { entry: ; CHECK-LABEL: name: sifdw -; CHECK: [[MOVSX32rm16_:%[0-9]+]]:gr32 = MOVSX32rm16 %fixed-stack.0, 1, $noreg, 0, $noreg :: (load 2 from %fixed-stack.0, align 16) +; CHECK: [[MOVSX32rm16_:%[0-9]+]]:gr32 = MOVSX32rm16 %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s16) from %fixed-stack.0, align 16) ; CHECK: [[CVTSI2SDrr:%[0-9]+]]:fr64 = CVTSI2SDrr killed [[MOVSX32rm16_]] -; CHECK: MOVSDmr %stack.0, 1, $noreg, 0, $noreg, killed [[CVTSI2SDrr]] :: (store 8 into %stack.0, align 4) -; CHECK: [[LD_Fp64m80_:%[0-9]+]]:rfp80 = nofpexcept LD_Fp64m80 %stack.0, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load 8 from %stack.0, align 4) +; CHECK: MOVSDmr %stack.0, 1, $noreg, 0, $noreg, killed [[CVTSI2SDrr]] :: (store (s64) into %stack.0, align 4) +; CHECK: [[LD_Fp64m80_:%[0-9]+]]:rfp80 = nofpexcept LD_Fp64m80 %stack.0, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load (s64) from %stack.0, align 4) ; CHECK: RET 0, killed [[LD_Fp64m80_]] %result = call double @llvm.experimental.constrained.sitofp.f64.i16(i16 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 ret double %result @@ -27,28 +27,28 @@ entry: define i64 @f20u64(double %x) #0 { entry: ; CHECK-LABEL: name: f20u64 -; CHECK: [[MOVSDrm_alt:%[0-9]+]]:fr64 = MOVSDrm_alt %fixed-stack.0, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.0, align 16) -; CHECK: [[MOVSDrm_alt1:%[0-9]+]]:fr64 = MOVSDrm_alt $noreg, 1, $noreg, %const.0, $noreg :: (load 8 from constant-pool) +; CHECK: [[MOVSDrm_alt:%[0-9]+]]:fr64 = MOVSDrm_alt %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.0, align 16) +; CHECK: [[MOVSDrm_alt1:%[0-9]+]]:fr64 = MOVSDrm_alt $noreg, 1, $noreg, %const.0, $noreg :: (load (s64) from constant-pool) ; CHECK: COMISDrr [[MOVSDrm_alt1]], [[MOVSDrm_alt]], implicit-def $eflags, implicit $mxcsr ; CHECK: [[FsFLD0SD:%[0-9]+]]:fr64 = FsFLD0SD ; CHECK: JCC_1 ; CHECK: [[PHI:%[0-9]+]]:fr64 = PHI [[FsFLD0SD]], {{.*}}, [[MOVSDrm_alt1]], {{.*}} ; CHECK: [[SUBSDrr:%[0-9]+]]:fr64 = SUBSDrr [[MOVSDrm_alt]], killed [[PHI]], implicit $mxcsr -; CHECK: MOVSDmr %stack.0, 1, $noreg, 0, $noreg, killed [[SUBSDrr]] :: (store 8 into %stack.0) +; CHECK: MOVSDmr %stack.0, 1, $noreg, 0, $noreg, killed [[SUBSDrr]] :: (store (s64) into %stack.0) ; CHECK: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 6, implicit $eflags -; CHECK: [[LD_Fp64m80:%[0-9]+]]:rfp80 = LD_Fp64m80 %stack.0, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load 8 from %stack.0) -; CHECK: FNSTCW16m %stack.1, 1, $noreg, 0, $noreg, implicit-def $fpsw, implicit $fpcw :: (store 2 into %stack.1) -; CHECK: [[MOVZX32rm16_:%[0-9]+]]:gr32 = MOVZX32rm16 %stack.1, 1, $noreg, 0, $noreg :: (load 2 from %stack.1) +; CHECK: [[LD_Fp64m80:%[0-9]+]]:rfp80 = LD_Fp64m80 %stack.0, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load (s64) from %stack.0) +; CHECK: FNSTCW16m %stack.1, 1, $noreg, 0, $noreg, implicit-def $fpsw, implicit $fpcw :: (store (s16) into %stack.1) +; CHECK: [[MOVZX32rm16_:%[0-9]+]]:gr32 = MOVZX32rm16 %stack.1, 1, $noreg, 0, $noreg :: (load (s16) from %stack.1) ; CHECK: [[OR32ri:%[0-9]+]]:gr32 = OR32ri killed [[MOVZX32rm16_]], 3072, implicit-def $eflags ; CHECK: [[COPY3:%[0-9]+]]:gr16 = COPY killed [[OR32ri]].sub_16bit -; CHECK: MOV16mr %stack.2, 1, $noreg, 0, $noreg, killed [[COPY3]] :: (store 2 into %stack.2) -; CHECK: FLDCW16m %stack.2, 1, $noreg, 0, $noreg, implicit-def $fpsw, implicit-def $fpcw :: (load 2 from %stack.2) +; CHECK: MOV16mr %stack.2, 1, $noreg, 0, $noreg, killed [[COPY3]] :: (store (s16) into %stack.2) +; CHECK: FLDCW16m %stack.2, 1, $noreg, 0, $noreg, implicit-def $fpsw, implicit-def $fpcw :: (load (s16) from %stack.2) ; CHECK: IST_Fp64m80 %stack.0, 1, $noreg, 0, $noreg, [[LD_Fp64m80]], implicit-def $fpsw, implicit $fpcw -; CHECK: FLDCW16m %stack.1, 1, $noreg, 0, $noreg, implicit-def $fpsw, implicit-def $fpcw :: (load 2 from %stack.1) +; CHECK: FLDCW16m %stack.1, 1, $noreg, 0, $noreg, implicit-def $fpsw, implicit-def $fpcw :: (load (s16) from %stack.1) ; CHECK: [[MOVZX32rr8_:%[0-9]+]]:gr32 = MOVZX32rr8 killed [[SETCCr]] ; CHECK: [[SHL32ri:%[0-9]+]]:gr32 = SHL32ri [[MOVZX32rr8_]], 31, implicit-def dead $eflags -; CHECK: [[XOR32rm:%[0-9]+]]:gr32 = XOR32rm [[SHL32ri]], %stack.0, 1, $noreg, 4, $noreg, implicit-def dead $eflags :: (load 4 from %stack.0 + 4) -; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %stack.0, 1, $noreg, 0, $noreg :: (load 4 from %stack.0, align 8) +; CHECK: [[XOR32rm:%[0-9]+]]:gr32 = XOR32rm [[SHL32ri]], %stack.0, 1, $noreg, 4, $noreg, implicit-def dead $eflags :: (load (s32) from %stack.0 + 4) +; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %stack.0, align 8) ; CHECK: $eax = COPY [[MOV32rm]] ; CHECK: $edx = COPY [[XOR32rm]] ; CHECK: RET 0, $eax, $edx @@ -59,7 +59,7 @@ entry: define i8 @f20s8(double %x) #0 { entry: ; CHECK-LABEL: name: f20s8 -; CHECK: [[CVTTSD2SIrm:%[0-9]+]]:gr32 = CVTTSD2SIrm %fixed-stack.0, 1, $noreg, 0, $noreg, implicit $mxcsr :: (load 8 from %fixed-stack.0, align 16) +; CHECK: [[CVTTSD2SIrm:%[0-9]+]]:gr32 = CVTTSD2SIrm %fixed-stack.0, 1, $noreg, 0, $noreg, implicit $mxcsr :: (load (s64) from %fixed-stack.0, align 16) ; CHECK: [[COPY:%[0-9]+]]:gr32_abcd = COPY [[CVTTSD2SIrm]] ; CHECK: [[COPY1:%[0-9]+]]:gr8 = COPY [[COPY]].sub_8bit ; CHECK: $al = COPY [[COPY1]] @@ -71,7 +71,7 @@ entry: define i16 @f20s16(double %x) #0 { entry: ; CHECK-LABEL: name: f20s16 -; CHECK: [[CVTTSD2SIrm:%[0-9]+]]:gr32 = CVTTSD2SIrm %fixed-stack.0, 1, $noreg, 0, $noreg, implicit $mxcsr :: (load 8 from %fixed-stack.0, align 16) +; CHECK: [[CVTTSD2SIrm:%[0-9]+]]:gr32 = CVTTSD2SIrm %fixed-stack.0, 1, $noreg, 0, $noreg, implicit $mxcsr :: (load (s64) from %fixed-stack.0, align 16) ; CHECK: [[COPY:%[0-9]+]]:gr16 = COPY [[CVTTSD2SIrm]].sub_16bit ; CHECK: $ax = COPY [[COPY]] ; CHECK: RET 0, $ax @@ -82,8 +82,8 @@ entry: define i32 @f20u(double %x) #0 { entry: ; CHECK-LABEL: name: f20u -; CHECK: [[MOVSDrm_alt:%[0-9]+]]:fr64 = MOVSDrm_alt %fixed-stack.0, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.0, align 16) -; CHECK: [[MOVSDrm_alt1:%[0-9]+]]:fr64 = MOVSDrm_alt $noreg, 1, $noreg, %const.0, $noreg :: (load 8 from constant-pool) +; CHECK: [[MOVSDrm_alt:%[0-9]+]]:fr64 = MOVSDrm_alt %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.0, align 16) +; CHECK: [[MOVSDrm_alt1:%[0-9]+]]:fr64 = MOVSDrm_alt $noreg, 1, $noreg, %const.0, $noreg :: (load (s64) from constant-pool) ; CHECK: COMISDrr [[MOVSDrm_alt1]], [[MOVSDrm_alt]], implicit-def $eflags, implicit $mxcsr ; CHECK: [[FsFLD0SD:%[0-9]+]]:fr64 = FsFLD0SD ; CHECK: JCC_1 @@ -105,12 +105,12 @@ entry: define void @binop_cse(double %a, double %b, double* %x, double* %y) #0 { entry: ; CHECK-LABEL: name: binop_cse -; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load 4 from %fixed-stack.0) -; CHECK: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (load 4 from %fixed-stack.1, align 16) -; CHECK: [[MOVSDrm_alt:%[0-9]+]]:fr64 = MOVSDrm_alt %fixed-stack.3, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.3, align 16) -; CHECK: %3:fr64 = DIVSDrm [[MOVSDrm_alt]], %fixed-stack.2, 1, $noreg, 0, $noreg, implicit $mxcsr :: (load 8 from %fixed-stack.2) -; CHECK: MOVSDmr killed [[MOV32rm1]], 1, $noreg, 0, $noreg, %3 :: (store 8 into %ir.x, align 4) -; CHECK: MOVSDmr killed [[MOV32rm]], 1, $noreg, 0, $noreg, %3 :: (store 8 into %ir.y, align 4) +; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.0) +; CHECK: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.1, align 16) +; CHECK: [[MOVSDrm_alt:%[0-9]+]]:fr64 = MOVSDrm_alt %fixed-stack.3, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.3, align 16) +; CHECK: %3:fr64 = DIVSDrm [[MOVSDrm_alt]], %fixed-stack.2, 1, $noreg, 0, $noreg, implicit $mxcsr :: (load (s64) from %fixed-stack.2) +; CHECK: MOVSDmr killed [[MOV32rm1]], 1, $noreg, 0, $noreg, %3 :: (store (s64) into %ir.x, align 4) +; CHECK: MOVSDmr killed [[MOV32rm]], 1, $noreg, 0, $noreg, %3 :: (store (s64) into %ir.y, align 4) ; CHECK: RET 0 %div = call double @llvm.experimental.constrained.fdiv.f64(double %a, double %b, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 %div2 = call double @llvm.experimental.constrained.fdiv.f64(double %a, double %b, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 @@ -124,11 +124,11 @@ entry: define void @sitofp_cse(i32 %a, double* %x, double* %y) #0 { entry: ; CHECK-LABEL: name: sitofp_cse -; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load 4 from %fixed-stack.0, align 8) -; CHECK: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (load 4 from %fixed-stack.1) -; CHECK: %2:fr64 = CVTSI2SDrm %fixed-stack.2, 1, $noreg, 0, $noreg :: (load 4 from %fixed-stack.2, align 16) -; CHECK: MOVSDmr killed [[MOV32rm1]], 1, $noreg, 0, $noreg, %2 :: (store 8 into %ir.x, align 4) -; CHECK: MOVSDmr killed [[MOV32rm]], 1, $noreg, 0, $noreg, %2 :: (store 8 into %ir.y, align 4) +; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.0, align 8) +; CHECK: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.1) +; CHECK: %2:fr64 = CVTSI2SDrm %fixed-stack.2, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.2, align 16) +; CHECK: MOVSDmr killed [[MOV32rm1]], 1, $noreg, 0, $noreg, %2 :: (store (s64) into %ir.x, align 4) +; CHECK: MOVSDmr killed [[MOV32rm]], 1, $noreg, 0, $noreg, %2 :: (store (s64) into %ir.y, align 4) ; CHECK: RET 0 %result = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 %result2 = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 diff --git a/llvm/test/CodeGen/X86/implicit-null-checks.mir b/llvm/test/CodeGen/X86/implicit-null-checks.mir index e66bdea00bc35..5ba9caff35c63 100644 --- a/llvm/test/CodeGen/X86/implicit-null-checks.mir +++ b/llvm/test/CodeGen/X86/implicit-null-checks.mir @@ -407,7 +407,7 @@ liveins: - { reg: '$esi' } # CHECK: bb.0.entry: # CHECK: $eax = MOV32ri 2200000 -# CHECK-NEXT: $eax = FAULTING_OP 1, %bb.3, {{[0-9]+}}, $eax, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags :: (load 4 from %ir.x) +# CHECK-NEXT: $eax = FAULTING_OP 1, %bb.3, {{[0-9]+}}, $eax, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags :: (load (s32) from %ir.x) # CHECK-NEXT: JMP_1 %bb.1 body: | @@ -421,7 +421,7 @@ body: | liveins: $esi, $rdi $eax = MOV32ri 2200000 - $eax = AND32rm killed $eax, killed $rdi, 1, $noreg, 0, $noreg, implicit-def dead $eflags :: (load 4 from %ir.x) + $eax = AND32rm killed $eax, killed $rdi, 1, $noreg, 0, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.x) CMP32rr killed $eax, killed $esi, implicit-def $eflags JCC_1 %bb.4, 4, implicit $eflags @@ -447,7 +447,7 @@ liveins: - { reg: '$esi' } - { reg: '$rdx' } # CHECK: bb.0.entry: -# CHECK: $eax = MOV32rm killed $rdx, 1, $noreg, 0, $noreg :: (volatile load 4 from %ir.ptr) +# CHECK: $eax = MOV32rm killed $rdx, 1, $noreg, 0, $noreg :: (volatile load (s32) from %ir.ptr) # CHECK-NEXT: TEST64rr $rdi, $rdi, implicit-def $eflags # CHECK-NEXT: JCC_1 %bb.3, 4, implicit $eflags @@ -455,7 +455,7 @@ body: | bb.0.entry: liveins: $esi, $rdi, $rdx - $eax = MOV32rm killed $rdx, 1, $noreg, 0, $noreg :: (volatile load 4 from %ir.ptr) + $eax = MOV32rm killed $rdx, 1, $noreg, 0, $noreg :: (volatile load (s32) from %ir.ptr) TEST64rr $rdi, $rdi, implicit-def $eflags JCC_1 %bb.3, 4, implicit $eflags @@ -463,7 +463,7 @@ body: | liveins: $esi, $rdi $eax = MOV32ri 2200000 - $eax = AND32rm killed $eax, killed $rdi, 1, $noreg, 0, $noreg, implicit-def dead $eflags :: (load 4 from %ir.x) + $eax = AND32rm killed $eax, killed $rdi, 1, $noreg, 0, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.x) CMP32rr killed $eax, killed $esi, implicit-def $eflags JCC_1 %bb.4, 4, implicit $eflags @@ -505,7 +505,7 @@ body: | $eax = MOV32ri 2200000 $eax = ADD32ri killed $eax, 100, implicit-def dead $eflags - $eax = AND32rm killed $eax, killed $rdi, 1, $noreg, 0, $noreg, implicit-def dead $eflags :: (load 4 from %ir.x) + $eax = AND32rm killed $eax, killed $rdi, 1, $noreg, 0, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.x) CMP32rr killed $eax, killed $esi, implicit-def $eflags JCC_1 %bb.4, 4, implicit $eflags @@ -545,7 +545,7 @@ body: | liveins: $rsi, $rdi $rdi = MOV64ri 5000 - $rdi = AND64rm killed $rdi, killed $rdi, 1, $noreg, 0, $noreg, implicit-def dead $eflags :: (load 4 from %ir.x) + $rdi = AND64rm killed $rdi, killed $rdi, 1, $noreg, 0, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.x) CMP64rr killed $rdi, killed $rsi, implicit-def $eflags JCC_1 %bb.4, 4, implicit $eflags @@ -572,7 +572,7 @@ liveins: - { reg: '$rsi' } # CHECK: bb.0.entry: # CHECK: $rbx = MOV64rr $rdx -# CHECK-NEXT: $rbx = FAULTING_OP 1, %bb.3, {{[0-9]+}}, $rbx, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags :: (load 4 from %ir.x) +# CHECK-NEXT: $rbx = FAULTING_OP 1, %bb.3, {{[0-9]+}}, $rbx, $rdi, 1, $noreg, 0, $noreg, implicit-def $eflags :: (load (s32) from %ir.x) body: | bb.0.entry: @@ -585,7 +585,7 @@ body: | liveins: $rsi, $rdi, $rdx $rbx = MOV64rr $rdx - $rbx = AND64rm killed $rbx, killed $rdi, 1, $noreg, 0, $noreg, implicit-def dead $eflags :: (load 4 from %ir.x) + $rbx = AND64rm killed $rbx, killed $rdi, 1, $noreg, 0, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.x) $rdx = MOV64ri 0 CMP64rr killed $rbx, killed $rsi, implicit-def $eflags JCC_1 %bb.4, 4, implicit $eflags @@ -633,7 +633,7 @@ body: | liveins: $rbx CALL64pcrel32 @f, csr_64, implicit $rsp, implicit-def $rsp - $eax = MOV32rm killed $rbx, 1, $noreg, 0, $noreg :: (load 4 from %ir.ptr) + $eax = MOV32rm killed $rbx, 1, $noreg, 0, $noreg :: (load (s32) from %ir.ptr) $rbx = POP64r implicit-def $rsp, implicit $rsp RETQ $eax @@ -669,10 +669,10 @@ body: | bb.1.not_null: liveins: $rdi, $rsi - $rcx = MOV64rm killed $rsi, 1, $noreg, 0, $noreg :: (load 8 from %ir.ptr2) + $rcx = MOV64rm killed $rsi, 1, $noreg, 0, $noreg :: (load (s64) from %ir.ptr2) $esi = MOV32ri 3076 - $eax = BEXTR32rm killed $rdi, 1, $noreg, 0, $noreg, killed $esi, implicit-def dead $eflags :: (load 4 from %ir.ptr) - $eax = ADD32rm killed $eax, killed $rcx, 1, $noreg, 0, $noreg, implicit-def dead $eflags :: (load 4 from %ir.val) + $eax = BEXTR32rm killed $rdi, 1, $noreg, 0, $noreg, killed $esi, implicit-def dead $eflags :: (load (s32) from %ir.ptr) + $eax = ADD32rm killed $eax, killed $rcx, 1, $noreg, 0, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.val) RETQ $eax bb.2.is_null: @@ -717,7 +717,7 @@ body: | name: imp_null_check_gep_load_with_use_dep # CHECK-LABEL: name: imp_null_check_gep_load_with_use_dep # CHECK: bb.0.entry: -# CHECK: $eax = FAULTING_OP 1, %bb.2, {{[0-9]+}}, $rdi, 1, $noreg, 0, $noreg, implicit-def $rax :: (load 4 from %ir.x) +# CHECK: $eax = FAULTING_OP 1, %bb.2, {{[0-9]+}}, $rdi, 1, $noreg, 0, $noreg, implicit-def $rax :: (load (s32) from %ir.x) # CHECK-NEXT: JMP_1 %bb.1 alignment: 16 tracksRegLiveness: true @@ -735,7 +735,7 @@ body: | liveins: $rdi, $rsi $rsi = ADD64rr $rsi, $rdi, implicit-def dead $eflags - $eax = MOV32rm killed $rdi, 1, $noreg, 0, $noreg, implicit-def $rax :: (load 4 from %ir.x) + $eax = MOV32rm killed $rdi, 1, $noreg, 0, $noreg, implicit-def $rax :: (load (s32) from %ir.x) $eax = LEA64_32r killed $rax, 1, killed $rsi, 4, $noreg RETQ $eax @@ -927,7 +927,7 @@ body: | bb.1.not_null: liveins: $rdi, $rsi - MOV32mr killed $rdi, 1, $noreg, 0, $noreg, killed $esi :: (volatile store 4 into %ir.ptr) + MOV32mr killed $rdi, 1, $noreg, 0, $noreg, killed $esi :: (volatile store (s32) into %ir.ptr) RETQ bb.2.is_null: @@ -1234,7 +1234,7 @@ body: | name: inc_store_and_load_no_alias # CHECK-LABEL: inc_store_and_load_no_alias # CHECK: bb.0.entry: -# CHECK: $eax = FAULTING_OP 1, %bb.2, {{[0-9]+}}, $rdi, 1, $noreg, 0, $noreg :: (load 4 from %ir.ptr) +# CHECK: $eax = FAULTING_OP 1, %bb.2, {{[0-9]+}}, $rdi, 1, $noreg, 0, $noreg :: (load (s32) from %ir.ptr) # CHECK-NEXT: JMP_1 %bb.1 # CHECK: bb.1.not_null @@ -1253,8 +1253,8 @@ body: | bb.1.not_null: liveins: $rdi, $rsi - MOV32mi killed $rsi, 1, $noreg, 0, $noreg, 3 :: (store 4 into %ir.ptr2) - $eax = MOV32rm killed $rdi, 1, $noreg, 0, $noreg :: (load 4 from %ir.ptr) + MOV32mi killed $rsi, 1, $noreg, 0, $noreg, 3 :: (store (s32) into %ir.ptr2) + $eax = MOV32rm killed $rdi, 1, $noreg, 0, $noreg :: (load (s32) from %ir.ptr) RETQ $eax bb.2.is_null: @@ -1285,8 +1285,8 @@ body: | bb.1.not_null: liveins: $rdi, $rsi - MOV32mi killed $rsi, 1, $noreg, 0, $noreg, 3 :: (store 4 into %ir.ptr2) - $eax = MOV32rm killed $rdi, 1, $noreg, 0, $noreg :: (load 4 from %ir.ptr) + MOV32mi killed $rsi, 1, $noreg, 0, $noreg, 3 :: (store (s32) into %ir.ptr2) + $eax = MOV32rm killed $rdi, 1, $noreg, 0, $noreg :: (load (s32) from %ir.ptr) RETQ $eax bb.2.is_null: @@ -1314,16 +1314,16 @@ body: | liveins: $rdi, $rsi $rsp = frame-setup SUB64ri8 $rsp, 8, implicit-def dead $eflags - MOV32mr $rsp, 1, $noreg, 0, $noreg, $esi :: (store 4 into %stack.0) + MOV32mr $rsp, 1, $noreg, 0, $noreg, $esi :: (store (s32) into %stack.0) TEST64rr $rdi, $rdi, implicit-def $eflags JCC_1 %bb.2, 4, implicit killed $eflags bb.1.not_null: liveins: $rdi, $rsi - $r14d = MOV32rm $rsp, 1, $noreg, 0, $noreg :: (load 4 from %stack.0) - MOV64mr $rsp, 1, $noreg, 0, $noreg, $rdi :: (store 8 into %stack.0) - $edi = MOV32rm $rdi, 1, $noreg, 8, $noreg :: (load 4 from %ir.ptr) + $r14d = MOV32rm $rsp, 1, $noreg, 0, $noreg :: (load (s32) from %stack.0) + MOV64mr $rsp, 1, $noreg, 0, $noreg, $rdi :: (store (s64) into %stack.0) + $edi = MOV32rm $rdi, 1, $noreg, 8, $noreg :: (load (s32) from %ir.ptr) $eax = MOV32rr $edi RETQ $eax diff --git a/llvm/test/CodeGen/X86/isel-postprocessing-test-fold-memop.ll b/llvm/test/CodeGen/X86/isel-postprocessing-test-fold-memop.ll index 99a80fae441bc..c8096e179c1c1 100644 --- a/llvm/test/CodeGen/X86/isel-postprocessing-test-fold-memop.ll +++ b/llvm/test/CodeGen/X86/isel-postprocessing-test-fold-memop.ll @@ -9,6 +9,6 @@ entry: ; Folding the load+and+icmp instructions into a TEST64mr instruction ; should preserve memory operands. - ; CHECK: TEST64mr {{.*}} :: (load 8 from {{%.*}}) + ; CHECK: TEST64mr {{.*}} :: (load (s64) from {{%.*}}) } diff --git a/llvm/test/CodeGen/X86/lea-opt-with-debug.mir b/llvm/test/CodeGen/X86/lea-opt-with-debug.mir index c2fcb7c9d6a72..31774ccbf769e 100644 --- a/llvm/test/CodeGen/X86/lea-opt-with-debug.mir +++ b/llvm/test/CodeGen/X86/lea-opt-with-debug.mir @@ -100,23 +100,23 @@ body: | ; CHECK-NOT: %0:gr64 = LEA64r %1, 4, %3, 8, $noreg, debug-location !14 ; CHECK: DBG_VALUE %4, $noreg, !11, !DIExpression(DW_OP_plus_uconst, 8, DW_OP_stack_value), debug-location !15 - %1 = MOV64rm $rip, 1, $noreg, @c, $noreg, debug-location !13 :: (dereferenceable load 8 from @c) - %2 = MOVSX64rm32 $rip, 1, $noreg, @a, $noreg, debug-location !13 :: (dereferenceable load 4 from @a) + %1 = MOV64rm $rip, 1, $noreg, @c, $noreg, debug-location !13 :: (dereferenceable load (s64) from @c) + %2 = MOVSX64rm32 $rip, 1, $noreg, @a, $noreg, debug-location !13 :: (dereferenceable load (s32) from @a) %3 = LEA64r %2, 2, %2, 0, $noreg, debug-location !13 %4 = LEA64r %1, 4, %3, 0, $noreg, debug-location !13 %5 = COPY %4.sub_32bit, debug-location !13 - MOV32mr $rip, 1, $noreg, @d, $noreg, killed %5, debug-location !13 :: (store 4 into @d) + MOV32mr $rip, 1, $noreg, @d, $noreg, killed %5, debug-location !13 :: (store (s32) into @d) %0 = LEA64r %1, 4, %3, 8, $noreg, debug-location !14 DBG_VALUE %0, $noreg, !11, !DIExpression(), debug-location !15 ; CHECK-LABEL: bb.1 (%ir-block.8): - ; CHECK: %6:gr32 = MOV32rm %4, 1, $noreg, 8, $noreg, debug-location !17 :: (load 4 from %ir.7) + ; CHECK: %6:gr32 = MOV32rm %4, 1, $noreg, 8, $noreg, debug-location !17 :: (load (s32) from %ir.7) bb.1 (%ir-block.8): successors: %bb.1(0x80000000) - %6 = MOV32rm %0, 1, $noreg, 0, $noreg, debug-location !17 :: (load 4 from %ir.7) - MOV32mr $rip, 1, $noreg, @d, $noreg, killed %6, debug-location !17 :: (store 4 into @d) + %6 = MOV32rm %0, 1, $noreg, 0, $noreg, debug-location !17 :: (load (s32) from %ir.7) + MOV32mr $rip, 1, $noreg, @d, $noreg, killed %6, debug-location !17 :: (store (s32) into @d) JMP_1 %bb.1, debug-location !18 ... diff --git a/llvm/test/CodeGen/X86/limit-split-cost.mir b/llvm/test/CodeGen/X86/limit-split-cost.mir index 088e39d29c3f8..6d32afd71253d 100644 --- a/llvm/test/CodeGen/X86/limit-split-cost.mir +++ b/llvm/test/CodeGen/X86/limit-split-cost.mir @@ -93,7 +93,7 @@ body: | %0:gr32 = COPY $edi %5:gr64 = LEA64r $rip, 1, $noreg, @.str.2, $noreg - %6:gr64 = MOV64rm $rip, 1, $noreg, target-flags(x86-gotpcrel) @m, $noreg :: (load 8 from got) + %6:gr64 = MOV64rm $rip, 1, $noreg, target-flags(x86-gotpcrel) @m, $noreg :: (load (s64) from got) %4:gr64 = LEA64r $rip, 1, $noreg, @.str.1, $noreg %3:gr64 = LEA64r $rip, 1, $noreg, @.str, $noreg @@ -140,7 +140,7 @@ body: | bb.7.do.cond: successors: %bb.8(0x04000000), %bb.1(0x7c000000) - CMP32mi8 %6, 1, $noreg, 0, $noreg, 5, implicit-def $eflags :: (dereferenceable load 4 from @m, !tbaa !4) + CMP32mi8 %6, 1, $noreg, 0, $noreg, 5, implicit-def $eflags :: (dereferenceable load (s32) from @m, !tbaa !4) JCC_1 %bb.1, 5, implicit killed $eflags JMP_1 %bb.8 diff --git a/llvm/test/CodeGen/X86/lvi-hardening-gadget-graph.ll b/llvm/test/CodeGen/X86/lvi-hardening-gadget-graph.ll index ba2ce26142b5a..0a18ba36f2dfe 100644 --- a/llvm/test/CodeGen/X86/lvi-hardening-gadget-graph.ll +++ b/llvm/test/CodeGen/X86/lvi-hardening-gadget-graph.ll @@ -67,56 +67,56 @@ for.end: ; preds = %for.cond ; CHECK-NEXT: label="Speculative gadgets for \"test\" function"; ; CHECK: Node0x{{[0-9a-f]+}} [shape=record,color = green,label="{LFENCE\n}"]; ; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 0]; -; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{renamable $eax = MOV32rm %stack.4.i, 1, $noreg, 0, $noreg :: (dereferenceable load 4 from %ir.i)\n}"]; +; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{renamable $eax = MOV32rm %stack.4.i, 1, $noreg, 0, $noreg :: (dereferenceable load (s32) from %ir.i)\n}"]; ; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[color = red, style = "dashed"]; ; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1]; ; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{JCC_1 %bb.6, 13, implicit killed $eflags\n}"]; ; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1]; ; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1]; -; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{CMP32rm killed renamable $eax, %stack.2.secret_size.addr, 1, $noreg, 0, $noreg, implicit-def $eflags :: (dereferenceable load 4 from %ir.secret_size.addr)\n}"]; +; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{CMP32rm killed renamable $eax, %stack.2.secret_size.addr, 1, $noreg, 0, $noreg, implicit-def $eflags :: (dereferenceable load (s32) from %ir.secret_size.addr)\n}"]; ; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[color = red, style = "dashed"]; ; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1]; -; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{renamable $eax = MOV32rm %stack.4.i, 1, $noreg, 0, $noreg :: (dereferenceable load 4 from %ir.i)\n}"]; +; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{renamable $eax = MOV32rm %stack.4.i, 1, $noreg, 0, $noreg :: (dereferenceable load (s32) from %ir.i)\n}"]; ; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[color = red, style = "dashed"]; ; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1]; ; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{JCC_1 %bb.4, 5, implicit killed $eflags\n}"]; ; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1]; ; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1]; -; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{renamable $rax = MOV64rm %stack.1.secret.addr, 1, $noreg, 0, $noreg :: (dereferenceable load 8 from %ir.secret.addr)\n}"]; +; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{renamable $rax = MOV64rm %stack.1.secret.addr, 1, $noreg, 0, $noreg :: (dereferenceable load (s64) from %ir.secret.addr)\n}"]; ; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[color = red, style = "dashed"]; ; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1]; -; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{renamable $eax = MOV32rm killed renamable $rax, 4, killed renamable $rcx, 0, $noreg :: (load 4 from %ir.arrayidx)\n}"]; +; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{renamable $eax = MOV32rm killed renamable $rax, 4, killed renamable $rcx, 0, $noreg :: (load (s32) from %ir.arrayidx)\n}"]; ; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1]; -; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{renamable $rcx = MOVSX64rm32 %stack.3.ret_val, 1, $noreg, 0, $noreg :: (dereferenceable load 4 from %ir.ret_val)\n}"]; +; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{renamable $rcx = MOVSX64rm32 %stack.3.ret_val, 1, $noreg, 0, $noreg :: (dereferenceable load (s32) from %ir.ret_val)\n}"]; ; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[color = red, style = "dashed"]; ; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1]; -; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{renamable $rcx = MOV64rm %stack.0.untrusted_user_ptr.addr, 1, $noreg, 0, $noreg :: (dereferenceable load 8 from %ir.untrusted_user_ptr.addr)\n}"]; +; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{renamable $rcx = MOV64rm %stack.0.untrusted_user_ptr.addr, 1, $noreg, 0, $noreg :: (dereferenceable load (s64) from %ir.untrusted_user_ptr.addr)\n}"]; ; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[color = red, style = "dashed"]; ; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1]; -; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{MOV32mr killed renamable $rcx, 1, $noreg, 0, $noreg, killed renamable $eax :: (store 4 into %ir.6)\n}"]; +; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{MOV32mr killed renamable $rcx, 1, $noreg, 0, $noreg, killed renamable $eax :: (store (s32) into %ir.6)\n}"]; ; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1]; -; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{renamable $rax = MOV64rm %stack.1.secret.addr, 1, $noreg, 0, $noreg :: (dereferenceable load 8 from %ir.secret.addr)\n}"]; +; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{renamable $rax = MOV64rm %stack.1.secret.addr, 1, $noreg, 0, $noreg :: (dereferenceable load (s64) from %ir.secret.addr)\n}"]; ; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[color = red, style = "dashed"]; ; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1]; -; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{MOV32mi killed renamable $rax, 4, killed renamable $rcx, 0, $noreg, 42 :: (store 4 into %ir.arrayidx3)\n}"]; +; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{MOV32mi killed renamable $rax, 4, killed renamable $rcx, 0, $noreg, 42 :: (store (s32) into %ir.arrayidx3)\n}"]; ; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1]; -; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{renamable $rcx = MOVSX64rm32 %stack.3.ret_val, 1, $noreg, 0, $noreg :: (dereferenceable load 4 from %ir.ret_val)\n}"]; +; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{renamable $rcx = MOVSX64rm32 %stack.3.ret_val, 1, $noreg, 0, $noreg :: (dereferenceable load (s32) from %ir.ret_val)\n}"]; ; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[color = red, style = "dashed"]; ; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1]; -; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{renamable $rax = MOV64rm %stack.0.untrusted_user_ptr.addr, 1, $noreg, 0, $noreg :: (dereferenceable load 8 from %ir.untrusted_user_ptr.addr)\n}"]; +; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{renamable $rax = MOV64rm %stack.0.untrusted_user_ptr.addr, 1, $noreg, 0, $noreg :: (dereferenceable load (s64) from %ir.untrusted_user_ptr.addr)\n}"]; ; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[color = red, style = "dashed"]; ; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1]; -; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{renamable $eax = MOV32rm killed renamable $rax, 1, $noreg, 0, $noreg :: (load 4 from %ir.9)\n}"]; +; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{renamable $eax = MOV32rm killed renamable $rax, 1, $noreg, 0, $noreg :: (load (s32) from %ir.9)\n}"]; ; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1]; ; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,color = blue,label="{ARGS}"]; ; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 0]; -; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{MOV64mr %stack.0.untrusted_user_ptr.addr, 1, $noreg, 0, $noreg, killed renamable $rdi :: (store 8 into %ir.untrusted_user_ptr.addr)\n}"]; +; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{MOV64mr %stack.0.untrusted_user_ptr.addr, 1, $noreg, 0, $noreg, killed renamable $rdi :: (store (s64) into %ir.untrusted_user_ptr.addr)\n}"]; ; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 0]; ; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{JMP_1 %bb.5\n}"]; ; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1]; ; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{JMP_1 %bb.1\n}"]; ; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1]; -; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{renamable $eax = MOV32rm %stack.3.ret_val, 1, $noreg, 0, $noreg :: (dereferenceable load 4 from %ir.ret_val)\n}"]; +; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{renamable $eax = MOV32rm %stack.3.ret_val, 1, $noreg, 0, $noreg :: (dereferenceable load (s32) from %ir.ret_val)\n}"]; ; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 0]; ; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{RET 0, $eax\n}"]; ; CHECK-NEXT: } diff --git a/llvm/test/CodeGen/X86/machine-cp-mask-reg.mir b/llvm/test/CodeGen/X86/machine-cp-mask-reg.mir index 86a077e64764f..1f697c68a86c4 100644 --- a/llvm/test/CodeGen/X86/machine-cp-mask-reg.mir +++ b/llvm/test/CodeGen/X86/machine-cp-mask-reg.mir @@ -42,7 +42,7 @@ body: | ; CHECK: renamable $k0 = VPTESTNMBZrr killed renamable $zmm0, renamable $zmm0 ; CHECK: renamable $rax = COPY renamable $k0 ; CHECK: renamable $rsi = ADD64rr killed renamable $rsi, killed renamable $rax, implicit-def dead $eflags - ; CHECK: MOV64mr killed renamable $rdi, 1, $noreg, 0, $noreg, killed renamable $rsi :: (store 8 into %ir.y) + ; CHECK: MOV64mr killed renamable $rdi, 1, $noreg, 0, $noreg, killed renamable $rsi :: (store (s64) into %ir.y) ; CHECK: renamable $eax = COPY killed renamable $k0 ; CHECK: renamable $al = AND8ri renamable $al, 1, implicit-def dead $eflags, implicit killed $eax, implicit-def $eax ; CHECK: $al = KILL renamable $al, implicit killed $eax @@ -50,7 +50,7 @@ body: | renamable $k0 = VPTESTNMBZrr killed renamable $zmm0, renamable $zmm0 renamable $rax = COPY renamable $k0 renamable $rsi = ADD64rr killed renamable $rsi, killed renamable $rax, implicit-def dead $eflags - MOV64mr killed renamable $rdi, 1, $noreg, 0, $noreg, killed renamable $rsi :: (store 8 into %ir.y) + MOV64mr killed renamable $rdi, 1, $noreg, 0, $noreg, killed renamable $rsi :: (store (s64) into %ir.y) renamable $eax = COPY killed renamable $k0 renamable $al = AND8ri renamable $al, 1, implicit-def dead $eflags, implicit killed $eax, implicit-def $eax $al = KILL renamable $al, implicit killed $eax diff --git a/llvm/test/CodeGen/X86/memcpy-scoped-aa.ll b/llvm/test/CodeGen/X86/memcpy-scoped-aa.ll index 341707c002da2..99ffd63d69cf9 100644 --- a/llvm/test/CodeGen/X86/memcpy-scoped-aa.ll +++ b/llvm/test/CodeGen/X86/memcpy-scoped-aa.ll @@ -11,10 +11,10 @@ ; MIR-DAG: ![[SET1:[0-9]+]] = !{![[SCOPE1]]} ; MIR-LABEL: name: test_memcpy -; MIR: %2:gr64 = MOV64rm %0, 1, $noreg, 16, $noreg :: (load 8 from %ir.p1, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]]) -; MIR-NEXT: %3:gr64 = MOV64rm %0, 1, $noreg, 24, $noreg :: (load 8 from %ir.p1 + 8, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]]) -; MIR-NEXT: MOV64mr %0, 1, $noreg, 8, $noreg, killed %3 :: (store 8 into %ir.p0 + 8, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]]) -; MIR-NEXT: MOV64mr %0, 1, $noreg, 0, $noreg, killed %2 :: (store 8 into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]]) +; MIR: %2:gr64 = MOV64rm %0, 1, $noreg, 16, $noreg :: (load (s64) from %ir.p1, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]]) +; MIR-NEXT: %3:gr64 = MOV64rm %0, 1, $noreg, 24, $noreg :: (load (s64) from %ir.p1 + 8, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]]) +; MIR-NEXT: MOV64mr %0, 1, $noreg, 8, $noreg, killed %3 :: (store (s64) into %ir.p0 + 8, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]]) +; MIR-NEXT: MOV64mr %0, 1, $noreg, 0, $noreg, killed %2 :: (store (s64) into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]]) define i32 @test_memcpy(i32* nocapture %p, i32* nocapture readonly %q) { %p0 = bitcast i32* %p to i8* %add.ptr = getelementptr inbounds i32, i32* %p, i64 4 @@ -28,10 +28,10 @@ define i32 @test_memcpy(i32* nocapture %p, i32* nocapture readonly %q) { } ; MIR-LABEL: name: test_memcpy_inline -; MIR: %2:gr64 = MOV64rm %0, 1, $noreg, 16, $noreg :: (load 8 from %ir.p1, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]]) -; MIR-NEXT: %3:gr64 = MOV64rm %0, 1, $noreg, 24, $noreg :: (load 8 from %ir.p1 + 8, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]]) -; MIR-NEXT: MOV64mr %0, 1, $noreg, 8, $noreg, killed %3 :: (store 8 into %ir.p0 + 8, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]]) -; MIR-NEXT: MOV64mr %0, 1, $noreg, 0, $noreg, killed %2 :: (store 8 into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]]) +; MIR: %2:gr64 = MOV64rm %0, 1, $noreg, 16, $noreg :: (load (s64) from %ir.p1, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]]) +; MIR-NEXT: %3:gr64 = MOV64rm %0, 1, $noreg, 24, $noreg :: (load (s64) from %ir.p1 + 8, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]]) +; MIR-NEXT: MOV64mr %0, 1, $noreg, 8, $noreg, killed %3 :: (store (s64) into %ir.p0 + 8, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]]) +; MIR-NEXT: MOV64mr %0, 1, $noreg, 0, $noreg, killed %2 :: (store (s64) into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]]) define i32 @test_memcpy_inline(i32* nocapture %p, i32* nocapture readonly %q) { %p0 = bitcast i32* %p to i8* %add.ptr = getelementptr inbounds i32, i32* %p, i64 4 @@ -45,10 +45,10 @@ define i32 @test_memcpy_inline(i32* nocapture %p, i32* nocapture readonly %q) { } ; MIR-LABEL: name: test_memmove -; MIR: %2:gr64 = MOV64rm %0, 1, $noreg, 16, $noreg :: (load 8 from %ir.p1, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]]) -; MIR-NEXT: %3:gr64 = MOV64rm %0, 1, $noreg, 24, $noreg :: (load 8 from %ir.p1 + 8, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]]) -; MIR-NEXT: MOV64mr %0, 1, $noreg, 0, $noreg, killed %2 :: (store 8 into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]]) -; MIR-NEXT: MOV64mr %0, 1, $noreg, 8, $noreg, killed %3 :: (store 8 into %ir.p0 + 8, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]]) +; MIR: %2:gr64 = MOV64rm %0, 1, $noreg, 16, $noreg :: (load (s64) from %ir.p1, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]]) +; MIR-NEXT: %3:gr64 = MOV64rm %0, 1, $noreg, 24, $noreg :: (load (s64) from %ir.p1 + 8, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]]) +; MIR-NEXT: MOV64mr %0, 1, $noreg, 0, $noreg, killed %2 :: (store (s64) into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]]) +; MIR-NEXT: MOV64mr %0, 1, $noreg, 8, $noreg, killed %3 :: (store (s64) into %ir.p0 + 8, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]]) define i32 @test_memmove(i32* nocapture %p, i32* nocapture readonly %q) { %p0 = bitcast i32* %p to i8* %add.ptr = getelementptr inbounds i32, i32* %p, i64 4 @@ -63,8 +63,8 @@ define i32 @test_memmove(i32* nocapture %p, i32* nocapture readonly %q) { ; MIR-LABEL: name: test_memset ; MIR: %2:gr64 = MOV64ri -6148914691236517206 -; MIR-NEXT: MOV64mr %0, 1, $noreg, 8, $noreg, %2 :: (store 8 into %ir.p0 + 8, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]]) -; MIR-NEXT: MOV64mr %0, 1, $noreg, 0, $noreg, %2 :: (store 8 into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]]) +; MIR-NEXT: MOV64mr %0, 1, $noreg, 8, $noreg, %2 :: (store (s64) into %ir.p0 + 8, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]]) +; MIR-NEXT: MOV64mr %0, 1, $noreg, 0, $noreg, %2 :: (store (s64) into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]]) define i32 @test_memset(i32* nocapture %p, i32* nocapture readonly %q) { %p0 = bitcast i32* %p to i8* tail call void @llvm.memset.p0i8.i64(i8* noundef nonnull align 4 dereferenceable(16) %p0, i8 170, i64 16, i1 false), !alias.scope !2, !noalias !4 @@ -76,10 +76,10 @@ define i32 @test_memset(i32* nocapture %p, i32* nocapture readonly %q) { } ; MIR-LABEL: name: test_mempcpy -; MIR: %2:gr64 = MOV64rm %0, 1, $noreg, 16, $noreg :: (load 8 from %ir.p1, align 1, !alias.scope ![[SET0]], !noalias ![[SET1]]) -; MIR-NEXT: %3:gr64 = MOV64rm %0, 1, $noreg, 24, $noreg :: (load 8 from %ir.p1 + 8, align 1, !alias.scope ![[SET0]], !noalias ![[SET1]]) -; MIR-NEXT: MOV64mr %0, 1, $noreg, 8, $noreg, killed %3 :: (store 8 into %ir.p0 + 8, align 1, !alias.scope ![[SET0]], !noalias ![[SET1]]) -; MIR-NEXT: MOV64mr %0, 1, $noreg, 0, $noreg, killed %2 :: (store 8 into %ir.p0, align 1, !alias.scope ![[SET0]], !noalias ![[SET1]]) +; MIR: %2:gr64 = MOV64rm %0, 1, $noreg, 16, $noreg :: (load (s64) from %ir.p1, align 1, !alias.scope ![[SET0]], !noalias ![[SET1]]) +; MIR-NEXT: %3:gr64 = MOV64rm %0, 1, $noreg, 24, $noreg :: (load (s64) from %ir.p1 + 8, align 1, !alias.scope ![[SET0]], !noalias ![[SET1]]) +; MIR-NEXT: MOV64mr %0, 1, $noreg, 8, $noreg, killed %3 :: (store (s64) into %ir.p0 + 8, align 1, !alias.scope ![[SET0]], !noalias ![[SET1]]) +; MIR-NEXT: MOV64mr %0, 1, $noreg, 0, $noreg, killed %2 :: (store (s64) into %ir.p0, align 1, !alias.scope ![[SET0]], !noalias ![[SET1]]) define i32 @test_mempcpy(i32* nocapture %p, i32* nocapture readonly %q) { %p0 = bitcast i32* %p to i8* %add.ptr = getelementptr inbounds i32, i32* %p, i64 4 diff --git a/llvm/test/CodeGen/X86/merge-store-partially-alias-loads.ll b/llvm/test/CodeGen/X86/merge-store-partially-alias-loads.ll index 8e139b11be95b..67b399b36220a 100644 --- a/llvm/test/CodeGen/X86/merge-store-partially-alias-loads.ll +++ b/llvm/test/CodeGen/X86/merge-store-partially-alias-loads.ll @@ -18,12 +18,12 @@ ; DBGDAG-DAG: [[BASEPTR:t[0-9]+]]: i64,ch = CopyFromReg [[ENTRYTOKEN]], ; DBGDAG-DAG: [[ADDPTR:t[0-9]+]]: i64 = add {{(nuw )?}}[[BASEPTR]], Constant:i64<2> -; DBGDAG-DAG: [[LD2:t[0-9]+]]: i16,ch = load<(load 2 from %ir.tmp81, align 1)> [[ENTRYTOKEN]], [[BASEPTR]], undef:i64 -; DBGDAG-DAG: [[LD1:t[0-9]+]]: i8,ch = load<(load 1 from %ir.tmp12)> [[ENTRYTOKEN]], [[ADDPTR]], undef:i64 +; DBGDAG-DAG: [[LD2:t[0-9]+]]: i16,ch = load<(load (s16) from %ir.tmp81, align 1)> [[ENTRYTOKEN]], [[BASEPTR]], undef:i64 +; DBGDAG-DAG: [[LD1:t[0-9]+]]: i8,ch = load<(load (s8) from %ir.tmp12)> [[ENTRYTOKEN]], [[ADDPTR]], undef:i64 -; DBGDAG-DAG: [[ST1:t[0-9]+]]: ch = store<(store 1 into %ir.tmp14)> [[ENTRYTOKEN]], [[LD1]], t{{[0-9]+}}, undef:i64 +; DBGDAG-DAG: [[ST1:t[0-9]+]]: ch = store<(store (s8) into %ir.tmp14)> [[ENTRYTOKEN]], [[LD1]], t{{[0-9]+}}, undef:i64 ; DBGDAG-DAG: [[LOADTOKEN:t[0-9]+]]: ch = TokenFactor [[LD2]]:1, [[LD1]]:1 -; DBGDAG-DAG: [[ST2:t[0-9]+]]: ch = store<(store 2 into %ir.tmp10, align 1)> [[LOADTOKEN]], [[LD2]], t{{[0-9]+}}, undef:i64 +; DBGDAG-DAG: [[ST2:t[0-9]+]]: ch = store<(store (s16) into %ir.tmp10, align 1)> [[LOADTOKEN]], [[LD2]], t{{[0-9]+}}, undef:i64 ; DBGDAG: X86ISD::RET_FLAG t{{[0-9]+}}, diff --git a/llvm/test/CodeGen/X86/movtopush.mir b/llvm/test/CodeGen/X86/movtopush.mir index 051b505813e4f..e7ed68de6712b 100644 --- a/llvm/test/CodeGen/X86/movtopush.mir +++ b/llvm/test/CodeGen/X86/movtopush.mir @@ -41,8 +41,8 @@ # CHECK-NEXT: CALLpcrel32 @good, csr_32, implicit $esp, implicit $ssp, implicit-def $esp, implicit-def $ssp # CHECK-NEXT: ADJCALLSTACKUP32 16, 0, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp # CHECK-NEXT: ADJCALLSTACKDOWN32 20, 0, 20, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp -# CHECK-NEXT: %1:gr32 = MOV32rm %stack.2.s, 1, $noreg, 0, $noreg :: (load 4 from %stack.2.s, align 8) -# CHECK-NEXT: %2:gr32 = MOV32rm %stack.2.s, 1, $noreg, 4, $noreg :: (load 4 from %stack.2.s + 4) +# CHECK-NEXT: %1:gr32 = MOV32rm %stack.2.s, 1, $noreg, 0, $noreg :: (load (s32) from %stack.2.s, align 8) +# CHECK-NEXT: %2:gr32 = MOV32rm %stack.2.s, 1, $noreg, 4, $noreg :: (load (s32) from %stack.2.s + 4) # CHECK-NEXT: %4:gr32 = LEA32r %stack.0.p, 1, $noreg, 0, $noreg # CHECK-NEXT: %5:gr32 = LEA32r %stack.1.q, 1, $noreg, 0, $noreg # CHECK-NEXT: PUSH32r %4, implicit-def $esp, implicit $esp @@ -104,23 +104,23 @@ body: | bb.0.entry: ADJCALLSTACKDOWN32 16, 0, 0, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp %0 = COPY $esp - MOV32mi %0, 1, $noreg, 12, $noreg, 4 :: (store 4 into stack + 12) - MOV32mi %0, 1, $noreg, 8, $noreg, 3 :: (store 4 into stack + 8) - MOV32mi %0, 1, $noreg, 4, $noreg, 2 :: (store 4 into stack + 4) - MOV32mi %0, 1, $noreg, 0, $noreg, 1 :: (store 4 into stack) + MOV32mi %0, 1, $noreg, 12, $noreg, 4 :: (store (s32) into stack + 12) + MOV32mi %0, 1, $noreg, 8, $noreg, 3 :: (store (s32) into stack + 8) + MOV32mi %0, 1, $noreg, 4, $noreg, 2 :: (store (s32) into stack + 4) + MOV32mi %0, 1, $noreg, 0, $noreg, 1 :: (store (s32) into stack) CALLpcrel32 @good, csr_32, implicit $esp, implicit $ssp, implicit-def $esp, implicit-def $ssp ADJCALLSTACKUP32 16, 0, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp ADJCALLSTACKDOWN32 20, 0, 0, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp - %1 = MOV32rm %stack.2.s, 1, $noreg, 0, $noreg :: (load 4 from %stack.2.s, align 8) - %2 = MOV32rm %stack.2.s, 1, $noreg, 4, $noreg :: (load 4 from %stack.2.s + 4) + %1 = MOV32rm %stack.2.s, 1, $noreg, 0, $noreg :: (load (s32) from %stack.2.s, align 8) + %2 = MOV32rm %stack.2.s, 1, $noreg, 4, $noreg :: (load (s32) from %stack.2.s + 4) %3 = COPY $esp - MOV32mr %3, 1, $noreg, 4, $noreg, killed %2 :: (store 4) - MOV32mr %3, 1, $noreg, 0, $noreg, killed %1 :: (store 4) + MOV32mr %3, 1, $noreg, 4, $noreg, killed %2 :: (store (s32)) + MOV32mr %3, 1, $noreg, 0, $noreg, killed %1 :: (store (s32)) %4 = LEA32r %stack.0.p, 1, $noreg, 0, $noreg - MOV32mr %3, 1, $noreg, 16, $noreg, killed %4 :: (store 4 into stack + 16) + MOV32mr %3, 1, $noreg, 16, $noreg, killed %4 :: (store (s32) into stack + 16) %5 = LEA32r %stack.1.q, 1, $noreg, 0, $noreg - MOV32mr %3, 1, $noreg, 12, $noreg, killed %5 :: (store 4 into stack + 12) - MOV32mi %3, 1, $noreg, 8, $noreg, 6 :: (store 4 into stack + 8) + MOV32mr %3, 1, $noreg, 12, $noreg, killed %5 :: (store (s32) into stack + 12) + MOV32mi %3, 1, $noreg, 8, $noreg, 6 :: (store (s32) into stack + 8) CALLpcrel32 @struct, csr_32, implicit $esp, implicit $ssp, implicit-def $esp, implicit-def $ssp, ADJCALLSTACKUP32 20, 0, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp RET 0 diff --git a/llvm/test/CodeGen/X86/non-value-mem-operand.mir b/llvm/test/CodeGen/X86/non-value-mem-operand.mir index 427b4c22e615f..96d1d963c85bf 100644 --- a/llvm/test/CodeGen/X86/non-value-mem-operand.mir +++ b/llvm/test/CodeGen/X86/non-value-mem-operand.mir @@ -175,14 +175,14 @@ body: | successors: %bb.4.bb7(0x80000000) liveins: $rax - MOV64mr $rsp, 1, $noreg, 32, $noreg, $rax :: (store 8 into %stack.5) + MOV64mr $rsp, 1, $noreg, 32, $noreg, $rax :: (store (s64) into %stack.5) $r12 = MOV64rr killed $rax $r12 = ADD64ri8 killed $r12, 16, implicit-def dead $eflags $xmm0 = XORPSrr undef $xmm0, undef $xmm0 $esi = XOR32rr undef $esi, undef $esi, implicit-def dead $eflags $rax = MOV64ri %const.0 - $xmm1 = MOVSDrm_alt killed $rax, 1, $noreg, 0, $noreg :: (load 8 from constant-pool) - MOVSDmr $rsp, 1, $noreg, 40, $noreg, killed $xmm1 :: (store 8 into %stack.4) + $xmm1 = MOVSDrm_alt killed $rax, 1, $noreg, 0, $noreg :: (load (s64) from constant-pool) + MOVSDmr $rsp, 1, $noreg, 40, $noreg, killed $xmm1 :: (store (s64) into %stack.4) $eax = IMPLICIT_DEF $ecx = XOR32rr undef $ecx, undef $ecx, implicit-def dead $eflags @@ -200,13 +200,13 @@ body: | successors: %bb.6.bb26(0x80000000) liveins: $ebp, $rbx, $r14, $xmm0 - MOV32mr $rsp, 1, $noreg, 24, $noreg, $ebx :: (store 4 into %stack.0, align 8) - MOV32mr $rsp, 1, $noreg, 16, $noreg, $ebp :: (store 4 into %stack.1, align 8) - MOVSDmr $rsp, 1, $noreg, 8, $noreg, killed $xmm0 :: (store 8 into %stack.2) - $rax = MOV64rm $rsp, 1, $noreg, 32, $noreg :: (load 8 from %stack.5) - MOV64mr $rsp, 1, $noreg, 48, $noreg, killed $rax :: (store 8 into %stack.3) + MOV32mr $rsp, 1, $noreg, 24, $noreg, $ebx :: (store (s32) into %stack.0, align 8) + MOV32mr $rsp, 1, $noreg, 16, $noreg, $ebp :: (store (s32) into %stack.1, align 8) + MOVSDmr $rsp, 1, $noreg, 8, $noreg, killed $xmm0 :: (store (s64) into %stack.2) + $rax = MOV64rm $rsp, 1, $noreg, 32, $noreg :: (load (s64) from %stack.5) + MOV64mr $rsp, 1, $noreg, 48, $noreg, killed $rax :: (store (s64) into %stack.3) $rax = MOV64ri @wibble - STATEPOINT 2882400000, 0, 0, killed $rax, 2, 0, 2, 0, 2, 30, 2, 1, 2, 0, 2, 99, 2, 0, 2, 12, 2, 0, 2, 10, 1, 8, $rsp, 24, 2, 10, 2, 0, 2, 10, 1, 8, $rsp, 16, 2, 10, 2, 4278124286, 2, 6, 2, 4278124286, 2, 7, 1, 8, $rsp, 8, 2, 99, 2, 0, 2, 7, 2, 4278124286, 2, 99, 2, 0, 2, 13, 1, 8, $rsp, 48, 2, 7, 2, 4278124286, 2, 99, 2, 0, 2, 0, 2, 0, 2, 0, csr_64, implicit-def $rsp :: (volatile load 8 from %stack.0), (volatile load 8 from %stack.1), (volatile load 8 from %stack.2), (volatile load 8 from %stack.3) + STATEPOINT 2882400000, 0, 0, killed $rax, 2, 0, 2, 0, 2, 30, 2, 1, 2, 0, 2, 99, 2, 0, 2, 12, 2, 0, 2, 10, 1, 8, $rsp, 24, 2, 10, 2, 0, 2, 10, 1, 8, $rsp, 16, 2, 10, 2, 4278124286, 2, 6, 2, 4278124286, 2, 7, 1, 8, $rsp, 8, 2, 99, 2, 0, 2, 7, 2, 4278124286, 2, 99, 2, 0, 2, 13, 1, 8, $rsp, 48, 2, 7, 2, 4278124286, 2, 99, 2, 0, 2, 0, 2, 0, 2, 0, csr_64, implicit-def $rsp :: (volatile load (s64) from %stack.0), (volatile load (s64) from %stack.1), (volatile load (s64) from %stack.2), (volatile load (s64) from %stack.3) $esi = XOR32rr undef $esi, undef $esi, implicit-def dead $eflags $r12 = IMPLICIT_DEF @@ -215,16 +215,16 @@ body: | liveins: $ebp, $esi, $rbx, $r12, $r14 $rax = MOV64ri @global.1 - $rax = MOV64rm killed $rax, 1, $noreg, 0, $noreg :: (dereferenceable load 8 from @global.1) + $rax = MOV64rm killed $rax, 1, $noreg, 0, $noreg :: (dereferenceable load (s64) from @global.1) TEST64rr $rax, $rax, implicit-def $eflags $rax = CMOV64rr undef $rax, killed $rax, 4, implicit killed $eflags - $ecx = MOV32rm undef $rax, 1, $noreg, 0, $noreg :: (load 4 from `i32* undef`) - $rdx = MOV64rm $r12, 8, $r14, 0, $noreg :: (load 8 from %ir.tmp3) + $ecx = MOV32rm undef $rax, 1, $noreg, 0, $noreg :: (load (s32) from `i32* undef`) + $rdx = MOV64rm $r12, 8, $r14, 0, $noreg :: (load (s64) from %ir.tmp3) $r15 = LEA64r $rdx, 1, $noreg, 1, _ - MOV64mr $r12, 8, $r14, 0, $noreg, $r15 :: (store 8 into %ir.tmp3) + MOV64mr $r12, 8, $r14, 0, $noreg, $r15 :: (store (s64) into %ir.tmp3) $ecx = SUB32rr killed $ecx, $edx, implicit-def dead $eflags, implicit killed $rdx - MOV32mr undef $rax, 1, $noreg, 0, $noreg, killed $ecx :: (store 4 into `i32* undef`) - $r13 = MOV64rm killed $rax, 1, $noreg, 768, $noreg :: (load 8 from %ir.tmp33) + MOV32mr undef $rax, 1, $noreg, 0, $noreg, killed $ecx :: (store (s32) into `i32* undef`) + $r13 = MOV64rm killed $rax, 1, $noreg, 768, $noreg :: (load (s64) from %ir.tmp33) TEST8rr $sil, $sil, implicit-def $eflags $rax = IMPLICIT_DEF JCC_1 %bb.8.bb37, 5, implicit $eflags @@ -242,7 +242,7 @@ body: | successors: %bb.9.bb37(0x40000000), %bb.10.bb37(0x40000000) liveins: $ebp, $esi, $rax, $rbx, $r12, $r13, $r14, $r15 - $rcx = MOV64rm killed $rax, 1, $noreg, 760, $noreg :: (load 8 from %ir.tmp40) + $rcx = MOV64rm killed $rax, 1, $noreg, 760, $noreg :: (load (s64) from %ir.tmp40) CMP64rr $r13, $rcx, implicit-def $eflags JCC_1 %bb.10.bb37, 12, implicit $eflags @@ -258,12 +258,12 @@ body: | $cl = KILL $cl, implicit killed $rcx $r15 = SAR64rCL killed $r15, implicit-def dead $eflags, implicit $cl - MOV64mr $r12, 8, killed $r14, 0, $noreg, killed $r15 :: (store 8 into %ir.tmp7) - MOV64mi32 undef $rax, 1, $noreg, 0, $noreg, 0 :: (store 8 into `i64* undef`) + MOV64mr $r12, 8, killed $r14, 0, $noreg, killed $r15 :: (store (s64) into %ir.tmp7) + MOV64mi32 undef $rax, 1, $noreg, 0, $noreg, 0 :: (store (s64) into `i64* undef`) $eax = LEA64_32r $rbx, 1, $noreg, 1, _ $ecx = MOV32ri 6 CMP32ri $eax, 15141, implicit-def $eflags - $xmm0 = MOVSDrm_alt $rsp, 1, $noreg, 40, $noreg :: (load 8 from %stack.4) + $xmm0 = MOVSDrm_alt $rsp, 1, $noreg, 40, $noreg :: (load (s64) from %stack.4) JCC_1 %bb.4.bb7, 12, implicit $eflags bb.11.bb51.loopexit: @@ -273,17 +273,17 @@ body: | $ebp = INC32r killed $ebp, implicit-def dead $eflags $ebx = INC32r $ebx, implicit-def dead $eflags, implicit killed $rbx, implicit-def $rbx $rax = MOV64ri %const.0 - $xmm0 = MOVSDrm_alt killed $rax, 1, $noreg, 0, $noreg :: (load 8 from constant-pool) + $xmm0 = MOVSDrm_alt killed $rax, 1, $noreg, 0, $noreg :: (load (s64) from constant-pool) bb.12.bb51: liveins: $ebp, $rbx, $xmm0 - MOV32mr $rsp, 1, $noreg, 24, $noreg, $ebx, implicit killed $rbx :: (store 4 into %stack.0, align 8) - MOV32mr $rsp, 1, $noreg, 16, $noreg, killed $ebp :: (store 4 into %stack.1, align 8) - MOVSDmr $rsp, 1, $noreg, 8, $noreg, killed $xmm0 :: (store 8 into %stack.2) + MOV32mr $rsp, 1, $noreg, 24, $noreg, $ebx, implicit killed $rbx :: (store (s32) into %stack.0, align 8) + MOV32mr $rsp, 1, $noreg, 16, $noreg, killed $ebp :: (store (s32) into %stack.1, align 8) + MOVSDmr $rsp, 1, $noreg, 8, $noreg, killed $xmm0 :: (store (s64) into %stack.2) $rax = MOV64ri @wobble $edi = MOV32ri -121 - STATEPOINT 2882400000, 0, 1, killed $rax, $edi, 2, 0, 2, 0, 2, 38, 2, 1, 2, 0, 2, 270, 2, 4, 2, 12, 2, 0, 2, 11, 2, 4278124286, 2, 99, 2, 0, 2, 10, 1, 8, $rsp, 24, 2, 6, 2, 4278124286, 2, 99, 2, 0, 2, 99, 2, 0, 2, 10, 1, 8, $rsp, 16, 2, 10, 2, 4278124286, 2, 99, 2, 0, 2, 7, 1, 8, $rsp, 8, 2, 99, 2, 0, 2, 7, 2, 4278124286, 2, 99, 2, 0, 2, 13, 2, 4278124286, 2, 99, 2, 0, 2, 99, 2, 0, 2, 0, 2, 0, 2, 0, csr_64, implicit-def $rsp :: (volatile load 8 from %stack.0), (volatile load 8 from %stack.1), (volatile load 8 from %stack.2) + STATEPOINT 2882400000, 0, 1, killed $rax, $edi, 2, 0, 2, 0, 2, 38, 2, 1, 2, 0, 2, 270, 2, 4, 2, 12, 2, 0, 2, 11, 2, 4278124286, 2, 99, 2, 0, 2, 10, 1, 8, $rsp, 24, 2, 6, 2, 4278124286, 2, 99, 2, 0, 2, 99, 2, 0, 2, 10, 1, 8, $rsp, 16, 2, 10, 2, 4278124286, 2, 99, 2, 0, 2, 7, 1, 8, $rsp, 8, 2, 99, 2, 0, 2, 7, 2, 4278124286, 2, 99, 2, 0, 2, 13, 2, 4278124286, 2, 99, 2, 0, 2, 99, 2, 0, 2, 0, 2, 0, 2, 0, csr_64, implicit-def $rsp :: (volatile load (s64) from %stack.0), (volatile load (s64) from %stack.1), (volatile load (s64) from %stack.2) bb.13.bb59: $rax = MOV64ri @wobble diff --git a/llvm/test/CodeGen/X86/opt_phis2.mir b/llvm/test/CodeGen/X86/opt_phis2.mir index 7aca734118274..23c75b37c27aa 100644 --- a/llvm/test/CodeGen/X86/opt_phis2.mir +++ b/llvm/test/CodeGen/X86/opt_phis2.mir @@ -33,7 +33,7 @@ body: | JCC_1 %bb.8, 7, implicit $eflags bb.9: - JMP64m $noreg, 8, %10, %jump-table.0, $noreg :: (load 8 from jump-table) + JMP64m $noreg, 8, %10, %jump-table.0, $noreg :: (load (s64) from jump-table) bb.1: %0:vr256 = COPY %8 diff --git a/llvm/test/CodeGen/X86/peephole-fold-testrr.mir b/llvm/test/CodeGen/X86/peephole-fold-testrr.mir index 08e7c8bda047b..fe874ce6f58e8 100644 --- a/llvm/test/CodeGen/X86/peephole-fold-testrr.mir +++ b/llvm/test/CodeGen/X86/peephole-fold-testrr.mir @@ -39,13 +39,13 @@ body: | ; CHECK-LABEL: name: atomic ; CHECK: liveins: $rdi ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; CHECK: CMP64mi8 [[COPY]], 1, $noreg, 0, $noreg, 0, implicit-def $eflags :: (load unordered 8 from %ir.arg) + ; CHECK: CMP64mi8 [[COPY]], 1, $noreg, 0, $noreg, 0, implicit-def $eflags :: (load unordered (s64) from %ir.arg) ; CHECK: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 4, implicit $eflags ; CHECK: [[MOVZX32rr8_:%[0-9]+]]:gr32 = MOVZX32rr8 killed [[SETCCr]] ; CHECK: $eax = COPY [[MOVZX32rr8_]] ; CHECK: RET 0, $eax %0:gr64 = COPY $rdi - %1:gr64 = MOV64rm %0, 1, $noreg, 0, $noreg :: (load unordered 8 from %ir.arg) + %1:gr64 = MOV64rm %0, 1, $noreg, 0, $noreg :: (load unordered (s64) from %ir.arg) TEST64rr %1, %1, implicit-def $eflags %2:gr8 = SETCCr 4, implicit $eflags %3:gr32 = MOVZX32rr8 killed %2 @@ -72,13 +72,13 @@ body: | ; CHECK-LABEL: name: nonatomic_unoptimized ; CHECK: liveins: $rdi ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rdi - ; CHECK: CMP64mi8 [[COPY]], 1, $noreg, 0, $noreg, 0, implicit-def $eflags :: (load 8 from %ir.arg) + ; CHECK: CMP64mi8 [[COPY]], 1, $noreg, 0, $noreg, 0, implicit-def $eflags :: (load (s64) from %ir.arg) ; CHECK: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 4, implicit $eflags ; CHECK: [[MOVZX32rr8_:%[0-9]+]]:gr32 = MOVZX32rr8 killed [[SETCCr]] ; CHECK: $eax = COPY [[MOVZX32rr8_]] ; CHECK: RET 0, $eax %0:gr64 = COPY $rdi - %1:gr64 = MOV64rm %0, 1, $noreg, 0, $noreg :: (load 8 from %ir.arg) + %1:gr64 = MOV64rm %0, 1, $noreg, 0, $noreg :: (load (s64) from %ir.arg) TEST64rr %1, %1, implicit-def $eflags %2:gr8 = SETCCr 4, implicit $eflags %3:gr32 = MOVZX32rr8 killed %2 diff --git a/llvm/test/CodeGen/X86/peephole-recurrence.mir b/llvm/test/CodeGen/X86/peephole-recurrence.mir index 8067b1754e4ba..e28f2cd20ed5c 100644 --- a/llvm/test/CodeGen/X86/peephole-recurrence.mir +++ b/llvm/test/CodeGen/X86/peephole-recurrence.mir @@ -215,7 +215,7 @@ body: | ; CHECK: %11:gr32 = ADD32rr ; CHECK-SAME: %1, ; CHECK-SAME: %0, - MOV32mr %5, 1, $noreg, 0, $noreg, %0 :: (store 4 into %ir.p) + MOV32mr %5, 1, $noreg, 0, $noreg, %0 :: (store (s32) into %ir.p) %3 = ADD32rr %2, killed %11, implicit-def dead $eflags ; CHECK: %3:gr32 = ADD32rr ; CHECK-SAME: %2, diff --git a/llvm/test/CodeGen/X86/post-ra-sched-with-debug.mir b/llvm/test/CodeGen/X86/post-ra-sched-with-debug.mir index 442fff97a163e..98d25a5c41b87 100644 --- a/llvm/test/CodeGen/X86/post-ra-sched-with-debug.mir +++ b/llvm/test/CodeGen/X86/post-ra-sched-with-debug.mir @@ -270,16 +270,16 @@ body: | $rdi = LEA64r $rbx, 1, $noreg, 8, $noreg DBG_VALUE $rdi, $noreg, !20, !17, debug-location !27 DBG_VALUE $rdi, $noreg, !10, !17, debug-location !18 - $rax = MOV64rm $rbx, 1, $noreg, 16, $noreg :: (load 8) - MOV64mr $rbx, 1, $noreg, 8, $noreg, killed $rax :: (store 8) - MOV64mr $rbx, 1, $noreg, 24, $noreg, $rdi :: (store 8) + $rax = MOV64rm $rbx, 1, $noreg, 16, $noreg :: (load (s64)) + MOV64mr $rbx, 1, $noreg, 8, $noreg, killed $rax :: (store (s64)) + MOV64mr $rbx, 1, $noreg, 24, $noreg, $rdi :: (store (s64)) $eax = MOV32ri -1 $cl = MOV8rr $r14b, implicit killed $r14d $eax = SHL32rCL killed $eax, implicit-def dead $eflags, implicit $cl - MOV32mr $rbx, 1, $noreg, 32, $noreg, $eax :: (store 4, align 8) - MOV32mi $rbp, 1, $noreg, -20, $noreg, 0 :: (store 4) - $rcx = MOV64rm $rbx, 1, $noreg, 8, $noreg :: (load 8) - MOV64mr $rip, 1, $noreg, @n, $noreg, $rcx :: (store 8) + MOV32mr $rbx, 1, $noreg, 32, $noreg, $eax :: (store (s32), align 8) + MOV32mi $rbp, 1, $noreg, -20, $noreg, 0 :: (store (s32)) + $rcx = MOV64rm $rbx, 1, $noreg, 8, $noreg :: (load (s64)) + MOV64mr $rip, 1, $noreg, @n, $noreg, $rcx :: (store (s64)) $edx = XOR32rr undef $edx, undef $edx, implicit-def dead $eflags, implicit-def $rdx TEST64rr $rcx, $rcx, implicit-def $eflags $esi = MOV32ri @o, implicit-def $rsi @@ -291,8 +291,8 @@ body: | DBG_VALUE $rbp, -20, !29, !17, debug-location !36 $rcx = CMOV64rr killed $rcx, killed $rdx, 5, implicit killed $eflags $rcx = OR64rr killed $rcx, killed $rsi, implicit-def dead $eflags - $rdx = MOVSX64rm32 $rbx, 1, $noreg, 0, $noreg :: (load 4, align 8) - TEST32mr killed $rcx, 4, killed $rdx, 0, $noreg, killed $eax, implicit-def $eflags :: (load 4) + $rdx = MOVSX64rm32 $rbx, 1, $noreg, 0, $noreg :: (load (s32), align 8) + TEST32mr killed $rcx, 4, killed $rdx, 0, $noreg, killed $eax, implicit-def $eflags :: (load (s32)) JCC_1 %bb.2, 5, implicit $eflags JMP_1 %bb.3 @@ -300,7 +300,7 @@ body: | successors: %bb.2 liveins: $rbx, $rbp - $rdi = MOV64rm $rbx, 1, $noreg, 24, $noreg :: (load 8) + $rdi = MOV64rm $rbx, 1, $noreg, 24, $noreg :: (load (s64)) bb.2: successors: %bb.1, %bb.3 @@ -312,7 +312,7 @@ body: | $ecx = SHR32ri $ecx, 31, implicit-def dead $eflags, implicit killed $rcx, implicit-def $rcx $eax = LEA64_32r killed $rax, 1, killed $rcx, -1, $noreg $eax = SAR32r1 killed $eax, implicit-def dead $eflags - CMP32mr $rbx, 1, $noreg, 0, $noreg, killed $eax, implicit-def $eflags :: (load 4, align 8), (load 4, align 8) + CMP32mr $rbx, 1, $noreg, 0, $noreg, killed $eax, implicit-def $eflags :: (load (s32), align 8), (load (s32), align 8) JCC_1 %bb.1, 15, implicit killed $eflags bb.3: diff --git a/llvm/test/CodeGen/X86/pr27681.mir b/llvm/test/CodeGen/X86/pr27681.mir index 030b4a1d3227f..e7293fda709cf 100644 --- a/llvm/test/CodeGen/X86/pr27681.mir +++ b/llvm/test/CodeGen/X86/pr27681.mir @@ -47,11 +47,11 @@ body: | TEST32rr $edx, $edx, implicit-def $eflags $cl = SETCCr 5, implicit $eflags ; This %bl def is antidependent on the above use of $ebx - $bl = MOV8rm $esp, 1, $noreg, 3, _ ; :: (load 1 from %stack.0) + $bl = MOV8rm $esp, 1, $noreg, 3, _ ; :: (load (s8) from %stack.0) $cl = OR8rr killed $cl, $bl, implicit-def dead $eflags $esi = MOVZX32rr8 killed $cl $esi = ADD32rr killed $esi, killed $edi, implicit-def dead $eflags - $ecx = MOV32rm $esp, 1, $noreg, 24, _ ; :: (load 4 from %stack.2) + $ecx = MOV32rm $esp, 1, $noreg, 24, _ ; :: (load (s32) from %stack.2) $edx = SAR32rCL killed $edx, implicit-def dead $eflags, implicit $cl TEST32rr killed $edx, $edx, implicit-def $eflags $cl = SETCCr 5, implicit $eflags @@ -66,7 +66,7 @@ body: | bb.2: liveins: $cl, $eax, $ebp, $esi - OR32mr $esp, 1, $noreg, 8, $noreg, killed $eax, implicit-def $eflags ; :: (store 4 into %stack.1) + OR32mr $esp, 1, $noreg, 8, $noreg, killed $eax, implicit-def $eflags ; :: (store (s32) into %stack.1) $dl = SETCCr 5, implicit $eflags, implicit-def $edx bb.3: diff --git a/llvm/test/CodeGen/X86/pr30821.mir b/llvm/test/CodeGen/X86/pr30821.mir index 7ac7e3668e02e..9591d45e7baff 100644 --- a/llvm/test/CodeGen/X86/pr30821.mir +++ b/llvm/test/CodeGen/X86/pr30821.mir @@ -69,133 +69,133 @@ body: | ; sequence eliminated if LLVM is broken. ; Make first 15 $xmm registers live - $xmm0 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india) - $xmm1 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india) - $xmm2 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india) - $xmm3 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india) - $xmm4 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india) - $xmm5 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india) - $xmm6 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india) - $xmm7 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india) - $xmm8 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india) - $xmm9 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india) - $xmm10 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india) - $xmm11 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india) - $xmm12 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india) - $xmm13 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india) - $xmm14 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india) + $xmm0 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load (s128) from %ir.india) + $xmm1 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load (s128) from %ir.india) + $xmm2 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load (s128) from %ir.india) + $xmm3 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load (s128) from %ir.india) + $xmm4 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load (s128) from %ir.india) + $xmm5 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load (s128) from %ir.india) + $xmm6 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load (s128) from %ir.india) + $xmm7 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load (s128) from %ir.india) + $xmm8 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load (s128) from %ir.india) + $xmm9 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load (s128) from %ir.india) + $xmm10 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load (s128) from %ir.india) + $xmm11 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load (s128) from %ir.india) + $xmm12 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load (s128) from %ir.india) + $xmm13 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load (s128) from %ir.india) + $xmm14 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load (s128) from %ir.india) ; First vreg load - %1:vr128 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india) + %1:vr128 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load (s128) from %ir.india) ; First faulty sequence; %1 spilt - %12:fr64 = MOVSDrm_alt %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 8 from %ir.india) + %12:fr64 = MOVSDrm_alt %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load (s64) from %ir.india) %13:vr128 = COPY killed %12 - MOVAPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed %13 :: (volatile store 16 into %ir.india) - ; CHECK: renamable $xmm{{[0-9]+}} = MOVSDrm_alt %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 8 from %ir.india) - ; CHECK-NEXT: MOVAPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed renamable $xmm{{[0-9]+}} :: (volatile store 16 into %ir.india) + MOVAPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed %13 :: (volatile store (s128) into %ir.india) + ; CHECK: renamable $xmm{{[0-9]+}} = MOVSDrm_alt %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load (s64) from %ir.india) + ; CHECK-NEXT: MOVAPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed renamable $xmm{{[0-9]+}} :: (volatile store (s128) into %ir.india) ; Store %1 to avoid it being optimised out, will result in a load-from-spill - MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed %1 :: (volatile dereferenceable store 16 into %ir.india) + MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed %1 :: (volatile dereferenceable store (s128) into %ir.india) ; That code sequence a second time, to generate a second spill slot that ; will get coloured and merged. - %2:vr128 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india) + %2:vr128 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load (s128) from %ir.india) - %22:fr64 = MOVSDrm_alt %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 8 from %ir.india) + %22:fr64 = MOVSDrm_alt %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load (s64) from %ir.india) %23:vr128 = COPY killed %22 - MOVAPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed %23 :: (volatile store 16 into %ir.india) + MOVAPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed %23 :: (volatile store (s128) into %ir.india) - ; CHECK: renamable $xmm{{[0-9]+}} = MOVSDrm_alt %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 8 from %ir.india) - ; CHECK-NEXT: MOVAPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed renamable $xmm{{[0-9]+}} :: (volatile store 16 into %ir.india) + ; CHECK: renamable $xmm{{[0-9]+}} = MOVSDrm_alt %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load (s64) from %ir.india) + ; CHECK-NEXT: MOVAPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed renamable $xmm{{[0-9]+}} :: (volatile store (s128) into %ir.india) - MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed %2 :: (volatile dereferenceable store 16 into %ir.india) + MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed %2 :: (volatile dereferenceable store (s128) into %ir.india) ; Test some sequences that _should_ be eliminated - %3:vr128 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india) + %3:vr128 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load (s128) from %ir.india) - %32:fr64 = VMOVSDrm_alt %stack.2.india, 1, $noreg, 0, $noreg :: (dereferenceable load 8 from %ir.india) + %32:fr64 = VMOVSDrm_alt %stack.2.india, 1, $noreg, 0, $noreg :: (dereferenceable load (s64) from %ir.india) %33:fr64 = COPY killed %32 - VMOVSDmr %stack.2.india, 1, $noreg, 0, $noreg, killed %33 :: (store 8 into %ir.india) + VMOVSDmr %stack.2.india, 1, $noreg, 0, $noreg, killed %33 :: (store (s64) into %ir.india) ; This is the spill introduced by regalloc; we check that the inner dead ; store and load were eliminated - ; CHECK: MOVAPSmr %stack.3, 1, $noreg, 0, $noreg, killed renamable $xmm{{[0-9]+}} :: (store 16 into %stack.3) - ; CHECK-NEXT:renamable $xmm{{[0-9]+}} = MOVAPSrm %stack.3, 1, $noreg, 0, $noreg :: (load 16 from %stack.3) + ; CHECK: MOVAPSmr %stack.3, 1, $noreg, 0, $noreg, killed renamable $xmm{{[0-9]+}} :: (store (s128) into %stack.3) + ; CHECK-NEXT:renamable $xmm{{[0-9]+}} = MOVAPSrm %stack.3, 1, $noreg, 0, $noreg :: (load (s128) from %stack.3) - MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed %3 :: (volatile dereferenceable store 16 into %ir.india) + MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed %3 :: (volatile dereferenceable store (s128) into %ir.india) ; Moves with different encodings but same size should be eliminated - %4:vr128 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india) + %4:vr128 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load (s128) from %ir.india) - %42:fr32 = MOVSSrm_alt %stack.2.india, 1, $noreg, 0, $noreg :: (dereferenceable load 4 from %ir.india) + %42:fr32 = MOVSSrm_alt %stack.2.india, 1, $noreg, 0, $noreg :: (dereferenceable load (s32) from %ir.india) %43:fr32 = COPY killed %42 - VMOVSSZmr %stack.2.india, 1, $noreg, 0, $noreg, killed %43 :: (store 4 into %ir.india) + VMOVSSZmr %stack.2.india, 1, $noreg, 0, $noreg, killed %43 :: (store (s32) into %ir.india) - ; CHECK: MOVAPSmr %stack.3, 1, $noreg, 0, $noreg, killed renamable $xmm{{[0-9]+}} :: (store 16 into %stack.3) - ; CHECK-NEXT:renamable $xmm{{[0-9]+}} = MOVAPSrm %stack.3, 1, $noreg, 0, $noreg :: (load 16 from %stack.3) + ; CHECK: MOVAPSmr %stack.3, 1, $noreg, 0, $noreg, killed renamable $xmm{{[0-9]+}} :: (store (s128) into %stack.3) + ; CHECK-NEXT:renamable $xmm{{[0-9]+}} = MOVAPSrm %stack.3, 1, $noreg, 0, $noreg :: (load (s128) from %stack.3) - MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed %4 :: (volatile dereferenceable store 16 into %ir.india) + MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed %4 :: (volatile dereferenceable store (s128) into %ir.india) ; Same deal with double-size - %5:vr128 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india) + %5:vr128 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load (s128) from %ir.india) - %52:fr64 = MOVSDrm_alt %stack.2.india, 1, $noreg, 0, $noreg :: (dereferenceable load 8 from %ir.india) + %52:fr64 = MOVSDrm_alt %stack.2.india, 1, $noreg, 0, $noreg :: (dereferenceable load (s64) from %ir.india) %53:fr64 = COPY killed %52 - VMOVSDZmr %stack.2.india, 1, $noreg, 0, $noreg, killed %53 :: (store 8 into %ir.india) + VMOVSDZmr %stack.2.india, 1, $noreg, 0, $noreg, killed %53 :: (store (s64) into %ir.india) - ; CHECK: MOVAPSmr %stack.3, 1, $noreg, 0, $noreg, killed renamable $xmm{{[0-9]+}} :: (store 16 into %stack.3) - ; CHECK-NEXT:renamable $xmm{{[0-9]+}} = MOVAPSrm %stack.3, 1, $noreg, 0, $noreg :: (load 16 from %stack.3) + ; CHECK: MOVAPSmr %stack.3, 1, $noreg, 0, $noreg, killed renamable $xmm{{[0-9]+}} :: (store (s128) into %stack.3) + ; CHECK-NEXT:renamable $xmm{{[0-9]+}} = MOVAPSrm %stack.3, 1, $noreg, 0, $noreg :: (load (s128) from %stack.3) - MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed %5 :: (volatile dereferenceable store 16 into %ir.india) + MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed %5 :: (volatile dereferenceable store (s128) into %ir.india) ; Last two repeated, with load/store opcode flipped - %6:vr128 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india) + %6:vr128 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load (s128) from %ir.india) - %62:fr32 = VMOVSSZrm_alt %stack.2.india, 1, $noreg, 0, $noreg :: (dereferenceable load 4 from %ir.india) + %62:fr32 = VMOVSSZrm_alt %stack.2.india, 1, $noreg, 0, $noreg :: (dereferenceable load (s32) from %ir.india) %63:fr32 = COPY killed %62 - MOVSSmr %stack.2.india, 1, $noreg, 0, $noreg, killed %63 :: (store 4 into %ir.india) + MOVSSmr %stack.2.india, 1, $noreg, 0, $noreg, killed %63 :: (store (s32) into %ir.india) - ; CHECK: MOVAPSmr %stack.3, 1, $noreg, 0, $noreg, killed renamable $xmm{{[0-9]+}} :: (store 16 into %stack.3) - ; CHECK-NEXT:renamable $xmm{{[0-9]+}} = MOVAPSrm %stack.3, 1, $noreg, 0, $noreg :: (load 16 from %stack.3) + ; CHECK: MOVAPSmr %stack.3, 1, $noreg, 0, $noreg, killed renamable $xmm{{[0-9]+}} :: (store (s128) into %stack.3) + ; CHECK-NEXT:renamable $xmm{{[0-9]+}} = MOVAPSrm %stack.3, 1, $noreg, 0, $noreg :: (load (s128) from %stack.3) - MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed %6 :: (volatile dereferenceable store 16 into %ir.india) + MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed %6 :: (volatile dereferenceable store (s128) into %ir.india) ; Flipped double-size different-encoding test - %7:vr128 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load 16 from %ir.india) + %7:vr128 = MOVUPDrm %stack.2.india, 1, $noreg, 0, $noreg :: (volatile dereferenceable load (s128) from %ir.india) - %72:fr64 = VMOVSDZrm_alt %stack.2.india, 1, $noreg, 0, $noreg :: (dereferenceable load 8 from %ir.india) + %72:fr64 = VMOVSDZrm_alt %stack.2.india, 1, $noreg, 0, $noreg :: (dereferenceable load (s64) from %ir.india) %73:fr64 = COPY killed %72 - MOVSDmr %stack.2.india, 1, $noreg, 0, $noreg, killed %73 :: (store 8 into %ir.india) + MOVSDmr %stack.2.india, 1, $noreg, 0, $noreg, killed %73 :: (store (s64) into %ir.india) - ; CHECK: MOVAPSmr %stack.3, 1, $noreg, 0, $noreg, killed renamable $xmm{{[0-9]+}} :: (store 16 into %stack.3) - ; CHECK-NEXT:renamable $xmm{{[0-9]+}} = MOVAPSrm %stack.3, 1, $noreg, 0, $noreg :: (load 16 from %stack.3) + ; CHECK: MOVAPSmr %stack.3, 1, $noreg, 0, $noreg, killed renamable $xmm{{[0-9]+}} :: (store (s128) into %stack.3) + ; CHECK-NEXT:renamable $xmm{{[0-9]+}} = MOVAPSrm %stack.3, 1, $noreg, 0, $noreg :: (load (s128) from %stack.3) - MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed %7 :: (volatile dereferenceable store 16 into %ir.india) + MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed %7 :: (volatile dereferenceable store (s128) into %ir.india) ; Stores of first 15 $xmm registers to keep them live across the middle of ; this bb. - MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm0 :: (volatile dereferenceable store 16 into %ir.india) - MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm1 :: (volatile dereferenceable store 16 into %ir.india) - MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm2 :: (volatile dereferenceable store 16 into %ir.india) - MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm3 :: (volatile dereferenceable store 16 into %ir.india) - MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm4 :: (volatile dereferenceable store 16 into %ir.india) - MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm5 :: (volatile dereferenceable store 16 into %ir.india) - MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm6 :: (volatile dereferenceable store 16 into %ir.india) - MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm7 :: (volatile dereferenceable store 16 into %ir.india) - MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm8 :: (volatile dereferenceable store 16 into %ir.india) - MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm9 :: (volatile dereferenceable store 16 into %ir.india) - MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm10 :: (volatile dereferenceable store 16 into %ir.india) - MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm11 :: (volatile dereferenceable store 16 into %ir.india) - MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm12 :: (volatile dereferenceable store 16 into %ir.india) - MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm13 :: (volatile dereferenceable store 16 into %ir.india) - MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm14 :: (volatile dereferenceable store 16 into %ir.india) + MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm0 :: (volatile dereferenceable store (s128) into %ir.india) + MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm1 :: (volatile dereferenceable store (s128) into %ir.india) + MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm2 :: (volatile dereferenceable store (s128) into %ir.india) + MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm3 :: (volatile dereferenceable store (s128) into %ir.india) + MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm4 :: (volatile dereferenceable store (s128) into %ir.india) + MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm5 :: (volatile dereferenceable store (s128) into %ir.india) + MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm6 :: (volatile dereferenceable store (s128) into %ir.india) + MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm7 :: (volatile dereferenceable store (s128) into %ir.india) + MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm8 :: (volatile dereferenceable store (s128) into %ir.india) + MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm9 :: (volatile dereferenceable store (s128) into %ir.india) + MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm10 :: (volatile dereferenceable store (s128) into %ir.india) + MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm11 :: (volatile dereferenceable store (s128) into %ir.india) + MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm12 :: (volatile dereferenceable store (s128) into %ir.india) + MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm13 :: (volatile dereferenceable store (s128) into %ir.india) + MOVUPDmr %stack.2.india, 1, $noreg, 0, $noreg, killed $xmm14 :: (volatile dereferenceable store (s128) into %ir.india) RET 0 diff --git a/llvm/test/CodeGen/X86/pr46827.ll b/llvm/test/CodeGen/X86/pr46827.ll index 438b13c3400fe..f5d496fdddf5a 100644 --- a/llvm/test/CodeGen/X86/pr46827.ll +++ b/llvm/test/CodeGen/X86/pr46827.ll @@ -3,7 +3,7 @@ ; CHECK: body: | ; CHECK: bb.0.bb107: ; CHECK: successors: %bb.3(0x40000000), %bb.4(0x40000000) -; CHECK: %0:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load 4 from %fixed-stack.0, align 16) +; CHECK: %0:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.0, align 16) ; CHECK: %1:gr32 = SUB32ri8 %0, 1, implicit-def $eflags ; CHECK: XBEGIN_4 %bb.4, implicit-def $eax ; CHECK: bb.3.bb107: diff --git a/llvm/test/CodeGen/X86/pr48064.mir b/llvm/test/CodeGen/X86/pr48064.mir index 8ddfdec9b5903..1e1ea991e82c7 100644 --- a/llvm/test/CodeGen/X86/pr48064.mir +++ b/llvm/test/CodeGen/X86/pr48064.mir @@ -48,7 +48,7 @@ ## Make sure that %stack.3.exp.i not replaced with %stack.2.o.i # CHECK: bb.3.catch.i (landing-pad, ehfunclet-entry): -# CHECK: %7:gr32 = MOV32rm %stack.3.exp.i, 1, $noreg, 0, $noreg :: (dereferenceable load 4 from %ir.exp.i) +# CHECK: %7:gr32 = MOV32rm %stack.3.exp.i, 1, $noreg, 0, $noreg :: (dereferenceable load (s32) from %ir.exp.i) --- | ; ModuleID = 'test-pre-stc.mir' @@ -316,21 +316,21 @@ body: | successors: %bb.1(0x7ffff800), %bb.2(0x00000800) %0:gr32 = COPY $esp - MOV32mr %stack.0.zx, 1, $noreg, 0, $noreg, %0 :: (store 4 into %ir.3) - MOV32mi %stack.0.zx, 1, $noreg, 12, $noreg, -1 :: (store 4 into %ir.4) + MOV32mr %stack.0.zx, 1, $noreg, 0, $noreg, %0 :: (store (s32) into %ir.3) + MOV32mi %stack.0.zx, 1, $noreg, 12, $noreg, -1 :: (store (s32) into %ir.4) %1:gr32 = nuw LEA32r %stack.0.zx, 1, $noreg, 4, $noreg - MOV32mi %stack.0.zx, 1, $noreg, 8, $noreg, @"__ehhandler$main" :: (store 4 into %ir.6) - %2:gr32 = MOV32rm $noreg, 1, $noreg, 0, $fs :: (load 4 from `%EHRegistrationNode* addrspace(257)* null`, addrspace 257) - MOV32mr %stack.0.zx, 1, $noreg, 4, $noreg, killed %2 :: (store 4 into %ir.8) - MOV32mr $noreg, 1, $noreg, 0, $fs, killed %1 :: (store 4 into `%EHRegistrationNode* addrspace(257)* null`, addrspace 257) - MOV32mi %stack.2.o.i, 1, $noreg, 0, $noreg, 1 :: (store 4 into %ir.i.i.i1) - MOV32mi %stack.1.tmp.i.i, 1, $noreg, 0, $noreg, 999 :: (store 4 into %ir.tmp.i.i) - MOV32mi %stack.0.zx, 1, $noreg, 12, $noreg, 1 :: (store 4 into %ir.12) + MOV32mi %stack.0.zx, 1, $noreg, 8, $noreg, @"__ehhandler$main" :: (store (s32) into %ir.6) + %2:gr32 = MOV32rm $noreg, 1, $noreg, 0, $fs :: (load (s32) from `%EHRegistrationNode* addrspace(257)* null`, addrspace 257) + MOV32mr %stack.0.zx, 1, $noreg, 4, $noreg, killed %2 :: (store (s32) into %ir.8) + MOV32mr $noreg, 1, $noreg, 0, $fs, killed %1 :: (store (s32) into `%EHRegistrationNode* addrspace(257)* null`, addrspace 257) + MOV32mi %stack.2.o.i, 1, $noreg, 0, $noreg, 1 :: (store (s32) into %ir.i.i.i1) + MOV32mi %stack.1.tmp.i.i, 1, $noreg, 0, $noreg, 999 :: (store (s32) into %ir.tmp.i.i) + MOV32mi %stack.0.zx, 1, $noreg, 12, $noreg, 1 :: (store (s32) into %ir.12) ADJCALLSTACKDOWN32 8, 0, 0, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp %3:gr32 = COPY $esp %4:gr32 = LEA32r %stack.1.tmp.i.i, 1, $noreg, 0, $noreg - MOV32mr %3, 1, $noreg, 0, $noreg, killed %4 :: (store 4 into stack) - MOV32mi %3, 1, $noreg, 4, $noreg, @_TI1H :: (store 4 into stack + 4) + MOV32mr %3, 1, $noreg, 0, $noreg, killed %4 :: (store (s32) into stack) + MOV32mi %3, 1, $noreg, 4, $noreg, @_TI1H :: (store (s32) into stack + 4) CALLpcrel32 @_CxxThrowException, csr_noregs, implicit $esp, implicit $ssp, implicit-def $esp, implicit-def $ssp ADJCALLSTACKUP32 8, 0, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp JMP_1 %bb.1 @@ -342,11 +342,11 @@ body: | bb.2.ehcleanup.i (landing-pad, ehfunclet-entry): successors: %bb.3(0x80000000) - MOV32mi %stack.2.o.i, 1, $noreg, 0, $noreg, 9999 :: (store 4 into %ir.14) + MOV32mi %stack.2.o.i, 1, $noreg, 0, $noreg, 9999 :: (store (s32) into %ir.14) ADJCALLSTACKDOWN32 4, 0, 0, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp %5:gr32 = COPY $esp %6:gr32 = LEA32r %stack.2.o.i, 1, $noreg, 0, $noreg - MOV32mr %5, 1, $noreg, 0, $noreg, killed %6 :: (store 4 into stack) + MOV32mr %5, 1, $noreg, 0, $noreg, killed %6 :: (store (s32) into stack) CALLpcrel32 @"escape__YAXPAH@Z", csr_32, implicit $esp, implicit $ssp, implicit-def $esp, implicit-def $ssp ADJCALLSTACKUP32 4, 0, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp CLEANUPRET @@ -354,9 +354,9 @@ body: | bb.3.catch.i (landing-pad, ehfunclet-entry): successors: %bb.4(0x80000000) - %7:gr32 = MOV32rm %stack.3.exp.i, 1, $noreg, 0, $noreg :: (dereferenceable load 4 from %ir.exp.i) - %8:gr32 = MOV32rm killed %7, 1, $noreg, 0, $noreg :: (load 4 from %ir.18) - MOV32mr $noreg, 1, $noreg, @v__3HC, $noreg, killed %8 :: (volatile store release 4 into @v__3HC) + %7:gr32 = MOV32rm %stack.3.exp.i, 1, $noreg, 0, $noreg :: (dereferenceable load (s32) from %ir.exp.i) + %8:gr32 = MOV32rm killed %7, 1, $noreg, 0, $noreg :: (load (s32) from %ir.18) + MOV32mr $noreg, 1, $noreg, @v__3HC, $noreg, killed %8 :: (volatile store release (s32) into @v__3HC) CATCHRET %bb.4, %bb.0 bb.4.catch.i (landing-pad): @@ -365,8 +365,8 @@ body: | JMP_4 %bb.5 bb.5.func__YAXXZ.exit: - %9:gr32 = MOV32rm %stack.0.zx, 1, $noreg, 4, $noreg :: (dereferenceable load 4 from %ir.22) - MOV32mr $noreg, 1, $noreg, 0, $fs, killed %9 :: (store 4 into `%EHRegistrationNode* addrspace(257)* null`, addrspace 257) + %9:gr32 = MOV32rm %stack.0.zx, 1, $noreg, 4, $noreg :: (dereferenceable load (s32) from %ir.22) + MOV32mr $noreg, 1, $noreg, 0, $fs, killed %9 :: (store (s32) into `%EHRegistrationNode* addrspace(257)* null`, addrspace 257) %10:gr32 = MOV32r0 implicit-def dead $eflags $eax = COPY %10 RET 0, $eax diff --git a/llvm/test/CodeGen/X86/pre-coalesce.mir b/llvm/test/CodeGen/X86/pre-coalesce.mir index 3051c009eb9c2..aa7481adbaef6 100644 --- a/llvm/test/CodeGen/X86/pre-coalesce.mir +++ b/llvm/test/CodeGen/X86/pre-coalesce.mir @@ -83,10 +83,10 @@ frameInfo: hasMustTailInVarArgFunc: false body: | bb.0.entry: - %0 = MOV64rm $rip, 1, $noreg, @b, $noreg :: (dereferenceable load 8 from @b) - %12 = MOV8rm %0, 1, $noreg, 0, $noreg :: (load 1 from %ir.t0) + %0 = MOV64rm $rip, 1, $noreg, @b, $noreg :: (dereferenceable load (s64) from @b) + %12 = MOV8rm %0, 1, $noreg, 0, $noreg :: (load (s8) from %ir.t0) TEST8rr %12, %12, implicit-def $eflags - %11 = MOV32rm $rip, 1, $noreg, @a, $noreg :: (dereferenceable load 4 from @a) + %11 = MOV32rm $rip, 1, $noreg, @a, $noreg :: (dereferenceable load (s32) from @a) JCC_1 %bb.1, 5, implicit killed $eflags bb.4: @@ -101,8 +101,8 @@ body: | %10 = SHL32ri %10, 5, implicit-def dead $eflags %10 = ADD32rr %10, %11, implicit-def dead $eflags %10 = ADD32rr %10, %8, implicit-def dead $eflags - MOV32mr $rip, 1, $noreg, @a, $noreg, %10 :: (store 4 into @a) - %12 = MOV8rm %0, 1, $noreg, 0, $noreg :: (load 1 from %ir.t0) + MOV32mr $rip, 1, $noreg, @a, $noreg, %10 :: (store (s32) into @a) + %12 = MOV8rm %0, 1, $noreg, 0, $noreg :: (load (s8) from %ir.t0) TEST8rr %12, %12, implicit-def $eflags %11 = COPY %10 JCC_1 %bb.2, 5, implicit killed $eflags diff --git a/llvm/test/CodeGen/X86/prologepilog_deref_size.mir b/llvm/test/CodeGen/X86/prologepilog_deref_size.mir index 5b0e15be10b15..7764eb14f0d8a 100644 --- a/llvm/test/CodeGen/X86/prologepilog_deref_size.mir +++ b/llvm/test/CodeGen/X86/prologepilog_deref_size.mir @@ -51,7 +51,7 @@ constants: [] machineFunctionInfo: {} body: | bb.0.entry: - renamable $ax = MOV16rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load 2 from %fixed-stack.0, align 8) + renamable $ax = MOV16rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s16) from %fixed-stack.0, align 8) DBG_VALUE %fixed-stack.0, 0, !32, !DIExpression(DW_OP_LLVM_convert, 16, DW_ATE_signed, DW_OP_LLVM_convert, 32, DW_ATE_signed, DW_OP_stack_value), debug-location !34 RET 0, $ax ... diff --git a/llvm/test/CodeGen/X86/regalloc-fast-missing-live-out-spill.mir b/llvm/test/CodeGen/X86/regalloc-fast-missing-live-out-spill.mir index f6b4536cbbc4b..6a2eae9dbbc4c 100644 --- a/llvm/test/CodeGen/X86/regalloc-fast-missing-live-out-spill.mir +++ b/llvm/test/CodeGen/X86/regalloc-fast-missing-live-out-spill.mir @@ -15,23 +15,23 @@ body: | ; CHECK: bb.0: ; CHECK: successors: %bb.3(0x80000000) ; CHECK: liveins: $edi, $rsi - ; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed $rsi :: (store 8 into %stack.0) + ; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed $rsi :: (store (s64) into %stack.0) ; CHECK: JMP_1 %bb.3 ; CHECK: bb.1: ; CHECK: successors: ; CHECK: bb.2: ; CHECK: successors: %bb.3(0x80000000) - ; CHECK: $rcx = MOV64rm %stack.1, 1, $noreg, 0, $noreg :: (load 8 from %stack.1) + ; CHECK: $rcx = MOV64rm %stack.1, 1, $noreg, 0, $noreg :: (load (s64) from %stack.1) ; CHECK: renamable $eax = MOV32r0 implicit-def dead $eflags ; CHECK: renamable $rax = SUBREG_TO_REG 0, killed renamable $eax, %subreg.sub_32bit - ; CHECK: MOV64mi32 killed renamable $rcx, 1, $noreg, 0, $noreg, 0 :: (volatile store 8) - ; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed $rax :: (store 8 into %stack.0) + ; CHECK: MOV64mi32 killed renamable $rcx, 1, $noreg, 0, $noreg, 0 :: (volatile store (s64)) + ; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed $rax :: (store (s64) into %stack.0) ; CHECK: bb.3: ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; CHECK: $rax = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load 8 from %stack.0) + ; CHECK: $rax = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %stack.0) ; CHECK: renamable $ecx = MOV32r0 implicit-def dead $eflags ; CHECK: renamable $rcx = SUBREG_TO_REG 0, killed renamable $ecx, %subreg.sub_32bit - ; CHECK: MOV64mr %stack.1, 1, $noreg, 0, $noreg, killed $rcx :: (store 8 into %stack.1) + ; CHECK: MOV64mr %stack.1, 1, $noreg, 0, $noreg, killed $rcx :: (store (s64) into %stack.1) ; CHECK: JMP64r killed renamable $rax bb.0: liveins: $edi, $rsi @@ -50,7 +50,7 @@ body: | %0:gr64 = COPY %12 %10:gr32 = MOV32r0 implicit-def $eflags %11:gr64 = SUBREG_TO_REG 0, %10, %subreg.sub_32bit - MOV64mi32 %0, 1, $noreg, 0, $noreg, 0 :: (volatile store 8) + MOV64mi32 %0, 1, $noreg, 0, $noreg, 0 :: (volatile store (s64)) %13:gr64 = COPY %11 bb.3: diff --git a/llvm/test/CodeGen/X86/shrink_wrap_dbg_value.mir b/llvm/test/CodeGen/X86/shrink_wrap_dbg_value.mir index 8bb10127f3cf1..c71a8e7c2d9a6 100644 --- a/llvm/test/CodeGen/X86/shrink_wrap_dbg_value.mir +++ b/llvm/test/CodeGen/X86/shrink_wrap_dbg_value.mir @@ -148,9 +148,9 @@ body: | bb.1.for.cond.preheader: successors: %bb.2(0x80000000) - $esi = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load 4 from %fixed-stack.0) + $esi = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.0) DBG_VALUE $esi, $noreg, !13, !DIExpression(), debug-location !19 - $edi = MOV32rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (load 4 from %fixed-stack.1) + $edi = MOV32rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.1) DBG_VALUE $edi, $noreg, !14, !DIExpression(), debug-location !20 $edi = DEC32r killed $edi, implicit-def dead $eflags, debug-location !30 $ebx = LEA32r %fixed-stack.1, 1, $noreg, 0, $noreg diff --git a/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll b/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll index 3ccca93ad9828..d3715f2eac164 100644 --- a/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll +++ b/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll @@ -24,9 +24,9 @@ define float @sqrt_ieee_ninf(float %f) #0 { ; CHECK: [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF ; CHECK: [[VRSQRTSSr:%[0-9]+]]:fr32 = VRSQRTSSr killed [[DEF]], [[COPY]] ; CHECK: %3:fr32 = ninf afn nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr - ; CHECK: [[VMOVSSrm_alt:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load 4 from constant-pool) + ; CHECK: [[VMOVSSrm_alt:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load (s32) from constant-pool) ; CHECK: %5:fr32 = ninf afn nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed %3, [[VMOVSSrm_alt]], implicit $mxcsr - ; CHECK: [[VMOVSSrm_alt1:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.1, $noreg :: (load 4 from constant-pool) + ; CHECK: [[VMOVSSrm_alt1:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.1, $noreg :: (load (s32) from constant-pool) ; CHECK: %7:fr32 = ninf afn nofpexcept VMULSSrr [[VRSQRTSSr]], [[VMOVSSrm_alt1]], implicit $mxcsr ; CHECK: %8:fr32 = ninf afn nofpexcept VMULSSrr killed %7, killed %5, implicit $mxcsr ; CHECK: %9:fr32 = ninf afn nofpexcept VMULSSrr [[COPY]], %8, implicit $mxcsr @@ -35,10 +35,10 @@ define float @sqrt_ieee_ninf(float %f) #0 { ; CHECK: %12:fr32 = ninf afn nofpexcept VMULSSrr killed %11, killed %10, implicit $mxcsr ; CHECK: [[COPY1:%[0-9]+]]:vr128 = COPY %12 ; CHECK: [[COPY2:%[0-9]+]]:vr128 = COPY [[COPY]] - ; CHECK: [[VPBROADCASTDrm:%[0-9]+]]:vr128 = VPBROADCASTDrm $rip, 1, $noreg, %const.2, $noreg :: (load 4 from constant-pool) + ; CHECK: [[VPBROADCASTDrm:%[0-9]+]]:vr128 = VPBROADCASTDrm $rip, 1, $noreg, %const.2, $noreg :: (load (s32) from constant-pool) ; CHECK: [[VPANDrr:%[0-9]+]]:vr128 = VPANDrr killed [[COPY2]], killed [[VPBROADCASTDrm]] ; CHECK: [[COPY3:%[0-9]+]]:fr32 = COPY [[VPANDrr]] - ; CHECK: %18:fr32 = nofpexcept VCMPSSrm killed [[COPY3]], $rip, 1, $noreg, %const.3, $noreg, 1, implicit $mxcsr :: (load 4 from constant-pool) + ; CHECK: %18:fr32 = nofpexcept VCMPSSrm killed [[COPY3]], $rip, 1, $noreg, %const.3, $noreg, 1, implicit $mxcsr :: (load (s32) from constant-pool) ; CHECK: [[COPY4:%[0-9]+]]:vr128 = COPY %18 ; CHECK: [[VPANDNrr:%[0-9]+]]:vr128 = VPANDNrr killed [[COPY4]], killed [[COPY1]] ; CHECK: [[COPY5:%[0-9]+]]:fr32 = COPY [[VPANDNrr]] @@ -69,9 +69,9 @@ define float @sqrt_daz_ninf(float %f) #1 { ; CHECK: [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF ; CHECK: [[VRSQRTSSr:%[0-9]+]]:fr32 = VRSQRTSSr killed [[DEF]], [[COPY]] ; CHECK: %3:fr32 = ninf afn nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr - ; CHECK: [[VMOVSSrm_alt:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load 4 from constant-pool) + ; CHECK: [[VMOVSSrm_alt:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load (s32) from constant-pool) ; CHECK: %5:fr32 = ninf afn nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed %3, [[VMOVSSrm_alt]], implicit $mxcsr - ; CHECK: [[VMOVSSrm_alt1:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.1, $noreg :: (load 4 from constant-pool) + ; CHECK: [[VMOVSSrm_alt1:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.1, $noreg :: (load (s32) from constant-pool) ; CHECK: %7:fr32 = ninf afn nofpexcept VMULSSrr [[VRSQRTSSr]], [[VMOVSSrm_alt1]], implicit $mxcsr ; CHECK: %8:fr32 = ninf afn nofpexcept VMULSSrr killed %7, killed %5, implicit $mxcsr ; CHECK: %9:fr32 = ninf afn nofpexcept VMULSSrr [[COPY]], %8, implicit $mxcsr @@ -98,9 +98,9 @@ define float @rsqrt_ieee(float %f) #0 { ; CHECK: [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF ; CHECK: [[VRSQRTSSr:%[0-9]+]]:fr32 = nnan ninf nsz arcp contract afn reassoc VRSQRTSSr killed [[DEF]], [[COPY]] ; CHECK: %3:fr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr - ; CHECK: [[VMOVSSrm_alt:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load 4 from constant-pool) + ; CHECK: [[VMOVSSrm_alt:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load (s32) from constant-pool) ; CHECK: %5:fr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed %3, [[VMOVSSrm_alt]], implicit $mxcsr - ; CHECK: [[VMOVSSrm_alt1:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.1, $noreg :: (load 4 from constant-pool) + ; CHECK: [[VMOVSSrm_alt1:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.1, $noreg :: (load (s32) from constant-pool) ; CHECK: %7:fr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept VMULSSrr [[VRSQRTSSr]], [[VMOVSSrm_alt1]], implicit $mxcsr ; CHECK: %8:fr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept VMULSSrr killed %7, killed %5, implicit $mxcsr ; CHECK: %9:fr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept VMULSSrr [[COPY]], %8, implicit $mxcsr @@ -122,9 +122,9 @@ define float @rsqrt_daz(float %f) #1 { ; CHECK: [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF ; CHECK: [[VRSQRTSSr:%[0-9]+]]:fr32 = nnan ninf nsz arcp contract afn reassoc VRSQRTSSr killed [[DEF]], [[COPY]] ; CHECK: %3:fr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr - ; CHECK: [[VMOVSSrm_alt:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load 4 from constant-pool) + ; CHECK: [[VMOVSSrm_alt:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load (s32) from constant-pool) ; CHECK: %5:fr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed %3, [[VMOVSSrm_alt]], implicit $mxcsr - ; CHECK: [[VMOVSSrm_alt1:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.1, $noreg :: (load 4 from constant-pool) + ; CHECK: [[VMOVSSrm_alt1:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.1, $noreg :: (load (s32) from constant-pool) ; CHECK: %7:fr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept VMULSSrr [[VRSQRTSSr]], [[VMOVSSrm_alt1]], implicit $mxcsr ; CHECK: %8:fr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept VMULSSrr killed %7, killed %5, implicit $mxcsr ; CHECK: %9:fr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept VMULSSrr [[COPY]], %8, implicit $mxcsr diff --git a/llvm/test/CodeGen/X86/stack-folding-adx.mir b/llvm/test/CodeGen/X86/stack-folding-adx.mir index 10bfd48a9a1cf..11b1f4654e05a 100644 --- a/llvm/test/CodeGen/X86/stack-folding-adx.mir +++ b/llvm/test/CodeGen/X86/stack-folding-adx.mir @@ -84,18 +84,18 @@ body: | ; CHECK-LABEL: name: stack_fold_adcx32 ; CHECK: liveins: $edi, $esi, $edx, $rcx - ; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, $rcx :: (store 8 into %stack.0) - ; CHECK: MOV32mr %stack.1, 1, $noreg, 0, $noreg, $edx :: (store 4 into %stack.1) - ; CHECK: MOV32mr %stack.2, 1, $noreg, 0, $noreg, $esi :: (store 4 into %stack.2) - ; CHECK: MOV32mr %stack.3, 1, $noreg, 0, $noreg, $edi :: (store 4 into %stack.3) + ; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, $rcx :: (store (s64) into %stack.0) + ; CHECK: MOV32mr %stack.1, 1, $noreg, 0, $noreg, $edx :: (store (s32) into %stack.1) + ; CHECK: MOV32mr %stack.2, 1, $noreg, 0, $noreg, $esi :: (store (s32) into %stack.2) + ; CHECK: MOV32mr %stack.3, 1, $noreg, 0, $noreg, $edi :: (store (s32) into %stack.3) ; CHECK: INLINEASM &nop, 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $rax, 12 /* clobber */, implicit-def dead early-clobber $rbx, 12 /* clobber */, implicit-def dead early-clobber $rcx, 12 /* clobber */, implicit-def dead early-clobber $rdx, 12 /* clobber */, implicit-def dead early-clobber $rsi, 12 /* clobber */, implicit-def dead early-clobber $rdi, 12 /* clobber */, implicit-def dead early-clobber $rbp, 12 /* clobber */, implicit-def dead early-clobber $r8, 12 /* clobber */, implicit-def dead early-clobber $r9, 12 /* clobber */, implicit-def dead early-clobber $r10, 12 /* clobber */, implicit-def dead early-clobber $r11, 12 /* clobber */, implicit-def dead early-clobber $r12, 12 /* clobber */, implicit-def dead early-clobber $r13, 12 /* clobber */, implicit-def dead early-clobber $r14, 12 /* clobber */, implicit-def dead early-clobber $r15 - ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %stack.3, 1, $noreg, 0, $noreg :: (load 4 from %stack.3) + ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %stack.3, 1, $noreg, 0, $noreg :: (load (s32) from %stack.3) ; CHECK: dead [[MOV32rm]].sub_8bit:gr32 = ADD8ri [[MOV32rm]].sub_8bit, -1, implicit-def $eflags - ; CHECK: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm %stack.2, 1, $noreg, 0, $noreg :: (load 4 from %stack.2) - ; CHECK: [[ADCX32rm:%[0-9]+]]:gr32 = ADCX32rm [[ADCX32rm]], %stack.1, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit killed $eflags :: (load 4 from %stack.1) + ; CHECK: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm %stack.2, 1, $noreg, 0, $noreg :: (load (s32) from %stack.2) + ; CHECK: [[ADCX32rm:%[0-9]+]]:gr32 = ADCX32rm [[ADCX32rm]], %stack.1, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit killed $eflags :: (load (s32) from %stack.1) ; CHECK: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 2, implicit killed $eflags - ; CHECK: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load 8 from %stack.0) - ; CHECK: MOV32mr [[MOV64rm]], 1, $noreg, 0, $noreg, [[ADCX32rm]] :: (store 4 into %ir.3, align 1) + ; CHECK: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %stack.0) + ; CHECK: MOV32mr [[MOV64rm]], 1, $noreg, 0, $noreg, [[ADCX32rm]] :: (store (s32) into %ir.3, align 1) ; CHECK: $al = COPY [[SETCCr]] ; CHECK: RET 0, $al %3:gr64 = COPY $rcx @@ -106,7 +106,7 @@ body: | dead %0.sub_8bit:gr32 = ADD8ri %0.sub_8bit, -1, implicit-def $eflags %6:gr32 = ADCX32rr %6, %2, implicit-def $eflags, implicit killed $eflags %7:gr8 = SETCCr 2, implicit killed $eflags - MOV32mr %3, 1, $noreg, 0, $noreg, %6 :: (store 4 into %ir.3, align 1) + MOV32mr %3, 1, $noreg, 0, $noreg, %6 :: (store (s32) into %ir.3, align 1) $al = COPY %7 RET 0, killed $al @@ -135,18 +135,18 @@ body: | ; CHECK-LABEL: name: stack_fold_adcx64 ; CHECK: liveins: $edi, $rsi, $rdx, $rcx - ; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, $rcx :: (store 8 into %stack.0) - ; CHECK: MOV64mr %stack.1, 1, $noreg, 0, $noreg, $rdx :: (store 8 into %stack.1) - ; CHECK: MOV64mr %stack.2, 1, $noreg, 0, $noreg, $rsi :: (store 8 into %stack.2) - ; CHECK: MOV32mr %stack.3, 1, $noreg, 0, $noreg, $edi :: (store 4 into %stack.3) + ; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, $rcx :: (store (s64) into %stack.0) + ; CHECK: MOV64mr %stack.1, 1, $noreg, 0, $noreg, $rdx :: (store (s64) into %stack.1) + ; CHECK: MOV64mr %stack.2, 1, $noreg, 0, $noreg, $rsi :: (store (s64) into %stack.2) + ; CHECK: MOV32mr %stack.3, 1, $noreg, 0, $noreg, $edi :: (store (s32) into %stack.3) ; CHECK: INLINEASM &nop, 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $rax, 12 /* clobber */, implicit-def dead early-clobber $rbx, 12 /* clobber */, implicit-def dead early-clobber $rcx, 12 /* clobber */, implicit-def dead early-clobber $rdx, 12 /* clobber */, implicit-def dead early-clobber $rsi, 12 /* clobber */, implicit-def dead early-clobber $rdi, 12 /* clobber */, implicit-def dead early-clobber $rbp, 12 /* clobber */, implicit-def dead early-clobber $r8, 12 /* clobber */, implicit-def dead early-clobber $r9, 12 /* clobber */, implicit-def dead early-clobber $r10, 12 /* clobber */, implicit-def dead early-clobber $r11, 12 /* clobber */, implicit-def dead early-clobber $r12, 12 /* clobber */, implicit-def dead early-clobber $r13, 12 /* clobber */, implicit-def dead early-clobber $r14, 12 /* clobber */, implicit-def dead early-clobber $r15 - ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %stack.3, 1, $noreg, 0, $noreg :: (load 4 from %stack.3) + ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %stack.3, 1, $noreg, 0, $noreg :: (load (s32) from %stack.3) ; CHECK: dead [[MOV32rm]].sub_8bit:gr32 = ADD8ri [[MOV32rm]].sub_8bit, -1, implicit-def $eflags - ; CHECK: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm %stack.2, 1, $noreg, 0, $noreg :: (load 8 from %stack.2) - ; CHECK: [[ADCX64rm:%[0-9]+]]:gr64 = ADCX64rm [[ADCX64rm]], %stack.1, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit killed $eflags :: (load 8 from %stack.1) + ; CHECK: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm %stack.2, 1, $noreg, 0, $noreg :: (load (s64) from %stack.2) + ; CHECK: [[ADCX64rm:%[0-9]+]]:gr64 = ADCX64rm [[ADCX64rm]], %stack.1, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit killed $eflags :: (load (s64) from %stack.1) ; CHECK: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 2, implicit killed $eflags - ; CHECK: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load 8 from %stack.0) - ; CHECK: MOV64mr [[MOV64rm]], 1, $noreg, 0, $noreg, [[ADCX64rm]] :: (store 8 into %ir.3, align 1) + ; CHECK: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %stack.0) + ; CHECK: MOV64mr [[MOV64rm]], 1, $noreg, 0, $noreg, [[ADCX64rm]] :: (store (s64) into %ir.3, align 1) ; CHECK: $al = COPY [[SETCCr]] ; CHECK: RET 0, $al %3:gr64 = COPY $rcx @@ -157,7 +157,7 @@ body: | dead %0.sub_8bit:gr32 = ADD8ri %0.sub_8bit, -1, implicit-def $eflags %6:gr64 = ADCX64rr %6, %2, implicit-def $eflags, implicit killed $eflags %7:gr8 = SETCCr 2, implicit killed $eflags - MOV64mr %3, 1, $noreg, 0, $noreg, %6 :: (store 8 into %ir.3, align 1) + MOV64mr %3, 1, $noreg, 0, $noreg, %6 :: (store (s64) into %ir.3, align 1) $al = COPY %7 RET 0, killed $al @@ -186,18 +186,18 @@ body: | ; CHECK-LABEL: name: stack_fold_adox32 ; CHECK: liveins: $edi, $esi, $edx, $rcx - ; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, $rcx :: (store 8 into %stack.0) - ; CHECK: MOV32mr %stack.1, 1, $noreg, 0, $noreg, $edx :: (store 4 into %stack.1) - ; CHECK: MOV32mr %stack.2, 1, $noreg, 0, $noreg, $esi :: (store 4 into %stack.2) - ; CHECK: MOV32mr %stack.3, 1, $noreg, 0, $noreg, $edi :: (store 4 into %stack.3) + ; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, $rcx :: (store (s64) into %stack.0) + ; CHECK: MOV32mr %stack.1, 1, $noreg, 0, $noreg, $edx :: (store (s32) into %stack.1) + ; CHECK: MOV32mr %stack.2, 1, $noreg, 0, $noreg, $esi :: (store (s32) into %stack.2) + ; CHECK: MOV32mr %stack.3, 1, $noreg, 0, $noreg, $edi :: (store (s32) into %stack.3) ; CHECK: INLINEASM &nop, 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $rax, 12 /* clobber */, implicit-def dead early-clobber $rbx, 12 /* clobber */, implicit-def dead early-clobber $rcx, 12 /* clobber */, implicit-def dead early-clobber $rdx, 12 /* clobber */, implicit-def dead early-clobber $rsi, 12 /* clobber */, implicit-def dead early-clobber $rdi, 12 /* clobber */, implicit-def dead early-clobber $rbp, 12 /* clobber */, implicit-def dead early-clobber $r8, 12 /* clobber */, implicit-def dead early-clobber $r9, 12 /* clobber */, implicit-def dead early-clobber $r10, 12 /* clobber */, implicit-def dead early-clobber $r11, 12 /* clobber */, implicit-def dead early-clobber $r12, 12 /* clobber */, implicit-def dead early-clobber $r13, 12 /* clobber */, implicit-def dead early-clobber $r14, 12 /* clobber */, implicit-def dead early-clobber $r15 - ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %stack.3, 1, $noreg, 0, $noreg :: (load 4 from %stack.3) + ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %stack.3, 1, $noreg, 0, $noreg :: (load (s32) from %stack.3) ; CHECK: dead [[MOV32rm]].sub_8bit:gr32 = ADD8ri [[MOV32rm]].sub_8bit, -1, implicit-def $eflags - ; CHECK: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm %stack.2, 1, $noreg, 0, $noreg :: (load 4 from %stack.2) - ; CHECK: [[ADOX32rm:%[0-9]+]]:gr32 = ADOX32rm [[ADOX32rm]], %stack.1, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit killed $eflags :: (load 4 from %stack.1) + ; CHECK: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm %stack.2, 1, $noreg, 0, $noreg :: (load (s32) from %stack.2) + ; CHECK: [[ADOX32rm:%[0-9]+]]:gr32 = ADOX32rm [[ADOX32rm]], %stack.1, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit killed $eflags :: (load (s32) from %stack.1) ; CHECK: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 2, implicit killed $eflags - ; CHECK: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load 8 from %stack.0) - ; CHECK: MOV32mr [[MOV64rm]], 1, $noreg, 0, $noreg, [[ADOX32rm]] :: (store 4 into %ir.3, align 1) + ; CHECK: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %stack.0) + ; CHECK: MOV32mr [[MOV64rm]], 1, $noreg, 0, $noreg, [[ADOX32rm]] :: (store (s32) into %ir.3, align 1) ; CHECK: $al = COPY [[SETCCr]] ; CHECK: RET 0, $al %3:gr64 = COPY $rcx @@ -208,7 +208,7 @@ body: | dead %0.sub_8bit:gr32 = ADD8ri %0.sub_8bit, -1, implicit-def $eflags %6:gr32 = ADOX32rr %6, %2, implicit-def $eflags, implicit killed $eflags %7:gr8 = SETCCr 2, implicit killed $eflags - MOV32mr %3, 1, $noreg, 0, $noreg, %6 :: (store 4 into %ir.3, align 1) + MOV32mr %3, 1, $noreg, 0, $noreg, %6 :: (store (s32) into %ir.3, align 1) $al = COPY %7 RET 0, killed $al @@ -237,18 +237,18 @@ body: | ; CHECK-LABEL: name: stack_fold_adox64 ; CHECK: liveins: $edi, $rsi, $rdx, $rcx - ; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, $rcx :: (store 8 into %stack.0) - ; CHECK: MOV64mr %stack.1, 1, $noreg, 0, $noreg, $rdx :: (store 8 into %stack.1) - ; CHECK: MOV64mr %stack.2, 1, $noreg, 0, $noreg, $rsi :: (store 8 into %stack.2) - ; CHECK: MOV32mr %stack.3, 1, $noreg, 0, $noreg, $edi :: (store 4 into %stack.3) + ; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, $rcx :: (store (s64) into %stack.0) + ; CHECK: MOV64mr %stack.1, 1, $noreg, 0, $noreg, $rdx :: (store (s64) into %stack.1) + ; CHECK: MOV64mr %stack.2, 1, $noreg, 0, $noreg, $rsi :: (store (s64) into %stack.2) + ; CHECK: MOV32mr %stack.3, 1, $noreg, 0, $noreg, $edi :: (store (s32) into %stack.3) ; CHECK: INLINEASM &nop, 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $rax, 12 /* clobber */, implicit-def dead early-clobber $rbx, 12 /* clobber */, implicit-def dead early-clobber $rcx, 12 /* clobber */, implicit-def dead early-clobber $rdx, 12 /* clobber */, implicit-def dead early-clobber $rsi, 12 /* clobber */, implicit-def dead early-clobber $rdi, 12 /* clobber */, implicit-def dead early-clobber $rbp, 12 /* clobber */, implicit-def dead early-clobber $r8, 12 /* clobber */, implicit-def dead early-clobber $r9, 12 /* clobber */, implicit-def dead early-clobber $r10, 12 /* clobber */, implicit-def dead early-clobber $r11, 12 /* clobber */, implicit-def dead early-clobber $r12, 12 /* clobber */, implicit-def dead early-clobber $r13, 12 /* clobber */, implicit-def dead early-clobber $r14, 12 /* clobber */, implicit-def dead early-clobber $r15 - ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %stack.3, 1, $noreg, 0, $noreg :: (load 4 from %stack.3) + ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %stack.3, 1, $noreg, 0, $noreg :: (load (s32) from %stack.3) ; CHECK: dead [[MOV32rm]].sub_8bit:gr32 = ADD8ri [[MOV32rm]].sub_8bit, -1, implicit-def $eflags - ; CHECK: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm %stack.2, 1, $noreg, 0, $noreg :: (load 8 from %stack.2) - ; CHECK: [[ADOX64rm:%[0-9]+]]:gr64 = ADOX64rm [[ADOX64rm]], %stack.1, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit killed $eflags :: (load 8 from %stack.1) + ; CHECK: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm %stack.2, 1, $noreg, 0, $noreg :: (load (s64) from %stack.2) + ; CHECK: [[ADOX64rm:%[0-9]+]]:gr64 = ADOX64rm [[ADOX64rm]], %stack.1, 1, $noreg, 0, $noreg, implicit-def $eflags, implicit killed $eflags :: (load (s64) from %stack.1) ; CHECK: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 2, implicit killed $eflags - ; CHECK: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load 8 from %stack.0) - ; CHECK: MOV64mr [[MOV64rm]], 1, $noreg, 0, $noreg, [[ADOX64rm]] :: (store 8 into %ir.3, align 1) + ; CHECK: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %stack.0) + ; CHECK: MOV64mr [[MOV64rm]], 1, $noreg, 0, $noreg, [[ADOX64rm]] :: (store (s64) into %ir.3, align 1) ; CHECK: $al = COPY [[SETCCr]] ; CHECK: RET 0, $al %3:gr64 = COPY $rcx @@ -259,7 +259,7 @@ body: | dead %0.sub_8bit:gr32 = ADD8ri %0.sub_8bit, -1, implicit-def $eflags %6:gr64 = ADOX64rr %6, %2, implicit-def $eflags, implicit killed $eflags %7:gr8 = SETCCr 2, implicit killed $eflags - MOV64mr %3, 1, $noreg, 0, $noreg, %6 :: (store 8 into %ir.3, align 1) + MOV64mr %3, 1, $noreg, 0, $noreg, %6 :: (store (s64) into %ir.3, align 1) $al = COPY %7 RET 0, killed $al diff --git a/llvm/test/CodeGen/X86/stack-folding-bmi2.mir b/llvm/test/CodeGen/X86/stack-folding-bmi2.mir index dbbb93ee1a610..60892ba0e5e46 100644 --- a/llvm/test/CodeGen/X86/stack-folding-bmi2.mir +++ b/llvm/test/CodeGen/X86/stack-folding-bmi2.mir @@ -50,11 +50,11 @@ body: | ; CHECK-LABEL: name: stack_fold_mulx_u32 ; CHECK: liveins: $edi, $esi - ; CHECK: MOV32mr %stack.0, 1, $noreg, 0, $noreg, $esi :: (store 4 into %stack.0) - ; CHECK: MOV32mr %stack.1, 1, $noreg, 0, $noreg, $edi :: (store 4 into %stack.1) + ; CHECK: MOV32mr %stack.0, 1, $noreg, 0, $noreg, $esi :: (store (s32) into %stack.0) + ; CHECK: MOV32mr %stack.1, 1, $noreg, 0, $noreg, $edi :: (store (s32) into %stack.1) ; CHECK: INLINEASM &nop, 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $rax, 12 /* clobber */, implicit-def dead early-clobber $rbx, 12 /* clobber */, implicit-def dead early-clobber $rcx, 12 /* clobber */, implicit-def dead early-clobber $rdx, 12 /* clobber */, implicit-def dead early-clobber $rsi, 12 /* clobber */, implicit-def dead early-clobber $rdi, 12 /* clobber */, implicit-def dead early-clobber $rbp, 12 /* clobber */, implicit-def dead early-clobber $r8, 12 /* clobber */, implicit-def dead early-clobber $r9, 12 /* clobber */, implicit-def dead early-clobber $r10, 12 /* clobber */, implicit-def dead early-clobber $r11, 12 /* clobber */, implicit-def dead early-clobber $r12, 12 /* clobber */, implicit-def dead early-clobber $r13, 12 /* clobber */, implicit-def dead early-clobber $r14, 12 /* clobber */, implicit-def dead early-clobber $r15 - ; CHECK: $edx = MOV32rm %stack.1, 1, $noreg, 0, $noreg :: (load 4 from %stack.1) - ; CHECK: %3:gr32, dead %4:gr32 = MULX32rm %stack.0, 1, $noreg, 0, $noreg, implicit $edx :: (load 4 from %stack.0) + ; CHECK: $edx = MOV32rm %stack.1, 1, $noreg, 0, $noreg :: (load (s32) from %stack.1) + ; CHECK: %3:gr32, dead %4:gr32 = MULX32rm %stack.0, 1, $noreg, 0, $noreg, implicit $edx :: (load (s32) from %stack.0) ; CHECK: $eax = COPY %3 ; CHECK: RET 0, $eax %1:gr32 = COPY $esi @@ -85,11 +85,11 @@ body: | ; CHECK-LABEL: name: stack_fold_mulx_u64 ; CHECK: liveins: $rdi, $rsi - ; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, $rsi :: (store 8 into %stack.0) - ; CHECK: MOV64mr %stack.1, 1, $noreg, 0, $noreg, $rdi :: (store 8 into %stack.1) + ; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, $rsi :: (store (s64) into %stack.0) + ; CHECK: MOV64mr %stack.1, 1, $noreg, 0, $noreg, $rdi :: (store (s64) into %stack.1) ; CHECK: INLINEASM &nop, 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $rax, 12 /* clobber */, implicit-def dead early-clobber $rbx, 12 /* clobber */, implicit-def dead early-clobber $rcx, 12 /* clobber */, implicit-def dead early-clobber $rdx, 12 /* clobber */, implicit-def dead early-clobber $rsi, 12 /* clobber */, implicit-def dead early-clobber $rdi, 12 /* clobber */, implicit-def dead early-clobber $rbp, 12 /* clobber */, implicit-def dead early-clobber $r8, 12 /* clobber */, implicit-def dead early-clobber $r9, 12 /* clobber */, implicit-def dead early-clobber $r10, 12 /* clobber */, implicit-def dead early-clobber $r11, 12 /* clobber */, implicit-def dead early-clobber $r12, 12 /* clobber */, implicit-def dead early-clobber $r13, 12 /* clobber */, implicit-def dead early-clobber $r14, 12 /* clobber */, implicit-def dead early-clobber $r15 - ; CHECK: $rdx = MOV64rm %stack.1, 1, $noreg, 0, $noreg :: (load 8 from %stack.1) - ; CHECK: %3:gr64, dead %4:gr64 = MULX64rm %stack.0, 1, $noreg, 0, $noreg, implicit $rdx :: (load 8 from %stack.0) + ; CHECK: $rdx = MOV64rm %stack.1, 1, $noreg, 0, $noreg :: (load (s64) from %stack.1) + ; CHECK: %3:gr64, dead %4:gr64 = MULX64rm %stack.0, 1, $noreg, 0, $noreg, implicit $rdx :: (load (s64) from %stack.0) ; CHECK: $rax = COPY %3 ; CHECK: RET 0, $rax %1:gr64 = COPY $rsi diff --git a/llvm/test/CodeGen/X86/stack-folding-fp-nofpexcept.mir b/llvm/test/CodeGen/X86/stack-folding-fp-nofpexcept.mir index 96cd0aa516e2e..017594dca8b9e 100644 --- a/llvm/test/CodeGen/X86/stack-folding-fp-nofpexcept.mir +++ b/llvm/test/CodeGen/X86/stack-folding-fp-nofpexcept.mir @@ -35,10 +35,10 @@ body: | ; CHECK-LABEL: name: stack_fold_addpd ; CHECK: liveins: $xmm0, $xmm1 - ; CHECK: MOVAPSmr %stack.0, 1, $noreg, 0, $noreg, $xmm1 :: (store 16 into %stack.0) + ; CHECK: MOVAPSmr %stack.0, 1, $noreg, 0, $noreg, $xmm1 :: (store (s128) into %stack.0) ; CHECK: [[COPY:%[0-9]+]]:vr128 = COPY $xmm0 ; CHECK: INLINEASM &nop, 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $xmm1, 12 /* clobber */, implicit-def dead early-clobber $xmm2, 12 /* clobber */, implicit-def dead early-clobber $xmm3, 12 /* clobber */, implicit-def dead early-clobber $xmm4, 12 /* clobber */, implicit-def dead early-clobber $xmm5, 12 /* clobber */, implicit-def dead early-clobber $xmm6, 12 /* clobber */, implicit-def dead early-clobber $xmm7, 12 /* clobber */, implicit-def dead early-clobber $xmm8, 12 /* clobber */, implicit-def dead early-clobber $xmm9, 12 /* clobber */, implicit-def dead early-clobber $xmm10, 12 /* clobber */, implicit-def dead early-clobber $xmm11, 12 /* clobber */, implicit-def dead early-clobber $xmm12, 12 /* clobber */, implicit-def dead early-clobber $xmm13, 12 /* clobber */, implicit-def dead early-clobber $xmm14, 12 /* clobber */, implicit-def dead early-clobber $xmm15, 12 /* clobber */, implicit-def dead early-clobber $eflags - ; CHECK: [[COPY]]:vr128 = nofpexcept ADDPDrm [[COPY]], %stack.0, 1, $noreg, 0, $noreg, implicit $mxcsr :: (load 16 from %stack.0) + ; CHECK: [[COPY]]:vr128 = nofpexcept ADDPDrm [[COPY]], %stack.0, 1, $noreg, 0, $noreg, implicit $mxcsr :: (load (s128) from %stack.0) ; CHECK: $xmm0 = COPY [[COPY]] ; CHECK: RET 0, $xmm0 %1:vr128 = COPY $xmm1 diff --git a/llvm/test/CodeGen/X86/stack-protector-weight.ll b/llvm/test/CodeGen/X86/stack-protector-weight.ll index 38fe2266aac80..bf50ded68ceec 100644 --- a/llvm/test/CodeGen/X86/stack-protector-weight.ll +++ b/llvm/test/CodeGen/X86/stack-protector-weight.ll @@ -16,16 +16,16 @@ ; DARWIN-IR: CALL64pcrel32 @__stack_chk_fail ; MSVC-SELDAG: # Machine code for function test_branch_weights: -; MSVC-SELDAG: :: (volatile load 4 from @__security_cookie) -; MSVC-SELDAG: (store 4 into stack) -; MSVC-SELDAG: (volatile load 4 from %stack.0.StackGuardSlot) +; MSVC-SELDAG: :: (volatile load (s32) from @__security_cookie) +; MSVC-SELDAG: (store (s32) into stack) +; MSVC-SELDAG: (volatile load (s32) from %stack.0.StackGuardSlot) ; MSVC-SELDAG: CALLpcrel32 @__security_check_cookie ; MSVC always uses selection DAG now. ; MSVC-IR: # Machine code for function test_branch_weights: -; MSVC-IR: :: (volatile load 4 from @__security_cookie) -; MSVC-IR: (store 4 into stack) -; MSVC-IR: (volatile load 4 from %stack.0.StackGuardSlot) +; MSVC-IR: :: (volatile load (s32) from @__security_cookie) +; MSVC-IR: (store (s32) into stack) +; MSVC-IR: (volatile load (s32) from %stack.0.StackGuardSlot) ; MSVC-IR: CALLpcrel32 @__security_check_cookie define i32 @test_branch_weights(i32 %n) #0 { diff --git a/llvm/test/CodeGen/X86/statepoint-fixup-call.mir b/llvm/test/CodeGen/X86/statepoint-fixup-call.mir index 0dd8f4a6d8077..3c70f56f8456a 100644 --- a/llvm/test/CodeGen/X86/statepoint-fixup-call.mir +++ b/llvm/test/CodeGen/X86/statepoint-fixup-call.mir @@ -71,9 +71,9 @@ body: | ; CHECK-LABEL: name: test_one ; CHECK: liveins: $rdi ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - ; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed $rdi :: (store 8 into %stack.0) - ; CHECK: STATEPOINT 0, 0, 0, @foo, 2, 0, 2, 0, 2, 0, 2, 1, 1, 8, %stack.0, 0, 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp :: (load store 8 on %stack.0) - ; CHECK: $rdi = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load 8 from %stack.0) + ; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed $rdi :: (store (s64) into %stack.0) + ; CHECK: STATEPOINT 0, 0, 0, @foo, 2, 0, 2, 0, 2, 0, 2, 1, 1, 8, %stack.0, 0, 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp :: (load store (s64) on %stack.0) + ; CHECK: $rdi = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %stack.0) ; CHECK: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ; CHECK: $rax = COPY killed renamable $rdi ; CHECK: RET 0, killed $rax diff --git a/llvm/test/CodeGen/X86/statepoint-fixup-copy-prop-neg.mir b/llvm/test/CodeGen/X86/statepoint-fixup-copy-prop-neg.mir index 4f69ec759daa7..1c28022ee6a2d 100644 --- a/llvm/test/CodeGen/X86/statepoint-fixup-copy-prop-neg.mir +++ b/llvm/test/CodeGen/X86/statepoint-fixup-copy-prop-neg.mir @@ -75,19 +75,19 @@ body: | ; CHECK-LABEL: name: test ; CHECK: liveins: $rcx, $rsi ; CHECK: renamable $rdi = COPY $rsi - ; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $rcx :: (store 8 into %stack.0) + ; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $rcx :: (store (s64) into %stack.0) ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - ; CHECK: MOV64mr %stack.1, 1, $noreg, 0, $noreg, $rdi :: (store 8 into %stack.1) - ; CHECK: STATEPOINT 0, 0, 1, @foo, $rdi, 2, 0, 2, 0, 2, 1, 1, 8, %stack.1, 0, 2, 1, 1, 8, %stack.0, 0, 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 8 on %stack.0), (load 8 from %stack.1) + ; CHECK: MOV64mr %stack.1, 1, $noreg, 0, $noreg, $rdi :: (store (s64) into %stack.1) + ; CHECK: STATEPOINT 0, 0, 1, @foo, $rdi, 2, 0, 2, 0, 2, 1, 1, 8, %stack.1, 0, 2, 1, 1, 8, %stack.0, 0, 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store (s64) on %stack.0), (load (s64) from %stack.1) ; CHECK: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - ; CHECK: renamable $rax = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load 8 from %stack.0) + ; CHECK: renamable $rax = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %stack.0) ; CHECK: RET 0, $rax renamable $rdi = COPY $rsi - MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $rcx :: (store 8 into %stack.0) + MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $rcx :: (store (s64) into %stack.0) ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - STATEPOINT 0, 0, 1, @foo, $rdi, 2, 0, 2, 0, 2, 1, killed renamable $rdi, 2, 1, 1, 8, %stack.0, 0, 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 8 on %stack.0) + STATEPOINT 0, 0, 1, @foo, $rdi, 2, 0, 2, 0, 2, 1, killed renamable $rdi, 2, 1, 1, 8, %stack.0, 0, 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store (s64) on %stack.0) ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - renamable $rax = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load 8 from %stack.0) + renamable $rax = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %stack.0) RET 0, $rax ... diff --git a/llvm/test/CodeGen/X86/statepoint-fixup-invoke.mir b/llvm/test/CodeGen/X86/statepoint-fixup-invoke.mir index cbeb1f9279f8c..3a19808f8a117 100644 --- a/llvm/test/CodeGen/X86/statepoint-fixup-invoke.mir +++ b/llvm/test/CodeGen/X86/statepoint-fixup-invoke.mir @@ -87,13 +87,13 @@ body: | ; CHECK: bb.0.entry: ; CHECK: successors: %bb.1(0x7ffff800), %bb.3(0x00000800) ; CHECK: liveins: $rdi, $rsi - ; CHECK-DAG: MOV64mr %stack.0, 1, $noreg, 0, $noreg, $rsi :: (store 8 into %stack.0) - ; CHECK-DAG: MOV64mr %stack.1, 1, $noreg, 0, $noreg, $rdi :: (store 8 into %stack.1) + ; CHECK-DAG: MOV64mr %stack.0, 1, $noreg, 0, $noreg, $rsi :: (store (s64) into %stack.0) + ; CHECK-DAG: MOV64mr %stack.1, 1, $noreg, 0, $noreg, $rdi :: (store (s64) into %stack.1) ; CHECK: EH_LABEL ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - ; CHECK: STATEPOINT 0, 0, 1, @some_call, $rdi, 2, 0, 2, 0, 2, 5, 2, 0, 2, -1, 2, 0, 2, 0, 2, 0, 2, 2, 1, 8, %stack.0, 0, 1, 8, %stack.1, 0, 2, 0, 2, 2, 0, 0, 1, 1, csr_64, implicit-def $rsp, implicit-def $ssp :: (load store 8 on %stack.0), (load store 8 on %stack.1) - ; CHECK-DAG: $r14 = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load 8 from %stack.0) - ; CHECK-DAG: $rbx = MOV64rm %stack.1, 1, $noreg, 0, $noreg :: (load 8 from %stack.1) + ; CHECK: STATEPOINT 0, 0, 1, @some_call, $rdi, 2, 0, 2, 0, 2, 5, 2, 0, 2, -1, 2, 0, 2, 0, 2, 0, 2, 2, 1, 8, %stack.0, 0, 1, 8, %stack.1, 0, 2, 0, 2, 2, 0, 0, 1, 1, csr_64, implicit-def $rsp, implicit-def $ssp :: (load store (s64) on %stack.0), (load store (s64) on %stack.1) + ; CHECK-DAG: $r14 = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %stack.0) + ; CHECK-DAG: $rbx = MOV64rm %stack.1, 1, $noreg, 0, $noreg :: (load (s64) from %stack.1) ; CHECK: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ; CHECK: EH_LABEL ; CHECK: JMP_1 %bb.1 @@ -107,8 +107,8 @@ body: | ; CHECK: bb.3.exceptional_return (landing-pad): ; CHECK: liveins: $rax, $rdx, $r14 ; CHECK: EH_LABEL - ; CHECK-DAG: $r14 = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load 8 from %stack.0) - ; CHECK-DAG: $rbx = MOV64rm %stack.1, 1, $noreg, 0, $noreg :: (load 8 from %stack.1) + ; CHECK-DAG: $r14 = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %stack.0) + ; CHECK-DAG: $rbx = MOV64rm %stack.1, 1, $noreg, 0, $noreg :: (load (s64) from %stack.1) ; CHECK: $rax = COPY killed renamable $r14 ; CHECK: RET 0, $rax bb.0.entry: diff --git a/llvm/test/CodeGen/X86/statepoint-fixup-shared-ehpad.mir b/llvm/test/CodeGen/X86/statepoint-fixup-shared-ehpad.mir index 29e35f05a3574..5c4fda8f9353b 100644 --- a/llvm/test/CodeGen/X86/statepoint-fixup-shared-ehpad.mir +++ b/llvm/test/CodeGen/X86/statepoint-fixup-shared-ehpad.mir @@ -106,11 +106,11 @@ body: | ; CHECK: liveins: $rbx, $r14 ; CHECK: EH_LABEL ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - ; CHECK: MOV64mr [[STACK0:%stack.[0-9]+]], 1, $noreg, 0, $noreg, killed $rbx :: (store 8 into [[STACK0]]) - ; CHECK: MOV64mr [[STACK1:%stack.[0-9]+]], 1, $noreg, 0, $noreg, killed $r14 :: (store 8 into [[STACK1]]) - ; CHECK: STATEPOINT 0, 0, 0, @foo, 2, 0, 2, 0, 2, 0, 2, 2, 1, 8, [[STACK0]], 0, 1, 8, [[STACK1]], 0, 2, 0, 2, 2, 0, 0, 1, 1, csr_64, implicit-def $rsp, implicit-def $ssp :: (load store 8 on [[STACK1]]), (load store 8 on [[STACK0]]) - ; CHECK-DAG: $rbx = MOV64rm [[STACK0]], 1, $noreg, 0, $noreg :: (load 8 from [[STACK0]]) - ; CHECK-DAG: $r14 = MOV64rm [[STACK1]], 1, $noreg, 0, $noreg :: (load 8 from [[STACK1]]) + ; CHECK: MOV64mr [[STACK0:%stack.[0-9]+]], 1, $noreg, 0, $noreg, killed $rbx :: (store (s64) into [[STACK0]]) + ; CHECK: MOV64mr [[STACK1:%stack.[0-9]+]], 1, $noreg, 0, $noreg, killed $r14 :: (store (s64) into [[STACK1]]) + ; CHECK: STATEPOINT 0, 0, 0, @foo, 2, 0, 2, 0, 2, 0, 2, 2, 1, 8, [[STACK0]], 0, 1, 8, [[STACK1]], 0, 2, 0, 2, 2, 0, 0, 1, 1, csr_64, implicit-def $rsp, implicit-def $ssp :: (load store (s64) on [[STACK1]]), (load store (s64) on [[STACK0]]) + ; CHECK-DAG: $rbx = MOV64rm [[STACK0]], 1, $noreg, 0, $noreg :: (load (s64) from [[STACK0]]) + ; CHECK-DAG: $r14 = MOV64rm [[STACK1]], 1, $noreg, 0, $noreg :: (load (s64) from [[STACK1]]) ; CHECK: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ; CHECK: EH_LABEL ; CHECK: JMP_1 %bb.3 @@ -119,19 +119,19 @@ body: | ; CHECK: liveins: $rbx, $r14 ; CHECK: EH_LABEL ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - ; CHECK-DAG: MOV64mr [[STACK0]], 1, $noreg, 0, $noreg, killed $rbx :: (store 8 into [[STACK0]]) - ; CHECK-DAG: MOV64mr [[STACK1]], 1, $noreg, 0, $noreg, killed $r14 :: (store 8 into [[STACK1]]) - ; CHECK: STATEPOINT 0, 0, 0, @bar, 2, 0, 2, 0, 2, 0, 2, 2, 1, 8, %stack.0, 0, 1, 8, [[STACK1]], 0, 2, 0, 2, 2, 0, 0, 1, 1, csr_64, implicit-def $rsp, implicit-def $ssp :: (load store 8 on [[STACK1]]), (load store 8 on [[STACK0]]) - ; CHECK-DAG: $rbx = MOV64rm [[STACK0]], 1, $noreg, 0, $noreg :: (load 8 from [[STACK0]]) - ; CHECK-DAG: $r14 = MOV64rm [[STACK1]], 1, $noreg, 0, $noreg :: (load 8 from [[STACK1]]) + ; CHECK-DAG: MOV64mr [[STACK0]], 1, $noreg, 0, $noreg, killed $rbx :: (store (s64) into [[STACK0]]) + ; CHECK-DAG: MOV64mr [[STACK1]], 1, $noreg, 0, $noreg, killed $r14 :: (store (s64) into [[STACK1]]) + ; CHECK: STATEPOINT 0, 0, 0, @bar, 2, 0, 2, 0, 2, 0, 2, 2, 1, 8, %stack.0, 0, 1, 8, [[STACK1]], 0, 2, 0, 2, 2, 0, 0, 1, 1, csr_64, implicit-def $rsp, implicit-def $ssp :: (load store (s64) on [[STACK1]]), (load store (s64) on [[STACK0]]) + ; CHECK-DAG: $rbx = MOV64rm [[STACK0]], 1, $noreg, 0, $noreg :: (load (s64) from [[STACK0]]) + ; CHECK-DAG: $r14 = MOV64rm [[STACK1]], 1, $noreg, 0, $noreg :: (load (s64) from [[STACK1]]) ; CHECK: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ; CHECK: EH_LABEL ; CHECK: JMP_1 %bb.5 ; CHECK: bb.4 (landing-pad): ; CHECK: liveins: $rax, $rdx, $r14 ; CHECK: EH_LABEL - ; CHECK-DAG: $rbx = MOV64rm [[STACK0]], 1, $noreg, 0, $noreg :: (load 8 from [[STACK0]]) - ; CHECK-DAG: $r14 = MOV64rm [[STACK1]], 1, $noreg, 0, $noreg :: (load 8 from [[STACK1]]) + ; CHECK-DAG: $rbx = MOV64rm [[STACK0]], 1, $noreg, 0, $noreg :: (load (s64) from [[STACK0]]) + ; CHECK-DAG: $r14 = MOV64rm [[STACK1]], 1, $noreg, 0, $noreg :: (load (s64) from [[STACK1]]) ; CHECK: $rax = COPY killed renamable $r14 ; CHECK: RET 0, $rax bb.0: diff --git a/llvm/test/CodeGen/X86/statepoint-fixup-undef-def.mir b/llvm/test/CodeGen/X86/statepoint-fixup-undef-def.mir index 150ef9533a1e5..61be2a23665cc 100644 --- a/llvm/test/CodeGen/X86/statepoint-fixup-undef-def.mir +++ b/llvm/test/CodeGen/X86/statepoint-fixup-undef-def.mir @@ -98,24 +98,24 @@ body: | ; CHECK: renamable $r12 = COPY $rdx ; CHECK: renamable $r14 = COPY $rsi ; CHECK: renamable $r13 = COPY $rdi - ; CHECK: renamable $ebx = MOV32rm undef renamable $rax, 1, $noreg, 0, $noreg, implicit-def $rbx :: (load unordered 4 from `i32 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK: renamable $ebx = MOV32rm undef renamable $rax, 1, $noreg, 0, $noreg, implicit-def $rbx :: (load unordered (s32) from `i32 addrspace(1)* undef`, align 8, addrspace 1) ; CHECK: bb.1.bb7: ; CHECK: liveins: $rbx, $r12, $r13, $r14, $r15 ; CHECK: renamable $ebp = LEA64_32r renamable $rbx, 1, $noreg, 6, $noreg ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ; CHECK: CALL64pcrel32 target-flags(x86-plt) @wombat, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax ; CHECK: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - ; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $r13 :: (store 8 into %stack.0) - ; CHECK: MOV64mr %stack.1, 1, $noreg, 0, $noreg, killed renamable $r12 :: (store 8 into %stack.1) - ; CHECK: MOV64mr %stack.2, 1, $noreg, 0, $noreg, killed renamable $r15 :: (store 8 into %stack.2) + ; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $r13 :: (store (s64) into %stack.0) + ; CHECK: MOV64mr %stack.1, 1, $noreg, 0, $noreg, killed renamable $r12 :: (store (s64) into %stack.1) + ; CHECK: MOV64mr %stack.2, 1, $noreg, 0, $noreg, killed renamable $r15 :: (store (s64) into %stack.2) ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ; CHECK: $esi = COPY renamable $ebx ; CHECK: $edx = COPY killed renamable $ebp ; CHECK: $rcx = COPY killed renamable $rax ; CHECK: $r8d = MOV32r0 implicit-def dead $eflags - ; CHECK: MOV64mr %stack.3, 1, $noreg, 0, $noreg, killed $r14 :: (store 8 into %stack.3) - ; CHECK: STATEPOINT 2, 5, 5, undef renamable $rax, undef $rdi, $esi, $edx, $rcx, killed $r8d, 2, 0, 2, 0, 2, 7, undef renamable $rax, 1, 8, %stack.0, 0, 1, 8, %stack.1, 0, 1, 8, %stack.2, 0, renamable $ebx, undef renamable $eax, 2, 6, 2, 2, 1, 8, %stack.3, 0, undef renamable $rbx, 2, 0, 2, 2, 0, 0, 1, 1, csr_64, implicit-def $rsp, implicit-def $ssp, implicit killed $rbx :: (volatile load store 8 on %stack.0), (volatile load store 8 on %stack.1), (volatile load store 8 on %stack.2), (load store 8 on %stack.3) - ; CHECK: $r14 = MOV64rm %stack.3, 1, $noreg, 0, $noreg :: (load 8 from %stack.3) + ; CHECK: MOV64mr %stack.3, 1, $noreg, 0, $noreg, killed $r14 :: (store (s64) into %stack.3) + ; CHECK: STATEPOINT 2, 5, 5, undef renamable $rax, undef $rdi, $esi, $edx, $rcx, killed $r8d, 2, 0, 2, 0, 2, 7, undef renamable $rax, 1, 8, %stack.0, 0, 1, 8, %stack.1, 0, 1, 8, %stack.2, 0, renamable $ebx, undef renamable $eax, 2, 6, 2, 2, 1, 8, %stack.3, 0, undef renamable $rbx, 2, 0, 2, 2, 0, 0, 1, 1, csr_64, implicit-def $rsp, implicit-def $ssp, implicit killed $rbx :: (volatile load store (s64) on %stack.0), (volatile load store (s64) on %stack.1), (volatile load store (s64) on %stack.2), (load store (s64) on %stack.3) + ; CHECK: $r14 = MOV64rm %stack.3, 1, $noreg, 0, $noreg :: (load (s64) from %stack.3) ; CHECK: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ; CHECK: RET 0 bb.0.bb: @@ -126,7 +126,7 @@ body: | renamable $r12 = COPY $rdx renamable $r14 = COPY $rsi renamable $r13 = COPY $rdi - renamable $ebx = MOV32rm undef renamable $rax, 1, $noreg, 0, $noreg, implicit-def $rbx :: (load unordered 4 from `i32 addrspace(1)* undef`, align 8, addrspace 1) + renamable $ebx = MOV32rm undef renamable $rax, 1, $noreg, 0, $noreg, implicit-def $rbx :: (load unordered (s32) from `i32 addrspace(1)* undef`, align 8, addrspace 1) bb.1.bb7: liveins: $rbx, $r12, $r13, $r14, $r15 @@ -135,15 +135,15 @@ body: | ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp CALL64pcrel32 target-flags(x86-plt) @wombat, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $r13 :: (store 8 into %stack.0) - MOV64mr %stack.1, 1, $noreg, 0, $noreg, killed renamable $r12 :: (store 8 into %stack.1) - MOV64mr %stack.2, 1, $noreg, 0, $noreg, killed renamable $r15 :: (store 8 into %stack.2) + MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $r13 :: (store (s64) into %stack.0) + MOV64mr %stack.1, 1, $noreg, 0, $noreg, killed renamable $r12 :: (store (s64) into %stack.1) + MOV64mr %stack.2, 1, $noreg, 0, $noreg, killed renamable $r15 :: (store (s64) into %stack.2) ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp $esi = COPY renamable $ebx $edx = COPY killed renamable $ebp $rcx = COPY killed renamable $rax $r8d = MOV32r0 implicit-def dead $eflags - dead renamable $r14, dead renamable $rbx = STATEPOINT 2, 5, 5, undef renamable $rax, undef $rdi, $esi, $edx, $rcx, killed $r8d, 2, 0, 2, 0, 2, 7, undef renamable $rax, 1, 8, %stack.0, 0, 1, 8, %stack.1, 0, 1, 8, %stack.2, 0, renamable $ebx, undef renamable $eax, 2, 6, 2, 2, killed renamable $r14(tied-def 0), undef renamable $rbx(tied-def 1), 2, 0, 2, 2, 0, 0, 1, 1, csr_64, implicit-def $rsp, implicit-def $ssp, implicit killed $rbx :: (volatile load store 8 on %stack.0), (volatile load store 8 on %stack.1), (volatile load store 8 on %stack.2) + dead renamable $r14, dead renamable $rbx = STATEPOINT 2, 5, 5, undef renamable $rax, undef $rdi, $esi, $edx, $rcx, killed $r8d, 2, 0, 2, 0, 2, 7, undef renamable $rax, 1, 8, %stack.0, 0, 1, 8, %stack.1, 0, 1, 8, %stack.2, 0, renamable $ebx, undef renamable $eax, 2, 6, 2, 2, killed renamable $r14(tied-def 0), undef renamable $rbx(tied-def 1), 2, 0, 2, 2, 0, 0, 1, 1, csr_64, implicit-def $rsp, implicit-def $ssp, implicit killed $rbx :: (volatile load store (s64) on %stack.0), (volatile load store (s64) on %stack.1), (volatile load store (s64) on %stack.2) ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp RET 0 diff --git a/llvm/test/CodeGen/X86/statepoint-fixup-undef.mir b/llvm/test/CodeGen/X86/statepoint-fixup-undef.mir index 434c8400316d9..b86e96baf0793 100644 --- a/llvm/test/CodeGen/X86/statepoint-fixup-undef.mir +++ b/llvm/test/CodeGen/X86/statepoint-fixup-undef.mir @@ -99,24 +99,24 @@ body: | ; CHECK: renamable $r14 = COPY $rdx ; CHECK: renamable $r13 = COPY $rsi ; CHECK: renamable $r12 = COPY $rdi - ; CHECK: renamable $ebx = MOV32rm undef renamable $rax, 1, $noreg, 0, $noreg, implicit-def $rbx :: (load unordered 4 from `i32 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK: renamable $ebx = MOV32rm undef renamable $rax, 1, $noreg, 0, $noreg, implicit-def $rbx :: (load unordered (s32) from `i32 addrspace(1)* undef`, align 8, addrspace 1) ; CHECK: bb.1.bb7: ; CHECK: liveins: $rbx, $r12, $r13, $r14, $r15 ; CHECK: renamable $ebp = LEA64_32r renamable $rbx, 1, $noreg, 6, $noreg ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ; CHECK: CALL64pcrel32 target-flags(x86-plt) @wombat, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax ; CHECK: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - ; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $r13 :: (store 8 into %stack.0) - ; CHECK: MOV64mr %stack.1, 1, $noreg, 0, $noreg, killed renamable $r12 :: (store 8 into %stack.1) - ; CHECK: MOV64mr %stack.2, 1, $noreg, 0, $noreg, renamable $r14 :: (store 8 into %stack.2) - ; CHECK: MOV64mr %stack.3, 1, $noreg, 0, $noreg, killed renamable $r15 :: (store 8 into %stack.3) + ; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $r13 :: (store (s64) into %stack.0) + ; CHECK: MOV64mr %stack.1, 1, $noreg, 0, $noreg, killed renamable $r12 :: (store (s64) into %stack.1) + ; CHECK: MOV64mr %stack.2, 1, $noreg, 0, $noreg, renamable $r14 :: (store (s64) into %stack.2) + ; CHECK: MOV64mr %stack.3, 1, $noreg, 0, $noreg, killed renamable $r15 :: (store (s64) into %stack.3) ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ; CHECK: $rdi = COPY killed renamable $r14 ; CHECK: $esi = COPY renamable $ebx ; CHECK: $edx = COPY killed renamable $ebp ; CHECK: $rcx = COPY killed renamable $rax ; CHECK: $r8d = MOV32r0 implicit-def dead $eflags - ; CHECK: STATEPOINT 2, 5, 5, undef renamable $rax, $rdi, $esi, $edx, $rcx, killed $r8d, 2, 0, 2, 0, 2, 7, 1, 8, %stack.0, 0, 1, 8, %stack.1, 0, 1, 8, %stack.2, 0, 1, 8, %stack.3, 0, renamable $ebx, undef renamable $eax, 2, 6, 2, 0, 2, 0, 2, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit killed $rbx :: (volatile load store 8 on %stack.0), (volatile load store 8 on %stack.1), (volatile load store 8 on %stack.2), (volatile load store 8 on %stack.3) + ; CHECK: STATEPOINT 2, 5, 5, undef renamable $rax, $rdi, $esi, $edx, $rcx, killed $r8d, 2, 0, 2, 0, 2, 7, 1, 8, %stack.0, 0, 1, 8, %stack.1, 0, 1, 8, %stack.2, 0, 1, 8, %stack.3, 0, renamable $ebx, undef renamable $eax, 2, 6, 2, 0, 2, 0, 2, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit killed $rbx :: (volatile load store (s64) on %stack.0), (volatile load store (s64) on %stack.1), (volatile load store (s64) on %stack.2), (volatile load store (s64) on %stack.3) ; CHECK: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ; CHECK: RET 0 ; STACKMAP-LABEL: __LLVM_StackMaps: @@ -207,7 +207,7 @@ body: | renamable $r14 = COPY $rdx renamable $r13 = COPY $rsi renamable $r12 = COPY $rdi - renamable $ebx = MOV32rm undef renamable $rax, 1, $noreg, 0, $noreg, implicit-def $rbx :: (load unordered 4 from `i32 addrspace(1)* undef`, align 8, addrspace 1) + renamable $ebx = MOV32rm undef renamable $rax, 1, $noreg, 0, $noreg, implicit-def $rbx :: (load unordered (s32) from `i32 addrspace(1)* undef`, align 8, addrspace 1) bb.1.bb7: liveins: $rbx, $r12, $r13, $r14, $r15 @@ -216,17 +216,17 @@ body: | ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp CALL64pcrel32 target-flags(x86-plt) @wombat, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $r13 :: (store 8 into %stack.0) - MOV64mr %stack.1, 1, $noreg, 0, $noreg, killed renamable $r12 :: (store 8 into %stack.1) - MOV64mr %stack.2, 1, $noreg, 0, $noreg, renamable $r14 :: (store 8 into %stack.2) - MOV64mr %stack.3, 1, $noreg, 0, $noreg, killed renamable $r15 :: (store 8 into %stack.3) + MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $r13 :: (store (s64) into %stack.0) + MOV64mr %stack.1, 1, $noreg, 0, $noreg, killed renamable $r12 :: (store (s64) into %stack.1) + MOV64mr %stack.2, 1, $noreg, 0, $noreg, renamable $r14 :: (store (s64) into %stack.2) + MOV64mr %stack.3, 1, $noreg, 0, $noreg, killed renamable $r15 :: (store (s64) into %stack.3) ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp $rdi = COPY killed renamable $r14 $esi = COPY renamable $ebx $edx = COPY killed renamable $ebp $rcx = COPY killed renamable $rax $r8d = MOV32r0 implicit-def dead $eflags - STATEPOINT 2, 5, 5, undef renamable $rax, $rdi, $esi, $edx, $rcx, killed $r8d, 2, 0, 2, 0, 2, 7, 1, 8, %stack.0, 0, 1, 8, %stack.1, 0, 1, 8, %stack.2, 0, 1, 8, %stack.3, 0, renamable $ebx, undef renamable $eax, 2, 6, 2, 0, 2, 0, 2, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit killed $rbx :: (volatile load store 8 on %stack.0), (volatile load store 8 on %stack.1), (volatile load store 8 on %stack.2), (volatile load store 8 on %stack.3) + STATEPOINT 2, 5, 5, undef renamable $rax, $rdi, $esi, $edx, $rcx, killed $r8d, 2, 0, 2, 0, 2, 7, 1, 8, %stack.0, 0, 1, 8, %stack.1, 0, 1, 8, %stack.2, 0, 1, 8, %stack.3, 0, renamable $ebx, undef renamable $eax, 2, 6, 2, 0, 2, 0, 2, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit killed $rbx :: (volatile load store (s64) on %stack.0), (volatile load store (s64) on %stack.1), (volatile load store (s64) on %stack.2), (volatile load store (s64) on %stack.3) ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp RET 0 diff --git a/llvm/test/CodeGen/X86/statepoint-invoke-ra-enter-at-end.mir b/llvm/test/CodeGen/X86/statepoint-invoke-ra-enter-at-end.mir index 0d81c5b2e2c95..8901aa92694a2 100644 --- a/llvm/test/CodeGen/X86/statepoint-invoke-ra-enter-at-end.mir +++ b/llvm/test/CodeGen/X86/statepoint-invoke-ra-enter-at-end.mir @@ -297,7 +297,7 @@ body: | ; CHECK: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ; CHECK: bb.4.bb17: ; CHECK: successors: %bb.5(0x80000000), %bb.8(0x00000000) - ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm undef %35:gr64, 1, $noreg, 0, $noreg :: (load unordered 4 from `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm undef %35:gr64, 1, $noreg, 0, $noreg :: (load unordered (s32) from `i32 addrspace(1)* undef`, addrspace 1) ; CHECK: [[AND32ri:%[0-9]+]]:gr32 = AND32ri [[AND32ri]], 33554431, implicit-def dead $eflags ; CHECK: EH_LABEL ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp @@ -309,8 +309,8 @@ body: | ; CHECK: JMP_1 %bb.5 ; CHECK: bb.5.bb21: ; CHECK: successors: %bb.9(0x7c000000), %bb.6(0x04000000) - ; CHECK: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm $rip, 1, $noreg, target-flags(x86-gotpcrel) @global, $noreg :: (load 8 from got) - ; CHECK: [[MOV64rm1:%[0-9]+]]:gr64 = MOV64rm [[MOV64rm]], 1, $noreg, 0, $noreg :: (dereferenceable load unordered 8 from @global) + ; CHECK: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm $rip, 1, $noreg, target-flags(x86-gotpcrel) @global, $noreg :: (load (s64) from got) + ; CHECK: [[MOV64rm1:%[0-9]+]]:gr64 = MOV64rm [[MOV64rm]], 1, $noreg, 0, $noreg :: (dereferenceable load unordered (s64) from @global) ; CHECK: [[NOT64r:%[0-9]+]]:gr64 = NOT64r [[NOT64r]] ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ; CHECK: dead [[NOT64r]]:gr64, [[COPY2]]:gr64 = STATEPOINT 2, 5, 1, undef %50:gr64, undef $rdi, 2, 0, 2, 0, 2, 27, 2, 0, 2, 2, 2, 0, 2, 0, 2, 0, 2, 1, 2, 0, 2, 7, 2, 0, 2, 10, 2, 1, 2, 83, 2, 0, 2, 5, 2, 1, 2, 0, [[COPY2]], 2, 7, 2, 0, 2, 8, 2, 2, 2, 7, 2, 0, 2, 7, 2, 0, 2, 8, 2, 2, 2, 2, [[NOT64r]](tied-def 0), [[COPY2]](tied-def 1), 2, 0, 2, 2, 0, 0, 1, 1, csr_64, implicit-def $rsp, implicit-def $ssp @@ -322,9 +322,9 @@ body: | ; CHECK: JMP_1 %bb.6 ; CHECK: bb.6.bb33.preheader: ; CHECK: successors: %bb.7(0x80000000) - ; CHECK: [[MOV64rm1:%[0-9]+]]:gr64 = MOV64rm $rip, 1, $noreg, target-flags(x86-gotpcrel) @global.1, $noreg :: (load 8 from got) - ; CHECK: [[MOV64rm2:%[0-9]+]]:gr64 = MOV64rm [[MOV64rm1]], 1, $noreg, 0, $noreg :: (dereferenceable load unordered 8 from @global.1) - ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm $noreg, 1, $noreg, 660, $gs :: (load 4 from `i32 addrspace(256)* inttoptr (i64 660 to i32 addrspace(256)*)`, addrspace 256) + ; CHECK: [[MOV64rm1:%[0-9]+]]:gr64 = MOV64rm $rip, 1, $noreg, target-flags(x86-gotpcrel) @global.1, $noreg :: (load (s64) from got) + ; CHECK: [[MOV64rm2:%[0-9]+]]:gr64 = MOV64rm [[MOV64rm1]], 1, $noreg, 0, $noreg :: (dereferenceable load unordered (s64) from @global.1) + ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm $noreg, 1, $noreg, 660, $gs :: (load (s32) from `i32 addrspace(256)* inttoptr (i64 660 to i32 addrspace(256)*)`, addrspace 256) ; CHECK: [[NOT64r1:%[0-9]+]]:gr64 = NOT64r [[NOT64r1]] ; CHECK: [[COPY4:%[0-9]+]]:gr64 = COPY [[NOT64r1]] ; CHECK: [[OR32ri:%[0-9]+]]:gr32 = OR32ri [[OR32ri]], 268435456, implicit-def dead $eflags @@ -335,19 +335,19 @@ body: | ; CHECK: bb.7.bb33: ; CHECK: successors: %bb.7(0x80000000) ; CHECK: [[INC64r:%[0-9]+]]:gr64_with_sub_8bit = nuw nsw INC64r [[INC64r]], implicit-def dead $eflags - ; CHECK: [[MOV64rm2:%[0-9]+]]:gr64 = MOV64rm undef %59:gr64, 1, $noreg, 0, $noreg :: (load unordered 8 from `i8 addrspace(1)* addrspace(1)* undef`, addrspace 1) + ; CHECK: [[MOV64rm2:%[0-9]+]]:gr64 = MOV64rm undef %59:gr64, 1, $noreg, 0, $noreg :: (load unordered (s64) from `i8 addrspace(1)* addrspace(1)* undef`, addrspace 1) ; CHECK: [[NOT64r2:%[0-9]+]]:gr64 = NOT64r [[NOT64r2]] ; CHECK: CMP64rr [[NOT64r2]], [[COPY6]], implicit-def $eflags ; CHECK: undef %102.sub_32bit:gr64_with_sub_8bit = MOV32ri 0 ; CHECK: [[CMOV64rr:%[0-9]+]]:gr64 = CMOV64rr [[CMOV64rr]], %102, 4, implicit killed $eflags ; CHECK: INLINEASM &"lock btsq $0,($1)", 1 /* sideeffect attdialect */, 4325385 /* reguse:GR64 */, %102, 4325385 /* reguse:GR64 */, undef %56:gr64, 12 /* clobber */, implicit-def dead early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def dead early-clobber $eflags - ; CHECK: LCMPXCHG32 undef %67:gr64, 1, $noreg, 0, $noreg, [[COPY5]], implicit-def dead $eax, implicit-def dead $eflags, implicit undef $eax :: (load store acquire monotonic 4 on `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK: LCMPXCHG32 undef %67:gr64, 1, $noreg, 0, $noreg, [[COPY5]], implicit-def dead $eax, implicit-def dead $eflags, implicit undef $eax :: (load store acquire monotonic (s32) on `i32 addrspace(1)* undef`, addrspace 1) ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ; CHECK: $rdi = COPY [[COPY4]] ; CHECK: CALL64pcrel32 target-flags(x86-plt) @wobble, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp ; CHECK: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - ; CHECK: [[MOV64rm2:%[0-9]+]]:gr64 = MOV64rm $rip, 1, $noreg, target-flags(x86-gotpcrel) @global, $noreg :: (load 8 from got) - ; CHECK: [[MOV64rm3:%[0-9]+]]:gr64 = MOV64rm [[MOV64rm2]], 1, $noreg, 0, $noreg :: (dereferenceable load unordered 8 from @global) + ; CHECK: [[MOV64rm2:%[0-9]+]]:gr64 = MOV64rm $rip, 1, $noreg, target-flags(x86-gotpcrel) @global, $noreg :: (load (s64) from got) + ; CHECK: [[MOV64rm3:%[0-9]+]]:gr64 = MOV64rm [[MOV64rm2]], 1, $noreg, 0, $noreg :: (dereferenceable load unordered (s64) from @global) ; CHECK: [[NOT64r2:%[0-9]+]]:gr64 = NOT64r [[NOT64r2]] ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ; CHECK: $rdi = COPY [[NOT64r2]] @@ -364,7 +364,7 @@ body: | ; CHECK: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ; CHECK: bb.9.bb64: ; CHECK: successors: %bb.1(0x80000000) - ; CHECK: LCMPXCHG32 undef %76:gr64, 1, $noreg, 0, $noreg, [[MOV32ri1]], implicit-def dead $eax, implicit-def dead $eflags, implicit undef $eax :: (load store acquire monotonic 4 on `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK: LCMPXCHG32 undef %76:gr64, 1, $noreg, 0, $noreg, [[MOV32ri1]], implicit-def dead $eax, implicit-def dead $eflags, implicit undef $eax :: (load store acquire monotonic (s32) on `i32 addrspace(1)* undef`, addrspace 1) ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ; CHECK: STATEPOINT 2, 5, 1, undef %79:gr64, undef $rdi, 2, 0, 2, 0, 2, 27, 2, 0, 2, 2, 2, 0, 2, 0, 2, 0, 2, 1, 2, 0, 2, 7, 2, 0, 2, 10, 2, 1, 2, 133, 2, 0, 2, 5, 2, 1, 2, 7, 2, 0, 2, 8, 2, 2, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 8, 2, 2, 2, 0, 2, 0, 2, 0, csr_64, implicit-def $rsp, implicit-def $ssp ; CHECK: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp @@ -374,8 +374,8 @@ body: | %20:gr32 = MOV32r0 implicit-def dead $eflags %74:gr32 = MOV32ri -1 - %46:gr64 = MOV64rm $rip, 1, $noreg, target-flags(x86-gotpcrel) @global, $noreg :: (load 8 from got) - %51:gr64 = MOV64rm $rip, 1, $noreg, target-flags(x86-gotpcrel) @global.1, $noreg :: (load 8 from got) + %46:gr64 = MOV64rm $rip, 1, $noreg, target-flags(x86-gotpcrel) @global, $noreg :: (load (s64) from got) + %51:gr64 = MOV64rm $rip, 1, $noreg, target-flags(x86-gotpcrel) @global.1, $noreg :: (load (s64) from got) bb.1.bb1: successors: %bb.9(0x40000000), %bb.2(0x40000000) @@ -415,7 +415,7 @@ body: | bb.4.bb17: successors: %bb.5(0x80000000), %bb.8(0x00000000) - %36:gr32 = MOV32rm undef %35:gr64, 1, $noreg, 0, $noreg :: (load unordered 4 from `i32 addrspace(1)* undef`, addrspace 1) + %36:gr32 = MOV32rm undef %35:gr64, 1, $noreg, 0, $noreg :: (load unordered (s32) from `i32 addrspace(1)* undef`, addrspace 1) %36:gr32 = AND32ri %36, 33554431, implicit-def dead $eflags EH_LABEL ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp @@ -428,7 +428,7 @@ body: | bb.5.bb21: successors: %bb.9(0x7c000000), %bb.6(0x04000000) - %44:gr64 = MOV64rm %46, 1, $noreg, 0, $noreg :: (dereferenceable load unordered 8 from @global) + %44:gr64 = MOV64rm %46, 1, $noreg, 0, $noreg :: (dereferenceable load unordered (s64) from @global) %44:gr64 = NOT64r %44 ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp dead %44:gr64, %31:gr64 = STATEPOINT 2, 5, 1, undef %50:gr64, undef $rdi, 2, 0, 2, 0, 2, 27, 2, 0, 2, 2, 2, 0, 2, 0, 2, 0, 2, 1, 2, 0, 2, 7, 2, 0, 2, 10, 2, 1, 2, 83, 2, 0, 2, 5, 2, 1, 2, 0, %31, 2, 7, 2, 0, 2, 8, 2, 2, 2, 7, 2, 0, 2, 7, 2, 0, 2, 8, 2, 2, 2, 2, %44(tied-def 0), %31(tied-def 1), 2, 0, 2, 2, 0, 0, 1, 1, csr_64, implicit-def $rsp, implicit-def $ssp @@ -440,8 +440,8 @@ body: | bb.6.bb33.preheader: successors: %bb.7(0x80000000) - %64:gr64 = MOV64rm %51, 1, $noreg, 0, $noreg :: (dereferenceable load unordered 8 from @global.1) - %65:gr32 = MOV32rm $noreg, 1, $noreg, 660, $gs :: (load 4 from `i32 addrspace(256)* inttoptr (i64 660 to i32 addrspace(256)*)`, addrspace 256) + %64:gr64 = MOV64rm %51, 1, $noreg, 0, $noreg :: (dereferenceable load unordered (s64) from @global.1) + %65:gr32 = MOV32rm $noreg, 1, $noreg, 660, $gs :: (load (s32) from `i32 addrspace(256)* inttoptr (i64 660 to i32 addrspace(256)*)`, addrspace 256) undef %53.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags %64:gr64 = NOT64r %64 %65:gr32 = OR32ri %65, 268435456, implicit-def dead $eflags @@ -452,17 +452,17 @@ body: | successors: %bb.7(0x80000000) %81:gr64_with_sub_8bit = nuw nsw INC64r %81, implicit-def dead $eflags - %63:gr64 = MOV64rm undef %59:gr64, 1, $noreg, 0, $noreg :: (load unordered 8 from `i8 addrspace(1)* addrspace(1)* undef`, addrspace 1) + %63:gr64 = MOV64rm undef %59:gr64, 1, $noreg, 0, $noreg :: (load unordered (s64) from `i8 addrspace(1)* addrspace(1)* undef`, addrspace 1) %63:gr64 = NOT64r %63 CMP64rr %63, %31, implicit-def $eflags %63:gr64 = CMOV64rr %63, %53, 4, implicit killed $eflags INLINEASM &"lock btsq $0,($1)", 1 /* sideeffect attdialect */, 4325385 /* reguse:GR64 */, %53, 4325385 /* reguse:GR64 */, undef %56:gr64, 12 /* clobber */, implicit-def dead early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def dead early-clobber $eflags - LCMPXCHG32 undef %67:gr64, 1, $noreg, 0, $noreg, %65, implicit-def dead $eax, implicit-def dead $eflags, implicit undef $eax :: (load store acquire monotonic 4 on `i32 addrspace(1)* undef`, addrspace 1) + LCMPXCHG32 undef %67:gr64, 1, $noreg, 0, $noreg, %65, implicit-def dead $eax, implicit-def dead $eflags, implicit undef $eax :: (load store acquire monotonic (s32) on `i32 addrspace(1)* undef`, addrspace 1) ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp $rdi = COPY %64 CALL64pcrel32 target-flags(x86-plt) @wobble, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit-def $rsp, implicit-def $ssp ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - %71:gr64 = MOV64rm %46, 1, $noreg, 0, $noreg :: (dereferenceable load unordered 8 from @global) + %71:gr64 = MOV64rm %46, 1, $noreg, 0, $noreg :: (dereferenceable load unordered (s64) from @global) %71:gr64 = NOT64r %71 ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp $rdi = COPY %71 @@ -483,7 +483,7 @@ body: | bb.9.bb64: successors: %bb.1(0x80000000) - LCMPXCHG32 undef %76:gr64, 1, $noreg, 0, $noreg, %74, implicit-def dead $eax, implicit-def dead $eflags, implicit undef $eax :: (load store acquire monotonic 4 on `i32 addrspace(1)* undef`, addrspace 1) + LCMPXCHG32 undef %76:gr64, 1, $noreg, 0, $noreg, %74, implicit-def dead $eax, implicit-def dead $eflags, implicit undef $eax :: (load store acquire monotonic (s32) on `i32 addrspace(1)* undef`, addrspace 1) ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp STATEPOINT 2, 5, 1, undef %79:gr64, undef $rdi, 2, 0, 2, 0, 2, 27, 2, 0, 2, 2, 2, 0, 2, 0, 2, 0, 2, 1, 2, 0, 2, 7, 2, 0, 2, 10, 2, 1, 2, 133, 2, 0, 2, 5, 2, 1, 2, 7, 2, 0, 2, 8, 2, 2, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 8, 2, 2, 2, 0, 2, 0, 2, 0, csr_64, implicit-def $rsp, implicit-def $ssp ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp diff --git a/llvm/test/CodeGen/X86/statepoint-invoke-ra-hoist-copies.mir b/llvm/test/CodeGen/X86/statepoint-invoke-ra-hoist-copies.mir index ea0034f5f0f2b..cf0c673312f6b 100644 --- a/llvm/test/CodeGen/X86/statepoint-invoke-ra-hoist-copies.mir +++ b/llvm/test/CodeGen/X86/statepoint-invoke-ra-hoist-copies.mir @@ -420,12 +420,12 @@ body: | bb.0.bb: successors: %bb.1(0x80000000), %bb.21(0x00000000) - %0:gr64 = MOV64rm $noreg, 1, $noreg, 0, $noreg :: (load 8 from `i8 addrspace(1)* addrspace(3)* null`, addrspace 3) - %2:gr32 = MOV32rm undef %29:gr64, 1, $noreg, 0, $noreg :: (load 4 from `i32 addrspace(3)* undef`, addrspace 3) + %0:gr64 = MOV64rm $noreg, 1, $noreg, 0, $noreg :: (load (s64) from `i8 addrspace(1)* addrspace(3)* null`, addrspace 3) + %2:gr32 = MOV32rm undef %29:gr64, 1, $noreg, 0, $noreg :: (load (s32) from `i32 addrspace(3)* undef`, addrspace 3) %1:gr32 = COPY %2 - %4:gr32 = MOV32rm $noreg, 1, $noreg, 0, $noreg :: (load 4 from `i32 addrspace(3)* null`, addrspace 3) + %4:gr32 = MOV32rm $noreg, 1, $noreg, 0, $noreg :: (load (s32) from `i32 addrspace(3)* null`, addrspace 3) %3:gr32 = COPY %4 - %9:gr64 = MOV64rm undef %30:gr64, 1, $noreg, 0, $noreg :: (load 8 from `i8 addrspace(1)* addrspace(3)* undef`, addrspace 3) + %9:gr64 = MOV64rm undef %30:gr64, 1, $noreg, 0, $noreg :: (load (s64) from `i8 addrspace(1)* addrspace(3)* undef`, addrspace 3) %5:gr64 = COPY %9 %31:gr32 = MOV32r0 implicit-def dead $eflags %32:gr8 = COPY killed %31.sub_8bit @@ -436,7 +436,7 @@ body: | bb.1.bb12: successors: %bb.2(0x80000000), %bb.16(0x00000000) - %52:gr32 = MOV32rm undef %53:gr64, 1, $noreg, 0, $noreg :: (load unordered 4 from `i32 addrspace(1)* undef`, addrspace 1) + %52:gr32 = MOV32rm undef %53:gr64, 1, $noreg, 0, $noreg :: (load unordered (s32) from `i32 addrspace(1)* undef`, addrspace 1) %54:gr32 = COPY killed %52 %54:gr32 = AND32ri %54, 33554431, implicit-def dead $eflags ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp @@ -542,7 +542,7 @@ body: | successors: %bb.9(0x40000000), %bb.8(0x40000000) %12:gr64 = COPY %114 - %126:gr64 = MOV64rm %114, 1, $noreg, 904, $noreg :: (load unordered 8 from %ir.tmp44, addrspace 1) + %126:gr64 = MOV64rm %114, 1, $noreg, 904, $noreg :: (load unordered (s64) from %ir.tmp44, addrspace 1) %127:gr64 = COPY killed %126 dead %127:gr64 = NEG64r %127, implicit-def $eflags %14:gr64 = SETB_C64r implicit-def dead $eflags, implicit killed $eflags @@ -665,7 +665,7 @@ body: | EH_LABEL %19:gr64 = COPY killed %104 - %20:gr64 = MOV64rm $noreg, 1, $noreg, 8, $gs :: (load 8 from `i8 addrspace(1)* addrspace(256)* inttoptr (i64 8 to i8 addrspace(1)* addrspace(256)*)`, addrspace 256) + %20:gr64 = MOV64rm $noreg, 1, $noreg, 8, $gs :: (load (s64) from `i8 addrspace(1)* addrspace(256)* inttoptr (i64 8 to i8 addrspace(1)* addrspace(256)*)`, addrspace 256) %163:gr64 = IMPLICIT_DEF %164:gr64 = COPY killed %19 %165:gr64 = IMPLICIT_DEF @@ -710,7 +710,7 @@ body: | bb.22.bb95: %23:gr64 = COPY killed %162 - MOV64mr killed %23, 1, $noreg, 928, $noreg, undef %158:gr64 :: (store unordered 8 into %ir.tmp98, addrspace 1) + MOV64mr killed %23, 1, $noreg, 928, $noreg, undef %158:gr64 :: (store unordered (s64) into %ir.tmp98, addrspace 1) RET 0, undef $xmm0 bb.23.bb99: @@ -720,7 +720,7 @@ body: | %26:gr64 = COPY killed %165 %25:gr64 = COPY killed %164 %24:gr64 = COPY killed %163 - %143:gr64 = MOV64rm %24, 1, $noreg, 904, $noreg :: (load unordered 8 from %ir.1, addrspace 1) + %143:gr64 = MOV64rm %24, 1, $noreg, 904, $noreg :: (load unordered (s64) from %ir.1, addrspace 1) %144:gr64 = COPY killed %143 dead %144:gr64 = NEG64r %144, implicit-def $eflags %145:gr64 = SETB_C64r implicit-def dead $eflags, implicit killed $eflags diff --git a/llvm/test/CodeGen/X86/statepoint-invoke-ra-inline-spiller.mir b/llvm/test/CodeGen/X86/statepoint-invoke-ra-inline-spiller.mir index 45e28754e82aa..286db2e4b3128 100644 --- a/llvm/test/CodeGen/X86/statepoint-invoke-ra-inline-spiller.mir +++ b/llvm/test/CodeGen/X86/statepoint-invoke-ra-inline-spiller.mir @@ -212,7 +212,7 @@ body: | ; CHECK: successors: %bb.1(0x80000000) ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ; CHECK: undef %75.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags - ; CHECK: MOV64mr %stack.2, 1, $noreg, 0, $noreg, %75 :: (store 8 into %stack.2) + ; CHECK: MOV64mr %stack.2, 1, $noreg, 0, $noreg, %75 :: (store (s64) into %stack.2) ; CHECK: dead $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi ; CHECK: STATEPOINT 2, 5, 2, undef %24:gr64, $rdi, undef $rsi, 2, 0, 2, 0, 2, 37, 2, 0, 2, 2, 2, 0, 2, 43, 2, 0, 2, 2, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 10, 2, 1, 2, 6, 2, 0, 2, 4, 2, 1, 2, 0, 2, 0, 2, 7, 2, 0, 2, 0, 2, 0, 2, 7, 2, 0, 2, 0, 2, 0, 2, 2, 2, 4, 2, 5, 2, 0, 2, 2, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 1, 2, 0, 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp ; CHECK: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp @@ -224,27 +224,27 @@ body: | ; CHECK: bb.1.bb1: ; CHECK: successors: %bb.2(0x40000000), %bb.4(0x40000000) ; CHECK: undef %66.sub_32bit:gr64_nosp = COPY [[MOV32r0_]] - ; CHECK: undef %65.sub_32bit:gr64_with_sub_8bit = MOV32rm undef %31:gr64, 1, $noreg, 0, $noreg :: (load unordered 4 from `i32 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[DEF]], 1, $noreg, 0, $noreg :: (load unordered 4 from %ir.tmp2, addrspace 1) + ; CHECK: undef %65.sub_32bit:gr64_with_sub_8bit = MOV32rm undef %31:gr64, 1, $noreg, 0, $noreg :: (load unordered (s32) from `i32 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[DEF]], 1, $noreg, 0, $noreg :: (load unordered (s32) from %ir.tmp2, addrspace 1) ; CHECK: %65.sub_32bit:gr64_with_sub_8bit = SUB32rr %65.sub_32bit, [[MOV32rm]], implicit-def dead $eflags ; CHECK: [[LEA64_32r:%[0-9]+]]:gr32 = LEA64_32r %65, 1, %66, 0, $noreg - ; CHECK: MOV32mr %stack.0, 1, $noreg, 0, $noreg, %66.sub_32bit :: (store 4 into %stack.0) - ; CHECK: MOV32mr %stack.1, 1, $noreg, 0, $noreg, %65.sub_32bit :: (store 4 into %stack.1) + ; CHECK: MOV32mr %stack.0, 1, $noreg, 0, $noreg, %66.sub_32bit :: (store (s32) into %stack.0) + ; CHECK: MOV32mr %stack.1, 1, $noreg, 0, $noreg, %65.sub_32bit :: (store (s32) into %stack.1) ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ; CHECK: $rdi = COPY [[DEF2]] ; CHECK: $esi = COPY %66.sub_32bit ; CHECK: $edx = COPY [[LEA64_32r]] ; CHECK: $r8d = COPY [[MOV32rm]] - ; CHECK: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm %stack.2, 1, $noreg, 0, $noreg :: (load 8 from %stack.2) + ; CHECK: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm %stack.2, 1, $noreg, 0, $noreg :: (load (s64) from %stack.2) ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY [[MOV64rm]] - ; CHECK: [[STATEPOINT:%[0-9]+]]:gr64, [[STATEPOINT1:%[0-9]+]]:gr64, [[STATEPOINT2:%[0-9]+]]:gr64, [[STATEPOINT3:%[0-9]+]]:gr64 = STATEPOINT 2, 5, 5, undef %35:gr64, $rdi, $esi, $edx, undef $rcx, $r8d, 2, 0, 2, 0, 2, 85, 2, 0, 2, 2, 2, 0, 2, 43, 2, 0, 2, 2, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 10, 2, 1, 2, 10, 2, 0, 2, 4, 2, 1, 2, 7, 2, 0, 2, 7, 2, 0, 2, 0, [[STATEPOINT3]], 2, 7, 2, 0, 2, 0, [[STATEPOINT3]], 2, 10, 2, 5, 2, 12, 2, 0, 2, 3, 2, 1, 2, 0, [[STATEPOINT3]], 2, 0, [[STATEPOINT3]], 2, 7, 2, 0, 2, 0, [[STATEPOINT3]], 2, 2, 2, 11, 2, 4, 2, 0, 2, 1, 2, 0, 2, 7, 2, 0, 2, 2, 2, 15, 2, 7, 2, 0, 2, 2, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 10, 2, 21, 2, 63, 2, 0, 2, 9, 2, 1, 2, 0, [[STATEPOINT2]], 2, 0, [[STATEPOINT1]], 2, 7, 2, 0, 2, 7, 2, 0, 2, 0, [[STATEPOINT]], 2, 3, 1, 4, %stack.0, 0, 2, 3, 2, 4278124286, 2, 3, 1, 4, %stack.1, 0, 2, 7, 2, 0, 2, 0, [[STATEPOINT]], 2, 4, [[STATEPOINT]](tied-def 0), [[STATEPOINT1]](tied-def 1), [[STATEPOINT2]](tied-def 2), [[STATEPOINT3]](tied-def 3), 2, 0, 2, 4, 0, 0, 1, 1, 2, 2, 3, 3, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 4 on %stack.0), (volatile load store 4 on %stack.1) + ; CHECK: [[STATEPOINT:%[0-9]+]]:gr64, [[STATEPOINT1:%[0-9]+]]:gr64, [[STATEPOINT2:%[0-9]+]]:gr64, [[STATEPOINT3:%[0-9]+]]:gr64 = STATEPOINT 2, 5, 5, undef %35:gr64, $rdi, $esi, $edx, undef $rcx, $r8d, 2, 0, 2, 0, 2, 85, 2, 0, 2, 2, 2, 0, 2, 43, 2, 0, 2, 2, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 10, 2, 1, 2, 10, 2, 0, 2, 4, 2, 1, 2, 7, 2, 0, 2, 7, 2, 0, 2, 0, [[STATEPOINT3]], 2, 7, 2, 0, 2, 0, [[STATEPOINT3]], 2, 10, 2, 5, 2, 12, 2, 0, 2, 3, 2, 1, 2, 0, [[STATEPOINT3]], 2, 0, [[STATEPOINT3]], 2, 7, 2, 0, 2, 0, [[STATEPOINT3]], 2, 2, 2, 11, 2, 4, 2, 0, 2, 1, 2, 0, 2, 7, 2, 0, 2, 2, 2, 15, 2, 7, 2, 0, 2, 2, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 10, 2, 21, 2, 63, 2, 0, 2, 9, 2, 1, 2, 0, [[STATEPOINT2]], 2, 0, [[STATEPOINT1]], 2, 7, 2, 0, 2, 7, 2, 0, 2, 0, [[STATEPOINT]], 2, 3, 1, 4, %stack.0, 0, 2, 3, 2, 4278124286, 2, 3, 1, 4, %stack.1, 0, 2, 7, 2, 0, 2, 0, [[STATEPOINT]], 2, 4, [[STATEPOINT]](tied-def 0), [[STATEPOINT1]](tied-def 1), [[STATEPOINT2]](tied-def 2), [[STATEPOINT3]](tied-def 3), 2, 0, 2, 4, 0, 0, 1, 1, 2, 2, 3, 3, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store (s32) on %stack.0), (volatile load store (s32) on %stack.1) ; CHECK: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ; CHECK: CMP32rr %65.sub_32bit, undef %37:gr32, implicit-def $eflags ; CHECK: JCC_1 %bb.4, 13, implicit killed $eflags ; CHECK: bb.2: ; CHECK: successors: %bb.3(0x80000000) ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY [[STATEPOINT3]] - ; CHECK: MOV64mr %stack.2, 1, $noreg, 0, $noreg, [[COPY]] :: (store 8 into %stack.2) + ; CHECK: MOV64mr %stack.2, 1, $noreg, 0, $noreg, [[COPY]] :: (store (s64) into %stack.2) ; CHECK: [[DEF1:%[0-9]+]]:gr64 = IMPLICIT_DEF ; CHECK: [[COPY1:%[0-9]+]]:gr32 = COPY [[LEA64_32r]] ; CHECK: bb.3.bb21: @@ -252,19 +252,19 @@ body: | ; CHECK: JMP_1 %bb.1 ; CHECK: bb.4.bb28: ; CHECK: successors: %bb.5(0x80000000), %bb.6(0x00000000) - ; CHECK: MOV32mr %stack.0, 1, $noreg, 0, $noreg, [[LEA64_32r]] :: (store 4 into %stack.0) + ; CHECK: MOV32mr %stack.0, 1, $noreg, 0, $noreg, [[LEA64_32r]] :: (store (s32) into %stack.0) ; CHECK: EH_LABEL ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ; CHECK: $ecx = MOV32r0 implicit-def dead $eflags ; CHECK: [[COPY4:%[0-9]+]]:gr32 = COPY [[LEA64_32r]] - ; CHECK: [[STATEPOINT2]]:gr64, [[STATEPOINT3]]:gr64, [[STATEPOINT]]:gr64, dead [[STATEPOINT1]]:gr64 = STATEPOINT 1, 16, 5, undef %47:gr64, undef $edi, undef $rsi, undef $rdx, $ecx, undef $r8d, 2, 0, 2, 0, 2, 99, 2, 0, 2, 2, 2, 0, 2, 43, 2, 0, 2, 2, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 10, 2, 1, 2, 10, 2, 0, 2, 4, 2, 1, 2, 7, 2, 0, 2, 7, 2, 0, 2, 0, [[STATEPOINT3]], 2, 7, 2, 0, 2, 0, [[STATEPOINT3]], 2, 10, 2, 5, 2, 12, 2, 0, 2, 3, 2, 1, 2, 0, [[STATEPOINT3]], 2, 0, [[STATEPOINT3]], 2, 7, 2, 0, 2, 0, [[STATEPOINT3]], 2, 2, 2, 11, 2, 4, 2, 0, 2, 1, 2, 0, 2, 7, 2, 0, 2, 2, 2, 15, 2, 7, 2, 0, 2, 2, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 10, 2, 21, 2, 96, 2, 0, 2, 9, 2, 1, 2, 0, [[STATEPOINT2]], 2, 0, [[STATEPOINT1]], 2, 7, 2, 0, 2, 7, 2, 0, 2, 0, [[STATEPOINT]], 2, 3, 1, 4, %stack.0, 0, 2, 3, 2, 4278124286, 2, 7, 2, 0, 2, 7, 2, 0, 2, 0, [[STATEPOINT]], 2, 8, 2, 12, 2, 34, 2, 0, 2, 3, 2, 1, 2, 0, [[STATEPOINT2]], 2, 0, 2, 4278124286, 2, 7, 2, 0, 2, 0, 2, 4278124286, 2, 5, [[STATEPOINT2]](tied-def 0), [[STATEPOINT3]](tied-def 1), [[STATEPOINT]](tied-def 2), [[STATEPOINT1]](tied-def 3), 2, 4278124286, 2, 0, 2, 5, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 4 on %stack.0) + ; CHECK: [[STATEPOINT2]]:gr64, [[STATEPOINT3]]:gr64, [[STATEPOINT]]:gr64, dead [[STATEPOINT1]]:gr64 = STATEPOINT 1, 16, 5, undef %47:gr64, undef $edi, undef $rsi, undef $rdx, $ecx, undef $r8d, 2, 0, 2, 0, 2, 99, 2, 0, 2, 2, 2, 0, 2, 43, 2, 0, 2, 2, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 10, 2, 1, 2, 10, 2, 0, 2, 4, 2, 1, 2, 7, 2, 0, 2, 7, 2, 0, 2, 0, [[STATEPOINT3]], 2, 7, 2, 0, 2, 0, [[STATEPOINT3]], 2, 10, 2, 5, 2, 12, 2, 0, 2, 3, 2, 1, 2, 0, [[STATEPOINT3]], 2, 0, [[STATEPOINT3]], 2, 7, 2, 0, 2, 0, [[STATEPOINT3]], 2, 2, 2, 11, 2, 4, 2, 0, 2, 1, 2, 0, 2, 7, 2, 0, 2, 2, 2, 15, 2, 7, 2, 0, 2, 2, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 10, 2, 21, 2, 96, 2, 0, 2, 9, 2, 1, 2, 0, [[STATEPOINT2]], 2, 0, [[STATEPOINT1]], 2, 7, 2, 0, 2, 7, 2, 0, 2, 0, [[STATEPOINT]], 2, 3, 1, 4, %stack.0, 0, 2, 3, 2, 4278124286, 2, 7, 2, 0, 2, 7, 2, 0, 2, 0, [[STATEPOINT]], 2, 8, 2, 12, 2, 34, 2, 0, 2, 3, 2, 1, 2, 0, [[STATEPOINT2]], 2, 0, 2, 4278124286, 2, 7, 2, 0, 2, 0, 2, 4278124286, 2, 5, [[STATEPOINT2]](tied-def 0), [[STATEPOINT3]](tied-def 1), [[STATEPOINT]](tied-def 2), [[STATEPOINT1]](tied-def 3), 2, 4278124286, 2, 0, 2, 5, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store (s32) on %stack.0) ; CHECK: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ; CHECK: EH_LABEL ; CHECK: JMP_1 %bb.5 ; CHECK: bb.5.bb30: ; CHECK: successors: %bb.3(0x80000000) ; CHECK: [[COPY2:%[0-9]+]]:gr64 = COPY [[STATEPOINT3]] - ; CHECK: MOV64mr %stack.2, 1, $noreg, 0, $noreg, [[COPY2]] :: (store 8 into %stack.2) + ; CHECK: MOV64mr %stack.2, 1, $noreg, 0, $noreg, [[COPY2]] :: (store (s64) into %stack.2) ; CHECK: [[COPY3:%[0-9]+]]:gr64 = COPY [[STATEPOINT2]] ; CHECK: [[ADD64ri8_:%[0-9]+]]:gr64 = nuw ADD64ri8 [[ADD64ri8_]], 28, implicit-def dead $eflags ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp @@ -306,15 +306,15 @@ body: | %2:gr64 = COPY killed %56 %1:gr64 = COPY killed %55 %0:gr64 = COPY killed %54 - %30:gr32 = MOV32rm undef %31:gr64, 1, $noreg, 0, $noreg :: (load unordered 4 from `i32 addrspace(1)* undef`, align 8, addrspace 1) - %32:gr32 = MOV32rm killed %0, 1, $noreg, 0, $noreg :: (load unordered 4 from %ir.tmp2, addrspace 1) + %30:gr32 = MOV32rm undef %31:gr64, 1, $noreg, 0, $noreg :: (load unordered (s32) from `i32 addrspace(1)* undef`, align 8, addrspace 1) + %32:gr32 = MOV32rm killed %0, 1, $noreg, 0, $noreg :: (load unordered (s32) from %ir.tmp2, addrspace 1) %33:gr32 = COPY killed %30 %33:gr32 = SUB32rr %33, %32, implicit-def dead $eflags undef %65.sub_32bit:gr64 = COPY %33 undef %66.sub_32bit:gr64_nosp = COPY %5 %6:gr32 = LEA64_32r killed %65, 1, killed %66, 0, $noreg - MOV32mr %stack.0, 1, $noreg, 0, $noreg, %5 :: (store 4 into %stack.0) - MOV32mr %stack.1, 1, $noreg, 0, $noreg, %33 :: (store 4 into %stack.1) + MOV32mr %stack.0, 1, $noreg, 0, $noreg, %5 :: (store (s32) into %stack.0) + MOV32mr %stack.1, 1, $noreg, 0, $noreg, %33 :: (store (s32) into %stack.1) ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp $rdi = COPY %2 $esi = COPY killed %5 @@ -324,7 +324,7 @@ body: | %28:gr64 = COPY killed %4 %25:gr64 = COPY killed %1 %27:gr64 = COPY killed %3 - %25:gr64, %26:gr64, %27:gr64, %28:gr64 = STATEPOINT 2, 5, 5, undef %35:gr64, killed $rdi, killed $esi, killed $edx, undef $rcx, killed $r8d, 2, 0, 2, 0, 2, 85, 2, 0, 2, 2, 2, 0, 2, 43, 2, 0, 2, 2, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 10, 2, 1, 2, 10, 2, 0, 2, 4, 2, 1, 2, 7, 2, 0, 2, 7, 2, 0, 2, 0, %28, 2, 7, 2, 0, 2, 0, %28, 2, 10, 2, 5, 2, 12, 2, 0, 2, 3, 2, 1, 2, 0, %28, 2, 0, %28, 2, 7, 2, 0, 2, 0, %28, 2, 2, 2, 11, 2, 4, 2, 0, 2, 1, 2, 0, 2, 7, 2, 0, 2, 2, 2, 15, 2, 7, 2, 0, 2, 2, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 10, 2, 21, 2, 63, 2, 0, 2, 9, 2, 1, 2, 0, %27, 2, 0, %26, 2, 7, 2, 0, 2, 7, 2, 0, 2, 0, %25, 2, 3, 1, 4, %stack.0, 0, 2, 3, 2, 4278124286, 2, 3, 1, 4, %stack.1, 0, 2, 7, 2, 0, 2, 0, %25, 2, 4, %25(tied-def 0), %26(tied-def 1), %27(tied-def 2), %28(tied-def 3), 2, 0, 2, 4, 0, 0, 1, 1, 2, 2, 3, 3, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 4 on %stack.0), (volatile load store 4 on %stack.1) + %25:gr64, %26:gr64, %27:gr64, %28:gr64 = STATEPOINT 2, 5, 5, undef %35:gr64, killed $rdi, killed $esi, killed $edx, undef $rcx, killed $r8d, 2, 0, 2, 0, 2, 85, 2, 0, 2, 2, 2, 0, 2, 43, 2, 0, 2, 2, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 10, 2, 1, 2, 10, 2, 0, 2, 4, 2, 1, 2, 7, 2, 0, 2, 7, 2, 0, 2, 0, %28, 2, 7, 2, 0, 2, 0, %28, 2, 10, 2, 5, 2, 12, 2, 0, 2, 3, 2, 1, 2, 0, %28, 2, 0, %28, 2, 7, 2, 0, 2, 0, %28, 2, 2, 2, 11, 2, 4, 2, 0, 2, 1, 2, 0, 2, 7, 2, 0, 2, 2, 2, 15, 2, 7, 2, 0, 2, 2, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 10, 2, 21, 2, 63, 2, 0, 2, 9, 2, 1, 2, 0, %27, 2, 0, %26, 2, 7, 2, 0, 2, 7, 2, 0, 2, 0, %25, 2, 3, 1, 4, %stack.0, 0, 2, 3, 2, 4278124286, 2, 3, 1, 4, %stack.1, 0, 2, 7, 2, 0, 2, 0, %25, 2, 4, %25(tied-def 0), %26(tied-def 1), %27(tied-def 2), %28(tied-def 3), 2, 0, 2, 4, 0, 0, 1, 1, 2, 2, 3, 3, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store (s32) on %stack.0), (volatile load store (s32) on %stack.1) ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp CMP32rr killed %33, undef %37:gr32, implicit-def $eflags JCC_1 %bb.3, 13, implicit killed $eflags @@ -357,7 +357,7 @@ body: | bb.3.bb28: successors: %bb.4(0x80000000), %bb.5(0x00000000) - MOV32mr %stack.0, 1, $noreg, 0, $noreg, %6 :: (store 4 into %stack.0) + MOV32mr %stack.0, 1, $noreg, 0, $noreg, %6 :: (store (s32) into %stack.0) EH_LABEL ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp $ecx = COPY %22 @@ -365,7 +365,7 @@ body: | %39:gr64 = COPY killed %28 %40:gr64 = COPY killed %25 %38:gr64 = COPY killed %27 - %38:gr64, %39:gr64, %40:gr64, dead %46:gr64 = STATEPOINT 1, 16, 5, undef %47:gr64, undef $edi, undef $rsi, undef $rdx, killed $ecx, undef $r8d, 2, 0, 2, 0, 2, 99, 2, 0, 2, 2, 2, 0, 2, 43, 2, 0, 2, 2, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 10, 2, 1, 2, 10, 2, 0, 2, 4, 2, 1, 2, 7, 2, 0, 2, 7, 2, 0, 2, 0, %39, 2, 7, 2, 0, 2, 0, %39, 2, 10, 2, 5, 2, 12, 2, 0, 2, 3, 2, 1, 2, 0, %39, 2, 0, %39, 2, 7, 2, 0, 2, 0, %39, 2, 2, 2, 11, 2, 4, 2, 0, 2, 1, 2, 0, 2, 7, 2, 0, 2, 2, 2, 15, 2, 7, 2, 0, 2, 2, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 10, 2, 21, 2, 96, 2, 0, 2, 9, 2, 1, 2, 0, %38, 2, 0, %46, 2, 7, 2, 0, 2, 7, 2, 0, 2, 0, %40, 2, 3, 1, 4, %stack.0, 0, 2, 3, 2, 4278124286, 2, 7, 2, 0, 2, 7, 2, 0, 2, 0, %40, 2, 8, 2, 12, 2, 34, 2, 0, 2, 3, 2, 1, 2, 0, %38, 2, 0, 2, 4278124286, 2, 7, 2, 0, 2, 0, 2, 4278124286, 2, 5, %38(tied-def 0), %39(tied-def 1), %40(tied-def 2), %46(tied-def 3), 2, 4278124286, 2, 0, 2, 5, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 4 on %stack.0) + %38:gr64, %39:gr64, %40:gr64, dead %46:gr64 = STATEPOINT 1, 16, 5, undef %47:gr64, undef $edi, undef $rsi, undef $rdx, killed $ecx, undef $r8d, 2, 0, 2, 0, 2, 99, 2, 0, 2, 2, 2, 0, 2, 43, 2, 0, 2, 2, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 10, 2, 1, 2, 10, 2, 0, 2, 4, 2, 1, 2, 7, 2, 0, 2, 7, 2, 0, 2, 0, %39, 2, 7, 2, 0, 2, 0, %39, 2, 10, 2, 5, 2, 12, 2, 0, 2, 3, 2, 1, 2, 0, %39, 2, 0, %39, 2, 7, 2, 0, 2, 0, %39, 2, 2, 2, 11, 2, 4, 2, 0, 2, 1, 2, 0, 2, 7, 2, 0, 2, 2, 2, 15, 2, 7, 2, 0, 2, 2, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 10, 2, 21, 2, 96, 2, 0, 2, 9, 2, 1, 2, 0, %38, 2, 0, %46, 2, 7, 2, 0, 2, 7, 2, 0, 2, 0, %40, 2, 3, 1, 4, %stack.0, 0, 2, 3, 2, 4278124286, 2, 7, 2, 0, 2, 7, 2, 0, 2, 0, %40, 2, 8, 2, 12, 2, 34, 2, 0, 2, 3, 2, 1, 2, 0, %38, 2, 0, 2, 4278124286, 2, 7, 2, 0, 2, 0, 2, 4278124286, 2, 5, %38(tied-def 0), %39(tied-def 1), %40(tied-def 2), %46(tied-def 3), 2, 4278124286, 2, 0, 2, 5, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store (s32) on %stack.0) ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp EH_LABEL JMP_1 %bb.4 diff --git a/llvm/test/CodeGen/X86/statepoint-invoke-ra-remove-back-copies.mir b/llvm/test/CodeGen/X86/statepoint-invoke-ra-remove-back-copies.mir index 7f0d89b73ea98..3e11b9f7f82fd 100644 --- a/llvm/test/CodeGen/X86/statepoint-invoke-ra-remove-back-copies.mir +++ b/llvm/test/CodeGen/X86/statepoint-invoke-ra-remove-back-copies.mir @@ -269,9 +269,9 @@ body: | ; CHECK: JMP_1 %bb.1 ; CHECK: bb.1.bb6: ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm undef %17:gr64, 1, $noreg, 0, $noreg :: (load unordered 8 from `i8 addrspace(1)* addrspace(1)* undef`, addrspace 1) + ; CHECK: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm undef %17:gr64, 1, $noreg, 0, $noreg :: (load unordered (s64) from `i8 addrspace(1)* addrspace(1)* undef`, addrspace 1) ; CHECK: [[NOT64r:%[0-9]+]]:gr64 = NOT64r [[NOT64r]] - ; CHECK: MOV64mr %stack.1, 1, $noreg, 0, $noreg, [[NOT64r]] :: (store 8 into %stack.1) + ; CHECK: MOV64mr %stack.1, 1, $noreg, 0, $noreg, [[NOT64r]] :: (store (s64) into %stack.1) ; CHECK: undef %48.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags ; CHECK: [[DEF:%[0-9]+]]:gr64 = IMPLICIT_DEF ; CHECK: [[DEF1:%[0-9]+]]:gr64 = IMPLICIT_DEF @@ -293,7 +293,7 @@ body: | ; CHECK: JMP_1 %bb.16 ; CHECK: bb.6.bb7: ; CHECK: successors: %bb.16(0x00000000), %bb.7(0x80000000) - ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm undef %24:gr64, 1, $noreg, 0, $noreg :: (load unordered 4 from `i32 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm undef %24:gr64, 1, $noreg, 0, $noreg :: (load unordered (s32) from `i32 addrspace(1)* undef`, align 8, addrspace 1) ; CHECK: [[ADD32rr:%[0-9]+]]:gr32 = ADD32rr [[ADD32rr]], [[MOV32r0_1]], implicit-def dead $eflags ; CHECK: CMP32rr [[MOV32r0_1]], [[ADD32rr]], implicit-def $eflags ; CHECK: JCC_1 %bb.16, 15, implicit $eflags @@ -307,14 +307,14 @@ body: | ; CHECK: JMP_1 %bb.11 ; CHECK: bb.9.bb23: ; CHECK: successors: %bb.10(0x7ffff800), %bb.12(0x00000800) - ; CHECK: MOV32mr %stack.0, 1, $noreg, 0, $noreg, [[ADD32rr]] :: (store 4 into %stack.0) + ; CHECK: MOV32mr %stack.0, 1, $noreg, 0, $noreg, [[ADD32rr]] :: (store (s32) into %stack.0) ; CHECK: EH_LABEL ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - ; CHECK: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm %stack.1, 1, $noreg, 0, $noreg :: (load 8 from %stack.1) + ; CHECK: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm %stack.1, 1, $noreg, 0, $noreg :: (load (s64) from %stack.1) ; CHECK: dead $edx = MOV32r0 implicit-def dead $eflags, implicit-def $rdx ; CHECK: $ecx = MOV32r0 implicit-def dead $eflags ; CHECK: [[COPY1:%[0-9]+]]:gr64 = COPY [[DEF2]] - ; CHECK: dead [[MOV64rm]]:gr64, dead [[COPY1]]:gr64, dead [[DEF1]]:gr64, dead [[DEF]]:gr64 = STATEPOINT 1, 16, 5, undef %41:gr64, undef $edi, undef $rsi, $rdx, $ecx, undef $r8d, 2, 0, 2, 0, 2, 89, 2, 0, 2, 10, 2, 0, 2, 10, 2, 0, 2, 4, 2, 1, 2, 7, 2, 0, 2, 7, 2, 0, 2, 0, [[COPY1]], 2, 7, 2, 0, 2, 0, [[COPY1]], 2, 10, 2, 2, 2, 12, 2, 0, 2, 3, 2, 1, 2, 0, [[COPY1]], 2, 0, [[COPY1]], 2, 7, 2, 0, 2, 0, [[COPY1]], 2, 2, 2, 8, 2, 4, 2, 0, 2, 1, 2, 0, 2, 7, 2, 0, 2, 2, 2, 12, 2, 7, 2, 0, 2, 2, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 10, 2, 18, 2, 96, 2, 0, 2, 9, 2, 1, 2, 0, [[DEF1]], 2, 0, 2, 4278124286, 2, 7, 2, 0, 2, 7, 2, 0, 2, 0, [[DEF]], 2, 3, 1, 4, %stack.0, 0, 2, 3, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 0, [[DEF]], 2, 8, 2, 9, 2, 34, 2, 0, 2, 3, 2, 1, 2, 0, [[DEF1]], 2, 0, 2, 4278124286, 2, 7, 2, 0, 2, 0, 2, 4278124286, 2, 5, [[MOV64rm]](tied-def 0), [[COPY1]](tied-def 1), [[DEF1]](tied-def 2), 2, 4278124286, [[DEF]](tied-def 3), 2, 0, 2, 5, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 4 on %stack.0) + ; CHECK: dead [[MOV64rm]]:gr64, dead [[COPY1]]:gr64, dead [[DEF1]]:gr64, dead [[DEF]]:gr64 = STATEPOINT 1, 16, 5, undef %41:gr64, undef $edi, undef $rsi, $rdx, $ecx, undef $r8d, 2, 0, 2, 0, 2, 89, 2, 0, 2, 10, 2, 0, 2, 10, 2, 0, 2, 4, 2, 1, 2, 7, 2, 0, 2, 7, 2, 0, 2, 0, [[COPY1]], 2, 7, 2, 0, 2, 0, [[COPY1]], 2, 10, 2, 2, 2, 12, 2, 0, 2, 3, 2, 1, 2, 0, [[COPY1]], 2, 0, [[COPY1]], 2, 7, 2, 0, 2, 0, [[COPY1]], 2, 2, 2, 8, 2, 4, 2, 0, 2, 1, 2, 0, 2, 7, 2, 0, 2, 2, 2, 12, 2, 7, 2, 0, 2, 2, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 10, 2, 18, 2, 96, 2, 0, 2, 9, 2, 1, 2, 0, [[DEF1]], 2, 0, 2, 4278124286, 2, 7, 2, 0, 2, 7, 2, 0, 2, 0, [[DEF]], 2, 3, 1, 4, %stack.0, 0, 2, 3, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 0, [[DEF]], 2, 8, 2, 9, 2, 34, 2, 0, 2, 3, 2, 1, 2, 0, [[DEF1]], 2, 0, 2, 4278124286, 2, 7, 2, 0, 2, 0, 2, 4278124286, 2, 5, [[MOV64rm]](tied-def 0), [[COPY1]](tied-def 1), [[DEF1]](tied-def 2), 2, 4278124286, [[DEF]](tied-def 3), 2, 0, 2, 5, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store (s32) on %stack.0) ; CHECK: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ; CHECK: EH_LABEL ; CHECK: JMP_1 %bb.10 @@ -422,7 +422,7 @@ body: | bb.1.bb6: successors: %bb.2(0x80000000) - %0:gr64 = MOV64rm undef %17:gr64, 1, $noreg, 0, $noreg :: (load unordered 8 from `i8 addrspace(1)* addrspace(1)* undef`, addrspace 1) + %0:gr64 = MOV64rm undef %17:gr64, 1, $noreg, 0, $noreg :: (load unordered (s64) from `i8 addrspace(1)* addrspace(1)* undef`, addrspace 1) %0:gr64 = NOT64r %0 undef %48.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags %1:gr64 = IMPLICIT_DEF @@ -455,7 +455,7 @@ body: | bb.4.bb7: successors: %bb.13(0x00000000), %bb.5(0x80000000) - %5:gr32 = MOV32rm undef %24:gr64, 1, $noreg, 0, $noreg :: (load unordered 4 from `i32 addrspace(1)* undef`, align 8, addrspace 1) + %5:gr32 = MOV32rm undef %24:gr64, 1, $noreg, 0, $noreg :: (load unordered (s32) from `i32 addrspace(1)* undef`, align 8, addrspace 1) %5:gr32 = ADD32rr %5, %77, implicit-def dead $eflags CMP32rr %77, %5, implicit-def $eflags JCC_1 %bb.13, 15, implicit $eflags @@ -475,13 +475,13 @@ body: | bb.6.bb23: successors: %bb.7(0x7ffff800), %bb.9(0x00000800) - MOV32mr %stack.0, 1, $noreg, 0, $noreg, %5 :: (store 4 into %stack.0) + MOV32mr %stack.0, 1, $noreg, 0, $noreg, %5 :: (store (s32) into %stack.0) EH_LABEL ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp %32:gr64 = COPY %0 dead $edx = MOV32r0 implicit-def dead $eflags, implicit-def $rdx $ecx = MOV32r0 implicit-def dead $eflags - dead %32:gr64, dead %3:gr64_with_sub_8bit, dead %2:gr64, dead %1:gr64 = STATEPOINT 1, 16, 5, undef %41:gr64, undef $edi, undef $rsi, $rdx, $ecx, undef $r8d, 2, 0, 2, 0, 2, 89, 2, 0, 2, 10, 2, 0, 2, 10, 2, 0, 2, 4, 2, 1, 2, 7, 2, 0, 2, 7, 2, 0, 2, 0, %3, 2, 7, 2, 0, 2, 0, %3, 2, 10, 2, 2, 2, 12, 2, 0, 2, 3, 2, 1, 2, 0, %3, 2, 0, %3, 2, 7, 2, 0, 2, 0, %3, 2, 2, 2, 8, 2, 4, 2, 0, 2, 1, 2, 0, 2, 7, 2, 0, 2, 2, 2, 12, 2, 7, 2, 0, 2, 2, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 10, 2, 18, 2, 96, 2, 0, 2, 9, 2, 1, 2, 0, %2, 2, 0, 2, 4278124286, 2, 7, 2, 0, 2, 7, 2, 0, 2, 0, %1, 2, 3, 1, 4, %stack.0, 0, 2, 3, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 0, %1, 2, 8, 2, 9, 2, 34, 2, 0, 2, 3, 2, 1, 2, 0, %2, 2, 0, 2, 4278124286, 2, 7, 2, 0, 2, 0, 2, 4278124286, 2, 5, %32(tied-def 0), %3(tied-def 1), %2(tied-def 2), 2, 4278124286, %1(tied-def 3), 2, 0, 2, 5, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 4 on %stack.0) + dead %32:gr64, dead %3:gr64_with_sub_8bit, dead %2:gr64, dead %1:gr64 = STATEPOINT 1, 16, 5, undef %41:gr64, undef $edi, undef $rsi, $rdx, $ecx, undef $r8d, 2, 0, 2, 0, 2, 89, 2, 0, 2, 10, 2, 0, 2, 10, 2, 0, 2, 4, 2, 1, 2, 7, 2, 0, 2, 7, 2, 0, 2, 0, %3, 2, 7, 2, 0, 2, 0, %3, 2, 10, 2, 2, 2, 12, 2, 0, 2, 3, 2, 1, 2, 0, %3, 2, 0, %3, 2, 7, 2, 0, 2, 0, %3, 2, 2, 2, 8, 2, 4, 2, 0, 2, 1, 2, 0, 2, 7, 2, 0, 2, 2, 2, 12, 2, 7, 2, 0, 2, 2, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 10, 2, 18, 2, 96, 2, 0, 2, 9, 2, 1, 2, 0, %2, 2, 0, 2, 4278124286, 2, 7, 2, 0, 2, 7, 2, 0, 2, 0, %1, 2, 3, 1, 4, %stack.0, 0, 2, 3, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 0, %1, 2, 8, 2, 9, 2, 34, 2, 0, 2, 3, 2, 1, 2, 0, %2, 2, 0, 2, 4278124286, 2, 7, 2, 0, 2, 0, 2, 4278124286, 2, 5, %32(tied-def 0), %3(tied-def 1), %2(tied-def 2), 2, 4278124286, %1(tied-def 3), 2, 0, 2, 5, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store (s32) on %stack.0) ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp EH_LABEL JMP_1 %bb.7 diff --git a/llvm/test/CodeGen/X86/statepoint-invoke-ra.mir b/llvm/test/CodeGen/X86/statepoint-invoke-ra.mir index ed5d6c6f8385a..a1a592ea23f38 100644 --- a/llvm/test/CodeGen/X86/statepoint-invoke-ra.mir +++ b/llvm/test/CodeGen/X86/statepoint-invoke-ra.mir @@ -9,25 +9,25 @@ # CHECK: %8:gr64 = COPY $rdx # CHECK: %7:gr32 = COPY $esi # CHECK: %6:gr64 = COPY $rdi -# CHECK: %30:gr64 = MOV64rm $noreg, 1, $noreg, 0, $noreg :: (load 8 from `i8 addrspace(1)* addrspace(1)* null`, addrspace 1) -# CHECK: undef %33.sub_32bit:gr64_nosp = MOV32rm $noreg, 1, $noreg, 0, $noreg :: (load 4 from `i32 addrspace(1)* null`, addrspace 1) -# CHECK: %35:gr32 = MOV32rm %8, 1, $noreg, 96, $noreg :: (load 4 from %ir.tmp4, addrspace 1) -# CHECK: %43:gr32 = MOV32rm %8, 1, $noreg, 160, $noreg :: (load 4 from %ir.tmp6, addrspace 1) -# CHECK: %41:gr64 = MOV64rm undef %15:gr64, 1, $noreg, 0, $noreg :: (load 8 from `i8 addrspace(1)* addrspace(1)* undef`, addrspace 1) -# CHECK: %38:gr32 = MOV32rm %8, 1, $noreg, 352, $noreg :: (load 4 from %ir.tmp10, addrspace 1) -# CHECK: %31:gr64 = MOV64rm %6, 1, $noreg, 96, $noreg :: (load 8 from %ir.tmp13, addrspace 1) -# CHECK: %32:gr64 = MOV64rm %6, 1, $noreg, 104, $noreg :: (load 8 from %ir.tmp16, addrspace 1) +# CHECK: %30:gr64 = MOV64rm $noreg, 1, $noreg, 0, $noreg :: (load (s64) from `i8 addrspace(1)* addrspace(1)* null`, addrspace 1) +# CHECK: undef %33.sub_32bit:gr64_nosp = MOV32rm $noreg, 1, $noreg, 0, $noreg :: (load (s32) from `i32 addrspace(1)* null`, addrspace 1) +# CHECK: %35:gr32 = MOV32rm %8, 1, $noreg, 96, $noreg :: (load (s32) from %ir.tmp4, addrspace 1) +# CHECK: %43:gr32 = MOV32rm %8, 1, $noreg, 160, $noreg :: (load (s32) from %ir.tmp6, addrspace 1) +# CHECK: %41:gr64 = MOV64rm undef %15:gr64, 1, $noreg, 0, $noreg :: (load (s64) from `i8 addrspace(1)* addrspace(1)* undef`, addrspace 1) +# CHECK: %38:gr32 = MOV32rm %8, 1, $noreg, 352, $noreg :: (load (s32) from %ir.tmp10, addrspace 1) +# CHECK: %31:gr64 = MOV64rm %6, 1, $noreg, 96, $noreg :: (load (s64) from %ir.tmp13, addrspace 1) +# CHECK: %32:gr64 = MOV64rm %6, 1, $noreg, 104, $noreg :: (load (s64) from %ir.tmp16, addrspace 1) # CHECK: %45:gr32 = LEA64_32r %33, 1, $noreg, -1, $noreg -# CHECK: MOV32mr %stack.1, 1, $noreg, 0, $noreg, %7 :: (store 4 into %stack.1) -# CHECK: MOV32mr %stack.9, 1, $noreg, 0, $noreg, %45 :: (store 4 into %stack.9) -# CHECK: MOV32mr %stack.0, 1, $noreg, 0, $noreg, %45 :: (store 4 into %stack.0) -# CHECK: MOV32mr %stack.2, 1, $noreg, 0, $noreg, %33.sub_32bit :: (store 4 into %stack.2) -# CHECK: MOV32mr %stack.6, 1, $noreg, 0, $noreg, %35 :: (store 4 into %stack.6) -# CHECK: MOV32mr %stack.3, 1, $noreg, 0, $noreg, %35 :: (store 4 into %stack.3) -# CHECK: MOV32mr %stack.8, 1, $noreg, 0, $noreg, %43 :: (store 4 into %stack.8) -# CHECK: MOV32mr %stack.4, 1, $noreg, 0, $noreg, %43 :: (store 4 into %stack.4) -# CHECK: MOV32mr %stack.7, 1, $noreg, 0, $noreg, %38 :: (store 4 into %stack.7) -# CHECK: MOV32mr %stack.5, 1, $noreg, 0, $noreg, %38 :: (store 4 into %stack.5) +# CHECK: MOV32mr %stack.1, 1, $noreg, 0, $noreg, %7 :: (store (s32) into %stack.1) +# CHECK: MOV32mr %stack.9, 1, $noreg, 0, $noreg, %45 :: (store (s32) into %stack.9) +# CHECK: MOV32mr %stack.0, 1, $noreg, 0, $noreg, %45 :: (store (s32) into %stack.0) +# CHECK: MOV32mr %stack.2, 1, $noreg, 0, $noreg, %33.sub_32bit :: (store (s32) into %stack.2) +# CHECK: MOV32mr %stack.6, 1, $noreg, 0, $noreg, %35 :: (store (s32) into %stack.6) +# CHECK: MOV32mr %stack.3, 1, $noreg, 0, $noreg, %35 :: (store (s32) into %stack.3) +# CHECK: MOV32mr %stack.8, 1, $noreg, 0, $noreg, %43 :: (store (s32) into %stack.8) +# CHECK: MOV32mr %stack.4, 1, $noreg, 0, $noreg, %43 :: (store (s32) into %stack.4) +# CHECK: MOV32mr %stack.7, 1, $noreg, 0, $noreg, %38 :: (store (s32) into %stack.7) +# CHECK: MOV32mr %stack.5, 1, $noreg, 0, $noreg, %38 :: (store (s32) into %stack.5) # CHECK: EH_LABEL # CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp # CHECK: $edi = MOV32r0 implicit-def dead $eflags @@ -35,29 +35,29 @@ # CHECK: $ecx = COPY %7 # CHECK: $r8d = MOV32r0 implicit-def dead $eflags # CHECK: %40:gr64 = COPY %41 -# CHECK: %32:gr64, %31:gr64, %30:gr64, %40:gr64 = STATEPOINT 1, 16, 5, undef %23:gr64, $edi, $rsi, undef $edx, $ecx, $r8d, 2, 0, 2, 0, 2, 11, 1, 4, %stack.0, 0, %30, 1, 4, %stack.1, 0, 1, 4, %stack.2, 0, 1, 4, %stack.3, 0, 1, 4, %stack.4, 0, 1, 4, %stack.2, 0, %40, 1, 4, %stack.5, 0, %31, %32, 2, 4, %32(tied-def 0), %31(tied-def 1), %30(tied-def 2), %40(tied-def 3), 2, 0, 2, 4, 0, 0, 1, 1, 2, 2, 3, 3, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def dead $eax :: (volatile load store 4 on %stack.0), (volatile load store 4 on %stack.1), (volatile load store 4 on %stack.2), (volatile load store 4 on %stack.3), (volatile load store 4 on %stack.4), (volatile load store 4 on %stack.5) +# CHECK: %32:gr64, %31:gr64, %30:gr64, %40:gr64 = STATEPOINT 1, 16, 5, undef %23:gr64, $edi, $rsi, undef $edx, $ecx, $r8d, 2, 0, 2, 0, 2, 11, 1, 4, %stack.0, 0, %30, 1, 4, %stack.1, 0, 1, 4, %stack.2, 0, 1, 4, %stack.3, 0, 1, 4, %stack.4, 0, 1, 4, %stack.2, 0, %40, 1, 4, %stack.5, 0, %31, %32, 2, 4, %32(tied-def 0), %31(tied-def 1), %30(tied-def 2), %40(tied-def 3), 2, 0, 2, 4, 0, 0, 1, 1, 2, 2, 3, 3, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def dead $eax :: (volatile load store (s32) on %stack.0), (volatile load store (s32) on %stack.1), (volatile load store (s32) on %stack.2), (volatile load store (s32) on %stack.3), (volatile load store (s32) on %stack.4), (volatile load store (s32) on %stack.5) # CHECK: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp # CHECK: EH_LABEL # CHECK: JMP_1 %bb.1 # CHECK: bb.1.bb21: # CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp # CHECK: $edi = MOV32ri 10 -# CHECK: dead %30:gr64, dead %31:gr64, dead %32:gr64 = STATEPOINT 2882400000, 0, 1, target-flags(x86-plt) @quux, $edi, 2, 0, 2, 2, 2, 10, 1, 4, %stack.9, 0, %30, %7, %33.sub_32bit, 1, 4, %stack.6, 0, 1, 4, %stack.8, 0, %33.sub_32bit, 1, 4, %stack.7, 0, %31, %32, 2, 3, %30(tied-def 0), %31(tied-def 1), %32(tied-def 2), 2, 0, 2, 3, 0, 0, 1, 1, 2, 2, csr_64, implicit-def $rsp, implicit-def $ssp :: (load 4 from %stack.6), (load 4 from %stack.7), (load 4 from %stack.8), (load 4 from %stack.9) +# CHECK: dead %30:gr64, dead %31:gr64, dead %32:gr64 = STATEPOINT 2882400000, 0, 1, target-flags(x86-plt) @quux, $edi, 2, 0, 2, 2, 2, 10, 1, 4, %stack.9, 0, %30, %7, %33.sub_32bit, 1, 4, %stack.6, 0, 1, 4, %stack.8, 0, %33.sub_32bit, 1, 4, %stack.7, 0, %31, %32, 2, 3, %30(tied-def 0), %31(tied-def 1), %32(tied-def 2), 2, 0, 2, 3, 0, 0, 1, 1, 2, 2, csr_64, implicit-def $rsp, implicit-def $ssp :: (load (s32) from %stack.6), (load (s32) from %stack.7), (load (s32) from %stack.8), (load (s32) from %stack.9) # CHECK: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp # CHECK: RET 0 # CHECK: bb.2.bb26 (landing-pad): # CHECK: liveins: $rax, $rdx # CHECK: EH_LABEL -# CHECK: MOV32mr %stack.1, 1, $noreg, 0, $noreg, %33.sub_32bit :: (store 4 into %stack.1) -# CHECK: MOV32mr %stack.0, 1, $noreg, 0, $noreg, %7 :: (store 4 into %stack.0) -# CHECK: %36:gr32 = MOV32rm %stack.6, 1, $noreg, 0, $noreg :: (load 4 from %stack.6) -# CHECK: MOV32mr %stack.2, 1, $noreg, 0, $noreg, %36 :: (store 4 into %stack.2) -# CHECK: MOV32mr %stack.3, 1, $noreg, 0, $noreg, %33.sub_32bit :: (store 4 into %stack.3) -# CHECK: %39:gr32 = MOV32rm %stack.7, 1, $noreg, 0, $noreg :: (load 4 from %stack.7) -# CHECK: MOV32mr %stack.4, 1, $noreg, 0, $noreg, %39 :: (store 4 into %stack.4) +# CHECK: MOV32mr %stack.1, 1, $noreg, 0, $noreg, %33.sub_32bit :: (store (s32) into %stack.1) +# CHECK: MOV32mr %stack.0, 1, $noreg, 0, $noreg, %7 :: (store (s32) into %stack.0) +# CHECK: %36:gr32 = MOV32rm %stack.6, 1, $noreg, 0, $noreg :: (load (s32) from %stack.6) +# CHECK: MOV32mr %stack.2, 1, $noreg, 0, $noreg, %36 :: (store (s32) into %stack.2) +# CHECK: MOV32mr %stack.3, 1, $noreg, 0, $noreg, %33.sub_32bit :: (store (s32) into %stack.3) +# CHECK: %39:gr32 = MOV32rm %stack.7, 1, $noreg, 0, $noreg :: (load (s32) from %stack.7) +# CHECK: MOV32mr %stack.4, 1, $noreg, 0, $noreg, %39 :: (store (s32) into %stack.4) # CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp # CHECK: $edi = MOV32ri -271 -# CHECK: dead %40:gr64 = STATEPOINT 2882400000, 0, 1, target-flags(x86-plt) @quux, $edi, 2, 0, 2, 0, 2, 6, 1, 4, %stack.0, 0, 1, 4, %stack.1, 0, 1, 4, %stack.2, 0, 1, 4, %stack.3, 0, %40, 1, 4, %stack.4, 0, 2, 1, %40(tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 4 on %stack.0), (volatile load store 4 on %stack.1), (volatile load store 4 on %stack.2), (volatile load store 4 on %stack.3), (volatile load store 4 on %stack.4) +# CHECK: dead %40:gr64 = STATEPOINT 2882400000, 0, 1, target-flags(x86-plt) @quux, $edi, 2, 0, 2, 0, 2, 6, 1, 4, %stack.0, 0, 1, 4, %stack.1, 0, 1, 4, %stack.2, 0, 1, 4, %stack.3, 0, %40, 1, 4, %stack.4, 0, 2, 1, %40(tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store (s32) on %stack.0), (volatile load store (s32) on %stack.1), (volatile load store (s32) on %stack.2), (volatile load store (s32) on %stack.3), (volatile load store (s32) on %stack.4) # CHECK: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp --- | @@ -220,22 +220,22 @@ body: | %8:gr64 = COPY killed $rdx %7:gr32 = COPY killed $esi %6:gr64 = COPY killed $rdi - %13:gr64 = MOV64rm $noreg, 1, $noreg, 0, $noreg :: (load 8 from `i8 addrspace(1)* addrspace(1)* null`, addrspace 1) - %3:gr32 = MOV32rm $noreg, 1, $noreg, 0, $noreg :: (load 4 from `i32 addrspace(1)* null`, addrspace 1) + %13:gr64 = MOV64rm $noreg, 1, $noreg, 0, $noreg :: (load (s64) from `i8 addrspace(1)* addrspace(1)* null`, addrspace 1) + %3:gr32 = MOV32rm $noreg, 1, $noreg, 0, $noreg :: (load (s32) from `i32 addrspace(1)* null`, addrspace 1) %0:gr32 = COPY %3 - %1:gr32 = MOV32rm %8, 1, $noreg, 96, $noreg :: (load 4 from %ir.tmp4, addrspace 1) - %2:gr32 = MOV32rm %8, 1, $noreg, 160, $noreg :: (load 4 from %ir.tmp6, addrspace 1) - %14:gr64 = MOV64rm undef %15:gr64, 1, $noreg, 0, $noreg :: (load 8 from `i8 addrspace(1)* addrspace(1)* undef`, addrspace 1) - %4:gr32 = MOV32rm killed %8, 1, $noreg, 352, $noreg :: (load 4 from %ir.tmp10, addrspace 1) - %16:gr64 = MOV64rm %6, 1, $noreg, 96, $noreg :: (load 8 from %ir.tmp13, addrspace 1) - %17:gr64 = MOV64rm killed %6, 1, $noreg, 104, $noreg :: (load 8 from %ir.tmp16, addrspace 1) + %1:gr32 = MOV32rm %8, 1, $noreg, 96, $noreg :: (load (s32) from %ir.tmp4, addrspace 1) + %2:gr32 = MOV32rm %8, 1, $noreg, 160, $noreg :: (load (s32) from %ir.tmp6, addrspace 1) + %14:gr64 = MOV64rm undef %15:gr64, 1, $noreg, 0, $noreg :: (load (s64) from `i8 addrspace(1)* addrspace(1)* undef`, addrspace 1) + %4:gr32 = MOV32rm killed %8, 1, $noreg, 352, $noreg :: (load (s32) from %ir.tmp10, addrspace 1) + %16:gr64 = MOV64rm %6, 1, $noreg, 96, $noreg :: (load (s64) from %ir.tmp13, addrspace 1) + %17:gr64 = MOV64rm killed %6, 1, $noreg, 104, $noreg :: (load (s64) from %ir.tmp16, addrspace 1) %5:gr32 = DEC32r %3, implicit-def dead $eflags - MOV32mr %stack.1, 1, $noreg, 0, $noreg, %7 :: (store 4 into %stack.1) - MOV32mr %stack.0, 1, $noreg, 0, $noreg, %5 :: (store 4 into %stack.0) - MOV32mr %stack.2, 1, $noreg, 0, $noreg, %3 :: (store 4 into %stack.2) - MOV32mr %stack.3, 1, $noreg, 0, $noreg, %1 :: (store 4 into %stack.3) - MOV32mr %stack.4, 1, $noreg, 0, $noreg, %2 :: (store 4 into %stack.4) - MOV32mr %stack.5, 1, $noreg, 0, $noreg, %4 :: (store 4 into %stack.5) + MOV32mr %stack.1, 1, $noreg, 0, $noreg, %7 :: (store (s32) into %stack.1) + MOV32mr %stack.0, 1, $noreg, 0, $noreg, %5 :: (store (s32) into %stack.0) + MOV32mr %stack.2, 1, $noreg, 0, $noreg, %3 :: (store (s32) into %stack.2) + MOV32mr %stack.3, 1, $noreg, 0, $noreg, %1 :: (store (s32) into %stack.3) + MOV32mr %stack.4, 1, $noreg, 0, $noreg, %2 :: (store (s32) into %stack.4) + MOV32mr %stack.5, 1, $noreg, 0, $noreg, %4 :: (store (s32) into %stack.5) EH_LABEL ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp %20:gr32 = MOV32r0 implicit-def dead $eflags @@ -244,7 +244,7 @@ body: | $rsi = COPY killed %21 $ecx = COPY %7 $r8d = COPY killed %20 - %9:gr64, %10:gr64, %11:gr64, %12:gr64 = STATEPOINT 1, 16, 5, undef %23:gr64, killed $edi, killed $rsi, undef $edx, killed $ecx, killed $r8d, 2, 0, 2, 0, 2, 11, 1, 4, %stack.0, 0, killed %13, 1, 4, %stack.1, 0, 1, 4, %stack.2, 0, 1, 4, %stack.3, 0, 1, 4, %stack.4, 0, 1, 4, %stack.2, 0, killed %14, 1, 4, %stack.5, 0, killed %16, killed %17, 2, 4, %17(tied-def 0), %16(tied-def 1), %13(tied-def 2), %14(tied-def 3), 2, 0, 2, 4, 0, 0, 1, 1, 2, 2, 3, 3, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def dead $eax :: (volatile load store 4 on %stack.0), (volatile load store 4 on %stack.1), (volatile load store 4 on %stack.2), (volatile load store 4 on %stack.3), (volatile load store 4 on %stack.4), (volatile load store 4 on %stack.5) + %9:gr64, %10:gr64, %11:gr64, %12:gr64 = STATEPOINT 1, 16, 5, undef %23:gr64, killed $edi, killed $rsi, undef $edx, killed $ecx, killed $r8d, 2, 0, 2, 0, 2, 11, 1, 4, %stack.0, 0, killed %13, 1, 4, %stack.1, 0, 1, 4, %stack.2, 0, 1, 4, %stack.3, 0, 1, 4, %stack.4, 0, 1, 4, %stack.2, 0, killed %14, 1, 4, %stack.5, 0, killed %16, killed %17, 2, 4, %17(tied-def 0), %16(tied-def 1), %13(tied-def 2), %14(tied-def 3), 2, 0, 2, 4, 0, 0, 1, 1, 2, 2, 3, 3, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def dead $eax :: (volatile load store (s32) on %stack.0), (volatile load store (s32) on %stack.1), (volatile load store (s32) on %stack.2), (volatile load store (s32) on %stack.3), (volatile load store (s32) on %stack.4), (volatile load store (s32) on %stack.5) ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp EH_LABEL JMP_1 %bb.1 @@ -261,15 +261,15 @@ body: | liveins: $rax, $rdx EH_LABEL - MOV32mr %stack.1, 1, $noreg, 0, $noreg, killed %0 :: (store 4 into %stack.1) - MOV32mr %stack.0, 1, $noreg, 0, $noreg, killed %7 :: (store 4 into %stack.0) - MOV32mr %stack.2, 1, $noreg, 0, $noreg, killed %1 :: (store 4 into %stack.2) - MOV32mr %stack.3, 1, $noreg, 0, $noreg, killed %3 :: (store 4 into %stack.3) - MOV32mr %stack.4, 1, $noreg, 0, $noreg, killed %4 :: (store 4 into %stack.4) + MOV32mr %stack.1, 1, $noreg, 0, $noreg, killed %0 :: (store (s32) into %stack.1) + MOV32mr %stack.0, 1, $noreg, 0, $noreg, killed %7 :: (store (s32) into %stack.0) + MOV32mr %stack.2, 1, $noreg, 0, $noreg, killed %1 :: (store (s32) into %stack.2) + MOV32mr %stack.3, 1, $noreg, 0, $noreg, killed %3 :: (store (s32) into %stack.3) + MOV32mr %stack.4, 1, $noreg, 0, $noreg, killed %4 :: (store (s32) into %stack.4) ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp %27:gr32 = MOV32ri -271 $edi = COPY killed %27 - dead %28:gr64 = STATEPOINT 2882400000, 0, 1, target-flags(x86-plt) @quux, killed $edi, 2, 0, 2, 0, 2, 6, 1, 4, %stack.0, 0, 1, 4, %stack.1, 0, 1, 4, %stack.2, 0, 1, 4, %stack.3, 0, killed %12, 1, 4, %stack.4, 0, 2, 1, %12(tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 4 on %stack.0), (volatile load store 4 on %stack.1), (volatile load store 4 on %stack.2), (volatile load store 4 on %stack.3), (volatile load store 4 on %stack.4) + dead %28:gr64 = STATEPOINT 2882400000, 0, 1, target-flags(x86-plt) @quux, killed $edi, 2, 0, 2, 0, 2, 6, 1, 4, %stack.0, 0, 1, 4, %stack.1, 0, 1, 4, %stack.2, 0, 1, 4, %stack.3, 0, killed %12, 1, 4, %stack.4, 0, 2, 1, %12(tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store (s32) on %stack.0), (volatile load store (s32) on %stack.1), (volatile load store (s32) on %stack.2), (volatile load store (s32) on %stack.3), (volatile load store (s32) on %stack.4) ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ... diff --git a/llvm/test/CodeGen/X86/statepoint-ra.ll b/llvm/test/CodeGen/X86/statepoint-ra.ll index 38a14017fab5d..648752c7ebaa3 100644 --- a/llvm/test/CodeGen/X86/statepoint-ra.ll +++ b/llvm/test/CodeGen/X86/statepoint-ra.ll @@ -81,32 +81,32 @@ declare token @llvm.experimental.gc.statepoint.p0f_isVoidp1i8f64f64f64f64f64f64f ;CHECK: %58:fr64 = COPY $xmm1 ;CHECK: %62:fr64 = COPY $xmm0 ;CHECK: %3:gr64 = COPY $rdi -;CHECK: %76:fr64 = MOVSDrm_alt %fixed-stack.0, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.0) -;CHECK: %14:fr64 = MOVSDrm_alt %fixed-stack.1, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.1, align 16) -;CHECK: %66:fr64 = MOVSDrm_alt %fixed-stack.2, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.2) -;CHECK: %71:fr64 = MOVSDrm_alt %fixed-stack.3, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.3, align 16) -;CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, %3 :: (store 8 into %stack.0) +;CHECK: %76:fr64 = MOVSDrm_alt %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.0) +;CHECK: %14:fr64 = MOVSDrm_alt %fixed-stack.1, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.1, align 16) +;CHECK: %66:fr64 = MOVSDrm_alt %fixed-stack.2, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.2) +;CHECK: %71:fr64 = MOVSDrm_alt %fixed-stack.3, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.3, align 16) +;CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, %3 :: (store (s64) into %stack.0) ;CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp -;CHECK: STATEPOINT 2882400000, 0, 0, target-flags(x86-plt) @blam, 2, 9, 2, 0, 2, 59, 2, 0, 2, 1, 2, 0, 2, 0, 2, 0, 2, 26, 2, 0, 2, 0, 1, 8, %stack.0, 0, 2, 4, %62, 2, 7, 2, 0, 2, 4, %58, 2, 7, 2, 0, 2, 4, %6, 2, 7, 2, 0, 2, 4, %53, 2, 7, 2, 0, 2, 4, %45, 2, 7, 2, 0, 2, 4, %41, 2, 7, 2, 0, 2, 4, %10, 2, 7, 2, 0, 2, 4, %49, 2, 7, 2, 0, 2, 4, %71, 2, 7, 2, 0, 2, 4, %66, 2, 7, 2, 0, 2, 4, %14, 2, 7, 2, 0, 2, 4, %76, 2, 7, 2, 0, 2, 7, 2, 0, 2, 1, 1, 8, %stack.0, 0, 2, 0, 2, 1, 0, 0, csr_64_mostregs, implicit-def $rsp, implicit-def $ssp :: (volatile load store 8 on %stack.0) +;CHECK: STATEPOINT 2882400000, 0, 0, target-flags(x86-plt) @blam, 2, 9, 2, 0, 2, 59, 2, 0, 2, 1, 2, 0, 2, 0, 2, 0, 2, 26, 2, 0, 2, 0, 1, 8, %stack.0, 0, 2, 4, %62, 2, 7, 2, 0, 2, 4, %58, 2, 7, 2, 0, 2, 4, %6, 2, 7, 2, 0, 2, 4, %53, 2, 7, 2, 0, 2, 4, %45, 2, 7, 2, 0, 2, 4, %41, 2, 7, 2, 0, 2, 4, %10, 2, 7, 2, 0, 2, 4, %49, 2, 7, 2, 0, 2, 4, %71, 2, 7, 2, 0, 2, 4, %66, 2, 7, 2, 0, 2, 4, %14, 2, 7, 2, 0, 2, 4, %76, 2, 7, 2, 0, 2, 7, 2, 0, 2, 1, 1, 8, %stack.0, 0, 2, 0, 2, 1, 0, 0, csr_64_mostregs, implicit-def $rsp, implicit-def $ssp :: (volatile load store (s64) on %stack.0) ;CHECK: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ;CHECK: %17:gr32 = MOV32r0 implicit-def dead $eflags ;CHECK: TEST8rr %17.sub_8bit, %17.sub_8bit, implicit-def $eflags -;CHECK: MOVSDmr %stack.1, 1, $noreg, 0, $noreg, %41 :: (store 8 into %stack.1) -;CHECK: MOVSDmr %stack.2, 1, $noreg, 0, $noreg, %45 :: (store 8 into %stack.2) -;CHECK: MOVSDmr %stack.5, 1, $noreg, 0, $noreg, %58 :: (store 8 into %stack.5) -;CHECK: MOVSDmr %stack.6, 1, $noreg, 0, $noreg, %62 :: (store 8 into %stack.6) +;CHECK: MOVSDmr %stack.1, 1, $noreg, 0, $noreg, %41 :: (store (s64) into %stack.1) +;CHECK: MOVSDmr %stack.2, 1, $noreg, 0, $noreg, %45 :: (store (s64) into %stack.2) +;CHECK: MOVSDmr %stack.5, 1, $noreg, 0, $noreg, %58 :: (store (s64) into %stack.5) +;CHECK: MOVSDmr %stack.6, 1, $noreg, 0, $noreg, %62 :: (store (s64) into %stack.6) ;CHECK: JCC_1 %bb.2, 4, implicit killed $eflags ;CHECK: bb.1: ;CHECK: successors: %bb.3(0x80000000) -;CHECK: %54:fr64 = MOVSDrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load 8 from constant-pool) -;CHECK: MOVSDmr %stack.3, 1, $noreg, 0, $noreg, %54 :: (store 8 into %stack.3) -;CHECK: MOVSDmr %stack.4, 1, $noreg, 0, $noreg, %54 :: (store 8 into %stack.4) -;CHECK: MOVSDmr %stack.7, 1, $noreg, 0, $noreg, %54 :: (store 8 into %stack.7) +;CHECK: %54:fr64 = MOVSDrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load (s64) from constant-pool) +;CHECK: MOVSDmr %stack.3, 1, $noreg, 0, $noreg, %54 :: (store (s64) into %stack.3) +;CHECK: MOVSDmr %stack.4, 1, $noreg, 0, $noreg, %54 :: (store (s64) into %stack.4) +;CHECK: MOVSDmr %stack.7, 1, $noreg, 0, $noreg, %54 :: (store (s64) into %stack.7) ;CHECK: JMP_1 %bb.3 ;CHECK: bb.2.bb13: ;CHECK: successors: %bb.3(0x80000000) ;CHECK: ADJCALLSTACKDOWN64 8, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp -;CHECK: MOVSDmr $rsp, 1, $noreg, 0, $noreg, %14 :: (store 8 into stack) +;CHECK: MOVSDmr $rsp, 1, $noreg, 0, $noreg, %14 :: (store (s64) into stack) ;CHECK: dead $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi ;CHECK: $xmm0 = COPY %62 ;CHECK: $xmm1 = COPY %58 @@ -116,10 +116,10 @@ declare token @llvm.experimental.gc.statepoint.p0f_isVoidp1i8f64f64f64f64f64f64f ;CHECK: $xmm5 = COPY %10 ;CHECK: $xmm6 = COPY %71 ;CHECK: $xmm7 = COPY %66 -;CHECK: MOVSDmr %stack.3, 1, $noreg, 0, $noreg, %49 :: (store 8 into %stack.3) -;CHECK: MOVSDmr %stack.4, 1, $noreg, 0, $noreg, %53 :: (store 8 into %stack.4) -;CHECK: MOVSDmr %stack.7, 1, $noreg, 0, $noreg, %76 :: (store 8 into %stack.7) -;CHECK: STATEPOINT 2, 5, 9, undef %22:gr64, $rdi, $xmm0, $xmm1, $xmm2, $xmm3, $xmm4, $xmm5, $xmm6, $xmm7, 2, 0, 2, 0, 2, 59, 2, 0, 2, 2, 2, 0, 2, 70, 2, 0, 2, 26, 2, 0, 2, 0, 2, 0, 2, 4, 1, 8, %stack.6, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.5, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.4, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.2, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.1, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.3, 0, 2, 7, 2, 0, 2, 4, 1, 8, %fixed-stack.3, 0, 2, 7, 2, 0, 2, 4, 1, 8, %fixed-stack.2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 4, 1, 8, %fixed-stack.0, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 1, 2, 0, 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def dead $eax :: (load 8 from %stack.1), (load 8 from %stack.2), (load 8 from %stack.3), (load 8 from %stack.4), (load 8 from %stack.5), (load 8 from %stack.6), (load 8 from %fixed-stack.2), (load 8 from %fixed-stack.3, align 16), (load 8 from %fixed-stack.0) +;CHECK: MOVSDmr %stack.3, 1, $noreg, 0, $noreg, %49 :: (store (s64) into %stack.3) +;CHECK: MOVSDmr %stack.4, 1, $noreg, 0, $noreg, %53 :: (store (s64) into %stack.4) +;CHECK: MOVSDmr %stack.7, 1, $noreg, 0, $noreg, %76 :: (store (s64) into %stack.7) +;CHECK: STATEPOINT 2, 5, 9, undef %22:gr64, $rdi, $xmm0, $xmm1, $xmm2, $xmm3, $xmm4, $xmm5, $xmm6, $xmm7, 2, 0, 2, 0, 2, 59, 2, 0, 2, 2, 2, 0, 2, 70, 2, 0, 2, 26, 2, 0, 2, 0, 2, 0, 2, 4, 1, 8, %stack.6, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.5, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.4, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.2, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.1, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.3, 0, 2, 7, 2, 0, 2, 4, 1, 8, %fixed-stack.3, 0, 2, 7, 2, 0, 2, 4, 1, 8, %fixed-stack.2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 4, 1, 8, %fixed-stack.0, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 1, 2, 0, 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def dead $eax :: (load (s64) from %stack.1), (load (s64) from %stack.2), (load (s64) from %stack.3), (load (s64) from %stack.4), (load (s64) from %stack.5), (load (s64) from %stack.6), (load (s64) from %fixed-stack.2), (load (s64) from %fixed-stack.3, align 16), (load (s64) from %fixed-stack.0) ;CHECK: ADJCALLSTACKUP64 8, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ;CHECK: bb.3.bb15: ;CHECK: successors: %bb.7(0x7ffff800), %bb.4(0x00000800) @@ -132,29 +132,29 @@ declare token @llvm.experimental.gc.statepoint.p0f_isVoidp1i8f64f64f64f64f64f64f ;CHECK: EH_LABEL ;CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ;CHECK: $edx = MOV32r0 implicit-def dead $eflags -;CHECK: STATEPOINT 1, 16, 3, undef %29:gr64, undef $edi, undef $rsi, $edx, 2, 0, 2, 0, 2, 105, 2, 0, 2, 2, 2, 0, 2, 97, 2, 0, 2, 26, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 0, 2, 2, 2, 3, 2, 0, 2, 20, 2, 0, 2, 0, 2, 4278124286, 2, 4, 1, 8, %stack.6, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.5, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.4, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.2, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.1, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.3, 0, 2, 7, 2, 0, 2, 4, 1, 8, %fixed-stack.3, 0, 2, 7, 2, 0, 2, 4, 1, 8, %fixed-stack.2, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.7, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 1, 2, 4278124286, 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def dead $eax :: (load 8 from %stack.1), (load 8 from %stack.2), (load 8 from %stack.3), (load 8 from %stack.4), (load 8 from %stack.5), (load 8 from %stack.6), (load 8 from %fixed-stack.2), (load 8 from %fixed-stack.3, align 16), (load 8 from %stack.7) +;CHECK: STATEPOINT 1, 16, 3, undef %29:gr64, undef $edi, undef $rsi, $edx, 2, 0, 2, 0, 2, 105, 2, 0, 2, 2, 2, 0, 2, 97, 2, 0, 2, 26, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 0, 2, 2, 2, 3, 2, 0, 2, 20, 2, 0, 2, 0, 2, 4278124286, 2, 4, 1, 8, %stack.6, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.5, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.4, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.2, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.1, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.3, 0, 2, 7, 2, 0, 2, 4, 1, 8, %fixed-stack.3, 0, 2, 7, 2, 0, 2, 4, 1, 8, %fixed-stack.2, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.7, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 1, 2, 4278124286, 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def dead $eax :: (load (s64) from %stack.1), (load (s64) from %stack.2), (load (s64) from %stack.3), (load (s64) from %stack.4), (load (s64) from %stack.5), (load (s64) from %stack.6), (load (s64) from %fixed-stack.2), (load (s64) from %fixed-stack.3, align 16), (load (s64) from %stack.7) ;CHECK: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ;CHECK: EH_LABEL ;CHECK: JMP_1 %bb.5 ;CHECK: bb.5.bb21: ;CHECK: successors: ;CHECK: ADJCALLSTACKDOWN64 8, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp -;CHECK: %81:fr64 = MOVSDrm_alt %stack.7, 1, $noreg, 0, $noreg :: (load 8 from %stack.7) -;CHECK: MOVSDmr $rsp, 1, $noreg, 0, $noreg, %81 :: (store 8 into stack) -;CHECK: $xmm0 = MOVSDrm_alt %stack.6, 1, $noreg, 0, $noreg :: (load 8 from %stack.6) -;CHECK: $xmm1 = MOVSDrm_alt %stack.5, 1, $noreg, 0, $noreg :: (load 8 from %stack.5) -;CHECK: $xmm2 = MOVSDrm_alt %stack.4, 1, $noreg, 0, $noreg :: (load 8 from %stack.4) -;CHECK: $xmm3 = MOVSDrm_alt %stack.2, 1, $noreg, 0, $noreg :: (load 8 from %stack.2) -;CHECK: $xmm4 = MOVSDrm_alt %stack.1, 1, $noreg, 0, $noreg :: (load 8 from %stack.1) -;CHECK: $xmm5 = MOVSDrm_alt %stack.3, 1, $noreg, 0, $noreg :: (load 8 from %stack.3) -;CHECK: %74:fr64 = MOVSDrm_alt %fixed-stack.3, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.3, align 16) +;CHECK: %81:fr64 = MOVSDrm_alt %stack.7, 1, $noreg, 0, $noreg :: (load (s64) from %stack.7) +;CHECK: MOVSDmr $rsp, 1, $noreg, 0, $noreg, %81 :: (store (s64) into stack) +;CHECK: $xmm0 = MOVSDrm_alt %stack.6, 1, $noreg, 0, $noreg :: (load (s64) from %stack.6) +;CHECK: $xmm1 = MOVSDrm_alt %stack.5, 1, $noreg, 0, $noreg :: (load (s64) from %stack.5) +;CHECK: $xmm2 = MOVSDrm_alt %stack.4, 1, $noreg, 0, $noreg :: (load (s64) from %stack.4) +;CHECK: $xmm3 = MOVSDrm_alt %stack.2, 1, $noreg, 0, $noreg :: (load (s64) from %stack.2) +;CHECK: $xmm4 = MOVSDrm_alt %stack.1, 1, $noreg, 0, $noreg :: (load (s64) from %stack.1) +;CHECK: $xmm5 = MOVSDrm_alt %stack.3, 1, $noreg, 0, $noreg :: (load (s64) from %stack.3) +;CHECK: %74:fr64 = MOVSDrm_alt %fixed-stack.3, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.3, align 16) ;CHECK: %95:fr64 = COPY %74 ;CHECK: $xmm6 = COPY %95 ;CHECK: $esi = MOV32ri 51 -;CHECK: %69:fr64 = MOVSDrm_alt %fixed-stack.2, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.2) +;CHECK: %69:fr64 = MOVSDrm_alt %fixed-stack.2, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.2) ;CHECK: %97:fr64 = COPY %69 ;CHECK: $xmm7 = COPY %97 -;CHECK: STATEPOINT 2, 5, 10, undef %36:gr64, undef $rdi, $xmm0, $xmm1, $xmm2, $xmm3, $xmm4, $xmm5, $xmm6, $xmm7, killed $esi, 2, 0, 2, 0, 2, 105, 2, 0, 2, 2, 2, 0, 2, 97, 2, 0, 2, 26, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 2, 2, 2, 2, 46, 2, 0, 2, 20, 2, 0, 2, 0, 2, 4278124286, 2, 4, 1, 8, %stack.6, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.5, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.4, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.2, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.1, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.3, 0, 2, 7, 2, 0, 2, 4, 1, 8, %fixed-stack.3, 0, 2, 7, 2, 0, 2, 4, 1, 8, %fixed-stack.2, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.7, 0, 2, 7, 2, 0, 2, 3, 2, 51, 2, 1, 2, 4278124286, 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp :: (load 8 from %stack.7), (load 8 from %stack.6), (load 8 from %stack.5), (load 8 from %stack.4), (load 8 from %stack.2), (load 8 from %stack.1), (load 8 from %stack.3), (load 8 from %fixed-stack.3, align 16), (load 8 from %fixed-stack.2) +;CHECK: STATEPOINT 2, 5, 10, undef %36:gr64, undef $rdi, $xmm0, $xmm1, $xmm2, $xmm3, $xmm4, $xmm5, $xmm6, $xmm7, killed $esi, 2, 0, 2, 0, 2, 105, 2, 0, 2, 2, 2, 0, 2, 97, 2, 0, 2, 26, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 2, 2, 2, 2, 46, 2, 0, 2, 20, 2, 0, 2, 0, 2, 4278124286, 2, 4, 1, 8, %stack.6, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.5, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.4, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.2, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.1, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.3, 0, 2, 7, 2, 0, 2, 4, 1, 8, %fixed-stack.3, 0, 2, 7, 2, 0, 2, 4, 1, 8, %fixed-stack.2, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.7, 0, 2, 7, 2, 0, 2, 3, 2, 51, 2, 1, 2, 4278124286, 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp :: (load (s64) from %stack.7), (load (s64) from %stack.6), (load (s64) from %stack.5), (load (s64) from %stack.4), (load (s64) from %stack.2), (load (s64) from %stack.1), (load (s64) from %stack.3), (load (s64) from %fixed-stack.3, align 16), (load (s64) from %fixed-stack.2) ;CHECK: ADJCALLSTACKUP64 8, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ;CHECK: bb.6.bb23 (landing-pad): ;CHECK: liveins: $rax, $rdx diff --git a/llvm/test/CodeGen/X86/statepoint-vreg-details.ll b/llvm/test/CodeGen/X86/statepoint-vreg-details.ll index a940fc52ac497..6594d90089f5f 100644 --- a/llvm/test/CodeGen/X86/statepoint-vreg-details.ll +++ b/llvm/test/CodeGen/X86/statepoint-vreg-details.ll @@ -84,18 +84,18 @@ entry: define i32 addrspace(1)* @test_alloca(i32 addrspace(1)* %ptr) gc "statepoint-example" { ; CHECK-VREG-LABEL: name: test_alloca ; CHECK-VREG: %0:gr64 = COPY $rdi -; CHECK-VREG: MOV64mr %stack.0.alloca, 1, $noreg, 0, $noreg, %0 :: (store 8 into %ir.alloca) -; CHECK-VREG: %1:gr64 = STATEPOINT 0, 0, 0, @return_i1, 2, 0, 2, 0, 2, 0, 2, 1, %0(tied-def 0), 2, 1, 0, %stack.0.alloca, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def $al :: (volatile load store 8 on %stack.0.alloca) +; CHECK-VREG: MOV64mr %stack.0.alloca, 1, $noreg, 0, $noreg, %0 :: (store (s64) into %ir.alloca) +; CHECK-VREG: %1:gr64 = STATEPOINT 0, 0, 0, @return_i1, 2, 0, 2, 0, 2, 0, 2, 1, %0(tied-def 0), 2, 1, 0, %stack.0.alloca, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def $al :: (volatile load store (s64) on %stack.0.alloca) ; CHECK-VREG: %2:gr8 = COPY $al -; CHECK-VREG: %3:gr64 = MOV64rm %stack.0.alloca, 1, $noreg, 0, $noreg :: (dereferenceable load 8 from %ir.alloca) +; CHECK-VREG: %3:gr64 = MOV64rm %stack.0.alloca, 1, $noreg, 0, $noreg :: (dereferenceable load (s64) from %ir.alloca) ; CHECK-VREG: $rdi = COPY %1 ; CHECK-VREG: CALL64pcrel32 @consume, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp ; CHECK-PREG-LABEL: name: test_alloca ; CHECK-PREG: renamable $rbx = COPY $rdi -; CHECK-PREG: MOV64mr %stack.0.alloca, 1, $noreg, 0, $noreg, renamable $rbx :: (store 8 into %ir.alloca) -; CHECK-PREG: renamable $rbx = STATEPOINT 0, 0, 0, @return_i1, 2, 0, 2, 0, 2, 0, 2, 1, killed renamable $rbx(tied-def 0), 2, 1, 0, %stack.0.alloca, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def dead $al :: (volatile load store 8 on %stack.0.alloca) -; CHECK-PREG: renamable $r14 = MOV64rm %stack.0.alloca, 1, $noreg, 0, $noreg :: (dereferenceable load 8 from %ir.alloca) +; CHECK-PREG: MOV64mr %stack.0.alloca, 1, $noreg, 0, $noreg, renamable $rbx :: (store (s64) into %ir.alloca) +; CHECK-PREG: renamable $rbx = STATEPOINT 0, 0, 0, @return_i1, 2, 0, 2, 0, 2, 0, 2, 1, killed renamable $rbx(tied-def 0), 2, 1, 0, %stack.0.alloca, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def dead $al :: (volatile load store (s64) on %stack.0.alloca) +; CHECK-PREG: renamable $r14 = MOV64rm %stack.0.alloca, 1, $noreg, 0, $noreg :: (dereferenceable load (s64) from %ir.alloca) ; CHECK-PREG: $rdi = COPY killed renamable $rbx ; CHECK-PREG: CALL64pcrel32 @consume, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp @@ -272,9 +272,9 @@ entry: define <2 x i8 addrspace(1)*> @test_vector(<2 x i8 addrspace(1)*> %obj) gc "statepoint-example" { ; CHECK-VREG-LABEL: name: test_vector ; CHECK-VREG: %0:vr128 = COPY $xmm0 -; CHECK-VREG: MOVAPSmr %stack.0, 1, $noreg, 0, $noreg, %0 :: (store 16 into %stack.0) -; CHECK-VREG: STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 0, 2, 1, 1, 16, %stack.0, 0, 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 16 on %stack.0) -; CHECK-VREG: %1:vr128 = MOVAPSrm %stack.0, 1, $noreg, 0, $noreg :: (load 16 from %stack.0) +; CHECK-VREG: MOVAPSmr %stack.0, 1, $noreg, 0, $noreg, %0 :: (store (s128) into %stack.0) +; CHECK-VREG: STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 0, 2, 1, 1, 16, %stack.0, 0, 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store (s128) on %stack.0) +; CHECK-VREG: %1:vr128 = MOVAPSrm %stack.0, 1, $noreg, 0, $noreg :: (load (s128) from %stack.0) ; CHECK-VREG: $xmm0 = COPY %1 ; CHECK-VREG: RET 0, $xmm0 @@ -293,9 +293,9 @@ define void @test_limit(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspac ; CHECK-VREG: %2:gr64 = COPY $rdx ; CHECK-VREG: %1:gr64 = COPY $rsi ; CHECK-VREG: %0:gr64 = COPY $rdi -; CHECK-VREG: MOV64mr %stack.0, 1, $noreg, 0, $noreg, %0 :: (store 8 into %stack.0) -; CHECK-VREG: %5:gr64, %6:gr64, %7:gr64, %8:gr64 = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 0, 2, 5, %4(tied-def 0), %3(tied-def 1), %2(tied-def 2), %1(tied-def 3), 1, 8, %stack.0, 0, 2, 0, 2, 5, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 8 on %stack.0) -; CHECK-VREG: %9:gr64 = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load 8 from %stack.0) +; CHECK-VREG: MOV64mr %stack.0, 1, $noreg, 0, $noreg, %0 :: (store (s64) into %stack.0) +; CHECK-VREG: %5:gr64, %6:gr64, %7:gr64, %8:gr64 = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 0, 2, 5, %4(tied-def 0), %3(tied-def 1), %2(tied-def 2), %1(tied-def 3), 1, 8, %stack.0, 0, 2, 0, 2, 5, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store (s64) on %stack.0) +; CHECK-VREG: %9:gr64 = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %stack.0) ; CHECK-VREG: $rdi = COPY %9 ; CHECK-VREG: $rsi = COPY %8 ; CHECK-VREG: $rdx = COPY %7 diff --git a/llvm/test/CodeGen/X86/statepoint-vreg-folding.mir b/llvm/test/CodeGen/X86/statepoint-vreg-folding.mir index 37e9f1b153ed5..18cf7c8a96255 100644 --- a/llvm/test/CodeGen/X86/statepoint-vreg-folding.mir +++ b/llvm/test/CodeGen/X86/statepoint-vreg-folding.mir @@ -145,30 +145,30 @@ body: | ; CHECK-LABEL: name: test_spill ; CHECK: liveins: $rdi, $rsi, $rdx, $rcx, $r8, $r9 - ; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, $r9 :: (store 8 into %stack.0) - ; CHECK: MOV64mr %stack.1, 1, $noreg, 0, $noreg, $r8 :: (store 8 into %stack.1) - ; CHECK: MOV64mr %stack.2, 1, $noreg, 0, $noreg, $rcx :: (store 8 into %stack.2) + ; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, $r9 :: (store (s64) into %stack.0) + ; CHECK: MOV64mr %stack.1, 1, $noreg, 0, $noreg, $r8 :: (store (s64) into %stack.1) + ; CHECK: MOV64mr %stack.2, 1, $noreg, 0, $noreg, $rcx :: (store (s64) into %stack.2) ; CHECK: [[R1:%[0-9]+]]:gr64 = COPY $rdx ; CHECK: [[R2:%[0-9]+]]:gr64 = COPY $rsi ; CHECK: [[R3:%[0-9]+]]:gr64 = COPY $rdi - ; CHECK: [[R4:%[0-9]+]]:gr64 = MOV64rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.0, align 16) - ; CHECK: [[R5:%[0-9]+]]:gr64 = MOV64rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.1) - ; CHECK: [[R6:%[0-9]+]]:gr64 = MOV64rm %fixed-stack.2, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.2, align 16) + ; CHECK: [[R4:%[0-9]+]]:gr64 = MOV64rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.0, align 16) + ; CHECK: [[R5:%[0-9]+]]:gr64 = MOV64rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.1) + ; CHECK: [[R6:%[0-9]+]]:gr64 = MOV64rm %fixed-stack.2, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.2, align 16) ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - ; CHECK: [[R6]]:gr64, [[R5]]:gr64, [[R4]]:gr64, [[R1]]:gr64, [[R2]]:gr64, [[R3]]:gr64 = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 0, 2, 9, [[R6]](tied-def 0), [[R5]](tied-def 1), [[R4]](tied-def 2), 1, 8, %stack.0, 0, 1, 8, %stack.1, 0, 1, 8, %stack.2, 0, [[R1]](tied-def 3), [[R2]](tied-def 4), [[R3]](tied-def 5), 2, 0, 2, 9, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, csr_64, implicit-def $rsp, implicit-def $ssp :: (load store 8 on %stack.0), (load store 8 on %stack.1), (load store 8 on %stack.2) + ; CHECK: [[R6]]:gr64, [[R5]]:gr64, [[R4]]:gr64, [[R1]]:gr64, [[R2]]:gr64, [[R3]]:gr64 = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 0, 2, 9, [[R6]](tied-def 0), [[R5]](tied-def 1), [[R4]](tied-def 2), 1, 8, %stack.0, 0, 1, 8, %stack.1, 0, 1, 8, %stack.2, 0, [[R1]](tied-def 3), [[R2]](tied-def 4), [[R3]](tied-def 5), 2, 0, 2, 9, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, csr_64, implicit-def $rsp, implicit-def $ssp :: (load store (s64) on %stack.0), (load store (s64) on %stack.1), (load store (s64) on %stack.2) ; CHECK: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - ; CHECK: [[RES:%[0-9]+]]:gr32 = MOV32rm [[R3]], 1, $noreg, 4, $noreg :: (load 4 from %ir.gep00, addrspace 1) - ; CHECK: [[RES]]:gr32 = ADD32rm [[RES]], [[R2]], 1, $noreg, 8, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep01, addrspace 1) - ; CHECK: [[RES]]:gr32 = ADD32rm [[RES]], [[R1]], 1, $noreg, 12, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep02, addrspace 1) - ; CHECK: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm %stack.2, 1, $noreg, 0, $noreg :: (load 8 from %stack.2) - ; CHECK: [[RES]]:gr32 = ADD32rm [[RES]], [[MOV64rm]], 1, $noreg, 16, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep03, addrspace 1) - ; CHECK: [[MOV64rm1:%[0-9]+]]:gr64 = MOV64rm %stack.1, 1, $noreg, 0, $noreg :: (load 8 from %stack.1) - ; CHECK: [[RES]]:gr32 = ADD32rm [[RES]], [[MOV64rm1]], 1, $noreg, 20, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep04, addrspace 1) - ; CHECK: [[MOV64rm2:%[0-9]+]]:gr64 = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load 8 from %stack.0) - ; CHECK: [[RES]]:gr32 = ADD32rm [[RES]], [[MOV64rm2]], 1, $noreg, 24, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep05, addrspace 1) - ; CHECK: [[RES]]:gr32 = ADD32rm [[RES]], [[R4]], 1, $noreg, 28, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep06, addrspace 1) - ; CHECK: [[RES]]:gr32 = ADD32rm [[RES]], [[R5]], 1, $noreg, 32, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep07, addrspace 1) - ; CHECK: [[RES]]:gr32 = ADD32rm [[RES]], [[R6]], 1, $noreg, 36, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep08, addrspace 1) + ; CHECK: [[RES:%[0-9]+]]:gr32 = MOV32rm [[R3]], 1, $noreg, 4, $noreg :: (load (s32) from %ir.gep00, addrspace 1) + ; CHECK: [[RES]]:gr32 = ADD32rm [[RES]], [[R2]], 1, $noreg, 8, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep01, addrspace 1) + ; CHECK: [[RES]]:gr32 = ADD32rm [[RES]], [[R1]], 1, $noreg, 12, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep02, addrspace 1) + ; CHECK: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm %stack.2, 1, $noreg, 0, $noreg :: (load (s64) from %stack.2) + ; CHECK: [[RES]]:gr32 = ADD32rm [[RES]], [[MOV64rm]], 1, $noreg, 16, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep03, addrspace 1) + ; CHECK: [[MOV64rm1:%[0-9]+]]:gr64 = MOV64rm %stack.1, 1, $noreg, 0, $noreg :: (load (s64) from %stack.1) + ; CHECK: [[RES]]:gr32 = ADD32rm [[RES]], [[MOV64rm1]], 1, $noreg, 20, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep04, addrspace 1) + ; CHECK: [[MOV64rm2:%[0-9]+]]:gr64 = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %stack.0) + ; CHECK: [[RES]]:gr32 = ADD32rm [[RES]], [[MOV64rm2]], 1, $noreg, 24, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep05, addrspace 1) + ; CHECK: [[RES]]:gr32 = ADD32rm [[RES]], [[R4]], 1, $noreg, 28, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep06, addrspace 1) + ; CHECK: [[RES]]:gr32 = ADD32rm [[RES]], [[R5]], 1, $noreg, 32, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep07, addrspace 1) + ; CHECK: [[RES]]:gr32 = ADD32rm [[RES]], [[R6]], 1, $noreg, 36, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep08, addrspace 1) ; CHECK: $eax = COPY [[RES]] ; CHECK: RET 0, $eax %9:gr64 = COPY $r9 @@ -177,21 +177,21 @@ body: | %12:gr64 = COPY $rdx %13:gr64 = COPY $rsi %14:gr64 = COPY $rdi - %8:gr64 = MOV64rm %fixed-stack.2, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.2, align 16) - %7:gr64 = MOV64rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.1) - %6:gr64 = MOV64rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.0, align 16) + %8:gr64 = MOV64rm %fixed-stack.2, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.2, align 16) + %7:gr64 = MOV64rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.1) + %6:gr64 = MOV64rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.0, align 16) ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp %6:gr64, %7:gr64, %8:gr64, %9:gr64, %10:gr64, %11:gr64, %12:gr64, %13:gr64, %14:gr64 = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 0, 2, 9, %6(tied-def 0), %7(tied-def 1), %8(tied-def 2), %9(tied-def 3), %10(tied-def 4), %11(tied-def 5), %12(tied-def 6), %13(tied-def 7), %14(tied-def 8), 2, 0, 2, 9, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, csr_64, implicit-def $rsp, implicit-def $ssp ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - %20:gr32 = MOV32rm %14, 1, $noreg, 4, $noreg :: (load 4 from %ir.gep00, addrspace 1) - %20:gr32 = ADD32rm %20, %13, 1, $noreg, 8, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep01, addrspace 1) - %20:gr32 = ADD32rm %20, %12, 1, $noreg, 12, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep02, addrspace 1) - %20:gr32 = ADD32rm %20, %11, 1, $noreg, 16, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep03, addrspace 1) - %20:gr32 = ADD32rm %20, %10, 1, $noreg, 20, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep04, addrspace 1) - %20:gr32 = ADD32rm %20, %9, 1, $noreg, 24, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep05, addrspace 1) - %20:gr32 = ADD32rm %20, %8, 1, $noreg, 28, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep06, addrspace 1) - %20:gr32 = ADD32rm %20, %7, 1, $noreg, 32, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep07, addrspace 1) - %20:gr32 = ADD32rm %20, %6, 1, $noreg, 36, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep08, addrspace 1) + %20:gr32 = MOV32rm %14, 1, $noreg, 4, $noreg :: (load (s32) from %ir.gep00, addrspace 1) + %20:gr32 = ADD32rm %20, %13, 1, $noreg, 8, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep01, addrspace 1) + %20:gr32 = ADD32rm %20, %12, 1, $noreg, 12, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep02, addrspace 1) + %20:gr32 = ADD32rm %20, %11, 1, $noreg, 16, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep03, addrspace 1) + %20:gr32 = ADD32rm %20, %10, 1, $noreg, 20, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep04, addrspace 1) + %20:gr32 = ADD32rm %20, %9, 1, $noreg, 24, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep05, addrspace 1) + %20:gr32 = ADD32rm %20, %8, 1, $noreg, 28, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep06, addrspace 1) + %20:gr32 = ADD32rm %20, %7, 1, $noreg, 32, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep07, addrspace 1) + %20:gr32 = ADD32rm %20, %6, 1, $noreg, 36, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep08, addrspace 1) $eax = COPY %20 RET 0, killed $eax diff --git a/llvm/test/CodeGen/X86/statepoint-vreg-invoke.ll b/llvm/test/CodeGen/X86/statepoint-vreg-invoke.ll index 922d7c2bbbb14..dd9e92564b07a 100644 --- a/llvm/test/CodeGen/X86/statepoint-vreg-invoke.ll +++ b/llvm/test/CodeGen/X86/statepoint-vreg-invoke.ll @@ -10,16 +10,16 @@ declare dso_local i32* @personality_function() define i64 addrspace(1)* @test_basic_invoke(i64 addrspace(1)* %obj, i64 addrspace(1)* %obj1) ; CHECK-LABEL: name: test_basic_invoke ; CHECK: bb.0.entry: -; CHECK: MOV64mr %stack.1, 1, $noreg, 0, $noreg, renamable $rdi :: (store 8 into %stack.1) -; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $rsi :: (store 8 into %stack.0) -; CHECK: STATEPOINT 0, 0, 1, @some_call, $rdi, 2, 0, 2, 0, 2, 5, 2, 0, 2, -1, 2, 0, 2, 0, 2, 0, 2, 2, 1, 8, %stack.0, 0, 1, 8, %stack.1, 0, 2, 0, 2, 2, 0, 0, 1, 1, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 8 on %stack.0), (volatile load store 8 on %stack.1) +; CHECK: MOV64mr %stack.1, 1, $noreg, 0, $noreg, renamable $rdi :: (store (s64) into %stack.1) +; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $rsi :: (store (s64) into %stack.0) +; CHECK: STATEPOINT 0, 0, 1, @some_call, $rdi, 2, 0, 2, 0, 2, 5, 2, 0, 2, -1, 2, 0, 2, 0, 2, 0, 2, 2, 1, 8, %stack.0, 0, 1, 8, %stack.1, 0, 2, 0, 2, 2, 0, 0, 1, 1, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store (s64) on %stack.0), (volatile load store (s64) on %stack.1) ; CHECK: JMP_1 %bb.1 ; CHECK: bb.1.safepoint_normal_dest: -; CHECK: renamable $rax = MOV64rm %stack.1, 1, $noreg, 0, $noreg :: (load 8 from %stack.1) +; CHECK: renamable $rax = MOV64rm %stack.1, 1, $noreg, 0, $noreg :: (load (s64) from %stack.1) ; CHECK: bb.2.normal_return: ; CHECK: RET 0, $rax ; CHECK: bb.3.exceptional_return (landing-pad): -; CHECK: renamable $rax = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load 8 from %stack.0) +; CHECK: renamable $rax = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %stack.0) ; CHECK: RET 0, $rax gc "statepoint-example" personality i32* ()* @"personality_function" { entry: @@ -52,30 +52,30 @@ define i64 addrspace(1)* @test_invoke_same_val(i1 %cond, i64 addrspace(1)* %val1 ; CHECK: JCC_1 %bb.3, 4, implicit killed $eflags ; CHECK: JMP_1 %bb.1 ; CHECK: bb.1.left: -; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, renamable $rsi :: (store 8 into %stack.0) +; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, renamable $rsi :: (store (s64) into %stack.0) ; CHECK: $rdi = COPY killed renamable $rsi -; CHECK: renamable $rbp = STATEPOINT 0, 0, 1, @some_call, $rdi, 2, 0, 2, 0, 2, 0, 2, 2, killed renamable $rbp(tied-def 0), 1, 8, %stack.0, 0, 2, 0, 2, 2, 0, 0, 1, 1, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 8 on %stack.0) +; CHECK: renamable $rbp = STATEPOINT 0, 0, 1, @some_call, $rdi, 2, 0, 2, 0, 2, 0, 2, 2, killed renamable $rbp(tied-def 0), 1, 8, %stack.0, 0, 2, 0, 2, 2, 0, 0, 1, 1, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store (s64) on %stack.0) ; CHECK: JMP_1 %bb.2 ; CHECK: bb.2.left.relocs: -; CHECK: renamable $rbx = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load 8 from %stack.0) +; CHECK: renamable $rbx = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %stack.0) ; CHECK: JMP_1 %bb.5 ; CHECK: bb.3.right: -; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $rbp :: (store 8 into %stack.0) +; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $rbp :: (store (s64) into %stack.0) ; CHECK: $rdi = COPY killed renamable $rsi -; CHECK: renamable $rbx = STATEPOINT 0, 0, 1, @some_call, $rdi, 2, 0, 2, 0, 2, 0, 2, 2, killed renamable $rbx(tied-def 0), 1, 8, %stack.0, 0, 2, 0, 2, 2, 0, 0, 1, 1, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store 8 on %stack.0) +; CHECK: renamable $rbx = STATEPOINT 0, 0, 1, @some_call, $rdi, 2, 0, 2, 0, 2, 0, 2, 2, killed renamable $rbx(tied-def 0), 1, 8, %stack.0, 0, 2, 0, 2, 2, 0, 0, 1, 1, csr_64, implicit-def $rsp, implicit-def $ssp :: (volatile load store (s64) on %stack.0) ; CHECK: JMP_1 %bb.4 ; CHECK: bb.4.right.relocs: -; CHECK: renamable $rbp = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load 8 from %stack.0) +; CHECK: renamable $rbp = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %stack.0) ; CHECK: bb.5.normal_return: ; CHECK: TEST8ri renamable $r14b, 1, implicit-def $eflags, implicit killed $r14d ; CHECK: renamable $rbx = CMOV64rr killed renamable $rbx, killed renamable $rbp, 4, implicit killed $eflags ; CHECK: $rax = COPY killed renamable $rbx ; CHECK: RET 0, $rax ; CHECK: bb.6.exceptional_return.left (landing-pad): -; CHECK: renamable $rax = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load 8 from %stack.0) +; CHECK: renamable $rax = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %stack.0) ; CHECK: RET 0, $rax ; CHECK: bb.7.exceptional_return.right (landing-pad): -; CHECK: renamable $rax = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load 8 from %stack.0) +; CHECK: renamable $rax = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %stack.0) ; CHECK: RET 0, $rax gc "statepoint-example" personality i32* ()* @"personality_function" { entry: @@ -121,16 +121,16 @@ exceptional_return.right: define void @test_duplicate_ir_values() gc "statepoint-example" personality i32* ()* @personality_function { ; CHECK-LABEL: name: test_duplicate_ir_values ; CHECK: bb.0.entry: -; CHECK: renamable $rax = MOV64rm undef renamable $rax, 1, $noreg, 0, $noreg :: (load 8 from `i8 addrspace(1)* addrspace(1)* undef`, addrspace 1) -; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $rax :: (store 8 into %stack.0) -; CHECK: STATEPOINT 1, 16, 5, undef renamable $rax, undef $edi, undef $rsi, undef $edx, undef $ecx, undef $r8d, 2, 0, 2, 0, 2, 0, 2, 1, 1, 8, %stack.0, 0, 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def dead $eax :: (volatile load store 8 on %stack.0) +; CHECK: renamable $rax = MOV64rm undef renamable $rax, 1, $noreg, 0, $noreg :: (load (s64) from `i8 addrspace(1)* addrspace(1)* undef`, addrspace 1) +; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $rax :: (store (s64) into %stack.0) +; CHECK: STATEPOINT 1, 16, 5, undef renamable $rax, undef $edi, undef $rsi, undef $edx, undef $ecx, undef $r8d, 2, 0, 2, 0, 2, 0, 2, 1, 1, 8, %stack.0, 0, 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def dead $eax :: (volatile load store (s64) on %stack.0) ; CHECK: JMP_1 %bb.1 ; CHECK: bb.1.normal_continue: -; CHECK: renamable $rbx = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load 8 from %stack.0) +; CHECK: renamable $rbx = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %stack.0) ; CHECK: $edi = MOV32ri 10 ; CHECK: dead renamable $rbx = STATEPOINT 2882400000, 0, 1, target-flags(x86-plt) @__llvm_deoptimize, killed $edi, 2, 0, 2, 2, 2, 2, killed renamable $rbx, renamable $rbx, 2, 1, renamable $rbx(tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp ; CHECK: bb.2.exceptional_return (landing-pad): -; CHECK: renamable $rbx = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load 8 from %stack.0) +; CHECK: renamable $rbx = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %stack.0) ; CHECK: $edi = MOV32ri -271 ; CHECK: dead renamable $rbx = STATEPOINT 2882400000, 0, 1, target-flags(x86-plt) @__llvm_deoptimize, killed $edi, 2, 0, 2, 0, 2, 1, killed renamable $rbx, 2, 1, renamable $rbx(tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp entry: diff --git a/llvm/test/CodeGen/X86/statepoint-vreg-unlimited-tied-opnds.ll b/llvm/test/CodeGen/X86/statepoint-vreg-unlimited-tied-opnds.ll index bcfe8e8e363de..3749741bb28db 100644 --- a/llvm/test/CodeGen/X86/statepoint-vreg-unlimited-tied-opnds.ll +++ b/llvm/test/CodeGen/X86/statepoint-vreg-unlimited-tied-opnds.ll @@ -18,99 +18,99 @@ define i32 @test_spill( ; CHECK-VREG: %21:gr64 = COPY $rdx ; CHECK-VREG: %22:gr64 = COPY $rsi ; CHECK-VREG: %23:gr64 = COPY $rdi -; CHECK-VREG: %17:gr64 = MOV64rm %fixed-stack.11, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.11, align 16) -; CHECK-VREG: %16:gr64 = MOV64rm %fixed-stack.10, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.10) -; CHECK-VREG: %15:gr64 = MOV64rm %fixed-stack.9, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.9, align 16) -; CHECK-VREG: %14:gr64 = MOV64rm %fixed-stack.8, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.8) -; CHECK-VREG: %13:gr64 = MOV64rm %fixed-stack.7, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.7, align 16) -; CHECK-VREG: %12:gr64 = MOV64rm %fixed-stack.6, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.6) -; CHECK-VREG: %11:gr64 = MOV64rm %fixed-stack.5, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.5, align 16) -; CHECK-VREG: %10:gr64 = MOV64rm %fixed-stack.4, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.4) -; CHECK-VREG: %9:gr64 = MOV64rm %fixed-stack.3, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.3, align 16) -; CHECK-VREG: %8:gr64 = MOV64rm %fixed-stack.2, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.2) -; CHECK-VREG: %7:gr64 = MOV64rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.1, align 16) -; CHECK-VREG: %6:gr64 = MOV64rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.0) +; CHECK-VREG: %17:gr64 = MOV64rm %fixed-stack.11, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.11, align 16) +; CHECK-VREG: %16:gr64 = MOV64rm %fixed-stack.10, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.10) +; CHECK-VREG: %15:gr64 = MOV64rm %fixed-stack.9, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.9, align 16) +; CHECK-VREG: %14:gr64 = MOV64rm %fixed-stack.8, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.8) +; CHECK-VREG: %13:gr64 = MOV64rm %fixed-stack.7, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.7, align 16) +; CHECK-VREG: %12:gr64 = MOV64rm %fixed-stack.6, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.6) +; CHECK-VREG: %11:gr64 = MOV64rm %fixed-stack.5, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.5, align 16) +; CHECK-VREG: %10:gr64 = MOV64rm %fixed-stack.4, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.4) +; CHECK-VREG: %9:gr64 = MOV64rm %fixed-stack.3, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.3, align 16) +; CHECK-VREG: %8:gr64 = MOV64rm %fixed-stack.2, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.2) +; CHECK-VREG: %7:gr64 = MOV64rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.1, align 16) +; CHECK-VREG: %6:gr64 = MOV64rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.0) ; CHECK-VREG: %6:gr64, %7:gr64, %8:gr64, %9:gr64, %10:gr64, %11:gr64, %12:gr64, %13:gr64, %14:gr64, %15:gr64, %16:gr64, %17:gr64, %18:gr64, %19:gr64, %20:gr64, %21:gr64, %22:gr64, %23:gr64 = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 0, 2, 18, %6(tied-def 0), %7(tied-def 1), %8(tied-def 2), %9(tied-def 3), %10(tied-def 4), %11(tied-def 5), %12(tied-def 6), %13(tied-def 7), %14(tied-def 8), %15(tied-def 9), %16(tied-def 10), %17(tied-def 11), %18(tied-def 12), %19(tied-def 13), %20(tied-def 14), %21(tied-def 15), %22(tied-def 16), %23(tied-def 17), 2, 0, 2, 18, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, csr_64, implicit-def $rsp, implicit-def $ssp -; CHECK-VREG: %38:gr32 = MOV32rm %23, 1, $noreg, 4, $noreg :: (load 4 from %ir.gep00, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %22, 1, $noreg, 8, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep01, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %21, 1, $noreg, 12, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep02, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %20, 1, $noreg, 16, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep03, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %19, 1, $noreg, 20, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep04, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %18, 1, $noreg, 24, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep05, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %17, 1, $noreg, 28, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep06, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %16, 1, $noreg, 32, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep07, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %15, 1, $noreg, 36, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep08, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %14, 1, $noreg, 40, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep09, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %13, 1, $noreg, 44, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep10, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %12, 1, $noreg, 48, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep11, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %11, 1, $noreg, 52, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep12, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %10, 1, $noreg, 56, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep13, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %9, 1, $noreg, 60, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep14, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %8, 1, $noreg, 64, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep15, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %7, 1, $noreg, 68, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep16, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %6, 1, $noreg, 72, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep17, addrspace 1) +; CHECK-VREG: %38:gr32 = MOV32rm %23, 1, $noreg, 4, $noreg :: (load (s32) from %ir.gep00, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %22, 1, $noreg, 8, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep01, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %21, 1, $noreg, 12, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep02, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %20, 1, $noreg, 16, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep03, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %19, 1, $noreg, 20, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep04, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %18, 1, $noreg, 24, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep05, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %17, 1, $noreg, 28, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep06, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %16, 1, $noreg, 32, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep07, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %15, 1, $noreg, 36, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep08, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %14, 1, $noreg, 40, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep09, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %13, 1, $noreg, 44, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep10, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %12, 1, $noreg, 48, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep11, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %11, 1, $noreg, 52, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep12, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %10, 1, $noreg, 56, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep13, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %9, 1, $noreg, 60, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep14, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %8, 1, $noreg, 64, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep15, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %7, 1, $noreg, 68, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep16, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %6, 1, $noreg, 72, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep17, addrspace 1) ; CHECK-VREG: $eax = COPY %38 ; CHECK-PREG: renamable $rbx = COPY $r9 -; CHECK-PREG: MOV64mr %stack.6, 1, $noreg, 0, $noreg, $r8 :: (store 8 into %stack.6) +; CHECK-PREG: MOV64mr %stack.6, 1, $noreg, 0, $noreg, $r8 :: (store (s64) into %stack.6) ; CHECK-PREG: renamable $r15 = COPY $rcx ; CHECK-PREG: renamable $r12 = COPY $rdx ; CHECK-PREG: renamable $r14 = COPY $rsi ; CHECK-PREG: renamable $r13 = COPY $rdi -; CHECK-PREG: renamable $rax = MOV64rm %fixed-stack.11, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.11, align 16) -; CHECK-PREG: MOV64mr %stack.7, 1, $noreg, 0, $noreg, killed renamable $rax :: (store 8 into %stack.7) -; CHECK-PREG: renamable $rbp = MOV64rm %fixed-stack.10, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.10) -; CHECK-PREG: renamable $rax = MOV64rm %fixed-stack.9, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.9, align 16) -; CHECK-PREG: MOV64mr %stack.11, 1, $noreg, 0, $noreg, killed renamable $rax :: (store 8 into %stack.11) -; CHECK-PREG: renamable $rax = MOV64rm %fixed-stack.8, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.8) -; CHECK-PREG: MOV64mr %stack.5, 1, $noreg, 0, $noreg, killed renamable $rax :: (store 8 into %stack.5) -; CHECK-PREG: renamable $rax = MOV64rm %fixed-stack.7, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.7, align 16) -; CHECK-PREG: MOV64mr %stack.4, 1, $noreg, 0, $noreg, killed renamable $rax :: (store 8 into %stack.4) -; CHECK-PREG: renamable $rax = MOV64rm %fixed-stack.6, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.6) -; CHECK-PREG: MOV64mr %stack.3, 1, $noreg, 0, $noreg, killed renamable $rax :: (store 8 into %stack.3) -; CHECK-PREG: renamable $rax = MOV64rm %fixed-stack.5, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.5, align 16) -; CHECK-PREG: MOV64mr %stack.2, 1, $noreg, 0, $noreg, killed renamable $rax :: (store 8 into %stack.2) -; CHECK-PREG: renamable $rax = MOV64rm %fixed-stack.4, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.4) -; CHECK-PREG: MOV64mr %stack.1, 1, $noreg, 0, $noreg, killed renamable $rax :: (store 8 into %stack.1) -; CHECK-PREG: renamable $rax = MOV64rm %fixed-stack.3, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.3, align 16) -; CHECK-PREG: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $rax :: (store 8 into %stack.0) -; CHECK-PREG: renamable $rax = MOV64rm %fixed-stack.2, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.2) -; CHECK-PREG: MOV64mr %stack.8, 1, $noreg, 0, $noreg, killed renamable $rax :: (store 8 into %stack.8) -; CHECK-PREG: renamable $rax = MOV64rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.1, align 16) -; CHECK-PREG: MOV64mr %stack.9, 1, $noreg, 0, $noreg, killed renamable $rax :: (store 8 into %stack.9) -; CHECK-PREG: renamable $rax = MOV64rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.0) -; CHECK-PREG: MOV64mr %stack.10, 1, $noreg, 0, $noreg, killed renamable $rax :: (store 8 into %stack.10) -; CHECK-PREG: renamable $rbp, renamable $rbx, renamable $r15, renamable $r12, renamable $r14, renamable $r13 = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 0, 2, 18, 1, 8, %stack.10, 0, 1, 8, %stack.9, 0, 1, 8, %stack.8, 0, 1, 8, %stack.0, 0, 1, 8, %stack.1, 0, 1, 8, %stack.2, 0, 1, 8, %stack.3, 0, 1, 8, %stack.4, 0, 1, 8, %stack.5, 0, 1, 8, %stack.11, 0, killed renamable $rbp(tied-def 0), 1, 8, %stack.7, 0, killed renamable $rbx(tied-def 1), 1, 8, %stack.6, 0, killed renamable $r15(tied-def 2), killed renamable $r12(tied-def 3), killed renamable $r14(tied-def 4), killed renamable $r13(tied-def 5), 2, 0, 2, 18, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, csr_64, implicit-def $rsp, implicit-def $ssp :: (load store 8 on %stack.0), (load store 8 on %stack.1), (load store 8 on %stack.2), (load store 8 on %stack.3), (load store 8 on %stack.4), (load store 8 on %stack.5), (load store 8 on %stack.6), (load store 8 on %stack.7), (load store 8 on %stack.8), (load store 8 on %stack.9), (load store 8 on %stack.10), (load store 8 on %stack.11) -; CHECK-PREG: renamable $eax = MOV32rm killed renamable $r13, 1, $noreg, 4, $noreg :: (load 4 from %ir.gep00, addrspace 1) -; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $r14, 1, $noreg, 8, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep01, addrspace 1) -; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $r12, 1, $noreg, 12, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep02, addrspace 1) -; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $r15, 1, $noreg, 16, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep03, addrspace 1) -; CHECK-PREG: renamable $rdx = MOV64rm %stack.6, 1, $noreg, 0, $noreg :: (load 8 from %stack.6) -; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rdx, 1, $noreg, 20, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep04, addrspace 1) -; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rbx, 1, $noreg, 24, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep05, addrspace 1) -; CHECK-PREG: renamable $rdx = MOV64rm %stack.7, 1, $noreg, 0, $noreg :: (load 8 from %stack.7) -; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rdx, 1, $noreg, 28, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep06, addrspace 1) -; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rbp, 1, $noreg, 32, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep07, addrspace 1) -; CHECK-PREG: renamable $rcx = MOV64rm %stack.11, 1, $noreg, 0, $noreg :: (load 8 from %stack.11) -; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rcx, 1, $noreg, 36, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep08, addrspace 1) -; CHECK-PREG: renamable $rdx = MOV64rm %stack.5, 1, $noreg, 0, $noreg :: (load 8 from %stack.5) -; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rdx, 1, $noreg, 40, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep09, addrspace 1) -; CHECK-PREG: renamable $rdx = MOV64rm %stack.4, 1, $noreg, 0, $noreg :: (load 8 from %stack.4) -; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rdx, 1, $noreg, 44, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep10, addrspace 1) -; CHECK-PREG: renamable $rdx = MOV64rm %stack.3, 1, $noreg, 0, $noreg :: (load 8 from %stack.3) -; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rdx, 1, $noreg, 48, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep11, addrspace 1) -; CHECK-PREG: renamable $rdx = MOV64rm %stack.2, 1, $noreg, 0, $noreg :: (load 8 from %stack.2) -; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rdx, 1, $noreg, 52, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep12, addrspace 1) -; CHECK-PREG: renamable $rdx = MOV64rm %stack.1, 1, $noreg, 0, $noreg :: (load 8 from %stack.1) -; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rdx, 1, $noreg, 56, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep13, addrspace 1) -; CHECK-PREG: renamable $rdx = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load 8 from %stack.0) -; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rdx, 1, $noreg, 60, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep14, addrspace 1) -; CHECK-PREG: renamable $rdx = MOV64rm %stack.8, 1, $noreg, 0, $noreg :: (load 8 from %stack.8) -; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rdx, 1, $noreg, 64, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep15, addrspace 1) -; CHECK-PREG: renamable $rdx = MOV64rm %stack.9, 1, $noreg, 0, $noreg :: (load 8 from %stack.9) -; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rdx, 1, $noreg, 68, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep16, addrspace 1) -; CHECK-PREG: renamable $rcx = MOV64rm %stack.10, 1, $noreg, 0, $noreg :: (load 8 from %stack.10) -; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rcx, 1, $noreg, 72, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep17, addrspace 1) +; CHECK-PREG: renamable $rax = MOV64rm %fixed-stack.11, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.11, align 16) +; CHECK-PREG: MOV64mr %stack.7, 1, $noreg, 0, $noreg, killed renamable $rax :: (store (s64) into %stack.7) +; CHECK-PREG: renamable $rbp = MOV64rm %fixed-stack.10, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.10) +; CHECK-PREG: renamable $rax = MOV64rm %fixed-stack.9, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.9, align 16) +; CHECK-PREG: MOV64mr %stack.11, 1, $noreg, 0, $noreg, killed renamable $rax :: (store (s64) into %stack.11) +; CHECK-PREG: renamable $rax = MOV64rm %fixed-stack.8, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.8) +; CHECK-PREG: MOV64mr %stack.5, 1, $noreg, 0, $noreg, killed renamable $rax :: (store (s64) into %stack.5) +; CHECK-PREG: renamable $rax = MOV64rm %fixed-stack.7, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.7, align 16) +; CHECK-PREG: MOV64mr %stack.4, 1, $noreg, 0, $noreg, killed renamable $rax :: (store (s64) into %stack.4) +; CHECK-PREG: renamable $rax = MOV64rm %fixed-stack.6, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.6) +; CHECK-PREG: MOV64mr %stack.3, 1, $noreg, 0, $noreg, killed renamable $rax :: (store (s64) into %stack.3) +; CHECK-PREG: renamable $rax = MOV64rm %fixed-stack.5, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.5, align 16) +; CHECK-PREG: MOV64mr %stack.2, 1, $noreg, 0, $noreg, killed renamable $rax :: (store (s64) into %stack.2) +; CHECK-PREG: renamable $rax = MOV64rm %fixed-stack.4, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.4) +; CHECK-PREG: MOV64mr %stack.1, 1, $noreg, 0, $noreg, killed renamable $rax :: (store (s64) into %stack.1) +; CHECK-PREG: renamable $rax = MOV64rm %fixed-stack.3, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.3, align 16) +; CHECK-PREG: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $rax :: (store (s64) into %stack.0) +; CHECK-PREG: renamable $rax = MOV64rm %fixed-stack.2, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.2) +; CHECK-PREG: MOV64mr %stack.8, 1, $noreg, 0, $noreg, killed renamable $rax :: (store (s64) into %stack.8) +; CHECK-PREG: renamable $rax = MOV64rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.1, align 16) +; CHECK-PREG: MOV64mr %stack.9, 1, $noreg, 0, $noreg, killed renamable $rax :: (store (s64) into %stack.9) +; CHECK-PREG: renamable $rax = MOV64rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.0) +; CHECK-PREG: MOV64mr %stack.10, 1, $noreg, 0, $noreg, killed renamable $rax :: (store (s64) into %stack.10) +; CHECK-PREG: renamable $rbp, renamable $rbx, renamable $r15, renamable $r12, renamable $r14, renamable $r13 = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 0, 2, 18, 1, 8, %stack.10, 0, 1, 8, %stack.9, 0, 1, 8, %stack.8, 0, 1, 8, %stack.0, 0, 1, 8, %stack.1, 0, 1, 8, %stack.2, 0, 1, 8, %stack.3, 0, 1, 8, %stack.4, 0, 1, 8, %stack.5, 0, 1, 8, %stack.11, 0, killed renamable $rbp(tied-def 0), 1, 8, %stack.7, 0, killed renamable $rbx(tied-def 1), 1, 8, %stack.6, 0, killed renamable $r15(tied-def 2), killed renamable $r12(tied-def 3), killed renamable $r14(tied-def 4), killed renamable $r13(tied-def 5), 2, 0, 2, 18, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, csr_64, implicit-def $rsp, implicit-def $ssp :: (load store (s64) on %stack.0), (load store (s64) on %stack.1), (load store (s64) on %stack.2), (load store (s64) on %stack.3), (load store (s64) on %stack.4), (load store (s64) on %stack.5), (load store (s64) on %stack.6), (load store (s64) on %stack.7), (load store (s64) on %stack.8), (load store (s64) on %stack.9), (load store (s64) on %stack.10), (load store (s64) on %stack.11) +; CHECK-PREG: renamable $eax = MOV32rm killed renamable $r13, 1, $noreg, 4, $noreg :: (load (s32) from %ir.gep00, addrspace 1) +; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $r14, 1, $noreg, 8, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep01, addrspace 1) +; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $r12, 1, $noreg, 12, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep02, addrspace 1) +; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $r15, 1, $noreg, 16, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep03, addrspace 1) +; CHECK-PREG: renamable $rdx = MOV64rm %stack.6, 1, $noreg, 0, $noreg :: (load (s64) from %stack.6) +; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rdx, 1, $noreg, 20, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep04, addrspace 1) +; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rbx, 1, $noreg, 24, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep05, addrspace 1) +; CHECK-PREG: renamable $rdx = MOV64rm %stack.7, 1, $noreg, 0, $noreg :: (load (s64) from %stack.7) +; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rdx, 1, $noreg, 28, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep06, addrspace 1) +; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rbp, 1, $noreg, 32, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep07, addrspace 1) +; CHECK-PREG: renamable $rcx = MOV64rm %stack.11, 1, $noreg, 0, $noreg :: (load (s64) from %stack.11) +; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rcx, 1, $noreg, 36, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep08, addrspace 1) +; CHECK-PREG: renamable $rdx = MOV64rm %stack.5, 1, $noreg, 0, $noreg :: (load (s64) from %stack.5) +; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rdx, 1, $noreg, 40, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep09, addrspace 1) +; CHECK-PREG: renamable $rdx = MOV64rm %stack.4, 1, $noreg, 0, $noreg :: (load (s64) from %stack.4) +; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rdx, 1, $noreg, 44, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep10, addrspace 1) +; CHECK-PREG: renamable $rdx = MOV64rm %stack.3, 1, $noreg, 0, $noreg :: (load (s64) from %stack.3) +; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rdx, 1, $noreg, 48, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep11, addrspace 1) +; CHECK-PREG: renamable $rdx = MOV64rm %stack.2, 1, $noreg, 0, $noreg :: (load (s64) from %stack.2) +; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rdx, 1, $noreg, 52, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep12, addrspace 1) +; CHECK-PREG: renamable $rdx = MOV64rm %stack.1, 1, $noreg, 0, $noreg :: (load (s64) from %stack.1) +; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rdx, 1, $noreg, 56, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep13, addrspace 1) +; CHECK-PREG: renamable $rdx = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %stack.0) +; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rdx, 1, $noreg, 60, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep14, addrspace 1) +; CHECK-PREG: renamable $rdx = MOV64rm %stack.8, 1, $noreg, 0, $noreg :: (load (s64) from %stack.8) +; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rdx, 1, $noreg, 64, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep15, addrspace 1) +; CHECK-PREG: renamable $rdx = MOV64rm %stack.9, 1, $noreg, 0, $noreg :: (load (s64) from %stack.9) +; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rdx, 1, $noreg, 68, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep16, addrspace 1) +; CHECK-PREG: renamable $rcx = MOV64rm %stack.10, 1, $noreg, 0, $noreg :: (load (s64) from %stack.10) +; CHECK-PREG: renamable $eax = ADD32rm killed renamable $eax, killed renamable $rcx, 1, $noreg, 72, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep17, addrspace 1) %token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0) [ "gc-live"(i32 addrspace(1)* %arg00, i32 addrspace(1)* %arg01, i32 addrspace(1)* %arg02, i32 addrspace(1)* %arg03, i32 addrspace(1)* %arg04, i32 addrspace(1)* %arg05, i32 addrspace(1)* %arg06, i32 addrspace(1)* %arg07, i32 addrspace(1)* %arg08, i32 addrspace(1)* %arg09, i32 addrspace(1)* %arg10, i32 addrspace(1)* %arg11, i32 addrspace(1)* %arg12, i32 addrspace(1)* %arg13, i32 addrspace(1)* %arg14, i32 addrspace(1)* %arg15, i32 addrspace(1)* %arg16, i32 addrspace(1)* %arg17) ] diff --git a/llvm/test/CodeGen/X86/statepoint-vreg.mir b/llvm/test/CodeGen/X86/statepoint-vreg.mir index 933ca55eef5c5..a888fe4f04a7f 100644 --- a/llvm/test/CodeGen/X86/statepoint-vreg.mir +++ b/llvm/test/CodeGen/X86/statepoint-vreg.mir @@ -148,8 +148,8 @@ body: | ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp %2:gr64, %3:gr64 = STATEPOINT 2882400000, 0, 0, @bar, 2, 0, 2, 0, 2, 1, 2, 0, 2, 2, %1(tied-def 0), %0(tied-def 1), 2, 0, 2, 2, 0, 0, 1, 1, csr_64, implicit-def $rsp, implicit-def $ssp ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - %4:gr32 = MOV32rm killed %3, 1, $noreg, 0, $noreg :: (load 4 from %ir.rel1, addrspace 1) - %5:gr32 = ADD32rm %4, killed %2, 1, $noreg, 0, $noreg, implicit-def dead $eflags :: (load 4 from %ir.rel2, addrspace 1) + %4:gr32 = MOV32rm killed %3, 1, $noreg, 0, $noreg :: (load (s32) from %ir.rel1, addrspace 1) + %5:gr32 = ADD32rm %4, killed %2, 1, $noreg, 0, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.rel2, addrspace 1) $eax = COPY %5 RET 0, $eax diff --git a/llvm/test/CodeGen/X86/tail-dup-asm-goto.ll b/llvm/test/CodeGen/X86/tail-dup-asm-goto.ll index 36232af5904f9..68e87db96b2bd 100644 --- a/llvm/test/CodeGen/X86/tail-dup-asm-goto.ll +++ b/llvm/test/CodeGen/X86/tail-dup-asm-goto.ll @@ -12,13 +12,13 @@ define i8* @test1(i8** %arg1, i8* %arg2) { ; CHECK: liveins: $rdi, $rsi ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi ; CHECK: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi - ; CHECK: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 0, $noreg :: (load 8 from %ir.arg1) + ; CHECK: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s64) from %ir.arg1) ; CHECK: [[SUB64rr:%[0-9]+]]:gr64 = SUB64rr [[MOV64rm]], [[COPY]], implicit-def $eflags ; CHECK: JCC_1 %bb.2, 4, implicit $eflags ; CHECK: JMP_1 %bb.1 ; CHECK: bb.1.bb100: ; CHECK: successors: %bb.3(0x80000000) - ; CHECK: MOV64mi32 [[COPY1]], 1, $noreg, 0, $noreg, 0 :: (store 8 into %ir.arg1) + ; CHECK: MOV64mi32 [[COPY1]], 1, $noreg, 0, $noreg, 0 :: (store (s64) into %ir.arg1) ; CHECK: JMP_1 %bb.3 ; CHECK: bb.2.bb106: ; CHECK: successors: %bb.3(0x80000000) diff --git a/llvm/test/CodeGen/X86/tail-merge-after-mbp.mir b/llvm/test/CodeGen/X86/tail-merge-after-mbp.mir index 1fbea33cbcbc4..97c2a2914f827 100644 --- a/llvm/test/CodeGen/X86/tail-merge-after-mbp.mir +++ b/llvm/test/CodeGen/X86/tail-merge-after-mbp.mir @@ -14,12 +14,12 @@ body: | ; CHECK: JCC_1 %bb.1, 5, implicit $eflags ; CHECK: bb.2: ; CHECK: successors: %bb.1(0x30000000), %bb.3(0x50000000) - ; CHECK: $rax = MOV64rm $r14, 1, $noreg, 0, $noreg :: (load 8) + ; CHECK: $rax = MOV64rm $r14, 1, $noreg, 0, $noreg :: (load (s64)) ; CHECK: TEST64rr $rax, $rax, implicit-def $eflags ; CHECK: JCC_1 %bb.1, 4, implicit $eflags ; CHECK: bb.3: ; CHECK: successors: %bb.6(0x30000000), %bb.4(0x50000000) - ; CHECK: CMP64mi8 killed $rax, 1, $noreg, 8, $noreg, 0, implicit-def $eflags :: (load 8) + ; CHECK: CMP64mi8 killed $rax, 1, $noreg, 8, $noreg, 0, implicit-def $eflags :: (load (s64)) ; CHECK: JCC_1 %bb.6, 4, implicit $eflags ; CHECK: bb.4: ; CHECK: $ebp = XOR32rr undef $ebp, undef $ebp, implicit-def dead $eflags @@ -27,16 +27,16 @@ body: | ; CHECK: RETQ $eax ; CHECK: bb.6: ; CHECK: successors: %bb.1(0x30000000), %bb.7(0x50000000) - ; CHECK: $rax = MOV64rm $r14, 1, $noreg, 0, $noreg :: (load 8) + ; CHECK: $rax = MOV64rm $r14, 1, $noreg, 0, $noreg :: (load (s64)) ; CHECK: TEST64rr $rax, $rax, implicit-def $eflags ; CHECK: JCC_1 %bb.1, 4, implicit $eflags ; CHECK: bb.7 (align 16): ; CHECK: successors: %bb.8(0x71555555), %bb.4(0x0eaaaaab) - ; CHECK: CMP64mi8 killed $rax, 1, $noreg, 8, $noreg, 0, implicit-def $eflags :: (load 8), (load 8) + ; CHECK: CMP64mi8 killed $rax, 1, $noreg, 8, $noreg, 0, implicit-def $eflags :: (load (s64)), (load (s64)) ; CHECK: JCC_1 %bb.4, 5, implicit $eflags ; CHECK: bb.8: ; CHECK: successors: %bb.1(0x04000000), %bb.7(0x7c000000) - ; CHECK: $rax = MOV64rm $r14, 1, $noreg, 0, $noreg :: (load 8) + ; CHECK: $rax = MOV64rm $r14, 1, $noreg, 0, $noreg :: (load (s64)) ; CHECK: TEST64rr $rax, $rax, implicit-def $eflags ; CHECK: JCC_1 %bb.7, 5, implicit $eflags ; CHECK: bb.1: @@ -57,7 +57,7 @@ body: | bb.7: successors: %bb.8(0x30000000), %bb.9(0x50000000) - $rax = MOV64rm $r14, 1, $noreg, 0, $noreg :: (load 8) + $rax = MOV64rm $r14, 1, $noreg, 0, $noreg :: (load (s64)) TEST64rr $rax, $rax, implicit-def $eflags JCC_1 %bb.9, 5, implicit killed $eflags @@ -70,13 +70,13 @@ body: | bb.9: successors: %bb.10(0x30000000), %bb.15(0x50000000) - CMP64mi8 killed $rax, 1, $noreg, 8, $noreg, 0, implicit-def $eflags :: (load 8) + CMP64mi8 killed $rax, 1, $noreg, 8, $noreg, 0, implicit-def $eflags :: (load (s64)) JCC_1 %bb.15, 5, implicit $eflags bb.10: successors: %bb.11(0x30000000), %bb.12(0x50000000) - $rax = MOV64rm $r14, 1, $noreg, 0, $noreg :: (load 8) + $rax = MOV64rm $r14, 1, $noreg, 0, $noreg :: (load (s64)) TEST64rr $rax, $rax, implicit-def $eflags JCC_1 %bb.12, 5, implicit $eflags @@ -89,13 +89,13 @@ body: | bb.12: successors: %bb.13(0x71555555), %bb.15(0x0eaaaaab) - CMP64mi8 killed $rax, 1, $noreg, 8, $noreg, 0, implicit-def $eflags :: (load 8), (load 8) + CMP64mi8 killed $rax, 1, $noreg, 8, $noreg, 0, implicit-def $eflags :: (load (s64)), (load (s64)) JCC_1 %bb.15, 5, implicit $eflags bb.13: successors: %bb.14(0x04000000), %bb.12(0x7c000000) - $rax = MOV64rm $r14, 1, $noreg, 0, $noreg :: (load 8) + $rax = MOV64rm $r14, 1, $noreg, 0, $noreg :: (load (s64)) TEST64rr $rax, $rax, implicit-def $eflags JCC_1 %bb.12, 5, implicit $eflags diff --git a/llvm/test/CodeGen/X86/taildup-callsiteinfo.mir b/llvm/test/CodeGen/X86/taildup-callsiteinfo.mir index 9a1f23a9a4989..266dcc0bd0483 100644 --- a/llvm/test/CodeGen/X86/taildup-callsiteinfo.mir +++ b/llvm/test/CodeGen/X86/taildup-callsiteinfo.mir @@ -62,7 +62,7 @@ body: | successors: %bb.3(0x80000000) liveins: $rcx - renamable $ecx = MOV32rm killed renamable $rcx, 1, $noreg, 0, $noreg :: (load 4 from %ir.size_ptr) + renamable $ecx = MOV32rm killed renamable $rcx, 1, $noreg, 0, $noreg :: (load (s32) from %ir.size_ptr) bb.3.cond.end: liveins: $ecx diff --git a/llvm/test/CodeGen/X86/topdepthreduce-postra.mir b/llvm/test/CodeGen/X86/topdepthreduce-postra.mir index 7ca826d582b5c..d713b340ccf1b 100644 --- a/llvm/test/CodeGen/X86/topdepthreduce-postra.mir +++ b/llvm/test/CodeGen/X86/topdepthreduce-postra.mir @@ -8,9 +8,9 @@ body: | bb.0: ; CHECK-LABEL: name: test ; CHECK: $eax = MOV32rr killed $edi - ; CHECK: MOV8mi killed renamable $rsi, 1, $noreg, 0, $noreg, 1 :: (store 1) + ; CHECK: MOV8mi killed renamable $rsi, 1, $noreg, 0, $noreg, 1 :: (store (s8)) ; CHECK: renamable $eax = DEC32r killed renamable $eax, implicit-def $eflags $eax = MOV32rr $edi renamable $eax = DEC32r killed renamable $eax, implicit-def $eflags - MOV8mi killed renamable $rsi, 1, $noreg, 0, $noreg, 1 :: (store 1) + MOV8mi killed renamable $rsi, 1, $noreg, 0, $noreg, 1 :: (store (s8)) ... diff --git a/llvm/test/CodeGen/X86/unfoldMemoryOperand.mir b/llvm/test/CodeGen/X86/unfoldMemoryOperand.mir index d51aff6670d07..aea7170ee8953 100644 --- a/llvm/test/CodeGen/X86/unfoldMemoryOperand.mir +++ b/llvm/test/CodeGen/X86/unfoldMemoryOperand.mir @@ -87,7 +87,7 @@ body: | ; CHECK: renamable $eax = MOV32r0 implicit-def dead $eflags ; CHECK: renamable $rcx = MOV64ri32 -4096 ; CHECK: [[MOV64ri32_:%[0-9]+]]:gr64 = MOV64ri32 -4096 - ; CHECK: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm $rip, 1, $noreg, @y, $noreg :: (dereferenceable load 8 from @y, !tbaa !3) + ; CHECK: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm $rip, 1, $noreg, @y, $noreg :: (dereferenceable load (s64) from @y, !tbaa !3) ; CHECK: JMP_1 %bb.2 ; CHECK: bb.1 (%ir-block.4): ; CHECK: RET 0 @@ -96,7 +96,7 @@ body: | ; CHECK: liveins: $eax, $rcx ; CHECK: CMP64ri32 [[MOV64rm]], @x, implicit-def $eflags ; CHECK: renamable $al = SETCCr 4, implicit killed $eflags, implicit killed $eax, implicit-def $eax - ; CHECK: MOV32mr renamable $rcx, 1, $noreg, @z + 4096, $noreg, renamable $eax :: (store 4 into %ir.scevgep, !tbaa !7) + ; CHECK: MOV32mr renamable $rcx, 1, $noreg, @z + 4096, $noreg, renamable $eax :: (store (s32) into %ir.scevgep, !tbaa !7) ; CHECK: renamable $rcx = ADD64ri8 killed renamable $rcx, 4, implicit-def $eflags ; CHECK: JCC_1 %bb.1, 4, implicit killed $eflags ; CHECK: JMP_1 %bb.2 @@ -111,9 +111,9 @@ body: | successors: %bb.1(0x04000000), %bb.2(0x7c000000) liveins: $eax, $rcx %2:gr64 = MOV64ri32 -4096 - CMP64mi32 $rip, 1, $noreg, @y, $noreg, @x, implicit-def $eflags :: (dereferenceable load 8 from @y, !tbaa !3) + CMP64mi32 $rip, 1, $noreg, @y, $noreg, @x, implicit-def $eflags :: (dereferenceable load (s64) from @y, !tbaa !3) renamable $al = SETCCr 4, implicit killed $eflags, implicit killed $eax, implicit-def $eax - MOV32mr renamable $rcx, 1, $noreg, @z + 4096, $noreg, renamable $eax :: (store 4 into %ir.scevgep, !tbaa !7) + MOV32mr renamable $rcx, 1, $noreg, @z + 4096, $noreg, renamable $eax :: (store (s32) into %ir.scevgep, !tbaa !7) renamable $rcx = ADD64ri8 killed renamable $rcx, 4, implicit-def $eflags JCC_1 %bb.1, 4, implicit killed $eflags JMP_1 %bb.2 diff --git a/llvm/test/CodeGen/X86/vecloadextract.ll b/llvm/test/CodeGen/X86/vecloadextract.ll index 84ef9f59dd408..54a95cf43802c 100755 --- a/llvm/test/CodeGen/X86/vecloadextract.ll +++ b/llvm/test/CodeGen/X86/vecloadextract.ll @@ -7,8 +7,8 @@ ; CHECK: name: const_index ; CHECK: bb.0 (%ir-block.0): -; CHECK: [[POINTER:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load 4 from %fixed-stack.0) -; CHECK: [[LOAD:%[0-9]+]]:gr32 = MOV32rm killed [[POINTER]], 1, $noreg, 4, $noreg :: (load 4 from %ir.v + 4) +; CHECK: [[POINTER:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.0) +; CHECK: [[LOAD:%[0-9]+]]:gr32 = MOV32rm killed [[POINTER]], 1, $noreg, 4, $noreg :: (load (s32) from %ir.v + 4) ; CHECK: $eax = COPY [[LOAD]] ; CHECK: RET 0, $eax define i32 @const_index(<8 x i32>* %v) { @@ -19,10 +19,10 @@ define i32 @const_index(<8 x i32>* %v) { ; CHECK: name: variable_index ; CHECK: bb.0 (%ir-block.0): -; CHECK: [[INDEX:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load 4 from %fixed-stack.0) +; CHECK: [[INDEX:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.0) ; CHECK: [[MASKED_INDEX:%[0-9]+]]:gr32_nosp = AND32ri8 [[INDEX]], 7, implicit-def dead $eflags -; CHECK: [[POINTER:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (load 4 from %fixed-stack.1) -; CHECK: [[LOAD:%[0-9]+]]:gr32 = MOV32rm killed [[POINTER]], 4, killed [[MASKED_INDEX]], 0, $noreg :: (load 4) +; CHECK: [[POINTER:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.1) +; CHECK: [[LOAD:%[0-9]+]]:gr32 = MOV32rm killed [[POINTER]], 4, killed [[MASKED_INDEX]], 0, $noreg :: (load (s32)) ; CHECK: $eax = COPY [[LOAD]] ; CHECK: RET 0, $eax define i32 @variable_index(<8 x i32>* %v, i32 %i) { @@ -33,10 +33,10 @@ define i32 @variable_index(<8 x i32>* %v, i32 %i) { ; CHECK: name: variable_index_with_addrspace ; CHECK: bb.0 (%ir-block.0): -; CHECK: [[INDEX:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load 4 from %fixed-stack.0) +; CHECK: [[INDEX:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.0) ; CHECK: [[MASKED_INDEX:%[0-9]+]]:gr32_nosp = AND32ri8 [[INDEX]], 7, implicit-def dead $eflags -; CHECK: [[POINTER:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (load 4 from %fixed-stack.1) -; CHECK: [[LOAD:%[0-9]+]]:gr32 = MOV32rm killed [[POINTER]], 4, killed [[MASKED_INDEX]], 0, $noreg :: (load 4, addrspace 1) +; CHECK: [[POINTER:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.1) +; CHECK: [[LOAD:%[0-9]+]]:gr32 = MOV32rm killed [[POINTER]], 4, killed [[MASKED_INDEX]], 0, $noreg :: (load (s32), addrspace 1) ; CHECK: $eax = COPY [[LOAD]] ; CHECK: RET 0, $eax define i32 @variable_index_with_addrspace(<8 x i32> addrspace(1)* %v, i32 %i) { diff --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics-flags.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics-flags.ll index 4dbb6d888febb..9c80720ae921a 100644 --- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics-flags.ll +++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics-flags.ll @@ -2,8 +2,8 @@ define <1 x float> @constrained_vector_fadd_v1f32() #0 { ; CHECK-LABEL: name: constrained_vector_fadd_v1f32 -; CHECK: [[MOVSSrm_alt:%[0-9]+]]:fr32 = MOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load 4 from constant-pool) -; CHECK: [[ADDSSrm:%[0-9]+]]:fr32 = ADDSSrm [[MOVSSrm_alt]], $rip, 1, $noreg, %const.1, $noreg, implicit $mxcsr :: (load 4 from constant-pool) +; CHECK: [[MOVSSrm_alt:%[0-9]+]]:fr32 = MOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load (s32) from constant-pool) +; CHECK: [[ADDSSrm:%[0-9]+]]:fr32 = ADDSSrm [[MOVSSrm_alt]], $rip, 1, $noreg, %const.1, $noreg, implicit $mxcsr :: (load (s32) from constant-pool) ; CHECK: $xmm0 = COPY [[ADDSSrm]] ; CHECK: RET 0, $xmm0 entry: @@ -14,10 +14,10 @@ entry: define <3 x float> @constrained_vector_fadd_v3f32() #0 { ; CHECK-LABEL: name: constrained_vector_fadd_v3f32 ; CHECK: [[FsFLD0SS:%[0-9]+]]:fr32 = FsFLD0SS -; CHECK: [[MOVSSrm_alt:%[0-9]+]]:fr32 = MOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load 4 from constant-pool) +; CHECK: [[MOVSSrm_alt:%[0-9]+]]:fr32 = MOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load (s32) from constant-pool) ; CHECK: [[ADDSSrr:%[0-9]+]]:fr32 = ADDSSrr [[MOVSSrm_alt]], killed [[FsFLD0SS]], implicit $mxcsr -; CHECK: [[ADDSSrm:%[0-9]+]]:fr32 = ADDSSrm [[MOVSSrm_alt]], $rip, 1, $noreg, %const.1, $noreg, implicit $mxcsr :: (load 4 from constant-pool) -; CHECK: [[ADDSSrm1:%[0-9]+]]:fr32 = ADDSSrm [[MOVSSrm_alt]], $rip, 1, $noreg, %const.2, $noreg, implicit $mxcsr :: (load 4 from constant-pool) +; CHECK: [[ADDSSrm:%[0-9]+]]:fr32 = ADDSSrm [[MOVSSrm_alt]], $rip, 1, $noreg, %const.1, $noreg, implicit $mxcsr :: (load (s32) from constant-pool) +; CHECK: [[ADDSSrm1:%[0-9]+]]:fr32 = ADDSSrm [[MOVSSrm_alt]], $rip, 1, $noreg, %const.2, $noreg, implicit $mxcsr :: (load (s32) from constant-pool) ; CHECK: [[COPY:%[0-9]+]]:vr128 = COPY [[ADDSSrm1]] ; CHECK: [[COPY1:%[0-9]+]]:vr128 = COPY [[ADDSSrm]] ; CHECK: [[UNPCKLPSrr:%[0-9]+]]:vr128 = UNPCKLPSrr [[COPY1]], killed [[COPY]] @@ -37,9 +37,9 @@ entry: define <4 x double> @constrained_vector_fadd_v4f64() #0 { ; CHECK-LABEL: name: constrained_vector_fadd_v4f64 -; CHECK: [[MOVAPDrm:%[0-9]+]]:vr128 = MOVAPDrm $rip, 1, $noreg, %const.0, $noreg :: (load 16 from constant-pool) -; CHECK: [[ADDPDrm:%[0-9]+]]:vr128 = ADDPDrm [[MOVAPDrm]], $rip, 1, $noreg, %const.1, $noreg, implicit $mxcsr :: (load 16 from constant-pool) -; CHECK: [[ADDPDrm1:%[0-9]+]]:vr128 = ADDPDrm [[MOVAPDrm]], $rip, 1, $noreg, %const.2, $noreg, implicit $mxcsr :: (load 16 from constant-pool) +; CHECK: [[MOVAPDrm:%[0-9]+]]:vr128 = MOVAPDrm $rip, 1, $noreg, %const.0, $noreg :: (load (s128) from constant-pool) +; CHECK: [[ADDPDrm:%[0-9]+]]:vr128 = ADDPDrm [[MOVAPDrm]], $rip, 1, $noreg, %const.1, $noreg, implicit $mxcsr :: (load (s128) from constant-pool) +; CHECK: [[ADDPDrm1:%[0-9]+]]:vr128 = ADDPDrm [[MOVAPDrm]], $rip, 1, $noreg, %const.2, $noreg, implicit $mxcsr :: (load (s128) from constant-pool) ; CHECK: $xmm0 = COPY [[ADDPDrm1]] ; CHECK: $xmm1 = COPY [[ADDPDrm]] ; CHECK: RET 0, $xmm0, $xmm1 diff --git a/llvm/test/CodeGen/X86/vmaskmov-offset.ll b/llvm/test/CodeGen/X86/vmaskmov-offset.ll index a67dcce037508..581dbfc1921c8 100644 --- a/llvm/test/CodeGen/X86/vmaskmov-offset.ll +++ b/llvm/test/CodeGen/X86/vmaskmov-offset.ll @@ -13,10 +13,10 @@ define void @test_v16f(<16 x i32> %x) { ; CHECK: [[AVX_SET0_:%[0-9]+]]:vr256 = AVX_SET0 ; CHECK: [[VPCMPEQDYrr:%[0-9]+]]:vr256 = VPCMPEQDYrr [[COPY]], [[AVX_SET0_]] ; CHECK: [[VPCMPEQDYrr1:%[0-9]+]]:vr256 = VPCMPEQDYrr [[COPY1]], [[AVX_SET0_]] - ; CHECK: [[VMASKMOVPSYrm:%[0-9]+]]:vr256 = VMASKMOVPSYrm [[VPCMPEQDYrr1]], %stack.0.stack_input_vec, 1, $noreg, 0, $noreg :: (load 32 from %ir.stack_input_vec, align 4) - ; CHECK: [[VMASKMOVPSYrm1:%[0-9]+]]:vr256 = VMASKMOVPSYrm [[VPCMPEQDYrr]], %stack.0.stack_input_vec, 1, $noreg, 32, $noreg :: (load 32 from %ir.stack_input_vec + 32, align 4) - ; CHECK: VMASKMOVPSYmr %stack.1.stack_output_vec, 1, $noreg, 32, $noreg, [[VPCMPEQDYrr]], killed [[VMASKMOVPSYrm1]] :: (store 32 into %ir.stack_output_vec + 32, align 4) - ; CHECK: VMASKMOVPSYmr %stack.1.stack_output_vec, 1, $noreg, 0, $noreg, [[VPCMPEQDYrr1]], killed [[VMASKMOVPSYrm]] :: (store 32 into %ir.stack_output_vec, align 4) + ; CHECK: [[VMASKMOVPSYrm:%[0-9]+]]:vr256 = VMASKMOVPSYrm [[VPCMPEQDYrr1]], %stack.0.stack_input_vec, 1, $noreg, 0, $noreg :: (load (s256) from %ir.stack_input_vec, align 4) + ; CHECK: [[VMASKMOVPSYrm1:%[0-9]+]]:vr256 = VMASKMOVPSYrm [[VPCMPEQDYrr]], %stack.0.stack_input_vec, 1, $noreg, 32, $noreg :: (load (s256) from %ir.stack_input_vec + 32, align 4) + ; CHECK: VMASKMOVPSYmr %stack.1.stack_output_vec, 1, $noreg, 32, $noreg, [[VPCMPEQDYrr]], killed [[VMASKMOVPSYrm1]] :: (store (s256) into %ir.stack_output_vec + 32, align 4) + ; CHECK: VMASKMOVPSYmr %stack.1.stack_output_vec, 1, $noreg, 0, $noreg, [[VPCMPEQDYrr1]], killed [[VMASKMOVPSYrm]] :: (store (s256) into %ir.stack_output_vec, align 4) ; CHECK: RET 0 bb: %stack_input_vec = alloca <16 x float>, align 64 @@ -39,10 +39,10 @@ define void @test_v8d(<8 x i64> %x) { ; CHECK: [[AVX_SET0_:%[0-9]+]]:vr256 = AVX_SET0 ; CHECK: [[VPCMPEQQYrr:%[0-9]+]]:vr256 = VPCMPEQQYrr [[COPY]], [[AVX_SET0_]] ; CHECK: [[VPCMPEQQYrr1:%[0-9]+]]:vr256 = VPCMPEQQYrr [[COPY1]], [[AVX_SET0_]] - ; CHECK: [[VMASKMOVPDYrm:%[0-9]+]]:vr256 = VMASKMOVPDYrm [[VPCMPEQQYrr1]], %stack.0.stack_input_vec, 1, $noreg, 0, $noreg :: (load 32 from %ir.stack_input_vec, align 4) - ; CHECK: [[VMASKMOVPDYrm1:%[0-9]+]]:vr256 = VMASKMOVPDYrm [[VPCMPEQQYrr]], %stack.0.stack_input_vec, 1, $noreg, 32, $noreg :: (load 32 from %ir.stack_input_vec + 32, align 4) - ; CHECK: VMASKMOVPDYmr %stack.1.stack_output_vec, 1, $noreg, 32, $noreg, [[VPCMPEQQYrr]], killed [[VMASKMOVPDYrm1]] :: (store 32 into %ir.stack_output_vec + 32, align 4) - ; CHECK: VMASKMOVPDYmr %stack.1.stack_output_vec, 1, $noreg, 0, $noreg, [[VPCMPEQQYrr1]], killed [[VMASKMOVPDYrm]] :: (store 32 into %ir.stack_output_vec, align 4) + ; CHECK: [[VMASKMOVPDYrm:%[0-9]+]]:vr256 = VMASKMOVPDYrm [[VPCMPEQQYrr1]], %stack.0.stack_input_vec, 1, $noreg, 0, $noreg :: (load (s256) from %ir.stack_input_vec, align 4) + ; CHECK: [[VMASKMOVPDYrm1:%[0-9]+]]:vr256 = VMASKMOVPDYrm [[VPCMPEQQYrr]], %stack.0.stack_input_vec, 1, $noreg, 32, $noreg :: (load (s256) from %ir.stack_input_vec + 32, align 4) + ; CHECK: VMASKMOVPDYmr %stack.1.stack_output_vec, 1, $noreg, 32, $noreg, [[VPCMPEQQYrr]], killed [[VMASKMOVPDYrm1]] :: (store (s256) into %ir.stack_output_vec + 32, align 4) + ; CHECK: VMASKMOVPDYmr %stack.1.stack_output_vec, 1, $noreg, 0, $noreg, [[VPCMPEQQYrr1]], killed [[VMASKMOVPDYrm]] :: (store (s256) into %ir.stack_output_vec, align 4) ; CHECK: RET 0 bb: %stack_input_vec = alloca <8 x double>, align 64 @@ -59,7 +59,7 @@ define <2 x double> @mload_constmask_v2f64(<2 x double>* %addr, <2 x double> %ds ; CHECK: liveins: $rdi, $xmm0 ; CHECK: [[COPY:%[0-9]+]]:vr128 = COPY $xmm0 ; CHECK: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi - ; CHECK: [[VMOVHPDrm:%[0-9]+]]:vr128 = VMOVHPDrm [[COPY]], [[COPY1]], 1, $noreg, 8, $noreg :: (load 8 from %ir.addr + 8, align 4) + ; CHECK: [[VMOVHPDrm:%[0-9]+]]:vr128 = VMOVHPDrm [[COPY]], [[COPY1]], 1, $noreg, 8, $noreg :: (load (s64) from %ir.addr + 8, align 4) ; CHECK: $xmm0 = COPY [[VMOVHPDrm]] ; CHECK: RET 0, $xmm0 %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> , <2 x double> %dst) @@ -72,7 +72,7 @@ define void @one_mask_bit_set2(<4 x float>* %addr, <4 x float> %val) { ; CHECK: liveins: $rdi, $xmm0 ; CHECK: [[COPY:%[0-9]+]]:vr128 = COPY $xmm0 ; CHECK: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi - ; CHECK: VEXTRACTPSmr [[COPY1]], 1, $noreg, 8, $noreg, [[COPY]], 2 :: (store 4 into %ir.addr + 8) + ; CHECK: VEXTRACTPSmr [[COPY1]], 1, $noreg, 8, $noreg, [[COPY]], 2 :: (store (s32) into %ir.addr + 8) ; CHECK: RET 0 call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %val, <4 x float>* %addr, i32 4, <4 x i1>) ret void diff --git a/llvm/test/CodeGen/X86/win64-eh-empty-block-2.mir b/llvm/test/CodeGen/X86/win64-eh-empty-block-2.mir index df63ba4308c8e..d7e75db5ebbce 100644 --- a/llvm/test/CodeGen/X86/win64-eh-empty-block-2.mir +++ b/llvm/test/CodeGen/X86/win64-eh-empty-block-2.mir @@ -149,7 +149,7 @@ body: | $rbp = LEA64r $rsp, 1, $noreg, 48, $noreg frame-setup SEH_SetFrame 50, 48 frame-setup SEH_EndPrologue - MOV64mi32 $rbp, 1, $noreg, -8, $noreg, -2 :: (store 8 into %fixed-stack.0) + MOV64mi32 $rbp, 1, $noreg, -8, $noreg, -2 :: (store (s64) into %fixed-stack.0) TEST8rr killed renamable $cl, renamable $cl, implicit-def $eflags JCC_1 %bb.1, 5, implicit $eflags @@ -164,12 +164,12 @@ body: | successors: %bb.6(0x00000001), %bb.8(0x7fffffff) liveins: $r8b - MOV32mi $rbp, 1, $noreg, -12, $noreg, 0 :: (store 4 into %stack.0) + MOV32mi $rbp, 1, $noreg, -12, $noreg, 0 :: (store (s32) into %stack.0) TEST8rr killed renamable $r8b, renamable $r8b, implicit-def $eflags JCC_1 %bb.6, 5, implicit $eflags bb.8.return (address-taken): - $eax = MOV32rm $rbp, 1, $noreg, -12, $noreg :: (load 4 from %stack.0) + $eax = MOV32rm $rbp, 1, $noreg, -12, $noreg :: (load (s32) from %stack.0) SEH_Epilogue $rsp = frame-destroy ADD64ri8 $rsp, 48, implicit-def dead $eflags $rbp = frame-destroy POP64r implicit-def $rsp, implicit $rsp @@ -213,7 +213,7 @@ body: | frame-setup SEH_StackAlloc 32 $rbp = LEA64r $rdx, 1, $noreg, 48, $noreg frame-setup SEH_EndPrologue - MOV32mi $rbp, 1, $noreg, -12, $noreg, 1 :: (store 4 into %stack.0) + MOV32mi $rbp, 1, $noreg, -12, $noreg, 1 :: (store (s32) into %stack.0) $rax = LEA64r $rip, 0, $noreg, %bb.8, $noreg SEH_Epilogue $rsp = frame-destroy ADD64ri8 $rsp, 32, implicit-def dead $eflags diff --git a/llvm/test/CodeGen/X86/x87-reg-usage.mir b/llvm/test/CodeGen/X86/x87-reg-usage.mir index 6f4ff700db783..bf4f99bc1b304 100644 --- a/llvm/test/CodeGen/X86/x87-reg-usage.mir +++ b/llvm/test/CodeGen/X86/x87-reg-usage.mir @@ -77,13 +77,13 @@ body: | ; CHECK-LABEL: name: f1 ; CHECK: liveins: $rdi, $rsi - ; CHECK: renamable $fp0 = LD_Fp32m renamable $rdi, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load 4 from %ir.a) - ; CHECK: renamable $fp0 = SUB_Fp32m killed renamable $fp0, killed renamable $rsi, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load 4 from %ir.b) - ; CHECK: ST_Fp32m killed renamable $rdi, 1, $noreg, 0, $noreg, killed renamable $fp0, implicit-def dead $fpsw, implicit $fpcw :: (store 4 into %ir.a) + ; CHECK: renamable $fp0 = LD_Fp32m renamable $rdi, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load (s32) from %ir.a) + ; CHECK: renamable $fp0 = SUB_Fp32m killed renamable $fp0, killed renamable $rsi, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load (s32) from %ir.b) + ; CHECK: ST_Fp32m killed renamable $rdi, 1, $noreg, 0, $noreg, killed renamable $fp0, implicit-def dead $fpsw, implicit $fpcw :: (store (s32) into %ir.a) ; CHECK: RET 0 - renamable $fp0 = LD_Fp32m renamable $rdi, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load 4 from %ir.a) - renamable $fp0 = SUB_Fp32m killed renamable $fp0, killed renamable $rsi, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load 4 from %ir.b) - ST_Fp32m killed renamable $rdi, 1, $noreg, 0, $noreg, killed renamable $fp0, implicit-def dead $fpsw, implicit $fpcw :: (store 4 into %ir.a) + renamable $fp0 = LD_Fp32m renamable $rdi, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load (s32) from %ir.a) + renamable $fp0 = SUB_Fp32m killed renamable $fp0, killed renamable $rsi, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load (s32) from %ir.b) + ST_Fp32m killed renamable $rdi, 1, $noreg, 0, $noreg, killed renamable $fp0, implicit-def dead $fpsw, implicit $fpcw :: (store (s32) into %ir.a) RET 0 ... @@ -103,13 +103,13 @@ body: | ; CHECK-LABEL: name: f2 ; CHECK: liveins: $rdi, $rsi - ; CHECK: renamable $fp0 = LD_Fp64m renamable $rdi, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load 8 from %ir.a) - ; CHECK: renamable $fp0 = ADD_Fp64m killed renamable $fp0, killed renamable $rsi, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load 8 from %ir.b) - ; CHECK: ST_Fp64m killed renamable $rdi, 1, $noreg, 0, $noreg, killed renamable $fp0, implicit-def dead $fpsw, implicit $fpcw :: (store 8 into %ir.a) + ; CHECK: renamable $fp0 = LD_Fp64m renamable $rdi, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load (s64) from %ir.a) + ; CHECK: renamable $fp0 = ADD_Fp64m killed renamable $fp0, killed renamable $rsi, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load (s64) from %ir.b) + ; CHECK: ST_Fp64m killed renamable $rdi, 1, $noreg, 0, $noreg, killed renamable $fp0, implicit-def dead $fpsw, implicit $fpcw :: (store (s64) into %ir.a) ; CHECK: RET 0 - renamable $fp0 = LD_Fp64m renamable $rdi, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load 8 from %ir.a) - renamable $fp0 = ADD_Fp64m killed renamable $fp0, killed renamable $rsi, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load 8 from %ir.b) - ST_Fp64m killed renamable $rdi, 1, $noreg, 0, $noreg, killed renamable $fp0, implicit-def dead $fpsw, implicit $fpcw :: (store 8 into %ir.a) + renamable $fp0 = LD_Fp64m renamable $rdi, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load (s64) from %ir.a) + renamable $fp0 = ADD_Fp64m killed renamable $fp0, killed renamable $rsi, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load (s64) from %ir.b) + ST_Fp64m killed renamable $rdi, 1, $noreg, 0, $noreg, killed renamable $fp0, implicit-def dead $fpsw, implicit $fpcw :: (store (s64) into %ir.a) RET 0 ... @@ -129,15 +129,15 @@ body: | ; CHECK-LABEL: name: f3 ; CHECK: liveins: $rdi, $rsi - ; CHECK: renamable $fp0 = LD_Fp80m renamable $rdi, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load 10 from %ir.a, align 16) - ; CHECK: renamable $fp1 = LD_Fp80m killed renamable $rsi, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load 10 from %ir.b, align 16) + ; CHECK: renamable $fp0 = LD_Fp80m renamable $rdi, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load (s80) from %ir.a, align 16) + ; CHECK: renamable $fp1 = LD_Fp80m killed renamable $rsi, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load (s80) from %ir.b, align 16) ; CHECK: renamable $fp0 = MUL_Fp80 killed renamable $fp0, killed renamable $fp1, implicit-def dead $fpsw, implicit $fpcw - ; CHECK: ST_FpP80m killed renamable $rdi, 1, $noreg, 0, $noreg, killed renamable $fp0, implicit-def dead $fpsw, implicit $fpcw :: (store 10 into %ir.a, align 16) + ; CHECK: ST_FpP80m killed renamable $rdi, 1, $noreg, 0, $noreg, killed renamable $fp0, implicit-def dead $fpsw, implicit $fpcw :: (store (s80) into %ir.a, align 16) ; CHECK: RET 0 - renamable $fp0 = LD_Fp80m renamable $rdi, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load 10 from %ir.a, align 16) - renamable $fp1 = LD_Fp80m killed renamable $rsi, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load 10 from %ir.b, align 16) + renamable $fp0 = LD_Fp80m renamable $rdi, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load (s80) from %ir.a, align 16) + renamable $fp1 = LD_Fp80m killed renamable $rsi, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load (s80) from %ir.b, align 16) renamable $fp0 = MUL_Fp80 killed renamable $fp0, killed renamable $fp1, implicit-def dead $fpsw, implicit $fpcw - ST_FpP80m killed renamable $rdi, 1, $noreg, 0, $noreg, killed renamable $fp0, implicit-def dead $fpsw, implicit $fpcw :: (store 10 into %ir.a, align 16) + ST_FpP80m killed renamable $rdi, 1, $noreg, 0, $noreg, killed renamable $fp0, implicit-def dead $fpsw, implicit $fpcw :: (store (s80) into %ir.a, align 16) RET 0 ... @@ -157,13 +157,13 @@ body: | ; CHECK-LABEL: name: f4 ; CHECK: liveins: $rdi, $rsi - ; CHECK: renamable $fp0 = LD_Fp32m renamable $rdi, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load 4 from %ir.a) - ; CHECK: renamable $fp0 = DIV_Fp32m killed renamable $fp0, killed renamable $rsi, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load 4 from %ir.b) - ; CHECK: ST_Fp32m killed renamable $rdi, 1, $noreg, 0, $noreg, killed renamable $fp0, implicit-def dead $fpsw, implicit $fpcw :: (store 4 into %ir.a) + ; CHECK: renamable $fp0 = LD_Fp32m renamable $rdi, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load (s32) from %ir.a) + ; CHECK: renamable $fp0 = DIV_Fp32m killed renamable $fp0, killed renamable $rsi, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load (s32) from %ir.b) + ; CHECK: ST_Fp32m killed renamable $rdi, 1, $noreg, 0, $noreg, killed renamable $fp0, implicit-def dead $fpsw, implicit $fpcw :: (store (s32) into %ir.a) ; CHECK: RET 0 - renamable $fp0 = LD_Fp32m renamable $rdi, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load 4 from %ir.a) - renamable $fp0 = DIV_Fp32m killed renamable $fp0, killed renamable $rsi, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load 4 from %ir.b) - ST_Fp32m killed renamable $rdi, 1, $noreg, 0, $noreg, killed renamable $fp0, implicit-def dead $fpsw, implicit $fpcw :: (store 4 into %ir.a) + renamable $fp0 = LD_Fp32m renamable $rdi, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load (s32) from %ir.a) + renamable $fp0 = DIV_Fp32m killed renamable $fp0, killed renamable $rsi, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load (s32) from %ir.b) + ST_Fp32m killed renamable $rdi, 1, $noreg, 0, $noreg, killed renamable $fp0, implicit-def dead $fpsw, implicit $fpcw :: (store (s32) into %ir.a) RET 0 ... @@ -183,11 +183,11 @@ body: | ; CHECK-LABEL: name: f5 ; CHECK: liveins: $rdi, $rsi - ; CHECK: renamable $fp0 = LD_Fp32m64 killed renamable $rdi, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load 4 from %ir.val) - ; CHECK: ST_Fp64m killed renamable $rsi, 1, $noreg, 0, $noreg, killed renamable $fp0, implicit-def dead $fpsw, implicit $fpcw :: (store 8 into %ir.ret) + ; CHECK: renamable $fp0 = LD_Fp32m64 killed renamable $rdi, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load (s32) from %ir.val) + ; CHECK: ST_Fp64m killed renamable $rsi, 1, $noreg, 0, $noreg, killed renamable $fp0, implicit-def dead $fpsw, implicit $fpcw :: (store (s64) into %ir.ret) ; CHECK: RET 0 - renamable $fp0 = LD_Fp32m64 killed renamable $rdi, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load 4 from %ir.val) - ST_Fp64m killed renamable $rsi, 1, $noreg, 0, $noreg, killed renamable $fp0, implicit-def dead $fpsw, implicit $fpcw :: (store 8 into %ir.ret) + renamable $fp0 = LD_Fp32m64 killed renamable $rdi, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load (s32) from %ir.val) + ST_Fp64m killed renamable $rsi, 1, $noreg, 0, $noreg, killed renamable $fp0, implicit-def dead $fpsw, implicit $fpcw :: (store (s64) into %ir.ret) RET 0 ... @@ -209,15 +209,15 @@ body: | ; CHECK-LABEL: name: f6 ; CHECK: liveins: $rdi, $rsi - ; CHECK: renamable $fp0 = LD_Fp64m killed renamable $rdi, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load 8 from %ir.val) - ; CHECK: ST_Fp64m32 %stack.0, 1, $noreg, 0, $noreg, killed renamable $fp0, implicit-def dead $fpsw, implicit $fpcw :: (store 4 into %stack.0) - ; CHECK: renamable $fp0 = LD_Fp32m %stack.0, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load 4 from %stack.0) - ; CHECK: ST_Fp32m killed renamable $rsi, 1, $noreg, 0, $noreg, killed renamable $fp0, implicit-def dead $fpsw, implicit $fpcw :: (store 4 into %ir.ret) + ; CHECK: renamable $fp0 = LD_Fp64m killed renamable $rdi, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load (s64) from %ir.val) + ; CHECK: ST_Fp64m32 %stack.0, 1, $noreg, 0, $noreg, killed renamable $fp0, implicit-def dead $fpsw, implicit $fpcw :: (store (s32) into %stack.0) + ; CHECK: renamable $fp0 = LD_Fp32m %stack.0, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load (s32) from %stack.0) + ; CHECK: ST_Fp32m killed renamable $rsi, 1, $noreg, 0, $noreg, killed renamable $fp0, implicit-def dead $fpsw, implicit $fpcw :: (store (s32) into %ir.ret) ; CHECK: RET 0 - renamable $fp0 = LD_Fp64m killed renamable $rdi, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load 8 from %ir.val) - ST_Fp64m32 %stack.0, 1, $noreg, 0, $noreg, killed renamable $fp0, implicit-def dead $fpsw, implicit $fpcw :: (store 4 into %stack.0) - renamable $fp0 = LD_Fp32m %stack.0, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load 4 from %stack.0) - ST_Fp32m killed renamable $rsi, 1, $noreg, 0, $noreg, killed renamable $fp0, implicit-def dead $fpsw, implicit $fpcw :: (store 4 into %ir.ret) + renamable $fp0 = LD_Fp64m killed renamable $rdi, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load (s64) from %ir.val) + ST_Fp64m32 %stack.0, 1, $noreg, 0, $noreg, killed renamable $fp0, implicit-def dead $fpsw, implicit $fpcw :: (store (s32) into %stack.0) + renamable $fp0 = LD_Fp32m %stack.0, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load (s32) from %stack.0) + ST_Fp32m killed renamable $rsi, 1, $noreg, 0, $noreg, killed renamable $fp0, implicit-def dead $fpsw, implicit $fpcw :: (store (s32) into %ir.ret) RET 0 ... @@ -236,13 +236,13 @@ body: | ; CHECK-LABEL: name: f7 ; CHECK: liveins: $rdi - ; CHECK: renamable $fp0 = LD_Fp32m renamable $rdi, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load 4 from %ir.a) + ; CHECK: renamable $fp0 = LD_Fp32m renamable $rdi, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load (s32) from %ir.a) ; CHECK: renamable $fp0 = SQRT_Fp32 killed renamable $fp0, implicit-def dead $fpsw, implicit $fpcw - ; CHECK: ST_Fp32m killed renamable $rdi, 1, $noreg, 0, $noreg, killed renamable $fp0, implicit-def dead $fpsw, implicit $fpcw :: (store 4 into %ir.a) + ; CHECK: ST_Fp32m killed renamable $rdi, 1, $noreg, 0, $noreg, killed renamable $fp0, implicit-def dead $fpsw, implicit $fpcw :: (store (s32) into %ir.a) ; CHECK: RET 0 - renamable $fp0 = LD_Fp32m renamable $rdi, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load 4 from %ir.a) + renamable $fp0 = LD_Fp32m renamable $rdi, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load (s32) from %ir.a) renamable $fp0 = SQRT_Fp32 killed renamable $fp0, implicit-def dead $fpsw, implicit $fpcw - ST_Fp32m killed renamable $rdi, 1, $noreg, 0, $noreg, killed renamable $fp0, implicit-def dead $fpsw, implicit $fpcw :: (store 4 into %ir.a) + ST_Fp32m killed renamable $rdi, 1, $noreg, 0, $noreg, killed renamable $fp0, implicit-def dead $fpsw, implicit $fpcw :: (store (s32) into %ir.a) RET 0 ... diff --git a/llvm/test/DebugInfo/AArch64/asan-stack-vars.mir b/llvm/test/DebugInfo/AArch64/asan-stack-vars.mir index d1e7ff2102a39..d5f4ead7166dd 100644 --- a/llvm/test/DebugInfo/AArch64/asan-stack-vars.mir +++ b/llvm/test/DebugInfo/AArch64/asan-stack-vars.mir @@ -37,7 +37,7 @@ source_filename = "m.m" target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-ios" - + %0 = type opaque %struct._class_t = type { %struct._class_t*, %struct._class_t*, %struct._objc_cache*, i8* (i8*, i8*)**, %struct._class_ro_t* } %struct._objc_cache = type opaque @@ -51,7 +51,7 @@ %struct._prop_list_t = type { i32, i32, [0 x %struct._prop_t] } %struct._prop_t = type { i8*, i8* } %struct.CGSize = type { double, double } - + @"OBJC_CLASS_$_Object" = external global %struct._class_t @"OBJC_CLASSLIST_REFERENCES_$_" = private global %struct._class_t* @"OBJC_CLASS_$_Object", section "__DATA, __objc_classrefs, regular, no_dead_strip", align 8 @OBJC_METH_VAR_NAME_ = private unnamed_addr constant [6 x i8] c"alloc\00", section "__TEXT,__objc_methname,cstring_literals", align 1 @@ -75,7 +75,7 @@ @__asan_shadow_memory_dynamic_address = external global i64 @___asan_gen_ = private unnamed_addr constant [34 x i8] c"2 32 16 9 imageSize 64 8 6 object\00", align 1 @__stack_chk_guard = external global i8* - + ; Function Attrs: noinline sanitize_address ssp uwtable define internal i8* @"\01+[MyObject doWithSize:]"(i8* %self, i8* %_cmd, [2 x double] %imageSize.coerce) #0 !dbg !14 { entry: @@ -120,12 +120,12 @@ %25 = load i16, i16* %24 %26 = icmp ne i16 %25, 0 br i1 %26, label %27, label %28 - + ;
")) { MainJD.addGenerator( cantFail(DynamicLibrarySearchGenerator::GetForCurrentProcess( @@ -82,36 +82,36 @@ class KaleidoscopeJIT { ~KaleidoscopeJIT() { if (auto Err = ES->endSession()) ES->reportError(std::move(Err)); - if (auto Err = TPCIU->cleanup()) + if (auto Err = EPCIU->cleanup()) ES->reportError(std::move(Err)); } static Expected> Create() { auto SSP = std::make_shared(); - auto TPC = SelfTargetProcessControl::Create(SSP); - if (!TPC) - return TPC.takeError(); + auto EPC = SelfExecutorProcessControl::Create(SSP); + if (!EPC) + return EPC.takeError(); auto ES = std::make_unique(std::move(SSP)); - auto TPCIU = TPCIndirectionUtils::Create(**TPC); - if (!TPCIU) - return TPCIU.takeError(); + auto EPCIU = EPCIndirectionUtils::Create(**EPC); + if (!EPCIU) + return EPCIU.takeError(); - (*TPCIU)->createLazyCallThroughManager( + (*EPCIU)->createLazyCallThroughManager( *ES, pointerToJITTargetAddress(&handleLazyCallThroughError)); - if (auto Err = setUpInProcessLCTMReentryViaTPCIU(**TPCIU)) + if (auto Err = setUpInProcessLCTMReentryViaEPCIU(**EPCIU)) return std::move(Err); - JITTargetMachineBuilder JTMB((*TPC)->getTargetTriple()); + JITTargetMachineBuilder JTMB((*EPC)->getTargetTriple()); auto DL = JTMB.getDefaultDataLayoutForTarget(); if (!DL) return DL.takeError(); - return std::make_unique(std::move(*TPC), std::move(ES), - std::move(*TPCIU), std::move(JTMB), + return std::make_unique(std::move(*EPC), std::move(ES), + std::move(*EPCIU), std::move(JTMB), std::move(*DL)); } diff --git a/llvm/examples/Kaleidoscope/BuildingAJIT/Chapter4/KaleidoscopeJIT.h b/llvm/examples/Kaleidoscope/BuildingAJIT/Chapter4/KaleidoscopeJIT.h index e22dc5512727e..6b2f6a543bbf8 100644 --- a/llvm/examples/Kaleidoscope/BuildingAJIT/Chapter4/KaleidoscopeJIT.h +++ b/llvm/examples/Kaleidoscope/BuildingAJIT/Chapter4/KaleidoscopeJIT.h @@ -18,13 +18,13 @@ #include "llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h" #include "llvm/ExecutionEngine/Orc/CompileUtils.h" #include "llvm/ExecutionEngine/Orc/Core.h" +#include "llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h" #include "llvm/ExecutionEngine/Orc/ExecutionUtils.h" +#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h" #include "llvm/ExecutionEngine/Orc/IRCompileLayer.h" #include "llvm/ExecutionEngine/Orc/IRTransformLayer.h" #include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h" #include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h" -#include "llvm/ExecutionEngine/Orc/TPCIndirectionUtils.h" -#include "llvm/ExecutionEngine/Orc/TargetProcessControl.h" #include "llvm/ExecutionEngine/SectionMemoryManager.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/LLVMContext.h" @@ -126,9 +126,9 @@ void KaleidoscopeASTMaterializationUnit::materialize( class KaleidoscopeJIT { private: - std::unique_ptr TPC; + std::unique_ptr EPC; std::unique_ptr ES; - std::unique_ptr TPCIU; + std::unique_ptr EPCIU; DataLayout DL; MangleAndInterner Mangle; @@ -146,11 +146,11 @@ class KaleidoscopeJIT { } public: - KaleidoscopeJIT(std::unique_ptr TPC, + KaleidoscopeJIT(std::unique_ptr EPC, std::unique_ptr ES, - std::unique_ptr TPCIU, + std::unique_ptr EPCIU, JITTargetMachineBuilder JTMB, DataLayout DL) - : TPC(std::move(TPC)), ES(std::move(ES)), TPCIU(std::move(TPCIU)), + : EPC(std::move(EPC)), ES(std::move(ES)), EPCIU(std::move(EPCIU)), DL(std::move(DL)), Mangle(*this->ES, this->DL), ObjectLayer(*this->ES, []() { return std::make_unique(); }), @@ -167,36 +167,36 @@ class KaleidoscopeJIT { ~KaleidoscopeJIT() { if (auto Err = ES->endSession()) ES->reportError(std::move(Err)); - if (auto Err = TPCIU->cleanup()) + if (auto Err = EPCIU->cleanup()) ES->reportError(std::move(Err)); } static Expected> Create() { auto SSP = std::make_shared(); - auto TPC = SelfTargetProcessControl::Create(SSP); - if (!TPC) - return TPC.takeError(); + auto EPC = SelfExecutorProcessControl::Create(SSP); + if (!EPC) + return EPC.takeError(); auto ES = std::make_unique(std::move(SSP)); - auto TPCIU = TPCIndirectionUtils::Create(**TPC); - if (!TPCIU) - return TPCIU.takeError(); + auto EPCIU = EPCIndirectionUtils::Create(**EPC); + if (!EPCIU) + return EPCIU.takeError(); - (*TPCIU)->createLazyCallThroughManager( + (*EPCIU)->createLazyCallThroughManager( *ES, pointerToJITTargetAddress(&handleLazyCallThroughError)); - if (auto Err = setUpInProcessLCTMReentryViaTPCIU(**TPCIU)) + if (auto Err = setUpInProcessLCTMReentryViaEPCIU(**EPCIU)) return std::move(Err); - JITTargetMachineBuilder JTMB((*TPC)->getTargetTriple()); + JITTargetMachineBuilder JTMB((*EPC)->getTargetTriple()); auto DL = JTMB.getDefaultDataLayoutForTarget(); if (!DL) return DL.takeError(); - return std::make_unique(std::move(*TPC), std::move(ES), - std::move(*TPCIU), std::move(JTMB), + return std::make_unique(std::move(*EPC), std::move(ES), + std::move(*EPCIU), std::move(JTMB), std::move(*DL)); } diff --git a/llvm/examples/Kaleidoscope/include/KaleidoscopeJIT.h b/llvm/examples/Kaleidoscope/include/KaleidoscopeJIT.h index eb2d21c50b9a0..783c352cf057c 100644 --- a/llvm/examples/Kaleidoscope/include/KaleidoscopeJIT.h +++ b/llvm/examples/Kaleidoscope/include/KaleidoscopeJIT.h @@ -18,10 +18,10 @@ #include "llvm/ExecutionEngine/Orc/CompileUtils.h" #include "llvm/ExecutionEngine/Orc/Core.h" #include "llvm/ExecutionEngine/Orc/ExecutionUtils.h" +#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h" #include "llvm/ExecutionEngine/Orc/IRCompileLayer.h" #include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h" #include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h" -#include "llvm/ExecutionEngine/Orc/TargetProcessControl.h" #include "llvm/ExecutionEngine/SectionMemoryManager.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/LLVMContext.h" @@ -32,7 +32,7 @@ namespace orc { class KaleidoscopeJIT { private: - std::unique_ptr TPC; + std::unique_ptr EPC; std::unique_ptr ES; DataLayout DL; @@ -44,10 +44,10 @@ class KaleidoscopeJIT { JITDylib &MainJD; public: - KaleidoscopeJIT(std::unique_ptr TPC, + KaleidoscopeJIT(std::unique_ptr EPC, std::unique_ptr ES, JITTargetMachineBuilder JTMB, DataLayout DL) - : TPC(std::move(TPC)), ES(std::move(ES)), DL(std::move(DL)), + : EPC(std::move(EPC)), ES(std::move(ES)), DL(std::move(DL)), Mangle(*this->ES, this->DL), ObjectLayer(*this->ES, []() { return std::make_unique(); }), @@ -66,19 +66,19 @@ class KaleidoscopeJIT { static Expected> Create() { auto SSP = std::make_shared(); - auto TPC = SelfTargetProcessControl::Create(SSP); - if (!TPC) - return TPC.takeError(); + auto EPC = SelfExecutorProcessControl::Create(SSP); + if (!EPC) + return EPC.takeError(); auto ES = std::make_unique(std::move(SSP)); - JITTargetMachineBuilder JTMB((*TPC)->getTargetTriple()); + JITTargetMachineBuilder JTMB((*EPC)->getTargetTriple()); auto DL = JTMB.getDefaultDataLayoutForTarget(); if (!DL) return DL.takeError(); - return std::make_unique(std::move(*TPC), std::move(ES), + return std::make_unique(std::move(*EPC), std::move(ES), std::move(JTMB), std::move(*DL)); } diff --git a/llvm/examples/OrcV2Examples/LLJITWithRemoteDebugging/RemoteJITUtils.cpp b/llvm/examples/OrcV2Examples/LLJITWithRemoteDebugging/RemoteJITUtils.cpp index abce14de0fe04..8e4a283556617 100644 --- a/llvm/examples/OrcV2Examples/LLJITWithRemoteDebugging/RemoteJITUtils.cpp +++ b/llvm/examples/OrcV2Examples/LLJITWithRemoteDebugging/RemoteJITUtils.cpp @@ -9,10 +9,10 @@ #include "RemoteJITUtils.h" #include "llvm/ExecutionEngine/Orc/DebugObjectManagerPlugin.h" -#include "llvm/ExecutionEngine/Orc/OrcRPCTargetProcessControl.h" +#include "llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h" +#include "llvm/ExecutionEngine/Orc/EPCDynamicLibrarySearchGenerator.h" +#include "llvm/ExecutionEngine/Orc/OrcRPCExecutorProcessControl.h" #include "llvm/ExecutionEngine/Orc/Shared/RPCUtils.h" -#include "llvm/ExecutionEngine/Orc/TPCDebugObjectRegistrar.h" -#include "llvm/ExecutionEngine/Orc/TPCDynamicLibrarySearchGenerator.h" #include "llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" @@ -31,25 +31,25 @@ using namespace llvm::orc; namespace llvm { namespace orc { -class RemoteTargetProcessControl - : public OrcRPCTargetProcessControlBase< +class RemoteExecutorProcessControl + : public OrcRPCExecutorProcessControlBase< shared::MultiThreadedRPCEndpoint> { public: using RPCChannel = JITLinkExecutor::RPCChannel; using RPCEndpoint = shared::MultiThreadedRPCEndpoint; private: - using ThisT = RemoteTargetProcessControl; - using BaseT = OrcRPCTargetProcessControlBase; - using MemoryAccess = OrcRPCTPCMemoryAccess; - using MemoryManager = OrcRPCTPCJITLinkMemoryManager; + using ThisT = RemoteExecutorProcessControl; + using BaseT = OrcRPCExecutorProcessControlBase; + using MemoryAccess = OrcRPCEPCMemoryAccess; + using MemoryManager = OrcRPCEPCJITLinkMemoryManager; public: - using BaseT::initializeORCRPCTPCBase; + using BaseT::initializeORCRPCEPCBase; - RemoteTargetProcessControl(ExecutionSession &ES, - std::unique_ptr Channel, - std::unique_ptr Endpoint); + RemoteExecutorProcessControl(ExecutionSession &ES, + std::unique_ptr Channel, + std::unique_ptr Endpoint); void initializeMemoryManagement(); Error disconnect() override; @@ -63,7 +63,7 @@ class RemoteTargetProcessControl std::thread ListenerThread; }; -RemoteTargetProcessControl::RemoteTargetProcessControl( +RemoteExecutorProcessControl::RemoteExecutorProcessControl( ExecutionSession &ES, std::unique_ptr Channel, std::unique_ptr Endpoint) : BaseT(ES.getSymbolStringPool(), *Endpoint, @@ -80,7 +80,7 @@ RemoteTargetProcessControl::RemoteTargetProcessControl( }); } -void RemoteTargetProcessControl::initializeMemoryManagement() { +void RemoteExecutorProcessControl::initializeMemoryManagement() { OwnedMemAccess = std::make_unique(*this); OwnedMemMgr = std::make_unique(*this); @@ -89,7 +89,7 @@ void RemoteTargetProcessControl::initializeMemoryManagement() { MemMgr = OwnedMemMgr.get(); } -Error RemoteTargetProcessControl::disconnect() { +Error RemoteExecutorProcessControl::disconnect() { std::promise P; auto F = P.get_future(); auto Err = closeConnection([&](Error Err) -> Error { @@ -109,11 +109,11 @@ JITLinkExecutor::~JITLinkExecutor() = default; Expected> JITLinkExecutor::operator()(ExecutionSession &ES, const Triple &TT) { - return std::make_unique(ES, TPC->getMemMgr()); + return std::make_unique(ES, EPC->getMemMgr()); } Error JITLinkExecutor::addDebugSupport(ObjectLayer &ObjLayer) { - auto Registrar = createJITLoaderGDBRegistrar(*TPC); + auto Registrar = createJITLoaderGDBRegistrar(*EPC); if (!Registrar) return Registrar.takeError(); @@ -126,18 +126,18 @@ Error JITLinkExecutor::addDebugSupport(ObjectLayer &ObjLayer) { Expected> JITLinkExecutor::loadDylib(StringRef RemotePath) { - if (auto Handle = TPC->loadDylib(RemotePath.data())) - return std::make_unique(*TPC, *Handle); + if (auto Handle = EPC->loadDylib(RemotePath.data())) + return std::make_unique(*EPC, *Handle); else return Handle.takeError(); } Expected JITLinkExecutor::runAsMain(JITEvaluatedSymbol MainSym, ArrayRef Args) { - return TPC->runAsMain(MainSym.getAddress(), Args); + return EPC->runAsMain(MainSym.getAddress(), Args); } -Error JITLinkExecutor::disconnect() { return TPC->disconnect(); } +Error JITLinkExecutor::disconnect() { return EPC->disconnect(); } static std::string defaultPath(const char *HostArgv0, StringRef ExecutorName) { // This just needs to be some symbol in the binary; C++ doesn't @@ -173,8 +173,8 @@ JITLinkExecutor::CreateLocal(std::string ExecutablePath) { } TCPSocketJITLinkExecutor::TCPSocketJITLinkExecutor( - std::unique_ptr TPC) { - this->TPC = std::move(TPC); + std::unique_ptr EPC) { + this->EPC = std::move(EPC); } #ifndef LLVM_ON_UNIX @@ -249,16 +249,16 @@ Error ChildProcessJITLinkExecutor::launch(ExecutionSession &ES) { auto Channel = std::make_unique(FromExecutor[ReadEnd], ToExecutor[WriteEnd]); - auto Endpoint = - std::make_unique(*Channel, true); + auto Endpoint = std::make_unique( + *Channel, true); - TPC = std::make_unique(ES, std::move(Channel), - std::move(Endpoint)); + EPC = std::make_unique(ES, std::move(Channel), + std::move(Endpoint)); - if (auto Err = TPC->initializeORCRPCTPCBase()) - return joinErrors(std::move(Err), TPC->disconnect()); + if (auto Err = EPC->initializeORCRPCEPCBase()) + return joinErrors(std::move(Err), EPC->disconnect()); - TPC->initializeMemoryManagement(); + EPC->initializeMemoryManagement(); shared::registerStringError(); return Error::success(); @@ -328,20 +328,20 @@ JITLinkExecutor::ConnectTCPSocket(StringRef NetworkAddress, return CreateErr(toString(SockFD.takeError())); auto Channel = std::make_unique(*SockFD, *SockFD); - auto Endpoint = - std::make_unique(*Channel, true); + auto Endpoint = std::make_unique( + *Channel, true); - auto TPC = std::make_unique( + auto EPC = std::make_unique( ES, std::move(Channel), std::move(Endpoint)); - if (auto Err = TPC->initializeORCRPCTPCBase()) - return joinErrors(std::move(Err), TPC->disconnect()); + if (auto Err = EPC->initializeORCRPCEPCBase()) + return joinErrors(std::move(Err), EPC->disconnect()); - TPC->initializeMemoryManagement(); + EPC->initializeMemoryManagement(); shared::registerStringError(); return std::unique_ptr( - new TCPSocketJITLinkExecutor(std::move(TPC))); + new TCPSocketJITLinkExecutor(std::move(EPC))); } #endif diff --git a/llvm/examples/OrcV2Examples/LLJITWithRemoteDebugging/RemoteJITUtils.h b/llvm/examples/OrcV2Examples/LLJITWithRemoteDebugging/RemoteJITUtils.h index e629c0e036f50..baa376003a67b 100644 --- a/llvm/examples/OrcV2Examples/LLJITWithRemoteDebugging/RemoteJITUtils.h +++ b/llvm/examples/OrcV2Examples/LLJITWithRemoteDebugging/RemoteJITUtils.h @@ -6,7 +6,8 @@ // //===----------------------------------------------------------------------===// // -// Utilities for TargetProcessControl-based remote JITing with Orc and JITLink. +// Utilities for ExecutorProcessControl-based remote JITing with Orc and +// JITLink. // //===----------------------------------------------------------------------===// @@ -36,7 +37,7 @@ namespace llvm { namespace orc { class ChildProcessJITLinkExecutor; -class RemoteTargetProcessControl; +class RemoteExecutorProcessControl; class TCPSocketJITLinkExecutor; class JITLinkExecutor { @@ -73,7 +74,7 @@ class JITLinkExecutor { virtual ~JITLinkExecutor(); protected: - std::unique_ptr TPC; + std::unique_ptr EPC; JITLinkExecutor(); }; @@ -100,7 +101,7 @@ class ChildProcessJITLinkExecutor : public JITLinkExecutor { /// JITLinkExecutor connected through a TCP socket. class TCPSocketJITLinkExecutor : public JITLinkExecutor { private: - TCPSocketJITLinkExecutor(std::unique_ptr TPC); + TCPSocketJITLinkExecutor(std::unique_ptr EPC); friend class JITLinkExecutor; }; diff --git a/llvm/examples/OrcV2Examples/LLJITWithTargetProcessControl/LLJITWithTargetProcessControl.cpp b/llvm/examples/OrcV2Examples/LLJITWithTargetProcessControl/LLJITWithTargetProcessControl.cpp index 4e36631ef97db..953d90e4c06ef 100644 --- a/llvm/examples/OrcV2Examples/LLJITWithTargetProcessControl/LLJITWithTargetProcessControl.cpp +++ b/llvm/examples/OrcV2Examples/LLJITWithTargetProcessControl/LLJITWithTargetProcessControl.cpp @@ -1,4 +1,4 @@ -//===--- LLJITWithLazyReexports.cpp - LLJIT example with custom laziness --===// +//===- LLJITWithExecutorProcessControl.cpp - LLJIT example with EPC utils -===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -22,12 +22,12 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h" +#include "llvm/ExecutionEngine/Orc/EPCDynamicLibrarySearchGenerator.h" +#include "llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h" +#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h" #include "llvm/ExecutionEngine/Orc/LLJIT.h" #include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h" #include "llvm/ExecutionEngine/Orc/OrcABISupport.h" -#include "llvm/ExecutionEngine/Orc/TPCDynamicLibrarySearchGenerator.h" -#include "llvm/ExecutionEngine/Orc/TPCIndirectionUtils.h" -#include "llvm/ExecutionEngine/Orc/TargetProcessControl.h" #include "llvm/Support/InitLLVM.h" #include "llvm/Support/TargetSelect.h" #include "llvm/Support/raw_ostream.h" @@ -105,8 +105,8 @@ static void *reenter(void *Ctx, void *TrampolineAddr) { std::promise LandingAddressP; auto LandingAddressF = LandingAddressP.get_future(); - auto *TPCIU = static_cast(Ctx); - TPCIU->getLazyCallThroughManager().resolveTrampolineLandingAddress( + auto *EPCIU = static_cast(Ctx); + EPCIU->getLazyCallThroughManager().resolveTrampolineLandingAddress( pointerToJITTargetAddress(TrampolineAddr), [&](JITTargetAddress LandingAddress) { LandingAddressP.set_value( @@ -135,8 +135,8 @@ int main(int argc, char *argv[]) { // (1) Create LLJIT instance. auto SSP = std::make_shared(); - auto TPC = ExitOnErr(SelfTargetProcessControl::Create(std::move(SSP))); - auto J = ExitOnErr(LLJITBuilder().setTargetProcessControl(*TPC).create()); + auto EPC = ExitOnErr(SelfExecutorProcessControl::Create(std::move(SSP))); + auto J = ExitOnErr(LLJITBuilder().setExecutorProcessControl(*EPC).create()); // (2) Install transform to print modules as they are compiled: J->getIRTransformLayer().setTransform( @@ -147,14 +147,14 @@ int main(int argc, char *argv[]) { }); // (3) Create stubs and call-through managers: - auto TPCIU = ExitOnErr(TPCIndirectionUtils::Create(*TPC)); - ExitOnErr(TPCIU->writeResolverBlock(pointerToJITTargetAddress(&reenter), - pointerToJITTargetAddress(TPCIU.get()))); - TPCIU->createLazyCallThroughManager( + auto EPCIU = ExitOnErr(EPCIndirectionUtils::Create(*EPC)); + ExitOnErr(EPCIU->writeResolverBlock(pointerToJITTargetAddress(&reenter), + pointerToJITTargetAddress(EPCIU.get()))); + EPCIU->createLazyCallThroughManager( J->getExecutionSession(), pointerToJITTargetAddress(&reportErrorAndExit)); - auto ISM = TPCIU->createIndirectStubsManager(); + auto ISM = EPCIU->createIndirectStubsManager(); J->getMainJITDylib().addGenerator( - ExitOnErr(TPCDynamicLibrarySearchGenerator::GetForTargetProcess(*TPC))); + ExitOnErr(EPCDynamicLibrarySearchGenerator::GetForTargetProcess(*EPC))); // (4) Add modules. ExitOnErr(J->addIRModule(ExitOnErr(parseExampleModule(FooMod, "foo-mod")))); @@ -171,7 +171,7 @@ int main(int argc, char *argv[]) { {Mangle("bar_body"), JITSymbolFlags::Exported | JITSymbolFlags::Callable}}}); ExitOnErr(J->getMainJITDylib().define( - lazyReexports(TPCIU->getLazyCallThroughManager(), *ISM, + lazyReexports(EPCIU->getLazyCallThroughManager(), *ISM, J->getMainJITDylib(), std::move(ReExports)))); // (6) Dump the ExecutionSession state. diff --git a/llvm/include/llvm-c/LLJIT.h b/llvm/include/llvm-c/LLJIT.h index d8156ccc1f553..bd98cfbab839c 100644 --- a/llvm/include/llvm-c/LLJIT.h +++ b/llvm/include/llvm-c/LLJIT.h @@ -208,7 +208,7 @@ LLVMErrorRef LLVMOrcLLJITAddLLVMIRModuleWithRT(LLVMOrcLLJITRef J, * This operation does not take ownership of the Name argument. */ LLVMErrorRef LLVMOrcLLJITLookup(LLVMOrcLLJITRef J, - LLVMOrcJITTargetAddress *Result, + LLVMOrcExecutorAddress *Result, const char *Name); /** diff --git a/llvm/include/llvm-c/Orc.h b/llvm/include/llvm-c/Orc.h index 75af7320ce253..1d26d7ca303c3 100644 --- a/llvm/include/llvm-c/Orc.h +++ b/llvm/include/llvm-c/Orc.h @@ -34,10 +34,15 @@ LLVM_C_EXTERN_C_BEGIN /** - * Represents an address in the target process. + * Represents an address in the executor process. */ typedef uint64_t LLVMOrcJITTargetAddress; +/** + * Represents an address in the executor process. + */ +typedef uint64_t LLVMOrcExecutorAddress; + /** * Represents generic linkage flags for a symbol definition. */ @@ -65,7 +70,7 @@ typedef struct { * Represents an evaluated symbol address and flags. */ typedef struct { - LLVMOrcJITTargetAddress Address; + LLVMOrcExecutorAddress Address; LLVMJITSymbolFlags Flags; } LLVMJITEvaluatedSymbol; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/DebugObjectManagerPlugin.h b/llvm/include/llvm/ExecutionEngine/Orc/DebugObjectManagerPlugin.h index 2e332130d262a..455f5a8287f82 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/DebugObjectManagerPlugin.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/DebugObjectManagerPlugin.h @@ -16,8 +16,8 @@ #include "llvm/ADT/Triple.h" #include "llvm/ExecutionEngine/JITLink/JITLink.h" #include "llvm/ExecutionEngine/Orc/Core.h" +#include "llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h" #include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h" -#include "llvm/ExecutionEngine/Orc/TPCDebugObjectRegistrar.h" #include "llvm/Support/Error.h" #include "llvm/Support/Memory.h" #include "llvm/Support/MemoryBufferRef.h" diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TPCDebugObjectRegistrar.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h similarity index 54% rename from llvm/include/llvm/ExecutionEngine/Orc/TPCDebugObjectRegistrar.h rename to llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h index 7995b0cb35d1f..dc42c74a61204 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/TPCDebugObjectRegistrar.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h @@ -1,4 +1,4 @@ -//===- TPCDebugObjectRegistrar.h - TPC-based debug registration -*- C++ -*-===// +//===- EPCDebugObjectRegistrar.h - EPC-based debug registration -*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,15 +6,15 @@ // //===----------------------------------------------------------------------===// // -// TargetProcessControl based registration of debug objects. +// ExecutorProcessControl based registration of debug objects. // //===----------------------------------------------------------------------===// -#ifndef LLVM_EXECUTIONENGINE_ORC_TPCDEBUGOBJECTREGISTRAR_H -#define LLVM_EXECUTIONENGINE_ORC_TPCDEBUGOBJECTREGISTRAR_H +#ifndef LLVM_EXECUTIONENGINE_ORC_EPCDEBUGOBJECTREGISTRAR_H +#define LLVM_EXECUTIONENGINE_ORC_EPCDEBUGOBJECTREGISTRAR_H #include "llvm/ExecutionEngine/JITSymbol.h" -#include "llvm/ExecutionEngine/Orc/TargetProcessControl.h" +#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h" #include "llvm/Support/Error.h" #include "llvm/Support/Memory.h" @@ -27,38 +27,38 @@ using namespace llvm::orc::shared; namespace llvm { namespace orc { -/// Abstract interface for registering debug objects in the target process. +/// Abstract interface for registering debug objects in the executor process. class DebugObjectRegistrar { public: virtual Error registerDebugObject(sys::MemoryBlock) = 0; virtual ~DebugObjectRegistrar() {} }; -/// Use TargetProcessControl to register debug objects locally or in a remote -/// target process. -class TPCDebugObjectRegistrar : public DebugObjectRegistrar { +/// Use ExecutorProcessControl to register debug objects locally or in a remote +/// executor process. +class EPCDebugObjectRegistrar : public DebugObjectRegistrar { public: - TPCDebugObjectRegistrar(TargetProcessControl &TPC, + EPCDebugObjectRegistrar(ExecutorProcessControl &EPC, JITTargetAddress RegisterFn) - : TPC(TPC), RegisterFn(RegisterFn) {} + : EPC(EPC), RegisterFn(RegisterFn) {} Error registerDebugObject(sys::MemoryBlock TargetMem) override { return WrapperFunction::call( - TPCCaller(TPC, RegisterFn), pointerToJITTargetAddress(TargetMem.base()), + EPCCaller(EPC, RegisterFn), pointerToJITTargetAddress(TargetMem.base()), static_cast(TargetMem.allocatedSize())); } private: - TargetProcessControl &TPC; + ExecutorProcessControl &EPC; JITTargetAddress RegisterFn; }; -/// Create a TargetProcessControl-based DebugObjectRegistrar that emits debug +/// Create a ExecutorProcessControl-based DebugObjectRegistrar that emits debug /// objects to the GDB JIT interface. -Expected> -createJITLoaderGDBRegistrar(TargetProcessControl &TPC); +Expected> +createJITLoaderGDBRegistrar(ExecutorProcessControl &EPC); } // end namespace orc } // end namespace llvm -#endif // LLVM_EXECUTIONENGINE_ORC_TDEBUGOBJECTREGISTRAR_H +#endif // LLVM_EXECUTIONENGINE_ORC_EPCDEBUGOBJECTREGISTRAR_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TPCDynamicLibrarySearchGenerator.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCDynamicLibrarySearchGenerator.h similarity index 63% rename from llvm/include/llvm/ExecutionEngine/Orc/TPCDynamicLibrarySearchGenerator.h rename to llvm/include/llvm/ExecutionEngine/Orc/EPCDynamicLibrarySearchGenerator.h index ed4f6080bb4eb..08b5ab05ba761 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/TPCDynamicLibrarySearchGenerator.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCDynamicLibrarySearchGenerator.h @@ -1,4 +1,4 @@ -//===------------ TPCDynamicLibrarySearchGenerator.h ------------*- C++ -*-===// +//===------------ EPCDynamicLibrarySearchGenerator.h ------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,21 +6,21 @@ // //===----------------------------------------------------------------------===// // -// Support loading and searching of dynamic libraries in a target process via -// the TargetProcessControl class. +// Support loading and searching of dynamic libraries in an executor process +// via the ExecutorProcessControl class. // //===----------------------------------------------------------------------===// -#ifndef LLVM_EXECUTIONENGINE_ORC_TPCDYNAMICLIBRARYSEARCHGENERATOR_H -#define LLVM_EXECUTIONENGINE_ORC_TPCDYNAMICLIBRARYSEARCHGENERATOR_H +#ifndef LLVM_EXECUTIONENGINE_ORC_EPCDYNAMICLIBRARYSEARCHGENERATOR_H +#define LLVM_EXECUTIONENGINE_ORC_EPCDYNAMICLIBRARYSEARCHGENERATOR_H #include "llvm/ADT/FunctionExtras.h" -#include "llvm/ExecutionEngine/Orc/TargetProcessControl.h" +#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h" namespace llvm { namespace orc { -class TPCDynamicLibrarySearchGenerator : public DefinitionGenerator { +class EPCDynamicLibrarySearchGenerator : public DefinitionGenerator { public: using SymbolPredicate = unique_function; @@ -30,24 +30,24 @@ class TPCDynamicLibrarySearchGenerator : public DefinitionGenerator { /// If the Allow predicate is given then only symbols matching the predicate /// will be searched for. If the predicate is not given then all symbols will /// be searched for. - TPCDynamicLibrarySearchGenerator(TargetProcessControl &TPC, + EPCDynamicLibrarySearchGenerator(ExecutorProcessControl &EPC, tpctypes::DylibHandle H, SymbolPredicate Allow = SymbolPredicate()) - : TPC(TPC), H(H), Allow(std::move(Allow)) {} + : EPC(EPC), H(H), Allow(std::move(Allow)) {} /// Permanently loads the library at the given path and, on success, returns /// a DynamicLibrarySearchGenerator that will search it for symbol definitions /// in the library. On failure returns the reason the library failed to load. - static Expected> - Load(TargetProcessControl &TPC, const char *LibraryPath, + static Expected> + Load(ExecutorProcessControl &EPC, const char *LibraryPath, SymbolPredicate Allow = SymbolPredicate()); - /// Creates a TPCDynamicLibrarySearchGenerator that searches for symbols in + /// Creates a EPCDynamicLibrarySearchGenerator that searches for symbols in /// the target process. - static Expected> - GetForTargetProcess(TargetProcessControl &TPC, + static Expected> + GetForTargetProcess(ExecutorProcessControl &EPC, SymbolPredicate Allow = SymbolPredicate()) { - return Load(TPC, nullptr, std::move(Allow)); + return Load(EPC, nullptr, std::move(Allow)); } Error tryToGenerate(LookupState &LS, LookupKind K, JITDylib &JD, @@ -55,7 +55,7 @@ class TPCDynamicLibrarySearchGenerator : public DefinitionGenerator { const SymbolLookupSet &Symbols) override; private: - TargetProcessControl &TPC; + ExecutorProcessControl &EPC; tpctypes::DylibHandle H; SymbolPredicate Allow; }; @@ -63,4 +63,4 @@ class TPCDynamicLibrarySearchGenerator : public DefinitionGenerator { } // end namespace orc } // end namespace llvm -#endif // LLVM_EXECUTIONENGINE_ORC_TPCDYNAMICLIBRARYSEARCHGENERATOR_H +#endif // LLVM_EXECUTIONENGINE_ORC_EPCDYNAMICLIBRARYSEARCHGENERATOR_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TPCEHFrameRegistrar.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h similarity index 59% rename from llvm/include/llvm/ExecutionEngine/Orc/TPCEHFrameRegistrar.h rename to llvm/include/llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h index 519f818907f9c..f230cfcac4167 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/TPCEHFrameRegistrar.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h @@ -1,4 +1,4 @@ -//===-- TPCEHFrameRegistrar.h - TPC based eh-frame registration -*- C++ -*-===// +//===-- EPCEHFrameRegistrar.h - EPC based eh-frame registration -*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,35 +6,35 @@ // //===----------------------------------------------------------------------===// // -// TargetProcessControl based eh-frame registration. +// ExecutorProcessControl based eh-frame registration. // //===----------------------------------------------------------------------===// -#ifndef LLVM_EXECUTIONENGINE_ORC_TPCEHFRAMEREGISTRAR_H -#define LLVM_EXECUTIONENGINE_ORC_TPCEHFRAMEREGISTRAR_H +#ifndef LLVM_EXECUTIONENGINE_ORC_EPCEHFRAMEREGISTRAR_H +#define LLVM_EXECUTIONENGINE_ORC_EPCEHFRAMEREGISTRAR_H #include "llvm/ExecutionEngine/JITLink/EHFrameSupport.h" -#include "llvm/ExecutionEngine/Orc/TargetProcessControl.h" +#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h" namespace llvm { namespace orc { /// Register/Deregisters EH frames in a remote process via a -/// TargetProcessControl instance. -class TPCEHFrameRegistrar : public jitlink::EHFrameRegistrar { +/// ExecutorProcessControl instance. +class EPCEHFrameRegistrar : public jitlink::EHFrameRegistrar { public: - /// Create from a TargetProcessControl instance alone. This will use - /// the TPC's lookupSymbols method to find the registration/deregistration + /// Create from a ExecutorProcessControl instance alone. This will use + /// the EPC's lookupSymbols method to find the registration/deregistration /// funciton addresses by name. - static Expected> - Create(TargetProcessControl &TPC); + static Expected> + Create(ExecutorProcessControl &EPC); - /// Create a TPCEHFrameRegistrar with the given TargetProcessControl + /// Create a EPCEHFrameRegistrar with the given ExecutorProcessControl /// object and registration/deregistration function addresses. - TPCEHFrameRegistrar(TargetProcessControl &TPC, + EPCEHFrameRegistrar(ExecutorProcessControl &EPC, JITTargetAddress RegisterEHFrameWrapperFnAddr, JITTargetAddress DeregisterEHFRameWrapperFnAddr) - : TPC(TPC), RegisterEHFrameWrapperFnAddr(RegisterEHFrameWrapperFnAddr), + : EPC(EPC), RegisterEHFrameWrapperFnAddr(RegisterEHFrameWrapperFnAddr), DeregisterEHFrameWrapperFnAddr(DeregisterEHFRameWrapperFnAddr) {} Error registerEHFrames(JITTargetAddress EHFrameSectionAddr, @@ -43,7 +43,7 @@ class TPCEHFrameRegistrar : public jitlink::EHFrameRegistrar { size_t EHFrameSectionSize) override; private: - TargetProcessControl &TPC; + ExecutorProcessControl &EPC; JITTargetAddress RegisterEHFrameWrapperFnAddr; JITTargetAddress DeregisterEHFrameWrapperFnAddr; }; @@ -51,4 +51,4 @@ class TPCEHFrameRegistrar : public jitlink::EHFrameRegistrar { } // end namespace orc } // end namespace llvm -#endif // LLVM_EXECUTIONENGINE_ORC_TPCEHFRAMEREGISTRAR_H +#endif // LLVM_EXECUTIONENGINE_ORC_EPCEHFRAMEREGISTRAR_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TPCIndirectionUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h similarity index 78% rename from llvm/include/llvm/ExecutionEngine/Orc/TPCIndirectionUtils.h rename to llvm/include/llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h index e7abd7fb90dfe..64f16d507c97e 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/TPCIndirectionUtils.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h @@ -1,4 +1,4 @@ -//===--- TPCIndirectionUtils.h - TPC based indirection utils ----*- C++ -*-===// +//===--- EPCIndirectionUtils.h - EPC based indirection utils ----*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -7,12 +7,12 @@ //===----------------------------------------------------------------------===// // // Indirection utilities (stubs, trampolines, lazy call-throughs) that use the -// TargetProcessControl API to interact with the target process. +// ExecutorProcessControl API to interact with the executor process. // //===----------------------------------------------------------------------===// -#ifndef LLVM_EXECUTIONENGINE_ORC_TPCINDIRECTIONUTILS_H -#define LLVM_EXECUTIONENGINE_ORC_TPCINDIRECTIONUTILS_H +#ifndef LLVM_EXECUTIONENGINE_ORC_EPCINDIRECTIONUTILS_H +#define LLVM_EXECUTIONENGINE_ORC_EPCINDIRECTIONUTILS_H #include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h" #include "llvm/ExecutionEngine/Orc/IndirectionUtils.h" @@ -23,12 +23,12 @@ namespace llvm { namespace orc { -class TargetProcessControl; +class ExecutorProcessControl; -/// Provides TargetProcessControl based indirect stubs, trampoline pool and +/// Provides ExecutorProcessControl based indirect stubs, trampoline pool and /// lazy call through manager. -class TPCIndirectionUtils { - friend class TPCIndirectionUtilsAccess; +class EPCIndirectionUtils { + friend class EPCIndirectionUtilsAccess; public: /// ABI support base class. Used to write resolver, stub, and trampoline @@ -79,15 +79,15 @@ class TPCIndirectionUtils { /// Create using the given ABI class. template - static std::unique_ptr - CreateWithABI(TargetProcessControl &TPC); + static std::unique_ptr + CreateWithABI(ExecutorProcessControl &EPC); - /// Create based on the TargetProcessControl triple. - static Expected> - Create(TargetProcessControl &TPC); + /// Create based on the ExecutorProcessControl triple. + static Expected> + Create(ExecutorProcessControl &EPC); - /// Return a reference to the TargetProcessControl object. - TargetProcessControl &getTargetProcessControl() const { return TPC; } + /// Return a reference to the ExecutorProcessControl object. + ExecutorProcessControl &getExecutorProcessControl() const { return EPC; } /// Return a reference to the ABISupport object for this instance. ABISupport &getABISupport() const { return *ABI; } @@ -96,7 +96,7 @@ class TPCIndirectionUtils { /// prior to destruction of the class. Error cleanup(); - /// Write resolver code to the target process and return its address. + /// Write resolver code to the executor process and return its address. /// This must be called before any call to createTrampolinePool or /// createLazyCallThroughManager. Expected @@ -107,10 +107,10 @@ class TPCIndirectionUtils { /// writeResolverBlock method has not previously been called. JITTargetAddress getResolverBlockAddress() const { return ResolverBlockAddr; } - /// Create an IndirectStubsManager for the target process. + /// Create an IndirectStubsManager for the executor process. std::unique_ptr createIndirectStubsManager(); - /// Create a TrampolinePool for the target process. + /// Create a TrampolinePool for the executor process. TrampolinePool &getTrampolinePool(); /// Create a LazyCallThroughManager. @@ -119,7 +119,7 @@ class TPCIndirectionUtils { createLazyCallThroughManager(ExecutionSession &ES, JITTargetAddress ErrorHandlerAddr); - /// Create a LazyCallThroughManager for the target process. + /// Create a LazyCallThroughManager for the executor process. LazyCallThroughManager &getLazyCallThroughManager() { assert(LCTM && "createLazyCallThroughManager must be called first"); return *LCTM; @@ -139,14 +139,14 @@ class TPCIndirectionUtils { using IndirectStubInfoVector = std::vector; - /// Create a TPCIndirectionUtils instance. - TPCIndirectionUtils(TargetProcessControl &TPC, + /// Create an EPCIndirectionUtils instance. + EPCIndirectionUtils(ExecutorProcessControl &EPC, std::unique_ptr ABI); Expected getIndirectStubs(unsigned NumStubs); - std::mutex TPCUIMutex; - TargetProcessControl &TPC; + std::mutex EPCUIMutex; + ExecutorProcessControl &EPC; std::unique_ptr ABI; JITTargetAddress ResolverBlockAddr; std::unique_ptr ResolverBlock; @@ -157,23 +157,23 @@ class TPCIndirectionUtils { std::vector> IndirectStubAllocs; }; -/// This will call writeResolver on the given TPCIndirectionUtils instance +/// This will call writeResolver on the given EPCIndirectionUtils instance /// to set up re-entry via a function that will directly return the trampoline /// landing address. /// -/// The TPCIndirectionUtils' LazyCallThroughManager must have been previously -/// created via TPCIndirectionUtils::createLazyCallThroughManager. +/// The EPCIndirectionUtils' LazyCallThroughManager must have been previously +/// created via EPCIndirectionUtils::createLazyCallThroughManager. /// -/// The TPCIndirectionUtils' writeResolver method must not have been previously +/// The EPCIndirectionUtils' writeResolver method must not have been previously /// called. /// /// This function is experimental and likely subject to revision. -Error setUpInProcessLCTMReentryViaTPCIU(TPCIndirectionUtils &TPCIU); +Error setUpInProcessLCTMReentryViaEPCIU(EPCIndirectionUtils &EPCIU); namespace detail { template -class ABISupportImpl : public TPCIndirectionUtils::ABISupport { +class ABISupportImpl : public EPCIndirectionUtils::ABISupport { public: ABISupportImpl() : ABISupport(ORCABI::PointerSize, ORCABI::TrampolineSize, @@ -210,13 +210,13 @@ class ABISupportImpl : public TPCIndirectionUtils::ABISupport { } // end namespace detail template -std::unique_ptr -TPCIndirectionUtils::CreateWithABI(TargetProcessControl &TPC) { - return std::unique_ptr(new TPCIndirectionUtils( - TPC, std::make_unique>())); +std::unique_ptr +EPCIndirectionUtils::CreateWithABI(ExecutorProcessControl &EPC) { + return std::unique_ptr(new EPCIndirectionUtils( + EPC, std::make_unique>())); } } // end namespace orc } // end namespace llvm -#endif // LLVM_EXECUTIONENGINE_ORC_TPCINDIRECTIONUTILS_H +#endif // LLVM_EXECUTIONENGINE_ORC_EPCINDIRECTIONUTILS_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcessControl.h b/llvm/include/llvm/ExecutionEngine/Orc/ExecutorProcessControl.h similarity index 86% rename from llvm/include/llvm/ExecutionEngine/Orc/TargetProcessControl.h rename to llvm/include/llvm/ExecutionEngine/Orc/ExecutorProcessControl.h index a39495f538208..7969a8398c952 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcessControl.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/ExecutorProcessControl.h @@ -1,4 +1,4 @@ -//===--- TargetProcessControl.h - Target process control APIs ---*- C++ -*-===// +//===- ExecutorProcessControl.h - Executor process control APIs -*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,12 +6,12 @@ // //===----------------------------------------------------------------------===// // -// Utilities for interacting with target processes. +// Utilities for interacting with the executor processes. // //===----------------------------------------------------------------------===// -#ifndef LLVM_EXECUTIONENGINE_ORC_TARGETPROCESSCONTROL_H -#define LLVM_EXECUTIONENGINE_ORC_TARGETPROCESSCONTROL_H +#ifndef LLVM_EXECUTIONENGINE_ORC_EXECUTORPROCESSCONTROL_H +#define LLVM_EXECUTIONENGINE_ORC_EXECUTORPROCESSCONTROL_H #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" @@ -29,8 +29,8 @@ namespace llvm { namespace orc { -/// TargetProcessControl supports interaction with a JIT target process. -class TargetProcessControl { +/// ExecutorProcessControl supports interaction with a JIT target process. +class ExecutorProcessControl { public: /// APIs for manipulating memory in the target process. class MemoryAccess { @@ -99,7 +99,7 @@ class TargetProcessControl { const SymbolLookupSet &Symbols; }; - virtual ~TargetProcessControl(); + virtual ~ExecutorProcessControl(); /// Intern a symbol name in the SymbolStringPool. SymbolStringPtr intern(StringRef SymName) { return SSP->intern(SymName); } @@ -153,7 +153,7 @@ class TargetProcessControl { virtual Error disconnect() = 0; protected: - TargetProcessControl(std::shared_ptr SSP) + ExecutorProcessControl(std::shared_ptr SSP) : SSP(std::move(SSP)) {} std::shared_ptr SSP; @@ -163,33 +163,34 @@ class TargetProcessControl { jitlink::JITLinkMemoryManager *MemMgr = nullptr; }; -/// Call a wrapper function via TargetProcessControl::runWrapper. -class TPCCaller { +/// Call a wrapper function via ExecutorProcessControl::runWrapper. +class EPCCaller { public: - TPCCaller(TargetProcessControl &TPC, JITTargetAddress WrapperFnAddr) - : TPC(TPC), WrapperFnAddr(WrapperFnAddr) {} + EPCCaller(ExecutorProcessControl &EPC, JITTargetAddress WrapperFnAddr) + : EPC(EPC), WrapperFnAddr(WrapperFnAddr) {} Expected operator()(const char *ArgData, size_t ArgSize) const { - return TPC.runWrapper(WrapperFnAddr, ArrayRef(ArgData, ArgSize)); + return EPC.runWrapper(WrapperFnAddr, ArrayRef(ArgData, ArgSize)); } private: - TargetProcessControl &TPC; + ExecutorProcessControl &EPC; JITTargetAddress WrapperFnAddr; }; -/// A TargetProcessControl implementation targeting the current process. -class SelfTargetProcessControl : public TargetProcessControl, - private TargetProcessControl::MemoryAccess { +/// A ExecutorProcessControl implementation targeting the current process. +class SelfExecutorProcessControl + : public ExecutorProcessControl, + private ExecutorProcessControl::MemoryAccess { public: - SelfTargetProcessControl( + SelfExecutorProcessControl( std::shared_ptr SSP, Triple TargetTriple, unsigned PageSize, std::unique_ptr MemMgr); - /// Create a SelfTargetProcessControl with the given memory manager. + /// Create a SelfExecutorProcessControl with the given memory manager. /// If no memory manager is given a jitlink::InProcessMemoryManager will /// be used by default. - static Expected> + static Expected> Create(std::shared_ptr SSP, std::unique_ptr MemMgr = nullptr); @@ -230,4 +231,4 @@ class SelfTargetProcessControl : public TargetProcessControl, } // end namespace orc } // end namespace llvm -#endif // LLVM_EXECUTIONENGINE_ORC_TARGETPROCESSCONTROL_H +#endif // LLVM_EXECUTIONENGINE_ORC_EXECUTORPROCESSCONTROL_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h b/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h index 6eb3d0c3f96d8..50a2d95624c3c 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h @@ -29,7 +29,7 @@ namespace orc { class LLJITBuilderState; class LLLazyJITBuilderState; class ObjectTransformLayer; -class TargetProcessControl; +class ExecutorProcessControl; /// A pre-fabricated ORC JIT stack that can serve as an alternative to MCJIT. /// @@ -267,7 +267,7 @@ class LLJITBuilderState { CompileFunctionCreator CreateCompileFunction; PlatformSetupFunction SetUpPlatform; unsigned NumCompileThreads = 0; - TargetProcessControl *TPC = nullptr; + ExecutorProcessControl *EPC = nullptr; /// Called prior to JIT class construcion to fix up defaults. Error prepareForConstruction(); @@ -350,14 +350,14 @@ class LLJITBuilderSetters { return impl(); } - /// Set a TargetProcessControl object. + /// Set an ExecutorProcessControl object. /// /// If the platform uses ObjectLinkingLayer by default and no - /// ObjectLinkingLayerCreator has been set then the TargetProcessControl + /// ObjectLinkingLayerCreator has been set then the ExecutorProcessControl /// object will be used to supply the memory manager for the /// ObjectLinkingLayer. - SetterImpl &setTargetProcessControl(TargetProcessControl &TPC) { - impl().TPC = &TPC; + SetterImpl &setExecutorProcessControl(ExecutorProcessControl &EPC) { + impl().EPC = &EPC; return impl(); } diff --git a/llvm/include/llvm/ExecutionEngine/Orc/OrcRPCTargetProcessControl.h b/llvm/include/llvm/ExecutionEngine/Orc/OrcRPCExecutorProcessControl.h similarity index 90% rename from llvm/include/llvm/ExecutionEngine/Orc/OrcRPCTargetProcessControl.h rename to llvm/include/llvm/ExecutionEngine/Orc/OrcRPCExecutorProcessControl.h index d42c5c4d6dd8b..0b5ee262bb706 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/OrcRPCTargetProcessControl.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/OrcRPCExecutorProcessControl.h @@ -1,4 +1,4 @@ -//===--- OrcRPCTargetProcessControl.h - Remote target control ---*- C++ -*-===// +//===-- OrcRPCExecutorProcessControl.h - Remote target control --*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,17 +6,17 @@ // //===----------------------------------------------------------------------===// // -// Utilities for interacting with target processes. +// Executor control via ORC RPC. // //===----------------------------------------------------------------------===// -#ifndef LLVM_EXECUTIONENGINE_ORC_ORCRPCTARGETPROCESSCONTROL_H -#define LLVM_EXECUTIONENGINE_ORC_ORCRPCTARGETPROCESSCONTROL_H +#ifndef LLVM_EXECUTIONENGINE_ORC_ORCRPCEXECUTORPROCESSCONTROL_H +#define LLVM_EXECUTIONENGINE_ORC_ORCRPCEXECUTORPROCESSCONTROL_H +#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h" #include "llvm/ExecutionEngine/Orc/Shared/RPCUtils.h" #include "llvm/ExecutionEngine/Orc/Shared/RawByteChannel.h" #include "llvm/ExecutionEngine/Orc/TargetProcess/OrcRPCTPCServer.h" -#include "llvm/ExecutionEngine/Orc/TargetProcessControl.h" #include "llvm/Support/MSVCErrorWorkarounds.h" namespace llvm { @@ -24,8 +24,8 @@ namespace orc { /// JITLinkMemoryManager implementation for a process connected via an ORC RPC /// endpoint. -template -class OrcRPCTPCJITLinkMemoryManager : public jitlink::JITLinkMemoryManager { +template +class OrcRPCEPCJITLinkMemoryManager : public jitlink::JITLinkMemoryManager { private: struct HostAlloc { std::unique_ptr Mem; @@ -43,7 +43,7 @@ class OrcRPCTPCJITLinkMemoryManager : public jitlink::JITLinkMemoryManager { public: class OrcRPCAllocation : public Allocation { public: - OrcRPCAllocation(OrcRPCTPCJITLinkMemoryManager &Parent, + OrcRPCAllocation(OrcRPCEPCJITLinkMemoryManager &Parent, HostAllocMap HostAllocs, TargetAllocMap TargetAllocs) : Parent(Parent), HostAllocs(std::move(HostAllocs)), TargetAllocs(std::move(TargetAllocs)) { @@ -140,12 +140,12 @@ class OrcRPCTPCJITLinkMemoryManager : public jitlink::JITLinkMemoryManager { } private: - OrcRPCTPCJITLinkMemoryManager &Parent; + OrcRPCEPCJITLinkMemoryManager &Parent; HostAllocMap HostAllocs; TargetAllocMap TargetAllocs; }; - OrcRPCTPCJITLinkMemoryManager(OrcRPCTPCImplT &Parent) : Parent(Parent) {} + OrcRPCEPCJITLinkMemoryManager(OrcRPCEPCImplT &Parent) : Parent(Parent) {} Expected> allocate(const jitlink::JITLinkDylib *JD, @@ -216,19 +216,19 @@ class OrcRPCTPCJITLinkMemoryManager : public jitlink::JITLinkMemoryManager { private: void reportError(Error Err) { Parent.reportError(std::move(Err)); } - decltype(std::declval().getEndpoint()) getEndpoint() { + decltype(std::declval().getEndpoint()) getEndpoint() { return Parent.getEndpoint(); } - OrcRPCTPCImplT &Parent; + OrcRPCEPCImplT &Parent; }; -/// TargetProcessControl::MemoryAccess implementation for a process connected +/// ExecutorProcessControl::MemoryAccess implementation for a process connected /// via an ORC RPC endpoint. -template -class OrcRPCTPCMemoryAccess : public TargetProcessControl::MemoryAccess { +template +class OrcRPCEPCMemoryAccess : public ExecutorProcessControl::MemoryAccess { public: - OrcRPCTPCMemoryAccess(OrcRPCTPCImplT &Parent) : Parent(Parent) {} + OrcRPCEPCMemoryAccess(OrcRPCEPCImplT &Parent) : Parent(Parent) {} void writeUInt8s(ArrayRef Ws, WriteResultFn OnWriteComplete) override { @@ -269,20 +269,20 @@ class OrcRPCTPCMemoryAccess : public TargetProcessControl::MemoryAccess { } } - OrcRPCTPCImplT &Parent; + OrcRPCEPCImplT &Parent; }; -// TargetProcessControl for a process connected via an ORC RPC Endpoint. +// ExecutorProcessControl for a process connected via an ORC RPC Endpoint. template -class OrcRPCTargetProcessControlBase : public TargetProcessControl { +class OrcRPCExecutorProcessControlBase : public ExecutorProcessControl { public: using ErrorReporter = unique_function; using OnCloseConnectionFunction = unique_function; - OrcRPCTargetProcessControlBase(std::shared_ptr SSP, - RPCEndpointT &EP, ErrorReporter ReportError) - : TargetProcessControl(std::move(SSP)), + OrcRPCExecutorProcessControlBase(std::shared_ptr SSP, + RPCEndpointT &EP, ErrorReporter ReportError) + : ExecutorProcessControl(std::move(SSP)), ReportError(std::move(ReportError)), EP(EP) {} void reportError(Error Err) { ReportError(std::move(Err)); } @@ -391,7 +391,7 @@ class OrcRPCTargetProcessControlBase : public TargetProcessControl { protected: /// Subclasses must call this during construction to initialize the /// TargetTriple and PageSize members. - Error initializeORCRPCTPCBase() { + Error initializeORCRPCEPCBase() { if (auto TripleOrErr = EP.template callB()) TargetTriple = Triple(*TripleOrErr); else @@ -413,4 +413,4 @@ class OrcRPCTargetProcessControlBase : public TargetProcessControl { } // end namespace orc } // end namespace llvm -#endif // LLVM_EXECUTIONENGINE_ORC_ORCRPCTARGETPROCESSCONTROL_H +#endif // LLVM_EXECUTIONENGINE_ORC_ORCRPCEXECUTORPROCESSCONTROL_H diff --git a/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt b/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt index edcdfb2b1c418..edf9b3ff9b394 100644 --- a/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt +++ b/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt @@ -4,6 +4,10 @@ add_llvm_component_library(LLVMOrcJIT Core.cpp DebugObjectManagerPlugin.cpp DebugUtils.cpp + EPCDynamicLibrarySearchGenerator.cpp + EPCDebugObjectRegistrar.cpp + EPCEHFrameRegistrar.cpp + EPCIndirectionUtils.cpp ExecutionUtils.cpp IndirectionUtils.cpp IRCompileLayer.cpp @@ -21,12 +25,8 @@ add_llvm_component_library(LLVMOrcJIT RTDyldObjectLinkingLayer.cpp Speculation.cpp SpeculateAnalyses.cpp - TargetProcessControl.cpp + ExecutorProcessControl.cpp ThreadSafeModule.cpp - TPCDebugObjectRegistrar.cpp - TPCDynamicLibrarySearchGenerator.cpp - TPCEHFrameRegistrar.cpp - TPCIndirectionUtils.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/ExecutionEngine/Orc diff --git a/llvm/lib/ExecutionEngine/Orc/TPCDebugObjectRegistrar.cpp b/llvm/lib/ExecutionEngine/Orc/EPCDebugObjectRegistrar.cpp similarity index 61% rename from llvm/lib/ExecutionEngine/Orc/TPCDebugObjectRegistrar.cpp rename to llvm/lib/ExecutionEngine/Orc/EPCDebugObjectRegistrar.cpp index 29dc26292d411..803cf96f5bddf 100644 --- a/llvm/lib/ExecutionEngine/Orc/TPCDebugObjectRegistrar.cpp +++ b/llvm/lib/ExecutionEngine/Orc/EPCDebugObjectRegistrar.cpp @@ -1,4 +1,4 @@ -//===----- TPCDebugObjectRegistrar.cpp - TPC-based debug registration -----===// +//===----- EPCDebugObjectRegistrar.cpp - EPC-based debug registration -----===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ExecutionEngine/Orc/TPCDebugObjectRegistrar.h" +#include "llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h" #include "llvm/ExecutionEngine/Orc/Core.h" #include "llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h" @@ -15,21 +15,21 @@ namespace llvm { namespace orc { -Expected> -createJITLoaderGDBRegistrar(TargetProcessControl &TPC) { - auto ProcessHandle = TPC.loadDylib(nullptr); +Expected> +createJITLoaderGDBRegistrar(ExecutorProcessControl &EPC) { + auto ProcessHandle = EPC.loadDylib(nullptr); if (!ProcessHandle) return ProcessHandle.takeError(); SymbolStringPtr RegisterFn = - TPC.getTargetTriple().isOSBinFormatMachO() - ? TPC.intern("_llvm_orc_registerJITLoaderGDBWrapper") - : TPC.intern("llvm_orc_registerJITLoaderGDBWrapper"); + EPC.getTargetTriple().isOSBinFormatMachO() + ? EPC.intern("_llvm_orc_registerJITLoaderGDBWrapper") + : EPC.intern("llvm_orc_registerJITLoaderGDBWrapper"); SymbolLookupSet RegistrationSymbols; RegistrationSymbols.add(RegisterFn); - auto Result = TPC.lookupSymbols({{*ProcessHandle, RegistrationSymbols}}); + auto Result = EPC.lookupSymbols({{*ProcessHandle, RegistrationSymbols}}); if (!Result) return Result.takeError(); @@ -37,7 +37,7 @@ createJITLoaderGDBRegistrar(TargetProcessControl &TPC) { assert((*Result)[0].size() == 1 && "Unexpected number of addresses in result"); - return std::make_unique(TPC, (*Result)[0][0]); + return std::make_unique(EPC, (*Result)[0][0]); } } // namespace orc diff --git a/llvm/lib/ExecutionEngine/Orc/TPCDynamicLibrarySearchGenerator.cpp b/llvm/lib/ExecutionEngine/Orc/EPCDynamicLibrarySearchGenerator.cpp similarity index 76% rename from llvm/lib/ExecutionEngine/Orc/TPCDynamicLibrarySearchGenerator.cpp rename to llvm/lib/ExecutionEngine/Orc/EPCDynamicLibrarySearchGenerator.cpp index bbf3ada1d4bad..9958a52f0e44c 100644 --- a/llvm/lib/ExecutionEngine/Orc/TPCDynamicLibrarySearchGenerator.cpp +++ b/llvm/lib/ExecutionEngine/Orc/EPCDynamicLibrarySearchGenerator.cpp @@ -1,4 +1,4 @@ -//===---------------- TPCDynamicLibrarySearchGenerator.cpp ----------------===// +//===---------------- EPCDynamicLibrarySearchGenerator.cpp ----------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,24 +6,24 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ExecutionEngine/Orc/TPCDynamicLibrarySearchGenerator.h" +#include "llvm/ExecutionEngine/Orc/EPCDynamicLibrarySearchGenerator.h" namespace llvm { namespace orc { -Expected> -TPCDynamicLibrarySearchGenerator::Load(TargetProcessControl &TPC, +Expected> +EPCDynamicLibrarySearchGenerator::Load(ExecutorProcessControl &EPC, const char *LibraryPath, SymbolPredicate Allow) { - auto Handle = TPC.loadDylib(LibraryPath); + auto Handle = EPC.loadDylib(LibraryPath); if (!Handle) return Handle.takeError(); - return std::make_unique(TPC, *Handle, + return std::make_unique(EPC, *Handle, std::move(Allow)); } -Error TPCDynamicLibrarySearchGenerator::tryToGenerate( +Error EPCDynamicLibrarySearchGenerator::tryToGenerate( LookupState &LS, LookupKind K, JITDylib &JD, JITDylibLookupFlags JDLookupFlags, const SymbolLookupSet &Symbols) { @@ -41,8 +41,8 @@ Error TPCDynamicLibrarySearchGenerator::tryToGenerate( SymbolMap NewSymbols; - TargetProcessControl::LookupRequest Request(H, LookupSymbols); - auto Result = TPC.lookupSymbols(Request); + ExecutorProcessControl::LookupRequest Request(H, LookupSymbols); + auto Result = EPC.lookupSymbols(Request); if (!Result) return Result.takeError(); diff --git a/llvm/lib/ExecutionEngine/Orc/TPCEHFrameRegistrar.cpp b/llvm/lib/ExecutionEngine/Orc/EPCEHFrameRegistrar.cpp similarity index 66% rename from llvm/lib/ExecutionEngine/Orc/TPCEHFrameRegistrar.cpp rename to llvm/lib/ExecutionEngine/Orc/EPCEHFrameRegistrar.cpp index 332fba7b6a6d9..c51a68c1bfed6 100644 --- a/llvm/lib/ExecutionEngine/Orc/TPCEHFrameRegistrar.cpp +++ b/llvm/lib/ExecutionEngine/Orc/EPCEHFrameRegistrar.cpp @@ -1,4 +1,4 @@ -//===------ TPCEHFrameRegistrar.cpp - TPC-based eh-frame registration -----===// +//===------ EPCEHFrameRegistrar.cpp - EPC-based eh-frame registration -----===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ExecutionEngine/Orc/TPCEHFrameRegistrar.h" +#include "llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h" #include "llvm/Support/BinaryStreamWriter.h" using namespace llvm::orc::shared; @@ -14,19 +14,19 @@ using namespace llvm::orc::shared; namespace llvm { namespace orc { -Expected> -TPCEHFrameRegistrar::Create(TargetProcessControl &TPC) { +Expected> +EPCEHFrameRegistrar::Create(ExecutorProcessControl &EPC) { // FIXME: Proper mangling here -- we really need to decouple linker mangling // from DataLayout. // Find the addresses of the registration/deregistration functions in the - // target process. - auto ProcessHandle = TPC.loadDylib(nullptr); + // executor process. + auto ProcessHandle = EPC.loadDylib(nullptr); if (!ProcessHandle) return ProcessHandle.takeError(); std::string RegisterWrapperName, DeregisterWrapperName; - if (TPC.getTargetTriple().isOSBinFormatMachO()) { + if (EPC.getTargetTriple().isOSBinFormatMachO()) { RegisterWrapperName += '_'; DeregisterWrapperName += '_'; } @@ -34,10 +34,10 @@ TPCEHFrameRegistrar::Create(TargetProcessControl &TPC) { DeregisterWrapperName += "llvm_orc_deregisterEHFrameSectionWrapper"; SymbolLookupSet RegistrationSymbols; - RegistrationSymbols.add(TPC.intern(RegisterWrapperName)); - RegistrationSymbols.add(TPC.intern(DeregisterWrapperName)); + RegistrationSymbols.add(EPC.intern(RegisterWrapperName)); + RegistrationSymbols.add(EPC.intern(DeregisterWrapperName)); - auto Result = TPC.lookupSymbols({{*ProcessHandle, RegistrationSymbols}}); + auto Result = EPC.lookupSymbols({{*ProcessHandle, RegistrationSymbols}}); if (!Result) return Result.takeError(); @@ -48,22 +48,22 @@ TPCEHFrameRegistrar::Create(TargetProcessControl &TPC) { auto RegisterEHFrameWrapperFnAddr = (*Result)[0][0]; auto DeregisterEHFrameWrapperFnAddr = (*Result)[0][1]; - return std::make_unique( - TPC, RegisterEHFrameWrapperFnAddr, DeregisterEHFrameWrapperFnAddr); + return std::make_unique( + EPC, RegisterEHFrameWrapperFnAddr, DeregisterEHFrameWrapperFnAddr); } -Error TPCEHFrameRegistrar::registerEHFrames(JITTargetAddress EHFrameSectionAddr, +Error EPCEHFrameRegistrar::registerEHFrames(JITTargetAddress EHFrameSectionAddr, size_t EHFrameSectionSize) { return WrapperFunction::call( - TPCCaller(TPC, RegisterEHFrameWrapperFnAddr), EHFrameSectionAddr, + EPCCaller(EPC, RegisterEHFrameWrapperFnAddr), EHFrameSectionAddr, static_cast(EHFrameSectionSize)); } -Error TPCEHFrameRegistrar::deregisterEHFrames( +Error EPCEHFrameRegistrar::deregisterEHFrames( JITTargetAddress EHFrameSectionAddr, size_t EHFrameSectionSize) { return WrapperFunction::call( - TPCCaller(TPC, DeregisterEHFrameWrapperFnAddr), EHFrameSectionAddr, + EPCCaller(EPC, DeregisterEHFrameWrapperFnAddr), EHFrameSectionAddr, static_cast(EHFrameSectionSize)); } diff --git a/llvm/lib/ExecutionEngine/Orc/TPCIndirectionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp similarity index 73% rename from llvm/lib/ExecutionEngine/Orc/TPCIndirectionUtils.cpp rename to llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp index 7989ec41952d7..b9c70b0aeb3cb 100644 --- a/llvm/lib/ExecutionEngine/Orc/TPCIndirectionUtils.cpp +++ b/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp @@ -1,4 +1,4 @@ -//===------ TargetProcessControl.cpp -- Target process control APIs -------===// +//===------- EPCIndirectionUtils.cpp -- EPC based indirection APIs --------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,9 +6,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ExecutionEngine/Orc/TPCIndirectionUtils.h" +#include "llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h" -#include "llvm/ExecutionEngine/Orc/TargetProcessControl.h" +#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h" #include "llvm/Support/MathExtras.h" #include @@ -19,14 +19,14 @@ using namespace llvm::orc; namespace llvm { namespace orc { -class TPCIndirectionUtilsAccess { +class EPCIndirectionUtilsAccess { public: - using IndirectStubInfo = TPCIndirectionUtils::IndirectStubInfo; - using IndirectStubInfoVector = TPCIndirectionUtils::IndirectStubInfoVector; + using IndirectStubInfo = EPCIndirectionUtils::IndirectStubInfo; + using IndirectStubInfoVector = EPCIndirectionUtils::IndirectStubInfoVector; static Expected - getIndirectStubs(TPCIndirectionUtils &TPCIU, unsigned NumStubs) { - return TPCIU.getIndirectStubs(NumStubs); + getIndirectStubs(EPCIndirectionUtils &EPCIU, unsigned NumStubs) { + return EPCIU.getIndirectStubs(NumStubs); }; }; @@ -35,9 +35,9 @@ class TPCIndirectionUtilsAccess { namespace { -class TPCTrampolinePool : public TrampolinePool { +class EPCTrampolinePool : public TrampolinePool { public: - TPCTrampolinePool(TPCIndirectionUtils &TPCIU); + EPCTrampolinePool(EPCIndirectionUtils &EPCIU); Error deallocatePool(); protected: @@ -45,16 +45,16 @@ class TPCTrampolinePool : public TrampolinePool { using Allocation = jitlink::JITLinkMemoryManager::Allocation; - TPCIndirectionUtils &TPCIU; + EPCIndirectionUtils &EPCIU; unsigned TrampolineSize = 0; unsigned TrampolinesPerPage = 0; std::vector> TrampolineBlocks; }; -class TPCIndirectStubsManager : public IndirectStubsManager, - private TPCIndirectionUtilsAccess { +class EPCIndirectStubsManager : public IndirectStubsManager, + private EPCIndirectionUtilsAccess { public: - TPCIndirectStubsManager(TPCIndirectionUtils &TPCIU) : TPCIU(TPCIU) {} + EPCIndirectStubsManager(EPCIndirectionUtils &EPCIU) : EPCIU(EPCIU) {} Error deallocateStubs(); @@ -73,43 +73,43 @@ class TPCIndirectStubsManager : public IndirectStubsManager, using StubInfo = std::pair; std::mutex ISMMutex; - TPCIndirectionUtils &TPCIU; + EPCIndirectionUtils &EPCIU; StringMap StubInfos; }; -TPCTrampolinePool::TPCTrampolinePool(TPCIndirectionUtils &TPCIU) - : TPCIU(TPCIU) { - auto &TPC = TPCIU.getTargetProcessControl(); - auto &ABI = TPCIU.getABISupport(); +EPCTrampolinePool::EPCTrampolinePool(EPCIndirectionUtils &EPCIU) + : EPCIU(EPCIU) { + auto &EPC = EPCIU.getExecutorProcessControl(); + auto &ABI = EPCIU.getABISupport(); TrampolineSize = ABI.getTrampolineSize(); TrampolinesPerPage = - (TPC.getPageSize() - ABI.getPointerSize()) / TrampolineSize; + (EPC.getPageSize() - ABI.getPointerSize()) / TrampolineSize; } -Error TPCTrampolinePool::deallocatePool() { +Error EPCTrampolinePool::deallocatePool() { Error Err = Error::success(); for (auto &Alloc : TrampolineBlocks) Err = joinErrors(std::move(Err), Alloc->deallocate()); return Err; } -Error TPCTrampolinePool::grow() { +Error EPCTrampolinePool::grow() { assert(AvailableTrampolines.empty() && "Grow called with trampolines still available"); - auto ResolverAddress = TPCIU.getResolverBlockAddress(); + auto ResolverAddress = EPCIU.getResolverBlockAddress(); assert(ResolverAddress && "Resolver address can not be null"); - auto &TPC = TPCIU.getTargetProcessControl(); + auto &EPC = EPCIU.getExecutorProcessControl(); constexpr auto TrampolinePagePermissions = static_cast(sys::Memory::MF_READ | sys::Memory::MF_EXEC); - auto PageSize = TPC.getPageSize(); + auto PageSize = EPC.getPageSize(); jitlink::JITLinkMemoryManager::SegmentsRequestMap Request; Request[TrampolinePagePermissions] = {PageSize, static_cast(PageSize), 0}; - auto Alloc = TPC.getMemMgr().allocate(nullptr, Request); + auto Alloc = EPC.getMemMgr().allocate(nullptr, Request); if (!Alloc) return Alloc.takeError(); @@ -119,7 +119,7 @@ Error TPCTrampolinePool::grow() { auto WorkingMemory = (*Alloc)->getWorkingMemory(TrampolinePagePermissions); auto TargetAddress = (*Alloc)->getTargetMemory(TrampolinePagePermissions); - TPCIU.getABISupport().writeTrampolines(WorkingMemory.data(), TargetAddress, + EPCIU.getABISupport().writeTrampolines(WorkingMemory.data(), TargetAddress, ResolverAddress, NumTrampolines); auto TargetAddr = (*Alloc)->getTargetMemory(TrampolinePagePermissions); @@ -134,7 +134,7 @@ Error TPCTrampolinePool::grow() { return Error::success(); } -Error TPCIndirectStubsManager::createStub(StringRef StubName, +Error EPCIndirectStubsManager::createStub(StringRef StubName, JITTargetAddress StubAddr, JITSymbolFlags StubFlags) { StubInitsMap SIM; @@ -142,8 +142,8 @@ Error TPCIndirectStubsManager::createStub(StringRef StubName, return createStubs(SIM); } -Error TPCIndirectStubsManager::createStubs(const StubInitsMap &StubInits) { - auto AvailableStubInfos = getIndirectStubs(TPCIU, StubInits.size()); +Error EPCIndirectStubsManager::createStubs(const StubInitsMap &StubInits) { + auto AvailableStubInfos = getIndirectStubs(EPCIU, StubInits.size()); if (!AvailableStubInfos) return AvailableStubInfos.takeError(); @@ -156,8 +156,8 @@ Error TPCIndirectStubsManager::createStubs(const StubInitsMap &StubInits) { } } - auto &MemAccess = TPCIU.getTargetProcessControl().getMemoryAccess(); - switch (TPCIU.getABISupport().getPointerSize()) { + auto &MemAccess = EPCIU.getExecutorProcessControl().getMemoryAccess(); + switch (EPCIU.getABISupport().getPointerSize()) { case 4: { unsigned ASIdx = 0; std::vector PtrUpdates; @@ -180,7 +180,7 @@ Error TPCIndirectStubsManager::createStubs(const StubInitsMap &StubInits) { } } -JITEvaluatedSymbol TPCIndirectStubsManager::findStub(StringRef Name, +JITEvaluatedSymbol EPCIndirectStubsManager::findStub(StringRef Name, bool ExportedStubsOnly) { std::lock_guard Lock(ISMMutex); auto I = StubInfos.find(Name); @@ -189,7 +189,7 @@ JITEvaluatedSymbol TPCIndirectStubsManager::findStub(StringRef Name, return {I->second.first.StubAddress, I->second.second}; } -JITEvaluatedSymbol TPCIndirectStubsManager::findPointer(StringRef Name) { +JITEvaluatedSymbol EPCIndirectStubsManager::findPointer(StringRef Name) { std::lock_guard Lock(ISMMutex); auto I = StubInfos.find(Name); if (I == StubInfos.end()) @@ -197,7 +197,7 @@ JITEvaluatedSymbol TPCIndirectStubsManager::findPointer(StringRef Name) { return {I->second.first.PointerAddress, I->second.second}; } -Error TPCIndirectStubsManager::updatePointer(StringRef Name, +Error EPCIndirectStubsManager::updatePointer(StringRef Name, JITTargetAddress NewAddr) { JITTargetAddress PtrAddr = 0; @@ -210,8 +210,8 @@ Error TPCIndirectStubsManager::updatePointer(StringRef Name, PtrAddr = I->second.first.PointerAddress; } - auto &MemAccess = TPCIU.getTargetProcessControl().getMemoryAccess(); - switch (TPCIU.getABISupport().getPointerSize()) { + auto &MemAccess = EPCIU.getExecutorProcessControl().getMemoryAccess(); + switch (EPCIU.getABISupport().getPointerSize()) { case 4: { tpctypes::UInt32Write PUpdate(PtrAddr, NewAddr); return MemAccess.writeUInt32s(PUpdate); @@ -231,42 +231,42 @@ Error TPCIndirectStubsManager::updatePointer(StringRef Name, namespace llvm { namespace orc { -TPCIndirectionUtils::ABISupport::~ABISupport() {} +EPCIndirectionUtils::ABISupport::~ABISupport() {} -Expected> -TPCIndirectionUtils::Create(TargetProcessControl &TPC) { - const auto &TT = TPC.getTargetTriple(); +Expected> +EPCIndirectionUtils::Create(ExecutorProcessControl &EPC) { + const auto &TT = EPC.getTargetTriple(); switch (TT.getArch()) { default: return make_error( - std::string("No TPCIndirectionUtils available for ") + TT.str(), + std::string("No EPCIndirectionUtils available for ") + TT.str(), inconvertibleErrorCode()); case Triple::aarch64: case Triple::aarch64_32: - return CreateWithABI(TPC); + return CreateWithABI(EPC); case Triple::x86: - return CreateWithABI(TPC); + return CreateWithABI(EPC); case Triple::mips: - return CreateWithABI(TPC); + return CreateWithABI(EPC); case Triple::mipsel: - return CreateWithABI(TPC); + return CreateWithABI(EPC); case Triple::mips64: case Triple::mips64el: - return CreateWithABI(TPC); + return CreateWithABI(EPC); case Triple::x86_64: if (TT.getOS() == Triple::OSType::Win32) - return CreateWithABI(TPC); + return CreateWithABI(EPC); else - return CreateWithABI(TPC); + return CreateWithABI(EPC); } } -Error TPCIndirectionUtils::cleanup() { +Error EPCIndirectionUtils::cleanup() { Error Err = Error::success(); for (auto &A : IndirectStubAllocs) @@ -274,7 +274,7 @@ Error TPCIndirectionUtils::cleanup() { if (TP) Err = joinErrors(std::move(Err), - static_cast(*TP).deallocatePool()); + static_cast(*TP).deallocatePool()); if (ResolverBlock) Err = joinErrors(std::move(Err), ResolverBlock->deallocate()); @@ -283,7 +283,7 @@ Error TPCIndirectionUtils::cleanup() { } Expected -TPCIndirectionUtils::writeResolverBlock(JITTargetAddress ReentryFnAddr, +EPCIndirectionUtils::writeResolverBlock(JITTargetAddress ReentryFnAddr, JITTargetAddress ReentryCtxAddr) { assert(ABI && "ABI can not be null"); constexpr auto ResolverBlockPermissions = @@ -292,9 +292,9 @@ TPCIndirectionUtils::writeResolverBlock(JITTargetAddress ReentryFnAddr, auto ResolverSize = ABI->getResolverCodeSize(); jitlink::JITLinkMemoryManager::SegmentsRequestMap Request; - Request[ResolverBlockPermissions] = {TPC.getPageSize(), + Request[ResolverBlockPermissions] = {EPC.getPageSize(), static_cast(ResolverSize), 0}; - auto Alloc = TPC.getMemMgr().allocate(nullptr, Request); + auto Alloc = EPC.getMemMgr().allocate(nullptr, Request); if (!Alloc) return Alloc.takeError(); @@ -311,17 +311,17 @@ TPCIndirectionUtils::writeResolverBlock(JITTargetAddress ReentryFnAddr, } std::unique_ptr -TPCIndirectionUtils::createIndirectStubsManager() { - return std::make_unique(*this); +EPCIndirectionUtils::createIndirectStubsManager() { + return std::make_unique(*this); } -TrampolinePool &TPCIndirectionUtils::getTrampolinePool() { +TrampolinePool &EPCIndirectionUtils::getTrampolinePool() { if (!TP) - TP = std::make_unique(*this); + TP = std::make_unique(*this); return *TP; } -LazyCallThroughManager &TPCIndirectionUtils::createLazyCallThroughManager( +LazyCallThroughManager &EPCIndirectionUtils::createLazyCallThroughManager( ExecutionSession &ES, JITTargetAddress ErrorHandlerAddr) { assert(!LCTM && "createLazyCallThroughManager can not have been called before"); @@ -330,24 +330,24 @@ LazyCallThroughManager &TPCIndirectionUtils::createLazyCallThroughManager( return *LCTM; } -TPCIndirectionUtils::TPCIndirectionUtils(TargetProcessControl &TPC, +EPCIndirectionUtils::EPCIndirectionUtils(ExecutorProcessControl &EPC, std::unique_ptr ABI) - : TPC(TPC), ABI(std::move(ABI)) { + : EPC(EPC), ABI(std::move(ABI)) { assert(this->ABI && "ABI can not be null"); - assert(TPC.getPageSize() > getABISupport().getStubSize() && + assert(EPC.getPageSize() > getABISupport().getStubSize() && "Stubs larger than one page are not supported"); } -Expected -TPCIndirectionUtils::getIndirectStubs(unsigned NumStubs) { +Expected +EPCIndirectionUtils::getIndirectStubs(unsigned NumStubs) { - std::lock_guard Lock(TPCUIMutex); + std::lock_guard Lock(EPCUIMutex); // If there aren't enough stubs available then allocate some more. if (NumStubs > AvailableIndirectStubs.size()) { auto NumStubsToAllocate = NumStubs; - auto PageSize = TPC.getPageSize(); + auto PageSize = EPC.getPageSize(); auto StubBytes = alignTo(NumStubsToAllocate * ABI->getStubSize(), PageSize); NumStubsToAllocate = StubBytes / ABI->getStubSize(); auto PointerBytes = @@ -364,7 +364,7 @@ TPCIndirectionUtils::getIndirectStubs(unsigned NumStubs) { Request[StubPagePermissions] = {PageSize, static_cast(StubBytes), 0}; Request[PointerPagePermissions] = {PageSize, 0, PointerBytes}; - auto Alloc = TPC.getMemMgr().allocate(nullptr, Request); + auto Alloc = EPC.getMemMgr().allocate(nullptr, Request); if (!Alloc) return Alloc.takeError(); @@ -411,9 +411,9 @@ static JITTargetAddress reentry(JITTargetAddress LCTMAddr, return LandingAddrF.get(); } -Error setUpInProcessLCTMReentryViaTPCIU(TPCIndirectionUtils &TPCIU) { - auto &LCTM = TPCIU.getLazyCallThroughManager(); - return TPCIU +Error setUpInProcessLCTMReentryViaEPCIU(EPCIndirectionUtils &EPCIU) { + auto &LCTM = EPCIU.getLazyCallThroughManager(); + return EPCIU .writeResolverBlock(pointerToJITTargetAddress(&reentry), pointerToJITTargetAddress(&LCTM)) .takeError(); diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcessControl.cpp b/llvm/lib/ExecutionEngine/Orc/ExecutorProcessControl.cpp similarity index 69% rename from llvm/lib/ExecutionEngine/Orc/TargetProcessControl.cpp rename to llvm/lib/ExecutionEngine/Orc/ExecutorProcessControl.cpp index bc5cff43bd934..f8bd74eabc9b4 100644 --- a/llvm/lib/ExecutionEngine/Orc/TargetProcessControl.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ExecutorProcessControl.cpp @@ -1,4 +1,4 @@ -//===------ TargetProcessControl.cpp -- Target process control APIs -------===// +//===---- ExecutorProcessControl.cpp -- Executor process control APIs -----===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ExecutionEngine/Orc/TargetProcessControl.h" +#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h" #include "llvm/ExecutionEngine/Orc/Core.h" #include "llvm/ExecutionEngine/Orc/TargetProcess/TargetExecutionUtils.h" @@ -18,14 +18,14 @@ namespace llvm { namespace orc { -TargetProcessControl::MemoryAccess::~MemoryAccess() {} +ExecutorProcessControl::MemoryAccess::~MemoryAccess() {} -TargetProcessControl::~TargetProcessControl() {} +ExecutorProcessControl::~ExecutorProcessControl() {} -SelfTargetProcessControl::SelfTargetProcessControl( +SelfExecutorProcessControl::SelfExecutorProcessControl( std::shared_ptr SSP, Triple TargetTriple, unsigned PageSize, std::unique_ptr MemMgr) - : TargetProcessControl(std::move(SSP)) { + : ExecutorProcessControl(std::move(SSP)) { OwnedMemMgr = std::move(MemMgr); if (!OwnedMemMgr) @@ -39,8 +39,8 @@ SelfTargetProcessControl::SelfTargetProcessControl( GlobalManglingPrefix = '_'; } -Expected> -SelfTargetProcessControl::Create( +Expected> +SelfExecutorProcessControl::Create( std::shared_ptr SSP, std::unique_ptr MemMgr) { auto PageSize = sys::Process::getPageSize(); @@ -49,12 +49,12 @@ SelfTargetProcessControl::Create( Triple TT(sys::getProcessTriple()); - return std::make_unique( + return std::make_unique( std::move(SSP), std::move(TT), *PageSize, std::move(MemMgr)); } Expected -SelfTargetProcessControl::loadDylib(const char *DylibPath) { +SelfExecutorProcessControl::loadDylib(const char *DylibPath) { std::string ErrMsg; auto Dylib = std::make_unique( sys::DynamicLibrary::getPermanentLibrary(DylibPath, &ErrMsg)); @@ -65,7 +65,7 @@ SelfTargetProcessControl::loadDylib(const char *DylibPath) { } Expected> -SelfTargetProcessControl::lookupSymbols(ArrayRef Request) { +SelfExecutorProcessControl::lookupSymbols(ArrayRef Request) { std::vector R; for (auto &Elem : Request) { @@ -96,53 +96,53 @@ SelfTargetProcessControl::lookupSymbols(ArrayRef Request) { } Expected -SelfTargetProcessControl::runAsMain(JITTargetAddress MainFnAddr, - ArrayRef Args) { +SelfExecutorProcessControl::runAsMain(JITTargetAddress MainFnAddr, + ArrayRef Args) { using MainTy = int (*)(int, char *[]); return orc::runAsMain(jitTargetAddressToFunction(MainFnAddr), Args); } Expected -SelfTargetProcessControl::runWrapper(JITTargetAddress WrapperFnAddr, - ArrayRef ArgBuffer) { +SelfExecutorProcessControl::runWrapper(JITTargetAddress WrapperFnAddr, + ArrayRef ArgBuffer) { using WrapperFnTy = shared::detail::CWrapperFunctionResult (*)( const char *Data, uint64_t Size); auto *WrapperFn = jitTargetAddressToFunction(WrapperFnAddr); return WrapperFn(ArgBuffer.data(), ArgBuffer.size()); } -Error SelfTargetProcessControl::disconnect() { return Error::success(); } +Error SelfExecutorProcessControl::disconnect() { return Error::success(); } -void SelfTargetProcessControl::writeUInt8s(ArrayRef Ws, - WriteResultFn OnWriteComplete) { +void SelfExecutorProcessControl::writeUInt8s(ArrayRef Ws, + WriteResultFn OnWriteComplete) { for (auto &W : Ws) *jitTargetAddressToPointer(W.Address) = W.Value; OnWriteComplete(Error::success()); } -void SelfTargetProcessControl::writeUInt16s(ArrayRef Ws, - WriteResultFn OnWriteComplete) { +void SelfExecutorProcessControl::writeUInt16s( + ArrayRef Ws, WriteResultFn OnWriteComplete) { for (auto &W : Ws) *jitTargetAddressToPointer(W.Address) = W.Value; OnWriteComplete(Error::success()); } -void SelfTargetProcessControl::writeUInt32s(ArrayRef Ws, - WriteResultFn OnWriteComplete) { +void SelfExecutorProcessControl::writeUInt32s( + ArrayRef Ws, WriteResultFn OnWriteComplete) { for (auto &W : Ws) *jitTargetAddressToPointer(W.Address) = W.Value; OnWriteComplete(Error::success()); } -void SelfTargetProcessControl::writeUInt64s(ArrayRef Ws, - WriteResultFn OnWriteComplete) { +void SelfExecutorProcessControl::writeUInt64s( + ArrayRef Ws, WriteResultFn OnWriteComplete) { for (auto &W : Ws) *jitTargetAddressToPointer(W.Address) = W.Value; OnWriteComplete(Error::success()); } -void SelfTargetProcessControl::writeBuffers(ArrayRef Ws, - WriteResultFn OnWriteComplete) { +void SelfExecutorProcessControl::writeBuffers( + ArrayRef Ws, WriteResultFn OnWriteComplete) { for (auto &W : Ws) memcpy(jitTargetAddressToPointer(W.Address), W.Buffer.data(), W.Buffer.size()); diff --git a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp index a79882a04e708..08fd86b7979b1 100644 --- a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp +++ b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp @@ -9,12 +9,12 @@ #include "llvm/ExecutionEngine/Orc/LLJIT.h" #include "llvm/ExecutionEngine/JITLink/EHFrameSupport.h" #include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h" +#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h" #include "llvm/ExecutionEngine/Orc/MachOPlatform.h" #include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h" #include "llvm/ExecutionEngine/Orc/ObjectTransformLayer.h" #include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h" #include "llvm/ExecutionEngine/Orc/Shared/OrcError.h" -#include "llvm/ExecutionEngine/Orc/TargetProcessControl.h" #include "llvm/ExecutionEngine/SectionMemoryManager.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" @@ -976,13 +976,13 @@ Error LLJITBuilderState::prepareForConstruction() { JTMB->setRelocationModel(Reloc::PIC_); JTMB->setCodeModel(CodeModel::Small); CreateObjectLinkingLayer = - [TPC = this->TPC]( + [EPC = this->EPC]( ExecutionSession &ES, const Triple &) -> Expected> { std::unique_ptr ObjLinkingLayer; - if (TPC) + if (EPC) ObjLinkingLayer = - std::make_unique(ES, TPC->getMemMgr()); + std::make_unique(ES, EPC->getMemMgr()); else ObjLinkingLayer = std::make_unique( ES, std::make_unique()); diff --git a/llvm/tools/lli/lli.cpp b/llvm/tools/lli/lli.cpp index 8908b43d71e53..47fb364212897 100644 --- a/llvm/tools/lli/lli.cpp +++ b/llvm/tools/lli/lli.cpp @@ -22,12 +22,14 @@ #include "llvm/Config/llvm-config.h" #include "llvm/ExecutionEngine/GenericValue.h" #include "llvm/ExecutionEngine/Interpreter.h" -#include "llvm/ExecutionEngine/JITSymbol.h" #include "llvm/ExecutionEngine/JITEventListener.h" +#include "llvm/ExecutionEngine/JITSymbol.h" #include "llvm/ExecutionEngine/MCJIT.h" #include "llvm/ExecutionEngine/ObjectCache.h" #include "llvm/ExecutionEngine/Orc/DebugObjectManagerPlugin.h" #include "llvm/ExecutionEngine/Orc/DebugUtils.h" +#include "llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h" +#include "llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h" #include "llvm/ExecutionEngine/Orc/ExecutionUtils.h" #include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h" #include "llvm/ExecutionEngine/Orc/LLJIT.h" @@ -35,8 +37,6 @@ #include "llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h" #include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h" #include "llvm/ExecutionEngine/Orc/SymbolStringPool.h" -#include "llvm/ExecutionEngine/Orc/TPCDebugObjectRegistrar.h" -#include "llvm/ExecutionEngine/Orc/TPCEHFrameRegistrar.h" #include "llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h" #include "llvm/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.h" #include "llvm/ExecutionEngine/Orc/TargetProcess/TargetExecutionUtils.h" @@ -936,18 +936,18 @@ int runOrcJIT(const char *ProgName) { } } - std::unique_ptr TPC = nullptr; + std::unique_ptr EPC = nullptr; if (JITLinker == JITLinkerKind::JITLink) { - TPC = ExitOnErr(orc::SelfTargetProcessControl::Create( + EPC = ExitOnErr(orc::SelfExecutorProcessControl::Create( std::make_shared())); - Builder.setObjectLinkingLayerCreator([&TPC](orc::ExecutionSession &ES, + Builder.setObjectLinkingLayerCreator([&EPC](orc::ExecutionSession &ES, const Triple &) { - auto L = std::make_unique(ES, TPC->getMemMgr()); + auto L = std::make_unique(ES, EPC->getMemMgr()); L->addPlugin(std::make_unique( - ES, ExitOnErr(orc::TPCEHFrameRegistrar::Create(*TPC)))); + ES, ExitOnErr(orc::EPCEHFrameRegistrar::Create(*EPC)))); L->addPlugin(std::make_unique( - ES, ExitOnErr(orc::createJITLoaderGDBRegistrar(*TPC)))); + ES, ExitOnErr(orc::createJITLoaderGDBRegistrar(*EPC)))); return L; }); } @@ -1069,9 +1069,9 @@ int runOrcJIT(const char *ProgName) { JITEvaluatedSymbol MainSym = ExitOnErr(J->lookup(EntryFunc)); int Result; - if (TPC) { - // TargetProcessControl-based execution with JITLink. - Result = ExitOnErr(TPC->runAsMain(MainSym.getAddress(), InputArgv)); + if (EPC) { + // ExecutorProcessControl-based execution with JITLink. + Result = ExitOnErr(EPC->runAsMain(MainSym.getAddress(), InputArgv)); } else { // Manual in-process execution with RuntimeDyld. using MainFnTy = int(int, char *[]); diff --git a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp index 99af0a48b358f..f8298317fee57 100644 --- a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp +++ b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp @@ -16,10 +16,10 @@ #include "llvm/BinaryFormat/Magic.h" #include "llvm/ExecutionEngine/Orc/DebugObjectManagerPlugin.h" +#include "llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h" +#include "llvm/ExecutionEngine/Orc/EPCDynamicLibrarySearchGenerator.h" +#include "llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h" #include "llvm/ExecutionEngine/Orc/ExecutionUtils.h" -#include "llvm/ExecutionEngine/Orc/TPCDebugObjectRegistrar.h" -#include "llvm/ExecutionEngine/Orc/TPCDynamicLibrarySearchGenerator.h" -#include "llvm/ExecutionEngine/Orc/TPCEHFrameRegistrar.h" #include "llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h" #include "llvm/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.h" #include "llvm/MC/MCAsmInfo.h" @@ -588,8 +588,8 @@ Error LLVMJITLinkObjectLinkingLayer::add(ResourceTrackerSP RT, return JD.define(std::move(MU), std::move(RT)); } -Expected> -LLVMJITLinkRemoteTargetProcessControl::LaunchExecutor() { +Expected> +LLVMJITLinkRemoteExecutorProcessControl::LaunchExecutor() { #ifndef LLVM_ON_UNIX // FIXME: Add support for Windows. return make_error("-" + OutOfProcessExecutor.ArgStr + @@ -661,13 +661,13 @@ LLVMJITLinkRemoteTargetProcessControl::LaunchExecutor() { }; Error Err = Error::success(); - std::unique_ptr RTPC( - new LLVMJITLinkRemoteTargetProcessControl( + std::unique_ptr REPC( + new LLVMJITLinkRemoteExecutorProcessControl( std::move(SSP), std::move(Channel), std::move(Endpoint), std::move(ReportError), Err)); if (Err) return std::move(Err); - return std::move(RTPC); + return std::move(REPC); #endif } @@ -717,8 +717,8 @@ static Expected connectTCPSocket(std::string Host, std::string PortStr) { } #endif -Expected> -LLVMJITLinkRemoteTargetProcessControl::ConnectToExecutor() { +Expected> +LLVMJITLinkRemoteExecutorProcessControl::ConnectToExecutor() { #ifndef LLVM_ON_UNIX // FIXME: Add TCP support for Windows. return make_error("-" + OutOfProcessExecutorConnect.ArgStr + @@ -756,17 +756,17 @@ LLVMJITLinkRemoteTargetProcessControl::ConnectToExecutor() { }; Error Err = Error::success(); - std::unique_ptr RTPC( - new LLVMJITLinkRemoteTargetProcessControl( + std::unique_ptr REPC( + new LLVMJITLinkRemoteExecutorProcessControl( std::move(SSP), std::move(Channel), std::move(Endpoint), std::move(ReportError), Err)); if (Err) return std::move(Err); - return std::move(RTPC); + return std::move(REPC); #endif } -Error LLVMJITLinkRemoteTargetProcessControl::disconnect() { +Error LLVMJITLinkRemoteExecutorProcessControl::disconnect() { std::promise P; auto F = P.get_future(); auto Err = closeConnection([&](Error Err) -> Error { @@ -797,24 +797,25 @@ Expected> Session::Create(Triple TT) { return PageSize.takeError(); /// If -oop-executor is passed then launch the executor. - std::unique_ptr TPC; + std::unique_ptr EPC; if (OutOfProcessExecutor.getNumOccurrences()) { - if (auto RTPC = LLVMJITLinkRemoteTargetProcessControl::LaunchExecutor()) - TPC = std::move(*RTPC); + if (auto REPC = LLVMJITLinkRemoteExecutorProcessControl::LaunchExecutor()) + EPC = std::move(*REPC); else - return RTPC.takeError(); + return REPC.takeError(); } else if (OutOfProcessExecutorConnect.getNumOccurrences()) { - if (auto RTPC = LLVMJITLinkRemoteTargetProcessControl::ConnectToExecutor()) - TPC = std::move(*RTPC); + if (auto REPC = + LLVMJITLinkRemoteExecutorProcessControl::ConnectToExecutor()) + EPC = std::move(*REPC); else - return RTPC.takeError(); + return REPC.takeError(); } else - TPC = std::make_unique( + EPC = std::make_unique( std::make_shared(), std::move(TT), *PageSize, createMemoryManager()); Error Err = Error::success(); - std::unique_ptr S(new Session(std::move(TPC), Err)); + std::unique_ptr S(new Session(std::move(EPC), Err)); if (Err) return std::move(Err); return std::move(S); @@ -827,8 +828,8 @@ Session::~Session() { // FIXME: Move to createJITDylib if/when we start using Platform support in // llvm-jitlink. -Session::Session(std::unique_ptr TPC, Error &Err) - : TPC(std::move(TPC)), ObjLayer(*this, this->TPC->getMemMgr()) { +Session::Session(std::unique_ptr EPC, Error &Err) + : EPC(std::move(EPC)), ObjLayer(*this, this->EPC->getMemMgr()) { /// Local ObjectLinkingLayer::Plugin class to forward modifyPassConfig to the /// Session. @@ -862,11 +863,11 @@ Session::Session(std::unique_ptr TPC, Error &Err) return; } - if (!NoExec && !this->TPC->getTargetTriple().isOSWindows()) { + if (!NoExec && !this->EPC->getTargetTriple().isOSWindows()) { ObjLayer.addPlugin(std::make_unique( - ES, ExitOnErr(TPCEHFrameRegistrar::Create(*this->TPC)))); + ES, ExitOnErr(EPCEHFrameRegistrar::Create(*this->EPC)))); ObjLayer.addPlugin(std::make_unique( - ES, ExitOnErr(createJITLoaderGDBRegistrar(*this->TPC)))); + ES, ExitOnErr(createJITLoaderGDBRegistrar(*this->EPC)))); } ObjLayer.addPlugin(std::make_unique(*this)); @@ -913,10 +914,10 @@ void Session::modifyPassConfig(const Triple &TT, PassConfiguration &PassConfig) { if (!CheckFiles.empty()) PassConfig.PostFixupPasses.push_back([this](LinkGraph &G) { - if (TPC->getTargetTriple().getObjectFormat() == Triple::ELF) + if (EPC->getTargetTriple().getObjectFormat() == Triple::ELF) return registerELFGraphInfo(*this, G); - if (TPC->getTargetTriple().getObjectFormat() == Triple::MachO) + if (EPC->getTargetTriple().getObjectFormat() == Triple::MachO) return registerMachOGraphInfo(*this, G); return make_error("Unsupported object format for GOT/stub " @@ -1094,15 +1095,15 @@ static Error loadProcessSymbols(Session &S) { return Name != EPName; }; S.MainJD->addGenerator( - ExitOnErr(orc::TPCDynamicLibrarySearchGenerator::GetForTargetProcess( - *S.TPC, std::move(FilterMainEntryPoint)))); + ExitOnErr(orc::EPCDynamicLibrarySearchGenerator::GetForTargetProcess( + *S.EPC, std::move(FilterMainEntryPoint)))); return Error::success(); } static Error loadDylibs(Session &S) { for (const auto &Dylib : Dylibs) { - auto G = orc::TPCDynamicLibrarySearchGenerator::Load(*S.TPC, Dylib.c_str()); + auto G = orc::EPCDynamicLibrarySearchGenerator::Load(*S.EPC, Dylib.c_str()); if (!G) return G.takeError(); S.MainJD->addGenerator(std::move(*G)); @@ -1178,7 +1179,7 @@ static Error loadObjects(Session &S) { if (Magic == file_magic::archive || Magic == file_magic::macho_universal_binary) JD.addGenerator(ExitOnErr(StaticLibraryDefinitionGenerator::Load( - S.ObjLayer, InputFile.c_str(), S.TPC->getTargetTriple()))); + S.ObjLayer, InputFile.c_str(), S.EPC->getTargetTriple()))); else ExitOnErr(S.ObjLayer.add(JD, std::move(ObjBuffer))); } @@ -1226,7 +1227,7 @@ static Error loadObjects(Session &S) { static Error runChecks(Session &S) { - auto TripleName = S.TPC->getTargetTriple().str(); + auto TripleName = S.EPC->getTargetTriple().str(); std::string ErrorStr; const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, ErrorStr); if (!TheTarget) @@ -1292,7 +1293,7 @@ static Error runChecks(Session &S) { RuntimeDyldChecker Checker( IsSymbolValid, GetSymbolInfo, GetSectionInfo, GetStubInfo, GetGOTInfo, - S.TPC->getTargetTriple().isLittleEndian() ? support::little + S.EPC->getTargetTriple().isLittleEndian() ? support::little : support::big, Disassembler.get(), InstPrinter.get(), dbgs()); @@ -1381,11 +1382,11 @@ int main(int argc, char *argv[]) { int Result = 0; { TimeRegion TR(Timers ? &Timers->RunTimer : nullptr); - Result = ExitOnErr(S->TPC->runAsMain(EntryPoint.getAddress(), InputArgv)); + Result = ExitOnErr(S->EPC->runAsMain(EntryPoint.getAddress(), InputArgv)); } ExitOnErr(S->ES.endSession()); - ExitOnErr(S->TPC->disconnect()); + ExitOnErr(S->EPC->disconnect()); return Result; } diff --git a/llvm/tools/llvm-jitlink/llvm-jitlink.h b/llvm/tools/llvm-jitlink/llvm-jitlink.h index 041c04d863168..750e543fba1c8 100644 --- a/llvm/tools/llvm-jitlink/llvm-jitlink.h +++ b/llvm/tools/llvm-jitlink/llvm-jitlink.h @@ -17,11 +17,11 @@ #include "llvm/ADT/StringSet.h" #include "llvm/ADT/Triple.h" #include "llvm/ExecutionEngine/Orc/Core.h" +#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h" #include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h" -#include "llvm/ExecutionEngine/Orc/OrcRPCTargetProcessControl.h" +#include "llvm/ExecutionEngine/Orc/OrcRPCExecutorProcessControl.h" #include "llvm/ExecutionEngine/Orc/Shared/FDRawByteChannel.h" #include "llvm/ExecutionEngine/Orc/Shared/RPCUtils.h" -#include "llvm/ExecutionEngine/Orc/TargetProcessControl.h" #include "llvm/ExecutionEngine/RuntimeDyldChecker.h" #include "llvm/Support/Error.h" #include "llvm/Support/Regex.h" @@ -52,26 +52,26 @@ using LLVMJITLinkChannel = orc::shared::FDRawByteChannel; using LLVMJITLinkRPCEndpoint = orc::shared::MultiThreadedRPCEndpoint; using LLVMJITLinkRemoteMemoryAccess = - orc::OrcRPCTPCMemoryAccess; + orc::OrcRPCEPCMemoryAccess; -class LLVMJITLinkRemoteTargetProcessControl - : public orc::OrcRPCTargetProcessControlBase { +class LLVMJITLinkRemoteExecutorProcessControl + : public orc::OrcRPCExecutorProcessControlBase { public: - using BaseT = orc::OrcRPCTargetProcessControlBase; - static Expected> LaunchExecutor(); + using BaseT = orc::OrcRPCExecutorProcessControlBase; + static Expected> LaunchExecutor(); - static Expected> ConnectToExecutor(); + static Expected> ConnectToExecutor(); Error disconnect() override; private: using LLVMJITLinkRemoteMemoryAccess = - orc::OrcRPCTPCMemoryAccess; + orc::OrcRPCEPCMemoryAccess; - using LLVMJITLinkRemoteMemoryManager = - orc::OrcRPCTPCJITLinkMemoryManager; + using LLVMJITLinkRemoteMemoryManager = orc::OrcRPCEPCJITLinkMemoryManager< + LLVMJITLinkRemoteExecutorProcessControl>; - LLVMJITLinkRemoteTargetProcessControl( + LLVMJITLinkRemoteExecutorProcessControl( std::shared_ptr SSP, std::unique_ptr Channel, std::unique_ptr Endpoint, @@ -89,7 +89,7 @@ class LLVMJITLinkRemoteTargetProcessControl } }); - if (auto Err2 = initializeORCRPCTPCBase()) { + if (auto Err2 = initializeORCRPCEPCBase()) { Err = joinErrors(std::move(Err2), disconnect()); return; } @@ -102,14 +102,14 @@ class LLVMJITLinkRemoteTargetProcessControl std::unique_ptr Channel; std::unique_ptr Endpoint; - std::unique_ptr OwnedMemAccess; + std::unique_ptr OwnedMemAccess; std::unique_ptr OwnedMemMgr; std::atomic Finished{false}; std::thread ListenerThread; }; struct Session { - std::unique_ptr TPC; + std::unique_ptr EPC; orc::ExecutionSession ES; orc::JITDylib *MainJD; LLVMJITLinkObjectLinkingLayer ObjLayer; @@ -156,7 +156,7 @@ struct Session { DenseMap CanonicalWeakDefs; private: - Session(std::unique_ptr TPC, Error &Err); + Session(std::unique_ptr EPC, Error &Err); }; /// Record symbols, GOT entries, stubs, and sections for ELF file. From b1fd009aab4bfe4f16bd78b7ac779c3f665ae060 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Wed, 30 Jun 2021 14:42:41 -0700 Subject: [PATCH 374/619] [scudo] GWP_ASAN runs on untagged pointers It's already covered by multiple tests, but to trigger this path we need MTE+GWP which disabled. Reviewed By: hctim, pcc Differential Revision: https://reviews.llvm.org/D105232 --- compiler-rt/lib/scudo/standalone/combined.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/compiler-rt/lib/scudo/standalone/combined.h b/compiler-rt/lib/scudo/standalone/combined.h index 079edab1875b7..e8bb8bf207be7 100644 --- a/compiler-rt/lib/scudo/standalone/combined.h +++ b/compiler-rt/lib/scudo/standalone/combined.h @@ -569,9 +569,6 @@ class Allocator { reportAllocationSizeTooBig(NewSize, 0, MaxAllowedMallocSize); } - void *OldTaggedPtr = OldPtr; - OldPtr = getHeaderTaggedPointer(OldPtr); - // The following cases are handled by the C wrappers. DCHECK_NE(OldPtr, nullptr); DCHECK_NE(NewSize, 0); @@ -591,6 +588,9 @@ class Allocator { } #endif // GWP_ASAN_HOOKS + void *OldTaggedPtr = OldPtr; + OldPtr = getHeaderTaggedPointer(OldPtr); + if (UNLIKELY(!isAligned(reinterpret_cast(OldPtr), MinAlignment))) reportMisalignedPointer(AllocatorAction::Reallocating, OldPtr); From 00f474e9b1c5e3b1504fb112f7f95a7c85a4145e Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Wed, 30 Jun 2021 14:46:03 -0700 Subject: [PATCH 375/619] [GWP_ASAN] Enable ARM tests Reviewed By: hctim Differential Revision: https://reviews.llvm.org/D105233 --- compiler-rt/cmake/config-ix.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake index 39e0a4b662c7b..9b27631f4af9b 100644 --- a/compiler-rt/cmake/config-ix.cmake +++ b/compiler-rt/cmake/config-ix.cmake @@ -318,7 +318,7 @@ else() set(ALL_FUZZER_SUPPORTED_ARCH ${X86_64} ${ARM64}) endif() -set(ALL_GWP_ASAN_SUPPORTED_ARCH ${X86} ${X86_64}) +set(ALL_GWP_ASAN_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64}) if(APPLE) set(ALL_LSAN_SUPPORTED_ARCH ${X86} ${X86_64} ${MIPS64} ${ARM64}) else() From f16029e3f7649259dfa4224bc922b6a42b868dbf Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Thu, 1 Jul 2021 03:45:41 +0000 Subject: [PATCH 376/619] [gn build] Port 662c55442f17 --- .../gn/secondary/llvm/lib/ExecutionEngine/Orc/BUILD.gn | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/BUILD.gn index 86750a86c146d..41226d623383f 100644 --- a/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/BUILD.gn @@ -18,7 +18,12 @@ static_library("Orc") { "Core.cpp", "DebugObjectManagerPlugin.cpp", "DebugUtils.cpp", + "EPCDebugObjectRegistrar.cpp", + "EPCDynamicLibrarySearchGenerator.cpp", + "EPCEHFrameRegistrar.cpp", + "EPCIndirectionUtils.cpp", "ExecutionUtils.cpp", + "ExecutorProcessControl.cpp", "IRCompileLayer.cpp", "IRTransformLayer.cpp", "IndirectionUtils.cpp", @@ -35,11 +40,6 @@ static_library("Orc") { "RTDyldObjectLinkingLayer.cpp", "SpeculateAnalyses.cpp", "Speculation.cpp", - "TPCDebugObjectRegistrar.cpp", - "TPCDynamicLibrarySearchGenerator.cpp", - "TPCEHFrameRegistrar.cpp", - "TPCIndirectionUtils.cpp", - "TargetProcessControl.cpp", "ThreadSafeModule.cpp", ] } From ae7c3959e0e9a9f6acb854f2eb9afd32fa930168 Mon Sep 17 00:00:00 2001 From: theidexisted Date: Wed, 30 Jun 2021 20:48:23 -0700 Subject: [PATCH 377/619] [Compiler-rt] Add delete for noncopyable Reviewed By: vitalybuka Differential Revision: https://reviews.llvm.org/D105077 --- .../lib/sanitizer_common/sanitizer_mutex.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_mutex.h b/compiler-rt/lib/sanitizer_common/sanitizer_mutex.h index 40a65914299ea..742cd6562a226 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_mutex.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_mutex.h @@ -66,8 +66,8 @@ class SpinMutex : public StaticSpinMutex { } private: - SpinMutex(const SpinMutex&); - void operator=(const SpinMutex&); + SpinMutex(const SpinMutex &) = delete; + void operator=(const SpinMutex &) = delete; }; class BlockingMutex { @@ -171,8 +171,8 @@ class RWMutex { } } - RWMutex(const RWMutex&); - void operator = (const RWMutex&); + RWMutex(const RWMutex &) = delete; + void operator=(const RWMutex &) = delete; }; template @@ -190,8 +190,8 @@ class GenericScopedLock { private: MutexType *mu_; - GenericScopedLock(const GenericScopedLock&); - void operator=(const GenericScopedLock&); + GenericScopedLock(const GenericScopedLock &) = delete; + void operator=(const GenericScopedLock &) = delete; }; template @@ -209,8 +209,8 @@ class GenericScopedReadLock { private: MutexType *mu_; - GenericScopedReadLock(const GenericScopedReadLock&); - void operator=(const GenericScopedReadLock&); + GenericScopedReadLock(const GenericScopedReadLock &) = delete; + void operator=(const GenericScopedReadLock &) = delete; }; typedef GenericScopedLock SpinMutexLock; From 28b01c59c93d10ed3a775dd13ff827048b59cda8 Mon Sep 17 00:00:00 2001 From: Brian Cain Date: Wed, 30 Jun 2021 22:44:27 -0500 Subject: [PATCH 378/619] [hexagon] Add {hvx,}hexagon_{protos,circ_brev...} Add definitions for Hexagon, Hexagon circular/bit-reverse and HVX intrinsics. --- clang/lib/Headers/CMakeLists.txt | 4 + .../Headers/hexagon_circ_brev_intrinsics.h | 298 + clang/lib/Headers/hexagon_protos.h | 8450 +++++++++++++++++ clang/lib/Headers/hexagon_types.h | 2653 ++++++ clang/lib/Headers/hvx_hexagon_protos.h | 4392 +++++++++ clang/test/Headers/hexagon-audio-headers.c | 36 + clang/test/Headers/hexagon-headers.c | 28 + clang/test/Headers/hexagon-hvx-headers.c | 37 + 8 files changed, 15898 insertions(+) create mode 100644 clang/lib/Headers/hexagon_circ_brev_intrinsics.h create mode 100644 clang/lib/Headers/hexagon_protos.h create mode 100644 clang/lib/Headers/hexagon_types.h create mode 100644 clang/lib/Headers/hvx_hexagon_protos.h create mode 100644 clang/test/Headers/hexagon-audio-headers.c create mode 100644 clang/test/Headers/hexagon-headers.c create mode 100644 clang/test/Headers/hexagon-hvx-headers.c diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index 6d80d66fa11d7..382d40b2c0a99 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -66,6 +66,10 @@ set(files fmaintrin.h fxsrintrin.h gfniintrin.h + hexagon_circ_brev_intrinsics.h + hexagon_protos.h + hexagon_types.h + hvx_hexagon_protos.h hresetintrin.h htmintrin.h htmxlintrin.h diff --git a/clang/lib/Headers/hexagon_circ_brev_intrinsics.h b/clang/lib/Headers/hexagon_circ_brev_intrinsics.h new file mode 100644 index 0000000000000..c53786d3c37bc --- /dev/null +++ b/clang/lib/Headers/hexagon_circ_brev_intrinsics.h @@ -0,0 +1,298 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _HEXAGON_CIRC_BREV_INTRINSICS_H_ +#define _HEXAGON_CIRC_BREV_INTRINSICS_H_ 1 + +#include +#include + +/* Circular Load */ +/* ========================================================================== + Assembly Syntax: Return=instruction() + C Intrinsic Prototype: void Q6_circ_load_update_D(Word64 dst, Word64 *ptr, UWord32 incr, UWord32 bufsize, UWord32 K) + Instruction Type: InstructionType + Execution Slots: SLOT0123 + ========================================================================== */ +#define Q6_circ_load_update_D(dest,ptr,incr,bufsize,K) \ + { ptr = (int64_t *) HEXAGON_circ_ldd (ptr, &(dest), ((((K)+1)<<24)|((bufsize)<<3)), ((incr)*8)); } + +/* ========================================================================== + Assembly Syntax: Return=instruction() + C Intrinsic Prototype: void Q6_circ_load_update_W(Word32 dst, Word32 *ptr, UWord32 incr, UWord32 bufsize, UWord32 K) + Instruction Type: InstructionType + Execution Slots: SLOT0123 + ========================================================================== */ +#define Q6_circ_load_update_W(dest,ptr,incr,bufsize,K) \ + { ptr = (int *) HEXAGON_circ_ldw (ptr, &(dest), (((K)<<24)|((bufsize)<<2)), ((incr)*4)); } + +/* ========================================================================== + Assembly Syntax: Return=instruction() + C Intrinsic Prototype: void Q6_circ_load_update_H(Word16 dst, Word16 *ptr, UWord32 incr, UWord32 bufsize, UWord32 K) + Instruction Type: InstructionType + Execution Slots: SLOT0123 + ========================================================================== */ +#define Q6_circ_load_update_H(dest,ptr,incr,bufsize,K) \ + { ptr = (int16_t *) HEXAGON_circ_ldh (ptr, &(dest), ((((K)-1)<<24)|((bufsize)<<1)), ((incr)*2)); } + +/* ========================================================================== + Assembly Syntax: Return=instruction() + C Intrinsic Prototype: void Q6_circ_load_update_UH( UWord16 dst, UWord16 *ptr, UWord32 incr, UWord32 bufsize, UWord32 K) + Instruction Type: InstructionType + Execution Slots: SLOT0123 + ========================================================================== */ +#define Q6_circ_load_update_UH(dest,ptr,incr,bufsize,K) \ + { ptr = (uint16_t *) HEXAGON_circ_lduh (ptr, &(dest), ((((K)-1)<<24)|((bufsize)<<1)), ((incr)*2)); } + +/* ========================================================================== + Assembly Syntax: Return=instruction() + C Intrinsic Prototype: void Q6_circ_load_update_B(Word8 dst, Word8 *ptr, UWord32 incr, UWord32 bufsize, UWord32 K) + Instruction Type: InstructionType + Execution Slots: SLOT0123 + ========================================================================== */ +#define Q6_circ_load_update_B(dest,ptr,incr,bufsize,K) \ + { ptr = (int8_t *) HEXAGON_circ_ldb (ptr, &(dest), ((((K)-2)<<24)|(bufsize)), incr); } + +/* ========================================================================== + Assembly Syntax: Return=instruction() + C Intrinsic Prototype: void Q6_circ_load_update_UB(UWord8 dst, UWord8 *ptr, UWord32 incr, UWord32 bufsize, UWord32 K) + Instruction Type: InstructionType + Execution Slots: SLOT0123 + ========================================================================== */ +#define Q6_circ_load_update_UB(dest,ptr,incr,bufsize,K) \ + { ptr = (uint8_t *) HEXAGON_circ_ldub (ptr, &(dest), ((((K)-2)<<24)|(bufsize)), incr); } + +/* Circular Store */ +/* ========================================================================== + Assembly Syntax: Return=instruction() + C Intrinsic Prototype: void Q6_circ_store_update_D(Word64 *src, Word64 *ptr, UWord32 incr, UWord32 bufsize, UWord32 K) + Instruction Type: InstructionType + Execution Slots: SLOT0123 + ========================================================================== */ +#define Q6_circ_store_update_D(src,ptr,incr,bufsize,K) \ + { ptr = (int64_t *) HEXAGON_circ_std (ptr, src, ((((K)+1)<<24)|((bufsize)<<3)), ((incr)*8)); } + +/* ========================================================================== + Assembly Syntax: Return=instruction() + C Intrinsic Prototype: void Q6_circ_store_update_W(Word32 *src, Word32 *ptr, UWord32 incr, UWord32 bufsize, UWord32 K) + Instruction Type: InstructionType + Execution Slots: SLOT0123 + ========================================================================== */ +#define Q6_circ_store_update_W(src,ptr,incr,bufsize,K) \ + { ptr = (int *) HEXAGON_circ_stw (ptr, src, (((K)<<24)|((bufsize)<<2)), ((incr)*4)); } + +/* ========================================================================== + Assembly Syntax: Return=instruction() + C Intrinsic Prototype: void Q6_circ_store_update_HL(Word16 *src, Word16 *ptr, UWord32 incr, UWord32 bufsize, UWord32 K) + Instruction Type: InstructionType + Execution Slots: SLOT0123 + ========================================================================== */ +#define Q6_circ_store_update_HL(src,ptr,incr,bufsize,K) \ + { ptr = (int16_t *) HEXAGON_circ_sth (ptr, src, ((((K)-1)<<24)|((bufsize)<<1)), ((incr)*2)); } + +/* ========================================================================== + Assembly Syntax: Return=instruction() + C Intrinsic Prototype: void Q6_circ_store_update_HH(Word16 *src, Word16 *ptr, UWord32 incr, UWord32 bufsize, UWord32 K) + Instruction Type: InstructionType + Execution Slots: SLOT0123 + ========================================================================== */ +#define Q6_circ_store_update_HH(src,ptr,incr,bufsize,K) \ + { ptr = (int16_t *) HEXAGON_circ_sthhi (ptr, src, ((((K)-1)<<24)|((bufsize)<<1)), ((incr)*2)); } + +/* ========================================================================== + Assembly Syntax: Return=instruction() + C Intrinsic Prototype: void Q6_circ_store_update_B(Word8 *src, Word8 *ptr, UWord32 I4, UWord32 bufsize, UWord64 K) + Instruction Type: InstructionType + Execution Slots: SLOT0123 + ========================================================================== */ +#define Q6_circ_store_update_B(src,ptr,incr,bufsize,K) \ + { ptr = (int8_t *) HEXAGON_circ_stb (ptr, src, ((((K)-2)<<24)|(bufsize)), incr); } + + +/* Bit Reverse Load */ +/* ========================================================================== + Assembly Syntax: Return=instruction() + C Intrinsic Prototype: void Q6_bitrev_load_update_D(Word64 dst, Word64 *ptr, UWord32 Iu4) + Instruction Type: InstructionType + Execution Slots: SLOT0123 + ========================================================================== */ +#define Q6_bitrev_load_update_D(dest,ptr,log2bufsize) \ + { ptr = (int64_t *) HEXAGON_brev_ldd (ptr, &(dest), (1<<(16-((log2bufsize) + 3)))); } + +/* ========================================================================== + Assembly Syntax: Return=instruction() + C Intrinsic Prototype: void Q6_bitrev_load_update_W(Word32 dst, Word32 *ptr, UWord32 Iu4) + Instruction Type: InstructionType + Execution Slots: SLOT0123 + ========================================================================== */ +#define Q6_bitrev_load_update_W(dest,ptr,log2bufsize) \ + { ptr = (int *) HEXAGON_brev_ldw (ptr, &(dest), (1<<(16-((log2bufsize) + 2)))); } + +/* ========================================================================== + Assembly Syntax: Return=instruction() + C Intrinsic Prototype: void Q6_bitrev_load_update_H(Word16 dst, Word16 *ptr, UWord32 Iu4) + Instruction Type: InstructionType + Execution Slots: SLOT0123 + ========================================================================== */ +#define Q6_bitrev_load_update_H(dest,ptr,log2bufsize) \ + { ptr = (int16_t *) HEXAGON_brev_ldh (ptr, &(dest), (1<<(16-((log2bufsize) + 1)))); } + +/* ========================================================================== + Assembly Syntax: Return=instruction() + C Intrinsic Prototype: void Q6_bitrev_load_update_UH(UWord16 dst, UWord16 *ptr, UWord32 Iu4) + Instruction Type: InstructionType + Execution Slots: SLOT0123 + ========================================================================== */ +#define Q6_bitrev_load_update_UH(dest,ptr,log2bufsize) \ + { ptr = (uint16_t *) HEXAGON_brev_lduh (ptr, &(dest), (1<<(16-((log2bufsize) + 1)))); } + +/* ========================================================================== + Assembly Syntax: Return=instruction() + C Intrinsic Prototype: void Q6_bitrev_load_update_B(Word8 dst, Word8 *ptr, UWord32 Iu4) + Instruction Type: InstructionType + Execution Slots: SLOT0123 + ========================================================================== */ +#define Q6_bitrev_load_update_B(dest,ptr,log2bufsize) \ + { ptr = (int8_t *) HEXAGON_brev_ldb (ptr, &(dest), (1<<(16-((log2bufsize))))); } + +/* ========================================================================== + Assembly Syntax: Return=instruction() + C Intrinsic Prototype: void Q6_bitrev_load_update_UB(UWord8 dst, UWord8 *ptr, UWord32 Iu4) + Instruction Type: InstructionType + Execution Slots: SLOT0123 + ========================================================================== */ +#define Q6_bitrev_load_update_UB(dest,ptr,log2bufsize) \ + { ptr = (uint8_t *) HEXAGON_brev_ldub (ptr, &(dest), (1<<(16-((log2bufsize))))); } + +/* Bit Reverse Store */ + +/* ========================================================================== + Assembly Syntax: Return=instruction() + C Intrinsic Prototype: void Q6_bitrev_store_update_D(Word64 *src, Word64 *ptr, UWord32 Iu4) + Instruction Type: InstructionType + Execution Slots: SLOT0123 + ========================================================================== */ +#define Q6_bitrev_store_update_D(src,ptr,log2bufsize) \ + { ptr = (int64_t *) HEXAGON_brev_std (ptr, src, (1<<(16-((log2bufsize) + 3)))); } + +/* ========================================================================== + Assembly Syntax: Return=instruction() + C Intrinsic Prototype: void Q6_bitrev_store_update_W(Word32 *src, Word32 *ptr, UWord32 Iu4) + Instruction Type: InstructionType + Execution Slots: SLOT0123 + ========================================================================== */ +#define Q6_bitrev_store_update_W(src,ptr,log2bufsize) \ + { ptr = (int *) HEXAGON_brev_stw (ptr, src, (1<<(16-((log2bufsize) + 2)))); } + +/* ========================================================================== + Assembly Syntax: Return=instruction() + C Intrinsic Prototype: void Q6_bitrev_store_update_HL(Word16 *src, Word16 *ptr, Word32 Iu4) + Instruction Type: InstructionType + Execution Slots: SLOT0123 + ========================================================================== */ +#define Q6_bitrev_store_update_HL(src,ptr,log2bufsize) \ + { ptr = (int16_t *) HEXAGON_brev_sth (ptr, src, (1<<(16-((log2bufsize) + 1)))); } + +/* ========================================================================== + Assembly Syntax: Return=instruction() + C Intrinsic Prototype: void Q6_bitrev_store_update_HH(Word16 *src, Word16 *ptr, UWord32 Iu4) + Instruction Type: InstructionType + Execution Slots: SLOT0123 + ========================================================================== */ +#define Q6_bitrev_store_update_HH(src,ptr,log2bufsize) \ + { ptr = (int16_t *) HEXAGON_brev_sthhi (ptr, src, (1<<(16-((log2bufsize) + 1)))); } + +/* ========================================================================== + Assembly Syntax: Return=instruction() + C Intrinsic Prototype: void Q6_bitrev_store_update_B(Word8 *src, Word8 *ptr, UWord32 Iu4) + Instruction Type: InstructionType + Execution Slots: SLOT0123 + ========================================================================== */ +#define Q6_bitrev_store_update_B(src,ptr,log2bufsize) \ + { ptr = (int8_t *) HEXAGON_brev_stb (ptr, src, (1<<(16-((log2bufsize))))); } + + +#define HEXAGON_circ_ldd __builtin_circ_ldd +#define HEXAGON_circ_ldw __builtin_circ_ldw +#define HEXAGON_circ_ldh __builtin_circ_ldh +#define HEXAGON_circ_lduh __builtin_circ_lduh +#define HEXAGON_circ_ldb __builtin_circ_ldb +#define HEXAGON_circ_ldub __builtin_circ_ldub + + +#define HEXAGON_circ_std __builtin_circ_std +#define HEXAGON_circ_stw __builtin_circ_stw +#define HEXAGON_circ_sth __builtin_circ_sth +#define HEXAGON_circ_sthhi __builtin_circ_sthhi +#define HEXAGON_circ_stb __builtin_circ_stb + + +#define HEXAGON_brev_ldd __builtin_brev_ldd +#define HEXAGON_brev_ldw __builtin_brev_ldw +#define HEXAGON_brev_ldh __builtin_brev_ldh +#define HEXAGON_brev_lduh __builtin_brev_lduh +#define HEXAGON_brev_ldb __builtin_brev_ldb +#define HEXAGON_brev_ldub __builtin_brev_ldub + +#define HEXAGON_brev_std __builtin_brev_std +#define HEXAGON_brev_stw __builtin_brev_stw +#define HEXAGON_brev_sth __builtin_brev_sth +#define HEXAGON_brev_sthhi __builtin_brev_sthhi +#define HEXAGON_brev_stb __builtin_brev_stb + +#ifdef __HVX__ +/* ========================================================================== + Assembly Syntax: if (Qt) vmem(Rt+#0) = Vs + C Intrinsic Prototype: void Q6_vmaskedstoreq_QAV(HVX_VectorPred Qt, HVX_VectorAddress A, HVX_Vector Vs) + Instruction Type: COPROC_VMEM + Execution Slots: SLOT0 + ========================================================================== */ + +#define Q6_vmaskedstoreq_QAV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaskedstoreq) + +/* ========================================================================== + Assembly Syntax: if (!Qt) vmem(Rt+#0) = Vs + C Intrinsic Prototype: void Q6_vmaskedstorenq_QAV(HVX_VectorPred Qt, HVX_VectorAddress A, HVX_Vector Vs) + Instruction Type: COPROC_VMEM + Execution Slots: SLOT0 + ========================================================================== */ + +#define Q6_vmaskedstorenq_QAV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaskedstorenq) + +/* ========================================================================== + Assembly Syntax: if (Qt) vmem(Rt+#0):nt = Vs + C Intrinsic Prototype: void Q6_vmaskedstorentq_QAV(HVX_VectorPred Qt, HVX_VectorAddress A, HVX_Vector Vs) + Instruction Type: COPROC_VMEM + Execution Slots: SLOT0 + ========================================================================== */ + +#define Q6_vmaskedstorentq_QAV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaskedstorentq) + +/* ========================================================================== + Assembly Syntax: if (!Qt) vmem(Rt+#0):nt = Vs + C Intrinsic Prototype: void Q6_vmaskedstorentnq_QAV(HVX_VectorPred Qt, HVX_VectorAddress A, HVX_Vector Vs) + Instruction Type: COPROC_VMEM + Execution Slots: SLOT0 + ========================================================================== */ + +#define Q6_vmaskedstorentnq_QAV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaskedstorentnq) + +#endif + + +#endif /* #ifndef _HEXAGON_CIRC_BREV_INTRINSICS_H_ */ + +#ifdef __NOT_DEFINED__ +/*** comment block template ***/ +/* ========================================================================== + Assembly Syntax: Return=instruction() + C Intrinsic Prototype: ReturnType Intrinsic(ParamType Rs, ParamType Rt) + Instruction Type: InstructionType + Execution Slots: SLOT0123 + ========================================================================== */ +#endif /*** __NOT_DEFINED__ ***/ diff --git a/clang/lib/Headers/hexagon_protos.h b/clang/lib/Headers/hexagon_protos.h new file mode 100644 index 0000000000000..cdffd93bb8593 --- /dev/null +++ b/clang/lib/Headers/hexagon_protos.h @@ -0,0 +1,8450 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Automatically generated file, do not edit! +//===----------------------------------------------------------------------===// + + + +#ifndef __HEXAGON_PROTOS_H_ +#define __HEXAGON_PROTOS_H_ 1 + +/* ========================================================================== + Assembly Syntax: Rd32=abs(Rs32) + C Intrinsic Prototype: Word32 Q6_R_abs_R(Word32 Rs) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_abs_R __builtin_HEXAGON_A2_abs + +/* ========================================================================== + Assembly Syntax: Rdd32=abs(Rss32) + C Intrinsic Prototype: Word64 Q6_P_abs_P(Word64 Rss) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_abs_P __builtin_HEXAGON_A2_absp + +/* ========================================================================== + Assembly Syntax: Rd32=abs(Rs32):sat + C Intrinsic Prototype: Word32 Q6_R_abs_R_sat(Word32 Rs) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_abs_R_sat __builtin_HEXAGON_A2_abssat + +/* ========================================================================== + Assembly Syntax: Rd32=add(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_add_RR(Word32 Rs, Word32 Rt) + Instruction Type: ALU32_3op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_add_RR __builtin_HEXAGON_A2_add + +/* ========================================================================== + Assembly Syntax: Rd32=add(Rt32.h,Rs32.h):<<16 + C Intrinsic Prototype: Word32 Q6_R_add_RhRh_s16(Word32 Rt, Word32 Rs) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_add_RhRh_s16 __builtin_HEXAGON_A2_addh_h16_hh + +/* ========================================================================== + Assembly Syntax: Rd32=add(Rt32.h,Rs32.l):<<16 + C Intrinsic Prototype: Word32 Q6_R_add_RhRl_s16(Word32 Rt, Word32 Rs) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_add_RhRl_s16 __builtin_HEXAGON_A2_addh_h16_hl + +/* ========================================================================== + Assembly Syntax: Rd32=add(Rt32.l,Rs32.h):<<16 + C Intrinsic Prototype: Word32 Q6_R_add_RlRh_s16(Word32 Rt, Word32 Rs) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_add_RlRh_s16 __builtin_HEXAGON_A2_addh_h16_lh + +/* ========================================================================== + Assembly Syntax: Rd32=add(Rt32.l,Rs32.l):<<16 + C Intrinsic Prototype: Word32 Q6_R_add_RlRl_s16(Word32 Rt, Word32 Rs) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_add_RlRl_s16 __builtin_HEXAGON_A2_addh_h16_ll + +/* ========================================================================== + Assembly Syntax: Rd32=add(Rt32.h,Rs32.h):sat:<<16 + C Intrinsic Prototype: Word32 Q6_R_add_RhRh_sat_s16(Word32 Rt, Word32 Rs) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_add_RhRh_sat_s16 __builtin_HEXAGON_A2_addh_h16_sat_hh + +/* ========================================================================== + Assembly Syntax: Rd32=add(Rt32.h,Rs32.l):sat:<<16 + C Intrinsic Prototype: Word32 Q6_R_add_RhRl_sat_s16(Word32 Rt, Word32 Rs) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_add_RhRl_sat_s16 __builtin_HEXAGON_A2_addh_h16_sat_hl + +/* ========================================================================== + Assembly Syntax: Rd32=add(Rt32.l,Rs32.h):sat:<<16 + C Intrinsic Prototype: Word32 Q6_R_add_RlRh_sat_s16(Word32 Rt, Word32 Rs) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_add_RlRh_sat_s16 __builtin_HEXAGON_A2_addh_h16_sat_lh + +/* ========================================================================== + Assembly Syntax: Rd32=add(Rt32.l,Rs32.l):sat:<<16 + C Intrinsic Prototype: Word32 Q6_R_add_RlRl_sat_s16(Word32 Rt, Word32 Rs) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_add_RlRl_sat_s16 __builtin_HEXAGON_A2_addh_h16_sat_ll + +/* ========================================================================== + Assembly Syntax: Rd32=add(Rt32.l,Rs32.h) + C Intrinsic Prototype: Word32 Q6_R_add_RlRh(Word32 Rt, Word32 Rs) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_add_RlRh __builtin_HEXAGON_A2_addh_l16_hl + +/* ========================================================================== + Assembly Syntax: Rd32=add(Rt32.l,Rs32.l) + C Intrinsic Prototype: Word32 Q6_R_add_RlRl(Word32 Rt, Word32 Rs) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_add_RlRl __builtin_HEXAGON_A2_addh_l16_ll + +/* ========================================================================== + Assembly Syntax: Rd32=add(Rt32.l,Rs32.h):sat + C Intrinsic Prototype: Word32 Q6_R_add_RlRh_sat(Word32 Rt, Word32 Rs) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_add_RlRh_sat __builtin_HEXAGON_A2_addh_l16_sat_hl + +/* ========================================================================== + Assembly Syntax: Rd32=add(Rt32.l,Rs32.l):sat + C Intrinsic Prototype: Word32 Q6_R_add_RlRl_sat(Word32 Rt, Word32 Rs) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_add_RlRl_sat __builtin_HEXAGON_A2_addh_l16_sat_ll + +/* ========================================================================== + Assembly Syntax: Rd32=add(Rs32,#s16) + C Intrinsic Prototype: Word32 Q6_R_add_RI(Word32 Rs, Word32 Is16) + Instruction Type: ALU32_ADDI + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_add_RI __builtin_HEXAGON_A2_addi + +/* ========================================================================== + Assembly Syntax: Rdd32=add(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_add_PP(Word64 Rss, Word64 Rtt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_add_PP __builtin_HEXAGON_A2_addp + +/* ========================================================================== + Assembly Syntax: Rdd32=add(Rss32,Rtt32):sat + C Intrinsic Prototype: Word64 Q6_P_add_PP_sat(Word64 Rss, Word64 Rtt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_add_PP_sat __builtin_HEXAGON_A2_addpsat + +/* ========================================================================== + Assembly Syntax: Rd32=add(Rs32,Rt32):sat + C Intrinsic Prototype: Word32 Q6_R_add_RR_sat(Word32 Rs, Word32 Rt) + Instruction Type: ALU32_3op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_add_RR_sat __builtin_HEXAGON_A2_addsat + +/* ========================================================================== + Assembly Syntax: Rdd32=add(Rs32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_add_RP(Word32 Rs, Word64 Rtt) + Instruction Type: ALU64 + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_P_add_RP __builtin_HEXAGON_A2_addsp + +/* ========================================================================== + Assembly Syntax: Rd32=and(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_and_RR(Word32 Rs, Word32 Rt) + Instruction Type: ALU32_3op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_and_RR __builtin_HEXAGON_A2_and + +/* ========================================================================== + Assembly Syntax: Rd32=and(Rs32,#s10) + C Intrinsic Prototype: Word32 Q6_R_and_RI(Word32 Rs, Word32 Is10) + Instruction Type: ALU32_2op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_and_RI __builtin_HEXAGON_A2_andir + +/* ========================================================================== + Assembly Syntax: Rdd32=and(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_and_PP(Word64 Rss, Word64 Rtt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_and_PP __builtin_HEXAGON_A2_andp + +/* ========================================================================== + Assembly Syntax: Rd32=aslh(Rs32) + C Intrinsic Prototype: Word32 Q6_R_aslh_R(Word32 Rs) + Instruction Type: ALU32_2op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_aslh_R __builtin_HEXAGON_A2_aslh + +/* ========================================================================== + Assembly Syntax: Rd32=asrh(Rs32) + C Intrinsic Prototype: Word32 Q6_R_asrh_R(Word32 Rs) + Instruction Type: ALU32_2op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_asrh_R __builtin_HEXAGON_A2_asrh + +/* ========================================================================== + Assembly Syntax: Rd32=combine(Rt32.h,Rs32.h) + C Intrinsic Prototype: Word32 Q6_R_combine_RhRh(Word32 Rt, Word32 Rs) + Instruction Type: ALU32_3op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_combine_RhRh __builtin_HEXAGON_A2_combine_hh + +/* ========================================================================== + Assembly Syntax: Rd32=combine(Rt32.h,Rs32.l) + C Intrinsic Prototype: Word32 Q6_R_combine_RhRl(Word32 Rt, Word32 Rs) + Instruction Type: ALU32_3op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_combine_RhRl __builtin_HEXAGON_A2_combine_hl + +/* ========================================================================== + Assembly Syntax: Rd32=combine(Rt32.l,Rs32.h) + C Intrinsic Prototype: Word32 Q6_R_combine_RlRh(Word32 Rt, Word32 Rs) + Instruction Type: ALU32_3op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_combine_RlRh __builtin_HEXAGON_A2_combine_lh + +/* ========================================================================== + Assembly Syntax: Rd32=combine(Rt32.l,Rs32.l) + C Intrinsic Prototype: Word32 Q6_R_combine_RlRl(Word32 Rt, Word32 Rs) + Instruction Type: ALU32_3op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_combine_RlRl __builtin_HEXAGON_A2_combine_ll + +/* ========================================================================== + Assembly Syntax: Rdd32=combine(#s8,#S8) + C Intrinsic Prototype: Word64 Q6_P_combine_II(Word32 Is8, Word32 IS8) + Instruction Type: ALU32_2op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_P_combine_II __builtin_HEXAGON_A2_combineii + +/* ========================================================================== + Assembly Syntax: Rdd32=combine(Rs32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_combine_RR(Word32 Rs, Word32 Rt) + Instruction Type: ALU32_3op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_P_combine_RR __builtin_HEXAGON_A2_combinew + +/* ========================================================================== + Assembly Syntax: Rd32=max(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_max_RR(Word32 Rs, Word32 Rt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_max_RR __builtin_HEXAGON_A2_max + +/* ========================================================================== + Assembly Syntax: Rdd32=max(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_max_PP(Word64 Rss, Word64 Rtt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_max_PP __builtin_HEXAGON_A2_maxp + +/* ========================================================================== + Assembly Syntax: Rd32=maxu(Rs32,Rt32) + C Intrinsic Prototype: UWord32 Q6_R_maxu_RR(Word32 Rs, Word32 Rt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_maxu_RR __builtin_HEXAGON_A2_maxu + +/* ========================================================================== + Assembly Syntax: Rdd32=maxu(Rss32,Rtt32) + C Intrinsic Prototype: UWord64 Q6_P_maxu_PP(Word64 Rss, Word64 Rtt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_maxu_PP __builtin_HEXAGON_A2_maxup + +/* ========================================================================== + Assembly Syntax: Rd32=min(Rt32,Rs32) + C Intrinsic Prototype: Word32 Q6_R_min_RR(Word32 Rt, Word32 Rs) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_min_RR __builtin_HEXAGON_A2_min + +/* ========================================================================== + Assembly Syntax: Rdd32=min(Rtt32,Rss32) + C Intrinsic Prototype: Word64 Q6_P_min_PP(Word64 Rtt, Word64 Rss) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_min_PP __builtin_HEXAGON_A2_minp + +/* ========================================================================== + Assembly Syntax: Rd32=minu(Rt32,Rs32) + C Intrinsic Prototype: UWord32 Q6_R_minu_RR(Word32 Rt, Word32 Rs) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_minu_RR __builtin_HEXAGON_A2_minu + +/* ========================================================================== + Assembly Syntax: Rdd32=minu(Rtt32,Rss32) + C Intrinsic Prototype: UWord64 Q6_P_minu_PP(Word64 Rtt, Word64 Rss) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_minu_PP __builtin_HEXAGON_A2_minup + +/* ========================================================================== + Assembly Syntax: Rd32=neg(Rs32) + C Intrinsic Prototype: Word32 Q6_R_neg_R(Word32 Rs) + Instruction Type: ALU32_2op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_neg_R __builtin_HEXAGON_A2_neg + +/* ========================================================================== + Assembly Syntax: Rdd32=neg(Rss32) + C Intrinsic Prototype: Word64 Q6_P_neg_P(Word64 Rss) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_neg_P __builtin_HEXAGON_A2_negp + +/* ========================================================================== + Assembly Syntax: Rd32=neg(Rs32):sat + C Intrinsic Prototype: Word32 Q6_R_neg_R_sat(Word32 Rs) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_neg_R_sat __builtin_HEXAGON_A2_negsat + +/* ========================================================================== + Assembly Syntax: Rd32=not(Rs32) + C Intrinsic Prototype: Word32 Q6_R_not_R(Word32 Rs) + Instruction Type: ALU32_2op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_not_R __builtin_HEXAGON_A2_not + +/* ========================================================================== + Assembly Syntax: Rdd32=not(Rss32) + C Intrinsic Prototype: Word64 Q6_P_not_P(Word64 Rss) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_not_P __builtin_HEXAGON_A2_notp + +/* ========================================================================== + Assembly Syntax: Rd32=or(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_or_RR(Word32 Rs, Word32 Rt) + Instruction Type: ALU32_3op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_or_RR __builtin_HEXAGON_A2_or + +/* ========================================================================== + Assembly Syntax: Rd32=or(Rs32,#s10) + C Intrinsic Prototype: Word32 Q6_R_or_RI(Word32 Rs, Word32 Is10) + Instruction Type: ALU32_2op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_or_RI __builtin_HEXAGON_A2_orir + +/* ========================================================================== + Assembly Syntax: Rdd32=or(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_or_PP(Word64 Rss, Word64 Rtt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_or_PP __builtin_HEXAGON_A2_orp + +/* ========================================================================== + Assembly Syntax: Rd32=round(Rss32):sat + C Intrinsic Prototype: Word32 Q6_R_round_P_sat(Word64 Rss) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_round_P_sat __builtin_HEXAGON_A2_roundsat + +/* ========================================================================== + Assembly Syntax: Rd32=sat(Rss32) + C Intrinsic Prototype: Word32 Q6_R_sat_P(Word64 Rss) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_sat_P __builtin_HEXAGON_A2_sat + +/* ========================================================================== + Assembly Syntax: Rd32=satb(Rs32) + C Intrinsic Prototype: Word32 Q6_R_satb_R(Word32 Rs) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_satb_R __builtin_HEXAGON_A2_satb + +/* ========================================================================== + Assembly Syntax: Rd32=sath(Rs32) + C Intrinsic Prototype: Word32 Q6_R_sath_R(Word32 Rs) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_sath_R __builtin_HEXAGON_A2_sath + +/* ========================================================================== + Assembly Syntax: Rd32=satub(Rs32) + C Intrinsic Prototype: Word32 Q6_R_satub_R(Word32 Rs) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_satub_R __builtin_HEXAGON_A2_satub + +/* ========================================================================== + Assembly Syntax: Rd32=satuh(Rs32) + C Intrinsic Prototype: Word32 Q6_R_satuh_R(Word32 Rs) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_satuh_R __builtin_HEXAGON_A2_satuh + +/* ========================================================================== + Assembly Syntax: Rd32=sub(Rt32,Rs32) + C Intrinsic Prototype: Word32 Q6_R_sub_RR(Word32 Rt, Word32 Rs) + Instruction Type: ALU32_3op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_sub_RR __builtin_HEXAGON_A2_sub + +/* ========================================================================== + Assembly Syntax: Rd32=sub(Rt32.h,Rs32.h):<<16 + C Intrinsic Prototype: Word32 Q6_R_sub_RhRh_s16(Word32 Rt, Word32 Rs) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_sub_RhRh_s16 __builtin_HEXAGON_A2_subh_h16_hh + +/* ========================================================================== + Assembly Syntax: Rd32=sub(Rt32.h,Rs32.l):<<16 + C Intrinsic Prototype: Word32 Q6_R_sub_RhRl_s16(Word32 Rt, Word32 Rs) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_sub_RhRl_s16 __builtin_HEXAGON_A2_subh_h16_hl + +/* ========================================================================== + Assembly Syntax: Rd32=sub(Rt32.l,Rs32.h):<<16 + C Intrinsic Prototype: Word32 Q6_R_sub_RlRh_s16(Word32 Rt, Word32 Rs) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_sub_RlRh_s16 __builtin_HEXAGON_A2_subh_h16_lh + +/* ========================================================================== + Assembly Syntax: Rd32=sub(Rt32.l,Rs32.l):<<16 + C Intrinsic Prototype: Word32 Q6_R_sub_RlRl_s16(Word32 Rt, Word32 Rs) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_sub_RlRl_s16 __builtin_HEXAGON_A2_subh_h16_ll + +/* ========================================================================== + Assembly Syntax: Rd32=sub(Rt32.h,Rs32.h):sat:<<16 + C Intrinsic Prototype: Word32 Q6_R_sub_RhRh_sat_s16(Word32 Rt, Word32 Rs) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_sub_RhRh_sat_s16 __builtin_HEXAGON_A2_subh_h16_sat_hh + +/* ========================================================================== + Assembly Syntax: Rd32=sub(Rt32.h,Rs32.l):sat:<<16 + C Intrinsic Prototype: Word32 Q6_R_sub_RhRl_sat_s16(Word32 Rt, Word32 Rs) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_sub_RhRl_sat_s16 __builtin_HEXAGON_A2_subh_h16_sat_hl + +/* ========================================================================== + Assembly Syntax: Rd32=sub(Rt32.l,Rs32.h):sat:<<16 + C Intrinsic Prototype: Word32 Q6_R_sub_RlRh_sat_s16(Word32 Rt, Word32 Rs) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_sub_RlRh_sat_s16 __builtin_HEXAGON_A2_subh_h16_sat_lh + +/* ========================================================================== + Assembly Syntax: Rd32=sub(Rt32.l,Rs32.l):sat:<<16 + C Intrinsic Prototype: Word32 Q6_R_sub_RlRl_sat_s16(Word32 Rt, Word32 Rs) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_sub_RlRl_sat_s16 __builtin_HEXAGON_A2_subh_h16_sat_ll + +/* ========================================================================== + Assembly Syntax: Rd32=sub(Rt32.l,Rs32.h) + C Intrinsic Prototype: Word32 Q6_R_sub_RlRh(Word32 Rt, Word32 Rs) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_sub_RlRh __builtin_HEXAGON_A2_subh_l16_hl + +/* ========================================================================== + Assembly Syntax: Rd32=sub(Rt32.l,Rs32.l) + C Intrinsic Prototype: Word32 Q6_R_sub_RlRl(Word32 Rt, Word32 Rs) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_sub_RlRl __builtin_HEXAGON_A2_subh_l16_ll + +/* ========================================================================== + Assembly Syntax: Rd32=sub(Rt32.l,Rs32.h):sat + C Intrinsic Prototype: Word32 Q6_R_sub_RlRh_sat(Word32 Rt, Word32 Rs) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_sub_RlRh_sat __builtin_HEXAGON_A2_subh_l16_sat_hl + +/* ========================================================================== + Assembly Syntax: Rd32=sub(Rt32.l,Rs32.l):sat + C Intrinsic Prototype: Word32 Q6_R_sub_RlRl_sat(Word32 Rt, Word32 Rs) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_sub_RlRl_sat __builtin_HEXAGON_A2_subh_l16_sat_ll + +/* ========================================================================== + Assembly Syntax: Rdd32=sub(Rtt32,Rss32) + C Intrinsic Prototype: Word64 Q6_P_sub_PP(Word64 Rtt, Word64 Rss) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_sub_PP __builtin_HEXAGON_A2_subp + +/* ========================================================================== + Assembly Syntax: Rd32=sub(#s10,Rs32) + C Intrinsic Prototype: Word32 Q6_R_sub_IR(Word32 Is10, Word32 Rs) + Instruction Type: ALU32_2op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_sub_IR __builtin_HEXAGON_A2_subri + +/* ========================================================================== + Assembly Syntax: Rd32=sub(Rt32,Rs32):sat + C Intrinsic Prototype: Word32 Q6_R_sub_RR_sat(Word32 Rt, Word32 Rs) + Instruction Type: ALU32_3op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_sub_RR_sat __builtin_HEXAGON_A2_subsat + +/* ========================================================================== + Assembly Syntax: Rd32=vaddh(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_vaddh_RR(Word32 Rs, Word32 Rt) + Instruction Type: ALU32_3op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_vaddh_RR __builtin_HEXAGON_A2_svaddh + +/* ========================================================================== + Assembly Syntax: Rd32=vaddh(Rs32,Rt32):sat + C Intrinsic Prototype: Word32 Q6_R_vaddh_RR_sat(Word32 Rs, Word32 Rt) + Instruction Type: ALU32_3op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_vaddh_RR_sat __builtin_HEXAGON_A2_svaddhs + +/* ========================================================================== + Assembly Syntax: Rd32=vadduh(Rs32,Rt32):sat + C Intrinsic Prototype: Word32 Q6_R_vadduh_RR_sat(Word32 Rs, Word32 Rt) + Instruction Type: ALU32_3op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_vadduh_RR_sat __builtin_HEXAGON_A2_svadduhs + +/* ========================================================================== + Assembly Syntax: Rd32=vavgh(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_vavgh_RR(Word32 Rs, Word32 Rt) + Instruction Type: ALU32_3op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_vavgh_RR __builtin_HEXAGON_A2_svavgh + +/* ========================================================================== + Assembly Syntax: Rd32=vavgh(Rs32,Rt32):rnd + C Intrinsic Prototype: Word32 Q6_R_vavgh_RR_rnd(Word32 Rs, Word32 Rt) + Instruction Type: ALU32_3op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_vavgh_RR_rnd __builtin_HEXAGON_A2_svavghs + +/* ========================================================================== + Assembly Syntax: Rd32=vnavgh(Rt32,Rs32) + C Intrinsic Prototype: Word32 Q6_R_vnavgh_RR(Word32 Rt, Word32 Rs) + Instruction Type: ALU32_3op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_vnavgh_RR __builtin_HEXAGON_A2_svnavgh + +/* ========================================================================== + Assembly Syntax: Rd32=vsubh(Rt32,Rs32) + C Intrinsic Prototype: Word32 Q6_R_vsubh_RR(Word32 Rt, Word32 Rs) + Instruction Type: ALU32_3op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_vsubh_RR __builtin_HEXAGON_A2_svsubh + +/* ========================================================================== + Assembly Syntax: Rd32=vsubh(Rt32,Rs32):sat + C Intrinsic Prototype: Word32 Q6_R_vsubh_RR_sat(Word32 Rt, Word32 Rs) + Instruction Type: ALU32_3op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_vsubh_RR_sat __builtin_HEXAGON_A2_svsubhs + +/* ========================================================================== + Assembly Syntax: Rd32=vsubuh(Rt32,Rs32):sat + C Intrinsic Prototype: Word32 Q6_R_vsubuh_RR_sat(Word32 Rt, Word32 Rs) + Instruction Type: ALU32_3op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_vsubuh_RR_sat __builtin_HEXAGON_A2_svsubuhs + +/* ========================================================================== + Assembly Syntax: Rd32=swiz(Rs32) + C Intrinsic Prototype: Word32 Q6_R_swiz_R(Word32 Rs) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_swiz_R __builtin_HEXAGON_A2_swiz + +/* ========================================================================== + Assembly Syntax: Rd32=sxtb(Rs32) + C Intrinsic Prototype: Word32 Q6_R_sxtb_R(Word32 Rs) + Instruction Type: ALU32_2op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_sxtb_R __builtin_HEXAGON_A2_sxtb + +/* ========================================================================== + Assembly Syntax: Rd32=sxth(Rs32) + C Intrinsic Prototype: Word32 Q6_R_sxth_R(Word32 Rs) + Instruction Type: ALU32_2op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_sxth_R __builtin_HEXAGON_A2_sxth + +/* ========================================================================== + Assembly Syntax: Rdd32=sxtw(Rs32) + C Intrinsic Prototype: Word64 Q6_P_sxtw_R(Word32 Rs) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_sxtw_R __builtin_HEXAGON_A2_sxtw + +/* ========================================================================== + Assembly Syntax: Rd32=Rs32 + C Intrinsic Prototype: Word32 Q6_R_equals_R(Word32 Rs) + Instruction Type: ALU32_2op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_equals_R __builtin_HEXAGON_A2_tfr + +/* ========================================================================== + Assembly Syntax: Rx32.h=#u16 + C Intrinsic Prototype: Word32 Q6_Rh_equals_I(Word32 Rx, Word32 Iu16) + Instruction Type: ALU32_2op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Rh_equals_I __builtin_HEXAGON_A2_tfrih + +/* ========================================================================== + Assembly Syntax: Rx32.l=#u16 + C Intrinsic Prototype: Word32 Q6_Rl_equals_I(Word32 Rx, Word32 Iu16) + Instruction Type: ALU32_2op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Rl_equals_I __builtin_HEXAGON_A2_tfril + +/* ========================================================================== + Assembly Syntax: Rdd32=Rss32 + C Intrinsic Prototype: Word64 Q6_P_equals_P(Word64 Rss) + Instruction Type: ALU32_2op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_P_equals_P __builtin_HEXAGON_A2_tfrp + +/* ========================================================================== + Assembly Syntax: Rdd32=#s8 + C Intrinsic Prototype: Word64 Q6_P_equals_I(Word32 Is8) + Instruction Type: ALU64 + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_P_equals_I __builtin_HEXAGON_A2_tfrpi + +/* ========================================================================== + Assembly Syntax: Rd32=#s16 + C Intrinsic Prototype: Word32 Q6_R_equals_I(Word32 Is16) + Instruction Type: ALU32_2op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_equals_I __builtin_HEXAGON_A2_tfrsi + +/* ========================================================================== + Assembly Syntax: Rdd32=vabsh(Rss32) + C Intrinsic Prototype: Word64 Q6_P_vabsh_P(Word64 Rss) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vabsh_P __builtin_HEXAGON_A2_vabsh + +/* ========================================================================== + Assembly Syntax: Rdd32=vabsh(Rss32):sat + C Intrinsic Prototype: Word64 Q6_P_vabsh_P_sat(Word64 Rss) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vabsh_P_sat __builtin_HEXAGON_A2_vabshsat + +/* ========================================================================== + Assembly Syntax: Rdd32=vabsw(Rss32) + C Intrinsic Prototype: Word64 Q6_P_vabsw_P(Word64 Rss) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vabsw_P __builtin_HEXAGON_A2_vabsw + +/* ========================================================================== + Assembly Syntax: Rdd32=vabsw(Rss32):sat + C Intrinsic Prototype: Word64 Q6_P_vabsw_P_sat(Word64 Rss) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vabsw_P_sat __builtin_HEXAGON_A2_vabswsat + +/* ========================================================================== + Assembly Syntax: Rdd32=vaddb(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_vaddb_PP(Word64 Rss, Word64 Rtt) + Instruction Type: MAPPING + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_P_vaddb_PP __builtin_HEXAGON_A2_vaddb_map + +/* ========================================================================== + Assembly Syntax: Rdd32=vaddh(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_vaddh_PP(Word64 Rss, Word64 Rtt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vaddh_PP __builtin_HEXAGON_A2_vaddh + +/* ========================================================================== + Assembly Syntax: Rdd32=vaddh(Rss32,Rtt32):sat + C Intrinsic Prototype: Word64 Q6_P_vaddh_PP_sat(Word64 Rss, Word64 Rtt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vaddh_PP_sat __builtin_HEXAGON_A2_vaddhs + +/* ========================================================================== + Assembly Syntax: Rdd32=vaddub(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_vaddub_PP(Word64 Rss, Word64 Rtt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vaddub_PP __builtin_HEXAGON_A2_vaddub + +/* ========================================================================== + Assembly Syntax: Rdd32=vaddub(Rss32,Rtt32):sat + C Intrinsic Prototype: Word64 Q6_P_vaddub_PP_sat(Word64 Rss, Word64 Rtt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vaddub_PP_sat __builtin_HEXAGON_A2_vaddubs + +/* ========================================================================== + Assembly Syntax: Rdd32=vadduh(Rss32,Rtt32):sat + C Intrinsic Prototype: Word64 Q6_P_vadduh_PP_sat(Word64 Rss, Word64 Rtt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vadduh_PP_sat __builtin_HEXAGON_A2_vadduhs + +/* ========================================================================== + Assembly Syntax: Rdd32=vaddw(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_vaddw_PP(Word64 Rss, Word64 Rtt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vaddw_PP __builtin_HEXAGON_A2_vaddw + +/* ========================================================================== + Assembly Syntax: Rdd32=vaddw(Rss32,Rtt32):sat + C Intrinsic Prototype: Word64 Q6_P_vaddw_PP_sat(Word64 Rss, Word64 Rtt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vaddw_PP_sat __builtin_HEXAGON_A2_vaddws + +/* ========================================================================== + Assembly Syntax: Rdd32=vavgh(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_vavgh_PP(Word64 Rss, Word64 Rtt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vavgh_PP __builtin_HEXAGON_A2_vavgh + +/* ========================================================================== + Assembly Syntax: Rdd32=vavgh(Rss32,Rtt32):crnd + C Intrinsic Prototype: Word64 Q6_P_vavgh_PP_crnd(Word64 Rss, Word64 Rtt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vavgh_PP_crnd __builtin_HEXAGON_A2_vavghcr + +/* ========================================================================== + Assembly Syntax: Rdd32=vavgh(Rss32,Rtt32):rnd + C Intrinsic Prototype: Word64 Q6_P_vavgh_PP_rnd(Word64 Rss, Word64 Rtt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vavgh_PP_rnd __builtin_HEXAGON_A2_vavghr + +/* ========================================================================== + Assembly Syntax: Rdd32=vavgub(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_vavgub_PP(Word64 Rss, Word64 Rtt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vavgub_PP __builtin_HEXAGON_A2_vavgub + +/* ========================================================================== + Assembly Syntax: Rdd32=vavgub(Rss32,Rtt32):rnd + C Intrinsic Prototype: Word64 Q6_P_vavgub_PP_rnd(Word64 Rss, Word64 Rtt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vavgub_PP_rnd __builtin_HEXAGON_A2_vavgubr + +/* ========================================================================== + Assembly Syntax: Rdd32=vavguh(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_vavguh_PP(Word64 Rss, Word64 Rtt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vavguh_PP __builtin_HEXAGON_A2_vavguh + +/* ========================================================================== + Assembly Syntax: Rdd32=vavguh(Rss32,Rtt32):rnd + C Intrinsic Prototype: Word64 Q6_P_vavguh_PP_rnd(Word64 Rss, Word64 Rtt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vavguh_PP_rnd __builtin_HEXAGON_A2_vavguhr + +/* ========================================================================== + Assembly Syntax: Rdd32=vavguw(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_vavguw_PP(Word64 Rss, Word64 Rtt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vavguw_PP __builtin_HEXAGON_A2_vavguw + +/* ========================================================================== + Assembly Syntax: Rdd32=vavguw(Rss32,Rtt32):rnd + C Intrinsic Prototype: Word64 Q6_P_vavguw_PP_rnd(Word64 Rss, Word64 Rtt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vavguw_PP_rnd __builtin_HEXAGON_A2_vavguwr + +/* ========================================================================== + Assembly Syntax: Rdd32=vavgw(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_vavgw_PP(Word64 Rss, Word64 Rtt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vavgw_PP __builtin_HEXAGON_A2_vavgw + +/* ========================================================================== + Assembly Syntax: Rdd32=vavgw(Rss32,Rtt32):crnd + C Intrinsic Prototype: Word64 Q6_P_vavgw_PP_crnd(Word64 Rss, Word64 Rtt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vavgw_PP_crnd __builtin_HEXAGON_A2_vavgwcr + +/* ========================================================================== + Assembly Syntax: Rdd32=vavgw(Rss32,Rtt32):rnd + C Intrinsic Prototype: Word64 Q6_P_vavgw_PP_rnd(Word64 Rss, Word64 Rtt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vavgw_PP_rnd __builtin_HEXAGON_A2_vavgwr + +/* ========================================================================== + Assembly Syntax: Pd4=vcmpb.eq(Rss32,Rtt32) + C Intrinsic Prototype: Byte Q6_p_vcmpb_eq_PP(Word64 Rss, Word64 Rtt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_vcmpb_eq_PP __builtin_HEXAGON_A2_vcmpbeq + +/* ========================================================================== + Assembly Syntax: Pd4=vcmpb.gtu(Rss32,Rtt32) + C Intrinsic Prototype: Byte Q6_p_vcmpb_gtu_PP(Word64 Rss, Word64 Rtt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_vcmpb_gtu_PP __builtin_HEXAGON_A2_vcmpbgtu + +/* ========================================================================== + Assembly Syntax: Pd4=vcmph.eq(Rss32,Rtt32) + C Intrinsic Prototype: Byte Q6_p_vcmph_eq_PP(Word64 Rss, Word64 Rtt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_vcmph_eq_PP __builtin_HEXAGON_A2_vcmpheq + +/* ========================================================================== + Assembly Syntax: Pd4=vcmph.gt(Rss32,Rtt32) + C Intrinsic Prototype: Byte Q6_p_vcmph_gt_PP(Word64 Rss, Word64 Rtt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_vcmph_gt_PP __builtin_HEXAGON_A2_vcmphgt + +/* ========================================================================== + Assembly Syntax: Pd4=vcmph.gtu(Rss32,Rtt32) + C Intrinsic Prototype: Byte Q6_p_vcmph_gtu_PP(Word64 Rss, Word64 Rtt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_vcmph_gtu_PP __builtin_HEXAGON_A2_vcmphgtu + +/* ========================================================================== + Assembly Syntax: Pd4=vcmpw.eq(Rss32,Rtt32) + C Intrinsic Prototype: Byte Q6_p_vcmpw_eq_PP(Word64 Rss, Word64 Rtt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_vcmpw_eq_PP __builtin_HEXAGON_A2_vcmpweq + +/* ========================================================================== + Assembly Syntax: Pd4=vcmpw.gt(Rss32,Rtt32) + C Intrinsic Prototype: Byte Q6_p_vcmpw_gt_PP(Word64 Rss, Word64 Rtt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_vcmpw_gt_PP __builtin_HEXAGON_A2_vcmpwgt + +/* ========================================================================== + Assembly Syntax: Pd4=vcmpw.gtu(Rss32,Rtt32) + C Intrinsic Prototype: Byte Q6_p_vcmpw_gtu_PP(Word64 Rss, Word64 Rtt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_vcmpw_gtu_PP __builtin_HEXAGON_A2_vcmpwgtu + +/* ========================================================================== + Assembly Syntax: Rdd32=vconj(Rss32):sat + C Intrinsic Prototype: Word64 Q6_P_vconj_P_sat(Word64 Rss) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vconj_P_sat __builtin_HEXAGON_A2_vconj + +/* ========================================================================== + Assembly Syntax: Rdd32=vmaxb(Rtt32,Rss32) + C Intrinsic Prototype: Word64 Q6_P_vmaxb_PP(Word64 Rtt, Word64 Rss) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmaxb_PP __builtin_HEXAGON_A2_vmaxb + +/* ========================================================================== + Assembly Syntax: Rdd32=vmaxh(Rtt32,Rss32) + C Intrinsic Prototype: Word64 Q6_P_vmaxh_PP(Word64 Rtt, Word64 Rss) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmaxh_PP __builtin_HEXAGON_A2_vmaxh + +/* ========================================================================== + Assembly Syntax: Rdd32=vmaxub(Rtt32,Rss32) + C Intrinsic Prototype: Word64 Q6_P_vmaxub_PP(Word64 Rtt, Word64 Rss) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmaxub_PP __builtin_HEXAGON_A2_vmaxub + +/* ========================================================================== + Assembly Syntax: Rdd32=vmaxuh(Rtt32,Rss32) + C Intrinsic Prototype: Word64 Q6_P_vmaxuh_PP(Word64 Rtt, Word64 Rss) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmaxuh_PP __builtin_HEXAGON_A2_vmaxuh + +/* ========================================================================== + Assembly Syntax: Rdd32=vmaxuw(Rtt32,Rss32) + C Intrinsic Prototype: Word64 Q6_P_vmaxuw_PP(Word64 Rtt, Word64 Rss) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmaxuw_PP __builtin_HEXAGON_A2_vmaxuw + +/* ========================================================================== + Assembly Syntax: Rdd32=vmaxw(Rtt32,Rss32) + C Intrinsic Prototype: Word64 Q6_P_vmaxw_PP(Word64 Rtt, Word64 Rss) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmaxw_PP __builtin_HEXAGON_A2_vmaxw + +/* ========================================================================== + Assembly Syntax: Rdd32=vminb(Rtt32,Rss32) + C Intrinsic Prototype: Word64 Q6_P_vminb_PP(Word64 Rtt, Word64 Rss) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vminb_PP __builtin_HEXAGON_A2_vminb + +/* ========================================================================== + Assembly Syntax: Rdd32=vminh(Rtt32,Rss32) + C Intrinsic Prototype: Word64 Q6_P_vminh_PP(Word64 Rtt, Word64 Rss) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vminh_PP __builtin_HEXAGON_A2_vminh + +/* ========================================================================== + Assembly Syntax: Rdd32=vminub(Rtt32,Rss32) + C Intrinsic Prototype: Word64 Q6_P_vminub_PP(Word64 Rtt, Word64 Rss) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vminub_PP __builtin_HEXAGON_A2_vminub + +/* ========================================================================== + Assembly Syntax: Rdd32=vminuh(Rtt32,Rss32) + C Intrinsic Prototype: Word64 Q6_P_vminuh_PP(Word64 Rtt, Word64 Rss) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vminuh_PP __builtin_HEXAGON_A2_vminuh + +/* ========================================================================== + Assembly Syntax: Rdd32=vminuw(Rtt32,Rss32) + C Intrinsic Prototype: Word64 Q6_P_vminuw_PP(Word64 Rtt, Word64 Rss) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vminuw_PP __builtin_HEXAGON_A2_vminuw + +/* ========================================================================== + Assembly Syntax: Rdd32=vminw(Rtt32,Rss32) + C Intrinsic Prototype: Word64 Q6_P_vminw_PP(Word64 Rtt, Word64 Rss) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vminw_PP __builtin_HEXAGON_A2_vminw + +/* ========================================================================== + Assembly Syntax: Rdd32=vnavgh(Rtt32,Rss32) + C Intrinsic Prototype: Word64 Q6_P_vnavgh_PP(Word64 Rtt, Word64 Rss) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vnavgh_PP __builtin_HEXAGON_A2_vnavgh + +/* ========================================================================== + Assembly Syntax: Rdd32=vnavgh(Rtt32,Rss32):crnd:sat + C Intrinsic Prototype: Word64 Q6_P_vnavgh_PP_crnd_sat(Word64 Rtt, Word64 Rss) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vnavgh_PP_crnd_sat __builtin_HEXAGON_A2_vnavghcr + +/* ========================================================================== + Assembly Syntax: Rdd32=vnavgh(Rtt32,Rss32):rnd:sat + C Intrinsic Prototype: Word64 Q6_P_vnavgh_PP_rnd_sat(Word64 Rtt, Word64 Rss) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vnavgh_PP_rnd_sat __builtin_HEXAGON_A2_vnavghr + +/* ========================================================================== + Assembly Syntax: Rdd32=vnavgw(Rtt32,Rss32) + C Intrinsic Prototype: Word64 Q6_P_vnavgw_PP(Word64 Rtt, Word64 Rss) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vnavgw_PP __builtin_HEXAGON_A2_vnavgw + +/* ========================================================================== + Assembly Syntax: Rdd32=vnavgw(Rtt32,Rss32):crnd:sat + C Intrinsic Prototype: Word64 Q6_P_vnavgw_PP_crnd_sat(Word64 Rtt, Word64 Rss) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vnavgw_PP_crnd_sat __builtin_HEXAGON_A2_vnavgwcr + +/* ========================================================================== + Assembly Syntax: Rdd32=vnavgw(Rtt32,Rss32):rnd:sat + C Intrinsic Prototype: Word64 Q6_P_vnavgw_PP_rnd_sat(Word64 Rtt, Word64 Rss) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vnavgw_PP_rnd_sat __builtin_HEXAGON_A2_vnavgwr + +/* ========================================================================== + Assembly Syntax: Rdd32=vraddub(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_vraddub_PP(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vraddub_PP __builtin_HEXAGON_A2_vraddub + +/* ========================================================================== + Assembly Syntax: Rxx32+=vraddub(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_vraddubacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vraddubacc_PP __builtin_HEXAGON_A2_vraddub_acc + +/* ========================================================================== + Assembly Syntax: Rdd32=vrsadub(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_vrsadub_PP(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vrsadub_PP __builtin_HEXAGON_A2_vrsadub + +/* ========================================================================== + Assembly Syntax: Rxx32+=vrsadub(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_vrsadubacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vrsadubacc_PP __builtin_HEXAGON_A2_vrsadub_acc + +/* ========================================================================== + Assembly Syntax: Rdd32=vsubb(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_vsubb_PP(Word64 Rss, Word64 Rtt) + Instruction Type: MAPPING + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_P_vsubb_PP __builtin_HEXAGON_A2_vsubb_map + +/* ========================================================================== + Assembly Syntax: Rdd32=vsubh(Rtt32,Rss32) + C Intrinsic Prototype: Word64 Q6_P_vsubh_PP(Word64 Rtt, Word64 Rss) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vsubh_PP __builtin_HEXAGON_A2_vsubh + +/* ========================================================================== + Assembly Syntax: Rdd32=vsubh(Rtt32,Rss32):sat + C Intrinsic Prototype: Word64 Q6_P_vsubh_PP_sat(Word64 Rtt, Word64 Rss) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vsubh_PP_sat __builtin_HEXAGON_A2_vsubhs + +/* ========================================================================== + Assembly Syntax: Rdd32=vsubub(Rtt32,Rss32) + C Intrinsic Prototype: Word64 Q6_P_vsubub_PP(Word64 Rtt, Word64 Rss) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vsubub_PP __builtin_HEXAGON_A2_vsubub + +/* ========================================================================== + Assembly Syntax: Rdd32=vsubub(Rtt32,Rss32):sat + C Intrinsic Prototype: Word64 Q6_P_vsubub_PP_sat(Word64 Rtt, Word64 Rss) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vsubub_PP_sat __builtin_HEXAGON_A2_vsububs + +/* ========================================================================== + Assembly Syntax: Rdd32=vsubuh(Rtt32,Rss32):sat + C Intrinsic Prototype: Word64 Q6_P_vsubuh_PP_sat(Word64 Rtt, Word64 Rss) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vsubuh_PP_sat __builtin_HEXAGON_A2_vsubuhs + +/* ========================================================================== + Assembly Syntax: Rdd32=vsubw(Rtt32,Rss32) + C Intrinsic Prototype: Word64 Q6_P_vsubw_PP(Word64 Rtt, Word64 Rss) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vsubw_PP __builtin_HEXAGON_A2_vsubw + +/* ========================================================================== + Assembly Syntax: Rdd32=vsubw(Rtt32,Rss32):sat + C Intrinsic Prototype: Word64 Q6_P_vsubw_PP_sat(Word64 Rtt, Word64 Rss) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vsubw_PP_sat __builtin_HEXAGON_A2_vsubws + +/* ========================================================================== + Assembly Syntax: Rd32=xor(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_xor_RR(Word32 Rs, Word32 Rt) + Instruction Type: ALU32_3op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_xor_RR __builtin_HEXAGON_A2_xor + +/* ========================================================================== + Assembly Syntax: Rdd32=xor(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_xor_PP(Word64 Rss, Word64 Rtt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_xor_PP __builtin_HEXAGON_A2_xorp + +/* ========================================================================== + Assembly Syntax: Rd32=zxtb(Rs32) + C Intrinsic Prototype: Word32 Q6_R_zxtb_R(Word32 Rs) + Instruction Type: ALU32_2op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_zxtb_R __builtin_HEXAGON_A2_zxtb + +/* ========================================================================== + Assembly Syntax: Rd32=zxth(Rs32) + C Intrinsic Prototype: Word32 Q6_R_zxth_R(Word32 Rs) + Instruction Type: ALU32_2op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_zxth_R __builtin_HEXAGON_A2_zxth + +/* ========================================================================== + Assembly Syntax: Rd32=and(Rt32,~Rs32) + C Intrinsic Prototype: Word32 Q6_R_and_RnR(Word32 Rt, Word32 Rs) + Instruction Type: ALU32_3op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_and_RnR __builtin_HEXAGON_A4_andn + +/* ========================================================================== + Assembly Syntax: Rdd32=and(Rtt32,~Rss32) + C Intrinsic Prototype: Word64 Q6_P_and_PnP(Word64 Rtt, Word64 Rss) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_and_PnP __builtin_HEXAGON_A4_andnp + +/* ========================================================================== + Assembly Syntax: Rdd32=bitsplit(Rs32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_bitsplit_RR(Word32 Rs, Word32 Rt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_bitsplit_RR __builtin_HEXAGON_A4_bitsplit + +/* ========================================================================== + Assembly Syntax: Rdd32=bitsplit(Rs32,#u5) + C Intrinsic Prototype: Word64 Q6_P_bitsplit_RI(Word32 Rs, Word32 Iu5) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_bitsplit_RI __builtin_HEXAGON_A4_bitspliti + +/* ========================================================================== + Assembly Syntax: Pd4=boundscheck(Rs32,Rtt32) + C Intrinsic Prototype: Byte Q6_p_boundscheck_RP(Word32 Rs, Word64 Rtt) + Instruction Type: ALU64 + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_p_boundscheck_RP __builtin_HEXAGON_A4_boundscheck + +/* ========================================================================== + Assembly Syntax: Pd4=cmpb.eq(Rs32,Rt32) + C Intrinsic Prototype: Byte Q6_p_cmpb_eq_RR(Word32 Rs, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_cmpb_eq_RR __builtin_HEXAGON_A4_cmpbeq + +/* ========================================================================== + Assembly Syntax: Pd4=cmpb.eq(Rs32,#u8) + C Intrinsic Prototype: Byte Q6_p_cmpb_eq_RI(Word32 Rs, Word32 Iu8) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_cmpb_eq_RI __builtin_HEXAGON_A4_cmpbeqi + +/* ========================================================================== + Assembly Syntax: Pd4=cmpb.gt(Rs32,Rt32) + C Intrinsic Prototype: Byte Q6_p_cmpb_gt_RR(Word32 Rs, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_cmpb_gt_RR __builtin_HEXAGON_A4_cmpbgt + +/* ========================================================================== + Assembly Syntax: Pd4=cmpb.gt(Rs32,#s8) + C Intrinsic Prototype: Byte Q6_p_cmpb_gt_RI(Word32 Rs, Word32 Is8) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_cmpb_gt_RI __builtin_HEXAGON_A4_cmpbgti + +/* ========================================================================== + Assembly Syntax: Pd4=cmpb.gtu(Rs32,Rt32) + C Intrinsic Prototype: Byte Q6_p_cmpb_gtu_RR(Word32 Rs, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_cmpb_gtu_RR __builtin_HEXAGON_A4_cmpbgtu + +/* ========================================================================== + Assembly Syntax: Pd4=cmpb.gtu(Rs32,#u7) + C Intrinsic Prototype: Byte Q6_p_cmpb_gtu_RI(Word32 Rs, Word32 Iu7) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_cmpb_gtu_RI __builtin_HEXAGON_A4_cmpbgtui + +/* ========================================================================== + Assembly Syntax: Pd4=cmph.eq(Rs32,Rt32) + C Intrinsic Prototype: Byte Q6_p_cmph_eq_RR(Word32 Rs, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_cmph_eq_RR __builtin_HEXAGON_A4_cmpheq + +/* ========================================================================== + Assembly Syntax: Pd4=cmph.eq(Rs32,#s8) + C Intrinsic Prototype: Byte Q6_p_cmph_eq_RI(Word32 Rs, Word32 Is8) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_cmph_eq_RI __builtin_HEXAGON_A4_cmpheqi + +/* ========================================================================== + Assembly Syntax: Pd4=cmph.gt(Rs32,Rt32) + C Intrinsic Prototype: Byte Q6_p_cmph_gt_RR(Word32 Rs, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_cmph_gt_RR __builtin_HEXAGON_A4_cmphgt + +/* ========================================================================== + Assembly Syntax: Pd4=cmph.gt(Rs32,#s8) + C Intrinsic Prototype: Byte Q6_p_cmph_gt_RI(Word32 Rs, Word32 Is8) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_cmph_gt_RI __builtin_HEXAGON_A4_cmphgti + +/* ========================================================================== + Assembly Syntax: Pd4=cmph.gtu(Rs32,Rt32) + C Intrinsic Prototype: Byte Q6_p_cmph_gtu_RR(Word32 Rs, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_cmph_gtu_RR __builtin_HEXAGON_A4_cmphgtu + +/* ========================================================================== + Assembly Syntax: Pd4=cmph.gtu(Rs32,#u7) + C Intrinsic Prototype: Byte Q6_p_cmph_gtu_RI(Word32 Rs, Word32 Iu7) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_cmph_gtu_RI __builtin_HEXAGON_A4_cmphgtui + +/* ========================================================================== + Assembly Syntax: Rdd32=combine(#s8,Rs32) + C Intrinsic Prototype: Word64 Q6_P_combine_IR(Word32 Is8, Word32 Rs) + Instruction Type: ALU32_2op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_P_combine_IR __builtin_HEXAGON_A4_combineir + +/* ========================================================================== + Assembly Syntax: Rdd32=combine(Rs32,#s8) + C Intrinsic Prototype: Word64 Q6_P_combine_RI(Word32 Rs, Word32 Is8) + Instruction Type: ALU32_2op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_P_combine_RI __builtin_HEXAGON_A4_combineri + +/* ========================================================================== + Assembly Syntax: Rd32=cround(Rs32,#u5) + C Intrinsic Prototype: Word32 Q6_R_cround_RI(Word32 Rs, Word32 Iu5) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_cround_RI __builtin_HEXAGON_A4_cround_ri + +/* ========================================================================== + Assembly Syntax: Rd32=cround(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_cround_RR(Word32 Rs, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_cround_RR __builtin_HEXAGON_A4_cround_rr + +/* ========================================================================== + Assembly Syntax: Rd32=modwrap(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_modwrap_RR(Word32 Rs, Word32 Rt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_modwrap_RR __builtin_HEXAGON_A4_modwrapu + +/* ========================================================================== + Assembly Syntax: Rd32=or(Rt32,~Rs32) + C Intrinsic Prototype: Word32 Q6_R_or_RnR(Word32 Rt, Word32 Rs) + Instruction Type: ALU32_3op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_or_RnR __builtin_HEXAGON_A4_orn + +/* ========================================================================== + Assembly Syntax: Rdd32=or(Rtt32,~Rss32) + C Intrinsic Prototype: Word64 Q6_P_or_PnP(Word64 Rtt, Word64 Rss) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_or_PnP __builtin_HEXAGON_A4_ornp + +/* ========================================================================== + Assembly Syntax: Rd32=cmp.eq(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_cmp_eq_RR(Word32 Rs, Word32 Rt) + Instruction Type: ALU32_3op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_cmp_eq_RR __builtin_HEXAGON_A4_rcmpeq + +/* ========================================================================== + Assembly Syntax: Rd32=cmp.eq(Rs32,#s8) + C Intrinsic Prototype: Word32 Q6_R_cmp_eq_RI(Word32 Rs, Word32 Is8) + Instruction Type: ALU32_2op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_cmp_eq_RI __builtin_HEXAGON_A4_rcmpeqi + +/* ========================================================================== + Assembly Syntax: Rd32=!cmp.eq(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_not_cmp_eq_RR(Word32 Rs, Word32 Rt) + Instruction Type: ALU32_3op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_not_cmp_eq_RR __builtin_HEXAGON_A4_rcmpneq + +/* ========================================================================== + Assembly Syntax: Rd32=!cmp.eq(Rs32,#s8) + C Intrinsic Prototype: Word32 Q6_R_not_cmp_eq_RI(Word32 Rs, Word32 Is8) + Instruction Type: ALU32_2op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_not_cmp_eq_RI __builtin_HEXAGON_A4_rcmpneqi + +/* ========================================================================== + Assembly Syntax: Rd32=round(Rs32,#u5) + C Intrinsic Prototype: Word32 Q6_R_round_RI(Word32 Rs, Word32 Iu5) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_round_RI __builtin_HEXAGON_A4_round_ri + +/* ========================================================================== + Assembly Syntax: Rd32=round(Rs32,#u5):sat + C Intrinsic Prototype: Word32 Q6_R_round_RI_sat(Word32 Rs, Word32 Iu5) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_round_RI_sat __builtin_HEXAGON_A4_round_ri_sat + +/* ========================================================================== + Assembly Syntax: Rd32=round(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_round_RR(Word32 Rs, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_round_RR __builtin_HEXAGON_A4_round_rr + +/* ========================================================================== + Assembly Syntax: Rd32=round(Rs32,Rt32):sat + C Intrinsic Prototype: Word32 Q6_R_round_RR_sat(Word32 Rs, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_round_RR_sat __builtin_HEXAGON_A4_round_rr_sat + +/* ========================================================================== + Assembly Syntax: Pd4=tlbmatch(Rss32,Rt32) + C Intrinsic Prototype: Byte Q6_p_tlbmatch_PR(Word64 Rss, Word32 Rt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_tlbmatch_PR __builtin_HEXAGON_A4_tlbmatch + +/* ========================================================================== + Assembly Syntax: Pd4=any8(vcmpb.eq(Rss32,Rtt32)) + C Intrinsic Prototype: Byte Q6_p_any8_vcmpb_eq_PP(Word64 Rss, Word64 Rtt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_any8_vcmpb_eq_PP __builtin_HEXAGON_A4_vcmpbeq_any + +/* ========================================================================== + Assembly Syntax: Pd4=vcmpb.eq(Rss32,#u8) + C Intrinsic Prototype: Byte Q6_p_vcmpb_eq_PI(Word64 Rss, Word32 Iu8) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_vcmpb_eq_PI __builtin_HEXAGON_A4_vcmpbeqi + +/* ========================================================================== + Assembly Syntax: Pd4=vcmpb.gt(Rss32,Rtt32) + C Intrinsic Prototype: Byte Q6_p_vcmpb_gt_PP(Word64 Rss, Word64 Rtt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_vcmpb_gt_PP __builtin_HEXAGON_A4_vcmpbgt + +/* ========================================================================== + Assembly Syntax: Pd4=vcmpb.gt(Rss32,#s8) + C Intrinsic Prototype: Byte Q6_p_vcmpb_gt_PI(Word64 Rss, Word32 Is8) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_vcmpb_gt_PI __builtin_HEXAGON_A4_vcmpbgti + +/* ========================================================================== + Assembly Syntax: Pd4=vcmpb.gtu(Rss32,#u7) + C Intrinsic Prototype: Byte Q6_p_vcmpb_gtu_PI(Word64 Rss, Word32 Iu7) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_vcmpb_gtu_PI __builtin_HEXAGON_A4_vcmpbgtui + +/* ========================================================================== + Assembly Syntax: Pd4=vcmph.eq(Rss32,#s8) + C Intrinsic Prototype: Byte Q6_p_vcmph_eq_PI(Word64 Rss, Word32 Is8) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_vcmph_eq_PI __builtin_HEXAGON_A4_vcmpheqi + +/* ========================================================================== + Assembly Syntax: Pd4=vcmph.gt(Rss32,#s8) + C Intrinsic Prototype: Byte Q6_p_vcmph_gt_PI(Word64 Rss, Word32 Is8) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_vcmph_gt_PI __builtin_HEXAGON_A4_vcmphgti + +/* ========================================================================== + Assembly Syntax: Pd4=vcmph.gtu(Rss32,#u7) + C Intrinsic Prototype: Byte Q6_p_vcmph_gtu_PI(Word64 Rss, Word32 Iu7) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_vcmph_gtu_PI __builtin_HEXAGON_A4_vcmphgtui + +/* ========================================================================== + Assembly Syntax: Pd4=vcmpw.eq(Rss32,#s8) + C Intrinsic Prototype: Byte Q6_p_vcmpw_eq_PI(Word64 Rss, Word32 Is8) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_vcmpw_eq_PI __builtin_HEXAGON_A4_vcmpweqi + +/* ========================================================================== + Assembly Syntax: Pd4=vcmpw.gt(Rss32,#s8) + C Intrinsic Prototype: Byte Q6_p_vcmpw_gt_PI(Word64 Rss, Word32 Is8) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_vcmpw_gt_PI __builtin_HEXAGON_A4_vcmpwgti + +/* ========================================================================== + Assembly Syntax: Pd4=vcmpw.gtu(Rss32,#u7) + C Intrinsic Prototype: Byte Q6_p_vcmpw_gtu_PI(Word64 Rss, Word32 Iu7) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_vcmpw_gtu_PI __builtin_HEXAGON_A4_vcmpwgtui + +/* ========================================================================== + Assembly Syntax: Rxx32=vrmaxh(Rss32,Ru32) + C Intrinsic Prototype: Word64 Q6_P_vrmaxh_PR(Word64 Rxx, Word64 Rss, Word32 Ru) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vrmaxh_PR __builtin_HEXAGON_A4_vrmaxh + +/* ========================================================================== + Assembly Syntax: Rxx32=vrmaxuh(Rss32,Ru32) + C Intrinsic Prototype: Word64 Q6_P_vrmaxuh_PR(Word64 Rxx, Word64 Rss, Word32 Ru) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vrmaxuh_PR __builtin_HEXAGON_A4_vrmaxuh + +/* ========================================================================== + Assembly Syntax: Rxx32=vrmaxuw(Rss32,Ru32) + C Intrinsic Prototype: Word64 Q6_P_vrmaxuw_PR(Word64 Rxx, Word64 Rss, Word32 Ru) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vrmaxuw_PR __builtin_HEXAGON_A4_vrmaxuw + +/* ========================================================================== + Assembly Syntax: Rxx32=vrmaxw(Rss32,Ru32) + C Intrinsic Prototype: Word64 Q6_P_vrmaxw_PR(Word64 Rxx, Word64 Rss, Word32 Ru) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vrmaxw_PR __builtin_HEXAGON_A4_vrmaxw + +/* ========================================================================== + Assembly Syntax: Rxx32=vrminh(Rss32,Ru32) + C Intrinsic Prototype: Word64 Q6_P_vrminh_PR(Word64 Rxx, Word64 Rss, Word32 Ru) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vrminh_PR __builtin_HEXAGON_A4_vrminh + +/* ========================================================================== + Assembly Syntax: Rxx32=vrminuh(Rss32,Ru32) + C Intrinsic Prototype: Word64 Q6_P_vrminuh_PR(Word64 Rxx, Word64 Rss, Word32 Ru) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vrminuh_PR __builtin_HEXAGON_A4_vrminuh + +/* ========================================================================== + Assembly Syntax: Rxx32=vrminuw(Rss32,Ru32) + C Intrinsic Prototype: Word64 Q6_P_vrminuw_PR(Word64 Rxx, Word64 Rss, Word32 Ru) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vrminuw_PR __builtin_HEXAGON_A4_vrminuw + +/* ========================================================================== + Assembly Syntax: Rxx32=vrminw(Rss32,Ru32) + C Intrinsic Prototype: Word64 Q6_P_vrminw_PR(Word64 Rxx, Word64 Rss, Word32 Ru) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vrminw_PR __builtin_HEXAGON_A4_vrminw + +/* ========================================================================== + Assembly Syntax: Rd32=vaddhub(Rss32,Rtt32):sat + C Intrinsic Prototype: Word32 Q6_R_vaddhub_PP_sat(Word64 Rss, Word64 Rtt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_vaddhub_PP_sat __builtin_HEXAGON_A5_vaddhubs + +/* ========================================================================== + Assembly Syntax: Pd4=all8(Ps4) + C Intrinsic Prototype: Byte Q6_p_all8_p(Byte Ps) + Instruction Type: CR + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_all8_p __builtin_HEXAGON_C2_all8 + +/* ========================================================================== + Assembly Syntax: Pd4=and(Pt4,Ps4) + C Intrinsic Prototype: Byte Q6_p_and_pp(Byte Pt, Byte Ps) + Instruction Type: CR + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_and_pp __builtin_HEXAGON_C2_and + +/* ========================================================================== + Assembly Syntax: Pd4=and(Pt4,!Ps4) + C Intrinsic Prototype: Byte Q6_p_and_pnp(Byte Pt, Byte Ps) + Instruction Type: CR + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_and_pnp __builtin_HEXAGON_C2_andn + +/* ========================================================================== + Assembly Syntax: Pd4=any8(Ps4) + C Intrinsic Prototype: Byte Q6_p_any8_p(Byte Ps) + Instruction Type: CR + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_any8_p __builtin_HEXAGON_C2_any8 + +/* ========================================================================== + Assembly Syntax: Pd4=bitsclr(Rs32,Rt32) + C Intrinsic Prototype: Byte Q6_p_bitsclr_RR(Word32 Rs, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_bitsclr_RR __builtin_HEXAGON_C2_bitsclr + +/* ========================================================================== + Assembly Syntax: Pd4=bitsclr(Rs32,#u6) + C Intrinsic Prototype: Byte Q6_p_bitsclr_RI(Word32 Rs, Word32 Iu6) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_bitsclr_RI __builtin_HEXAGON_C2_bitsclri + +/* ========================================================================== + Assembly Syntax: Pd4=bitsset(Rs32,Rt32) + C Intrinsic Prototype: Byte Q6_p_bitsset_RR(Word32 Rs, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_bitsset_RR __builtin_HEXAGON_C2_bitsset + +/* ========================================================================== + Assembly Syntax: Pd4=cmp.eq(Rs32,Rt32) + C Intrinsic Prototype: Byte Q6_p_cmp_eq_RR(Word32 Rs, Word32 Rt) + Instruction Type: ALU32_3op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_p_cmp_eq_RR __builtin_HEXAGON_C2_cmpeq + +/* ========================================================================== + Assembly Syntax: Pd4=cmp.eq(Rs32,#s10) + C Intrinsic Prototype: Byte Q6_p_cmp_eq_RI(Word32 Rs, Word32 Is10) + Instruction Type: ALU32_2op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_p_cmp_eq_RI __builtin_HEXAGON_C2_cmpeqi + +/* ========================================================================== + Assembly Syntax: Pd4=cmp.eq(Rss32,Rtt32) + C Intrinsic Prototype: Byte Q6_p_cmp_eq_PP(Word64 Rss, Word64 Rtt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_cmp_eq_PP __builtin_HEXAGON_C2_cmpeqp + +/* ========================================================================== + Assembly Syntax: Pd4=cmp.ge(Rs32,#s8) + C Intrinsic Prototype: Byte Q6_p_cmp_ge_RI(Word32 Rs, Word32 Is8) + Instruction Type: ALU32_2op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_p_cmp_ge_RI __builtin_HEXAGON_C2_cmpgei + +/* ========================================================================== + Assembly Syntax: Pd4=cmp.geu(Rs32,#u8) + C Intrinsic Prototype: Byte Q6_p_cmp_geu_RI(Word32 Rs, Word32 Iu8) + Instruction Type: ALU32_2op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_p_cmp_geu_RI __builtin_HEXAGON_C2_cmpgeui + +/* ========================================================================== + Assembly Syntax: Pd4=cmp.gt(Rs32,Rt32) + C Intrinsic Prototype: Byte Q6_p_cmp_gt_RR(Word32 Rs, Word32 Rt) + Instruction Type: ALU32_3op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_p_cmp_gt_RR __builtin_HEXAGON_C2_cmpgt + +/* ========================================================================== + Assembly Syntax: Pd4=cmp.gt(Rs32,#s10) + C Intrinsic Prototype: Byte Q6_p_cmp_gt_RI(Word32 Rs, Word32 Is10) + Instruction Type: ALU32_2op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_p_cmp_gt_RI __builtin_HEXAGON_C2_cmpgti + +/* ========================================================================== + Assembly Syntax: Pd4=cmp.gt(Rss32,Rtt32) + C Intrinsic Prototype: Byte Q6_p_cmp_gt_PP(Word64 Rss, Word64 Rtt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_cmp_gt_PP __builtin_HEXAGON_C2_cmpgtp + +/* ========================================================================== + Assembly Syntax: Pd4=cmp.gtu(Rs32,Rt32) + C Intrinsic Prototype: Byte Q6_p_cmp_gtu_RR(Word32 Rs, Word32 Rt) + Instruction Type: ALU32_3op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_p_cmp_gtu_RR __builtin_HEXAGON_C2_cmpgtu + +/* ========================================================================== + Assembly Syntax: Pd4=cmp.gtu(Rs32,#u9) + C Intrinsic Prototype: Byte Q6_p_cmp_gtu_RI(Word32 Rs, Word32 Iu9) + Instruction Type: ALU32_2op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_p_cmp_gtu_RI __builtin_HEXAGON_C2_cmpgtui + +/* ========================================================================== + Assembly Syntax: Pd4=cmp.gtu(Rss32,Rtt32) + C Intrinsic Prototype: Byte Q6_p_cmp_gtu_PP(Word64 Rss, Word64 Rtt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_cmp_gtu_PP __builtin_HEXAGON_C2_cmpgtup + +/* ========================================================================== + Assembly Syntax: Pd4=cmp.lt(Rs32,Rt32) + C Intrinsic Prototype: Byte Q6_p_cmp_lt_RR(Word32 Rs, Word32 Rt) + Instruction Type: ALU32_3op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_p_cmp_lt_RR __builtin_HEXAGON_C2_cmplt + +/* ========================================================================== + Assembly Syntax: Pd4=cmp.ltu(Rs32,Rt32) + C Intrinsic Prototype: Byte Q6_p_cmp_ltu_RR(Word32 Rs, Word32 Rt) + Instruction Type: ALU32_3op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_p_cmp_ltu_RR __builtin_HEXAGON_C2_cmpltu + +/* ========================================================================== + Assembly Syntax: Rdd32=mask(Pt4) + C Intrinsic Prototype: Word64 Q6_P_mask_p(Byte Pt) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mask_p __builtin_HEXAGON_C2_mask + +/* ========================================================================== + Assembly Syntax: Rd32=mux(Pu4,Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_mux_pRR(Byte Pu, Word32 Rs, Word32 Rt) + Instruction Type: ALU32_3op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_mux_pRR __builtin_HEXAGON_C2_mux + +/* ========================================================================== + Assembly Syntax: Rd32=mux(Pu4,#s8,#S8) + C Intrinsic Prototype: Word32 Q6_R_mux_pII(Byte Pu, Word32 Is8, Word32 IS8) + Instruction Type: ALU32_2op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_mux_pII __builtin_HEXAGON_C2_muxii + +/* ========================================================================== + Assembly Syntax: Rd32=mux(Pu4,Rs32,#s8) + C Intrinsic Prototype: Word32 Q6_R_mux_pRI(Byte Pu, Word32 Rs, Word32 Is8) + Instruction Type: ALU32_2op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_mux_pRI __builtin_HEXAGON_C2_muxir + +/* ========================================================================== + Assembly Syntax: Rd32=mux(Pu4,#s8,Rs32) + C Intrinsic Prototype: Word32 Q6_R_mux_pIR(Byte Pu, Word32 Is8, Word32 Rs) + Instruction Type: ALU32_2op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_mux_pIR __builtin_HEXAGON_C2_muxri + +/* ========================================================================== + Assembly Syntax: Pd4=not(Ps4) + C Intrinsic Prototype: Byte Q6_p_not_p(Byte Ps) + Instruction Type: CR + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_not_p __builtin_HEXAGON_C2_not + +/* ========================================================================== + Assembly Syntax: Pd4=or(Pt4,Ps4) + C Intrinsic Prototype: Byte Q6_p_or_pp(Byte Pt, Byte Ps) + Instruction Type: CR + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_or_pp __builtin_HEXAGON_C2_or + +/* ========================================================================== + Assembly Syntax: Pd4=or(Pt4,!Ps4) + C Intrinsic Prototype: Byte Q6_p_or_pnp(Byte Pt, Byte Ps) + Instruction Type: CR + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_or_pnp __builtin_HEXAGON_C2_orn + +/* ========================================================================== + Assembly Syntax: Pd4=Ps4 + C Intrinsic Prototype: Byte Q6_p_equals_p(Byte Ps) + Instruction Type: MAPPING + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_p_equals_p __builtin_HEXAGON_C2_pxfer_map + +/* ========================================================================== + Assembly Syntax: Rd32=Ps4 + C Intrinsic Prototype: Word32 Q6_R_equals_p(Byte Ps) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_equals_p __builtin_HEXAGON_C2_tfrpr + +/* ========================================================================== + Assembly Syntax: Pd4=Rs32 + C Intrinsic Prototype: Byte Q6_p_equals_R(Word32 Rs) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_equals_R __builtin_HEXAGON_C2_tfrrp + +/* ========================================================================== + Assembly Syntax: Rd32=vitpack(Ps4,Pt4) + C Intrinsic Prototype: Word32 Q6_R_vitpack_pp(Byte Ps, Byte Pt) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_vitpack_pp __builtin_HEXAGON_C2_vitpack + +/* ========================================================================== + Assembly Syntax: Rdd32=vmux(Pu4,Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_vmux_pPP(Byte Pu, Word64 Rss, Word64 Rtt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmux_pPP __builtin_HEXAGON_C2_vmux + +/* ========================================================================== + Assembly Syntax: Pd4=xor(Ps4,Pt4) + C Intrinsic Prototype: Byte Q6_p_xor_pp(Byte Ps, Byte Pt) + Instruction Type: CR + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_xor_pp __builtin_HEXAGON_C2_xor + +/* ========================================================================== + Assembly Syntax: Pd4=and(Ps4,and(Pt4,Pu4)) + C Intrinsic Prototype: Byte Q6_p_and_and_ppp(Byte Ps, Byte Pt, Byte Pu) + Instruction Type: CR + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_and_and_ppp __builtin_HEXAGON_C4_and_and + +/* ========================================================================== + Assembly Syntax: Pd4=and(Ps4,and(Pt4,!Pu4)) + C Intrinsic Prototype: Byte Q6_p_and_and_ppnp(Byte Ps, Byte Pt, Byte Pu) + Instruction Type: CR + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_and_and_ppnp __builtin_HEXAGON_C4_and_andn + +/* ========================================================================== + Assembly Syntax: Pd4=and(Ps4,or(Pt4,Pu4)) + C Intrinsic Prototype: Byte Q6_p_and_or_ppp(Byte Ps, Byte Pt, Byte Pu) + Instruction Type: CR + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_and_or_ppp __builtin_HEXAGON_C4_and_or + +/* ========================================================================== + Assembly Syntax: Pd4=and(Ps4,or(Pt4,!Pu4)) + C Intrinsic Prototype: Byte Q6_p_and_or_ppnp(Byte Ps, Byte Pt, Byte Pu) + Instruction Type: CR + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_and_or_ppnp __builtin_HEXAGON_C4_and_orn + +/* ========================================================================== + Assembly Syntax: Pd4=!cmp.gt(Rs32,Rt32) + C Intrinsic Prototype: Byte Q6_p_not_cmp_gt_RR(Word32 Rs, Word32 Rt) + Instruction Type: ALU32_3op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_p_not_cmp_gt_RR __builtin_HEXAGON_C4_cmplte + +/* ========================================================================== + Assembly Syntax: Pd4=!cmp.gt(Rs32,#s10) + C Intrinsic Prototype: Byte Q6_p_not_cmp_gt_RI(Word32 Rs, Word32 Is10) + Instruction Type: ALU32_2op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_p_not_cmp_gt_RI __builtin_HEXAGON_C4_cmpltei + +/* ========================================================================== + Assembly Syntax: Pd4=!cmp.gtu(Rs32,Rt32) + C Intrinsic Prototype: Byte Q6_p_not_cmp_gtu_RR(Word32 Rs, Word32 Rt) + Instruction Type: ALU32_3op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_p_not_cmp_gtu_RR __builtin_HEXAGON_C4_cmplteu + +/* ========================================================================== + Assembly Syntax: Pd4=!cmp.gtu(Rs32,#u9) + C Intrinsic Prototype: Byte Q6_p_not_cmp_gtu_RI(Word32 Rs, Word32 Iu9) + Instruction Type: ALU32_2op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_p_not_cmp_gtu_RI __builtin_HEXAGON_C4_cmplteui + +/* ========================================================================== + Assembly Syntax: Pd4=!cmp.eq(Rs32,Rt32) + C Intrinsic Prototype: Byte Q6_p_not_cmp_eq_RR(Word32 Rs, Word32 Rt) + Instruction Type: ALU32_3op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_p_not_cmp_eq_RR __builtin_HEXAGON_C4_cmpneq + +/* ========================================================================== + Assembly Syntax: Pd4=!cmp.eq(Rs32,#s10) + C Intrinsic Prototype: Byte Q6_p_not_cmp_eq_RI(Word32 Rs, Word32 Is10) + Instruction Type: ALU32_2op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_p_not_cmp_eq_RI __builtin_HEXAGON_C4_cmpneqi + +/* ========================================================================== + Assembly Syntax: Pd4=fastcorner9(Ps4,Pt4) + C Intrinsic Prototype: Byte Q6_p_fastcorner9_pp(Byte Ps, Byte Pt) + Instruction Type: CR + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_fastcorner9_pp __builtin_HEXAGON_C4_fastcorner9 + +/* ========================================================================== + Assembly Syntax: Pd4=!fastcorner9(Ps4,Pt4) + C Intrinsic Prototype: Byte Q6_p_not_fastcorner9_pp(Byte Ps, Byte Pt) + Instruction Type: CR + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_not_fastcorner9_pp __builtin_HEXAGON_C4_fastcorner9_not + +/* ========================================================================== + Assembly Syntax: Pd4=!bitsclr(Rs32,Rt32) + C Intrinsic Prototype: Byte Q6_p_not_bitsclr_RR(Word32 Rs, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_not_bitsclr_RR __builtin_HEXAGON_C4_nbitsclr + +/* ========================================================================== + Assembly Syntax: Pd4=!bitsclr(Rs32,#u6) + C Intrinsic Prototype: Byte Q6_p_not_bitsclr_RI(Word32 Rs, Word32 Iu6) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_not_bitsclr_RI __builtin_HEXAGON_C4_nbitsclri + +/* ========================================================================== + Assembly Syntax: Pd4=!bitsset(Rs32,Rt32) + C Intrinsic Prototype: Byte Q6_p_not_bitsset_RR(Word32 Rs, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_not_bitsset_RR __builtin_HEXAGON_C4_nbitsset + +/* ========================================================================== + Assembly Syntax: Pd4=or(Ps4,and(Pt4,Pu4)) + C Intrinsic Prototype: Byte Q6_p_or_and_ppp(Byte Ps, Byte Pt, Byte Pu) + Instruction Type: CR + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_or_and_ppp __builtin_HEXAGON_C4_or_and + +/* ========================================================================== + Assembly Syntax: Pd4=or(Ps4,and(Pt4,!Pu4)) + C Intrinsic Prototype: Byte Q6_p_or_and_ppnp(Byte Ps, Byte Pt, Byte Pu) + Instruction Type: CR + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_or_and_ppnp __builtin_HEXAGON_C4_or_andn + +/* ========================================================================== + Assembly Syntax: Pd4=or(Ps4,or(Pt4,Pu4)) + C Intrinsic Prototype: Byte Q6_p_or_or_ppp(Byte Ps, Byte Pt, Byte Pu) + Instruction Type: CR + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_or_or_ppp __builtin_HEXAGON_C4_or_or + +/* ========================================================================== + Assembly Syntax: Pd4=or(Ps4,or(Pt4,!Pu4)) + C Intrinsic Prototype: Byte Q6_p_or_or_ppnp(Byte Ps, Byte Pt, Byte Pu) + Instruction Type: CR + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_or_or_ppnp __builtin_HEXAGON_C4_or_orn + +/* ========================================================================== + Assembly Syntax: Rdd32=convert_d2df(Rss32) + C Intrinsic Prototype: Float64 Q6_P_convert_d2df_P(Word64 Rss) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_convert_d2df_P __builtin_HEXAGON_F2_conv_d2df + +/* ========================================================================== + Assembly Syntax: Rd32=convert_d2sf(Rss32) + C Intrinsic Prototype: Float32 Q6_R_convert_d2sf_P(Word64 Rss) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_convert_d2sf_P __builtin_HEXAGON_F2_conv_d2sf + +/* ========================================================================== + Assembly Syntax: Rdd32=convert_df2d(Rss32) + C Intrinsic Prototype: Word64 Q6_P_convert_df2d_P(Float64 Rss) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_convert_df2d_P __builtin_HEXAGON_F2_conv_df2d + +/* ========================================================================== + Assembly Syntax: Rdd32=convert_df2d(Rss32):chop + C Intrinsic Prototype: Word64 Q6_P_convert_df2d_P_chop(Float64 Rss) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_convert_df2d_P_chop __builtin_HEXAGON_F2_conv_df2d_chop + +/* ========================================================================== + Assembly Syntax: Rd32=convert_df2sf(Rss32) + C Intrinsic Prototype: Float32 Q6_R_convert_df2sf_P(Float64 Rss) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_convert_df2sf_P __builtin_HEXAGON_F2_conv_df2sf + +/* ========================================================================== + Assembly Syntax: Rdd32=convert_df2ud(Rss32) + C Intrinsic Prototype: Word64 Q6_P_convert_df2ud_P(Float64 Rss) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_convert_df2ud_P __builtin_HEXAGON_F2_conv_df2ud + +/* ========================================================================== + Assembly Syntax: Rdd32=convert_df2ud(Rss32):chop + C Intrinsic Prototype: Word64 Q6_P_convert_df2ud_P_chop(Float64 Rss) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_convert_df2ud_P_chop __builtin_HEXAGON_F2_conv_df2ud_chop + +/* ========================================================================== + Assembly Syntax: Rd32=convert_df2uw(Rss32) + C Intrinsic Prototype: Word32 Q6_R_convert_df2uw_P(Float64 Rss) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_convert_df2uw_P __builtin_HEXAGON_F2_conv_df2uw + +/* ========================================================================== + Assembly Syntax: Rd32=convert_df2uw(Rss32):chop + C Intrinsic Prototype: Word32 Q6_R_convert_df2uw_P_chop(Float64 Rss) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_convert_df2uw_P_chop __builtin_HEXAGON_F2_conv_df2uw_chop + +/* ========================================================================== + Assembly Syntax: Rd32=convert_df2w(Rss32) + C Intrinsic Prototype: Word32 Q6_R_convert_df2w_P(Float64 Rss) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_convert_df2w_P __builtin_HEXAGON_F2_conv_df2w + +/* ========================================================================== + Assembly Syntax: Rd32=convert_df2w(Rss32):chop + C Intrinsic Prototype: Word32 Q6_R_convert_df2w_P_chop(Float64 Rss) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_convert_df2w_P_chop __builtin_HEXAGON_F2_conv_df2w_chop + +/* ========================================================================== + Assembly Syntax: Rdd32=convert_sf2d(Rs32) + C Intrinsic Prototype: Word64 Q6_P_convert_sf2d_R(Float32 Rs) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_convert_sf2d_R __builtin_HEXAGON_F2_conv_sf2d + +/* ========================================================================== + Assembly Syntax: Rdd32=convert_sf2d(Rs32):chop + C Intrinsic Prototype: Word64 Q6_P_convert_sf2d_R_chop(Float32 Rs) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_convert_sf2d_R_chop __builtin_HEXAGON_F2_conv_sf2d_chop + +/* ========================================================================== + Assembly Syntax: Rdd32=convert_sf2df(Rs32) + C Intrinsic Prototype: Float64 Q6_P_convert_sf2df_R(Float32 Rs) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_convert_sf2df_R __builtin_HEXAGON_F2_conv_sf2df + +/* ========================================================================== + Assembly Syntax: Rdd32=convert_sf2ud(Rs32) + C Intrinsic Prototype: Word64 Q6_P_convert_sf2ud_R(Float32 Rs) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_convert_sf2ud_R __builtin_HEXAGON_F2_conv_sf2ud + +/* ========================================================================== + Assembly Syntax: Rdd32=convert_sf2ud(Rs32):chop + C Intrinsic Prototype: Word64 Q6_P_convert_sf2ud_R_chop(Float32 Rs) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_convert_sf2ud_R_chop __builtin_HEXAGON_F2_conv_sf2ud_chop + +/* ========================================================================== + Assembly Syntax: Rd32=convert_sf2uw(Rs32) + C Intrinsic Prototype: Word32 Q6_R_convert_sf2uw_R(Float32 Rs) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_convert_sf2uw_R __builtin_HEXAGON_F2_conv_sf2uw + +/* ========================================================================== + Assembly Syntax: Rd32=convert_sf2uw(Rs32):chop + C Intrinsic Prototype: Word32 Q6_R_convert_sf2uw_R_chop(Float32 Rs) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_convert_sf2uw_R_chop __builtin_HEXAGON_F2_conv_sf2uw_chop + +/* ========================================================================== + Assembly Syntax: Rd32=convert_sf2w(Rs32) + C Intrinsic Prototype: Word32 Q6_R_convert_sf2w_R(Float32 Rs) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_convert_sf2w_R __builtin_HEXAGON_F2_conv_sf2w + +/* ========================================================================== + Assembly Syntax: Rd32=convert_sf2w(Rs32):chop + C Intrinsic Prototype: Word32 Q6_R_convert_sf2w_R_chop(Float32 Rs) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_convert_sf2w_R_chop __builtin_HEXAGON_F2_conv_sf2w_chop + +/* ========================================================================== + Assembly Syntax: Rdd32=convert_ud2df(Rss32) + C Intrinsic Prototype: Float64 Q6_P_convert_ud2df_P(Word64 Rss) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_convert_ud2df_P __builtin_HEXAGON_F2_conv_ud2df + +/* ========================================================================== + Assembly Syntax: Rd32=convert_ud2sf(Rss32) + C Intrinsic Prototype: Float32 Q6_R_convert_ud2sf_P(Word64 Rss) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_convert_ud2sf_P __builtin_HEXAGON_F2_conv_ud2sf + +/* ========================================================================== + Assembly Syntax: Rdd32=convert_uw2df(Rs32) + C Intrinsic Prototype: Float64 Q6_P_convert_uw2df_R(Word32 Rs) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_convert_uw2df_R __builtin_HEXAGON_F2_conv_uw2df + +/* ========================================================================== + Assembly Syntax: Rd32=convert_uw2sf(Rs32) + C Intrinsic Prototype: Float32 Q6_R_convert_uw2sf_R(Word32 Rs) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_convert_uw2sf_R __builtin_HEXAGON_F2_conv_uw2sf + +/* ========================================================================== + Assembly Syntax: Rdd32=convert_w2df(Rs32) + C Intrinsic Prototype: Float64 Q6_P_convert_w2df_R(Word32 Rs) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_convert_w2df_R __builtin_HEXAGON_F2_conv_w2df + +/* ========================================================================== + Assembly Syntax: Rd32=convert_w2sf(Rs32) + C Intrinsic Prototype: Float32 Q6_R_convert_w2sf_R(Word32 Rs) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_convert_w2sf_R __builtin_HEXAGON_F2_conv_w2sf + +/* ========================================================================== + Assembly Syntax: Pd4=dfclass(Rss32,#u5) + C Intrinsic Prototype: Byte Q6_p_dfclass_PI(Float64 Rss, Word32 Iu5) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_dfclass_PI __builtin_HEXAGON_F2_dfclass + +/* ========================================================================== + Assembly Syntax: Pd4=dfcmp.eq(Rss32,Rtt32) + C Intrinsic Prototype: Byte Q6_p_dfcmp_eq_PP(Float64 Rss, Float64 Rtt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_dfcmp_eq_PP __builtin_HEXAGON_F2_dfcmpeq + +/* ========================================================================== + Assembly Syntax: Pd4=dfcmp.ge(Rss32,Rtt32) + C Intrinsic Prototype: Byte Q6_p_dfcmp_ge_PP(Float64 Rss, Float64 Rtt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_dfcmp_ge_PP __builtin_HEXAGON_F2_dfcmpge + +/* ========================================================================== + Assembly Syntax: Pd4=dfcmp.gt(Rss32,Rtt32) + C Intrinsic Prototype: Byte Q6_p_dfcmp_gt_PP(Float64 Rss, Float64 Rtt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_dfcmp_gt_PP __builtin_HEXAGON_F2_dfcmpgt + +/* ========================================================================== + Assembly Syntax: Pd4=dfcmp.uo(Rss32,Rtt32) + C Intrinsic Prototype: Byte Q6_p_dfcmp_uo_PP(Float64 Rss, Float64 Rtt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_dfcmp_uo_PP __builtin_HEXAGON_F2_dfcmpuo + +/* ========================================================================== + Assembly Syntax: Rdd32=dfmake(#u10):neg + C Intrinsic Prototype: Float64 Q6_P_dfmake_I_neg(Word32 Iu10) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_dfmake_I_neg __builtin_HEXAGON_F2_dfimm_n + +/* ========================================================================== + Assembly Syntax: Rdd32=dfmake(#u10):pos + C Intrinsic Prototype: Float64 Q6_P_dfmake_I_pos(Word32 Iu10) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_dfmake_I_pos __builtin_HEXAGON_F2_dfimm_p + +/* ========================================================================== + Assembly Syntax: Rd32=sfadd(Rs32,Rt32) + C Intrinsic Prototype: Float32 Q6_R_sfadd_RR(Float32 Rs, Float32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_sfadd_RR __builtin_HEXAGON_F2_sfadd + +/* ========================================================================== + Assembly Syntax: Pd4=sfclass(Rs32,#u5) + C Intrinsic Prototype: Byte Q6_p_sfclass_RI(Float32 Rs, Word32 Iu5) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_sfclass_RI __builtin_HEXAGON_F2_sfclass + +/* ========================================================================== + Assembly Syntax: Pd4=sfcmp.eq(Rs32,Rt32) + C Intrinsic Prototype: Byte Q6_p_sfcmp_eq_RR(Float32 Rs, Float32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_sfcmp_eq_RR __builtin_HEXAGON_F2_sfcmpeq + +/* ========================================================================== + Assembly Syntax: Pd4=sfcmp.ge(Rs32,Rt32) + C Intrinsic Prototype: Byte Q6_p_sfcmp_ge_RR(Float32 Rs, Float32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_sfcmp_ge_RR __builtin_HEXAGON_F2_sfcmpge + +/* ========================================================================== + Assembly Syntax: Pd4=sfcmp.gt(Rs32,Rt32) + C Intrinsic Prototype: Byte Q6_p_sfcmp_gt_RR(Float32 Rs, Float32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_sfcmp_gt_RR __builtin_HEXAGON_F2_sfcmpgt + +/* ========================================================================== + Assembly Syntax: Pd4=sfcmp.uo(Rs32,Rt32) + C Intrinsic Prototype: Byte Q6_p_sfcmp_uo_RR(Float32 Rs, Float32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_sfcmp_uo_RR __builtin_HEXAGON_F2_sfcmpuo + +/* ========================================================================== + Assembly Syntax: Rd32=sffixupd(Rs32,Rt32) + C Intrinsic Prototype: Float32 Q6_R_sffixupd_RR(Float32 Rs, Float32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_sffixupd_RR __builtin_HEXAGON_F2_sffixupd + +/* ========================================================================== + Assembly Syntax: Rd32=sffixupn(Rs32,Rt32) + C Intrinsic Prototype: Float32 Q6_R_sffixupn_RR(Float32 Rs, Float32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_sffixupn_RR __builtin_HEXAGON_F2_sffixupn + +/* ========================================================================== + Assembly Syntax: Rd32=sffixupr(Rs32) + C Intrinsic Prototype: Float32 Q6_R_sffixupr_R(Float32 Rs) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_sffixupr_R __builtin_HEXAGON_F2_sffixupr + +/* ========================================================================== + Assembly Syntax: Rx32+=sfmpy(Rs32,Rt32) + C Intrinsic Prototype: Float32 Q6_R_sfmpyacc_RR(Float32 Rx, Float32 Rs, Float32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_sfmpyacc_RR __builtin_HEXAGON_F2_sffma + +/* ========================================================================== + Assembly Syntax: Rx32+=sfmpy(Rs32,Rt32):lib + C Intrinsic Prototype: Float32 Q6_R_sfmpyacc_RR_lib(Float32 Rx, Float32 Rs, Float32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_sfmpyacc_RR_lib __builtin_HEXAGON_F2_sffma_lib + +/* ========================================================================== + Assembly Syntax: Rx32+=sfmpy(Rs32,Rt32,Pu4):scale + C Intrinsic Prototype: Float32 Q6_R_sfmpyacc_RRp_scale(Float32 Rx, Float32 Rs, Float32 Rt, Byte Pu) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_sfmpyacc_RRp_scale __builtin_HEXAGON_F2_sffma_sc + +/* ========================================================================== + Assembly Syntax: Rx32-=sfmpy(Rs32,Rt32) + C Intrinsic Prototype: Float32 Q6_R_sfmpynac_RR(Float32 Rx, Float32 Rs, Float32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_sfmpynac_RR __builtin_HEXAGON_F2_sffms + +/* ========================================================================== + Assembly Syntax: Rx32-=sfmpy(Rs32,Rt32):lib + C Intrinsic Prototype: Float32 Q6_R_sfmpynac_RR_lib(Float32 Rx, Float32 Rs, Float32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_sfmpynac_RR_lib __builtin_HEXAGON_F2_sffms_lib + +/* ========================================================================== + Assembly Syntax: Rd32=sfmake(#u10):neg + C Intrinsic Prototype: Float32 Q6_R_sfmake_I_neg(Word32 Iu10) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_sfmake_I_neg __builtin_HEXAGON_F2_sfimm_n + +/* ========================================================================== + Assembly Syntax: Rd32=sfmake(#u10):pos + C Intrinsic Prototype: Float32 Q6_R_sfmake_I_pos(Word32 Iu10) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_sfmake_I_pos __builtin_HEXAGON_F2_sfimm_p + +/* ========================================================================== + Assembly Syntax: Rd32=sfmax(Rs32,Rt32) + C Intrinsic Prototype: Float32 Q6_R_sfmax_RR(Float32 Rs, Float32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_sfmax_RR __builtin_HEXAGON_F2_sfmax + +/* ========================================================================== + Assembly Syntax: Rd32=sfmin(Rs32,Rt32) + C Intrinsic Prototype: Float32 Q6_R_sfmin_RR(Float32 Rs, Float32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_sfmin_RR __builtin_HEXAGON_F2_sfmin + +/* ========================================================================== + Assembly Syntax: Rd32=sfmpy(Rs32,Rt32) + C Intrinsic Prototype: Float32 Q6_R_sfmpy_RR(Float32 Rs, Float32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_sfmpy_RR __builtin_HEXAGON_F2_sfmpy + +/* ========================================================================== + Assembly Syntax: Rd32=sfsub(Rs32,Rt32) + C Intrinsic Prototype: Float32 Q6_R_sfsub_RR(Float32 Rs, Float32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_sfsub_RR __builtin_HEXAGON_F2_sfsub + +/* ========================================================================== + Assembly Syntax: Rd32=memb(Rx32++#s4:0:circ(Mu2)) + C Intrinsic Prototype: Word32 Q6_R_memb_IM_circ(void** Rx, Word32 Is4_0, Word32 Mu, void* BaseAddress) + Instruction Type: LD + Execution Slots: SLOT01 + ========================================================================== */ + +#define Q6_R_memb_IM_circ __builtin_HEXAGON_L2_loadrb_pci + +/* ========================================================================== + Assembly Syntax: Rd32=memb(Rx32++I:circ(Mu2)) + C Intrinsic Prototype: Word32 Q6_R_memb_M_circ(void** Rx, Word32 Mu, void* BaseAddress) + Instruction Type: LD + Execution Slots: SLOT01 + ========================================================================== */ + +#define Q6_R_memb_M_circ __builtin_HEXAGON_L2_loadrb_pcr + +/* ========================================================================== + Assembly Syntax: Rdd32=memd(Rx32++#s4:3:circ(Mu2)) + C Intrinsic Prototype: Word64 Q6_P_memd_IM_circ(void** Rx, Word32 Is4_3, Word32 Mu, void* BaseAddress) + Instruction Type: LD + Execution Slots: SLOT01 + ========================================================================== */ + +#define Q6_P_memd_IM_circ __builtin_HEXAGON_L2_loadrd_pci + +/* ========================================================================== + Assembly Syntax: Rdd32=memd(Rx32++I:circ(Mu2)) + C Intrinsic Prototype: Word64 Q6_P_memd_M_circ(void** Rx, Word32 Mu, void* BaseAddress) + Instruction Type: LD + Execution Slots: SLOT01 + ========================================================================== */ + +#define Q6_P_memd_M_circ __builtin_HEXAGON_L2_loadrd_pcr + +/* ========================================================================== + Assembly Syntax: Rd32=memh(Rx32++#s4:1:circ(Mu2)) + C Intrinsic Prototype: Word32 Q6_R_memh_IM_circ(void** Rx, Word32 Is4_1, Word32 Mu, void* BaseAddress) + Instruction Type: LD + Execution Slots: SLOT01 + ========================================================================== */ + +#define Q6_R_memh_IM_circ __builtin_HEXAGON_L2_loadrh_pci + +/* ========================================================================== + Assembly Syntax: Rd32=memh(Rx32++I:circ(Mu2)) + C Intrinsic Prototype: Word32 Q6_R_memh_M_circ(void** Rx, Word32 Mu, void* BaseAddress) + Instruction Type: LD + Execution Slots: SLOT01 + ========================================================================== */ + +#define Q6_R_memh_M_circ __builtin_HEXAGON_L2_loadrh_pcr + +/* ========================================================================== + Assembly Syntax: Rd32=memw(Rx32++#s4:2:circ(Mu2)) + C Intrinsic Prototype: Word32 Q6_R_memw_IM_circ(void** Rx, Word32 Is4_2, Word32 Mu, void* BaseAddress) + Instruction Type: LD + Execution Slots: SLOT01 + ========================================================================== */ + +#define Q6_R_memw_IM_circ __builtin_HEXAGON_L2_loadri_pci + +/* ========================================================================== + Assembly Syntax: Rd32=memw(Rx32++I:circ(Mu2)) + C Intrinsic Prototype: Word32 Q6_R_memw_M_circ(void** Rx, Word32 Mu, void* BaseAddress) + Instruction Type: LD + Execution Slots: SLOT01 + ========================================================================== */ + +#define Q6_R_memw_M_circ __builtin_HEXAGON_L2_loadri_pcr + +/* ========================================================================== + Assembly Syntax: Rd32=memub(Rx32++#s4:0:circ(Mu2)) + C Intrinsic Prototype: Word32 Q6_R_memub_IM_circ(void** Rx, Word32 Is4_0, Word32 Mu, void* BaseAddress) + Instruction Type: LD + Execution Slots: SLOT01 + ========================================================================== */ + +#define Q6_R_memub_IM_circ __builtin_HEXAGON_L2_loadrub_pci + +/* ========================================================================== + Assembly Syntax: Rd32=memub(Rx32++I:circ(Mu2)) + C Intrinsic Prototype: Word32 Q6_R_memub_M_circ(void** Rx, Word32 Mu, void* BaseAddress) + Instruction Type: LD + Execution Slots: SLOT01 + ========================================================================== */ + +#define Q6_R_memub_M_circ __builtin_HEXAGON_L2_loadrub_pcr + +/* ========================================================================== + Assembly Syntax: Rd32=memuh(Rx32++#s4:1:circ(Mu2)) + C Intrinsic Prototype: Word32 Q6_R_memuh_IM_circ(void** Rx, Word32 Is4_1, Word32 Mu, void* BaseAddress) + Instruction Type: LD + Execution Slots: SLOT01 + ========================================================================== */ + +#define Q6_R_memuh_IM_circ __builtin_HEXAGON_L2_loadruh_pci + +/* ========================================================================== + Assembly Syntax: Rd32=memuh(Rx32++I:circ(Mu2)) + C Intrinsic Prototype: Word32 Q6_R_memuh_M_circ(void** Rx, Word32 Mu, void* BaseAddress) + Instruction Type: LD + Execution Slots: SLOT01 + ========================================================================== */ + +#define Q6_R_memuh_M_circ __builtin_HEXAGON_L2_loadruh_pcr + +/* ========================================================================== + Assembly Syntax: Rx32+=add(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_addacc_RR(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_addacc_RR __builtin_HEXAGON_M2_acci + +/* ========================================================================== + Assembly Syntax: Rx32+=add(Rs32,#s8) + C Intrinsic Prototype: Word32 Q6_R_addacc_RI(Word32 Rx, Word32 Rs, Word32 Is8) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_addacc_RI __builtin_HEXAGON_M2_accii + +/* ========================================================================== + Assembly Syntax: Rxx32+=cmpyi(Rs32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_cmpyiacc_RR(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_cmpyiacc_RR __builtin_HEXAGON_M2_cmaci_s0 + +/* ========================================================================== + Assembly Syntax: Rxx32+=cmpyr(Rs32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_cmpyracc_RR(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_cmpyracc_RR __builtin_HEXAGON_M2_cmacr_s0 + +/* ========================================================================== + Assembly Syntax: Rxx32+=cmpy(Rs32,Rt32):sat + C Intrinsic Prototype: Word64 Q6_P_cmpyacc_RR_sat(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_cmpyacc_RR_sat __builtin_HEXAGON_M2_cmacs_s0 + +/* ========================================================================== + Assembly Syntax: Rxx32+=cmpy(Rs32,Rt32):<<1:sat + C Intrinsic Prototype: Word64 Q6_P_cmpyacc_RR_s1_sat(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_cmpyacc_RR_s1_sat __builtin_HEXAGON_M2_cmacs_s1 + +/* ========================================================================== + Assembly Syntax: Rxx32+=cmpy(Rs32,Rt32*):sat + C Intrinsic Prototype: Word64 Q6_P_cmpyacc_RR_conj_sat(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_cmpyacc_RR_conj_sat __builtin_HEXAGON_M2_cmacsc_s0 + +/* ========================================================================== + Assembly Syntax: Rxx32+=cmpy(Rs32,Rt32*):<<1:sat + C Intrinsic Prototype: Word64 Q6_P_cmpyacc_RR_conj_s1_sat(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_cmpyacc_RR_conj_s1_sat __builtin_HEXAGON_M2_cmacsc_s1 + +/* ========================================================================== + Assembly Syntax: Rdd32=cmpyi(Rs32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_cmpyi_RR(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_cmpyi_RR __builtin_HEXAGON_M2_cmpyi_s0 + +/* ========================================================================== + Assembly Syntax: Rdd32=cmpyr(Rs32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_cmpyr_RR(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_cmpyr_RR __builtin_HEXAGON_M2_cmpyr_s0 + +/* ========================================================================== + Assembly Syntax: Rd32=cmpy(Rs32,Rt32):rnd:sat + C Intrinsic Prototype: Word32 Q6_R_cmpy_RR_rnd_sat(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_cmpy_RR_rnd_sat __builtin_HEXAGON_M2_cmpyrs_s0 + +/* ========================================================================== + Assembly Syntax: Rd32=cmpy(Rs32,Rt32):<<1:rnd:sat + C Intrinsic Prototype: Word32 Q6_R_cmpy_RR_s1_rnd_sat(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_cmpy_RR_s1_rnd_sat __builtin_HEXAGON_M2_cmpyrs_s1 + +/* ========================================================================== + Assembly Syntax: Rd32=cmpy(Rs32,Rt32*):rnd:sat + C Intrinsic Prototype: Word32 Q6_R_cmpy_RR_conj_rnd_sat(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_cmpy_RR_conj_rnd_sat __builtin_HEXAGON_M2_cmpyrsc_s0 + +/* ========================================================================== + Assembly Syntax: Rd32=cmpy(Rs32,Rt32*):<<1:rnd:sat + C Intrinsic Prototype: Word32 Q6_R_cmpy_RR_conj_s1_rnd_sat(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_cmpy_RR_conj_s1_rnd_sat __builtin_HEXAGON_M2_cmpyrsc_s1 + +/* ========================================================================== + Assembly Syntax: Rdd32=cmpy(Rs32,Rt32):sat + C Intrinsic Prototype: Word64 Q6_P_cmpy_RR_sat(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_cmpy_RR_sat __builtin_HEXAGON_M2_cmpys_s0 + +/* ========================================================================== + Assembly Syntax: Rdd32=cmpy(Rs32,Rt32):<<1:sat + C Intrinsic Prototype: Word64 Q6_P_cmpy_RR_s1_sat(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_cmpy_RR_s1_sat __builtin_HEXAGON_M2_cmpys_s1 + +/* ========================================================================== + Assembly Syntax: Rdd32=cmpy(Rs32,Rt32*):sat + C Intrinsic Prototype: Word64 Q6_P_cmpy_RR_conj_sat(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_cmpy_RR_conj_sat __builtin_HEXAGON_M2_cmpysc_s0 + +/* ========================================================================== + Assembly Syntax: Rdd32=cmpy(Rs32,Rt32*):<<1:sat + C Intrinsic Prototype: Word64 Q6_P_cmpy_RR_conj_s1_sat(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_cmpy_RR_conj_s1_sat __builtin_HEXAGON_M2_cmpysc_s1 + +/* ========================================================================== + Assembly Syntax: Rxx32-=cmpy(Rs32,Rt32):sat + C Intrinsic Prototype: Word64 Q6_P_cmpynac_RR_sat(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_cmpynac_RR_sat __builtin_HEXAGON_M2_cnacs_s0 + +/* ========================================================================== + Assembly Syntax: Rxx32-=cmpy(Rs32,Rt32):<<1:sat + C Intrinsic Prototype: Word64 Q6_P_cmpynac_RR_s1_sat(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_cmpynac_RR_s1_sat __builtin_HEXAGON_M2_cnacs_s1 + +/* ========================================================================== + Assembly Syntax: Rxx32-=cmpy(Rs32,Rt32*):sat + C Intrinsic Prototype: Word64 Q6_P_cmpynac_RR_conj_sat(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_cmpynac_RR_conj_sat __builtin_HEXAGON_M2_cnacsc_s0 + +/* ========================================================================== + Assembly Syntax: Rxx32-=cmpy(Rs32,Rt32*):<<1:sat + C Intrinsic Prototype: Word64 Q6_P_cmpynac_RR_conj_s1_sat(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_cmpynac_RR_conj_s1_sat __builtin_HEXAGON_M2_cnacsc_s1 + +/* ========================================================================== + Assembly Syntax: Rxx32+=mpy(Rs32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_mpyacc_RR(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpyacc_RR __builtin_HEXAGON_M2_dpmpyss_acc_s0 + +/* ========================================================================== + Assembly Syntax: Rxx32-=mpy(Rs32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_mpynac_RR(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpynac_RR __builtin_HEXAGON_M2_dpmpyss_nac_s0 + +/* ========================================================================== + Assembly Syntax: Rd32=mpy(Rs32,Rt32):rnd + C Intrinsic Prototype: Word32 Q6_R_mpy_RR_rnd(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpy_RR_rnd __builtin_HEXAGON_M2_dpmpyss_rnd_s0 + +/* ========================================================================== + Assembly Syntax: Rdd32=mpy(Rs32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_mpy_RR(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpy_RR __builtin_HEXAGON_M2_dpmpyss_s0 + +/* ========================================================================== + Assembly Syntax: Rxx32+=mpyu(Rs32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_mpyuacc_RR(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpyuacc_RR __builtin_HEXAGON_M2_dpmpyuu_acc_s0 + +/* ========================================================================== + Assembly Syntax: Rxx32-=mpyu(Rs32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_mpyunac_RR(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpyunac_RR __builtin_HEXAGON_M2_dpmpyuu_nac_s0 + +/* ========================================================================== + Assembly Syntax: Rdd32=mpyu(Rs32,Rt32) + C Intrinsic Prototype: UWord64 Q6_P_mpyu_RR(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpyu_RR __builtin_HEXAGON_M2_dpmpyuu_s0 + +/* ========================================================================== + Assembly Syntax: Rd32=mpy(Rs32,Rt32.h):<<1:rnd:sat + C Intrinsic Prototype: Word32 Q6_R_mpy_RRh_s1_rnd_sat(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpy_RRh_s1_rnd_sat __builtin_HEXAGON_M2_hmmpyh_rs1 + +/* ========================================================================== + Assembly Syntax: Rd32=mpy(Rs32,Rt32.h):<<1:sat + C Intrinsic Prototype: Word32 Q6_R_mpy_RRh_s1_sat(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpy_RRh_s1_sat __builtin_HEXAGON_M2_hmmpyh_s1 + +/* ========================================================================== + Assembly Syntax: Rd32=mpy(Rs32,Rt32.l):<<1:rnd:sat + C Intrinsic Prototype: Word32 Q6_R_mpy_RRl_s1_rnd_sat(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpy_RRl_s1_rnd_sat __builtin_HEXAGON_M2_hmmpyl_rs1 + +/* ========================================================================== + Assembly Syntax: Rd32=mpy(Rs32,Rt32.l):<<1:sat + C Intrinsic Prototype: Word32 Q6_R_mpy_RRl_s1_sat(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpy_RRl_s1_sat __builtin_HEXAGON_M2_hmmpyl_s1 + +/* ========================================================================== + Assembly Syntax: Rx32+=mpyi(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_mpyiacc_RR(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpyiacc_RR __builtin_HEXAGON_M2_maci + +/* ========================================================================== + Assembly Syntax: Rx32-=mpyi(Rs32,#u8) + C Intrinsic Prototype: Word32 Q6_R_mpyinac_RI(Word32 Rx, Word32 Rs, Word32 Iu8) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpyinac_RI __builtin_HEXAGON_M2_macsin + +/* ========================================================================== + Assembly Syntax: Rx32+=mpyi(Rs32,#u8) + C Intrinsic Prototype: Word32 Q6_R_mpyiacc_RI(Word32 Rx, Word32 Rs, Word32 Iu8) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpyiacc_RI __builtin_HEXAGON_M2_macsip + +/* ========================================================================== + Assembly Syntax: Rxx32+=vmpywoh(Rss32,Rtt32):rnd:sat + C Intrinsic Prototype: Word64 Q6_P_vmpywohacc_PP_rnd_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmpywohacc_PP_rnd_sat __builtin_HEXAGON_M2_mmachs_rs0 + +/* ========================================================================== + Assembly Syntax: Rxx32+=vmpywoh(Rss32,Rtt32):<<1:rnd:sat + C Intrinsic Prototype: Word64 Q6_P_vmpywohacc_PP_s1_rnd_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmpywohacc_PP_s1_rnd_sat __builtin_HEXAGON_M2_mmachs_rs1 + +/* ========================================================================== + Assembly Syntax: Rxx32+=vmpywoh(Rss32,Rtt32):sat + C Intrinsic Prototype: Word64 Q6_P_vmpywohacc_PP_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmpywohacc_PP_sat __builtin_HEXAGON_M2_mmachs_s0 + +/* ========================================================================== + Assembly Syntax: Rxx32+=vmpywoh(Rss32,Rtt32):<<1:sat + C Intrinsic Prototype: Word64 Q6_P_vmpywohacc_PP_s1_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmpywohacc_PP_s1_sat __builtin_HEXAGON_M2_mmachs_s1 + +/* ========================================================================== + Assembly Syntax: Rxx32+=vmpyweh(Rss32,Rtt32):rnd:sat + C Intrinsic Prototype: Word64 Q6_P_vmpywehacc_PP_rnd_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmpywehacc_PP_rnd_sat __builtin_HEXAGON_M2_mmacls_rs0 + +/* ========================================================================== + Assembly Syntax: Rxx32+=vmpyweh(Rss32,Rtt32):<<1:rnd:sat + C Intrinsic Prototype: Word64 Q6_P_vmpywehacc_PP_s1_rnd_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmpywehacc_PP_s1_rnd_sat __builtin_HEXAGON_M2_mmacls_rs1 + +/* ========================================================================== + Assembly Syntax: Rxx32+=vmpyweh(Rss32,Rtt32):sat + C Intrinsic Prototype: Word64 Q6_P_vmpywehacc_PP_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmpywehacc_PP_sat __builtin_HEXAGON_M2_mmacls_s0 + +/* ========================================================================== + Assembly Syntax: Rxx32+=vmpyweh(Rss32,Rtt32):<<1:sat + C Intrinsic Prototype: Word64 Q6_P_vmpywehacc_PP_s1_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmpywehacc_PP_s1_sat __builtin_HEXAGON_M2_mmacls_s1 + +/* ========================================================================== + Assembly Syntax: Rxx32+=vmpywouh(Rss32,Rtt32):rnd:sat + C Intrinsic Prototype: Word64 Q6_P_vmpywouhacc_PP_rnd_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmpywouhacc_PP_rnd_sat __builtin_HEXAGON_M2_mmacuhs_rs0 + +/* ========================================================================== + Assembly Syntax: Rxx32+=vmpywouh(Rss32,Rtt32):<<1:rnd:sat + C Intrinsic Prototype: Word64 Q6_P_vmpywouhacc_PP_s1_rnd_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmpywouhacc_PP_s1_rnd_sat __builtin_HEXAGON_M2_mmacuhs_rs1 + +/* ========================================================================== + Assembly Syntax: Rxx32+=vmpywouh(Rss32,Rtt32):sat + C Intrinsic Prototype: Word64 Q6_P_vmpywouhacc_PP_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmpywouhacc_PP_sat __builtin_HEXAGON_M2_mmacuhs_s0 + +/* ========================================================================== + Assembly Syntax: Rxx32+=vmpywouh(Rss32,Rtt32):<<1:sat + C Intrinsic Prototype: Word64 Q6_P_vmpywouhacc_PP_s1_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmpywouhacc_PP_s1_sat __builtin_HEXAGON_M2_mmacuhs_s1 + +/* ========================================================================== + Assembly Syntax: Rxx32+=vmpyweuh(Rss32,Rtt32):rnd:sat + C Intrinsic Prototype: Word64 Q6_P_vmpyweuhacc_PP_rnd_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmpyweuhacc_PP_rnd_sat __builtin_HEXAGON_M2_mmaculs_rs0 + +/* ========================================================================== + Assembly Syntax: Rxx32+=vmpyweuh(Rss32,Rtt32):<<1:rnd:sat + C Intrinsic Prototype: Word64 Q6_P_vmpyweuhacc_PP_s1_rnd_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmpyweuhacc_PP_s1_rnd_sat __builtin_HEXAGON_M2_mmaculs_rs1 + +/* ========================================================================== + Assembly Syntax: Rxx32+=vmpyweuh(Rss32,Rtt32):sat + C Intrinsic Prototype: Word64 Q6_P_vmpyweuhacc_PP_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmpyweuhacc_PP_sat __builtin_HEXAGON_M2_mmaculs_s0 + +/* ========================================================================== + Assembly Syntax: Rxx32+=vmpyweuh(Rss32,Rtt32):<<1:sat + C Intrinsic Prototype: Word64 Q6_P_vmpyweuhacc_PP_s1_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmpyweuhacc_PP_s1_sat __builtin_HEXAGON_M2_mmaculs_s1 + +/* ========================================================================== + Assembly Syntax: Rdd32=vmpywoh(Rss32,Rtt32):rnd:sat + C Intrinsic Prototype: Word64 Q6_P_vmpywoh_PP_rnd_sat(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmpywoh_PP_rnd_sat __builtin_HEXAGON_M2_mmpyh_rs0 + +/* ========================================================================== + Assembly Syntax: Rdd32=vmpywoh(Rss32,Rtt32):<<1:rnd:sat + C Intrinsic Prototype: Word64 Q6_P_vmpywoh_PP_s1_rnd_sat(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmpywoh_PP_s1_rnd_sat __builtin_HEXAGON_M2_mmpyh_rs1 + +/* ========================================================================== + Assembly Syntax: Rdd32=vmpywoh(Rss32,Rtt32):sat + C Intrinsic Prototype: Word64 Q6_P_vmpywoh_PP_sat(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmpywoh_PP_sat __builtin_HEXAGON_M2_mmpyh_s0 + +/* ========================================================================== + Assembly Syntax: Rdd32=vmpywoh(Rss32,Rtt32):<<1:sat + C Intrinsic Prototype: Word64 Q6_P_vmpywoh_PP_s1_sat(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmpywoh_PP_s1_sat __builtin_HEXAGON_M2_mmpyh_s1 + +/* ========================================================================== + Assembly Syntax: Rdd32=vmpyweh(Rss32,Rtt32):rnd:sat + C Intrinsic Prototype: Word64 Q6_P_vmpyweh_PP_rnd_sat(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmpyweh_PP_rnd_sat __builtin_HEXAGON_M2_mmpyl_rs0 + +/* ========================================================================== + Assembly Syntax: Rdd32=vmpyweh(Rss32,Rtt32):<<1:rnd:sat + C Intrinsic Prototype: Word64 Q6_P_vmpyweh_PP_s1_rnd_sat(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmpyweh_PP_s1_rnd_sat __builtin_HEXAGON_M2_mmpyl_rs1 + +/* ========================================================================== + Assembly Syntax: Rdd32=vmpyweh(Rss32,Rtt32):sat + C Intrinsic Prototype: Word64 Q6_P_vmpyweh_PP_sat(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmpyweh_PP_sat __builtin_HEXAGON_M2_mmpyl_s0 + +/* ========================================================================== + Assembly Syntax: Rdd32=vmpyweh(Rss32,Rtt32):<<1:sat + C Intrinsic Prototype: Word64 Q6_P_vmpyweh_PP_s1_sat(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmpyweh_PP_s1_sat __builtin_HEXAGON_M2_mmpyl_s1 + +/* ========================================================================== + Assembly Syntax: Rdd32=vmpywouh(Rss32,Rtt32):rnd:sat + C Intrinsic Prototype: Word64 Q6_P_vmpywouh_PP_rnd_sat(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmpywouh_PP_rnd_sat __builtin_HEXAGON_M2_mmpyuh_rs0 + +/* ========================================================================== + Assembly Syntax: Rdd32=vmpywouh(Rss32,Rtt32):<<1:rnd:sat + C Intrinsic Prototype: Word64 Q6_P_vmpywouh_PP_s1_rnd_sat(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmpywouh_PP_s1_rnd_sat __builtin_HEXAGON_M2_mmpyuh_rs1 + +/* ========================================================================== + Assembly Syntax: Rdd32=vmpywouh(Rss32,Rtt32):sat + C Intrinsic Prototype: Word64 Q6_P_vmpywouh_PP_sat(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmpywouh_PP_sat __builtin_HEXAGON_M2_mmpyuh_s0 + +/* ========================================================================== + Assembly Syntax: Rdd32=vmpywouh(Rss32,Rtt32):<<1:sat + C Intrinsic Prototype: Word64 Q6_P_vmpywouh_PP_s1_sat(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmpywouh_PP_s1_sat __builtin_HEXAGON_M2_mmpyuh_s1 + +/* ========================================================================== + Assembly Syntax: Rdd32=vmpyweuh(Rss32,Rtt32):rnd:sat + C Intrinsic Prototype: Word64 Q6_P_vmpyweuh_PP_rnd_sat(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmpyweuh_PP_rnd_sat __builtin_HEXAGON_M2_mmpyul_rs0 + +/* ========================================================================== + Assembly Syntax: Rdd32=vmpyweuh(Rss32,Rtt32):<<1:rnd:sat + C Intrinsic Prototype: Word64 Q6_P_vmpyweuh_PP_s1_rnd_sat(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmpyweuh_PP_s1_rnd_sat __builtin_HEXAGON_M2_mmpyul_rs1 + +/* ========================================================================== + Assembly Syntax: Rdd32=vmpyweuh(Rss32,Rtt32):sat + C Intrinsic Prototype: Word64 Q6_P_vmpyweuh_PP_sat(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmpyweuh_PP_sat __builtin_HEXAGON_M2_mmpyul_s0 + +/* ========================================================================== + Assembly Syntax: Rdd32=vmpyweuh(Rss32,Rtt32):<<1:sat + C Intrinsic Prototype: Word64 Q6_P_vmpyweuh_PP_s1_sat(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmpyweuh_PP_s1_sat __builtin_HEXAGON_M2_mmpyul_s1 + +/* ========================================================================== + Assembly Syntax: Rx32+=mpy(Rs32.h,Rt32.h) + C Intrinsic Prototype: Word32 Q6_R_mpyacc_RhRh(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpyacc_RhRh __builtin_HEXAGON_M2_mpy_acc_hh_s0 + +/* ========================================================================== + Assembly Syntax: Rx32+=mpy(Rs32.h,Rt32.h):<<1 + C Intrinsic Prototype: Word32 Q6_R_mpyacc_RhRh_s1(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpyacc_RhRh_s1 __builtin_HEXAGON_M2_mpy_acc_hh_s1 + +/* ========================================================================== + Assembly Syntax: Rx32+=mpy(Rs32.h,Rt32.l) + C Intrinsic Prototype: Word32 Q6_R_mpyacc_RhRl(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpyacc_RhRl __builtin_HEXAGON_M2_mpy_acc_hl_s0 + +/* ========================================================================== + Assembly Syntax: Rx32+=mpy(Rs32.h,Rt32.l):<<1 + C Intrinsic Prototype: Word32 Q6_R_mpyacc_RhRl_s1(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpyacc_RhRl_s1 __builtin_HEXAGON_M2_mpy_acc_hl_s1 + +/* ========================================================================== + Assembly Syntax: Rx32+=mpy(Rs32.l,Rt32.h) + C Intrinsic Prototype: Word32 Q6_R_mpyacc_RlRh(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpyacc_RlRh __builtin_HEXAGON_M2_mpy_acc_lh_s0 + +/* ========================================================================== + Assembly Syntax: Rx32+=mpy(Rs32.l,Rt32.h):<<1 + C Intrinsic Prototype: Word32 Q6_R_mpyacc_RlRh_s1(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpyacc_RlRh_s1 __builtin_HEXAGON_M2_mpy_acc_lh_s1 + +/* ========================================================================== + Assembly Syntax: Rx32+=mpy(Rs32.l,Rt32.l) + C Intrinsic Prototype: Word32 Q6_R_mpyacc_RlRl(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpyacc_RlRl __builtin_HEXAGON_M2_mpy_acc_ll_s0 + +/* ========================================================================== + Assembly Syntax: Rx32+=mpy(Rs32.l,Rt32.l):<<1 + C Intrinsic Prototype: Word32 Q6_R_mpyacc_RlRl_s1(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpyacc_RlRl_s1 __builtin_HEXAGON_M2_mpy_acc_ll_s1 + +/* ========================================================================== + Assembly Syntax: Rx32+=mpy(Rs32.h,Rt32.h):sat + C Intrinsic Prototype: Word32 Q6_R_mpyacc_RhRh_sat(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpyacc_RhRh_sat __builtin_HEXAGON_M2_mpy_acc_sat_hh_s0 + +/* ========================================================================== + Assembly Syntax: Rx32+=mpy(Rs32.h,Rt32.h):<<1:sat + C Intrinsic Prototype: Word32 Q6_R_mpyacc_RhRh_s1_sat(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpyacc_RhRh_s1_sat __builtin_HEXAGON_M2_mpy_acc_sat_hh_s1 + +/* ========================================================================== + Assembly Syntax: Rx32+=mpy(Rs32.h,Rt32.l):sat + C Intrinsic Prototype: Word32 Q6_R_mpyacc_RhRl_sat(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpyacc_RhRl_sat __builtin_HEXAGON_M2_mpy_acc_sat_hl_s0 + +/* ========================================================================== + Assembly Syntax: Rx32+=mpy(Rs32.h,Rt32.l):<<1:sat + C Intrinsic Prototype: Word32 Q6_R_mpyacc_RhRl_s1_sat(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpyacc_RhRl_s1_sat __builtin_HEXAGON_M2_mpy_acc_sat_hl_s1 + +/* ========================================================================== + Assembly Syntax: Rx32+=mpy(Rs32.l,Rt32.h):sat + C Intrinsic Prototype: Word32 Q6_R_mpyacc_RlRh_sat(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpyacc_RlRh_sat __builtin_HEXAGON_M2_mpy_acc_sat_lh_s0 + +/* ========================================================================== + Assembly Syntax: Rx32+=mpy(Rs32.l,Rt32.h):<<1:sat + C Intrinsic Prototype: Word32 Q6_R_mpyacc_RlRh_s1_sat(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpyacc_RlRh_s1_sat __builtin_HEXAGON_M2_mpy_acc_sat_lh_s1 + +/* ========================================================================== + Assembly Syntax: Rx32+=mpy(Rs32.l,Rt32.l):sat + C Intrinsic Prototype: Word32 Q6_R_mpyacc_RlRl_sat(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpyacc_RlRl_sat __builtin_HEXAGON_M2_mpy_acc_sat_ll_s0 + +/* ========================================================================== + Assembly Syntax: Rx32+=mpy(Rs32.l,Rt32.l):<<1:sat + C Intrinsic Prototype: Word32 Q6_R_mpyacc_RlRl_s1_sat(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpyacc_RlRl_s1_sat __builtin_HEXAGON_M2_mpy_acc_sat_ll_s1 + +/* ========================================================================== + Assembly Syntax: Rd32=mpy(Rs32.h,Rt32.h) + C Intrinsic Prototype: Word32 Q6_R_mpy_RhRh(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpy_RhRh __builtin_HEXAGON_M2_mpy_hh_s0 + +/* ========================================================================== + Assembly Syntax: Rd32=mpy(Rs32.h,Rt32.h):<<1 + C Intrinsic Prototype: Word32 Q6_R_mpy_RhRh_s1(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpy_RhRh_s1 __builtin_HEXAGON_M2_mpy_hh_s1 + +/* ========================================================================== + Assembly Syntax: Rd32=mpy(Rs32.h,Rt32.l) + C Intrinsic Prototype: Word32 Q6_R_mpy_RhRl(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpy_RhRl __builtin_HEXAGON_M2_mpy_hl_s0 + +/* ========================================================================== + Assembly Syntax: Rd32=mpy(Rs32.h,Rt32.l):<<1 + C Intrinsic Prototype: Word32 Q6_R_mpy_RhRl_s1(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpy_RhRl_s1 __builtin_HEXAGON_M2_mpy_hl_s1 + +/* ========================================================================== + Assembly Syntax: Rd32=mpy(Rs32.l,Rt32.h) + C Intrinsic Prototype: Word32 Q6_R_mpy_RlRh(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpy_RlRh __builtin_HEXAGON_M2_mpy_lh_s0 + +/* ========================================================================== + Assembly Syntax: Rd32=mpy(Rs32.l,Rt32.h):<<1 + C Intrinsic Prototype: Word32 Q6_R_mpy_RlRh_s1(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpy_RlRh_s1 __builtin_HEXAGON_M2_mpy_lh_s1 + +/* ========================================================================== + Assembly Syntax: Rd32=mpy(Rs32.l,Rt32.l) + C Intrinsic Prototype: Word32 Q6_R_mpy_RlRl(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpy_RlRl __builtin_HEXAGON_M2_mpy_ll_s0 + +/* ========================================================================== + Assembly Syntax: Rd32=mpy(Rs32.l,Rt32.l):<<1 + C Intrinsic Prototype: Word32 Q6_R_mpy_RlRl_s1(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpy_RlRl_s1 __builtin_HEXAGON_M2_mpy_ll_s1 + +/* ========================================================================== + Assembly Syntax: Rx32-=mpy(Rs32.h,Rt32.h) + C Intrinsic Prototype: Word32 Q6_R_mpynac_RhRh(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpynac_RhRh __builtin_HEXAGON_M2_mpy_nac_hh_s0 + +/* ========================================================================== + Assembly Syntax: Rx32-=mpy(Rs32.h,Rt32.h):<<1 + C Intrinsic Prototype: Word32 Q6_R_mpynac_RhRh_s1(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpynac_RhRh_s1 __builtin_HEXAGON_M2_mpy_nac_hh_s1 + +/* ========================================================================== + Assembly Syntax: Rx32-=mpy(Rs32.h,Rt32.l) + C Intrinsic Prototype: Word32 Q6_R_mpynac_RhRl(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpynac_RhRl __builtin_HEXAGON_M2_mpy_nac_hl_s0 + +/* ========================================================================== + Assembly Syntax: Rx32-=mpy(Rs32.h,Rt32.l):<<1 + C Intrinsic Prototype: Word32 Q6_R_mpynac_RhRl_s1(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpynac_RhRl_s1 __builtin_HEXAGON_M2_mpy_nac_hl_s1 + +/* ========================================================================== + Assembly Syntax: Rx32-=mpy(Rs32.l,Rt32.h) + C Intrinsic Prototype: Word32 Q6_R_mpynac_RlRh(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpynac_RlRh __builtin_HEXAGON_M2_mpy_nac_lh_s0 + +/* ========================================================================== + Assembly Syntax: Rx32-=mpy(Rs32.l,Rt32.h):<<1 + C Intrinsic Prototype: Word32 Q6_R_mpynac_RlRh_s1(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpynac_RlRh_s1 __builtin_HEXAGON_M2_mpy_nac_lh_s1 + +/* ========================================================================== + Assembly Syntax: Rx32-=mpy(Rs32.l,Rt32.l) + C Intrinsic Prototype: Word32 Q6_R_mpynac_RlRl(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpynac_RlRl __builtin_HEXAGON_M2_mpy_nac_ll_s0 + +/* ========================================================================== + Assembly Syntax: Rx32-=mpy(Rs32.l,Rt32.l):<<1 + C Intrinsic Prototype: Word32 Q6_R_mpynac_RlRl_s1(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpynac_RlRl_s1 __builtin_HEXAGON_M2_mpy_nac_ll_s1 + +/* ========================================================================== + Assembly Syntax: Rx32-=mpy(Rs32.h,Rt32.h):sat + C Intrinsic Prototype: Word32 Q6_R_mpynac_RhRh_sat(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpynac_RhRh_sat __builtin_HEXAGON_M2_mpy_nac_sat_hh_s0 + +/* ========================================================================== + Assembly Syntax: Rx32-=mpy(Rs32.h,Rt32.h):<<1:sat + C Intrinsic Prototype: Word32 Q6_R_mpynac_RhRh_s1_sat(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpynac_RhRh_s1_sat __builtin_HEXAGON_M2_mpy_nac_sat_hh_s1 + +/* ========================================================================== + Assembly Syntax: Rx32-=mpy(Rs32.h,Rt32.l):sat + C Intrinsic Prototype: Word32 Q6_R_mpynac_RhRl_sat(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpynac_RhRl_sat __builtin_HEXAGON_M2_mpy_nac_sat_hl_s0 + +/* ========================================================================== + Assembly Syntax: Rx32-=mpy(Rs32.h,Rt32.l):<<1:sat + C Intrinsic Prototype: Word32 Q6_R_mpynac_RhRl_s1_sat(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpynac_RhRl_s1_sat __builtin_HEXAGON_M2_mpy_nac_sat_hl_s1 + +/* ========================================================================== + Assembly Syntax: Rx32-=mpy(Rs32.l,Rt32.h):sat + C Intrinsic Prototype: Word32 Q6_R_mpynac_RlRh_sat(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpynac_RlRh_sat __builtin_HEXAGON_M2_mpy_nac_sat_lh_s0 + +/* ========================================================================== + Assembly Syntax: Rx32-=mpy(Rs32.l,Rt32.h):<<1:sat + C Intrinsic Prototype: Word32 Q6_R_mpynac_RlRh_s1_sat(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpynac_RlRh_s1_sat __builtin_HEXAGON_M2_mpy_nac_sat_lh_s1 + +/* ========================================================================== + Assembly Syntax: Rx32-=mpy(Rs32.l,Rt32.l):sat + C Intrinsic Prototype: Word32 Q6_R_mpynac_RlRl_sat(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpynac_RlRl_sat __builtin_HEXAGON_M2_mpy_nac_sat_ll_s0 + +/* ========================================================================== + Assembly Syntax: Rx32-=mpy(Rs32.l,Rt32.l):<<1:sat + C Intrinsic Prototype: Word32 Q6_R_mpynac_RlRl_s1_sat(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpynac_RlRl_s1_sat __builtin_HEXAGON_M2_mpy_nac_sat_ll_s1 + +/* ========================================================================== + Assembly Syntax: Rd32=mpy(Rs32.h,Rt32.h):rnd + C Intrinsic Prototype: Word32 Q6_R_mpy_RhRh_rnd(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpy_RhRh_rnd __builtin_HEXAGON_M2_mpy_rnd_hh_s0 + +/* ========================================================================== + Assembly Syntax: Rd32=mpy(Rs32.h,Rt32.h):<<1:rnd + C Intrinsic Prototype: Word32 Q6_R_mpy_RhRh_s1_rnd(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpy_RhRh_s1_rnd __builtin_HEXAGON_M2_mpy_rnd_hh_s1 + +/* ========================================================================== + Assembly Syntax: Rd32=mpy(Rs32.h,Rt32.l):rnd + C Intrinsic Prototype: Word32 Q6_R_mpy_RhRl_rnd(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpy_RhRl_rnd __builtin_HEXAGON_M2_mpy_rnd_hl_s0 + +/* ========================================================================== + Assembly Syntax: Rd32=mpy(Rs32.h,Rt32.l):<<1:rnd + C Intrinsic Prototype: Word32 Q6_R_mpy_RhRl_s1_rnd(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpy_RhRl_s1_rnd __builtin_HEXAGON_M2_mpy_rnd_hl_s1 + +/* ========================================================================== + Assembly Syntax: Rd32=mpy(Rs32.l,Rt32.h):rnd + C Intrinsic Prototype: Word32 Q6_R_mpy_RlRh_rnd(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpy_RlRh_rnd __builtin_HEXAGON_M2_mpy_rnd_lh_s0 + +/* ========================================================================== + Assembly Syntax: Rd32=mpy(Rs32.l,Rt32.h):<<1:rnd + C Intrinsic Prototype: Word32 Q6_R_mpy_RlRh_s1_rnd(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpy_RlRh_s1_rnd __builtin_HEXAGON_M2_mpy_rnd_lh_s1 + +/* ========================================================================== + Assembly Syntax: Rd32=mpy(Rs32.l,Rt32.l):rnd + C Intrinsic Prototype: Word32 Q6_R_mpy_RlRl_rnd(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpy_RlRl_rnd __builtin_HEXAGON_M2_mpy_rnd_ll_s0 + +/* ========================================================================== + Assembly Syntax: Rd32=mpy(Rs32.l,Rt32.l):<<1:rnd + C Intrinsic Prototype: Word32 Q6_R_mpy_RlRl_s1_rnd(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpy_RlRl_s1_rnd __builtin_HEXAGON_M2_mpy_rnd_ll_s1 + +/* ========================================================================== + Assembly Syntax: Rd32=mpy(Rs32.h,Rt32.h):sat + C Intrinsic Prototype: Word32 Q6_R_mpy_RhRh_sat(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpy_RhRh_sat __builtin_HEXAGON_M2_mpy_sat_hh_s0 + +/* ========================================================================== + Assembly Syntax: Rd32=mpy(Rs32.h,Rt32.h):<<1:sat + C Intrinsic Prototype: Word32 Q6_R_mpy_RhRh_s1_sat(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpy_RhRh_s1_sat __builtin_HEXAGON_M2_mpy_sat_hh_s1 + +/* ========================================================================== + Assembly Syntax: Rd32=mpy(Rs32.h,Rt32.l):sat + C Intrinsic Prototype: Word32 Q6_R_mpy_RhRl_sat(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpy_RhRl_sat __builtin_HEXAGON_M2_mpy_sat_hl_s0 + +/* ========================================================================== + Assembly Syntax: Rd32=mpy(Rs32.h,Rt32.l):<<1:sat + C Intrinsic Prototype: Word32 Q6_R_mpy_RhRl_s1_sat(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpy_RhRl_s1_sat __builtin_HEXAGON_M2_mpy_sat_hl_s1 + +/* ========================================================================== + Assembly Syntax: Rd32=mpy(Rs32.l,Rt32.h):sat + C Intrinsic Prototype: Word32 Q6_R_mpy_RlRh_sat(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpy_RlRh_sat __builtin_HEXAGON_M2_mpy_sat_lh_s0 + +/* ========================================================================== + Assembly Syntax: Rd32=mpy(Rs32.l,Rt32.h):<<1:sat + C Intrinsic Prototype: Word32 Q6_R_mpy_RlRh_s1_sat(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpy_RlRh_s1_sat __builtin_HEXAGON_M2_mpy_sat_lh_s1 + +/* ========================================================================== + Assembly Syntax: Rd32=mpy(Rs32.l,Rt32.l):sat + C Intrinsic Prototype: Word32 Q6_R_mpy_RlRl_sat(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpy_RlRl_sat __builtin_HEXAGON_M2_mpy_sat_ll_s0 + +/* ========================================================================== + Assembly Syntax: Rd32=mpy(Rs32.l,Rt32.l):<<1:sat + C Intrinsic Prototype: Word32 Q6_R_mpy_RlRl_s1_sat(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpy_RlRl_s1_sat __builtin_HEXAGON_M2_mpy_sat_ll_s1 + +/* ========================================================================== + Assembly Syntax: Rd32=mpy(Rs32.h,Rt32.h):rnd:sat + C Intrinsic Prototype: Word32 Q6_R_mpy_RhRh_rnd_sat(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpy_RhRh_rnd_sat __builtin_HEXAGON_M2_mpy_sat_rnd_hh_s0 + +/* ========================================================================== + Assembly Syntax: Rd32=mpy(Rs32.h,Rt32.h):<<1:rnd:sat + C Intrinsic Prototype: Word32 Q6_R_mpy_RhRh_s1_rnd_sat(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpy_RhRh_s1_rnd_sat __builtin_HEXAGON_M2_mpy_sat_rnd_hh_s1 + +/* ========================================================================== + Assembly Syntax: Rd32=mpy(Rs32.h,Rt32.l):rnd:sat + C Intrinsic Prototype: Word32 Q6_R_mpy_RhRl_rnd_sat(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpy_RhRl_rnd_sat __builtin_HEXAGON_M2_mpy_sat_rnd_hl_s0 + +/* ========================================================================== + Assembly Syntax: Rd32=mpy(Rs32.h,Rt32.l):<<1:rnd:sat + C Intrinsic Prototype: Word32 Q6_R_mpy_RhRl_s1_rnd_sat(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpy_RhRl_s1_rnd_sat __builtin_HEXAGON_M2_mpy_sat_rnd_hl_s1 + +/* ========================================================================== + Assembly Syntax: Rd32=mpy(Rs32.l,Rt32.h):rnd:sat + C Intrinsic Prototype: Word32 Q6_R_mpy_RlRh_rnd_sat(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpy_RlRh_rnd_sat __builtin_HEXAGON_M2_mpy_sat_rnd_lh_s0 + +/* ========================================================================== + Assembly Syntax: Rd32=mpy(Rs32.l,Rt32.h):<<1:rnd:sat + C Intrinsic Prototype: Word32 Q6_R_mpy_RlRh_s1_rnd_sat(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpy_RlRh_s1_rnd_sat __builtin_HEXAGON_M2_mpy_sat_rnd_lh_s1 + +/* ========================================================================== + Assembly Syntax: Rd32=mpy(Rs32.l,Rt32.l):rnd:sat + C Intrinsic Prototype: Word32 Q6_R_mpy_RlRl_rnd_sat(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpy_RlRl_rnd_sat __builtin_HEXAGON_M2_mpy_sat_rnd_ll_s0 + +/* ========================================================================== + Assembly Syntax: Rd32=mpy(Rs32.l,Rt32.l):<<1:rnd:sat + C Intrinsic Prototype: Word32 Q6_R_mpy_RlRl_s1_rnd_sat(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpy_RlRl_s1_rnd_sat __builtin_HEXAGON_M2_mpy_sat_rnd_ll_s1 + +/* ========================================================================== + Assembly Syntax: Rd32=mpy(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_mpy_RR(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpy_RR __builtin_HEXAGON_M2_mpy_up + +/* ========================================================================== + Assembly Syntax: Rd32=mpy(Rs32,Rt32):<<1 + C Intrinsic Prototype: Word32 Q6_R_mpy_RR_s1(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpy_RR_s1 __builtin_HEXAGON_M2_mpy_up_s1 + +/* ========================================================================== + Assembly Syntax: Rd32=mpy(Rs32,Rt32):<<1:sat + C Intrinsic Prototype: Word32 Q6_R_mpy_RR_s1_sat(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpy_RR_s1_sat __builtin_HEXAGON_M2_mpy_up_s1_sat + +/* ========================================================================== + Assembly Syntax: Rxx32+=mpy(Rs32.h,Rt32.h) + C Intrinsic Prototype: Word64 Q6_P_mpyacc_RhRh(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpyacc_RhRh __builtin_HEXAGON_M2_mpyd_acc_hh_s0 + +/* ========================================================================== + Assembly Syntax: Rxx32+=mpy(Rs32.h,Rt32.h):<<1 + C Intrinsic Prototype: Word64 Q6_P_mpyacc_RhRh_s1(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpyacc_RhRh_s1 __builtin_HEXAGON_M2_mpyd_acc_hh_s1 + +/* ========================================================================== + Assembly Syntax: Rxx32+=mpy(Rs32.h,Rt32.l) + C Intrinsic Prototype: Word64 Q6_P_mpyacc_RhRl(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpyacc_RhRl __builtin_HEXAGON_M2_mpyd_acc_hl_s0 + +/* ========================================================================== + Assembly Syntax: Rxx32+=mpy(Rs32.h,Rt32.l):<<1 + C Intrinsic Prototype: Word64 Q6_P_mpyacc_RhRl_s1(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpyacc_RhRl_s1 __builtin_HEXAGON_M2_mpyd_acc_hl_s1 + +/* ========================================================================== + Assembly Syntax: Rxx32+=mpy(Rs32.l,Rt32.h) + C Intrinsic Prototype: Word64 Q6_P_mpyacc_RlRh(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpyacc_RlRh __builtin_HEXAGON_M2_mpyd_acc_lh_s0 + +/* ========================================================================== + Assembly Syntax: Rxx32+=mpy(Rs32.l,Rt32.h):<<1 + C Intrinsic Prototype: Word64 Q6_P_mpyacc_RlRh_s1(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpyacc_RlRh_s1 __builtin_HEXAGON_M2_mpyd_acc_lh_s1 + +/* ========================================================================== + Assembly Syntax: Rxx32+=mpy(Rs32.l,Rt32.l) + C Intrinsic Prototype: Word64 Q6_P_mpyacc_RlRl(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpyacc_RlRl __builtin_HEXAGON_M2_mpyd_acc_ll_s0 + +/* ========================================================================== + Assembly Syntax: Rxx32+=mpy(Rs32.l,Rt32.l):<<1 + C Intrinsic Prototype: Word64 Q6_P_mpyacc_RlRl_s1(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpyacc_RlRl_s1 __builtin_HEXAGON_M2_mpyd_acc_ll_s1 + +/* ========================================================================== + Assembly Syntax: Rdd32=mpy(Rs32.h,Rt32.h) + C Intrinsic Prototype: Word64 Q6_P_mpy_RhRh(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpy_RhRh __builtin_HEXAGON_M2_mpyd_hh_s0 + +/* ========================================================================== + Assembly Syntax: Rdd32=mpy(Rs32.h,Rt32.h):<<1 + C Intrinsic Prototype: Word64 Q6_P_mpy_RhRh_s1(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpy_RhRh_s1 __builtin_HEXAGON_M2_mpyd_hh_s1 + +/* ========================================================================== + Assembly Syntax: Rdd32=mpy(Rs32.h,Rt32.l) + C Intrinsic Prototype: Word64 Q6_P_mpy_RhRl(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpy_RhRl __builtin_HEXAGON_M2_mpyd_hl_s0 + +/* ========================================================================== + Assembly Syntax: Rdd32=mpy(Rs32.h,Rt32.l):<<1 + C Intrinsic Prototype: Word64 Q6_P_mpy_RhRl_s1(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpy_RhRl_s1 __builtin_HEXAGON_M2_mpyd_hl_s1 + +/* ========================================================================== + Assembly Syntax: Rdd32=mpy(Rs32.l,Rt32.h) + C Intrinsic Prototype: Word64 Q6_P_mpy_RlRh(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpy_RlRh __builtin_HEXAGON_M2_mpyd_lh_s0 + +/* ========================================================================== + Assembly Syntax: Rdd32=mpy(Rs32.l,Rt32.h):<<1 + C Intrinsic Prototype: Word64 Q6_P_mpy_RlRh_s1(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpy_RlRh_s1 __builtin_HEXAGON_M2_mpyd_lh_s1 + +/* ========================================================================== + Assembly Syntax: Rdd32=mpy(Rs32.l,Rt32.l) + C Intrinsic Prototype: Word64 Q6_P_mpy_RlRl(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpy_RlRl __builtin_HEXAGON_M2_mpyd_ll_s0 + +/* ========================================================================== + Assembly Syntax: Rdd32=mpy(Rs32.l,Rt32.l):<<1 + C Intrinsic Prototype: Word64 Q6_P_mpy_RlRl_s1(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpy_RlRl_s1 __builtin_HEXAGON_M2_mpyd_ll_s1 + +/* ========================================================================== + Assembly Syntax: Rxx32-=mpy(Rs32.h,Rt32.h) + C Intrinsic Prototype: Word64 Q6_P_mpynac_RhRh(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpynac_RhRh __builtin_HEXAGON_M2_mpyd_nac_hh_s0 + +/* ========================================================================== + Assembly Syntax: Rxx32-=mpy(Rs32.h,Rt32.h):<<1 + C Intrinsic Prototype: Word64 Q6_P_mpynac_RhRh_s1(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpynac_RhRh_s1 __builtin_HEXAGON_M2_mpyd_nac_hh_s1 + +/* ========================================================================== + Assembly Syntax: Rxx32-=mpy(Rs32.h,Rt32.l) + C Intrinsic Prototype: Word64 Q6_P_mpynac_RhRl(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpynac_RhRl __builtin_HEXAGON_M2_mpyd_nac_hl_s0 + +/* ========================================================================== + Assembly Syntax: Rxx32-=mpy(Rs32.h,Rt32.l):<<1 + C Intrinsic Prototype: Word64 Q6_P_mpynac_RhRl_s1(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpynac_RhRl_s1 __builtin_HEXAGON_M2_mpyd_nac_hl_s1 + +/* ========================================================================== + Assembly Syntax: Rxx32-=mpy(Rs32.l,Rt32.h) + C Intrinsic Prototype: Word64 Q6_P_mpynac_RlRh(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpynac_RlRh __builtin_HEXAGON_M2_mpyd_nac_lh_s0 + +/* ========================================================================== + Assembly Syntax: Rxx32-=mpy(Rs32.l,Rt32.h):<<1 + C Intrinsic Prototype: Word64 Q6_P_mpynac_RlRh_s1(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpynac_RlRh_s1 __builtin_HEXAGON_M2_mpyd_nac_lh_s1 + +/* ========================================================================== + Assembly Syntax: Rxx32-=mpy(Rs32.l,Rt32.l) + C Intrinsic Prototype: Word64 Q6_P_mpynac_RlRl(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpynac_RlRl __builtin_HEXAGON_M2_mpyd_nac_ll_s0 + +/* ========================================================================== + Assembly Syntax: Rxx32-=mpy(Rs32.l,Rt32.l):<<1 + C Intrinsic Prototype: Word64 Q6_P_mpynac_RlRl_s1(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpynac_RlRl_s1 __builtin_HEXAGON_M2_mpyd_nac_ll_s1 + +/* ========================================================================== + Assembly Syntax: Rdd32=mpy(Rs32.h,Rt32.h):rnd + C Intrinsic Prototype: Word64 Q6_P_mpy_RhRh_rnd(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpy_RhRh_rnd __builtin_HEXAGON_M2_mpyd_rnd_hh_s0 + +/* ========================================================================== + Assembly Syntax: Rdd32=mpy(Rs32.h,Rt32.h):<<1:rnd + C Intrinsic Prototype: Word64 Q6_P_mpy_RhRh_s1_rnd(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpy_RhRh_s1_rnd __builtin_HEXAGON_M2_mpyd_rnd_hh_s1 + +/* ========================================================================== + Assembly Syntax: Rdd32=mpy(Rs32.h,Rt32.l):rnd + C Intrinsic Prototype: Word64 Q6_P_mpy_RhRl_rnd(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpy_RhRl_rnd __builtin_HEXAGON_M2_mpyd_rnd_hl_s0 + +/* ========================================================================== + Assembly Syntax: Rdd32=mpy(Rs32.h,Rt32.l):<<1:rnd + C Intrinsic Prototype: Word64 Q6_P_mpy_RhRl_s1_rnd(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpy_RhRl_s1_rnd __builtin_HEXAGON_M2_mpyd_rnd_hl_s1 + +/* ========================================================================== + Assembly Syntax: Rdd32=mpy(Rs32.l,Rt32.h):rnd + C Intrinsic Prototype: Word64 Q6_P_mpy_RlRh_rnd(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpy_RlRh_rnd __builtin_HEXAGON_M2_mpyd_rnd_lh_s0 + +/* ========================================================================== + Assembly Syntax: Rdd32=mpy(Rs32.l,Rt32.h):<<1:rnd + C Intrinsic Prototype: Word64 Q6_P_mpy_RlRh_s1_rnd(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpy_RlRh_s1_rnd __builtin_HEXAGON_M2_mpyd_rnd_lh_s1 + +/* ========================================================================== + Assembly Syntax: Rdd32=mpy(Rs32.l,Rt32.l):rnd + C Intrinsic Prototype: Word64 Q6_P_mpy_RlRl_rnd(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpy_RlRl_rnd __builtin_HEXAGON_M2_mpyd_rnd_ll_s0 + +/* ========================================================================== + Assembly Syntax: Rdd32=mpy(Rs32.l,Rt32.l):<<1:rnd + C Intrinsic Prototype: Word64 Q6_P_mpy_RlRl_s1_rnd(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpy_RlRl_s1_rnd __builtin_HEXAGON_M2_mpyd_rnd_ll_s1 + +/* ========================================================================== + Assembly Syntax: Rd32=mpyi(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_mpyi_RR(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpyi_RR __builtin_HEXAGON_M2_mpyi + +/* ========================================================================== + Assembly Syntax: Rd32=mpyi(Rs32,#m9) + C Intrinsic Prototype: Word32 Q6_R_mpyi_RI(Word32 Rs, Word32 Im9) + Instruction Type: M + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_mpyi_RI __builtin_HEXAGON_M2_mpysmi + +/* ========================================================================== + Assembly Syntax: Rd32=mpysu(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_mpysu_RR(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpysu_RR __builtin_HEXAGON_M2_mpysu_up + +/* ========================================================================== + Assembly Syntax: Rx32+=mpyu(Rs32.h,Rt32.h) + C Intrinsic Prototype: Word32 Q6_R_mpyuacc_RhRh(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpyuacc_RhRh __builtin_HEXAGON_M2_mpyu_acc_hh_s0 + +/* ========================================================================== + Assembly Syntax: Rx32+=mpyu(Rs32.h,Rt32.h):<<1 + C Intrinsic Prototype: Word32 Q6_R_mpyuacc_RhRh_s1(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpyuacc_RhRh_s1 __builtin_HEXAGON_M2_mpyu_acc_hh_s1 + +/* ========================================================================== + Assembly Syntax: Rx32+=mpyu(Rs32.h,Rt32.l) + C Intrinsic Prototype: Word32 Q6_R_mpyuacc_RhRl(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpyuacc_RhRl __builtin_HEXAGON_M2_mpyu_acc_hl_s0 + +/* ========================================================================== + Assembly Syntax: Rx32+=mpyu(Rs32.h,Rt32.l):<<1 + C Intrinsic Prototype: Word32 Q6_R_mpyuacc_RhRl_s1(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpyuacc_RhRl_s1 __builtin_HEXAGON_M2_mpyu_acc_hl_s1 + +/* ========================================================================== + Assembly Syntax: Rx32+=mpyu(Rs32.l,Rt32.h) + C Intrinsic Prototype: Word32 Q6_R_mpyuacc_RlRh(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpyuacc_RlRh __builtin_HEXAGON_M2_mpyu_acc_lh_s0 + +/* ========================================================================== + Assembly Syntax: Rx32+=mpyu(Rs32.l,Rt32.h):<<1 + C Intrinsic Prototype: Word32 Q6_R_mpyuacc_RlRh_s1(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpyuacc_RlRh_s1 __builtin_HEXAGON_M2_mpyu_acc_lh_s1 + +/* ========================================================================== + Assembly Syntax: Rx32+=mpyu(Rs32.l,Rt32.l) + C Intrinsic Prototype: Word32 Q6_R_mpyuacc_RlRl(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpyuacc_RlRl __builtin_HEXAGON_M2_mpyu_acc_ll_s0 + +/* ========================================================================== + Assembly Syntax: Rx32+=mpyu(Rs32.l,Rt32.l):<<1 + C Intrinsic Prototype: Word32 Q6_R_mpyuacc_RlRl_s1(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpyuacc_RlRl_s1 __builtin_HEXAGON_M2_mpyu_acc_ll_s1 + +/* ========================================================================== + Assembly Syntax: Rd32=mpyu(Rs32.h,Rt32.h) + C Intrinsic Prototype: UWord32 Q6_R_mpyu_RhRh(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpyu_RhRh __builtin_HEXAGON_M2_mpyu_hh_s0 + +/* ========================================================================== + Assembly Syntax: Rd32=mpyu(Rs32.h,Rt32.h):<<1 + C Intrinsic Prototype: UWord32 Q6_R_mpyu_RhRh_s1(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpyu_RhRh_s1 __builtin_HEXAGON_M2_mpyu_hh_s1 + +/* ========================================================================== + Assembly Syntax: Rd32=mpyu(Rs32.h,Rt32.l) + C Intrinsic Prototype: UWord32 Q6_R_mpyu_RhRl(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpyu_RhRl __builtin_HEXAGON_M2_mpyu_hl_s0 + +/* ========================================================================== + Assembly Syntax: Rd32=mpyu(Rs32.h,Rt32.l):<<1 + C Intrinsic Prototype: UWord32 Q6_R_mpyu_RhRl_s1(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpyu_RhRl_s1 __builtin_HEXAGON_M2_mpyu_hl_s1 + +/* ========================================================================== + Assembly Syntax: Rd32=mpyu(Rs32.l,Rt32.h) + C Intrinsic Prototype: UWord32 Q6_R_mpyu_RlRh(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpyu_RlRh __builtin_HEXAGON_M2_mpyu_lh_s0 + +/* ========================================================================== + Assembly Syntax: Rd32=mpyu(Rs32.l,Rt32.h):<<1 + C Intrinsic Prototype: UWord32 Q6_R_mpyu_RlRh_s1(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpyu_RlRh_s1 __builtin_HEXAGON_M2_mpyu_lh_s1 + +/* ========================================================================== + Assembly Syntax: Rd32=mpyu(Rs32.l,Rt32.l) + C Intrinsic Prototype: UWord32 Q6_R_mpyu_RlRl(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpyu_RlRl __builtin_HEXAGON_M2_mpyu_ll_s0 + +/* ========================================================================== + Assembly Syntax: Rd32=mpyu(Rs32.l,Rt32.l):<<1 + C Intrinsic Prototype: UWord32 Q6_R_mpyu_RlRl_s1(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpyu_RlRl_s1 __builtin_HEXAGON_M2_mpyu_ll_s1 + +/* ========================================================================== + Assembly Syntax: Rx32-=mpyu(Rs32.h,Rt32.h) + C Intrinsic Prototype: Word32 Q6_R_mpyunac_RhRh(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpyunac_RhRh __builtin_HEXAGON_M2_mpyu_nac_hh_s0 + +/* ========================================================================== + Assembly Syntax: Rx32-=mpyu(Rs32.h,Rt32.h):<<1 + C Intrinsic Prototype: Word32 Q6_R_mpyunac_RhRh_s1(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpyunac_RhRh_s1 __builtin_HEXAGON_M2_mpyu_nac_hh_s1 + +/* ========================================================================== + Assembly Syntax: Rx32-=mpyu(Rs32.h,Rt32.l) + C Intrinsic Prototype: Word32 Q6_R_mpyunac_RhRl(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpyunac_RhRl __builtin_HEXAGON_M2_mpyu_nac_hl_s0 + +/* ========================================================================== + Assembly Syntax: Rx32-=mpyu(Rs32.h,Rt32.l):<<1 + C Intrinsic Prototype: Word32 Q6_R_mpyunac_RhRl_s1(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpyunac_RhRl_s1 __builtin_HEXAGON_M2_mpyu_nac_hl_s1 + +/* ========================================================================== + Assembly Syntax: Rx32-=mpyu(Rs32.l,Rt32.h) + C Intrinsic Prototype: Word32 Q6_R_mpyunac_RlRh(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpyunac_RlRh __builtin_HEXAGON_M2_mpyu_nac_lh_s0 + +/* ========================================================================== + Assembly Syntax: Rx32-=mpyu(Rs32.l,Rt32.h):<<1 + C Intrinsic Prototype: Word32 Q6_R_mpyunac_RlRh_s1(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpyunac_RlRh_s1 __builtin_HEXAGON_M2_mpyu_nac_lh_s1 + +/* ========================================================================== + Assembly Syntax: Rx32-=mpyu(Rs32.l,Rt32.l) + C Intrinsic Prototype: Word32 Q6_R_mpyunac_RlRl(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpyunac_RlRl __builtin_HEXAGON_M2_mpyu_nac_ll_s0 + +/* ========================================================================== + Assembly Syntax: Rx32-=mpyu(Rs32.l,Rt32.l):<<1 + C Intrinsic Prototype: Word32 Q6_R_mpyunac_RlRl_s1(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpyunac_RlRl_s1 __builtin_HEXAGON_M2_mpyu_nac_ll_s1 + +/* ========================================================================== + Assembly Syntax: Rd32=mpyu(Rs32,Rt32) + C Intrinsic Prototype: UWord32 Q6_R_mpyu_RR(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpyu_RR __builtin_HEXAGON_M2_mpyu_up + +/* ========================================================================== + Assembly Syntax: Rxx32+=mpyu(Rs32.h,Rt32.h) + C Intrinsic Prototype: Word64 Q6_P_mpyuacc_RhRh(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpyuacc_RhRh __builtin_HEXAGON_M2_mpyud_acc_hh_s0 + +/* ========================================================================== + Assembly Syntax: Rxx32+=mpyu(Rs32.h,Rt32.h):<<1 + C Intrinsic Prototype: Word64 Q6_P_mpyuacc_RhRh_s1(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpyuacc_RhRh_s1 __builtin_HEXAGON_M2_mpyud_acc_hh_s1 + +/* ========================================================================== + Assembly Syntax: Rxx32+=mpyu(Rs32.h,Rt32.l) + C Intrinsic Prototype: Word64 Q6_P_mpyuacc_RhRl(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpyuacc_RhRl __builtin_HEXAGON_M2_mpyud_acc_hl_s0 + +/* ========================================================================== + Assembly Syntax: Rxx32+=mpyu(Rs32.h,Rt32.l):<<1 + C Intrinsic Prototype: Word64 Q6_P_mpyuacc_RhRl_s1(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpyuacc_RhRl_s1 __builtin_HEXAGON_M2_mpyud_acc_hl_s1 + +/* ========================================================================== + Assembly Syntax: Rxx32+=mpyu(Rs32.l,Rt32.h) + C Intrinsic Prototype: Word64 Q6_P_mpyuacc_RlRh(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpyuacc_RlRh __builtin_HEXAGON_M2_mpyud_acc_lh_s0 + +/* ========================================================================== + Assembly Syntax: Rxx32+=mpyu(Rs32.l,Rt32.h):<<1 + C Intrinsic Prototype: Word64 Q6_P_mpyuacc_RlRh_s1(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpyuacc_RlRh_s1 __builtin_HEXAGON_M2_mpyud_acc_lh_s1 + +/* ========================================================================== + Assembly Syntax: Rxx32+=mpyu(Rs32.l,Rt32.l) + C Intrinsic Prototype: Word64 Q6_P_mpyuacc_RlRl(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpyuacc_RlRl __builtin_HEXAGON_M2_mpyud_acc_ll_s0 + +/* ========================================================================== + Assembly Syntax: Rxx32+=mpyu(Rs32.l,Rt32.l):<<1 + C Intrinsic Prototype: Word64 Q6_P_mpyuacc_RlRl_s1(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpyuacc_RlRl_s1 __builtin_HEXAGON_M2_mpyud_acc_ll_s1 + +/* ========================================================================== + Assembly Syntax: Rdd32=mpyu(Rs32.h,Rt32.h) + C Intrinsic Prototype: UWord64 Q6_P_mpyu_RhRh(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpyu_RhRh __builtin_HEXAGON_M2_mpyud_hh_s0 + +/* ========================================================================== + Assembly Syntax: Rdd32=mpyu(Rs32.h,Rt32.h):<<1 + C Intrinsic Prototype: UWord64 Q6_P_mpyu_RhRh_s1(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpyu_RhRh_s1 __builtin_HEXAGON_M2_mpyud_hh_s1 + +/* ========================================================================== + Assembly Syntax: Rdd32=mpyu(Rs32.h,Rt32.l) + C Intrinsic Prototype: UWord64 Q6_P_mpyu_RhRl(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpyu_RhRl __builtin_HEXAGON_M2_mpyud_hl_s0 + +/* ========================================================================== + Assembly Syntax: Rdd32=mpyu(Rs32.h,Rt32.l):<<1 + C Intrinsic Prototype: UWord64 Q6_P_mpyu_RhRl_s1(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpyu_RhRl_s1 __builtin_HEXAGON_M2_mpyud_hl_s1 + +/* ========================================================================== + Assembly Syntax: Rdd32=mpyu(Rs32.l,Rt32.h) + C Intrinsic Prototype: UWord64 Q6_P_mpyu_RlRh(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpyu_RlRh __builtin_HEXAGON_M2_mpyud_lh_s0 + +/* ========================================================================== + Assembly Syntax: Rdd32=mpyu(Rs32.l,Rt32.h):<<1 + C Intrinsic Prototype: UWord64 Q6_P_mpyu_RlRh_s1(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpyu_RlRh_s1 __builtin_HEXAGON_M2_mpyud_lh_s1 + +/* ========================================================================== + Assembly Syntax: Rdd32=mpyu(Rs32.l,Rt32.l) + C Intrinsic Prototype: UWord64 Q6_P_mpyu_RlRl(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpyu_RlRl __builtin_HEXAGON_M2_mpyud_ll_s0 + +/* ========================================================================== + Assembly Syntax: Rdd32=mpyu(Rs32.l,Rt32.l):<<1 + C Intrinsic Prototype: UWord64 Q6_P_mpyu_RlRl_s1(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpyu_RlRl_s1 __builtin_HEXAGON_M2_mpyud_ll_s1 + +/* ========================================================================== + Assembly Syntax: Rxx32-=mpyu(Rs32.h,Rt32.h) + C Intrinsic Prototype: Word64 Q6_P_mpyunac_RhRh(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpyunac_RhRh __builtin_HEXAGON_M2_mpyud_nac_hh_s0 + +/* ========================================================================== + Assembly Syntax: Rxx32-=mpyu(Rs32.h,Rt32.h):<<1 + C Intrinsic Prototype: Word64 Q6_P_mpyunac_RhRh_s1(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpyunac_RhRh_s1 __builtin_HEXAGON_M2_mpyud_nac_hh_s1 + +/* ========================================================================== + Assembly Syntax: Rxx32-=mpyu(Rs32.h,Rt32.l) + C Intrinsic Prototype: Word64 Q6_P_mpyunac_RhRl(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpyunac_RhRl __builtin_HEXAGON_M2_mpyud_nac_hl_s0 + +/* ========================================================================== + Assembly Syntax: Rxx32-=mpyu(Rs32.h,Rt32.l):<<1 + C Intrinsic Prototype: Word64 Q6_P_mpyunac_RhRl_s1(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpyunac_RhRl_s1 __builtin_HEXAGON_M2_mpyud_nac_hl_s1 + +/* ========================================================================== + Assembly Syntax: Rxx32-=mpyu(Rs32.l,Rt32.h) + C Intrinsic Prototype: Word64 Q6_P_mpyunac_RlRh(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpyunac_RlRh __builtin_HEXAGON_M2_mpyud_nac_lh_s0 + +/* ========================================================================== + Assembly Syntax: Rxx32-=mpyu(Rs32.l,Rt32.h):<<1 + C Intrinsic Prototype: Word64 Q6_P_mpyunac_RlRh_s1(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpyunac_RlRh_s1 __builtin_HEXAGON_M2_mpyud_nac_lh_s1 + +/* ========================================================================== + Assembly Syntax: Rxx32-=mpyu(Rs32.l,Rt32.l) + C Intrinsic Prototype: Word64 Q6_P_mpyunac_RlRl(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpyunac_RlRl __builtin_HEXAGON_M2_mpyud_nac_ll_s0 + +/* ========================================================================== + Assembly Syntax: Rxx32-=mpyu(Rs32.l,Rt32.l):<<1 + C Intrinsic Prototype: Word64 Q6_P_mpyunac_RlRl_s1(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_mpyunac_RlRl_s1 __builtin_HEXAGON_M2_mpyud_nac_ll_s1 + +/* ========================================================================== + Assembly Syntax: Rd32=mpyui(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_mpyui_RR(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_mpyui_RR __builtin_HEXAGON_M2_mpyui + +/* ========================================================================== + Assembly Syntax: Rx32-=add(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_addnac_RR(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_addnac_RR __builtin_HEXAGON_M2_nacci + +/* ========================================================================== + Assembly Syntax: Rx32-=add(Rs32,#s8) + C Intrinsic Prototype: Word32 Q6_R_addnac_RI(Word32 Rx, Word32 Rs, Word32 Is8) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_addnac_RI __builtin_HEXAGON_M2_naccii + +/* ========================================================================== + Assembly Syntax: Rx32+=sub(Rt32,Rs32) + C Intrinsic Prototype: Word32 Q6_R_subacc_RR(Word32 Rx, Word32 Rt, Word32 Rs) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_subacc_RR __builtin_HEXAGON_M2_subacc + +/* ========================================================================== + Assembly Syntax: Rdd32=vabsdiffh(Rtt32,Rss32) + C Intrinsic Prototype: Word64 Q6_P_vabsdiffh_PP(Word64 Rtt, Word64 Rss) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vabsdiffh_PP __builtin_HEXAGON_M2_vabsdiffh + +/* ========================================================================== + Assembly Syntax: Rdd32=vabsdiffw(Rtt32,Rss32) + C Intrinsic Prototype: Word64 Q6_P_vabsdiffw_PP(Word64 Rtt, Word64 Rss) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vabsdiffw_PP __builtin_HEXAGON_M2_vabsdiffw + +/* ========================================================================== + Assembly Syntax: Rxx32+=vcmpyi(Rss32,Rtt32):sat + C Intrinsic Prototype: Word64 Q6_P_vcmpyiacc_PP_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vcmpyiacc_PP_sat __builtin_HEXAGON_M2_vcmac_s0_sat_i + +/* ========================================================================== + Assembly Syntax: Rxx32+=vcmpyr(Rss32,Rtt32):sat + C Intrinsic Prototype: Word64 Q6_P_vcmpyracc_PP_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vcmpyracc_PP_sat __builtin_HEXAGON_M2_vcmac_s0_sat_r + +/* ========================================================================== + Assembly Syntax: Rdd32=vcmpyi(Rss32,Rtt32):sat + C Intrinsic Prototype: Word64 Q6_P_vcmpyi_PP_sat(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vcmpyi_PP_sat __builtin_HEXAGON_M2_vcmpy_s0_sat_i + +/* ========================================================================== + Assembly Syntax: Rdd32=vcmpyr(Rss32,Rtt32):sat + C Intrinsic Prototype: Word64 Q6_P_vcmpyr_PP_sat(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vcmpyr_PP_sat __builtin_HEXAGON_M2_vcmpy_s0_sat_r + +/* ========================================================================== + Assembly Syntax: Rdd32=vcmpyi(Rss32,Rtt32):<<1:sat + C Intrinsic Prototype: Word64 Q6_P_vcmpyi_PP_s1_sat(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vcmpyi_PP_s1_sat __builtin_HEXAGON_M2_vcmpy_s1_sat_i + +/* ========================================================================== + Assembly Syntax: Rdd32=vcmpyr(Rss32,Rtt32):<<1:sat + C Intrinsic Prototype: Word64 Q6_P_vcmpyr_PP_s1_sat(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vcmpyr_PP_s1_sat __builtin_HEXAGON_M2_vcmpy_s1_sat_r + +/* ========================================================================== + Assembly Syntax: Rxx32+=vdmpy(Rss32,Rtt32):sat + C Intrinsic Prototype: Word64 Q6_P_vdmpyacc_PP_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vdmpyacc_PP_sat __builtin_HEXAGON_M2_vdmacs_s0 + +/* ========================================================================== + Assembly Syntax: Rxx32+=vdmpy(Rss32,Rtt32):<<1:sat + C Intrinsic Prototype: Word64 Q6_P_vdmpyacc_PP_s1_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vdmpyacc_PP_s1_sat __builtin_HEXAGON_M2_vdmacs_s1 + +/* ========================================================================== + Assembly Syntax: Rd32=vdmpy(Rss32,Rtt32):rnd:sat + C Intrinsic Prototype: Word32 Q6_R_vdmpy_PP_rnd_sat(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_vdmpy_PP_rnd_sat __builtin_HEXAGON_M2_vdmpyrs_s0 + +/* ========================================================================== + Assembly Syntax: Rd32=vdmpy(Rss32,Rtt32):<<1:rnd:sat + C Intrinsic Prototype: Word32 Q6_R_vdmpy_PP_s1_rnd_sat(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_vdmpy_PP_s1_rnd_sat __builtin_HEXAGON_M2_vdmpyrs_s1 + +/* ========================================================================== + Assembly Syntax: Rdd32=vdmpy(Rss32,Rtt32):sat + C Intrinsic Prototype: Word64 Q6_P_vdmpy_PP_sat(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vdmpy_PP_sat __builtin_HEXAGON_M2_vdmpys_s0 + +/* ========================================================================== + Assembly Syntax: Rdd32=vdmpy(Rss32,Rtt32):<<1:sat + C Intrinsic Prototype: Word64 Q6_P_vdmpy_PP_s1_sat(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vdmpy_PP_s1_sat __builtin_HEXAGON_M2_vdmpys_s1 + +/* ========================================================================== + Assembly Syntax: Rxx32+=vmpyh(Rs32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_vmpyhacc_RR(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmpyhacc_RR __builtin_HEXAGON_M2_vmac2 + +/* ========================================================================== + Assembly Syntax: Rxx32+=vmpyeh(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_vmpyehacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmpyehacc_PP __builtin_HEXAGON_M2_vmac2es + +/* ========================================================================== + Assembly Syntax: Rxx32+=vmpyeh(Rss32,Rtt32):sat + C Intrinsic Prototype: Word64 Q6_P_vmpyehacc_PP_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmpyehacc_PP_sat __builtin_HEXAGON_M2_vmac2es_s0 + +/* ========================================================================== + Assembly Syntax: Rxx32+=vmpyeh(Rss32,Rtt32):<<1:sat + C Intrinsic Prototype: Word64 Q6_P_vmpyehacc_PP_s1_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmpyehacc_PP_s1_sat __builtin_HEXAGON_M2_vmac2es_s1 + +/* ========================================================================== + Assembly Syntax: Rxx32+=vmpyh(Rs32,Rt32):sat + C Intrinsic Prototype: Word64 Q6_P_vmpyhacc_RR_sat(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmpyhacc_RR_sat __builtin_HEXAGON_M2_vmac2s_s0 + +/* ========================================================================== + Assembly Syntax: Rxx32+=vmpyh(Rs32,Rt32):<<1:sat + C Intrinsic Prototype: Word64 Q6_P_vmpyhacc_RR_s1_sat(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmpyhacc_RR_s1_sat __builtin_HEXAGON_M2_vmac2s_s1 + +/* ========================================================================== + Assembly Syntax: Rxx32+=vmpyhsu(Rs32,Rt32):sat + C Intrinsic Prototype: Word64 Q6_P_vmpyhsuacc_RR_sat(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmpyhsuacc_RR_sat __builtin_HEXAGON_M2_vmac2su_s0 + +/* ========================================================================== + Assembly Syntax: Rxx32+=vmpyhsu(Rs32,Rt32):<<1:sat + C Intrinsic Prototype: Word64 Q6_P_vmpyhsuacc_RR_s1_sat(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmpyhsuacc_RR_s1_sat __builtin_HEXAGON_M2_vmac2su_s1 + +/* ========================================================================== + Assembly Syntax: Rdd32=vmpyeh(Rss32,Rtt32):sat + C Intrinsic Prototype: Word64 Q6_P_vmpyeh_PP_sat(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmpyeh_PP_sat __builtin_HEXAGON_M2_vmpy2es_s0 + +/* ========================================================================== + Assembly Syntax: Rdd32=vmpyeh(Rss32,Rtt32):<<1:sat + C Intrinsic Prototype: Word64 Q6_P_vmpyeh_PP_s1_sat(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmpyeh_PP_s1_sat __builtin_HEXAGON_M2_vmpy2es_s1 + +/* ========================================================================== + Assembly Syntax: Rdd32=vmpyh(Rs32,Rt32):sat + C Intrinsic Prototype: Word64 Q6_P_vmpyh_RR_sat(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmpyh_RR_sat __builtin_HEXAGON_M2_vmpy2s_s0 + +/* ========================================================================== + Assembly Syntax: Rd32=vmpyh(Rs32,Rt32):rnd:sat + C Intrinsic Prototype: Word32 Q6_R_vmpyh_RR_rnd_sat(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_vmpyh_RR_rnd_sat __builtin_HEXAGON_M2_vmpy2s_s0pack + +/* ========================================================================== + Assembly Syntax: Rdd32=vmpyh(Rs32,Rt32):<<1:sat + C Intrinsic Prototype: Word64 Q6_P_vmpyh_RR_s1_sat(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmpyh_RR_s1_sat __builtin_HEXAGON_M2_vmpy2s_s1 + +/* ========================================================================== + Assembly Syntax: Rd32=vmpyh(Rs32,Rt32):<<1:rnd:sat + C Intrinsic Prototype: Word32 Q6_R_vmpyh_RR_s1_rnd_sat(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_vmpyh_RR_s1_rnd_sat __builtin_HEXAGON_M2_vmpy2s_s1pack + +/* ========================================================================== + Assembly Syntax: Rdd32=vmpyhsu(Rs32,Rt32):sat + C Intrinsic Prototype: Word64 Q6_P_vmpyhsu_RR_sat(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmpyhsu_RR_sat __builtin_HEXAGON_M2_vmpy2su_s0 + +/* ========================================================================== + Assembly Syntax: Rdd32=vmpyhsu(Rs32,Rt32):<<1:sat + C Intrinsic Prototype: Word64 Q6_P_vmpyhsu_RR_s1_sat(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmpyhsu_RR_s1_sat __builtin_HEXAGON_M2_vmpy2su_s1 + +/* ========================================================================== + Assembly Syntax: Rd32=vraddh(Rss32,Rtt32) + C Intrinsic Prototype: Word32 Q6_R_vraddh_PP(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_vraddh_PP __builtin_HEXAGON_M2_vraddh + +/* ========================================================================== + Assembly Syntax: Rd32=vradduh(Rss32,Rtt32) + C Intrinsic Prototype: Word32 Q6_R_vradduh_PP(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_vradduh_PP __builtin_HEXAGON_M2_vradduh + +/* ========================================================================== + Assembly Syntax: Rxx32+=vrcmpyi(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_vrcmpyiacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vrcmpyiacc_PP __builtin_HEXAGON_M2_vrcmaci_s0 + +/* ========================================================================== + Assembly Syntax: Rxx32+=vrcmpyi(Rss32,Rtt32*) + C Intrinsic Prototype: Word64 Q6_P_vrcmpyiacc_PP_conj(Word64 Rxx, Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vrcmpyiacc_PP_conj __builtin_HEXAGON_M2_vrcmaci_s0c + +/* ========================================================================== + Assembly Syntax: Rxx32+=vrcmpyr(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_vrcmpyracc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vrcmpyracc_PP __builtin_HEXAGON_M2_vrcmacr_s0 + +/* ========================================================================== + Assembly Syntax: Rxx32+=vrcmpyr(Rss32,Rtt32*) + C Intrinsic Prototype: Word64 Q6_P_vrcmpyracc_PP_conj(Word64 Rxx, Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vrcmpyracc_PP_conj __builtin_HEXAGON_M2_vrcmacr_s0c + +/* ========================================================================== + Assembly Syntax: Rdd32=vrcmpyi(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_vrcmpyi_PP(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vrcmpyi_PP __builtin_HEXAGON_M2_vrcmpyi_s0 + +/* ========================================================================== + Assembly Syntax: Rdd32=vrcmpyi(Rss32,Rtt32*) + C Intrinsic Prototype: Word64 Q6_P_vrcmpyi_PP_conj(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vrcmpyi_PP_conj __builtin_HEXAGON_M2_vrcmpyi_s0c + +/* ========================================================================== + Assembly Syntax: Rdd32=vrcmpyr(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_vrcmpyr_PP(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vrcmpyr_PP __builtin_HEXAGON_M2_vrcmpyr_s0 + +/* ========================================================================== + Assembly Syntax: Rdd32=vrcmpyr(Rss32,Rtt32*) + C Intrinsic Prototype: Word64 Q6_P_vrcmpyr_PP_conj(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vrcmpyr_PP_conj __builtin_HEXAGON_M2_vrcmpyr_s0c + +/* ========================================================================== + Assembly Syntax: Rxx32+=vrcmpys(Rss32,Rt32):<<1:sat + C Intrinsic Prototype: Word64 Q6_P_vrcmpysacc_PR_s1_sat(Word64 Rxx, Word64 Rss, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_P_vrcmpysacc_PR_s1_sat __builtin_HEXAGON_M2_vrcmpys_acc_s1 + +/* ========================================================================== + Assembly Syntax: Rdd32=vrcmpys(Rss32,Rt32):<<1:sat + C Intrinsic Prototype: Word64 Q6_P_vrcmpys_PR_s1_sat(Word64 Rss, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_P_vrcmpys_PR_s1_sat __builtin_HEXAGON_M2_vrcmpys_s1 + +/* ========================================================================== + Assembly Syntax: Rd32=vrcmpys(Rss32,Rt32):<<1:rnd:sat + C Intrinsic Prototype: Word32 Q6_R_vrcmpys_PR_s1_rnd_sat(Word64 Rss, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_vrcmpys_PR_s1_rnd_sat __builtin_HEXAGON_M2_vrcmpys_s1rp + +/* ========================================================================== + Assembly Syntax: Rxx32+=vrmpyh(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_vrmpyhacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vrmpyhacc_PP __builtin_HEXAGON_M2_vrmac_s0 + +/* ========================================================================== + Assembly Syntax: Rdd32=vrmpyh(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_vrmpyh_PP(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vrmpyh_PP __builtin_HEXAGON_M2_vrmpy_s0 + +/* ========================================================================== + Assembly Syntax: Rx32^=xor(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_xorxacc_RR(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_xorxacc_RR __builtin_HEXAGON_M2_xor_xacc + +/* ========================================================================== + Assembly Syntax: Rx32&=and(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_andand_RR(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_andand_RR __builtin_HEXAGON_M4_and_and + +/* ========================================================================== + Assembly Syntax: Rx32&=and(Rs32,~Rt32) + C Intrinsic Prototype: Word32 Q6_R_andand_RnR(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_andand_RnR __builtin_HEXAGON_M4_and_andn + +/* ========================================================================== + Assembly Syntax: Rx32&=or(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_orand_RR(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_orand_RR __builtin_HEXAGON_M4_and_or + +/* ========================================================================== + Assembly Syntax: Rx32&=xor(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_xorand_RR(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_xorand_RR __builtin_HEXAGON_M4_and_xor + +/* ========================================================================== + Assembly Syntax: Rd32=cmpyiwh(Rss32,Rt32):<<1:rnd:sat + C Intrinsic Prototype: Word32 Q6_R_cmpyiwh_PR_s1_rnd_sat(Word64 Rss, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_cmpyiwh_PR_s1_rnd_sat __builtin_HEXAGON_M4_cmpyi_wh + +/* ========================================================================== + Assembly Syntax: Rd32=cmpyiwh(Rss32,Rt32*):<<1:rnd:sat + C Intrinsic Prototype: Word32 Q6_R_cmpyiwh_PR_conj_s1_rnd_sat(Word64 Rss, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_cmpyiwh_PR_conj_s1_rnd_sat __builtin_HEXAGON_M4_cmpyi_whc + +/* ========================================================================== + Assembly Syntax: Rd32=cmpyrwh(Rss32,Rt32):<<1:rnd:sat + C Intrinsic Prototype: Word32 Q6_R_cmpyrwh_PR_s1_rnd_sat(Word64 Rss, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_cmpyrwh_PR_s1_rnd_sat __builtin_HEXAGON_M4_cmpyr_wh + +/* ========================================================================== + Assembly Syntax: Rd32=cmpyrwh(Rss32,Rt32*):<<1:rnd:sat + C Intrinsic Prototype: Word32 Q6_R_cmpyrwh_PR_conj_s1_rnd_sat(Word64 Rss, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_cmpyrwh_PR_conj_s1_rnd_sat __builtin_HEXAGON_M4_cmpyr_whc + +/* ========================================================================== + Assembly Syntax: Rx32+=mpy(Rs32,Rt32):<<1:sat + C Intrinsic Prototype: Word32 Q6_R_mpyacc_RR_s1_sat(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpyacc_RR_s1_sat __builtin_HEXAGON_M4_mac_up_s1_sat + +/* ========================================================================== + Assembly Syntax: Rd32=add(#u6,mpyi(Rs32,#U6)) + C Intrinsic Prototype: Word32 Q6_R_add_mpyi_IRI(Word32 Iu6, Word32 Rs, Word32 IU6) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_add_mpyi_IRI __builtin_HEXAGON_M4_mpyri_addi + +/* ========================================================================== + Assembly Syntax: Rd32=add(Ru32,mpyi(Rs32,#u6)) + C Intrinsic Prototype: Word32 Q6_R_add_mpyi_RRI(Word32 Ru, Word32 Rs, Word32 Iu6) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_add_mpyi_RRI __builtin_HEXAGON_M4_mpyri_addr + +/* ========================================================================== + Assembly Syntax: Rd32=add(Ru32,mpyi(#u6:2,Rs32)) + C Intrinsic Prototype: Word32 Q6_R_add_mpyi_RIR(Word32 Ru, Word32 Iu6_2, Word32 Rs) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_add_mpyi_RIR __builtin_HEXAGON_M4_mpyri_addr_u2 + +/* ========================================================================== + Assembly Syntax: Rd32=add(#u6,mpyi(Rs32,Rt32)) + C Intrinsic Prototype: Word32 Q6_R_add_mpyi_IRR(Word32 Iu6, Word32 Rs, Word32 Rt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_add_mpyi_IRR __builtin_HEXAGON_M4_mpyrr_addi + +/* ========================================================================== + Assembly Syntax: Ry32=add(Ru32,mpyi(Ry32,Rs32)) + C Intrinsic Prototype: Word32 Q6_R_add_mpyi_RRR(Word32 Ru, Word32 Ry, Word32 Rs) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_add_mpyi_RRR __builtin_HEXAGON_M4_mpyrr_addr + +/* ========================================================================== + Assembly Syntax: Rx32-=mpy(Rs32,Rt32):<<1:sat + C Intrinsic Prototype: Word32 Q6_R_mpynac_RR_s1_sat(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpynac_RR_s1_sat __builtin_HEXAGON_M4_nac_up_s1_sat + +/* ========================================================================== + Assembly Syntax: Rx32|=and(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_andor_RR(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_andor_RR __builtin_HEXAGON_M4_or_and + +/* ========================================================================== + Assembly Syntax: Rx32|=and(Rs32,~Rt32) + C Intrinsic Prototype: Word32 Q6_R_andor_RnR(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_andor_RnR __builtin_HEXAGON_M4_or_andn + +/* ========================================================================== + Assembly Syntax: Rx32|=or(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_oror_RR(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_oror_RR __builtin_HEXAGON_M4_or_or + +/* ========================================================================== + Assembly Syntax: Rx32|=xor(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_xoror_RR(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_xoror_RR __builtin_HEXAGON_M4_or_xor + +/* ========================================================================== + Assembly Syntax: Rdd32=pmpyw(Rs32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_pmpyw_RR(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_pmpyw_RR __builtin_HEXAGON_M4_pmpyw + +/* ========================================================================== + Assembly Syntax: Rxx32^=pmpyw(Rs32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_pmpywxacc_RR(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_pmpywxacc_RR __builtin_HEXAGON_M4_pmpyw_acc + +/* ========================================================================== + Assembly Syntax: Rdd32=vpmpyh(Rs32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_vpmpyh_RR(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vpmpyh_RR __builtin_HEXAGON_M4_vpmpyh + +/* ========================================================================== + Assembly Syntax: Rxx32^=vpmpyh(Rs32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_vpmpyhxacc_RR(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vpmpyhxacc_RR __builtin_HEXAGON_M4_vpmpyh_acc + +/* ========================================================================== + Assembly Syntax: Rxx32+=vrmpyweh(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_vrmpywehacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vrmpywehacc_PP __builtin_HEXAGON_M4_vrmpyeh_acc_s0 + +/* ========================================================================== + Assembly Syntax: Rxx32+=vrmpyweh(Rss32,Rtt32):<<1 + C Intrinsic Prototype: Word64 Q6_P_vrmpywehacc_PP_s1(Word64 Rxx, Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vrmpywehacc_PP_s1 __builtin_HEXAGON_M4_vrmpyeh_acc_s1 + +/* ========================================================================== + Assembly Syntax: Rdd32=vrmpyweh(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_vrmpyweh_PP(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vrmpyweh_PP __builtin_HEXAGON_M4_vrmpyeh_s0 + +/* ========================================================================== + Assembly Syntax: Rdd32=vrmpyweh(Rss32,Rtt32):<<1 + C Intrinsic Prototype: Word64 Q6_P_vrmpyweh_PP_s1(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vrmpyweh_PP_s1 __builtin_HEXAGON_M4_vrmpyeh_s1 + +/* ========================================================================== + Assembly Syntax: Rxx32+=vrmpywoh(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_vrmpywohacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vrmpywohacc_PP __builtin_HEXAGON_M4_vrmpyoh_acc_s0 + +/* ========================================================================== + Assembly Syntax: Rxx32+=vrmpywoh(Rss32,Rtt32):<<1 + C Intrinsic Prototype: Word64 Q6_P_vrmpywohacc_PP_s1(Word64 Rxx, Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vrmpywohacc_PP_s1 __builtin_HEXAGON_M4_vrmpyoh_acc_s1 + +/* ========================================================================== + Assembly Syntax: Rdd32=vrmpywoh(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_vrmpywoh_PP(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vrmpywoh_PP __builtin_HEXAGON_M4_vrmpyoh_s0 + +/* ========================================================================== + Assembly Syntax: Rdd32=vrmpywoh(Rss32,Rtt32):<<1 + C Intrinsic Prototype: Word64 Q6_P_vrmpywoh_PP_s1(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vrmpywoh_PP_s1 __builtin_HEXAGON_M4_vrmpyoh_s1 + +/* ========================================================================== + Assembly Syntax: Rx32^=and(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_andxacc_RR(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_andxacc_RR __builtin_HEXAGON_M4_xor_and + +/* ========================================================================== + Assembly Syntax: Rx32^=and(Rs32,~Rt32) + C Intrinsic Prototype: Word32 Q6_R_andxacc_RnR(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_andxacc_RnR __builtin_HEXAGON_M4_xor_andn + +/* ========================================================================== + Assembly Syntax: Rx32^=or(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_orxacc_RR(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_orxacc_RR __builtin_HEXAGON_M4_xor_or + +/* ========================================================================== + Assembly Syntax: Rxx32^=xor(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_xorxacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_xorxacc_PP __builtin_HEXAGON_M4_xor_xacc + +/* ========================================================================== + Assembly Syntax: Rxx32+=vdmpybsu(Rss32,Rtt32):sat + C Intrinsic Prototype: Word64 Q6_P_vdmpybsuacc_PP_sat(Word64 Rxx, Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vdmpybsuacc_PP_sat __builtin_HEXAGON_M5_vdmacbsu + +/* ========================================================================== + Assembly Syntax: Rdd32=vdmpybsu(Rss32,Rtt32):sat + C Intrinsic Prototype: Word64 Q6_P_vdmpybsu_PP_sat(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vdmpybsu_PP_sat __builtin_HEXAGON_M5_vdmpybsu + +/* ========================================================================== + Assembly Syntax: Rxx32+=vmpybsu(Rs32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_vmpybsuacc_RR(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmpybsuacc_RR __builtin_HEXAGON_M5_vmacbsu + +/* ========================================================================== + Assembly Syntax: Rxx32+=vmpybu(Rs32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_vmpybuacc_RR(Word64 Rxx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmpybuacc_RR __builtin_HEXAGON_M5_vmacbuu + +/* ========================================================================== + Assembly Syntax: Rdd32=vmpybsu(Rs32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_vmpybsu_RR(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmpybsu_RR __builtin_HEXAGON_M5_vmpybsu + +/* ========================================================================== + Assembly Syntax: Rdd32=vmpybu(Rs32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_vmpybu_RR(Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vmpybu_RR __builtin_HEXAGON_M5_vmpybuu + +/* ========================================================================== + Assembly Syntax: Rxx32+=vrmpybsu(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_vrmpybsuacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vrmpybsuacc_PP __builtin_HEXAGON_M5_vrmacbsu + +/* ========================================================================== + Assembly Syntax: Rxx32+=vrmpybu(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_vrmpybuacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vrmpybuacc_PP __builtin_HEXAGON_M5_vrmacbuu + +/* ========================================================================== + Assembly Syntax: Rdd32=vrmpybsu(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_vrmpybsu_PP(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vrmpybsu_PP __builtin_HEXAGON_M5_vrmpybsu + +/* ========================================================================== + Assembly Syntax: Rdd32=vrmpybu(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_vrmpybu_PP(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vrmpybu_PP __builtin_HEXAGON_M5_vrmpybuu + +/* ========================================================================== + Assembly Syntax: Rd32=addasl(Rt32,Rs32,#u3) + C Intrinsic Prototype: Word32 Q6_R_addasl_RRI(Word32 Rt, Word32 Rs, Word32 Iu3) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_addasl_RRI __builtin_HEXAGON_S2_addasl_rrri + +/* ========================================================================== + Assembly Syntax: Rdd32=asl(Rss32,#u6) + C Intrinsic Prototype: Word64 Q6_P_asl_PI(Word64 Rss, Word32 Iu6) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_asl_PI __builtin_HEXAGON_S2_asl_i_p + +/* ========================================================================== + Assembly Syntax: Rxx32+=asl(Rss32,#u6) + C Intrinsic Prototype: Word64 Q6_P_aslacc_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_aslacc_PI __builtin_HEXAGON_S2_asl_i_p_acc + +/* ========================================================================== + Assembly Syntax: Rxx32&=asl(Rss32,#u6) + C Intrinsic Prototype: Word64 Q6_P_asland_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_asland_PI __builtin_HEXAGON_S2_asl_i_p_and + +/* ========================================================================== + Assembly Syntax: Rxx32-=asl(Rss32,#u6) + C Intrinsic Prototype: Word64 Q6_P_aslnac_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_aslnac_PI __builtin_HEXAGON_S2_asl_i_p_nac + +/* ========================================================================== + Assembly Syntax: Rxx32|=asl(Rss32,#u6) + C Intrinsic Prototype: Word64 Q6_P_aslor_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_aslor_PI __builtin_HEXAGON_S2_asl_i_p_or + +/* ========================================================================== + Assembly Syntax: Rxx32^=asl(Rss32,#u6) + C Intrinsic Prototype: Word64 Q6_P_aslxacc_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_aslxacc_PI __builtin_HEXAGON_S2_asl_i_p_xacc + +/* ========================================================================== + Assembly Syntax: Rd32=asl(Rs32,#u5) + C Intrinsic Prototype: Word32 Q6_R_asl_RI(Word32 Rs, Word32 Iu5) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_asl_RI __builtin_HEXAGON_S2_asl_i_r + +/* ========================================================================== + Assembly Syntax: Rx32+=asl(Rs32,#u5) + C Intrinsic Prototype: Word32 Q6_R_aslacc_RI(Word32 Rx, Word32 Rs, Word32 Iu5) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_aslacc_RI __builtin_HEXAGON_S2_asl_i_r_acc + +/* ========================================================================== + Assembly Syntax: Rx32&=asl(Rs32,#u5) + C Intrinsic Prototype: Word32 Q6_R_asland_RI(Word32 Rx, Word32 Rs, Word32 Iu5) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_asland_RI __builtin_HEXAGON_S2_asl_i_r_and + +/* ========================================================================== + Assembly Syntax: Rx32-=asl(Rs32,#u5) + C Intrinsic Prototype: Word32 Q6_R_aslnac_RI(Word32 Rx, Word32 Rs, Word32 Iu5) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_aslnac_RI __builtin_HEXAGON_S2_asl_i_r_nac + +/* ========================================================================== + Assembly Syntax: Rx32|=asl(Rs32,#u5) + C Intrinsic Prototype: Word32 Q6_R_aslor_RI(Word32 Rx, Word32 Rs, Word32 Iu5) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_aslor_RI __builtin_HEXAGON_S2_asl_i_r_or + +/* ========================================================================== + Assembly Syntax: Rd32=asl(Rs32,#u5):sat + C Intrinsic Prototype: Word32 Q6_R_asl_RI_sat(Word32 Rs, Word32 Iu5) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_asl_RI_sat __builtin_HEXAGON_S2_asl_i_r_sat + +/* ========================================================================== + Assembly Syntax: Rx32^=asl(Rs32,#u5) + C Intrinsic Prototype: Word32 Q6_R_aslxacc_RI(Word32 Rx, Word32 Rs, Word32 Iu5) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_aslxacc_RI __builtin_HEXAGON_S2_asl_i_r_xacc + +/* ========================================================================== + Assembly Syntax: Rdd32=vaslh(Rss32,#u4) + C Intrinsic Prototype: Word64 Q6_P_vaslh_PI(Word64 Rss, Word32 Iu4) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vaslh_PI __builtin_HEXAGON_S2_asl_i_vh + +/* ========================================================================== + Assembly Syntax: Rdd32=vaslw(Rss32,#u5) + C Intrinsic Prototype: Word64 Q6_P_vaslw_PI(Word64 Rss, Word32 Iu5) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vaslw_PI __builtin_HEXAGON_S2_asl_i_vw + +/* ========================================================================== + Assembly Syntax: Rdd32=asl(Rss32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_asl_PR(Word64 Rss, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_asl_PR __builtin_HEXAGON_S2_asl_r_p + +/* ========================================================================== + Assembly Syntax: Rxx32+=asl(Rss32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_aslacc_PR(Word64 Rxx, Word64 Rss, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_aslacc_PR __builtin_HEXAGON_S2_asl_r_p_acc + +/* ========================================================================== + Assembly Syntax: Rxx32&=asl(Rss32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_asland_PR(Word64 Rxx, Word64 Rss, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_asland_PR __builtin_HEXAGON_S2_asl_r_p_and + +/* ========================================================================== + Assembly Syntax: Rxx32-=asl(Rss32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_aslnac_PR(Word64 Rxx, Word64 Rss, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_aslnac_PR __builtin_HEXAGON_S2_asl_r_p_nac + +/* ========================================================================== + Assembly Syntax: Rxx32|=asl(Rss32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_aslor_PR(Word64 Rxx, Word64 Rss, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_aslor_PR __builtin_HEXAGON_S2_asl_r_p_or + +/* ========================================================================== + Assembly Syntax: Rxx32^=asl(Rss32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_aslxacc_PR(Word64 Rxx, Word64 Rss, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_aslxacc_PR __builtin_HEXAGON_S2_asl_r_p_xor + +/* ========================================================================== + Assembly Syntax: Rd32=asl(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_asl_RR(Word32 Rs, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_asl_RR __builtin_HEXAGON_S2_asl_r_r + +/* ========================================================================== + Assembly Syntax: Rx32+=asl(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_aslacc_RR(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_aslacc_RR __builtin_HEXAGON_S2_asl_r_r_acc + +/* ========================================================================== + Assembly Syntax: Rx32&=asl(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_asland_RR(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_asland_RR __builtin_HEXAGON_S2_asl_r_r_and + +/* ========================================================================== + Assembly Syntax: Rx32-=asl(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_aslnac_RR(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_aslnac_RR __builtin_HEXAGON_S2_asl_r_r_nac + +/* ========================================================================== + Assembly Syntax: Rx32|=asl(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_aslor_RR(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_aslor_RR __builtin_HEXAGON_S2_asl_r_r_or + +/* ========================================================================== + Assembly Syntax: Rd32=asl(Rs32,Rt32):sat + C Intrinsic Prototype: Word32 Q6_R_asl_RR_sat(Word32 Rs, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_asl_RR_sat __builtin_HEXAGON_S2_asl_r_r_sat + +/* ========================================================================== + Assembly Syntax: Rdd32=vaslh(Rss32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_vaslh_PR(Word64 Rss, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vaslh_PR __builtin_HEXAGON_S2_asl_r_vh + +/* ========================================================================== + Assembly Syntax: Rdd32=vaslw(Rss32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_vaslw_PR(Word64 Rss, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vaslw_PR __builtin_HEXAGON_S2_asl_r_vw + +/* ========================================================================== + Assembly Syntax: Rdd32=asr(Rss32,#u6) + C Intrinsic Prototype: Word64 Q6_P_asr_PI(Word64 Rss, Word32 Iu6) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_asr_PI __builtin_HEXAGON_S2_asr_i_p + +/* ========================================================================== + Assembly Syntax: Rxx32+=asr(Rss32,#u6) + C Intrinsic Prototype: Word64 Q6_P_asracc_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_asracc_PI __builtin_HEXAGON_S2_asr_i_p_acc + +/* ========================================================================== + Assembly Syntax: Rxx32&=asr(Rss32,#u6) + C Intrinsic Prototype: Word64 Q6_P_asrand_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_asrand_PI __builtin_HEXAGON_S2_asr_i_p_and + +/* ========================================================================== + Assembly Syntax: Rxx32-=asr(Rss32,#u6) + C Intrinsic Prototype: Word64 Q6_P_asrnac_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_asrnac_PI __builtin_HEXAGON_S2_asr_i_p_nac + +/* ========================================================================== + Assembly Syntax: Rxx32|=asr(Rss32,#u6) + C Intrinsic Prototype: Word64 Q6_P_asror_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_asror_PI __builtin_HEXAGON_S2_asr_i_p_or + +/* ========================================================================== + Assembly Syntax: Rdd32=asr(Rss32,#u6):rnd + C Intrinsic Prototype: Word64 Q6_P_asr_PI_rnd(Word64 Rss, Word32 Iu6) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_asr_PI_rnd __builtin_HEXAGON_S2_asr_i_p_rnd + +/* ========================================================================== + Assembly Syntax: Rdd32=asrrnd(Rss32,#u6) + C Intrinsic Prototype: Word64 Q6_P_asrrnd_PI(Word64 Rss, Word32 Iu6) + Instruction Type: S_2op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_P_asrrnd_PI __builtin_HEXAGON_S2_asr_i_p_rnd_goodsyntax + +/* ========================================================================== + Assembly Syntax: Rd32=asr(Rs32,#u5) + C Intrinsic Prototype: Word32 Q6_R_asr_RI(Word32 Rs, Word32 Iu5) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_asr_RI __builtin_HEXAGON_S2_asr_i_r + +/* ========================================================================== + Assembly Syntax: Rx32+=asr(Rs32,#u5) + C Intrinsic Prototype: Word32 Q6_R_asracc_RI(Word32 Rx, Word32 Rs, Word32 Iu5) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_asracc_RI __builtin_HEXAGON_S2_asr_i_r_acc + +/* ========================================================================== + Assembly Syntax: Rx32&=asr(Rs32,#u5) + C Intrinsic Prototype: Word32 Q6_R_asrand_RI(Word32 Rx, Word32 Rs, Word32 Iu5) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_asrand_RI __builtin_HEXAGON_S2_asr_i_r_and + +/* ========================================================================== + Assembly Syntax: Rx32-=asr(Rs32,#u5) + C Intrinsic Prototype: Word32 Q6_R_asrnac_RI(Word32 Rx, Word32 Rs, Word32 Iu5) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_asrnac_RI __builtin_HEXAGON_S2_asr_i_r_nac + +/* ========================================================================== + Assembly Syntax: Rx32|=asr(Rs32,#u5) + C Intrinsic Prototype: Word32 Q6_R_asror_RI(Word32 Rx, Word32 Rs, Word32 Iu5) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_asror_RI __builtin_HEXAGON_S2_asr_i_r_or + +/* ========================================================================== + Assembly Syntax: Rd32=asr(Rs32,#u5):rnd + C Intrinsic Prototype: Word32 Q6_R_asr_RI_rnd(Word32 Rs, Word32 Iu5) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_asr_RI_rnd __builtin_HEXAGON_S2_asr_i_r_rnd + +/* ========================================================================== + Assembly Syntax: Rd32=asrrnd(Rs32,#u5) + C Intrinsic Prototype: Word32 Q6_R_asrrnd_RI(Word32 Rs, Word32 Iu5) + Instruction Type: S_2op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_asrrnd_RI __builtin_HEXAGON_S2_asr_i_r_rnd_goodsyntax + +/* ========================================================================== + Assembly Syntax: Rd32=vasrw(Rss32,#u5) + C Intrinsic Prototype: Word32 Q6_R_vasrw_PI(Word64 Rss, Word32 Iu5) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_vasrw_PI __builtin_HEXAGON_S2_asr_i_svw_trun + +/* ========================================================================== + Assembly Syntax: Rdd32=vasrh(Rss32,#u4) + C Intrinsic Prototype: Word64 Q6_P_vasrh_PI(Word64 Rss, Word32 Iu4) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vasrh_PI __builtin_HEXAGON_S2_asr_i_vh + +/* ========================================================================== + Assembly Syntax: Rdd32=vasrw(Rss32,#u5) + C Intrinsic Prototype: Word64 Q6_P_vasrw_PI(Word64 Rss, Word32 Iu5) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vasrw_PI __builtin_HEXAGON_S2_asr_i_vw + +/* ========================================================================== + Assembly Syntax: Rdd32=asr(Rss32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_asr_PR(Word64 Rss, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_asr_PR __builtin_HEXAGON_S2_asr_r_p + +/* ========================================================================== + Assembly Syntax: Rxx32+=asr(Rss32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_asracc_PR(Word64 Rxx, Word64 Rss, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_asracc_PR __builtin_HEXAGON_S2_asr_r_p_acc + +/* ========================================================================== + Assembly Syntax: Rxx32&=asr(Rss32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_asrand_PR(Word64 Rxx, Word64 Rss, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_asrand_PR __builtin_HEXAGON_S2_asr_r_p_and + +/* ========================================================================== + Assembly Syntax: Rxx32-=asr(Rss32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_asrnac_PR(Word64 Rxx, Word64 Rss, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_asrnac_PR __builtin_HEXAGON_S2_asr_r_p_nac + +/* ========================================================================== + Assembly Syntax: Rxx32|=asr(Rss32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_asror_PR(Word64 Rxx, Word64 Rss, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_asror_PR __builtin_HEXAGON_S2_asr_r_p_or + +/* ========================================================================== + Assembly Syntax: Rxx32^=asr(Rss32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_asrxacc_PR(Word64 Rxx, Word64 Rss, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_asrxacc_PR __builtin_HEXAGON_S2_asr_r_p_xor + +/* ========================================================================== + Assembly Syntax: Rd32=asr(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_asr_RR(Word32 Rs, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_asr_RR __builtin_HEXAGON_S2_asr_r_r + +/* ========================================================================== + Assembly Syntax: Rx32+=asr(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_asracc_RR(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_asracc_RR __builtin_HEXAGON_S2_asr_r_r_acc + +/* ========================================================================== + Assembly Syntax: Rx32&=asr(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_asrand_RR(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_asrand_RR __builtin_HEXAGON_S2_asr_r_r_and + +/* ========================================================================== + Assembly Syntax: Rx32-=asr(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_asrnac_RR(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_asrnac_RR __builtin_HEXAGON_S2_asr_r_r_nac + +/* ========================================================================== + Assembly Syntax: Rx32|=asr(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_asror_RR(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_asror_RR __builtin_HEXAGON_S2_asr_r_r_or + +/* ========================================================================== + Assembly Syntax: Rd32=asr(Rs32,Rt32):sat + C Intrinsic Prototype: Word32 Q6_R_asr_RR_sat(Word32 Rs, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_asr_RR_sat __builtin_HEXAGON_S2_asr_r_r_sat + +/* ========================================================================== + Assembly Syntax: Rd32=vasrw(Rss32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_vasrw_PR(Word64 Rss, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_vasrw_PR __builtin_HEXAGON_S2_asr_r_svw_trun + +/* ========================================================================== + Assembly Syntax: Rdd32=vasrh(Rss32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_vasrh_PR(Word64 Rss, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vasrh_PR __builtin_HEXAGON_S2_asr_r_vh + +/* ========================================================================== + Assembly Syntax: Rdd32=vasrw(Rss32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_vasrw_PR(Word64 Rss, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vasrw_PR __builtin_HEXAGON_S2_asr_r_vw + +/* ========================================================================== + Assembly Syntax: Rd32=brev(Rs32) + C Intrinsic Prototype: Word32 Q6_R_brev_R(Word32 Rs) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_brev_R __builtin_HEXAGON_S2_brev + +/* ========================================================================== + Assembly Syntax: Rdd32=brev(Rss32) + C Intrinsic Prototype: Word64 Q6_P_brev_P(Word64 Rss) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_brev_P __builtin_HEXAGON_S2_brevp + +/* ========================================================================== + Assembly Syntax: Rd32=cl0(Rs32) + C Intrinsic Prototype: Word32 Q6_R_cl0_R(Word32 Rs) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_cl0_R __builtin_HEXAGON_S2_cl0 + +/* ========================================================================== + Assembly Syntax: Rd32=cl0(Rss32) + C Intrinsic Prototype: Word32 Q6_R_cl0_P(Word64 Rss) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_cl0_P __builtin_HEXAGON_S2_cl0p + +/* ========================================================================== + Assembly Syntax: Rd32=cl1(Rs32) + C Intrinsic Prototype: Word32 Q6_R_cl1_R(Word32 Rs) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_cl1_R __builtin_HEXAGON_S2_cl1 + +/* ========================================================================== + Assembly Syntax: Rd32=cl1(Rss32) + C Intrinsic Prototype: Word32 Q6_R_cl1_P(Word64 Rss) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_cl1_P __builtin_HEXAGON_S2_cl1p + +/* ========================================================================== + Assembly Syntax: Rd32=clb(Rs32) + C Intrinsic Prototype: Word32 Q6_R_clb_R(Word32 Rs) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_clb_R __builtin_HEXAGON_S2_clb + +/* ========================================================================== + Assembly Syntax: Rd32=normamt(Rs32) + C Intrinsic Prototype: Word32 Q6_R_normamt_R(Word32 Rs) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_normamt_R __builtin_HEXAGON_S2_clbnorm + +/* ========================================================================== + Assembly Syntax: Rd32=clb(Rss32) + C Intrinsic Prototype: Word32 Q6_R_clb_P(Word64 Rss) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_clb_P __builtin_HEXAGON_S2_clbp + +/* ========================================================================== + Assembly Syntax: Rd32=clrbit(Rs32,#u5) + C Intrinsic Prototype: Word32 Q6_R_clrbit_RI(Word32 Rs, Word32 Iu5) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_clrbit_RI __builtin_HEXAGON_S2_clrbit_i + +/* ========================================================================== + Assembly Syntax: Rd32=clrbit(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_clrbit_RR(Word32 Rs, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_clrbit_RR __builtin_HEXAGON_S2_clrbit_r + +/* ========================================================================== + Assembly Syntax: Rd32=ct0(Rs32) + C Intrinsic Prototype: Word32 Q6_R_ct0_R(Word32 Rs) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_ct0_R __builtin_HEXAGON_S2_ct0 + +/* ========================================================================== + Assembly Syntax: Rd32=ct0(Rss32) + C Intrinsic Prototype: Word32 Q6_R_ct0_P(Word64 Rss) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_ct0_P __builtin_HEXAGON_S2_ct0p + +/* ========================================================================== + Assembly Syntax: Rd32=ct1(Rs32) + C Intrinsic Prototype: Word32 Q6_R_ct1_R(Word32 Rs) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_ct1_R __builtin_HEXAGON_S2_ct1 + +/* ========================================================================== + Assembly Syntax: Rd32=ct1(Rss32) + C Intrinsic Prototype: Word32 Q6_R_ct1_P(Word64 Rss) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_ct1_P __builtin_HEXAGON_S2_ct1p + +/* ========================================================================== + Assembly Syntax: Rdd32=deinterleave(Rss32) + C Intrinsic Prototype: Word64 Q6_P_deinterleave_P(Word64 Rss) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_deinterleave_P __builtin_HEXAGON_S2_deinterleave + +/* ========================================================================== + Assembly Syntax: Rd32=extractu(Rs32,#u5,#U5) + C Intrinsic Prototype: Word32 Q6_R_extractu_RII(Word32 Rs, Word32 Iu5, Word32 IU5) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_extractu_RII __builtin_HEXAGON_S2_extractu + +/* ========================================================================== + Assembly Syntax: Rd32=extractu(Rs32,Rtt32) + C Intrinsic Prototype: Word32 Q6_R_extractu_RP(Word32 Rs, Word64 Rtt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_extractu_RP __builtin_HEXAGON_S2_extractu_rp + +/* ========================================================================== + Assembly Syntax: Rdd32=extractu(Rss32,#u6,#U6) + C Intrinsic Prototype: Word64 Q6_P_extractu_PII(Word64 Rss, Word32 Iu6, Word32 IU6) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_extractu_PII __builtin_HEXAGON_S2_extractup + +/* ========================================================================== + Assembly Syntax: Rdd32=extractu(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_extractu_PP(Word64 Rss, Word64 Rtt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_extractu_PP __builtin_HEXAGON_S2_extractup_rp + +/* ========================================================================== + Assembly Syntax: Rx32=insert(Rs32,#u5,#U5) + C Intrinsic Prototype: Word32 Q6_R_insert_RII(Word32 Rx, Word32 Rs, Word32 Iu5, Word32 IU5) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_insert_RII __builtin_HEXAGON_S2_insert + +/* ========================================================================== + Assembly Syntax: Rx32=insert(Rs32,Rtt32) + C Intrinsic Prototype: Word32 Q6_R_insert_RP(Word32 Rx, Word32 Rs, Word64 Rtt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_insert_RP __builtin_HEXAGON_S2_insert_rp + +/* ========================================================================== + Assembly Syntax: Rxx32=insert(Rss32,#u6,#U6) + C Intrinsic Prototype: Word64 Q6_P_insert_PII(Word64 Rxx, Word64 Rss, Word32 Iu6, Word32 IU6) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_insert_PII __builtin_HEXAGON_S2_insertp + +/* ========================================================================== + Assembly Syntax: Rxx32=insert(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_insert_PP(Word64 Rxx, Word64 Rss, Word64 Rtt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_insert_PP __builtin_HEXAGON_S2_insertp_rp + +/* ========================================================================== + Assembly Syntax: Rdd32=interleave(Rss32) + C Intrinsic Prototype: Word64 Q6_P_interleave_P(Word64 Rss) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_interleave_P __builtin_HEXAGON_S2_interleave + +/* ========================================================================== + Assembly Syntax: Rdd32=lfs(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_lfs_PP(Word64 Rss, Word64 Rtt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_lfs_PP __builtin_HEXAGON_S2_lfsp + +/* ========================================================================== + Assembly Syntax: Rdd32=lsl(Rss32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_lsl_PR(Word64 Rss, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_lsl_PR __builtin_HEXAGON_S2_lsl_r_p + +/* ========================================================================== + Assembly Syntax: Rxx32+=lsl(Rss32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_lslacc_PR(Word64 Rxx, Word64 Rss, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_lslacc_PR __builtin_HEXAGON_S2_lsl_r_p_acc + +/* ========================================================================== + Assembly Syntax: Rxx32&=lsl(Rss32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_lsland_PR(Word64 Rxx, Word64 Rss, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_lsland_PR __builtin_HEXAGON_S2_lsl_r_p_and + +/* ========================================================================== + Assembly Syntax: Rxx32-=lsl(Rss32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_lslnac_PR(Word64 Rxx, Word64 Rss, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_lslnac_PR __builtin_HEXAGON_S2_lsl_r_p_nac + +/* ========================================================================== + Assembly Syntax: Rxx32|=lsl(Rss32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_lslor_PR(Word64 Rxx, Word64 Rss, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_lslor_PR __builtin_HEXAGON_S2_lsl_r_p_or + +/* ========================================================================== + Assembly Syntax: Rxx32^=lsl(Rss32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_lslxacc_PR(Word64 Rxx, Word64 Rss, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_lslxacc_PR __builtin_HEXAGON_S2_lsl_r_p_xor + +/* ========================================================================== + Assembly Syntax: Rd32=lsl(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_lsl_RR(Word32 Rs, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_lsl_RR __builtin_HEXAGON_S2_lsl_r_r + +/* ========================================================================== + Assembly Syntax: Rx32+=lsl(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_lslacc_RR(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_lslacc_RR __builtin_HEXAGON_S2_lsl_r_r_acc + +/* ========================================================================== + Assembly Syntax: Rx32&=lsl(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_lsland_RR(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_lsland_RR __builtin_HEXAGON_S2_lsl_r_r_and + +/* ========================================================================== + Assembly Syntax: Rx32-=lsl(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_lslnac_RR(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_lslnac_RR __builtin_HEXAGON_S2_lsl_r_r_nac + +/* ========================================================================== + Assembly Syntax: Rx32|=lsl(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_lslor_RR(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_lslor_RR __builtin_HEXAGON_S2_lsl_r_r_or + +/* ========================================================================== + Assembly Syntax: Rdd32=vlslh(Rss32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_vlslh_PR(Word64 Rss, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vlslh_PR __builtin_HEXAGON_S2_lsl_r_vh + +/* ========================================================================== + Assembly Syntax: Rdd32=vlslw(Rss32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_vlslw_PR(Word64 Rss, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vlslw_PR __builtin_HEXAGON_S2_lsl_r_vw + +/* ========================================================================== + Assembly Syntax: Rdd32=lsr(Rss32,#u6) + C Intrinsic Prototype: Word64 Q6_P_lsr_PI(Word64 Rss, Word32 Iu6) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_lsr_PI __builtin_HEXAGON_S2_lsr_i_p + +/* ========================================================================== + Assembly Syntax: Rxx32+=lsr(Rss32,#u6) + C Intrinsic Prototype: Word64 Q6_P_lsracc_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_lsracc_PI __builtin_HEXAGON_S2_lsr_i_p_acc + +/* ========================================================================== + Assembly Syntax: Rxx32&=lsr(Rss32,#u6) + C Intrinsic Prototype: Word64 Q6_P_lsrand_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_lsrand_PI __builtin_HEXAGON_S2_lsr_i_p_and + +/* ========================================================================== + Assembly Syntax: Rxx32-=lsr(Rss32,#u6) + C Intrinsic Prototype: Word64 Q6_P_lsrnac_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_lsrnac_PI __builtin_HEXAGON_S2_lsr_i_p_nac + +/* ========================================================================== + Assembly Syntax: Rxx32|=lsr(Rss32,#u6) + C Intrinsic Prototype: Word64 Q6_P_lsror_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_lsror_PI __builtin_HEXAGON_S2_lsr_i_p_or + +/* ========================================================================== + Assembly Syntax: Rxx32^=lsr(Rss32,#u6) + C Intrinsic Prototype: Word64 Q6_P_lsrxacc_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_lsrxacc_PI __builtin_HEXAGON_S2_lsr_i_p_xacc + +/* ========================================================================== + Assembly Syntax: Rd32=lsr(Rs32,#u5) + C Intrinsic Prototype: Word32 Q6_R_lsr_RI(Word32 Rs, Word32 Iu5) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_lsr_RI __builtin_HEXAGON_S2_lsr_i_r + +/* ========================================================================== + Assembly Syntax: Rx32+=lsr(Rs32,#u5) + C Intrinsic Prototype: Word32 Q6_R_lsracc_RI(Word32 Rx, Word32 Rs, Word32 Iu5) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_lsracc_RI __builtin_HEXAGON_S2_lsr_i_r_acc + +/* ========================================================================== + Assembly Syntax: Rx32&=lsr(Rs32,#u5) + C Intrinsic Prototype: Word32 Q6_R_lsrand_RI(Word32 Rx, Word32 Rs, Word32 Iu5) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_lsrand_RI __builtin_HEXAGON_S2_lsr_i_r_and + +/* ========================================================================== + Assembly Syntax: Rx32-=lsr(Rs32,#u5) + C Intrinsic Prototype: Word32 Q6_R_lsrnac_RI(Word32 Rx, Word32 Rs, Word32 Iu5) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_lsrnac_RI __builtin_HEXAGON_S2_lsr_i_r_nac + +/* ========================================================================== + Assembly Syntax: Rx32|=lsr(Rs32,#u5) + C Intrinsic Prototype: Word32 Q6_R_lsror_RI(Word32 Rx, Word32 Rs, Word32 Iu5) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_lsror_RI __builtin_HEXAGON_S2_lsr_i_r_or + +/* ========================================================================== + Assembly Syntax: Rx32^=lsr(Rs32,#u5) + C Intrinsic Prototype: Word32 Q6_R_lsrxacc_RI(Word32 Rx, Word32 Rs, Word32 Iu5) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_lsrxacc_RI __builtin_HEXAGON_S2_lsr_i_r_xacc + +/* ========================================================================== + Assembly Syntax: Rdd32=vlsrh(Rss32,#u4) + C Intrinsic Prototype: Word64 Q6_P_vlsrh_PI(Word64 Rss, Word32 Iu4) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vlsrh_PI __builtin_HEXAGON_S2_lsr_i_vh + +/* ========================================================================== + Assembly Syntax: Rdd32=vlsrw(Rss32,#u5) + C Intrinsic Prototype: Word64 Q6_P_vlsrw_PI(Word64 Rss, Word32 Iu5) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vlsrw_PI __builtin_HEXAGON_S2_lsr_i_vw + +/* ========================================================================== + Assembly Syntax: Rdd32=lsr(Rss32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_lsr_PR(Word64 Rss, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_lsr_PR __builtin_HEXAGON_S2_lsr_r_p + +/* ========================================================================== + Assembly Syntax: Rxx32+=lsr(Rss32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_lsracc_PR(Word64 Rxx, Word64 Rss, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_lsracc_PR __builtin_HEXAGON_S2_lsr_r_p_acc + +/* ========================================================================== + Assembly Syntax: Rxx32&=lsr(Rss32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_lsrand_PR(Word64 Rxx, Word64 Rss, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_lsrand_PR __builtin_HEXAGON_S2_lsr_r_p_and + +/* ========================================================================== + Assembly Syntax: Rxx32-=lsr(Rss32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_lsrnac_PR(Word64 Rxx, Word64 Rss, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_lsrnac_PR __builtin_HEXAGON_S2_lsr_r_p_nac + +/* ========================================================================== + Assembly Syntax: Rxx32|=lsr(Rss32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_lsror_PR(Word64 Rxx, Word64 Rss, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_lsror_PR __builtin_HEXAGON_S2_lsr_r_p_or + +/* ========================================================================== + Assembly Syntax: Rxx32^=lsr(Rss32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_lsrxacc_PR(Word64 Rxx, Word64 Rss, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_lsrxacc_PR __builtin_HEXAGON_S2_lsr_r_p_xor + +/* ========================================================================== + Assembly Syntax: Rd32=lsr(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_lsr_RR(Word32 Rs, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_lsr_RR __builtin_HEXAGON_S2_lsr_r_r + +/* ========================================================================== + Assembly Syntax: Rx32+=lsr(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_lsracc_RR(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_lsracc_RR __builtin_HEXAGON_S2_lsr_r_r_acc + +/* ========================================================================== + Assembly Syntax: Rx32&=lsr(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_lsrand_RR(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_lsrand_RR __builtin_HEXAGON_S2_lsr_r_r_and + +/* ========================================================================== + Assembly Syntax: Rx32-=lsr(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_lsrnac_RR(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_lsrnac_RR __builtin_HEXAGON_S2_lsr_r_r_nac + +/* ========================================================================== + Assembly Syntax: Rx32|=lsr(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_lsror_RR(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_lsror_RR __builtin_HEXAGON_S2_lsr_r_r_or + +/* ========================================================================== + Assembly Syntax: Rdd32=vlsrh(Rss32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_vlsrh_PR(Word64 Rss, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vlsrh_PR __builtin_HEXAGON_S2_lsr_r_vh + +/* ========================================================================== + Assembly Syntax: Rdd32=vlsrw(Rss32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_vlsrw_PR(Word64 Rss, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vlsrw_PR __builtin_HEXAGON_S2_lsr_r_vw + +/* ========================================================================== + Assembly Syntax: Rdd32=packhl(Rs32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_packhl_RR(Word32 Rs, Word32 Rt) + Instruction Type: ALU32_3op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_P_packhl_RR __builtin_HEXAGON_S2_packhl + +/* ========================================================================== + Assembly Syntax: Rd32=parity(Rss32,Rtt32) + C Intrinsic Prototype: Word32 Q6_R_parity_PP(Word64 Rss, Word64 Rtt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_parity_PP __builtin_HEXAGON_S2_parityp + +/* ========================================================================== + Assembly Syntax: Rd32=setbit(Rs32,#u5) + C Intrinsic Prototype: Word32 Q6_R_setbit_RI(Word32 Rs, Word32 Iu5) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_setbit_RI __builtin_HEXAGON_S2_setbit_i + +/* ========================================================================== + Assembly Syntax: Rd32=setbit(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_setbit_RR(Word32 Rs, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_setbit_RR __builtin_HEXAGON_S2_setbit_r + +/* ========================================================================== + Assembly Syntax: Rdd32=shuffeb(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_shuffeb_PP(Word64 Rss, Word64 Rtt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_shuffeb_PP __builtin_HEXAGON_S2_shuffeb + +/* ========================================================================== + Assembly Syntax: Rdd32=shuffeh(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_shuffeh_PP(Word64 Rss, Word64 Rtt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_shuffeh_PP __builtin_HEXAGON_S2_shuffeh + +/* ========================================================================== + Assembly Syntax: Rdd32=shuffob(Rtt32,Rss32) + C Intrinsic Prototype: Word64 Q6_P_shuffob_PP(Word64 Rtt, Word64 Rss) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_shuffob_PP __builtin_HEXAGON_S2_shuffob + +/* ========================================================================== + Assembly Syntax: Rdd32=shuffoh(Rtt32,Rss32) + C Intrinsic Prototype: Word64 Q6_P_shuffoh_PP(Word64 Rtt, Word64 Rss) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_shuffoh_PP __builtin_HEXAGON_S2_shuffoh + +/* ========================================================================== + Assembly Syntax: memb(Rx32++#s4:0:circ(Mu2))=Rt32 + C Intrinsic Prototype: void Q6_memb_IMR_circ(void** Rx, Word32 Is4_0, Word32 Mu, Word32 Rt, void* BaseAddress) + Instruction Type: ST + Execution Slots: SLOT01 + ========================================================================== */ + +#define Q6_memb_IMR_circ __builtin_HEXAGON_S2_storerb_pci + +/* ========================================================================== + Assembly Syntax: memb(Rx32++I:circ(Mu2))=Rt32 + C Intrinsic Prototype: void Q6_memb_MR_circ(void** Rx, Word32 Mu, Word32 Rt, void* BaseAddress) + Instruction Type: ST + Execution Slots: SLOT01 + ========================================================================== */ + +#define Q6_memb_MR_circ __builtin_HEXAGON_S2_storerb_pcr + +/* ========================================================================== + Assembly Syntax: memd(Rx32++#s4:3:circ(Mu2))=Rtt32 + C Intrinsic Prototype: void Q6_memd_IMP_circ(void** Rx, Word32 Is4_3, Word32 Mu, Word64 Rtt, void* BaseAddress) + Instruction Type: ST + Execution Slots: SLOT01 + ========================================================================== */ + +#define Q6_memd_IMP_circ __builtin_HEXAGON_S2_storerd_pci + +/* ========================================================================== + Assembly Syntax: memd(Rx32++I:circ(Mu2))=Rtt32 + C Intrinsic Prototype: void Q6_memd_MP_circ(void** Rx, Word32 Mu, Word64 Rtt, void* BaseAddress) + Instruction Type: ST + Execution Slots: SLOT01 + ========================================================================== */ + +#define Q6_memd_MP_circ __builtin_HEXAGON_S2_storerd_pcr + +/* ========================================================================== + Assembly Syntax: memh(Rx32++#s4:1:circ(Mu2))=Rt32.h + C Intrinsic Prototype: void Q6_memh_IMRh_circ(void** Rx, Word32 Is4_1, Word32 Mu, Word32 Rt, void* BaseAddress) + Instruction Type: ST + Execution Slots: SLOT01 + ========================================================================== */ + +#define Q6_memh_IMRh_circ __builtin_HEXAGON_S2_storerf_pci + +/* ========================================================================== + Assembly Syntax: memh(Rx32++I:circ(Mu2))=Rt32.h + C Intrinsic Prototype: void Q6_memh_MRh_circ(void** Rx, Word32 Mu, Word32 Rt, void* BaseAddress) + Instruction Type: ST + Execution Slots: SLOT01 + ========================================================================== */ + +#define Q6_memh_MRh_circ __builtin_HEXAGON_S2_storerf_pcr + +/* ========================================================================== + Assembly Syntax: memh(Rx32++#s4:1:circ(Mu2))=Rt32 + C Intrinsic Prototype: void Q6_memh_IMR_circ(void** Rx, Word32 Is4_1, Word32 Mu, Word32 Rt, void* BaseAddress) + Instruction Type: ST + Execution Slots: SLOT01 + ========================================================================== */ + +#define Q6_memh_IMR_circ __builtin_HEXAGON_S2_storerh_pci + +/* ========================================================================== + Assembly Syntax: memh(Rx32++I:circ(Mu2))=Rt32 + C Intrinsic Prototype: void Q6_memh_MR_circ(void** Rx, Word32 Mu, Word32 Rt, void* BaseAddress) + Instruction Type: ST + Execution Slots: SLOT01 + ========================================================================== */ + +#define Q6_memh_MR_circ __builtin_HEXAGON_S2_storerh_pcr + +/* ========================================================================== + Assembly Syntax: memw(Rx32++#s4:2:circ(Mu2))=Rt32 + C Intrinsic Prototype: void Q6_memw_IMR_circ(void** Rx, Word32 Is4_2, Word32 Mu, Word32 Rt, void* BaseAddress) + Instruction Type: ST + Execution Slots: SLOT01 + ========================================================================== */ + +#define Q6_memw_IMR_circ __builtin_HEXAGON_S2_storeri_pci + +/* ========================================================================== + Assembly Syntax: memw(Rx32++I:circ(Mu2))=Rt32 + C Intrinsic Prototype: void Q6_memw_MR_circ(void** Rx, Word32 Mu, Word32 Rt, void* BaseAddress) + Instruction Type: ST + Execution Slots: SLOT01 + ========================================================================== */ + +#define Q6_memw_MR_circ __builtin_HEXAGON_S2_storeri_pcr + +/* ========================================================================== + Assembly Syntax: Rd32=vsathb(Rs32) + C Intrinsic Prototype: Word32 Q6_R_vsathb_R(Word32 Rs) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_vsathb_R __builtin_HEXAGON_S2_svsathb + +/* ========================================================================== + Assembly Syntax: Rd32=vsathub(Rs32) + C Intrinsic Prototype: Word32 Q6_R_vsathub_R(Word32 Rs) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_vsathub_R __builtin_HEXAGON_S2_svsathub + +/* ========================================================================== + Assembly Syntax: Rx32=tableidxb(Rs32,#u4,#U5) + C Intrinsic Prototype: Word32 Q6_R_tableidxb_RII(Word32 Rx, Word32 Rs, Word32 Iu4, Word32 IU5) + Instruction Type: S_2op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_tableidxb_RII __builtin_HEXAGON_S2_tableidxb_goodsyntax + +/* ========================================================================== + Assembly Syntax: Rx32=tableidxd(Rs32,#u4,#U5) + C Intrinsic Prototype: Word32 Q6_R_tableidxd_RII(Word32 Rx, Word32 Rs, Word32 Iu4, Word32 IU5) + Instruction Type: S_2op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_tableidxd_RII __builtin_HEXAGON_S2_tableidxd_goodsyntax + +/* ========================================================================== + Assembly Syntax: Rx32=tableidxh(Rs32,#u4,#U5) + C Intrinsic Prototype: Word32 Q6_R_tableidxh_RII(Word32 Rx, Word32 Rs, Word32 Iu4, Word32 IU5) + Instruction Type: S_2op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_tableidxh_RII __builtin_HEXAGON_S2_tableidxh_goodsyntax + +/* ========================================================================== + Assembly Syntax: Rx32=tableidxw(Rs32,#u4,#U5) + C Intrinsic Prototype: Word32 Q6_R_tableidxw_RII(Word32 Rx, Word32 Rs, Word32 Iu4, Word32 IU5) + Instruction Type: S_2op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_tableidxw_RII __builtin_HEXAGON_S2_tableidxw_goodsyntax + +/* ========================================================================== + Assembly Syntax: Rd32=togglebit(Rs32,#u5) + C Intrinsic Prototype: Word32 Q6_R_togglebit_RI(Word32 Rs, Word32 Iu5) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_togglebit_RI __builtin_HEXAGON_S2_togglebit_i + +/* ========================================================================== + Assembly Syntax: Rd32=togglebit(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_togglebit_RR(Word32 Rs, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_togglebit_RR __builtin_HEXAGON_S2_togglebit_r + +/* ========================================================================== + Assembly Syntax: Pd4=tstbit(Rs32,#u5) + C Intrinsic Prototype: Byte Q6_p_tstbit_RI(Word32 Rs, Word32 Iu5) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_tstbit_RI __builtin_HEXAGON_S2_tstbit_i + +/* ========================================================================== + Assembly Syntax: Pd4=tstbit(Rs32,Rt32) + C Intrinsic Prototype: Byte Q6_p_tstbit_RR(Word32 Rs, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_tstbit_RR __builtin_HEXAGON_S2_tstbit_r + +/* ========================================================================== + Assembly Syntax: Rdd32=valignb(Rtt32,Rss32,#u3) + C Intrinsic Prototype: Word64 Q6_P_valignb_PPI(Word64 Rtt, Word64 Rss, Word32 Iu3) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_valignb_PPI __builtin_HEXAGON_S2_valignib + +/* ========================================================================== + Assembly Syntax: Rdd32=valignb(Rtt32,Rss32,Pu4) + C Intrinsic Prototype: Word64 Q6_P_valignb_PPp(Word64 Rtt, Word64 Rss, Byte Pu) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_valignb_PPp __builtin_HEXAGON_S2_valignrb + +/* ========================================================================== + Assembly Syntax: Rdd32=vcnegh(Rss32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_vcnegh_PR(Word64 Rss, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vcnegh_PR __builtin_HEXAGON_S2_vcnegh + +/* ========================================================================== + Assembly Syntax: Rdd32=vcrotate(Rss32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_vcrotate_PR(Word64 Rss, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vcrotate_PR __builtin_HEXAGON_S2_vcrotate + +/* ========================================================================== + Assembly Syntax: Rxx32+=vrcnegh(Rss32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_vrcneghacc_PR(Word64 Rxx, Word64 Rss, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vrcneghacc_PR __builtin_HEXAGON_S2_vrcnegh + +/* ========================================================================== + Assembly Syntax: Rd32=vrndwh(Rss32) + C Intrinsic Prototype: Word32 Q6_R_vrndwh_P(Word64 Rss) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_vrndwh_P __builtin_HEXAGON_S2_vrndpackwh + +/* ========================================================================== + Assembly Syntax: Rd32=vrndwh(Rss32):sat + C Intrinsic Prototype: Word32 Q6_R_vrndwh_P_sat(Word64 Rss) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_vrndwh_P_sat __builtin_HEXAGON_S2_vrndpackwhs + +/* ========================================================================== + Assembly Syntax: Rd32=vsathb(Rss32) + C Intrinsic Prototype: Word32 Q6_R_vsathb_P(Word64 Rss) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_vsathb_P __builtin_HEXAGON_S2_vsathb + +/* ========================================================================== + Assembly Syntax: Rdd32=vsathb(Rss32) + C Intrinsic Prototype: Word64 Q6_P_vsathb_P(Word64 Rss) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vsathb_P __builtin_HEXAGON_S2_vsathb_nopack + +/* ========================================================================== + Assembly Syntax: Rd32=vsathub(Rss32) + C Intrinsic Prototype: Word32 Q6_R_vsathub_P(Word64 Rss) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_vsathub_P __builtin_HEXAGON_S2_vsathub + +/* ========================================================================== + Assembly Syntax: Rdd32=vsathub(Rss32) + C Intrinsic Prototype: Word64 Q6_P_vsathub_P(Word64 Rss) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vsathub_P __builtin_HEXAGON_S2_vsathub_nopack + +/* ========================================================================== + Assembly Syntax: Rd32=vsatwh(Rss32) + C Intrinsic Prototype: Word32 Q6_R_vsatwh_P(Word64 Rss) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_vsatwh_P __builtin_HEXAGON_S2_vsatwh + +/* ========================================================================== + Assembly Syntax: Rdd32=vsatwh(Rss32) + C Intrinsic Prototype: Word64 Q6_P_vsatwh_P(Word64 Rss) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vsatwh_P __builtin_HEXAGON_S2_vsatwh_nopack + +/* ========================================================================== + Assembly Syntax: Rd32=vsatwuh(Rss32) + C Intrinsic Prototype: Word32 Q6_R_vsatwuh_P(Word64 Rss) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_vsatwuh_P __builtin_HEXAGON_S2_vsatwuh + +/* ========================================================================== + Assembly Syntax: Rdd32=vsatwuh(Rss32) + C Intrinsic Prototype: Word64 Q6_P_vsatwuh_P(Word64 Rss) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vsatwuh_P __builtin_HEXAGON_S2_vsatwuh_nopack + +/* ========================================================================== + Assembly Syntax: Rd32=vsplatb(Rs32) + C Intrinsic Prototype: Word32 Q6_R_vsplatb_R(Word32 Rs) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_vsplatb_R __builtin_HEXAGON_S2_vsplatrb + +/* ========================================================================== + Assembly Syntax: Rdd32=vsplath(Rs32) + C Intrinsic Prototype: Word64 Q6_P_vsplath_R(Word32 Rs) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vsplath_R __builtin_HEXAGON_S2_vsplatrh + +/* ========================================================================== + Assembly Syntax: Rdd32=vspliceb(Rss32,Rtt32,#u3) + C Intrinsic Prototype: Word64 Q6_P_vspliceb_PPI(Word64 Rss, Word64 Rtt, Word32 Iu3) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vspliceb_PPI __builtin_HEXAGON_S2_vspliceib + +/* ========================================================================== + Assembly Syntax: Rdd32=vspliceb(Rss32,Rtt32,Pu4) + C Intrinsic Prototype: Word64 Q6_P_vspliceb_PPp(Word64 Rss, Word64 Rtt, Byte Pu) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vspliceb_PPp __builtin_HEXAGON_S2_vsplicerb + +/* ========================================================================== + Assembly Syntax: Rdd32=vsxtbh(Rs32) + C Intrinsic Prototype: Word64 Q6_P_vsxtbh_R(Word32 Rs) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vsxtbh_R __builtin_HEXAGON_S2_vsxtbh + +/* ========================================================================== + Assembly Syntax: Rdd32=vsxthw(Rs32) + C Intrinsic Prototype: Word64 Q6_P_vsxthw_R(Word32 Rs) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vsxthw_R __builtin_HEXAGON_S2_vsxthw + +/* ========================================================================== + Assembly Syntax: Rd32=vtrunehb(Rss32) + C Intrinsic Prototype: Word32 Q6_R_vtrunehb_P(Word64 Rss) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_vtrunehb_P __builtin_HEXAGON_S2_vtrunehb + +/* ========================================================================== + Assembly Syntax: Rdd32=vtrunewh(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_vtrunewh_PP(Word64 Rss, Word64 Rtt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vtrunewh_PP __builtin_HEXAGON_S2_vtrunewh + +/* ========================================================================== + Assembly Syntax: Rd32=vtrunohb(Rss32) + C Intrinsic Prototype: Word32 Q6_R_vtrunohb_P(Word64 Rss) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_vtrunohb_P __builtin_HEXAGON_S2_vtrunohb + +/* ========================================================================== + Assembly Syntax: Rdd32=vtrunowh(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_vtrunowh_PP(Word64 Rss, Word64 Rtt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vtrunowh_PP __builtin_HEXAGON_S2_vtrunowh + +/* ========================================================================== + Assembly Syntax: Rdd32=vzxtbh(Rs32) + C Intrinsic Prototype: Word64 Q6_P_vzxtbh_R(Word32 Rs) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vzxtbh_R __builtin_HEXAGON_S2_vzxtbh + +/* ========================================================================== + Assembly Syntax: Rdd32=vzxthw(Rs32) + C Intrinsic Prototype: Word64 Q6_P_vzxthw_R(Word32 Rs) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vzxthw_R __builtin_HEXAGON_S2_vzxthw + +/* ========================================================================== + Assembly Syntax: Rd32=add(Rs32,add(Ru32,#s6)) + C Intrinsic Prototype: Word32 Q6_R_add_add_RRI(Word32 Rs, Word32 Ru, Word32 Is6) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_add_add_RRI __builtin_HEXAGON_S4_addaddi + +/* ========================================================================== + Assembly Syntax: Rx32=add(#u8,asl(Rx32,#U5)) + C Intrinsic Prototype: Word32 Q6_R_add_asl_IRI(Word32 Iu8, Word32 Rx, Word32 IU5) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_add_asl_IRI __builtin_HEXAGON_S4_addi_asl_ri + +/* ========================================================================== + Assembly Syntax: Rx32=add(#u8,lsr(Rx32,#U5)) + C Intrinsic Prototype: Word32 Q6_R_add_lsr_IRI(Word32 Iu8, Word32 Rx, Word32 IU5) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_add_lsr_IRI __builtin_HEXAGON_S4_addi_lsr_ri + +/* ========================================================================== + Assembly Syntax: Rx32=and(#u8,asl(Rx32,#U5)) + C Intrinsic Prototype: Word32 Q6_R_and_asl_IRI(Word32 Iu8, Word32 Rx, Word32 IU5) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_and_asl_IRI __builtin_HEXAGON_S4_andi_asl_ri + +/* ========================================================================== + Assembly Syntax: Rx32=and(#u8,lsr(Rx32,#U5)) + C Intrinsic Prototype: Word32 Q6_R_and_lsr_IRI(Word32 Iu8, Word32 Rx, Word32 IU5) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_and_lsr_IRI __builtin_HEXAGON_S4_andi_lsr_ri + +/* ========================================================================== + Assembly Syntax: Rd32=add(clb(Rs32),#s6) + C Intrinsic Prototype: Word32 Q6_R_add_clb_RI(Word32 Rs, Word32 Is6) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_add_clb_RI __builtin_HEXAGON_S4_clbaddi + +/* ========================================================================== + Assembly Syntax: Rd32=add(clb(Rss32),#s6) + C Intrinsic Prototype: Word32 Q6_R_add_clb_PI(Word64 Rss, Word32 Is6) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_add_clb_PI __builtin_HEXAGON_S4_clbpaddi + +/* ========================================================================== + Assembly Syntax: Rd32=normamt(Rss32) + C Intrinsic Prototype: Word32 Q6_R_normamt_P(Word64 Rss) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_normamt_P __builtin_HEXAGON_S4_clbpnorm + +/* ========================================================================== + Assembly Syntax: Rd32=extract(Rs32,#u5,#U5) + C Intrinsic Prototype: Word32 Q6_R_extract_RII(Word32 Rs, Word32 Iu5, Word32 IU5) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_extract_RII __builtin_HEXAGON_S4_extract + +/* ========================================================================== + Assembly Syntax: Rd32=extract(Rs32,Rtt32) + C Intrinsic Prototype: Word32 Q6_R_extract_RP(Word32 Rs, Word64 Rtt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_extract_RP __builtin_HEXAGON_S4_extract_rp + +/* ========================================================================== + Assembly Syntax: Rdd32=extract(Rss32,#u6,#U6) + C Intrinsic Prototype: Word64 Q6_P_extract_PII(Word64 Rss, Word32 Iu6, Word32 IU6) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_extract_PII __builtin_HEXAGON_S4_extractp + +/* ========================================================================== + Assembly Syntax: Rdd32=extract(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_extract_PP(Word64 Rss, Word64 Rtt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_extract_PP __builtin_HEXAGON_S4_extractp_rp + +/* ========================================================================== + Assembly Syntax: Rd32=lsl(#s6,Rt32) + C Intrinsic Prototype: Word32 Q6_R_lsl_IR(Word32 Is6, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_lsl_IR __builtin_HEXAGON_S4_lsli + +/* ========================================================================== + Assembly Syntax: Pd4=!tstbit(Rs32,#u5) + C Intrinsic Prototype: Byte Q6_p_not_tstbit_RI(Word32 Rs, Word32 Iu5) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_not_tstbit_RI __builtin_HEXAGON_S4_ntstbit_i + +/* ========================================================================== + Assembly Syntax: Pd4=!tstbit(Rs32,Rt32) + C Intrinsic Prototype: Byte Q6_p_not_tstbit_RR(Word32 Rs, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_not_tstbit_RR __builtin_HEXAGON_S4_ntstbit_r + +/* ========================================================================== + Assembly Syntax: Rx32|=and(Rs32,#s10) + C Intrinsic Prototype: Word32 Q6_R_andor_RI(Word32 Rx, Word32 Rs, Word32 Is10) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_andor_RI __builtin_HEXAGON_S4_or_andi + +/* ========================================================================== + Assembly Syntax: Rx32=or(Ru32,and(Rx32,#s10)) + C Intrinsic Prototype: Word32 Q6_R_or_and_RRI(Word32 Ru, Word32 Rx, Word32 Is10) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_or_and_RRI __builtin_HEXAGON_S4_or_andix + +/* ========================================================================== + Assembly Syntax: Rx32|=or(Rs32,#s10) + C Intrinsic Prototype: Word32 Q6_R_oror_RI(Word32 Rx, Word32 Rs, Word32 Is10) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_oror_RI __builtin_HEXAGON_S4_or_ori + +/* ========================================================================== + Assembly Syntax: Rx32=or(#u8,asl(Rx32,#U5)) + C Intrinsic Prototype: Word32 Q6_R_or_asl_IRI(Word32 Iu8, Word32 Rx, Word32 IU5) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_or_asl_IRI __builtin_HEXAGON_S4_ori_asl_ri + +/* ========================================================================== + Assembly Syntax: Rx32=or(#u8,lsr(Rx32,#U5)) + C Intrinsic Prototype: Word32 Q6_R_or_lsr_IRI(Word32 Iu8, Word32 Rx, Word32 IU5) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_or_lsr_IRI __builtin_HEXAGON_S4_ori_lsr_ri + +/* ========================================================================== + Assembly Syntax: Rd32=parity(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_parity_RR(Word32 Rs, Word32 Rt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_parity_RR __builtin_HEXAGON_S4_parity + +/* ========================================================================== + Assembly Syntax: Rd32=add(Rs32,sub(#s6,Ru32)) + C Intrinsic Prototype: Word32 Q6_R_add_sub_RIR(Word32 Rs, Word32 Is6, Word32 Ru) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_add_sub_RIR __builtin_HEXAGON_S4_subaddi + +/* ========================================================================== + Assembly Syntax: Rx32=sub(#u8,asl(Rx32,#U5)) + C Intrinsic Prototype: Word32 Q6_R_sub_asl_IRI(Word32 Iu8, Word32 Rx, Word32 IU5) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_sub_asl_IRI __builtin_HEXAGON_S4_subi_asl_ri + +/* ========================================================================== + Assembly Syntax: Rx32=sub(#u8,lsr(Rx32,#U5)) + C Intrinsic Prototype: Word32 Q6_R_sub_lsr_IRI(Word32 Iu8, Word32 Rx, Word32 IU5) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_sub_lsr_IRI __builtin_HEXAGON_S4_subi_lsr_ri + +/* ========================================================================== + Assembly Syntax: Rdd32=vrcrotate(Rss32,Rt32,#u2) + C Intrinsic Prototype: Word64 Q6_P_vrcrotate_PRI(Word64 Rss, Word32 Rt, Word32 Iu2) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vrcrotate_PRI __builtin_HEXAGON_S4_vrcrotate + +/* ========================================================================== + Assembly Syntax: Rxx32+=vrcrotate(Rss32,Rt32,#u2) + C Intrinsic Prototype: Word64 Q6_P_vrcrotateacc_PRI(Word64 Rxx, Word64 Rss, Word32 Rt, Word32 Iu2) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vrcrotateacc_PRI __builtin_HEXAGON_S4_vrcrotate_acc + +/* ========================================================================== + Assembly Syntax: Rdd32=vxaddsubh(Rss32,Rtt32):sat + C Intrinsic Prototype: Word64 Q6_P_vxaddsubh_PP_sat(Word64 Rss, Word64 Rtt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vxaddsubh_PP_sat __builtin_HEXAGON_S4_vxaddsubh + +/* ========================================================================== + Assembly Syntax: Rdd32=vxaddsubh(Rss32,Rtt32):rnd:>>1:sat + C Intrinsic Prototype: Word64 Q6_P_vxaddsubh_PP_rnd_rs1_sat(Word64 Rss, Word64 Rtt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vxaddsubh_PP_rnd_rs1_sat __builtin_HEXAGON_S4_vxaddsubhr + +/* ========================================================================== + Assembly Syntax: Rdd32=vxaddsubw(Rss32,Rtt32):sat + C Intrinsic Prototype: Word64 Q6_P_vxaddsubw_PP_sat(Word64 Rss, Word64 Rtt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vxaddsubw_PP_sat __builtin_HEXAGON_S4_vxaddsubw + +/* ========================================================================== + Assembly Syntax: Rdd32=vxsubaddh(Rss32,Rtt32):sat + C Intrinsic Prototype: Word64 Q6_P_vxsubaddh_PP_sat(Word64 Rss, Word64 Rtt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vxsubaddh_PP_sat __builtin_HEXAGON_S4_vxsubaddh + +/* ========================================================================== + Assembly Syntax: Rdd32=vxsubaddh(Rss32,Rtt32):rnd:>>1:sat + C Intrinsic Prototype: Word64 Q6_P_vxsubaddh_PP_rnd_rs1_sat(Word64 Rss, Word64 Rtt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vxsubaddh_PP_rnd_rs1_sat __builtin_HEXAGON_S4_vxsubaddhr + +/* ========================================================================== + Assembly Syntax: Rdd32=vxsubaddw(Rss32,Rtt32):sat + C Intrinsic Prototype: Word64 Q6_P_vxsubaddw_PP_sat(Word64 Rss, Word64 Rtt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vxsubaddw_PP_sat __builtin_HEXAGON_S4_vxsubaddw + +/* ========================================================================== + Assembly Syntax: Rd32=vasrhub(Rss32,#u4):rnd:sat + C Intrinsic Prototype: Word32 Q6_R_vasrhub_PI_rnd_sat(Word64 Rss, Word32 Iu4) + Instruction Type: S_2op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_R_vasrhub_PI_rnd_sat __builtin_HEXAGON_S5_asrhub_rnd_sat_goodsyntax + +/* ========================================================================== + Assembly Syntax: Rd32=vasrhub(Rss32,#u4):sat + C Intrinsic Prototype: Word32 Q6_R_vasrhub_PI_sat(Word64 Rss, Word32 Iu4) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_vasrhub_PI_sat __builtin_HEXAGON_S5_asrhub_sat + +/* ========================================================================== + Assembly Syntax: Rd32=popcount(Rss32) + C Intrinsic Prototype: Word32 Q6_R_popcount_P(Word64 Rss) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_popcount_P __builtin_HEXAGON_S5_popcountp + +/* ========================================================================== + Assembly Syntax: Rdd32=vasrh(Rss32,#u4):rnd + C Intrinsic Prototype: Word64 Q6_P_vasrh_PI_rnd(Word64 Rss, Word32 Iu4) + Instruction Type: S_2op + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_P_vasrh_PI_rnd __builtin_HEXAGON_S5_vasrhrnd_goodsyntax + +/* ========================================================================== + Assembly Syntax: dccleana(Rs32) + C Intrinsic Prototype: void Q6_dccleana_A(Address Rs) + Instruction Type: ST + Execution Slots: SLOT0 + ========================================================================== */ + +#define Q6_dccleana_A __builtin_HEXAGON_Y2_dccleana + +/* ========================================================================== + Assembly Syntax: dccleaninva(Rs32) + C Intrinsic Prototype: void Q6_dccleaninva_A(Address Rs) + Instruction Type: ST + Execution Slots: SLOT0 + ========================================================================== */ + +#define Q6_dccleaninva_A __builtin_HEXAGON_Y2_dccleaninva + +/* ========================================================================== + Assembly Syntax: dcfetch(Rs32) + C Intrinsic Prototype: void Q6_dcfetch_A(Address Rs) + Instruction Type: MAPPING + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_dcfetch_A __builtin_HEXAGON_Y2_dcfetch + +/* ========================================================================== + Assembly Syntax: dcinva(Rs32) + C Intrinsic Prototype: void Q6_dcinva_A(Address Rs) + Instruction Type: ST + Execution Slots: SLOT0 + ========================================================================== */ + +#define Q6_dcinva_A __builtin_HEXAGON_Y2_dcinva + +/* ========================================================================== + Assembly Syntax: dczeroa(Rs32) + C Intrinsic Prototype: void Q6_dczeroa_A(Address Rs) + Instruction Type: ST + Execution Slots: SLOT0 + ========================================================================== */ + +#define Q6_dczeroa_A __builtin_HEXAGON_Y2_dczeroa + +/* ========================================================================== + Assembly Syntax: l2fetch(Rs32,Rt32) + C Intrinsic Prototype: void Q6_l2fetch_AR(Address Rs, Word32 Rt) + Instruction Type: ST + Execution Slots: SLOT0 + ========================================================================== */ + +#define Q6_l2fetch_AR __builtin_HEXAGON_Y4_l2fetch + +/* ========================================================================== + Assembly Syntax: l2fetch(Rs32,Rtt32) + C Intrinsic Prototype: void Q6_l2fetch_AP(Address Rs, Word64 Rtt) + Instruction Type: ST + Execution Slots: SLOT0 + ========================================================================== */ + +#define Q6_l2fetch_AP __builtin_HEXAGON_Y5_l2fetch + +#if __HEXAGON_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Rdd32=rol(Rss32,#u6) + C Intrinsic Prototype: Word64 Q6_P_rol_PI(Word64 Rss, Word32 Iu6) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_rol_PI __builtin_HEXAGON_S6_rol_i_p +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HEXAGON_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Rxx32+=rol(Rss32,#u6) + C Intrinsic Prototype: Word64 Q6_P_rolacc_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_rolacc_PI __builtin_HEXAGON_S6_rol_i_p_acc +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HEXAGON_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Rxx32&=rol(Rss32,#u6) + C Intrinsic Prototype: Word64 Q6_P_roland_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_roland_PI __builtin_HEXAGON_S6_rol_i_p_and +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HEXAGON_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Rxx32-=rol(Rss32,#u6) + C Intrinsic Prototype: Word64 Q6_P_rolnac_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_rolnac_PI __builtin_HEXAGON_S6_rol_i_p_nac +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HEXAGON_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Rxx32|=rol(Rss32,#u6) + C Intrinsic Prototype: Word64 Q6_P_rolor_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_rolor_PI __builtin_HEXAGON_S6_rol_i_p_or +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HEXAGON_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Rxx32^=rol(Rss32,#u6) + C Intrinsic Prototype: Word64 Q6_P_rolxacc_PI(Word64 Rxx, Word64 Rss, Word32 Iu6) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_rolxacc_PI __builtin_HEXAGON_S6_rol_i_p_xacc +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HEXAGON_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Rd32=rol(Rs32,#u5) + C Intrinsic Prototype: Word32 Q6_R_rol_RI(Word32 Rs, Word32 Iu5) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_rol_RI __builtin_HEXAGON_S6_rol_i_r +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HEXAGON_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Rx32+=rol(Rs32,#u5) + C Intrinsic Prototype: Word32 Q6_R_rolacc_RI(Word32 Rx, Word32 Rs, Word32 Iu5) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_rolacc_RI __builtin_HEXAGON_S6_rol_i_r_acc +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HEXAGON_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Rx32&=rol(Rs32,#u5) + C Intrinsic Prototype: Word32 Q6_R_roland_RI(Word32 Rx, Word32 Rs, Word32 Iu5) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_roland_RI __builtin_HEXAGON_S6_rol_i_r_and +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HEXAGON_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Rx32-=rol(Rs32,#u5) + C Intrinsic Prototype: Word32 Q6_R_rolnac_RI(Word32 Rx, Word32 Rs, Word32 Iu5) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_rolnac_RI __builtin_HEXAGON_S6_rol_i_r_nac +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HEXAGON_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Rx32|=rol(Rs32,#u5) + C Intrinsic Prototype: Word32 Q6_R_rolor_RI(Word32 Rx, Word32 Rs, Word32 Iu5) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_rolor_RI __builtin_HEXAGON_S6_rol_i_r_or +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HEXAGON_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Rx32^=rol(Rs32,#u5) + C Intrinsic Prototype: Word32 Q6_R_rolxacc_RI(Word32 Rx, Word32 Rs, Word32 Iu5) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_rolxacc_RI __builtin_HEXAGON_S6_rol_i_r_xacc +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HEXAGON_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Rdd32=vabsdiffb(Rtt32,Rss32) + C Intrinsic Prototype: Word64 Q6_P_vabsdiffb_PP(Word64 Rtt, Word64 Rss) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vabsdiffb_PP __builtin_HEXAGON_M6_vabsdiffb +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HEXAGON_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Rdd32=vabsdiffub(Rtt32,Rss32) + C Intrinsic Prototype: Word64 Q6_P_vabsdiffub_PP(Word64 Rtt, Word64 Rss) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vabsdiffub_PP __builtin_HEXAGON_M6_vabsdiffub +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HEXAGON_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Rdd32=vsplatb(Rs32) + C Intrinsic Prototype: Word64 Q6_P_vsplatb_R(Word32 Rs) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vsplatb_R __builtin_HEXAGON_S6_vsplatrbp +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HEXAGON_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Rdd32=vtrunehb(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_vtrunehb_PP(Word64 Rss, Word64 Rtt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vtrunehb_PP __builtin_HEXAGON_S6_vtrunehb_ppp +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HEXAGON_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Rdd32=vtrunohb(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_vtrunohb_PP(Word64 Rss, Word64 Rtt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vtrunohb_PP __builtin_HEXAGON_S6_vtrunohb_ppp +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HEXAGON_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Vd32=vmem(Rt32):nt + C Intrinsic Prototype: HVX_Vector Q6_V_vmem_R_nt(Word32 Rt) + Instruction Type: MAPPING + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_V_vmem_R_nt __builtin_HEXAGON_V6_ldntnt0 +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HEXAGON_ARCH__ >= 65 +/* ========================================================================== + Assembly Syntax: Pd4=!any8(vcmpb.eq(Rss32,Rtt32)) + C Intrinsic Prototype: Byte Q6_p_not_any8_vcmpb_eq_PP(Word64 Rss, Word64 Rtt) + Instruction Type: ALU64 + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_p_not_any8_vcmpb_eq_PP __builtin_HEXAGON_A6_vcmpbeq_notany +#endif /* __HEXAGON_ARCH___ >= 65 */ + +#if __HEXAGON_ARCH__ >= 66 +/* ========================================================================== + Assembly Syntax: Rdd32=dfadd(Rss32,Rtt32) + C Intrinsic Prototype: Float64 Q6_P_dfadd_PP(Float64 Rss, Float64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_dfadd_PP __builtin_HEXAGON_F2_dfadd +#endif /* __HEXAGON_ARCH___ >= 66 */ + +#if __HEXAGON_ARCH__ >= 66 +/* ========================================================================== + Assembly Syntax: Rdd32=dfsub(Rss32,Rtt32) + C Intrinsic Prototype: Float64 Q6_P_dfsub_PP(Float64 Rss, Float64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_dfsub_PP __builtin_HEXAGON_F2_dfsub +#endif /* __HEXAGON_ARCH___ >= 66 */ + +#if __HEXAGON_ARCH__ >= 66 +/* ========================================================================== + Assembly Syntax: Rx32-=mpyi(Rs32,Rt32) + C Intrinsic Prototype: Word32 Q6_R_mpyinac_RR(Word32 Rx, Word32 Rs, Word32 Rt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mpyinac_RR __builtin_HEXAGON_M2_mnaci +#endif /* __HEXAGON_ARCH___ >= 66 */ + +#if __HEXAGON_ARCH__ >= 66 +/* ========================================================================== + Assembly Syntax: Rd32=mask(#u5,#U5) + C Intrinsic Prototype: Word32 Q6_R_mask_II(Word32 Iu5, Word32 IU5) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_mask_II __builtin_HEXAGON_S2_mask +#endif /* __HEXAGON_ARCH___ >= 66 */ + +#if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ +/* ========================================================================== + Assembly Syntax: Rd32=clip(Rs32,#u5) + C Intrinsic Prototype: Word32 Q6_R_clip_RI(Word32 Rs, Word32 Iu5) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_R_clip_RI __builtin_HEXAGON_A7_clip +#endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ + +#if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ +/* ========================================================================== + Assembly Syntax: Rdd32=cround(Rss32,#u6) + C Intrinsic Prototype: Word64 Q6_P_cround_PI(Word64 Rss, Word32 Iu6) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_cround_PI __builtin_HEXAGON_A7_croundd_ri +#endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ + +#if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ +/* ========================================================================== + Assembly Syntax: Rdd32=cround(Rss32,Rt32) + C Intrinsic Prototype: Word64 Q6_P_cround_PR(Word64 Rss, Word32 Rt) + Instruction Type: S_3op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_cround_PR __builtin_HEXAGON_A7_croundd_rr +#endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ + +#if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ +/* ========================================================================== + Assembly Syntax: Rdd32=vclip(Rss32,#u5) + C Intrinsic Prototype: Word64 Q6_P_vclip_PI(Word64 Rss, Word32 Iu5) + Instruction Type: S_2op + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_vclip_PI __builtin_HEXAGON_A7_vclip +#endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ + +#if __HEXAGON_ARCH__ >= 67 +/* ========================================================================== + Assembly Syntax: Rdd32=dfmax(Rss32,Rtt32) + C Intrinsic Prototype: Float64 Q6_P_dfmax_PP(Float64 Rss, Float64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_dfmax_PP __builtin_HEXAGON_F2_dfmax +#endif /* __HEXAGON_ARCH___ >= 67 */ + +#if __HEXAGON_ARCH__ >= 67 +/* ========================================================================== + Assembly Syntax: Rdd32=dfmin(Rss32,Rtt32) + C Intrinsic Prototype: Float64 Q6_P_dfmin_PP(Float64 Rss, Float64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_dfmin_PP __builtin_HEXAGON_F2_dfmin +#endif /* __HEXAGON_ARCH___ >= 67 */ + +#if __HEXAGON_ARCH__ >= 67 +/* ========================================================================== + Assembly Syntax: Rdd32=dfmpyfix(Rss32,Rtt32) + C Intrinsic Prototype: Float64 Q6_P_dfmpyfix_PP(Float64 Rss, Float64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_dfmpyfix_PP __builtin_HEXAGON_F2_dfmpyfix +#endif /* __HEXAGON_ARCH___ >= 67 */ + +#if __HEXAGON_ARCH__ >= 67 +/* ========================================================================== + Assembly Syntax: Rxx32+=dfmpyhh(Rss32,Rtt32) + C Intrinsic Prototype: Float64 Q6_P_dfmpyhhacc_PP(Float64 Rxx, Float64 Rss, Float64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_dfmpyhhacc_PP __builtin_HEXAGON_F2_dfmpyhh +#endif /* __HEXAGON_ARCH___ >= 67 */ + +#if __HEXAGON_ARCH__ >= 67 +/* ========================================================================== + Assembly Syntax: Rxx32+=dfmpylh(Rss32,Rtt32) + C Intrinsic Prototype: Float64 Q6_P_dfmpylhacc_PP(Float64 Rxx, Float64 Rss, Float64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_dfmpylhacc_PP __builtin_HEXAGON_F2_dfmpylh +#endif /* __HEXAGON_ARCH___ >= 67 */ + +#if __HEXAGON_ARCH__ >= 67 +/* ========================================================================== + Assembly Syntax: Rdd32=dfmpyll(Rss32,Rtt32) + C Intrinsic Prototype: Float64 Q6_P_dfmpyll_PP(Float64 Rss, Float64 Rtt) + Instruction Type: M + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_P_dfmpyll_PP __builtin_HEXAGON_F2_dfmpyll +#endif /* __HEXAGON_ARCH___ >= 67 */ + +#if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ +/* ========================================================================== + Assembly Syntax: Rdd32=cmpyiw(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_cmpyiw_PP(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT3 + ========================================================================== */ + +#define Q6_P_cmpyiw_PP __builtin_HEXAGON_M7_dcmpyiw +#endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ + +#if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ +/* ========================================================================== + Assembly Syntax: Rxx32+=cmpyiw(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_cmpyiwacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT3 + ========================================================================== */ + +#define Q6_P_cmpyiwacc_PP __builtin_HEXAGON_M7_dcmpyiw_acc +#endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ + +#if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ +/* ========================================================================== + Assembly Syntax: Rdd32=cmpyiw(Rss32,Rtt32*) + C Intrinsic Prototype: Word64 Q6_P_cmpyiw_PP_conj(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT3 + ========================================================================== */ + +#define Q6_P_cmpyiw_PP_conj __builtin_HEXAGON_M7_dcmpyiwc +#endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ + +#if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ +/* ========================================================================== + Assembly Syntax: Rxx32+=cmpyiw(Rss32,Rtt32*) + C Intrinsic Prototype: Word64 Q6_P_cmpyiwacc_PP_conj(Word64 Rxx, Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT3 + ========================================================================== */ + +#define Q6_P_cmpyiwacc_PP_conj __builtin_HEXAGON_M7_dcmpyiwc_acc +#endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ + +#if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ +/* ========================================================================== + Assembly Syntax: Rdd32=cmpyrw(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_cmpyrw_PP(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT3 + ========================================================================== */ + +#define Q6_P_cmpyrw_PP __builtin_HEXAGON_M7_dcmpyrw +#endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ + +#if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ +/* ========================================================================== + Assembly Syntax: Rxx32+=cmpyrw(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_cmpyrwacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT3 + ========================================================================== */ + +#define Q6_P_cmpyrwacc_PP __builtin_HEXAGON_M7_dcmpyrw_acc +#endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ + +#if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ +/* ========================================================================== + Assembly Syntax: Rdd32=cmpyrw(Rss32,Rtt32*) + C Intrinsic Prototype: Word64 Q6_P_cmpyrw_PP_conj(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT3 + ========================================================================== */ + +#define Q6_P_cmpyrw_PP_conj __builtin_HEXAGON_M7_dcmpyrwc +#endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ + +#if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ +/* ========================================================================== + Assembly Syntax: Rxx32+=cmpyrw(Rss32,Rtt32*) + C Intrinsic Prototype: Word64 Q6_P_cmpyrwacc_PP_conj(Word64 Rxx, Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT3 + ========================================================================== */ + +#define Q6_P_cmpyrwacc_PP_conj __builtin_HEXAGON_M7_dcmpyrwc_acc +#endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ + +#if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ +/* ========================================================================== + Assembly Syntax: Rdd32=vdmpyw(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_vdmpyw_PP(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT3 + ========================================================================== */ + +#define Q6_P_vdmpyw_PP __builtin_HEXAGON_M7_vdmpy +#endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ + +#if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ +/* ========================================================================== + Assembly Syntax: Rxx32+=vdmpyw(Rss32,Rtt32) + C Intrinsic Prototype: Word64 Q6_P_vdmpywacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT3 + ========================================================================== */ + +#define Q6_P_vdmpywacc_PP __builtin_HEXAGON_M7_vdmpy_acc +#endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ + +#if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ +/* ========================================================================== + Assembly Syntax: Rd32=cmpyiw(Rss32,Rtt32):<<1:sat + C Intrinsic Prototype: Word32 Q6_R_cmpyiw_PP_s1_sat(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT3 + ========================================================================== */ + +#define Q6_R_cmpyiw_PP_s1_sat __builtin_HEXAGON_M7_wcmpyiw +#endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ + +#if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ +/* ========================================================================== + Assembly Syntax: Rd32=cmpyiw(Rss32,Rtt32):<<1:rnd:sat + C Intrinsic Prototype: Word32 Q6_R_cmpyiw_PP_s1_rnd_sat(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT3 + ========================================================================== */ + +#define Q6_R_cmpyiw_PP_s1_rnd_sat __builtin_HEXAGON_M7_wcmpyiw_rnd +#endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ + +#if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ +/* ========================================================================== + Assembly Syntax: Rd32=cmpyiw(Rss32,Rtt32*):<<1:sat + C Intrinsic Prototype: Word32 Q6_R_cmpyiw_PP_conj_s1_sat(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT3 + ========================================================================== */ + +#define Q6_R_cmpyiw_PP_conj_s1_sat __builtin_HEXAGON_M7_wcmpyiwc +#endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ + +#if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ +/* ========================================================================== + Assembly Syntax: Rd32=cmpyiw(Rss32,Rtt32*):<<1:rnd:sat + C Intrinsic Prototype: Word32 Q6_R_cmpyiw_PP_conj_s1_rnd_sat(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT3 + ========================================================================== */ + +#define Q6_R_cmpyiw_PP_conj_s1_rnd_sat __builtin_HEXAGON_M7_wcmpyiwc_rnd +#endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ + +#if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ +/* ========================================================================== + Assembly Syntax: Rd32=cmpyrw(Rss32,Rtt32):<<1:sat + C Intrinsic Prototype: Word32 Q6_R_cmpyrw_PP_s1_sat(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT3 + ========================================================================== */ + +#define Q6_R_cmpyrw_PP_s1_sat __builtin_HEXAGON_M7_wcmpyrw +#endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ + +#if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ +/* ========================================================================== + Assembly Syntax: Rd32=cmpyrw(Rss32,Rtt32):<<1:rnd:sat + C Intrinsic Prototype: Word32 Q6_R_cmpyrw_PP_s1_rnd_sat(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT3 + ========================================================================== */ + +#define Q6_R_cmpyrw_PP_s1_rnd_sat __builtin_HEXAGON_M7_wcmpyrw_rnd +#endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ + +#if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ +/* ========================================================================== + Assembly Syntax: Rd32=cmpyrw(Rss32,Rtt32*):<<1:sat + C Intrinsic Prototype: Word32 Q6_R_cmpyrw_PP_conj_s1_sat(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT3 + ========================================================================== */ + +#define Q6_R_cmpyrw_PP_conj_s1_sat __builtin_HEXAGON_M7_wcmpyrwc +#endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ + +#if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__ +/* ========================================================================== + Assembly Syntax: Rd32=cmpyrw(Rss32,Rtt32*):<<1:rnd:sat + C Intrinsic Prototype: Word32 Q6_R_cmpyrw_PP_conj_s1_rnd_sat(Word64 Rss, Word64 Rtt) + Instruction Type: M + Execution Slots: SLOT3 + ========================================================================== */ + +#define Q6_R_cmpyrw_PP_conj_s1_rnd_sat __builtin_HEXAGON_M7_wcmpyrwc_rnd +#endif /* __HEXAGON_ARCH___ >= 67 && defined __HEXAGON_AUDIO__*/ + +#if __HEXAGON_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: dmlink(Rs32,Rt32) + C Intrinsic Prototype: void Q6_dmlink_AA(Address Rs, Address Rt) + Instruction Type: ST + Execution Slots: SLOT0 + ========================================================================== */ + +#define Q6_dmlink_AA __builtin_HEXAGON_Y6_dmlink +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HEXAGON_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Rd32=dmpause + C Intrinsic Prototype: Word32 Q6_R_dmpause() + Instruction Type: ST + Execution Slots: SLOT0 + ========================================================================== */ + +#define Q6_R_dmpause __builtin_HEXAGON_Y6_dmpause +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HEXAGON_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Rd32=dmpoll + C Intrinsic Prototype: Word32 Q6_R_dmpoll() + Instruction Type: ST + Execution Slots: SLOT0 + ========================================================================== */ + +#define Q6_R_dmpoll __builtin_HEXAGON_Y6_dmpoll +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HEXAGON_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: dmresume(Rs32) + C Intrinsic Prototype: void Q6_dmresume_A(Address Rs) + Instruction Type: ST + Execution Slots: SLOT0 + ========================================================================== */ + +#define Q6_dmresume_A __builtin_HEXAGON_Y6_dmresume +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HEXAGON_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: dmstart(Rs32) + C Intrinsic Prototype: void Q6_dmstart_A(Address Rs) + Instruction Type: ST + Execution Slots: SLOT0 + ========================================================================== */ + +#define Q6_dmstart_A __builtin_HEXAGON_Y6_dmstart +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HEXAGON_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Rd32=dmwait + C Intrinsic Prototype: Word32 Q6_R_dmwait() + Instruction Type: ST + Execution Slots: SLOT0 + ========================================================================== */ + +#define Q6_R_dmwait __builtin_HEXAGON_Y6_dmwait +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#include +#ifdef __HVX__ +#include +#endif /* __HVX__ */ +#endif diff --git a/clang/lib/Headers/hexagon_types.h b/clang/lib/Headers/hexagon_types.h new file mode 100644 index 0000000000000..6958809418d8f --- /dev/null +++ b/clang/lib/Headers/hexagon_types.h @@ -0,0 +1,2653 @@ +/******************************************************************************/ +/* (c) 2020 Qualcomm Innovation Center, Inc. All rights reserved. */ +/* */ +/******************************************************************************/ +#ifndef HEXAGON_TYPES_H +#define HEXAGON_TYPES_H + +#include + +/* Hexagon names */ +#define HEXAGON_Vect HEXAGON_Vect64 +#define HEXAGON_V_GET_D HEXAGON_V64_GET_D +#define HEXAGON_V_GET_UD HEXAGON_V64_GET_UD +#define HEXAGON_V_GET_W0 HEXAGON_V64_GET_W0 +#define HEXAGON_V_GET_W1 HEXAGON_V64_GET_W1 +#define HEXAGON_V_GET_UW0 HEXAGON_V64_GET_UW0 +#define HEXAGON_V_GET_UW1 HEXAGON_V64_GET_UW1 +#define HEXAGON_V_GET_H0 HEXAGON_V64_GET_H0 +#define HEXAGON_V_GET_H1 HEXAGON_V64_GET_H1 +#define HEXAGON_V_GET_H2 HEXAGON_V64_GET_H2 +#define HEXAGON_V_GET_H3 HEXAGON_V64_GET_H3 +#define HEXAGON_V_GET_UH0 HEXAGON_V64_GET_UH0 +#define HEXAGON_V_GET_UH1 HEXAGON_V64_GET_UH1 +#define HEXAGON_V_GET_UH2 HEXAGON_V64_GET_UH2 +#define HEXAGON_V_GET_UH3 HEXAGON_V64_GET_UH3 +#define HEXAGON_V_GET_B0 HEXAGON_V64_GET_B0 +#define HEXAGON_V_GET_B1 HEXAGON_V64_GET_B1 +#define HEXAGON_V_GET_B2 HEXAGON_V64_GET_B2 +#define HEXAGON_V_GET_B3 HEXAGON_V64_GET_B3 +#define HEXAGON_V_GET_B4 HEXAGON_V64_GET_B4 +#define HEXAGON_V_GET_B5 HEXAGON_V64_GET_B5 +#define HEXAGON_V_GET_B6 HEXAGON_V64_GET_B6 +#define HEXAGON_V_GET_B7 HEXAGON_V64_GET_B7 +#define HEXAGON_V_GET_UB0 HEXAGON_V64_GET_UB0 +#define HEXAGON_V_GET_UB1 HEXAGON_V64_GET_UB1 +#define HEXAGON_V_GET_UB2 HEXAGON_V64_GET_UB2 +#define HEXAGON_V_GET_UB3 HEXAGON_V64_GET_UB3 +#define HEXAGON_V_GET_UB4 HEXAGON_V64_GET_UB4 +#define HEXAGON_V_GET_UB5 HEXAGON_V64_GET_UB5 +#define HEXAGON_V_GET_UB6 HEXAGON_V64_GET_UB6 +#define HEXAGON_V_GET_UB7 HEXAGON_V64_GET_UB7 +#define HEXAGON_V_PUT_D HEXAGON_V64_PUT_D +#define HEXAGON_V_PUT_W0 HEXAGON_V64_PUT_W0 +#define HEXAGON_V_PUT_W1 HEXAGON_V64_PUT_W1 +#define HEXAGON_V_PUT_H0 HEXAGON_V64_PUT_H0 +#define HEXAGON_V_PUT_H1 HEXAGON_V64_PUT_H1 +#define HEXAGON_V_PUT_H2 HEXAGON_V64_PUT_H2 +#define HEXAGON_V_PUT_H3 HEXAGON_V64_PUT_H3 +#define HEXAGON_V_PUT_B0 HEXAGON_V64_PUT_B0 +#define HEXAGON_V_PUT_B1 HEXAGON_V64_PUT_B1 +#define HEXAGON_V_PUT_B2 HEXAGON_V64_PUT_B2 +#define HEXAGON_V_PUT_B3 HEXAGON_V64_PUT_B3 +#define HEXAGON_V_PUT_B4 HEXAGON_V64_PUT_B4 +#define HEXAGON_V_PUT_B5 HEXAGON_V64_PUT_B5 +#define HEXAGON_V_PUT_B6 HEXAGON_V64_PUT_B6 +#define HEXAGON_V_PUT_B7 HEXAGON_V64_PUT_B7 +#define HEXAGON_V_CREATE_D HEXAGON_V64_CREATE_D +#define HEXAGON_V_CREATE_W HEXAGON_V64_CREATE_W +#define HEXAGON_V_CREATE_H HEXAGON_V64_CREATE_H +#define HEXAGON_V_CREATE_B HEXAGON_V64_CREATE_B + +#ifdef __cplusplus +#define HEXAGON_VectC HEXAGON_Vect64C +#endif /* __cplusplus */ + +/* 64 Bit Vectors */ + +typedef long long __attribute__((__may_alias__)) HEXAGON_Vect64; + +/* Extract doubleword macros */ + +#define HEXAGON_V64_GET_D(v) (v) +#define HEXAGON_V64_GET_UD(v) ((unsigned long long)(v)) + +/* Extract word macros */ + +#define HEXAGON_V64_GET_W0(v) \ + __extension__({ \ + union { \ + long long d; \ + int w[2]; \ + } _HEXAGON_V64_internal_union; \ + _HEXAGON_V64_internal_union.d = (v); \ + _HEXAGON_V64_internal_union.w[0]; \ + }) +#define HEXAGON_V64_GET_W1(v) \ + __extension__({ \ + union { \ + long long d; \ + int w[2]; \ + } _HEXAGON_V64_internal_union; \ + _HEXAGON_V64_internal_union.d = (v); \ + _HEXAGON_V64_internal_union.w[1]; \ + }) +#define HEXAGON_V64_GET_UW0(v) \ + __extension__({ \ + union { \ + long long d; \ + unsigned int uw[2]; \ + } _HEXAGON_V64_internal_union; \ + _HEXAGON_V64_internal_union.d = (v); \ + _HEXAGON_V64_internal_union.uw[0]; \ + }) +#define HEXAGON_V64_GET_UW1(v) \ + __extension__({ \ + union { \ + long long d; \ + unsigned int uw[2]; \ + } _HEXAGON_V64_internal_union; \ + _HEXAGON_V64_internal_union.d = (v); \ + _HEXAGON_V64_internal_union.uw[1]; \ + }) + +/* Extract half word macros */ + +#define HEXAGON_V64_GET_H0(v) \ + __extension__({ \ + union { \ + long long d; \ + short h[4]; \ + } _HEXAGON_V64_internal_union; \ + _HEXAGON_V64_internal_union.d = (v); \ + _HEXAGON_V64_internal_union.h[0]; \ + }) +#define HEXAGON_V64_GET_H1(v) \ + __extension__({ \ + union { \ + long long d; \ + short h[4]; \ + } _HEXAGON_V64_internal_union; \ + _HEXAGON_V64_internal_union.d = (v); \ + _HEXAGON_V64_internal_union.h[1]; \ + }) +#define HEXAGON_V64_GET_H2(v) \ + __extension__({ \ + union { \ + long long d; \ + short h[4]; \ + } _HEXAGON_V64_internal_union; \ + _HEXAGON_V64_internal_union.d = (v); \ + _HEXAGON_V64_internal_union.h[2]; \ + }) +#define HEXAGON_V64_GET_H3(v) \ + __extension__({ \ + union { \ + long long d; \ + short h[4]; \ + } _HEXAGON_V64_internal_union; \ + _HEXAGON_V64_internal_union.d = (v); \ + _HEXAGON_V64_internal_union.h[3]; \ + }) +#define HEXAGON_V64_GET_UH0(v) \ + __extension__({ \ + union { \ + long long d; \ + unsigned short uh[4]; \ + } _HEXAGON_V64_internal_union; \ + _HEXAGON_V64_internal_union.d = (v); \ + _HEXAGON_V64_internal_union.uh[0]; \ + }) +#define HEXAGON_V64_GET_UH1(v) \ + __extension__({ \ + union { \ + long long d; \ + unsigned short uh[4]; \ + } _HEXAGON_V64_internal_union; \ + _HEXAGON_V64_internal_union.d = (v); \ + _HEXAGON_V64_internal_union.uh[1]; \ + }) +#define HEXAGON_V64_GET_UH2(v) \ + __extension__({ \ + union { \ + long long d; \ + unsigned short uh[4]; \ + } _HEXAGON_V64_internal_union; \ + _HEXAGON_V64_internal_union.d = (v); \ + _HEXAGON_V64_internal_union.uh[2]; \ + }) +#define HEXAGON_V64_GET_UH3(v) \ + __extension__({ \ + union { \ + long long d; \ + unsigned short uh[4]; \ + } _HEXAGON_V64_internal_union; \ + _HEXAGON_V64_internal_union.d = (v); \ + _HEXAGON_V64_internal_union.uh[3]; \ + }) + +/* Extract byte macros */ + +#define HEXAGON_V64_GET_B0(v) \ + __extension__({ \ + union { \ + long long d; \ + signed char b[8]; \ + } _HEXAGON_V64_internal_union; \ + _HEXAGON_V64_internal_union.d = (v); \ + _HEXAGON_V64_internal_union.b[0]; \ + }) +#define HEXAGON_V64_GET_B1(v) \ + __extension__({ \ + union { \ + long long d; \ + signed char b[8]; \ + } _HEXAGON_V64_internal_union; \ + _HEXAGON_V64_internal_union.d = (v); \ + _HEXAGON_V64_internal_union.b[1]; \ + }) +#define HEXAGON_V64_GET_B2(v) \ + __extension__({ \ + union { \ + long long d; \ + signed char b[8]; \ + } _HEXAGON_V64_internal_union; \ + _HEXAGON_V64_internal_union.d = (v); \ + _HEXAGON_V64_internal_union.b[2]; \ + }) +#define HEXAGON_V64_GET_B3(v) \ + __extension__({ \ + union { \ + long long d; \ + signed char b[8]; \ + } _HEXAGON_V64_internal_union; \ + _HEXAGON_V64_internal_union.d = (v); \ + _HEXAGON_V64_internal_union.b[3]; \ + }) +#define HEXAGON_V64_GET_B4(v) \ + __extension__({ \ + union { \ + long long d; \ + signed char b[8]; \ + } _HEXAGON_V64_internal_union; \ + _HEXAGON_V64_internal_union.d = (v); \ + _HEXAGON_V64_internal_union.b[4]; \ + }) +#define HEXAGON_V64_GET_B5(v) \ + __extension__({ \ + union { \ + long long d; \ + signed char b[8]; \ + } _HEXAGON_V64_internal_union; \ + _HEXAGON_V64_internal_union.d = (v); \ + _HEXAGON_V64_internal_union.b[5]; \ + }) +#define HEXAGON_V64_GET_B6(v) \ + __extension__({ \ + union { \ + long long d; \ + signed char b[8]; \ + } _HEXAGON_V64_internal_union; \ + _HEXAGON_V64_internal_union.d = (v); \ + _HEXAGON_V64_internal_union.b[6]; \ + }) +#define HEXAGON_V64_GET_B7(v) \ + __extension__({ \ + union { \ + long long d; \ + signed char b[8]; \ + } _HEXAGON_V64_internal_union; \ + _HEXAGON_V64_internal_union.d = (v); \ + _HEXAGON_V64_internal_union.b[7]; \ + }) +#define HEXAGON_V64_GET_UB0(v) \ + __extension__({ \ + union { \ + long long d; \ + unsigned char ub[8]; \ + } _HEXAGON_V64_internal_union; \ + _HEXAGON_V64_internal_union.d = (v); \ + _HEXAGON_V64_internal_union.ub[0]; \ + }) +#define HEXAGON_V64_GET_UB1(v) \ + __extension__({ \ + union { \ + long long d; \ + unsigned char ub[8]; \ + } _HEXAGON_V64_internal_union; \ + _HEXAGON_V64_internal_union.d = (v); \ + _HEXAGON_V64_internal_union.ub[1]; \ + }) +#define HEXAGON_V64_GET_UB2(v) \ + __extension__({ \ + union { \ + long long d; \ + unsigned char ub[8]; \ + } _HEXAGON_V64_internal_union; \ + _HEXAGON_V64_internal_union.d = (v); \ + _HEXAGON_V64_internal_union.ub[2]; \ + }) +#define HEXAGON_V64_GET_UB3(v) \ + __extension__({ \ + union { \ + long long d; \ + unsigned char ub[8]; \ + } _HEXAGON_V64_internal_union; \ + _HEXAGON_V64_internal_union.d = (v); \ + _HEXAGON_V64_internal_union.ub[3]; \ + }) +#define HEXAGON_V64_GET_UB4(v) \ + __extension__({ \ + union { \ + long long d; \ + unsigned char ub[8]; \ + } _HEXAGON_V64_internal_union; \ + _HEXAGON_V64_internal_union.d = (v); \ + _HEXAGON_V64_internal_union.ub[4]; \ + }) +#define HEXAGON_V64_GET_UB5(v) \ + __extension__({ \ + union { \ + long long d; \ + unsigned char ub[8]; \ + } _HEXAGON_V64_internal_union; \ + _HEXAGON_V64_internal_union.d = (v); \ + _HEXAGON_V64_internal_union.ub[5]; \ + }) +#define HEXAGON_V64_GET_UB6(v) \ + __extension__({ \ + union { \ + long long d; \ + unsigned char ub[8]; \ + } _HEXAGON_V64_internal_union; \ + _HEXAGON_V64_internal_union.d = (v); \ + _HEXAGON_V64_internal_union.ub[6]; \ + }) +#define HEXAGON_V64_GET_UB7(v) \ + __extension__({ \ + union { \ + long long d; \ + unsigned char ub[8]; \ + } _HEXAGON_V64_internal_union; \ + _HEXAGON_V64_internal_union.d = (v); \ + _HEXAGON_V64_internal_union.ub[7]; \ + }) + +/* NOTE: All set macros return a HEXAGON_Vect64 type */ + +/* Set doubleword macro */ + +#define HEXAGON_V64_PUT_D(v, new) (new) + +/* Set word macros */ + +#ifdef __hexagon__ + +#define HEXAGON_V64_PUT_W0(v, new) \ + __extension__({ \ + union { \ + long long d; \ + int w[2]; \ + } _HEXAGON_V64_internal_union; \ + _HEXAGON_V64_internal_union.d = (v); \ + _HEXAGON_V64_internal_union.w[0] = (new); \ + _HEXAGON_V64_internal_union.d; \ + }) +#define HEXAGON_V64_PUT_W1(v, new) \ + __extension__({ \ + union { \ + long long d; \ + int w[2]; \ + } _HEXAGON_V64_internal_union; \ + _HEXAGON_V64_internal_union.d = (v); \ + _HEXAGON_V64_internal_union.w[1] = (new); \ + _HEXAGON_V64_internal_union.d; \ + }) + +#else /* !__hexagon__ */ + +#define HEXAGON_V64_PUT_W0(v, new) \ + (((v) & 0xffffffff00000000LL) | ((HEXAGON_Vect64)((unsigned int)(new)))) +#define HEXAGON_V64_PUT_W1(v, new) \ + (((v) & 0x00000000ffffffffLL) | (((HEXAGON_Vect64)(new)) << 32LL)) + +#endif /* !__hexagon__ */ + +/* Set half word macros */ + +#ifdef __hexagon__ + +#define HEXAGON_V64_PUT_H0(v, new) \ + __extension__({ \ + union { \ + long long d; \ + short h[4]; \ + } _HEXAGON_V64_internal_union; \ + _HEXAGON_V64_internal_union.d = (v); \ + _HEXAGON_V64_internal_union.h[0] = (new); \ + _HEXAGON_V64_internal_union.d; \ + }) +#define HEXAGON_V64_PUT_H1(v, new) \ + __extension__({ \ + union { \ + long long d; \ + short h[4]; \ + } _HEXAGON_V64_internal_union; \ + _HEXAGON_V64_internal_union.d = (v); \ + _HEXAGON_V64_internal_union.h[1] = (new); \ + _HEXAGON_V64_internal_union.d; \ + }) +#define HEXAGON_V64_PUT_H2(v, new) \ + __extension__({ \ + union { \ + long long d; \ + short h[4]; \ + } _HEXAGON_V64_internal_union; \ + _HEXAGON_V64_internal_union.d = (v); \ + _HEXAGON_V64_internal_union.h[2] = (new); \ + _HEXAGON_V64_internal_union.d; \ + }) +#define HEXAGON_V64_PUT_H3(v, new) \ + __extension__({ \ + union { \ + long long d; \ + short h[4]; \ + } _HEXAGON_V64_internal_union; \ + _HEXAGON_V64_internal_union.d = (v); \ + _HEXAGON_V64_internal_union.h[3] = (new); \ + _HEXAGON_V64_internal_union.d; \ + }) + +#else /* !__hexagon__ */ + +#define HEXAGON_V64_PUT_H0(v, new) \ + (((v) & 0xffffffffffff0000LL) | ((HEXAGON_Vect64)((unsigned short)(new)))) +#define HEXAGON_V64_PUT_H1(v, new) \ + (((v) & 0xffffffff0000ffffLL) | (((HEXAGON_Vect64)((unsigned short)(new))) << 16LL)) +#define HEXAGON_V64_PUT_H2(v, new) \ + (((v) & 0xffff0000ffffffffLL) | (((HEXAGON_Vect64)((unsigned short)(new))) << 32LL)) +#define HEXAGON_V64_PUT_H3(v, new) \ + (((v) & 0x0000ffffffffffffLL) | (((HEXAGON_Vect64)(new)) << 48LL)) + +#endif /* !__hexagon__ */ + +/* Set byte macros */ + +#ifdef __hexagon__ + +#define HEXAGON_V64_PUT_B0(v, new) \ + __extension__({ \ + union { \ + long long d; \ + char b[8]; \ + } _HEXAGON_V64_internal_union; \ + _HEXAGON_V64_internal_union.d = (v); \ + _HEXAGON_V64_internal_union.b[0] = (new); \ + _HEXAGON_V64_internal_union.d; \ + }) +#define HEXAGON_V64_PUT_B1(v, new) \ + __extension__({ \ + union { \ + long long d; \ + char b[8]; \ + } _HEXAGON_V64_internal_union; \ + _HEXAGON_V64_internal_union.d = (v); \ + _HEXAGON_V64_internal_union.b[1] = (new); \ + _HEXAGON_V64_internal_union.d; \ + }) +#define HEXAGON_V64_PUT_B2(v, new) \ + __extension__({ \ + union { \ + long long d; \ + char b[8]; \ + } _HEXAGON_V64_internal_union; \ + _HEXAGON_V64_internal_union.d = (v); \ + _HEXAGON_V64_internal_union.b[2] = (new); \ + _HEXAGON_V64_internal_union.d; \ + }) +#define HEXAGON_V64_PUT_B3(v, new) \ + __extension__({ \ + union { \ + long long d; \ + char b[8]; \ + } _HEXAGON_V64_internal_union; \ + _HEXAGON_V64_internal_union.d = (v); \ + _HEXAGON_V64_internal_union.b[3] = (new); \ + _HEXAGON_V64_internal_union.d; \ + }) +#define HEXAGON_V64_PUT_B4(v, new) \ + __extension__({ \ + union { \ + long long d; \ + char b[8]; \ + } _HEXAGON_V64_internal_union; \ + _HEXAGON_V64_internal_union.d = (v); \ + _HEXAGON_V64_internal_union.b[4] = (new); \ + _HEXAGON_V64_internal_union.d; \ + }) +#define HEXAGON_V64_PUT_B5(v, new) \ + __extension__({ \ + union { \ + long long d; \ + char b[8]; \ + } _HEXAGON_V64_internal_union; \ + _HEXAGON_V64_internal_union.d = (v); \ + _HEXAGON_V64_internal_union.b[5] = (new); \ + _HEXAGON_V64_internal_union.d; \ + }) +#define HEXAGON_V64_PUT_B6(v, new) \ + __extension__({ \ + union { \ + long long d; \ + char b[8]; \ + } _HEXAGON_V64_internal_union; \ + _HEXAGON_V64_internal_union.d = (v); \ + _HEXAGON_V64_internal_union.b[6] = (new); \ + _HEXAGON_V64_internal_union.d; \ + }) +#define HEXAGON_V64_PUT_B7(v, new) \ + __extension__({ \ + union { \ + long long d; \ + char b[8]; \ + } _HEXAGON_V64_internal_union; \ + _HEXAGON_V64_internal_union.d = (v); \ + _HEXAGON_V64_internal_union.b[7] = (new); \ + _HEXAGON_V64_internal_union.d; \ + }) + +#else /* !__hexagon__ */ + +#define HEXAGON_V64_PUT_B0(v, new) \ + (((v) & 0xffffffffffffff00LL) | ((HEXAGON_Vect64)((unsigned char)(new)))) +#define HEXAGON_V64_PUT_B1(v, new) \ + (((v) & 0xffffffffffff00ffLL) | (((HEXAGON_Vect64)((unsigned char)(new))) << 8LL)) +#define HEXAGON_V64_PUT_B2(v, new) \ + (((v) & 0xffffffffff00ffffLL) | (((HEXAGON_Vect64)((unsigned char)(new))) << 16LL)) +#define HEXAGON_V64_PUT_B3(v, new) \ + (((v) & 0xffffffff00ffffffLL) | (((HEXAGON_Vect64)((unsigned char)(new))) << 24LL)) +#define HEXAGON_V64_PUT_B4(v, new) \ + (((v) & 0xffffff00ffffffffLL) | (((HEXAGON_Vect64)((unsigned char)(new))) << 32LL)) +#define HEXAGON_V64_PUT_B5(v, new) \ + (((v) & 0xffff00ffffffffffLL) | (((HEXAGON_Vect64)((unsigned char)(new))) << 40LL)) +#define HEXAGON_V64_PUT_B6(v, new) \ + (((v) & 0xff00ffffffffffffLL) | (((HEXAGON_Vect64)((unsigned char)(new))) << 48LL)) +#define HEXAGON_V64_PUT_B7(v, new) \ + (((v) & 0x00ffffffffffffffLL) | (((HEXAGON_Vect64)(new)) << 56LL)) + +#endif /* !__hexagon__ */ + +/* NOTE: All create macros return a HEXAGON_Vect64 type */ + +/* Create from a doubleword */ + +#define HEXAGON_V64_CREATE_D(d) (d) + +/* Create from words */ + +#ifdef __hexagon__ + +#define HEXAGON_V64_CREATE_W(w1, w0) \ + __extension__({ \ + union { \ + long long d; \ + int w[2]; \ + } _HEXAGON_V64_internal_union; \ + _HEXAGON_V64_internal_union.w[0] = (w0); \ + _HEXAGON_V64_internal_union.w[1] = (w1); \ + _HEXAGON_V64_internal_union.d; \ + }) + +#else /* !__hexagon__ */ + +#define HEXAGON_V64_CREATE_W(w1, w0) \ + ((((HEXAGON_Vect64)(w1)) << 32LL) | ((HEXAGON_Vect64)((w0) & 0xffffffff))) + +#endif /* !__hexagon__ */ + +/* Create from half words */ + +#ifdef __hexagon__ + +#define HEXAGON_V64_CREATE_H(h3, h2, h1, h0) \ + __extension__({ \ + union { \ + long long d; \ + short h[4]; \ + } _HEXAGON_V64_internal_union; \ + _HEXAGON_V64_internal_union.h[0] = (h0); \ + _HEXAGON_V64_internal_union.h[1] = (h1); \ + _HEXAGON_V64_internal_union.h[2] = (h2); \ + _HEXAGON_V64_internal_union.h[3] = (h3); \ + _HEXAGON_V64_internal_union.d; \ + }) + +#else /* !__hexagon__ */ + +#define HEXAGON_V64_CREATE_H(h3, h2, h1, h0) \ + ((((HEXAGON_Vect64)(h3)) << 48LL) | (((HEXAGON_Vect64)((h2) & 0xffff)) << 32LL) | \ + (((HEXAGON_Vect64)((h1) & 0xffff)) << 16LL) | ((HEXAGON_Vect64)((h0) & 0xffff))) + +#endif /* !__hexagon__ */ + +/* Create from bytes */ + +#ifdef __hexagon__ + +#define HEXAGON_V64_CREATE_B(b7, b6, b5, b4, b3, b2, b1, b0) \ + __extension__({ \ + union { \ + long long d; \ + char b[8]; \ + } _HEXAGON_V64_internal_union; \ + _HEXAGON_V64_internal_union.b[0] = (b0); \ + _HEXAGON_V64_internal_union.b[1] = (b1); \ + _HEXAGON_V64_internal_union.b[2] = (b2); \ + _HEXAGON_V64_internal_union.b[3] = (b3); \ + _HEXAGON_V64_internal_union.b[4] = (b4); \ + _HEXAGON_V64_internal_union.b[5] = (b5); \ + _HEXAGON_V64_internal_union.b[6] = (b6); \ + _HEXAGON_V64_internal_union.b[7] = (b7); \ + _HEXAGON_V64_internal_union.d; \ + }) + +#else /* !__hexagon__ */ + +#define HEXAGON_V64_CREATE_B(b7, b6, b5, b4, b3, b2, b1, b0) \ + ((((HEXAGON_Vect64)(b7)) << 56LL) | (((HEXAGON_Vect64)((b6) & 0xff)) << 48LL) | \ + (((HEXAGON_Vect64)((b5) & 0xff)) << 40LL) | (((HEXAGON_Vect64)((b4) & 0xff)) << 32LL) | \ + (((HEXAGON_Vect64)((b3) & 0xff)) << 24LL) | (((HEXAGON_Vect64)((b2) & 0xff)) << 16LL) | \ + (((HEXAGON_Vect64)((b1) & 0xff)) << 8LL) | ((HEXAGON_Vect64)((b0) & 0xff))) + +#endif /* !__hexagon__ */ + +#ifdef __cplusplus + +class HEXAGON_Vect64C { +public: + // Constructors + HEXAGON_Vect64C(long long d = 0) : data(d) {}; + HEXAGON_Vect64C(int w1, int w0) : data(HEXAGON_V64_CREATE_W(w1, w0)) {}; + HEXAGON_Vect64C(short h3, short h2, short h1, short h0) + : data(HEXAGON_V64_CREATE_H(h3, h2, h1, h0)) {}; + HEXAGON_Vect64C(signed char b7, signed char b6, signed char b5, signed char b4, + signed char b3, signed char b2, signed char b1, signed char b0) + : data(HEXAGON_V64_CREATE_B(b7, b6, b5, b4, b3, b2, b1, b0)) {}; + HEXAGON_Vect64C(const HEXAGON_Vect64C &v) : data(v.data) {}; + + HEXAGON_Vect64C &operator=(const HEXAGON_Vect64C &v) { + data = v.data; + return *this; + }; + + operator long long() { + return data; + }; + + // Extract doubleword methods + long long D(void) { + return HEXAGON_V64_GET_D(data); + }; + unsigned long long UD(void) { + return HEXAGON_V64_GET_UD(data); + }; + + // Extract word methods + int W0(void) { + return HEXAGON_V64_GET_W0(data); + }; + int W1(void) { + return HEXAGON_V64_GET_W1(data); + }; + unsigned int UW0(void) { + return HEXAGON_V64_GET_UW0(data); + }; + unsigned int UW1(void) { + return HEXAGON_V64_GET_UW1(data); + }; + + // Extract half word methods + short H0(void) { + return HEXAGON_V64_GET_H0(data); + }; + short H1(void) { + return HEXAGON_V64_GET_H1(data); + }; + short H2(void) { + return HEXAGON_V64_GET_H2(data); + }; + short H3(void) { + return HEXAGON_V64_GET_H3(data); + }; + unsigned short UH0(void) { + return HEXAGON_V64_GET_UH0(data); + }; + unsigned short UH1(void) { + return HEXAGON_V64_GET_UH1(data); + }; + unsigned short UH2(void) { + return HEXAGON_V64_GET_UH2(data); + }; + unsigned short UH3(void) { + return HEXAGON_V64_GET_UH3(data); + }; + + // Extract byte methods + signed char B0(void) { + return HEXAGON_V64_GET_B0(data); + }; + signed char B1(void) { + return HEXAGON_V64_GET_B1(data); + }; + signed char B2(void) { + return HEXAGON_V64_GET_B2(data); + }; + signed char B3(void) { + return HEXAGON_V64_GET_B3(data); + }; + signed char B4(void) { + return HEXAGON_V64_GET_B4(data); + }; + signed char B5(void) { + return HEXAGON_V64_GET_B5(data); + }; + signed char B6(void) { + return HEXAGON_V64_GET_B6(data); + }; + signed char B7(void) { + return HEXAGON_V64_GET_B7(data); + }; + unsigned char UB0(void) { + return HEXAGON_V64_GET_UB0(data); + }; + unsigned char UB1(void) { + return HEXAGON_V64_GET_UB1(data); + }; + unsigned char UB2(void) { + return HEXAGON_V64_GET_UB2(data); + }; + unsigned char UB3(void) { + return HEXAGON_V64_GET_UB3(data); + }; + unsigned char UB4(void) { + return HEXAGON_V64_GET_UB4(data); + }; + unsigned char UB5(void) { + return HEXAGON_V64_GET_UB5(data); + }; + unsigned char UB6(void) { + return HEXAGON_V64_GET_UB6(data); + }; + unsigned char UB7(void) { + return HEXAGON_V64_GET_UB7(data); + }; + + // NOTE: All set methods return a HEXAGON_Vect64C type + + // Set doubleword method + HEXAGON_Vect64C D(long long d) { + return HEXAGON_Vect64C(HEXAGON_V64_PUT_D(data, d)); + }; + + // Set word methods + HEXAGON_Vect64C W0(int w) { + return HEXAGON_Vect64C(HEXAGON_V64_PUT_W0(data, w)); + }; + HEXAGON_Vect64C W1(int w) { + return HEXAGON_Vect64C(HEXAGON_V64_PUT_W1(data, w)); + }; + + // Set half word methods + HEXAGON_Vect64C H0(short h) { + return HEXAGON_Vect64C(HEXAGON_V64_PUT_H0(data, h)); + }; + HEXAGON_Vect64C H1(short h) { + return HEXAGON_Vect64C(HEXAGON_V64_PUT_H1(data, h)); + }; + HEXAGON_Vect64C H2(short h) { + return HEXAGON_Vect64C(HEXAGON_V64_PUT_H2(data, h)); + }; + HEXAGON_Vect64C H3(short h) { + return HEXAGON_Vect64C(HEXAGON_V64_PUT_H3(data, h)); + }; + + // Set byte methods + HEXAGON_Vect64C B0(signed char b) { + return HEXAGON_Vect64C(HEXAGON_V64_PUT_B0(data, b)); + }; + HEXAGON_Vect64C B1(signed char b) { + return HEXAGON_Vect64C(HEXAGON_V64_PUT_B1(data, b)); + }; + HEXAGON_Vect64C B2(signed char b) { + return HEXAGON_Vect64C(HEXAGON_V64_PUT_B2(data, b)); + }; + HEXAGON_Vect64C B3(signed char b) { + return HEXAGON_Vect64C(HEXAGON_V64_PUT_B3(data, b)); + }; + HEXAGON_Vect64C B4(signed char b) { + return HEXAGON_Vect64C(HEXAGON_V64_PUT_B4(data, b)); + }; + HEXAGON_Vect64C B5(signed char b) { + return HEXAGON_Vect64C(HEXAGON_V64_PUT_B5(data, b)); + }; + HEXAGON_Vect64C B6(signed char b) { + return HEXAGON_Vect64C(HEXAGON_V64_PUT_B6(data, b)); + }; + HEXAGON_Vect64C B7(signed char b) { + return HEXAGON_Vect64C(HEXAGON_V64_PUT_B7(data, b)); + }; + +private: + long long data; +}; + +#endif /* __cplusplus */ + +/* 32 Bit Vectors */ + +typedef int HEXAGON_Vect32; + +/* Extract word macros */ + +#define HEXAGON_V32_GET_W(v) (v) +#define HEXAGON_V32_GET_UW(v) ((unsigned int)(v)) + +/* Extract half word macros */ + +#define HEXAGON_V32_GET_H0(v) \ + __extension__({ \ + union { \ + int w; \ + short h[2]; \ + } _HEXAGON_V32_internal_union; \ + _HEXAGON_V32_internal_union.w = (v); \ + _HEXAGON_V32_internal_union.h[0]; \ + }) +#define HEXAGON_V32_GET_H1(v) \ + __extension__({ \ + union { \ + int w; \ + short h[2]; \ + } _HEXAGON_V32_internal_union; \ + _HEXAGON_V32_internal_union.w = (v); \ + _HEXAGON_V32_internal_union.h[1]; \ + }) +#define HEXAGON_V32_GET_UH0(v) \ + __extension__({ \ + union { \ + int w; \ + unsigned short uh[2]; \ + } _HEXAGON_V32_internal_union; \ + _HEXAGON_V32_internal_union.w = (v); \ + _HEXAGON_V32_internal_union.uh[0]; \ + }) +#define HEXAGON_V32_GET_UH1(v) \ + __extension__({ \ + union { \ + int w; \ + unsigned short uh[2]; \ + } _HEXAGON_V32_internal_union; \ + _HEXAGON_V32_internal_union.w = (v); \ + _HEXAGON_V32_internal_union.uh[1]; \ + }) + +/* Extract byte macros */ + +#define HEXAGON_V32_GET_B0(v) \ + __extension__({ \ + union { \ + int w; \ + signed char b[4]; \ + } _HEXAGON_V32_internal_union; \ + _HEXAGON_V32_internal_union.w = (v); \ + _HEXAGON_V32_internal_union.b[0]; \ + }) +#define HEXAGON_V32_GET_B1(v) \ + __extension__({ \ + union { \ + int w; \ + signed char b[4]; \ + } _HEXAGON_V32_internal_union; \ + _HEXAGON_V32_internal_union.w = (v); \ + _HEXAGON_V32_internal_union.b[1]; \ + }) +#define HEXAGON_V32_GET_B2(v) \ + __extension__({ \ + union { \ + int w; \ + signed char b[4]; \ + } _HEXAGON_V32_internal_union; \ + _HEXAGON_V32_internal_union.w = (v); \ + _HEXAGON_V32_internal_union.b[2]; \ + }) +#define HEXAGON_V32_GET_B3(v) \ + __extension__({ \ + union { \ + int w; \ + signed char b[4]; \ + } _HEXAGON_V32_internal_union; \ + _HEXAGON_V32_internal_union.w = (v); \ + _HEXAGON_V32_internal_union.b[3]; \ + }) +#define HEXAGON_V32_GET_UB0(v) \ + __extension__({ \ + union { \ + int w; \ + unsigned char ub[4]; \ + } _HEXAGON_V32_internal_union; \ + _HEXAGON_V32_internal_union.w = (v); \ + _HEXAGON_V32_internal_union.ub[0]; \ + }) +#define HEXAGON_V32_GET_UB1(v) \ + __extension__({ \ + union { \ + int w; \ + unsigned char ub[4]; \ + } _HEXAGON_V32_internal_union; \ + _HEXAGON_V32_internal_union.w = (v); \ + _HEXAGON_V32_internal_union.ub[1]; \ + }) +#define HEXAGON_V32_GET_UB2(v) \ + __extension__({ \ + union { \ + int w; \ + unsigned char ub[4]; \ + } _HEXAGON_V32_internal_union; \ + _HEXAGON_V32_internal_union.w = (v); \ + _HEXAGON_V32_internal_union.ub[2]; \ + }) +#define HEXAGON_V32_GET_UB3(v) \ + __extension__({ \ + union { \ + int w; \ + unsigned char ub[4]; \ + } _HEXAGON_V32_internal_union; \ + _HEXAGON_V32_internal_union.w = (v); \ + _HEXAGON_V32_internal_union.ub[3]; \ + }) + +/* NOTE: All set macros return a HEXAGON_Vect32 type */ + +/* Set word macro */ + +#define HEXAGON_V32_PUT_W(v, new) (new) + +/* Set half word macros */ + +#ifdef __hexagon__ + +#define HEXAGON_V32_PUT_H0(v, new) \ + __extension__({ \ + union { \ + int w; \ + short h[2]; \ + } _HEXAGON_V32_internal_union; \ + _HEXAGON_V32_internal_union.w = (v); \ + _HEXAGON_V32_internal_union.h[0] = (new); \ + _HEXAGON_V32_internal_union.w; \ + }) +#define HEXAGON_V32_PUT_H1(v, new) \ + __extension__({ \ + union { \ + int w; \ + short h[2]; \ + } _HEXAGON_V32_internal_union; \ + _HEXAGON_V32_internal_union.w = (v); \ + _HEXAGON_V32_internal_union.h[1] = (new); \ + _HEXAGON_V32_internal_union.w; \ + }) + +#else /* !__hexagon__ */ + +#define HEXAGON_V32_PUT_H0(v, new) \ + (((v) & 0xffff0000) | ((HEXAGON_Vect32)((unsigned short)(new)))) +#define HEXAGON_V32_PUT_H1(v, new) (((v) & 0x0000ffff) | (((HEXAGON_Vect32)(new)) << 16)) + +#endif /* !__hexagon__ */ + +/* Set byte macros */ + +#ifdef __hexagon__ + +#define HEXAGON_V32_PUT_B0(v, new) \ + __extension__({ \ + union { \ + int w; \ + char b[4]; \ + } _HEXAGON_V32_internal_union; \ + _HEXAGON_V32_internal_union.w = (v); \ + _HEXAGON_V32_internal_union.b[0] = (new); \ + _HEXAGON_V32_internal_union.w; \ + }) +#define HEXAGON_V32_PUT_B1(v, new) \ + __extension__({ \ + union { \ + int w; \ + char b[4]; \ + } _HEXAGON_V32_internal_union; \ + _HEXAGON_V32_internal_union.w = (v); \ + _HEXAGON_V32_internal_union.b[1] = (new); \ + _HEXAGON_V32_internal_union.w; \ + }) +#define HEXAGON_V32_PUT_B2(v, new) \ + __extension__({ \ + union { \ + int w; \ + char b[4]; \ + } _HEXAGON_V32_internal_union; \ + _HEXAGON_V32_internal_union.w = (v); \ + _HEXAGON_V32_internal_union.b[2] = (new); \ + _HEXAGON_V32_internal_union.w; \ + }) +#define HEXAGON_V32_PUT_B3(v, new) \ + __extension__({ \ + union { \ + int w; \ + char b[4]; \ + } _HEXAGON_V32_internal_union; \ + _HEXAGON_V32_internal_union.w = (v); \ + _HEXAGON_V32_internal_union.b[3] = (new); \ + _HEXAGON_V32_internal_union.w; \ + }) + +#else /* !__hexagon__ */ + +#define HEXAGON_V32_PUT_B0(v, new) \ + (((v) & 0xffffff00) | ((HEXAGON_Vect32)((unsigned char)(new)))) +#define HEXAGON_V32_PUT_B1(v, new) \ + (((v) & 0xffff00ff) | (((HEXAGON_Vect32)((unsigned char)(new))) << 8)) +#define HEXAGON_V32_PUT_B2(v, new) \ + (((v) & 0xff00ffff) | (((HEXAGON_Vect32)((unsigned char)(new))) << 16)) +#define HEXAGON_V32_PUT_B3(v, new) (((v) & 0x00ffffff) | (((HEXAGON_Vect32)(new)) << 24)) + +#endif /* !__hexagon__ */ + +/* NOTE: All create macros return a HEXAGON_Vect32 type */ + +/* Create from a word */ + +#define HEXAGON_V32_CREATE_W(w) (w) + +/* Create from half words */ + +#ifdef __hexagon__ + +#define HEXAGON_V32_CREATE_H(h1, h0) \ + __extension__({ \ + union { \ + long long d; \ + short h[2]; \ + } _HEXAGON_V32_internal_union; \ + _HEXAGON_V32_internal_union.h[0] = (h0); \ + _HEXAGON_V32_internal_union.h[1] = (h1); \ + _HEXAGON_V32_internal_union.d; \ + }) + +#else /* !__hexagon__ */ + +#define HEXAGON_V32_CREATE_H(h1, h0) \ + ((((HEXAGON_Vect32)(h1)) << 16) | ((HEXAGON_Vect32)((h0) & 0xffff))) + +#endif /* !__hexagon__ */ + +/* Create from bytes */ +#ifdef __hexagon__ + +#define HEXAGON_V32_CREATE_B(b3, b2, b1, b0) \ + __extension__({ \ + union { \ + long long d; \ + char b[4]; \ + } _HEXAGON_V32_internal_union; \ + _HEXAGON_V32_internal_union.b[0] = (b0); \ + _HEXAGON_V32_internal_union.b[1] = (b1); \ + _HEXAGON_V32_internal_union.b[2] = (b2); \ + _HEXAGON_V32_internal_union.b[3] = (b3); \ + _HEXAGON_V32_internal_union.d; \ + }) + +#else /* !__hexagon__ */ + +#define HEXAGON_V32_CREATE_B(b3, b2, b1, b0) \ + ((((HEXAGON_Vect32)(b3)) << 24) | (((HEXAGON_Vect32)((b2) & 0xff)) << 16) | \ + (((HEXAGON_Vect32)((b1) & 0xff)) << 8) | ((HEXAGON_Vect32)((b0) & 0xff))) + +#endif /* !__hexagon__ */ + +#ifdef __cplusplus + +class HEXAGON_Vect32C { +public: + // Constructors + HEXAGON_Vect32C(int w = 0) : data(w) {}; + HEXAGON_Vect32C(short h1, short h0) : data(HEXAGON_V32_CREATE_H(h1, h0)) {}; + HEXAGON_Vect32C(signed char b3, signed char b2, signed char b1, signed char b0) + : data(HEXAGON_V32_CREATE_B(b3, b2, b1, b0)) {}; + HEXAGON_Vect32C(const HEXAGON_Vect32C &v) : data(v.data) {}; + + HEXAGON_Vect32C &operator=(const HEXAGON_Vect32C &v) { + data = v.data; + return *this; + }; + + operator int() { + return data; + }; + + // Extract word methods + int W(void) { + return HEXAGON_V32_GET_W(data); + }; + unsigned int UW(void) { + return HEXAGON_V32_GET_UW(data); + }; + + // Extract half word methods + short H0(void) { + return HEXAGON_V32_GET_H0(data); + }; + short H1(void) { + return HEXAGON_V32_GET_H1(data); + }; + unsigned short UH0(void) { + return HEXAGON_V32_GET_UH0(data); + }; + unsigned short UH1(void) { + return HEXAGON_V32_GET_UH1(data); + }; + + // Extract byte methods + signed char B0(void) { + return HEXAGON_V32_GET_B0(data); + }; + signed char B1(void) { + return HEXAGON_V32_GET_B1(data); + }; + signed char B2(void) { + return HEXAGON_V32_GET_B2(data); + }; + signed char B3(void) { + return HEXAGON_V32_GET_B3(data); + }; + unsigned char UB0(void) { + return HEXAGON_V32_GET_UB0(data); + }; + unsigned char UB1(void) { + return HEXAGON_V32_GET_UB1(data); + }; + unsigned char UB2(void) { + return HEXAGON_V32_GET_UB2(data); + }; + unsigned char UB3(void) { + return HEXAGON_V32_GET_UB3(data); + }; + + // NOTE: All set methods return a HEXAGON_Vect32C type + + // Set word method + HEXAGON_Vect32C W(int w) { + return HEXAGON_Vect32C(HEXAGON_V32_PUT_W(data, w)); + }; + + // Set half word methods + HEXAGON_Vect32C H0(short h) { + return HEXAGON_Vect32C(HEXAGON_V32_PUT_H0(data, h)); + }; + HEXAGON_Vect32C H1(short h) { + return HEXAGON_Vect32C(HEXAGON_V32_PUT_H1(data, h)); + }; + + // Set byte methods + HEXAGON_Vect32C B0(signed char b) { + return HEXAGON_Vect32C(HEXAGON_V32_PUT_B0(data, b)); + }; + HEXAGON_Vect32C B1(signed char b) { + return HEXAGON_Vect32C(HEXAGON_V32_PUT_B1(data, b)); + }; + HEXAGON_Vect32C B2(signed char b) { + return HEXAGON_Vect32C(HEXAGON_V32_PUT_B2(data, b)); + }; + HEXAGON_Vect32C B3(signed char b) { + return HEXAGON_Vect32C(HEXAGON_V32_PUT_B3(data, b)); + }; + +private: + int data; +}; + +#endif /* __cplusplus */ + +// V65 Silver types +#if __Q6S_ARCH__ >= 65 + // Silver vector types are 128 bytes, and pairs are 256. The vector predicate + // types are 16 bytes and 32 bytes for pairs. + typedef long HEXAGON_VecPred128 __attribute__((__vector_size__(16))) + __attribute__((aligned(128))); + + typedef long HEXAGON_VecPred256 __attribute__((__vector_size__(32))) + __attribute__((aligned(128))); + + typedef long HEXAGON_Vect1024 __attribute__((__vector_size__(128))) + __attribute__((aligned(128))); + + typedef long HEXAGON_Vect2048 __attribute__((__vector_size__(256))) + __attribute__((aligned(256))); + + typedef long HEXAGON_UVect1024 __attribute__((__vector_size__(128))) + __attribute__((aligned(4))); + + typedef long HEXAGON_UVect2048 __attribute__((__vector_size__(256))) + __attribute__((aligned(4))); + + #define Q6S_VectorPredPair HEXAGON_VecPred256 + #define Q6S_VectorPred HEXAGON_VecPred128 + #define Q6S_Vector HEXAGON_Vect1024 + #define Q6S_VectorPair HEXAGON_Vect2048 + #define Q6S_UVector HEXAGON_UVect1024 + #define Q6S_UVectorPair HEXAGON_UVect2048 + +#else /* __Q6S_ARCH__ >= 65 */ + +// V65 Vector types +#if __HVX_ARCH__ >= 65 +#if defined __HVX__ && (__HVX_LENGTH__ == 128) + typedef long HEXAGON_VecPred128 __attribute__((__vector_size__(128))) + __attribute__((aligned(128))); + + typedef long HEXAGON_Vect1024 __attribute__((__vector_size__(128))) + __attribute__((aligned(128))); + + typedef long HEXAGON_Vect2048 __attribute__((__vector_size__(256))) + __attribute__((aligned(256))); + + typedef long HEXAGON_UVect1024 __attribute__((__vector_size__(128))) + __attribute__((aligned(4))); + + typedef long HEXAGON_UVect2048 __attribute__((__vector_size__(256))) + __attribute__((aligned(4))); + + #define HVX_VectorPred HEXAGON_VecPred128 + #define HVX_Vector HEXAGON_Vect1024 + #define HVX_VectorPair HEXAGON_Vect2048 + #define HVX_UVector HEXAGON_UVect1024 + #define HVX_UVectorPair HEXAGON_UVect2048 +#else /* defined __HVX__ && (__HVX_LENGTH__ == 128) */ +#if defined __HVX__ && (__HVX_LENGTH__ == 64) + typedef long HEXAGON_VecPred64 __attribute__((__vector_size__(64))) + __attribute__((aligned(64))); + + typedef long HEXAGON_Vect512 __attribute__((__vector_size__(64))) + __attribute__((aligned(64))); + + typedef long HEXAGON_Vect1024 __attribute__((__vector_size__(128))) + __attribute__((aligned(128))); + + typedef long HEXAGON_UVect512 __attribute__((__vector_size__(64))) + __attribute__((aligned(4))); + + typedef long HEXAGON_UVect1024 __attribute__((__vector_size__(128))) + __attribute__((aligned(4))); + + #define HVX_VectorPred HEXAGON_VecPred64 + #define HVX_Vector HEXAGON_Vect512 + #define HVX_VectorPair HEXAGON_Vect1024 + #define HVX_UVector HEXAGON_UVect512 + #define HVX_UVectorPair HEXAGON_UVect1024 +#endif /* defined __HVX__ && (__HVX_LENGTH__ == 64) */ +#endif /* defined __HVX__ && (__HVX_LENGTH__ == 128) */ +#endif /* __HVX_ARCH__ >= 65 */ +#endif /* __Q6S_ARCH__ >= 65 */ + +/* Predicates */ + +typedef int HEXAGON_Pred; + +/*** + *** backward compatibility aliases + ***/ + +/* Old names */ +#define Q6Vect Q6Vect64 +#define Q6V_GET_D Q6V64_GET_D +#define Q6V_GET_UD Q6V64_GET_UD +#define Q6V_GET_W0 Q6V64_GET_W0 +#define Q6V_GET_W1 Q6V64_GET_W1 +#define Q6V_GET_UW0 Q6V64_GET_UW0 +#define Q6V_GET_UW1 Q6V64_GET_UW1 +#define Q6V_GET_H0 Q6V64_GET_H0 +#define Q6V_GET_H1 Q6V64_GET_H1 +#define Q6V_GET_H2 Q6V64_GET_H2 +#define Q6V_GET_H3 Q6V64_GET_H3 +#define Q6V_GET_UH0 Q6V64_GET_UH0 +#define Q6V_GET_UH1 Q6V64_GET_UH1 +#define Q6V_GET_UH2 Q6V64_GET_UH2 +#define Q6V_GET_UH3 Q6V64_GET_UH3 +#define Q6V_GET_B0 Q6V64_GET_B0 +#define Q6V_GET_B1 Q6V64_GET_B1 +#define Q6V_GET_B2 Q6V64_GET_B2 +#define Q6V_GET_B3 Q6V64_GET_B3 +#define Q6V_GET_B4 Q6V64_GET_B4 +#define Q6V_GET_B5 Q6V64_GET_B5 +#define Q6V_GET_B6 Q6V64_GET_B6 +#define Q6V_GET_B7 Q6V64_GET_B7 +#define Q6V_GET_UB0 Q6V64_GET_UB0 +#define Q6V_GET_UB1 Q6V64_GET_UB1 +#define Q6V_GET_UB2 Q6V64_GET_UB2 +#define Q6V_GET_UB3 Q6V64_GET_UB3 +#define Q6V_GET_UB4 Q6V64_GET_UB4 +#define Q6V_GET_UB5 Q6V64_GET_UB5 +#define Q6V_GET_UB6 Q6V64_GET_UB6 +#define Q6V_GET_UB7 Q6V64_GET_UB7 +#define Q6V_PUT_D Q6V64_PUT_D +#define Q6V_PUT_W0 Q6V64_PUT_W0 +#define Q6V_PUT_W1 Q6V64_PUT_W1 +#define Q6V_PUT_H0 Q6V64_PUT_H0 +#define Q6V_PUT_H1 Q6V64_PUT_H1 +#define Q6V_PUT_H2 Q6V64_PUT_H2 +#define Q6V_PUT_H3 Q6V64_PUT_H3 +#define Q6V_PUT_B0 Q6V64_PUT_B0 +#define Q6V_PUT_B1 Q6V64_PUT_B1 +#define Q6V_PUT_B2 Q6V64_PUT_B2 +#define Q6V_PUT_B3 Q6V64_PUT_B3 +#define Q6V_PUT_B4 Q6V64_PUT_B4 +#define Q6V_PUT_B5 Q6V64_PUT_B5 +#define Q6V_PUT_B6 Q6V64_PUT_B6 +#define Q6V_PUT_B7 Q6V64_PUT_B7 +#define Q6V_CREATE_D Q6V64_CREATE_D +#define Q6V_CREATE_W Q6V64_CREATE_W +#define Q6V_CREATE_H Q6V64_CREATE_H +#define Q6V_CREATE_B Q6V64_CREATE_B + +#ifdef __cplusplus +#define Q6VectC Q6Vect64C +#endif /* __cplusplus */ + +/* 64 Bit Vectors */ + +typedef long long __attribute__((__may_alias__)) Q6Vect64; + +/* Extract doubleword macros */ + +#define Q6V64_GET_D(v) (v) +#define Q6V64_GET_UD(v) ((unsigned long long)(v)) + +/* Extract word macros */ + +#define Q6V64_GET_W0(v) \ + __extension__({ \ + union { \ + long long d; \ + int w[2]; \ + } _Q6V64_internal_union; \ + _Q6V64_internal_union.d = (v); \ + _Q6V64_internal_union.w[0]; \ + }) +#define Q6V64_GET_W1(v) \ + __extension__({ \ + union { \ + long long d; \ + int w[2]; \ + } _Q6V64_internal_union; \ + _Q6V64_internal_union.d = (v); \ + _Q6V64_internal_union.w[1]; \ + }) +#define Q6V64_GET_UW0(v) \ + __extension__({ \ + union { \ + long long d; \ + unsigned int uw[2]; \ + } _Q6V64_internal_union; \ + _Q6V64_internal_union.d = (v); \ + _Q6V64_internal_union.uw[0]; \ + }) +#define Q6V64_GET_UW1(v) \ + __extension__({ \ + union { \ + long long d; \ + unsigned int uw[2]; \ + } _Q6V64_internal_union; \ + _Q6V64_internal_union.d = (v); \ + _Q6V64_internal_union.uw[1]; \ + }) + +/* Extract half word macros */ + +#define Q6V64_GET_H0(v) \ + __extension__({ \ + union { \ + long long d; \ + short h[4]; \ + } _Q6V64_internal_union; \ + _Q6V64_internal_union.d = (v); \ + _Q6V64_internal_union.h[0]; \ + }) +#define Q6V64_GET_H1(v) \ + __extension__({ \ + union { \ + long long d; \ + short h[4]; \ + } _Q6V64_internal_union; \ + _Q6V64_internal_union.d = (v); \ + _Q6V64_internal_union.h[1]; \ + }) +#define Q6V64_GET_H2(v) \ + __extension__({ \ + union { \ + long long d; \ + short h[4]; \ + } _Q6V64_internal_union; \ + _Q6V64_internal_union.d = (v); \ + _Q6V64_internal_union.h[2]; \ + }) +#define Q6V64_GET_H3(v) \ + __extension__({ \ + union { \ + long long d; \ + short h[4]; \ + } _Q6V64_internal_union; \ + _Q6V64_internal_union.d = (v); \ + _Q6V64_internal_union.h[3]; \ + }) +#define Q6V64_GET_UH0(v) \ + __extension__({ \ + union { \ + long long d; \ + unsigned short uh[4]; \ + } _Q6V64_internal_union; \ + _Q6V64_internal_union.d = (v); \ + _Q6V64_internal_union.uh[0]; \ + }) +#define Q6V64_GET_UH1(v) \ + __extension__({ \ + union { \ + long long d; \ + unsigned short uh[4]; \ + } _Q6V64_internal_union; \ + _Q6V64_internal_union.d = (v); \ + _Q6V64_internal_union.uh[1]; \ + }) +#define Q6V64_GET_UH2(v) \ + __extension__({ \ + union { \ + long long d; \ + unsigned short uh[4]; \ + } _Q6V64_internal_union; \ + _Q6V64_internal_union.d = (v); \ + _Q6V64_internal_union.uh[2]; \ + }) +#define Q6V64_GET_UH3(v) \ + __extension__({ \ + union { \ + long long d; \ + unsigned short uh[4]; \ + } _Q6V64_internal_union; \ + _Q6V64_internal_union.d = (v); \ + _Q6V64_internal_union.uh[3]; \ + }) + +/* Extract byte macros */ + +#define Q6V64_GET_B0(v) \ + __extension__({ \ + union { \ + long long d; \ + signed char b[8]; \ + } _Q6V64_internal_union; \ + _Q6V64_internal_union.d = (v); \ + _Q6V64_internal_union.b[0]; \ + }) +#define Q6V64_GET_B1(v) \ + __extension__({ \ + union { \ + long long d; \ + signed char b[8]; \ + } _Q6V64_internal_union; \ + _Q6V64_internal_union.d = (v); \ + _Q6V64_internal_union.b[1]; \ + }) +#define Q6V64_GET_B2(v) \ + __extension__({ \ + union { \ + long long d; \ + signed char b[8]; \ + } _Q6V64_internal_union; \ + _Q6V64_internal_union.d = (v); \ + _Q6V64_internal_union.b[2]; \ + }) +#define Q6V64_GET_B3(v) \ + __extension__({ \ + union { \ + long long d; \ + signed char b[8]; \ + } _Q6V64_internal_union; \ + _Q6V64_internal_union.d = (v); \ + _Q6V64_internal_union.b[3]; \ + }) +#define Q6V64_GET_B4(v) \ + __extension__({ \ + union { \ + long long d; \ + signed char b[8]; \ + } _Q6V64_internal_union; \ + _Q6V64_internal_union.d = (v); \ + _Q6V64_internal_union.b[4]; \ + }) +#define Q6V64_GET_B5(v) \ + __extension__({ \ + union { \ + long long d; \ + signed char b[8]; \ + } _Q6V64_internal_union; \ + _Q6V64_internal_union.d = (v); \ + _Q6V64_internal_union.b[5]; \ + }) +#define Q6V64_GET_B6(v) \ + __extension__({ \ + union { \ + long long d; \ + signed char b[8]; \ + } _Q6V64_internal_union; \ + _Q6V64_internal_union.d = (v); \ + _Q6V64_internal_union.b[6]; \ + }) +#define Q6V64_GET_B7(v) \ + __extension__({ \ + union { \ + long long d; \ + signed char b[8]; \ + } _Q6V64_internal_union; \ + _Q6V64_internal_union.d = (v); \ + _Q6V64_internal_union.b[7]; \ + }) +#define Q6V64_GET_UB0(v) \ + __extension__({ \ + union { \ + long long d; \ + unsigned char ub[8]; \ + } _Q6V64_internal_union; \ + _Q6V64_internal_union.d = (v); \ + _Q6V64_internal_union.ub[0]; \ + }) +#define Q6V64_GET_UB1(v) \ + __extension__({ \ + union { \ + long long d; \ + unsigned char ub[8]; \ + } _Q6V64_internal_union; \ + _Q6V64_internal_union.d = (v); \ + _Q6V64_internal_union.ub[1]; \ + }) +#define Q6V64_GET_UB2(v) \ + __extension__({ \ + union { \ + long long d; \ + unsigned char ub[8]; \ + } _Q6V64_internal_union; \ + _Q6V64_internal_union.d = (v); \ + _Q6V64_internal_union.ub[2]; \ + }) +#define Q6V64_GET_UB3(v) \ + __extension__({ \ + union { \ + long long d; \ + unsigned char ub[8]; \ + } _Q6V64_internal_union; \ + _Q6V64_internal_union.d = (v); \ + _Q6V64_internal_union.ub[3]; \ + }) +#define Q6V64_GET_UB4(v) \ + __extension__({ \ + union { \ + long long d; \ + unsigned char ub[8]; \ + } _Q6V64_internal_union; \ + _Q6V64_internal_union.d = (v); \ + _Q6V64_internal_union.ub[4]; \ + }) +#define Q6V64_GET_UB5(v) \ + __extension__({ \ + union { \ + long long d; \ + unsigned char ub[8]; \ + } _Q6V64_internal_union; \ + _Q6V64_internal_union.d = (v); \ + _Q6V64_internal_union.ub[5]; \ + }) +#define Q6V64_GET_UB6(v) \ + __extension__({ \ + union { \ + long long d; \ + unsigned char ub[8]; \ + } _Q6V64_internal_union; \ + _Q6V64_internal_union.d = (v); \ + _Q6V64_internal_union.ub[6]; \ + }) +#define Q6V64_GET_UB7(v) \ + __extension__({ \ + union { \ + long long d; \ + unsigned char ub[8]; \ + } _Q6V64_internal_union; \ + _Q6V64_internal_union.d = (v); \ + _Q6V64_internal_union.ub[7]; \ + }) + +/* NOTE: All set macros return a Q6Vect64 type */ + +/* Set doubleword macro */ + +#define Q6V64_PUT_D(v, new) (new) + +/* Set word macros */ + +#ifdef __qdsp6__ + +#define Q6V64_PUT_W0(v, new) \ + __extension__({ \ + union { \ + long long d; \ + int w[2]; \ + } _Q6V64_internal_union; \ + _Q6V64_internal_union.d = (v); \ + _Q6V64_internal_union.w[0] = (new); \ + _Q6V64_internal_union.d; \ + }) +#define Q6V64_PUT_W1(v, new) \ + __extension__({ \ + union { \ + long long d; \ + int w[2]; \ + } _Q6V64_internal_union; \ + _Q6V64_internal_union.d = (v); \ + _Q6V64_internal_union.w[1] = (new); \ + _Q6V64_internal_union.d; \ + }) + +#else /* !__qdsp6__ */ + +#define Q6V64_PUT_W0(v, new) \ + (((v) & 0xffffffff00000000LL) | ((Q6Vect64)((unsigned int)(new)))) +#define Q6V64_PUT_W1(v, new) \ + (((v) & 0x00000000ffffffffLL) | (((Q6Vect64)(new)) << 32LL)) + +#endif /* !__qdsp6__ */ + +/* Set half word macros */ + +#ifdef __qdsp6__ + +#define Q6V64_PUT_H0(v, new) \ + __extension__({ \ + union { \ + long long d; \ + short h[4]; \ + } _Q6V64_internal_union; \ + _Q6V64_internal_union.d = (v); \ + _Q6V64_internal_union.h[0] = (new); \ + _Q6V64_internal_union.d; \ + }) +#define Q6V64_PUT_H1(v, new) \ + __extension__({ \ + union { \ + long long d; \ + short h[4]; \ + } _Q6V64_internal_union; \ + _Q6V64_internal_union.d = (v); \ + _Q6V64_internal_union.h[1] = (new); \ + _Q6V64_internal_union.d; \ + }) +#define Q6V64_PUT_H2(v, new) \ + __extension__({ \ + union { \ + long long d; \ + short h[4]; \ + } _Q6V64_internal_union; \ + _Q6V64_internal_union.d = (v); \ + _Q6V64_internal_union.h[2] = (new); \ + _Q6V64_internal_union.d; \ + }) +#define Q6V64_PUT_H3(v, new) \ + __extension__({ \ + union { \ + long long d; \ + short h[4]; \ + } _Q6V64_internal_union; \ + _Q6V64_internal_union.d = (v); \ + _Q6V64_internal_union.h[3] = (new); \ + _Q6V64_internal_union.d; \ + }) + +#else /* !__qdsp6__ */ + +#define Q6V64_PUT_H0(v, new) \ + (((v) & 0xffffffffffff0000LL) | ((Q6Vect64)((unsigned short)(new)))) +#define Q6V64_PUT_H1(v, new) \ + (((v) & 0xffffffff0000ffffLL) | (((Q6Vect64)((unsigned short)(new))) << 16LL)) +#define Q6V64_PUT_H2(v, new) \ + (((v) & 0xffff0000ffffffffLL) | (((Q6Vect64)((unsigned short)(new))) << 32LL)) +#define Q6V64_PUT_H3(v, new) \ + (((v) & 0x0000ffffffffffffLL) | (((Q6Vect64)(new)) << 48LL)) + +#endif /* !__qdsp6__ */ + +/* Set byte macros */ + +#ifdef __qdsp6__ + +#define Q6V64_PUT_B0(v, new) \ + __extension__({ \ + union { \ + long long d; \ + char b[8]; \ + } _Q6V64_internal_union; \ + _Q6V64_internal_union.d = (v); \ + _Q6V64_internal_union.b[0] = (new); \ + _Q6V64_internal_union.d; \ + }) +#define Q6V64_PUT_B1(v, new) \ + __extension__({ \ + union { \ + long long d; \ + char b[8]; \ + } _Q6V64_internal_union; \ + _Q6V64_internal_union.d = (v); \ + _Q6V64_internal_union.b[1] = (new); \ + _Q6V64_internal_union.d; \ + }) +#define Q6V64_PUT_B2(v, new) \ + __extension__({ \ + union { \ + long long d; \ + char b[8]; \ + } _Q6V64_internal_union; \ + _Q6V64_internal_union.d = (v); \ + _Q6V64_internal_union.b[2] = (new); \ + _Q6V64_internal_union.d; \ + }) +#define Q6V64_PUT_B3(v, new) \ + __extension__({ \ + union { \ + long long d; \ + char b[8]; \ + } _Q6V64_internal_union; \ + _Q6V64_internal_union.d = (v); \ + _Q6V64_internal_union.b[3] = (new); \ + _Q6V64_internal_union.d; \ + }) +#define Q6V64_PUT_B4(v, new) \ + __extension__({ \ + union { \ + long long d; \ + char b[8]; \ + } _Q6V64_internal_union; \ + _Q6V64_internal_union.d = (v); \ + _Q6V64_internal_union.b[4] = (new); \ + _Q6V64_internal_union.d; \ + }) +#define Q6V64_PUT_B5(v, new) \ + __extension__({ \ + union { \ + long long d; \ + char b[8]; \ + } _Q6V64_internal_union; \ + _Q6V64_internal_union.d = (v); \ + _Q6V64_internal_union.b[5] = (new); \ + _Q6V64_internal_union.d; \ + }) +#define Q6V64_PUT_B6(v, new) \ + __extension__({ \ + union { \ + long long d; \ + char b[8]; \ + } _Q6V64_internal_union; \ + _Q6V64_internal_union.d = (v); \ + _Q6V64_internal_union.b[6] = (new); \ + _Q6V64_internal_union.d; \ + }) +#define Q6V64_PUT_B7(v, new) \ + __extension__({ \ + union { \ + long long d; \ + char b[8]; \ + } _Q6V64_internal_union; \ + _Q6V64_internal_union.d = (v); \ + _Q6V64_internal_union.b[7] = (new); \ + _Q6V64_internal_union.d; \ + }) + +#else /* !__qdsp6__ */ + +#define Q6V64_PUT_B0(v, new) \ + (((v) & 0xffffffffffffff00LL) | ((Q6Vect64)((unsigned char)(new)))) +#define Q6V64_PUT_B1(v, new) \ + (((v) & 0xffffffffffff00ffLL) | (((Q6Vect64)((unsigned char)(new))) << 8LL)) +#define Q6V64_PUT_B2(v, new) \ + (((v) & 0xffffffffff00ffffLL) | (((Q6Vect64)((unsigned char)(new))) << 16LL)) +#define Q6V64_PUT_B3(v, new) \ + (((v) & 0xffffffff00ffffffLL) | (((Q6Vect64)((unsigned char)(new))) << 24LL)) +#define Q6V64_PUT_B4(v, new) \ + (((v) & 0xffffff00ffffffffLL) | (((Q6Vect64)((unsigned char)(new))) << 32LL)) +#define Q6V64_PUT_B5(v, new) \ + (((v) & 0xffff00ffffffffffLL) | (((Q6Vect64)((unsigned char)(new))) << 40LL)) +#define Q6V64_PUT_B6(v, new) \ + (((v) & 0xff00ffffffffffffLL) | (((Q6Vect64)((unsigned char)(new))) << 48LL)) +#define Q6V64_PUT_B7(v, new) \ + (((v) & 0x00ffffffffffffffLL) | (((Q6Vect64)(new)) << 56LL)) + +#endif /* !__qdsp6__ */ + +/* NOTE: All create macros return a Q6Vect64 type */ + +/* Create from a doubleword */ + +#define Q6V64_CREATE_D(d) (d) + +/* Create from words */ + +#ifdef __qdsp6__ + +#define Q6V64_CREATE_W(w1, w0) \ + __extension__({ \ + union { \ + long long d; \ + int w[2]; \ + } _Q6V64_internal_union; \ + _Q6V64_internal_union.w[0] = (w0); \ + _Q6V64_internal_union.w[1] = (w1); \ + _Q6V64_internal_union.d; \ + }) + +#else /* !__qdsp6__ */ + +#define Q6V64_CREATE_W(w1, w0) \ + ((((Q6Vect64)(w1)) << 32LL) | ((Q6Vect64)((w0) & 0xffffffff))) + +#endif /* !__qdsp6__ */ + +/* Create from half words */ + +#ifdef __qdsp6__ + +#define Q6V64_CREATE_H(h3, h2, h1, h0) \ + __extension__({ \ + union { \ + long long d; \ + short h[4]; \ + } _Q6V64_internal_union; \ + _Q6V64_internal_union.h[0] = (h0); \ + _Q6V64_internal_union.h[1] = (h1); \ + _Q6V64_internal_union.h[2] = (h2); \ + _Q6V64_internal_union.h[3] = (h3); \ + _Q6V64_internal_union.d; \ + }) + +#else /* !__qdsp6__ */ + +#define Q6V64_CREATE_H(h3, h2, h1, h0) \ + ((((Q6Vect64)(h3)) << 48LL) | (((Q6Vect64)((h2) & 0xffff)) << 32LL) | \ + (((Q6Vect64)((h1) & 0xffff)) << 16LL) | ((Q6Vect64)((h0) & 0xffff))) + +#endif /* !__qdsp6__ */ + +/* Create from bytes */ + +#ifdef __qdsp6__ + +#define Q6V64_CREATE_B(b7, b6, b5, b4, b3, b2, b1, b0) \ + __extension__({ \ + union { \ + long long d; \ + char b[8]; \ + } _Q6V64_internal_union; \ + _Q6V64_internal_union.b[0] = (b0); \ + _Q6V64_internal_union.b[1] = (b1); \ + _Q6V64_internal_union.b[2] = (b2); \ + _Q6V64_internal_union.b[3] = (b3); \ + _Q6V64_internal_union.b[4] = (b4); \ + _Q6V64_internal_union.b[5] = (b5); \ + _Q6V64_internal_union.b[6] = (b6); \ + _Q6V64_internal_union.b[7] = (b7); \ + _Q6V64_internal_union.d; \ + }) + +#else /* !__qdsp6__ */ + +#define Q6V64_CREATE_B(b7, b6, b5, b4, b3, b2, b1, b0) \ + ((((Q6Vect64)(b7)) << 56LL) | (((Q6Vect64)((b6) & 0xff)) << 48LL) | \ + (((Q6Vect64)((b5) & 0xff)) << 40LL) | (((Q6Vect64)((b4) & 0xff)) << 32LL) | \ + (((Q6Vect64)((b3) & 0xff)) << 24LL) | (((Q6Vect64)((b2) & 0xff)) << 16LL) | \ + (((Q6Vect64)((b1) & 0xff)) << 8LL) | ((Q6Vect64)((b0) & 0xff))) + +#endif /* !__qdsp6__ */ + +#ifdef __cplusplus + +class Q6Vect64C { +public: + // Constructors + Q6Vect64C(long long d = 0) : data(d) {}; + Q6Vect64C(int w1, int w0) : data(Q6V64_CREATE_W(w1, w0)) {}; + Q6Vect64C(short h3, short h2, short h1, short h0) + : data(Q6V64_CREATE_H(h3, h2, h1, h0)) {}; + Q6Vect64C(signed char b7, signed char b6, signed char b5, signed char b4, + signed char b3, signed char b2, signed char b1, signed char b0) + : data(Q6V64_CREATE_B(b7, b6, b5, b4, b3, b2, b1, b0)) {}; + Q6Vect64C(const Q6Vect64C &v) : data(v.data) {}; + + Q6Vect64C &operator=(const Q6Vect64C &v) { + data = v.data; + return *this; + }; + + operator long long() { + return data; + }; + + // Extract doubleword methods + long long D(void) { + return Q6V64_GET_D(data); + }; + unsigned long long UD(void) { + return Q6V64_GET_UD(data); + }; + + // Extract word methods + int W0(void) { + return Q6V64_GET_W0(data); + }; + int W1(void) { + return Q6V64_GET_W1(data); + }; + unsigned int UW0(void) { + return Q6V64_GET_UW0(data); + }; + unsigned int UW1(void) { + return Q6V64_GET_UW1(data); + }; + + // Extract half word methods + short H0(void) { + return Q6V64_GET_H0(data); + }; + short H1(void) { + return Q6V64_GET_H1(data); + }; + short H2(void) { + return Q6V64_GET_H2(data); + }; + short H3(void) { + return Q6V64_GET_H3(data); + }; + unsigned short UH0(void) { + return Q6V64_GET_UH0(data); + }; + unsigned short UH1(void) { + return Q6V64_GET_UH1(data); + }; + unsigned short UH2(void) { + return Q6V64_GET_UH2(data); + }; + unsigned short UH3(void) { + return Q6V64_GET_UH3(data); + }; + + // Extract byte methods + signed char B0(void) { + return Q6V64_GET_B0(data); + }; + signed char B1(void) { + return Q6V64_GET_B1(data); + }; + signed char B2(void) { + return Q6V64_GET_B2(data); + }; + signed char B3(void) { + return Q6V64_GET_B3(data); + }; + signed char B4(void) { + return Q6V64_GET_B4(data); + }; + signed char B5(void) { + return Q6V64_GET_B5(data); + }; + signed char B6(void) { + return Q6V64_GET_B6(data); + }; + signed char B7(void) { + return Q6V64_GET_B7(data); + }; + unsigned char UB0(void) { + return Q6V64_GET_UB0(data); + }; + unsigned char UB1(void) { + return Q6V64_GET_UB1(data); + }; + unsigned char UB2(void) { + return Q6V64_GET_UB2(data); + }; + unsigned char UB3(void) { + return Q6V64_GET_UB3(data); + }; + unsigned char UB4(void) { + return Q6V64_GET_UB4(data); + }; + unsigned char UB5(void) { + return Q6V64_GET_UB5(data); + }; + unsigned char UB6(void) { + return Q6V64_GET_UB6(data); + }; + unsigned char UB7(void) { + return Q6V64_GET_UB7(data); + }; + + // NOTE: All set methods return a Q6Vect64C type + + // Set doubleword method + Q6Vect64C D(long long d) { + return Q6Vect64C(Q6V64_PUT_D(data, d)); + }; + + // Set word methods + Q6Vect64C W0(int w) { + return Q6Vect64C(Q6V64_PUT_W0(data, w)); + }; + Q6Vect64C W1(int w) { + return Q6Vect64C(Q6V64_PUT_W1(data, w)); + }; + + // Set half word methods + Q6Vect64C H0(short h) { + return Q6Vect64C(Q6V64_PUT_H0(data, h)); + }; + Q6Vect64C H1(short h) { + return Q6Vect64C(Q6V64_PUT_H1(data, h)); + }; + Q6Vect64C H2(short h) { + return Q6Vect64C(Q6V64_PUT_H2(data, h)); + }; + Q6Vect64C H3(short h) { + return Q6Vect64C(Q6V64_PUT_H3(data, h)); + }; + + // Set byte methods + Q6Vect64C B0(signed char b) { + return Q6Vect64C(Q6V64_PUT_B0(data, b)); + }; + Q6Vect64C B1(signed char b) { + return Q6Vect64C(Q6V64_PUT_B1(data, b)); + }; + Q6Vect64C B2(signed char b) { + return Q6Vect64C(Q6V64_PUT_B2(data, b)); + }; + Q6Vect64C B3(signed char b) { + return Q6Vect64C(Q6V64_PUT_B3(data, b)); + }; + Q6Vect64C B4(signed char b) { + return Q6Vect64C(Q6V64_PUT_B4(data, b)); + }; + Q6Vect64C B5(signed char b) { + return Q6Vect64C(Q6V64_PUT_B5(data, b)); + }; + Q6Vect64C B6(signed char b) { + return Q6Vect64C(Q6V64_PUT_B6(data, b)); + }; + Q6Vect64C B7(signed char b) { + return Q6Vect64C(Q6V64_PUT_B7(data, b)); + }; + +private: + long long data; +}; + +#endif /* __cplusplus */ + +/* 32 Bit Vectors */ + +typedef int Q6Vect32; + +/* Extract word macros */ + +#define Q6V32_GET_W(v) (v) +#define Q6V32_GET_UW(v) ((unsigned int)(v)) + +/* Extract half word macros */ + +#define Q6V32_GET_H0(v) \ + __extension__({ \ + union { \ + int w; \ + short h[2]; \ + } _Q6V32_internal_union; \ + _Q6V32_internal_union.w = (v); \ + _Q6V32_internal_union.h[0]; \ + }) +#define Q6V32_GET_H1(v) \ + __extension__({ \ + union { \ + int w; \ + short h[2]; \ + } _Q6V32_internal_union; \ + _Q6V32_internal_union.w = (v); \ + _Q6V32_internal_union.h[1]; \ + }) +#define Q6V32_GET_UH0(v) \ + __extension__({ \ + union { \ + int w; \ + unsigned short uh[2]; \ + } _Q6V32_internal_union; \ + _Q6V32_internal_union.w = (v); \ + _Q6V32_internal_union.uh[0]; \ + }) +#define Q6V32_GET_UH1(v) \ + __extension__({ \ + union { \ + int w; \ + unsigned short uh[2]; \ + } _Q6V32_internal_union; \ + _Q6V32_internal_union.w = (v); \ + _Q6V32_internal_union.uh[1]; \ + }) + +/* Extract byte macros */ + +#define Q6V32_GET_B0(v) \ + __extension__({ \ + union { \ + int w; \ + signed char b[4]; \ + } _Q6V32_internal_union; \ + _Q6V32_internal_union.w = (v); \ + _Q6V32_internal_union.b[0]; \ + }) +#define Q6V32_GET_B1(v) \ + __extension__({ \ + union { \ + int w; \ + signed char b[4]; \ + } _Q6V32_internal_union; \ + _Q6V32_internal_union.w = (v); \ + _Q6V32_internal_union.b[1]; \ + }) +#define Q6V32_GET_B2(v) \ + __extension__({ \ + union { \ + int w; \ + signed char b[4]; \ + } _Q6V32_internal_union; \ + _Q6V32_internal_union.w = (v); \ + _Q6V32_internal_union.b[2]; \ + }) +#define Q6V32_GET_B3(v) \ + __extension__({ \ + union { \ + int w; \ + signed char b[4]; \ + } _Q6V32_internal_union; \ + _Q6V32_internal_union.w = (v); \ + _Q6V32_internal_union.b[3]; \ + }) +#define Q6V32_GET_UB0(v) \ + __extension__({ \ + union { \ + int w; \ + unsigned char ub[4]; \ + } _Q6V32_internal_union; \ + _Q6V32_internal_union.w = (v); \ + _Q6V32_internal_union.ub[0]; \ + }) +#define Q6V32_GET_UB1(v) \ + __extension__({ \ + union { \ + int w; \ + unsigned char ub[4]; \ + } _Q6V32_internal_union; \ + _Q6V32_internal_union.w = (v); \ + _Q6V32_internal_union.ub[1]; \ + }) +#define Q6V32_GET_UB2(v) \ + __extension__({ \ + union { \ + int w; \ + unsigned char ub[4]; \ + } _Q6V32_internal_union; \ + _Q6V32_internal_union.w = (v); \ + _Q6V32_internal_union.ub[2]; \ + }) +#define Q6V32_GET_UB3(v) \ + __extension__({ \ + union { \ + int w; \ + unsigned char ub[4]; \ + } _Q6V32_internal_union; \ + _Q6V32_internal_union.w = (v); \ + _Q6V32_internal_union.ub[3]; \ + }) + +/* NOTE: All set macros return a Q6Vect32 type */ + +/* Set word macro */ + +#define Q6V32_PUT_W(v, new) (new) + +/* Set half word macros */ + +#ifdef __qdsp6__ + +#define Q6V32_PUT_H0(v, new) \ + __extension__({ \ + union { \ + int w; \ + short h[2]; \ + } _Q6V32_internal_union; \ + _Q6V32_internal_union.w = (v); \ + _Q6V32_internal_union.h[0] = (new); \ + _Q6V32_internal_union.w; \ + }) +#define Q6V32_PUT_H1(v, new) \ + __extension__({ \ + union { \ + int w; \ + short h[2]; \ + } _Q6V32_internal_union; \ + _Q6V32_internal_union.w = (v); \ + _Q6V32_internal_union.h[1] = (new); \ + _Q6V32_internal_union.w; \ + }) + +#else /* !__qdsp6__ */ + +#define Q6V32_PUT_H0(v, new) \ + (((v) & 0xffff0000) | ((Q6Vect32)((unsigned short)(new)))) +#define Q6V32_PUT_H1(v, new) (((v) & 0x0000ffff) | (((Q6Vect32)(new)) << 16)) + +#endif /* !__qdsp6__ */ + +/* Set byte macros */ + +#ifdef __qdsp6__ + +#define Q6V32_PUT_B0(v, new) \ + __extension__({ \ + union { \ + int w; \ + char b[4]; \ + } _Q6V32_internal_union; \ + _Q6V32_internal_union.w = (v); \ + _Q6V32_internal_union.b[0] = (new); \ + _Q6V32_internal_union.w; \ + }) +#define Q6V32_PUT_B1(v, new) \ + __extension__({ \ + union { \ + int w; \ + char b[4]; \ + } _Q6V32_internal_union; \ + _Q6V32_internal_union.w = (v); \ + _Q6V32_internal_union.b[1] = (new); \ + _Q6V32_internal_union.w; \ + }) +#define Q6V32_PUT_B2(v, new) \ + __extension__({ \ + union { \ + int w; \ + char b[4]; \ + } _Q6V32_internal_union; \ + _Q6V32_internal_union.w = (v); \ + _Q6V32_internal_union.b[2] = (new); \ + _Q6V32_internal_union.w; \ + }) +#define Q6V32_PUT_B3(v, new) \ + __extension__({ \ + union { \ + int w; \ + char b[4]; \ + } _Q6V32_internal_union; \ + _Q6V32_internal_union.w = (v); \ + _Q6V32_internal_union.b[3] = (new); \ + _Q6V32_internal_union.w; \ + }) + +#else /* !__qdsp6__ */ + +#define Q6V32_PUT_B0(v, new) \ + (((v) & 0xffffff00) | ((Q6Vect32)((unsigned char)(new)))) +#define Q6V32_PUT_B1(v, new) \ + (((v) & 0xffff00ff) | (((Q6Vect32)((unsigned char)(new))) << 8)) +#define Q6V32_PUT_B2(v, new) \ + (((v) & 0xff00ffff) | (((Q6Vect32)((unsigned char)(new))) << 16)) +#define Q6V32_PUT_B3(v, new) (((v) & 0x00ffffff) | (((Q6Vect32)(new)) << 24)) + +#endif /* !__qdsp6__ */ + +/* NOTE: All create macros return a Q6Vect32 type */ + +/* Create from a word */ + +#define Q6V32_CREATE_W(w) (w) + +/* Create from half words */ + +#ifdef __qdsp6__ + +#define Q6V32_CREATE_H(h1, h0) \ + __extension__({ \ + union { \ + long long d; \ + short h[2]; \ + } _Q6V32_internal_union; \ + _Q6V32_internal_union.h[0] = (h0); \ + _Q6V32_internal_union.h[1] = (h1); \ + _Q6V32_internal_union.d; \ + }) + +#else /* !__qdsp6__ */ + +#define Q6V32_CREATE_H(h1, h0) \ + ((((Q6Vect32)(h1)) << 16) | ((Q6Vect32)((h0) & 0xffff))) + +#endif /* !__qdsp6__ */ + +/* Create from bytes */ +#ifdef __qdsp6__ + +#define Q6V32_CREATE_B(b3, b2, b1, b0) \ + __extension__({ \ + union { \ + long long d; \ + char b[4]; \ + } _Q6V32_internal_union; \ + _Q6V32_internal_union.b[0] = (b0); \ + _Q6V32_internal_union.b[1] = (b1); \ + _Q6V32_internal_union.b[2] = (b2); \ + _Q6V32_internal_union.b[3] = (b3); \ + _Q6V32_internal_union.d; \ + }) + +#else /* !__qdsp6__ */ + +#define Q6V32_CREATE_B(b3, b2, b1, b0) \ + ((((Q6Vect32)(b3)) << 24) | (((Q6Vect32)((b2) & 0xff)) << 16) | \ + (((Q6Vect32)((b1) & 0xff)) << 8) | ((Q6Vect32)((b0) & 0xff))) + +#endif /* !__qdsp6__ */ + +#ifdef __cplusplus + +class Q6Vect32C { +public: + // Constructors + Q6Vect32C(int w = 0) : data(w) {}; + Q6Vect32C(short h1, short h0) : data(Q6V32_CREATE_H(h1, h0)) {}; + Q6Vect32C(signed char b3, signed char b2, signed char b1, signed char b0) + : data(Q6V32_CREATE_B(b3, b2, b1, b0)) {}; + Q6Vect32C(const Q6Vect32C &v) : data(v.data) {}; + + Q6Vect32C &operator=(const Q6Vect32C &v) { + data = v.data; + return *this; + }; + + operator int() { + return data; + }; + + // Extract word methods + int W(void) { + return Q6V32_GET_W(data); + }; + unsigned int UW(void) { + return Q6V32_GET_UW(data); + }; + + // Extract half word methods + short H0(void) { + return Q6V32_GET_H0(data); + }; + short H1(void) { + return Q6V32_GET_H1(data); + }; + unsigned short UH0(void) { + return Q6V32_GET_UH0(data); + }; + unsigned short UH1(void) { + return Q6V32_GET_UH1(data); + }; + + // Extract byte methods + signed char B0(void) { + return Q6V32_GET_B0(data); + }; + signed char B1(void) { + return Q6V32_GET_B1(data); + }; + signed char B2(void) { + return Q6V32_GET_B2(data); + }; + signed char B3(void) { + return Q6V32_GET_B3(data); + }; + unsigned char UB0(void) { + return Q6V32_GET_UB0(data); + }; + unsigned char UB1(void) { + return Q6V32_GET_UB1(data); + }; + unsigned char UB2(void) { + return Q6V32_GET_UB2(data); + }; + unsigned char UB3(void) { + return Q6V32_GET_UB3(data); + }; + + // NOTE: All set methods return a Q6Vect32C type + + // Set word method + Q6Vect32C W(int w) { + return Q6Vect32C(Q6V32_PUT_W(data, w)); + }; + + // Set half word methods + Q6Vect32C H0(short h) { + return Q6Vect32C(Q6V32_PUT_H0(data, h)); + }; + Q6Vect32C H1(short h) { + return Q6Vect32C(Q6V32_PUT_H1(data, h)); + }; + + // Set byte methods + Q6Vect32C B0(signed char b) { + return Q6Vect32C(Q6V32_PUT_B0(data, b)); + }; + Q6Vect32C B1(signed char b) { + return Q6Vect32C(Q6V32_PUT_B1(data, b)); + }; + Q6Vect32C B2(signed char b) { + return Q6Vect32C(Q6V32_PUT_B2(data, b)); + }; + Q6Vect32C B3(signed char b) { + return Q6Vect32C(Q6V32_PUT_B3(data, b)); + }; + +private: + int data; +}; + +#endif /* __cplusplus */ + +// V65 Vector types +#if __HVX_ARCH__ >= 65 +#if defined __HVX__ && (__HVX_LENGTH__ == 128) +typedef long Q6VecPred128 __attribute__((__vector_size__(128))) + __attribute__((aligned(128))); + +typedef long Q6Vect1024 __attribute__((__vector_size__(128))) + __attribute__((aligned(128))); + +typedef long Q6Vect2048 __attribute__((__vector_size__(256))) + __attribute__((aligned(256))); + +#else /* defined __HVX__ && (__HVX_LENGTH__ == 128) */ +#if defined __HVX__ && (__HVX_LENGTH__ == 64) +typedef long Q6VecPred64 __attribute__((__vector_size__(64))) + __attribute__((aligned(64))); + +typedef long Q6Vect512 __attribute__((__vector_size__(64))) + __attribute__((aligned(64))); + +typedef long Q6Vect1024 __attribute__((__vector_size__(128))) + __attribute__((aligned(128))); + +#endif /* defined __HVX__ && (__HVX_LENGTH__ == 64) */ +#endif /* defined __HVX__ && (__HVX_LENGTH__ == 128) */ +#endif /* __HVX_ARCH__ >= 65 */ + +/* Predicates */ + +typedef int Q6Pred; + + +#ifdef __HVX__ + +// Extract HVX VectorPair macro. +#define HEXAGON_HVX_GET_W(v) (v) + +// Extract HVX Vector macros. +#define HEXAGON_HVX_GET_V0(v) \ + __extension__({ \ + union { \ + HVX_VectorPair W; \ + HVX_Vector V[2]; \ + } _HEXAGON_HVX_internal_union; \ + _HEXAGON_HVX_internal_union.W = (v); \ + _HEXAGON_HVX_internal_union.V[0]; \ + }) +#define HEXAGON_HVX_GET_V1(v) \ + __extension__({ \ + union { \ + HVX_VectorPair W; \ + HVX_Vector V[2]; \ + } _HEXAGON_HVX_internal_union; \ + _HEXAGON_HVX_internal_union.W = (v); \ + _HEXAGON_HVX_internal_union.V[1]; \ + }) +#define HEXAGON_HVX_GET_P(v) \ + __extension__({ \ + union { \ + HVX_VectorPair W; \ + HVX_VectorPred P[2]; \ + } _HEXAGON_HVX_internal_union; \ + _HEXAGON_HVX_internal_union.W = (v); \ + _HEXAGON_HVX_internal_union.P[0]; \ + }) + +// Set HVX VectorPair macro. +#define HEXAGON_HVX_PUT_W(v, new) (new) + +// Set HVX Vector macros. +#define HEXAGON_HVX_PUT_V0(v, new) \ + __extension__({ \ + union { \ + HVX_VectorPair W; \ + HVX_Vector V[2]; \ + } _HEXAGON_HVX_internal_union; \ + _HEXAGON_HVX_internal_union.W = (v); \ + _HEXAGON_HVX_internal_union.V[0] = (new); \ + _HEXAGON_HVX_internal_union.W; \ + }) + +#define HEXAGON_HVX_PUT_V1(v, new) \ + __extension__({ \ + union { \ + HVX_VectorPair W; \ + HVX_Vector V[2]; \ + } _HEXAGON_HVX_internal_union; \ + _HEXAGON_HVX_internal_union.W = (v); \ + _HEXAGON_HVX_internal_union.V[1] = (new); \ + _HEXAGON_HVX_internal_union.W; \ + }) + +#define HEXAGON_HVX_PUT_P(v, new) \ + __extension__({ \ + union { \ + HVX_VectorPair W; \ + HVX_VectorPred P[2]; \ + } _HEXAGON_HVX_internal_union; \ + _HEXAGON_HVX_internal_union.W = (v); \ + _HEXAGON_HVX_internal_union.P[0] = (new); \ + _HEXAGON_HVX_internal_union.W; \ + }) + + +#define HEXAGON_HVX_CREATE_W(v1, v0) \ + __extension__({ \ + union { \ + HVX_VectorPair W; \ + HVX_Vector V[2]; \ + } _HEXAGON_HVX_internal_union; \ + _HEXAGON_HVX_internal_union.V[0] = (v0); \ + _HEXAGON_HVX_internal_union.V[1] = (v1); \ + _HEXAGON_HVX_internal_union.W; \ + }) + +#ifdef __cplusplus + +class HVX_Vect { +public: + // Constructors. + // Default. + HVX_Vect() : data(Q6_W_vcombine_VV(Q6_V_vzero(), Q6_V_vzero())){}; + + // Custom constructors. + HVX_Vect(HVX_VectorPair W) : data(W){}; + HVX_Vect(HVX_Vector v1, HVX_Vector v0) : data(HEXAGON_HVX_CREATE_W(v1, v0)){}; + + // Copy constructor. + HVX_Vect(const HVX_Vect &W) = default; + + // Move constructor. + HVX_Vect(HVX_Vect &&W) = default; + + // Assignment operator. + HVX_Vect &operator=(const HVX_Vect &W) = default; + + operator HVX_VectorPair() { return data; }; + + // Extract VectorPair method. + HVX_VectorPair W(void) { return HEXAGON_HVX_GET_W(data); }; + + // Extract Vector methods. + HVX_Vector V0(void) { return HEXAGON_HVX_GET_V0(data); }; + HVX_Vector V1(void) { return HEXAGON_HVX_GET_V1(data); }; + HVX_VectorPred P(void) { return HEXAGON_HVX_GET_P(data); }; + + // NOTE: All set methods return a HVX_Vect type. + // Set HVX VectorPair method. + HVX_Vect W(HVX_VectorPair w) { return HVX_Vect(HEXAGON_HVX_PUT_W(data, w)); }; + + // Set HVX Vector methods. + HVX_Vect V0(HVX_Vector v) { return HVX_Vect(HEXAGON_HVX_PUT_V0(data, v)); }; + HVX_Vect V1(HVX_Vector v) { return HVX_Vect(HEXAGON_HVX_PUT_V1(data, v)); }; + HVX_Vect P(HVX_VectorPred p) { return HVX_Vect(HEXAGON_HVX_PUT_P(data, p)); }; + +private: + HVX_VectorPair data; +}; + +#endif /* __cplusplus */ +#endif /* __HVX__ */ + +#define HEXAGON_UDMA_DM0_STATUS_IDLE 0x00000000 +#define HEXAGON_UDMA_DM0_STATUS_RUN 0x00000001 +#define HEXAGON_UDMA_DM0_STATUS_ERROR 0x00000002 +#define HEXAGON_UDMA_DESC_DSTATE_INCOMPLETE 0 +#define HEXAGON_UDMA_DESC_DSTATE_COMPLETE 1 +#define HEXAGON_UDMA_DESC_ORDER_NOORDER 0 +#define HEXAGON_UDMA_DESC_ORDER_ORDER 1 +#define HEXAGON_UDMA_DESC_BYPASS_OFF 0 +#define HEXAGON_UDMA_DESC_BYPASS_ON 1 +#define HEXAGON_UDMA_DESC_COMP_NONE 0 +#define HEXAGON_UDMA_DESC_COMP_DLBC 1 +#define HEXAGON_UDMA_DESC_DESCTYPE_TYPE0 0 +#define HEXAGON_UDMA_DESC_DESCTYPE_TYPE1 1 + +typedef struct hexagon_udma_descriptor_type0_s +{ + void *next; + unsigned int length:24; + unsigned int desctype:2; + unsigned int dstcomp:1; + unsigned int srccomp:1; + unsigned int dstbypass:1; + unsigned int srcbypass:1; + unsigned int order:1; + unsigned int dstate:1; + void *src; + void *dst; +} hexagon_udma_descriptor_type0_t; + +typedef struct hexagon_udma_descriptor_type1_s +{ + void *next; + unsigned int length:24; + unsigned int desctype:2; + unsigned int dstcomp:1; + unsigned int srccomp:1; + unsigned int dstbypass:1; + unsigned int srcbypass:1; + unsigned int order:1; + unsigned int dstate:1; + void *src; + void *dst; + unsigned int allocation:28; + unsigned int padding:4; + unsigned int roiwidth:16; + unsigned int roiheight:16; + unsigned int srcstride:16; + unsigned int dststride:16; + unsigned int srcwidthoffset:16; + unsigned int dstwidthoffset:16; +} hexagon_udma_descriptor_type1_t; + +#endif /* !HEXAGON_TYPES_H */ diff --git a/clang/lib/Headers/hvx_hexagon_protos.h b/clang/lib/Headers/hvx_hexagon_protos.h new file mode 100644 index 0000000000000..41ce7a6b93e93 --- /dev/null +++ b/clang/lib/Headers/hvx_hexagon_protos.h @@ -0,0 +1,4392 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Automatically generated file, do not edit! +//===----------------------------------------------------------------------===// + + + +#ifndef _HVX_HEXAGON_PROTOS_H_ +#define _HVX_HEXAGON_PROTOS_H_ 1 + +#ifdef __HVX__ +#if __HVX_LENGTH__ == 128 +#define __BUILTIN_VECTOR_WRAP(a) a ## _128B +#else +#define __BUILTIN_VECTOR_WRAP(a) a +#endif + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Rd32=vextract(Vu32,Rs32) + C Intrinsic Prototype: Word32 Q6_R_vextract_VR(HVX_Vector Vu, Word32 Rs) + Instruction Type: LD + Execution Slots: SLOT0 + ========================================================================== */ + +#define Q6_R_vextract_VR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_extractw) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32=hi(Vss32) + C Intrinsic Prototype: HVX_Vector Q6_V_hi_W(HVX_VectorPair Vss) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_V_hi_W __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_hi) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32=lo(Vss32) + C Intrinsic Prototype: HVX_Vector Q6_V_lo_W(HVX_VectorPair Vss) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_V_lo_W __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_lo) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32=vsplat(Rt32) + C Intrinsic Prototype: HVX_Vector Q6_V_vsplat_R(Word32 Rt) + Instruction Type: CVI_VX_LATE + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_V_vsplat_R __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_lvsplatw) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Qd4=and(Qs4,Qt4) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_and_QQ(HVX_VectorPred Qs, HVX_VectorPred Qt) + Instruction Type: CVI_VA_DV + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_and_QQ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_and) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Qd4=and(Qs4,!Qt4) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_and_QQn(HVX_VectorPred Qs, HVX_VectorPred Qt) + Instruction Type: CVI_VA_DV + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_and_QQn __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_and_n) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Qd4=not(Qs4) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_not_Q(HVX_VectorPred Qs) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_not_Q __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_not) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Qd4=or(Qs4,Qt4) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_or_QQ(HVX_VectorPred Qs, HVX_VectorPred Qt) + Instruction Type: CVI_VA_DV + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_or_QQ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_or) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Qd4=or(Qs4,!Qt4) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_or_QQn(HVX_VectorPred Qs, HVX_VectorPred Qt) + Instruction Type: CVI_VA_DV + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_or_QQn __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_or_n) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Qd4=vsetq(Rt32) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vsetq_R(Word32 Rt) + Instruction Type: CVI_VP + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vsetq_R __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_scalar2) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Qd4=xor(Qs4,Qt4) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_xor_QQ(HVX_VectorPred Qs, HVX_VectorPred Qt) + Instruction Type: CVI_VA_DV + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_xor_QQ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_xor) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: if (!Qv4) vmem(Rt32+#s4)=Vs32 + C Intrinsic Prototype: void Q6_vmem_QnRIV(HVX_VectorPred Qv, HVX_Vector* Rt, HVX_Vector Vs) + Instruction Type: CVI_VM_ST + Execution Slots: SLOT0 + ========================================================================== */ + +#define Q6_vmem_QnRIV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vS32b_nqpred_ai) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: if (!Qv4) vmem(Rt32+#s4):nt=Vs32 + C Intrinsic Prototype: void Q6_vmem_QnRIV_nt(HVX_VectorPred Qv, HVX_Vector* Rt, HVX_Vector Vs) + Instruction Type: CVI_VM_ST + Execution Slots: SLOT0 + ========================================================================== */ + +#define Q6_vmem_QnRIV_nt __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vS32b_nt_nqpred_ai) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: if (Qv4) vmem(Rt32+#s4):nt=Vs32 + C Intrinsic Prototype: void Q6_vmem_QRIV_nt(HVX_VectorPred Qv, HVX_Vector* Rt, HVX_Vector Vs) + Instruction Type: CVI_VM_ST + Execution Slots: SLOT0 + ========================================================================== */ + +#define Q6_vmem_QRIV_nt __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vS32b_nt_qpred_ai) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: if (Qv4) vmem(Rt32+#s4)=Vs32 + C Intrinsic Prototype: void Q6_vmem_QRIV(HVX_VectorPred Qv, HVX_Vector* Rt, HVX_Vector Vs) + Instruction Type: CVI_VM_ST + Execution Slots: SLOT0 + ========================================================================== */ + +#define Q6_vmem_QRIV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vS32b_qpred_ai) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.uh=vabsdiff(Vu32.h,Vv32.h) + C Intrinsic Prototype: HVX_Vector Q6_Vuh_vabsdiff_VhVh(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vuh_vabsdiff_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsdiffh) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.ub=vabsdiff(Vu32.ub,Vv32.ub) + C Intrinsic Prototype: HVX_Vector Q6_Vub_vabsdiff_VubVub(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vub_vabsdiff_VubVub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsdiffub) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.uh=vabsdiff(Vu32.uh,Vv32.uh) + C Intrinsic Prototype: HVX_Vector Q6_Vuh_vabsdiff_VuhVuh(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vuh_vabsdiff_VuhVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsdiffuh) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.uw=vabsdiff(Vu32.w,Vv32.w) + C Intrinsic Prototype: HVX_Vector Q6_Vuw_vabsdiff_VwVw(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vuw_vabsdiff_VwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsdiffw) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.h=vabs(Vu32.h) + C Intrinsic Prototype: HVX_Vector Q6_Vh_vabs_Vh(HVX_Vector Vu) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vh_vabs_Vh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsh) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.h=vabs(Vu32.h):sat + C Intrinsic Prototype: HVX_Vector Q6_Vh_vabs_Vh_sat(HVX_Vector Vu) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vh_vabs_Vh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsh_sat) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.w=vabs(Vu32.w) + C Intrinsic Prototype: HVX_Vector Q6_Vw_vabs_Vw(HVX_Vector Vu) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vw_vabs_Vw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsw) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.w=vabs(Vu32.w):sat + C Intrinsic Prototype: HVX_Vector Q6_Vw_vabs_Vw_sat(HVX_Vector Vu) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vw_vabs_Vw_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsw_sat) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.b=vadd(Vu32.b,Vv32.b) + C Intrinsic Prototype: HVX_Vector Q6_Vb_vadd_VbVb(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vb_vadd_VbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddb) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.b=vadd(Vuu32.b,Vvv32.b) + C Intrinsic Prototype: HVX_VectorPair Q6_Wb_vadd_WbWb(HVX_VectorPair Vuu, HVX_VectorPair Vvv) + Instruction Type: CVI_VA_DV + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Wb_vadd_WbWb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddb_dv) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: if (!Qv4) Vx32.b+=Vu32.b + C Intrinsic Prototype: HVX_Vector Q6_Vb_condacc_QnVbVb(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vb_condacc_QnVbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddbnq) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: if (Qv4) Vx32.b+=Vu32.b + C Intrinsic Prototype: HVX_Vector Q6_Vb_condacc_QVbVb(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vb_condacc_QVbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddbq) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.h=vadd(Vu32.h,Vv32.h) + C Intrinsic Prototype: HVX_Vector Q6_Vh_vadd_VhVh(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vh_vadd_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddh) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.h=vadd(Vuu32.h,Vvv32.h) + C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vadd_WhWh(HVX_VectorPair Vuu, HVX_VectorPair Vvv) + Instruction Type: CVI_VA_DV + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Wh_vadd_WhWh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddh_dv) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: if (!Qv4) Vx32.h+=Vu32.h + C Intrinsic Prototype: HVX_Vector Q6_Vh_condacc_QnVhVh(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vh_condacc_QnVhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhnq) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: if (Qv4) Vx32.h+=Vu32.h + C Intrinsic Prototype: HVX_Vector Q6_Vh_condacc_QVhVh(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vh_condacc_QVhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhq) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.h=vadd(Vu32.h,Vv32.h):sat + C Intrinsic Prototype: HVX_Vector Q6_Vh_vadd_VhVh_sat(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vh_vadd_VhVh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhsat) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.h=vadd(Vuu32.h,Vvv32.h):sat + C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vadd_WhWh_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv) + Instruction Type: CVI_VA_DV + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Wh_vadd_WhWh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhsat_dv) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.w=vadd(Vu32.h,Vv32.h) + C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vadd_VhVh(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Ww_vadd_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhw) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.h=vadd(Vu32.ub,Vv32.ub) + C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vadd_VubVub(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wh_vadd_VubVub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddubh) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.ub=vadd(Vu32.ub,Vv32.ub):sat + C Intrinsic Prototype: HVX_Vector Q6_Vub_vadd_VubVub_sat(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vub_vadd_VubVub_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddubsat) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.ub=vadd(Vuu32.ub,Vvv32.ub):sat + C Intrinsic Prototype: HVX_VectorPair Q6_Wub_vadd_WubWub_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv) + Instruction Type: CVI_VA_DV + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Wub_vadd_WubWub_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddubsat_dv) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.uh=vadd(Vu32.uh,Vv32.uh):sat + C Intrinsic Prototype: HVX_Vector Q6_Vuh_vadd_VuhVuh_sat(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vuh_vadd_VuhVuh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduhsat) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.uh=vadd(Vuu32.uh,Vvv32.uh):sat + C Intrinsic Prototype: HVX_VectorPair Q6_Wuh_vadd_WuhWuh_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv) + Instruction Type: CVI_VA_DV + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Wuh_vadd_WuhWuh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduhsat_dv) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.w=vadd(Vu32.uh,Vv32.uh) + C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vadd_VuhVuh(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Ww_vadd_VuhVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduhw) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.w=vadd(Vu32.w,Vv32.w) + C Intrinsic Prototype: HVX_Vector Q6_Vw_vadd_VwVw(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vw_vadd_VwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddw) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.w=vadd(Vuu32.w,Vvv32.w) + C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vadd_WwWw(HVX_VectorPair Vuu, HVX_VectorPair Vvv) + Instruction Type: CVI_VA_DV + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Ww_vadd_WwWw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddw_dv) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: if (!Qv4) Vx32.w+=Vu32.w + C Intrinsic Prototype: HVX_Vector Q6_Vw_condacc_QnVwVw(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vw_condacc_QnVwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddwnq) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: if (Qv4) Vx32.w+=Vu32.w + C Intrinsic Prototype: HVX_Vector Q6_Vw_condacc_QVwVw(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vw_condacc_QVwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddwq) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.w=vadd(Vu32.w,Vv32.w):sat + C Intrinsic Prototype: HVX_Vector Q6_Vw_vadd_VwVw_sat(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vw_vadd_VwVw_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddwsat) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.w=vadd(Vuu32.w,Vvv32.w):sat + C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vadd_WwWw_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv) + Instruction Type: CVI_VA_DV + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Ww_vadd_WwWw_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddwsat_dv) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32=valign(Vu32,Vv32,Rt8) + C Intrinsic Prototype: HVX_Vector Q6_V_valign_VVR(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) + Instruction Type: CVI_VP + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_V_valign_VVR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_valignb) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32=valign(Vu32,Vv32,#u3) + C Intrinsic Prototype: HVX_Vector Q6_V_valign_VVI(HVX_Vector Vu, HVX_Vector Vv, Word32 Iu3) + Instruction Type: CVI_VP + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_V_valign_VVI __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_valignbi) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32=vand(Vu32,Vv32) + C Intrinsic Prototype: HVX_Vector Q6_V_vand_VV(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_V_vand_VV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vand) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32=vand(Qu4,Rt32) + C Intrinsic Prototype: HVX_Vector Q6_V_vand_QR(HVX_VectorPred Qu, Word32 Rt) + Instruction Type: CVI_VX_LATE + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_V_vand_QR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vx32|=vand(Qu4,Rt32) + C Intrinsic Prototype: HVX_Vector Q6_V_vandor_VQR(HVX_Vector Vx, HVX_VectorPred Qu, Word32 Rt) + Instruction Type: CVI_VX_LATE + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_V_vandor_VQR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt_acc) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Qd4=vand(Vu32,Rt32) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vand_VR(HVX_Vector Vu, Word32 Rt) + Instruction Type: CVI_VX_LATE + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Q_vand_VR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Qx4|=vand(Vu32,Rt32) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vandor_QVR(HVX_VectorPred Qx, HVX_Vector Vu, Word32 Rt) + Instruction Type: CVI_VX_LATE + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Q_vandor_QVR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt_acc) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.h=vasl(Vu32.h,Rt32) + C Intrinsic Prototype: HVX_Vector Q6_Vh_vasl_VhR(HVX_Vector Vu, Word32 Rt) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vh_vasl_VhR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslh) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.h=vasl(Vu32.h,Vv32.h) + C Intrinsic Prototype: HVX_Vector Q6_Vh_vasl_VhVh(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vh_vasl_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslhv) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.w=vasl(Vu32.w,Rt32) + C Intrinsic Prototype: HVX_Vector Q6_Vw_vasl_VwR(HVX_Vector Vu, Word32 Rt) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vw_vasl_VwR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslw) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vx32.w+=vasl(Vu32.w,Rt32) + C Intrinsic Prototype: HVX_Vector Q6_Vw_vaslacc_VwVwR(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vw_vaslacc_VwVwR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslw_acc) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.w=vasl(Vu32.w,Vv32.w) + C Intrinsic Prototype: HVX_Vector Q6_Vw_vasl_VwVw(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vw_vasl_VwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslwv) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.h=vasr(Vu32.h,Rt32) + C Intrinsic Prototype: HVX_Vector Q6_Vh_vasr_VhR(HVX_Vector Vu, Word32 Rt) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vh_vasr_VhR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrh) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.b=vasr(Vu32.h,Vv32.h,Rt8):rnd:sat + C Intrinsic Prototype: HVX_Vector Q6_Vb_vasr_VhVhR_rnd_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vb_vasr_VhVhR_rnd_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrhbrndsat) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.ub=vasr(Vu32.h,Vv32.h,Rt8):rnd:sat + C Intrinsic Prototype: HVX_Vector Q6_Vub_vasr_VhVhR_rnd_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vub_vasr_VhVhR_rnd_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrhubrndsat) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.ub=vasr(Vu32.h,Vv32.h,Rt8):sat + C Intrinsic Prototype: HVX_Vector Q6_Vub_vasr_VhVhR_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vub_vasr_VhVhR_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrhubsat) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.h=vasr(Vu32.h,Vv32.h) + C Intrinsic Prototype: HVX_Vector Q6_Vh_vasr_VhVh(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vh_vasr_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrhv) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.w=vasr(Vu32.w,Rt32) + C Intrinsic Prototype: HVX_Vector Q6_Vw_vasr_VwR(HVX_Vector Vu, Word32 Rt) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vw_vasr_VwR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrw) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vx32.w+=vasr(Vu32.w,Rt32) + C Intrinsic Prototype: HVX_Vector Q6_Vw_vasracc_VwVwR(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vw_vasracc_VwVwR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrw_acc) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.h=vasr(Vu32.w,Vv32.w,Rt8) + C Intrinsic Prototype: HVX_Vector Q6_Vh_vasr_VwVwR(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vh_vasr_VwVwR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwh) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.h=vasr(Vu32.w,Vv32.w,Rt8):rnd:sat + C Intrinsic Prototype: HVX_Vector Q6_Vh_vasr_VwVwR_rnd_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vh_vasr_VwVwR_rnd_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwhrndsat) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.h=vasr(Vu32.w,Vv32.w,Rt8):sat + C Intrinsic Prototype: HVX_Vector Q6_Vh_vasr_VwVwR_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vh_vasr_VwVwR_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwhsat) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.uh=vasr(Vu32.w,Vv32.w,Rt8):sat + C Intrinsic Prototype: HVX_Vector Q6_Vuh_vasr_VwVwR_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vuh_vasr_VwVwR_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwuhsat) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.w=vasr(Vu32.w,Vv32.w) + C Intrinsic Prototype: HVX_Vector Q6_Vw_vasr_VwVw(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vw_vasr_VwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwv) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32=Vu32 + C Intrinsic Prototype: HVX_Vector Q6_V_equals_V(HVX_Vector Vu) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_V_equals_V __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vassign) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32=Vuu32 + C Intrinsic Prototype: HVX_VectorPair Q6_W_equals_W(HVX_VectorPair Vuu) + Instruction Type: CVI_VA_DV + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_W_equals_W __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vassignp) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.h=vavg(Vu32.h,Vv32.h) + C Intrinsic Prototype: HVX_Vector Q6_Vh_vavg_VhVh(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vh_vavg_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgh) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.h=vavg(Vu32.h,Vv32.h):rnd + C Intrinsic Prototype: HVX_Vector Q6_Vh_vavg_VhVh_rnd(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vh_vavg_VhVh_rnd __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavghrnd) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.ub=vavg(Vu32.ub,Vv32.ub) + C Intrinsic Prototype: HVX_Vector Q6_Vub_vavg_VubVub(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vub_vavg_VubVub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgub) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.ub=vavg(Vu32.ub,Vv32.ub):rnd + C Intrinsic Prototype: HVX_Vector Q6_Vub_vavg_VubVub_rnd(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vub_vavg_VubVub_rnd __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgubrnd) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.uh=vavg(Vu32.uh,Vv32.uh) + C Intrinsic Prototype: HVX_Vector Q6_Vuh_vavg_VuhVuh(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vuh_vavg_VuhVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavguh) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.uh=vavg(Vu32.uh,Vv32.uh):rnd + C Intrinsic Prototype: HVX_Vector Q6_Vuh_vavg_VuhVuh_rnd(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vuh_vavg_VuhVuh_rnd __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavguhrnd) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.w=vavg(Vu32.w,Vv32.w) + C Intrinsic Prototype: HVX_Vector Q6_Vw_vavg_VwVw(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vw_vavg_VwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgw) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.w=vavg(Vu32.w,Vv32.w):rnd + C Intrinsic Prototype: HVX_Vector Q6_Vw_vavg_VwVw_rnd(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vw_vavg_VwVw_rnd __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgwrnd) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.uh=vcl0(Vu32.uh) + C Intrinsic Prototype: HVX_Vector Q6_Vuh_vcl0_Vuh(HVX_Vector Vu) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vuh_vcl0_Vuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcl0h) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.uw=vcl0(Vu32.uw) + C Intrinsic Prototype: HVX_Vector Q6_Vuw_vcl0_Vuw(HVX_Vector Vu) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vuw_vcl0_Vuw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcl0w) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32=vcombine(Vu32,Vv32) + C Intrinsic Prototype: HVX_VectorPair Q6_W_vcombine_VV(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA_DV + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_W_vcombine_VV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcombine) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32=#0 + C Intrinsic Prototype: HVX_Vector Q6_V_vzero() + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_V_vzero __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vd0) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.b=vdeal(Vu32.b) + C Intrinsic Prototype: HVX_Vector Q6_Vb_vdeal_Vb(HVX_Vector Vu) + Instruction Type: CVI_VP + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vb_vdeal_Vb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdealb) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.b=vdeale(Vu32.b,Vv32.b) + C Intrinsic Prototype: HVX_Vector Q6_Vb_vdeale_VbVb(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VP + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vb_vdeale_VbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdealb4w) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.h=vdeal(Vu32.h) + C Intrinsic Prototype: HVX_Vector Q6_Vh_vdeal_Vh(HVX_Vector Vu) + Instruction Type: CVI_VP + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vh_vdeal_Vh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdealh) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32=vdeal(Vu32,Vv32,Rt8) + C Intrinsic Prototype: HVX_VectorPair Q6_W_vdeal_VVR(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) + Instruction Type: CVI_VP_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_W_vdeal_VVR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdealvdd) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32=vdelta(Vu32,Vv32) + C Intrinsic Prototype: HVX_Vector Q6_V_vdelta_VV(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VP + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_V_vdelta_VV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdelta) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.h=vdmpy(Vu32.ub,Rt32.b) + C Intrinsic Prototype: HVX_Vector Q6_Vh_vdmpy_VubRb(HVX_Vector Vu, Word32 Rt) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vh_vdmpy_VubRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpybus) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vx32.h+=vdmpy(Vu32.ub,Rt32.b) + C Intrinsic Prototype: HVX_Vector Q6_Vh_vdmpyacc_VhVubRb(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vh_vdmpyacc_VhVubRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpybus_acc) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.h=vdmpy(Vuu32.ub,Rt32.b) + C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vdmpy_WubRb(HVX_VectorPair Vuu, Word32 Rt) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wh_vdmpy_WubRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpybus_dv) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vxx32.h+=vdmpy(Vuu32.ub,Rt32.b) + C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vdmpyacc_WhWubRb(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wh_vdmpyacc_WhWubRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpybus_dv_acc) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.w=vdmpy(Vu32.h,Rt32.b) + C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpy_VhRb(HVX_Vector Vu, Word32 Rt) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vw_vdmpy_VhRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhb) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vx32.w+=vdmpy(Vu32.h,Rt32.b) + C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpyacc_VwVhRb(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vw_vdmpyacc_VwVhRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhb_acc) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.w=vdmpy(Vuu32.h,Rt32.b) + C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vdmpy_WhRb(HVX_VectorPair Vuu, Word32 Rt) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Ww_vdmpy_WhRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhb_dv) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vxx32.w+=vdmpy(Vuu32.h,Rt32.b) + C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vdmpyacc_WwWhRb(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Ww_vdmpyacc_WwWhRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhb_dv_acc) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.w=vdmpy(Vuu32.h,Rt32.h):sat + C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpy_WhRh_sat(HVX_VectorPair Vuu, Word32 Rt) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vw_vdmpy_WhRh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhisat) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vx32.w+=vdmpy(Vuu32.h,Rt32.h):sat + C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpyacc_VwWhRh_sat(HVX_Vector Vx, HVX_VectorPair Vuu, Word32 Rt) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vw_vdmpyacc_VwWhRh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhisat_acc) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.w=vdmpy(Vu32.h,Rt32.h):sat + C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpy_VhRh_sat(HVX_Vector Vu, Word32 Rt) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vw_vdmpy_VhRh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsat) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vx32.w+=vdmpy(Vu32.h,Rt32.h):sat + C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpyacc_VwVhRh_sat(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vw_vdmpyacc_VwVhRh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsat_acc) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.w=vdmpy(Vuu32.h,Rt32.uh,#1):sat + C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpy_WhRuh_sat(HVX_VectorPair Vuu, Word32 Rt) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vw_vdmpy_WhRuh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsuisat) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vx32.w+=vdmpy(Vuu32.h,Rt32.uh,#1):sat + C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpyacc_VwWhRuh_sat(HVX_Vector Vx, HVX_VectorPair Vuu, Word32 Rt) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vw_vdmpyacc_VwWhRuh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsuisat_acc) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.w=vdmpy(Vu32.h,Rt32.uh):sat + C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpy_VhRuh_sat(HVX_Vector Vu, Word32 Rt) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vw_vdmpy_VhRuh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsusat) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vx32.w+=vdmpy(Vu32.h,Rt32.uh):sat + C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpyacc_VwVhRuh_sat(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vw_vdmpyacc_VwVhRuh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsusat_acc) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.w=vdmpy(Vu32.h,Vv32.h):sat + C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpy_VhVh_sat(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vw_vdmpy_VhVh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhvsat) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vx32.w+=vdmpy(Vu32.h,Vv32.h):sat + C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpyacc_VwVhVh_sat(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vw_vdmpyacc_VwVhVh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhvsat_acc) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.uw=vdsad(Vuu32.uh,Rt32.uh) + C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vdsad_WuhRuh(HVX_VectorPair Vuu, Word32 Rt) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wuw_vdsad_WuhRuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdsaduh) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vxx32.uw+=vdsad(Vuu32.uh,Rt32.uh) + C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vdsadacc_WuwWuhRuh(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wuw_vdsadacc_WuwWuhRuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdsaduh_acc) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Qd4=vcmp.eq(Vu32.b,Vv32.b) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eq_VbVb(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_eq_VbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqb) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Qx4&=vcmp.eq(Vu32.b,Vv32.b) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqand_QVbVb(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_eqand_QVbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqb_and) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Qx4|=vcmp.eq(Vu32.b,Vv32.b) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqor_QVbVb(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_eqor_QVbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqb_or) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Qx4^=vcmp.eq(Vu32.b,Vv32.b) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqxacc_QVbVb(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_eqxacc_QVbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqb_xor) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Qd4=vcmp.eq(Vu32.h,Vv32.h) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eq_VhVh(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_eq_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqh) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Qx4&=vcmp.eq(Vu32.h,Vv32.h) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqand_QVhVh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_eqand_QVhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqh_and) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Qx4|=vcmp.eq(Vu32.h,Vv32.h) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqor_QVhVh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_eqor_QVhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqh_or) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Qx4^=vcmp.eq(Vu32.h,Vv32.h) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqxacc_QVhVh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_eqxacc_QVhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqh_xor) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Qd4=vcmp.eq(Vu32.w,Vv32.w) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eq_VwVw(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_eq_VwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqw) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Qx4&=vcmp.eq(Vu32.w,Vv32.w) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqand_QVwVw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_eqand_QVwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqw_and) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Qx4|=vcmp.eq(Vu32.w,Vv32.w) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqor_QVwVw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_eqor_QVwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqw_or) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Qx4^=vcmp.eq(Vu32.w,Vv32.w) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqxacc_QVwVw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_eqxacc_QVwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqw_xor) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Qd4=vcmp.gt(Vu32.b,Vv32.b) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gt_VbVb(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_gt_VbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtb) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Qx4&=vcmp.gt(Vu32.b,Vv32.b) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtand_QVbVb(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_gtand_QVbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtb_and) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Qx4|=vcmp.gt(Vu32.b,Vv32.b) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtor_QVbVb(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_gtor_QVbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtb_or) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Qx4^=vcmp.gt(Vu32.b,Vv32.b) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtxacc_QVbVb(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_gtxacc_QVbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtb_xor) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Qd4=vcmp.gt(Vu32.h,Vv32.h) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gt_VhVh(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_gt_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgth) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Qx4&=vcmp.gt(Vu32.h,Vv32.h) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtand_QVhVh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_gtand_QVhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgth_and) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Qx4|=vcmp.gt(Vu32.h,Vv32.h) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtor_QVhVh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_gtor_QVhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgth_or) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Qx4^=vcmp.gt(Vu32.h,Vv32.h) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtxacc_QVhVh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_gtxacc_QVhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgth_xor) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Qd4=vcmp.gt(Vu32.ub,Vv32.ub) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gt_VubVub(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_gt_VubVub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtub) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Qx4&=vcmp.gt(Vu32.ub,Vv32.ub) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtand_QVubVub(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_gtand_QVubVub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtub_and) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Qx4|=vcmp.gt(Vu32.ub,Vv32.ub) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtor_QVubVub(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_gtor_QVubVub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtub_or) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Qx4^=vcmp.gt(Vu32.ub,Vv32.ub) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtxacc_QVubVub(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_gtxacc_QVubVub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtub_xor) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Qd4=vcmp.gt(Vu32.uh,Vv32.uh) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gt_VuhVuh(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_gt_VuhVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuh) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Qx4&=vcmp.gt(Vu32.uh,Vv32.uh) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtand_QVuhVuh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_gtand_QVuhVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuh_and) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Qx4|=vcmp.gt(Vu32.uh,Vv32.uh) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtor_QVuhVuh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_gtor_QVuhVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuh_or) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Qx4^=vcmp.gt(Vu32.uh,Vv32.uh) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtxacc_QVuhVuh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_gtxacc_QVuhVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuh_xor) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Qd4=vcmp.gt(Vu32.uw,Vv32.uw) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gt_VuwVuw(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_gt_VuwVuw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuw) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Qx4&=vcmp.gt(Vu32.uw,Vv32.uw) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtand_QVuwVuw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_gtand_QVuwVuw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuw_and) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Qx4|=vcmp.gt(Vu32.uw,Vv32.uw) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtor_QVuwVuw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_gtor_QVuwVuw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuw_or) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Qx4^=vcmp.gt(Vu32.uw,Vv32.uw) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtxacc_QVuwVuw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_gtxacc_QVuwVuw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuw_xor) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Qd4=vcmp.gt(Vu32.w,Vv32.w) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gt_VwVw(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_gt_VwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtw) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Qx4&=vcmp.gt(Vu32.w,Vv32.w) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtand_QVwVw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_gtand_QVwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtw_and) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Qx4|=vcmp.gt(Vu32.w,Vv32.w) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtor_QVwVw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_gtor_QVwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtw_or) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Qx4^=vcmp.gt(Vu32.w,Vv32.w) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtxacc_QVwVw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_gtxacc_QVwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtw_xor) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vx32.w=vinsert(Rt32) + C Intrinsic Prototype: HVX_Vector Q6_Vw_vinsert_VwR(HVX_Vector Vx, Word32 Rt) + Instruction Type: CVI_VX_LATE + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vw_vinsert_VwR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vinsertwr) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32=vlalign(Vu32,Vv32,Rt8) + C Intrinsic Prototype: HVX_Vector Q6_V_vlalign_VVR(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) + Instruction Type: CVI_VP + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_V_vlalign_VVR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlalignb) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32=vlalign(Vu32,Vv32,#u3) + C Intrinsic Prototype: HVX_Vector Q6_V_vlalign_VVI(HVX_Vector Vu, HVX_Vector Vv, Word32 Iu3) + Instruction Type: CVI_VP + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_V_vlalign_VVI __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlalignbi) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.uh=vlsr(Vu32.uh,Rt32) + C Intrinsic Prototype: HVX_Vector Q6_Vuh_vlsr_VuhR(HVX_Vector Vu, Word32 Rt) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vuh_vlsr_VuhR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlsrh) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.h=vlsr(Vu32.h,Vv32.h) + C Intrinsic Prototype: HVX_Vector Q6_Vh_vlsr_VhVh(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vh_vlsr_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlsrhv) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.uw=vlsr(Vu32.uw,Rt32) + C Intrinsic Prototype: HVX_Vector Q6_Vuw_vlsr_VuwR(HVX_Vector Vu, Word32 Rt) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vuw_vlsr_VuwR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlsrw) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.w=vlsr(Vu32.w,Vv32.w) + C Intrinsic Prototype: HVX_Vector Q6_Vw_vlsr_VwVw(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vw_vlsr_VwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlsrwv) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.b=vlut32(Vu32.b,Vv32.b,Rt8) + C Intrinsic Prototype: HVX_Vector Q6_Vb_vlut32_VbVbR(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) + Instruction Type: CVI_VP + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vb_vlut32_VbVbR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvvb) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vx32.b|=vlut32(Vu32.b,Vv32.b,Rt8) + C Intrinsic Prototype: HVX_Vector Q6_Vb_vlut32or_VbVbVbR(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) + Instruction Type: CVI_VP_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vb_vlut32or_VbVbVbR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvvb_oracc) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.h=vlut16(Vu32.b,Vv32.h,Rt8) + C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vlut16_VbVhR(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) + Instruction Type: CVI_VP_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Wh_vlut16_VbVhR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvwh) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vxx32.h|=vlut16(Vu32.b,Vv32.h,Rt8) + C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vlut16or_WhVbVhR(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) + Instruction Type: CVI_VP_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Wh_vlut16or_WhVbVhR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvwh_oracc) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.h=vmax(Vu32.h,Vv32.h) + C Intrinsic Prototype: HVX_Vector Q6_Vh_vmax_VhVh(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vh_vmax_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaxh) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.ub=vmax(Vu32.ub,Vv32.ub) + C Intrinsic Prototype: HVX_Vector Q6_Vub_vmax_VubVub(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vub_vmax_VubVub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaxub) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.uh=vmax(Vu32.uh,Vv32.uh) + C Intrinsic Prototype: HVX_Vector Q6_Vuh_vmax_VuhVuh(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vuh_vmax_VuhVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaxuh) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.w=vmax(Vu32.w,Vv32.w) + C Intrinsic Prototype: HVX_Vector Q6_Vw_vmax_VwVw(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vw_vmax_VwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaxw) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.h=vmin(Vu32.h,Vv32.h) + C Intrinsic Prototype: HVX_Vector Q6_Vh_vmin_VhVh(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vh_vmin_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vminh) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.ub=vmin(Vu32.ub,Vv32.ub) + C Intrinsic Prototype: HVX_Vector Q6_Vub_vmin_VubVub(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vub_vmin_VubVub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vminub) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.uh=vmin(Vu32.uh,Vv32.uh) + C Intrinsic Prototype: HVX_Vector Q6_Vuh_vmin_VuhVuh(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vuh_vmin_VuhVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vminuh) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.w=vmin(Vu32.w,Vv32.w) + C Intrinsic Prototype: HVX_Vector Q6_Vw_vmin_VwVw(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vw_vmin_VwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vminw) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.h=vmpa(Vuu32.ub,Rt32.b) + C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpa_WubRb(HVX_VectorPair Vuu, Word32 Rt) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wh_vmpa_WubRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabus) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vxx32.h+=vmpa(Vuu32.ub,Rt32.b) + C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpaacc_WhWubRb(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wh_vmpaacc_WhWubRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabus_acc) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.h=vmpa(Vuu32.ub,Vvv32.b) + C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpa_WubWb(HVX_VectorPair Vuu, HVX_VectorPair Vvv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wh_vmpa_WubWb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabusv) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.h=vmpa(Vuu32.ub,Vvv32.ub) + C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpa_WubWub(HVX_VectorPair Vuu, HVX_VectorPair Vvv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wh_vmpa_WubWub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabuuv) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.w=vmpa(Vuu32.h,Rt32.b) + C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpa_WhRb(HVX_VectorPair Vuu, Word32 Rt) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Ww_vmpa_WhRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpahb) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vxx32.w+=vmpa(Vuu32.h,Rt32.b) + C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpaacc_WwWhRb(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Ww_vmpaacc_WwWhRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpahb_acc) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.h=vmpy(Vu32.ub,Rt32.b) + C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpy_VubRb(HVX_Vector Vu, Word32 Rt) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wh_vmpy_VubRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybus) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vxx32.h+=vmpy(Vu32.ub,Rt32.b) + C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpyacc_WhVubRb(HVX_VectorPair Vxx, HVX_Vector Vu, Word32 Rt) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wh_vmpyacc_WhVubRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybus_acc) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.h=vmpy(Vu32.ub,Vv32.b) + C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpy_VubVb(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wh_vmpy_VubVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybusv) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vxx32.h+=vmpy(Vu32.ub,Vv32.b) + C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpyacc_WhVubVb(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wh_vmpyacc_WhVubVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybusv_acc) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.h=vmpy(Vu32.b,Vv32.b) + C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpy_VbVb(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wh_vmpy_VbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybv) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vxx32.h+=vmpy(Vu32.b,Vv32.b) + C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpyacc_WhVbVb(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wh_vmpyacc_WhVbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybv_acc) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.w=vmpye(Vu32.w,Vv32.uh) + C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpye_VwVuh(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vw_vmpye_VwVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyewuh) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.w=vmpy(Vu32.h,Rt32.h) + C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpy_VhRh(HVX_Vector Vu, Word32 Rt) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Ww_vmpy_VhRh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyh) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vxx32.w+=vmpy(Vu32.h,Rt32.h):sat + C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpyacc_WwVhRh_sat(HVX_VectorPair Vxx, HVX_Vector Vu, Word32 Rt) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Ww_vmpyacc_WwVhRh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhsat_acc) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.h=vmpy(Vu32.h,Rt32.h):<<1:rnd:sat + C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpy_VhRh_s1_rnd_sat(HVX_Vector Vu, Word32 Rt) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vh_vmpy_VhRh_s1_rnd_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhsrs) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.h=vmpy(Vu32.h,Rt32.h):<<1:sat + C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpy_VhRh_s1_sat(HVX_Vector Vu, Word32 Rt) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vh_vmpy_VhRh_s1_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhss) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.w=vmpy(Vu32.h,Vv32.uh) + C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpy_VhVuh(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Ww_vmpy_VhVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhus) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vxx32.w+=vmpy(Vu32.h,Vv32.uh) + C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpyacc_WwVhVuh(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Ww_vmpyacc_WwVhVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhus_acc) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.w=vmpy(Vu32.h,Vv32.h) + C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpy_VhVh(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Ww_vmpy_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhv) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vxx32.w+=vmpy(Vu32.h,Vv32.h) + C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpyacc_WwVhVh(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Ww_vmpyacc_WwVhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhv_acc) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.h=vmpy(Vu32.h,Vv32.h):<<1:rnd:sat + C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpy_VhVh_s1_rnd_sat(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vh_vmpy_VhVh_s1_rnd_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhvsrs) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.w=vmpyieo(Vu32.h,Vv32.h) + C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyieo_VhVh(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vw_vmpyieo_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyieoh) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vx32.w+=vmpyie(Vu32.w,Vv32.h) + C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyieacc_VwVwVh(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vw_vmpyieacc_VwVwVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiewh_acc) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.w=vmpyie(Vu32.w,Vv32.uh) + C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyie_VwVuh(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vw_vmpyie_VwVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiewuh) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vx32.w+=vmpyie(Vu32.w,Vv32.uh) + C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyieacc_VwVwVuh(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vw_vmpyieacc_VwVwVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiewuh_acc) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.h=vmpyi(Vu32.h,Vv32.h) + C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpyi_VhVh(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vh_vmpyi_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyih) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vx32.h+=vmpyi(Vu32.h,Vv32.h) + C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpyiacc_VhVhVh(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vh_vmpyiacc_VhVhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyih_acc) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.h=vmpyi(Vu32.h,Rt32.b) + C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpyi_VhRb(HVX_Vector Vu, Word32 Rt) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vh_vmpyi_VhRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyihb) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vx32.h+=vmpyi(Vu32.h,Rt32.b) + C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpyiacc_VhVhRb(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vh_vmpyiacc_VhVhRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyihb_acc) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.w=vmpyio(Vu32.w,Vv32.h) + C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyio_VwVh(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vw_vmpyio_VwVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiowh) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.w=vmpyi(Vu32.w,Rt32.b) + C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyi_VwRb(HVX_Vector Vu, Word32 Rt) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vw_vmpyi_VwRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwb) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vx32.w+=vmpyi(Vu32.w,Rt32.b) + C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyiacc_VwVwRb(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vw_vmpyiacc_VwVwRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwb_acc) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.w=vmpyi(Vu32.w,Rt32.h) + C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyi_VwRh(HVX_Vector Vu, Word32 Rt) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vw_vmpyi_VwRh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwh) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vx32.w+=vmpyi(Vu32.w,Rt32.h) + C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyiacc_VwVwRh(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vw_vmpyiacc_VwVwRh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwh_acc) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.w=vmpyo(Vu32.w,Vv32.h):<<1:sat + C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyo_VwVh_s1_sat(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vw_vmpyo_VwVh_s1_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyowh) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.w=vmpyo(Vu32.w,Vv32.h):<<1:rnd:sat + C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyo_VwVh_s1_rnd_sat(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vw_vmpyo_VwVh_s1_rnd_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyowh_rnd) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vx32.w+=vmpyo(Vu32.w,Vv32.h):<<1:rnd:sat:shift + C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyoacc_VwVwVh_s1_rnd_sat_shift(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vw_vmpyoacc_VwVwVh_s1_rnd_sat_shift __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyowh_rnd_sacc) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vx32.w+=vmpyo(Vu32.w,Vv32.h):<<1:sat:shift + C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyowh_sacc) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.uh=vmpy(Vu32.ub,Rt32.ub) + C Intrinsic Prototype: HVX_VectorPair Q6_Wuh_vmpy_VubRub(HVX_Vector Vu, Word32 Rt) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wuh_vmpy_VubRub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyub) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vxx32.uh+=vmpy(Vu32.ub,Rt32.ub) + C Intrinsic Prototype: HVX_VectorPair Q6_Wuh_vmpyacc_WuhVubRub(HVX_VectorPair Vxx, HVX_Vector Vu, Word32 Rt) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wuh_vmpyacc_WuhVubRub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyub_acc) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.uh=vmpy(Vu32.ub,Vv32.ub) + C Intrinsic Prototype: HVX_VectorPair Q6_Wuh_vmpy_VubVub(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wuh_vmpy_VubVub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyubv) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vxx32.uh+=vmpy(Vu32.ub,Vv32.ub) + C Intrinsic Prototype: HVX_VectorPair Q6_Wuh_vmpyacc_WuhVubVub(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wuh_vmpyacc_WuhVubVub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyubv_acc) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.uw=vmpy(Vu32.uh,Rt32.uh) + C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vmpy_VuhRuh(HVX_Vector Vu, Word32 Rt) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wuw_vmpy_VuhRuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuh) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vxx32.uw+=vmpy(Vu32.uh,Rt32.uh) + C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vmpyacc_WuwVuhRuh(HVX_VectorPair Vxx, HVX_Vector Vu, Word32 Rt) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wuw_vmpyacc_WuwVuhRuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuh_acc) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.uw=vmpy(Vu32.uh,Vv32.uh) + C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vmpy_VuhVuh(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wuw_vmpy_VuhVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuhv) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vxx32.uw+=vmpy(Vu32.uh,Vv32.uh) + C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vmpyacc_WuwVuhVuh(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wuw_vmpyacc_WuwVuhVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuhv_acc) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32=vmux(Qt4,Vu32,Vv32) + C Intrinsic Prototype: HVX_Vector Q6_V_vmux_QVV(HVX_VectorPred Qt, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_V_vmux_QVV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmux) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.h=vnavg(Vu32.h,Vv32.h) + C Intrinsic Prototype: HVX_Vector Q6_Vh_vnavg_VhVh(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vh_vnavg_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnavgh) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.b=vnavg(Vu32.ub,Vv32.ub) + C Intrinsic Prototype: HVX_Vector Q6_Vb_vnavg_VubVub(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vb_vnavg_VubVub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnavgub) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.w=vnavg(Vu32.w,Vv32.w) + C Intrinsic Prototype: HVX_Vector Q6_Vw_vnavg_VwVw(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vw_vnavg_VwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnavgw) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.h=vnormamt(Vu32.h) + C Intrinsic Prototype: HVX_Vector Q6_Vh_vnormamt_Vh(HVX_Vector Vu) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vh_vnormamt_Vh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnormamth) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.w=vnormamt(Vu32.w) + C Intrinsic Prototype: HVX_Vector Q6_Vw_vnormamt_Vw(HVX_Vector Vu) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vw_vnormamt_Vw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnormamtw) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32=vnot(Vu32) + C Intrinsic Prototype: HVX_Vector Q6_V_vnot_V(HVX_Vector Vu) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_V_vnot_V __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnot) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32=vor(Vu32,Vv32) + C Intrinsic Prototype: HVX_Vector Q6_V_vor_VV(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_V_vor_VV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vor) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.b=vpacke(Vu32.h,Vv32.h) + C Intrinsic Prototype: HVX_Vector Q6_Vb_vpacke_VhVh(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VP + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vb_vpacke_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackeb) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.h=vpacke(Vu32.w,Vv32.w) + C Intrinsic Prototype: HVX_Vector Q6_Vh_vpacke_VwVw(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VP + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vh_vpacke_VwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackeh) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.b=vpack(Vu32.h,Vv32.h):sat + C Intrinsic Prototype: HVX_Vector Q6_Vb_vpack_VhVh_sat(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VP + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vb_vpack_VhVh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackhb_sat) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.ub=vpack(Vu32.h,Vv32.h):sat + C Intrinsic Prototype: HVX_Vector Q6_Vub_vpack_VhVh_sat(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VP + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vub_vpack_VhVh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackhub_sat) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.b=vpacko(Vu32.h,Vv32.h) + C Intrinsic Prototype: HVX_Vector Q6_Vb_vpacko_VhVh(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VP + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vb_vpacko_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackob) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.h=vpacko(Vu32.w,Vv32.w) + C Intrinsic Prototype: HVX_Vector Q6_Vh_vpacko_VwVw(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VP + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vh_vpacko_VwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackoh) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.h=vpack(Vu32.w,Vv32.w):sat + C Intrinsic Prototype: HVX_Vector Q6_Vh_vpack_VwVw_sat(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VP + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vh_vpack_VwVw_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackwh_sat) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.uh=vpack(Vu32.w,Vv32.w):sat + C Intrinsic Prototype: HVX_Vector Q6_Vuh_vpack_VwVw_sat(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VP + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vuh_vpack_VwVw_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackwuh_sat) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.h=vpopcount(Vu32.h) + C Intrinsic Prototype: HVX_Vector Q6_Vh_vpopcount_Vh(HVX_Vector Vu) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vh_vpopcount_Vh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpopcounth) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32=vrdelta(Vu32,Vv32) + C Intrinsic Prototype: HVX_Vector Q6_V_vrdelta_VV(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VP + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_V_vrdelta_VV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrdelta) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.w=vrmpy(Vu32.ub,Rt32.b) + C Intrinsic Prototype: HVX_Vector Q6_Vw_vrmpy_VubRb(HVX_Vector Vu, Word32 Rt) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vw_vrmpy_VubRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybus) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vx32.w+=vrmpy(Vu32.ub,Rt32.b) + C Intrinsic Prototype: HVX_Vector Q6_Vw_vrmpyacc_VwVubRb(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vw_vrmpyacc_VwVubRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybus_acc) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.w=vrmpy(Vuu32.ub,Rt32.b,#u1) + C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vrmpy_WubRbI(HVX_VectorPair Vuu, Word32 Rt, Word32 Iu1) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Ww_vrmpy_WubRbI __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybusi) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vxx32.w+=vrmpy(Vuu32.ub,Rt32.b,#u1) + C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vrmpyacc_WwWubRbI(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt, Word32 Iu1) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Ww_vrmpyacc_WwWubRbI __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybusi_acc) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.w=vrmpy(Vu32.ub,Vv32.b) + C Intrinsic Prototype: HVX_Vector Q6_Vw_vrmpy_VubVb(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vw_vrmpy_VubVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybusv) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vx32.w+=vrmpy(Vu32.ub,Vv32.b) + C Intrinsic Prototype: HVX_Vector Q6_Vw_vrmpyacc_VwVubVb(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vw_vrmpyacc_VwVubVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybusv_acc) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.w=vrmpy(Vu32.b,Vv32.b) + C Intrinsic Prototype: HVX_Vector Q6_Vw_vrmpy_VbVb(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vw_vrmpy_VbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybv) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vx32.w+=vrmpy(Vu32.b,Vv32.b) + C Intrinsic Prototype: HVX_Vector Q6_Vw_vrmpyacc_VwVbVb(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vw_vrmpyacc_VwVbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybv_acc) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.uw=vrmpy(Vu32.ub,Rt32.ub) + C Intrinsic Prototype: HVX_Vector Q6_Vuw_vrmpy_VubRub(HVX_Vector Vu, Word32 Rt) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vuw_vrmpy_VubRub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyub) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vx32.uw+=vrmpy(Vu32.ub,Rt32.ub) + C Intrinsic Prototype: HVX_Vector Q6_Vuw_vrmpyacc_VuwVubRub(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vuw_vrmpyacc_VuwVubRub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyub_acc) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.uw=vrmpy(Vuu32.ub,Rt32.ub,#u1) + C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vrmpy_WubRubI(HVX_VectorPair Vuu, Word32 Rt, Word32 Iu1) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wuw_vrmpy_WubRubI __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyubi) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vxx32.uw+=vrmpy(Vuu32.ub,Rt32.ub,#u1) + C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vrmpyacc_WuwWubRubI(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt, Word32 Iu1) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wuw_vrmpyacc_WuwWubRubI __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyubi_acc) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.uw=vrmpy(Vu32.ub,Vv32.ub) + C Intrinsic Prototype: HVX_Vector Q6_Vuw_vrmpy_VubVub(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vuw_vrmpy_VubVub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyubv) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vx32.uw+=vrmpy(Vu32.ub,Vv32.ub) + C Intrinsic Prototype: HVX_Vector Q6_Vuw_vrmpyacc_VuwVubVub(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vuw_vrmpyacc_VuwVubVub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyubv_acc) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32=vror(Vu32,Rt32) + C Intrinsic Prototype: HVX_Vector Q6_V_vror_VR(HVX_Vector Vu, Word32 Rt) + Instruction Type: CVI_VP + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_V_vror_VR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vror) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.b=vround(Vu32.h,Vv32.h):sat + C Intrinsic Prototype: HVX_Vector Q6_Vb_vround_VhVh_sat(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vb_vround_VhVh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vroundhb) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.ub=vround(Vu32.h,Vv32.h):sat + C Intrinsic Prototype: HVX_Vector Q6_Vub_vround_VhVh_sat(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vub_vround_VhVh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vroundhub) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.h=vround(Vu32.w,Vv32.w):sat + C Intrinsic Prototype: HVX_Vector Q6_Vh_vround_VwVw_sat(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vh_vround_VwVw_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vroundwh) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.uh=vround(Vu32.w,Vv32.w):sat + C Intrinsic Prototype: HVX_Vector Q6_Vuh_vround_VwVw_sat(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vuh_vround_VwVw_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vroundwuh) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.uw=vrsad(Vuu32.ub,Rt32.ub,#u1) + C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vrsad_WubRubI(HVX_VectorPair Vuu, Word32 Rt, Word32 Iu1) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wuw_vrsad_WubRubI __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrsadubi) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vxx32.uw+=vrsad(Vuu32.ub,Rt32.ub,#u1) + C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vrsadacc_WuwWubRubI(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt, Word32 Iu1) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wuw_vrsadacc_WuwWubRubI __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrsadubi_acc) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.ub=vsat(Vu32.h,Vv32.h) + C Intrinsic Prototype: HVX_Vector Q6_Vub_vsat_VhVh(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vub_vsat_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsathub) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.h=vsat(Vu32.w,Vv32.w) + C Intrinsic Prototype: HVX_Vector Q6_Vh_vsat_VwVw(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vh_vsat_VwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsatwh) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.h=vsxt(Vu32.b) + C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vsxt_Vb(HVX_Vector Vu) + Instruction Type: CVI_VA_DV + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Wh_vsxt_Vb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsb) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.w=vsxt(Vu32.h) + C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vsxt_Vh(HVX_Vector Vu) + Instruction Type: CVI_VA_DV + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Ww_vsxt_Vh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsh) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.h=vshuffe(Vu32.h,Vv32.h) + C Intrinsic Prototype: HVX_Vector Q6_Vh_vshuffe_VhVh(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vh_vshuffe_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshufeh) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.b=vshuff(Vu32.b) + C Intrinsic Prototype: HVX_Vector Q6_Vb_vshuff_Vb(HVX_Vector Vu) + Instruction Type: CVI_VP + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vb_vshuff_Vb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshuffb) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.b=vshuffe(Vu32.b,Vv32.b) + C Intrinsic Prototype: HVX_Vector Q6_Vb_vshuffe_VbVb(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vb_vshuffe_VbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshuffeb) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.h=vshuff(Vu32.h) + C Intrinsic Prototype: HVX_Vector Q6_Vh_vshuff_Vh(HVX_Vector Vu) + Instruction Type: CVI_VP + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vh_vshuff_Vh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshuffh) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.b=vshuffo(Vu32.b,Vv32.b) + C Intrinsic Prototype: HVX_Vector Q6_Vb_vshuffo_VbVb(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vb_vshuffo_VbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshuffob) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32=vshuff(Vu32,Vv32,Rt8) + C Intrinsic Prototype: HVX_VectorPair Q6_W_vshuff_VVR(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) + Instruction Type: CVI_VP_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_W_vshuff_VVR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshuffvdd) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.b=vshuffoe(Vu32.b,Vv32.b) + C Intrinsic Prototype: HVX_VectorPair Q6_Wb_vshuffoe_VbVb(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA_DV + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Wb_vshuffoe_VbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshufoeb) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.h=vshuffoe(Vu32.h,Vv32.h) + C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vshuffoe_VhVh(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA_DV + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Wh_vshuffoe_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshufoeh) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.h=vshuffo(Vu32.h,Vv32.h) + C Intrinsic Prototype: HVX_Vector Q6_Vh_vshuffo_VhVh(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vh_vshuffo_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshufoh) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.b=vsub(Vu32.b,Vv32.b) + C Intrinsic Prototype: HVX_Vector Q6_Vb_vsub_VbVb(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vb_vsub_VbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubb) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.b=vsub(Vuu32.b,Vvv32.b) + C Intrinsic Prototype: HVX_VectorPair Q6_Wb_vsub_WbWb(HVX_VectorPair Vuu, HVX_VectorPair Vvv) + Instruction Type: CVI_VA_DV + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Wb_vsub_WbWb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubb_dv) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: if (!Qv4) Vx32.b-=Vu32.b + C Intrinsic Prototype: HVX_Vector Q6_Vb_condnac_QnVbVb(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vb_condnac_QnVbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubbnq) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: if (Qv4) Vx32.b-=Vu32.b + C Intrinsic Prototype: HVX_Vector Q6_Vb_condnac_QVbVb(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vb_condnac_QVbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubbq) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.h=vsub(Vu32.h,Vv32.h) + C Intrinsic Prototype: HVX_Vector Q6_Vh_vsub_VhVh(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vh_vsub_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubh) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.h=vsub(Vuu32.h,Vvv32.h) + C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vsub_WhWh(HVX_VectorPair Vuu, HVX_VectorPair Vvv) + Instruction Type: CVI_VA_DV + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Wh_vsub_WhWh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubh_dv) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: if (!Qv4) Vx32.h-=Vu32.h + C Intrinsic Prototype: HVX_Vector Q6_Vh_condnac_QnVhVh(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vh_condnac_QnVhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubhnq) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: if (Qv4) Vx32.h-=Vu32.h + C Intrinsic Prototype: HVX_Vector Q6_Vh_condnac_QVhVh(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vh_condnac_QVhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubhq) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.h=vsub(Vu32.h,Vv32.h):sat + C Intrinsic Prototype: HVX_Vector Q6_Vh_vsub_VhVh_sat(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vh_vsub_VhVh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubhsat) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.h=vsub(Vuu32.h,Vvv32.h):sat + C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vsub_WhWh_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv) + Instruction Type: CVI_VA_DV + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Wh_vsub_WhWh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubhsat_dv) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.w=vsub(Vu32.h,Vv32.h) + C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vsub_VhVh(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Ww_vsub_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubhw) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.h=vsub(Vu32.ub,Vv32.ub) + C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vsub_VubVub(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wh_vsub_VubVub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsububh) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.ub=vsub(Vu32.ub,Vv32.ub):sat + C Intrinsic Prototype: HVX_Vector Q6_Vub_vsub_VubVub_sat(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vub_vsub_VubVub_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsububsat) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.ub=vsub(Vuu32.ub,Vvv32.ub):sat + C Intrinsic Prototype: HVX_VectorPair Q6_Wub_vsub_WubWub_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv) + Instruction Type: CVI_VA_DV + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Wub_vsub_WubWub_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsububsat_dv) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.uh=vsub(Vu32.uh,Vv32.uh):sat + C Intrinsic Prototype: HVX_Vector Q6_Vuh_vsub_VuhVuh_sat(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vuh_vsub_VuhVuh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubuhsat) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.uh=vsub(Vuu32.uh,Vvv32.uh):sat + C Intrinsic Prototype: HVX_VectorPair Q6_Wuh_vsub_WuhWuh_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv) + Instruction Type: CVI_VA_DV + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Wuh_vsub_WuhWuh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubuhsat_dv) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.w=vsub(Vu32.uh,Vv32.uh) + C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vsub_VuhVuh(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Ww_vsub_VuhVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubuhw) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.w=vsub(Vu32.w,Vv32.w) + C Intrinsic Prototype: HVX_Vector Q6_Vw_vsub_VwVw(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vw_vsub_VwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubw) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.w=vsub(Vuu32.w,Vvv32.w) + C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vsub_WwWw(HVX_VectorPair Vuu, HVX_VectorPair Vvv) + Instruction Type: CVI_VA_DV + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Ww_vsub_WwWw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubw_dv) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: if (!Qv4) Vx32.w-=Vu32.w + C Intrinsic Prototype: HVX_Vector Q6_Vw_condnac_QnVwVw(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vw_condnac_QnVwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubwnq) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: if (Qv4) Vx32.w-=Vu32.w + C Intrinsic Prototype: HVX_Vector Q6_Vw_condnac_QVwVw(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vw_condnac_QVwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubwq) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32.w=vsub(Vu32.w,Vv32.w):sat + C Intrinsic Prototype: HVX_Vector Q6_Vw_vsub_VwVw_sat(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vw_vsub_VwVw_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubwsat) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.w=vsub(Vuu32.w,Vvv32.w):sat + C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vsub_WwWw_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv) + Instruction Type: CVI_VA_DV + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Ww_vsub_WwWw_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubwsat_dv) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32=vswap(Qt4,Vu32,Vv32) + C Intrinsic Prototype: HVX_VectorPair Q6_W_vswap_QVV(HVX_VectorPred Qt, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA_DV + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_W_vswap_QVV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vswap) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.h=vtmpy(Vuu32.b,Rt32.b) + C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vtmpy_WbRb(HVX_VectorPair Vuu, Word32 Rt) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wh_vtmpy_WbRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpyb) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vxx32.h+=vtmpy(Vuu32.b,Rt32.b) + C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vtmpyacc_WhWbRb(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wh_vtmpyacc_WhWbRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpyb_acc) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.h=vtmpy(Vuu32.ub,Rt32.b) + C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vtmpy_WubRb(HVX_VectorPair Vuu, Word32 Rt) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wh_vtmpy_WubRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpybus) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vxx32.h+=vtmpy(Vuu32.ub,Rt32.b) + C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vtmpyacc_WhWubRb(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wh_vtmpyacc_WhWubRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpybus_acc) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.w=vtmpy(Vuu32.h,Rt32.b) + C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vtmpy_WhRb(HVX_VectorPair Vuu, Word32 Rt) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Ww_vtmpy_WhRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpyhb) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vxx32.w+=vtmpy(Vuu32.h,Rt32.b) + C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vtmpyacc_WwWhRb(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Ww_vtmpyacc_WwWhRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpyhb_acc) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.h=vunpack(Vu32.b) + C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vunpack_Vb(HVX_Vector Vu) + Instruction Type: CVI_VP_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Wh_vunpack_Vb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackb) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.w=vunpack(Vu32.h) + C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vunpack_Vh(HVX_Vector Vu) + Instruction Type: CVI_VP_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Ww_vunpack_Vh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackh) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vxx32.h|=vunpacko(Vu32.b) + C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vunpackoor_WhVb(HVX_VectorPair Vxx, HVX_Vector Vu) + Instruction Type: CVI_VP_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Wh_vunpackoor_WhVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackob) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vxx32.w|=vunpacko(Vu32.h) + C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vunpackoor_WwVh(HVX_VectorPair Vxx, HVX_Vector Vu) + Instruction Type: CVI_VP_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Ww_vunpackoor_WwVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackoh) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.uh=vunpack(Vu32.ub) + C Intrinsic Prototype: HVX_VectorPair Q6_Wuh_vunpack_Vub(HVX_Vector Vu) + Instruction Type: CVI_VP_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Wuh_vunpack_Vub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackub) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.uw=vunpack(Vu32.uh) + C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vunpack_Vuh(HVX_Vector Vu) + Instruction Type: CVI_VP_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Wuw_vunpack_Vuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackuh) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vd32=vxor(Vu32,Vv32) + C Intrinsic Prototype: HVX_Vector Q6_V_vxor_VV(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_V_vxor_VV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vxor) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.uh=vzxt(Vu32.ub) + C Intrinsic Prototype: HVX_VectorPair Q6_Wuh_vzxt_Vub(HVX_Vector Vu) + Instruction Type: CVI_VA_DV + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Wuh_vzxt_Vub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vzb) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 60 +/* ========================================================================== + Assembly Syntax: Vdd32.uw=vzxt(Vu32.uh) + C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vzxt_Vuh(HVX_Vector Vu) + Instruction Type: CVI_VA_DV + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Wuw_vzxt_Vuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vzh) +#endif /* __HEXAGON_ARCH___ >= 60 */ + +#if __HVX_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Vd32.b=vsplat(Rt32) + C Intrinsic Prototype: HVX_Vector Q6_Vb_vsplat_R(Word32 Rt) + Instruction Type: CVI_VX_LATE + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vb_vsplat_R __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_lvsplatb) +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HVX_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Vd32.h=vsplat(Rt32) + C Intrinsic Prototype: HVX_Vector Q6_Vh_vsplat_R(Word32 Rt) + Instruction Type: CVI_VX_LATE + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vh_vsplat_R __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_lvsplath) +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HVX_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Qd4=vsetq2(Rt32) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vsetq2_R(Word32 Rt) + Instruction Type: CVI_VP + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vsetq2_R __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_scalar2v2) +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HVX_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Qd4.b=vshuffe(Qs4.h,Qt4.h) + C Intrinsic Prototype: HVX_VectorPred Q6_Qb_vshuffe_QhQh(HVX_VectorPred Qs, HVX_VectorPred Qt) + Instruction Type: CVI_VA_DV + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Qb_vshuffe_QhQh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_shuffeqh) +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HVX_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Qd4.h=vshuffe(Qs4.w,Qt4.w) + C Intrinsic Prototype: HVX_VectorPred Q6_Qh_vshuffe_QwQw(HVX_VectorPred Qs, HVX_VectorPred Qt) + Instruction Type: CVI_VA_DV + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Qh_vshuffe_QwQw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_shuffeqw) +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HVX_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Vd32.b=vadd(Vu32.b,Vv32.b):sat + C Intrinsic Prototype: HVX_Vector Q6_Vb_vadd_VbVb_sat(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vb_vadd_VbVb_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddbsat) +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HVX_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Vdd32.b=vadd(Vuu32.b,Vvv32.b):sat + C Intrinsic Prototype: HVX_VectorPair Q6_Wb_vadd_WbWb_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv) + Instruction Type: CVI_VA_DV + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Wb_vadd_WbWb_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddbsat_dv) +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HVX_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Vd32.w=vadd(Vu32.w,Vv32.w,Qx4):carry + C Intrinsic Prototype: HVX_Vector Q6_Vw_vadd_VwVwQ_carry(HVX_Vector Vu, HVX_Vector Vv, HVX_VectorPred* Qx) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vw_vadd_VwVwQ_carry __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddcarry) +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HVX_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Vd32.h=vadd(vclb(Vu32.h),Vv32.h) + C Intrinsic Prototype: HVX_Vector Q6_Vh_vadd_vclb_VhVh(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vh_vadd_vclb_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddclbh) +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HVX_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Vd32.w=vadd(vclb(Vu32.w),Vv32.w) + C Intrinsic Prototype: HVX_Vector Q6_Vw_vadd_vclb_VwVw(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vw_vadd_vclb_VwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddclbw) +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HVX_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Vxx32.w+=vadd(Vu32.h,Vv32.h) + C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vaddacc_WwVhVh(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Ww_vaddacc_WwVhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhw_acc) +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HVX_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Vxx32.h+=vadd(Vu32.ub,Vv32.ub) + C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vaddacc_WhVubVub(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wh_vaddacc_WhVubVub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddubh_acc) +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HVX_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Vd32.ub=vadd(Vu32.ub,Vv32.b):sat + C Intrinsic Prototype: HVX_Vector Q6_Vub_vadd_VubVb_sat(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vub_vadd_VubVb_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddububb_sat) +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HVX_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Vxx32.w+=vadd(Vu32.uh,Vv32.uh) + C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vaddacc_WwVuhVuh(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Ww_vaddacc_WwVuhVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduhw_acc) +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HVX_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Vd32.uw=vadd(Vu32.uw,Vv32.uw):sat + C Intrinsic Prototype: HVX_Vector Q6_Vuw_vadd_VuwVuw_sat(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vuw_vadd_VuwVuw_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduwsat) +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HVX_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Vdd32.uw=vadd(Vuu32.uw,Vvv32.uw):sat + C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vadd_WuwWuw_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv) + Instruction Type: CVI_VA_DV + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Wuw_vadd_WuwWuw_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduwsat_dv) +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HVX_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Vd32=vand(!Qu4,Rt32) + C Intrinsic Prototype: HVX_Vector Q6_V_vand_QnR(HVX_VectorPred Qu, Word32 Rt) + Instruction Type: CVI_VX_LATE + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_V_vand_QnR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandnqrt) +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HVX_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Vx32|=vand(!Qu4,Rt32) + C Intrinsic Prototype: HVX_Vector Q6_V_vandor_VQnR(HVX_Vector Vx, HVX_VectorPred Qu, Word32 Rt) + Instruction Type: CVI_VX_LATE + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_V_vandor_VQnR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandnqrt_acc) +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HVX_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Vd32=vand(!Qv4,Vu32) + C Intrinsic Prototype: HVX_Vector Q6_V_vand_QnV(HVX_VectorPred Qv, HVX_Vector Vu) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_V_vand_QnV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvnqv) +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HVX_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Vd32=vand(Qv4,Vu32) + C Intrinsic Prototype: HVX_Vector Q6_V_vand_QV(HVX_VectorPred Qv, HVX_Vector Vu) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_V_vand_QV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvqv) +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HVX_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Vd32.b=vasr(Vu32.h,Vv32.h,Rt8):sat + C Intrinsic Prototype: HVX_Vector Q6_Vb_vasr_VhVhR_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vb_vasr_VhVhR_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrhbsat) +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HVX_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Vd32.uh=vasr(Vu32.uw,Vv32.uw,Rt8):rnd:sat + C Intrinsic Prototype: HVX_Vector Q6_Vuh_vasr_VuwVuwR_rnd_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vuh_vasr_VuwVuwR_rnd_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasruwuhrndsat) +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HVX_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Vd32.uh=vasr(Vu32.w,Vv32.w,Rt8):rnd:sat + C Intrinsic Prototype: HVX_Vector Q6_Vuh_vasr_VwVwR_rnd_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vuh_vasr_VwVwR_rnd_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwuhrndsat) +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HVX_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Vd32.ub=vlsr(Vu32.ub,Rt32) + C Intrinsic Prototype: HVX_Vector Q6_Vub_vlsr_VubR(HVX_Vector Vu, Word32 Rt) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vub_vlsr_VubR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlsrb) +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HVX_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Vd32.b=vlut32(Vu32.b,Vv32.b,Rt8):nomatch + C Intrinsic Prototype: HVX_Vector Q6_Vb_vlut32_VbVbR_nomatch(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) + Instruction Type: CVI_VP + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vb_vlut32_VbVbR_nomatch __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvvb_nm) +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HVX_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Vx32.b|=vlut32(Vu32.b,Vv32.b,#u3) + C Intrinsic Prototype: HVX_Vector Q6_Vb_vlut32or_VbVbVbI(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv, Word32 Iu3) + Instruction Type: CVI_VP_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vb_vlut32or_VbVbVbI __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvvb_oracci) +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HVX_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Vd32.b=vlut32(Vu32.b,Vv32.b,#u3) + C Intrinsic Prototype: HVX_Vector Q6_Vb_vlut32_VbVbI(HVX_Vector Vu, HVX_Vector Vv, Word32 Iu3) + Instruction Type: CVI_VP + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vb_vlut32_VbVbI __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvvbi) +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HVX_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Vdd32.h=vlut16(Vu32.b,Vv32.h,Rt8):nomatch + C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vlut16_VbVhR_nomatch(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) + Instruction Type: CVI_VP_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Wh_vlut16_VbVhR_nomatch __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvwh_nm) +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HVX_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Vxx32.h|=vlut16(Vu32.b,Vv32.h,#u3) + C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vlut16or_WhVbVhI(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv, Word32 Iu3) + Instruction Type: CVI_VP_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Wh_vlut16or_WhVbVhI __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvwh_oracci) +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HVX_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Vdd32.h=vlut16(Vu32.b,Vv32.h,#u3) + C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vlut16_VbVhI(HVX_Vector Vu, HVX_Vector Vv, Word32 Iu3) + Instruction Type: CVI_VP_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Wh_vlut16_VbVhI __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvwhi) +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HVX_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Vd32.b=vmax(Vu32.b,Vv32.b) + C Intrinsic Prototype: HVX_Vector Q6_Vb_vmax_VbVb(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vb_vmax_VbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaxb) +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HVX_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Vd32.b=vmin(Vu32.b,Vv32.b) + C Intrinsic Prototype: HVX_Vector Q6_Vb_vmin_VbVb(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vb_vmin_VbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vminb) +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HVX_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Vdd32.w=vmpa(Vuu32.uh,Rt32.b) + C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpa_WuhRb(HVX_VectorPair Vuu, Word32 Rt) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Ww_vmpa_WuhRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpauhb) +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HVX_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Vxx32.w+=vmpa(Vuu32.uh,Rt32.b) + C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpaacc_WwWuhRb(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Ww_vmpaacc_WwWuhRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpauhb_acc) +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HVX_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Vdd32=vmpye(Vu32.w,Vv32.uh) + C Intrinsic Prototype: HVX_VectorPair Q6_W_vmpye_VwVuh(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_W_vmpye_VwVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyewuh_64) +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HVX_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Vd32.w=vmpyi(Vu32.w,Rt32.ub) + C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyi_VwRub(HVX_Vector Vu, Word32 Rt) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vw_vmpyi_VwRub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwub) +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HVX_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Vx32.w+=vmpyi(Vu32.w,Rt32.ub) + C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyiacc_VwVwRub(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vw_vmpyiacc_VwVwRub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwub_acc) +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HVX_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Vxx32+=vmpyo(Vu32.w,Vv32.h) + C Intrinsic Prototype: HVX_VectorPair Q6_W_vmpyoacc_WVwVh(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_W_vmpyoacc_WVwVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyowh_64_acc) +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HVX_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Vd32.ub=vround(Vu32.uh,Vv32.uh):sat + C Intrinsic Prototype: HVX_Vector Q6_Vub_vround_VuhVuh_sat(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vub_vround_VuhVuh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrounduhub) +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HVX_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Vd32.uh=vround(Vu32.uw,Vv32.uw):sat + C Intrinsic Prototype: HVX_Vector Q6_Vuh_vround_VuwVuw_sat(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vuh_vround_VuwVuw_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrounduwuh) +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HVX_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Vd32.uh=vsat(Vu32.uw,Vv32.uw) + C Intrinsic Prototype: HVX_Vector Q6_Vuh_vsat_VuwVuw(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vuh_vsat_VuwVuw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsatuwuh) +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HVX_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Vd32.b=vsub(Vu32.b,Vv32.b):sat + C Intrinsic Prototype: HVX_Vector Q6_Vb_vsub_VbVb_sat(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vb_vsub_VbVb_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubbsat) +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HVX_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Vdd32.b=vsub(Vuu32.b,Vvv32.b):sat + C Intrinsic Prototype: HVX_VectorPair Q6_Wb_vsub_WbWb_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv) + Instruction Type: CVI_VA_DV + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Wb_vsub_WbWb_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubbsat_dv) +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HVX_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Vd32.w=vsub(Vu32.w,Vv32.w,Qx4):carry + C Intrinsic Prototype: HVX_Vector Q6_Vw_vsub_VwVwQ_carry(HVX_Vector Vu, HVX_Vector Vv, HVX_VectorPred* Qx) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vw_vsub_VwVwQ_carry __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubcarry) +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HVX_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Vd32.ub=vsub(Vu32.ub,Vv32.b):sat + C Intrinsic Prototype: HVX_Vector Q6_Vub_vsub_VubVb_sat(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vub_vsub_VubVb_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubububb_sat) +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HVX_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Vd32.uw=vsub(Vu32.uw,Vv32.uw):sat + C Intrinsic Prototype: HVX_Vector Q6_Vuw_vsub_VuwVuw_sat(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vuw_vsub_VuwVuw_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubuwsat) +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HVX_ARCH__ >= 62 +/* ========================================================================== + Assembly Syntax: Vdd32.uw=vsub(Vuu32.uw,Vvv32.uw):sat + C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vsub_WuwWuw_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv) + Instruction Type: CVI_VA_DV + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Wuw_vsub_WuwWuw_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubuwsat_dv) +#endif /* __HEXAGON_ARCH___ >= 62 */ + +#if __HVX_ARCH__ >= 65 +/* ========================================================================== + Assembly Syntax: Vd32.b=vabs(Vu32.b) + C Intrinsic Prototype: HVX_Vector Q6_Vb_vabs_Vb(HVX_Vector Vu) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vb_vabs_Vb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsb) +#endif /* __HEXAGON_ARCH___ >= 65 */ + +#if __HVX_ARCH__ >= 65 +/* ========================================================================== + Assembly Syntax: Vd32.b=vabs(Vu32.b):sat + C Intrinsic Prototype: HVX_Vector Q6_Vb_vabs_Vb_sat(HVX_Vector Vu) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vb_vabs_Vb_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsb_sat) +#endif /* __HEXAGON_ARCH___ >= 65 */ + +#if __HVX_ARCH__ >= 65 +/* ========================================================================== + Assembly Syntax: Vx32.h+=vasl(Vu32.h,Rt32) + C Intrinsic Prototype: HVX_Vector Q6_Vh_vaslacc_VhVhR(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vh_vaslacc_VhVhR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslh_acc) +#endif /* __HEXAGON_ARCH___ >= 65 */ + +#if __HVX_ARCH__ >= 65 +/* ========================================================================== + Assembly Syntax: Vx32.h+=vasr(Vu32.h,Rt32) + C Intrinsic Prototype: HVX_Vector Q6_Vh_vasracc_VhVhR(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vh_vasracc_VhVhR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrh_acc) +#endif /* __HEXAGON_ARCH___ >= 65 */ + +#if __HVX_ARCH__ >= 65 +/* ========================================================================== + Assembly Syntax: Vd32.ub=vasr(Vu32.uh,Vv32.uh,Rt8):rnd:sat + C Intrinsic Prototype: HVX_Vector Q6_Vub_vasr_VuhVuhR_rnd_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vub_vasr_VuhVuhR_rnd_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasruhubrndsat) +#endif /* __HEXAGON_ARCH___ >= 65 */ + +#if __HVX_ARCH__ >= 65 +/* ========================================================================== + Assembly Syntax: Vd32.ub=vasr(Vu32.uh,Vv32.uh,Rt8):sat + C Intrinsic Prototype: HVX_Vector Q6_Vub_vasr_VuhVuhR_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vub_vasr_VuhVuhR_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasruhubsat) +#endif /* __HEXAGON_ARCH___ >= 65 */ + +#if __HVX_ARCH__ >= 65 +/* ========================================================================== + Assembly Syntax: Vd32.uh=vasr(Vu32.uw,Vv32.uw,Rt8):sat + C Intrinsic Prototype: HVX_Vector Q6_Vuh_vasr_VuwVuwR_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vuh_vasr_VuwVuwR_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasruwuhsat) +#endif /* __HEXAGON_ARCH___ >= 65 */ + +#if __HVX_ARCH__ >= 65 +/* ========================================================================== + Assembly Syntax: Vd32.b=vavg(Vu32.b,Vv32.b) + C Intrinsic Prototype: HVX_Vector Q6_Vb_vavg_VbVb(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vb_vavg_VbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgb) +#endif /* __HEXAGON_ARCH___ >= 65 */ + +#if __HVX_ARCH__ >= 65 +/* ========================================================================== + Assembly Syntax: Vd32.b=vavg(Vu32.b,Vv32.b):rnd + C Intrinsic Prototype: HVX_Vector Q6_Vb_vavg_VbVb_rnd(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vb_vavg_VbVb_rnd __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgbrnd) +#endif /* __HEXAGON_ARCH___ >= 65 */ + +#if __HVX_ARCH__ >= 65 +/* ========================================================================== + Assembly Syntax: Vd32.uw=vavg(Vu32.uw,Vv32.uw) + C Intrinsic Prototype: HVX_Vector Q6_Vuw_vavg_VuwVuw(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vuw_vavg_VuwVuw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavguw) +#endif /* __HEXAGON_ARCH___ >= 65 */ + +#if __HVX_ARCH__ >= 65 +/* ========================================================================== + Assembly Syntax: Vd32.uw=vavg(Vu32.uw,Vv32.uw):rnd + C Intrinsic Prototype: HVX_Vector Q6_Vuw_vavg_VuwVuw_rnd(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vuw_vavg_VuwVuw_rnd __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavguwrnd) +#endif /* __HEXAGON_ARCH___ >= 65 */ + +#if __HVX_ARCH__ >= 65 +/* ========================================================================== + Assembly Syntax: Vdd32=#0 + C Intrinsic Prototype: HVX_VectorPair Q6_W_vzero() + Instruction Type: MAPPING + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_W_vzero __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdd0) +#endif /* __HEXAGON_ARCH___ >= 65 */ + +#if __HVX_ARCH__ >= 65 +/* ========================================================================== + Assembly Syntax: vtmp.h=vgather(Rt32,Mu2,Vv32.h).h + C Intrinsic Prototype: void Q6_vgather_ARMVh(HVX_Vector* Rs, Word32 Rt, Word32 Mu, HVX_Vector Vv) + Instruction Type: CVI_GATHER + Execution Slots: SLOT01 + ========================================================================== */ + +#define Q6_vgather_ARMVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermh) +#endif /* __HEXAGON_ARCH___ >= 65 */ + +#if __HVX_ARCH__ >= 65 +/* ========================================================================== + Assembly Syntax: if (Qs4) vtmp.h=vgather(Rt32,Mu2,Vv32.h).h + C Intrinsic Prototype: void Q6_vgather_AQRMVh(HVX_Vector* Rs, HVX_VectorPred Qs, Word32 Rt, Word32 Mu, HVX_Vector Vv) + Instruction Type: CVI_GATHER + Execution Slots: SLOT01 + ========================================================================== */ + +#define Q6_vgather_AQRMVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermhq) +#endif /* __HEXAGON_ARCH___ >= 65 */ + +#if __HVX_ARCH__ >= 65 +/* ========================================================================== + Assembly Syntax: vtmp.h=vgather(Rt32,Mu2,Vvv32.w).h + C Intrinsic Prototype: void Q6_vgather_ARMWw(HVX_Vector* Rs, Word32 Rt, Word32 Mu, HVX_VectorPair Vvv) + Instruction Type: CVI_GATHER_DV + Execution Slots: SLOT01 + ========================================================================== */ + +#define Q6_vgather_ARMWw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermhw) +#endif /* __HEXAGON_ARCH___ >= 65 */ + +#if __HVX_ARCH__ >= 65 +/* ========================================================================== + Assembly Syntax: if (Qs4) vtmp.h=vgather(Rt32,Mu2,Vvv32.w).h + C Intrinsic Prototype: void Q6_vgather_AQRMWw(HVX_Vector* Rs, HVX_VectorPred Qs, Word32 Rt, Word32 Mu, HVX_VectorPair Vvv) + Instruction Type: CVI_GATHER_DV + Execution Slots: SLOT01 + ========================================================================== */ + +#define Q6_vgather_AQRMWw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermhwq) +#endif /* __HEXAGON_ARCH___ >= 65 */ + +#if __HVX_ARCH__ >= 65 +/* ========================================================================== + Assembly Syntax: vtmp.w=vgather(Rt32,Mu2,Vv32.w).w + C Intrinsic Prototype: void Q6_vgather_ARMVw(HVX_Vector* Rs, Word32 Rt, Word32 Mu, HVX_Vector Vv) + Instruction Type: CVI_GATHER + Execution Slots: SLOT01 + ========================================================================== */ + +#define Q6_vgather_ARMVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermw) +#endif /* __HEXAGON_ARCH___ >= 65 */ + +#if __HVX_ARCH__ >= 65 +/* ========================================================================== + Assembly Syntax: if (Qs4) vtmp.w=vgather(Rt32,Mu2,Vv32.w).w + C Intrinsic Prototype: void Q6_vgather_AQRMVw(HVX_Vector* Rs, HVX_VectorPred Qs, Word32 Rt, Word32 Mu, HVX_Vector Vv) + Instruction Type: CVI_GATHER + Execution Slots: SLOT01 + ========================================================================== */ + +#define Q6_vgather_AQRMVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermwq) +#endif /* __HEXAGON_ARCH___ >= 65 */ + +#if __HVX_ARCH__ >= 65 +/* ========================================================================== + Assembly Syntax: Vd32.h=vlut4(Vu32.uh,Rtt32.h) + C Intrinsic Prototype: HVX_Vector Q6_Vh_vlut4_VuhPh(HVX_Vector Vu, Word64 Rtt) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT2 + ========================================================================== */ + +#define Q6_Vh_vlut4_VuhPh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlut4) +#endif /* __HEXAGON_ARCH___ >= 65 */ + +#if __HVX_ARCH__ >= 65 +/* ========================================================================== + Assembly Syntax: Vdd32.h=vmpa(Vuu32.ub,Rt32.ub) + C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpa_WubRub(HVX_VectorPair Vuu, Word32 Rt) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wh_vmpa_WubRub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabuu) +#endif /* __HEXAGON_ARCH___ >= 65 */ + +#if __HVX_ARCH__ >= 65 +/* ========================================================================== + Assembly Syntax: Vxx32.h+=vmpa(Vuu32.ub,Rt32.ub) + C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpaacc_WhWubRub(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wh_vmpaacc_WhWubRub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabuu_acc) +#endif /* __HEXAGON_ARCH___ >= 65 */ + +#if __HVX_ARCH__ >= 65 +/* ========================================================================== + Assembly Syntax: Vx32.h=vmpa(Vx32.h,Vu32.h,Rtt32.h):sat + C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpa_VhVhVhPh_sat(HVX_Vector Vx, HVX_Vector Vu, Word64 Rtt) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT2 + ========================================================================== */ + +#define Q6_Vh_vmpa_VhVhVhPh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpahhsat) +#endif /* __HEXAGON_ARCH___ >= 65 */ + +#if __HVX_ARCH__ >= 65 +/* ========================================================================== + Assembly Syntax: Vx32.h=vmpa(Vx32.h,Vu32.uh,Rtt32.uh):sat + C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpa_VhVhVuhPuh_sat(HVX_Vector Vx, HVX_Vector Vu, Word64 Rtt) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT2 + ========================================================================== */ + +#define Q6_Vh_vmpa_VhVhVuhPuh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpauhuhsat) +#endif /* __HEXAGON_ARCH___ >= 65 */ + +#if __HVX_ARCH__ >= 65 +/* ========================================================================== + Assembly Syntax: Vx32.h=vmps(Vx32.h,Vu32.uh,Rtt32.uh):sat + C Intrinsic Prototype: HVX_Vector Q6_Vh_vmps_VhVhVuhPuh_sat(HVX_Vector Vx, HVX_Vector Vu, Word64 Rtt) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT2 + ========================================================================== */ + +#define Q6_Vh_vmps_VhVhVuhPuh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpsuhuhsat) +#endif /* __HEXAGON_ARCH___ >= 65 */ + +#if __HVX_ARCH__ >= 65 +/* ========================================================================== + Assembly Syntax: Vxx32.w+=vmpy(Vu32.h,Rt32.h) + C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpyacc_WwVhRh(HVX_VectorPair Vxx, HVX_Vector Vu, Word32 Rt) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Ww_vmpyacc_WwVhRh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyh_acc) +#endif /* __HEXAGON_ARCH___ >= 65 */ + +#if __HVX_ARCH__ >= 65 +/* ========================================================================== + Assembly Syntax: Vd32.uw=vmpye(Vu32.uh,Rt32.uh) + C Intrinsic Prototype: HVX_Vector Q6_Vuw_vmpye_VuhRuh(HVX_Vector Vu, Word32 Rt) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vuw_vmpye_VuhRuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuhe) +#endif /* __HEXAGON_ARCH___ >= 65 */ + +#if __HVX_ARCH__ >= 65 +/* ========================================================================== + Assembly Syntax: Vx32.uw+=vmpye(Vu32.uh,Rt32.uh) + C Intrinsic Prototype: HVX_Vector Q6_Vuw_vmpyeacc_VuwVuhRuh(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vuw_vmpyeacc_VuwVuhRuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuhe_acc) +#endif /* __HEXAGON_ARCH___ >= 65 */ + +#if __HVX_ARCH__ >= 65 +/* ========================================================================== + Assembly Syntax: Vd32.b=vnavg(Vu32.b,Vv32.b) + C Intrinsic Prototype: HVX_Vector Q6_Vb_vnavg_VbVb(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vb_vnavg_VbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnavgb) +#endif /* __HEXAGON_ARCH___ >= 65 */ + +#if __HVX_ARCH__ >= 65 +/* ========================================================================== + Assembly Syntax: Vd32.b=prefixsum(Qv4) + C Intrinsic Prototype: HVX_Vector Q6_Vb_prefixsum_Q(HVX_VectorPred Qv) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vb_prefixsum_Q __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vprefixqb) +#endif /* __HEXAGON_ARCH___ >= 65 */ + +#if __HVX_ARCH__ >= 65 +/* ========================================================================== + Assembly Syntax: Vd32.h=prefixsum(Qv4) + C Intrinsic Prototype: HVX_Vector Q6_Vh_prefixsum_Q(HVX_VectorPred Qv) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vh_prefixsum_Q __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vprefixqh) +#endif /* __HEXAGON_ARCH___ >= 65 */ + +#if __HVX_ARCH__ >= 65 +/* ========================================================================== + Assembly Syntax: Vd32.w=prefixsum(Qv4) + C Intrinsic Prototype: HVX_Vector Q6_Vw_prefixsum_Q(HVX_VectorPred Qv) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vw_prefixsum_Q __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vprefixqw) +#endif /* __HEXAGON_ARCH___ >= 65 */ + +#if __HVX_ARCH__ >= 65 +/* ========================================================================== + Assembly Syntax: vscatter(Rt32,Mu2,Vv32.h).h=Vw32 + C Intrinsic Prototype: void Q6_vscatter_RMVhV(Word32 Rt, Word32 Mu, HVX_Vector Vv, HVX_Vector Vw) + Instruction Type: CVI_SCATTER + Execution Slots: SLOT0 + ========================================================================== */ + +#define Q6_vscatter_RMVhV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermh) +#endif /* __HEXAGON_ARCH___ >= 65 */ + +#if __HVX_ARCH__ >= 65 +/* ========================================================================== + Assembly Syntax: vscatter(Rt32,Mu2,Vv32.h).h+=Vw32 + C Intrinsic Prototype: void Q6_vscatteracc_RMVhV(Word32 Rt, Word32 Mu, HVX_Vector Vv, HVX_Vector Vw) + Instruction Type: CVI_SCATTER + Execution Slots: SLOT0 + ========================================================================== */ + +#define Q6_vscatteracc_RMVhV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermh_add) +#endif /* __HEXAGON_ARCH___ >= 65 */ + +#if __HVX_ARCH__ >= 65 +/* ========================================================================== + Assembly Syntax: if (Qs4) vscatter(Rt32,Mu2,Vv32.h).h=Vw32 + C Intrinsic Prototype: void Q6_vscatter_QRMVhV(HVX_VectorPred Qs, Word32 Rt, Word32 Mu, HVX_Vector Vv, HVX_Vector Vw) + Instruction Type: CVI_SCATTER + Execution Slots: SLOT0 + ========================================================================== */ + +#define Q6_vscatter_QRMVhV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermhq) +#endif /* __HEXAGON_ARCH___ >= 65 */ + +#if __HVX_ARCH__ >= 65 +/* ========================================================================== + Assembly Syntax: vscatter(Rt32,Mu2,Vvv32.w).h=Vw32 + C Intrinsic Prototype: void Q6_vscatter_RMWwV(Word32 Rt, Word32 Mu, HVX_VectorPair Vvv, HVX_Vector Vw) + Instruction Type: CVI_SCATTER_DV + Execution Slots: SLOT0 + ========================================================================== */ + +#define Q6_vscatter_RMWwV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermhw) +#endif /* __HEXAGON_ARCH___ >= 65 */ + +#if __HVX_ARCH__ >= 65 +/* ========================================================================== + Assembly Syntax: vscatter(Rt32,Mu2,Vvv32.w).h+=Vw32 + C Intrinsic Prototype: void Q6_vscatteracc_RMWwV(Word32 Rt, Word32 Mu, HVX_VectorPair Vvv, HVX_Vector Vw) + Instruction Type: CVI_SCATTER_DV + Execution Slots: SLOT0 + ========================================================================== */ + +#define Q6_vscatteracc_RMWwV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermhw_add) +#endif /* __HEXAGON_ARCH___ >= 65 */ + +#if __HVX_ARCH__ >= 65 +/* ========================================================================== + Assembly Syntax: if (Qs4) vscatter(Rt32,Mu2,Vvv32.w).h=Vw32 + C Intrinsic Prototype: void Q6_vscatter_QRMWwV(HVX_VectorPred Qs, Word32 Rt, Word32 Mu, HVX_VectorPair Vvv, HVX_Vector Vw) + Instruction Type: CVI_SCATTER_DV + Execution Slots: SLOT0 + ========================================================================== */ + +#define Q6_vscatter_QRMWwV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermhwq) +#endif /* __HEXAGON_ARCH___ >= 65 */ + +#if __HVX_ARCH__ >= 65 +/* ========================================================================== + Assembly Syntax: vscatter(Rt32,Mu2,Vv32.w).w=Vw32 + C Intrinsic Prototype: void Q6_vscatter_RMVwV(Word32 Rt, Word32 Mu, HVX_Vector Vv, HVX_Vector Vw) + Instruction Type: CVI_SCATTER + Execution Slots: SLOT0 + ========================================================================== */ + +#define Q6_vscatter_RMVwV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermw) +#endif /* __HEXAGON_ARCH___ >= 65 */ + +#if __HVX_ARCH__ >= 65 +/* ========================================================================== + Assembly Syntax: vscatter(Rt32,Mu2,Vv32.w).w+=Vw32 + C Intrinsic Prototype: void Q6_vscatteracc_RMVwV(Word32 Rt, Word32 Mu, HVX_Vector Vv, HVX_Vector Vw) + Instruction Type: CVI_SCATTER + Execution Slots: SLOT0 + ========================================================================== */ + +#define Q6_vscatteracc_RMVwV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermw_add) +#endif /* __HEXAGON_ARCH___ >= 65 */ + +#if __HVX_ARCH__ >= 65 +/* ========================================================================== + Assembly Syntax: if (Qs4) vscatter(Rt32,Mu2,Vv32.w).w=Vw32 + C Intrinsic Prototype: void Q6_vscatter_QRMVwV(HVX_VectorPred Qs, Word32 Rt, Word32 Mu, HVX_Vector Vv, HVX_Vector Vw) + Instruction Type: CVI_SCATTER + Execution Slots: SLOT0 + ========================================================================== */ + +#define Q6_vscatter_QRMVwV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermwq) +#endif /* __HEXAGON_ARCH___ >= 65 */ + +#if __HVX_ARCH__ >= 66 +/* ========================================================================== + Assembly Syntax: Vd32.w=vadd(Vu32.w,Vv32.w,Qs4):carry:sat + C Intrinsic Prototype: HVX_Vector Q6_Vw_vadd_VwVwQ_carry_sat(HVX_Vector Vu, HVX_Vector Vv, HVX_VectorPred Qs) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vw_vadd_VwVwQ_carry_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddcarrysat) +#endif /* __HEXAGON_ARCH___ >= 66 */ + +#if __HVX_ARCH__ >= 66 +/* ========================================================================== + Assembly Syntax: Vxx32.w=vasrinto(Vu32.w,Vv32.w) + C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vasrinto_WwVwVw(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VP_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Ww_vasrinto_WwVwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasr_into) +#endif /* __HEXAGON_ARCH___ >= 66 */ + +#if __HVX_ARCH__ >= 66 +/* ========================================================================== + Assembly Syntax: Vd32.uw=vrotr(Vu32.uw,Vv32.uw) + C Intrinsic Prototype: HVX_Vector Q6_Vuw_vrotr_VuwVuw(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vuw_vrotr_VuwVuw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrotr) +#endif /* __HEXAGON_ARCH___ >= 66 */ + +#if __HVX_ARCH__ >= 66 +/* ========================================================================== + Assembly Syntax: Vd32.w=vsatdw(Vu32.w,Vv32.w) + C Intrinsic Prototype: HVX_Vector Q6_Vw_vsatdw_VwVw(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vw_vsatdw_VwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsatdw) +#endif /* __HEXAGON_ARCH___ >= 66 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vdd32.w=v6mpy(Vuu32.ub,Vvv32.b,#u2):h + C Intrinsic Prototype: HVX_VectorPair Q6_Ww_v6mpy_WubWbI_h(HVX_VectorPair Vuu, HVX_VectorPair Vvv, Word32 Iu2) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Ww_v6mpy_WubWbI_h __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_v6mpyhubs10) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vxx32.w+=v6mpy(Vuu32.ub,Vvv32.b,#u2):h + C Intrinsic Prototype: HVX_VectorPair Q6_Ww_v6mpyacc_WwWubWbI_h(HVX_VectorPair Vxx, HVX_VectorPair Vuu, HVX_VectorPair Vvv, Word32 Iu2) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Ww_v6mpyacc_WwWubWbI_h __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_v6mpyhubs10_vxx) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vdd32.w=v6mpy(Vuu32.ub,Vvv32.b,#u2):v + C Intrinsic Prototype: HVX_VectorPair Q6_Ww_v6mpy_WubWbI_v(HVX_VectorPair Vuu, HVX_VectorPair Vvv, Word32 Iu2) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Ww_v6mpy_WubWbI_v __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_v6mpyvubs10) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vxx32.w+=v6mpy(Vuu32.ub,Vvv32.b,#u2):v + C Intrinsic Prototype: HVX_VectorPair Q6_Ww_v6mpyacc_WwWubWbI_v(HVX_VectorPair Vxx, HVX_VectorPair Vuu, HVX_VectorPair Vvv, Word32 Iu2) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Ww_v6mpyacc_WwWubWbI_v __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_v6mpyvubs10_vxx) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#endif /* __HVX__ */ + +#endif diff --git a/clang/test/Headers/hexagon-audio-headers.c b/clang/test/Headers/hexagon-audio-headers.c new file mode 100644 index 0000000000000..d7ebda3fbc44e --- /dev/null +++ b/clang/test/Headers/hexagon-audio-headers.c @@ -0,0 +1,36 @@ +// REQUIRES: hexagon-registered-target + +// RUN: %clang_cc1 -O0 -internal-isystem %S/../../lib/Headers/ \ +// RUN: -target-cpu hexagonv67t -triple hexagon-unknown-elf \ +// RUN: -emit-llvm %s -o - | FileCheck %s + +// RUN: %clang_cc1 -O0 -internal-isystem %S/../../lib/Headers/ \ +// RUN: -target-cpu hexagonv67t -triple hexagon-unknown-elf -x c++ \ +// RUN: -emit-llvm %s -o - | FileCheck %s + +// RUN: not %clang_cc1 -O0 -internal-isystem %S/../../lib/Headers/ \ +// RUN: -target-cpu hexagonv68 -triple hexagon-unknown-elf -x c++ \ +// RUN: -fsyntax-only %s 2>&1 | FileCheck --implicit-check-not='error:' \ +// RUN: --check-prefix=CHECK-ERR-CXX %s + +// RUN: not %clang_cc1 -O0 -internal-isystem %S/../../lib/Headers/ \ +// RUN: -target-cpu hexagonv68 -triple hexagon-unknown-elf -std=c99 \ +// RUN: -Wimplicit-function-declaration -Werror -fsyntax-only %s 2>&1 | \ +// RUN: FileCheck --implicit-check-not='error:' --check-prefix=CHECK-ERR-C99 %s + +#include + +void test_audio() { + unsigned int b; + unsigned long long c; + + // CHECK-ERR-CXX: error: use of undeclared identifier 'Q6_R_clip_RI' + // CHECK-ERR-C99: error: implicit declaration of function 'Q6_R_clip_RI' is invalid in C99 + // CHECK: call i32 @llvm.hexagon.A7.clip + b = Q6_R_clip_RI(b, 9); + + // CHECK-ERR-CXX: error: use of undeclared identifier 'Q6_P_cround_PI' + // CHECK-ERR-C99: error: implicit declaration of function 'Q6_P_cround_PI' is invalid in C99 + // CHECK: call i64 @llvm.hexagon.A7.cround + c = Q6_P_cround_PI(c, 12); +} diff --git a/clang/test/Headers/hexagon-headers.c b/clang/test/Headers/hexagon-headers.c new file mode 100644 index 0000000000000..529ffce0658c5 --- /dev/null +++ b/clang/test/Headers/hexagon-headers.c @@ -0,0 +1,28 @@ +// REQUIRES: hexagon-registered-target + +// RUN: %clang_cc1 -O0 -internal-isystem %S/../../lib/Headers/ \ +// RUN: -target-cpu hexagonv68 -triple hexagon-unknown-elf \ +// RUN: -emit-llvm %s -o - | FileCheck %s + +// RUN: %clang_cc1 -O0 -internal-isystem %S/../../lib/Headers/ \ +// RUN: -target-cpu hexagonv68 -triple hexagon-unknown-elf -x c++ \ +// RUN: -emit-llvm %s -o - | FileCheck %s + +#include + +// expected-no-diagnostics + +void test_protos(float a, unsigned int b) { + unsigned char c; + // CHECK: call i64 @llvm.hexagon.A2.absp + b = Q6_P_abs_P(b); +} + +void test_dma() { + unsigned int b; + + // CHECK: call i32 @llvm.hexagon.Y6.dmpoll + b = Q6_R_dmpoll(); + // CHECK: call i32 @llvm.hexagon.Y6.dmpause + b = Q6_R_dmpause(); +} diff --git a/clang/test/Headers/hexagon-hvx-headers.c b/clang/test/Headers/hexagon-hvx-headers.c new file mode 100644 index 0000000000000..afea9a6bee298 --- /dev/null +++ b/clang/test/Headers/hexagon-hvx-headers.c @@ -0,0 +1,37 @@ +// REQUIRES: hexagon-registered-target + +// RUN: %clang_cc1 -O0 -internal-isystem %S/../../lib/Headers/ \ +// RUN: -target-cpu hexagonv68 -triple hexagon-unknown-elf \ +// RUN: -target-feature +hvx-length128b -target-feature +hvxv68 \ +// RUN: -emit-llvm %s -o - | FileCheck --check-prefix=CHECK %s + +// RUN: %clang_cc1 -O0 -internal-isystem %S/../../lib/Headers/ \ +// RUN: -target-cpu hexagonv68 -triple hexagon-unknown-elf -DDIRECT \ +// RUN: -target-feature +hvx-length128b -target-feature +hvxv68 \ +// RUN: -emit-llvm %s -o - | FileCheck --check-prefix=CHECK %s + +// RUN: %clang_cc1 -O0 -internal-isystem %S/../../lib/Headers/ \ +// RUN: -target-cpu hexagonv68 -triple hexagon-unknown-elf -x c++ \ +// RUN: -target-feature +hvx-length128b -target-feature +hvxv68 \ +// RUN: -emit-llvm %s -o - | FileCheck --check-prefix=CHECK %s + +// RUN: %clang_cc1 -O0 -internal-isystem %S/../../lib/Headers/ \ +// RUN: -target-cpu hexagonv68 -triple hexagon-unknown-elf \ +// RUN: -target-feature +hvx-length64b -target-feature +hvxv68 \ +// RUN: -emit-llvm %s -o - | FileCheck --check-prefix=CHECK-64 %s + +#ifdef DIRECT +#include +#else +#include +#endif +#include + +// expected-no-diagnostics + +void test_hvx_protos(float a, unsigned int b) { + HVX_VectorPair c; + // CHECK-64: call <32 x i32> @llvm.hexagon.V6.v6mpyhubs10 + // CHECK: call <64 x i32> @llvm.hexagon.V6.v6mpyhubs10.128B + c = Q6_Ww_v6mpy_WubWbI_h(c, c, 12); +} From 9184090c967c08aa78a87665e0cde20177cfc9ae Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Thu, 1 Jul 2021 04:00:13 +0000 Subject: [PATCH 379/619] [gn build] Port 28b01c59c93d --- llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn index 7c127c3a38f95..bac8baeea1010 100644 --- a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn @@ -145,9 +145,13 @@ copy("Headers") { "fmaintrin.h", "fxsrintrin.h", "gfniintrin.h", + "hexagon_circ_brev_intrinsics.h", + "hexagon_protos.h", + "hexagon_types.h", "hresetintrin.h", "htmintrin.h", "htmxlintrin.h", + "hvx_hexagon_protos.h", "ia32intrin.h", "immintrin.h", "intrin.h", From 96f15aa5bbb0c6b2a56fa5fcbbd58f5b69fe4076 Mon Sep 17 00:00:00 2001 From: Jacob Hegna Date: Wed, 23 Jun 2021 05:22:18 +0000 Subject: [PATCH 380/619] Fail gracefully if no inlining model is available to download. Differential Revision: https://reviews.llvm.org/D104829 --- llvm/cmake/modules/TensorFlowCompile.cmake | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/llvm/cmake/modules/TensorFlowCompile.cmake b/llvm/cmake/modules/TensorFlowCompile.cmake index 27b9d33a33fe5..6f77174982f22 100644 --- a/llvm/cmake/modules/TensorFlowCompile.cmake +++ b/llvm/cmake/modules/TensorFlowCompile.cmake @@ -88,6 +88,11 @@ endfunction() function(tf_find_and_compile model default_url default_path generation_config tag_set signature_def_key fname cpp_class) if ("${model}" STREQUAL "download") + # Crash if the user wants to download a model but a URL is set to "TO_BE_UPDATED" + if ("${LLVM_INLINER_MODEL_CURRENT_URL}" STREQUAL "TO_BE_UPDATED") + message(FATAL_ERROR "LLVM_INLINER_MODEL_PATH was set to 'download' but there is no model url currently specified in cmake - likely, the model interface recently changed, and so there is not a released model available.") + endif() + set(model ${default_url}) endif() From b9c24257c7b4da398798934ffefdd30015152180 Mon Sep 17 00:00:00 2001 From: Corentin Jabot Date: Thu, 1 Jul 2021 04:26:55 +0000 Subject: [PATCH 381/619] Add list of LWG papers accepted by WG21 during the June 2021 plenary Reviewed By: #libc, Quuxplusone, ldionne Differential Revision: https://reviews.llvm.org/D105103 --- libcxx/docs/Cxx2aStatusPaperStatus.csv | 7 ++++++- libcxx/docs/Cxx2bStatusPaperStatus.csv | 12 +++++++++++- libcxx/docs/FormatIssuePaperStatus.csv | 2 ++ libcxx/docs/RangesIssuePaperStatus.csv | 9 ++++++++- 4 files changed, 27 insertions(+), 3 deletions(-) diff --git a/libcxx/docs/Cxx2aStatusPaperStatus.csv b/libcxx/docs/Cxx2aStatusPaperStatus.csv index 90695107ad813..131111befe431 100644 --- a/libcxx/docs/Cxx2aStatusPaperStatus.csv +++ b/libcxx/docs/Cxx2aStatusPaperStatus.csv @@ -194,4 +194,9 @@ "`P2106 `__","LWG","Alternative wording for GB315 and GB316","Prague","* *","" "`P2116 `__","LWG","Remove tuple-like protocol support from fixed-extent span","Prague","|Complete|","11.0" "`P2231 `__","LWG","Missing constexpr in std::optional and std::variant","February 2021","|In progress|","13.0" -"`P2325 `__","LWG","Views should not be required to be default constructible","June Telecon","|In progress|","" +"`P2325 `__","LWG","Views should not be required to be default constructible","June 2021","|In progress|","" +"`P2210R2 `__","LWG",Superior String Splitting,"June 2021","","" +"`P2216R3 `__","LWG",std::format improvements,"June 2021","","" +"`P2281R1 `__","LWG",Clarifying range adaptor objects,"June 2021","","" +"`P2328R1 `__","LWG",join_view should join all views of ranges,"June 2021","","" +"`P2367R0 `__","LWG",Remove misuses of list-initialization from Clause 24,"June 2021","","" \ No newline at end of file diff --git a/libcxx/docs/Cxx2bStatusPaperStatus.csv b/libcxx/docs/Cxx2bStatusPaperStatus.csv index 8905486bb257f..db94ba0099336 100644 --- a/libcxx/docs/Cxx2bStatusPaperStatus.csv +++ b/libcxx/docs/Cxx2bStatusPaperStatus.csv @@ -11,5 +11,15 @@ "`P2212R2 `__","LWG","Relax Requirements for time_point::clock","February 2021","","" "`P2259R1 `__","LWG","Repairing input range adaptors and counted_iterator","February 2021","","" "","","","","","" +"`P0401R6 `__","LWG","Providing size feedback in the Allocator interface","June 2021","", +"`P0448R4 `__","LWG","A strstream replacement using span as buffer","June 2021","","" +"`P1132R8 `__","LWG","out_ptr - a scalable output pointer abstraction","June 2021","","" +"`P1328R1 `__","LWG","Making std::type_info::operator== constexpr","June 2021","","" +"`P1425R4 `__","LWG","Iterators pair constructors for stack and queue","June 2021","","" "`P1518R2 `__","LWG","Stop overconstraining allocators in container deduction guides","June 2021","|Complete|","13.0" -"","","","","","" +"`P1659R3 `__","LWG","starts_with and ends_with","June 2021","","" +"`P1951R1 `__","LWG","Default Arguments for pair Forwarding Constructor","June 2021","","" +"`P1989R2 `__","LWG","Range constructor for std::string_view","June 2021","","" +"`P2136R3 `__","LWG","invoke_r","June 2021","","" +"`P2166R1 `__","LWG","A Proposal to Prohibit std::basic_string and std::basic_string_view construction from nullptr","June 2021","","" +"","","","","","" \ No newline at end of file diff --git a/libcxx/docs/FormatIssuePaperStatus.csv b/libcxx/docs/FormatIssuePaperStatus.csv index 10d8b52e703e1..b42d9c6f842ca 100644 --- a/libcxx/docs/FormatIssuePaperStatus.csv +++ b/libcxx/docs/FormatIssuePaperStatus.csv @@ -22,3 +22,5 @@ Number,Name,Assignee,Patch,Status,First released version `LWG-3270 `_,"Parsing and formatting %j with durations",,,, `LWG-3272 `_,"%I%p should parse/format duration since midnight",,,, `LWG-3332 `_,"Issue in [time.format]",,,, + +`P2216 `_,"std::format improvements",,,, diff --git a/libcxx/docs/RangesIssuePaperStatus.csv b/libcxx/docs/RangesIssuePaperStatus.csv index a6d4cd2166aee..ee4a9aa2667ad 100644 --- a/libcxx/docs/RangesIssuePaperStatus.csv +++ b/libcxx/docs/RangesIssuePaperStatus.csv @@ -22,12 +22,19 @@ `P1994R1 `__,elements_view Needs Its Own sentinel,, `P2091R0 `__,Fixing Issues With Range Access CPOs,, `P2106R0 `__,Range Algorithm Result Types,, + +`P2325R3 `__,Views should not be required to be default constructible ,, +`P2328R1 `__,join_view should join all views of ranges,, +`P2210R2 `__,Superior String Splitting,, +`P2281R1 `__,Clarifying range adaptor objects,, +`P2367R0 `__,Remove misuses of list-initialization from Clause 24,, + `LWG3169 `__, ranges permutation generators discard useful information,, `LWG3173 `__, Enable CTAD for ref-view,, `LWG3179 `__, subrange should always model Range,, `LWG3180 `__, Inconsistently named return type for ranges::minmax_element,, `LWG3183 `__, Normative permission to specialize Ranges variable templates,, -`LWG3186 `__," ranges removal, partition, and partial_sort_copy algorithms discard useful information",, +`LWG3186 `__, "ranges removal, partition, and partial_sort_copy algorithms discard useful information",, `LWG3191 `__, std::ranges::shuffle synopsis does not match algorithm definition,, `LWG3276 `__, Class split_view::outer_iterator::value_type should inherit from view_interface,, `LWG3280 `__, View converting constructors can cause constraint recursion and are unneeded,, From 07f0faed1156e6ffe6b7df240ef422bd3adb9812 Mon Sep 17 00:00:00 2001 From: Qiu Chaofan Date: Thu, 1 Jul 2021 14:31:47 +0800 Subject: [PATCH 382/619] [NFC][Scheduler] Refactor tryCandidate to return boolean This patch changes return type of tryCandidate from void to bool: 1. Methods in some targets already follow this convention. 2. This would help if some target wants to re-use generic code. 3. It looks more intuitive if these try-method returns the same type. We may need to change return type of them from bool to some enum further, to make it less confusing. Reviewed By: foad Differential Revision: https://reviews.llvm.org/D103951 --- llvm/include/llvm/CodeGen/MachineScheduler.h | 4 +- llvm/lib/CodeGen/MachineScheduler.cpp | 64 +++++++++++-------- .../Target/PowerPC/PPCMachineScheduler.cpp | 52 ++++++++------- llvm/lib/Target/PowerPC/PPCMachineScheduler.h | 5 +- 4 files changed, 69 insertions(+), 56 deletions(-) diff --git a/llvm/include/llvm/CodeGen/MachineScheduler.h b/llvm/include/llvm/CodeGen/MachineScheduler.h index ec4b1522d8eb3..5bd5c8aa757ae 100644 --- a/llvm/include/llvm/CodeGen/MachineScheduler.h +++ b/llvm/include/llvm/CodeGen/MachineScheduler.h @@ -1012,7 +1012,7 @@ class GenericScheduler : public GenericSchedulerBase { const RegPressureTracker &RPTracker, RegPressureTracker &TempTracker); - virtual void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, + virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const; SUnit *pickNodeBidirectional(bool &IsTopNode); @@ -1075,7 +1075,7 @@ class PostGenericScheduler : public GenericSchedulerBase { } protected: - virtual void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand); + virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand); void pickNodeFromQueue(SchedCandidate &Cand); }; diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp index dd6e3a2dbb9e9..4f42a2c8aeffa 100644 --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -2818,6 +2818,8 @@ void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) { namespace llvm { /// Return true if this heuristic determines order. +/// TODO: Consider refactor return type of these functions as integer or enum, +/// as we may need to differentiate whether TryCand is better than Cand. bool tryLess(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, @@ -3176,34 +3178,35 @@ void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU, /// \param Cand provides the policy and current best candidate. /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized. /// \param Zone describes the scheduled zone that we are extending, or nullptr -// if Cand is from a different zone than TryCand. -void GenericScheduler::tryCandidate(SchedCandidate &Cand, +/// if Cand is from a different zone than TryCand. +/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand) +bool GenericScheduler::tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const { // Initialize the candidate if needed. if (!Cand.isValid()) { TryCand.Reason = NodeOrder; - return; + return true; } // Bias PhysReg Defs and copies to their uses and defined respectively. if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop), biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg)) - return; + return TryCand.Reason != NoCand; // Avoid exceeding the target's limit. if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand, RegExcess, TRI, DAG->MF)) - return; + return TryCand.Reason != NoCand; // Avoid increasing the max critical pressure in the scheduled region. if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax, TryCand, Cand, RegCritical, TRI, DAG->MF)) - return; + return TryCand.Reason != NoCand; // We only compare a subset of features when comparing nodes between // Top and Bottom boundary. Some properties are simply incomparable, in many @@ -3217,12 +3220,12 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand, // heuristics to take precedence. if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() && tryLatency(TryCand, Cand, *Zone)) - return; + return TryCand.Reason != NoCand; // Prioritize instructions that read unbuffered resources by stall cycles. if (tryLess(Zone->getLatencyStallCycles(TryCand.SU), Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall)) - return; + return TryCand.Reason != NoCand; } // Keep clustered nodes together to encourage downstream peephole @@ -3238,14 +3241,14 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand, if (tryGreater(TryCand.SU == TryCandNextClusterSU, Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster)) - return; + return TryCand.Reason != NoCand; if (SameBoundary) { // Weak edges are for clustering and other constraints. if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop), getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak)) - return; + return TryCand.Reason != NoCand; } // Avoid increasing the max pressure of the entire region. @@ -3253,31 +3256,34 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand, Cand.RPDelta.CurrentMax, TryCand, Cand, RegMax, TRI, DAG->MF)) - return; + return TryCand.Reason != NoCand; if (SameBoundary) { // Avoid critical resource consumption and balance the schedule. TryCand.initResourceDelta(DAG, SchedModel); if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, TryCand, Cand, ResourceReduce)) - return; + return TryCand.Reason != NoCand; if (tryGreater(TryCand.ResDelta.DemandedResources, Cand.ResDelta.DemandedResources, TryCand, Cand, ResourceDemand)) - return; + return TryCand.Reason != NoCand; // Avoid serializing long latency dependence chains. // For acyclic path limited loops, latency was already checked above. if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency && !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone)) - return; + return TryCand.Reason != NoCand; // Fall through to original instruction order. if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) { TryCand.Reason = NodeOrder; + return true; } } + + return false; } /// Pick the best candidate from the queue. @@ -3299,8 +3305,7 @@ void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone, initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker); // Pass SchedBoundary only when comparing nodes from the same boundary. SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr; - tryCandidate(Cand, TryCand, ZoneArg); - if (TryCand.Reason != NoCand) { + if (tryCandidate(Cand, TryCand, ZoneArg)) { // Initialize resource delta if needed in case future heuristics query it. if (TryCand.ResDelta == SchedResourceDelta()) TryCand.initResourceDelta(DAG, SchedModel); @@ -3378,8 +3383,7 @@ SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) { assert(TopCand.isValid()); SchedCandidate Cand = BotCand; TopCand.Reason = NoCand; - tryCandidate(Cand, TopCand, nullptr); - if (TopCand.Reason != NoCand) { + if (tryCandidate(Cand, TopCand, nullptr)) { Cand.setBest(TopCand); LLVM_DEBUG(traceCandidate(Cand)); } @@ -3543,42 +3547,47 @@ void PostGenericScheduler::registerRoots() { /// /// \param Cand provides the policy and current best candidate. /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized. -void PostGenericScheduler::tryCandidate(SchedCandidate &Cand, +/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand) +bool PostGenericScheduler::tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand) { // Initialize the candidate if needed. if (!Cand.isValid()) { TryCand.Reason = NodeOrder; - return; + return true; } // Prioritize instructions that read unbuffered resources by stall cycles. if (tryLess(Top.getLatencyStallCycles(TryCand.SU), Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall)) - return; + return TryCand.Reason != NoCand; // Keep clustered nodes together. if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(), Cand.SU == DAG->getNextClusterSucc(), TryCand, Cand, Cluster)) - return; + return TryCand.Reason != NoCand; // Avoid critical resource consumption and balance the schedule. if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, TryCand, Cand, ResourceReduce)) - return; + return TryCand.Reason != NoCand; if (tryGreater(TryCand.ResDelta.DemandedResources, Cand.ResDelta.DemandedResources, TryCand, Cand, ResourceDemand)) - return; + return TryCand.Reason != NoCand; // Avoid serializing long latency dependence chains. if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) { - return; + return TryCand.Reason != NoCand; } // Fall through to original instruction order. - if (TryCand.SU->NodeNum < Cand.SU->NodeNum) + if (TryCand.SU->NodeNum < Cand.SU->NodeNum) { TryCand.Reason = NodeOrder; + return true; + } + + return false; } void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) { @@ -3588,8 +3597,7 @@ void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) { TryCand.SU = SU; TryCand.AtTop = true; TryCand.initResourceDelta(DAG, SchedModel); - tryCandidate(Cand, TryCand); - if (TryCand.Reason != NoCand) { + if (tryCandidate(Cand, TryCand)) { Cand.setBest(TryCand); LLVM_DEBUG(traceCandidate(Cand)); } diff --git a/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp b/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp index ce615e554d942..03712879f7c49 100644 --- a/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp +++ b/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp @@ -46,7 +46,7 @@ bool PPCPreRASchedStrategy::biasAddiLoadCandidate(SchedCandidate &Cand, return false; } -void PPCPreRASchedStrategy::tryCandidate(SchedCandidate &Cand, +bool PPCPreRASchedStrategy::tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const { // From GenericScheduler::tryCandidate @@ -54,25 +54,25 @@ void PPCPreRASchedStrategy::tryCandidate(SchedCandidate &Cand, // Initialize the candidate if needed. if (!Cand.isValid()) { TryCand.Reason = NodeOrder; - return; + return true; } // Bias PhysReg Defs and copies to their uses and defined respectively. if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop), biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg)) - return; + return TryCand.Reason != NoCand; // Avoid exceeding the target's limit. if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand, RegExcess, TRI, DAG->MF)) - return; + return TryCand.Reason != NoCand; // Avoid increasing the max critical pressure in the scheduled region. if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax, TryCand, Cand, RegCritical, TRI, DAG->MF)) - return; + return TryCand.Reason != NoCand; // We only compare a subset of features when comparing nodes between // Top and Bottom boundary. Some properties are simply incomparable, in many @@ -86,12 +86,12 @@ void PPCPreRASchedStrategy::tryCandidate(SchedCandidate &Cand, // heuristics to take precedence. if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() && tryLatency(TryCand, Cand, *Zone)) - return; + return TryCand.Reason != NoCand; // Prioritize instructions that read unbuffered resources by stall cycles. if (tryLess(Zone->getLatencyStallCycles(TryCand.SU), Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall)) - return; + return TryCand.Reason != NoCand; } // Keep clustered nodes together to encourage downstream peephole @@ -106,37 +106,37 @@ void PPCPreRASchedStrategy::tryCandidate(SchedCandidate &Cand, TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred(); if (tryGreater(TryCand.SU == TryCandNextClusterSU, Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster)) - return; + return TryCand.Reason != NoCand; if (SameBoundary) { // Weak edges are for clustering and other constraints. if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop), getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak)) - return; + return TryCand.Reason != NoCand; } // Avoid increasing the max pressure of the entire region. if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand, Cand, RegMax, TRI, DAG->MF)) - return; + return TryCand.Reason != NoCand; if (SameBoundary) { // Avoid critical resource consumption and balance the schedule. TryCand.initResourceDelta(DAG, SchedModel); if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, TryCand, Cand, ResourceReduce)) - return; + return TryCand.Reason != NoCand; if (tryGreater(TryCand.ResDelta.DemandedResources, Cand.ResDelta.DemandedResources, TryCand, Cand, ResourceDemand)) - return; + return TryCand.Reason != NoCand; // Avoid serializing long latency dependence chains. // For acyclic path limited loops, latency was already checked above. if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency && !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone)) - return; + return TryCand.Reason != NoCand; // Fall through to original instruction order. if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) || @@ -150,14 +150,16 @@ void PPCPreRASchedStrategy::tryCandidate(SchedCandidate &Cand, // Add powerpc specific heuristic only when TryCand isn't selected or // selected as node order. if (TryCand.Reason != NodeOrder && TryCand.Reason != NoCand) - return; + return true; // There are some benefits to schedule the ADDI before the load to hide the // latency, as RA may create a true dependency between the load and addi. if (SameBoundary) { if (biasAddiLoadCandidate(Cand, TryCand, *Zone)) - return; + return TryCand.Reason != NoCand; } + + return TryCand.Reason != NoCand; } bool PPCPostRASchedStrategy::biasAddiCandidate(SchedCandidate &Cand, @@ -172,38 +174,38 @@ bool PPCPostRASchedStrategy::biasAddiCandidate(SchedCandidate &Cand, return false; } -void PPCPostRASchedStrategy::tryCandidate(SchedCandidate &Cand, +bool PPCPostRASchedStrategy::tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand) { // From PostGenericScheduler::tryCandidate // Initialize the candidate if needed. if (!Cand.isValid()) { TryCand.Reason = NodeOrder; - return; + return true; } // Prioritize instructions that read unbuffered resources by stall cycles. if (tryLess(Top.getLatencyStallCycles(TryCand.SU), Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall)) - return; + return TryCand.Reason != NoCand; // Keep clustered nodes together. if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(), Cand.SU == DAG->getNextClusterSucc(), TryCand, Cand, Cluster)) - return; + return TryCand.Reason != NoCand; // Avoid critical resource consumption and balance the schedule. if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, TryCand, Cand, ResourceReduce)) - return; + return TryCand.Reason != NoCand; if (tryGreater(TryCand.ResDelta.DemandedResources, Cand.ResDelta.DemandedResources, TryCand, Cand, ResourceDemand)) - return; + return TryCand.Reason != NoCand; // Avoid serializing long latency dependence chains. if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) { - return; + return TryCand.Reason != NoCand; } // Fall through to original instruction order. @@ -215,14 +217,16 @@ void PPCPostRASchedStrategy::tryCandidate(SchedCandidate &Cand, // Add powerpc post ra specific heuristic only when TryCand isn't selected or // selected as node order. if (TryCand.Reason != NodeOrder && TryCand.Reason != NoCand) - return; + return true; // There are some benefits to schedule the ADDI as early as possible post ra // to avoid stalled by vector instructions which take up all the hw units. // And ADDI is usually used to post inc the loop indvar, which matters the // performance. if (biasAddiCandidate(Cand, TryCand)) - return; + return TryCand.Reason != NoCand; + + return TryCand.Reason != NoCand; } void PPCPostRASchedStrategy::enterMBB(MachineBasicBlock *MBB) { diff --git a/llvm/lib/Target/PowerPC/PPCMachineScheduler.h b/llvm/lib/Target/PowerPC/PPCMachineScheduler.h index a9734ca71859a..27e80c7506a86 100644 --- a/llvm/lib/Target/PowerPC/PPCMachineScheduler.h +++ b/llvm/lib/Target/PowerPC/PPCMachineScheduler.h @@ -23,8 +23,9 @@ class PPCPreRASchedStrategy : public GenericScheduler { PPCPreRASchedStrategy(const MachineSchedContext *C) : GenericScheduler(C) {} protected: - void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, + bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const override; + private: bool biasAddiLoadCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, @@ -43,7 +44,7 @@ class PPCPostRASchedStrategy : public PostGenericScheduler { void enterMBB(MachineBasicBlock *MBB) override; void leaveMBB() override; - void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand) override; + bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand) override; bool biasAddiCandidate(SchedCandidate &Cand, SchedCandidate &TryCand) const; }; From 333d3a3cdfd44ad67e7a09a078decee3bae56eb2 Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Thu, 1 Jul 2021 10:09:24 +0300 Subject: [PATCH 383/619] [NFC][PassBuilder] addVectorPasses(): clarify that 'IsLTO' is actually 'IsFullLTO' I.e. it will be `false` for thin lto. --- llvm/include/llvm/Passes/PassBuilder.h | 2 +- .../llvm/Transforms/IPO/PassManagerBuilder.h | 4 ++-- llvm/lib/Passes/PassBuilder.cpp | 16 ++++++++-------- llvm/lib/Transforms/IPO/PassManagerBuilder.cpp | 16 ++++++++-------- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h index f8252b9583677..4aabd93dbaffd 100644 --- a/llvm/include/llvm/Passes/PassBuilder.h +++ b/llvm/include/llvm/Passes/PassBuilder.h @@ -710,7 +710,7 @@ class PassBuilder { void addRequiredLTOPreLinkPasses(ModulePassManager &MPM); void addVectorPasses(OptimizationLevel Level, FunctionPassManager &FPM, - bool IsLTO); + bool IsFullLTO); static Optional> parsePipelineText(StringRef Text); diff --git a/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h b/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h index 46d9bee2e10f0..4f941d26df4cf 100644 --- a/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h +++ b/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h @@ -218,8 +218,8 @@ class PassManagerBuilder { void addLateLTOOptimizationPasses(legacy::PassManagerBase &PM); void addPGOInstrPasses(legacy::PassManagerBase &MPM, bool IsCS); void addFunctionSimplificationPasses(legacy::PassManagerBase &MPM); - void addVectorPasses(legacy::PassManagerBase &PM, bool IsLTO); - + void addVectorPasses(legacy::PassManagerBase &PM, bool IsFullLTO); + public: /// populateFunctionPassManager - This fills in the function pass manager, /// which is expected to be run on each function immediately as it is diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 229fd453c8967..4f401fd05e855 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -1198,11 +1198,11 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, /// TODO: Should LTO cause any differences to this set of passes? void PassBuilder::addVectorPasses(OptimizationLevel Level, - FunctionPassManager &FPM, bool IsLTO) { + FunctionPassManager &FPM, bool IsFullLTO) { FPM.addPass(LoopVectorizePass( LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization))); - if (IsLTO) { + if (IsFullLTO) { // The vectorizer may have significantly shortened a loop body; unroll // again. Unroll small loops to hide loop backedge latency and saturate any // parallel execution resources of an out-of-order processor. We also then @@ -1220,7 +1220,7 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level, FPM.addPass(WarnMissedTransformationsPass()); } - if (!IsLTO) { + if (!IsFullLTO) { // Eliminate loads by forwarding stores from the previous iteration to loads // of the current iteration. FPM.addPass(LoopLoadEliminationPass()); @@ -1267,7 +1267,7 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level, .hoistCommonInsts(true) .sinkCommonInsts(true))); - if (IsLTO) { + if (IsFullLTO) { FPM.addPass(SCCPPass()); FPM.addPass(InstCombinePass()); FPM.addPass(BDCEPass()); @@ -1283,7 +1283,7 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level, // Enhance/cleanup vector code. FPM.addPass(VectorCombinePass()); - if (!IsLTO) { + if (!IsFullLTO) { FPM.addPass(InstCombinePass()); // Unroll small loops to hide loop backedge latency and saturate any // parallel execution resources of an out-of-order processor. We also then @@ -1312,7 +1312,7 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level, // alignment information, try to re-derive it here. FPM.addPass(AlignmentFromAssumptionsPass()); - if (IsLTO) + if (IsFullLTO) FPM.addPass(InstCombinePass()); } @@ -1410,7 +1410,7 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, // from the TargetLibraryInfo. OptimizePM.addPass(InjectTLIMappings()); - addVectorPasses(Level, OptimizePM, /* IsLTO */ false); + addVectorPasses(Level, OptimizePM, /* IsFullLTO */ false); // Split out cold code. Splitting is done late to avoid hiding context from // other optimizations and inadvertently regressing performance. The tradeoff @@ -1862,7 +1862,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, MainFPM.addPass(LoopDistributePass()); - addVectorPasses(Level, MainFPM, /* IsLTO */ true); + addVectorPasses(Level, MainFPM, /* IsFullLTO */ true); invokePeepholeEPCallbacks(MainFPM, Level); MainFPM.addPass(JumpThreadingPass(/*InsertFreezeWhenUnfoldingSelect*/ true)); diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index 86fa0181f0002..7f8ba79c99bd6 100644 --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -531,10 +531,10 @@ void PassManagerBuilder::addFunctionSimplificationPasses( /// FIXME: Should LTO cause any differences to this set of passes? void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM, - bool IsLTO) { + bool IsFullLTO) { PM.add(createLoopVectorizePass(!LoopsInterleaved, !LoopVectorize)); - if (IsLTO) { + if (IsFullLTO) { // The vectorizer may have significantly shortened a loop body; unroll // again. Unroll small loops to hide loop backedge latency and saturate any // parallel execution resources of an out-of-order processor. We also then @@ -550,7 +550,7 @@ void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM, PM.add(createWarnMissedTransformationsPass()); } - if (!IsLTO) { + if (!IsFullLTO) { // Eliminate loads by forwarding stores from the previous iteration to loads // of the current iteration. PM.add(createLoopLoadEliminationPass()); @@ -590,7 +590,7 @@ void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM, .hoistCommonInsts(true) .sinkCommonInsts(true))); - if (IsLTO) { + if (IsFullLTO) { PM.add(createSCCPPass()); // Propagate exposed constants PM.add(createInstructionCombiningPass()); // Clean up again PM.add(createBitTrackingDCEPass()); @@ -606,7 +606,7 @@ void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM, // Enhance/cleanup vector code. PM.add(createVectorCombinePass()); - if (!IsLTO) { + if (!IsFullLTO) { addExtensionsToPM(EP_Peephole, PM); PM.add(createInstructionCombiningPass()); @@ -639,7 +639,7 @@ void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM, // about pointer alignments. PM.add(createAlignmentFromAssumptionsPass()); - if (IsLTO) + if (IsFullLTO) PM.add(createInstructionCombiningPass()); } @@ -918,7 +918,7 @@ void PassManagerBuilder::populateModulePassManager( // llvm.loop.distribute=true or when -enable-loop-distribute is specified. MPM.add(createLoopDistributePass()); - addVectorPasses(MPM, /* IsLTO */ false); + addVectorPasses(MPM, /* IsFullLTO */ false); // FIXME: We shouldn't bother with this anymore. MPM.add(createStripDeadPrototypesPass()); // Get rid of dead prototypes @@ -1133,7 +1133,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { ForgetAllSCEVInLoopUnroll)); PM.add(createLoopDistributePass()); - addVectorPasses(PM, /* IsLTO */ true); + addVectorPasses(PM, /* IsFullLTO */ true); addExtensionsToPM(EP_Peephole, PM); From e825c244b6063344ae726600d6a1225a05788dfa Mon Sep 17 00:00:00 2001 From: Jan Kratochvil Date: Thu, 1 Jul 2021 09:14:28 +0200 Subject: [PATCH 384/619] [lldb] Fix Recognizer/assert.test with glibc-2.33.9000-31.fc35.x86_64 While on regular Linux system (Fedora 34 GA, not updated): * thread #1, name = '1', stop reason = hit program assert frame #0: 0x00007ffff7e242a2 libc.so.6`raise + 322 frame #1: 0x00007ffff7e0d8a4 libc.so.6`abort + 278 frame #2: 0x00007ffff7e0d789 libc.so.6`__assert_fail_base.cold + 15 frame #3: 0x00007ffff7e1ca16 libc.so.6`__assert_fail + 70 * frame #4: 0x00000000004011bd 1`main at assert.c:7:3 On Fedora 35 pre-release one gets: * thread #1, name = '1', stop reason = signal SIGABRT * frame #0: 0x00007ffff7e48ee3 libc.so.6`pthread_kill@GLIBC_2.2.5 + 67 frame #1: 0x00007ffff7dfb986 libc.so.6`raise + 22 frame #2: 0x00007ffff7de5806 libc.so.6`abort + 230 frame #3: 0x00007ffff7de571b libc.so.6`__assert_fail_base.cold + 15 frame #4: 0x00007ffff7df4646 libc.so.6`__assert_fail + 70 frame #5: 0x00000000004011bd 1`main at assert.c:7:3 I did not write a testcase as one needs the specific glibc. An artificial test would just copy the changed source. Reviewed By: mib Differential Revision: https://reviews.llvm.org/D105133 --- lldb/source/Target/AssertFrameRecognizer.cpp | 36 ++++++++++++++++++-- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/lldb/source/Target/AssertFrameRecognizer.cpp b/lldb/source/Target/AssertFrameRecognizer.cpp index cb671040d14fa..a2315b6d63c69 100644 --- a/lldb/source/Target/AssertFrameRecognizer.cpp +++ b/lldb/source/Target/AssertFrameRecognizer.cpp @@ -22,6 +22,10 @@ namespace lldb_private { struct SymbolLocation { FileSpec module_spec; std::vector symbols; + + // The symbols are regular expressions. In such case all symbols are matched + // with their trailing @VER symbol version stripped. + bool symbols_are_regex = false; }; /// Fetches the abort frame location depending on the current platform. @@ -45,6 +49,8 @@ bool GetAbortLocation(llvm::Triple::OSType os, SymbolLocation &location) { location.symbols.push_back(ConstString("raise")); location.symbols.push_back(ConstString("__GI_raise")); location.symbols.push_back(ConstString("gsignal")); + location.symbols.push_back(ConstString("pthread_kill")); + location.symbols_are_regex = true; break; default: Log *log(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_UNWIND)); @@ -93,9 +99,33 @@ void RegisterAssertFrameRecognizer(Process *process) { if (!GetAbortLocation(os, location)) return; + if (!location.symbols_are_regex) { + target.GetFrameRecognizerManager().AddRecognizer( + std::make_shared(), + location.module_spec.GetFilename(), location.symbols, + /*first_instruction_only*/ false); + return; + } + std::string module_re = "^"; + for (char c : location.module_spec.GetFilename().GetStringRef()) { + if (c == '.') + module_re += '\\'; + module_re += c; + } + module_re += '$'; + std::string symbol_re = "^("; + for (auto it = location.symbols.cbegin(); it != location.symbols.cend(); + ++it) { + if (it != location.symbols.cbegin()) + symbol_re += '|'; + symbol_re += it->GetStringRef(); + } + // Strip the trailing @VER symbol version. + symbol_re += ")(@.*)?$"; target.GetFrameRecognizerManager().AddRecognizer( - StackFrameRecognizerSP(new AssertFrameRecognizer()), - location.module_spec.GetFilename(), location.symbols, + std::make_shared(), + std::make_shared(std::move(module_re)), + std::make_shared(std::move(symbol_re)), /*first_instruction_only*/ false); } @@ -112,7 +142,7 @@ AssertFrameRecognizer::RecognizeFrame(lldb::StackFrameSP frame_sp) { if (!GetAssertLocation(os, location)) return RecognizedStackFrameSP(); - const uint32_t frames_to_fetch = 5; + const uint32_t frames_to_fetch = 6; const uint32_t last_frame_index = frames_to_fetch - 1; StackFrameSP prev_frame_sp = nullptr; From 8066f22c4663d9ee6c763d9108c89448e5c19848 Mon Sep 17 00:00:00 2001 From: Fabian Schuiki Date: Mon, 28 Jun 2021 13:12:36 +0200 Subject: [PATCH 385/619] [MLIR] Add argument insertion helpers for FunctionLike Add helpers to facilitate adding arguments and results to operations that implement the `FunctionLike` trait. These operations already have a convenient argument and result *erasure* mechanism, but a corresopnding utility for insertion is missing. This introduces such a utility. --- mlir/include/mlir/IR/BuiltinTypes.td | 7 ++ mlir/include/mlir/IR/FunctionSupport.h | 71 ++++++++++++++++++ mlir/lib/IR/BuiltinTypes.cpp | 39 ++++++++++ mlir/lib/IR/FunctionSupport.cpp | 89 +++++++++++++++++++++++ mlir/test/IR/test-func-insert-arg.mlir | 49 +++++++++++++ mlir/test/IR/test-func-insert-result.mlir | 37 ++++++++++ mlir/test/lib/IR/TestFunc.cpp | 83 +++++++++++++++++++-- 7 files changed, 367 insertions(+), 8 deletions(-) create mode 100644 mlir/test/IR/test-func-insert-arg.mlir create mode 100644 mlir/test/IR/test-func-insert-result.mlir diff --git a/mlir/include/mlir/IR/BuiltinTypes.td b/mlir/include/mlir/IR/BuiltinTypes.td index 4edf72667bd6e..edbd1ea2ae912 100644 --- a/mlir/include/mlir/IR/BuiltinTypes.td +++ b/mlir/include/mlir/IR/BuiltinTypes.td @@ -154,6 +154,13 @@ def Builtin_Function : Builtin_Type<"Function", [ unsigned getNumResults() const; Type getResult(unsigned i) const { return getResults()[i]; } + /// Returns a new function type with the specified arguments and results + /// inserted. + FunctionType getWithArgsAndResults(ArrayRef argIndices, + TypeRange argTypes, + ArrayRef resultIndices, + TypeRange resultTypes); + /// Returns a new function type without the specified arguments and results. FunctionType getWithoutArgsAndResults(ArrayRef argIndices, ArrayRef resultIndices); diff --git a/mlir/include/mlir/IR/FunctionSupport.h b/mlir/include/mlir/IR/FunctionSupport.h index f27c857a175a6..c7ee9429d583e 100644 --- a/mlir/include/mlir/IR/FunctionSupport.h +++ b/mlir/include/mlir/IR/FunctionSupport.h @@ -68,6 +68,19 @@ inline ArrayRef getResultAttrs(Operation *op, unsigned index) { return resultDict ? resultDict.getValue() : llvm::None; } +/// Insert the specified arguments and update the function type attribute. +void insertFunctionArguments(Operation *op, ArrayRef argIndices, + TypeRange argTypes, + ArrayRef argAttrs, + ArrayRef> argLocs, + unsigned originalNumArgs, Type newType); + +/// Insert the specified results and update the function type attribute. +void insertFunctionResults(Operation *op, ArrayRef resultIndices, + TypeRange resultTypes, + ArrayRef resultAttrs, + unsigned originalNumResults, Type newType); + /// Erase the specified arguments and update the function type attribute. void eraseFunctionArguments(Operation *op, ArrayRef argIndices, unsigned originalNumArgs, Type newType); @@ -208,6 +221,22 @@ class FunctionLike : public OpTrait::TraitBase { return function_like_impl::getFunctionType(this->getOperation()); } + /// Return the type of this function with the specified arguments and results + /// inserted. This is used to update the function's signature in the + /// `insertArguments` and `insertResults` methods. The arrays must be sorted + /// by increasing index. + /// + /// Note that the concrete class must define a method with the same name to + /// hide this one if the concrete class does not use FunctionType for the + /// function type under the hood. + FunctionType getTypeWithArgsAndResults(ArrayRef argIndices, + TypeRange argTypes, + ArrayRef resultIndices, + TypeRange resultTypes) { + return getType().getWithArgsAndResults(argIndices, argTypes, resultIndices, + resultTypes); + } + /// Return the type of this function without the specified arguments and /// results. This is used to update the function's signature in the /// `eraseArguments` and `eraseResults` methods. The arrays of indices are @@ -267,6 +296,48 @@ class FunctionLike : public OpTrait::TraitBase { return getBody().getArgumentTypes(); } + /// Insert a single argument of type `argType` with attributes `argAttrs` and + /// location `argLoc` at `argIndex`. + void insertArgument(unsigned argIndex, Type argType, DictionaryAttr argAttrs, + Optional argLoc = {}) { + insertArguments({argIndex}, {argType}, {argAttrs}, {argLoc}); + } + + /// Inserts arguments with the listed types, attributes, and locations at the + /// listed indices. `argIndices` must be sorted. Arguments are inserted in the + /// order they are listed, such that arguments with identical index will + /// appear in the same order that they were listed here. + void insertArguments(ArrayRef argIndices, TypeRange argTypes, + ArrayRef argAttrs, + ArrayRef> argLocs) { + unsigned originalNumArgs = getNumArguments(); + Type newType = getTypeWithArgsAndResults( + argIndices, argTypes, /*resultIndices=*/{}, /*resultTypes=*/{}); + function_like_impl::insertFunctionArguments( + this->getOperation(), argIndices, argTypes, argAttrs, argLocs, + originalNumArgs, newType); + } + + /// Insert a single result of type `resultType` at `resultIndex`. + void insertResult(unsigned resultIndex, Type resultType, + DictionaryAttr resultAttrs) { + insertResults({resultIndex}, {resultType}, {resultAttrs}); + } + + /// Inserts results with the listed types at the listed indices. + /// `resultIndices` must be sorted. Results are inserted in the order they are + /// listed, such that results with identical index will appear in the same + /// order that they were listed here. + void insertResults(ArrayRef resultIndices, TypeRange resultTypes, + ArrayRef resultAttrs) { + unsigned originalNumResults = getNumResults(); + Type newType = getTypeWithArgsAndResults(/*argIndices=*/{}, /*argTypes=*/{}, + resultIndices, resultTypes); + function_like_impl::insertFunctionResults( + this->getOperation(), resultIndices, resultTypes, resultAttrs, + originalNumResults, newType); + } + /// Erase a single argument at `argIndex`. void eraseArgument(unsigned argIndex) { eraseArguments({argIndex}); } diff --git a/mlir/lib/IR/BuiltinTypes.cpp b/mlir/lib/IR/BuiltinTypes.cpp index d5fd1eadbb69f..f350596384a90 100644 --- a/mlir/lib/IR/BuiltinTypes.cpp +++ b/mlir/lib/IR/BuiltinTypes.cpp @@ -172,6 +172,45 @@ inline void iterateIndicesExcept(unsigned totalIndices, callback(i); } +/// Returns a new function type with the specified arguments and results +/// inserted. +FunctionType FunctionType::getWithArgsAndResults( + ArrayRef argIndices, TypeRange argTypes, + ArrayRef resultIndices, TypeRange resultTypes) { + assert(argIndices.size() == argTypes.size()); + assert(resultIndices.size() == resultTypes.size()); + + ArrayRef newInputTypes = getInputs(); + SmallVector newInputTypesBuffer; + if (!argIndices.empty()) { + const auto *fromIt = newInputTypes.begin(); + for (auto it : llvm::zip(argIndices, argTypes)) { + const auto *toIt = newInputTypes.begin() + std::get<0>(it); + newInputTypesBuffer.append(fromIt, toIt); + newInputTypesBuffer.push_back(std::get<1>(it)); + fromIt = toIt; + } + newInputTypesBuffer.append(fromIt, newInputTypes.end()); + newInputTypes = newInputTypesBuffer; + } + + ArrayRef newResultTypes = getResults(); + SmallVector newResultTypesBuffer; + if (!resultIndices.empty()) { + const auto *fromIt = newResultTypes.begin(); + for (auto it : llvm::zip(resultIndices, resultTypes)) { + const auto *toIt = newResultTypes.begin() + std::get<0>(it); + newResultTypesBuffer.append(fromIt, toIt); + newResultTypesBuffer.push_back(std::get<1>(it)); + fromIt = toIt; + } + newResultTypesBuffer.append(fromIt, newResultTypes.end()); + newResultTypes = newResultTypesBuffer; + } + + return FunctionType::get(getContext(), newInputTypes, newResultTypes); +} + /// Returns a new function type without the specified arguments and results. FunctionType FunctionType::getWithoutArgsAndResults(ArrayRef argIndices, diff --git a/mlir/lib/IR/FunctionSupport.cpp b/mlir/lib/IR/FunctionSupport.cpp index b8a0ebc3f4a53..4f6f76cfbcfb9 100644 --- a/mlir/lib/IR/FunctionSupport.cpp +++ b/mlir/lib/IR/FunctionSupport.cpp @@ -121,6 +121,95 @@ void mlir::function_like_impl::setAllResultAttrDicts( llvm::to_vector<8>(wrappedAttrs)); } +void mlir::function_like_impl::insertFunctionArguments( + Operation *op, ArrayRef argIndices, TypeRange argTypes, + ArrayRef argAttrs, ArrayRef> argLocs, + unsigned originalNumArgs, Type newType) { + assert(argIndices.size() == argTypes.size()); + assert(argIndices.size() == argAttrs.size() || argAttrs.empty()); + assert(argIndices.size() == argLocs.size() || argLocs.empty()); + if (argIndices.empty()) + return; + + // There are 3 things that need to be updated: + // - Function type. + // - Arg attrs. + // - Block arguments of entry block. + Block &entry = op->getRegion(0).front(); + + // Update the argument attributes of the function. + auto oldArgAttrs = op->getAttrOfType(getArgDictAttrName()); + if (oldArgAttrs || !argAttrs.empty()) { + SmallVector newArgAttrs; + newArgAttrs.reserve(originalNumArgs + argIndices.size()); + unsigned oldIdx = 0; + auto migrate = [&](unsigned untilIdx) { + if (!oldArgAttrs) { + newArgAttrs.resize(newArgAttrs.size() + untilIdx - oldIdx); + } else { + auto oldArgAttrRange = oldArgAttrs.getAsRange(); + newArgAttrs.append(oldArgAttrRange.begin() + oldIdx, + oldArgAttrRange.begin() + untilIdx); + } + oldIdx = untilIdx; + }; + for (unsigned i = 0, e = argIndices.size(); i < e; ++i) { + migrate(argIndices[i]); + newArgAttrs.push_back(argAttrs.empty() ? DictionaryAttr{} : argAttrs[i]); + } + migrate(originalNumArgs); + setAllArgAttrDicts(op, newArgAttrs); + } + + // Update the function type and any entry block arguments. + op->setAttr(getTypeAttrName(), TypeAttr::get(newType)); + for (unsigned i = 0, e = argIndices.size(); i < e; ++i) + entry.insertArgument(argIndices[i], argTypes[i], + argLocs.empty() ? Optional{} : argLocs[i]); +} + +void mlir::function_like_impl::insertFunctionResults( + Operation *op, ArrayRef resultIndices, TypeRange resultTypes, + ArrayRef resultAttrs, unsigned originalNumResults, + Type newType) { + assert(resultIndices.size() == resultTypes.size()); + assert(resultIndices.size() == resultAttrs.size() || resultAttrs.empty()); + if (resultIndices.empty()) + return; + + // There are 2 things that need to be updated: + // - Function type. + // - Result attrs. + + // Update the result attributes of the function. + auto oldResultAttrs = op->getAttrOfType(getResultDictAttrName()); + if (oldResultAttrs || !resultAttrs.empty()) { + SmallVector newResultAttrs; + newResultAttrs.reserve(originalNumResults + resultIndices.size()); + unsigned oldIdx = 0; + auto migrate = [&](unsigned untilIdx) { + if (!oldResultAttrs) { + newResultAttrs.resize(newResultAttrs.size() + untilIdx - oldIdx); + } else { + auto oldResultAttrsRange = oldResultAttrs.getAsRange(); + newResultAttrs.append(oldResultAttrsRange.begin() + oldIdx, + oldResultAttrsRange.begin() + untilIdx); + } + oldIdx = untilIdx; + }; + for (unsigned i = 0, e = resultIndices.size(); i < e; ++i) { + migrate(resultIndices[i]); + newResultAttrs.push_back(resultAttrs.empty() ? DictionaryAttr{} + : resultAttrs[i]); + } + migrate(originalNumResults); + setAllResultAttrDicts(op, newResultAttrs); + } + + // Update the function type. + op->setAttr(getTypeAttrName(), TypeAttr::get(newType)); +} + void mlir::function_like_impl::eraseFunctionArguments( Operation *op, ArrayRef argIndices, unsigned originalNumArgs, Type newType) { diff --git a/mlir/test/IR/test-func-insert-arg.mlir b/mlir/test/IR/test-func-insert-arg.mlir new file mode 100644 index 0000000000000..2de6c666d0d31 --- /dev/null +++ b/mlir/test/IR/test-func-insert-arg.mlir @@ -0,0 +1,49 @@ +// RUN: mlir-opt %s -test-func-insert-arg -split-input-file | FileCheck %s + +// CHECK: func @f(%arg0: f32 {test.A}) +func @f() attributes {test.insert_args = [ + [0, f32, {test.A}]]} { + return +} + +// ----- + +// CHECK: func @f(%arg0: f32 {test.A}, %arg1: f32 {test.B}) +func @f(%arg0: f32 {test.B}) attributes {test.insert_args = [ + [0, f32, {test.A}]]} { + return +} + +// ----- + +// CHECK: func @f(%arg0: f32 {test.A}, %arg1: f32 {test.B}) +func @f(%arg0: f32 {test.A}) attributes {test.insert_args = [ + [1, f32, {test.B}]]} { + return +} + +// ----- + +// CHECK: func @f(%arg0: f32 {test.A}, %arg1: f32 {test.B}, %arg2: f32 {test.C}) +func @f(%arg0: f32 {test.A}, %arg1: f32 {test.C}) attributes {test.insert_args = [ + [1, f32, {test.B}]]} { + return +} + +// ----- + +// CHECK: func @f(%arg0: f32 {test.A}, %arg1: f32 {test.B}, %arg2: f32 {test.C}) +func @f(%arg0: f32 {test.B}) attributes {test.insert_args = [ + [0, f32, {test.A}], + [1, f32, {test.C}]]} { + return +} + +// ----- + +// CHECK: func @f(%arg0: f32 {test.A}, %arg1: f32 {test.B}, %arg2: f32 {test.C}) +func @f(%arg0: f32 {test.C}) attributes {test.insert_args = [ + [0, f32, {test.A}], + [0, f32, {test.B}]]} { + return +} diff --git a/mlir/test/IR/test-func-insert-result.mlir b/mlir/test/IR/test-func-insert-result.mlir new file mode 100644 index 0000000000000..129fff4b56fe9 --- /dev/null +++ b/mlir/test/IR/test-func-insert-result.mlir @@ -0,0 +1,37 @@ +// RUN: mlir-opt %s -test-func-insert-result -split-input-file | FileCheck %s + +// CHECK: func private @f() -> (f32 {test.A}) +func private @f() attributes {test.insert_results = [ + [0, f32, {test.A}]]} + +// ----- + +// CHECK: func private @f() -> (f32 {test.A}, f32 {test.B}) +func private @f() -> (f32 {test.B}) attributes {test.insert_results = [ + [0, f32, {test.A}]]} + +// ----- + +// CHECK: func private @f() -> (f32 {test.A}, f32 {test.B}) +func private @f() -> (f32 {test.A}) attributes {test.insert_results = [ + [1, f32, {test.B}]]} + +// ----- + +// CHECK: func private @f() -> (f32 {test.A}, f32 {test.B}, f32 {test.C}) +func private @f() -> (f32 {test.A}, f32 {test.C}) attributes {test.insert_results = [ + [1, f32, {test.B}]]} + +// ----- + +// CHECK: func private @f() -> (f32 {test.A}, f32 {test.B}, f32 {test.C}) +func private @f() -> (f32 {test.B}) attributes {test.insert_results = [ + [0, f32, {test.A}], + [1, f32, {test.C}]]} + +// ----- + +// CHECK: func private @f() -> (f32 {test.A}, f32 {test.B}, f32 {test.C}) +func private @f() -> (f32 {test.C}) attributes {test.insert_results = [ + [0, f32, {test.A}], + [0, f32, {test.B}]]} diff --git a/mlir/test/lib/IR/TestFunc.cpp b/mlir/test/lib/IR/TestFunc.cpp index 4f2b45628ed99..a2d0c796c66b5 100644 --- a/mlir/test/lib/IR/TestFunc.cpp +++ b/mlir/test/lib/IR/TestFunc.cpp @@ -12,6 +12,72 @@ using namespace mlir; namespace { +/// This is a test pass for verifying FuncOp's insertArgument method. +struct TestFuncInsertArg + : public PassWrapper> { + StringRef getArgument() const final { return "test-func-insert-arg"; } + StringRef getDescription() const final { return "Test inserting func args."; } + void runOnOperation() override { + auto module = getOperation(); + + for (FuncOp func : module.getOps()) { + auto inserts = func->getAttrOfType("test.insert_args"); + if (!inserts || inserts.empty()) + continue; + SmallVector indicesToInsert; + SmallVector typesToInsert; + SmallVector attrsToInsert; + SmallVector, 4> locsToInsert; + for (auto insert : inserts.getAsRange()) { + indicesToInsert.push_back( + insert[0].cast().getValue().getZExtValue()); + typesToInsert.push_back(insert[1].cast().getValue()); + attrsToInsert.push_back(insert.size() > 2 + ? insert[2].cast() + : DictionaryAttr::get(&getContext())); + locsToInsert.push_back( + insert.size() > 3 + ? Optional(insert[3].cast()) + : Optional{}); + } + func->removeAttr("test.insert_args"); + func.insertArguments(indicesToInsert, typesToInsert, attrsToInsert, + locsToInsert); + } + } +}; + +/// This is a test pass for verifying FuncOp's insertResult method. +struct TestFuncInsertResult + : public PassWrapper> { + StringRef getArgument() const final { return "test-func-insert-result"; } + StringRef getDescription() const final { + return "Test inserting func results."; + } + void runOnOperation() override { + auto module = getOperation(); + + for (FuncOp func : module.getOps()) { + auto inserts = func->getAttrOfType("test.insert_results"); + if (!inserts || inserts.empty()) + continue; + SmallVector indicesToInsert; + SmallVector typesToInsert; + SmallVector attrsToInsert; + for (auto insert : inserts.getAsRange()) { + indicesToInsert.push_back( + insert[0].cast().getValue().getZExtValue()); + typesToInsert.push_back(insert[1].cast().getValue()); + attrsToInsert.push_back(insert.size() > 2 + ? insert[2].cast() + : DictionaryAttr::get(&getContext())); + } + func->removeAttr("test.insert_results"); + func.insertResults(indicesToInsert, typesToInsert, attrsToInsert); + } + } +}; + /// This is a test pass for verifying FuncOp's eraseArgument method. struct TestFuncEraseArg : public PassWrapper> { @@ -51,18 +117,15 @@ struct TestFuncEraseResult for (FuncOp func : module.getOps()) { SmallVector indicesToErase; for (auto resultIndex : llvm::seq(0, func.getNumResults())) { - if (func.getResultAttr(resultIndex, "test.erase_this_" - "result")) { - // Push back twice to test - // that duplicate indices - // are handled correctly. + if (func.getResultAttr(resultIndex, "test.erase_this_result")) { + // Push back twice to test that duplicate indices are handled + // correctly. indicesToErase.push_back(resultIndex); indicesToErase.push_back(resultIndex); } } - // Reverse the order to test - // that unsorted index lists are - // handled correctly. + // Reverse the order to test that unsorted index lists are handled + // correctly. std::reverse(indicesToErase.begin(), indicesToErase.end()); func.eraseResults(indicesToErase); } @@ -90,6 +153,10 @@ struct TestFuncSetType namespace mlir { void registerTestFunc() { + PassRegistration(); + + PassRegistration(); + PassRegistration(); PassRegistration(); From 0e2d4bd4bfa516977d027ece8597fc8dd1f3c656 Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Thu, 1 Jul 2021 09:33:22 +0200 Subject: [PATCH 386/619] [clangd] Fix gRPC build due to missing include path --- clang-tools-extra/clangd/index/remote/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/clang-tools-extra/clangd/index/remote/CMakeLists.txt b/clang-tools-extra/clangd/index/remote/CMakeLists.txt index beae5be405e08..51db6a7a141ee 100644 --- a/clang-tools-extra/clangd/index/remote/CMakeLists.txt +++ b/clang-tools-extra/clangd/index/remote/CMakeLists.txt @@ -14,6 +14,7 @@ if (CLANGD_ENABLE_REMOTE) ) include_directories(${CMAKE_CURRENT_BINARY_DIR}) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../) + include_directories(${CMAKE_CURRENT_BINARY_DIR}/../../) # FIXME(kirillbobyrev): target_compile_definitions is not working with # add_clang_library for some reason. Is there any way to make this From 226f925c3f3461cbd3e76f383069b01b5f7db2f3 Mon Sep 17 00:00:00 2001 From: Tobias Gysi Date: Thu, 1 Jul 2021 06:41:52 +0000 Subject: [PATCH 387/619] [mlir][linalg][python] Update integration of OpDSL doc (NFC). Move the OpDSL doc to a linalg sub folder and updated the integration in the main linalg documentation. Differential Revision: https://reviews.llvm.org/D105188 --- .../Linalg/OpDSL.md} | 10 ++--- .../Dialects/{Linalg.md => Linalg/_index.md} | 23 +++++++----- mlir/docs/Rationale/RationaleLinalgDialect.md | 37 +++++++++---------- 3 files changed, 36 insertions(+), 34 deletions(-) rename mlir/docs/{Tools/LinalgOpDsl.md => Dialects/Linalg/OpDSL.md} (96%) rename mlir/docs/Dialects/{Linalg.md => Linalg/_index.md} (97%) diff --git a/mlir/docs/Tools/LinalgOpDsl.md b/mlir/docs/Dialects/Linalg/OpDSL.md similarity index 96% rename from mlir/docs/Tools/LinalgOpDsl.md rename to mlir/docs/Dialects/Linalg/OpDSL.md index f7be38ec6f656..bdfd58f88fb66 100644 --- a/mlir/docs/Tools/LinalgOpDsl.md +++ b/mlir/docs/Dialects/Linalg/OpDSL.md @@ -1,12 +1,12 @@ -# linalg_opdsl tool +# Linalg OpDSL Python based DSL for authoring Linalg op definitions and generating `linalg.generic` IR based on them for samples. -The tool `linalg_opdsl` provides a high level DSL for constructing structured op -definitions in a way that can be exported to built-in, named structured ops via -the above YAML-based definitions or used interactively to emit corresponding -`linalg.generic` IR for the composition. +The Linalg OpDSL is a high level DSL for constructing structured op definitions +in a way that can be exported to built-in, named structured ops via +[YAML-based definitions](_index.md/#yaml-gen) or used interactively to emit +corresponding `linalg.generic` IR for the composition. ## Basic usage diff --git a/mlir/docs/Dialects/Linalg.md b/mlir/docs/Dialects/Linalg/_index.md similarity index 97% rename from mlir/docs/Dialects/Linalg.md rename to mlir/docs/Dialects/Linalg/_index.md index 65fe07567d5ad..01415dd2c53a5 100644 --- a/mlir/docs/Dialects/Linalg.md +++ b/mlir/docs/Dialects/Linalg/_index.md @@ -10,7 +10,7 @@ Linalg is designed to solve the High-level Hierarchical Optimization (HHO box) in MLIR and to interoperate nicely within a *Mixture Of Expert Compilers* environment (i.e. the *CGSel* box). -The [Rationale Document](../Rationale/RationaleLinalgDialect.md) goes into +The [Rationale Document](../../Rationale/RationaleLinalgDialect.md) goes into significantly more design and architectural decision details. ## Set of Key Transformations @@ -37,9 +37,9 @@ Linalg IR and that have influenced its design: ## High-Level Description of Linalg Ops Linalg takes at least some inspiration from all previously -[listed prior art](../Rationale/RationaleLinalgDialect.md/#prior-art). The design enables the definition of -***CustomOps*** with generic properties that enable -[key transformations](#key_transformations), including lowering to scalar +[listed prior art](../../Rationale/RationaleLinalgDialect.md/#prior-art). The +design enables the definition of ***CustomOps*** with generic properties that +enable [key transformations](#key_transformations), including lowering to scalar load/store and other operations or to external library calls and intrinsics. These ops can have ***either tensor or buffer*** as both input and output @@ -70,7 +70,7 @@ abstraction on tensors and buffers. This `linalg.generic` operation can express custom operations that optionally have *indexing semantics* (by accessing the iteration indices using the `linalg.index` operation). The properties of `linalg.generic` are the result of applying the guiding principles described in -the [Rationale Document](../Rationale/RationaleLinalgDialect.md). They are +the [Rationale Document](../../Rationale/RationaleLinalgDialect.md). They are listed next, with a brief example and discussion for each. #### Property 1: Input and Output Operands Define The Iteration Space @@ -492,7 +492,7 @@ As it stands, the six properties above define the semantics of a `linalg.generic` op. It is an open question whether all of these semantics are strictly necessary in practice and whether some should or could be derived automatically while still maintaining the -[core guiding principles](../Rationale/RationaleLinalgDialect.md/#core-guiding-principlesa-nameguiding_principlesa). +[core guiding principles](../../Rationale/RationaleLinalgDialect.md/#core-guiding-principlesa-nameguiding_principlesa). For the time being, we have settled on the combination of these properties because of empirical evidence building and working on multiple high-level @@ -662,13 +662,16 @@ void batchmatmul::regionBuilder(ArrayRef args) { } ``` -### YAML Based Named Structured Ops +### YAML Based Named Structured Ops Linalg provides a declarative generation tool (`mlir-linalg-ods-yaml-gen`) to automatically produce named ops from a YAML-based op description format intended -to capture the structure of the named ops and be generated from a higher level -"mathy" DSL syntax. This facility is currently in flight and is intended to -subsume the above when ready. See the C++ class to YAML mapping traits in +to capture the structure of the named ops. The YAML-based op descriptions are +generated from a higher level [DSL](OpDSL.md) and are not meant to be edited +directly. + +This facility is currently in flight and is intended to subsume the above when +ready. See the C++ class to YAML mapping traits in `mlir-mlinalg-ods-yaml-gen.cpp` as the source of truth for the schema. Most of the above documentation roughly applies to this path and will be ported diff --git a/mlir/docs/Rationale/RationaleLinalgDialect.md b/mlir/docs/Rationale/RationaleLinalgDialect.md index 102e5f52efe62..c8dca244a4cb4 100644 --- a/mlir/docs/Rationale/RationaleLinalgDialect.md +++ b/mlir/docs/Rationale/RationaleLinalgDialect.md @@ -102,9 +102,9 @@ to the *structured control flow* dialect (named `LoopOps`). More components can be extracted, redesigned and generalized when new uses or requirements arise. -Several [design questions](../Dialects/Linalg.md/#open_issues) remain open in Linalg, which does not -claim to be a general solution to all compilation problems. -It does aim at driving thinking and implementations of domain-specific +Several [design questions](../Dialects/Linalg/_index.md/#open_issues) remain +open in Linalg, which does not claim to be a general solution to all compilation +problems. It does aim at driving thinking and implementations of domain-specific abstractions where programmer's intent can be captured at a very high level, directly in the IR. @@ -566,18 +566,18 @@ reconcile [core guiding principles](#guiding_principles) with real-world requirements when producing an implementation based on MLIR. ### Algorithms + Data Structures = Programs + This is a twist on Niklaus Wirth's formulation but captures the essence of the design of Linalg: control-flow does not exist in a vacuum, independently of -data. -On the contrary, there is a very strong relationship between control-flow and -data structures: one cannot exist without the other. This has multiple -implications on the [semantics of Linalg Ops](../Dialects/Linalg.md/#linalg_op) and their -transformations. In particular, this observation influences whether -certain transformations are better done: -- as control flow or data structure manipulation, -- on Linalg ops attributes or on loops after some partial lowering -occurred, -- as extensions to the Linalg dialect in terms of new ops or attributes. +data. On the contrary, there is a very strong relationship between control-flow +and data structures: one cannot exist without the other. This has multiple +implications on the +[semantics of Linalg Ops](../Dialects/Linalg/_index.md/#linalg_ops) and their +transformations. In particular, this observation influences whether certain +transformations are better done: - as control flow or data structure +manipulation, - on Linalg ops attributes or on loops after some partial lowering +occurred, - as extensions to the Linalg dialect in terms of new ops or +attributes. ### The Dialect Need not be Closed Under Transformations This is probably the most surprising and counter-intuitive @@ -615,9 +615,8 @@ Principles](#guiding_principles)---with the following picture. MLIR Codegen Flow -This figure is not meant to be perfectly accurate but a rough map of -how we view the distribution of structural information in existing -systems, from a codegen-friendly angle. Unsurprisingly, the -[Linalg Dialect](../Dialects/Linalg.md) and its -future evolutions aspire to a position in the top-right of this map. - +This figure is not meant to be perfectly accurate but a rough map of how we view +the distribution of structural information in existing systems, from a +codegen-friendly angle. Unsurprisingly, the +[Linalg Dialect](../Dialects/Linalg/_index.md) and its future evolutions aspire +to a position in the top-right of this map. From 788a5d4afe6407e647454a9832a7b4a27fba06bf Mon Sep 17 00:00:00 2001 From: Stuart Ellis Date: Wed, 23 Jun 2021 18:05:29 +0100 Subject: [PATCH 388/619] PoC for Flang Driver Plugins --- clang/include/clang/Driver/Options.td | 10 +-- flang/CMakeLists.txt | 1 - flang/examples/CMakeLists.txt | 2 + flang/examples/HelloWorld/CMakeLists.txt | 12 ++++ .../examples/HelloWorld/HelloWorldPlugin.cpp | 18 +++++ .../include/flang/Frontend/FrontendActions.h | 4 ++ .../include/flang/Frontend/FrontendOptions.h | 11 ++- .../flang/Frontend/FrontendPluginRegistry.h | 26 +++++++ flang/lib/Frontend/CompilerInvocation.cpp | 17 +++++ flang/lib/Frontend/FrontendAction.cpp | 4 ++ flang/lib/Frontend/FrontendActions.cpp | 4 ++ .../ExecuteCompilerInvocation.cpp | 67 +++++++++++++++++++ llvm/include/llvm/Support/Registry.h | 10 +++ llvm/lib/Support/DynamicLibrary.cpp | 6 ++ 14 files changed, 186 insertions(+), 6 deletions(-) create mode 100644 flang/examples/HelloWorld/CMakeLists.txt create mode 100644 flang/examples/HelloWorld/HelloWorldPlugin.cpp create mode 100644 flang/include/flang/Frontend/FrontendPluginRegistry.h diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 41b7299b02745..1629a74ae62c9 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5221,10 +5221,6 @@ def enable_noundef_analysis : Flag<["-"], "enable-noundef-analysis">, Group, HelpText<"Discard value names in LLVM IR">, MarshallingInfoFlag>; -def load : Separate<["-"], "load">, MetaVarName<"">, - HelpText<"Load the named plugin (dynamic shared object)">; -def plugin : Separate<["-"], "plugin">, MetaVarName<"">, - HelpText<"Use the named plugin action instead of the default action (use \"help\" to list available options)">; def plugin_arg : JoinedAndSeparate<["-"], "plugin-arg-">, MetaVarName<" ">, HelpText<"Pass to plugin ">; @@ -5788,6 +5784,12 @@ def init_only : Flag<["-"], "init-only">, HelpText<"Only execute frontend initialization">; } // let Group = Action_Group + +def load : Separate<["-"], "load">, MetaVarName<"">, + HelpText<"Load the named plugin (dynamic shared object)">; +def plugin : Separate<["-"], "plugin">, MetaVarName<"">, + HelpText<"Use the named plugin action instead of the default action (use \"help\" to list available options)">; + } // let Flags = [CC1Option, FC1Option, NoDriverOption] //===----------------------------------------------------------------------===// diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt index adf3dc819dc93..d9495c4748ce5 100644 --- a/flang/CMakeLists.txt +++ b/flang/CMakeLists.txt @@ -390,7 +390,6 @@ endif() include(CMakeParseArguments) include(AddFlang) - add_subdirectory(include) add_subdirectory(lib) add_subdirectory(cmake/modules) diff --git a/flang/examples/CMakeLists.txt b/flang/examples/CMakeLists.txt index 3ca9feddf33e9..f896874f7f89b 100644 --- a/flang/examples/CMakeLists.txt +++ b/flang/examples/CMakeLists.txt @@ -6,3 +6,5 @@ add_executable(external-hello-world target_link_libraries(external-hello-world FortranRuntime ) +#add_subdirectory(HelloEarth) +add_subdirectory(HelloWorld) diff --git a/flang/examples/HelloWorld/CMakeLists.txt b/flang/examples/HelloWorld/CMakeLists.txt new file mode 100644 index 0000000000000..eb716c2f17b49 --- /dev/null +++ b/flang/examples/HelloWorld/CMakeLists.txt @@ -0,0 +1,12 @@ +add_llvm_library( + flangHelloWorldPlugin + MODULE + HelloWorldPlugin.cpp + + DEPENDS + clangBasic + + LINK_COMPONENTS + Option + Support +) diff --git a/flang/examples/HelloWorld/HelloWorldPlugin.cpp b/flang/examples/HelloWorld/HelloWorldPlugin.cpp new file mode 100644 index 0000000000000..30d23ce104227 --- /dev/null +++ b/flang/examples/HelloWorld/HelloWorldPlugin.cpp @@ -0,0 +1,18 @@ +#include "flang/Frontend/FrontendActions.h" +#include "flang/Frontend/FrontendPluginRegistry.h" + +__attribute__((constructor)) +static void printing() { + llvm::outs() << " > Plugin Constructed\n"; +} + +using namespace Fortran::frontend; + +class HelloWorldFlangPlugin : public PluginParseTreeAction +{ + void ExecuteAction() override { + llvm::outs() << "Hello World from your new plugin (Remote plugin)\n"; + } +}; + +static FrontendPluginRegistry::Add X("-hello-w", "Hello World Plugin example"); diff --git a/flang/include/flang/Frontend/FrontendActions.h b/flang/include/flang/Frontend/FrontendActions.h index 72eb44223fe49..d30ae1dbed0ff 100644 --- a/flang/include/flang/Frontend/FrontendActions.h +++ b/flang/include/flang/Frontend/FrontendActions.h @@ -30,6 +30,10 @@ struct MeasurementVisitor { // Custom Consumer Actions //===----------------------------------------------------------------------===// +class PluginParseTreeAction : public FrontendAction { + void ExecuteAction() override; +}; + class InputOutputTestAction : public FrontendAction { void ExecuteAction() override; }; diff --git a/flang/include/flang/Frontend/FrontendOptions.h b/flang/include/flang/Frontend/FrontendOptions.h index 42ce499566e9f..5867b790f6fce 100644 --- a/flang/include/flang/Frontend/FrontendOptions.h +++ b/flang/include/flang/Frontend/FrontendOptions.h @@ -77,7 +77,10 @@ enum ActionKind { GetSymbolsSources, /// Only execute frontend initialization - InitOnly + InitOnly, + + /// Run a plugin action, \see ActionName. + PluginAction /// TODO: RunPreprocessor, EmitLLVM, EmitLLVMOnly, /// EmitCodeGenOnly, EmitAssembly, (...) @@ -249,6 +252,12 @@ class FrontendOptions { // Source file encoding Fortran::parser::Encoding encoding_{Fortran::parser::Encoding::UTF_8}; + /// The list of plugins to load. + std::vector plugins; + + /// The name of the action to run when using a plugin action. + std::string ActionName; + public: FrontendOptions() : showHelp_(false), showVersion_(false), instrumentedParse_(false), diff --git a/flang/include/flang/Frontend/FrontendPluginRegistry.h b/flang/include/flang/Frontend/FrontendPluginRegistry.h new file mode 100644 index 0000000000000..d775393afe7f6 --- /dev/null +++ b/flang/include/flang/Frontend/FrontendPluginRegistry.h @@ -0,0 +1,26 @@ +//===- FrontendPluginRegistry.h ---------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Pluggable Frontend Action Interface +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_FLANG_FRONTEND_FRONTENDPLUGINREGISTRY_H +#define LLVM_FLANG_FRONTEND_FRONTENDPLUGINREGISTRY_H + +#include "flang/Frontend/FrontendAction.h" +#include "llvm/Support/Registry.h" + +namespace Fortran::frontend { + +/// The frontend plugin registry. +using FrontendPluginRegistry = llvm::Registry; + +} // namespace flang + +#endif // LLVM_FLANG_FRONTEND_FRONTENDPLUGINREGISTRY_H \ No newline at end of file diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index 6a2bf1947e350..20d3d2d253a75 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -199,6 +199,23 @@ static bool ParseFrontendArgs(FrontendOptions &opts, llvm::opt::ArgList &args, } } + printf("--------- (ParseFrontendArgs) ----------\n"); + + if (llvm::opt::Arg *a = args.getLastArg(clang::driver::options::OPT_load)) { + llvm::outs() << " " << a->getOption().getName() << " >>> " << a->getValue() << "\n"; + //opts.plugins = a->getValue(); + opts.plugins.push_back(a->getValue()); + } + + if (const llvm::opt::Arg *a = args.getLastArg(clang::driver::options::OPT_plugin)) { + llvm::outs() << " " << a->getOption().getName() << " >>> " << a->getValue() << "\n"; + //opts.plugins.emplace_back(a->getValue()); + opts.programAction_ = PluginAction; + opts.ActionName = a->getValue(); + } + + printf("-------- (\\ParseFrontendArgs) ----------\n"); + opts.outputFile_ = args.getLastArgValue(clang::driver::options::OPT_o); opts.showHelp_ = args.hasArg(clang::driver::options::OPT_help); opts.showVersion_ = args.hasArg(clang::driver::options::OPT_version); diff --git a/flang/lib/Frontend/FrontendAction.cpp b/flang/lib/Frontend/FrontendAction.cpp index 23e4ca3f33063..62d4bac0df6fa 100644 --- a/flang/lib/Frontend/FrontendAction.cpp +++ b/flang/lib/Frontend/FrontendAction.cpp @@ -10,6 +10,7 @@ #include "flang/Frontend/CompilerInstance.h" #include "flang/Frontend/FrontendActions.h" #include "flang/Frontend/FrontendOptions.h" +#include "flang/Frontend/FrontendPluginRegistry.h" #include "flang/FrontendTool/Utils.h" #include "clang/Basic/DiagnosticFrontend.h" #include "llvm/Support/Errc.h" @@ -17,6 +18,9 @@ using namespace Fortran::frontend; +LLVM_INSTANTIATE_REGISTRY(FrontendPluginRegistry) + + void FrontendAction::set_currentInput(const FrontendInputFile ¤tInput) { this->currentInput_ = currentInput; } diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp index 8ee42d73c6e46..d067ebd141b85 100644 --- a/flang/lib/Frontend/FrontendActions.cpp +++ b/flang/lib/Frontend/FrontendActions.cpp @@ -488,3 +488,7 @@ void InitOnlyAction::ExecuteAction() { "Use `-init-only` for testing purposes only"); ci.diagnostics().Report(DiagID); } + +void PluginParseTreeAction::ExecuteAction() { + +} diff --git a/flang/lib/FrontendTool/ExecuteCompilerInvocation.cpp b/flang/lib/FrontendTool/ExecuteCompilerInvocation.cpp index 243e25163dc05..8ef0c93404c10 100644 --- a/flang/lib/FrontendTool/ExecuteCompilerInvocation.cpp +++ b/flang/lib/FrontendTool/ExecuteCompilerInvocation.cpp @@ -13,11 +13,14 @@ #include "flang/Frontend/CompilerInstance.h" #include "flang/Frontend/FrontendActions.h" +#include "flang/Frontend/FrontendAction.h" +#include "flang/Frontend/FrontendPluginRegistry.h" #include "clang/Driver/Options.h" #include "llvm/Option/OptTable.h" #include "llvm/Option/Option.h" #include "llvm/Support/BuryPointer.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/DynamicLibrary.h" namespace Fortran::frontend { @@ -79,6 +82,22 @@ static std::unique_ptr CreateFrontendBaseAction( case InitOnly: return std::make_unique(); break; + case PluginAction: { + llvm::outs() << "---------- (case: PluginAction) --------\n"; + llvm::outs() << " Plugin Action: " << ci.frontendOpts().ActionName << "\n"; + for (const FrontendPluginRegistry::entry &plugin : FrontendPluginRegistry::entries()) { + llvm::outs() << " " << plugin.getName() << "\t-- " << plugin.getDesc() << "\n"; + if (plugin.getName() == ci.frontendOpts().ActionName) { + llvm::outs() << "We have found the plugin name!! :-)\n"; + std::unique_ptr P(plugin.instantiate()); + return std::move(P); + } + } + + unsigned diagID = ci.diagnostics().getCustomDiagID(clang::DiagnosticsEngine::Error, "unable to find plugin '%0'"); + ci.diagnostics().Report(diagID) << ci.frontendOpts().ActionName; + return nullptr; + } default: break; // TODO: @@ -92,6 +111,26 @@ static std::unique_ptr CreateFrontendBaseAction( return 0; } +/// <<< TEMP Plugin Example + +class HelloWorldFlangPlugin : public PluginParseTreeAction +{ + protected: + void ExecuteAction() override { + llvm::outs() << "Hello World from your new plugin (Hello World)\n"; + } +}; + +class HelloTwoFlangPlugin : public PluginParseTreeAction +{ + protected: + void ExecuteAction() override { + llvm::outs() << "Hello World from your new plugin (Hello Two)\n"; + } +}; + +/// <<<<< TEMP Plugin Example + std::unique_ptr CreateFrontendAction(CompilerInstance &ci) { // Create the underlying action. std::unique_ptr act = CreateFrontendBaseAction(ci); @@ -100,6 +139,7 @@ std::unique_ptr CreateFrontendAction(CompilerInstance &ci) { return act; } + bool ExecuteCompilerInvocation(CompilerInstance *flang) { // Honor -help. if (flang->frontendOpts().showHelp_) { @@ -117,6 +157,33 @@ bool ExecuteCompilerInvocation(CompilerInstance *flang) { return true; } + llvm::outs() << "------ (ExecuteCompilerInvocation) -----\n"; + + // Load any requested plugins. + for (const std::string &Path : flang->frontendOpts().plugins) { + llvm::outs() << " Load :: Path >> " << Path << "\n"; + std::string Error; + if (llvm::sys::DynamicLibrary::LoadLibraryPermanently(Path.c_str(), &Error)) { + unsigned diagID = flang->diagnostics().getCustomDiagID(clang::DiagnosticsEngine::Error, "unable to load plugin '%0': '%1'"); + flang->diagnostics().Report(diagID) << Path << Error; + } + } + + llvm::outs() << " Plugin Registry List >>\n"; + for (const FrontendPluginRegistry::entry &plugin : FrontendPluginRegistry::entries()) { + llvm::outs() << plugin.getName() << " -- " << plugin.getDesc() << "\n"; + } + llvm::outs() << " << Plugin Registry List\n"; + + static FrontendPluginRegistry::Add X("-hello-wor", "simple Plugin example"); + static FrontendPluginRegistry::Add Y("hellotwo", "another print plugin example"); + + llvm::outs() << "----- (\\ExecuteCompilerInvocation) -----\n"; + + // If there were errors in processing arguments, don't do anything else. + if (flang->diagnostics().hasErrorOccurred()) + return false; + // Create and execute the frontend action. std::unique_ptr act(CreateFrontendAction(*flang)); if (!act) diff --git a/llvm/include/llvm/Support/Registry.h b/llvm/include/llvm/Support/Registry.h index 5bb6a254a47f4..58b1825a71c06 100644 --- a/llvm/include/llvm/Support/Registry.h +++ b/llvm/include/llvm/Support/Registry.h @@ -18,6 +18,7 @@ #include "llvm/ADT/iterator_range.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/DynamicLibrary.h" +#include "llvm/Support/CommandLine.h" #include namespace llvm { @@ -120,7 +121,10 @@ namespace llvm { public: Add(StringRef Name, StringRef Desc) : Entry(Name, Desc, CtorFn), Node(Entry) { + llvm::outs() << " -------------- (Registry) --------------\n"; + llvm::outs() << " \tAdd :: " << Name << "\n"; add_node(&Node); + llvm::outs() << " ------------- (\\Registry) --------------\n"; } }; }; @@ -145,8 +149,14 @@ namespace llvm { else \ Head = N; \ Tail = N; \ + llvm::outs() << " REGISTRY_CLASS :: list >> \n"; \ + for (const REGISTRY_CLASS::entry &plugin : REGISTRY_CLASS::entries()) { \ + llvm::outs() << " " << plugin.getName() << " \t-- " << plugin.getDesc() << "\n"; \ + } \ } \ template typename Registry::iterator Registry::begin() { \ + llvm::outs() << "iterator (head)\n"; \ + llvm::outs() << Head << "\n"; \ return iterator(Head); \ } \ template REGISTRY_CLASS::node *Registry::Head; \ diff --git a/llvm/lib/Support/DynamicLibrary.cpp b/llvm/lib/Support/DynamicLibrary.cpp index 2bcdbdcdb9b0d..ff989da453652 100644 --- a/llvm/lib/Support/DynamicLibrary.cpp +++ b/llvm/lib/Support/DynamicLibrary.cpp @@ -149,12 +149,18 @@ DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *FileName, // ManagedStatic can be added from static constructors in HandleSet::DLOpen. HandleSet& HS = *OpenedHandles; + printf(" -------- (getPermantentLibrary) --------\n"); + printf(" get Lib: %s \n", FileName); + void *Handle = HandleSet::DLOpen(FileName, Err); if (Handle != &Invalid) { + printf(" Handle != Invalid \n"); SmartScopedLock Lock(*SymbolsMutex); HS.AddLibrary(Handle, /*IsProcess*/ FileName == nullptr); } + printf(" ------- (\\getPermantentLibrary) --------\n"); + return DynamicLibrary(Handle); } From ea3698ded34419310a92e441e92be1c85444140d Mon Sep 17 00:00:00 2001 From: Andrzej Warzynski Date: Mon, 14 Jun 2021 14:26:58 +0000 Subject: [PATCH 389/619] [flang][docs] Add documentation for the new Flang driver Differential Revision: https://reviews.llvm.org/D104229 --- flang/docs/FlangDriver.md | 247 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 247 insertions(+) create mode 100644 flang/docs/FlangDriver.md diff --git a/flang/docs/FlangDriver.md b/flang/docs/FlangDriver.md new file mode 100644 index 0000000000000..514cf47d25682 --- /dev/null +++ b/flang/docs/FlangDriver.md @@ -0,0 +1,247 @@ + + +# Flang drivers + +```eval_rst +.. contents:: + :local: +``` + +There are two main drivers in Flang: +* the compiler driver, `flang-new` +* the frontend driver, `flang-new -fc1` + +The compiler driver will allow you to control all compilation phases (i.e. +preprocessing, frontend code-generation, middlend/backend code-optimisation and +lowering, linking). For frontend specific tasks, the compiler driver creates a +Fortran compilation job and delegates it to `flang-new -fc1`, the frontend driver. + +The frontend driver glues all of the frontend libraries together and provides +an easy-to-use and intuitive interface to the frontend. It accepts many +frontend-specific options not available in `flang-new` and as such it provides a +finer control over the frontend. Similarly to `-Xclang` in `clang`, you can use +`-Xflang` to forward the frontend specific flags from the compiler directly to +the frontend driver. + +## Compiler Driver + +The main entry point for Flang's compiler driver is implemented in +`flang/tools/flang-driver/driver.cpp`. Flang's compiler driver is implemented +in terms of Clang's driver library, `clangDriver`. This approach allows us to: +* benefit from Clang's support for various targets, platforms and operating systems +* leverage Clang's ability to drive various backends available in LLVM, as well + as linkers and assemblers. +One implication of this dependency on Clang is that all of Flang's compiler +options are defined alongside Clang's options in +`clang/include/clang/Driver/Options.td`. For options that are common for both +Flang and Clang, the corresponding definitions are shared. + +Internally, a `clangDriver` based compiler driver works by creating actions +that correspond to various compilation phases, e.g. `PreprocessJobClass`, +`CompileJobClass`, `BackendJobClass` or `LinkJobClass` from the +`clang::driver::Action::ActionClass` enum. There are also other, more +specialised actions, e.g. `MigrateJobClass` or `InputClass`, that do not map +directly to common compilation steps. The actions to run are determined from +the supplied compiler flags, e.g. + +* `-E` for `PreprocessJobClass`, +* `-c` for `CompileJobClass`. + +In most cases, the driver creates a chain of actions/jobs/phases where the +output from one action is the input for the subsequent one. You can use the +`-ccc-print-phases` flag to see the sequence of actions that the driver will +create for your compiler invocation: +```bash +flang-new -ccc-print-phases -E file.f ++- 0: input, "file.f", f95-cpp-input +1: preprocessor, {0}, f95 +``` +As you can see, for `-E` the driver creates only two jobs and stops immediately +after preprocessing. The first job simply prepares the input. For `-c`, the +pipeline of the created jobs is more complex: +```bash +flang-new -ccc-print-phases -c file.f + +- 0: input, "file.f", f95-cpp-input + +- 1: preprocessor, {0}, f95 + +- 2: compiler, {1}, ir ++- 3: backend, {2}, assembler +4: assembler, {3}, object +``` +Note that currently Flang does not support code-generation and `flang-new` will +fail during the second step above with the following error: +```bash +error: code-generation is not available yet +``` +The other phases are printed nonetheless when using `-ccc-print-phases`, as +that reflects what `clangDriver`, the library, will try to create and run. + +For actions specific to the frontend (e.g. preprocessing or code generation), a +command to call the frontend driver is generated (more specifically, an +instance of `clang::driver::Command`). Every command is bound to an instance of +`clang::driver::Tool`. For Flang we introduced a specialisation of this class: +`clang::driver::Flang`. This class implements the logic to either translate or +forward compiler options to the frontend driver, `flang-new -fc1`. + +You can read more on the design of `clangDriver` in Clang's [Driver Design & +Internals](https://clang.llvm.org/docs/DriverInternals.html). + +## Frontend Driver +Flang's frontend driver is the main interface between end-users and the Flang +frontend. The high-level design is similar to Clang's frontend driver, `clang +-cc1` and consists of the following classes: +* `CompilerInstance`, which is a helper class that encapsulates and manages + various objects that are always required by the frontend (e.g. `AllSources`, + `AllCookedSources, `Parsing`, `CompilerInvocation`, etc.). In most cases + `CompilerInstance` owns these objects, but it also can share them with its + clients when required. It also implements utility methods to construct and + manipulate them. +* `CompilerInvocation` encapsulates the configuration of the current + invocation of the compiler as derived from the command-line options and the + input files (in particular, file extensions). Among other things, it holds an + instance of `FrontendOptions`. Like `CompilerInstance`, it owns the objects + that it manages. It can share them with its clients that want to access them + even after the corresponding `CompilerInvocation` has been destructed. +* `FrontendOptions` holds options that control the behaviour of the frontend, + as well as e.g. the list of the input files. These options come either + directly from the users (through command-line flags) or are derived from + e.g. the host system configuration. +* `FrontendAction` and `FrontendActions` (the former being the base class for + the latter) implement the actual actions to perform by the frontend. Usually + there is one specialisation of `FrontendActions` for every compiler action flag + (e.g. `-E`, `-fdebug-unparse`). These classes also contain various hooks that + allow you to e.g. fine-tune the configuration of the frontend based on the + input. + +This list is not exhaustive and only covers the main classes that implement the +driver. The main entry point for the frontend driver, `fc1_main`, is +implemented in `flang/tools/flang-driver/driver.cpp`. It can be accessed by +invoking the compiler driver, `flang-new`, with the `-fc1` flag. + +The frontend driver will only run one action at a time. If you specify multiple +action flags, only the last one will be taken into account. The default action +is `ParseSyntaxOnlyAction`, which corresponds to `-fsyntax-only`. In other +words, `flang-new -fc1 ` is equivalent to `flang-new -fc1 -fsyntax-only +`. + +## Adding new Compiler Options +Adding a new compiler option in Flang consists of two steps: +* define the new option in a dedicated TableGen file, +* parse and implement the option in the relevant drivers that support it. + +### Option Definition +All of Flang's compiler and frontend driver options are defined in +`clang/include/clang/Driver/Options.td` in Clang. When adding a new option to +Flang, you will either: + * extend the existing definition for an option that is already available + in one of Clang's drivers (e.g. `clang`), but not yet available in Flang, or + * add a completely new definition if the option that you are adding has not + been defined yet. + +There are many predefined TableGen classes and records that you can use to fine +tune your new option. The list of available configurations can be overwhelming +at times. Sometimes the easiest approach is to find an existing option that has +similar semantics to your new option and start by copying that. + +For every new option, you will also have to define the visibility of the new +option. This is controlled through the `Flags` field. You can use the following +Flang specific option flags to control this: + * `FlangOption` - this option will be available in the `flang-new` compiler driver, + * `FC1Option` - this option will be available in the `flang-new -fc1` frontend driver, + * `FlangOnlyOption` - this option will not be visible in Clang drivers. + +Please make sure that options that you add are only visible in drivers that can +support it. For example, options that only make sense for Fortran input files +(e.g. `-ffree-form`) should not be visible in Clang and be marked as +`FlangOnlyOption`. + +When deciding what `OptionGroup` to use when defining a new option in the +`Options.td` file, many new options fall into one of the following two +categories: + * `Action_Group` - options that define an action to run (e.g. + `-fsyntax-only`, `-E`) + * `f_Group` - target independent compiler flags (e.g. `-ffixed-form`, + `-fopenmp`) +There are also other groups and occasionally you will use them instead of the +groups listed above. + +### Option Implementation +First, every option needs to be parsed. Flang compiler options are parsed in +two different places, depending on which driver they belong to: + +* frontend driver: `flang/lib/Frontend/CompilerInvocation.cpp`, +* compiler driver: `clang/lib/Driver/ToolChains/Flang.cpp`. + +The parsing will depend on the semantics encoded in the TableGen definition. + +When adding a compiler driver option (i.e. an option that contains +`FlangOption` among its `Flags`) that you also intend to be understood by the +frontend, make sure that it is either forwarded to `flang-new -fc1` or translated +into some other option that is accepted by the frontend driver. In the case of +options that contain both `FlangOption` and `FC1Option` among its flags, we +usually just forward from `flang-new` to `flang-new -fc1`. This is then tested in +`flang/test/Driver/frontend-forward.F90`. + +What follows is usually very dependant on the meaning of the corresponding +option. In general, regular compiler flags (e.g. `-ffree-form`) are mapped to +some state within the driver. A lot of this state is stored within an instance +of `FrontendOptions`, but there are other more specialised classes too. Action +flags (e.g. `-fsyntax-only`) are usually more complex overall, but also more +structured in terms of the implementation. + +### Action Options +For options that correspond to an action (i.e. marked as `Action_Group`), you +will have to define a dedicated instance of `FrontendActions` in +`flang/include/flang/Frontend/FrontendOptions.h`. For example, for +`-fsyntax-only` we defined: +```cpp +class ParseSyntaxOnlyAction : public PrescanAndSemaAction { + void ExecuteAction() override; +}; +``` +Command line options are mapped to frontend actions through the +`Fortran::frontend::ActionKind` enum. For every new action option that you +add, you will have to add a dedicated entry in that enum (e.g. +`ParseSyntaxOnly` for `-fsyntax-only`) and a corresponding `case` in +`ParseFrontendArgs` function in the `CompilerInvocation.cpp` file, e.g.: +```cpp + case clang::driver::options::OPT_fsyntax_only: + opts.programAction_ = ParseSyntaxOnly; + break; +``` +Note that this simply sets the program/frontend action within the frontend +driver. You still have make sure that the corresponding frontend action class +is instantiated when your new action option is used. The relevant `switch` +statement is implemented in `Fortran::frontend::CreatedFrontendBaseAction` in +the `ExecuteCompilerInvocation.cpp` file. Here's an example for +`-fsyntax-only`: +```cpp + case ParseSyntaxOnly: + return std::make_unique(); +``` +At this point you should be able to trigger that frontend action that you have +just added using your new frontend option. + +# Testing +In LIT, we define two variables that you can use to invoke Flang's drivers: +* `%flang` is expanded as `flang-new` (i.e. the compiler driver) +* `%flang_fc1` is expanded as `flang-new -fc1` (i.e. the frontend driver) + +For most regression tests for the frontend, you will want to use `%flang_fc1`. +In some cases, the observable behaviour will be identical regardless of whether +`%flang` or `%flang_fc1` is used. However, when you are using `%flang` instead +of `%flang_fc1`, the compiler driver will add extra flags to the frontend +driver invocation (i.e. `flang-new -fc1 -`). In some cases that might +be exactly what you want to test. In fact, you can check these additional +flags by using the `-###` compiler driver command line option. + +Lastly, you can use `! REQUIRES: ` for tests that will only work when +`` is available. For example, you can use`! REQUIRES: shell` to mark a +test as only available on Unix-like systems (i.e. systems that contain a Unix +shell). In practice this means that the corresponding test is skipped on +Windows. From 39f64c4c83754b4e436d7fffa31bd70f11d7a657 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Sat, 19 Jun 2021 17:36:47 +1000 Subject: [PATCH 390/619] [ORC] Add wrapper-function support methods to ExecutorProcessControl. Adds support for both synchronous and asynchronous calls to wrapper functions using SPS (Simple Packed Serialization). Also adds support for wrapping functions on the JIT side in SPS-based wrappers that can be called from the executor. These new methods simplify calls between the JIT and Executor, and will be used in upcoming ORC runtime patches to enable communication between ORC and the runtime. --- llvm/include/llvm/ExecutionEngine/Orc/Core.h | 12 ++ .../Orc/ExecutorProcessControl.h | 109 +++++++++- .../Orc/OrcRPCExecutorProcessControl.h | 12 +- .../Orc/Shared/WrapperFunctionUtils.h | 186 +++++++++++++++--- .../Orc/ExecutorProcessControl.cpp | 65 +++++- .../ExecutionEngine/Orc/CMakeLists.txt | 1 + .../Orc/ExecutorProcessControlTest.cpp | 105 ++++++++++ .../Orc/WrapperFunctionUtilsTest.cpp | 48 ++++- 8 files changed, 489 insertions(+), 49 deletions(-) create mode 100644 llvm/unittests/ExecutionEngine/Orc/ExecutorProcessControlTest.cpp diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Core.h b/llvm/include/llvm/ExecutionEngine/Orc/Core.h index ae826912d629f..42bcffd36b25a 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Core.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Core.h @@ -216,6 +216,18 @@ class SymbolLookupSet { add(Name, Flags); } + /// Construct a SymbolLookupSet from DenseMap keys. + template + static SymbolLookupSet + fromMapKeys(const DenseMap &M, + SymbolLookupFlags Flags = SymbolLookupFlags::RequiredSymbol) { + SymbolLookupSet Result; + Result.Symbols.reserve(M.size()); + for (const auto &KV : M) + Result.add(KV.first, Flags); + return Result; + } + /// Add an element to the set. The client is responsible for checking that /// duplicates are not added. SymbolLookupSet & diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ExecutorProcessControl.h b/llvm/include/llvm/ExecutionEngine/Orc/ExecutorProcessControl.h index 7969a8398c952..566637e104456 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/ExecutorProcessControl.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/ExecutorProcessControl.h @@ -24,6 +24,7 @@ #include "llvm/Support/MSVCErrorWorkarounds.h" #include +#include #include namespace llvm { @@ -32,6 +33,19 @@ namespace orc { /// ExecutorProcessControl supports interaction with a JIT target process. class ExecutorProcessControl { public: + /// Sender to return the result of a WrapperFunction executed in the JIT. + using SendResultFunction = + unique_function; + + /// An asynchronous wrapper-function. + using AsyncWrapperFunction = unique_function; + + /// A map associating tag names with asynchronous wrapper function + /// implementations in the JIT. + using WrapperFunctionAssociationMap = + DenseMap; + /// APIs for manipulating memory in the target process. class MemoryAccess { public: @@ -138,14 +152,91 @@ class ExecutorProcessControl { virtual Expected runAsMain(JITTargetAddress MainFnAddr, ArrayRef Args) = 0; - /// Run a wrapper function in the executor. + /// Run a wrapper function in the executor (async version). + /// + /// The wrapper function should be callable as: + /// + /// \code{.cpp} + /// CWrapperFunctionResult fn(uint8_t *Data, uint64_t Size); + /// \endcode{.cpp} + /// + /// The given OnComplete function will be called to return the result. + virtual void runWrapperAsync(SendResultFunction OnComplete, + JITTargetAddress WrapperFnAddr, + ArrayRef ArgBuffer) = 0; + + /// Run a wrapper function in the executor. The wrapper function should be + /// callable as: /// /// \code{.cpp} /// CWrapperFunctionResult fn(uint8_t *Data, uint64_t Size); /// \endcode{.cpp} + shared::WrapperFunctionResult runWrapper(JITTargetAddress WrapperFnAddr, + ArrayRef ArgBuffer) { + std::promise RP; + auto RF = RP.get_future(); + runWrapperAsync( + [&](shared::WrapperFunctionResult R) { RP.set_value(std::move(R)); }, + WrapperFnAddr, ArgBuffer); + return RF.get(); + } + + /// Run a wrapper function using SPS to serialize the arguments and + /// deserialize the results. + template + void runSPSWrapperAsync(SendResultT &&SendResult, + JITTargetAddress WrapperFnAddr, + const ArgTs &...Args) { + shared::WrapperFunction::callAsync( + [this, WrapperFnAddr](SendResultFunction SendResult, + const char *ArgData, size_t ArgSize) { + runWrapperAsync(std::move(SendResult), WrapperFnAddr, + ArrayRef(ArgData, ArgSize)); + }, + std::move(SendResult), Args...); + } + + /// Run a wrapper function using SPS to serialize the arguments and + /// deserialize the results. + template + Error runSPSWrapper(JITTargetAddress WrapperFnAddr, RetT &RetVal, + const ArgTs &...Args) { + return shared::WrapperFunction::call( + [this, WrapperFnAddr](const char *ArgData, size_t ArgSize) { + return runWrapper(WrapperFnAddr, ArrayRef(ArgData, ArgSize)); + }, + RetVal, Args...); + } + + /// Wrap a handler that takes concrete argument types (and a sender for a + /// concrete return type) to produce an AsyncWrapperFunction. Uses SPS to + /// unpack the arguments and pack the result. /// - virtual Expected - runWrapper(JITTargetAddress WrapperFnAddr, ArrayRef ArgBuffer) = 0; + /// This function is usually used when building association maps. + template + static AsyncWrapperFunction wrapAsyncWithSPS(HandlerT &&H) { + return [H = std::forward(H)](SendResultFunction SendResult, + const char *ArgData, + size_t ArgSize) mutable { + shared::WrapperFunction::handleAsync(ArgData, ArgSize, H, + std::move(SendResult)); + }; + } + + /// For each symbol name, associate the AsyncWrapperFunction implementation + /// value with the address of that symbol. + /// + /// Symbols will be looked up using LookupKind::Static, + /// JITDylibLookupFlags::MatchAllSymbols (hidden tags will be found), and + /// LookupFlags::WeaklyReferencedSymbol (missing tags will not cause an + /// error, the implementations will simply be dropped). + Error associateJITSideWrapperFunctions(JITDylib &JD, + WrapperFunctionAssociationMap WFs); + + /// Run a registered jit-side wrapper function. + void runJITSideWrapperFunction(SendResultFunction SendResult, + JITTargetAddress TagAddr, + ArrayRef ArgBuffer); /// Disconnect from the target process. /// @@ -161,6 +252,9 @@ class ExecutorProcessControl { unsigned PageSize = 0; MemoryAccess *MemAccess = nullptr; jitlink::JITLinkMemoryManager *MemMgr = nullptr; + + std::mutex TagToFuncMapMutex; + DenseMap> TagToFunc; }; /// Call a wrapper function via ExecutorProcessControl::runWrapper. @@ -168,8 +262,8 @@ class EPCCaller { public: EPCCaller(ExecutorProcessControl &EPC, JITTargetAddress WrapperFnAddr) : EPC(EPC), WrapperFnAddr(WrapperFnAddr) {} - Expected operator()(const char *ArgData, - size_t ArgSize) const { + shared::WrapperFunctionResult operator()(const char *ArgData, + size_t ArgSize) const { return EPC.runWrapper(WrapperFnAddr, ArrayRef(ArgData, ArgSize)); } @@ -202,8 +296,9 @@ class SelfExecutorProcessControl Expected runAsMain(JITTargetAddress MainFnAddr, ArrayRef Args) override; - Expected - runWrapper(JITTargetAddress WrapperFnAddr, ArrayRef ArgBuffer) override; + void runWrapperAsync(SendResultFunction OnComplete, + JITTargetAddress WrapperFnAddr, + ArrayRef ArgBuffer) override; Error disconnect() override; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/OrcRPCExecutorProcessControl.h b/llvm/include/llvm/ExecutionEngine/Orc/OrcRPCExecutorProcessControl.h index 0b5ee262bb706..69e37f9af9e43 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/OrcRPCExecutorProcessControl.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/OrcRPCExecutorProcessControl.h @@ -354,9 +354,9 @@ class OrcRPCExecutorProcessControlBase : public ExecutorProcessControl { return Result; } - Expected - runWrapper(JITTargetAddress WrapperFnAddr, - ArrayRef ArgBuffer) override { + void runWrapperAsync(SendResultFunction OnComplete, + JITTargetAddress WrapperFnAddr, + ArrayRef ArgBuffer) override { DEBUG_WITH_TYPE("orc", { dbgs() << "Running as wrapper function " << formatv("{0:x16}", WrapperFnAddr) << " with " @@ -366,7 +366,11 @@ class OrcRPCExecutorProcessControlBase : public ExecutorProcessControl { WrapperFnAddr, ArrayRef(reinterpret_cast(ArgBuffer.data()), ArgBuffer.size())); - return Result; + + if (!Result) + OnComplete(shared::WrapperFunctionResult::createOutOfBandError( + toString(Result.takeError()))); + OnComplete(std::move(*Result)); } Error closeConnection(OnCloseConnectionFunction OnCloseConnection) { diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h index 0fc8af770233c..ceaea1d2b20f2 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h @@ -172,17 +172,16 @@ class WrapperFunctionResult { namespace detail { template -Expected +WrapperFunctionResult serializeViaSPSToWrapperFunctionResult(const ArgTs &...Args) { WrapperFunctionResult Result; char *DataPtr = WrapperFunctionResult::allocate(Result, SPSArgListT::size(Args...)); SPSOutputBuffer OB(DataPtr, Result.size()); if (!SPSArgListT::serialize(OB, Args...)) - return make_error( - "Error serializing arguments to blob in call", - inconvertibleErrorCode()); - return std::move(Result); + return WrapperFunctionResult::createOutOfBandError( + "Error serializing arguments to blob in call"); + return Result; } template class WrapperFunctionHandlerCaller { @@ -230,12 +229,8 @@ class WrapperFunctionHandlerHelper::call( std::forward(H), Args, ArgIndices{}); - if (auto Result = ResultSerializer::serialize( - std::move(HandlerResult))) - return std::move(*Result); - else - return WrapperFunctionResult::createOutOfBandError( - toString(Result.takeError())); + return ResultSerializer::serialize( + std::move(HandlerResult)); } private: @@ -247,10 +242,10 @@ class WrapperFunctionHandlerHelper class ResultSerializer, typename... SPSTagTs> -class WrapperFunctionHandlerHelper : public WrapperFunctionHandlerHelper {}; @@ -271,9 +266,87 @@ class WrapperFunctionHandlerHelper {}; +template class ResultSerializer, typename... SPSTagTs> +class WrapperFunctionAsyncHandlerHelper + : public WrapperFunctionAsyncHandlerHelper< + decltype(&std::remove_reference_t::operator()), + ResultSerializer, SPSTagTs...> {}; + +template class ResultSerializer, typename... SPSTagTs> +class WrapperFunctionAsyncHandlerHelper { +public: + using ArgTuple = std::tuple...>; + using ArgIndices = std::make_index_sequence::value>; + + template + static void applyAsync(HandlerT &&H, + SendWrapperFunctionResultT &&SendWrapperFunctionResult, + const char *ArgData, size_t ArgSize) { + ArgTuple Args; + if (!deserialize(ArgData, ArgSize, Args, ArgIndices{})) { + SendWrapperFunctionResult(WrapperFunctionResult::createOutOfBandError( + "Could not deserialize arguments for wrapper function call")); + return; + } + + auto SendResult = + [SendWFR = std::move(SendWrapperFunctionResult)](auto Result) mutable { + using ResultT = decltype(Result); + SendWFR(ResultSerializer::serialize(std::move(Result))); + }; + + callAsync(std::forward(H), std::move(SendResult), Args, + ArgIndices{}); + } + +private: + template + static bool deserialize(const char *ArgData, size_t ArgSize, ArgTuple &Args, + std::index_sequence) { + SPSInputBuffer IB(ArgData, ArgSize); + return SPSArgList::deserialize(IB, std::get(Args)...); + } + + template + static void callAsync(HandlerT &&H, + SerializeAndSendResultT &&SerializeAndSendResult, + ArgTupleT &Args, std::index_sequence) { + return std::forward(H)(std::move(SerializeAndSendResult), + std::get(Args)...); + } +}; + +// Map function pointers to function types. +template class ResultSerializer, typename... SPSTagTs> +class WrapperFunctionAsyncHandlerHelper + : public WrapperFunctionAsyncHandlerHelper {}; + +// Map non-const member function types to function types. +template class ResultSerializer, typename... SPSTagTs> +class WrapperFunctionAsyncHandlerHelper + : public WrapperFunctionAsyncHandlerHelper {}; + +// Map const member function types to function types. +template class ResultSerializer, typename... SPSTagTs> +class WrapperFunctionAsyncHandlerHelper + : public WrapperFunctionAsyncHandlerHelper {}; + template class ResultSerializer { public: - static Expected serialize(RetT Result) { + static WrapperFunctionResult serialize(RetT Result) { return serializeViaSPSToWrapperFunctionResult>( Result); } @@ -281,7 +354,7 @@ template class ResultSerializer { template class ResultSerializer { public: - static Expected serialize(Error Err) { + static WrapperFunctionResult serialize(Error Err) { return serializeViaSPSToWrapperFunctionResult>( toSPSSerializable(std::move(Err))); } @@ -290,7 +363,7 @@ template class ResultSerializer { template class ResultSerializer> { public: - static Expected serialize(Expected E) { + static WrapperFunctionResult serialize(Expected E) { return serializeViaSPSToWrapperFunctionResult>( toSPSSerializable(std::move(E))); } @@ -298,6 +371,7 @@ class ResultSerializer> { template class ResultDeserializer { public: + static RetT makeValue() { return RetT(); } static void makeSafe(RetT &Result) {} static Error deserialize(RetT &Result, const char *ArgData, size_t ArgSize) { @@ -312,6 +386,7 @@ template class ResultDeserializer { template <> class ResultDeserializer { public: + static Error makeValue() { return Error::success(); } static void makeSafe(Error &Err) { cantFail(std::move(Err)); } static Error deserialize(Error &Err, const char *ArgData, size_t ArgSize) { @@ -329,6 +404,7 @@ template <> class ResultDeserializer { template class ResultDeserializer, Expected> { public: + static Expected makeValue() { return T(); } static void makeSafe(Expected &E) { cantFail(E.takeError()); } static Error deserialize(Expected &E, const char *ArgData, @@ -344,6 +420,10 @@ class ResultDeserializer, Expected> { } }; +template class AsyncCallResultHelper { + // Did you forget to use Error / Expected in your handler? +}; + } // end namespace detail template class WrapperFunction; @@ -355,7 +435,7 @@ class WrapperFunction { using ResultSerializer = detail::ResultSerializer; public: - /// Call a wrapper function. Callere should be callable as + /// Call a wrapper function. Caller should be callable as /// WrapperFunctionResult Fn(const char *ArgData, size_t ArgSize); template static Error call(const CallerFn &Caller, RetT &Result, @@ -369,18 +449,56 @@ class WrapperFunction { auto ArgBuffer = detail::serializeViaSPSToWrapperFunctionResult>( Args...); - if (!ArgBuffer) - return ArgBuffer.takeError(); - - Expected ResultBuffer = - Caller(ArgBuffer->data(), ArgBuffer->size()); - if (!ResultBuffer) - return ResultBuffer.takeError(); - if (auto ErrMsg = ResultBuffer->getOutOfBandError()) + if (const char *ErrMsg = ArgBuffer.getOutOfBandError()) + return make_error(ErrMsg, inconvertibleErrorCode()); + + WrapperFunctionResult ResultBuffer = + Caller(ArgBuffer.data(), ArgBuffer.size()); + if (auto ErrMsg = ResultBuffer.getOutOfBandError()) return make_error(ErrMsg, inconvertibleErrorCode()); return detail::ResultDeserializer::deserialize( - Result, ResultBuffer->data(), ResultBuffer->size()); + Result, ResultBuffer.data(), ResultBuffer.size()); + } + + /// Call an async wrapper function. + /// Caller should be callable as + /// void Fn(unique_function SendResult, + /// WrapperFunctionResult ArgBuffer); + template + static void callAsync(AsyncCallerFn &&Caller, + SendDeserializedResultFn &&SendDeserializedResult, + const ArgTs &...Args) { + using RetT = typename std::tuple_element< + 1, typename detail::WrapperFunctionHandlerHelper< + std::remove_reference_t, + ResultSerializer, SPSRetTagT>::ArgTuple>::type; + + auto ArgBuffer = + detail::serializeViaSPSToWrapperFunctionResult>( + Args...); + if (auto *ErrMsg = ArgBuffer.getOutOfBandError()) { + SendDeserializedResult( + make_error(ErrMsg, inconvertibleErrorCode()), + detail::ResultDeserializer::makeValue()); + return; + } + + auto SendSerializedResult = [SDR = std::move(SendDeserializedResult)]( + WrapperFunctionResult R) { + RetT RetVal = detail::ResultDeserializer::makeValue(); + detail::ResultDeserializer::makeSafe(RetVal); + + SPSInputBuffer IB(R.data(), R.size()); + if (auto Err = detail::ResultDeserializer::deserialize( + RetVal, R.data(), R.size())) + SDR(std::move(Err), std::move(RetVal)); + + SDR(Error::success(), std::move(RetVal)); + }; + + Caller(std::move(SendSerializedResult), ArgBuffer.data(), ArgBuffer.size()); } /// Handle a call to a wrapper function. @@ -388,11 +506,21 @@ class WrapperFunction { static WrapperFunctionResult handle(const char *ArgData, size_t ArgSize, HandlerT &&Handler) { using WFHH = - detail::WrapperFunctionHandlerHelper; + detail::WrapperFunctionHandlerHelper, + ResultSerializer, SPSTagTs...>; return WFHH::apply(std::forward(Handler), ArgData, ArgSize); } + /// Handle a call to an async wrapper function. + template + static void handleAsync(const char *ArgData, size_t ArgSize, + HandlerT &&Handler, SendResultT &&SendResult) { + using WFAHH = detail::WrapperFunctionAsyncHandlerHelper< + std::remove_reference_t, ResultSerializer, SPSTagTs...>; + WFAHH::applyAsync(std::forward(Handler), + std::forward(SendResult), ArgData, ArgSize); + } + private: template static const T &makeSerializable(const T &Value) { return Value; @@ -411,6 +539,7 @@ class WrapperFunction { template class WrapperFunction : private WrapperFunction { + public: template static Error call(const CallerFn &Caller, const ArgTs &...Args) { @@ -419,6 +548,7 @@ class WrapperFunction } using WrapperFunction::handle; + using WrapperFunction::handleAsync; }; } // end namespace shared diff --git a/llvm/lib/ExecutionEngine/Orc/ExecutorProcessControl.cpp b/llvm/lib/ExecutionEngine/Orc/ExecutorProcessControl.cpp index f8bd74eabc9b4..12fa42ccdef6b 100644 --- a/llvm/lib/ExecutionEngine/Orc/ExecutorProcessControl.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ExecutorProcessControl.cpp @@ -10,11 +10,10 @@ #include "llvm/ExecutionEngine/Orc/Core.h" #include "llvm/ExecutionEngine/Orc/TargetProcess/TargetExecutionUtils.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/Host.h" #include "llvm/Support/Process.h" -#include - namespace llvm { namespace orc { @@ -22,6 +21,56 @@ ExecutorProcessControl::MemoryAccess::~MemoryAccess() {} ExecutorProcessControl::~ExecutorProcessControl() {} +Error ExecutorProcessControl::associateJITSideWrapperFunctions( + JITDylib &JD, WrapperFunctionAssociationMap WFs) { + + // Look up tag addresses. + auto &ES = JD.getExecutionSession(); + auto TagAddrs = + ES.lookup({{&JD, JITDylibLookupFlags::MatchAllSymbols}}, + SymbolLookupSet::fromMapKeys( + WFs, SymbolLookupFlags::WeaklyReferencedSymbol)); + if (!TagAddrs) + return TagAddrs.takeError(); + + // Associate tag addresses with implementations. + std::lock_guard Lock(TagToFuncMapMutex); + for (auto &KV : *TagAddrs) { + auto TagAddr = KV.second.getAddress(); + if (TagToFunc.count(TagAddr)) + return make_error("Tag " + formatv("{0:x16}", TagAddr) + + " (for " + *KV.first + + ") already registered", + inconvertibleErrorCode()); + auto I = WFs.find(KV.first); + assert(I != WFs.end() && I->second && + "AsyncWrapperFunction implementation missing"); + TagToFunc[KV.second.getAddress()] = + std::make_shared(std::move(I->second)); + } + return Error::success(); +} + +void ExecutorProcessControl::runJITSideWrapperFunction( + SendResultFunction SendResult, JITTargetAddress TagAddr, + ArrayRef ArgBuffer) { + + std::shared_ptr F; + { + std::lock_guard Lock(TagToFuncMapMutex); + auto I = TagToFunc.find(TagAddr); + if (I != TagToFunc.end()) + F = I->second; + } + + if (F) + (*F)(std::move(SendResult), ArgBuffer.data(), ArgBuffer.size()); + else + SendResult(shared::WrapperFunctionResult::createOutOfBandError( + ("No function registered for tag " + formatv("{0:x16}", TagAddr)) + .str())); +} + SelfExecutorProcessControl::SelfExecutorProcessControl( std::shared_ptr SSP, Triple TargetTriple, unsigned PageSize, std::unique_ptr MemMgr) @@ -102,13 +151,13 @@ SelfExecutorProcessControl::runAsMain(JITTargetAddress MainFnAddr, return orc::runAsMain(jitTargetAddressToFunction(MainFnAddr), Args); } -Expected -SelfExecutorProcessControl::runWrapper(JITTargetAddress WrapperFnAddr, - ArrayRef ArgBuffer) { - using WrapperFnTy = shared::detail::CWrapperFunctionResult (*)( - const char *Data, uint64_t Size); +void SelfExecutorProcessControl::runWrapperAsync(SendResultFunction SendResult, + JITTargetAddress WrapperFnAddr, + ArrayRef ArgBuffer) { + using WrapperFnTy = + shared::detail::CWrapperFunctionResult (*)(const char *Data, size_t Size); auto *WrapperFn = jitTargetAddressToFunction(WrapperFnAddr); - return WrapperFn(ArgBuffer.data(), ArgBuffer.size()); + SendResult(WrapperFn(ArgBuffer.data(), ArgBuffer.size())); } Error SelfExecutorProcessControl::disconnect() { return Error::success(); } diff --git a/llvm/unittests/ExecutionEngine/Orc/CMakeLists.txt b/llvm/unittests/ExecutionEngine/Orc/CMakeLists.txt index b1cfd18e5d4e5..b544cfa1864e8 100644 --- a/llvm/unittests/ExecutionEngine/Orc/CMakeLists.txt +++ b/llvm/unittests/ExecutionEngine/Orc/CMakeLists.txt @@ -16,6 +16,7 @@ set(LLVM_LINK_COMPONENTS add_llvm_unittest(OrcJITTests CoreAPIsTest.cpp + ExecutorProcessControlTest.cpp IndirectionUtilsTest.cpp JITTargetMachineBuilderTest.cpp LazyCallThroughAndReexportsTest.cpp diff --git a/llvm/unittests/ExecutionEngine/Orc/ExecutorProcessControlTest.cpp b/llvm/unittests/ExecutionEngine/Orc/ExecutorProcessControlTest.cpp new file mode 100644 index 0000000000000..23096c86f4d33 --- /dev/null +++ b/llvm/unittests/ExecutionEngine/Orc/ExecutorProcessControlTest.cpp @@ -0,0 +1,105 @@ +//===- ExecutorProcessControlTest.cpp - Test ExecutorProcessControl utils -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h" +#include "llvm/ExecutionEngine/Orc/Core.h" +#include "llvm/Support/MSVCErrorWorkarounds.h" +#include "llvm/Testing/Support/Error.h" +#include "gtest/gtest.h" + +#include + +using namespace llvm; +using namespace llvm::orc; +using namespace llvm::orc::shared; + +static llvm::orc::shared::detail::CWrapperFunctionResult +addWrapper(const char *ArgData, size_t ArgSize) { + return WrapperFunction::handle( + ArgData, ArgSize, [](int32_t X, int32_t Y) { return X + Y; }) + .release(); +} + +static void addAsyncWrapper(unique_function SendResult, + int32_t X, int32_t Y) { + SendResult(X + Y); +} + +TEST(ExecutorProcessControl, RunWrapperTemplate) { + auto EPC = cantFail( + SelfExecutorProcessControl::Create(std::make_shared())); + + int32_t Result; + EXPECT_THAT_ERROR(EPC->runSPSWrapper( + pointerToJITTargetAddress(addWrapper), Result, 2, 3), + Succeeded()); + EXPECT_EQ(Result, 5); +} + +TEST(ExecutorProcessControl, RunWrapperAsyncTemplate) { + auto EPC = cantFail( + SelfExecutorProcessControl::Create(std::make_shared())); + + std::promise> RP; + using Sig = int32_t(int32_t, int32_t); + EPC->runSPSWrapperAsync( + [&](Error SerializationErr, int32_t R) { + if (SerializationErr) + RP.set_value(std::move(SerializationErr)); + RP.set_value(std::move(R)); + }, + pointerToJITTargetAddress(addWrapper), 2, 3); + Expected Result = RP.get_future().get(); + EXPECT_THAT_EXPECTED(Result, HasValue(5)); +} + +TEST(ExecutorProcessControl, RegisterAsyncHandlerAndRun) { + + constexpr JITTargetAddress AddAsyncTagAddr = 0x01; + + auto EPC = cantFail( + SelfExecutorProcessControl::Create(std::make_shared())); + ExecutionSession ES(EPC->getSymbolStringPool()); + auto &JD = ES.createBareJITDylib("JD"); + + auto AddAsyncTag = ES.intern("addAsync_tag"); + cantFail(JD.define(absoluteSymbols( + {{AddAsyncTag, + JITEvaluatedSymbol(AddAsyncTagAddr, JITSymbolFlags::Exported)}}))); + + ExecutorProcessControl::WrapperFunctionAssociationMap Associations; + + Associations[AddAsyncTag] = + EPC->wrapAsyncWithSPS(addAsyncWrapper); + + cantFail(EPC->associateJITSideWrapperFunctions(JD, std::move(Associations))); + + std::promise RP; + auto RF = RP.get_future(); + + using ArgSerialization = SPSArgList; + size_t ArgBufferSize = ArgSerialization::size(1, 2); + WrapperFunctionResult ArgBuffer; + char *ArgBufferData = + WrapperFunctionResult::allocate(ArgBuffer, ArgBufferSize); + SPSOutputBuffer OB(ArgBufferData, ArgBufferSize); + EXPECT_TRUE(ArgSerialization::serialize(OB, 1, 2)); + + EPC->runJITSideWrapperFunction( + [&](WrapperFunctionResult ResultBuffer) { + int32_t Result; + SPSInputBuffer IB(ResultBuffer.data(), ResultBuffer.size()); + EXPECT_TRUE(SPSArgList::deserialize(IB, Result)); + RP.set_value(Result); + }, + AddAsyncTagAddr, ArrayRef(ArgBuffer.data(), ArgBuffer.size())); + + EXPECT_EQ(RF.get(), (int32_t)3); + + cantFail(ES.endSession()); +} diff --git a/llvm/unittests/ExecutionEngine/Orc/WrapperFunctionUtilsTest.cpp b/llvm/unittests/ExecutionEngine/Orc/WrapperFunctionUtilsTest.cpp index 1f177b4c2d143..42051836506fb 100644 --- a/llvm/unittests/ExecutionEngine/Orc/WrapperFunctionUtilsTest.cpp +++ b/llvm/unittests/ExecutionEngine/Orc/WrapperFunctionUtilsTest.cpp @@ -7,8 +7,11 @@ //===----------------------------------------------------------------------===// #include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h" +#include "llvm/ADT/FunctionExtras.h" #include "gtest/gtest.h" +#include + using namespace llvm; using namespace llvm::orc::shared; @@ -65,13 +68,54 @@ static WrapperFunctionResult addWrapper(const char *ArgData, size_t ArgSize) { ArgData, ArgSize, [](int32_t X, int32_t Y) -> int32_t { return X + Y; }); } -TEST(WrapperFunctionUtilsTest, WrapperFunctionCallVoidNoopAndHandle) { +TEST(WrapperFunctionUtilsTest, WrapperFunctionCallAndHandleVoid) { EXPECT_FALSE(!!WrapperFunction::call(voidNoopWrapper)); } -TEST(WrapperFunctionUtilsTest, WrapperFunctionCallAndHandle) { +TEST(WrapperFunctionUtilsTest, WrapperFunctionCallAndHandleRet) { int32_t Result; EXPECT_FALSE(!!WrapperFunction::call( addWrapper, Result, 1, 2)); EXPECT_EQ(Result, (int32_t)3); } + +static void voidNoopAsync(unique_function SendResult) { + SendResult(SPSEmpty()); +} + +static WrapperFunctionResult voidNoopAsyncWrapper(const char *ArgData, + size_t ArgSize) { + std::promise RP; + auto RF = RP.get_future(); + + WrapperFunction::handleAsync( + ArgData, ArgSize, voidNoopAsync, + [&](WrapperFunctionResult R) { RP.set_value(std::move(R)); }); + + return RF.get(); +} + +static WrapperFunctionResult addAsyncWrapper(const char *ArgData, + size_t ArgSize) { + std::promise RP; + auto RF = RP.get_future(); + + WrapperFunction::handleAsync( + ArgData, ArgSize, + [](unique_function SendResult, int32_t X, int32_t Y) { + SendResult(X + Y); + }, + [&](WrapperFunctionResult R) { RP.set_value(std::move(R)); }); + return RF.get(); +} + +TEST(WrapperFunctionUtilsTest, WrapperFunctionCallAndHandleAsyncVoid) { + EXPECT_FALSE(!!WrapperFunction::call(voidNoopAsyncWrapper)); +} + +TEST(WrapperFunctionUtilsTest, WrapperFunctionCallAndHandleAsyncRet) { + int32_t Result; + EXPECT_FALSE(!!WrapperFunction::call( + addAsyncWrapper, Result, 1, 2)); + EXPECT_EQ(Result, (int32_t)3); +} From c1f068b8f1d5f6ca75acc07fdedf2e7416e834ec Mon Sep 17 00:00:00 2001 From: Andrzej Warzynski Date: Thu, 1 Jul 2021 08:27:31 +0000 Subject: [PATCH 391/619] [flang] Revert "PoC for Flang Driver Plugins" This patch has not been reviewed and was commited by accident. This reverts commit 788a5d4afe6407e647454a9832a7b4a27fba06bf. --- clang/include/clang/Driver/Options.td | 10 ++- flang/CMakeLists.txt | 1 + flang/examples/CMakeLists.txt | 2 - flang/examples/HelloWorld/CMakeLists.txt | 12 ---- .../examples/HelloWorld/HelloWorldPlugin.cpp | 18 ----- .../include/flang/Frontend/FrontendActions.h | 4 -- .../include/flang/Frontend/FrontendOptions.h | 11 +-- .../flang/Frontend/FrontendPluginRegistry.h | 26 ------- flang/lib/Frontend/CompilerInvocation.cpp | 17 ----- flang/lib/Frontend/FrontendAction.cpp | 4 -- flang/lib/Frontend/FrontendActions.cpp | 4 -- .../ExecuteCompilerInvocation.cpp | 67 ------------------- llvm/include/llvm/Support/Registry.h | 10 --- llvm/lib/Support/DynamicLibrary.cpp | 6 -- 14 files changed, 6 insertions(+), 186 deletions(-) delete mode 100644 flang/examples/HelloWorld/CMakeLists.txt delete mode 100644 flang/examples/HelloWorld/HelloWorldPlugin.cpp delete mode 100644 flang/include/flang/Frontend/FrontendPluginRegistry.h diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 1629a74ae62c9..41b7299b02745 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5221,6 +5221,10 @@ def enable_noundef_analysis : Flag<["-"], "enable-noundef-analysis">, Group, HelpText<"Discard value names in LLVM IR">, MarshallingInfoFlag>; +def load : Separate<["-"], "load">, MetaVarName<"">, + HelpText<"Load the named plugin (dynamic shared object)">; +def plugin : Separate<["-"], "plugin">, MetaVarName<"">, + HelpText<"Use the named plugin action instead of the default action (use \"help\" to list available options)">; def plugin_arg : JoinedAndSeparate<["-"], "plugin-arg-">, MetaVarName<" ">, HelpText<"Pass to plugin ">; @@ -5784,12 +5788,6 @@ def init_only : Flag<["-"], "init-only">, HelpText<"Only execute frontend initialization">; } // let Group = Action_Group - -def load : Separate<["-"], "load">, MetaVarName<"">, - HelpText<"Load the named plugin (dynamic shared object)">; -def plugin : Separate<["-"], "plugin">, MetaVarName<"">, - HelpText<"Use the named plugin action instead of the default action (use \"help\" to list available options)">; - } // let Flags = [CC1Option, FC1Option, NoDriverOption] //===----------------------------------------------------------------------===// diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt index d9495c4748ce5..adf3dc819dc93 100644 --- a/flang/CMakeLists.txt +++ b/flang/CMakeLists.txt @@ -390,6 +390,7 @@ endif() include(CMakeParseArguments) include(AddFlang) + add_subdirectory(include) add_subdirectory(lib) add_subdirectory(cmake/modules) diff --git a/flang/examples/CMakeLists.txt b/flang/examples/CMakeLists.txt index f896874f7f89b..3ca9feddf33e9 100644 --- a/flang/examples/CMakeLists.txt +++ b/flang/examples/CMakeLists.txt @@ -6,5 +6,3 @@ add_executable(external-hello-world target_link_libraries(external-hello-world FortranRuntime ) -#add_subdirectory(HelloEarth) -add_subdirectory(HelloWorld) diff --git a/flang/examples/HelloWorld/CMakeLists.txt b/flang/examples/HelloWorld/CMakeLists.txt deleted file mode 100644 index eb716c2f17b49..0000000000000 --- a/flang/examples/HelloWorld/CMakeLists.txt +++ /dev/null @@ -1,12 +0,0 @@ -add_llvm_library( - flangHelloWorldPlugin - MODULE - HelloWorldPlugin.cpp - - DEPENDS - clangBasic - - LINK_COMPONENTS - Option - Support -) diff --git a/flang/examples/HelloWorld/HelloWorldPlugin.cpp b/flang/examples/HelloWorld/HelloWorldPlugin.cpp deleted file mode 100644 index 30d23ce104227..0000000000000 --- a/flang/examples/HelloWorld/HelloWorldPlugin.cpp +++ /dev/null @@ -1,18 +0,0 @@ -#include "flang/Frontend/FrontendActions.h" -#include "flang/Frontend/FrontendPluginRegistry.h" - -__attribute__((constructor)) -static void printing() { - llvm::outs() << " > Plugin Constructed\n"; -} - -using namespace Fortran::frontend; - -class HelloWorldFlangPlugin : public PluginParseTreeAction -{ - void ExecuteAction() override { - llvm::outs() << "Hello World from your new plugin (Remote plugin)\n"; - } -}; - -static FrontendPluginRegistry::Add X("-hello-w", "Hello World Plugin example"); diff --git a/flang/include/flang/Frontend/FrontendActions.h b/flang/include/flang/Frontend/FrontendActions.h index d30ae1dbed0ff..72eb44223fe49 100644 --- a/flang/include/flang/Frontend/FrontendActions.h +++ b/flang/include/flang/Frontend/FrontendActions.h @@ -30,10 +30,6 @@ struct MeasurementVisitor { // Custom Consumer Actions //===----------------------------------------------------------------------===// -class PluginParseTreeAction : public FrontendAction { - void ExecuteAction() override; -}; - class InputOutputTestAction : public FrontendAction { void ExecuteAction() override; }; diff --git a/flang/include/flang/Frontend/FrontendOptions.h b/flang/include/flang/Frontend/FrontendOptions.h index 5867b790f6fce..42ce499566e9f 100644 --- a/flang/include/flang/Frontend/FrontendOptions.h +++ b/flang/include/flang/Frontend/FrontendOptions.h @@ -77,10 +77,7 @@ enum ActionKind { GetSymbolsSources, /// Only execute frontend initialization - InitOnly, - - /// Run a plugin action, \see ActionName. - PluginAction + InitOnly /// TODO: RunPreprocessor, EmitLLVM, EmitLLVMOnly, /// EmitCodeGenOnly, EmitAssembly, (...) @@ -252,12 +249,6 @@ class FrontendOptions { // Source file encoding Fortran::parser::Encoding encoding_{Fortran::parser::Encoding::UTF_8}; - /// The list of plugins to load. - std::vector plugins; - - /// The name of the action to run when using a plugin action. - std::string ActionName; - public: FrontendOptions() : showHelp_(false), showVersion_(false), instrumentedParse_(false), diff --git a/flang/include/flang/Frontend/FrontendPluginRegistry.h b/flang/include/flang/Frontend/FrontendPluginRegistry.h deleted file mode 100644 index d775393afe7f6..0000000000000 --- a/flang/include/flang/Frontend/FrontendPluginRegistry.h +++ /dev/null @@ -1,26 +0,0 @@ -//===- FrontendPluginRegistry.h ---------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Pluggable Frontend Action Interface -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_FLANG_FRONTEND_FRONTENDPLUGINREGISTRY_H -#define LLVM_FLANG_FRONTEND_FRONTENDPLUGINREGISTRY_H - -#include "flang/Frontend/FrontendAction.h" -#include "llvm/Support/Registry.h" - -namespace Fortran::frontend { - -/// The frontend plugin registry. -using FrontendPluginRegistry = llvm::Registry; - -} // namespace flang - -#endif // LLVM_FLANG_FRONTEND_FRONTENDPLUGINREGISTRY_H \ No newline at end of file diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index 20d3d2d253a75..6a2bf1947e350 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -199,23 +199,6 @@ static bool ParseFrontendArgs(FrontendOptions &opts, llvm::opt::ArgList &args, } } - printf("--------- (ParseFrontendArgs) ----------\n"); - - if (llvm::opt::Arg *a = args.getLastArg(clang::driver::options::OPT_load)) { - llvm::outs() << " " << a->getOption().getName() << " >>> " << a->getValue() << "\n"; - //opts.plugins = a->getValue(); - opts.plugins.push_back(a->getValue()); - } - - if (const llvm::opt::Arg *a = args.getLastArg(clang::driver::options::OPT_plugin)) { - llvm::outs() << " " << a->getOption().getName() << " >>> " << a->getValue() << "\n"; - //opts.plugins.emplace_back(a->getValue()); - opts.programAction_ = PluginAction; - opts.ActionName = a->getValue(); - } - - printf("-------- (\\ParseFrontendArgs) ----------\n"); - opts.outputFile_ = args.getLastArgValue(clang::driver::options::OPT_o); opts.showHelp_ = args.hasArg(clang::driver::options::OPT_help); opts.showVersion_ = args.hasArg(clang::driver::options::OPT_version); diff --git a/flang/lib/Frontend/FrontendAction.cpp b/flang/lib/Frontend/FrontendAction.cpp index 62d4bac0df6fa..23e4ca3f33063 100644 --- a/flang/lib/Frontend/FrontendAction.cpp +++ b/flang/lib/Frontend/FrontendAction.cpp @@ -10,7 +10,6 @@ #include "flang/Frontend/CompilerInstance.h" #include "flang/Frontend/FrontendActions.h" #include "flang/Frontend/FrontendOptions.h" -#include "flang/Frontend/FrontendPluginRegistry.h" #include "flang/FrontendTool/Utils.h" #include "clang/Basic/DiagnosticFrontend.h" #include "llvm/Support/Errc.h" @@ -18,9 +17,6 @@ using namespace Fortran::frontend; -LLVM_INSTANTIATE_REGISTRY(FrontendPluginRegistry) - - void FrontendAction::set_currentInput(const FrontendInputFile ¤tInput) { this->currentInput_ = currentInput; } diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp index d067ebd141b85..8ee42d73c6e46 100644 --- a/flang/lib/Frontend/FrontendActions.cpp +++ b/flang/lib/Frontend/FrontendActions.cpp @@ -488,7 +488,3 @@ void InitOnlyAction::ExecuteAction() { "Use `-init-only` for testing purposes only"); ci.diagnostics().Report(DiagID); } - -void PluginParseTreeAction::ExecuteAction() { - -} diff --git a/flang/lib/FrontendTool/ExecuteCompilerInvocation.cpp b/flang/lib/FrontendTool/ExecuteCompilerInvocation.cpp index 8ef0c93404c10..243e25163dc05 100644 --- a/flang/lib/FrontendTool/ExecuteCompilerInvocation.cpp +++ b/flang/lib/FrontendTool/ExecuteCompilerInvocation.cpp @@ -13,14 +13,11 @@ #include "flang/Frontend/CompilerInstance.h" #include "flang/Frontend/FrontendActions.h" -#include "flang/Frontend/FrontendAction.h" -#include "flang/Frontend/FrontendPluginRegistry.h" #include "clang/Driver/Options.h" #include "llvm/Option/OptTable.h" #include "llvm/Option/Option.h" #include "llvm/Support/BuryPointer.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/DynamicLibrary.h" namespace Fortran::frontend { @@ -82,22 +79,6 @@ static std::unique_ptr CreateFrontendBaseAction( case InitOnly: return std::make_unique(); break; - case PluginAction: { - llvm::outs() << "---------- (case: PluginAction) --------\n"; - llvm::outs() << " Plugin Action: " << ci.frontendOpts().ActionName << "\n"; - for (const FrontendPluginRegistry::entry &plugin : FrontendPluginRegistry::entries()) { - llvm::outs() << " " << plugin.getName() << "\t-- " << plugin.getDesc() << "\n"; - if (plugin.getName() == ci.frontendOpts().ActionName) { - llvm::outs() << "We have found the plugin name!! :-)\n"; - std::unique_ptr P(plugin.instantiate()); - return std::move(P); - } - } - - unsigned diagID = ci.diagnostics().getCustomDiagID(clang::DiagnosticsEngine::Error, "unable to find plugin '%0'"); - ci.diagnostics().Report(diagID) << ci.frontendOpts().ActionName; - return nullptr; - } default: break; // TODO: @@ -111,26 +92,6 @@ static std::unique_ptr CreateFrontendBaseAction( return 0; } -/// <<< TEMP Plugin Example - -class HelloWorldFlangPlugin : public PluginParseTreeAction -{ - protected: - void ExecuteAction() override { - llvm::outs() << "Hello World from your new plugin (Hello World)\n"; - } -}; - -class HelloTwoFlangPlugin : public PluginParseTreeAction -{ - protected: - void ExecuteAction() override { - llvm::outs() << "Hello World from your new plugin (Hello Two)\n"; - } -}; - -/// <<<<< TEMP Plugin Example - std::unique_ptr CreateFrontendAction(CompilerInstance &ci) { // Create the underlying action. std::unique_ptr act = CreateFrontendBaseAction(ci); @@ -139,7 +100,6 @@ std::unique_ptr CreateFrontendAction(CompilerInstance &ci) { return act; } - bool ExecuteCompilerInvocation(CompilerInstance *flang) { // Honor -help. if (flang->frontendOpts().showHelp_) { @@ -157,33 +117,6 @@ bool ExecuteCompilerInvocation(CompilerInstance *flang) { return true; } - llvm::outs() << "------ (ExecuteCompilerInvocation) -----\n"; - - // Load any requested plugins. - for (const std::string &Path : flang->frontendOpts().plugins) { - llvm::outs() << " Load :: Path >> " << Path << "\n"; - std::string Error; - if (llvm::sys::DynamicLibrary::LoadLibraryPermanently(Path.c_str(), &Error)) { - unsigned diagID = flang->diagnostics().getCustomDiagID(clang::DiagnosticsEngine::Error, "unable to load plugin '%0': '%1'"); - flang->diagnostics().Report(diagID) << Path << Error; - } - } - - llvm::outs() << " Plugin Registry List >>\n"; - for (const FrontendPluginRegistry::entry &plugin : FrontendPluginRegistry::entries()) { - llvm::outs() << plugin.getName() << " -- " << plugin.getDesc() << "\n"; - } - llvm::outs() << " << Plugin Registry List\n"; - - static FrontendPluginRegistry::Add X("-hello-wor", "simple Plugin example"); - static FrontendPluginRegistry::Add Y("hellotwo", "another print plugin example"); - - llvm::outs() << "----- (\\ExecuteCompilerInvocation) -----\n"; - - // If there were errors in processing arguments, don't do anything else. - if (flang->diagnostics().hasErrorOccurred()) - return false; - // Create and execute the frontend action. std::unique_ptr act(CreateFrontendAction(*flang)); if (!act) diff --git a/llvm/include/llvm/Support/Registry.h b/llvm/include/llvm/Support/Registry.h index 58b1825a71c06..5bb6a254a47f4 100644 --- a/llvm/include/llvm/Support/Registry.h +++ b/llvm/include/llvm/Support/Registry.h @@ -18,7 +18,6 @@ #include "llvm/ADT/iterator_range.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/DynamicLibrary.h" -#include "llvm/Support/CommandLine.h" #include namespace llvm { @@ -121,10 +120,7 @@ namespace llvm { public: Add(StringRef Name, StringRef Desc) : Entry(Name, Desc, CtorFn), Node(Entry) { - llvm::outs() << " -------------- (Registry) --------------\n"; - llvm::outs() << " \tAdd :: " << Name << "\n"; add_node(&Node); - llvm::outs() << " ------------- (\\Registry) --------------\n"; } }; }; @@ -149,14 +145,8 @@ namespace llvm { else \ Head = N; \ Tail = N; \ - llvm::outs() << " REGISTRY_CLASS :: list >> \n"; \ - for (const REGISTRY_CLASS::entry &plugin : REGISTRY_CLASS::entries()) { \ - llvm::outs() << " " << plugin.getName() << " \t-- " << plugin.getDesc() << "\n"; \ - } \ } \ template typename Registry::iterator Registry::begin() { \ - llvm::outs() << "iterator (head)\n"; \ - llvm::outs() << Head << "\n"; \ return iterator(Head); \ } \ template REGISTRY_CLASS::node *Registry::Head; \ diff --git a/llvm/lib/Support/DynamicLibrary.cpp b/llvm/lib/Support/DynamicLibrary.cpp index ff989da453652..2bcdbdcdb9b0d 100644 --- a/llvm/lib/Support/DynamicLibrary.cpp +++ b/llvm/lib/Support/DynamicLibrary.cpp @@ -149,18 +149,12 @@ DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *FileName, // ManagedStatic can be added from static constructors in HandleSet::DLOpen. HandleSet& HS = *OpenedHandles; - printf(" -------- (getPermantentLibrary) --------\n"); - printf(" get Lib: %s \n", FileName); - void *Handle = HandleSet::DLOpen(FileName, Err); if (Handle != &Invalid) { - printf(" Handle != Invalid \n"); SmartScopedLock Lock(*SymbolsMutex); HS.AddLibrary(Handle, /*IsProcess*/ FileName == nullptr); } - printf(" ------- (\\getPermantentLibrary) --------\n"); - return DynamicLibrary(Handle); } From 820ced136280474a6702a10e9dfb35fdef1a0e6f Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Thu, 1 Jul 2021 08:31:00 +0000 Subject: [PATCH 392/619] [gn build] Port 39f64c4c8375 --- .../gn/secondary/llvm/unittests/ExecutionEngine/Orc/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/llvm/unittests/ExecutionEngine/Orc/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/ExecutionEngine/Orc/BUILD.gn index e73ed3887f357..3d66a8170ef79 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/ExecutionEngine/Orc/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/ExecutionEngine/Orc/BUILD.gn @@ -14,6 +14,7 @@ unittest("OrcJITTests") { ] sources = [ "CoreAPIsTest.cpp", + "ExecutorProcessControlTest.cpp", "IndirectionUtilsTest.cpp", "JITTargetMachineBuilderTest.cpp", "LazyCallThroughAndReexportsTest.cpp", From aa454dda2eed4e71081bc57b1f32dfce2486b177 Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Thu, 1 Jul 2021 10:54:22 +0200 Subject: [PATCH 393/619] [analyzer] LValueToRValueBitCasts should evaluate to an r-value Previously `LValueToRValueBitCast`s were modeled in the same way how a regular `BitCast` was. However, this should not produce an l-value. Modeling bitcasts accurately is tricky, so it's probably better to model this expression by binding a fresh conjured value. The following code should not result in a diagnostic: ```lang=C++ __attribute__((always_inline)) static inline constexpr unsigned int_castf32_u32(float __A) { return __builtin_bit_cast(unsigned int, __A); // no-warning } ``` Previously, it reported `Address of stack memory associated with local variable '__A' returned to caller [core.StackAddressEscape]`. Differential Revision: https://reviews.llvm.org/D105017 Reviewed by: NoQ, vsavchenko --- clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp | 5 +-- clang/test/Analysis/builtin_bitcast.cpp | 32 +++++++++++++++++++ 2 files changed, 35 insertions(+), 2 deletions(-) create mode 100644 clang/test/Analysis/builtin_bitcast.cpp diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp index a1c8128f1650a..bf3f8dfd0877a 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp @@ -304,7 +304,8 @@ void ExprEngine::VisitCast(const CastExpr *CastE, const Expr *Ex, ExplodedNodeSet dstPreStmt; getCheckerManager().runCheckersForPreStmt(dstPreStmt, Pred, CastE, *this); - if (CastE->getCastKind() == CK_LValueToRValue) { + if (CastE->getCastKind() == CK_LValueToRValue || + CastE->getCastKind() == CK_LValueToRValueBitCast) { for (ExplodedNodeSet::iterator I = dstPreStmt.begin(), E = dstPreStmt.end(); I!=E; ++I) { ExplodedNode *subExprNode = *I; @@ -332,6 +333,7 @@ void ExprEngine::VisitCast(const CastExpr *CastE, const Expr *Ex, switch (CastE->getCastKind()) { case CK_LValueToRValue: + case CK_LValueToRValueBitCast: llvm_unreachable("LValueToRValue casts handled earlier."); case CK_ToVoid: continue; @@ -380,7 +382,6 @@ void ExprEngine::VisitCast(const CastExpr *CastE, const Expr *Ex, case CK_Dependent: case CK_ArrayToPointerDecay: case CK_BitCast: - case CK_LValueToRValueBitCast: case CK_AddressSpaceConversion: case CK_BooleanToSignedIntegral: case CK_IntegralToPointer: diff --git a/clang/test/Analysis/builtin_bitcast.cpp b/clang/test/Analysis/builtin_bitcast.cpp new file mode 100644 index 0000000000000..396e7caa45f6a --- /dev/null +++ b/clang/test/Analysis/builtin_bitcast.cpp @@ -0,0 +1,32 @@ +// RUN: %clang_analyze_cc1 -triple x86_64-unknown-unknown -verify %s \ +// RUN: -analyzer-checker=core,debug.ExprInspection + +template void clang_analyzer_dump(T); + +__attribute__((always_inline)) static inline constexpr unsigned int _castf32_u32(float __A) { + return __builtin_bit_cast(unsigned int, __A); // no-warning +} + +void test(int i) { + _castf32_u32(42); + + float f = 42; + + // Loading from a floating point value results in unknown, + // which later materializes as a conjured value. + auto g = __builtin_bit_cast(unsigned int, f); + clang_analyzer_dump(g); + // expected-warning-re@-1 {{{{^conj_\$[0-9]+{unsigned int,}}}} + + auto g2 = __builtin_bit_cast(unsigned int, 42.0f); + clang_analyzer_dump(g2); + // expected-warning-re@-1 {{{{^conj_\$[0-9]+{unsigned int,}}}} + + auto g3 = __builtin_bit_cast(unsigned int, i); + clang_analyzer_dump(g3); + // expected-warning-re@-1 {{{{^reg_\$[0-9]+}}}} + + auto g4 = __builtin_bit_cast(unsigned long, &i); + clang_analyzer_dump(g4); + // expected-warning@-1 {{&i [as 64 bit integer]}} +} From 55662b24a4755680e4fd34dc22a3dd555fdb1291 Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Thu, 1 Jul 2021 10:54:28 +0200 Subject: [PATCH 394/619] [analyzer][NFC] Inline ExprEngine::handleLVectorSplat() It seems like ExprEngine::handleLVectorSplat() was used at only 2 places. It might be better to directly inline them for readability. It seems like these cases were not covered by tests according to my coverage measurement, so I'm adding tests as well, demonstrating that no behavior changed. Besides that, I'm handling CK_MatrixCast similarly to how the rest of the unhandled casts are evaluated. Differential Revision: https://reviews.llvm.org/D105125 Reviewed by: NoQ --- clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp | 33 ++++++------------- clang/test/Analysis/casts.c | 29 +++++++++++++--- 2 files changed, 35 insertions(+), 27 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp index bf3f8dfd0877a..7ad3dca831ac4 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp @@ -282,22 +282,6 @@ ProgramStateRef ExprEngine::handleLValueBitCast( return state; } -ProgramStateRef ExprEngine::handleLVectorSplat( - ProgramStateRef state, const LocationContext* LCtx, const CastExpr* CastE, - StmtNodeBuilder &Bldr, ExplodedNode* Pred) { - // Recover some path sensitivity by conjuring a new value. - QualType resultType = CastE->getType(); - if (CastE->isGLValue()) - resultType = getContext().getPointerType(resultType); - SVal result = svalBuilder.conjureSymbolVal(nullptr, CastE, LCtx, - resultType, - currBldrCtx->blockCount()); - state = state->BindExpr(CastE, LCtx, result); - Bldr.generateNode(CastE, Pred, state); - - return state; -} - void ExprEngine::VisitCast(const CastExpr *CastE, const Expr *Ex, ExplodedNode *Pred, ExplodedNodeSet &Dst) { @@ -535,17 +519,20 @@ void ExprEngine::VisitCast(const CastExpr *CastE, const Expr *Ex, continue; } // Explicitly proceed with default handler for this case cascade. - state = handleLVectorSplat(state, LCtx, CastE, Bldr, Pred); - continue; } + LLVM_FALLTHROUGH; // Various C++ casts that are not handled yet. case CK_ToUnion: + case CK_MatrixCast: case CK_VectorSplat: { - state = handleLVectorSplat(state, LCtx, CastE, Bldr, Pred); - continue; - } - case CK_MatrixCast: { - // TODO: Handle MatrixCast here. + QualType resultType = CastE->getType(); + if (CastE->isGLValue()) + resultType = getContext().getPointerType(resultType); + SVal result = svalBuilder.conjureSymbolVal( + /*symbolTag=*/nullptr, CastE, LCtx, resultType, + currBldrCtx->blockCount()); + state = state->BindExpr(CastE, LCtx, result); + Bldr.generateNode(CastE, Pred, state); continue; } } diff --git a/clang/test/Analysis/casts.c b/clang/test/Analysis/casts.c index 6b9108ac6bb03..ce195297874b1 100644 --- a/clang/test/Analysis/casts.c +++ b/clang/test/Analysis/casts.c @@ -1,7 +1,7 @@ -// RUN: %clang_analyze_cc1 -triple x86_64-apple-darwin9 -analyzer-checker=core,alpha.core,debug.ExprInspection -analyzer-store=region -Wno-pointer-to-int-cast -verify -analyzer-config eagerly-assume=false %s -// RUN: %clang_analyze_cc1 -triple i386-apple-darwin9 -analyzer-checker=core,alpha.core,debug.ExprInspection -analyzer-store=region -Wno-pointer-to-int-cast -verify -analyzer-config eagerly-assume=false %s -// RUN: %clang_analyze_cc1 -triple x86_64-apple-darwin9 -analyzer-checker=core,alpha.core,debug.ExprInspection -Wno-pointer-to-int-cast -verify -DEAGERLY_ASSUME=1 -w %s -// RUN: %clang_analyze_cc1 -triple i386-apple-darwin9 -analyzer-checker=core,alpha.core,debug.ExprInspection -Wno-pointer-to-int-cast -verify -DEAGERLY_ASSUME=1 -DBIT32=1 -w %s +// RUN: %clang_analyze_cc1 -triple x86_64-apple-darwin9 -fenable-matrix -analyzer-checker=core,alpha.core,debug.ExprInspection -analyzer-store=region -Wno-pointer-to-int-cast -verify -analyzer-config eagerly-assume=false %s +// RUN: %clang_analyze_cc1 -triple i386-apple-darwin9 -fenable-matrix -analyzer-checker=core,alpha.core,debug.ExprInspection -analyzer-store=region -Wno-pointer-to-int-cast -verify -analyzer-config eagerly-assume=false %s +// RUN: %clang_analyze_cc1 -triple x86_64-apple-darwin9 -fenable-matrix -analyzer-checker=core,alpha.core,debug.ExprInspection -Wno-pointer-to-int-cast -verify -DEAGERLY_ASSUME=1 -w %s +// RUN: %clang_analyze_cc1 -triple i386-apple-darwin9 -fenable-matrix -analyzer-checker=core,alpha.core,debug.ExprInspection -Wno-pointer-to-int-cast -verify -DEAGERLY_ASSUME=1 -DBIT32=1 -w %s extern void clang_analyzer_eval(_Bool); @@ -193,6 +193,27 @@ void testSwitchWithSizeofs() { } } +void test_ToUnion_cast(unsigned long long x) { + union Key { + unsigned long long data; + }; + void clang_analyzer_dump_union(union Key); + clang_analyzer_dump_union((union Key)x); // expected-warning {{Unknown}} +} + +typedef char cx5x5 __attribute__((matrix_type(5, 5))); +typedef int ix5x5 __attribute__((matrix_type(5, 5))); +void test_MatrixCast_cast(cx5x5 c) { + void clang_analyzer_dump_ix5x5(ix5x5); + clang_analyzer_dump_ix5x5((ix5x5)c); // expected-warning {{Unknown}} +} + +void test_VectorSplat_cast(long x) { + typedef int __attribute__((ext_vector_type(2))) V; + void clang_analyzer_dump_V(V); + clang_analyzer_dump_V((V)x); // expected-warning {{Unknown}} +} + #endif #ifdef EAGERLY_ASSUME From dc4299a7f3ad7e4fa3c310d585de4e46bde58d16 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 1 Jul 2021 09:48:07 +0100 Subject: [PATCH 395/619] [BasicAA] Fix typo ScaleForGDC -> ScaleForGCD. --- llvm/lib/Analysis/BasicAliasAnalysis.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp index d79df622ee501..357772c9c4f2e 100644 --- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -1148,15 +1148,15 @@ AliasResult BasicAAResult::aliasGEP( bool AllNonPositive = DecompGEP1.Offset.isNonPositive(); for (unsigned i = 0, e = DecompGEP1.VarIndices.size(); i != e; ++i) { APInt Scale = DecompGEP1.VarIndices[i].Scale; - APInt ScaleForGDC = DecompGEP1.VarIndices[i].Scale; + APInt ScaleForGCD = DecompGEP1.VarIndices[i].Scale; if (!DecompGEP1.VarIndices[i].IsNSW) - ScaleForGDC = APInt::getOneBitSet(Scale.getBitWidth(), + ScaleForGCD = APInt::getOneBitSet(Scale.getBitWidth(), Scale.countTrailingZeros()); if (i == 0) - GCD = ScaleForGDC.abs(); + GCD = ScaleForGCD.abs(); else - GCD = APIntOps::GreatestCommonDivisor(GCD, ScaleForGDC.abs()); + GCD = APIntOps::GreatestCommonDivisor(GCD, ScaleForGCD.abs()); if (AllNonNegative || AllNonPositive) { // If the Value could change between cycles, then any reasoning about From c32186038d6c581dcf5d12e16c47d003cd6fafff Mon Sep 17 00:00:00 2001 From: Chen Zheng Date: Thu, 1 Jul 2021 09:22:42 +0000 Subject: [PATCH 396/619] [PowerPC] add a testcase for byval parameter; NFC --- llvm/test/CodeGen/PowerPC/byval.ll | 35 ++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 llvm/test/CodeGen/PowerPC/byval.ll diff --git a/llvm/test/CodeGen/PowerPC/byval.ll b/llvm/test/CodeGen/PowerPC/byval.ll new file mode 100644 index 0000000000000..c67cc1022695f --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/byval.ll @@ -0,0 +1,35 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-n32:64-S128-v256:256:256-v512:512:512" +target triple = "powerpc64le-unknown-linux-gnu" + +%struct = type { [4 x i32], [20 x i8] } + +declare dso_local i32 @foo1(%struct* byval(%struct) %var) +declare dso_local void @foo(%struct* %var) + +; FIXME: for the byval parameter %x, now the memory for local variable and +; for parameter save area are overlap. +; For the below case, +; the local variable space is r1 + 40 ~ r1 + 76 +; the parameter save area is r1 + 32 ~ r1 + 68 + +define dso_local i32 @bar() { +; CHECK-LABEL: bar: +; CHECK: addi 30, 1, 40 +; CHECK: li 3, 16 +; CHECK: lxvd2x 0, 30, 3 +; CHECK: li 3, 48 +; CHECK: stxvd2x 0, 1, 3 +; CHECK: li 3, 32 +; CHECK: lxvd2x 0, 0, 30 +; CHECK: stxvd2x 0, 1, 3 +; CHECK: lwz 3, 72(1) +; CHECK: stw 3, 64(1) +entry: + %x = alloca %struct, align 4 + call void @foo(%struct* %x) + %r = call i32 @foo1(%struct* byval(%struct) %x) + ret i32 %r +} From f9937106b7171eb1f4f8914e29c2be0c36ebc46d Mon Sep 17 00:00:00 2001 From: mydeveloperday Date: Thu, 1 Jul 2021 10:45:59 +0100 Subject: [PATCH 397/619] [clang-format] PR50727 C# Invoke Lamda Expression indentation incorrect https://bugs.llvm.org/show_bug.cgi?id=50727 When processing C# Lambda expression in the indentation can goes a little wrong, resulting the the closing } being at the wrong indentation level and meaning the remaining part of the file is incorrectly indented. This can be a fairly common pattern for when C# wants to peform a UI action from a thread, and it wants to invoke that action on the main thread Reviewed By: exv, jbcoe Differential Revision: https://reviews.llvm.org/D104388 --- clang/lib/Format/UnwrappedLineParser.cpp | 22 +++- clang/unittests/Format/FormatTestCSharp.cpp | 116 ++++++++++++++++++++ 2 files changed, 133 insertions(+), 5 deletions(-) diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index 45ff319b5841d..f76cb4d341a22 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -1482,8 +1482,8 @@ void UnwrappedLineParser::parseStructuralElement() { } case tok::equal: // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType - // TT_FatArrow. The always start an expression or a child block if - // followed by a curly. + // TT_FatArrow. They always start an expression or a child block if + // followed by a curly brace. if (FormatTok->is(TT_FatArrow)) { nextToken(); if (FormatTok->is(tok::l_brace)) { @@ -1790,14 +1790,20 @@ bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons, bool HasError = false; // FIXME: Once we have an expression parser in the UnwrappedLineParser, - // replace this by using parseAssigmentExpression() inside. + // replace this by using parseAssignmentExpression() inside. do { if (Style.isCSharp()) { + // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType + // TT_FatArrow. They always start an expression or a child block if + // followed by a curly brace. if (FormatTok->is(TT_FatArrow)) { nextToken(); - // Fat arrows can be followed by simple expressions or by child blocks - // in curly braces. if (FormatTok->is(tok::l_brace)) { + // C# may break after => if the next character is a newline. + if (Style.isCSharp() && Style.BraceWrapping.AfterFunction == true) { + // calling `addUnwrappedLine()` here causes odd parsing errors. + FormatTok->MustBreakBefore = true; + } parseChildBlock(); continue; } @@ -1927,6 +1933,12 @@ void UnwrappedLineParser::parseParens() { parseBracedList(); } break; + case tok::equal: + if (Style.isCSharp() && FormatTok->is(TT_FatArrow)) + parseStructuralElement(); + else + nextToken(); + break; case tok::kw_class: if (Style.Language == FormatStyle::LK_JavaScript) parseRecord(/*ParseAsExpr=*/true); diff --git a/clang/unittests/Format/FormatTestCSharp.cpp b/clang/unittests/Format/FormatTestCSharp.cpp index 651b54cd342a7..3c990339cf748 100644 --- a/clang/unittests/Format/FormatTestCSharp.cpp +++ b/clang/unittests/Format/FormatTestCSharp.cpp @@ -640,6 +640,122 @@ class MyClass }; })", MicrosoftStyle); + + verifyFormat("void bar()\n" + "{\n" + " Function(Val, (Action)(() =>\n" + " {\n" + " lock (mylock)\n" + " {\n" + " if (true)\n" + " {\n" + " A.Remove(item);\n" + " }\n" + " }\n" + " }));\n" + "}", + MicrosoftStyle); + + verifyFormat("void baz()\n" + "{\n" + " Function(Val, (Action)(() =>\n" + " {\n" + " using (var a = new Lock())\n" + " {\n" + " if (true)\n" + " {\n" + " A.Remove(item);\n" + " }\n" + " }\n" + " }));\n" + "}", + MicrosoftStyle); + + verifyFormat("void baz()\n" + "{\n" + " Function(Val, (Action)(() =>\n" + " {\n" + " if (true)\n" + " {\n" + " A.Remove(item);\n" + " }\n" + " }));\n" + "}", + MicrosoftStyle); + + verifyFormat("void baz()\n" + "{\n" + " Function(Val, (Action)(() =>\n" + " {\n" + " do\n" + " {\n" + " A.Remove(item);\n" + " } while (true)\n" + " }));\n" + "}", + MicrosoftStyle); + + verifyFormat("void baz()\n" + "{\n" + " Function(Val, (Action)(() =>\n" + " { A.Remove(item); }));\n" + "}", + MicrosoftStyle); + + verifyFormat("void bar()\n" + "{\n" + " Function(Val, (() =>\n" + " {\n" + " lock (mylock)\n" + " {\n" + " if (true)\n" + " {\n" + " A.Remove(item);\n" + " }\n" + " }\n" + " }));\n" + "}", + MicrosoftStyle); + verifyFormat("void bar()\n" + "{\n" + " Function((() =>\n" + " {\n" + " lock (mylock)\n" + " {\n" + " if (true)\n" + " {\n" + " A.Remove(item);\n" + " }\n" + " }\n" + " }));\n" + "}", + MicrosoftStyle); + + MicrosoftStyle.IndentWidth = 2; + verifyFormat("void bar()\n" + "{\n" + " Function((() =>\n" + " {\n" + " lock (mylock)\n" + " {\n" + " if (true)\n" + " {\n" + " A.Remove(item);\n" + " }\n" + " }\n" + " }));\n" + "}", + MicrosoftStyle); + verifyFormat("void bar() {\n" + " Function((() => {\n" + " lock (mylock) {\n" + " if (true) {\n" + " A.Remove(item);\n" + " }\n" + " }\n" + " }));\n" + "}", + GoogleStyle); } TEST_F(FormatTestCSharp, CSharpObjectInitializers) { From cd8f979fe48c068e9cd50c283833fa8c4430b20b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=BChnel?= Date: Mon, 28 Jun 2021 14:29:50 +0200 Subject: [PATCH 398/619] added some example code for llvm::Expected Since I had some fun understanding how to properly use llvm::Expected I added some code examples that I would have liked to see when learning to use it. Reviewed By: sammccall Differential Revision: https://reviews.llvm.org/D105014 --- llvm/include/llvm/Support/Error.h | 33 +++++++++++++++++++++++ llvm/include/llvm/Testing/Support/Error.h | 21 +++++++++++++++ 2 files changed, 54 insertions(+) diff --git a/llvm/include/llvm/Support/Error.h b/llvm/include/llvm/Support/Error.h index 9c2942ba7b7b3..4b7ab58263698 100644 --- a/llvm/include/llvm/Support/Error.h +++ b/llvm/include/llvm/Support/Error.h @@ -436,6 +436,39 @@ inline Error joinErrors(Error E1, Error E2) { /// Error cannot be copied, this class replaces getError() with /// takeError(). It also adds an bool errorIsA() method for testing the /// error class type. +/// +/// Example usage of 'Expected' as a function return type: +/// +/// @code{.cpp} +/// Expected myDivide(int A, int B) { +/// if (B == 0) { +/// // return an Error +/// return createStringError(inconvertibleErrorCode(), +/// "B must not be zero!"); +/// } +/// // return an integer +/// return A / B; +/// } +/// @endcode +/// +/// Checking the results of to a function returning 'Expected': +/// @code{.cpp} +/// if (auto E = Result.takeError()) { +/// // We must consume the error. Typically one of: +/// // - return the error to our caller +/// // - toString(), when logging +/// // - consumeError(), to silently swallow the error +/// // - handleErrors(), to distinguish error types +/// errs() << "Problem with division " << toString(std::move(E)) << "\n"; +/// return; +/// } +/// // use the result +/// outs() << "The answer is " << *Result << "\n"; +/// @endcode +/// +/// For unit-testing a function returning an 'Expceted', see the +/// 'EXPECT_THAT_EXPECTED' macros in llvm/Testing/Support/Error.h + template class LLVM_NODISCARD Expected { template friend class ExpectedAsOutParameter; template friend class Expected; diff --git a/llvm/include/llvm/Testing/Support/Error.h b/llvm/include/llvm/Testing/Support/Error.h index 67e9985b80f55..c04e4e2abf0cf 100644 --- a/llvm/include/llvm/Testing/Support/Error.h +++ b/llvm/include/llvm/Testing/Support/Error.h @@ -165,6 +165,27 @@ class ErrorMessageMatches #define ASSERT_THAT_ERROR(Err, Matcher) \ ASSERT_THAT(llvm::detail::TakeError(Err), Matcher) +/// Helper macro for checking the result of an 'Expected' +/// +/// @code{.cpp} +/// // function to be tested +/// Expected myDivide(int A, int B); +/// +/// TEST(myDivideTests, GoodAndBad) { +/// // test good case +/// // if you only care about success or failure: +/// EXPECT_THAT_EXPECTED(myDivide(10, 5), Succeeded()); +/// // if you also care about the value: +/// EXPECT_THAT_EXPECTED(myDivide(10, 5), HasValue(2)); +/// +/// // test the error case +/// EXPECT_THAT_EXPECTED(myDivide(10, 0), Failed()); +/// // also check the error message +/// EXPECT_THAT_EXPECTED(myDivide(10, 0), +/// FailedWithMessage("B must not be zero!")); +/// } +/// @endcode + #define EXPECT_THAT_EXPECTED(Err, Matcher) \ EXPECT_THAT(llvm::detail::TakeExpected(Err), Matcher) #define ASSERT_THAT_EXPECTED(Err, Matcher) \ From 7d2d5a3a6d7aaa40468c30250bf6b0938ef02c08 Mon Sep 17 00:00:00 2001 From: Matheus Izvekov Date: Mon, 14 Jun 2021 13:23:18 +0200 Subject: [PATCH 399/619] [clang] Apply P1825 as Defect Report from C++11 up to C++20. This extends the effects of [[ http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2019/p1825r0.html | P1825 ]] to all C++ standards from C++11 up to C++20. According to Motion 23 from Cologne 2019, P1825R0 was accepted as a Defect Report, so we retroactively apply this all the way back to C++11. Note that we also remove implicit moves from C++98 as an extension altogether, since the expanded first overload resolution from P1825 can cause some meaning changes in C++98. For example it can change which copy constructor is picked when both const and non-const ones are available. This also rips out warn_return_std_move since there are no cases where it would be worthwhile to suggest it. This also fixes a bug with bailing into the second overload resolution when encountering a non-rvref qualified conversion operator. This was unnoticed until now, so two new test cases cover these. Signed-off-by: Matheus Izvekov Reviewed By: rsmith Differential Revision: https://reviews.llvm.org/D104500 --- .../clang/Basic/DiagnosticSemaKinds.td | 6 - clang/include/clang/Sema/Sema.h | 3 +- clang/lib/Sema/SemaStmt.cpp | 183 ++------- .../class.init/class.copy.elision/p3.cpp | 193 ++++++---- clang/test/SemaCXX/P1155.cpp | 26 +- clang/test/SemaCXX/conversion-function.cpp | 25 +- clang/test/SemaCXX/warn-return-std-move.cpp | 351 ------------------ clang/test/SemaObjCXX/block-capture.mm | 48 ++- 8 files changed, 194 insertions(+), 641 deletions(-) delete mode 100644 clang/test/SemaCXX/warn-return-std-move.cpp diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 22c2a1a39ea13..a9d7388950331 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -6485,12 +6485,6 @@ def warn_pessimizing_move_on_initialization : Warning< InGroup, DefaultIgnore; def note_remove_move : Note<"remove std::move call here">; -def warn_return_std_move : Warning< - "local variable %0 will be copied despite being %select{returned|thrown}1 by name">, - InGroup, DefaultIgnore; -def note_add_std_move : Note< - "call 'std::move' explicitly to avoid copying">; - def warn_string_plus_int : Warning< "adding %0 to a string does not append to the string">, InGroup; diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 3f7db9bc5be8b..ad987dffac03a 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -4782,8 +4782,7 @@ class Sema final { bool isCopyElidable() const { return S == MoveEligibleAndCopyElidable; } }; NamedReturnInfo getNamedReturnInfo(Expr *&E, bool ForceCXX2b = false); - NamedReturnInfo getNamedReturnInfo(const VarDecl *VD, - bool ForceCXX20 = false); + NamedReturnInfo getNamedReturnInfo(const VarDecl *VD); const VarDecl *getCopyElisionCandidate(NamedReturnInfo &Info, QualType ReturnType); diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp index afea878b299a6..1e86f382f060b 100644 --- a/clang/lib/Sema/SemaStmt.cpp +++ b/clang/lib/Sema/SemaStmt.cpp @@ -3332,7 +3332,7 @@ Sema::NamedReturnInfo Sema::getNamedReturnInfo(Expr *&E, bool ForceCXX2b) { const auto *VD = dyn_cast(DR->getDecl()); if (!VD) return NamedReturnInfo(); - NamedReturnInfo Res = getNamedReturnInfo(VD, /*ForceCXX20=*/ForceCXX2b); + NamedReturnInfo Res = getNamedReturnInfo(VD); if (Res.Candidate && !E->isXValue() && (ForceCXX2b || getLangOpts().CPlusPlus2b)) { E = ImplicitCastExpr::Create(Context, VD->getType().getNonReferenceType(), @@ -3342,46 +3342,28 @@ Sema::NamedReturnInfo Sema::getNamedReturnInfo(Expr *&E, bool ForceCXX2b) { return Res; } -/// Updates the status in the given NamedReturnInfo object to disallow -/// copy elision, and optionally also implicit move. -/// -/// \param Info The NamedReturnInfo object to update. -/// -/// \param CanMove If true, disallow only copy elision. -/// If false, also disallow implcit move. -static void disallowNRVO(Sema::NamedReturnInfo &Info, bool CanMove) { - Info.S = std::min(Info.S, CanMove ? Sema::NamedReturnInfo::MoveEligible - : Sema::NamedReturnInfo::None); -} - /// Determine whether the given NRVO candidate variable is move-eligible or /// copy-elidable, without considering function return type. /// /// \param VD The NRVO candidate variable. /// -/// \param ForceCXX20 Overrides detection of current language mode -/// and uses the rules for C++20. -/// /// \returns An aggregate which contains the Candidate and isMoveEligible /// and isCopyElidable methods. If Candidate is non-null, it means /// isMoveEligible() would be true under the most permissive language standard. -Sema::NamedReturnInfo Sema::getNamedReturnInfo(const VarDecl *VD, - bool ForceCXX20) { - bool hasCXX11 = getLangOpts().CPlusPlus11 || ForceCXX20; - bool hasCXX20 = getLangOpts().CPlusPlus20 || ForceCXX20; +Sema::NamedReturnInfo Sema::getNamedReturnInfo(const VarDecl *VD) { NamedReturnInfo Info{VD, NamedReturnInfo::MoveEligibleAndCopyElidable}; // C++20 [class.copy.elision]p3: // - in a return statement in a function with ... // (other than a function ... parameter) if (VD->getKind() == Decl::ParmVar) - disallowNRVO(Info, hasCXX11); + Info.S = NamedReturnInfo::MoveEligible; else if (VD->getKind() != Decl::Var) return NamedReturnInfo(); // (other than ... a catch-clause parameter) if (VD->isExceptionVariable()) - disallowNRVO(Info, hasCXX20); + Info.S = NamedReturnInfo::MoveEligible; // ...automatic... if (!VD->hasLocalStorage()) @@ -3406,7 +3388,7 @@ Sema::NamedReturnInfo Sema::getNamedReturnInfo(const VarDecl *VD, if (VDReferencedType.isVolatileQualified() || !VDReferencedType->isObjectType()) return NamedReturnInfo(); - disallowNRVO(Info, hasCXX20); + Info.S = NamedReturnInfo::MoveEligible; } else { return NamedReturnInfo(); } @@ -3415,7 +3397,7 @@ Sema::NamedReturnInfo Sema::getNamedReturnInfo(const VarDecl *VD, // alignment cannot use NRVO. if (!VDType->isDependentType() && VD->hasAttr() && Context.getDeclAlign(VD) > Context.getTypeAlignInChars(VDType)) - disallowNRVO(Info, hasCXX11); + Info.S = NamedReturnInfo::MoveEligible; return Info; } @@ -3459,110 +3441,11 @@ const VarDecl *Sema::getCopyElisionCandidate(NamedReturnInfo &Info, // When considering moving this expression out, allow dissimilar types. if (!VDType->isDependentType() && !Context.hasSameUnqualifiedType(ReturnType, VDType)) - disallowNRVO(Info, getLangOpts().CPlusPlus11); + Info.S = NamedReturnInfo::MoveEligible; } return Info.isCopyElidable() ? Info.Candidate : nullptr; } -/// Try to perform the initialization of a potentially-movable value, -/// which is the operand to a return or throw statement. -/// -/// This routine implements C++20 [class.copy.elision]p3, which attempts to -/// treat returned lvalues as rvalues in certain cases (to prefer move -/// construction), then falls back to treating them as lvalues if that failed. -/// -/// \param ConvertingConstructorsOnly If true, follow [class.copy.elision]p3 and -/// reject resolutions that find non-constructors, such as derived-to-base -/// conversions or `operator T()&&` member functions. If false, do consider such -/// conversion sequences. -/// -/// \param Res We will fill this in if move-initialization was possible. -/// If move-initialization is not possible, such that we must fall back to -/// treating the operand as an lvalue, we will leave Res in its original -/// invalid state. -/// -/// \returns Whether we need to do the second overload resolution. If the first -/// overload resolution fails, or if the first overload resolution succeeds but -/// the selected constructor/operator doesn't match the additional criteria, we -/// need to do the second overload resolution. -static bool TryMoveInitialization(Sema &S, const InitializedEntity &Entity, - const VarDecl *NRVOCandidate, Expr *&Value, - bool ConvertingConstructorsOnly, - bool IsDiagnosticsCheck, ExprResult &Res) { - ImplicitCastExpr AsRvalue(ImplicitCastExpr::OnStack, Value->getType(), - CK_NoOp, Value, VK_XValue, FPOptionsOverride()); - - Expr *InitExpr = &AsRvalue; - - InitializationKind Kind = InitializationKind::CreateCopy( - Value->getBeginLoc(), Value->getBeginLoc()); - - InitializationSequence Seq(S, Entity, Kind, InitExpr); - - bool NeedSecondOverloadResolution = true; - if (!Seq && - (IsDiagnosticsCheck || Seq.getFailedOverloadResult() != OR_Deleted)) { - return NeedSecondOverloadResolution; - } - - for (const InitializationSequence::Step &Step : Seq.steps()) { - if (Step.Kind != InitializationSequence::SK_ConstructorInitialization && - Step.Kind != InitializationSequence::SK_UserConversion) - continue; - - FunctionDecl *FD = Step.Function.Function; - if (ConvertingConstructorsOnly) { - if (isa(FD)) { - // C++11 [class.copy]p32: - // C++14 [class.copy]p32: - // C++17 [class.copy.elision]p3: - // [...] if the type of the first parameter of the selected constructor - // is not an rvalue reference to the object's type (possibly - // cv-qualified), overload resolution is performed again, considering - // the object as an lvalue. - const RValueReferenceType *RRefType = - FD->getParamDecl(0)->getType()->getAs(); - if (!RRefType) - break; - if (!S.Context.hasSameUnqualifiedType(RRefType->getPointeeType(), - NRVOCandidate->getType())) - break; - } else { - continue; - } - } else { - if (isa(FD)) { - // Check that overload resolution selected a constructor taking an - // rvalue reference. If it selected an lvalue reference, then we - // didn't need to cast this thing to an rvalue in the first place. - if (IsDiagnosticsCheck && - !isa(FD->getParamDecl(0)->getType())) - break; - } else if (isa(FD)) { - // Check that overload resolution selected a conversion operator - // taking an rvalue reference. - if (cast(FD)->getRefQualifier() != RQ_RValue) - break; - } else { - continue; - } - } - - NeedSecondOverloadResolution = false; - // Promote "AsRvalue" to the heap, since we now need this - // expression node to persist. - Value = - ImplicitCastExpr::Create(S.Context, Value->getType(), CK_NoOp, Value, - nullptr, VK_XValue, FPOptionsOverride()); - - // Complete type-checking the initialization of the return type - // using the constructor we found. - Res = Seq.Perform(S, Entity, Kind, Value); - } - - return NeedSecondOverloadResolution; -} - /// Perform the initialization of a potentially-movable value, which /// is the result of return value. /// @@ -3573,42 +3456,26 @@ ExprResult Sema::PerformMoveOrCopyInitialization(const InitializedEntity &Entity, const NamedReturnInfo &NRInfo, Expr *Value) { - - if (NRInfo.Candidate && !getLangOpts().CPlusPlus2b) { - if (NRInfo.isMoveEligible()) { - ExprResult Res; - if (!TryMoveInitialization(*this, Entity, NRInfo.Candidate, Value, - !getLangOpts().CPlusPlus20, false, Res)) - return Res; - } - if (!getDiagnostics().isIgnored(diag::warn_return_std_move, - Value->getExprLoc())) { - QualType QT = NRInfo.Candidate->getType(); - if (QT.getNonReferenceType().getUnqualifiedType().isTriviallyCopyableType( - Context)) { - // Adding 'std::move' around a trivially copyable variable is probably - // pointless. Don't suggest it. - } else { - ExprResult FakeRes = ExprError(); - Expr *FakeValue = Value; - TryMoveInitialization(*this, Entity, NRInfo.Candidate, FakeValue, false, - true, FakeRes); - if (!FakeRes.isInvalid()) { - bool IsThrow = (Entity.getKind() == InitializedEntity::EK_Exception); - SmallString<32> Str; - Str += "std::move("; - Str += NRInfo.Candidate->getDeclName().getAsString(); - Str += ")"; - Diag(Value->getExprLoc(), diag::warn_return_std_move) - << Value->getSourceRange() << NRInfo.Candidate->getDeclName() - << IsThrow; - Diag(Value->getExprLoc(), diag::note_add_std_move) - << FixItHint::CreateReplacement(Value->getSourceRange(), Str); - } - } + if (getLangOpts().CPlusPlus11 && !getLangOpts().CPlusPlus2b && + NRInfo.isMoveEligible()) { + ImplicitCastExpr AsRvalue(ImplicitCastExpr::OnStack, Value->getType(), + CK_NoOp, Value, VK_XValue, FPOptionsOverride()); + Expr *InitExpr = &AsRvalue; + auto Kind = InitializationKind::CreateCopy(Value->getBeginLoc(), + Value->getBeginLoc()); + InitializationSequence Seq(*this, Entity, Kind, InitExpr); + auto Res = Seq.getFailedOverloadResult(); + if (Res == OR_Success || Res == OR_Deleted) { + // Promote "AsRvalue" to the heap, since we now need this + // expression node to persist. + Value = + ImplicitCastExpr::Create(Context, Value->getType(), CK_NoOp, Value, + nullptr, VK_XValue, FPOptionsOverride()); + // Complete type-checking the initialization of the return type + // using the constructor we found. + return Seq.Perform(*this, Entity, Kind, Value); } } - // Either we didn't meet the criteria for treating an lvalue as an rvalue, // above, or overload resolution failed. Either way, we need to try // (again) now with the return value expression as written. diff --git a/clang/test/CXX/class/class.init/class.copy.elision/p3.cpp b/clang/test/CXX/class/class.init/class.copy.elision/p3.cpp index bf79ac9e0b85c..ed6bec97e0bf2 100644 --- a/clang/test/CXX/class/class.init/class.copy.elision/p3.cpp +++ b/clang/test/CXX/class/class.init/class.copy.elision/p3.cpp @@ -1,18 +1,17 @@ -// RUN: %clang_cc1 -std=c++2b -fsyntax-only -fcxx-exceptions -verify=expected,cxx20_2b,cxx2b %s -// RUN: %clang_cc1 -std=c++20 -fsyntax-only -fcxx-exceptions -verify=expected,cxx11_20,cxx20_2b %s -// RUN: %clang_cc1 -std=c++17 -fsyntax-only -fcxx-exceptions -verify=expected,cxx11_17,cxx11_20 %s -// RUN: %clang_cc1 -std=c++14 -fsyntax-only -fcxx-exceptions -verify=expected,cxx11_17,cxx11_20 %s -// RUN: %clang_cc1 -std=c++11 -fsyntax-only -fcxx-exceptions -verify=expected,cxx11_17,cxx11_20 %s +// RUN: %clang_cc1 -std=c++2b -fsyntax-only -fcxx-exceptions -verify=expected,cxx11_2b,cxx2b %s +// RUN: %clang_cc1 -std=c++20 -fsyntax-only -fcxx-exceptions -verify=expected,cxx98_20,cxx11_2b,cxx11_20 %s +// RUN: %clang_cc1 -std=c++11 -fsyntax-only -fcxx-exceptions -verify=expected,cxx98_20,cxx11_2b,cxx11_20 %s +// RUN: %clang_cc1 -std=c++98 -fsyntax-only -fcxx-exceptions -Wno-c++11-extensions -verify=expected,cxx98_20,cxx98 %s namespace test_delete_function { struct A1 { A1(); A1(const A1 &); - A1(A1 &&) = delete; // expected-note {{'A1' has been explicitly marked deleted here}} + A1(A1 &&) = delete; // cxx11_2b-note {{'A1' has been explicitly marked deleted here}} }; A1 test1() { A1 a; - return a; // expected-error {{call to deleted constructor of 'test_delete_function::A1'}} + return a; // cxx11_2b-error {{call to deleted constructor of 'test_delete_function::A1'}} } struct A2 { @@ -20,91 +19,90 @@ struct A2 { A2(const A2 &); private: - A2(A2 &&); // expected-note {{declared private here}} + A2(A2 &&); // cxx11_2b-note {{declared private here}} }; A2 test2() { A2 a; - return a; // expected-error {{calling a private constructor of class 'test_delete_function::A2'}} + return a; // cxx11_2b-error {{calling a private constructor of class 'test_delete_function::A2'}} } struct C {}; struct B1 { B1(C &); - B1(C &&) = delete; // expected-note {{'B1' has been explicitly marked deleted here}} + B1(C &&) = delete; // cxx11_2b-note {{'B1' has been explicitly marked deleted here}} }; B1 test3() { C c; - return c; // expected-error {{conversion function from 'test_delete_function::C' to 'test_delete_function::B1' invokes a deleted function}} + return c; // cxx11_2b-error {{conversion function from 'test_delete_function::C' to 'test_delete_function::B1' invokes a deleted function}} } struct B2 { B2(C &); private: - B2(C &&); // expected-note {{declared private here}} + B2(C &&); // cxx11_2b-note {{declared private here}} }; B2 test4() { C c; - return c; // expected-error {{calling a private constructor of class 'test_delete_function::B2'}} + return c; // cxx11_2b-error {{calling a private constructor of class 'test_delete_function::B2'}} } } // namespace test_delete_function -// In C++20, implicitly movable entity can be rvalue reference to non-volatile +// Implicitly movable entity can be rvalue reference to non-volatile // automatic object. namespace test_implicitly_movable_rvalue_ref { struct A1 { A1(A1 &&); - A1(const A1 &) = delete; // cxx11_17-note {{'A1' has been explicitly marked deleted here}} + A1(const A1 &) = delete; // cxx98-note {{marked deleted here}} }; A1 test1(A1 &&a) { - return a; // cxx11_17-error {{call to deleted constructor of 'test_implicitly_movable_rvalue_ref::A1'}} + return a; // cxx98-error {{call to deleted constructor}} } struct A2 { A2(A2 &&); private: - A2(const A2 &); // cxx11_17-note {{declared private here}} + A2(const A2 &); // cxx98-note {{declared private here}} }; A2 test2(A2 &&a) { - return a; // cxx11_17-error {{calling a private constructor of class 'test_implicitly_movable_rvalue_ref::A2'}} + return a; // cxx98-error {{calling a private constructor}} } struct B1 { B1(const B1 &); - B1(B1 &&) = delete; // cxx20_2b-note {{'B1' has been explicitly marked deleted here}} + B1(B1 &&) = delete; // cxx11_2b-note {{'B1' has been explicitly marked deleted here}} }; B1 test3(B1 &&b) { - return b; // cxx20_2b-error {{call to deleted constructor of 'test_implicitly_movable_rvalue_ref::B1'}} + return b; // cxx11_2b-error {{call to deleted constructor of 'test_implicitly_movable_rvalue_ref::B1'}} } struct B2 { B2(const B2 &); private: - B2(B2 &&); // cxx20_2b-note {{declared private here}} + B2(B2 &&); // cxx11_2b-note {{declared private here}} }; B2 test4(B2 &&b) { - return b; // cxx20_2b-error {{calling a private constructor of class 'test_implicitly_movable_rvalue_ref::B2'}} + return b; // cxx11_2b-error {{calling a private constructor of class 'test_implicitly_movable_rvalue_ref::B2'}} } } // namespace test_implicitly_movable_rvalue_ref -// In C++20, operand of throw-expression can be function parameter or +// Operand of throw-expression can be function parameter or // catch-clause parameter. namespace test_throw_parameter { void func(); struct A1 { A1(const A1 &); - A1(A1 &&) = delete; // cxx20_2b-note {{'A1' has been explicitly marked deleted here}} - // expected-note@-1 {{'A1' has been explicitly marked deleted here}} + A1(A1 &&) = delete; // cxx11_2b-note 2{{'A1' has been explicitly marked deleted here}} }; void test1() { try { func(); } catch (A1 a) { - throw a; // cxx20_2b-error {{call to deleted constructor of 'test_throw_parameter::A1'}} + throw a; // cxx11_2b-error {{call to deleted constructor of 'test_throw_parameter::A1'}} } } @@ -112,70 +110,70 @@ struct A2 { A2(const A2 &); private: - A2(A2 &&); // cxx20_2b-note {{declared private here}} + A2(A2 &&); // cxx11_2b-note {{declared private here}} }; void test2() { try { func(); } catch (A2 a) { - throw a; // cxx20_2b-error {{calling a private constructor of class 'test_throw_parameter::A2'}} + throw a; // cxx11_2b-error {{calling a private constructor of class 'test_throw_parameter::A2'}} } } void test3(A1 a) try { func(); } catch (...) { - throw a; // expected-error {{call to deleted constructor of 'test_throw_parameter::A1'}} + throw a; // cxx11_2b-error {{call to deleted constructor of 'test_throw_parameter::A1'}} } } // namespace test_throw_parameter -// In C++20, during the first overload resolution, the selected function no +// During the first overload resolution, the selected function no // need to be a constructor. namespace test_non_ctor_conversion { class C {}; struct A1 { operator C() &&; - operator C() const & = delete; // cxx11_17-note {{'operator C' has been explicitly marked deleted here}} + operator C() const & = delete; // cxx98-note {{marked deleted here}} }; C test1() { A1 a; - return a; // cxx11_17-error {{conversion function from 'test_non_ctor_conversion::A1' to 'test_non_ctor_conversion::C' invokes a deleted function}} + return a; // cxx98-error {{invokes a deleted function}} } struct A2 { operator C() &&; private: - operator C() const &; // cxx11_17-note {{declared private here}} + operator C() const &; // cxx98-note {{declared private here}} }; C test2() { A2 a; - return a; // cxx11_17-error {{'operator C' is a private member of 'test_non_ctor_conversion::A2'}} + return a; // cxx98-error {{'operator C' is a private member}} } struct B1 { operator C() const &; - operator C() && = delete; // cxx20_2b-note {{'operator C' has been explicitly marked deleted here}} + operator C() && = delete; // cxx11_2b-note {{'operator C' has been explicitly marked deleted here}} }; C test3() { B1 b; - return b; // cxx20_2b-error {{conversion function from 'test_non_ctor_conversion::B1' to 'test_non_ctor_conversion::C' invokes a deleted function}} + return b; // cxx11_2b-error {{conversion function from 'test_non_ctor_conversion::B1' to 'test_non_ctor_conversion::C' invokes a deleted function}} } struct B2 { operator C() const &; private: - operator C() &&; // cxx20_2b-note {{declared private here}} + operator C() &&; // cxx11_2b-note {{declared private here}} }; C test4() { B2 b; - return b; // cxx20_2b-error {{'operator C' is a private member of 'test_non_ctor_conversion::B2'}} + return b; // cxx11_2b-error {{'operator C' is a private member of 'test_non_ctor_conversion::B2'}} } } // namespace test_non_ctor_conversion -// In C++20, during the first overload resolution, the first parameter of the +// During the first overload resolution, the first parameter of the // selected function no need to be an rvalue reference to the object's type. namespace test_ctor_param_rvalue_ref { struct A1; @@ -190,35 +188,35 @@ struct NeedRvalueRef { NeedRvalueRef(B2 &&); }; struct NeedValue { - NeedValue(A1); // cxx11_17-note 2 {{passing argument to parameter here}} + NeedValue(A1); // cxx98-note 2 {{passing argument to parameter here}} NeedValue(A2); - NeedValue(B1); // cxx20_2b-note 2 {{passing argument to parameter here}} + NeedValue(B1); // cxx11_2b-note 2 {{passing argument to parameter here}} NeedValue(B2); }; struct A1 { A1(); A1(A1 &&); - A1(const A1 &) = delete; // cxx11_17-note 3 {{'A1' has been explicitly marked deleted here}} + A1(const A1 &) = delete; // cxx98-note 3{{marked deleted here}} }; NeedValue test_1_1() { // not rvalue reference // same type A1 a; - return a; // cxx11_17-error {{call to deleted constructor of 'test_ctor_param_rvalue_ref::A1'}} + return a; // cxx98-error {{call to deleted constructor}} } class DerivedA1 : public A1 {}; A1 test_1_2() { // rvalue reference // not same type DerivedA1 a; - return a; // cxx11_17-error {{call to deleted constructor of 'test_ctor_param_rvalue_ref::A1'}} + return a; // cxx98-error {{call to deleted constructor}} } NeedValue test_1_3() { // not rvalue reference // not same type DerivedA1 a; - return a; // cxx11_17-error {{call to deleted constructor of 'test_ctor_param_rvalue_ref::A1'}} + return a; // cxx98-error {{call to deleted constructor}} } struct A2 { @@ -226,51 +224,51 @@ struct A2 { A2(A2 &&); private: - A2(const A2 &); // cxx11_17-note 3 {{declared private here}} + A2(const A2 &); // cxx98-note 3{{declared private here}} }; NeedValue test_2_1() { // not rvalue reference // same type A2 a; - return a; // cxx11_17-error {{calling a private constructor of class 'test_ctor_param_rvalue_ref::A2'}} + return a; // cxx98-error {{calling a private constructor}} } class DerivedA2 : public A2 {}; A2 test_2_2() { // rvalue reference // not same type DerivedA2 a; - return a; // cxx11_17-error {{calling a private constructor of class 'test_ctor_param_rvalue_ref::A2'}} + return a; // cxx98-error {{calling a private constructor}} } NeedValue test_2_3() { // not rvalue reference // not same type DerivedA2 a; - return a; // cxx11_17-error {{calling a private constructor of class 'test_ctor_param_rvalue_ref::A2'}} + return a; // cxx98-error {{calling a private constructor}} } struct B1 { B1(); B1(const B1 &); - B1(B1 &&) = delete; // cxx20_2b-note 3 {{'B1' has been explicitly marked deleted here}} + B1(B1 &&) = delete; // cxx11_2b-note 3 {{'B1' has been explicitly marked deleted here}} }; NeedValue test_3_1() { // not rvalue reference // same type B1 b; - return b; // cxx20_2b-error {{call to deleted constructor of 'test_ctor_param_rvalue_ref::B1'}} + return b; // cxx11_2b-error {{call to deleted constructor of 'test_ctor_param_rvalue_ref::B1'}} } class DerivedB1 : public B1 {}; B1 test_3_2() { // rvalue reference // not same type DerivedB1 b; - return b; // cxx20_2b-error {{call to deleted constructor of 'test_ctor_param_rvalue_ref::B1'}} + return b; // cxx11_2b-error {{call to deleted constructor of 'test_ctor_param_rvalue_ref::B1'}} } NeedValue test_3_3() { // not rvalue reference // not same type DerivedB1 b; - return b; // cxx20_2b-error {{call to deleted constructor of 'test_ctor_param_rvalue_ref::B1'}} + return b; // cxx11_2b-error {{call to deleted constructor of 'test_ctor_param_rvalue_ref::B1'}} } struct B2 { @@ -278,49 +276,46 @@ struct B2 { B2(const B2 &); private: - B2(B2 &&); // cxx20_2b-note 3 {{declared private here}} + B2(B2 &&); // cxx11_2b-note 3 {{declared private here}} }; NeedValue test_4_1() { // not rvalue reference // same type B2 b; - return b; // cxx20_2b-error {{calling a private constructor of class 'test_ctor_param_rvalue_ref::B2'}} + return b; // cxx11_2b-error {{calling a private constructor of class 'test_ctor_param_rvalue_ref::B2'}} } class DerivedB2 : public B2 {}; B2 test_4_2() { // rvalue reference // not same type DerivedB2 b; - return b; // cxx20_2b-error {{calling a private constructor of class 'test_ctor_param_rvalue_ref::B2'}} + return b; // cxx11_2b-error {{calling a private constructor of class 'test_ctor_param_rvalue_ref::B2'}} } NeedValue test_4_3() { // not rvalue reference // not same type DerivedB2 b; - return b; // cxx20_2b-error {{calling a private constructor of class 'test_ctor_param_rvalue_ref::B2'}} + return b; // cxx11_2b-error {{calling a private constructor of class 'test_ctor_param_rvalue_ref::B2'}} } } // namespace test_ctor_param_rvalue_ref namespace test_lvalue_ref_is_not_moved_from { struct Target {}; - // expected-note@-1 {{candidate constructor (the implicit copy constructor) not viable}} - // expected-note@-2 {{candidate constructor (the implicit move constructor) not viable}} - // cxx11_17-note@-3 {{candidate constructor (the implicit copy constructor) not viable}} - // cxx11_17-note@-4 {{candidate constructor (the implicit move constructor) not viable}} +// cxx11_2b-note@-1 {{candidate constructor (the implicit copy constructor) not viable}} +// cxx98-note@-2 2{{candidate constructor (the implicit copy constructor) not viable}} +// cxx11_2b-note@-3 {{candidate constructor (the implicit move constructor) not viable}} struct CopyOnly { - CopyOnly(CopyOnly&&) = delete; // cxx20_2b-note {{has been explicitly marked deleted here}} + CopyOnly(CopyOnly &&) = delete; // cxx11_2b-note {{has been explicitly marked deleted here}} CopyOnly(CopyOnly&); - operator Target() && = delete; // cxx20_2b-note {{has been explicitly marked deleted here}} + operator Target() && = delete; // cxx11_2b-note {{has been explicitly marked deleted here}} operator Target() &; }; struct MoveOnly { - MoveOnly(MoveOnly&&); // expected-note {{copy constructor is implicitly deleted because}} - // cxx11_17-note@-1 {{copy constructor is implicitly deleted because}} - operator Target() &&; // expected-note {{candidate function not viable}} - // cxx11_17-note@-1 {{candidate function not viable}} + MoveOnly(MoveOnly &&); // cxx11_2b-note {{copy constructor is implicitly deleted because}} + operator Target() &&; // expected-note {{candidate function not viable}} cxx98-note {{candidate function not viable}} }; extern CopyOnly copyonly; @@ -333,17 +328,17 @@ CopyOnly t1() { CopyOnly t2() { CopyOnly&& r = static_cast(copyonly); - return r; // cxx20_2b-error {{call to deleted constructor}} + return r; // cxx11_2b-error {{call to deleted constructor}} } MoveOnly t3() { MoveOnly& r = moveonly; - return r; // expected-error {{call to implicitly-deleted copy constructor}} + return r; // cxx11_2b-error {{call to implicitly-deleted copy constructor}} } MoveOnly t4() { MoveOnly&& r = static_cast(moveonly); - return r; // cxx11_17-error {{call to implicitly-deleted copy constructor}} + return r; } Target t5() { @@ -353,7 +348,7 @@ Target t5() { Target t6() { CopyOnly&& r = static_cast(copyonly); - return r; // cxx20_2b-error {{invokes a deleted function}} + return r; // cxx11_2b-error {{invokes a deleted function}} } Target t7() { @@ -363,7 +358,7 @@ Target t7() { Target t8() { MoveOnly&& r = static_cast(moveonly); - return r; // cxx11_17-error {{no viable conversion}} + return r; // cxx98-error {{no viable conversion}} } } // namespace test_lvalue_ref_is_not_moved_from @@ -376,8 +371,7 @@ struct MoveOnly {}; struct Target { Target(CopyOnly (&)()); Target(CopyOnly (&&)()) = delete; - Target(MoveOnly (&)()) = delete; // expected-note {{has been explicitly marked deleted here}} - // expected-note@-1 {{has been explicitly marked deleted here}} + Target(MoveOnly (&)()) = delete; // expected-note 2{{has been explicitly marked deleted here}} Target(MoveOnly (&&)()); }; @@ -406,6 +400,49 @@ Target t4() { } // namespace test_rvalue_ref_to_nonobject +namespace test_constandnonconstcopy { +struct ConstCopyOnly { + ConstCopyOnly(); + ConstCopyOnly(ConstCopyOnly &) = delete; // cxx98-note {{marked deleted here}} + ConstCopyOnly(const ConstCopyOnly &); +}; +ConstCopyOnly t1() { + ConstCopyOnly x; + return x; // cxx98-error {{call to deleted constructor}} +} + +struct NonConstCopyOnly { + NonConstCopyOnly(); + NonConstCopyOnly(NonConstCopyOnly &); + NonConstCopyOnly(const NonConstCopyOnly &) = delete; // cxx11_2b-note {{marked deleted here}} +}; +NonConstCopyOnly t2() { + NonConstCopyOnly x; + return x; // cxx11_2b-error {{call to deleted constructor}} +} + +} // namespace test_constandnonconstcopy + +namespace test_conversion { + +struct B; +struct A { + A(B &) = delete; // cxx98-note {{has been explicitly deleted}} +}; +struct B { + operator A(); // cxx98-note {{candidate function}} +}; +A test1(B x) { return x; } // cxx98-error-re {{conversion {{.*}} is ambiguous}} + +struct C {}; +struct D { + operator C() &; + operator C() const & = delete; // cxx11_2b-note {{marked deleted here}} +}; +C test2(D x) { return x; } // cxx11_2b-error {{invokes a deleted function}} + +} // namespace test_conversion + namespace test_simpler_implicit_move { struct CopyOnly { @@ -421,7 +458,7 @@ struct MoveOnly { MoveOnly &&rref(); MoveOnly &&test1(MoveOnly &&w) { - return w; // cxx11_20-error {{cannot bind to lvalue of type}} + return w; // cxx98_20-error {{cannot bind to lvalue of type}} } CopyOnly test2(bool b) { @@ -434,13 +471,13 @@ CopyOnly test2(bool b) { } } -template T &&test3(T &&x) { return x; } // cxx11_20-error {{cannot bind to lvalue of type}} +template T &&test3(T &&x) { return x; } // cxx98_20-error {{cannot bind to lvalue of type}} template MoveOnly& test3(MoveOnly&); -template MoveOnly &&test3(MoveOnly &&); // cxx11_20-note {{in instantiation of function template specialization}} +template MoveOnly &&test3(MoveOnly &&); // cxx98_20-note {{in instantiation of function template specialization}} MoveOnly &&test4() { MoveOnly &&x = rref(); - return x; // cxx11_20-error {{cannot bind to lvalue of type}} + return x; // cxx98_20-error {{cannot bind to lvalue of type}} } void test5() try { diff --git a/clang/test/SemaCXX/P1155.cpp b/clang/test/SemaCXX/P1155.cpp index 049987806e467..6dcbaa750557d 100644 --- a/clang/test/SemaCXX/P1155.cpp +++ b/clang/test/SemaCXX/P1155.cpp @@ -1,9 +1,7 @@ -// RUN: %clang_cc1 -std=c++2b -fsyntax-only -fcxx-exceptions -verify=cxx20_2b %s -// RUN: %clang_cc1 -std=c++20 -fsyntax-only -fcxx-exceptions -verify=cxx20_2b %s -// RUN: %clang_cc1 -std=c++17 -fsyntax-only -fcxx-exceptions -verify=cxx11_17 %s -// RUN: %clang_cc1 -std=c++14 -fsyntax-only -fcxx-exceptions -verify=cxx11_17 %s -// RUN: %clang_cc1 -std=c++11 -fsyntax-only -fcxx-exceptions -verify=cxx11_17 %s -// cxx20_2b-no-diagnostics +// RUN: %clang_cc1 -std=c++2b -fsyntax-only -fcxx-exceptions -verify %s +// RUN: %clang_cc1 -std=c++20 -fsyntax-only -fcxx-exceptions -verify %s +// RUN: %clang_cc1 -std=c++11 -fsyntax-only -fcxx-exceptions -verify %s +// expected-no-diagnostics // Throwing namespace test_throwing { @@ -14,7 +12,7 @@ class Widget { }; void seven(Widget w) { - throw w; // Clang already do this implicit move before -std=c++20 + throw w; } } // namespace test_throwing @@ -23,13 +21,13 @@ namespace test_non_constructor_conversion { class Widget {}; struct To { - operator Widget() const & = delete; // cxx11_17-note {{'operator Widget' has been explicitly marked deleted here}} + operator Widget() const & = delete; operator Widget() &&; }; Widget nine() { To t; - return t; // cxx11_17-error {{conversion function from 'test_non_constructor_conversion::To' to 'test_non_constructor_conversion::Widget' invokes a deleted function}} + return t; } } // namespace test_non_constructor_conversion @@ -39,16 +37,16 @@ class Widget { public: Widget(); Widget(Widget &&); - Widget(const Widget &) = delete; // cxx11_17-note {{'Widget' has been explicitly marked deleted here}} + Widget(const Widget &) = delete; }; struct Fowl { - Fowl(Widget); // cxx11_17-note {{passing argument to parameter here}} + Fowl(Widget); }; Fowl eleven() { Widget w; - return w; // cxx11_17-error {{call to deleted constructor of 'test_by_value_sinks::Widget'}} + return w; } } // namespace test_by_value_sinks @@ -58,13 +56,13 @@ class Base { public: Base(); Base(Base &&); - Base(Base const &) = delete; // cxx11_17-note {{'Base' has been explicitly marked deleted here}} + Base(Base const &) = delete; }; class Derived : public Base {}; Base thirteen() { Derived result; - return result; // cxx11_17-error {{call to deleted constructor of 'test_slicing::Base'}} + return result; } } // namespace test_slicing diff --git a/clang/test/SemaCXX/conversion-function.cpp b/clang/test/SemaCXX/conversion-function.cpp index 0a3bfd3bcdaa4..8ff709ddbbb25 100644 --- a/clang/test/SemaCXX/conversion-function.cpp +++ b/clang/test/SemaCXX/conversion-function.cpp @@ -1,9 +1,7 @@ -// RUN: %clang_cc1 -std=c++2b -fsyntax-only -verify=expected -triple %itanium_abi_triple -Wbind-to-temporary-copy %s -// RUN: %clang_cc1 -std=c++20 -fsyntax-only -verify=expected -triple %itanium_abi_triple -Wbind-to-temporary-copy %s -// RUN: %clang_cc1 -std=c++14 -fsyntax-only -verify=expected,cxx98_14 -triple %itanium_abi_triple -Wbind-to-temporary-copy %s -// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify=expected,cxx98_14 -triple %itanium_abi_triple -Wbind-to-temporary-copy %s -// RUN: %clang_cc1 -std=c++98 -fsyntax-only -verify=expected,cxx98_14 -triple %itanium_abi_triple -Wbind-to-temporary-copy %s -// RUN: %clang_cc1 -fsyntax-only -verify=expected,cxx98_14 -triple %itanium_abi_triple -Wbind-to-temporary-copy %s +// RUN: %clang_cc1 -std=c++2b -fsyntax-only -verify=expected -triple %itanium_abi_triple -Wbind-to-temporary-copy %s +// RUN: %clang_cc1 -std=c++20 -fsyntax-only -verify=expected -triple %itanium_abi_triple -Wbind-to-temporary-copy %s +// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify=expected,cxx98_11,cxx11 -triple %itanium_abi_triple -Wbind-to-temporary-copy %s +// RUN: %clang_cc1 -std=c++98 -fsyntax-only -verify=expected,cxx98_11,cxx98 -triple %itanium_abi_triple -Wbind-to-temporary-copy %s class X { public: @@ -126,7 +124,7 @@ void f(Yb& a) { class AutoPtrRef { }; class AutoPtr { - AutoPtr(AutoPtr &); // cxx98_14-note{{declared private here}} + AutoPtr(AutoPtr &); // cxx98-note {{declared private here}} public: AutoPtr(); @@ -142,7 +140,7 @@ AutoPtr test_auto_ptr(bool Cond) { AutoPtr p; if (Cond) - return p; // cxx98_14-error{{calling a private constructor}} + return p; // cxx98-error {{calling a private constructor}} return AutoPtr(); } @@ -152,17 +150,14 @@ struct A1 { ~A1(); private: - A1(const A1&); // cxx98_14-note 2 {{declared private here}} + A1(const A1 &); // cxx98_11-note 2 {{declared private here}} }; A1 f() { // FIXME: redundant diagnostics! - return "Hello"; // cxx98_14-error {{calling a private constructor}} -#if __cplusplus <= 199711L - // expected-warning@-2 {{an accessible copy constructor}} -#else - // cxx98_14-warning@-4 {{copying parameter of type 'A1' when binding a reference to a temporary would invoke an inaccessible constructor in C++98}} -#endif + return "Hello"; // cxx98_11-error {{calling a private constructor}} + // cxx98-warning@-1 {{an accessible copy constructor}} + // cxx11-warning@-2 {{copying parameter of type 'A1' when binding a reference to a temporary would invoke an inaccessible constructor in C++98}} } namespace source_locations { diff --git a/clang/test/SemaCXX/warn-return-std-move.cpp b/clang/test/SemaCXX/warn-return-std-move.cpp deleted file mode 100644 index 3dc81bc18ba60..0000000000000 --- a/clang/test/SemaCXX/warn-return-std-move.cpp +++ /dev/null @@ -1,351 +0,0 @@ -// RUN: %clang_cc1 -std=c++2b -fsyntax-only -verify=cxx20_2b,cxx2b -fcxx-exceptions -Wreturn-std-move %s -// RUN: %clang_cc1 -std=c++20 -fsyntax-only -verify=cxx20_2b -fcxx-exceptions -Wreturn-std-move %s -// RUN: %clang_cc1 -std=c++17 -fsyntax-only -verify=cxx11_17 -fcxx-exceptions -Wreturn-std-move %s -// RUN: %clang_cc1 -std=c++14 -fsyntax-only -verify=cxx11_17 -fcxx-exceptions -Wreturn-std-move %s -// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify=cxx11_17 -fcxx-exceptions -Wreturn-std-move %s - -// RUN: %clang_cc1 -std=c++17 -fsyntax-only -fcxx-exceptions -Wreturn-std-move -fdiagnostics-parseable-fixits %s 2>&1 | FileCheck %s -check-prefix=CHECK -// RUN: %clang_cc1 -std=c++14 -fsyntax-only -fcxx-exceptions -Wreturn-std-move -fdiagnostics-parseable-fixits %s 2>&1 | FileCheck %s -check-prefix=CHECK -// RUN: %clang_cc1 -std=c++11 -fsyntax-only -fcxx-exceptions -Wreturn-std-move -fdiagnostics-parseable-fixits %s 2>&1 | FileCheck %s -check-prefix=CHECK - -// definitions for std::move -namespace std { -inline namespace foo { -template struct remove_reference { typedef T type; }; -template struct remove_reference { typedef T type; }; -template struct remove_reference { typedef T type; }; - -template typename remove_reference::type &&move(T &&t); -} // namespace foo -} // namespace std - -struct Instrument { - Instrument() {} - Instrument(Instrument&&) { /* MOVE */ } - Instrument(const Instrument&) { /* COPY */ } -}; -struct ConvertFromBase { Instrument i; }; -struct ConvertFromDerived { Instrument i; }; -struct Base { - Instrument i; - operator ConvertFromBase() const& { return ConvertFromBase{i}; } - operator ConvertFromBase() && { return ConvertFromBase{std::move(i)}; } -}; -struct Derived : public Base { - operator ConvertFromDerived() const& { return ConvertFromDerived{i}; } - operator ConvertFromDerived() && { return ConvertFromDerived{std::move(i)}; } -}; -struct ConstructFromBase { - Instrument i; - ConstructFromBase(const Base& b): i(b.i) {} - ConstructFromBase(Base&& b): i(std::move(b.i)) {} -}; -struct ConstructFromDerived { - Instrument i; - ConstructFromDerived(const Derived& d): i(d.i) {} - ConstructFromDerived(Derived&& d): i(std::move(d.i)) {} -}; - -struct TrivialInstrument { - int i = 42; -}; -struct ConvertFromTrivialBase { TrivialInstrument i; }; -struct ConvertFromTrivialDerived { TrivialInstrument i; }; -struct TrivialBase { - TrivialInstrument i; - operator ConvertFromTrivialBase() const& { return ConvertFromTrivialBase{i}; } - operator ConvertFromTrivialBase() && { return ConvertFromTrivialBase{std::move(i)}; } -}; -struct TrivialDerived : public TrivialBase { - operator ConvertFromTrivialDerived() const& { return ConvertFromTrivialDerived{i}; } - operator ConvertFromTrivialDerived() && { return ConvertFromTrivialDerived{std::move(i)}; } -}; -struct ConstructFromTrivialBase { - TrivialInstrument i; - ConstructFromTrivialBase(const TrivialBase& b): i(b.i) {} - ConstructFromTrivialBase(TrivialBase&& b): i(std::move(b.i)) {} -}; -struct ConstructFromTrivialDerived { - TrivialInstrument i; - ConstructFromTrivialDerived(const TrivialDerived& d): i(d.i) {} - ConstructFromTrivialDerived(TrivialDerived&& d): i(std::move(d.i)) {} -}; - -Derived test1() { - Derived d1; - return d1; // ok -} -Base test2() { - Derived d2; - return d2; // e1 - // cxx11_17-warning@-1{{will be copied despite being returned by name}} - // cxx11_17-note@-2{{to avoid copying}} - // CHECK: fix-it:"{{.*}}":{[[@LINE-3]]:12-[[@LINE-3]]:14}:"std::move(d2)" -} -ConstructFromDerived test3() { - Derived d3; - return d3; // ok -} -ConstructFromBase test4() { - Derived d4; - return d4; // e3 - // cxx11_17-warning@-1{{will be copied despite being returned by name}} - // cxx11_17-note@-2{{to avoid copying}} - // CHECK: fix-it:"{{.*}}":{[[@LINE-3]]:12-[[@LINE-3]]:14}:"std::move(d4)" -} -ConvertFromDerived test5() { - Derived d5; - return d5; // e4 - // cxx11_17-warning@-1{{will be copied despite being returned by name}} - // cxx11_17-note@-2{{to avoid copying}} - // CHECK: fix-it:"{{.*}}":{[[@LINE-3]]:12-[[@LINE-3]]:14}:"std::move(d5)" -} -ConvertFromBase test6() { - Derived d6; - return d6; // e5 - // cxx11_17-warning@-1{{will be copied despite being returned by name}} - // cxx11_17-note@-2{{to avoid copying}} - // CHECK: fix-it:"{{.*}}":{[[@LINE-3]]:12-[[@LINE-3]]:14}:"std::move(d6)" -} - -// These test cases should not produce the warning. -Derived ok1() { Derived d; return d; } -Base ok2() { Derived d; return static_cast(d); } -ConstructFromDerived ok3() { Derived d; return static_cast(d); } -ConstructFromBase ok4() { Derived d; return static_cast(d); } -ConvertFromDerived ok5() { Derived d; return static_cast(d); } -ConvertFromBase ok6() { Derived d; return static_cast(d); } - -// If the target is an lvalue reference, assume it's not safe to move from. -Derived ok_plvalue1(Derived& d) { return d; } -Base ok_plvalue2(Derived& d) { return d; } -ConstructFromDerived ok_plvalue3(const Derived& d) { return d; } -ConstructFromBase ok_plvalue4(Derived& d) { return d; } -ConvertFromDerived ok_plvalue5(Derived& d) { return d; } -ConvertFromBase ok_plvalue6(Derived& d) { return d; } - -Derived ok_lvalue1(Derived *p) { Derived& d = *p; return d; } -Base ok_lvalue2(Derived *p) { Derived& d = *p; return d; } -ConstructFromDerived ok_lvalue3(Derived *p) { const Derived& d = *p; return d; } -ConstructFromBase ok_lvalue4(Derived *p) { Derived& d = *p; return d; } -ConvertFromDerived ok_lvalue5(Derived *p) { Derived& d = *p; return d; } -ConvertFromBase ok_lvalue6(Derived *p) { Derived& d = *p; return d; } - -// If the target is a global, assume it's not safe to move from. -static Derived global_d; -Derived ok_global1() { return global_d; } -Base ok_global2() { return global_d; } -ConstructFromDerived ok_global3() { return global_d; } -ConstructFromBase ok_global4() { return global_d; } -ConvertFromDerived ok_global5() { return global_d; } -ConvertFromBase ok_global6() { return global_d; } - -// If the target's copy constructor is trivial, assume the programmer doesn't care. -TrivialDerived ok_trivial1(TrivialDerived d) { return d; } -TrivialBase ok_trivial2(TrivialDerived d) { return d; } -ConstructFromTrivialDerived ok_trivial3(TrivialDerived d) { return d; } -ConstructFromTrivialBase ok_trivial4(TrivialDerived d) { return d; } -ConvertFromTrivialDerived ok_trivial5(TrivialDerived d) { return d; } -ConvertFromTrivialBase ok_trivial6(TrivialDerived d) { return d; } - -// If the target is a parameter, do apply the diagnostic. -Derived testParam1(Derived d) { return d; } -Base testParam2(Derived d) { - return d; // e6 - // cxx11_17-warning@-1{{will be copied despite being returned by name}} - // cxx11_17-note@-2{{to avoid copying}} - // CHECK: fix-it:"{{.*}}":{[[@LINE-3]]:12-[[@LINE-3]]:13}:"std::move(d)" -} -ConstructFromDerived testParam3(Derived d) { - return d; // ok -} -ConstructFromBase testParam4(Derived d) { - return d; // e8 - // cxx11_17-warning@-1{{will be copied despite being returned by name}} - // cxx11_17-note@-2{{to avoid copying}} - // CHECK: fix-it:"{{.*}}":{[[@LINE-3]]:12-[[@LINE-3]]:13}:"std::move(d)" -} -ConvertFromDerived testParam5(Derived d) { - return d; // e9 - // cxx11_17-warning@-1{{will be copied despite being returned by name}} - // cxx11_17-note@-2{{to avoid copying}} - // CHECK: fix-it:"{{.*}}":{[[@LINE-3]]:12-[[@LINE-3]]:13}:"std::move(d)" -} -ConvertFromBase testParam6(Derived d) { - return d; // e10 - // cxx11_17-warning@-1{{will be copied despite being returned by name}} - // cxx11_17-note@-2{{to avoid copying}} - // CHECK: fix-it:"{{.*}}":{[[@LINE-3]]:12-[[@LINE-3]]:13}:"std::move(d)" -} - -// If the target is an rvalue reference parameter, do apply the diagnostic. -Derived testRParam1(Derived&& d) { - return d; // e11 - // cxx11_17-warning@-1{{will be copied despite being returned by name}} - // cxx11_17-note@-2{{to avoid copying}} - // CHECK: fix-it:"{{.*}}":{[[@LINE-3]]:12-[[@LINE-3]]:13}:"std::move(d)" -} -Base testRParam2(Derived&& d) { - return d; // e12 - // cxx11_17-warning@-1{{will be copied despite being returned by name}} - // cxx11_17-note@-2{{to avoid copying}} - // CHECK: fix-it:"{{.*}}":{[[@LINE-3]]:12-[[@LINE-3]]:13}:"std::move(d)" -} -ConstructFromDerived testRParam3(Derived&& d) { - return d; // e13 - // cxx11_17-warning@-1{{will be copied despite being returned by name}} - // cxx11_17-note@-2{{to avoid copying}} - // CHECK: fix-it:"{{.*}}":{[[@LINE-3]]:12-[[@LINE-3]]:13}:"std::move(d)" -} -ConstructFromBase testRParam4(Derived&& d) { - return d; // e14 - // cxx11_17-warning@-1{{will be copied despite being returned by name}} - // cxx11_17-note@-2{{to avoid copying}} - // CHECK: fix-it:"{{.*}}":{[[@LINE-3]]:12-[[@LINE-3]]:13}:"std::move(d)" -} -ConvertFromDerived testRParam5(Derived&& d) { - return d; // e15 - // cxx11_17-warning@-1{{will be copied despite being returned by name}} - // cxx11_17-note@-2{{to avoid copying}} - // CHECK: fix-it:"{{.*}}":{[[@LINE-3]]:12-[[@LINE-3]]:13}:"std::move(d)" -} -ConvertFromBase testRParam6(Derived&& d) { - return d; // e16 - // cxx11_17-warning@-1{{will be copied despite being returned by name}} - // cxx11_17-note@-2{{to avoid copying}} - // CHECK: fix-it:"{{.*}}":{[[@LINE-3]]:12-[[@LINE-3]]:13}:"std::move(d)" -} - -// But if the return type is a reference type, then moving would be wrong. -Derived &testRetRef1(Derived &&d) { return d; } // cxx2b-error {{non-const lvalue reference to type 'Derived' cannot bind to a temporary of type 'Derived'}} -Base &testRetRef2(Derived &&d) { return d; } // cxx2b-error {{non-const lvalue reference to type 'Base' cannot bind to a temporary of type 'Derived'}} -#if __cplusplus >= 201402L -auto&& testRetRef3(Derived&& d) { return d; } -decltype(auto) testRetRef4(Derived&& d) { return (d); } -#endif - -// As long as we're checking parentheses, make sure parentheses don't disable the warning. -Base testParens1() { - Derived d; - return (d); // e17 - // cxx11_17-warning@-1{{will be copied despite being returned by name}} - // cxx11_17-note@-2{{to avoid copying}} - // CHECK: fix-it:"{{.*}}":{[[@LINE-3]]:12-[[@LINE-3]]:15}:"std::move(d)" -} -ConstructFromDerived testParens2() { - Derived d; - return (d); // ok -} - -// If the target is a catch-handler parameter, do apply the diagnostic. -void throw_derived(); -Derived testEParam1() { - try { throw_derived(); } catch (Derived d) { return d; } // e19 - // cxx11_17-warning@-1{{will be copied despite being returned by name}} - // cxx11_17-note@-2{{to avoid copying}} - // CHECK: fix-it:"{{.*}}":{[[@LINE-3]]:57-[[@LINE-3]]:58}:"std::move(d)" - __builtin_unreachable(); -} -Base testEParam2() { - try { throw_derived(); } catch (Derived d) { return d; } // e20 - // cxx11_17-warning@-1{{will be copied despite being returned by name}} - // cxx11_17-note@-2{{to avoid copying}} - // CHECK: fix-it:"{{.*}}":{[[@LINE-3]]:57-[[@LINE-3]]:58}:"std::move(d)" - __builtin_unreachable(); -} -ConstructFromDerived testEParam3() { - try { throw_derived(); } catch (Derived d) { return d; } // e21 - // cxx11_17-warning@-1{{will be copied despite being returned by name}} - // cxx11_17-note@-2{{to avoid copying}} - // CHECK: fix-it:"{{.*}}":{[[@LINE-3]]:57-[[@LINE-3]]:58}:"std::move(d)" - __builtin_unreachable(); -} -ConstructFromBase testEParam4() { - try { throw_derived(); } catch (Derived d) { return d; } // e22 - // cxx11_17-warning@-1{{will be copied despite being returned by name}} - // cxx11_17-note@-2{{to avoid copying}} - // CHECK: fix-it:"{{.*}}":{[[@LINE-3]]:57-[[@LINE-3]]:58}:"std::move(d)" - __builtin_unreachable(); -} -ConvertFromDerived testEParam5() { - try { throw_derived(); } catch (Derived d) { return d; } // e23 - // cxx11_17-warning@-1{{will be copied despite being returned by name}} - // cxx11_17-note@-2{{to avoid copying}} - // CHECK: fix-it:"{{.*}}":{[[@LINE-3]]:57-[[@LINE-3]]:58}:"std::move(d)" - __builtin_unreachable(); -} -ConvertFromBase testEParam6() { - try { throw_derived(); } catch (Derived d) { return d; } // e24 - // cxx11_17-warning@-1{{will be copied despite being returned by name}} - // cxx11_17-note@-2{{to avoid copying}} - // CHECK: fix-it:"{{.*}}":{[[@LINE-3]]:57-[[@LINE-3]]:58}:"std::move(d)" - __builtin_unreachable(); -} - -// If the exception variable is an lvalue reference, we cannot be sure -// that we own it; it is extremely contrived, but possible, for this to -// be a reference to an exception object that was thrown via -// `std::rethrow_exception(xp)` in Thread A, and meanwhile somebody else -// has got a copy of `xp` in Thread B, so that moving out of this object -// in Thread A would be observable (and racy) with respect to Thread B. -// Therefore assume it's not safe to move from. -Derived ok_REParam1() { try { throw_derived(); } catch (Derived& d) { return d; } __builtin_unreachable(); } -Base ok_REParam2() { try { throw_derived(); } catch (Derived& d) { return d; } __builtin_unreachable(); } -ConstructFromDerived ok_REParam3() { try { throw_derived(); } catch (Derived& d) { return d; } __builtin_unreachable(); } -ConstructFromBase ok_REParam4() { try { throw_derived(); } catch (Derived& d) { return d; } __builtin_unreachable(); } -ConvertFromDerived ok_REParam5() { try { throw_derived(); } catch (Derived& d) { return d; } __builtin_unreachable(); } -ConvertFromBase ok_REParam6() { try { throw_derived(); } catch (Derived& d) { return d; } __builtin_unreachable(); } - -Derived ok_CEParam1() { try { throw_derived(); } catch (const Derived& d) { return d; } __builtin_unreachable(); } -Base ok_CEParam2() { try { throw_derived(); } catch (const Derived& d) { return d; } __builtin_unreachable(); } -ConstructFromDerived ok_CEParam3() { try { throw_derived(); } catch (const Derived& d) { return d; } __builtin_unreachable(); } -ConstructFromBase ok_CEParam4() { try { throw_derived(); } catch (const Derived& d) { return d; } __builtin_unreachable(); } -ConvertFromDerived ok_CEParam5() { try { throw_derived(); } catch (const Derived& d) { return d; } __builtin_unreachable(); } -ConvertFromBase ok_CEParam6() { try { throw_derived(); } catch (const Derived& d) { return d; } __builtin_unreachable(); } - -// If rvalue overload resolution would find a copy constructor anyway, -// or if the copy constructor actually selected is trivial, then don't warn. -struct TriviallyCopyable {}; -struct OnlyCopyable { - OnlyCopyable() = default; - OnlyCopyable(const OnlyCopyable&) {} -}; - -TriviallyCopyable ok_copy1() { TriviallyCopyable c; return c; } -OnlyCopyable ok_copy2() { OnlyCopyable c; return c; } -TriviallyCopyable ok_copyparam1(TriviallyCopyable c) { return c; } -OnlyCopyable ok_copyparam2(OnlyCopyable c) { return c; } - -void test_throw1(Derived&& d) { - throw d; // e25 - // cxx11_17-warning@-1{{will be copied despite being thrown by name}} - // cxx11_17-note@-2{{to avoid copying}} - // CHECK: fix-it:"{{.*}}":{[[@LINE-3]]:11-[[@LINE-3]]:12}:"std::move(d)" -} - -void ok_throw1() { - Derived d; - throw d; -} -void ok_throw2(Derived d) { throw d; } -void ok_throw3(Derived &d) { throw d; } -void ok_throw4(Derived d) { throw std::move(d); } -void ok_throw5(Derived &d) { throw std::move(d); } -void ok_throw6(Derived &d) { throw static_cast(d); } -void ok_throw7(TriviallyCopyable d) { throw d; } -void ok_throw8(OnlyCopyable d) { throw d; } - -namespace test_delete { -struct Base { - Base(); - Base(Base &&) = delete; // cxx20_2b-note {{'Base' has been explicitly marked deleted here}} - Base(Base const &); -}; - -struct Derived : public Base {}; - -Base test_ok() { - Derived d; - return d; // cxx20_2b-error {{call to deleted constructor of 'test_delete::Base'}} -} -} // namespace test_delete diff --git a/clang/test/SemaObjCXX/block-capture.mm b/clang/test/SemaObjCXX/block-capture.mm index c4f80f5954bd0..77a3907c6578c 100644 --- a/clang/test/SemaObjCXX/block-capture.mm +++ b/clang/test/SemaObjCXX/block-capture.mm @@ -1,7 +1,7 @@ -// RUN: %clang_cc1 -std=c++2b -fsyntax-only -fobjc-arc -fblocks -verify=cxx98_2b,cxx20_2b,cxx2b %s -// RUN: %clang_cc1 -std=c++20 -fsyntax-only -fobjc-arc -fblocks -verify=cxx98_2b,cxx20_2b %s -// RUN: %clang_cc1 -std=c++11 -fsyntax-only -fobjc-arc -fblocks -verify=cxx98_2b,cxx98_11 %s -// RUN: %clang_cc1 -std=c++98 -fsyntax-only -fobjc-arc -fblocks -Wno-c++11-extensions -verify=cxx98_2b,cxx98_11 %s +// RUN: %clang_cc1 -std=c++2b -fsyntax-only -fobjc-arc -fblocks -verify=cxx98_2b,cxx11_2b,cxx2b %s +// RUN: %clang_cc1 -std=c++20 -fsyntax-only -fobjc-arc -fblocks -verify=cxx98_2b,cxx11_2b %s +// RUN: %clang_cc1 -std=c++11 -fsyntax-only -fobjc-arc -fblocks -verify=cxx98_2b,cxx11_2b %s +// RUN: %clang_cc1 -std=c++98 -fsyntax-only -fobjc-arc -fblocks -Wno-c++11-extensions -verify=cxx98_2b,cxx98 %s #define TEST(T) void test_##T() { \ __block T x; \ @@ -14,54 +14,68 @@ }; TEST(CopyOnly); // cxx2b-error {{no matching constructor}} +struct ConstCopyOnly { + ConstCopyOnly(); + ConstCopyOnly(ConstCopyOnly &) = delete; // cxx98-note {{marked deleted here}} + ConstCopyOnly(const ConstCopyOnly &); +}; +TEST(ConstCopyOnly); // cxx98-error {{call to deleted constructor}} + +struct NonConstCopyOnly { + NonConstCopyOnly(); + NonConstCopyOnly(NonConstCopyOnly &); + NonConstCopyOnly(const NonConstCopyOnly &) = delete; // cxx11_2b-note {{marked deleted here}} +}; +TEST(NonConstCopyOnly); // cxx11_2b-error {{call to deleted constructor}} + struct CopyNoMove { CopyNoMove(); CopyNoMove(CopyNoMove &); - CopyNoMove(CopyNoMove &&) = delete; // cxx98_2b-note {{marked deleted here}} + CopyNoMove(CopyNoMove &&) = delete; // cxx11_2b-note {{marked deleted here}} }; -TEST(CopyNoMove); // cxx98_2b-error {{call to deleted constructor}} +TEST(CopyNoMove); // cxx11_2b-error {{call to deleted constructor}} struct MoveOnly { MoveOnly(); - MoveOnly(MoveOnly &) = delete; + MoveOnly(MoveOnly &) = delete; // cxx98-note {{marked deleted here}} MoveOnly(MoveOnly &&); }; -TEST(MoveOnly); +TEST(MoveOnly); // cxx98-error {{call to deleted constructor}} struct NoCopyNoMove { NoCopyNoMove(); - NoCopyNoMove(NoCopyNoMove &) = delete; - NoCopyNoMove(NoCopyNoMove &&) = delete; // cxx98_2b-note {{marked deleted here}} + NoCopyNoMove(NoCopyNoMove &) = delete; // cxx98-note {{marked deleted here}} + NoCopyNoMove(NoCopyNoMove &&) = delete; // cxx11_2b-note {{marked deleted here}} }; TEST(NoCopyNoMove); // cxx98_2b-error {{call to deleted constructor}} struct ConvertingRVRef { ConvertingRVRef(); - ConvertingRVRef(ConvertingRVRef &) = delete; // cxx98_11-note {{marked deleted here}} + ConvertingRVRef(ConvertingRVRef &) = delete; // cxx98-note {{marked deleted here}} struct X {}; ConvertingRVRef(X &&); operator X() const & = delete; operator X() &&; }; -TEST(ConvertingRVRef); // cxx98_11-error {{call to deleted constructor}} +TEST(ConvertingRVRef); // cxx98-error {{call to deleted constructor}} struct ConvertingCLVRef { ConvertingCLVRef(); ConvertingCLVRef(ConvertingCLVRef &); struct X {}; - ConvertingCLVRef(X &&); // cxx20_2b-note {{passing argument to parameter here}} + ConvertingCLVRef(X &&); // cxx11_2b-note {{passing argument to parameter here}} operator X() const &; - operator X() && = delete; // cxx20_2b-note {{marked deleted here}} + operator X() && = delete; // cxx11_2b-note {{marked deleted here}} }; -TEST(ConvertingCLVRef); // cxx20_2b-error {{invokes a deleted function}} +TEST(ConvertingCLVRef); // cxx11_2b-error {{invokes a deleted function}} struct SubSubMove {}; struct SubMove : SubSubMove { SubMove(); - SubMove(SubMove &) = delete; // cxx98_11-note {{marked deleted here}} + SubMove(SubMove &) = delete; // cxx98-note {{marked deleted here}} SubMove(SubSubMove &&); }; -TEST(SubMove); // cxx98_11-error {{call to deleted constructor}} +TEST(SubMove); // cxx98-error {{call to deleted constructor}} From 47c3fe2a22cf753fd55d08d367fbd817b4dd4a1c Mon Sep 17 00:00:00 2001 From: Jeremy Morse Date: Thu, 1 Jul 2021 10:59:22 +0100 Subject: [PATCH 400/619] [DebugInfo][InstrRef][1/4] Support transformations that widen values Very late in compilation, backends like X86 will perform optimisations like this: $cx = MOV16rm $rax, ... -> $rcx = MOV64rm $rax, ... Widening the load from 16 bits to 64 bits. SEeing how the lower 16 bits remain the same, this doesn't affect execution. However, any debug instruction reference to the defined operand now refers to a 64 bit value, nto a 16 bit one, which might be unexpected. Elsewhere in codegen, there's often this pattern: CALL64pcrel32 @foo, implicit-def $rax %0:gr64 = COPY $rax %1:gr32 = COPY %0.sub_32bit Where we want to refer to the definition of $eax by the call, but don't want to refer the copies (they don't define values in the way LiveDebugValues sees it). To solve this, add a subregister field to the existing "substitutions" facility, so that we can describe a field within a larger value definition. I would imagine that this would be used most often when a value is widened, and we need to refer to the original, narrower definition. Differential Revision: https://reviews.llvm.org/D88891 --- llvm/include/llvm/CodeGen/MIRYamlMapping.h | 2 + llvm/include/llvm/CodeGen/MachineFunction.h | 26 ++++++-- llvm/include/llvm/CodeGen/MachineInstr.h | 5 ++ .../LiveDebugValues/InstrRefBasedImpl.cpp | 4 +- llvm/lib/CodeGen/MIRParser/MIRParser.cpp | 4 +- llvm/lib/CodeGen/MIRPrinter.cpp | 12 ++-- llvm/lib/CodeGen/MachineFunction.cpp | 7 +- llvm/lib/CodeGen/MachineInstr.cpp | 6 ++ llvm/lib/Target/X86/X86FixupBWInsts.cpp | 18 +++++ .../livedebugvalues_instrref_tolocs.mir | 2 +- .../MIR/InstrRef/substitusions-roundtrip.mir | 4 +- .../MIR/InstrRef/twoaddr-to-threeaddr-sub.mir | 2 +- .../MIR/InstrRef/x86-fixup-bw-inst-subreb.mir | 65 +++++++++++++++++++ 13 files changed, 137 insertions(+), 20 deletions(-) create mode 100644 llvm/test/DebugInfo/MIR/InstrRef/x86-fixup-bw-inst-subreb.mir diff --git a/llvm/include/llvm/CodeGen/MIRYamlMapping.h b/llvm/include/llvm/CodeGen/MIRYamlMapping.h index 75f2ff86c29e7..e7428e7ad260a 100644 --- a/llvm/include/llvm/CodeGen/MIRYamlMapping.h +++ b/llvm/include/llvm/CodeGen/MIRYamlMapping.h @@ -492,6 +492,7 @@ struct DebugValueSubstitution { unsigned SrcOp; unsigned DstInst; unsigned DstOp; + unsigned Subreg; bool operator==(const DebugValueSubstitution &Other) const { return std::tie(SrcInst, SrcOp, DstInst, DstOp) == @@ -505,6 +506,7 @@ template <> struct MappingTraits { YamlIO.mapRequired("srcop", Sub.SrcOp); YamlIO.mapRequired("dstinst", Sub.DstInst); YamlIO.mapRequired("dstop", Sub.DstOp); + YamlIO.mapRequired("subreg", Sub.Subreg); } static const bool flow = true; diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h index e9ce813428dc4..1d0a2a7deb761 100644 --- a/llvm/include/llvm/CodeGen/MachineFunction.h +++ b/llvm/include/llvm/CodeGen/MachineFunction.h @@ -451,11 +451,24 @@ class MachineFunction { /// Pair of instruction number and operand number. using DebugInstrOperandPair = std::pair; - /// Substitution map: from one pair to another. Used to - /// record changes in where a value is defined, so that debug variable - /// locations can find it later. - std::map - DebugValueSubstitutions; + /// Replacement definition for a debug instruction reference. Made up of an + /// instruction / operand pair, and a qualifying subregister indicating what + /// bits in the operand make up the substitution. For example, a debug user + /// of %1: + /// %0:gr32 = someinst, debug-instr-number 2 + /// %1:gr16 = %0.some_16_bit_subreg + /// Would receive the substitution {{2, 0}, $subreg}, where $subreg is the + /// subregister number for some_16_bit_subreg. + struct DebugSubstitution { + DebugInstrOperandPair Dest; ///< Replacement instruction / operand pair. + unsigned Subreg; ///< Qualifier for which part of Dest is read. + }; + + /// Substitution map: from one pair identifying a value, + /// to a DebugSubstitution identifying another. Used to record changes in + /// where a value is defined, so that debug variable locations can find it + /// later. + std::map DebugValueSubstitutions; /// Location of a PHI instruction that is also a debug-info variable value, /// for the duration of register allocation. Loaded by the PHI-elimination @@ -477,7 +490,8 @@ class MachineFunction { /// Create a substitution between one value to a different, /// new value. - void makeDebugValueSubstitution(DebugInstrOperandPair, DebugInstrOperandPair); + void makeDebugValueSubstitution(DebugInstrOperandPair, DebugInstrOperandPair, + unsigned SubReg = 0); /// Create substitutions for any tracked values in \p Old, to point at /// \p New. Needed when we re-create an instruction during optimization, diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h index 12da008a7bf44..7fc1576fe5a09 100644 --- a/llvm/include/llvm/CodeGen/MachineInstr.h +++ b/llvm/include/llvm/CodeGen/MachineInstr.h @@ -455,6 +455,11 @@ class MachineInstr /// one already, a new and unique number will be assigned. unsigned getDebugInstrNum(); + /// Fetch instruction number of this MachineInstr -- but before it's inserted + /// into \p MF. Needed for transformations that create an instruction but + /// don't immediately insert them. + unsigned getDebugInstrNum(MachineFunction &MF); + /// Examine the instruction number of this MachineInstr. May be zero if /// it hasn't been assigned a number yet. unsigned peekDebugInstrNum() const { return DebugInstrNum; } diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp index 1085564fa5d61..b92614ee124d0 100644 --- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp @@ -1830,8 +1830,8 @@ bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI, // the instruction / operand number in this DBG_INSTR_REF. auto Sub = MF.DebugValueSubstitutions.find(std::make_pair(InstNo, OpNo)); while (Sub != MF.DebugValueSubstitutions.end()) { - InstNo = Sub->second.first; - OpNo = Sub->second.second; + InstNo = Sub->second.Dest.first; + OpNo = Sub->second.Dest.second; Sub = MF.DebugValueSubstitutions.find(std::make_pair(InstNo, OpNo)); } diff --git a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp index 58ce95aaf023c..d77104752880a 100644 --- a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp @@ -425,8 +425,8 @@ void MIRParserImpl::setupDebugValueTracking( // Load any substitutions. for (auto &Sub : YamlMF.DebugValueSubstitutions) { - MF.makeDebugValueSubstitution(std::make_pair(Sub.SrcInst, Sub.SrcOp), - std::make_pair(Sub.DstInst, Sub.DstOp)); + MF.makeDebugValueSubstitution({Sub.SrcInst, Sub.SrcOp}, + {Sub.DstInst, Sub.DstOp}, Sub.Subreg); } } diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp index c7dc73191889a..0c8da19e3f41f 100644 --- a/llvm/lib/CodeGen/MIRPrinter.cpp +++ b/llvm/lib/CodeGen/MIRPrinter.cpp @@ -224,10 +224,14 @@ void MIRPrinter::print(const MachineFunction &MF) { convert(MST, YamlMF.FrameInfo, MF.getFrameInfo()); convertStackObjects(YamlMF, MF, MST); convertCallSiteObjects(YamlMF, MF, MST); - for (auto &Sub : MF.DebugValueSubstitutions) - YamlMF.DebugValueSubstitutions.push_back({Sub.first.first, Sub.first.second, - Sub.second.first, - Sub.second.second}); + for (const auto &Sub : MF.DebugValueSubstitutions) { + auto &SubSrc = Sub.first; + const MachineFunction::DebugSubstitution &SubDest = Sub.second; + YamlMF.DebugValueSubstitutions.push_back({SubSrc.first, SubSrc.second, + SubDest.Dest.first, + SubDest.Dest.second, + SubDest.Subreg}); + } if (const auto *ConstantPool = MF.getConstantPool()) convert(YamlMF, *ConstantPool); if (const auto *JumpTableInfo = MF.getJumpTableInfo()) diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp index 39feb92a9752f..8b1d05d252de9 100644 --- a/llvm/lib/CodeGen/MachineFunction.cpp +++ b/llvm/lib/CodeGen/MachineFunction.cpp @@ -969,8 +969,11 @@ void MachineFunction::setDebugInstrNumberingCount(unsigned Num) { } void MachineFunction::makeDebugValueSubstitution(DebugInstrOperandPair A, - DebugInstrOperandPair B) { - auto Result = DebugValueSubstitutions.insert(std::make_pair(A, B)); + DebugInstrOperandPair B, + unsigned Subreg) { + // Catch any accidental self-loops. + assert(A.first != B.first); + auto Result = DebugValueSubstitutions.insert({A, {B, Subreg}}); (void)Result; assert(Result.second && "Substitution for an already substituted value?"); } diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp index 6a3bb0e78ff11..20d6ab88fac2f 100644 --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -2374,3 +2374,9 @@ unsigned MachineInstr::getDebugInstrNum() { DebugInstrNum = getParent()->getParent()->getNewDebugInstrNum(); return DebugInstrNum; } + +unsigned MachineInstr::getDebugInstrNum(MachineFunction &MF) { + if (DebugInstrNum == 0) + DebugInstrNum = MF.getNewDebugInstrNum(); + return DebugInstrNum; +} diff --git a/llvm/lib/Target/X86/X86FixupBWInsts.cpp b/llvm/lib/Target/X86/X86FixupBWInsts.cpp index f8d822aebc5b6..e1d4b4c347721 100644 --- a/llvm/lib/Target/X86/X86FixupBWInsts.cpp +++ b/llvm/lib/Target/X86/X86FixupBWInsts.cpp @@ -137,6 +137,8 @@ class FixupBWInstPass : public MachineFunctionPass { /// Machine instruction info used throughout the class. const X86InstrInfo *TII = nullptr; + const TargetRegisterInfo *TRI = nullptr; + /// Local member for function's OptForSize attribute. bool OptForSize = false; @@ -162,6 +164,7 @@ bool FixupBWInstPass::runOnMachineFunction(MachineFunction &MF) { this->MF = &MF; TII = MF.getSubtarget().getInstrInfo(); + TRI = MF.getRegInfo().getTargetRegisterInfo(); MLI = &getAnalysis(); PSI = &getAnalysis().getPSI(); MBFI = (PSI && PSI->hasProfileSummary()) ? @@ -303,6 +306,14 @@ MachineInstr *FixupBWInstPass::tryReplaceLoad(unsigned New32BitOpcode, MIB.setMemRefs(MI->memoperands()); + // If it was debug tracked, record a substitution. + if (unsigned OldInstrNum = MI->peekDebugInstrNum()) { + unsigned Subreg = TRI->getSubRegIndex(MIB->getOperand(0).getReg(), + MI->getOperand(0).getReg()); + unsigned NewInstrNum = MIB->getDebugInstrNum(*MF); + MF->makeDebugValueSubstitution({OldInstrNum, 0}, {NewInstrNum, 0}, Subreg); + } + return MIB; } @@ -366,6 +377,13 @@ MachineInstr *FixupBWInstPass::tryReplaceExtend(unsigned New32BitOpcode, MIB.setMemRefs(MI->memoperands()); + if (unsigned OldInstrNum = MI->peekDebugInstrNum()) { + unsigned Subreg = TRI->getSubRegIndex(MIB->getOperand(0).getReg(), + MI->getOperand(0).getReg()); + unsigned NewInstrNum = MIB->getDebugInstrNum(*MF); + MF->makeDebugValueSubstitution({OldInstrNum, 0}, {NewInstrNum, 0}, Subreg); + } + return MIB; } diff --git a/llvm/test/DebugInfo/MIR/InstrRef/livedebugvalues_instrref_tolocs.mir b/llvm/test/DebugInfo/MIR/InstrRef/livedebugvalues_instrref_tolocs.mir index 578cac9dc0ec4..13d9295ad656a 100644 --- a/llvm/test/DebugInfo/MIR/InstrRef/livedebugvalues_instrref_tolocs.mir +++ b/llvm/test/DebugInfo/MIR/InstrRef/livedebugvalues_instrref_tolocs.mir @@ -32,7 +32,7 @@ --- name: _Z8bb_to_bb debugValueSubstitutions: - - { srcinst: 4, srcop: 0, dstinst: 3, dstop: 0 } + - { srcinst: 4, srcop: 0, dstinst: 3, dstop: 0, subreg: 0 } body: | bb.0.entry: $rax = MOV64ri 1, debug-instr-number 1, debug-location !17 diff --git a/llvm/test/DebugInfo/MIR/InstrRef/substitusions-roundtrip.mir b/llvm/test/DebugInfo/MIR/InstrRef/substitusions-roundtrip.mir index cf0ebe3502cf7..0a8fada169cf8 100644 --- a/llvm/test/DebugInfo/MIR/InstrRef/substitusions-roundtrip.mir +++ b/llvm/test/DebugInfo/MIR/InstrRef/substitusions-roundtrip.mir @@ -4,7 +4,7 @@ # REQUIRES: x86-registered-target # # CHECK: debugValueSubstitutions: -# CHECK-NEXT: - { srcinst: 1, srcop: 0, dstinst: 2, dstop: 0 } +# CHECK-NEXT: - { srcinst: 1, srcop: 0, dstinst: 2, dstop: 0, subreg: 0 } # # CHECK: MOV64rr $rdi, debug-instr-number 2 # CHECK-NEXT: DBG_INSTR_REF 1, 0 @@ -14,7 +14,7 @@ tracksRegLiveness: true liveins: - { reg: '$rdi', virtual-reg: '' } debugValueSubstitutions: - - { srcinst: 1, srcop: 0, dstinst: 2, dstop: 0 } + - { srcinst: 1, srcop: 0, dstinst: 2, dstop: 0, subreg: 0 } body: | bb.0: liveins: $rdi, $rax diff --git a/llvm/test/DebugInfo/MIR/InstrRef/twoaddr-to-threeaddr-sub.mir b/llvm/test/DebugInfo/MIR/InstrRef/twoaddr-to-threeaddr-sub.mir index 55a8d6be2f0f9..561f73524bd06 100644 --- a/llvm/test/DebugInfo/MIR/InstrRef/twoaddr-to-threeaddr-sub.mir +++ b/llvm/test/DebugInfo/MIR/InstrRef/twoaddr-to-threeaddr-sub.mir @@ -8,7 +8,7 @@ # lets not. # # CHECK: debugValueSubstitutions: -# CHECK-NEXT: - { srcinst: 1, srcop: 0, dstinst: 2, dstop: 0 } +# CHECK-NEXT: - { srcinst: 1, srcop: 0, dstinst: 2, dstop: 0, subreg: 0 } # # CHECK: LEA64_32r # CHECK-SAME: debug-instr-number 2 diff --git a/llvm/test/DebugInfo/MIR/InstrRef/x86-fixup-bw-inst-subreb.mir b/llvm/test/DebugInfo/MIR/InstrRef/x86-fixup-bw-inst-subreb.mir new file mode 100644 index 0000000000000..27b155639af5d --- /dev/null +++ b/llvm/test/DebugInfo/MIR/InstrRef/x86-fixup-bw-inst-subreb.mir @@ -0,0 +1,65 @@ +# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass x86-fixup-bw-insts %s -o - -experimental-debug-variable-locations | FileCheck %s +# +# This test is a copy of llvm/test/CodeGen/X86/fixup-bw-inst.mir, with a few +# test bodies removed. The pass promotes certain register operations to be +# wider operations (such as loads and sign extensions), which has an instruction +# encoding benefit. New instructions are created, and so should have a debug +# instruction number substitution; but in addition a qualifiying subregister, +# because the newly def'd register is a different size to the old one. +# +# Plain copies that get transformed are not tested for, as they should never +# be instrumented. At a high level, copies do not define a value; they move +# them. + +--- +# CHECK-LABEL: name: test1 +name: test1 +alignment: 16 +tracksRegLiveness: true +liveins: + - { reg: '$rax' } +# CHECK: debugValueSubstitutions: +# CHECK-NEXT - { srcinst: 1, srcop: 0, dstinst: 2, dstop: 0, subreg: 4 } +## Subreg 4 -> sub_16bit +body: | + bb.0: + liveins: $rax + + $ax = MOV16rm killed $rax, 1, $noreg, 0, $noreg, debug-instr-number 1 + ; CHECK: $eax = MOVZX32rm16 killed $rax, {{.*}} debug-instr-number 2 + + RETQ $ax + +... +--- +# CHECK-LABEL: name: test3 +name: test3 +alignment: 16 +tracksRegLiveness: true +liveins: + - { reg: '$rdi' } +# CHECK: debugValueSubstitutions: +# CHECK-NEXT - { srcinst: 1, srcop: 0, dstinst: 2, dstop: 0, subreg: 4 } +## Subreg 4 -> sub_16bit +body: | + bb.0: + successors: %bb.1(0x30000000), %bb.2(0x50000000) + liveins: $rdi + + TEST64rr $rdi, $rdi, implicit-def $eflags + JCC_1 %bb.1, 4, implicit $eflags + + bb.2: + liveins: $rdi + + $ax = MOV16rm killed $rdi, 1, $noreg, 0, $noreg, implicit-def $eax, debug-instr-number 1 + ; CHECK: $eax = MOVZX32rm16 killed $rdi, {{.*}} debug-instr-number 2 + $ax = KILL $ax, implicit killed $eax + RETQ $ax + + bb.1: + $eax = XOR32rr undef $eax, undef $eax, implicit-def dead $eflags + $ax = KILL $ax, implicit killed $eax + RETQ $ax + +... From ce857d3cfd42dc7056505e0f45390ef703ff0bfe Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Thu, 1 Jul 2021 12:24:55 +0200 Subject: [PATCH 401/619] [mlir][async] Remove unused variable. NFC. --- mlir/lib/Dialect/Async/Transforms/AsyncParallelFor.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/mlir/lib/Dialect/Async/Transforms/AsyncParallelFor.cpp b/mlir/lib/Dialect/Async/Transforms/AsyncParallelFor.cpp index 373ee8b01dca9..521180cbd4b9c 100644 --- a/mlir/lib/Dialect/Async/Transforms/AsyncParallelFor.cpp +++ b/mlir/lib/Dialect/Async/Transforms/AsyncParallelFor.cpp @@ -112,9 +112,6 @@ struct AsyncParallelForRewrite : public OpRewritePattern { PatternRewriter &rewriter) const override; private: - // The maximum number of tasks per worker thread when sharding parallel op. - static constexpr int32_t kMaxOversharding = 4; - bool asyncDispatch; int32_t numWorkerThreads; int32_t targetBlockSize; From 73bea97a336ba2da276ef34fd21b2c5c676b0a97 Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Tue, 29 Jun 2021 12:05:59 +0000 Subject: [PATCH 402/619] [mlir][Linalg] Add support for CallOp bufferization (10/n) Cross function boundary bufferization support is added. This is enabled by cross-function boundary alias analysis, for which the bufferization process is extended: it can now modify the BufferizationAliasInfo as new ops are introduced. A number of simplifying assumptions are made: 1. by default we bufferize to the most dynamic strided memref type, further memref::CastOp canonicalizations are expected to clean up the IR. 2. in the current implementation, the stride information is always erased at function boundaries. A subsequent pass will be required to analyze the meet of all call ops to a function and decide whether more static buffer types can be used. This will potentially clone functions when it is deemed profitable to do so (e.g. when the stride-1 dimension may vary). 3. external function always bufferize to the most dynamic strided memref version. This may require special annotations for specifying that particular operands of top-level functions have contiguous buffer layout. An alternative to point 3. would be to support tensor layout annotations, which is currently not supported in MLIR. Differential revision: https://reviews.llvm.org/D104873 --- .../Transforms/ComprehensiveBufferize.cpp | 651 +++++++++++++++--- ...omprehensive-module-bufferize-invalid.mlir | 31 + .../comprehensive-module-bufferize.mlir | 60 ++ 3 files changed, 649 insertions(+), 93 deletions(-) create mode 100644 mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir diff --git a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp index 14acc36fbf22e..824092df292ca 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp @@ -114,7 +114,9 @@ #include "mlir/Dialect/Vector/VectorOps.h" #include "mlir/IR/Operation.h" #include "mlir/Pass/Pass.h" +#include "mlir/Pass/PassManager.h" #include "mlir/Transforms/BufferUtils.h" +#include "mlir/Transforms/Passes.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/EquivalenceClasses.h" @@ -136,6 +138,8 @@ using namespace tensor; // Generic helpers. //===----------------------------------------------------------------------===// +static bool isaTensor(Type t) { return t.isa(); } + /// Return the FuncOp called by `callOp`. static FuncOp getCalledFunction(CallOpInterface callOp) { SymbolRefAttr sym = callOp.getCallableForCallee().dyn_cast(); @@ -145,6 +149,20 @@ static FuncOp getCalledFunction(CallOpInterface callOp) { SymbolTable::lookupNearestSymbolFrom(callOp, sym)); } +/// Return the unique ReturnOp that terminates `funcOp`. +/// Return nullptr if there is no such unique ReturnOp. +static ReturnOp getAssumedUniqueReturnOp(FuncOp funcOp) { + ReturnOp returnOp; + for (Block &b : funcOp.body()) { + if (auto candidateOp = dyn_cast(b.getTerminator())) { + if (returnOp) + return nullptr; + returnOp = candidateOp; + } + } + return returnOp; +} + //===----------------------------------------------------------------------===// // Bufferization-specific BlockAndValueMapping support with debugging. //===----------------------------------------------------------------------===// @@ -163,7 +181,7 @@ static void map(BlockAndValueMapping &bvm, Value key, Value value) { } /// Wrapper for better debugging. -static Value lookup(BlockAndValueMapping &bvm, Value key) { +static Value lookup(const BlockAndValueMapping &bvm, Value key) { // TODO: if key comes from bbArg, forward. assert(key.getType().isa()); Value v = bvm.lookupOrNull(key); @@ -347,10 +365,8 @@ static bool hasKnownBufferizationAliasingBehavior(Operation *op) { VectorTransferOpInterface, scf::YieldOp>(op) // clang-format on - || (none_of(op->getResultTypes(), - [](Type t) { return t.isa(); }) && - none_of(op->getOperandTypes(), - [](Type t) { return t.isa(); })); + || (none_of(op->getResultTypes(), isaTensor) && + none_of(op->getOperandTypes(), isaTensor)); } /// Return the OpResult that may bufferize into the same buffer as `opOperand` @@ -577,14 +593,22 @@ class BufferizationAliasInfo { /// beginning the alias and equivalence sets only contain `v` itself. void createAliasInfoEntry(Value v); + /// Insert an info entry for `newValue` and merge its alias set with that of + /// `alias`. + void insertNewBufferAlias(Value newValue, Value alias); + + /// Insert an info entry for `newValue` and merge its alias set with that of + /// `alias`. Additionally, merge their equivalence classes. + void insertNewBufferEquivalence(Value newValue, Value alias); + /// Return true if the buffer to which `operand` would bufferize aliases a /// buffer that is known to not be writeable. This implies that the matching /// OpResult cannot be bufferized inplace. bool aliasesNonWriteableBuffer(OpOperand &operand) const; /// Return true if the buffer to which `operand` would bufferize is equivalent - /// to some use that would bufferize to a write to a buffer. - bool aliasesInPlaceWrite(ExtractSliceOp extractSliceOp) const; + /// to some buffer write. + bool aliasesInPlaceWrite(Value v) const; /// Set the inPlace bufferization spec to true. /// Merge result's and operand's aliasing sets and iterate to a fixed point. @@ -619,6 +643,9 @@ class BufferizationAliasInfo { bool isSourceEquivalentToAMatchingExtractSliceOp( InsertSliceOp insertSliceOp) const; + /// Apply `fun` to all the members of the equivalence class of `v`. + void applyOnEquivalenceClass(Value v, function_ref fun) const; + /// Print to `os`. void print(raw_ostream &os) const; @@ -626,8 +653,9 @@ class BufferizationAliasInfo { void dump() const { print(llvm::errs()); } private: - /// Check aliasInfo for `v` exists and return a reference to it. + /// Check that aliasInfo for `v` exists and return a reference to it. DenseSet &getAliasInfoRef(Value v); + const DenseSet &getAliasInfoRef(Value v) const { return const_cast(this)->getAliasInfoRef(v); } @@ -740,6 +768,23 @@ void BufferizationAliasInfo::createAliasInfoEntry(Value v) { equivalentInfo.insert(v); } +/// Insert an info entry for `newValue` and merge its alias set with that of +/// `alias`. +void BufferizationAliasInfo::insertNewBufferAlias(Value newValue, Value alias) { + assert(aliasInfo.find(alias) != aliasInfo.end() && "Missing alias entry"); + createAliasInfoEntry(newValue); + mergeAliases(newValue, alias); + mergeAliasesToFixedPoint(); +} + +/// Insert an info entry for `newValue` and merge its alias set with that of +/// `alias`. Additionally, merge their equivalence classes. +void BufferizationAliasInfo::insertNewBufferEquivalence(Value newValue, + Value alias) { + insertNewBufferAlias(newValue, alias); + equivalentInfo.unionSets(newValue, alias); +} + /// Return true if the buffer to which `operand` would bufferize aliases a /// buffer that is known to not be writeable. This implies that the matching /// OpResult cannot be bufferized inplace. @@ -755,13 +800,13 @@ bool BufferizationAliasInfo::aliasesNonWriteableBuffer( LDBG("-----------bbArg is writeable -> skip: " << bbArg << '\n'); continue; } - LDBG("-----------notWriteable: " << v << '\n'); + LDBG("-----------notWriteable\n"); return true; } if (Operation *op = v.getDefiningOp()) { if (isa(op) || !hasKnownBufferizationAliasingBehavior(op)) { - LDBG("-----------notWriteable: " << v << '\n'); + LDBG("-----------notWriteable\n"); return true; } } @@ -771,12 +816,11 @@ bool BufferizationAliasInfo::aliasesNonWriteableBuffer( } /// Return true if the buffer to which `operand` would bufferize is equivalent -/// to some use that would bufferize to a write to a buffer. -bool BufferizationAliasInfo::aliasesInPlaceWrite( - ExtractSliceOp extractSliceOp) const { +/// to some buffer write. +bool BufferizationAliasInfo::aliasesInPlaceWrite(Value value) const { LDBG("----Start aliasesInPlaceWrite\n"); - LDBG("-------for op: " << *extractSliceOp.getOperation() << '\n'); - for (Value v : getAliasInfoRef(extractSliceOp.result())) { + LDBG("-------for : " << value << '\n'); + for (Value v : getAliasInfoRef(value)) { for (auto &use : v.getUses()) { if (bufferizesToMemoryWrite(use, InPlaceSpec::True)) { LDBG("-----------wants to bufferize to inPlace write: " @@ -785,7 +829,7 @@ bool BufferizationAliasInfo::aliasesInPlaceWrite( } } } - LDBG("----------->extract_slice does not alias an inplace write"); + LDBG("----------->does not alias an inplace write\n"); return false; } @@ -920,6 +964,16 @@ bool BufferizationAliasInfo::isSourceEquivalentToAMatchingExtractSliceOp( return false; } +/// Apply `fun` to all the members of the equivalence class of `v`. +void BufferizationAliasInfo::applyOnEquivalenceClass( + Value v, function_ref fun) const { + for (auto it = equivalentInfo.findLeader(v), + eit = equivalentInfo.member_end(); + it != eit; ++it) { + fun(v); + } +} + void BufferizationAliasInfo::print(raw_ostream &os) const { os << "\n/========================== AliasInfo " "==========================\n"; @@ -1106,6 +1160,21 @@ bool BufferizationAliasInfo::isClobberedWriteBeforeRead( return existsInterleavedValueClobber(aliasingRead, aliasingWrite, domInfo); } +//===----------------------------------------------------------------------===// +// Forward declarations. +//===----------------------------------------------------------------------===// + +/// Return the op with Allocate MemoryEffect if `v` is equivalent to an such +/// an op. Return null otherwise. +static Operation *getEquivalentAlloc(Value value, + const BufferizationAliasInfo &aliasInfo); + +/// Return the first argument of the enclosing FuncOp that is equivalent to `v`. +/// Return null if no such bbArg can be found. +static BlockArgument +getEquivalentEnclosingFuncBBArg(Value v, + const BufferizationAliasInfo &aliasInfo); + //===----------------------------------------------------------------------===// // Bufferization-specific MemRefType support. //===----------------------------------------------------------------------===// @@ -1152,6 +1221,47 @@ static MemRefType getDynamicMemRefType(RankedTensorType tensorType, stridedLayout, addressSpace); } +/// Return the FunctionType with `argumentTypes` and `resultTypes` where each +/// tensor is replaced by the corresponding buffer type. +/// In order for all the callers to agree, this *must* bufferize to the most +/// dynamic buffer type supported. +/// A later pass across all CallOps in the module can decide whether to simplify +/// the types of to version according to some cost model. +static FunctionType getBufferizedFunctionType(MLIRContext *ctx, + TypeRange argumentTypes, + TypeRange resultTypes) { + auto rewrite = [](Type t) -> Type { + // TODO: non-zero address space. + // TODO: layout information if relevant. + if (auto rankedTensorType = t.dyn_cast()) + return getDynamicMemRefType(rankedTensorType); + if (auto tensorType = t.dyn_cast()) + return getContiguousOrUnrankedMemRefType(tensorType); + return t; + }; + auto argTypes = llvm::to_vector<4>(llvm::map_range(argumentTypes, rewrite)); + auto retTypes = llvm::to_vector<4>(llvm::map_range(resultTypes, rewrite)); + return FunctionType::get(ctx, argTypes, retTypes); +} + +/// If an entry for `funcOp` is available in `bufferizedFunctionTypes`, return +/// it. Otherwise, construct a new entry based on `argumentTypes` and +/// `resultTypes`. +// TODO: improve the layering. +static FunctionType getOrCreateBufferizedFunctionType( + FuncOp funcOp, TypeRange argumentTypes, TypeRange resultTypes, + DenseMap &bufferizedFunctionTypes) { + auto it = bufferizedFunctionTypes.find(funcOp); + if (it != bufferizedFunctionTypes.end()) + return it->second; + + auto it2 = bufferizedFunctionTypes.try_emplace( + funcOp, getBufferizedFunctionType(funcOp.getContext(), argumentTypes, + resultTypes)); + LDBG("FT: " << funcOp.getType() << " -> " << it2.first->second << "\n"); + return it2.first->second; +} + //===----------------------------------------------------------------------===// // Bufferization-specific scoped alloc/dealloc insertion support. //===----------------------------------------------------------------------===// @@ -1159,8 +1269,10 @@ static MemRefType getDynamicMemRefType(RankedTensorType tensorType, /// Create an Allocop/DeAllocOp pair, where the AllocOp is after /// `shapedValue.getDefiningOp` (or at the top of the block in case of a /// bbArg) and the DeallocOp is at the end of the block. -static Value createNewAllocDeallocPairForShapedValue(OpBuilder &b, Location loc, - Value shapedValue) { +static Value +createNewAllocDeallocPairForShapedValue(OpBuilder &b, Location loc, + Value shapedValue, + BufferizationAliasInfo &aliasInfo) { // Take a guard before anything else. OpBuilder::InsertionGuard g(b); @@ -1189,9 +1301,12 @@ static Value createNewAllocDeallocPairForShapedValue(OpBuilder &b, Location loc, dynShape.push_back(createOrFoldDimOp(b, loc, shapedValue, dim.index())); Value allocated = b.create(loc, allocMemRefType, dynShape); + aliasInfo.createAliasInfoEntry(allocated); Value casted = allocated; - if (memRefType != allocMemRefType) + if (memRefType != allocMemRefType) { casted = b.create(loc, memRefType, allocated); + aliasInfo.insertNewBufferEquivalence(casted, allocated); + } b.setInsertionPoint(allocated.getParentBlock()->getTerminator()); b.create(loc, allocated); return casted; @@ -1212,7 +1327,8 @@ static Value createNewAllocDeallocPairForShapedValue(OpBuilder &b, Location loc, static LogicalResult allocateBuffersForResults(OpBuilder &b, Location loc, LinalgOp op, SmallVectorImpl &resultBuffers, - BlockAndValueMapping &bvm) { + BlockAndValueMapping &bvm, + BufferizationAliasInfo &aliasInfo) { // Take a guard before anything else. OpBuilder::InsertionGuard g(b); @@ -1236,7 +1352,8 @@ allocateBuffersForResults(OpBuilder &b, Location loc, LinalgOp op, // Otherwise, `op` is not inplaceable and we need to allocate its result. Value dimTensor = bvm.lookupOrDefault(output); - Value alloc = createNewAllocDeallocPairForShapedValue(b, loc, dimTensor); + Value alloc = + createNewAllocDeallocPairForShapedValue(b, loc, dimTensor, aliasInfo); b.setInsertionPointAfter(alloc.getDefiningOp()); resultBuffers.push_back(alloc); @@ -1258,7 +1375,7 @@ allocateBuffersForResults(OpBuilder &b, Location loc, LinalgOp op, /// Generic conversion for any LinalgOp on tensors. static LogicalResult bufferize(OpBuilder &b, LinalgOp op, BlockAndValueMapping &bvm, - const BufferizationAliasInfo &aliasInfo) { + BufferizationAliasInfo &aliasInfo) { // Take a guard before anything else. OpBuilder::InsertionGuard g(b); @@ -1267,8 +1384,6 @@ static LogicalResult bufferize(OpBuilder &b, LinalgOp op, if (!op.hasTensorSemantics()) return failure(); - LDBG("bufferize: " << *op << '\n'); - b.setInsertionPoint(op); Location loc = op.getLoc(); SmallVector newInputBuffers; @@ -1284,7 +1399,8 @@ static LogicalResult bufferize(OpBuilder &b, LinalgOp op, } SmallVector newOutputBuffers; // Try to allocate new buffers depending on op's inplace semantics. - if (failed(allocateBuffersForResults(b, loc, op, newOutputBuffers, bvm))) + if (failed(allocateBuffersForResults(b, loc, op, newOutputBuffers, bvm, + aliasInfo))) return failure(); // Clone the newly bufferized op. @@ -1301,11 +1417,153 @@ static LogicalResult bufferize(OpBuilder &b, LinalgOp op, return success(); } +/// In a first approximation, all the function arguments of a FuncOp are marked +/// inplaceable. For now, it is the responsibility of the `callOp` bufferization +/// to allow FuncOp that are inplaceable to write inPlace. +static LogicalResult +bufferize(OpBuilder &b, CallOpInterface callOp, BlockAndValueMapping &bvm, + BufferizationAliasInfo &aliasInfo, + DenseMap &bufferizedFunctionTypes) { + FuncOp funcOp = getCalledFunction(callOp); + assert(isa(callOp.getOperation()) && funcOp && + "expected Callop to a FuncOp"); + + // If nothing to do then we are done. + if (!llvm::any_of(funcOp.getType().getInputs(), isaTensor) && + !llvm::any_of(funcOp.getType().getResults(), isaTensor)) + return success(); + + // Take a guard before anything else. + OpBuilder::InsertionGuard g(b); + b.setInsertionPoint(callOp); + + // 1. Filter return types: + // - if the callee is bodiless / external, we cannot inspect it and we + // cannot assume anything. We can just assert that it does not return a + // tensor as this would have to bufferize to "return a memref", whose + // semantics is ill-defined. + // - if the callee has a body, we perform inter-procedural equivalence + // analysis. When successful, a result folds onto an operand. When + // unsuccessful, additional work is needed to either: + // * hoist a result into an inplaceable operand or + // * devise a better representation to truly return a buffer. + SmallVector resultTypes; + SmallVector hoistedArguments; + if (funcOp.body().empty()) { + if (llvm::any_of(funcOp.getType().getResults(), isaTensor)) + return callOp->emitError() + << "cannot bufferize bodiless function that returns a tensor"; + } else { + ReturnOp returnOp = getAssumedUniqueReturnOp(funcOp); + if (!returnOp) + return funcOp->emitError() << "cannot bufferize a FuncOp with tensors " + "and without a unique ReturnOp"; + + // For each FuncOp result, keep track of which inplace argument it reuses. + for (OpOperand &returnOperand : returnOp->getOpOperands()) { + Type returnType = returnOperand.get().getType(); + if (!isaTensor(returnType)) { + resultTypes.push_back(returnType); + continue; + } + + // If return operand is equivalent to some bbArg, no need to return it. + Value returnVal = returnOperand.get(); + if (BlockArgument bbArg = + getEquivalentEnclosingFuncBBArg(returnVal, aliasInfo)) { + Value oldRes = callOp->getResult(returnOperand.getOperandNumber()); + int64_t idx = bbArg.getArgNumber(); + Value buffer = bvm.lookupOrNull(callOp->getOperand(idx)); + if (!buffer) + return callOp->emitError() << "operand #" << idx << " not bufferized"; + // Add CallOp operand/result equivalence: this is interprocedural info. + aliasInfo.insertNewBufferEquivalence(oldRes, buffer); + map(bvm, oldRes, buffer); + // Add a TensorLoadOp to kill all uses of the CallOp return. + // Replace all uses of the CallOp results so we can erase the CallOp. + // This TensorLoadOp must fold/DCE away or bufferization should be + // considered failed. + Value tensorLoad = + b.create(callOp.getLoc(), buffer); + oldRes.replaceAllUsesWith(tensorLoad); + // Add new op equivalence info. + aliasInfo.insertNewBufferEquivalence(tensorLoad, buffer); + map(bvm, tensorLoad, buffer); + continue; + } + + // TODO: Need to hoist above function boundary and add to + // `hoistedArgumentTypes`. + if (Operation *allocOp = getEquivalentAlloc(returnVal, aliasInfo)) + return allocOp->emitError() + << " needs hoist across function boundary\n"; + + // Other cases legitimately need to return a tensor, this is currently not + // supported. For instance, if hoisting across function boundary has + // failed, it may be due to e.g. data-dependent sizes. In such a case, we + // would we need a better type than memref. + resultTypes.push_back(returnType); + + int64_t returnIdx = returnOperand.getOperandNumber(); + return returnOp->emitError() + << " bufferize result #" << returnIdx << "\n"; + } + } + + // 2. Compute bufferized FunctionType. + SmallVector argumentTypes{callOp->getOperandTypes()}; + llvm::append_range(argumentTypes, ValueRange{hoistedArguments}.getTypes()); + // Get the bufferized FunctionType for funcOp or construct it if not yet + // available. + FunctionType bufferizedFuncType = getOrCreateBufferizedFunctionType( + funcOp, argumentTypes, resultTypes, bufferizedFunctionTypes); + + // 3. Rewrite tensor operands as memrefs based on `bufferizedFuncType`. + SmallVector newOperands; + newOperands.reserve(callOp->getNumOperands()); + for (OpOperand &opOperand : callOp->getOpOperands()) { + Value tensorOperand = opOperand.get(); + // Non-tensor operands are just copied. + if (!tensorOperand.getType().isa()) { + newOperands.push_back(tensorOperand); + continue; + } + + // Tensor operands are guaranteed to have been buferized. + int64_t idx = opOperand.getOperandNumber(); + Value buffer = bvm.lookupOrNull(tensorOperand); + assert(buffer && " missing buffer for operand"); + + // Caller / callee type mistmatch is handled with a CastOp. + auto memRefType = bufferizedFuncType.getInput(idx); + // Since we don't yet have a clear layout story, buffer_cast may + // conservatively turn tensors into more dynamic memref than necessary. + // If the memref type of the callee fails, introduce an extra memref.cast + // that will either canonicalize away or fail compilation until we can do + // something better. + if (buffer.getType() != memRefType) { + Value castBuffer = + b.create(callOp.getLoc(), memRefType, buffer); + // Add new op equivalence info. + aliasInfo.insertNewBufferEquivalence(castBuffer, buffer); + map(bvm, tensorOperand, castBuffer); + buffer = castBuffer; + } + newOperands.push_back(buffer); + } + + // 4. Create the new CallOp. + Operation *newCallOp = b.create(callOp.getLoc(), funcOp.sym_name(), + resultTypes, newOperands); + newCallOp->setAttrs(callOp->getAttrs()); + return success(); +} + /// DimOp tensor operand is modified inplace. This allows leaving dead /// tensors behind that will get DCE'd. static LogicalResult bufferize(OpBuilder &b, tensor::DimOp dimOp, BlockAndValueMapping &bvm, - const BufferizationAliasInfo &aliasInfo) { + BufferizationAliasInfo &aliasInfo) { if (dimOp.source().getType().isa()) { Value v = lookup(bvm, dimOp.source()); if (!v) @@ -1317,13 +1575,11 @@ static LogicalResult bufferize(OpBuilder &b, tensor::DimOp dimOp, static LogicalResult bufferize(OpBuilder &b, scf::ForOp forOp, BlockAndValueMapping &bvm, - const BufferizationAliasInfo &aliasInfo) { + BufferizationAliasInfo &aliasInfo) { // Take a guard before anything else. OpBuilder::InsertionGuard g(b); Location loc = forOp.getLoc(); - LLVM_DEBUG(DBGS() << "bufferize: " << *forOp << "\n"); - // If inPlace, just forward the buffer. // Otherwise alloc and copy. b.setInsertionPoint(forOp); @@ -1337,11 +1593,12 @@ static LogicalResult bufferize(OpBuilder &b, scf::ForOp forOp, Value operandBuffer = lookup(bvm, operand); Value resultBuffer = operandBuffer; if (getInPlace(opResult) != InPlaceSpec::True) { - resultBuffer = createNewAllocDeallocPairForShapedValue(b, loc, operand); + resultBuffer = + createNewAllocDeallocPairForShapedValue(b, loc, operand, aliasInfo); // If the tensor comes from `linalg::InitTensorOp`, the value is // unitialized and we do not need to copy. - // TODO: if the matching bbArg does not bufferize to a read is more - // general. + // TODO: "matching bbArg does not bufferize to a read" is a more general + // check. if (!operand.getDefiningOp()) b.create(forOp.getLoc(), operandBuffer, resultBuffer); } @@ -1356,7 +1613,7 @@ static LogicalResult bufferize(OpBuilder &b, scf::ForOp forOp, /// FuncOp always creates TensorToMemRef ops. static LogicalResult bufferize(OpBuilder &b, FuncOp funcOp, BlockAndValueMapping &bvm, - const BufferizationAliasInfo &aliasInfo) { + BufferizationAliasInfo &aliasInfo) { // Take a guard before anything else. OpBuilder::InsertionGuard g(b); b.setInsertionPointToStart(&funcOp.body().front()); @@ -1370,9 +1627,10 @@ static LogicalResult bufferize(OpBuilder &b, FuncOp funcOp, Type memRefType = rankedTensorType ? getDynamicMemRefType(rankedTensorType) : getContiguousOrUnrankedMemRefType(tensorType); - Value tensorToMemref = + Value bufferCast = b.create(funcOp.getLoc(), memRefType, bbArg); - map(bvm, bbArg, tensorToMemref); + aliasInfo.insertNewBufferEquivalence(bufferCast, bbArg); + map(bvm, bbArg, bufferCast); } return success(); } @@ -1380,7 +1638,7 @@ static LogicalResult bufferize(OpBuilder &b, FuncOp funcOp, /// ReturnOp always creates memref::TensorLoadOp. static LogicalResult bufferize(OpBuilder &b, ReturnOp returnOp, BlockAndValueMapping &bvm, - const BufferizationAliasInfo &aliasInfo) { + BufferizationAliasInfo &aliasInfo) { // Take a guard before anything else. OpBuilder::InsertionGuard g(b); b.setInsertionPoint(returnOp); @@ -1394,7 +1652,10 @@ static LogicalResult bufferize(OpBuilder &b, ReturnOp returnOp, Value v = lookup(bvm, operand.get()); if (!v) return failure(); - operand.set(b.create(returnOp.getLoc(), v)); + Value returnTensor = b.create(returnOp.getLoc(), v); + operand.set(returnTensor); + aliasInfo.insertNewBufferEquivalence(returnTensor, v); + map(bvm, returnTensor, v); } return success(); } @@ -1406,7 +1667,7 @@ static LogicalResult bufferize(OpBuilder &b, ReturnOp returnOp, /// isolation. static LogicalResult bufferize(OpBuilder &b, ExtractSliceOp extractSliceOp, BlockAndValueMapping &bvm, - const BufferizationAliasInfo &aliasInfo) { + BufferizationAliasInfo &aliasInfo) { LDBG("bufferize: " << *extractSliceOp << '\n'); // Take a guard before anything else. @@ -1426,8 +1687,8 @@ static LogicalResult bufferize(OpBuilder &b, ExtractSliceOp extractSliceOp, Value alloc; auto inPlace = getInPlace(extractSliceOp->getResult(0)); if (inPlace != InPlaceSpec::True) { - alloc = createNewAllocDeallocPairForShapedValue(b, loc, - extractSliceOp.result()); + alloc = createNewAllocDeallocPairForShapedValue( + b, loc, extractSliceOp.result(), aliasInfo); b.setInsertionPointAfter(alloc.getDefiningOp()); } @@ -1441,6 +1702,8 @@ static LogicalResult bufferize(OpBuilder &b, ExtractSliceOp extractSliceOp, Value subView = b.create( loc, subviewMemRefType, srcMemref, extractSliceOp.getMixedOffsets(), extractSliceOp.getMixedSizes(), extractSliceOp.getMixedStrides()); + // Insert new alias. + aliasInfo.insertNewBufferAlias(subView, srcMemref); /// If not inplaceable, copy. if (alloc) { @@ -1454,7 +1717,7 @@ static LogicalResult bufferize(OpBuilder &b, ExtractSliceOp extractSliceOp, static LogicalResult bufferize(OpBuilder &b, InsertSliceOp insertSliceOp, BlockAndValueMapping &bvm, - const BufferizationAliasInfo &aliasInfo) { + BufferizationAliasInfo &aliasInfo) { LDBG("bufferize: " << *insertSliceOp << '\n'); // Take a guard before anything else. @@ -1472,8 +1735,8 @@ static LogicalResult bufferize(OpBuilder &b, InsertSliceOp insertSliceOp, // cloning the whole tensor on every single iteration and is a symptom // of a catastrophically bad scheduling decision. // TODO: be very loud about it or even consider failing the pass. - Value newDstMemref = - createNewAllocDeallocPairForShapedValue(b, loc, insertSliceOp.result()); + Value newDstMemref = createNewAllocDeallocPairForShapedValue( + b, loc, insertSliceOp.result(), aliasInfo); b.setInsertionPointAfter(newDstMemref.getDefiningOp()); b.create(insertSliceOp.getLoc(), dstMemref, newDstMemref); dstMemref = newDstMemref; @@ -1503,6 +1766,8 @@ static LogicalResult bufferize(OpBuilder &b, InsertSliceOp insertSliceOp, Value subView = b.create( loc, subviewMemRefType, dstMemref, insertSliceOp.getMixedOffsets(), insertSliceOp.getMixedSizes(), insertSliceOp.getMixedStrides()); + // Insert new alias. + aliasInfo.insertNewBufferAlias(subView, dstMemref); b.create(insertSliceOp.getLoc(), srcMemref, subView); } @@ -1513,7 +1778,7 @@ static LogicalResult bufferize(OpBuilder &b, InsertSliceOp insertSliceOp, static LogicalResult bufferize(OpBuilder &b, VectorTransferOpInterface op, BlockAndValueMapping &bvm, - const BufferizationAliasInfo &aliasInfo) { + BufferizationAliasInfo &aliasInfo) { // Take a guard before anything else. OpBuilder::InsertionGuard g(b); b.setInsertionPoint(op); @@ -1522,8 +1787,6 @@ static LogicalResult bufferize(OpBuilder &b, VectorTransferOpInterface op, if (op.getShapedType().isa()) return failure(); - LDBG("bufferize: " << *op << '\n'); - /// transfer_read from buffer always reads from the bufferized /// op.source(). if (auto readOp = dyn_cast(op.getOperation())) { @@ -1540,8 +1803,8 @@ static LogicalResult bufferize(OpBuilder &b, VectorTransferOpInterface op, // If transfer_write is not inPlace, allocate a new buffer. Value newInputBuffer; if (inPlace != InPlaceSpec::True) { - newInputBuffer = - createNewAllocDeallocPairForShapedValue(b, loc, writeOp.result()); + newInputBuffer = createNewAllocDeallocPairForShapedValue( + b, loc, writeOp.result(), aliasInfo); b.setInsertionPointAfter(newInputBuffer.getDefiningOp()); map(bvm, writeOp.result(), newInputBuffer); } else { @@ -1567,7 +1830,7 @@ static LogicalResult bufferize(OpBuilder &b, VectorTransferOpInterface op, static LogicalResult bufferize(OpBuilder &b, scf::YieldOp yieldOp, BlockAndValueMapping &bvm, - const BufferizationAliasInfo &aliasInfo) { + BufferizationAliasInfo &aliasInfo) { // Take a guard before anything else. OpBuilder::InsertionGuard g(b); b.setInsertionPoint(yieldOp); @@ -1618,7 +1881,7 @@ bufferizableInPlaceAnalysis(ExtractSliceOp extractSliceOp, // If `extractSliceOp` were to be bufferized inplace, it cannot end up // aliasing a write into a non-writeable buffer. bool wouldCreateAliasingWriteToNonWriteableBuffer = - aliasInfo.aliasesInPlaceWrite(extractSliceOp) && + aliasInfo.aliasesInPlaceWrite(extractSliceOp.result()) && aliasInfo.aliasesNonWriteableBuffer(extractSliceOp->getOpOperand(0)); if (wouldCreateAliasingWriteToNonWriteableBuffer) @@ -1743,7 +2006,6 @@ inPlaceAnalysisFuncOpBody(FuncOp funcOp, BufferizationAliasInfo &aliasInfo, return extractSliceOps.push_back(extractSliceOp); if (auto insertSliceOp = dyn_cast(op)) return insertSliceOps.push_back(insertSliceOp); - auto isaTensor = [](Type t) { return t.isa(); }; // No tensors => no buffers. if (none_of(op->getOperandTypes(), isaTensor) && none_of(op->getResultTypes(), isaTensor)) @@ -1792,12 +2054,12 @@ inPlaceAnalysisFuncOpBody(FuncOp funcOp, BufferizationAliasInfo &aliasInfo, } //===----------------------------------------------------------------------===// -// Bufferization entry-point. +// Bufferization entry-point for functions. //===----------------------------------------------------------------------===// -static LogicalResult -bufferizeFuncOpInternals(FuncOp funcOp, BlockAndValueMapping &bvm, - const BufferizationAliasInfo &aliasInfo) { +static LogicalResult bufferizeFuncOpInternals( + FuncOp funcOp, BlockAndValueMapping &bvm, BufferizationAliasInfo &aliasInfo, + DenseMap &bufferizedFunctionTypes) { LLVM_DEBUG(llvm::dbgs() << "\n\n"); LDBG("Begin BufferizeFuncOpInternals:\n" << funcOp << '\n'); OpBuilder b(funcOp->getContext()); @@ -1805,42 +2067,54 @@ bufferizeFuncOpInternals(FuncOp funcOp, BlockAndValueMapping &bvm, if (failed(bufferize(b, funcOp, bvm, aliasInfo))) return failure(); // Walk in PreOrder to ensure ops with regions are handled before their body. - WalkResult result = funcOp.walk([&](Operation *op) { - LogicalResult status = - TypeSwitch(op) - // Skip BufferCast and TensorLoad ops. - // clang-format off - .Case( - [&](auto) { return success(); }) - .Case( - [&](auto op) { - LDBG("Begin buferize:\n" << op << '\n'); - return bufferize(b, op, bvm, aliasInfo); - }) - // clang-format on - .Default([&](Operation *op) { - auto isaTensor = [](Type t) { return t.isa(); }; - if (any_of(op->getOperandTypes(), isaTensor) || - any_of(op->getResultTypes(), isaTensor)) - return failure(); - return success(); - }); - if (failed(status)) { - op->emitError("Failed bufferization"); - return WalkResult::interrupt(); - } - return WalkResult::advance(); + // Since walk has to be PreOrder, we need to erase ops that require it + // separately: this is the case for CallOp + SmallVector toErase; + WalkResult result = funcOp.walk([&](Operation *op) + -> WalkResult { + // clang-format off + WalkResult result = + TypeSwitch(op) + // Skip BufferCast and TensorLoad ops. + .Case([&](auto) { return success(); }) + .Case([&](auto op) { + LDBG("Begin bufferize:\n" << op << '\n'); + return bufferize(b, op, bvm, aliasInfo); + }) + .Case([&](CallOpInterface op) { + LDBG("Begin bufferize:\n" << op << '\n'); + return bufferize(b, op, bvm, aliasInfo, bufferizedFunctionTypes); + }) + .Default([&](Operation *op) { + auto isaTensor = [](Type t) { return t.isa(); }; + if (any_of(op->getOperandTypes(), isaTensor) || + any_of(op->getResultTypes(), isaTensor)) + return failure(); + return success(); + }); + // clang-format on + + // Register post-walk erasure, if necessary. + if (isa(op)) + if (llvm::any_of(op->getOperandTypes(), isaTensor) || + llvm::any_of(op->getResultTypes(), isaTensor)) + toErase.push_back(op); + + return result; }); LDBG("End BufferizeFuncOpInternals:\n" << funcOp << '\n'); + for (Operation *op : toErase) + op->erase(); + return failure(result.wasInterrupted()); } @@ -1874,7 +2148,9 @@ void LinalgComprehensiveFuncBufferize::runOnFunction() { // Bufferization phase. BlockAndValueMapping bvm; - if (failed(bufferizeFuncOpInternals(funcOp, bvm, aliasInfo))) + DenseMap bufferizedFunctionTypes; + if (failed(bufferizeFuncOpInternals(funcOp, bvm, aliasInfo, + bufferizedFunctionTypes))) signalPassFailure(); // Post-pass cleanup of inplaceable attributes. @@ -1889,6 +2165,168 @@ std::unique_ptr mlir::createLinalgComprehensiveFuncBufferizePass() { // Bufferization entry-point for modules. //===----------------------------------------------------------------------===// +/// Return the op with Allocate MemoryEffect if `v` is equivalent to an such +/// an op. Return null otherwise. +static Operation *getEquivalentAlloc(Value value, + const BufferizationAliasInfo &aliasInfo) { + Operation *res; + aliasInfo.applyOnEquivalenceClass(value, [&](Value v) { + if (!res) + if (auto interface = + dyn_cast_or_null(v.getDefiningOp())) + if (auto effect = + interface.getEffectOnValue(value)) + res = v.getDefiningOp(); + }); + return res; +} + +/// Return the first argument of the enclosing FuncOp that is equivalent to `v`. +/// Return null if no such bbArg can be found. +static BlockArgument +getEquivalentEnclosingFuncBBArg(Value v, + const BufferizationAliasInfo &aliasInfo) { + Operation *op = v.getParentBlock()->getParentOp(); + FuncOp funcOp = dyn_cast(op); + if (!funcOp) + funcOp = op->getParentOfType(); + assert(funcOp && "expected non-null FuncOp"); + for (BlockArgument bbArg : funcOp.getArguments()) + if (aliasInfo.areEquivalentBufferizedValues(v, bbArg)) + return bbArg; + return nullptr; +} + +/// Rewrite the `funcOp` arguments analysis return values and terminator into +/// buffer form (using the canonical memref layout for now), according to the +/// inPlace-bufferizable information of the function arguments. +/// This relies on a buffer equivalence analysis of each return operand. When a +/// result buffer is equivalent to: +/// 1. a BlockArgument of `funcOp`, it can be dropped from the return values +/// and becomes inplaceable at all callers. This assumes all CallOp perform +/// the necessary work to clone operands so as to make them inplaceable. +// Reliance on this logic will need to be relaxed in thefuture. +/// 2. an op with an Alloc effect, this currently fails bufferization but is a +/// candidate for hoisting and creating a new inplace operand at all caller +/// sites. +/// 3. if such a hoisting for 2. is not possible (e.g. data-dependent that +/// prevents hoisting), this is currently unsupported and will require a +/// refcounted buffer type. +static LogicalResult bufferizeFuncOpBoundary( + FuncOp funcOp, BufferizationAliasInfo &aliasInfo, + DenseMap &bufferizedFunctionTypes) { + LLVM_DEBUG(DBGS() << "Begin bufferizeFuncOpBoundary:\n" << funcOp << "\n"); + + // If nothing to do then we are done. + if (!llvm::any_of(funcOp.getType().getInputs(), isaTensor) && + !llvm::any_of(funcOp.getType().getResults(), isaTensor)) + return success(); + + // Get the bufferized FunctionType for funcOp or construct it if not yet + // available. + // TODO: Atm we have 3 cases: + // 1. if a function is called from within the Module, it must have bufferized + // to inplaceable tensor results. + // 2. if it is bodiless, it must have bufferized and is not allowed to have + // result tensors. + // 3. if it is not called internally, it still must bufferize to inplaceable + // tensor results and we construct it now (e.g. top-level function called + // externally). + // -> Figure out a better layering. + TypeRange resultTypes; + FunctionType bufferizedFuncType = + getOrCreateBufferizedFunctionType(funcOp, funcOp.getType().getInputs(), + resultTypes, bufferizedFunctionTypes); + + // Corner case: Bodiless FuncOp + // ============================ + // The body of such functions is assumed opaque and we can't know the + // bufferization contract they want to enforce atm. + // As a consequence, only support functions that don't return any tensor atm. + if (funcOp.getBody().empty()) { + if (llvm::any_of(funcOp.getType().getResults(), isaTensor)) + return funcOp->emitError() << "cannot bufferize bodiless function that " + << "returns a tensor"; + funcOp.setType(bufferizedFuncType); + LLVM_DEBUG(DBGS() << "End bufferizeFuncOpBoundary no fun body: " << funcOp); + return success(); + } + + // Support only single return-terminated block in the function. + ReturnOp returnOp = getAssumedUniqueReturnOp(funcOp); + if (!returnOp) + return funcOp->emitError() << "cannot bufferize a FuncOp with tensors and " + "without a unique ReturnOp"; + + // 1. For each FuncOp result, keep track of which inplace argument it reuses. + SmallVector returnValues; + for (OpOperand &returnOperand : returnOp->getOpOperands()) { + // If return operand is equivalent to some bbArg, no need to return it. + Value returnVal = returnOperand.get(); + if (getEquivalentEnclosingFuncBBArg(returnVal, aliasInfo)) + continue; + // TODO: Need to hoist above function boundary. If this is not possible due + // to data-depedent sizes, we need a better type than memref. + if (Operation *allocOp = getEquivalentAlloc(returnVal, aliasInfo)) + return allocOp->emitError() << " needs hoist across function boundary\n"; + int64_t returnIdx = returnOperand.getOperandNumber(); + return returnOp->emitError() << " bufferize result #" << returnIdx << "\n"; + } + + // 2. Rewrite the terminator without the inPlace bufferizable values. + OpBuilder(returnOp).create(returnOp.getLoc(), returnValues); + returnOp->erase(); + + // 3. Rewrite the bbArgs. + // Iterate on the original `numArgs` and replace them in order. + // This guarantees the argument order still matches after the rewrite. + Block &frontBlock = funcOp.body().front(); + unsigned numArgs = frontBlock.getNumArguments(); + for (unsigned idx = 0; idx < numArgs; ++idx) { + auto bbArg = frontBlock.getArgument(0); + auto tensorType = bbArg.getType().dyn_cast(); + // Non-tensor types are just forwarded. + if (!tensorType) { + frontBlock.addArgument(bbArg.getType()); + bbArg.replaceAllUsesWith(frontBlock.getArguments().back()); + frontBlock.eraseArgument(0); + continue; + } + + // Get the buffer type from the bufferized function type. + Type memrefType = bufferizedFuncType.getInput(idx); + Value memref = frontBlock.addArgument(memrefType); + OpBuilder b(funcOp->getContext()); + b.setInsertionPointToStart(&frontBlock); + // Replace all uses of bbArg through a BufferCastOp by a memref::CastOp. + for (auto &use : llvm::make_early_inc_range(bbArg.getUses())) { + if (auto bufferCastOp = dyn_cast(use.getOwner())) { + auto castOp = b.create( + funcOp.getLoc(), bufferCastOp.memref().getType(), memref); + bufferCastOp.memref().replaceAllUsesWith(castOp); + aliasInfo.insertNewBufferEquivalence(castOp.dest(), + bufferCastOp.memref()); + } + } + // Replace all remaining uses by a tensor_load. + if (!bbArg.use_empty()) { + auto tensorLoadOp = + b.create(funcOp.getLoc(), memref); + aliasInfo.insertNewBufferEquivalence(tensorLoadOp, bbArg); + bbArg.replaceAllUsesWith(tensorLoadOp); + } + frontBlock.eraseArgument(0); + // TODO: add support to erase aliasInfo entries if deemed necessary. + } + + // 4. Rewrite the FuncOp type to buffer form. + funcOp.setType(bufferizedFuncType); + + LLVM_DEBUG(DBGS() << "End bufferizeFuncOpBoundary:\n" << funcOp); + + return success(); +} + /// Store all functions of the `moduleOp` in `orderedFuncOps`, sorted by /// callee-caller order (i.e. callees without callers first). /// Store the map of FuncOp to all its callers in `callerMap`. @@ -1905,10 +2343,12 @@ getFuncOpsOrderedByCalls(ModuleOp moduleOp, DenseMap numberCallOpsContainedInFuncOp; WalkResult res = moduleOp.walk([&](FuncOp funcOp) { numberCallOpsContainedInFuncOp[funcOp] = 0; - return funcOp.walk([&](CallOpInterface callOp) { + return funcOp.walk([&](CallOpInterface callOp) -> WalkResult { + // Only support CallOp for now. + if (!isa(callOp.getOperation())) + return callOp->emitError() << "expected a CallOp"; FuncOp calledFunction = getCalledFunction(callOp); - if (!calledFunction) - return WalkResult::interrupt(); + assert(calledFunction && "could not retrieved called FuncOp"); auto it = callerMap.try_emplace(calledFunction, DenseSet{}); it.first->getSecond().insert(callOp); if (calledBy[calledFunction].count(funcOp) == 0) { @@ -1954,6 +2394,7 @@ void LinalgComprehensiveModuleBufferize::runOnOperation() { SmallVector orderedFuncOps; DenseMap> callerMap; + DenseMap bufferizedFunctionTypes; if (failed(getFuncOpsOrderedByCalls(moduleOp, orderedFuncOps, callerMap))) return signalPassFailure(); @@ -1985,12 +2426,30 @@ void LinalgComprehensiveModuleBufferize::runOnOperation() { return; } - // TODO: Bufferization phase. + // Bufferization phase. + if (!testAnalysisOnly) { + BlockAndValueMapping tensorToBufferMap; + if (failed(bufferizeFuncOpInternals(funcOp, tensorToBufferMap, aliasInfo, + bufferizedFunctionTypes))) { + signalPassFailure(); + return; + } + } } // Don't drop the attributes if we only want to report the analysis. if (testAnalysisOnly) return; + for (FuncOp funcOp : orderedFuncOps) { + // Note: It would be good to apply cleanups here but we cannot as aliasInfo + // would be invalidated. + if (failed(bufferizeFuncOpBoundary(funcOp, aliasInfo, + bufferizedFunctionTypes))) { + signalPassFailure(); + return; + } + } + // Post-pass cleanup of inplaceable attributes. moduleOp.walk( [&](Operation *op) { op->removeAttr(kInPlaceResultsAttrName); }); @@ -1998,6 +2457,12 @@ void LinalgComprehensiveModuleBufferize::runOnOperation() { for (BlockArgument bbArg : op.getArguments()) removeInPlaceFuncArgument(bbArg); }); + + OpPassManager cleanupPipeline(OpPassManager("module")); + cleanupPipeline.addPass(createCanonicalizerPass()); + cleanupPipeline.addPass(createCSEPass()); + cleanupPipeline.addPass(createLoopInvariantCodeMotionPass()); + (void)runPipeline(cleanupPipeline, moduleOp); } std::unique_ptr mlir::createLinalgComprehensiveModuleBufferizePass() { diff --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-invalid.mlir b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-invalid.mlir index 0e378a89ef58c..d6a6d7c67f6cf 100644 --- a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-invalid.mlir +++ b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-invalid.mlir @@ -1,5 +1,36 @@ // RUN: mlir-opt %s -linalg-comprehensive-module-bufferize -split-input-file -verify-diagnostics +func private @foo() -> tensor + +func @bar() -> tensor { + %foo = constant @foo : () -> (tensor) +// expected-error @+1 {{expected a CallOp}} + %res = call_indirect %foo() : () -> (tensor) + return %res : tensor +} + +// ----- + +// expected-error @+1 {{cannot bufferize bodiless function that returns a tensor}} +func private @foo() -> tensor + +// ----- + +// expected-error @+1 {{cannot bufferize a FuncOp with tensors and without a unique ReturnOp}} +func @switch(%flag : i32, %caseOperand : i32, %t1 : tensor, %t2 : tensor) + -> (tensor) +{ + switch %flag : i32, [ + default: ^bb1(%caseOperand : i32), + 42: ^bb2(%caseOperand : i32) + ] + + ^bb1(%bb1arg : i32): + return %t1 : tensor + ^bb2(%bb2arg : i32): + return %t2 : tensor +} + // ----- // expected-error @-3 {{expected callgraph to be free of circular dependencies}} diff --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir new file mode 100644 index 0000000000000..7756587560ead --- /dev/null +++ b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir @@ -0,0 +1,60 @@ +// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize -split-input-file | FileCheck %s + +// CHECK: #[[$DYN_1D_MAP:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)> + +// CHECK: func private @some_external_func(memref) +func private @some_external_func(tensor) + +// CHECK: func @scf_for_with_tensor_insert_slice( +// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref +// CHECK-SAME: %[[B:[a-zA-Z0-9]*]]: memref +// CHECK-SAME: %[[C:[a-zA-Z0-9]*]]: memref<4xf32, #[[$DYN_1D_MAP]]> +func @scf_for_with_tensor_insert_slice( + %A : tensor, %B : tensor, %C : tensor<4xf32>, + %lb : index, %ub : index, %step : index) + -> (tensor, tensor) +{ + // CHECK-NEXT: scf.for + %r0:2 = scf.for %i = %lb to %ub step %step iter_args(%tA = %A, %tB = %B) + -> (tensor, tensor) + { + // CHECK-NEXT: %[[SVA:.*]] = memref.subview %[[A]] + // CHECK-NEXT: linalg.copy(%[[C]], %[[SVA]]) : memref<4xf32, #[[$DYN_1D_MAP]]>, memref<4xf32, #[[$DYN_1D_MAP]]> + %ttA = tensor.insert_slice %C into %tA[%i][4][1] : tensor<4xf32> into tensor + + // CHECK-NEXT: %[[SVB:.*]] = memref.subview %[[B]] + // CHECK-NEXT: linalg.copy(%[[C]], %[[SVB]]) : memref<4xf32, #[[$DYN_1D_MAP]]>, memref<4xf32, #[[$DYN_1D_MAP]]> + %ttB = tensor.insert_slice %C into %tB[%i][4][1] : tensor<4xf32> into tensor + + // scf.yield is empty and is elided + // CHECK-NOT: scf.yield + scf.yield %ttA, %ttB : tensor, tensor + } + + // Swaparoo requires bufferizing the whole function to figure out who's who. + return %r0#1, %r0#0: tensor, tensor +} + +// CHECK: func @bar( +// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref +// CHECK-SAME: %[[B:[a-zA-Z0-9]*]]: memref +// CHECK-SAME: %[[C:[a-zA-Z0-9]*]]: memref<4xf32, #[[$DYN_1D_MAP]]> +func @bar( + %A : tensor {linalg.inplaceable = true}, + %B : tensor {linalg.inplaceable = true}, + %C : tensor<4xf32> {linalg.inplaceable = true}, + %lb : index, %ub : index, %step : index) + -> (tensor, tensor) +{ +// CHECK-NEXT: call @scf_for_with_tensor_insert_slice(%[[A]], %[[B]], %[[C]] + %r0:2 = call @scf_for_with_tensor_insert_slice(%A, %B, %C, %lb, %ub, %step) : + (tensor, tensor, tensor<4xf32>, index, index, index) + -> (tensor, tensor) + + // %r0#0 is actually %B after inplaceable results are swapped in the callee. +// CHECK-NEXT: call @some_external_func(%[[B]]) : (memref) -> () + call @some_external_func(%r0#0) : (tensor) -> () + +// CHECK-NEXT: return + return %r0#0, %r0#1: tensor, tensor +} From 2a1ef8784ad9a78583a2f1f3bba536ee57b6b13b Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 1 Jul 2021 11:39:59 +0100 Subject: [PATCH 403/619] [CostModel][X86] getCastInstrCost - attempt to match custom cast/conversion before legalized types. Move the (SSE-only) generic, legalized type conversion matching after the specific,custom conversion cases, allowing us to properly provide cost overrides. The next step will be to clean up some of the weird existing costs and then to enable AVX+ legalized costs, which will let us strip out a lot of the cost tables entries. --- .../lib/Target/X86/X86TargetTransformInfo.cpp | 118 +++++++++--------- llvm/test/Analysis/CostModel/X86/cast.ll | 2 +- llvm/test/Analysis/CostModel/X86/sitofp.ll | 2 +- 3 files changed, 63 insertions(+), 59 deletions(-) diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index d245324cc9ce6..e400000f83160 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -1519,9 +1519,11 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, return Cost; }; + // The cost tables include both specific, custom (non-legal) src/dst type + // conversions and generic, legalized types. We test for customs first, before + // falling back to legalization. // FIXME: Need a better design of the cost table to handle non-simple types of // potential massive combinations (elem_num x src_type x dst_type). - static const TypeConversionCostTblEntry AVX512BWConversionTbl[] { { ISD::SIGN_EXTEND, MVT::v32i16, MVT::v32i8, 1 }, { ISD::ZERO_EXTEND, MVT::v32i16, MVT::v32i8, 1 }, @@ -2173,85 +2175,87 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, { ISD::TRUNCATE, MVT::v2i32, MVT::v2i64, 1 }, // PSHUFD }; - std::pair LTSrc = TLI->getTypeLegalizationCost(DL, Src); - std::pair LTDest = - TLI->getTypeLegalizationCost(DL, Dst); - - if (ST->hasSSE41() && !ST->hasAVX()) - if (const auto *Entry = ConvertCostTableLookup(SSE41ConversionTbl, ISD, - LTDest.second, LTSrc.second)) - return AdjustCost(LTSrc.first * Entry->Cost); - - if (ST->hasSSE2() && !ST->hasAVX()) - if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD, - LTDest.second, LTSrc.second)) - return AdjustCost(LTSrc.first * Entry->Cost); - + // Attempt to map directly to (simple) MVT types to let us match custom entries. EVT SrcTy = TLI->getValueType(DL, Src); EVT DstTy = TLI->getValueType(DL, Dst); // The function getSimpleVT only handles simple value types. - if (!SrcTy.isSimple() || !DstTy.isSimple()) - return AdjustCost(BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind)); - - MVT SimpleSrcTy = SrcTy.getSimpleVT(); - MVT SimpleDstTy = DstTy.getSimpleVT(); + if (SrcTy.isSimple() && DstTy.isSimple()) { + MVT SimpleSrcTy = SrcTy.getSimpleVT(); + MVT SimpleDstTy = DstTy.getSimpleVT(); + + if (ST->useAVX512Regs()) { + if (ST->hasBWI()) + if (const auto *Entry = ConvertCostTableLookup( + AVX512BWConversionTbl, ISD, SimpleDstTy, SimpleSrcTy)) + return AdjustCost(Entry->Cost); + + if (ST->hasDQI()) + if (const auto *Entry = ConvertCostTableLookup( + AVX512DQConversionTbl, ISD, SimpleDstTy, SimpleSrcTy)) + return AdjustCost(Entry->Cost); + + if (ST->hasAVX512()) + if (const auto *Entry = ConvertCostTableLookup( + AVX512FConversionTbl, ISD, SimpleDstTy, SimpleSrcTy)) + return AdjustCost(Entry->Cost); + } - if (ST->useAVX512Regs()) { if (ST->hasBWI()) - if (const auto *Entry = ConvertCostTableLookup(AVX512BWConversionTbl, ISD, - SimpleDstTy, SimpleSrcTy)) + if (const auto *Entry = ConvertCostTableLookup( + AVX512BWVLConversionTbl, ISD, SimpleDstTy, SimpleSrcTy)) return AdjustCost(Entry->Cost); if (ST->hasDQI()) - if (const auto *Entry = ConvertCostTableLookup(AVX512DQConversionTbl, ISD, - SimpleDstTy, SimpleSrcTy)) + if (const auto *Entry = ConvertCostTableLookup( + AVX512DQVLConversionTbl, ISD, SimpleDstTy, SimpleSrcTy)) return AdjustCost(Entry->Cost); if (ST->hasAVX512()) - if (const auto *Entry = ConvertCostTableLookup(AVX512FConversionTbl, ISD, + if (const auto *Entry = ConvertCostTableLookup(AVX512VLConversionTbl, ISD, SimpleDstTy, SimpleSrcTy)) return AdjustCost(Entry->Cost); - } - if (ST->hasBWI()) - if (const auto *Entry = ConvertCostTableLookup(AVX512BWVLConversionTbl, ISD, - SimpleDstTy, SimpleSrcTy)) - return AdjustCost(Entry->Cost); + if (ST->hasAVX2()) { + if (const auto *Entry = ConvertCostTableLookup(AVX2ConversionTbl, ISD, + SimpleDstTy, SimpleSrcTy)) + return AdjustCost(Entry->Cost); + } - if (ST->hasDQI()) - if (const auto *Entry = ConvertCostTableLookup(AVX512DQVLConversionTbl, ISD, - SimpleDstTy, SimpleSrcTy)) - return AdjustCost(Entry->Cost); + if (ST->hasAVX()) { + if (const auto *Entry = ConvertCostTableLookup(AVXConversionTbl, ISD, + SimpleDstTy, SimpleSrcTy)) + return AdjustCost(Entry->Cost); + } - if (ST->hasAVX512()) - if (const auto *Entry = ConvertCostTableLookup(AVX512VLConversionTbl, ISD, - SimpleDstTy, SimpleSrcTy)) - return AdjustCost(Entry->Cost); - - if (ST->hasAVX2()) { - if (const auto *Entry = ConvertCostTableLookup(AVX2ConversionTbl, ISD, - SimpleDstTy, SimpleSrcTy)) - return AdjustCost(Entry->Cost); - } + if (ST->hasSSE41()) { + if (const auto *Entry = ConvertCostTableLookup(SSE41ConversionTbl, ISD, + SimpleDstTy, SimpleSrcTy)) + return AdjustCost(Entry->Cost); + } - if (ST->hasAVX()) { - if (const auto *Entry = ConvertCostTableLookup(AVXConversionTbl, ISD, - SimpleDstTy, SimpleSrcTy)) - return AdjustCost(Entry->Cost); + if (ST->hasSSE2()) { + if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD, + SimpleDstTy, SimpleSrcTy)) + return AdjustCost(Entry->Cost); + } } - if (ST->hasSSE41()) { + // Fall back to legalized types. + // TODO: Add AVX support. + std::pair LTSrc = TLI->getTypeLegalizationCost(DL, Src); + std::pair LTDest = + TLI->getTypeLegalizationCost(DL, Dst); + + if (ST->hasSSE41() && !ST->hasAVX()) if (const auto *Entry = ConvertCostTableLookup(SSE41ConversionTbl, ISD, - SimpleDstTy, SimpleSrcTy)) - return AdjustCost(Entry->Cost); - } + LTDest.second, LTSrc.second)) + return AdjustCost(LTSrc.first * Entry->Cost); - if (ST->hasSSE2()) { + if (ST->hasSSE2() && !ST->hasAVX()) if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD, - SimpleDstTy, SimpleSrcTy)) - return AdjustCost(Entry->Cost); - } + LTDest.second, LTSrc.second)) + return AdjustCost(LTSrc.first * Entry->Cost); return AdjustCost( BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I)); diff --git a/llvm/test/Analysis/CostModel/X86/cast.ll b/llvm/test/Analysis/CostModel/X86/cast.ll index 60fd218a19f6c..5377c68761a3b 100644 --- a/llvm/test/Analysis/CostModel/X86/cast.ll +++ b/llvm/test/Analysis/CostModel/X86/cast.ll @@ -391,7 +391,7 @@ define void @sitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) { ; SSE41-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %C1 = sitofp <4 x i16> %c to <4 x float> ; SSE41-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %C2 = sitofp <4 x i16> %c to <4 x double> ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D1 = sitofp <4 x i32> %d to <4 x float> -; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D2 = sitofp <4 x i32> %d to <4 x double> +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %D2 = sitofp <4 x i32> %d to <4 x double> ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'sitofp4' diff --git a/llvm/test/Analysis/CostModel/X86/sitofp.ll b/llvm/test/Analysis/CostModel/X86/sitofp.ll index 718467c1e7832..de4b2c2276896 100644 --- a/llvm/test/Analysis/CostModel/X86/sitofp.ll +++ b/llvm/test/Analysis/CostModel/X86/sitofp.ll @@ -79,7 +79,7 @@ define i32 @sitofp_i32_double() { ; SSE42-LABEL: 'sitofp_i32_double' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; From 65722561dff2c3110af1617f91eae162f32d5968 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Wed, 30 Jun 2021 13:26:07 +0000 Subject: [PATCH 404/619] [llvm][docs] Bump release number from 12 -> 13 This seems to have been forgotten. The result was the title of pages like https://llvm.org/docs/ReleaseNotes.html Was: LLVM 13.0.0 Release Notes — LLVM 12 documentation Reviewed By: tstellar Differential Revision: https://reviews.llvm.org/D105189 --- llvm/docs/conf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/docs/conf.py b/llvm/docs/conf.py index 687d812f82d78..5be2e8671111c 100644 --- a/llvm/docs/conf.py +++ b/llvm/docs/conf.py @@ -66,9 +66,9 @@ # built documents. # # The short version. -version = '12' +version = '13' # The full version, including alpha/beta/rc tags. -release = '12' +release = '13' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. From 01b846674d27f035becac4c980a91c1204b6587b Mon Sep 17 00:00:00 2001 From: Bradley Smith Date: Mon, 7 Jun 2021 14:34:03 +0100 Subject: [PATCH 405/619] [AArch64][SVE] Add support for fixed length MSCATTER/MGATHER Since gather lowering can now lower to nodes that may need expansion via the vector legalizer, do MGATHER lowering via vector legalizer. Additionally, as part of adding passthru support for fixed typed gathers, fix passthru support for scalable types. Depends on D104910 Differential Revision: https://reviews.llvm.org/D104217 --- .../SelectionDAG/LegalizeVectorOps.cpp | 1 + .../Target/AArch64/AArch64ISelLowering.cpp | 119 +- .../AArch64/AArch64TargetTransformInfo.h | 8 +- .../AArch64/sve-fixed-length-masked-gather.ll | 1138 +++++++++++++++++ .../sve-fixed-length-masked-scatter.ll | 999 +++++++++++++++ .../test/CodeGen/AArch64/sve-masked-gather.ll | 21 + 6 files changed, 2264 insertions(+), 22 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 0aa2660c7c71b..ebe3bfc4b75ac 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -457,6 +457,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::USHLSAT: case ISD::FP_TO_SINT_SAT: case ISD::FP_TO_UINT_SAT: + case ISD::MGATHER: Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); break; case ISD::SMULFIX: diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index d65c8ad6ff2a0..2702cdcc6d70e 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1211,15 +1211,16 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::SINT_TO_FP, VT, Custom); setOperationAction(ISD::UINT_TO_FP, VT, Custom); } + } - // NEON doesn't support masked loads or stores, but SVE does - for (auto VT : - {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v1f64, - MVT::v2f64, MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, - MVT::v2i32, MVT::v4i32, MVT::v1i64, MVT::v2i64}) { - setOperationAction(ISD::MLOAD, VT, Custom); - setOperationAction(ISD::MSTORE, VT, Custom); - } + // NEON doesn't support masked loads/stores/gathers/scatters, but SVE does + for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v1f64, + MVT::v2f64, MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, + MVT::v2i32, MVT::v4i32, MVT::v1i64, MVT::v2i64}) { + setOperationAction(ISD::MLOAD, VT, Custom); + setOperationAction(ISD::MSTORE, VT, Custom); + setOperationAction(ISD::MGATHER, VT, Custom); + setOperationAction(ISD::MSCATTER, VT, Custom); } for (MVT VT : MVT::fp_scalable_vector_valuetypes()) { @@ -1513,7 +1514,9 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) { setOperationAction(ISD::FSUB, VT, Custom); setOperationAction(ISD::FTRUNC, VT, Custom); setOperationAction(ISD::LOAD, VT, Custom); + setOperationAction(ISD::MGATHER, VT, Custom); setOperationAction(ISD::MLOAD, VT, Custom); + setOperationAction(ISD::MSCATTER, VT, Custom); setOperationAction(ISD::MSTORE, VT, Custom); setOperationAction(ISD::MUL, VT, Custom); setOperationAction(ISD::MULHS, VT, Custom); @@ -2228,6 +2231,13 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter( // Lowering Code //===----------------------------------------------------------------------===// +// Forward declarations of SVE fixed length lowering helpers +static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT); +static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V); +static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V); +static SDValue convertFixedMaskToScalableVector(SDValue Mask, + SelectionDAG &DAG); + /// isZerosVector - Check whether SDNode N is a zero-filled vector. static bool isZerosVector(const SDNode *N) { // Look through a bit convert. @@ -4232,6 +4242,12 @@ void selectGatherScatterAddrMode(SDValue &BasePtr, SDValue &Index, EVT MemVT, if (!isNullConstant(BasePtr)) return; + // FIXME: This will not match for fixed vector type codegen as the nodes in + // question will have fixed<->scalable conversions around them. This should be + // moved to a DAG combine or complex pattern so that is executes after all of + // the fixed vector insert and extracts have been removed. This deficiency + // will result in a sub-optimal addressing mode being used, i.e. an ADD not + // being folded into the scatter/gather. ConstantSDNode *Offset = nullptr; if (Index.getOpcode() == ISD::ADD) if (auto SplatVal = DAG.getSplatValue(Index.getOperand(1))) { @@ -4276,6 +4292,8 @@ SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op, MaskedGatherSDNode *MGT = cast(Op); assert(MGT && "Can only custom lower gather load nodes"); + bool IsFixedLength = MGT->getMemoryVT().isFixedLengthVector(); + SDValue Index = MGT->getIndex(); SDValue Chain = MGT->getChain(); SDValue PassThru = MGT->getPassThru(); @@ -4294,6 +4312,7 @@ SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op, bool ResNeedsSignExtend = ExtTy == ISD::EXTLOAD || ExtTy == ISD::SEXTLOAD; EVT VT = PassThru.getSimpleValueType(); + EVT IndexVT = Index.getSimpleValueType(); EVT MemVT = MGT->getMemoryVT(); SDValue InputVT = DAG.getValueType(MemVT); @@ -4301,14 +4320,27 @@ SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op, !static_cast(DAG.getSubtarget()).hasBF16()) return SDValue(); - // Handle FP data by using an integer gather and casting the result. - if (VT.isFloatingPoint()) { - EVT PassThruVT = getPackedSVEVectorVT(VT.getVectorElementCount()); - PassThru = getSVESafeBitCast(PassThruVT, PassThru, DAG); + if (IsFixedLength) { + assert(Subtarget->useSVEForFixedLengthVectors() && + "Cannot lower when not using SVE for fixed vectors"); + IndexVT = getContainerForFixedLengthVector(DAG, IndexVT); + MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType()); + InputVT = DAG.getValueType(MemVT.changeTypeToInteger()); + } + + if (PassThru->isUndef() || isZerosVector(PassThru.getNode())) + PassThru = SDValue(); + + if (VT.isFloatingPoint() && !IsFixedLength) { + // Handle FP data by using an integer gather and casting the result. + if (PassThru) { + EVT PassThruVT = getPackedSVEVectorVT(VT.getVectorElementCount()); + PassThru = getSVESafeBitCast(PassThruVT, PassThru, DAG); + } InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger()); } - SDVTList VTs = DAG.getVTList(PassThru.getSimpleValueType(), MVT::Other); + SDVTList VTs = DAG.getVTList(IndexVT, MVT::Other); if (getGatherScatterIndexIsExtended(Index)) Index = Index.getOperand(0); @@ -4320,15 +4352,36 @@ SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op, if (ResNeedsSignExtend) Opcode = getSignExtendedGatherOpcode(Opcode); - SDValue Ops[] = {Chain, Mask, BasePtr, Index, InputVT, PassThru}; - SDValue Gather = DAG.getNode(Opcode, DL, VTs, Ops); + if (IsFixedLength) { + if (Index.getSimpleValueType().isFixedLengthVector()) + Index = convertToScalableVector(DAG, IndexVT, Index); + if (BasePtr.getSimpleValueType().isFixedLengthVector()) + BasePtr = convertToScalableVector(DAG, IndexVT, BasePtr); + Mask = convertFixedMaskToScalableVector(Mask, DAG); + } - if (VT.isFloatingPoint()) { - SDValue Cast = getSVESafeBitCast(VT, Gather, DAG); - return DAG.getMergeValues({Cast, Gather.getValue(1)}, DL); + SDValue Ops[] = {Chain, Mask, BasePtr, Index, InputVT}; + SDValue Result = DAG.getNode(Opcode, DL, VTs, Ops); + Chain = Result.getValue(1); + + if (IsFixedLength) { + Result = convertFromScalableVector( + DAG, VT.changeVectorElementType(IndexVT.getVectorElementType()), + Result); + Result = DAG.getNode(ISD::TRUNCATE, DL, VT.changeTypeToInteger(), Result); + Result = DAG.getNode(ISD::BITCAST, DL, VT, Result); + + if (PassThru) + Result = DAG.getSelect(DL, VT, MGT->getMask(), Result, PassThru); + } else { + if (PassThru) + Result = DAG.getSelect(DL, IndexVT, Mask, Result, PassThru); + + if (VT.isFloatingPoint()) + Result = getSVESafeBitCast(VT, Result, DAG); } - return Gather; + return DAG.getMergeValues({Result, Chain}, DL); } SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op, @@ -4337,6 +4390,8 @@ SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op, MaskedScatterSDNode *MSC = cast(Op); assert(MSC && "Can only custom lower scatter store nodes"); + bool IsFixedLength = MSC->getMemoryVT().isFixedLengthVector(); + SDValue Index = MSC->getIndex(); SDValue Chain = MSC->getChain(); SDValue StoreVal = MSC->getValue(); @@ -4353,6 +4408,7 @@ SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op, Index.getSimpleValueType().getVectorElementType() == MVT::i32; EVT VT = StoreVal.getSimpleValueType(); + EVT IndexVT = Index.getSimpleValueType(); SDVTList VTs = DAG.getVTList(MVT::Other); EVT MemVT = MSC->getMemoryVT(); SDValue InputVT = DAG.getValueType(MemVT); @@ -4361,8 +4417,21 @@ SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op, !static_cast(DAG.getSubtarget()).hasBF16()) return SDValue(); - // Handle FP data by casting the data so an integer scatter can be used. - if (VT.isFloatingPoint()) { + if (IsFixedLength) { + assert(Subtarget->useSVEForFixedLengthVectors() && + "Cannot lower when not using SVE for fixed vectors"); + IndexVT = getContainerForFixedLengthVector(DAG, IndexVT); + MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType()); + InputVT = DAG.getValueType(MemVT.changeTypeToInteger()); + + StoreVal = + DAG.getNode(ISD::BITCAST, DL, VT.changeTypeToInteger(), StoreVal); + StoreVal = DAG.getNode( + ISD::ANY_EXTEND, DL, + VT.changeVectorElementType(IndexVT.getVectorElementType()), StoreVal); + StoreVal = convertToScalableVector(DAG, IndexVT, StoreVal); + } else if (VT.isFloatingPoint()) { + // Handle FP data by casting the data so an integer scatter can be used. EVT StoreValVT = getPackedSVEVectorVT(VT.getVectorElementCount()); StoreVal = getSVESafeBitCast(StoreValVT, StoreVal, DAG); InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger()); @@ -4375,6 +4444,14 @@ SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op, selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode, /*isGather=*/false, DAG); + if (IsFixedLength) { + if (Index.getSimpleValueType().isFixedLengthVector()) + Index = convertToScalableVector(DAG, IndexVT, Index); + if (BasePtr.getSimpleValueType().isFixedLengthVector()) + BasePtr = convertToScalableVector(DAG, IndexVT, BasePtr); + Mask = convertFixedMaskToScalableVector(Mask, DAG); + } + SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, InputVT}; return DAG.getNode(Opcode, DL, VTs, Ops); } diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h index 5e52c6ece6662..4210a1ef6b9dd 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -243,7 +243,13 @@ class AArch64TTIImpl : public BasicTTIImplBase { } bool isLegalMaskedGatherScatter(Type *DataType) const { - if (isa(DataType) || !ST->hasSVE()) + if (!ST->hasSVE()) + return false; + + // For fixed vectors, scalarize if not using SVE for them. + auto *DataTypeFVTy = dyn_cast(DataType); + if (DataTypeFVTy && (!ST->useSVEForFixedLengthVectors() || + DataTypeFVTy->getNumElements() < 2)) return false; return isLegalElementTypeForSVE(DataType->getScalarType()); diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll new file mode 100644 index 0000000000000..5ae9f9ecbb419 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll @@ -0,0 +1,1138 @@ +; RUN: llc -aarch64-sve-vector-bits-min=128 -asm-verbose=0 < %s | FileCheck %s -check-prefix=NO_SVE +; RUN: llc -aarch64-sve-vector-bits-min=256 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_EQ_256 +; RUN: llc -aarch64-sve-vector-bits-min=384 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK +; RUN: llc -aarch64-sve-vector-bits-min=512 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512 +; RUN: llc -aarch64-sve-vector-bits-min=640 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512 +; RUN: llc -aarch64-sve-vector-bits-min=768 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512 +; RUN: llc -aarch64-sve-vector-bits-min=896 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512 +; RUN: llc -aarch64-sve-vector-bits-min=1024 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512 +; RUN: llc -aarch64-sve-vector-bits-min=1152 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512 +; RUN: llc -aarch64-sve-vector-bits-min=1280 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512 +; RUN: llc -aarch64-sve-vector-bits-min=1408 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512 +; RUN: llc -aarch64-sve-vector-bits-min=1536 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512 +; RUN: llc -aarch64-sve-vector-bits-min=1664 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512 +; RUN: llc -aarch64-sve-vector-bits-min=1792 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512 +; RUN: llc -aarch64-sve-vector-bits-min=1920 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512 +; RUN: llc -aarch64-sve-vector-bits-min=2048 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_2048,VBITS_GE_1024,VBITS_GE_512 + +target triple = "aarch64-unknown-linux-gnu" + +; Don't use SVE when its registers are no bigger than NEON. +; NO_SVE-NOT: ptrue + +; +; LD1B +; + +define void @masked_gather_v2i8(<2 x i8>* %a, <2 x i8*>* %b) #0 { +; CHECK-LABEL: masked_gather_v2i8: +; CHECK: ldrb [[VALS_LO:w[0-9]+]], [x0] +; CHECK-NEXT: ldrb [[VALS_HI:w[0-9]+]], [x0, #1] +; CHECK-NEXT: ldr q[[PTRS:[0-9]+]], [x1] +; CHECK-NEXT: ptrue [[PG0:p[0-9]+]].s, vl2 +; CHECK-NEXT: fmov s[[VALS:[0-9]+]], [[VALS_LO]] +; CHECK-NEXT: mov v[[VALS]].s[1], [[VALS_HI]] +; CHECK-NEXT: cmeq v[[CMP:[0-9]+]].2s, v[[VALS]].2s, #0 +; CHECK-NEXT: cmpne [[MASK:p[0-9]+]].s, [[PG0]]/z, z[[CMP]].s, #0 +; CHECK-NEXT: ld1sb { z[[RES:[0-9]+]].d }, [[MASK]]/z, [z[[PTRS]].d] +; CHECK-NEXT: xtn v[[XTN:[0-9]+]].2s, v[[RES]].2d +; CHECK-NEXT: mov [[RES_HI:w[0-9]+]], v[[XTN]].s[1] +; CHECK-NEXT: fmov [[RES_LO:w[0-9]+]], s[[XTN]] +; CHECK-NEXT: strb [[RES_LO]], [x0] +; CHECK-NEXT: strb [[RES_HI]], [x0, #1] +; CHECK-NEXT: ret + %cval = load <2 x i8>, <2 x i8>* %a + %ptrs = load <2 x i8*>, <2 x i8*>* %b + %mask = icmp eq <2 x i8> %cval, zeroinitializer + %vals = call <2 x i8> @llvm.masked.gather.v2i8(<2 x i8*> %ptrs, i32 8, <2 x i1> %mask, <2 x i8> undef) + store <2 x i8> %vals, <2 x i8>* %a + ret void +} + +define void @masked_gather_v4i8(<4 x i8>* %a, <4 x i8*>* %b) #0 { +; CHECK-LABEL: masked_gather_v4i8: +; CHECK: ldr s[[VALS:[0-9]+]], [x0] +; CHECK-NEXT: ptrue [[PG0:p[0-9]+]].d, vl4 +; CHECK-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG0]]/z, [x1] +; CHECK-NEXT: ptrue [[PG1:p[0-9]+]].h, vl4 +; CHECK-NEXT: ushll [[SHL:v[0-9]+]].8h, v[[VALS]].8b, #0 +; CHECK-NEXT: cmeq v[[CMP:[0-9]+]].4h, [[SHL]].4h, #0 +; CHECK-NEXT: cmpne [[MASK:p[0-9]+]].h, [[PG1]]/z, z[[CMP]].h, #0 +; CHECK-NEXT: ld1sb { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d] +; CHECK-NEXT: uzp1 [[UZP1:z[0-9]+]].s, [[RES]].s, [[RES]].s +; CHECK-NEXT: uzp1 z[[UZP2:[0-9]+]].h, [[UZP1]].h, [[UZP1]].h +; CHECK-NEXT: uzp1 v[[UZP3:[0-9]+]].8b, v[[UZP2]].8b, v[[UZP2]].8b +; CHECK-NEXT: str s[[UZP3]], [x0] +; CHECK-NEXT: ret + %cval = load <4 x i8>, <4 x i8>* %a + %ptrs = load <4 x i8*>, <4 x i8*>* %b + %mask = icmp eq <4 x i8> %cval, zeroinitializer + %vals = call <4 x i8> @llvm.masked.gather.v4i8(<4 x i8*> %ptrs, i32 8, <4 x i1> %mask, <4 x i8> undef) + store <4 x i8> %vals, <4 x i8>* %a + ret void +} + +define void @masked_gather_v8i8(<8 x i8>* %a, <8 x i8*>* %b) #0 { +; CHECK-LABEL: masked_gather_v8i8: +; VBITS_EQ_256: ldr d[[VALS:[0-9]+]], [x0] +; VBITS_EQ_256-NEXT: ptrue [[PG0:p[0-9]+]].d, vl4 +; VBITS_EQ_256-NEXT: add x8, x1, #32 +; VBITS_EQ_256-NEXT: ld1d { [[PTRS_LO:z[0-9]+]].d }, [[PG0]]/z, [x1] +; VBITS_EQ_256-NEXT: cmeq [[ZMSK:v[0-9]+]].8b, v[[VALS]].8b, #0 +; VBITS_EQ_256-NEXT: ld1d { [[PTRS_HI:z[0-9]+]].d }, [[PG0]]/z, [x8] +; VBITS_EQ_256-NEXT: zip2 [[VAL_HI:v[0-9]+]].8b, [[ZMSK]].8b, v[[VALS]].8b +; VBITS_EQ_256-NEXT: zip1 [[VAL_LO:v[0-9]+]].8b, [[ZMSK]].8b, v[[VALS]].8b +; VBITS_EQ_256-NEXT: shl [[SHL_HI:v[0-9]+]].4h, [[VAL_HI]].4h, #8 +; VBITS_EQ_256-NEXT: shl [[SHL_LO:v[0-9]+]].4h, [[VAL_LO]].4h, #8 +; VBITS_EQ_256-NEXT: ptrue [[PG1:p[0-9]+]].h, vl4 +; VBITS_EQ_256-NEXT: sshr v[[SSHR_HI:[0-9]+]].4h, [[SHL_HI]].4h, #8 +; VBITS_EQ_256-NEXT: sshr v[[SSHR_LO:[0-9]+]].4h, [[SHL_LO]].4h, #8 +; VBITS_EQ_256-NEXT: cmpne [[MASK_HI:p[0-9]+]].h, [[PG1]]/z, z[[SSHR_HI]].h, #0 +; VBITS_EQ_256-NEXT: cmpne [[MASK_LO:p[0-9]+]].h, [[PG1]]/z, z[[SSHR_LO]].h, #0 +; VBITS_EQ_256-NEXT: ld1sb { [[RES_HI:z[0-9]+]].d }, [[MASK_HI]]/z, {{\[}}[[PTRS_HI]].d] +; VBITS_EQ_256-NEXT: ld1sb { [[RES_LO:z[0-9]+]].d }, [[MASK_LO]]/z, {{\[}}[[PTRS_LO]].d] +; VBITS_EQ_256-NEXT: uzp1 [[UZP1_HI:z[0-9]+]].s, [[RES_HI]].s, [[RES_HI]].s +; VBITS_EQ_256-NEXT: uzp1 [[UZP1_LO:z[0-9]+]].s, [[RES_LO]].s, [[RES_LO]].s +; VBITS_EQ_256-NEXT: uzp1 z[[UZP2_HI:[0-9]+]].h, [[UZP1_HI]].h, [[UZP1_HI]].h +; VBITS_EQ_256-NEXT: uzp1 z[[UZP2_LO:[0-9]+]].h, [[UZP1_LO]].h, [[UZP1_LO]].h +; VBITS_EQ_256-NEXT: uzp1 v[[UZP3:[0-9]+]].8b, v[[UZP2_LO]].8b, v[[UZP2_HI]].8b +; VBITS_EQ_256-NEXT: str d[[UZP3]], [x0] +; VBITS_EQ_256-NEXT: ret + +; VBITS_GE_512: ldr d[[VALS:[0-9]+]], [x0] +; VBITS_GE_512-NEXT: ptrue [[PG0:p[0-9]+]].d, vl8 +; VBITS_GE_512-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG0]]/z, [x1] +; VBITS_GE_512-NEXT: ptrue [[PG1:p[0-9]+]].b, vl8 +; VBITS_GE_512-NEXT: cmeq v[[CMP:[0-9]+]].8b, v[[VALS]].8b, #0 +; VBITS_GE_512-NEXT: cmpne [[MASK:p[0-9]+]].b, [[PG1]]/z, z[[CMP]].b, #0 +; VBITS_GE_512-NEXT: ld1b { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d] +; VBITS_GE_512-NEXT: uzp1 [[UZP1:z[0-9]+]].s, [[RES]].s, [[RES]].s +; VBITS_GE_512-NEXT: uzp1 [[UZP2:z[0-9]+]].h, [[UZP1]].h, [[UZP1]].h +; VBITS_GE_512-NEXT: uzp1 z[[UZP3:[0-9]+]].b, [[UZP2]].b, [[UZP2]].b +; VBITS_GE_512-NEXT: str d[[UZP3]], [x0] +; VBITS_GE_512-NEXT: ret + %cval = load <8 x i8>, <8 x i8>* %a + %ptrs = load <8 x i8*>, <8 x i8*>* %b + %mask = icmp eq <8 x i8> %cval, zeroinitializer + %vals = call <8 x i8> @llvm.masked.gather.v8i8(<8 x i8*> %ptrs, i32 8, <8 x i1> %mask, <8 x i8> undef) + store <8 x i8> %vals, <8 x i8>* %a + ret void +} + +define void @masked_gather_v16i8(<16 x i8>* %a, <16 x i8*>* %b) #0 { +; CHECK-LABEL: masked_gather_v16i8: +; VBITS_GE_1024: ldr q[[VALS:[0-9]+]], [x0] +; VBITS_GE_1024-NEXT: ptrue [[PG0:p[0-9]+]].d, vl16 +; VBITS_GE_1024-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG0]]/z, [x1] +; VBITS_GE_1024-NEXT: ptrue [[PG1:p[0-9]+]].b, vl16 +; VBITS_GE_1024-NEXT: cmeq v[[CMP:[0-9]+]].16b, v[[VALS]].16b, #0 +; VBITS_GE_1024-NEXT: cmpne [[MASK:p[0-9]+]].b, [[PG1]]/z, z[[CMP]].b, #0 +; VBITS_GE_1024-NEXT: ld1b { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d] +; VBITS_GE_1024-NEXT: uzp1 [[UZP1:z[0-9]+]].s, [[RES]].s, [[RES]].s +; VBITS_GE_1024-NEXT: uzp1 [[UZP2:z[0-9]+]].h, [[UZP1]].h, [[UZP1]].h +; VBITS_GE_1024-NEXT: uzp1 z[[UZP3:[0-9]+]].b, [[UZP2]].b, [[UZP2]].b +; VBITS_GE_1024-NEXT: str q[[UZP3]], [x0] +; VBITS_GE_1024-NEXT: ret + %cval = load <16 x i8>, <16 x i8>* %a + %ptrs = load <16 x i8*>, <16 x i8*>* %b + %mask = icmp eq <16 x i8> %cval, zeroinitializer + %vals = call <16 x i8> @llvm.masked.gather.v16i8(<16 x i8*> %ptrs, i32 8, <16 x i1> %mask, <16 x i8> undef) + store <16 x i8> %vals, <16 x i8>* %a + ret void +} + +define void @masked_gather_v32i8(<32 x i8>* %a, <32 x i8*>* %b) #0 { +; CHECK-LABEL: masked_gather_v32i8: +; VBITS_GE_2048: ptrue [[PG0:p[0-9]+]].b, vl32 +; VBITS_GE_2048-NEXT: ld1b { [[VALS:z[0-9]+]].b }, [[PG0]]/z, [x0] +; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].d, vl32 +; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] +; VBITS_GE_2048-NEXT: cmpeq [[MASK:p[0-9]+]].b, [[PG0]]/z, [[VALS]].b, #0 +; VBITS_GE_2048-NEXT: ld1b { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d] +; VBITS_GE_2048-NEXT: uzp1 [[UZP1:z[0-9]+]].s, [[RES]].s, [[RES]].s +; VBITS_GE_2048-NEXT: uzp1 [[UZP2:z[0-9]+]].h, [[UZP1]].h, [[UZP1]].h +; VBITS_GE_2048-NEXT: uzp1 [[UZP3:z[0-9]+]].b, [[UZP2]].b, [[UZP2]].b +; VBITS_GE_2048-NEXT: st1b { [[UZP3]].b }, [[PG0]], [x0] +; VBITS_GE_2048-NEXT: ret + %cval = load <32 x i8>, <32 x i8>* %a + %ptrs = load <32 x i8*>, <32 x i8*>* %b + %mask = icmp eq <32 x i8> %cval, zeroinitializer + %vals = call <32 x i8> @llvm.masked.gather.v32i8(<32 x i8*> %ptrs, i32 8, <32 x i1> %mask, <32 x i8> undef) + store <32 x i8> %vals, <32 x i8>* %a + ret void +} + +; +; LD1H +; + +define void @masked_gather_v2i16(<2 x i16>* %a, <2 x i16*>* %b) #0 { +; CHECK-LABEL: masked_gather_v2i16: +; CHECK: ldrh [[VALS_LO:w[0-9]+]], [x0] +; CHECK-NEXT: ldrh [[VALS_HI:w[0-9]+]], [x0, #2] +; CHECK-NEXT: ldr q[[PTRS:[0-9]+]], [x1] +; CHECK-NEXT: ptrue [[PG0:p[0-9]+]].s, vl2 +; CHECK-NEXT: fmov s[[VALS:[0-9]+]], [[VALS_LO]] +; CHECK-NEXT: mov v[[VALS]].s[1], [[VALS_HI]] +; CHECK-NEXT: cmeq v[[CMP:[0-9]+]].2s, v[[VALS]].2s, #0 +; CHECK-NEXT: cmpne [[MASK:p[0-9]+]].s, [[PG0]]/z, z[[CMP]].s, #0 +; CHECK-NEXT: ld1sh { z[[RES:[0-9]+]].d }, [[MASK]]/z, [z[[PTRS]].d] +; CHECK-NEXT: xtn v[[XTN:[0-9]+]].2s, v[[RES]].2d +; CHECK-NEXT: mov [[RES_HI:w[0-9]+]], v[[XTN]].s[1] +; CHECK-NEXT: fmov [[RES_LO:w[0-9]+]], s[[XTN]] +; CHECK-NEXT: strh [[RES_LO]], [x0] +; CHECK-NEXT: strh [[RES_HI]], [x0, #2] +; CHECK-NEXT: ret + %cval = load <2 x i16>, <2 x i16>* %a + %ptrs = load <2 x i16*>, <2 x i16*>* %b + %mask = icmp eq <2 x i16> %cval, zeroinitializer + %vals = call <2 x i16> @llvm.masked.gather.v2i16(<2 x i16*> %ptrs, i32 8, <2 x i1> %mask, <2 x i16> undef) + store <2 x i16> %vals, <2 x i16>* %a + ret void +} + +define void @masked_gather_v4i16(<4 x i16>* %a, <4 x i16*>* %b) #0 { +; CHECK-LABEL: masked_gather_v4i16: +; CHECK: ldr d[[VALS:[0-9]+]], [x0] +; CHECK-NEXT: ptrue [[PG0:p[0-9]+]].d, vl4 +; CHECK-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG0]]/z, [x1] +; CHECK-NEXT: ptrue [[PG1:p[0-9]+]].h, vl4 +; CHECK-NEXT: cmeq v[[CMP:[0-9]+]].4h, v[[VALS]].4h, #0 +; CHECK-NEXT: cmpne [[MASK:p[0-9]+]].h, [[PG1]]/z, z[[CMP]].h, #0 +; CHECK-NEXT: ld1h { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d] +; CHECK-NEXT: uzp1 [[UZP1:z[0-9]+]].s, [[RES]].s, [[RES]].s +; CHECK-NEXT: uzp1 z[[UZP2:[0-9]+]].h, [[UZP1]].h, [[UZP1]].h +; CHECK-NEXT: str d[[UZP2]], [x0] +; CHECK-NEXT: ret + %cval = load <4 x i16>, <4 x i16>* %a + %ptrs = load <4 x i16*>, <4 x i16*>* %b + %mask = icmp eq <4 x i16> %cval, zeroinitializer + %vals = call <4 x i16> @llvm.masked.gather.v4i16(<4 x i16*> %ptrs, i32 8, <4 x i1> %mask, <4 x i16> undef) + store <4 x i16> %vals, <4 x i16>* %a + ret void +} + +define void @masked_gather_v8i16(<8 x i16>* %a, <8 x i16*>* %b) #0 { +; CHECK-LABEL: masked_gather_v8i16: +; VBITS_EQ_256: ldr q[[VALS:[0-9]+]], [x0] +; VBITS_EQ_256-NEXT: add x8, x1, #32 +; VBITS_EQ_256-NEXT: ptrue [[PG0:p[0-9]+]].d, vl4 +; VBITS_EQ_256-NEXT: ld1d { [[PTRS_HI:z[0-9]+]].d }, [[PG0]]/z, [x8] +; VBITS_EQ_256-NEXT: ld1d { [[PTRS_LO:z[0-9]+]].d }, [[PG0]]/z, [x1] +; VBITS_EQ_256-NEXT: ptrue [[PG1:p[0-9]+]].h, vl4 +; VBITS_EQ_256-NEXT: cmeq v[[ZMSK:[0-9]+]].8h, v[[VALS]].8h, #0 +; VBITS_EQ_256-DAG: cmpne [[MASK_LO:p[0-9]+]].h, [[PG1]]/z, z[[ZMSK]].h, #0 +; VBITS_EQ_256-DAG: ext v[[ZEXT:[0-9]+]].16b, v[[ZMSK]].16b, v[[ZMSK]].16b, #8 +; VBITS_EQ_256-DAG: cmpne [[MASK_HI:p[0-9]+]].h, [[PG1]]/z, z[[ZEXT]].h, #0 +; VBITS_EQ_256-DAG: ld1h { [[RES_LO:z[0-9]+]].d }, [[MASK_LO]]/z, {{\[}}[[PTRS_LO]].d] +; VBITS_EQ_256-DAG: ld1h { [[RES_HI:z[0-9]+]].d }, [[MASK_HI]]/z, {{\[}}[[PTRS_HI]].d] +; VBITS_EQ_256-NEXT: uzp1 [[UZP1_LO:z[0-9]+]].s, [[RES_LO]].s, [[RES_LO]].s +; VBITS_EQ_256-NEXT: uzp1 z[[UZP2_LO:[0-9]+]].h, [[UZP1_LO]].h, [[UZP1_LO]].h +; VBITS_EQ_256-NEXT: uzp1 [[UZP1_HI:z[0-9]+]].s, [[RES_HI]].s, [[RES_HI]].s +; VBITS_EQ_256-NEXT: uzp1 z[[UZP2_HI:[0-9]+]].h, [[UZP1_HI]].h, [[UZP1_HI]].h +; VBITS_EQ_256-NEXT: mov v[[UZP2_LO]].d[1], v[[UZP2_HI]].d[0] +; VBITS_EQ_256-NEXT: str q[[UZP2_LO]], [x0] +; VBITS_EQ_256-NEXT: ret + +; VBITS_GE_512: ldr q[[VALS:[0-9]+]], [x0] +; VBITS_GE_512-NEXT: ptrue [[PG0:p[0-9]+]].d, vl8 +; VBITS_GE_512-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG0]]/z, [x1] +; VBITS_GE_512-NEXT: ptrue [[PG1:p[0-9]+]].h, vl8 +; VBITS_GE_512-NEXT: cmeq v[[CMP:[0-9]+]].8h, v[[VALS]].8h, #0 +; VBITS_GE_512-NEXT: cmpne [[MASK:p[0-9]+]].h, [[PG1]]/z, z[[CMP]].h, #0 +; VBITS_GE_512-NEXT: ld1h { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d] +; VBITS_GE_512-NEXT: uzp1 [[UZP1:z[0-9]+]].s, [[RES]].s, [[RES]].s +; VBITS_GE_512-NEXT: uzp1 z[[UZP2:[0-9]+]].h, [[UZP1]].h, [[UZP1]].h +; VBITS_GE_512-NEXT: str q[[UZP2]], [x0] +; VBITS_GE_512-NEXT: ret + %cval = load <8 x i16>, <8 x i16>* %a + %ptrs = load <8 x i16*>, <8 x i16*>* %b + %mask = icmp eq <8 x i16> %cval, zeroinitializer + %vals = call <8 x i16> @llvm.masked.gather.v8i16(<8 x i16*> %ptrs, i32 8, <8 x i1> %mask, <8 x i16> undef) + store <8 x i16> %vals, <8 x i16>* %a + ret void +} + +define void @masked_gather_v16i16(<16 x i16>* %a, <16 x i16*>* %b) #0 { +; CHECK-LABEL: masked_gather_v16i16: +; VBITS_GE_1024: ptrue [[PG0:p[0-9]+]].h, vl16 +; VBITS_GE_1024-NEXT: ld1h { [[VALS:z[0-9]+]].h }, [[PG0]]/z, [x0] +; VBITS_GE_1024-NEXT: ptrue [[PG1:p[0-9]+]].d, vl16 +; VBITS_GE_1024-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] +; VBITS_GE_1024-NEXT: cmpeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, #0 +; VBITS_GE_1024-NEXT: ld1h { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d] +; VBITS_GE_1024-NEXT: uzp1 [[UZP1:z[0-9]+]].s, [[RES]].s, [[RES]].s +; VBITS_GE_1024-NEXT: uzp1 [[UZP2:z[0-9]+]].h, [[UZP1]].h, [[UZP1]].h +; VBITS_GE_1024-NEXT: st1h { [[UZP2]].h }, [[PG0]], [x0] +; VBITS_GE_1024-NEXT: ret + %cval = load <16 x i16>, <16 x i16>* %a + %ptrs = load <16 x i16*>, <16 x i16*>* %b + %mask = icmp eq <16 x i16> %cval, zeroinitializer + %vals = call <16 x i16> @llvm.masked.gather.v16i16(<16 x i16*> %ptrs, i32 8, <16 x i1> %mask, <16 x i16> undef) + store <16 x i16> %vals, <16 x i16>* %a + ret void +} + +define void @masked_gather_v32i16(<32 x i16>* %a, <32 x i16*>* %b) #0 { +; CHECK-LABEL: masked_gather_v32i16: +; VBITS_GE_2048: ptrue [[PG0:p[0-9]+]].h, vl32 +; VBITS_GE_2048-NEXT: ld1h { [[VALS:z[0-9]+]].h }, [[PG0]]/z, [x0] +; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].d, vl32 +; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] +; VBITS_GE_2048-NEXT: cmpeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, #0 +; VBITS_GE_2048-NEXT: ld1h { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d] +; VBITS_GE_2048-NEXT: uzp1 [[UZP1:z[0-9]+]].s, [[RES]].s, [[RES]].s +; VBITS_GE_2048-NEXT: uzp1 [[UZP2:z[0-9]+]].h, [[UZP1]].h, [[UZP1]].h +; VBITS_GE_2048-NEXT: st1h { [[UZP2]].h }, [[PG0]], [x0] +; VBITS_GE_2048-NEXT: ret + %cval = load <32 x i16>, <32 x i16>* %a + %ptrs = load <32 x i16*>, <32 x i16*>* %b + %mask = icmp eq <32 x i16> %cval, zeroinitializer + %vals = call <32 x i16> @llvm.masked.gather.v32i16(<32 x i16*> %ptrs, i32 8, <32 x i1> %mask, <32 x i16> undef) + store <32 x i16> %vals, <32 x i16>* %a + ret void +} + +; +; LD1W +; + +define void @masked_gather_v2i32(<2 x i32>* %a, <2 x i32*>* %b) #0 { +; CHECK-LABEL: masked_gather_v2i32: +; CHECK: ldr d[[VALS:[0-9]+]], [x0] +; CHECK-NEXT: ldr q[[PTRS:[0-9]+]], [x1] +; CHECK-NEXT: ptrue [[PG0:p[0-9]+]].s, vl2 +; CHECK-NEXT: cmeq v[[CMP:[0-9]+]].2s, v[[VALS]].2s, #0 +; CHECK-NEXT: cmpne [[MASK:p[0-9]+]].s, [[PG0]]/z, z[[CMP]].s, #0 +; CHECK-NEXT: ld1w { z[[RES:[0-9]+]].d }, [[MASK]]/z, [z[[PTRS]].d] +; CHECK-NEXT: xtn v[[XTN:[0-9]+]].2s, v[[RES]].2d +; CHECK-NEXT: str d[[XTN]], [x0] +; CHECK-NEXT: ret + %cval = load <2 x i32>, <2 x i32>* %a + %ptrs = load <2 x i32*>, <2 x i32*>* %b + %mask = icmp eq <2 x i32> %cval, zeroinitializer + %vals = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %ptrs, i32 8, <2 x i1> %mask, <2 x i32> undef) + store <2 x i32> %vals, <2 x i32>* %a + ret void +} + +define void @masked_gather_v4i32(<4 x i32>* %a, <4 x i32*>* %b) #0 { +; CHECK-LABEL: masked_gather_v4i32: +; CHECK: ldr q[[VALS:[0-9]+]], [x0] +; CHECK-NEXT: ptrue [[PG0:p[0-9]+]].d, vl4 +; CHECK-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG0]]/z, [x1] +; CHECK-NEXT: ptrue [[PG1:p[0-9]+]].s, vl4 +; CHECK-NEXT: cmeq v[[CMP:[0-9]+]].4s, v[[VALS]].4s, #0 +; CHECK-NEXT: cmpne [[MASK:p[0-9]+]].s, [[PG1]]/z, z[[CMP]].s, #0 +; CHECK-NEXT: ld1w { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d] +; CHECK-NEXT: uzp1 z[[UZP:[0-9]+]].s, [[RES]].s, [[RES]].s +; CHECK-NEXT: str q[[UZP]], [x0] +; CHECK-NEXT: ret + %cval = load <4 x i32>, <4 x i32>* %a + %ptrs = load <4 x i32*>, <4 x i32*>* %b + %mask = icmp eq <4 x i32> %cval, zeroinitializer + %vals = call <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32 8, <4 x i1> %mask, <4 x i32> undef) + store <4 x i32> %vals, <4 x i32>* %a + ret void +} + +define void @masked_gather_v8i32(<8 x i32>* %a, <8 x i32*>* %b) #0 { +; CHECK-LABEL: masked_gather_v8i32: +; VBITS_EQ_256: ptrue [[PG0:p[0-9]+]].s, vl8 +; VBITS_EQ_256-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0] +; VBITS_EQ_256-NEXT: ptrue [[PG1:p[0-9]+]].d, vl4 +; VBITS_EQ_256-NEXT: add x8, x1, #32 +; VBITS_EQ_256-NEXT: ld1d { [[PTRS_LO:z[0-9]+]].d }, [[PG1]]/z, [x1] +; VBITS_EQ_256-NEXT: ld1d { [[PTRS_HI:z[0-9]+]].d }, [[PG1]]/z, [x8] +; VBITS_EQ_256-NEXT: cmpeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, #0 +; VBITS_EQ_256-NEXT: mov x8, sp +; VBITS_EQ_256-NEXT: mov [[MONE:z[0-9]+]].s, p1/z, #-1 +; VBITS_EQ_256-NEXT: st1w { [[MONE]].s }, [[PG0]], [x8] +; VBITS_EQ_256-NEXT: ldr q[[CMP_HI:[0-9]+]], [sp, #16] +; VBITS_EQ_256-NEXT: ptrue [[PG2:p[0-9]+]].s, vl4 +; VBITS_EQ_256-NEXT: cmpne [[MASK_HI:p[0-9]+]].s, [[PG2]]/z, z[[CMP_HI]].s, #0 +; VBITS_EQ_256-NEXT: ld1w { [[RES_HI:z[0-9]+]].d }, [[MASK_HI]]/z, {{\[}}[[PTRS_HI]].d] +; VBITS_EQ_256-NEXT: ldr q[[CMP_LO:[0-9]+]], [sp] +; VBITS_EQ_256-NEXT: uzp1 [[UZP_HI:z[0-9]+]].s, [[RES_HI]].s, [[RES_HI]].s +; VBITS_EQ_256-NEXT: cmpne [[MASK_LO:p[0-9]+]].s, [[PG2]]/z, z[[CMP_LO]].s, #0 +; VBITS_EQ_256-NEXT: ld1w { [[RES_LO:z[0-9]+]].d }, [[MASK_LO]]/z, {{\[}}[[PTRS_LO]].d] +; VBITS_EQ_256-NEXT: uzp1 [[UZP_LO:z[0-9]+]].s, [[RES_LO]].s, [[RES_LO]].s +; VBITS_EQ_256-NEXT: splice [[RES:z[0-9]+]].s, [[PG1]], [[RES_LO]].s, [[RES_HI]].s +; VBITS_EQ_256-NEXT: st1w { [[RES]].s }, [[PG0]], [x0] + +; VBITS_GE_512: ptrue [[PG0:p[0-9]+]].s, vl8 +; VBITS_GE_512-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0] +; VBITS_GE_512-NEXT: ptrue [[PG1:p[0-9]+]].d, vl8 +; VBITS_GE_512-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] +; VBITS_GE_512-NEXT: cmpeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, #0 +; VBITS_GE_512-NEXT: ld1w { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d] +; VBITS_GE_512-NEXT: uzp1 [[UZP:z[0-9]+]].s, [[RES]].s, [[RES]].s +; VBITS_GE_512-NEXT: st1w { [[UZP]].s }, [[PG0]], [x0] +; VBITS_GE_512-NEXT: ret + %cval = load <8 x i32>, <8 x i32>* %a + %ptrs = load <8 x i32*>, <8 x i32*>* %b + %mask = icmp eq <8 x i32> %cval, zeroinitializer + %vals = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %ptrs, i32 8, <8 x i1> %mask, <8 x i32> undef) + store <8 x i32> %vals, <8 x i32>* %a + ret void +} + +define void @masked_gather_v16i32(<16 x i32>* %a, <16 x i32*>* %b) #0 { +; CHECK-LABEL: masked_gather_v16i32: +; VBITS_GE_1024: ptrue [[PG0:p[0-9]+]].s, vl16 +; VBITS_GE_1024-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0] +; VBITS_GE_1024-NEXT: ptrue [[PG1:p[0-9]+]].d, vl16 +; VBITS_GE_1024-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] +; VBITS_GE_1024-NEXT: cmpeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, #0 +; VBITS_GE_1024-NEXT: ld1w { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d] +; VBITS_GE_1024-NEXT: uzp1 [[UZP:z[0-9]+]].s, [[RES]].s, [[RES]].s +; VBITS_GE_1024-NEXT: st1w { [[UZP]].s }, [[PG0]], [x0] +; VBITS_GE_1024-NEXT: ret + %cval = load <16 x i32>, <16 x i32>* %a + %ptrs = load <16 x i32*>, <16 x i32*>* %b + %mask = icmp eq <16 x i32> %cval, zeroinitializer + %vals = call <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> %ptrs, i32 8, <16 x i1> %mask, <16 x i32> undef) + store <16 x i32> %vals, <16 x i32>* %a + ret void +} + +define void @masked_gather_v32i32(<32 x i32>* %a, <32 x i32*>* %b) #0 { +; CHECK-LABEL: masked_gather_v32i32: +; VBITS_GE_2048: ptrue [[PG0:p[0-9]+]].s, vl32 +; VBITS_GE_2048-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0] +; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].d, vl32 +; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] +; VBITS_GE_2048-NEXT: cmpeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, #0 +; VBITS_GE_2048-NEXT: ld1w { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d] +; VBITS_GE_2048-NEXT: uzp1 [[UZP:z[0-9]+]].s, [[RES]].s, [[RES]].s +; VBITS_GE_2048-NEXT: st1w { [[UZP]].s }, [[PG0]], [x0] +; VBITS_GE_2048-NEXT: ret + %cval = load <32 x i32>, <32 x i32>* %a + %ptrs = load <32 x i32*>, <32 x i32*>* %b + %mask = icmp eq <32 x i32> %cval, zeroinitializer + %vals = call <32 x i32> @llvm.masked.gather.v32i32(<32 x i32*> %ptrs, i32 8, <32 x i1> %mask, <32 x i32> undef) + store <32 x i32> %vals, <32 x i32>* %a + ret void +} + +; +; LD1D +; + +; Scalarize 1 x i64 gathers +define void @masked_gather_v1i64(<1 x i64>* %a, <1 x i64*>* %b) #0 { +; CHECK-LABEL: masked_gather_v1i64: +; CHECK-NOT: ptrue + %cval = load <1 x i64>, <1 x i64>* %a + %ptrs = load <1 x i64*>, <1 x i64*>* %b + %mask = icmp eq <1 x i64> %cval, zeroinitializer + %vals = call <1 x i64> @llvm.masked.gather.v1i64(<1 x i64*> %ptrs, i32 8, <1 x i1> %mask, <1 x i64> undef) + store <1 x i64> %vals, <1 x i64>* %a + ret void +} + +define void @masked_gather_v2i64(<2 x i64>* %a, <2 x i64*>* %b) #0 { +; CHECK-LABEL: masked_gather_v2i64: +; CHECK: ldr q[[VALS:[0-9]+]], [x0] +; CHECK-NEXT: ldr q[[PTRS:[0-9]+]], [x1] +; CHECK-NEXT: ptrue [[PG0:p[0-9]+]].d, vl2 +; CHECK-NEXT: cmeq v[[CMP:[0-9]+]].2d, v[[VALS]].2d, #0 +; CHECK-NEXT: cmpne [[MASK:p[0-9]+]].d, [[PG0]]/z, z[[CMP]].d, #0 +; CHECK-NEXT: ld1d { z[[RES:[0-9]+]].d }, [[MASK]]/z, [z[[PTRS]].d] +; CHECK-NEXT: str q[[RES]], [x0] +; CHECK-NEXT: ret + %cval = load <2 x i64>, <2 x i64>* %a + %ptrs = load <2 x i64*>, <2 x i64*>* %b + %mask = icmp eq <2 x i64> %cval, zeroinitializer + %vals = call <2 x i64> @llvm.masked.gather.v2i64(<2 x i64*> %ptrs, i32 8, <2 x i1> %mask, <2 x i64> undef) + store <2 x i64> %vals, <2 x i64>* %a + ret void +} + +define void @masked_gather_v4i64(<4 x i64>* %a, <4 x i64*>* %b) #0 { +; CHECK-LABEL: masked_gather_v4i64: +; CHECK: ptrue [[PG0:p[0-9]+]].d, vl4 +; CHECK-NEXT: ld1d { [[VALS:z[0-9]+]].d }, [[PG0]]/z, [x0] +; CHECK-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG0]]/z, [x1] +; CHECK-NEXT: cmpeq [[MASK:p[0-9]+]].d, [[PG0]]/z, [[VALS]].d, #0 +; CHECK-NEXT: ld1d { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d] +; CHECK-NEXT: st1d { [[RES]].d }, [[PG0]], [x0] +; CHECK-NEXT: ret + %cval = load <4 x i64>, <4 x i64>* %a + %ptrs = load <4 x i64*>, <4 x i64*>* %b + %mask = icmp eq <4 x i64> %cval, zeroinitializer + %vals = call <4 x i64> @llvm.masked.gather.v4i64(<4 x i64*> %ptrs, i32 8, <4 x i1> %mask, <4 x i64> undef) + store <4 x i64> %vals, <4 x i64>* %a + ret void +} + +define void @masked_gather_v8i64(<8 x i64>* %a, <8 x i64*>* %b) #0 { +; CHECK-LABEL: masked_gather_v8i64: +; VBITS_EQ_256: add x8, x0, #32 +; VBITS_EQ_256-NEXT: ptrue [[PG0:p[0-9]+]].d, vl4 +; VBITS_EQ_256-NEXT: add x9, x1, #32 +; VBITS_EQ_256-NEXT: ld1d { [[VALS_HI:z[0-9]+]].d }, [[PG0]]/z, [x8] +; VBITS_EQ_256-NEXT: ld1d { [[VALS_LO:z[0-9]+]].d }, [[PG0]]/z, [x0] +; VBITS_EQ_256-NEXT: ld1d { [[PTRS_LO:z[0-9]+]].d }, [[PG0]]/z, [x1] +; VBITS_EQ_256-NEXT: ld1d { [[PTRS_HI:z[0-9]+]].d }, [[PG0]]/z, [x9] +; VBITS_EQ_256-NEXT: cmpeq [[MASK_HI:p[0-9]+]].d, [[PG0]]/z, [[VALS_HI]].d, #0 +; VBITS_EQ_256-NEXT: cmpeq [[MASK_LO:p[0-9]+]].d, [[PG0]]/z, [[VALS_LO]].d, #0 +; VBITS_EQ_256-NEXT: ld1d { [[RES_HI:z[0-9]+]].d }, [[MASK_HI]]/z, {{\[}}[[PTRS_HI]].d] +; VBITS_EQ_256-NEXT: ld1d { [[RES_LO:z[0-9]+]].d }, [[MASK_LO]]/z, {{\[}}[[PTRS_LO]].d] +; VBITS_EQ_256-NEXT: st1d { [[RES_HI]].d }, [[PG0]], [x8] +; VBITS_EQ_256-NEXT: st1d { [[RES_LO]].d }, [[PG0]], [x0] +; VBITS_EQ_256-NEXT: ret + +; VBITS_GE_512: ptrue [[PG0:p[0-9]+]].d, vl8 +; VBITS_GE_512-NEXT: ld1d { [[VALS:z[0-9]+]].d }, [[PG0]]/z, [x0] +; VBITS_GE_512-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG0]]/z, [x1] +; VBITS_GE_512-NEXT: cmpeq [[MASK:p[0-9]+]].d, [[PG0]]/z, [[VALS]].d, #0 +; VBITS_GE_512-NEXT: ld1d { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d] +; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG0]], [x0] +; VBITS_GE_512-NEXT: ret + %cval = load <8 x i64>, <8 x i64>* %a + %ptrs = load <8 x i64*>, <8 x i64*>* %b + %mask = icmp eq <8 x i64> %cval, zeroinitializer + %vals = call <8 x i64> @llvm.masked.gather.v8i64(<8 x i64*> %ptrs, i32 8, <8 x i1> %mask, <8 x i64> undef) + store <8 x i64> %vals, <8 x i64>* %a + ret void +} + +define void @masked_gather_v16i64(<16 x i64>* %a, <16 x i64*>* %b) #0 { +; CHECK-LABEL: masked_gather_v16i64: +; VBITS_GE_1024: ptrue [[PG0:p[0-9]+]].d, vl16 +; VBITS_GE_1024-NEXT: ld1d { [[VALS:z[0-9]+]].d }, [[PG0]]/z, [x0] +; VBITS_GE_1024-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG0]]/z, [x1] +; VBITS_GE_1024-NEXT: cmpeq [[MASK:p[0-9]+]].d, [[PG0]]/z, [[VALS]].d, #0 +; VBITS_GE_1024-NEXT: ld1d { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d] +; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG0]], [x0] +; VBITS_GE_1024-NEXT: ret + %cval = load <16 x i64>, <16 x i64>* %a + %ptrs = load <16 x i64*>, <16 x i64*>* %b + %mask = icmp eq <16 x i64> %cval, zeroinitializer + %vals = call <16 x i64> @llvm.masked.gather.v16i64(<16 x i64*> %ptrs, i32 8, <16 x i1> %mask, <16 x i64> undef) + store <16 x i64> %vals, <16 x i64>* %a + ret void +} + +define void @masked_gather_v32i64(<32 x i64>* %a, <32 x i64*>* %b) #0 { +; CHECK-LABEL: masked_gather_v32i64: +; VBITS_GE_2048: ptrue [[PG0:p[0-9]+]].d, vl32 +; VBITS_GE_2048-NEXT: ld1d { [[VALS:z[0-9]+]].d }, [[PG0]]/z, [x0] +; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG0]]/z, [x1] +; VBITS_GE_2048-NEXT: cmpeq [[MASK:p[0-9]+]].d, [[PG0]]/z, [[VALS]].d, #0 +; VBITS_GE_2048-NEXT: ld1d { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d] +; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG0]], [x0] +; VBITS_GE_2048-NEXT: ret + %cval = load <32 x i64>, <32 x i64>* %a + %ptrs = load <32 x i64*>, <32 x i64*>* %b + %mask = icmp eq <32 x i64> %cval, zeroinitializer + %vals = call <32 x i64> @llvm.masked.gather.v32i64(<32 x i64*> %ptrs, i32 8, <32 x i1> %mask, <32 x i64> undef) + store <32 x i64> %vals, <32 x i64>* %a + ret void +} + +; +; LD1H (float) +; + +define void @masked_gather_v2f16(<2 x half>* %a, <2 x half*>* %b) #0 { +; CHECK-LABEL: masked_gather_v2f16: +; CHECK: ldr s[[VALS:[0-9]+]], [x0] +; CHECK-NEXT: movi d[[ZERO:[0-9]+]], #0000000000000000 +; CHECK-NEXT: ldr q[[PTRS:[0-9]+]], [x1] +; CHECK-NEXT: ptrue [[PG0:p[0-9]+]].h, vl4 +; CHECK-NEXT: fcmeq v[[CMP:[0-9]+]].4h, v[[VALS]].4h, #0.0 +; CHECK-NEXT: umov w8, v[[CMP]].h[0] +; CHECK-NEXT: umov w9, v[[CMP]].h[1] +; CHECK-NEXT: fmov s[[CMP]], w8 +; CHECK-NEXT: mov v[[CMP]].s[1], w9 +; CHECK-NEXT: shl v[[CMP]].2s, v[[CMP]].2s, #16 +; CHECK-NEXT: sshr v[[CMP]].2s, v[[CMP]].2s, #16 +; CHECK-NEXT: fmov w9, s[[CMP]] +; CHECK-NEXT: mov w8, v[[CMP]].s[1] +; CHECK-NEXT: mov v[[NCMP:[0-9]+]].h[0], w9 +; CHECK-NEXT: mov v[[NCMP]].h[1], w8 +; CHECK-NEXT: shl v[[SHL:[0-9]+]].4h, v[[NCMP]].4h, #15 +; CHECK-NEXT: sshr v[[SHL]].4h, v[[SHL]].4h, #15 +; CHECK-NEXT: cmpne [[MASK:p[0-9]+]].h, [[PG0]]/z, z[[SHL]].h, #0 +; CHECK-NEXT: ld1h { [[RES:z[0-9]+]].d }, [[MASK]]/z, [z[[PTRS]].d] +; CHECK-NEXT: uzp1 [[UZP1:z[0-9]+]].s, [[RES]].s, [[RES]].s +; CHECK-NEXT: uzp1 z[[UZP2:[0-9]+]].h, [[UZP1]].h, [[UZP1]].h +; CHECK-NEXT: str s[[UZP2]], [x0] +; CHECK-NEXT: ret + %cval = load <2 x half>, <2 x half>* %a + %ptrs = load <2 x half*>, <2 x half*>* %b + %mask = fcmp oeq <2 x half> %cval, zeroinitializer + %vals = call <2 x half> @llvm.masked.gather.v2f16(<2 x half*> %ptrs, i32 8, <2 x i1> %mask, <2 x half> undef) + store <2 x half> %vals, <2 x half>* %a + ret void +} + +define void @masked_gather_v4f16(<4 x half>* %a, <4 x half*>* %b) #0 { +; CHECK-LABEL: masked_gather_v4f16: +; CHECK: ldr d[[VALS:[0-9]+]], [x0] +; CHECK-NEXT: ptrue [[PG0:p[0-9]+]].d, vl4 +; CHECK-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG0]]/z, [x1] +; CHECK-NEXT: ptrue [[PG1:p[0-9]+]].h, vl4 +; CHECK-NEXT: fcmeq v[[CMP:[0-9]+]].4h, v[[VALS]].4h, #0 +; CHECK-NEXT: cmpne [[MASK:p[0-9]+]].h, [[PG1]]/z, z[[CMP]].h, #0 +; CHECK-NEXT: ld1h { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d] +; CHECK-NEXT: uzp1 [[UZP1:z[0-9]+]].s, [[RES]].s, [[RES]].s +; CHECK-NEXT: uzp1 z[[UZP2:[0-9]+]].h, [[UZP1]].h, [[UZP1]].h +; CHECK-NEXT: str d[[UZP2]], [x0] +; CHECK-NEXT: ret + %cval = load <4 x half>, <4 x half>* %a + %ptrs = load <4 x half*>, <4 x half*>* %b + %mask = fcmp oeq <4 x half> %cval, zeroinitializer + %vals = call <4 x half> @llvm.masked.gather.v4f16(<4 x half*> %ptrs, i32 8, <4 x i1> %mask, <4 x half> undef) + store <4 x half> %vals, <4 x half>* %a + ret void +} + +define void @masked_gather_v8f16(<8 x half>* %a, <8 x half*>* %b) #0 { +; CHECK-LABEL: masked_gather_v8f16: +; VBITS_GE_512: ldr q[[VALS:[0-9]+]], [x0] +; VBITS_GE_512-NEXT: ptrue [[PG0:p[0-9]+]].d, vl8 +; VBITS_GE_512-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG0]]/z, [x1] +; VBITS_GE_512-NEXT: ptrue [[PG1:p[0-9]+]].h, vl8 +; VBITS_GE_512-NEXT: fcmeq v[[CMP:[0-9]+]].8h, v[[VALS]].8h, #0 +; VBITS_GE_512-NEXT: cmpne [[MASK:p[0-9]+]].h, [[PG1]]/z, z[[CMP]].h, #0 +; VBITS_GE_512-NEXT: ld1h { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d] +; VBITS_GE_512-NEXT: uzp1 [[UZP1:z[0-9]+]].s, [[RES]].s, [[RES]].s +; VBITS_GE_512-NEXT: uzp1 z[[UZP2:[0-9]+]].h, [[UZP1]].h, [[UZP1]].h +; VBITS_GE_512-NEXT: str q[[UZP2]], [x0] +; VBITS_GE_512-NEXT: ret + %cval = load <8 x half>, <8 x half>* %a + %ptrs = load <8 x half*>, <8 x half*>* %b + %mask = fcmp oeq <8 x half> %cval, zeroinitializer + %vals = call <8 x half> @llvm.masked.gather.v8f16(<8 x half*> %ptrs, i32 8, <8 x i1> %mask, <8 x half> undef) + store <8 x half> %vals, <8 x half>* %a + ret void +} + +define void @masked_gather_v16f16(<16 x half>* %a, <16 x half*>* %b) #0 { +; CHECK-LABEL: masked_gather_v16f16: +; VBITS_GE_1024: ptrue [[PG0:p[0-9]+]].h, vl16 +; VBITS_GE_1024-NEXT: ld1h { [[VALS:z[0-9]+]].h }, [[PG0]]/z, [x0] +; VBITS_GE_1024-NEXT: ptrue [[PG1:p[0-9]+]].d, vl16 +; VBITS_GE_1024-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] +; VBITS_GE_1024-NEXT: mov [[ZERO:z[0-9]+]].h, #0 +; VBITS_GE_1024-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, [[ZERO]].h +; VBITS_GE_1024-NEXT: ld1h { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d] +; VBITS_GE_1024-NEXT: uzp1 [[UZP1:z[0-9]+]].s, [[RES]].s, [[RES]].s +; VBITS_GE_1024-NEXT: uzp1 [[UZP2:z[0-9]+]].h, [[UZP1]].h, [[UZP1]].h +; VBITS_GE_1024-NEXT: st1h { [[UZP2]].h }, [[PG0]], [x0] +; VBITS_GE_1024-NEXT: ret + %cval = load <16 x half>, <16 x half>* %a + %ptrs = load <16 x half*>, <16 x half*>* %b + %mask = fcmp oeq <16 x half> %cval, zeroinitializer + %vals = call <16 x half> @llvm.masked.gather.v16f16(<16 x half*> %ptrs, i32 8, <16 x i1> %mask, <16 x half> undef) + store <16 x half> %vals, <16 x half>* %a + ret void +} + +define void @masked_gather_v32f16(<32 x half>* %a, <32 x half*>* %b) #0 { +; CHECK-LABEL: masked_gather_v32f16: +; VBITS_GE_2048: ptrue [[PG0:p[0-9]+]].h, vl32 +; VBITS_GE_2048-NEXT: ld1h { [[VALS:z[0-9]+]].h }, [[PG0]]/z, [x0] +; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].d, vl32 +; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] +; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].h, #0 +; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, [[ZERO]].h +; VBITS_GE_2048-NEXT: ld1h { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d] +; VBITS_GE_2048-NEXT: uzp1 [[UZP1:z[0-9]+]].s, [[RES]].s, [[RES]].s +; VBITS_GE_2048-NEXT: uzp1 [[UZP2:z[0-9]+]].h, [[UZP1]].h, [[UZP1]].h +; VBITS_GE_2048-NEXT: st1h { [[UZP2]].h }, [[PG0]], [x0] +; VBITS_GE_2048-NEXT: ret + %cval = load <32 x half>, <32 x half>* %a + %ptrs = load <32 x half*>, <32 x half*>* %b + %mask = fcmp oeq <32 x half> %cval, zeroinitializer + %vals = call <32 x half> @llvm.masked.gather.v32f16(<32 x half*> %ptrs, i32 8, <32 x i1> %mask, <32 x half> undef) + store <32 x half> %vals, <32 x half>* %a + ret void +} + +; +; LD1W (float) +; + +define void @masked_gather_v2f32(<2 x float>* %a, <2 x float*>* %b) #0 { +; CHECK-LABEL: masked_gather_v2f32: +; CHECK: ldr d[[VALS:[0-9]+]], [x0] +; CHECK-NEXT: ldr q[[PTRS:[0-9]+]], [x1] +; CHECK-NEXT: ptrue [[PG0:p[0-9]+]].s, vl2 +; CHECK-NEXT: fcmeq v[[CMP:[0-9]+]].2s, v[[VALS]].2s, #0 +; CHECK-NEXT: cmpne [[MASK:p[0-9]+]].s, [[PG0]]/z, z[[CMP]].s, #0 +; CHECK-NEXT: ld1w { z[[RES:[0-9]+]].d }, [[MASK]]/z, [z[[PTRS]].d] +; CHECK-NEXT: xtn v[[XTN:[0-9]+]].2s, v[[RES]].2d +; CHECK-NEXT: str d[[XTN]], [x0] +; CHECK-NEXT: ret + %cval = load <2 x float>, <2 x float>* %a + %ptrs = load <2 x float*>, <2 x float*>* %b + %mask = fcmp oeq <2 x float> %cval, zeroinitializer + %vals = call <2 x float> @llvm.masked.gather.v2f32(<2 x float*> %ptrs, i32 8, <2 x i1> %mask, <2 x float> undef) + store <2 x float> %vals, <2 x float>* %a + ret void +} + +define void @masked_gather_v4f32(<4 x float>* %a, <4 x float*>* %b) #0 { +; CHECK-LABEL: masked_gather_v4f32: +; CHECK: ldr q[[VALS:[0-9]+]], [x0] +; CHECK-NEXT: ptrue [[PG0:p[0-9]+]].d, vl4 +; CHECK-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG0]]/z, [x1] +; CHECK-NEXT: ptrue [[PG1:p[0-9]+]].s, vl4 +; CHECK-NEXT: fcmeq v[[CMP:[0-9]+]].4s, v[[VALS]].4s, #0 +; CHECK-NEXT: cmpne [[MASK:p[0-9]+]].s, [[PG1]]/z, z[[CMP]].s, #0 +; CHECK-NEXT: ld1w { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d] +; CHECK-NEXT: uzp1 z[[UZP:[0-9]+]].s, [[RES]].s, [[RES]].s +; CHECK-NEXT: str q[[UZP]], [x0] +; CHECK-NEXT: ret + %cval = load <4 x float>, <4 x float>* %a + %ptrs = load <4 x float*>, <4 x float*>* %b + %mask = fcmp oeq <4 x float> %cval, zeroinitializer + %vals = call <4 x float> @llvm.masked.gather.v4f32(<4 x float*> %ptrs, i32 8, <4 x i1> %mask, <4 x float> undef) + store <4 x float> %vals, <4 x float>* %a + ret void +} + +define void @masked_gather_v8f32(<8 x float>* %a, <8 x float*>* %b) #0 { +; CHECK-LABEL: masked_gather_v8f32: +; VBITS_GE_512: ptrue [[PG0:p[0-9]+]].s, vl8 +; VBITS_GE_512-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0] +; VBITS_GE_512-NEXT: ptrue [[PG1:p[0-9]+]].d, vl8 +; VBITS_GE_512-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] +; VBITS_GE_512-NEXT: mov [[ZERO:z[0-9]+]].s, #0 +; VBITS_GE_512-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, [[ZERO]].s +; VBITS_GE_512-NEXT: ld1w { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d] +; VBITS_GE_512-NEXT: uzp1 [[UZP:z[0-9]+]].s, [[RES]].s, [[RES]].s +; VBITS_GE_512-NEXT: st1w { [[UZP]].s }, [[PG0]], [x0] +; VBITS_GE_512-NEXT: ret + %cval = load <8 x float>, <8 x float>* %a + %ptrs = load <8 x float*>, <8 x float*>* %b + %mask = fcmp oeq <8 x float> %cval, zeroinitializer + %vals = call <8 x float> @llvm.masked.gather.v8f32(<8 x float*> %ptrs, i32 8, <8 x i1> %mask, <8 x float> undef) + store <8 x float> %vals, <8 x float>* %a + ret void +} + +define void @masked_gather_v16f32(<16 x float>* %a, <16 x float*>* %b) #0 { +; CHECK-LABEL: masked_gather_v16f32: +; VBITS_GE_1024: ptrue [[PG0:p[0-9]+]].s, vl16 +; VBITS_GE_1024-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0] +; VBITS_GE_1024-NEXT: ptrue [[PG1:p[0-9]+]].d, vl16 +; VBITS_GE_1024-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] +; VBITS_GE_1024-NEXT: mov [[ZERO:z[0-9]+]].s, #0 +; VBITS_GE_1024-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, [[ZERO]].s +; VBITS_GE_1024-NEXT: ld1w { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d] +; VBITS_GE_1024-NEXT: uzp1 [[UZP:z[0-9]+]].s, [[RES]].s, [[RES]].s +; VBITS_GE_1024-NEXT: st1w { [[UZP]].s }, [[PG0]], [x0] +; VBITS_GE_1024-NEXT: ret + %cval = load <16 x float>, <16 x float>* %a + %ptrs = load <16 x float*>, <16 x float*>* %b + %mask = fcmp oeq <16 x float> %cval, zeroinitializer + %vals = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %ptrs, i32 8, <16 x i1> %mask, <16 x float> undef) + store <16 x float> %vals, <16 x float>* %a + ret void +} + +define void @masked_gather_v32f32(<32 x float>* %a, <32 x float*>* %b) #0 { +; CHECK-LABEL: masked_gather_v32f32: +; VBITS_GE_2048: ptrue [[PG0:p[0-9]+]].s, vl32 +; VBITS_GE_2048-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0] +; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].d, vl32 +; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] +; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].s, #0 +; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, [[ZERO]].s +; VBITS_GE_2048-NEXT: ld1w { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d] +; VBITS_GE_2048-NEXT: uzp1 [[UZP:z[0-9]+]].s, [[RES]].s, [[RES]].s +; VBITS_GE_2048-NEXT: st1w { [[UZP]].s }, [[PG0]], [x0] +; VBITS_GE_2048-NEXT: ret + %cval = load <32 x float>, <32 x float>* %a + %ptrs = load <32 x float*>, <32 x float*>* %b + %mask = fcmp oeq <32 x float> %cval, zeroinitializer + %vals = call <32 x float> @llvm.masked.gather.v32f32(<32 x float*> %ptrs, i32 8, <32 x i1> %mask, <32 x float> undef) + store <32 x float> %vals, <32 x float>* %a + ret void +} + +; +; LD1D (float) +; + +; Scalarize 1 x double gathers +define void @masked_gather_v1f64(<1 x double>* %a, <1 x double*>* %b) #0 { +; CHECK-LABEL: masked_gather_v1f64: +; CHECK-NOT: ptrue + %cval = load <1 x double>, <1 x double>* %a + %ptrs = load <1 x double*>, <1 x double*>* %b + %mask = fcmp oeq <1 x double> %cval, zeroinitializer + %vals = call <1 x double> @llvm.masked.gather.v1f64(<1 x double*> %ptrs, i32 8, <1 x i1> %mask, <1 x double> undef) + store <1 x double> %vals, <1 x double>* %a + ret void +} + +define void @masked_gather_v2f64(<2 x double>* %a, <2 x double*>* %b) #0 { +; CHECK-LABEL: masked_gather_v2f64: +; CHECK: ldr q[[VALS:[0-9]+]], [x0] +; CHECK-NEXT: ldr q[[PTRS:[0-9]+]], [x1] +; CHECK-NEXT: ptrue [[PG0:p[0-9]+]].d, vl2 +; CHECK-NEXT: fcmeq v[[CMP:[0-9]+]].2d, v[[VALS]].2d, #0 +; CHECK-NEXT: cmpne [[MASK:p[0-9]+]].d, [[PG0]]/z, z[[CMP]].d, #0 +; CHECK-NEXT: ld1d { z[[RES:[0-9]+]].d }, [[MASK]]/z, [z[[PTRS]].d] +; CHECK-NEXT: str q[[RES]], [x0] +; CHECK-NEXT: ret + %cval = load <2 x double>, <2 x double>* %a + %ptrs = load <2 x double*>, <2 x double*>* %b + %mask = fcmp oeq <2 x double> %cval, zeroinitializer + %vals = call <2 x double> @llvm.masked.gather.v2f64(<2 x double*> %ptrs, i32 8, <2 x i1> %mask, <2 x double> undef) + store <2 x double> %vals, <2 x double>* %a + ret void +} + +define void @masked_gather_v4f64(<4 x double>* %a, <4 x double*>* %b) #0 { +; CHECK-LABEL: masked_gather_v4f64: +; CHECK: ptrue [[PG0:p[0-9]+]].d, vl4 +; CHECK-NEXT: ld1d { [[VALS:z[0-9]+]].d }, [[PG0]]/z, [x0] +; CHECK-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG0]]/z, [x1] +; CHECK-NEXT: mov [[ZERO:z[0-9]+]].d, #0 +; CHECK-NEXT: fcmeq [[MASK:p[0-9]+]].d, [[PG0]]/z, [[VALS]].d, [[ZERO]].d +; CHECK-NEXT: ld1d { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d] +; CHECK-NEXT: st1d { [[RES]].d }, [[PG0]], [x0] +; CHECK-NEXT: ret + %cval = load <4 x double>, <4 x double>* %a + %ptrs = load <4 x double*>, <4 x double*>* %b + %mask = fcmp oeq <4 x double> %cval, zeroinitializer + %vals = call <4 x double> @llvm.masked.gather.v4f64(<4 x double*> %ptrs, i32 8, <4 x i1> %mask, <4 x double> undef) + store <4 x double> %vals, <4 x double>* %a + ret void +} + +define void @masked_gather_v8f64(<8 x double>* %a, <8 x double*>* %b) #0 { +; CHECK-LABEL: masked_gather_v8f64: +; VBITS_GE_512: ptrue [[PG0:p[0-9]+]].d, vl8 +; VBITS_GE_512-NEXT: ld1d { [[VALS:z[0-9]+]].d }, [[PG0]]/z, [x0] +; VBITS_GE_512-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG0]]/z, [x1] +; VBITS_GE_512-NEXT: mov [[ZERO:z[0-9]+]].d, #0 +; VBITS_GE_512-NEXT: fcmeq [[MASK:p[0-9]+]].d, [[PG0]]/z, [[VALS]].d, [[ZERO]].d +; VBITS_GE_512-NEXT: ld1d { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d] +; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG0]], [x0] +; VBITS_GE_512-NEXT: ret + %cval = load <8 x double>, <8 x double>* %a + %ptrs = load <8 x double*>, <8 x double*>* %b + %mask = fcmp oeq <8 x double> %cval, zeroinitializer + %vals = call <8 x double> @llvm.masked.gather.v8f64(<8 x double*> %ptrs, i32 8, <8 x i1> %mask, <8 x double> undef) + store <8 x double> %vals, <8 x double>* %a + ret void +} + +define void @masked_gather_v16f64(<16 x double>* %a, <16 x double*>* %b) #0 { +; CHECK-LABEL: masked_gather_v16f64: +; VBITS_GE_1024: ptrue [[PG0:p[0-9]+]].d, vl16 +; VBITS_GE_1024-NEXT: ld1d { [[VALS:z[0-9]+]].d }, [[PG0]]/z, [x0] +; VBITS_GE_1024-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG0]]/z, [x1] +; VBITS_GE_1024-NEXT: mov [[ZERO:z[0-9]+]].d, #0 +; VBITS_GE_1024-NEXT: fcmeq [[MASK:p[0-9]+]].d, [[PG0]]/z, [[VALS]].d, [[ZERO]].d +; VBITS_GE_1024-NEXT: ld1d { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d] +; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG0]], [x0] +; VBITS_GE_1024-NEXT: ret + %cval = load <16 x double>, <16 x double>* %a + %ptrs = load <16 x double*>, <16 x double*>* %b + %mask = fcmp oeq <16 x double> %cval, zeroinitializer + %vals = call <16 x double> @llvm.masked.gather.v16f64(<16 x double*> %ptrs, i32 8, <16 x i1> %mask, <16 x double> undef) + store <16 x double> %vals, <16 x double>* %a + ret void +} + +define void @masked_gather_v32f64(<32 x double>* %a, <32 x double*>* %b) #0 { +; CHECK-LABEL: masked_gather_v32f64: +; VBITS_GE_2048: ptrue [[PG0:p[0-9]+]].d, vl32 +; VBITS_GE_2048-NEXT: ld1d { [[VALS:z[0-9]+]].d }, [[PG0]]/z, [x0] +; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG0]]/z, [x1] +; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].d, #0 +; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].d, [[PG0]]/z, [[VALS]].d, [[ZERO]].d +; VBITS_GE_2048-NEXT: ld1d { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d] +; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG0]], [x0] +; VBITS_GE_2048-NEXT: ret + %cval = load <32 x double>, <32 x double>* %a + %ptrs = load <32 x double*>, <32 x double*>* %b + %mask = fcmp oeq <32 x double> %cval, zeroinitializer + %vals = call <32 x double> @llvm.masked.gather.v32f64(<32 x double*> %ptrs, i32 8, <32 x i1> %mask, <32 x double> undef) + store <32 x double> %vals, <32 x double>* %a + ret void +} + +; The above tests test the types, the below tests check that the addressing +; modes still function + +define void @masked_gather_32b_scaled_sext(<32 x half>* %a, <32 x i32>* %b, half* %base) #0 { +; CHECK-LABEL: masked_gather_32b_scaled_sext: +; VBITS_GE_2048: ptrue [[PG0:p[0-9]+]].h, vl32 +; VBITS_GE_2048-NEXT: ld1h { [[VALS:z[0-9]+]].h }, [[PG0]]/z, [x0] +; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].s, vl32 +; VBITS_GE_2048-NEXT: ld1w { [[PTRS:z[0-9]+]].s }, [[PG1]]/z, [x1] +; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].h, #0 +; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, [[ZERO]] +; VBITS_GE_2048-NEXT: ld1h { [[RES:z[0-9]+]].s }, [[MASK]]/z, [x2, [[PTRS]].s, sxtw #1] +; VBITS_GE_2048-NEXT: uzp1 [[UZP:z[0-9]+]].h, [[RES]].h, [[RES]].h +; VBITS_GE_2048-NEXT: st1h { [[UZP]].h }, [[PG0]], [x0] +; VBITS_GE_2048-NEXT: ret + %cvals = load <32 x half>, <32 x half>* %a + %idxs = load <32 x i32>, <32 x i32>* %b + %ext = sext <32 x i32> %idxs to <32 x i64> + %ptrs = getelementptr half, half* %base, <32 x i64> %ext + %mask = fcmp oeq <32 x half> %cvals, zeroinitializer + %vals = call <32 x half> @llvm.masked.gather.v32f16(<32 x half*> %ptrs, i32 8, <32 x i1> %mask, <32 x half> undef) + store <32 x half> %vals, <32 x half>* %a + ret void +} + +define void @masked_gather_32b_scaled_zext(<32 x half>* %a, <32 x i32>* %b, half* %base) #0 { +; CHECK-LABEL: masked_gather_32b_scaled_zext: +; VBITS_GE_2048: ptrue [[PG0:p[0-9]+]].h, vl32 +; VBITS_GE_2048-NEXT: ld1h { [[VALS:z[0-9]+]].h }, [[PG0]]/z, [x0] +; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].s, vl32 +; VBITS_GE_2048-NEXT: ld1w { [[PTRS:z[0-9]+]].s }, [[PG1]]/z, [x1] +; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].h, #0 +; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, [[ZERO]] +; VBITS_GE_2048-NEXT: ld1h { [[RES:z[0-9]+]].s }, [[MASK]]/z, [x2, [[PTRS]].s, uxtw #1] +; VBITS_GE_2048-NEXT: uzp1 [[UZP:z[0-9]+]].h, [[RES]].h, [[RES]].h +; VBITS_GE_2048-NEXT: st1h { [[UZP]].h }, [[PG0]], [x0] +; VBITS_GE_2048-NEXT: ret + %cvals = load <32 x half>, <32 x half>* %a + %idxs = load <32 x i32>, <32 x i32>* %b + %ext = zext <32 x i32> %idxs to <32 x i64> + %ptrs = getelementptr half, half* %base, <32 x i64> %ext + %mask = fcmp oeq <32 x half> %cvals, zeroinitializer + %vals = call <32 x half> @llvm.masked.gather.v32f16(<32 x half*> %ptrs, i32 8, <32 x i1> %mask, <32 x half> undef) + store <32 x half> %vals, <32 x half>* %a + ret void +} + +define void @masked_gather_32b_unscaled_sext(<32 x half>* %a, <32 x i32>* %b, i8* %base) #0 { +; CHECK-LABEL: masked_gather_32b_unscaled_sext: +; VBITS_GE_2048: ptrue [[PG0:p[0-9]+]].h, vl32 +; VBITS_GE_2048-NEXT: ld1h { [[VALS:z[0-9]+]].h }, [[PG0]]/z, [x0] +; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].s, vl32 +; VBITS_GE_2048-NEXT: ld1w { [[PTRS:z[0-9]+]].s }, [[PG1]]/z, [x1] +; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].h, #0 +; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, [[ZERO]] +; VBITS_GE_2048-NEXT: ld1h { [[RES:z[0-9]+]].s }, [[MASK]]/z, [x2, [[PTRS]].s, sxtw] +; VBITS_GE_2048-NEXT: uzp1 [[UZP:z[0-9]+]].h, [[RES]].h, [[RES]].h +; VBITS_GE_2048-NEXT: st1h { [[UZP]].h }, [[PG0]], [x0] +; VBITS_GE_2048-NEXT: ret + %cvals = load <32 x half>, <32 x half>* %a + %idxs = load <32 x i32>, <32 x i32>* %b + %ext = sext <32 x i32> %idxs to <32 x i64> + %byte_ptrs = getelementptr i8, i8* %base, <32 x i64> %ext + %ptrs = bitcast <32 x i8*> %byte_ptrs to <32 x half*> + %mask = fcmp oeq <32 x half> %cvals, zeroinitializer + %vals = call <32 x half> @llvm.masked.gather.v32f16(<32 x half*> %ptrs, i32 8, <32 x i1> %mask, <32 x half> undef) + store <32 x half> %vals, <32 x half>* %a + ret void +} + +define void @masked_gather_32b_unscaled_zext(<32 x half>* %a, <32 x i32>* %b, i8* %base) #0 { +; CHECK-LABEL: masked_gather_32b_unscaled_zext: +; VBITS_GE_2048: ptrue [[PG0:p[0-9]+]].h, vl32 +; VBITS_GE_2048-NEXT: ld1h { [[VALS:z[0-9]+]].h }, [[PG0]]/z, [x0] +; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].s, vl32 +; VBITS_GE_2048-NEXT: ld1w { [[PTRS:z[0-9]+]].s }, [[PG1]]/z, [x1] +; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].h, #0 +; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, [[ZERO]] +; VBITS_GE_2048-NEXT: ld1h { [[RES:z[0-9]+]].s }, [[MASK]]/z, [x2, [[PTRS]].s, uxtw] +; VBITS_GE_2048-NEXT: uzp1 [[UZP:z[0-9]+]].h, [[RES]].h, [[RES]].h +; VBITS_GE_2048-NEXT: st1h { [[UZP]].h }, [[PG0]], [x0] +; VBITS_GE_2048-NEXT: ret + %cvals = load <32 x half>, <32 x half>* %a + %idxs = load <32 x i32>, <32 x i32>* %b + %ext = zext <32 x i32> %idxs to <32 x i64> + %byte_ptrs = getelementptr i8, i8* %base, <32 x i64> %ext + %ptrs = bitcast <32 x i8*> %byte_ptrs to <32 x half*> + %mask = fcmp oeq <32 x half> %cvals, zeroinitializer + %vals = call <32 x half> @llvm.masked.gather.v32f16(<32 x half*> %ptrs, i32 8, <32 x i1> %mask, <32 x half> undef) + store <32 x half> %vals, <32 x half>* %a + ret void +} + +define void @masked_gather_64b_scaled(<32 x float>* %a, <32 x i64>* %b, float* %base) #0 { +; CHECK-LABEL: masked_gather_64b_scaled: +; VBITS_GE_2048: ptrue [[PG0:p[0-9]+]].s, vl32 +; VBITS_GE_2048-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0] +; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].d, vl32 +; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] +; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].s, #0 +; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, [[ZERO]] +; VBITS_GE_2048-NEXT: ld1w { [[RES:z[0-9]+]].d }, [[MASK]]/z, [x2, [[PTRS]].d, lsl #2] +; VBITS_GE_2048-NEXT: uzp1 [[UZP:z[0-9]+]].s, [[RES]].s, [[RES]].s +; VBITS_GE_2048-NEXT: st1w { [[UZP]].s }, [[PG0]], [x0] +; VBITS_GE_2048-NEXT: ret + %cvals = load <32 x float>, <32 x float>* %a + %idxs = load <32 x i64>, <32 x i64>* %b + %ptrs = getelementptr float, float* %base, <32 x i64> %idxs + %mask = fcmp oeq <32 x float> %cvals, zeroinitializer + %vals = call <32 x float> @llvm.masked.gather.v32f32(<32 x float*> %ptrs, i32 8, <32 x i1> %mask, <32 x float> undef) + store <32 x float> %vals, <32 x float>* %a + ret void +} + +define void @masked_gather_64b_unscaled(<32 x float>* %a, <32 x i64>* %b, i8* %base) #0 { +; CHECK-LABEL: masked_gather_64b_unscaled: +; VBITS_GE_2048: ptrue [[PG0:p[0-9]+]].s, vl32 +; VBITS_GE_2048-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0] +; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].d, vl32 +; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] +; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].s, #0 +; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, [[ZERO]] +; VBITS_GE_2048-NEXT: ld1w { [[RES:z[0-9]+]].d }, [[MASK]]/z, [x2, [[PTRS]].d] +; VBITS_GE_2048-NEXT: uzp1 [[UZP:z[0-9]+]].s, [[RES]].s, [[RES]].s +; VBITS_GE_2048-NEXT: st1w { [[UZP]].s }, [[PG0]], [x0] +; VBITS_GE_2048-NEXT: ret + %cvals = load <32 x float>, <32 x float>* %a + %idxs = load <32 x i64>, <32 x i64>* %b + %byte_ptrs = getelementptr i8, i8* %base, <32 x i64> %idxs + %ptrs = bitcast <32 x i8*> %byte_ptrs to <32 x float*> + %mask = fcmp oeq <32 x float> %cvals, zeroinitializer + %vals = call <32 x float> @llvm.masked.gather.v32f32(<32 x float*> %ptrs, i32 8, <32 x i1> %mask, <32 x float> undef) + store <32 x float> %vals, <32 x float>* %a + ret void +} + +; FIXME: This case does not yet codegen well due to deficiencies in opcode selection +define void @masked_gather_vec_plus_reg(<32 x float>* %a, <32 x i8*>* %b, i64 %off) #0 { +; CHECK-LABEL: masked_gather_vec_plus_reg: +; VBITS_GE_2048: ptrue [[PG0:p[0-9]+]].s, vl32 +; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].d, vl32 +; VBITS_GE_2048-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0] +; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] +; VBITS_GE_2048-NEXT: mov [[OFF:z[0-9]+]].d, x2 +; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].s, #0 +; VBITS_GE_2048-NEXT: add [[PTRS_ADD:z[0-9]+]].d, [[PG1]]/m, [[PTRS]].d, [[OFF]].d +; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, [[ZERO]] +; VBITS_GE_2048-NEXT: ld1w { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS_ADD]].d] +; VBITS_GE_2048-NEXT: uzp1 [[UZP:z[0-9]+]].s, [[RES]].s, [[RES]].s +; VBITS_GE_2048-NEXT: st1w { [[UZP]].s }, [[PG0]], [x0] +; VBITS_GE_2048-NEXT: ret + %cvals = load <32 x float>, <32 x float>* %a + %bases = load <32 x i8*>, <32 x i8*>* %b + %byte_ptrs = getelementptr i8, <32 x i8*> %bases, i64 %off + %ptrs = bitcast <32 x i8*> %byte_ptrs to <32 x float*> + %mask = fcmp oeq <32 x float> %cvals, zeroinitializer + %vals = call <32 x float> @llvm.masked.gather.v32f32(<32 x float*> %ptrs, i32 8, <32 x i1> %mask, <32 x float> undef) + store <32 x float> %vals, <32 x float>* %a + ret void +} + +; FIXME: This case does not yet codegen well due to deficiencies in opcode selection +define void @masked_gather_vec_plus_imm(<32 x float>* %a, <32 x i8*>* %b) #0 { +; CHECK-LABEL: masked_gather_vec_plus_imm: +; VBITS_GE_2048: ptrue [[PG0:p[0-9]+]].s, vl32 +; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].d, vl32 +; VBITS_GE_2048-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0] +; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] +; VBITS_GE_2048-NEXT: mov [[OFF:z[0-9]+]].d, #4 +; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].s, #0 +; VBITS_GE_2048-NEXT: add [[PTRS_ADD:z[0-9]+]].d, [[PG1]]/m, [[PTRS]].d, [[OFF]].d +; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, [[ZERO]] +; VBITS_GE_2048-NEXT: ld1w { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS_ADD]].d] +; VBITS_GE_2048-NEXT: uzp1 [[UZP:z[0-9]+]].s, [[RES]].s, [[RES]].s +; VBITS_GE_2048-NEXT: st1w { [[UZP]].s }, [[PG0]], [x0] +; VBITS_GE_2048-NEXT: ret + %cvals = load <32 x float>, <32 x float>* %a + %bases = load <32 x i8*>, <32 x i8*>* %b + %byte_ptrs = getelementptr i8, <32 x i8*> %bases, i64 4 + %ptrs = bitcast <32 x i8*> %byte_ptrs to <32 x float*> + %mask = fcmp oeq <32 x float> %cvals, zeroinitializer + %vals = call <32 x float> @llvm.masked.gather.v32f32(<32 x float*> %ptrs, i32 8, <32 x i1> %mask, <32 x float> undef) + store <32 x float> %vals, <32 x float>* %a + ret void +} + +define void @masked_gather_passthru(<32 x float>* %a, <32 x float*>* %b, <32 x float>* %c) #0 { +; CHECK-LABEL: masked_gather_passthru: +; VBITS_GE_2048: ptrue [[PG0:p[0-9]+]].s, vl32 +; VBITS_GE_2048-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0] +; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].d, vl32 +; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] +; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].s, #0 +; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, [[ZERO]] +; VBITS_GE_2048-NEXT: ld1w { [[PT:z[0-9]+]].s }, [[PG0]]/z, [x2] +; VBITS_GE_2048-NEXT: ld1w { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d] +; VBITS_GE_2048-NEXT: uzp1 [[UZP:z[0-9]+]].s, [[RES]].s, [[RES]].s +; VBITS_GE_2048-NEXT: sel [[SEL:z[0-9]+]].s, [[PG1]], [[UZP]].s, [[PT]].s +; VBITS_GE_2048-NEXT: st1w { [[SEL]].s }, [[PG0]], [x0] +; VBITS_GE_2048-NEXT: ret + %cvals = load <32 x float>, <32 x float>* %a + %ptrs = load <32 x float*>, <32 x float*>* %b + %passthru = load <32 x float>, <32 x float>* %c + %mask = fcmp oeq <32 x float> %cvals, zeroinitializer + %vals = call <32 x float> @llvm.masked.gather.v32f32(<32 x float*> %ptrs, i32 8, <32 x i1> %mask, <32 x float> %passthru) + store <32 x float> %vals, <32 x float>* %a + ret void +} + +define void @masked_gather_passthru_0(<32 x float>* %a, <32 x float*>* %b) #0 { +; CHECK-LABEL: masked_gather_passthru_0: +; VBITS_GE_2048: ptrue [[PG0:p[0-9]+]].s, vl32 +; VBITS_GE_2048-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0] +; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].d, vl32 +; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] +; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].s, #0 +; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, [[ZERO]] +; VBITS_GE_2048-NEXT: ld1w { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d] +; VBITS_GE_2048-NEXT: uzp1 [[UZP:z[0-9]+]].s, [[RES]].s, [[RES]].s +; VBITS_GE_2048-NEXT: st1w { [[UZP]].s }, [[PG0]], [x0] +; VBITS_GE_2048-NEXT: ret + %cvals = load <32 x float>, <32 x float>* %a + %ptrs = load <32 x float*>, <32 x float*>* %b + %mask = fcmp oeq <32 x float> %cvals, zeroinitializer + %vals = call <32 x float> @llvm.masked.gather.v32f32(<32 x float*> %ptrs, i32 8, <32 x i1> %mask, <32 x float> zeroinitializer) + store <32 x float> %vals, <32 x float>* %a + ret void +} + +declare <2 x i8> @llvm.masked.gather.v2i8(<2 x i8*>, i32, <2 x i1>, <2 x i8>) +declare <4 x i8> @llvm.masked.gather.v4i8(<4 x i8*>, i32, <4 x i1>, <4 x i8>) +declare <8 x i8> @llvm.masked.gather.v8i8(<8 x i8*>, i32, <8 x i1>, <8 x i8>) +declare <16 x i8> @llvm.masked.gather.v16i8(<16 x i8*>, i32, <16 x i1>, <16 x i8>) +declare <32 x i8> @llvm.masked.gather.v32i8(<32 x i8*>, i32, <32 x i1>, <32 x i8>) + +declare <2 x i16> @llvm.masked.gather.v2i16(<2 x i16*>, i32, <2 x i1>, <2 x i16>) +declare <4 x i16> @llvm.masked.gather.v4i16(<4 x i16*>, i32, <4 x i1>, <4 x i16>) +declare <8 x i16> @llvm.masked.gather.v8i16(<8 x i16*>, i32, <8 x i1>, <8 x i16>) +declare <16 x i16> @llvm.masked.gather.v16i16(<16 x i16*>, i32, <16 x i1>, <16 x i16>) +declare <32 x i16> @llvm.masked.gather.v32i16(<32 x i16*>, i32, <32 x i1>, <32 x i16>) + +declare <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>) +declare <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>) +declare <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*>, i32, <8 x i1>, <8 x i32>) +declare <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*>, i32, <16 x i1>, <16 x i32>) +declare <32 x i32> @llvm.masked.gather.v32i32(<32 x i32*>, i32, <32 x i1>, <32 x i32>) + +declare <1 x i64> @llvm.masked.gather.v1i64(<1 x i64*>, i32, <1 x i1>, <1 x i64>) +declare <2 x i64> @llvm.masked.gather.v2i64(<2 x i64*>, i32, <2 x i1>, <2 x i64>) +declare <4 x i64> @llvm.masked.gather.v4i64(<4 x i64*>, i32, <4 x i1>, <4 x i64>) +declare <8 x i64> @llvm.masked.gather.v8i64(<8 x i64*>, i32, <8 x i1>, <8 x i64>) +declare <16 x i64> @llvm.masked.gather.v16i64(<16 x i64*>, i32, <16 x i1>, <16 x i64>) +declare <32 x i64> @llvm.masked.gather.v32i64(<32 x i64*>, i32, <32 x i1>, <32 x i64>) + +declare <2 x half> @llvm.masked.gather.v2f16(<2 x half*>, i32, <2 x i1>, <2 x half>) +declare <4 x half> @llvm.masked.gather.v4f16(<4 x half*>, i32, <4 x i1>, <4 x half>) +declare <8 x half> @llvm.masked.gather.v8f16(<8 x half*>, i32, <8 x i1>, <8 x half>) +declare <16 x half> @llvm.masked.gather.v16f16(<16 x half*>, i32, <16 x i1>, <16 x half>) +declare <32 x half> @llvm.masked.gather.v32f16(<32 x half*>, i32, <32 x i1>, <32 x half>) + +declare <2 x float> @llvm.masked.gather.v2f32(<2 x float*>, i32, <2 x i1>, <2 x float>) +declare <4 x float> @llvm.masked.gather.v4f32(<4 x float*>, i32, <4 x i1>, <4 x float>) +declare <8 x float> @llvm.masked.gather.v8f32(<8 x float*>, i32, <8 x i1>, <8 x float>) +declare <16 x float> @llvm.masked.gather.v16f32(<16 x float*>, i32, <16 x i1>, <16 x float>) +declare <32 x float> @llvm.masked.gather.v32f32(<32 x float*>, i32, <32 x i1>, <32 x float>) + +declare <1 x double> @llvm.masked.gather.v1f64(<1 x double*>, i32, <1 x i1>, <1 x double>) +declare <2 x double> @llvm.masked.gather.v2f64(<2 x double*>, i32, <2 x i1>, <2 x double>) +declare <4 x double> @llvm.masked.gather.v4f64(<4 x double*>, i32, <4 x i1>, <4 x double>) +declare <8 x double> @llvm.masked.gather.v8f64(<8 x double*>, i32, <8 x i1>, <8 x double>) +declare <16 x double> @llvm.masked.gather.v16f64(<16 x double*>, i32, <16 x i1>, <16 x double>) +declare <32 x double> @llvm.masked.gather.v32f64(<32 x double*>, i32, <32 x i1>, <32 x double>) + +attributes #0 = { "target-features"="+sve" } diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll new file mode 100644 index 0000000000000..aa79ea7992b70 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll @@ -0,0 +1,999 @@ +; RUN: llc -aarch64-sve-vector-bits-min=128 -asm-verbose=0 < %s | FileCheck %s -check-prefix=NO_SVE +; RUN: llc -aarch64-sve-vector-bits-min=256 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_EQ_256 +; RUN: llc -aarch64-sve-vector-bits-min=384 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK +; RUN: llc -aarch64-sve-vector-bits-min=512 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512 +; RUN: llc -aarch64-sve-vector-bits-min=640 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512 +; RUN: llc -aarch64-sve-vector-bits-min=768 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512 +; RUN: llc -aarch64-sve-vector-bits-min=896 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512 +; RUN: llc -aarch64-sve-vector-bits-min=1024 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512 +; RUN: llc -aarch64-sve-vector-bits-min=1152 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512 +; RUN: llc -aarch64-sve-vector-bits-min=1280 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512 +; RUN: llc -aarch64-sve-vector-bits-min=1408 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512 +; RUN: llc -aarch64-sve-vector-bits-min=1536 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512 +; RUN: llc -aarch64-sve-vector-bits-min=1664 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512 +; RUN: llc -aarch64-sve-vector-bits-min=1792 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512 +; RUN: llc -aarch64-sve-vector-bits-min=1920 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512 +; RUN: llc -aarch64-sve-vector-bits-min=2048 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_2048,VBITS_GE_1024,VBITS_GE_512 + +target triple = "aarch64-unknown-linux-gnu" + +; Don't use SVE when its registers are no bigger than NEON. +; NO_SVE-NOT: ptrue + +; +; ST1B +; + +define void @masked_scatter_v2i8(<2 x i8>* %a, <2 x i8*>* %b) #0 { +; CHECK-LABEL: masked_scatter_v2i8: +; CHECK: ldrb [[VALS_LO:w[0-9]+]], [x0] +; CHECK-NEXT: ldrb [[VALS_HI:w[0-9]+]], [x0, #1] +; CHECK-NEXT: ldr q[[PTRS:[0-9]+]], [x1] +; CHECK-NEXT: ptrue [[PG0:p[0-9]+]].s, vl2 +; CHECK-NEXT: fmov s[[VALS:[0-9]+]], [[VALS_LO]] +; CHECK-NEXT: mov v[[VALS]].s[1], [[VALS_HI]] +; CHECK-NEXT: cmeq v[[CMP:[0-9]+]].2s, v[[VALS]].2s, #0 +; CHECK-NEXT: cmpne [[MASK:p[0-9]+]].s, [[PG0]]/z, z[[CMP]].s, #0 +; CHECK-NEXT: ushll v[[SHL:[0-9]+]].2d, v[[VALS]].2s, #0 +; CHECK-NEXT: st1b { z[[SHL]].d }, [[MASK]], [z[[PTRS]].d] +; CHECK-NEXT: ret + %vals = load <2 x i8>, <2 x i8>* %a + %ptrs = load <2 x i8*>, <2 x i8*>* %b + %mask = icmp eq <2 x i8> %vals, zeroinitializer + call void @llvm.masked.scatter.v2i8(<2 x i8> %vals, <2 x i8*> %ptrs, i32 8, <2 x i1> %mask) + ret void +} + +define void @masked_scatter_v4i8(<4 x i8>* %a, <4 x i8*>* %b) #0 { +; CHECK-LABEL: masked_scatter_v4i8: +; CHECK: ldr s[[VALS:[0-9]+]], [x0] +; CHECK-NEXT: ptrue [[PG0:p[0-9]+]].d, vl4 +; CHECK-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG0]]/z, [x1] +; CHECK-NEXT: ptrue [[PG1:p[0-9]+]].h, vl4 +; CHECK-NEXT: ushll [[SHL:v[0-9]+]].8h, v[[VALS]].8b, #0 +; CHECK-NEXT: cmeq v[[CMP:[0-9]+]].4h, [[SHL]].4h, #0 +; CHECK-NEXT: uunpklo [[UPK1:z[0-9]+]].s, z[[VALS]].h +; CHECK-NEXT: cmpne [[MASK:p[0-9]+]].h, [[PG1]]/z, z[[CMP]].h, #0 +; CHECK-NEXT: uunpklo [[UPK2:z[0-9]+]].d, [[UPK1]].s +; CHECK-NEXT: st1b { [[UPK2]].d }, [[MASK]], {{\[}}[[PTRS]].d] +; CHECK-NEXT: ret + %vals = load <4 x i8>, <4 x i8>* %a + %ptrs = load <4 x i8*>, <4 x i8*>* %b + %mask = icmp eq <4 x i8> %vals, zeroinitializer + call void @llvm.masked.scatter.v4i8(<4 x i8> %vals, <4 x i8*> %ptrs, i32 8, <4 x i1> %mask) + ret void +} + +define void @masked_scatter_v8i8(<8 x i8>* %a, <8 x i8*>* %b) #0 { +; CHECK-LABEL: masked_scatter_v8i8: +; VBITS_EQ_256: ldr d[[VALS:[0-9]+]], [x0] +; VBITS_EQ_256-NEXT: add x8, x1, #32 +; VBITS_EQ_256-NEXT: ptrue [[PG0:p[0-9]+]].d, vl4 +; VBITS_EQ_256-NEXT: ptrue [[PG1:p[0-9]+]].h, vl4 +; VBITS_EQ_256-NEXT: cmeq [[ZMSK:v[0-9]+]].8b, v[[VALS]].8b, #0 +; VBITS_EQ_256-NEXT: zip1 [[VAL_LO:v[0-9]+]].8b, [[ZMSK]].8b, v[[VALS]].8b +; VBITS_EQ_256-NEXT: zip2 [[VAL_HI:v[0-9]+]].8b, [[ZMSK]].8b, v[[VALS]].8b +; VBITS_EQ_256-NEXT: shl [[SHL_LO:v[0-9]+]].4h, [[VAL_LO]].4h, #8 +; VBITS_EQ_256-NEXT: shl [[SHL_HI:v[0-9]+]].4h, [[VAL_HI]].4h, #8 +; VBITS_EQ_256-NEXT: ld1d { [[PTRS_HI:z[0-9]+]].d }, [[PG0]]/z, [x8] +; VBITS_EQ_256-NEXT: ld1d { [[PTRS_LO:z[0-9]+]].d }, [[PG0]]/z, [x1] +; VBITS_EQ_256-NEXT: sshr v[[SSHR_LO:[0-9]+]].4h, [[SHL_LO]].4h, #8 +; VBITS_EQ_256-NEXT: sshr v[[SSHR_HI:[0-9]+]].4h, [[SHL_HI]].4h, #8 +; VBITS_EQ_256-NEXT: cmpne [[MASK_LO:p[0-9]+]].h, [[PG1]]/z, z[[SSHR_LO]].h, #0 +; VBITS_EQ_256-NEXT: cmpne [[MASK_HI:p[0-9]+]].h, [[PG1]]/z, z[[SSHR_HI]].h, #0 +; VBITS_EQ_256-NEXT: zip1 v[[VALS2_LO:[0-9]+]].8b, v[[VALS]].8b, v[[VALS]].8b +; VBITS_EQ_256-NEXT: zip2 v[[VALS2_HI:[0-9]+]].8b, v[[VALS]].8b, v[[VALS]].8b +; VBITS_EQ_256-NEXT: uunpklo [[UPK1_LO:z[0-9]+]].s, z[[VALS2_LO]].h +; VBITS_EQ_256-NEXT: uunpklo [[UPK1_HI:z[0-9]+]].s, z[[VALS2_HI]].h +; VBITS_EQ_256-NEXT: uunpklo [[UPK2_LO:z[0-9]+]].d, [[UPK1_LO]].s +; VBITS_EQ_256-NEXT: uunpklo [[UPK2_HI:z[0-9]+]].d, [[UPK1_HI]].s +; VBITS_EQ_256-NEXT: st1b { [[UPK2_LO]].d }, [[MASK_LO]], {{\[}}[[PTRS_LO]].d] +; VBITS_EQ_256-NEXT: st1b { [[UPK2_HI]].d }, [[MASK_HI]], {{\[}}[[PTRS_HI]].d] +; VBITS_EQ_256-NEXT: ret + +; VBITS_GE_512: ldr d[[VALS:[0-9]+]], [x0] +; VBITS_GE_512-NEXT: ptrue [[PG0:p[0-9]+]].d, vl8 +; VBITS_GE_512-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG0]]/z, [x1] +; VBITS_GE_512-NEXT: ptrue [[PG1:p[0-9]+]].b, vl8 +; VBITS_GE_512-NEXT: cmeq v[[CMP:[0-9]+]].8b, v[[VALS]].8b, #0 +; VBITS_GE_512-NEXT: uunpklo [[UPK1:z[0-9]+]].h, z[[VALS]].b +; VBITS_GE_512-NEXT: uunpklo [[UPK2:z[0-9]+]].s, [[UPK1]].h +; VBITS_GE_512-NEXT: cmpne [[MASK:p[0-9]+]].b, [[PG1]]/z, z[[CMP]].b, #0 +; VBITS_GE_512-NEXT: uunpklo [[UPK3:z[0-9]+]].d, [[UPK2]].s +; VBITS_GE_512-NEXT: st1b { [[UPK3]].d }, [[MASK]], {{\[}}[[PTRS]].d] +; VBITS_GE_512-NEXT: ret + %vals = load <8 x i8>, <8 x i8>* %a + %ptrs = load <8 x i8*>, <8 x i8*>* %b + %mask = icmp eq <8 x i8> %vals, zeroinitializer + call void @llvm.masked.scatter.v8i8(<8 x i8> %vals, <8 x i8*> %ptrs, i32 8, <8 x i1> %mask) + ret void +} + +define void @masked_scatter_v16i8(<16 x i8>* %a, <16 x i8*>* %b) #0 { +; CHECK-LABEL: masked_scatter_v16i8: +; VBITS_GE_1024: ldr q[[VALS:[0-9]+]], [x0] +; VBITS_GE_1024-NEXT: ptrue [[PG0:p[0-9]+]].d, vl16 +; VBITS_GE_1024-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG0]]/z, [x1] +; VBITS_GE_1024-NEXT: ptrue [[PG1:p[0-9]+]].b, vl16 +; VBITS_GE_1024-NEXT: cmeq v[[CMP:[0-9]+]].16b, v[[VALS]].16b, #0 +; VBITS_GE_1024-NEXT: uunpklo [[UPK1:z[0-9]+]].h, z[[VALS]].b +; VBITS_GE_1024-NEXT: uunpklo [[UPK2:z[0-9]+]].s, [[UPK1]].h +; VBITS_GE_1024-NEXT: cmpne [[MASK:p[0-9]+]].b, [[PG1]]/z, z[[CMP]].b, #0 +; VBITS_GE_1024-NEXT: uunpklo [[UPK3:z[0-9]+]].d, [[UPK2]].s +; VBITS_GE_1024-NEXT: st1b { [[UPK3]].d }, [[MASK]], {{\[}}[[PTRS]].d] +; VBITS_GE_1024-NEXT: ret + %vals = load <16 x i8>, <16 x i8>* %a + %ptrs = load <16 x i8*>, <16 x i8*>* %b + %mask = icmp eq <16 x i8> %vals, zeroinitializer + call void @llvm.masked.scatter.v16i8(<16 x i8> %vals, <16 x i8*> %ptrs, i32 8, <16 x i1> %mask) + ret void +} + +define void @masked_scatter_v32i8(<32 x i8>* %a, <32 x i8*>* %b) #0 { +; CHECK-LABEL: masked_scatter_v32i8: +; VBITS_GE_2048: ptrue [[PG0:p[0-9]+]].b, vl32 +; VBITS_GE_2048-NEXT: ld1b { [[VALS:z[0-9]+]].b }, [[PG0]]/z, [x0] +; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].d, vl32 +; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] +; VBITS_GE_2048-NEXT: cmpeq [[MASK:p[0-9]+]].b, [[PG0]]/z, [[VALS]].b, #0 +; VBITS_GE_2048-NEXT: uunpklo [[UPK1:z[0-9]+]].h, [[VALS]].b +; VBITS_GE_2048-NEXT: uunpklo [[UPK2:z[0-9]+]].s, [[UPK1]].h +; VBITS_GE_2048-NEXT: uunpklo [[UPK3:z[0-9]+]].d, [[UPK2]].s +; VBITS_GE_2048-NEXT: st1b { [[UPK3]].d }, [[MASK]], {{\[}}[[PTRS]].d] +; VBITS_GE_2048-NEXT: ret + %vals = load <32 x i8>, <32 x i8>* %a + %ptrs = load <32 x i8*>, <32 x i8*>* %b + %mask = icmp eq <32 x i8> %vals, zeroinitializer + call void @llvm.masked.scatter.v32i8(<32 x i8> %vals, <32 x i8*> %ptrs, i32 8, <32 x i1> %mask) + ret void +} + +; +; ST1H +; + +define void @masked_scatter_v2i16(<2 x i16>* %a, <2 x i16*>* %b) #0 { +; CHECK-LABEL: masked_scatter_v2i16: +; CHECK: ldrh [[VALS_LO:w[0-9]+]], [x0] +; CHECK-NEXT: ldrh [[VALS_HI:w[0-9]+]], [x0, #2] +; CHECK-NEXT: ldr q[[PTRS:[0-9]+]], [x1] +; CHECK-NEXT: ptrue [[PG0:p[0-9]+]].s, vl2 +; CHECK-NEXT: fmov s[[VALS:[0-9]+]], [[VALS_LO]] +; CHECK-NEXT: mov v[[VALS]].s[1], [[VALS_HI]] +; CHECK-NEXT: cmeq v[[CMP:[0-9]+]].2s, v[[VALS]].2s, #0 +; CHECK-NEXT: cmpne [[MASK:p[0-9]+]].s, [[PG0]]/z, z[[CMP]].s, #0 +; CHECK-NEXT: ushll v[[SHL:[0-9]+]].2d, v[[VALS]].2s, #0 +; CHECK-NEXT: st1h { z[[SHL]].d }, [[MASK]], [z[[PTRS]].d] +; CHECK-NEXT: ret + %vals = load <2 x i16>, <2 x i16>* %a + %ptrs = load <2 x i16*>, <2 x i16*>* %b + %mask = icmp eq <2 x i16> %vals, zeroinitializer + call void @llvm.masked.scatter.v2i16(<2 x i16> %vals, <2 x i16*> %ptrs, i32 8, <2 x i1> %mask) + ret void +} + +define void @masked_scatter_v4i16(<4 x i16>* %a, <4 x i16*>* %b) #0 { +; CHECK-LABEL: masked_scatter_v4i16: +; CHECK: ldr d[[VALS:[0-9]+]], [x0] +; CHECK-NEXT: ptrue [[PG0:p[0-9]+]].d, vl4 +; CHECK-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG0]]/z, [x1] +; CHECK-NEXT: ptrue [[PG1:p[0-9]+]].h, vl4 +; CHECK-NEXT: cmeq v[[CMP:[0-9]+]].4h, v[[VALS]].4h, #0 +; CHECK-NEXT: uunpklo [[UPK1:z[0-9]+]].s, z[[VALS]].h +; CHECK-NEXT: cmpne [[MASK:p[0-9]+]].h, [[PG1]]/z, z[[CMP]].h, #0 +; CHECK-NEXT: uunpklo [[UPK2:z[0-9]+]].d, [[UPK1]].s +; CHECK-NEXT: st1h { [[UPK2]].d }, [[MASK]], {{\[}}[[PTRS]].d] +; CHECK-NEXT: ret + %vals = load <4 x i16>, <4 x i16>* %a + %ptrs = load <4 x i16*>, <4 x i16*>* %b + %mask = icmp eq <4 x i16> %vals, zeroinitializer + call void @llvm.masked.scatter.v4i16(<4 x i16> %vals, <4 x i16*> %ptrs, i32 8, <4 x i1> %mask) + ret void +} + +define void @masked_scatter_v8i16(<8 x i16>* %a, <8 x i16*>* %b) #0 { +; CHECK-LABEL: masked_scatter_v8i16: +; VBITS_EQ_256: ldr q[[VALS:[0-9]+]], [x0] +; VBITS_EQ_256-NEXT: add x8, x1, #32 +; VBITS_EQ_256-NEXT: ptrue [[PG0:p[0-9]+]].d, vl4 +; VBITS_EQ_256-NEXT: ld1d { [[PTRS_HI:z[0-9]+]].d }, [[PG0]]/z, [x8] +; VBITS_EQ_256-NEXT: ld1d { [[PTRS_LO:z[0-9]+]].d }, [[PG0]]/z, [x1] +; VBITS_EQ_256-NEXT: ptrue [[PG1:p[0-9]+]].h, vl4 +; VBITS_EQ_256-NEXT: cmeq v[[ZMSK:[0-9]+]].8h, v[[VALS]].8h, #0 +; VBITS_EQ_256-DAG: ext v[[EXT:[0-9]+]].16b, v[[VALS]].16b, v[[VALS]].16b, #8 +; VBITS_EQ_256-DAG: cmpne [[MASK_LO:p[0-9]+]].h, [[PG1]]/z, z[[ZMSK]].h, #0 +; VBITS_EQ_256-DAG: ext v[[ZEXT:[0-9]+]].16b, v[[ZMSK]].16b, v[[ZMSK]].16b, #8 +; VBITS_EQ_256-DAG: cmpne [[MASK_HI:p[0-9]+]].h, [[PG1]]/z, z[[ZEXT]].h, #0 +; VBITS_EQ_256-DAG: uunpklo [[UPK1_LO:z[0-9]+]].s, z[[VALS]].h +; VBITS_EQ_256-DAG: uunpklo [[UPK1_HI:z[0-9]+]].s, z[[EXT]].h +; VBITS_EQ_256-DAG: uunpklo [[UPK2_LO:z[0-9]+]].d, [[UPK1_LO]].s +; VBITS_EQ_256-DAG: uunpklo [[UPK2_HI:z[0-9]+]].d, [[UPK1_HI]].s +; VBITS_EQ_256-DAG: st1h { [[UPK2_LO]].d }, [[MASK_LO]], {{\[}}[[PTRS_LO]].d] +; VBITS_EQ_256-DAG: st1h { [[UPK2_HI]].d }, [[MASK_HI]], {{\[}}[[PTRS_HI]].d] +; VBITS_EQ_256-NEXT: ret + +; VBITS_GE_512: ldr q[[VALS:[0-9]+]], [x0] +; VBITS_GE_512-NEXT: ptrue [[PG0:p[0-9]+]].d, vl8 +; VBITS_GE_512-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG0]]/z, [x1] +; VBITS_GE_512-NEXT: ptrue [[PG1:p[0-9]+]].h, vl8 +; VBITS_GE_512-NEXT: cmeq v[[CMP:[0-9]+]].8h, v[[VALS]].8h, #0 +; VBITS_GE_512-NEXT: uunpklo [[UPK1:z[0-9]+]].s, z[[VALS]].h +; VBITS_GE_512-NEXT: cmpne [[MASK:p[0-9]+]].h, [[PG1]]/z, z[[CMP]].h, #0 +; VBITS_GE_512-NEXT: uunpklo [[UPK2:z[0-9]+]].d, [[UPK1]].s +; VBITS_GE_512-NEXT: st1h { [[UPK2]].d }, [[MASK]], {{\[}}[[PTRS]].d] +; VBITS_GE_512-NEXT: ret + %vals = load <8 x i16>, <8 x i16>* %a + %ptrs = load <8 x i16*>, <8 x i16*>* %b + %mask = icmp eq <8 x i16> %vals, zeroinitializer + call void @llvm.masked.scatter.v8i16(<8 x i16> %vals, <8 x i16*> %ptrs, i32 8, <8 x i1> %mask) + ret void +} + +define void @masked_scatter_v16i16(<16 x i16>* %a, <16 x i16*>* %b) #0 { +; CHECK-LABEL: masked_scatter_v16i16: +; VBITS_GE_1024: ptrue [[PG0:p[0-9]+]].h, vl16 +; VBITS_GE_1024-NEXT: ld1h { [[VALS:z[0-9]+]].h }, [[PG0]]/z, [x0] +; VBITS_GE_1024-NEXT: ptrue [[PG1:p[0-9]+]].d, vl16 +; VBITS_GE_1024-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] +; VBITS_GE_1024-NEXT: cmpeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, #0 +; VBITS_GE_1024-NEXT: uunpklo [[UPK1:z[0-9]+]].s, [[VALS]].h +; VBITS_GE_1024-NEXT: uunpklo [[UPK2:z[0-9]+]].d, [[UPK1]].s +; VBITS_GE_1024-NEXT: st1h { [[UPK2]].d }, [[MASK]], {{\[}}[[PTRS]].d] +; VBITS_GE_1024-NEXT: ret + %vals = load <16 x i16>, <16 x i16>* %a + %ptrs = load <16 x i16*>, <16 x i16*>* %b + %mask = icmp eq <16 x i16> %vals, zeroinitializer + call void @llvm.masked.scatter.v16i16(<16 x i16> %vals, <16 x i16*> %ptrs, i32 8, <16 x i1> %mask) + ret void +} + +define void @masked_scatter_v32i16(<32 x i16>* %a, <32 x i16*>* %b) #0 { +; CHECK-LABEL: masked_scatter_v32i16: +; VBITS_GE_2048: ptrue [[PG0:p[0-9]+]].h, vl32 +; VBITS_GE_2048-NEXT: ld1h { [[VALS:z[0-9]+]].h }, [[PG0]]/z, [x0] +; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].d, vl32 +; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] +; VBITS_GE_2048-NEXT: cmpeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, #0 +; VBITS_GE_2048-NEXT: uunpklo [[UPK1:z[0-9]+]].s, [[VALS]].h +; VBITS_GE_2048-NEXT: uunpklo [[UPK2:z[0-9]+]].d, [[UPK1]].s +; VBITS_GE_2048-NEXT: st1h { [[UPK2]].d }, [[MASK]], {{\[}}[[PTRS]].d] +; VBITS_GE_2048-NEXT: ret + %vals = load <32 x i16>, <32 x i16>* %a + %ptrs = load <32 x i16*>, <32 x i16*>* %b + %mask = icmp eq <32 x i16> %vals, zeroinitializer + call void @llvm.masked.scatter.v32i16(<32 x i16> %vals, <32 x i16*> %ptrs, i32 8, <32 x i1> %mask) + ret void +} + +; +; ST1W +; + +define void @masked_scatter_v2i32(<2 x i32>* %a, <2 x i32*>* %b) #0 { +; CHECK-LABEL: masked_scatter_v2i32: +; CHECK: ldr d[[VALS:[0-9]+]], [x0] +; CHECK-NEXT: ldr q[[PTRS:[0-9]+]], [x1] +; CHECK-NEXT: ptrue [[PG0:p[0-9]+]].s, vl2 +; CHECK-NEXT: cmeq v[[CMP:[0-9]+]].2s, v[[VALS]].2s, #0 +; CHECK-NEXT: cmpne [[MASK:p[0-9]+]].s, [[PG0]]/z, z[[CMP]].s, #0 +; CHECK-NEXT: ushll v[[SHL:[0-9]+]].2d, v[[VALS]].2s, #0 +; CHECK-NEXT: st1w { z[[SHL]].d }, [[MASK]], [z[[PTRS]].d] +; CHECK-NEXT: ret + %vals = load <2 x i32>, <2 x i32>* %a + %ptrs = load <2 x i32*>, <2 x i32*>* %b + %mask = icmp eq <2 x i32> %vals, zeroinitializer + call void @llvm.masked.scatter.v2i32(<2 x i32> %vals, <2 x i32*> %ptrs, i32 8, <2 x i1> %mask) + ret void +} + +define void @masked_scatter_v4i32(<4 x i32>* %a, <4 x i32*>* %b) #0 { +; CHECK-LABEL: masked_scatter_v4i32: +; CHECK: ldr q[[VALS:[0-9]+]], [x0] +; CHECK-NEXT: ptrue [[PG0:p[0-9]+]].d, vl4 +; CHECK-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG0]]/z, [x1] +; CHECK-NEXT: ptrue [[PG1:p[0-9]+]].s, vl4 +; CHECK-NEXT: cmeq v[[CMP:[0-9]+]].4s, v[[VALS]].4s, #0 +; CHECK-NEXT: cmpne [[MASK:p[0-9]+]].s, [[PG1]]/z, z[[CMP]].s, #0 +; CHECK-NEXT: uunpklo [[UPK:z[0-9]+]].d, z[[VALS]].s +; CHECK-NEXT: st1w { [[UPK]].d }, [[MASK]], {{\[}}[[PTRS]].d] +; CHECK-NEXT: ret + %vals = load <4 x i32>, <4 x i32>* %a + %ptrs = load <4 x i32*>, <4 x i32*>* %b + %mask = icmp eq <4 x i32> %vals, zeroinitializer + call void @llvm.masked.scatter.v4i32(<4 x i32> %vals, <4 x i32*> %ptrs, i32 8, <4 x i1> %mask) + ret void +} + +define void @masked_scatter_v8i32(<8 x i32>* %a, <8 x i32*>* %b) #0 { +; CHECK-LABEL: masked_scatter_v8i32: +; VBITS_EQ_256: ptrue [[PG0:p[0-9]+]].s, vl8 +; VBITS_EQ_256-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0] +; VBITS_EQ_256-NEXT: add x8, x1, #32 +; VBITS_EQ_256-NEXT: ptrue [[PG1:p[0-9]+]].d, vl4 +; VBITS_EQ_256-NEXT: ld1d { [[PTRS_HI:z[0-9]+]].d }, [[PG1]]/z, [x8] +; VBITS_EQ_256-NEXT: ld1d { [[PTRS_LO:z[0-9]+]].d }, [[PG1]]/z, [x1] +; VBITS_EQ_256-NEXT: cmpeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, #0 +; VBITS_EQ_256-NEXT: add x8, sp, #32 +; VBITS_EQ_256-NEXT: mov x9, sp +; VBITS_EQ_256-NEXT: mov [[MONE:z[0-9]+]].s, p1/z, #-1 +; VBITS_EQ_256-NEXT: st1w { [[MONE]].s }, [[PG0]], [x8] +; VBITS_EQ_256-NEXT: st1w { [[VALS]].s }, [[PG0]], [x9] +; VBITS_EQ_256-NEXT: ldr q[[CMP_LO:[0-9]+]], [sp, #32] +; VBITS_EQ_256-NEXT: ldr q[[VAL_LO:[0-9]+]], [sp] +; VBITS_EQ_256-NEXT: ptrue [[PG2:p[0-9]+]].s, vl4 +; VBITS_EQ_256-NEXT: cmpne [[MASK_LO:p[0-9]+]].s, [[PG2]]/z, z[[CMP_LO]].s, #0 +; VBITS_EQ_256-NEXT: uunpklo [[UPK1_LO:z[0-9]+]].d, z[[VAL_LO]].s +; VBITS_EQ_256-NEXT: st1w { [[UPK1_LO]].d }, [[MASK_LO]], {{\[}}[[PTRS_LO]].d] +; VBITS_EQ_256-NEXT: ldr q[[CMP_HI:[0-9]+]], [sp, #48] +; VBITS_EQ_256-NEXT: ldr q[[VAL_HI:[0-9]+]], [sp, #16] +; VBITS_EQ_256-NEXT: cmpne [[MASK_HI:p[0-9]+]].s, [[PG2]]/z, z[[CMP_HI]].s, #0 +; VBITS_EQ_256-NEXT: uunpklo [[UPK1_HI:z[0-9]+]].d, z[[VAL_HI]].s +; VBITS_EQ_256-NEXT: st1w { [[UPK1_HI]].d }, [[MASK_HI]], {{\[}}[[PTRS_HI]].d] + +; VBITS_GE_512: ptrue [[PG0:p[0-9]+]].s, vl8 +; VBITS_GE_512-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0] +; VBITS_GE_512-NEXT: ptrue [[PG1:p[0-9]+]].d, vl8 +; VBITS_GE_512-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] +; VBITS_GE_512-NEXT: cmpeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, #0 +; VBITS_GE_512-NEXT: uunpklo [[UPK:z[0-9]+]].d, [[VALS]].s +; VBITS_GE_512-NEXT: st1w { [[UPK]].d }, [[MASK]], {{\[}}[[PTRS]].d] +; VBITS_GE_512-NEXT: ret + %vals = load <8 x i32>, <8 x i32>* %a + %ptrs = load <8 x i32*>, <8 x i32*>* %b + %mask = icmp eq <8 x i32> %vals, zeroinitializer + call void @llvm.masked.scatter.v8i32(<8 x i32> %vals, <8 x i32*> %ptrs, i32 8, <8 x i1> %mask) + ret void +} + +define void @masked_scatter_v16i32(<16 x i32>* %a, <16 x i32*>* %b) #0 { +; CHECK-LABEL: masked_scatter_v16i32: +; VBITS_GE_1024: ptrue [[PG0:p[0-9]+]].s, vl16 +; VBITS_GE_1024-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0] +; VBITS_GE_1024-NEXT: ptrue [[PG1:p[0-9]+]].d, vl16 +; VBITS_GE_1024-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] +; VBITS_GE_1024-NEXT: cmpeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, #0 +; VBITS_GE_1024-NEXT: uunpklo [[UPK:z[0-9]+]].d, [[VALS]].s +; VBITS_GE_1024-NEXT: st1w { [[UPK]].d }, [[MASK]], {{\[}}[[PTRS]].d] +; VBITS_GE_1024-NEXT: ret + %vals = load <16 x i32>, <16 x i32>* %a + %ptrs = load <16 x i32*>, <16 x i32*>* %b + %mask = icmp eq <16 x i32> %vals, zeroinitializer + call void @llvm.masked.scatter.v16i32(<16 x i32> %vals, <16 x i32*> %ptrs, i32 8, <16 x i1> %mask) + ret void +} + +define void @masked_scatter_v32i32(<32 x i32>* %a, <32 x i32*>* %b) #0 { +; CHECK-LABEL: masked_scatter_v32i32: +; VBITS_GE_2048: ptrue [[PG0:p[0-9]+]].s, vl32 +; VBITS_GE_2048-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0] +; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].d, vl32 +; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] +; VBITS_GE_2048-NEXT: cmpeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, #0 +; VBITS_GE_2048-NEXT: uunpklo [[UPK:z[0-9]+]].d, [[VALS]].s +; VBITS_GE_2048-NEXT: st1w { [[UPK]].d }, [[MASK]], {{\[}}[[PTRS]].d] +; VBITS_GE_2048-NEXT: ret + %vals = load <32 x i32>, <32 x i32>* %a + %ptrs = load <32 x i32*>, <32 x i32*>* %b + %mask = icmp eq <32 x i32> %vals, zeroinitializer + call void @llvm.masked.scatter.v32i32(<32 x i32> %vals, <32 x i32*> %ptrs, i32 8, <32 x i1> %mask) + ret void +} + +; +; ST1D +; + +; Scalarize 1 x i64 scatters +define void @masked_scatter_v1i64(<1 x i64>* %a, <1 x i64*>* %b) #0 { +; CHECK-LABEL: masked_scatter_v1i64: +; CHECK-NOT: ptrue + %vals = load <1 x i64>, <1 x i64>* %a + %ptrs = load <1 x i64*>, <1 x i64*>* %b + %mask = icmp eq <1 x i64> %vals, zeroinitializer + call void @llvm.masked.scatter.v1i64(<1 x i64> %vals, <1 x i64*> %ptrs, i32 8, <1 x i1> %mask) + ret void +} + +define void @masked_scatter_v2i64(<2 x i64>* %a, <2 x i64*>* %b) #0 { +; CHECK-LABEL: masked_scatter_v2i64: +; CHECK: ldr q[[VALS:[0-9]+]], [x0] +; CHECK-NEXT: ldr q[[PTRS:[0-9]+]], [x1] +; CHECK-NEXT: ptrue [[PG0:p[0-9]+]].d, vl2 +; CHECK-NEXT: cmeq v[[CMP:[0-9]+]].2d, v[[VALS]].2d, #0 +; CHECK-NEXT: cmpne [[MASK:p[0-9]+]].d, [[PG0]]/z, z[[CMP]].d, #0 +; CHECK-NEXT: st1d { z[[VALS]].d }, [[MASK]], [z[[PTRS]].d] +; CHECK-NEXT: ret + %vals = load <2 x i64>, <2 x i64>* %a + %ptrs = load <2 x i64*>, <2 x i64*>* %b + %mask = icmp eq <2 x i64> %vals, zeroinitializer + call void @llvm.masked.scatter.v2i64(<2 x i64> %vals, <2 x i64*> %ptrs, i32 8, <2 x i1> %mask) + ret void +} + +define void @masked_scatter_v4i64(<4 x i64>* %a, <4 x i64*>* %b) #0 { +; CHECK-LABEL: masked_scatter_v4i64: +; CHECK: ptrue [[PG0:p[0-9]+]].d, vl4 +; CHECK-NEXT: ld1d { [[VALS:z[0-9]+]].d }, [[PG0]]/z, [x0] +; CHECK-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG0]]/z, [x1] +; CHECK-NEXT: cmpeq [[MASK:p[0-9]+]].d, [[PG0]]/z, [[VALS]].d, #0 +; CHECK-NEXT: st1d { [[VALS]].d }, [[MASK]], {{\[}}[[PTRS]].d] +; CHECK-NEXT: ret + %vals = load <4 x i64>, <4 x i64>* %a + %ptrs = load <4 x i64*>, <4 x i64*>* %b + %mask = icmp eq <4 x i64> %vals, zeroinitializer + call void @llvm.masked.scatter.v4i64(<4 x i64> %vals, <4 x i64*> %ptrs, i32 8, <4 x i1> %mask) + ret void +} + +define void @masked_scatter_v8i64(<8 x i64>* %a, <8 x i64*>* %b) #0 { +; CHECK-LABEL: masked_scatter_v8i64: +; VBITS_EQ_256: ptrue [[PG0:p[0-9]+]].d, vl4 +; VBITS_EQ_256-NEXT: add x8, x0, #32 +; VBITS_EQ_256-NEXT: add x9, x1, #32 +; VBITS_EQ_256-NEXT: ld1d { [[VALS_LO:z[0-9]+]].d }, [[PG0]]/z, [x0] +; VBITS_EQ_256-NEXT: ld1d { [[VALS_HI:z[0-9]+]].d }, [[PG0]]/z, [x8] +; VBITS_EQ_256-NEXT: ld1d { [[PTRS_HI:z[0-9]+]].d }, [[PG0]]/z, [x9] +; VBITS_EQ_256-NEXT: ld1d { [[PTRS_LO:z[0-9]+]].d }, [[PG0]]/z, [x1] +; VBITS_EQ_256-NEXT: cmpeq [[MASK_HI:p[0-9]+]].d, [[PG0]]/z, [[VALS_HI]].d, #0 +; VBITS_EQ_256-NEXT: cmpeq [[MASK_LO:p[0-9]+]].d, [[PG0]]/z, [[VALS_LO]].d, #0 +; VBITS_EQ_256-NEXT: st1d { [[VALS_LO]].d }, [[MASK_LO]], {{\[}}[[PTRS_LO]].d] +; VBITS_EQ_256-NEXT: st1d { [[VALS_HI]].d }, [[MASK_HI]], {{\[}}[[PTRS_HI]].d] +; VBITS_EQ_256-NEXT: ret + +; VBITS_GE_512: ptrue [[PG0:p[0-9]+]].d, vl8 +; VBITS_GE_512-NEXT: ld1d { [[VALS:z[0-9]+]].d }, [[PG0]]/z, [x0] +; VBITS_GE_512-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG0]]/z, [x1] +; VBITS_GE_512-NEXT: cmpeq [[MASK:p[0-9]+]].d, [[PG0]]/z, [[VALS]].d, #0 +; VBITS_GE_512-NEXT: st1d { [[VALS]].d }, [[MASK]], {{\[}}[[PTRS]].d] +; VBITS_GE_512-NEXT: ret + %vals = load <8 x i64>, <8 x i64>* %a + %ptrs = load <8 x i64*>, <8 x i64*>* %b + %mask = icmp eq <8 x i64> %vals, zeroinitializer + call void @llvm.masked.scatter.v8i64(<8 x i64> %vals, <8 x i64*> %ptrs, i32 8, <8 x i1> %mask) + ret void +} + +define void @masked_scatter_v16i64(<16 x i64>* %a, <16 x i64*>* %b) #0 { +; CHECK-LABEL: masked_scatter_v16i64: +; VBITS_GE_1024: ptrue [[PG0:p[0-9]+]].d, vl16 +; VBITS_GE_1024-NEXT: ld1d { [[VALS:z[0-9]+]].d }, [[PG0]]/z, [x0] +; VBITS_GE_1024-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG0]]/z, [x1] +; VBITS_GE_1024-NEXT: cmpeq [[MASK:p[0-9]+]].d, [[PG0]]/z, [[VALS]].d, #0 +; VBITS_GE_1024-NEXT: st1d { [[VALS]].d }, [[MASK]], {{\[}}[[PTRS]].d] +; VBITS_GE_1024-NEXT: ret + %vals = load <16 x i64>, <16 x i64>* %a + %ptrs = load <16 x i64*>, <16 x i64*>* %b + %mask = icmp eq <16 x i64> %vals, zeroinitializer + call void @llvm.masked.scatter.v16i64(<16 x i64> %vals, <16 x i64*> %ptrs, i32 8, <16 x i1> %mask) + ret void +} + +define void @masked_scatter_v32i64(<32 x i64>* %a, <32 x i64*>* %b) #0 { +; CHECK-LABEL: masked_scatter_v32i64: +; VBITS_GE_2048: ptrue [[PG0:p[0-9]+]].d, vl32 +; VBITS_GE_2048-NEXT: ld1d { [[VALS:z[0-9]+]].d }, [[PG0]]/z, [x0] +; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG0]]/z, [x1] +; VBITS_GE_2048-NEXT: cmpeq [[MASK:p[0-9]+]].d, [[PG0]]/z, [[VALS]].d, #0 +; VBITS_GE_2048-NEXT: st1d { [[VALS]].d }, [[MASK]], {{\[}}[[PTRS]].d] +; VBITS_GE_2048-NEXT: ret + %vals = load <32 x i64>, <32 x i64>* %a + %ptrs = load <32 x i64*>, <32 x i64*>* %b + %mask = icmp eq <32 x i64> %vals, zeroinitializer + call void @llvm.masked.scatter.v32i64(<32 x i64> %vals, <32 x i64*> %ptrs, i32 8, <32 x i1> %mask) + ret void +} + +; +; ST1H (float) +; + +define void @masked_scatter_v2f16(<2 x half>* %a, <2 x half*>* %b) #0 { +; CHECK-LABEL: masked_scatter_v2f16: +; CHECK: ldr s[[VALS:[0-9]+]], [x0] +; CHECK-NEXT: movi d2, #0000000000000000 +; CHECK-NEXT: ldr q[[PTRS:[0-9]+]], [x1] +; CHECK-NEXT: ptrue [[PG0:p[0-9]+]].h, vl4 +; CHECK-NEXT: fcmeq v[[CMP:[0-9]+]].4h, v[[VALS]].4h, #0.0 +; CHECK-NEXT: umov w8, v[[CMP]].h[0] +; CHECK-NEXT: umov w9, v[[CMP]].h[1] +; CHECK-NEXT: fmov s[[CMP]], w8 +; CHECK-NEXT: mov v[[CMP]].s[1], w9 +; CHECK-NEXT: shl v[[CMP]].2s, v[[CMP]].2s, #16 +; CHECK-NEXT: sshr v[[CMP]].2s, v[[CMP]].2s, #16 +; CHECK-NEXT: fmov w9, s[[CMP]] +; CHECK-NEXT: mov w8, v[[CMP]].s[1] +; CHECK-NEXT: mov v[[NCMP:[0-9]+]].h[0], w9 +; CHECK-NEXT: mov v[[NCMP]].h[1], w8 +; CHECK-NEXT: shl v[[NCMP]].4h, v[[NCMP]].4h, #15 +; CHECK-NEXT: uunpklo [[UPK1]].s, z[[VALS]].h +; CHECK-NEXT: sshr v[[NCMP]].4h, v[[NCMP]].4h, #15 +; CHECK-NEXT: cmpne [[MASK:p[0-9]+]].h, [[PG0]]/z, z[[NCMP]].h, #0 +; CHECK-NEXT: uunpklo [[UPK2]].d, [[UPK1]].s +; CHECK-NEXT: st1h { [[UPK2]].d }, [[MASK]], [z[[PTRS]].d] +; CHECK-NEXT: ret + %vals = load <2 x half>, <2 x half>* %a + %ptrs = load <2 x half*>, <2 x half*>* %b + %mask = fcmp oeq <2 x half> %vals, zeroinitializer + call void @llvm.masked.scatter.v2f16(<2 x half> %vals, <2 x half*> %ptrs, i32 8, <2 x i1> %mask) + ret void +} + +define void @masked_scatter_v4f16(<4 x half>* %a, <4 x half*>* %b) #0 { +; CHECK-LABEL: masked_scatter_v4f16: +; CHECK: ldr d[[VALS:[0-9]+]], [x0] +; CHECK-NEXT: ptrue [[PG0:p[0-9]+]].d, vl4 +; CHECK-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG0]]/z, [x1] +; CHECK-NEXT: ptrue [[PG1:p[0-9]+]].h, vl4 +; CHECK-NEXT: fcmeq v[[CMP:[0-9]+]].4h, v[[VALS]].4h, #0 +; CHECK-NEXT: uunpklo [[UPK1:z[0-9]+]].s, z[[VALS]].h +; CHECK-NEXT: cmpne [[MASK:p[0-9]+]].h, [[PG1]]/z, z[[CMP]].h, #0 +; CHECK-NEXT: uunpklo [[UPK2:z[0-9]+]].d, [[UPK1]].s +; CHECK-NEXT: st1h { [[UPK2]].d }, [[MASK]], {{\[}}[[PTRS]].d] +; CHECK-NEXT: ret + %vals = load <4 x half>, <4 x half>* %a + %ptrs = load <4 x half*>, <4 x half*>* %b + %mask = fcmp oeq <4 x half> %vals, zeroinitializer + call void @llvm.masked.scatter.v4f16(<4 x half> %vals, <4 x half*> %ptrs, i32 8, <4 x i1> %mask) + ret void +} + +define void @masked_scatter_v8f16(<8 x half>* %a, <8 x half*>* %b) #0 { +; CHECK-LABEL: masked_scatter_v8f16: +; VBITS_GE_512: ldr q[[VALS:[0-9]+]], [x0] +; VBITS_GE_512-NEXT: ptrue [[PG0:p[0-9]+]].d, vl8 +; VBITS_GE_512-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG0]]/z, [x1] +; VBITS_GE_512-NEXT: ptrue [[PG1:p[0-9]+]].h, vl8 +; VBITS_GE_512-NEXT: fcmeq v[[CMP:[0-9]+]].8h, v[[VALS]].8h, #0 +; VBITS_GE_512-NEXT: uunpklo [[UPK1:z[0-9]+]].s, z[[VALS]].h +; VBITS_GE_512-NEXT: cmpne [[MASK:p[0-9]+]].h, [[PG1]]/z, z[[CMP]].h, #0 +; VBITS_GE_512-NEXT: uunpklo [[UPK2:z[0-9]+]].d, [[UPK1]].s +; VBITS_GE_512-NEXT: st1h { [[UPK2]].d }, [[MASK]], {{\[}}[[PTRS]].d] +; VBITS_GE_512-NEXT: ret + %vals = load <8 x half>, <8 x half>* %a + %ptrs = load <8 x half*>, <8 x half*>* %b + %mask = fcmp oeq <8 x half> %vals, zeroinitializer + call void @llvm.masked.scatter.v8f16(<8 x half> %vals, <8 x half*> %ptrs, i32 8, <8 x i1> %mask) + ret void +} + +define void @masked_scatter_v16f16(<16 x half>* %a, <16 x half*>* %b) #0 { +; CHECK-LABEL: masked_scatter_v16f16: +; VBITS_GE_1024: ptrue [[PG0:p[0-9]+]].h, vl16 +; VBITS_GE_1024-NEXT: ld1h { [[VALS:z[0-9]+]].h }, [[PG0]]/z, [x0] +; VBITS_GE_1024-NEXT: ptrue [[PG1:p[0-9]+]].d, vl16 +; VBITS_GE_1024-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] +; VBITS_GE_1024-NEXT: mov [[ZERO:z[0-9]+]].h, #0 +; VBITS_GE_1024-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, [[ZERO]] +; VBITS_GE_1024-NEXT: uunpklo [[UPK1:z[0-9]+]].s, [[VALS]].h +; VBITS_GE_1024-NEXT: uunpklo [[UPK2:z[0-9]+]].d, [[UPK1]].s +; VBITS_GE_1024-NEXT: st1h { [[UPK2]].d }, [[MASK]], {{\[}}[[PTRS]].d] +; VBITS_GE_1024-NEXT: ret + %vals = load <16 x half>, <16 x half>* %a + %ptrs = load <16 x half*>, <16 x half*>* %b + %mask = fcmp oeq <16 x half> %vals, zeroinitializer + call void @llvm.masked.scatter.v16f16(<16 x half> %vals, <16 x half*> %ptrs, i32 8, <16 x i1> %mask) + ret void +} + +define void @masked_scatter_v32f16(<32 x half>* %a, <32 x half*>* %b) #0 { +; CHECK-LABEL: masked_scatter_v32f16: +; VBITS_GE_2048: ptrue [[PG0:p[0-9]+]].h, vl32 +; VBITS_GE_2048-NEXT: ld1h { [[VALS:z[0-9]+]].h }, [[PG0]]/z, [x0] +; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].d, vl32 +; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] +; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].h, #0 +; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, [[ZERO]] +; VBITS_GE_2048-NEXT: uunpklo [[UPK1:z[0-9]+]].s, [[VALS]].h +; VBITS_GE_2048-NEXT: uunpklo [[UPK2:z[0-9]+]].d, [[UPK1]].s +; VBITS_GE_2048-NEXT: st1h { [[UPK2]].d }, [[MASK]], {{\[}}[[PTRS]].d] +; VBITS_GE_2048-NEXT: ret + %vals = load <32 x half>, <32 x half>* %a + %ptrs = load <32 x half*>, <32 x half*>* %b + %mask = fcmp oeq <32 x half> %vals, zeroinitializer + call void @llvm.masked.scatter.v32f16(<32 x half> %vals, <32 x half*> %ptrs, i32 8, <32 x i1> %mask) + ret void +} + +; +; ST1W (float) +; + +define void @masked_scatter_v2f32(<2 x float>* %a, <2 x float*>* %b) #0 { +; CHECK-LABEL: masked_scatter_v2f32: +; CHECK: ldr d[[VALS:[0-9]+]], [x0] +; CHECK-NEXT: ldr q[[PTRS:[0-9]+]], [x1] +; CHECK-NEXT: ptrue [[PG0:p[0-9]+]].s, vl2 +; CHECK-NEXT: fcmeq v[[CMP:[0-9]+]].2s, v[[VALS]].2s, #0 +; CHECK-NEXT: cmpne [[MASK:p[0-9]+]].s, [[PG0]]/z, z[[CMP]].s, #0 +; CHECK-NEXT: ushll v[[SHL:[0-9]+]].2d, v[[VALS]].2s, #0 +; CHECK-NEXT: st1w { z[[SHL]].d }, [[MASK]], [z[[PTRS]].d] +; CHECK-NEXT: ret + %vals = load <2 x float>, <2 x float>* %a + %ptrs = load <2 x float*>, <2 x float*>* %b + %mask = fcmp oeq <2 x float> %vals, zeroinitializer + call void @llvm.masked.scatter.v2f32(<2 x float> %vals, <2 x float*> %ptrs, i32 8, <2 x i1> %mask) + ret void +} + +define void @masked_scatter_v4f32(<4 x float>* %a, <4 x float*>* %b) #0 { +; CHECK-LABEL: masked_scatter_v4f32: +; CHECK: ldr q[[VALS:[0-9]+]], [x0] +; CHECK-NEXT: ptrue [[PG0:p[0-9]+]].d, vl4 +; CHECK-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG0]]/z, [x1] +; CHECK-NEXT: ptrue [[PG1:p[0-9]+]].s, vl4 +; CHECK-NEXT: fcmeq v[[CMP:[0-9]+]].4s, v[[VALS]].4s, #0 +; CHECK-NEXT: cmpne [[MASK:p[0-9]+]].s, [[PG1]]/z, z[[CMP]].s, #0 +; CHECK-NEXT: uunpklo [[UPK:z[0-9]+]].d, z[[VALS]].s +; CHECK-NEXT: st1w { [[UPK]].d }, [[MASK]], {{\[}}[[PTRS]].d] +; CHECK-NEXT: ret + %vals = load <4 x float>, <4 x float>* %a + %ptrs = load <4 x float*>, <4 x float*>* %b + %mask = fcmp oeq <4 x float> %vals, zeroinitializer + call void @llvm.masked.scatter.v4f32(<4 x float> %vals, <4 x float*> %ptrs, i32 8, <4 x i1> %mask) + ret void +} + +define void @masked_scatter_v8f32(<8 x float>* %a, <8 x float*>* %b) #0 { +; CHECK-LABEL: masked_scatter_v8f32: +; VBITS_GE_512: ptrue [[PG0:p[0-9]+]].s, vl8 +; VBITS_GE_512-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0] +; VBITS_GE_512-NEXT: ptrue [[PG1:p[0-9]+]].d, vl8 +; VBITS_GE_512-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] +; VBITS_GE_512-NEXT: mov [[ZERO:z[0-9]+]].s, #0 +; VBITS_GE_512-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, [[ZERO]] +; VBITS_GE_512-NEXT: uunpklo [[UPK:z[0-9]+]].d, [[VALS]].s +; VBITS_GE_512-NEXT: st1w { [[UPK]].d }, [[MASK]], {{\[}}[[PTRS]].d] +; VBITS_GE_512-NEXT: ret + %vals = load <8 x float>, <8 x float>* %a + %ptrs = load <8 x float*>, <8 x float*>* %b + %mask = fcmp oeq <8 x float> %vals, zeroinitializer + call void @llvm.masked.scatter.v8f32(<8 x float> %vals, <8 x float*> %ptrs, i32 8, <8 x i1> %mask) + ret void +} + +define void @masked_scatter_v16f32(<16 x float>* %a, <16 x float*>* %b) #0 { +; CHECK-LABEL: masked_scatter_v16f32: +; VBITS_GE_1024: ptrue [[PG0:p[0-9]+]].s, vl16 +; VBITS_GE_1024-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0] +; VBITS_GE_1024-NEXT: ptrue [[PG1:p[0-9]+]].d, vl16 +; VBITS_GE_1024-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] +; VBITS_GE_1024-NEXT: mov [[ZERO:z[0-9]+]].s, #0 +; VBITS_GE_1024-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, [[ZERO]] +; VBITS_GE_1024-NEXT: uunpklo [[UPK:z[0-9]+]].d, [[VALS]].s +; VBITS_GE_1024-NEXT: st1w { [[UPK]].d }, [[MASK]], {{\[}}[[PTRS]].d] +; VBITS_GE_1024-NEXT: ret + %vals = load <16 x float>, <16 x float>* %a + %ptrs = load <16 x float*>, <16 x float*>* %b + %mask = fcmp oeq <16 x float> %vals, zeroinitializer + call void @llvm.masked.scatter.v16f32(<16 x float> %vals, <16 x float*> %ptrs, i32 8, <16 x i1> %mask) + ret void +} + +define void @masked_scatter_v32f32(<32 x float>* %a, <32 x float*>* %b) #0 { +; CHECK-LABEL: masked_scatter_v32f32: +; VBITS_GE_2048: ptrue [[PG0:p[0-9]+]].s, vl32 +; VBITS_GE_2048-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0] +; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].d, vl32 +; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] +; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].s, #0 +; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, [[ZERO]] +; VBITS_GE_2048-NEXT: uunpklo [[UPK:z[0-9]+]].d, [[VALS]].s +; VBITS_GE_2048-NEXT: st1w { [[UPK]].d }, [[MASK]], {{\[}}[[PTRS]].d] +; VBITS_GE_2048-NEXT: ret + %vals = load <32 x float>, <32 x float>* %a + %ptrs = load <32 x float*>, <32 x float*>* %b + %mask = fcmp oeq <32 x float> %vals, zeroinitializer + call void @llvm.masked.scatter.v32f32(<32 x float> %vals, <32 x float*> %ptrs, i32 8, <32 x i1> %mask) + ret void +} + +; +; ST1D (float) +; + +; Scalarize 1 x double scatters +define void @masked_scatter_v1f64(<1 x double>* %a, <1 x double*>* %b) #0 { +; CHECK-LABEL: masked_scatter_v1f64: +; CHECK-NOT: ptrue + %vals = load <1 x double>, <1 x double>* %a + %ptrs = load <1 x double*>, <1 x double*>* %b + %mask = fcmp oeq <1 x double> %vals, zeroinitializer + call void @llvm.masked.scatter.v1f64(<1 x double> %vals, <1 x double*> %ptrs, i32 8, <1 x i1> %mask) + ret void +} + +define void @masked_scatter_v2f64(<2 x double>* %a, <2 x double*>* %b) #0 { +; CHECK-LABEL: masked_scatter_v2f64: +; CHECK: ldr q[[VALS:[0-9]+]], [x0] +; CHECK-NEXT: ldr q[[PTRS:[0-9]+]], [x1] +; CHECK-NEXT: ptrue [[PG0:p[0-9]+]].d, vl2 +; CHECK-NEXT: fcmeq v[[CMP:[0-9]+]].2d, v[[VALS]].2d, #0 +; CHECK-NEXT: cmpne [[MASK:p[0-9]+]].d, [[PG0]]/z, z[[CMP]].d, #0 +; CHECK-NEXT: st1d { z[[VALS]].d }, [[MASK]], [z[[PTRS]].d] +; CHECK-NEXT: ret + %vals = load <2 x double>, <2 x double>* %a + %ptrs = load <2 x double*>, <2 x double*>* %b + %mask = fcmp oeq <2 x double> %vals, zeroinitializer + call void @llvm.masked.scatter.v2f64(<2 x double> %vals, <2 x double*> %ptrs, i32 8, <2 x i1> %mask) + ret void +} + +define void @masked_scatter_v4f64(<4 x double>* %a, <4 x double*>* %b) #0 { +; CHECK-LABEL: masked_scatter_v4f64: +; CHECK: ptrue [[PG0:p[0-9]+]].d, vl4 +; CHECK-NEXT: ld1d { [[VALS:z[0-9]+]].d }, [[PG0]]/z, [x0] +; CHECK-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG0]]/z, [x1] +; CHECK-NEXT: mov [[ZERO:z[0-9]+]].d, #0 +; CHECK-NEXT: fcmeq [[MASK:p[0-9]+]].d, [[PG0]]/z, [[VALS]].d, [[ZERO]].d +; CHECK-NEXT: st1d { [[VALS]].d }, [[MASK]], {{\[}}[[PTRS]].d] +; CHECK-NEXT: ret + %vals = load <4 x double>, <4 x double>* %a + %ptrs = load <4 x double*>, <4 x double*>* %b + %mask = fcmp oeq <4 x double> %vals, zeroinitializer + call void @llvm.masked.scatter.v4f64(<4 x double> %vals, <4 x double*> %ptrs, i32 8, <4 x i1> %mask) + ret void +} + +define void @masked_scatter_v8f64(<8 x double>* %a, <8 x double*>* %b) #0 { +; CHECK-LABEL: masked_scatter_v8f64: +; VBITS_GE_512: ptrue [[PG0:p[0-9]+]].d, vl8 +; VBITS_GE_512-NEXT: ld1d { [[VALS:z[0-9]+]].d }, [[PG0]]/z, [x0] +; VBITS_GE_512-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG0]]/z, [x1] +; VBITS_GE_512-NEXT: mov [[ZERO:z[0-9]+]].d, #0 +; VBITS_GE_512-NEXT: fcmeq [[MASK:p[0-9]+]].d, [[PG0]]/z, [[VALS]].d, [[ZERO]] +; VBITS_GE_512-NEXT: st1d { [[VALS]].d }, [[MASK]], {{\[}}[[PTRS]].d] +; VBITS_GE_512-NEXT: ret + %vals = load <8 x double>, <8 x double>* %a + %ptrs = load <8 x double*>, <8 x double*>* %b + %mask = fcmp oeq <8 x double> %vals, zeroinitializer + call void @llvm.masked.scatter.v8f64(<8 x double> %vals, <8 x double*> %ptrs, i32 8, <8 x i1> %mask) + ret void +} + +define void @masked_scatter_v16f64(<16 x double>* %a, <16 x double*>* %b) #0 { +; CHECK-LABEL: masked_scatter_v16f64: +; VBITS_GE_1024: ptrue [[PG0:p[0-9]+]].d, vl16 +; VBITS_GE_1024-NEXT: ld1d { [[VALS:z[0-9]+]].d }, [[PG0]]/z, [x0] +; VBITS_GE_1024-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG0]]/z, [x1] +; VBITS_GE_1024-NEXT: mov [[ZERO:z[0-9]+]].d, #0 +; VBITS_GE_1024-NEXT: fcmeq [[MASK:p[0-9]+]].d, [[PG0]]/z, [[VALS]].d, [[ZERO]] +; VBITS_GE_1024-NEXT: st1d { [[VALS]].d }, [[MASK]], {{\[}}[[PTRS]].d] +; VBITS_GE_1024-NEXT: ret + %vals = load <16 x double>, <16 x double>* %a + %ptrs = load <16 x double*>, <16 x double*>* %b + %mask = fcmp oeq <16 x double> %vals, zeroinitializer + call void @llvm.masked.scatter.v16f64(<16 x double> %vals, <16 x double*> %ptrs, i32 8, <16 x i1> %mask) + ret void +} + +define void @masked_scatter_v32f64(<32 x double>* %a, <32 x double*>* %b) #0 { +; CHECK-LABEL: masked_scatter_v32f64: +; VBITS_GE_2048: ptrue [[PG0:p[0-9]+]].d, vl32 +; VBITS_GE_2048-NEXT: ld1d { [[VALS:z[0-9]+]].d }, [[PG0]]/z, [x0] +; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG0]]/z, [x1] +; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].d, #0 +; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].d, [[PG0]]/z, [[VALS]].d, [[ZERO]] +; VBITS_GE_2048-NEXT: st1d { [[VALS]].d }, [[MASK]], {{\[}}[[PTRS]].d] +; VBITS_GE_2048-NEXT: ret + %vals = load <32 x double>, <32 x double>* %a + %ptrs = load <32 x double*>, <32 x double*>* %b + %mask = fcmp oeq <32 x double> %vals, zeroinitializer + call void @llvm.masked.scatter.v32f64(<32 x double> %vals, <32 x double*> %ptrs, i32 8, <32 x i1> %mask) + ret void +} + +; The above tests test the types, the below tests check that the addressing +; modes still function + +define void @masked_scatter_32b_scaled_sext(<32 x half>* %a, <32 x i32>* %b, half* %base) #0 { +; CHECK-LABEL: masked_scatter_32b_scaled_sext: +; VBITS_GE_2048: ptrue [[PG0:p[0-9]+]].h, vl32 +; VBITS_GE_2048-NEXT: ld1h { [[VALS:z[0-9]+]].h }, [[PG0]]/z, [x0] +; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].s, vl32 +; VBITS_GE_2048-NEXT: ld1w { [[PTRS:z[0-9]+]].s }, [[PG1]]/z, [x1] +; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].h, #0 +; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, [[ZERO]] +; VBITS_GE_2048-NEXT: uunpklo [[UPK:z[0-9]+]].s, [[VALS]].h +; VBITS_GE_2048-NEXT: st1h { [[VALS]].s }, [[MASK]], [x2, [[PTRS]].s, sxtw #1] +; VBITS_GE_2048-NEXT: ret + %vals = load <32 x half>, <32 x half>* %a + %idxs = load <32 x i32>, <32 x i32>* %b + %ext = sext <32 x i32> %idxs to <32 x i64> + %ptrs = getelementptr half, half* %base, <32 x i64> %ext + %mask = fcmp oeq <32 x half> %vals, zeroinitializer + call void @llvm.masked.scatter.v32f16(<32 x half> %vals, <32 x half*> %ptrs, i32 8, <32 x i1> %mask) + ret void +} + +define void @masked_scatter_32b_scaled_zext(<32 x half>* %a, <32 x i32>* %b, half* %base) #0 { +; CHECK-LABEL: masked_scatter_32b_scaled_zext: +; VBITS_GE_2048: ptrue [[PG0:p[0-9]+]].h, vl32 +; VBITS_GE_2048-NEXT: ld1h { [[VALS:z[0-9]+]].h }, [[PG0]]/z, [x0] +; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].s, vl32 +; VBITS_GE_2048-NEXT: ld1w { [[PTRS:z[0-9]+]].s }, [[PG1]]/z, [x1] +; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].h, #0 +; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, [[ZERO]] +; VBITS_GE_2048-NEXT: uunpklo [[UPK:z[0-9]+]].s, [[VALS]].h +; VBITS_GE_2048-NEXT: st1h { [[VALS]].s }, [[MASK]], [x2, [[PTRS]].s, uxtw #1] +; VBITS_GE_2048-NEXT: ret + %vals = load <32 x half>, <32 x half>* %a + %idxs = load <32 x i32>, <32 x i32>* %b + %ext = zext <32 x i32> %idxs to <32 x i64> + %ptrs = getelementptr half, half* %base, <32 x i64> %ext + %mask = fcmp oeq <32 x half> %vals, zeroinitializer + call void @llvm.masked.scatter.v32f16(<32 x half> %vals, <32 x half*> %ptrs, i32 8, <32 x i1> %mask) + ret void +} + +define void @masked_scatter_32b_unscaled_sext(<32 x half>* %a, <32 x i32>* %b, i8* %base) #0 { +; CHECK-LABEL: masked_scatter_32b_unscaled_sext: +; VBITS_GE_2048: ptrue [[PG0:p[0-9]+]].h, vl32 +; VBITS_GE_2048-NEXT: ld1h { [[VALS:z[0-9]+]].h }, [[PG0]]/z, [x0] +; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].s, vl32 +; VBITS_GE_2048-NEXT: ld1w { [[PTRS:z[0-9]+]].s }, [[PG1]]/z, [x1] +; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].h, #0 +; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, [[ZERO]] +; VBITS_GE_2048-NEXT: uunpklo [[UPK:z[0-9]+]].s, [[VALS]].h +; VBITS_GE_2048-NEXT: st1h { [[VALS]].s }, [[MASK]], [x2, [[PTRS]].s, sxtw] +; VBITS_GE_2048-NEXT: ret + %vals = load <32 x half>, <32 x half>* %a + %idxs = load <32 x i32>, <32 x i32>* %b + %ext = sext <32 x i32> %idxs to <32 x i64> + %byte_ptrs = getelementptr i8, i8* %base, <32 x i64> %ext + %ptrs = bitcast <32 x i8*> %byte_ptrs to <32 x half*> + %mask = fcmp oeq <32 x half> %vals, zeroinitializer + call void @llvm.masked.scatter.v32f16(<32 x half> %vals, <32 x half*> %ptrs, i32 8, <32 x i1> %mask) + ret void +} + +define void @masked_scatter_32b_unscaled_zext(<32 x half>* %a, <32 x i32>* %b, i8* %base) #0 { +; CHECK-LABEL: masked_scatter_32b_unscaled_zext: +; VBITS_GE_2048: ptrue [[PG0:p[0-9]+]].h, vl32 +; VBITS_GE_2048-NEXT: ld1h { [[VALS:z[0-9]+]].h }, [[PG0]]/z, [x0] +; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].s, vl32 +; VBITS_GE_2048-NEXT: ld1w { [[PTRS:z[0-9]+]].s }, [[PG1]]/z, [x1] +; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].h, #0 +; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].h, [[PG0]]/z, [[VALS]].h, [[ZERO]] +; VBITS_GE_2048-NEXT: uunpklo [[UPK:z[0-9]+]].s, [[VALS]].h +; VBITS_GE_2048-NEXT: st1h { [[VALS]].s }, [[MASK]], [x2, [[PTRS]].s, uxtw] +; VBITS_GE_2048-NEXT: ret + %vals = load <32 x half>, <32 x half>* %a + %idxs = load <32 x i32>, <32 x i32>* %b + %ext = zext <32 x i32> %idxs to <32 x i64> + %byte_ptrs = getelementptr i8, i8* %base, <32 x i64> %ext + %ptrs = bitcast <32 x i8*> %byte_ptrs to <32 x half*> + %mask = fcmp oeq <32 x half> %vals, zeroinitializer + call void @llvm.masked.scatter.v32f16(<32 x half> %vals, <32 x half*> %ptrs, i32 8, <32 x i1> %mask) + ret void +} + +define void @masked_scatter_64b_scaled(<32 x float>* %a, <32 x i64>* %b, float* %base) #0 { +; CHECK-LABEL: masked_scatter_64b_scaled: +; VBITS_GE_2048: ptrue [[PG0:p[0-9]+]].s, vl32 +; VBITS_GE_2048-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0] +; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].d, vl32 +; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] +; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].s, #0 +; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, [[ZERO]] +; VBITS_GE_2048-NEXT: uunpklo [[UPK:z[0-9]+]].d, [[VALS]].s +; VBITS_GE_2048-NEXT: st1w { [[VALS]].d }, [[MASK]], [x2, [[PTRS]].d, lsl #2] +; VBITS_GE_2048-NEXT: ret + %vals = load <32 x float>, <32 x float>* %a + %idxs = load <32 x i64>, <32 x i64>* %b + %ptrs = getelementptr float, float* %base, <32 x i64> %idxs + %mask = fcmp oeq <32 x float> %vals, zeroinitializer + call void @llvm.masked.scatter.v32f32(<32 x float> %vals, <32 x float*> %ptrs, i32 8, <32 x i1> %mask) + ret void +} + +define void @masked_scatter_64b_unscaled(<32 x float>* %a, <32 x i64>* %b, i8* %base) #0 { +; CHECK-LABEL: masked_scatter_64b_unscaled: +; VBITS_GE_2048: ptrue [[PG0:p[0-9]+]].s, vl32 +; VBITS_GE_2048-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0] +; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].d, vl32 +; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] +; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].s, #0 +; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, [[ZERO]] +; VBITS_GE_2048-NEXT: uunpklo [[UPK:z[0-9]+]].d, [[VALS]].s +; VBITS_GE_2048-NEXT: st1w { [[VALS]].d }, [[MASK]], [x2, [[PTRS]].d] +; VBITS_GE_2048-NEXT: ret + %vals = load <32 x float>, <32 x float>* %a + %idxs = load <32 x i64>, <32 x i64>* %b + %byte_ptrs = getelementptr i8, i8* %base, <32 x i64> %idxs + %ptrs = bitcast <32 x i8*> %byte_ptrs to <32 x float*> + %mask = fcmp oeq <32 x float> %vals, zeroinitializer + call void @llvm.masked.scatter.v32f32(<32 x float> %vals, <32 x float*> %ptrs, i32 8, <32 x i1> %mask) + ret void +} + +; FIXME: This case does not yet codegen well due to deficiencies in opcode selection +define void @masked_scatter_vec_plus_reg(<32 x float>* %a, <32 x i8*>* %b, i64 %off) #0 { +; CHECK-LABEL: masked_scatter_vec_plus_reg: +; VBITS_GE_2048: ptrue [[PG0:p[0-9]+]].s, vl32 +; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].d, vl32 +; VBITS_GE_2048-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0] +; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] +; VBITS_GE_2048-NEXT: mov [[OFF:z[0-9]+]].d, x2 +; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].s, #0 +; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, [[ZERO]] +; VBITS_GE_2048-NEXT: add [[PTRS_ADD:z[0-9]+]].d, [[PG1]]/m, [[PTRS]].d, [[OFF]].d +; VBITS_GE_2048-NEXT: uunpklo [[UPK:z[0-9]+]].d, [[VALS]].s +; VBITS_GE_2048-NEXT: st1w { [[VALS]].d }, [[MASK]], {{\[}}[[PTRS_ADD]].d] +; VBITS_GE_2048-NEXT: ret + %vals = load <32 x float>, <32 x float>* %a + %bases = load <32 x i8*>, <32 x i8*>* %b + %byte_ptrs = getelementptr i8, <32 x i8*> %bases, i64 %off + %ptrs = bitcast <32 x i8*> %byte_ptrs to <32 x float*> + %mask = fcmp oeq <32 x float> %vals, zeroinitializer + call void @llvm.masked.scatter.v32f32(<32 x float> %vals, <32 x float*> %ptrs, i32 8, <32 x i1> %mask) + ret void +} + +; FIXME: This case does not yet codegen well due to deficiencies in opcode selection +define void @masked_scatter_vec_plus_imm(<32 x float>* %a, <32 x i8*>* %b) #0 { +; CHECK-LABEL: masked_scatter_vec_plus_imm: +; VBITS_GE_2048: ptrue [[PG0:p[0-9]+]].s, vl32 +; VBITS_GE_2048-NEXT: ptrue [[PG1:p[0-9]+]].d, vl32 +; VBITS_GE_2048-NEXT: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0] +; VBITS_GE_2048-NEXT: ld1d { [[PTRS:z[0-9]+]].d }, [[PG1]]/z, [x1] +; VBITS_GE_2048-NEXT: mov [[OFF:z[0-9]+]].d, #4 +; VBITS_GE_2048-NEXT: mov [[ZERO:z[0-9]+]].s, #0 +; VBITS_GE_2048-NEXT: fcmeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, [[ZERO]] +; VBITS_GE_2048-NEXT: add [[PTRS_ADD:z[0-9]+]].d, [[PG1]]/m, [[PTRS]].d, [[OFF]].d +; VBITS_GE_2048-NEXT: uunpklo [[UPK:z[0-9]+]].d, [[VALS]].s +; VBITS_GE_2048-NEXT: st1w { [[VALS]].d }, [[MASK]], {{\[}}[[PTRS_ADD]].d] +; VBITS_GE_2048-NEXT: ret + %vals = load <32 x float>, <32 x float>* %a + %bases = load <32 x i8*>, <32 x i8*>* %b + %byte_ptrs = getelementptr i8, <32 x i8*> %bases, i64 4 + %ptrs = bitcast <32 x i8*> %byte_ptrs to <32 x float*> + %mask = fcmp oeq <32 x float> %vals, zeroinitializer + call void @llvm.masked.scatter.v32f32(<32 x float> %vals, <32 x float*> %ptrs, i32 8, <32 x i1> %mask) + ret void +} + +declare void @llvm.masked.scatter.v2i8(<2 x i8>, <2 x i8*>, i32, <2 x i1>) +declare void @llvm.masked.scatter.v4i8(<4 x i8>, <4 x i8*>, i32, <4 x i1>) +declare void @llvm.masked.scatter.v8i8(<8 x i8>, <8 x i8*>, i32, <8 x i1>) +declare void @llvm.masked.scatter.v16i8(<16 x i8>, <16 x i8*>, i32, <16 x i1>) +declare void @llvm.masked.scatter.v32i8(<32 x i8>, <32 x i8*>, i32, <32 x i1>) + +declare void @llvm.masked.scatter.v2i16(<2 x i16>, <2 x i16*>, i32, <2 x i1>) +declare void @llvm.masked.scatter.v4i16(<4 x i16>, <4 x i16*>, i32, <4 x i1>) +declare void @llvm.masked.scatter.v8i16(<8 x i16>, <8 x i16*>, i32, <8 x i1>) +declare void @llvm.masked.scatter.v16i16(<16 x i16>, <16 x i16*>, i32, <16 x i1>) +declare void @llvm.masked.scatter.v32i16(<32 x i16>, <32 x i16*>, i32, <32 x i1>) + +declare void @llvm.masked.scatter.v2i32(<2 x i32>, <2 x i32*>, i32, <2 x i1>) +declare void @llvm.masked.scatter.v4i32(<4 x i32>, <4 x i32*>, i32, <4 x i1>) +declare void @llvm.masked.scatter.v8i32(<8 x i32>, <8 x i32*>, i32, <8 x i1>) +declare void @llvm.masked.scatter.v16i32(<16 x i32>, <16 x i32*>, i32, <16 x i1>) +declare void @llvm.masked.scatter.v32i32(<32 x i32>, <32 x i32*>, i32, <32 x i1>) + +declare void @llvm.masked.scatter.v1i64(<1 x i64>, <1 x i64*>, i32, <1 x i1>) +declare void @llvm.masked.scatter.v2i64(<2 x i64>, <2 x i64*>, i32, <2 x i1>) +declare void @llvm.masked.scatter.v4i64(<4 x i64>, <4 x i64*>, i32, <4 x i1>) +declare void @llvm.masked.scatter.v8i64(<8 x i64>, <8 x i64*>, i32, <8 x i1>) +declare void @llvm.masked.scatter.v16i64(<16 x i64>, <16 x i64*>, i32, <16 x i1>) +declare void @llvm.masked.scatter.v32i64(<32 x i64>, <32 x i64*>, i32, <32 x i1>) + +declare void @llvm.masked.scatter.v2f16(<2 x half>, <2 x half*>, i32, <2 x i1>) +declare void @llvm.masked.scatter.v4f16(<4 x half>, <4 x half*>, i32, <4 x i1>) +declare void @llvm.masked.scatter.v8f16(<8 x half>, <8 x half*>, i32, <8 x i1>) +declare void @llvm.masked.scatter.v16f16(<16 x half>, <16 x half*>, i32, <16 x i1>) +declare void @llvm.masked.scatter.v32f16(<32 x half>, <32 x half*>, i32, <32 x i1>) + +declare void @llvm.masked.scatter.v2f32(<2 x float>, <2 x float*>, i32, <2 x i1>) +declare void @llvm.masked.scatter.v4f32(<4 x float>, <4 x float*>, i32, <4 x i1>) +declare void @llvm.masked.scatter.v8f32(<8 x float>, <8 x float*>, i32, <8 x i1>) +declare void @llvm.masked.scatter.v16f32(<16 x float>, <16 x float*>, i32, <16 x i1>) +declare void @llvm.masked.scatter.v32f32(<32 x float>, <32 x float*>, i32, <32 x i1>) + +declare void @llvm.masked.scatter.v1f64(<1 x double>, <1 x double*>, i32, <1 x i1>) +declare void @llvm.masked.scatter.v2f64(<2 x double>, <2 x double*>, i32, <2 x i1>) +declare void @llvm.masked.scatter.v4f64(<4 x double>, <4 x double*>, i32, <4 x i1>) +declare void @llvm.masked.scatter.v8f64(<8 x double>, <8 x double*>, i32, <8 x i1>) +declare void @llvm.masked.scatter.v16f64(<16 x double>, <16 x double*>, i32, <16 x i1>) +declare void @llvm.masked.scatter.v32f64(<32 x double>, <32 x double*>, i32, <32 x i1>) + +attributes #0 = { "target-features"="+sve" } diff --git a/llvm/test/CodeGen/AArch64/sve-masked-gather.ll b/llvm/test/CodeGen/AArch64/sve-masked-gather.ll index f9a476bbb1c1a..784053be075fc 100644 --- a/llvm/test/CodeGen/AArch64/sve-masked-gather.ll +++ b/llvm/test/CodeGen/AArch64/sve-masked-gather.ll @@ -106,6 +106,27 @@ define @masked_sgather_nxv2i32( %ptrs, %vals.sext } +define @masked_gather_passthru( %ptrs, %mask, %passthru) { +; CHECK-LABEL: masked_gather_passthru: +; CHECK: // %bb.0: +; CHECK-NEXT: ld1sw { z0.d }, p0/z, [z0.d] +; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d +; CHECK-NEXT: ret + %vals = call @llvm.masked.gather.nxv2i32( %ptrs, i32 4, %mask, %passthru) + %vals.sext = sext %vals to + ret %vals.sext +} + +define @masked_gather_passthru_0( %ptrs, %mask) { +; CHECK-LABEL: masked_gather_passthru_0: +; CHECK: // %bb.0: +; CHECK-NEXT: ld1sw { z0.d }, p0/z, [z0.d] +; CHECK-NEXT: ret + %vals = call @llvm.masked.gather.nxv2i32( %ptrs, i32 4, %mask, zeroinitializer) + %vals.sext = sext %vals to + ret %vals.sext +} + declare @llvm.masked.gather.nxv2i8(, i32, , ) declare @llvm.masked.gather.nxv2i16(, i32, , ) declare @llvm.masked.gather.nxv2i32(, i32, , ) From 231b9dd9de87f15170850e7d752dd6bd19799449 Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Tue, 29 Jun 2021 12:53:09 +0000 Subject: [PATCH 406/619] [mlir][Linalg] Add comprehensive bufferization support for linalg::InitTensor and tensor::CastOp (11/n) Also add an integration test that connects all the dots end to end, including with cast to unranked tensor for external library calls. Differential Revision: https://reviews.llvm.org/D105106 --- .../Transforms/ComprehensiveBufferize.cpp | 84 ++++++++++++++++--- .../comprehensive-module-bufferize.mlir | 70 ++++++++++++++++ .../CPU/test-comprehensive-bufferize.mlir | 44 ++++++++++ 3 files changed, 185 insertions(+), 13 deletions(-) create mode 100644 mlir/test/Integration/Dialect/Linalg/CPU/test-comprehensive-bufferize.mlir diff --git a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp index 824092df292ca..03191a85e506c 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp @@ -357,7 +357,9 @@ static bool hasKnownBufferizationAliasingBehavior(Operation *op) { return // clang-format off isagetResult(0); } +/// Return the OpResult that may bufferize into the same buffer as `opOperand` +/// when the op is bufferized inplace. +/// Return null if no such result exists. +static OpResult getInplaceableOpResult(tensor::CastOp op, + OpOperand &opOperand) { + return op->getResult(0); +} + /// Return the OpResult that may bufferize into the same buffer as `opOperand` /// when the op is bufferized inplace. /// The inplace analysis uses this information along with interfering read @@ -428,7 +438,8 @@ static OpResult getInplaceableOpResult(OpOperand &opOperand) { // clang-format off // Ops that perform destructive updates on operand(s) to produce // result(s). - .Case( @@ -455,6 +466,7 @@ static Optional getAliasingOpOperand(OpResult result) { if (!hasKnownBufferizationAliasingBehavior(result.getDefiningOp())) return None; return TypeSwitch(result.getDefiningOp()) + .Case([&](tensor::CastOp op) { return &op->getOpOperand(0); }) .Case([&](LinalgOp op) { return op.getOutputTensorOperands()[result.getResultNumber()]; }) @@ -1559,6 +1571,35 @@ bufferize(OpBuilder &b, CallOpInterface callOp, BlockAndValueMapping &bvm, return success(); } +/// tensor::CastOp bufferizes to memref::CastOp. +static LogicalResult bufferize(OpBuilder &b, tensor::CastOp castOp, + BlockAndValueMapping &bvm, + BufferizationAliasInfo &aliasInfo) { + // Take a guard before anything else. + OpBuilder::InsertionGuard g(b); + b.setInsertionPoint(castOp); + + Type sourceType = lookup(bvm, castOp.source()).getType(); + auto rankedMemRefType = sourceType.dyn_cast(); + auto unrankedMemRefType = sourceType.dyn_cast(); + assert(rankedMemRefType || unrankedMemRefType); + unsigned memorySpace = rankedMemRefType + ? rankedMemRefType.getMemorySpaceAsInt() + : unrankedMemRefType.getMemorySpaceAsInt(); + TensorType tensorType = castOp.getResult().getType().cast(); + ArrayRef affineMaps = + rankedMemRefType && tensorType.isa() + ? rankedMemRefType.getAffineMaps() + : ArrayRef{}; + Type memRefType = getContiguousOrUnrankedMemRefType( + castOp.getResult().getType(), {}, memorySpace); + Value res = b.create(castOp.getLoc(), memRefType, + lookup(bvm, castOp.source())); + aliasInfo.insertNewBufferEquivalence(res, castOp.getResult()); + map(bvm, castOp.getResult(), res); + return success(); +} + /// DimOp tensor operand is modified inplace. This allows leaving dead /// tensors behind that will get DCE'd. static LogicalResult bufferize(OpBuilder &b, tensor::DimOp dimOp, @@ -1635,6 +1676,21 @@ static LogicalResult bufferize(OpBuilder &b, FuncOp funcOp, return success(); } +/// InitTensor always allocates. +/// TODO: consider hoisting across function boundaries prior to bufferization. +static LogicalResult bufferize(OpBuilder &b, InitTensorOp initTensorOp, + BlockAndValueMapping &bvm, + BufferizationAliasInfo &aliasInfo) { + // Take a guard before anything else. + OpBuilder::InsertionGuard g(b); + b.setInsertionPoint(initTensorOp); + + Value alloc = createNewAllocDeallocPairForShapedValue( + b, initTensorOp->getLoc(), initTensorOp.result(), aliasInfo); + map(bvm, initTensorOp.result(), alloc); + return success(); +} + /// ReturnOp always creates memref::TensorLoadOp. static LogicalResult bufferize(OpBuilder &b, ReturnOp returnOp, BlockAndValueMapping &bvm, @@ -2070,16 +2126,18 @@ static LogicalResult bufferizeFuncOpInternals( // Since walk has to be PreOrder, we need to erase ops that require it // separately: this is the case for CallOp SmallVector toErase; - WalkResult result = funcOp.walk([&](Operation *op) - -> WalkResult { - // clang-format off + WalkResult result = + funcOp.walk([&](Operation *op) -> WalkResult { + // clang-format off WalkResult result = TypeSwitch(op) // Skip BufferCast and TensorLoad ops. .Case([&](auto) { return success(); }) - .Case(op)) - if (llvm::any_of(op->getOperandTypes(), isaTensor) || - llvm::any_of(op->getResultTypes(), isaTensor)) - toErase.push_back(op); + // Register post-walk erasure, if necessary. + if (isa(op)) + if (llvm::any_of(op->getOperandTypes(), isaTensor) || + llvm::any_of(op->getResultTypes(), isaTensor)) + toErase.push_back(op); - return result; - }); + return result; + }); LDBG("End BufferizeFuncOpInternals:\n" << funcOp << '\n'); for (Operation *op : toErase) diff --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir index 7756587560ead..b71f6f92d51ed 100644 --- a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir +++ b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir @@ -58,3 +58,73 @@ func @bar( // CHECK-NEXT: return return %r0#0, %r0#1: tensor, tensor } + +// ----- + +// CHECK-DAG: #[[$DYN_0D_MAP:.*]] = affine_map<()[s0] -> (s0)> +// CHECK-DAG: #[[$DYN_1D_MAP:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)> + +// CHECK: func @init_and_dot( +// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<64xf32, #[[$DYN_1D_MAP]]> +// CHECK-SAME: %[[B:[a-zA-Z0-9]*]]: memref<64xf32, #[[$DYN_1D_MAP]]> +// CHECK-SAME: %[[C:[a-zA-Z0-9]*]]: memref +func @init_and_dot(%a: tensor<64xf32>, %b: tensor<64xf32>, %c: tensor) -> tensor { + // CHECK-NEXT: %[[C0:.*]] = constant 0{{.*}} : f32 + %v0 = constant 0.0 : f32 + + // CHECK-NEXT: linalg.fill(%[[C0]], %[[C]]) : f32, memref + %d = linalg.fill(%v0, %c) : f32, tensor -> tensor + + // CHECK-NEXT: linalg.dot ins(%[[A]], %[[B]] : memref<64xf32, #[[$DYN_1D_MAP]]>, memref<64xf32, #[[$DYN_1D_MAP]]>) outs(%[[C]] : memref) + %e = linalg.dot ins(%a, %b : tensor<64xf32>,tensor<64xf32>) + outs(%d: tensor) -> tensor + + // CHECK-NEXT: return + return %e : tensor +} + +// CHECK: func @main() +func @main() { + // CHECK-DAG: %[[C0:.*]] = constant 0{{.*}} : f32 + // CHECK-DAG: %[[C1:.*]] = constant 1{{.*}} : f32 + // CHECK-DAG: %[[C2:.*]] = constant 2{{.*}} : f32 + %v0 = constant 0.0 : f32 + %v1 = constant 1.0 : f32 + %v2 = constant 2.0 : f32 + + // CHECK-NEXT: %[[A:.*]] = memref.alloc() : memref<64xf32> + // CHECK-NEXT: %[[B:.*]] = memref.alloc() : memref<64xf32> + // CHECK-NEXT: %[[C:.*]] = memref.alloc() : memref + %A = linalg.init_tensor [64] : tensor<64xf32> + %B = linalg.init_tensor [64] : tensor<64xf32> + %C = linalg.init_tensor [] : tensor + + // CHECK-NEXT: linalg.fill(%[[C1]], %[[A]]) : f32, memref<64xf32> + // CHECK-NEXT: linalg.fill(%[[C2]], %[[B]]) : f32, memref<64xf32> + // CHECK-NEXT: linalg.fill(%[[C0]], %[[C]]) : f32, memref + %AA = linalg.fill(%v1, %A) : f32, tensor<64xf32> -> tensor<64xf32> + %BB = linalg.fill(%v2, %B) : f32, tensor<64xf32> -> tensor<64xf32> + %CC = linalg.fill(%v0, %C) : f32, tensor -> tensor + + // CHECK-NEXT: %[[cA:.*]] = memref.cast %[[A]] : memref<64xf32> to memref<64xf32, #[[$DYN_1D_MAP]]> + // CHECK-NEXT: %[[cB:.*]] = memref.cast %[[B]] : memref<64xf32> to memref<64xf32, #[[$DYN_1D_MAP]]> + // CHECK-NEXT: %[[cC:.*]] = memref.cast %[[C]] : memref to memref + // CHECK-NEXT: call @init_and_dot(%[[cA]], %[[cB]], %[[cC]]) + %res = call @init_and_dot(%AA, %BB, %CC) : + (tensor<64xf32>, tensor<64xf32>, tensor) -> tensor + + // CHECK-NEXT: %[[dC:.*]] = memref.cast %[[C]] : memref to memref<*xf32> + %res2 = tensor.cast %res: tensor to tensor<*xf32> + + // CHECK-NEXT: call @print_memref_f32(%[[dC]]) : (memref<*xf32>) -> () + call @print_memref_f32(%res2) : (tensor<*xf32>) -> () + + // CHECK-DAG: memref.dealloc %[[A]] : memref<64xf32> + // CHECK-DAG: memref.dealloc %[[B]] : memref<64xf32> + // CHECK-DAG: memref.dealloc %[[C]] : memref + // CHECK-NEXT: return + return +} + +// CHECK: func private @print_memref_f32(memref<*xf32>) +func private @print_memref_f32(tensor<*xf32>) diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-comprehensive-bufferize.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-comprehensive-bufferize.mlir new file mode 100644 index 0000000000000..7a4e134e498f8 --- /dev/null +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-comprehensive-bufferize.mlir @@ -0,0 +1,44 @@ +// RUN: mlir-opt %s -canonicalize -cse -linalg-comprehensive-module-bufferize |\ +// RUN: mlir-opt -convert-vector-to-scf -lower-affine -convert-linalg-to-loops |\ +// RUN: mlir-opt -canonicalize -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \ + +// RUN: mlir-cpu-runner -O3 -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext |\ +// RUN: FileCheck %s + +func @init_and_dot(%a: tensor<64xf32>, %b: tensor<64xf32>, %c: tensor) -> tensor { + %v0 = constant 0.0 : f32 + + %d = linalg.fill(%v0, %c) : f32, tensor -> tensor + + %e = linalg.dot ins(%a, %b : tensor<64xf32>,tensor<64xf32>) + outs(%d: tensor) -> tensor + + return %e : tensor +} + +func @main() { + %v0 = constant 0.0 : f32 + %v1 = constant 1.0 : f32 + %v2 = constant 2.0 : f32 + + %A = linalg.init_tensor [64] : tensor<64xf32> + %B = linalg.init_tensor [64] : tensor<64xf32> + %C = linalg.init_tensor [] : tensor + %AA = linalg.fill(%v1, %A) : f32, tensor<64xf32> -> tensor<64xf32> + %BB = linalg.fill(%v2, %B) : f32, tensor<64xf32> -> tensor<64xf32> + %CC = linalg.fill(%v0, %C) : f32, tensor -> tensor + + %res = call @init_and_dot(%AA, %BB, %CC) : + (tensor<64xf32>, tensor<64xf32>, tensor) -> tensor + + %res2 = tensor.cast %res: tensor to tensor<*xf32> + +// CHECK: Unranked Memref base@ = {{.*}} rank = 0 offset = 0 sizes = [] strides = [] data = +// CHECK-NEXT: [128] + call @print_memref_f32(%res2) : (tensor<*xf32>) -> () + + return +} + +func private @print_memref_f32(tensor<*xf32>) attributes { llvm.emit_c_interface } From d21a35ac0a958fd4cff0b8f424a2706b8785b89d Mon Sep 17 00:00:00 2001 From: Hussain Kadhem Date: Thu, 1 Jul 2021 11:30:49 +0200 Subject: [PATCH 407/619] [VP] Implementation of intrinsic and SDNode definitions for VP load, store, gather, scatter. This patch adds intrinsic definitions and SDNodes for predicated load/store/gather/scatter, based on the work done in D57504. Reviewed By: simoll, craig.topper Differential Revision: https://reviews.llvm.org/D99355 --- llvm/include/llvm/CodeGen/SelectionDAGNodes.h | 57 +++++++------ llvm/include/llvm/IR/IntrinsicInst.h | 21 ++++- llvm/include/llvm/IR/Intrinsics.td | 26 ++++++ llvm/include/llvm/IR/VPIntrinsics.def | 43 ++++++++++ llvm/lib/IR/IntrinsicInst.cpp | 82 +++++++++++++++++-- llvm/unittests/IR/VPIntrinsicTest.cpp | 5 ++ 6 files changed, 198 insertions(+), 36 deletions(-) diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index dba39a361a68b..929bcb4bd5f10 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -1367,33 +1367,36 @@ class MemSDNode : public SDNode { static bool classof(const SDNode *N) { // For some targets, we lower some target intrinsics to a MemIntrinsicNode // with either an intrinsic or a target opcode. - return N->getOpcode() == ISD::LOAD || - N->getOpcode() == ISD::STORE || - N->getOpcode() == ISD::PREFETCH || - N->getOpcode() == ISD::ATOMIC_CMP_SWAP || - N->getOpcode() == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS || - N->getOpcode() == ISD::ATOMIC_SWAP || - N->getOpcode() == ISD::ATOMIC_LOAD_ADD || - N->getOpcode() == ISD::ATOMIC_LOAD_SUB || - N->getOpcode() == ISD::ATOMIC_LOAD_AND || - N->getOpcode() == ISD::ATOMIC_LOAD_CLR || - N->getOpcode() == ISD::ATOMIC_LOAD_OR || - N->getOpcode() == ISD::ATOMIC_LOAD_XOR || - N->getOpcode() == ISD::ATOMIC_LOAD_NAND || - N->getOpcode() == ISD::ATOMIC_LOAD_MIN || - N->getOpcode() == ISD::ATOMIC_LOAD_MAX || - N->getOpcode() == ISD::ATOMIC_LOAD_UMIN || - N->getOpcode() == ISD::ATOMIC_LOAD_UMAX || - N->getOpcode() == ISD::ATOMIC_LOAD_FADD || - N->getOpcode() == ISD::ATOMIC_LOAD_FSUB || - N->getOpcode() == ISD::ATOMIC_LOAD || - N->getOpcode() == ISD::ATOMIC_STORE || - N->getOpcode() == ISD::MLOAD || - N->getOpcode() == ISD::MSTORE || - N->getOpcode() == ISD::MGATHER || - N->getOpcode() == ISD::MSCATTER || - N->isMemIntrinsic() || - N->isTargetMemoryOpcode(); + switch (N->getOpcode()) { + case ISD::LOAD: + case ISD::STORE: + case ISD::PREFETCH: + case ISD::ATOMIC_CMP_SWAP: + case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: + case ISD::ATOMIC_SWAP: + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_CLR: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + case ISD::ATOMIC_LOAD_FADD: + case ISD::ATOMIC_LOAD_FSUB: + case ISD::ATOMIC_LOAD: + case ISD::ATOMIC_STORE: + case ISD::MLOAD: + case ISD::MSTORE: + case ISD::MGATHER: + case ISD::MSCATTER: + return true; + default: + return N->isMemIntrinsic() || N->isTargetMemoryOpcode(); + } } }; diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h index 606c2b570c1b7..2dbbda2aa7dfd 100644 --- a/llvm/include/llvm/IR/IntrinsicInst.h +++ b/llvm/include/llvm/IR/IntrinsicInst.h @@ -403,21 +403,34 @@ class VPIntrinsic : public IntrinsicInst { // Whether \p ID is a VP intrinsic ID. static bool isVPIntrinsic(Intrinsic::ID); - /// \return the mask parameter or nullptr. + /// \return The mask parameter or nullptr. Value *getMaskParam() const; void setMaskParam(Value *); - /// \return the vector length parameter or nullptr. + /// \return The vector length parameter or nullptr. Value *getVectorLengthParam() const; void setVectorLengthParam(Value *); - /// \return whether the vector length param can be ignored. + /// \return Whether the vector length param can be ignored. bool canIgnoreVectorLengthParam() const; - /// \return the static element count (vector number of elements) the vector + /// \return The static element count (vector number of elements) the vector /// length parameter applies to. ElementCount getStaticVectorLength() const; + /// \return The alignment of the pointer used by this load/store/gather or + /// scatter. + MaybeAlign getPointerAlignment() const; + // MaybeAlign setPointerAlignment(Align NewAlign); // TODO + + /// \return The pointer operand of this load,store, gather or scatter. + Value *getMemoryPointerParam() const; + static Optional getMemoryPointerParamPos(Intrinsic::ID); + + /// \return The data (payload) operand of this store or scatter. + Value *getMemoryDataParam() const; + static Optional getMemoryDataParamPos(Intrinsic::ID); + // Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const IntrinsicInst *I) { return isVPIntrinsic(I->getIntrinsicID()); diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index c7bdd86d82f83..975a109526353 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1368,6 +1368,32 @@ def int_experimental_stepvector : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [], [IntrNoMem]>; //===---------------- Vector Predication Intrinsics --------------===// +// Memory Intrinsics +def int_vp_store : DefaultAttrsIntrinsic<[], + [ llvm_anyvector_ty, + LLVMAnyPointerType>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty], + [ NoCapture>, IntrNoSync, IntrWriteMem, IntrArgMemOnly, IntrWillReturn ]>; + +def int_vp_load : DefaultAttrsIntrinsic<[ llvm_anyvector_ty], + [ LLVMAnyPointerType>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty], + [ NoCapture>, IntrNoSync, IntrReadMem, IntrWillReturn, IntrArgMemOnly ]>; + +def int_vp_gather: DefaultAttrsIntrinsic<[ llvm_anyvector_ty], + [ LLVMVectorOfAnyPointersToElt<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty], + [ IntrReadMem, IntrNoSync, IntrWillReturn, IntrArgMemOnly ]>; + +def int_vp_scatter: DefaultAttrsIntrinsic<[], + [ llvm_anyvector_ty, + LLVMVectorOfAnyPointersToElt<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty], + [ IntrArgMemOnly, IntrNoSync, IntrWillReturn ]>; // TODO allow IntrNoCapture for vectors of pointers // Speculatable Binary operators let IntrProperties = [IntrSpeculatable, IntrNoMem, IntrNoSync, IntrWillReturn] in { diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def index f6a283c800eb9..92e2cd3a27830 100644 --- a/llvm/include/llvm/IR/VPIntrinsics.def +++ b/llvm/include/llvm/IR/VPIntrinsics.def @@ -100,6 +100,17 @@ END_REGISTER_VP_SDNODE(SDOPC) #define HANDLE_VP_TO_CONSTRAINEDFP(HASROUND, HASEXCEPT, INTRINID) #endif +// Map this VP intrinsic to its canonical functional intrinsic. +#ifndef HANDLE_VP_TO_INTRIN +#define HANDLE_VP_TO_INTRIN(ID) +#endif + +// This VP Intrinsic is a memory operation +// The pointer arg is at POINTERPOS and the data arg is at DATAPOS. +#ifndef HANDLE_VP_IS_MEMOP +#define HANDLE_VP_IS_MEMOP(VPID, POINTERPOS, DATAPOS) +#endif + /// } Property Macros ///// Integer Arithmetic { @@ -191,6 +202,36 @@ HELPER_REGISTER_BINARY_FP_VP(frem, VP_FREM, FRem) ///// } Floating-Point Arithmetic +///// Memory Operations { +// llvm.vp.store(ptr,val,mask,vlen) +BEGIN_REGISTER_VP(vp_store, 2, 3, VP_STORE, 0) +HANDLE_VP_TO_OPC(Store) +HANDLE_VP_TO_INTRIN(masked_store) +HANDLE_VP_IS_MEMOP(vp_store, 1, 0) +END_REGISTER_VP(vp_store, VP_STORE) + +// llvm.vp.scatter(ptr,val,mask,vlen) +BEGIN_REGISTER_VP(vp_scatter, 2, 3, VP_SCATTER, 0) +HANDLE_VP_TO_INTRIN(masked_scatter) +HANDLE_VP_IS_MEMOP(vp_scatter, 1, 0) +END_REGISTER_VP(vp_scatter, VP_SCATTER) + +// llvm.vp.load(ptr,mask,vlen) +BEGIN_REGISTER_VP(vp_load, 1, 2, VP_LOAD, -1) +HANDLE_VP_TO_OPC(Load) +HANDLE_VP_TO_INTRIN(masked_load) +HANDLE_VP_IS_MEMOP(vp_load, 0, None) +END_REGISTER_VP(vp_load, VP_LOAD) + +// llvm.vp.gather(ptr,mask,vlen) +BEGIN_REGISTER_VP(vp_gather, 1, 2, VP_GATHER, -1) +HANDLE_VP_TO_INTRIN(masked_gather) +HANDLE_VP_IS_MEMOP(vp_gather, 0, None) +END_REGISTER_VP(vp_gather, VP_GATHER) + +///// } Memory Operations + + #undef BEGIN_REGISTER_VP #undef BEGIN_REGISTER_VP_INTRINSIC #undef BEGIN_REGISTER_VP_SDNODE @@ -199,3 +240,5 @@ HELPER_REGISTER_BINARY_FP_VP(frem, VP_FREM, FRem) #undef END_REGISTER_VP_SDNODE #undef HANDLE_VP_TO_OPC #undef HANDLE_VP_TO_CONSTRAINEDFP +#undef HANDLE_VP_TO_INTRIN +#undef HANDLE_VP_IS_MEMOP diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp index 776590af9a32e..19942fa187fd2 100644 --- a/llvm/lib/IR/IntrinsicInst.cpp +++ b/llvm/lib/IR/IntrinsicInst.cpp @@ -1,4 +1,4 @@ -//===-- InstrinsicInst.cpp - Intrinsic Instruction Wrappers ---------------===// +//===-- IntrinsicInst.cpp - Intrinsic Instruction Wrappers ---------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -340,6 +340,53 @@ VPIntrinsic::getVectorLengthParamPos(Intrinsic::ID IntrinsicID) { } } +/// \return the alignment of the pointer used by this load/store/gather or +/// scatter. +MaybeAlign VPIntrinsic::getPointerAlignment() const { + Optional PtrParamOpt = getMemoryPointerParamPos(getIntrinsicID()); + assert(PtrParamOpt.hasValue() && "no pointer argument!"); + return getParamAlign(PtrParamOpt.getValue()); +} + +/// \return The pointer operand of this load,store, gather or scatter. +Value *VPIntrinsic::getMemoryPointerParam() const { + if (auto PtrParamOpt = getMemoryPointerParamPos(getIntrinsicID())) + return getArgOperand(PtrParamOpt.getValue()); + return nullptr; +} + +Optional VPIntrinsic::getMemoryPointerParamPos(Intrinsic::ID VPID) { + switch (VPID) { + default: + return None; + +#define HANDLE_VP_IS_MEMOP(VPID, POINTERPOS, DATAPOS) \ + case Intrinsic::VPID: \ + return POINTERPOS; +#include "llvm/IR/VPIntrinsics.def" + } +} + +/// \return The data (payload) operand of this store or scatter. +Value *VPIntrinsic::getMemoryDataParam() const { + auto DataParamOpt = getMemoryDataParamPos(getIntrinsicID()); + if (!DataParamOpt.hasValue()) + return nullptr; + return getArgOperand(DataParamOpt.getValue()); +} + +Optional VPIntrinsic::getMemoryDataParamPos(Intrinsic::ID VPID) { + switch (VPID) { + default: + return None; + +#define HANDLE_VP_IS_MEMOP(VPID, POINTERPOS, DATAPOS) \ + case Intrinsic::VPID: \ + return DATAPOS; +#include "llvm/IR/VPIntrinsics.def" + } +} + bool VPIntrinsic::isVPIntrinsic(Intrinsic::ID ID) { switch (ID) { default: @@ -424,10 +471,35 @@ bool VPIntrinsic::canIgnoreVectorLengthParam() const { Function *VPIntrinsic::getDeclarationForParams(Module *M, Intrinsic::ID VPID, ArrayRef Params) { assert(isVPIntrinsic(VPID) && "not a VP intrinsic"); - - // TODO: Extend this for other VP intrinsics as they are upstreamed. This - // works for binary arithmetic VP intrinsics. - auto *VPFunc = Intrinsic::getDeclaration(M, VPID, Params[0]->getType()); + Function *VPFunc; + switch (VPID) { + default: + VPFunc = Intrinsic::getDeclaration(M, VPID, Params[0]->getType()); + break; + case Intrinsic::vp_load: + VPFunc = Intrinsic::getDeclaration( + M, VPID, + {Params[0]->getType()->getPointerElementType(), Params[0]->getType()}); + break; + case Intrinsic::vp_gather: + VPFunc = Intrinsic::getDeclaration( + M, VPID, + {VectorType::get(cast(Params[0]->getType()) + ->getElementType() + ->getPointerElementType(), + cast(Params[0]->getType())), + Params[0]->getType()}); + break; + case Intrinsic::vp_store: + VPFunc = Intrinsic::getDeclaration( + M, VPID, + {Params[1]->getType()->getPointerElementType(), Params[1]->getType()}); + break; + case Intrinsic::vp_scatter: + VPFunc = Intrinsic::getDeclaration( + M, VPID, {Params[0]->getType(), Params[1]->getType()}); + break; + } assert(VPFunc && "Could not declare VP intrinsic"); return VPFunc; } diff --git a/llvm/unittests/IR/VPIntrinsicTest.cpp b/llvm/unittests/IR/VPIntrinsicTest.cpp index 33fc647561af0..f5ff54b55cc62 100644 --- a/llvm/unittests/IR/VPIntrinsicTest.cpp +++ b/llvm/unittests/IR/VPIntrinsicTest.cpp @@ -46,6 +46,11 @@ class VPIntrinsicTest : public testing::Test { Str << " declare <8 x float> @llvm.vp." << BinaryFPOpcode << ".v8f32(<8 x float>, <8 x float>, <8 x i1>, i32) "; + Str << " declare void @llvm.vp.store.v8i32.p0v8i32(<8 x i32>, <8 x i32>*, <8 x i1>, i32) "; + Str << " declare void @llvm.vp.scatter.v8i32.v8p0i32(<8 x i32>, <8 x i32*>, <8 x i1>, i32) "; + Str << " declare <8 x i32> @llvm.vp.load.v8i32.p0v8i32(<8 x i32>*, <8 x i1>, i32) "; + Str << " declare <8 x i32> @llvm.vp.gather.v8i32.v8p0i32(<8 x i32*>, <8 x i1>, i32) "; + return parseAssemblyString(Str.str(), Err, C); } }; From 55c274d7d30eb4de129a70bf48a063e740b71c9c Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Wed, 30 Jun 2021 21:10:26 +0000 Subject: [PATCH 408/619] [mlir][Linalg] Drop comprehensive-func-bufferize (12/n) This revision drops the comprehensive bufferization Function pass, which has issues when trying to bufferize constants. Instead, only support the comprehensive-module-bufferize by default. Differential Revision: https://reviews.llvm.org/D105228 --- mlir/include/mlir/Dialect/Linalg/Passes.h | 11 +- mlir/include/mlir/Dialect/Linalg/Passes.td | 25 +- .../Transforms/ComprehensiveBufferize.cpp | 119 ++--- ...nsive-func-bufferize-analysis-invalid.mlir | 26 - ...comprehensive-func-bufferize-analysis.mlir | 474 ----------------- .../Linalg/comprehensive-func-bufferize.mlir | 353 ------------- ...mprehensive-module-bufferize-analysis.mlir | 478 ++++++++++++++++++ ...omprehensive-module-bufferize-invalid.mlir | 41 ++ .../comprehensive-module-bufferize.mlir | 350 +++++++++++++ 9 files changed, 921 insertions(+), 956 deletions(-) delete mode 100644 mlir/test/Dialect/Linalg/comprehensive-func-bufferize-analysis-invalid.mlir delete mode 100644 mlir/test/Dialect/Linalg/comprehensive-func-bufferize-analysis.mlir delete mode 100644 mlir/test/Dialect/Linalg/comprehensive-func-bufferize.mlir diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.h b/mlir/include/mlir/Dialect/Linalg/Passes.h index d80eb9a0652de..27bb50d5a2f2c 100644 --- a/mlir/include/mlir/Dialect/Linalg/Passes.h +++ b/mlir/include/mlir/Dialect/Linalg/Passes.h @@ -56,18 +56,13 @@ std::unique_ptr> createConvertLinalgToParallelLoopsPass(); /// Placeholder for now, this is NYI. std::unique_ptr> createConvertLinalgToAffineLoopsPass(); -/// Create a pass that bufferizes the body of a FuncOp and tries to reuse the -/// buffers for those arguments that: -/// a) have been annotated 'inplaceable' and -/// b) whose buffer uses would be free of memory hazards. -std::unique_ptr createLinalgComprehensiveFuncBufferizePass(); - /// This pass implements a cross-dialect bufferization approach and performs an /// analysis to determine which op operands and results may be bufferized in the /// same buffers. The analysis is performed on topologically sorted CallOp and /// FuncOp within a module. It provides analyses and bufferization across -/// function boundaries. Within a single function body, the bufferization used -/// is that provided by `LinalgComprehensiveFuncBufferizePass`. +/// function boundaries. Within a function boundary, the analysis is performed +/// on SSA use-def chains starting from function operands that are annotated +/// with the 'inplaceable' attribute. std::unique_ptr createLinalgComprehensiveModuleBufferizePass(); /// Create a pass to convert Linalg operations which work on tensors to use diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.td b/mlir/include/mlir/Dialect/Linalg/Passes.td index 3d9833061a090..c638294b12109 100644 --- a/mlir/include/mlir/Dialect/Linalg/Passes.td +++ b/mlir/include/mlir/Dialect/Linalg/Passes.td @@ -22,26 +22,6 @@ def ConvertElementwiseToLinalg : FunctionPass<"convert-elementwise-to-linalg"> { let dependentDialects = ["linalg::LinalgDialect", "memref::MemRefDialect"]; } -def LinalgComprehensiveFuncBufferize : - FunctionPass<"linalg-comprehensive-func-bufferize"> { - let summary = "Bufferize (tensor into memref) the body of a FuncOp and try " - "to reuse the buffers for those arguments that " - "a) have been annotated 'inplaceable' and " - "b) whose buffer uses would be free of memory hazards"; - let description = [{ - This pass implements a cross-dialect bufferization approach and performs an - analysis to determine which op operands and results may be bufferized in the - same buffers. The analysis is performed on SSA use-def chains starting from - function operands that are annotated with the 'inplaceable' attribute. - }]; - let options = [ - Option<"testAnalysisOnly", "test-analysis-only", "bool", - /*default=*/"false", - "Only runs inplaceability analysis (for testing purposes only)"> - ]; - let constructor = "mlir::createLinalgComprehensiveFuncBufferizePass()"; -} - def LinalgComprehensiveModuleBufferize : Pass<"linalg-comprehensive-module-bufferize", "ModuleOp"> { let summary = "Bufferize (tensor into memref) for a Module."; @@ -50,8 +30,9 @@ def LinalgComprehensiveModuleBufferize : analysis to determine which op operands and results may be bufferized in the same buffers. The analysis is performed on topologically sorted CallOp and FuncOp within a module. It provides analyses and bufferization across - function boundaries. Within a single function body, the bufferization used - is that provided by `-linalg-comprehensive-func-bufferize`. + function boundaries. Within a function boundary, the analysis is performed + on SSA use-def chains starting from function operands that are annotated + with the 'inplaceable' attribute. }]; let options = [ Option<"testAnalysisOnly", "test-analysis-only", "bool", diff --git a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp index 03191a85e506c..dec08dfd4da2c 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp @@ -979,10 +979,10 @@ bool BufferizationAliasInfo::isSourceEquivalentToAMatchingExtractSliceOp( /// Apply `fun` to all the members of the equivalence class of `v`. void BufferizationAliasInfo::applyOnEquivalenceClass( Value v, function_ref fun) const { - for (auto it = equivalentInfo.findLeader(v), - eit = equivalentInfo.member_end(); - it != eit; ++it) { - fun(v); + auto leaderIt = equivalentInfo.findLeader(v); + for (auto mit = leaderIt, meit = equivalentInfo.member_end(); mit != meit; + ++mit) { + fun(mit->v); } } @@ -1485,9 +1485,8 @@ bufferize(OpBuilder &b, CallOpInterface callOp, BlockAndValueMapping &bvm, getEquivalentEnclosingFuncBBArg(returnVal, aliasInfo)) { Value oldRes = callOp->getResult(returnOperand.getOperandNumber()); int64_t idx = bbArg.getArgNumber(); - Value buffer = bvm.lookupOrNull(callOp->getOperand(idx)); - if (!buffer) - return callOp->emitError() << "operand #" << idx << " not bufferized"; + Value buffer = lookup(bvm, callOp->getOperand(idx)); + assert(buffer && "expected bufferized value"); // Add CallOp operand/result equivalence: this is interprocedural info. aliasInfo.insertNewBufferEquivalence(oldRes, buffer); map(bvm, oldRes, buffer); @@ -1504,11 +1503,11 @@ bufferize(OpBuilder &b, CallOpInterface callOp, BlockAndValueMapping &bvm, continue; } - // TODO: Need to hoist above function boundary and add to - // `hoistedArgumentTypes`. - if (Operation *allocOp = getEquivalentAlloc(returnVal, aliasInfo)) - return allocOp->emitError() - << " needs hoist across function boundary\n"; + // TODO: Need to hoist above function boundary. + if (Operation *allocOp = getEquivalentAlloc(returnVal, aliasInfo)) { + hoistedArguments.push_back(allocOp->getResult(0)); + continue; + } // Other cases legitimately need to return a tensor, this is currently not // supported. For instance, if hoisting across function boundary has @@ -1518,13 +1517,14 @@ bufferize(OpBuilder &b, CallOpInterface callOp, BlockAndValueMapping &bvm, int64_t returnIdx = returnOperand.getOperandNumber(); return returnOp->emitError() - << " bufferize result #" << returnIdx << "\n"; + << "buffer result #" << returnIdx << " not produced by an alloc\n"; } } // 2. Compute bufferized FunctionType. SmallVector argumentTypes{callOp->getOperandTypes()}; - llvm::append_range(argumentTypes, ValueRange{hoistedArguments}.getTypes()); + ValueRange hoistedArgs{hoistedArguments}; + llvm::append_range(argumentTypes, hoistedArgs.getTypes()); // Get the bufferized FunctionType for funcOp or construct it if not yet // available. FunctionType bufferizedFuncType = getOrCreateBufferizedFunctionType( @@ -1543,8 +1543,8 @@ bufferize(OpBuilder &b, CallOpInterface callOp, BlockAndValueMapping &bvm, // Tensor operands are guaranteed to have been buferized. int64_t idx = opOperand.getOperandNumber(); - Value buffer = bvm.lookupOrNull(tensorOperand); - assert(buffer && " missing buffer for operand"); + Value buffer = lookup(bvm, tensorOperand); + assert(buffer && "expected bufferized value"); // Caller / callee type mistmatch is handled with a CastOp. auto memRefType = bufferizedFuncType.getInput(idx); @@ -1592,7 +1592,7 @@ static LogicalResult bufferize(OpBuilder &b, tensor::CastOp castOp, ? rankedMemRefType.getAffineMaps() : ArrayRef{}; Type memRefType = getContiguousOrUnrankedMemRefType( - castOp.getResult().getType(), {}, memorySpace); + castOp.getResult().getType(), affineMaps, memorySpace); Value res = b.create(castOp.getLoc(), memRefType, lookup(bvm, castOp.source())); aliasInfo.insertNewBufferEquivalence(res, castOp.getResult()); @@ -2176,64 +2176,21 @@ static LogicalResult bufferizeFuncOpInternals( return failure(result.wasInterrupted()); } -namespace { -struct LinalgComprehensiveFuncBufferize - : public LinalgComprehensiveFuncBufferizeBase< - LinalgComprehensiveFuncBufferize> { - void runOnFunction() override; - - void getDependentDialects(DialectRegistry ®istry) const override { - registry.insert(); - } -}; -} // end namespace - -void LinalgComprehensiveFuncBufferize::runOnFunction() { - auto funcOp = getFunction(); - - // Analysis phase. - DominanceInfo domInfo(funcOp); - BufferizationAliasInfo aliasInfo(funcOp); - // If the analysis fails, just return. This is expected to reset the IR and no - // single OpResult should be marked inPlace. - if (failed(inPlaceAnalysisFuncOpBody(funcOp, aliasInfo, domInfo))) { - signalPassFailure(); - return; - } - - if (testAnalysisOnly) - return; - - // Bufferization phase. - BlockAndValueMapping bvm; - DenseMap bufferizedFunctionTypes; - if (failed(bufferizeFuncOpInternals(funcOp, bvm, aliasInfo, - bufferizedFunctionTypes))) - signalPassFailure(); - - // Post-pass cleanup of inplaceable attributes. - funcOp.walk([&](Operation *op) { op->removeAttr(kInPlaceResultsAttrName); }); -} - -std::unique_ptr mlir::createLinalgComprehensiveFuncBufferizePass() { - return std::make_unique(); -} - //===----------------------------------------------------------------------===// // Bufferization entry-point for modules. //===----------------------------------------------------------------------===// -/// Return the op with Allocate MemoryEffect if `v` is equivalent to an such +/// Return the op with Allocate MemoryEffect if `v` is equivalent to such an /// an op. Return null otherwise. static Operation *getEquivalentAlloc(Value value, const BufferizationAliasInfo &aliasInfo) { - Operation *res; + Operation *res = nullptr; aliasInfo.applyOnEquivalenceClass(value, [&](Value v) { if (!res) if (auto interface = dyn_cast_or_null(v.getDefiningOp())) if (auto effect = - interface.getEffectOnValue(value)) + interface.getEffectOnValue(v)) res = v.getDefiningOp(); }); return res; @@ -2249,9 +2206,12 @@ getEquivalentEnclosingFuncBBArg(Value v, if (!funcOp) funcOp = op->getParentOfType(); assert(funcOp && "expected non-null FuncOp"); - for (BlockArgument bbArg : funcOp.getArguments()) + for (BlockArgument bbArg : funcOp.getArguments()) { + if (!bbArg.getType().isa()) + continue; if (aliasInfo.areEquivalentBufferizedValues(v, bbArg)) return bbArg; + } return nullptr; } @@ -2292,9 +2252,6 @@ static LogicalResult bufferizeFuncOpBoundary( // externally). // -> Figure out a better layering. TypeRange resultTypes; - FunctionType bufferizedFuncType = - getOrCreateBufferizedFunctionType(funcOp, funcOp.getType().getInputs(), - resultTypes, bufferizedFunctionTypes); // Corner case: Bodiless FuncOp // ============================ @@ -2305,6 +2262,9 @@ static LogicalResult bufferizeFuncOpBoundary( if (llvm::any_of(funcOp.getType().getResults(), isaTensor)) return funcOp->emitError() << "cannot bufferize bodiless function that " << "returns a tensor"; + FunctionType bufferizedFuncType = + getOrCreateBufferizedFunctionType(funcOp, funcOp.getType().getInputs(), + TypeRange{}, bufferizedFunctionTypes); funcOp.setType(bufferizedFuncType); LLVM_DEBUG(DBGS() << "End bufferizeFuncOpBoundary no fun body: " << funcOp); return success(); @@ -2323,16 +2283,29 @@ static LogicalResult bufferizeFuncOpBoundary( Value returnVal = returnOperand.get(); if (getEquivalentEnclosingFuncBBArg(returnVal, aliasInfo)) continue; - // TODO: Need to hoist above function boundary. If this is not possible due - // to data-depedent sizes, we need a better type than memref. - if (Operation *allocOp = getEquivalentAlloc(returnVal, aliasInfo)) - return allocOp->emitError() << " needs hoist across function boundary\n"; + + // TODO: Need to hoist above function boundary. + if (Operation *allocOp = getEquivalentAlloc(returnVal, aliasInfo)) { + returnValues.push_back(allocOp->getResult(0)); + continue; + } + + // Other cases legitimately need to return a tensor, this is currently not + // supported. For instance, if hoisting across function boundary has + // failed, it may be due to e.g. data-dependent sizes. In such a case, we + // would need a better type than memref. int64_t returnIdx = returnOperand.getOperandNumber(); - return returnOp->emitError() << " bufferize result #" << returnIdx << "\n"; + return returnOp->emitError() + << "buffer result #" << returnIdx << " not produced by an alloc\n"; } // 2. Rewrite the terminator without the inPlace bufferizable values. - OpBuilder(returnOp).create(returnOp.getLoc(), returnValues); + ValueRange retValues{returnValues}; + FunctionType bufferizedFuncType = getOrCreateBufferizedFunctionType( + funcOp, funcOp.getType().getInputs(), retValues.getTypes(), + bufferizedFunctionTypes); + OpBuilder b(returnOp); + b.create(returnOp.getLoc(), returnValues); returnOp->erase(); // 3. Rewrite the bbArgs. diff --git a/mlir/test/Dialect/Linalg/comprehensive-func-bufferize-analysis-invalid.mlir b/mlir/test/Dialect/Linalg/comprehensive-func-bufferize-analysis-invalid.mlir deleted file mode 100644 index 41e698f97c873..0000000000000 --- a/mlir/test/Dialect/Linalg/comprehensive-func-bufferize-analysis-invalid.mlir +++ /dev/null @@ -1,26 +0,0 @@ -// RUN: mlir-opt %s -linalg-comprehensive-func-bufferize=test-analysis-only -split-input-file -verify-diagnostics - -// ----- - -func @scf_for(%A : tensor, - %B : tensor {linalg.inplaceable = true}, - %C : tensor<4xf32>, - %lb : index, %ub : index, %step : index) - -> (tensor, tensor) -{ - %r0:2 = scf.for %i = %lb to %ub step %step iter_args(%tA = %A, %tB = %B) - -> (tensor, tensor) - { - %ttA = tensor.insert_slice %C into %tA[0][4][1] : tensor<4xf32> into tensor - %ttB = tensor.insert_slice %C into %tB[0][4][1] : tensor<4xf32> into tensor - - // Throw a wrench in the system by swapping yielded values: this result in a - // ping-pong of values at each iteration on which we currently want to fail. - - // expected-error @+1 {{Yield operand #1 does not bufferize to an equivalent buffer}} - scf.yield %ttB, %ttA : tensor, tensor - } - - return %r0#0, %r0#1: tensor, tensor -} - diff --git a/mlir/test/Dialect/Linalg/comprehensive-func-bufferize-analysis.mlir b/mlir/test/Dialect/Linalg/comprehensive-func-bufferize-analysis.mlir deleted file mode 100644 index 5234d85b0b5b1..0000000000000 --- a/mlir/test/Dialect/Linalg/comprehensive-func-bufferize-analysis.mlir +++ /dev/null @@ -1,474 +0,0 @@ -// RUN: mlir-opt %s -linalg-comprehensive-func-bufferize=test-analysis-only -split-input-file | FileCheck %s - -//===----------------------------------------------------------------------===// -// Simple cases -//===----------------------------------------------------------------------===// - -// ----- - -// CHECK-LABEL: func @extract_slice_fun -func @extract_slice_fun(%A : tensor, %B : tensor {linalg.inplaceable = true}) - -> (tensor<4xf32>, tensor<8xf32>) -{ - // tensor.extract_slice is not used in a write, it is not compelled to - // bufferize out of place. Let callers decide whether they want to create - // aliasing subviews at all call sites or whether they allocate. - // This is true irrespective of whether the function argument is inplaceable. - // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} - %r0 = tensor.extract_slice %A[0][4][1] : tensor to tensor<4xf32> - - // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} - %r1 = tensor.extract_slice %B[0][8][1] : tensor to tensor<8xf32> - - return %r0, %r1: tensor<4xf32>, tensor<8xf32> -} - -// ----- - -// CHECK-LABEL: func @insert_slice_fun -func @insert_slice_fun( - %A : tensor, - %B : tensor {linalg.inplaceable = true}, - %C : tensor<4xf32>) - -> (tensor, tensor) -{ - // must bufferize out of place. - // CHECK: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["false"]} - %r0 = tensor.insert_slice %C into %A[0][4][1] : tensor<4xf32> into tensor - - // bufferizes inplace. - // CHECK: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} - %r1 = tensor.insert_slice %C into %B[0][4][1] : tensor<4xf32> into tensor - - return %r0, %r1: tensor, tensor -} - -// ----- - -// CHECK-LABEL: func @conflict_on_B -func @conflict_on_B( - %A : tensor<4x4xf32> {linalg.inplaceable = true}, - %B : tensor<4x4xf32> {linalg.inplaceable = true}) - -> (tensor<4x4xf32>, tensor<4x4xf32>, tensor<4x4xf32>) -{ - // matmul output operand interferes with input operand. - // CHECK: linalg.matmul - // CHECK-SAME: {__inplace_results_attr__ = ["false"]} - %C = linalg.matmul ins(%A, %B: tensor<4x4xf32>, tensor<4x4xf32>) - outs(%B: tensor<4x4xf32>) - -> tensor<4x4xf32> - - // matmul output operand interferes with input operand. - // CHECK: linalg.matmul - // CHECK-SAME: {__inplace_results_attr__ = ["false"]} - %D = linalg.matmul ins(%B, %A: tensor<4x4xf32>, tensor<4x4xf32>) - outs(%B: tensor<4x4xf32>) - -> tensor<4x4xf32> - - // matmul output operand does not interferes with input operand. - // CHECK: linalg.matmul - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} - %E = linalg.matmul ins(%A, %A: tensor<4x4xf32>, tensor<4x4xf32>) - outs(%B: tensor<4x4xf32>) - -> tensor<4x4xf32> - - return %C, %D, %E: tensor<4x4xf32>, tensor<4x4xf32>, tensor<4x4xf32> -} - -//===----------------------------------------------------------------------===// -// Length-1 producer-consumer cases. -//===----------------------------------------------------------------------===// - -// ----- - -// CHECK-LABEL: func @extract_slice_extract_slice -func @extract_slice_extract_slice( - %A : tensor {linalg.inplaceable = true}, %B : tensor) - -> (tensor<2xf32>, tensor<2xf32>) -{ - // tensor.extract_slice is not used in a write, it is not compelled to - // bufferize out of place. Let callers decide whether they want to create - // aliasing subviews at all call sites or whether they allocate. - // This is true irrespective of whether the function argument is inplaceable. - // CHECK: {__inplace_results_attr__ = ["true"]} - %r0 = tensor.extract_slice %A[0][4][1] : tensor to tensor<4xf32> - - // CHECK: {__inplace_results_attr__ = ["true"]} - %r1 = tensor.extract_slice %r0[0][2][1] : tensor<4xf32> to tensor<2xf32> - - // CHECK: {__inplace_results_attr__ = ["true"]} - %r2 = tensor.extract_slice %B[0][4][1] : tensor to tensor<4xf32> - - // CHECK: {__inplace_results_attr__ = ["true"]} - %r3 = tensor.extract_slice %r2[0][2][1] : tensor<4xf32> to tensor<2xf32> - - return %r1, %r3: tensor<2xf32>, tensor<2xf32> -} - -// ----- - -// CHECK-LABEL: func @insert_slice_insert_slice -func @insert_slice_insert_slice( - %A : tensor {linalg.inplaceable = true}, - %A2 : tensor<4xf32> {linalg.inplaceable = true}, - %A3 : tensor<2xf32> {linalg.inplaceable = true}, - %B : tensor, %B2 : tensor<4xf32>, %B3 : tensor<2xf32>) - -> (tensor, tensor) -{ - // CHECK: {__inplace_results_attr__ = ["true"]} - %r0 = tensor.insert_slice %A3 into %A2[0][2][1] : tensor<2xf32> into tensor<4xf32> - - // CHECK: {__inplace_results_attr__ = ["true"]} - %r1 = tensor.insert_slice %r0 into %A[0][4][1] : tensor<4xf32> into tensor - - // CHECK: {__inplace_results_attr__ = ["false"]} - %r2 = tensor.insert_slice %B3 into %B2[0][2][1] : tensor<2xf32> into tensor<4xf32> - - // CHECK: {__inplace_results_attr__ = ["false"]} - %r3 = tensor.insert_slice %r2 into %B[0][4][1] : tensor<4xf32> into tensor - - return %r1, %r3: tensor, tensor -} - -// ----- - -// CHECK-LABEL: func @extract_slice_nonmatching_insert_slice -func @extract_slice_nonmatching_insert_slice( - %A : tensor {linalg.inplaceable = true}, - %B : tensor, %idx: index) - -> (tensor, tensor) -{ - // %r1 bufferizes inplace because %A is inplaceable. - // %r0 is an overlapping tensor.extract_slice that does not match, it must be - // out of place. - // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["false"]} - %r0 = tensor.extract_slice %A[0][4][1] : tensor to tensor<4xf32> - - // %r1 can bufferize inplace fine. - // CHECK: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} - %r1 = tensor.insert_slice %r0 into %A[%idx][4][1] : tensor<4xf32> into tensor - - // %r3 does bufferizes inplace because %B is not inplaceable. - // %r0 is an overlapping tensor.extract_slice that does not match, but does - // not alias with the buffer coming from %r3 so it can actually bufferize - // inplace. - // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} - %r2 = tensor.extract_slice %B[0][4][1] : tensor to tensor<4xf32> - - // %r3 cannot bufferize inplace since %B is not inplaceable. - // CHECK: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["false"]} - %r3 = tensor.insert_slice %r2 into %B[%idx][4][1] : tensor<4xf32> into tensor - - return %r1, %r3: tensor, tensor -} - -// ----- - -// CHECK-LABEL: func @extract_slice_matching_insert_slice -func @extract_slice_matching_insert_slice( - %A : tensor {linalg.inplaceable = true}, - %B : tensor) - -> (tensor, tensor) -{ - // %r1 bufferizes inplace because %A is inplaceable. - // %r0 is a tensor.extract_slice that matches, it can also be bufferized - // inplace. - // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} - %r0 = tensor.extract_slice %A[0][4][1] : tensor to tensor<4xf32> - - // CHECK: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} - %r1 = tensor.insert_slice %r0 into %A[0][4][1] : tensor<4xf32> into tensor - - // %r2 is a tensor.extract_slice that matches %r3, it can be bufferized - // inplace. - // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} - %r2 = tensor.extract_slice %B[0][4][1] : tensor to tensor<4xf32> - - // tensor.insert_slice cannot bufferize inplace. - // This should have been captured by a canonicalization pattern and it would - // be unproductive to have special logic in bufferization to encode matching - // insert_slice(extract_slice(A), A). - // CHECK: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["false"]} - %r3 = tensor.insert_slice %r2 into %B[0][4][1] : tensor<4xf32> into tensor - - return %r1, %r3: tensor, tensor -} - -// ----- - -// CHECK-LABEL: func @extract_slice_linalg_readonly_use -func @extract_slice_linalg_readonly_use( - %A : tensor, - %B : tensor<4x4xf32>, - %C : tensor<4x4xf32> {linalg.inplaceable = true}) - -> (tensor<4x4xf32>, tensor<4x4xf32>) -{ - // tensor.extract_slice is only used as a read, no interference irrespective - // of user's inplace status. - // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} - %sA = tensor.extract_slice %A[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32> - - // matmul output operand is not inplaceable at the function boundary. - // CHECK: linalg.matmul - // CHECK-SAME: {__inplace_results_attr__ = ["false"]} - %D = linalg.matmul ins(%sA, %B: tensor<4x4xf32>, tensor<4x4xf32>) - outs(%B: tensor<4x4xf32>) - -> tensor<4x4xf32> - - // matmul output operand is inplaceable at the function boundary. - // CHECK: linalg.matmul - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} - %E = linalg.matmul ins(%sA, %B: tensor<4x4xf32>, tensor<4x4xf32>) - outs(%C: tensor<4x4xf32>) - -> tensor<4x4xf32> - - return %D, %E: tensor<4x4xf32>, tensor<4x4xf32> -} - -// ----- - -// CHECK-LABEL: func @extract_slice_to_linalg_write_use -func @extract_slice_to_linalg_write_use( - %A : tensor<4x4xf32>, - %B : tensor, - %C : tensor {linalg.inplaceable = true}) - -> (tensor<4x4xf32>, tensor<4x4xf32>) -{ - // Step 3. %sB forward propagates to a write in %D but it is not inplace. - // So this is only ever read and can bufferize inplace. - // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} - %sB = tensor.extract_slice %B[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32> - - // Step 2. %sB has a read interference in %E, it does not bufferize inplace. - // CHECK: linalg.matmul - // CHECK-SAME: {__inplace_results_attr__ = ["false"]} - %D = linalg.matmul ins(%B, %C: tensor, tensor) - outs(%sB: tensor<4x4xf32>) - -> tensor<4x4xf32> - - // Step 4. %sC forward propagates to an inplace write in %E. - // %sC backward propagates to %C which is inplaceable. - // As a consequence this is bufferized inplace. - // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} - %sC = tensor.extract_slice %C[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32> - - // Step 1. %sC backprops to the tensor.extract_slice producer which is not - // considered an interference. This bufferizes inplace. - // CHECK: linalg.matmul - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} - %E = linalg.matmul ins(%A, %sB: tensor<4x4xf32>, tensor<4x4xf32>) - outs(%sC: tensor<4x4xf32>) - -> tensor<4x4xf32> - - return %D, %E: tensor<4x4xf32>, tensor<4x4xf32> -} - -//===----------------------------------------------------------------------===// -// Transitive cases -//===----------------------------------------------------------------------===// - -// ----- - -// CHECK-LABEL: func @extract_slice_to_linalg_write_use -func @extract_slice_to_linalg_write_use( - %A : tensor<4x4xf32>, - %B : tensor, - %C : tensor {linalg.inplaceable = true}) - -> (tensor<4x4xf32>, tensor<4x4xf32>) -{ - // Step 4. %sB forward propagates to an inplace write in %D. - // %sB backward propagates to %B which is not inplaceable. - // As a consequence this is bufferized out of place. - // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["false"]} - %sB = tensor.extract_slice %B[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32> - - // Step 1. %sB backprops to the tensor.extract_slice producer which is not - // considered an interference. This bufferizes inplace. - // CHECK: linalg.matmul - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} - %D = linalg.matmul ins(%B, %C: tensor, tensor) - outs(%sB: tensor<4x4xf32>) - -> tensor<4x4xf32> - - // Step 3. %sC forward propagates to an inplace write in %E. - // %sC backward propagates to %C which is inplaceable. - // As a consequence this is bufferized inplace. - // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} - %sC = tensor.extract_slice %C[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32> - - // Step 1. %sC backprops to the tensor.extract_slice producer which is not - // considered an interference. This bufferizes inplace. - // CHECK: linalg.matmul - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} - %E = linalg.matmul ins(%A, %A: tensor<4x4xf32>, tensor<4x4xf32>) - outs(%sC: tensor<4x4xf32>) - -> tensor<4x4xf32> - - return %D, %E: tensor<4x4xf32>, tensor<4x4xf32> -} - -// ----- - -// CHECK-LABEL: func @nested_extract_slice_and_insert -func @nested_extract_slice_and_insert( - %A : tensor, - %B : tensor {linalg.inplaceable = true}, - %C : tensor {linalg.inplaceable = true}, - %idx : index) - -> (tensor, tensor, tensor) -{ - %f0 = constant 0.0 : f32 - - // 2-level matching tensor.extract_slice / tensor.insert_slice into non - // inplaceable %A. - // - %rA is not inplaceable because %A is not inplaceable at function boundary. - // - once %rA is deemed not inplaceable, nothing prevent %rsA to be inplaceable - // - this propagates to %FA and %ssA being inplaceable. - // - %sA would then bufferize to an inplace write (i.e. %FA) but %A is not - // inplaceable and so %sA is not inplaceable. - // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["false"]} - // CHECK-NEXT: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} - // CHECK-NEXT: fill - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} - // CHECK-NEXT: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} - // CHECK-NEXT: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["false"]} - %sA = tensor.extract_slice %A[0, 0][%idx, %idx][1, 1] : tensor to tensor - %ssA = tensor.extract_slice %sA[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32> - %FA = linalg.fill(%f0, %ssA) : f32, tensor<4x4xf32> -> tensor<4x4xf32> - %rsA = tensor.insert_slice %FA into %sA[0, 0][4, 4][1, 1] : tensor<4x4xf32> into tensor - %rA = tensor.insert_slice %rsA into %A[0, 0][%idx, %idx][1, 1] : tensor into tensor - - // 3-level matching tensor.extract_slice / tensor.insert_slice into - // inplaceable %B. - // CHECK-NEXT: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} - // CHECK-NEXT: tensor.extract_slice - // Atm, this 2nd tensor.extract_slice fails to bufferize inplace because - // clobbering analysis conservatively test for equivalent buffers. - // TODO: This is currently too restrictive and misses clobberings. - // When available, use container-containee analysis. - // CHECK-SAME: {__inplace_results_attr__ = ["false"]} - // CHECK-NEXT: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} - // CHECK-NEXT: fill - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} - // CHECK-NEXT: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} - // CHECK-NEXT: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} - // CHECK-NEXT: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} - %sB = tensor.extract_slice %B[0, 0][%idx, %idx][1, 1] : tensor to tensor - %ssB = tensor.extract_slice %sB[0, 0][4, %idx][1, 1] : tensor to tensor<4x?xf32> - %sssB = tensor.extract_slice %ssB[0, 0][4, 4][1, 1] : tensor<4x?xf32> to tensor<4x4xf32> - %FB = linalg.fill(%f0, %sssB) : f32, tensor<4x4xf32> -> tensor<4x4xf32> - %rssB = tensor.insert_slice %FB into %ssB[0, 0][4, 4][1, 1] : tensor<4x4xf32> into tensor<4x?xf32> - %rsB = tensor.insert_slice %rssB into %sB[0, 0][4, %idx][1, 1] : tensor<4x?xf32> into tensor - %rB = tensor.insert_slice %rsB into %B[0, 0][%idx, %idx][1, 1] : tensor into tensor - - // 2-level matching tensor.extract_slice / tensor.insert_slice into - // inplaceable %C with a twist. - // Throw a wrench in the system: %rsC production sizes do not match %ssC. - // CHECK-NEXT: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} - // The tensor.insert_slice that would be candidate for matching does not actually - // match. That tensor.insert_slice can still be bufferized inplace nonetheless - // but this tensor.extract_slice, which bufferizes to an inplace write, cannot. - // CHECK-NEXT: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["false"]} - // CHECK-NEXT: fill - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} - // CHECK-NEXT: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} - // CHECK-NEXT: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} - %sC = tensor.extract_slice %C[0, 0][%idx, %idx][1, 1] : tensor to tensor - %ssC = tensor.extract_slice %sC[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32> - %FC = linalg.fill(%f0, %ssC) : f32, tensor<4x4xf32> -> tensor<4x4xf32> - %rsC = tensor.insert_slice %FC into %sC[0, 0][12345, 67890][1, 1] : tensor<4x4xf32> into tensor - %rC = tensor.insert_slice %rsC into %C[0, 0][%idx, %idx][1, 1] : tensor into tensor - - return %rA, %rB, %rC: tensor, tensor, tensor -} - -//===----------------------------------------------------------------------===// -// Simple loop cases -//===----------------------------------------------------------------------===// - -// ----- - -// CHECK-LABEL: func @scf_for_yield_only -func @scf_for_yield_only(%A : tensor, - %B : tensor {linalg.inplaceable = true}, - %lb : index, %ub : index, %step : index) - -> (tensor, tensor) -{ - // CHECK: scf.for - // CHECK-NEXT: scf.yield - // CHECK-NEXT: {__inplace_results_attr__ = ["false"]} - %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor) { - scf.yield %t : tensor - } - - // CHECK: scf.for - // CHECK-NEXT: scf.yield - // CHECK-NEXT: {__inplace_results_attr__ = ["true"]} - %r1 = scf.for %i = %lb to %ub step %step iter_args(%t = %B) -> (tensor) { - scf.yield %t : tensor - } - - return %r0, %r1: tensor, tensor -} - -// ----- - -// CHECK-LABEL: func @scf_for_with_tensor.insert_slice -func @scf_for_with_tensor.insert_slice(%A : tensor, - %B : tensor {linalg.inplaceable = true}, - %C : tensor<4xf32>, - %lb : index, %ub : index, %step : index) - -> (tensor, tensor) -{ - // CHECK: scf.for - // scf.for bbArgs are always inplaceable seen from ops inside the body: - // 1. Either the matching tensor is not inplaceable and an alloc occurs - // which makes bbArg inplaceable. - // 2. Or it is already inplaceable and so is bbArg. - // CHECK-NEXT: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} - // CHECK-NEXT: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} - // CHECK-NEXT: scf.yield - // CHECK-NEXT: {__inplace_results_attr__ = ["false", "true"]} - %r0:2 = scf.for %i = %lb to %ub step %step iter_args(%tA = %A, %tB = %B) - -> (tensor, tensor) - { - %ttA = tensor.insert_slice %C into %tA[0][4][1] : tensor<4xf32> into tensor - %ttB = tensor.insert_slice %C into %tB[0][4][1] : tensor<4xf32> into tensor - scf.yield %ttA, %ttB : tensor, tensor - } - - return %r0#0, %r0#1: tensor, tensor -} - diff --git a/mlir/test/Dialect/Linalg/comprehensive-func-bufferize.mlir b/mlir/test/Dialect/Linalg/comprehensive-func-bufferize.mlir deleted file mode 100644 index e217a7062a94f..0000000000000 --- a/mlir/test/Dialect/Linalg/comprehensive-func-bufferize.mlir +++ /dev/null @@ -1,353 +0,0 @@ -// RUN: mlir-opt %s -linalg-comprehensive-func-bufferize -split-input-file | FileCheck %s - -// CHECK-DAG: #[[$map_2d_dyn:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)> - -// CHECK-LABEL: func @fill_inplace( -// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: tensor {linalg.inplaceable = true}) -func @fill_inplace(%A : tensor {linalg.inplaceable = true}) -> tensor { - // CHECK: %[[I:.*]] = memref.buffer_cast %[[A]] : memref - - // CHECK: %[[F0:.*]] = constant 0.000000e+00 : f32 - %f0 = constant 0.0 : f32 - - /// Inplaceable, no alloc - // CHECK-NOT: alloc - // CHECK: linalg.fill(%[[F0]], %[[I]]) : f32, memref - %r = linalg.fill(%f0, %A) : f32, tensor -> tensor - - // CHECK: %[[R:.*]] = memref.tensor_load %[[I]] : memref - // CHECK: return %[[R]] : tensor - return %r: tensor -} - -// ----- - -// CHECK-DAG: #[[$map_2d_dyn:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)> - -/// No linalg.inplaceable flag, must allocate. -// CHECK-LABEL: func @not_inplace( -// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: tensor) -func @not_inplace(%A : tensor) -> tensor { - // CHECK: %[[I:.*]] = memref.buffer_cast %[[A]] : memref - - // CHECK: %[[D0:.*]] = memref.dim %[[I]], {{.*}} : memref - // CHECK: %[[ALLOC:.*]] = memref.alloc(%[[D0]]) : memref - // CHECK: %[[I2:.*]] = memref.cast %[[ALLOC]] : memref to memref - - // CHECK: %[[F0:.*]] = constant 0.000000e+00 : f32 - %f0 = constant 0.0 : f32 - - // CHECK: linalg.fill(%[[F0]], %[[I2]]) : f32, memref - %r = linalg.fill(%f0, %A) : f32, tensor -> tensor - - // CHECK: dealloc %[[ALLOC]] : memref - // CHECK: %[[R:.*]] = memref.tensor_load %[[I2]] : memref - // CHECK: return %[[R]] : tensor - return %r: tensor -} - -// ----- - -// CHECK-LABEL: func @not_inplace -// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: tensor -func @not_inplace(%A : tensor {linalg.inplaceable = true}) -> tensor { - %f0 = constant 0.0 : f32 - - // CHECK: %[[BUFFER_CAST:.*]] = memref.buffer_cast %[[A]] : memref -> tensor - - /// The second op has no interfering reads and can reuse. - // CHECK-NOT: alloc - // CHECK: linalg.matmul{{.*}}outs(%[[BUFFER_CAST]] - %r = linalg.matmul ins(%f, %f: tensor, tensor) - outs(%A: tensor) - -> tensor - return %r: tensor -} - -// ----- - -// CHECK-LABEL: func @not_inplace -func @not_inplace(%A : tensor {linalg.inplaceable = true}) -> tensor { - /// Within op multiple uses of %A, must alloc. - // CHECK: alloc - %r = linalg.matmul ins(%A, %A: tensor, tensor) - outs(%A: tensor) - -> tensor - return %r: tensor -} -// ----- - -// CHECK-LABEL: func @vec_inplace -func @vec_inplace(%A : tensor {linalg.inplaceable = true}, %vec : vector<4xf32>) - -> tensor -{ - %c0 = constant 0 : index - // CHECK-NOT: alloc - %r = vector.transfer_write %vec, %A[%c0] : vector<4xf32>, tensor - return %r: tensor -} - -// ----- - -// CHECK-LABEL: func @vec_not_inplace -func @vec_not_inplace(%A : tensor {linalg.inplaceable = true}, %vec : vector<4xf32>) - -> (tensor, tensor) -{ - %c0 = constant 0 : index - %c1 = constant 1 : index - - // CHECK: %[[BUFFER_CAST:.*]] = memref.buffer_cast {{.*}} : memref - - /// Cross-op multiple uses of %A, the first vector.transfer which has interfering reads must alloc. - // CHECK: %[[ALLOC:.*]] = memref.alloc - // CHECK-NEXT: vector.transfer_write {{.*}}, %[[ALLOC]] - %r0 = vector.transfer_write %vec, %A[%c0] : vector<4xf32>, tensor - - /// The second vector.transfer has no interfering reads and can reuse the buffer. - // CHECK-NOT: alloc - // CHECK-NEXT: vector.transfer_write {{.*}}, %[[BUFFER_CAST]] - %r1 = vector.transfer_write %vec, %A[%c1] : vector<4xf32>, tensor - return %r0, %r1: tensor, tensor -} - -// ----- - -// CHECK-LABEL: func @insert_slice_fun -func @insert_slice_fun(%A0 : tensor, %A1 : tensor {linalg.inplaceable = true}, - %t0 : tensor<4xf32>, %t1 : tensor<4xf32> {linalg.inplaceable = true}) - -> (tensor, tensor, tensor, tensor) -{ - // CHECK: %[[BUFFER_CAST_A0:.*]] = memref.buffer_cast {{.*}} : memref into tensor - - // Alloc and copy the whole result tensor. Copy the tensor.extract_slice. - // CHECK: %[[REALLOC_A0_2:.*]] = memref.alloc - // CHECK: linalg.copy(%[[BUFFER_CAST_A0]] - // CHECK: %[[SV_A0_2:.*]] = memref.subview %[[REALLOC_A0_2]] - // CHECK: linalg.copy(%[[BUFFER_CAST_t1]], %[[SV_A0_2]]) - %r1 = tensor.insert_slice %t1 into %A0[0][4][1] : tensor<4xf32> into tensor - - // Still alloc the large tensor because %A1 is read after. Copy the tensor.extract_slice. - // CHECK: %[[REALLOC_A1:.*]] = memref.alloc - // CHECK: linalg.copy(%[[BUFFER_CAST_A1]] - // CHECK: %[[SV_A1:.*]] = memref.subview %[[REALLOC_A1]] - // CHECK: linalg.copy(%[[BUFFER_CAST_t0]], %[[SV_A1]]) - %r2 = tensor.insert_slice %t0 into %A1[0][4][1] : tensor<4xf32> into tensor - - // Do not realloc the large tensor. Copy the tensor.extract_slice. - // CHECK-NOT: alloc - // CHECK: %[[SV_A1_2:.*]] = memref.subview %[[BUFFER_CAST_A1]] - // CHECK: linalg.copy(%[[BUFFER_CAST_t1]], %[[SV_A1_2]]) - %r3 = tensor.insert_slice %t1 into %A1[0][4][1] : tensor<4xf32> into tensor - - return %r0, %r1, %r2, %r3: tensor, tensor, tensor, tensor -} - -// ----- - -// CHECK-LABEL: func @insert_slice_fun -func @insert_slice_fun(%A : tensor {linalg.inplaceable = true}, %t : tensor<4xf32>) - -> tensor -{ - %f0 = constant 0.0 : f32 - - // CHECK: %[[BUFFER_CAST_A:.*]] = memref.buffer_cast {{.*}} : memref into tensor - - /// Overwrite BUFFER_CAST_A inplace. - // CHECK: linalg.fill({{.*}}, %[[BUFFER_CAST_A]] - %r1 = linalg.fill(%f0, %r0) : f32, tensor -> tensor - return %r1: tensor -} - -// ----- - -// CHECK-LABEL: func @insert_slice_fun -func @insert_slice_fun(%A : tensor {linalg.inplaceable = true}, %t : tensor<4xf32>) - -> tensor -{ - %f0 = constant 0.0 : f32 - - // CHECK: %[[BUFFER_CAST_A:.*]] = memref.buffer_cast {{.*}} : memref -> tensor - - // CHECK-NOT: alloc - // CHECK: %[[SV:.*]] = memref.subview %[[BUFFER_CAST_A]] - /// Overwrite BUFFER_CAST_A inplace by copying into the subview. - // CHECK: linalg.copy(%[[BUFFER_CAST_B]], %[[SV]]) - %r1 = tensor.insert_slice %t into %r0[0][4][1] : tensor<4xf32> into tensor - - return %r1: tensor -} - -// ----- - -// CHECK-LABEL: func @insert_slice_fun_not_inplace -func @insert_slice_fun_not_inplace(%A : tensor, %t : tensor<4xf32>) - -> tensor -{ - // CHECK: %[[BUFFER_CAST_A:.*]] = memref.buffer_cast {{.*}} : memref - // CHECK: linalg.copy(%[[BUFFER_CAST_A]], %[[ALLOC]]) : memref - // CHECK: %[[SV:.*]] = memref.subview %[[ALLOC]][0] [4] [1] : memref to memref<4xf32> - // CHECK: linalg.copy(%[[BUFFER_CAST_B]], %[[SV]]) : memref<4xf32, #map>, memref<4xf32> - // CHECK: memref.dealloc %[[ALLOC]] : memref - %r0 = tensor.insert_slice %t into %A[0][4][1] : tensor<4xf32> into tensor - return %r0: tensor -} - -// ----- - -// CHECK-LABEL: func @insert_slice_fun_not_inplace -func @insert_slice_fun_not_inplace(%A : tensor {linalg.inplaceable = true}, %t : tensor<4xf32>) - -> (tensor, tensor) -{ - %f0 = constant 0.0 : f32 - - // CHECK-DAG: %[[BUFFER_CAST_A:.*]] = memref.buffer_cast {{.*}} : memref to memref<4xf32, {{.*}}> - // CHECK-DAG: linalg.copy(%[[BUFFER_CAST_B]], %[[SV]]) : memref<4xf32, {{.*}}>, memref<4xf32, {{.*}}> - %r0 = tensor.insert_slice %t into %A[0][4][1] : tensor<4xf32> into tensor - - // fill would interfere with %r0 that is also being returned. - // So we need to bufferize it out of place and make a new alloc. - // CHECK-DAG: %[[ALLOC:.*]] = memref.alloc({{.*}}) : memref - // CHECK-DAG: %[[ALLOC_CAST_DYNAMIC:.*]] = memref.cast %[[ALLOC]] : memref to memref - %r1 = linalg.fill(%f0, %A) : f32, tensor -> tensor - - // CHECK-DAG: %[[RET_A:.*]] = memref.tensor_load %[[BUFFER_CAST_A]] : memref, tensor -} - -// ----- - -// CHECK-LABEL: func @extract_slice_fun -func @extract_slice_fun(%A : tensor {linalg.inplaceable = true}) - -> tensor<4xf32> -{ - // This bufferizes to a pattern that the cross-function boundary pass needs to - // convert into a new memref argument at all call site; this may be either: - // - an externally created aliasing subview (if we want to allow aliasing - // function arguments). - // - a new alloc + copy (more expensive but does not create new function - // argument aliasing). - // CHECK-NOT: alloc - // CHECK-NOT: copy - // CHECK: %[[BUFFER_CAST_A:.*]] = memref.buffer_cast {{.*}} : memref to tensor<4xf32> - - // CHECK: return %[[RES]] - return %r0: tensor<4xf32> -} - -//===----------------------------------------------------------------------===// -// Simple loop cases -//===----------------------------------------------------------------------===// - -// ----- - -// CHECK-LABEL: func @scf_for_yield_only -func @scf_for_yield_only(%A : tensor, - %B : tensor {linalg.inplaceable = true}, - %lb : index, %ub : index, %step : index) - -> (tensor, tensor) -{ - // CHECK: %[[ALLOC_FOR_A:.*]] = memref.alloc - // CHECK: %[[BUFFER_CAST_A:.*]] = memref.buffer_cast - // CHECK: %[[BUFFER_CAST_B:.*]] = memref.buffer_cast - // CHECK: linalg.copy(%[[BUFFER_CAST_A]], %[[ALLOC_FOR_A]]) - - // The first scf.for remains but just turns into dead code. - %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor) { - scf.yield %t : tensor - } - - // The second scf.for remains but just turns into dead code. - %r1 = scf.for %i = %lb to %ub step %step iter_args(%t = %B) -> (tensor) { - scf.yield %t : tensor - } - - // Cross function call alloc/dealloc pattern must be hoist out. - // CHECK: memref.dealloc %[[ALLOC_FOR_A]] : memref - // CHECK: %[[rA:.*]] = memref.tensor_load %[[ALLOC_FOR_A]] - // Returning tensor_load of the buffer cast makes the %r1 loop dead. - // CHECK: %[[rB:.*]] = memref.tensor_load %[[BUFFER_CAST_B:.*]] - // CHECK: return %[[rA]], %[[rB]] : tensor, tensor - return %r0, %r1: tensor, tensor -} - -// ----- - -// CHECK-LABEL: func @scf_for_with_tensor.insert_slice -func @scf_for_with_tensor.insert_slice( - %A : tensor, - %B : tensor {linalg.inplaceable = true}, - %C : tensor<4xf32>, - %lb : index, %ub : index, %step : index) - -> (tensor, tensor) -{ - // CHECK: %[[ALLOC_FOR_A:.*]] = memref.alloc - // CHECK: %[[BUFFER_CAST_A:.*]] = memref.buffer_cast - // CHECK: %[[BUFFER_CAST_B:.*]] = memref.buffer_cast - // CHECK: %[[BUFFER_CAST_C:.*]] = memref.buffer_cast - // CHECK: linalg.copy(%[[BUFFER_CAST_A]], %[[ALLOC_FOR_A]]) - - // CHECK: scf.for {{.*}} iter_args(%[[bbA:.*]] = %{{.*}}, %[[bbB:.*]] = %{{.*}}) - %r0:2 = scf.for %i = %lb to %ub step %step iter_args(%tA = %A, %tB = %B) - -> (tensor, tensor) - { - // CHECK: %[[svA:.*]] = memref.subview %[[ALLOC_FOR_A]][0] [4] [1] - // %ttA bufferizes to direct copy of %BUFFER_CAST_C into %svA - // CHECK: linalg.copy(%[[BUFFER_CAST_C]], %[[svA]]) - %ttA = tensor.insert_slice %C into %tA[0][4][1] : tensor<4xf32> into tensor - - // %ttB bufferizes to direct copy of %BUFFER_CAST_C into %BUFFER_CAST_B - // CHECK: %[[svB:.*]] = memref.subview %[[BUFFER_CAST_B]][0] [4] [1] - // CHECK: linalg.copy(%[[BUFFER_CAST_C]], %[[svB]]) - %ttB = tensor.insert_slice %C into %tB[0][4][1] : tensor<4xf32> into tensor - - // Yielding bbA and bbB will canonicalize away into oblivion. - // CHECK: scf.yield %[[bbA]], %[[bbB]] : tensor, tensor - scf.yield %ttA, %ttB : tensor, tensor - } - - // CHECK: memref.dealloc %[[ALLOC_FOR_A]] : memref - // CHECK: %[[rA:.*]] = memref.tensor_load %[[ALLOC_FOR_A]] : memref - // CHECK: %[[rB:.*]] = memref.tensor_load %[[BUFFER_CAST_B]] : memref - // CHECK: return %[[rA]], %[[rB]] : tensor, tensor - return %r0#0, %r0#1: tensor, tensor -} diff --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis.mlir b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis.mlir index 108119467ea63..a580cbb36060f 100644 --- a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis.mlir +++ b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis.mlir @@ -1,5 +1,483 @@ // RUN: mlir-opt %s -linalg-comprehensive-module-bufferize=test-analysis-only -split-input-file | FileCheck %s +//===----------------------------------------------------------------------===// +// Simple cases +//===----------------------------------------------------------------------===// + +// ----- + +// CHECK-LABEL: func @extract_slice_fun +func @extract_slice_fun(%A : tensor, %B : tensor {linalg.inplaceable = true}) + -> (tensor<4xf32>, tensor<8xf32>) +{ + // tensor.extract_slice is not used in a write, it is not compelled to + // bufferize out of place. Let callers decide whether they want to create + // aliasing subviews at all call sites or whether they allocate. + // This is true irrespective of whether the function argument is inplaceable. + // CHECK: tensor.extract_slice + // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + %r0 = tensor.extract_slice %A[0][4][1] : tensor to tensor<4xf32> + + // CHECK: tensor.extract_slice + // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + %r1 = tensor.extract_slice %B[0][8][1] : tensor to tensor<8xf32> + + return %r0, %r1: tensor<4xf32>, tensor<8xf32> +} + +// ----- + +// CHECK-LABEL: func @insert_slice_fun +func @insert_slice_fun( + %A : tensor, + %B : tensor {linalg.inplaceable = true}, + %C : tensor<4xf32>) + -> (tensor, tensor) +{ + // must bufferize out of place. + // CHECK: tensor.insert_slice + // CHECK-SAME: {__inplace_results_attr__ = ["false"]} + %r0 = tensor.insert_slice %C into %A[0][4][1] : tensor<4xf32> into tensor + + // bufferizes inplace. + // CHECK: tensor.insert_slice + // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + %r1 = tensor.insert_slice %C into %B[0][4][1] : tensor<4xf32> into tensor + + return %r0, %r1: tensor, tensor +} + +// ----- + +// CHECK-LABEL: func @conflict_on_B +func @conflict_on_B( + %A : tensor<4x4xf32> {linalg.inplaceable = true}, + %B : tensor<4x4xf32> {linalg.inplaceable = true}) + -> (tensor<4x4xf32>, tensor<4x4xf32>, tensor<4x4xf32>) +{ + // matmul output operand interferes with input operand. + // CHECK: linalg.matmul + // CHECK-SAME: {__inplace_results_attr__ = ["false"]} + %C = linalg.matmul ins(%A, %B: tensor<4x4xf32>, tensor<4x4xf32>) + outs(%B: tensor<4x4xf32>) + -> tensor<4x4xf32> + + // matmul output operand interferes with input operand. + // CHECK: linalg.matmul + // CHECK-SAME: {__inplace_results_attr__ = ["false"]} + %D = linalg.matmul ins(%B, %A: tensor<4x4xf32>, tensor<4x4xf32>) + outs(%B: tensor<4x4xf32>) + -> tensor<4x4xf32> + + // matmul output operand does not interferes with input operand. + // CHECK: linalg.matmul + // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + %E = linalg.matmul ins(%A, %A: tensor<4x4xf32>, tensor<4x4xf32>) + outs(%B: tensor<4x4xf32>) + -> tensor<4x4xf32> + + return %C, %D, %E: tensor<4x4xf32>, tensor<4x4xf32>, tensor<4x4xf32> +} + +//===----------------------------------------------------------------------===// +// Length-1 producer-consumer cases. +//===----------------------------------------------------------------------===// + +// ----- + +// CHECK-LABEL: func @extract_slice_extract_slice +func @extract_slice_extract_slice( + %A : tensor {linalg.inplaceable = true}, %B : tensor) + -> (tensor<2xf32>, tensor<2xf32>) +{ + // tensor.extract_slice is not used in a write, it is not compelled to + // bufferize out of place. Let callers decide whether they want to create + // aliasing subviews at all call sites or whether they allocate. + // This is true irrespective of whether the function argument is inplaceable. + // CHECK: {__inplace_results_attr__ = ["true"]} + %r0 = tensor.extract_slice %A[0][4][1] : tensor to tensor<4xf32> + + // CHECK: {__inplace_results_attr__ = ["true"]} + %r1 = tensor.extract_slice %r0[0][2][1] : tensor<4xf32> to tensor<2xf32> + + // CHECK: {__inplace_results_attr__ = ["true"]} + %r2 = tensor.extract_slice %B[0][4][1] : tensor to tensor<4xf32> + + // CHECK: {__inplace_results_attr__ = ["true"]} + %r3 = tensor.extract_slice %r2[0][2][1] : tensor<4xf32> to tensor<2xf32> + + return %r1, %r3: tensor<2xf32>, tensor<2xf32> +} + +// ----- + +// CHECK-LABEL: func @insert_slice_insert_slice +func @insert_slice_insert_slice( + %A : tensor {linalg.inplaceable = true}, + %A2 : tensor<4xf32> {linalg.inplaceable = true}, + %A3 : tensor<2xf32> {linalg.inplaceable = true}, + %B : tensor, %B2 : tensor<4xf32>, %B3 : tensor<2xf32>) + -> (tensor, tensor) +{ + // CHECK: {__inplace_results_attr__ = ["true"]} + %r0 = tensor.insert_slice %A3 into %A2[0][2][1] : tensor<2xf32> into tensor<4xf32> + + // CHECK: {__inplace_results_attr__ = ["true"]} + %r1 = tensor.insert_slice %r0 into %A[0][4][1] : tensor<4xf32> into tensor + + // CHECK: {__inplace_results_attr__ = ["false"]} + %r2 = tensor.insert_slice %B3 into %B2[0][2][1] : tensor<2xf32> into tensor<4xf32> + + // CHECK: {__inplace_results_attr__ = ["false"]} + %r3 = tensor.insert_slice %r2 into %B[0][4][1] : tensor<4xf32> into tensor + + return %r1, %r3: tensor, tensor +} + +// ----- + +// CHECK-LABEL: func @extract_slice_nonmatching_insert_slice +func @extract_slice_nonmatching_insert_slice( + %A : tensor {linalg.inplaceable = true}, + %B : tensor, %idx: index) + -> (tensor, tensor) +{ + // %r1 bufferizes inplace because %A is inplaceable. + // %r0 is an overlapping tensor.extract_slice that does not match, it must be + // out of place. + // CHECK: tensor.extract_slice + // CHECK-SAME: {__inplace_results_attr__ = ["false"]} + %r0 = tensor.extract_slice %A[0][4][1] : tensor to tensor<4xf32> + + // %r1 can bufferize inplace fine. + // CHECK: tensor.insert_slice + // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + %r1 = tensor.insert_slice %r0 into %A[%idx][4][1] : tensor<4xf32> into tensor + + // %r3 does bufferizes inplace because %B is not inplaceable. + // %r0 is an overlapping tensor.extract_slice that does not match, but does + // not alias with the buffer coming from %r3 so it can actually bufferize + // inplace. + // CHECK: tensor.extract_slice + // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + %r2 = tensor.extract_slice %B[0][4][1] : tensor to tensor<4xf32> + + // %r3 cannot bufferize inplace since %B is not inplaceable. + // CHECK: tensor.insert_slice + // CHECK-SAME: {__inplace_results_attr__ = ["false"]} + %r3 = tensor.insert_slice %r2 into %B[%idx][4][1] : tensor<4xf32> into tensor + + return %r1, %r3: tensor, tensor +} + +// ----- + +// CHECK-LABEL: func @extract_slice_matching_insert_slice +func @extract_slice_matching_insert_slice( + %A : tensor {linalg.inplaceable = true}, + %B : tensor) + -> (tensor, tensor) +{ + // %r1 bufferizes inplace because %A is inplaceable. + // %r0 is a tensor.extract_slice that matches, it can also be bufferized + // inplace. + // CHECK: tensor.extract_slice + // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + %r0 = tensor.extract_slice %A[0][4][1] : tensor to tensor<4xf32> + + // CHECK: tensor.insert_slice + // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + %r1 = tensor.insert_slice %r0 into %A[0][4][1] : tensor<4xf32> into tensor + + // %r2 is a tensor.extract_slice that matches %r3, it can be bufferized + // inplace. + // CHECK: tensor.extract_slice + // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + %r2 = tensor.extract_slice %B[0][4][1] : tensor to tensor<4xf32> + + // tensor.insert_slice cannot bufferize inplace. + // This should have been captured by a canonicalization pattern and it would + // be unproductive to have special logic in bufferization to encode matching + // insert_slice(extract_slice(A), A). + // CHECK: tensor.insert_slice + // CHECK-SAME: {__inplace_results_attr__ = ["false"]} + %r3 = tensor.insert_slice %r2 into %B[0][4][1] : tensor<4xf32> into tensor + + return %r1, %r3: tensor, tensor +} + +// ----- + +// CHECK-LABEL: func @extract_slice_linalg_readonly_use +func @extract_slice_linalg_readonly_use( + %A : tensor, + %B : tensor<4x4xf32>, + %C : tensor<4x4xf32> {linalg.inplaceable = true}) + -> (tensor<4x4xf32>, tensor<4x4xf32>) +{ + // tensor.extract_slice is only used as a read, no interference irrespective + // of user's inplace status. + // CHECK: tensor.extract_slice + // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + %sA = tensor.extract_slice %A[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32> + + // matmul output operand is not inplaceable at the function boundary. + // CHECK: linalg.matmul + // CHECK-SAME: {__inplace_results_attr__ = ["false"]} + %D = linalg.matmul ins(%sA, %B: tensor<4x4xf32>, tensor<4x4xf32>) + outs(%B: tensor<4x4xf32>) + -> tensor<4x4xf32> + + // matmul output operand is inplaceable at the function boundary. + // CHECK: linalg.matmul + // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + %E = linalg.matmul ins(%sA, %B: tensor<4x4xf32>, tensor<4x4xf32>) + outs(%C: tensor<4x4xf32>) + -> tensor<4x4xf32> + + return %D, %E: tensor<4x4xf32>, tensor<4x4xf32> +} + +// ----- + +// CHECK-LABEL: func @extract_slice_to_linalg_write_use +func @extract_slice_to_linalg_write_use( + %A : tensor<4x4xf32>, + %B : tensor, + %C : tensor {linalg.inplaceable = true}) + -> (tensor<4x4xf32>, tensor<4x4xf32>) +{ + // Step 3. %sB forward propagates to a write in %D but it is not inplace. + // So this is only ever read and can bufferize inplace. + // CHECK: tensor.extract_slice + // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + %sB = tensor.extract_slice %B[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32> + + // Step 2. %sB has a read interference in %E, it does not bufferize inplace. + // CHECK: linalg.matmul + // CHECK-SAME: {__inplace_results_attr__ = ["false"]} + %D = linalg.matmul ins(%B, %C: tensor, tensor) + outs(%sB: tensor<4x4xf32>) + -> tensor<4x4xf32> + + // Step 4. %sC forward propagates to an inplace write in %E. + // %sC backward propagates to %C which is inplaceable. + // As a consequence this is bufferized inplace. + // CHECK: tensor.extract_slice + // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + %sC = tensor.extract_slice %C[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32> + + // Step 1. %sC backprops to the tensor.extract_slice producer which is not + // considered an interference. This bufferizes inplace. + // CHECK: linalg.matmul + // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + %E = linalg.matmul ins(%A, %sB: tensor<4x4xf32>, tensor<4x4xf32>) + outs(%sC: tensor<4x4xf32>) + -> tensor<4x4xf32> + + return %D, %E: tensor<4x4xf32>, tensor<4x4xf32> +} + +//===----------------------------------------------------------------------===// +// Transitive cases +//===----------------------------------------------------------------------===// + +// ----- + +// CHECK-LABEL: func @extract_slice_to_linalg_write_use +func @extract_slice_to_linalg_write_use( + %A : tensor<4x4xf32>, + %B : tensor, + %C : tensor {linalg.inplaceable = true}) + -> (tensor<4x4xf32>, tensor<4x4xf32>) +{ + // Step 4. %sB forward propagates to an inplace write in %D. + // %sB backward propagates to %B which is not inplaceable. + // As a consequence this is bufferized out of place. + // CHECK: tensor.extract_slice + // CHECK-SAME: {__inplace_results_attr__ = ["false"]} + %sB = tensor.extract_slice %B[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32> + + // Step 1. %sB backprops to the tensor.extract_slice producer which is not + // considered an interference. This bufferizes inplace. + // CHECK: linalg.matmul + // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + %D = linalg.matmul ins(%B, %C: tensor, tensor) + outs(%sB: tensor<4x4xf32>) + -> tensor<4x4xf32> + + // Step 3. %sC forward propagates to an inplace write in %E. + // %sC backward propagates to %C which is inplaceable. + // As a consequence this is bufferized inplace. + // CHECK: tensor.extract_slice + // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + %sC = tensor.extract_slice %C[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32> + + // Step 1. %sC backprops to the tensor.extract_slice producer which is not + // considered an interference. This bufferizes inplace. + // CHECK: linalg.matmul + // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + %E = linalg.matmul ins(%A, %A: tensor<4x4xf32>, tensor<4x4xf32>) + outs(%sC: tensor<4x4xf32>) + -> tensor<4x4xf32> + + return %D, %E: tensor<4x4xf32>, tensor<4x4xf32> +} + +// ----- + +// CHECK-LABEL: func @nested_extract_slice_and_insert +func @nested_extract_slice_and_insert( + %A : tensor, + %B : tensor {linalg.inplaceable = true}, + %C : tensor {linalg.inplaceable = true}, + %idx : index) + -> (tensor, tensor, tensor) +{ + %f0 = constant 0.0 : f32 + + // 2-level matching tensor.extract_slice / tensor.insert_slice into non + // inplaceable %A. + // - %rA is not inplaceable because %A is not inplaceable at function boundary. + // - once %rA is deemed not inplaceable, nothing prevent %rsA to be inplaceable + // - this propagates to %FA and %ssA being inplaceable. + // - %sA would then bufferize to an inplace write (i.e. %FA) but %A is not + // inplaceable and so %sA is not inplaceable. + // CHECK: tensor.extract_slice + // CHECK-SAME: {__inplace_results_attr__ = ["false"]} + // CHECK-NEXT: tensor.extract_slice + // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-NEXT: fill + // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-NEXT: tensor.insert_slice + // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-NEXT: tensor.insert_slice + // CHECK-SAME: {__inplace_results_attr__ = ["false"]} + %sA = tensor.extract_slice %A[0, 0][%idx, %idx][1, 1] : tensor to tensor + %ssA = tensor.extract_slice %sA[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32> + %FA = linalg.fill(%f0, %ssA) : f32, tensor<4x4xf32> -> tensor<4x4xf32> + %rsA = tensor.insert_slice %FA into %sA[0, 0][4, 4][1, 1] : tensor<4x4xf32> into tensor + %rA = tensor.insert_slice %rsA into %A[0, 0][%idx, %idx][1, 1] : tensor into tensor + + // 3-level matching tensor.extract_slice / tensor.insert_slice into + // inplaceable %B. + // CHECK-NEXT: tensor.extract_slice + // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-NEXT: tensor.extract_slice + // Atm, this 2nd tensor.extract_slice fails to bufferize inplace because + // clobbering analysis conservatively test for equivalent buffers. + // TODO: This is currently too restrictive and misses clobberings. + // When available, use container-containee analysis. + // CHECK-SAME: {__inplace_results_attr__ = ["false"]} + // CHECK-NEXT: tensor.extract_slice + // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-NEXT: fill + // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-NEXT: tensor.insert_slice + // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-NEXT: tensor.insert_slice + // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-NEXT: tensor.insert_slice + // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + %sB = tensor.extract_slice %B[0, 0][%idx, %idx][1, 1] : tensor to tensor + %ssB = tensor.extract_slice %sB[0, 0][4, %idx][1, 1] : tensor to tensor<4x?xf32> + %sssB = tensor.extract_slice %ssB[0, 0][4, 4][1, 1] : tensor<4x?xf32> to tensor<4x4xf32> + %FB = linalg.fill(%f0, %sssB) : f32, tensor<4x4xf32> -> tensor<4x4xf32> + %rssB = tensor.insert_slice %FB into %ssB[0, 0][4, 4][1, 1] : tensor<4x4xf32> into tensor<4x?xf32> + %rsB = tensor.insert_slice %rssB into %sB[0, 0][4, %idx][1, 1] : tensor<4x?xf32> into tensor + %rB = tensor.insert_slice %rsB into %B[0, 0][%idx, %idx][1, 1] : tensor into tensor + + // 2-level matching tensor.extract_slice / tensor.insert_slice into + // inplaceable %C with a twist. + // Throw a wrench in the system: %rsC production sizes do not match %ssC. + // CHECK-NEXT: tensor.extract_slice + // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // The tensor.insert_slice that would be candidate for matching does not actually + // match. That tensor.insert_slice can still be bufferized inplace nonetheless + // but this tensor.extract_slice, which bufferizes to an inplace write, cannot. + // CHECK-NEXT: tensor.extract_slice + // CHECK-SAME: {__inplace_results_attr__ = ["false"]} + // CHECK-NEXT: fill + // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-NEXT: tensor.insert_slice + // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-NEXT: tensor.insert_slice + // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + %sC = tensor.extract_slice %C[0, 0][%idx, %idx][1, 1] : tensor to tensor + %ssC = tensor.extract_slice %sC[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32> + %FC = linalg.fill(%f0, %ssC) : f32, tensor<4x4xf32> -> tensor<4x4xf32> + %rsC = tensor.insert_slice %FC into %sC[0, 0][12345, 67890][1, 1] : tensor<4x4xf32> into tensor + %rC = tensor.insert_slice %rsC into %C[0, 0][%idx, %idx][1, 1] : tensor into tensor + + return %rA, %rB, %rC: tensor, tensor, tensor +} + +//===----------------------------------------------------------------------===// +// Simple loop cases +//===----------------------------------------------------------------------===// + +// ----- + +// CHECK-LABEL: func @scf_for_yield_only +func @scf_for_yield_only(%A : tensor, + %B : tensor {linalg.inplaceable = true}, + %lb : index, %ub : index, %step : index) + -> (tensor, tensor) +{ + // CHECK: scf.for + // CHECK-NEXT: scf.yield + // CHECK-NEXT: {__inplace_results_attr__ = ["false"]} + %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor) { + scf.yield %t : tensor + } + + // CHECK: scf.for + // CHECK-NEXT: scf.yield + // CHECK-NEXT: {__inplace_results_attr__ = ["true"]} + %r1 = scf.for %i = %lb to %ub step %step iter_args(%t = %B) -> (tensor) { + scf.yield %t : tensor + } + + return %r0, %r1: tensor, tensor +} + +// ----- + +// CHECK-LABEL: func @scf_for_with_tensor.insert_slice +func @scf_for_with_tensor.insert_slice(%A : tensor, + %B : tensor {linalg.inplaceable = true}, + %C : tensor<4xf32>, + %lb : index, %ub : index, %step : index) + -> (tensor, tensor) +{ + // CHECK: scf.for + // scf.for bbArgs are always inplaceable seen from ops inside the body: + // 1. Either the matching tensor is not inplaceable and an alloc occurs + // which makes bbArg inplaceable. + // 2. Or it is already inplaceable and so is bbArg. + // CHECK-NEXT: tensor.insert_slice + // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-NEXT: tensor.insert_slice + // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-NEXT: scf.yield + // CHECK-NEXT: {__inplace_results_attr__ = ["false", "true"]} + %r0:2 = scf.for %i = %lb to %ub step %step iter_args(%tA = %A, %tB = %B) + -> (tensor, tensor) + { + %ttA = tensor.insert_slice %C into %tA[0][4][1] : tensor<4xf32> into tensor + %ttB = tensor.insert_slice %C into %tB[0][4][1] : tensor<4xf32> into tensor + scf.yield %ttA, %ttB : tensor, tensor + } + + return %r0#0, %r0#1: tensor, tensor +} + +// ----- + +//===----------------------------------------------------------------------===// +// Cross function boundary cases. +//===----------------------------------------------------------------------===// + func private @foo(tensor<64xf32>) // CHECK-LABEL: dependence_through_call diff --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-invalid.mlir b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-invalid.mlir index d6a6d7c67f6cf..78f84cc8540c4 100644 --- a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-invalid.mlir +++ b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-invalid.mlir @@ -44,3 +44,44 @@ func @bar() { call @foo() : () -> () return } + +// ----- + +func @scf_for(%A : tensor, + %B : tensor {linalg.inplaceable = true}, + %C : tensor<4xf32>, + %lb : index, %ub : index, %step : index) + -> (tensor, tensor) +{ + %r0:2 = scf.for %i = %lb to %ub step %step iter_args(%tA = %A, %tB = %B) + -> (tensor, tensor) + { + %ttA = tensor.insert_slice %C into %tA[0][4][1] : tensor<4xf32> into tensor + %ttB = tensor.insert_slice %C into %tB[0][4][1] : tensor<4xf32> into tensor + + // Throw a wrench in the system by swapping yielded values: this result in a + // ping-pong of values at each iteration on which we currently want to fail. + + // expected-error @+1 {{Yield operand #1 does not bufferize to an equivalent buffer}} + scf.yield %ttB, %ttA : tensor, tensor + } + + return %r0#0, %r0#1: tensor, tensor +} + +// ----- + +func @extract_slice_fun(%A : tensor {linalg.inplaceable = true}) + -> tensor<4xf32> +{ + // This bufferizes to a pattern that the cross-function boundary pass needs to + // convert into a new memref argument at all call site; this may be either: + // - an externally created aliasing subview (if we want to allow aliasing + // function arguments). + // - a new alloc + copy (more expensive but does not create new function + // argument aliasing). + %r0 = tensor.extract_slice %A[0][4][1] : tensor to tensor<4xf32> + + // expected-error @+1 {{buffer result #0 not produced by an alloc}} + return %r0: tensor<4xf32> +} diff --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir index b71f6f92d51ed..bc6488bca8e58 100644 --- a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir +++ b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir @@ -1,5 +1,355 @@ // RUN: mlir-opt %s -linalg-comprehensive-module-bufferize -split-input-file | FileCheck %s +// CHECK-DAG: #[[$map_1d_dyn:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)> + +// CHECK-LABEL: func @fill_inplace( +// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref +func @fill_inplace(%A : tensor {linalg.inplaceable = true}) -> tensor { + // CHECK: %[[F0:.*]] = constant 0.000000e+00 : f32 + %f0 = constant 0.0 : f32 + + /// Inplaceable, no alloc + // CHECK-NOT: alloc + // CHECK: linalg.fill(%[[F0]], %[[A]]) : f32, memref + %r = linalg.fill(%f0, %A) : f32, tensor -> tensor + + // CHECK: return + // CHECK-NOT: tensor + return %r: tensor +} + +// ----- + +// CHECK-DAG: #[[$map_1d_dyn:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)> + +/// No linalg.inplaceable flag, must allocate. +// CHECK-LABEL: func @not_inplace( +// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref) -> memref { +func @not_inplace(%A : tensor) -> tensor { + // CHECK: %[[F0:.*]] = constant 0.000000e+00 : f32 + %f0 = constant 0.0 : f32 + + // CHECK: %[[D0:.*]] = memref.dim %[[A]], {{.*}} : memref + // CHECK: %[[ALLOC:.*]] = memref.alloc(%[[D0]]) : memref + // CHECK: linalg.fill(%[[F0]], %[[ALLOC]]) : f32, memref + %r = linalg.fill(%f0, %A) : f32, tensor -> tensor + + // CHECK: dealloc %[[ALLOC]] : memref + // CHECK: return %[[ALLOC]] : memref + return %r: tensor +} + +// ----- + +// CHECK-DAG: #[[$map_2d_dyn:.*]] = affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)> + +// CHECK-LABEL: func @not_inplace +// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref) { +func @not_inplace(%A : tensor {linalg.inplaceable = true}) -> tensor { + %f0 = constant 0.0 : f32 + + /// Cross-op multiple uses of %A, the first op which has interfering reads must alloc. + // CHECK: %[[ALLOC:.*]] = memref.alloc + // CHECK: linalg.fill({{.*}}, %[[ALLOC]] + %f = linalg.fill(%f0, %A) : f32, tensor -> tensor + + /// The second op has no interfering reads and can reuse. + // CHECK-NOT: alloc + // CHECK: linalg.matmul ins(%[[ALLOC]], %[[ALLOC]]{{.*}}) outs(%[[A]] + %r = linalg.matmul ins(%f, %f: tensor, tensor) + outs(%A: tensor) + -> tensor + + // CHECK: return + // CHECK-NOT: tensor + return %r: tensor +} + +// ----- + +// CHECK-LABEL: func @not_inplace +func @not_inplace(%A : tensor {linalg.inplaceable = true}) -> tensor { + /// Within op multiple uses of %A, must alloc. + // CHECK: alloc + %r = linalg.matmul ins(%A, %A: tensor, tensor) + outs(%A: tensor) + -> tensor + return %r: tensor +} +// ----- + +// CHECK-LABEL: func @vec_inplace +func @vec_inplace(%A : tensor {linalg.inplaceable = true}, %vec : vector<4xf32>) + -> tensor +{ + %c0 = constant 0 : index + + // CHECK-NOT: alloc + %r = vector.transfer_write %vec, %A[%c0] : vector<4xf32>, tensor + + // CHECK: return + // CHECK-NOT: tensor + return %r: tensor +} + +// ----- + +// CHECK-DAG: #[[$map_1d_dyn:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)> + +// CHECK-LABEL: func @vec_not_inplace +// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref +func @vec_not_inplace(%A : tensor {linalg.inplaceable = true}, %vec : vector<4xf32>) + -> (tensor, tensor) +{ + %c0 = constant 0 : index + %c1 = constant 1 : index + + /// Cross-op multiple uses of %A, the first vector.transfer which has interfering reads must alloc. + // CHECK: %[[ALLOC:.*]] = memref.alloc + // CHECK-NEXT: vector.transfer_write {{.*}}, %[[ALLOC]] + %r0 = vector.transfer_write %vec, %A[%c0] : vector<4xf32>, tensor + + /// The second vector.transfer has no interfering reads and can reuse the buffer. + // CHECK-NOT: alloc + // CHECK-NEXT: vector.transfer_write {{.*}}, %[[A]] + %r1 = vector.transfer_write %vec, %A[%c1] : vector<4xf32>, tensor + + // CHECK: return + // CHECK-NOT: tensor + return %r0, %r1: tensor, tensor +} + +// ----- + +// CHECK-DAG: #[[$map_1d_dyn:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)> + +// CHECK-LABEL: func @insert_slice_fun +// CHECK-SAME: %[[A0:[a-zA-Z0-9]*]]: memref, +// CHECK-SAME: %[[A1:[a-zA-Z0-9]*]]: memref, +// CHECK-SAME: %[[t0:[a-zA-Z0-9]*]]: memref<4xf32, #[[$map_1d_dyn]]>, +// CHECK-SAME: %[[t1:[a-zA-Z0-9]*]]: memref<4xf32, #[[$map_1d_dyn]]> +func @insert_slice_fun(%A0 : tensor, + %A1 : tensor {linalg.inplaceable = true}, + %t0 : tensor<4xf32>, + %t1 : tensor<4xf32> {linalg.inplaceable = true}) + -> (tensor, tensor, tensor, tensor) +{ + // Alloc and copy the whole result tensor. Copy the tensor.extract_slice. + // CHECK: %[[REALLOC_A0:.*]] = memref.alloc + // CHECK: linalg.copy(%[[A0]], %[[REALLOC_A0]] + // CHECK: %[[SV_A0:.*]] = memref.subview %[[REALLOC_A0]] + // CHECK: linalg.copy(%[[t0]], %[[SV_A0]]) + %r0 = tensor.insert_slice %t0 into %A0[0][4][1] : tensor<4xf32> into tensor + + // Alloc and copy the whole result tensor. Copy the tensor.extract_slice. + // CHECK: %[[REALLOC_A0_2:.*]] = memref.alloc + // CHECK: linalg.copy(%[[A0]] + // CHECK: %[[SV_A0_2:.*]] = memref.subview %[[REALLOC_A0_2]] + // CHECK: linalg.copy(%[[t1]], %[[SV_A0_2]]) + %r1 = tensor.insert_slice %t1 into %A0[0][4][1] : tensor<4xf32> into tensor + + // Still alloc the large tensor because %A1 is read after. Copy the tensor.extract_slice. + // CHECK: %[[REALLOC_A1:.*]] = memref.alloc + // CHECK: linalg.copy(%[[A1]] + // CHECK: %[[SV_A1:.*]] = memref.subview %[[REALLOC_A1]] + // CHECK: linalg.copy(%[[t0]], %[[SV_A1]]) + %r2 = tensor.insert_slice %t0 into %A1[0][4][1] : tensor<4xf32> into tensor + + // Do not realloc the large tensor. Copy the tensor.extract_slice. + // CHECK-NOT: alloc + // CHECK: %[[SV_A1_2:.*]] = memref.subview %[[A1]] + // CHECK: linalg.copy(%[[t1]], %[[SV_A1_2]]) + %r3 = tensor.insert_slice %t1 into %A1[0][4][1] : tensor<4xf32> into tensor + + // CHECK: return %[[REALLOC_A0]], %[[REALLOC_A0_2]], %[[REALLOC_A1]] : + // CHECK-SAME: memref, memref, memref + return %r0, %r1, %r2, %r3: tensor, tensor, tensor, tensor +} + +// ----- + +// CHECK-DAG: #[[$map_1d_dyn:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)> + +// CHECK-LABEL: func @insert_slice_fun +// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref +// CHECK-SAME: %[[t:[a-zA-Z0-9]*]]: memref<4xf32, #[[$map_1d_dyn]]> +func @insert_slice_fun(%A : tensor {linalg.inplaceable = true}, %t : tensor<4xf32>) + -> tensor +{ + %f0 = constant 0.0 : f32 + + // CHECK-NOT: alloc + // CHECK: %[[SV_A:.*]] = memref.subview %[[A]] + // CHECK: linalg.copy(%[[t]], %[[SV_A]]) + %r0 = tensor.insert_slice %t into %A[0][4][1] : tensor<4xf32> into tensor + + /// Overwrite A inplace. + // CHECK: linalg.fill({{.*}}, %[[A]] + %r1 = linalg.fill(%f0, %r0) : f32, tensor -> tensor + + // CHECK: return + // CHECK-NOT: tensor + return %r1: tensor +} + +// ----- + +// CHECK-DAG: #[[$map_1d_dyn:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)> + +// CHECK-LABEL: func @insert_slice_fun +// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref +// CHECK-SAME: %[[t:[a-zA-Z0-9]*]]: memref<4xf32, #[[$map_1d_dyn]]> +func @insert_slice_fun(%A : tensor {linalg.inplaceable = true}, %t : tensor<4xf32>) + -> tensor +{ + %f0 = constant 0.0 : f32 + + // CHECK: linalg.fill({{.*}}, %[[A]] + %r0 = linalg.fill(%f0, %A) : f32, tensor -> tensor + + // CHECK-NOT: alloc + // CHECK: %[[SV_A:.*]] = memref.subview %[[A]] + /// Overwrite A inplace by copying into the subview. + // CHECK: linalg.copy(%[[t]], %[[SV_A]]) + %r1 = tensor.insert_slice %t into %r0[0][4][1] : tensor<4xf32> into tensor + + // CHECK: return + // CHECK-NOT: tensor + return %r1: tensor +} + +// ----- + +// CHECK-DAG: #[[$map_1d_dyn:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)> + +// CHECK-LABEL: func @insert_slice_fun_not_inplace +// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref +// CHECK-SAME: %[[t:[a-zA-Z0-9]*]]: memref<4xf32, #[[$map_1d_dyn]]> +func @insert_slice_fun_not_inplace(%A : tensor, %t : tensor<4xf32>) + -> tensor +{ + // CHECK: %[[ALLOC:.*]] = memref.alloc(%{{.*}}) : memref + // CHECK: linalg.copy(%[[A]], %[[ALLOC]]) : memref + // CHECK: %[[SV:.*]] = memref.subview %[[ALLOC]][0] [4] [1] : memref to memref<4xf32> + // CHECK: linalg.copy(%[[t]], %[[SV]]) : memref<4xf32, #map>, memref<4xf32> + // CHECK: memref.dealloc %[[ALLOC]] : memref + %r0 = tensor.insert_slice %t into %A[0][4][1] : tensor<4xf32> into tensor + + // CHECK: return %{{.*}} : memref + return %r0: tensor +} + +// ----- + +// CHECK-DAG: #[[$map_1d_dyn:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)> + +// CHECK-LABEL: func @insert_slice_fun_not_inplace +// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref +// CHECK-SAME: %[[t:[a-zA-Z0-9]*]]: memref<4xf32, #[[$map_1d_dyn]]> +func @insert_slice_fun_not_inplace(%A : tensor {linalg.inplaceable = true}, %t : tensor<4xf32>) + -> (tensor, tensor) +{ + %f0 = constant 0.0 : f32 + + // tensor.insert_slice is bufferized first, %A is inplaceable so we can make this inplace + // CHECK-DAG: %[[SV_A:.*]] = memref.subview %[[A]][0] [4] [1] : memref to memref<4xf32, {{.*}}> + // CHECK-DAG: linalg.copy(%[[t]], %[[SV_A]]) : memref<4xf32, {{.*}}>, memref<4xf32, {{.*}}> + %r0 = tensor.insert_slice %t into %A[0][4][1] : tensor<4xf32> into tensor + + // fill would interfere with %r0 that is also being returned. + // So we need to bufferize it out of place and make a new alloc. + // CHECK-DAG: %[[ALLOC:.*]] = memref.alloc({{.*}}) : memref + // CHECK: linalg.fill(%{{.*}}, %[[ALLOC]] + %r1 = linalg.fill(%f0, %A) : f32, tensor -> tensor + + // CHECK: memref.dealloc %[[ALLOC]] : memref + // CHECK: return %[[ALLOC]] : memref + return %r1, %r0: tensor, tensor +} + +//===----------------------------------------------------------------------===// +// Simple loop cases +//===----------------------------------------------------------------------===// + +// ----- + +// CHECK-DAG: #[[$map_1d_dyn:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)> + +// CHECK-LABEL: func @scf_for_yield_only +// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref +// CHECK-SAME: %[[t:[a-zA-Z0-9]*]]: memref +func @scf_for_yield_only(%A : tensor, + %B : tensor {linalg.inplaceable = true}, + %lb : index, %ub : index, %step : index) + -> (tensor, tensor) +{ + // CHECK: %[[ALLOC_FOR_A:.*]] = memref.alloc + // CHECK: linalg.copy(%[[A]], %[[ALLOC_FOR_A]]) + + // The first scf.for remains but just turns into dead code. + %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor) { + scf.yield %t : tensor + } + + // The second scf.for remains but just turns into dead code. + %r1 = scf.for %i = %lb to %ub step %step iter_args(%t = %B) -> (tensor) { + scf.yield %t : tensor + } + + // CHECK: memref.dealloc %[[ALLOC_FOR_A]] : memref + // CHECK: return %[[ALLOC_FOR_A]] : memref + return %r0, %r1: tensor, tensor +} + +// ----- + +// CHECK-DAG: #[[$map_1d_dyn:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)> + +// CHECK-LABEL: func @scf_for_with_tensor.insert_slice +// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref +// CHECK-SAME: %[[B:[a-zA-Z0-9]*]]: memref +// CHECK-SAME: %[[C:[a-zA-Z0-9]*]]: memref<4xf32, #[[$map_1d_dyn]]> +func @scf_for_with_tensor.insert_slice( + %A : tensor, + %B : tensor {linalg.inplaceable = true}, + %C : tensor<4xf32>, + %lb : index, %ub : index, %step : index) + -> (tensor, tensor) +{ + // CHECK: %[[ALLOC_FOR_A:.*]] = memref.alloc + // CHECK: linalg.copy(%[[A]], %[[ALLOC_FOR_A]]) + + // CHECK: %[[svA:.*]] = memref.subview %[[ALLOC_FOR_A]][0] [4] [1] + // CHECK: %[[svB:.*]] = memref.subview %[[B]][0] [4] [1] + + // CHECK: scf.for {{.*}} + // CHECK-NOT: iter_args + %r0:2 = scf.for %i = %lb to %ub step %step iter_args(%tA = %A, %tB = %B) + -> (tensor, tensor) + { + // %ttA bufferizes to direct copy of %BUFFER_CAST_C into %svA + // CHECK: linalg.copy(%[[C]], %[[svA]]) + %ttA = tensor.insert_slice %C into %tA[0][4][1] : tensor<4xf32> into tensor + + // %ttB bufferizes to direct copy of %BUFFER_CAST_C into %BUFFER_CAST_B + // CHECK: linalg.copy(%[[C]], %[[svB]]) + %ttB = tensor.insert_slice %C into %tB[0][4][1] : tensor<4xf32> into tensor + + // CHECK-NOT: scf.yield + scf.yield %ttA, %ttB : tensor, tensor + } + + // CHECK: memref.dealloc %[[ALLOC_FOR_A]] : memref + // CHECK: return %[[ALLOC_FOR_A]] : memref + return %r0#0, %r0#1: tensor, tensor +} + +// ----- + +//===----------------------------------------------------------------------===// +// Cross function boundary cases. +//===----------------------------------------------------------------------===// + // CHECK: #[[$DYN_1D_MAP:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)> // CHECK: func private @some_external_func(memref) From ed1681ed3aff0f1fb97f1a738938671df098b939 Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Wed, 30 Jun 2021 21:18:13 +0000 Subject: [PATCH 409/619] [mlir][Linalg] Add comprehensive bufferization support for ConstantOp (13/n) ConstantOp are only supported in the ModulePass because they require a GlobalCreator object that must be constructed from a ModuleOp. If the standlaone FunctionPass encounters a ConstantOp, bufferization fails. Differential revision: https://reviews.llvm.org/D105156 --- .../Transforms/ComprehensiveBufferize.cpp | 37 ++++++++++++++++++- .../comprehensive-module-bufferize.mlir | 20 ++++++++++ 2 files changed, 55 insertions(+), 2 deletions(-) diff --git a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp index dec08dfd4da2c..178676c5e4b7b 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp @@ -358,6 +358,7 @@ static bool hasKnownBufferizationAliasingBehavior(Operation *op) { // clang-format off isa getAliasingOpOperand(OpResult result) { return None; return TypeSwitch(result.getDefiningOp()) .Case([&](tensor::CastOp op) { return &op->getOpOperand(0); }) + .Case([&](ConstantOp op) { return &op->getOpOperand(0); }) .Case([&](LinalgOp op) { return op.getOutputTensorOperands()[result.getResultNumber()]; }) @@ -499,6 +501,8 @@ static Optional getAliasingOpResult(OpOperand &opOperand) { // These terminators legitimately have no result. .Case( [&](auto op) { return OpResult(); }) + // ConstantOp is never inplaceable. + .Case([&](ConstantOp op) { return op->getResult(0); }) // ExtractSliceOp is different: its result is not inplaceable on op.source // but when bufferized inplace, the result is an aliasing subregion of // op.source. @@ -1600,6 +1604,26 @@ static LogicalResult bufferize(OpBuilder &b, tensor::CastOp castOp, return success(); } +static LogicalResult bufferize(OpBuilder &b, ConstantOp constantOp, + BlockAndValueMapping &bvm, + BufferizationAliasInfo &aliasInfo, + GlobalCreator &globalCreator) { + if (!constantOp.getType().dyn_cast()) + return failure(); + + // Take a guard before anything else. + OpBuilder::InsertionGuard g(b); + b.setInsertionPoint(constantOp); + + auto globalMemref = globalCreator.getGlobalFor(constantOp); + Value memref = b.create( + constantOp.getLoc(), globalMemref.type(), globalMemref.getName()); + aliasInfo.insertNewBufferEquivalence(memref, constantOp.getResult()); + map(bvm, constantOp, memref); + + return success(); +} + /// DimOp tensor operand is modified inplace. This allows leaving dead /// tensors behind that will get DCE'd. static LogicalResult bufferize(OpBuilder &b, tensor::DimOp dimOp, @@ -2115,7 +2139,8 @@ inPlaceAnalysisFuncOpBody(FuncOp funcOp, BufferizationAliasInfo &aliasInfo, static LogicalResult bufferizeFuncOpInternals( FuncOp funcOp, BlockAndValueMapping &bvm, BufferizationAliasInfo &aliasInfo, - DenseMap &bufferizedFunctionTypes) { + DenseMap &bufferizedFunctionTypes, + GlobalCreator &globalCreator) { LLVM_DEBUG(llvm::dbgs() << "\n\n"); LDBG("Begin BufferizeFuncOpInternals:\n" << funcOp << '\n'); OpBuilder b(funcOp->getContext()); @@ -2151,6 +2176,12 @@ static LogicalResult bufferizeFuncOpInternals( LDBG("Begin bufferize:\n" << op << '\n'); return bufferize(b, op, bvm, aliasInfo, bufferizedFunctionTypes); }) + .Case([&](ConstantOp op) { + if (!isaTensor(op.getResult().getType())) + return success(); + LDBG("Begin bufferize:\n" << op << '\n'); + return bufferize(b, op, bvm, aliasInfo, globalCreator); + }) .Default([&](Operation *op) { auto isaTensor = [](Type t) { return t.isa(); }; if (any_of(op->getOperandTypes(), isaTensor) || @@ -2429,6 +2460,7 @@ void LinalgComprehensiveModuleBufferize::runOnOperation() { if (failed(getFuncOpsOrderedByCalls(moduleOp, orderedFuncOps, callerMap))) return signalPassFailure(); + GlobalCreator globalCreator(moduleOp); DominanceInfo domInfo(moduleOp); BufferizationAliasInfo aliasInfo(moduleOp); // Interestingly, all function args that are not visible outside of a module @@ -2461,7 +2493,8 @@ void LinalgComprehensiveModuleBufferize::runOnOperation() { if (!testAnalysisOnly) { BlockAndValueMapping tensorToBufferMap; if (failed(bufferizeFuncOpInternals(funcOp, tensorToBufferMap, aliasInfo, - bufferizedFunctionTypes))) { + bufferizedFunctionTypes, + globalCreator))) { signalPassFailure(); return; } diff --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir index bc6488bca8e58..f7f221b2b77fb 100644 --- a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir +++ b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir @@ -352,6 +352,26 @@ func @scf_for_with_tensor.insert_slice( // CHECK: #[[$DYN_1D_MAP:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)> +// CHECK: memref.global "private" constant @__constant_4xi32 : memref<4xi32> = dense<[1, 2, 3, 4]> +// CHECK: func private @some_external_func(memref<4xi32, #[[$DYN_1D_MAP]]>) +func private @some_external_func(tensor<4xi32>) + +// CHECK: func @main() +func @main() { +// CHECK: %[[A:.*]] = memref.get_global @__constant_4xi32 : memref<4xi32> + %A = constant dense<[1, 2, 3, 4]> : tensor<4xi32> + +// CHECK: %[[B:.*]] = memref.cast %[[A]] : memref<4xi32> to memref<4xi32, #[[$DYN_1D_MAP]]> +// CHECK: call @some_external_func(%[[B]]) : (memref<4xi32, #[[$DYN_1D_MAP]]>) -> () + call @some_external_func(%A) : (tensor<4xi32>) -> () + + return +} + +// ----- + +// CHECK: #[[$DYN_1D_MAP:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)> + // CHECK: func private @some_external_func(memref) func private @some_external_func(tensor) From db89414da4eae1c4cde63b8a4b0c762bbebf53b7 Mon Sep 17 00:00:00 2001 From: Jon Chesterfield Date: Thu, 1 Jul 2021 12:53:03 +0100 Subject: [PATCH 410/619] [libomptarget][nfc] Move grid size computation Change getLaunchVals to return the integers used for launch Reviewed By: pdhaliwal Differential Revision: https://reviews.llvm.org/D105237 --- .../libomptarget/plugins/amdgpu/src/rtl.cpp | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp index b047455296525..8f40778083cf1 100644 --- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp @@ -1886,8 +1886,8 @@ int32_t __tgt_rtl_data_delete(int device_id, void *tgt_ptr) { // EnvTeamLimit, EnvNumTeams, num_teams, thread_limit, // loop_tripcount. struct launchVals { - int threadsPerGroup; - int num_groups; + int WorkgroupSize; + int GridSize; }; launchVals getLaunchVals(int ConstWGSize, int ExecutionMode, int EnvTeamLimit, @@ -2031,8 +2031,8 @@ launchVals getLaunchVals(int ConstWGSize, int ExecutionMode, int EnvTeamLimit, threadsPerGroup); launchVals res; - res.threadsPerGroup = threadsPerGroup; - res.num_groups = num_groups; + res.WorkgroupSize = threadsPerGroup; + res.GridSize = threadsPerGroup * num_groups; return res; } @@ -2118,10 +2118,11 @@ int32_t __tgt_rtl_run_target_team_region_locked( thread_limit, // From run_region arg loop_tripcount, // From run_region arg DeviceInfo.NumTeams[KernelInfo->device_id]); - int num_groups = LV.num_groups; - int threadsPerGroup = LV.threadsPerGroup; + const int GridSize = LV.GridSize; + const int WorkgroupSize = LV.WorkgroupSize; if (print_kernel_trace >= LAUNCH) { + int num_groups = GridSize / WorkgroupSize; // enum modes are SPMD, GENERIC, NONE 0,1,2 // if doing rtl timing, print to stderr, unless stdout requested. bool traceToStdout = print_kernel_trace & (RTL_TO_STDOUT | RTL_TIMING); @@ -2130,7 +2131,7 @@ int32_t __tgt_rtl_run_target_team_region_locked( "reqd:(%4dX%4d) lds_usage:%uB sgpr_count:%u vgpr_count:%u " "sgpr_spill_count:%u vgpr_spill_count:%u tripcount:%lu n:%s\n", device_id, KernelInfo->ExecutionMode, KernelInfo->ConstWGSize, - arg_num, num_groups, threadsPerGroup, num_teams, thread_limit, + arg_num, num_groups, WorkgroupSize, num_teams, thread_limit, group_segment_size, sgpr_count, vgpr_count, sgpr_spill_count, vgpr_spill_count, loop_tripcount, KernelInfo->Name); } @@ -2150,11 +2151,11 @@ int32_t __tgt_rtl_run_target_team_region_locked( // packet->header is written last packet->setup = UINT16_C(1) << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; - packet->workgroup_size_x = threadsPerGroup; + packet->workgroup_size_x = WorkgroupSize; packet->workgroup_size_y = 1; packet->workgroup_size_z = 1; packet->reserved0 = 0; - packet->grid_size_x = num_groups * threadsPerGroup; + packet->grid_size_x = GridSize; packet->grid_size_y = 1; packet->grid_size_z = 1; packet->private_segment_size = KernelInfoEntry.private_segment_size; From aa13e4fe7e7b03b827fe2ade346174ce57fb9411 Mon Sep 17 00:00:00 2001 From: Marcos Horro Date: Thu, 1 Jul 2021 11:49:24 +0100 Subject: [PATCH 411/619] [llvm-mca] Fix JSON output (PR50922) Based on the discussion in PR50922, minor changes have been done to properly output a valid JSON. Removed "not implemented" keys. Differential Revision: https://reviews.llvm.org/D105064 --- llvm/docs/CommandGuide/llvm-mca.rst | 16 +- llvm/test/tools/llvm-mca/JSON/X86/views.s | 287 +++++++++--------- llvm/tools/llvm-mca/PipelinePrinter.cpp | 14 +- .../tools/llvm-mca/Views/BottleneckAnalysis.h | 2 +- .../llvm-mca/Views/DispatchStatistics.cpp | 11 + .../tools/llvm-mca/Views/DispatchStatistics.h | 1 + .../llvm-mca/Views/InstructionInfoView.cpp | 2 +- .../llvm-mca/Views/RegisterFileStatistics.h | 1 + .../Views/RetireControlUnitStatistics.h | 1 + .../llvm-mca/Views/SchedulerStatistics.h | 1 + llvm/tools/llvm-mca/Views/View.h | 1 + 11 files changed, 190 insertions(+), 147 deletions(-) diff --git a/llvm/docs/CommandGuide/llvm-mca.rst b/llvm/docs/CommandGuide/llvm-mca.rst index bbad0bca29416..cd5f30c826e5c 100644 --- a/llvm/docs/CommandGuide/llvm-mca.rst +++ b/llvm/docs/CommandGuide/llvm-mca.rst @@ -128,7 +128,7 @@ option specifies "``-``", then the output will also be sent to standard output. Specify the size of the load queue in the load/store unit emulated by the tool. By default, the tool assumes an unbound number of entries in the load queue. A value of zero for this flag is ignored, and the default load queue size is - used instead. + used instead. .. option:: -squeue= @@ -203,16 +203,18 @@ option specifies "``-``", then the output will also be sent to standard output. .. option:: -bottleneck-analysis Print information about bottlenecks that affect the throughput. This analysis - can be expensive, and it is disabled by default. Bottlenecks are highlighted + can be expensive, and it is disabled by default. Bottlenecks are highlighted in the summary view. Bottleneck analysis is currently not supported for processors with an in-order backend. .. option:: -json - Print the requested views in JSON format. The instructions and the processor - resources are printed as members of special top level JSON objects. The - individual views refer to them by index. - + Print the requested views in valid JSON format. The instructions and the + processor resources are printed as members of special top level JSON objects. + The individual views refer to them by index. However, not all views are + currently supported. For example, the report from the bottleneck analysis is + not printed out in JSON. All the default views are currently supported. + .. option:: -disable-cb Force usage of the generic CustomBehaviour class rather than using the target @@ -987,7 +989,7 @@ an instruction is allowed to commit writes and retire out-of-order if Custom Behaviour """""""""""""""""""""""""""""""""""" Due to certain instructions not being expressed perfectly within their -scheduling model, :program:`llvm-ma` isn't always able to simulate them +scheduling model, :program:`llvm-mca` isn't always able to simulate them perfectly. Modifying the scheduling model isn't always a viable option though (maybe because the instruction is modeled incorrectly on purpose or the instruction's behaviour is quite complex). The diff --git a/llvm/test/tools/llvm-mca/JSON/X86/views.s b/llvm/test/tools/llvm-mca/JSON/X86/views.s index ebf059617a604..ed5d9c2da0cf5 100644 --- a/llvm/test/tools/llvm-mca/JSON/X86/views.s +++ b/llvm/test/tools/llvm-mca/JSON/X86/views.s @@ -2,7 +2,11 @@ # Verify that we create proper JSON for the MCA views TimelineView, ResourcePressureview, # InstructionInfoView and SummaryView. -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=haswell --json --timeline-max-iterations=1 --timeline < %s | FileCheck %s +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=haswell --json --timeline-max-iterations=1 --timeline --all-stats --all-views < %s | FileCheck %s +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=haswell --json --timeline-max-iterations=1 --timeline --all-stats --all-views -o %t.json < %s +# RUN: cat %t.json \ +# RUN: | %python -c 'import json, sys; json.dump(json.loads(sys.stdin.read()), sys.stdout, sort_keys=True, indent=2)' \ +# RUN: | FileCheck %s add %eax, %eax add %ebx, %ebx @@ -10,29 +14,122 @@ add %ecx, %ecx add %edx, %edx # CHECK: { -# CHECK-NEXT: "Instructions": [ -# CHECK-NEXT: "addl\t%eax, %eax", -# CHECK-NEXT: "addl\t%ebx, %ebx", -# CHECK-NEXT: "addl\t%ecx, %ecx", -# CHECK-NEXT: "addl\t%edx, %edx" -# CHECK-NEXT: ], -# CHECK-NEXT: "Resources": { -# CHECK-NEXT: "CPUName": "haswell", -# CHECK-NEXT: "Resources": [ -# CHECK-NEXT: "HWDivider", -# CHECK-NEXT: "HWFPDivider", -# CHECK-NEXT: "HWPort0", -# CHECK-NEXT: "HWPort1", -# CHECK-NEXT: "HWPort2", -# CHECK-NEXT: "HWPort3", -# CHECK-NEXT: "HWPort4", -# CHECK-NEXT: "HWPort5", -# CHECK-NEXT: "HWPort6", -# CHECK-NEXT: "HWPort7" +# CHECK-NEXT: "DispatchStatistics": { +# CHECK-NEXT: "GROUP": 0, +# CHECK-NEXT: "LQ": 0, +# CHECK-NEXT: "RAT": 0, +# CHECK-NEXT: "RCU": 0, +# CHECK-NEXT: "SCHEDQ": 0, +# CHECK-NEXT: "SQ": 0, +# CHECK-NEXT: "USH": 0 +# CHECK-NEXT: }, +# CHECK-NEXT: "InstructionInfoView": { +# CHECK-NEXT: "InstructionList": [ +# CHECK-NEXT: { +# CHECK-NEXT: "Instruction": 0, +# CHECK-NEXT: "Latency": 1, +# CHECK-NEXT: "NumMicroOpcodes": 1, +# CHECK-NEXT: "RThroughput": 0.25, +# CHECK-NEXT: "hasUnmodeledSideEffects": false, +# CHECK-NEXT: "mayLoad": false, +# CHECK-NEXT: "mayStore": false +# CHECK-NEXT: }, +# CHECK-NEXT: { +# CHECK-NEXT: "Instruction": 1, +# CHECK-NEXT: "Latency": 1, +# CHECK-NEXT: "NumMicroOpcodes": 1, +# CHECK-NEXT: "RThroughput": 0.25, +# CHECK-NEXT: "hasUnmodeledSideEffects": false, +# CHECK-NEXT: "mayLoad": false, +# CHECK-NEXT: "mayStore": false +# CHECK-NEXT: }, +# CHECK-NEXT: { +# CHECK-NEXT: "Instruction": 2, +# CHECK-NEXT: "Latency": 1, +# CHECK-NEXT: "NumMicroOpcodes": 1, +# CHECK-NEXT: "RThroughput": 0.25, +# CHECK-NEXT: "hasUnmodeledSideEffects": false, +# CHECK-NEXT: "mayLoad": false, +# CHECK-NEXT: "mayStore": false +# CHECK-NEXT: }, +# CHECK-NEXT: { +# CHECK-NEXT: "Instruction": 3, +# CHECK-NEXT: "Latency": 1, +# CHECK-NEXT: "NumMicroOpcodes": 1, +# CHECK-NEXT: "RThroughput": 0.25, +# CHECK-NEXT: "hasUnmodeledSideEffects": false, +# CHECK-NEXT: "mayLoad": false, +# CHECK-NEXT: "mayStore": false +# CHECK-NEXT: } # CHECK-NEXT: ] -# CHECK-NEXT: } -# CHECK-NEXT: } -# CHECK-NEXT: { +# CHECK-NEXT: }, +# CHECK-NEXT: "Instructions and CPU resources": { +# CHECK-NEXT: "Instructions": [ +# CHECK-NEXT: "addl\t%eax, %eax", +# CHECK-NEXT: "addl\t%ebx, %ebx", +# CHECK-NEXT: "addl\t%ecx, %ecx", +# CHECK-NEXT: "addl\t%edx, %edx" +# CHECK-NEXT: ], +# CHECK-NEXT: "Resources": { +# CHECK-NEXT: "CPUName": "haswell", +# CHECK-NEXT: "Resources": [ +# CHECK-NEXT: "HWDivider", +# CHECK-NEXT: "HWFPDivider", +# CHECK-NEXT: "HWPort0", +# CHECK-NEXT: "HWPort1", +# CHECK-NEXT: "HWPort2", +# CHECK-NEXT: "HWPort3", +# CHECK-NEXT: "HWPort4", +# CHECK-NEXT: "HWPort5", +# CHECK-NEXT: "HWPort6", +# CHECK-NEXT: "HWPort7" +# CHECK-NEXT: ] +# CHECK-NEXT: } +# CHECK-NEXT: }, +# CHECK-NEXT: "ResourcePressureView": { +# CHECK-NEXT: "ResourcePressureInfo": [ +# CHECK-NEXT: { +# CHECK-NEXT: "InstructionIndex": 0, +# CHECK-NEXT: "ResourceIndex": 8, +# CHECK-NEXT: "ResourceUsage": 1 +# CHECK-NEXT: }, +# CHECK-NEXT: { +# CHECK-NEXT: "InstructionIndex": 1, +# CHECK-NEXT: "ResourceIndex": 7, +# CHECK-NEXT: "ResourceUsage": 1 +# CHECK-NEXT: }, +# CHECK-NEXT: { +# CHECK-NEXT: "InstructionIndex": 2, +# CHECK-NEXT: "ResourceIndex": 3, +# CHECK-NEXT: "ResourceUsage": 1 +# CHECK-NEXT: }, +# CHECK-NEXT: { +# CHECK-NEXT: "InstructionIndex": 3, +# CHECK-NEXT: "ResourceIndex": 2, +# CHECK-NEXT: "ResourceUsage": 1 +# CHECK-NEXT: }, +# CHECK-NEXT: { +# CHECK-NEXT: "InstructionIndex": 4, +# CHECK-NEXT: "ResourceIndex": 2, +# CHECK-NEXT: "ResourceUsage": 1 +# CHECK-NEXT: }, +# CHECK-NEXT: { +# CHECK-NEXT: "InstructionIndex": 4, +# CHECK-NEXT: "ResourceIndex": 3, +# CHECK-NEXT: "ResourceUsage": 1 +# CHECK-NEXT: }, +# CHECK-NEXT: { +# CHECK-NEXT: "InstructionIndex": 4, +# CHECK-NEXT: "ResourceIndex": 7, +# CHECK-NEXT: "ResourceUsage": 1 +# CHECK-NEXT: }, +# CHECK-NEXT: { +# CHECK-NEXT: "InstructionIndex": 4, +# CHECK-NEXT: "ResourceIndex": 8, +# CHECK-NEXT: "ResourceUsage": 1 +# CHECK-NEXT: } +# CHECK-NEXT: ] +# CHECK-NEXT: }, # CHECK-NEXT: "SummaryView": { # CHECK-NEXT: "BlockRThroughput": 1, # CHECK-NEXT: "DispatchWidth": 4, @@ -42,119 +139,37 @@ add %edx, %edx # CHECK-NEXT: "TotalCycles": 103, # CHECK-NEXT: "TotaluOps": 400, # CHECK-NEXT: "uOpsPerCycle": 3.883495145631068 -# CHECK-NEXT: } -# CHECK-NEXT: } -# CHECK-NEXT: [ -# CHECK-NEXT: { -# CHECK-NEXT: "Instruction": 0, -# CHECK-NEXT: "Latency": 1, -# CHECK-NEXT: "NumMicroOpcodes": 1, -# CHECK-NEXT: "RThroughput": 0.25, -# CHECK-NEXT: "hasUnmodeledSideEffects": false, -# CHECK-NEXT: "mayLoad": false, -# CHECK-NEXT: "mayStore": false -# CHECK-NEXT: }, -# CHECK-NEXT: { -# CHECK-NEXT: "Instruction": 1, -# CHECK-NEXT: "Latency": 1, -# CHECK-NEXT: "NumMicroOpcodes": 1, -# CHECK-NEXT: "RThroughput": 0.25, -# CHECK-NEXT: "hasUnmodeledSideEffects": false, -# CHECK-NEXT: "mayLoad": false, -# CHECK-NEXT: "mayStore": false -# CHECK-NEXT: }, -# CHECK-NEXT: { -# CHECK-NEXT: "Instruction": 2, -# CHECK-NEXT: "Latency": 1, -# CHECK-NEXT: "NumMicroOpcodes": 1, -# CHECK-NEXT: "RThroughput": 0.25, -# CHECK-NEXT: "hasUnmodeledSideEffects": false, -# CHECK-NEXT: "mayLoad": false, -# CHECK-NEXT: "mayStore": false # CHECK-NEXT: }, -# CHECK-NEXT: { -# CHECK-NEXT: "Instruction": 3, -# CHECK-NEXT: "Latency": 1, -# CHECK-NEXT: "NumMicroOpcodes": 1, -# CHECK-NEXT: "RThroughput": 0.25, -# CHECK-NEXT: "hasUnmodeledSideEffects": false, -# CHECK-NEXT: "mayLoad": false, -# CHECK-NEXT: "mayStore": false +# CHECK-NEXT: "TimelineView": { +# CHECK-NEXT: "TimelineInfo": [ +# CHECK-NEXT: { +# CHECK-NEXT: "CycleDispatched": 0, +# CHECK-NEXT: "CycleExecuted": 2, +# CHECK-NEXT: "CycleIssued": 1, +# CHECK-NEXT: "CycleReady": 0, +# CHECK-NEXT: "CycleRetired": 3 +# CHECK-NEXT: }, +# CHECK-NEXT: { +# CHECK-NEXT: "CycleDispatched": 0, +# CHECK-NEXT: "CycleExecuted": 2, +# CHECK-NEXT: "CycleIssued": 1, +# CHECK-NEXT: "CycleReady": 0, +# CHECK-NEXT: "CycleRetired": 3 +# CHECK-NEXT: }, +# CHECK-NEXT: { +# CHECK-NEXT: "CycleDispatched": 0, +# CHECK-NEXT: "CycleExecuted": 2, +# CHECK-NEXT: "CycleIssued": 1, +# CHECK-NEXT: "CycleReady": 0, +# CHECK-NEXT: "CycleRetired": 3 +# CHECK-NEXT: }, +# CHECK-NEXT: { +# CHECK-NEXT: "CycleDispatched": 0, +# CHECK-NEXT: "CycleExecuted": 2, +# CHECK-NEXT: "CycleIssued": 1, +# CHECK-NEXT: "CycleReady": 0, +# CHECK-NEXT: "CycleRetired": 3 +# CHECK-NEXT: } +# CHECK-NEXT: ] # CHECK-NEXT: } -# CHECK-NEXT: ] -# CHECK-NEXT: { -# CHECK-NEXT: "ResourcePressureInfo": [ -# CHECK-NEXT: { -# CHECK-NEXT: "InstructionIndex": 0, -# CHECK-NEXT: "ResourceIndex": 8, -# CHECK-NEXT: "ResourceUsage": 1 -# CHECK-NEXT: }, -# CHECK-NEXT: { -# CHECK-NEXT: "InstructionIndex": 1, -# CHECK-NEXT: "ResourceIndex": 7, -# CHECK-NEXT: "ResourceUsage": 1 -# CHECK-NEXT: }, -# CHECK-NEXT: { -# CHECK-NEXT: "InstructionIndex": 2, -# CHECK-NEXT: "ResourceIndex": 3, -# CHECK-NEXT: "ResourceUsage": 1 -# CHECK-NEXT: }, -# CHECK-NEXT: { -# CHECK-NEXT: "InstructionIndex": 3, -# CHECK-NEXT: "ResourceIndex": 2, -# CHECK-NEXT: "ResourceUsage": 1 -# CHECK-NEXT: }, -# CHECK-NEXT: { -# CHECK-NEXT: "InstructionIndex": 4, -# CHECK-NEXT: "ResourceIndex": 2, -# CHECK-NEXT: "ResourceUsage": 1 -# CHECK-NEXT: }, -# CHECK-NEXT: { -# CHECK-NEXT: "InstructionIndex": 4, -# CHECK-NEXT: "ResourceIndex": 3, -# CHECK-NEXT: "ResourceUsage": 1 -# CHECK-NEXT: }, -# CHECK-NEXT: { -# CHECK-NEXT: "InstructionIndex": 4, -# CHECK-NEXT: "ResourceIndex": 7, -# CHECK-NEXT: "ResourceUsage": 1 -# CHECK-NEXT: }, -# CHECK-NEXT: { -# CHECK-NEXT: "InstructionIndex": 4, -# CHECK-NEXT: "ResourceIndex": 8, -# CHECK-NEXT: "ResourceUsage": 1 -# CHECK-NEXT: } -# CHECK-NEXT: ] -# CHECK-NEXT: } -# CHECK-NEXT: { -# CHECK-NEXT: "TimelineInfo": [ -# CHECK-NEXT: { -# CHECK-NEXT: "CycleDispatched": 0, -# CHECK-NEXT: "CycleExecuted": 2, -# CHECK-NEXT: "CycleIssued": 1, -# CHECK-NEXT: "CycleReady": 0, -# CHECK-NEXT: "CycleRetired": 3 -# CHECK-NEXT: }, -# CHECK-NEXT: { -# CHECK-NEXT: "CycleDispatched": 0, -# CHECK-NEXT: "CycleExecuted": 2, -# CHECK-NEXT: "CycleIssued": 1, -# CHECK-NEXT: "CycleReady": 0, -# CHECK-NEXT: "CycleRetired": 3 -# CHECK-NEXT: }, -# CHECK-NEXT: { -# CHECK-NEXT: "CycleDispatched": 0, -# CHECK-NEXT: "CycleExecuted": 2, -# CHECK-NEXT: "CycleIssued": 1, -# CHECK-NEXT: "CycleReady": 0, -# CHECK-NEXT: "CycleRetired": 3 -# CHECK-NEXT: }, -# CHECK-NEXT: { -# CHECK-NEXT: "CycleDispatched": 0, -# CHECK-NEXT: "CycleExecuted": 2, -# CHECK-NEXT: "CycleIssued": 1, -# CHECK-NEXT: "CycleReady": 0, -# CHECK-NEXT: "CycleRetired": 3 -# CHECK-NEXT: } -# CHECK-NEXT: ] # CHECK-NEXT: } diff --git a/llvm/tools/llvm-mca/PipelinePrinter.cpp b/llvm/tools/llvm-mca/PipelinePrinter.cpp index e7dfbfdce26de..8ca689fefc51d 100644 --- a/llvm/tools/llvm-mca/PipelinePrinter.cpp +++ b/llvm/tools/llvm-mca/PipelinePrinter.cpp @@ -18,8 +18,18 @@ namespace llvm { namespace mca { void PipelinePrinter::printReport(llvm::raw_ostream &OS) const { - for (const auto &V : Views) - V->printView(OutputKind, OS); + json::Object JO; + for (const auto &V : Views) { + if ((OutputKind == View::OK_JSON)) { + if (V->isSerializable()) { + JO.try_emplace(V->getNameAsString().str(), V->toJSON()); + } + } else { + V->printView(OS); + } + } + if (OutputKind == View::OK_JSON) + OS << formatv("{0:2}", json::Value(std::move(JO))) << "\n"; } } // namespace mca. } // namespace llvm diff --git a/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h b/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h index 427937d9e3d78..73401c62b7f39 100644 --- a/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h +++ b/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h @@ -333,7 +333,7 @@ class BottleneckAnalysis : public InstructionView { void printView(raw_ostream &OS) const override; StringRef getNameAsString() const override { return "BottleneckAnalysis"; } - json::Value toJSON() const override { return "not implemented"; } + bool isSerializable() const override { return false; } #ifndef NDEBUG void dump(raw_ostream &OS, MCInstPrinter &MCIP) const { DG.dump(OS, MCIP); } diff --git a/llvm/tools/llvm-mca/Views/DispatchStatistics.cpp b/llvm/tools/llvm-mca/Views/DispatchStatistics.cpp index d5e4171ef1fae..9d3cdf81a504e 100644 --- a/llvm/tools/llvm-mca/Views/DispatchStatistics.cpp +++ b/llvm/tools/llvm-mca/Views/DispatchStatistics.cpp @@ -84,5 +84,16 @@ void DispatchStatistics::printDispatchStalls(raw_ostream &OS) const { OS << Buffer; } +json::Value DispatchStatistics::toJSON() const { + json::Object JO({{"RAT", HWStalls[HWStallEvent::RegisterFileStall]}, + {"RCU", HWStalls[HWStallEvent::RetireControlUnitStall]}, + {"SCHEDQ", HWStalls[HWStallEvent::SchedulerQueueFull]}, + {"LQ", HWStalls[HWStallEvent::LoadQueueFull]}, + {"SQ", HWStalls[HWStallEvent::StoreQueueFull]}, + {"GROUP", HWStalls[HWStallEvent::DispatchGroupStall]}, + {"USH", HWStalls[HWStallEvent::CustomBehaviourStall]}}); + return JO; +} + } // namespace mca } // namespace llvm diff --git a/llvm/tools/llvm-mca/Views/DispatchStatistics.h b/llvm/tools/llvm-mca/Views/DispatchStatistics.h index 8d999fb0acfeb..81b582f74a6b9 100644 --- a/llvm/tools/llvm-mca/Views/DispatchStatistics.h +++ b/llvm/tools/llvm-mca/Views/DispatchStatistics.h @@ -79,6 +79,7 @@ class DispatchStatistics : public View { printDispatchHistogram(OS); } StringRef getNameAsString() const override { return "DispatchStatistics"; } + json::Value toJSON() const override; }; } // namespace mca } // namespace llvm diff --git a/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp b/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp index 52a29ff1f5ff9..3f6abf4af2cf6 100644 --- a/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp +++ b/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp @@ -147,7 +147,7 @@ json::Value InstructionInfoView::toJSON() const { JO.try_emplace("Instruction", (unsigned)I.index()); InstInfo.push_back(std::move(JO)); } - return json::Value(std::move(InstInfo)); + return json::Object({{"InstructionList", json::Value(std::move(InstInfo))}}); } } // namespace mca. } // namespace llvm diff --git a/llvm/tools/llvm-mca/Views/RegisterFileStatistics.h b/llvm/tools/llvm-mca/Views/RegisterFileStatistics.h index cf384dbfe3378..ec5c5f431e127 100644 --- a/llvm/tools/llvm-mca/Views/RegisterFileStatistics.h +++ b/llvm/tools/llvm-mca/Views/RegisterFileStatistics.h @@ -76,6 +76,7 @@ class RegisterFileStatistics : public View { StringRef getNameAsString() const override { return "RegisterFileStatistics"; } + bool isSerializable() const override { return false; } }; } // namespace mca } // namespace llvm diff --git a/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.h b/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.h index 662a223662e66..86b46e93aa7ce 100644 --- a/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.h +++ b/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.h @@ -55,6 +55,7 @@ class RetireControlUnitStatistics : public View { StringRef getNameAsString() const override { return "RetireControlUnitStatistics"; } + bool isSerializable() const override { return false; } }; } // namespace mca diff --git a/llvm/tools/llvm-mca/Views/SchedulerStatistics.h b/llvm/tools/llvm-mca/Views/SchedulerStatistics.h index 734046c3112f8..66f4b0011866d 100644 --- a/llvm/tools/llvm-mca/Views/SchedulerStatistics.h +++ b/llvm/tools/llvm-mca/Views/SchedulerStatistics.h @@ -89,6 +89,7 @@ class SchedulerStatistics final : public View { void printView(llvm::raw_ostream &OS) const override; StringRef getNameAsString() const override { return "SchedulerStatistics"; } + bool isSerializable() const override { return false; } }; } // namespace mca } // namespace llvm diff --git a/llvm/tools/llvm-mca/Views/View.h b/llvm/tools/llvm-mca/Views/View.h index 85464bfda662c..8eeb25d21dbfa 100644 --- a/llvm/tools/llvm-mca/Views/View.h +++ b/llvm/tools/llvm-mca/Views/View.h @@ -43,6 +43,7 @@ class View : public HWEventListener { virtual ~View() = default; virtual StringRef getNameAsString() const = 0; virtual json::Value toJSON() const { return "not implemented"; } + virtual bool isSerializable() const { return true; } void anchor() override; }; } // namespace mca From e9641c911ef4127da1d98c4e4d37039989e6052b Mon Sep 17 00:00:00 2001 From: Jeremy Morse Date: Thu, 1 Jul 2021 12:34:50 +0100 Subject: [PATCH 412/619] [DebugInfo][InstrRef][2/4] Use subreg substitutions in LiveDebugValues Added in 47c3fe2a22cf, we sometimes need to describe a variable value substitution with a subregister qualifier, to say that "the value is the lower 32 bits of this 64 bit register def" for example. That then needs support during LiveDebugValues to interpret the subregister qualifiers, which is what this patch adds. Whenever we encounter a DBG_INSTR_REF and find its value by using a substitution, collect any subregister qualifiers seen. Then, accumulate the effects of the qualifiers to work out what offset and what size should be extracted from the defined register. Finally, for the target ValueIDNum, extract whatever subregister is in the correct position Currently, describing a subregister field of a larger value that has been spilt to the stack, is unimplemented. Differential Revision: https://reviews.llvm.org/D88894 --- .../LiveDebugValues/InstrRefBasedImpl.cpp | 79 ++++++++++++- .../livedebugvalues_subreg_substitutions.mir | 106 ++++++++++++++++++ 2 files changed, 183 insertions(+), 2 deletions(-) create mode 100644 llvm/test/DebugInfo/MIR/InstrRef/livedebugvalues_subreg_substitutions.mir diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp index b92614ee124d0..b8fa02860ff16 100644 --- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp @@ -1829,9 +1829,13 @@ bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI, // recorded in the value substitution table. Apply any substitutions to // the instruction / operand number in this DBG_INSTR_REF. auto Sub = MF.DebugValueSubstitutions.find(std::make_pair(InstNo, OpNo)); + // Collect any subregister extractions performed during optimization. + SmallVector SeenSubregs; while (Sub != MF.DebugValueSubstitutions.end()) { - InstNo = Sub->second.Dest.first; - OpNo = Sub->second.Dest.second; + std::tie(InstNo, OpNo) = Sub->second.Dest; + unsigned Subreg = Sub->second.Subreg; + if (Subreg) + SeenSubregs.push_back(Subreg); Sub = MF.DebugValueSubstitutions.find(std::make_pair(InstNo, OpNo)); } @@ -1865,6 +1869,77 @@ bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI, MI, InstNo); } + // Apply any subregister extractions, in reverse. We might have seen code + // like this: + // CALL64 @foo, implicit-def $rax + // %0:gr64 = COPY $rax + // %1:gr32 = COPY %0.sub_32bit + // %2:gr16 = COPY %1.sub_16bit + // %3:gr8 = COPY %2.sub_8bit + // In which case each copy would have been recorded as a substitution with + // a subregister qualifier. Apply those qualifiers now. + if (NewID && !SeenSubregs.empty()) { + unsigned Offset = 0; + unsigned Size = 0; + + // Look at each subregister that we passed through, and progressively + // narrow in, accumulating any offsets that occur. Substitutions should + // only ever be the same or narrower width than what they read from; + // iterate in reverse order so that we go from wide to small. + for (unsigned Subreg : reverse(SeenSubregs)) { + unsigned ThisSize = TRI->getSubRegIdxSize(Subreg); + unsigned ThisOffset = TRI->getSubRegIdxOffset(Subreg); + Offset += ThisOffset; + Size = (Size == 0) ? ThisSize : std::min(Size, ThisSize); + } + + // If that worked, look for an appropriate subregister with the register + // where the define happens. Don't look at values that were defined during + // a stack write: we can't currently express register locations within + // spills. + LocIdx L = NewID->getLoc(); + if (NewID && !MTracker->isSpill(L)) { + // Find the register class for the register where this def happened. + // FIXME: no index for this? + Register Reg = MTracker->LocIdxToLocID[L]; + const TargetRegisterClass *TRC = nullptr; + for (auto *TRCI : TRI->regclasses()) + if (TRCI->contains(Reg)) + TRC = TRCI; + assert(TRC && "Couldn't find target register class?"); + + // If the register we have isn't the right size or in the right place, + // Try to find a subregister inside it. + unsigned MainRegSize = TRI->getRegSizeInBits(*TRC); + if (Size != MainRegSize || Offset) { + // Enumerate all subregisters, searching. + Register NewReg = 0; + for (MCSubRegIterator SRI(Reg, TRI, false); SRI.isValid(); ++SRI) { + unsigned Subreg = TRI->getSubRegIndex(Reg, *SRI); + unsigned SubregSize = TRI->getSubRegIdxSize(Subreg); + unsigned SubregOffset = TRI->getSubRegIdxOffset(Subreg); + if (SubregSize == Size && SubregOffset == Offset) { + NewReg = *SRI; + break; + } + } + + // If we didn't find anything: there's no way to express our value. + if (!NewReg) { + NewID = None; + } else { + // Re-state the value as being defined within the subregister + // that we found. + LocIdx NewLoc = MTracker->lookupOrTrackRegister(NewReg); + NewID = ValueIDNum(NewID->getBlock(), NewID->getInst(), NewLoc); + } + } + } else { + // If we can't handle subregisters, unset the new value. + NewID = None; + } + } + // We, we have a value number or None. Tell the variable value tracker about // it. The rest of this LiveDebugValues implementation acts exactly the same // for DBG_INSTR_REFs as DBG_VALUEs (just, the former can refer to values that diff --git a/llvm/test/DebugInfo/MIR/InstrRef/livedebugvalues_subreg_substitutions.mir b/llvm/test/DebugInfo/MIR/InstrRef/livedebugvalues_subreg_substitutions.mir new file mode 100644 index 0000000000000..df160699402ec --- /dev/null +++ b/llvm/test/DebugInfo/MIR/InstrRef/livedebugvalues_subreg_substitutions.mir @@ -0,0 +1,106 @@ +# RUN: llc %s -march=x86-64 -run-pass=livedebugvalues -experimental-debug-variable-locations -o - 2>&1 | FileCheck %s +# +# Test that when we have a subregister qualifiers in substitutions, that +# InstrRefBasedLDV correctly applies them to the variable location. Below, a +# call defines all of $rax, but the variable locations should only apply to +# the low order 8 bits. +--- | + define i8 @test(i32 %bar) local_unnamed_addr !dbg !7 { + entry: + ret i8 0, !dbg !12 + } + + declare dso_local void @ext(i64) + + !llvm.dbg.cu = !{!0} + !llvm.module.flags = !{!3, !4, !5, !6} + !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug) + !1 = !DIFile(filename: "foo.cpp", directory: ".") + !2 = !DIBasicType(name: "int", size: 8, encoding: DW_ATE_signed) + !3 = !{i32 2, !"Dwarf Version", i32 4} + !4 = !{i32 2, !"Debug Info Version", i32 3} + !5 = !{i32 1, !"wchar_size", i32 2} + !6 = !{i32 7, !"PIC Level", i32 2} + !7 = distinct !DISubprogram(name: "foo", linkageName: "foo", scope: !1, file: !1, line: 6, type: !8, scopeLine: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !10) + !8 = !DISubroutineType(types: !9) + !9 = !{!2, !2} + !10 = !{!11} + !11 = !DILocalVariable(name: "baz", scope: !7, file: !1, line: 7, type: !2) + !12 = !DILocation(line: 10, scope: !7) +... +--- +name: test +tracksRegLiveness: true +liveins: + - { reg: '$rdi', virtual-reg: '' } +debugValueSubstitutions: + - { srcinst: 1, srcop: 0, dstinst: 2, dstop: 0, subreg: 1 } # sub_8bit + - { srcinst: 2, srcop: 0, dstinst: 3, dstop: 0, subreg: 4 } # sub_16bit + - { srcinst: 3, srcop: 0, dstinst: 4, dstop: 5, subreg: 6 } # sub_32bit + # Substitution involving sub_8bit_hi, should land in $ah + - { srcinst: 5, srcop: 0, dstinst: 6, dstop: 0, subreg: 2 } # sub_8bit_hi + - { srcinst: 6, srcop: 0, dstinst: 7, dstop: 0, subreg: 4 } # sub_16bit + - { srcinst: 7, srcop: 0, dstinst: 4, dstop: 5, subreg: 6 } # sub_32bit + # Several redundant substitutions, representing extractions from a small + # register, followed by larger spurious ones, for example: + # %0:gr64 = COPY $rax + # %1:gr32 = COPY %0.sub_32bit + # %2:gr16 = COPY %1.sub_16bit + # %3:gr64 = SUBREG_TO_REG %2, sub_8bit_hi + # %4:gr32 = COPY %3.sub_32bit + # %5:gr16 = COPY %2.sub_16bit + # Should still come out as ah. + - { srcinst: 8, srcop: 0, dstinst: 9, dstop: 0, subreg: 4 } # sub_16bit + - { srcinst: 9, srcop: 0, dstinst: 10,dstop: 0, subreg: 6 } # sub_32bit + - { srcinst: 10,srcop: 0, dstinst: 11,dstop: 0, subreg: 2 } # sub_8bit_hi + - { srcinst: 11,srcop: 0, dstinst: 12,dstop: 0, subreg: 4 } # sub_16bit + - { srcinst: 12,srcop: 0, dstinst: 4, dstop: 5, subreg: 6 } # sub_32bit + # If some kind of really mal-formed code appears that extracts the high bits + # out of a too-small location, we should drop it. It's not clear whether this + # scenario could ever happen; but if it did, best to not emit a known bad + # variable location. Should generate a DBG_VALUE $noreg. + - { srcinst: 13, srcop: 0, dstinst: 14,dstop: 0, subreg: 5 } # sub_16bit_hi + - { srcinst: 14, srcop: 0, dstinst: 15,dstop: 0, subreg: 6 } # sub_32bit + - { srcinst: 15, srcop: 0, dstinst: 4, dstop: 5, subreg: 1 } # sub_8bit +stack: + - { id: 0, name: '', type: spill-slot, offset: -16, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +body: | + bb.0: + liveins: $rdi, $rax + CALL64pcrel32 @ext, csr_64, implicit $rsp, implicit $ssp, implicit $edi, implicit-def $rax, debug-instr-number 4, debug-location !12 + ; CHECK: CALL64pcrel32 + DBG_INSTR_REF 1, 0, !11, !DIExpression(), debug-location !12 + ; CHECK-NEXT: DBG_INSTR_REF 1, 0 + ; CHECK-NEXT: DBG_VALUE $al + DBG_INSTR_REF 5, 0, !11, !DIExpression(), debug-location !12 + ; CHECK-NEXT: DBG_INSTR_REF 5, 0 + ; CHECK-NEXT: DBG_VALUE $ah + DBG_INSTR_REF 8, 0, !11, !DIExpression(), debug-location !12 + ; CHECK-NEXT: DBG_INSTR_REF 8, 0 + ; CHECK-NEXT: DBG_VALUE $ah + DBG_INSTR_REF 13, 0, !11, !DIExpression(), debug-location !12 + ; CHECK-NEXT: DBG_INSTR_REF 13, 0 + ; CHECK-NEXT: DBG_VALUE $noreg + MOV64mr $rsp, 1, $noreg, 16, $noreg, $rax :: (store 8 into %stack.0) + $rax = MOV64ri 0, debug-location !12 + ; CHECK: $rax = MOV64ri 0 + ; The value is now located in a spill slot; currently InstrRefBasedLDV + ; can't express subregister locations inside spills. These should all + ; end up being $noreg, but could be improved in the future. + DBG_INSTR_REF 1, 0, !11, !DIExpression(), debug-location !12 + ; CHECK-NEXT: DBG_INSTR_REF 1, 0 + ; CHECK-NEXT: DBG_VALUE $noreg + DBG_INSTR_REF 5, 0, !11, !DIExpression(), debug-location !12 + ; CHECK-NEXT: DBG_INSTR_REF 5, 0 + ; CHECK-NEXT: DBG_VALUE $noreg + DBG_INSTR_REF 8, 0, !11, !DIExpression(), debug-location !12 + ; CHECK-NEXT: DBG_INSTR_REF 8, 0 + ; CHECK-NEXT: DBG_VALUE $noreg + DBG_INSTR_REF 13, 0, !11, !DIExpression(), debug-location !12 + ; CHECK-NEXT: DBG_INSTR_REF 13, 0 + ; CHECK-NEXT: DBG_VALUE $noreg + $rax = MOV64rm $rsp, 1, $noreg, 8, $noreg :: (load 8 from %stack.0) + RETQ $rax, debug-location !12 +... From cf76569f6a617b782373548a6b030ab3dd44c6af Mon Sep 17 00:00:00 2001 From: Irina Dobrescu Date: Thu, 17 Jun 2021 12:41:23 +0100 Subject: [PATCH 413/619] [AArch64] Add test for min/max Differential Revision: https://reviews.llvm.org/D104447 --- llvm/test/CodeGen/AArch64/min-max.ll | 609 +++++++++++++++++++++++++++ 1 file changed, 609 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/min-max.ll diff --git a/llvm/test/CodeGen/AArch64/min-max.ll b/llvm/test/CodeGen/AArch64/min-max.ll new file mode 100644 index 0000000000000..ec4998bbc0234 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/min-max.ll @@ -0,0 +1,609 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-eabi %s -o - | FileCheck %s + +; These tests just check that the plumbing is in place for @llvm.smax, @llvm.umax, +; @llvm.smin, @llvm.umin. + +declare i8 @llvm.smax.i8(i8 %a, i8 %b) readnone + +define i8 @smaxi8(i8 %a, i8 %b) { +; CHECK-LABEL: smaxi8: +; CHECK: // %bb.0: +; CHECK-NEXT: sxtb w8, w1 +; CHECK-NEXT: sxtb w9, w0 +; CHECK-NEXT: cmp w9, w8 +; CHECK-NEXT: csel w0, w9, w8, gt +; CHECK-NEXT: ret + %c = call i8 @llvm.smax.i8(i8 %a, i8 %b) + ret i8 %c +} + +declare i16 @llvm.smax.i16(i16 %a, i16 %b) readnone + +define i16 @smaxi16(i16 %a, i16 %b) { +; CHECK-LABEL: smaxi16: +; CHECK: // %bb.0: +; CHECK-NEXT: sxth w8, w1 +; CHECK-NEXT: sxth w9, w0 +; CHECK-NEXT: cmp w9, w8 +; CHECK-NEXT: csel w0, w9, w8, gt +; CHECK-NEXT: ret + %c = call i16 @llvm.smax.i16(i16 %a, i16 %b) + ret i16 %c +} + +declare i32 @llvm.smax.i32(i32 %a, i32 %b) readnone + +define i32 @smaxi32(i32 %a, i32 %b) { +; CHECK-LABEL: smaxi32: +; CHECK: // %bb.0: +; CHECK-NEXT: cmp w0, w1 +; CHECK-NEXT: csel w0, w0, w1, gt +; CHECK-NEXT: ret + %c = call i32 @llvm.smax.i32(i32 %a, i32 %b) + ret i32 %c +} + +declare i64 @llvm.smax.i64(i64 %a, i64 %b) readnone + +define i64 @smaxi64(i64 %a, i64 %b) { +; CHECK-LABEL: smaxi64: +; CHECK: // %bb.0: +; CHECK-NEXT: cmp x0, x1 +; CHECK-NEXT: csel x0, x0, x1, gt +; CHECK-NEXT: ret + %c = call i64 @llvm.smax.i64(i64 %a, i64 %b) + ret i64 %c +} + +declare <8 x i8> @llvm.smax.v8i8(<8 x i8> %a, <8 x i8> %b) readnone + +define <8 x i8> @smax8i8(<8 x i8> %a, <8 x i8> %b) { +; CHECK-LABEL: smax8i8: +; CHECK: // %bb.0: +; CHECK-NEXT: smax v0.8b, v0.8b, v1.8b +; CHECK-NEXT: ret + %c = call <8 x i8> @llvm.smax.v8i8(<8 x i8> %a, <8 x i8> %b) + ret <8 x i8> %c +} + +declare <16 x i8> @llvm.smax.v16i8(<16 x i8> %a, <16 x i8> %b) readnone + +define <16 x i8> @smax16i8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: smax16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: smax v0.16b, v0.16b, v1.16b +; CHECK-NEXT: ret + %c = call <16 x i8> @llvm.smax.v16i8(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %c +} + +declare <4 x i16> @llvm.smax.v4i16(<4 x i16> %a, <4 x i16> %b) readnone + +define <4 x i16> @smax4i16(<4 x i16> %a, <4 x i16> %b) { +; CHECK-LABEL: smax4i16: +; CHECK: // %bb.0: +; CHECK-NEXT: smax v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ret + %c = call <4 x i16> @llvm.smax.v4i16(<4 x i16> %a, <4 x i16> %b) + ret <4 x i16> %c +} + +declare <8 x i16> @llvm.smax.v8i16(<8 x i16> %a, <8 x i16> %b) readnone + +define <8 x i16> @smax8i16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: smax8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: smax v0.8h, v0.8h, v1.8h +; CHECK-NEXT: ret + %c = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %a, <8 x i16> %b) + ret <8 x i16> %c +} + +declare <2 x i32> @llvm.smax.v2i32(<2 x i32> %a, <2 x i32> %b) readnone + +define <2 x i32> @smax2i32(<2 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: smax2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s +; CHECK-NEXT: ret + %c = call <2 x i32> @llvm.smax.v2i32(<2 x i32> %a, <2 x i32> %b) + ret <2 x i32> %c +} + +declare <4 x i32> @llvm.smax.v4i32(<4 x i32> %a, <4 x i32> %b) readnone + +define <4 x i32> @smax4i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: smax4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %c = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %a, <4 x i32> %b) + ret <4 x i32> %c +} + +declare <1 x i64> @llvm.smax.v1i64(<1 x i64> %a, <1 x i64> %b) readnone + +define <1 x i64> @smax1i64(<1 x i64> %a, <1 x i64> %b) { +; CHECK-LABEL: smax1i64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: fmov x8, d1 +; CHECK-NEXT: fmov x9, d0 +; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: csel x8, x9, x8, gt +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ret + %c = call <1 x i64> @llvm.smax.v1i64(<1 x i64> %a, <1 x i64> %b) + ret <1 x i64> %c +} + +declare <2 x i64> @llvm.smax.v2i64(<2 x i64> %a, <2 x i64> %b) readnone + +define <2 x i64> @smax2i64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: smax2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, v1.d[1] +; CHECK-NEXT: mov x9, v0.d[1] +; CHECK-NEXT: fmov x10, d1 +; CHECK-NEXT: fmov x11, d0 +; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: csel x8, x9, x8, gt +; CHECK-NEXT: cmp x11, x10 +; CHECK-NEXT: csel x9, x11, x10, gt +; CHECK-NEXT: fmov d0, x9 +; CHECK-NEXT: mov v0.d[1], x8 +; CHECK-NEXT: ret + %c = call <2 x i64> @llvm.smax.v2i64(<2 x i64> %a, <2 x i64> %b) + ret <2 x i64> %c +} + +declare i8 @llvm.umax.i8(i8 %a, i8 %b) readnone + +define i8 @umaxi8(i8 %a, i8 %b) { +; CHECK-LABEL: umaxi8: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w1, #0xff +; CHECK-NEXT: and w9, w0, #0xff +; CHECK-NEXT: cmp w9, w8 +; CHECK-NEXT: csel w0, w9, w8, hi +; CHECK-NEXT: ret + %c = call i8 @llvm.umax.i8(i8 %a, i8 %b) + ret i8 %c +} + +declare i16 @llvm.umax.i16(i16 %a, i16 %b) readnone + +define i16 @umaxi16(i16 %a, i16 %b) { +; CHECK-LABEL: umaxi16: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w1, #0xffff +; CHECK-NEXT: and w9, w0, #0xffff +; CHECK-NEXT: cmp w9, w8 +; CHECK-NEXT: csel w0, w9, w8, hi +; CHECK-NEXT: ret + %c = call i16 @llvm.umax.i16(i16 %a, i16 %b) + ret i16 %c +} + +declare i32 @llvm.umax.i32(i32 %a, i32 %b) readnone + +define i32 @umaxi32(i32 %a, i32 %b) { +; CHECK-LABEL: umaxi32: +; CHECK: // %bb.0: +; CHECK-NEXT: cmp w0, w1 +; CHECK-NEXT: csel w0, w0, w1, hi +; CHECK-NEXT: ret + %c = call i32 @llvm.umax.i32(i32 %a, i32 %b) + ret i32 %c +} + +declare i64 @llvm.umax.i64(i64 %a, i64 %b) readnone + +define i64 @umaxi64(i64 %a, i64 %b) { +; CHECK-LABEL: umaxi64: +; CHECK: // %bb.0: +; CHECK-NEXT: cmp x0, x1 +; CHECK-NEXT: csel x0, x0, x1, hi +; CHECK-NEXT: ret + %c = call i64 @llvm.umax.i64(i64 %a, i64 %b) + ret i64 %c +} + +declare <8 x i8> @llvm.umax.v8i8(<8 x i8> %a, <8 x i8> %b) readnone + +define <8 x i8> @umax8i8(<8 x i8> %a, <8 x i8> %b) { +; CHECK-LABEL: umax8i8: +; CHECK: // %bb.0: +; CHECK-NEXT: umax v0.8b, v0.8b, v1.8b +; CHECK-NEXT: ret + %c = call <8 x i8> @llvm.umax.v8i8(<8 x i8> %a, <8 x i8> %b) + ret <8 x i8> %c +} + +declare <16 x i8> @llvm.umax.v16i8(<16 x i8> %a, <16 x i8> %b) readnone + +define <16 x i8> @umax16i8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: umax16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: umax v0.16b, v0.16b, v1.16b +; CHECK-NEXT: ret + %c = call <16 x i8> @llvm.umax.v16i8(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %c +} + +declare <4 x i16> @llvm.umax.v4i16(<4 x i16> %a, <4 x i16> %b) readnone + +define <4 x i16> @umax4i16(<4 x i16> %a, <4 x i16> %b) { +; CHECK-LABEL: umax4i16: +; CHECK: // %bb.0: +; CHECK-NEXT: umax v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ret + %c = call <4 x i16> @llvm.umax.v4i16(<4 x i16> %a, <4 x i16> %b) + ret <4 x i16> %c +} + +declare <8 x i16> @llvm.umax.v8i16(<8 x i16> %a, <8 x i16> %b) readnone + +define <8 x i16> @umax8i16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: umax8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: umax v0.8h, v0.8h, v1.8h +; CHECK-NEXT: ret + %c = call <8 x i16> @llvm.umax.v8i16(<8 x i16> %a, <8 x i16> %b) + ret <8 x i16> %c +} + +declare <2 x i32> @llvm.umax.v2i32(<2 x i32> %a, <2 x i32> %b) readnone + +define <2 x i32> @umax2i32(<2 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: umax2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: umax v0.2s, v0.2s, v1.2s +; CHECK-NEXT: ret + %c = call <2 x i32> @llvm.umax.v2i32(<2 x i32> %a, <2 x i32> %b) + ret <2 x i32> %c +} + +declare <4 x i32> @llvm.umax.v4i32(<4 x i32> %a, <4 x i32> %b) readnone + +define <4 x i32> @umax4i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: umax4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %c = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %a, <4 x i32> %b) + ret <4 x i32> %c +} + +declare <1 x i64> @llvm.umax.v1i64(<1 x i64> %a, <1 x i64> %b) readnone + +define <1 x i64> @umax1i64(<1 x i64> %a, <1 x i64> %b) { +; CHECK-LABEL: umax1i64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: fmov x8, d1 +; CHECK-NEXT: fmov x9, d0 +; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: csel x8, x9, x8, hi +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ret + %c = call <1 x i64> @llvm.umax.v1i64(<1 x i64> %a, <1 x i64> %b) + ret <1 x i64> %c +} + +declare <2 x i64> @llvm.umax.v2i64(<2 x i64> %a, <2 x i64> %b) readnone + +define <2 x i64> @umax2i64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: umax2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: uqsub v1.2d, v1.2d, v0.2d +; CHECK-NEXT: add v0.2d, v0.2d, v1.2d +; CHECK-NEXT: ret + %c = call <2 x i64> @llvm.umax.v2i64(<2 x i64> %a, <2 x i64> %b) + ret <2 x i64> %c +} + +declare i8 @llvm.smin.i8(i8 %a, i8 %b) readnone + +define i8 @smini8(i8 %a, i8 %b) { +; CHECK-LABEL: smini8: +; CHECK: // %bb.0: +; CHECK-NEXT: sxtb w8, w1 +; CHECK-NEXT: sxtb w9, w0 +; CHECK-NEXT: cmp w9, w8 +; CHECK-NEXT: csel w0, w9, w8, lt +; CHECK-NEXT: ret + %c = call i8 @llvm.smin.i8(i8 %a, i8 %b) + ret i8 %c +} + +declare i16 @llvm.smin.i16(i16 %a, i16 %b) readnone + +define i16 @smini16(i16 %a, i16 %b) { +; CHECK-LABEL: smini16: +; CHECK: // %bb.0: +; CHECK-NEXT: sxth w8, w1 +; CHECK-NEXT: sxth w9, w0 +; CHECK-NEXT: cmp w9, w8 +; CHECK-NEXT: csel w0, w9, w8, lt +; CHECK-NEXT: ret + %c = call i16 @llvm.smin.i16(i16 %a, i16 %b) + ret i16 %c +} + +declare i32 @llvm.smin.i32(i32 %a, i32 %b) readnone + +define i32 @smini32(i32 %a, i32 %b) { +; CHECK-LABEL: smini32: +; CHECK: // %bb.0: +; CHECK-NEXT: cmp w0, w1 +; CHECK-NEXT: csel w0, w0, w1, lt +; CHECK-NEXT: ret + %c = call i32 @llvm.smin.i32(i32 %a, i32 %b) + ret i32 %c +} + +declare i64 @llvm.smin.i64(i64 %a, i64 %b) readnone + +define i64 @smini64(i64 %a, i64 %b) { +; CHECK-LABEL: smini64: +; CHECK: // %bb.0: +; CHECK-NEXT: cmp x0, x1 +; CHECK-NEXT: csel x0, x0, x1, lt +; CHECK-NEXT: ret + %c = call i64 @llvm.smin.i64(i64 %a, i64 %b) + ret i64 %c +} + +declare <8 x i8> @llvm.smin.v8i8(<8 x i8> %a, <8 x i8> %b) readnone + +define <8 x i8> @smin8i8(<8 x i8> %a, <8 x i8> %b) { +; CHECK-LABEL: smin8i8: +; CHECK: // %bb.0: +; CHECK-NEXT: smin v0.8b, v0.8b, v1.8b +; CHECK-NEXT: ret + %c = call <8 x i8> @llvm.smin.v8i8(<8 x i8> %a, <8 x i8> %b) + ret <8 x i8> %c +} + +declare <16 x i8> @llvm.smin.v16i8(<16 x i8> %a, <16 x i8> %b) readnone + +define <16 x i8> @smin16i8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: smin16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: smin v0.16b, v0.16b, v1.16b +; CHECK-NEXT: ret + %c = call <16 x i8> @llvm.smin.v16i8(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %c +} + +declare <4 x i16> @llvm.smin.v4i16(<4 x i16> %a, <4 x i16> %b) readnone + +define <4 x i16> @smin4i16(<4 x i16> %a, <4 x i16> %b) { +; CHECK-LABEL: smin4i16: +; CHECK: // %bb.0: +; CHECK-NEXT: smin v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ret + %c = call <4 x i16> @llvm.smin.v4i16(<4 x i16> %a, <4 x i16> %b) + ret <4 x i16> %c +} + +declare <8 x i16> @llvm.smin.v8i16(<8 x i16> %a, <8 x i16> %b) readnone + +define <8 x i16> @smin8i16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: smin8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: smin v0.8h, v0.8h, v1.8h +; CHECK-NEXT: ret + %c = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %a, <8 x i16> %b) + ret <8 x i16> %c +} + +declare <2 x i32> @llvm.smin.v2i32(<2 x i32> %a, <2 x i32> %b) readnone + +define <2 x i32> @smin2i32(<2 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: smin2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s +; CHECK-NEXT: ret + %c = call <2 x i32> @llvm.smin.v2i32(<2 x i32> %a, <2 x i32> %b) + ret <2 x i32> %c +} + +declare <4 x i32> @llvm.smin.v4i32(<4 x i32> %a, <4 x i32> %b) readnone + +define <4 x i32> @smin4i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: smin4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %c = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %a, <4 x i32> %b) + ret <4 x i32> %c +} + +declare <1 x i64> @llvm.smin.v1i64(<1 x i64> %a, <1 x i64> %b) readnone + +define <1 x i64> @smin1i64(<1 x i64> %a, <1 x i64> %b) { +; CHECK-LABEL: smin1i64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: fmov x8, d1 +; CHECK-NEXT: fmov x9, d0 +; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: csel x8, x9, x8, lt +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ret + %c = call <1 x i64> @llvm.smin.v1i64(<1 x i64> %a, <1 x i64> %b) + ret <1 x i64> %c +} + +declare <2 x i64> @llvm.smin.v2i64(<2 x i64> %a, <2 x i64> %b) readnone + +define <2 x i64> @smin2i64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: smin2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, v1.d[1] +; CHECK-NEXT: mov x9, v0.d[1] +; CHECK-NEXT: fmov x10, d1 +; CHECK-NEXT: fmov x11, d0 +; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: csel x8, x9, x8, lt +; CHECK-NEXT: cmp x11, x10 +; CHECK-NEXT: csel x9, x11, x10, lt +; CHECK-NEXT: fmov d0, x9 +; CHECK-NEXT: mov v0.d[1], x8 +; CHECK-NEXT: ret + %c = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %a, <2 x i64> %b) + ret <2 x i64> %c +} + +declare i8 @llvm.umin.i8(i8 %a, i8 %b) readnone + +define i8 @umini8(i8 %a, i8 %b) { +; CHECK-LABEL: umini8: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w1, #0xff +; CHECK-NEXT: and w9, w0, #0xff +; CHECK-NEXT: cmp w9, w8 +; CHECK-NEXT: csel w0, w9, w8, lo +; CHECK-NEXT: ret + %c = call i8 @llvm.umin.i8(i8 %a, i8 %b) + ret i8 %c +} + +declare i16 @llvm.umin.i16(i16 %a, i16 %b) readnone + +define i16 @umini16(i16 %a, i16 %b) { +; CHECK-LABEL: umini16: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w1, #0xffff +; CHECK-NEXT: and w9, w0, #0xffff +; CHECK-NEXT: cmp w9, w8 +; CHECK-NEXT: csel w0, w9, w8, lo +; CHECK-NEXT: ret + %c = call i16 @llvm.umin.i16(i16 %a, i16 %b) + ret i16 %c +} + +declare i32 @llvm.umin.i32(i32 %a, i32 %b) readnone + +define i32 @umini32(i32 %a, i32 %b) { +; CHECK-LABEL: umini32: +; CHECK: // %bb.0: +; CHECK-NEXT: cmp w0, w1 +; CHECK-NEXT: csel w0, w0, w1, lo +; CHECK-NEXT: ret + %c = call i32 @llvm.umin.i32(i32 %a, i32 %b) + ret i32 %c +} + +declare i64 @llvm.umin.i64(i64 %a, i64 %b) readnone + +define i64 @umini64(i64 %a, i64 %b) { +; CHECK-LABEL: umini64: +; CHECK: // %bb.0: +; CHECK-NEXT: cmp x0, x1 +; CHECK-NEXT: csel x0, x0, x1, lo +; CHECK-NEXT: ret + %c = call i64 @llvm.umin.i64(i64 %a, i64 %b) + ret i64 %c +} + +declare <8 x i8> @llvm.umin.v8i8(<8 x i8> %a, <8 x i8> %b) readnone + +define <8 x i8> @umin8i8(<8 x i8> %a, <8 x i8> %b) { +; CHECK-LABEL: umin8i8: +; CHECK: // %bb.0: +; CHECK-NEXT: umin v0.8b, v0.8b, v1.8b +; CHECK-NEXT: ret + %c = call <8 x i8> @llvm.umin.v8i8(<8 x i8> %a, <8 x i8> %b) + ret <8 x i8> %c +} + +declare <16 x i8> @llvm.umin.v16i8(<16 x i8> %a, <16 x i8> %b) readnone + +define <16 x i8> @umin16i8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: umin16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: umin v0.16b, v0.16b, v1.16b +; CHECK-NEXT: ret + %c = call <16 x i8> @llvm.umin.v16i8(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %c +} + +declare <4 x i16> @llvm.umin.v4i16(<4 x i16> %a, <4 x i16> %b) readnone + +define <4 x i16> @umin4i16(<4 x i16> %a, <4 x i16> %b) { +; CHECK-LABEL: umin4i16: +; CHECK: // %bb.0: +; CHECK-NEXT: umin v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ret + %c = call <4 x i16> @llvm.umin.v4i16(<4 x i16> %a, <4 x i16> %b) + ret <4 x i16> %c +} + +declare <8 x i16> @llvm.umin.v8i16(<8 x i16> %a, <8 x i16> %b) readnone + +define <8 x i16> @umin8i16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: umin8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: umin v0.8h, v0.8h, v1.8h +; CHECK-NEXT: ret + %c = call <8 x i16> @llvm.umin.v8i16(<8 x i16> %a, <8 x i16> %b) + ret <8 x i16> %c +} + +declare <2 x i32> @llvm.umin.v2i32(<2 x i32> %a, <2 x i32> %b) readnone + +define <2 x i32> @umin2i32(<2 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: umin2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: umin v0.2s, v0.2s, v1.2s +; CHECK-NEXT: ret + %c = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %a, <2 x i32> %b) + ret <2 x i32> %c +} + +declare <4 x i32> @llvm.umin.v4i32(<4 x i32> %a, <4 x i32> %b) readnone + +define <4 x i32> @umin4i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: umin4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %c = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %a, <4 x i32> %b) + ret <4 x i32> %c +} + +declare <1 x i64> @llvm.umin.v1i64(<1 x i64> %a, <1 x i64> %b) readnone + +define <1 x i64> @umin1i64(<1 x i64> %a, <1 x i64> %b) { +; CHECK-LABEL: umin1i64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: fmov x8, d1 +; CHECK-NEXT: fmov x9, d0 +; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: csel x8, x9, x8, lo +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ret + %c = call <1 x i64> @llvm.umin.v1i64(<1 x i64> %a, <1 x i64> %b) + ret <1 x i64> %c +} + +declare <2 x i64> @llvm.umin.v2i64(<2 x i64> %a, <2 x i64> %b) readnone + +define <2 x i64> @umin2i64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: umin2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: uqsub v1.2d, v0.2d, v1.2d +; CHECK-NEXT: sub v0.2d, v0.2d, v1.2d +; CHECK-NEXT: ret + %c = call <2 x i64> @llvm.umin.v2i64(<2 x i64> %a, <2 x i64> %b) + ret <2 x i64> %c +} From 314e456dfe85f8b5c53b85a7d815f7d463fe02ef Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Wed, 30 Jun 2021 10:15:59 +0200 Subject: [PATCH 414/619] [clang] Fix CallExpr dependence bit may not respect all its arguments. Before this patch, the dependence of CallExpr was only computed in the constructor, the dependence bits might not reflect truth -- some arguments might be not set (nullptr) during this time, e.g. CXXDefaultArgExpr will be set via the setArg method in the later parsing stage, so we need to recompute the dependence bits. --- clang/include/clang/AST/Expr.h | 11 +++++++++++ clang/lib/AST/Expr.cpp | 2 +- clang/lib/Sema/SemaExpr.cpp | 2 ++ clang/test/SemaCXX/recovery-expr-type.cpp | 5 +++-- 4 files changed, 17 insertions(+), 3 deletions(-) diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h index ddef2564a43cd..06164411cc2d4 100644 --- a/clang/include/clang/AST/Expr.h +++ b/clang/include/clang/AST/Expr.h @@ -2987,11 +2987,22 @@ class CallExpr : public Expr { } /// setArg - Set the specified argument. + /// ! the dependence bits might be stale after calling this setter, it is + /// *caller*'s responsibility to recompute them by calling + /// computeDependence(). void setArg(unsigned Arg, Expr *ArgExpr) { assert(Arg < getNumArgs() && "Arg access out of range!"); getArgs()[Arg] = ArgExpr; } + /// Compute and set dependence bits. + void computeDependence() { + setDependence(clang::computeDependence( + this, llvm::makeArrayRef( + reinterpret_cast(getTrailingStmts() + PREARGS_START), + getNumPreArgs()))); + } + /// Reduce the number of arguments in this call expression. This is used for /// example during error recovery to drop extra arguments. There is no way /// to perform the opposite because: 1.) We don't track how much storage diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index 613abb7383dad..03dc65eeb6b03 100644 --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -1398,7 +1398,7 @@ CallExpr::CallExpr(StmtClass SC, Expr *Fn, ArrayRef PreArgs, for (unsigned I = Args.size(); I != NumArgs; ++I) setArg(I, nullptr); - setDependence(computeDependence(this, PreArgs)); + this->computeDependence(); CallExprBits.HasFPFeatures = FPFeatures.requiresTrailingStorage(); if (hasStoredFPFeatures()) diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 3df74b5ea9dbc..a3a26d21422f0 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -5928,6 +5928,7 @@ Sema::ConvertArgumentsForCall(CallExpr *Call, Expr *Fn, for (unsigned i = 0; i < TotalNumArgs; ++i) Call->setArg(i, AllArgs[i]); + Call->computeDependence(); return false; } @@ -6863,6 +6864,7 @@ ExprResult Sema::BuildResolvedCallExpr(Expr *Fn, NamedDecl *NDecl, TheCall->setArg(i, Arg); } + TheCall->computeDependence(); } if (CXXMethodDecl *Method = dyn_cast_or_null(FDecl)) diff --git a/clang/test/SemaCXX/recovery-expr-type.cpp b/clang/test/SemaCXX/recovery-expr-type.cpp index d3ac772db0089..509cd17459762 100644 --- a/clang/test/SemaCXX/recovery-expr-type.cpp +++ b/clang/test/SemaCXX/recovery-expr-type.cpp @@ -139,6 +139,7 @@ void baz() { namespace test12 { // Verify we do not crash. -void fun(int *foo = no_such_function()); // expected-error {{undeclared identifier}} -void baz() { fun(); } +int fun(int *foo = no_such_function()); // expected-error {{undeclared identifier}} +void crash1() { fun(); } +void crash2() { constexpr int s = fun(); } } // namespace test12 From b122ff71f3084d70dea7bf75c08414bf4bc1b38d Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 1 Jul 2021 13:10:26 +0100 Subject: [PATCH 415/619] [AArch64] Add fp16 tests for vector copysign. Add additional fp16 vector tests for copysign, to show improvements in follow-up patch. --- llvm/test/CodeGen/AArch64/vector-fcopysign.ll | 353 +++++++++++++++++- 1 file changed, 352 insertions(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/AArch64/vector-fcopysign.ll b/llvm/test/CodeGen/AArch64/vector-fcopysign.ll index 00518c05b27fa..47c9e34396873 100644 --- a/llvm/test/CodeGen/AArch64/vector-fcopysign.ll +++ b/llvm/test/CodeGen/AArch64/vector-fcopysign.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -mtriple aarch64-apple-darwin -asm-verbose=false -disable-post-ra | FileCheck %s +; RUN: llc < %s -mtriple aarch64-apple-darwin -asm-verbose=false -disable-post-ra | FileCheck --check-prefixes=CHECK,NOFP16 %s +; RUN: llc < %s -mtriple aarch64-apple-darwin -asm-verbose=false -disable-post-ra -mattr=+v8.2a,+fullfp16 | FileCheck --check-prefixes=CHECK,FP16 %s target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -162,4 +163,354 @@ define <4 x double> @test_copysign_v4f64_v4f64(<4 x double> %a, <4 x double> %b) declare <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %b) #0 +;============ v4f16 + +define <4 x half> @test_copysign_v4f16_v4f16(<4 x half> %a, <4 x half> %b) #0 { +; CHECK-LABEL: test_copysign_v4f16_v4f16: +; NOFP16-NEXT: mov h2, v1[1] +; NOFP16-NEXT: mov h3, v0[1] +; NOFP16-NEXT: movi.4s v4, #128, lsl #24 +; NOFP16-NEXT: fcvt s5, h1 +; NOFP16-NEXT: fcvt s6, h0 +; NOFP16-NEXT: bit.16b v6, v5, v4 +; NOFP16-NEXT: mov h5, v1[2] +; NOFP16-NEXT: fcvt s2, h2 +; NOFP16-NEXT: fcvt s3, h3 +; NOFP16-NEXT: bit.16b v3, v2, v4 +; NOFP16-NEXT: mov h2, v0[2] +; NOFP16-NEXT: fcvt s5, h5 +; NOFP16-NEXT: fcvt s2, h2 +; NOFP16-NEXT: bit.16b v2, v5, v4 +; NOFP16-NEXT: mov h1, v1[3] +; NOFP16-NEXT: mov h0, v0[3] +; NOFP16-NEXT: fcvt s1, h1 +; NOFP16-NEXT: fcvt s5, h0 +; NOFP16-NEXT: fcvt h0, s6 +; NOFP16-NEXT: bit.16b v5, v1, v4 +; NOFP16-NEXT: fcvt h1, s3 +; NOFP16-NEXT: fcvt h2, s2 +; NOFP16-NEXT: mov.h v0[1], v1[0] +; NOFP16-NEXT: mov.h v0[2], v2[0] +; NOFP16-NEXT: fcvt h1, s5 +; NOFP16-NEXT: mov.h v0[3], v1[0] +; NOFP16-NEXT: ret + +; FP16-NEXT: mov h2, v1[1] +; FP16-NEXT: mov h3, v0[1] +; FP16-NEXT: movi.8h v4, #128, lsl #8 +; FP16-NEXT: mov h5, v1[2] +; FP16-NEXT: bit.16b v3, v2, v4 +; FP16-NEXT: mov h2, v0[2] +; FP16-NEXT: bit.16b v2, v5, v4 +; FP16-NEXT: mov h5, v0[3] +; FP16-NEXT: bit.16b v0, v1, v4 +; FP16-NEXT: mov h1, v1[3] +; FP16-NEXT: mov.h v0[1], v3[0] +; FP16-NEXT: mov.h v0[2], v2[0] +; FP16-NEXT: bit.16b v5, v1, v4 +; FP16-NEXT: mov.h v0[3], v5[0] +; FP16-NEXT: ret + %r = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %b) + ret <4 x half> %r +} + +define <4 x half> @test_copysign_v4f16_v4f32(<4 x half> %a, <4 x float> %b) #0 { +; CHECK-LABEL: test_copysign_v4f16_v4f32: +; NOFP16-NEXT: fcvtn v1.4h, v1.4s +; NOFP16-NEXT: mov h2, v0[1] +; NOFP16-NEXT: movi.4s v3, #128, lsl #24 +; NOFP16-NEXT: fcvt s4, h0 +; NOFP16-NEXT: mov h5, v0[2] +; NOFP16-NEXT: fcvt s2, h2 +; NOFP16-NEXT: fcvt s6, h1 +; NOFP16-NEXT: bit.16b v4, v6, v3 +; NOFP16-NEXT: mov h6, v1[1] +; NOFP16-NEXT: fcvt s5, h5 +; NOFP16-NEXT: fcvt s6, h6 +; NOFP16-NEXT: bit.16b v2, v6, v3 +; NOFP16-NEXT: mov h6, v1[2] +; NOFP16-NEXT: fcvt s6, h6 +; NOFP16-NEXT: bit.16b v5, v6, v3 +; NOFP16-NEXT: mov h0, v0[3] +; NOFP16-NEXT: fcvt s6, h0 +; NOFP16-NEXT: mov h0, v1[3] +; NOFP16-NEXT: fcvt s1, h0 +; NOFP16-NEXT: fcvt h0, s4 +; NOFP16-NEXT: bit.16b v6, v1, v3 +; NOFP16-NEXT: fcvt h1, s2 +; NOFP16-NEXT: fcvt h2, s5 +; NOFP16-NEXT: mov.h v0[1], v1[0] +; NOFP16-NEXT: mov.h v0[2], v2[0] +; NOFP16-NEXT: fcvt h1, s6 +; NOFP16-NEXT: mov.h v0[3], v1[0] +; NOFP16-NEXT: ret + +; FP16-NEXT: fcvtn v1.4h, v1.4s +; FP16-NEXT: mov h2, v0[1] +; FP16-NEXT: movi.8h v3, #128, lsl #8 +; FP16-NEXT: mov h4, v0[2] +; FP16-NEXT: mov h5, v1[1] +; FP16-NEXT: bit.16b v2, v5, v3 +; FP16-NEXT: mov h5, v1[2] +; FP16-NEXT: bit.16b v4, v5, v3 +; FP16-NEXT: mov h5, v0[3] +; FP16-NEXT: bit.16b v0, v1, v3 +; FP16-NEXT: mov h1, v1[3] +; FP16-NEXT: mov.h v0[1], v2[0] +; FP16-NEXT: mov.h v0[2], v4[0] +; FP16-NEXT: bit.16b v5, v1, v3 +; FP16-NEXT: mov.h v0[3], v5[0] +; FP16-NEXT: ret + %tmp0 = fptrunc <4 x float> %b to <4 x half> + %r = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %tmp0) + ret <4 x half> %r +} + +define <4 x half> @test_copysign_v4f16_v4f64(<4 x half> %a, <4 x double> %b) #0 { +; CHECK-LABEL: test_copysign_v4f16_v4f64: +; NOFP16-NEXT: mov d3, v2[1] +; NOFP16-NEXT: mov d4, v1[1] +; NOFP16-NEXT: movi.4s v5, #128, lsl #24 +; NOFP16-NEXT: fcvt s1, d1 +; NOFP16-NEXT: fcvt s6, h0 +; NOFP16-NEXT: bit.16b v6, v1, v5 +; NOFP16-NEXT: mov h1, v0[1] +; NOFP16-NEXT: fcvt s2, d2 +; NOFP16-NEXT: fcvt s4, d4 +; NOFP16-NEXT: fcvt s1, h1 +; NOFP16-NEXT: bit.16b v1, v4, v5 +; NOFP16-NEXT: mov h4, v0[2] +; NOFP16-NEXT: mov h0, v0[3] +; NOFP16-NEXT: fcvt s4, h4 +; NOFP16-NEXT: fcvt s3, d3 +; NOFP16-NEXT: fcvt s7, h0 +; NOFP16-NEXT: fcvt h0, s6 +; NOFP16-NEXT: bit.16b v4, v2, v5 +; NOFP16-NEXT: bit.16b v7, v3, v5 +; NOFP16-NEXT: fcvt h1, s1 +; NOFP16-NEXT: fcvt h2, s4 +; NOFP16-NEXT: mov.h v0[1], v1[0] +; NOFP16-NEXT: mov.h v0[2], v2[0] +; NOFP16-NEXT: fcvt h1, s7 +; NOFP16-NEXT: mov.h v0[3], v1[0] +; NOFP16-NEXT: ret + +; FP16-NEXT: mov h3, v0[1] +; FP16-NEXT: movi.8h v4, #128, lsl #8 +; FP16-NEXT: fcvt h5, d1 +; FP16-NEXT: mov h6, v0[2] +; FP16-NEXT: mov h7, v0[3] +; FP16-NEXT: bit.16b v0, v5, v4 +; FP16-NEXT: fcvt h5, d2 +; FP16-NEXT: bit.16b v6, v5, v4 +; FP16-NEXT: mov d2, v2[1] +; FP16-NEXT: mov d1, v1[1] +; FP16-NEXT: fcvt h1, d1 +; FP16-NEXT: fcvt h2, d2 +; FP16-NEXT: bit.16b v3, v1, v4 +; FP16-NEXT: mov.h v0[1], v3[0] +; FP16-NEXT: mov.h v0[2], v6[0] +; FP16-NEXT: bit.16b v7, v2, v4 +; FP16-NEXT: mov.h v0[3], v7[0] +; FP16-NEXT: ret + %tmp0 = fptrunc <4 x double> %b to <4 x half> + %r = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %tmp0) + ret <4 x half> %r +} + +declare <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %b) #0 + +;============ v8f16 + +define <8 x half> @test_copysign_v8f16_v8f16(<8 x half> %a, <8 x half> %b) #0 { +; CHECK-LABEL: test_copysign_v8f16_v8f16: +; NOFP16-NEXT: mov h4, v1[1] +; NOFP16-NEXT: mov h5, v0[1] +; NOFP16-NEXT: movi.4s v2, #128, lsl #24 +; NOFP16-NEXT: fcvt s6, h1 +; NOFP16-NEXT: fcvt s3, h0 +; NOFP16-NEXT: mov h7, v1[2] +; NOFP16-NEXT: mov h16, v0[2] +; NOFP16-NEXT: mov h17, v1[3] +; NOFP16-NEXT: mov h18, v0[3] +; NOFP16-NEXT: bit.16b v3, v6, v2 +; NOFP16-NEXT: mov h6, v1[4] +; NOFP16-NEXT: fcvt s4, h4 +; NOFP16-NEXT: fcvt s5, h5 +; NOFP16-NEXT: bit.16b v5, v4, v2 +; NOFP16-NEXT: mov h4, v0[4] +; NOFP16-NEXT: fcvt s7, h7 +; NOFP16-NEXT: fcvt s16, h16 +; NOFP16-NEXT: bit.16b v16, v7, v2 +; NOFP16-NEXT: mov h7, v1[5] +; NOFP16-NEXT: fcvt s17, h17 +; NOFP16-NEXT: fcvt s18, h18 +; NOFP16-NEXT: bit.16b v18, v17, v2 +; NOFP16-NEXT: mov h17, v0[5] +; NOFP16-NEXT: fcvt s6, h6 +; NOFP16-NEXT: fcvt s4, h4 +; NOFP16-NEXT: bit.16b v4, v6, v2 +; NOFP16-NEXT: mov h6, v1[6] +; NOFP16-NEXT: fcvt s7, h7 +; NOFP16-NEXT: fcvt s17, h17 +; NOFP16-NEXT: bit.16b v17, v7, v2 +; NOFP16-NEXT: mov h7, v0[6] +; NOFP16-NEXT: fcvt s6, h6 +; NOFP16-NEXT: fcvt s7, h7 +; NOFP16-NEXT: bit.16b v7, v6, v2 +; NOFP16-NEXT: mov h1, v1[7] +; NOFP16-NEXT: mov h0, v0[7] +; NOFP16-NEXT: fcvt s1, h1 +; NOFP16-NEXT: fcvt s6, h0 +; NOFP16-NEXT: bit.16b v6, v1, v2 +; NOFP16-NEXT: fcvt h0, s3 +; NOFP16-NEXT: fcvt h1, s5 +; NOFP16-NEXT: mov.h v0[1], v1[0] +; NOFP16-NEXT: fcvt h1, s16 +; NOFP16-NEXT: mov.h v0[2], v1[0] +; NOFP16-NEXT: fcvt h1, s18 +; NOFP16-NEXT: fcvt h2, s4 +; NOFP16-NEXT: fcvt h3, s17 +; NOFP16-NEXT: fcvt h4, s7 +; NOFP16-NEXT: mov.h v0[3], v1[0] +; NOFP16-NEXT: mov.h v0[4], v2[0] +; NOFP16-NEXT: mov.h v0[5], v3[0] +; NOFP16-NEXT: mov.h v0[6], v4[0] +; NOFP16-NEXT: fcvt h1, s6 +; NOFP16-NEXT: mov.h v0[7], v1[0] +; NOFP16-NEXT: ret + +; FP16-NEXT: mov h4, v1[1] +; FP16-NEXT: mov h3, v0[1] +; FP16-NEXT: movi.8h v2, #128, lsl #8 +; FP16-NEXT: mov h5, v1[2] +; FP16-NEXT: mov h6, v0[2] +; FP16-NEXT: mov h7, v1[3] +; FP16-NEXT: mov h16, v0[3] +; FP16-NEXT: mov h17, v1[4] +; FP16-NEXT: bit.16b v3, v4, v2 +; FP16-NEXT: mov h4, v0[4] +; FP16-NEXT: bit.16b v6, v5, v2 +; FP16-NEXT: mov h5, v1[5] +; FP16-NEXT: bit.16b v16, v7, v2 +; FP16-NEXT: mov h7, v0[5] +; FP16-NEXT: bit.16b v4, v17, v2 +; FP16-NEXT: mov h17, v1[6] +; FP16-NEXT: bit.16b v7, v5, v2 +; FP16-NEXT: mov h5, v0[6] +; FP16-NEXT: bit.16b v5, v17, v2 +; FP16-NEXT: mov h17, v0[7] +; FP16-NEXT: bit.16b v0, v1, v2 +; FP16-NEXT: mov.h v0[1], v3[0] +; FP16-NEXT: mov.h v0[2], v6[0] +; FP16-NEXT: mov.h v0[3], v16[0] +; FP16-NEXT: mov.h v0[4], v4[0] +; FP16-NEXT: mov h1, v1[7] +; FP16-NEXT: mov.h v0[5], v7[0] +; FP16-NEXT: mov.h v0[6], v5[0] +; FP16-NEXT: bit.16b v17, v1, v2 +; FP16-NEXT: mov.h v0[7], v17[0] +; FP16-NEXT: ret + %r = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %b) + ret <8 x half> %r +} + +define <8 x half> @test_copysign_v8f16_v8f32(<8 x half> %a, <8 x float> %b) #0 { +; CHECK-LABEL: test_copysign_v8f16_v8f32: +; NOFP16-NEXT: fcvtn v2.4h, v2.4s +; NOFP16-NEXT: fcvtn v4.4h, v1.4s +; NOFP16-NEXT: mov h5, v0[1] +; NOFP16-NEXT: movi.4s v1, #128, lsl #24 +; NOFP16-NEXT: fcvt s3, h0 +; NOFP16-NEXT: mov h6, v0[2] +; NOFP16-NEXT: mov h7, v0[3] +; NOFP16-NEXT: mov h16, v0[4] +; NOFP16-NEXT: mov h17, v0[5] +; NOFP16-NEXT: fcvt s5, h5 +; NOFP16-NEXT: fcvt s18, h4 +; NOFP16-NEXT: fcvt s16, h16 +; NOFP16-NEXT: bit.16b v3, v18, v1 +; NOFP16-NEXT: fcvt s18, h2 +; NOFP16-NEXT: bit.16b v16, v18, v1 +; NOFP16-NEXT: mov h18, v4[1] +; NOFP16-NEXT: fcvt s6, h6 +; NOFP16-NEXT: fcvt s18, h18 +; NOFP16-NEXT: bit.16b v5, v18, v1 +; NOFP16-NEXT: mov h18, v4[2] +; NOFP16-NEXT: fcvt s18, h18 +; NOFP16-NEXT: bit.16b v6, v18, v1 +; NOFP16-NEXT: mov h18, v0[6] +; NOFP16-NEXT: fcvt s7, h7 +; NOFP16-NEXT: mov h4, v4[3] +; NOFP16-NEXT: fcvt s17, h17 +; NOFP16-NEXT: fcvt s4, h4 +; NOFP16-NEXT: bit.16b v7, v4, v1 +; NOFP16-NEXT: mov h4, v2[1] +; NOFP16-NEXT: fcvt s18, h18 +; NOFP16-NEXT: fcvt s4, h4 +; NOFP16-NEXT: bit.16b v17, v4, v1 +; NOFP16-NEXT: mov h4, v2[2] +; NOFP16-NEXT: fcvt s4, h4 +; NOFP16-NEXT: bit.16b v18, v4, v1 +; NOFP16-NEXT: mov h0, v0[7] +; NOFP16-NEXT: fcvt s4, h0 +; NOFP16-NEXT: mov h0, v2[3] +; NOFP16-NEXT: fcvt s0, h0 +; NOFP16-NEXT: bit.16b v4, v0, v1 +; NOFP16-NEXT: fcvt h0, s3 +; NOFP16-NEXT: fcvt h1, s5 +; NOFP16-NEXT: mov.h v0[1], v1[0] +; NOFP16-NEXT: fcvt h1, s16 +; NOFP16-NEXT: fcvt h2, s6 +; NOFP16-NEXT: fcvt h3, s7 +; NOFP16-NEXT: fcvt h5, s17 +; NOFP16-NEXT: fcvt h6, s18 +; NOFP16-NEXT: mov.h v0[2], v2[0] +; NOFP16-NEXT: mov.h v0[3], v3[0] +; NOFP16-NEXT: mov.h v0[4], v1[0] +; NOFP16-NEXT: mov.h v0[5], v5[0] +; NOFP16-NEXT: mov.h v0[6], v6[0] +; NOFP16-NEXT: fcvt h1, s4 +; NOFP16-NEXT: mov.h v0[7] +; NOFP16-NEXT: ret + +; FP16-NEXT: fcvtn v2.4h, v2.4s +; FP16-NEXT: fcvtn v4.4h, v1.4s +; FP16-NEXT: mov h3, v0[1] +; FP16-NEXT: movi.8h v1, #128, lsl #8 +; FP16-NEXT: mov h5, v0[2] +; FP16-NEXT: mov h6, v0[3] +; FP16-NEXT: mov h7, v0[4] +; FP16-NEXT: mov h16, v0[5] +; FP16-NEXT: mov h17, v0[6] +; FP16-NEXT: mov h18, v4[1] +; FP16-NEXT: bit.16b v3, v18, v1 +; FP16-NEXT: mov h18, v4[2] +; FP16-NEXT: bit.16b v5, v18, v1 +; FP16-NEXT: mov h18, v0[7] +; FP16-NEXT: bit.16b v0, v4, v1 +; FP16-NEXT: mov h4, v4[3] +; FP16-NEXT: bit.16b v6, v4, v1 +; FP16-NEXT: mov h4, v2[1] +; FP16-NEXT: bit.16b v16, v4, v1 +; FP16-NEXT: mov h4, v2[2] +; FP16-NEXT: bit.16b v17, v4, v1 +; FP16-NEXT: mov.h v0[1], v3[0] +; FP16-NEXT: mov.h v0[2], v5[0] +; FP16-NEXT: mov.h v0[3], v6[0] +; FP16-NEXT: bit.16b v7, v2, v1 +; FP16-NEXT: mov h2, v2[3] +; FP16-NEXT: mov.h v0[4], v7[0] +; FP16-NEXT: mov.h v0[5], v16[0] +; FP16-NEXT: mov.h v0[6], v17[0] +; FP16-NEXT: bit.16b v18, v2, v1 +; FP16-NEXT: mov.h v0[7], v18[0] +; FP16-NEXT: ret + %tmp0 = fptrunc <8 x float> %b to <8 x half> + %r = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %tmp0) + ret <8 x half> %r +} + +declare <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %b) #0 + attributes #0 = { nounwind } From e77191c35e334bbdbb72aeaecb44c7b5f6bfd31b Mon Sep 17 00:00:00 2001 From: Andrzej Warzynski Date: Thu, 1 Jul 2021 09:33:00 +0100 Subject: [PATCH 416/619] [flang][driver] Extend the `flang` bash script to act as a driver Until now, `f18` would: 1. Use Flang to unparse the input files 2. Call an external Fortran compiler to compile the unparsed source files (generated in step 1) With this patch, `f18` will stop after unparsing the input source files, i.e. step 1 above. The `flang` bash script will take care of step 2, i.e. calling an external Fortran compiler driver to compile them. This way: * the functionality of `f18` is reduced - it will only drive Flang (as opposed to delegating code-generation to an external tool on top of this) * we will able to switch between `f18` and `flang-new` for unparsing before an external Fortran compiler is called for code-generation The updated `flang` bash script needs to specify the output file when using the `-fdebug-unparse` action. Both `f18` and `flang-new` have been updated accordingly. These changes were discussed in [1] as a requirement for replacing `f18` with `flang-new`. [1] https://lists.llvm.org/pipermail/flang-dev/2021-April/000677.html Differential Revision: https://reviews.llvm.org/D103177 --- flang/lib/Frontend/FrontendActions.cpp | 6 +- flang/tools/f18/CMakeLists.txt | 9 +- flang/tools/f18/f18.cpp | 17 +- flang/tools/f18/flang | 16 -- flang/tools/f18/flang.in | 376 +++++++++++++++++++++++++ 5 files changed, 403 insertions(+), 21 deletions(-) delete mode 100644 flang/tools/f18/flang create mode 100755 flang/tools/f18/flang.in diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp index 8ee42d73c6e46..fae058468275b 100644 --- a/flang/lib/Frontend/FrontendActions.cpp +++ b/flang/lib/Frontend/FrontendActions.cpp @@ -257,8 +257,12 @@ void DebugUnparseAction::ExecuteAction() { auto &invoc = this->instance().invocation(); auto &parseTree{instance().parsing().parseTree()}; + CompilerInstance &ci = this->instance(); + auto os{ci.CreateDefaultOutputFile( + /*Binary=*/false, /*InFile=*/GetCurrentFileOrBufferName())}; + // TODO: Options should come from CompilerInvocation - Unparse(llvm::outs(), *parseTree, + Unparse(*os, *parseTree, /*encoding=*/Fortran::parser::Encoding::UTF_8, /*capitalizeKeywords=*/true, /*backslashEscapes=*/false, /*preStatement=*/nullptr, diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt index fc84bbf09c59d..239859b5e5b9d 100644 --- a/flang/tools/f18/CMakeLists.txt +++ b/flang/tools/f18/CMakeLists.txt @@ -62,10 +62,13 @@ add_custom_target(module_files ALL DEPENDS ${MODULE_FILES}) install(TARGETS f18 DESTINATION bin) +set(FLANG_DEFAULT_DRIVER "flang-new") +if (NOT FLANG_BUILD_NEW_DRIVER) + set(FLANG_DEFAULT_DRIVER "f18") +endif() + # This flang shell script will only work in a POSIX shell. if (NOT WIN32) - file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/flang - DESTINATION ${CMAKE_BINARY_DIR}/bin - FILE_PERMISSIONS OWNER_EXECUTE OWNER_READ OWNER_WRITE) + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/flang.in ${CMAKE_BINARY_DIR}/bin/flang @ONLY) install(PROGRAMS ${CMAKE_BINARY_DIR}/bin/flang DESTINATION bin) endif() diff --git a/flang/tools/f18/f18.cpp b/flang/tools/f18/f18.cpp index 71f7673c84075..4fe5d1a99a3f0 100644 --- a/flang/tools/f18/f18.cpp +++ b/flang/tools/f18/f18.cpp @@ -319,7 +319,22 @@ std::string CompileFortran(std::string path, Fortran::parser::Options options, Fortran::parser::DumpTree(llvm::outs(), parseTree, &asFortran); } if (driver.dumpUnparse) { - Unparse(llvm::outs(), parseTree, driver.encoding, true /*capitalize*/, + // Prepare the output stream + std::unique_ptr os; + std::string outputFile = "-"; + if (!driver.outputPath.empty()) { + outputFile = driver.outputPath; + } + + std::error_code EC; + os.reset(new llvm::raw_fd_ostream( + outputFile, EC, llvm::sys::fs::OF_TextWithCRLF)); + if (EC) { + llvm::errs() << EC.message() << "\n"; + std::exit(EXIT_FAILURE); + } + + Unparse(*os, parseTree, driver.encoding, true /*capitalize*/, options.features.IsEnabled( Fortran::common::LanguageFeature::BackslashEscapes), nullptr /* action before each statement */, diff --git a/flang/tools/f18/flang b/flang/tools/f18/flang deleted file mode 100644 index 846be4a18fdd8..0000000000000 --- a/flang/tools/f18/flang +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash -#===-- tools/f18/flang.sh -----------------------------------------*- sh -*-===# -# -# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# -#===------------------------------------------------------------------------===# - -wd=$(cd $(dirname "$0")/.. && pwd) -opts="-fno-analyzed-objects-for-unparse -module-suffix .f18.mod " -if ! $wd/bin/f18 $opts "$@" -then status=$? - echo flang: in $PWD, f18 failed with exit status $status: $wd/bin/f18 $opts "$@" >&2 - exit $status -fi diff --git a/flang/tools/f18/flang.in b/flang/tools/f18/flang.in new file mode 100755 index 0000000000000..f0371efecd849 --- /dev/null +++ b/flang/tools/f18/flang.in @@ -0,0 +1,376 @@ +#! /usr/bin/env bash +#===-- tools/f18/flang.sh -----------------------------------------*- sh -*-===# +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +#===------------------------------------------------------------------------===# +# A wrapper script for Flang's compiler driver that was developed for testing and +# experimenting. You should be able to use it as a regular compiler driver. It +# will: +# * run Flang's compiler driver to unparse the input source files +# * use the external compiler (defined via F18_FC environment variable) to +# compile the unparsed source files +#===------------------------------------------------------------------------===# +set -euo pipefail + +# Global variables to make the parsing of input arguments a bit easier +INPUT_FILES=() +OPTIONS=() +OUTPUT_FILE="" +MODULE_DIR="" +INTRINSICS_MOD_DIR="" +COMPILE_ONLY="False" +PREPROCESS_ONLY="False" +TEMP_OUTPUT="flang_temp_out_" + +# === parse_args ============================================================== +# +# Parse the input arguments passed to this script. Sets the global variables +# declared at the top. +# +# INPUTS: +# $1 - all input arguments +# OUTPUTS: +# Saved in the global variables for this script +# ============================================================================= +parse_args() +{ + while [ "${1:-}" != "" ]; do + # CASE 1: Compiler option + if [[ "${1:0:1}" == "-" ]] ; then + # Output file - extract it into a global variable + if [[ "$1" == "-o" ]] ; then + shift + OUTPUT_FILE="$1" + shift + continue + fi + + # Module directory - extract it into a global variable + if [[ "$1" == "-module-dir" ]]; then + shift + MODULE_DIR="$1" + shift + continue + fi + + # Intrinsics module dir - extract it into a global var + if [[ "$1" == "-intrinsics-module-directory" ]]; then shift + INTRINSICS_MOD_DIR=$1 + shift + continue + fi + + # Module suffix cannot be modified - this script defines it before + # calling the driver. + if [[ "$1" == "-module-suffix" ]]; then + echo "ERROR: \'-module-suffix\' is not available when using the \'flang\' script" + exit 1 + fi + + # Special treatment for `J ` and `-I `. We translate these + # into `J` and `-I` respectively. + if [[ "$1" == "-J" ]] || [[ "$1" == "-I" ]]; then + opt=$1 + shift + OPTIONS+=("$opt$1") + shift + continue + fi + + # This is a regular option - just add it to the list. + OPTIONS+=($1) + if [[ $1 == "-c" ]]; then + COMPILE_ONLY="True" + fi + + if [[ $1 == "-E" ]]; then + PREPROCESS_ONLY="True" + fi + + shift + continue + + # CASE 2: A regular file (either source or a library file) + elif [[ -f "$1" ]]; then + INPUT_FILES+=($1) + shift + continue + + else + # CASE 3: Unsupported + echo "ERROR: unrecognised option format $1" + exit 1 + fi + done +} + +# === categorise_files ======================================================== +# +# Categorises input files into: +# * Fortran source files (to be compiled) +# * library files (to be linked into the final executable) +# +# INPUTS: +# $1 - all input files to be categorised (array, name reference) +# OUTPUTS: +# $2 - Fortran source files extracted from $1 (array, name reference) +# $3 - other source files extracted from $1 (array, name reference) +# $4 - object files extracted from $1 (array, name reference) +# $4 - lib files extracted from $1 (array, name reference) +# ============================================================================= +categorise_files() +{ + local -n -r all_files=$1 + local -n fortran_sources=$2 + local -n other_sources=$3 + local -n libs=$4 + + for current_file in "${all_files[@]}"; do + file_ext=${current_file##*.} + if [[ $file_ext == "f" ]] || [[ $file_ext == "f90" ]] || + [[ $file_ext == "f" ]] || [[ $file_ext == "F" ]] || [[ $file_ext == "ff" ]] || + [[ $file_ext == "f90" ]] || [[ $file_ext == "F90" ]] || [[ $file_ext == "ff90" ]] || + [[ $file_ext == "f95" ]] || [[ $file_ext == "F95" ]] || [[ $file_ext == "ff95" ]] || + [[ $file_ext == "cuf" ]] || [[ $file_ext == "CUF" ]] || [[ $file_ext == "f18" ]] || + [[ $file_ext == "F18" ]] || [[ $file_ext == "ff18" ]]; then + fortran_sources+=($current_file) + elif [[ $file_ext == "a" ]] || [[ $file_ext == "so" ]]; then + libs+=($current_file) + elif [[ $file_ext == "o" ]]; then + object_files+=($current_file) + else + other_sources+=($current_file) + fi + done +} + +# === categorise_opts ========================================================== +# +# Categorises compiler options into options for: +# * the Flang driver (either new or the "throwaway" driver) +# * the external Fortran driver that will generate the code +# Most options accepted by Flang will be claimed by it. The only exceptions are +# `-I` and `-J`. +# +# INPUTS: +# $1 - all compiler options (array, name reference) +# OUTPUTS: +# $2 - compiler options for the Flang driver (array, name reference) +# $3 - compiler options for the external driver (array, name reference) +# ============================================================================= +categorise_opts() +{ + local -n all_opts=$1 + local -n flang_opts=$2 + local -n fc_opts=$3 + + for opt in "${all_opts[@]}"; do + # These options are claimed by Flang, but should've been dealt with in parse_args. + if [[ $opt == "-module-dir" ]] || + [[ $opt == "-o" ]] || + [[ $opt == "-fintrinsic-modules-path" ]] ; then + echo "ERROR: $opt should've been fully processed by \`parse_args\`" + exit 1 + fi + + if + # The options claimed by Flang. This list needs to be compatible with + # what's supported by Flang's compiler driver (i.e. `flang-new` and f18). + [[ $opt == "-cpp" ]] || + [[ $opt =~ ^-D.* ]] || + [[ $opt == "-E" ]] || + [[ $opt == "-falternative-parameter-statement" ]] || + [[ $opt == "-fbackslash" ]] || + [[ $opt == "-fcolor-diagnostics" ]] || + [[ $opt == "-fdefault-double-8" ]] || + [[ $opt == "-fdefault-integer-8" ]] || + [[ $opt == "-fdefault-real-8" ]] || + [[ $opt == "-ffixed-form" ]] || + [[ $opt =~ ^-ffixed-line-length=.* ]] || + [[ $opt == "-ffree-form" ]] || + [[ $opt == "-fimplicit-none" ]] || + [[ $opt =~ ^-finput-charset=.* ]] || + [[ $opt == "-flarge-sizes" ]] || + [[ $opt == "-flogical-abbreviations" ]] || + [[ $opt == "-fno-color-diagnostics" ]] || + [[ $opt == "-fopenacc" ]] || + [[ $opt == "-fopenmp" ]] || + [[ $opt == "-fxor-operator" ]] || + [[ $opt == "-help" ]] || + [[ $opt == "-nocpp" ]] || + [[ $opt == "-pedantic" ]] || + [[ $opt =~ ^-std=.* ]] || + [[ $opt =~ ^-U.* ]] || + [[ $opt == "--version" ]] || + [[ $opt == "-Werror" ]]; then + flang_opts+=($opt) + elif [[ $opt =~ -I.* ]] || [[ $opt =~ -J.* ]]; then + # Options that are needed for both Flang and the external driver. + flang_opts+=($opt) + fc_opts+=($opt) + else + # All other options are claimed for the external driver. + fc_opts+=($opt) + fi + done +} + +# === preprocess ============================================================== +# +# Runs the preprocessing. Fortran files are preprocessed using Flang. Other +# files are preprocessed using the external Fortran compiler. +# +# INPUTS: +# $1 - Fortran source files (array, name reference) +# $2 - other source files (array, name reference) +# $3 - compiler flags (array, name reference) +# ============================================================================= +preprocess() { + local -n fortran_srcs=$1 + local -n other_srcs=$2 + local -n opts=$3 + + local -r ext_fc="${F18_FC:-gfortran}" + local -r wd=$(cd "$(dirname "$0")/.." && pwd) + + # Use the provided output file name. + if [[ ! -z ${OUTPUT_FILE:+x} ]]; then + output_definition="-o $OUTPUT_FILE" + fi + + # Preprocess fortran sources using Flang + for idx in "${!fortran_srcs[@]}"; do + if ! "$wd/bin/@FLANG_DEFAULT_DRIVER@" -E "${opts[@]}" "${fortran_srcs[$idx]}" ${output_definition:+$output_definition} + then status=$? + echo flang: in "$PWD", @FLANG_DEFAULT_DRIVER@ failed with exit status $status: "$wd/bin/@FLANG_DEFAULT_DRIVER@" "${opts[@]}" "$@" >&2 + exit $status + fi + done + + # Preprocess other sources using Flang + for idx in "${!other_srcs[@]}"; do + if ! $ext_fc -E "${opts[@]}" "${other_srcs[$idx]}" ${output_definition:+$output_definition} + then status=$? + echo flang: in "$PWD", flang-new failed with exit status $status: "$wd/bin/flang-new" "${flang_options[@]}" "$@" >&2 + exit $status + fi + done +} + +# === main ==================================================================== +# Main entry point for this script +# ============================================================================= +main() { + parse_args "$@" + + fortran_source_files=() + other_source_files=() + object_files=() + lib_files=() + categorise_files INPUT_FILES fortran_source_files other_source_files object_files lib_files + + if [[ $PREPROCESS_ONLY == "True" ]]; then + preprocess fortran_source_files other_source_files OPTIONS + exit 0 + fi + + # Options for the Flang driver. + # NOTE: We need `-fc1` to make sure that the frontend driver rather than + # compiler driver is used. We also need to make sure that that's the first + # flag that the driver will see (otherwise it assumes compiler/toolchain + # driver mode).`f18` will just ignore this flag when uparsing, so it's fine + # to add it here unconditionally. + flang_options=("-fc1") + # Options for the external Fortran Compiler + ext_fc_options=() + categorise_opts OPTIONS flang_options ext_fc_options + + local -r wd=$(cd "$(dirname "$0")/.." && pwd) + + # STEP 1: Unparse + local -r unparsed_file="flang_unparsed_source_file" + flang_options+=("-module-suffix") + flang_options+=(".f18.mod") + flang_options+=("-fdebug-unparse") + flang_options+=("-fno-analyzed-objects-for-unparse") + + [[ ! -z ${MODULE_DIR} ]] && flang_options+=("-module-dir ${MODULE_DIR}") + [[ ! -z ${INTRINSICS_MOD_DIR} ]] && flang_options+=("-intrinsics-module-directory ${INTRINSICS_MOD_DIR}") + for idx in "${!fortran_source_files[@]}"; do + if ! "$wd/bin/@FLANG_DEFAULT_DRIVER@" "${flang_options[@]}" "${fortran_source_files[$idx]}" -o "${unparsed_file}_${idx}.f90" + then status=$? + echo flang: in "$PWD", @FLANG_DEFAULT_DRIVER@ failed with exit status $status: "$wd/bin/@FLANG_DEFAULT_DRIVER@" "${flang_options[@]}" "$@" >&2 + exit $status + fi + done + + # STEP 2: Compile Fortran Source Files + readonly ext_fc="${F18_FC:-gfortran}" + for idx in "${!fortran_source_files[@]}"; do + # Use the value of $OUTPUT_FILE for the output file iff `-c` was used. + if [[ ! -z ${OUTPUT_FILE:+x} ]] && [[ $COMPILE_ONLY == "True" ]]; then + output_definition="-o $OUTPUT_FILE" + elif [[ $COMPILE_ONLY == "False" ]]; then + output_definition="-o ${TEMP_OUTPUT}_${idx}" + fi + + if ! $ext_fc -c "${ext_fc_options[@]}" "${unparsed_file}_${idx}.f90" ${output_definition:+$output_definition} + then status=$? + echo flang: in "$PWD", "$ext_fc" failed with exit status $status: "$ext_fc" "${ext_fc_options[@]}" "$@" >&2 + exit $status + fi + object_files+=(${TEMP_OUTPUT}_${idx}) + done + + # Delete the unparsed files + for idx in "${!fortran_source_files[@]}"; do + rm "${unparsed_file}_${idx}.f90" + done + + # STEP 3: Compile Other Source Files + for idx in "${!other_source_files[@]}"; do + # Use the value of $OUTPUT_FILE for the output file iff `-c` was used. + if [[ ! -z ${OUTPUT_FILE:+x} ]] && [[ $COMPILE_ONLY == "True" ]]; then + output_definition="-o $OUTPUT_FILE" + elif [[ $COMPILE_ONLY == "False" ]]; then + output_definition="-o ${TEMP_OUTPUT}_${idx}" + fi + + if ! $ext_fc -c "${ext_fc_options[@]}" "${other_source_files[${idx}]}" ${output_definition:+$output_definition} + then status=$? + echo flang: in "$PWD", "$ext_fc" failed with exit status $status: "$ext_fc" "${ext_fc_options[@]}" "$@" >&2 + exit $status + fi + object_files+=(${TEMP_OUTPUT}_${idx}) + done + + # STEP 4: Link + if [[ $COMPILE_ONLY == "True" ]]; then + exit 0; + fi + + if [[ ${#object_files[@]} -ge 1 ]]; then + # If $OUTPUT_FILE was specified, use it for the output name. + if [[ ! -z ${OUTPUT_FILE:+x} ]]; then + output_definition="-o $OUTPUT_FILE" + else + output_definition="" + fi + + if ! $ext_fc "${ext_fc_options[@]}" "${object_files[@]}" "${lib_files[@]}" ${output_definition:+$output_definition} + then status=$? + echo flang: in "$PWD", "$ext_fc" failed with exit status $status: "$ext_fc" "${ext_fc_options[@]}" "$@" >&2 + exit $status + fi + fi + + # Delete intermediate object files + for idx in "${!fortran_source_files[@]}"; do + rm "${TEMP_OUTPUT}_${idx}" + done +} + +main "${@}" From 67643f46ee9fb08e32075d67715a59c840aa761b Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Wed, 30 Jun 2021 05:47:19 -0700 Subject: [PATCH 417/619] [OPENMP]Fix PR50640: OpenMP target clause implicitly scaling loop bounds to uint64_t. Need to add some conversions to suppress possible warning messages. Differential Revision: https://reviews.llvm.org/D105187 --- clang/lib/Sema/SemaOpenMP.cpp | 15 +- .../distribute_parallel_for_codegen.cpp | 282 +++-- .../distribute_parallel_for_simd_codegen.cpp | 342 +++--- ..._teams_distribute_parallel_for_codegen.cpp | 66 +- ...bute_parallel_for_generic_mode_codegen.cpp | 90 +- ...s_distribute_parallel_for_simd_codegen.cpp | 1039 ++++++++--------- ...eams_distribute_parallel_for_ast_print.cpp | 4 +- ...stribute_parallel_for_schedule_codegen.cpp | 264 ++--- ...ute_parallel_for_simd_schedule_codegen.cpp | 264 ++--- ...stribute_parallel_for_schedule_codegen.cpp | 264 ++--- ...ute_parallel_for_simd_schedule_codegen.cpp | 264 ++--- 11 files changed, 1427 insertions(+), 1467 deletions(-) diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 7fddff7992fc1..6e2552a023907 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -9401,11 +9401,20 @@ checkOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr, // Build expression: UB = min(UB, prevUB) for #for in composite or combined // construct + ExprResult NewPrevUB = PrevUB; + if (!SemaRef.Context.hasSameType(UB.get()->getType(), + PrevUB.get()->getType())) { + NewPrevUB = SemaRef.PerformImplicitConversion( + NewPrevUB.get(), UB.get()->getType(), Sema::AA_Converting, + /*AllowExplicit=*/true); + if (!NewPrevUB.isUsable()) + return 0; + } SourceLocation DistEUBLoc = AStmt->getBeginLoc(); - ExprResult IsUBGreater = - SemaRef.BuildBinOp(CurScope, DistEUBLoc, BO_GT, UB.get(), PrevUB.get()); + ExprResult IsUBGreater = SemaRef.BuildBinOp(CurScope, DistEUBLoc, BO_GT, + UB.get(), NewPrevUB.get()); ExprResult CondOp = SemaRef.ActOnConditionalOp( - DistEUBLoc, DistEUBLoc, IsUBGreater.get(), PrevUB.get(), UB.get()); + DistEUBLoc, DistEUBLoc, IsUBGreater.get(), NewPrevUB.get(), UB.get()); PrevEUB = SemaRef.BuildBinOp(CurScope, DistIncLoc, BO_Assign, UB.get(), CondOp.get()); PrevEUB = diff --git a/clang/test/OpenMP/distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_codegen.cpp index 2034f82e25d5e..d1a3f33c33d9c 100644 --- a/clang/test/OpenMP/distribute_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_codegen.cpp @@ -1976,34 +1976,33 @@ int main() { // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: // CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 // CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP14]] +// CHECK1-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP13]], [[CONV7]] // CHECK1-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP15]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CONV9:%.*]] = sext i32 [[TMP16]] to i64 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] -// CHECK1-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 -// CHECK1-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 @@ -2016,15 +2015,15 @@ int main() { // CHECK1-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX]], align 8 // CHECK1-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP3]], align 8 // CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK1-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM13]] -// CHECK1-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX14]], align 8 -// CHECK1-NEXT: [[ADD15:%.*]] = fadd double [[TMP25]], [[TMP28]] +// CHECK1-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM12]] +// CHECK1-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX13]], align 8 +// CHECK1-NEXT: [[ADD14:%.*]] = fadd double [[TMP25]], [[TMP28]] // CHECK1-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP1]], align 8 // CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK1-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM16]] -// CHECK1-NEXT: store double [[ADD15]], double* [[ARRAYIDX17]], align 8 +// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM15]] +// CHECK1-NEXT: store double [[ADD14]], double* [[ARRAYIDX16]], align 8 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 0 // CHECK1-NEXT: store double** [[TMP1]], double*** [[TMP31]], align 8 // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 1 @@ -2039,20 +2038,20 @@ int main() { // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: // CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP35]], 1 -// CHECK1-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK1-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] -// CHECK1-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] +// CHECK1-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] -// CHECK1-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] +// CHECK1-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: // CHECK1-NEXT: [[TMP40:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -3769,34 +3768,33 @@ int main() { // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: // CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 // CHECK2-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP14]] +// CHECK2-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK2-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP13]], [[CONV7]] // CHECK2-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP15]] to i32 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CONV9:%.*]] = sext i32 [[TMP16]] to i64 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] -// CHECK2-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 -// CHECK2-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK2-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK2-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK2-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: // CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK2-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK2-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: // CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 @@ -3809,15 +3807,15 @@ int main() { // CHECK2-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX]], align 8 // CHECK2-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP3]], align 8 // CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK2-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK2-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM13]] -// CHECK2-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX14]], align 8 -// CHECK2-NEXT: [[ADD15:%.*]] = fadd double [[TMP25]], [[TMP28]] +// CHECK2-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK2-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM12]] +// CHECK2-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX13]], align 8 +// CHECK2-NEXT: [[ADD14:%.*]] = fadd double [[TMP25]], [[TMP28]] // CHECK2-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP1]], align 8 // CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK2-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK2-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM16]] -// CHECK2-NEXT: store double [[ADD15]], double* [[ARRAYIDX17]], align 8 +// CHECK2-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK2-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM15]] +// CHECK2-NEXT: store double [[ADD14]], double* [[ARRAYIDX16]], align 8 // CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 0 // CHECK2-NEXT: store double** [[TMP1]], double*** [[TMP31]], align 8 // CHECK2-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 1 @@ -3832,20 +3830,20 @@ int main() { // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: // CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP35]], 1 -// CHECK2-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK2-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: // CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] -// CHECK2-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] +// CHECK2-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] -// CHECK2-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] +// CHECK2-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: // CHECK2-NEXT: [[TMP40:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -5522,7 +5520,7 @@ int main() { // CHECK3: omp.dispatch.cond: // CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -7249,7 +7247,7 @@ int main() { // CHECK4: omp.dispatch.cond: // CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK4-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// CHECK4-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK4-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -9465,34 +9463,33 @@ int main() { // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: // CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 // CHECK9-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP14]] +// CHECK9-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP13]], [[CONV7]] // CHECK9-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP15]] to i32 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: // CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CONV9:%.*]] = sext i32 [[TMP16]] to i64 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] -// CHECK9-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 -// CHECK9-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK9-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK9-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: // CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: // CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 @@ -9505,34 +9502,34 @@ int main() { // CHECK9-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX]], align 8 // CHECK9-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP3]], align 8 // CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK9-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM13]] -// CHECK9-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX14]], align 8 -// CHECK9-NEXT: [[ADD15:%.*]] = fadd double [[TMP25]], [[TMP28]] +// CHECK9-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM12]] +// CHECK9-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX13]], align 8 +// CHECK9-NEXT: [[ADD14:%.*]] = fadd double [[TMP25]], [[TMP28]] // CHECK9-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP1]], align 8 // CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK9-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK9-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM16]] -// CHECK9-NEXT: store double [[ADD15]], double* [[ARRAYIDX17]], align 8 +// CHECK9-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM15]] +// CHECK9-NEXT: store double [[ADD14]], double* [[ARRAYIDX16]], align 8 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: // CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP31]], 1 -// CHECK9-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK9-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: // CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] -// CHECK9-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK9-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] -// CHECK9-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK9-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: // CHECK9-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -11683,34 +11680,33 @@ int main() { // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: // CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 // CHECK9-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP14]] +// CHECK9-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP13]], [[CONV7]] // CHECK9-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP15]] to i32 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: // CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CONV9:%.*]] = sext i32 [[TMP16]] to i64 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] -// CHECK9-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 -// CHECK9-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK9-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK9-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: // CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: // CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 @@ -11723,34 +11719,34 @@ int main() { // CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP3]], align 8 // CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK9-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i64 [[IDXPROM13]] -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[ARRAYIDX14]], align 4 -// CHECK9-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP25]], [[TMP28]] +// CHECK9-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i64 [[IDXPROM12]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[ARRAYIDX13]], align 4 +// CHECK9-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP25]], [[TMP28]] // CHECK9-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP1]], align 8 // CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK9-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK9-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i64 [[IDXPROM16]] -// CHECK9-NEXT: store i32 [[ADD15]], i32* [[ARRAYIDX17]], align 4 +// CHECK9-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i64 [[IDXPROM15]] +// CHECK9-NEXT: store i32 [[ADD14]], i32* [[ARRAYIDX16]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: // CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP31]], 1 -// CHECK9-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK9-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: // CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] -// CHECK9-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK9-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] -// CHECK9-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK9-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: // CHECK9-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -13896,34 +13892,33 @@ int main() { // CHECK10-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK10: omp.dispatch.cond: // CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 // CHECK10-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK10-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP14]] +// CHECK10-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK10-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP13]], [[CONV7]] // CHECK10-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: // CHECK10-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK10-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP15]] to i32 // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: // CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CONV9:%.*]] = sext i32 [[TMP16]] to i64 // CHECK10-NEXT: br label [[COND_END]] // CHECK10: cond.end: -// CHECK10-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] -// CHECK10-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 -// CHECK10-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK10-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK10-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK10-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK10-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK10-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK10: omp.dispatch.body: // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: // CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK10-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK10-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK10-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: // CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 @@ -13936,34 +13931,34 @@ int main() { // CHECK10-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX]], align 8 // CHECK10-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP3]], align 8 // CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK10-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK10-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM13]] -// CHECK10-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX14]], align 8 -// CHECK10-NEXT: [[ADD15:%.*]] = fadd double [[TMP25]], [[TMP28]] +// CHECK10-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK10-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM12]] +// CHECK10-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX13]], align 8 +// CHECK10-NEXT: [[ADD14:%.*]] = fadd double [[TMP25]], [[TMP28]] // CHECK10-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP1]], align 8 // CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK10-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK10-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM16]] -// CHECK10-NEXT: store double [[ADD15]], double* [[ARRAYIDX17]], align 8 +// CHECK10-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK10-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM15]] +// CHECK10-NEXT: store double [[ADD14]], double* [[ARRAYIDX16]], align 8 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: // CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP31]], 1 -// CHECK10-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK10-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK10: omp.dispatch.inc: // CHECK10-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK10-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK10-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] -// CHECK10-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK10-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4 // CHECK10-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK10-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] -// CHECK10-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK10-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK10: omp.dispatch.end: // CHECK10-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -16114,34 +16109,33 @@ int main() { // CHECK10-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK10: omp.dispatch.cond: // CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 // CHECK10-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK10-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP14]] +// CHECK10-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK10-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP13]], [[CONV7]] // CHECK10-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: // CHECK10-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK10-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP15]] to i32 // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: // CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CONV9:%.*]] = sext i32 [[TMP16]] to i64 // CHECK10-NEXT: br label [[COND_END]] // CHECK10: cond.end: -// CHECK10-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] -// CHECK10-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 -// CHECK10-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK10-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK10-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK10-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK10-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK10-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK10: omp.dispatch.body: // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: // CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK10-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK10-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK10-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: // CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 @@ -16154,34 +16148,34 @@ int main() { // CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK10-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP3]], align 8 // CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK10-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK10-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i64 [[IDXPROM13]] -// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[ARRAYIDX14]], align 4 -// CHECK10-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP25]], [[TMP28]] +// CHECK10-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK10-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i64 [[IDXPROM12]] +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[ARRAYIDX13]], align 4 +// CHECK10-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP25]], [[TMP28]] // CHECK10-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP1]], align 8 // CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK10-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK10-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i64 [[IDXPROM16]] -// CHECK10-NEXT: store i32 [[ADD15]], i32* [[ARRAYIDX17]], align 4 +// CHECK10-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK10-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i64 [[IDXPROM15]] +// CHECK10-NEXT: store i32 [[ADD14]], i32* [[ARRAYIDX16]], align 4 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: // CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP31]], 1 -// CHECK10-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK10-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK10: omp.dispatch.inc: // CHECK10-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK10-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK10-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] -// CHECK10-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK10-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4 // CHECK10-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK10-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] -// CHECK10-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK10-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK10: omp.dispatch.end: // CHECK10-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -18277,7 +18271,7 @@ int main() { // CHECK11: omp.dispatch.cond: // CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -20419,7 +20413,7 @@ int main() { // CHECK11: omp.dispatch.cond: // CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -22556,7 +22550,7 @@ int main() { // CHECK12: omp.dispatch.cond: // CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK12-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// CHECK12-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK12-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK12: cond.true: // CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -24698,7 +24692,7 @@ int main() { // CHECK12: omp.dispatch.cond: // CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK12-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// CHECK12-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK12-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK12: cond.true: // CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp index c9a16f2db634c..60b4685ed9b46 100644 --- a/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp @@ -2084,34 +2084,33 @@ int main() { // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: // CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 // CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP14]] +// CHECK1-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP13]], [[CONV7]] // CHECK1-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP15]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CONV9:%.*]] = sext i32 [[TMP16]] to i64 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] -// CHECK1-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 -// CHECK1-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !40 -// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 @@ -2124,15 +2123,15 @@ int main() { // CHECK1-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !40 // CHECK1-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !40 // CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !40 -// CHECK1-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM13]] -// CHECK1-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX14]], align 8, !llvm.access.group !40 -// CHECK1-NEXT: [[ADD15:%.*]] = fadd double [[TMP25]], [[TMP28]] +// CHECK1-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM12]] +// CHECK1-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX13]], align 8, !llvm.access.group !40 +// CHECK1-NEXT: [[ADD14:%.*]] = fadd double [[TMP25]], [[TMP28]] // CHECK1-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !40 // CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !40 -// CHECK1-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM16]] -// CHECK1-NEXT: store double [[ADD15]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !40 +// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM15]] +// CHECK1-NEXT: store double [[ADD14]], double* [[ARRAYIDX16]], align 8, !llvm.access.group !40 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 0 // CHECK1-NEXT: store double** [[TMP1]], double*** [[TMP31]], align 8, !llvm.access.group !40 // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 1 @@ -2147,20 +2146,20 @@ int main() { // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: // CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP35]], 1 -// CHECK1-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK1-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] -// CHECK1-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] +// CHECK1-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] -// CHECK1-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] +// CHECK1-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: // CHECK1-NEXT: [[TMP40:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -2171,11 +2170,11 @@ int main() { // CHECK1-NEXT: br i1 [[TMP43]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[SUB21:%.*]] = sub nsw i32 [[TMP44]], 0 -// CHECK1-NEXT: [[DIV22:%.*]] = sdiv i32 [[SUB21]], 1 -// CHECK1-NEXT: [[MUL23:%.*]] = mul nsw i32 [[DIV22]], 1 -// CHECK1-NEXT: [[ADD24:%.*]] = add nsw i32 0, [[MUL23]] -// CHECK1-NEXT: store i32 [[ADD24]], i32* [[I6]], align 4 +// CHECK1-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP44]], 0 +// CHECK1-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK1-NEXT: [[MUL22:%.*]] = mul nsw i32 [[DIV21]], 1 +// CHECK1-NEXT: [[ADD23:%.*]] = add nsw i32 0, [[MUL22]] +// CHECK1-NEXT: store i32 [[ADD23]], i32* [[I6]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] @@ -4045,34 +4044,33 @@ int main() { // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: // CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 // CHECK2-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP14]] +// CHECK2-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK2-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP13]], [[CONV7]] // CHECK2-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP15]] to i32 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CONV9:%.*]] = sext i32 [[TMP16]] to i64 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] -// CHECK2-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 -// CHECK2-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK2-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK2-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK2-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: // CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !40 -// CHECK2-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK2-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK2-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: // CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 @@ -4085,15 +4083,15 @@ int main() { // CHECK2-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !40 // CHECK2-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !40 // CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !40 -// CHECK2-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK2-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM13]] -// CHECK2-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX14]], align 8, !llvm.access.group !40 -// CHECK2-NEXT: [[ADD15:%.*]] = fadd double [[TMP25]], [[TMP28]] +// CHECK2-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK2-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM12]] +// CHECK2-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX13]], align 8, !llvm.access.group !40 +// CHECK2-NEXT: [[ADD14:%.*]] = fadd double [[TMP25]], [[TMP28]] // CHECK2-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !40 // CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !40 -// CHECK2-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK2-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM16]] -// CHECK2-NEXT: store double [[ADD15]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !40 +// CHECK2-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK2-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM15]] +// CHECK2-NEXT: store double [[ADD14]], double* [[ARRAYIDX16]], align 8, !llvm.access.group !40 // CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 0 // CHECK2-NEXT: store double** [[TMP1]], double*** [[TMP31]], align 8, !llvm.access.group !40 // CHECK2-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 1 @@ -4108,20 +4106,20 @@ int main() { // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: // CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 -// CHECK2-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP35]], 1 -// CHECK2-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK2-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK2-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: // CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] -// CHECK2-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] +// CHECK2-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] -// CHECK2-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] +// CHECK2-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: // CHECK2-NEXT: [[TMP40:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -4132,11 +4130,11 @@ int main() { // CHECK2-NEXT: br i1 [[TMP43]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: // CHECK2-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[SUB21:%.*]] = sub nsw i32 [[TMP44]], 0 -// CHECK2-NEXT: [[DIV22:%.*]] = sdiv i32 [[SUB21]], 1 -// CHECK2-NEXT: [[MUL23:%.*]] = mul nsw i32 [[DIV22]], 1 -// CHECK2-NEXT: [[ADD24:%.*]] = add nsw i32 0, [[MUL23]] -// CHECK2-NEXT: store i32 [[ADD24]], i32* [[I6]], align 4 +// CHECK2-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP44]], 0 +// CHECK2-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK2-NEXT: [[MUL22:%.*]] = mul nsw i32 [[DIV21]], 1 +// CHECK2-NEXT: [[ADD23:%.*]] = add nsw i32 0, [[MUL22]] +// CHECK2-NEXT: store i32 [[ADD23]], i32* [[I6]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK2: .omp.final.done: // CHECK2-NEXT: br label [[OMP_PRECOND_END]] @@ -5966,7 +5964,7 @@ int main() { // CHECK3: omp.dispatch.cond: // CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -7861,7 +7859,7 @@ int main() { // CHECK4: omp.dispatch.cond: // CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK4-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// CHECK4-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK4-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -10353,34 +10351,33 @@ int main() { // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: // CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 // CHECK9-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP14]] +// CHECK9-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP13]], [[CONV7]] // CHECK9-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP15]] to i32 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: // CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CONV9:%.*]] = sext i32 [[TMP16]] to i64 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] -// CHECK9-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 -// CHECK9-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK9-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK9-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: // CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !47 -// CHECK9-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: // CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 @@ -10393,34 +10390,34 @@ int main() { // CHECK9-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !47 // CHECK9-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !47 // CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !47 -// CHECK9-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM13]] -// CHECK9-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX14]], align 8, !llvm.access.group !47 -// CHECK9-NEXT: [[ADD15:%.*]] = fadd double [[TMP25]], [[TMP28]] +// CHECK9-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM12]] +// CHECK9-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX13]], align 8, !llvm.access.group !47 +// CHECK9-NEXT: [[ADD14:%.*]] = fadd double [[TMP25]], [[TMP28]] // CHECK9-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !47 // CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !47 -// CHECK9-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK9-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM16]] -// CHECK9-NEXT: store double [[ADD15]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !47 +// CHECK9-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM15]] +// CHECK9-NEXT: store double [[ADD14]], double* [[ARRAYIDX16]], align 8, !llvm.access.group !47 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: // CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 -// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP31]], 1 -// CHECK9-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK9-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK9-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: // CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] -// CHECK9-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK9-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] -// CHECK9-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK9-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: // CHECK9-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -10431,11 +10428,11 @@ int main() { // CHECK9-NEXT: br i1 [[TMP39]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB21:%.*]] = sub nsw i32 [[TMP40]], 0 -// CHECK9-NEXT: [[DIV22:%.*]] = sdiv i32 [[SUB21]], 1 -// CHECK9-NEXT: [[MUL23:%.*]] = mul nsw i32 [[DIV22]], 1 -// CHECK9-NEXT: [[ADD24:%.*]] = add nsw i32 0, [[MUL23]] -// CHECK9-NEXT: store i32 [[ADD24]], i32* [[I6]], align 4 +// CHECK9-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP40]], 0 +// CHECK9-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK9-NEXT: [[MUL22:%.*]] = mul nsw i32 [[DIV21]], 1 +// CHECK9-NEXT: [[ADD23:%.*]] = add nsw i32 0, [[MUL22]] +// CHECK9-NEXT: store i32 [[ADD23]], i32* [[I6]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: br label [[OMP_PRECOND_END]] @@ -12724,34 +12721,33 @@ int main() { // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: // CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 // CHECK9-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP14]] +// CHECK9-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP13]], [[CONV7]] // CHECK9-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP15]] to i32 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: // CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CONV9:%.*]] = sext i32 [[TMP16]] to i64 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] -// CHECK9-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 -// CHECK9-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK9-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK9-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: // CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !89 // CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !89 -// CHECK9-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: // CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !89 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 @@ -12764,34 +12760,34 @@ int main() { // CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !89 // CHECK9-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP3]], align 8, !llvm.access.group !89 // CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !89 -// CHECK9-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i64 [[IDXPROM13]] -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[ARRAYIDX14]], align 4, !llvm.access.group !89 -// CHECK9-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP25]], [[TMP28]] +// CHECK9-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i64 [[IDXPROM12]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[ARRAYIDX13]], align 4, !llvm.access.group !89 +// CHECK9-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP25]], [[TMP28]] // CHECK9-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP1]], align 8, !llvm.access.group !89 // CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !89 -// CHECK9-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK9-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i64 [[IDXPROM16]] -// CHECK9-NEXT: store i32 [[ADD15]], i32* [[ARRAYIDX17]], align 4, !llvm.access.group !89 +// CHECK9-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i64 [[IDXPROM15]] +// CHECK9-NEXT: store i32 [[ADD14]], i32* [[ARRAYIDX16]], align 4, !llvm.access.group !89 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: // CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !89 -// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP31]], 1 -// CHECK9-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !89 +// CHECK9-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK9-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !89 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP90:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: // CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] -// CHECK9-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK9-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] -// CHECK9-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK9-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: // CHECK9-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -12802,11 +12798,11 @@ int main() { // CHECK9-NEXT: br i1 [[TMP39]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB21:%.*]] = sub nsw i32 [[TMP40]], 0 -// CHECK9-NEXT: [[DIV22:%.*]] = sdiv i32 [[SUB21]], 1 -// CHECK9-NEXT: [[MUL23:%.*]] = mul nsw i32 [[DIV22]], 1 -// CHECK9-NEXT: [[ADD24:%.*]] = add nsw i32 0, [[MUL23]] -// CHECK9-NEXT: store i32 [[ADD24]], i32* [[I6]], align 4 +// CHECK9-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP40]], 0 +// CHECK9-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK9-NEXT: [[MUL22:%.*]] = mul nsw i32 [[DIV21]], 1 +// CHECK9-NEXT: [[ADD23:%.*]] = add nsw i32 0, [[MUL22]] +// CHECK9-NEXT: store i32 [[ADD23]], i32* [[I6]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: br label [[OMP_PRECOND_END]] @@ -15105,34 +15101,33 @@ int main() { // CHECK10-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK10: omp.dispatch.cond: // CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 // CHECK10-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK10-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP14]] +// CHECK10-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK10-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP13]], [[CONV7]] // CHECK10-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: // CHECK10-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK10-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP15]] to i32 // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: // CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CONV9:%.*]] = sext i32 [[TMP16]] to i64 // CHECK10-NEXT: br label [[COND_END]] // CHECK10: cond.end: -// CHECK10-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] -// CHECK10-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 -// CHECK10-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK10-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK10-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK10-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK10-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK10-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK10: omp.dispatch.body: // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: // CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !47 -// CHECK10-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK10-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK10-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK10-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: // CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 @@ -15145,34 +15140,34 @@ int main() { // CHECK10-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !47 // CHECK10-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !47 // CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !47 -// CHECK10-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK10-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM13]] -// CHECK10-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX14]], align 8, !llvm.access.group !47 -// CHECK10-NEXT: [[ADD15:%.*]] = fadd double [[TMP25]], [[TMP28]] +// CHECK10-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK10-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM12]] +// CHECK10-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX13]], align 8, !llvm.access.group !47 +// CHECK10-NEXT: [[ADD14:%.*]] = fadd double [[TMP25]], [[TMP28]] // CHECK10-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !47 // CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !47 -// CHECK10-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK10-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM16]] -// CHECK10-NEXT: store double [[ADD15]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !47 +// CHECK10-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK10-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM15]] +// CHECK10-NEXT: store double [[ADD14]], double* [[ARRAYIDX16]], align 8, !llvm.access.group !47 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: // CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 -// CHECK10-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP31]], 1 -// CHECK10-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK10-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK10-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK10: omp.dispatch.inc: // CHECK10-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK10-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK10-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] -// CHECK10-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK10-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4 // CHECK10-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK10-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] -// CHECK10-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK10-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK10: omp.dispatch.end: // CHECK10-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -15183,11 +15178,11 @@ int main() { // CHECK10-NEXT: br i1 [[TMP39]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK10: .omp.final.then: // CHECK10-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK10-NEXT: [[SUB21:%.*]] = sub nsw i32 [[TMP40]], 0 -// CHECK10-NEXT: [[DIV22:%.*]] = sdiv i32 [[SUB21]], 1 -// CHECK10-NEXT: [[MUL23:%.*]] = mul nsw i32 [[DIV22]], 1 -// CHECK10-NEXT: [[ADD24:%.*]] = add nsw i32 0, [[MUL23]] -// CHECK10-NEXT: store i32 [[ADD24]], i32* [[I6]], align 4 +// CHECK10-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP40]], 0 +// CHECK10-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK10-NEXT: [[MUL22:%.*]] = mul nsw i32 [[DIV21]], 1 +// CHECK10-NEXT: [[ADD23:%.*]] = add nsw i32 0, [[MUL22]] +// CHECK10-NEXT: store i32 [[ADD23]], i32* [[I6]], align 4 // CHECK10-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK10: .omp.final.done: // CHECK10-NEXT: br label [[OMP_PRECOND_END]] @@ -17476,34 +17471,33 @@ int main() { // CHECK10-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK10: omp.dispatch.cond: // CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 // CHECK10-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK10-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP14]] +// CHECK10-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK10-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP13]], [[CONV7]] // CHECK10-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: // CHECK10-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK10-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP15]] to i32 // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: // CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CONV9:%.*]] = sext i32 [[TMP16]] to i64 // CHECK10-NEXT: br label [[COND_END]] // CHECK10: cond.end: -// CHECK10-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] -// CHECK10-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 -// CHECK10-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK10-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK10-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK10-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK10-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK10-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK10: omp.dispatch.body: // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: // CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !89 // CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !89 -// CHECK10-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK10-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK10-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK10-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: // CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !89 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 @@ -17516,34 +17510,34 @@ int main() { // CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !89 // CHECK10-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP3]], align 8, !llvm.access.group !89 // CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !89 -// CHECK10-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK10-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i64 [[IDXPROM13]] -// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[ARRAYIDX14]], align 4, !llvm.access.group !89 -// CHECK10-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP25]], [[TMP28]] +// CHECK10-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK10-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i64 [[IDXPROM12]] +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[ARRAYIDX13]], align 4, !llvm.access.group !89 +// CHECK10-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP25]], [[TMP28]] // CHECK10-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP1]], align 8, !llvm.access.group !89 // CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !89 -// CHECK10-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK10-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i64 [[IDXPROM16]] -// CHECK10-NEXT: store i32 [[ADD15]], i32* [[ARRAYIDX17]], align 4, !llvm.access.group !89 +// CHECK10-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK10-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i64 [[IDXPROM15]] +// CHECK10-NEXT: store i32 [[ADD14]], i32* [[ARRAYIDX16]], align 4, !llvm.access.group !89 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: // CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !89 -// CHECK10-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP31]], 1 -// CHECK10-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !89 +// CHECK10-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK10-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !89 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP90:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK10: omp.dispatch.inc: // CHECK10-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK10-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK10-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] -// CHECK10-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK10-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4 // CHECK10-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK10-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] -// CHECK10-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK10-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK10: omp.dispatch.end: // CHECK10-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -17554,11 +17548,11 @@ int main() { // CHECK10-NEXT: br i1 [[TMP39]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK10: .omp.final.then: // CHECK10-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK10-NEXT: [[SUB21:%.*]] = sub nsw i32 [[TMP40]], 0 -// CHECK10-NEXT: [[DIV22:%.*]] = sdiv i32 [[SUB21]], 1 -// CHECK10-NEXT: [[MUL23:%.*]] = mul nsw i32 [[DIV22]], 1 -// CHECK10-NEXT: [[ADD24:%.*]] = add nsw i32 0, [[MUL23]] -// CHECK10-NEXT: store i32 [[ADD24]], i32* [[I6]], align 4 +// CHECK10-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP40]], 0 +// CHECK10-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK10-NEXT: [[MUL22:%.*]] = mul nsw i32 [[DIV21]], 1 +// CHECK10-NEXT: [[ADD23:%.*]] = add nsw i32 0, [[MUL22]] +// CHECK10-NEXT: store i32 [[ADD23]], i32* [[I6]], align 4 // CHECK10-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK10: .omp.final.done: // CHECK10-NEXT: br label [[OMP_PRECOND_END]] @@ -19807,7 +19801,7 @@ int main() { // CHECK11: omp.dispatch.cond: // CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -22102,7 +22096,7 @@ int main() { // CHECK11: omp.dispatch.cond: // CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -24407,7 +24401,7 @@ int main() { // CHECK12: omp.dispatch.cond: // CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK12-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// CHECK12-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK12-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK12: cond.true: // CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -26702,7 +26696,7 @@ int main() { // CHECK12: omp.dispatch.cond: // CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK12-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// CHECK12-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK12-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK12: cond.true: // CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp index 4256228fdea9a..de393701c2f0f 100644 --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp @@ -18721,34 +18721,33 @@ int bar(int n){ // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CONV7:%.*]] = sext i32 [[TMP9]] to i64 // CHECK1-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP10]] +// CHECK1-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP10]] to i32 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP9]], [[CONV7]] // CHECK1-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP11]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CONV9:%.*]] = sext i32 [[TMP12]] to i64 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[TMP11]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] -// CHECK1-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 -// CHECK1-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 @@ -18765,20 +18764,20 @@ int bar(int n){ // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: // CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK1-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK1-NEXT: store i32 [[ADD15]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: // CHECK1-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -20328,34 +20327,33 @@ int bar(int n){ // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CONV7:%.*]] = sext i32 [[TMP9]] to i64 // CHECK2-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP10]] +// CHECK2-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP10]] to i32 +// CHECK2-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP9]], [[CONV7]] // CHECK2-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP11]] to i32 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CONV9:%.*]] = sext i32 [[TMP12]] to i64 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i64 [ [[TMP11]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] -// CHECK2-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 -// CHECK2-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK2-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK2-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK2-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK2-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK2-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: // CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 @@ -20372,20 +20370,20 @@ int bar(int n){ // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: // CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK2-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK2-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: // CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK2-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK2-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK2-NEXT: store i32 [[ADD15]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK2-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: // CHECK2-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -21917,7 +21915,7 @@ int bar(int n){ // CHECK3: omp.dispatch.cond: // CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -23463,7 +23461,7 @@ int bar(int n){ // CHECK4: omp.dispatch.cond: // CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK4-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] +// CHECK4-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] // CHECK4-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp index 7b17bb7824a03..4b558c34e1e6f 100644 --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp @@ -274,64 +274,63 @@ int main(int argc, char **argv) { // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CONV8:%.*]] = sext i32 [[TMP10]] to i64 // CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CMP9:%.*]] = icmp ugt i64 [[CONV8]], [[TMP11]] +// CHECK1-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP11]] to i32 +// CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP10]], [[CONV8]] // CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV10:%.*]] = trunc i64 [[TMP12]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: // CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CONV10:%.*]] = sext i32 [[TMP13]] to i64 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[TMP12]], [[COND_TRUE]] ], [ [[CONV10]], [[COND_FALSE]] ] -// CHECK1-NEXT: [[CONV11:%.*]] = trunc i64 [[COND]] to i32 -// CHECK1-NEXT: store i32 [[CONV11]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV10]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK1-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP13:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I7]], align 4 // CHECK1-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I7]]) #[[ATTR4:[0-9]+]] -// CHECK1-NEXT: [[CALL14:%.*]] = call i32 @_Z3fooPi(i32* [[TMP0]]) #[[ATTR4]] -// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[CALL]], [[CALL14]] -// CHECK1-NEXT: [[CALL16:%.*]] = call i32 @_Z3fooPi(i32* [[CONV]]) #[[ATTR4]] -// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[ADD15]], [[CALL16]] -// CHECK1-NEXT: store i32 [[ADD17]], i32* [[TMP0]], align 4 +// CHECK1-NEXT: [[CALL13:%.*]] = call i32 @_Z3fooPi(i32* [[TMP0]]) #[[ATTR4]] +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[CALL]], [[CALL13]] +// CHECK1-NEXT: [[CALL15:%.*]] = call i32 @_Z3fooPi(i32* [[CONV]]) #[[ATTR4]] +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD14]], [[CALL15]] +// CHECK1-NEXT: store i32 [[ADD16]], i32* [[TMP0]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK1-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK1-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK1-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK1-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: // CHECK1-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -574,7 +573,7 @@ int main(int argc, char **argv) { // CHECK2: omp.dispatch.cond: // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP10]], [[TMP11]] +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] // CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -870,7 +869,7 @@ int main(int argc, char **argv) { // CHECK3: omp.dispatch.cond: // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] // CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -1179,64 +1178,63 @@ int main(int argc, char **argv) { // CHECK4-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK4: omp.dispatch.cond: // CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CONV8:%.*]] = sext i32 [[TMP10]] to i64 // CHECK4-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK4-NEXT: [[CMP9:%.*]] = icmp ugt i64 [[CONV8]], [[TMP11]] +// CHECK4-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP11]] to i32 +// CHECK4-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP10]], [[CONV8]] // CHECK4-NEXT: br i1 [[CMP9]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK4-NEXT: [[CONV10:%.*]] = trunc i64 [[TMP12]] to i32 // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: // CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CONV10:%.*]] = sext i32 [[TMP13]] to i64 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i64 [ [[TMP12]], [[COND_TRUE]] ], [ [[CONV10]], [[COND_FALSE]] ] -// CHECK4-NEXT: [[CONV11:%.*]] = trunc i64 [[COND]] to i32 -// CHECK4-NEXT: store i32 [[CONV11]], i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[CONV10]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK4-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK4-NEXT: br i1 [[CMP12]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK4-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK4-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK4: omp.dispatch.body: // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: // CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP13:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK4-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK4-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: // CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[I7]], align 4 // CHECK4-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I7]]) #[[ATTR4:[0-9]+]] -// CHECK4-NEXT: [[CALL14:%.*]] = call i32 @_Z3fooPi(i32* [[TMP0]]) #[[ATTR4]] -// CHECK4-NEXT: [[ADD15:%.*]] = add nsw i32 [[CALL]], [[CALL14]] -// CHECK4-NEXT: [[CALL16:%.*]] = call i32 @_Z3fooPi(i32* [[CONV]]) #[[ATTR4]] -// CHECK4-NEXT: [[ADD17:%.*]] = add nsw i32 [[ADD15]], [[CALL16]] -// CHECK4-NEXT: store i32 [[ADD17]], i32* [[TMP0]], align 4 +// CHECK4-NEXT: [[CALL13:%.*]] = call i32 @_Z3fooPi(i32* [[TMP0]]) #[[ATTR4]] +// CHECK4-NEXT: [[ADD14:%.*]] = add nsw i32 [[CALL]], [[CALL13]] +// CHECK4-NEXT: [[CALL15:%.*]] = call i32 @_Z3fooPi(i32* [[CONV]]) #[[ATTR4]] +// CHECK4-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD14]], [[CALL15]] +// CHECK4-NEXT: store i32 [[ADD16]], i32* [[TMP0]], align 4 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: // CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK4-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK4-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK4: omp.dispatch.inc: // CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK4-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK4-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4 // CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK4-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK4-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK4: omp.dispatch.end: // CHECK4-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -1479,7 +1477,7 @@ int main(int argc, char **argv) { // CHECK5: omp.dispatch.cond: // CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] // CHECK5-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -1775,7 +1773,7 @@ int main(int argc, char **argv) { // CHECK6: omp.dispatch.cond: // CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK6-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP10]], [[TMP11]] +// CHECK6-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] // CHECK6-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: // CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp index 35816730ed9b0..a348ff7d45780 100644 --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp @@ -9488,73 +9488,73 @@ int bar(int n){ // CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK1-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] // CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !12 // CHECK1-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !12 // CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[L_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[L_CASTED]], align 8 +// CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[L_CASTED]], align 8, !llvm.access.group !12 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK1-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP15]] to i8* -// CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 +// CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8, !llvm.access.group !12 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 // CHECK1-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP17]] to i8* -// CHECK1-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8 +// CHECK1-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8, !llvm.access.group !12 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 // CHECK1-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP19]] to i8* -// CHECK1-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 +// CHECK1-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8, !llvm.access.group !12 // CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 // CHECK1-NEXT: [[TMP29:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 8 +// CHECK1-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 8, !llvm.access.group !12 // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 // CHECK1-NEXT: [[TMP31:%.*]] = inttoptr i64 [[TMP21]] to i8* -// CHECK1-NEXT: store i8* [[TMP31]], i8** [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK1-NEXT: store i8* [[TMP31]], i8** [[TMP30]], align 8, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[TMP34:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP33]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i64)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP34]], i64 5) +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP33]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i64)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP34]], i64 5), !llvm.access.group !12 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] -// CHECK1-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] -// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] -// CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[CMP13:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] // CHECK1-NEXT: br i1 [[CMP13]], label [[COND_TRUE14:%.*]], label [[COND_FALSE15:%.*]] // CHECK1: cond.true14: -// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4, !llvm.access.group !12 // CHECK1-NEXT: br label [[COND_END16:%.*]] // CHECK1: cond.false15: -// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 // CHECK1-NEXT: br label [[COND_END16]] // CHECK1: cond.end16: // CHECK1-NEXT: [[COND17:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE14]] ], [ [[TMP44]], [[COND_FALSE15]] ] -// CHECK1-NEXT: store i32 [[COND17]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP45]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK1-NEXT: store i32 [[COND17]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: store i32 [[TMP45]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -9646,64 +9646,63 @@ int bar(int n){ // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CONV7:%.*]] = sext i32 [[TMP9]] to i64 // CHECK1-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP10]] +// CHECK1-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP10]] to i32 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP9]], [[CONV7]] // CHECK1-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP11]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CONV9:%.*]] = sext i32 [[TMP12]] to i64 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[TMP11]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] -// CHECK1-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 -// CHECK1-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 +// CHECK1-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I6]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !16 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !16 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !16 +// CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 8, !llvm.access.group !16 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK1-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK1-NEXT: store i32 [[ADD15]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: // CHECK1-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -9714,11 +9713,11 @@ int bar(int n){ // CHECK1-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB16:%.*]] = sub nsw i32 [[TMP30]], 0 -// CHECK1-NEXT: [[DIV17:%.*]] = sdiv i32 [[SUB16]], 1 -// CHECK1-NEXT: [[MUL18:%.*]] = mul nsw i32 [[DIV17]], 1 -// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i32 0, [[MUL18]] -// CHECK1-NEXT: store i32 [[ADD19]], i32* [[I6]], align 4 +// CHECK1-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP30]], 0 +// CHECK1-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 +// CHECK1-NEXT: [[MUL17:%.*]] = mul nsw i32 [[DIV16]], 1 +// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 0, [[MUL17]] +// CHECK1-NEXT: store i32 [[ADD18]], i32* [[I6]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 @@ -9829,66 +9828,66 @@ int bar(int n){ // CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !19 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 // CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 // CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !19 // CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 +// CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !19 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK1-NEXT: [[TMP21:%.*]] = inttoptr i64 [[TMP15]] to i8* -// CHECK1-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8 +// CHECK1-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8, !llvm.access.group !19 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 // CHECK1-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP17]] to i8* -// CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 +// CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8, !llvm.access.group !19 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 // CHECK1-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP19]] to i8* -// CHECK1-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8 +// CHECK1-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8, !llvm.access.group !19 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 // CHECK1-NEXT: [[TMP27:%.*]] = bitcast [1000 x i16]* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// CHECK1-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4, !llvm.access.group !19 // CHECK1-NEXT: [[TMP30:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP29]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i16]*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP30]], i64 4) +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP29]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i16]*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP30]], i64 4), !llvm.access.group !19 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] -// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 // CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 // CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] -// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !19 // CHECK1-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP37]], [[TMP38]] // CHECK1-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK1: cond.true11: -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !19 // CHECK1-NEXT: br label [[COND_END13:%.*]] // CHECK1: cond.false12: -// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 // CHECK1-NEXT: br label [[COND_END13]] // CHECK1: cond.end13: // CHECK1-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP39]], [[COND_TRUE11]] ], [ [[TMP40]], [[COND_FALSE12]] ] -// CHECK1-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP41]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK1-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: store i32 [[TMP41]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -9969,33 +9968,33 @@ int bar(int n){ // CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK1-NEXT: [[CONV6:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group !22 // CHECK1-NEXT: [[CMP7:%.*]] = icmp ule i64 [[CONV6]], [[TMP11]] // CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !22 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !22 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], [1000 x i16]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 +// CHECK1-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX]], align 2, !llvm.access.group !22 // CHECK1-NEXT: [[CONV8:%.*]] = sext i16 [[TMP14]] to i32 // CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], 1 // CHECK1-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i16 -// CHECK1-NEXT: store i16 [[CONV10]], i16* [[ARRAYIDX]], align 2 +// CHECK1-NEXT: store i16 [[CONV10]], i16* [[ARRAYIDX]], align 2, !llvm.access.group !22 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 // CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -10084,53 +10083,53 @@ int bar(int n){ // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK1-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP8]] to i8* -// CHECK1-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 8 +// CHECK1-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 8, !llvm.access.group !25 // CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 // CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP10]] to i8* -// CHECK1-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8 +// CHECK1-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8, !llvm.access.group !25 // CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 // CHECK1-NEXT: [[TMP16:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8 +// CHECK1-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8, !llvm.access.group !25 // CHECK1-NEXT: [[TMP17:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP17]], i64 3) +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP17]], i64 3), !llvm.access.group !25 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 // CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK1: cond.true5: // CHECK1-NEXT: br label [[COND_END7:%.*]] // CHECK1: cond.false6: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 // CHECK1-NEXT: br label [[COND_END7]] // CHECK1: cond.end7: // CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] -// CHECK1-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK1-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -10183,31 +10182,31 @@ int bar(int n){ // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK1-NEXT: [[CONV2:%.*]] = sext i32 [[TMP6]] to i64 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group !28 // CHECK1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !28 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !28 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -10302,60 +10301,60 @@ int bar(int n){ // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK1-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !31 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[F_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP11]], i32* [[CONV3]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[F_CASTED]], align 8 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[CONV3]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[F_CASTED]], align 8, !llvm.access.group !31 // CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP8]] to i8* -// CHECK1-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8 +// CHECK1-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8, !llvm.access.group !31 // CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 // CHECK1-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP10]] to i8* -// CHECK1-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8 +// CHECK1-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8, !llvm.access.group !31 // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 // CHECK1-NEXT: [[TMP18:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 8 +// CHECK1-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 8, !llvm.access.group !31 // CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 // CHECK1-NEXT: [[TMP20:%.*]] = inttoptr i64 [[TMP12]] to i8* -// CHECK1-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8 +// CHECK1-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8, !llvm.access.group !31 // CHECK1-NEXT: [[TMP21:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, [10 x [10 x i32]]*, i64)* @__omp_outlined__7 to i8*), i8* null, i8** [[TMP21]], i64 4) +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, [10 x [10 x i32]]*, i64)* @__omp_outlined__7 to i8*), i8* null, i8** [[TMP21]], i64 4), !llvm.access.group !31 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 // CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 // CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP28]], 99 // CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]] // CHECK1: cond.true7: // CHECK1-NEXT: br label [[COND_END9:%.*]] // CHECK1: cond.false8: -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 // CHECK1-NEXT: br label [[COND_END9]] // CHECK1: cond.end9: // CHECK1-NEXT: [[COND10:%.*]] = phi i32 [ 99, [[COND_TRUE7]] ], [ [[TMP29]], [[COND_FALSE8]] ] -// CHECK1-NEXT: store i32 [[COND10]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP30]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK1-NEXT: store i32 [[COND10]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: store i32 [[TMP30]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -10415,49 +10414,49 @@ int bar(int n){ // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK1-NEXT: [[CONV4:%.*]] = sext i32 [[TMP6]] to i64 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group !34 // CHECK1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV4]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !34 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK1-NEXT: [[DIV5:%.*]] = sdiv i32 [[TMP10]], 10 // CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[DIV5]], 10 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL6]] // CHECK1-NEXT: [[MUL7:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL7]] -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[J]], align 4 -// CHECK1-NEXT: store i32 10, i32* [[K]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[J]], align 4, !llvm.access.group !34 +// CHECK1-NEXT: store i32 10, i32* [[K]], align 4, !llvm.access.group !34 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !34 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !34 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !34 // CHECK1-NEXT: [[MUL9:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP11]], [[MUL9]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4, !llvm.access.group !34 // CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD10]], [[TMP14]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !34 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[J]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !34 // CHECK1-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP16]] to i64 // CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM12]] -// CHECK1-NEXT: store i32 [[ADD11]], i32* [[ARRAYIDX13]], align 4 +// CHECK1-NEXT: store i32 [[ADD11]], i32* [[ARRAYIDX13]], align 4, !llvm.access.group !34 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 // CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -10575,69 +10574,69 @@ int bar(int n){ // CHECK2-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK2-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] // CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[L_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP18]], i32* [[L_CASTED]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[L_CASTED]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[L_ADDR]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: store i32 [[TMP18]], i32* [[L_CASTED]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[L_CASTED]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK2-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP14]] to i8* -// CHECK2-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4 +// CHECK2-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 // CHECK2-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP15]] to i8* -// CHECK2-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4 +// CHECK2-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 // CHECK2-NEXT: [[TMP25:%.*]] = inttoptr i32 [[TMP17]] to i8* -// CHECK2-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4 +// CHECK2-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 // CHECK2-NEXT: [[TMP27:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* -// CHECK2-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 4 +// CHECK2-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 // CHECK2-NEXT: [[TMP29:%.*]] = inttoptr i32 [[TMP19]] to i8* -// CHECK2-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 4 -// CHECK2-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 +// CHECK2-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[TMP32:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP31]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP32]], i32 5) +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP31]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP32]], i32 5), !llvm.access.group !12 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] -// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] -// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] -// CHECK2-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] // CHECK2-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK2: cond.true11: -// CHECK2-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !12 // CHECK2-NEXT: br label [[COND_END13:%.*]] // CHECK2: cond.false12: -// CHECK2-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 // CHECK2-NEXT: br label [[COND_END13]] // CHECK2: cond.end13: // CHECK2-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE11]] ], [ [[TMP42]], [[COND_FALSE12]] ] -// CHECK2-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP43]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK2-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: store i32 [[TMP43]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -10726,7 +10725,7 @@ int bar(int n){ // CHECK2: omp.dispatch.cond: // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] +// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] // CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -10746,28 +10745,28 @@ int bar(int n){ // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 // CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !16 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !16 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP19]] -// CHECK2-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK2-NEXT: store i32 [[TMP20]], i32* [[L_ADDR]], align 4 +// CHECK2-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !16 +// CHECK2-NEXT: store i32 [[TMP20]], i32* [[L_ADDR]], align 4, !llvm.access.group !16 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: @@ -10901,63 +10900,63 @@ int bar(int n){ // CHECK2-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !19 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] // CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !19 // CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK2-NEXT: [[TMP19:%.*]] = inttoptr i32 [[TMP14]] to i8* -// CHECK2-NEXT: store i8* [[TMP19]], i8** [[TMP18]], align 4 +// CHECK2-NEXT: store i8* [[TMP19]], i8** [[TMP18]], align 4, !llvm.access.group !19 // CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 // CHECK2-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP15]] to i8* -// CHECK2-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4 +// CHECK2-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4, !llvm.access.group !19 // CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 // CHECK2-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP17]] to i8* -// CHECK2-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4 +// CHECK2-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4, !llvm.access.group !19 // CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 // CHECK2-NEXT: [[TMP25:%.*]] = bitcast [1000 x i16]* [[TMP0]] to i8* -// CHECK2-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4 -// CHECK2-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK2-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4, !llvm.access.group !19 // CHECK2-NEXT: [[TMP28:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP27]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i16]*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP28]], i32 4) +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP27]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i16]*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP28]], i32 4), !llvm.access.group !19 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 // CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] -// CHECK2-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] -// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 // CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] -// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !19 // CHECK2-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]] // CHECK2-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK2: cond.true10: -// CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !19 // CHECK2-NEXT: br label [[COND_END12:%.*]] // CHECK2: cond.false11: -// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 // CHECK2-NEXT: br label [[COND_END12]] // CHECK2: cond.end12: // CHECK2-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ] -// CHECK2-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP39]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK2-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: store i32 [[TMP39]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -11035,31 +11034,31 @@ int bar(int n){ // CHECK2-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group !22 // CHECK2-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] // CHECK2-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !22 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !22 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], [1000 x i16]* [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK2-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 +// CHECK2-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX]], align 2, !llvm.access.group !22 // CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 // CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV]], 1 // CHECK2-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK2-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX]], align 2 +// CHECK2-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX]], align 2, !llvm.access.group !22 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -11148,51 +11147,51 @@ int bar(int n){ // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK2-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 // CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK2-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK2-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 +// CHECK2-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4, !llvm.access.group !25 // CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 // CHECK2-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to i8* -// CHECK2-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 +// CHECK2-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4, !llvm.access.group !25 // CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 // CHECK2-NEXT: [[TMP14:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* -// CHECK2-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4 +// CHECK2-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4, !llvm.access.group !25 // CHECK2-NEXT: [[TMP15:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP15]], i32 3) +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP15]], i32 3), !llvm.access.group !25 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 // CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 // CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK2: cond.true5: // CHECK2-NEXT: br label [[COND_END7:%.*]] // CHECK2: cond.false6: -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 // CHECK2-NEXT: br label [[COND_END7]] // CHECK2: cond.end7: // CHECK2-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] -// CHECK2-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK2-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -11243,29 +11242,29 @@ int bar(int n){ // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group !28 // CHECK2-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !28 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !28 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP9]] -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 // CHECK2-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 // CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -11357,57 +11356,57 @@ int bar(int n){ // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK2-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[F_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP9]], i32* [[F_CASTED]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[F_CASTED]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[F_ADDR]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: store i32 [[TMP9]], i32* [[F_CASTED]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[F_CASTED]], align 4, !llvm.access.group !31 // CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK2-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK2-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 +// CHECK2-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4, !llvm.access.group !31 // CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 // CHECK2-NEXT: [[TMP14:%.*]] = inttoptr i32 [[TMP8]] to i8* -// CHECK2-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4 +// CHECK2-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4, !llvm.access.group !31 // CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 // CHECK2-NEXT: [[TMP16:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* -// CHECK2-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 4 +// CHECK2-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 4, !llvm.access.group !31 // CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 // CHECK2-NEXT: [[TMP18:%.*]] = inttoptr i32 [[TMP10]] to i8* -// CHECK2-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 4 +// CHECK2-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 4, !llvm.access.group !31 // CHECK2-NEXT: [[TMP19:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x [10 x i32]]*, i32)* @__omp_outlined__7 to i8*), i8* null, i8** [[TMP19]], i32 4) +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x [10 x i32]]*, i32)* @__omp_outlined__7 to i8*), i8* null, i8** [[TMP19]], i32 4), !llvm.access.group !31 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 // CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK2-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 // CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP26]], 99 // CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] // CHECK2: cond.true6: // CHECK2-NEXT: br label [[COND_END8:%.*]] // CHECK2: cond.false7: -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 // CHECK2-NEXT: br label [[COND_END8]] // CHECK2: cond.end8: // CHECK2-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP27]], [[COND_FALSE7]] ] -// CHECK2-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK2-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -11464,46 +11463,46 @@ int bar(int n){ // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group !34 // CHECK2-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !34 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK2-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP10]], 10 // CHECK2-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 10 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL3]] // CHECK2-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]] -// CHECK2-NEXT: store i32 [[ADD5]], i32* [[J]], align 4 -// CHECK2-NEXT: store i32 10, i32* [[K]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[F_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[ADD5]], i32* [[J]], align 4, !llvm.access.group !34 +// CHECK2-NEXT: store i32 10, i32* [[K]], align 4, !llvm.access.group !34 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !34 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !34 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[F_ADDR]], align 4, !llvm.access.group !34 // CHECK2-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], [[MUL6]] -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4, !llvm.access.group !34 // CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD7]], [[TMP14]] -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !34 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i32 0, i32 [[TMP15]] -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[J]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !34 // CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP16]] -// CHECK2-NEXT: store i32 [[ADD8]], i32* [[ARRAYIDX9]], align 4 +// CHECK2-NEXT: store i32 [[ADD8]], i32* [[ARRAYIDX9]], align 4, !llvm.access.group !34 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 // CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK2-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -11621,69 +11620,69 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !12 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK3-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] // CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[L_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP18]], i32* [[L_CASTED]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[L_CASTED]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[L_ADDR]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[L_CASTED]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[L_CASTED]], align 4, !llvm.access.group !12 // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP14]] to i8* -// CHECK3-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4 +// CHECK3-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4, !llvm.access.group !12 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 // CHECK3-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP15]] to i8* -// CHECK3-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4 +// CHECK3-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4, !llvm.access.group !12 // CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 // CHECK3-NEXT: [[TMP25:%.*]] = inttoptr i32 [[TMP17]] to i8* -// CHECK3-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4 +// CHECK3-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4, !llvm.access.group !12 // CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 // CHECK3-NEXT: [[TMP27:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* -// CHECK3-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 4 +// CHECK3-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 4, !llvm.access.group !12 // CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 // CHECK3-NEXT: [[TMP29:%.*]] = inttoptr i32 [[TMP19]] to i8* -// CHECK3-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 +// CHECK3-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4, !llvm.access.group !12 // CHECK3-NEXT: [[TMP32:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP31]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP32]], i32 5) +// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP31]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP32]], i32 5), !llvm.access.group !12 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 // CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] -// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 // CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] -// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !12 // CHECK3-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] // CHECK3-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK3: cond.true11: -// CHECK3-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !12 // CHECK3-NEXT: br label [[COND_END13:%.*]] // CHECK3: cond.false12: -// CHECK3-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 // CHECK3-NEXT: br label [[COND_END13]] // CHECK3: cond.end13: // CHECK3-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE11]] ], [ [[TMP42]], [[COND_FALSE12]] ] -// CHECK3-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP43]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK3-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: store i32 [[TMP43]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -11772,7 +11771,7 @@ int bar(int n){ // CHECK3: omp.dispatch.cond: // CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -11792,28 +11791,28 @@ int bar(int n){ // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 // CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !16 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP19]] -// CHECK3-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK3-NEXT: store i32 [[TMP20]], i32* [[L_ADDR]], align 4 +// CHECK3-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: store i32 [[TMP20]], i32* [[L_ADDR]], align 4, !llvm.access.group !16 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: @@ -11947,63 +11946,63 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !19 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK3-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !19 // CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP19:%.*]] = inttoptr i32 [[TMP14]] to i8* -// CHECK3-NEXT: store i8* [[TMP19]], i8** [[TMP18]], align 4 +// CHECK3-NEXT: store i8* [[TMP19]], i8** [[TMP18]], align 4, !llvm.access.group !19 // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 // CHECK3-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP15]] to i8* -// CHECK3-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4 +// CHECK3-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4, !llvm.access.group !19 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 // CHECK3-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP17]] to i8* -// CHECK3-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4 +// CHECK3-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4, !llvm.access.group !19 // CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 // CHECK3-NEXT: [[TMP25:%.*]] = bitcast [1000 x i16]* [[TMP0]] to i8* -// CHECK3-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK3-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4, !llvm.access.group !19 // CHECK3-NEXT: [[TMP28:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP27]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i16]*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP28]], i32 4) +// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP27]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i16]*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP28]], i32 4), !llvm.access.group !19 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] -// CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 // CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] -// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !19 // CHECK3-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]] // CHECK3-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK3: cond.true10: -// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !19 // CHECK3-NEXT: br label [[COND_END12:%.*]] // CHECK3: cond.false11: -// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 // CHECK3-NEXT: br label [[COND_END12]] // CHECK3: cond.end12: // CHECK3-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ] -// CHECK3-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP39]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK3-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: store i32 [[TMP39]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -12081,31 +12080,31 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group !22 // CHECK3-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] // CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !22 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], [1000 x i16]* [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 +// CHECK3-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX]], align 2, !llvm.access.group !22 // CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 // CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK3-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX]], align 2 +// CHECK3-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX]], align 2, !llvm.access.group !22 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 // CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -12194,51 +12193,51 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK3-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK3-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 +// CHECK3-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4, !llvm.access.group !25 // CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 // CHECK3-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to i8* -// CHECK3-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 +// CHECK3-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4, !llvm.access.group !25 // CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 // CHECK3-NEXT: [[TMP14:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* -// CHECK3-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4 +// CHECK3-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4, !llvm.access.group !25 // CHECK3-NEXT: [[TMP15:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP15]], i32 3) +// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP15]], i32 3), !llvm.access.group !25 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 // CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK3: cond.true5: // CHECK3-NEXT: br label [[COND_END7:%.*]] // CHECK3: cond.false6: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 // CHECK3-NEXT: br label [[COND_END7]] // CHECK3: cond.end7: // CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] -// CHECK3-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK3-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -12289,29 +12288,29 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group !28 // CHECK3-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !28 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !28 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP9]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 // CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -12403,57 +12402,57 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK3-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[F_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], i32* [[F_CASTED]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[F_CASTED]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[F_ADDR]], align 4, !llvm.access.group !31 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[F_CASTED]], align 4, !llvm.access.group !31 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[F_CASTED]], align 4, !llvm.access.group !31 // CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK3-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 +// CHECK3-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4, !llvm.access.group !31 // CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 // CHECK3-NEXT: [[TMP14:%.*]] = inttoptr i32 [[TMP8]] to i8* -// CHECK3-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4 +// CHECK3-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4, !llvm.access.group !31 // CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 // CHECK3-NEXT: [[TMP16:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* -// CHECK3-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 4 +// CHECK3-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 4, !llvm.access.group !31 // CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 // CHECK3-NEXT: [[TMP18:%.*]] = inttoptr i32 [[TMP10]] to i8* -// CHECK3-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 4 +// CHECK3-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 4, !llvm.access.group !31 // CHECK3-NEXT: [[TMP19:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x [10 x i32]]*, i32)* @__omp_outlined__7 to i8*), i8* null, i8** [[TMP19]], i32 4) +// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x [10 x i32]]*, i32)* @__omp_outlined__7 to i8*), i8* null, i8** [[TMP19]], i32 4), !llvm.access.group !31 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 // CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP26]], 99 // CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] // CHECK3: cond.true6: // CHECK3-NEXT: br label [[COND_END8:%.*]] // CHECK3: cond.false7: -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 // CHECK3-NEXT: br label [[COND_END8]] // CHECK3: cond.end8: // CHECK3-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP27]], [[COND_FALSE7]] ] -// CHECK3-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK3-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 +// CHECK3-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -12510,46 +12509,46 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group !34 // CHECK3-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !34 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK3-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP10]], 10 // CHECK3-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 10 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL3]] // CHECK3-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]] -// CHECK3-NEXT: store i32 [[ADD5]], i32* [[J]], align 4 -// CHECK3-NEXT: store i32 10, i32* [[K]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[F_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[ADD5]], i32* [[J]], align 4, !llvm.access.group !34 +// CHECK3-NEXT: store i32 10, i32* [[K]], align 4, !llvm.access.group !34 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !34 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !34 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[F_ADDR]], align 4, !llvm.access.group !34 // CHECK3-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], [[MUL6]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4, !llvm.access.group !34 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD7]], [[TMP14]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !34 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i32 0, i32 [[TMP15]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[J]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !34 // CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP16]] -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[ARRAYIDX9]], align 4 +// CHECK3-NEXT: store i32 [[ADD8]], i32* [[ARRAYIDX9]], align 4, !llvm.access.group !34 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 // CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK3-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_ast_print.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_ast_print.cpp index 39813253f0af9..67554172e9485 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_ast_print.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_ast_print.cpp @@ -1,8 +1,8 @@ -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -ast-print %s -Wno-openmp-mapping | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -ast-print %s -Wno-openmp-mapping -Wsign-conversion | FileCheck %s // RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -emit-pch -o %t %s -Wno-openmp-mapping // RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print -Wno-openmp-mapping | FileCheck %s -// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -ast-print %s -Wno-openmp-mapping | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -ast-print %s -Wno-openmp-mapping -Wsign-conversion | FileCheck %s // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -emit-pch -o %t %s -Wno-openmp-mapping // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print -Wno-openmp-mapping | FileCheck %s // expected-no-diagnostics diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp index 3d9b9c871d95b..364dac905a3d2 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp @@ -741,34 +741,33 @@ int main (int argc, char **argv) { // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: // CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV2]], [[TMP6]] +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = sext i32 [[TMP8]] to i64 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[TMP7]], [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[COND]] to i32 -// CHECK1-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 @@ -784,20 +783,20 @@ int main (int argc, char **argv) { // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: // CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) @@ -1656,34 +1655,33 @@ int main (int argc, char **argv) { // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: // CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV2]], [[TMP6]] +// CHECK2-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: // CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CONV3:%.*]] = sext i32 [[TMP8]] to i64 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i64 [ [[TMP7]], [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] -// CHECK2-NEXT: [[CONV4:%.*]] = trunc i64 [[COND]] to i32 -// CHECK2-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK2-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK2-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: // CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 @@ -1699,20 +1697,20 @@ int main (int argc, char **argv) { // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK2-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK2-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: // CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) @@ -2558,7 +2556,7 @@ int main (int argc, char **argv) { // CHECK3: omp.dispatch.cond: // CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -3445,7 +3443,7 @@ int main (int argc, char **argv) { // CHECK4: omp.dispatch.cond: // CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] +// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -4345,34 +4343,33 @@ int main (int argc, char **argv) { // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: // CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 // CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV2]], [[TMP6]] +// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: // CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CONV3:%.*]] = sext i32 [[TMP8]] to i64 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i64 [ [[TMP7]], [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] -// CHECK5-NEXT: [[CONV4:%.*]] = trunc i64 [[COND]] to i32 -// CHECK5-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: // CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK5-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: // CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 @@ -4388,20 +4385,20 @@ int main (int argc, char **argv) { // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: // CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK5-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK5-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK5: omp.dispatch.inc: // CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK5-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK5-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK5-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK5-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK5: omp.dispatch.end: // CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) @@ -5260,34 +5257,33 @@ int main (int argc, char **argv) { // CHECK6-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK6: omp.dispatch.cond: // CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 // CHECK6-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK6-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV2]], [[TMP6]] +// CHECK6-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] // CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: // CHECK6-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK6-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 // CHECK6-NEXT: br label [[COND_END:%.*]] // CHECK6: cond.false: // CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CONV3:%.*]] = sext i32 [[TMP8]] to i64 // CHECK6-NEXT: br label [[COND_END]] // CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i64 [ [[TMP7]], [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] -// CHECK6-NEXT: [[CONV4:%.*]] = trunc i64 [[COND]] to i32 -// CHECK6-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK6-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK6-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK6-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK6-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK6-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK6: omp.dispatch.body: // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: // CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK6-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK6-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK6-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: // CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 @@ -5303,20 +5299,20 @@ int main (int argc, char **argv) { // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: // CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK6-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK6-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK6: omp.dispatch.inc: // CHECK6-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK6-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK6-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK6-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 // CHECK6-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK6-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK6-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK6-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK6: omp.dispatch.end: // CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) @@ -6162,7 +6158,7 @@ int main (int argc, char **argv) { // CHECK7: omp.dispatch.cond: // CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -7049,7 +7045,7 @@ int main (int argc, char **argv) { // CHECK8: omp.dispatch.cond: // CHECK8-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK8-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] +// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] // CHECK8-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK8: cond.true: // CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -9537,34 +9533,33 @@ int main (int argc, char **argv) { // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: // CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 // CHECK13-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV3]], [[TMP7]] +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: // CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CONV4:%.*]] = sext i32 [[TMP9]] to i64 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] -// CHECK13-NEXT: [[CONV5:%.*]] = trunc i64 [[COND]] to i32 -// CHECK13-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: // CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK13-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: // CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 @@ -9579,20 +9574,20 @@ int main (int argc, char **argv) { // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: // CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK13: omp.dispatch.inc: // CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK13-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK13-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK13-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK13: omp.dispatch.end: // CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) @@ -12057,34 +12052,33 @@ int main (int argc, char **argv) { // CHECK14-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK14: omp.dispatch.cond: // CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 // CHECK14-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV3]], [[TMP7]] +// CHECK14-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] // CHECK14-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: // CHECK14-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK14-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: // CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[CONV4:%.*]] = sext i32 [[TMP9]] to i64 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] -// CHECK14-NEXT: [[CONV5:%.*]] = trunc i64 [[COND]] to i32 -// CHECK14-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK14-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK14-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK14-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK14: omp.dispatch.body: // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: // CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK14-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK14-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: // CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 @@ -12099,20 +12093,20 @@ int main (int argc, char **argv) { // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: // CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK14-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK14: omp.dispatch.inc: // CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK14-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK14-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK14-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK14: omp.dispatch.end: // CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) @@ -14494,7 +14488,7 @@ int main (int argc, char **argv) { // CHECK15: omp.dispatch.cond: // CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -16911,7 +16905,7 @@ int main (int argc, char **argv) { // CHECK16: omp.dispatch.cond: // CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK16-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], [[TMP7]] +// CHECK16-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] // CHECK16-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK16: cond.true: // CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -19411,34 +19405,33 @@ int main (int argc, char **argv) { // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: // CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 // CHECK17-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV3]], [[TMP7]] +// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: // CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CONV4:%.*]] = sext i32 [[TMP9]] to i64 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] -// CHECK17-NEXT: [[CONV5:%.*]] = trunc i64 [[COND]] to i32 -// CHECK17-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: // CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK17-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: // CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 @@ -19453,20 +19446,20 @@ int main (int argc, char **argv) { // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: // CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK17-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: // CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK17-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK17-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK17-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: // CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) @@ -21931,34 +21924,33 @@ int main (int argc, char **argv) { // CHECK18-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK18: omp.dispatch.cond: // CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK18-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 // CHECK18-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK18-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV3]], [[TMP7]] +// CHECK18-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK18-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] // CHECK18-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK18: cond.true: // CHECK18-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK18-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK18-NEXT: br label [[COND_END:%.*]] // CHECK18: cond.false: // CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK18-NEXT: [[CONV4:%.*]] = sext i32 [[TMP9]] to i64 // CHECK18-NEXT: br label [[COND_END]] // CHECK18: cond.end: -// CHECK18-NEXT: [[COND:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] -// CHECK18-NEXT: [[CONV5:%.*]] = trunc i64 [[COND]] to i32 -// CHECK18-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK18-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK18-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK18-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK18-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK18-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK18-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK18-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK18: omp.dispatch.body: // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: // CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK18-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK18-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK18-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK18-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: // CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 @@ -21973,20 +21965,20 @@ int main (int argc, char **argv) { // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: // CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK18-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK18-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK18: omp.dispatch.inc: // CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK18-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK18-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 +// CHECK18-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK18-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 // CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK18-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK18-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 +// CHECK18-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK18-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK18: omp.dispatch.end: // CHECK18-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) @@ -24368,7 +24360,7 @@ int main (int argc, char **argv) { // CHECK19: omp.dispatch.cond: // CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -26785,7 +26777,7 @@ int main (int argc, char **argv) { // CHECK20: omp.dispatch.cond: // CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK20-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK20-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], [[TMP7]] +// CHECK20-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] // CHECK20-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK20: cond.true: // CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp index 411dd6de11286..da7a6b1712ce6 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp @@ -776,34 +776,33 @@ int main (int argc, char **argv) { // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: // CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV2]], [[TMP6]] +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = sext i32 [[TMP8]] to i64 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[TMP7]], [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[COND]] to i32 -// CHECK1-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 @@ -819,20 +818,20 @@ int main (int argc, char **argv) { // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: // CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) @@ -1761,34 +1760,33 @@ int main (int argc, char **argv) { // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: // CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV2]], [[TMP6]] +// CHECK2-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: // CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CONV3:%.*]] = sext i32 [[TMP8]] to i64 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i64 [ [[TMP7]], [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] -// CHECK2-NEXT: [[CONV4:%.*]] = trunc i64 [[COND]] to i32 -// CHECK2-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK2-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK2-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 -// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: // CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 @@ -1804,20 +1802,20 @@ int main (int argc, char **argv) { // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK2-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK2-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: // CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) @@ -2733,7 +2731,7 @@ int main (int argc, char **argv) { // CHECK3: omp.dispatch.cond: // CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -3690,7 +3688,7 @@ int main (int argc, char **argv) { // CHECK4: omp.dispatch.cond: // CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] +// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -4660,34 +4658,33 @@ int main (int argc, char **argv) { // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: // CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 // CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV2]], [[TMP6]] +// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: // CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CONV3:%.*]] = sext i32 [[TMP8]] to i64 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i64 [ [[TMP7]], [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] -// CHECK5-NEXT: [[CONV4:%.*]] = trunc i64 [[COND]] to i32 -// CHECK5-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: // CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 -// CHECK5-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK5-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: // CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 @@ -4703,20 +4700,20 @@ int main (int argc, char **argv) { // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: // CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK5-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK5-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK5: omp.dispatch.inc: // CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK5-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK5-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK5-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK5-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK5: omp.dispatch.end: // CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) @@ -5645,34 +5642,33 @@ int main (int argc, char **argv) { // CHECK6-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK6: omp.dispatch.cond: // CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 // CHECK6-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK6-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV2]], [[TMP6]] +// CHECK6-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] // CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: // CHECK6-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK6-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 // CHECK6-NEXT: br label [[COND_END:%.*]] // CHECK6: cond.false: // CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CONV3:%.*]] = sext i32 [[TMP8]] to i64 // CHECK6-NEXT: br label [[COND_END]] // CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i64 [ [[TMP7]], [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] -// CHECK6-NEXT: [[CONV4:%.*]] = trunc i64 [[COND]] to i32 -// CHECK6-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK6-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK6-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK6-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK6-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK6-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK6: omp.dispatch.body: // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: // CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 -// CHECK6-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK6-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK6-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK6-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: // CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 @@ -5688,20 +5684,20 @@ int main (int argc, char **argv) { // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: // CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK6-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK6-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK6-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK6-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK6: omp.dispatch.inc: // CHECK6-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK6-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK6-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK6-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 // CHECK6-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK6-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK6-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK6-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK6: omp.dispatch.end: // CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) @@ -6617,7 +6613,7 @@ int main (int argc, char **argv) { // CHECK7: omp.dispatch.cond: // CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -7574,7 +7570,7 @@ int main (int argc, char **argv) { // CHECK8: omp.dispatch.cond: // CHECK8-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK8-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] +// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] // CHECK8-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK8: cond.true: // CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -11022,34 +11018,33 @@ int main (int argc, char **argv) { // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: // CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 // CHECK13-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV3]], [[TMP7]] +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: // CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CONV4:%.*]] = sext i32 [[TMP9]] to i64 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] -// CHECK13-NEXT: [[CONV5:%.*]] = trunc i64 [[COND]] to i32 -// CHECK13-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: // CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 -// CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK13-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: // CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 @@ -11064,20 +11059,20 @@ int main (int argc, char **argv) { // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: // CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 -// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK13: omp.dispatch.inc: // CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK13-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK13-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK13-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK13: omp.dispatch.end: // CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) @@ -13732,34 +13727,33 @@ int main (int argc, char **argv) { // CHECK14-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK14: omp.dispatch.cond: // CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 // CHECK14-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV3]], [[TMP7]] +// CHECK14-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] // CHECK14-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: // CHECK14-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK14-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: // CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[CONV4:%.*]] = sext i32 [[TMP9]] to i64 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] -// CHECK14-NEXT: [[CONV5:%.*]] = trunc i64 [[COND]] to i32 -// CHECK14-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK14-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK14-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK14-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK14: omp.dispatch.body: // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: // CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 -// CHECK14-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK14-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK14-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: // CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 @@ -13774,20 +13768,20 @@ int main (int argc, char **argv) { // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: // CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 -// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK14-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK14: omp.dispatch.inc: // CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK14-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK14-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK14-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK14: omp.dispatch.end: // CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) @@ -16359,7 +16353,7 @@ int main (int argc, char **argv) { // CHECK15: omp.dispatch.cond: // CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -18966,7 +18960,7 @@ int main (int argc, char **argv) { // CHECK16: omp.dispatch.cond: // CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK16-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], [[TMP7]] +// CHECK16-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] // CHECK16-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK16: cond.true: // CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -21656,34 +21650,33 @@ int main (int argc, char **argv) { // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: // CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 // CHECK17-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV3]], [[TMP7]] +// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: // CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CONV4:%.*]] = sext i32 [[TMP9]] to i64 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] -// CHECK17-NEXT: [[CONV5:%.*]] = trunc i64 [[COND]] to i32 -// CHECK17-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: // CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 -// CHECK17-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK17-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: // CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 @@ -21698,20 +21691,20 @@ int main (int argc, char **argv) { // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: // CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 -// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK17-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: // CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK17-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK17-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK17-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: // CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) @@ -24366,34 +24359,33 @@ int main (int argc, char **argv) { // CHECK18-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK18: omp.dispatch.cond: // CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK18-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 // CHECK18-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK18-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV3]], [[TMP7]] +// CHECK18-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK18-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] // CHECK18-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK18: cond.true: // CHECK18-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK18-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK18-NEXT: br label [[COND_END:%.*]] // CHECK18: cond.false: // CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK18-NEXT: [[CONV4:%.*]] = sext i32 [[TMP9]] to i64 // CHECK18-NEXT: br label [[COND_END]] // CHECK18: cond.end: -// CHECK18-NEXT: [[COND:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] -// CHECK18-NEXT: [[CONV5:%.*]] = trunc i64 [[COND]] to i32 -// CHECK18-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK18-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK18-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK18-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK18-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK18-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK18-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK18-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK18: omp.dispatch.body: // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: // CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 -// CHECK18-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK18-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK18-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK18-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: // CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 @@ -24408,20 +24400,20 @@ int main (int argc, char **argv) { // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: // CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 -// CHECK18-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK18-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK18-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK18-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK18: omp.dispatch.inc: // CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK18-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK18-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 +// CHECK18-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK18-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 // CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK18-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK18-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 +// CHECK18-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK18-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK18: omp.dispatch.end: // CHECK18-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) @@ -26993,7 +26985,7 @@ int main (int argc, char **argv) { // CHECK19: omp.dispatch.cond: // CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -29600,7 +29592,7 @@ int main (int argc, char **argv) { // CHECK20: omp.dispatch.cond: // CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK20-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK20-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], [[TMP7]] +// CHECK20-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] // CHECK20-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK20: cond.true: // CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp index a48230f44f502..b1a46f0dfbb58 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp @@ -755,34 +755,33 @@ int main (int argc, char **argv) { // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: // CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV2]], [[TMP6]] +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = sext i32 [[TMP8]] to i64 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[TMP7]], [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[COND]] to i32 -// CHECK1-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 @@ -798,20 +797,20 @@ int main (int argc, char **argv) { // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: // CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) @@ -1670,34 +1669,33 @@ int main (int argc, char **argv) { // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: // CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV2]], [[TMP6]] +// CHECK2-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: // CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CONV3:%.*]] = sext i32 [[TMP8]] to i64 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i64 [ [[TMP7]], [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] -// CHECK2-NEXT: [[CONV4:%.*]] = trunc i64 [[COND]] to i32 -// CHECK2-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK2-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK2-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: // CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 @@ -1713,20 +1711,20 @@ int main (int argc, char **argv) { // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK2-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK2-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: // CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) @@ -2572,7 +2570,7 @@ int main (int argc, char **argv) { // CHECK3: omp.dispatch.cond: // CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -3459,7 +3457,7 @@ int main (int argc, char **argv) { // CHECK4: omp.dispatch.cond: // CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] +// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -4359,34 +4357,33 @@ int main (int argc, char **argv) { // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: // CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 // CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV2]], [[TMP6]] +// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: // CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CONV3:%.*]] = sext i32 [[TMP8]] to i64 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i64 [ [[TMP7]], [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] -// CHECK5-NEXT: [[CONV4:%.*]] = trunc i64 [[COND]] to i32 -// CHECK5-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: // CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK5-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: // CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 @@ -4402,20 +4399,20 @@ int main (int argc, char **argv) { // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: // CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK5-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK5-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK5: omp.dispatch.inc: // CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK5-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK5-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK5-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK5-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK5: omp.dispatch.end: // CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) @@ -5274,34 +5271,33 @@ int main (int argc, char **argv) { // CHECK6-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK6: omp.dispatch.cond: // CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 // CHECK6-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK6-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV2]], [[TMP6]] +// CHECK6-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] // CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: // CHECK6-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK6-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 // CHECK6-NEXT: br label [[COND_END:%.*]] // CHECK6: cond.false: // CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CONV3:%.*]] = sext i32 [[TMP8]] to i64 // CHECK6-NEXT: br label [[COND_END]] // CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i64 [ [[TMP7]], [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] -// CHECK6-NEXT: [[CONV4:%.*]] = trunc i64 [[COND]] to i32 -// CHECK6-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK6-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK6-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK6-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK6-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK6-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK6: omp.dispatch.body: // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: // CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK6-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK6-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK6-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: // CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 @@ -5317,20 +5313,20 @@ int main (int argc, char **argv) { // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: // CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK6-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK6-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK6: omp.dispatch.inc: // CHECK6-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK6-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK6-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK6-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 // CHECK6-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK6-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK6-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK6-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK6: omp.dispatch.end: // CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) @@ -6176,7 +6172,7 @@ int main (int argc, char **argv) { // CHECK7: omp.dispatch.cond: // CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -7063,7 +7059,7 @@ int main (int argc, char **argv) { // CHECK8: omp.dispatch.cond: // CHECK8-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK8-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] +// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] // CHECK8-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK8: cond.true: // CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -9498,34 +9494,33 @@ int main (int argc, char **argv) { // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: // CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 // CHECK13-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV3]], [[TMP7]] +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: // CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CONV4:%.*]] = sext i32 [[TMP9]] to i64 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] -// CHECK13-NEXT: [[CONV5:%.*]] = trunc i64 [[COND]] to i32 -// CHECK13-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: // CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK13-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: // CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 @@ -9540,20 +9535,20 @@ int main (int argc, char **argv) { // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: // CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK13: omp.dispatch.inc: // CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK13-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK13-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK13-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK13: omp.dispatch.end: // CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) @@ -11968,34 +11963,33 @@ int main (int argc, char **argv) { // CHECK14-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK14: omp.dispatch.cond: // CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 // CHECK14-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV3]], [[TMP7]] +// CHECK14-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] // CHECK14-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: // CHECK14-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK14-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: // CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[CONV4:%.*]] = sext i32 [[TMP9]] to i64 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] -// CHECK14-NEXT: [[CONV5:%.*]] = trunc i64 [[COND]] to i32 -// CHECK14-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK14-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK14-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK14-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK14: omp.dispatch.body: // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: // CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK14-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK14-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: // CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 @@ -12010,20 +12004,20 @@ int main (int argc, char **argv) { // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: // CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK14-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK14: omp.dispatch.inc: // CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK14-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK14-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK14-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK14: omp.dispatch.end: // CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) @@ -14375,7 +14369,7 @@ int main (int argc, char **argv) { // CHECK15: omp.dispatch.cond: // CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -16762,7 +16756,7 @@ int main (int argc, char **argv) { // CHECK16: omp.dispatch.cond: // CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK16-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], [[TMP7]] +// CHECK16-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] // CHECK16-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK16: cond.true: // CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -19212,34 +19206,33 @@ int main (int argc, char **argv) { // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: // CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 // CHECK17-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV3]], [[TMP7]] +// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: // CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CONV4:%.*]] = sext i32 [[TMP9]] to i64 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] -// CHECK17-NEXT: [[CONV5:%.*]] = trunc i64 [[COND]] to i32 -// CHECK17-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: // CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK17-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: // CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 @@ -19254,20 +19247,20 @@ int main (int argc, char **argv) { // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: // CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK17-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: // CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK17-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK17-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK17-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: // CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) @@ -21682,34 +21675,33 @@ int main (int argc, char **argv) { // CHECK18-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK18: omp.dispatch.cond: // CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK18-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 // CHECK18-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK18-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV3]], [[TMP7]] +// CHECK18-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK18-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] // CHECK18-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK18: cond.true: // CHECK18-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK18-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK18-NEXT: br label [[COND_END:%.*]] // CHECK18: cond.false: // CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK18-NEXT: [[CONV4:%.*]] = sext i32 [[TMP9]] to i64 // CHECK18-NEXT: br label [[COND_END]] // CHECK18: cond.end: -// CHECK18-NEXT: [[COND:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] -// CHECK18-NEXT: [[CONV5:%.*]] = trunc i64 [[COND]] to i32 -// CHECK18-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK18-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK18-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK18-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK18-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK18-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK18-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK18-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK18: omp.dispatch.body: // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: // CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK18-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK18-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK18-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK18-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: // CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 @@ -21724,20 +21716,20 @@ int main (int argc, char **argv) { // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: // CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK18-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK18-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK18: omp.dispatch.inc: // CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK18-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK18-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 +// CHECK18-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK18-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 // CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK18-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK18-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 +// CHECK18-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK18-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK18: omp.dispatch.end: // CHECK18-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) @@ -24089,7 +24081,7 @@ int main (int argc, char **argv) { // CHECK19: omp.dispatch.cond: // CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -26476,7 +26468,7 @@ int main (int argc, char **argv) { // CHECK20: omp.dispatch.cond: // CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK20-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK20-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], [[TMP7]] +// CHECK20-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] // CHECK20-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK20: cond.true: // CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp index 00ec282f9322c..c62b4386594ff 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp @@ -800,34 +800,33 @@ int main (int argc, char **argv) { // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: // CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV2]], [[TMP6]] +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = sext i32 [[TMP8]] to i64 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[TMP7]], [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[COND]] to i32 -// CHECK1-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 @@ -843,20 +842,20 @@ int main (int argc, char **argv) { // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: // CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) @@ -1785,34 +1784,33 @@ int main (int argc, char **argv) { // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: // CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV2]], [[TMP6]] +// CHECK2-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: // CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CONV3:%.*]] = sext i32 [[TMP8]] to i64 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i64 [ [[TMP7]], [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] -// CHECK2-NEXT: [[CONV4:%.*]] = trunc i64 [[COND]] to i32 -// CHECK2-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK2-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK2-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 -// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: // CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 @@ -1828,20 +1826,20 @@ int main (int argc, char **argv) { // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK2-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK2-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: // CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) @@ -2770,34 +2768,33 @@ int main (int argc, char **argv) { // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: // CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 // CHECK3-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV2]], [[TMP6]] +// CHECK3-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: // CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CONV3:%.*]] = sext i32 [[TMP8]] to i64 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i64 [ [[TMP7]], [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] -// CHECK3-NEXT: [[CONV4:%.*]] = trunc i64 [[COND]] to i32 -// CHECK3-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: // CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: // CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 @@ -2813,20 +2810,20 @@ int main (int argc, char **argv) { // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: // CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: // CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: // CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) @@ -3755,34 +3752,33 @@ int main (int argc, char **argv) { // CHECK4-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK4: omp.dispatch.cond: // CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 // CHECK4-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK4-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV2]], [[TMP6]] +// CHECK4-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] // CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK4-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: // CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CONV3:%.*]] = sext i32 [[TMP8]] to i64 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i64 [ [[TMP7]], [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] -// CHECK4-NEXT: [[CONV4:%.*]] = trunc i64 [[COND]] to i32 -// CHECK4-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK4-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK4-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK4-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK4: omp.dispatch.body: // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: // CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 -// CHECK4-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK4-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: // CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 @@ -3798,20 +3794,20 @@ int main (int argc, char **argv) { // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: // CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK4-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK4-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK4: omp.dispatch.inc: // CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK4-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK4-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 // CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK4-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK4-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK4: omp.dispatch.end: // CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) @@ -4727,7 +4723,7 @@ int main (int argc, char **argv) { // CHECK5: omp.dispatch.cond: // CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -5684,7 +5680,7 @@ int main (int argc, char **argv) { // CHECK6: omp.dispatch.cond: // CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK6-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] +// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] // CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: // CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -6641,7 +6637,7 @@ int main (int argc, char **argv) { // CHECK7: omp.dispatch.cond: // CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -7598,7 +7594,7 @@ int main (int argc, char **argv) { // CHECK8: omp.dispatch.cond: // CHECK8-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK8-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] +// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] // CHECK8-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK8: cond.true: // CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -10993,34 +10989,33 @@ int main (int argc, char **argv) { // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: // CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 // CHECK13-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV3]], [[TMP7]] +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: // CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CONV4:%.*]] = sext i32 [[TMP9]] to i64 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] -// CHECK13-NEXT: [[CONV5:%.*]] = trunc i64 [[COND]] to i32 -// CHECK13-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: // CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 -// CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK13-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: // CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 @@ -11035,20 +11030,20 @@ int main (int argc, char **argv) { // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: // CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 -// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK13: omp.dispatch.inc: // CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK13-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK13-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK13-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK13: omp.dispatch.end: // CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) @@ -13653,34 +13648,33 @@ int main (int argc, char **argv) { // CHECK14-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK14: omp.dispatch.cond: // CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 // CHECK14-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV3]], [[TMP7]] +// CHECK14-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] // CHECK14-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: // CHECK14-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK14-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: // CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[CONV4:%.*]] = sext i32 [[TMP9]] to i64 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] -// CHECK14-NEXT: [[CONV5:%.*]] = trunc i64 [[COND]] to i32 -// CHECK14-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK14-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK14-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK14-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK14: omp.dispatch.body: // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: // CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 -// CHECK14-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK14-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK14-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: // CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 @@ -13695,20 +13689,20 @@ int main (int argc, char **argv) { // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: // CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 -// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK14-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK14: omp.dispatch.inc: // CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK14-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK14-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK14-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK14: omp.dispatch.end: // CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) @@ -16313,34 +16307,33 @@ int main (int argc, char **argv) { // CHECK15-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK15: omp.dispatch.cond: // CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 // CHECK15-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK15-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV3]], [[TMP7]] +// CHECK15-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK15-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: // CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CONV4:%.*]] = sext i32 [[TMP9]] to i64 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] -// CHECK15-NEXT: [[CONV5:%.*]] = trunc i64 [[COND]] to i32 -// CHECK15-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK15: omp.dispatch.body: // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: // CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 -// CHECK15-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK15-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: // CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 @@ -16355,20 +16348,20 @@ int main (int argc, char **argv) { // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: // CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 -// CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK15-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK15-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK15: omp.dispatch.inc: // CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK15-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK15-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 // CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK15-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK15-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK15: omp.dispatch.end: // CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) @@ -18973,34 +18966,33 @@ int main (int argc, char **argv) { // CHECK16-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK16: omp.dispatch.cond: // CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK16-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 // CHECK16-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK16-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV3]], [[TMP7]] +// CHECK16-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK16-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] // CHECK16-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK16: cond.true: // CHECK16-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK16-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK16-NEXT: br label [[COND_END:%.*]] // CHECK16: cond.false: // CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK16-NEXT: [[CONV4:%.*]] = sext i32 [[TMP9]] to i64 // CHECK16-NEXT: br label [[COND_END]] // CHECK16: cond.end: -// CHECK16-NEXT: [[COND:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] -// CHECK16-NEXT: [[CONV5:%.*]] = trunc i64 [[COND]] to i32 -// CHECK16-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK16-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK16-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK16-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK16-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK16-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK16-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK16: omp.dispatch.body: // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: // CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 -// CHECK16-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK16-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK16-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK16-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: // CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 @@ -19015,20 +19007,20 @@ int main (int argc, char **argv) { // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: // CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 -// CHECK16-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK16-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK16-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK16-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK16: omp.dispatch.inc: // CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK16-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK16-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 +// CHECK16-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK16-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 // CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK16-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK16-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK16-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 // CHECK16-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK16: omp.dispatch.end: // CHECK16-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) @@ -21570,7 +21562,7 @@ int main (int argc, char **argv) { // CHECK17: omp.dispatch.cond: // CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -24147,7 +24139,7 @@ int main (int argc, char **argv) { // CHECK18: omp.dispatch.cond: // CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK18-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], [[TMP7]] +// CHECK18-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] // CHECK18-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK18: cond.true: // CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -26724,7 +26716,7 @@ int main (int argc, char **argv) { // CHECK19: omp.dispatch.cond: // CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -29301,7 +29293,7 @@ int main (int argc, char **argv) { // CHECK20: omp.dispatch.cond: // CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK20-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK20-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], [[TMP7]] +// CHECK20-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] // CHECK20-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK20: cond.true: // CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 From 51b4ab26ca583b8d313da7663478392196ae6b6a Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Fri, 18 Jun 2021 13:20:10 +0100 Subject: [PATCH 418/619] [NFC] Add new setDebugLocFromInst that uses the class Builder by default In lots of places we were calling setDebugLocFromInst and passing in the same Builder member variable found in InnerLoopVectorizer. I personally found this confusing so I've changed the interface to take an Optional *> and we can now pass in None when we want to use the class member variable. Differential Revision: https://reviews.llvm.org/D105100 --- .../Transforms/Vectorize/LoopVectorize.cpp | 53 ++++++++++--------- 1 file changed, 28 insertions(+), 25 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 57abd0d26f5b5..d923d20029063 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -547,9 +547,10 @@ class InnerLoopVectorizer { VPValue *Def, VPValue *Addr, VPValue *StoredValue, VPValue *BlockInMask); - /// Set the debug location in the builder using the debug location in - /// the instruction. - void setDebugLocFromInst(IRBuilder<> &B, const Value *Ptr); + /// Set the debug location in the builder \p Ptr using the debug location in + /// \p V. If \p Ptr is None then it uses the class member's Builder. + void setDebugLocFromInst(const Value *V, + Optional *> CustomBuilder = None); /// Fix the non-induction PHIs in the OrigPHIsToFix vector. void fixNonInductionPHIs(VPTransformState &State); @@ -1040,8 +1041,10 @@ static Instruction *getDebugLocFromInstOrOperands(Instruction *I) { return I; } -void InnerLoopVectorizer::setDebugLocFromInst(IRBuilder<> &B, const Value *Ptr) { - if (const Instruction *Inst = dyn_cast_or_null(Ptr)) { +void InnerLoopVectorizer::setDebugLocFromInst( + const Value *V, Optional *> CustomBuilder) { + IRBuilder<> *B = (CustomBuilder == None) ? &Builder : *CustomBuilder; + if (const Instruction *Inst = dyn_cast_or_null(V)) { const DILocation *DIL = Inst->getDebugLoc(); // When a FSDiscriminator is enabled, we don't need to add the multiply @@ -1052,15 +1055,15 @@ void InnerLoopVectorizer::setDebugLocFromInst(IRBuilder<> &B, const Value *Ptr) auto NewDIL = DIL->cloneByMultiplyingDuplicationFactor(UF * VF.getKnownMinValue()); if (NewDIL) - B.SetCurrentDebugLocation(NewDIL.getValue()); + B->SetCurrentDebugLocation(NewDIL.getValue()); else LLVM_DEBUG(dbgs() << "Failed to create new discriminator: " << DIL->getFilename() << " Line: " << DIL->getLine()); } else - B.SetCurrentDebugLocation(DIL); + B->SetCurrentDebugLocation(DIL); } else - B.SetCurrentDebugLocation(DebugLoc()); + B->SetCurrentDebugLocation(DebugLoc()); } /// Write a \p DebugMsg about vectorization to the debug output stream. If \p I @@ -2718,7 +2721,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup( for (unsigned Part = 0; Part < UF; Part++) { Value *AddrPart = State.get(Addr, VPIteration(Part, 0)); - setDebugLocFromInst(Builder, AddrPart); + setDebugLocFromInst(AddrPart); // Notice current instruction could be any index. Need to adjust the address // to the member of index 0. @@ -2744,7 +2747,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup( AddrParts.push_back(Builder.CreateBitCast(AddrPart, PtrTy)); } - setDebugLocFromInst(Builder, Instr); + setDebugLocFromInst(Instr); Value *PoisonVec = PoisonValue::get(VecTy); Value *MaskForGaps = nullptr; @@ -2949,7 +2952,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction( // Handle Stores: if (SI) { - setDebugLocFromInst(Builder, SI); + setDebugLocFromInst(SI); for (unsigned Part = 0; Part < UF; ++Part) { Instruction *NewSI = nullptr; @@ -2981,7 +2984,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction( // Handle loads. assert(LI && "Must have a load instruction"); - setDebugLocFromInst(Builder, LI); + setDebugLocFromInst(LI); for (unsigned Part = 0; Part < UF; ++Part) { Value *NewLI; if (CreateGatherScatter) { @@ -3023,7 +3026,7 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, VPValue *Def, if (!Instance.isFirstIteration()) return; - setDebugLocFromInst(Builder, Instr); + setDebugLocFromInst(Instr); // Does this instruction return a value ? bool IsVoidRetTy = Instr->getType()->isVoidTy(); @@ -3073,11 +3076,11 @@ PHINode *InnerLoopVectorizer::createInductionVariable(Loop *L, Value *Start, IRBuilder<> B(&*Header->getFirstInsertionPt()); Instruction *OldInst = getDebugLocFromInstOrOperands(OldInduction); - setDebugLocFromInst(B, OldInst); + setDebugLocFromInst(OldInst, &B); auto *Induction = B.CreatePHI(Start->getType(), 2, "index"); B.SetInsertPoint(Latch->getTerminator()); - setDebugLocFromInst(B, OldInst); + setDebugLocFromInst(OldInst, &B); // Create i+1 and fill the PHINode. // @@ -4322,7 +4325,7 @@ void InnerLoopVectorizer::fixReduction(VPWidenPHIRecipe *PhiR, RecurKind RK = RdxDesc.getRecurrenceKind(); TrackingVH ReductionStartValue = RdxDesc.getRecurrenceStartValue(); Instruction *LoopExitInst = RdxDesc.getLoopExitInstr(); - setDebugLocFromInst(Builder, ReductionStartValue); + setDebugLocFromInst(ReductionStartValue); bool IsInLoopReductionPhi = Cost->isInLoopReduction(OrigPhi); VPValue *LoopExitInstDef = State.Plan->getVPValue(LoopExitInst); @@ -4357,7 +4360,7 @@ void InnerLoopVectorizer::fixReduction(VPWidenPHIRecipe *PhiR, // instructions. Builder.SetInsertPoint(&*LoopMiddleBlock->getFirstInsertionPt()); - setDebugLocFromInst(Builder, LoopExitInst); + setDebugLocFromInst(LoopExitInst); Type *PhiTy = OrigPhi->getType(); // If tail is folded by masking, the vector value to leave the loop should be @@ -4436,7 +4439,7 @@ void InnerLoopVectorizer::fixReduction(VPWidenPHIRecipe *PhiR, // conditional branch, and (c) other passes may add new predecessors which // terminate on this line. This is the easiest way to ensure we don't // accidentally cause an extra step back into the loop while debugging. - setDebugLocFromInst(Builder, LoopMiddleBlock->getTerminator()); + setDebugLocFromInst(LoopMiddleBlock->getTerminator()); if (IsOrdered) ReducedPartRdx = State.get(LoopExitInstDef, UF - 1); else { @@ -4809,7 +4812,7 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, assert(!Legal->isReductionVariable(P) && "reductions should be handled above"); - setDebugLocFromInst(Builder, P); + setDebugLocFromInst(P); // This PHINode must be an induction variable. // Make sure that we know about it. @@ -4976,7 +4979,7 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I, VPValue *Def, case Instruction::Or: case Instruction::Xor: { // Just widen unops and binops. - setDebugLocFromInst(Builder, &I); + setDebugLocFromInst(&I); for (unsigned Part = 0; Part < UF; ++Part) { SmallVector Ops; @@ -5000,7 +5003,7 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I, VPValue *Def, // Widen compares. Generate vector compares. bool FCmp = (I.getOpcode() == Instruction::FCmp); auto *Cmp = cast(&I); - setDebugLocFromInst(Builder, Cmp); + setDebugLocFromInst(Cmp); for (unsigned Part = 0; Part < UF; ++Part) { Value *A = State.get(User.getOperand(0), Part); Value *B = State.get(User.getOperand(1), Part); @@ -5033,7 +5036,7 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I, VPValue *Def, case Instruction::FPTrunc: case Instruction::BitCast: { auto *CI = cast(&I); - setDebugLocFromInst(Builder, CI); + setDebugLocFromInst(CI); /// Vectorize casts. Type *DestTy = @@ -5059,7 +5062,7 @@ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPValue *Def, VPTransformState &State) { assert(!isa(I) && "DbgInfoIntrinsic should have been dropped during VPlan construction"); - setDebugLocFromInst(Builder, &I); + setDebugLocFromInst(&I); Module *M = I.getParent()->getParent()->getParent(); auto *CI = cast(&I); @@ -5131,7 +5134,7 @@ void InnerLoopVectorizer::widenSelectInstruction(SelectInst &I, VPValue *VPDef, VPUser &Operands, bool InvariantCond, VPTransformState &State) { - setDebugLocFromInst(Builder, &I); + setDebugLocFromInst(&I); // The condition can be loop invariant but still defined inside the // loop. This means that we can't just use the original 'cond' value. @@ -9484,7 +9487,7 @@ void VPWidenPHIRecipe::execute(VPTransformState &State) { } void VPBlendRecipe::execute(VPTransformState &State) { - State.ILV->setDebugLocFromInst(State.Builder, Phi); + State.ILV->setDebugLocFromInst(Phi, &State.Builder); // We know that all PHIs in non-header blocks are converted into // selects, so we don't have to worry about the insertion order and we // can just use the builder. From d93ca4d27ef75676728eaeff47457ebd22eb234f Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 1 Jul 2021 06:39:53 -0700 Subject: [PATCH 419/619] Revert "[OPENMP]Fix PR50640: OpenMP target clause implicitly scaling loop bounds to uint64_t." This reverts commit 67643f46ee9fb08e32075d67715a59c840aa761b to fix unexpected diagnostic notes. --- clang/lib/Sema/SemaOpenMP.cpp | 15 +- .../distribute_parallel_for_codegen.cpp | 282 ++--- .../distribute_parallel_for_simd_codegen.cpp | 342 +++--- ..._teams_distribute_parallel_for_codegen.cpp | 66 +- ...bute_parallel_for_generic_mode_codegen.cpp | 90 +- ...s_distribute_parallel_for_simd_codegen.cpp | 1039 +++++++++-------- ...eams_distribute_parallel_for_ast_print.cpp | 4 +- ...stribute_parallel_for_schedule_codegen.cpp | 264 +++-- ...ute_parallel_for_simd_schedule_codegen.cpp | 264 +++-- ...stribute_parallel_for_schedule_codegen.cpp | 264 +++-- ...ute_parallel_for_simd_schedule_codegen.cpp | 264 +++-- 11 files changed, 1467 insertions(+), 1427 deletions(-) diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 6e2552a023907..7fddff7992fc1 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -9401,20 +9401,11 @@ checkOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr, // Build expression: UB = min(UB, prevUB) for #for in composite or combined // construct - ExprResult NewPrevUB = PrevUB; - if (!SemaRef.Context.hasSameType(UB.get()->getType(), - PrevUB.get()->getType())) { - NewPrevUB = SemaRef.PerformImplicitConversion( - NewPrevUB.get(), UB.get()->getType(), Sema::AA_Converting, - /*AllowExplicit=*/true); - if (!NewPrevUB.isUsable()) - return 0; - } SourceLocation DistEUBLoc = AStmt->getBeginLoc(); - ExprResult IsUBGreater = SemaRef.BuildBinOp(CurScope, DistEUBLoc, BO_GT, - UB.get(), NewPrevUB.get()); + ExprResult IsUBGreater = + SemaRef.BuildBinOp(CurScope, DistEUBLoc, BO_GT, UB.get(), PrevUB.get()); ExprResult CondOp = SemaRef.ActOnConditionalOp( - DistEUBLoc, DistEUBLoc, IsUBGreater.get(), NewPrevUB.get(), UB.get()); + DistEUBLoc, DistEUBLoc, IsUBGreater.get(), PrevUB.get(), UB.get()); PrevEUB = SemaRef.BuildBinOp(CurScope, DistIncLoc, BO_Assign, UB.get(), CondOp.get()); PrevEUB = diff --git a/clang/test/OpenMP/distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_codegen.cpp index d1a3f33c33d9c..2034f82e25d5e 100644 --- a/clang/test/OpenMP/distribute_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_codegen.cpp @@ -1976,33 +1976,34 @@ int main() { // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: // CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 // CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP14]] to i32 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP13]], [[CONV7]] +// CHECK1-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP14]] // CHECK1-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP15]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CONV9:%.*]] = sext i32 [[TMP16]] to i64 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 +// CHECK1-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK1-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 @@ -2015,15 +2016,15 @@ int main() { // CHECK1-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX]], align 8 // CHECK1-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP3]], align 8 // CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK1-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM12]] -// CHECK1-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX13]], align 8 -// CHECK1-NEXT: [[ADD14:%.*]] = fadd double [[TMP25]], [[TMP28]] +// CHECK1-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM13]] +// CHECK1-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX14]], align 8 +// CHECK1-NEXT: [[ADD15:%.*]] = fadd double [[TMP25]], [[TMP28]] // CHECK1-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP1]], align 8 // CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM15]] -// CHECK1-NEXT: store double [[ADD14]], double* [[ARRAYIDX16]], align 8 +// CHECK1-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM16]] +// CHECK1-NEXT: store double [[ADD15]], double* [[ARRAYIDX17]], align 8 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 0 // CHECK1-NEXT: store double** [[TMP1]], double*** [[TMP31]], align 8 // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 1 @@ -2038,20 +2039,20 @@ int main() { // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: // CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP35]], 1 -// CHECK1-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK1-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] -// CHECK1-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] +// CHECK1-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] -// CHECK1-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] +// CHECK1-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: // CHECK1-NEXT: [[TMP40:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -3768,33 +3769,34 @@ int main() { // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: // CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 // CHECK2-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP14]] to i32 -// CHECK2-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP13]], [[CONV7]] +// CHECK2-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP14]] // CHECK2-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP15]] to i32 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CONV9:%.*]] = sext i32 [[TMP16]] to i64 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] -// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 +// CHECK2-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK2-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK2-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK2-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: // CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK2-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK2-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: // CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 @@ -3807,15 +3809,15 @@ int main() { // CHECK2-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX]], align 8 // CHECK2-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP3]], align 8 // CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK2-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK2-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM12]] -// CHECK2-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX13]], align 8 -// CHECK2-NEXT: [[ADD14:%.*]] = fadd double [[TMP25]], [[TMP28]] +// CHECK2-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK2-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM13]] +// CHECK2-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX14]], align 8 +// CHECK2-NEXT: [[ADD15:%.*]] = fadd double [[TMP25]], [[TMP28]] // CHECK2-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP1]], align 8 // CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK2-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK2-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM15]] -// CHECK2-NEXT: store double [[ADD14]], double* [[ARRAYIDX16]], align 8 +// CHECK2-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK2-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM16]] +// CHECK2-NEXT: store double [[ADD15]], double* [[ARRAYIDX17]], align 8 // CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 0 // CHECK2-NEXT: store double** [[TMP1]], double*** [[TMP31]], align 8 // CHECK2-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 1 @@ -3830,20 +3832,20 @@ int main() { // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: // CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP35]], 1 -// CHECK2-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK2-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: // CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] -// CHECK2-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] +// CHECK2-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] -// CHECK2-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] +// CHECK2-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: // CHECK2-NEXT: [[TMP40:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -5520,7 +5522,7 @@ int main() { // CHECK3: omp.dispatch.cond: // CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] // CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -7247,7 +7249,7 @@ int main() { // CHECK4: omp.dispatch.cond: // CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK4-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK4-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] // CHECK4-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -9463,33 +9465,34 @@ int main() { // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: // CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 // CHECK9-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP14]] to i32 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP13]], [[CONV7]] +// CHECK9-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP15]] to i32 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: // CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CONV9:%.*]] = sext i32 [[TMP16]] to i64 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] -// CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 +// CHECK9-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK9-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: // CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK9-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: // CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 @@ -9502,34 +9505,34 @@ int main() { // CHECK9-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX]], align 8 // CHECK9-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP3]], align 8 // CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK9-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM12]] -// CHECK9-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX13]], align 8 -// CHECK9-NEXT: [[ADD14:%.*]] = fadd double [[TMP25]], [[TMP28]] +// CHECK9-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM13]] +// CHECK9-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX14]], align 8 +// CHECK9-NEXT: [[ADD15:%.*]] = fadd double [[TMP25]], [[TMP28]] // CHECK9-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP1]], align 8 // CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK9-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK9-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM15]] -// CHECK9-NEXT: store double [[ADD14]], double* [[ARRAYIDX16]], align 8 +// CHECK9-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM16]] +// CHECK9-NEXT: store double [[ADD15]], double* [[ARRAYIDX17]], align 8 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: // CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP31]], 1 -// CHECK9-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK9-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: // CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] -// CHECK9-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK9-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] -// CHECK9-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK9-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: // CHECK9-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -11680,33 +11683,34 @@ int main() { // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: // CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 // CHECK9-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP14]] to i32 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP13]], [[CONV7]] +// CHECK9-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP15]] to i32 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: // CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CONV9:%.*]] = sext i32 [[TMP16]] to i64 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] -// CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 +// CHECK9-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK9-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: // CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK9-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: // CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 @@ -11719,34 +11723,34 @@ int main() { // CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP3]], align 8 // CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK9-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i64 [[IDXPROM12]] -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[ARRAYIDX13]], align 4 -// CHECK9-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP25]], [[TMP28]] +// CHECK9-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i64 [[IDXPROM13]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[ARRAYIDX14]], align 4 +// CHECK9-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP25]], [[TMP28]] // CHECK9-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP1]], align 8 // CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK9-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK9-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i64 [[IDXPROM15]] -// CHECK9-NEXT: store i32 [[ADD14]], i32* [[ARRAYIDX16]], align 4 +// CHECK9-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i64 [[IDXPROM16]] +// CHECK9-NEXT: store i32 [[ADD15]], i32* [[ARRAYIDX17]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: // CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP31]], 1 -// CHECK9-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK9-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: // CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] -// CHECK9-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK9-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] -// CHECK9-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK9-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: // CHECK9-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -13892,33 +13896,34 @@ int main() { // CHECK10-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK10: omp.dispatch.cond: // CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 // CHECK10-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK10-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP14]] to i32 -// CHECK10-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP13]], [[CONV7]] +// CHECK10-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP14]] // CHECK10-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: // CHECK10-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK10-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP15]] to i32 // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: // CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[CONV9:%.*]] = sext i32 [[TMP16]] to i64 // CHECK10-NEXT: br label [[COND_END]] // CHECK10: cond.end: -// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] -// CHECK10-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] +// CHECK10-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 +// CHECK10-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK10-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK10-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK10-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK10-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK10: omp.dispatch.body: // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: // CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK10-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK10-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK10-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: // CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 @@ -13931,34 +13936,34 @@ int main() { // CHECK10-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX]], align 8 // CHECK10-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP3]], align 8 // CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK10-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK10-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM12]] -// CHECK10-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX13]], align 8 -// CHECK10-NEXT: [[ADD14:%.*]] = fadd double [[TMP25]], [[TMP28]] +// CHECK10-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK10-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM13]] +// CHECK10-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX14]], align 8 +// CHECK10-NEXT: [[ADD15:%.*]] = fadd double [[TMP25]], [[TMP28]] // CHECK10-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP1]], align 8 // CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK10-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK10-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM15]] -// CHECK10-NEXT: store double [[ADD14]], double* [[ARRAYIDX16]], align 8 +// CHECK10-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK10-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM16]] +// CHECK10-NEXT: store double [[ADD15]], double* [[ARRAYIDX17]], align 8 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: // CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP31]], 1 -// CHECK10-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK10-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK10: omp.dispatch.inc: // CHECK10-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK10-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK10-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] -// CHECK10-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK10-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4 // CHECK10-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK10-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] -// CHECK10-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK10-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK10: omp.dispatch.end: // CHECK10-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -16109,33 +16114,34 @@ int main() { // CHECK10-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK10: omp.dispatch.cond: // CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 // CHECK10-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK10-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP14]] to i32 -// CHECK10-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP13]], [[CONV7]] +// CHECK10-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP14]] // CHECK10-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: // CHECK10-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK10-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP15]] to i32 // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: // CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[CONV9:%.*]] = sext i32 [[TMP16]] to i64 // CHECK10-NEXT: br label [[COND_END]] // CHECK10: cond.end: -// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] -// CHECK10-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] +// CHECK10-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 +// CHECK10-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK10-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK10-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK10-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK10-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK10: omp.dispatch.body: // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: // CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK10-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK10-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK10-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: // CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 @@ -16148,34 +16154,34 @@ int main() { // CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK10-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP3]], align 8 // CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK10-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK10-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i64 [[IDXPROM12]] -// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[ARRAYIDX13]], align 4 -// CHECK10-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP25]], [[TMP28]] +// CHECK10-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK10-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i64 [[IDXPROM13]] +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[ARRAYIDX14]], align 4 +// CHECK10-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP25]], [[TMP28]] // CHECK10-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP1]], align 8 // CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK10-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK10-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i64 [[IDXPROM15]] -// CHECK10-NEXT: store i32 [[ADD14]], i32* [[ARRAYIDX16]], align 4 +// CHECK10-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK10-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i64 [[IDXPROM16]] +// CHECK10-NEXT: store i32 [[ADD15]], i32* [[ARRAYIDX17]], align 4 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: // CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP31]], 1 -// CHECK10-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK10-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK10: omp.dispatch.inc: // CHECK10-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK10-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK10-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] -// CHECK10-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK10-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4 // CHECK10-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK10-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] -// CHECK10-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK10-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK10: omp.dispatch.end: // CHECK10-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -18271,7 +18277,7 @@ int main() { // CHECK11: omp.dispatch.cond: // CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -20413,7 +20419,7 @@ int main() { // CHECK11: omp.dispatch.cond: // CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -22550,7 +22556,7 @@ int main() { // CHECK12: omp.dispatch.cond: // CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK12-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK12-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] // CHECK12-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK12: cond.true: // CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -24692,7 +24698,7 @@ int main() { // CHECK12: omp.dispatch.cond: // CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK12-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK12-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] // CHECK12-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK12: cond.true: // CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp index 60b4685ed9b46..c9a16f2db634c 100644 --- a/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp @@ -2084,33 +2084,34 @@ int main() { // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: // CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 // CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP14]] to i32 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP13]], [[CONV7]] +// CHECK1-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP14]] // CHECK1-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP15]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CONV9:%.*]] = sext i32 [[TMP16]] to i64 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 +// CHECK1-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK1-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !40 -// CHECK1-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 @@ -2123,15 +2124,15 @@ int main() { // CHECK1-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !40 // CHECK1-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !40 // CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !40 -// CHECK1-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM12]] -// CHECK1-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX13]], align 8, !llvm.access.group !40 -// CHECK1-NEXT: [[ADD14:%.*]] = fadd double [[TMP25]], [[TMP28]] +// CHECK1-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM13]] +// CHECK1-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX14]], align 8, !llvm.access.group !40 +// CHECK1-NEXT: [[ADD15:%.*]] = fadd double [[TMP25]], [[TMP28]] // CHECK1-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !40 // CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !40 -// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM15]] -// CHECK1-NEXT: store double [[ADD14]], double* [[ARRAYIDX16]], align 8, !llvm.access.group !40 +// CHECK1-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM16]] +// CHECK1-NEXT: store double [[ADD15]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !40 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 0 // CHECK1-NEXT: store double** [[TMP1]], double*** [[TMP31]], align 8, !llvm.access.group !40 // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 1 @@ -2146,20 +2147,20 @@ int main() { // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: // CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 -// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP35]], 1 -// CHECK1-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK1-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] -// CHECK1-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] +// CHECK1-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] -// CHECK1-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] +// CHECK1-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: // CHECK1-NEXT: [[TMP40:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -2170,11 +2171,11 @@ int main() { // CHECK1-NEXT: br i1 [[TMP43]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP44]], 0 -// CHECK1-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 -// CHECK1-NEXT: [[MUL22:%.*]] = mul nsw i32 [[DIV21]], 1 -// CHECK1-NEXT: [[ADD23:%.*]] = add nsw i32 0, [[MUL22]] -// CHECK1-NEXT: store i32 [[ADD23]], i32* [[I6]], align 4 +// CHECK1-NEXT: [[SUB21:%.*]] = sub nsw i32 [[TMP44]], 0 +// CHECK1-NEXT: [[DIV22:%.*]] = sdiv i32 [[SUB21]], 1 +// CHECK1-NEXT: [[MUL23:%.*]] = mul nsw i32 [[DIV22]], 1 +// CHECK1-NEXT: [[ADD24:%.*]] = add nsw i32 0, [[MUL23]] +// CHECK1-NEXT: store i32 [[ADD24]], i32* [[I6]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] @@ -4044,33 +4045,34 @@ int main() { // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: // CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 // CHECK2-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP14]] to i32 -// CHECK2-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP13]], [[CONV7]] +// CHECK2-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP14]] // CHECK2-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP15]] to i32 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CONV9:%.*]] = sext i32 [[TMP16]] to i64 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] -// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 +// CHECK2-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK2-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK2-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK2-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: // CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !40 -// CHECK2-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK2-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK2-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: // CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 @@ -4083,15 +4085,15 @@ int main() { // CHECK2-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !40 // CHECK2-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !40 // CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !40 -// CHECK2-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK2-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM12]] -// CHECK2-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX13]], align 8, !llvm.access.group !40 -// CHECK2-NEXT: [[ADD14:%.*]] = fadd double [[TMP25]], [[TMP28]] +// CHECK2-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK2-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM13]] +// CHECK2-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX14]], align 8, !llvm.access.group !40 +// CHECK2-NEXT: [[ADD15:%.*]] = fadd double [[TMP25]], [[TMP28]] // CHECK2-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !40 // CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !40 -// CHECK2-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK2-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM15]] -// CHECK2-NEXT: store double [[ADD14]], double* [[ARRAYIDX16]], align 8, !llvm.access.group !40 +// CHECK2-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK2-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM16]] +// CHECK2-NEXT: store double [[ADD15]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !40 // CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 0 // CHECK2-NEXT: store double** [[TMP1]], double*** [[TMP31]], align 8, !llvm.access.group !40 // CHECK2-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 1 @@ -4106,20 +4108,20 @@ int main() { // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: // CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 -// CHECK2-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP35]], 1 -// CHECK2-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK2-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK2-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: // CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] -// CHECK2-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] +// CHECK2-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] -// CHECK2-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] +// CHECK2-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: // CHECK2-NEXT: [[TMP40:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -4130,11 +4132,11 @@ int main() { // CHECK2-NEXT: br i1 [[TMP43]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: // CHECK2-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP44]], 0 -// CHECK2-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 -// CHECK2-NEXT: [[MUL22:%.*]] = mul nsw i32 [[DIV21]], 1 -// CHECK2-NEXT: [[ADD23:%.*]] = add nsw i32 0, [[MUL22]] -// CHECK2-NEXT: store i32 [[ADD23]], i32* [[I6]], align 4 +// CHECK2-NEXT: [[SUB21:%.*]] = sub nsw i32 [[TMP44]], 0 +// CHECK2-NEXT: [[DIV22:%.*]] = sdiv i32 [[SUB21]], 1 +// CHECK2-NEXT: [[MUL23:%.*]] = mul nsw i32 [[DIV22]], 1 +// CHECK2-NEXT: [[ADD24:%.*]] = add nsw i32 0, [[MUL23]] +// CHECK2-NEXT: store i32 [[ADD24]], i32* [[I6]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK2: .omp.final.done: // CHECK2-NEXT: br label [[OMP_PRECOND_END]] @@ -5964,7 +5966,7 @@ int main() { // CHECK3: omp.dispatch.cond: // CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] // CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -7859,7 +7861,7 @@ int main() { // CHECK4: omp.dispatch.cond: // CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK4-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK4-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] // CHECK4-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -10351,33 +10353,34 @@ int main() { // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: // CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 // CHECK9-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP14]] to i32 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP13]], [[CONV7]] +// CHECK9-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP15]] to i32 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: // CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CONV9:%.*]] = sext i32 [[TMP16]] to i64 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] -// CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 +// CHECK9-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK9-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: // CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !47 -// CHECK9-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK9-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: // CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 @@ -10390,34 +10393,34 @@ int main() { // CHECK9-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !47 // CHECK9-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !47 // CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !47 -// CHECK9-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM12]] -// CHECK9-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX13]], align 8, !llvm.access.group !47 -// CHECK9-NEXT: [[ADD14:%.*]] = fadd double [[TMP25]], [[TMP28]] +// CHECK9-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM13]] +// CHECK9-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX14]], align 8, !llvm.access.group !47 +// CHECK9-NEXT: [[ADD15:%.*]] = fadd double [[TMP25]], [[TMP28]] // CHECK9-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !47 // CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !47 -// CHECK9-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK9-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM15]] -// CHECK9-NEXT: store double [[ADD14]], double* [[ARRAYIDX16]], align 8, !llvm.access.group !47 +// CHECK9-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM16]] +// CHECK9-NEXT: store double [[ADD15]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !47 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: // CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 -// CHECK9-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP31]], 1 -// CHECK9-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK9-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: // CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] -// CHECK9-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK9-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] -// CHECK9-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK9-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: // CHECK9-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -10428,11 +10431,11 @@ int main() { // CHECK9-NEXT: br i1 [[TMP39]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP40]], 0 -// CHECK9-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 -// CHECK9-NEXT: [[MUL22:%.*]] = mul nsw i32 [[DIV21]], 1 -// CHECK9-NEXT: [[ADD23:%.*]] = add nsw i32 0, [[MUL22]] -// CHECK9-NEXT: store i32 [[ADD23]], i32* [[I6]], align 4 +// CHECK9-NEXT: [[SUB21:%.*]] = sub nsw i32 [[TMP40]], 0 +// CHECK9-NEXT: [[DIV22:%.*]] = sdiv i32 [[SUB21]], 1 +// CHECK9-NEXT: [[MUL23:%.*]] = mul nsw i32 [[DIV22]], 1 +// CHECK9-NEXT: [[ADD24:%.*]] = add nsw i32 0, [[MUL23]] +// CHECK9-NEXT: store i32 [[ADD24]], i32* [[I6]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: br label [[OMP_PRECOND_END]] @@ -12721,33 +12724,34 @@ int main() { // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: // CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 // CHECK9-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP14]] to i32 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP13]], [[CONV7]] +// CHECK9-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP15]] to i32 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: // CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CONV9:%.*]] = sext i32 [[TMP16]] to i64 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] -// CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 +// CHECK9-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK9-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: // CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !89 // CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !89 -// CHECK9-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK9-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: // CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !89 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 @@ -12760,34 +12764,34 @@ int main() { // CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !89 // CHECK9-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP3]], align 8, !llvm.access.group !89 // CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !89 -// CHECK9-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i64 [[IDXPROM12]] -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[ARRAYIDX13]], align 4, !llvm.access.group !89 -// CHECK9-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP25]], [[TMP28]] +// CHECK9-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i64 [[IDXPROM13]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[ARRAYIDX14]], align 4, !llvm.access.group !89 +// CHECK9-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP25]], [[TMP28]] // CHECK9-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP1]], align 8, !llvm.access.group !89 // CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !89 -// CHECK9-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK9-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i64 [[IDXPROM15]] -// CHECK9-NEXT: store i32 [[ADD14]], i32* [[ARRAYIDX16]], align 4, !llvm.access.group !89 +// CHECK9-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i64 [[IDXPROM16]] +// CHECK9-NEXT: store i32 [[ADD15]], i32* [[ARRAYIDX17]], align 4, !llvm.access.group !89 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: // CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !89 -// CHECK9-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP31]], 1 -// CHECK9-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !89 +// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK9-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !89 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP90:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: // CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] -// CHECK9-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK9-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] -// CHECK9-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK9-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: // CHECK9-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -12798,11 +12802,11 @@ int main() { // CHECK9-NEXT: br i1 [[TMP39]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP40]], 0 -// CHECK9-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 -// CHECK9-NEXT: [[MUL22:%.*]] = mul nsw i32 [[DIV21]], 1 -// CHECK9-NEXT: [[ADD23:%.*]] = add nsw i32 0, [[MUL22]] -// CHECK9-NEXT: store i32 [[ADD23]], i32* [[I6]], align 4 +// CHECK9-NEXT: [[SUB21:%.*]] = sub nsw i32 [[TMP40]], 0 +// CHECK9-NEXT: [[DIV22:%.*]] = sdiv i32 [[SUB21]], 1 +// CHECK9-NEXT: [[MUL23:%.*]] = mul nsw i32 [[DIV22]], 1 +// CHECK9-NEXT: [[ADD24:%.*]] = add nsw i32 0, [[MUL23]] +// CHECK9-NEXT: store i32 [[ADD24]], i32* [[I6]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: br label [[OMP_PRECOND_END]] @@ -15101,33 +15105,34 @@ int main() { // CHECK10-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK10: omp.dispatch.cond: // CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 // CHECK10-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK10-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP14]] to i32 -// CHECK10-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP13]], [[CONV7]] +// CHECK10-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP14]] // CHECK10-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: // CHECK10-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK10-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP15]] to i32 // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: // CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[CONV9:%.*]] = sext i32 [[TMP16]] to i64 // CHECK10-NEXT: br label [[COND_END]] // CHECK10: cond.end: -// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] -// CHECK10-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] +// CHECK10-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 +// CHECK10-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK10-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK10-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK10-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK10-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK10: omp.dispatch.body: // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: // CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !47 -// CHECK10-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK10-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK10-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK10-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: // CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 @@ -15140,34 +15145,34 @@ int main() { // CHECK10-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !47 // CHECK10-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !47 // CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !47 -// CHECK10-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK10-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM12]] -// CHECK10-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX13]], align 8, !llvm.access.group !47 -// CHECK10-NEXT: [[ADD14:%.*]] = fadd double [[TMP25]], [[TMP28]] +// CHECK10-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK10-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM13]] +// CHECK10-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX14]], align 8, !llvm.access.group !47 +// CHECK10-NEXT: [[ADD15:%.*]] = fadd double [[TMP25]], [[TMP28]] // CHECK10-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !47 // CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !47 -// CHECK10-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK10-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM15]] -// CHECK10-NEXT: store double [[ADD14]], double* [[ARRAYIDX16]], align 8, !llvm.access.group !47 +// CHECK10-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK10-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM16]] +// CHECK10-NEXT: store double [[ADD15]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !47 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: // CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 -// CHECK10-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP31]], 1 -// CHECK10-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK10-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK10-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK10: omp.dispatch.inc: // CHECK10-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK10-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK10-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] -// CHECK10-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK10-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4 // CHECK10-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK10-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] -// CHECK10-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK10-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK10: omp.dispatch.end: // CHECK10-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -15178,11 +15183,11 @@ int main() { // CHECK10-NEXT: br i1 [[TMP39]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK10: .omp.final.then: // CHECK10-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK10-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP40]], 0 -// CHECK10-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 -// CHECK10-NEXT: [[MUL22:%.*]] = mul nsw i32 [[DIV21]], 1 -// CHECK10-NEXT: [[ADD23:%.*]] = add nsw i32 0, [[MUL22]] -// CHECK10-NEXT: store i32 [[ADD23]], i32* [[I6]], align 4 +// CHECK10-NEXT: [[SUB21:%.*]] = sub nsw i32 [[TMP40]], 0 +// CHECK10-NEXT: [[DIV22:%.*]] = sdiv i32 [[SUB21]], 1 +// CHECK10-NEXT: [[MUL23:%.*]] = mul nsw i32 [[DIV22]], 1 +// CHECK10-NEXT: [[ADD24:%.*]] = add nsw i32 0, [[MUL23]] +// CHECK10-NEXT: store i32 [[ADD24]], i32* [[I6]], align 4 // CHECK10-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK10: .omp.final.done: // CHECK10-NEXT: br label [[OMP_PRECOND_END]] @@ -17471,33 +17476,34 @@ int main() { // CHECK10-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK10: omp.dispatch.cond: // CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 // CHECK10-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK10-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP14]] to i32 -// CHECK10-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP13]], [[CONV7]] +// CHECK10-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP14]] // CHECK10-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: // CHECK10-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK10-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP15]] to i32 // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: // CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[CONV9:%.*]] = sext i32 [[TMP16]] to i64 // CHECK10-NEXT: br label [[COND_END]] // CHECK10: cond.end: -// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] -// CHECK10-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] +// CHECK10-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 +// CHECK10-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK10-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK10-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK10-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK10-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK10: omp.dispatch.body: // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: // CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !89 // CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !89 -// CHECK10-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK10-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK10-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK10-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: // CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !89 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 @@ -17510,34 +17516,34 @@ int main() { // CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !89 // CHECK10-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP3]], align 8, !llvm.access.group !89 // CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !89 -// CHECK10-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK10-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i64 [[IDXPROM12]] -// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[ARRAYIDX13]], align 4, !llvm.access.group !89 -// CHECK10-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP25]], [[TMP28]] +// CHECK10-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK10-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i64 [[IDXPROM13]] +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[ARRAYIDX14]], align 4, !llvm.access.group !89 +// CHECK10-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP25]], [[TMP28]] // CHECK10-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP1]], align 8, !llvm.access.group !89 // CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !89 -// CHECK10-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK10-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i64 [[IDXPROM15]] -// CHECK10-NEXT: store i32 [[ADD14]], i32* [[ARRAYIDX16]], align 4, !llvm.access.group !89 +// CHECK10-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK10-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i64 [[IDXPROM16]] +// CHECK10-NEXT: store i32 [[ADD15]], i32* [[ARRAYIDX17]], align 4, !llvm.access.group !89 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: // CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !89 -// CHECK10-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP31]], 1 -// CHECK10-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !89 +// CHECK10-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK10-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !89 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP90:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK10: omp.dispatch.inc: // CHECK10-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK10-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK10-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] -// CHECK10-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK10-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4 // CHECK10-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK10-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] -// CHECK10-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK10-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK10: omp.dispatch.end: // CHECK10-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -17548,11 +17554,11 @@ int main() { // CHECK10-NEXT: br i1 [[TMP39]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK10: .omp.final.then: // CHECK10-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK10-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP40]], 0 -// CHECK10-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 -// CHECK10-NEXT: [[MUL22:%.*]] = mul nsw i32 [[DIV21]], 1 -// CHECK10-NEXT: [[ADD23:%.*]] = add nsw i32 0, [[MUL22]] -// CHECK10-NEXT: store i32 [[ADD23]], i32* [[I6]], align 4 +// CHECK10-NEXT: [[SUB21:%.*]] = sub nsw i32 [[TMP40]], 0 +// CHECK10-NEXT: [[DIV22:%.*]] = sdiv i32 [[SUB21]], 1 +// CHECK10-NEXT: [[MUL23:%.*]] = mul nsw i32 [[DIV22]], 1 +// CHECK10-NEXT: [[ADD24:%.*]] = add nsw i32 0, [[MUL23]] +// CHECK10-NEXT: store i32 [[ADD24]], i32* [[I6]], align 4 // CHECK10-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK10: .omp.final.done: // CHECK10-NEXT: br label [[OMP_PRECOND_END]] @@ -19801,7 +19807,7 @@ int main() { // CHECK11: omp.dispatch.cond: // CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -22096,7 +22102,7 @@ int main() { // CHECK11: omp.dispatch.cond: // CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -24401,7 +24407,7 @@ int main() { // CHECK12: omp.dispatch.cond: // CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK12-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK12-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] // CHECK12-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK12: cond.true: // CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -26696,7 +26702,7 @@ int main() { // CHECK12: omp.dispatch.cond: // CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK12-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK12-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] // CHECK12-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK12: cond.true: // CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp index de393701c2f0f..4256228fdea9a 100644 --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp @@ -18721,33 +18721,34 @@ int bar(int n){ // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CONV7:%.*]] = sext i32 [[TMP9]] to i64 // CHECK1-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP10]] to i32 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP9]], [[CONV7]] +// CHECK1-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP10]] // CHECK1-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP11]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CONV9:%.*]] = sext i32 [[TMP12]] to i64 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[TMP11]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 +// CHECK1-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK1-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 @@ -18764,20 +18765,20 @@ int bar(int n){ // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: // CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK1-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: store i32 [[ADD15]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: // CHECK1-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -20327,33 +20328,34 @@ int bar(int n){ // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CONV7:%.*]] = sext i32 [[TMP9]] to i64 // CHECK2-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP10]] to i32 -// CHECK2-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP9]], [[CONV7]] +// CHECK2-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP10]] // CHECK2-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP11]] to i32 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CONV9:%.*]] = sext i32 [[TMP12]] to i64 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] -// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[COND:%.*]] = phi i64 [ [[TMP11]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 +// CHECK2-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK2-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK2-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK2-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK2-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK2-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: // CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 @@ -20370,20 +20372,20 @@ int bar(int n){ // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: // CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK2-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK2-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: // CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK2-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK2-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK2-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK2-NEXT: store i32 [[ADD15]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: // CHECK2-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -21915,7 +21917,7 @@ int bar(int n){ // CHECK3: omp.dispatch.cond: // CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -23461,7 +23463,7 @@ int bar(int n){ // CHECK4: omp.dispatch.cond: // CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK4-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK4-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] // CHECK4-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp index 4b558c34e1e6f..7b17bb7824a03 100644 --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp @@ -274,63 +274,64 @@ int main(int argc, char **argv) { // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CONV8:%.*]] = sext i32 [[TMP10]] to i64 // CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP11]] to i32 -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP10]], [[CONV8]] +// CHECK1-NEXT: [[CMP9:%.*]] = icmp ugt i64 [[CONV8]], [[TMP11]] // CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV10:%.*]] = trunc i64 [[TMP12]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: // CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CONV10:%.*]] = sext i32 [[TMP13]] to i64 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV10]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[TMP12]], [[COND_TRUE]] ], [ [[CONV10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[CONV11:%.*]] = trunc i64 [[COND]] to i32 +// CHECK1-NEXT: store i32 [[CONV11]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[CMP13:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I7]], align 4 // CHECK1-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I7]]) #[[ATTR4:[0-9]+]] -// CHECK1-NEXT: [[CALL13:%.*]] = call i32 @_Z3fooPi(i32* [[TMP0]]) #[[ATTR4]] -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[CALL]], [[CALL13]] -// CHECK1-NEXT: [[CALL15:%.*]] = call i32 @_Z3fooPi(i32* [[CONV]]) #[[ATTR4]] -// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD14]], [[CALL15]] -// CHECK1-NEXT: store i32 [[ADD16]], i32* [[TMP0]], align 4 +// CHECK1-NEXT: [[CALL14:%.*]] = call i32 @_Z3fooPi(i32* [[TMP0]]) #[[ATTR4]] +// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[CALL]], [[CALL14]] +// CHECK1-NEXT: [[CALL16:%.*]] = call i32 @_Z3fooPi(i32* [[CONV]]) #[[ATTR4]] +// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[ADD15]], [[CALL16]] +// CHECK1-NEXT: store i32 [[ADD17]], i32* [[TMP0]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK1-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK1-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK1-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK1-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: // CHECK1-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -573,7 +574,7 @@ int main(int argc, char **argv) { // CHECK2: omp.dispatch.cond: // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK2-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP10]], [[TMP11]] // CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -869,7 +870,7 @@ int main(int argc, char **argv) { // CHECK3: omp.dispatch.cond: // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP10]], [[TMP11]] // CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -1178,63 +1179,64 @@ int main(int argc, char **argv) { // CHECK4-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK4: omp.dispatch.cond: // CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CONV8:%.*]] = sext i32 [[TMP10]] to i64 // CHECK4-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK4-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP11]] to i32 -// CHECK4-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP10]], [[CONV8]] +// CHECK4-NEXT: [[CMP9:%.*]] = icmp ugt i64 [[CONV8]], [[TMP11]] // CHECK4-NEXT: br i1 [[CMP9]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK4-NEXT: [[CONV10:%.*]] = trunc i64 [[TMP12]] to i32 // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: // CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CONV10:%.*]] = sext i32 [[TMP13]] to i64 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[CONV10]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] -// CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[COND:%.*]] = phi i64 [ [[TMP12]], [[COND_TRUE]] ], [ [[CONV10]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[CONV11:%.*]] = trunc i64 [[COND]] to i32 +// CHECK4-NEXT: store i32 [[CONV11]], i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK4-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK4-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK4-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK4-NEXT: br i1 [[CMP12]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK4: omp.dispatch.body: // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: // CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK4-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4-NEXT: [[CMP13:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK4-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: // CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[I7]], align 4 // CHECK4-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I7]]) #[[ATTR4:[0-9]+]] -// CHECK4-NEXT: [[CALL13:%.*]] = call i32 @_Z3fooPi(i32* [[TMP0]]) #[[ATTR4]] -// CHECK4-NEXT: [[ADD14:%.*]] = add nsw i32 [[CALL]], [[CALL13]] -// CHECK4-NEXT: [[CALL15:%.*]] = call i32 @_Z3fooPi(i32* [[CONV]]) #[[ATTR4]] -// CHECK4-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD14]], [[CALL15]] -// CHECK4-NEXT: store i32 [[ADD16]], i32* [[TMP0]], align 4 +// CHECK4-NEXT: [[CALL14:%.*]] = call i32 @_Z3fooPi(i32* [[TMP0]]) #[[ATTR4]] +// CHECK4-NEXT: [[ADD15:%.*]] = add nsw i32 [[CALL]], [[CALL14]] +// CHECK4-NEXT: [[CALL16:%.*]] = call i32 @_Z3fooPi(i32* [[CONV]]) #[[ATTR4]] +// CHECK4-NEXT: [[ADD17:%.*]] = add nsw i32 [[ADD15]], [[CALL16]] +// CHECK4-NEXT: store i32 [[ADD17]], i32* [[TMP0]], align 4 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: // CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK4-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK4-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK4: omp.dispatch.inc: // CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK4-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK4-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4 // CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK4-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK4-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK4: omp.dispatch.end: // CHECK4-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -1477,7 +1479,7 @@ int main(int argc, char **argv) { // CHECK5: omp.dispatch.cond: // CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP10]], [[TMP11]] // CHECK5-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -1773,7 +1775,7 @@ int main(int argc, char **argv) { // CHECK6: omp.dispatch.cond: // CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK6-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK6-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP10]], [[TMP11]] // CHECK6-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: // CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp index a348ff7d45780..35816730ed9b0 100644 --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp @@ -9488,73 +9488,73 @@ int bar(int n){ // CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK1-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] // CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 // CHECK1-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !12 +// CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 // CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[L_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[L_CASTED]], align 8, !llvm.access.group !12 +// CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[L_CASTED]], align 8 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK1-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP15]] to i8* -// CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8, !llvm.access.group !12 +// CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 // CHECK1-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP17]] to i8* -// CHECK1-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8, !llvm.access.group !12 +// CHECK1-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 // CHECK1-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP19]] to i8* -// CHECK1-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8, !llvm.access.group !12 +// CHECK1-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 // CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 // CHECK1-NEXT: [[TMP29:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 8, !llvm.access.group !12 +// CHECK1-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 8 // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 // CHECK1-NEXT: [[TMP31:%.*]] = inttoptr i64 [[TMP21]] to i8* -// CHECK1-NEXT: store i8* [[TMP31]], i8** [[TMP30]], align 8, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: store i8* [[TMP31]], i8** [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 // CHECK1-NEXT: [[TMP34:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP33]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i64)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP34]], i64 5), !llvm.access.group !12 +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP33]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i64)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP34]], i64 5) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] -// CHECK1-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] -// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] -// CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 // CHECK1-NEXT: [[CMP13:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] // CHECK1-NEXT: br i1 [[CMP13]], label [[COND_TRUE14:%.*]], label [[COND_FALSE15:%.*]] // CHECK1: cond.true14: -// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 // CHECK1-NEXT: br label [[COND_END16:%.*]] // CHECK1: cond.false15: -// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END16]] // CHECK1: cond.end16: // CHECK1-NEXT: [[COND17:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE14]] ], [ [[TMP44]], [[COND_FALSE15]] ] -// CHECK1-NEXT: store i32 [[COND17]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: store i32 [[TMP45]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK1-NEXT: store i32 [[COND17]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP45]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -9646,63 +9646,64 @@ int bar(int n){ // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CONV7:%.*]] = sext i32 [[TMP9]] to i64 // CHECK1-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP10]] to i32 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP9]], [[CONV7]] +// CHECK1-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP10]] // CHECK1-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP11]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CONV9:%.*]] = sext i32 [[TMP12]] to i64 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[TMP11]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 +// CHECK1-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK1-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 -// CHECK1-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !16 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !16 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I6]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !16 -// CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 8, !llvm.access.group !16 +// CHECK1-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I6]], align 4 +// CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 8 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK1-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: store i32 [[ADD15]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: // CHECK1-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -9713,11 +9714,11 @@ int bar(int n){ // CHECK1-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP30]], 0 -// CHECK1-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 -// CHECK1-NEXT: [[MUL17:%.*]] = mul nsw i32 [[DIV16]], 1 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 0, [[MUL17]] -// CHECK1-NEXT: store i32 [[ADD18]], i32* [[I6]], align 4 +// CHECK1-NEXT: [[SUB16:%.*]] = sub nsw i32 [[TMP30]], 0 +// CHECK1-NEXT: [[DIV17:%.*]] = sdiv i32 [[SUB16]], 1 +// CHECK1-NEXT: [[MUL18:%.*]] = mul nsw i32 [[DIV17]], 1 +// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i32 0, [[MUL18]] +// CHECK1-NEXT: store i32 [[ADD19]], i32* [[I6]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 @@ -9828,66 +9829,66 @@ int bar(int n){ // CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 // CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4, !llvm.access.group !19 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !19 +// CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK1-NEXT: [[TMP21:%.*]] = inttoptr i64 [[TMP15]] to i8* -// CHECK1-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8, !llvm.access.group !19 +// CHECK1-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 // CHECK1-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP17]] to i8* -// CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8, !llvm.access.group !19 +// CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 // CHECK1-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP19]] to i8* -// CHECK1-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8, !llvm.access.group !19 +// CHECK1-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 // CHECK1-NEXT: [[TMP27:%.*]] = bitcast [1000 x i16]* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8, !llvm.access.group !19 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !19 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 // CHECK1-NEXT: [[TMP30:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP29]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i16]*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP30]], i64 4), !llvm.access.group !19 +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP29]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i16]*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP30]], i64 4) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] -// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] -// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP37]], [[TMP38]] // CHECK1-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK1: cond.true11: -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END13:%.*]] // CHECK1: cond.false12: -// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END13]] // CHECK1: cond.end13: // CHECK1-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP39]], [[COND_TRUE11]] ], [ [[TMP40]], [[COND_FALSE12]] ] -// CHECK1-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 -// CHECK1-NEXT: store i32 [[TMP41]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK1-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP41]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -9968,33 +9969,33 @@ int bar(int n){ // CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[CONV6:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group !22 +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 // CHECK1-NEXT: [[CMP7:%.*]] = icmp ule i64 [[CONV6]], [[TMP11]] // CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !22 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !22 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I5]], align 4 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], [1000 x i16]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX]], align 2, !llvm.access.group !22 +// CHECK1-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 // CHECK1-NEXT: [[CONV8:%.*]] = sext i16 [[TMP14]] to i32 // CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], 1 // CHECK1-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i16 -// CHECK1-NEXT: store i16 [[CONV10]], i16* [[ARRAYIDX]], align 2, !llvm.access.group !22 +// CHECK1-NEXT: store i16 [[CONV10]], i16* [[ARRAYIDX]], align 2 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -10083,53 +10084,53 @@ int bar(int n){ // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK1-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP8]] to i8* -// CHECK1-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 8, !llvm.access.group !25 +// CHECK1-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 8 // CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 // CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP10]] to i8* -// CHECK1-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8, !llvm.access.group !25 +// CHECK1-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8 // CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 // CHECK1-NEXT: [[TMP16:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8, !llvm.access.group !25 +// CHECK1-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8 // CHECK1-NEXT: [[TMP17:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP17]], i64 3), !llvm.access.group !25 +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP17]], i64 3) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK1: cond.true5: // CHECK1-NEXT: br label [[COND_END7:%.*]] // CHECK1: cond.false6: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END7]] // CHECK1: cond.end7: // CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] -// CHECK1-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 -// CHECK1-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK1-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -10182,31 +10183,31 @@ int bar(int n){ // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = sext i32 [[TMP6]] to i64 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group !28 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 // CHECK1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !28 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !28 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -10301,60 +10302,60 @@ int bar(int n){ // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[F_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP11]], i32* [[CONV3]], align 4, !llvm.access.group !31 -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[F_CASTED]], align 8, !llvm.access.group !31 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[CONV3]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[F_CASTED]], align 8 // CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP8]] to i8* -// CHECK1-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8, !llvm.access.group !31 +// CHECK1-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8 // CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 // CHECK1-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP10]] to i8* -// CHECK1-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8, !llvm.access.group !31 +// CHECK1-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8 // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 // CHECK1-NEXT: [[TMP18:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 8, !llvm.access.group !31 +// CHECK1-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 8 // CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 // CHECK1-NEXT: [[TMP20:%.*]] = inttoptr i64 [[TMP12]] to i8* -// CHECK1-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8, !llvm.access.group !31 +// CHECK1-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8 // CHECK1-NEXT: [[TMP21:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, [10 x [10 x i32]]*, i64)* @__omp_outlined__7 to i8*), i8* null, i8** [[TMP21]], i64 4), !llvm.access.group !31 +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, [10 x [10 x i32]]*, i64)* @__omp_outlined__7 to i8*), i8* null, i8** [[TMP21]], i64 4) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP28]], 99 // CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]] // CHECK1: cond.true7: // CHECK1-NEXT: br label [[COND_END9:%.*]] // CHECK1: cond.false8: -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END9]] // CHECK1: cond.end9: // CHECK1-NEXT: [[COND10:%.*]] = phi i32 [ 99, [[COND_TRUE7]] ], [ [[TMP29]], [[COND_FALSE8]] ] -// CHECK1-NEXT: store i32 [[COND10]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 -// CHECK1-NEXT: store i32 [[TMP30]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK1-NEXT: store i32 [[COND10]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP30]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -10414,49 +10415,49 @@ int bar(int n){ // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[CONV4:%.*]] = sext i32 [[TMP6]] to i64 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group !34 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 // CHECK1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV4]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !34 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[DIV5:%.*]] = sdiv i32 [[TMP10]], 10 // CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[DIV5]], 10 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL6]] // CHECK1-NEXT: [[MUL7:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL7]] -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[J]], align 4, !llvm.access.group !34 -// CHECK1-NEXT: store i32 10, i32* [[K]], align 4, !llvm.access.group !34 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !34 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !34 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !34 +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[J]], align 4 +// CHECK1-NEXT: store i32 10, i32* [[K]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8 // CHECK1-NEXT: [[MUL9:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP11]], [[MUL9]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4, !llvm.access.group !34 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4 // CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD10]], [[TMP14]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !34 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !34 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[J]], align 4 // CHECK1-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP16]] to i64 // CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM12]] -// CHECK1-NEXT: store i32 [[ADD11]], i32* [[ARRAYIDX13]], align 4, !llvm.access.group !34 +// CHECK1-NEXT: store i32 [[ADD11]], i32* [[ARRAYIDX13]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -10574,69 +10575,69 @@ int bar(int n){ // CHECK2-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK2-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] // CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[L_ADDR]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: store i32 [[TMP18]], i32* [[L_CASTED]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[L_CASTED]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[L_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP18]], i32* [[L_CASTED]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[L_CASTED]], align 4 // CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK2-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP14]] to i8* -// CHECK2-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4 // CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 // CHECK2-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP15]] to i8* -// CHECK2-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4 // CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 // CHECK2-NEXT: [[TMP25:%.*]] = inttoptr i32 [[TMP17]] to i8* -// CHECK2-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4 // CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 // CHECK2-NEXT: [[TMP27:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* -// CHECK2-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 4 // CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 // CHECK2-NEXT: [[TMP29:%.*]] = inttoptr i32 [[TMP19]] to i8* -// CHECK2-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 // CHECK2-NEXT: [[TMP32:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP31]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP32]], i32 5), !llvm.access.group !12 +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP31]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP32]], i32 5) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] -// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] -// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] -// CHECK2-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK2-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] // CHECK2-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK2: cond.true11: -// CHECK2-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK2-NEXT: br label [[COND_END13:%.*]] // CHECK2: cond.false12: -// CHECK2-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END13]] // CHECK2: cond.end13: // CHECK2-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE11]] ], [ [[TMP42]], [[COND_FALSE12]] ] -// CHECK2-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: store i32 [[TMP43]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK2-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP43]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -10725,7 +10726,7 @@ int bar(int n){ // CHECK2: omp.dispatch.cond: // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK2-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] // CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -10745,28 +10746,28 @@ int bar(int n){ // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !16 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !16 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP19]] -// CHECK2-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !16 -// CHECK2-NEXT: store i32 [[TMP20]], i32* [[L_ADDR]], align 4, !llvm.access.group !16 +// CHECK2-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK2-NEXT: store i32 [[TMP20]], i32* [[L_ADDR]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: @@ -10900,63 +10901,63 @@ int bar(int n){ // CHECK2-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] // CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !19 -// CHECK2-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4, !llvm.access.group !19 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 // CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK2-NEXT: [[TMP19:%.*]] = inttoptr i32 [[TMP14]] to i8* -// CHECK2-NEXT: store i8* [[TMP19]], i8** [[TMP18]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: store i8* [[TMP19]], i8** [[TMP18]], align 4 // CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 // CHECK2-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP15]] to i8* -// CHECK2-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4 // CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 // CHECK2-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP17]] to i8* -// CHECK2-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4 // CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 // CHECK2-NEXT: [[TMP25:%.*]] = bitcast [1000 x i16]* [[TMP0]] to i8* -// CHECK2-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4, !llvm.access.group !19 -// CHECK2-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4, !llvm.access.group !19 -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 // CHECK2-NEXT: [[TMP28:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP27]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i16]*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP28]], i32 4), !llvm.access.group !19 +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP27]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i16]*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP28]], i32 4) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] -// CHECK2-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 -// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] -// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 -// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] -// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 -// CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 -// CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]] // CHECK2-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK2: cond.true10: -// CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: br label [[COND_END12:%.*]] // CHECK2: cond.false11: -// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END12]] // CHECK2: cond.end12: // CHECK2-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ] -// CHECK2-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 -// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 -// CHECK2-NEXT: store i32 [[TMP39]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK2-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP39]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -11034,31 +11035,31 @@ int bar(int n){ // CHECK2-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group !22 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 // CHECK2-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] // CHECK2-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !22 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !22 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[I3]], align 4 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], [1000 x i16]* [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK2-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX]], align 2, !llvm.access.group !22 +// CHECK2-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 // CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 // CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV]], 1 // CHECK2-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK2-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX]], align 2, !llvm.access.group !22 +// CHECK2-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX]], align 2 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -11147,51 +11148,51 @@ int bar(int n){ // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK2-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK2-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 // CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 // CHECK2-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to i8* -// CHECK2-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 // CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 // CHECK2-NEXT: [[TMP14:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* -// CHECK2-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4 // CHECK2-NEXT: [[TMP15:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP15]], i32 3), !llvm.access.group !25 +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP15]], i32 3) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 // CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK2: cond.true5: // CHECK2-NEXT: br label [[COND_END7:%.*]] // CHECK2: cond.false6: -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END7]] // CHECK2: cond.end7: // CHECK2-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] -// CHECK2-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 -// CHECK2-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK2-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -11242,29 +11243,29 @@ int bar(int n){ // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group !28 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 // CHECK2-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !28 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !28 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP9]] -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK2-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 +// CHECK2-NEXT: store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -11356,57 +11357,57 @@ int bar(int n){ // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[F_ADDR]], align 4, !llvm.access.group !31 -// CHECK2-NEXT: store i32 [[TMP9]], i32* [[F_CASTED]], align 4, !llvm.access.group !31 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[F_CASTED]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[F_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], i32* [[F_CASTED]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[F_CASTED]], align 4 // CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK2-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK2-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 // CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 // CHECK2-NEXT: [[TMP14:%.*]] = inttoptr i32 [[TMP8]] to i8* -// CHECK2-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4 // CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 // CHECK2-NEXT: [[TMP16:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* -// CHECK2-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 4 // CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 // CHECK2-NEXT: [[TMP18:%.*]] = inttoptr i32 [[TMP10]] to i8* -// CHECK2-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 4 // CHECK2-NEXT: [[TMP19:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x [10 x i32]]*, i32)* @__omp_outlined__7 to i8*), i8* null, i8** [[TMP19]], i32 4), !llvm.access.group !31 +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x [10 x i32]]*, i32)* @__omp_outlined__7 to i8*), i8* null, i8** [[TMP19]], i32 4) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK2-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 -// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP26]], 99 // CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] // CHECK2: cond.true6: // CHECK2-NEXT: br label [[COND_END8:%.*]] // CHECK2: cond.false7: -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END8]] // CHECK2: cond.end8: // CHECK2-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP27]], [[COND_FALSE7]] ] -// CHECK2-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 -// CHECK2-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK2-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -11463,46 +11464,46 @@ int bar(int n){ // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group !34 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 // CHECK2-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !34 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP10]], 10 // CHECK2-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 10 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL3]] // CHECK2-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]] -// CHECK2-NEXT: store i32 [[ADD5]], i32* [[J]], align 4, !llvm.access.group !34 -// CHECK2-NEXT: store i32 10, i32* [[K]], align 4, !llvm.access.group !34 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !34 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !34 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[F_ADDR]], align 4, !llvm.access.group !34 +// CHECK2-NEXT: store i32 [[ADD5]], i32* [[J]], align 4 +// CHECK2-NEXT: store i32 10, i32* [[K]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[F_ADDR]], align 4 // CHECK2-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], [[MUL6]] -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4, !llvm.access.group !34 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4 // CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD7]], [[TMP14]] -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !34 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i32 0, i32 [[TMP15]] -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !34 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[J]], align 4 // CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP16]] -// CHECK2-NEXT: store i32 [[ADD8]], i32* [[ARRAYIDX9]], align 4, !llvm.access.group !34 +// CHECK2-NEXT: store i32 [[ADD8]], i32* [[ARRAYIDX9]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK2-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -11620,69 +11621,69 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK3-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] // CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[L_ADDR]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: store i32 [[TMP18]], i32* [[L_CASTED]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[L_CASTED]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[L_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[L_CASTED]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[L_CASTED]], align 4 // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP14]] to i8* -// CHECK3-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 // CHECK3-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP15]] to i8* -// CHECK3-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4 // CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 // CHECK3-NEXT: [[TMP25:%.*]] = inttoptr i32 [[TMP17]] to i8* -// CHECK3-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4 // CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 // CHECK3-NEXT: [[TMP27:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* -// CHECK3-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 4 // CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 // CHECK3-NEXT: [[TMP29:%.*]] = inttoptr i32 [[TMP19]] to i8* -// CHECK3-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 // CHECK3-NEXT: [[TMP32:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP31]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP32]], i32 5), !llvm.access.group !12 +// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP31]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP32]], i32 5) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] -// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] -// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] // CHECK3-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK3: cond.true11: -// CHECK3-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: br label [[COND_END13:%.*]] // CHECK3: cond.false12: -// CHECK3-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END13]] // CHECK3: cond.end13: // CHECK3-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE11]] ], [ [[TMP42]], [[COND_FALSE12]] ] -// CHECK3-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: store i32 [[TMP43]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK3-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP43]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -11771,7 +11772,7 @@ int bar(int n){ // CHECK3: omp.dispatch.cond: // CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -11791,28 +11792,28 @@ int bar(int n){ // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP19]] -// CHECK3-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: store i32 [[TMP20]], i32* [[L_ADDR]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], i32* [[L_ADDR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: @@ -11946,63 +11947,63 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK3-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !19 -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4, !llvm.access.group !19 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 // CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP19:%.*]] = inttoptr i32 [[TMP14]] to i8* -// CHECK3-NEXT: store i8* [[TMP19]], i8** [[TMP18]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: store i8* [[TMP19]], i8** [[TMP18]], align 4 // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 // CHECK3-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP15]] to i8* -// CHECK3-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 // CHECK3-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP17]] to i8* -// CHECK3-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4 // CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 // CHECK3-NEXT: [[TMP25:%.*]] = bitcast [1000 x i16]* [[TMP0]] to i8* -// CHECK3-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4, !llvm.access.group !19 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4, !llvm.access.group !19 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 // CHECK3-NEXT: [[TMP28:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP27]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i16]*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP28]], i32 4), !llvm.access.group !19 +// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP27]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i16]*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP28]], i32 4) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] -// CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] -// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 -// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 -// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 -// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]] // CHECK3-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK3: cond.true10: -// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END12:%.*]] // CHECK3: cond.false11: -// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END12]] // CHECK3: cond.end12: // CHECK3-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ] -// CHECK3-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 -// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 -// CHECK3-NEXT: store i32 [[TMP39]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK3-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP39]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -12080,31 +12081,31 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 // CHECK3-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] // CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !22 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[I3]], align 4 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], [1000 x i16]* [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX]], align 2, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 // CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 // CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK3-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX]], align 2, !llvm.access.group !22 +// CHECK3-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX]], align 2 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -12193,51 +12194,51 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK3-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 // CHECK3-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to i8* -// CHECK3-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 // CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 // CHECK3-NEXT: [[TMP14:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* -// CHECK3-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4 // CHECK3-NEXT: [[TMP15:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP15]], i32 3), !llvm.access.group !25 +// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP15]], i32 3) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK3: cond.true5: // CHECK3-NEXT: br label [[COND_END7:%.*]] // CHECK3: cond.false6: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END7]] // CHECK3: cond.end7: // CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] -// CHECK3-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 -// CHECK3-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK3-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -12288,29 +12289,29 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group !28 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 // CHECK3-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !28 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !28 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP9]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 +// CHECK3-NEXT: store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -12402,57 +12403,57 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[F_ADDR]], align 4, !llvm.access.group !31 -// CHECK3-NEXT: store i32 [[TMP9]], i32* [[F_CASTED]], align 4, !llvm.access.group !31 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[F_CASTED]], align 4, !llvm.access.group !31 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[F_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[F_CASTED]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[F_CASTED]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK3-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4, !llvm.access.group !31 +// CHECK3-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 // CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 // CHECK3-NEXT: [[TMP14:%.*]] = inttoptr i32 [[TMP8]] to i8* -// CHECK3-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4, !llvm.access.group !31 +// CHECK3-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4 // CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 // CHECK3-NEXT: [[TMP16:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* -// CHECK3-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 4, !llvm.access.group !31 +// CHECK3-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 4 // CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 // CHECK3-NEXT: [[TMP18:%.*]] = inttoptr i32 [[TMP10]] to i8* -// CHECK3-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 4, !llvm.access.group !31 +// CHECK3-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 4 // CHECK3-NEXT: [[TMP19:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x [10 x i32]]*, i32)* @__omp_outlined__7 to i8*), i8* null, i8** [[TMP19]], i32 4), !llvm.access.group !31 +// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x [10 x i32]]*, i32)* @__omp_outlined__7 to i8*), i8* null, i8** [[TMP19]], i32 4) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP26]], 99 // CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] // CHECK3: cond.true6: // CHECK3-NEXT: br label [[COND_END8:%.*]] // CHECK3: cond.false7: -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END8]] // CHECK3: cond.end8: // CHECK3-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP27]], [[COND_FALSE7]] ] -// CHECK3-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 -// CHECK3-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK3-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -12509,46 +12510,46 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group !34 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 // CHECK3-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !34 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP10]], 10 // CHECK3-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 10 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL3]] // CHECK3-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]] -// CHECK3-NEXT: store i32 [[ADD5]], i32* [[J]], align 4, !llvm.access.group !34 -// CHECK3-NEXT: store i32 10, i32* [[K]], align 4, !llvm.access.group !34 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !34 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !34 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[F_ADDR]], align 4, !llvm.access.group !34 +// CHECK3-NEXT: store i32 [[ADD5]], i32* [[J]], align 4 +// CHECK3-NEXT: store i32 10, i32* [[K]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[F_ADDR]], align 4 // CHECK3-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], [[MUL6]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4, !llvm.access.group !34 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD7]], [[TMP14]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !34 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i32 0, i32 [[TMP15]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !34 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[J]], align 4 // CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP16]] -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[ARRAYIDX9]], align 4, !llvm.access.group !34 +// CHECK3-NEXT: store i32 [[ADD8]], i32* [[ARRAYIDX9]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK3-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_ast_print.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_ast_print.cpp index 67554172e9485..39813253f0af9 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_ast_print.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_ast_print.cpp @@ -1,8 +1,8 @@ -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -ast-print %s -Wno-openmp-mapping -Wsign-conversion | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -ast-print %s -Wno-openmp-mapping | FileCheck %s // RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -emit-pch -o %t %s -Wno-openmp-mapping // RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print -Wno-openmp-mapping | FileCheck %s -// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -ast-print %s -Wno-openmp-mapping -Wsign-conversion | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -ast-print %s -Wno-openmp-mapping | FileCheck %s // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -emit-pch -o %t %s -Wno-openmp-mapping // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print -Wno-openmp-mapping | FileCheck %s // expected-no-diagnostics diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp index 364dac905a3d2..3d9b9c871d95b 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp @@ -741,33 +741,34 @@ int main (int argc, char **argv) { // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: // CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV2]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CONV3:%.*]] = sext i32 [[TMP8]] to i64 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[TMP7]], [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[COND]] to i32 +// CHECK1-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 @@ -783,20 +784,20 @@ int main (int argc, char **argv) { // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: // CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) @@ -1655,33 +1656,34 @@ int main (int argc, char **argv) { // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: // CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] +// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV2]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: // CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CONV3:%.*]] = sext i32 [[TMP8]] to i64 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] -// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[COND:%.*]] = phi i64 [ [[TMP7]], [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[CONV4:%.*]] = trunc i64 [[COND]] to i32 +// CHECK2-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK2-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: // CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 @@ -1697,20 +1699,20 @@ int main (int argc, char **argv) { // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK2-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK2-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: // CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) @@ -2556,7 +2558,7 @@ int main (int argc, char **argv) { // CHECK3: omp.dispatch.cond: // CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -3443,7 +3445,7 @@ int main (int argc, char **argv) { // CHECK4: omp.dispatch.cond: // CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] +// CHECK4-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -4343,33 +4345,34 @@ int main (int argc, char **argv) { // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: // CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 // CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV2]], [[TMP6]] // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: // CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CONV3:%.*]] = sext i32 [[TMP8]] to i64 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] -// CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[COND:%.*]] = phi i64 [ [[TMP7]], [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[CONV4:%.*]] = trunc i64 [[COND]] to i32 +// CHECK5-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: // CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: // CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 @@ -4385,20 +4388,20 @@ int main (int argc, char **argv) { // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: // CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK5-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK5-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK5: omp.dispatch.inc: // CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK5-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK5-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK5-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK5-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK5: omp.dispatch.end: // CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) @@ -5257,33 +5260,34 @@ int main (int argc, char **argv) { // CHECK6-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK6: omp.dispatch.cond: // CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 // CHECK6-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK6-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] +// CHECK6-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV2]], [[TMP6]] // CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: // CHECK6-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK6-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 // CHECK6-NEXT: br label [[COND_END:%.*]] // CHECK6: cond.false: // CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[CONV3:%.*]] = sext i32 [[TMP8]] to i64 // CHECK6-NEXT: br label [[COND_END]] // CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] -// CHECK6-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[COND:%.*]] = phi i64 [ [[TMP7]], [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] +// CHECK6-NEXT: [[CONV4:%.*]] = trunc i64 [[COND]] to i32 +// CHECK6-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK6-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK6-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK6-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK6-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK6: omp.dispatch.body: // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: // CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK6-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK6-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK6-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: // CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 @@ -5299,20 +5303,20 @@ int main (int argc, char **argv) { // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: // CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK6-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK6-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK6: omp.dispatch.inc: // CHECK6-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK6-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK6-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK6-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 // CHECK6-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK6-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK6-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK6-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK6: omp.dispatch.end: // CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) @@ -6158,7 +6162,7 @@ int main (int argc, char **argv) { // CHECK7: omp.dispatch.cond: // CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] +// CHECK7-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -7045,7 +7049,7 @@ int main (int argc, char **argv) { // CHECK8: omp.dispatch.cond: // CHECK8-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] +// CHECK8-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] // CHECK8-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK8: cond.true: // CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -9533,33 +9537,34 @@ int main (int argc, char **argv) { // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: // CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 // CHECK13-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] +// CHECK13-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV3]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: // CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CONV4:%.*]] = sext i32 [[TMP9]] to i64 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] -// CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[COND:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[CONV5:%.*]] = trunc i64 [[COND]] to i32 +// CHECK13-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: // CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK13-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: // CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 @@ -9574,20 +9579,20 @@ int main (int argc, char **argv) { // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: // CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK13: omp.dispatch.inc: // CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK13-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK13-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK13-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK13: omp.dispatch.end: // CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) @@ -12052,33 +12057,34 @@ int main (int argc, char **argv) { // CHECK14-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK14: omp.dispatch.cond: // CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 // CHECK14-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] +// CHECK14-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV3]], [[TMP7]] // CHECK14-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: // CHECK14-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: // CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[CONV4:%.*]] = sext i32 [[TMP9]] to i64 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] -// CHECK14-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[COND:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[CONV5:%.*]] = trunc i64 [[COND]] to i32 +// CHECK14-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK14-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK14-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK14: omp.dispatch.body: // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: // CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK14-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK14-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK14-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: // CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 @@ -12093,20 +12099,20 @@ int main (int argc, char **argv) { // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: // CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK14-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK14: omp.dispatch.inc: // CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK14-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK14-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK14-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK14: omp.dispatch.end: // CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) @@ -14488,7 +14494,7 @@ int main (int argc, char **argv) { // CHECK15: omp.dispatch.cond: // CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], [[TMP7]] // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -16905,7 +16911,7 @@ int main (int argc, char **argv) { // CHECK16: omp.dispatch.cond: // CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK16-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +// CHECK16-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], [[TMP7]] // CHECK16-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK16: cond.true: // CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -19405,33 +19411,34 @@ int main (int argc, char **argv) { // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: // CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 // CHECK17-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] +// CHECK17-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV3]], [[TMP7]] // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: // CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CONV4:%.*]] = sext i32 [[TMP9]] to i64 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] -// CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[COND:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[CONV5:%.*]] = trunc i64 [[COND]] to i32 +// CHECK17-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: // CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK17-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: // CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 @@ -19446,20 +19453,20 @@ int main (int argc, char **argv) { // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: // CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK17-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: // CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK17-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK17-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK17-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: // CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) @@ -21924,33 +21931,34 @@ int main (int argc, char **argv) { // CHECK18-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK18: omp.dispatch.cond: // CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK18-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 // CHECK18-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK18-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK18-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] +// CHECK18-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV3]], [[TMP7]] // CHECK18-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK18: cond.true: // CHECK18-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK18-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK18-NEXT: br label [[COND_END:%.*]] // CHECK18: cond.false: // CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK18-NEXT: [[CONV4:%.*]] = sext i32 [[TMP9]] to i64 // CHECK18-NEXT: br label [[COND_END]] // CHECK18: cond.end: -// CHECK18-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] -// CHECK18-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK18-NEXT: [[COND:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] +// CHECK18-NEXT: [[CONV5:%.*]] = trunc i64 [[COND]] to i32 +// CHECK18-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK18-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK18-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK18-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK18-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK18-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK18: omp.dispatch.body: // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: // CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK18-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK18-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK18-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK18-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: // CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 @@ -21965,20 +21973,20 @@ int main (int argc, char **argv) { // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: // CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK18-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK18-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK18: omp.dispatch.inc: // CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK18-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK18-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK18-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK18-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 // CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK18-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK18-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK18-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK18-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK18: omp.dispatch.end: // CHECK18-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) @@ -24360,7 +24368,7 @@ int main (int argc, char **argv) { // CHECK19: omp.dispatch.cond: // CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], [[TMP7]] // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -26777,7 +26785,7 @@ int main (int argc, char **argv) { // CHECK20: omp.dispatch.cond: // CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK20-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK20-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +// CHECK20-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], [[TMP7]] // CHECK20-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK20: cond.true: // CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp index da7a6b1712ce6..411dd6de11286 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp @@ -776,33 +776,34 @@ int main (int argc, char **argv) { // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: // CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV2]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CONV3:%.*]] = sext i32 [[TMP8]] to i64 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[TMP7]], [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[COND]] to i32 +// CHECK1-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 @@ -818,20 +819,20 @@ int main (int argc, char **argv) { // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: // CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) @@ -1760,33 +1761,34 @@ int main (int argc, char **argv) { // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: // CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] +// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV2]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: // CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CONV3:%.*]] = sext i32 [[TMP8]] to i64 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] -// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[COND:%.*]] = phi i64 [ [[TMP7]], [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[CONV4:%.*]] = trunc i64 [[COND]] to i32 +// CHECK2-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK2-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: // CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 @@ -1802,20 +1804,20 @@ int main (int argc, char **argv) { // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK2-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK2-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: // CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) @@ -2731,7 +2733,7 @@ int main (int argc, char **argv) { // CHECK3: omp.dispatch.cond: // CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -3688,7 +3690,7 @@ int main (int argc, char **argv) { // CHECK4: omp.dispatch.cond: // CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] +// CHECK4-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -4658,33 +4660,34 @@ int main (int argc, char **argv) { // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: // CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 // CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV2]], [[TMP6]] // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: // CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CONV3:%.*]] = sext i32 [[TMP8]] to i64 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] -// CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[COND:%.*]] = phi i64 [ [[TMP7]], [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[CONV4:%.*]] = trunc i64 [[COND]] to i32 +// CHECK5-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: // CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 -// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: // CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 @@ -4700,20 +4703,20 @@ int main (int argc, char **argv) { // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: // CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK5-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK5-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK5: omp.dispatch.inc: // CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK5-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK5-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK5-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK5-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK5: omp.dispatch.end: // CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) @@ -5642,33 +5645,34 @@ int main (int argc, char **argv) { // CHECK6-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK6: omp.dispatch.cond: // CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 // CHECK6-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK6-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] +// CHECK6-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV2]], [[TMP6]] // CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: // CHECK6-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK6-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 // CHECK6-NEXT: br label [[COND_END:%.*]] // CHECK6: cond.false: // CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[CONV3:%.*]] = sext i32 [[TMP8]] to i64 // CHECK6-NEXT: br label [[COND_END]] // CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] -// CHECK6-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[COND:%.*]] = phi i64 [ [[TMP7]], [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] +// CHECK6-NEXT: [[CONV4:%.*]] = trunc i64 [[COND]] to i32 +// CHECK6-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK6-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK6-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK6-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK6-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK6: omp.dispatch.body: // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: // CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 -// CHECK6-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK6-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK6-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK6-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: // CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 @@ -5684,20 +5688,20 @@ int main (int argc, char **argv) { // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: // CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK6-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK6-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK6-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK6-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK6: omp.dispatch.inc: // CHECK6-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK6-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK6-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK6-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 // CHECK6-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK6-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK6-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK6-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK6: omp.dispatch.end: // CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) @@ -6613,7 +6617,7 @@ int main (int argc, char **argv) { // CHECK7: omp.dispatch.cond: // CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] +// CHECK7-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -7570,7 +7574,7 @@ int main (int argc, char **argv) { // CHECK8: omp.dispatch.cond: // CHECK8-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] +// CHECK8-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] // CHECK8-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK8: cond.true: // CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -11018,33 +11022,34 @@ int main (int argc, char **argv) { // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: // CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 // CHECK13-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] +// CHECK13-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV3]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: // CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CONV4:%.*]] = sext i32 [[TMP9]] to i64 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] -// CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[COND:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[CONV5:%.*]] = trunc i64 [[COND]] to i32 +// CHECK13-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: // CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK13-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: // CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 @@ -11059,20 +11064,20 @@ int main (int argc, char **argv) { // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: // CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK13: omp.dispatch.inc: // CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK13-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK13-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK13-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK13: omp.dispatch.end: // CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) @@ -13727,33 +13732,34 @@ int main (int argc, char **argv) { // CHECK14-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK14: omp.dispatch.cond: // CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 // CHECK14-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] +// CHECK14-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV3]], [[TMP7]] // CHECK14-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: // CHECK14-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: // CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[CONV4:%.*]] = sext i32 [[TMP9]] to i64 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] -// CHECK14-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[COND:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[CONV5:%.*]] = trunc i64 [[COND]] to i32 +// CHECK14-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK14-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK14-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK14: omp.dispatch.body: // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: // CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 -// CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK14-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK14-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK14-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: // CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 @@ -13768,20 +13774,20 @@ int main (int argc, char **argv) { // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: // CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 -// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK14-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK14: omp.dispatch.inc: // CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK14-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK14-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK14-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK14: omp.dispatch.end: // CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) @@ -16353,7 +16359,7 @@ int main (int argc, char **argv) { // CHECK15: omp.dispatch.cond: // CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], [[TMP7]] // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -18960,7 +18966,7 @@ int main (int argc, char **argv) { // CHECK16: omp.dispatch.cond: // CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK16-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +// CHECK16-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], [[TMP7]] // CHECK16-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK16: cond.true: // CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -21650,33 +21656,34 @@ int main (int argc, char **argv) { // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: // CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 // CHECK17-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] +// CHECK17-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV3]], [[TMP7]] // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: // CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CONV4:%.*]] = sext i32 [[TMP9]] to i64 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] -// CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[COND:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[CONV5:%.*]] = trunc i64 [[COND]] to i32 +// CHECK17-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: // CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK17-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: // CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 @@ -21691,20 +21698,20 @@ int main (int argc, char **argv) { // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: // CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK17-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: // CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK17-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK17-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK17-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: // CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) @@ -24359,33 +24366,34 @@ int main (int argc, char **argv) { // CHECK18-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK18: omp.dispatch.cond: // CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK18-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 // CHECK18-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK18-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK18-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] +// CHECK18-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV3]], [[TMP7]] // CHECK18-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK18: cond.true: // CHECK18-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK18-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK18-NEXT: br label [[COND_END:%.*]] // CHECK18: cond.false: // CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK18-NEXT: [[CONV4:%.*]] = sext i32 [[TMP9]] to i64 // CHECK18-NEXT: br label [[COND_END]] // CHECK18: cond.end: -// CHECK18-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] -// CHECK18-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK18-NEXT: [[COND:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] +// CHECK18-NEXT: [[CONV5:%.*]] = trunc i64 [[COND]] to i32 +// CHECK18-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK18-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK18-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK18-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK18-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK18-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK18: omp.dispatch.body: // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: // CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 -// CHECK18-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK18-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK18-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK18-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: // CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 @@ -24400,20 +24408,20 @@ int main (int argc, char **argv) { // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: // CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 -// CHECK18-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK18-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK18-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK18-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK18: omp.dispatch.inc: // CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK18-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK18-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK18-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK18-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 // CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK18-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK18-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK18-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK18-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK18: omp.dispatch.end: // CHECK18-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) @@ -26985,7 +26993,7 @@ int main (int argc, char **argv) { // CHECK19: omp.dispatch.cond: // CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], [[TMP7]] // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -29592,7 +29600,7 @@ int main (int argc, char **argv) { // CHECK20: omp.dispatch.cond: // CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK20-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK20-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +// CHECK20-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], [[TMP7]] // CHECK20-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK20: cond.true: // CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp index b1a46f0dfbb58..a48230f44f502 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp @@ -755,33 +755,34 @@ int main (int argc, char **argv) { // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: // CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV2]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CONV3:%.*]] = sext i32 [[TMP8]] to i64 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[TMP7]], [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[COND]] to i32 +// CHECK1-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 @@ -797,20 +798,20 @@ int main (int argc, char **argv) { // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: // CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) @@ -1669,33 +1670,34 @@ int main (int argc, char **argv) { // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: // CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] +// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV2]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: // CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CONV3:%.*]] = sext i32 [[TMP8]] to i64 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] -// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[COND:%.*]] = phi i64 [ [[TMP7]], [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[CONV4:%.*]] = trunc i64 [[COND]] to i32 +// CHECK2-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK2-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: // CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 @@ -1711,20 +1713,20 @@ int main (int argc, char **argv) { // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK2-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK2-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: // CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) @@ -2570,7 +2572,7 @@ int main (int argc, char **argv) { // CHECK3: omp.dispatch.cond: // CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -3457,7 +3459,7 @@ int main (int argc, char **argv) { // CHECK4: omp.dispatch.cond: // CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] +// CHECK4-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -4357,33 +4359,34 @@ int main (int argc, char **argv) { // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: // CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 // CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV2]], [[TMP6]] // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: // CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CONV3:%.*]] = sext i32 [[TMP8]] to i64 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] -// CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[COND:%.*]] = phi i64 [ [[TMP7]], [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[CONV4:%.*]] = trunc i64 [[COND]] to i32 +// CHECK5-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: // CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: // CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 @@ -4399,20 +4402,20 @@ int main (int argc, char **argv) { // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: // CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK5-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK5-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK5: omp.dispatch.inc: // CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK5-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK5-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK5-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK5-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK5: omp.dispatch.end: // CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) @@ -5271,33 +5274,34 @@ int main (int argc, char **argv) { // CHECK6-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK6: omp.dispatch.cond: // CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 // CHECK6-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK6-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] +// CHECK6-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV2]], [[TMP6]] // CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: // CHECK6-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK6-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 // CHECK6-NEXT: br label [[COND_END:%.*]] // CHECK6: cond.false: // CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[CONV3:%.*]] = sext i32 [[TMP8]] to i64 // CHECK6-NEXT: br label [[COND_END]] // CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] -// CHECK6-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[COND:%.*]] = phi i64 [ [[TMP7]], [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] +// CHECK6-NEXT: [[CONV4:%.*]] = trunc i64 [[COND]] to i32 +// CHECK6-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK6-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK6-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK6-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK6-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK6: omp.dispatch.body: // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: // CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK6-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK6-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK6-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: // CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 @@ -5313,20 +5317,20 @@ int main (int argc, char **argv) { // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: // CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK6-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK6-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK6: omp.dispatch.inc: // CHECK6-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK6-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK6-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK6-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 // CHECK6-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK6-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK6-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK6-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK6: omp.dispatch.end: // CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) @@ -6172,7 +6176,7 @@ int main (int argc, char **argv) { // CHECK7: omp.dispatch.cond: // CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] +// CHECK7-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -7059,7 +7063,7 @@ int main (int argc, char **argv) { // CHECK8: omp.dispatch.cond: // CHECK8-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] +// CHECK8-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] // CHECK8-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK8: cond.true: // CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -9494,33 +9498,34 @@ int main (int argc, char **argv) { // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: // CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 // CHECK13-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] +// CHECK13-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV3]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: // CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CONV4:%.*]] = sext i32 [[TMP9]] to i64 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] -// CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[COND:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[CONV5:%.*]] = trunc i64 [[COND]] to i32 +// CHECK13-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: // CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK13-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: // CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 @@ -9535,20 +9540,20 @@ int main (int argc, char **argv) { // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: // CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK13: omp.dispatch.inc: // CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK13-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK13-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK13-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK13: omp.dispatch.end: // CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) @@ -11963,33 +11968,34 @@ int main (int argc, char **argv) { // CHECK14-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK14: omp.dispatch.cond: // CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 // CHECK14-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] +// CHECK14-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV3]], [[TMP7]] // CHECK14-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: // CHECK14-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: // CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[CONV4:%.*]] = sext i32 [[TMP9]] to i64 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] -// CHECK14-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[COND:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[CONV5:%.*]] = trunc i64 [[COND]] to i32 +// CHECK14-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK14-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK14-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK14: omp.dispatch.body: // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: // CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK14-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK14-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK14-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: // CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 @@ -12004,20 +12010,20 @@ int main (int argc, char **argv) { // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: // CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK14-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK14: omp.dispatch.inc: // CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK14-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK14-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK14-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK14: omp.dispatch.end: // CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) @@ -14369,7 +14375,7 @@ int main (int argc, char **argv) { // CHECK15: omp.dispatch.cond: // CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], [[TMP7]] // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -16756,7 +16762,7 @@ int main (int argc, char **argv) { // CHECK16: omp.dispatch.cond: // CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK16-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +// CHECK16-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], [[TMP7]] // CHECK16-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK16: cond.true: // CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -19206,33 +19212,34 @@ int main (int argc, char **argv) { // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: // CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 // CHECK17-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] +// CHECK17-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV3]], [[TMP7]] // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: // CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CONV4:%.*]] = sext i32 [[TMP9]] to i64 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] -// CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[COND:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[CONV5:%.*]] = trunc i64 [[COND]] to i32 +// CHECK17-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: // CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK17-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: // CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 @@ -19247,20 +19254,20 @@ int main (int argc, char **argv) { // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: // CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK17-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: // CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK17-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK17-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK17-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: // CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) @@ -21675,33 +21682,34 @@ int main (int argc, char **argv) { // CHECK18-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK18: omp.dispatch.cond: // CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK18-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 // CHECK18-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK18-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK18-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] +// CHECK18-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV3]], [[TMP7]] // CHECK18-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK18: cond.true: // CHECK18-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK18-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK18-NEXT: br label [[COND_END:%.*]] // CHECK18: cond.false: // CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK18-NEXT: [[CONV4:%.*]] = sext i32 [[TMP9]] to i64 // CHECK18-NEXT: br label [[COND_END]] // CHECK18: cond.end: -// CHECK18-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] -// CHECK18-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK18-NEXT: [[COND:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] +// CHECK18-NEXT: [[CONV5:%.*]] = trunc i64 [[COND]] to i32 +// CHECK18-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK18-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK18-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK18-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK18-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK18-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK18: omp.dispatch.body: // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: // CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK18-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK18-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK18-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK18-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: // CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 @@ -21716,20 +21724,20 @@ int main (int argc, char **argv) { // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: // CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK18-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK18-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK18: omp.dispatch.inc: // CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK18-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK18-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK18-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK18-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 // CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK18-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK18-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK18-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK18-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK18: omp.dispatch.end: // CHECK18-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) @@ -24081,7 +24089,7 @@ int main (int argc, char **argv) { // CHECK19: omp.dispatch.cond: // CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], [[TMP7]] // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -26468,7 +26476,7 @@ int main (int argc, char **argv) { // CHECK20: omp.dispatch.cond: // CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK20-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK20-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +// CHECK20-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], [[TMP7]] // CHECK20-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK20: cond.true: // CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp index c62b4386594ff..00ec282f9322c 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp @@ -800,33 +800,34 @@ int main (int argc, char **argv) { // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: // CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV2]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CONV3:%.*]] = sext i32 [[TMP8]] to i64 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[TMP7]], [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[COND]] to i32 +// CHECK1-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 @@ -842,20 +843,20 @@ int main (int argc, char **argv) { // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: // CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) @@ -1784,33 +1785,34 @@ int main (int argc, char **argv) { // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: // CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] +// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV2]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: // CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CONV3:%.*]] = sext i32 [[TMP8]] to i64 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] -// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[COND:%.*]] = phi i64 [ [[TMP7]], [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[CONV4:%.*]] = trunc i64 [[COND]] to i32 +// CHECK2-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK2-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: // CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 @@ -1826,20 +1828,20 @@ int main (int argc, char **argv) { // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK2-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK2-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: // CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) @@ -2768,33 +2770,34 @@ int main (int argc, char **argv) { // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: // CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 // CHECK3-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] +// CHECK3-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV2]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: // CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CONV3:%.*]] = sext i32 [[TMP8]] to i64 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] -// CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[COND:%.*]] = phi i64 [ [[TMP7]], [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[CONV4:%.*]] = trunc i64 [[COND]] to i32 +// CHECK3-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: // CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: // CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 @@ -2810,20 +2813,20 @@ int main (int argc, char **argv) { // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: // CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: // CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: // CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) @@ -3752,33 +3755,34 @@ int main (int argc, char **argv) { // CHECK4-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK4: omp.dispatch.cond: // CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 // CHECK4-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK4-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] +// CHECK4-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV2]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK4-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: // CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CONV3:%.*]] = sext i32 [[TMP8]] to i64 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] -// CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[COND:%.*]] = phi i64 [ [[TMP7]], [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[CONV4:%.*]] = trunc i64 [[COND]] to i32 +// CHECK4-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK4-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK4-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK4: omp.dispatch.body: // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: // CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 -// CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK4-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: // CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 @@ -3794,20 +3798,20 @@ int main (int argc, char **argv) { // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: // CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK4-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK4-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK4: omp.dispatch.inc: // CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK4-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK4-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 // CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK4-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK4-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK4: omp.dispatch.end: // CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) @@ -4723,7 +4727,7 @@ int main (int argc, char **argv) { // CHECK5: omp.dispatch.cond: // CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -5680,7 +5684,7 @@ int main (int argc, char **argv) { // CHECK6: omp.dispatch.cond: // CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] +// CHECK6-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] // CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: // CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -6637,7 +6641,7 @@ int main (int argc, char **argv) { // CHECK7: omp.dispatch.cond: // CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] +// CHECK7-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -7594,7 +7598,7 @@ int main (int argc, char **argv) { // CHECK8: omp.dispatch.cond: // CHECK8-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] +// CHECK8-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] // CHECK8-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK8: cond.true: // CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -10989,33 +10993,34 @@ int main (int argc, char **argv) { // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: // CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 // CHECK13-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] +// CHECK13-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV3]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: // CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CONV4:%.*]] = sext i32 [[TMP9]] to i64 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] -// CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[COND:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[CONV5:%.*]] = trunc i64 [[COND]] to i32 +// CHECK13-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: // CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK13-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: // CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 @@ -11030,20 +11035,20 @@ int main (int argc, char **argv) { // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: // CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK13: omp.dispatch.inc: // CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK13-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK13-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK13-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK13: omp.dispatch.end: // CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) @@ -13648,33 +13653,34 @@ int main (int argc, char **argv) { // CHECK14-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK14: omp.dispatch.cond: // CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 // CHECK14-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] +// CHECK14-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV3]], [[TMP7]] // CHECK14-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: // CHECK14-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: // CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[CONV4:%.*]] = sext i32 [[TMP9]] to i64 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] -// CHECK14-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[COND:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[CONV5:%.*]] = trunc i64 [[COND]] to i32 +// CHECK14-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK14-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK14-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK14: omp.dispatch.body: // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: // CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 -// CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK14-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK14-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK14-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: // CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 @@ -13689,20 +13695,20 @@ int main (int argc, char **argv) { // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: // CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 -// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK14-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK14: omp.dispatch.inc: // CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK14-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK14-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK14-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK14: omp.dispatch.end: // CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) @@ -16307,33 +16313,34 @@ int main (int argc, char **argv) { // CHECK15-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK15: omp.dispatch.cond: // CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 // CHECK15-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK15-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] +// CHECK15-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV3]], [[TMP7]] // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK15-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: // CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CONV4:%.*]] = sext i32 [[TMP9]] to i64 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] -// CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[COND:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[CONV5:%.*]] = trunc i64 [[COND]] to i32 +// CHECK15-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK15: omp.dispatch.body: // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: // CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 -// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK15-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK15-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: // CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 @@ -16348,20 +16355,20 @@ int main (int argc, char **argv) { // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: // CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 -// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK15-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK15-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK15: omp.dispatch.inc: // CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK15-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK15-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 // CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK15-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK15-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK15: omp.dispatch.end: // CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) @@ -18966,33 +18973,34 @@ int main (int argc, char **argv) { // CHECK16-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK16: omp.dispatch.cond: // CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 // CHECK16-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK16-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK16-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] +// CHECK16-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV3]], [[TMP7]] // CHECK16-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK16: cond.true: // CHECK16-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK16-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK16-NEXT: br label [[COND_END:%.*]] // CHECK16: cond.false: // CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[CONV4:%.*]] = sext i32 [[TMP9]] to i64 // CHECK16-NEXT: br label [[COND_END]] // CHECK16: cond.end: -// CHECK16-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] -// CHECK16-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[COND:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] +// CHECK16-NEXT: [[CONV5:%.*]] = trunc i64 [[COND]] to i32 +// CHECK16-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK16-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK16-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK16-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK16-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK16-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK16: omp.dispatch.body: // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: // CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 -// CHECK16-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK16-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK16-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK16-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: // CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 @@ -19007,20 +19015,20 @@ int main (int argc, char **argv) { // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: // CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 -// CHECK16-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK16-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK16-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK16-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK16: omp.dispatch.inc: // CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK16-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK16-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK16-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK16-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 // CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK16-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK16-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK16-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 // CHECK16-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK16: omp.dispatch.end: // CHECK16-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) @@ -21562,7 +21570,7 @@ int main (int argc, char **argv) { // CHECK17: omp.dispatch.cond: // CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], [[TMP7]] // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -24139,7 +24147,7 @@ int main (int argc, char **argv) { // CHECK18: omp.dispatch.cond: // CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK18-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +// CHECK18-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], [[TMP7]] // CHECK18-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK18: cond.true: // CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -26716,7 +26724,7 @@ int main (int argc, char **argv) { // CHECK19: omp.dispatch.cond: // CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], [[TMP7]] // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -29293,7 +29301,7 @@ int main (int argc, char **argv) { // CHECK20: omp.dispatch.cond: // CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK20-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK20-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +// CHECK20-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], [[TMP7]] // CHECK20-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK20: cond.true: // CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 From c475efe9162437045d7292befc5e4d35ad5c4351 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 1 Jul 2021 10:09:13 -0400 Subject: [PATCH 420/619] [libc++] Fix incorrect shell expansion in macos-ci-setup --- libcxx/utils/ci/macos-ci-setup | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libcxx/utils/ci/macos-ci-setup b/libcxx/utils/ci/macos-ci-setup index 5cb9d1e3e1fd0..f9bf4888e2d15 100755 --- a/libcxx/utils/ci/macos-ci-setup +++ b/libcxx/utils/ci/macos-ci-setup @@ -20,7 +20,7 @@ CFG_DIR="$(brew --prefix)/etc/buildkite-agent" version="$(sw_vers -productVersion | sed -E 's/([0-9]+).([0-9]+).[0-9]+/\1.\2/')" # Setup the tags of the agent -echo 'tags="queue=libcxx-builders-macos,queue=libcxx-builders-macos${version}"' >> "${CFG_DIR}/buildkite-agent.cfg" +echo "tags=\"queue=libcxx-builders-macos,queue=libcxx-builders-macos${version}\"" >> "${CFG_DIR}/buildkite-agent.cfg" # Setup the BuildKite Agent token sed -i '' "s/xxx/${BUILDKITE_AGENT_TOKEN}/g" "${CFG_DIR}/buildkite-agent.cfg" From 24d76419d6b7a3191ec1f4bfc33a640e716f11c8 Mon Sep 17 00:00:00 2001 From: Sam Tebbs Date: Mon, 21 Jun 2021 16:00:11 +0100 Subject: [PATCH 421/619] [ARM] Transform a floating-point to fixed-point conversion to a VCVT_fix Much like fixed-point to floating-point conversion, the converse can also be transformed into a fixed-point VCVT. This patch transforms multiplications of floating point numbers by 2^n into a VCVT_fix. The exception is that a float to fixed conversion with 1 fractional bit ends up being an FADD (FADD(x, x) emulates FMUL(x, 2)) rather than an FMUL so there is a special case for that. This patch also moves the code from https://reviews.llvm.org/D103903 into a separate function as fixed to float and float to fixed are very similar. Differential Revision: https://reviews.llvm.org/D104793 --- llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp | 164 ++- ...vt-fixed.ll => mve-vcvt-fixed-to-float.ll} | 0 .../CodeGen/Thumb2/mve-vcvt-float-to-fixed.ll | 1026 +++++++++++++++++ 3 files changed, 1147 insertions(+), 43 deletions(-) rename llvm/test/CodeGen/Thumb2/{mve-vcvt-fixed.ll => mve-vcvt-fixed-to-float.ll} (100%) create mode 100644 llvm/test/CodeGen/Thumb2/mve-vcvt-float-to-fixed.ll diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index bb6a0c95a114b..9c7055deaaf8c 100644 --- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -197,6 +197,10 @@ class ARMDAGToDAGISel : public SelectionDAGISel { bool tryT2IndexedLoad(SDNode *N); bool tryMVEIndexedLoad(SDNode *N); bool tryFMULFixed(SDNode *N, SDLoc dl); + bool tryFP_TO_INT(SDNode *N, SDLoc dl); + bool transformFixedFloatingPointConversion(SDNode *N, SDNode *FMul, + bool IsUnsigned, + bool FixedToFloat); /// SelectVLD - Select NEON load intrinsics. NumVecs should be /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for @@ -3150,47 +3154,47 @@ bool ARMDAGToDAGISel::tryInsertVectorElt(SDNode *N) { return false; } -bool ARMDAGToDAGISel::tryFMULFixed(SDNode *N, SDLoc dl) { - // Transform a fixed-point to floating-point conversion to a VCVT - if (!Subtarget->hasMVEFloatOps()) - return false; +bool ARMDAGToDAGISel::transformFixedFloatingPointConversion(SDNode *N, + SDNode *FMul, + bool IsUnsigned, + bool FixedToFloat) { auto Type = N->getValueType(0); - if (!Type.isVector()) + unsigned ScalarBits = Type.getScalarSizeInBits(); + if (ScalarBits > 32) return false; - auto ScalarType = Type.getVectorElementType(); - unsigned ScalarBits = ScalarType.getSizeInBits(); - auto LHS = N->getOperand(0); - auto RHS = N->getOperand(1); + SDNodeFlags FMulFlags = FMul->getFlags(); + // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is + // allowed in 16 bit unsigned floats + if (ScalarBits == 16 && !FMulFlags.hasNoInfs() && IsUnsigned) + return false; - if (ScalarBits > 32) + SDValue ImmNode = FMul->getOperand(1); + SDValue VecVal = FMul->getOperand(0); + if (VecVal->getOpcode() == ISD::UINT_TO_FP || + VecVal->getOpcode() == ISD::SINT_TO_FP) + VecVal = VecVal->getOperand(0); + + if (VecVal.getValueType().getScalarSizeInBits() != ScalarBits) return false; - if (RHS.getOpcode() == ISD::BITCAST) { - if (RHS.getValueType().getVectorElementType().getSizeInBits() != ScalarBits) + if (ImmNode.getOpcode() == ISD::BITCAST) { + if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits) return false; - RHS = RHS.getOperand(0); + ImmNode = ImmNode.getOperand(0); } - if (RHS.getValueType().getVectorElementType().getSizeInBits() != ScalarBits) - return false; - if (LHS.getOpcode() != ISD::SINT_TO_FP && LHS.getOpcode() != ISD::UINT_TO_FP) - return false; - bool IsUnsigned = LHS.getOpcode() == ISD::UINT_TO_FP; - SDNodeFlags FMulFlags = N->getFlags(); - // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is - // allowed in 16 bit unsigned floats - if (ScalarBits == 16 && !FMulFlags.hasNoInfs() && IsUnsigned) + if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits) return false; APFloat ImmAPF(0.0f); - switch (RHS.getOpcode()) { + switch (ImmNode.getOpcode()) { case ARMISD::VMOVIMM: case ARMISD::VDUP: { - if (!isa(RHS.getOperand(0))) + if (!isa(ImmNode.getOperand(0))) return false; - unsigned Imm = RHS.getConstantOperandVal(0); - if (RHS.getOpcode() == ARMISD::VMOVIMM) + unsigned Imm = ImmNode.getConstantOperandVal(0); + if (ImmNode.getOpcode() == ARMISD::VMOVIMM) Imm = ARM_AM::decodeVMOVModImm(Imm, ScalarBits); ImmAPF = APFloat(ScalarBits == 32 ? APFloat::IEEEsingle() : APFloat::IEEEhalf(), @@ -3198,24 +3202,26 @@ bool ARMDAGToDAGISel::tryFMULFixed(SDNode *N, SDLoc dl) { break; } case ARMISD::VMOVFPIMM: { - ImmAPF = APFloat(ARM_AM::getFPImmFloat(RHS.getConstantOperandVal(0))); + ImmAPF = APFloat(ARM_AM::getFPImmFloat(ImmNode.getConstantOperandVal(0))); break; } default: return false; } - // Multiplying by a factor of 2^(-n) will convert from fixed point to - // floating point, where n is the number of fractional bits in the fixed - // point number. Taking the inverse and log2 of the factor will give n - APFloat Inverse(0.0f); - if (!ImmAPF.getExactInverse(&Inverse)) - return false; - + // Where n is the number of fractional bits, multiplying by 2^n will convert + // from float to fixed and multiplying by 2^-n will convert from fixed to + // float. Taking log2 of the factor (after taking the inverse in the case of + // float to fixed) will give n. + APFloat ToConvert = ImmAPF; + if (FixedToFloat) { + if (!ImmAPF.getExactInverse(&ToConvert)) + return false; + } APSInt Converted(64, 0); bool IsExact; - Inverse.convertToInteger(Converted, llvm::RoundingMode::NearestTiesToEven, - &IsExact); + ToConvert.convertToInteger(Converted, llvm::RoundingMode::NearestTiesToEven, + &IsExact); if (!IsExact || !Converted.isPowerOf2()) return false; @@ -3223,28 +3229,95 @@ bool ARMDAGToDAGISel::tryFMULFixed(SDNode *N, SDLoc dl) { if (FracBits > ScalarBits) return false; - auto SintToFpOperand = LHS.getOperand(0); - SmallVector Ops{SintToFpOperand, - CurDAG->getConstant(FracBits, dl, MVT::i32)}; - AddEmptyMVEPredicateToOps(Ops, dl, Type); + SmallVector Ops{ + VecVal, CurDAG->getConstant(FracBits, SDLoc(N), MVT::i32)}; + AddEmptyMVEPredicateToOps(Ops, SDLoc(N), Type); unsigned int Opcode; switch (ScalarBits) { case 16: - Opcode = IsUnsigned ? ARM::MVE_VCVTf16u16_fix : ARM::MVE_VCVTf16s16_fix; + if (FixedToFloat) + Opcode = IsUnsigned ? ARM::MVE_VCVTf16u16_fix : ARM::MVE_VCVTf16s16_fix; + else + Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix; break; case 32: - Opcode = IsUnsigned ? ARM::MVE_VCVTf32u32_fix : ARM::MVE_VCVTf32s32_fix; + if (FixedToFloat) + Opcode = IsUnsigned ? ARM::MVE_VCVTf32u32_fix : ARM::MVE_VCVTf32s32_fix; + else + Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix; break; default: llvm_unreachable("unexpected number of scalar bits"); break; } - ReplaceNode(N, CurDAG->getMachineNode(Opcode, dl, Type, Ops)); + ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), Type, Ops)); return true; } +bool ARMDAGToDAGISel::tryFP_TO_INT(SDNode *N, SDLoc dl) { + // Transform a floating-point to fixed-point conversion to a VCVT + if (!Subtarget->hasMVEFloatOps()) + return false; + EVT Type = N->getValueType(0); + if (!Type.isVector()) + return false; + unsigned int ScalarBits = Type.getScalarSizeInBits(); + + bool IsUnsigned = N->getOpcode() == ISD::FP_TO_UINT; + SDNode *Node = N->getOperand(0).getNode(); + + // floating-point to fixed-point with one fractional bit gets turned into an + // FP_TO_[U|S]INT(FADD (x, x)) rather than an FP_TO_[U|S]INT(FMUL (x, y)) + if (Node->getOpcode() == ISD::FADD) { + if (Node->getOperand(0) != Node->getOperand(1)) + return false; + SDNodeFlags Flags = Node->getFlags(); + // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is + // allowed in 16 bit unsigned floats + if (ScalarBits == 16 && !Flags.hasNoInfs() && IsUnsigned) + return false; + + unsigned Opcode; + switch (ScalarBits) { + case 16: + Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix; + break; + case 32: + Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix; + break; + } + SmallVector Ops{Node->getOperand(0), + CurDAG->getConstant(1, dl, MVT::i32)}; + AddEmptyMVEPredicateToOps(Ops, dl, Type); + + ReplaceNode(N, CurDAG->getMachineNode(Opcode, dl, Type, Ops)); + return true; + } + + if (Node->getOpcode() != ISD::FMUL) + return false; + + return transformFixedFloatingPointConversion(N, Node, IsUnsigned, false); +} + +bool ARMDAGToDAGISel::tryFMULFixed(SDNode *N, SDLoc dl) { + // Transform a fixed-point to floating-point conversion to a VCVT + if (!Subtarget->hasMVEFloatOps()) + return false; + auto Type = N->getValueType(0); + if (!Type.isVector()) + return false; + + auto LHS = N->getOperand(0); + if (LHS.getOpcode() != ISD::SINT_TO_FP && LHS.getOpcode() != ISD::UINT_TO_FP) + return false; + + return transformFixedFloatingPointConversion( + N, N, LHS.getOpcode() == ISD::UINT_TO_FP, true); +} + bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) { if (!Subtarget->hasV6T2Ops()) return false; @@ -3680,6 +3753,11 @@ void ARMDAGToDAGISel::Select(SDNode *N) { if (tryV6T2BitfieldExtractOp(N, true)) return; break; + case ISD::FP_TO_UINT: + case ISD::FP_TO_SINT: + if (tryFP_TO_INT(N, dl)) + return; + break; case ISD::FMUL: if (tryFMULFixed(N, dl)) return; diff --git a/llvm/test/CodeGen/Thumb2/mve-vcvt-fixed.ll b/llvm/test/CodeGen/Thumb2/mve-vcvt-fixed-to-float.ll similarity index 100% rename from llvm/test/CodeGen/Thumb2/mve-vcvt-fixed.ll rename to llvm/test/CodeGen/Thumb2/mve-vcvt-fixed-to-float.ll diff --git a/llvm/test/CodeGen/Thumb2/mve-vcvt-float-to-fixed.ll b/llvm/test/CodeGen/Thumb2/mve-vcvt-float-to-fixed.ll new file mode 100644 index 0000000000000..cab409891ca8b --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-vcvt-float-to-fixed.ll @@ -0,0 +1,1026 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi %s -o - -mattr=+mve.fp | FileCheck %s + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_1(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_1: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #1 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_2(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_2: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #2 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_3(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_3: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #3 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_4(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_4: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #4 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_5(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_5: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #5 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_6(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_6: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #6 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_7(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_7: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #7 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_8(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #8 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_9(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_9: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #9 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_10(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_10: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #10 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_11(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_11: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #11 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_12(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_12: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #12 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_13(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_13: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #13 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_14(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_14: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #14 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_15(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_15: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #15 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_16(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #16 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_17(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_17: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #17 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_18(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_18: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #18 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_19(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_19: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #19 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_20(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_20: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #20 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_21(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_21: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #21 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_22(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_22: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #22 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_23(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_23: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #23 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_24(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_24: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #24 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_25(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_25: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #25 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_26(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_26: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #26 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_27(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_27: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #27 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_28(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_28: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #28 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_29(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_29: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #29 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_30(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_30: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #30 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_31(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_31: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #31 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_32(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #32 +; CHECK-NEXT: bx lr + %2 = fmul <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_33(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_33: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i32 q1, #0x50000000 +; CHECK-NEXT: vmul.f32 q0, q0, q1 +; CHECK-NEXT: vcvt.s32.f32 q0, q0 +; CHECK-NEXT: bx lr + %2 = fmul <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_i16_1(<8 x half> %0) { +; CHECK-LABEL: vcvt_i16_1: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s16.f16 q0, q0, #1 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptosi <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_i16_2(<8 x half> %0) { +; CHECK-LABEL: vcvt_i16_2: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s16.f16 q0, q0, #2 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptosi <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_i16_3(<8 x half> %0) { +; CHECK-LABEL: vcvt_i16_3: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s16.f16 q0, q0, #3 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptosi <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_i16_4(<8 x half> %0) { +; CHECK-LABEL: vcvt_i16_4: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s16.f16 q0, q0, #4 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptosi <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_i16_5(<8 x half> %0) { +; CHECK-LABEL: vcvt_i16_5: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s16.f16 q0, q0, #5 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptosi <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_i16_6(<8 x half> %0) { +; CHECK-LABEL: vcvt_i16_6: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s16.f16 q0, q0, #6 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptosi <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_i16_7(<8 x half> %0) { +; CHECK-LABEL: vcvt_i16_7: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s16.f16 q0, q0, #7 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptosi <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_i16_8(<8 x half> %0) { +; CHECK-LABEL: vcvt_i16_8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s16.f16 q0, q0, #8 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptosi <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_i16_9(<8 x half> %0) { +; CHECK-LABEL: vcvt_i16_9: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s16.f16 q0, q0, #9 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptosi <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_i16_10(<8 x half> %0) { +; CHECK-LABEL: vcvt_i16_10: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s16.f16 q0, q0, #10 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptosi <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_i16_11(<8 x half> %0) { +; CHECK-LABEL: vcvt_i16_11: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s16.f16 q0, q0, #11 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptosi <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_i16_12(<8 x half> %0) { +; CHECK-LABEL: vcvt_i16_12: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s16.f16 q0, q0, #12 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptosi <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_i16_13(<8 x half> %0) { +; CHECK-LABEL: vcvt_i16_13: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s16.f16 q0, q0, #13 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptosi <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_i16_14(<8 x half> %0) { +; CHECK-LABEL: vcvt_i16_14: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s16.f16 q0, q0, #14 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptosi <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_i16_15(<8 x half> %0) { +; CHECK-LABEL: vcvt_i16_15: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s16.f16 q0, q0, #15 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptosi <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_1(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_1: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #1 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_2(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_2: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #2 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_3(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_3: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #3 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_4(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_4: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #4 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_5(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_5: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #5 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_6(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_6: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #6 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_7(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_7: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #7 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_8(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #8 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_9(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_9: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #9 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_10(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_10: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #10 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_11(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_11: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #11 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_12(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_12: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #12 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_13(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_13: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #13 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_14(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_14: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #14 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_15(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_15: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #15 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_16(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #16 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_17(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_17: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #17 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_18(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_18: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #18 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_19(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_19: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #19 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_20(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_20: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #20 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_21(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_21: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #21 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_22(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_22: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #22 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_23(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_23: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #23 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_24(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_24: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #24 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_25(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_25: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #25 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_26(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_26: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #26 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_27(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_27: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #27 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_28(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_28: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #28 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_29(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_29: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #29 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_30(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_30: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #30 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_31(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_31: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #31 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_32(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #32 +; CHECK-NEXT: bx lr + %2 = fmul <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_33(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_33: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i32 q1, #0x50000000 +; CHECK-NEXT: vmul.f32 q0, q0, q1 +; CHECK-NEXT: vcvt.u32.f32 q0, q0 +; CHECK-NEXT: bx lr + %2 = fmul <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_u16_1(<8 x half> %0) { +; CHECK-LABEL: vcvt_u16_1: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u16.f16 q0, q0, #1 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptoui <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_u16_2(<8 x half> %0) { +; CHECK-LABEL: vcvt_u16_2: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u16.f16 q0, q0, #2 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptoui <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_u16_3(<8 x half> %0) { +; CHECK-LABEL: vcvt_u16_3: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u16.f16 q0, q0, #3 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptoui <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_u16_4(<8 x half> %0) { +; CHECK-LABEL: vcvt_u16_4: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u16.f16 q0, q0, #4 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptoui <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_u16_5(<8 x half> %0) { +; CHECK-LABEL: vcvt_u16_5: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u16.f16 q0, q0, #5 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptoui <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_u16_6(<8 x half> %0) { +; CHECK-LABEL: vcvt_u16_6: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u16.f16 q0, q0, #6 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptoui <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_u16_7(<8 x half> %0) { +; CHECK-LABEL: vcvt_u16_7: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u16.f16 q0, q0, #7 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptoui <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_u16_8(<8 x half> %0) { +; CHECK-LABEL: vcvt_u16_8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u16.f16 q0, q0, #8 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptoui <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_u16_9(<8 x half> %0) { +; CHECK-LABEL: vcvt_u16_9: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u16.f16 q0, q0, #9 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptoui <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_u16_10(<8 x half> %0) { +; CHECK-LABEL: vcvt_u16_10: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u16.f16 q0, q0, #10 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptoui <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_u16_11(<8 x half> %0) { +; CHECK-LABEL: vcvt_u16_11: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u16.f16 q0, q0, #11 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptoui <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_u16_12(<8 x half> %0) { +; CHECK-LABEL: vcvt_u16_12: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u16.f16 q0, q0, #12 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptoui <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_u16_13(<8 x half> %0) { +; CHECK-LABEL: vcvt_u16_13: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u16.f16 q0, q0, #13 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptoui <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_u16_14(<8 x half> %0) { +; CHECK-LABEL: vcvt_u16_14: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u16.f16 q0, q0, #14 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptoui <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_u16_15(<8 x half> %0) { +; CHECK-LABEL: vcvt_u16_15: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u16.f16 q0, q0, #15 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptoui <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_u16_inf(<8 x half> %0) { +; CHECK-LABEL: vcvt_u16_inf: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i16 q1, #0x7800 +; CHECK-NEXT: vmul.f16 q0, q0, q1 +; CHECK-NEXT: vcvt.u16.f16 q0, q0 +; CHECK-NEXT: bx lr + %2 = fmul <8 x half> %0, + %3 = fptoui <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_s16_inf(<8 x half> %0) { +; CHECK-LABEL: vcvt_s16_inf: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s16.f16 q0, q0, #15 +; CHECK-NEXT: bx lr + %2 = fmul <8 x half> %0, + %3 = fptosi <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + + +define arm_aapcs_vfpcc <4 x i32> @vcvt_bad_imm(<4 x float> %0) { +; CHECK-LABEL: vcvt_bad_imm: +; CHECK: @ %bb.0: +; CHECK-NEXT: movw r0, #2048 +; CHECK-NEXT: movt r0, #15104 +; CHECK-NEXT: vmul.f32 q0, q0, r0 +; CHECK-NEXT: vcvt.s32.f32 q0, q0 +; CHECK-NEXT: bx lr + %2 = fmul <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_negative(<4 x float> %0) { +; CHECK-LABEL: vcvt_negative: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i32 q1, #0xb8000000 +; CHECK-NEXT: vmul.f32 q0, q0, q1 +; CHECK-NEXT: vcvt.s32.f32 q0, q0 +; CHECK-NEXT: bx lr + %2 = fmul <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_negative2(<4 x float> %0) { +; CHECK-LABEL: vcvt_negative2: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i32 q1, #0xb0000000 +; CHECK-NEXT: vmul.f32 q0, q0, q1 +; CHECK-NEXT: vcvt.s32.f32 q0, q0 +; CHECK-NEXT: bx lr + %2 = fmul <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} From 513ad683157d547628e246cc86487a70ddadf98d Mon Sep 17 00:00:00 2001 From: Sjoerd Meijer Date: Thu, 1 Jul 2021 14:44:35 +0100 Subject: [PATCH 422/619] [AArch64] Add some more tests to CodeGen/AArch64/aarch64-load-ext.ll. NFC. --- llvm/test/CodeGen/AArch64/aarch64-load-ext.ll | 164 ++++++++++++++++++ 1 file changed, 164 insertions(+) diff --git a/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll b/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll index 1bbab3879dc35..ec58526468810 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll @@ -102,6 +102,56 @@ define <4 x i8> @test4(<4 x i8>* %v4i8_ptr) { ret <4 x i8> %v4i8 } +define <2 x i32> @fsext_v2i32(<2 x i8>* %a) { +; CHECK-LE-LABEL: fsext_v2i32: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: ldrsb w8, [x0] +; CHECK-LE-NEXT: ldrsb w9, [x0, #1] +; CHECK-LE-NEXT: fmov s0, w8 +; CHECK-LE-NEXT: mov v0.s[1], w9 +; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: fsext_v2i32: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: ldrsb w8, [x0] +; CHECK-BE-NEXT: ldrsb w9, [x0, #1] +; CHECK-BE-NEXT: fmov s0, w8 +; CHECK-BE-NEXT: mov v0.s[1], w9 +; CHECK-BE-NEXT: rev64 v0.2s, v0.2s +; CHECK-BE-NEXT: ret + %x = load <2 x i8>, <2 x i8>* %a + %y = sext <2 x i8> %x to <2 x i32> + ret <2 x i32> %y +} + +define <3 x i32> @fsext_v3i32(<3 x i8>* %a) { +; CHECK-LE-LABEL: fsext_v3i32: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: ldr s0, [x0] +; CHECK-LE-NEXT: zip1 v0.8b, v0.8b, v0.8b +; CHECK-LE-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-LE-NEXT: shl v0.4s, v0.4s, #24 +; CHECK-LE-NEXT: sshr v0.4s, v0.4s, #24 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: fsext_v3i32: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: ldr s0, [x0] +; CHECK-BE-NEXT: rev32 v0.8b, v0.8b +; CHECK-BE-NEXT: zip1 v0.8b, v0.8b, v0.8b +; CHECK-BE-NEXT: rev16 v0.8b, v0.8b +; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-BE-NEXT: shl v0.4s, v0.4s, #24 +; CHECK-BE-NEXT: sshr v0.4s, v0.4s, #24 +; CHECK-BE-NEXT: rev64 v0.4s, v0.4s +; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-BE-NEXT: ret + %x = load <3 x i8>, <3 x i8>* %a + %y = sext <3 x i8> %x to <3 x i32> + ret <3 x i32> %y +} + define <4 x i32> @fsext_v4i32(<4 x i8>* %a) { ; CHECK-LE-LABEL: fsext_v4i32: ; CHECK-LE: // %bb.0: @@ -124,6 +174,31 @@ define <4 x i32> @fsext_v4i32(<4 x i8>* %a) { ret <4 x i32> %y } +define <8 x i32> @fsext_v8i32(<8 x i8>* %a) { +; CHECK-LE-LABEL: fsext_v8i32: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: ldr d0, [x0] +; CHECK-LE-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-LE-NEXT: sshll2 v1.4s, v0.8h, #0 +; CHECK-LE-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: fsext_v8i32: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: ld1 { v0.8b }, [x0] +; CHECK-BE-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-BE-NEXT: sshll v1.4s, v0.4h, #0 +; CHECK-BE-NEXT: sshll2 v0.4s, v0.8h, #0 +; CHECK-BE-NEXT: rev64 v0.4s, v0.4s +; CHECK-BE-NEXT: rev64 v2.4s, v1.4s +; CHECK-BE-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-BE-NEXT: ext v0.16b, v2.16b, v2.16b, #8 +; CHECK-BE-NEXT: ret + %x = load <8 x i8>, <8 x i8>* %a + %y = sext <8 x i8> %x to <8 x i32> + ret <8 x i32> %y +} + define <4 x i32> @fzext_v4i32(<4 x i8>* %a) { ; CHECK-LE-LABEL: fzext_v4i32: ; CHECK-LE: // %bb.0: @@ -172,6 +247,53 @@ define i32 @loadExti32(<4 x i8>* %ref) { ret i32 %conv } +define <2 x i16> @fsext_v2i16(<2 x i8>* %a) { +; CHECK-LE-LABEL: fsext_v2i16: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: ldrsb w8, [x0] +; CHECK-LE-NEXT: ldrsb w9, [x0, #1] +; CHECK-LE-NEXT: fmov s0, w8 +; CHECK-LE-NEXT: mov v0.s[1], w9 +; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: fsext_v2i16: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: ldrsb w8, [x0] +; CHECK-BE-NEXT: ldrsb w9, [x0, #1] +; CHECK-BE-NEXT: fmov s0, w8 +; CHECK-BE-NEXT: mov v0.s[1], w9 +; CHECK-BE-NEXT: rev64 v0.2s, v0.2s +; CHECK-BE-NEXT: ret + %x = load <2 x i8>, <2 x i8>* %a + %y = sext <2 x i8> %x to <2 x i16> + ret <2 x i16> %y +} + +define <3 x i16> @fsext_v3i16(<3 x i8>* %a) { +; CHECK-LE-LABEL: fsext_v3i16: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: ldr s0, [x0] +; CHECK-LE-NEXT: zip1 v0.8b, v0.8b, v0.8b +; CHECK-LE-NEXT: shl v0.4h, v0.4h, #8 +; CHECK-LE-NEXT: sshr v0.4h, v0.4h, #8 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: fsext_v3i16: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: ldr s0, [x0] +; CHECK-BE-NEXT: rev32 v0.8b, v0.8b +; CHECK-BE-NEXT: zip1 v0.8b, v0.8b, v0.8b +; CHECK-BE-NEXT: rev16 v0.8b, v0.8b +; CHECK-BE-NEXT: shl v0.4h, v0.4h, #8 +; CHECK-BE-NEXT: sshr v0.4h, v0.4h, #8 +; CHECK-BE-NEXT: rev64 v0.4h, v0.4h +; CHECK-BE-NEXT: ret + %x = load <3 x i8>, <3 x i8>* %a + %y = sext <3 x i8> %x to <3 x i16> + ret <3 x i16> %y +} + define <4 x i16> @fsext_v4i16(<4 x i8>* %a) { ; CHECK-LE-LABEL: fsext_v4i16: ; CHECK-LE: // %bb.0: @@ -192,6 +314,48 @@ define <4 x i16> @fsext_v4i16(<4 x i8>* %a) { ret <4 x i16> %y } +define <8 x i16> @fsext_v8i16(<8 x i8>* %a) { +; CHECK-LE-LABEL: fsext_v8i16: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: ldr d0, [x0] +; CHECK-LE-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: fsext_v8i16: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: ld1 { v0.8b }, [x0] +; CHECK-BE-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-BE-NEXT: rev64 v0.8h, v0.8h +; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-BE-NEXT: ret + %x = load <8 x i8>, <8 x i8>* %a + %y = sext <8 x i8> %x to <8 x i16> + ret <8 x i16> %y +} + +define <16 x i16> @fsext_v16i16(<16 x i8>* %a) { +; CHECK-LE-LABEL: fsext_v16i16: +; CHECK-LE: // %bb.0: +; CHECK-LE-NEXT: ldr q0, [x0] +; CHECK-LE-NEXT: sshll2 v1.8h, v0.16b, #0 +; CHECK-LE-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: fsext_v16i16: +; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: ld1 { v0.16b }, [x0] +; CHECK-BE-NEXT: sshll v1.8h, v0.8b, #0 +; CHECK-BE-NEXT: sshll2 v0.8h, v0.16b, #0 +; CHECK-BE-NEXT: rev64 v0.8h, v0.8h +; CHECK-BE-NEXT: rev64 v2.8h, v1.8h +; CHECK-BE-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-BE-NEXT: ext v0.16b, v2.16b, v2.16b, #8 +; CHECK-BE-NEXT: ret + %x = load <16 x i8>, <16 x i8>* %a + %y = sext <16 x i8> %x to <16 x i16> + ret <16 x i16> %y +} + define <4 x i16> @fzext_v4i16(<4 x i8>* %a) { ; CHECK-LE-LABEL: fzext_v4i16: ; CHECK-LE: // %bb.0: From 4a361f52093b976401a78bdd261964e45492f2e3 Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Wed, 30 Jun 2021 06:57:40 -0700 Subject: [PATCH 423/619] [coro async] Add support for specifying which parameter is swiftself in async resume functions Differential Revision: https://reviews.llvm.org/D104147 --- llvm/lib/Transforms/Coroutines/CoroSplit.cpp | 21 ++++++++++++++++--- llvm/test/Transforms/Coroutines/coro-async.ll | 8 +++---- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp index e4bf5e3f4d537..ccfd498a64fd0 100644 --- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -765,8 +765,8 @@ Value *CoroCloner::deriveNewFramePointer() { // context header. case coro::ABI::Async: { auto *ActiveAsyncSuspend = cast(ActiveSuspend); - auto *CalleeContext = - NewF->getArg(ActiveAsyncSuspend->getStorageArgumentIndex()); + auto ContextIdx = ActiveAsyncSuspend->getStorageArgumentIndex() & 0xff; + auto *CalleeContext = NewF->getArg(ContextIdx); auto *FramePtrTy = Shape.FrameTy->getPointerTo(); auto *ProjectionFunc = ActiveAsyncSuspend->getAsyncContextProjectionFunction(); @@ -827,6 +827,13 @@ static void addAsyncContextAttrs(AttributeList &Attrs, LLVMContext &Context, Attrs = Attrs.addParamAttributes(Context, ParamIndex, ParamAttrs); } +static void addSwiftSelfAttrs(AttributeList &Attrs, LLVMContext &Context, + unsigned ParamIndex) { + AttrBuilder ParamAttrs; + ParamAttrs.addAttribute(Attribute::SwiftSelf); + Attrs = Attrs.addParamAttributes(Context, ParamIndex, ParamAttrs); +} + /// Clone the body of the original function into a resume function of /// some sort. void CoroCloner::create() { @@ -949,8 +956,16 @@ void CoroCloner::create() { auto *ActiveAsyncSuspend = cast(ActiveSuspend); if (OrigF.hasParamAttribute(Shape.AsyncLowering.ContextArgNo, Attribute::SwiftAsync)) { - auto ContextArgIndex = ActiveAsyncSuspend->getStorageArgumentIndex(); + uint32_t ArgAttributeIndices = + ActiveAsyncSuspend->getStorageArgumentIndex(); + auto ContextArgIndex = ArgAttributeIndices & 0xff; addAsyncContextAttrs(NewAttrs, Context, ContextArgIndex); + + // `swiftasync` must preceed `swiftself` so 0 is not a valid index for + // `swiftself`. + auto SwiftSelfIndex = ArgAttributeIndices >> 8; + if (SwiftSelfIndex) + addSwiftSelfAttrs(NewAttrs, Context, SwiftSelfIndex); } break; } diff --git a/llvm/test/Transforms/Coroutines/coro-async.ll b/llvm/test/Transforms/Coroutines/coro-async.ll index d8f75af2f29a9..5fb88d04c9c41 100644 --- a/llvm/test/Transforms/Coroutines/coro-async.ll +++ b/llvm/test/Transforms/Coroutines/coro-async.ll @@ -410,7 +410,7 @@ is_not_equal: i32 64 ; Initial async context size without space for frame }> -define swiftcc void @polymorphic_suspend_return(i8* %async.ctxt, %async.task* %task, %async.actor* %actor) { +define swiftcc void @polymorphic_suspend_return(i8* swiftasync %async.ctxt, %async.task* %task, %async.actor* %actor) { entry: %tmp = alloca { i64, i64 }, align 8 %proj.1 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %tmp, i64 0, i32 0 @@ -445,7 +445,7 @@ entry: %resume_proj_fun = bitcast i8*(i8*)* @resume_context_projection to i8* %callee = bitcast void(i8*, %async.task*, %async.actor*)* @asyncSuspend to i8* %res = call {i8*, i8*, i8*, i8*} (i32, i8*, i8*, ...) - @llvm.coro.suspend.async.sl_p0i8p0i8p0i8p0i8s(i32 0, + @llvm.coro.suspend.async.sl_p0i8p0i8p0i8p0i8s(i32 256, ;; swiftasync at 0 and swiftself at 1 in resume function i8* %resume.func_ptr, i8* %resume_proj_fun, void (i8*, i8*, %async.task*, %async.actor*)* @my_async_function.my_other_async_function_fp.apply, @@ -464,8 +464,8 @@ entry: unreachable } -; CHECK-LABEL: define swiftcc void @polymorphic_suspend_return(i8* %async.ctxt, %async.task* %task, %async.actor* %actor) -; CHECK-LABEL: define internal swiftcc void @polymorphic_suspend_return.resume.0(i8* {{.*}}%0, i8* {{.*}}%1, i8* {{.*}}%2, i8* {{.*}}%3) +; CHECK-LABEL: define swiftcc void @polymorphic_suspend_return(i8* swiftasync %async.ctxt, %async.task* %task, %async.actor* %actor) +; CHECK-LABEL: define internal swiftcc void @polymorphic_suspend_return.resume.0(i8* {{.*}}swiftasync{{.*}} %0, i8* {{.*}}swiftself{{.*}} %1, i8* {{.*}}%2, i8* {{.*}}%3) ; CHECK: bitcast i8* %3 to %async.task* ; CHECK: } From 5e5ba14b4d839dab361fcd929b45e9a35ca315c8 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 1 Jul 2021 14:16:02 +0100 Subject: [PATCH 424/619] [CostModel][X86] Adjust fp<->int vXi32 SSE legalized costs based on llvm-mca reports. Building on rG2a1ef8784ad9a, adjust the SSE cost tables to use the legalized types based on the worst case costs from the script in D103695. To account for different numbers of src/dst legalized type registers we must scale the cost by maximum of the src/dst, not just use src --- .../lib/Target/X86/X86TargetTransformInfo.cpp | 53 +++--- llvm/test/Analysis/CostModel/X86/cast.ll | 56 +++--- llvm/test/Analysis/CostModel/X86/fptoui.ll | 32 ++-- llvm/test/Analysis/CostModel/X86/sitofp.ll | 74 ++++---- llvm/test/Analysis/CostModel/X86/uitofp.ll | 64 +++---- .../X86/alternate-cast-inseltpoison.ll | 148 +++++++++------- .../SLPVectorizer/X86/alternate-cast.ll | 136 +++++++++------ .../SLPVectorizer/X86/sitofp-inseltpoison.ll | 149 ++++------------ .../Transforms/SLPVectorizer/X86/sitofp.ll | 149 ++++------------ .../Transforms/SLPVectorizer/X86/uitofp.ll | 160 ++++-------------- 10 files changed, 411 insertions(+), 610 deletions(-) diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index e400000f83160..a58b15083b7d3 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -2063,9 +2063,7 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 6 }, { ISD::TRUNCATE, MVT::v2i8, MVT::v2i64, 1 }, // PSHUFB - { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 }, { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, - { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 }, { ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 1 }, { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i32, 2 }, @@ -2084,24 +2082,25 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, // These are somewhat magic numbers justified by looking at the output of // Intel's IACA, running some kernels and making sure when we take // legalization into account the throughput will be overestimated. - { ISD::SINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 }, - { ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 }, - { ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 }, - { ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 }, - { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 5 }, - { ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 2*10 }, - { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2*10 }, - { ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 }, - { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 }, - - { ISD::UINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 }, - { ISD::UINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 }, - { ISD::UINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 }, - { ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 }, - { ISD::UINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 }, - { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 5 }, - { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 6 }, - { ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v16i8, 3 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 4 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 3 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 4 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 3 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 4 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 8 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 8 }, + + { ISD::UINT_TO_FP, MVT::f32, MVT::i64, 8 }, + { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 9 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v16i8, 4 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v16i8, 4 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v8i16, 4 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 4 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v4i32, 7 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 5 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 15 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 18 }, { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 4 }, { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 2 }, @@ -2109,14 +2108,10 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 }, { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 }, { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 4 }, - { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 1 }, - { ISD::UINT_TO_FP, MVT::f32, MVT::i64, 6 }, - { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 6 }, - { ISD::FP_TO_UINT, MVT::i64, MVT::f32, 4 }, - { ISD::FP_TO_UINT, MVT::i64, MVT::f64, 4 }, + { ISD::FP_TO_UINT, MVT::i64, MVT::f64, 15 }, { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 4 }, { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 4 }, { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 3 }, @@ -2138,11 +2133,11 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 3 }, { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 4 }, { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 9 }, - { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 12 }, + { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 12 }, { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1 }, { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 2 }, { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 }, - { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 10 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 10 }, { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 3 }, { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 4 }, { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 6 }, @@ -2250,12 +2245,12 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, if (ST->hasSSE41() && !ST->hasAVX()) if (const auto *Entry = ConvertCostTableLookup(SSE41ConversionTbl, ISD, LTDest.second, LTSrc.second)) - return AdjustCost(LTSrc.first * Entry->Cost); + return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost); if (ST->hasSSE2() && !ST->hasAVX()) if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD, LTDest.second, LTSrc.second)) - return AdjustCost(LTSrc.first * Entry->Cost); + return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost); return AdjustCost( BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I)); diff --git a/llvm/test/Analysis/CostModel/X86/cast.ll b/llvm/test/Analysis/CostModel/X86/cast.ll index 5377c68761a3b..7d0a3fd8fba13 100644 --- a/llvm/test/Analysis/CostModel/X86/cast.ll +++ b/llvm/test/Analysis/CostModel/X86/cast.ll @@ -373,23 +373,23 @@ define i32 @masks4(<4 x i1> %in) { define void @sitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) { ; SSE2-LABEL: 'sitofp4' -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %A1 = sitofp <4 x i1> %a to <4 x float> -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %A2 = sitofp <4 x i1> %a to <4 x double> -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = sitofp <4 x i8> %b to <4 x float> -; SSE2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %B2 = sitofp <4 x i8> %b to <4 x double> -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %C1 = sitofp <4 x i16> %c to <4 x float> -; SSE2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %C2 = sitofp <4 x i16> %c to <4 x double> -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %D1 = sitofp <4 x i32> %d to <4 x float> -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %D2 = sitofp <4 x i32> %d to <4 x double> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %A1 = sitofp <4 x i1> %a to <4 x float> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A2 = sitofp <4 x i1> %a to <4 x double> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %B1 = sitofp <4 x i8> %b to <4 x float> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B2 = sitofp <4 x i8> %b to <4 x double> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C1 = sitofp <4 x i16> %c to <4 x float> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %C2 = sitofp <4 x i16> %c to <4 x double> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %D1 = sitofp <4 x i32> %d to <4 x float> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %D2 = sitofp <4 x i32> %d to <4 x double> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE41-LABEL: 'sitofp4' ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A1 = sitofp <4 x i1> %a to <4 x float> -; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A2 = sitofp <4 x i1> %a to <4 x double> -; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = sitofp <4 x i8> %b to <4 x float> -; SSE41-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %B2 = sitofp <4 x i8> %b to <4 x double> -; SSE41-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %C1 = sitofp <4 x i16> %c to <4 x float> -; SSE41-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %C2 = sitofp <4 x i16> %c to <4 x double> +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %A2 = sitofp <4 x i1> %a to <4 x double> +; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %B1 = sitofp <4 x i8> %b to <4 x float> +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B2 = sitofp <4 x i8> %b to <4 x double> +; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C1 = sitofp <4 x i16> %c to <4 x float> +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %C2 = sitofp <4 x i16> %c to <4 x double> ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D1 = sitofp <4 x i32> %d to <4 x float> ; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %D2 = sitofp <4 x i32> %d to <4 x double> ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void @@ -440,16 +440,16 @@ define void @sitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) { define void @sitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) { ; SSE2-LABEL: 'sitofp8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %A1 = sitofp <8 x i1> %a to <8 x float> -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float> -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float> -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %D1 = sitofp <8 x i32> %d to <8 x float> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %A1 = sitofp <8 x i1> %a to <8 x float> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %D1 = sitofp <8 x i32> %d to <8 x float> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE41-LABEL: 'sitofp8' -; SSE41-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %A1 = sitofp <8 x i1> %a to <8 x float> -; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float> -; SSE41-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float> +; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %A1 = sitofp <8 x i1> %a to <8 x float> +; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float> +; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float> ; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %D1 = sitofp <8 x i32> %d to <8 x float> ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; @@ -484,13 +484,13 @@ define void @sitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) { define void @uitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) { ; SSE-LABEL: 'uitofp4' ; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %A1 = uitofp <4 x i1> %a to <4 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %A2 = uitofp <4 x i1> %a to <4 x double> -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = uitofp <4 x i8> %b to <4 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %B2 = uitofp <4 x i8> %b to <4 x double> -; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %C1 = uitofp <4 x i16> %c to <4 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %C2 = uitofp <4 x i16> %c to <4 x double> +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %A2 = uitofp <4 x i1> %a to <4 x double> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B1 = uitofp <4 x i8> %b to <4 x float> +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B2 = uitofp <4 x i8> %b to <4 x double> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C1 = uitofp <4 x i16> %c to <4 x float> +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %C2 = uitofp <4 x i16> %c to <4 x double> ; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %D1 = uitofp <4 x i32> %d to <4 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %D2 = uitofp <4 x i32> %d to <4 x double> +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %D2 = uitofp <4 x i32> %d to <4 x double> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'uitofp4' @@ -539,9 +539,9 @@ define void @uitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) { define void @uitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) { ; SSE-LABEL: 'uitofp8' -; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %A1 = uitofp <8 x i1> %a to <8 x float> +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A1 = uitofp <8 x i1> %a to <8 x float> ; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = uitofp <8 x i8> %b to <8 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %C1 = uitofp <8 x i16> %c to <8 x float> +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %C1 = uitofp <8 x i16> %c to <8 x float> ; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %D1 = uitofp <8 x i32> %d to <8 x float> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; diff --git a/llvm/test/Analysis/CostModel/X86/fptoui.ll b/llvm/test/Analysis/CostModel/X86/fptoui.ll index 390aeaaef4fd6..5a9f6bf4eb793 100644 --- a/llvm/test/Analysis/CostModel/X86/fptoui.ll +++ b/llvm/test/Analysis/CostModel/X86/fptoui.ll @@ -12,24 +12,24 @@ define i32 @fptoui_double_i64(i32 %arg) { ; SSE2-LABEL: 'fptoui_double_i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui double undef to i64 -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64> -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> -; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %I64 = fptoui double undef to i64 +; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'fptoui_double_i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui double undef to i64 -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64> -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> -; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %I64 = fptoui double undef to i64 +; SSE42-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'fptoui_double_i64' -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui double undef to i64 -; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64> -; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> -; AVX-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> +; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %I64 = fptoui double undef to i64 +; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64> +; AVX-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> +; AVX-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'fptoui_double_i64' @@ -47,10 +47,10 @@ define i32 @fptoui_double_i64(i32 %arg) { ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SLM-LABEL: 'fptoui_double_i64' -; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui double undef to i64 -; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64> -; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> -; SLM-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %I64 = fptoui double undef to i64 +; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = fptoui double undef to i64 diff --git a/llvm/test/Analysis/CostModel/X86/sitofp.ll b/llvm/test/Analysis/CostModel/X86/sitofp.ll index de4b2c2276896..67d3663624552 100644 --- a/llvm/test/Analysis/CostModel/X86/sitofp.ll +++ b/llvm/test/Analysis/CostModel/X86/sitofp.ll @@ -13,9 +13,9 @@ define i32 @sitofp_i8_double() { ; SSE-LABEL: 'sitofp_i8_double' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i8_f64 = sitofp i8 undef to double -; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %cvt_v2i8_v2f64 = sitofp <2 x i8> undef to <2 x double> -; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %cvt_v4i8_v4f64 = sitofp <4 x i8> undef to <4 x double> -; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %cvt_v8i8_v8f64 = sitofp <8 x i8> undef to <8 x double> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i8_v2f64 = sitofp <2 x i8> undef to <2 x double> +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v4i8_v4f64 = sitofp <4 x i8> undef to <4 x double> +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cvt_v8i8_v8f64 = sitofp <8 x i8> undef to <8 x double> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'sitofp_i8_double' @@ -42,9 +42,9 @@ define i32 @sitofp_i8_double() { define i32 @sitofp_i16_double() { ; SSE-LABEL: 'sitofp_i16_double' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i16_f64 = sitofp i16 undef to double -; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %cvt_v2i16_v2f64 = sitofp <2 x i16> undef to <2 x double> -; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %cvt_v4i16_v4f64 = sitofp <4 x i16> undef to <4 x double> -; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %cvt_v8i16_v8f64 = sitofp <8 x i16> undef to <8 x double> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i16_v2f64 = sitofp <2 x i16> undef to <2 x double> +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v4i16_v4f64 = sitofp <4 x i16> undef to <4 x double> +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cvt_v8i16_v8f64 = sitofp <8 x i16> undef to <8 x double> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'sitofp_i16_double' @@ -71,35 +71,35 @@ define i32 @sitofp_i16_double() { define i32 @sitofp_i32_double() { ; SSE2-LABEL: 'sitofp_i32_double' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double> -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double> -; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double> +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'sitofp_i32_double' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double> -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'sitofp_i32_double' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'sitofp_i32_double' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double> ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'sitofp_i32_double' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double> +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef @@ -114,21 +114,21 @@ define i32 @sitofp_i32_double() { define i32 @sitofp_i64_double() { ; SSE-LABEL: 'sitofp_i64_double' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f64 = sitofp i64 undef to double -; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v2i64_v2f64 = sitofp <2 x i64> undef to <2 x double> -; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cvt_v4i64_v4f64 = sitofp <4 x i64> undef to <4 x double> -; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %cvt_v8i64_v8f64 = sitofp <8 x i64> undef to <8 x double> +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v2i64_v2f64 = sitofp <2 x i64> undef to <2 x double> +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cvt_v4i64_v4f64 = sitofp <4 x i64> undef to <4 x double> +; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %cvt_v8i64_v8f64 = sitofp <8 x i64> undef to <8 x double> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'sitofp_i64_double' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f64 = sitofp i64 undef to double -; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v2i64_v2f64 = sitofp <2 x i64> undef to <2 x double> +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v2i64_v2f64 = sitofp <2 x i64> undef to <2 x double> ; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cvt_v4i64_v4f64 = sitofp <4 x i64> undef to <4 x double> ; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cvt_v8i64_v8f64 = sitofp <8 x i64> undef to <8 x double> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'sitofp_i64_double' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f64 = sitofp i64 undef to double -; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v2i64_v2f64 = sitofp <2 x i64> undef to <2 x double> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v2i64_v2f64 = sitofp <2 x i64> undef to <2 x double> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cvt_v4i64_v4f64 = sitofp <4 x i64> undef to <4 x double> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %cvt_v8i64_v8f64 = sitofp <8 x i64> undef to <8 x double> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef @@ -150,10 +150,10 @@ define i32 @sitofp_i64_double() { define i32 @sitofp_i8_float() { ; SSE-LABEL: 'sitofp_i8_float' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i8_f32 = sitofp i8 undef to float -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v2i8_v2f32 = sitofp <2 x i8> undef to <2 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v4i8_v4f32 = sitofp <4 x i8> undef to <4 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v8i8_v8f32 = sitofp <8 x i8> undef to <8 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v16i8_v16f32 = sitofp <16 x i8> undef to <16 x float> +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v2i8_v2f32 = sitofp <2 x i8> undef to <2 x float> +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i8_v4f32 = sitofp <4 x i8> undef to <4 x float> +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cvt_v8i8_v8f32 = sitofp <8 x i8> undef to <8 x float> +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cvt_v16i8_v16f32 = sitofp <16 x i8> undef to <16 x float> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'sitofp_i8_float' @@ -183,10 +183,10 @@ define i32 @sitofp_i8_float() { define i32 @sitofp_i16_float() { ; SSE-LABEL: 'sitofp_i16_float' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i16_f32 = sitofp i16 undef to float -; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cvt_v2i16_v2f32 = sitofp <2 x i16> undef to <2 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cvt_v4i16_v4f32 = sitofp <4 x i16> undef to <4 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cvt_v8i16_v8f32 = sitofp <8 x i16> undef to <8 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cvt_v16i16_v16f32 = sitofp <16 x i16> undef to <16 x float> +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v2i16_v2f32 = sitofp <2 x i16> undef to <2 x float> +; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i16_v4f32 = sitofp <4 x i16> undef to <4 x float> +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cvt_v8i16_v8f32 = sitofp <8 x i16> undef to <8 x float> +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cvt_v16i16_v16f32 = sitofp <16 x i16> undef to <16 x float> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'sitofp_i16_float' @@ -216,10 +216,10 @@ define i32 @sitofp_i16_float() { define i32 @sitofp_i32_float() { ; SSE2-LABEL: 'sitofp_i32_float' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f32 = sitofp i32 undef to float -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float> -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v4i32_v4f32 = sitofp <4 x i32> undef to <4 x float> -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cvt_v8i32_v8f32 = sitofp <8 x i32> undef to <8 x float> -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v16i32_v16f32 = sitofp <16 x i32> undef to <16 x float> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i32_v4f32 = sitofp <4 x i32> undef to <4 x float> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cvt_v8i32_v8f32 = sitofp <8 x i32> undef to <8 x float> +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cvt_v16i32_v16f32 = sitofp <16 x i32> undef to <16 x float> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'sitofp_i32_float' @@ -232,7 +232,7 @@ define i32 @sitofp_i32_float() { ; ; AVX1-LABEL: 'sitofp_i32_float' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f32 = sitofp i32 undef to float -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f32 = sitofp <4 x i32> undef to <4 x float> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v8i32_v8f32 = sitofp <8 x i32> undef to <8 x float> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v16i32_v16f32 = sitofp <16 x i32> undef to <16 x float> @@ -240,7 +240,7 @@ define i32 @sitofp_i32_float() { ; ; AVX2-LABEL: 'sitofp_i32_float' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f32 = sitofp i32 undef to float -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f32 = sitofp <4 x i32> undef to <4 x float> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v8i32_v8f32 = sitofp <8 x i32> undef to <8 x float> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v16i32_v16f32 = sitofp <16 x i32> undef to <16 x float> @@ -248,7 +248,7 @@ define i32 @sitofp_i32_float() { ; ; AVX512-LABEL: 'sitofp_i32_float' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f32 = sitofp i32 undef to float -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float> +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f32 = sitofp <4 x i32> undef to <4 x float> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v8i32_v8f32 = sitofp <8 x i32> undef to <8 x float> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v16i32_v16f32 = sitofp <16 x i32> undef to <16 x float> @@ -265,10 +265,10 @@ define i32 @sitofp_i32_float() { define i32 @sitofp_i64_float() { ; SSE-LABEL: 'sitofp_i64_float' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f32 = sitofp i64 undef to float -; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cvt_v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cvt_v4i64_v4f32 = sitofp <4 x i64> undef to <4 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %cvt_v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %cvt_v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float> +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float> +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cvt_v4i64_v4f32 = sitofp <4 x i64> undef to <4 x float> +; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %cvt_v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float> +; SSE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %cvt_v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'sitofp_i64_float' diff --git a/llvm/test/Analysis/CostModel/X86/uitofp.ll b/llvm/test/Analysis/CostModel/X86/uitofp.ll index deb6bd496e13e..94d5a7c92f8d9 100644 --- a/llvm/test/Analysis/CostModel/X86/uitofp.ll +++ b/llvm/test/Analysis/CostModel/X86/uitofp.ll @@ -13,9 +13,9 @@ define i32 @uitofp_i8_double() { ; SSE-LABEL: 'uitofp_i8_double' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i8_f64 = uitofp i8 undef to double -; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %cvt_v2i8_v2f64 = uitofp <2 x i8> undef to <2 x double> -; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %cvt_v4i8_v4f64 = uitofp <4 x i8> undef to <4 x double> -; SSE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %cvt_v8i8_v8f64 = uitofp <8 x i8> undef to <8 x double> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i8_v2f64 = uitofp <2 x i8> undef to <2 x double> +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v4i8_v4f64 = uitofp <4 x i8> undef to <4 x double> +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cvt_v8i8_v8f64 = uitofp <8 x i8> undef to <8 x double> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'uitofp_i8_double' @@ -42,9 +42,9 @@ define i32 @uitofp_i8_double() { define i32 @uitofp_i16_double() { ; SSE-LABEL: 'uitofp_i16_double' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i16_f64 = uitofp i16 undef to double -; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %cvt_v2i16_v2f64 = uitofp <2 x i16> undef to <2 x double> -; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %cvt_v4i16_v4f64 = uitofp <4 x i16> undef to <4 x double> -; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %cvt_v8i16_v8f64 = uitofp <8 x i16> undef to <8 x double> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i16_v2f64 = uitofp <2 x i16> undef to <2 x double> +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v4i16_v4f64 = uitofp <4 x i16> undef to <4 x double> +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cvt_v8i16_v8f64 = uitofp <8 x i16> undef to <8 x double> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'uitofp_i16_double' @@ -71,9 +71,9 @@ define i32 @uitofp_i16_double() { define i32 @uitofp_i32_double() { ; SSE-LABEL: 'uitofp_i32_double' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = uitofp i32 undef to double -; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cvt_v2i32_v2f64 = uitofp <2 x i32> undef to <2 x double> -; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cvt_v4i32_v4f64 = uitofp <4 x i32> undef to <4 x double> -; SSE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %cvt_v8i32_v8f64 = uitofp <8 x i32> undef to <8 x double> +; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %cvt_v2i32_v2f64 = uitofp <2 x i32> undef to <2 x double> +; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cvt_v4i32_v4f64 = uitofp <4 x i32> undef to <4 x double> +; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %cvt_v8i32_v8f64 = uitofp <8 x i32> undef to <8 x double> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'uitofp_i32_double' @@ -106,17 +106,17 @@ define i32 @uitofp_i32_double() { define i32 @uitofp_i64_double() { ; SSE2-LABEL: 'uitofp_i64_double' -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cvt_i64_f64 = uitofp i64 undef to double -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cvt_v2i64_v2f64 = uitofp <2 x i64> undef to <2 x double> -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cvt_v4i64_v4f64 = uitofp <4 x i64> undef to <4 x double> -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cvt_v8i64_v8f64 = uitofp <8 x i64> undef to <8 x double> +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %cvt_i64_f64 = uitofp i64 undef to double +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cvt_v2i64_v2f64 = uitofp <2 x i64> undef to <2 x double> +; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cvt_v4i64_v4f64 = uitofp <4 x i64> undef to <4 x double> +; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %cvt_v8i64_v8f64 = uitofp <8 x i64> undef to <8 x double> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'uitofp_i64_double' ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_i64_f64 = uitofp i64 undef to double -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cvt_v2i64_v2f64 = uitofp <2 x i64> undef to <2 x double> -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cvt_v4i64_v4f64 = uitofp <4 x i64> undef to <4 x double> -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cvt_v8i64_v8f64 = uitofp <8 x i64> undef to <8 x double> +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cvt_v2i64_v2f64 = uitofp <2 x i64> undef to <2 x double> +; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cvt_v4i64_v4f64 = uitofp <4 x i64> undef to <4 x double> +; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %cvt_v8i64_v8f64 = uitofp <8 x i64> undef to <8 x double> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'uitofp_i64_double' @@ -150,10 +150,10 @@ define i32 @uitofp_i64_double() { define i32 @uitofp_i8_float() { ; SSE-LABEL: 'uitofp_i8_float' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i8_f32 = uitofp i8 undef to float -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v2i8_v2f32 = uitofp <2 x i8> undef to <2 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v4i8_v4f32 = uitofp <4 x i8> undef to <4 x float> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i8_v2f32 = uitofp <2 x i8> undef to <2 x float> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v4i8_v4f32 = uitofp <4 x i8> undef to <4 x float> ; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v8i8_v8f32 = uitofp <8 x i8> undef to <8 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v16i8_v16f32 = uitofp <16 x i8> undef to <16 x float> +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cvt_v16i8_v16f32 = uitofp <16 x i8> undef to <16 x float> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'uitofp_i8_float' @@ -183,10 +183,10 @@ define i32 @uitofp_i8_float() { define i32 @uitofp_i16_float() { ; SSE-LABEL: 'uitofp_i16_float' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i16_f32 = uitofp i16 undef to float -; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cvt_v2i16_v2f32 = uitofp <2 x i16> undef to <2 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cvt_v4i16_v4f32 = uitofp <4 x i16> undef to <4 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cvt_v8i16_v8f32 = uitofp <8 x i16> undef to <8 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cvt_v16i16_v16f32 = uitofp <16 x i16> undef to <16 x float> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i16_v2f32 = uitofp <2 x i16> undef to <2 x float> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v4i16_v4f32 = uitofp <4 x i16> undef to <4 x float> +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v8i16_v8f32 = uitofp <8 x i16> undef to <8 x float> +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cvt_v16i16_v16f32 = uitofp <16 x i16> undef to <16 x float> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'uitofp_i16_float' @@ -256,19 +256,19 @@ define i32 @uitofp_i32_float() { define i32 @uitofp_i64_float() { ; SSE2-LABEL: 'uitofp_i64_float' -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cvt_i64_f32 = uitofp i64 undef to float -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cvt_v2i64_v2f32 = uitofp <2 x i64> undef to <2 x float> -; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cvt_v4i64_v4f32 = uitofp <4 x i64> undef to <4 x float> -; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %cvt_v8i64_v8f32 = uitofp <8 x i64> undef to <8 x float> -; SSE2-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %cvt_v16i64_v16f32 = uitofp <16 x i64> undef to <16 x float> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_i64_f32 = uitofp i64 undef to float +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cvt_v2i64_v2f32 = uitofp <2 x i64> undef to <2 x float> +; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %cvt_v4i64_v4f32 = uitofp <4 x i64> undef to <4 x float> +; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %cvt_v8i64_v8f32 = uitofp <8 x i64> undef to <8 x float> +; SSE2-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %cvt_v16i64_v16f32 = uitofp <16 x i64> undef to <16 x float> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'uitofp_i64_float' ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_i64_f32 = uitofp i64 undef to float -; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cvt_v2i64_v2f32 = uitofp <2 x i64> undef to <2 x float> -; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cvt_v4i64_v4f32 = uitofp <4 x i64> undef to <4 x float> -; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %cvt_v8i64_v8f32 = uitofp <8 x i64> undef to <8 x float> -; SSE42-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %cvt_v16i64_v16f32 = uitofp <16 x i64> undef to <16 x float> +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cvt_v2i64_v2f32 = uitofp <2 x i64> undef to <2 x float> +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %cvt_v4i64_v4f32 = uitofp <4 x i64> undef to <4 x float> +; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %cvt_v8i64_v8f32 = uitofp <8 x i64> undef to <8 x float> +; SSE42-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %cvt_v16i64_v16f32 = uitofp <16 x i64> undef to <16 x float> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'uitofp_i64_float' diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll index 5536030018dc7..c5977a72302ec 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -mtriple=x86_64-unknown -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=SLM -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefixes=CHECK,SSE +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefixes=CHECK,SLM +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -basic-aa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX512 define <8 x float> @sitofp_uitofp(<8 x i32> %a) { ; CHECK-LABEL: @sitofp_uitofp( @@ -161,43 +161,12 @@ define <8 x i32> @sext_zext(<8 x i16> %a) { } define <8 x float> @sitofp_4i32_8i16(<4 x i32> %a, <8 x i16> %b) { -; SSE-LABEL: @sitofp_4i32_8i16( -; SSE-NEXT: [[B0:%.*]] = extractelement <8 x i16> [[B:%.*]], i32 0 -; SSE-NEXT: [[B1:%.*]] = extractelement <8 x i16> [[B]], i32 1 -; SSE-NEXT: [[B2:%.*]] = extractelement <8 x i16> [[B]], i32 2 -; SSE-NEXT: [[B3:%.*]] = extractelement <8 x i16> [[B]], i32 3 -; SSE-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float> -; SSE-NEXT: [[AB4:%.*]] = sitofp i16 [[B0]] to float -; SSE-NEXT: [[AB5:%.*]] = sitofp i16 [[B1]] to float -; SSE-NEXT: [[AB6:%.*]] = sitofp i16 [[B2]] to float -; SSE-NEXT: [[AB7:%.*]] = sitofp i16 [[B3]] to float -; SSE-NEXT: [[R31:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <8 x i32> -; SSE-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R31]], float [[AB4]], i32 4 -; SSE-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[AB5]], i32 5 -; SSE-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[AB6]], i32 6 -; SSE-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[AB7]], i32 7 -; SSE-NEXT: ret <8 x float> [[R7]] -; -; SLM-LABEL: @sitofp_4i32_8i16( -; SLM-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float> -; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> undef, <4 x i32> -; SLM-NEXT: [[TMP3:%.*]] = sitofp <4 x i16> [[TMP2]] to <4 x float> -; SLM-NEXT: [[R72:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP3]], <8 x i32> -; SLM-NEXT: ret <8 x float> [[R72]] -; -; AVX-LABEL: @sitofp_4i32_8i16( -; AVX-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float> -; AVX-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> undef, <4 x i32> -; AVX-NEXT: [[TMP3:%.*]] = sitofp <4 x i16> [[TMP2]] to <4 x float> -; AVX-NEXT: [[R72:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP3]], <8 x i32> -; AVX-NEXT: ret <8 x float> [[R72]] -; -; AVX512-LABEL: @sitofp_4i32_8i16( -; AVX512-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float> -; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> undef, <4 x i32> -; AVX512-NEXT: [[TMP3:%.*]] = sitofp <4 x i16> [[TMP2]] to <4 x float> -; AVX512-NEXT: [[R72:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP3]], <8 x i32> -; AVX512-NEXT: ret <8 x float> [[R72]] +; CHECK-LABEL: @sitofp_4i32_8i16( +; CHECK-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> undef, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = sitofp <4 x i16> [[TMP2]] to <4 x float> +; CHECK-NEXT: [[R72:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP3]], <8 x i32> +; CHECK-NEXT: ret <8 x float> [[R72]] ; %a0 = extractelement <4 x i32> %a, i32 0 %a1 = extractelement <4 x i32> %a, i32 1 @@ -228,24 +197,81 @@ define <8 x float> @sitofp_4i32_8i16(<4 x i32> %a, <8 x i16> %b) { ; Inspired by PR38154 define <8 x float> @sitofp_uitofp_4i32_8i16_16i8(<4 x i32> %a, <8 x i16> %b, <16 x i8> %c) { -; CHECK-LABEL: @sitofp_uitofp_4i32_8i16_16i8( -; CHECK-NEXT: [[B0:%.*]] = extractelement <8 x i16> [[B:%.*]], i32 0 -; CHECK-NEXT: [[B1:%.*]] = extractelement <8 x i16> [[B]], i32 1 -; CHECK-NEXT: [[C0:%.*]] = extractelement <16 x i8> [[C:%.*]], i32 0 -; CHECK-NEXT: [[C1:%.*]] = extractelement <16 x i8> [[C]], i32 1 -; CHECK-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float> -; CHECK-NEXT: [[TMP2:%.*]] = uitofp <4 x i32> [[A]] to <4 x float> -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP2]], <4 x i32> -; CHECK-NEXT: [[AB4:%.*]] = sitofp i16 [[B0]] to float -; CHECK-NEXT: [[AB5:%.*]] = uitofp i16 [[B1]] to float -; CHECK-NEXT: [[AB6:%.*]] = sitofp i8 [[C0]] to float -; CHECK-NEXT: [[AB7:%.*]] = uitofp i8 [[C1]] to float -; CHECK-NEXT: [[R31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <8 x i32> -; CHECK-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R31]], float [[AB4]], i32 4 -; CHECK-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[AB5]], i32 5 -; CHECK-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[AB6]], i32 6 -; CHECK-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[AB7]], i32 7 -; CHECK-NEXT: ret <8 x float> [[R7]] +; SSE-LABEL: @sitofp_uitofp_4i32_8i16_16i8( +; SSE-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float> +; SSE-NEXT: [[TMP2:%.*]] = uitofp <4 x i32> [[A]] to <4 x float> +; SSE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP2]], <4 x i32> +; SSE-NEXT: [[TMP4:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> undef, <2 x i32> +; SSE-NEXT: [[TMP5:%.*]] = sitofp <2 x i16> [[TMP4]] to <2 x float> +; SSE-NEXT: [[TMP6:%.*]] = uitofp <2 x i16> [[TMP4]] to <2 x float> +; SSE-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> [[TMP6]], <2 x i32> +; SSE-NEXT: [[TMP8:%.*]] = shufflevector <16 x i8> [[C:%.*]], <16 x i8> undef, <2 x i32> +; SSE-NEXT: [[TMP9:%.*]] = sitofp <2 x i8> [[TMP8]] to <2 x float> +; SSE-NEXT: [[TMP10:%.*]] = uitofp <2 x i8> [[TMP8]] to <2 x float> +; SSE-NEXT: [[TMP11:%.*]] = shufflevector <2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x i32> +; SSE-NEXT: [[R31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <8 x i32> +; SSE-NEXT: [[TMP12:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> poison, <8 x i32> +; SSE-NEXT: [[R53:%.*]] = shufflevector <8 x float> [[R31]], <8 x float> [[TMP12]], <8 x i32> +; SSE-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> poison, <8 x i32> +; SSE-NEXT: [[R72:%.*]] = shufflevector <8 x float> [[R53]], <8 x float> [[TMP13]], <8 x i32> +; SSE-NEXT: ret <8 x float> [[R72]] +; +; SLM-LABEL: @sitofp_uitofp_4i32_8i16_16i8( +; SLM-NEXT: [[B0:%.*]] = extractelement <8 x i16> [[B:%.*]], i32 0 +; SLM-NEXT: [[B1:%.*]] = extractelement <8 x i16> [[B]], i32 1 +; SLM-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float> +; SLM-NEXT: [[TMP2:%.*]] = uitofp <4 x i32> [[A]] to <4 x float> +; SLM-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP2]], <4 x i32> +; SLM-NEXT: [[AB4:%.*]] = sitofp i16 [[B0]] to float +; SLM-NEXT: [[AB5:%.*]] = uitofp i16 [[B1]] to float +; SLM-NEXT: [[TMP4:%.*]] = shufflevector <16 x i8> [[C:%.*]], <16 x i8> undef, <2 x i32> +; SLM-NEXT: [[TMP5:%.*]] = sitofp <2 x i8> [[TMP4]] to <2 x float> +; SLM-NEXT: [[TMP6:%.*]] = uitofp <2 x i8> [[TMP4]] to <2 x float> +; SLM-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> [[TMP6]], <2 x i32> +; SLM-NEXT: [[R31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <8 x i32> +; SLM-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R31]], float [[AB4]], i32 4 +; SLM-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[AB5]], i32 5 +; SLM-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> poison, <8 x i32> +; SLM-NEXT: [[R72:%.*]] = shufflevector <8 x float> [[R5]], <8 x float> [[TMP8]], <8 x i32> +; SLM-NEXT: ret <8 x float> [[R72]] +; +; AVX-LABEL: @sitofp_uitofp_4i32_8i16_16i8( +; AVX-NEXT: [[B0:%.*]] = extractelement <8 x i16> [[B:%.*]], i32 0 +; AVX-NEXT: [[B1:%.*]] = extractelement <8 x i16> [[B]], i32 1 +; AVX-NEXT: [[C0:%.*]] = extractelement <16 x i8> [[C:%.*]], i32 0 +; AVX-NEXT: [[C1:%.*]] = extractelement <16 x i8> [[C]], i32 1 +; AVX-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float> +; AVX-NEXT: [[TMP2:%.*]] = uitofp <4 x i32> [[A]] to <4 x float> +; AVX-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP2]], <4 x i32> +; AVX-NEXT: [[AB4:%.*]] = sitofp i16 [[B0]] to float +; AVX-NEXT: [[AB5:%.*]] = uitofp i16 [[B1]] to float +; AVX-NEXT: [[AB6:%.*]] = sitofp i8 [[C0]] to float +; AVX-NEXT: [[AB7:%.*]] = uitofp i8 [[C1]] to float +; AVX-NEXT: [[R31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <8 x i32> +; AVX-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R31]], float [[AB4]], i32 4 +; AVX-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[AB5]], i32 5 +; AVX-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[AB6]], i32 6 +; AVX-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[AB7]], i32 7 +; AVX-NEXT: ret <8 x float> [[R7]] +; +; AVX512-LABEL: @sitofp_uitofp_4i32_8i16_16i8( +; AVX512-NEXT: [[B0:%.*]] = extractelement <8 x i16> [[B:%.*]], i32 0 +; AVX512-NEXT: [[B1:%.*]] = extractelement <8 x i16> [[B]], i32 1 +; AVX512-NEXT: [[C0:%.*]] = extractelement <16 x i8> [[C:%.*]], i32 0 +; AVX512-NEXT: [[C1:%.*]] = extractelement <16 x i8> [[C]], i32 1 +; AVX512-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float> +; AVX512-NEXT: [[TMP2:%.*]] = uitofp <4 x i32> [[A]] to <4 x float> +; AVX512-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP2]], <4 x i32> +; AVX512-NEXT: [[AB4:%.*]] = sitofp i16 [[B0]] to float +; AVX512-NEXT: [[AB5:%.*]] = uitofp i16 [[B1]] to float +; AVX512-NEXT: [[AB6:%.*]] = sitofp i8 [[C0]] to float +; AVX512-NEXT: [[AB7:%.*]] = uitofp i8 [[C1]] to float +; AVX512-NEXT: [[R31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <8 x i32> +; AVX512-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R31]], float [[AB4]], i32 4 +; AVX512-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[AB5]], i32 5 +; AVX512-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[AB6]], i32 6 +; AVX512-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[AB7]], i32 7 +; AVX512-NEXT: ret <8 x float> [[R7]] ; %a0 = extractelement <4 x i32> %a, i32 0 %a1 = extractelement <4 x i32> %a, i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll index 3aa10795d179f..72940f93eba7f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll @@ -161,43 +161,12 @@ define <8 x i32> @sext_zext(<8 x i16> %a) { } define <8 x float> @sitofp_4i32_8i16(<4 x i32> %a, <8 x i16> %b) { -; SSE-LABEL: @sitofp_4i32_8i16( -; SSE-NEXT: [[B0:%.*]] = extractelement <8 x i16> [[B:%.*]], i32 0 -; SSE-NEXT: [[B1:%.*]] = extractelement <8 x i16> [[B]], i32 1 -; SSE-NEXT: [[B2:%.*]] = extractelement <8 x i16> [[B]], i32 2 -; SSE-NEXT: [[B3:%.*]] = extractelement <8 x i16> [[B]], i32 3 -; SSE-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float> -; SSE-NEXT: [[AB4:%.*]] = sitofp i16 [[B0]] to float -; SSE-NEXT: [[AB5:%.*]] = sitofp i16 [[B1]] to float -; SSE-NEXT: [[AB6:%.*]] = sitofp i16 [[B2]] to float -; SSE-NEXT: [[AB7:%.*]] = sitofp i16 [[B3]] to float -; SSE-NEXT: [[R31:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <8 x i32> -; SSE-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R31]], float [[AB4]], i32 4 -; SSE-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[AB5]], i32 5 -; SSE-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[AB6]], i32 6 -; SSE-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[AB7]], i32 7 -; SSE-NEXT: ret <8 x float> [[R7]] -; -; SLM-LABEL: @sitofp_4i32_8i16( -; SLM-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float> -; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> undef, <4 x i32> -; SLM-NEXT: [[TMP3:%.*]] = sitofp <4 x i16> [[TMP2]] to <4 x float> -; SLM-NEXT: [[R72:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP3]], <8 x i32> -; SLM-NEXT: ret <8 x float> [[R72]] -; -; AVX-LABEL: @sitofp_4i32_8i16( -; AVX-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float> -; AVX-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> undef, <4 x i32> -; AVX-NEXT: [[TMP3:%.*]] = sitofp <4 x i16> [[TMP2]] to <4 x float> -; AVX-NEXT: [[R72:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP3]], <8 x i32> -; AVX-NEXT: ret <8 x float> [[R72]] -; -; AVX512-LABEL: @sitofp_4i32_8i16( -; AVX512-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float> -; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> undef, <4 x i32> -; AVX512-NEXT: [[TMP3:%.*]] = sitofp <4 x i16> [[TMP2]] to <4 x float> -; AVX512-NEXT: [[R72:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP3]], <8 x i32> -; AVX512-NEXT: ret <8 x float> [[R72]] +; CHECK-LABEL: @sitofp_4i32_8i16( +; CHECK-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> undef, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = sitofp <4 x i16> [[TMP2]] to <4 x float> +; CHECK-NEXT: [[R72:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP3]], <8 x i32> +; CHECK-NEXT: ret <8 x float> [[R72]] ; %a0 = extractelement <4 x i32> %a, i32 0 %a1 = extractelement <4 x i32> %a, i32 1 @@ -228,24 +197,81 @@ define <8 x float> @sitofp_4i32_8i16(<4 x i32> %a, <8 x i16> %b) { ; Inspired by PR38154 define <8 x float> @sitofp_uitofp_4i32_8i16_16i8(<4 x i32> %a, <8 x i16> %b, <16 x i8> %c) { -; CHECK-LABEL: @sitofp_uitofp_4i32_8i16_16i8( -; CHECK-NEXT: [[B0:%.*]] = extractelement <8 x i16> [[B:%.*]], i32 0 -; CHECK-NEXT: [[B1:%.*]] = extractelement <8 x i16> [[B]], i32 1 -; CHECK-NEXT: [[C0:%.*]] = extractelement <16 x i8> [[C:%.*]], i32 0 -; CHECK-NEXT: [[C1:%.*]] = extractelement <16 x i8> [[C]], i32 1 -; CHECK-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float> -; CHECK-NEXT: [[TMP2:%.*]] = uitofp <4 x i32> [[A]] to <4 x float> -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP2]], <4 x i32> -; CHECK-NEXT: [[AB4:%.*]] = sitofp i16 [[B0]] to float -; CHECK-NEXT: [[AB5:%.*]] = uitofp i16 [[B1]] to float -; CHECK-NEXT: [[AB6:%.*]] = sitofp i8 [[C0]] to float -; CHECK-NEXT: [[AB7:%.*]] = uitofp i8 [[C1]] to float -; CHECK-NEXT: [[R31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <8 x i32> -; CHECK-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R31]], float [[AB4]], i32 4 -; CHECK-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[AB5]], i32 5 -; CHECK-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[AB6]], i32 6 -; CHECK-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[AB7]], i32 7 -; CHECK-NEXT: ret <8 x float> [[R7]] +; SSE-LABEL: @sitofp_uitofp_4i32_8i16_16i8( +; SSE-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float> +; SSE-NEXT: [[TMP2:%.*]] = uitofp <4 x i32> [[A]] to <4 x float> +; SSE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP2]], <4 x i32> +; SSE-NEXT: [[TMP4:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> undef, <2 x i32> +; SSE-NEXT: [[TMP5:%.*]] = sitofp <2 x i16> [[TMP4]] to <2 x float> +; SSE-NEXT: [[TMP6:%.*]] = uitofp <2 x i16> [[TMP4]] to <2 x float> +; SSE-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> [[TMP6]], <2 x i32> +; SSE-NEXT: [[TMP8:%.*]] = shufflevector <16 x i8> [[C:%.*]], <16 x i8> undef, <2 x i32> +; SSE-NEXT: [[TMP9:%.*]] = sitofp <2 x i8> [[TMP8]] to <2 x float> +; SSE-NEXT: [[TMP10:%.*]] = uitofp <2 x i8> [[TMP8]] to <2 x float> +; SSE-NEXT: [[TMP11:%.*]] = shufflevector <2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x i32> +; SSE-NEXT: [[R31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <8 x i32> +; SSE-NEXT: [[TMP12:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> poison, <8 x i32> +; SSE-NEXT: [[R53:%.*]] = shufflevector <8 x float> [[R31]], <8 x float> [[TMP12]], <8 x i32> +; SSE-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> poison, <8 x i32> +; SSE-NEXT: [[R72:%.*]] = shufflevector <8 x float> [[R53]], <8 x float> [[TMP13]], <8 x i32> +; SSE-NEXT: ret <8 x float> [[R72]] +; +; SLM-LABEL: @sitofp_uitofp_4i32_8i16_16i8( +; SLM-NEXT: [[B0:%.*]] = extractelement <8 x i16> [[B:%.*]], i32 0 +; SLM-NEXT: [[B1:%.*]] = extractelement <8 x i16> [[B]], i32 1 +; SLM-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float> +; SLM-NEXT: [[TMP2:%.*]] = uitofp <4 x i32> [[A]] to <4 x float> +; SLM-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP2]], <4 x i32> +; SLM-NEXT: [[AB4:%.*]] = sitofp i16 [[B0]] to float +; SLM-NEXT: [[AB5:%.*]] = uitofp i16 [[B1]] to float +; SLM-NEXT: [[TMP4:%.*]] = shufflevector <16 x i8> [[C:%.*]], <16 x i8> undef, <2 x i32> +; SLM-NEXT: [[TMP5:%.*]] = sitofp <2 x i8> [[TMP4]] to <2 x float> +; SLM-NEXT: [[TMP6:%.*]] = uitofp <2 x i8> [[TMP4]] to <2 x float> +; SLM-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> [[TMP6]], <2 x i32> +; SLM-NEXT: [[R31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <8 x i32> +; SLM-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R31]], float [[AB4]], i32 4 +; SLM-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[AB5]], i32 5 +; SLM-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> poison, <8 x i32> +; SLM-NEXT: [[R72:%.*]] = shufflevector <8 x float> [[R5]], <8 x float> [[TMP8]], <8 x i32> +; SLM-NEXT: ret <8 x float> [[R72]] +; +; AVX-LABEL: @sitofp_uitofp_4i32_8i16_16i8( +; AVX-NEXT: [[B0:%.*]] = extractelement <8 x i16> [[B:%.*]], i32 0 +; AVX-NEXT: [[B1:%.*]] = extractelement <8 x i16> [[B]], i32 1 +; AVX-NEXT: [[C0:%.*]] = extractelement <16 x i8> [[C:%.*]], i32 0 +; AVX-NEXT: [[C1:%.*]] = extractelement <16 x i8> [[C]], i32 1 +; AVX-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float> +; AVX-NEXT: [[TMP2:%.*]] = uitofp <4 x i32> [[A]] to <4 x float> +; AVX-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP2]], <4 x i32> +; AVX-NEXT: [[AB4:%.*]] = sitofp i16 [[B0]] to float +; AVX-NEXT: [[AB5:%.*]] = uitofp i16 [[B1]] to float +; AVX-NEXT: [[AB6:%.*]] = sitofp i8 [[C0]] to float +; AVX-NEXT: [[AB7:%.*]] = uitofp i8 [[C1]] to float +; AVX-NEXT: [[R31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <8 x i32> +; AVX-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R31]], float [[AB4]], i32 4 +; AVX-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[AB5]], i32 5 +; AVX-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[AB6]], i32 6 +; AVX-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[AB7]], i32 7 +; AVX-NEXT: ret <8 x float> [[R7]] +; +; AVX512-LABEL: @sitofp_uitofp_4i32_8i16_16i8( +; AVX512-NEXT: [[B0:%.*]] = extractelement <8 x i16> [[B:%.*]], i32 0 +; AVX512-NEXT: [[B1:%.*]] = extractelement <8 x i16> [[B]], i32 1 +; AVX512-NEXT: [[C0:%.*]] = extractelement <16 x i8> [[C:%.*]], i32 0 +; AVX512-NEXT: [[C1:%.*]] = extractelement <16 x i8> [[C]], i32 1 +; AVX512-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float> +; AVX512-NEXT: [[TMP2:%.*]] = uitofp <4 x i32> [[A]] to <4 x float> +; AVX512-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP2]], <4 x i32> +; AVX512-NEXT: [[AB4:%.*]] = sitofp i16 [[B0]] to float +; AVX512-NEXT: [[AB5:%.*]] = uitofp i16 [[B1]] to float +; AVX512-NEXT: [[AB6:%.*]] = sitofp i8 [[C0]] to float +; AVX512-NEXT: [[AB7:%.*]] = uitofp i8 [[C1]] to float +; AVX512-NEXT: [[R31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <8 x i32> +; AVX512-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R31]], float [[AB4]], i32 4 +; AVX512-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[AB5]], i32 5 +; AVX512-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[AB6]], i32 6 +; AVX512-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[AB7]], i32 7 +; AVX512-NEXT: ret <8 x float> [[R7]] ; %a0 = extractelement <4 x i32> %a, i32 0 %a1 = extractelement <4 x i32> %a, i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/sitofp-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/sitofp-inseltpoison.ll index 4b4b01ba8ce15..73710e2d8888e 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/sitofp-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/sitofp-inseltpoison.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -mtriple=x86_64-unknown -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256 --check-prefix=AVX256NODQ -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=bdver1 -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256 --check-prefix=AVX256NODQ -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256 --check-prefix=AVX256NODQ -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -mattr=-prefer-256-bit -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -mattr=+prefer-256-bit -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256 --check-prefix=AVX256DQ +; RUN: opt < %s -mtriple=x86_64-unknown -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,SSE +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256,AVX256NODQ +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=bdver1 -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256,AVX256NODQ +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256,AVX256NODQ +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -mattr=-prefer-256-bit -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -mattr=+prefer-256-bit -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256,AVX256DQ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @@ -216,20 +216,14 @@ define void @sitofp_8i64_8f64() #0 { } define void @sitofp_2i32_2f64() #0 { -; SSE-LABEL: @sitofp_2i32_2f64( -; SSE-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64 -; SSE-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4 -; SSE-NEXT: [[CVT0:%.*]] = sitofp i32 [[LD0]] to double -; SSE-NEXT: [[CVT1:%.*]] = sitofp i32 [[LD1]] to double -; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 -; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 -; SSE-NEXT: ret void -; -; AVX-LABEL: @sitofp_2i32_2f64( -; AVX-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([16 x i32]* @src32 to <2 x i32>*), align 64 -; AVX-NEXT: [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x double> -; AVX-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64 -; AVX-NEXT: ret void +; CHECK-LABEL: @sitofp_2i32_2f64( +; CHECK-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64 +; CHECK-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4 +; CHECK-NEXT: [[CVT0:%.*]] = sitofp i32 [[LD0]] to double +; CHECK-NEXT: [[CVT1:%.*]] = sitofp i32 [[LD1]] to double +; CHECK-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 +; CHECK-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 +; CHECK-NEXT: ret void ; %ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64 %ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4 @@ -922,26 +916,11 @@ define void @sitofp_16i32_16f32() #0 { } define void @sitofp_4i16_4f32() #0 { -; SSE-LABEL: @sitofp_4i16_4f32( -; SSE-NEXT: [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64 -; SSE-NEXT: [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2 -; SSE-NEXT: [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4 -; SSE-NEXT: [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2 -; SSE-NEXT: [[CVT0:%.*]] = sitofp i16 [[LD0]] to float -; SSE-NEXT: [[CVT1:%.*]] = sitofp i16 [[LD1]] to float -; SSE-NEXT: [[CVT2:%.*]] = sitofp i16 [[LD2]] to float -; SSE-NEXT: [[CVT3:%.*]] = sitofp i16 [[LD3]] to float -; SSE-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64 -; SSE-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4 -; SSE-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8 -; SSE-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4 -; SSE-NEXT: ret void -; -; AVX-LABEL: @sitofp_4i16_4f32( -; AVX-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64 -; AVX-NEXT: [[TMP2:%.*]] = sitofp <4 x i16> [[TMP1]] to <4 x float> -; AVX-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64 -; AVX-NEXT: ret void +; CHECK-LABEL: @sitofp_4i16_4f32( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64 +; CHECK-NEXT: [[TMP2:%.*]] = sitofp <4 x i16> [[TMP1]] to <4 x float> +; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64 +; CHECK-NEXT: ret void ; %ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64 %ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2 @@ -960,30 +939,12 @@ define void @sitofp_4i16_4f32() #0 { define void @sitofp_8i16_8f32() #0 { ; SSE-LABEL: @sitofp_8i16_8f32( -; SSE-NEXT: [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64 -; SSE-NEXT: [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2 -; SSE-NEXT: [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4 -; SSE-NEXT: [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2 -; SSE-NEXT: [[LD4:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4), align 8 -; SSE-NEXT: [[LD5:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 5), align 2 -; SSE-NEXT: [[LD6:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 6), align 4 -; SSE-NEXT: [[LD7:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 7), align 2 -; SSE-NEXT: [[CVT0:%.*]] = sitofp i16 [[LD0]] to float -; SSE-NEXT: [[CVT1:%.*]] = sitofp i16 [[LD1]] to float -; SSE-NEXT: [[CVT2:%.*]] = sitofp i16 [[LD2]] to float -; SSE-NEXT: [[CVT3:%.*]] = sitofp i16 [[LD3]] to float -; SSE-NEXT: [[CVT4:%.*]] = sitofp i16 [[LD4]] to float -; SSE-NEXT: [[CVT5:%.*]] = sitofp i16 [[LD5]] to float -; SSE-NEXT: [[CVT6:%.*]] = sitofp i16 [[LD6]] to float -; SSE-NEXT: [[CVT7:%.*]] = sitofp i16 [[LD7]] to float -; SSE-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64 -; SSE-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4 -; SSE-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8 -; SSE-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4 -; SSE-NEXT: store float [[CVT4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16 -; SSE-NEXT: store float [[CVT5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4 -; SSE-NEXT: store float [[CVT6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8 -; SSE-NEXT: store float [[CVT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4 +; SSE-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64 +; SSE-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4) to <4 x i16>*), align 8 +; SSE-NEXT: [[TMP3:%.*]] = sitofp <4 x i16> [[TMP1]] to <4 x float> +; SSE-NEXT: [[TMP4:%.*]] = sitofp <4 x i16> [[TMP2]] to <4 x float> +; SSE-NEXT: store <4 x float> [[TMP3]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64 +; SSE-NEXT: store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16 ; SSE-NEXT: ret void ; ; AVX-LABEL: @sitofp_8i16_8f32( @@ -1021,54 +982,18 @@ define void @sitofp_8i16_8f32() #0 { define void @sitofp_16i16_16f32() #0 { ; SSE-LABEL: @sitofp_16i16_16f32( -; SSE-NEXT: [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64 -; SSE-NEXT: [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2 -; SSE-NEXT: [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4 -; SSE-NEXT: [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2 -; SSE-NEXT: [[LD4:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4), align 8 -; SSE-NEXT: [[LD5:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 5), align 2 -; SSE-NEXT: [[LD6:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 6), align 4 -; SSE-NEXT: [[LD7:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 7), align 2 -; SSE-NEXT: [[LD8:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 8), align 16 -; SSE-NEXT: [[LD9:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 9), align 2 -; SSE-NEXT: [[LD10:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 10), align 4 -; SSE-NEXT: [[LD11:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 11), align 2 -; SSE-NEXT: [[LD12:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 12), align 8 -; SSE-NEXT: [[LD13:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 13), align 2 -; SSE-NEXT: [[LD14:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 14), align 4 -; SSE-NEXT: [[LD15:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 15), align 2 -; SSE-NEXT: [[CVT0:%.*]] = sitofp i16 [[LD0]] to float -; SSE-NEXT: [[CVT1:%.*]] = sitofp i16 [[LD1]] to float -; SSE-NEXT: [[CVT2:%.*]] = sitofp i16 [[LD2]] to float -; SSE-NEXT: [[CVT3:%.*]] = sitofp i16 [[LD3]] to float -; SSE-NEXT: [[CVT4:%.*]] = sitofp i16 [[LD4]] to float -; SSE-NEXT: [[CVT5:%.*]] = sitofp i16 [[LD5]] to float -; SSE-NEXT: [[CVT6:%.*]] = sitofp i16 [[LD6]] to float -; SSE-NEXT: [[CVT7:%.*]] = sitofp i16 [[LD7]] to float -; SSE-NEXT: [[CVT8:%.*]] = sitofp i16 [[LD8]] to float -; SSE-NEXT: [[CVT9:%.*]] = sitofp i16 [[LD9]] to float -; SSE-NEXT: [[CVT10:%.*]] = sitofp i16 [[LD10]] to float -; SSE-NEXT: [[CVT11:%.*]] = sitofp i16 [[LD11]] to float -; SSE-NEXT: [[CVT12:%.*]] = sitofp i16 [[LD12]] to float -; SSE-NEXT: [[CVT13:%.*]] = sitofp i16 [[LD13]] to float -; SSE-NEXT: [[CVT14:%.*]] = sitofp i16 [[LD14]] to float -; SSE-NEXT: [[CVT15:%.*]] = sitofp i16 [[LD15]] to float -; SSE-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64 -; SSE-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4 -; SSE-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8 -; SSE-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4 -; SSE-NEXT: store float [[CVT4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16 -; SSE-NEXT: store float [[CVT5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4 -; SSE-NEXT: store float [[CVT6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8 -; SSE-NEXT: store float [[CVT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4 -; SSE-NEXT: store float [[CVT8]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8), align 32 -; SSE-NEXT: store float [[CVT9]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9), align 4 -; SSE-NEXT: store float [[CVT10]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 8 -; SSE-NEXT: store float [[CVT11]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4 -; SSE-NEXT: store float [[CVT12]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 16 -; SSE-NEXT: store float [[CVT13]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4 -; SSE-NEXT: store float [[CVT14]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 8 -; SSE-NEXT: store float [[CVT15]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4 +; SSE-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64 +; SSE-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4) to <4 x i16>*), align 8 +; SSE-NEXT: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 8) to <4 x i16>*), align 16 +; SSE-NEXT: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 12) to <4 x i16>*), align 8 +; SSE-NEXT: [[TMP5:%.*]] = sitofp <4 x i16> [[TMP1]] to <4 x float> +; SSE-NEXT: [[TMP6:%.*]] = sitofp <4 x i16> [[TMP2]] to <4 x float> +; SSE-NEXT: [[TMP7:%.*]] = sitofp <4 x i16> [[TMP3]] to <4 x float> +; SSE-NEXT: [[TMP8:%.*]] = sitofp <4 x i16> [[TMP4]] to <4 x float> +; SSE-NEXT: store <4 x float> [[TMP5]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64 +; SSE-NEXT: store <4 x float> [[TMP6]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16 +; SSE-NEXT: store <4 x float> [[TMP7]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <4 x float>*), align 32 +; SSE-NEXT: store <4 x float> [[TMP8]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12) to <4 x float>*), align 16 ; SSE-NEXT: ret void ; ; AVX256-LABEL: @sitofp_16i16_16f32( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll b/llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll index ef63088afbb7a..fc90662061d5b 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -mtriple=x86_64-unknown -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256 --check-prefix=AVX256NODQ -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=bdver1 -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256 --check-prefix=AVX256NODQ -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256 --check-prefix=AVX256NODQ -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -mattr=-prefer-256-bit -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -mattr=+prefer-256-bit -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256 --check-prefix=AVX256DQ +; RUN: opt < %s -mtriple=x86_64-unknown -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,SSE +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256,AVX256NODQ +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=bdver1 -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256,AVX256NODQ +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256,AVX256NODQ +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -mattr=-prefer-256-bit -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -mattr=+prefer-256-bit -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX256,AVX256DQ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @@ -216,20 +216,14 @@ define void @sitofp_8i64_8f64() #0 { } define void @sitofp_2i32_2f64() #0 { -; SSE-LABEL: @sitofp_2i32_2f64( -; SSE-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64 -; SSE-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4 -; SSE-NEXT: [[CVT0:%.*]] = sitofp i32 [[LD0]] to double -; SSE-NEXT: [[CVT1:%.*]] = sitofp i32 [[LD1]] to double -; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 -; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 -; SSE-NEXT: ret void -; -; AVX-LABEL: @sitofp_2i32_2f64( -; AVX-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([16 x i32]* @src32 to <2 x i32>*), align 64 -; AVX-NEXT: [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x double> -; AVX-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64 -; AVX-NEXT: ret void +; CHECK-LABEL: @sitofp_2i32_2f64( +; CHECK-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64 +; CHECK-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4 +; CHECK-NEXT: [[CVT0:%.*]] = sitofp i32 [[LD0]] to double +; CHECK-NEXT: [[CVT1:%.*]] = sitofp i32 [[LD1]] to double +; CHECK-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 +; CHECK-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 +; CHECK-NEXT: ret void ; %ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64 %ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4 @@ -922,26 +916,11 @@ define void @sitofp_16i32_16f32() #0 { } define void @sitofp_4i16_4f32() #0 { -; SSE-LABEL: @sitofp_4i16_4f32( -; SSE-NEXT: [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64 -; SSE-NEXT: [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2 -; SSE-NEXT: [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4 -; SSE-NEXT: [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2 -; SSE-NEXT: [[CVT0:%.*]] = sitofp i16 [[LD0]] to float -; SSE-NEXT: [[CVT1:%.*]] = sitofp i16 [[LD1]] to float -; SSE-NEXT: [[CVT2:%.*]] = sitofp i16 [[LD2]] to float -; SSE-NEXT: [[CVT3:%.*]] = sitofp i16 [[LD3]] to float -; SSE-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64 -; SSE-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4 -; SSE-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8 -; SSE-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4 -; SSE-NEXT: ret void -; -; AVX-LABEL: @sitofp_4i16_4f32( -; AVX-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64 -; AVX-NEXT: [[TMP2:%.*]] = sitofp <4 x i16> [[TMP1]] to <4 x float> -; AVX-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64 -; AVX-NEXT: ret void +; CHECK-LABEL: @sitofp_4i16_4f32( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64 +; CHECK-NEXT: [[TMP2:%.*]] = sitofp <4 x i16> [[TMP1]] to <4 x float> +; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64 +; CHECK-NEXT: ret void ; %ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64 %ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2 @@ -960,30 +939,12 @@ define void @sitofp_4i16_4f32() #0 { define void @sitofp_8i16_8f32() #0 { ; SSE-LABEL: @sitofp_8i16_8f32( -; SSE-NEXT: [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64 -; SSE-NEXT: [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2 -; SSE-NEXT: [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4 -; SSE-NEXT: [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2 -; SSE-NEXT: [[LD4:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4), align 8 -; SSE-NEXT: [[LD5:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 5), align 2 -; SSE-NEXT: [[LD6:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 6), align 4 -; SSE-NEXT: [[LD7:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 7), align 2 -; SSE-NEXT: [[CVT0:%.*]] = sitofp i16 [[LD0]] to float -; SSE-NEXT: [[CVT1:%.*]] = sitofp i16 [[LD1]] to float -; SSE-NEXT: [[CVT2:%.*]] = sitofp i16 [[LD2]] to float -; SSE-NEXT: [[CVT3:%.*]] = sitofp i16 [[LD3]] to float -; SSE-NEXT: [[CVT4:%.*]] = sitofp i16 [[LD4]] to float -; SSE-NEXT: [[CVT5:%.*]] = sitofp i16 [[LD5]] to float -; SSE-NEXT: [[CVT6:%.*]] = sitofp i16 [[LD6]] to float -; SSE-NEXT: [[CVT7:%.*]] = sitofp i16 [[LD7]] to float -; SSE-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64 -; SSE-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4 -; SSE-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8 -; SSE-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4 -; SSE-NEXT: store float [[CVT4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16 -; SSE-NEXT: store float [[CVT5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4 -; SSE-NEXT: store float [[CVT6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8 -; SSE-NEXT: store float [[CVT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4 +; SSE-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64 +; SSE-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4) to <4 x i16>*), align 8 +; SSE-NEXT: [[TMP3:%.*]] = sitofp <4 x i16> [[TMP1]] to <4 x float> +; SSE-NEXT: [[TMP4:%.*]] = sitofp <4 x i16> [[TMP2]] to <4 x float> +; SSE-NEXT: store <4 x float> [[TMP3]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64 +; SSE-NEXT: store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16 ; SSE-NEXT: ret void ; ; AVX-LABEL: @sitofp_8i16_8f32( @@ -1021,54 +982,18 @@ define void @sitofp_8i16_8f32() #0 { define void @sitofp_16i16_16f32() #0 { ; SSE-LABEL: @sitofp_16i16_16f32( -; SSE-NEXT: [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64 -; SSE-NEXT: [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2 -; SSE-NEXT: [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4 -; SSE-NEXT: [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2 -; SSE-NEXT: [[LD4:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4), align 8 -; SSE-NEXT: [[LD5:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 5), align 2 -; SSE-NEXT: [[LD6:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 6), align 4 -; SSE-NEXT: [[LD7:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 7), align 2 -; SSE-NEXT: [[LD8:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 8), align 16 -; SSE-NEXT: [[LD9:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 9), align 2 -; SSE-NEXT: [[LD10:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 10), align 4 -; SSE-NEXT: [[LD11:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 11), align 2 -; SSE-NEXT: [[LD12:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 12), align 8 -; SSE-NEXT: [[LD13:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 13), align 2 -; SSE-NEXT: [[LD14:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 14), align 4 -; SSE-NEXT: [[LD15:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 15), align 2 -; SSE-NEXT: [[CVT0:%.*]] = sitofp i16 [[LD0]] to float -; SSE-NEXT: [[CVT1:%.*]] = sitofp i16 [[LD1]] to float -; SSE-NEXT: [[CVT2:%.*]] = sitofp i16 [[LD2]] to float -; SSE-NEXT: [[CVT3:%.*]] = sitofp i16 [[LD3]] to float -; SSE-NEXT: [[CVT4:%.*]] = sitofp i16 [[LD4]] to float -; SSE-NEXT: [[CVT5:%.*]] = sitofp i16 [[LD5]] to float -; SSE-NEXT: [[CVT6:%.*]] = sitofp i16 [[LD6]] to float -; SSE-NEXT: [[CVT7:%.*]] = sitofp i16 [[LD7]] to float -; SSE-NEXT: [[CVT8:%.*]] = sitofp i16 [[LD8]] to float -; SSE-NEXT: [[CVT9:%.*]] = sitofp i16 [[LD9]] to float -; SSE-NEXT: [[CVT10:%.*]] = sitofp i16 [[LD10]] to float -; SSE-NEXT: [[CVT11:%.*]] = sitofp i16 [[LD11]] to float -; SSE-NEXT: [[CVT12:%.*]] = sitofp i16 [[LD12]] to float -; SSE-NEXT: [[CVT13:%.*]] = sitofp i16 [[LD13]] to float -; SSE-NEXT: [[CVT14:%.*]] = sitofp i16 [[LD14]] to float -; SSE-NEXT: [[CVT15:%.*]] = sitofp i16 [[LD15]] to float -; SSE-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64 -; SSE-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4 -; SSE-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8 -; SSE-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4 -; SSE-NEXT: store float [[CVT4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16 -; SSE-NEXT: store float [[CVT5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4 -; SSE-NEXT: store float [[CVT6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8 -; SSE-NEXT: store float [[CVT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4 -; SSE-NEXT: store float [[CVT8]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8), align 32 -; SSE-NEXT: store float [[CVT9]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9), align 4 -; SSE-NEXT: store float [[CVT10]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 8 -; SSE-NEXT: store float [[CVT11]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4 -; SSE-NEXT: store float [[CVT12]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 16 -; SSE-NEXT: store float [[CVT13]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4 -; SSE-NEXT: store float [[CVT14]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 8 -; SSE-NEXT: store float [[CVT15]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4 +; SSE-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64 +; SSE-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4) to <4 x i16>*), align 8 +; SSE-NEXT: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 8) to <4 x i16>*), align 16 +; SSE-NEXT: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 12) to <4 x i16>*), align 8 +; SSE-NEXT: [[TMP5:%.*]] = sitofp <4 x i16> [[TMP1]] to <4 x float> +; SSE-NEXT: [[TMP6:%.*]] = sitofp <4 x i16> [[TMP2]] to <4 x float> +; SSE-NEXT: [[TMP7:%.*]] = sitofp <4 x i16> [[TMP3]] to <4 x float> +; SSE-NEXT: [[TMP8:%.*]] = sitofp <4 x i16> [[TMP4]] to <4 x float> +; SSE-NEXT: store <4 x float> [[TMP5]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64 +; SSE-NEXT: store <4 x float> [[TMP6]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16 +; SSE-NEXT: store <4 x float> [[TMP7]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <4 x float>*), align 32 +; SSE-NEXT: store <4 x float> [[TMP8]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12) to <4 x float>*), align 16 ; SSE-NEXT: ret void ; ; AVX256-LABEL: @sitofp_16i16_16f32( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/uitofp.ll b/llvm/test/Transforms/SLPVectorizer/X86/uitofp.ll index dfc7d64103390..e3b8beb4a058c 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/uitofp.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/uitofp.ll @@ -576,18 +576,9 @@ define void @uitofp_2i64_2f32() #0 { define void @uitofp_4i64_4f32() #0 { ; SSE-LABEL: @uitofp_4i64_4f32( -; SSE-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64 -; SSE-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8 -; SSE-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16 -; SSE-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8 -; SSE-NEXT: [[CVT0:%.*]] = uitofp i64 [[LD0]] to float -; SSE-NEXT: [[CVT1:%.*]] = uitofp i64 [[LD1]] to float -; SSE-NEXT: [[CVT2:%.*]] = uitofp i64 [[LD2]] to float -; SSE-NEXT: [[CVT3:%.*]] = uitofp i64 [[LD3]] to float -; SSE-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64 -; SSE-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4 -; SSE-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8 -; SSE-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4 +; SSE-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64 +; SSE-NEXT: [[TMP2:%.*]] = uitofp <4 x i64> [[TMP1]] to <4 x float> +; SSE-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64 ; SSE-NEXT: ret void ; ; AVX256NODQ-LABEL: @uitofp_4i64_4f32( @@ -634,30 +625,12 @@ define void @uitofp_4i64_4f32() #0 { define void @uitofp_8i64_8f32() #0 { ; SSE-LABEL: @uitofp_8i64_8f32( -; SSE-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64 -; SSE-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8 -; SSE-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16 -; SSE-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8 -; SSE-NEXT: [[LD4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4), align 32 -; SSE-NEXT: [[LD5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 5), align 8 -; SSE-NEXT: [[LD6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 6), align 16 -; SSE-NEXT: [[LD7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 7), align 8 -; SSE-NEXT: [[CVT0:%.*]] = uitofp i64 [[LD0]] to float -; SSE-NEXT: [[CVT1:%.*]] = uitofp i64 [[LD1]] to float -; SSE-NEXT: [[CVT2:%.*]] = uitofp i64 [[LD2]] to float -; SSE-NEXT: [[CVT3:%.*]] = uitofp i64 [[LD3]] to float -; SSE-NEXT: [[CVT4:%.*]] = uitofp i64 [[LD4]] to float -; SSE-NEXT: [[CVT5:%.*]] = uitofp i64 [[LD5]] to float -; SSE-NEXT: [[CVT6:%.*]] = uitofp i64 [[LD6]] to float -; SSE-NEXT: [[CVT7:%.*]] = uitofp i64 [[LD7]] to float -; SSE-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64 -; SSE-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4 -; SSE-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8 -; SSE-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4 -; SSE-NEXT: store float [[CVT4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16 -; SSE-NEXT: store float [[CVT5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4 -; SSE-NEXT: store float [[CVT6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8 -; SSE-NEXT: store float [[CVT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4 +; SSE-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64 +; SSE-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4) to <4 x i64>*), align 32 +; SSE-NEXT: [[TMP3:%.*]] = uitofp <4 x i64> [[TMP1]] to <4 x float> +; SSE-NEXT: [[TMP4:%.*]] = uitofp <4 x i64> [[TMP2]] to <4 x float> +; SSE-NEXT: store <4 x float> [[TMP3]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64 +; SSE-NEXT: store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16 ; SSE-NEXT: ret void ; ; AVX256NODQ-LABEL: @uitofp_8i64_8f32( @@ -874,26 +847,11 @@ define void @uitofp_16i32_16f32() #0 { } define void @uitofp_4i16_4f32() #0 { -; SSE-LABEL: @uitofp_4i16_4f32( -; SSE-NEXT: [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64 -; SSE-NEXT: [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2 -; SSE-NEXT: [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4 -; SSE-NEXT: [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2 -; SSE-NEXT: [[CVT0:%.*]] = uitofp i16 [[LD0]] to float -; SSE-NEXT: [[CVT1:%.*]] = uitofp i16 [[LD1]] to float -; SSE-NEXT: [[CVT2:%.*]] = uitofp i16 [[LD2]] to float -; SSE-NEXT: [[CVT3:%.*]] = uitofp i16 [[LD3]] to float -; SSE-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64 -; SSE-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4 -; SSE-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8 -; SSE-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4 -; SSE-NEXT: ret void -; -; AVX-LABEL: @uitofp_4i16_4f32( -; AVX-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64 -; AVX-NEXT: [[TMP2:%.*]] = uitofp <4 x i16> [[TMP1]] to <4 x float> -; AVX-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64 -; AVX-NEXT: ret void +; CHECK-LABEL: @uitofp_4i16_4f32( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64 +; CHECK-NEXT: [[TMP2:%.*]] = uitofp <4 x i16> [[TMP1]] to <4 x float> +; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64 +; CHECK-NEXT: ret void ; %ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64 %ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2 @@ -912,30 +870,12 @@ define void @uitofp_4i16_4f32() #0 { define void @uitofp_8i16_8f32() #0 { ; SSE-LABEL: @uitofp_8i16_8f32( -; SSE-NEXT: [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64 -; SSE-NEXT: [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2 -; SSE-NEXT: [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4 -; SSE-NEXT: [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2 -; SSE-NEXT: [[LD4:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4), align 8 -; SSE-NEXT: [[LD5:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 5), align 2 -; SSE-NEXT: [[LD6:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 6), align 4 -; SSE-NEXT: [[LD7:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 7), align 2 -; SSE-NEXT: [[CVT0:%.*]] = uitofp i16 [[LD0]] to float -; SSE-NEXT: [[CVT1:%.*]] = uitofp i16 [[LD1]] to float -; SSE-NEXT: [[CVT2:%.*]] = uitofp i16 [[LD2]] to float -; SSE-NEXT: [[CVT3:%.*]] = uitofp i16 [[LD3]] to float -; SSE-NEXT: [[CVT4:%.*]] = uitofp i16 [[LD4]] to float -; SSE-NEXT: [[CVT5:%.*]] = uitofp i16 [[LD5]] to float -; SSE-NEXT: [[CVT6:%.*]] = uitofp i16 [[LD6]] to float -; SSE-NEXT: [[CVT7:%.*]] = uitofp i16 [[LD7]] to float -; SSE-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64 -; SSE-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4 -; SSE-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8 -; SSE-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4 -; SSE-NEXT: store float [[CVT4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16 -; SSE-NEXT: store float [[CVT5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4 -; SSE-NEXT: store float [[CVT6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8 -; SSE-NEXT: store float [[CVT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4 +; SSE-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64 +; SSE-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4) to <4 x i16>*), align 8 +; SSE-NEXT: [[TMP3:%.*]] = uitofp <4 x i16> [[TMP1]] to <4 x float> +; SSE-NEXT: [[TMP4:%.*]] = uitofp <4 x i16> [[TMP2]] to <4 x float> +; SSE-NEXT: store <4 x float> [[TMP3]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64 +; SSE-NEXT: store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16 ; SSE-NEXT: ret void ; ; AVX-LABEL: @uitofp_8i16_8f32( @@ -973,54 +913,18 @@ define void @uitofp_8i16_8f32() #0 { define void @uitofp_16i16_16f32() #0 { ; SSE-LABEL: @uitofp_16i16_16f32( -; SSE-NEXT: [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64 -; SSE-NEXT: [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2 -; SSE-NEXT: [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4 -; SSE-NEXT: [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2 -; SSE-NEXT: [[LD4:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4), align 8 -; SSE-NEXT: [[LD5:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 5), align 2 -; SSE-NEXT: [[LD6:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 6), align 4 -; SSE-NEXT: [[LD7:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 7), align 2 -; SSE-NEXT: [[LD8:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 8), align 16 -; SSE-NEXT: [[LD9:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 9), align 2 -; SSE-NEXT: [[LD10:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 10), align 4 -; SSE-NEXT: [[LD11:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 11), align 2 -; SSE-NEXT: [[LD12:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 12), align 8 -; SSE-NEXT: [[LD13:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 13), align 2 -; SSE-NEXT: [[LD14:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 14), align 4 -; SSE-NEXT: [[LD15:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 15), align 2 -; SSE-NEXT: [[CVT0:%.*]] = uitofp i16 [[LD0]] to float -; SSE-NEXT: [[CVT1:%.*]] = uitofp i16 [[LD1]] to float -; SSE-NEXT: [[CVT2:%.*]] = uitofp i16 [[LD2]] to float -; SSE-NEXT: [[CVT3:%.*]] = uitofp i16 [[LD3]] to float -; SSE-NEXT: [[CVT4:%.*]] = uitofp i16 [[LD4]] to float -; SSE-NEXT: [[CVT5:%.*]] = uitofp i16 [[LD5]] to float -; SSE-NEXT: [[CVT6:%.*]] = uitofp i16 [[LD6]] to float -; SSE-NEXT: [[CVT7:%.*]] = uitofp i16 [[LD7]] to float -; SSE-NEXT: [[CVT8:%.*]] = uitofp i16 [[LD8]] to float -; SSE-NEXT: [[CVT9:%.*]] = uitofp i16 [[LD9]] to float -; SSE-NEXT: [[CVT10:%.*]] = uitofp i16 [[LD10]] to float -; SSE-NEXT: [[CVT11:%.*]] = uitofp i16 [[LD11]] to float -; SSE-NEXT: [[CVT12:%.*]] = uitofp i16 [[LD12]] to float -; SSE-NEXT: [[CVT13:%.*]] = uitofp i16 [[LD13]] to float -; SSE-NEXT: [[CVT14:%.*]] = uitofp i16 [[LD14]] to float -; SSE-NEXT: [[CVT15:%.*]] = uitofp i16 [[LD15]] to float -; SSE-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64 -; SSE-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4 -; SSE-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8 -; SSE-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4 -; SSE-NEXT: store float [[CVT4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16 -; SSE-NEXT: store float [[CVT5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4 -; SSE-NEXT: store float [[CVT6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8 -; SSE-NEXT: store float [[CVT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4 -; SSE-NEXT: store float [[CVT8]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8), align 32 -; SSE-NEXT: store float [[CVT9]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9), align 4 -; SSE-NEXT: store float [[CVT10]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 8 -; SSE-NEXT: store float [[CVT11]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4 -; SSE-NEXT: store float [[CVT12]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 16 -; SSE-NEXT: store float [[CVT13]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4 -; SSE-NEXT: store float [[CVT14]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 8 -; SSE-NEXT: store float [[CVT15]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4 +; SSE-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64 +; SSE-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4) to <4 x i16>*), align 8 +; SSE-NEXT: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 8) to <4 x i16>*), align 16 +; SSE-NEXT: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 12) to <4 x i16>*), align 8 +; SSE-NEXT: [[TMP5:%.*]] = uitofp <4 x i16> [[TMP1]] to <4 x float> +; SSE-NEXT: [[TMP6:%.*]] = uitofp <4 x i16> [[TMP2]] to <4 x float> +; SSE-NEXT: [[TMP7:%.*]] = uitofp <4 x i16> [[TMP3]] to <4 x float> +; SSE-NEXT: [[TMP8:%.*]] = uitofp <4 x i16> [[TMP4]] to <4 x float> +; SSE-NEXT: store <4 x float> [[TMP5]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64 +; SSE-NEXT: store <4 x float> [[TMP6]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16 +; SSE-NEXT: store <4 x float> [[TMP7]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <4 x float>*), align 32 +; SSE-NEXT: store <4 x float> [[TMP8]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12) to <4 x float>*), align 16 ; SSE-NEXT: ret void ; ; AVX256-LABEL: @uitofp_16i16_16f32( From 0af9b25affc9187bc8314bc3999a0ef8b16b522a Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 1 Jul 2021 15:17:48 +0100 Subject: [PATCH 425/619] [LoopVectorize][X86] Regenerate conversion-cost.ll tests --- .../LoopVectorize/X86/conversion-cost.ll | 165 +++++++++++++++++- 1 file changed, 159 insertions(+), 6 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll index eb2a2a56fae1a..20144a9ac6be7 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll @@ -1,12 +1,90 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" -;CHECK-LABEL: @conversion_cost1( -;CHECK: store <32 x i8> -;CHECK: ret define i32 @conversion_cost1(i32 %n, i8* nocapture %A, float* nocapture %B) nounwind uwtable ssp { +; CHECK-LABEL: @conversion_cost1( +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[N:%.*]], 3 +; CHECK-NEXT: br i1 [[TMP1]], label [[DOTLR_PH_PREHEADER:%.*]], label [[DOT_CRIT_EDGE:%.*]] +; CHECK: .lr.ph.preheader: +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[N]], -4 +; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP4]], 32 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP4]], 32 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP4]], [[N_MOD_VF]] +; CHECK-NEXT: [[IND_END:%.*]] = add i64 3, [[N_VEC]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <32 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <32 x i8> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX]] +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 1 +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 2 +; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 3 +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 4 +; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 5 +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], 6 +; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 7 +; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 8 +; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 9 +; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 10 +; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[OFFSET_IDX]], 11 +; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[OFFSET_IDX]], 12 +; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[OFFSET_IDX]], 13 +; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[OFFSET_IDX]], 14 +; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[OFFSET_IDX]], 15 +; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[OFFSET_IDX]], 16 +; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[OFFSET_IDX]], 17 +; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[OFFSET_IDX]], 18 +; CHECK-NEXT: [[TMP24:%.*]] = add i64 [[OFFSET_IDX]], 19 +; CHECK-NEXT: [[TMP25:%.*]] = add i64 [[OFFSET_IDX]], 20 +; CHECK-NEXT: [[TMP26:%.*]] = add i64 [[OFFSET_IDX]], 21 +; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[OFFSET_IDX]], 22 +; CHECK-NEXT: [[TMP28:%.*]] = add i64 [[OFFSET_IDX]], 23 +; CHECK-NEXT: [[TMP29:%.*]] = add i64 [[OFFSET_IDX]], 24 +; CHECK-NEXT: [[TMP30:%.*]] = add i64 [[OFFSET_IDX]], 25 +; CHECK-NEXT: [[TMP31:%.*]] = add i64 [[OFFSET_IDX]], 26 +; CHECK-NEXT: [[TMP32:%.*]] = add i64 [[OFFSET_IDX]], 27 +; CHECK-NEXT: [[TMP33:%.*]] = add i64 [[OFFSET_IDX]], 28 +; CHECK-NEXT: [[TMP34:%.*]] = add i64 [[OFFSET_IDX]], 29 +; CHECK-NEXT: [[TMP35:%.*]] = add i64 [[OFFSET_IDX]], 30 +; CHECK-NEXT: [[TMP36:%.*]] = add i64 [[OFFSET_IDX]], 31 +; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds i8, i8* [[A:%.*]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds i8, i8* [[TMP37]], i32 0 +; CHECK-NEXT: [[TMP39:%.*]] = bitcast i8* [[TMP38]] to <32 x i8>* +; CHECK-NEXT: store <32 x i8> [[VEC_IND1]], <32 x i8>* [[TMP39]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <32 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <32 x i8> [[VEC_IND1]], +; CHECK-NEXT: [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP40]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 3, [[DOTLR_PH_PREHEADER]] ] +; CHECK-NEXT: br label [[DOTLR_PH:%.*]] +; CHECK: .lr.ph: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[DOTLR_PH]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[TMP41:%.*]] = trunc i64 [[INDVARS_IV]] to i8 +; CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 [[INDVARS_IV]] +; CHECK-NEXT: store i8 [[TMP41]], i8* [[TMP42]], align 1 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[DOTLR_PH]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK: ._crit_edge.loopexit: +; CHECK-NEXT: br label [[DOT_CRIT_EDGE]] +; CHECK: ._crit_edge: +; CHECK-NEXT: ret i32 undef +; %1 = icmp sgt i32 %n, 3 br i1 %1, label %.lr.ph, label %._crit_edge @@ -24,10 +102,85 @@ define i32 @conversion_cost1(i32 %n, i8* nocapture %A, float* nocapture %B) noun ret i32 undef } -;CHECK-LABEL: @conversion_cost2( -;CHECK: <2 x float> -;CHECK: ret define i32 @conversion_cost2(i32 %n, i8* nocapture %A, float* nocapture %B) nounwind uwtable ssp { +; CHECK-LABEL: @conversion_cost2( +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[N:%.*]], 9 +; CHECK-NEXT: br i1 [[TMP1]], label [[DOTLR_PH_PREHEADER:%.*]], label [[DOT_CRIT_EDGE:%.*]] +; CHECK: .lr.ph.preheader: +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[N]], -10 +; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP4]], 8 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP4]], 8 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP4]], [[N_MOD_VF]] +; CHECK-NEXT: [[IND_END:%.*]] = add i64 9, [[N_VEC]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[STEP_ADD1:%.*]] = add <2 x i64> [[STEP_ADD]], +; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <2 x i64> [[STEP_ADD1]], +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 9, [[INDEX]] +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 1 +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 2 +; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 3 +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 4 +; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 5 +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], 6 +; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 7 +; CHECK-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[STEP_ADD]], +; CHECK-NEXT: [[TMP15:%.*]] = add nsw <2 x i64> [[STEP_ADD1]], +; CHECK-NEXT: [[TMP16:%.*]] = add nsw <2 x i64> [[STEP_ADD2]], +; CHECK-NEXT: [[TMP17:%.*]] = sitofp <2 x i64> [[TMP13]] to <2 x float> +; CHECK-NEXT: [[TMP18:%.*]] = sitofp <2 x i64> [[TMP14]] to <2 x float> +; CHECK-NEXT: [[TMP19:%.*]] = sitofp <2 x i64> [[TMP15]] to <2 x float> +; CHECK-NEXT: [[TMP20:%.*]] = sitofp <2 x i64> [[TMP16]] to <2 x float> +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP9]] +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, float* [[TMP21]], i32 0 +; CHECK-NEXT: [[TMP26:%.*]] = bitcast float* [[TMP25]] to <2 x float>* +; CHECK-NEXT: store <2 x float> [[TMP17]], <2 x float>* [[TMP26]], align 4 +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, float* [[TMP21]], i32 2 +; CHECK-NEXT: [[TMP28:%.*]] = bitcast float* [[TMP27]] to <2 x float>* +; CHECK-NEXT: store <2 x float> [[TMP18]], <2 x float>* [[TMP28]], align 4 +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, float* [[TMP21]], i32 4 +; CHECK-NEXT: [[TMP30:%.*]] = bitcast float* [[TMP29]] to <2 x float>* +; CHECK-NEXT: store <2 x float> [[TMP19]], <2 x float>* [[TMP30]], align 4 +; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds float, float* [[TMP21]], i32 6 +; CHECK-NEXT: [[TMP32:%.*]] = bitcast float* [[TMP31]] to <2 x float>* +; CHECK-NEXT: store <2 x float> [[TMP20]], <2 x float>* [[TMP32]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD2]], +; CHECK-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 9, [[DOTLR_PH_PREHEADER]] ] +; CHECK-NEXT: br label [[DOTLR_PH:%.*]] +; CHECK: .lr.ph: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[DOTLR_PH]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[INDVARS_IV]], 3 +; CHECK-NEXT: [[TOFP:%.*]] = sitofp i64 [[ADD]] to float +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV]] +; CHECK-NEXT: store float [[TOFP]], float* [[GEP]], align 4 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[DOTLR_PH]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: ._crit_edge.loopexit: +; CHECK-NEXT: br label [[DOT_CRIT_EDGE]] +; CHECK: ._crit_edge: +; CHECK-NEXT: ret i32 undef +; %1 = icmp sgt i32 %n, 9 br i1 %1, label %.lr.ph, label %._crit_edge From 2f79acb7b701c41494abff588b5f03a74ea2e11d Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Thu, 1 Jul 2021 10:49:10 -0400 Subject: [PATCH 426/619] [clangd] Unbreak mac build differently 0c96a92d8666b8 This reverts b56e5f8a10c1 (and follow-up f6db88535cb) and instead restores the state we had before 0c96a92d8666b8: ClangdMain.cpp includes Features.inc before including Transport.h. This is a bit ugly, but it matches the former state and making Transport.h include Features.h means that xpc/ needs to be able to find the generated Features.inc, wich is also a bit ugly. --- clang-tools-extra/clangd/Transport.h | 1 - clang-tools-extra/clangd/tool/ClangdMain.cpp | 4 +++- llvm/utils/gn/secondary/clang-tools-extra/clangd/xpc/BUILD.gn | 1 - 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/clang-tools-extra/clangd/Transport.h b/clang-tools-extra/clangd/Transport.h index b3db4eba85f93..ae6da722d91b1 100644 --- a/clang-tools-extra/clangd/Transport.h +++ b/clang-tools-extra/clangd/Transport.h @@ -18,7 +18,6 @@ #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_TRANSPORT_H_ #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_TRANSPORT_H_ -#include "Features.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/JSON.h" #include "llvm/Support/raw_ostream.h" diff --git a/clang-tools-extra/clangd/tool/ClangdMain.cpp b/clang-tools-extra/clangd/tool/ClangdMain.cpp index 8db52c65061c8..c03dd927970d4 100644 --- a/clang-tools-extra/clangd/tool/ClangdMain.cpp +++ b/clang-tools-extra/clangd/tool/ClangdMain.cpp @@ -6,11 +6,13 @@ // //===----------------------------------------------------------------------===// +// Must be before Transport.h include. +#include "Features.h" + #include "ClangdLSPServer.h" #include "CodeComplete.h" #include "Config.h" #include "ConfigProvider.h" -#include "Features.h" #include "PathMapping.h" #include "Protocol.h" #include "TidyProvider.h" diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clangd/xpc/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clangd/xpc/BUILD.gn index 921e0dbedeb54..0d375392ae257 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clangd/xpc/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clangd/xpc/BUILD.gn @@ -16,7 +16,6 @@ static_library("transport") { deps = [ ":conversions", "//clang-tools-extra/clangd", - "//clang-tools-extra/clangd:features", "//clang-tools-extra/clangd/support", "//llvm/lib/Support", ] From 3eb2158f4fea90d56aeb200a5ca06f536c1df683 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 1 Jul 2021 06:42:16 -0700 Subject: [PATCH 427/619] [OPENMP]Fix PR50640: OpenMP target clause implicitly scaling loop bounds to uint64_t. Need to add some conversions to suppress possible warning messages. Differential Revision: https://reviews.llvm.org/D105187 --- clang/lib/Sema/SemaOpenMP.cpp | 16 +- .../distribute_parallel_for_codegen.cpp | 282 +++-- .../distribute_parallel_for_simd_codegen.cpp | 342 +++--- ..._teams_distribute_parallel_for_codegen.cpp | 66 +- ...bute_parallel_for_generic_mode_codegen.cpp | 90 +- ...s_distribute_parallel_for_simd_codegen.cpp | 1039 ++++++++--------- ...eams_distribute_parallel_for_ast_print.cpp | 4 +- ...stribute_parallel_for_schedule_codegen.cpp | 264 ++--- ...ute_parallel_for_simd_schedule_codegen.cpp | 264 ++--- ...stribute_parallel_for_schedule_codegen.cpp | 264 ++--- ...ute_parallel_for_simd_schedule_codegen.cpp | 264 ++--- 11 files changed, 1428 insertions(+), 1467 deletions(-) diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 7fddff7992fc1..c0cd2bf18a774 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -9401,11 +9401,21 @@ checkOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr, // Build expression: UB = min(UB, prevUB) for #for in composite or combined // construct + ExprResult NewPrevUB = PrevUB; SourceLocation DistEUBLoc = AStmt->getBeginLoc(); - ExprResult IsUBGreater = - SemaRef.BuildBinOp(CurScope, DistEUBLoc, BO_GT, UB.get(), PrevUB.get()); + if (!SemaRef.Context.hasSameType(UB.get()->getType(), + PrevUB.get()->getType())) { + NewPrevUB = SemaRef.BuildCStyleCastExpr( + DistEUBLoc, + SemaRef.Context.getTrivialTypeSourceInfo(UB.get()->getType()), + DistEUBLoc, NewPrevUB.get()); + if (!NewPrevUB.isUsable()) + return 0; + } + ExprResult IsUBGreater = SemaRef.BuildBinOp(CurScope, DistEUBLoc, BO_GT, + UB.get(), NewPrevUB.get()); ExprResult CondOp = SemaRef.ActOnConditionalOp( - DistEUBLoc, DistEUBLoc, IsUBGreater.get(), PrevUB.get(), UB.get()); + DistEUBLoc, DistEUBLoc, IsUBGreater.get(), NewPrevUB.get(), UB.get()); PrevEUB = SemaRef.BuildBinOp(CurScope, DistIncLoc, BO_Assign, UB.get(), CondOp.get()); PrevEUB = diff --git a/clang/test/OpenMP/distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_codegen.cpp index 2034f82e25d5e..d1a3f33c33d9c 100644 --- a/clang/test/OpenMP/distribute_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_codegen.cpp @@ -1976,34 +1976,33 @@ int main() { // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: // CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 // CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP14]] +// CHECK1-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP13]], [[CONV7]] // CHECK1-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP15]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CONV9:%.*]] = sext i32 [[TMP16]] to i64 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] -// CHECK1-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 -// CHECK1-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 @@ -2016,15 +2015,15 @@ int main() { // CHECK1-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX]], align 8 // CHECK1-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP3]], align 8 // CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK1-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM13]] -// CHECK1-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX14]], align 8 -// CHECK1-NEXT: [[ADD15:%.*]] = fadd double [[TMP25]], [[TMP28]] +// CHECK1-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM12]] +// CHECK1-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX13]], align 8 +// CHECK1-NEXT: [[ADD14:%.*]] = fadd double [[TMP25]], [[TMP28]] // CHECK1-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP1]], align 8 // CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK1-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM16]] -// CHECK1-NEXT: store double [[ADD15]], double* [[ARRAYIDX17]], align 8 +// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM15]] +// CHECK1-NEXT: store double [[ADD14]], double* [[ARRAYIDX16]], align 8 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 0 // CHECK1-NEXT: store double** [[TMP1]], double*** [[TMP31]], align 8 // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 1 @@ -2039,20 +2038,20 @@ int main() { // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: // CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP35]], 1 -// CHECK1-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK1-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] -// CHECK1-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] +// CHECK1-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] -// CHECK1-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] +// CHECK1-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: // CHECK1-NEXT: [[TMP40:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -3769,34 +3768,33 @@ int main() { // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: // CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 // CHECK2-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP14]] +// CHECK2-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK2-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP13]], [[CONV7]] // CHECK2-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP15]] to i32 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CONV9:%.*]] = sext i32 [[TMP16]] to i64 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] -// CHECK2-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 -// CHECK2-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK2-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK2-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK2-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: // CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK2-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK2-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: // CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 @@ -3809,15 +3807,15 @@ int main() { // CHECK2-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX]], align 8 // CHECK2-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP3]], align 8 // CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK2-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK2-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM13]] -// CHECK2-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX14]], align 8 -// CHECK2-NEXT: [[ADD15:%.*]] = fadd double [[TMP25]], [[TMP28]] +// CHECK2-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK2-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM12]] +// CHECK2-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX13]], align 8 +// CHECK2-NEXT: [[ADD14:%.*]] = fadd double [[TMP25]], [[TMP28]] // CHECK2-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP1]], align 8 // CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK2-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK2-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM16]] -// CHECK2-NEXT: store double [[ADD15]], double* [[ARRAYIDX17]], align 8 +// CHECK2-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK2-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM15]] +// CHECK2-NEXT: store double [[ADD14]], double* [[ARRAYIDX16]], align 8 // CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 0 // CHECK2-NEXT: store double** [[TMP1]], double*** [[TMP31]], align 8 // CHECK2-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 1 @@ -3832,20 +3830,20 @@ int main() { // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: // CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP35]], 1 -// CHECK2-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK2-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: // CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] -// CHECK2-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] +// CHECK2-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] -// CHECK2-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] +// CHECK2-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: // CHECK2-NEXT: [[TMP40:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -5522,7 +5520,7 @@ int main() { // CHECK3: omp.dispatch.cond: // CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -7249,7 +7247,7 @@ int main() { // CHECK4: omp.dispatch.cond: // CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK4-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// CHECK4-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK4-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -9465,34 +9463,33 @@ int main() { // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: // CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 // CHECK9-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP14]] +// CHECK9-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP13]], [[CONV7]] // CHECK9-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP15]] to i32 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: // CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CONV9:%.*]] = sext i32 [[TMP16]] to i64 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] -// CHECK9-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 -// CHECK9-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK9-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK9-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: // CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: // CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 @@ -9505,34 +9502,34 @@ int main() { // CHECK9-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX]], align 8 // CHECK9-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP3]], align 8 // CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK9-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM13]] -// CHECK9-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX14]], align 8 -// CHECK9-NEXT: [[ADD15:%.*]] = fadd double [[TMP25]], [[TMP28]] +// CHECK9-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM12]] +// CHECK9-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX13]], align 8 +// CHECK9-NEXT: [[ADD14:%.*]] = fadd double [[TMP25]], [[TMP28]] // CHECK9-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP1]], align 8 // CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK9-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK9-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM16]] -// CHECK9-NEXT: store double [[ADD15]], double* [[ARRAYIDX17]], align 8 +// CHECK9-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM15]] +// CHECK9-NEXT: store double [[ADD14]], double* [[ARRAYIDX16]], align 8 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: // CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP31]], 1 -// CHECK9-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK9-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: // CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] -// CHECK9-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK9-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] -// CHECK9-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK9-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: // CHECK9-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -11683,34 +11680,33 @@ int main() { // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: // CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 // CHECK9-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP14]] +// CHECK9-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP13]], [[CONV7]] // CHECK9-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP15]] to i32 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: // CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CONV9:%.*]] = sext i32 [[TMP16]] to i64 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] -// CHECK9-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 -// CHECK9-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK9-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK9-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: // CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: // CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 @@ -11723,34 +11719,34 @@ int main() { // CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP3]], align 8 // CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK9-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i64 [[IDXPROM13]] -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[ARRAYIDX14]], align 4 -// CHECK9-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP25]], [[TMP28]] +// CHECK9-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i64 [[IDXPROM12]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[ARRAYIDX13]], align 4 +// CHECK9-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP25]], [[TMP28]] // CHECK9-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP1]], align 8 // CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK9-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK9-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i64 [[IDXPROM16]] -// CHECK9-NEXT: store i32 [[ADD15]], i32* [[ARRAYIDX17]], align 4 +// CHECK9-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i64 [[IDXPROM15]] +// CHECK9-NEXT: store i32 [[ADD14]], i32* [[ARRAYIDX16]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: // CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP31]], 1 -// CHECK9-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK9-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: // CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] -// CHECK9-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK9-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] -// CHECK9-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK9-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: // CHECK9-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -13896,34 +13892,33 @@ int main() { // CHECK10-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK10: omp.dispatch.cond: // CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 // CHECK10-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK10-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP14]] +// CHECK10-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK10-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP13]], [[CONV7]] // CHECK10-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: // CHECK10-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK10-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP15]] to i32 // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: // CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CONV9:%.*]] = sext i32 [[TMP16]] to i64 // CHECK10-NEXT: br label [[COND_END]] // CHECK10: cond.end: -// CHECK10-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] -// CHECK10-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 -// CHECK10-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK10-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK10-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK10-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK10-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK10-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK10: omp.dispatch.body: // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: // CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK10-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK10-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK10-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: // CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 @@ -13936,34 +13931,34 @@ int main() { // CHECK10-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX]], align 8 // CHECK10-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP3]], align 8 // CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK10-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK10-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM13]] -// CHECK10-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX14]], align 8 -// CHECK10-NEXT: [[ADD15:%.*]] = fadd double [[TMP25]], [[TMP28]] +// CHECK10-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK10-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM12]] +// CHECK10-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX13]], align 8 +// CHECK10-NEXT: [[ADD14:%.*]] = fadd double [[TMP25]], [[TMP28]] // CHECK10-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP1]], align 8 // CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK10-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK10-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM16]] -// CHECK10-NEXT: store double [[ADD15]], double* [[ARRAYIDX17]], align 8 +// CHECK10-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK10-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM15]] +// CHECK10-NEXT: store double [[ADD14]], double* [[ARRAYIDX16]], align 8 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: // CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP31]], 1 -// CHECK10-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK10-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK10: omp.dispatch.inc: // CHECK10-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK10-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK10-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] -// CHECK10-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK10-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4 // CHECK10-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK10-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] -// CHECK10-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK10-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK10: omp.dispatch.end: // CHECK10-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -16114,34 +16109,33 @@ int main() { // CHECK10-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK10: omp.dispatch.cond: // CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 // CHECK10-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK10-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP14]] +// CHECK10-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK10-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP13]], [[CONV7]] // CHECK10-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: // CHECK10-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK10-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP15]] to i32 // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: // CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CONV9:%.*]] = sext i32 [[TMP16]] to i64 // CHECK10-NEXT: br label [[COND_END]] // CHECK10: cond.end: -// CHECK10-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] -// CHECK10-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 -// CHECK10-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK10-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK10-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK10-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK10-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK10-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK10: omp.dispatch.body: // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: // CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK10-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK10-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK10-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: // CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 @@ -16154,34 +16148,34 @@ int main() { // CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK10-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP3]], align 8 // CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK10-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK10-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i64 [[IDXPROM13]] -// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[ARRAYIDX14]], align 4 -// CHECK10-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP25]], [[TMP28]] +// CHECK10-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK10-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i64 [[IDXPROM12]] +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[ARRAYIDX13]], align 4 +// CHECK10-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP25]], [[TMP28]] // CHECK10-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP1]], align 8 // CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK10-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK10-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i64 [[IDXPROM16]] -// CHECK10-NEXT: store i32 [[ADD15]], i32* [[ARRAYIDX17]], align 4 +// CHECK10-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK10-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i64 [[IDXPROM15]] +// CHECK10-NEXT: store i32 [[ADD14]], i32* [[ARRAYIDX16]], align 4 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: // CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP31]], 1 -// CHECK10-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK10-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK10: omp.dispatch.inc: // CHECK10-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK10-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK10-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] -// CHECK10-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK10-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4 // CHECK10-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK10-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] -// CHECK10-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK10-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK10: omp.dispatch.end: // CHECK10-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -18277,7 +18271,7 @@ int main() { // CHECK11: omp.dispatch.cond: // CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -20419,7 +20413,7 @@ int main() { // CHECK11: omp.dispatch.cond: // CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -22556,7 +22550,7 @@ int main() { // CHECK12: omp.dispatch.cond: // CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK12-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// CHECK12-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK12-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK12: cond.true: // CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -24698,7 +24692,7 @@ int main() { // CHECK12: omp.dispatch.cond: // CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK12-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// CHECK12-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK12-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK12: cond.true: // CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp index c9a16f2db634c..60b4685ed9b46 100644 --- a/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp @@ -2084,34 +2084,33 @@ int main() { // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: // CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 // CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP14]] +// CHECK1-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP13]], [[CONV7]] // CHECK1-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP15]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CONV9:%.*]] = sext i32 [[TMP16]] to i64 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] -// CHECK1-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 -// CHECK1-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !40 -// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 @@ -2124,15 +2123,15 @@ int main() { // CHECK1-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !40 // CHECK1-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !40 // CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !40 -// CHECK1-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM13]] -// CHECK1-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX14]], align 8, !llvm.access.group !40 -// CHECK1-NEXT: [[ADD15:%.*]] = fadd double [[TMP25]], [[TMP28]] +// CHECK1-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM12]] +// CHECK1-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX13]], align 8, !llvm.access.group !40 +// CHECK1-NEXT: [[ADD14:%.*]] = fadd double [[TMP25]], [[TMP28]] // CHECK1-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !40 // CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !40 -// CHECK1-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM16]] -// CHECK1-NEXT: store double [[ADD15]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !40 +// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM15]] +// CHECK1-NEXT: store double [[ADD14]], double* [[ARRAYIDX16]], align 8, !llvm.access.group !40 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 0 // CHECK1-NEXT: store double** [[TMP1]], double*** [[TMP31]], align 8, !llvm.access.group !40 // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 1 @@ -2147,20 +2146,20 @@ int main() { // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: // CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP35]], 1 -// CHECK1-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK1-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] -// CHECK1-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] +// CHECK1-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] -// CHECK1-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] +// CHECK1-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: // CHECK1-NEXT: [[TMP40:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -2171,11 +2170,11 @@ int main() { // CHECK1-NEXT: br i1 [[TMP43]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[SUB21:%.*]] = sub nsw i32 [[TMP44]], 0 -// CHECK1-NEXT: [[DIV22:%.*]] = sdiv i32 [[SUB21]], 1 -// CHECK1-NEXT: [[MUL23:%.*]] = mul nsw i32 [[DIV22]], 1 -// CHECK1-NEXT: [[ADD24:%.*]] = add nsw i32 0, [[MUL23]] -// CHECK1-NEXT: store i32 [[ADD24]], i32* [[I6]], align 4 +// CHECK1-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP44]], 0 +// CHECK1-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK1-NEXT: [[MUL22:%.*]] = mul nsw i32 [[DIV21]], 1 +// CHECK1-NEXT: [[ADD23:%.*]] = add nsw i32 0, [[MUL22]] +// CHECK1-NEXT: store i32 [[ADD23]], i32* [[I6]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] @@ -4045,34 +4044,33 @@ int main() { // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: // CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 // CHECK2-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP14]] +// CHECK2-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK2-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP13]], [[CONV7]] // CHECK2-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP15]] to i32 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CONV9:%.*]] = sext i32 [[TMP16]] to i64 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] -// CHECK2-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 -// CHECK2-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK2-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK2-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK2-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: // CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !40 -// CHECK2-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK2-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK2-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: // CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 @@ -4085,15 +4083,15 @@ int main() { // CHECK2-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !40 // CHECK2-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !40 // CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !40 -// CHECK2-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK2-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM13]] -// CHECK2-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX14]], align 8, !llvm.access.group !40 -// CHECK2-NEXT: [[ADD15:%.*]] = fadd double [[TMP25]], [[TMP28]] +// CHECK2-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK2-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM12]] +// CHECK2-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX13]], align 8, !llvm.access.group !40 +// CHECK2-NEXT: [[ADD14:%.*]] = fadd double [[TMP25]], [[TMP28]] // CHECK2-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !40 // CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !40 -// CHECK2-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK2-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM16]] -// CHECK2-NEXT: store double [[ADD15]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !40 +// CHECK2-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK2-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM15]] +// CHECK2-NEXT: store double [[ADD14]], double* [[ARRAYIDX16]], align 8, !llvm.access.group !40 // CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 0 // CHECK2-NEXT: store double** [[TMP1]], double*** [[TMP31]], align 8, !llvm.access.group !40 // CHECK2-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 1 @@ -4108,20 +4106,20 @@ int main() { // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: // CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 -// CHECK2-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP35]], 1 -// CHECK2-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK2-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK2-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: // CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] -// CHECK2-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] +// CHECK2-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] -// CHECK2-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] +// CHECK2-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: // CHECK2-NEXT: [[TMP40:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -4132,11 +4130,11 @@ int main() { // CHECK2-NEXT: br i1 [[TMP43]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: // CHECK2-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[SUB21:%.*]] = sub nsw i32 [[TMP44]], 0 -// CHECK2-NEXT: [[DIV22:%.*]] = sdiv i32 [[SUB21]], 1 -// CHECK2-NEXT: [[MUL23:%.*]] = mul nsw i32 [[DIV22]], 1 -// CHECK2-NEXT: [[ADD24:%.*]] = add nsw i32 0, [[MUL23]] -// CHECK2-NEXT: store i32 [[ADD24]], i32* [[I6]], align 4 +// CHECK2-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP44]], 0 +// CHECK2-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK2-NEXT: [[MUL22:%.*]] = mul nsw i32 [[DIV21]], 1 +// CHECK2-NEXT: [[ADD23:%.*]] = add nsw i32 0, [[MUL22]] +// CHECK2-NEXT: store i32 [[ADD23]], i32* [[I6]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK2: .omp.final.done: // CHECK2-NEXT: br label [[OMP_PRECOND_END]] @@ -5966,7 +5964,7 @@ int main() { // CHECK3: omp.dispatch.cond: // CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -7861,7 +7859,7 @@ int main() { // CHECK4: omp.dispatch.cond: // CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK4-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// CHECK4-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK4-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -10353,34 +10351,33 @@ int main() { // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: // CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 // CHECK9-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP14]] +// CHECK9-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP13]], [[CONV7]] // CHECK9-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP15]] to i32 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: // CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CONV9:%.*]] = sext i32 [[TMP16]] to i64 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] -// CHECK9-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 -// CHECK9-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK9-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK9-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: // CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !47 -// CHECK9-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: // CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 @@ -10393,34 +10390,34 @@ int main() { // CHECK9-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !47 // CHECK9-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !47 // CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !47 -// CHECK9-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM13]] -// CHECK9-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX14]], align 8, !llvm.access.group !47 -// CHECK9-NEXT: [[ADD15:%.*]] = fadd double [[TMP25]], [[TMP28]] +// CHECK9-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM12]] +// CHECK9-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX13]], align 8, !llvm.access.group !47 +// CHECK9-NEXT: [[ADD14:%.*]] = fadd double [[TMP25]], [[TMP28]] // CHECK9-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !47 // CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !47 -// CHECK9-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK9-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM16]] -// CHECK9-NEXT: store double [[ADD15]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !47 +// CHECK9-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM15]] +// CHECK9-NEXT: store double [[ADD14]], double* [[ARRAYIDX16]], align 8, !llvm.access.group !47 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: // CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 -// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP31]], 1 -// CHECK9-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK9-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK9-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: // CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] -// CHECK9-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK9-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] -// CHECK9-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK9-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: // CHECK9-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -10431,11 +10428,11 @@ int main() { // CHECK9-NEXT: br i1 [[TMP39]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB21:%.*]] = sub nsw i32 [[TMP40]], 0 -// CHECK9-NEXT: [[DIV22:%.*]] = sdiv i32 [[SUB21]], 1 -// CHECK9-NEXT: [[MUL23:%.*]] = mul nsw i32 [[DIV22]], 1 -// CHECK9-NEXT: [[ADD24:%.*]] = add nsw i32 0, [[MUL23]] -// CHECK9-NEXT: store i32 [[ADD24]], i32* [[I6]], align 4 +// CHECK9-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP40]], 0 +// CHECK9-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK9-NEXT: [[MUL22:%.*]] = mul nsw i32 [[DIV21]], 1 +// CHECK9-NEXT: [[ADD23:%.*]] = add nsw i32 0, [[MUL22]] +// CHECK9-NEXT: store i32 [[ADD23]], i32* [[I6]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: br label [[OMP_PRECOND_END]] @@ -12724,34 +12721,33 @@ int main() { // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: // CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 // CHECK9-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP14]] +// CHECK9-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP13]], [[CONV7]] // CHECK9-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP15]] to i32 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: // CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CONV9:%.*]] = sext i32 [[TMP16]] to i64 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] -// CHECK9-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 -// CHECK9-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK9-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK9-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK9-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: // CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !89 // CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !89 -// CHECK9-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: // CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !89 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 @@ -12764,34 +12760,34 @@ int main() { // CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !89 // CHECK9-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP3]], align 8, !llvm.access.group !89 // CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !89 -// CHECK9-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i64 [[IDXPROM13]] -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[ARRAYIDX14]], align 4, !llvm.access.group !89 -// CHECK9-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP25]], [[TMP28]] +// CHECK9-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i64 [[IDXPROM12]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[ARRAYIDX13]], align 4, !llvm.access.group !89 +// CHECK9-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP25]], [[TMP28]] // CHECK9-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP1]], align 8, !llvm.access.group !89 // CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !89 -// CHECK9-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK9-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i64 [[IDXPROM16]] -// CHECK9-NEXT: store i32 [[ADD15]], i32* [[ARRAYIDX17]], align 4, !llvm.access.group !89 +// CHECK9-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i64 [[IDXPROM15]] +// CHECK9-NEXT: store i32 [[ADD14]], i32* [[ARRAYIDX16]], align 4, !llvm.access.group !89 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: // CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !89 -// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP31]], 1 -// CHECK9-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !89 +// CHECK9-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK9-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !89 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP90:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: // CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] -// CHECK9-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK9-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] -// CHECK9-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK9-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: // CHECK9-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -12802,11 +12798,11 @@ int main() { // CHECK9-NEXT: br i1 [[TMP39]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB21:%.*]] = sub nsw i32 [[TMP40]], 0 -// CHECK9-NEXT: [[DIV22:%.*]] = sdiv i32 [[SUB21]], 1 -// CHECK9-NEXT: [[MUL23:%.*]] = mul nsw i32 [[DIV22]], 1 -// CHECK9-NEXT: [[ADD24:%.*]] = add nsw i32 0, [[MUL23]] -// CHECK9-NEXT: store i32 [[ADD24]], i32* [[I6]], align 4 +// CHECK9-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP40]], 0 +// CHECK9-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK9-NEXT: [[MUL22:%.*]] = mul nsw i32 [[DIV21]], 1 +// CHECK9-NEXT: [[ADD23:%.*]] = add nsw i32 0, [[MUL22]] +// CHECK9-NEXT: store i32 [[ADD23]], i32* [[I6]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: br label [[OMP_PRECOND_END]] @@ -15105,34 +15101,33 @@ int main() { // CHECK10-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK10: omp.dispatch.cond: // CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 // CHECK10-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK10-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP14]] +// CHECK10-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK10-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP13]], [[CONV7]] // CHECK10-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: // CHECK10-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK10-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP15]] to i32 // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: // CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CONV9:%.*]] = sext i32 [[TMP16]] to i64 // CHECK10-NEXT: br label [[COND_END]] // CHECK10: cond.end: -// CHECK10-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] -// CHECK10-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 -// CHECK10-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK10-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK10-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK10-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK10-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK10-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK10: omp.dispatch.body: // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: // CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !47 -// CHECK10-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK10-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK10-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK10-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: // CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 @@ -15145,34 +15140,34 @@ int main() { // CHECK10-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !47 // CHECK10-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !47 // CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !47 -// CHECK10-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK10-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM13]] -// CHECK10-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX14]], align 8, !llvm.access.group !47 -// CHECK10-NEXT: [[ADD15:%.*]] = fadd double [[TMP25]], [[TMP28]] +// CHECK10-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK10-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM12]] +// CHECK10-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX13]], align 8, !llvm.access.group !47 +// CHECK10-NEXT: [[ADD14:%.*]] = fadd double [[TMP25]], [[TMP28]] // CHECK10-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !47 // CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !47 -// CHECK10-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK10-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM16]] -// CHECK10-NEXT: store double [[ADD15]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !47 +// CHECK10-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK10-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM15]] +// CHECK10-NEXT: store double [[ADD14]], double* [[ARRAYIDX16]], align 8, !llvm.access.group !47 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: // CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 -// CHECK10-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP31]], 1 -// CHECK10-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK10-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK10-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK10: omp.dispatch.inc: // CHECK10-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK10-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK10-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] -// CHECK10-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK10-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4 // CHECK10-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK10-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] -// CHECK10-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK10-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK10: omp.dispatch.end: // CHECK10-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -15183,11 +15178,11 @@ int main() { // CHECK10-NEXT: br i1 [[TMP39]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK10: .omp.final.then: // CHECK10-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK10-NEXT: [[SUB21:%.*]] = sub nsw i32 [[TMP40]], 0 -// CHECK10-NEXT: [[DIV22:%.*]] = sdiv i32 [[SUB21]], 1 -// CHECK10-NEXT: [[MUL23:%.*]] = mul nsw i32 [[DIV22]], 1 -// CHECK10-NEXT: [[ADD24:%.*]] = add nsw i32 0, [[MUL23]] -// CHECK10-NEXT: store i32 [[ADD24]], i32* [[I6]], align 4 +// CHECK10-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP40]], 0 +// CHECK10-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK10-NEXT: [[MUL22:%.*]] = mul nsw i32 [[DIV21]], 1 +// CHECK10-NEXT: [[ADD23:%.*]] = add nsw i32 0, [[MUL22]] +// CHECK10-NEXT: store i32 [[ADD23]], i32* [[I6]], align 4 // CHECK10-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK10: .omp.final.done: // CHECK10-NEXT: br label [[OMP_PRECOND_END]] @@ -17476,34 +17471,33 @@ int main() { // CHECK10-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK10: omp.dispatch.cond: // CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 // CHECK10-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK10-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP14]] +// CHECK10-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK10-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP13]], [[CONV7]] // CHECK10-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: // CHECK10-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK10-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP15]] to i32 // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: // CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CONV9:%.*]] = sext i32 [[TMP16]] to i64 // CHECK10-NEXT: br label [[COND_END]] // CHECK10: cond.end: -// CHECK10-NEXT: [[COND:%.*]] = phi i64 [ [[TMP15]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] -// CHECK10-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 -// CHECK10-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK10-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK10-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK10-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK10-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK10-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK10: omp.dispatch.body: // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: // CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !89 // CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !89 -// CHECK10-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK10-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK10-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK10-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: // CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !89 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 @@ -17516,34 +17510,34 @@ int main() { // CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !89 // CHECK10-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP3]], align 8, !llvm.access.group !89 // CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !89 -// CHECK10-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK10-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i64 [[IDXPROM13]] -// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[ARRAYIDX14]], align 4, !llvm.access.group !89 -// CHECK10-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP25]], [[TMP28]] +// CHECK10-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK10-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i64 [[IDXPROM12]] +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[ARRAYIDX13]], align 4, !llvm.access.group !89 +// CHECK10-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP25]], [[TMP28]] // CHECK10-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP1]], align 8, !llvm.access.group !89 // CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !89 -// CHECK10-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK10-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i64 [[IDXPROM16]] -// CHECK10-NEXT: store i32 [[ADD15]], i32* [[ARRAYIDX17]], align 4, !llvm.access.group !89 +// CHECK10-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK10-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i64 [[IDXPROM15]] +// CHECK10-NEXT: store i32 [[ADD14]], i32* [[ARRAYIDX16]], align 4, !llvm.access.group !89 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: // CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !89 -// CHECK10-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP31]], 1 -// CHECK10-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !89 +// CHECK10-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK10-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !89 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP90:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK10: omp.dispatch.inc: // CHECK10-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK10-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK10-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] -// CHECK10-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK10-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4 // CHECK10-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK10-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] -// CHECK10-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK10-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK10: omp.dispatch.end: // CHECK10-NEXT: [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -17554,11 +17548,11 @@ int main() { // CHECK10-NEXT: br i1 [[TMP39]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK10: .omp.final.then: // CHECK10-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK10-NEXT: [[SUB21:%.*]] = sub nsw i32 [[TMP40]], 0 -// CHECK10-NEXT: [[DIV22:%.*]] = sdiv i32 [[SUB21]], 1 -// CHECK10-NEXT: [[MUL23:%.*]] = mul nsw i32 [[DIV22]], 1 -// CHECK10-NEXT: [[ADD24:%.*]] = add nsw i32 0, [[MUL23]] -// CHECK10-NEXT: store i32 [[ADD24]], i32* [[I6]], align 4 +// CHECK10-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP40]], 0 +// CHECK10-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK10-NEXT: [[MUL22:%.*]] = mul nsw i32 [[DIV21]], 1 +// CHECK10-NEXT: [[ADD23:%.*]] = add nsw i32 0, [[MUL22]] +// CHECK10-NEXT: store i32 [[ADD23]], i32* [[I6]], align 4 // CHECK10-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK10: .omp.final.done: // CHECK10-NEXT: br label [[OMP_PRECOND_END]] @@ -19807,7 +19801,7 @@ int main() { // CHECK11: omp.dispatch.cond: // CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -22102,7 +22096,7 @@ int main() { // CHECK11: omp.dispatch.cond: // CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -24407,7 +24401,7 @@ int main() { // CHECK12: omp.dispatch.cond: // CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK12-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// CHECK12-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK12-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK12: cond.true: // CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -26702,7 +26696,7 @@ int main() { // CHECK12: omp.dispatch.cond: // CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK12-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// CHECK12-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK12-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK12: cond.true: // CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp index 4256228fdea9a..de393701c2f0f 100644 --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp @@ -18721,34 +18721,33 @@ int bar(int n){ // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CONV7:%.*]] = sext i32 [[TMP9]] to i64 // CHECK1-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP10]] +// CHECK1-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP10]] to i32 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP9]], [[CONV7]] // CHECK1-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP11]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CONV9:%.*]] = sext i32 [[TMP12]] to i64 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[TMP11]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] -// CHECK1-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 -// CHECK1-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 @@ -18765,20 +18764,20 @@ int bar(int n){ // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: // CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK1-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK1-NEXT: store i32 [[ADD15]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: // CHECK1-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -20328,34 +20327,33 @@ int bar(int n){ // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CONV7:%.*]] = sext i32 [[TMP9]] to i64 // CHECK2-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP10]] +// CHECK2-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP10]] to i32 +// CHECK2-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP9]], [[CONV7]] // CHECK2-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP11]] to i32 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CONV9:%.*]] = sext i32 [[TMP12]] to i64 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i64 [ [[TMP11]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] -// CHECK2-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 -// CHECK2-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK2-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK2-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK2-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK2-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK2-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: // CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 @@ -20372,20 +20370,20 @@ int bar(int n){ // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: // CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK2-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK2-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: // CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK2-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK2-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK2-NEXT: store i32 [[ADD15]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK2-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: // CHECK2-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -21917,7 +21915,7 @@ int bar(int n){ // CHECK3: omp.dispatch.cond: // CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -23463,7 +23461,7 @@ int bar(int n){ // CHECK4: omp.dispatch.cond: // CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK4-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] +// CHECK4-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] // CHECK4-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp index 7b17bb7824a03..4b558c34e1e6f 100644 --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp @@ -274,64 +274,63 @@ int main(int argc, char **argv) { // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CONV8:%.*]] = sext i32 [[TMP10]] to i64 // CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CMP9:%.*]] = icmp ugt i64 [[CONV8]], [[TMP11]] +// CHECK1-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP11]] to i32 +// CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP10]], [[CONV8]] // CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV10:%.*]] = trunc i64 [[TMP12]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: // CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CONV10:%.*]] = sext i32 [[TMP13]] to i64 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[TMP12]], [[COND_TRUE]] ], [ [[CONV10]], [[COND_FALSE]] ] -// CHECK1-NEXT: [[CONV11:%.*]] = trunc i64 [[COND]] to i32 -// CHECK1-NEXT: store i32 [[CONV11]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV10]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK1-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP13:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I7]], align 4 // CHECK1-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I7]]) #[[ATTR4:[0-9]+]] -// CHECK1-NEXT: [[CALL14:%.*]] = call i32 @_Z3fooPi(i32* [[TMP0]]) #[[ATTR4]] -// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[CALL]], [[CALL14]] -// CHECK1-NEXT: [[CALL16:%.*]] = call i32 @_Z3fooPi(i32* [[CONV]]) #[[ATTR4]] -// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[ADD15]], [[CALL16]] -// CHECK1-NEXT: store i32 [[ADD17]], i32* [[TMP0]], align 4 +// CHECK1-NEXT: [[CALL13:%.*]] = call i32 @_Z3fooPi(i32* [[TMP0]]) #[[ATTR4]] +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[CALL]], [[CALL13]] +// CHECK1-NEXT: [[CALL15:%.*]] = call i32 @_Z3fooPi(i32* [[CONV]]) #[[ATTR4]] +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD14]], [[CALL15]] +// CHECK1-NEXT: store i32 [[ADD16]], i32* [[TMP0]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK1-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK1-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK1-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK1-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: // CHECK1-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -574,7 +573,7 @@ int main(int argc, char **argv) { // CHECK2: omp.dispatch.cond: // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP10]], [[TMP11]] +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] // CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -870,7 +869,7 @@ int main(int argc, char **argv) { // CHECK3: omp.dispatch.cond: // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] // CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -1179,64 +1178,63 @@ int main(int argc, char **argv) { // CHECK4-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK4: omp.dispatch.cond: // CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CONV8:%.*]] = sext i32 [[TMP10]] to i64 // CHECK4-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK4-NEXT: [[CMP9:%.*]] = icmp ugt i64 [[CONV8]], [[TMP11]] +// CHECK4-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP11]] to i32 +// CHECK4-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP10]], [[CONV8]] // CHECK4-NEXT: br i1 [[CMP9]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK4-NEXT: [[CONV10:%.*]] = trunc i64 [[TMP12]] to i32 // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: // CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CONV10:%.*]] = sext i32 [[TMP13]] to i64 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i64 [ [[TMP12]], [[COND_TRUE]] ], [ [[CONV10]], [[COND_FALSE]] ] -// CHECK4-NEXT: [[CONV11:%.*]] = trunc i64 [[COND]] to i32 -// CHECK4-NEXT: store i32 [[CONV11]], i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[CONV10]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK4-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK4-NEXT: br i1 [[CMP12]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK4-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK4-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK4: omp.dispatch.body: // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: // CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP13:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK4-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK4-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: // CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[I7]], align 4 // CHECK4-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I7]]) #[[ATTR4:[0-9]+]] -// CHECK4-NEXT: [[CALL14:%.*]] = call i32 @_Z3fooPi(i32* [[TMP0]]) #[[ATTR4]] -// CHECK4-NEXT: [[ADD15:%.*]] = add nsw i32 [[CALL]], [[CALL14]] -// CHECK4-NEXT: [[CALL16:%.*]] = call i32 @_Z3fooPi(i32* [[CONV]]) #[[ATTR4]] -// CHECK4-NEXT: [[ADD17:%.*]] = add nsw i32 [[ADD15]], [[CALL16]] -// CHECK4-NEXT: store i32 [[ADD17]], i32* [[TMP0]], align 4 +// CHECK4-NEXT: [[CALL13:%.*]] = call i32 @_Z3fooPi(i32* [[TMP0]]) #[[ATTR4]] +// CHECK4-NEXT: [[ADD14:%.*]] = add nsw i32 [[CALL]], [[CALL13]] +// CHECK4-NEXT: [[CALL15:%.*]] = call i32 @_Z3fooPi(i32* [[CONV]]) #[[ATTR4]] +// CHECK4-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD14]], [[CALL15]] +// CHECK4-NEXT: store i32 [[ADD16]], i32* [[TMP0]], align 4 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: // CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK4-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK4-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK4: omp.dispatch.inc: // CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK4-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK4-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_LB]], align 4 // CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK4-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK4-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK4: omp.dispatch.end: // CHECK4-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -1479,7 +1477,7 @@ int main(int argc, char **argv) { // CHECK5: omp.dispatch.cond: // CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] // CHECK5-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -1775,7 +1773,7 @@ int main(int argc, char **argv) { // CHECK6: omp.dispatch.cond: // CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK6-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP10]], [[TMP11]] +// CHECK6-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] // CHECK6-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: // CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp index 35816730ed9b0..a348ff7d45780 100644 --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp @@ -9488,73 +9488,73 @@ int bar(int n){ // CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK1-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] // CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !12 // CHECK1-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !12 // CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[L_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[L_CASTED]], align 8 +// CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[L_CASTED]], align 8, !llvm.access.group !12 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK1-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP15]] to i8* -// CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 +// CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8, !llvm.access.group !12 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 // CHECK1-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP17]] to i8* -// CHECK1-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8 +// CHECK1-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8, !llvm.access.group !12 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 // CHECK1-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP19]] to i8* -// CHECK1-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 +// CHECK1-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8, !llvm.access.group !12 // CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 // CHECK1-NEXT: [[TMP29:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 8 +// CHECK1-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 8, !llvm.access.group !12 // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 // CHECK1-NEXT: [[TMP31:%.*]] = inttoptr i64 [[TMP21]] to i8* -// CHECK1-NEXT: store i8* [[TMP31]], i8** [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK1-NEXT: store i8* [[TMP31]], i8** [[TMP30]], align 8, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[TMP34:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP33]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i64)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP34]], i64 5) +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP33]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i64)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP34]], i64 5), !llvm.access.group !12 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] -// CHECK1-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] -// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] -// CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[CMP13:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] // CHECK1-NEXT: br i1 [[CMP13]], label [[COND_TRUE14:%.*]], label [[COND_FALSE15:%.*]] // CHECK1: cond.true14: -// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4, !llvm.access.group !12 // CHECK1-NEXT: br label [[COND_END16:%.*]] // CHECK1: cond.false15: -// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 // CHECK1-NEXT: br label [[COND_END16]] // CHECK1: cond.end16: // CHECK1-NEXT: [[COND17:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE14]] ], [ [[TMP44]], [[COND_FALSE15]] ] -// CHECK1-NEXT: store i32 [[COND17]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP45]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK1-NEXT: store i32 [[COND17]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: store i32 [[TMP45]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -9646,64 +9646,63 @@ int bar(int n){ // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CONV7:%.*]] = sext i32 [[TMP9]] to i64 // CHECK1-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP10]] +// CHECK1-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP10]] to i32 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP9]], [[CONV7]] // CHECK1-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP11]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CONV9:%.*]] = sext i32 [[TMP12]] to i64 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[TMP11]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] -// CHECK1-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 -// CHECK1-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV9]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 +// CHECK1-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I6]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !16 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !16 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !16 +// CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 8, !llvm.access.group !16 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK1-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK1-NEXT: store i32 [[ADD15]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: // CHECK1-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -9714,11 +9713,11 @@ int bar(int n){ // CHECK1-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB16:%.*]] = sub nsw i32 [[TMP30]], 0 -// CHECK1-NEXT: [[DIV17:%.*]] = sdiv i32 [[SUB16]], 1 -// CHECK1-NEXT: [[MUL18:%.*]] = mul nsw i32 [[DIV17]], 1 -// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i32 0, [[MUL18]] -// CHECK1-NEXT: store i32 [[ADD19]], i32* [[I6]], align 4 +// CHECK1-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP30]], 0 +// CHECK1-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 +// CHECK1-NEXT: [[MUL17:%.*]] = mul nsw i32 [[DIV16]], 1 +// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 0, [[MUL17]] +// CHECK1-NEXT: store i32 [[ADD18]], i32* [[I6]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 @@ -9829,66 +9828,66 @@ int bar(int n){ // CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !19 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 // CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 // CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !19 // CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 +// CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !19 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK1-NEXT: [[TMP21:%.*]] = inttoptr i64 [[TMP15]] to i8* -// CHECK1-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8 +// CHECK1-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8, !llvm.access.group !19 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 // CHECK1-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP17]] to i8* -// CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 +// CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8, !llvm.access.group !19 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 // CHECK1-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP19]] to i8* -// CHECK1-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8 +// CHECK1-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8, !llvm.access.group !19 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 // CHECK1-NEXT: [[TMP27:%.*]] = bitcast [1000 x i16]* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// CHECK1-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4, !llvm.access.group !19 // CHECK1-NEXT: [[TMP30:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP29]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i16]*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP30]], i64 4) +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP29]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i16]*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP30]], i64 4), !llvm.access.group !19 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] -// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 // CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 // CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] -// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !19 // CHECK1-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP37]], [[TMP38]] // CHECK1-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK1: cond.true11: -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !19 // CHECK1-NEXT: br label [[COND_END13:%.*]] // CHECK1: cond.false12: -// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 // CHECK1-NEXT: br label [[COND_END13]] // CHECK1: cond.end13: // CHECK1-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP39]], [[COND_TRUE11]] ], [ [[TMP40]], [[COND_FALSE12]] ] -// CHECK1-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP41]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK1-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: store i32 [[TMP41]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -9969,33 +9968,33 @@ int bar(int n){ // CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK1-NEXT: [[CONV6:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group !22 // CHECK1-NEXT: [[CMP7:%.*]] = icmp ule i64 [[CONV6]], [[TMP11]] // CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !22 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !22 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], [1000 x i16]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 +// CHECK1-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX]], align 2, !llvm.access.group !22 // CHECK1-NEXT: [[CONV8:%.*]] = sext i16 [[TMP14]] to i32 // CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], 1 // CHECK1-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i16 -// CHECK1-NEXT: store i16 [[CONV10]], i16* [[ARRAYIDX]], align 2 +// CHECK1-NEXT: store i16 [[CONV10]], i16* [[ARRAYIDX]], align 2, !llvm.access.group !22 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 // CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -10084,53 +10083,53 @@ int bar(int n){ // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK1-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP8]] to i8* -// CHECK1-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 8 +// CHECK1-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 8, !llvm.access.group !25 // CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 // CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP10]] to i8* -// CHECK1-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8 +// CHECK1-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8, !llvm.access.group !25 // CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 // CHECK1-NEXT: [[TMP16:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8 +// CHECK1-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8, !llvm.access.group !25 // CHECK1-NEXT: [[TMP17:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP17]], i64 3) +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP17]], i64 3), !llvm.access.group !25 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 // CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK1: cond.true5: // CHECK1-NEXT: br label [[COND_END7:%.*]] // CHECK1: cond.false6: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 // CHECK1-NEXT: br label [[COND_END7]] // CHECK1: cond.end7: // CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] -// CHECK1-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK1-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -10183,31 +10182,31 @@ int bar(int n){ // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK1-NEXT: [[CONV2:%.*]] = sext i32 [[TMP6]] to i64 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group !28 // CHECK1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !28 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !28 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -10302,60 +10301,60 @@ int bar(int n){ // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK1-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !31 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[F_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP11]], i32* [[CONV3]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[F_CASTED]], align 8 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[CONV3]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[F_CASTED]], align 8, !llvm.access.group !31 // CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP8]] to i8* -// CHECK1-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8 +// CHECK1-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8, !llvm.access.group !31 // CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 // CHECK1-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP10]] to i8* -// CHECK1-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8 +// CHECK1-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8, !llvm.access.group !31 // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 // CHECK1-NEXT: [[TMP18:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 8 +// CHECK1-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 8, !llvm.access.group !31 // CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 // CHECK1-NEXT: [[TMP20:%.*]] = inttoptr i64 [[TMP12]] to i8* -// CHECK1-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8 +// CHECK1-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8, !llvm.access.group !31 // CHECK1-NEXT: [[TMP21:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, [10 x [10 x i32]]*, i64)* @__omp_outlined__7 to i8*), i8* null, i8** [[TMP21]], i64 4) +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, [10 x [10 x i32]]*, i64)* @__omp_outlined__7 to i8*), i8* null, i8** [[TMP21]], i64 4), !llvm.access.group !31 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 // CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 // CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP28]], 99 // CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]] // CHECK1: cond.true7: // CHECK1-NEXT: br label [[COND_END9:%.*]] // CHECK1: cond.false8: -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 // CHECK1-NEXT: br label [[COND_END9]] // CHECK1: cond.end9: // CHECK1-NEXT: [[COND10:%.*]] = phi i32 [ 99, [[COND_TRUE7]] ], [ [[TMP29]], [[COND_FALSE8]] ] -// CHECK1-NEXT: store i32 [[COND10]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP30]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK1-NEXT: store i32 [[COND10]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: store i32 [[TMP30]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -10415,49 +10414,49 @@ int bar(int n){ // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK1-NEXT: [[CONV4:%.*]] = sext i32 [[TMP6]] to i64 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group !34 // CHECK1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV4]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !34 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK1-NEXT: [[DIV5:%.*]] = sdiv i32 [[TMP10]], 10 // CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[DIV5]], 10 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL6]] // CHECK1-NEXT: [[MUL7:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL7]] -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[J]], align 4 -// CHECK1-NEXT: store i32 10, i32* [[K]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[J]], align 4, !llvm.access.group !34 +// CHECK1-NEXT: store i32 10, i32* [[K]], align 4, !llvm.access.group !34 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !34 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !34 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !34 // CHECK1-NEXT: [[MUL9:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP11]], [[MUL9]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4, !llvm.access.group !34 // CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD10]], [[TMP14]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !34 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[J]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !34 // CHECK1-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP16]] to i64 // CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM12]] -// CHECK1-NEXT: store i32 [[ADD11]], i32* [[ARRAYIDX13]], align 4 +// CHECK1-NEXT: store i32 [[ADD11]], i32* [[ARRAYIDX13]], align 4, !llvm.access.group !34 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 // CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -10575,69 +10574,69 @@ int bar(int n){ // CHECK2-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK2-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] // CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[L_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP18]], i32* [[L_CASTED]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[L_CASTED]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[L_ADDR]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: store i32 [[TMP18]], i32* [[L_CASTED]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[L_CASTED]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK2-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP14]] to i8* -// CHECK2-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4 +// CHECK2-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 // CHECK2-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP15]] to i8* -// CHECK2-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4 +// CHECK2-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 // CHECK2-NEXT: [[TMP25:%.*]] = inttoptr i32 [[TMP17]] to i8* -// CHECK2-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4 +// CHECK2-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 // CHECK2-NEXT: [[TMP27:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* -// CHECK2-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 4 +// CHECK2-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 // CHECK2-NEXT: [[TMP29:%.*]] = inttoptr i32 [[TMP19]] to i8* -// CHECK2-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 4 -// CHECK2-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 +// CHECK2-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[TMP32:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP31]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP32]], i32 5) +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP31]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP32]], i32 5), !llvm.access.group !12 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] -// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] -// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] -// CHECK2-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] // CHECK2-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK2: cond.true11: -// CHECK2-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !12 // CHECK2-NEXT: br label [[COND_END13:%.*]] // CHECK2: cond.false12: -// CHECK2-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 // CHECK2-NEXT: br label [[COND_END13]] // CHECK2: cond.end13: // CHECK2-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE11]] ], [ [[TMP42]], [[COND_FALSE12]] ] -// CHECK2-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP43]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK2-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: store i32 [[TMP43]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -10726,7 +10725,7 @@ int bar(int n){ // CHECK2: omp.dispatch.cond: // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] +// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] // CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -10746,28 +10745,28 @@ int bar(int n){ // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 // CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !16 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !16 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP19]] -// CHECK2-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK2-NEXT: store i32 [[TMP20]], i32* [[L_ADDR]], align 4 +// CHECK2-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !16 +// CHECK2-NEXT: store i32 [[TMP20]], i32* [[L_ADDR]], align 4, !llvm.access.group !16 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: @@ -10901,63 +10900,63 @@ int bar(int n){ // CHECK2-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !19 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] // CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !19 // CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK2-NEXT: [[TMP19:%.*]] = inttoptr i32 [[TMP14]] to i8* -// CHECK2-NEXT: store i8* [[TMP19]], i8** [[TMP18]], align 4 +// CHECK2-NEXT: store i8* [[TMP19]], i8** [[TMP18]], align 4, !llvm.access.group !19 // CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 // CHECK2-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP15]] to i8* -// CHECK2-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4 +// CHECK2-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4, !llvm.access.group !19 // CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 // CHECK2-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP17]] to i8* -// CHECK2-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4 +// CHECK2-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4, !llvm.access.group !19 // CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 // CHECK2-NEXT: [[TMP25:%.*]] = bitcast [1000 x i16]* [[TMP0]] to i8* -// CHECK2-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4 -// CHECK2-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK2-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4, !llvm.access.group !19 // CHECK2-NEXT: [[TMP28:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP27]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i16]*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP28]], i32 4) +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP27]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i16]*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP28]], i32 4), !llvm.access.group !19 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 // CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] -// CHECK2-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] -// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 // CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] -// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !19 // CHECK2-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]] // CHECK2-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK2: cond.true10: -// CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !19 // CHECK2-NEXT: br label [[COND_END12:%.*]] // CHECK2: cond.false11: -// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 // CHECK2-NEXT: br label [[COND_END12]] // CHECK2: cond.end12: // CHECK2-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ] -// CHECK2-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP39]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK2-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: store i32 [[TMP39]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -11035,31 +11034,31 @@ int bar(int n){ // CHECK2-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group !22 // CHECK2-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] // CHECK2-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !22 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !22 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], [1000 x i16]* [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK2-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 +// CHECK2-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX]], align 2, !llvm.access.group !22 // CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 // CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV]], 1 // CHECK2-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK2-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX]], align 2 +// CHECK2-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX]], align 2, !llvm.access.group !22 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -11148,51 +11147,51 @@ int bar(int n){ // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK2-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 // CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK2-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK2-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 +// CHECK2-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4, !llvm.access.group !25 // CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 // CHECK2-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to i8* -// CHECK2-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 +// CHECK2-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4, !llvm.access.group !25 // CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 // CHECK2-NEXT: [[TMP14:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* -// CHECK2-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4 +// CHECK2-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4, !llvm.access.group !25 // CHECK2-NEXT: [[TMP15:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP15]], i32 3) +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP15]], i32 3), !llvm.access.group !25 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 // CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 // CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK2: cond.true5: // CHECK2-NEXT: br label [[COND_END7:%.*]] // CHECK2: cond.false6: -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 // CHECK2-NEXT: br label [[COND_END7]] // CHECK2: cond.end7: // CHECK2-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] -// CHECK2-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK2-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -11243,29 +11242,29 @@ int bar(int n){ // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group !28 // CHECK2-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !28 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !28 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP9]] -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 // CHECK2-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 // CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -11357,57 +11356,57 @@ int bar(int n){ // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK2-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[F_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP9]], i32* [[F_CASTED]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[F_CASTED]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[F_ADDR]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: store i32 [[TMP9]], i32* [[F_CASTED]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[F_CASTED]], align 4, !llvm.access.group !31 // CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK2-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK2-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 +// CHECK2-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4, !llvm.access.group !31 // CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 // CHECK2-NEXT: [[TMP14:%.*]] = inttoptr i32 [[TMP8]] to i8* -// CHECK2-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4 +// CHECK2-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4, !llvm.access.group !31 // CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 // CHECK2-NEXT: [[TMP16:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* -// CHECK2-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 4 +// CHECK2-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 4, !llvm.access.group !31 // CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 // CHECK2-NEXT: [[TMP18:%.*]] = inttoptr i32 [[TMP10]] to i8* -// CHECK2-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 4 +// CHECK2-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 4, !llvm.access.group !31 // CHECK2-NEXT: [[TMP19:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x [10 x i32]]*, i32)* @__omp_outlined__7 to i8*), i8* null, i8** [[TMP19]], i32 4) +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x [10 x i32]]*, i32)* @__omp_outlined__7 to i8*), i8* null, i8** [[TMP19]], i32 4), !llvm.access.group !31 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 // CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK2-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 // CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP26]], 99 // CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] // CHECK2: cond.true6: // CHECK2-NEXT: br label [[COND_END8:%.*]] // CHECK2: cond.false7: -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 // CHECK2-NEXT: br label [[COND_END8]] // CHECK2: cond.end8: // CHECK2-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP27]], [[COND_FALSE7]] ] -// CHECK2-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK2-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -11464,46 +11463,46 @@ int bar(int n){ // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group !34 // CHECK2-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !34 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK2-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP10]], 10 // CHECK2-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 10 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL3]] // CHECK2-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]] -// CHECK2-NEXT: store i32 [[ADD5]], i32* [[J]], align 4 -// CHECK2-NEXT: store i32 10, i32* [[K]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[F_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[ADD5]], i32* [[J]], align 4, !llvm.access.group !34 +// CHECK2-NEXT: store i32 10, i32* [[K]], align 4, !llvm.access.group !34 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !34 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !34 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[F_ADDR]], align 4, !llvm.access.group !34 // CHECK2-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], [[MUL6]] -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4, !llvm.access.group !34 // CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD7]], [[TMP14]] -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !34 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i32 0, i32 [[TMP15]] -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[J]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !34 // CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP16]] -// CHECK2-NEXT: store i32 [[ADD8]], i32* [[ARRAYIDX9]], align 4 +// CHECK2-NEXT: store i32 [[ADD8]], i32* [[ARRAYIDX9]], align 4, !llvm.access.group !34 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 // CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK2-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -11621,69 +11620,69 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !12 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK3-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] // CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[L_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP18]], i32* [[L_CASTED]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[L_CASTED]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[L_ADDR]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[L_CASTED]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[L_CASTED]], align 4, !llvm.access.group !12 // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP14]] to i8* -// CHECK3-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4 +// CHECK3-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4, !llvm.access.group !12 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 // CHECK3-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP15]] to i8* -// CHECK3-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4 +// CHECK3-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4, !llvm.access.group !12 // CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 // CHECK3-NEXT: [[TMP25:%.*]] = inttoptr i32 [[TMP17]] to i8* -// CHECK3-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4 +// CHECK3-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4, !llvm.access.group !12 // CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 // CHECK3-NEXT: [[TMP27:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* -// CHECK3-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 4 +// CHECK3-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 4, !llvm.access.group !12 // CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 // CHECK3-NEXT: [[TMP29:%.*]] = inttoptr i32 [[TMP19]] to i8* -// CHECK3-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 +// CHECK3-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4, !llvm.access.group !12 // CHECK3-NEXT: [[TMP32:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP31]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP32]], i32 5) +// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP31]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP32]], i32 5), !llvm.access.group !12 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 // CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] -// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 // CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] -// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !12 // CHECK3-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] // CHECK3-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK3: cond.true11: -// CHECK3-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !12 // CHECK3-NEXT: br label [[COND_END13:%.*]] // CHECK3: cond.false12: -// CHECK3-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 // CHECK3-NEXT: br label [[COND_END13]] // CHECK3: cond.end13: // CHECK3-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE11]] ], [ [[TMP42]], [[COND_FALSE12]] ] -// CHECK3-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP43]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK3-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: store i32 [[TMP43]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -11772,7 +11771,7 @@ int bar(int n){ // CHECK3: omp.dispatch.cond: // CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -11792,28 +11791,28 @@ int bar(int n){ // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 // CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !16 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP19]] -// CHECK3-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK3-NEXT: store i32 [[TMP20]], i32* [[L_ADDR]], align 4 +// CHECK3-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: store i32 [[TMP20]], i32* [[L_ADDR]], align 4, !llvm.access.group !16 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: @@ -11947,63 +11946,63 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !19 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK3-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !19 // CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP19:%.*]] = inttoptr i32 [[TMP14]] to i8* -// CHECK3-NEXT: store i8* [[TMP19]], i8** [[TMP18]], align 4 +// CHECK3-NEXT: store i8* [[TMP19]], i8** [[TMP18]], align 4, !llvm.access.group !19 // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 // CHECK3-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP15]] to i8* -// CHECK3-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4 +// CHECK3-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4, !llvm.access.group !19 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 // CHECK3-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP17]] to i8* -// CHECK3-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4 +// CHECK3-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4, !llvm.access.group !19 // CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 // CHECK3-NEXT: [[TMP25:%.*]] = bitcast [1000 x i16]* [[TMP0]] to i8* -// CHECK3-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK3-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4, !llvm.access.group !19 // CHECK3-NEXT: [[TMP28:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP27]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i16]*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP28]], i32 4) +// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP27]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i16]*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP28]], i32 4), !llvm.access.group !19 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] -// CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 // CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] -// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !19 // CHECK3-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]] // CHECK3-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK3: cond.true10: -// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !19 // CHECK3-NEXT: br label [[COND_END12:%.*]] // CHECK3: cond.false11: -// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 // CHECK3-NEXT: br label [[COND_END12]] // CHECK3: cond.end12: // CHECK3-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ] -// CHECK3-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP39]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK3-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: store i32 [[TMP39]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -12081,31 +12080,31 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group !22 // CHECK3-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] // CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !22 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], [1000 x i16]* [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 +// CHECK3-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX]], align 2, !llvm.access.group !22 // CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 // CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK3-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX]], align 2 +// CHECK3-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX]], align 2, !llvm.access.group !22 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 // CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -12194,51 +12193,51 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK3-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK3-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 +// CHECK3-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4, !llvm.access.group !25 // CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 // CHECK3-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to i8* -// CHECK3-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 +// CHECK3-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4, !llvm.access.group !25 // CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 // CHECK3-NEXT: [[TMP14:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* -// CHECK3-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4 +// CHECK3-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4, !llvm.access.group !25 // CHECK3-NEXT: [[TMP15:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP15]], i32 3) +// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP15]], i32 3), !llvm.access.group !25 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 // CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK3: cond.true5: // CHECK3-NEXT: br label [[COND_END7:%.*]] // CHECK3: cond.false6: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 // CHECK3-NEXT: br label [[COND_END7]] // CHECK3: cond.end7: // CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] -// CHECK3-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK3-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -12289,29 +12288,29 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group !28 // CHECK3-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !28 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !28 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP9]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 // CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -12403,57 +12402,57 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK3-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[F_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], i32* [[F_CASTED]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[F_CASTED]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[F_ADDR]], align 4, !llvm.access.group !31 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[F_CASTED]], align 4, !llvm.access.group !31 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[F_CASTED]], align 4, !llvm.access.group !31 // CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK3-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 +// CHECK3-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4, !llvm.access.group !31 // CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 // CHECK3-NEXT: [[TMP14:%.*]] = inttoptr i32 [[TMP8]] to i8* -// CHECK3-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4 +// CHECK3-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4, !llvm.access.group !31 // CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 // CHECK3-NEXT: [[TMP16:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* -// CHECK3-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 4 +// CHECK3-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 4, !llvm.access.group !31 // CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 // CHECK3-NEXT: [[TMP18:%.*]] = inttoptr i32 [[TMP10]] to i8* -// CHECK3-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 4 +// CHECK3-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 4, !llvm.access.group !31 // CHECK3-NEXT: [[TMP19:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x [10 x i32]]*, i32)* @__omp_outlined__7 to i8*), i8* null, i8** [[TMP19]], i32 4) +// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x [10 x i32]]*, i32)* @__omp_outlined__7 to i8*), i8* null, i8** [[TMP19]], i32 4), !llvm.access.group !31 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 // CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP26]], 99 // CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] // CHECK3: cond.true6: // CHECK3-NEXT: br label [[COND_END8:%.*]] // CHECK3: cond.false7: -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 // CHECK3-NEXT: br label [[COND_END8]] // CHECK3: cond.end8: // CHECK3-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP27]], [[COND_FALSE7]] ] -// CHECK3-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK3-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 +// CHECK3-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -12510,46 +12509,46 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group !34 // CHECK3-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !34 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK3-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP10]], 10 // CHECK3-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 10 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL3]] // CHECK3-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]] -// CHECK3-NEXT: store i32 [[ADD5]], i32* [[J]], align 4 -// CHECK3-NEXT: store i32 10, i32* [[K]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[F_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[ADD5]], i32* [[J]], align 4, !llvm.access.group !34 +// CHECK3-NEXT: store i32 10, i32* [[K]], align 4, !llvm.access.group !34 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !34 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !34 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[F_ADDR]], align 4, !llvm.access.group !34 // CHECK3-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], [[MUL6]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4, !llvm.access.group !34 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD7]], [[TMP14]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !34 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i32 0, i32 [[TMP15]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[J]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !34 // CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP16]] -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[ARRAYIDX9]], align 4 +// CHECK3-NEXT: store i32 [[ADD8]], i32* [[ARRAYIDX9]], align 4, !llvm.access.group !34 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 // CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK3-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_ast_print.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_ast_print.cpp index 39813253f0af9..67554172e9485 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_ast_print.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_ast_print.cpp @@ -1,8 +1,8 @@ -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -ast-print %s -Wno-openmp-mapping | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -ast-print %s -Wno-openmp-mapping -Wsign-conversion | FileCheck %s // RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -emit-pch -o %t %s -Wno-openmp-mapping // RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print -Wno-openmp-mapping | FileCheck %s -// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -ast-print %s -Wno-openmp-mapping | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -ast-print %s -Wno-openmp-mapping -Wsign-conversion | FileCheck %s // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -emit-pch -o %t %s -Wno-openmp-mapping // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print -Wno-openmp-mapping | FileCheck %s // expected-no-diagnostics diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp index 3d9b9c871d95b..364dac905a3d2 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp @@ -741,34 +741,33 @@ int main (int argc, char **argv) { // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: // CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV2]], [[TMP6]] +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = sext i32 [[TMP8]] to i64 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[TMP7]], [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[COND]] to i32 -// CHECK1-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 @@ -784,20 +783,20 @@ int main (int argc, char **argv) { // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: // CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) @@ -1656,34 +1655,33 @@ int main (int argc, char **argv) { // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: // CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV2]], [[TMP6]] +// CHECK2-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: // CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CONV3:%.*]] = sext i32 [[TMP8]] to i64 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i64 [ [[TMP7]], [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] -// CHECK2-NEXT: [[CONV4:%.*]] = trunc i64 [[COND]] to i32 -// CHECK2-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK2-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK2-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: // CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 @@ -1699,20 +1697,20 @@ int main (int argc, char **argv) { // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK2-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK2-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: // CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) @@ -2558,7 +2556,7 @@ int main (int argc, char **argv) { // CHECK3: omp.dispatch.cond: // CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -3445,7 +3443,7 @@ int main (int argc, char **argv) { // CHECK4: omp.dispatch.cond: // CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] +// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -4345,34 +4343,33 @@ int main (int argc, char **argv) { // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: // CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 // CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV2]], [[TMP6]] +// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: // CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CONV3:%.*]] = sext i32 [[TMP8]] to i64 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i64 [ [[TMP7]], [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] -// CHECK5-NEXT: [[CONV4:%.*]] = trunc i64 [[COND]] to i32 -// CHECK5-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: // CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK5-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: // CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 @@ -4388,20 +4385,20 @@ int main (int argc, char **argv) { // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: // CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK5-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK5-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK5: omp.dispatch.inc: // CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK5-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK5-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK5-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK5-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK5: omp.dispatch.end: // CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) @@ -5260,34 +5257,33 @@ int main (int argc, char **argv) { // CHECK6-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK6: omp.dispatch.cond: // CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 // CHECK6-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK6-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV2]], [[TMP6]] +// CHECK6-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] // CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: // CHECK6-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK6-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 // CHECK6-NEXT: br label [[COND_END:%.*]] // CHECK6: cond.false: // CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CONV3:%.*]] = sext i32 [[TMP8]] to i64 // CHECK6-NEXT: br label [[COND_END]] // CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i64 [ [[TMP7]], [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] -// CHECK6-NEXT: [[CONV4:%.*]] = trunc i64 [[COND]] to i32 -// CHECK6-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK6-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK6-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK6-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK6-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK6-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK6: omp.dispatch.body: // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: // CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK6-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK6-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK6-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: // CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 @@ -5303,20 +5299,20 @@ int main (int argc, char **argv) { // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: // CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK6-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK6-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK6: omp.dispatch.inc: // CHECK6-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK6-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK6-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK6-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 // CHECK6-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK6-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK6-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK6-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK6: omp.dispatch.end: // CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) @@ -6162,7 +6158,7 @@ int main (int argc, char **argv) { // CHECK7: omp.dispatch.cond: // CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -7049,7 +7045,7 @@ int main (int argc, char **argv) { // CHECK8: omp.dispatch.cond: // CHECK8-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK8-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] +// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] // CHECK8-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK8: cond.true: // CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -9537,34 +9533,33 @@ int main (int argc, char **argv) { // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: // CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 // CHECK13-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV3]], [[TMP7]] +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: // CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CONV4:%.*]] = sext i32 [[TMP9]] to i64 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] -// CHECK13-NEXT: [[CONV5:%.*]] = trunc i64 [[COND]] to i32 -// CHECK13-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: // CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK13-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: // CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 @@ -9579,20 +9574,20 @@ int main (int argc, char **argv) { // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: // CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK13: omp.dispatch.inc: // CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK13-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK13-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK13-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK13: omp.dispatch.end: // CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) @@ -12057,34 +12052,33 @@ int main (int argc, char **argv) { // CHECK14-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK14: omp.dispatch.cond: // CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 // CHECK14-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV3]], [[TMP7]] +// CHECK14-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] // CHECK14-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: // CHECK14-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK14-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: // CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[CONV4:%.*]] = sext i32 [[TMP9]] to i64 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] -// CHECK14-NEXT: [[CONV5:%.*]] = trunc i64 [[COND]] to i32 -// CHECK14-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK14-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK14-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK14-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK14: omp.dispatch.body: // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: // CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK14-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK14-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: // CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 @@ -12099,20 +12093,20 @@ int main (int argc, char **argv) { // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: // CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK14-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK14: omp.dispatch.inc: // CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK14-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK14-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK14-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK14: omp.dispatch.end: // CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) @@ -14494,7 +14488,7 @@ int main (int argc, char **argv) { // CHECK15: omp.dispatch.cond: // CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -16911,7 +16905,7 @@ int main (int argc, char **argv) { // CHECK16: omp.dispatch.cond: // CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK16-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], [[TMP7]] +// CHECK16-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] // CHECK16-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK16: cond.true: // CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -19411,34 +19405,33 @@ int main (int argc, char **argv) { // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: // CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 // CHECK17-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV3]], [[TMP7]] +// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: // CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CONV4:%.*]] = sext i32 [[TMP9]] to i64 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] -// CHECK17-NEXT: [[CONV5:%.*]] = trunc i64 [[COND]] to i32 -// CHECK17-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: // CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK17-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: // CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 @@ -19453,20 +19446,20 @@ int main (int argc, char **argv) { // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: // CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK17-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: // CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK17-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK17-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK17-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: // CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) @@ -21931,34 +21924,33 @@ int main (int argc, char **argv) { // CHECK18-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK18: omp.dispatch.cond: // CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK18-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 // CHECK18-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK18-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV3]], [[TMP7]] +// CHECK18-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK18-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] // CHECK18-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK18: cond.true: // CHECK18-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK18-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK18-NEXT: br label [[COND_END:%.*]] // CHECK18: cond.false: // CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK18-NEXT: [[CONV4:%.*]] = sext i32 [[TMP9]] to i64 // CHECK18-NEXT: br label [[COND_END]] // CHECK18: cond.end: -// CHECK18-NEXT: [[COND:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] -// CHECK18-NEXT: [[CONV5:%.*]] = trunc i64 [[COND]] to i32 -// CHECK18-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK18-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK18-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK18-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK18-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK18-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK18-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK18-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK18: omp.dispatch.body: // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: // CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK18-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK18-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK18-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK18-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: // CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 @@ -21973,20 +21965,20 @@ int main (int argc, char **argv) { // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: // CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK18-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK18-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK18: omp.dispatch.inc: // CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK18-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK18-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 +// CHECK18-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK18-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 // CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK18-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK18-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 +// CHECK18-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK18-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK18: omp.dispatch.end: // CHECK18-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) @@ -24368,7 +24360,7 @@ int main (int argc, char **argv) { // CHECK19: omp.dispatch.cond: // CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -26785,7 +26777,7 @@ int main (int argc, char **argv) { // CHECK20: omp.dispatch.cond: // CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK20-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK20-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], [[TMP7]] +// CHECK20-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] // CHECK20-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK20: cond.true: // CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp index 411dd6de11286..da7a6b1712ce6 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp @@ -776,34 +776,33 @@ int main (int argc, char **argv) { // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: // CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV2]], [[TMP6]] +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = sext i32 [[TMP8]] to i64 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[TMP7]], [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[COND]] to i32 -// CHECK1-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 @@ -819,20 +818,20 @@ int main (int argc, char **argv) { // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: // CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) @@ -1761,34 +1760,33 @@ int main (int argc, char **argv) { // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: // CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV2]], [[TMP6]] +// CHECK2-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: // CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CONV3:%.*]] = sext i32 [[TMP8]] to i64 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i64 [ [[TMP7]], [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] -// CHECK2-NEXT: [[CONV4:%.*]] = trunc i64 [[COND]] to i32 -// CHECK2-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK2-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK2-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 -// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: // CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 @@ -1804,20 +1802,20 @@ int main (int argc, char **argv) { // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK2-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK2-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: // CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) @@ -2733,7 +2731,7 @@ int main (int argc, char **argv) { // CHECK3: omp.dispatch.cond: // CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -3690,7 +3688,7 @@ int main (int argc, char **argv) { // CHECK4: omp.dispatch.cond: // CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] +// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -4660,34 +4658,33 @@ int main (int argc, char **argv) { // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: // CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 // CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV2]], [[TMP6]] +// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: // CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CONV3:%.*]] = sext i32 [[TMP8]] to i64 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i64 [ [[TMP7]], [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] -// CHECK5-NEXT: [[CONV4:%.*]] = trunc i64 [[COND]] to i32 -// CHECK5-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: // CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 -// CHECK5-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK5-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: // CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 @@ -4703,20 +4700,20 @@ int main (int argc, char **argv) { // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: // CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK5-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK5-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK5: omp.dispatch.inc: // CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK5-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK5-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK5-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK5-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK5: omp.dispatch.end: // CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) @@ -5645,34 +5642,33 @@ int main (int argc, char **argv) { // CHECK6-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK6: omp.dispatch.cond: // CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 // CHECK6-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK6-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV2]], [[TMP6]] +// CHECK6-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] // CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: // CHECK6-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK6-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 // CHECK6-NEXT: br label [[COND_END:%.*]] // CHECK6: cond.false: // CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CONV3:%.*]] = sext i32 [[TMP8]] to i64 // CHECK6-NEXT: br label [[COND_END]] // CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i64 [ [[TMP7]], [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] -// CHECK6-NEXT: [[CONV4:%.*]] = trunc i64 [[COND]] to i32 -// CHECK6-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK6-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK6-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK6-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK6-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK6-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK6: omp.dispatch.body: // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: // CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 -// CHECK6-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK6-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK6-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK6-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: // CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 @@ -5688,20 +5684,20 @@ int main (int argc, char **argv) { // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: // CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK6-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK6-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK6-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK6-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK6: omp.dispatch.inc: // CHECK6-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK6-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK6-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK6-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 // CHECK6-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK6-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK6-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK6-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK6: omp.dispatch.end: // CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) @@ -6617,7 +6613,7 @@ int main (int argc, char **argv) { // CHECK7: omp.dispatch.cond: // CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -7574,7 +7570,7 @@ int main (int argc, char **argv) { // CHECK8: omp.dispatch.cond: // CHECK8-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK8-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] +// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] // CHECK8-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK8: cond.true: // CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -11022,34 +11018,33 @@ int main (int argc, char **argv) { // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: // CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 // CHECK13-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV3]], [[TMP7]] +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: // CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CONV4:%.*]] = sext i32 [[TMP9]] to i64 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] -// CHECK13-NEXT: [[CONV5:%.*]] = trunc i64 [[COND]] to i32 -// CHECK13-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: // CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 -// CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK13-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: // CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 @@ -11064,20 +11059,20 @@ int main (int argc, char **argv) { // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: // CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 -// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK13: omp.dispatch.inc: // CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK13-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK13-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK13-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK13: omp.dispatch.end: // CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) @@ -13732,34 +13727,33 @@ int main (int argc, char **argv) { // CHECK14-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK14: omp.dispatch.cond: // CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 // CHECK14-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV3]], [[TMP7]] +// CHECK14-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] // CHECK14-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: // CHECK14-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK14-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: // CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[CONV4:%.*]] = sext i32 [[TMP9]] to i64 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] -// CHECK14-NEXT: [[CONV5:%.*]] = trunc i64 [[COND]] to i32 -// CHECK14-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK14-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK14-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK14-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK14: omp.dispatch.body: // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: // CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 -// CHECK14-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK14-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK14-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: // CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 @@ -13774,20 +13768,20 @@ int main (int argc, char **argv) { // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: // CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 -// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK14-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK14: omp.dispatch.inc: // CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK14-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK14-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK14-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK14: omp.dispatch.end: // CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) @@ -16359,7 +16353,7 @@ int main (int argc, char **argv) { // CHECK15: omp.dispatch.cond: // CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -18966,7 +18960,7 @@ int main (int argc, char **argv) { // CHECK16: omp.dispatch.cond: // CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK16-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], [[TMP7]] +// CHECK16-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] // CHECK16-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK16: cond.true: // CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -21656,34 +21650,33 @@ int main (int argc, char **argv) { // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: // CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 // CHECK17-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV3]], [[TMP7]] +// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: // CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CONV4:%.*]] = sext i32 [[TMP9]] to i64 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] -// CHECK17-NEXT: [[CONV5:%.*]] = trunc i64 [[COND]] to i32 -// CHECK17-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: // CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 -// CHECK17-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK17-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: // CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 @@ -21698,20 +21691,20 @@ int main (int argc, char **argv) { // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: // CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 -// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK17-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: // CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK17-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK17-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK17-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: // CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) @@ -24366,34 +24359,33 @@ int main (int argc, char **argv) { // CHECK18-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK18: omp.dispatch.cond: // CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK18-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 // CHECK18-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK18-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV3]], [[TMP7]] +// CHECK18-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK18-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] // CHECK18-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK18: cond.true: // CHECK18-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK18-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK18-NEXT: br label [[COND_END:%.*]] // CHECK18: cond.false: // CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK18-NEXT: [[CONV4:%.*]] = sext i32 [[TMP9]] to i64 // CHECK18-NEXT: br label [[COND_END]] // CHECK18: cond.end: -// CHECK18-NEXT: [[COND:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] -// CHECK18-NEXT: [[CONV5:%.*]] = trunc i64 [[COND]] to i32 -// CHECK18-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK18-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK18-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK18-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK18-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK18-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK18-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK18-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK18: omp.dispatch.body: // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: // CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 -// CHECK18-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK18-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK18-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK18-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: // CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 @@ -24408,20 +24400,20 @@ int main (int argc, char **argv) { // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: // CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 -// CHECK18-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK18-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK18-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK18-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK18: omp.dispatch.inc: // CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK18-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK18-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 +// CHECK18-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK18-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 // CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK18-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK18-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 +// CHECK18-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK18-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK18: omp.dispatch.end: // CHECK18-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) @@ -26993,7 +26985,7 @@ int main (int argc, char **argv) { // CHECK19: omp.dispatch.cond: // CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -29600,7 +29592,7 @@ int main (int argc, char **argv) { // CHECK20: omp.dispatch.cond: // CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK20-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK20-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], [[TMP7]] +// CHECK20-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] // CHECK20-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK20: cond.true: // CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp index a48230f44f502..b1a46f0dfbb58 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp @@ -755,34 +755,33 @@ int main (int argc, char **argv) { // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: // CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV2]], [[TMP6]] +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = sext i32 [[TMP8]] to i64 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[TMP7]], [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[COND]] to i32 -// CHECK1-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 @@ -798,20 +797,20 @@ int main (int argc, char **argv) { // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: // CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) @@ -1670,34 +1669,33 @@ int main (int argc, char **argv) { // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: // CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV2]], [[TMP6]] +// CHECK2-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: // CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CONV3:%.*]] = sext i32 [[TMP8]] to i64 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i64 [ [[TMP7]], [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] -// CHECK2-NEXT: [[CONV4:%.*]] = trunc i64 [[COND]] to i32 -// CHECK2-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK2-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK2-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: // CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 @@ -1713,20 +1711,20 @@ int main (int argc, char **argv) { // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK2-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK2-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: // CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) @@ -2572,7 +2570,7 @@ int main (int argc, char **argv) { // CHECK3: omp.dispatch.cond: // CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -3459,7 +3457,7 @@ int main (int argc, char **argv) { // CHECK4: omp.dispatch.cond: // CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] +// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -4359,34 +4357,33 @@ int main (int argc, char **argv) { // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: // CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 // CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV2]], [[TMP6]] +// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: // CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CONV3:%.*]] = sext i32 [[TMP8]] to i64 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i64 [ [[TMP7]], [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] -// CHECK5-NEXT: [[CONV4:%.*]] = trunc i64 [[COND]] to i32 -// CHECK5-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: // CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK5-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: // CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 @@ -4402,20 +4399,20 @@ int main (int argc, char **argv) { // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: // CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK5-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK5-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK5: omp.dispatch.inc: // CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK5-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK5-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK5-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK5-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK5: omp.dispatch.end: // CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) @@ -5274,34 +5271,33 @@ int main (int argc, char **argv) { // CHECK6-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK6: omp.dispatch.cond: // CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 // CHECK6-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK6-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV2]], [[TMP6]] +// CHECK6-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] // CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: // CHECK6-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK6-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 // CHECK6-NEXT: br label [[COND_END:%.*]] // CHECK6: cond.false: // CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CONV3:%.*]] = sext i32 [[TMP8]] to i64 // CHECK6-NEXT: br label [[COND_END]] // CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i64 [ [[TMP7]], [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] -// CHECK6-NEXT: [[CONV4:%.*]] = trunc i64 [[COND]] to i32 -// CHECK6-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK6-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK6-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK6-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK6-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK6-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK6: omp.dispatch.body: // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: // CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK6-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK6-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK6-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: // CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 @@ -5317,20 +5313,20 @@ int main (int argc, char **argv) { // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: // CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK6-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK6-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK6: omp.dispatch.inc: // CHECK6-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK6-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK6-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK6-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 // CHECK6-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK6-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK6-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK6-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK6: omp.dispatch.end: // CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) @@ -6176,7 +6172,7 @@ int main (int argc, char **argv) { // CHECK7: omp.dispatch.cond: // CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -7063,7 +7059,7 @@ int main (int argc, char **argv) { // CHECK8: omp.dispatch.cond: // CHECK8-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK8-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] +// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] // CHECK8-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK8: cond.true: // CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -9498,34 +9494,33 @@ int main (int argc, char **argv) { // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: // CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 // CHECK13-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV3]], [[TMP7]] +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: // CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CONV4:%.*]] = sext i32 [[TMP9]] to i64 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] -// CHECK13-NEXT: [[CONV5:%.*]] = trunc i64 [[COND]] to i32 -// CHECK13-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: // CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK13-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: // CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 @@ -9540,20 +9535,20 @@ int main (int argc, char **argv) { // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: // CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK13: omp.dispatch.inc: // CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK13-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK13-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK13-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK13: omp.dispatch.end: // CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) @@ -11968,34 +11963,33 @@ int main (int argc, char **argv) { // CHECK14-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK14: omp.dispatch.cond: // CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 // CHECK14-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV3]], [[TMP7]] +// CHECK14-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] // CHECK14-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: // CHECK14-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK14-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: // CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[CONV4:%.*]] = sext i32 [[TMP9]] to i64 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] -// CHECK14-NEXT: [[CONV5:%.*]] = trunc i64 [[COND]] to i32 -// CHECK14-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK14-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK14-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK14-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK14: omp.dispatch.body: // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: // CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK14-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK14-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: // CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 @@ -12010,20 +12004,20 @@ int main (int argc, char **argv) { // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: // CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK14-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK14: omp.dispatch.inc: // CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK14-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK14-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK14-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK14: omp.dispatch.end: // CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) @@ -14375,7 +14369,7 @@ int main (int argc, char **argv) { // CHECK15: omp.dispatch.cond: // CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -16762,7 +16756,7 @@ int main (int argc, char **argv) { // CHECK16: omp.dispatch.cond: // CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK16-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], [[TMP7]] +// CHECK16-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] // CHECK16-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK16: cond.true: // CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -19212,34 +19206,33 @@ int main (int argc, char **argv) { // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: // CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 // CHECK17-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV3]], [[TMP7]] +// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: // CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CONV4:%.*]] = sext i32 [[TMP9]] to i64 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] -// CHECK17-NEXT: [[CONV5:%.*]] = trunc i64 [[COND]] to i32 -// CHECK17-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK17-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: // CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK17-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: // CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 @@ -19254,20 +19247,20 @@ int main (int argc, char **argv) { // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: // CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK17-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: // CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK17-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK17-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK17-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: // CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) @@ -21682,34 +21675,33 @@ int main (int argc, char **argv) { // CHECK18-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK18: omp.dispatch.cond: // CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK18-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 // CHECK18-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK18-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV3]], [[TMP7]] +// CHECK18-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK18-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] // CHECK18-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK18: cond.true: // CHECK18-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK18-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK18-NEXT: br label [[COND_END:%.*]] // CHECK18: cond.false: // CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK18-NEXT: [[CONV4:%.*]] = sext i32 [[TMP9]] to i64 // CHECK18-NEXT: br label [[COND_END]] // CHECK18: cond.end: -// CHECK18-NEXT: [[COND:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] -// CHECK18-NEXT: [[CONV5:%.*]] = trunc i64 [[COND]] to i32 -// CHECK18-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK18-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK18-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK18-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK18-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK18-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK18-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK18-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK18: omp.dispatch.body: // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: // CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK18-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK18-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK18-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK18-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: // CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 @@ -21724,20 +21716,20 @@ int main (int argc, char **argv) { // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: // CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK18-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK18-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK18: omp.dispatch.inc: // CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK18-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK18-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 +// CHECK18-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK18-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 // CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK18-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK18-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 +// CHECK18-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK18-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK18: omp.dispatch.end: // CHECK18-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) @@ -24089,7 +24081,7 @@ int main (int argc, char **argv) { // CHECK19: omp.dispatch.cond: // CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -26476,7 +26468,7 @@ int main (int argc, char **argv) { // CHECK20: omp.dispatch.cond: // CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK20-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK20-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], [[TMP7]] +// CHECK20-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] // CHECK20-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK20: cond.true: // CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp index 00ec282f9322c..c62b4386594ff 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp @@ -800,34 +800,33 @@ int main (int argc, char **argv) { // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: // CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV2]], [[TMP6]] +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = sext i32 [[TMP8]] to i64 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[TMP7]], [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[COND]] to i32 -// CHECK1-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 @@ -843,20 +842,20 @@ int main (int argc, char **argv) { // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: // CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) @@ -1785,34 +1784,33 @@ int main (int argc, char **argv) { // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: // CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV2]], [[TMP6]] +// CHECK2-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: // CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CONV3:%.*]] = sext i32 [[TMP8]] to i64 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i64 [ [[TMP7]], [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] -// CHECK2-NEXT: [[CONV4:%.*]] = trunc i64 [[COND]] to i32 -// CHECK2-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK2-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK2-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 -// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: // CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 @@ -1828,20 +1826,20 @@ int main (int argc, char **argv) { // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK2-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK2-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: // CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) @@ -2770,34 +2768,33 @@ int main (int argc, char **argv) { // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: // CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 // CHECK3-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV2]], [[TMP6]] +// CHECK3-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: // CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CONV3:%.*]] = sext i32 [[TMP8]] to i64 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i64 [ [[TMP7]], [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] -// CHECK3-NEXT: [[CONV4:%.*]] = trunc i64 [[COND]] to i32 -// CHECK3-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: // CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: // CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 @@ -2813,20 +2810,20 @@ int main (int argc, char **argv) { // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: // CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: // CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 // CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: // CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) @@ -3755,34 +3752,33 @@ int main (int argc, char **argv) { // CHECK4-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK4: omp.dispatch.cond: // CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 // CHECK4-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK4-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV2]], [[TMP6]] +// CHECK4-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] // CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK4-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: // CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CONV3:%.*]] = sext i32 [[TMP8]] to i64 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i64 [ [[TMP7]], [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] -// CHECK4-NEXT: [[CONV4:%.*]] = trunc i64 [[COND]] to i32 -// CHECK4-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK4-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK4-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK4-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK4: omp.dispatch.body: // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: // CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 -// CHECK4-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK4-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: // CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 @@ -3798,20 +3794,20 @@ int main (int argc, char **argv) { // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: // CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK4-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK4-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK4: omp.dispatch.inc: // CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK4-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK4-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_LB]], align 4 // CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK4-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK4-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK4: omp.dispatch.end: // CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) @@ -4727,7 +4723,7 @@ int main (int argc, char **argv) { // CHECK5: omp.dispatch.cond: // CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -5684,7 +5680,7 @@ int main (int argc, char **argv) { // CHECK6: omp.dispatch.cond: // CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK6-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] +// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] // CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: // CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -6641,7 +6637,7 @@ int main (int argc, char **argv) { // CHECK7: omp.dispatch.cond: // CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -7598,7 +7594,7 @@ int main (int argc, char **argv) { // CHECK8: omp.dispatch.cond: // CHECK8-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK8-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]] +// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] // CHECK8-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK8: cond.true: // CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -10993,34 +10989,33 @@ int main (int argc, char **argv) { // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: // CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 // CHECK13-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV3]], [[TMP7]] +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: // CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CONV4:%.*]] = sext i32 [[TMP9]] to i64 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] -// CHECK13-NEXT: [[CONV5:%.*]] = trunc i64 [[COND]] to i32 -// CHECK13-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK13-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: // CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 -// CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK13-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: // CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 @@ -11035,20 +11030,20 @@ int main (int argc, char **argv) { // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: // CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 -// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK13: omp.dispatch.inc: // CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK13-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK13-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK13-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK13: omp.dispatch.end: // CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) @@ -13653,34 +13648,33 @@ int main (int argc, char **argv) { // CHECK14-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK14: omp.dispatch.cond: // CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 // CHECK14-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV3]], [[TMP7]] +// CHECK14-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] // CHECK14-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: // CHECK14-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK14-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: // CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[CONV4:%.*]] = sext i32 [[TMP9]] to i64 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] -// CHECK14-NEXT: [[CONV5:%.*]] = trunc i64 [[COND]] to i32 -// CHECK14-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK14-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK14-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK14-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK14: omp.dispatch.body: // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: // CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 -// CHECK14-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK14-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK14-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: // CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 @@ -13695,20 +13689,20 @@ int main (int argc, char **argv) { // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: // CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 -// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK14-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK14: omp.dispatch.inc: // CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK14-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK14-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK14-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK14: omp.dispatch.end: // CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) @@ -16313,34 +16307,33 @@ int main (int argc, char **argv) { // CHECK15-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK15: omp.dispatch.cond: // CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 // CHECK15-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK15-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV3]], [[TMP7]] +// CHECK15-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK15-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: // CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CONV4:%.*]] = sext i32 [[TMP9]] to i64 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] -// CHECK15-NEXT: [[CONV5:%.*]] = trunc i64 [[COND]] to i32 -// CHECK15-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK15: omp.dispatch.body: // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: // CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 -// CHECK15-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK15-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: // CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 @@ -16355,20 +16348,20 @@ int main (int argc, char **argv) { // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: // CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 -// CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK15-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK15-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK15: omp.dispatch.inc: // CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK15-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK15-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 // CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK15-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK15-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK15: omp.dispatch.end: // CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) @@ -18973,34 +18966,33 @@ int main (int argc, char **argv) { // CHECK16-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK16: omp.dispatch.cond: // CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK16-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 // CHECK16-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK16-NEXT: [[CMP:%.*]] = icmp ugt i64 [[CONV3]], [[TMP7]] +// CHECK16-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK16-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV3]] // CHECK16-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK16: cond.true: // CHECK16-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK16-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK16-NEXT: br label [[COND_END:%.*]] // CHECK16: cond.false: // CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK16-NEXT: [[CONV4:%.*]] = sext i32 [[TMP9]] to i64 // CHECK16-NEXT: br label [[COND_END]] // CHECK16: cond.end: -// CHECK16-NEXT: [[COND:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] -// CHECK16-NEXT: [[CONV5:%.*]] = trunc i64 [[COND]] to i32 -// CHECK16-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[COND:%.*]] = phi i32 [ [[CONV4]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK16-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK16-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK16-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK16-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK16-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK16-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK16: omp.dispatch.body: // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: // CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 -// CHECK16-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK16-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK16-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK16-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: // CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 @@ -19015,20 +19007,20 @@ int main (int argc, char **argv) { // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: // CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 -// CHECK16-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK16-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK16-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK16-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK16: omp.dispatch.inc: // CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK16-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK16-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_LB]], align 4 +// CHECK16-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK16-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 // CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK16-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK16-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK16-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 // CHECK16-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK16: omp.dispatch.end: // CHECK16-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) @@ -21570,7 +21562,7 @@ int main (int argc, char **argv) { // CHECK17: omp.dispatch.cond: // CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -24147,7 +24139,7 @@ int main (int argc, char **argv) { // CHECK18: omp.dispatch.cond: // CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK18-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], [[TMP7]] +// CHECK18-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] // CHECK18-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK18: cond.true: // CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -26724,7 +26716,7 @@ int main (int argc, char **argv) { // CHECK19: omp.dispatch.cond: // CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 @@ -29301,7 +29293,7 @@ int main (int argc, char **argv) { // CHECK20: omp.dispatch.cond: // CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK20-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK20-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], [[TMP7]] +// CHECK20-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] // CHECK20-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK20: cond.true: // CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 From 71d5b0a7572010e07430dd0c3c79bb6c4ebfdfac Mon Sep 17 00:00:00 2001 From: Irina Dobrescu Date: Wed, 30 Jun 2021 15:49:48 +0100 Subject: [PATCH 428/619] [AArch64][GlobalISel]Legalise some vector types for min/max Differential Revision: https://reviews.llvm.org/D105200 --- .../AArch64/GISel/AArch64LegalizerInfo.cpp | 1 + .../AArch64/GlobalISel/legalize-min-max.mir | 130 ++++++++++++++++++ 2 files changed, 131 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/legalize-min-max.mir diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index e2532b23e81a2..69be7fb94778b 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -168,6 +168,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) getActionDefinitionsBuilder({G_SMULH, G_UMULH}).legalFor({s32, s64}); getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX}) + .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32}) .lowerIf([=](const LegalityQuery &Q) { return Q.Types[0].isScalar(); }); getActionDefinitionsBuilder( diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-min-max.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-min-max.mir new file mode 100644 index 0000000000000..5510b6d243929 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-min-max.mir @@ -0,0 +1,130 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64-- -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s + +--- +name: v8s8_smin +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + + ; CHECK-LABEL: name: v8s8_smin + ; CHECK: liveins: $x0 + ; CHECK: %vec:_(<8 x s8>) = G_IMPLICIT_DEF + ; CHECK: %vec1:_(<8 x s8>) = G_IMPLICIT_DEF + ; CHECK: %smin:_(<8 x s8>) = G_SMIN %vec, %vec1 + ; CHECK: $x0 = COPY %smin(<8 x s8>) + ; CHECK: RET_ReallyLR implicit $x0 + %vec:_(<8 x s8>) = G_IMPLICIT_DEF + %vec1:_(<8 x s8>) = G_IMPLICIT_DEF + %smin:_(<8 x s8>) = G_SMIN %vec, %vec1 + $x0 = COPY %smin + RET_ReallyLR implicit $x0 + +... +--- +name: v16s8_smin +tracksRegLiveness: true +body: | + bb.0: + liveins: $q0 + + ; CHECK-LABEL: name: v16s8_smin + ; CHECK: liveins: $q0 + ; CHECK: %vec:_(<16 x s8>) = G_IMPLICIT_DEF + ; CHECK: %vec1:_(<16 x s8>) = G_IMPLICIT_DEF + ; CHECK: %smin:_(<16 x s8>) = G_SMIN %vec, %vec1 + ; CHECK: $q0 = COPY %smin(<16 x s8>) + ; CHECK: RET_ReallyLR implicit $q0 + %vec:_(<16 x s8>) = G_IMPLICIT_DEF + %vec1:_(<16 x s8>) = G_IMPLICIT_DEF + %smin:_(<16 x s8>) = G_SMIN %vec, %vec1 + $q0 = COPY %smin + RET_ReallyLR implicit $q0 + +... +--- +name: v4s16_smin +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + + ; CHECK-LABEL: name: v4s16_smin + ; CHECK: liveins: $x0 + ; CHECK: %vec:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: %vec1:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: %smin:_(<4 x s16>) = G_SMIN %vec, %vec1 + ; CHECK: $x0 = COPY %smin(<4 x s16>) + ; CHECK: RET_ReallyLR implicit $x0 + %vec:_(<4 x s16>) = G_IMPLICIT_DEF + %vec1:_(<4 x s16>) = G_IMPLICIT_DEF + %smin:_(<4 x s16>) = G_SMIN %vec, %vec1 + $x0 = COPY %smin + RET_ReallyLR implicit $x0 + +... +--- +name: v8s16_smin +tracksRegLiveness: true +body: | + bb.0: + liveins: $q0 + + ; CHECK-LABEL: name: v8s16_smin + ; CHECK: liveins: $q0 + ; CHECK: %vec:_(<8 x s16>) = G_IMPLICIT_DEF + ; CHECK: %vec1:_(<8 x s16>) = G_IMPLICIT_DEF + ; CHECK: %smin:_(<8 x s16>) = G_SMIN %vec, %vec1 + ; CHECK: $q0 = COPY %smin(<8 x s16>) + ; CHECK: RET_ReallyLR implicit $q0 + %vec:_(<8 x s16>) = G_IMPLICIT_DEF + %vec1:_(<8 x s16>) = G_IMPLICIT_DEF + %smin:_(<8 x s16>) = G_SMIN %vec, %vec1 + $q0 = COPY %smin + RET_ReallyLR implicit $q0 + +... +--- +name: v2s32_smin +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + + ; CHECK-LABEL: name: v2s32_smin + ; CHECK: liveins: $x0 + ; CHECK: %vec:_(<2 x s32>) = G_IMPLICIT_DEF + ; CHECK: %vec1:_(<2 x s32>) = G_IMPLICIT_DEF + ; CHECK: %smin:_(<2 x s32>) = G_SMIN %vec, %vec1 + ; CHECK: $x0 = COPY %smin(<2 x s32>) + ; CHECK: RET_ReallyLR implicit $x0 + %vec:_(<2 x s32>) = G_IMPLICIT_DEF + %vec1:_(<2 x s32>) = G_IMPLICIT_DEF + %smin:_(<2 x s32>) = G_SMIN %vec, %vec1 + $x0 = COPY %smin + RET_ReallyLR implicit $x0 + +... +--- +name: v4s32_smin +tracksRegLiveness: true +body: | + bb.0: + liveins: $q0 + + ; CHECK-LABEL: name: v4s32_smin + ; CHECK: liveins: $q0 + ; CHECK: %vec:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK: %vec1:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK: %smin:_(<4 x s32>) = G_SMIN %vec, %vec1 + ; CHECK: $q0 = COPY %smin(<4 x s32>) + ; CHECK: RET_ReallyLR implicit $q0 + %vec:_(<4 x s32>) = G_IMPLICIT_DEF + %vec1:_(<4 x s32>) = G_IMPLICIT_DEF + %smin:_(<4 x s32>) = G_SMIN %vec, %vec1 + $q0 = COPY %smin + RET_ReallyLR implicit $q0 + +... + From 42d7d52314e0719b899b6e7ba924d34260866880 Mon Sep 17 00:00:00 2001 From: David Green Date: Thu, 1 Jul 2021 16:56:23 +0100 Subject: [PATCH 429/619] [ARM] Extra BFI codegen tests. NFC --- llvm/test/CodeGen/ARM/bfi.ll | 225 +++++++++++++++++++++++++++++++++++ 1 file changed, 225 insertions(+) diff --git a/llvm/test/CodeGen/ARM/bfi.ll b/llvm/test/CodeGen/ARM/bfi.ll index 725e173324af4..b6126ab51c350 100644 --- a/llvm/test/CodeGen/ARM/bfi.ll +++ b/llvm/test/CodeGen/ARM/bfi.ll @@ -216,3 +216,228 @@ define i32 @f13(i32 %x, i32 %y) { %sel = select i1 %cmp, i32 %y2, i32 %or ret i32 %sel } + +define i32 @bfi1(i32 %a, i32 %b) { +; CHECK-LABEL: bfi1: +; CHECK: @ %bb.0: +; CHECK-NEXT: and r2, r0, #1 +; CHECK-NEXT: bic r1, r1, #19 +; CHECK-NEXT: orr r1, r1, r2 +; CHECK-NEXT: and r2, r0, #16 +; CHECK-NEXT: orr r1, r1, r2 +; CHECK-NEXT: and r0, r0, #2 +; CHECK-NEXT: orr r0, r1, r0 +; CHECK-NEXT: bx lr + %x1 = and i32 %a, 1 + %y1 = and i32 %b, 4294967294 + %z1 = or i32 %y1, %x1 + %x2 = and i32 %a, 16 + %y2 = and i32 %z1, 4294967279 + %z2 = or i32 %y2, %x2 + %x3 = and i32 %a, 2 + %y3 = and i32 %z2, 4294967293 + %z3 = or i32 %y3, %x3 + ret i32 %z3 +} + +define void @bfi1_use(i32 %a, i32 %b) { +; CHECK-LABEL: bfi1_use: +; CHECK: @ %bb.0: +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: mov r2, r1 +; CHECK-NEXT: lsr r3, r0, #4 +; CHECK-NEXT: bfi r2, r0, #0, #1 +; CHECK-NEXT: lsr r0, r0, #1 +; CHECK-NEXT: mov r1, r2 +; CHECK-NEXT: bfi r1, r3, #4, #1 +; CHECK-NEXT: mov r3, r1 +; CHECK-NEXT: bfi r3, r0, #1, #1 +; CHECK-NEXT: mov r0, r2 +; CHECK-NEXT: mov r2, r3 +; CHECK-NEXT: bl use +; CHECK-NEXT: pop {r11, pc} + %x1 = and i32 %a, 1 + %y1 = and i32 %b, 4294967294 + %z1 = or i32 %y1, %x1 + %x2 = and i32 %a, 16 + %y2 = and i32 %z1, 4294967279 + %z2 = or i32 %y2, %x2 + %x3 = and i32 %a, 2 + %y3 = and i32 %z2, 4294967293 + %z3 = or i32 %y3, %x3 + call void @use(i32 %z1, i32 %z2, i32 %z3, i32 %z3) + ret void +} + +define i32 @bfi2(i32 %a, i32 %b) { +; CHECK-LABEL: bfi2: +; CHECK: @ %bb.0: +; CHECK-NEXT: movw r2, #65148 +; CHECK-NEXT: movt r2, #65535 +; CHECK-NEXT: and r1, r1, r2 +; CHECK-NEXT: and r2, r0, #1 +; CHECK-NEXT: orr r1, r1, r2 +; CHECK-NEXT: and r2, r0, #2 +; CHECK-NEXT: orr r1, r1, r2 +; CHECK-NEXT: and r2, r0, #128 +; CHECK-NEXT: orr r1, r1, r2 +; CHECK-NEXT: and r0, r0, #256 +; CHECK-NEXT: orr r0, r1, r0 +; CHECK-NEXT: bx lr + %x1 = and i32 %a, 1 + %y1 = and i32 %b, 4294967294 + %z1 = or i32 %y1, %x1 + %x2 = and i32 %a, 2 + %y2 = and i32 %z1, 4294967293 + %z2 = or i32 %y2, %x2 + %x3 = and i32 %a, 128 + %y3 = and i32 %z2, 4294967167 + %z3 = or i32 %y3, %x3 + %x4 = and i32 %a, 256 + %y4 = and i32 %z3, 4294967039 + %z4 = or i32 %y4, %x4 + ret i32 %z4 +} + +define void @bfi2_uses(i32 %a, i32 %b) { +; CHECK-LABEL: bfi2_uses: +; CHECK: @ %bb.0: +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: mov r12, r1 +; CHECK-NEXT: bfi r1, r0, #0, #2 +; CHECK-NEXT: bfi r12, r0, #0, #1 +; CHECK-NEXT: lsr r0, r0, #7 +; CHECK-NEXT: mov r2, r1 +; CHECK-NEXT: mov r3, r1 +; CHECK-NEXT: bfi r2, r0, #7, #1 +; CHECK-NEXT: bfi r3, r0, #7, #2 +; CHECK-NEXT: mov r0, r12 +; CHECK-NEXT: bl use +; CHECK-NEXT: pop {r11, pc} + %x1 = and i32 %a, 1 + %y1 = and i32 %b, 4294967294 + %z1 = or i32 %y1, %x1 + %x2 = and i32 %a, 2 + %y2 = and i32 %z1, 4294967293 + %z2 = or i32 %y2, %x2 + %x3 = and i32 %a, 128 + %y3 = and i32 %z2, 4294967167 + %z3 = or i32 %y3, %x3 + %x4 = and i32 %a, 256 + %y4 = and i32 %z3, 4294967039 + %z4 = or i32 %y4, %x4 + call void @use(i32 %z1, i32 %z2, i32 %z3, i32 %z4) + ret void +} + +define i32 @bfi3(i32 %a, i32 %b) { +; CHECK-LABEL: bfi3: +; CHECK: @ %bb.0: +; CHECK-NEXT: movw r2, #65148 +; CHECK-NEXT: movt r2, #65535 +; CHECK-NEXT: and r1, r1, r2 +; CHECK-NEXT: and r2, r0, #1 +; CHECK-NEXT: orr r1, r1, r2 +; CHECK-NEXT: and r2, r0, #128 +; CHECK-NEXT: orr r1, r1, r2 +; CHECK-NEXT: and r2, r0, #2 +; CHECK-NEXT: orr r1, r1, r2 +; CHECK-NEXT: and r0, r0, #256 +; CHECK-NEXT: orr r0, r1, r0 +; CHECK-NEXT: bx lr + %x1 = and i32 %a, 1 + %y1 = and i32 %b, 4294967294 + %z1 = or i32 %y1, %x1 + %x2 = and i32 %a, 128 + %y2 = and i32 %z1, 4294967167 + %z2 = or i32 %y2, %x2 + %x3 = and i32 %a, 2 + %y3 = and i32 %z2, 4294967293 + %z3 = or i32 %y3, %x3 + %x4 = and i32 %a, 256 + %y4 = and i32 %z3, 4294967039 + %z4 = or i32 %y4, %x4 + ret i32 %z4 +} + +define void @bfi3_uses(i32 %a, i32 %b) { +; CHECK-LABEL: bfi3_uses: +; CHECK: @ %bb.0: +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: mov r12, r1 +; CHECK-NEXT: lsr r2, r0, #7 +; CHECK-NEXT: bfi r12, r0, #0, #1 +; CHECK-NEXT: lsr r3, r0, #1 +; CHECK-NEXT: lsr r0, r0, #8 +; CHECK-NEXT: mov r1, r12 +; CHECK-NEXT: bfi r1, r2, #7, #1 +; CHECK-NEXT: mov r2, r1 +; CHECK-NEXT: bfi r2, r3, #1, #1 +; CHECK-NEXT: mov r3, r2 +; CHECK-NEXT: bfi r3, r0, #8, #1 +; CHECK-NEXT: mov r0, r12 +; CHECK-NEXT: bl use +; CHECK-NEXT: pop {r11, pc} + %x1 = and i32 %a, 1 + %y1 = and i32 %b, 4294967294 + %z1 = or i32 %y1, %x1 + %x2 = and i32 %a, 128 + %y2 = and i32 %z1, 4294967167 + %z2 = or i32 %y2, %x2 + %x3 = and i32 %a, 2 + %y3 = and i32 %z2, 4294967293 + %z3 = or i32 %y3, %x3 + %x4 = and i32 %a, 256 + %y4 = and i32 %z3, 4294967039 + %z4 = or i32 %y4, %x4 + call void @use(i32 %z1, i32 %z2, i32 %z3, i32 %z4) + ret void +} + +define i32 @bfi4(i32 %A, i2 zeroext %BB, i32* %d) { +; CHECK-LABEL: bfi4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: lsr r3, r0, #1 +; CHECK-NEXT: mov r12, #96 +; CHECK-NEXT: bfi r1, r3, #2, #1 +; CHECK-NEXT: tst r0, #32 +; CHECK-NEXT: movweq r12, #32 +; CHECK-NEXT: bfi r1, r3, #9, #1 +; CHECK-NEXT: lsr r3, r0, #2 +; CHECK-NEXT: bfi r1, r3, #3, #1 +; CHECK-NEXT: bfi r1, r3, #10, #1 +; CHECK-NEXT: and r3, r0, #8 +; CHECK-NEXT: orr r1, r1, r3, lsl #8 +; CHECK-NEXT: and r3, r0, #64 +; CHECK-NEXT: and r0, r0, #128 +; CHECK-NEXT: orr r1, r1, r12 +; CHECK-NEXT: orr r1, r1, r3, lsl #1 +; CHECK-NEXT: str r1, [r2] +; CHECK-NEXT: bx lr +entry: + %B = zext i2 %BB to i32 + %and = and i32 %A, 2 + %tobool12.not = icmp eq i32 %and, 0 + %or17 = or i32 %B, 516 + %spec.select112 = select i1 %tobool12.not, i32 %B, i32 %or17 + %and20 = and i32 %A, 4 + %tobool21.not = icmp eq i32 %and20, 0 + %or26 = or i32 %spec.select112, 1032 + %spec.select114 = select i1 %tobool21.not, i32 %spec.select112, i32 %or26 + store i32 %spec.select114, i32* %d, align 4 + %and29 = shl i32 %A, 8 + %l2 = and i32 %and29, 2048 + %l3 = or i32 %l2, %spec.select114 + %and38 = and i32 %A, 32 + %tobool39.not = icmp eq i32 %and38, 0 + %spec.select.v = select i1 %tobool39.not, i32 32, i32 96 + %spec.select = or i32 %l3, %spec.select.v + %and45 = shl i32 %A, 1 + %l4 = and i32 %and45, 128 + %l5 = or i32 %l4, %spec.select + store i32 %l5, i32* %d, align 4 + %and52 = and i32 %A, 128 + ret i32 %and52 +} + +declare void @use(i32, i32, i32, i32) From 661577e698645d0645a5639ec180f0e3c83af021 Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Wed, 30 Jun 2021 14:48:34 -0700 Subject: [PATCH 430/619] [AMDGPU] Fix immediate sign during V_MOV_B64_PSEUDO expansion Creating a V_MOV_B32 with zero extended immediate source prevented conversion to V_BFREV_B32. Differential Revision: https://reviews.llvm.org/D105235 --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 4 ++-- .../AMDGPU/v_mov_b64_expand_and_shrink.mir | 12 ++++++++++++ .../CodeGen/AMDGPU/v_mov_b64_expansion.mir | 18 +++++++++--------- 3 files changed, 23 insertions(+), 11 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/v_mov_b64_expand_and_shrink.mir diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 7fd275bd0ade9..0bba1d7e283b9 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -1729,10 +1729,10 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { .addImm(0); // clamp } else { BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo) - .addImm(Lo.getZExtValue()) + .addImm(Lo.getSExtValue()) .addReg(Dst, RegState::Implicit | RegState::Define); BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi) - .addImm(Hi.getZExtValue()) + .addImm(Hi.getSExtValue()) .addReg(Dst, RegState::Implicit | RegState::Define); } } else { diff --git a/llvm/test/CodeGen/AMDGPU/v_mov_b64_expand_and_shrink.mir b/llvm/test/CodeGen/AMDGPU/v_mov_b64_expand_and_shrink.mir new file mode 100644 index 0000000000000..5a3f8abbc532f --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/v_mov_b64_expand_and_shrink.mir @@ -0,0 +1,12 @@ +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass postrapseudos,si-shrink-instructions -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s + +--- +# GCN-LABEL: name: expand_imm64_sext_shrink_to_bfrev +# GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr0_vgpr1 +# GCN: $vgpr1 = V_BFREV_B32_e32 1, implicit $exec, implicit-def $vgpr0_vgpr1 +name: expand_imm64_sext_shrink_to_bfrev +tracksRegLiveness: true +body: | + bb.0: + $vgpr0_vgpr1 = V_MOV_B64_PSEUDO -9223372036854775808, implicit $exec +... diff --git a/llvm/test/CodeGen/AMDGPU/v_mov_b64_expansion.mir b/llvm/test/CodeGen/AMDGPU/v_mov_b64_expansion.mir index 9560d1c927222..1fc72422edf57 100644 --- a/llvm/test/CodeGen/AMDGPU/v_mov_b64_expansion.mir +++ b/llvm/test/CodeGen/AMDGPU/v_mov_b64_expansion.mir @@ -22,10 +22,10 @@ body: | ... # GCN-LABEL: name: v_mov_b64_from_sext_inline_imm -# GFX900: $vgpr0 = V_MOV_B32_e32 4294967294, implicit $exec, implicit-def $vgpr0_vgpr1 -# GFX900: $vgpr1 = V_MOV_B32_e32 4294967295, implicit $exec, implicit-def $vgpr0_vgpr1 -# GFX90A: $vgpr0 = V_MOV_B32_e32 4294967294, implicit $exec, implicit-def $vgpr0_vgpr1 -# GFX90A: $vgpr1 = V_MOV_B32_e32 4294967295, implicit $exec, implicit-def $vgpr0_vgpr1 +# GFX900: $vgpr0 = V_MOV_B32_e32 -2, implicit $exec, implicit-def $vgpr0_vgpr1 +# GFX900: $vgpr1 = V_MOV_B32_e32 -1, implicit $exec, implicit-def $vgpr0_vgpr1 +# GFX90A: $vgpr0 = V_MOV_B32_e32 -2, implicit $exec, implicit-def $vgpr0_vgpr1 +# GFX90A: $vgpr1 = V_MOV_B32_e32 -1, implicit $exec, implicit-def $vgpr0_vgpr1 name: v_mov_b64_from_sext_inline_imm body: | bb.0: @@ -34,7 +34,7 @@ body: | # GCN-LABEL: name: v_mov_b64_from_lit # GCN: $vgpr0 = V_MOV_B32_e32 1430494974, implicit $exec, implicit-def $vgpr0_vgpr1 -# GCN: $vgpr1 = V_MOV_B32_e32 4294734465, implicit $exec, implicit-def $vgpr0_vgpr1 +# GCN: $vgpr1 = V_MOV_B32_e32 -232831, implicit $exec, implicit-def $vgpr0_vgpr1 name: v_mov_b64_from_lit body: | bb.0: @@ -42,7 +42,7 @@ body: | ... # GCN-LABEL: name: v_mov_b64_from_first_inline_imm -# GCN: $vgpr0 = V_MOV_B32_e32 4294967295, implicit $exec, implicit-def $vgpr0_vgpr1 +# GCN: $vgpr0 = V_MOV_B32_e32 -1, implicit $exec, implicit-def $vgpr0_vgpr1 # GCN: $vgpr1 = V_MOV_B32_e32 268435455, implicit $exec, implicit-def $vgpr0_vgpr1 name: v_mov_b64_from_first_inline_imm body: | @@ -52,7 +52,7 @@ body: | # GCN-LABEL: name: v_mov_b64_from_second_inline_imm # GCN: $vgpr0 = V_MOV_B32_e32 268435455, implicit $exec, implicit-def $vgpr0_vgpr1 -# GCN: $vgpr1 = V_MOV_B32_e32 4294967295, implicit $exec, implicit-def $vgpr0_vgpr1 +# GCN: $vgpr1 = V_MOV_B32_e32 -1, implicit $exec, implicit-def $vgpr0_vgpr1 name: v_mov_b64_from_second_inline_imm body: | bb.0: @@ -60,8 +60,8 @@ body: | ... # GCN-LABEL: name: v_mov_b64_from_same_sext_inline_imm -# GFX900: $vgpr0 = V_MOV_B32_e32 4294967295, implicit $exec, implicit-def $vgpr0_vgpr1 -# GFX900: $vgpr1 = V_MOV_B32_e32 4294967295, implicit $exec, implicit-def $vgpr0_vgpr1 +# GFX900: $vgpr0 = V_MOV_B32_e32 -1, implicit $exec, implicit-def $vgpr0_vgpr1 +# GFX900: $vgpr1 = V_MOV_B32_e32 -1, implicit $exec, implicit-def $vgpr0_vgpr1 # GFX90A: $vgpr0_vgpr1 = V_PK_MOV_B32 8, -1, 8, -1, 0, 0, 0, 0, 0, implicit $exec name: v_mov_b64_from_same_sext_inline_imm body: | From 2668727929e497553eba485876eb6190d38cc367 Mon Sep 17 00:00:00 2001 From: Bradley Smith Date: Tue, 29 Jun 2021 12:40:46 +0100 Subject: [PATCH 431/619] [SelectionDAG] Implement PromoteIntRes_INSERT_SUBVECTOR Inserting into a smaller-than-legal scalable vector would result in an internal compiler error. For example, inserting a into a (both illegal vector types for SVE) would cause a crash. This crash was happening because there was no code to promote (legalise) the result of an INSERT_SUBVECTOR node. This patch implements PromoteIntRes_INSERT_SUBVECTOR, which legalises the ISD node. This is currently done by going through memory. This is necessary because of the requirement that the SubVec parameter of the INSERT_SUBVECTOR node must be smaller than the Vec parameter, which means that INSERT_SUBVECTOR cannot always have a legal result/operand types. Co-Authored-by: Joe Ellis Differential Revision: https://reviews.llvm.org/D102766 --- .../SelectionDAG/LegalizeIntegerTypes.cpp | 46 +++ llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../CodeGen/SelectionDAG/TargetLowering.cpp | 12 +- .../Target/AArch64/AArch64ISelLowering.cpp | 4 + .../insert-subvector-res-legalization.ll | 276 ++++++++++++++++++ 5 files changed, 334 insertions(+), 5 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 27bc2d8c05f7f..7dc0cd0cc4f37 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -97,6 +97,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::EXTRACT_SUBVECTOR: Res = PromoteIntRes_EXTRACT_SUBVECTOR(N); break; + case ISD::INSERT_SUBVECTOR: + Res = PromoteIntRes_INSERT_SUBVECTOR(N); break; case ISD::VECTOR_REVERSE: Res = PromoteIntRes_VECTOR_REVERSE(N); break; case ISD::VECTOR_SHUFFLE: @@ -4729,6 +4731,50 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) { return DAG.getBuildVector(NOutVT, dl, Ops); } +SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_SUBVECTOR(SDNode *N) { + EVT OutVT = N->getValueType(0); + EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); + assert(NOutVT.isVector() && "This type must be promoted to a vector type"); + + SDLoc dl(N); + SDValue Vec = N->getOperand(0); + SDValue SubVec = N->getOperand(1); + SDValue Idx = N->getOperand(2); + + auto *ConstantIdx = cast(Idx); + unsigned IdxN = ConstantIdx->getZExtValue(); + + EVT VecVT = Vec.getValueType(); + EVT SubVecVT = SubVec.getValueType(); + + // To insert SubVec into Vec, store the wider vector to memory, overwrite the + // appropriate bits with the narrower vector, and reload. + Align SmallestAlign = DAG.getReducedAlign(SubVecVT, /*UseABI=*/false); + + SDValue StackPtr = + DAG.CreateStackTemporary(VecVT.getStoreSize(), SmallestAlign); + auto StackPtrVT = StackPtr->getValueType(0); + auto &MF = DAG.getMachineFunction(); + auto FrameIndex = cast(StackPtr.getNode())->getIndex(); + auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex); + + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo, + SmallestAlign); + + SDValue ScaledIdx = Idx; + if (SubVecVT.isScalableVector() && IdxN != 0) { + APInt IdxAPInt = cast(Idx)->getAPIntValue(); + ScaledIdx = DAG.getVScale(dl, StackPtrVT, + IdxAPInt.sextOrSelf(StackPtrVT.getSizeInBits())); + } + + SDValue SubVecPtr = + TLI.getVectorSubVecPointer(DAG, StackPtr, VecVT, SubVecVT, ScaledIdx); + Store = DAG.getStore(Store, dl, SubVec, SubVecPtr, PtrInfo, SmallestAlign); + return DAG.getExtLoad(ISD::LoadExtType::EXTLOAD, dl, NOutVT, Store, StackPtr, + PtrInfo, OutVT, SmallestAlign); +} + SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_REVERSE(SDNode *N) { SDLoc dl(N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index e95ca266b875a..9051899a9652c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -298,6 +298,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue PromoteIntRes_Atomic1(AtomicSDNode *N); SDValue PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N, unsigned ResNo); SDValue PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N); + SDValue PromoteIntRes_INSERT_SUBVECTOR(SDNode *N); SDValue PromoteIntRes_VECTOR_REVERSE(SDNode *N); SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N); SDValue PromoteIntRes_VECTOR_SPLICE(SDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index a5e3cc23972ee..00403a9260b01 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -7837,11 +7837,13 @@ SDValue TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG, assert(EltSize * 8 == EltVT.getFixedSizeInBits() && "Converting bits to bytes lost precision"); - assert(SubVecVT.isFixedLengthVector() && - SubVecVT.getVectorElementType() == EltVT && - "Sub-vector must be a fixed vector with matching element type"); - Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl, - SubVecVT.getVectorNumElements()); + // Scalable vectors don't need clamping as these are checked at compile time + if (SubVecVT.isFixedLengthVector()) { + assert(SubVecVT.getVectorElementType() == EltVT && + "Sub-vector must be a fixed vector with matching element type"); + Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl, + SubVecVT.getVectorNumElements()); + } EVT IdxVT = Index.getValueType(); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 2702cdcc6d70e..ce778c5ebfcab 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -17100,6 +17100,10 @@ void AArch64TargetLowering::ReplaceNodeResults( case ISD::EXTRACT_SUBVECTOR: ReplaceExtractSubVectorResults(N, Results, DAG); return; + case ISD::INSERT_SUBVECTOR: + // Custom lowering has been requested for INSERT_SUBVECTOR -- but delegate + // to common code for result type legalisation + return; case ISD::INTRINSIC_WO_CHAIN: { EVT VT = N->getValueType(0); assert((VT == MVT::i8 || VT == MVT::i16) && diff --git a/llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll b/llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll new file mode 100644 index 0000000000000..56f206d24033a --- /dev/null +++ b/llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll @@ -0,0 +1,276 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s | FileCheck %s + +target triple = "aarch64-unknown-linux-gnu" + +; SCALABLE INSERTED INTO SCALABLE TESTS + +define @vec_scalable_subvec_scalable_idx_zero_i8(* %a, * %b) #0 { +; CHECK-LABEL: vec_scalable_subvec_scalable_idx_zero_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: ptrue p1.s +; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0] +; CHECK-NEXT: ld1b { z1.s }, p1/z, [x1] +; CHECK-NEXT: st1b { z0.h }, p0, [sp, #1, mul vl] +; CHECK-NEXT: st1b { z1.s }, p1, [sp, #2, mul vl] +; CHECK-NEXT: ld1b { z0.h }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %vec = load , * %a + %subvec = load , * %b + %ins = call @llvm.experimental.vector.insert.nxv8i8.nxv4i8( %vec, %subvec, i64 0) + ret %ins +} + +define @vec_scalable_subvec_scalable_idx_nonzero_i8(* %a, * %b) #0 { +; CHECK-LABEL: vec_scalable_subvec_scalable_idx_nonzero_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: ptrue p1.s +; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0] +; CHECK-NEXT: ld1b { z1.s }, p1/z, [x1] +; CHECK-NEXT: addpl x8, sp, #4 +; CHECK-NEXT: st1b { z0.h }, p0, [sp, #1, mul vl] +; CHECK-NEXT: st1b { z1.s }, p1, [x8, #1, mul vl] +; CHECK-NEXT: ld1b { z0.h }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %vec = load , * %a + %subvec = load , * %b + %ins = call @llvm.experimental.vector.insert.nxv8i8.nxv4i8( %vec, %subvec, i64 4) + ret %ins +} + +define @vec_scalable_subvec_scalable_idx_zero_i16(* %a, * %b) #0 { +; CHECK-LABEL: vec_scalable_subvec_scalable_idx_zero_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ptrue p1.d +; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0] +; CHECK-NEXT: ld1h { z1.d }, p1/z, [x1] +; CHECK-NEXT: st1h { z0.s }, p0, [sp, #1, mul vl] +; CHECK-NEXT: st1h { z1.d }, p1, [sp, #2, mul vl] +; CHECK-NEXT: ld1h { z0.s }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %vec = load , * %a + %subvec = load , * %b + %ins = call @llvm.experimental.vector.insert.nxv4i16.nxv2i16( %vec, %subvec, i64 0) + ret %ins +} + +define @vec_scalable_subvec_scalable_idx_nonzero_i16(* %a, * %b) #0 { +; CHECK-LABEL: vec_scalable_subvec_scalable_idx_nonzero_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ptrue p1.d +; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0] +; CHECK-NEXT: ld1h { z1.d }, p1/z, [x1] +; CHECK-NEXT: addpl x8, sp, #4 +; CHECK-NEXT: st1h { z0.s }, p0, [sp, #1, mul vl] +; CHECK-NEXT: st1h { z1.d }, p1, [x8, #1, mul vl] +; CHECK-NEXT: ld1h { z0.s }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %vec = load , * %a + %subvec = load , * %b + %ins = call @llvm.experimental.vector.insert.nxv4i16.nxv2i16( %vec, %subvec, i64 2) + ret %ins +} + +; FIXED INSERTED INTO SCALABLE TESTS + +define @vec_scalable_subvec_fixed_idx_zero_i8(* %a, <8 x i8>* %b) #0 { +; CHECK-LABEL: vec_scalable_subvec_fixed_idx_zero_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: st1b { z0.h }, p0, [sp, #1, mul vl] +; CHECK-NEXT: addpl x8, sp, #4 +; CHECK-NEXT: str d1, [x8] +; CHECK-NEXT: ld1b { z0.h }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %vec = load , * %a + %subvec = load <8 x i8>, <8 x i8>* %b + %ins = call @llvm.experimental.vector.insert.nxv8i8.v8i8( %vec, <8 x i8> %subvec, i64 0) + ret %ins +} + +define @vec_scalable_subvec_fixed_idx_nonzero_i8(* %a, <8 x i8>* %b) #0 { +; CHECK-LABEL: vec_scalable_subvec_fixed_idx_nonzero_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: cnth x9 +; CHECK-NEXT: addpl x10, sp, #4 +; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: sub x9, x9, #8 // =8 +; CHECK-NEXT: mov w8, #8 +; CHECK-NEXT: cmp x9, #8 // =8 +; CHECK-NEXT: csel x8, x9, x8, lo +; CHECK-NEXT: st1b { z0.h }, p0, [sp, #1, mul vl] +; CHECK-NEXT: str d1, [x10, x8] +; CHECK-NEXT: ld1b { z0.h }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %vec = load , * %a + %subvec = load <8 x i8>, <8 x i8>* %b + %ins = call @llvm.experimental.vector.insert.nxv8i8.v8i8( %vec, <8 x i8> %subvec, i64 8) + ret %ins +} + +define @vec_scalable_subvec_fixed_idx_zero_i16(* %a, <4 x i16>* %b) #0 { +; CHECK-LABEL: vec_scalable_subvec_fixed_idx_zero_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: st1h { z0.s }, p0, [sp, #1, mul vl] +; CHECK-NEXT: addpl x8, sp, #4 +; CHECK-NEXT: str d1, [x8] +; CHECK-NEXT: ld1h { z0.s }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %vec = load , * %a + %subvec = load <4 x i16>, <4 x i16>* %b + %ins = call @llvm.experimental.vector.insert.nxv4i16.v4i16( %vec, <4 x i16> %subvec, i64 0) + ret %ins +} + +define @vec_scalable_subvec_fixed_idx_nonzero_i16(* %a, <4 x i16>* %b) #0 { +; CHECK-LABEL: vec_scalable_subvec_fixed_idx_nonzero_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: cntw x9 +; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: sub x9, x9, #4 // =4 +; CHECK-NEXT: mov w8, #4 +; CHECK-NEXT: cmp x9, #4 // =4 +; CHECK-NEXT: csel x8, x9, x8, lo +; CHECK-NEXT: addpl x9, sp, #4 +; CHECK-NEXT: lsl x8, x8, #1 +; CHECK-NEXT: st1h { z0.s }, p0, [sp, #1, mul vl] +; CHECK-NEXT: str d1, [x9, x8] +; CHECK-NEXT: ld1h { z0.s }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %vec = load , * %a + %subvec = load <4 x i16>, <4 x i16>* %b + %ins = call @llvm.experimental.vector.insert.nxv4i16.v4i16( %vec, <4 x i16> %subvec, i64 4) + ret %ins +} + +define @vec_scalable_subvec_fixed_idx_zero_i32(* %a, <2 x i32>* %b) #0 { +; CHECK-LABEL: vec_scalable_subvec_fixed_idx_zero_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: st1w { z0.d }, p0, [sp, #1, mul vl] +; CHECK-NEXT: addpl x8, sp, #4 +; CHECK-NEXT: str d1, [x8] +; CHECK-NEXT: ld1w { z0.d }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %vec = load , * %a + %subvec = load <2 x i32>, <2 x i32>* %b + %ins = call @llvm.experimental.vector.insert.nxv2i32.v2i32( %vec, <2 x i32> %subvec, i64 0) + ret %ins +} + +define @vec_scalable_subvec_fixed_idx_nonzero_i32(* %a, <2 x i32>* %b) #0 { +; CHECK-LABEL: vec_scalable_subvec_fixed_idx_nonzero_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: cntd x9 +; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: sub x9, x9, #2 // =2 +; CHECK-NEXT: mov w8, #2 +; CHECK-NEXT: cmp x9, #2 // =2 +; CHECK-NEXT: csel x8, x9, x8, lo +; CHECK-NEXT: addpl x9, sp, #4 +; CHECK-NEXT: lsl x8, x8, #2 +; CHECK-NEXT: st1w { z0.d }, p0, [sp, #1, mul vl] +; CHECK-NEXT: str d1, [x9, x8] +; CHECK-NEXT: ld1w { z0.d }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %vec = load , * %a + %subvec = load <2 x i32>, <2 x i32>* %b + %ins = call @llvm.experimental.vector.insert.nxv2i32.v2i32( %vec, <2 x i32> %subvec, i64 2) + ret %ins +} + +define @vec_scalable_subvec_fixed_idx_nonzero_large_i32(* %a, <8 x i32>* %b) #0 { +; CHECK-LABEL: vec_scalable_subvec_fixed_idx_nonzero_large_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: cntd x8 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: subs x8, x8, #8 // =8 +; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0] +; CHECK-NEXT: ldp q1, q2, [x1] +; CHECK-NEXT: csel x8, xzr, x8, lo +; CHECK-NEXT: mov w9, #8 +; CHECK-NEXT: cmp x8, #8 // =8 +; CHECK-NEXT: csel x8, x8, x9, lo +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: add x8, x9, x8, lsl #2 +; CHECK-NEXT: st1w { z0.d }, p0, [sp] +; CHECK-NEXT: stp q1, q2, [x8] +; CHECK-NEXT: ld1w { z0.d }, p0/z, [sp] +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %vec = load , * %a + %subvec = load <8 x i32>, <8 x i32>* %b + %ins = call @llvm.experimental.vector.insert.nxv2i32.v8i32( %vec, <8 x i32> %subvec, i64 8) + ret %ins +} + +declare @llvm.experimental.vector.insert.nxv8i8.nxv4i8(, , i64) +declare @llvm.experimental.vector.insert.nxv4i16.nxv2i16(, , i64) + +declare @llvm.experimental.vector.insert.nxv8i8.v8i8(, <8 x i8>, i64) +declare @llvm.experimental.vector.insert.nxv4i16.v4i16(, <4 x i16>, i64) +declare @llvm.experimental.vector.insert.nxv2i32.v2i32(, <2 x i32>, i64) + +declare @llvm.experimental.vector.insert.nxv2i32.v8i32(, <8 x i32>, i64) + +attributes #0 = { nounwind "target-features"="+sve" } From 99c7e918b5ea2262635cc5f80b8887e487227638 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 10 Jun 2021 17:31:30 -0400 Subject: [PATCH 432/619] GlobalISel: Use LLT in call lowering callbacks This preserves the memory type so the lowerings can rely on them. --- .../llvm/CodeGen/GlobalISel/CallLowering.h | 12 +++--- llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 17 ++++---- .../AArch64/GISel/AArch64CallLowering.cpp | 42 +++++++++---------- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp | 19 ++++----- llvm/lib/Target/ARM/ARMCallLowering.cpp | 22 ++++------ .../Target/M68k/GlSel/M68kCallLowering.cpp | 4 +- llvm/lib/Target/M68k/GlSel/M68kCallLowering.h | 2 +- .../Target/PowerPC/GISel/PPCCallLowering.cpp | 11 ++--- .../Target/PowerPC/GISel/PPCCallLowering.h | 2 +- llvm/lib/Target/X86/X86CallLowering.cpp | 9 ++-- .../GlobalISel/call-translator-tail-call.ll | 8 ++-- .../AMDGPU/GlobalISel/irtranslator-call.ll | 10 ++--- .../GlobalISel/irtranslator-function-args.ll | 8 ++-- .../GlobalISel/irtranslator-callingconv.ll | 8 ++-- 14 files changed, 80 insertions(+), 94 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h index 7b45c4dd293a1..cf10fe6cfe07c 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h @@ -236,7 +236,7 @@ class CallLowering { /// direct SP manipulation, depending on the context. \p MPO /// should be initialized to an appropriate description of the /// address created. - virtual Register getStackAddress(uint64_t Size, int64_t Offset, + virtual Register getStackAddress(uint64_t MemSize, int64_t Offset, MachinePointerInfo &MPO, ISD::ArgFlagsTy Flags) = 0; @@ -245,8 +245,8 @@ class CallLowering { /// /// This is overridable primarily for targets to maintain compatibility with /// hacks around the existing DAG call lowering infrastructure. - virtual uint64_t getStackValueStoreSize(const DataLayout &DL, - const CCValAssign &VA) const; + virtual LLT getStackValueStoreType(const DataLayout &DL, + const CCValAssign &VA) const; /// The specified value has been assigned to a physical register, /// handle the appropriate COPY (either to or from) and mark any @@ -258,17 +258,17 @@ class CallLowering { /// location. Load or store it there, with appropriate extension /// if necessary. virtual void assignValueToAddress(Register ValVReg, Register Addr, - uint64_t Size, MachinePointerInfo &MPO, + LLT MemTy, MachinePointerInfo &MPO, CCValAssign &VA) = 0; /// An overload which takes an ArgInfo if additional information about the /// arg is needed. \p ValRegIndex is the index in \p Arg.Regs for the value /// to store. virtual void assignValueToAddress(const ArgInfo &Arg, unsigned ValRegIndex, - Register Addr, uint64_t Size, + Register Addr, LLT MemTy, MachinePointerInfo &MPO, CCValAssign &VA) { - assignValueToAddress(Arg.Regs[ValRegIndex], Addr, Size, MPO, VA); + assignValueToAddress(Arg.Regs[ValRegIndex], Addr, MemTy, MPO, VA); } /// Handle custom values, which may be passed into one or more of \p VAs. diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp index 2815dae41b7d5..6ac7e31c77b6b 100644 --- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -682,14 +682,13 @@ bool CallLowering::handleAssignments(ValueHandler &Handler, // TODO: The memory size may be larger than the value we need to // store. We may need to adjust the offset for big endian targets. - uint64_t MemSize = Handler.getStackValueStoreSize(DL, VA); + LLT MemTy = Handler.getStackValueStoreType(DL, VA); MachinePointerInfo MPO; - Register StackAddr = - Handler.getStackAddress(MemSize, VA.getLocMemOffset(), MPO, Flags); + Register StackAddr = Handler.getStackAddress( + MemTy.getSizeInBytes(), VA.getLocMemOffset(), MPO, Flags); - Handler.assignValueToAddress(Args[i], Part, StackAddr, MemSize, MPO, - VA); + Handler.assignValueToAddress(Args[i], Part, StackAddr, MemTy, MPO, VA); continue; } @@ -1016,14 +1015,14 @@ bool CallLowering::resultsCompatible(CallLoweringInfo &Info, return true; } -uint64_t CallLowering::ValueHandler::getStackValueStoreSize( +LLT CallLowering::ValueHandler::getStackValueStoreType( const DataLayout &DL, const CCValAssign &VA) const { - const EVT ValVT = VA.getValVT(); + const MVT ValVT = VA.getValVT(); if (ValVT != MVT::iPTR) - return ValVT.getStoreSize(); + return LLT(ValVT); /// FIXME: We need to get the correct pointer address space. - return DL.getPointerSize(); + return LLT::pointer(0, DL.getPointerSize(0)); } void CallLowering::ValueHandler::copyArgumentMemory( diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp index f87b290dfbfb3..bd76c8d16843a 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp @@ -66,10 +66,10 @@ static void applyStackPassedSmallTypeDAGHack(EVT OrigVT, MVT &ValVT, } // Account for i1/i8/i16 stack passed value hack -static uint64_t getStackValueStoreSizeHack(const CCValAssign &VA) { +static LLT getStackValueStoreTypeHack(const CCValAssign &VA) { const MVT ValVT = VA.getValVT(); - return (ValVT == MVT::i8 || ValVT == MVT::i16) ? ValVT.getStoreSize() - : VA.getLocVT().getStoreSize(); + return (ValVT == MVT::i8 || ValVT == MVT::i16) ? LLT(ValVT) + : LLT(VA.getLocVT()); } namespace { @@ -146,9 +146,9 @@ struct IncomingArgHandler : public CallLowering::IncomingValueHandler { return AddrReg.getReg(0); } - uint64_t getStackValueStoreSize(const DataLayout &, - const CCValAssign &VA) const override { - return getStackValueStoreSizeHack(VA); + LLT getStackValueStoreType(const DataLayout &, + const CCValAssign &VA) const override { + return getStackValueStoreTypeHack(VA); } void assignValueToReg(Register ValVReg, Register PhysReg, @@ -157,7 +157,7 @@ struct IncomingArgHandler : public CallLowering::IncomingValueHandler { IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA); } - void assignValueToAddress(Register ValVReg, Register Addr, uint64_t MemSize, + void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy, MachinePointerInfo &MPO, CCValAssign &VA) override { MachineFunction &MF = MIRBuilder.getMF(); @@ -170,11 +170,9 @@ struct IncomingArgHandler : public CallLowering::IncomingValueHandler { if (VA.getValVT() == MVT::i8 || VA.getValVT() == MVT::i16) std::swap(ValTy, LocTy); - MemSize = LocTy.getSizeInBytes(); - auto MMO = MF.getMachineMemOperand( - MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, - MemSize, inferAlignFromPtrInfo(MF, MPO)); + MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, LocTy, + inferAlignFromPtrInfo(MF, MPO)); if (RealRegTy.getSizeInBits() == ValTy.getSizeInBits()) { // No extension information, or no extension necessary. Load into the @@ -264,9 +262,9 @@ struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler { /// we invert the interpretation of ValVT and LocVT in certain cases. This is /// for compatability with the DAG call lowering implementation, which we're /// currently building on top of. - uint64_t getStackValueStoreSize(const DataLayout &, - const CCValAssign &VA) const override { - return getStackValueStoreSizeHack(VA); + LLT getStackValueStoreType(const DataLayout &, + const CCValAssign &VA) const override { + return getStackValueStoreTypeHack(VA); } void assignValueToReg(Register ValVReg, Register PhysReg, @@ -276,18 +274,18 @@ struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler { MIRBuilder.buildCopy(PhysReg, ExtReg); } - void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size, + void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy, MachinePointerInfo &MPO, CCValAssign &VA) override { MachineFunction &MF = MIRBuilder.getMF(); - auto MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, Size, + auto MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, MemTy, inferAlignFromPtrInfo(MF, MPO)); MIRBuilder.buildStore(ValVReg, Addr, *MMO); } void assignValueToAddress(const CallLowering::ArgInfo &Arg, unsigned RegIndex, - Register Addr, uint64_t MemSize, - MachinePointerInfo &MPO, CCValAssign &VA) override { - unsigned MaxSize = MemSize * 8; + Register Addr, LLT MemTy, MachinePointerInfo &MPO, + CCValAssign &VA) override { + unsigned MaxSize = MemTy.getSizeInBytes() * 8; // For varargs, we always want to extend them to 8 bytes, in which case // we disable setting a max. if (!Arg.IsFixed) @@ -300,7 +298,7 @@ struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler { if (VA.getValVT() == MVT::i8 || VA.getValVT() == MVT::i16) { std::swap(ValVT, LocVT); - MemSize = VA.getValVT().getStoreSize(); + MemTy = LLT(VA.getValVT()); } ValVReg = extendRegister(ValVReg, VA, MaxSize); @@ -310,10 +308,10 @@ struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler { ValVReg = MIRBuilder.buildTrunc(RegTy, ValVReg).getReg(0); } else { // The store does not cover the full allocated stack slot. - MemSize = VA.getValVT().getStoreSize(); + MemTy = LLT(VA.getValVT()); } - assignValueToAddress(ValVReg, Addr, MemSize, MPO, VA); + assignValueToAddress(ValVReg, Addr, MemTy, MPO, VA); } MachineInstrBuilder MIB; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp index 0d3676898c934..4fc3e90a8dabb 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -54,7 +54,7 @@ struct AMDGPUOutgoingValueHandler : public CallLowering::OutgoingValueHandler { llvm_unreachable("not implemented"); } - void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size, + void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy, MachinePointerInfo &MPO, CCValAssign &VA) override { llvm_unreachable("not implemented"); } @@ -122,12 +122,12 @@ struct AMDGPUIncomingArgHandler : public CallLowering::IncomingValueHandler { IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA); } - void assignValueToAddress(Register ValVReg, Register Addr, uint64_t MemSize, + void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy, MachinePointerInfo &MPO, CCValAssign &VA) override { MachineFunction &MF = MIRBuilder.getMF(); auto MMO = MF.getMachineMemOperand( - MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, MemSize, + MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, MemTy, inferAlignFromPtrInfo(MF, MPO)); MIRBuilder.buildLoad(ValVReg, Addr, *MMO); } @@ -209,26 +209,25 @@ struct AMDGPUOutgoingArgHandler : public AMDGPUOutgoingValueHandler { MIRBuilder.buildCopy(PhysReg, ExtReg); } - void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size, + void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy, MachinePointerInfo &MPO, CCValAssign &VA) override { MachineFunction &MF = MIRBuilder.getMF(); uint64_t LocMemOffset = VA.getLocMemOffset(); const auto &ST = MF.getSubtarget(); auto MMO = MF.getMachineMemOperand( - MPO, MachineMemOperand::MOStore, Size, - commonAlignment(ST.getStackAlignment(), LocMemOffset)); + MPO, MachineMemOperand::MOStore, MemTy, + commonAlignment(ST.getStackAlignment(), LocMemOffset)); MIRBuilder.buildStore(ValVReg, Addr, *MMO); } void assignValueToAddress(const CallLowering::ArgInfo &Arg, - unsigned ValRegIndex, Register Addr, - uint64_t MemSize, MachinePointerInfo &MPO, - CCValAssign &VA) override { + unsigned ValRegIndex, Register Addr, LLT MemTy, + MachinePointerInfo &MPO, CCValAssign &VA) override { Register ValVReg = VA.getLocInfo() != CCValAssign::LocInfo::FPExt ? extendRegister(Arg.Regs[ValRegIndex], VA) : Arg.Regs[ValRegIndex]; - assignValueToAddress(ValVReg, Addr, MemSize, MPO, VA); + assignValueToAddress(ValVReg, Addr, MemTy, MPO, VA); } }; } diff --git a/llvm/lib/Target/ARM/ARMCallLowering.cpp b/llvm/lib/Target/ARM/ARMCallLowering.cpp index ee600477ed9fc..04d70f9ded9ae 100644 --- a/llvm/lib/Target/ARM/ARMCallLowering.cpp +++ b/llvm/lib/Target/ARM/ARMCallLowering.cpp @@ -121,14 +121,11 @@ struct ARMOutgoingValueHandler : public CallLowering::OutgoingValueHandler { MIB.addUse(PhysReg, RegState::Implicit); } - void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size, + void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy, MachinePointerInfo &MPO, CCValAssign &VA) override { - assert((Size == 1 || Size == 2 || Size == 4 || Size == 8) && - "Unsupported size"); - Register ExtReg = extendRegister(ValVReg, VA); auto MMO = MIRBuilder.getMF().getMachineMemOperand( - MPO, MachineMemOperand::MOStore, LLT(VA.getLocVT()), Align(1)); + MPO, MachineMemOperand::MOStore, MemTy, Align(1)); MIRBuilder.buildStore(ExtReg, Addr, *MMO); } @@ -249,31 +246,28 @@ struct ARMIncomingValueHandler : public CallLowering::IncomingValueHandler { .getReg(0); } - void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size, + void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy, MachinePointerInfo &MPO, CCValAssign &VA) override { - assert((Size == 1 || Size == 2 || Size == 4 || Size == 8) && - "Unsupported size"); - if (VA.getLocInfo() == CCValAssign::SExt || VA.getLocInfo() == CCValAssign::ZExt) { // If the value is zero- or sign-extended, its size becomes 4 bytes, so // that's what we should load. - Size = 4; + MemTy = LLT::scalar(32); assert(MRI.getType(ValVReg).isScalar() && "Only scalars supported atm"); - auto LoadVReg = buildLoad(LLT::scalar(32), Addr, Size, MPO); + auto LoadVReg = buildLoad(LLT::scalar(32), Addr, MemTy, MPO); MIRBuilder.buildTrunc(ValVReg, LoadVReg); } else { // If the value is not extended, a simple load will suffice. - buildLoad(ValVReg, Addr, Size, MPO); + buildLoad(ValVReg, Addr, MemTy, MPO); } } - MachineInstrBuilder buildLoad(const DstOp &Res, Register Addr, uint64_t Size, + MachineInstrBuilder buildLoad(const DstOp &Res, Register Addr, LLT MemTy, MachinePointerInfo &MPO) { MachineFunction &MF = MIRBuilder.getMF(); - auto MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOLoad, Size, + auto MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOLoad, MemTy, inferAlignFromPtrInfo(MF, MPO)); return MIRBuilder.buildLoad(Res, Addr, *MMO); } diff --git a/llvm/lib/Target/M68k/GlSel/M68kCallLowering.cpp b/llvm/lib/Target/M68k/GlSel/M68kCallLowering.cpp index d6435e5639021..7628e66d5f6d1 100644 --- a/llvm/lib/Target/M68k/GlSel/M68kCallLowering.cpp +++ b/llvm/lib/Target/M68k/GlSel/M68kCallLowering.cpp @@ -74,11 +74,11 @@ void M68kIncomingValueHandler::assignValueToReg(Register ValVReg, void M68kIncomingValueHandler::assignValueToAddress(Register ValVReg, Register Addr, - uint64_t Size, + LLT MemTy, MachinePointerInfo &MPO, CCValAssign &VA) { MachineFunction &MF = MIRBuilder.getMF(); - auto *MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOLoad, Size, + auto *MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOLoad, MemTy, inferAlignFromPtrInfo(MF, MPO)); MIRBuilder.buildLoad(ValVReg, Addr, *MMO); } diff --git a/llvm/lib/Target/M68k/GlSel/M68kCallLowering.h b/llvm/lib/Target/M68k/GlSel/M68kCallLowering.h index a9ebb2b0f0aa5..9e0d462db677f 100644 --- a/llvm/lib/Target/M68k/GlSel/M68kCallLowering.h +++ b/llvm/lib/Target/M68k/GlSel/M68kCallLowering.h @@ -54,7 +54,7 @@ struct M68kIncomingValueHandler : public CallLowering::IncomingValueHandler { void assignValueToReg(Register ValVReg, Register PhysReg, CCValAssign &VA) override; - void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size, + void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy, MachinePointerInfo &MPO, CCValAssign &VA) override; Register getStackAddress(uint64_t Size, int64_t Offset, diff --git a/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.cpp b/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.cpp index 2fc2c8a213e20..2621d94887efa 100644 --- a/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.cpp +++ b/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.cpp @@ -86,22 +86,19 @@ void PPCIncomingValueHandler::assignValueToReg(Register ValVReg, } void PPCIncomingValueHandler::assignValueToAddress(Register ValVReg, - Register Addr, uint64_t Size, + Register Addr, LLT MemTy, MachinePointerInfo &MPO, CCValAssign &VA) { - assert((Size == 1 || Size == 2 || Size == 4 || Size == 8) && - "Unsupported size"); - // define a lambda expression to load value auto BuildLoad = [](MachineIRBuilder &MIRBuilder, MachinePointerInfo &MPO, - uint64_t Size, const DstOp &Res, Register Addr) { + LLT MemTy, const DstOp &Res, Register Addr) { MachineFunction &MF = MIRBuilder.getMF(); - auto *MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOLoad, Size, + auto *MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOLoad, MemTy, inferAlignFromPtrInfo(MF, MPO)); return MIRBuilder.buildLoad(Res, Addr, *MMO); }; - BuildLoad(MIRBuilder, MPO, Size, ValVReg, Addr); + BuildLoad(MIRBuilder, MPO, MemTy, ValVReg, Addr); } Register PPCIncomingValueHandler::getStackAddress(uint64_t Size, int64_t Offset, diff --git a/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.h b/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.h index 0d46374d41fa2..b045032bec065 100644 --- a/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.h +++ b/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.h @@ -48,7 +48,7 @@ class PPCIncomingValueHandler : public CallLowering::IncomingValueHandler { void assignValueToReg(Register ValVReg, Register PhysReg, CCValAssign &VA) override; - void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size, + void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy, MachinePointerInfo &MPO, CCValAssign &VA) override; Register getStackAddress(uint64_t Size, int64_t Offset, diff --git a/llvm/lib/Target/X86/X86CallLowering.cpp b/llvm/lib/Target/X86/X86CallLowering.cpp index 5dbea3480df91..3549f34f6011e 100644 --- a/llvm/lib/Target/X86/X86CallLowering.cpp +++ b/llvm/lib/Target/X86/X86CallLowering.cpp @@ -111,13 +111,12 @@ struct X86OutgoingValueHandler : public CallLowering::OutgoingValueHandler { MIRBuilder.buildCopy(PhysReg, ExtReg); } - void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size, + void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy, MachinePointerInfo &MPO, CCValAssign &VA) override { MachineFunction &MF = MIRBuilder.getMF(); Register ExtReg = extendRegister(ValVReg, VA); - auto *MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, - VA.getLocVT().getStoreSize(), + auto *MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, MemTy, inferAlignFromPtrInfo(MF, MPO)); MIRBuilder.buildStore(ExtReg, Addr, *MMO); } @@ -186,11 +185,11 @@ struct X86IncomingValueHandler : public CallLowering::IncomingValueHandler { .getReg(0); } - void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size, + void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy, MachinePointerInfo &MPO, CCValAssign &VA) override { MachineFunction &MF = MIRBuilder.getMF(); auto *MMO = MF.getMachineMemOperand( - MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, Size, + MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, MemTy, inferAlignFromPtrInfo(MF, MPO)); MIRBuilder.buildLoad(ValVReg, Addr, *MMO); } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll index 30876eb332659..a0ba7a0d570b0 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll @@ -66,7 +66,7 @@ define void @test_outgoing_stack_args([8 x <2 x double>], <4 x half> %arg) { ; DARWIN: [[COPY6:%[0-9]+]]:_(<2 x s64>) = COPY $q6 ; DARWIN: [[COPY7:%[0-9]+]]:_(<2 x s64>) = COPY $q7 ; DARWIN: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; DARWIN: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s64) from %fixed-stack.0, align 16) + ; DARWIN: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (<4 x s16>) from %fixed-stack.0, align 16) ; DARWIN: $d0 = COPY [[LOAD]](<4 x s16>) ; DARWIN: TCRETURNdi @outgoing_stack_args_fn, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $d0 ; WINDOWS-LABEL: name: test_outgoing_stack_args @@ -81,7 +81,7 @@ define void @test_outgoing_stack_args([8 x <2 x double>], <4 x half> %arg) { ; WINDOWS: [[COPY6:%[0-9]+]]:_(<2 x s64>) = COPY $q6 ; WINDOWS: [[COPY7:%[0-9]+]]:_(<2 x s64>) = COPY $q7 ; WINDOWS: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; WINDOWS: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s64) from %fixed-stack.0, align 16) + ; WINDOWS: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (<4 x s16>) from %fixed-stack.0, align 16) ; WINDOWS: $d0 = COPY [[LOAD]](<4 x s16>) ; WINDOWS: TCRETURNdi @outgoing_stack_args_fn, 0, csr_aarch64_aapcs, implicit $sp, implicit $d0 tail call void @outgoing_stack_args_fn(<4 x half> %arg) @@ -242,7 +242,7 @@ define void @test_varargs_3([8 x <2 x double>], <4 x half> %arg) { ; DARWIN: [[COPY6:%[0-9]+]]:_(<2 x s64>) = COPY $q6 ; DARWIN: [[COPY7:%[0-9]+]]:_(<2 x s64>) = COPY $q7 ; DARWIN: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; DARWIN: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s64) from %fixed-stack.0, align 16) + ; DARWIN: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (<4 x s16>) from %fixed-stack.0, align 16) ; DARWIN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; DARWIN: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 ; DARWIN: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 @@ -270,7 +270,7 @@ define void @test_varargs_3([8 x <2 x double>], <4 x half> %arg) { ; WINDOWS: [[COPY6:%[0-9]+]]:_(<2 x s64>) = COPY $q6 ; WINDOWS: [[COPY7:%[0-9]+]]:_(<2 x s64>) = COPY $q7 ; WINDOWS: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; WINDOWS: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s64) from %fixed-stack.0, align 16) + ; WINDOWS: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (<4 x s16>) from %fixed-stack.0, align 16) ; WINDOWS: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; WINDOWS: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 ; WINDOWS: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll index d5f29b415f940..1ef018955dd76 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll @@ -2459,7 +2459,7 @@ define amdgpu_kernel void @test_call_external_void_func_v63i16() #0 { ; CHECK: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C3]](s32) - ; CHECK: G_STORE [[UV31]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5) + ; CHECK: G_STORE [[UV31]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (<2 x s16>) into stack, align 16, addrspace 5) ; CHECK: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) ; CHECK: $sgpr4_sgpr5 = COPY [[COPY10]](p4) @@ -2551,10 +2551,10 @@ define amdgpu_kernel void @test_call_external_void_func_v65i16() #0 { ; CHECK: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C3]](s32) - ; CHECK: G_STORE [[UV31]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5) + ; CHECK: G_STORE [[UV31]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (<2 x s16>) into stack, align 16, addrspace 5) ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32) - ; CHECK: G_STORE [[UV32]](<2 x s16>), [[PTR_ADD2]](p5) :: (store (s32) into stack + 4, addrspace 5) + ; CHECK: G_STORE [[UV32]](<2 x s16>), [[PTR_ADD2]](p5) :: (store (<2 x s16>) into stack + 4, addrspace 5) ; CHECK: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) ; CHECK: $sgpr4_sgpr5 = COPY [[COPY10]](p4) @@ -2644,10 +2644,10 @@ define amdgpu_kernel void @test_call_external_void_func_v66i16() #0 { ; CHECK: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C3]](s32) - ; CHECK: G_STORE [[UV31]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5) + ; CHECK: G_STORE [[UV31]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (<2 x s16>) into stack, align 16, addrspace 5) ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32) - ; CHECK: G_STORE [[UV32]](<2 x s16>), [[PTR_ADD2]](p5) :: (store (s32) into stack + 4, addrspace 5) + ; CHECK: G_STORE [[UV32]](<2 x s16>), [[PTR_ADD2]](p5) :: (store (<2 x s16>) into stack + 4, addrspace 5) ; CHECK: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) ; CHECK: $sgpr4_sgpr5 = COPY [[COPY10]](p4) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll index d295cf5bbc7d3..5474bd0bd9c0b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll @@ -1293,7 +1293,7 @@ define void @void_func_v65i16(<65 x i16> %arg0) #0 { ; CHECK: [[COPY30:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr30 ; CHECK: [[COPY31:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr31 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.0, align 16, addrspace 5) ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<130 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>), [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>), [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[COPY12]](<2 x s16>), [[COPY13]](<2 x s16>), [[COPY14]](<2 x s16>), [[COPY15]](<2 x s16>), [[COPY16]](<2 x s16>), [[COPY17]](<2 x s16>), [[COPY18]](<2 x s16>), [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>), [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>), [[COPY23]](<2 x s16>), [[COPY24]](<2 x s16>), [[COPY25]](<2 x s16>), [[COPY26]](<2 x s16>), [[COPY27]](<2 x s16>), [[COPY28]](<2 x s16>), [[COPY29]](<2 x s16>), [[COPY30]](<2 x s16>), [[COPY31]](<2 x s16>), [[LOAD]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>) ; CHECK: [[UV:%[0-9]+]]:_(<65 x s16>), [[UV1:%[0-9]+]]:_(<65 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<130 x s16>) @@ -2079,7 +2079,7 @@ define void @void_func_v32i32_i1_i8_i16(<32 x i32> %arg0, i1 %arg1, i8 %arg2, i1 ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s8) from %fixed-stack.3, align 16, addrspace 5) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s1) from %fixed-stack.3, align 16, addrspace 5) ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[LOAD]](s32) ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 ; CHECK: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s16) from %fixed-stack.2, align 4, addrspace 5) @@ -2265,9 +2265,9 @@ define void @void_func_v32i32_v2i16_v2f16(<32 x i32> %arg0, <2 x i16> %arg1, <2 ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.1, align 16, addrspace 5) + ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.1, align 16, addrspace 5) ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.0, addrspace 5) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.0, addrspace 5) ; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) diff --git a/llvm/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll b/llvm/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll index 1ae5136bd4f81..666da92e43383 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll +++ b/llvm/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll @@ -492,7 +492,7 @@ define <8 x i32> @test_split_return_callee(<8 x i32> %arg1, <8 x i32> %arg2) { ; X86: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY]](<4 x s32>), [[COPY1]](<4 x s32>) ; X86: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $xmm2 ; X86: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 - ; X86: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s128) from %fixed-stack.0) + ; X86: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (<4 x s32>) from %fixed-stack.0) ; X86: [[CONCAT_VECTORS1:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY2]](<4 x s32>), [[LOAD]](<4 x s32>) ; X86: ADJCALLSTACKDOWN32 0, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp ; X86: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<8 x s32>) @@ -570,21 +570,21 @@ define void @test_abi_exts_call(i8* %addr) { ; X86: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; X86: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) ; X86: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD1]](s8) - ; X86: G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack, align 1) + ; X86: G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p0) :: (store (s8) into stack) ; X86: CALLpcrel32 @take_char, csr_32, implicit $esp, implicit $ssp ; X86: ADJCALLSTACKUP32 4, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp ; X86: ADJCALLSTACKDOWN32 4, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp ; X86: [[COPY1:%[0-9]+]]:_(p0) = COPY $esp ; X86: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s32) ; X86: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s8) - ; X86: G_STORE [[SEXT]](s32), [[PTR_ADD1]](p0) :: (store (s32) into stack, align 1) + ; X86: G_STORE [[SEXT]](s32), [[PTR_ADD1]](p0) :: (store (s8) into stack) ; X86: CALLpcrel32 @take_char, csr_32, implicit $esp, implicit $ssp ; X86: ADJCALLSTACKUP32 4, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp ; X86: ADJCALLSTACKDOWN32 4, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp ; X86: [[COPY2:%[0-9]+]]:_(p0) = COPY $esp ; X86: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY2]], [[C]](s32) ; X86: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8) - ; X86: G_STORE [[ZEXT]](s32), [[PTR_ADD2]](p0) :: (store (s32) into stack, align 1) + ; X86: G_STORE [[ZEXT]](s32), [[PTR_ADD2]](p0) :: (store (s8) into stack) ; X86: CALLpcrel32 @take_char, csr_32, implicit $esp, implicit $ssp ; X86: ADJCALLSTACKUP32 4, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp ; X86: RET 0 From 8c7349b3f43267036f6c133d3502def86307528a Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 29 Jun 2021 23:30:27 +0200 Subject: [PATCH 433/619] [OpaquePtr] Support opaque pointers in intrinsic type check This adds support for opaque pointers in intrinsic type checks of IIT kind Pointer and PtrToElt. This is less straight-forward than it might initially seem, because we should only accept opaque pointers here in --force-opaque-pointers mode. Otherwise, there would be more than one valid type signature for a given intrinsic name. Differential Revision: https://reviews.llvm.org/D105155 --- llvm/include/llvm/IR/LLVMContext.h | 3 +++ llvm/lib/IR/Function.cpp | 27 +++++++++++++++---- llvm/lib/IR/LLVMContext.cpp | 4 +++ .../remangle-intrinsic-opaque-ptr.ll | 20 ++++++++++++++ llvm/test/Other/force-opaque-ptrs.ll | 19 +++++++++++++ 5 files changed, 68 insertions(+), 5 deletions(-) create mode 100644 llvm/test/Assembler/remangle-intrinsic-opaque-ptr.ll diff --git a/llvm/include/llvm/IR/LLVMContext.h b/llvm/include/llvm/IR/LLVMContext.h index 3bd889485dd1d..50671db30eeb3 100644 --- a/llvm/include/llvm/IR/LLVMContext.h +++ b/llvm/include/llvm/IR/LLVMContext.h @@ -305,6 +305,9 @@ class LLVMContext { /// LLVMContext is used by compilation. void setOptPassGate(OptPassGate&); + /// Whether typed pointers are supported. If false, all pointers are opaque. + bool supportsTypedPointers() const; + private: // Module needs access to the add/removeModule methods. friend class Module; diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp index 07c603fd98a6a..8f096f0a69eb4 100644 --- a/llvm/lib/IR/Function.cpp +++ b/llvm/lib/IR/Function.cpp @@ -1404,9 +1404,21 @@ static bool matchIntrinsicType( } case IITDescriptor::Pointer: { PointerType *PT = dyn_cast(Ty); - return !PT || PT->getAddressSpace() != D.Pointer_AddressSpace || - matchIntrinsicType(PT->getElementType(), Infos, ArgTys, - DeferredChecks, IsDeferredCheck); + if (!PT || PT->getAddressSpace() != D.Pointer_AddressSpace) + return true; + if (!PT->isOpaque()) + return matchIntrinsicType(PT->getElementType(), Infos, ArgTys, + DeferredChecks, IsDeferredCheck); + // If typed pointers are supported, do not allow using opaque pointer in + // place of fixed pointer type. This would make the intrinsic signature + // non-unique. + if (Ty->getContext().supportsTypedPointers()) + return true; + // Consume IIT descriptors relating to the pointer element type. + while (Infos.front().Kind == IITDescriptor::Pointer) + Infos = Infos.slice(1); + Infos = Infos.slice(1); + return false; } case IITDescriptor::Struct: { @@ -1517,8 +1529,13 @@ static bool matchIntrinsicType( dyn_cast (ArgTys[D.getArgumentNumber()]); PointerType *ThisArgType = dyn_cast(Ty); - return (!ThisArgType || !ReferenceType || - ThisArgType->getElementType() != ReferenceType->getElementType()); + if (!ThisArgType || !ReferenceType) + return true; + if (!ThisArgType->isOpaque()) + return ThisArgType->getElementType() != ReferenceType->getElementType(); + // If typed pointers are supported, do not allow opaque pointer to ensure + // uniqueness. + return Ty->getContext().supportsTypedPointers(); } case IITDescriptor::VecOfAnyPtrsToElt: { unsigned RefArgNumber = D.getRefArgNumber(); diff --git a/llvm/lib/IR/LLVMContext.cpp b/llvm/lib/IR/LLVMContext.cpp index 79002fb1b1bc7..234806b5dce03 100644 --- a/llvm/lib/IR/LLVMContext.cpp +++ b/llvm/lib/IR/LLVMContext.cpp @@ -347,3 +347,7 @@ const DiagnosticHandler *LLVMContext::getDiagHandlerPtr() const { std::unique_ptr LLVMContext::getDiagnosticHandler() { return std::move(pImpl->DiagHandler); } + +bool LLVMContext::supportsTypedPointers() const { + return !pImpl->ForceOpaquePointers; +} diff --git a/llvm/test/Assembler/remangle-intrinsic-opaque-ptr.ll b/llvm/test/Assembler/remangle-intrinsic-opaque-ptr.ll new file mode 100644 index 0000000000000..6f7930ce4e162 --- /dev/null +++ b/llvm/test/Assembler/remangle-intrinsic-opaque-ptr.ll @@ -0,0 +1,20 @@ +; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s --check-prefix=TYPED +; RUN: llvm-as --force-opaque-pointers < %s | llvm-dis --force-opaque-pointers | FileCheck %s --check-prefix=OPAQUE + +; An opaque pointer type should not be accepted for an intrinsic that +; specifies a fixed pointer type, outside of --force-opaque-pointers mode. + +define void @test() { +; TYPED: Intrinsic has incorrect return type! +; OPAQUE: call ptr @llvm.stacksave() + call ptr @llvm.stacksave() + +; TYPED: Intrinsic has incorrect argument type! +; OPAQUE: call <2 x i64> @llvm.masked.expandload.v2i64(ptr null, <2 x i1> zeroinitializer, <2 x i64> zeroinitializer) + call <2 x i64> @llvm.masked.expandload.v2i64(ptr null, <2 x i1> zeroinitializer, <2 x i64> zeroinitializer) + + ret void +} + +declare ptr @llvm.stacksave() +declare <2 x i64> @llvm.masked.expandload.v2i64(ptr, <2 x i1>, <2 x i64>) diff --git a/llvm/test/Other/force-opaque-ptrs.ll b/llvm/test/Other/force-opaque-ptrs.ll index 1a0738dfcd637..ee7c752de4113 100644 --- a/llvm/test/Other/force-opaque-ptrs.ll +++ b/llvm/test/Other/force-opaque-ptrs.ll @@ -48,3 +48,22 @@ define void @f3(i32 addrspace(1)* addrspace(2)* %p) { ; unreachable } + +define void @remangle_intrinsic() { +; CHECK-LABEL: define {{[^@]+}}@remangle_intrinsic() { +; CHECK-NEXT: [[A:%.*]] = alloca ptr, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = call ptr @llvm.stacksave() +; CHECK-NEXT: call void @llvm.stackprotector(ptr null, ptr [[A]]) +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.masked.expandload.v2i64(ptr null, <2 x i1> zeroinitializer, <2 x i64> zeroinitializer) +; CHECK-NEXT: ret void +; + %a = alloca i8* + call i8* @llvm.stacksave() + call void @llvm.stackprotector(i8* null, i8** %a) + call <2 x i64> @llvm.masked.expandload.v2i64(i64* null, <2 x i1> zeroinitializer, <2 x i64> zeroinitializer) + ret void +} + +declare i8* @llvm.stacksave() +declare void @llvm.stackprotector(i8*, i8**) +declare <2 x i64> @llvm.masked.expandload.v2i64(i64*, <2 x i1>, <2 x i64>) From 266a7414d8f2643be2b1dad86693b12a9f1246fa Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Wed, 30 Jun 2021 14:41:10 -0700 Subject: [PATCH 434/619] [mlir][sparse] move tensor expression builder into Merger utility Rationale: Follow-up on migrating lattice and tensor expression related methods into the new utility. This also prepares the next step of generalizing the op kinds that are handled. Reviewed By: gussmith23 Differential Revision: https://reviews.llvm.org/D105219 --- .../mlir/Dialect/SparseTensor/Utils/Merger.h | 18 ++- .../Transforms/Sparsification.cpp | 55 +-------- .../Dialect/SparseTensor/Utils/CMakeLists.txt | 1 + .../lib/Dialect/SparseTensor/Utils/Merger.cpp | 114 +++++++++++++----- 4 files changed, 104 insertions(+), 84 deletions(-) diff --git a/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h b/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h index cbb0aede83f81..d087e98ac42f3 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h +++ b/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h @@ -13,6 +13,7 @@ #ifndef MLIR_DIALECT_SPARSETENSOR_UTILS_MERGER_H_ #define MLIR_DIALECT_SPARSETENSOR_UTILS_MERGER_H_ +#include "mlir/Dialect/Linalg/IR/LinalgOps.h" #include "mlir/IR/Value.h" #include "llvm/ADT/BitVector.h" @@ -148,11 +149,6 @@ class Merger { /// Returns true if any set bit corresponds to queried dim. bool hasAnyDimOf(const llvm::BitVector &bits, Dim d) const; - /// Builds the iteration lattices in a bottom-up traversal given the remaining - /// tensor (sub)expression and the next loop index in the iteration graph. - /// Returns index of the root expression. - unsigned buildLattices(unsigned exp, unsigned idx); - /// Setter void setDim(unsigned t, unsigned i, Dim d) { dims[t][i] = d; } @@ -169,7 +165,19 @@ class Merger { void dumpBits(const llvm::BitVector &bits) const; #endif + /// Builds the iteration lattices in a bottom-up traversal given the remaining + /// tensor (sub)expression and the next loop index in the iteration graph. + /// Returns index of the root expression. + unsigned buildLattices(unsigned exp, unsigned idx); + + /// Builds a tensor expression from the given Linalg operation. + /// Returns index of the root expression on success. + Optional buildTensorExpFromLinalg(linalg::GenericOp op); + private: + /// Traverses the SSA tree (possibly a DAG) to build a tensor expression. + Optional buildTensorExp(linalg::GenericOp op, Value val); + const unsigned outTensor; const unsigned syntheticTensor; const unsigned numTensors; diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp index dd8d4967f1325..0409a7eabdfb7 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp @@ -208,51 +208,6 @@ static bool computeIterationGraph(Merger &merger, linalg::GenericOp op, return true; } -/// Traverses the SSA tree (possibly a DAG) to build a tensor expression. -/// This simplifies constructing (sub)expressions during iteration lattice -/// building (compared to using the SSA representation everywhere). -static Optional buildTensorExp(Merger &merger, linalg::GenericOp op, - Value val) { - if (auto arg = val.dyn_cast()) { - unsigned argN = arg.getArgNumber(); - // Any argument of the generic op that is not marked as a scalar - // argument is considered a tensor, indexed by the implicit loop - // bounds. This includes rank-0 tensor arguments. - if (arg.getOwner()->getParentOp() == op) { - OpOperand *t = op.getInputAndOutputOperands()[argN]; - if (!op.isScalar(t)) - return merger.addExp(Kind::kTensor, argN); - val = t->get(); // get scalar value - } - // Any other argument (marked as scalar argument for the generic op - // or belonging to an enveloping op) is considered invariant. - return merger.addExp(Kind::kInvariant, val); - } - Operation *def = val.getDefiningOp(); - if (def->getBlock() != &op.region().front()) { - // Something defined outside is invariant. - return merger.addExp(Kind::kInvariant, val); - } else if (def->getNumOperands() == 2) { - // Construct binary operations if subexpressions could be built. - auto x = buildTensorExp(merger, op, def->getOperand(0)); - auto y = buildTensorExp(merger, op, def->getOperand(1)); - if (x.hasValue() && y.hasValue()) { - unsigned e0 = x.getValue(); - unsigned e1 = y.getValue(); - if (isa(def)) - return merger.addExp(Kind::kMulF, e0, e1); - if (isa(def)) - return merger.addExp(Kind::kMulI, e0, e1); - if (isa(def)) - return merger.addExp(Kind::kAddF, e0, e1); - if (isa(def)) - return merger.addExp(Kind::kAddI, e0, e1); - } - } - // Cannot build (yet). - return None; -} - /// Returns true if given tensor co-iterates with conjunction only. /// For the output tensor, this defines a "simply dynamic" operation. /// For instance: A(I) = A(I) * B(I) * C(I) @@ -1224,14 +1179,12 @@ struct GenericOpSparsifier : public OpRewritePattern { !computeIterationGraph(merger, op, topSort, /*sparseOnly=*/true)) return failure(); - // Finds the terminating yield statement and builds the tensor - // expression for the Linalg operation in SSA form. - Operation *yield = op.region().front().getTerminator(); - Optional exp = buildTensorExp(merger, op, yield->getOperand(0)); + // Builds the tensor expression for the Linalg operation in SSA form. + Optional exp = merger.buildTensorExpFromLinalg(op); if (!exp.hasValue()) - return failure(); // build failure + return failure(); - // Reject an inadmissable tensor expression. + // Rejects an inadmissable tensor expression. if (!isAdmissableTensorExp(merger, op, exp.getValue())) return failure(); diff --git a/mlir/lib/Dialect/SparseTensor/Utils/CMakeLists.txt b/mlir/lib/Dialect/SparseTensor/Utils/CMakeLists.txt index bfd614cb8df4f..cbb82cb83d72c 100644 --- a/mlir/lib/Dialect/SparseTensor/Utils/CMakeLists.txt +++ b/mlir/lib/Dialect/SparseTensor/Utils/CMakeLists.txt @@ -6,4 +6,5 @@ add_mlir_dialect_library(MLIRSparseTensorUtils LINK_LIBS PUBLIC MLIRIR + MLIRLinalg ) diff --git a/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp b/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp index 3d63246e950fa..0c869be07a125 100644 --- a/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp +++ b/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp @@ -14,6 +14,10 @@ namespace mlir { namespace sparse_tensor { +// +// Lattice methods. +// + unsigned Merger::addExp(Kind k, unsigned e0, unsigned e1, Value v) { unsigned e = tensorExps.size(); tensorExps.push_back(TensorExp(k, e0, e1, v)); @@ -68,7 +72,7 @@ unsigned Merger::optimizeSet(unsigned s0) { if (p0 != p1) { // Is this a straightforward copy? unsigned e = latPoints[p1].exp; - if (exp(e).kind == Kind::kTensor && exp(e).e0 == outTensor) + if (tensorExps[e].kind == Kind::kTensor && tensorExps[e].e0 == outTensor) continue; // Conjunction already covered? for (unsigned p2 : latSets[s]) { @@ -137,33 +141,6 @@ bool Merger::hasAnyDimOf(const llvm::BitVector &bits, Dim d) const { return false; } -unsigned Merger::buildLattices(unsigned e, unsigned idx) { - Kind kind = exp(e).kind; - if (kind == Kind::kTensor || kind == Kind::kInvariant) { - // Either the index is really used in the tensor expression, or it is - // set to the undefined index in that dimension. An invariant expression - // is set to a synthetic tensor with undefined indices only. - unsigned s = addSet(); - unsigned t = kind == Kind::kTensor ? exp(e).e0 : syntheticTensor; - set(s).push_back(addLat(t, idx, e)); - return s; - } - unsigned s0 = buildLattices(exp(e).e0, idx); - unsigned s1 = buildLattices(exp(e).e1, idx); - switch (kind) { - case Kind::kTensor: - case Kind::kInvariant: - llvm_unreachable("handled above"); - case Kind::kMulF: - case Kind::kMulI: - return takeConj(kind, s0, s1); - case Kind::kAddF: - case Kind::kAddI: - return takeDisj(kind, s0, s1); - } - llvm_unreachable("unexpected expression kind"); -} - #ifndef NDEBUG // @@ -173,6 +150,10 @@ unsigned Merger::buildLattices(unsigned e, unsigned idx) { void Merger::dumpExp(unsigned e) const { switch (tensorExps[e].kind) { case Kind::kTensor: + if (tensorExps[e].e0 == syntheticTensor) + llvm::dbgs() << "synthetic_"; + else if (tensorExps[e].e0 == outTensor) + llvm::dbgs() << "output_"; llvm::dbgs() << "tensor_" << tensorExps[e].e0; break; case Kind::kInvariant: @@ -242,5 +223,82 @@ void Merger::dumpBits(const llvm::BitVector &bits) const { #endif // NDEBUG +// +// Builder methods. +// + +unsigned Merger::buildLattices(unsigned e, unsigned idx) { + Kind kind = tensorExps[e].kind; + if (kind == Kind::kTensor || kind == Kind::kInvariant) { + // Either the index is really used in the tensor expression, or it is + // set to the undefined index in that dimension. An invariant expression + // is set to a synthetic tensor with undefined indices only. + unsigned s = addSet(); + unsigned t = kind == Kind::kTensor ? tensorExps[e].e0 : syntheticTensor; + latSets[s].push_back(addLat(t, idx, e)); + return s; + } + unsigned s0 = buildLattices(tensorExps[e].e0, idx); + unsigned s1 = buildLattices(tensorExps[e].e1, idx); + switch (kind) { + case Kind::kTensor: + case Kind::kInvariant: + llvm_unreachable("handled above"); + case Kind::kMulF: + case Kind::kMulI: + return takeConj(kind, s0, s1); + case Kind::kAddF: + case Kind::kAddI: + return takeDisj(kind, s0, s1); + } + llvm_unreachable("unexpected expression kind"); +} + +Optional Merger::buildTensorExpFromLinalg(linalg::GenericOp op) { + Operation *yield = op.region().front().getTerminator(); + return buildTensorExp(op, yield->getOperand(0)); +} + +Optional Merger::buildTensorExp(linalg::GenericOp op, Value val) { + if (auto arg = val.dyn_cast()) { + unsigned argN = arg.getArgNumber(); + // Any argument of the generic op that is not marked as a scalar + // argument is considered a tensor, indexed by the implicit loop + // bounds. This includes rank-0 tensor arguments. + if (arg.getOwner()->getParentOp() == op) { + OpOperand *t = op.getInputAndOutputOperands()[argN]; + if (!op.isScalar(t)) + return addExp(Kind::kTensor, argN); + val = t->get(); // get scalar value + } + // Any other argument (marked as scalar argument for the generic op + // or belonging to an enveloping op) is considered invariant. + return addExp(Kind::kInvariant, val); + } + // Something defined outside is invariant. + Operation *def = val.getDefiningOp(); + if (def->getBlock() != &op.region().front()) + return addExp(Kind::kInvariant, val); + // Construct binary operations if subexpressions could be built. + if (def->getNumOperands() == 2) { + auto x = buildTensorExp(op, def->getOperand(0)); + auto y = buildTensorExp(op, def->getOperand(1)); + if (x.hasValue() && y.hasValue()) { + unsigned e0 = x.getValue(); + unsigned e1 = y.getValue(); + if (isa(def)) + return addExp(Kind::kMulF, e0, e1); + if (isa(def)) + return addExp(Kind::kMulI, e0, e1); + if (isa(def)) + return addExp(Kind::kAddF, e0, e1); + if (isa(def)) + return addExp(Kind::kAddI, e0, e1); + } + } + // Cannot build. + return None; +} + } // namespace sparse_tensor } // namespace mlir From 369216ab3132623e98c8c974ded915803f080dcf Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Thu, 1 Jul 2021 12:31:45 -0400 Subject: [PATCH 435/619] [OpenMP][Offloading] Refined return value of `DeviceTy::getOrAllocTgtPtr` `DeviceTy::getOrAllocTgtPtr` just returns a target pointer. In addition, two bool values (`IsNew` and `IsHostPtr`) are passed by reference to make the change in the function available in callee. In this patch, a struct, which contains the target pointer, two flags, and an iterator to the map table entry corresponding to the queried host pointer, will be returned. In addition to make the logic clearer regarding the two bool values, this paves the way for the next patch to fix the data race in `bug49334.cpp` by attaching an event to the map table entry (and that's why we need the iterator). Reviewed By: grokos Differential Revision: https://reviews.llvm.org/D104382 --- openmp/libomptarget/src/device.cpp | 69 ++++++++++++++------------- openmp/libomptarget/src/device.h | 27 +++++++++-- openmp/libomptarget/src/omptarget.cpp | 25 +++++----- 3 files changed, 73 insertions(+), 48 deletions(-) diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp index 36bf23d41bfd1..b1efb3dc6a7c8 100644 --- a/openmp/libomptarget/src/device.cpp +++ b/openmp/libomptarget/src/device.cpp @@ -191,50 +191,55 @@ LookupResult DeviceTy::lookupMapping(void *HstPtrBegin, int64_t Size) { } // Used by targetDataBegin -// Return the target pointer begin (where the data will be moved). +// Return a struct containing target pointer begin (where the data will be +// moved). // Allocate memory if this is the first occurrence of this mapping. // Increment the reference counter. -// If NULL is returned, then either data allocation failed or the user tried -// to do an illegal mapping. -void *DeviceTy::getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase, - int64_t Size, map_var_info_t HstPtrName, - bool &IsNew, bool &IsHostPtr, bool IsImplicit, - bool UpdateRefCount, bool HasCloseModifier, - bool HasPresentModifier) { - void *rc = NULL; - IsHostPtr = false; - IsNew = false; +// If the target pointer is NULL, then either data allocation failed or the user +// tried to do an illegal mapping. +// The returned struct also returns an iterator to the map table entry +// corresponding to the host pointer (if exists), and two flags indicating +// whether the entry is just created, and if the target pointer included is +// actually a host pointer (when unified memory enabled). +TargetPointerResultTy +DeviceTy::getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase, int64_t Size, + map_var_info_t HstPtrName, bool IsImplicit, + bool UpdateRefCount, bool HasCloseModifier, + bool HasPresentModifier) { + void *TargetPointer = NULL; + bool IsNew = false; + bool IsHostPtr = false; DataMapMtx.lock(); - LookupResult lr = lookupMapping(HstPtrBegin, Size); + LookupResult LR = lookupMapping(HstPtrBegin, Size); + auto Entry = LR.Entry; // Check if the pointer is contained. // If a variable is mapped to the device manually by the user - which would // lead to the IsContained flag to be true - then we must ensure that the // device address is returned even under unified memory conditions. - if (lr.Flags.IsContained || - ((lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) && IsImplicit)) { - auto &HT = *lr.Entry; - IsNew = false; + if (LR.Flags.IsContained || + ((LR.Flags.ExtendsBefore || LR.Flags.ExtendsAfter) && IsImplicit)) { + auto &HT = *LR.Entry; if (UpdateRefCount) HT.incRefCount(); - uintptr_t tp = HT.TgtPtrBegin + ((uintptr_t)HstPtrBegin - HT.HstPtrBegin); + uintptr_t Ptr = HT.TgtPtrBegin + ((uintptr_t)HstPtrBegin - HT.HstPtrBegin); INFO(OMP_INFOTYPE_MAPPING_EXISTS, DeviceID, "Mapping exists%s with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD ", " "Size=%" PRId64 ", RefCount=%s (%s), Name=%s\n", - (IsImplicit ? " (implicit)" : ""), DPxPTR(HstPtrBegin), DPxPTR(tp), + (IsImplicit ? " (implicit)" : ""), DPxPTR(HstPtrBegin), DPxPTR(Ptr), Size, HT.refCountToStr().c_str(), UpdateRefCount ? "incremented" : "update suppressed", (HstPtrName) ? getNameFromMapping(HstPtrName).c_str() : "unknown"); - rc = (void *)tp; - } else if ((lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) && !IsImplicit) { + TargetPointer = (void *)Ptr; + } else if ((LR.Flags.ExtendsBefore || LR.Flags.ExtendsAfter) && !IsImplicit) { // Explicit extension of mapped data - not allowed. MESSAGE("explicit extension not allowed: host address specified is " DPxMOD " (%" PRId64 " bytes), but device allocation maps to host at " DPxMOD " (%" PRId64 " bytes)", - DPxPTR(HstPtrBegin), Size, DPxPTR(lr.Entry->HstPtrBegin), - lr.Entry->HstPtrEnd - lr.Entry->HstPtrBegin); + DPxPTR(HstPtrBegin), Size, DPxPTR(Entry->HstPtrBegin), + Entry->HstPtrEnd - Entry->HstPtrBegin); if (HasPresentModifier) MESSAGE("device mapping required by 'present' map type modifier does not " "exist for host address " DPxMOD " (%" PRId64 " bytes)", @@ -252,7 +257,7 @@ void *DeviceTy::getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase, "memory\n", DPxPTR((uintptr_t)HstPtrBegin), Size); IsHostPtr = true; - rc = HstPtrBegin; + TargetPointer = HstPtrBegin; } } else if (HasPresentModifier) { DP("Mapping required by 'present' map type modifier does not exist for " @@ -264,24 +269,22 @@ void *DeviceTy::getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase, } else if (Size) { // If it is not contained and Size > 0, we should create a new entry for it. IsNew = true; - uintptr_t tp = (uintptr_t)allocData(Size, HstPtrBegin); - const HostDataToTargetTy &newEntry = - *HostDataToTargetMap - .emplace((uintptr_t)HstPtrBase, (uintptr_t)HstPtrBegin, - (uintptr_t)HstPtrBegin + Size, tp, HstPtrName) - .first; + uintptr_t Ptr = (uintptr_t)allocData(Size, HstPtrBegin); + Entry = HostDataToTargetMap + .emplace((uintptr_t)HstPtrBase, (uintptr_t)HstPtrBegin, + (uintptr_t)HstPtrBegin + Size, Ptr, HstPtrName) + .first; INFO(OMP_INFOTYPE_MAPPING_CHANGED, DeviceID, "Creating new map entry with " "HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD ", Size=%ld, " "RefCount=%s, Name=%s\n", - DPxPTR(HstPtrBegin), DPxPTR(tp), Size, - newEntry.refCountToStr().c_str(), + DPxPTR(HstPtrBegin), DPxPTR(Ptr), Size, Entry->refCountToStr().c_str(), (HstPtrName) ? getNameFromMapping(HstPtrName).c_str() : "unknown"); - rc = (void *)tp; + TargetPointer = (void *)Ptr; } DataMapMtx.unlock(); - return rc; + return {{IsNew, IsHostPtr}, Entry, TargetPointer}; } // Used by targetDataBegin, targetDataEnd, targetDataUpdate and target. diff --git a/openmp/libomptarget/src/device.h b/openmp/libomptarget/src/device.h index 69fc65d983d5c..3aa7f0b73c4f9 100644 --- a/openmp/libomptarget/src/device.h +++ b/openmp/libomptarget/src/device.h @@ -128,6 +128,23 @@ struct LookupResult { LookupResult() : Flags({0, 0, 0}), Entry() {} }; +/// This struct will be returned by \p DeviceTy::getOrAllocTgtPtr which provides +/// more data than just a target pointer. +struct TargetPointerResultTy { + struct { + /// If the map table entry is just created + unsigned IsNewEntry : 1; + /// If the pointer is actually a host pointer (when unified memory enabled) + unsigned IsHostPointer : 1; + } Flags = {0, 0}; + + /// The iterator to the corresponding map table entry + HostDataToTargetListTy::iterator MapTableEntry{}; + + /// The corresponding target pointer + void *TargetPointer = nullptr; +}; + /// Map for shadow pointers struct ShadowPtrValTy { void *HstPtrVal; @@ -179,10 +196,12 @@ struct DeviceTy { uint64_t getMapEntryRefCnt(void *HstPtrBegin); LookupResult lookupMapping(void *HstPtrBegin, int64_t Size); - void *getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase, int64_t Size, - map_var_info_t HstPtrName, bool &IsNew, - bool &IsHostPtr, bool IsImplicit, bool UpdateRefCount, - bool HasCloseModifier, bool HasPresentModifier); + TargetPointerResultTy getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase, + int64_t Size, + map_var_info_t HstPtrName, + bool IsImplicit, bool UpdateRefCount, + bool HasCloseModifier, + bool HasPresentModifier); void *getTgtPtrBegin(void *HstPtrBegin, int64_t Size); void *getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast, bool UpdateRefCount, bool &IsHostPtr, diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp index dcc1f61dff327..e187e3d650d60 100644 --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -458,7 +458,7 @@ int targetDataBegin(ident_t *loc, DeviceTy &Device, int32_t arg_num, // Address of pointer on the host and device, respectively. void *Pointer_HstPtrBegin, *PointerTgtPtrBegin; - bool IsNew, Pointer_IsNew; + TargetPointerResultTy Pointer_TPR; bool IsHostPtr = false; bool IsImplicit = arg_types[i] & OMP_TGT_MAPTYPE_IMPLICIT; // Force the creation of a device side copy of the data when: @@ -487,10 +487,11 @@ int targetDataBegin(ident_t *loc, DeviceTy &Device, int32_t arg_num, // entry for a global that might not already be allocated by the time the // PTR_AND_OBJ entry is handled below, and so the allocation might fail // when HasPresentModifier. - PointerTgtPtrBegin = Device.getOrAllocTgtPtr( - HstPtrBase, HstPtrBase, sizeof(void *), nullptr, Pointer_IsNew, - IsHostPtr, IsImplicit, UpdateRef, HasCloseModifier, - HasPresentModifier); + Pointer_TPR = Device.getOrAllocTgtPtr( + HstPtrBase, HstPtrBase, sizeof(void *), nullptr, IsImplicit, + UpdateRef, HasCloseModifier, HasPresentModifier); + PointerTgtPtrBegin = Pointer_TPR.TargetPointer; + IsHostPtr = Pointer_TPR.Flags.IsHostPointer; if (!PointerTgtPtrBegin) { REPORT("Call to getOrAllocTgtPtr returned null pointer (%s).\n", HasPresentModifier ? "'present' map type modifier" @@ -500,7 +501,7 @@ int targetDataBegin(ident_t *loc, DeviceTy &Device, int32_t arg_num, DP("There are %zu bytes allocated at target address " DPxMOD " - is%s new" "\n", sizeof(void *), DPxPTR(PointerTgtPtrBegin), - (Pointer_IsNew ? "" : " not")); + (Pointer_TPR.Flags.IsNewEntry ? "" : " not")); Pointer_HstPtrBegin = HstPtrBase; // modify current entry. HstPtrBase = *(void **)HstPtrBase; @@ -510,9 +511,11 @@ int targetDataBegin(ident_t *loc, DeviceTy &Device, int32_t arg_num, (!FromMapper || i != 0); // subsequently update ref count of pointee } - void *TgtPtrBegin = Device.getOrAllocTgtPtr( - HstPtrBegin, HstPtrBase, data_size, HstPtrName, IsNew, IsHostPtr, - IsImplicit, UpdateRef, HasCloseModifier, HasPresentModifier); + auto TPR = Device.getOrAllocTgtPtr(HstPtrBegin, HstPtrBase, data_size, + HstPtrName, IsImplicit, UpdateRef, + HasCloseModifier, HasPresentModifier); + void *TgtPtrBegin = TPR.TargetPointer; + IsHostPtr = TPR.Flags.IsHostPointer; // If data_size==0, then the argument could be a zero-length pointer to // NULL, so getOrAlloc() returning NULL is not an error. if (!TgtPtrBegin && (data_size || HasPresentModifier)) { @@ -523,7 +526,7 @@ int targetDataBegin(ident_t *loc, DeviceTy &Device, int32_t arg_num, } DP("There are %" PRId64 " bytes allocated at target address " DPxMOD " - is%s new\n", - data_size, DPxPTR(TgtPtrBegin), (IsNew ? "" : " not")); + data_size, DPxPTR(TgtPtrBegin), (TPR.Flags.IsNewEntry ? "" : " not")); if (arg_types[i] & OMP_TGT_MAPTYPE_RETURN_PARAM) { uintptr_t Delta = (uintptr_t)HstPtrBegin - (uintptr_t)HstPtrBase; @@ -536,7 +539,7 @@ int targetDataBegin(ident_t *loc, DeviceTy &Device, int32_t arg_num, bool copy = false; if (!(PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) || HasCloseModifier) { - if (IsNew || (arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS)) { + if (TPR.Flags.IsNewEntry || (arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS)) { copy = true; } else if ((arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) && !(arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) { From c605dfcfc0c1c1605d254b00536351e2e8a87e19 Mon Sep 17 00:00:00 2001 From: Arjun P Date: Thu, 1 Jul 2021 20:12:56 +0530 Subject: [PATCH 436/619] [MLIR] FlatAffineConstraints: Use Matrix objects to store the constraints This results in significant deduplication of code. This patch is not expected to change any functionality, it's just some simplification in preparation for future work. Also slightly simplified some code that was being touched anyway and added some unit tests for some functions that were touched. Reviewed By: bondhugula Differential Revision: https://reviews.llvm.org/D105152 --- mlir/include/mlir/Analysis/AffineStructures.h | 80 ++---- .../include/mlir/Analysis/Presburger/Matrix.h | 77 +++++- mlir/lib/Analysis/AffineStructures.cpp | 254 ++++-------------- mlir/lib/Analysis/Presburger/Matrix.cpp | 128 ++++++++- .../Analysis/AffineStructuresTest.cpp | 40 +++ .../Analysis/Presburger/MatrixTest.cpp | 77 ++++++ 6 files changed, 385 insertions(+), 271 deletions(-) diff --git a/mlir/include/mlir/Analysis/AffineStructures.h b/mlir/include/mlir/Analysis/AffineStructures.h index 6aa0a38243b97..e96d52c225069 100644 --- a/mlir/include/mlir/Analysis/AffineStructures.h +++ b/mlir/include/mlir/Analysis/AffineStructures.h @@ -67,13 +67,12 @@ class FlatAffineConstraints { unsigned numReservedCols, unsigned numDims = 0, unsigned numSymbols = 0, unsigned numLocals = 0, ArrayRef> idArgs = {}) - : numReservedCols(numReservedCols), numDims(numDims), - numSymbols(numSymbols) { - assert(numReservedCols >= numDims + numSymbols + 1); - assert(idArgs.empty() || idArgs.size() == numDims + numSymbols + numLocals); - equalities.reserve(numReservedCols * numReservedEqualities); - inequalities.reserve(numReservedCols * numReservedInequalities); - numIds = numDims + numSymbols + numLocals; + : numIds(numDims + numSymbols + numLocals), numDims(numDims), + numSymbols(numSymbols), + equalities(0, numIds + 1, numReservedEqualities, numReservedCols), + inequalities(0, numIds + 1, numReservedInequalities, numReservedCols) { + assert(numReservedCols >= numIds + 1); + assert(idArgs.empty() || idArgs.size() == numIds); ids.reserve(numReservedCols); if (idArgs.empty()) ids.resize(numIds, None); @@ -86,17 +85,11 @@ class FlatAffineConstraints { FlatAffineConstraints(unsigned numDims = 0, unsigned numSymbols = 0, unsigned numLocals = 0, ArrayRef> idArgs = {}) - : numReservedCols(numDims + numSymbols + numLocals + 1), numDims(numDims), - numSymbols(numSymbols) { - assert(numReservedCols >= numDims + numSymbols + 1); - assert(idArgs.empty() || idArgs.size() == numDims + numSymbols + numLocals); - numIds = numDims + numSymbols + numLocals; - ids.reserve(numIds); - if (idArgs.empty()) - ids.resize(numIds, None); - else - ids.append(idArgs.begin(), idArgs.end()); - } + : FlatAffineConstraints(/*numReservedInequalities=*/0, + /*numReservedEqualities=*/0, + /*numReservedCols=*/numDims + numSymbols + + numLocals + 1, + numDims, numSymbols, numLocals, idArgs) {} /// Return a system with no constraints, i.e., one which is satisfied by all /// points. @@ -113,8 +106,6 @@ class FlatAffineConstraints { /// Creates an affine constraint system from an IntegerSet. explicit FlatAffineConstraints(IntegerSet set); - FlatAffineConstraints(const FlatAffineConstraints &other); - FlatAffineConstraints(ArrayRef avmRef, IntegerSet set); @@ -173,51 +164,38 @@ class FlatAffineConstraints { std::unique_ptr clone() const; /// Returns the value at the specified equality row and column. - inline int64_t atEq(unsigned i, unsigned j) const { - return equalities[i * numReservedCols + j]; - } - inline int64_t &atEq(unsigned i, unsigned j) { - return equalities[i * numReservedCols + j]; - } + inline int64_t atEq(unsigned i, unsigned j) const { return equalities(i, j); } + inline int64_t &atEq(unsigned i, unsigned j) { return equalities(i, j); } inline int64_t atIneq(unsigned i, unsigned j) const { - return inequalities[i * numReservedCols + j]; + return inequalities(i, j); } - inline int64_t &atIneq(unsigned i, unsigned j) { - return inequalities[i * numReservedCols + j]; - } + inline int64_t &atIneq(unsigned i, unsigned j) { return inequalities(i, j); } /// Returns the number of columns in the constraint system. inline unsigned getNumCols() const { return numIds + 1; } - inline unsigned getNumEqualities() const { - assert(equalities.size() % numReservedCols == 0 && - "inconsistent equality buffer size"); - return equalities.size() / numReservedCols; - } + inline unsigned getNumEqualities() const { return equalities.getNumRows(); } inline unsigned getNumInequalities() const { - assert(inequalities.size() % numReservedCols == 0 && - "inconsistent inequality buffer size"); - return inequalities.size() / numReservedCols; + return inequalities.getNumRows(); } inline unsigned getNumReservedEqualities() const { - return equalities.capacity() / numReservedCols; + return equalities.getNumReservedRows(); } inline unsigned getNumReservedInequalities() const { - return inequalities.capacity() / numReservedCols; + return inequalities.getNumReservedRows(); } inline ArrayRef getEquality(unsigned idx) const { - return ArrayRef(&equalities[idx * numReservedCols], getNumCols()); + return equalities.getRow(idx); } inline ArrayRef getInequality(unsigned idx) const { - return ArrayRef(&inequalities[idx * numReservedCols], - getNumCols()); + return inequalities.getRow(idx); } /// Adds constraints (lower and upper bounds) for the specified 'affine.for' @@ -649,16 +627,6 @@ class FlatAffineConstraints { /// arrays as needed. void removeIdRange(unsigned idStart, unsigned idLimit); - /// Coefficients of affine equalities (in == 0 form). - SmallVector equalities; - - /// Coefficients of affine inequalities (in >= 0 form). - SmallVector inequalities; - - /// Number of columns reserved. Actual ones in used are returned by - /// getNumCols(). - unsigned numReservedCols; - /// Total number of identifiers. unsigned numIds; @@ -669,6 +637,12 @@ class FlatAffineConstraints { /// analysis). unsigned numSymbols; + /// Coefficients of affine equalities (in == 0 form). + Matrix equalities; + + /// Coefficients of affine inequalities (in >= 0 form). + Matrix inequalities; + /// Values corresponding to the (column) identifiers of this constraint /// system appearing in the order the identifiers correspond to columns. /// Temporary ones or those that aren't associated to any Value are set to diff --git a/mlir/include/mlir/Analysis/Presburger/Matrix.h b/mlir/include/mlir/Analysis/Presburger/Matrix.h index 8ed40bb9c0266..0561aa920d145 100644 --- a/mlir/include/mlir/Analysis/Presburger/Matrix.h +++ b/mlir/include/mlir/Analysis/Presburger/Matrix.h @@ -22,16 +22,26 @@ namespace mlir { -/// This is a simple class to represent a resizable matrix. +/// This is a class to represent a resizable matrix. /// -/// The data is stored in the form of a vector of vectors. +/// More columns and rows can be reserved than are currently used. The data is +/// stored as a single 1D array, viewed as a 2D matrix with nRows rows and +/// nReservedColumns columns, stored in row major form. Thus the element at +/// (i, j) is stored at data[i*nReservedColumns + j]. The reserved but unused +/// columns always have all zero values. The reserved rows are just reserved +/// space in the underlying SmallVector's capacity. class Matrix { public: Matrix() = delete; /// Construct a matrix with the specified number of rows and columns. - /// Initially, the values are default initialized. - Matrix(unsigned rows, unsigned columns); + /// The number of reserved rows and columns will be at least the number + /// specified, and will always be sufficient to accomodate the number of rows + /// and columns specified. + /// + /// Initially, the entries are initialized to ero. + Matrix(unsigned rows, unsigned columns, unsigned reservedRows = 0, + unsigned reservedColumns = 0); /// Return the identity matrix of the specified dimension. static Matrix identity(unsigned dimension); @@ -52,9 +62,50 @@ class Matrix { unsigned getNumColumns() const; + /// Return the maximum number of rows/columns that can be added without + /// incurring a reallocation. + unsigned getNumReservedRows() const; + unsigned getNumReservedColumns() const; + + /// Reserve enough space to resize to the specified number of rows without + /// reallocations. + void reserveRows(unsigned rows); + /// Get an ArrayRef corresponding to the specified row. ArrayRef getRow(unsigned row) const; + /// Insert columns having positions pos, pos + 1, ... pos + count - 1. + /// Columns that were at positions 0 to pos - 1 will stay where they are; + /// columns that were at positions pos to nColumns - 1 will be pushed to the + /// right. pos should be at most nColumns. + void insertColumns(unsigned pos, unsigned count); + void insertColumn(unsigned pos); + + /// Insert rows having positions pos, pos + 1, ... pos + count - 1. + /// Rows that were at positions 0 to pos - 1 will stay where they are; + /// rows that were at positions pos to nColumns - 1 will be pushed to the + /// right. pos should be at most nRows. + void insertRows(unsigned pos, unsigned count); + void insertRow(unsigned pos); + + /// Remove the columns having positions pos, pos + 1, ... pos + count - 1. + /// Rows that were at positions 0 to pos - 1 will stay where they are; + /// columns that were at positions pos + count - 1 or later will be pushed to + /// the right. The columns to be deleted must be valid rows: pos + count - 1 + /// must be at most nColumns - 1. + void removeColumns(unsigned pos, unsigned count); + void removeColumn(unsigned pos); + + /// Remove the rows having positions pos, pos + 1, ... pos + count - 1. + /// Rows that were at positions 0 to pos - 1 will stay where they are; + /// rows that were at positions pos + count - 1 or later will be pushed to the + /// right. The rows to be deleted must be valid rows: pos + count - 1 must be + /// at most nRows - 1. + void removeRows(unsigned pos, unsigned count); + void removeRow(unsigned pos); + + void copyRow(unsigned sourceRow, unsigned targetRow); + /// Add `scale` multiples of the source row to the target row. void addToRow(unsigned sourceRow, unsigned targetRow, int64_t scale); @@ -69,14 +120,26 @@ class Matrix { /// initialized. void resizeVertically(unsigned newNRows); + /// Add an extra row at the bottom of the matrix and return its position. + unsigned appendExtraRow(); + /// Print the matrix. void print(raw_ostream &os) const; void dump() const; -private: - unsigned nRows, nColumns; + /// Return whether the Matrix is in a consistent state with all its + /// invariants satisfied. + bool hasConsistentState() const; - /// Stores the data. data.size() is equal to nRows * nColumns. +private: + /// The current number of rows, columns, and reserved columns. The underlying + /// data vector is viewed as an nRows x nReservedColumns matrix, of which the + /// first nColumns columns are currently in use, and the remaining are + /// reserved columns filled with zeros. + unsigned nRows, nColumns, nReservedColumns; + + /// Stores the data. data.size() is equal to nRows * nReservedColumns. + /// data.capacity() / nReservedColumns is the number of reserved rows. SmallVector data; }; diff --git a/mlir/lib/Analysis/AffineStructures.cpp b/mlir/lib/Analysis/AffineStructures.cpp index 8a3b7b6b9a922..7fb82c86b1eed 100644 --- a/mlir/lib/Analysis/AffineStructures.cpp +++ b/mlir/lib/Analysis/AffineStructures.cpp @@ -144,32 +144,6 @@ LogicalResult mlir::getFlattenedAffineExprs( // FlatAffineConstraints. //===----------------------------------------------------------------------===// -// Copy constructor. -FlatAffineConstraints::FlatAffineConstraints( - const FlatAffineConstraints &other) { - numReservedCols = other.numReservedCols; - numDims = other.getNumDimIds(); - numSymbols = other.getNumSymbolIds(); - numIds = other.getNumIds(); - - auto otherIds = other.getIds(); - ids.reserve(numReservedCols); - ids.append(otherIds.begin(), otherIds.end()); - - unsigned numReservedEqualities = other.getNumReservedEqualities(); - unsigned numReservedInequalities = other.getNumReservedInequalities(); - - equalities.reserve(numReservedEqualities * numReservedCols); - inequalities.reserve(numReservedInequalities * numReservedCols); - - for (unsigned r = 0, e = other.getNumInequalities(); r < e; r++) { - addInequality(other.getInequality(r)); - } - for (unsigned r = 0, e = other.getNumEqualities(); r < e; r++) { - addEquality(other.getEquality(r)); - } -} - // Clones this object. std::unique_ptr FlatAffineConstraints::clone() const { return std::make_unique(*this); @@ -177,11 +151,10 @@ std::unique_ptr FlatAffineConstraints::clone() const { // Construct from an IntegerSet. FlatAffineConstraints::FlatAffineConstraints(IntegerSet set) - : numReservedCols(set.getNumInputs() + 1), - numIds(set.getNumDims() + set.getNumSymbols()), numDims(set.getNumDims()), - numSymbols(set.getNumSymbols()) { - equalities.reserve(set.getNumEqualities() * numReservedCols); - inequalities.reserve(set.getNumInequalities() * numReservedCols); + : numIds(set.getNumDims() + set.getNumSymbols()), numDims(set.getNumDims()), + numSymbols(set.getNumSymbols()), + equalities(0, numIds + 1, set.getNumEqualities(), numIds + 1), + inequalities(0, numIds + 1, set.getNumInequalities(), numIds + 1) { ids.resize(numIds, None); // Flatten expressions and add them to the constraint system. @@ -217,22 +190,13 @@ void FlatAffineConstraints::reset(unsigned numReservedInequalities, ArrayRef idArgs) { assert(newNumReservedCols >= newNumDims + newNumSymbols + newNumLocals + 1 && "minimum 1 column"); - numReservedCols = newNumReservedCols; - numDims = newNumDims; - numSymbols = newNumSymbols; - numIds = numDims + numSymbols + newNumLocals; - assert(idArgs.empty() || idArgs.size() == numIds); + SmallVector, 8> newIds; + if (!idArgs.empty()) + newIds.assign(idArgs.begin(), idArgs.end()); - clearConstraints(); - if (numReservedEqualities >= 1) - equalities.reserve(newNumReservedCols * numReservedEqualities); - if (numReservedInequalities >= 1) - inequalities.reserve(newNumReservedCols * numReservedInequalities); - if (idArgs.empty()) { - ids.resize(numIds, None); - } else { - ids.assign(idArgs.begin(), idArgs.end()); - } + *this = FlatAffineConstraints(numReservedInequalities, numReservedEqualities, + newNumReservedCols, newNumDims, newNumSymbols, + newNumLocals, newIds); } void FlatAffineConstraints::reset(unsigned newNumDims, unsigned newNumSymbols, @@ -247,10 +211,9 @@ void FlatAffineConstraints::append(const FlatAffineConstraints &other) { assert(other.getNumDimIds() == getNumDimIds()); assert(other.getNumSymbolIds() == getNumSymbolIds()); - inequalities.reserve(inequalities.size() + - other.getNumInequalities() * numReservedCols); - equalities.reserve(equalities.size() + - other.getNumEqualities() * numReservedCols); + inequalities.reserveRows(inequalities.getNumRows() + + other.getNumInequalities()); + equalities.reserveRows(equalities.getNumRows() + other.getNumEqualities()); for (unsigned r = 0, e = other.getNumInequalities(); r < e; r++) { addInequality(other.getInequality(r)); @@ -282,17 +245,7 @@ void FlatAffineConstraints::addId(IdKind kind, unsigned pos, Value id) { else assert(pos <= getNumLocalIds()); - unsigned oldNumReservedCols = numReservedCols; - - // Check if a resize is necessary. - if (getNumCols() + 1 > numReservedCols) { - equalities.resize(getNumEqualities() * (getNumCols() + 1)); - inequalities.resize(getNumInequalities() * (getNumCols() + 1)); - numReservedCols++; - } - int absolutePos; - if (kind == IdKind::Dimension) { absolutePos = pos; numDims++; @@ -304,35 +257,8 @@ void FlatAffineConstraints::addId(IdKind kind, unsigned pos, Value id) { } numIds++; - // Note that getNumCols() now will already return the new size, which will be - // at least one. - int numInequalities = static_cast(getNumInequalities()); - int numEqualities = static_cast(getNumEqualities()); - int numCols = static_cast(getNumCols()); - for (int r = numInequalities - 1; r >= 0; r--) { - for (int c = numCols - 2; c >= 0; c--) { - if (c < absolutePos) - atIneq(r, c) = inequalities[r * oldNumReservedCols + c]; - else - atIneq(r, c + 1) = inequalities[r * oldNumReservedCols + c]; - } - atIneq(r, absolutePos) = 0; - } - - for (int r = numEqualities - 1; r >= 0; r--) { - for (int c = numCols - 2; c >= 0; c--) { - // All values in column absolutePositions < absolutePos have the same - // coordinates in the 2-d view of the coefficient buffer. - if (c < absolutePos) - atEq(r, c) = equalities[r * oldNumReservedCols + c]; - else - // Those at absolutePosition >= absolutePos, get a shifted - // absolutePosition. - atEq(r, c + 1) = equalities[r * oldNumReservedCols + c]; - } - // Initialize added dimension to zero. - atEq(r, absolutePos) = 0; - } + inequalities.insertColumn(absolutePos); + equalities.insertColumn(absolutePos); // If an 'id' is provided, insert it; otherwise use None. if (id) @@ -840,9 +766,9 @@ void FlatAffineConstraints::normalizeConstraintsByGCD() { } bool FlatAffineConstraints::hasConsistentState() const { - if (inequalities.size() != getNumInequalities() * numReservedCols) + if (!inequalities.hasConsistentState()) return false; - if (equalities.size() != getNumEqualities() * numReservedCols) + if (!equalities.hasConsistentState()) return false; if (ids.size() != getNumIds()) return false; @@ -923,31 +849,6 @@ static void eliminateFromConstraint(FlatAffineConstraints *constraints, } } -// Remove coefficients in column range [colStart, colLimit) in place. -// This removes in data in the specified column range, and copies any -// remaining valid data into place. -static void shiftColumnsToLeft(FlatAffineConstraints *constraints, - unsigned colStart, unsigned colLimit, - bool isEq) { - assert(colLimit <= constraints->getNumIds()); - if (colLimit <= colStart) - return; - - unsigned numCols = constraints->getNumCols(); - unsigned numRows = isEq ? constraints->getNumEqualities() - : constraints->getNumInequalities(); - unsigned numToEliminate = colLimit - colStart; - for (unsigned r = 0, e = numRows; r < e; ++r) { - for (unsigned c = colLimit; c < numCols; ++c) { - if (isEq) { - constraints->atEq(r, c - numToEliminate) = constraints->atEq(r, c); - } else { - constraints->atIneq(r, c - numToEliminate) = constraints->atIneq(r, c); - } - } - } -} - // Removes identifiers in column range [idStart, idLimit), and copies any // remaining valid data into place, and updates member variables. void FlatAffineConstraints::removeIdRange(unsigned idStart, unsigned idLimit) { @@ -960,11 +861,9 @@ void FlatAffineConstraints::removeIdRange(unsigned idStart, unsigned idLimit) { assert(idStart < numIds && "invalid idStart position"); // TODO: Make 'removeIdRange' a lambda called from here. - // Remove eliminated identifiers from equalities. - shiftColumnsToLeft(this, idStart, idLimit, /*isEq=*/true); - - // Remove eliminated identifiers from inequalities. - shiftColumnsToLeft(this, idStart, idLimit, /*isEq=*/false); + // Remove eliminated identifiers from the constraints.. + equalities.removeColumns(idStart, idLimit - idStart); + inequalities.removeColumns(idStart, idLimit - idStart); // Update members numDims, numSymbols and numIds. unsigned numDimsEliminated = 0; @@ -987,8 +886,6 @@ void FlatAffineConstraints::removeIdRange(unsigned idStart, unsigned idLimit) { numIds = numIds - numColsEliminated; ids.erase(ids.begin() + idStart, ids.begin() + idLimit); - - // No resize necessary. numReservedCols remains the same. } /// Returns the position of the identifier that has the minimum FlatAffineConstraints::getLowerAndUpperBound( @@ -2185,60 +2082,45 @@ LogicalResult FlatAffineConstraints::addSliceBounds(ArrayRef values, void FlatAffineConstraints::addEquality(ArrayRef eq) { assert(eq.size() == getNumCols()); - unsigned offset = equalities.size(); - equalities.resize(equalities.size() + numReservedCols); - std::copy(eq.begin(), eq.end(), equalities.begin() + offset); + unsigned row = equalities.appendExtraRow(); + for (unsigned i = 0, e = eq.size(); i < e; ++i) + equalities(row, i) = eq[i]; } void FlatAffineConstraints::addInequality(ArrayRef inEq) { assert(inEq.size() == getNumCols()); - unsigned offset = inequalities.size(); - inequalities.resize(inequalities.size() + numReservedCols); - std::copy(inEq.begin(), inEq.end(), inequalities.begin() + offset); + unsigned row = inequalities.appendExtraRow(); + for (unsigned i = 0, e = inEq.size(); i < e; ++i) + inequalities(row, i) = inEq[i]; } void FlatAffineConstraints::addConstantLowerBound(unsigned pos, int64_t lb) { assert(pos < getNumCols()); - unsigned offset = inequalities.size(); - inequalities.resize(inequalities.size() + numReservedCols); - std::fill(inequalities.begin() + offset, - inequalities.begin() + offset + getNumCols(), 0); - inequalities[offset + pos] = 1; - inequalities[offset + getNumCols() - 1] = -lb; + unsigned row = inequalities.appendExtraRow(); + inequalities(row, pos) = 1; + inequalities(row, getNumCols() - 1) = -lb; } void FlatAffineConstraints::addConstantUpperBound(unsigned pos, int64_t ub) { assert(pos < getNumCols()); - unsigned offset = inequalities.size(); - inequalities.resize(inequalities.size() + numReservedCols); - std::fill(inequalities.begin() + offset, - inequalities.begin() + offset + getNumCols(), 0); - inequalities[offset + pos] = -1; - inequalities[offset + getNumCols() - 1] = ub; + unsigned row = inequalities.appendExtraRow(); + inequalities(row, pos) = -1; + inequalities(row, getNumCols() - 1) = ub; } void FlatAffineConstraints::addConstantLowerBound(ArrayRef expr, int64_t lb) { - assert(expr.size() == getNumCols()); - unsigned offset = inequalities.size(); - inequalities.resize(inequalities.size() + numReservedCols); - std::fill(inequalities.begin() + offset, - inequalities.begin() + offset + getNumCols(), 0); - std::copy(expr.begin(), expr.end(), inequalities.begin() + offset); - inequalities[offset + getNumCols() - 1] += -lb; + addInequality(expr); + inequalities(inequalities.getNumRows() - 1, getNumCols() - 1) += -lb; } void FlatAffineConstraints::addConstantUpperBound(ArrayRef expr, int64_t ub) { assert(expr.size() == getNumCols()); - unsigned offset = inequalities.size(); - inequalities.resize(inequalities.size() + numReservedCols); - std::fill(inequalities.begin() + offset, - inequalities.begin() + offset + getNumCols(), 0); - for (unsigned i = 0, e = getNumCols(); i < e; i++) { - inequalities[offset + i] = -expr[i]; - } - inequalities[offset + getNumCols() - 1] += ub; + unsigned row = inequalities.appendExtraRow(); + for (unsigned i = 0, e = expr.size(); i < e; ++i) + inequalities(row, i) = -expr[i]; + inequalities(inequalities.getNumRows() - 1, getNumCols() - 1) += ub; } /// Adds a new local identifier as the floordiv of an affine function of other @@ -2311,12 +2193,10 @@ void FlatAffineConstraints::setDimSymbolSeparation(unsigned newSymbolCount) { /// Sets the specified identifier to a constant value. void FlatAffineConstraints::setIdToConstant(unsigned pos, int64_t val) { - unsigned offset = equalities.size(); - equalities.resize(equalities.size() + numReservedCols); - std::fill(equalities.begin() + offset, - equalities.begin() + offset + getNumCols(), 0); - equalities[offset + pos] = 1; - equalities[offset + getNumCols() - 1] = -val; + equalities.resizeVertically(equalities.getNumRows() + 1); + unsigned row = equalities.getNumRows() - 1; + equalities(row, pos) = 1; + equalities(row, getNumCols() - 1) = -val; } /// Sets the specified identifier to a constant value; asserts if the id is not @@ -2330,29 +2210,11 @@ void FlatAffineConstraints::setIdToConstant(Value id, int64_t val) { } void FlatAffineConstraints::removeEquality(unsigned pos) { - unsigned numEqualities = getNumEqualities(); - assert(pos < numEqualities); - unsigned outputIndex = pos * numReservedCols; - unsigned inputIndex = (pos + 1) * numReservedCols; - unsigned numElemsToCopy = (numEqualities - pos - 1) * numReservedCols; - std::copy(equalities.begin() + inputIndex, - equalities.begin() + inputIndex + numElemsToCopy, - equalities.begin() + outputIndex); - assert(equalities.size() >= numReservedCols); - equalities.resize(equalities.size() - numReservedCols); + equalities.removeRow(pos); } void FlatAffineConstraints::removeInequality(unsigned pos) { - unsigned numInequalities = getNumInequalities(); - assert(pos < numInequalities && "invalid position"); - unsigned outputIndex = pos * numReservedCols; - unsigned inputIndex = (pos + 1) * numReservedCols; - unsigned numElemsToCopy = (numInequalities - pos - 1) * numReservedCols; - std::copy(inequalities.begin() + inputIndex, - inequalities.begin() + inputIndex + numElemsToCopy, - inequalities.begin() + outputIndex); - assert(inequalities.size() >= numReservedCols); - inequalities.resize(inequalities.size() - numReservedCols); + inequalities.removeRow(pos); } /// Finds an equality that equates the specified identifier to a constant. @@ -2716,7 +2578,7 @@ void FlatAffineConstraints::removeTrivialRedundancy() { // Detect and mark redundant constraints. SmallVector redunIneq(getNumInequalities(), false); for (unsigned r = 0, e = getNumInequalities(); r < e; r++) { - int64_t *rowStart = inequalities.data() + numReservedCols * r; + int64_t *rowStart = &inequalities(r, 0); auto row = ArrayRef(rowStart, getNumCols()); if (isTriviallyValid(r) || !rowSet.insert(row).second) { redunIneq[r] = true; @@ -2745,21 +2607,13 @@ void FlatAffineConstraints::removeTrivialRedundancy() { } } - auto copyRow = [&](unsigned src, unsigned dest) { - if (src == dest) - return; - for (unsigned c = 0, e = getNumCols(); c < e; c++) { - atIneq(dest, c) = atIneq(src, c); - } - }; - // Scan to get rid of all rows marked redundant, in-place. unsigned pos = 0; - for (unsigned r = 0, e = getNumInequalities(); r < e; r++) { + for (unsigned r = 0, e = getNumInequalities(); r < e; r++) if (!redunIneq[r]) - copyRow(r, pos++); - } - inequalities.resize(numReservedCols * pos); + inequalities.copyRow(r, pos++); + + inequalities.resizeVertically(pos); // TODO: consider doing this for equalities as well, but probably not worth // the savings. @@ -3053,8 +2907,8 @@ void FlatAffineConstraints::projectOut(Value id) { } void FlatAffineConstraints::clearConstraints() { - equalities.clear(); - inequalities.clear(); + equalities.resizeVertically(0); + inequalities.resizeVertically(0); } namespace { diff --git a/mlir/lib/Analysis/Presburger/Matrix.cpp b/mlir/lib/Analysis/Presburger/Matrix.cpp index 4a5a53921548c..1b5861012d3cf 100644 --- a/mlir/lib/Analysis/Presburger/Matrix.cpp +++ b/mlir/lib/Analysis/Presburger/Matrix.cpp @@ -7,11 +7,17 @@ //===----------------------------------------------------------------------===// #include "mlir/Analysis/Presburger/Matrix.h" +#include "llvm/Support/MathExtras.h" namespace mlir { -Matrix::Matrix(unsigned rows, unsigned columns) - : nRows(rows), nColumns(columns), data(nRows * nColumns) {} +Matrix::Matrix(unsigned rows, unsigned columns, unsigned reservedRows, + unsigned reservedColumns) + : nRows(rows), nColumns(columns), + nReservedColumns(std::max(nColumns, reservedColumns)), + data(nRows * nReservedColumns) { + data.reserve(std::max(nRows, reservedRows) * nReservedColumns); +} Matrix Matrix::identity(unsigned dimension) { Matrix matrix(dimension, dimension); @@ -21,15 +27,15 @@ Matrix Matrix::identity(unsigned dimension) { } int64_t &Matrix::at(unsigned row, unsigned column) { - assert(row < getNumRows() && "Row outside of range"); - assert(column < getNumColumns() && "Column outside of range"); - return data[row * nColumns + column]; + assert(row < nRows && "Row outside of range"); + assert(column < nColumns && "Column outside of range"); + return data[row * nReservedColumns + column]; } int64_t Matrix::at(unsigned row, unsigned column) const { - assert(row < getNumRows() && "Row outside of range"); - assert(column < getNumColumns() && "Column outside of range"); - return data[row * nColumns + column]; + assert(row < nRows && "Row outside of range"); + assert(column < nColumns && "Column outside of range"); + return data[row * nReservedColumns + column]; } int64_t &Matrix::operator()(unsigned row, unsigned column) { @@ -44,9 +50,24 @@ unsigned Matrix::getNumRows() const { return nRows; } unsigned Matrix::getNumColumns() const { return nColumns; } +unsigned Matrix::getNumReservedColumns() const { return nReservedColumns; } + +unsigned Matrix::getNumReservedRows() const { + return data.capacity() / nReservedColumns; +} + +void Matrix::reserveRows(unsigned rows) { + data.reserve(rows * nReservedColumns); +} + +unsigned Matrix::appendExtraRow() { + resizeVertically(nRows + 1); + return nRows - 1; +} + void Matrix::resizeVertically(unsigned newNRows) { nRows = newNRows; - data.resize(nRows * nColumns); + data.resize(nRows * nReservedColumns); } void Matrix::swapRows(unsigned row, unsigned otherRow) { @@ -68,7 +89,81 @@ void Matrix::swapColumns(unsigned column, unsigned otherColumn) { } ArrayRef Matrix::getRow(unsigned row) const { - return {&data[row * nColumns], nColumns}; + return {&data[row * nReservedColumns], nColumns}; +} + +void Matrix::insertColumn(unsigned pos) { insertColumns(pos, 1); } +void Matrix::insertColumns(unsigned pos, unsigned count) { + if (count == 0) + return; + assert(pos <= nColumns); + unsigned oldNReservedColumns = nReservedColumns; + if (nColumns + count > nReservedColumns) { + nReservedColumns = llvm::NextPowerOf2(nColumns + count); + data.resize(nRows * nReservedColumns); + } + nColumns += count; + + for (int ri = nRows - 1; ri >= 0; --ri) { + for (int ci = nReservedColumns - 1; ci >= 0; --ci) { + unsigned r = ri; + unsigned c = ci; + int64_t &dest = data[r * nReservedColumns + c]; + if (c >= nColumns) + dest = 0; + else if (c >= pos + count) + dest = data[r * oldNReservedColumns + c - count]; + else if (c >= pos) + dest = 0; + else + dest = data[r * oldNReservedColumns + c]; + } + } +} + +void Matrix::removeColumn(unsigned pos) { removeColumns(pos, 1); } +void Matrix::removeColumns(unsigned pos, unsigned count) { + if (count == 0) + return; + assert(pos + count - 1 < nColumns); + for (unsigned r = 0; r < nRows; ++r) { + for (unsigned c = pos; c < nColumns - count; ++c) + at(r, c) = at(r, c + count); + for (unsigned c = nColumns - count; c < nColumns; ++c) + at(r, c) = 0; + } + nColumns -= count; +} + +void Matrix::insertRow(unsigned pos) { insertRows(pos, 1); } +void Matrix::insertRows(unsigned pos, unsigned count) { + if (count == 0) + return; + + assert(pos <= nRows); + resizeVertically(nRows + count); + for (int r = nRows - 1; r >= int(pos + count); --r) + copyRow(r - count, r); + for (int r = pos + count - 1; r >= int(pos); --r) + for (unsigned c = 0; c < nColumns; ++c) + at(r, c) = 0; +} + +void Matrix::removeRow(unsigned pos) { removeRows(pos, 1); } +void Matrix::removeRows(unsigned pos, unsigned count) { + if (count == 0) + return; + assert(pos + count - 1 <= nRows); + for (unsigned r = pos; r + count < nRows; ++r) + copyRow(r + count, r); + resizeVertically(nRows - count); +} + +void Matrix::copyRow(unsigned sourceRow, unsigned targetRow) { + if (sourceRow == targetRow) + return; + for (unsigned c = 0; c < nColumns; ++c) + at(targetRow, c) = at(sourceRow, c); } void Matrix::addToRow(unsigned sourceRow, unsigned targetRow, int64_t scale) { @@ -76,7 +171,6 @@ void Matrix::addToRow(unsigned sourceRow, unsigned targetRow, int64_t scale) { return; for (unsigned col = 0; col < nColumns; ++col) at(targetRow, col) += scale * at(sourceRow, col); - return; } void Matrix::addToColumn(unsigned sourceColumn, unsigned targetColumn, @@ -102,4 +196,16 @@ void Matrix::print(raw_ostream &os) const { void Matrix::dump() const { print(llvm::errs()); } +bool Matrix::hasConsistentState() const { + if (data.size() != nRows * nReservedColumns) + return false; + if (nColumns > nReservedColumns) + return false; + for (unsigned r = 0; r < nRows; ++r) + for (unsigned c = nColumns; c < nReservedColumns; ++c) + if (data[r * nReservedColumns + c] != 0) + return false; + return true; +} + } // namespace mlir diff --git a/mlir/unittests/Analysis/AffineStructuresTest.cpp b/mlir/unittests/Analysis/AffineStructuresTest.cpp index 2121fec978d88..3ee6f049c9a20 100644 --- a/mlir/unittests/Analysis/AffineStructuresTest.cpp +++ b/mlir/unittests/Analysis/AffineStructuresTest.cpp @@ -547,4 +547,44 @@ TEST(FlatAffineConstraintsTest, removeRedundantConstraintsTest) { } } +TEST(FlatAffineConstraintsTest, addConstantUpperBound) { + FlatAffineConstraints fac = makeFACFromConstraints(2, {}, {}); + fac.addConstantUpperBound(0, 1); + EXPECT_EQ(fac.atIneq(0, 0), -1); + EXPECT_EQ(fac.atIneq(0, 1), 0); + EXPECT_EQ(fac.atIneq(0, 2), 1); + + fac.addConstantUpperBound({1, 2, 3}, 1); + EXPECT_EQ(fac.atIneq(1, 0), -1); + EXPECT_EQ(fac.atIneq(1, 1), -2); + EXPECT_EQ(fac.atIneq(1, 2), -2); +} + +TEST(FlatAffineConstraintsTest, addConstantLowerBound) { + FlatAffineConstraints fac = makeFACFromConstraints(2, {}, {}); + fac.addConstantLowerBound(0, 1); + EXPECT_EQ(fac.atIneq(0, 0), 1); + EXPECT_EQ(fac.atIneq(0, 1), 0); + EXPECT_EQ(fac.atIneq(0, 2), -1); + + fac.addConstantLowerBound({1, 2, 3}, 1); + EXPECT_EQ(fac.atIneq(1, 0), 1); + EXPECT_EQ(fac.atIneq(1, 1), 2); + EXPECT_EQ(fac.atIneq(1, 2), 2); +} + +TEST(FlatAffineConstraintsTest, clearConstraints) { + FlatAffineConstraints fac = makeFACFromConstraints(1, {}, {}); + + fac.addInequality({1, 0}); + EXPECT_EQ(fac.atIneq(0, 0), 1); + EXPECT_EQ(fac.atIneq(0, 1), 0); + + fac.clearConstraints(); + + fac.addInequality({1, 0}); + EXPECT_EQ(fac.atIneq(0, 0), 1); + EXPECT_EQ(fac.atIneq(0, 1), 0); +} + } // namespace mlir diff --git a/mlir/unittests/Analysis/Presburger/MatrixTest.cpp b/mlir/unittests/Analysis/Presburger/MatrixTest.cpp index 4d8801b579a79..9c892f73249e1 100644 --- a/mlir/unittests/Analysis/Presburger/MatrixTest.cpp +++ b/mlir/unittests/Analysis/Presburger/MatrixTest.cpp @@ -75,6 +75,7 @@ TEST(MatrixTest, resizeVertically) { mat(row, col) = 10 * row + col; mat.resizeVertically(3); + ASSERT_TRUE(mat.hasConsistentState()); EXPECT_EQ(mat.getNumRows(), 3u); EXPECT_EQ(mat.getNumColumns(), 5u); for (unsigned row = 0; row < 3; ++row) @@ -82,6 +83,7 @@ TEST(MatrixTest, resizeVertically) { EXPECT_EQ(mat(row, col), int(10 * row + col)); mat.resizeVertically(5); + ASSERT_TRUE(mat.hasConsistentState()); EXPECT_EQ(mat.getNumRows(), 5u); EXPECT_EQ(mat.getNumColumns(), 5u); for (unsigned row = 0; row < 5; ++row) @@ -89,4 +91,79 @@ TEST(MatrixTest, resizeVertically) { EXPECT_EQ(mat(row, col), row >= 3 ? 0 : int(10 * row + col)); } +TEST(MatrixTest, insertColumns) { + Matrix mat(5, 5, 5, 10); + EXPECT_EQ(mat.getNumRows(), 5u); + EXPECT_EQ(mat.getNumColumns(), 5u); + for (unsigned row = 0; row < 5; ++row) + for (unsigned col = 0; col < 5; ++col) + mat(row, col) = 10 * row + col; + + mat.insertColumns(3, 100); + ASSERT_TRUE(mat.hasConsistentState()); + EXPECT_EQ(mat.getNumRows(), 5u); + EXPECT_EQ(mat.getNumColumns(), 105u); + for (unsigned row = 0; row < 5; ++row) { + for (unsigned col = 0; col < 105; ++col) { + if (col < 3) + EXPECT_EQ(mat(row, col), int(10 * row + col)); + else if (3 <= col && col <= 102) + EXPECT_EQ(mat(row, col), 0); + else + EXPECT_EQ(mat(row, col), int(10 * row + col - 100)); + } + } + + mat.removeColumns(3, 100); + ASSERT_TRUE(mat.hasConsistentState()); + mat.insertColumns(0, 0); + ASSERT_TRUE(mat.hasConsistentState()); + mat.insertColumn(5); + ASSERT_TRUE(mat.hasConsistentState()); + + EXPECT_EQ(mat.getNumRows(), 5u); + EXPECT_EQ(mat.getNumColumns(), 6u); + for (unsigned row = 0; row < 5; ++row) + for (unsigned col = 0; col < 6; ++col) + EXPECT_EQ(mat(row, col), col == 5 ? 0 : 10 * row + col); +} + +TEST(MatrixTest, insertRows) { + Matrix mat(5, 5, 5, 10); + ASSERT_TRUE(mat.hasConsistentState()); + EXPECT_EQ(mat.getNumRows(), 5u); + EXPECT_EQ(mat.getNumColumns(), 5u); + for (unsigned row = 0; row < 5; ++row) + for (unsigned col = 0; col < 5; ++col) + mat(row, col) = 10 * row + col; + + mat.insertRows(3, 100); + ASSERT_TRUE(mat.hasConsistentState()); + EXPECT_EQ(mat.getNumRows(), 105u); + EXPECT_EQ(mat.getNumColumns(), 5u); + for (unsigned row = 0; row < 105; ++row) { + for (unsigned col = 0; col < 5; ++col) { + if (row < 3) + EXPECT_EQ(mat(row, col), int(10 * row + col)); + else if (3 <= row && row <= 102) + EXPECT_EQ(mat(row, col), 0); + else + EXPECT_EQ(mat(row, col), int(10 * (row - 100) + col)); + } + } + + mat.removeRows(3, 100); + ASSERT_TRUE(mat.hasConsistentState()); + mat.insertRows(0, 0); + ASSERT_TRUE(mat.hasConsistentState()); + mat.insertRow(5); + ASSERT_TRUE(mat.hasConsistentState()); + + EXPECT_EQ(mat.getNumRows(), 6u); + EXPECT_EQ(mat.getNumColumns(), 5u); + for (unsigned row = 0; row < 6; ++row) + for (unsigned col = 0; col < 5; ++col) + EXPECT_EQ(mat(row, col), row == 5 ? 0 : 10 * row + col); +} + } // namespace mlir From bef9464c514ad868eb99c4ad391690ac2eb690c1 Mon Sep 17 00:00:00 2001 From: Geoffrey Martin-Noble Date: Thu, 1 Jul 2021 09:38:35 -0700 Subject: [PATCH 437/619] [Bazel] Fixes for 266a7414d8 Adds LinalgOps dep needed for https://github.com/llvm/llvm-project/commit/266a7414d8f26 Differential Revision: https://reviews.llvm.org/D105294 --- utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index e1119061e12c3..1969e77ea6bd7 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -1575,6 +1575,7 @@ cc_library( includes = ["include"], deps = [ ":IR", + ":LinalgOps", ":SideEffectInterfaces", ":SparseTensorAttrDefsIncGen", ":SparseTensorOpsIncGen", From bc7cc2074b7b7043e05cb46346f1368eb4ae9949 Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Thu, 1 Jul 2021 12:39:17 -0400 Subject: [PATCH 438/619] Fix an accepts-invalid issue with [[]] attributes in the type position in C A user reported an issue to me via email that Clang was accepting some code that GCC was rejecting. After investigation, it turned out to be a general problem of us failing to properly reject attributes written in the type position in C when they don't apply to types. The root cause was a terminology issue -- we sometimes use "CXX11Attr" to mean [[]] in C++11 mode and sometimes [[]] in general -- and this came back to bite us because in this particular case, it really meant [[]] in C++ mode. I fixed the issue by introducing a new function AttributeCommonInfo::isStandardAttributeSyntax() to represent [[]] in either C or C++ mode. This fix pointed out that we've had the issue in some of our existing tests, which have all been corrected. This resolves https://bugs.llvm.org/show_bug.cgi?id=50954. --- clang/include/clang/Basic/AttributeCommonInfo.h | 6 ++++++ clang/lib/Parse/Parser.cpp | 2 +- clang/lib/Sema/SemaDeclAttr.cpp | 10 +++++----- clang/lib/Sema/SemaType.cpp | 14 +++++++------- clang/test/AST/ast-dump-c-attr.c | 5 ----- .../test/Sema/attr-availability-square-brackets.c | 9 +++++---- clang/test/Sema/attr-c2x.c | 10 +++++----- clang/test/Sema/attr-deprecated-c2x.c | 12 ++++++++---- clang/test/Sema/attr-external-source-symbol.c | 14 +++++++------- clang/test/Sema/c2x-maybe_unused-errors.c | 3 +++ clang/test/Sema/overloadable.c | 3 ++- 11 files changed, 49 insertions(+), 39 deletions(-) diff --git a/clang/include/clang/Basic/AttributeCommonInfo.h b/clang/include/clang/Basic/AttributeCommonInfo.h index f4a5db84aa9f1..4be598e109fd8 100644 --- a/clang/include/clang/Basic/AttributeCommonInfo.h +++ b/clang/include/clang/Basic/AttributeCommonInfo.h @@ -155,6 +155,12 @@ class AttributeCommonInfo { bool isC2xAttribute() const { return SyntaxUsed == AS_C2x; } + /// The attribute is spelled [[]] in either C or C++ mode, including standard + /// attributes spelled with a keyword, like alignas. + bool isStandardAttributeSyntax() const { + return isCXX11Attribute() || isC2xAttribute(); + } + bool isKeywordAttribute() const { return SyntaxUsed == AS_Keyword || SyntaxUsed == AS_ContextSensitiveKeyword; } diff --git a/clang/lib/Parse/Parser.cpp b/clang/lib/Parse/Parser.cpp index c0b83db69ce9f..55b25d20db51c 100644 --- a/clang/lib/Parse/Parser.cpp +++ b/clang/lib/Parse/Parser.cpp @@ -1214,7 +1214,7 @@ Decl *Parser::ParseFunctionDefinition(ParsingDeclarator &D, // a definition. Late parsed attributes are checked at the end. if (Tok.isNot(tok::equal)) { for (const ParsedAttr &AL : D.getAttributes()) - if (AL.isKnownToGCC() && !AL.isCXX11Attribute()) + if (AL.isKnownToGCC() && !AL.isStandardAttributeSyntax()) Diag(AL.getLoc(), diag::warn_attribute_on_function_definition) << AL; } diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index d8416c6b5769a..0741b5f6fda92 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -2093,7 +2093,7 @@ static void handleAnalyzerNoReturnAttr(Sema &S, Decl *D, const ParsedAttr &AL) { ValueDecl *VD = dyn_cast(D); if (!VD || (!VD->getType()->isBlockPointerType() && !VD->getType()->isFunctionPointerType())) { - S.Diag(AL.getLoc(), AL.isCXX11Attribute() + S.Diag(AL.getLoc(), AL.isStandardAttributeSyntax() ? diag::err_attribute_wrong_decl_type : diag::warn_attribute_wrong_decl_type) << AL << ExpectedFunctionMethodOrBlock; @@ -2863,7 +2863,7 @@ static void handleWarnUnusedResult(Sema &S, Decl *D, const ParsedAttr &AL) { } StringRef Str; - if ((AL.isCXX11Attribute() || AL.isC2xAttribute()) && !AL.getScopeName()) { + if (AL.isStandardAttributeSyntax() && !AL.getScopeName()) { // The standard attribute cannot be applied to variable declarations such // as a function pointer. if (isa(D)) @@ -7280,8 +7280,8 @@ static void handleDeprecatedAttr(Sema &S, Decl *D, const ParsedAttr &AL) { !S.checkStringLiteralArgumentAttr(AL, 0, Str)) return; - // Only support a single optional message for Declspec and CXX11. - if (AL.isDeclspecAttribute() || AL.isCXX11Attribute()) + // Support a single optional message only for Declspec and [[]] spellings. + if (AL.isDeclspecAttribute() || AL.isStandardAttributeSyntax()) AL.checkAtMostNumArgs(S, 1); else if (AL.isArgExpr(1) && AL.getArgAsExpr(1) && !S.checkStringLiteralArgumentAttr(AL, 1, Replacement)) @@ -7348,7 +7348,7 @@ static void handleNoSanitizeSpecificAttr(Sema &S, Decl *D, // getSpelling() or prettyPrint() on the resulting semantic attribute object // without failing assertions. unsigned TranslatedSpellingIndex = 0; - if (AL.isC2xAttribute() || AL.isCXX11Attribute()) + if (AL.isStandardAttributeSyntax()) TranslatedSpellingIndex = 1; AttributeCommonInfo Info = AL; diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index 2434554ba4652..ef0320fb26f8e 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -634,7 +634,7 @@ static void distributeFunctionTypeAttrFromDeclSpec(TypeProcessingState &state, // C++11 attributes before the decl specifiers actually appertain to // the declarators. Move them straight there. We don't support the // 'put them wherever you like' semantics we allow for GNU attributes. - if (attr.isCXX11Attribute()) { + if (attr.isStandardAttributeSyntax()) { moveAttrFromListToList(attr, state.getCurrentAttributes(), state.getDeclarator().getAttributes()); return; @@ -687,9 +687,9 @@ static void distributeTypeAttrsFromDeclarator(TypeProcessingState &state, // non-owning copy and iterate over that. ParsedAttributesView AttrsCopy{state.getDeclarator().getAttributes()}; for (ParsedAttr &attr : AttrsCopy) { - // Do not distribute C++11 attributes. They have strict rules for what + // Do not distribute [[]] attributes. They have strict rules for what // they appertain to. - if (attr.isCXX11Attribute()) + if (attr.isStandardAttributeSyntax()) continue; switch (attr.getKind()) { @@ -8058,7 +8058,7 @@ static void processTypeAttrs(TypeProcessingState &state, QualType &type, if (attr.isInvalid()) continue; - if (attr.isCXX11Attribute()) { + if (attr.isStandardAttributeSyntax()) { // [[gnu::...]] attributes are treated as declaration attributes, so may // not appertain to a DeclaratorChunk. If we handle them as type // attributes, accept them in that position and diagnose the GCC @@ -8087,8 +8087,8 @@ static void processTypeAttrs(TypeProcessingState &state, QualType &type, // otherwise, add it to the FnAttrs list for rechaining. switch (attr.getKind()) { default: - // A C++11 attribute on a declarator chunk must appertain to a type. - if (attr.isCXX11Attribute() && TAL == TAL_DeclChunk) { + // A [[]] attribute on a declarator chunk must appertain to a type. + if (attr.isStandardAttributeSyntax() && TAL == TAL_DeclChunk) { state.getSema().Diag(attr.getLoc(), diag::err_attribute_not_type_attr) << attr; attr.setUsedAsTypeAttr(); @@ -8096,7 +8096,7 @@ static void processTypeAttrs(TypeProcessingState &state, QualType &type, break; case ParsedAttr::UnknownAttribute: - if (attr.isCXX11Attribute() && TAL == TAL_DeclChunk) + if (attr.isStandardAttributeSyntax() && TAL == TAL_DeclChunk) state.getSema().Diag(attr.getLoc(), diag::warn_unknown_attribute_ignored) << attr << attr.getRange(); diff --git a/clang/test/AST/ast-dump-c-attr.c b/clang/test/AST/ast-dump-c-attr.c index 7d18f0cdc9f13..1f28501f1bb02 100644 --- a/clang/test/AST/ast-dump-c-attr.c +++ b/clang/test/AST/ast-dump-c-attr.c @@ -52,8 +52,3 @@ struct [[deprecated]] Test8; void Test11 [[deprecated]](void); // CHECK: FunctionDecl{{.*}}Test11 // CHECK-NEXT: DeprecatedAttr 0x{{[^ ]*}} "" "" - -void Test12(void) [[deprecated]] {} -// CHECK: FunctionDecl{{.*}}Test12 -// CHECK-NEXT: CompoundStmt -// CHECK-NEXT: DeprecatedAttr 0x{{[^ ]*}} "" "" diff --git a/clang/test/Sema/attr-availability-square-brackets.c b/clang/test/Sema/attr-availability-square-brackets.c index 13dbf0abb17f5..b03e19e4da233 100644 --- a/clang/test/Sema/attr-availability-square-brackets.c +++ b/clang/test/Sema/attr-availability-square-brackets.c @@ -1,11 +1,12 @@ // RUN: %clang_cc1 -triple x86_64-apple-darwin9 -fsyntax-only -fdouble-square-bracket-attributes -verify %s -void f0() [[clang::availability(macosx,introduced=10.4,deprecated=10.2)]]; // expected-warning{{feature cannot be deprecated in macOS version 10.2 before it was introduced in version 10.4; attribute ignored}} -void f1() [[clang::availability(ios,obsoleted=2.1,deprecated=3.0)]]; // expected-warning{{feature cannot be obsoleted in iOS version 2.1 before it was deprecated in version 3.0; attribute ignored}} -void f2() [[clang::availability(ios,introduced=2.1,deprecated=2.1)]]; +[[clang::availability(macosx,introduced=10.4,deprecated=10.2)]] void f0(); // expected-warning{{feature cannot be deprecated in macOS version 10.2 before it was introduced in version 10.4; attribute ignored}} +[[clang::availability(ios,obsoleted=2.1,deprecated=3.0)]] void f1(); // expected-warning{{feature cannot be obsoleted in iOS version 2.1 before it was deprecated in version 3.0; attribute ignored}} +[[clang::availability(ios,introduced=2.1,deprecated=2.1)]] void f2(); +[[clang::availability(macosx,introduced=8.0,deprecated=9.0, message="use CTFontCopyFullName")]] extern void -ATSFontGetName(const char *oName) [[clang::availability(macosx,introduced=8.0,deprecated=9.0, message="use CTFontCopyFullName")]]; // expected-note {{'ATSFontGetName' has been explicitly marked deprecated here}} +ATSFontGetName(const char *oName); // expected-note {{'ATSFontGetName' has been explicitly marked deprecated here}} void test_10095131() { ATSFontGetName("Hello"); // expected-warning {{'ATSFontGetName' is deprecated: first deprecated in macOS 9.0 - use CTFontCopyFullName}} diff --git a/clang/test/Sema/attr-c2x.c b/clang/test/Sema/attr-c2x.c index fae4c5d0fa907..016b1f58e3a73 100644 --- a/clang/test/Sema/attr-c2x.c +++ b/clang/test/Sema/attr-c2x.c @@ -11,16 +11,16 @@ enum [[clang::flag_enum]] EnumFlag { D0 = 1, D1 = 8 }; -void foo(void *c) [[clang::overloadable]]; -void foo(char *c) [[clang::overloadable]]; +[[clang::overloadable]] void foo(void *c); +[[clang::overloadable]] void foo(char *c); void context_okay(void *context [[clang::swift_context]]) [[clang::swiftcall]]; void context_okay2(void *context [[clang::swift_context]], void *selfType, char **selfWitnessTable) [[clang::swiftcall]]; -void *f1(void) [[clang::ownership_returns(foo)]]; -void *f2() [[clang::ownership_returns(foo)]]; // expected-warning {{'ownership_returns' attribute only applies to non-K&R-style functions}} +[[clang::ownership_returns(foo)]] void *f1(void); +[[clang::ownership_returns(foo)]] void *f2(); // expected-warning {{'ownership_returns' attribute only applies to non-K&R-style functions}} -void foo2(void) [[clang::unavailable("not available - replaced")]]; // expected-note {{'foo2' has been explicitly marked unavailable here}} +[[clang::unavailable("not available - replaced")]] void foo2(void); // expected-note {{'foo2' has been explicitly marked unavailable here}} void bar(void) { foo2(); // expected-error {{'foo2' is unavailable: not available - replaced}} } diff --git a/clang/test/Sema/attr-deprecated-c2x.c b/clang/test/Sema/attr-deprecated-c2x.c index 744fb1f7c4002..ba8434e8b094a 100644 --- a/clang/test/Sema/attr-deprecated-c2x.c +++ b/clang/test/Sema/attr-deprecated-c2x.c @@ -1,7 +1,7 @@ -// RUN: %clang_cc1 %s -verify -fsyntax-only --std=c2x +// RUN: %clang_cc1 %s -verify -fsyntax-only -std=c2x -int f() [[deprecated]]; // expected-note 2 {{'f' has been explicitly marked deprecated here}} -void g() [[deprecated]];// expected-note {{'g' has been explicitly marked deprecated here}} +[[deprecated]] int f(); // expected-note 2 {{'f' has been explicitly marked deprecated here}} +[[deprecated]] void g();// expected-note {{'g' has been explicitly marked deprecated here}} void g(); extern int var [[deprecated]]; // expected-note 2 {{'var' has been explicitly marked deprecated here}} @@ -22,7 +22,7 @@ int w() { return var; // expected-warning {{'var' is deprecated}} } -int old_fn() [[deprecated]];// expected-note {{'old_fn' has been explicitly marked deprecated here}} +[[deprecated]] int old_fn();// expected-note {{'old_fn' has been explicitly marked deprecated here}} int old_fn(); int (*fn_ptr)() = old_fn; // expected-warning {{'old_fn' is deprecated}} @@ -52,3 +52,7 @@ struct bar_dep *test3; // expected-warning {{'bar_dep' is deprecated}} void test4(void) { i = 12; // expected-warning {{'i' is deprecated: this is the message}} } + +// Ensure that deprecated only accepts one argument, not the replacement +// argument supported as a GNU extension. +[[deprecated("message", "replacement not supported")]] void test5(void); // expected-error {{'deprecated' attribute takes no more than 1 argument}} diff --git a/clang/test/Sema/attr-external-source-symbol.c b/clang/test/Sema/attr-external-source-symbol.c index dfed609c8e87e..f257a63504d40 100644 --- a/clang/test/Sema/attr-external-source-symbol.c +++ b/clang/test/Sema/attr-external-source-symbol.c @@ -18,14 +18,14 @@ void namedDeclsOnly() { }; } -void threeClauses2() [[clang::external_source_symbol(language="Swift", defined_in="module", generated_declaration)]]; +[[clang::external_source_symbol(language="Swift", defined_in="module", generated_declaration)]] void threeClauses2(); -void twoClauses2() [[clang::external_source_symbol(language="Swift", defined_in="module")]]; +[[clang::external_source_symbol(language="Swift", defined_in="module")]] void twoClauses2(); -void fourClauses2() -[[clang::external_source_symbol(language="Swift", defined_in="module", generated_declaration, generated_declaration)]]; // expected-error {{duplicate 'generated_declaration' clause in an 'external_source_symbol' attribute}} +[[clang::external_source_symbol(language="Swift", defined_in="module", generated_declaration, generated_declaration)]] // expected-error {{duplicate 'generated_declaration' clause in an 'external_source_symbol' attribute}} +void fourClauses2(); -void oneClause2() [[clang::external_source_symbol(generated_declaration)]]; +[[clang::external_source_symbol(generated_declaration)]] void oneClause2(); -void noArguments2() -[[clang::external_source_symbol]]; // expected-error {{'external_source_symbol' attribute takes at least 1 argument}} +[[clang::external_source_symbol]] // expected-error {{'external_source_symbol' attribute takes at least 1 argument}} +void noArguments2(); diff --git a/clang/test/Sema/c2x-maybe_unused-errors.c b/clang/test/Sema/c2x-maybe_unused-errors.c index 72cefd10291a9..bb9931cd8d3da 100644 --- a/clang/test/Sema/c2x-maybe_unused-errors.c +++ b/clang/test/Sema/c2x-maybe_unused-errors.c @@ -10,3 +10,6 @@ struct [[maybe_unused("Wrong")]] S3 { // expected-error {{'maybe_unused' cannot int a; }; +void func(void) { + int a[10] [[maybe_unused]]; // expected-error {{'maybe_unused' attribute cannot be applied to types}} +} diff --git a/clang/test/Sema/overloadable.c b/clang/test/Sema/overloadable.c index 360f3308302e8..b520d76f9e7e8 100644 --- a/clang/test/Sema/overloadable.c +++ b/clang/test/Sema/overloadable.c @@ -1,6 +1,7 @@ -// RUN: %clang_cc1 -fsyntax-only -verify %s -Wincompatible-pointer-types +// RUN: %clang_cc1 -fsyntax-only -fdouble-square-bracket-attributes -verify %s -Wincompatible-pointer-types int var __attribute__((overloadable)); // expected-error{{'overloadable' attribute only applies to functions}} +void bad_attr_target(int) [[clang::overloadable]]; // expected-error{{'overloadable' attribute cannot be applied to types}} void params(void) __attribute__((overloadable(12))); // expected-error {{'overloadable' attribute takes no arguments}} int *f(int) __attribute__((overloadable)); // expected-note{{previous overload of function is here}} From 955f12589940634acc6c9901e8b25534808f691c Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Thu, 1 Jul 2021 09:17:04 -0700 Subject: [PATCH 439/619] [instcombine] Fold overflow check using overflow intrinsic to comparison This follows up to D104665 (which added umulo handling alongside the existing uaddo case), and generalizes for the remaining overflow intrinsics. I went to add analogous handling to LVI, and discovered that LVI already had a more general implementation. Instead, we can port was LVI does to instcombine. (For context, LVI uses makeExactNoWrapRegion to constrain the value 'x' in blocks reached after a branch on the condition `op.with.overflow(x, C).overflow`.) Differential Revision: https://reviews.llvm.org/D104932 --- .../InstCombine/InstructionCombining.cpp | 51 +++++++++---------- llvm/test/Transforms/InstCombine/saddo.ll | 21 +++----- llvm/test/Transforms/InstCombine/smulo.ll | 22 ++++---- llvm/test/Transforms/InstCombine/ssubo.ll | 21 +++----- llvm/test/Transforms/InstCombine/usubo.ll | 21 +++----- 5 files changed, 56 insertions(+), 80 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 25b740503fbe2..8f75a7eac6f95 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -3089,33 +3089,32 @@ Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) { assert(*EV.idx_begin() == 1 && "unexpected extract index for overflow inst"); - // If the normal result of the computation is dead, and the RHS is a - // constant, we can transform this into a range comparison for many cases. - // TODO: We can generalize these for non-constant rhs when the newly - // formed expressions are known to simplify. Constants are merely one - // such case. - // TODO: Handle vector splats. - switch (WO->getIntrinsicID()) { - default: - break; - case Intrinsic::uadd_with_overflow: - // overflow = uadd a, -4 --> overflow = icmp ugt a, 3 - if (ConstantInt *CI = dyn_cast(WO->getRHS())) - return new ICmpInst(ICmpInst::ICMP_UGT, WO->getLHS(), - ConstantExpr::getNot(CI)); - break; - case Intrinsic::umul_with_overflow: - // overflow for umul a, C --> a > UINT_MAX udiv C - // (unless C == 0, in which case no overflow ever occurs) - if (ConstantInt *CI = dyn_cast(WO->getRHS())) { - assert(!CI->isZero() && "handled by instruction simplify"); - auto UMax = APInt::getMaxValue(CI->getType()->getBitWidth()); - auto *Op = - ConstantExpr::getUDiv(ConstantInt::get(CI->getType(), UMax), CI); - return new ICmpInst(ICmpInst::ICMP_UGT, WO->getLHS(), Op); + // If only the overflow result is used, and the right hand side is a + // constant (or constant splat), we can remove the intrinsic by directly + // checking for overflow. + const APInt *C; + if (match(WO->getRHS(), m_APInt(C))) { + // Compute the no-wrap range [X,Y) for LHS given RHS=C, then + // check for the inverted range using range offset trick (i.e. + // use a subtract to shift the range to bottom of either the + // signed or unsigned domain and then use a single compare to + // check range membership). + ConstantRange NWR = + ConstantRange::makeExactNoWrapRegion(WO->getBinaryOp(), *C, + WO->getNoWrapKind()); + APInt Min = WO->isSigned() ? NWR.getSignedMin() : NWR.getUnsignedMin(); + NWR = NWR.subtract(Min); + + CmpInst::Predicate Pred; + APInt NewRHSC; + if (NWR.getEquivalentICmp(Pred, NewRHSC)) { + auto *OpTy = WO->getRHS()->getType(); + auto *NewLHS = Builder.CreateSub(WO->getLHS(), + ConstantInt::get(OpTy, Min)); + return new ICmpInst(ICmpInst::getInversePredicate(Pred), NewLHS, + ConstantInt::get(OpTy, NewRHSC)); } - break; - }; + } } } if (LoadInst *L = dyn_cast(Agg)) diff --git a/llvm/test/Transforms/InstCombine/saddo.ll b/llvm/test/Transforms/InstCombine/saddo.ll index 5a3c8f5844e6a..8585c4efd55f7 100644 --- a/llvm/test/Transforms/InstCombine/saddo.ll +++ b/llvm/test/Transforms/InstCombine/saddo.ll @@ -26,8 +26,7 @@ define i1 @test_constant0(i8 %a) { define i1 @test_constant1(i8 %a) { ; CHECK-LABEL: @test_constant1( -; CHECK-NEXT: [[RES:%.*]] = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 [[A:%.*]], i8 1) -; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i8, i1 } [[RES]], 1 +; CHECK-NEXT: [[OVERFLOW:%.*]] = icmp eq i8 [[A:%.*]], 127 ; CHECK-NEXT: ret i1 [[OVERFLOW]] ; %res = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 %a, i8 1) @@ -37,8 +36,7 @@ define i1 @test_constant1(i8 %a) { define i1 @test_constant2(i8 %a) { ; CHECK-LABEL: @test_constant2( -; CHECK-NEXT: [[RES:%.*]] = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 [[A:%.*]], i8 2) -; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i8, i1 } [[RES]], 1 +; CHECK-NEXT: [[OVERFLOW:%.*]] = icmp sgt i8 [[A:%.*]], 125 ; CHECK-NEXT: ret i1 [[OVERFLOW]] ; %res = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 %a, i8 2) @@ -48,8 +46,7 @@ define i1 @test_constant2(i8 %a) { define i1 @test_constant3(i8 %a) { ; CHECK-LABEL: @test_constant3( -; CHECK-NEXT: [[RES:%.*]] = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 [[A:%.*]], i8 3) -; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i8, i1 } [[RES]], 1 +; CHECK-NEXT: [[OVERFLOW:%.*]] = icmp sgt i8 [[A:%.*]], 124 ; CHECK-NEXT: ret i1 [[OVERFLOW]] ; %res = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 %a, i8 3) @@ -59,8 +56,7 @@ define i1 @test_constant3(i8 %a) { define i1 @test_constant4(i8 %a) { ; CHECK-LABEL: @test_constant4( -; CHECK-NEXT: [[RES:%.*]] = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 [[A:%.*]], i8 4) -; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i8, i1 } [[RES]], 1 +; CHECK-NEXT: [[OVERFLOW:%.*]] = icmp sgt i8 [[A:%.*]], 123 ; CHECK-NEXT: ret i1 [[OVERFLOW]] ; %res = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 %a, i8 4) @@ -70,8 +66,7 @@ define i1 @test_constant4(i8 %a) { define i1 @test_constant127(i8 %a) { ; CHECK-LABEL: @test_constant127( -; CHECK-NEXT: [[RES:%.*]] = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 [[A:%.*]], i8 127) -; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i8, i1 } [[RES]], 1 +; CHECK-NEXT: [[OVERFLOW:%.*]] = icmp sgt i8 [[A:%.*]], 0 ; CHECK-NEXT: ret i1 [[OVERFLOW]] ; %res = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 %a, i8 127) @@ -81,8 +76,7 @@ define i1 @test_constant127(i8 %a) { define i1 @test_constant128(i8 %a) { ; CHECK-LABEL: @test_constant128( -; CHECK-NEXT: [[RES:%.*]] = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 [[A:%.*]], i8 -128) -; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i8, i1 } [[RES]], 1 +; CHECK-NEXT: [[OVERFLOW:%.*]] = icmp slt i8 [[A:%.*]], 0 ; CHECK-NEXT: ret i1 [[OVERFLOW]] ; %res = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 %a, i8 128) @@ -92,8 +86,7 @@ define i1 @test_constant128(i8 %a) { define i1 @test_constant255(i8 %a) { ; CHECK-LABEL: @test_constant255( -; CHECK-NEXT: [[RES:%.*]] = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 [[A:%.*]], i8 -1) -; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i8, i1 } [[RES]], 1 +; CHECK-NEXT: [[OVERFLOW:%.*]] = icmp eq i8 [[A:%.*]], -128 ; CHECK-NEXT: ret i1 [[OVERFLOW]] ; %res = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 %a, i8 255) diff --git a/llvm/test/Transforms/InstCombine/smulo.ll b/llvm/test/Transforms/InstCombine/smulo.ll index 47315202f9527..c909ed1d45ae3 100644 --- a/llvm/test/Transforms/InstCombine/smulo.ll +++ b/llvm/test/Transforms/InstCombine/smulo.ll @@ -35,8 +35,8 @@ define i1 @test_constant1(i8 %a) { define i1 @test_constant2(i8 %a) { ; CHECK-LABEL: @test_constant2( -; CHECK-NEXT: [[RES:%.*]] = tail call { i8, i1 } @llvm.smul.with.overflow.i8(i8 [[A:%.*]], i8 2) -; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i8, i1 } [[RES]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[A:%.*]], 64 +; CHECK-NEXT: [[OVERFLOW:%.*]] = icmp slt i8 [[TMP1]], 0 ; CHECK-NEXT: ret i1 [[OVERFLOW]] ; %res = tail call { i8, i1 } @llvm.smul.with.overflow.i8(i8 %a, i8 2) @@ -46,8 +46,8 @@ define i1 @test_constant2(i8 %a) { define i1 @test_constant3(i8 %a) { ; CHECK-LABEL: @test_constant3( -; CHECK-NEXT: [[RES:%.*]] = tail call { i8, i1 } @llvm.smul.with.overflow.i8(i8 [[A:%.*]], i8 3) -; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i8, i1 } [[RES]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[A:%.*]], 42 +; CHECK-NEXT: [[OVERFLOW:%.*]] = icmp ugt i8 [[TMP1]], 84 ; CHECK-NEXT: ret i1 [[OVERFLOW]] ; %res = tail call { i8, i1 } @llvm.smul.with.overflow.i8(i8 %a, i8 3) @@ -57,8 +57,8 @@ define i1 @test_constant3(i8 %a) { define i1 @test_constant4(i8 %a) { ; CHECK-LABEL: @test_constant4( -; CHECK-NEXT: [[RES:%.*]] = tail call { i8, i1 } @llvm.smul.with.overflow.i8(i8 [[A:%.*]], i8 4) -; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i8, i1 } [[RES]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[A:%.*]], 32 +; CHECK-NEXT: [[OVERFLOW:%.*]] = icmp ugt i8 [[TMP1]], 63 ; CHECK-NEXT: ret i1 [[OVERFLOW]] ; %res = tail call { i8, i1 } @llvm.smul.with.overflow.i8(i8 %a, i8 4) @@ -69,8 +69,8 @@ define i1 @test_constant4(i8 %a) { define i1 @test_constant127(i8 %a) { ; CHECK-LABEL: @test_constant127( -; CHECK-NEXT: [[RES:%.*]] = tail call { i8, i1 } @llvm.smul.with.overflow.i8(i8 [[A:%.*]], i8 127) -; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i8, i1 } [[RES]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[A:%.*]], 1 +; CHECK-NEXT: [[OVERFLOW:%.*]] = icmp ugt i8 [[TMP1]], 2 ; CHECK-NEXT: ret i1 [[OVERFLOW]] ; %res = tail call { i8, i1 } @llvm.smul.with.overflow.i8(i8 %a, i8 127) @@ -80,8 +80,7 @@ define i1 @test_constant127(i8 %a) { define i1 @test_constant128(i8 %a) { ; CHECK-LABEL: @test_constant128( -; CHECK-NEXT: [[RES:%.*]] = tail call { i8, i1 } @llvm.smul.with.overflow.i8(i8 [[A:%.*]], i8 -128) -; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i8, i1 } [[RES]], 1 +; CHECK-NEXT: [[OVERFLOW:%.*]] = icmp ugt i8 [[A:%.*]], 1 ; CHECK-NEXT: ret i1 [[OVERFLOW]] ; %res = tail call { i8, i1 } @llvm.smul.with.overflow.i8(i8 %a, i8 128) @@ -91,8 +90,7 @@ define i1 @test_constant128(i8 %a) { define i1 @test_constant255(i8 %a) { ; CHECK-LABEL: @test_constant255( -; CHECK-NEXT: [[RES:%.*]] = tail call { i8, i1 } @llvm.smul.with.overflow.i8(i8 [[A:%.*]], i8 -1) -; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i8, i1 } [[RES]], 1 +; CHECK-NEXT: [[OVERFLOW:%.*]] = icmp eq i8 [[A:%.*]], -128 ; CHECK-NEXT: ret i1 [[OVERFLOW]] ; %res = tail call { i8, i1 } @llvm.smul.with.overflow.i8(i8 %a, i8 255) diff --git a/llvm/test/Transforms/InstCombine/ssubo.ll b/llvm/test/Transforms/InstCombine/ssubo.ll index 9b11f6ac145fd..c87fe920010e7 100644 --- a/llvm/test/Transforms/InstCombine/ssubo.ll +++ b/llvm/test/Transforms/InstCombine/ssubo.ll @@ -26,8 +26,7 @@ define i1 @test_constant0(i8 %a) { define i1 @test_constant1(i8 %a) { ; CHECK-LABEL: @test_constant1( -; CHECK-NEXT: [[TMP1:%.*]] = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 [[A:%.*]], i8 -1) -; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i8, i1 } [[TMP1]], 1 +; CHECK-NEXT: [[OVERFLOW:%.*]] = icmp eq i8 [[A:%.*]], -128 ; CHECK-NEXT: ret i1 [[OVERFLOW]] ; %res = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 %a, i8 1) @@ -37,8 +36,7 @@ define i1 @test_constant1(i8 %a) { define i1 @test_constant2(i8 %a) { ; CHECK-LABEL: @test_constant2( -; CHECK-NEXT: [[TMP1:%.*]] = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 [[A:%.*]], i8 -2) -; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i8, i1 } [[TMP1]], 1 +; CHECK-NEXT: [[OVERFLOW:%.*]] = icmp slt i8 [[A:%.*]], -126 ; CHECK-NEXT: ret i1 [[OVERFLOW]] ; %res = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 %a, i8 2) @@ -48,8 +46,7 @@ define i1 @test_constant2(i8 %a) { define i1 @test_constant3(i8 %a) { ; CHECK-LABEL: @test_constant3( -; CHECK-NEXT: [[TMP1:%.*]] = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 [[A:%.*]], i8 -3) -; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i8, i1 } [[TMP1]], 1 +; CHECK-NEXT: [[OVERFLOW:%.*]] = icmp slt i8 [[A:%.*]], -125 ; CHECK-NEXT: ret i1 [[OVERFLOW]] ; %res = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 %a, i8 3) @@ -59,8 +56,7 @@ define i1 @test_constant3(i8 %a) { define i1 @test_constant4(i8 %a) { ; CHECK-LABEL: @test_constant4( -; CHECK-NEXT: [[TMP1:%.*]] = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 [[A:%.*]], i8 -4) -; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i8, i1 } [[TMP1]], 1 +; CHECK-NEXT: [[OVERFLOW:%.*]] = icmp slt i8 [[A:%.*]], -124 ; CHECK-NEXT: ret i1 [[OVERFLOW]] ; %res = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 %a, i8 4) @@ -71,8 +67,7 @@ define i1 @test_constant4(i8 %a) { define i1 @test_constant127(i8 %a) { ; CHECK-LABEL: @test_constant127( -; CHECK-NEXT: [[TMP1:%.*]] = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 [[A:%.*]], i8 -127) -; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i8, i1 } [[TMP1]], 1 +; CHECK-NEXT: [[OVERFLOW:%.*]] = icmp slt i8 [[A:%.*]], -1 ; CHECK-NEXT: ret i1 [[OVERFLOW]] ; %res = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 %a, i8 127) @@ -82,8 +77,7 @@ define i1 @test_constant127(i8 %a) { define i1 @test_constant128(i8 %a) { ; CHECK-LABEL: @test_constant128( -; CHECK-NEXT: [[RES:%.*]] = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 [[A:%.*]], i8 -128) -; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i8, i1 } [[RES]], 1 +; CHECK-NEXT: [[OVERFLOW:%.*]] = icmp sgt i8 [[A:%.*]], -1 ; CHECK-NEXT: ret i1 [[OVERFLOW]] ; %res = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 %a, i8 128) @@ -93,8 +87,7 @@ define i1 @test_constant128(i8 %a) { define i1 @test_constant255(i8 %a) { ; CHECK-LABEL: @test_constant255( -; CHECK-NEXT: [[TMP1:%.*]] = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 [[A:%.*]], i8 1) -; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i8, i1 } [[TMP1]], 1 +; CHECK-NEXT: [[OVERFLOW:%.*]] = icmp eq i8 [[A:%.*]], 127 ; CHECK-NEXT: ret i1 [[OVERFLOW]] ; %res = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 %a, i8 255) diff --git a/llvm/test/Transforms/InstCombine/usubo.ll b/llvm/test/Transforms/InstCombine/usubo.ll index 26f8cd2421d77..38ab25c500c36 100644 --- a/llvm/test/Transforms/InstCombine/usubo.ll +++ b/llvm/test/Transforms/InstCombine/usubo.ll @@ -26,8 +26,7 @@ define i1 @test_constant0(i8 %a) { define i1 @test_constant1(i8 %a) { ; CHECK-LABEL: @test_constant1( -; CHECK-NEXT: [[RES:%.*]] = tail call { i8, i1 } @llvm.usub.with.overflow.i8(i8 [[A:%.*]], i8 1) -; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i8, i1 } [[RES]], 1 +; CHECK-NEXT: [[OVERFLOW:%.*]] = icmp eq i8 [[A:%.*]], 0 ; CHECK-NEXT: ret i1 [[OVERFLOW]] ; %res = tail call { i8, i1 } @llvm.usub.with.overflow.i8(i8 %a, i8 1) @@ -37,8 +36,7 @@ define i1 @test_constant1(i8 %a) { define i1 @test_constant2(i8 %a) { ; CHECK-LABEL: @test_constant2( -; CHECK-NEXT: [[RES:%.*]] = tail call { i8, i1 } @llvm.usub.with.overflow.i8(i8 [[A:%.*]], i8 2) -; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i8, i1 } [[RES]], 1 +; CHECK-NEXT: [[OVERFLOW:%.*]] = icmp ult i8 [[A:%.*]], 2 ; CHECK-NEXT: ret i1 [[OVERFLOW]] ; %res = tail call { i8, i1 } @llvm.usub.with.overflow.i8(i8 %a, i8 2) @@ -48,8 +46,7 @@ define i1 @test_constant2(i8 %a) { define i1 @test_constant3(i8 %a) { ; CHECK-LABEL: @test_constant3( -; CHECK-NEXT: [[RES:%.*]] = tail call { i8, i1 } @llvm.usub.with.overflow.i8(i8 [[A:%.*]], i8 3) -; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i8, i1 } [[RES]], 1 +; CHECK-NEXT: [[OVERFLOW:%.*]] = icmp ult i8 [[A:%.*]], 3 ; CHECK-NEXT: ret i1 [[OVERFLOW]] ; %res = tail call { i8, i1 } @llvm.usub.with.overflow.i8(i8 %a, i8 3) @@ -59,8 +56,7 @@ define i1 @test_constant3(i8 %a) { define i1 @test_constant4(i8 %a) { ; CHECK-LABEL: @test_constant4( -; CHECK-NEXT: [[RES:%.*]] = tail call { i8, i1 } @llvm.usub.with.overflow.i8(i8 [[A:%.*]], i8 4) -; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i8, i1 } [[RES]], 1 +; CHECK-NEXT: [[OVERFLOW:%.*]] = icmp ult i8 [[A:%.*]], 4 ; CHECK-NEXT: ret i1 [[OVERFLOW]] ; %res = tail call { i8, i1 } @llvm.usub.with.overflow.i8(i8 %a, i8 4) @@ -71,8 +67,7 @@ define i1 @test_constant4(i8 %a) { define i1 @test_constant127(i8 %a) { ; CHECK-LABEL: @test_constant127( -; CHECK-NEXT: [[RES:%.*]] = tail call { i8, i1 } @llvm.usub.with.overflow.i8(i8 [[A:%.*]], i8 127) -; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i8, i1 } [[RES]], 1 +; CHECK-NEXT: [[OVERFLOW:%.*]] = icmp ult i8 [[A:%.*]], 127 ; CHECK-NEXT: ret i1 [[OVERFLOW]] ; %res = tail call { i8, i1 } @llvm.usub.with.overflow.i8(i8 %a, i8 127) @@ -82,8 +77,7 @@ define i1 @test_constant127(i8 %a) { define i1 @test_constant128(i8 %a) { ; CHECK-LABEL: @test_constant128( -; CHECK-NEXT: [[RES:%.*]] = tail call { i8, i1 } @llvm.usub.with.overflow.i8(i8 [[A:%.*]], i8 -128) -; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i8, i1 } [[RES]], 1 +; CHECK-NEXT: [[OVERFLOW:%.*]] = icmp sgt i8 [[A:%.*]], -1 ; CHECK-NEXT: ret i1 [[OVERFLOW]] ; %res = tail call { i8, i1 } @llvm.usub.with.overflow.i8(i8 %a, i8 128) @@ -93,8 +87,7 @@ define i1 @test_constant128(i8 %a) { define i1 @test_constant255(i8 %a) { ; CHECK-LABEL: @test_constant255( -; CHECK-NEXT: [[RES:%.*]] = tail call { i8, i1 } @llvm.usub.with.overflow.i8(i8 [[A:%.*]], i8 -1) -; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i8, i1 } [[RES]], 1 +; CHECK-NEXT: [[OVERFLOW:%.*]] = icmp ne i8 [[A:%.*]], -1 ; CHECK-NEXT: ret i1 [[OVERFLOW]] ; %res = tail call { i8, i1 } @llvm.usub.with.overflow.i8(i8 %a, i8 255) From f03d29601e0951da2c88f07d4234128e14e87870 Mon Sep 17 00:00:00 2001 From: Emily Shi Date: Thu, 1 Jul 2021 10:13:00 -0700 Subject: [PATCH 440/619] [NFC][compiler-rt] add back solaris xfail for unpoison-alternate-stack.cpp --- .../test/asan/TestCases/Posix/unpoison-alternate-stack.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/compiler-rt/test/asan/TestCases/Posix/unpoison-alternate-stack.cpp b/compiler-rt/test/asan/TestCases/Posix/unpoison-alternate-stack.cpp index 50d28ddf84c2f..a2082ed082154 100644 --- a/compiler-rt/test/asan/TestCases/Posix/unpoison-alternate-stack.cpp +++ b/compiler-rt/test/asan/TestCases/Posix/unpoison-alternate-stack.cpp @@ -6,6 +6,9 @@ // RUN: %clangxx_asan -std=c++20 -fexceptions -O0 %s -o %t -pthread // RUN: %run %t +// longjmp from signal handler is unportable. +// XFAIL: solaris + #include #include #include From 39a15b5ae00df9a5e35f67dbffaed082b7e54d50 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Thu, 1 Jul 2021 10:22:35 -0700 Subject: [PATCH 441/619] [NFC][scudo] Extract MapAllocatorTest for TEST_F --- .../scudo/standalone/tests/secondary_test.cpp | 142 +++++++++--------- 1 file changed, 74 insertions(+), 68 deletions(-) diff --git a/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp b/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp index 2dc041b94a8c0..bbaf79261ba77 100644 --- a/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp @@ -11,11 +11,11 @@ #include "allocator_config.h" #include "secondary.h" -#include - #include +#include #include #include +#include #include #include @@ -94,14 +94,22 @@ TEST(ScudoSecondaryTest, SecondaryBasic) { using LargeAllocator = scudo::MapAllocator; +struct MapAllocatorTest : public Test { + void SetUp() override { Allocator->init(nullptr); } + + void TearDown() override { Allocator->unmapTestOnly(); } + + std::unique_ptr Allocator = + std::make_unique(); + scudo::Options Options = {}; +}; + // This exercises a variety of combinations of size and alignment for the // MapAllocator. The size computation done here mimic the ones done by the // combined allocator. -TEST(ScudoSecondaryTest, SecondaryCombinations) { +TEST_F(MapAllocatorTest, SecondaryCombinations) { constexpr scudo::uptr MinAlign = FIRST_32_SECOND_64(8, 16); constexpr scudo::uptr HeaderSize = scudo::roundUpTo(8, MinAlign); - std::unique_ptr L(new LargeAllocator); - L->init(nullptr); for (scudo::uptr SizeLog = 0; SizeLog <= 20; SizeLog++) { for (scudo::uptr AlignLog = FIRST_32_SECOND_64(3, 4); AlignLog <= 16; AlignLog++) { @@ -113,103 +121,102 @@ TEST(ScudoSecondaryTest, SecondaryCombinations) { scudo::roundUpTo((1U << SizeLog) + Delta, MinAlign); const scudo::uptr Size = HeaderSize + UserSize + (Align > MinAlign ? Align - HeaderSize : 0); - void *P = L->allocate(scudo::Options{}, Size, Align); + void *P = Allocator->allocate(Options, Size, Align); EXPECT_NE(P, nullptr); void *AlignedP = reinterpret_cast( scudo::roundUpTo(reinterpret_cast(P), Align)); memset(AlignedP, 0xff, UserSize); - L->deallocate(scudo::Options{}, P); + Allocator->deallocate(Options, P); } } } scudo::ScopedString Str; - L->getStats(&Str); + Allocator->getStats(&Str); Str.output(); - L->unmapTestOnly(); } -TEST(ScudoSecondaryTest, SecondaryIterate) { - std::unique_ptr L(new LargeAllocator); - L->init(nullptr); +TEST_F(MapAllocatorTest, SecondaryIterate) { std::vector V; const scudo::uptr PageSize = scudo::getPageSizeCached(); for (scudo::uptr I = 0; I < 32U; I++) - V.push_back(L->allocate(scudo::Options{}, (std::rand() % 16) * PageSize)); + V.push_back(Allocator->allocate(Options, (std::rand() % 16) * PageSize)); auto Lambda = [V](scudo::uptr Block) { EXPECT_NE(std::find(V.begin(), V.end(), reinterpret_cast(Block)), V.end()); }; - L->disable(); - L->iterateOverBlocks(Lambda); - L->enable(); + Allocator->disable(); + Allocator->iterateOverBlocks(Lambda); + Allocator->enable(); while (!V.empty()) { - L->deallocate(scudo::Options{}, V.back()); + Allocator->deallocate(Options, V.back()); V.pop_back(); } scudo::ScopedString Str; - L->getStats(&Str); + Allocator->getStats(&Str); Str.output(); - L->unmapTestOnly(); } -TEST(ScudoSecondaryTest, SecondaryOptions) { - std::unique_ptr L(new LargeAllocator); - L->init(nullptr); +TEST_F(MapAllocatorTest, SecondaryOptions) { // Attempt to set a maximum number of entries higher than the array size. - EXPECT_FALSE(L->setOption(scudo::Option::MaxCacheEntriesCount, 4096U)); + EXPECT_FALSE( + Allocator->setOption(scudo::Option::MaxCacheEntriesCount, 4096U)); // A negative number will be cast to a scudo::u32, and fail. - EXPECT_FALSE(L->setOption(scudo::Option::MaxCacheEntriesCount, -1)); - if (L->canCache(0U)) { + EXPECT_FALSE(Allocator->setOption(scudo::Option::MaxCacheEntriesCount, -1)); + if (Allocator->canCache(0U)) { // Various valid combinations. - EXPECT_TRUE(L->setOption(scudo::Option::MaxCacheEntriesCount, 4U)); - EXPECT_TRUE(L->setOption(scudo::Option::MaxCacheEntrySize, 1UL << 20)); - EXPECT_TRUE(L->canCache(1UL << 18)); - EXPECT_TRUE(L->setOption(scudo::Option::MaxCacheEntrySize, 1UL << 17)); - EXPECT_FALSE(L->canCache(1UL << 18)); - EXPECT_TRUE(L->canCache(1UL << 16)); - EXPECT_TRUE(L->setOption(scudo::Option::MaxCacheEntriesCount, 0U)); - EXPECT_FALSE(L->canCache(1UL << 16)); - EXPECT_TRUE(L->setOption(scudo::Option::MaxCacheEntriesCount, 4U)); - EXPECT_TRUE(L->setOption(scudo::Option::MaxCacheEntrySize, 1UL << 20)); - EXPECT_TRUE(L->canCache(1UL << 16)); + EXPECT_TRUE(Allocator->setOption(scudo::Option::MaxCacheEntriesCount, 4U)); + EXPECT_TRUE( + Allocator->setOption(scudo::Option::MaxCacheEntrySize, 1UL << 20)); + EXPECT_TRUE(Allocator->canCache(1UL << 18)); + EXPECT_TRUE( + Allocator->setOption(scudo::Option::MaxCacheEntrySize, 1UL << 17)); + EXPECT_FALSE(Allocator->canCache(1UL << 18)); + EXPECT_TRUE(Allocator->canCache(1UL << 16)); + EXPECT_TRUE(Allocator->setOption(scudo::Option::MaxCacheEntriesCount, 0U)); + EXPECT_FALSE(Allocator->canCache(1UL << 16)); + EXPECT_TRUE(Allocator->setOption(scudo::Option::MaxCacheEntriesCount, 4U)); + EXPECT_TRUE( + Allocator->setOption(scudo::Option::MaxCacheEntrySize, 1UL << 20)); + EXPECT_TRUE(Allocator->canCache(1UL << 16)); } - L->unmapTestOnly(); } -static std::mutex Mutex; -static std::condition_variable Cv; -static bool Ready; +struct MapAllocatorWithReleaseTest : public MapAllocatorTest { + void SetUp() override { Allocator->init(nullptr, /*ReleaseToOsInterval=*/0); } -static void performAllocations(LargeAllocator *L) { - std::vector V; - const scudo::uptr PageSize = scudo::getPageSizeCached(); - { - std::unique_lock Lock(Mutex); - while (!Ready) - Cv.wait(Lock); - } - for (scudo::uptr I = 0; I < 128U; I++) { - // Deallocate 75% of the blocks. - const bool Deallocate = (rand() & 3) != 0; - void *P = L->allocate(scudo::Options{}, (std::rand() % 16) * PageSize); - if (Deallocate) - L->deallocate(scudo::Options{}, P); - else - V.push_back(P); - } - while (!V.empty()) { - L->deallocate(scudo::Options{}, V.back()); - V.pop_back(); + void performAllocations() { + std::vector V; + const scudo::uptr PageSize = scudo::getPageSizeCached(); + { + std::unique_lock Lock(Mutex); + while (!Ready) + Cv.wait(Lock); + } + for (scudo::uptr I = 0; I < 128U; I++) { + // Deallocate 75% of the blocks. + const bool Deallocate = (rand() & 3) != 0; + void *P = Allocator->allocate(Options, (std::rand() % 16) * PageSize); + if (Deallocate) + Allocator->deallocate(Options, P); + else + V.push_back(P); + } + while (!V.empty()) { + Allocator->deallocate(Options, V.back()); + V.pop_back(); + } } -} -TEST(ScudoSecondaryTest, SecondaryThreadsRace) { - Ready = false; - std::unique_ptr L(new LargeAllocator); - L->init(nullptr, /*ReleaseToOsInterval=*/0); + std::mutex Mutex; + std::condition_variable Cv; + bool Ready = false; +}; + +TEST_F(MapAllocatorWithReleaseTest, SecondaryThreadsRace) { std::thread Threads[16]; for (scudo::uptr I = 0; I < ARRAY_SIZE(Threads); I++) - Threads[I] = std::thread(performAllocations, L.get()); + Threads[I] = + std::thread(&MapAllocatorWithReleaseTest::performAllocations, this); { std::unique_lock Lock(Mutex); Ready = true; @@ -218,7 +225,6 @@ TEST(ScudoSecondaryTest, SecondaryThreadsRace) { for (auto &T : Threads) T.join(); scudo::ScopedString Str; - L->getStats(&Str); + Allocator->getStats(&Str); Str.output(); - L->unmapTestOnly(); } From 7d207472030a244d540fa53e47a734675b289a94 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Wed, 30 Jun 2021 14:27:53 -0700 Subject: [PATCH 442/619] [gwp_asan] Improve symbolizer script Show inlined functions. Hide unhelpful DWARF related warnings. Reviewed By: hctim Differential Revision: https://reviews.llvm.org/D105230 --- compiler-rt/lib/gwp_asan/scripts/symbolize.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/compiler-rt/lib/gwp_asan/scripts/symbolize.sh b/compiler-rt/lib/gwp_asan/scripts/symbolize.sh index fad9620a676e3..6974ee816701b 100755 --- a/compiler-rt/lib/gwp_asan/scripts/symbolize.sh +++ b/compiler-rt/lib/gwp_asan/scripts/symbolize.sh @@ -25,7 +25,7 @@ while read -r line; do if [ -z "$function_name" ]; then # If the offset is binary-relative, just resolve that. - symbolized="$(echo $function_offset | addr2line -e $binary_name)" + symbolized="$(echo $function_offset | addr2line -ie $binary_name)" else # Otherwise, the offset is function-relative. Get the address of the # function, and add it to the offset, then symbolize. @@ -41,7 +41,7 @@ while read -r line; do # Add the function address and offset to get the offset into the binary. binary_offset="$(printf "0x%X" "$((function_addr+function_offset))")" - symbolized="$(echo $binary_offset | addr2line -e $binary_name)" + symbolized="$(echo $binary_offset | addr2line -ie $binary_name)" fi # Check that it symbolized properly. If it didn't, output the old line. @@ -52,4 +52,4 @@ while read -r line; do else echo "${frame_number}${symbolized}" fi -done +done 2> >(grep -v "addr2line: DWARF error: could not find variable specification") From f83654982be65567d41c513b27ef76c3c64946f5 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 1 Jul 2021 13:36:09 -0400 Subject: [PATCH 443/619] [libc++] Migrate the additional_features parameter to the DSL This is required to run the tests under any configuration that uses additional_features using a from-scratch config. That is the case of e.g. the Debug mode (which uses LIBCXX-DEBUG-FIXME) and the tests on Windows. --- libcxx/test/libcxx/selftest/dsl/dsl.sh.py | 20 ++++++++++++++++ libcxx/utils/libcxx/test/config.py | 5 ---- libcxx/utils/libcxx/test/dsl.py | 29 +++++++++++++++++------ libcxx/utils/libcxx/test/params.py | 8 +++++-- 4 files changed, 48 insertions(+), 14 deletions(-) diff --git a/libcxx/test/libcxx/selftest/dsl/dsl.sh.py b/libcxx/test/libcxx/selftest/dsl/dsl.sh.py index e27c13a9d2056..1901ddb9964d0 100644 --- a/libcxx/test/libcxx/selftest/dsl/dsl.sh.py +++ b/libcxx/test/libcxx/selftest/dsl/dsl.sh.py @@ -435,6 +435,26 @@ def test_boolean_value_from_false_boolean_parameter(self): a.applyTo(self.config) self.assertIn('-fno-exceptions', self.config.available_features) + def test_list_parsed_from_comma_delimited_string_empty(self): + self.litConfig.params['additional_features'] = "" + param = dsl.Parameter(name='additional_features', type=list, help='', actions=lambda f: f) + self.assertEqual(param.getActions(self.config, self.litConfig.params), []) + + def test_list_parsed_from_comma_delimited_string_1(self): + self.litConfig.params['additional_features'] = "feature1" + param = dsl.Parameter(name='additional_features', type=list, help='', actions=lambda f: f) + self.assertEqual(param.getActions(self.config, self.litConfig.params), ['feature1']) + + def test_list_parsed_from_comma_delimited_string_2(self): + self.litConfig.params['additional_features'] = "feature1,feature2" + param = dsl.Parameter(name='additional_features', type=list, help='', actions=lambda f: f) + self.assertEqual(param.getActions(self.config, self.litConfig.params), ['feature1', 'feature2']) + + def test_list_parsed_from_comma_delimited_string_3(self): + self.litConfig.params['additional_features'] = "feature1,feature2, feature3" + param = dsl.Parameter(name='additional_features', type=list, help='', actions=lambda f: f) + self.assertEqual(param.getActions(self.config, self.litConfig.params), ['feature1', 'feature2', 'feature3']) + if __name__ == '__main__': unittest.main(verbosity=2) diff --git a/libcxx/utils/libcxx/test/config.py b/libcxx/utils/libcxx/test/config.py index 4978482006c67..9538ecc6db798 100644 --- a/libcxx/utils/libcxx/test/config.py +++ b/libcxx/utils/libcxx/test/config.py @@ -232,11 +232,6 @@ def configure_obj_root(self): self.libcxx_obj_root = self.project_obj_root def configure_features(self): - additional_features = self.get_lit_conf('additional_features') - if additional_features: - for f in additional_features.split(','): - self.config.available_features.add(f.strip()) - if self.target_info.is_windows(): if self.cxx_stdlib_under_test == 'libc++': # LIBCXX-WINDOWS-FIXME is the feature name used to XFAIL the diff --git a/libcxx/utils/libcxx/test/dsl.py b/libcxx/utils/libcxx/test/dsl.py index 1071636702478..64988e1609b31 100644 --- a/libcxx/utils/libcxx/test/dsl.py +++ b/libcxx/utils/libcxx/test/dsl.py @@ -511,6 +511,13 @@ def _str_to_bool(s): else: raise ValueError("Got string '{}', which isn't a valid boolean".format(s)) +def _parse_parameter(s, type): + if type is bool and isinstance(s, str): + return _str_to_bool(s) + elif type is list and isinstance(s, str): + return [x.strip() for x in s.split(',') if x.strip()] + return type(s) + class Parameter(object): """ @@ -554,7 +561,8 @@ def __init__(self, name, type, help, actions, choices=None, default=None): - type A callable that can be used to parse the value of the parameter given on the command-line. As a special case, using the type `bool` also - allows parsing strings with boolean-like contents. + allows parsing strings with boolean-like contents, and the type `list` + will parse a string delimited by commas into a list of the substrings. - help A string explaining the parameter, for documentation purposes. @@ -584,8 +592,7 @@ def __init__(self, name, type, help, actions, choices=None, default=None): else: self._choices = None - self._parse = lambda x: (_str_to_bool(x) if type is bool and isinstance(x, str) - else type(x)) + self._parse = lambda x: _parse_parameter(x, type) self._help = help self._actions = actions self._default = default @@ -599,10 +606,16 @@ def _getValue(self, config, litParams): if param is None and self._default is None: raise ValueError("Parameter {} doesn't have a default value, but it was not specified in the Lit parameters or in the Lit config".format(self.name)) getDefault = lambda: self._default(config) if callable(self._default) else self._default - value = self._parse(param) if param is not None else getDefault() + + if param is not None: + (pretty, value) = (param, self._parse(param)) + else: + value = getDefault() + pretty = '{} (default)'.format(value) + if self._choices and value not in self._choices: raise ValueError("Got value '{}' for parameter '{}', which is not in the provided set of possible choices: {}".format(value, self.name, self._choices)) - return value + return (pretty, value) @property def name(self): @@ -618,10 +631,12 @@ def getActions(self, config, litParams): """ Return the list of actions associated to this value of the parameter. """ - return self._actions(self._getValue(config, litParams)) + (_, parameterValue) = self._getValue(config, litParams) + return self._actions(parameterValue) def pretty(self, config, litParams): """ Return a pretty representation of the parameter's name and value. """ - return "{}={}".format(self.name, self._getValue(config, litParams)) + (prettyParameterValue, _) = self._getValue(config, litParams) + return "{}={}".format(self.name, prettyParameterValue) diff --git a/libcxx/utils/libcxx/test/params.py b/libcxx/utils/libcxx/test/params.py index 5ab82711c1b40..bdaa573205e80 100644 --- a/libcxx/utils/libcxx/test/params.py +++ b/libcxx/utils/libcxx/test/params.py @@ -53,7 +53,6 @@ def getStdFlag(cfg, std): return None DEFAULT_PARAMETERS = [ - # Core parameters of the test suite Parameter(name='target_triple', type=str, default=getHostTriple, help="The target triple to compile the test suite for. This must be " "compatible with the target that the tests will be run on.", @@ -142,7 +141,6 @@ def getStdFlag(cfg, std): AddFeature('sanitizer-new-delete') if sanitizer in ['Address', 'Memory', 'MemoryWithOrigins', 'Thread'] else None, ])), - # Parameters to enable or disable parts of the test suite Parameter(name='enable_experimental', choices=[True, False], type=bool, default=True, help="Whether to enable tests for experimental C++ libraries (typically Library Fundamentals TSes).", actions=lambda experimental: [] if not experimental else [ @@ -166,6 +164,12 @@ def getStdFlag(cfg, std): actions=lambda enabled: [] if enabled else [ AddFeature('libcxx-no-debug-mode') ]), + + Parameter(name='additional_features', type=list, default=[], + help="A comma-delimited list of additional features that will be enabled when running the tests. " + "This should be used sparingly since specifying ad-hoc features manually is error-prone and " + "brittle in the long run as changes are made to the test suite.", + actions=lambda features: [AddFeature(f) for f in features]), ] DEFAULT_PARAMETERS += [ From 000444214f1658de9905533231f4c80fde2b7f1b Mon Sep 17 00:00:00 2001 From: zoecarver Date: Thu, 1 Jul 2021 10:18:27 -0700 Subject: [PATCH 444/619] [libcxx] Update optional star operator to be noexcept. Differential Revision: https://reviews.llvm.org/D105296 --- libcxx/include/optional | 8 ++++---- .../optional.object.observe/dereference.pass.cpp | 1 + .../optional.object.observe/dereference_const.pass.cpp | 1 + .../dereference_const_rvalue.pass.cpp | 1 + .../optional.object.observe/dereference_rvalue.pass.cpp | 1 + 5 files changed, 8 insertions(+), 4 deletions(-) diff --git a/libcxx/include/optional b/libcxx/include/optional index 0e6c1b88f52cf..118db66a4abc9 100644 --- a/libcxx/include/optional +++ b/libcxx/include/optional @@ -906,7 +906,7 @@ public: _LIBCPP_INLINE_VISIBILITY constexpr const value_type& - operator*() const& + operator*() const& noexcept { _LIBCPP_ASSERT(this->has_value(), "optional operator* called on a disengaged value"); return this->__get(); @@ -915,7 +915,7 @@ public: _LIBCPP_INLINE_VISIBILITY constexpr value_type& - operator*() & + operator*() & noexcept { _LIBCPP_ASSERT(this->has_value(), "optional operator* called on a disengaged value"); return this->__get(); @@ -924,7 +924,7 @@ public: _LIBCPP_INLINE_VISIBILITY constexpr value_type&& - operator*() && + operator*() && noexcept { _LIBCPP_ASSERT(this->has_value(), "optional operator* called on a disengaged value"); return _VSTD::move(this->__get()); @@ -933,7 +933,7 @@ public: _LIBCPP_INLINE_VISIBILITY constexpr const value_type&& - operator*() const&& + operator*() const&& noexcept { _LIBCPP_ASSERT(this->has_value(), "optional operator* called on a disengaged value"); return _VSTD::move(this->__get()); diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/dereference.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/dereference.pass.cpp index 98216df242cce..5b04e5a35aafa 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/dereference.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/dereference.pass.cpp @@ -44,6 +44,7 @@ int main(int, char**) { optional opt; ((void)opt); ASSERT_SAME_TYPE(decltype(*opt), X&); + LIBCPP_STATIC_ASSERT(noexcept(*opt)); // ASSERT_NOT_NOEXCEPT(*opt); // FIXME: This assertion fails with GCC because it can see that // (A) operator*() is constexpr, and diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/dereference_const.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/dereference_const.pass.cpp index f61cfcee75066..f323cd1a5e405 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/dereference_const.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/dereference_const.pass.cpp @@ -37,6 +37,7 @@ int main(int, char**) { const optional opt; ((void)opt); ASSERT_SAME_TYPE(decltype(*opt), X const&); + LIBCPP_STATIC_ASSERT(noexcept(*opt)); // ASSERT_NOT_NOEXCEPT(*opt); // FIXME: This assertion fails with GCC because it can see that // (A) operator*() is constexpr, and diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/dereference_const_rvalue.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/dereference_const_rvalue.pass.cpp index bc6745de161d8..68591c5e2dbcb 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/dereference_const_rvalue.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/dereference_const_rvalue.pass.cpp @@ -37,6 +37,7 @@ int main(int, char**) { const optional opt; ((void)opt); ASSERT_SAME_TYPE(decltype(*std::move(opt)), X const &&); + LIBCPP_STATIC_ASSERT(noexcept(*opt)); // ASSERT_NOT_NOEXCEPT(*std::move(opt)); // FIXME: This assertion fails with GCC because it can see that // (A) operator*() is constexpr, and diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/dereference_rvalue.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/dereference_rvalue.pass.cpp index c8ee573aea9ad..67edbb903353e 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/dereference_rvalue.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/dereference_rvalue.pass.cpp @@ -44,6 +44,7 @@ int main(int, char**) { optional opt; ((void)opt); ASSERT_SAME_TYPE(decltype(*std::move(opt)), X&&); + LIBCPP_STATIC_ASSERT(noexcept(*opt)); // ASSERT_NOT_NOEXCEPT(*std::move(opt)); // FIXME: This assertion fails with GCC because it can see that // (A) operator*() is constexpr, and From 050b064f15ee56ee0b42c9b957a3dd0f32532394 Mon Sep 17 00:00:00 2001 From: Christopher Di Bella Date: Thu, 1 Jul 2021 09:25:35 -0400 Subject: [PATCH 445/619] [libcxx][functional][modular] splices into modular headers Differential Revision: https://reviews.llvm.org/D104942 --- libcxx/include/CMakeLists.txt | 26 +- libcxx/include/__functional/binary_function.h | 31 + libcxx/include/__functional/binary_negate.h | 50 + libcxx/include/__functional/bind.h | 386 +++ libcxx/include/__functional/bind_front.h | 52 + libcxx/include/__functional/binder1st.h | 54 + libcxx/include/__functional/binder2nd.h | 54 + .../include/__functional/default_searcher.h | 56 + .../function.h} | 1228 ++++++- libcxx/include/__functional/identity.h | 37 + libcxx/include/__functional/invoke.h | 100 + libcxx/include/__functional/is_transparent.h | 36 + libcxx/include/__functional/mem_fn.h | 161 + libcxx/include/__functional/mem_fun_ref.h | 173 + libcxx/include/__functional/not_fn.h | 47 + libcxx/include/__functional/operations.h | 729 ++++ libcxx/include/__functional/perfect_forward.h | 88 + .../__functional/pointer_to_binary_function.h | 46 + .../__functional/pointer_to_unary_function.h | 46 + .../include/__functional/ranges_operations.h | 97 + .../include/__functional/reference_wrapper.h | 223 ++ libcxx/include/__functional/unary_negate.h | 47 + libcxx/include/__functional/unwrap_ref.h | 6 + .../include/__functional/weak_result_type.h | 481 +++ libcxx/include/__functional_base | 678 +--- libcxx/include/__functional_base_03 | 223 -- libcxx/include/__iterator/advance.h | 1 + libcxx/include/__memory/allocator_arg_t.h | 78 + libcxx/include/__memory/shared_ptr.h | 8 +- libcxx/include/__memory/unique_ptr.h | 3 +- libcxx/include/__memory/uses_allocator.h | 60 + libcxx/include/__ranges/ref_view.h | 1 + libcxx/include/__string | 19 +- libcxx/include/concepts | 1 + libcxx/include/experimental/__memory | 27 +- libcxx/include/experimental/functional | 1 + libcxx/include/ext/__hash | 1 + libcxx/include/functional | 2922 +---------------- libcxx/include/future | 2 + libcxx/include/map | 1 + libcxx/include/memory | 2 + libcxx/include/module.modulemap | 30 +- libcxx/include/queue | 1 + libcxx/include/set | 1 + libcxx/include/stack | 1 + libcxx/include/system_error | 3 +- libcxx/include/tuple | 6 +- libcxx/include/typeindex | 1 + libcxx/include/unordered_map | 1 + libcxx/include/unordered_set | 1 + .../meta.trans.other/result_of.pass.cpp | 3 +- .../meta.trans.other/result_of11.pass.cpp | 1 + .../tuple.tuple/tuple.cnstr/deduct.pass.cpp | 3 +- 53 files changed, 4514 insertions(+), 3820 deletions(-) create mode 100644 libcxx/include/__functional/binary_function.h create mode 100644 libcxx/include/__functional/binary_negate.h create mode 100644 libcxx/include/__functional/bind.h create mode 100644 libcxx/include/__functional/bind_front.h create mode 100644 libcxx/include/__functional/binder1st.h create mode 100644 libcxx/include/__functional/binder2nd.h create mode 100644 libcxx/include/__functional/default_searcher.h rename libcxx/include/{__functional_03 => __functional/function.h} (53%) create mode 100644 libcxx/include/__functional/identity.h create mode 100644 libcxx/include/__functional/invoke.h create mode 100644 libcxx/include/__functional/is_transparent.h create mode 100644 libcxx/include/__functional/mem_fn.h create mode 100644 libcxx/include/__functional/mem_fun_ref.h create mode 100644 libcxx/include/__functional/not_fn.h create mode 100644 libcxx/include/__functional/operations.h create mode 100644 libcxx/include/__functional/perfect_forward.h create mode 100644 libcxx/include/__functional/pointer_to_binary_function.h create mode 100644 libcxx/include/__functional/pointer_to_unary_function.h create mode 100644 libcxx/include/__functional/ranges_operations.h create mode 100644 libcxx/include/__functional/reference_wrapper.h create mode 100644 libcxx/include/__functional/unary_negate.h create mode 100644 libcxx/include/__functional/weak_result_type.h delete mode 100644 libcxx/include/__functional_base_03 create mode 100644 libcxx/include/__memory/allocator_arg_t.h create mode 100644 libcxx/include/__memory/uses_allocator.h diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index ec1e0762c6612..2cbf7c6fe56ca 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -103,12 +103,32 @@ set(files __format/format_error.h __format/format_parse_context.h __function_like.h - __functional_03 __functional_base - __functional_base_03 + __functional/binary_function.h + __functional/binary_negate.h + __functional/bind_front.h + __functional/bind.h + __functional/binder1st.h + __functional/binder2nd.h + __functional/default_searcher.h + __functional/function.h __functional/hash.h + __functional/identity.h + __functional/invoke.h + __functional/is_transparent.h + __functional/mem_fn.h + __functional/mem_fun_ref.h + __functional/not_fn.h + __functional/operations.h + __functional/perfect_forward.h + __functional/pointer_to_binary_function.h + __functional/pointer_to_unary_function.h + __functional/ranges_operations.h + __functional/reference_wrapper.h __functional/unary_function.h + __functional/unary_negate.h __functional/unwrap_ref.h + __functional/weak_result_type.h __hash_table __iterator/advance.h __iterator/back_insert_iterator.h @@ -137,6 +157,7 @@ set(files __memory/addressof.h __memory/allocation_guard.h __memory/allocator.h + __memory/allocator_arg_t.h __memory/allocator_traits.h __memory/auto_ptr.h __memory/compressed_pair.h @@ -148,6 +169,7 @@ set(files __memory/temporary_buffer.h __memory/uninitialized_algorithms.h __memory/unique_ptr.h + __memory/uses_allocator.h __mutex_base __node_handle __nullptr diff --git a/libcxx/include/__functional/binary_function.h b/libcxx/include/__functional/binary_function.h new file mode 100644 index 0000000000000..8ca7b06662ae5 --- /dev/null +++ b/libcxx/include/__functional/binary_function.h @@ -0,0 +1,31 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FUNCTIONAL_BINARY_FUNCTION_H +#define _LIBCPP___FUNCTIONAL_BINARY_FUNCTION_H + +#include <__config> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template +struct _LIBCPP_TEMPLATE_VIS binary_function +{ + typedef _Arg1 first_argument_type; + typedef _Arg2 second_argument_type; + typedef _Result result_type; +}; + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___FUNCTIONAL_BINARY_FUNCTION_H diff --git a/libcxx/include/__functional/binary_negate.h b/libcxx/include/__functional/binary_negate.h new file mode 100644 index 0000000000000..4fc3f1ba28750 --- /dev/null +++ b/libcxx/include/__functional/binary_negate.h @@ -0,0 +1,50 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FUNCTIONAL_BINARY_NEGATE_H +#define _LIBCPP___FUNCTIONAL_BINARY_NEGATE_H + +#include <__config> +#include <__functional/binary_function.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_NEGATORS) + +template +class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX17 binary_negate + : public binary_function +{ + _Predicate __pred_; +public: + _LIBCPP_INLINE_VISIBILITY explicit _LIBCPP_CONSTEXPR_AFTER_CXX11 + binary_negate(const _Predicate& __pred) : __pred_(__pred) {} + + _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY + bool operator()(const typename _Predicate::first_argument_type& __x, + const typename _Predicate::second_argument_type& __y) const + {return !__pred_(__x, __y);} +}; + +template +_LIBCPP_DEPRECATED_IN_CXX17 inline _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY +binary_negate<_Predicate> +not2(const _Predicate& __pred) {return binary_negate<_Predicate>(__pred);} + +#endif // _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_NEGATORS) + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___FUNCTIONAL_BINARY_NEGATE_H diff --git a/libcxx/include/__functional/bind.h b/libcxx/include/__functional/bind.h new file mode 100644 index 0000000000000..79dfad723c68f --- /dev/null +++ b/libcxx/include/__functional/bind.h @@ -0,0 +1,386 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FUNCTIONAL_BIND_H +#define _LIBCPP___FUNCTIONAL_BIND_H + +#include <__config> +#include <__functional/weak_result_type.h> +#include <__functional/invoke.h> +#include +#include +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template struct __is_bind_expression : public false_type {}; +template struct _LIBCPP_TEMPLATE_VIS is_bind_expression + : public __is_bind_expression::type> {}; + +#if _LIBCPP_STD_VER > 14 +template +_LIBCPP_INLINE_VAR constexpr size_t is_bind_expression_v = is_bind_expression<_Tp>::value; +#endif + +template struct __is_placeholder : public integral_constant {}; +template struct _LIBCPP_TEMPLATE_VIS is_placeholder + : public __is_placeholder::type> {}; + +#if _LIBCPP_STD_VER > 14 +template +_LIBCPP_INLINE_VAR constexpr size_t is_placeholder_v = is_placeholder<_Tp>::value; +#endif + +namespace placeholders +{ + +template struct __ph {}; + +#if defined(_LIBCPP_CXX03_LANG) || defined(_LIBCPP_BUILDING_LIBRARY) +_LIBCPP_FUNC_VIS extern const __ph<1> _1; +_LIBCPP_FUNC_VIS extern const __ph<2> _2; +_LIBCPP_FUNC_VIS extern const __ph<3> _3; +_LIBCPP_FUNC_VIS extern const __ph<4> _4; +_LIBCPP_FUNC_VIS extern const __ph<5> _5; +_LIBCPP_FUNC_VIS extern const __ph<6> _6; +_LIBCPP_FUNC_VIS extern const __ph<7> _7; +_LIBCPP_FUNC_VIS extern const __ph<8> _8; +_LIBCPP_FUNC_VIS extern const __ph<9> _9; +_LIBCPP_FUNC_VIS extern const __ph<10> _10; +#else +/* _LIBCPP_INLINE_VAR */ constexpr __ph<1> _1{}; +/* _LIBCPP_INLINE_VAR */ constexpr __ph<2> _2{}; +/* _LIBCPP_INLINE_VAR */ constexpr __ph<3> _3{}; +/* _LIBCPP_INLINE_VAR */ constexpr __ph<4> _4{}; +/* _LIBCPP_INLINE_VAR */ constexpr __ph<5> _5{}; +/* _LIBCPP_INLINE_VAR */ constexpr __ph<6> _6{}; +/* _LIBCPP_INLINE_VAR */ constexpr __ph<7> _7{}; +/* _LIBCPP_INLINE_VAR */ constexpr __ph<8> _8{}; +/* _LIBCPP_INLINE_VAR */ constexpr __ph<9> _9{}; +/* _LIBCPP_INLINE_VAR */ constexpr __ph<10> _10{}; +#endif // defined(_LIBCPP_CXX03_LANG) || defined(_LIBCPP_BUILDING_LIBRARY) + +} // placeholders + +template +struct __is_placeholder > + : public integral_constant {}; + + +#ifndef _LIBCPP_CXX03_LANG + +template +inline _LIBCPP_INLINE_VISIBILITY +_Tp& +__mu(reference_wrapper<_Tp> __t, _Uj&) +{ + return __t.get(); +} + +template +inline _LIBCPP_INLINE_VISIBILITY +typename __invoke_of<_Ti&, _Uj...>::type +__mu_expand(_Ti& __ti, tuple<_Uj...>& __uj, __tuple_indices<_Indx...>) +{ + return __ti(_VSTD::forward<_Uj>(_VSTD::get<_Indx>(__uj))...); +} + +template +inline _LIBCPP_INLINE_VISIBILITY +typename _EnableIf +< + is_bind_expression<_Ti>::value, + __invoke_of<_Ti&, _Uj...> +>::type +__mu(_Ti& __ti, tuple<_Uj...>& __uj) +{ + typedef typename __make_tuple_indices::type __indices; + return _VSTD::__mu_expand(__ti, __uj, __indices()); +} + +template +struct __mu_return2 {}; + +template +struct __mu_return2 +{ + typedef typename tuple_element::value - 1, _Uj>::type type; +}; + +template +inline _LIBCPP_INLINE_VISIBILITY +typename enable_if +< + 0 < is_placeholder<_Ti>::value, + typename __mu_return2<0 < is_placeholder<_Ti>::value, _Ti, _Uj>::type +>::type +__mu(_Ti&, _Uj& __uj) +{ + const size_t _Indx = is_placeholder<_Ti>::value - 1; + return _VSTD::forward::type>(_VSTD::get<_Indx>(__uj)); +} + +template +inline _LIBCPP_INLINE_VISIBILITY +typename enable_if +< + !is_bind_expression<_Ti>::value && + is_placeholder<_Ti>::value == 0 && + !__is_reference_wrapper<_Ti>::value, + _Ti& +>::type +__mu(_Ti& __ti, _Uj&) +{ + return __ti; +} + +template +struct __mu_return_impl; + +template +struct __mu_return_invokable // false +{ + typedef __nat type; +}; + +template +struct __mu_return_invokable +{ + typedef typename __invoke_of<_Ti&, _Uj...>::type type; +}; + +template +struct __mu_return_impl<_Ti, false, true, false, tuple<_Uj...> > + : public __mu_return_invokable<__invokable<_Ti&, _Uj...>::value, _Ti, _Uj...> +{ +}; + +template +struct __mu_return_impl<_Ti, false, false, true, _TupleUj> +{ + typedef typename tuple_element::value - 1, + _TupleUj>::type&& type; +}; + +template +struct __mu_return_impl<_Ti, true, false, false, _TupleUj> +{ + typedef typename _Ti::type& type; +}; + +template +struct __mu_return_impl<_Ti, false, false, false, _TupleUj> +{ + typedef _Ti& type; +}; + +template +struct __mu_return + : public __mu_return_impl<_Ti, + __is_reference_wrapper<_Ti>::value, + is_bind_expression<_Ti>::value, + 0 < is_placeholder<_Ti>::value && + is_placeholder<_Ti>::value <= tuple_size<_TupleUj>::value, + _TupleUj> +{ +}; + +template +struct __is_valid_bind_return +{ + static const bool value = false; +}; + +template +struct __is_valid_bind_return<_Fp, tuple<_BoundArgs...>, _TupleUj> +{ + static const bool value = __invokable<_Fp, + typename __mu_return<_BoundArgs, _TupleUj>::type...>::value; +}; + +template +struct __is_valid_bind_return<_Fp, const tuple<_BoundArgs...>, _TupleUj> +{ + static const bool value = __invokable<_Fp, + typename __mu_return::type...>::value; +}; + +template ::value> +struct __bind_return; + +template +struct __bind_return<_Fp, tuple<_BoundArgs...>, _TupleUj, true> +{ + typedef typename __invoke_of + < + _Fp&, + typename __mu_return + < + _BoundArgs, + _TupleUj + >::type... + >::type type; +}; + +template +struct __bind_return<_Fp, const tuple<_BoundArgs...>, _TupleUj, true> +{ + typedef typename __invoke_of + < + _Fp&, + typename __mu_return + < + const _BoundArgs, + _TupleUj + >::type... + >::type type; +}; + +template +inline _LIBCPP_INLINE_VISIBILITY +typename __bind_return<_Fp, _BoundArgs, _Args>::type +__apply_functor(_Fp& __f, _BoundArgs& __bound_args, __tuple_indices<_Indx...>, + _Args&& __args) +{ + return _VSTD::__invoke(__f, _VSTD::__mu(_VSTD::get<_Indx>(__bound_args), __args)...); +} + +template +class __bind +#if _LIBCPP_STD_VER <= 17 || !defined(_LIBCPP_ABI_NO_BINDER_BASES) + : public __weak_result_type::type> +#endif +{ +protected: + typedef typename decay<_Fp>::type _Fd; + typedef tuple::type...> _Td; +private: + _Fd __f_; + _Td __bound_args_; + + typedef typename __make_tuple_indices::type __indices; +public: + template ::value && + !is_same::type, + __bind>::value + >::type> + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 + explicit __bind(_Gp&& __f, _BA&& ...__bound_args) + : __f_(_VSTD::forward<_Gp>(__f)), + __bound_args_(_VSTD::forward<_BA>(__bound_args)...) {} + + template + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 + typename __bind_return<_Fd, _Td, tuple<_Args&&...> >::type + operator()(_Args&& ...__args) + { + return _VSTD::__apply_functor(__f_, __bound_args_, __indices(), + tuple<_Args&&...>(_VSTD::forward<_Args>(__args)...)); + } + + template + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 + typename __bind_return >::type + operator()(_Args&& ...__args) const + { + return _VSTD::__apply_functor(__f_, __bound_args_, __indices(), + tuple<_Args&&...>(_VSTD::forward<_Args>(__args)...)); + } +}; + +template +struct __is_bind_expression<__bind<_Fp, _BoundArgs...> > : public true_type {}; + +template +class __bind_r + : public __bind<_Fp, _BoundArgs...> +{ + typedef __bind<_Fp, _BoundArgs...> base; + typedef typename base::_Fd _Fd; + typedef typename base::_Td _Td; +public: + typedef _Rp result_type; + + + template ::value && + !is_same::type, + __bind_r>::value + >::type> + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 + explicit __bind_r(_Gp&& __f, _BA&& ...__bound_args) + : base(_VSTD::forward<_Gp>(__f), + _VSTD::forward<_BA>(__bound_args)...) {} + + template + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 + typename enable_if + < + is_convertible >::type, + result_type>::value || is_void<_Rp>::value, + result_type + >::type + operator()(_Args&& ...__args) + { + typedef __invoke_void_return_wrapper<_Rp> _Invoker; + return _Invoker::__call(static_cast(*this), _VSTD::forward<_Args>(__args)...); + } + + template + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 + typename enable_if + < + is_convertible >::type, + result_type>::value || is_void<_Rp>::value, + result_type + >::type + operator()(_Args&& ...__args) const + { + typedef __invoke_void_return_wrapper<_Rp> _Invoker; + return _Invoker::__call(static_cast(*this), _VSTD::forward<_Args>(__args)...); + } +}; + +template +struct __is_bind_expression<__bind_r<_Rp, _Fp, _BoundArgs...> > : public true_type {}; + +template +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 +__bind<_Fp, _BoundArgs...> +bind(_Fp&& __f, _BoundArgs&&... __bound_args) +{ + typedef __bind<_Fp, _BoundArgs...> type; + return type(_VSTD::forward<_Fp>(__f), _VSTD::forward<_BoundArgs>(__bound_args)...); +} + +template +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 +__bind_r<_Rp, _Fp, _BoundArgs...> +bind(_Fp&& __f, _BoundArgs&&... __bound_args) +{ + typedef __bind_r<_Rp, _Fp, _BoundArgs...> type; + return type(_VSTD::forward<_Fp>(__f), _VSTD::forward<_BoundArgs>(__bound_args)...); +} + +#endif // _LIBCPP_CXX03_LANG + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___FUNCTIONAL_BIND_H diff --git a/libcxx/include/__functional/bind_front.h b/libcxx/include/__functional/bind_front.h new file mode 100644 index 0000000000000..8690499f2b0c9 --- /dev/null +++ b/libcxx/include/__functional/bind_front.h @@ -0,0 +1,52 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FUNCTIONAL_BIND_FRONT_H +#define _LIBCPP___FUNCTIONAL_BIND_FRONT_H + +#include <__config> +#include <__functional/perfect_forward.h> +#include <__functional/invoke.h> +#include +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER > 17 + +struct __bind_front_op +{ + template + constexpr static auto __call(_Args&&... __args) + noexcept(noexcept(_VSTD::invoke(_VSTD::forward<_Args>(__args)...))) + -> decltype( _VSTD::invoke(_VSTD::forward<_Args>(__args)...)) + { return _VSTD::invoke(_VSTD::forward<_Args>(__args)...); } +}; + +template, _Fn>, + is_move_constructible>, + is_constructible, _Args>..., + is_move_constructible>... + >::value>> +constexpr auto bind_front(_Fn&& __f, _Args&&... __args) +{ + return __perfect_forward<__bind_front_op, _Fn, _Args...>(_VSTD::forward<_Fn>(__f), + _VSTD::forward<_Args>(__args)...); +} + +#endif // _LIBCPP_STD_VER > 17 + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___FUNCTIONAL_BIND_FRONT_H diff --git a/libcxx/include/__functional/binder1st.h b/libcxx/include/__functional/binder1st.h new file mode 100644 index 0000000000000..5dd8f5cf01553 --- /dev/null +++ b/libcxx/include/__functional/binder1st.h @@ -0,0 +1,54 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FUNCTIONAL_BINDER1ST_H +#define _LIBCPP___FUNCTIONAL_BINDER1ST_H + +#include <__config> +#include <__functional/unary_function.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER <= 14 || defined(_LIBCPP_ENABLE_CXX17_REMOVED_BINDERS) + +template +class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 binder1st + : public unary_function +{ +protected: + __Operation op; + typename __Operation::first_argument_type value; +public: + _LIBCPP_INLINE_VISIBILITY binder1st(const __Operation& __x, + const typename __Operation::first_argument_type __y) + : op(__x), value(__y) {} + _LIBCPP_INLINE_VISIBILITY typename __Operation::result_type operator() + (typename __Operation::second_argument_type& __x) const + {return op(value, __x);} + _LIBCPP_INLINE_VISIBILITY typename __Operation::result_type operator() + (const typename __Operation::second_argument_type& __x) const + {return op(value, __x);} +}; + +template +_LIBCPP_DEPRECATED_IN_CXX11 inline _LIBCPP_INLINE_VISIBILITY +binder1st<__Operation> +bind1st(const __Operation& __op, const _Tp& __x) + {return binder1st<__Operation>(__op, __x);} + +#endif // _LIBCPP_STD_VER <= 14 || defined(_LIBCPP_ENABLE_CXX17_REMOVED_BINDERS) + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___FUNCTIONAL_BINDER1ST_H diff --git a/libcxx/include/__functional/binder2nd.h b/libcxx/include/__functional/binder2nd.h new file mode 100644 index 0000000000000..3ed5f5bf45408 --- /dev/null +++ b/libcxx/include/__functional/binder2nd.h @@ -0,0 +1,54 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FUNCTIONAL_BINDER2ND_H +#define _LIBCPP___FUNCTIONAL_BINDER2ND_H + +#include <__config> +#include <__functional/unary_function.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER <= 14 || defined(_LIBCPP_ENABLE_CXX17_REMOVED_BINDERS) + +template +class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 binder2nd + : public unary_function +{ +protected: + __Operation op; + typename __Operation::second_argument_type value; +public: + _LIBCPP_INLINE_VISIBILITY + binder2nd(const __Operation& __x, const typename __Operation::second_argument_type __y) + : op(__x), value(__y) {} + _LIBCPP_INLINE_VISIBILITY typename __Operation::result_type operator() + ( typename __Operation::first_argument_type& __x) const + {return op(__x, value);} + _LIBCPP_INLINE_VISIBILITY typename __Operation::result_type operator() + (const typename __Operation::first_argument_type& __x) const + {return op(__x, value);} +}; + +template +_LIBCPP_DEPRECATED_IN_CXX11 inline _LIBCPP_INLINE_VISIBILITY +binder2nd<__Operation> +bind2nd(const __Operation& __op, const _Tp& __x) + {return binder2nd<__Operation>(__op, __x);} + +#endif // _LIBCPP_STD_VER <= 14 || defined(_LIBCPP_ENABLE_CXX17_REMOVED_BINDERS) + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___FUNCTIONAL_BINDER2ND_H diff --git a/libcxx/include/__functional/default_searcher.h b/libcxx/include/__functional/default_searcher.h new file mode 100644 index 0000000000000..1acbc1883afc3 --- /dev/null +++ b/libcxx/include/__functional/default_searcher.h @@ -0,0 +1,56 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FUNCTIONAL_DEFAULT_SEARCHER_H +#define _LIBCPP___FUNCTIONAL_DEFAULT_SEARCHER_H + +#include <__algorithm/search.h> +#include <__config> +#include <__functional/operations.h> +#include <__iterator/iterator_traits.h> +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER > 14 + +// default searcher +template> +class _LIBCPP_TEMPLATE_VIS default_searcher { +public: + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 + default_searcher(_ForwardIterator __f, _ForwardIterator __l, + _BinaryPredicate __p = _BinaryPredicate()) + : __first_(__f), __last_(__l), __pred_(__p) {} + + template + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 + pair<_ForwardIterator2, _ForwardIterator2> + operator () (_ForwardIterator2 __f, _ForwardIterator2 __l) const + { + return _VSTD::__search(__f, __l, __first_, __last_, __pred_, + typename iterator_traits<_ForwardIterator>::iterator_category(), + typename iterator_traits<_ForwardIterator2>::iterator_category()); + } + +private: + _ForwardIterator __first_; + _ForwardIterator __last_; + _BinaryPredicate __pred_; + }; + +#endif // _LIBCPP_STD_VER > 14 + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___FUNCTIONAL_DEFAULT_SEARCHER_H diff --git a/libcxx/include/__functional_03 b/libcxx/include/__functional/function.h similarity index 53% rename from libcxx/include/__functional_03 rename to libcxx/include/__functional/function.h index 619d92e5385e0..ba629e1d145e4 100644 --- a/libcxx/include/__functional_03 +++ b/libcxx/include/__functional/function.h @@ -7,15 +7,1229 @@ // //===----------------------------------------------------------------------===// -#ifndef _LIBCPP_FUNCTIONAL_03 -#define _LIBCPP_FUNCTIONAL_03 - -// manual variadic expansion for +#ifndef _LIBCPP___FUNCTIONAL_FUNCTION_H +#define _LIBCPP___FUNCTIONAL_FUNCTION_H + +#include <__config> +#include <__functional/binary_function.h> +#include <__functional/invoke.h> +#include <__functional/unary_function.h> +#include <__iterator/iterator_traits.h> +#include <__memory/allocator_traits.h> +#include <__memory/compressed_pair.h> +#include <__memory/shared_ptr.h> +#include +#include // TODO: replace with <__memory/__builtin_new_allocator.h> +#include +#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header #endif +_LIBCPP_BEGIN_NAMESPACE_STD + +// bad_function_call + +class _LIBCPP_EXCEPTION_ABI bad_function_call + : public exception +{ +#ifdef _LIBCPP_ABI_BAD_FUNCTION_CALL_KEY_FUNCTION +public: + virtual ~bad_function_call() _NOEXCEPT; + + virtual const char* what() const _NOEXCEPT; +#endif +}; + +_LIBCPP_NORETURN inline _LIBCPP_INLINE_VISIBILITY +void __throw_bad_function_call() +{ +#ifndef _LIBCPP_NO_EXCEPTIONS + throw bad_function_call(); +#else + _VSTD::abort(); +#endif +} + +#if defined(_LIBCPP_CXX03_LANG) && !defined(_LIBCPP_DISABLE_DEPRECATION_WARNINGS) && __has_attribute(deprecated) +# define _LIBCPP_DEPRECATED_CXX03_FUNCTION \ + __attribute__((deprecated("Using std::function in C++03 is not supported anymore. Please upgrade to C++11 or later, or use a different type"))) +#else +# define _LIBCPP_DEPRECATED_CXX03_FUNCTION /* nothing */ +#endif + +template class _LIBCPP_DEPRECATED_CXX03_FUNCTION _LIBCPP_TEMPLATE_VIS function; // undefined + +namespace __function +{ + +template +struct __maybe_derive_from_unary_function +{ +}; + +template +struct __maybe_derive_from_unary_function<_Rp(_A1)> + : public unary_function<_A1, _Rp> +{ +}; + +template +struct __maybe_derive_from_binary_function +{ +}; + +template +struct __maybe_derive_from_binary_function<_Rp(_A1, _A2)> + : public binary_function<_A1, _A2, _Rp> +{ +}; + +template +_LIBCPP_INLINE_VISIBILITY +bool __not_null(_Fp const&) { return true; } + +template +_LIBCPP_INLINE_VISIBILITY +bool __not_null(_Fp* __ptr) { return __ptr; } + +template +_LIBCPP_INLINE_VISIBILITY +bool __not_null(_Ret _Class::*__ptr) { return __ptr; } + +template +_LIBCPP_INLINE_VISIBILITY +bool __not_null(function<_Fp> const& __f) { return !!__f; } + +#ifdef _LIBCPP_HAS_EXTENSION_BLOCKS +template +_LIBCPP_INLINE_VISIBILITY +bool __not_null(_Rp (^__p)(_Args...)) { return __p; } +#endif + +} // namespace __function + +#ifndef _LIBCPP_CXX03_LANG + +namespace __function { + +// __alloc_func holds a functor and an allocator. + +template class __alloc_func; +template +class __default_alloc_func; + +template +class __alloc_func<_Fp, _Ap, _Rp(_ArgTypes...)> +{ + __compressed_pair<_Fp, _Ap> __f_; + + public: + typedef _LIBCPP_NODEBUG_TYPE _Fp _Target; + typedef _LIBCPP_NODEBUG_TYPE _Ap _Alloc; + + _LIBCPP_INLINE_VISIBILITY + const _Target& __target() const { return __f_.first(); } + + // WIN32 APIs may define __allocator, so use __get_allocator instead. + _LIBCPP_INLINE_VISIBILITY + const _Alloc& __get_allocator() const { return __f_.second(); } + + _LIBCPP_INLINE_VISIBILITY + explicit __alloc_func(_Target&& __f) + : __f_(piecewise_construct, _VSTD::forward_as_tuple(_VSTD::move(__f)), + _VSTD::forward_as_tuple()) + { + } + + _LIBCPP_INLINE_VISIBILITY + explicit __alloc_func(const _Target& __f, const _Alloc& __a) + : __f_(piecewise_construct, _VSTD::forward_as_tuple(__f), + _VSTD::forward_as_tuple(__a)) + { + } + + _LIBCPP_INLINE_VISIBILITY + explicit __alloc_func(const _Target& __f, _Alloc&& __a) + : __f_(piecewise_construct, _VSTD::forward_as_tuple(__f), + _VSTD::forward_as_tuple(_VSTD::move(__a))) + { + } + + _LIBCPP_INLINE_VISIBILITY + explicit __alloc_func(_Target&& __f, _Alloc&& __a) + : __f_(piecewise_construct, _VSTD::forward_as_tuple(_VSTD::move(__f)), + _VSTD::forward_as_tuple(_VSTD::move(__a))) + { + } + + _LIBCPP_INLINE_VISIBILITY + _Rp operator()(_ArgTypes&&... __arg) + { + typedef __invoke_void_return_wrapper<_Rp> _Invoker; + return _Invoker::__call(__f_.first(), + _VSTD::forward<_ArgTypes>(__arg)...); + } + + _LIBCPP_INLINE_VISIBILITY + __alloc_func* __clone() const + { + typedef allocator_traits<_Alloc> __alloc_traits; + typedef + typename __rebind_alloc_helper<__alloc_traits, __alloc_func>::type + _AA; + _AA __a(__f_.second()); + typedef __allocator_destructor<_AA> _Dp; + unique_ptr<__alloc_func, _Dp> __hold(__a.allocate(1), _Dp(__a, 1)); + ::new ((void*)__hold.get()) __alloc_func(__f_.first(), _Alloc(__a)); + return __hold.release(); + } + + _LIBCPP_INLINE_VISIBILITY + void destroy() _NOEXCEPT { __f_.~__compressed_pair<_Target, _Alloc>(); } + + static void __destroy_and_delete(__alloc_func* __f) { + typedef allocator_traits<_Alloc> __alloc_traits; + typedef typename __rebind_alloc_helper<__alloc_traits, __alloc_func>::type + _FunAlloc; + _FunAlloc __a(__f->__get_allocator()); + __f->destroy(); + __a.deallocate(__f, 1); + } +}; + +template +class __default_alloc_func<_Fp, _Rp(_ArgTypes...)> { + _Fp __f_; + +public: + typedef _LIBCPP_NODEBUG_TYPE _Fp _Target; + + _LIBCPP_INLINE_VISIBILITY + const _Target& __target() const { return __f_; } + + _LIBCPP_INLINE_VISIBILITY + explicit __default_alloc_func(_Target&& __f) : __f_(_VSTD::move(__f)) {} + + _LIBCPP_INLINE_VISIBILITY + explicit __default_alloc_func(const _Target& __f) : __f_(__f) {} + + _LIBCPP_INLINE_VISIBILITY + _Rp operator()(_ArgTypes&&... __arg) { + typedef __invoke_void_return_wrapper<_Rp> _Invoker; + return _Invoker::__call(__f_, _VSTD::forward<_ArgTypes>(__arg)...); + } + + _LIBCPP_INLINE_VISIBILITY + __default_alloc_func* __clone() const { + __builtin_new_allocator::__holder_t __hold = + __builtin_new_allocator::__allocate_type<__default_alloc_func>(1); + __default_alloc_func* __res = + ::new ((void*)__hold.get()) __default_alloc_func(__f_); + (void)__hold.release(); + return __res; + } + + _LIBCPP_INLINE_VISIBILITY + void destroy() _NOEXCEPT { __f_.~_Target(); } + + static void __destroy_and_delete(__default_alloc_func* __f) { + __f->destroy(); + __builtin_new_allocator::__deallocate_type<__default_alloc_func>(__f, 1); + } +}; + +// __base provides an abstract interface for copyable functors. + +template class _LIBCPP_TEMPLATE_VIS __base; + +template +class __base<_Rp(_ArgTypes...)> +{ + __base(const __base&); + __base& operator=(const __base&); +public: + _LIBCPP_INLINE_VISIBILITY __base() {} + _LIBCPP_INLINE_VISIBILITY virtual ~__base() {} + virtual __base* __clone() const = 0; + virtual void __clone(__base*) const = 0; + virtual void destroy() _NOEXCEPT = 0; + virtual void destroy_deallocate() _NOEXCEPT = 0; + virtual _Rp operator()(_ArgTypes&& ...) = 0; +#ifndef _LIBCPP_NO_RTTI + virtual const void* target(const type_info&) const _NOEXCEPT = 0; + virtual const std::type_info& target_type() const _NOEXCEPT = 0; +#endif // _LIBCPP_NO_RTTI +}; + +// __func implements __base for a given functor type. + +template class __func; + +template +class __func<_Fp, _Alloc, _Rp(_ArgTypes...)> + : public __base<_Rp(_ArgTypes...)> +{ + __alloc_func<_Fp, _Alloc, _Rp(_ArgTypes...)> __f_; +public: + _LIBCPP_INLINE_VISIBILITY + explicit __func(_Fp&& __f) + : __f_(_VSTD::move(__f)) {} + + _LIBCPP_INLINE_VISIBILITY + explicit __func(const _Fp& __f, const _Alloc& __a) + : __f_(__f, __a) {} + + _LIBCPP_INLINE_VISIBILITY + explicit __func(const _Fp& __f, _Alloc&& __a) + : __f_(__f, _VSTD::move(__a)) {} + + _LIBCPP_INLINE_VISIBILITY + explicit __func(_Fp&& __f, _Alloc&& __a) + : __f_(_VSTD::move(__f), _VSTD::move(__a)) {} + + virtual __base<_Rp(_ArgTypes...)>* __clone() const; + virtual void __clone(__base<_Rp(_ArgTypes...)>*) const; + virtual void destroy() _NOEXCEPT; + virtual void destroy_deallocate() _NOEXCEPT; + virtual _Rp operator()(_ArgTypes&&... __arg); +#ifndef _LIBCPP_NO_RTTI + virtual const void* target(const type_info&) const _NOEXCEPT; + virtual const std::type_info& target_type() const _NOEXCEPT; +#endif // _LIBCPP_NO_RTTI +}; + +template +__base<_Rp(_ArgTypes...)>* +__func<_Fp, _Alloc, _Rp(_ArgTypes...)>::__clone() const +{ + typedef allocator_traits<_Alloc> __alloc_traits; + typedef typename __rebind_alloc_helper<__alloc_traits, __func>::type _Ap; + _Ap __a(__f_.__get_allocator()); + typedef __allocator_destructor<_Ap> _Dp; + unique_ptr<__func, _Dp> __hold(__a.allocate(1), _Dp(__a, 1)); + ::new ((void*)__hold.get()) __func(__f_.__target(), _Alloc(__a)); + return __hold.release(); +} + +template +void +__func<_Fp, _Alloc, _Rp(_ArgTypes...)>::__clone(__base<_Rp(_ArgTypes...)>* __p) const +{ + ::new ((void*)__p) __func(__f_.__target(), __f_.__get_allocator()); +} + +template +void +__func<_Fp, _Alloc, _Rp(_ArgTypes...)>::destroy() _NOEXCEPT +{ + __f_.destroy(); +} + +template +void +__func<_Fp, _Alloc, _Rp(_ArgTypes...)>::destroy_deallocate() _NOEXCEPT +{ + typedef allocator_traits<_Alloc> __alloc_traits; + typedef typename __rebind_alloc_helper<__alloc_traits, __func>::type _Ap; + _Ap __a(__f_.__get_allocator()); + __f_.destroy(); + __a.deallocate(this, 1); +} + +template +_Rp +__func<_Fp, _Alloc, _Rp(_ArgTypes...)>::operator()(_ArgTypes&& ... __arg) +{ + return __f_(_VSTD::forward<_ArgTypes>(__arg)...); +} + +#ifndef _LIBCPP_NO_RTTI + +template +const void* +__func<_Fp, _Alloc, _Rp(_ArgTypes...)>::target(const type_info& __ti) const _NOEXCEPT +{ + if (__ti == typeid(_Fp)) + return &__f_.__target(); + return nullptr; +} + +template +const std::type_info& +__func<_Fp, _Alloc, _Rp(_ArgTypes...)>::target_type() const _NOEXCEPT +{ + return typeid(_Fp); +} + +#endif // _LIBCPP_NO_RTTI + +// __value_func creates a value-type from a __func. + +template class __value_func; + +template class __value_func<_Rp(_ArgTypes...)> +{ + typename aligned_storage<3 * sizeof(void*)>::type __buf_; + + typedef __base<_Rp(_ArgTypes...)> __func; + __func* __f_; + + _LIBCPP_NO_CFI static __func* __as_base(void* p) + { + return reinterpret_cast<__func*>(p); + } + + public: + _LIBCPP_INLINE_VISIBILITY + __value_func() _NOEXCEPT : __f_(nullptr) {} + + template + _LIBCPP_INLINE_VISIBILITY __value_func(_Fp&& __f, const _Alloc& __a) + : __f_(nullptr) + { + typedef allocator_traits<_Alloc> __alloc_traits; + typedef __function::__func<_Fp, _Alloc, _Rp(_ArgTypes...)> _Fun; + typedef typename __rebind_alloc_helper<__alloc_traits, _Fun>::type + _FunAlloc; + + if (__function::__not_null(__f)) + { + _FunAlloc __af(__a); + if (sizeof(_Fun) <= sizeof(__buf_) && + is_nothrow_copy_constructible<_Fp>::value && + is_nothrow_copy_constructible<_FunAlloc>::value) + { + __f_ = + ::new ((void*)&__buf_) _Fun(_VSTD::move(__f), _Alloc(__af)); + } + else + { + typedef __allocator_destructor<_FunAlloc> _Dp; + unique_ptr<__func, _Dp> __hold(__af.allocate(1), _Dp(__af, 1)); + ::new ((void*)__hold.get()) _Fun(_VSTD::move(__f), _Alloc(__a)); + __f_ = __hold.release(); + } + } + } + + template ::type, __value_func>::value>::type> + _LIBCPP_INLINE_VISIBILITY explicit __value_func(_Fp&& __f) + : __value_func(_VSTD::forward<_Fp>(__f), allocator<_Fp>()) {} + + _LIBCPP_INLINE_VISIBILITY + __value_func(const __value_func& __f) + { + if (__f.__f_ == nullptr) + __f_ = nullptr; + else if ((void*)__f.__f_ == &__f.__buf_) + { + __f_ = __as_base(&__buf_); + __f.__f_->__clone(__f_); + } + else + __f_ = __f.__f_->__clone(); + } + + _LIBCPP_INLINE_VISIBILITY + __value_func(__value_func&& __f) _NOEXCEPT + { + if (__f.__f_ == nullptr) + __f_ = nullptr; + else if ((void*)__f.__f_ == &__f.__buf_) + { + __f_ = __as_base(&__buf_); + __f.__f_->__clone(__f_); + } + else + { + __f_ = __f.__f_; + __f.__f_ = nullptr; + } + } + + _LIBCPP_INLINE_VISIBILITY + ~__value_func() + { + if ((void*)__f_ == &__buf_) + __f_->destroy(); + else if (__f_) + __f_->destroy_deallocate(); + } + + _LIBCPP_INLINE_VISIBILITY + __value_func& operator=(__value_func&& __f) + { + *this = nullptr; + if (__f.__f_ == nullptr) + __f_ = nullptr; + else if ((void*)__f.__f_ == &__f.__buf_) + { + __f_ = __as_base(&__buf_); + __f.__f_->__clone(__f_); + } + else + { + __f_ = __f.__f_; + __f.__f_ = nullptr; + } + return *this; + } + + _LIBCPP_INLINE_VISIBILITY + __value_func& operator=(nullptr_t) + { + __func* __f = __f_; + __f_ = nullptr; + if ((void*)__f == &__buf_) + __f->destroy(); + else if (__f) + __f->destroy_deallocate(); + return *this; + } + + _LIBCPP_INLINE_VISIBILITY + _Rp operator()(_ArgTypes&&... __args) const + { + if (__f_ == nullptr) + __throw_bad_function_call(); + return (*__f_)(_VSTD::forward<_ArgTypes>(__args)...); + } + + _LIBCPP_INLINE_VISIBILITY + void swap(__value_func& __f) _NOEXCEPT + { + if (&__f == this) + return; + if ((void*)__f_ == &__buf_ && (void*)__f.__f_ == &__f.__buf_) + { + typename aligned_storage::type __tempbuf; + __func* __t = __as_base(&__tempbuf); + __f_->__clone(__t); + __f_->destroy(); + __f_ = nullptr; + __f.__f_->__clone(__as_base(&__buf_)); + __f.__f_->destroy(); + __f.__f_ = nullptr; + __f_ = __as_base(&__buf_); + __t->__clone(__as_base(&__f.__buf_)); + __t->destroy(); + __f.__f_ = __as_base(&__f.__buf_); + } + else if ((void*)__f_ == &__buf_) + { + __f_->__clone(__as_base(&__f.__buf_)); + __f_->destroy(); + __f_ = __f.__f_; + __f.__f_ = __as_base(&__f.__buf_); + } + else if ((void*)__f.__f_ == &__f.__buf_) + { + __f.__f_->__clone(__as_base(&__buf_)); + __f.__f_->destroy(); + __f.__f_ = __f_; + __f_ = __as_base(&__buf_); + } + else + _VSTD::swap(__f_, __f.__f_); + } + + _LIBCPP_INLINE_VISIBILITY + explicit operator bool() const _NOEXCEPT { return __f_ != nullptr; } + +#ifndef _LIBCPP_NO_RTTI + _LIBCPP_INLINE_VISIBILITY + const std::type_info& target_type() const _NOEXCEPT + { + if (__f_ == nullptr) + return typeid(void); + return __f_->target_type(); + } + + template + _LIBCPP_INLINE_VISIBILITY const _Tp* target() const _NOEXCEPT + { + if (__f_ == nullptr) + return nullptr; + return (const _Tp*)__f_->target(typeid(_Tp)); + } +#endif // _LIBCPP_NO_RTTI +}; + +// Storage for a functor object, to be used with __policy to manage copy and +// destruction. +union __policy_storage +{ + mutable char __small[sizeof(void*) * 2]; + void* __large; +}; + +// True if _Fun can safely be held in __policy_storage.__small. +template +struct __use_small_storage + : public integral_constant< + bool, sizeof(_Fun) <= sizeof(__policy_storage) && + _LIBCPP_ALIGNOF(_Fun) <= _LIBCPP_ALIGNOF(__policy_storage) && + is_trivially_copy_constructible<_Fun>::value && + is_trivially_destructible<_Fun>::value> {}; + +// Policy contains information about how to copy, destroy, and move the +// underlying functor. You can think of it as a vtable of sorts. +struct __policy +{ + // Used to copy or destroy __large values. null for trivial objects. + void* (*const __clone)(const void*); + void (*const __destroy)(void*); + + // True if this is the null policy (no value). + const bool __is_null; + + // The target type. May be null if RTTI is disabled. + const std::type_info* const __type_info; + + // Returns a pointer to a static policy object suitable for the functor + // type. + template + _LIBCPP_INLINE_VISIBILITY static const __policy* __create() + { + return __choose_policy<_Fun>(__use_small_storage<_Fun>()); + } + + _LIBCPP_INLINE_VISIBILITY + static const __policy* __create_empty() + { + static const _LIBCPP_CONSTEXPR __policy __policy_ = {nullptr, nullptr, + true, +#ifndef _LIBCPP_NO_RTTI + &typeid(void) +#else + nullptr +#endif + }; + return &__policy_; + } + + private: + template static void* __large_clone(const void* __s) + { + const _Fun* __f = static_cast(__s); + return __f->__clone(); + } + + template + static void __large_destroy(void* __s) { + _Fun::__destroy_and_delete(static_cast<_Fun*>(__s)); + } + + template + _LIBCPP_INLINE_VISIBILITY static const __policy* + __choose_policy(/* is_small = */ false_type) { + static const _LIBCPP_CONSTEXPR __policy __policy_ = { + &__large_clone<_Fun>, &__large_destroy<_Fun>, false, +#ifndef _LIBCPP_NO_RTTI + &typeid(typename _Fun::_Target) +#else + nullptr +#endif + }; + return &__policy_; + } + + template + _LIBCPP_INLINE_VISIBILITY static const __policy* + __choose_policy(/* is_small = */ true_type) + { + static const _LIBCPP_CONSTEXPR __policy __policy_ = { + nullptr, nullptr, false, +#ifndef _LIBCPP_NO_RTTI + &typeid(typename _Fun::_Target) +#else + nullptr +#endif + }; + return &__policy_; + } +}; + +// Used to choose between perfect forwarding or pass-by-value. Pass-by-value is +// faster for types that can be passed in registers. +template +using __fast_forward = + typename conditional::value, _Tp, _Tp&&>::type; + +// __policy_invoker calls an instance of __alloc_func held in __policy_storage. + +template struct __policy_invoker; + +template +struct __policy_invoker<_Rp(_ArgTypes...)> +{ + typedef _Rp (*__Call)(const __policy_storage*, + __fast_forward<_ArgTypes>...); + + __Call __call_; + + // Creates an invoker that throws bad_function_call. + _LIBCPP_INLINE_VISIBILITY + __policy_invoker() : __call_(&__call_empty) {} + + // Creates an invoker that calls the given instance of __func. + template + _LIBCPP_INLINE_VISIBILITY static __policy_invoker __create() + { + return __policy_invoker(&__call_impl<_Fun>); + } + + private: + _LIBCPP_INLINE_VISIBILITY + explicit __policy_invoker(__Call __c) : __call_(__c) {} + + static _Rp __call_empty(const __policy_storage*, + __fast_forward<_ArgTypes>...) + { + __throw_bad_function_call(); + } + + template + static _Rp __call_impl(const __policy_storage* __buf, + __fast_forward<_ArgTypes>... __args) + { + _Fun* __f = reinterpret_cast<_Fun*>(__use_small_storage<_Fun>::value + ? &__buf->__small + : __buf->__large); + return (*__f)(_VSTD::forward<_ArgTypes>(__args)...); + } +}; + +// __policy_func uses a __policy and __policy_invoker to create a type-erased, +// copyable functor. + +template class __policy_func; + +template class __policy_func<_Rp(_ArgTypes...)> +{ + // Inline storage for small objects. + __policy_storage __buf_; + + // Calls the value stored in __buf_. This could technically be part of + // policy, but storing it here eliminates a level of indirection inside + // operator(). + typedef __function::__policy_invoker<_Rp(_ArgTypes...)> __invoker; + __invoker __invoker_; + + // The policy that describes how to move / copy / destroy __buf_. Never + // null, even if the function is empty. + const __policy* __policy_; + + public: + _LIBCPP_INLINE_VISIBILITY + __policy_func() : __policy_(__policy::__create_empty()) {} + + template + _LIBCPP_INLINE_VISIBILITY __policy_func(_Fp&& __f, const _Alloc& __a) + : __policy_(__policy::__create_empty()) + { + typedef __alloc_func<_Fp, _Alloc, _Rp(_ArgTypes...)> _Fun; + typedef allocator_traits<_Alloc> __alloc_traits; + typedef typename __rebind_alloc_helper<__alloc_traits, _Fun>::type + _FunAlloc; + + if (__function::__not_null(__f)) + { + __invoker_ = __invoker::template __create<_Fun>(); + __policy_ = __policy::__create<_Fun>(); + + _FunAlloc __af(__a); + if (__use_small_storage<_Fun>()) + { + ::new ((void*)&__buf_.__small) + _Fun(_VSTD::move(__f), _Alloc(__af)); + } + else + { + typedef __allocator_destructor<_FunAlloc> _Dp; + unique_ptr<_Fun, _Dp> __hold(__af.allocate(1), _Dp(__af, 1)); + ::new ((void*)__hold.get()) + _Fun(_VSTD::move(__f), _Alloc(__af)); + __buf_.__large = __hold.release(); + } + } + } + + template ::type, __policy_func>::value>::type> + _LIBCPP_INLINE_VISIBILITY explicit __policy_func(_Fp&& __f) + : __policy_(__policy::__create_empty()) { + typedef __default_alloc_func<_Fp, _Rp(_ArgTypes...)> _Fun; + + if (__function::__not_null(__f)) { + __invoker_ = __invoker::template __create<_Fun>(); + __policy_ = __policy::__create<_Fun>(); + if (__use_small_storage<_Fun>()) { + ::new ((void*)&__buf_.__small) _Fun(_VSTD::move(__f)); + } else { + __builtin_new_allocator::__holder_t __hold = + __builtin_new_allocator::__allocate_type<_Fun>(1); + __buf_.__large = ::new ((void*)__hold.get()) _Fun(_VSTD::move(__f)); + (void)__hold.release(); + } + } + } + + _LIBCPP_INLINE_VISIBILITY + __policy_func(const __policy_func& __f) + : __buf_(__f.__buf_), __invoker_(__f.__invoker_), + __policy_(__f.__policy_) + { + if (__policy_->__clone) + __buf_.__large = __policy_->__clone(__f.__buf_.__large); + } + + _LIBCPP_INLINE_VISIBILITY + __policy_func(__policy_func&& __f) + : __buf_(__f.__buf_), __invoker_(__f.__invoker_), + __policy_(__f.__policy_) + { + if (__policy_->__destroy) + { + __f.__policy_ = __policy::__create_empty(); + __f.__invoker_ = __invoker(); + } + } + + _LIBCPP_INLINE_VISIBILITY + ~__policy_func() + { + if (__policy_->__destroy) + __policy_->__destroy(__buf_.__large); + } + + _LIBCPP_INLINE_VISIBILITY + __policy_func& operator=(__policy_func&& __f) + { + *this = nullptr; + __buf_ = __f.__buf_; + __invoker_ = __f.__invoker_; + __policy_ = __f.__policy_; + __f.__policy_ = __policy::__create_empty(); + __f.__invoker_ = __invoker(); + return *this; + } + + _LIBCPP_INLINE_VISIBILITY + __policy_func& operator=(nullptr_t) + { + const __policy* __p = __policy_; + __policy_ = __policy::__create_empty(); + __invoker_ = __invoker(); + if (__p->__destroy) + __p->__destroy(__buf_.__large); + return *this; + } + + _LIBCPP_INLINE_VISIBILITY + _Rp operator()(_ArgTypes&&... __args) const + { + return __invoker_.__call_(_VSTD::addressof(__buf_), + _VSTD::forward<_ArgTypes>(__args)...); + } + + _LIBCPP_INLINE_VISIBILITY + void swap(__policy_func& __f) + { + _VSTD::swap(__invoker_, __f.__invoker_); + _VSTD::swap(__policy_, __f.__policy_); + _VSTD::swap(__buf_, __f.__buf_); + } + + _LIBCPP_INLINE_VISIBILITY + explicit operator bool() const _NOEXCEPT + { + return !__policy_->__is_null; + } + +#ifndef _LIBCPP_NO_RTTI + _LIBCPP_INLINE_VISIBILITY + const std::type_info& target_type() const _NOEXCEPT + { + return *__policy_->__type_info; + } + + template + _LIBCPP_INLINE_VISIBILITY const _Tp* target() const _NOEXCEPT + { + if (__policy_->__is_null || typeid(_Tp) != *__policy_->__type_info) + return nullptr; + if (__policy_->__clone) // Out of line storage. + return reinterpret_cast(__buf_.__large); + else + return reinterpret_cast(&__buf_.__small); + } +#endif // _LIBCPP_NO_RTTI +}; + +#if defined(_LIBCPP_HAS_BLOCKS_RUNTIME) && !defined(_LIBCPP_HAS_OBJC_ARC) + +extern "C" void *_Block_copy(const void *); +extern "C" void _Block_release(const void *); + +template +class __func<_Rp1(^)(_ArgTypes1...), _Alloc, _Rp(_ArgTypes...)> + : public __base<_Rp(_ArgTypes...)> +{ + typedef _Rp1(^__block_type)(_ArgTypes1...); + __block_type __f_; + +public: + _LIBCPP_INLINE_VISIBILITY + explicit __func(__block_type const& __f) + : __f_(reinterpret_cast<__block_type>(__f ? _Block_copy(__f) : nullptr)) + { } + + // [TODO] add && to save on a retain + + _LIBCPP_INLINE_VISIBILITY + explicit __func(__block_type __f, const _Alloc& /* unused */) + : __f_(reinterpret_cast<__block_type>(__f ? _Block_copy(__f) : nullptr)) + { } + + virtual __base<_Rp(_ArgTypes...)>* __clone() const { + _LIBCPP_ASSERT(false, + "Block pointers are just pointers, so they should always fit into " + "std::function's small buffer optimization. This function should " + "never be invoked."); + return nullptr; + } + + virtual void __clone(__base<_Rp(_ArgTypes...)>* __p) const { + ::new ((void*)__p) __func(__f_); + } + + virtual void destroy() _NOEXCEPT { + if (__f_) + _Block_release(__f_); + __f_ = 0; + } + + virtual void destroy_deallocate() _NOEXCEPT { + _LIBCPP_ASSERT(false, + "Block pointers are just pointers, so they should always fit into " + "std::function's small buffer optimization. This function should " + "never be invoked."); + } + + virtual _Rp operator()(_ArgTypes&& ... __arg) { + return _VSTD::__invoke(__f_, _VSTD::forward<_ArgTypes>(__arg)...); + } + +#ifndef _LIBCPP_NO_RTTI + virtual const void* target(type_info const& __ti) const _NOEXCEPT { + if (__ti == typeid(__func::__block_type)) + return &__f_; + return (const void*)nullptr; + } + + virtual const std::type_info& target_type() const _NOEXCEPT { + return typeid(__func::__block_type); + } +#endif // _LIBCPP_NO_RTTI +}; + +#endif // _LIBCPP_HAS_EXTENSION_BLOCKS && !_LIBCPP_HAS_OBJC_ARC + +} // __function + +template +class _LIBCPP_TEMPLATE_VIS function<_Rp(_ArgTypes...)> +#if _LIBCPP_STD_VER <= 17 || !defined(_LIBCPP_ABI_NO_BINDER_BASES) + : public __function::__maybe_derive_from_unary_function<_Rp(_ArgTypes...)>, + public __function::__maybe_derive_from_binary_function<_Rp(_ArgTypes...)> +#endif +{ +#ifndef _LIBCPP_ABI_OPTIMIZED_FUNCTION + typedef __function::__value_func<_Rp(_ArgTypes...)> __func; +#else + typedef __function::__policy_func<_Rp(_ArgTypes...)> __func; +#endif + + __func __f_; + + template , function>, + __invokable<_Fp, _ArgTypes...> + >::value> + struct __callable; + template + struct __callable<_Fp, true> + { + static const bool value = is_void<_Rp>::value || + __is_core_convertible::type, + _Rp>::value; + }; + template + struct __callable<_Fp, false> + { + static const bool value = false; + }; + + template + using _EnableIfLValueCallable = typename enable_if<__callable<_Fp&>::value>::type; +public: + typedef _Rp result_type; + + // construct/copy/destroy: + _LIBCPP_INLINE_VISIBILITY + function() _NOEXCEPT { } + _LIBCPP_INLINE_VISIBILITY + function(nullptr_t) _NOEXCEPT {} + function(const function&); + function(function&&) _NOEXCEPT; + template> + function(_Fp); + +#if _LIBCPP_STD_VER <= 14 + template + _LIBCPP_INLINE_VISIBILITY + function(allocator_arg_t, const _Alloc&) _NOEXCEPT {} + template + _LIBCPP_INLINE_VISIBILITY + function(allocator_arg_t, const _Alloc&, nullptr_t) _NOEXCEPT {} + template + function(allocator_arg_t, const _Alloc&, const function&); + template + function(allocator_arg_t, const _Alloc&, function&&); + template> + function(allocator_arg_t, const _Alloc& __a, _Fp __f); +#endif + + function& operator=(const function&); + function& operator=(function&&) _NOEXCEPT; + function& operator=(nullptr_t) _NOEXCEPT; + template::type>> + function& operator=(_Fp&&); + + ~function(); + + // function modifiers: + void swap(function&) _NOEXCEPT; + +#if _LIBCPP_STD_VER <= 14 + template + _LIBCPP_INLINE_VISIBILITY + void assign(_Fp&& __f, const _Alloc& __a) + {function(allocator_arg, __a, _VSTD::forward<_Fp>(__f)).swap(*this);} +#endif + + // function capacity: + _LIBCPP_INLINE_VISIBILITY + explicit operator bool() const _NOEXCEPT { + return static_cast(__f_); + } + + // deleted overloads close possible hole in the type system + template + bool operator==(const function<_R2(_ArgTypes2...)>&) const = delete; + template + bool operator!=(const function<_R2(_ArgTypes2...)>&) const = delete; +public: + // function invocation: + _Rp operator()(_ArgTypes...) const; + +#ifndef _LIBCPP_NO_RTTI + // function target access: + const std::type_info& target_type() const _NOEXCEPT; + template _Tp* target() _NOEXCEPT; + template const _Tp* target() const _NOEXCEPT; +#endif // _LIBCPP_NO_RTTI +}; + +#ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES +template +function(_Rp(*)(_Ap...)) -> function<_Rp(_Ap...)>; + +template +struct __strip_signature; + +template +struct __strip_signature<_Rp (_Gp::*) (_Ap...)> { using type = _Rp(_Ap...); }; +template +struct __strip_signature<_Rp (_Gp::*) (_Ap...) const> { using type = _Rp(_Ap...); }; +template +struct __strip_signature<_Rp (_Gp::*) (_Ap...) volatile> { using type = _Rp(_Ap...); }; +template +struct __strip_signature<_Rp (_Gp::*) (_Ap...) const volatile> { using type = _Rp(_Ap...); }; + +template +struct __strip_signature<_Rp (_Gp::*) (_Ap...) &> { using type = _Rp(_Ap...); }; +template +struct __strip_signature<_Rp (_Gp::*) (_Ap...) const &> { using type = _Rp(_Ap...); }; +template +struct __strip_signature<_Rp (_Gp::*) (_Ap...) volatile &> { using type = _Rp(_Ap...); }; +template +struct __strip_signature<_Rp (_Gp::*) (_Ap...) const volatile &> { using type = _Rp(_Ap...); }; + +template +struct __strip_signature<_Rp (_Gp::*) (_Ap...) noexcept> { using type = _Rp(_Ap...); }; +template +struct __strip_signature<_Rp (_Gp::*) (_Ap...) const noexcept> { using type = _Rp(_Ap...); }; +template +struct __strip_signature<_Rp (_Gp::*) (_Ap...) volatile noexcept> { using type = _Rp(_Ap...); }; +template +struct __strip_signature<_Rp (_Gp::*) (_Ap...) const volatile noexcept> { using type = _Rp(_Ap...); }; + +template +struct __strip_signature<_Rp (_Gp::*) (_Ap...) & noexcept> { using type = _Rp(_Ap...); }; +template +struct __strip_signature<_Rp (_Gp::*) (_Ap...) const & noexcept> { using type = _Rp(_Ap...); }; +template +struct __strip_signature<_Rp (_Gp::*) (_Ap...) volatile & noexcept> { using type = _Rp(_Ap...); }; +template +struct __strip_signature<_Rp (_Gp::*) (_Ap...) const volatile & noexcept> { using type = _Rp(_Ap...); }; + +template::type> +function(_Fp) -> function<_Stripped>; +#endif // !_LIBCPP_HAS_NO_DEDUCTION_GUIDES + +template +function<_Rp(_ArgTypes...)>::function(const function& __f) : __f_(__f.__f_) {} + +#if _LIBCPP_STD_VER <= 14 +template +template +function<_Rp(_ArgTypes...)>::function(allocator_arg_t, const _Alloc&, + const function& __f) : __f_(__f.__f_) {} +#endif + +template +function<_Rp(_ArgTypes...)>::function(function&& __f) _NOEXCEPT + : __f_(_VSTD::move(__f.__f_)) {} + +#if _LIBCPP_STD_VER <= 14 +template +template +function<_Rp(_ArgTypes...)>::function(allocator_arg_t, const _Alloc&, + function&& __f) + : __f_(_VSTD::move(__f.__f_)) {} +#endif + +template +template +function<_Rp(_ArgTypes...)>::function(_Fp __f) : __f_(_VSTD::move(__f)) {} + +#if _LIBCPP_STD_VER <= 14 +template +template +function<_Rp(_ArgTypes...)>::function(allocator_arg_t, const _Alloc& __a, + _Fp __f) + : __f_(_VSTD::move(__f), __a) {} +#endif + +template +function<_Rp(_ArgTypes...)>& +function<_Rp(_ArgTypes...)>::operator=(const function& __f) +{ + function(__f).swap(*this); + return *this; +} + +template +function<_Rp(_ArgTypes...)>& +function<_Rp(_ArgTypes...)>::operator=(function&& __f) _NOEXCEPT +{ + __f_ = _VSTD::move(__f.__f_); + return *this; +} + +template +function<_Rp(_ArgTypes...)>& +function<_Rp(_ArgTypes...)>::operator=(nullptr_t) _NOEXCEPT +{ + __f_ = nullptr; + return *this; +} + +template +template +function<_Rp(_ArgTypes...)>& +function<_Rp(_ArgTypes...)>::operator=(_Fp&& __f) +{ + function(_VSTD::forward<_Fp>(__f)).swap(*this); + return *this; +} + +template +function<_Rp(_ArgTypes...)>::~function() {} + +template +void +function<_Rp(_ArgTypes...)>::swap(function& __f) _NOEXCEPT +{ + __f_.swap(__f.__f_); +} + +template +_Rp +function<_Rp(_ArgTypes...)>::operator()(_ArgTypes... __arg) const +{ + return __f_(_VSTD::forward<_ArgTypes>(__arg)...); +} + +#ifndef _LIBCPP_NO_RTTI + +template +const std::type_info& +function<_Rp(_ArgTypes...)>::target_type() const _NOEXCEPT +{ + return __f_.target_type(); +} + +template +template +_Tp* +function<_Rp(_ArgTypes...)>::target() _NOEXCEPT +{ + return (_Tp*)(__f_.template target<_Tp>()); +} + +template +template +const _Tp* +function<_Rp(_ArgTypes...)>::target() const _NOEXCEPT +{ + return __f_.template target<_Tp>(); +} + +#endif // _LIBCPP_NO_RTTI + +template +inline _LIBCPP_INLINE_VISIBILITY +bool +operator==(const function<_Rp(_ArgTypes...)>& __f, nullptr_t) _NOEXCEPT {return !__f;} + +template +inline _LIBCPP_INLINE_VISIBILITY +bool +operator==(nullptr_t, const function<_Rp(_ArgTypes...)>& __f) _NOEXCEPT {return !__f;} + +template +inline _LIBCPP_INLINE_VISIBILITY +bool +operator!=(const function<_Rp(_ArgTypes...)>& __f, nullptr_t) _NOEXCEPT {return (bool)__f;} + +template +inline _LIBCPP_INLINE_VISIBILITY +bool +operator!=(nullptr_t, const function<_Rp(_ArgTypes...)>& __f) _NOEXCEPT {return (bool)__f;} + +template +inline _LIBCPP_INLINE_VISIBILITY +void +swap(function<_Rp(_ArgTypes...)>& __x, function<_Rp(_ArgTypes...)>& __y) _NOEXCEPT +{return __x.swap(__y);} + +#else // _LIBCPP_CXX03_LANG + namespace __function { template class __base; @@ -1588,4 +2802,8 @@ void swap(function<_Fp>& __x, function<_Fp>& __y) {return __x.swap(__y);} -#endif // _LIBCPP_FUNCTIONAL_03 +#endif + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___FUNCTIONAL_FUNCTION_H diff --git a/libcxx/include/__functional/identity.h b/libcxx/include/__functional/identity.h new file mode 100644 index 0000000000000..6b8346b3b2a7a --- /dev/null +++ b/libcxx/include/__functional/identity.h @@ -0,0 +1,37 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FUNCTIONAL_IDENTITY_H +#define _LIBCPP___FUNCTIONAL_IDENTITY_H + +#include <__config> +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER > 17 + +struct identity { + template + _LIBCPP_NODISCARD_EXT constexpr _Tp&& operator()(_Tp&& __t) const noexcept + { + return _VSTD::forward<_Tp>(__t); + } + + using is_transparent = void; +}; +#endif // _LIBCPP_STD_VER > 17 + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___FUNCTIONAL_IDENTITY_H diff --git a/libcxx/include/__functional/invoke.h b/libcxx/include/__functional/invoke.h new file mode 100644 index 0000000000000..0e167c75d6908 --- /dev/null +++ b/libcxx/include/__functional/invoke.h @@ -0,0 +1,100 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FUNCTIONAL_INVOKE_H +#define _LIBCPP___FUNCTIONAL_INVOKE_H + +#include <__config> +#include <__functional/weak_result_type.h> +#include <__utility/forward.h> +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template ::value> +struct __invoke_void_return_wrapper +{ +#ifndef _LIBCPP_CXX03_LANG + template + static _Ret __call(_Args&&... __args) { + return _VSTD::__invoke(_VSTD::forward<_Args>(__args)...); + } +#else + template + static _Ret __call(_Fn __f) { + return _VSTD::__invoke(__f); + } + + template + static _Ret __call(_Fn __f, _A0& __a0) { + return _VSTD::__invoke(__f, __a0); + } + + template + static _Ret __call(_Fn __f, _A0& __a0, _A1& __a1) { + return _VSTD::__invoke(__f, __a0, __a1); + } + + template + static _Ret __call(_Fn __f, _A0& __a0, _A1& __a1, _A2& __a2){ + return _VSTD::__invoke(__f, __a0, __a1, __a2); + } +#endif +}; + +template +struct __invoke_void_return_wrapper<_Ret, true> +{ +#ifndef _LIBCPP_CXX03_LANG + template + static void __call(_Args&&... __args) { + _VSTD::__invoke(_VSTD::forward<_Args>(__args)...); + } +#else + template + static void __call(_Fn __f) { + _VSTD::__invoke(__f); + } + + template + static void __call(_Fn __f, _A0& __a0) { + _VSTD::__invoke(__f, __a0); + } + + template + static void __call(_Fn __f, _A0& __a0, _A1& __a1) { + _VSTD::__invoke(__f, __a0, __a1); + } + + template + static void __call(_Fn __f, _A0& __a0, _A1& __a1, _A2& __a2) { + _VSTD::__invoke(__f, __a0, __a1, __a2); + } +#endif +}; + +#if _LIBCPP_STD_VER > 14 + +template +_LIBCPP_CONSTEXPR_AFTER_CXX17 invoke_result_t<_Fn, _Args...> +invoke(_Fn&& __f, _Args&&... __args) + noexcept(is_nothrow_invocable_v<_Fn, _Args...>) +{ + return _VSTD::__invoke(_VSTD::forward<_Fn>(__f), _VSTD::forward<_Args>(__args)...); +} + +#endif // _LIBCPP_STD_VER > 14 + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___FUNCTIONAL_INVOKE_H diff --git a/libcxx/include/__functional/is_transparent.h b/libcxx/include/__functional/is_transparent.h new file mode 100644 index 0000000000000..4a72aa8e29eea --- /dev/null +++ b/libcxx/include/__functional/is_transparent.h @@ -0,0 +1,36 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FUNCTIONAL_IS_TRANSPARENT +#define _LIBCPP___FUNCTIONAL_IS_TRANSPARENT + +#include <__config> +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER > 11 + +template +struct __is_transparent : false_type {}; + +template +struct __is_transparent<_Tp, _Up, + typename __void_t::type> + : true_type {}; + +#endif + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___FUNCTIONAL_IS_TRANSPARENT diff --git a/libcxx/include/__functional/mem_fn.h b/libcxx/include/__functional/mem_fn.h new file mode 100644 index 0000000000000..1fa070a42cc9c --- /dev/null +++ b/libcxx/include/__functional/mem_fn.h @@ -0,0 +1,161 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FUNCTIONAL_MEM_FN_H +#define _LIBCPP___FUNCTIONAL_MEM_FN_H + +#include <__config> +#include <__functional/weak_result_type.h> +#include <__functional/binary_function.h> +#include <__functional/invoke.h> +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template +class __mem_fn +#if _LIBCPP_STD_VER <= 17 || !defined(_LIBCPP_ABI_NO_BINDER_BASES) + : public __weak_result_type<_Tp> +#endif +{ +public: + // types + typedef _Tp type; +private: + type __f_; + +public: + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 + __mem_fn(type __f) _NOEXCEPT : __f_(__f) {} + +#ifndef _LIBCPP_CXX03_LANG + // invoke + template + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 + typename __invoke_return::type + operator() (_ArgTypes&&... __args) const { + return _VSTD::__invoke(__f_, _VSTD::forward<_ArgTypes>(__args)...); + } +#else + + template + _LIBCPP_INLINE_VISIBILITY + typename __invoke_return0::type + operator() (_A0& __a0) const { + return _VSTD::__invoke(__f_, __a0); + } + + template + _LIBCPP_INLINE_VISIBILITY + typename __invoke_return0::type + operator() (_A0 const& __a0) const { + return _VSTD::__invoke(__f_, __a0); + } + + template + _LIBCPP_INLINE_VISIBILITY + typename __invoke_return1::type + operator() (_A0& __a0, _A1& __a1) const { + return _VSTD::__invoke(__f_, __a0, __a1); + } + + template + _LIBCPP_INLINE_VISIBILITY + typename __invoke_return1::type + operator() (_A0 const& __a0, _A1& __a1) const { + return _VSTD::__invoke(__f_, __a0, __a1); + } + + template + _LIBCPP_INLINE_VISIBILITY + typename __invoke_return1::type + operator() (_A0& __a0, _A1 const& __a1) const { + return _VSTD::__invoke(__f_, __a0, __a1); + } + + template + _LIBCPP_INLINE_VISIBILITY + typename __invoke_return1::type + operator() (_A0 const& __a0, _A1 const& __a1) const { + return _VSTD::__invoke(__f_, __a0, __a1); + } + + template + _LIBCPP_INLINE_VISIBILITY + typename __invoke_return2::type + operator() (_A0& __a0, _A1& __a1, _A2& __a2) const { + return _VSTD::__invoke(__f_, __a0, __a1, __a2); + } + + template + _LIBCPP_INLINE_VISIBILITY + typename __invoke_return2::type + operator() (_A0 const& __a0, _A1& __a1, _A2& __a2) const { + return _VSTD::__invoke(__f_, __a0, __a1, __a2); + } + + template + _LIBCPP_INLINE_VISIBILITY + typename __invoke_return2::type + operator() (_A0& __a0, _A1 const& __a1, _A2& __a2) const { + return _VSTD::__invoke(__f_, __a0, __a1, __a2); + } + + template + _LIBCPP_INLINE_VISIBILITY + typename __invoke_return2::type + operator() (_A0& __a0, _A1& __a1, _A2 const& __a2) const { + return _VSTD::__invoke(__f_, __a0, __a1, __a2); + } + + template + _LIBCPP_INLINE_VISIBILITY + typename __invoke_return2::type + operator() (_A0 const& __a0, _A1 const& __a1, _A2& __a2) const { + return _VSTD::__invoke(__f_, __a0, __a1, __a2); + } + + template + _LIBCPP_INLINE_VISIBILITY + typename __invoke_return2::type + operator() (_A0 const& __a0, _A1& __a1, _A2 const& __a2) const { + return _VSTD::__invoke(__f_, __a0, __a1, __a2); + } + + template + _LIBCPP_INLINE_VISIBILITY + typename __invoke_return2::type + operator() (_A0& __a0, _A1 const& __a1, _A2 const& __a2) const { + return _VSTD::__invoke(__f_, __a0, __a1, __a2); + } + + template + _LIBCPP_INLINE_VISIBILITY + typename __invoke_return2::type + operator() (_A0 const& __a0, _A1 const& __a1, _A2 const& __a2) const { + return _VSTD::__invoke(__f_, __a0, __a1, __a2); + } +#endif +}; + +template +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 +__mem_fn<_Rp _Tp::*> +mem_fn(_Rp _Tp::* __pm) _NOEXCEPT +{ + return __mem_fn<_Rp _Tp::*>(__pm); +} + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___FUNCTIONAL_MEM_FN_H diff --git a/libcxx/include/__functional/mem_fun_ref.h b/libcxx/include/__functional/mem_fun_ref.h new file mode 100644 index 0000000000000..4616da0b07482 --- /dev/null +++ b/libcxx/include/__functional/mem_fun_ref.h @@ -0,0 +1,173 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FUNCTIONAL_MEM_FUN_REF_H +#define _LIBCPP___FUNCTIONAL_MEM_FUN_REF_H + +#include <__config> +#include <__functional/unary_function.h> +#include <__functional/binary_function.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER <= 14 || defined(_LIBCPP_ENABLE_CXX17_REMOVED_BINDERS) + +template +class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 mem_fun_t + : public unary_function<_Tp*, _Sp> +{ + _Sp (_Tp::*__p_)(); +public: + _LIBCPP_INLINE_VISIBILITY explicit mem_fun_t(_Sp (_Tp::*__p)()) + : __p_(__p) {} + _LIBCPP_INLINE_VISIBILITY _Sp operator()(_Tp* __p) const + {return (__p->*__p_)();} +}; + +template +class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 mem_fun1_t + : public binary_function<_Tp*, _Ap, _Sp> +{ + _Sp (_Tp::*__p_)(_Ap); +public: + _LIBCPP_INLINE_VISIBILITY explicit mem_fun1_t(_Sp (_Tp::*__p)(_Ap)) + : __p_(__p) {} + _LIBCPP_INLINE_VISIBILITY _Sp operator()(_Tp* __p, _Ap __x) const + {return (__p->*__p_)(__x);} +}; + +template +_LIBCPP_DEPRECATED_IN_CXX11 inline _LIBCPP_INLINE_VISIBILITY +mem_fun_t<_Sp,_Tp> +mem_fun(_Sp (_Tp::*__f)()) + {return mem_fun_t<_Sp,_Tp>(__f);} + +template +_LIBCPP_DEPRECATED_IN_CXX11 inline _LIBCPP_INLINE_VISIBILITY +mem_fun1_t<_Sp,_Tp,_Ap> +mem_fun(_Sp (_Tp::*__f)(_Ap)) + {return mem_fun1_t<_Sp,_Tp,_Ap>(__f);} + +template +class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 mem_fun_ref_t + : public unary_function<_Tp, _Sp> +{ + _Sp (_Tp::*__p_)(); +public: + _LIBCPP_INLINE_VISIBILITY explicit mem_fun_ref_t(_Sp (_Tp::*__p)()) + : __p_(__p) {} + _LIBCPP_INLINE_VISIBILITY _Sp operator()(_Tp& __p) const + {return (__p.*__p_)();} +}; + +template +class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 mem_fun1_ref_t + : public binary_function<_Tp, _Ap, _Sp> +{ + _Sp (_Tp::*__p_)(_Ap); +public: + _LIBCPP_INLINE_VISIBILITY explicit mem_fun1_ref_t(_Sp (_Tp::*__p)(_Ap)) + : __p_(__p) {} + _LIBCPP_INLINE_VISIBILITY _Sp operator()(_Tp& __p, _Ap __x) const + {return (__p.*__p_)(__x);} +}; + +template +_LIBCPP_DEPRECATED_IN_CXX11 inline _LIBCPP_INLINE_VISIBILITY +mem_fun_ref_t<_Sp,_Tp> +mem_fun_ref(_Sp (_Tp::*__f)()) + {return mem_fun_ref_t<_Sp,_Tp>(__f);} + +template +_LIBCPP_DEPRECATED_IN_CXX11 inline _LIBCPP_INLINE_VISIBILITY +mem_fun1_ref_t<_Sp,_Tp,_Ap> +mem_fun_ref(_Sp (_Tp::*__f)(_Ap)) + {return mem_fun1_ref_t<_Sp,_Tp,_Ap>(__f);} + +template +class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 const_mem_fun_t + : public unary_function +{ + _Sp (_Tp::*__p_)() const; +public: + _LIBCPP_INLINE_VISIBILITY explicit const_mem_fun_t(_Sp (_Tp::*__p)() const) + : __p_(__p) {} + _LIBCPP_INLINE_VISIBILITY _Sp operator()(const _Tp* __p) const + {return (__p->*__p_)();} +}; + +template +class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 const_mem_fun1_t + : public binary_function +{ + _Sp (_Tp::*__p_)(_Ap) const; +public: + _LIBCPP_INLINE_VISIBILITY explicit const_mem_fun1_t(_Sp (_Tp::*__p)(_Ap) const) + : __p_(__p) {} + _LIBCPP_INLINE_VISIBILITY _Sp operator()(const _Tp* __p, _Ap __x) const + {return (__p->*__p_)(__x);} +}; + +template +_LIBCPP_DEPRECATED_IN_CXX11 inline _LIBCPP_INLINE_VISIBILITY +const_mem_fun_t<_Sp,_Tp> +mem_fun(_Sp (_Tp::*__f)() const) + {return const_mem_fun_t<_Sp,_Tp>(__f);} + +template +_LIBCPP_DEPRECATED_IN_CXX11 inline _LIBCPP_INLINE_VISIBILITY +const_mem_fun1_t<_Sp,_Tp,_Ap> +mem_fun(_Sp (_Tp::*__f)(_Ap) const) + {return const_mem_fun1_t<_Sp,_Tp,_Ap>(__f);} + +template +class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 const_mem_fun_ref_t + : public unary_function<_Tp, _Sp> +{ + _Sp (_Tp::*__p_)() const; +public: + _LIBCPP_INLINE_VISIBILITY explicit const_mem_fun_ref_t(_Sp (_Tp::*__p)() const) + : __p_(__p) {} + _LIBCPP_INLINE_VISIBILITY _Sp operator()(const _Tp& __p) const + {return (__p.*__p_)();} +}; + +template +class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 const_mem_fun1_ref_t + : public binary_function<_Tp, _Ap, _Sp> +{ + _Sp (_Tp::*__p_)(_Ap) const; +public: + _LIBCPP_INLINE_VISIBILITY explicit const_mem_fun1_ref_t(_Sp (_Tp::*__p)(_Ap) const) + : __p_(__p) {} + _LIBCPP_INLINE_VISIBILITY _Sp operator()(const _Tp& __p, _Ap __x) const + {return (__p.*__p_)(__x);} +}; + +template +_LIBCPP_DEPRECATED_IN_CXX11 inline _LIBCPP_INLINE_VISIBILITY +const_mem_fun_ref_t<_Sp,_Tp> +mem_fun_ref(_Sp (_Tp::*__f)() const) + {return const_mem_fun_ref_t<_Sp,_Tp>(__f);} + +template +_LIBCPP_DEPRECATED_IN_CXX11 inline _LIBCPP_INLINE_VISIBILITY +const_mem_fun1_ref_t<_Sp,_Tp,_Ap> +mem_fun_ref(_Sp (_Tp::*__f)(_Ap) const) + {return const_mem_fun1_ref_t<_Sp,_Tp,_Ap>(__f);} + +#endif // _LIBCPP_STD_VER <= 14 || defined(_LIBCPP_ENABLE_CXX17_REMOVED_BINDERS) + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___FUNCTIONAL_MEM_FUN_REF_H diff --git a/libcxx/include/__functional/not_fn.h b/libcxx/include/__functional/not_fn.h new file mode 100644 index 0000000000000..632be5ff096b5 --- /dev/null +++ b/libcxx/include/__functional/not_fn.h @@ -0,0 +1,47 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FUNCTIONAL_NOT_FN_H +#define _LIBCPP___FUNCTIONAL_NOT_FN_H + +#include <__config> +#include <__functional/perfect_forward.h> +#include <__functional/invoke.h> +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER > 14 + +struct __not_fn_op +{ + template + static _LIBCPP_CONSTEXPR_AFTER_CXX17 auto __call(_Args&&... __args) + noexcept(noexcept(!_VSTD::invoke(_VSTD::forward<_Args>(__args)...))) + -> decltype( !_VSTD::invoke(_VSTD::forward<_Args>(__args)...)) + { return !_VSTD::invoke(_VSTD::forward<_Args>(__args)...); } +}; + +template, _Fn> && + is_move_constructible_v<_Fn>>> +_LIBCPP_CONSTEXPR_AFTER_CXX17 auto not_fn(_Fn&& __f) +{ + return __perfect_forward<__not_fn_op, _Fn>(_VSTD::forward<_Fn>(__f)); +} + +#endif // _LIBCPP_STD_VER > 14 + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___FUNCTIONAL_NOT_FN_H diff --git a/libcxx/include/__functional/operations.h b/libcxx/include/__functional/operations.h new file mode 100644 index 0000000000000..667d17988bc4e --- /dev/null +++ b/libcxx/include/__functional/operations.h @@ -0,0 +1,729 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FUNCTIONAL_OPERATIONS_H +#define _LIBCPP___FUNCTIONAL_OPERATIONS_H + +#include <__config> +#include <__functional/binary_function.h> +#include <__functional/unary_function.h> +#include <__utility/forward.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +// Arithmetic operations + +_LIBCPP_SUPPRESS_DEPRECATED_PUSH +#if _LIBCPP_STD_VER > 11 +template +#else +template +#endif +struct _LIBCPP_TEMPLATE_VIS plus +#if !defined(_LIBCPP_ABI_NO_BINDER_BASES) + : binary_function<_Tp, _Tp, _Tp> +#endif +{ +_LIBCPP_SUPPRESS_DEPRECATED_POP + typedef _Tp __result_type; // used by valarray +#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_BINDER_TYPEDEFS) + _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp result_type; + _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp first_argument_type; + _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp second_argument_type; +#endif + _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY + _Tp operator()(const _Tp& __x, const _Tp& __y) const + {return __x + __y;} +}; + +#if _LIBCPP_STD_VER > 11 +template <> +struct _LIBCPP_TEMPLATE_VIS plus +{ + template + _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY + auto operator()(_T1&& __t, _T2&& __u) const + _NOEXCEPT_(noexcept(_VSTD::forward<_T1>(__t) + _VSTD::forward<_T2>(__u))) + -> decltype (_VSTD::forward<_T1>(__t) + _VSTD::forward<_T2>(__u)) + { return _VSTD::forward<_T1>(__t) + _VSTD::forward<_T2>(__u); } + typedef void is_transparent; +}; +#endif + +_LIBCPP_SUPPRESS_DEPRECATED_PUSH +#if _LIBCPP_STD_VER > 11 +template +#else +template +#endif +struct _LIBCPP_TEMPLATE_VIS minus +#if !defined(_LIBCPP_ABI_NO_BINDER_BASES) + : binary_function<_Tp, _Tp, _Tp> +#endif +{ +_LIBCPP_SUPPRESS_DEPRECATED_POP + typedef _Tp __result_type; // used by valarray +#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_BINDER_TYPEDEFS) + _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp result_type; + _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp first_argument_type; + _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp second_argument_type; +#endif + _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY + _Tp operator()(const _Tp& __x, const _Tp& __y) const + {return __x - __y;} +}; + +#if _LIBCPP_STD_VER > 11 +template <> +struct _LIBCPP_TEMPLATE_VIS minus +{ + template + _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY + auto operator()(_T1&& __t, _T2&& __u) const + _NOEXCEPT_(noexcept(_VSTD::forward<_T1>(__t) - _VSTD::forward<_T2>(__u))) + -> decltype (_VSTD::forward<_T1>(__t) - _VSTD::forward<_T2>(__u)) + { return _VSTD::forward<_T1>(__t) - _VSTD::forward<_T2>(__u); } + typedef void is_transparent; +}; +#endif + +_LIBCPP_SUPPRESS_DEPRECATED_PUSH +#if _LIBCPP_STD_VER > 11 +template +#else +template +#endif +struct _LIBCPP_TEMPLATE_VIS multiplies +#if !defined(_LIBCPP_ABI_NO_BINDER_BASES) + : binary_function<_Tp, _Tp, _Tp> +#endif +{ +_LIBCPP_SUPPRESS_DEPRECATED_POP + typedef _Tp __result_type; // used by valarray +#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_BINDER_TYPEDEFS) + _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp result_type; + _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp first_argument_type; + _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp second_argument_type; +#endif + _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY + _Tp operator()(const _Tp& __x, const _Tp& __y) const + {return __x * __y;} +}; + +#if _LIBCPP_STD_VER > 11 +template <> +struct _LIBCPP_TEMPLATE_VIS multiplies +{ + template + _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY + auto operator()(_T1&& __t, _T2&& __u) const + _NOEXCEPT_(noexcept(_VSTD::forward<_T1>(__t) * _VSTD::forward<_T2>(__u))) + -> decltype (_VSTD::forward<_T1>(__t) * _VSTD::forward<_T2>(__u)) + { return _VSTD::forward<_T1>(__t) * _VSTD::forward<_T2>(__u); } + typedef void is_transparent; +}; +#endif + +_LIBCPP_SUPPRESS_DEPRECATED_PUSH +#if _LIBCPP_STD_VER > 11 +template +#else +template +#endif +struct _LIBCPP_TEMPLATE_VIS divides +#if !defined(_LIBCPP_ABI_NO_BINDER_BASES) + : binary_function<_Tp, _Tp, _Tp> +#endif +{ +_LIBCPP_SUPPRESS_DEPRECATED_POP + typedef _Tp __result_type; // used by valarray +#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_BINDER_TYPEDEFS) + _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp result_type; + _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp first_argument_type; + _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp second_argument_type; +#endif + _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY + _Tp operator()(const _Tp& __x, const _Tp& __y) const + {return __x / __y;} +}; + +#if _LIBCPP_STD_VER > 11 +template <> +struct _LIBCPP_TEMPLATE_VIS divides +{ + template + _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY + auto operator()(_T1&& __t, _T2&& __u) const + _NOEXCEPT_(noexcept(_VSTD::forward<_T1>(__t) / _VSTD::forward<_T2>(__u))) + -> decltype (_VSTD::forward<_T1>(__t) / _VSTD::forward<_T2>(__u)) + { return _VSTD::forward<_T1>(__t) / _VSTD::forward<_T2>(__u); } + typedef void is_transparent; +}; +#endif + +_LIBCPP_SUPPRESS_DEPRECATED_PUSH +#if _LIBCPP_STD_VER > 11 +template +#else +template +#endif +struct _LIBCPP_TEMPLATE_VIS modulus +#if !defined(_LIBCPP_ABI_NO_BINDER_BASES) + : binary_function<_Tp, _Tp, _Tp> +#endif +{ +_LIBCPP_SUPPRESS_DEPRECATED_POP + typedef _Tp __result_type; // used by valarray +#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_BINDER_TYPEDEFS) + _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp result_type; + _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp first_argument_type; + _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp second_argument_type; +#endif + _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY + _Tp operator()(const _Tp& __x, const _Tp& __y) const + {return __x % __y;} +}; + +#if _LIBCPP_STD_VER > 11 +template <> +struct _LIBCPP_TEMPLATE_VIS modulus +{ + template + _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY + auto operator()(_T1&& __t, _T2&& __u) const + _NOEXCEPT_(noexcept(_VSTD::forward<_T1>(__t) % _VSTD::forward<_T2>(__u))) + -> decltype (_VSTD::forward<_T1>(__t) % _VSTD::forward<_T2>(__u)) + { return _VSTD::forward<_T1>(__t) % _VSTD::forward<_T2>(__u); } + typedef void is_transparent; +}; +#endif + +_LIBCPP_SUPPRESS_DEPRECATED_PUSH +#if _LIBCPP_STD_VER > 11 +template +#else +template +#endif +struct _LIBCPP_TEMPLATE_VIS negate +#if !defined(_LIBCPP_ABI_NO_BINDER_BASES) + : unary_function<_Tp, _Tp> +#endif +{ +_LIBCPP_SUPPRESS_DEPRECATED_POP + typedef _Tp __result_type; // used by valarray +#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_BINDER_TYPEDEFS) + _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp result_type; + _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp argument_type; +#endif + _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY + _Tp operator()(const _Tp& __x) const + {return -__x;} +}; + +#if _LIBCPP_STD_VER > 11 +template <> +struct _LIBCPP_TEMPLATE_VIS negate +{ + template + _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY + auto operator()(_Tp&& __x) const + _NOEXCEPT_(noexcept(- _VSTD::forward<_Tp>(__x))) + -> decltype (- _VSTD::forward<_Tp>(__x)) + { return - _VSTD::forward<_Tp>(__x); } + typedef void is_transparent; +}; +#endif + +// Bitwise operations + +_LIBCPP_SUPPRESS_DEPRECATED_PUSH +#if _LIBCPP_STD_VER > 11 +template +#else +template +#endif +struct _LIBCPP_TEMPLATE_VIS bit_and +#if !defined(_LIBCPP_ABI_NO_BINDER_BASES) + : binary_function<_Tp, _Tp, _Tp> +#endif +{ +_LIBCPP_SUPPRESS_DEPRECATED_POP + typedef _Tp __result_type; // used by valarray +#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_BINDER_TYPEDEFS) + _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp result_type; + _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp first_argument_type; + _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp second_argument_type; +#endif + _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY + _Tp operator()(const _Tp& __x, const _Tp& __y) const + {return __x & __y;} +}; + +#if _LIBCPP_STD_VER > 11 +template <> +struct _LIBCPP_TEMPLATE_VIS bit_and +{ + template + _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY + auto operator()(_T1&& __t, _T2&& __u) const + _NOEXCEPT_(noexcept(_VSTD::forward<_T1>(__t) & _VSTD::forward<_T2>(__u))) + -> decltype (_VSTD::forward<_T1>(__t) & _VSTD::forward<_T2>(__u)) + { return _VSTD::forward<_T1>(__t) & _VSTD::forward<_T2>(__u); } + typedef void is_transparent; +}; +#endif + +#if _LIBCPP_STD_VER > 11 +_LIBCPP_SUPPRESS_DEPRECATED_PUSH +template +struct _LIBCPP_TEMPLATE_VIS bit_not +#if !defined(_LIBCPP_ABI_NO_BINDER_BASES) + : unary_function<_Tp, _Tp> +#endif +{ +_LIBCPP_SUPPRESS_DEPRECATED_POP +#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_BINDER_TYPEDEFS) + _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp result_type; + _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp argument_type; +#endif + _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY + _Tp operator()(const _Tp& __x) const + {return ~__x;} +}; + +template <> +struct _LIBCPP_TEMPLATE_VIS bit_not +{ + template + _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY + auto operator()(_Tp&& __x) const + _NOEXCEPT_(noexcept(~_VSTD::forward<_Tp>(__x))) + -> decltype (~_VSTD::forward<_Tp>(__x)) + { return ~_VSTD::forward<_Tp>(__x); } + typedef void is_transparent; +}; +#endif + +_LIBCPP_SUPPRESS_DEPRECATED_PUSH +#if _LIBCPP_STD_VER > 11 +template +#else +template +#endif +struct _LIBCPP_TEMPLATE_VIS bit_or +#if !defined(_LIBCPP_ABI_NO_BINDER_BASES) + : binary_function<_Tp, _Tp, _Tp> +#endif +{ +_LIBCPP_SUPPRESS_DEPRECATED_POP + typedef _Tp __result_type; // used by valarray +#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_BINDER_TYPEDEFS) + _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp result_type; + _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp first_argument_type; + _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp second_argument_type; +#endif + _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY + _Tp operator()(const _Tp& __x, const _Tp& __y) const + {return __x | __y;} +}; + +#if _LIBCPP_STD_VER > 11 +template <> +struct _LIBCPP_TEMPLATE_VIS bit_or +{ + template + _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY + auto operator()(_T1&& __t, _T2&& __u) const + _NOEXCEPT_(noexcept(_VSTD::forward<_T1>(__t) | _VSTD::forward<_T2>(__u))) + -> decltype (_VSTD::forward<_T1>(__t) | _VSTD::forward<_T2>(__u)) + { return _VSTD::forward<_T1>(__t) | _VSTD::forward<_T2>(__u); } + typedef void is_transparent; +}; +#endif + +_LIBCPP_SUPPRESS_DEPRECATED_PUSH +#if _LIBCPP_STD_VER > 11 +template +#else +template +#endif +struct _LIBCPP_TEMPLATE_VIS bit_xor +#if !defined(_LIBCPP_ABI_NO_BINDER_BASES) + : binary_function<_Tp, _Tp, _Tp> +#endif +{ +_LIBCPP_SUPPRESS_DEPRECATED_POP + typedef _Tp __result_type; // used by valarray +#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_BINDER_TYPEDEFS) + _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp result_type; + _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp first_argument_type; + _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp second_argument_type; +#endif + _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY + _Tp operator()(const _Tp& __x, const _Tp& __y) const + {return __x ^ __y;} +}; + +#if _LIBCPP_STD_VER > 11 +template <> +struct _LIBCPP_TEMPLATE_VIS bit_xor +{ + template + _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY + auto operator()(_T1&& __t, _T2&& __u) const + _NOEXCEPT_(noexcept(_VSTD::forward<_T1>(__t) ^ _VSTD::forward<_T2>(__u))) + -> decltype (_VSTD::forward<_T1>(__t) ^ _VSTD::forward<_T2>(__u)) + { return _VSTD::forward<_T1>(__t) ^ _VSTD::forward<_T2>(__u); } + typedef void is_transparent; +}; +#endif + +// Comparison operations + +_LIBCPP_SUPPRESS_DEPRECATED_PUSH +#if _LIBCPP_STD_VER > 11 +template +#else +template +#endif +struct _LIBCPP_TEMPLATE_VIS equal_to +#if !defined(_LIBCPP_ABI_NO_BINDER_BASES) + : binary_function<_Tp, _Tp, bool> +#endif +{ +_LIBCPP_SUPPRESS_DEPRECATED_POP + typedef bool __result_type; // used by valarray +#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_BINDER_TYPEDEFS) + _LIBCPP_DEPRECATED_IN_CXX17 typedef bool result_type; + _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp first_argument_type; + _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp second_argument_type; +#endif + _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY + bool operator()(const _Tp& __x, const _Tp& __y) const + {return __x == __y;} +}; + +#if _LIBCPP_STD_VER > 11 +template <> +struct _LIBCPP_TEMPLATE_VIS equal_to +{ + template + _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY + auto operator()(_T1&& __t, _T2&& __u) const + _NOEXCEPT_(noexcept(_VSTD::forward<_T1>(__t) == _VSTD::forward<_T2>(__u))) + -> decltype (_VSTD::forward<_T1>(__t) == _VSTD::forward<_T2>(__u)) + { return _VSTD::forward<_T1>(__t) == _VSTD::forward<_T2>(__u); } + typedef void is_transparent; +}; +#endif + +_LIBCPP_SUPPRESS_DEPRECATED_PUSH +#if _LIBCPP_STD_VER > 11 +template +#else +template +#endif +struct _LIBCPP_TEMPLATE_VIS not_equal_to +#if !defined(_LIBCPP_ABI_NO_BINDER_BASES) + : binary_function<_Tp, _Tp, bool> +#endif +{ +_LIBCPP_SUPPRESS_DEPRECATED_POP + typedef bool __result_type; // used by valarray +#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_BINDER_TYPEDEFS) + _LIBCPP_DEPRECATED_IN_CXX17 typedef bool result_type; + _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp first_argument_type; + _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp second_argument_type; +#endif + _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY + bool operator()(const _Tp& __x, const _Tp& __y) const + {return __x != __y;} +}; + +#if _LIBCPP_STD_VER > 11 +template <> +struct _LIBCPP_TEMPLATE_VIS not_equal_to +{ + template + _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY + auto operator()(_T1&& __t, _T2&& __u) const + _NOEXCEPT_(noexcept(_VSTD::forward<_T1>(__t) != _VSTD::forward<_T2>(__u))) + -> decltype (_VSTD::forward<_T1>(__t) != _VSTD::forward<_T2>(__u)) + { return _VSTD::forward<_T1>(__t) != _VSTD::forward<_T2>(__u); } + typedef void is_transparent; +}; +#endif + +_LIBCPP_SUPPRESS_DEPRECATED_PUSH +#if _LIBCPP_STD_VER > 11 +template +#else +template +#endif +struct _LIBCPP_TEMPLATE_VIS less +#if !defined(_LIBCPP_ABI_NO_BINDER_BASES) + : binary_function<_Tp, _Tp, bool> +#endif +{ +_LIBCPP_SUPPRESS_DEPRECATED_POP + typedef bool __result_type; // used by valarray +#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_BINDER_TYPEDEFS) + _LIBCPP_DEPRECATED_IN_CXX17 typedef bool result_type; + _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp first_argument_type; + _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp second_argument_type; +#endif + _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY + bool operator()(const _Tp& __x, const _Tp& __y) const + {return __x < __y;} +}; + +#if _LIBCPP_STD_VER > 11 +template <> +struct _LIBCPP_TEMPLATE_VIS less +{ + template + _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY + auto operator()(_T1&& __t, _T2&& __u) const + _NOEXCEPT_(noexcept(_VSTD::forward<_T1>(__t) < _VSTD::forward<_T2>(__u))) + -> decltype (_VSTD::forward<_T1>(__t) < _VSTD::forward<_T2>(__u)) + { return _VSTD::forward<_T1>(__t) < _VSTD::forward<_T2>(__u); } + typedef void is_transparent; +}; +#endif + +_LIBCPP_SUPPRESS_DEPRECATED_PUSH +#if _LIBCPP_STD_VER > 11 +template +#else +template +#endif +struct _LIBCPP_TEMPLATE_VIS less_equal +#if !defined(_LIBCPP_ABI_NO_BINDER_BASES) + : binary_function<_Tp, _Tp, bool> +#endif +{ +_LIBCPP_SUPPRESS_DEPRECATED_POP + typedef bool __result_type; // used by valarray +#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_BINDER_TYPEDEFS) + _LIBCPP_DEPRECATED_IN_CXX17 typedef bool result_type; + _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp first_argument_type; + _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp second_argument_type; +#endif + _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY + bool operator()(const _Tp& __x, const _Tp& __y) const + {return __x <= __y;} +}; + +#if _LIBCPP_STD_VER > 11 +template <> +struct _LIBCPP_TEMPLATE_VIS less_equal +{ + template + _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY + auto operator()(_T1&& __t, _T2&& __u) const + _NOEXCEPT_(noexcept(_VSTD::forward<_T1>(__t) <= _VSTD::forward<_T2>(__u))) + -> decltype (_VSTD::forward<_T1>(__t) <= _VSTD::forward<_T2>(__u)) + { return _VSTD::forward<_T1>(__t) <= _VSTD::forward<_T2>(__u); } + typedef void is_transparent; +}; +#endif + +_LIBCPP_SUPPRESS_DEPRECATED_PUSH +#if _LIBCPP_STD_VER > 11 +template +#else +template +#endif +struct _LIBCPP_TEMPLATE_VIS greater_equal +#if !defined(_LIBCPP_ABI_NO_BINDER_BASES) + : binary_function<_Tp, _Tp, bool> +#endif +{ +_LIBCPP_SUPPRESS_DEPRECATED_POP + typedef bool __result_type; // used by valarray +#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_BINDER_TYPEDEFS) + _LIBCPP_DEPRECATED_IN_CXX17 typedef bool result_type; + _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp first_argument_type; + _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp second_argument_type; +#endif + _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY + bool operator()(const _Tp& __x, const _Tp& __y) const + {return __x >= __y;} +}; + +#if _LIBCPP_STD_VER > 11 +template <> +struct _LIBCPP_TEMPLATE_VIS greater_equal +{ + template + _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY + auto operator()(_T1&& __t, _T2&& __u) const + _NOEXCEPT_(noexcept(_VSTD::forward<_T1>(__t) >= _VSTD::forward<_T2>(__u))) + -> decltype (_VSTD::forward<_T1>(__t) >= _VSTD::forward<_T2>(__u)) + { return _VSTD::forward<_T1>(__t) >= _VSTD::forward<_T2>(__u); } + typedef void is_transparent; +}; +#endif + +_LIBCPP_SUPPRESS_DEPRECATED_PUSH +#if _LIBCPP_STD_VER > 11 +template +#else +template +#endif +struct _LIBCPP_TEMPLATE_VIS greater +#if !defined(_LIBCPP_ABI_NO_BINDER_BASES) + : binary_function<_Tp, _Tp, bool> +#endif +{ +_LIBCPP_SUPPRESS_DEPRECATED_POP + typedef bool __result_type; // used by valarray +#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_BINDER_TYPEDEFS) + _LIBCPP_DEPRECATED_IN_CXX17 typedef bool result_type; + _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp first_argument_type; + _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp second_argument_type; +#endif + _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY + bool operator()(const _Tp& __x, const _Tp& __y) const + {return __x > __y;} +}; + +#if _LIBCPP_STD_VER > 11 +template <> +struct _LIBCPP_TEMPLATE_VIS greater +{ + template + _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY + auto operator()(_T1&& __t, _T2&& __u) const + _NOEXCEPT_(noexcept(_VSTD::forward<_T1>(__t) > _VSTD::forward<_T2>(__u))) + -> decltype (_VSTD::forward<_T1>(__t) > _VSTD::forward<_T2>(__u)) + { return _VSTD::forward<_T1>(__t) > _VSTD::forward<_T2>(__u); } + typedef void is_transparent; +}; +#endif + +// Logical operations + +_LIBCPP_SUPPRESS_DEPRECATED_PUSH +#if _LIBCPP_STD_VER > 11 +template +#else +template +#endif +struct _LIBCPP_TEMPLATE_VIS logical_and +#if !defined(_LIBCPP_ABI_NO_BINDER_BASES) + : binary_function<_Tp, _Tp, bool> +#endif +{ +_LIBCPP_SUPPRESS_DEPRECATED_POP + typedef bool __result_type; // used by valarray +#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_BINDER_TYPEDEFS) + _LIBCPP_DEPRECATED_IN_CXX17 typedef bool result_type; + _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp first_argument_type; + _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp second_argument_type; +#endif + _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY + bool operator()(const _Tp& __x, const _Tp& __y) const + {return __x && __y;} +}; + +#if _LIBCPP_STD_VER > 11 +template <> +struct _LIBCPP_TEMPLATE_VIS logical_and +{ + template + _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY + auto operator()(_T1&& __t, _T2&& __u) const + _NOEXCEPT_(noexcept(_VSTD::forward<_T1>(__t) && _VSTD::forward<_T2>(__u))) + -> decltype (_VSTD::forward<_T1>(__t) && _VSTD::forward<_T2>(__u)) + { return _VSTD::forward<_T1>(__t) && _VSTD::forward<_T2>(__u); } + typedef void is_transparent; +}; +#endif + +_LIBCPP_SUPPRESS_DEPRECATED_PUSH +#if _LIBCPP_STD_VER > 11 +template +#else +template +#endif +struct _LIBCPP_TEMPLATE_VIS logical_not +#if !defined(_LIBCPP_ABI_NO_BINDER_BASES) + : unary_function<_Tp, bool> +#endif +{ +_LIBCPP_SUPPRESS_DEPRECATED_POP + typedef bool __result_type; // used by valarray +#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_BINDER_TYPEDEFS) + _LIBCPP_DEPRECATED_IN_CXX17 typedef bool result_type; + _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp argument_type; +#endif + _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY + bool operator()(const _Tp& __x) const + {return !__x;} +}; + +#if _LIBCPP_STD_VER > 11 +template <> +struct _LIBCPP_TEMPLATE_VIS logical_not +{ + template + _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY + auto operator()(_Tp&& __x) const + _NOEXCEPT_(noexcept(!_VSTD::forward<_Tp>(__x))) + -> decltype (!_VSTD::forward<_Tp>(__x)) + { return !_VSTD::forward<_Tp>(__x); } + typedef void is_transparent; +}; +#endif + +_LIBCPP_SUPPRESS_DEPRECATED_PUSH +#if _LIBCPP_STD_VER > 11 +template +#else +template +#endif +struct _LIBCPP_TEMPLATE_VIS logical_or +#if !defined(_LIBCPP_ABI_NO_BINDER_BASES) + : binary_function<_Tp, _Tp, bool> +#endif +{ +_LIBCPP_SUPPRESS_DEPRECATED_POP + typedef bool __result_type; // used by valarray +#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_BINDER_TYPEDEFS) + _LIBCPP_DEPRECATED_IN_CXX17 typedef bool result_type; + _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp first_argument_type; + _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp second_argument_type; +#endif + _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY + bool operator()(const _Tp& __x, const _Tp& __y) const + {return __x || __y;} +}; + +#if _LIBCPP_STD_VER > 11 +template <> +struct _LIBCPP_TEMPLATE_VIS logical_or +{ + template + _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY + auto operator()(_T1&& __t, _T2&& __u) const + _NOEXCEPT_(noexcept(_VSTD::forward<_T1>(__t) || _VSTD::forward<_T2>(__u))) + -> decltype (_VSTD::forward<_T1>(__t) || _VSTD::forward<_T2>(__u)) + { return _VSTD::forward<_T1>(__t) || _VSTD::forward<_T2>(__u); } + typedef void is_transparent; +}; +#endif + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___FUNCTIONAL_OPERATIONS_H diff --git a/libcxx/include/__functional/perfect_forward.h b/libcxx/include/__functional/perfect_forward.h new file mode 100644 index 0000000000000..a5678e1593bba --- /dev/null +++ b/libcxx/include/__functional/perfect_forward.h @@ -0,0 +1,88 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FUNCTIONAL_PERFECT_FORWARD_H +#define _LIBCPP___FUNCTIONAL_PERFECT_FORWARD_H + +#include <__config> +#include +#include +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER > 14 + +template::value>::type> +struct __perfect_forward_impl; + +template +struct __perfect_forward_impl<_Op, __tuple_types<_Bound...>, __tuple_indices<_Idxs...>> +{ + tuple<_Bound...> __bound_; + + template + _LIBCPP_INLINE_VISIBILITY constexpr auto operator()(_Args&&... __args) & + noexcept(noexcept(_Op::__call(_VSTD::get<_Idxs>(__bound_)..., _VSTD::forward<_Args>(__args)...))) + -> decltype( _Op::__call(_VSTD::get<_Idxs>(__bound_)..., _VSTD::forward<_Args>(__args)...)) + {return _Op::__call(_VSTD::get<_Idxs>(__bound_)..., _VSTD::forward<_Args>(__args)...);} + + template + _LIBCPP_INLINE_VISIBILITY constexpr auto operator()(_Args&&... __args) const& + noexcept(noexcept(_Op::__call(_VSTD::get<_Idxs>(__bound_)..., _VSTD::forward<_Args>(__args)...))) + -> decltype( _Op::__call(_VSTD::get<_Idxs>(__bound_)..., _VSTD::forward<_Args>(__args)...)) + {return _Op::__call(_VSTD::get<_Idxs>(__bound_)..., _VSTD::forward<_Args>(__args)...);} + + template + _LIBCPP_INLINE_VISIBILITY constexpr auto operator()(_Args&&... __args) && + noexcept(noexcept(_Op::__call(_VSTD::get<_Idxs>(_VSTD::move(__bound_))..., + _VSTD::forward<_Args>(__args)...))) + -> decltype( _Op::__call(_VSTD::get<_Idxs>(_VSTD::move(__bound_))..., + _VSTD::forward<_Args>(__args)...)) + {return _Op::__call(_VSTD::get<_Idxs>(_VSTD::move(__bound_))..., + _VSTD::forward<_Args>(__args)...);} + + template + _LIBCPP_INLINE_VISIBILITY constexpr auto operator()(_Args&&... __args) const&& + noexcept(noexcept(_Op::__call(_VSTD::get<_Idxs>(_VSTD::move(__bound_))..., + _VSTD::forward<_Args>(__args)...))) + -> decltype( _Op::__call(_VSTD::get<_Idxs>(_VSTD::move(__bound_))..., + _VSTD::forward<_Args>(__args)...)) + {return _Op::__call(_VSTD::get<_Idxs>(_VSTD::move(__bound_))..., + _VSTD::forward<_Args>(__args)...);} + + template>::type, + class = _EnableIf>> + constexpr __perfect_forward_impl(__perfect_forward_impl const& __other) + : __bound_(__other.__bound_) {} + + template>::type, + class = _EnableIf>> + constexpr __perfect_forward_impl(__perfect_forward_impl && __other) + : __bound_(_VSTD::move(__other.__bound_)) {} + + template + explicit constexpr __perfect_forward_impl(_BoundArgs&&... __bound) : + __bound_(_VSTD::forward<_BoundArgs>(__bound)...) { } +}; + +template +using __perfect_forward = + __perfect_forward_impl<_Op, __tuple_types...>>; + +#endif // _LIBCPP_STD_VER > 14 + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___FUNCTIONAL_PERFECT_FORWARD_H diff --git a/libcxx/include/__functional/pointer_to_binary_function.h b/libcxx/include/__functional/pointer_to_binary_function.h new file mode 100644 index 0000000000000..d4a6c1674aec5 --- /dev/null +++ b/libcxx/include/__functional/pointer_to_binary_function.h @@ -0,0 +1,46 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FUNCTIONAL_POINTER_TO_BINARY_FUNCTION_H +#define _LIBCPP___FUNCTIONAL_POINTER_TO_BINARY_FUNCTION_H + +#include <__config> +#include <__functional/binary_function.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER <= 14 || defined(_LIBCPP_ENABLE_CXX17_REMOVED_BINDERS) + +template +class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 pointer_to_binary_function + : public binary_function<_Arg1, _Arg2, _Result> +{ + _Result (*__f_)(_Arg1, _Arg2); +public: + _LIBCPP_INLINE_VISIBILITY explicit pointer_to_binary_function(_Result (*__f)(_Arg1, _Arg2)) + : __f_(__f) {} + _LIBCPP_INLINE_VISIBILITY _Result operator()(_Arg1 __x, _Arg2 __y) const + {return __f_(__x, __y);} +}; + +template +_LIBCPP_DEPRECATED_IN_CXX11 inline _LIBCPP_INLINE_VISIBILITY +pointer_to_binary_function<_Arg1,_Arg2,_Result> +ptr_fun(_Result (*__f)(_Arg1,_Arg2)) + {return pointer_to_binary_function<_Arg1,_Arg2,_Result>(__f);} + +#endif + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___FUNCTIONAL_POINTER_TO_BINARY_FUNCTION_H diff --git a/libcxx/include/__functional/pointer_to_unary_function.h b/libcxx/include/__functional/pointer_to_unary_function.h new file mode 100644 index 0000000000000..0ac4561cc3053 --- /dev/null +++ b/libcxx/include/__functional/pointer_to_unary_function.h @@ -0,0 +1,46 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FUNCTIONAL_POINTER_TO_UNARY_FUNCTION_H +#define _LIBCPP___FUNCTIONAL_POINTER_TO_UNARY_FUNCTION_H + +#include <__config> +#include <__functional/unary_function.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER <= 14 || defined(_LIBCPP_ENABLE_CXX17_REMOVED_BINDERS) + +template +class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 pointer_to_unary_function + : public unary_function<_Arg, _Result> +{ + _Result (*__f_)(_Arg); +public: + _LIBCPP_INLINE_VISIBILITY explicit pointer_to_unary_function(_Result (*__f)(_Arg)) + : __f_(__f) {} + _LIBCPP_INLINE_VISIBILITY _Result operator()(_Arg __x) const + {return __f_(__x);} +}; + +template +_LIBCPP_DEPRECATED_IN_CXX11 inline _LIBCPP_INLINE_VISIBILITY +pointer_to_unary_function<_Arg,_Result> +ptr_fun(_Result (*__f)(_Arg)) + {return pointer_to_unary_function<_Arg,_Result>(__f);} + +#endif // _LIBCPP_STD_VER <= 14 || defined(_LIBCPP_ENABLE_CXX17_REMOVED_BINDERS) + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___FUNCTIONAL_POINTER_TO_UNARY_FUNCTION_H diff --git a/libcxx/include/__functional/ranges_operations.h b/libcxx/include/__functional/ranges_operations.h new file mode 100644 index 0000000000000..777c535251029 --- /dev/null +++ b/libcxx/include/__functional/ranges_operations.h @@ -0,0 +1,97 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FUNCTIONAL_RANGES_OPERATIONS_H +#define _LIBCPP___FUNCTIONAL_RANGES_OPERATIONS_H + +#include <__config> +#include +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if !defined(_LIBCPP_HAS_NO_RANGES) +namespace ranges { + +struct equal_to { + template + requires equality_comparable_with<_Tp, _Up> + [[nodiscard]] constexpr bool operator()(_Tp &&__t, _Up &&__u) const + noexcept(noexcept(bool(_VSTD::forward<_Tp>(__t) == _VSTD::forward<_Up>(__u)))) { + return _VSTD::forward<_Tp>(__t) == _VSTD::forward<_Up>(__u); + } + + using is_transparent = void; +}; + +struct not_equal_to { + template + requires equality_comparable_with<_Tp, _Up> + [[nodiscard]] constexpr bool operator()(_Tp &&__t, _Up &&__u) const + noexcept(noexcept(bool(!(_VSTD::forward<_Tp>(__t) == _VSTD::forward<_Up>(__u))))) { + return !(_VSTD::forward<_Tp>(__t) == _VSTD::forward<_Up>(__u)); + } + + using is_transparent = void; +}; + +struct less { + template + requires totally_ordered_with<_Tp, _Up> + [[nodiscard]] constexpr bool operator()(_Tp &&__t, _Up &&__u) const + noexcept(noexcept(bool(_VSTD::forward<_Tp>(__t) < _VSTD::forward<_Up>(__u)))) { + return _VSTD::forward<_Tp>(__t) < _VSTD::forward<_Up>(__u); + } + + using is_transparent = void; +}; + +struct less_equal { + template + requires totally_ordered_with<_Tp, _Up> + [[nodiscard]] constexpr bool operator()(_Tp &&__t, _Up &&__u) const + noexcept(noexcept(bool(!(_VSTD::forward<_Up>(__u) < _VSTD::forward<_Tp>(__t))))) { + return !(_VSTD::forward<_Up>(__u) < _VSTD::forward<_Tp>(__t)); + } + + using is_transparent = void; +}; + +struct greater { + template + requires totally_ordered_with<_Tp, _Up> + [[nodiscard]] constexpr bool operator()(_Tp &&__t, _Up &&__u) const + noexcept(noexcept(bool(_VSTD::forward<_Up>(__u) < _VSTD::forward<_Tp>(__t)))) { + return _VSTD::forward<_Up>(__u) < _VSTD::forward<_Tp>(__t); + } + + using is_transparent = void; +}; + +struct greater_equal { + template + requires totally_ordered_with<_Tp, _Up> + [[nodiscard]] constexpr bool operator()(_Tp &&__t, _Up &&__u) const + noexcept(noexcept(bool(!(_VSTD::forward<_Tp>(__t) < _VSTD::forward<_Up>(__u))))) { + return !(_VSTD::forward<_Tp>(__t) < _VSTD::forward<_Up>(__u)); + } + + using is_transparent = void; +}; + +} // namespace ranges +#endif // !defined(_LIBCPP_HAS_NO_RANGES) + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___FUNCTIONAL_RANGES_OPERATIONS_H diff --git a/libcxx/include/__functional/reference_wrapper.h b/libcxx/include/__functional/reference_wrapper.h new file mode 100644 index 0000000000000..09f4a64945022 --- /dev/null +++ b/libcxx/include/__functional/reference_wrapper.h @@ -0,0 +1,223 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FUNCTIONAL_REFERENCE_WRAPPER_H +#define _LIBCPP___FUNCTIONAL_REFERENCE_WRAPPER_H + +#include <__config> +#include <__functional/weak_result_type.h> +#include <__memory/addressof.h> +#include <__utility/forward.h> +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template +class _LIBCPP_TEMPLATE_VIS reference_wrapper +#if _LIBCPP_STD_VER <= 17 || !defined(_LIBCPP_ABI_NO_BINDER_BASES) + : public __weak_result_type<_Tp> +#endif +{ +public: + // types + typedef _Tp type; +private: + type* __f_; + +#ifndef _LIBCPP_CXX03_LANG + static void __fun(_Tp&) _NOEXCEPT; + static void __fun(_Tp&&) = delete; +#endif + +public: + // construct/copy/destroy +#ifdef _LIBCPP_CXX03_LANG + _LIBCPP_INLINE_VISIBILITY + reference_wrapper(type& __f) _NOEXCEPT + : __f_(_VSTD::addressof(__f)) {} +#else + template ::value, decltype(__fun(declval<_Up>())) >> + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 + reference_wrapper(_Up&& __u) _NOEXCEPT_(noexcept(__fun(declval<_Up>()))) { + type& __f = static_cast<_Up&&>(__u); + __f_ = _VSTD::addressof(__f); + } +#endif + + // access + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 + operator type&() const _NOEXCEPT {return *__f_;} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 + type& get() const _NOEXCEPT {return *__f_;} + +#ifndef _LIBCPP_CXX03_LANG + // invoke + template + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 + typename __invoke_of::type + operator() (_ArgTypes&&... __args) const { + return _VSTD::__invoke(get(), _VSTD::forward<_ArgTypes>(__args)...); + } +#else + + _LIBCPP_INLINE_VISIBILITY + typename __invoke_return::type + operator() () const { + return _VSTD::__invoke(get()); + } + + template + _LIBCPP_INLINE_VISIBILITY + typename __invoke_return0::type + operator() (_A0& __a0) const { + return _VSTD::__invoke(get(), __a0); + } + + template + _LIBCPP_INLINE_VISIBILITY + typename __invoke_return0::type + operator() (_A0 const& __a0) const { + return _VSTD::__invoke(get(), __a0); + } + + template + _LIBCPP_INLINE_VISIBILITY + typename __invoke_return1::type + operator() (_A0& __a0, _A1& __a1) const { + return _VSTD::__invoke(get(), __a0, __a1); + } + + template + _LIBCPP_INLINE_VISIBILITY + typename __invoke_return1::type + operator() (_A0 const& __a0, _A1& __a1) const { + return _VSTD::__invoke(get(), __a0, __a1); + } + + template + _LIBCPP_INLINE_VISIBILITY + typename __invoke_return1::type + operator() (_A0& __a0, _A1 const& __a1) const { + return _VSTD::__invoke(get(), __a0, __a1); + } + + template + _LIBCPP_INLINE_VISIBILITY + typename __invoke_return1::type + operator() (_A0 const& __a0, _A1 const& __a1) const { + return _VSTD::__invoke(get(), __a0, __a1); + } + + template + _LIBCPP_INLINE_VISIBILITY + typename __invoke_return2::type + operator() (_A0& __a0, _A1& __a1, _A2& __a2) const { + return _VSTD::__invoke(get(), __a0, __a1, __a2); + } + + template + _LIBCPP_INLINE_VISIBILITY + typename __invoke_return2::type + operator() (_A0 const& __a0, _A1& __a1, _A2& __a2) const { + return _VSTD::__invoke(get(), __a0, __a1, __a2); + } + + template + _LIBCPP_INLINE_VISIBILITY + typename __invoke_return2::type + operator() (_A0& __a0, _A1 const& __a1, _A2& __a2) const { + return _VSTD::__invoke(get(), __a0, __a1, __a2); + } + + template + _LIBCPP_INLINE_VISIBILITY + typename __invoke_return2::type + operator() (_A0& __a0, _A1& __a1, _A2 const& __a2) const { + return _VSTD::__invoke(get(), __a0, __a1, __a2); + } + + template + _LIBCPP_INLINE_VISIBILITY + typename __invoke_return2::type + operator() (_A0 const& __a0, _A1 const& __a1, _A2& __a2) const { + return _VSTD::__invoke(get(), __a0, __a1, __a2); + } + + template + _LIBCPP_INLINE_VISIBILITY + typename __invoke_return2::type + operator() (_A0 const& __a0, _A1& __a1, _A2 const& __a2) const { + return _VSTD::__invoke(get(), __a0, __a1, __a2); + } + + template + _LIBCPP_INLINE_VISIBILITY + typename __invoke_return2::type + operator() (_A0& __a0, _A1 const& __a1, _A2 const& __a2) const { + return _VSTD::__invoke(get(), __a0, __a1, __a2); + } + + template + _LIBCPP_INLINE_VISIBILITY + typename __invoke_return2::type + operator() (_A0 const& __a0, _A1 const& __a1, _A2 const& __a2) const { + return _VSTD::__invoke(get(), __a0, __a1, __a2); + } +#endif // _LIBCPP_CXX03_LANG +}; + +#ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES +template +reference_wrapper(_Tp&) -> reference_wrapper<_Tp>; +#endif + +template +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 +reference_wrapper<_Tp> +ref(_Tp& __t) _NOEXCEPT +{ + return reference_wrapper<_Tp>(__t); +} + +template +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 +reference_wrapper<_Tp> +ref(reference_wrapper<_Tp> __t) _NOEXCEPT +{ + return _VSTD::ref(__t.get()); +} + +template +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 +reference_wrapper +cref(const _Tp& __t) _NOEXCEPT +{ + return reference_wrapper(__t); +} + +template +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 +reference_wrapper +cref(reference_wrapper<_Tp> __t) _NOEXCEPT +{ + return _VSTD::cref(__t.get()); +} + +#ifndef _LIBCPP_CXX03_LANG +template void ref(const _Tp&&) = delete; +template void cref(const _Tp&&) = delete; +#endif + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___FUNCTIONAL_REFERENCE_WRAPPER_H diff --git a/libcxx/include/__functional/unary_negate.h b/libcxx/include/__functional/unary_negate.h new file mode 100644 index 0000000000000..71257cf40c0dd --- /dev/null +++ b/libcxx/include/__functional/unary_negate.h @@ -0,0 +1,47 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FUNCTIONAL_UNARY_NEGATE_H +#define _LIBCPP___FUNCTIONAL_UNARY_NEGATE_H + +#include <__config> +#include <__functional/unary_function.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_NEGATORS) + +template +class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX17 unary_negate + : public unary_function +{ + _Predicate __pred_; +public: + _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY + explicit unary_negate(const _Predicate& __pred) + : __pred_(__pred) {} + _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY + bool operator()(const typename _Predicate::argument_type& __x) const + {return !__pred_(__x);} +}; + +template +_LIBCPP_DEPRECATED_IN_CXX17 inline _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY +unary_negate<_Predicate> +not1(const _Predicate& __pred) {return unary_negate<_Predicate>(__pred);} + +#endif // _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_NEGATORS) + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___FUNCTIONAL_UNARY_NEGATE_H diff --git a/libcxx/include/__functional/unwrap_ref.h b/libcxx/include/__functional/unwrap_ref.h index 85c0c20ee7e0f..4d091ec35c5d2 100644 --- a/libcxx/include/__functional/unwrap_ref.h +++ b/libcxx/include/__functional/unwrap_ref.h @@ -36,8 +36,14 @@ struct decay; template struct unwrap_reference : __unwrap_reference<_Tp> { }; +template +using unwrap_reference_t = typename unwrap_reference<_Tp>::type; + template struct unwrap_ref_decay : unwrap_reference::type> { }; + +template +using unwrap_ref_decay_t = typename unwrap_ref_decay<_Tp>::type; #endif // > C++17 template diff --git a/libcxx/include/__functional/weak_result_type.h b/libcxx/include/__functional/weak_result_type.h new file mode 100644 index 0000000000000..2ee85acf1ef4d --- /dev/null +++ b/libcxx/include/__functional/weak_result_type.h @@ -0,0 +1,481 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FUNCTIONAL_WEAK_RESULT_TYPE_H +#define _LIBCPP___FUNCTIONAL_WEAK_RESULT_TYPE_H + +#include <__config> +#include <__functional/binary_function.h> +#include <__functional/unary_function.h> +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template +struct __has_result_type +{ +private: + struct __two {char __lx; char __lxx;}; + template static __two __test(...); + template static char __test(typename _Up::result_type* = 0); +public: + static const bool value = sizeof(__test<_Tp>(0)) == 1; +}; + +// __weak_result_type + +template +struct __derives_from_unary_function +{ +private: + struct __two {char __lx; char __lxx;}; + static __two __test(...); + template + static unary_function<_Ap, _Rp> + __test(const volatile unary_function<_Ap, _Rp>*); +public: + static const bool value = !is_same::value; + typedef decltype(__test((_Tp*)0)) type; +}; + +template +struct __derives_from_binary_function +{ +private: + struct __two {char __lx; char __lxx;}; + static __two __test(...); + template + static binary_function<_A1, _A2, _Rp> + __test(const volatile binary_function<_A1, _A2, _Rp>*); +public: + static const bool value = !is_same::value; + typedef decltype(__test((_Tp*)0)) type; +}; + +template ::value> +struct __maybe_derive_from_unary_function // bool is true + : public __derives_from_unary_function<_Tp>::type +{ +}; + +template +struct __maybe_derive_from_unary_function<_Tp, false> +{ +}; + +template ::value> +struct __maybe_derive_from_binary_function // bool is true + : public __derives_from_binary_function<_Tp>::type +{ +}; + +template +struct __maybe_derive_from_binary_function<_Tp, false> +{ +}; + +template ::value> +struct __weak_result_type_imp // bool is true + : public __maybe_derive_from_unary_function<_Tp>, + public __maybe_derive_from_binary_function<_Tp> +{ + typedef _LIBCPP_NODEBUG_TYPE typename _Tp::result_type result_type; +}; + +template +struct __weak_result_type_imp<_Tp, false> + : public __maybe_derive_from_unary_function<_Tp>, + public __maybe_derive_from_binary_function<_Tp> +{ +}; + +template +struct __weak_result_type + : public __weak_result_type_imp<_Tp> +{ +}; + +// 0 argument case + +template +struct __weak_result_type<_Rp ()> +{ + typedef _LIBCPP_NODEBUG_TYPE _Rp result_type; +}; + +template +struct __weak_result_type<_Rp (&)()> +{ + typedef _LIBCPP_NODEBUG_TYPE _Rp result_type; +}; + +template +struct __weak_result_type<_Rp (*)()> +{ + typedef _LIBCPP_NODEBUG_TYPE _Rp result_type; +}; + +// 1 argument case + +template +struct __weak_result_type<_Rp (_A1)> + : public unary_function<_A1, _Rp> +{ +}; + +template +struct __weak_result_type<_Rp (&)(_A1)> + : public unary_function<_A1, _Rp> +{ +}; + +template +struct __weak_result_type<_Rp (*)(_A1)> + : public unary_function<_A1, _Rp> +{ +}; + +template +struct __weak_result_type<_Rp (_Cp::*)()> + : public unary_function<_Cp*, _Rp> +{ +}; + +template +struct __weak_result_type<_Rp (_Cp::*)() const> + : public unary_function +{ +}; + +template +struct __weak_result_type<_Rp (_Cp::*)() volatile> + : public unary_function +{ +}; + +template +struct __weak_result_type<_Rp (_Cp::*)() const volatile> + : public unary_function +{ +}; + +// 2 argument case + +template +struct __weak_result_type<_Rp (_A1, _A2)> + : public binary_function<_A1, _A2, _Rp> +{ +}; + +template +struct __weak_result_type<_Rp (*)(_A1, _A2)> + : public binary_function<_A1, _A2, _Rp> +{ +}; + +template +struct __weak_result_type<_Rp (&)(_A1, _A2)> + : public binary_function<_A1, _A2, _Rp> +{ +}; + +template +struct __weak_result_type<_Rp (_Cp::*)(_A1)> + : public binary_function<_Cp*, _A1, _Rp> +{ +}; + +template +struct __weak_result_type<_Rp (_Cp::*)(_A1) const> + : public binary_function +{ +}; + +template +struct __weak_result_type<_Rp (_Cp::*)(_A1) volatile> + : public binary_function +{ +}; + +template +struct __weak_result_type<_Rp (_Cp::*)(_A1) const volatile> + : public binary_function +{ +}; + + +#ifndef _LIBCPP_CXX03_LANG +// 3 or more arguments + +template +struct __weak_result_type<_Rp (_A1, _A2, _A3, _A4...)> +{ + typedef _Rp result_type; +}; + +template +struct __weak_result_type<_Rp (&)(_A1, _A2, _A3, _A4...)> +{ + typedef _Rp result_type; +}; + +template +struct __weak_result_type<_Rp (*)(_A1, _A2, _A3, _A4...)> +{ + typedef _Rp result_type; +}; + +template +struct __weak_result_type<_Rp (_Cp::*)(_A1, _A2, _A3...)> +{ + typedef _Rp result_type; +}; + +template +struct __weak_result_type<_Rp (_Cp::*)(_A1, _A2, _A3...) const> +{ + typedef _Rp result_type; +}; + +template +struct __weak_result_type<_Rp (_Cp::*)(_A1, _A2, _A3...) volatile> +{ + typedef _Rp result_type; +}; + +template +struct __weak_result_type<_Rp (_Cp::*)(_A1, _A2, _A3...) const volatile> +{ + typedef _Rp result_type; +}; + +template +struct __invoke_return +{ + typedef decltype(_VSTD::__invoke(declval<_Tp>(), declval<_Args>()...)) type; +}; + +#else // defined(_LIBCPP_CXX03_LANG) + +template +struct __enable_invoke_imp; + +template +struct __enable_invoke_imp<_Ret, _T1, true, true> { + typedef _Ret _Bullet1; + typedef _Bullet1 type; +}; + +template +struct __enable_invoke_imp<_Ret, _T1, true, false> { + typedef _Ret _Bullet2; + typedef _Bullet2 type; +}; + +template +struct __enable_invoke_imp<_Ret, _T1, false, true> { + typedef typename add_lvalue_reference< + typename __apply_cv<_T1, _Ret>::type + >::type _Bullet3; + typedef _Bullet3 type; +}; + +template +struct __enable_invoke_imp<_Ret, _T1, false, false> { + typedef typename add_lvalue_reference< + typename __apply_cv()), _Ret>::type + >::type _Bullet4; + typedef _Bullet4 type; +}; + +template +struct __enable_invoke_imp<_Ret, _T1*, false, false> { + typedef typename add_lvalue_reference< + typename __apply_cv<_T1, _Ret>::type + >::type _Bullet4; + typedef _Bullet4 type; +}; + +template , + class _Ret = typename _Traits::_ReturnType, + class _Class = typename _Traits::_ClassType> +struct __enable_invoke : __enable_invoke_imp< + _Ret, _T1, + is_member_function_pointer<_Fn>::value, + is_base_of<_Class, typename remove_reference<_T1>::type>::value> +{ +}; + +__nat __invoke(__any, ...); + +// first bullet + +template +inline _LIBCPP_INLINE_VISIBILITY +typename __enable_invoke<_Fn, _T1>::_Bullet1 +__invoke(_Fn __f, _T1& __t1) { + return (__t1.*__f)(); +} + +template +inline _LIBCPP_INLINE_VISIBILITY +typename __enable_invoke<_Fn, _T1>::_Bullet1 +__invoke(_Fn __f, _T1& __t1, _A0& __a0) { + return (__t1.*__f)(__a0); +} + +template +inline _LIBCPP_INLINE_VISIBILITY +typename __enable_invoke<_Fn, _T1>::_Bullet1 +__invoke(_Fn __f, _T1& __t1, _A0& __a0, _A1& __a1) { + return (__t1.*__f)(__a0, __a1); +} + +template +inline _LIBCPP_INLINE_VISIBILITY +typename __enable_invoke<_Fn, _T1>::_Bullet1 +__invoke(_Fn __f, _T1& __t1, _A0& __a0, _A1& __a1, _A2& __a2) { + return (__t1.*__f)(__a0, __a1, __a2); +} + +template +inline _LIBCPP_INLINE_VISIBILITY +typename __enable_invoke<_Fn, _T1>::_Bullet2 +__invoke(_Fn __f, _T1& __t1) { + return ((*__t1).*__f)(); +} + +template +inline _LIBCPP_INLINE_VISIBILITY +typename __enable_invoke<_Fn, _T1>::_Bullet2 +__invoke(_Fn __f, _T1& __t1, _A0& __a0) { + return ((*__t1).*__f)(__a0); +} + +template +inline _LIBCPP_INLINE_VISIBILITY +typename __enable_invoke<_Fn, _T1>::_Bullet2 +__invoke(_Fn __f, _T1& __t1, _A0& __a0, _A1& __a1) { + return ((*__t1).*__f)(__a0, __a1); +} + +template +inline _LIBCPP_INLINE_VISIBILITY +typename __enable_invoke<_Fn, _T1>::_Bullet2 +__invoke(_Fn __f, _T1& __t1, _A0& __a0, _A1& __a1, _A2& __a2) { + return ((*__t1).*__f)(__a0, __a1, __a2); +} + +template +inline _LIBCPP_INLINE_VISIBILITY +typename __enable_invoke<_Fn, _T1>::_Bullet3 +__invoke(_Fn __f, _T1& __t1) { + return __t1.*__f; +} + +template +inline _LIBCPP_INLINE_VISIBILITY +typename __enable_invoke<_Fn, _T1>::_Bullet4 +__invoke(_Fn __f, _T1& __t1) { + return (*__t1).*__f; +} + +// fifth bullet + +template +inline _LIBCPP_INLINE_VISIBILITY +decltype(declval<_Fp&>()()) +__invoke(_Fp& __f) +{ + return __f(); +} + +template +inline _LIBCPP_INLINE_VISIBILITY +decltype(declval<_Fp&>()(declval<_A0&>())) +__invoke(_Fp& __f, _A0& __a0) +{ + return __f(__a0); +} + +template +inline _LIBCPP_INLINE_VISIBILITY +decltype(declval<_Fp&>()(declval<_A0&>(), declval<_A1&>())) +__invoke(_Fp& __f, _A0& __a0, _A1& __a1) +{ + return __f(__a0, __a1); +} + +template +inline _LIBCPP_INLINE_VISIBILITY +decltype(declval<_Fp&>()(declval<_A0&>(), declval<_A1&>(), declval<_A2&>())) +__invoke(_Fp& __f, _A0& __a0, _A1& __a1, _A2& __a2) +{ + return __f(__a0, __a1, __a2); +} + +template >::value> +struct __invoke_return +{ + typedef typename __weak_result_type<_Fp>::result_type type; +}; + +template +struct __invoke_return<_Fp, false> +{ + typedef decltype(_VSTD::__invoke(declval<_Fp&>())) type; +}; + +template +struct __invoke_return0 +{ + typedef decltype(_VSTD::__invoke(declval<_Tp&>(), declval<_A0&>())) type; +}; + +template +struct __invoke_return0<_Rp _Tp::*, _A0> +{ + typedef typename __enable_invoke<_Rp _Tp::*, _A0>::type type; +}; + +template +struct __invoke_return1 +{ + typedef decltype(_VSTD::__invoke(declval<_Tp&>(), declval<_A0&>(), + declval<_A1&>())) type; +}; + +template +struct __invoke_return1<_Rp _Class::*, _A0, _A1> { + typedef typename __enable_invoke<_Rp _Class::*, _A0>::type type; +}; + +template +struct __invoke_return2 +{ + typedef decltype(_VSTD::__invoke(declval<_Tp&>(), declval<_A0&>(), + declval<_A1&>(), + declval<_A2&>())) type; +}; + +template +struct __invoke_return2<_Ret _Class::*, _A0, _A1, _A2> { + typedef typename __enable_invoke<_Ret _Class::*, _A0>::type type; +}; + +#endif // !defined(_LIBCPP_CXX03_LANG) + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___FUNCTIONAL_WEAK_RESULT_TYPE_H diff --git a/libcxx/include/__functional_base b/libcxx/include/__functional_base index 479dfcf841b24..ccc3f3a58ca5c 100644 --- a/libcxx/include/__functional_base +++ b/libcxx/include/__functional_base @@ -11,7 +11,14 @@ #define _LIBCPP_FUNCTIONAL_BASE #include <__config> +#include <__functional/binary_function.h> +#include <__functional/invoke.h> +#include <__functional/operations.h> +#include <__functional/reference_wrapper.h> #include <__functional/unary_function.h> +#include <__functional/weak_result_type.h> +#include <__memory/allocator_arg_t.h> +#include <__memory/uses_allocator.h> #include #include #include @@ -22,675 +29,4 @@ #pragma GCC system_header #endif -_LIBCPP_BEGIN_NAMESPACE_STD - -template -struct _LIBCPP_TEMPLATE_VIS binary_function -{ - typedef _Arg1 first_argument_type; - typedef _Arg2 second_argument_type; - typedef _Result result_type; -}; - -template -struct __has_result_type -{ -private: - struct __two {char __lx; char __lxx;}; - template static __two __test(...); - template static char __test(typename _Up::result_type* = 0); -public: - static const bool value = sizeof(__test<_Tp>(0)) == 1; -}; - -_LIBCPP_SUPPRESS_DEPRECATED_PUSH -#if _LIBCPP_STD_VER > 11 -template -#else -template -#endif -struct _LIBCPP_TEMPLATE_VIS less -#if !defined(_LIBCPP_ABI_NO_BINDER_BASES) - : binary_function<_Tp, _Tp, bool> -#endif -{ -_LIBCPP_SUPPRESS_DEPRECATED_POP - typedef bool __result_type; // used by valarray -#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_BINDER_TYPEDEFS) - _LIBCPP_DEPRECATED_IN_CXX17 typedef bool result_type; - _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp first_argument_type; - _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp second_argument_type; -#endif - _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY - bool operator()(const _Tp& __x, const _Tp& __y) const - {return __x < __y;} -}; - -#if _LIBCPP_STD_VER > 11 -template <> -struct _LIBCPP_TEMPLATE_VIS less -{ - template - _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY - auto operator()(_T1&& __t, _T2&& __u) const - _NOEXCEPT_(noexcept(_VSTD::forward<_T1>(__t) < _VSTD::forward<_T2>(__u))) - -> decltype (_VSTD::forward<_T1>(__t) < _VSTD::forward<_T2>(__u)) - { return _VSTD::forward<_T1>(__t) < _VSTD::forward<_T2>(__u); } - typedef void is_transparent; -}; -#endif - -// __weak_result_type - -template -struct __derives_from_unary_function -{ -private: - struct __two {char __lx; char __lxx;}; - static __two __test(...); - template - static unary_function<_Ap, _Rp> - __test(const volatile unary_function<_Ap, _Rp>*); -public: - static const bool value = !is_same::value; - typedef decltype(__test((_Tp*)0)) type; -}; - -template -struct __derives_from_binary_function -{ -private: - struct __two {char __lx; char __lxx;}; - static __two __test(...); - template - static binary_function<_A1, _A2, _Rp> - __test(const volatile binary_function<_A1, _A2, _Rp>*); -public: - static const bool value = !is_same::value; - typedef decltype(__test((_Tp*)0)) type; -}; - -template ::value> -struct __maybe_derive_from_unary_function // bool is true - : public __derives_from_unary_function<_Tp>::type -{ -}; - -template -struct __maybe_derive_from_unary_function<_Tp, false> -{ -}; - -template ::value> -struct __maybe_derive_from_binary_function // bool is true - : public __derives_from_binary_function<_Tp>::type -{ -}; - -template -struct __maybe_derive_from_binary_function<_Tp, false> -{ -}; - -template ::value> -struct __weak_result_type_imp // bool is true - : public __maybe_derive_from_unary_function<_Tp>, - public __maybe_derive_from_binary_function<_Tp> -{ - typedef _LIBCPP_NODEBUG_TYPE typename _Tp::result_type result_type; -}; - -template -struct __weak_result_type_imp<_Tp, false> - : public __maybe_derive_from_unary_function<_Tp>, - public __maybe_derive_from_binary_function<_Tp> -{ -}; - -template -struct __weak_result_type - : public __weak_result_type_imp<_Tp> -{ -}; - -// 0 argument case - -template -struct __weak_result_type<_Rp ()> -{ - typedef _LIBCPP_NODEBUG_TYPE _Rp result_type; -}; - -template -struct __weak_result_type<_Rp (&)()> -{ - typedef _LIBCPP_NODEBUG_TYPE _Rp result_type; -}; - -template -struct __weak_result_type<_Rp (*)()> -{ - typedef _LIBCPP_NODEBUG_TYPE _Rp result_type; -}; - -// 1 argument case - -template -struct __weak_result_type<_Rp (_A1)> - : public unary_function<_A1, _Rp> -{ -}; - -template -struct __weak_result_type<_Rp (&)(_A1)> - : public unary_function<_A1, _Rp> -{ -}; - -template -struct __weak_result_type<_Rp (*)(_A1)> - : public unary_function<_A1, _Rp> -{ -}; - -template -struct __weak_result_type<_Rp (_Cp::*)()> - : public unary_function<_Cp*, _Rp> -{ -}; - -template -struct __weak_result_type<_Rp (_Cp::*)() const> - : public unary_function -{ -}; - -template -struct __weak_result_type<_Rp (_Cp::*)() volatile> - : public unary_function -{ -}; - -template -struct __weak_result_type<_Rp (_Cp::*)() const volatile> - : public unary_function -{ -}; - -// 2 argument case - -template -struct __weak_result_type<_Rp (_A1, _A2)> - : public binary_function<_A1, _A2, _Rp> -{ -}; - -template -struct __weak_result_type<_Rp (*)(_A1, _A2)> - : public binary_function<_A1, _A2, _Rp> -{ -}; - -template -struct __weak_result_type<_Rp (&)(_A1, _A2)> - : public binary_function<_A1, _A2, _Rp> -{ -}; - -template -struct __weak_result_type<_Rp (_Cp::*)(_A1)> - : public binary_function<_Cp*, _A1, _Rp> -{ -}; - -template -struct __weak_result_type<_Rp (_Cp::*)(_A1) const> - : public binary_function -{ -}; - -template -struct __weak_result_type<_Rp (_Cp::*)(_A1) volatile> - : public binary_function -{ -}; - -template -struct __weak_result_type<_Rp (_Cp::*)(_A1) const volatile> - : public binary_function -{ -}; - - -#ifndef _LIBCPP_CXX03_LANG -// 3 or more arguments - -template -struct __weak_result_type<_Rp (_A1, _A2, _A3, _A4...)> -{ - typedef _Rp result_type; -}; - -template -struct __weak_result_type<_Rp (&)(_A1, _A2, _A3, _A4...)> -{ - typedef _Rp result_type; -}; - -template -struct __weak_result_type<_Rp (*)(_A1, _A2, _A3, _A4...)> -{ - typedef _Rp result_type; -}; - -template -struct __weak_result_type<_Rp (_Cp::*)(_A1, _A2, _A3...)> -{ - typedef _Rp result_type; -}; - -template -struct __weak_result_type<_Rp (_Cp::*)(_A1, _A2, _A3...) const> -{ - typedef _Rp result_type; -}; - -template -struct __weak_result_type<_Rp (_Cp::*)(_A1, _A2, _A3...) volatile> -{ - typedef _Rp result_type; -}; - -template -struct __weak_result_type<_Rp (_Cp::*)(_A1, _A2, _A3...) const volatile> -{ - typedef _Rp result_type; -}; - -template -struct __invoke_return -{ - typedef decltype(_VSTD::__invoke(declval<_Tp>(), declval<_Args>()...)) type; -}; - -#else // defined(_LIBCPP_CXX03_LANG) - -#include <__functional_base_03> - -#endif // !defined(_LIBCPP_CXX03_LANG) - - -template ::value> -struct __invoke_void_return_wrapper -{ -#ifndef _LIBCPP_CXX03_LANG - template - static _Ret __call(_Args&&... __args) { - return _VSTD::__invoke(_VSTD::forward<_Args>(__args)...); - } -#else - template - static _Ret __call(_Fn __f) { - return _VSTD::__invoke(__f); - } - - template - static _Ret __call(_Fn __f, _A0& __a0) { - return _VSTD::__invoke(__f, __a0); - } - - template - static _Ret __call(_Fn __f, _A0& __a0, _A1& __a1) { - return _VSTD::__invoke(__f, __a0, __a1); - } - - template - static _Ret __call(_Fn __f, _A0& __a0, _A1& __a1, _A2& __a2){ - return _VSTD::__invoke(__f, __a0, __a1, __a2); - } -#endif -}; - -template -struct __invoke_void_return_wrapper<_Ret, true> -{ -#ifndef _LIBCPP_CXX03_LANG - template - static void __call(_Args&&... __args) { - _VSTD::__invoke(_VSTD::forward<_Args>(__args)...); - } -#else - template - static void __call(_Fn __f) { - _VSTD::__invoke(__f); - } - - template - static void __call(_Fn __f, _A0& __a0) { - _VSTD::__invoke(__f, __a0); - } - - template - static void __call(_Fn __f, _A0& __a0, _A1& __a1) { - _VSTD::__invoke(__f, __a0, __a1); - } - - template - static void __call(_Fn __f, _A0& __a0, _A1& __a1, _A2& __a2) { - _VSTD::__invoke(__f, __a0, __a1, __a2); - } -#endif -}; - -template -class _LIBCPP_TEMPLATE_VIS reference_wrapper -#if _LIBCPP_STD_VER <= 17 || !defined(_LIBCPP_ABI_NO_BINDER_BASES) - : public __weak_result_type<_Tp> -#endif -{ -public: - // types - typedef _Tp type; -private: - type* __f_; - -#ifndef _LIBCPP_CXX03_LANG - static void __fun(_Tp&) _NOEXCEPT; - static void __fun(_Tp&&) = delete; -#endif - -public: - // construct/copy/destroy -#ifdef _LIBCPP_CXX03_LANG - _LIBCPP_INLINE_VISIBILITY - reference_wrapper(type& __f) _NOEXCEPT - : __f_(_VSTD::addressof(__f)) {} -#else - template ::value, decltype(__fun(declval<_Up>())) >> - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 - reference_wrapper(_Up&& __u) _NOEXCEPT_(noexcept(__fun(declval<_Up>()))) { - type& __f = static_cast<_Up&&>(__u); - __f_ = _VSTD::addressof(__f); - } -#endif - - // access - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 - operator type&() const _NOEXCEPT {return *__f_;} - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 - type& get() const _NOEXCEPT {return *__f_;} - -#ifndef _LIBCPP_CXX03_LANG - // invoke - template - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 - typename __invoke_of::type - operator() (_ArgTypes&&... __args) const { - return _VSTD::__invoke(get(), _VSTD::forward<_ArgTypes>(__args)...); - } -#else - - _LIBCPP_INLINE_VISIBILITY - typename __invoke_return::type - operator() () const { - return _VSTD::__invoke(get()); - } - - template - _LIBCPP_INLINE_VISIBILITY - typename __invoke_return0::type - operator() (_A0& __a0) const { - return _VSTD::__invoke(get(), __a0); - } - - template - _LIBCPP_INLINE_VISIBILITY - typename __invoke_return0::type - operator() (_A0 const& __a0) const { - return _VSTD::__invoke(get(), __a0); - } - - template - _LIBCPP_INLINE_VISIBILITY - typename __invoke_return1::type - operator() (_A0& __a0, _A1& __a1) const { - return _VSTD::__invoke(get(), __a0, __a1); - } - - template - _LIBCPP_INLINE_VISIBILITY - typename __invoke_return1::type - operator() (_A0 const& __a0, _A1& __a1) const { - return _VSTD::__invoke(get(), __a0, __a1); - } - - template - _LIBCPP_INLINE_VISIBILITY - typename __invoke_return1::type - operator() (_A0& __a0, _A1 const& __a1) const { - return _VSTD::__invoke(get(), __a0, __a1); - } - - template - _LIBCPP_INLINE_VISIBILITY - typename __invoke_return1::type - operator() (_A0 const& __a0, _A1 const& __a1) const { - return _VSTD::__invoke(get(), __a0, __a1); - } - - template - _LIBCPP_INLINE_VISIBILITY - typename __invoke_return2::type - operator() (_A0& __a0, _A1& __a1, _A2& __a2) const { - return _VSTD::__invoke(get(), __a0, __a1, __a2); - } - - template - _LIBCPP_INLINE_VISIBILITY - typename __invoke_return2::type - operator() (_A0 const& __a0, _A1& __a1, _A2& __a2) const { - return _VSTD::__invoke(get(), __a0, __a1, __a2); - } - - template - _LIBCPP_INLINE_VISIBILITY - typename __invoke_return2::type - operator() (_A0& __a0, _A1 const& __a1, _A2& __a2) const { - return _VSTD::__invoke(get(), __a0, __a1, __a2); - } - - template - _LIBCPP_INLINE_VISIBILITY - typename __invoke_return2::type - operator() (_A0& __a0, _A1& __a1, _A2 const& __a2) const { - return _VSTD::__invoke(get(), __a0, __a1, __a2); - } - - template - _LIBCPP_INLINE_VISIBILITY - typename __invoke_return2::type - operator() (_A0 const& __a0, _A1 const& __a1, _A2& __a2) const { - return _VSTD::__invoke(get(), __a0, __a1, __a2); - } - - template - _LIBCPP_INLINE_VISIBILITY - typename __invoke_return2::type - operator() (_A0 const& __a0, _A1& __a1, _A2 const& __a2) const { - return _VSTD::__invoke(get(), __a0, __a1, __a2); - } - - template - _LIBCPP_INLINE_VISIBILITY - typename __invoke_return2::type - operator() (_A0& __a0, _A1 const& __a1, _A2 const& __a2) const { - return _VSTD::__invoke(get(), __a0, __a1, __a2); - } - - template - _LIBCPP_INLINE_VISIBILITY - typename __invoke_return2::type - operator() (_A0 const& __a0, _A1 const& __a1, _A2 const& __a2) const { - return _VSTD::__invoke(get(), __a0, __a1, __a2); - } -#endif // _LIBCPP_CXX03_LANG -}; - -#ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES -template -reference_wrapper(_Tp&) -> reference_wrapper<_Tp>; -#endif - -template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -reference_wrapper<_Tp> -ref(_Tp& __t) _NOEXCEPT -{ - return reference_wrapper<_Tp>(__t); -} - -template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -reference_wrapper<_Tp> -ref(reference_wrapper<_Tp> __t) _NOEXCEPT -{ - return _VSTD::ref(__t.get()); -} - -template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -reference_wrapper -cref(const _Tp& __t) _NOEXCEPT -{ - return reference_wrapper(__t); -} - -template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -reference_wrapper -cref(reference_wrapper<_Tp> __t) _NOEXCEPT -{ - return _VSTD::cref(__t.get()); -} - -#ifndef _LIBCPP_CXX03_LANG -template void ref(const _Tp&&) = delete; -template void cref(const _Tp&&) = delete; -#endif - -#if _LIBCPP_STD_VER > 11 -template -struct __is_transparent : false_type {}; - -template -struct __is_transparent<_Tp, _Up, - typename __void_t::type> - : true_type {}; -#endif - -// allocator_arg_t - -struct _LIBCPP_TEMPLATE_VIS allocator_arg_t { explicit allocator_arg_t() = default; }; - -#if defined(_LIBCPP_CXX03_LANG) || defined(_LIBCPP_BUILDING_LIBRARY) -extern _LIBCPP_EXPORTED_FROM_ABI const allocator_arg_t allocator_arg; -#else -/* _LIBCPP_INLINE_VAR */ constexpr allocator_arg_t allocator_arg = allocator_arg_t(); -#endif - -// uses_allocator - -template -struct __has_allocator_type -{ -private: - struct __two {char __lx; char __lxx;}; - template static __two __test(...); - template static char __test(typename _Up::allocator_type* = 0); -public: - static const bool value = sizeof(__test<_Tp>(0)) == 1; -}; - -template ::value> -struct __uses_allocator - : public integral_constant::value> -{ -}; - -template -struct __uses_allocator<_Tp, _Alloc, false> - : public false_type -{ -}; - -template -struct _LIBCPP_TEMPLATE_VIS uses_allocator - : public __uses_allocator<_Tp, _Alloc> -{ -}; - -#if _LIBCPP_STD_VER > 14 -template -_LIBCPP_INLINE_VAR constexpr size_t uses_allocator_v = uses_allocator<_Tp, _Alloc>::value; -#endif - -#ifndef _LIBCPP_CXX03_LANG - -// allocator construction - -template -struct __uses_alloc_ctor_imp -{ - typedef _LIBCPP_NODEBUG_TYPE typename __uncvref<_Alloc>::type _RawAlloc; - static const bool __ua = uses_allocator<_Tp, _RawAlloc>::value; - static const bool __ic = - is_constructible<_Tp, allocator_arg_t, _Alloc, _Args...>::value; - static const int value = __ua ? 2 - __ic : 0; -}; - -template -struct __uses_alloc_ctor - : integral_constant::value> - {}; - -template -inline _LIBCPP_INLINE_VISIBILITY -void __user_alloc_construct_impl (integral_constant, _Tp *__storage, const _Allocator &, _Args &&... __args ) -{ - new (__storage) _Tp (_VSTD::forward<_Args>(__args)...); -} - -// FIXME: This should have a version which takes a non-const alloc. -template -inline _LIBCPP_INLINE_VISIBILITY -void __user_alloc_construct_impl (integral_constant, _Tp *__storage, const _Allocator &__a, _Args &&... __args ) -{ - new (__storage) _Tp (allocator_arg, __a, _VSTD::forward<_Args>(__args)...); -} - -// FIXME: This should have a version which takes a non-const alloc. -template -inline _LIBCPP_INLINE_VISIBILITY -void __user_alloc_construct_impl (integral_constant, _Tp *__storage, const _Allocator &__a, _Args &&... __args ) -{ - new (__storage) _Tp (_VSTD::forward<_Args>(__args)..., __a); -} - -#endif // _LIBCPP_CXX03_LANG - -#if _LIBCPP_STD_VER > 14 - -template -_LIBCPP_CONSTEXPR_AFTER_CXX17 invoke_result_t<_Fn, _Args...> -invoke(_Fn&& __f, _Args&&... __args) - noexcept(is_nothrow_invocable_v<_Fn, _Args...>) -{ - return _VSTD::__invoke(_VSTD::forward<_Fn>(__f), _VSTD::forward<_Args>(__args)...); -} - -#endif // _LIBCPP_STD_VER > 14 - -_LIBCPP_END_NAMESPACE_STD - #endif // _LIBCPP_FUNCTIONAL_BASE diff --git a/libcxx/include/__functional_base_03 b/libcxx/include/__functional_base_03 deleted file mode 100644 index 21b39a4bf8f02..0000000000000 --- a/libcxx/include/__functional_base_03 +++ /dev/null @@ -1,223 +0,0 @@ -// -*- C++ -*- -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef _LIBCPP_FUNCTIONAL_BASE_03 -#define _LIBCPP_FUNCTIONAL_BASE_03 - -// manual variadic expansion for - -// __invoke - -template -struct __enable_invoke_imp; - -template -struct __enable_invoke_imp<_Ret, _T1, true, true> { - typedef _Ret _Bullet1; - typedef _Bullet1 type; -}; - -template -struct __enable_invoke_imp<_Ret, _T1, true, false> { - typedef _Ret _Bullet2; - typedef _Bullet2 type; -}; - -template -struct __enable_invoke_imp<_Ret, _T1, false, true> { - typedef typename add_lvalue_reference< - typename __apply_cv<_T1, _Ret>::type - >::type _Bullet3; - typedef _Bullet3 type; -}; - -template -struct __enable_invoke_imp<_Ret, _T1, false, false> { - typedef typename add_lvalue_reference< - typename __apply_cv()), _Ret>::type - >::type _Bullet4; - typedef _Bullet4 type; -}; - -template -struct __enable_invoke_imp<_Ret, _T1*, false, false> { - typedef typename add_lvalue_reference< - typename __apply_cv<_T1, _Ret>::type - >::type _Bullet4; - typedef _Bullet4 type; -}; - -template , - class _Ret = typename _Traits::_ReturnType, - class _Class = typename _Traits::_ClassType> -struct __enable_invoke : __enable_invoke_imp< - _Ret, _T1, - is_member_function_pointer<_Fn>::value, - is_base_of<_Class, typename remove_reference<_T1>::type>::value> -{ -}; - -__nat __invoke(__any, ...); - -// first bullet - -template -inline _LIBCPP_INLINE_VISIBILITY -typename __enable_invoke<_Fn, _T1>::_Bullet1 -__invoke(_Fn __f, _T1& __t1) { - return (__t1.*__f)(); -} - -template -inline _LIBCPP_INLINE_VISIBILITY -typename __enable_invoke<_Fn, _T1>::_Bullet1 -__invoke(_Fn __f, _T1& __t1, _A0& __a0) { - return (__t1.*__f)(__a0); -} - -template -inline _LIBCPP_INLINE_VISIBILITY -typename __enable_invoke<_Fn, _T1>::_Bullet1 -__invoke(_Fn __f, _T1& __t1, _A0& __a0, _A1& __a1) { - return (__t1.*__f)(__a0, __a1); -} - -template -inline _LIBCPP_INLINE_VISIBILITY -typename __enable_invoke<_Fn, _T1>::_Bullet1 -__invoke(_Fn __f, _T1& __t1, _A0& __a0, _A1& __a1, _A2& __a2) { - return (__t1.*__f)(__a0, __a1, __a2); -} - -template -inline _LIBCPP_INLINE_VISIBILITY -typename __enable_invoke<_Fn, _T1>::_Bullet2 -__invoke(_Fn __f, _T1& __t1) { - return ((*__t1).*__f)(); -} - -template -inline _LIBCPP_INLINE_VISIBILITY -typename __enable_invoke<_Fn, _T1>::_Bullet2 -__invoke(_Fn __f, _T1& __t1, _A0& __a0) { - return ((*__t1).*__f)(__a0); -} - -template -inline _LIBCPP_INLINE_VISIBILITY -typename __enable_invoke<_Fn, _T1>::_Bullet2 -__invoke(_Fn __f, _T1& __t1, _A0& __a0, _A1& __a1) { - return ((*__t1).*__f)(__a0, __a1); -} - -template -inline _LIBCPP_INLINE_VISIBILITY -typename __enable_invoke<_Fn, _T1>::_Bullet2 -__invoke(_Fn __f, _T1& __t1, _A0& __a0, _A1& __a1, _A2& __a2) { - return ((*__t1).*__f)(__a0, __a1, __a2); -} - -template -inline _LIBCPP_INLINE_VISIBILITY -typename __enable_invoke<_Fn, _T1>::_Bullet3 -__invoke(_Fn __f, _T1& __t1) { - return __t1.*__f; -} - -template -inline _LIBCPP_INLINE_VISIBILITY -typename __enable_invoke<_Fn, _T1>::_Bullet4 -__invoke(_Fn __f, _T1& __t1) { - return (*__t1).*__f; -} - -// fifth bullet - -template -inline _LIBCPP_INLINE_VISIBILITY -decltype(declval<_Fp&>()()) -__invoke(_Fp& __f) -{ - return __f(); -} - -template -inline _LIBCPP_INLINE_VISIBILITY -decltype(declval<_Fp&>()(declval<_A0&>())) -__invoke(_Fp& __f, _A0& __a0) -{ - return __f(__a0); -} - -template -inline _LIBCPP_INLINE_VISIBILITY -decltype(declval<_Fp&>()(declval<_A0&>(), declval<_A1&>())) -__invoke(_Fp& __f, _A0& __a0, _A1& __a1) -{ - return __f(__a0, __a1); -} - -template -inline _LIBCPP_INLINE_VISIBILITY -decltype(declval<_Fp&>()(declval<_A0&>(), declval<_A1&>(), declval<_A2&>())) -__invoke(_Fp& __f, _A0& __a0, _A1& __a1, _A2& __a2) -{ - return __f(__a0, __a1, __a2); -} - -template >::value> -struct __invoke_return -{ - typedef typename __weak_result_type<_Fp>::result_type type; -}; - -template -struct __invoke_return<_Fp, false> -{ - typedef decltype(_VSTD::__invoke(declval<_Fp&>())) type; -}; - -template -struct __invoke_return0 -{ - typedef decltype(_VSTD::__invoke(declval<_Tp&>(), declval<_A0&>())) type; -}; - -template -struct __invoke_return0<_Rp _Tp::*, _A0> -{ - typedef typename __enable_invoke<_Rp _Tp::*, _A0>::type type; -}; - -template -struct __invoke_return1 -{ - typedef decltype(_VSTD::__invoke(declval<_Tp&>(), declval<_A0&>(), - declval<_A1&>())) type; -}; - -template -struct __invoke_return1<_Rp _Class::*, _A0, _A1> { - typedef typename __enable_invoke<_Rp _Class::*, _A0>::type type; -}; - -template -struct __invoke_return2 -{ - typedef decltype(_VSTD::__invoke(declval<_Tp&>(), declval<_A0&>(), - declval<_A1&>(), - declval<_A2&>())) type; -}; - -template -struct __invoke_return2<_Ret _Class::*, _A0, _A1, _A2> { - typedef typename __enable_invoke<_Ret _Class::*, _A0>::type type; -}; -#endif // _LIBCPP_FUNCTIONAL_BASE_03 diff --git a/libcxx/include/__iterator/advance.h b/libcxx/include/__iterator/advance.h index fdac109f3a86c..4971bebfed864 100644 --- a/libcxx/include/__iterator/advance.h +++ b/libcxx/include/__iterator/advance.h @@ -16,6 +16,7 @@ #include <__iterator/concepts.h> #include <__iterator/incrementable_traits.h> #include <__iterator/iterator_traits.h> +#include #include #include #include diff --git a/libcxx/include/__memory/allocator_arg_t.h b/libcxx/include/__memory/allocator_arg_t.h new file mode 100644 index 0000000000000..830c6b8148eb8 --- /dev/null +++ b/libcxx/include/__memory/allocator_arg_t.h @@ -0,0 +1,78 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FUNCTIONAL___ALLOCATOR_ARG_T_H +#define _LIBCPP___FUNCTIONAL___ALLOCATOR_ARG_T_H + +#include <__config> +#include <__memory/uses_allocator.h> +#include <__utility/forward.h> +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +struct _LIBCPP_TEMPLATE_VIS allocator_arg_t { explicit allocator_arg_t() = default; }; + +#if defined(_LIBCPP_CXX03_LANG) || defined(_LIBCPP_BUILDING_LIBRARY) +extern _LIBCPP_EXPORTED_FROM_ABI const allocator_arg_t allocator_arg; +#else +/* _LIBCPP_INLINE_VAR */ constexpr allocator_arg_t allocator_arg = allocator_arg_t(); +#endif + +#ifndef _LIBCPP_CXX03_LANG + +// allocator construction + +template +struct __uses_alloc_ctor_imp +{ + typedef _LIBCPP_NODEBUG_TYPE typename __uncvref<_Alloc>::type _RawAlloc; + static const bool __ua = uses_allocator<_Tp, _RawAlloc>::value; + static const bool __ic = + is_constructible<_Tp, allocator_arg_t, _Alloc, _Args...>::value; + static const int value = __ua ? 2 - __ic : 0; +}; + +template +struct __uses_alloc_ctor + : integral_constant::value> + {}; + +template +inline _LIBCPP_INLINE_VISIBILITY +void __user_alloc_construct_impl (integral_constant, _Tp *__storage, const _Allocator &, _Args &&... __args ) +{ + new (__storage) _Tp (_VSTD::forward<_Args>(__args)...); +} + +// FIXME: This should have a version which takes a non-const alloc. +template +inline _LIBCPP_INLINE_VISIBILITY +void __user_alloc_construct_impl (integral_constant, _Tp *__storage, const _Allocator &__a, _Args &&... __args ) +{ + new (__storage) _Tp (allocator_arg, __a, _VSTD::forward<_Args>(__args)...); +} + +// FIXME: This should have a version which takes a non-const alloc. +template +inline _LIBCPP_INLINE_VISIBILITY +void __user_alloc_construct_impl (integral_constant, _Tp *__storage, const _Allocator &__a, _Args &&... __args ) +{ + new (__storage) _Tp (_VSTD::forward<_Args>(__args)..., __a); +} + +#endif // _LIBCPP_CXX03_LANG + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___FUNCTIONAL___ALLOCATOR_ARG_T_H diff --git a/libcxx/include/__memory/shared_ptr.h b/libcxx/include/__memory/shared_ptr.h index 64f5b06ac26b2..04161c4b73ed3 100644 --- a/libcxx/include/__memory/shared_ptr.h +++ b/libcxx/include/__memory/shared_ptr.h @@ -12,11 +12,14 @@ #include <__availability> #include <__config> -#include <__functional_base> // std::less, std::binary_function +#include <__functional_base> +#include <__functional/binary_function.h> +#include <__functional/operations.h> +#include <__functional/reference_wrapper.h> #include <__memory/addressof.h> #include <__memory/allocation_guard.h> -#include <__memory/allocator.h> #include <__memory/allocator_traits.h> +#include <__memory/allocator.h> #include <__memory/compressed_pair.h> #include <__memory/pointer_traits.h> #include <__memory/unique_ptr.h> @@ -25,6 +28,7 @@ #include // abort #include #include +#include #include #include #if !defined(_LIBCPP_HAS_NO_ATOMIC_HEADER) diff --git a/libcxx/include/__memory/unique_ptr.h b/libcxx/include/__memory/unique_ptr.h index d730fc9ef4e5f..083e0a8c250d2 100644 --- a/libcxx/include/__memory/unique_ptr.h +++ b/libcxx/include/__memory/unique_ptr.h @@ -11,8 +11,9 @@ #define _LIBCPP___MEMORY_UNIQUE_PTR_H #include <__config> -#include <__functional_base> // std::less +#include <__functional_base> #include <__functional/hash.h> +#include <__functional/operations.h> #include <__memory/allocator_traits.h> // __pointer #include <__memory/compressed_pair.h> #include <__utility/forward.h> diff --git a/libcxx/include/__memory/uses_allocator.h b/libcxx/include/__memory/uses_allocator.h new file mode 100644 index 0000000000000..36e7520575458 --- /dev/null +++ b/libcxx/include/__memory/uses_allocator.h @@ -0,0 +1,60 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___MEMORY_USES_ALLOCATOR_H +#define _LIBCPP___MEMORY_USES_ALLOCATOR_H + +#include <__config> +#include +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template +struct __has_allocator_type +{ +private: + struct __two {char __lx; char __lxx;}; + template static __two __test(...); + template static char __test(typename _Up::allocator_type* = 0); +public: + static const bool value = sizeof(__test<_Tp>(0)) == 1; +}; + +template ::value> +struct __uses_allocator + : public integral_constant::value> +{ +}; + +template +struct __uses_allocator<_Tp, _Alloc, false> + : public false_type +{ +}; + +template +struct _LIBCPP_TEMPLATE_VIS uses_allocator + : public __uses_allocator<_Tp, _Alloc> +{ +}; + +#if _LIBCPP_STD_VER > 14 +template +_LIBCPP_INLINE_VAR constexpr size_t uses_allocator_v = uses_allocator<_Tp, _Alloc>::value; +#endif + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___MEMORY_USES_ALLOCATOR_H diff --git a/libcxx/include/__ranges/ref_view.h b/libcxx/include/__ranges/ref_view.h index 5ca4ca16065c9..1df7939aa7c6f 100644 --- a/libcxx/include/__ranges/ref_view.h +++ b/libcxx/include/__ranges/ref_view.h @@ -13,6 +13,7 @@ #include <__iterator/concepts.h> #include <__iterator/incrementable_traits.h> #include <__iterator/iterator_traits.h> +#include <__memory/addressof.h> #include <__ranges/access.h> #include <__ranges/concepts.h> #include <__ranges/data.h> diff --git a/libcxx/include/__string b/libcxx/include/__string index a00c73623c4f3..b77a7fb4f8d36 100644 --- a/libcxx/include/__string +++ b/libcxx/include/__string @@ -11,11 +11,20 @@ #define _LIBCPP___STRING #include <__config> -#include // for search and min -#include // for EOF -#include // for memcpy -#include // for wmemcpy -#include // for __murmur2_or_cityhash +#include <__algorithm/copy.h> +#include <__algorithm/copy_backward.h> +#include <__algorithm/copy_n.h> +#include <__algorithm/fill_n.h> +#include <__algorithm/find_first_of.h> +#include <__algorithm/find_end.h> +#include <__algorithm/min.h> +#include <__functional/hash.h> // for __murmur2_or_cityhash +#include <__iterator/iterator_traits.h> +#include // for EOF +#include // for uint_least16_t +#include // for memcpy +#include // for wmemcpy +#include // for __libcpp_is_constant_evaluated #include <__debug> diff --git a/libcxx/include/concepts b/libcxx/include/concepts index 0b51f53dcc393..3dec9b5279019 100644 --- a/libcxx/include/concepts +++ b/libcxx/include/concepts @@ -130,6 +130,7 @@ namespace std { */ #include <__config> +#include <__functional/invoke.h> #include <__functional_base> #include #include diff --git a/libcxx/include/experimental/__memory b/libcxx/include/experimental/__memory index 4cf8978468cee..b38b664b339ae 100644 --- a/libcxx/include/experimental/__memory +++ b/libcxx/include/experimental/__memory @@ -10,6 +10,8 @@ #ifndef _LIBCPP_EXPERIMENTAL___MEMORY #define _LIBCPP_EXPERIMENTAL___MEMORY +#include <__memory/allocator_arg_t.h> +#include <__memory/uses_allocator.h> #include #include // for erased_type #include <__functional_base> @@ -73,12 +75,35 @@ struct __lfts_uses_alloc_ctor > {}; +template +inline _LIBCPP_INLINE_VISIBILITY +void __user_alloc_construct_impl (integral_constant, _Tp *__storage, const _Allocator &, _Args &&... __args ) +{ + new (__storage) _Tp (_VSTD::forward<_Args>(__args)...); +} + +// FIXME: This should have a version which takes a non-const alloc. +template +inline _LIBCPP_INLINE_VISIBILITY +void __user_alloc_construct_impl (integral_constant, _Tp *__storage, const _Allocator &__a, _Args &&... __args ) +{ + new (__storage) _Tp (allocator_arg, __a, _VSTD::forward<_Args>(__args)...); +} + +// FIXME: This should have a version which takes a non-const alloc. +template +inline _LIBCPP_INLINE_VISIBILITY +void __user_alloc_construct_impl (integral_constant, _Tp *__storage, const _Allocator &__a, _Args &&... __args ) +{ + new (__storage) _Tp (_VSTD::forward<_Args>(__args)..., __a); +} + template inline _LIBCPP_INLINE_VISIBILITY void __lfts_user_alloc_construct( _Tp * __store, const _Alloc & __a, _Args &&... __args) { - _VSTD::__user_alloc_construct_impl( + ::std::experimental::fundamentals_v1::__user_alloc_construct_impl( typename __lfts_uses_alloc_ctor<_Tp, _Alloc, _Args...>::type() , __store, __a, _VSTD::forward<_Args>(__args)... ); diff --git a/libcxx/include/experimental/functional b/libcxx/include/experimental/functional index e18962002d4ae..e3220e16caeb8 100644 --- a/libcxx/include/experimental/functional +++ b/libcxx/include/experimental/functional @@ -86,6 +86,7 @@ inline namespace fundamentals_v1 { */ +#include <__memory/uses_allocator.h> #include #include #include diff --git a/libcxx/include/ext/__hash b/libcxx/include/ext/__hash index 86fd7ef2cfc48..fbeddf03a404b 100644 --- a/libcxx/include/ext/__hash +++ b/libcxx/include/ext/__hash @@ -12,6 +12,7 @@ #pragma GCC system_header +#include <__string> #include #include diff --git a/libcxx/include/functional b/libcxx/include/functional index 976b94585b0b8..ecbc5667af18f 100644 --- a/libcxx/include/functional +++ b/libcxx/include/functional @@ -1,5 +1,5 @@ // -*- C++ -*- -//===------------------------ functional ----------------------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -490,9 +490,27 @@ POLICY: For non-variadic implementations, the number of arguments is limited #include <__algorithm/search.h> #include <__config> #include <__debug> -#include <__functional_base> +#include <__functional/binary_function.h> // TODO: deprecate +#include <__functional/binary_negate.h> +#include <__functional/bind_front.h> +#include <__functional/bind.h> +#include <__functional/binder1st.h> +#include <__functional/binder2nd.h> +#include <__functional/default_searcher.h> +#include <__functional/function.h> #include <__functional/hash.h> -#include <__functional/unary_function.h> +#include <__functional/identity.h> +#include <__functional/invoke.h> +#include <__functional/mem_fn.h> // TODO: deprecate +#include <__functional/mem_fun_ref.h> +#include <__functional/not_fn.h> +#include <__functional/operations.h> +#include <__functional/pointer_to_binary_function.h> +#include <__functional/pointer_to_unary_function.h> +#include <__functional/ranges_operations.h> +#include <__functional/reference_wrapper.h> +#include <__functional/unary_function.h> // TODO: deprecate +#include <__functional/unary_negate.h> #include <__functional/unwrap_ref.h> #include <__utility/forward.h> #include @@ -508,2902 +526,4 @@ POLICY: For non-variadic implementations, the number of arguments is limited #pragma GCC system_header #endif -_LIBCPP_BEGIN_NAMESPACE_STD - -_LIBCPP_SUPPRESS_DEPRECATED_PUSH -#if _LIBCPP_STD_VER > 11 -template -#else -template -#endif -struct _LIBCPP_TEMPLATE_VIS plus -#if !defined(_LIBCPP_ABI_NO_BINDER_BASES) - : binary_function<_Tp, _Tp, _Tp> -#endif -{ -_LIBCPP_SUPPRESS_DEPRECATED_POP - typedef _Tp __result_type; // used by valarray -#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_BINDER_TYPEDEFS) - _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp result_type; - _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp first_argument_type; - _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp second_argument_type; -#endif - _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY - _Tp operator()(const _Tp& __x, const _Tp& __y) const - {return __x + __y;} -}; - -#if _LIBCPP_STD_VER > 11 -template <> -struct _LIBCPP_TEMPLATE_VIS plus -{ - template - _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY - auto operator()(_T1&& __t, _T2&& __u) const - _NOEXCEPT_(noexcept(_VSTD::forward<_T1>(__t) + _VSTD::forward<_T2>(__u))) - -> decltype (_VSTD::forward<_T1>(__t) + _VSTD::forward<_T2>(__u)) - { return _VSTD::forward<_T1>(__t) + _VSTD::forward<_T2>(__u); } - typedef void is_transparent; -}; -#endif - - -_LIBCPP_SUPPRESS_DEPRECATED_PUSH -#if _LIBCPP_STD_VER > 11 -template -#else -template -#endif -struct _LIBCPP_TEMPLATE_VIS minus -#if !defined(_LIBCPP_ABI_NO_BINDER_BASES) - : binary_function<_Tp, _Tp, _Tp> -#endif -{ -_LIBCPP_SUPPRESS_DEPRECATED_POP - typedef _Tp __result_type; // used by valarray -#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_BINDER_TYPEDEFS) - _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp result_type; - _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp first_argument_type; - _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp second_argument_type; -#endif - _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY - _Tp operator()(const _Tp& __x, const _Tp& __y) const - {return __x - __y;} -}; - -#if _LIBCPP_STD_VER > 11 -template <> -struct _LIBCPP_TEMPLATE_VIS minus -{ - template - _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY - auto operator()(_T1&& __t, _T2&& __u) const - _NOEXCEPT_(noexcept(_VSTD::forward<_T1>(__t) - _VSTD::forward<_T2>(__u))) - -> decltype (_VSTD::forward<_T1>(__t) - _VSTD::forward<_T2>(__u)) - { return _VSTD::forward<_T1>(__t) - _VSTD::forward<_T2>(__u); } - typedef void is_transparent; -}; -#endif - - -_LIBCPP_SUPPRESS_DEPRECATED_PUSH -#if _LIBCPP_STD_VER > 11 -template -#else -template -#endif -struct _LIBCPP_TEMPLATE_VIS multiplies -#if !defined(_LIBCPP_ABI_NO_BINDER_BASES) - : binary_function<_Tp, _Tp, _Tp> -#endif -{ -_LIBCPP_SUPPRESS_DEPRECATED_POP - typedef _Tp __result_type; // used by valarray -#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_BINDER_TYPEDEFS) - _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp result_type; - _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp first_argument_type; - _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp second_argument_type; -#endif - _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY - _Tp operator()(const _Tp& __x, const _Tp& __y) const - {return __x * __y;} -}; - -#if _LIBCPP_STD_VER > 11 -template <> -struct _LIBCPP_TEMPLATE_VIS multiplies -{ - template - _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY - auto operator()(_T1&& __t, _T2&& __u) const - _NOEXCEPT_(noexcept(_VSTD::forward<_T1>(__t) * _VSTD::forward<_T2>(__u))) - -> decltype (_VSTD::forward<_T1>(__t) * _VSTD::forward<_T2>(__u)) - { return _VSTD::forward<_T1>(__t) * _VSTD::forward<_T2>(__u); } - typedef void is_transparent; -}; -#endif - - -_LIBCPP_SUPPRESS_DEPRECATED_PUSH -#if _LIBCPP_STD_VER > 11 -template -#else -template -#endif -struct _LIBCPP_TEMPLATE_VIS divides -#if !defined(_LIBCPP_ABI_NO_BINDER_BASES) - : binary_function<_Tp, _Tp, _Tp> -#endif -{ -_LIBCPP_SUPPRESS_DEPRECATED_POP - typedef _Tp __result_type; // used by valarray -#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_BINDER_TYPEDEFS) - _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp result_type; - _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp first_argument_type; - _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp second_argument_type; -#endif - _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY - _Tp operator()(const _Tp& __x, const _Tp& __y) const - {return __x / __y;} -}; - -#if _LIBCPP_STD_VER > 11 -template <> -struct _LIBCPP_TEMPLATE_VIS divides -{ - template - _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY - auto operator()(_T1&& __t, _T2&& __u) const - _NOEXCEPT_(noexcept(_VSTD::forward<_T1>(__t) / _VSTD::forward<_T2>(__u))) - -> decltype (_VSTD::forward<_T1>(__t) / _VSTD::forward<_T2>(__u)) - { return _VSTD::forward<_T1>(__t) / _VSTD::forward<_T2>(__u); } - typedef void is_transparent; -}; -#endif - - -_LIBCPP_SUPPRESS_DEPRECATED_PUSH -#if _LIBCPP_STD_VER > 11 -template -#else -template -#endif -struct _LIBCPP_TEMPLATE_VIS modulus -#if !defined(_LIBCPP_ABI_NO_BINDER_BASES) - : binary_function<_Tp, _Tp, _Tp> -#endif -{ -_LIBCPP_SUPPRESS_DEPRECATED_POP - typedef _Tp __result_type; // used by valarray -#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_BINDER_TYPEDEFS) - _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp result_type; - _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp first_argument_type; - _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp second_argument_type; -#endif - _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY - _Tp operator()(const _Tp& __x, const _Tp& __y) const - {return __x % __y;} -}; - -#if _LIBCPP_STD_VER > 11 -template <> -struct _LIBCPP_TEMPLATE_VIS modulus -{ - template - _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY - auto operator()(_T1&& __t, _T2&& __u) const - _NOEXCEPT_(noexcept(_VSTD::forward<_T1>(__t) % _VSTD::forward<_T2>(__u))) - -> decltype (_VSTD::forward<_T1>(__t) % _VSTD::forward<_T2>(__u)) - { return _VSTD::forward<_T1>(__t) % _VSTD::forward<_T2>(__u); } - typedef void is_transparent; -}; -#endif - - -_LIBCPP_SUPPRESS_DEPRECATED_PUSH -#if _LIBCPP_STD_VER > 11 -template -#else -template -#endif -struct _LIBCPP_TEMPLATE_VIS negate -#if !defined(_LIBCPP_ABI_NO_BINDER_BASES) - : unary_function<_Tp, _Tp> -#endif -{ -_LIBCPP_SUPPRESS_DEPRECATED_POP - typedef _Tp __result_type; // used by valarray -#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_BINDER_TYPEDEFS) - _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp result_type; - _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp argument_type; -#endif - _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY - _Tp operator()(const _Tp& __x) const - {return -__x;} -}; - -#if _LIBCPP_STD_VER > 11 -template <> -struct _LIBCPP_TEMPLATE_VIS negate -{ - template - _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY - auto operator()(_Tp&& __x) const - _NOEXCEPT_(noexcept(- _VSTD::forward<_Tp>(__x))) - -> decltype (- _VSTD::forward<_Tp>(__x)) - { return - _VSTD::forward<_Tp>(__x); } - typedef void is_transparent; -}; -#endif - - -_LIBCPP_SUPPRESS_DEPRECATED_PUSH -#if _LIBCPP_STD_VER > 11 -template -#else -template -#endif -struct _LIBCPP_TEMPLATE_VIS equal_to -#if !defined(_LIBCPP_ABI_NO_BINDER_BASES) - : binary_function<_Tp, _Tp, bool> -#endif -{ -_LIBCPP_SUPPRESS_DEPRECATED_POP - typedef bool __result_type; // used by valarray -#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_BINDER_TYPEDEFS) - _LIBCPP_DEPRECATED_IN_CXX17 typedef bool result_type; - _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp first_argument_type; - _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp second_argument_type; -#endif - _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY - bool operator()(const _Tp& __x, const _Tp& __y) const - {return __x == __y;} -}; - -#if _LIBCPP_STD_VER > 11 -template <> -struct _LIBCPP_TEMPLATE_VIS equal_to -{ - template - _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY - auto operator()(_T1&& __t, _T2&& __u) const - _NOEXCEPT_(noexcept(_VSTD::forward<_T1>(__t) == _VSTD::forward<_T2>(__u))) - -> decltype (_VSTD::forward<_T1>(__t) == _VSTD::forward<_T2>(__u)) - { return _VSTD::forward<_T1>(__t) == _VSTD::forward<_T2>(__u); } - typedef void is_transparent; -}; -#endif - - -_LIBCPP_SUPPRESS_DEPRECATED_PUSH -#if _LIBCPP_STD_VER > 11 -template -#else -template -#endif -struct _LIBCPP_TEMPLATE_VIS not_equal_to -#if !defined(_LIBCPP_ABI_NO_BINDER_BASES) - : binary_function<_Tp, _Tp, bool> -#endif -{ -_LIBCPP_SUPPRESS_DEPRECATED_POP - typedef bool __result_type; // used by valarray -#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_BINDER_TYPEDEFS) - _LIBCPP_DEPRECATED_IN_CXX17 typedef bool result_type; - _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp first_argument_type; - _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp second_argument_type; -#endif - _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY - bool operator()(const _Tp& __x, const _Tp& __y) const - {return __x != __y;} -}; - -#if _LIBCPP_STD_VER > 11 -template <> -struct _LIBCPP_TEMPLATE_VIS not_equal_to -{ - template - _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY - auto operator()(_T1&& __t, _T2&& __u) const - _NOEXCEPT_(noexcept(_VSTD::forward<_T1>(__t) != _VSTD::forward<_T2>(__u))) - -> decltype (_VSTD::forward<_T1>(__t) != _VSTD::forward<_T2>(__u)) - { return _VSTD::forward<_T1>(__t) != _VSTD::forward<_T2>(__u); } - typedef void is_transparent; -}; -#endif - - -_LIBCPP_SUPPRESS_DEPRECATED_PUSH -#if _LIBCPP_STD_VER > 11 -template -#else -template -#endif -struct _LIBCPP_TEMPLATE_VIS greater -#if !defined(_LIBCPP_ABI_NO_BINDER_BASES) - : binary_function<_Tp, _Tp, bool> -#endif -{ -_LIBCPP_SUPPRESS_DEPRECATED_POP - typedef bool __result_type; // used by valarray -#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_BINDER_TYPEDEFS) - _LIBCPP_DEPRECATED_IN_CXX17 typedef bool result_type; - _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp first_argument_type; - _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp second_argument_type; -#endif - _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY - bool operator()(const _Tp& __x, const _Tp& __y) const - {return __x > __y;} -}; - -#if _LIBCPP_STD_VER > 11 -template <> -struct _LIBCPP_TEMPLATE_VIS greater -{ - template - _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY - auto operator()(_T1&& __t, _T2&& __u) const - _NOEXCEPT_(noexcept(_VSTD::forward<_T1>(__t) > _VSTD::forward<_T2>(__u))) - -> decltype (_VSTD::forward<_T1>(__t) > _VSTD::forward<_T2>(__u)) - { return _VSTD::forward<_T1>(__t) > _VSTD::forward<_T2>(__u); } - typedef void is_transparent; -}; -#endif - - -// less in <__functional_base> - - -_LIBCPP_SUPPRESS_DEPRECATED_PUSH -#if _LIBCPP_STD_VER > 11 -template -#else -template -#endif -struct _LIBCPP_TEMPLATE_VIS greater_equal -#if !defined(_LIBCPP_ABI_NO_BINDER_BASES) - : binary_function<_Tp, _Tp, bool> -#endif -{ -_LIBCPP_SUPPRESS_DEPRECATED_POP - typedef bool __result_type; // used by valarray -#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_BINDER_TYPEDEFS) - _LIBCPP_DEPRECATED_IN_CXX17 typedef bool result_type; - _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp first_argument_type; - _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp second_argument_type; -#endif - _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY - bool operator()(const _Tp& __x, const _Tp& __y) const - {return __x >= __y;} -}; - -#if _LIBCPP_STD_VER > 11 -template <> -struct _LIBCPP_TEMPLATE_VIS greater_equal -{ - template - _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY - auto operator()(_T1&& __t, _T2&& __u) const - _NOEXCEPT_(noexcept(_VSTD::forward<_T1>(__t) >= _VSTD::forward<_T2>(__u))) - -> decltype (_VSTD::forward<_T1>(__t) >= _VSTD::forward<_T2>(__u)) - { return _VSTD::forward<_T1>(__t) >= _VSTD::forward<_T2>(__u); } - typedef void is_transparent; -}; -#endif - - -_LIBCPP_SUPPRESS_DEPRECATED_PUSH -#if _LIBCPP_STD_VER > 11 -template -#else -template -#endif -struct _LIBCPP_TEMPLATE_VIS less_equal -#if !defined(_LIBCPP_ABI_NO_BINDER_BASES) - : binary_function<_Tp, _Tp, bool> -#endif -{ -_LIBCPP_SUPPRESS_DEPRECATED_POP - typedef bool __result_type; // used by valarray -#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_BINDER_TYPEDEFS) - _LIBCPP_DEPRECATED_IN_CXX17 typedef bool result_type; - _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp first_argument_type; - _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp second_argument_type; -#endif - _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY - bool operator()(const _Tp& __x, const _Tp& __y) const - {return __x <= __y;} -}; - -#if _LIBCPP_STD_VER > 11 -template <> -struct _LIBCPP_TEMPLATE_VIS less_equal -{ - template - _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY - auto operator()(_T1&& __t, _T2&& __u) const - _NOEXCEPT_(noexcept(_VSTD::forward<_T1>(__t) <= _VSTD::forward<_T2>(__u))) - -> decltype (_VSTD::forward<_T1>(__t) <= _VSTD::forward<_T2>(__u)) - { return _VSTD::forward<_T1>(__t) <= _VSTD::forward<_T2>(__u); } - typedef void is_transparent; -}; -#endif - - -_LIBCPP_SUPPRESS_DEPRECATED_PUSH -#if _LIBCPP_STD_VER > 11 -template -#else -template -#endif -struct _LIBCPP_TEMPLATE_VIS logical_and -#if !defined(_LIBCPP_ABI_NO_BINDER_BASES) - : binary_function<_Tp, _Tp, bool> -#endif -{ -_LIBCPP_SUPPRESS_DEPRECATED_POP - typedef bool __result_type; // used by valarray -#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_BINDER_TYPEDEFS) - _LIBCPP_DEPRECATED_IN_CXX17 typedef bool result_type; - _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp first_argument_type; - _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp second_argument_type; -#endif - _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY - bool operator()(const _Tp& __x, const _Tp& __y) const - {return __x && __y;} -}; - -#if _LIBCPP_STD_VER > 11 -template <> -struct _LIBCPP_TEMPLATE_VIS logical_and -{ - template - _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY - auto operator()(_T1&& __t, _T2&& __u) const - _NOEXCEPT_(noexcept(_VSTD::forward<_T1>(__t) && _VSTD::forward<_T2>(__u))) - -> decltype (_VSTD::forward<_T1>(__t) && _VSTD::forward<_T2>(__u)) - { return _VSTD::forward<_T1>(__t) && _VSTD::forward<_T2>(__u); } - typedef void is_transparent; -}; -#endif - - -_LIBCPP_SUPPRESS_DEPRECATED_PUSH -#if _LIBCPP_STD_VER > 11 -template -#else -template -#endif -struct _LIBCPP_TEMPLATE_VIS logical_or -#if !defined(_LIBCPP_ABI_NO_BINDER_BASES) - : binary_function<_Tp, _Tp, bool> -#endif -{ -_LIBCPP_SUPPRESS_DEPRECATED_POP - typedef bool __result_type; // used by valarray -#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_BINDER_TYPEDEFS) - _LIBCPP_DEPRECATED_IN_CXX17 typedef bool result_type; - _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp first_argument_type; - _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp second_argument_type; -#endif - _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY - bool operator()(const _Tp& __x, const _Tp& __y) const - {return __x || __y;} -}; - -#if _LIBCPP_STD_VER > 11 -template <> -struct _LIBCPP_TEMPLATE_VIS logical_or -{ - template - _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY - auto operator()(_T1&& __t, _T2&& __u) const - _NOEXCEPT_(noexcept(_VSTD::forward<_T1>(__t) || _VSTD::forward<_T2>(__u))) - -> decltype (_VSTD::forward<_T1>(__t) || _VSTD::forward<_T2>(__u)) - { return _VSTD::forward<_T1>(__t) || _VSTD::forward<_T2>(__u); } - typedef void is_transparent; -}; -#endif - - -_LIBCPP_SUPPRESS_DEPRECATED_PUSH -#if _LIBCPP_STD_VER > 11 -template -#else -template -#endif -struct _LIBCPP_TEMPLATE_VIS logical_not -#if !defined(_LIBCPP_ABI_NO_BINDER_BASES) - : unary_function<_Tp, bool> -#endif -{ -_LIBCPP_SUPPRESS_DEPRECATED_POP - typedef bool __result_type; // used by valarray -#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_BINDER_TYPEDEFS) - _LIBCPP_DEPRECATED_IN_CXX17 typedef bool result_type; - _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp argument_type; -#endif - _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY - bool operator()(const _Tp& __x) const - {return !__x;} -}; - -#if _LIBCPP_STD_VER > 11 -template <> -struct _LIBCPP_TEMPLATE_VIS logical_not -{ - template - _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY - auto operator()(_Tp&& __x) const - _NOEXCEPT_(noexcept(!_VSTD::forward<_Tp>(__x))) - -> decltype (!_VSTD::forward<_Tp>(__x)) - { return !_VSTD::forward<_Tp>(__x); } - typedef void is_transparent; -}; -#endif - - -_LIBCPP_SUPPRESS_DEPRECATED_PUSH -#if _LIBCPP_STD_VER > 11 -template -#else -template -#endif -struct _LIBCPP_TEMPLATE_VIS bit_and -#if !defined(_LIBCPP_ABI_NO_BINDER_BASES) - : binary_function<_Tp, _Tp, _Tp> -#endif -{ -_LIBCPP_SUPPRESS_DEPRECATED_POP - typedef _Tp __result_type; // used by valarray -#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_BINDER_TYPEDEFS) - _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp result_type; - _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp first_argument_type; - _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp second_argument_type; -#endif - _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY - _Tp operator()(const _Tp& __x, const _Tp& __y) const - {return __x & __y;} -}; - -#if _LIBCPP_STD_VER > 11 -template <> -struct _LIBCPP_TEMPLATE_VIS bit_and -{ - template - _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY - auto operator()(_T1&& __t, _T2&& __u) const - _NOEXCEPT_(noexcept(_VSTD::forward<_T1>(__t) & _VSTD::forward<_T2>(__u))) - -> decltype (_VSTD::forward<_T1>(__t) & _VSTD::forward<_T2>(__u)) - { return _VSTD::forward<_T1>(__t) & _VSTD::forward<_T2>(__u); } - typedef void is_transparent; -}; -#endif - - -_LIBCPP_SUPPRESS_DEPRECATED_PUSH -#if _LIBCPP_STD_VER > 11 -template -#else -template -#endif -struct _LIBCPP_TEMPLATE_VIS bit_or -#if !defined(_LIBCPP_ABI_NO_BINDER_BASES) - : binary_function<_Tp, _Tp, _Tp> -#endif -{ -_LIBCPP_SUPPRESS_DEPRECATED_POP - typedef _Tp __result_type; // used by valarray -#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_BINDER_TYPEDEFS) - _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp result_type; - _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp first_argument_type; - _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp second_argument_type; -#endif - _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY - _Tp operator()(const _Tp& __x, const _Tp& __y) const - {return __x | __y;} -}; - -#if _LIBCPP_STD_VER > 11 -template <> -struct _LIBCPP_TEMPLATE_VIS bit_or -{ - template - _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY - auto operator()(_T1&& __t, _T2&& __u) const - _NOEXCEPT_(noexcept(_VSTD::forward<_T1>(__t) | _VSTD::forward<_T2>(__u))) - -> decltype (_VSTD::forward<_T1>(__t) | _VSTD::forward<_T2>(__u)) - { return _VSTD::forward<_T1>(__t) | _VSTD::forward<_T2>(__u); } - typedef void is_transparent; -}; -#endif - - -_LIBCPP_SUPPRESS_DEPRECATED_PUSH -#if _LIBCPP_STD_VER > 11 -template -#else -template -#endif -struct _LIBCPP_TEMPLATE_VIS bit_xor -#if !defined(_LIBCPP_ABI_NO_BINDER_BASES) - : binary_function<_Tp, _Tp, _Tp> -#endif -{ -_LIBCPP_SUPPRESS_DEPRECATED_POP - typedef _Tp __result_type; // used by valarray -#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_BINDER_TYPEDEFS) - _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp result_type; - _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp first_argument_type; - _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp second_argument_type; -#endif - _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY - _Tp operator()(const _Tp& __x, const _Tp& __y) const - {return __x ^ __y;} -}; - -#if _LIBCPP_STD_VER > 11 -template <> -struct _LIBCPP_TEMPLATE_VIS bit_xor -{ - template - _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY - auto operator()(_T1&& __t, _T2&& __u) const - _NOEXCEPT_(noexcept(_VSTD::forward<_T1>(__t) ^ _VSTD::forward<_T2>(__u))) - -> decltype (_VSTD::forward<_T1>(__t) ^ _VSTD::forward<_T2>(__u)) - { return _VSTD::forward<_T1>(__t) ^ _VSTD::forward<_T2>(__u); } - typedef void is_transparent; -}; -#endif - - -#if _LIBCPP_STD_VER > 11 -_LIBCPP_SUPPRESS_DEPRECATED_PUSH -template -struct _LIBCPP_TEMPLATE_VIS bit_not -#if !defined(_LIBCPP_ABI_NO_BINDER_BASES) - : unary_function<_Tp, _Tp> -#endif -{ -_LIBCPP_SUPPRESS_DEPRECATED_POP -#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_BINDER_TYPEDEFS) - _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp result_type; - _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp argument_type; -#endif - _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY - _Tp operator()(const _Tp& __x) const - {return ~__x;} -}; - -template <> -struct _LIBCPP_TEMPLATE_VIS bit_not -{ - template - _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY - auto operator()(_Tp&& __x) const - _NOEXCEPT_(noexcept(~_VSTD::forward<_Tp>(__x))) - -> decltype (~_VSTD::forward<_Tp>(__x)) - { return ~_VSTD::forward<_Tp>(__x); } - typedef void is_transparent; -}; -#endif - -#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_NEGATORS) -template -class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX17 unary_negate - : public unary_function -{ - _Predicate __pred_; -public: - _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY - explicit unary_negate(const _Predicate& __pred) - : __pred_(__pred) {} - _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY - bool operator()(const typename _Predicate::argument_type& __x) const - {return !__pred_(__x);} -}; - -template -_LIBCPP_DEPRECATED_IN_CXX17 inline _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY -unary_negate<_Predicate> -not1(const _Predicate& __pred) {return unary_negate<_Predicate>(__pred);} - -template -class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX17 binary_negate - : public binary_function -{ - _Predicate __pred_; -public: - _LIBCPP_INLINE_VISIBILITY explicit _LIBCPP_CONSTEXPR_AFTER_CXX11 - binary_negate(const _Predicate& __pred) : __pred_(__pred) {} - - _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY - bool operator()(const typename _Predicate::first_argument_type& __x, - const typename _Predicate::second_argument_type& __y) const - {return !__pred_(__x, __y);} -}; - -template -_LIBCPP_DEPRECATED_IN_CXX17 inline _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY -binary_negate<_Predicate> -not2(const _Predicate& __pred) {return binary_negate<_Predicate>(__pred);} -#endif // _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_NEGATORS) - -#if _LIBCPP_STD_VER <= 14 || defined(_LIBCPP_ENABLE_CXX17_REMOVED_BINDERS) -template -class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 binder1st - : public unary_function -{ -protected: - __Operation op; - typename __Operation::first_argument_type value; -public: - _LIBCPP_INLINE_VISIBILITY binder1st(const __Operation& __x, - const typename __Operation::first_argument_type __y) - : op(__x), value(__y) {} - _LIBCPP_INLINE_VISIBILITY typename __Operation::result_type operator() - (typename __Operation::second_argument_type& __x) const - {return op(value, __x);} - _LIBCPP_INLINE_VISIBILITY typename __Operation::result_type operator() - (const typename __Operation::second_argument_type& __x) const - {return op(value, __x);} -}; - -template -_LIBCPP_DEPRECATED_IN_CXX11 inline _LIBCPP_INLINE_VISIBILITY -binder1st<__Operation> -bind1st(const __Operation& __op, const _Tp& __x) - {return binder1st<__Operation>(__op, __x);} - -template -class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 binder2nd - : public unary_function -{ -protected: - __Operation op; - typename __Operation::second_argument_type value; -public: - _LIBCPP_INLINE_VISIBILITY - binder2nd(const __Operation& __x, const typename __Operation::second_argument_type __y) - : op(__x), value(__y) {} - _LIBCPP_INLINE_VISIBILITY typename __Operation::result_type operator() - ( typename __Operation::first_argument_type& __x) const - {return op(__x, value);} - _LIBCPP_INLINE_VISIBILITY typename __Operation::result_type operator() - (const typename __Operation::first_argument_type& __x) const - {return op(__x, value);} -}; - -template -_LIBCPP_DEPRECATED_IN_CXX11 inline _LIBCPP_INLINE_VISIBILITY -binder2nd<__Operation> -bind2nd(const __Operation& __op, const _Tp& __x) - {return binder2nd<__Operation>(__op, __x);} - -template -class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 pointer_to_unary_function - : public unary_function<_Arg, _Result> -{ - _Result (*__f_)(_Arg); -public: - _LIBCPP_INLINE_VISIBILITY explicit pointer_to_unary_function(_Result (*__f)(_Arg)) - : __f_(__f) {} - _LIBCPP_INLINE_VISIBILITY _Result operator()(_Arg __x) const - {return __f_(__x);} -}; - -template -_LIBCPP_DEPRECATED_IN_CXX11 inline _LIBCPP_INLINE_VISIBILITY -pointer_to_unary_function<_Arg,_Result> -ptr_fun(_Result (*__f)(_Arg)) - {return pointer_to_unary_function<_Arg,_Result>(__f);} - -template -class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 pointer_to_binary_function - : public binary_function<_Arg1, _Arg2, _Result> -{ - _Result (*__f_)(_Arg1, _Arg2); -public: - _LIBCPP_INLINE_VISIBILITY explicit pointer_to_binary_function(_Result (*__f)(_Arg1, _Arg2)) - : __f_(__f) {} - _LIBCPP_INLINE_VISIBILITY _Result operator()(_Arg1 __x, _Arg2 __y) const - {return __f_(__x, __y);} -}; - -template -_LIBCPP_DEPRECATED_IN_CXX11 inline _LIBCPP_INLINE_VISIBILITY -pointer_to_binary_function<_Arg1,_Arg2,_Result> -ptr_fun(_Result (*__f)(_Arg1,_Arg2)) - {return pointer_to_binary_function<_Arg1,_Arg2,_Result>(__f);} - -template -class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 mem_fun_t - : public unary_function<_Tp*, _Sp> -{ - _Sp (_Tp::*__p_)(); -public: - _LIBCPP_INLINE_VISIBILITY explicit mem_fun_t(_Sp (_Tp::*__p)()) - : __p_(__p) {} - _LIBCPP_INLINE_VISIBILITY _Sp operator()(_Tp* __p) const - {return (__p->*__p_)();} -}; - -template -class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 mem_fun1_t - : public binary_function<_Tp*, _Ap, _Sp> -{ - _Sp (_Tp::*__p_)(_Ap); -public: - _LIBCPP_INLINE_VISIBILITY explicit mem_fun1_t(_Sp (_Tp::*__p)(_Ap)) - : __p_(__p) {} - _LIBCPP_INLINE_VISIBILITY _Sp operator()(_Tp* __p, _Ap __x) const - {return (__p->*__p_)(__x);} -}; - -template -_LIBCPP_DEPRECATED_IN_CXX11 inline _LIBCPP_INLINE_VISIBILITY -mem_fun_t<_Sp,_Tp> -mem_fun(_Sp (_Tp::*__f)()) - {return mem_fun_t<_Sp,_Tp>(__f);} - -template -_LIBCPP_DEPRECATED_IN_CXX11 inline _LIBCPP_INLINE_VISIBILITY -mem_fun1_t<_Sp,_Tp,_Ap> -mem_fun(_Sp (_Tp::*__f)(_Ap)) - {return mem_fun1_t<_Sp,_Tp,_Ap>(__f);} - -template -class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 mem_fun_ref_t - : public unary_function<_Tp, _Sp> -{ - _Sp (_Tp::*__p_)(); -public: - _LIBCPP_INLINE_VISIBILITY explicit mem_fun_ref_t(_Sp (_Tp::*__p)()) - : __p_(__p) {} - _LIBCPP_INLINE_VISIBILITY _Sp operator()(_Tp& __p) const - {return (__p.*__p_)();} -}; - -template -class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 mem_fun1_ref_t - : public binary_function<_Tp, _Ap, _Sp> -{ - _Sp (_Tp::*__p_)(_Ap); -public: - _LIBCPP_INLINE_VISIBILITY explicit mem_fun1_ref_t(_Sp (_Tp::*__p)(_Ap)) - : __p_(__p) {} - _LIBCPP_INLINE_VISIBILITY _Sp operator()(_Tp& __p, _Ap __x) const - {return (__p.*__p_)(__x);} -}; - -template -_LIBCPP_DEPRECATED_IN_CXX11 inline _LIBCPP_INLINE_VISIBILITY -mem_fun_ref_t<_Sp,_Tp> -mem_fun_ref(_Sp (_Tp::*__f)()) - {return mem_fun_ref_t<_Sp,_Tp>(__f);} - -template -_LIBCPP_DEPRECATED_IN_CXX11 inline _LIBCPP_INLINE_VISIBILITY -mem_fun1_ref_t<_Sp,_Tp,_Ap> -mem_fun_ref(_Sp (_Tp::*__f)(_Ap)) - {return mem_fun1_ref_t<_Sp,_Tp,_Ap>(__f);} - -template -class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 const_mem_fun_t - : public unary_function -{ - _Sp (_Tp::*__p_)() const; -public: - _LIBCPP_INLINE_VISIBILITY explicit const_mem_fun_t(_Sp (_Tp::*__p)() const) - : __p_(__p) {} - _LIBCPP_INLINE_VISIBILITY _Sp operator()(const _Tp* __p) const - {return (__p->*__p_)();} -}; - -template -class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 const_mem_fun1_t - : public binary_function -{ - _Sp (_Tp::*__p_)(_Ap) const; -public: - _LIBCPP_INLINE_VISIBILITY explicit const_mem_fun1_t(_Sp (_Tp::*__p)(_Ap) const) - : __p_(__p) {} - _LIBCPP_INLINE_VISIBILITY _Sp operator()(const _Tp* __p, _Ap __x) const - {return (__p->*__p_)(__x);} -}; - -template -_LIBCPP_DEPRECATED_IN_CXX11 inline _LIBCPP_INLINE_VISIBILITY -const_mem_fun_t<_Sp,_Tp> -mem_fun(_Sp (_Tp::*__f)() const) - {return const_mem_fun_t<_Sp,_Tp>(__f);} - -template -_LIBCPP_DEPRECATED_IN_CXX11 inline _LIBCPP_INLINE_VISIBILITY -const_mem_fun1_t<_Sp,_Tp,_Ap> -mem_fun(_Sp (_Tp::*__f)(_Ap) const) - {return const_mem_fun1_t<_Sp,_Tp,_Ap>(__f);} - -template -class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 const_mem_fun_ref_t - : public unary_function<_Tp, _Sp> -{ - _Sp (_Tp::*__p_)() const; -public: - _LIBCPP_INLINE_VISIBILITY explicit const_mem_fun_ref_t(_Sp (_Tp::*__p)() const) - : __p_(__p) {} - _LIBCPP_INLINE_VISIBILITY _Sp operator()(const _Tp& __p) const - {return (__p.*__p_)();} -}; - -template -class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 const_mem_fun1_ref_t - : public binary_function<_Tp, _Ap, _Sp> -{ - _Sp (_Tp::*__p_)(_Ap) const; -public: - _LIBCPP_INLINE_VISIBILITY explicit const_mem_fun1_ref_t(_Sp (_Tp::*__p)(_Ap) const) - : __p_(__p) {} - _LIBCPP_INLINE_VISIBILITY _Sp operator()(const _Tp& __p, _Ap __x) const - {return (__p.*__p_)(__x);} -}; - -template -_LIBCPP_DEPRECATED_IN_CXX11 inline _LIBCPP_INLINE_VISIBILITY -const_mem_fun_ref_t<_Sp,_Tp> -mem_fun_ref(_Sp (_Tp::*__f)() const) - {return const_mem_fun_ref_t<_Sp,_Tp>(__f);} - -template -_LIBCPP_DEPRECATED_IN_CXX11 inline _LIBCPP_INLINE_VISIBILITY -const_mem_fun1_ref_t<_Sp,_Tp,_Ap> -mem_fun_ref(_Sp (_Tp::*__f)(_Ap) const) - {return const_mem_fun1_ref_t<_Sp,_Tp,_Ap>(__f);} -#endif - -//////////////////////////////////////////////////////////////////////////////// -// MEMFUN -//============================================================================== - -template -class __mem_fn -#if _LIBCPP_STD_VER <= 17 || !defined(_LIBCPP_ABI_NO_BINDER_BASES) - : public __weak_result_type<_Tp> -#endif -{ -public: - // types - typedef _Tp type; -private: - type __f_; - -public: - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 - __mem_fn(type __f) _NOEXCEPT : __f_(__f) {} - -#ifndef _LIBCPP_CXX03_LANG - // invoke - template - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 - typename __invoke_return::type - operator() (_ArgTypes&&... __args) const { - return _VSTD::__invoke(__f_, _VSTD::forward<_ArgTypes>(__args)...); - } -#else - - template - _LIBCPP_INLINE_VISIBILITY - typename __invoke_return0::type - operator() (_A0& __a0) const { - return _VSTD::__invoke(__f_, __a0); - } - - template - _LIBCPP_INLINE_VISIBILITY - typename __invoke_return0::type - operator() (_A0 const& __a0) const { - return _VSTD::__invoke(__f_, __a0); - } - - template - _LIBCPP_INLINE_VISIBILITY - typename __invoke_return1::type - operator() (_A0& __a0, _A1& __a1) const { - return _VSTD::__invoke(__f_, __a0, __a1); - } - - template - _LIBCPP_INLINE_VISIBILITY - typename __invoke_return1::type - operator() (_A0 const& __a0, _A1& __a1) const { - return _VSTD::__invoke(__f_, __a0, __a1); - } - - template - _LIBCPP_INLINE_VISIBILITY - typename __invoke_return1::type - operator() (_A0& __a0, _A1 const& __a1) const { - return _VSTD::__invoke(__f_, __a0, __a1); - } - - template - _LIBCPP_INLINE_VISIBILITY - typename __invoke_return1::type - operator() (_A0 const& __a0, _A1 const& __a1) const { - return _VSTD::__invoke(__f_, __a0, __a1); - } - - template - _LIBCPP_INLINE_VISIBILITY - typename __invoke_return2::type - operator() (_A0& __a0, _A1& __a1, _A2& __a2) const { - return _VSTD::__invoke(__f_, __a0, __a1, __a2); - } - - template - _LIBCPP_INLINE_VISIBILITY - typename __invoke_return2::type - operator() (_A0 const& __a0, _A1& __a1, _A2& __a2) const { - return _VSTD::__invoke(__f_, __a0, __a1, __a2); - } - - template - _LIBCPP_INLINE_VISIBILITY - typename __invoke_return2::type - operator() (_A0& __a0, _A1 const& __a1, _A2& __a2) const { - return _VSTD::__invoke(__f_, __a0, __a1, __a2); - } - - template - _LIBCPP_INLINE_VISIBILITY - typename __invoke_return2::type - operator() (_A0& __a0, _A1& __a1, _A2 const& __a2) const { - return _VSTD::__invoke(__f_, __a0, __a1, __a2); - } - - template - _LIBCPP_INLINE_VISIBILITY - typename __invoke_return2::type - operator() (_A0 const& __a0, _A1 const& __a1, _A2& __a2) const { - return _VSTD::__invoke(__f_, __a0, __a1, __a2); - } - - template - _LIBCPP_INLINE_VISIBILITY - typename __invoke_return2::type - operator() (_A0 const& __a0, _A1& __a1, _A2 const& __a2) const { - return _VSTD::__invoke(__f_, __a0, __a1, __a2); - } - - template - _LIBCPP_INLINE_VISIBILITY - typename __invoke_return2::type - operator() (_A0& __a0, _A1 const& __a1, _A2 const& __a2) const { - return _VSTD::__invoke(__f_, __a0, __a1, __a2); - } - - template - _LIBCPP_INLINE_VISIBILITY - typename __invoke_return2::type - operator() (_A0 const& __a0, _A1 const& __a1, _A2 const& __a2) const { - return _VSTD::__invoke(__f_, __a0, __a1, __a2); - } -#endif -}; - -template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -__mem_fn<_Rp _Tp::*> -mem_fn(_Rp _Tp::* __pm) _NOEXCEPT -{ - return __mem_fn<_Rp _Tp::*>(__pm); -} - -//////////////////////////////////////////////////////////////////////////////// -// FUNCTION -//============================================================================== - -// bad_function_call - -class _LIBCPP_EXCEPTION_ABI bad_function_call - : public exception -{ -#ifdef _LIBCPP_ABI_BAD_FUNCTION_CALL_KEY_FUNCTION -public: - virtual ~bad_function_call() _NOEXCEPT; - - virtual const char* what() const _NOEXCEPT; -#endif -}; - -_LIBCPP_NORETURN inline _LIBCPP_INLINE_VISIBILITY -void __throw_bad_function_call() -{ -#ifndef _LIBCPP_NO_EXCEPTIONS - throw bad_function_call(); -#else - _VSTD::abort(); -#endif -} - -#if defined(_LIBCPP_CXX03_LANG) && !defined(_LIBCPP_DISABLE_DEPRECATION_WARNINGS) && __has_attribute(deprecated) -# define _LIBCPP_DEPRECATED_CXX03_FUNCTION \ - __attribute__((deprecated("Using std::function in C++03 is not supported anymore. Please upgrade to C++11 or later, or use a different type"))) -#else -# define _LIBCPP_DEPRECATED_CXX03_FUNCTION /* nothing */ -#endif - -template class _LIBCPP_DEPRECATED_CXX03_FUNCTION _LIBCPP_TEMPLATE_VIS function; // undefined - -namespace __function -{ - -template -struct __maybe_derive_from_unary_function -{ -}; - -template -struct __maybe_derive_from_unary_function<_Rp(_A1)> - : public unary_function<_A1, _Rp> -{ -}; - -template -struct __maybe_derive_from_binary_function -{ -}; - -template -struct __maybe_derive_from_binary_function<_Rp(_A1, _A2)> - : public binary_function<_A1, _A2, _Rp> -{ -}; - -template -_LIBCPP_INLINE_VISIBILITY -bool __not_null(_Fp const&) { return true; } - -template -_LIBCPP_INLINE_VISIBILITY -bool __not_null(_Fp* __ptr) { return __ptr; } - -template -_LIBCPP_INLINE_VISIBILITY -bool __not_null(_Ret _Class::*__ptr) { return __ptr; } - -template -_LIBCPP_INLINE_VISIBILITY -bool __not_null(function<_Fp> const& __f) { return !!__f; } - -#ifdef _LIBCPP_HAS_EXTENSION_BLOCKS -template -_LIBCPP_INLINE_VISIBILITY -bool __not_null(_Rp (^__p)(_Args...)) { return __p; } -#endif - -} // namespace __function - -#ifndef _LIBCPP_CXX03_LANG - -namespace __function { - -// __alloc_func holds a functor and an allocator. - -template class __alloc_func; -template -class __default_alloc_func; - -template -class __alloc_func<_Fp, _Ap, _Rp(_ArgTypes...)> -{ - __compressed_pair<_Fp, _Ap> __f_; - - public: - typedef _LIBCPP_NODEBUG_TYPE _Fp _Target; - typedef _LIBCPP_NODEBUG_TYPE _Ap _Alloc; - - _LIBCPP_INLINE_VISIBILITY - const _Target& __target() const { return __f_.first(); } - - // WIN32 APIs may define __allocator, so use __get_allocator instead. - _LIBCPP_INLINE_VISIBILITY - const _Alloc& __get_allocator() const { return __f_.second(); } - - _LIBCPP_INLINE_VISIBILITY - explicit __alloc_func(_Target&& __f) - : __f_(piecewise_construct, _VSTD::forward_as_tuple(_VSTD::move(__f)), - _VSTD::forward_as_tuple()) - { - } - - _LIBCPP_INLINE_VISIBILITY - explicit __alloc_func(const _Target& __f, const _Alloc& __a) - : __f_(piecewise_construct, _VSTD::forward_as_tuple(__f), - _VSTD::forward_as_tuple(__a)) - { - } - - _LIBCPP_INLINE_VISIBILITY - explicit __alloc_func(const _Target& __f, _Alloc&& __a) - : __f_(piecewise_construct, _VSTD::forward_as_tuple(__f), - _VSTD::forward_as_tuple(_VSTD::move(__a))) - { - } - - _LIBCPP_INLINE_VISIBILITY - explicit __alloc_func(_Target&& __f, _Alloc&& __a) - : __f_(piecewise_construct, _VSTD::forward_as_tuple(_VSTD::move(__f)), - _VSTD::forward_as_tuple(_VSTD::move(__a))) - { - } - - _LIBCPP_INLINE_VISIBILITY - _Rp operator()(_ArgTypes&&... __arg) - { - typedef __invoke_void_return_wrapper<_Rp> _Invoker; - return _Invoker::__call(__f_.first(), - _VSTD::forward<_ArgTypes>(__arg)...); - } - - _LIBCPP_INLINE_VISIBILITY - __alloc_func* __clone() const - { - typedef allocator_traits<_Alloc> __alloc_traits; - typedef - typename __rebind_alloc_helper<__alloc_traits, __alloc_func>::type - _AA; - _AA __a(__f_.second()); - typedef __allocator_destructor<_AA> _Dp; - unique_ptr<__alloc_func, _Dp> __hold(__a.allocate(1), _Dp(__a, 1)); - ::new ((void*)__hold.get()) __alloc_func(__f_.first(), _Alloc(__a)); - return __hold.release(); - } - - _LIBCPP_INLINE_VISIBILITY - void destroy() _NOEXCEPT { __f_.~__compressed_pair<_Target, _Alloc>(); } - - static void __destroy_and_delete(__alloc_func* __f) { - typedef allocator_traits<_Alloc> __alloc_traits; - typedef typename __rebind_alloc_helper<__alloc_traits, __alloc_func>::type - _FunAlloc; - _FunAlloc __a(__f->__get_allocator()); - __f->destroy(); - __a.deallocate(__f, 1); - } -}; - -template -class __default_alloc_func<_Fp, _Rp(_ArgTypes...)> { - _Fp __f_; - -public: - typedef _LIBCPP_NODEBUG_TYPE _Fp _Target; - - _LIBCPP_INLINE_VISIBILITY - const _Target& __target() const { return __f_; } - - _LIBCPP_INLINE_VISIBILITY - explicit __default_alloc_func(_Target&& __f) : __f_(_VSTD::move(__f)) {} - - _LIBCPP_INLINE_VISIBILITY - explicit __default_alloc_func(const _Target& __f) : __f_(__f) {} - - _LIBCPP_INLINE_VISIBILITY - _Rp operator()(_ArgTypes&&... __arg) { - typedef __invoke_void_return_wrapper<_Rp> _Invoker; - return _Invoker::__call(__f_, _VSTD::forward<_ArgTypes>(__arg)...); - } - - _LIBCPP_INLINE_VISIBILITY - __default_alloc_func* __clone() const { - __builtin_new_allocator::__holder_t __hold = - __builtin_new_allocator::__allocate_type<__default_alloc_func>(1); - __default_alloc_func* __res = - ::new ((void*)__hold.get()) __default_alloc_func(__f_); - (void)__hold.release(); - return __res; - } - - _LIBCPP_INLINE_VISIBILITY - void destroy() _NOEXCEPT { __f_.~_Target(); } - - static void __destroy_and_delete(__default_alloc_func* __f) { - __f->destroy(); - __builtin_new_allocator::__deallocate_type<__default_alloc_func>(__f, 1); - } -}; - -// __base provides an abstract interface for copyable functors. - -template class _LIBCPP_TEMPLATE_VIS __base; - -template -class __base<_Rp(_ArgTypes...)> -{ - __base(const __base&); - __base& operator=(const __base&); -public: - _LIBCPP_INLINE_VISIBILITY __base() {} - _LIBCPP_INLINE_VISIBILITY virtual ~__base() {} - virtual __base* __clone() const = 0; - virtual void __clone(__base*) const = 0; - virtual void destroy() _NOEXCEPT = 0; - virtual void destroy_deallocate() _NOEXCEPT = 0; - virtual _Rp operator()(_ArgTypes&& ...) = 0; -#ifndef _LIBCPP_NO_RTTI - virtual const void* target(const type_info&) const _NOEXCEPT = 0; - virtual const std::type_info& target_type() const _NOEXCEPT = 0; -#endif // _LIBCPP_NO_RTTI -}; - -// __func implements __base for a given functor type. - -template class __func; - -template -class __func<_Fp, _Alloc, _Rp(_ArgTypes...)> - : public __base<_Rp(_ArgTypes...)> -{ - __alloc_func<_Fp, _Alloc, _Rp(_ArgTypes...)> __f_; -public: - _LIBCPP_INLINE_VISIBILITY - explicit __func(_Fp&& __f) - : __f_(_VSTD::move(__f)) {} - - _LIBCPP_INLINE_VISIBILITY - explicit __func(const _Fp& __f, const _Alloc& __a) - : __f_(__f, __a) {} - - _LIBCPP_INLINE_VISIBILITY - explicit __func(const _Fp& __f, _Alloc&& __a) - : __f_(__f, _VSTD::move(__a)) {} - - _LIBCPP_INLINE_VISIBILITY - explicit __func(_Fp&& __f, _Alloc&& __a) - : __f_(_VSTD::move(__f), _VSTD::move(__a)) {} - - virtual __base<_Rp(_ArgTypes...)>* __clone() const; - virtual void __clone(__base<_Rp(_ArgTypes...)>*) const; - virtual void destroy() _NOEXCEPT; - virtual void destroy_deallocate() _NOEXCEPT; - virtual _Rp operator()(_ArgTypes&&... __arg); -#ifndef _LIBCPP_NO_RTTI - virtual const void* target(const type_info&) const _NOEXCEPT; - virtual const std::type_info& target_type() const _NOEXCEPT; -#endif // _LIBCPP_NO_RTTI -}; - -template -__base<_Rp(_ArgTypes...)>* -__func<_Fp, _Alloc, _Rp(_ArgTypes...)>::__clone() const -{ - typedef allocator_traits<_Alloc> __alloc_traits; - typedef typename __rebind_alloc_helper<__alloc_traits, __func>::type _Ap; - _Ap __a(__f_.__get_allocator()); - typedef __allocator_destructor<_Ap> _Dp; - unique_ptr<__func, _Dp> __hold(__a.allocate(1), _Dp(__a, 1)); - ::new ((void*)__hold.get()) __func(__f_.__target(), _Alloc(__a)); - return __hold.release(); -} - -template -void -__func<_Fp, _Alloc, _Rp(_ArgTypes...)>::__clone(__base<_Rp(_ArgTypes...)>* __p) const -{ - ::new ((void*)__p) __func(__f_.__target(), __f_.__get_allocator()); -} - -template -void -__func<_Fp, _Alloc, _Rp(_ArgTypes...)>::destroy() _NOEXCEPT -{ - __f_.destroy(); -} - -template -void -__func<_Fp, _Alloc, _Rp(_ArgTypes...)>::destroy_deallocate() _NOEXCEPT -{ - typedef allocator_traits<_Alloc> __alloc_traits; - typedef typename __rebind_alloc_helper<__alloc_traits, __func>::type _Ap; - _Ap __a(__f_.__get_allocator()); - __f_.destroy(); - __a.deallocate(this, 1); -} - -template -_Rp -__func<_Fp, _Alloc, _Rp(_ArgTypes...)>::operator()(_ArgTypes&& ... __arg) -{ - return __f_(_VSTD::forward<_ArgTypes>(__arg)...); -} - -#ifndef _LIBCPP_NO_RTTI - -template -const void* -__func<_Fp, _Alloc, _Rp(_ArgTypes...)>::target(const type_info& __ti) const _NOEXCEPT -{ - if (__ti == typeid(_Fp)) - return &__f_.__target(); - return nullptr; -} - -template -const std::type_info& -__func<_Fp, _Alloc, _Rp(_ArgTypes...)>::target_type() const _NOEXCEPT -{ - return typeid(_Fp); -} - -#endif // _LIBCPP_NO_RTTI - -// __value_func creates a value-type from a __func. - -template class __value_func; - -template class __value_func<_Rp(_ArgTypes...)> -{ - typename aligned_storage<3 * sizeof(void*)>::type __buf_; - - typedef __base<_Rp(_ArgTypes...)> __func; - __func* __f_; - - _LIBCPP_NO_CFI static __func* __as_base(void* p) - { - return reinterpret_cast<__func*>(p); - } - - public: - _LIBCPP_INLINE_VISIBILITY - __value_func() _NOEXCEPT : __f_(nullptr) {} - - template - _LIBCPP_INLINE_VISIBILITY __value_func(_Fp&& __f, const _Alloc& __a) - : __f_(nullptr) - { - typedef allocator_traits<_Alloc> __alloc_traits; - typedef __function::__func<_Fp, _Alloc, _Rp(_ArgTypes...)> _Fun; - typedef typename __rebind_alloc_helper<__alloc_traits, _Fun>::type - _FunAlloc; - - if (__function::__not_null(__f)) - { - _FunAlloc __af(__a); - if (sizeof(_Fun) <= sizeof(__buf_) && - is_nothrow_copy_constructible<_Fp>::value && - is_nothrow_copy_constructible<_FunAlloc>::value) - { - __f_ = - ::new ((void*)&__buf_) _Fun(_VSTD::move(__f), _Alloc(__af)); - } - else - { - typedef __allocator_destructor<_FunAlloc> _Dp; - unique_ptr<__func, _Dp> __hold(__af.allocate(1), _Dp(__af, 1)); - ::new ((void*)__hold.get()) _Fun(_VSTD::move(__f), _Alloc(__a)); - __f_ = __hold.release(); - } - } - } - - template ::type, __value_func>::value>::type> - _LIBCPP_INLINE_VISIBILITY explicit __value_func(_Fp&& __f) - : __value_func(_VSTD::forward<_Fp>(__f), allocator<_Fp>()) {} - - _LIBCPP_INLINE_VISIBILITY - __value_func(const __value_func& __f) - { - if (__f.__f_ == nullptr) - __f_ = nullptr; - else if ((void*)__f.__f_ == &__f.__buf_) - { - __f_ = __as_base(&__buf_); - __f.__f_->__clone(__f_); - } - else - __f_ = __f.__f_->__clone(); - } - - _LIBCPP_INLINE_VISIBILITY - __value_func(__value_func&& __f) _NOEXCEPT - { - if (__f.__f_ == nullptr) - __f_ = nullptr; - else if ((void*)__f.__f_ == &__f.__buf_) - { - __f_ = __as_base(&__buf_); - __f.__f_->__clone(__f_); - } - else - { - __f_ = __f.__f_; - __f.__f_ = nullptr; - } - } - - _LIBCPP_INLINE_VISIBILITY - ~__value_func() - { - if ((void*)__f_ == &__buf_) - __f_->destroy(); - else if (__f_) - __f_->destroy_deallocate(); - } - - _LIBCPP_INLINE_VISIBILITY - __value_func& operator=(__value_func&& __f) - { - *this = nullptr; - if (__f.__f_ == nullptr) - __f_ = nullptr; - else if ((void*)__f.__f_ == &__f.__buf_) - { - __f_ = __as_base(&__buf_); - __f.__f_->__clone(__f_); - } - else - { - __f_ = __f.__f_; - __f.__f_ = nullptr; - } - return *this; - } - - _LIBCPP_INLINE_VISIBILITY - __value_func& operator=(nullptr_t) - { - __func* __f = __f_; - __f_ = nullptr; - if ((void*)__f == &__buf_) - __f->destroy(); - else if (__f) - __f->destroy_deallocate(); - return *this; - } - - _LIBCPP_INLINE_VISIBILITY - _Rp operator()(_ArgTypes&&... __args) const - { - if (__f_ == nullptr) - __throw_bad_function_call(); - return (*__f_)(_VSTD::forward<_ArgTypes>(__args)...); - } - - _LIBCPP_INLINE_VISIBILITY - void swap(__value_func& __f) _NOEXCEPT - { - if (&__f == this) - return; - if ((void*)__f_ == &__buf_ && (void*)__f.__f_ == &__f.__buf_) - { - typename aligned_storage::type __tempbuf; - __func* __t = __as_base(&__tempbuf); - __f_->__clone(__t); - __f_->destroy(); - __f_ = nullptr; - __f.__f_->__clone(__as_base(&__buf_)); - __f.__f_->destroy(); - __f.__f_ = nullptr; - __f_ = __as_base(&__buf_); - __t->__clone(__as_base(&__f.__buf_)); - __t->destroy(); - __f.__f_ = __as_base(&__f.__buf_); - } - else if ((void*)__f_ == &__buf_) - { - __f_->__clone(__as_base(&__f.__buf_)); - __f_->destroy(); - __f_ = __f.__f_; - __f.__f_ = __as_base(&__f.__buf_); - } - else if ((void*)__f.__f_ == &__f.__buf_) - { - __f.__f_->__clone(__as_base(&__buf_)); - __f.__f_->destroy(); - __f.__f_ = __f_; - __f_ = __as_base(&__buf_); - } - else - _VSTD::swap(__f_, __f.__f_); - } - - _LIBCPP_INLINE_VISIBILITY - explicit operator bool() const _NOEXCEPT { return __f_ != nullptr; } - -#ifndef _LIBCPP_NO_RTTI - _LIBCPP_INLINE_VISIBILITY - const std::type_info& target_type() const _NOEXCEPT - { - if (__f_ == nullptr) - return typeid(void); - return __f_->target_type(); - } - - template - _LIBCPP_INLINE_VISIBILITY const _Tp* target() const _NOEXCEPT - { - if (__f_ == nullptr) - return nullptr; - return (const _Tp*)__f_->target(typeid(_Tp)); - } -#endif // _LIBCPP_NO_RTTI -}; - -// Storage for a functor object, to be used with __policy to manage copy and -// destruction. -union __policy_storage -{ - mutable char __small[sizeof(void*) * 2]; - void* __large; -}; - -// True if _Fun can safely be held in __policy_storage.__small. -template -struct __use_small_storage - : public integral_constant< - bool, sizeof(_Fun) <= sizeof(__policy_storage) && - _LIBCPP_ALIGNOF(_Fun) <= _LIBCPP_ALIGNOF(__policy_storage) && - is_trivially_copy_constructible<_Fun>::value && - is_trivially_destructible<_Fun>::value> {}; - -// Policy contains information about how to copy, destroy, and move the -// underlying functor. You can think of it as a vtable of sorts. -struct __policy -{ - // Used to copy or destroy __large values. null for trivial objects. - void* (*const __clone)(const void*); - void (*const __destroy)(void*); - - // True if this is the null policy (no value). - const bool __is_null; - - // The target type. May be null if RTTI is disabled. - const std::type_info* const __type_info; - - // Returns a pointer to a static policy object suitable for the functor - // type. - template - _LIBCPP_INLINE_VISIBILITY static const __policy* __create() - { - return __choose_policy<_Fun>(__use_small_storage<_Fun>()); - } - - _LIBCPP_INLINE_VISIBILITY - static const __policy* __create_empty() - { - static const _LIBCPP_CONSTEXPR __policy __policy_ = {nullptr, nullptr, - true, -#ifndef _LIBCPP_NO_RTTI - &typeid(void) -#else - nullptr -#endif - }; - return &__policy_; - } - - private: - template static void* __large_clone(const void* __s) - { - const _Fun* __f = static_cast(__s); - return __f->__clone(); - } - - template - static void __large_destroy(void* __s) { - _Fun::__destroy_and_delete(static_cast<_Fun*>(__s)); - } - - template - _LIBCPP_INLINE_VISIBILITY static const __policy* - __choose_policy(/* is_small = */ false_type) { - static const _LIBCPP_CONSTEXPR __policy __policy_ = { - &__large_clone<_Fun>, &__large_destroy<_Fun>, false, -#ifndef _LIBCPP_NO_RTTI - &typeid(typename _Fun::_Target) -#else - nullptr -#endif - }; - return &__policy_; - } - - template - _LIBCPP_INLINE_VISIBILITY static const __policy* - __choose_policy(/* is_small = */ true_type) - { - static const _LIBCPP_CONSTEXPR __policy __policy_ = { - nullptr, nullptr, false, -#ifndef _LIBCPP_NO_RTTI - &typeid(typename _Fun::_Target) -#else - nullptr -#endif - }; - return &__policy_; - } -}; - -// Used to choose between perfect forwarding or pass-by-value. Pass-by-value is -// faster for types that can be passed in registers. -template -using __fast_forward = - typename conditional::value, _Tp, _Tp&&>::type; - -// __policy_invoker calls an instance of __alloc_func held in __policy_storage. - -template struct __policy_invoker; - -template -struct __policy_invoker<_Rp(_ArgTypes...)> -{ - typedef _Rp (*__Call)(const __policy_storage*, - __fast_forward<_ArgTypes>...); - - __Call __call_; - - // Creates an invoker that throws bad_function_call. - _LIBCPP_INLINE_VISIBILITY - __policy_invoker() : __call_(&__call_empty) {} - - // Creates an invoker that calls the given instance of __func. - template - _LIBCPP_INLINE_VISIBILITY static __policy_invoker __create() - { - return __policy_invoker(&__call_impl<_Fun>); - } - - private: - _LIBCPP_INLINE_VISIBILITY - explicit __policy_invoker(__Call __c) : __call_(__c) {} - - static _Rp __call_empty(const __policy_storage*, - __fast_forward<_ArgTypes>...) - { - __throw_bad_function_call(); - } - - template - static _Rp __call_impl(const __policy_storage* __buf, - __fast_forward<_ArgTypes>... __args) - { - _Fun* __f = reinterpret_cast<_Fun*>(__use_small_storage<_Fun>::value - ? &__buf->__small - : __buf->__large); - return (*__f)(_VSTD::forward<_ArgTypes>(__args)...); - } -}; - -// __policy_func uses a __policy and __policy_invoker to create a type-erased, -// copyable functor. - -template class __policy_func; - -template class __policy_func<_Rp(_ArgTypes...)> -{ - // Inline storage for small objects. - __policy_storage __buf_; - - // Calls the value stored in __buf_. This could technically be part of - // policy, but storing it here eliminates a level of indirection inside - // operator(). - typedef __function::__policy_invoker<_Rp(_ArgTypes...)> __invoker; - __invoker __invoker_; - - // The policy that describes how to move / copy / destroy __buf_. Never - // null, even if the function is empty. - const __policy* __policy_; - - public: - _LIBCPP_INLINE_VISIBILITY - __policy_func() : __policy_(__policy::__create_empty()) {} - - template - _LIBCPP_INLINE_VISIBILITY __policy_func(_Fp&& __f, const _Alloc& __a) - : __policy_(__policy::__create_empty()) - { - typedef __alloc_func<_Fp, _Alloc, _Rp(_ArgTypes...)> _Fun; - typedef allocator_traits<_Alloc> __alloc_traits; - typedef typename __rebind_alloc_helper<__alloc_traits, _Fun>::type - _FunAlloc; - - if (__function::__not_null(__f)) - { - __invoker_ = __invoker::template __create<_Fun>(); - __policy_ = __policy::__create<_Fun>(); - - _FunAlloc __af(__a); - if (__use_small_storage<_Fun>()) - { - ::new ((void*)&__buf_.__small) - _Fun(_VSTD::move(__f), _Alloc(__af)); - } - else - { - typedef __allocator_destructor<_FunAlloc> _Dp; - unique_ptr<_Fun, _Dp> __hold(__af.allocate(1), _Dp(__af, 1)); - ::new ((void*)__hold.get()) - _Fun(_VSTD::move(__f), _Alloc(__af)); - __buf_.__large = __hold.release(); - } - } - } - - template ::type, __policy_func>::value>::type> - _LIBCPP_INLINE_VISIBILITY explicit __policy_func(_Fp&& __f) - : __policy_(__policy::__create_empty()) { - typedef __default_alloc_func<_Fp, _Rp(_ArgTypes...)> _Fun; - - if (__function::__not_null(__f)) { - __invoker_ = __invoker::template __create<_Fun>(); - __policy_ = __policy::__create<_Fun>(); - if (__use_small_storage<_Fun>()) { - ::new ((void*)&__buf_.__small) _Fun(_VSTD::move(__f)); - } else { - __builtin_new_allocator::__holder_t __hold = - __builtin_new_allocator::__allocate_type<_Fun>(1); - __buf_.__large = ::new ((void*)__hold.get()) _Fun(_VSTD::move(__f)); - (void)__hold.release(); - } - } - } - - _LIBCPP_INLINE_VISIBILITY - __policy_func(const __policy_func& __f) - : __buf_(__f.__buf_), __invoker_(__f.__invoker_), - __policy_(__f.__policy_) - { - if (__policy_->__clone) - __buf_.__large = __policy_->__clone(__f.__buf_.__large); - } - - _LIBCPP_INLINE_VISIBILITY - __policy_func(__policy_func&& __f) - : __buf_(__f.__buf_), __invoker_(__f.__invoker_), - __policy_(__f.__policy_) - { - if (__policy_->__destroy) - { - __f.__policy_ = __policy::__create_empty(); - __f.__invoker_ = __invoker(); - } - } - - _LIBCPP_INLINE_VISIBILITY - ~__policy_func() - { - if (__policy_->__destroy) - __policy_->__destroy(__buf_.__large); - } - - _LIBCPP_INLINE_VISIBILITY - __policy_func& operator=(__policy_func&& __f) - { - *this = nullptr; - __buf_ = __f.__buf_; - __invoker_ = __f.__invoker_; - __policy_ = __f.__policy_; - __f.__policy_ = __policy::__create_empty(); - __f.__invoker_ = __invoker(); - return *this; - } - - _LIBCPP_INLINE_VISIBILITY - __policy_func& operator=(nullptr_t) - { - const __policy* __p = __policy_; - __policy_ = __policy::__create_empty(); - __invoker_ = __invoker(); - if (__p->__destroy) - __p->__destroy(__buf_.__large); - return *this; - } - - _LIBCPP_INLINE_VISIBILITY - _Rp operator()(_ArgTypes&&... __args) const - { - return __invoker_.__call_(_VSTD::addressof(__buf_), - _VSTD::forward<_ArgTypes>(__args)...); - } - - _LIBCPP_INLINE_VISIBILITY - void swap(__policy_func& __f) - { - _VSTD::swap(__invoker_, __f.__invoker_); - _VSTD::swap(__policy_, __f.__policy_); - _VSTD::swap(__buf_, __f.__buf_); - } - - _LIBCPP_INLINE_VISIBILITY - explicit operator bool() const _NOEXCEPT - { - return !__policy_->__is_null; - } - -#ifndef _LIBCPP_NO_RTTI - _LIBCPP_INLINE_VISIBILITY - const std::type_info& target_type() const _NOEXCEPT - { - return *__policy_->__type_info; - } - - template - _LIBCPP_INLINE_VISIBILITY const _Tp* target() const _NOEXCEPT - { - if (__policy_->__is_null || typeid(_Tp) != *__policy_->__type_info) - return nullptr; - if (__policy_->__clone) // Out of line storage. - return reinterpret_cast(__buf_.__large); - else - return reinterpret_cast(&__buf_.__small); - } -#endif // _LIBCPP_NO_RTTI -}; - -#if defined(_LIBCPP_HAS_BLOCKS_RUNTIME) && !defined(_LIBCPP_HAS_OBJC_ARC) - -extern "C" void *_Block_copy(const void *); -extern "C" void _Block_release(const void *); - -template -class __func<_Rp1(^)(_ArgTypes1...), _Alloc, _Rp(_ArgTypes...)> - : public __base<_Rp(_ArgTypes...)> -{ - typedef _Rp1(^__block_type)(_ArgTypes1...); - __block_type __f_; - -public: - _LIBCPP_INLINE_VISIBILITY - explicit __func(__block_type const& __f) - : __f_(reinterpret_cast<__block_type>(__f ? _Block_copy(__f) : nullptr)) - { } - - // [TODO] add && to save on a retain - - _LIBCPP_INLINE_VISIBILITY - explicit __func(__block_type __f, const _Alloc& /* unused */) - : __f_(reinterpret_cast<__block_type>(__f ? _Block_copy(__f) : nullptr)) - { } - - virtual __base<_Rp(_ArgTypes...)>* __clone() const { - _LIBCPP_ASSERT(false, - "Block pointers are just pointers, so they should always fit into " - "std::function's small buffer optimization. This function should " - "never be invoked."); - return nullptr; - } - - virtual void __clone(__base<_Rp(_ArgTypes...)>* __p) const { - ::new ((void*)__p) __func(__f_); - } - - virtual void destroy() _NOEXCEPT { - if (__f_) - _Block_release(__f_); - __f_ = 0; - } - - virtual void destroy_deallocate() _NOEXCEPT { - _LIBCPP_ASSERT(false, - "Block pointers are just pointers, so they should always fit into " - "std::function's small buffer optimization. This function should " - "never be invoked."); - } - - virtual _Rp operator()(_ArgTypes&& ... __arg) { - return _VSTD::__invoke(__f_, _VSTD::forward<_ArgTypes>(__arg)...); - } - -#ifndef _LIBCPP_NO_RTTI - virtual const void* target(type_info const& __ti) const _NOEXCEPT { - if (__ti == typeid(__func::__block_type)) - return &__f_; - return (const void*)nullptr; - } - - virtual const std::type_info& target_type() const _NOEXCEPT { - return typeid(__func::__block_type); - } -#endif // _LIBCPP_NO_RTTI -}; - -#endif // _LIBCPP_HAS_EXTENSION_BLOCKS && !_LIBCPP_HAS_OBJC_ARC - -} // __function - -template -class _LIBCPP_TEMPLATE_VIS function<_Rp(_ArgTypes...)> -#if _LIBCPP_STD_VER <= 17 || !defined(_LIBCPP_ABI_NO_BINDER_BASES) - : public __function::__maybe_derive_from_unary_function<_Rp(_ArgTypes...)>, - public __function::__maybe_derive_from_binary_function<_Rp(_ArgTypes...)> -#endif -{ -#ifndef _LIBCPP_ABI_OPTIMIZED_FUNCTION - typedef __function::__value_func<_Rp(_ArgTypes...)> __func; -#else - typedef __function::__policy_func<_Rp(_ArgTypes...)> __func; -#endif - - __func __f_; - - template , function>, - __invokable<_Fp, _ArgTypes...> - >::value> - struct __callable; - template - struct __callable<_Fp, true> - { - static const bool value = is_void<_Rp>::value || - __is_core_convertible::type, - _Rp>::value; - }; - template - struct __callable<_Fp, false> - { - static const bool value = false; - }; - - template - using _EnableIfLValueCallable = typename enable_if<__callable<_Fp&>::value>::type; -public: - typedef _Rp result_type; - - // construct/copy/destroy: - _LIBCPP_INLINE_VISIBILITY - function() _NOEXCEPT { } - _LIBCPP_INLINE_VISIBILITY - function(nullptr_t) _NOEXCEPT {} - function(const function&); - function(function&&) _NOEXCEPT; - template> - function(_Fp); - -#if _LIBCPP_STD_VER <= 14 - template - _LIBCPP_INLINE_VISIBILITY - function(allocator_arg_t, const _Alloc&) _NOEXCEPT {} - template - _LIBCPP_INLINE_VISIBILITY - function(allocator_arg_t, const _Alloc&, nullptr_t) _NOEXCEPT {} - template - function(allocator_arg_t, const _Alloc&, const function&); - template - function(allocator_arg_t, const _Alloc&, function&&); - template> - function(allocator_arg_t, const _Alloc& __a, _Fp __f); -#endif - - function& operator=(const function&); - function& operator=(function&&) _NOEXCEPT; - function& operator=(nullptr_t) _NOEXCEPT; - template::type>> - function& operator=(_Fp&&); - - ~function(); - - // function modifiers: - void swap(function&) _NOEXCEPT; - -#if _LIBCPP_STD_VER <= 14 - template - _LIBCPP_INLINE_VISIBILITY - void assign(_Fp&& __f, const _Alloc& __a) - {function(allocator_arg, __a, _VSTD::forward<_Fp>(__f)).swap(*this);} -#endif - - // function capacity: - _LIBCPP_INLINE_VISIBILITY - explicit operator bool() const _NOEXCEPT { - return static_cast(__f_); - } - - // deleted overloads close possible hole in the type system - template - bool operator==(const function<_R2(_ArgTypes2...)>&) const = delete; - template - bool operator!=(const function<_R2(_ArgTypes2...)>&) const = delete; -public: - // function invocation: - _Rp operator()(_ArgTypes...) const; - -#ifndef _LIBCPP_NO_RTTI - // function target access: - const std::type_info& target_type() const _NOEXCEPT; - template _Tp* target() _NOEXCEPT; - template const _Tp* target() const _NOEXCEPT; -#endif // _LIBCPP_NO_RTTI -}; - -#ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES -template -function(_Rp(*)(_Ap...)) -> function<_Rp(_Ap...)>; - -template -struct __strip_signature; - -template -struct __strip_signature<_Rp (_Gp::*) (_Ap...)> { using type = _Rp(_Ap...); }; -template -struct __strip_signature<_Rp (_Gp::*) (_Ap...) const> { using type = _Rp(_Ap...); }; -template -struct __strip_signature<_Rp (_Gp::*) (_Ap...) volatile> { using type = _Rp(_Ap...); }; -template -struct __strip_signature<_Rp (_Gp::*) (_Ap...) const volatile> { using type = _Rp(_Ap...); }; - -template -struct __strip_signature<_Rp (_Gp::*) (_Ap...) &> { using type = _Rp(_Ap...); }; -template -struct __strip_signature<_Rp (_Gp::*) (_Ap...) const &> { using type = _Rp(_Ap...); }; -template -struct __strip_signature<_Rp (_Gp::*) (_Ap...) volatile &> { using type = _Rp(_Ap...); }; -template -struct __strip_signature<_Rp (_Gp::*) (_Ap...) const volatile &> { using type = _Rp(_Ap...); }; - -template -struct __strip_signature<_Rp (_Gp::*) (_Ap...) noexcept> { using type = _Rp(_Ap...); }; -template -struct __strip_signature<_Rp (_Gp::*) (_Ap...) const noexcept> { using type = _Rp(_Ap...); }; -template -struct __strip_signature<_Rp (_Gp::*) (_Ap...) volatile noexcept> { using type = _Rp(_Ap...); }; -template -struct __strip_signature<_Rp (_Gp::*) (_Ap...) const volatile noexcept> { using type = _Rp(_Ap...); }; - -template -struct __strip_signature<_Rp (_Gp::*) (_Ap...) & noexcept> { using type = _Rp(_Ap...); }; -template -struct __strip_signature<_Rp (_Gp::*) (_Ap...) const & noexcept> { using type = _Rp(_Ap...); }; -template -struct __strip_signature<_Rp (_Gp::*) (_Ap...) volatile & noexcept> { using type = _Rp(_Ap...); }; -template -struct __strip_signature<_Rp (_Gp::*) (_Ap...) const volatile & noexcept> { using type = _Rp(_Ap...); }; - -template::type> -function(_Fp) -> function<_Stripped>; -#endif // !_LIBCPP_HAS_NO_DEDUCTION_GUIDES - -template -function<_Rp(_ArgTypes...)>::function(const function& __f) : __f_(__f.__f_) {} - -#if _LIBCPP_STD_VER <= 14 -template -template -function<_Rp(_ArgTypes...)>::function(allocator_arg_t, const _Alloc&, - const function& __f) : __f_(__f.__f_) {} -#endif - -template -function<_Rp(_ArgTypes...)>::function(function&& __f) _NOEXCEPT - : __f_(_VSTD::move(__f.__f_)) {} - -#if _LIBCPP_STD_VER <= 14 -template -template -function<_Rp(_ArgTypes...)>::function(allocator_arg_t, const _Alloc&, - function&& __f) - : __f_(_VSTD::move(__f.__f_)) {} -#endif - -template -template -function<_Rp(_ArgTypes...)>::function(_Fp __f) : __f_(_VSTD::move(__f)) {} - -#if _LIBCPP_STD_VER <= 14 -template -template -function<_Rp(_ArgTypes...)>::function(allocator_arg_t, const _Alloc& __a, - _Fp __f) - : __f_(_VSTD::move(__f), __a) {} -#endif - -template -function<_Rp(_ArgTypes...)>& -function<_Rp(_ArgTypes...)>::operator=(const function& __f) -{ - function(__f).swap(*this); - return *this; -} - -template -function<_Rp(_ArgTypes...)>& -function<_Rp(_ArgTypes...)>::operator=(function&& __f) _NOEXCEPT -{ - __f_ = _VSTD::move(__f.__f_); - return *this; -} - -template -function<_Rp(_ArgTypes...)>& -function<_Rp(_ArgTypes...)>::operator=(nullptr_t) _NOEXCEPT -{ - __f_ = nullptr; - return *this; -} - -template -template -function<_Rp(_ArgTypes...)>& -function<_Rp(_ArgTypes...)>::operator=(_Fp&& __f) -{ - function(_VSTD::forward<_Fp>(__f)).swap(*this); - return *this; -} - -template -function<_Rp(_ArgTypes...)>::~function() {} - -template -void -function<_Rp(_ArgTypes...)>::swap(function& __f) _NOEXCEPT -{ - __f_.swap(__f.__f_); -} - -template -_Rp -function<_Rp(_ArgTypes...)>::operator()(_ArgTypes... __arg) const -{ - return __f_(_VSTD::forward<_ArgTypes>(__arg)...); -} - -#ifndef _LIBCPP_NO_RTTI - -template -const std::type_info& -function<_Rp(_ArgTypes...)>::target_type() const _NOEXCEPT -{ - return __f_.target_type(); -} - -template -template -_Tp* -function<_Rp(_ArgTypes...)>::target() _NOEXCEPT -{ - return (_Tp*)(__f_.template target<_Tp>()); -} - -template -template -const _Tp* -function<_Rp(_ArgTypes...)>::target() const _NOEXCEPT -{ - return __f_.template target<_Tp>(); -} - -#endif // _LIBCPP_NO_RTTI - -template -inline _LIBCPP_INLINE_VISIBILITY -bool -operator==(const function<_Rp(_ArgTypes...)>& __f, nullptr_t) _NOEXCEPT {return !__f;} - -template -inline _LIBCPP_INLINE_VISIBILITY -bool -operator==(nullptr_t, const function<_Rp(_ArgTypes...)>& __f) _NOEXCEPT {return !__f;} - -template -inline _LIBCPP_INLINE_VISIBILITY -bool -operator!=(const function<_Rp(_ArgTypes...)>& __f, nullptr_t) _NOEXCEPT {return (bool)__f;} - -template -inline _LIBCPP_INLINE_VISIBILITY -bool -operator!=(nullptr_t, const function<_Rp(_ArgTypes...)>& __f) _NOEXCEPT {return (bool)__f;} - -template -inline _LIBCPP_INLINE_VISIBILITY -void -swap(function<_Rp(_ArgTypes...)>& __x, function<_Rp(_ArgTypes...)>& __y) _NOEXCEPT -{return __x.swap(__y);} - -#else // _LIBCPP_CXX03_LANG - -#include <__functional_03> - -#endif - -//////////////////////////////////////////////////////////////////////////////// -// BIND -//============================================================================== - -template struct __is_bind_expression : public false_type {}; -template struct _LIBCPP_TEMPLATE_VIS is_bind_expression - : public __is_bind_expression::type> {}; - -#if _LIBCPP_STD_VER > 14 -template -_LIBCPP_INLINE_VAR constexpr size_t is_bind_expression_v = is_bind_expression<_Tp>::value; -#endif - -template struct __is_placeholder : public integral_constant {}; -template struct _LIBCPP_TEMPLATE_VIS is_placeholder - : public __is_placeholder::type> {}; - -#if _LIBCPP_STD_VER > 14 -template -_LIBCPP_INLINE_VAR constexpr size_t is_placeholder_v = is_placeholder<_Tp>::value; -#endif - -namespace placeholders -{ - -template struct __ph {}; - -#if defined(_LIBCPP_CXX03_LANG) || defined(_LIBCPP_BUILDING_LIBRARY) -_LIBCPP_FUNC_VIS extern const __ph<1> _1; -_LIBCPP_FUNC_VIS extern const __ph<2> _2; -_LIBCPP_FUNC_VIS extern const __ph<3> _3; -_LIBCPP_FUNC_VIS extern const __ph<4> _4; -_LIBCPP_FUNC_VIS extern const __ph<5> _5; -_LIBCPP_FUNC_VIS extern const __ph<6> _6; -_LIBCPP_FUNC_VIS extern const __ph<7> _7; -_LIBCPP_FUNC_VIS extern const __ph<8> _8; -_LIBCPP_FUNC_VIS extern const __ph<9> _9; -_LIBCPP_FUNC_VIS extern const __ph<10> _10; -#else -/* _LIBCPP_INLINE_VAR */ constexpr __ph<1> _1{}; -/* _LIBCPP_INLINE_VAR */ constexpr __ph<2> _2{}; -/* _LIBCPP_INLINE_VAR */ constexpr __ph<3> _3{}; -/* _LIBCPP_INLINE_VAR */ constexpr __ph<4> _4{}; -/* _LIBCPP_INLINE_VAR */ constexpr __ph<5> _5{}; -/* _LIBCPP_INLINE_VAR */ constexpr __ph<6> _6{}; -/* _LIBCPP_INLINE_VAR */ constexpr __ph<7> _7{}; -/* _LIBCPP_INLINE_VAR */ constexpr __ph<8> _8{}; -/* _LIBCPP_INLINE_VAR */ constexpr __ph<9> _9{}; -/* _LIBCPP_INLINE_VAR */ constexpr __ph<10> _10{}; -#endif // defined(_LIBCPP_CXX03_LANG) || defined(_LIBCPP_BUILDING_LIBRARY) - -} // placeholders - -template -struct __is_placeholder > - : public integral_constant {}; - - -#ifndef _LIBCPP_CXX03_LANG - -template -inline _LIBCPP_INLINE_VISIBILITY -_Tp& -__mu(reference_wrapper<_Tp> __t, _Uj&) -{ - return __t.get(); -} - -template -inline _LIBCPP_INLINE_VISIBILITY -typename __invoke_of<_Ti&, _Uj...>::type -__mu_expand(_Ti& __ti, tuple<_Uj...>& __uj, __tuple_indices<_Indx...>) -{ - return __ti(_VSTD::forward<_Uj>(_VSTD::get<_Indx>(__uj))...); -} - -template -inline _LIBCPP_INLINE_VISIBILITY -typename _EnableIf -< - is_bind_expression<_Ti>::value, - __invoke_of<_Ti&, _Uj...> ->::type -__mu(_Ti& __ti, tuple<_Uj...>& __uj) -{ - typedef typename __make_tuple_indices::type __indices; - return _VSTD::__mu_expand(__ti, __uj, __indices()); -} - -template -struct __mu_return2 {}; - -template -struct __mu_return2 -{ - typedef typename tuple_element::value - 1, _Uj>::type type; -}; - -template -inline _LIBCPP_INLINE_VISIBILITY -typename enable_if -< - 0 < is_placeholder<_Ti>::value, - typename __mu_return2<0 < is_placeholder<_Ti>::value, _Ti, _Uj>::type ->::type -__mu(_Ti&, _Uj& __uj) -{ - const size_t _Indx = is_placeholder<_Ti>::value - 1; - return _VSTD::forward::type>(_VSTD::get<_Indx>(__uj)); -} - -template -inline _LIBCPP_INLINE_VISIBILITY -typename enable_if -< - !is_bind_expression<_Ti>::value && - is_placeholder<_Ti>::value == 0 && - !__is_reference_wrapper<_Ti>::value, - _Ti& ->::type -__mu(_Ti& __ti, _Uj&) -{ - return __ti; -} - -template -struct __mu_return_impl; - -template -struct __mu_return_invokable // false -{ - typedef __nat type; -}; - -template -struct __mu_return_invokable -{ - typedef typename __invoke_of<_Ti&, _Uj...>::type type; -}; - -template -struct __mu_return_impl<_Ti, false, true, false, tuple<_Uj...> > - : public __mu_return_invokable<__invokable<_Ti&, _Uj...>::value, _Ti, _Uj...> -{ -}; - -template -struct __mu_return_impl<_Ti, false, false, true, _TupleUj> -{ - typedef typename tuple_element::value - 1, - _TupleUj>::type&& type; -}; - -template -struct __mu_return_impl<_Ti, true, false, false, _TupleUj> -{ - typedef typename _Ti::type& type; -}; - -template -struct __mu_return_impl<_Ti, false, false, false, _TupleUj> -{ - typedef _Ti& type; -}; - -template -struct __mu_return - : public __mu_return_impl<_Ti, - __is_reference_wrapper<_Ti>::value, - is_bind_expression<_Ti>::value, - 0 < is_placeholder<_Ti>::value && - is_placeholder<_Ti>::value <= tuple_size<_TupleUj>::value, - _TupleUj> -{ -}; - -template -struct __is_valid_bind_return -{ - static const bool value = false; -}; - -template -struct __is_valid_bind_return<_Fp, tuple<_BoundArgs...>, _TupleUj> -{ - static const bool value = __invokable<_Fp, - typename __mu_return<_BoundArgs, _TupleUj>::type...>::value; -}; - -template -struct __is_valid_bind_return<_Fp, const tuple<_BoundArgs...>, _TupleUj> -{ - static const bool value = __invokable<_Fp, - typename __mu_return::type...>::value; -}; - -template ::value> -struct __bind_return; - -template -struct __bind_return<_Fp, tuple<_BoundArgs...>, _TupleUj, true> -{ - typedef typename __invoke_of - < - _Fp&, - typename __mu_return - < - _BoundArgs, - _TupleUj - >::type... - >::type type; -}; - -template -struct __bind_return<_Fp, const tuple<_BoundArgs...>, _TupleUj, true> -{ - typedef typename __invoke_of - < - _Fp&, - typename __mu_return - < - const _BoundArgs, - _TupleUj - >::type... - >::type type; -}; - -template -inline _LIBCPP_INLINE_VISIBILITY -typename __bind_return<_Fp, _BoundArgs, _Args>::type -__apply_functor(_Fp& __f, _BoundArgs& __bound_args, __tuple_indices<_Indx...>, - _Args&& __args) -{ - return _VSTD::__invoke(__f, _VSTD::__mu(_VSTD::get<_Indx>(__bound_args), __args)...); -} - -template -class __bind -#if _LIBCPP_STD_VER <= 17 || !defined(_LIBCPP_ABI_NO_BINDER_BASES) - : public __weak_result_type::type> -#endif -{ -protected: - typedef typename decay<_Fp>::type _Fd; - typedef tuple::type...> _Td; -private: - _Fd __f_; - _Td __bound_args_; - - typedef typename __make_tuple_indices::type __indices; -public: - template ::value && - !is_same::type, - __bind>::value - >::type> - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 - explicit __bind(_Gp&& __f, _BA&& ...__bound_args) - : __f_(_VSTD::forward<_Gp>(__f)), - __bound_args_(_VSTD::forward<_BA>(__bound_args)...) {} - - template - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 - typename __bind_return<_Fd, _Td, tuple<_Args&&...> >::type - operator()(_Args&& ...__args) - { - return _VSTD::__apply_functor(__f_, __bound_args_, __indices(), - tuple<_Args&&...>(_VSTD::forward<_Args>(__args)...)); - } - - template - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 - typename __bind_return >::type - operator()(_Args&& ...__args) const - { - return _VSTD::__apply_functor(__f_, __bound_args_, __indices(), - tuple<_Args&&...>(_VSTD::forward<_Args>(__args)...)); - } -}; - -template -struct __is_bind_expression<__bind<_Fp, _BoundArgs...> > : public true_type {}; - -template -class __bind_r - : public __bind<_Fp, _BoundArgs...> -{ - typedef __bind<_Fp, _BoundArgs...> base; - typedef typename base::_Fd _Fd; - typedef typename base::_Td _Td; -public: - typedef _Rp result_type; - - - template ::value && - !is_same::type, - __bind_r>::value - >::type> - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 - explicit __bind_r(_Gp&& __f, _BA&& ...__bound_args) - : base(_VSTD::forward<_Gp>(__f), - _VSTD::forward<_BA>(__bound_args)...) {} - - template - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 - typename enable_if - < - is_convertible >::type, - result_type>::value || is_void<_Rp>::value, - result_type - >::type - operator()(_Args&& ...__args) - { - typedef __invoke_void_return_wrapper<_Rp> _Invoker; - return _Invoker::__call(static_cast(*this), _VSTD::forward<_Args>(__args)...); - } - - template - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 - typename enable_if - < - is_convertible >::type, - result_type>::value || is_void<_Rp>::value, - result_type - >::type - operator()(_Args&& ...__args) const - { - typedef __invoke_void_return_wrapper<_Rp> _Invoker; - return _Invoker::__call(static_cast(*this), _VSTD::forward<_Args>(__args)...); - } -}; - -template -struct __is_bind_expression<__bind_r<_Rp, _Fp, _BoundArgs...> > : public true_type {}; - -template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -__bind<_Fp, _BoundArgs...> -bind(_Fp&& __f, _BoundArgs&&... __bound_args) -{ - typedef __bind<_Fp, _BoundArgs...> type; - return type(_VSTD::forward<_Fp>(__f), _VSTD::forward<_BoundArgs>(__bound_args)...); -} - -template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -__bind_r<_Rp, _Fp, _BoundArgs...> -bind(_Fp&& __f, _BoundArgs&&... __bound_args) -{ - typedef __bind_r<_Rp, _Fp, _BoundArgs...> type; - return type(_VSTD::forward<_Fp>(__f), _VSTD::forward<_BoundArgs>(__bound_args)...); -} - -#endif // _LIBCPP_CXX03_LANG - -#if _LIBCPP_STD_VER > 14 - -template::value>::type> -struct __perfect_forward_impl; - -template -struct __perfect_forward_impl<_Op, __tuple_types<_Bound...>, __tuple_indices<_Idxs...>> -{ - tuple<_Bound...> __bound_; - - template - _LIBCPP_INLINE_VISIBILITY constexpr auto operator()(_Args&&... __args) & - noexcept(noexcept(_Op::__call(_VSTD::get<_Idxs>(__bound_)..., _VSTD::forward<_Args>(__args)...))) - -> decltype( _Op::__call(_VSTD::get<_Idxs>(__bound_)..., _VSTD::forward<_Args>(__args)...)) - {return _Op::__call(_VSTD::get<_Idxs>(__bound_)..., _VSTD::forward<_Args>(__args)...);} - - template - _LIBCPP_INLINE_VISIBILITY constexpr auto operator()(_Args&&... __args) const& - noexcept(noexcept(_Op::__call(_VSTD::get<_Idxs>(__bound_)..., _VSTD::forward<_Args>(__args)...))) - -> decltype( _Op::__call(_VSTD::get<_Idxs>(__bound_)..., _VSTD::forward<_Args>(__args)...)) - {return _Op::__call(_VSTD::get<_Idxs>(__bound_)..., _VSTD::forward<_Args>(__args)...);} - - template - _LIBCPP_INLINE_VISIBILITY constexpr auto operator()(_Args&&... __args) && - noexcept(noexcept(_Op::__call(_VSTD::get<_Idxs>(_VSTD::move(__bound_))..., - _VSTD::forward<_Args>(__args)...))) - -> decltype( _Op::__call(_VSTD::get<_Idxs>(_VSTD::move(__bound_))..., - _VSTD::forward<_Args>(__args)...)) - {return _Op::__call(_VSTD::get<_Idxs>(_VSTD::move(__bound_))..., - _VSTD::forward<_Args>(__args)...);} - - template - _LIBCPP_INLINE_VISIBILITY constexpr auto operator()(_Args&&... __args) const&& - noexcept(noexcept(_Op::__call(_VSTD::get<_Idxs>(_VSTD::move(__bound_))..., - _VSTD::forward<_Args>(__args)...))) - -> decltype( _Op::__call(_VSTD::get<_Idxs>(_VSTD::move(__bound_))..., - _VSTD::forward<_Args>(__args)...)) - {return _Op::__call(_VSTD::get<_Idxs>(_VSTD::move(__bound_))..., - _VSTD::forward<_Args>(__args)...);} - - template>::type, - class = _EnableIf>> - constexpr __perfect_forward_impl(__perfect_forward_impl const& __other) - : __bound_(__other.__bound_) {} - - template>::type, - class = _EnableIf>> - constexpr __perfect_forward_impl(__perfect_forward_impl && __other) - : __bound_(_VSTD::move(__other.__bound_)) {} - - template - explicit constexpr __perfect_forward_impl(_BoundArgs&&... __bound) : - __bound_(_VSTD::forward<_BoundArgs>(__bound)...) { } -}; - -template -using __perfect_forward = - __perfect_forward_impl<_Op, __tuple_types...>>; - -struct __not_fn_op -{ - template - static _LIBCPP_CONSTEXPR_AFTER_CXX17 auto __call(_Args&&... __args) - noexcept(noexcept(!_VSTD::invoke(_VSTD::forward<_Args>(__args)...))) - -> decltype( !_VSTD::invoke(_VSTD::forward<_Args>(__args)...)) - { return !_VSTD::invoke(_VSTD::forward<_Args>(__args)...); } -}; - -template, _Fn> && - is_move_constructible_v<_Fn>>> -_LIBCPP_CONSTEXPR_AFTER_CXX17 auto not_fn(_Fn&& __f) -{ - return __perfect_forward<__not_fn_op, _Fn>(_VSTD::forward<_Fn>(__f)); -} - -#endif // _LIBCPP_STD_VER > 14 - -#if _LIBCPP_STD_VER > 17 - -struct __bind_front_op -{ - template - constexpr static auto __call(_Args&&... __args) - noexcept(noexcept(_VSTD::invoke(_VSTD::forward<_Args>(__args)...))) - -> decltype( _VSTD::invoke(_VSTD::forward<_Args>(__args)...)) - { return _VSTD::invoke(_VSTD::forward<_Args>(__args)...); } -}; - -template, _Fn>, - is_move_constructible>, - is_constructible, _Args>..., - is_move_constructible>... - >::value>> -constexpr auto bind_front(_Fn&& __f, _Args&&... __args) -{ - return __perfect_forward<__bind_front_op, _Fn, _Args...>(_VSTD::forward<_Fn>(__f), - _VSTD::forward<_Args>(__args)...); -} - -#endif // _LIBCPP_STD_VER > 17 - -// struct hash in - -#if _LIBCPP_STD_VER > 14 - -// default searcher -template> -class _LIBCPP_TEMPLATE_VIS default_searcher { -public: - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 - default_searcher(_ForwardIterator __f, _ForwardIterator __l, - _BinaryPredicate __p = _BinaryPredicate()) - : __first_(__f), __last_(__l), __pred_(__p) {} - - template - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 - pair<_ForwardIterator2, _ForwardIterator2> - operator () (_ForwardIterator2 __f, _ForwardIterator2 __l) const - { - return _VSTD::__search(__f, __l, __first_, __last_, __pred_, - typename iterator_traits<_ForwardIterator>::iterator_category(), - typename iterator_traits<_ForwardIterator2>::iterator_category()); - } - -private: - _ForwardIterator __first_; - _ForwardIterator __last_; - _BinaryPredicate __pred_; - }; - -#endif // _LIBCPP_STD_VER > 14 - -#if _LIBCPP_STD_VER > 17 -template -using unwrap_reference_t = typename unwrap_reference<_Tp>::type; - -template -using unwrap_ref_decay_t = typename unwrap_ref_decay<_Tp>::type; -#endif // > C++17 - -#if _LIBCPP_STD_VER > 17 -// [func.identity] -struct identity { - template - _LIBCPP_NODISCARD_EXT constexpr _Tp&& operator()(_Tp&& __t) const noexcept - { - return _VSTD::forward<_Tp>(__t); - } - - using is_transparent = void; -}; -#endif // _LIBCPP_STD_VER > 17 - -#if !defined(_LIBCPP_HAS_NO_RANGES) - -namespace ranges { - -struct equal_to { - template - requires equality_comparable_with<_Tp, _Up> - [[nodiscard]] constexpr bool operator()(_Tp &&__t, _Up &&__u) const - noexcept(noexcept(bool(_VSTD::forward<_Tp>(__t) == _VSTD::forward<_Up>(__u)))) { - return _VSTD::forward<_Tp>(__t) == _VSTD::forward<_Up>(__u); - } - - using is_transparent = void; -}; - -struct not_equal_to { - template - requires equality_comparable_with<_Tp, _Up> - [[nodiscard]] constexpr bool operator()(_Tp &&__t, _Up &&__u) const - noexcept(noexcept(bool(!(_VSTD::forward<_Tp>(__t) == _VSTD::forward<_Up>(__u))))) { - return !(_VSTD::forward<_Tp>(__t) == _VSTD::forward<_Up>(__u)); - } - - using is_transparent = void; -}; - -struct greater { - template - requires totally_ordered_with<_Tp, _Up> - [[nodiscard]] constexpr bool operator()(_Tp &&__t, _Up &&__u) const - noexcept(noexcept(bool(_VSTD::forward<_Up>(__u) < _VSTD::forward<_Tp>(__t)))) { - return _VSTD::forward<_Up>(__u) < _VSTD::forward<_Tp>(__t); - } - - using is_transparent = void; -}; - -struct less { - template - requires totally_ordered_with<_Tp, _Up> - [[nodiscard]] constexpr bool operator()(_Tp &&__t, _Up &&__u) const - noexcept(noexcept(bool(_VSTD::forward<_Tp>(__t) < _VSTD::forward<_Up>(__u)))) { - return _VSTD::forward<_Tp>(__t) < _VSTD::forward<_Up>(__u); - } - - using is_transparent = void; -}; - -struct greater_equal { - template - requires totally_ordered_with<_Tp, _Up> - [[nodiscard]] constexpr bool operator()(_Tp &&__t, _Up &&__u) const - noexcept(noexcept(bool(!(_VSTD::forward<_Tp>(__t) < _VSTD::forward<_Up>(__u))))) { - return !(_VSTD::forward<_Tp>(__t) < _VSTD::forward<_Up>(__u)); - } - - using is_transparent = void; -}; - -struct less_equal { - template - requires totally_ordered_with<_Tp, _Up> - [[nodiscard]] constexpr bool operator()(_Tp &&__t, _Up &&__u) const - noexcept(noexcept(bool(!(_VSTD::forward<_Up>(__u) < _VSTD::forward<_Tp>(__t))))) { - return !(_VSTD::forward<_Up>(__u) < _VSTD::forward<_Tp>(__t)); - } - - using is_transparent = void; -}; - -} // namespace ranges - -#endif // !defined(_LIBCPP_HAS_NO_RANGES) - -_LIBCPP_END_NAMESPACE_STD - #endif // _LIBCPP_FUNCTIONAL diff --git a/libcxx/include/future b/libcxx/include/future index 5d2732b73cf61..349e6afc43e4a 100644 --- a/libcxx/include/future +++ b/libcxx/include/future @@ -364,6 +364,8 @@ template struct uses_allocator, Alloc>; #include <__availability> #include <__config> #include <__debug> +#include <__memory/allocator_arg_t.h> +#include <__memory/uses_allocator.h> #include <__utility/__decay_copy.h> #include <__utility/forward.h> #include diff --git a/libcxx/include/map b/libcxx/include/map index 0da590fd71148..513a04dd79230 100644 --- a/libcxx/include/map +++ b/libcxx/include/map @@ -491,6 +491,7 @@ erase_if(multimap& c, Predicate pred); // C++20 #include <__config> #include <__debug> +#include <__functional/is_transparent.h> #include <__node_handle> #include <__tree> #include <__utility/forward.h> diff --git a/libcxx/include/memory b/libcxx/include/memory index 62dd992b47278..4f73a81a2d86e 100644 --- a/libcxx/include/memory +++ b/libcxx/include/memory @@ -672,6 +672,7 @@ void* align(size_t alignment, size_t size, void*& ptr, size_t& space); #include <__memory/addressof.h> #include <__memory/allocation_guard.h> #include <__memory/allocator.h> +#include <__memory/allocator_arg_t.h> #include <__memory/allocator_traits.h> #include <__memory/compressed_pair.h> #include <__memory/construct_at.h> @@ -682,6 +683,7 @@ void* align(size_t alignment, size_t size, void*& ptr, size_t& space); #include <__memory/temporary_buffer.h> #include <__memory/uninitialized_algorithms.h> #include <__memory/unique_ptr.h> +#include <__memory/uses_allocator.h> #include #include #include diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap index 9c6c678affbb6..7549815d4d9e8 100644 --- a/libcxx/include/module.modulemap +++ b/libcxx/include/module.modulemap @@ -407,10 +407,32 @@ module std [system] { module functional { header "functional" export * + module __functional { - module hash { header "__functional/hash.h" } - module unary_function { header "__functional/unary_function.h" } - module unwrap_ref { header "__functional/unwrap_ref.h" } + module binary_function { header "__functional/binary_function.h" } + module binary_negate { header "__functional/binary_negate.h" } + module bind { header "__functional/bind.h" } + module bind_front { header "__functional/bind_front.h" } + module binder1st { header "__functional/binder1st.h" } + module binder2nd { header "__functional/binder2nd.h" } + module default_searcher { header "__functional/default_searcher.h" } + module function { header "__functional/function.h" } + module hash { header "__functional/hash.h" } + module identity { header "__functional/identity.h" } + module invoke { header "__functional/invoke.h" } + module mem_fn { header "__functional/mem_fn.h" } + module mem_fun_ref { header "__functional/mem_fun_ref.h" } + module not_fn { header "__functional/not_fn.h" } + module operations { header "__functional/operations.h" } + module perfect_forward { header "__functional/perfect_forward.h" } + module pointer_to_binary_function { header "__functional/pointer_to_binary_function.h" } + module pointer_to_unary_function { header "__functional/pointer_to_unary_function.h" } + module ranges_operations { header "__functional/ranges_operations.h" } + module reference_wrapper { header "__functional/reference_wrapper.h" } + module unary_function { header "__functional/unary_function.h" } + module unary_negate { header "__functional/unary_negate.h" } + module unwrap_ref { header "__functional/unwrap_ref.h" } + module weak_result_type { header "__functional/weak_result_type.h" } } } module future { @@ -507,6 +529,7 @@ module std [system] { module addressof { header "__memory/addressof.h" } module allocation_guard { header "__memory/allocation_guard.h" } module allocator_traits { header "__memory/allocator_traits.h" } + module allocator_arg_t { header "__memory/allocator_arg_t.h" } module allocator { header "__memory/allocator.h" } module auto_ptr { header "__memory/auto_ptr.h" } module compressed_pair { header "__memory/compressed_pair.h" } @@ -725,7 +748,6 @@ module std [system] { module __debug { header "__debug" export * } module __errc { header "__errc" export * } module __function_like { header "__function_like.h" export * } - module __functional_base { header "__functional_base" export * } module __hash_table { header "__hash_table" export * } module __locale { header "__locale" export * } module __mutex_base { header "__mutex_base" export * } diff --git a/libcxx/include/queue b/libcxx/include/queue index 3c7bbf2f6adb7..42470e3a10224 100644 --- a/libcxx/include/queue +++ b/libcxx/include/queue @@ -179,6 +179,7 @@ template */ #include <__config> +#include <__memory/uses_allocator.h> #include <__utility/forward.h> #include #include diff --git a/libcxx/include/set b/libcxx/include/set index 0da484b2248d1..21ec8435dd840 100644 --- a/libcxx/include/set +++ b/libcxx/include/set @@ -435,6 +435,7 @@ erase_if(multiset& c, Predicate pred); // C++20 #include <__config> #include <__debug> +#include <__functional/is_transparent.h> #include <__node_handle> #include <__tree> #include <__utility/forward.h> diff --git a/libcxx/include/stack b/libcxx/include/stack index 6dd055e86860f..aefef31ac97b4 100644 --- a/libcxx/include/stack +++ b/libcxx/include/stack @@ -88,6 +88,7 @@ template */ #include <__config> +#include <__memory/uses_allocator.h> #include <__utility/forward.h> #include diff --git a/libcxx/include/system_error b/libcxx/include/system_error index 564f37a96b4f0..aab97681156cf 100644 --- a/libcxx/include/system_error +++ b/libcxx/include/system_error @@ -144,7 +144,8 @@ template <> struct hash; #include <__config> #include <__errc> -#include <__functional_base> // unary_function +#include <__functional/unary_function.h> +#include <__functional_base> #include #include #include diff --git a/libcxx/include/tuple b/libcxx/include/tuple index 42e05b988faa8..032ac861d22ff 100644 --- a/libcxx/include/tuple +++ b/libcxx/include/tuple @@ -150,11 +150,13 @@ template */ #include <__config> -#include <__functional_base> #include <__functional/unwrap_ref.h> +#include <__functional_base> +#include <__memory/allocator_arg_t.h> +#include <__memory/uses_allocator.h> +#include <__tuple> #include <__utility/forward.h> #include <__utility/move.h> -#include <__tuple> #include #include #include diff --git a/libcxx/include/typeindex b/libcxx/include/typeindex index 36d8bfd88ec3f..88bb9ef03d61e 100644 --- a/libcxx/include/typeindex +++ b/libcxx/include/typeindex @@ -45,6 +45,7 @@ struct hash */ #include <__config> +#include <__functional/unary_function.h> #include <__functional_base> #include #include diff --git a/libcxx/include/unordered_map b/libcxx/include/unordered_map index 6e58fa9bf4fbe..ea0382de7d401 100644 --- a/libcxx/include/unordered_map +++ b/libcxx/include/unordered_map @@ -433,6 +433,7 @@ template #include <__config> #include <__debug> +#include <__functional/is_transparent.h> #include <__hash_table> #include <__node_handle> #include <__utility/forward.h> diff --git a/libcxx/include/unordered_set b/libcxx/include/unordered_set index 0e4901d6c8190..a775a9250268d 100644 --- a/libcxx/include/unordered_set +++ b/libcxx/include/unordered_set @@ -388,6 +388,7 @@ template #include <__config> #include <__debug> +#include <__functional/is_transparent.h> #include <__hash_table> #include <__node_handle> #include <__utility/forward.h> diff --git a/libcxx/test/std/utilities/meta/meta.trans/meta.trans.other/result_of.pass.cpp b/libcxx/test/std/utilities/meta/meta.trans/meta.trans.other/result_of.pass.cpp index e6897be11867a..5efb568ce30b3 100644 --- a/libcxx/test/std/utilities/meta/meta.trans/meta.trans.other/result_of.pass.cpp +++ b/libcxx/test/std/utilities/meta/meta.trans/meta.trans.other/result_of.pass.cpp @@ -11,8 +11,9 @@ // result_of #include -#include #include +#include +#include #include "test_macros.h" struct S diff --git a/libcxx/test/std/utilities/meta/meta.trans/meta.trans.other/result_of11.pass.cpp b/libcxx/test/std/utilities/meta/meta.trans/meta.trans.other/result_of11.pass.cpp index 844e11eef7bbf..2589f17b437bc 100644 --- a/libcxx/test/std/utilities/meta/meta.trans/meta.trans.other/result_of11.pass.cpp +++ b/libcxx/test/std/utilities/meta/meta.trans/meta.trans.other/result_of11.pass.cpp @@ -13,6 +13,7 @@ // result_of #include +#include #include #include #include "test_macros.h" diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/deduct.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/deduct.pass.cpp index 4951ae22d4069..e8c0fbf66937f 100644 --- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/deduct.pass.cpp +++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/deduct.pass.cpp @@ -22,8 +22,9 @@ // possible that they provide explicit guides to make it work. #include -#include #include +#include +#include #include "test_macros.h" #include "archetypes.h" From a319eafd160d8d8f3ebd47a3a636e01597b36bff Mon Sep 17 00:00:00 2001 From: jasonliu Date: Thu, 1 Jul 2021 17:59:47 +0000 Subject: [PATCH 446/619] [libc++] Provide c++03 alternative for va_copy if available in xlocale.h Summary: If we are on c++03 mode for some reason, and __builtin_va_copy is available, then use it instead of error out on not having va_copy in 03 mode. Reviewed by: ldionne Differential Revision: https://reviews.llvm.org/D100336 --- libcxx/include/__support/ibm/xlocale.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/libcxx/include/__support/ibm/xlocale.h b/libcxx/include/__support/ibm/xlocale.h index 77ac02a2241ac..58bdc67af9681 100644 --- a/libcxx/include/__support/ibm/xlocale.h +++ b/libcxx/include/__support/ibm/xlocale.h @@ -310,7 +310,12 @@ int vasprintf(char **strp, const char *fmt, va_list ap) { } va_list ap_copy; + // va_copy may not be provided by the C library in C++ 03 mode. +#if defined(_LIBCPP_CXX03_LANG) && __has_builtin(__builtin_va_copy) + __builtin_va_copy(ap_copy, ap); +#else va_copy(ap_copy, ap); +#endif int str_size = vsnprintf(*strp, buff_size, fmt, ap_copy); va_end(ap_copy); From c360553c15a8e5aa94d2236eb73e7dfeab9543e5 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 18 Jun 2021 13:33:14 -0400 Subject: [PATCH 447/619] [runtimes] Simplify how we specify XFAIL & friends based on the triple Now that Lit supports regular expressions inside XFAIL & friends, it is much easier to write Lit annotations based on the triple. Differential Revision: https://reviews.llvm.org/D104747 --- .../language.support/cxa_deleted_virtual.pass.cpp | 7 +------ .../aligned_alloc_availability.verify.cpp | 6 +----- .../support.dynamic/libcpp_deallocate.sh.cpp | 5 +---- .../new_faligned_allocation.pass.cpp | 5 +---- .../aligned_allocation_macro.compile.pass.cpp | 6 +----- .../libcxx/thread/atomic.availability.verify.cpp | 8 +------- .../libcxx/thread/barrier.availability.verify.cpp | 8 +------- .../libcxx/thread/latch.availability.verify.cpp | 8 +------- .../libcxx/thread/semaphore.availability.verify.cpp | 8 +------- ...0202_notify_from_pthread_created_thread.pass.cpp | 5 +---- .../thread.thread.this/sleep_for.pass.cpp | 4 +--- .../thread.thread.this/sleep_for.signals.pass.cpp | 4 +--- .../charconv.to.chars/availability.fail.cpp | 7 +------ .../atomic_wait.pass.cpp | 8 +------- .../syserr.errcat.objects/generic_category.pass.cpp | 5 +---- .../syserr.errcat.objects/system_category.pass.cpp | 5 +---- .../fstreams/filebuf.members/open_path.pass.cpp | 7 +------ .../fstreams/fstream.cons/path.pass.cpp | 7 +------ .../fstreams/fstream.members/open_path.pass.cpp | 7 +------ .../fstreams/ifstream.cons/path.pass.cpp | 7 +------ .../fstreams/ifstream.members/open_path.pass.cpp | 7 +------ .../fstreams/ofstream.cons/path.pass.cpp | 7 +------ .../fstreams/ofstream.members/open_path.pass.cpp | 7 +------ .../directory_entry.mods/refresh.pass.cpp | 2 +- .../directory_entry.obs/file_size.pass.cpp | 2 +- .../directory_entry.obs/hard_link_count.pass.cpp | 2 +- .../directory_entry.obs/last_write_time.pass.cpp | 2 +- .../fs.op.funcs/fs.op.copy_file/copy_file.pass.cpp | 2 +- .../create_directories.pass.cpp | 2 +- .../create_directory.pass.cpp | 2 +- .../create_directory_with_attributes.pass.cpp | 2 +- .../fs.op.funcs/fs.op.file_size/file_size.pass.cpp | 2 +- .../fs.op.last_write_time/last_write_time.pass.cpp | 2 +- .../test/std/input.output/filesystems/lit.local.cfg | 2 +- .../istream.formatted.arithmetic/bool.pass.cpp | 7 +------ .../istream.formatted.arithmetic/double.pass.cpp | 7 +------ .../istream.formatted.arithmetic/float.pass.cpp | 7 +------ .../istream.formatted.arithmetic/int.pass.cpp | 7 +------ .../istream.formatted.arithmetic/long.pass.cpp | 7 +------ .../long_double.pass.cpp | 7 +------ .../istream.formatted.arithmetic/long_long.pass.cpp | 7 +------ .../istream.formatted.arithmetic/pointer.pass.cpp | 7 +------ .../istream.formatted.arithmetic/short.pass.cpp | 7 +------ .../unsigned_int.pass.cpp | 7 +------ .../unsigned_long.pass.cpp | 7 +------ .../unsigned_long_long.pass.cpp | 7 +------ .../unsigned_short.pass.cpp | 7 +------ .../istream_extractors/streambuf.pass.cpp | 7 +------ .../input.streams/istream.unformatted/get.pass.cpp | 7 +------ .../istream.unformatted/get_chart.pass.cpp | 7 +------ .../istream.unformatted/get_pointer_size.pass.cpp | 7 +------ .../get_pointer_size_chart.pass.cpp | 7 +------ .../istream.unformatted/get_streambuf.pass.cpp | 7 +------ .../get_streambuf_chart.pass.cpp | 7 +------ .../getline_pointer_size.pass.cpp | 7 +------ .../getline_pointer_size_chart.pass.cpp | 7 +------ .../istream.unformatted/ignore.pass.cpp | 7 +------ .../input.streams/istream.unformatted/peek.pass.cpp | 7 +------ .../input.streams/istream.unformatted/read.pass.cpp | 7 +------ .../istream.unformatted/seekg_off.pass.cpp | 4 +--- .../minmax_showbase.pass.cpp | 4 +--- .../ostream.inserters.arithmetic/minus1.pass.cpp | 2 +- .../ios.types/ios_Init/ios_Init.multiple.pass.cpp | 7 +------ .../delete_align_val_t_replace.pass.cpp | 6 +----- .../new.delete.array/new_align_val_t.pass.cpp | 6 +----- .../new_align_val_t_nothrow.pass.cpp | 6 +----- .../new_align_val_t_nothrow_replace.pass.cpp | 6 +----- .../sized_delete_array_fsizeddeallocation.pass.cpp | 4 +--- .../delete_align_val_t_replace.pass.cpp | 6 +----- .../new.delete.single/new_align_val_t.pass.cpp | 6 +----- .../new_align_val_t_nothrow.pass.cpp | 6 +----- .../new_align_val_t_nothrow_replace.pass.cpp | 6 +----- .../sized_delete_fsizeddeallocation.pass.cpp | 4 +--- .../uncaught/uncaught_exceptions.pass.cpp | 7 ++----- .../locale.collate.byname/compare.pass.cpp | 5 ++--- .../ctor_char16_t_char8_t.pass.cpp | 8 +------- .../ctor_char32_t_char8_t.pass.cpp | 8 +------- .../locale.codecvt/ctor_char16_t_char8_t.pass.cpp | 8 +------- .../locale.codecvt/ctor_char32_t_char8_t.pass.cpp | 8 +------- .../char16_t_char8_t_always_noconv.pass.cpp | 8 +------- .../char16_t_char8_t_encoding.pass.cpp | 8 +------- .../char16_t_char8_t_in.pass.cpp | 8 +------- .../char16_t_char8_t_length.pass.cpp | 8 +------- .../char16_t_char8_t_max_length.pass.cpp | 8 +------- .../char16_t_char8_t_out.pass.cpp | 8 +------- .../char16_t_char8_t_unshift.pass.cpp | 8 +------- .../char32_t_char8_t_always_noconv.pass.cpp | 8 +------- .../char32_t_char8_t_encoding.pass.cpp | 8 +------- .../char32_t_char8_t_in.pass.cpp | 8 +------- .../char32_t_char8_t_length.pass.cpp | 8 +------- .../char32_t_char8_t_max_length.pass.cpp | 8 +------- .../char32_t_char8_t_out.pass.cpp | 8 +------- .../char32_t_char8_t_unshift.pass.cpp | 8 +------- .../utf_sanity_check.pass.cpp | 8 +------- .../locale.codecvt/types_char16_t_char8_t.pass.cpp | 8 +------- .../locale.codecvt/types_char32_t_char8_t.pass.cpp | 8 +------- .../get_long_double_zh_CN.pass.cpp | 2 +- .../put_long_double_zh_CN.pass.cpp | 2 +- .../facet.num.put.members/put_long_double.pass.cpp | 2 +- .../facet.num.get.members/get_long.pass.cpp | 7 +------ .../locale.time.get.byname/get_one.pass.cpp | 2 +- .../locale.time.get.byname/get_one_wide.pass.cpp | 2 +- .../locale.time.get.byname/get_weekday.pass.cpp | 2 +- .../get_weekday_wide.pass.cpp | 2 +- .../locale.time.put.byname/put1.pass.cpp | 2 +- .../locales/locale/locale.cons/assign.pass.cpp | 8 +------- .../locale/locale.cons/char_pointer.pass.cpp | 8 +------- .../locales/locale/locale.cons/copy.pass.cpp | 8 +------- .../locales/locale/locale.cons/default.pass.cpp | 8 +------- .../locale.cons/locale_char_pointer_cat.pass.cpp | 8 +------- .../locale/locale.cons/locale_facetptr.pass.cpp | 8 +------- .../locale/locale.cons/locale_locale_cat.pass.cpp | 8 +------- .../locale/locale.cons/locale_string_cat.pass.cpp | 8 +------- .../locales/locale/locale.cons/string.pass.cpp | 8 +------- .../locales/locale/locale.members/combine.pass.cpp | 8 +------- .../locales/locale/locale.statics/classic.pass.cpp | 8 +------- .../locales/locale/locale.statics/global.pass.cpp | 8 +------- .../std/numerics/rand/rand.device/ctor.pass.cpp | 4 +--- .../std/numerics/rand/rand.device/eval.pass.cpp | 4 +--- .../std/re/re.alg/re.alg.match/awk.locale.pass.cpp | 2 +- .../re/re.alg/re.alg.match/basic.locale.pass.cpp | 2 +- .../std/re/re.alg/re.alg.match/ecma.locale.pass.cpp | 2 +- .../re/re.alg/re.alg.match/extended.locale.pass.cpp | 2 +- .../std/re/re.alg/re.alg.search/awk.locale.pass.cpp | 2 +- .../re/re.alg/re.alg.search/basic.locale.pass.cpp | 2 +- .../re/re.alg/re.alg.search/ecma.locale.pass.cpp | 2 +- .../re.alg/re.alg.search/extended.locale.pass.cpp | 2 +- .../std/re/re.traits/lookup_collatename.pass.cpp | 2 +- .../string.capacity/over_max_size.pass.cpp | 4 +--- .../string.capacity/reserve_size.pass.cpp | 8 +------- .../futures/futures.async/async_race.38682.pass.cpp | 7 +------ .../futures/futures.future_error/what.pass.cpp | 4 +--- .../test/std/thread/thread.barrier/arrive.pass.cpp | 8 +------- .../thread/thread.barrier/arrive_and_drop.pass.cpp | 8 +------- .../thread/thread.barrier/arrive_and_wait.pass.cpp | 8 +------- .../std/thread/thread.barrier/completion.pass.cpp | 8 +------- .../thread/thread.latch/arrive_and_wait.pass.cpp | 8 +------- .../std/thread/thread.latch/count_down.pass.cpp | 8 +------- .../test/std/thread/thread.latch/try_wait.pass.cpp | 8 +------- .../thread.lock.shared.cons/default.pass.cpp | 4 +--- .../thread.lock.shared.cons/move_assign.pass.cpp | 4 +--- .../thread.lock.shared.cons/move_ctor.pass.cpp | 4 +--- .../thread.lock.shared.cons/mutex.pass.cpp | 4 +--- .../mutex_adopt_lock.pass.cpp | 4 +--- .../mutex_defer_lock.pass.cpp | 4 +--- .../thread.lock.shared.cons/mutex_duration.pass.cpp | 4 +--- .../mutex_time_point.pass.cpp | 4 +--- .../mutex_try_to_lock.pass.cpp | 4 +--- .../thread.lock.shared.locking/lock.pass.cpp | 4 +--- .../thread.lock.shared.obs/mutex.pass.cpp | 4 +--- .../thread.lock.shared.obs/owns_lock.pass.cpp | 4 +--- .../thread.shared_mutex.class/assign.fail.cpp | 4 +--- .../thread.shared_mutex.class/copy.fail.cpp | 4 +--- .../thread.shared_mutex.class/default.pass.cpp | 4 +--- .../thread.shared_mutex.class/lock.pass.cpp | 4 +--- .../thread.shared_mutex.class/lock_shared.pass.cpp | 4 +--- .../thread.shared_mutex.class/try_lock.pass.cpp | 4 +--- .../try_lock_shared.pass.cpp | 4 +--- .../assign.compile.fail.cpp | 4 +--- .../copy.compile.fail.cpp | 4 +--- .../thread.sharedtimedmutex.class/default.pass.cpp | 4 +--- .../thread.sharedtimedmutex.class/lock.pass.cpp | 4 +--- .../lock_shared.pass.cpp | 4 +--- .../thread.sharedtimedmutex.class/try_lock.pass.cpp | 4 +--- .../try_lock_for.pass.cpp | 4 +--- .../try_lock_shared.pass.cpp | 4 +--- .../try_lock_shared_for.pass.cpp | 4 +--- .../try_lock_shared_until.pass.cpp | 4 +--- .../try_lock_until.pass.cpp | 4 +--- .../try_lock_until_deadlock_bug.pass.cpp | 4 +--- .../std/thread/thread.semaphore/acquire.pass.cpp | 8 +------- .../std/thread/thread.semaphore/binary.pass.cpp | 8 +------- .../std/thread/thread.semaphore/release.pass.cpp | 8 +------- .../test/std/thread/thread.semaphore/timed.pass.cpp | 8 +------- .../thread/thread.semaphore/try_acquire.pass.cpp | 8 +------- .../any/any.class/any.assign/copy.pass.cpp | 5 +---- .../any/any.class/any.assign/move.pass.cpp | 5 +---- .../any/any.class/any.assign/value.pass.cpp | 5 +---- .../utilities/any/any.class/any.cons/copy.pass.cpp | 5 +---- .../any/any.class/any.cons/in_place_type.pass.cpp | 5 +---- .../utilities/any/any.class/any.cons/move.pass.cpp | 5 +---- .../utilities/any/any.class/any.cons/value.pass.cpp | 5 +---- .../any/any.class/any.modifiers/emplace.pass.cpp | 5 +---- .../any/any.class/any.modifiers/reset.pass.cpp | 5 +---- .../any/any.class/any.modifiers/swap.pass.cpp | 5 +---- .../any.cast/any_cast_pointer.pass.cpp | 5 +---- .../any.cast/any_cast_reference.pass.cpp | 5 +---- ...any_cast_request_invalid_value_category.fail.cpp | 5 +---- .../any.cast/const_correctness.fail.cpp | 5 +---- .../any.cast/not_copy_constructible.fail.cpp | 5 +---- .../utilities/any/any.nonmembers/make_any.pass.cpp | 5 +---- .../std/utilities/any/any.nonmembers/swap.pass.cpp | 5 +---- .../charconv.from.chars/integral.roundtrip.pass.cpp | 7 +------ .../charconv/charconv.to.chars/integral.pass.cpp | 7 +------ .../format/format.error/format.error.pass.cpp | 8 +------- .../format.parse.ctx/check_arg_id.pass.cpp | 8 +------- .../format.parse.ctx/next_arg_id.pass.cpp | 8 +------- .../memory/temporary.buffer/overaligned.pass.cpp | 5 +---- .../optional.bad_optional_access/default.pass.cpp | 5 +---- .../optional.bad_optional_access/derive.pass.cpp | 5 +---- .../optional.object/optional.object.ctor/U.pass.cpp | 5 +---- .../optional.object.ctor/const_T.pass.cpp | 5 +---- .../optional.object.ctor/move.pass.cpp | 5 +---- .../optional.object.ctor/rvalue_T.pass.cpp | 5 +---- .../optional.object.observe/value.pass.cpp | 5 +---- .../optional.object.observe/value_const.pass.cpp | 5 +---- .../value_const_rvalue.pass.cpp | 5 +---- .../optional.object.observe/value_rvalue.pass.cpp | 5 +---- .../optional.specalg/make_optional.pass.cpp | 5 +---- .../time/time.clock/time.clock.file/now.pass.cpp | 7 +------ .../bad_variant_access.pass.cpp | 5 +---- .../variant/variant.get/get_index.pass.cpp | 5 +---- .../utilities/variant/variant.get/get_type.pass.cpp | 5 +---- .../variant.variant/variant.assign/T.pass.cpp | 5 +---- .../variant.variant/variant.assign/copy.pass.cpp | 5 +---- .../variant.variant/variant.assign/move.pass.cpp | 5 +---- .../variant/variant.variant/variant.ctor/T.pass.cpp | 5 +---- .../variant.variant/variant.ctor/copy.pass.cpp | 5 +---- .../variant.variant/variant.ctor/default.pass.cpp | 5 +---- .../variant.ctor/in_place_index_args.pass.cpp | 5 +---- .../in_place_index_init_list_args.pass.cpp | 5 +---- .../variant.ctor/in_place_type_args.pass.cpp | 5 +---- .../in_place_type_init_list_args.pass.cpp | 5 +---- .../variant.variant/variant.ctor/move.pass.cpp | 5 +---- .../variant.mod/emplace_index_args.pass.cpp | 5 +---- .../emplace_index_init_list_args.pass.cpp | 5 +---- .../variant.mod/emplace_type_args.pass.cpp | 5 +---- .../emplace_type_init_list_args.pass.cpp | 5 +---- .../variant.variant/variant.swap/swap.pass.cpp | 5 +---- .../variant.visit/robust_against_adl.pass.cpp | 5 +---- .../utilities/variant/variant.visit/visit.pass.cpp | 5 +---- .../variant.visit/visit_return_type.pass.cpp | 5 +---- libcxx/utils/libcxx/test/params.py | 9 +-------- libcxxabi/test/catch_function_01.pass.cpp | 3 +-- .../test/catch_member_data_pointer_01.pass.cpp | 3 +-- .../test/catch_member_pointer_nullptr.pass.cpp | 5 +---- libcxxabi/test/catch_multi_level_pointer.pass.cpp | 3 +-- libcxxabi/test/catch_pointer_nullptr.pass.cpp | 5 +---- libcxxabi/test/catch_ptr_02.pass.cpp | 2 +- .../test/cxa_vec_new_overflow_PR41395.pass.cpp | 7 +------ libcxxabi/test/dynamic_cast.pass.cpp | 8 ++------ libcxxabi/test/exception_object_alignment.pass.cpp | 6 +----- libcxxabi/test/forced_unwind1.pass.cpp | 8 +------- libcxxabi/test/forced_unwind2.pass.cpp | 8 +------- libcxxabi/test/incomplete_type.sh.cpp | 5 +---- .../test/test_aux_runtime_op_array_new.pass.cpp | 7 +------ libcxxabi/test/test_demangle.pass.cpp | 8 +------- .../test/test_exception_address_alignment.pass.cpp | 13 +++++-------- libcxxabi/test/uncaught_exceptions.pass.cpp | 7 +------ libunwind/test/remember_state_leak.pass.sh.s | 4 ++-- libunwind/test/signal_unwind.pass.cpp | 2 +- libunwind/test/unwind_leaffunction.pass.cpp | 2 +- 252 files changed, 260 insertions(+), 1141 deletions(-) diff --git a/libcxx/test/libcxx/language.support/cxa_deleted_virtual.pass.cpp b/libcxx/test/libcxx/language.support/cxa_deleted_virtual.pass.cpp index 68c610af526d7..2f63ee3ceb7c2 100644 --- a/libcxx/test/libcxx/language.support/cxa_deleted_virtual.pass.cpp +++ b/libcxx/test/libcxx/language.support/cxa_deleted_virtual.pass.cpp @@ -11,12 +11,7 @@ // Test exporting the symbol: "__cxa_deleted_virtual" in macosx // But don't expect the symbol to be exported in previous versions. // -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13}} struct S { virtual void f() = delete; virtual ~S() {} }; int main(int, char**) { diff --git a/libcxx/test/libcxx/language.support/support.dynamic/aligned_alloc_availability.verify.cpp b/libcxx/test/libcxx/language.support/support.dynamic/aligned_alloc_availability.verify.cpp index 3f4f3a1ce14c6..8d5a73060e673 100644 --- a/libcxx/test/libcxx/language.support/support.dynamic/aligned_alloc_availability.verify.cpp +++ b/libcxx/test/libcxx/language.support/support.dynamic/aligned_alloc_availability.verify.cpp @@ -16,11 +16,7 @@ // UNSUPPORTED: apple-clang-9, apple-clang-10 // UNSUPPORTED: clang-5, clang-6, clang-7 -// REQUIRES: use_system_cxx_lib && (x86_64-apple-macosx10.13 || \ -// REQUIRES: x86_64-apple-macosx10.12 || \ -// REQUIRES: x86_64-apple-macosx10.11 || \ -// REQUIRES: x86_64-apple-macosx10.10 || \ -// REQUIRES: x86_64-apple-macosx10.9) +// REQUIRES: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13}} #include #include diff --git a/libcxx/test/libcxx/language.support/support.dynamic/libcpp_deallocate.sh.cpp b/libcxx/test/libcxx/language.support/support.dynamic/libcpp_deallocate.sh.cpp index 34905bb2e2885..66627d9fa6767 100644 --- a/libcxx/test/libcxx/language.support/support.dynamic/libcpp_deallocate.sh.cpp +++ b/libcxx/test/libcxx/language.support/support.dynamic/libcpp_deallocate.sh.cpp @@ -16,10 +16,7 @@ // The dylibs shipped before macosx10.13 do not contain the aligned allocation // functions, so trying to force using those with -faligned-allocation results // in a link error. -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} // AppleClang < 10 incorrectly warns that aligned allocation is not supported // even when it is supported. diff --git a/libcxx/test/libcxx/language.support/support.dynamic/new_faligned_allocation.pass.cpp b/libcxx/test/libcxx/language.support/support.dynamic/new_faligned_allocation.pass.cpp index d423fc4d86c77..5060f062c34ca 100644 --- a/libcxx/test/libcxx/language.support/support.dynamic/new_faligned_allocation.pass.cpp +++ b/libcxx/test/libcxx/language.support/support.dynamic/new_faligned_allocation.pass.cpp @@ -16,10 +16,7 @@ // The dylibs shipped before macosx10.13 do not contain the aligned allocation // functions, so trying to force using those with -faligned-allocation results // in a link error. -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} // REQUIRES: -faligned-allocation // ADDITIONAL_COMPILE_FLAGS: -faligned-allocation diff --git a/libcxx/test/libcxx/memory/aligned_allocation_macro.compile.pass.cpp b/libcxx/test/libcxx/memory/aligned_allocation_macro.compile.pass.cpp index fbf8f41314c8b..2704c9584d435 100644 --- a/libcxx/test/libcxx/memory/aligned_allocation_macro.compile.pass.cpp +++ b/libcxx/test/libcxx/memory/aligned_allocation_macro.compile.pass.cpp @@ -15,11 +15,7 @@ // GCC 5 doesn't support aligned allocation // UNSUPPORTED: gcc-5 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13}} #include diff --git a/libcxx/test/libcxx/thread/atomic.availability.verify.cpp b/libcxx/test/libcxx/thread/atomic.availability.verify.cpp index 8321193da8838..e96d461a45720 100644 --- a/libcxx/test/libcxx/thread/atomic.availability.verify.cpp +++ b/libcxx/test/libcxx/thread/atomic.availability.verify.cpp @@ -7,13 +7,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11 -// REQUIRES: use_system_cxx_lib && (x86_64-apple-macosx10.9 || \ -// REQUIRES: x86_64-apple-macosx10.10 || \ -// REQUIRES: x86_64-apple-macosx10.11 || \ -// REQUIRES: x86_64-apple-macosx10.12 || \ -// REQUIRES: x86_64-apple-macosx10.13 || \ -// REQUIRES: x86_64-apple-macosx10.14 || \ -// REQUIRES: x86_64-apple-macosx10.15) +// REQUIRES: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // Test the availability markup on the C++20 Synchronization Library diff --git a/libcxx/test/libcxx/thread/barrier.availability.verify.cpp b/libcxx/test/libcxx/thread/barrier.availability.verify.cpp index eda89bb058649..8da39fe3c2dfd 100644 --- a/libcxx/test/libcxx/thread/barrier.availability.verify.cpp +++ b/libcxx/test/libcxx/thread/barrier.availability.verify.cpp @@ -7,13 +7,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11 -// REQUIRES: use_system_cxx_lib && (x86_64-apple-macosx10.9 || \ -// REQUIRES: x86_64-apple-macosx10.10 || \ -// REQUIRES: x86_64-apple-macosx10.11 || \ -// REQUIRES: x86_64-apple-macosx10.12 || \ -// REQUIRES: x86_64-apple-macosx10.13 || \ -// REQUIRES: x86_64-apple-macosx10.14 || \ -// REQUIRES: x86_64-apple-macosx10.15) +// REQUIRES: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // Test the availability markup on std::barrier. diff --git a/libcxx/test/libcxx/thread/latch.availability.verify.cpp b/libcxx/test/libcxx/thread/latch.availability.verify.cpp index 402c73181ce3e..8555e0e70267a 100644 --- a/libcxx/test/libcxx/thread/latch.availability.verify.cpp +++ b/libcxx/test/libcxx/thread/latch.availability.verify.cpp @@ -7,13 +7,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11 -// REQUIRES: use_system_cxx_lib && (x86_64-apple-macosx10.9 || \ -// REQUIRES: x86_64-apple-macosx10.10 || \ -// REQUIRES: x86_64-apple-macosx10.11 || \ -// REQUIRES: x86_64-apple-macosx10.12 || \ -// REQUIRES: x86_64-apple-macosx10.13 || \ -// REQUIRES: x86_64-apple-macosx10.14 || \ -// REQUIRES: x86_64-apple-macosx10.15) +// REQUIRES: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // Test the availability markup on std::latch. diff --git a/libcxx/test/libcxx/thread/semaphore.availability.verify.cpp b/libcxx/test/libcxx/thread/semaphore.availability.verify.cpp index 7b7688bd6a061..a6d5c36678284 100644 --- a/libcxx/test/libcxx/thread/semaphore.availability.verify.cpp +++ b/libcxx/test/libcxx/thread/semaphore.availability.verify.cpp @@ -7,13 +7,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11 -// REQUIRES: use_system_cxx_lib && (x86_64-apple-macosx10.9 || \ -// REQUIRES: x86_64-apple-macosx10.10 || \ -// REQUIRES: x86_64-apple-macosx10.11 || \ -// REQUIRES: x86_64-apple-macosx10.12 || \ -// REQUIRES: x86_64-apple-macosx10.13 || \ -// REQUIRES: x86_64-apple-macosx10.14 || \ -// REQUIRES: x86_64-apple-macosx10.15) +// REQUIRES: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // Test the availability markup on std::counting_semaphore and std::binary_semaphore. diff --git a/libcxx/test/libcxx/thread/thread.condition/PR30202_notify_from_pthread_created_thread.pass.cpp b/libcxx/test/libcxx/thread/thread.condition/PR30202_notify_from_pthread_created_thread.pass.cpp index 6c084e0c5cb5d..30c0391368814 100644 --- a/libcxx/test/libcxx/thread/thread.condition/PR30202_notify_from_pthread_created_thread.pass.cpp +++ b/libcxx/test/libcxx/thread/thread.condition/PR30202_notify_from_pthread_created_thread.pass.cpp @@ -14,10 +14,7 @@ // UNSUPPORTED: c++03 // PR30202 was fixed starting in macosx10.13. -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.12 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.11 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.10 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.9 +// UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} // diff --git a/libcxx/test/libcxx/thread/thread.threads/thread.thread.this/sleep_for.pass.cpp b/libcxx/test/libcxx/thread/thread.threads/thread.thread.this/sleep_for.pass.cpp index dafa16667871b..474520c339296 100644 --- a/libcxx/test/libcxx/thread/thread.threads/thread.thread.this/sleep_for.pass.cpp +++ b/libcxx/test/libcxx/thread/thread.threads/thread.thread.this/sleep_for.pass.cpp @@ -11,9 +11,7 @@ // Until 58a0a70fb2f1, this_thread::sleep_for could sometimes get interrupted // by signals and this test would fail spuriously. Disable the test on the // corresponding system libraries. -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.11 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.10 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.9 +// UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11}} // diff --git a/libcxx/test/libcxx/thread/thread.threads/thread.thread.this/sleep_for.signals.pass.cpp b/libcxx/test/libcxx/thread/thread.threads/thread.thread.this/sleep_for.signals.pass.cpp index 59e3b28fcfdc7..3cdb6735218b8 100644 --- a/libcxx/test/libcxx/thread/thread.threads/thread.thread.this/sleep_for.signals.pass.cpp +++ b/libcxx/test/libcxx/thread/thread.threads/thread.thread.this/sleep_for.signals.pass.cpp @@ -14,9 +14,7 @@ // Until 58a0a70fb2f1, this_thread::sleep_for misbehaves when interrupted by // a signal, as tested here. Disable the test on the corresponding system // libraries. -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11}} // diff --git a/libcxx/test/libcxx/utilities/charconv/charconv.to.chars/availability.fail.cpp b/libcxx/test/libcxx/utilities/charconv/charconv.to.chars/availability.fail.cpp index 8c4b1e9b5ac2e..e1ca28a20b67c 100644 --- a/libcxx/test/libcxx/utilities/charconv/charconv.to.chars/availability.fail.cpp +++ b/libcxx/test/libcxx/utilities/charconv/charconv.to.chars/availability.fail.cpp @@ -7,12 +7,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03 -// REQUIRES: use_system_cxx_lib && (x86_64-apple-macosx10.9 || \ -// REQUIRES: x86_64-apple-macosx10.10 || \ -// REQUIRES: x86_64-apple-macosx10.11 || \ -// REQUIRES: x86_64-apple-macosx10.12 || \ -// REQUIRES: x86_64-apple-macosx10.13 || \ -// REQUIRES: x86_64-apple-macosx10.14) +// REQUIRES: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} // Test the availability markup on std::to_chars. diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_wait.pass.cpp b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_wait.pass.cpp index a54988dd73312..d2779aaa19f56 100644 --- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_wait.pass.cpp +++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_wait.pass.cpp @@ -12,13 +12,7 @@ // This test requires the dylib support introduced in D68480, which shipped in // macOS 11.0. -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // diff --git a/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.objects/generic_category.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.objects/generic_category.pass.cpp index cd8d10b40b91f..d90834681bb84 100644 --- a/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.objects/generic_category.pass.cpp +++ b/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.objects/generic_category.pass.cpp @@ -7,10 +7,7 @@ //===----------------------------------------------------------------------===// // XFAIL: suse-linux-enterprise-server-11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} // diff --git a/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.objects/system_category.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.objects/system_category.pass.cpp index 1e25e90547ac7..0e82934e870b0 100644 --- a/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.objects/system_category.pass.cpp +++ b/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.objects/system_category.pass.cpp @@ -13,10 +13,7 @@ // const error_category& system_category(); // XFAIL: suse-linux-enterprise-server-11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} #include #include diff --git a/libcxx/test/std/input.output/file.streams/fstreams/filebuf.members/open_path.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/filebuf.members/open_path.pass.cpp index da5e01e6bab8a..cf852964b9114 100644 --- a/libcxx/test/std/input.output/file.streams/fstreams/filebuf.members/open_path.pass.cpp +++ b/libcxx/test/std/input.output/file.streams/fstreams/filebuf.members/open_path.pass.cpp @@ -10,12 +10,7 @@ // UNSUPPORTED: libcpp-has-no-filesystem-library // Filesystem is supported on Apple platforms starting with macosx10.15. -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.14 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.13 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.12 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.11 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.10 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.9 +// UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} // diff --git a/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/path.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/path.pass.cpp index 9bb5665aef48e..364fe72817aeb 100644 --- a/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/path.pass.cpp +++ b/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/path.pass.cpp @@ -10,12 +10,7 @@ // UNSUPPORTED: libcpp-has-no-filesystem-library // Filesystem is supported on Apple platforms starting with macosx10.15. -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.14 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.13 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.12 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.11 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.10 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.9 +// UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} // diff --git a/libcxx/test/std/input.output/file.streams/fstreams/fstream.members/open_path.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/fstream.members/open_path.pass.cpp index 0ca2f06d553bf..824201a7cf074 100644 --- a/libcxx/test/std/input.output/file.streams/fstreams/fstream.members/open_path.pass.cpp +++ b/libcxx/test/std/input.output/file.streams/fstreams/fstream.members/open_path.pass.cpp @@ -10,12 +10,7 @@ // UNSUPPORTED: libcpp-has-no-filesystem-library // Filesystem is supported on Apple platforms starting with macosx10.15. -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.14 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.13 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.12 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.11 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.10 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.9 +// UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} // diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/path.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/path.pass.cpp index 2309caf9e8c47..aef3014f670a4 100644 --- a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/path.pass.cpp +++ b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/path.pass.cpp @@ -10,12 +10,7 @@ // UNSUPPORTED: libcpp-has-no-filesystem-library // Filesystem is supported on Apple platforms starting with macosx10.15. -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.14 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.13 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.12 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.11 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.10 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.9 +// UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} // FILE_DEPENDENCIES: test.dat diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.members/open_path.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.members/open_path.pass.cpp index 24922d6fe3e62..2bb46b876b14e 100644 --- a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.members/open_path.pass.cpp +++ b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.members/open_path.pass.cpp @@ -10,12 +10,7 @@ // UNSUPPORTED: libcpp-has-no-filesystem-library // Filesystem is supported on Apple platforms starting with macosx10.15. -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.14 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.13 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.12 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.11 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.10 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.9 +// UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} // FILE_DEPENDENCIES: test.dat diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/path.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/path.pass.cpp index 8e3261450bde4..5dc1f74bdaf9d 100644 --- a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/path.pass.cpp +++ b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/path.pass.cpp @@ -10,12 +10,7 @@ // UNSUPPORTED: libcpp-has-no-filesystem-library // Filesystem is supported on Apple platforms starting with macosx10.15. -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.14 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.13 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.12 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.11 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.10 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.9 +// UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} // diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.members/open_path.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.members/open_path.pass.cpp index 495f8134b5df9..2c8fa8ad846cf 100644 --- a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.members/open_path.pass.cpp +++ b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.members/open_path.pass.cpp @@ -10,12 +10,7 @@ // UNSUPPORTED: libcpp-has-no-filesystem-library // Filesystem is supported on Apple platforms starting with macosx10.15. -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.14 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.13 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.12 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.11 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.10 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.9 +// UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} // diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.mods/refresh.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.mods/refresh.pass.cpp index f32608687d113..da436fc6b959c 100644 --- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.mods/refresh.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.mods/refresh.pass.cpp @@ -10,7 +10,7 @@ // The string reported on errors changed, which makes those tests fail when run // against already-released libc++'s. -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.15 // diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_size.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_size.pass.cpp index 0dba01fe1a9c0..271a6e826f2b7 100644 --- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_size.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_size.pass.cpp @@ -10,7 +10,7 @@ // The string reported on errors changed, which makes those tests fail when run // against already-released libc++'s. -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.15 // diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/hard_link_count.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/hard_link_count.pass.cpp index 8875c39ceef3b..44eac78fe8f46 100644 --- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/hard_link_count.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/hard_link_count.pass.cpp @@ -10,7 +10,7 @@ // The string reported on errors changed, which makes those tests fail when run // against already-released libc++'s. -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.15 // diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/last_write_time.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/last_write_time.pass.cpp index cb8fb3f6a7688..928248b3c2b87 100644 --- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/last_write_time.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/last_write_time.pass.cpp @@ -10,7 +10,7 @@ // The string reported on errors changed, which makes those tests fail when run // against already-released libc++'s. -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.15 // diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy_file/copy_file.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy_file/copy_file.pass.cpp index 4e40906a8d848..88e272433fb2d 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy_file/copy_file.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy_file/copy_file.pass.cpp @@ -10,7 +10,7 @@ // The string reported on errors changed, which makes those tests fail when run // against already-released libc++'s. -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.15 // diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directories/create_directories.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directories/create_directories.pass.cpp index 5174b7d454e06..d6b18e2e043be 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directories/create_directories.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directories/create_directories.pass.cpp @@ -9,7 +9,7 @@ // UNSUPPORTED: c++03 // This test requires the dylib support introduced in D92769. -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.15 // diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directory/create_directory.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directory/create_directory.pass.cpp index cad76aa751993..51c9180f81604 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directory/create_directory.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directory/create_directory.pass.cpp @@ -9,7 +9,7 @@ // UNSUPPORTED: c++03 // This test requires the dylib support introduced in D92769. -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.15 // diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directory/create_directory_with_attributes.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directory/create_directory_with_attributes.pass.cpp index e5f610c80a280..4d5cdf31e5b59 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directory/create_directory_with_attributes.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directory/create_directory_with_attributes.pass.cpp @@ -9,7 +9,7 @@ // UNSUPPORTED: c++03 // This test requires the dylib support introduced in D92769. -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.15 // diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.file_size/file_size.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.file_size/file_size.pass.cpp index 51f2fa841e53a..413ba881b59f1 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.file_size/file_size.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.file_size/file_size.pass.cpp @@ -10,7 +10,7 @@ // The string reported on errors changed, which makes those tests fail when run // against already-released libc++'s. -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.15 // diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.last_write_time/last_write_time.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.last_write_time/last_write_time.pass.cpp index 930dae563b1f5..f3a4bb5013f45 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.last_write_time/last_write_time.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.last_write_time/last_write_time.pass.cpp @@ -10,7 +10,7 @@ // The string reported on errors changed, which makes those tests fail when run // against already-released libc++'s. -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.15 // diff --git a/libcxx/test/std/input.output/filesystems/lit.local.cfg b/libcxx/test/std/input.output/filesystems/lit.local.cfg index fbd625d21ff52..95e9ec4ccd979 100644 --- a/libcxx/test/std/input.output/filesystems/lit.local.cfg +++ b/libcxx/test/std/input.output/filesystems/lit.local.cfg @@ -3,7 +3,7 @@ # suite against an older macOS. too_old = {'10.9', '10.10', '10.11', '10.12', '10.13', '10.14'} if 'use_system_cxx_lib' in config.available_features: - if any('x86_64-apple-macosx{}'.format(v) in config.available_features for v in too_old): + if any('target=x86_64-apple-macosx{}'.format(v) in config.available_features for v in too_old): config.unsupported = True if 'libcpp-has-no-filesystem-library' in config.available_features: diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/bool.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/bool.pass.cpp index e152f4ab4d8c0..ea354a487e7b8 100644 --- a/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/bool.pass.cpp +++ b/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/bool.pass.cpp @@ -6,12 +6,7 @@ // //===----------------------------------------------------------------------===// -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} // diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/double.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/double.pass.cpp index eaad095cae568..a3f977bcd1e8b 100644 --- a/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/double.pass.cpp +++ b/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/double.pass.cpp @@ -6,12 +6,7 @@ // //===----------------------------------------------------------------------===// -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} // diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/float.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/float.pass.cpp index 2a303c691a342..2d68dfddd076e 100644 --- a/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/float.pass.cpp +++ b/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/float.pass.cpp @@ -6,12 +6,7 @@ // //===----------------------------------------------------------------------===// -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} // diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/int.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/int.pass.cpp index 5250b30c103e8..563b8d2dff7e6 100644 --- a/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/int.pass.cpp +++ b/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/int.pass.cpp @@ -6,12 +6,7 @@ // //===----------------------------------------------------------------------===// -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} // diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/long.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/long.pass.cpp index 8cb545cb38742..5d00260a64703 100644 --- a/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/long.pass.cpp +++ b/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/long.pass.cpp @@ -6,12 +6,7 @@ // //===----------------------------------------------------------------------===// -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} // diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/long_double.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/long_double.pass.cpp index dfb4afc966dbb..ed198d54a5463 100644 --- a/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/long_double.pass.cpp +++ b/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/long_double.pass.cpp @@ -6,12 +6,7 @@ // //===----------------------------------------------------------------------===// -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} // diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/long_long.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/long_long.pass.cpp index 06130c1d9a1b9..d6ccc11ee9927 100644 --- a/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/long_long.pass.cpp +++ b/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/long_long.pass.cpp @@ -6,12 +6,7 @@ // //===----------------------------------------------------------------------===// -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} // diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/pointer.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/pointer.pass.cpp index db7fe577a233c..9b9fab8b9e790 100644 --- a/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/pointer.pass.cpp +++ b/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/pointer.pass.cpp @@ -6,12 +6,7 @@ // //===----------------------------------------------------------------------===// -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} // diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/short.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/short.pass.cpp index 8a6d94417a989..dbecc33c5308c 100644 --- a/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/short.pass.cpp +++ b/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/short.pass.cpp @@ -6,12 +6,7 @@ // //===----------------------------------------------------------------------===// -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} // diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/unsigned_int.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/unsigned_int.pass.cpp index a5d179d936c9f..d0ad52174524a 100644 --- a/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/unsigned_int.pass.cpp +++ b/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/unsigned_int.pass.cpp @@ -6,12 +6,7 @@ // //===----------------------------------------------------------------------===// -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} // diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/unsigned_long.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/unsigned_long.pass.cpp index bad99e42059f3..984c18890e9ce 100644 --- a/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/unsigned_long.pass.cpp +++ b/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/unsigned_long.pass.cpp @@ -6,12 +6,7 @@ // //===----------------------------------------------------------------------===// -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} // diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/unsigned_long_long.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/unsigned_long_long.pass.cpp index d87088bc546be..a67ae60a8322b 100644 --- a/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/unsigned_long_long.pass.cpp +++ b/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/unsigned_long_long.pass.cpp @@ -6,12 +6,7 @@ // //===----------------------------------------------------------------------===// -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} // diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/unsigned_short.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/unsigned_short.pass.cpp index 00dc9d0e3baae..7157c885cc368 100644 --- a/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/unsigned_short.pass.cpp +++ b/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream.formatted.arithmetic/unsigned_short.pass.cpp @@ -6,12 +6,7 @@ // //===----------------------------------------------------------------------===// -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} // diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream_extractors/streambuf.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream_extractors/streambuf.pass.cpp index 580f9f7bf0d4b..d71f29595e251 100644 --- a/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream_extractors/streambuf.pass.cpp +++ b/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream_extractors/streambuf.pass.cpp @@ -6,12 +6,7 @@ // //===----------------------------------------------------------------------===// -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} // diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/get.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/get.pass.cpp index 9d5710bbbef9b..91ae1045b3b7c 100644 --- a/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/get.pass.cpp +++ b/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/get.pass.cpp @@ -6,12 +6,7 @@ // //===----------------------------------------------------------------------===// -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} // diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/get_chart.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/get_chart.pass.cpp index 6a561c1c03ec4..fb3c2d07d36e8 100644 --- a/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/get_chart.pass.cpp +++ b/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/get_chart.pass.cpp @@ -6,12 +6,7 @@ // //===----------------------------------------------------------------------===// -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} // diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/get_pointer_size.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/get_pointer_size.pass.cpp index 1d3957a6d93a5..b3af36e32b398 100644 --- a/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/get_pointer_size.pass.cpp +++ b/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/get_pointer_size.pass.cpp @@ -8,12 +8,7 @@ // In macosx10.9 to macosx10.14, streams are provided in the dylib AND they // have a bug in how they handle null-termination in case of errors (see D40677). -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} // diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/get_pointer_size_chart.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/get_pointer_size_chart.pass.cpp index cb822177bc3bf..11b820ef4d617 100644 --- a/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/get_pointer_size_chart.pass.cpp +++ b/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/get_pointer_size_chart.pass.cpp @@ -8,12 +8,7 @@ // In macosx10.9 to macosx10.14, streams are provided in the dylib AND they // have a bug in how they handle null-termination in case of errors (see D40677). -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} // diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/get_streambuf.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/get_streambuf.pass.cpp index e932ca4071494..8067792e09de2 100644 --- a/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/get_streambuf.pass.cpp +++ b/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/get_streambuf.pass.cpp @@ -6,12 +6,7 @@ // //===----------------------------------------------------------------------===// -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} // diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/get_streambuf_chart.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/get_streambuf_chart.pass.cpp index e30ea1225eed8..dceb06ada7945 100644 --- a/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/get_streambuf_chart.pass.cpp +++ b/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/get_streambuf_chart.pass.cpp @@ -6,12 +6,7 @@ // //===----------------------------------------------------------------------===// -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} // diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/getline_pointer_size.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/getline_pointer_size.pass.cpp index 50a1184f74910..44fa59501a511 100644 --- a/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/getline_pointer_size.pass.cpp +++ b/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/getline_pointer_size.pass.cpp @@ -8,12 +8,7 @@ // In macosx10.9 to macosx10.14, streams are provided in the dylib AND they // have a bug in how they handle null-termination in case of errors (see D40677). -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} // diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/getline_pointer_size_chart.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/getline_pointer_size_chart.pass.cpp index edebd020a2782..72bc2a4c00ccd 100644 --- a/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/getline_pointer_size_chart.pass.cpp +++ b/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/getline_pointer_size_chart.pass.cpp @@ -8,12 +8,7 @@ // In macosx10.9 to macosx10.14, streams are provided in the dylib AND they // have a bug in how they handle null-termination in case of errors (see D40677). -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} // diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/ignore.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/ignore.pass.cpp index 4555c6c1ea910..0d0b4a8f40c45 100644 --- a/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/ignore.pass.cpp +++ b/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/ignore.pass.cpp @@ -6,12 +6,7 @@ // //===----------------------------------------------------------------------===// -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} // diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/peek.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/peek.pass.cpp index acde79b71f50d..be9600cf15614 100644 --- a/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/peek.pass.cpp +++ b/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/peek.pass.cpp @@ -6,12 +6,7 @@ // //===----------------------------------------------------------------------===// -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} // diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/read.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/read.pass.cpp index 3ef761c00980d..2e6c2a2082164 100644 --- a/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/read.pass.cpp +++ b/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/read.pass.cpp @@ -6,12 +6,7 @@ // //===----------------------------------------------------------------------===// -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} // diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/seekg_off.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/seekg_off.pass.cpp index ebd3a25486b10..d9412bf920ba0 100644 --- a/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/seekg_off.pass.cpp +++ b/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/seekg_off.pass.cpp @@ -6,9 +6,7 @@ // //===----------------------------------------------------------------------===// -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11}} // diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/minmax_showbase.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/minmax_showbase.pass.cpp index 36ca0155c5a7b..126151221aa4b 100644 --- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/minmax_showbase.pass.cpp +++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/minmax_showbase.pass.cpp @@ -25,9 +25,7 @@ // This test exposes a regression that was not fixed yet in the libc++ // shipped with macOS 10.12, 10.13 and 10.14. See D32670 for details. -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{12|13|14}} #include #include diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/minus1.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/minus1.pass.cpp index 4b9cbd479c6cb..8e3b77c420bea 100644 --- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/minus1.pass.cpp +++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/minus1.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.12 // diff --git a/libcxx/test/std/input.output/iostreams.base/ios.base/ios.types/ios_Init/ios_Init.multiple.pass.cpp b/libcxx/test/std/input.output/iostreams.base/ios.base/ios.types/ios_Init/ios_Init.multiple.pass.cpp index 22f2c74898ddb..1a9aaa96162aa 100644 --- a/libcxx/test/std/input.output/iostreams.base/ios.base/ios.types/ios_Init/ios_Init.multiple.pass.cpp +++ b/libcxx/test/std/input.output/iostreams.base/ios.base/ios.types/ios_Init/ios_Init.multiple.pass.cpp @@ -16,12 +16,7 @@ // The dylibs shipped on macOS so far do not contain the fix for PR43300, so // this test fails. -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} int main(int, char**) { diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/delete_align_val_t_replace.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/delete_align_val_t_replace.pass.cpp index 968fe205b3969..b6c6d40342717 100644 --- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/delete_align_val_t_replace.pass.cpp +++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/delete_align_val_t_replace.pass.cpp @@ -18,11 +18,7 @@ // However, support for that was broken prior to Clang 8 and AppleClang 11. // UNSUPPORTED: apple-clang-9, apple-clang-10 // UNSUPPORTED: clang-5, clang-6, clang-7 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13}} // On Windows libc++ doesn't provide its own definitions for new/delete // but instead depends on the ones in VCRuntime. However VCRuntime does not diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t.pass.cpp index ec53942de44a2..08350e59cda67 100644 --- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t.pass.cpp +++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t.pass.cpp @@ -16,11 +16,7 @@ // However, support for that was broken prior to Clang 8 and AppleClang 11. // UNSUPPORTED: apple-clang-9, apple-clang-10 // UNSUPPORTED: clang-5, clang-6, clang-7 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13}} // test operator new diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t_nothrow.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t_nothrow.pass.cpp index c085ceed5c353..159d39af12b3c 100644 --- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t_nothrow.pass.cpp +++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t_nothrow.pass.cpp @@ -16,11 +16,7 @@ // However, support for that was broken prior to Clang 8 and AppleClang 11. // UNSUPPORTED: apple-clang-9, apple-clang-10 // UNSUPPORTED: clang-5, clang-6, clang-7 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13}} // test operator new (nothrow) diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t_nothrow_replace.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t_nothrow_replace.pass.cpp index b631f8a8ce204..b407207cc3263 100644 --- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t_nothrow_replace.pass.cpp +++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t_nothrow_replace.pass.cpp @@ -14,11 +14,7 @@ // However, support for that was broken prior to Clang 8 and AppleClang 11. // UNSUPPORTED: apple-clang-9, apple-clang-10 // UNSUPPORTED: clang-5, clang-6, clang-7 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13}} // test operator new nothrow by replacing only operator new diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/sized_delete_array_fsizeddeallocation.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/sized_delete_array_fsizeddeallocation.pass.cpp index b40e5ff1408b1..2a5bb21fcdd16 100644 --- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/sized_delete_array_fsizeddeallocation.pass.cpp +++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/sized_delete_array_fsizeddeallocation.pass.cpp @@ -12,9 +12,7 @@ // when sized deallocation is not supported, e.g., prior to C++14. // UNSUPPORTED: sanitizer-new-delete -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11}} // REQUIRES: -fsized-deallocation // ADDITIONAL_COMPILE_FLAGS: -fsized-deallocation diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/delete_align_val_t_replace.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/delete_align_val_t_replace.pass.cpp index 4189ad9658b4c..336a163784bdb 100644 --- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/delete_align_val_t_replace.pass.cpp +++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/delete_align_val_t_replace.pass.cpp @@ -18,11 +18,7 @@ // However, support for that was broken prior to Clang 8 and AppleClang 11. // UNSUPPORTED: apple-clang-9, apple-clang-10 // UNSUPPORTED: clang-5, clang-6, clang-7 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13}} // On Windows libc++ doesn't provide its own definitions for new/delete // but instead depends on the ones in VCRuntime. However VCRuntime does not diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t.pass.cpp index 7030e08a404d1..d2fc41751ae74 100644 --- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t.pass.cpp +++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t.pass.cpp @@ -13,11 +13,7 @@ // However, support for that was broken prior to Clang 8 and AppleClang 11. // UNSUPPORTED: apple-clang-9, apple-clang-10 // UNSUPPORTED: clang-5, clang-6, clang-7 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13}} // asan and msan will not call the new handler. // UNSUPPORTED: sanitizer-new-delete diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t_nothrow.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t_nothrow.pass.cpp index c3d1d6b199444..e65280bb25a11 100644 --- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t_nothrow.pass.cpp +++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t_nothrow.pass.cpp @@ -13,11 +13,7 @@ // However, support for that was broken prior to Clang 8 and AppleClang 11. // UNSUPPORTED: apple-clang-9, apple-clang-10 // UNSUPPORTED: clang-5, clang-6, clang-7 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13}} // asan and msan will not call the new handler. // UNSUPPORTED: sanitizer-new-delete diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t_nothrow_replace.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t_nothrow_replace.pass.cpp index 843a3f83f3c31..657f8e0fd38af 100644 --- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t_nothrow_replace.pass.cpp +++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t_nothrow_replace.pass.cpp @@ -14,11 +14,7 @@ // However, support for that was broken prior to Clang 8 and AppleClang 11. // UNSUPPORTED: apple-clang-9, apple-clang-10 // UNSUPPORTED: clang-5, clang-6, clang-7 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13}} // test operator new nothrow by replacing only operator new diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/sized_delete_fsizeddeallocation.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/sized_delete_fsizeddeallocation.pass.cpp index 25af691897a63..da296f1a3f933 100644 --- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/sized_delete_fsizeddeallocation.pass.cpp +++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/sized_delete_fsizeddeallocation.pass.cpp @@ -12,9 +12,7 @@ // when sized deallocation is not supported, e.g., prior to C++14. // UNSUPPORTED: sanitizer-new-delete -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11}} // NOTE: Only clang-3.7 and GCC 5.1 and greater support -fsized-deallocation. // REQUIRES: -fsized-deallocation diff --git a/libcxx/test/std/language.support/support.exception/uncaught/uncaught_exceptions.pass.cpp b/libcxx/test/std/language.support/support.exception/uncaught/uncaught_exceptions.pass.cpp index 4f6c1ba15c373..658c0a822ed9c 100644 --- a/libcxx/test/std/language.support/support.exception/uncaught/uncaught_exceptions.pass.cpp +++ b/libcxx/test/std/language.support/support.exception/uncaught/uncaught_exceptions.pass.cpp @@ -9,14 +9,11 @@ // UNSUPPORTED: no-exceptions // std::uncaught_exceptions() was introduced in the dylib on Mac OS 10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11}} // However, std::uncaught_exceptions() gives the wrong answer in Mac OS 10.12 // and 10.13, where it only gives 0 or 1. This was fixed later. -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{12|13}} // test uncaught_exceptions diff --git a/libcxx/test/std/localization/locale.categories/category.collate/locale.collate.byname/compare.pass.cpp b/libcxx/test/std/localization/locale.categories/category.collate/locale.collate.byname/compare.pass.cpp index 792292dc927a0..8d759ac7eac89 100644 --- a/libcxx/test/std/localization/locale.categories/category.collate/locale.collate.byname/compare.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.collate/locale.collate.byname/compare.pass.cpp @@ -20,9 +20,8 @@ // up the OS's collation files. // TODO investigation needed. -// Glibc seems to collate files differently from the way Apple's C library does -// it. -// XFAIL: linux-gnu +// Glibc seems to collate files differently from the way Apple's C library does it. +// XFAIL: target={{.*}}-linux-gnu{{.*}} // XFAIL: LIBCXX-WINDOWS-FIXME diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt.byname/ctor_char16_t_char8_t.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt.byname/ctor_char16_t_char8_t.pass.cpp index 67a9bfed9abfa..d1da936d8faa6 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt.byname/ctor_char16_t_char8_t.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt.byname/ctor_char16_t_char8_t.pass.cpp @@ -11,13 +11,7 @@ // This test relies on P0482 being fixed, which isn't in // older Apple dylibs // -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt.byname/ctor_char32_t_char8_t.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt.byname/ctor_char32_t_char8_t.pass.cpp index e077c870501b3..9bd7a2e8489cb 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt.byname/ctor_char32_t_char8_t.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt.byname/ctor_char32_t_char8_t.pass.cpp @@ -11,13 +11,7 @@ // This test relies on P0482 being fixed, which isn't in // older Apple dylibs // -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/ctor_char16_t_char8_t.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/ctor_char16_t_char8_t.pass.cpp index eb3e2c969d025..5621f3b35b749 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/ctor_char16_t_char8_t.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/ctor_char16_t_char8_t.pass.cpp @@ -11,13 +11,7 @@ // This test relies on P0482 being fixed, which isn't in // older Apple dylibs // -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/ctor_char32_t_char8_t.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/ctor_char32_t_char8_t.pass.cpp index faf29fd2e1b06..4499d8a1a076f 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/ctor_char32_t_char8_t.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/ctor_char32_t_char8_t.pass.cpp @@ -11,13 +11,7 @@ // This test relies on P0482 being fixed, which isn't in // older Apple dylibs // -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_char8_t_always_noconv.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_char8_t_always_noconv.pass.cpp index c6a7a46ef81f9..56a56ac159996 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_char8_t_always_noconv.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_char8_t_always_noconv.pass.cpp @@ -11,13 +11,7 @@ // This test relies on P0482 being fixed, which isn't in // older Apple dylibs // -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_char8_t_encoding.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_char8_t_encoding.pass.cpp index 5167bcc345435..570d017a1df17 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_char8_t_encoding.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_char8_t_encoding.pass.cpp @@ -11,13 +11,7 @@ // This test relies on P0482 being fixed, which isn't in // older Apple dylibs // -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_char8_t_in.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_char8_t_in.pass.cpp index d40d4e5b817d5..81dcb922f623c 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_char8_t_in.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_char8_t_in.pass.cpp @@ -11,13 +11,7 @@ // This test relies on P0482 being fixed, which isn't in // older Apple dylibs // -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_char8_t_length.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_char8_t_length.pass.cpp index 3f1afb280bee3..752eea4c80247 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_char8_t_length.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_char8_t_length.pass.cpp @@ -11,13 +11,7 @@ // This test relies on P0482 being fixed, which isn't in // older Apple dylibs // -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_char8_t_max_length.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_char8_t_max_length.pass.cpp index 0c359d0b68501..eb77c8237f6ff 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_char8_t_max_length.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_char8_t_max_length.pass.cpp @@ -11,13 +11,7 @@ // This test relies on P0482 being fixed, which isn't in // older Apple dylibs // -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_char8_t_out.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_char8_t_out.pass.cpp index c4844e5558fd7..4a8093228359b 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_char8_t_out.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_char8_t_out.pass.cpp @@ -11,13 +11,7 @@ // This test relies on P0482 being fixed, which isn't in // older Apple dylibs // -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_char8_t_unshift.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_char8_t_unshift.pass.cpp index 32da259d50ae0..790fd1d3ddd18 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_char8_t_unshift.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_char8_t_unshift.pass.cpp @@ -11,13 +11,7 @@ // This test relies on P0482 being fixed, which isn't in // older Apple dylibs // -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_char8_t_always_noconv.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_char8_t_always_noconv.pass.cpp index 0cf2c2ecd3501..e3f9384f3b4a6 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_char8_t_always_noconv.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_char8_t_always_noconv.pass.cpp @@ -11,13 +11,7 @@ // This test relies on P0482 being fixed, which isn't in // older Apple dylibs // -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_char8_t_encoding.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_char8_t_encoding.pass.cpp index 1976234e1bb3c..f193b8568efcd 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_char8_t_encoding.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_char8_t_encoding.pass.cpp @@ -11,13 +11,7 @@ // This test relies on P0482 being fixed, which isn't in // older Apple dylibs // -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_char8_t_in.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_char8_t_in.pass.cpp index baaadb2bca236..c7d50f29dbcf2 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_char8_t_in.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_char8_t_in.pass.cpp @@ -11,13 +11,7 @@ // This test relies on P0482 being fixed, which isn't in // older Apple dylibs // -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_char8_t_length.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_char8_t_length.pass.cpp index d95ba2a84e4a3..75cce8154c240 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_char8_t_length.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_char8_t_length.pass.cpp @@ -11,13 +11,7 @@ // This test relies on P0482 being fixed, which isn't in // older Apple dylibs // -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_char8_t_max_length.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_char8_t_max_length.pass.cpp index 31aad35c9ae55..436535bb82fa5 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_char8_t_max_length.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_char8_t_max_length.pass.cpp @@ -11,13 +11,7 @@ // This test relies on P0482 being fixed, which isn't in // older Apple dylibs // -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_char8_t_out.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_char8_t_out.pass.cpp index 3649597fec2d1..30743d5946e9c 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_char8_t_out.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_char8_t_out.pass.cpp @@ -11,13 +11,7 @@ // This test relies on P0482 being fixed, which isn't in // older Apple dylibs // -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_char8_t_unshift.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_char8_t_unshift.pass.cpp index be865e15ed075..7b6124457a17f 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_char8_t_unshift.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_char8_t_unshift.pass.cpp @@ -11,13 +11,7 @@ // This test relies on P0482 being fixed, which isn't in // older Apple dylibs // -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/utf_sanity_check.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/utf_sanity_check.pass.cpp index 8bbe9e0e0dde1..31fd07fd1dc57 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/utf_sanity_check.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/utf_sanity_check.pass.cpp @@ -9,13 +9,7 @@ // This test relies on P0482 being fixed, which isn't in // older Apple dylibs // -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // This test runs in C++20, but we have deprecated codecvt in C++20. // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/types_char16_t_char8_t.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/types_char16_t_char8_t.pass.cpp index 82fd14c4e4520..95ee89a73cb0a 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/types_char16_t_char8_t.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/types_char16_t_char8_t.pass.cpp @@ -11,13 +11,7 @@ // This test relies on P0482 being fixed, which isn't in // older Apple dylibs // -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/types_char32_t_char8_t.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/types_char32_t_char8_t.pass.cpp index 2beb60c80be53..c009b1f20d297 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/types_char32_t_char8_t.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/types_char32_t_char8_t.pass.cpp @@ -11,13 +11,7 @@ // This test relies on P0482 being fixed, which isn't in // older Apple dylibs // -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_zh_CN.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_zh_CN.pass.cpp index 69331bba6ff5d..8cf77b47a0bd5 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_zh_CN.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_zh_CN.pass.cpp @@ -21,7 +21,7 @@ // ios_base::iostate& err, long double& v) const; // TODO For zh_CN GLIBC puts the negative sign after the currency symbol. -// XFAIL: linux-gnu +// XFAIL: target={{.*}}-linux-gnu{{.*}} #include #include diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_zh_CN.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_zh_CN.pass.cpp index b8cc831f15a5c..0e5254cb9bd65 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_zh_CN.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_zh_CN.pass.cpp @@ -21,7 +21,7 @@ // long double units) const; // TODO For zh_CN GLIBC puts the negative sign after the currency symbol. -// XFAIL: linux-gnu +// XFAIL: target={{.*}}-linux-gnu{{.*}} #include #include diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long_double.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long_double.pass.cpp index a7bcacb7408f2..ae0837b9e5df6 100644 --- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long_double.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long_double.pass.cpp @@ -13,7 +13,7 @@ // iter_type put(iter_type s, ios_base& iob, char_type fill, long double v) const; // TODO GLIBC uses a different string for positive and negative NAN numbers. -// XFAIL: linux-gnu +// XFAIL: target={{.*}}-linux-gnu{{.*}} // XFAIL: LIBCXX-WINDOWS-FIXME diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long.pass.cpp index f81bff0086978..47e4ab502b876 100644 --- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long.pass.cpp @@ -15,12 +15,7 @@ // This test exercises the fix for PR28704, which isn't in the dylib for // some systems. -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} #include #include diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_one.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_one.pass.cpp index 8c2712dc1838f..791a71c17bc39 100644 --- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_one.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_one.pass.cpp @@ -24,7 +24,7 @@ // ios_base::iostate& err, tm *t, char format, char modifier = 0) const; // TODO: investigation needed -// XFAIL: linux-gnu +// XFAIL: target={{.*}}-linux-gnu{{.*}} #include #include diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_one_wide.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_one_wide.pass.cpp index 3246b4dee1599..d3315d6faee68 100644 --- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_one_wide.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_one_wide.pass.cpp @@ -24,7 +24,7 @@ // ios_base::iostate& err, tm *t, char format, char modifier = 0) const; // TODO: investigation needed -// XFAIL: linux-gnu +// XFAIL: target={{.*}}-linux-gnu{{.*}} #include #include diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_weekday.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_weekday.pass.cpp index c2c2a98442a45..0ddef03215500 100644 --- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_weekday.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_weekday.pass.cpp @@ -22,7 +22,7 @@ // ios_base::iostate& err, tm* t) const; // TODO: investigation needed -// XFAIL: linux-gnu +// XFAIL: target={{.*}}-linux-gnu{{.*}} #include #include diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_weekday_wide.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_weekday_wide.pass.cpp index 856cf3f60b6ca..9ff73e9c2ad0b 100644 --- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_weekday_wide.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_weekday_wide.pass.cpp @@ -20,7 +20,7 @@ // ios_base::iostate& err, tm* t) const; // TODO: investigation needed -// XFAIL: linux-gnu +// XFAIL: target={{.*}}-linux-gnu{{.*}} #include #include diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.put.byname/put1.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.put.byname/put1.pass.cpp index 8daf3405d86b5..0a4d5df78019a 100644 --- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.put.byname/put1.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.put.byname/put1.pass.cpp @@ -29,7 +29,7 @@ // }; // TODO: investigation needed -// XFAIL: linux-gnu +// XFAIL: target={{.*}}-linux-gnu{{.*}} #include #include diff --git a/libcxx/test/std/localization/locales/locale/locale.cons/assign.pass.cpp b/libcxx/test/std/localization/locales/locale/locale.cons/assign.pass.cpp index b078cbcdb4ab4..94e2d26f2c63e 100644 --- a/libcxx/test/std/localization/locales/locale/locale.cons/assign.pass.cpp +++ b/libcxx/test/std/localization/locales/locale/locale.cons/assign.pass.cpp @@ -11,13 +11,7 @@ // This test relies on P0482 being fixed, which isn't in // older Apple dylibs // -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // This test runs in C++20, but we have deprecated codecvt in C++20. // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS diff --git a/libcxx/test/std/localization/locales/locale/locale.cons/char_pointer.pass.cpp b/libcxx/test/std/localization/locales/locale/locale.cons/char_pointer.pass.cpp index af9299af22f30..42b99d3347e02 100644 --- a/libcxx/test/std/localization/locales/locale/locale.cons/char_pointer.pass.cpp +++ b/libcxx/test/std/localization/locales/locale/locale.cons/char_pointer.pass.cpp @@ -15,13 +15,7 @@ // This test relies on P0482 being fixed, which isn't in // older Apple dylibs // -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // This test runs in C++20, but we have deprecated codecvt in C++20. // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS diff --git a/libcxx/test/std/localization/locales/locale/locale.cons/copy.pass.cpp b/libcxx/test/std/localization/locales/locale/locale.cons/copy.pass.cpp index 0660b331eb8d3..012086a626d7f 100644 --- a/libcxx/test/std/localization/locales/locale/locale.cons/copy.pass.cpp +++ b/libcxx/test/std/localization/locales/locale/locale.cons/copy.pass.cpp @@ -11,13 +11,7 @@ // This test relies on P0482 being fixed, which isn't in // older Apple dylibs // -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // This test runs in C++20, but we have deprecated codecvt in C++20. // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS diff --git a/libcxx/test/std/localization/locales/locale/locale.cons/default.pass.cpp b/libcxx/test/std/localization/locales/locale/locale.cons/default.pass.cpp index a9a1656de1583..c7a67595d1755 100644 --- a/libcxx/test/std/localization/locales/locale/locale.cons/default.pass.cpp +++ b/libcxx/test/std/localization/locales/locale/locale.cons/default.pass.cpp @@ -11,13 +11,7 @@ // This test relies on P0482 being fixed, which isn't in // older Apple dylibs // -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // This test runs in C++20, but we have deprecated codecvt in C++20. // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS diff --git a/libcxx/test/std/localization/locales/locale/locale.cons/locale_char_pointer_cat.pass.cpp b/libcxx/test/std/localization/locales/locale/locale.cons/locale_char_pointer_cat.pass.cpp index 23bbc13540b59..694af889dc0e1 100644 --- a/libcxx/test/std/localization/locales/locale/locale.cons/locale_char_pointer_cat.pass.cpp +++ b/libcxx/test/std/localization/locales/locale/locale.cons/locale_char_pointer_cat.pass.cpp @@ -13,13 +13,7 @@ // This test relies on P0482 being fixed, which isn't in // older Apple dylibs // -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // This test runs in C++20, but we have deprecated codecvt in C++20. // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS diff --git a/libcxx/test/std/localization/locales/locale/locale.cons/locale_facetptr.pass.cpp b/libcxx/test/std/localization/locales/locale/locale.cons/locale_facetptr.pass.cpp index dba31ef8759cb..1ae3f59a8bd8a 100644 --- a/libcxx/test/std/localization/locales/locale/locale.cons/locale_facetptr.pass.cpp +++ b/libcxx/test/std/localization/locales/locale/locale.cons/locale_facetptr.pass.cpp @@ -11,13 +11,7 @@ // This test relies on P0482 being fixed, which isn't in // older Apple dylibs // -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // This test runs in C++20, but we have deprecated codecvt in C++20. // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS diff --git a/libcxx/test/std/localization/locales/locale/locale.cons/locale_locale_cat.pass.cpp b/libcxx/test/std/localization/locales/locale/locale.cons/locale_locale_cat.pass.cpp index fb926768e8d37..09889087890c6 100644 --- a/libcxx/test/std/localization/locales/locale/locale.cons/locale_locale_cat.pass.cpp +++ b/libcxx/test/std/localization/locales/locale/locale.cons/locale_locale_cat.pass.cpp @@ -13,13 +13,7 @@ // This test relies on P0482 being fixed, which isn't in // older Apple dylibs // -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // This test runs in C++20, but we have deprecated codecvt in C++20. // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS diff --git a/libcxx/test/std/localization/locales/locale/locale.cons/locale_string_cat.pass.cpp b/libcxx/test/std/localization/locales/locale/locale.cons/locale_string_cat.pass.cpp index 3d97944444c57..5745f8a8448c7 100644 --- a/libcxx/test/std/localization/locales/locale/locale.cons/locale_string_cat.pass.cpp +++ b/libcxx/test/std/localization/locales/locale/locale.cons/locale_string_cat.pass.cpp @@ -13,13 +13,7 @@ // This test relies on P0482 being fixed, which isn't in // older Apple dylibs // -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // This test runs in C++20, but we have deprecated codecvt in C++20. // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS diff --git a/libcxx/test/std/localization/locales/locale/locale.cons/string.pass.cpp b/libcxx/test/std/localization/locales/locale/locale.cons/string.pass.cpp index 1e2fde612b1fe..733c7e93275b0 100644 --- a/libcxx/test/std/localization/locales/locale/locale.cons/string.pass.cpp +++ b/libcxx/test/std/localization/locales/locale/locale.cons/string.pass.cpp @@ -12,13 +12,7 @@ // This test relies on P0482 being fixed, which isn't in // older Apple dylibs // -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // This test runs in C++20, but we have deprecated codecvt in C++20. // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS diff --git a/libcxx/test/std/localization/locales/locale/locale.members/combine.pass.cpp b/libcxx/test/std/localization/locales/locale/locale.members/combine.pass.cpp index 10fca15564a89..5914bb7774434 100644 --- a/libcxx/test/std/localization/locales/locale/locale.members/combine.pass.cpp +++ b/libcxx/test/std/localization/locales/locale/locale.members/combine.pass.cpp @@ -9,13 +9,7 @@ // This test relies on P0482 being fixed, which isn't in // older Apple dylibs // -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // This test runs in C++20, but we have deprecated codecvt in C++20. // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS diff --git a/libcxx/test/std/localization/locales/locale/locale.statics/classic.pass.cpp b/libcxx/test/std/localization/locales/locale/locale.statics/classic.pass.cpp index f873b911aee67..f32c2432123a2 100644 --- a/libcxx/test/std/localization/locales/locale/locale.statics/classic.pass.cpp +++ b/libcxx/test/std/localization/locales/locale/locale.statics/classic.pass.cpp @@ -9,13 +9,7 @@ // This test relies on P0482 being fixed, which isn't in // older Apple dylibs // -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // This test runs in C++20, but we have deprecated codecvt in C++20. // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS diff --git a/libcxx/test/std/localization/locales/locale/locale.statics/global.pass.cpp b/libcxx/test/std/localization/locales/locale/locale.statics/global.pass.cpp index 38ec71ea0a201..458fe0866a1b0 100644 --- a/libcxx/test/std/localization/locales/locale/locale.statics/global.pass.cpp +++ b/libcxx/test/std/localization/locales/locale/locale.statics/global.pass.cpp @@ -11,13 +11,7 @@ // This test relies on P0482 being fixed, which isn't in // older Apple dylibs // -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // This test runs in C++20, but we have deprecated codecvt in C++20. // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS diff --git a/libcxx/test/std/numerics/rand/rand.device/ctor.pass.cpp b/libcxx/test/std/numerics/rand/rand.device/ctor.pass.cpp index bcbb9ec8569eb..4cef9bd160af7 100644 --- a/libcxx/test/std/numerics/rand/rand.device/ctor.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.device/ctor.pass.cpp @@ -8,9 +8,7 @@ // See bugs.llvm.org/PR20183 // -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11}} // UNSUPPORTED: libcpp-has-no-random-device diff --git a/libcxx/test/std/numerics/rand/rand.device/eval.pass.cpp b/libcxx/test/std/numerics/rand/rand.device/eval.pass.cpp index a55915d121b38..f91850f0cd5df 100644 --- a/libcxx/test/std/numerics/rand/rand.device/eval.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.device/eval.pass.cpp @@ -8,9 +8,7 @@ // See bugs.llvm.org/PR20183 // -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11}} // UNSUPPORTED: libcpp-has-no-random-device diff --git a/libcxx/test/std/re/re.alg/re.alg.match/awk.locale.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.match/awk.locale.pass.cpp index a93a2694464b7..7588f2d365442 100644 --- a/libcxx/test/std/re/re.alg/re.alg.match/awk.locale.pass.cpp +++ b/libcxx/test/std/re/re.alg/re.alg.match/awk.locale.pass.cpp @@ -18,7 +18,7 @@ // TODO: investigation needed // TODO(netbsd): incomplete support for locales -// XFAIL: linux-gnu, netbsd, freebsd +// XFAIL: target={{.*}}-linux-gnu{{.*}}, netbsd, freebsd // REQUIRES: locale.cs_CZ.ISO8859-2 #include diff --git a/libcxx/test/std/re/re.alg/re.alg.match/basic.locale.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.match/basic.locale.pass.cpp index 1cec602096484..3b9b8e2a83018 100644 --- a/libcxx/test/std/re/re.alg/re.alg.match/basic.locale.pass.cpp +++ b/libcxx/test/std/re/re.alg/re.alg.match/basic.locale.pass.cpp @@ -21,7 +21,7 @@ // regex_constants::match_flag_type flags = regex_constants::match_default); // TODO: investigation needed -// XFAIL: linux-gnu, freebsd +// XFAIL: target={{.*}}-linux-gnu{{.*}}, freebsd #include #include diff --git a/libcxx/test/std/re/re.alg/re.alg.match/ecma.locale.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.match/ecma.locale.pass.cpp index 8e651f6d0301e..26be2069fab5e 100644 --- a/libcxx/test/std/re/re.alg/re.alg.match/ecma.locale.pass.cpp +++ b/libcxx/test/std/re/re.alg/re.alg.match/ecma.locale.pass.cpp @@ -21,7 +21,7 @@ // regex_constants::match_flag_type flags = regex_constants::match_default); // TODO: investigation needed -// XFAIL: linux-gnu, freebsd +// XFAIL: target={{.*}}-linux-gnu{{.*}}, freebsd #include #include diff --git a/libcxx/test/std/re/re.alg/re.alg.match/extended.locale.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.match/extended.locale.pass.cpp index ea77dd83fc979..5a0dd18c821e2 100644 --- a/libcxx/test/std/re/re.alg/re.alg.match/extended.locale.pass.cpp +++ b/libcxx/test/std/re/re.alg/re.alg.match/extended.locale.pass.cpp @@ -21,7 +21,7 @@ // regex_constants::match_flag_type flags = regex_constants::match_default); // TODO: investigation needed -// XFAIL: linux-gnu, freebsd +// XFAIL: target={{.*}}-linux-gnu{{.*}}, freebsd #include #include diff --git a/libcxx/test/std/re/re.alg/re.alg.search/awk.locale.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.search/awk.locale.pass.cpp index 4ba87bb45232c..275927841c242 100644 --- a/libcxx/test/std/re/re.alg/re.alg.search/awk.locale.pass.cpp +++ b/libcxx/test/std/re/re.alg/re.alg.search/awk.locale.pass.cpp @@ -21,7 +21,7 @@ // regex_constants::match_flag_type flags = regex_constants::match_default); // TODO: investigation needed -// XFAIL: linux-gnu, freebsd +// XFAIL: target={{.*}}-linux-gnu{{.*}}, freebsd #include #include diff --git a/libcxx/test/std/re/re.alg/re.alg.search/basic.locale.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.search/basic.locale.pass.cpp index fe3435eae9298..130ef49e81e41 100644 --- a/libcxx/test/std/re/re.alg/re.alg.search/basic.locale.pass.cpp +++ b/libcxx/test/std/re/re.alg/re.alg.search/basic.locale.pass.cpp @@ -21,7 +21,7 @@ // regex_constants::match_flag_type flags = regex_constants::match_default); // TODO: investigation needed -// XFAIL: linux-gnu, freebsd +// XFAIL: target={{.*}}-linux-gnu{{.*}}, freebsd #include #include diff --git a/libcxx/test/std/re/re.alg/re.alg.search/ecma.locale.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.search/ecma.locale.pass.cpp index 840cc634a3da5..e97c4df3473ff 100644 --- a/libcxx/test/std/re/re.alg/re.alg.search/ecma.locale.pass.cpp +++ b/libcxx/test/std/re/re.alg/re.alg.search/ecma.locale.pass.cpp @@ -21,7 +21,7 @@ // regex_constants::match_flag_type flags = regex_constants::match_default); // TODO: investigation needed -// XFAIL: linux-gnu, freebsd +// XFAIL: target={{.*}}-linux-gnu{{.*}}, freebsd #include #include diff --git a/libcxx/test/std/re/re.alg/re.alg.search/extended.locale.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.search/extended.locale.pass.cpp index 3a171f81e6966..8902d06183b16 100644 --- a/libcxx/test/std/re/re.alg/re.alg.search/extended.locale.pass.cpp +++ b/libcxx/test/std/re/re.alg/re.alg.search/extended.locale.pass.cpp @@ -21,7 +21,7 @@ // regex_constants::match_flag_type flags = regex_constants::match_default); // TODO: investigation needed -// XFAIL: linux-gnu, freebsd +// XFAIL: target={{.*}}-linux-gnu{{.*}}, freebsd #include #include diff --git a/libcxx/test/std/re/re.traits/lookup_collatename.pass.cpp b/libcxx/test/std/re/re.traits/lookup_collatename.pass.cpp index aeb7c50bb8177..27ff6eef38dc9 100644 --- a/libcxx/test/std/re/re.traits/lookup_collatename.pass.cpp +++ b/libcxx/test/std/re/re.traits/lookup_collatename.pass.cpp @@ -20,7 +20,7 @@ // lookup_collatename(ForwardIterator first, ForwardIterator last) const; // TODO: investigation needed -// XFAIL: linux-gnu +// XFAIL: target={{.*}}-linux-gnu{{.*}} #include #include diff --git a/libcxx/test/std/strings/basic.string/string.capacity/over_max_size.pass.cpp b/libcxx/test/std/strings/basic.string/string.capacity/over_max_size.pass.cpp index d7d2efca9042a..54d9c4ee69cb8 100644 --- a/libcxx/test/std/strings/basic.string/string.capacity/over_max_size.pass.cpp +++ b/libcxx/test/std/strings/basic.string/string.capacity/over_max_size.pass.cpp @@ -7,9 +7,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11}} // diff --git a/libcxx/test/std/strings/basic.string/string.capacity/reserve_size.pass.cpp b/libcxx/test/std/strings/basic.string/string.capacity/reserve_size.pass.cpp index a14ec20c651ae..c6e2cd14dceed 100644 --- a/libcxx/test/std/strings/basic.string/string.capacity/reserve_size.pass.cpp +++ b/libcxx/test/std/strings/basic.string/string.capacity/reserve_size.pass.cpp @@ -13,13 +13,7 @@ // This test relies on https://llvm.org/PR45368 being fixed, which isn't in // older Apple dylibs // -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} #include #include diff --git a/libcxx/test/std/thread/futures/futures.async/async_race.38682.pass.cpp b/libcxx/test/std/thread/futures/futures.async/async_race.38682.pass.cpp index c8686a3dc2e56..4fe7cd0543398 100644 --- a/libcxx/test/std/thread/futures/futures.async/async_race.38682.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.async/async_race.38682.pass.cpp @@ -11,12 +11,7 @@ // There's currently no release of OS X whose dylib contains the patch for // PR38682. Since the fix for future is in the dylib, this test may fail. -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.14 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.13 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.12 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.11 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.10 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.9 +// UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} // This test is designed to cause and allow TSAN to detect a race condition // in std::async, as reported in https://llvm.org/PR38682. diff --git a/libcxx/test/std/thread/futures/futures.future_error/what.pass.cpp b/libcxx/test/std/thread/futures/futures.future_error/what.pass.cpp index 4b256111a4422..0b093fc235bcb 100644 --- a/libcxx/test/std/thread/futures/futures.future_error/what.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.future_error/what.pass.cpp @@ -11,9 +11,7 @@ // LWG 2056 changed the values of future_errc, so if we're using new headers // with an old library we'll get incorrect messages. // -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11}} // diff --git a/libcxx/test/std/thread/thread.barrier/arrive.pass.cpp b/libcxx/test/std/thread/thread.barrier/arrive.pass.cpp index e19695e760c57..31df3714315d6 100644 --- a/libcxx/test/std/thread/thread.barrier/arrive.pass.cpp +++ b/libcxx/test/std/thread/thread.barrier/arrive.pass.cpp @@ -11,13 +11,7 @@ // This test requires the dylib support introduced in D68480, which shipped in // macOS 11.0. -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // diff --git a/libcxx/test/std/thread/thread.barrier/arrive_and_drop.pass.cpp b/libcxx/test/std/thread/thread.barrier/arrive_and_drop.pass.cpp index 7f6ce35396040..038c5ac169e2a 100644 --- a/libcxx/test/std/thread/thread.barrier/arrive_and_drop.pass.cpp +++ b/libcxx/test/std/thread/thread.barrier/arrive_and_drop.pass.cpp @@ -11,13 +11,7 @@ // This test requires the dylib support introduced in D68480, which shipped in // macOS 11.0. -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // diff --git a/libcxx/test/std/thread/thread.barrier/arrive_and_wait.pass.cpp b/libcxx/test/std/thread/thread.barrier/arrive_and_wait.pass.cpp index 920281981e46a..de3f9f785af0d 100644 --- a/libcxx/test/std/thread/thread.barrier/arrive_and_wait.pass.cpp +++ b/libcxx/test/std/thread/thread.barrier/arrive_and_wait.pass.cpp @@ -11,13 +11,7 @@ // This test requires the dylib support introduced in D68480, which shipped in // macOS 11.0. -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // diff --git a/libcxx/test/std/thread/thread.barrier/completion.pass.cpp b/libcxx/test/std/thread/thread.barrier/completion.pass.cpp index abbfb869895ce..baa73fb3ae772 100644 --- a/libcxx/test/std/thread/thread.barrier/completion.pass.cpp +++ b/libcxx/test/std/thread/thread.barrier/completion.pass.cpp @@ -11,13 +11,7 @@ // This test requires the dylib support introduced in D68480, which shipped in // macOS 11.0. -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // diff --git a/libcxx/test/std/thread/thread.latch/arrive_and_wait.pass.cpp b/libcxx/test/std/thread/thread.latch/arrive_and_wait.pass.cpp index 69160fe89a8e2..5d0a01206492b 100644 --- a/libcxx/test/std/thread/thread.latch/arrive_and_wait.pass.cpp +++ b/libcxx/test/std/thread/thread.latch/arrive_and_wait.pass.cpp @@ -11,13 +11,7 @@ // This test requires the dylib support introduced in D68480, which shipped in // macOS 11.0. -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // diff --git a/libcxx/test/std/thread/thread.latch/count_down.pass.cpp b/libcxx/test/std/thread/thread.latch/count_down.pass.cpp index e33590f1c78af..0038bf1ca86d2 100644 --- a/libcxx/test/std/thread/thread.latch/count_down.pass.cpp +++ b/libcxx/test/std/thread/thread.latch/count_down.pass.cpp @@ -11,13 +11,7 @@ // This test requires the dylib support introduced in D68480, which shipped in // macOS 11.0. -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // diff --git a/libcxx/test/std/thread/thread.latch/try_wait.pass.cpp b/libcxx/test/std/thread/thread.latch/try_wait.pass.cpp index 8283aa3546a10..b627bbb264393 100644 --- a/libcxx/test/std/thread/thread.latch/try_wait.pass.cpp +++ b/libcxx/test/std/thread/thread.latch/try_wait.pass.cpp @@ -11,13 +11,7 @@ // This test requires the dylib support introduced in D68480, which shipped in // macOS 11.0. -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/default.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/default.pass.cpp index d6081a63c3c15..3f3868e1289f4 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/default.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/default.pass.cpp @@ -10,9 +10,7 @@ // UNSUPPORTED: c++03, c++11 // dylib support for shared_mutex was added in macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11}} // diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/move_assign.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/move_assign.pass.cpp index d251b0d147772..07b1b32108910 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/move_assign.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/move_assign.pass.cpp @@ -10,9 +10,7 @@ // UNSUPPORTED: c++03, c++11 // dylib support for shared_mutex was added in macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11}} // diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/move_ctor.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/move_ctor.pass.cpp index d16bf5441eedd..612fc34e8acde 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/move_ctor.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/move_ctor.pass.cpp @@ -10,9 +10,7 @@ // UNSUPPORTED: c++03, c++11 // dylib support for shared_mutex was added in macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11}} // diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex.pass.cpp index 250ba77025bd4..d09cde085db3d 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex.pass.cpp @@ -10,9 +10,7 @@ // UNSUPPORTED: c++03, c++11 // dylib support for shared_mutex was added in macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11}} // ALLOW_RETRIES: 2 diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_adopt_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_adopt_lock.pass.cpp index 0147a32e46d75..4e1ffd5d3e556 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_adopt_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_adopt_lock.pass.cpp @@ -10,9 +10,7 @@ // UNSUPPORTED: c++03, c++11 // dylib support for shared_mutex was added in macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11}} // diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_defer_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_defer_lock.pass.cpp index 16719e0f92a15..fac91220eafc9 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_defer_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_defer_lock.pass.cpp @@ -10,9 +10,7 @@ // UNSUPPORTED: c++03, c++11 // dylib support for shared_mutex was added in macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11}} // diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_duration.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_duration.pass.cpp index 29160d5e76a89..66e810b02cc5c 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_duration.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_duration.pass.cpp @@ -10,9 +10,7 @@ // UNSUPPORTED: c++03, c++11 // dylib support for shared_mutex was added in macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11}} // diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_time_point.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_time_point.pass.cpp index 9f242f484a48e..074c906b2d0f3 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_time_point.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_time_point.pass.cpp @@ -10,9 +10,7 @@ // UNSUPPORTED: c++03, c++11 // dylib support for shared_mutex was added in macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11}} // diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_try_to_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_try_to_lock.pass.cpp index 60c85945b904e..0e3721bb0de72 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_try_to_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_try_to_lock.pass.cpp @@ -10,9 +10,7 @@ // UNSUPPORTED: c++03, c++11 // dylib support for shared_mutex was added in macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11}} // ALLOW_RETRIES: 2 diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/lock.pass.cpp index fb0650ba79b79..dc980ae463411 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/lock.pass.cpp @@ -10,9 +10,7 @@ // UNSUPPORTED: c++03, c++11 // dylib support for shared_mutex was added in macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11}} // ALLOW_RETRIES: 2 diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/mutex.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/mutex.pass.cpp index d3ae30e578c28..7c2fdcddf082a 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/mutex.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/mutex.pass.cpp @@ -10,9 +10,7 @@ // UNSUPPORTED: c++03, c++11 // dylib support for shared_mutex was added in macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11}} // diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/owns_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/owns_lock.pass.cpp index a8c6bcb6d7d64..23eae754bcfef 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/owns_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/owns_lock.pass.cpp @@ -10,9 +10,7 @@ // UNSUPPORTED: c++03, c++11 // dylib support for shared_mutex was added in macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11}} // diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/assign.fail.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/assign.fail.cpp index 2b71e9cadab65..7fc5442c64fb8 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/assign.fail.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/assign.fail.cpp @@ -10,9 +10,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // shared_mutex was introduced in macosx10.12 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.11 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.10 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.9 +// UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11}} // diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/copy.fail.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/copy.fail.cpp index 402735ac39476..27086a84919c9 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/copy.fail.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/copy.fail.cpp @@ -10,9 +10,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // shared_mutex was introduced in macosx10.12 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.11 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.10 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.9 +// UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11}} // diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/default.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/default.pass.cpp index ce659e8463978..76c6cebbee49e 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/default.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/default.pass.cpp @@ -10,9 +10,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // shared_mutex was introduced in macosx10.12 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.11 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.10 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.9 +// UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11}} // diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/lock.pass.cpp index c442c4ce747b9..4d97129cd38d2 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/lock.pass.cpp @@ -12,9 +12,7 @@ // ALLOW_RETRIES: 2 // shared_mutex was introduced in macosx10.12 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.11 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.10 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.9 +// UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11}} // diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/lock_shared.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/lock_shared.pass.cpp index b6c42085d6baf..fec48900fab4c 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/lock_shared.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/lock_shared.pass.cpp @@ -12,9 +12,7 @@ // ALLOW_RETRIES: 2 // shared_mutex was introduced in macosx10.12 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.11 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.10 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.9 +// UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11}} // diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/try_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/try_lock.pass.cpp index 075074419cedd..f1d18c80d6a8e 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/try_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/try_lock.pass.cpp @@ -12,9 +12,7 @@ // ALLOW_RETRIES: 2 // shared_mutex was introduced in macosx10.12 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.11 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.10 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.9 +// UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11}} // diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/try_lock_shared.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/try_lock_shared.pass.cpp index 7c8dcc2a62c17..fc9eb36937ff4 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/try_lock_shared.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/try_lock_shared.pass.cpp @@ -12,9 +12,7 @@ // ALLOW_RETRIES: 2 // shared_mutex was introduced in macosx10.12 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.11 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.10 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.9 +// UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11}} // diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/assign.compile.fail.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/assign.compile.fail.cpp index 6474fd9e7cc02..9af5a4fc0a065 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/assign.compile.fail.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/assign.compile.fail.cpp @@ -9,9 +9,7 @@ // UNSUPPORTED: c++03, c++11 // shared_timed_mutex was introduced in macosx10.12 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.11 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.10 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.9 +// UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11}} // diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/copy.compile.fail.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/copy.compile.fail.cpp index a9f9b7e6c7d2e..c9ec2483a6649 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/copy.compile.fail.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/copy.compile.fail.cpp @@ -9,9 +9,7 @@ // UNSUPPORTED: c++03, c++11 // shared_timed_mutex was introduced in macosx10.12 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.11 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.10 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.9 +// UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11}} // diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/default.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/default.pass.cpp index e966622dae85b..cd27c79cc9422 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/default.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/default.pass.cpp @@ -10,9 +10,7 @@ // UNSUPPORTED: c++03, c++11 // shared_timed_mutex was introduced in macosx10.12 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.11 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.10 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.9 +// UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11}} // diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/lock.pass.cpp index ef0c39729df70..f0bdf0dfa75ed 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/lock.pass.cpp @@ -10,9 +10,7 @@ // UNSUPPORTED: c++03, c++11 // shared_timed_mutex was introduced in macosx10.12 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.11 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.10 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.9 +// UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11}} // diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/lock_shared.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/lock_shared.pass.cpp index ac2f702db4b18..d8a02a6bc4c0f 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/lock_shared.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/lock_shared.pass.cpp @@ -10,9 +10,7 @@ // UNSUPPORTED: c++03, c++11 // shared_timed_mutex was introduced in macosx10.12 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.11 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.10 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.9 +// UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11}} // diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock.pass.cpp index ffc9eeabc6385..172742161d81a 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock.pass.cpp @@ -12,9 +12,7 @@ // ALLOW_RETRIES: 2 // shared_timed_mutex was introduced in macosx10.12 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.11 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.10 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.9 +// UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11}} // diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_for.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_for.pass.cpp index 481741fbcda0b..20bf000d3fe0f 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_for.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_for.pass.cpp @@ -12,9 +12,7 @@ // ALLOW_RETRIES: 2 // shared_timed_mutex was introduced in macosx10.12 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.11 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.10 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.9 +// UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11}} // diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_shared.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_shared.pass.cpp index a42fdc1b73cc1..16e9291876333 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_shared.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_shared.pass.cpp @@ -12,9 +12,7 @@ // ALLOW_RETRIES: 2 // shared_timed_mutex was introduced in macosx10.12 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.11 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.10 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.9 +// UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11}} // diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_shared_for.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_shared_for.pass.cpp index 61c569cf6e690..eda6453d58eeb 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_shared_for.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_shared_for.pass.cpp @@ -12,9 +12,7 @@ // ALLOW_RETRIES: 2 // shared_timed_mutex was introduced in macosx10.12 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.11 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.10 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.9 +// UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11}} // diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_shared_until.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_shared_until.pass.cpp index 324870a50e3b0..02ee39876a7a3 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_shared_until.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_shared_until.pass.cpp @@ -12,9 +12,7 @@ // ALLOW_RETRIES: 2 // shared_timed_mutex was introduced in macosx10.12 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.11 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.10 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.9 +// UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11}} // diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_until.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_until.pass.cpp index 5ea731afbe088..3118893efd21c 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_until.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_until.pass.cpp @@ -12,9 +12,7 @@ // ALLOW_RETRIES: 2 // shared_timed_mutex was introduced in macosx10.12 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.11 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.10 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.9 +// UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11}} // diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_until_deadlock_bug.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_until_deadlock_bug.pass.cpp index c817c5ff1ac2c..5dd2b7cf2fa4c 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_until_deadlock_bug.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_until_deadlock_bug.pass.cpp @@ -10,9 +10,7 @@ // UNSUPPORTED: c++03, c++11 // shared_timed_mutex was introduced in macosx10.12 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.11 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.10 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.9 +// UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11}} // diff --git a/libcxx/test/std/thread/thread.semaphore/acquire.pass.cpp b/libcxx/test/std/thread/thread.semaphore/acquire.pass.cpp index 547594b71e591..cd08e2ba81017 100644 --- a/libcxx/test/std/thread/thread.semaphore/acquire.pass.cpp +++ b/libcxx/test/std/thread/thread.semaphore/acquire.pass.cpp @@ -11,13 +11,7 @@ // This test requires the dylib support introduced in D68480, which shipped in // macOS 11.0. -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // diff --git a/libcxx/test/std/thread/thread.semaphore/binary.pass.cpp b/libcxx/test/std/thread/thread.semaphore/binary.pass.cpp index 2ebee35e3608f..b80c9fea0295c 100644 --- a/libcxx/test/std/thread/thread.semaphore/binary.pass.cpp +++ b/libcxx/test/std/thread/thread.semaphore/binary.pass.cpp @@ -11,13 +11,7 @@ // This test requires the dylib support introduced in D68480, which shipped in // macOS 11.0. -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // diff --git a/libcxx/test/std/thread/thread.semaphore/release.pass.cpp b/libcxx/test/std/thread/thread.semaphore/release.pass.cpp index 76dac0cc403e2..e491e13e50f95 100644 --- a/libcxx/test/std/thread/thread.semaphore/release.pass.cpp +++ b/libcxx/test/std/thread/thread.semaphore/release.pass.cpp @@ -11,13 +11,7 @@ // This test requires the dylib support introduced in D68480, which shipped in // macOS 11.0. -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // diff --git a/libcxx/test/std/thread/thread.semaphore/timed.pass.cpp b/libcxx/test/std/thread/thread.semaphore/timed.pass.cpp index 34520ecfae05d..0954341adfae9 100644 --- a/libcxx/test/std/thread/thread.semaphore/timed.pass.cpp +++ b/libcxx/test/std/thread/thread.semaphore/timed.pass.cpp @@ -11,13 +11,7 @@ // This test requires the dylib support introduced in D68480, which shipped in // macOS 11.0. -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // diff --git a/libcxx/test/std/thread/thread.semaphore/try_acquire.pass.cpp b/libcxx/test/std/thread/thread.semaphore/try_acquire.pass.cpp index f71d12f8740e8..35cb6ce7dc822 100644 --- a/libcxx/test/std/thread/thread.semaphore/try_acquire.pass.cpp +++ b/libcxx/test/std/thread/thread.semaphore/try_acquire.pass.cpp @@ -11,13 +11,7 @@ // This test requires the dylib support introduced in D68480, which shipped in // macOS 11.0. -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // diff --git a/libcxx/test/std/utilities/any/any.class/any.assign/copy.pass.cpp b/libcxx/test/std/utilities/any/any.class/any.assign/copy.pass.cpp index 0e41114af3bb4..b59bb76e5b760 100644 --- a/libcxx/test/std/utilities/any/any.class/any.assign/copy.pass.cpp +++ b/libcxx/test/std/utilities/any/any.class/any.assign/copy.pass.cpp @@ -9,10 +9,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // Throwing bad_any_cast is supported starting in macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 && !no-exceptions +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} && !no-exceptions // diff --git a/libcxx/test/std/utilities/any/any.class/any.assign/move.pass.cpp b/libcxx/test/std/utilities/any/any.class/any.assign/move.pass.cpp index fff04834fadd1..4a1b0fc8eed1d 100644 --- a/libcxx/test/std/utilities/any/any.class/any.assign/move.pass.cpp +++ b/libcxx/test/std/utilities/any/any.class/any.assign/move.pass.cpp @@ -9,10 +9,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // Throwing bad_any_cast is supported starting in macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 && !no-exceptions +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} && !no-exceptions // diff --git a/libcxx/test/std/utilities/any/any.class/any.assign/value.pass.cpp b/libcxx/test/std/utilities/any/any.class/any.assign/value.pass.cpp index 5065ed607624a..f054cd8a719aa 100644 --- a/libcxx/test/std/utilities/any/any.class/any.assign/value.pass.cpp +++ b/libcxx/test/std/utilities/any/any.class/any.assign/value.pass.cpp @@ -9,10 +9,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // Throwing bad_any_cast is supported starting in macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 && !no-exceptions +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} && !no-exceptions // diff --git a/libcxx/test/std/utilities/any/any.class/any.cons/copy.pass.cpp b/libcxx/test/std/utilities/any/any.class/any.cons/copy.pass.cpp index 06d1825afa0b7..579ea24cc18ef 100644 --- a/libcxx/test/std/utilities/any/any.class/any.cons/copy.pass.cpp +++ b/libcxx/test/std/utilities/any/any.class/any.cons/copy.pass.cpp @@ -9,10 +9,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // Throwing bad_any_cast is supported starting in macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 && !no-exceptions +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} && !no-exceptions // diff --git a/libcxx/test/std/utilities/any/any.class/any.cons/in_place_type.pass.cpp b/libcxx/test/std/utilities/any/any.class/any.cons/in_place_type.pass.cpp index 1a7baa4baf1c7..0c8b668ddaa4f 100644 --- a/libcxx/test/std/utilities/any/any.class/any.cons/in_place_type.pass.cpp +++ b/libcxx/test/std/utilities/any/any.class/any.cons/in_place_type.pass.cpp @@ -9,10 +9,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // Throwing bad_any_cast is supported starting in macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 && !no-exceptions +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} && !no-exceptions // diff --git a/libcxx/test/std/utilities/any/any.class/any.cons/move.pass.cpp b/libcxx/test/std/utilities/any/any.class/any.cons/move.pass.cpp index fe908bf07cd06..d8710213f1715 100644 --- a/libcxx/test/std/utilities/any/any.class/any.cons/move.pass.cpp +++ b/libcxx/test/std/utilities/any/any.class/any.cons/move.pass.cpp @@ -9,10 +9,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // Throwing bad_any_cast is supported starting in macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 && !no-exceptions +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} && !no-exceptions // diff --git a/libcxx/test/std/utilities/any/any.class/any.cons/value.pass.cpp b/libcxx/test/std/utilities/any/any.class/any.cons/value.pass.cpp index 75e8ad9ee9f73..d55bbda741439 100644 --- a/libcxx/test/std/utilities/any/any.class/any.cons/value.pass.cpp +++ b/libcxx/test/std/utilities/any/any.class/any.cons/value.pass.cpp @@ -9,10 +9,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // Throwing bad_any_cast is supported starting in macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 && !no-exceptions +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} && !no-exceptions // diff --git a/libcxx/test/std/utilities/any/any.class/any.modifiers/emplace.pass.cpp b/libcxx/test/std/utilities/any/any.class/any.modifiers/emplace.pass.cpp index feec4c00ddc43..5168d69ddea4a 100644 --- a/libcxx/test/std/utilities/any/any.class/any.modifiers/emplace.pass.cpp +++ b/libcxx/test/std/utilities/any/any.class/any.modifiers/emplace.pass.cpp @@ -9,10 +9,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // Throwing bad_any_cast is supported starting in macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 && !no-exceptions +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} && !no-exceptions // diff --git a/libcxx/test/std/utilities/any/any.class/any.modifiers/reset.pass.cpp b/libcxx/test/std/utilities/any/any.class/any.modifiers/reset.pass.cpp index 5f1ba18b2c83e..4b4aa2923662c 100644 --- a/libcxx/test/std/utilities/any/any.class/any.modifiers/reset.pass.cpp +++ b/libcxx/test/std/utilities/any/any.class/any.modifiers/reset.pass.cpp @@ -9,10 +9,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // Throwing bad_any_cast is supported starting in macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 && !no-exceptions +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} && !no-exceptions // diff --git a/libcxx/test/std/utilities/any/any.class/any.modifiers/swap.pass.cpp b/libcxx/test/std/utilities/any/any.class/any.modifiers/swap.pass.cpp index 7839d4b4fcf1a..ee5684ee2ea8c 100644 --- a/libcxx/test/std/utilities/any/any.class/any.modifiers/swap.pass.cpp +++ b/libcxx/test/std/utilities/any/any.class/any.modifiers/swap.pass.cpp @@ -9,10 +9,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // Throwing bad_any_cast is supported starting in macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 && !no-exceptions +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} && !no-exceptions // diff --git a/libcxx/test/std/utilities/any/any.nonmembers/any.cast/any_cast_pointer.pass.cpp b/libcxx/test/std/utilities/any/any.nonmembers/any.cast/any_cast_pointer.pass.cpp index e62b74f083378..1a50a694ef8d7 100644 --- a/libcxx/test/std/utilities/any/any.nonmembers/any.cast/any_cast_pointer.pass.cpp +++ b/libcxx/test/std/utilities/any/any.nonmembers/any.cast/any_cast_pointer.pass.cpp @@ -9,10 +9,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // Throwing bad_any_cast is supported starting in macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 && !no-exceptions +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} && !no-exceptions // diff --git a/libcxx/test/std/utilities/any/any.nonmembers/any.cast/any_cast_reference.pass.cpp b/libcxx/test/std/utilities/any/any.nonmembers/any.cast/any_cast_reference.pass.cpp index 2d6562a3b55c8..79d9a440dba17 100644 --- a/libcxx/test/std/utilities/any/any.nonmembers/any.cast/any_cast_reference.pass.cpp +++ b/libcxx/test/std/utilities/any/any.nonmembers/any.cast/any_cast_reference.pass.cpp @@ -9,10 +9,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // Throwing bad_any_cast is supported starting in macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 && !no-exceptions +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} && !no-exceptions // diff --git a/libcxx/test/std/utilities/any/any.nonmembers/any.cast/any_cast_request_invalid_value_category.fail.cpp b/libcxx/test/std/utilities/any/any.nonmembers/any.cast/any_cast_request_invalid_value_category.fail.cpp index ac793b2bc7a69..73cdad798cfbe 100644 --- a/libcxx/test/std/utilities/any/any.nonmembers/any.cast/any_cast_request_invalid_value_category.fail.cpp +++ b/libcxx/test/std/utilities/any/any.nonmembers/any.cast/any_cast_request_invalid_value_category.fail.cpp @@ -9,10 +9,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // Throwing bad_any_cast is supported starting in macosx10.13 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.12 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.11 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.10 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.9 +// UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} // diff --git a/libcxx/test/std/utilities/any/any.nonmembers/any.cast/const_correctness.fail.cpp b/libcxx/test/std/utilities/any/any.nonmembers/any.cast/const_correctness.fail.cpp index 6b9a5e9e4034d..234efc83423bb 100644 --- a/libcxx/test/std/utilities/any/any.nonmembers/any.cast/const_correctness.fail.cpp +++ b/libcxx/test/std/utilities/any/any.nonmembers/any.cast/const_correctness.fail.cpp @@ -9,10 +9,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // Throwing bad_any_cast is supported starting in macosx10.13 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.12 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.11 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.10 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.9 +// UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} // diff --git a/libcxx/test/std/utilities/any/any.nonmembers/any.cast/not_copy_constructible.fail.cpp b/libcxx/test/std/utilities/any/any.nonmembers/any.cast/not_copy_constructible.fail.cpp index 9c3238879469b..44a67f7aa03dc 100644 --- a/libcxx/test/std/utilities/any/any.nonmembers/any.cast/not_copy_constructible.fail.cpp +++ b/libcxx/test/std/utilities/any/any.nonmembers/any.cast/not_copy_constructible.fail.cpp @@ -9,10 +9,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // Throwing bad_any_cast is supported starting in macosx10.13 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.12 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.11 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.10 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.9 +// UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} // diff --git a/libcxx/test/std/utilities/any/any.nonmembers/make_any.pass.cpp b/libcxx/test/std/utilities/any/any.nonmembers/make_any.pass.cpp index 6b2883ed3ce27..6b8f93073b030 100644 --- a/libcxx/test/std/utilities/any/any.nonmembers/make_any.pass.cpp +++ b/libcxx/test/std/utilities/any/any.nonmembers/make_any.pass.cpp @@ -9,10 +9,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // Throwing bad_any_cast is supported starting in macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 && !no-exceptions +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} && !no-exceptions // diff --git a/libcxx/test/std/utilities/any/any.nonmembers/swap.pass.cpp b/libcxx/test/std/utilities/any/any.nonmembers/swap.pass.cpp index 05027bacf8a83..440159f12d72f 100644 --- a/libcxx/test/std/utilities/any/any.nonmembers/swap.pass.cpp +++ b/libcxx/test/std/utilities/any/any.nonmembers/swap.pass.cpp @@ -9,10 +9,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // Throwing bad_any_cast is supported starting in macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 && !no-exceptions +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} && !no-exceptions // diff --git a/libcxx/test/std/utilities/charconv/charconv.from.chars/integral.roundtrip.pass.cpp b/libcxx/test/std/utilities/charconv/charconv.from.chars/integral.roundtrip.pass.cpp index 5329bd3a0a406..88450a33afdc9 100644 --- a/libcxx/test/std/utilities/charconv/charconv.from.chars/integral.roundtrip.pass.cpp +++ b/libcxx/test/std/utilities/charconv/charconv.from.chars/integral.roundtrip.pass.cpp @@ -13,12 +13,7 @@ // The roundtrip test uses to_chars, which requires functions in the dylib // that were introduced in Mac OS 10.15. // -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} // diff --git a/libcxx/test/std/utilities/charconv/charconv.to.chars/integral.pass.cpp b/libcxx/test/std/utilities/charconv/charconv.to.chars/integral.pass.cpp index 3548b65724a17..4afe49190603d 100644 --- a/libcxx/test/std/utilities/charconv/charconv.to.chars/integral.pass.cpp +++ b/libcxx/test/std/utilities/charconv/charconv.to.chars/integral.pass.cpp @@ -12,12 +12,7 @@ // to_chars requires functions in the dylib that were introduced in Mac OS 10.15. // -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} // diff --git a/libcxx/test/std/utilities/format/format.error/format.error.pass.cpp b/libcxx/test/std/utilities/format/format.error/format.error.pass.cpp index 88958e460ff6d..979a3fed463b2 100644 --- a/libcxx/test/std/utilities/format/format.error/format.error.pass.cpp +++ b/libcxx/test/std/utilities/format/format.error/format.error.pass.cpp @@ -9,13 +9,7 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // diff --git a/libcxx/test/std/utilities/format/format.formatter/format.parse.ctx/check_arg_id.pass.cpp b/libcxx/test/std/utilities/format/format.formatter/format.parse.ctx/check_arg_id.pass.cpp index 68e99578b0c9b..5b18834e51462 100644 --- a/libcxx/test/std/utilities/format/format.formatter/format.parse.ctx/check_arg_id.pass.cpp +++ b/libcxx/test/std/utilities/format/format.formatter/format.parse.ctx/check_arg_id.pass.cpp @@ -10,13 +10,7 @@ // UNSUPPORTED: no-exceptions // This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // diff --git a/libcxx/test/std/utilities/format/format.formatter/format.parse.ctx/next_arg_id.pass.cpp b/libcxx/test/std/utilities/format/format.formatter/format.parse.ctx/next_arg_id.pass.cpp index 3b2304f61f5ed..6d53995a627ad 100644 --- a/libcxx/test/std/utilities/format/format.formatter/format.parse.ctx/next_arg_id.pass.cpp +++ b/libcxx/test/std/utilities/format/format.formatter/format.parse.ctx/next_arg_id.pass.cpp @@ -10,13 +10,7 @@ // UNSUPPORTED: no-exceptions // This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} // diff --git a/libcxx/test/std/utilities/memory/temporary.buffer/overaligned.pass.cpp b/libcxx/test/std/utilities/memory/temporary.buffer/overaligned.pass.cpp index b5286fb529948..a4911a5a445e3 100644 --- a/libcxx/test/std/utilities/memory/temporary.buffer/overaligned.pass.cpp +++ b/libcxx/test/std/utilities/memory/temporary.buffer/overaligned.pass.cpp @@ -11,10 +11,7 @@ // Aligned allocations are not supported on macOS < 10.13 // Note: use 'unsupported' instead of 'xfail' to ensure // we won't pass prior to c++17. -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.12 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.11 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.10 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.9 +// UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} // diff --git a/libcxx/test/std/utilities/optional/optional.bad_optional_access/default.pass.cpp b/libcxx/test/std/utilities/optional/optional.bad_optional_access/default.pass.cpp index 517e0d64d4017..7f0439be216da 100644 --- a/libcxx/test/std/utilities/optional/optional.bad_optional_access/default.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.bad_optional_access/default.pass.cpp @@ -9,10 +9,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // Throwing bad_optional_access is supported starting in macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} // diff --git a/libcxx/test/std/utilities/optional/optional.bad_optional_access/derive.pass.cpp b/libcxx/test/std/utilities/optional/optional.bad_optional_access/derive.pass.cpp index 34b770e368586..ad1112f2583db 100644 --- a/libcxx/test/std/utilities/optional/optional.bad_optional_access/derive.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.bad_optional_access/derive.pass.cpp @@ -9,10 +9,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // Throwing bad_optional_access is supported starting in macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} // diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/U.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/U.pass.cpp index c9058a305880f..e99c872d64f32 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/U.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/U.pass.cpp @@ -9,10 +9,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // Throwing bad_optional_access is supported starting in macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 && !no-exceptions +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} && !no-exceptions // diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/const_T.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/const_T.pass.cpp index 45c3c66a5cb5a..ddfc491c1696c 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/const_T.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/const_T.pass.cpp @@ -9,10 +9,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // Throwing bad_optional_access is supported starting in macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 && !no-exceptions +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} && !no-exceptions // diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/move.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/move.pass.cpp index 4e86bcd49bb15..e4918f7cac118 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/move.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/move.pass.cpp @@ -9,10 +9,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // Throwing bad_optional_access is supported starting in macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 && !no-exceptions +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} && !no-exceptions // diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/rvalue_T.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/rvalue_T.pass.cpp index e10346e14eb19..dddd8facff05b 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/rvalue_T.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/rvalue_T.pass.cpp @@ -9,10 +9,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // Throwing bad_optional_access is supported starting in macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 && !no-exceptions +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} && !no-exceptions // diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value.pass.cpp index eba21bb9db0d4..8cd9bcb2fa695 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value.pass.cpp @@ -9,10 +9,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // Throwing bad_optional_access is supported starting in macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 && !no-exceptions +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} && !no-exceptions // diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_const.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_const.pass.cpp index 03aa47454808e..103a6317b24e7 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_const.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_const.pass.cpp @@ -9,10 +9,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // Throwing bad_optional_access is supported starting in macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 && !no-exceptions +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} && !no-exceptions // diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_const_rvalue.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_const_rvalue.pass.cpp index dff3dca9a62c7..7402e98b663fd 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_const_rvalue.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_const_rvalue.pass.cpp @@ -9,10 +9,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // Throwing bad_optional_access is supported starting in macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 && !no-exceptions +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} && !no-exceptions // diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_rvalue.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_rvalue.pass.cpp index d677b9a8a0508..28856997c562e 100644 --- a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_rvalue.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_rvalue.pass.cpp @@ -10,10 +10,7 @@ // // Throwing bad_optional_access is supported starting in macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 && !no-exceptions +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} && !no-exceptions // constexpr T& optional::value() &&; diff --git a/libcxx/test/std/utilities/optional/optional.specalg/make_optional.pass.cpp b/libcxx/test/std/utilities/optional/optional.specalg/make_optional.pass.cpp index c1ad1f09c5cc0..681fe28a2a9b2 100644 --- a/libcxx/test/std/utilities/optional/optional.specalg/make_optional.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.specalg/make_optional.pass.cpp @@ -9,10 +9,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // Throwing bad_optional_access is supported starting in macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 && !no-exceptions +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} && !no-exceptions // // diff --git a/libcxx/test/std/utilities/time/time.clock/time.clock.file/now.pass.cpp b/libcxx/test/std/utilities/time/time.clock/time.clock.file/now.pass.cpp index dc050818da11d..6011424f6e7ee 100644 --- a/libcxx/test/std/utilities/time/time.clock/time.clock.file/now.pass.cpp +++ b/libcxx/test/std/utilities/time/time.clock/time.clock.file/now.pass.cpp @@ -10,12 +10,7 @@ // UNSUPPORTED: libcpp-has-no-filesystem-library // Filesystem is supported on Apple platforms starting with macosx10.15. -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.14 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.13 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.12 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.11 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.10 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.9 +// UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} // diff --git a/libcxx/test/std/utilities/variant/variant.bad_variant_access/bad_variant_access.pass.cpp b/libcxx/test/std/utilities/variant/variant.bad_variant_access/bad_variant_access.pass.cpp index 751c6551e115a..cb020b04a4216 100644 --- a/libcxx/test/std/utilities/variant/variant.bad_variant_access/bad_variant_access.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.bad_variant_access/bad_variant_access.pass.cpp @@ -10,10 +10,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // Throwing bad_variant_access is supported starting in macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} // diff --git a/libcxx/test/std/utilities/variant/variant.get/get_index.pass.cpp b/libcxx/test/std/utilities/variant/variant.get/get_index.pass.cpp index 830d4a4465a84..bed0026ccc506 100644 --- a/libcxx/test/std/utilities/variant/variant.get/get_index.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.get/get_index.pass.cpp @@ -10,10 +10,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // Throwing bad_variant_access is supported starting in macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 && !no-exceptions +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} && !no-exceptions // diff --git a/libcxx/test/std/utilities/variant/variant.get/get_type.pass.cpp b/libcxx/test/std/utilities/variant/variant.get/get_type.pass.cpp index 9f0889c089b18..e96269b4fadf1 100644 --- a/libcxx/test/std/utilities/variant/variant.get/get_type.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.get/get_type.pass.cpp @@ -10,10 +10,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // Throwing bad_variant_access is supported starting in macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 && !no-exceptions +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} && !no-exceptions // diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.assign/T.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.assign/T.pass.cpp index 546273187c66c..816578c0af670 100644 --- a/libcxx/test/std/utilities/variant/variant.variant/variant.assign/T.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.variant/variant.assign/T.pass.cpp @@ -10,10 +10,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // Throwing bad_variant_access is supported starting in macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 && !no-exceptions +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} && !no-exceptions // diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.assign/copy.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.assign/copy.pass.cpp index 6c85377bb9b96..2e6f51ee2faf4 100644 --- a/libcxx/test/std/utilities/variant/variant.variant/variant.assign/copy.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.variant/variant.assign/copy.pass.cpp @@ -10,10 +10,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // Throwing bad_variant_access is supported starting in macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 && !no-exceptions +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} && !no-exceptions // diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.assign/move.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.assign/move.pass.cpp index db8db2e8059ba..bdda20d7f4b42 100644 --- a/libcxx/test/std/utilities/variant/variant.variant/variant.assign/move.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.variant/variant.assign/move.pass.cpp @@ -10,10 +10,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // Throwing bad_variant_access is supported starting in macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 && !no-exceptions +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} && !no-exceptions // diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/T.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/T.pass.cpp index 340c270ad84e8..78ac9e14407ed 100644 --- a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/T.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/T.pass.cpp @@ -10,10 +10,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // Throwing bad_variant_access is supported starting in macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 && !no-exceptions +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} && !no-exceptions // diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/copy.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/copy.pass.cpp index 1270598020329..810d12139063c 100644 --- a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/copy.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/copy.pass.cpp @@ -10,10 +10,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // Throwing bad_variant_access is supported starting in macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 && !no-exceptions +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} && !no-exceptions // diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/default.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/default.pass.cpp index ca391666835c1..a61e0377bce3e 100644 --- a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/default.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/default.pass.cpp @@ -10,10 +10,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // Throwing bad_variant_access is supported starting in macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 && !no-exceptions +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} && !no-exceptions // diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/in_place_index_args.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/in_place_index_args.pass.cpp index 77a85ccee226b..f39e21ceb1339 100644 --- a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/in_place_index_args.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/in_place_index_args.pass.cpp @@ -10,10 +10,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // Throwing bad_variant_access is supported starting in macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 && !no-exceptions +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} && !no-exceptions // diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/in_place_index_init_list_args.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/in_place_index_init_list_args.pass.cpp index eb25721fb6262..248730f568068 100644 --- a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/in_place_index_init_list_args.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/in_place_index_init_list_args.pass.cpp @@ -10,10 +10,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // Throwing bad_variant_access is supported starting in macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 && !no-exceptions +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} && !no-exceptions // diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/in_place_type_args.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/in_place_type_args.pass.cpp index 075f9687d6c8d..05a0121fd4985 100644 --- a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/in_place_type_args.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/in_place_type_args.pass.cpp @@ -10,10 +10,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // Throwing bad_variant_access is supported starting in macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 && !no-exceptions +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} && !no-exceptions // diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/in_place_type_init_list_args.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/in_place_type_init_list_args.pass.cpp index 8e26c86f4e10e..a77aa40fb3b95 100644 --- a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/in_place_type_init_list_args.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/in_place_type_init_list_args.pass.cpp @@ -10,10 +10,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // Throwing bad_variant_access is supported starting in macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 && !no-exceptions +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} && !no-exceptions // diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/move.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/move.pass.cpp index aabfbf1e93bae..cb6a44b143598 100644 --- a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/move.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/move.pass.cpp @@ -10,10 +10,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // Throwing bad_variant_access is supported starting in macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 && !no-exceptions +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} && !no-exceptions // diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_index_args.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_index_args.pass.cpp index 691cb35382c9a..cbe3e53f46363 100644 --- a/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_index_args.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_index_args.pass.cpp @@ -10,10 +10,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // Throwing bad_variant_access is supported starting in macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 && !no-exceptions +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} && !no-exceptions // diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_index_init_list_args.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_index_init_list_args.pass.cpp index 67587d418ddf9..e838c1359bb17 100644 --- a/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_index_init_list_args.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_index_init_list_args.pass.cpp @@ -10,10 +10,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // Throwing bad_variant_access is supported starting in macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 && !no-exceptions +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} && !no-exceptions // diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_type_args.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_type_args.pass.cpp index a2b77c28cc76f..9dbf41f986e2e 100644 --- a/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_type_args.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_type_args.pass.cpp @@ -10,10 +10,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // Throwing bad_variant_access is supported starting in macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 && !no-exceptions +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} && !no-exceptions // diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_type_init_list_args.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_type_init_list_args.pass.cpp index 03fad25caa061..721ebfe8e488e 100644 --- a/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_type_init_list_args.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_type_init_list_args.pass.cpp @@ -10,10 +10,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // Throwing bad_variant_access is supported starting in macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 && !no-exceptions +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} && !no-exceptions // diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.swap/swap.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.swap/swap.pass.cpp index aa699860a31db..9fff8fee64b06 100644 --- a/libcxx/test/std/utilities/variant/variant.variant/variant.swap/swap.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.variant/variant.swap/swap.pass.cpp @@ -10,10 +10,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // Throwing bad_variant_access is supported starting in macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 && !no-exceptions +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} && !no-exceptions // diff --git a/libcxx/test/std/utilities/variant/variant.visit/robust_against_adl.pass.cpp b/libcxx/test/std/utilities/variant/variant.visit/robust_against_adl.pass.cpp index 3b81a66508aad..5886162dbab8f 100644 --- a/libcxx/test/std/utilities/variant/variant.visit/robust_against_adl.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.visit/robust_against_adl.pass.cpp @@ -10,10 +10,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // Throwing bad_variant_access is supported starting in macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 && !no-exceptions +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} && !no-exceptions // // template diff --git a/libcxx/test/std/utilities/variant/variant.visit/visit.pass.cpp b/libcxx/test/std/utilities/variant/variant.visit/visit.pass.cpp index d720fa968fff2..9bb4311c6629f 100644 --- a/libcxx/test/std/utilities/variant/variant.visit/visit.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.visit/visit.pass.cpp @@ -10,10 +10,7 @@ // UNSUPPORTED: c++03, c++11, c++14 // Throwing bad_variant_access is supported starting in macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 && !no-exceptions +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} && !no-exceptions // // template diff --git a/libcxx/test/std/utilities/variant/variant.visit/visit_return_type.pass.cpp b/libcxx/test/std/utilities/variant/variant.visit/visit_return_type.pass.cpp index 60fd7b3f91990..5b9d59567eaa0 100644 --- a/libcxx/test/std/utilities/variant/variant.visit/visit_return_type.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.visit/visit_return_type.pass.cpp @@ -10,10 +10,7 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // Throwing bad_variant_access is supported starting in macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 && !no-exceptions -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 && !no-exceptions +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} && !no-exceptions // // template diff --git a/libcxx/utils/libcxx/test/params.py b/libcxx/utils/libcxx/test/params.py index bdaa573205e80..991f51ef902ca 100644 --- a/libcxx/utils/libcxx/test/params.py +++ b/libcxx/utils/libcxx/test/params.py @@ -57,15 +57,8 @@ def getStdFlag(cfg, std): help="The target triple to compile the test suite for. This must be " "compatible with the target that the tests will be run on.", actions=lambda triple: filter(None, [ - AddFeature(triple), + AddFeature('target={}'.format(triple)), AddFlagIfSupported('--target={}'.format(triple)), - AddFeature('linux-gnu') if re.match(r'^.*-linux-gnu', triple) else None, - AddFeature('x86_64-linux') if re.match(r'^x86_64.*-linux', triple) else None, - AddFeature('x86_64-apple') if re.match(r'^x86_64.*-apple', triple) else None, - AddFeature('target-x86') if re.match(r'^i.86.*', triple) else None, - AddFeature('target-x86_64') if re.match(r'^x86_64.*', triple) else None, - AddFeature('target-aarch64') if re.match(r'^aarch64.*', triple) else None, - AddFeature('target-arm') if re.match(r'^arm.*', triple) else None, ])), Parameter(name='std', choices=_allStandards, type=str, diff --git a/libcxxabi/test/catch_function_01.pass.cpp b/libcxxabi/test/catch_function_01.pass.cpp index 02ecec22e9280..b7a6a007c84d5 100644 --- a/libcxxabi/test/catch_function_01.pass.cpp +++ b/libcxxabi/test/catch_function_01.pass.cpp @@ -14,8 +14,7 @@ // UNSUPPORTED: no-exceptions // 65ace9daa360 made it in the dylib in macOS 10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10}} #include diff --git a/libcxxabi/test/catch_member_data_pointer_01.pass.cpp b/libcxxabi/test/catch_member_data_pointer_01.pass.cpp index 2ee68ba4770b7..7279eee2a07f2 100644 --- a/libcxxabi/test/catch_member_data_pointer_01.pass.cpp +++ b/libcxxabi/test/catch_member_data_pointer_01.pass.cpp @@ -9,8 +9,7 @@ // UNSUPPORTED: no-exceptions // 1b00fc5d8133 made it in the dylib in macOS 10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10}} #include diff --git a/libcxxabi/test/catch_member_pointer_nullptr.pass.cpp b/libcxxabi/test/catch_member_pointer_nullptr.pass.cpp index 321aa8272747e..b2af9190ece40 100644 --- a/libcxxabi/test/catch_member_pointer_nullptr.pass.cpp +++ b/libcxxabi/test/catch_member_pointer_nullptr.pass.cpp @@ -8,10 +8,7 @@ // Catching an exception thrown as nullptr was not properly handled before // 2f984cab4fa7, which landed in macOS 10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} // UNSUPPORTED: no-exceptions diff --git a/libcxxabi/test/catch_multi_level_pointer.pass.cpp b/libcxxabi/test/catch_multi_level_pointer.pass.cpp index 26b025e2cf1d8..38374e8abd684 100644 --- a/libcxxabi/test/catch_multi_level_pointer.pass.cpp +++ b/libcxxabi/test/catch_multi_level_pointer.pass.cpp @@ -9,8 +9,7 @@ // UNSUPPORTED: no-exceptions // 1b00fc5d8133 made it in the dylib in macOS 10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10}} #include #include diff --git a/libcxxabi/test/catch_pointer_nullptr.pass.cpp b/libcxxabi/test/catch_pointer_nullptr.pass.cpp index 3320d28851044..9abcf7444e158 100644 --- a/libcxxabi/test/catch_pointer_nullptr.pass.cpp +++ b/libcxxabi/test/catch_pointer_nullptr.pass.cpp @@ -8,10 +8,7 @@ // Catching an exception thrown as nullptr was not properly handled before // 2f984cab4fa7, which landed in macOS 10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} // UNSUPPORTED: c++03 // UNSUPPORTED: no-exceptions diff --git a/libcxxabi/test/catch_ptr_02.pass.cpp b/libcxxabi/test/catch_ptr_02.pass.cpp index 444aac899d5e9..b849559ea42f6 100644 --- a/libcxxabi/test/catch_ptr_02.pass.cpp +++ b/libcxxabi/test/catch_ptr_02.pass.cpp @@ -15,7 +15,7 @@ // ADDITIONAL_COMPILE_FLAGS: -Wno-error // The fix for PR17222 made it in the dylib for macOS 10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.9 #include diff --git a/libcxxabi/test/cxa_vec_new_overflow_PR41395.pass.cpp b/libcxxabi/test/cxa_vec_new_overflow_PR41395.pass.cpp index b821ae86b98a4..9d75fc1060daf 100644 --- a/libcxxabi/test/cxa_vec_new_overflow_PR41395.pass.cpp +++ b/libcxxabi/test/cxa_vec_new_overflow_PR41395.pass.cpp @@ -9,12 +9,7 @@ // UNSUPPORTED: no-exceptions // PR41395 isn't fixed until the dylib shipped with macOS 10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} #include "cxxabi.h" #include diff --git a/libcxxabi/test/dynamic_cast.pass.cpp b/libcxxabi/test/dynamic_cast.pass.cpp index 87ca6a6dd70e6..51e294f74c05d 100644 --- a/libcxxabi/test/dynamic_cast.pass.cpp +++ b/libcxxabi/test/dynamic_cast.pass.cpp @@ -9,14 +9,10 @@ // XFAIL: gcc-7, gcc-8, gcc-9 // PR33425 and PR33487 are not fixed until the dylib shipped with macOS 10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.14 // PR33439 isn't fixed until the dylib shipped with macOS 10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13}} #include diff --git a/libcxxabi/test/exception_object_alignment.pass.cpp b/libcxxabi/test/exception_object_alignment.pass.cpp index 2d39800e9d712..c6228ba878991 100644 --- a/libcxxabi/test/exception_object_alignment.pass.cpp +++ b/libcxxabi/test/exception_object_alignment.pass.cpp @@ -12,11 +12,7 @@ // before macOS 10.14. The test fails on macOS 10.9 to 10.12, passes on macOS // 10.13 (no investigation done), and passes afterwards. Just mark all the OSes // before 10.14 as unsupported. -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.13 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.12 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.11 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.10 -// UNSUPPORTED: use_system_cxx_lib && x86_64-apple-macosx10.9 +// UNSUPPORTED: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13}} // Check that the pointer __cxa_allocate_exception returns is aligned to the // default alignment for the target architecture. diff --git a/libcxxabi/test/forced_unwind1.pass.cpp b/libcxxabi/test/forced_unwind1.pass.cpp index 2bd39f310cbd1..69f93ffaacc0c 100644 --- a/libcxxabi/test/forced_unwind1.pass.cpp +++ b/libcxxabi/test/forced_unwind1.pass.cpp @@ -12,13 +12,7 @@ // UNSUPPORTED: no-exceptions, c++03 // These tests fail on previously released dylibs, investigation needed. -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} #include #include diff --git a/libcxxabi/test/forced_unwind2.pass.cpp b/libcxxabi/test/forced_unwind2.pass.cpp index 9f73a673b420e..cb527581687a1 100644 --- a/libcxxabi/test/forced_unwind2.pass.cpp +++ b/libcxxabi/test/forced_unwind2.pass.cpp @@ -11,13 +11,7 @@ // UNSUPPORTED: no-exceptions, c++03 // These tests fail on previously released dylibs, investigation needed. -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} #include #include diff --git a/libcxxabi/test/incomplete_type.sh.cpp b/libcxxabi/test/incomplete_type.sh.cpp index 6995226fe808c..aae1a3c4360f9 100644 --- a/libcxxabi/test/incomplete_type.sh.cpp +++ b/libcxxabi/test/incomplete_type.sh.cpp @@ -17,10 +17,7 @@ // UNSUPPORTED: no-rtti // The fix for PR25898 landed in the system dylibs in macOS 10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} // RUN: %{cxx} %{flags} %{compile_flags} -Wno-unreachable-code -c %s -o %t.one.o // RUN: %{cxx} %{flags} %{compile_flags} -Wno-unreachable-code -c %s -o %t.two.o -DTU_ONE diff --git a/libcxxabi/test/test_aux_runtime_op_array_new.pass.cpp b/libcxxabi/test/test_aux_runtime_op_array_new.pass.cpp index ddf802dc6c578..c70c781044d21 100644 --- a/libcxxabi/test/test_aux_runtime_op_array_new.pass.cpp +++ b/libcxxabi/test/test_aux_runtime_op_array_new.pass.cpp @@ -10,12 +10,7 @@ // ___cxa_throw_bad_array_new_length is re-exported from libc++ only starting // in macosx 10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} #include #include diff --git a/libcxxabi/test/test_demangle.pass.cpp b/libcxxabi/test/test_demangle.pass.cpp index 009bf45028780..1780e684cbb57 100644 --- a/libcxxabi/test/test_demangle.pass.cpp +++ b/libcxxabi/test/test_demangle.pass.cpp @@ -7,13 +7,7 @@ //===----------------------------------------------------------------------===// // The demangler does not pass all these tests with the system dylibs on macOS. -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.15 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} #include "support/timer.h" #include diff --git a/libcxxabi/test/test_exception_address_alignment.pass.cpp b/libcxxabi/test/test_exception_address_alignment.pass.cpp index 5d59709d599c0..117251cbe34cf 100644 --- a/libcxxabi/test/test_exception_address_alignment.pass.cpp +++ b/libcxxabi/test/test_exception_address_alignment.pass.cpp @@ -13,14 +13,11 @@ // an incorrectly aligned _Unwind_Exception type on non-ARM. That causes these // tests to fail when compiling against such a SDK, or when running against a // system libc++abi that was compiled with an incorrect definition of _Unwind_Exception. -// XFAIL: apple-clang-12.0.0 && !target-arm -// XFAIL: apple-clang-11 && !target-arm -// XFAIL: apple-clang-10 && !target-arm -// XFAIL: apple-clang-9 && !target-arm -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: apple-clang-12.0.0 && !target={{arm.*}} +// XFAIL: apple-clang-11 && !target={{arm.*}} +// XFAIL: apple-clang-10 && !target={{arm.*}} +// XFAIL: apple-clang-9 && !target={{arm.*}} +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} // Test that the address of the exception object is properly aligned as required // by the relevant ABI diff --git a/libcxxabi/test/uncaught_exceptions.pass.cpp b/libcxxabi/test/uncaught_exceptions.pass.cpp index 44186d59e3ba8..82041ad203ccc 100644 --- a/libcxxabi/test/uncaught_exceptions.pass.cpp +++ b/libcxxabi/test/uncaught_exceptions.pass.cpp @@ -9,12 +9,7 @@ // UNSUPPORTED: no-exceptions // __cxa_uncaught_exceptions is not re-exported from libc++ until macOS 10.15. -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.14 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.13 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.12 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.11 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.10 -// XFAIL: use_system_cxx_lib && x86_64-apple-macosx10.9 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} #include #include diff --git a/libunwind/test/remember_state_leak.pass.sh.s b/libunwind/test/remember_state_leak.pass.sh.s index eb363d0102a80..f18d1768e7c4b 100644 --- a/libunwind/test/remember_state_leak.pass.sh.s +++ b/libunwind/test/remember_state_leak.pass.sh.s @@ -1,5 +1,5 @@ -# REQUIRES: x86, linux -# RUN: %{build} -target x86_64-unknown-linux-gnu +# REQUIRES: target={{x86_64-.+-linux-gnu}} +# RUN: %{build} # RUN: %{run} # The following assembly is a translation of this code: diff --git a/libunwind/test/signal_unwind.pass.cpp b/libunwind/test/signal_unwind.pass.cpp index 3acd77209a1c9..c16adeb4d18cc 100644 --- a/libunwind/test/signal_unwind.pass.cpp +++ b/libunwind/test/signal_unwind.pass.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// // Ensure that the unwinder can cope with the signal handler. -// REQUIRES: linux && (target-aarch64 || target-x86_64) +// REQUIRES: linux && (target={{aarch64-.+}} || target={{x86_64-.+}}) #include #include diff --git a/libunwind/test/unwind_leaffunction.pass.cpp b/libunwind/test/unwind_leaffunction.pass.cpp index 725a29163e503..a05e315d9bafc 100644 --- a/libunwind/test/unwind_leaffunction.pass.cpp +++ b/libunwind/test/unwind_leaffunction.pass.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// // Ensure that leaf function can be unwund. -// REQUIRES: linux && (target-aarch64 || target-x86_64) +// REQUIRES: linux && (target={{aarch64-.+}} || target={{x86_64-.+}}) #include #include From 83887df15597990308e9903d0480fa7676d772a1 Mon Sep 17 00:00:00 2001 From: Scott Linder Date: Thu, 1 Jul 2021 18:21:04 +0000 Subject: [PATCH 448/619] [ADT] Follow up to fix bug in "Add makeVisitor to STLExtras.h" Address mistakenly comparing the pointer values of two C-style strings rather than comparing their contents in the unit tests for makeVisitor, added in 6d6f35eb7b92c6dd4478834497752f4e963db16d --- llvm/unittests/ADT/STLExtrasTest.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/llvm/unittests/ADT/STLExtrasTest.cpp b/llvm/unittests/ADT/STLExtrasTest.cpp index 2c2b649030880..85208e4f4a2f8 100644 --- a/llvm/unittests/ADT/STLExtrasTest.cpp +++ b/llvm/unittests/ADT/STLExtrasTest.cpp @@ -777,9 +777,9 @@ TEST(STLExtrasTest, MakeVisitorOneCallable) { TEST(STLExtrasTest, MakeVisitorTwoCallables) { auto Visitor = - makeVisitor([](int) { return "int"; }, [](std::string) { return "str"; }); - EXPECT_EQ(Visitor(42), "int"); - EXPECT_EQ(Visitor("foo"), "str"); + makeVisitor([](int) { return 0; }, [](std::string) { return 1; }); + EXPECT_EQ(Visitor(42), 0); + EXPECT_EQ(Visitor("foo"), 1); } TEST(STLExtrasTest, MakeVisitorCallableMultipleOperands) { @@ -793,20 +793,20 @@ TEST(STLExtrasTest, MakeVisitorDefaultCase) { { auto Visitor = makeVisitor([](int I) { return I + 100; }, [](float F) { return F * 2; }, - [](auto) { return "unhandled type"; }); + [](auto) { return -1; }); EXPECT_EQ(Visitor(24), 124); EXPECT_EQ(Visitor(2.f), 4.f); - EXPECT_EQ(Visitor(2.), "unhandled type"); - EXPECT_EQ(Visitor(Visitor), "unhandled type"); + EXPECT_EQ(Visitor(2.), -1); + EXPECT_EQ(Visitor(Visitor), -1); } { - auto Visitor = makeVisitor([](auto) { return "unhandled type"; }, + auto Visitor = makeVisitor([](auto) { return -1; }, [](int I) { return I + 100; }, [](float F) { return F * 2; }); EXPECT_EQ(Visitor(24), 124); EXPECT_EQ(Visitor(2.f), 4.f); - EXPECT_EQ(Visitor(2.), "unhandled type"); - EXPECT_EQ(Visitor(Visitor), "unhandled type"); + EXPECT_EQ(Visitor(2.), -1); + EXPECT_EQ(Visitor(Visitor), -1); } } From 460dfbd9f83f1178ee754e2734322ddacbd707f3 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 1 Jul 2021 20:32:10 +0200 Subject: [PATCH 449/619] [OpaquePtr] Support VecOfAnyPtrsToElt intrinsics In this case the pointer type is part of the mangled name, so we can allow on opaque pointer outside --force-opaque-pointers mode as well. --- llvm/lib/IR/Function.cpp | 3 ++- llvm/test/Verifier/opaque-ptr.ll | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp index 8f096f0a69eb4..b502e5c48dbc0 100644 --- a/llvm/lib/IR/Function.cpp +++ b/llvm/lib/IR/Function.cpp @@ -1566,7 +1566,8 @@ static bool matchIntrinsicType( dyn_cast(ThisArgVecTy->getElementType()); if (!ThisArgEltTy) return true; - return ThisArgEltTy->getElementType() != ReferenceType->getElementType(); + return !ThisArgEltTy->isOpaqueOrPointeeTypeMatches( + ReferenceType->getElementType()); } case IITDescriptor::VecElementArgument: { if (D.getArgumentNumber() >= ArgTys.size()) diff --git a/llvm/test/Verifier/opaque-ptr.ll b/llvm/test/Verifier/opaque-ptr.ll index 944b24cd652b3..4d824ef49f7d5 100644 --- a/llvm/test/Verifier/opaque-ptr.ll +++ b/llvm/test/Verifier/opaque-ptr.ll @@ -52,10 +52,12 @@ define void @intrinsic_calls(ptr %a) { ; CHECK-LABEL: @intrinsic_calls( ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr [[A:%.*]], i32 4, <2 x i1> zeroinitializer, <2 x i32> zeroinitializer) ; CHECK-NEXT: call void @llvm.masked.store.v2i32.p0(<2 x i32> zeroinitializer, ptr [[A]], i32 4, <2 x i1> zeroinitializer) +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> zeroinitializer, i32 4, <2 x i1> zeroinitializer, <2 x i64> zeroinitializer) ; CHECK-NEXT: ret void ; call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %a, i32 4, <2 x i1> zeroinitializer, <2 x i32> zeroinitializer) call void @llvm.masked.store.v2i32.p0(<2 x i32> zeroinitializer, ptr %a, i32 4, <2 x i1> zeroinitializer) + call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> zeroinitializer, i32 4, <2 x i1> zeroinitializer, <2 x i64> zeroinitializer) ret void } @@ -66,3 +68,4 @@ declare void @llvm.lifetime.end.p0(i64, ptr nocapture) declare <2 x i32> @llvm.masked.load.v2i32.p0(ptr, i32, <2 x i1>, <2 x i32>) declare void @llvm.masked.store.v2i32.p0(<2 x i32>, ptr, i32, <2 x i1>) +declare <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i64>) From 19443c13b5d02b0bc2a0d641c65dd6842f5a5511 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Thu, 1 Jul 2021 14:51:46 -0400 Subject: [PATCH 450/619] [gn build] Port 050b064f15ee --- .../gn/secondary/libcxx/include/BUILD.gn | 26 +++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index 439ef44425dd4..d39a26ef0f405 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -163,12 +163,32 @@ if (current_toolchain == default_toolchain) { "__format/format_error.h", "__format/format_parse_context.h", "__function_like.h", + "__functional/binary_function.h", + "__functional/binary_negate.h", + "__functional/bind.h", + "__functional/bind_front.h", + "__functional/binder1st.h", + "__functional/binder2nd.h", + "__functional/default_searcher.h", + "__functional/function.h", "__functional/hash.h", + "__functional/identity.h", + "__functional/invoke.h", + "__functional/is_transparent.h", + "__functional/mem_fn.h", + "__functional/mem_fun_ref.h", + "__functional/not_fn.h", + "__functional/operations.h", + "__functional/perfect_forward.h", + "__functional/pointer_to_binary_function.h", + "__functional/pointer_to_unary_function.h", + "__functional/ranges_operations.h", + "__functional/reference_wrapper.h", "__functional/unary_function.h", + "__functional/unary_negate.h", "__functional/unwrap_ref.h", - "__functional_03", + "__functional/weak_result_type.h", "__functional_base", - "__functional_base_03", "__hash_table", "__iterator/advance.h", "__iterator/back_insert_iterator.h", @@ -197,6 +217,7 @@ if (current_toolchain == default_toolchain) { "__memory/addressof.h", "__memory/allocation_guard.h", "__memory/allocator.h", + "__memory/allocator_arg_t.h", "__memory/allocator_traits.h", "__memory/auto_ptr.h", "__memory/compressed_pair.h", @@ -208,6 +229,7 @@ if (current_toolchain == default_toolchain) { "__memory/temporary_buffer.h", "__memory/uninitialized_algorithms.h", "__memory/unique_ptr.h", + "__memory/uses_allocator.h", "__mutex_base", "__node_handle", "__nullptr", From 8eb4b3e2be008fc3455b3c2820b1b55d2a5c25f0 Mon Sep 17 00:00:00 2001 From: Tobias Gysi Date: Thu, 1 Jul 2021 16:41:35 +0000 Subject: [PATCH 451/619] [CMake][MLIR][Linalg] Adding variable to specify tablegen file dependencies. Synchronizing multiple custom targets requires not only target but also file dependencies. Building Linalg involves running yaml-gen followed by tablegen. Currently, these custom targets are only synchronized using a target dependency resulting in issues in specific incremental build setups (https://llvm.discourse.group/t/missing-build-cmake-tblgen-dependency/3727/10). This patch introduces a novel LLVM_TARGET_DEPENDS variable to the TableGen.cmake file to provide a way to specify file dependencies. Additionally, it adapts the Linalg CMakeLists.txt to introduce the necessary file dependency between yaml-gen and tablegen. Differential Revision: https://reviews.llvm.org/D105272 --- llvm/cmake/modules/TableGen.cmake | 4 +++- mlir/include/mlir/Dialect/Linalg/IR/CMakeLists.txt | 13 ++++++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/llvm/cmake/modules/TableGen.cmake b/llvm/cmake/modules/TableGen.cmake index 5f07acc1f6922..5e9e2674405ee 100644 --- a/llvm/cmake/modules/TableGen.cmake +++ b/llvm/cmake/modules/TableGen.cmake @@ -1,4 +1,5 @@ -# LLVM_TARGET_DEFINITIONS must contain the name of the .td file to process. +# LLVM_TARGET_DEFINITIONS must contain the name of the .td file to process, +# while LLVM_TARGET_DEPENDS may contain additional file dependencies. # Extra parameters for `tblgen' may come after `ofn' parameter. # Adds the name of the generated file to TABLEGEN_OUTPUT. @@ -104,6 +105,7 @@ function(tablegen project ofn) DEPENDS ${${project}_TABLEGEN_TARGET} ${${project}_TABLEGEN_EXE} ${local_tds} ${global_tds} ${LLVM_TARGET_DEFINITIONS_ABSOLUTE} + ${LLVM_TARGET_DEPENDS} COMMENT "Building ${ofn}..." ) diff --git a/mlir/include/mlir/Dialect/Linalg/IR/CMakeLists.txt b/mlir/include/mlir/Dialect/Linalg/IR/CMakeLists.txt index 6056c5e5259e8..4e3727839329d 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/Linalg/IR/CMakeLists.txt @@ -25,6 +25,10 @@ function(add_linalg_ods_tc_gen tc_filename output_file) ${MLIR_LINALG_ODS_GEN_EXE} ${MLIR_LINALG_ODS_GEN_TARGET} ${GEN_ODS_FILE} ${GEN_CPP_FILE}) + # Setup the file dependencies needed for the subsequent tablegen step. + # TODO: Once there is only one way of generating named ops remove this parent + # scope manipulation and implement the tablegen generation in the same scope. + set(LLVM_TARGET_DEPENDS ${LLVM_TARGET_DEPENDS} ${GEN_ODS_FILE} PARENT_SCOPE) endfunction() # Declare a function to generate ODS with mlir-linalg-ods-yaml-gen @@ -52,10 +56,17 @@ function(add_linalg_ods_yaml_gen yaml_ast_file output_file) ${MLIR_LINALG_ODS_YAML_GEN_EXE} ${MLIR_LINALG_ODS_YAML_GEN_TARGET} ${GEN_ODS_FILE} ${GEN_CPP_FILE}) + # Setup the file dependencies needed for the subsequent tablegen step. + # TODO: Once there is only one way of generating named ops remove this parent + # scope manipulation and implement the tablegen generation in the same scope. + set(LLVM_TARGET_DEPENDS ${LLVM_TARGET_DEPENDS} ${GEN_ODS_FILE} PARENT_SCOPE) endfunction() # TODO: Delete tc generation and replace with the YAML variant once all ops are -# ported. +# ported. At the same time, move the YAML and TableGen generation to the same +# scope to avoid the at a distance dependency manipulation via +# LLVM_TARGET_DEPENDS. +set(LLVM_TARGET_DEPENDS "") add_linalg_ods_tc_gen(LinalgNamedStructuredOpsSpec.tc LinalgNamedStructuredOps) add_linalg_ods_yaml_gen(LinalgNamedStructuredOps.yaml LinalgNamedStructuredOps) From 33b579c8a5efa476b8a1bd528fe5e47429249847 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Thu, 1 Jul 2021 11:56:11 -0700 Subject: [PATCH 452/619] [NFC][scudo] Exctract getOptionsForConfig in test --- .../scudo/standalone/tests/secondary_test.cpp | 28 +++++++++++-------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp b/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp index bbaf79261ba77..6b8a60d386b89 100644 --- a/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp @@ -19,19 +19,24 @@ #include #include +template static scudo::Options getOptionsForConfig() { + return {}; +} + template static void testSecondaryBasic(void) { using SecondaryT = scudo::MapAllocator; + scudo::Options Options = getOptionsForConfig(); scudo::GlobalStats S; S.init(); std::unique_ptr L(new SecondaryT); L->init(&S); const scudo::uptr Size = 1U << 16; - void *P = L->allocate(scudo::Options{}, Size); + void *P = L->allocate(Options, Size); EXPECT_NE(P, nullptr); memset(P, 'A', Size); EXPECT_GE(SecondaryT::getBlockSize(P), Size); - L->deallocate(scudo::Options{}, P); + L->deallocate(Options, P); // If the Secondary can't cache that pointer, it will be unmapped. if (!L->canCache(Size)) { @@ -40,8 +45,8 @@ template static void testSecondaryBasic(void) { // Repeat few time to avoid missing crash if it's mmaped by unrelated // code. for (int i = 0; i < 10; ++i) { - P = L->allocate(scudo::Options{}, Size); - L->deallocate(scudo::Options{}, P); + P = L->allocate(Options, Size); + L->deallocate(Options, P); memset(P, 'A', Size); } }, @@ -49,19 +54,19 @@ template static void testSecondaryBasic(void) { } const scudo::uptr Align = 1U << 16; - P = L->allocate(scudo::Options{}, Size + Align, Align); + P = L->allocate(Options, Size + Align, Align); EXPECT_NE(P, nullptr); void *AlignedP = reinterpret_cast( scudo::roundUpTo(reinterpret_cast(P), Align)); memset(AlignedP, 'A', Size); - L->deallocate(scudo::Options{}, P); + L->deallocate(Options, P); std::vector V; for (scudo::uptr I = 0; I < 32U; I++) - V.push_back(L->allocate(scudo::Options{}, Size)); + V.push_back(L->allocate(Options, Size)); std::shuffle(V.begin(), V.end(), std::mt19937(std::random_device()())); while (!V.empty()) { - L->deallocate(scudo::Options{}, V.back()); + L->deallocate(Options, V.back()); V.pop_back(); } scudo::ScopedString Str; @@ -92,16 +97,17 @@ TEST(ScudoSecondaryTest, SecondaryBasic) { testSecondaryBasic(); } -using LargeAllocator = scudo::MapAllocator; - struct MapAllocatorTest : public Test { + using Config = scudo::DefaultConfig; + using LargeAllocator = scudo::MapAllocator; + void SetUp() override { Allocator->init(nullptr); } void TearDown() override { Allocator->unmapTestOnly(); } std::unique_ptr Allocator = std::make_unique(); - scudo::Options Options = {}; + scudo::Options Options = getOptionsForConfig(); }; // This exercises a variety of combinations of size and alignment for the From fe08e9c4871e8842dc5c8f75a4796a86029c1ebe Mon Sep 17 00:00:00 2001 From: Leonard Grey Date: Thu, 1 Jul 2021 15:01:59 -0400 Subject: [PATCH 453/619] [lld-macho] Add support for LTO optimization level Everything (including test) modified from ELF/COFF. Using the same syntax (--lto-O3, etc) as ELF. Differential Revision: https://reviews.llvm.org/D105223 --- lld/MachO/Config.h | 1 + lld/MachO/Driver.cpp | 3 +++ lld/MachO/LTO.cpp | 3 +++ lld/MachO/Options.td | 4 ++++ 4 files changed, 11 insertions(+) diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h index 722b19f712531..ee4d49bd135a0 100644 --- a/lld/MachO/Config.h +++ b/lld/MachO/Config.h @@ -126,6 +126,7 @@ struct Configuration { llvm::StringRef outputFile; llvm::StringRef ltoObjPath; llvm::StringRef thinLTOJobs; + uint32_t ltoo = 2; bool deadStripDylibs = false; bool demangle = false; bool deadStrip = false; diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp index 59f764f8c1d2c..23b505d7b9477 100644 --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -1082,6 +1082,9 @@ bool macho::link(ArrayRef argsArr, bool canExitEarly, config->ltoNewPassManager = args.hasFlag(OPT_no_lto_legacy_pass_manager, OPT_lto_legacy_pass_manager, LLVM_ENABLE_NEW_PASS_MANAGER); + config->ltoo = args::getInteger(args, OPT_lto_O, 2); + if (config->ltoo > 3) + error("--lto-O: invalid optimization level: " + Twine(config->ltoo)); config->runtimePaths = args::getStrings(args, OPT_rpath); config->allLoad = args.hasArg(OPT_all_load); config->forceLoadObjC = args.hasArg(OPT_ObjC); diff --git a/lld/MachO/LTO.cpp b/lld/MachO/LTO.cpp index 55e155e1a7dd6..3fa7fe4e32e59 100644 --- a/lld/MachO/LTO.cpp +++ b/lld/MachO/LTO.cpp @@ -13,6 +13,7 @@ #include "Symbols.h" #include "Target.h" +#include "lld/Common/Args.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Strings.h" #include "lld/Common/TargetOptionsCommandFlags.h" @@ -40,6 +41,8 @@ static lto::Config createConfig() { }; c.TimeTraceEnabled = config->timeTraceEnabled; c.TimeTraceGranularity = config->timeTraceGranularity; + c.OptLevel = config->ltoo; + c.CGOptLevel = args::getCGOptLevel(config->ltoo); if (config->saveTemps) checkError(c.addSaveTemps(config->outputFile.str() + ".", /*UseInputModulePath=*/true)); diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td index ebff0d5813a02..515053a64cf10 100644 --- a/lld/MachO/Options.td +++ b/lld/MachO/Options.td @@ -64,6 +64,10 @@ def icf_eq: Joined<["--"], "icf=">, HelpText<"Set level for identical code folding (default: none)">, MetaVarName<"[none,safe,all]">, Group; +def lto_O: Joined<["--"], "lto-O">, + HelpText<"Set optimization level for LTO (default: 2)">, + MetaVarName<"">, + Group; // This is a complete Options.td compiled from Apple's ld(1) manpage // dated 2018-03-07 and cross checked with ld64 source code in repo From 78e70cee0d46bb14dcbedec993fbf855a4d13266 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Wed, 30 Jun 2021 23:39:12 -0700 Subject: [PATCH 454/619] [scudo] Remove false DCHECK MTE Cache.store passes MAP_NOACCESS here. Reviewed By: pcc, cryptoad Differential Revision: https://reviews.llvm.org/D105266 --- compiler-rt/lib/scudo/standalone/linux.cpp | 5 +---- compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp | 7 ++++++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/compiler-rt/lib/scudo/standalone/linux.cpp b/compiler-rt/lib/scudo/standalone/linux.cpp index dedab61631bc5..c77c1bb600d93 100644 --- a/compiler-rt/lib/scudo/standalone/linux.cpp +++ b/compiler-rt/lib/scudo/standalone/linux.cpp @@ -58,11 +58,8 @@ void *map(void *Addr, uptr Size, UNUSED const char *Name, uptr Flags, if (Flags & MAP_MEMTAG) MmapProt |= PROT_MTE; #endif - if (Addr) { - // Currently no scenario for a noaccess mapping with a fixed address. - DCHECK_EQ(Flags & MAP_NOACCESS, 0); + if (Addr) MmapFlags |= MAP_FIXED; - } void *P = mmap(Addr, Size, MmapProt, MmapFlags, -1, 0); if (P == MAP_FAILED) { if (!(Flags & MAP_ALLOWNOMEM) || errno != ENOMEM) diff --git a/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp b/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp index 6b8a60d386b89..d3b7c486f7c3e 100644 --- a/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "memtag.h" #include "tests/scudo_unit_test.h" #include "allocator_config.h" @@ -20,7 +21,11 @@ #include template static scudo::Options getOptionsForConfig() { - return {}; + if (!Config::MaySupportMemoryTagging || !scudo::archSupportsMemoryTagging()) + return {}; + scudo::AtomicOptions AO; + AO.set(scudo::OptionBit::UseMemoryTagging); + return AO.load(); } template static void testSecondaryBasic(void) { From e86fe368db11e43b4b7e84065ba5e8439fc24840 Mon Sep 17 00:00:00 2001 From: "William S. Moses" Date: Thu, 1 Jul 2021 15:05:27 -0400 Subject: [PATCH 455/619] [MLIR] Allow Affine scalar replacement to handle inner operations Affine scalar replacement (and other affine passes, though not fixed here) don't properly handle operations with nested regions. This patch fixes the pass and two affine utilities to function properly given a non-affine internal region This patch prevents the pass from throwing an internal compiler error when running on the added test case. Differential Revision: https://reviews.llvm.org/D105058 --- mlir/lib/Analysis/Utils.cpp | 10 +++++----- mlir/test/Dialect/Affine/scalrep.mlir | 26 +++++++++++++++++++++++++- 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/mlir/lib/Analysis/Utils.cpp b/mlir/lib/Analysis/Utils.cpp index e87ecdac2d6ca..fc14e198c5b28 100644 --- a/mlir/lib/Analysis/Utils.cpp +++ b/mlir/lib/Analysis/Utils.cpp @@ -35,9 +35,8 @@ void mlir::getLoopIVs(Operation &op, SmallVectorImpl *loops) { AffineForOp currAffineForOp; // Traverse up the hierarchy collecting all 'affine.for' operation while // skipping over 'affine.if' operations. - while (currOp && ((currAffineForOp = dyn_cast(currOp)) || - isa(currOp))) { - if (currAffineForOp) + while (currOp) { + if (AffineForOp currAffineForOp = dyn_cast(currOp)) loops->push_back(currAffineForOp); currOp = currOp->getParentOp(); } @@ -54,8 +53,9 @@ void mlir::getEnclosingAffineForAndIfOps(Operation &op, // Traverse up the hierarchy collecting all `affine.for` and `affine.if` // operations. - while (currOp && (isa(currOp))) { - ops->push_back(currOp); + while (currOp) { + if (isa(currOp)) + ops->push_back(currOp); currOp = currOp->getParentOp(); } std::reverse(ops->begin(), ops->end()); diff --git a/mlir/test/Dialect/Affine/scalrep.mlir b/mlir/test/Dialect/Affine/scalrep.mlir index 308186fe676db..31e52323ed78f 100644 --- a/mlir/test/Dialect/Affine/scalrep.mlir +++ b/mlir/test/Dialect/Affine/scalrep.mlir @@ -670,10 +670,34 @@ func @redundant_store_elim_fail(%out : memref<512xf32>) { } return } - // CHECK: affine.for // CHECK-NEXT: affine.store // CHECK-NEXT: "test.use" // CHECK-NEXT: affine.store // CHECK-NEXT: } +// CHECK-LABEL: @with_inner_ops +func @with_inner_ops(%arg0: memref, %arg1: memref, %arg2: i1) { + %cst = constant 0.000000e+00 : f64 + %cst_0 = constant 3.140000e+00 : f64 + %cst_1 = constant 1.000000e+00 : f64 + affine.for %arg3 = 0 to 28 { + affine.store %cst, %arg1[%arg3] : memref + affine.store %cst_0, %arg1[%arg3] : memref + %0 = scf.if %arg2 -> (f64) { + scf.yield %cst_1 : f64 + } else { + %1 = affine.load %arg1[%arg3] : memref + scf.yield %1 : f64 + } + affine.store %0, %arg0[%arg3] : memref + } + return +} + +// CHECK: %[[pi:.+]] = constant 3.140000e+00 : f64 +// CHECK: %{{.*}} = scf.if %arg2 -> (f64) { +// CHECK: scf.yield %{{.*}} : f64 +// CHECK: } else { +// CHECK: scf.yield %[[pi]] : f64 +// CHECK: } From 0516f49c081590305a9db972ebc7fceb942b8ce3 Mon Sep 17 00:00:00 2001 From: Ahmed Taei Date: Wed, 30 Jun 2021 16:03:19 -0700 Subject: [PATCH 456/619] Add linalg.mmt4d named op This op performs matrix-matrix-transpose multiplication of 4-d inputs as the following: ``` C[m1, n1, m0, n0] = sum_{k1, k0}(A[m1, k1, m0, k0] * B[n1, k1, n0, k0]) ``` Reviewed By: Benoit Differential Revision: https://reviews.llvm.org/D105244 --- .../Linalg/IR/LinalgNamedStructuredOps.yaml | 73 +++++++++++++++++++ .../linalg/opdsl/ops/core_named_ops.py | 20 +++++ 2 files changed, 93 insertions(+) diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml index 8781e16bba34e..a8baf23bbfaab 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml @@ -62,6 +62,79 @@ structured_op: !LinalgStructuredOpConfig - !ScalarExpression scalar_arg: B --- !LinalgOpConfig +metadata: !LinalgOpMetadata + name: mmt4d + cpp_class_name: Mmt4DOp + doc: |- + Performs a matrix-matrix-transpose multiplication of two 4D inputs. + + Differences from linalg.matmul: + * The right hand side is transposed, whence the 't' in 'mmt'. + * The input and output tensors have a 4D shape instead of a 2D shape. They + are interpreted as 2D matrices with one level of 2D tile subdivision, + whence the 2+2=4 dimensions. The inner tile dimensions are identified with + '0' suffixes below, for instance the LHS matrix shape (M, K, M0, K0) reads + as: MxK tiles, each of shape M0xK0. + implements: + - LinalgContractionOpInterface +structured_op: !LinalgStructuredOpConfig + args: + - !LinalgOperandDefConfig + name: lhs + usage: InputOperand + type_var: LhsType + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s2, s3)> + - !LinalgOperandDefConfig + name: rhs + usage: InputOperand + type_var: RhsType + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s4, s1, s5, s3)> + - !LinalgOperandDefConfig + name: accum + usage: OutputOperand + type_var: AccumType + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s4, s2, s5)> + indexing_maps: !LinalgIndexingMapsConfig + static_indexing_maps: + - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5] -> (d0, d4, d1, + d5)> + - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5] -> (d2, d4, d3, + d5)> + - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5] -> (d0, d2, d1, + d3)> + iterator_types: + - parallel + - parallel + - parallel + - parallel + - reduction + - reduction + assignments: + - !ScalarAssign + arg: accum + value: !ScalarExpression + scalar_apply: + fn_name: add + operands: + - !ScalarExpression + scalar_arg: accum + - !ScalarExpression + scalar_apply: + fn_name: mul + operands: + - !ScalarExpression + symbolic_cast: + type_var: AccumType + operands: + - !ScalarExpression + scalar_arg: lhs + - !ScalarExpression + symbolic_cast: + type_var: AccumType + operands: + - !ScalarExpression + scalar_arg: rhs +--- !LinalgOpConfig metadata: !LinalgOpMetadata name: batch_matmul cpp_class_name: BatchMatmulOp diff --git a/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py b/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py index 561cd2e7d08db..095d94956f5b7 100644 --- a/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py +++ b/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py @@ -21,6 +21,26 @@ def matmul( C[D.m, D.n] += cast(U, A[D.m, D.k]) * cast(U, B[D.k, D.n]) +@linalg_structured_op +def mmt4d(lhs=TensorDef(TV.LhsType, S.M, S.K, S.M0, S.K0), + rhs=TensorDef(TV.RhsType, S.N, S.K, S.N0, S.K0), + accum=TensorDef(TV.AccumType, S.M, S.N, S.M0, S.N0, + output=True)): + """Performs a matrix-matrix-transpose multiplication of two 4D inputs. + + Differences from linalg.matmul: + * The right hand side is transposed, whence the 't' in 'mmt'. + * The input and output tensors have a 4D shape instead of a 2D shape. They + are interpreted as 2D matrices with one level of 2D tile subdivision, + whence the 2+2=4 dimensions. The inner tile dimensions are identified with + '0' suffixes below, for instance the LHS matrix shape (M, K, M0, K0) reads + as: MxK tiles, each of shape M0xK0. + """ + domain(D.m, D.m0, D.n, D.n0, D.k, D.k0) + implements(ContractionOpInterface) + accum[D.m, D.n, D.m0, D.n0] += cast(TV.AccumType, lhs[D.m, D.k, D.m0, D.k0]) * cast(TV.AccumType, rhs[D.n, D.k, D.n0, D.k0]) + + @linalg_structured_op def batch_matmul( A=TensorDef(T1, Batch, S.M, S.K), From 355bf7c1f0b25310b814f7733fa59767e474de0b Mon Sep 17 00:00:00 2001 From: "Joel E. Denny" Date: Thu, 1 Jul 2021 15:42:33 -0400 Subject: [PATCH 457/619] [lit] Extend --xfail/LIT_XFAIL to take full test name The new documentation entry gives an example use case from libomptarget. Reviewed By: yln, jhenderson, davezarzycki Differential Revision: https://reviews.llvm.org/D105208 --- llvm/docs/CommandGuide/lit.rst | 25 +++++++++++++++++ llvm/utils/lit/lit/main.py | 4 ++- .../lit/tests/Inputs/xfail-cl/a/false.txt | 1 + .../utils/lit/tests/Inputs/xfail-cl/a/lit.cfg | 4 +++ .../lit/tests/Inputs/xfail-cl/a/test.txt | 1 + .../lit/tests/Inputs/xfail-cl/b/false.txt | 1 + .../utils/lit/tests/Inputs/xfail-cl/b/lit.cfg | 4 +++ .../lit/tests/Inputs/xfail-cl/b/test.txt | 1 + llvm/utils/lit/tests/xfail-cl.py | 27 +++++++++++++------ 9 files changed, 59 insertions(+), 9 deletions(-) create mode 100644 llvm/utils/lit/tests/Inputs/xfail-cl/a/false.txt create mode 100644 llvm/utils/lit/tests/Inputs/xfail-cl/a/lit.cfg create mode 100644 llvm/utils/lit/tests/Inputs/xfail-cl/a/test.txt create mode 100644 llvm/utils/lit/tests/Inputs/xfail-cl/b/false.txt create mode 100644 llvm/utils/lit/tests/Inputs/xfail-cl/b/lit.cfg create mode 100644 llvm/utils/lit/tests/Inputs/xfail-cl/b/test.txt diff --git a/llvm/docs/CommandGuide/lit.rst b/llvm/docs/CommandGuide/lit.rst index 413b64e950077..dc2b57cda8a4e 100644 --- a/llvm/docs/CommandGuide/lit.rst +++ b/llvm/docs/CommandGuide/lit.rst @@ -239,6 +239,31 @@ The timing data is stored in the `test_exec_root` in a file named this option, which is especially useful in environments where the call to ``lit`` is issued indirectly. + A test name can specified as a file name relative to the test suite directory. + For example: + + .. code-block:: none + + LIT_XFAIL="affinity/kmp-hw-subset.c;offloading/memory_manager.cpp" + + In this case, all of the following tests are treated as ``XFAIL``: + + .. code-block:: none + + libomp :: affinity/kmp-hw-subset.c + libomptarget :: nvptx64-nvidia-cuda :: offloading/memory_manager.cpp + libomptarget :: x86_64-pc-linux-gnu :: offloading/memory_manager.cpp + + Alternatively, a test name can be specified as the full test name + reported in LIT output. For example, we can adjust the previous + example not to treat the ``nvptx64-nvidia-cuda`` version of + ``offloading/memory_manager.cpp`` as XFAIL: + + .. code-block:: none + + LIT_XFAIL="affinity/kmp-hw-subset.c;libomptarget :: x86_64-pc-linux-gnu :: offloading/memory_manager.cpp" + + ADDITIONAL OPTIONS ------------------ diff --git a/llvm/utils/lit/lit/main.py b/llvm/utils/lit/lit/main.py index 47fe73388eaa7..b60c30c68457c 100755 --- a/llvm/utils/lit/lit/main.py +++ b/llvm/utils/lit/lit/main.py @@ -193,7 +193,9 @@ def filter_by_shard(tests, run, shards, lit_config): def mark_xfail(selected_tests, opts): for t in selected_tests: - if os.sep.join(t.path_in_suite) in opts.xfail: + test_file = os.sep.join(t.path_in_suite) + test_full_name = t.getFullName() + if test_file in opts.xfail or test_full_name in opts.xfail: t.xfails += '*' def mark_excluded(discovered_tests, selected_tests): diff --git a/llvm/utils/lit/tests/Inputs/xfail-cl/a/false.txt b/llvm/utils/lit/tests/Inputs/xfail-cl/a/false.txt new file mode 100644 index 0000000000000..49932c3006e15 --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/xfail-cl/a/false.txt @@ -0,0 +1 @@ +# RUN: false diff --git a/llvm/utils/lit/tests/Inputs/xfail-cl/a/lit.cfg b/llvm/utils/lit/tests/Inputs/xfail-cl/a/lit.cfg new file mode 100644 index 0000000000000..09f49c32a173a --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/xfail-cl/a/lit.cfg @@ -0,0 +1,4 @@ +import lit.formats +config.name = 'top-level-suite :: a' +config.suffixes = ['.txt'] +config.test_format = lit.formats.ShTest() diff --git a/llvm/utils/lit/tests/Inputs/xfail-cl/a/test.txt b/llvm/utils/lit/tests/Inputs/xfail-cl/a/test.txt new file mode 100644 index 0000000000000..b80b60b7a2794 --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/xfail-cl/a/test.txt @@ -0,0 +1 @@ +# RUN: true diff --git a/llvm/utils/lit/tests/Inputs/xfail-cl/b/false.txt b/llvm/utils/lit/tests/Inputs/xfail-cl/b/false.txt new file mode 100644 index 0000000000000..49932c3006e15 --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/xfail-cl/b/false.txt @@ -0,0 +1 @@ +# RUN: false diff --git a/llvm/utils/lit/tests/Inputs/xfail-cl/b/lit.cfg b/llvm/utils/lit/tests/Inputs/xfail-cl/b/lit.cfg new file mode 100644 index 0000000000000..62f721c671392 --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/xfail-cl/b/lit.cfg @@ -0,0 +1,4 @@ +import lit.formats +config.name = 'top-level-suite :: b' +config.suffixes = ['.txt'] +config.test_format = lit.formats.ShTest() diff --git a/llvm/utils/lit/tests/Inputs/xfail-cl/b/test.txt b/llvm/utils/lit/tests/Inputs/xfail-cl/b/test.txt new file mode 100644 index 0000000000000..49932c3006e15 --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/xfail-cl/b/test.txt @@ -0,0 +1 @@ +# RUN: false diff --git a/llvm/utils/lit/tests/xfail-cl.py b/llvm/utils/lit/tests/xfail-cl.py index 129e3092b9969..1d5fdb079c2c0 100644 --- a/llvm/utils/lit/tests/xfail-cl.py +++ b/llvm/utils/lit/tests/xfail-cl.py @@ -1,9 +1,20 @@ -# Check that regex-XFAILing works and can be configured via env var. -# -# RUN: %{lit} --xfail 'false.txt;false2.txt' %{inputs}/xfail-cl | FileCheck --check-prefix=CHECK-FILTER %s -# RUN: env LIT_XFAIL='false.txt;false2.txt' %{lit} %{inputs}/xfail-cl | FileCheck --check-prefix=CHECK-FILTER %s +# Check that XFAILing works via command line or env var. + +# RUN: %{lit} --xfail 'false.txt;false2.txt;top-level-suite :: b :: test.txt' \ +# RUN: %{inputs}/xfail-cl \ +# RUN: | FileCheck --check-prefix=CHECK-FILTER %s + +# RUN: env LIT_XFAIL='false.txt;false2.txt;top-level-suite :: b :: test.txt' \ +# RUN: %{lit} %{inputs}/xfail-cl \ +# RUN: | FileCheck --check-prefix=CHECK-FILTER %s + # END. -# CHECK-FILTER: Testing: 3 tests, {{[1-3]}} workers -# CHECK-FILTER-DAG: XFAIL: top-level-suite :: false.txt -# CHECK-FILTER-DAG: XFAIL: top-level-suite :: false2.txt -# CHECK-FILTER-DAG: PASS: top-level-suite :: true.txt + +# CHECK-FILTER: Testing: 7 tests, {{[1-7]}} workers +# CHECK-FILTER-DAG: {{^}}PASS: top-level-suite :: a :: test.txt +# CHECK-FILTER-DAG: {{^}}XFAIL: top-level-suite :: b :: test.txt +# CHECK-FILTER-DAG: {{^}}XFAIL: top-level-suite :: a :: false.txt +# CHECK-FILTER-DAG: {{^}}XFAIL: top-level-suite :: b :: false.txt +# CHECK-FILTER-DAG: {{^}}XFAIL: top-level-suite :: false.txt +# CHECK-FILTER-DAG: {{^}}XFAIL: top-level-suite :: false2.txt +# CHECK-FILTER-DAG: {{^}}PASS: top-level-suite :: true.txt From 45e8a0befbc8e174c3fa4ba4d21ef4445a16191b Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Wed, 30 Jun 2021 21:47:43 +0200 Subject: [PATCH 458/619] [Orc] At CBindings for LazyRexports At C bindings and an example for LLJIT with lazy reexports Differential Revision: https://reviews.llvm.org/D104672 --- llvm/examples/OrcV2Examples/CMakeLists.txt | 1 + .../OrcV2CBindingsLazy/CMakeLists.txt | 15 ++ .../OrcV2CBindingsLazy/OrcV2CBindingsLazy.c | 244 ++++++++++++++++++ llvm/include/llvm-c/Orc.h | 86 ++++++ .../ExecutionEngine/Orc/OrcV2CBindings.cpp | 53 +++- llvm/test/CMakeLists.txt | 1 + .../OrcV2Examples/orcv2-cbindings-lazy.test | 5 + llvm/test/lit.cfg.py | 3 +- 8 files changed, 406 insertions(+), 2 deletions(-) create mode 100644 llvm/examples/OrcV2Examples/OrcV2CBindingsLazy/CMakeLists.txt create mode 100644 llvm/examples/OrcV2Examples/OrcV2CBindingsLazy/OrcV2CBindingsLazy.c create mode 100644 llvm/test/Examples/OrcV2Examples/orcv2-cbindings-lazy.test diff --git a/llvm/examples/OrcV2Examples/CMakeLists.txt b/llvm/examples/OrcV2Examples/CMakeLists.txt index e46448bed06f2..59311f8fbf1c0 100644 --- a/llvm/examples/OrcV2Examples/CMakeLists.txt +++ b/llvm/examples/OrcV2Examples/CMakeLists.txt @@ -14,6 +14,7 @@ add_subdirectory(OrcV2CBindingsDumpObjects) add_subdirectory(OrcV2CBindingsIRTransforms) add_subdirectory(OrcV2CBindingsReflectProcessSymbols) add_subdirectory(OrcV2CBindingsRemovableCode) +add_subdirectory(OrcV2CBindingsLazy) if(CMAKE_HOST_UNIX) add_subdirectory(LLJITWithRemoteDebugging) diff --git a/llvm/examples/OrcV2Examples/OrcV2CBindingsLazy/CMakeLists.txt b/llvm/examples/OrcV2Examples/OrcV2CBindingsLazy/CMakeLists.txt new file mode 100644 index 0000000000000..52eb2d496fc23 --- /dev/null +++ b/llvm/examples/OrcV2Examples/OrcV2CBindingsLazy/CMakeLists.txt @@ -0,0 +1,15 @@ +set(LLVM_LINK_COMPONENTS + Core + ExecutionEngine + IRReader + JITLink + MC + OrcJIT + Support + Target + nativecodegen + ) + +add_llvm_example(OrcV2CBindingsLazy + OrcV2CBindingsLazy.c + ) diff --git a/llvm/examples/OrcV2Examples/OrcV2CBindingsLazy/OrcV2CBindingsLazy.c b/llvm/examples/OrcV2Examples/OrcV2CBindingsLazy/OrcV2CBindingsLazy.c new file mode 100644 index 0000000000000..0f4f979127e85 --- /dev/null +++ b/llvm/examples/OrcV2Examples/OrcV2CBindingsLazy/OrcV2CBindingsLazy.c @@ -0,0 +1,244 @@ +//===-------- BasicOrcV2CBindings.c - Basic OrcV2 C Bindings Demo ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm-c/Core.h" +#include "llvm-c/Error.h" +#include "llvm-c/IRReader.h" +#include "llvm-c/Initialization.h" +#include "llvm-c/LLJIT.h" +#include "llvm-c/Support.h" +#include "llvm-c/Target.h" + +#include + +int handleError(LLVMErrorRef Err) { + char *ErrMsg = LLVMGetErrorMessage(Err); + fprintf(stderr, "Error: %s\n", ErrMsg); + LLVMDisposeErrorMessage(ErrMsg); + return 1; +} + +// Example IR modules. +// +// Note that in the conditionally compiled modules, FooMod and BarMod, functions +// have been given an _body suffix. This is to ensure that their names do not +// clash with their lazy-reexports. +// For clients who do not wish to rename function bodies (e.g. because they want +// to re-use cached objects between static and JIT compiles) techniques exist to +// avoid renaming. See the lazy-reexports section of the ORCv2 design doc. + +const char FooMod[] = " define i32 @foo_body() { \n" + " entry: \n" + " ret i32 1 \n" + " } \n"; + +const char BarMod[] = " define i32 @bar_body() { \n" + " entry: \n" + " ret i32 2 \n" + " } \n"; + +const char MainMod[] = + " define i32 @entry(i32 %argc) { \n" + " entry: \n" + " %and = and i32 %argc, 1 \n" + " %tobool = icmp eq i32 %and, 0 \n" + " br i1 %tobool, label %if.end, label %if.then \n" + " \n" + " if.then: \n" + " %call = tail call i32 @foo() \n" + " br label %return \n" + " \n" + " if.end: \n" + " %call1 = tail call i32 @bar() \n" + " br label %return \n" + " \n" + " return: \n" + " %retval.0 = phi i32 [ %call, %if.then ], [ %call1, %if.end ] \n" + " ret i32 %retval.0 \n" + " } \n" + " \n" + " declare i32 @foo() \n" + " declare i32 @bar() \n"; + +LLVMErrorRef parseExampleModule(const char *Source, size_t Len, + const char *Name, + LLVMOrcThreadSafeModuleRef *TSM) { + // Create a new ThreadSafeContext and underlying LLVMContext. + LLVMOrcThreadSafeContextRef TSCtx = LLVMOrcCreateNewThreadSafeContext(); + + // Get a reference to the underlying LLVMContext. + LLVMContextRef Ctx = LLVMOrcThreadSafeContextGetContext(TSCtx); + + // Wrap Source in a MemoryBuffer + LLVMMemoryBufferRef MB = + LLVMCreateMemoryBufferWithMemoryRange(Source, Len, Name, 0); + + // Parse the LLVM module. + LLVMModuleRef M; + char *ErrMsg; + if (LLVMParseIRInContext(Ctx, MB, &M, &ErrMsg)) { + return LLVMCreateStringError(ErrMsg); + // TODO: LLVMDisposeMessage(ErrMsg); + } + + // Our module is now complete. Wrap it and our ThreadSafeContext in a + // ThreadSafeModule. + *TSM = LLVMOrcCreateNewThreadSafeModule(M, TSCtx); + + // Dispose of our local ThreadSafeContext value. The underlying LLVMContext + // will be kept alive by our ThreadSafeModule, TSM. + LLVMOrcDisposeThreadSafeContext(TSCtx); + + return LLVMErrorSuccess; +} + +int main(int argc, char *argv[]) { + + int MainResult = 0; + + // Parse command line arguments and initialize LLVM Core. + LLVMParseCommandLineOptions(argc, (const char **)argv, ""); + LLVMInitializeCore(LLVMGetGlobalPassRegistry()); + + // Initialize native target codegen and asm printer. + LLVMInitializeNativeTarget(); + LLVMInitializeNativeAsmPrinter(); + + // Set up a JIT instance. + LLVMOrcLLJITRef J; + const char *TargetTriple; + { + LLVMErrorRef Err; + if ((Err = LLVMOrcCreateLLJIT(&J, 0))) { + MainResult = handleError(Err); + goto llvm_shutdown; + } + TargetTriple = LLVMOrcLLJITGetTripleString(J); + } + + // Add our demo modules to the JIT. + { + LLVMOrcJITDylibRef MainJD = LLVMOrcLLJITGetMainJITDylib(J); + LLVMErrorRef Err; + + LLVMOrcThreadSafeModuleRef FooTSM; + if ((Err = + parseExampleModule(FooMod, sizeof(FooMod), "foo-mod", &FooTSM))) { + MainResult = handleError(Err); + goto jit_cleanup; + } + + if ((Err = LLVMOrcLLJITAddLLVMIRModule(J, MainJD, FooTSM))) { + // If adding the ThreadSafeModule fails then we need to clean it up + // ourselves. If adding it succeeds the JIT will manage the memory. + LLVMOrcDisposeThreadSafeModule(FooTSM); + MainResult = handleError(Err); + goto jit_cleanup; + } + + LLVMOrcThreadSafeModuleRef BarTSM; + if ((Err = + parseExampleModule(BarMod, sizeof(BarMod), "bar-mod", &BarTSM))) { + MainResult = handleError(Err); + goto jit_cleanup; + } + + if ((Err = LLVMOrcLLJITAddLLVMIRModule(J, MainJD, BarTSM))) { + LLVMOrcDisposeThreadSafeModule(BarTSM); + MainResult = handleError(Err); + goto jit_cleanup; + } + + LLVMOrcThreadSafeModuleRef MainTSM; + if ((Err = parseExampleModule(MainMod, sizeof(MainMod), "main-mod", + &MainTSM))) { + MainResult = handleError(Err); + goto jit_cleanup; + } + + if ((Err = LLVMOrcLLJITAddLLVMIRModule(J, MainJD, MainTSM))) { + LLVMOrcDisposeThreadSafeModule(MainTSM); + MainResult = handleError(Err); + goto jit_cleanup; + } + } + + // add lazy reexports + LLVMOrcIndirectStubsManagerRef ISM = + LLVMOrcCreateLocalIndirectStubsManager(TargetTriple); + + LLVMOrcLazyCallThroughManagerRef LCTM; + { + LLVMErrorRef Err; + LLVMOrcExecutionSessionRef ES = LLVMOrcLLJITGetExecutionSession(J); + if ((Err = LLVMOrcCreateLocalLazyCallThroughManager(TargetTriple, ES, 0, + &LCTM))) { + LLVMOrcDisposeIndirectStubsManager(ISM); + MainResult = handleError(Err); + goto jit_cleanup; + } + } + + LLVMJITSymbolFlags flag = { + LLVMJITSymbolGenericFlagsExported | LLVMJITSymbolGenericFlagsCallable, 0}; + LLVMOrcCSymbolAliasMapPair ReExports[2] = { + {LLVMOrcLLJITMangleAndIntern(J, "foo"), + {LLVMOrcLLJITMangleAndIntern(J, "foo_body"), flag}}, + {LLVMOrcLLJITMangleAndIntern(J, "bar"), + {LLVMOrcLLJITMangleAndIntern(J, "bar_body"), flag}}, + }; + + { + LLVMOrcJITDylibRef MainJD = LLVMOrcLLJITGetMainJITDylib(J); + LLVMOrcMaterializationUnitRef MU = + LLVMOrcLazyReexports(LCTM, ISM, MainJD, ReExports, 2); + LLVMOrcJITDylibDefine(MainJD, MU); + } + + // Look up the address of our demo entry point. + LLVMOrcJITTargetAddress EntryAddr; + { + LLVMErrorRef Err; + if ((Err = LLVMOrcLLJITLookup(J, &EntryAddr, "entry"))) { + MainResult = handleError(Err); + goto cleanup; + } + } + + // If we made it here then everything succeeded. Execute our JIT'd code. + int32_t (*Entry)(int32_t) = (int32_t(*)(int32_t))EntryAddr; + int32_t Result = Entry(argc); + + printf("--- Result ---\n"); + printf("entry(%i) = %i\n", argc, Result); + +cleanup : { + LLVMOrcDisposeIndirectStubsManager(ISM); + LLVMOrcDisposeLazyCallThroughManager(LCTM); +} + +jit_cleanup: + // Destroy our JIT instance. This will clean up any memory that the JIT has + // taken ownership of. This operation is non-trivial (e.g. it may need to + // JIT static destructors) and may also fail. In that case we want to render + // the error to stderr, but not overwrite any existing return value. + { + LLVMErrorRef Err; + if ((Err = LLVMOrcDisposeLLJIT(J))) { + int NewFailureResult = handleError(Err); + if (MainResult == 0) + MainResult = NewFailureResult; + } + } + +llvm_shutdown: + // Shut down LLVM. + LLVMShutdown(); + + return MainResult; +} diff --git a/llvm/include/llvm-c/Orc.h b/llvm/include/llvm-c/Orc.h index 1d26d7ca303c3..62688d2675d5a 100644 --- a/llvm/include/llvm-c/Orc.h +++ b/llvm/include/llvm-c/Orc.h @@ -123,6 +123,28 @@ typedef struct { */ typedef LLVMJITCSymbolMapPair *LLVMOrcCSymbolMapPairs; +/** + * Represents a SymbolAliasMapEntry + */ +typedef struct { + LLVMOrcSymbolStringPoolEntryRef Name; + LLVMJITSymbolFlags Flags; +} LLVMOrcCSymbolAliasMapEntry; + +/** + * Represents a pair of a symbol name and SymbolAliasMapEntry. + */ +typedef struct { + LLVMOrcSymbolStringPoolEntryRef Name; + LLVMOrcCSymbolAliasMapEntry Entry; +} LLVMOrcCSymbolAliasMapPair; + +/** + * Represents a list of (SymbolStringPtr, (SymbolStringPtr, JITSymbolFlags)) + * pairs that can be used to construct a SymbolFlagsMap. + */ +typedef LLVMOrcCSymbolAliasMapPair *LLVMOrcCSymbolAliasMapPairs; + /** * Lookup kind. This can be used by definition generators when deciding whether * to produce a definition for a requested symbol. @@ -373,6 +395,18 @@ typedef struct LLVMOrcOpaqueObjectTransformLayer typedef LLVMErrorRef (*LLVMOrcObjectTransformLayerTransformFunction)( void *Ctx, LLVMMemoryBufferRef *ObjInOut); +/** + * A reference to an orc::IndirectStubsManager instance. + */ +typedef struct LLVMOrcOpaqueIndirectStubsManager + *LLVMOrcIndirectStubsManagerRef; + +/** + * A reference to an orc::LazyCallThroughManager instance. + */ +typedef struct LLVMOrcOpaqueLazyCallThroughManager + *LLVMOrcLazyCallThroughManagerRef; + /** * A reference to an orc::DumpObjects object. * @@ -536,6 +570,33 @@ LLVMOrcMaterializationUnitRef LLVMOrcCreateCustomMaterializationUnit( LLVMOrcMaterializationUnitRef LLVMOrcAbsoluteSymbols(LLVMOrcCSymbolMapPairs Syms, size_t NumPairs); +/** + * Create a MaterializationUnit to define lazy re-expots. These are callable + * entry points that call through to the given symbols. + * + * This function takes ownership of the CallableAliases array. The Name + * fields of the array elements are taken to have been retained for this + * function. This allows the following pattern... + * + * size_t NumPairs; + * LLVMOrcCSymbolAliasMapPairs CallableAliases; + * -- Build CallableAliases array -- + * LLVMOrcMaterializationUnitRef MU = + * LLVMOrcLazyReexports(LCTM, ISM, JD, CallableAliases, NumPairs); + * + * ... without requiring cleanup of the elements of the CallableAliases array afterwards. + * + * The client is still responsible for deleting the CallableAliases array itself. + * + * If a client wishes to reuse elements of the CallableAliases array after this call they + * must explicitly retain each of the elements for themselves. + */ +LLVMOrcMaterializationUnitRef LLVMOrcLazyReexports( + LLVMOrcLazyCallThroughManagerRef LCTM, LLVMOrcIndirectStubsManagerRef ISM, + LLVMOrcJITDylibRef SourceRef, LLVMOrcCSymbolAliasMapPairs CallableAliases, + size_t NumPairs); +// TODO: ImplSymbolMad SrcJDLoc + /** * Create a "bare" JITDylib. * @@ -799,6 +860,31 @@ void LLVMOrcObjectTransformLayerSetTransform( LLVMOrcObjectTransformLayerRef ObjTransformLayer, LLVMOrcObjectTransformLayerTransformFunction TransformFunction, void *Ctx); +/** + * Create a LocalIndirectStubsManager from the given target triple. + * + * The resulting IndirectStubsManager is owned by the client + * and must be disposed of by calling LLVMOrcDisposeDisposeIndirectStubsManager. + */ +LLVMOrcIndirectStubsManagerRef +LLVMOrcCreateLocalIndirectStubsManager(const char *TargetTriple); + +/** + * Dispose of an IndirectStubsManager. + */ +void LLVMOrcDisposeIndirectStubsManager(LLVMOrcIndirectStubsManagerRef ISM); + +LLVMErrorRef LLVMOrcCreateLocalLazyCallThroughManager( + const char *TargetTriple, LLVMOrcExecutionSessionRef ES, + LLVMOrcJITTargetAddress ErrorHandlerAddr, + LLVMOrcLazyCallThroughManagerRef *LCTM); + +/** + * Dispose of an LazyCallThroughManager. + */ +void LLVMOrcDisposeLazyCallThroughManager( + LLVMOrcLazyCallThroughManagerRef LCTM); + /** * Create a DumpObjects instance. * diff --git a/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp b/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp index ed45ec6f836be..3068a41b932ba 100644 --- a/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp +++ b/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp @@ -98,9 +98,12 @@ DEFINE_SIMPLE_CONVERSION_FUNCTIONS(IRTransformLayer, LLVMOrcIRTransformLayerRef) DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ObjectTransformLayer, LLVMOrcObjectTransformLayerRef) DEFINE_SIMPLE_CONVERSION_FUNCTIONS(DumpObjects, LLVMOrcDumpObjectsRef) +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(IndirectStubsManager, + LLVMOrcIndirectStubsManagerRef) +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(LazyCallThroughManager, + LLVMOrcLazyCallThroughManagerRef) DEFINE_SIMPLE_CONVERSION_FUNCTIONS(LLJITBuilder, LLVMOrcLLJITBuilderRef) DEFINE_SIMPLE_CONVERSION_FUNCTIONS(LLJIT, LLVMOrcLLJITRef) - DEFINE_SIMPLE_CONVERSION_FUNCTIONS(TargetMachine, LLVMTargetMachineRef) namespace llvm { @@ -341,6 +344,26 @@ LLVMOrcAbsoluteSymbols(LLVMOrcCSymbolMapPairs Syms, size_t NumPairs) { return wrap(absoluteSymbols(std::move(SM)).release()); } +LLVMOrcMaterializationUnitRef LLVMOrcLazyReexports( + LLVMOrcLazyCallThroughManagerRef LCTM, LLVMOrcIndirectStubsManagerRef ISM, + LLVMOrcJITDylibRef SourceJD, LLVMOrcCSymbolAliasMapPairs CallableAliases, + size_t NumPairs) { + + SymbolAliasMap SAM; + for (size_t I = 0; I != NumPairs; ++I) { + auto pair = CallableAliases[I]; + JITSymbolFlags Flags = toJITSymbolFlags(pair.Entry.Flags); + SymbolStringPtr Name = + OrcV2CAPIHelper::moveToSymbolStringPtr(unwrap(pair.Entry.Name)); + SAM[OrcV2CAPIHelper::moveToSymbolStringPtr(unwrap(pair.Name))] = + SymbolAliasMapEntry(Name, Flags); + } + + return wrap(lazyReexports(*unwrap(LCTM), *unwrap(ISM), *unwrap(SourceJD), + std::move(SAM)) + .release()); +} + LLVMOrcJITDylibRef LLVMOrcExecutionSessionCreateBareJITDylib(LLVMOrcExecutionSessionRef ES, const char *Name) { @@ -725,3 +748,31 @@ void LLVMOrcRTDyldObjectLinkingLayerRegisterJITEventListener( LLVMOrcIRTransformLayerRef LLVMOrcLLJITGetIRTransformLayer(LLVMOrcLLJITRef J) { return wrap(&unwrap(J)->getIRTransformLayer()); } + +LLVMOrcIndirectStubsManagerRef +LLVMOrcCreateLocalIndirectStubsManager(const char *TargetTriple) { + auto builder = createLocalIndirectStubsManagerBuilder(Triple(TargetTriple)); + return wrap(builder().release()); +} + +void LLVMOrcDisposeIndirectStubsManager(LLVMOrcIndirectStubsManagerRef ISM) { + std::unique_ptr TmpISM(unwrap(ISM)); +} + +LLVMErrorRef LLVMOrcCreateLocalLazyCallThroughManager( + const char *TargetTriple, LLVMOrcExecutionSessionRef ES, + LLVMOrcJITTargetAddress ErrorHandlerAddr, + LLVMOrcLazyCallThroughManagerRef *Result) { + auto LCTM = createLocalLazyCallThroughManager(Triple(TargetTriple), + *unwrap(ES), ErrorHandlerAddr); + + if (!LCTM) + return wrap(LCTM.takeError()); + *Result = wrap(LCTM->release()); + return LLVMErrorSuccess; +} + +void LLVMOrcDisposeLazyCallThroughManager( + LLVMOrcLazyCallThroughManagerRef LCM) { + std::unique_ptr TmpLCM(unwrap(LCM)); +} diff --git a/llvm/test/CMakeLists.txt b/llvm/test/CMakeLists.txt index 7fd52d207df46..89a2d8b9e113d 100644 --- a/llvm/test/CMakeLists.txt +++ b/llvm/test/CMakeLists.txt @@ -166,6 +166,7 @@ if(LLVM_BUILD_EXAMPLES) OrcV2CBindingsAddObjectFile OrcV2CBindingsRemovableCode OrcV2CBindingsReflectProcessSymbols + OrcV2CBindingsLazy ) if(CMAKE_HOST_UNIX) list(APPEND LLVM_TEST_DEPENDS diff --git a/llvm/test/Examples/OrcV2Examples/orcv2-cbindings-lazy.test b/llvm/test/Examples/OrcV2Examples/orcv2-cbindings-lazy.test new file mode 100644 index 0000000000000..b8e8f53361ea9 --- /dev/null +++ b/llvm/test/Examples/OrcV2Examples/orcv2-cbindings-lazy.test @@ -0,0 +1,5 @@ +# RUN: OrcV2CBindingsLazy 2>&1 | FileCheck -check-prefix=THIS %s +# RUN: OrcV2CBindingsLazy 0 2>&1 | FileCheck -check-prefix=OTHER %s + +# THIS: entry(1) = 1 +# OTHER: entry(2) = 2 diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py index e8de805eae715..887be9abaa122 100644 --- a/llvm/test/lit.cfg.py +++ b/llvm/test/lit.cfg.py @@ -186,7 +186,8 @@ def get_asan_rtlib(): ToolSubst('OrcV2CBindingsBasicUsage', unresolved='ignore'), ToolSubst('OrcV2CBindingsAddObjectFile', unresolved='ignore'), ToolSubst('OrcV2CBindingsRemovableCode', unresolved='ignore'), - ToolSubst('OrcV2CBindingsReflectProcessSymbols', unresolved='ignore')]) + ToolSubst('OrcV2CBindingsReflectProcessSymbols', unresolved='ignore'), + ToolSubst('OrcV2CBindingsLazy', unresolved='ignore')]) llvm_config.add_tool_substitutions(tools, config.llvm_tools_dir) From e386871e1d21cf206a1287356e88c5853563fc77 Mon Sep 17 00:00:00 2001 From: Vassil Vassilev Date: Thu, 1 Jul 2021 17:03:23 +0000 Subject: [PATCH 459/619] [clang-repl] Allow passing in code as positional arguments. Now we can do things like: clang-repl "int i = 1;" "int j = 2;". Differential revision: https://reviews.llvm.org/D104898 --- clang/test/Interpreter/execute.cpp | 7 ++++++- clang/tools/clang-repl/ClangRepl.cpp | 22 ++++++++++++++++------ 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/clang/test/Interpreter/execute.cpp b/clang/test/Interpreter/execute.cpp index 108b79b23a59d..730796bd4016a 100644 --- a/clang/test/Interpreter/execute.cpp +++ b/clang/test/Interpreter/execute.cpp @@ -1,7 +1,12 @@ -// RUN: cat %s | clang-repl | FileCheck %s +// RUN: clang-repl "int i = 10;" 'extern "C" int printf(const char*,...);' \ +// RUN: 'auto r1 = printf("i = %d\n", i);' | FileCheck --check-prefix=CHECK-DRIVER %s // REQUIRES: host-supports-jit // UNSUPPORTED: system-aix +// CHECK-DRIVER: i = 10 + +// RUN: cat %s | clang-repl | FileCheck %s + extern "C" int printf(const char *, ...); int i = 42; auto r1 = printf("i = %d\n", i); diff --git a/clang/tools/clang-repl/ClangRepl.cpp b/clang/tools/clang-repl/ClangRepl.cpp index b5b5bf6e0c6bb..ba6bb11abc867 100644 --- a/clang/tools/clang-repl/ClangRepl.cpp +++ b/clang/tools/clang-repl/ClangRepl.cpp @@ -28,6 +28,9 @@ static llvm::cl::list llvm::cl::CommaSeparated); static llvm::cl::opt OptHostSupportsJit("host-supports-jit", llvm::cl::Hidden); +static llvm::cl::list OptInputs(llvm::cl::Positional, + llvm::cl::ZeroOrMore, + llvm::cl::desc("[code to run]")); static void LLVMErrorHandler(void *UserData, const std::string &Message, bool GenCrashDiag) { @@ -78,15 +81,22 @@ int main(int argc, const char **argv) { static_cast(&CI->getDiagnostics())); auto Interp = ExitOnErr(clang::Interpreter::create(std::move(CI))); - llvm::LineEditor LE("clang-repl"); - // FIXME: Add LE.setListCompleter - while (llvm::Optional Line = LE.readLine()) { - if (*Line == "quit") - break; - if (auto Err = Interp->ParseAndExecute(*Line)) + for (const std::string &input : OptInputs) { + if (auto Err = Interp->ParseAndExecute(input)) llvm::logAllUnhandledErrors(std::move(Err), llvm::errs(), "error: "); } + if (OptInputs.empty()) { + llvm::LineEditor LE("clang-repl"); + // FIXME: Add LE.setListCompleter + while (llvm::Optional Line = LE.readLine()) { + if (*Line == "quit") + break; + if (auto Err = Interp->ParseAndExecute(*Line)) + llvm::logAllUnhandledErrors(std::move(Err), llvm::errs(), "error: "); + } + } + // Our error handler depends on the Diagnostics object, which we're // potentially about to delete. Uninstall the handler now so that any // later errors use the default handling behavior instead. From fe30963600ea579d4046c9a92c6e38cc2be0e9a2 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Wed, 30 Jun 2021 20:22:41 -0700 Subject: [PATCH 460/619] [scudo] Untag BlockEnd in reallocate If we get here from reallocate, BlockEnd is tagged. Then we will storeTag(UntaggedEnd) into the header of the next chunk. Luckily header tag is 0 so unpatched code still works. Reviewed By: pcc Differential Revision: https://reviews.llvm.org/D105261 --- compiler-rt/lib/scudo/standalone/combined.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/compiler-rt/lib/scudo/standalone/combined.h b/compiler-rt/lib/scudo/standalone/combined.h index e8bb8bf207be7..fd5360ce0f55a 100644 --- a/compiler-rt/lib/scudo/standalone/combined.h +++ b/compiler-rt/lib/scudo/standalone/combined.h @@ -639,7 +639,7 @@ class Allocator { if (ClassId) { resizeTaggedChunk(reinterpret_cast(OldTaggedPtr) + OldSize, reinterpret_cast(OldTaggedPtr) + NewSize, - NewSize, BlockEnd); + NewSize, untagPointer(BlockEnd)); storePrimaryAllocationStackMaybe(Options, OldPtr); } else { storeSecondaryAllocationStackMaybe(Options, OldPtr, NewSize); @@ -1154,6 +1154,7 @@ class Allocator { // address tags against chunks. To allow matching in this case we store the // address tag in the first byte of the chunk. void storeEndMarker(uptr End, uptr Size, uptr BlockEnd) { + DCHECK_EQ(BlockEnd, untagPointer(BlockEnd)); uptr UntaggedEnd = untagPointer(End); if (UntaggedEnd != BlockEnd) { storeTag(UntaggedEnd); From 3d48775b89cfcaa20dae9928f20410ee61bdda4c Mon Sep 17 00:00:00 2001 From: David Green Date: Thu, 1 Jul 2021 21:08:13 +0100 Subject: [PATCH 461/619] [ARM] Reassociate BFI D104868 removed an (incorrect) fold for distributing BFI instructions in a chain, combining them into a single instruction. BFIs like that are hard to test, as the patterns are often destroyed before they become BFIs. But it can come up in places, with chains of BFIs that can be combined. This patch adds a replacement, which reassociates BFI instructions with non-overlapping insertion masks so that low bits are inserted first. This can end up sorting the nodes so that adjacent inserts are next to one another, allowing the existing folds to combine into a single BFI. Differential Revision: https://reviews.llvm.org/D105096 --- llvm/lib/Target/ARM/ARMISelLowering.cpp | 65 +++++++++++++++++-------- llvm/test/CodeGen/ARM/bfi.ll | 20 ++++---- 2 files changed, 53 insertions(+), 32 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 43b8cec412f85..653dbdf281e82 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -14076,7 +14076,9 @@ static SDValue FindBFIToCombineWith(SDNode *N) { static SDValue PerformBFICombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { + SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); + if (N1.getOpcode() == ISD::AND) { // (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff // the bits being cleared by the AND are not demanded by the BFI. @@ -14097,33 +14099,54 @@ static SDValue PerformBFICombine(SDNode *N, N->getOperand(2)); return SDValue(); } + // Look for another BFI to combine with. - SDValue CombineBFI = FindBFIToCombineWith(N); - if (CombineBFI == SDValue()) - return SDValue(); + if (SDValue CombineBFI = FindBFIToCombineWith(N)) { + // We've found a BFI. + APInt ToMask1, FromMask1; + SDValue From1 = ParseBFI(N, ToMask1, FromMask1); - // We've found a BFI. - APInt ToMask1, FromMask1; - SDValue From1 = ParseBFI(N, ToMask1, FromMask1); + APInt ToMask2, FromMask2; + SDValue From2 = ParseBFI(CombineBFI.getNode(), ToMask2, FromMask2); + assert(From1 == From2); + (void)From2; - APInt ToMask2, FromMask2; - SDValue From2 = ParseBFI(CombineBFI.getNode(), ToMask2, FromMask2); - assert(From1 == From2); - (void)From2; + // Create a new BFI, combining the two together. + APInt NewFromMask = FromMask1 | FromMask2; + APInt NewToMask = ToMask1 | ToMask2; - // Create a new BFI, combining the two together. - APInt NewFromMask = FromMask1 | FromMask2; - APInt NewToMask = ToMask1 | ToMask2; + EVT VT = N->getValueType(0); + SDLoc dl(N); - EVT VT = N->getValueType(0); - SDLoc dl(N); + if (NewFromMask[0] == 0) + From1 = DCI.DAG.getNode( + ISD::SRL, dl, VT, From1, + DCI.DAG.getConstant(NewFromMask.countTrailingZeros(), dl, VT)); + return DCI.DAG.getNode(ARMISD::BFI, dl, VT, CombineBFI.getOperand(0), From1, + DCI.DAG.getConstant(~NewToMask, dl, VT)); + } - if (NewFromMask[0] == 0) - From1 = DCI.DAG.getNode( - ISD::SRL, dl, VT, From1, - DCI.DAG.getConstant(NewFromMask.countTrailingZeros(), dl, VT)); - return DCI.DAG.getNode(ARMISD::BFI, dl, VT, CombineBFI.getOperand(0), From1, - DCI.DAG.getConstant(~NewToMask, dl, VT)); + // Reassociate BFI(BFI (A, B, M1), C, M2) to BFI(BFI (A, C, M2), B, M1) so + // that lower bit insertions are performed first, providing that M1 and M2 + // do no overlap. This can allow multiple BFI instructions to be combined + // together by the other folds above. + if (N->getOperand(0).getOpcode() == ARMISD::BFI) { + APInt ToMask1 = ~N->getConstantOperandAPInt(2); + APInt ToMask2 = ~N0.getConstantOperandAPInt(2); + + if (!N0.hasOneUse() || (ToMask1 & ToMask2) != 0 || + ToMask1.countLeadingZeros() < ToMask2.countLeadingZeros()) + return SDValue(); + + EVT VT = N->getValueType(0); + SDLoc dl(N); + SDValue BFI1 = DCI.DAG.getNode(ARMISD::BFI, dl, VT, N0.getOperand(0), + N->getOperand(1), N->getOperand(2)); + return DCI.DAG.getNode(ARMISD::BFI, dl, VT, BFI1, N0.getOperand(1), + N0.getOperand(2)); + } + + return SDValue(); } /// PerformVMOVRRDCombine - Target-specific dag combine xforms for diff --git a/llvm/test/CodeGen/ARM/bfi.ll b/llvm/test/CodeGen/ARM/bfi.ll index b6126ab51c350..786bf1c2522ce 100644 --- a/llvm/test/CodeGen/ARM/bfi.ll +++ b/llvm/test/CodeGen/ARM/bfi.ll @@ -397,23 +397,21 @@ define void @bfi3_uses(i32 %a, i32 %b) { define i32 @bfi4(i32 %A, i2 zeroext %BB, i32* %d) { ; CHECK-LABEL: bfi4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: lsr r3, r0, #1 -; CHECK-NEXT: mov r12, #96 -; CHECK-NEXT: bfi r1, r3, #2, #1 -; CHECK-NEXT: tst r0, #32 -; CHECK-NEXT: movweq r12, #32 -; CHECK-NEXT: bfi r1, r3, #9, #1 -; CHECK-NEXT: lsr r3, r0, #2 -; CHECK-NEXT: bfi r1, r3, #3, #1 -; CHECK-NEXT: bfi r1, r3, #10, #1 +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: lsr r12, r0, #1 ; CHECK-NEXT: and r3, r0, #8 +; CHECK-NEXT: bfi r1, r12, #2, #2 +; CHECK-NEXT: mov lr, #96 +; CHECK-NEXT: tst r0, #32 +; CHECK-NEXT: bfi r1, r12, #9, #2 +; CHECK-NEXT: movweq lr, #32 ; CHECK-NEXT: orr r1, r1, r3, lsl #8 ; CHECK-NEXT: and r3, r0, #64 ; CHECK-NEXT: and r0, r0, #128 -; CHECK-NEXT: orr r1, r1, r12 +; CHECK-NEXT: orr r1, r1, lr ; CHECK-NEXT: orr r1, r1, r3, lsl #1 ; CHECK-NEXT: str r1, [r2] -; CHECK-NEXT: bx lr +; CHECK-NEXT: pop {r11, pc} entry: %B = zext i2 %BB to i32 %and = and i32 %A, 2 From b77533fb70ac6388955ee34a1d1e96ba05b6b01f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Rodr=C3=ADguez=20Troiti=C3=B1o?= Date: Thu, 1 Jul 2021 13:36:32 -0700 Subject: [PATCH 462/619] [llvm-strip] Support grouped options in llvm-strip GNU and Apple `strip` implementations seems to support grouped options. Enable the support for grouped options introduced in https://reviews.llvm.org/D83639 for `llvm-strip` invocations. Includes test that checks that both the grouped and non grouped invocations produces the same result. Reviewed By: alexander-shaposhnikov, MaskRay Differential Revision: https://reviews.llvm.org/D105249 --- .../tools/llvm-objcopy/grouped-options.test | 53 +++++++++++++++++++ llvm/tools/llvm-objcopy/ConfigManager.cpp | 6 ++- 2 files changed, 57 insertions(+), 2 deletions(-) create mode 100644 llvm/test/tools/llvm-objcopy/grouped-options.test diff --git a/llvm/test/tools/llvm-objcopy/grouped-options.test b/llvm/test/tools/llvm-objcopy/grouped-options.test new file mode 100644 index 0000000000000..2644030426b09 --- /dev/null +++ b/llvm/test/tools/llvm-objcopy/grouped-options.test @@ -0,0 +1,53 @@ +# This test checks that both grouped and ungrouped options (-S -x and -Sx) +# produce exactly the same result given the same input. +# +# RUN: yaml2obj %s -o %t +# RUN: llvm-strip -S -x -o %t-strip-separated %t +# RUN: llvm-strip -Sx -o %t-strip-grouped %t +# RUN: cmp %t-strip-separated %t-strip-grouped + +# RUN: llvm-objcopy -S -x %t %t-objcopy-separated +# RUN: llvm-objcopy -Sx %t %t-objcopy-grouped +# RUN: cmp %t-objcopy-separated %t-objcopy-grouped + +!ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .debugGlobal + Type: SHT_PROGBITS + Content: "00000000" + - Name: .text + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + Address: 0x1000 + AddressAlign: 0x0000000000000010 + Size: 64 +Symbols: + - Name: Local + Type: STT_FUNC + Section: .text + Value: 0x1000 + Size: 8 + - Name: LocalSection + Type: STT_SECTION + Section: .text + - Name: LocalFile + Type: STT_FILE + - Name: Global + Type: STT_FUNC + Size: 8 + Section: .text + Value: 0x1010 + Binding: STB_GLOBAL + - Name: Weak + Type: STT_FUNC + Size: 8 + Section: .text + Value: 0x1008 + - Name: debugGlobal + Section: .debugGlobal + Binding: STB_GLOBAL diff --git a/llvm/tools/llvm-objcopy/ConfigManager.cpp b/llvm/tools/llvm-objcopy/ConfigManager.cpp index 70939ee896678..2f04e70dd6ffd 100644 --- a/llvm/tools/llvm-objcopy/ConfigManager.cpp +++ b/llvm/tools/llvm-objcopy/ConfigManager.cpp @@ -60,7 +60,9 @@ static const opt::OptTable::Info ObjcopyInfoTable[] = { class ObjcopyOptTable : public opt::OptTable { public: - ObjcopyOptTable() : OptTable(ObjcopyInfoTable) {} + ObjcopyOptTable() : OptTable(ObjcopyInfoTable) { + setGroupedShortOptions(true); + } }; enum InstallNameToolID { @@ -164,7 +166,7 @@ static const opt::OptTable::Info StripInfoTable[] = { class StripOptTable : public opt::OptTable { public: - StripOptTable() : OptTable(StripInfoTable) {} + StripOptTable() : OptTable(StripInfoTable) { setGroupedShortOptions(true); } }; } // namespace From 48088425b37818cb80113d0137c83297390265f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Rodr=C3=ADguez=20Troiti=C3=B1o?= Date: Thu, 1 Jul 2021 13:37:48 -0700 Subject: [PATCH 463/619] [cmake] Invoke strip without -l and with non-grouped flags. `llvm-strip` does not support `-l`. Apple's `strip` supports `-l`, but it is not documented, and the latest code doesn't seem to do anything meaningful. From the old source code drops it seems that `-l` was added around version 795 of cctools and removed before 898. The code around the flag usage in 795 talks about problems with kext and forcing the execution of `ld -r`, which seems a behaviour that is not enforceable in latest versions of cctools. The `-l` flag was added in https://reviews.llvm.org/D15133 without a lot of explanation. Since the flag is not active, removing it should not modify the behaviour for most people (except if someone is trying to compile LLVM with a really old version of `strip`). Additionally, break the invocation into two different flags, since `llvm-strip` doesn't at the moment support grouped flags, and other `strip` implementations should work the same no matter if grouped or not. Test Plan: Using `strip` from Xcode 12.5 in Big Sur to strip the same binary (a simple Hello World), using both `-Sxl` and `-Sx` produces exactly the same binary. Repeating the same process with `clang` results also in the same binary. Reviewed By: smeenai Differential Revision: https://reviews.llvm.org/D105243 --- llvm/cmake/modules/AddLLVM.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake index 7e91b77adcab2..ce16a5da4c19b 100644 --- a/llvm/cmake/modules/AddLLVM.cmake +++ b/llvm/cmake/modules/AddLLVM.cmake @@ -2007,7 +2007,7 @@ function(llvm_externalize_debuginfo name) if(NOT CMAKE_STRIP) set(CMAKE_STRIP xcrun strip) endif() - set(strip_command COMMAND ${CMAKE_STRIP} -Sxl $) + set(strip_command COMMAND ${CMAKE_STRIP} -S -x $) else() set(strip_command COMMAND ${CMAKE_STRIP} -g -x $) endif() From e4b9fecd392fdd135815f2069179a12e4c73347d Mon Sep 17 00:00:00 2001 From: Caitlyn Cano Date: Thu, 1 Jul 2021 20:41:51 +0000 Subject: [PATCH 464/619] [libc] Add minimal Windows config A README file with procedure for building/testing LLVM libc on Windows has also been added. Reviewed By: sivachandra, aeubanks Differential Revision: https://reviews.llvm.org/D105231 --- libc/config/windows/README.md | 76 +++++++++++++++++++++++++++++ libc/config/windows/entrypoints.txt | 44 +++++++++++++++++ 2 files changed, 120 insertions(+) create mode 100644 libc/config/windows/README.md create mode 100644 libc/config/windows/entrypoints.txt diff --git a/libc/config/windows/README.md b/libc/config/windows/README.md new file mode 100644 index 0000000000000..8e01a409f9247 --- /dev/null +++ b/libc/config/windows/README.md @@ -0,0 +1,76 @@ +# Building and Testing LLVM libc on Windows + +## Setting Up Environment + +To build LLVM libc on Windows, first build Clang using the following steps. + +1. Open Command Prompt in Windows +2. Set TEMP and TMP to a directory. Creating this path is necessary for a + successful clang build. + 1. Create tmp under your preferred directory or under `C:\src`: + + ``` + cd C:\src + mkdir tmp + ``` + + 2. In the start menu, search for "environment variables for your account". + Set TEMP and TMP to `C:\src\tmp` or the corresponding path elsewhere. +3. Download [Visual Studio Community](https://visualstudio.microsoft.com/downloads/). +4. Install [CMake](https://cmake.org/download/) and + [Ninja](https://github.com/ninja-build/ninja/releases). (Optional, included + in Visual Studio). +5. Load the Visual Studio environment variables using this command. This is + crucial as it allows you to use build tools like CMake and Ninja: + + ``` + "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvarsall.bat" amd64 + ``` + + Note: **Rerun this command every time you open a new Command Prompt + window.** + +6. If you have not used Git before, install + [Git](https://git-scm.com/download/win) for Windows. Check out the LLVM + source tree from Github using: + + ``` + git clone https://github.com/llvm/llvm-project.git + ``` + +7. Ensure you have access to Clang, either by downloading from + [LLVM Download](https://releases.llvm.org/download.html) or + [building it yourself](https://clang.llvm.org/get_started.html). + +## Building LLVM libc + +In this section, Clang will be used to compile LLVM +libc, and finally, build and test the libc. + +8. Create a empty build directory in `C:\src` or your preferred directory and + cd to it using: + + ``` + mkdir libc-build + cd libc-build + ``` + +9. Run the following CMake command to generate build files. LLVM libc must be built + by Clang, so ensure Clang is specified as the C and C++ compiler. + + ``` + cmake -G Ninja ../llvm-project/llvm -DCMAKE_C_COMPILER=C:/src/clang-build/bin/clang-cl.exe -DCMAKE_CXX_COMPILER=C:/src/clang-build/bin/clang-cl.exe -DLLVM_TARGETS_TO_BUILD=X86 -DLLVM_FORCE_BUILD_RUNTIME=libc -DLLVM_ENABLE_PROJECTS=libc -DLLVM_NATIVE_ARCH=x86_64 -DLLVM_HOST_TRIPLE=x86_64-window-x86-gnu + ``` + +10. Build LLVM libc using: + + ``` + ninja llvmlibc + + ``` + +11. Run tests using: + + ``` + ninja checklibc + ``` diff --git a/libc/config/windows/entrypoints.txt b/libc/config/windows/entrypoints.txt new file mode 100644 index 0000000000000..390f2fdf12146 --- /dev/null +++ b/libc/config/windows/entrypoints.txt @@ -0,0 +1,44 @@ +set(TARGET_LIBC_ENTRYPOINTS + # ctype.h entrypoints + libc.src.ctype.isalnum + libc.src.ctype.isalpha + libc.src.ctype.isascii + libc.src.ctype.isblank + libc.src.ctype.iscntrl + libc.src.ctype.isdigit + libc.src.ctype.isgraph + libc.src.ctype.islower + libc.src.ctype.isprint + libc.src.ctype.ispunct + libc.src.ctype.isspace + libc.src.ctype.isupper + libc.src.ctype.isxdigit + libc.src.ctype.toascii + libc.src.ctype.tolower + libc.src.ctype.toupper + # string.h entrypoints + libc.src.string.bzero + libc.src.string.memchr + libc.src.string.memcmp + libc.src.string.memcpy + libc.src.string.memmove + libc.src.string.memset + libc.src.string.memrchr + libc.src.string.strcat + libc.src.string.strchr + libc.src.string.strcpy + libc.src.string.strcmp + libc.src.string.strcspn + libc.src.string.strlen + libc.src.string.strncpy + libc.src.string.strnlen + libc.src.string.strpbrk + libc.src.string.strrchr + libc.src.string.strspn + libc.src.string.strstr + libc.src.string.strtok + libc.src.string.strtok_r +) +set(TARGET_LLVMLIBC_ENTRYPOINTS + ${TARGET_LIBC_ENTRYPOINTS} +) From 09e3bf01b3da978b5437bf84c0b0f290974d8743 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Thu, 1 Jul 2021 13:36:46 -0400 Subject: [PATCH 465/619] [InstCombine][test] add tests for icmp simplify miscompile (PR50944); NFC --- llvm/test/Transforms/InstCombine/icmp.ll | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll index 554619caa6151..95c006e76ccb8 100644 --- a/llvm/test/Transforms/InstCombine/icmp.ll +++ b/llvm/test/Transforms/InstCombine/icmp.ll @@ -3930,3 +3930,23 @@ bb: %i3 = icmp eq i32 %i, %i2 ret i1 %i3 } + +; PR50944 + +define i1 @thread_cmp_over_select_with_poison_trueval(i1 %b) { +; CHECK-LABEL: @thread_cmp_over_select_with_poison_trueval( +; CHECK-NEXT: ret i1 poison +; + %s = select i1 %b, i32 poison, i32 0 + %tobool = icmp ne i32 %s, 0 + ret i1 %tobool +} + +define i1 @thread_cmp_over_select_with_poison_falseval(i1 %b) { +; CHECK-LABEL: @thread_cmp_over_select_with_poison_falseval( +; CHECK-NEXT: ret i1 poison +; + %s = select i1 %b, i32 1, i32 poison + %tobool = icmp ne i32 %s, 0 + ret i1 %tobool +} From 9eb613b2de3163686b1a4bd1160f15ac56a4b083 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Thu, 1 Jul 2021 17:30:04 -0400 Subject: [PATCH 466/619] [InstSimplify] do not propagate poison from select arm to icmp user This is the cause of the miscompile in: https://llvm.org/PR50944 The problem has likely existed for some time, but it was made visible with: 5af8bacc94024 ( D104661 ) handleOtherCmpSelSimplifications() assumed it can convert select of constants to bool logic ops, but that does not work with poison. We had a very similar construct in InstCombine, so the fix here mimics the fix there. The bug is in instsimplify, but I'm not sure how to reproduce it outside of instcombine. The reason this is visible in instcombine is because we have a hack (FIXME) to bypass simplification of a select when it has an icmp user: https://github.com/llvm/llvm-project/blob/955f12589940634acc6c9901e8b25534808f691c/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp#L2632 So we get to an unusual case where we are trying to simplify an instruction that has an operand that would have already simplified if we had processed it in normal order. Differential Revision: https://reviews.llvm.org/D105298 --- llvm/lib/Analysis/InstructionSimplify.cpp | 7 +++++-- llvm/test/Transforms/InstCombine/icmp.ll | 4 ++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 2dbd3e0f7ad35..f713d5317b8cf 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -189,12 +189,15 @@ static Value *handleOtherCmpSelSimplifications(Value *TCmp, Value *FCmp, // If the false value simplified to false, then the result of the compare // is equal to "Cond && TCmp". This also catches the case when the false // value simplified to false and the true value to true, returning "Cond". - if (match(FCmp, m_Zero())) + // Folding select to and/or isn't poison-safe in general; impliesPoison + // checks whether folding it does not convert a well-defined value into + // poison. + if (match(FCmp, m_Zero()) && impliesPoison(TCmp, Cond)) if (Value *V = SimplifyAndInst(Cond, TCmp, Q, MaxRecurse)) return V; // If the true value simplified to true, then the result of the compare // is equal to "Cond || FCmp". - if (match(TCmp, m_One())) + if (match(TCmp, m_One()) && impliesPoison(FCmp, Cond)) if (Value *V = SimplifyOrInst(Cond, FCmp, Q, MaxRecurse)) return V; // Finally, if the false value simplified to true and the true value to diff --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll index 95c006e76ccb8..c64f2ac54cd15 100644 --- a/llvm/test/Transforms/InstCombine/icmp.ll +++ b/llvm/test/Transforms/InstCombine/icmp.ll @@ -3935,7 +3935,7 @@ bb: define i1 @thread_cmp_over_select_with_poison_trueval(i1 %b) { ; CHECK-LABEL: @thread_cmp_over_select_with_poison_trueval( -; CHECK-NEXT: ret i1 poison +; CHECK-NEXT: ret i1 false ; %s = select i1 %b, i32 poison, i32 0 %tobool = icmp ne i32 %s, 0 @@ -3944,7 +3944,7 @@ define i1 @thread_cmp_over_select_with_poison_trueval(i1 %b) { define i1 @thread_cmp_over_select_with_poison_falseval(i1 %b) { ; CHECK-LABEL: @thread_cmp_over_select_with_poison_falseval( -; CHECK-NEXT: ret i1 poison +; CHECK-NEXT: ret i1 true ; %s = select i1 %b, i32 1, i32 poison %tobool = icmp ne i32 %s, 0 From edc1f0c12c836abaeeab7b0d9f7e8fb73c233ae6 Mon Sep 17 00:00:00 2001 From: zoecarver Date: Thu, 1 Jul 2021 11:58:54 -0700 Subject: [PATCH 467/619] [libcxx][ranges] Implement indirectly_swappable. Differential Revision: https://reviews.llvm.org/D105304 --- libcxx/docs/OneRangesProposalStatus.csv | 2 +- libcxx/include/__iterator/concepts.h | 3 + libcxx/include/__iterator/iter_swap.h | 10 +++ libcxx/include/iterator | 4 + ...rator_concept_conformance.compile.pass.cpp | 2 + ...rator_concept_conformance.compile.pass.cpp | 2 + ...rator_concept_conformance.compile.pass.cpp | 2 + ...rator_concept_conformance.compile.pass.cpp | 2 + ...rator_concept_conformance.compile.pass.cpp | 2 + ...rator_concept_conformance.compile.pass.cpp | 2 + ...rator_concept_conformance.compile.pass.cpp | 2 + ...rator_concept_conformance.compile.pass.cpp | 2 + ...rator_concept_conformance.compile.pass.cpp | 2 + ...rator_concept_conformance.compile.pass.cpp | 2 + ...rator_concept_conformance.compile.pass.cpp | 4 + ...rator_concept_conformance.compile.pass.cpp | 4 + ...rator_concept_conformance.compile.pass.cpp | 4 + ...rator_concept_conformance.compile.pass.cpp | 4 + ...rator_concept_conformance.compile.pass.cpp | 1 + ...rator_concept_conformance.compile.pass.cpp | 2 + .../indirectly_swappable.compile.pass.cpp | 80 +++++++++++++++++++ ...tly_swappable.subsumption.compile.pass.cpp | 32 ++++++++ ...rator_concept_conformance.compile.pass.cpp | 1 + ...rator_concept_conformance.compile.pass.cpp | 1 + ...rator_concept_conformance.compile.pass.cpp | 1 + ...rator_concept_conformance.compile.pass.cpp | 2 +- ...rator_concept_conformance.compile.pass.cpp | 3 + ...rator_concept_conformance.compile.pass.cpp | 1 + ...rator_concept_conformance.compile.pass.cpp | 1 + ...rator_concept_conformance.compile.pass.cpp | 1 + ...rator_concept_conformance.compile.pass.cpp | 1 + ...rator_concept_conformance.compile.pass.cpp | 1 + ...rator_concept_conformance.compile.pass.cpp | 1 + ...rator_concept_conformance.compile.pass.cpp | 2 + ...rator_concept_conformance.compile.pass.cpp | 2 + ...rator_concept_conformance.compile.pass.cpp | 1 + ...rator_concept_conformance.compile.pass.cpp | 1 + 37 files changed, 188 insertions(+), 2 deletions(-) create mode 100644 libcxx/test/std/iterators/iterator.requirements/alg.req.ind.swap/indirectly_swappable.compile.pass.cpp create mode 100644 libcxx/test/std/iterators/iterator.requirements/alg.req.ind.swap/indirectly_swappable.subsumption.compile.pass.cpp diff --git a/libcxx/docs/OneRangesProposalStatus.csv b/libcxx/docs/OneRangesProposalStatus.csv index 0b6ca9ed127c7..0bfb00d441846 100644 --- a/libcxx/docs/OneRangesProposalStatus.csv +++ b/libcxx/docs/OneRangesProposalStatus.csv @@ -61,7 +61,7 @@ Section,Description,Dependencies,Assignee,Complete | indirectly_copyable | indirectly_copyable_storable",[iterator.concepts],Zoe Carver,In progress [common.alg.req]: pt. 2,indirectly_swappable,"| [iterator.concepts] -| [iterator.cust.swap]",Louis Dionne,Not started +| [iterator.cust.swap]",Zoe Carver,✅ [common.alg.req]: pt. 3,indirectly_comparable,[projected],Louis Dionne,Not started [common.alg.req]: pt. 4,"| permutable | mergeable diff --git a/libcxx/include/__iterator/concepts.h b/libcxx/include/__iterator/concepts.h index e3664db14ae26..94e5f5d0a6cc4 100644 --- a/libcxx/include/__iterator/concepts.h +++ b/libcxx/include/__iterator/concepts.h @@ -249,6 +249,9 @@ concept indirectly_movable_storable = constructible_from, iter_rvalue_reference_t<_In>> && assignable_from&, iter_rvalue_reference_t<_In>>; +// Note: indirectly_swappable is located in iter_swap.h to prevent a dependency cycle +// (both iter_swap and indirectly_swappable require indirectly_readable). + // clang-format on #endif // !defined(_LIBCPP_HAS_NO_RANGES) diff --git a/libcxx/include/__iterator/iter_swap.h b/libcxx/include/__iterator/iter_swap.h index a529472e2a13d..17153728f0846 100644 --- a/libcxx/include/__iterator/iter_swap.h +++ b/libcxx/include/__iterator/iter_swap.h @@ -85,6 +85,16 @@ inline namespace __cpo { } // namespace ranges +template +concept indirectly_swappable = + indirectly_readable<_I1> && indirectly_readable<_I2> && + requires(const _I1 __i1, const _I2 __i2) { + ranges::iter_swap(__i1, __i1); + ranges::iter_swap(__i2, __i2); + ranges::iter_swap(__i1, __i2); + ranges::iter_swap(__i2, __i1); + }; + #endif // !defined(_LIBCPP_HAS_NO_RANGES) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/iterator b/libcxx/include/iterator index b4e15c283d935..f6b0d2ae0a963 100644 --- a/libcxx/include/iterator +++ b/libcxx/include/iterator @@ -132,6 +132,10 @@ template template concept indirectly_movable_storable = see below; // since C++20 +// [alg.req.ind.swap], concept indirectly_swappable +template + concept indirectly_swappable = see below; // since C++20 + template struct iterator // deprecated in C++17 diff --git a/libcxx/test/std/containers/associative/map/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/containers/associative/map/iterator_concept_conformance.compile.pass.cpp index 43fa91d8a451a..08be1b48725e7 100644 --- a/libcxx/test/std/containers/associative/map/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/containers/associative/map/iterator_concept_conformance.compile.pass.cpp @@ -35,6 +35,7 @@ static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); static_assert( std::indirectly_movable*>); static_assert(!std::indirectly_movable_storable*>); +static_assert(!std::indirectly_swappable); static_assert(std::bidirectional_iterator); static_assert(!std::random_access_iterator); @@ -47,3 +48,4 @@ static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); +static_assert(!std::indirectly_swappable); diff --git a/libcxx/test/std/containers/associative/multimap/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/containers/associative/multimap/iterator_concept_conformance.compile.pass.cpp index 539d01e002ef0..d768e3152d287 100644 --- a/libcxx/test/std/containers/associative/multimap/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/containers/associative/multimap/iterator_concept_conformance.compile.pass.cpp @@ -35,6 +35,7 @@ static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); static_assert( std::indirectly_movable*>); static_assert(!std::indirectly_movable_storable*>); +static_assert(!std::indirectly_swappable); static_assert(std::bidirectional_iterator); static_assert(!std::random_access_iterator); @@ -47,3 +48,4 @@ static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); +static_assert(!std::indirectly_swappable); diff --git a/libcxx/test/std/containers/associative/multiset/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/containers/associative/multiset/iterator_concept_conformance.compile.pass.cpp index 0b44c3ed40694..61a200cfb7a92 100644 --- a/libcxx/test/std/containers/associative/multiset/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/containers/associative/multiset/iterator_concept_conformance.compile.pass.cpp @@ -35,6 +35,7 @@ static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); static_assert(std::indirectly_movable); static_assert(std::indirectly_movable_storable); +static_assert(!std::indirectly_swappable); static_assert(std::bidirectional_iterator); static_assert(!std::random_access_iterator); @@ -47,3 +48,4 @@ static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); +static_assert(!std::indirectly_swappable); diff --git a/libcxx/test/std/containers/associative/set/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/containers/associative/set/iterator_concept_conformance.compile.pass.cpp index 76b42fef33943..5bcf23d10906e 100644 --- a/libcxx/test/std/containers/associative/set/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/containers/associative/set/iterator_concept_conformance.compile.pass.cpp @@ -35,6 +35,7 @@ static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); static_assert(std::indirectly_movable); static_assert(std::indirectly_movable_storable); +static_assert(!std::indirectly_swappable); static_assert(std::bidirectional_iterator); static_assert(!std::random_access_iterator); @@ -47,3 +48,4 @@ static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); +static_assert(!std::indirectly_swappable); diff --git a/libcxx/test/std/containers/sequences/array/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/containers/sequences/array/iterator_concept_conformance.compile.pass.cpp index 29a0f3ade42a8..24c13d25d8eea 100644 --- a/libcxx/test/std/containers/sequences/array/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/containers/sequences/array/iterator_concept_conformance.compile.pass.cpp @@ -39,6 +39,7 @@ static_assert( std::indirectly_movable); static_assert( std::indirectly_movable_storable); static_assert(!std::indirectly_movable); static_assert(!std::indirectly_movable_storable); +static_assert( std::indirectly_swappable); static_assert(std::contiguous_iterator); static_assert(!std::indirectly_writable); @@ -58,3 +59,4 @@ static_assert( std::indirectly_movable); static_assert( std::indirectly_movable_storable); static_assert(!std::indirectly_movable); static_assert(!std::indirectly_movable_storable); +static_assert(!std::indirectly_swappable); diff --git a/libcxx/test/std/containers/sequences/deque/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/containers/sequences/deque/iterator_concept_conformance.compile.pass.cpp index 1b1bf60272f2e..c66b818c2862a 100644 --- a/libcxx/test/std/containers/sequences/deque/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/containers/sequences/deque/iterator_concept_conformance.compile.pass.cpp @@ -41,6 +41,7 @@ static_assert( std::indirectly_movable); static_assert( std::indirectly_movable_storable); static_assert(!std::indirectly_movable); static_assert(!std::indirectly_movable_storable); +static_assert(std::indirectly_swappable); static_assert(std::random_access_iterator); static_assert(!std::contiguous_iterator); @@ -61,3 +62,4 @@ static_assert( std::indirectly_movable); static_assert( std::indirectly_movable_storable); static_assert(!std::indirectly_movable); static_assert(!std::indirectly_movable_storable); +static_assert(!std::indirectly_swappable); diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.iter/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.iter/iterator_concept_conformance.compile.pass.cpp index 8caa26e1fde3f..bf4cc81a0cdfb 100644 --- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.iter/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.iter/iterator_concept_conformance.compile.pass.cpp @@ -31,6 +31,7 @@ static_assert( std::indirectly_movable); static_assert( std::indirectly_movable_storable); static_assert(!std::indirectly_movable); static_assert(!std::indirectly_movable_storable); +static_assert(std::indirectly_swappable); static_assert(std::forward_iterator); static_assert(!std::bidirectional_iterator); @@ -43,3 +44,4 @@ static_assert( std::indirectly_movable); static_assert( std::indirectly_movable_storable); static_assert(!std::indirectly_movable); static_assert(!std::indirectly_movable_storable); +static_assert(!std::indirectly_swappable); diff --git a/libcxx/test/std/containers/sequences/list/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/containers/sequences/list/iterator_concept_conformance.compile.pass.cpp index 2a4ffe57d9000..bf0553107a88c 100644 --- a/libcxx/test/std/containers/sequences/list/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/iterator_concept_conformance.compile.pass.cpp @@ -41,6 +41,7 @@ static_assert( std::indirectly_movable); static_assert( std::indirectly_movable_storable); static_assert(!std::indirectly_movable); static_assert(!std::indirectly_movable_storable); +static_assert(std::indirectly_swappable); static_assert(std::bidirectional_iterator); static_assert(!std::random_access_iterator); @@ -61,3 +62,4 @@ static_assert( std::indirectly_movable); static_assert( std::indirectly_movable_storable); static_assert(!std::indirectly_movable); static_assert(!std::indirectly_movable_storable); +static_assert(!std::indirectly_swappable); diff --git a/libcxx/test/std/containers/sequences/vector.bool/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/iterator_concept_conformance.compile.pass.cpp index 037383af0308f..d051f7e36645a 100644 --- a/libcxx/test/std/containers/sequences/vector.bool/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/containers/sequences/vector.bool/iterator_concept_conformance.compile.pass.cpp @@ -37,6 +37,7 @@ static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); static_assert(std::indirectly_movable); static_assert(std::indirectly_movable_storable); +static_assert(std::indirectly_swappable); static_assert( std::random_access_iterator); static_assert( std::random_access_iterator); @@ -51,3 +52,4 @@ static_assert( std::sized_sentinel_for); static_assert( std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); +static_assert(!std::indirectly_swappable); diff --git a/libcxx/test/std/containers/sequences/vector/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/containers/sequences/vector/iterator_concept_conformance.compile.pass.cpp index 3af726723176b..3280402a620cc 100644 --- a/libcxx/test/std/containers/sequences/vector/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/containers/sequences/vector/iterator_concept_conformance.compile.pass.cpp @@ -42,6 +42,7 @@ static_assert( std::indirectly_movable); static_assert( std::indirectly_movable_storable); static_assert(!std::indirectly_movable); static_assert(!std::indirectly_movable_storable); +static_assert(std::indirectly_swappable); static_assert( std::contiguous_iterator); static_assert( std::random_access_iterator); @@ -63,3 +64,4 @@ static_assert( std::indirectly_movable); static_assert( std::indirectly_movable_storable); static_assert(!std::indirectly_movable); static_assert(!std::indirectly_movable_storable); +static_assert(!std::indirectly_swappable); diff --git a/libcxx/test/std/containers/unord/unord.map/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/iterator_concept_conformance.compile.pass.cpp index 7f8403029f2d9..b2bcabf1a7afb 100644 --- a/libcxx/test/std/containers/unord/unord.map/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.map/iterator_concept_conformance.compile.pass.cpp @@ -35,6 +35,7 @@ static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); static_assert(std::indirectly_movable*>); static_assert(!std::indirectly_movable_storable*>); +static_assert(!std::indirectly_swappable); static_assert(std::forward_iterator); static_assert(!std::bidirectional_iterator); @@ -47,6 +48,7 @@ static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); +static_assert(!std::indirectly_swappable); static_assert(std::forward_iterator); static_assert(!std::bidirectional_iterator); @@ -61,6 +63,7 @@ static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); static_assert(std::indirectly_movable*>); static_assert(!std::indirectly_movable_storable*>); +static_assert(!std::indirectly_swappable); static_assert(std::forward_iterator); static_assert(!std::bidirectional_iterator); @@ -73,3 +76,4 @@ static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); +static_assert(!std::indirectly_swappable); diff --git a/libcxx/test/std/containers/unord/unord.multimap/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/iterator_concept_conformance.compile.pass.cpp index b5afdc11a2f7e..2223ce7a0ee76 100644 --- a/libcxx/test/std/containers/unord/unord.multimap/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.multimap/iterator_concept_conformance.compile.pass.cpp @@ -35,6 +35,7 @@ static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); static_assert(std::indirectly_movable*>); static_assert(!std::indirectly_movable_storable*>); +static_assert(!std::indirectly_swappable); static_assert(std::forward_iterator); static_assert(!std::bidirectional_iterator); @@ -47,6 +48,7 @@ static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); +static_assert(!std::indirectly_swappable); static_assert(std::forward_iterator); static_assert(!std::bidirectional_iterator); @@ -61,6 +63,7 @@ static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); static_assert(std::indirectly_movable*>); static_assert(!std::indirectly_movable_storable*>); +static_assert(!std::indirectly_swappable); static_assert(std::forward_iterator); static_assert(!std::bidirectional_iterator); @@ -73,3 +76,4 @@ static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); +static_assert(!std::indirectly_swappable); diff --git a/libcxx/test/std/containers/unord/unord.multiset/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/iterator_concept_conformance.compile.pass.cpp index eb318ca19e954..9063d604b059d 100644 --- a/libcxx/test/std/containers/unord/unord.multiset/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.multiset/iterator_concept_conformance.compile.pass.cpp @@ -35,6 +35,7 @@ static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); static_assert(std::indirectly_movable); static_assert(std::indirectly_movable_storable); +static_assert(!std::indirectly_swappable); static_assert(std::forward_iterator); static_assert(!std::bidirectional_iterator); @@ -47,6 +48,7 @@ static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); +static_assert(!std::indirectly_swappable); static_assert(std::forward_iterator); static_assert(!std::bidirectional_iterator); @@ -61,6 +63,7 @@ static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); static_assert(std::indirectly_movable); static_assert(std::indirectly_movable_storable); +static_assert(!std::indirectly_swappable); static_assert(std::forward_iterator); static_assert(!std::bidirectional_iterator); @@ -73,3 +76,4 @@ static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); +static_assert(!std::indirectly_swappable); diff --git a/libcxx/test/std/containers/unord/unord.set/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.set/iterator_concept_conformance.compile.pass.cpp index db4d5c1159897..d77df390de576 100644 --- a/libcxx/test/std/containers/unord/unord.set/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.set/iterator_concept_conformance.compile.pass.cpp @@ -35,6 +35,7 @@ static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); static_assert(std::indirectly_movable); static_assert(std::indirectly_movable_storable); +static_assert(!std::indirectly_swappable); static_assert(std::forward_iterator); static_assert(!std::bidirectional_iterator); @@ -47,6 +48,7 @@ static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); +static_assert(!std::indirectly_swappable); static_assert(std::forward_iterator); static_assert(!std::bidirectional_iterator); @@ -60,6 +62,7 @@ static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); static_assert(std::indirectly_movable); static_assert(std::indirectly_movable_storable); +static_assert(!std::indirectly_swappable); static_assert(std::forward_iterator); static_assert(!std::bidirectional_iterator); @@ -72,3 +75,4 @@ static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); +static_assert(!std::indirectly_swappable); diff --git a/libcxx/test/std/containers/views/span.iterators/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/containers/views/span.iterators/iterator_concept_conformance.compile.pass.cpp index bbbddcd211e33..a64f8a69c136b 100644 --- a/libcxx/test/std/containers/views/span.iterators/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/containers/views/span.iterators/iterator_concept_conformance.compile.pass.cpp @@ -28,3 +28,4 @@ static_assert(std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); static_assert(std::indirectly_movable); static_assert(std::indirectly_movable_storable); +static_assert(std::indirectly_swappable); diff --git a/libcxx/test/std/input.output/filesystems/class.directory_iterator/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_iterator/iterator_concept_conformance.compile.pass.cpp index 1d3016ca9833b..ac168a08aab2a 100644 --- a/libcxx/test/std/input.output/filesystems/class.directory_iterator/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.directory_iterator/iterator_concept_conformance.compile.pass.cpp @@ -26,6 +26,7 @@ static_assert(std::sentinel_for) static_assert(!std::sized_sentinel_for); static_assert(!std::indirectly_movable); static_assert(!std::indirectly_movable_storable); +static_assert(!std::indirectly_swappable); static_assert(std::input_iterator); static_assert(!std::forward_iterator); @@ -35,3 +36,4 @@ static_assert(std::sentinel_for); static_assert(!std::indirectly_movable); static_assert(!std::indirectly_movable_storable); +static_assert(!std::indirectly_swappable); diff --git a/libcxx/test/std/iterators/iterator.requirements/alg.req.ind.swap/indirectly_swappable.compile.pass.cpp b/libcxx/test/std/iterators/iterator.requirements/alg.req.ind.swap/indirectly_swappable.compile.pass.cpp new file mode 100644 index 0000000000000..d91d81e2fe888 --- /dev/null +++ b/libcxx/test/std/iterators/iterator.requirements/alg.req.ind.swap/indirectly_swappable.compile.pass.cpp @@ -0,0 +1,80 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: libcpp-no-concepts +// UNSUPPORTED: gcc-10 + +// template +// concept indirectly_swappable; + +#include + +#include "test_macros.h" + +template +struct PointerTo { + using value_type = ValueType; + T& operator*() const; +}; + +static_assert(std::indirectly_swappable>); +static_assert(std::indirectly_swappable, PointerTo>); + +struct B; + +struct A { + friend void iter_swap(const PointerTo&, const PointerTo&); +}; + +// Is indirectly swappable. +struct B { + friend void iter_swap(const PointerTo&, const PointerTo&); + friend void iter_swap(const PointerTo&, const PointerTo&); + friend void iter_swap(const PointerTo&, const PointerTo&); +}; + +// Valid except ranges::iter_swap(i2, i1). +struct C { + friend void iter_swap(const PointerTo&, const PointerTo&); + friend void iter_swap(const PointerTo&, const PointerTo&); + friend void iter_swap(const PointerTo&, const PointerTo&) = delete; +}; + +// Valid except ranges::iter_swap(i1, i2). +struct D { + friend void iter_swap(const PointerTo&, const PointerTo&); + friend void iter_swap(const PointerTo&, const PointerTo&) = delete; + friend void iter_swap(const PointerTo&, const PointerTo&); +}; + +// Valid except ranges::iter_swap(i2, i2). +struct E { + E operator=(const E&) = delete; + friend void iter_swap(const PointerTo&, const PointerTo&) = delete; + friend void iter_swap(const PointerTo&, const PointerTo&); + friend void iter_swap(const PointerTo&, const PointerTo&); +}; + +struct F { + friend void iter_swap(const PointerTo&, const PointerTo&) = delete; +}; + +// Valid except ranges::iter_swap(i1, i1). +struct G { + friend void iter_swap(const PointerTo&, const PointerTo&); + friend void iter_swap(const PointerTo&, const PointerTo&); + friend void iter_swap(const PointerTo&, const PointerTo&); +}; + + +static_assert( std::indirectly_swappable, PointerTo>); +static_assert(!std::indirectly_swappable, PointerTo>); +static_assert(!std::indirectly_swappable, PointerTo>); +static_assert(!std::indirectly_swappable, PointerTo>); +static_assert(!std::indirectly_swappable, PointerTo>); diff --git a/libcxx/test/std/iterators/iterator.requirements/alg.req.ind.swap/indirectly_swappable.subsumption.compile.pass.cpp b/libcxx/test/std/iterators/iterator.requirements/alg.req.ind.swap/indirectly_swappable.subsumption.compile.pass.cpp new file mode 100644 index 0000000000000..adbc6fa26d517 --- /dev/null +++ b/libcxx/test/std/iterators/iterator.requirements/alg.req.ind.swap/indirectly_swappable.subsumption.compile.pass.cpp @@ -0,0 +1,32 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: libcpp-no-concepts +// UNSUPPORTED: gcc-10 + +// template +// concept indirectly_swappable; + +#include + +#include + +template + requires std::indirectly_readable && std::indirectly_readable +constexpr bool indirectly_swappable_subsumption() { + return false; +} + +template + requires std::indirectly_swappable +constexpr bool indirectly_swappable_subsumption() { + return true; +} + +static_assert(indirectly_swappable_subsumption()); diff --git a/libcxx/test/std/iterators/predef.iterators/insert.iterators/back.insert.iterator/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/iterators/predef.iterators/insert.iterators/back.insert.iterator/iterator_concept_conformance.compile.pass.cpp index ea04ea8482b22..536f1df2249ae 100644 --- a/libcxx/test/std/iterators/predef.iterators/insert.iterators/back.insert.iterator/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/iterators/predef.iterators/insert.iterators/back.insert.iterator/iterator_concept_conformance.compile.pass.cpp @@ -26,3 +26,4 @@ static_assert(!std::sized_sentinel_for); static_assert(!std::input_iterator); static_assert(std::indirectly_movable); static_assert(std::indirectly_movable_storable); +static_assert(!std::indirectly_swappable); diff --git a/libcxx/test/std/iterators/predef.iterators/insert.iterators/front.insert.iterator/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/iterators/predef.iterators/insert.iterators/front.insert.iterator/iterator_concept_conformance.compile.pass.cpp index 13e8b0352baf9..ec488240c7e3c 100644 --- a/libcxx/test/std/iterators/predef.iterators/insert.iterators/front.insert.iterator/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/iterators/predef.iterators/insert.iterators/front.insert.iterator/iterator_concept_conformance.compile.pass.cpp @@ -26,3 +26,4 @@ static_assert(!std::sized_sentinel_for); static_assert(!std::input_iterator); static_assert(std::indirectly_movable); static_assert(std::indirectly_movable_storable); +static_assert(!std::indirectly_swappable); diff --git a/libcxx/test/std/iterators/predef.iterators/insert.iterators/insert.iterator/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/iterators/predef.iterators/insert.iterators/insert.iterator/iterator_concept_conformance.compile.pass.cpp index b73ca79491474..3f95ccd99a504 100644 --- a/libcxx/test/std/iterators/predef.iterators/insert.iterators/insert.iterator/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/iterators/predef.iterators/insert.iterators/insert.iterator/iterator_concept_conformance.compile.pass.cpp @@ -25,3 +25,4 @@ static_assert(!std::sentinel_for); static_assert(!std::input_iterator); static_assert(std::indirectly_movable); static_assert(std::indirectly_movable_storable); +static_assert(!std::indirectly_swappable); diff --git a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iterator/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iterator/iterator_concept_conformance.compile.pass.cpp index bd28036dc6de3..4f01334412b92 100644 --- a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iterator/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iterator/iterator_concept_conformance.compile.pass.cpp @@ -16,7 +16,6 @@ using iterator = std::move_iterator; - static_assert(std::input_iterator); static_assert(!std::forward_iterator); static_assert(!std::indirectly_writable); @@ -25,3 +24,4 @@ static_assert(std::sentinel_for); static_assert(std::sized_sentinel_for); static_assert(!std::indirectly_movable); static_assert(!std::indirectly_movable_storable); +static_assert(!std::indirectly_swappable); diff --git a/libcxx/test/std/iterators/predef.iterators/reverse.iterators/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/iterators/predef.iterators/reverse.iterators/iterator_concept_conformance.compile.pass.cpp index 687d37db30cd8..25e208e27153a 100644 --- a/libcxx/test/std/iterators/predef.iterators/reverse.iterators/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/iterators/predef.iterators/reverse.iterators/iterator_concept_conformance.compile.pass.cpp @@ -32,6 +32,7 @@ static_assert(!std::random_access_iterator); static_assert(!std::sized_sentinel_for); static_assert( std::indirectly_movable); static_assert( std::indirectly_movable_storable); +static_assert( std::indirectly_swappable); using reverse_random_access_iterator = std::reverse_iterator>; static_assert(common_reverse_iterator_checks()); @@ -40,6 +41,7 @@ static_assert(!std::contiguous_iterator); static_assert(std::sized_sentinel_for); static_assert( std::indirectly_movable); static_assert( std::indirectly_movable_storable); +static_assert( std::indirectly_swappable); using reverse_contiguous_iterator = std::reverse_iterator>; static_assert(common_reverse_iterator_checks()); @@ -48,3 +50,4 @@ static_assert(!std::contiguous_iterator); static_assert(std::sized_sentinel_for); static_assert( std::indirectly_movable); static_assert( std::indirectly_movable_storable); +static_assert( std::indirectly_swappable); diff --git a/libcxx/test/std/iterators/stream.iterators/istream.iterator/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/iterators/stream.iterators/istream.iterator/iterator_concept_conformance.compile.pass.cpp index 8bb5e94218f42..e4116cb67b2c0 100644 --- a/libcxx/test/std/iterators/stream.iterators/istream.iterator/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/iterators/stream.iterators/istream.iterator/iterator_concept_conformance.compile.pass.cpp @@ -26,3 +26,4 @@ static_assert(!std::sized_sentinel_for); static_assert(std::input_iterator); static_assert(!std::indirectly_movable); static_assert(!std::indirectly_movable_storable); +static_assert(!std::indirectly_swappable); diff --git a/libcxx/test/std/iterators/stream.iterators/istreambuf.iterator/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/iterators/stream.iterators/istreambuf.iterator/iterator_concept_conformance.compile.pass.cpp index 2e529ca1fe2a1..cd4fe7690eb70 100644 --- a/libcxx/test/std/iterators/stream.iterators/istreambuf.iterator/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/iterators/stream.iterators/istreambuf.iterator/iterator_concept_conformance.compile.pass.cpp @@ -27,3 +27,4 @@ static_assert(std::sentinel_for); static_assert(!std::sized_sentinel_for); static_assert(std::indirectly_movable); static_assert(std::indirectly_movable_storable); +static_assert(!std::indirectly_swappable); diff --git a/libcxx/test/std/iterators/stream.iterators/ostream.iterator/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/iterators/stream.iterators/ostream.iterator/iterator_concept_conformance.compile.pass.cpp index 068692e637742..9ce64a71a3bf2 100644 --- a/libcxx/test/std/iterators/stream.iterators/ostream.iterator/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/iterators/stream.iterators/ostream.iterator/iterator_concept_conformance.compile.pass.cpp @@ -26,3 +26,4 @@ static_assert(!std::sized_sentinel_for); static_assert(!std::input_iterator); static_assert(std::indirectly_movable); static_assert(std::indirectly_movable_storable); +static_assert(!std::indirectly_swappable); diff --git a/libcxx/test/std/iterators/stream.iterators/ostreambuf.iterator/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/iterators/stream.iterators/ostreambuf.iterator/iterator_concept_conformance.compile.pass.cpp index 2b2d0f9cbdd97..18c3608d77fe5 100644 --- a/libcxx/test/std/iterators/stream.iterators/ostreambuf.iterator/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/iterators/stream.iterators/ostreambuf.iterator/iterator_concept_conformance.compile.pass.cpp @@ -27,3 +27,4 @@ static_assert(!std::sized_sentinel_for); static_assert(!std::input_iterator); static_assert(std::indirectly_movable); static_assert(std::indirectly_movable_storable); +static_assert(!std::indirectly_swappable); diff --git a/libcxx/test/std/re/re.iter/re.regiter/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/re/re.iter/re.regiter/iterator_concept_conformance.compile.pass.cpp index 7818c475330c6..d13f770c241dd 100644 --- a/libcxx/test/std/re/re.iter/re.regiter/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/re/re.iter/re.regiter/iterator_concept_conformance.compile.pass.cpp @@ -23,3 +23,4 @@ static_assert(std::sentinel_for); static_assert(!std::sized_sentinel_for); static_assert(!std::indirectly_movable); static_assert(!std::indirectly_movable_storable); +static_assert(!std::indirectly_swappable); diff --git a/libcxx/test/std/re/re.iter/re.tokiter/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/re/re.iter/re.tokiter/iterator_concept_conformance.compile.pass.cpp index b1ed97a9173b7..a5c40e54c1817 100644 --- a/libcxx/test/std/re/re.iter/re.tokiter/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/re/re.iter/re.tokiter/iterator_concept_conformance.compile.pass.cpp @@ -23,3 +23,4 @@ static_assert(std::sentinel_for); static_assert(!std::indirectly_movable); static_assert(!std::indirectly_movable_storable); +static_assert(!std::indirectly_swappable); diff --git a/libcxx/test/std/strings/basic.string/string.iterators/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/strings/basic.string/string.iterators/iterator_concept_conformance.compile.pass.cpp index 6efb5ae836b5b..98b0cec9c7662 100644 --- a/libcxx/test/std/strings/basic.string/string.iterators/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/strings/basic.string/string.iterators/iterator_concept_conformance.compile.pass.cpp @@ -40,6 +40,7 @@ static_assert( std::indirectly_movable); static_assert( std::indirectly_movable_storable); static_assert(!std::indirectly_movable); static_assert(!std::indirectly_movable_storable); +static_assert( std::indirectly_swappable); static_assert(std::contiguous_iterator); static_assert(!std::indirectly_writable); @@ -59,3 +60,4 @@ static_assert( std::indirectly_movable); static_assert( std::indirectly_movable_storable); static_assert(!std::indirectly_movable); static_assert(!std::indirectly_movable_storable); +static_assert(!std::indirectly_swappable); diff --git a/libcxx/test/std/strings/string.view/string.view.iterators/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/strings/string.view/string.view.iterators/iterator_concept_conformance.compile.pass.cpp index fb12fa84491a1..0dfce8ea8b0c6 100644 --- a/libcxx/test/std/strings/string.view/string.view.iterators/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/strings/string.view/string.view.iterators/iterator_concept_conformance.compile.pass.cpp @@ -33,6 +33,7 @@ static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); static_assert(std::indirectly_movable); static_assert(std::indirectly_movable_storable); +static_assert(!std::indirectly_swappable); static_assert(std::contiguous_iterator); static_assert(!std::indirectly_writable); @@ -44,3 +45,4 @@ static_assert(std::sized_sentinel_for); static_assert(std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); static_assert(!std::sized_sentinel_for); +static_assert(!std::indirectly_swappable); diff --git a/libcxx/test/std/utilities/memory/unique.ptr/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/utilities/memory/unique.ptr/iterator_concept_conformance.compile.pass.cpp index 68c06bb8168ff..9a64d0f414cbf 100644 --- a/libcxx/test/std/utilities/memory/unique.ptr/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/utilities/memory/unique.ptr/iterator_concept_conformance.compile.pass.cpp @@ -21,6 +21,7 @@ static_assert(std::indirectly_writable, int>); static_assert(!std::weakly_incrementable >); static_assert(std::indirectly_movable, std::unique_ptr>); static_assert(std::indirectly_movable_storable, std::unique_ptr>); +static_assert(std::indirectly_swappable, std::unique_ptr >); static_assert(!std::indirectly_readable >); static_assert(!std::indirectly_writable, void>); diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/iterator_concept_conformance.compile.pass.cpp index 4fe7710f85ba4..df8df75aa315a 100644 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/iterator_concept_conformance.compile.pass.cpp +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/iterator_concept_conformance.compile.pass.cpp @@ -21,6 +21,7 @@ static_assert(std::indirectly_writable, int>); static_assert(!std::weakly_incrementable >); static_assert(std::indirectly_movable, std::shared_ptr>); static_assert(std::indirectly_movable_storable, std::shared_ptr>); +static_assert(std::indirectly_swappable, std::shared_ptr >); static_assert(!std::indirectly_readable >); static_assert(!std::indirectly_writable, void>); From f1b9ce2736d826df2ff4cbd67116864f0856334f Mon Sep 17 00:00:00 2001 From: Hansang Bae Date: Wed, 30 Jun 2021 14:01:04 -0500 Subject: [PATCH 468/619] [OpenMP] Fix a few issues with hidden helper task This patch includes the following changes to address a few issues when using hidden helper task. - Assertion is triggered when there are inadvertent calls to hidden helper functions on non-Linux OS - Added deinit code in __kmp_internal_end_library function to fix random shutdown crashes - Moved task data access into the lock-guarded region in __kmp_push_task Differential Revision: https://reviews.llvm.org/D105308 --- openmp/runtime/src/kmp_runtime.cpp | 16 ++++++++++-- openmp/runtime/src/kmp_tasking.cpp | 4 ++- openmp/runtime/src/z_Linux_util.cpp | 40 +++++++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 3 deletions(-) diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp index 414e9ba4e36d7..f6a53825f2d10 100644 --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -6204,6 +6204,16 @@ void __kmp_internal_end_library(int gtid_req) { return; } + // If hidden helper team has been initialized, we need to deinit it + if (TCR_4(__kmp_init_hidden_helper) && + !TCR_4(__kmp_hidden_helper_team_done)) { + TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE); + // First release the main thread to let it continue its work + __kmp_hidden_helper_main_thread_release(); + // Wait until the hidden helper team has been destroyed + __kmp_hidden_helper_threads_deinitz_wait(); + } + KMP_MB(); /* Flush all pending memory write invalidates. */ /* find out who we are and what we should do */ { @@ -6317,7 +6327,8 @@ void __kmp_internal_end_thread(int gtid_req) { } // If hidden helper team has been initialized, we need to deinit it - if (TCR_4(__kmp_init_hidden_helper)) { + if (TCR_4(__kmp_init_hidden_helper) && + !TCR_4(__kmp_hidden_helper_team_done)) { TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE); // First release the main thread to let it continue its work __kmp_hidden_helper_main_thread_release(); @@ -8697,11 +8708,12 @@ void __kmp_omp_display_env(int verbose) { // Globals and functions for hidden helper task kmp_info_t **__kmp_hidden_helper_threads; kmp_info_t *__kmp_hidden_helper_main_thread; -kmp_int32 __kmp_hidden_helper_threads_num = 8; std::atomic __kmp_unexecuted_hidden_helper_tasks; #if KMP_OS_LINUX +kmp_int32 __kmp_hidden_helper_threads_num = 8; kmp_int32 __kmp_enable_hidden_helper = TRUE; #else +kmp_int32 __kmp_hidden_helper_threads_num = 0; kmp_int32 __kmp_enable_hidden_helper = FALSE; #endif diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp index 62f0bdca4be93..7dfd256801b5a 100644 --- a/openmp/runtime/src/kmp_tasking.cpp +++ b/openmp/runtime/src/kmp_tasking.cpp @@ -436,10 +436,12 @@ static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) { gtid, taskdata, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head, thread_data->td.td_deque_tail)); + auto hidden_helper = taskdata->td_flags.hidden_helper; + __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock); // Signal one worker thread to execute the task - if (taskdata->td_flags.hidden_helper) { + if (UNLIKELY(hidden_helper)) { // Wake hidden helper threads up if they're sleeping __kmp_hidden_helper_worker_thread_signal(); } diff --git a/openmp/runtime/src/z_Linux_util.cpp b/openmp/runtime/src/z_Linux_util.cpp index bd50987a857ac..5c2486904a76e 100644 --- a/openmp/runtime/src/z_Linux_util.cpp +++ b/openmp/runtime/src/z_Linux_util.cpp @@ -25,7 +25,9 @@ #include #endif #include // HUGE_VAL. +#if KMP_OS_LINUX #include +#endif // KMP_OS_LINUX #include #include #include @@ -2468,6 +2470,7 @@ int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc, #endif +#if KMP_OS_LINUX // Functions for hidden helper task namespace { // Condition variable for initializing hidden helper team @@ -2628,5 +2631,42 @@ void __kmp_hidden_helper_threads_deinitz_release() { status = pthread_mutex_unlock(&hidden_helper_threads_deinitz_lock); KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status); } +#else // KMP_OS_LINUX +void __kmp_hidden_helper_worker_thread_wait() { + KMP_ASSERT(0 && "Hidden helper task is not supported on this OS"); +} + +void __kmp_do_initialize_hidden_helper_threads() { + KMP_ASSERT(0 && "Hidden helper task is not supported on this OS"); +} + +void __kmp_hidden_helper_threads_initz_wait() { + KMP_ASSERT(0 && "Hidden helper task is not supported on this OS"); +} + +void __kmp_hidden_helper_initz_release() { + KMP_ASSERT(0 && "Hidden helper task is not supported on this OS"); +} + +void __kmp_hidden_helper_main_thread_wait() { + KMP_ASSERT(0 && "Hidden helper task is not supported on this OS"); +} + +void __kmp_hidden_helper_main_thread_release() { + KMP_ASSERT(0 && "Hidden helper task is not supported on this OS"); +} + +void __kmp_hidden_helper_worker_thread_signal() { + KMP_ASSERT(0 && "Hidden helper task is not supported on this OS"); +} + +void __kmp_hidden_helper_threads_deinitz_wait() { + KMP_ASSERT(0 && "Hidden helper task is not supported on this OS"); +} + +void __kmp_hidden_helper_threads_deinitz_release() { + KMP_ASSERT(0 && "Hidden helper task is not supported on this OS"); +} +#endif // KMP_OS_LINUX // end of file // From 64a0241d64c274eb40fc1cf8b9f938ca013873a1 Mon Sep 17 00:00:00 2001 From: Arthur O'Dwyer Date: Thu, 1 Jul 2021 18:12:30 -0400 Subject: [PATCH 469/619] [libc++] IWYU <__utility/pair.h> in <__functional/hash.h>. NFCI. This was the only thing preventing any one of our detail headers from compiling on its own. --- libcxx/include/__functional/hash.h | 1 + 1 file changed, 1 insertion(+) diff --git a/libcxx/include/__functional/hash.h b/libcxx/include/__functional/hash.h index e169ebc6ea598..eb715e4b9c890 100644 --- a/libcxx/include/__functional/hash.h +++ b/libcxx/include/__functional/hash.h @@ -14,6 +14,7 @@ #include <__tuple> #include <__utility/forward.h> #include <__utility/move.h> +#include <__utility/pair.h> #include <__utility/swap.h> #include #include From 6bbbd7b499f2d5e1d716f33fdf5c072083f007da Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Thu, 1 Jul 2021 20:57:53 +0000 Subject: [PATCH 470/619] Update MLIRContext to allow injecting an external ThreadPool (NFC) The context can be created with threading disabled, to avoid creating a thread pool that may be destroyed when injecting another one later. Differential Revision: https://reviews.llvm.org/D105302 --- mlir/include/mlir/IR/MLIRContext.h | 30 ++++++++++++++-- mlir/lib/IR/MLIRContext.cpp | 57 +++++++++++++++++++++++------- 2 files changed, 72 insertions(+), 15 deletions(-) diff --git a/mlir/include/mlir/IR/MLIRContext.h b/mlir/include/mlir/IR/MLIRContext.h index 7b0fcd66dc2b0..196c6ef441a6a 100644 --- a/mlir/include/mlir/IR/MLIRContext.h +++ b/mlir/include/mlir/IR/MLIRContext.h @@ -38,11 +38,27 @@ class StorageUniquer; /// a very generic name ("Context") and because it is uncommon for clients to /// interact with it. /// +/// The context wrap some multi-threading facilities, and in particular by +/// default it will implicitly create a thread pool. +/// This can be undesirable if multiple context exists at the same time or if a +/// process will be long-lived and create and destroy contexts. +/// To control better thread spawning, an externally owned ThreadPool can be +/// injected in the context. For example: +/// +/// llvm::ThreadPool myThreadPool; +/// while (auto *request = nextCompilationRequests()) { +/// MLIRContext ctx(registry, MLIRContext::Threading::DISABLED); +/// ctx.setThreadPool(myThreadPool); +/// processRequest(request, cxt); +/// } +/// class MLIRContext { public: + enum class Threading { DISABLED, ENABLED }; /// Create a new Context. - explicit MLIRContext(); - explicit MLIRContext(const DialectRegistry ®istry); + explicit MLIRContext(Threading multithreading = Threading::ENABLED); + explicit MLIRContext(const DialectRegistry ®istry, + Threading multithreading = Threading::ENABLED); ~MLIRContext(); /// Return information about all IR dialects loaded in the context. @@ -118,7 +134,15 @@ class MLIRContext { disableMultithreading(!enable); } - /// Return the thread pool owned by this context. This method requires that + /// Set a new thread pool to be used in this context. This method requires + /// that multithreading is disabled for this context prior to the call. This + /// allows to share a thread pool across multiple contexts, as well as + /// decoupling the lifetime of the threads from the contexts. The thread pool + /// must outlive the context. Multi-threading will be enabled as part of this + /// method. + void setThreadPool(llvm::ThreadPool &pool); + + /// Return the thread pool used by this context. This method requires that /// multithreading be enabled within the context, and should generally not be /// used directly. Users should instead prefer the threading utilities within /// Threading.h. diff --git a/mlir/lib/IR/MLIRContext.cpp b/mlir/lib/IR/MLIRContext.cpp index ddb909949cdfc..7e4ec8261a2fc 100644 --- a/mlir/lib/IR/MLIRContext.cpp +++ b/mlir/lib/IR/MLIRContext.cpp @@ -261,8 +261,15 @@ class MLIRContextImpl { // Other //===--------------------------------------------------------------------===// - /// The thread pool to use when processing MLIR tasks in parallel. - llvm::Optional threadPool; + /// This points to the ThreadPool used when processing MLIR tasks in parallel. + /// It can't be nullptr when multi-threading is enabled. Otherwise if + /// multi-threading is disabled, and the threadpool wasn't externally provided + /// using `setThreadPool`, this will be nullptr. + llvm::ThreadPool *threadPool = nullptr; + + /// In case where the thread pool is owned by the context, this ensures + /// destruction with the context. + std::unique_ptr ownedThreadPool; /// This is a list of dialects that are created referring to this context. /// The MLIRContext owns the objects. @@ -334,9 +341,13 @@ class MLIRContextImpl { StringAttr emptyStringAttr; public: - MLIRContextImpl() : identifiers(identifierAllocator) { - if (threadingIsEnabled) - threadPool.emplace(); + MLIRContextImpl(bool threadingIsEnabled) + : threadingIsEnabled(threadingIsEnabled), + identifiers(identifierAllocator) { + if (threadingIsEnabled) { + ownedThreadPool = std::make_unique(); + threadPool = ownedThreadPool.get(); + } } ~MLIRContextImpl() { for (auto typeMapping : registeredTypes) @@ -347,10 +358,11 @@ class MLIRContextImpl { }; } // end namespace mlir -MLIRContext::MLIRContext() : MLIRContext(DialectRegistry()) {} +MLIRContext::MLIRContext(Threading setting) + : MLIRContext(DialectRegistry(), setting) {} -MLIRContext::MLIRContext(const DialectRegistry ®istry) - : impl(new MLIRContextImpl) { +MLIRContext::MLIRContext(const DialectRegistry ®istry, Threading setting) + : impl(new MLIRContextImpl(setting == Threading::ENABLED)) { // Initialize values based on the command line flags if they were provided. if (clOptions.isConstructed()) { disableMultithreading(clOptions->disableThreading); @@ -579,15 +591,36 @@ void MLIRContext::disableMultithreading(bool disable) { // Destroy thread pool (stop all threads) if it is no longer needed, or create // a new one if multithreading was re-enabled. - if (!impl->threadingIsEnabled) - impl->threadPool.reset(); - else if (!impl->threadPool.hasValue()) - impl->threadPool.emplace(); + if (disable) { + // If the thread pool is owned, explicitly set it to nullptr to avoid + // keeping a dangling pointer around. If the thread pool is externally + // owned, we don't do anything. + if (impl->ownedThreadPool) { + assert(impl->threadPool); + impl->threadPool = nullptr; + impl->ownedThreadPool.reset(); + } + } else if (!impl->threadPool) { + // The thread pool isn't externally provided. + assert(!impl->ownedThreadPool); + impl->ownedThreadPool = std::make_unique(); + impl->threadPool = impl->ownedThreadPool.get(); + } +} + +void MLIRContext::setThreadPool(llvm::ThreadPool &pool) { + assert(!isMultithreadingEnabled() && + "expected multi-threading to be disabled when setting a ThreadPool"); + impl->threadPool = &pool; + impl->ownedThreadPool.reset(); + enableMultithreading(); } llvm::ThreadPool &MLIRContext::getThreadPool() { assert(isMultithreadingEnabled() && "expected multi-threading to be enabled within the context"); + assert(impl->threadPool && + "multi-threading is enabled but threadpool not set"); return *impl->threadPool; } From 14d64be6e54a23e1a20216b6a42ae2ce5926d2ed Mon Sep 17 00:00:00 2001 From: Jon Roelofs Date: Thu, 1 Jul 2021 08:25:30 -0700 Subject: [PATCH 471/619] [GISel] Print better error messages for missing Combiner Observer calls Differential revision: https://reviews.llvm.org/D105290 --- llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp | 19 ++++++++++++++++--- llvm/lib/CodeGen/GlobalISel/Combiner.cpp | 12 +++++++++--- 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp b/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp index f146271718ee7..f9bfe8518083c 100644 --- a/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp @@ -260,8 +260,17 @@ void GISelCSEInfo::releaseMemory() { #endif } +#ifndef NDEBUG +static const char *stringify(const MachineInstr *MI, std::string &S) { + raw_string_ostream OS(S); + OS << *MI; + return OS.str().c_str(); +} +#endif + Error GISelCSEInfo::verify() { #ifndef NDEBUG + std::string S1, S2; handleRecordedInsts(); // For each instruction in map from MI -> UMI, // Profile(MI) and make sure UMI is found for that profile. @@ -274,7 +283,8 @@ Error GISelCSEInfo::verify() { if (FoundNode != It.second) return createStringError(std::errc::not_supported, "CSEMap mismatch, InstrMapping has MIs without " - "corresponding Nodes in CSEMap"); + "corresponding Nodes in CSEMap:\n%s", + stringify(It.second->MI, S1)); } // For every node in the CSEMap, make sure that the InstrMapping @@ -282,11 +292,14 @@ Error GISelCSEInfo::verify() { for (const UniqueMachineInstr &UMI : CSEMap) { if (!InstrMapping.count(UMI.MI)) return createStringError(std::errc::not_supported, - "Node in CSE without InstrMapping", UMI.MI); + "Node in CSE without InstrMapping:\n%s", + stringify(UMI.MI, S1)); if (InstrMapping[UMI.MI] != &UMI) return createStringError(std::make_error_code(std::errc::not_supported), - "Mismatch in CSE mapping"); + "Mismatch in CSE mapping:\n%s\n%s", + stringify(InstrMapping[UMI.MI]->MI, S1), + stringify(UMI.MI, S2)); } #endif return Error::success(); diff --git a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp index f1071d96e5a36..6f103bca6892f 100644 --- a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp @@ -153,8 +153,14 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF, MFChanged |= Changed; } while (Changed); - assert(!CSEInfo || (!errorToBool(CSEInfo->verify()) && - "CSEInfo is not consistent. Likely missing calls to " - "observer on mutations")); +#ifndef NDEBUG + if (CSEInfo) { + if (auto E = CSEInfo->verify()) { + errs() << E << '\n'; + assert(false && "CSEInfo is not consistent. Likely missing calls to " + "observer on mutations."); + } + } +#endif return MFChanged; } From 33a7b4d9d8e6a113108aa71ed78ca32a83c68523 Mon Sep 17 00:00:00 2001 From: Petr Hosek Date: Wed, 30 Jun 2021 00:19:04 -0700 Subject: [PATCH 472/619] [InstrProfiling] Use external weak reference for bias variable We need the compiler generated variable to override the weak symbol of the same name inside the profile runtime, but using LinkOnceODRLinkage results in weak symbol being emitted which leads to an issue where the linker might choose either of the weak symbols potentially disabling the runtime counter relocation. This change replaces the use of weak definition inside the runtime with an external weak reference to address the issue. We also place the compiler generated symbol inside a COMDAT group so dead definition can be garbage collected by the linker. Differential Revision: https://reviews.llvm.org/D105176 --- compiler-rt/lib/profile/CMakeLists.txt | 1 - compiler-rt/lib/profile/InstrProfiling.h | 8 ++++---- compiler-rt/lib/profile/InstrProfilingBiasVar.c | 15 --------------- compiler-rt/lib/profile/InstrProfilingFile.c | 4 +++- .../lib/profile/InstrProfilingPlatformFuchsia.c | 7 +++---- .../Transforms/Instrumentation/InstrProfiling.cpp | 9 +++++++++ .../InstrProfiling/runtime-counter-relocation.ll | 3 ++- 7 files changed, 21 insertions(+), 26 deletions(-) delete mode 100644 compiler-rt/lib/profile/InstrProfilingBiasVar.c diff --git a/compiler-rt/lib/profile/CMakeLists.txt b/compiler-rt/lib/profile/CMakeLists.txt index f899e402d9222..f5e13574b7ce8 100644 --- a/compiler-rt/lib/profile/CMakeLists.txt +++ b/compiler-rt/lib/profile/CMakeLists.txt @@ -53,7 +53,6 @@ set(PROFILE_SOURCES InstrProfiling.c InstrProfilingInternal.c InstrProfilingValue.c - InstrProfilingBiasVar.c InstrProfilingBuffer.c InstrProfilingFile.c InstrProfilingMerge.c diff --git a/compiler-rt/lib/profile/InstrProfiling.h b/compiler-rt/lib/profile/InstrProfiling.h index 39fe4db73da62..1c0e8f3c5c8ca 100644 --- a/compiler-rt/lib/profile/InstrProfiling.h +++ b/compiler-rt/lib/profile/InstrProfiling.h @@ -320,10 +320,10 @@ extern uint64_t INSTR_PROF_RAW_VERSION_VAR; /* __llvm_profile_raw_version */ extern char INSTR_PROF_PROFILE_NAME_VAR[1]; /* __llvm_profile_filename. */ /*! - * This variable is a weak symbol defined in InstrProfilingBiasVar.c. It - * allows compiler instrumentation to provide overriding definition with - * value from compiler command line. This variable has hidden visibility. + * This variable is a weak external reference which could be used to detect + * whether or not the compiler defined this symbol. */ -COMPILER_RT_VISIBILITY extern intptr_t __llvm_profile_counter_bias; +COMPILER_RT_VISIBILITY COMPILER_RT_WEAK extern intptr_t + __llvm_profile_counter_bias; #endif /* PROFILE_INSTRPROFILING_H_ */ diff --git a/compiler-rt/lib/profile/InstrProfilingBiasVar.c b/compiler-rt/lib/profile/InstrProfilingBiasVar.c deleted file mode 100644 index 05745fd858d97..0000000000000 --- a/compiler-rt/lib/profile/InstrProfilingBiasVar.c +++ /dev/null @@ -1,15 +0,0 @@ -/*===- InstrProfilingBiasVar.c - profile counter bias variable setup ------===*\ -|* -|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -|* See https://llvm.org/LICENSE.txt for license information. -|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -|* -\*===----------------------------------------------------------------------===*/ - -#include "InstrProfiling.h" - -/* The runtime should only provide its own definition of this symbol when the - * user has not specified one. Set this up by moving the runtime's copy of this - * symbol to an object file within the archive. - */ -COMPILER_RT_WEAK intptr_t __llvm_profile_counter_bias = -1; diff --git a/compiler-rt/lib/profile/InstrProfilingFile.c b/compiler-rt/lib/profile/InstrProfilingFile.c index 420e8246f4337..d88531cbcb633 100644 --- a/compiler-rt/lib/profile/InstrProfilingFile.c +++ b/compiler-rt/lib/profile/InstrProfilingFile.c @@ -999,7 +999,9 @@ void __llvm_profile_initialize_file(void) { ProfileNameSpecifier PNS = PNS_unknown; int hasCommandLineOverrider = (INSTR_PROF_PROFILE_NAME_VAR[0] != 0); - if (__llvm_profile_counter_bias != -1) + /* This symbol is defined by the compiler when runtime counter relocation is + * used and runtime provides a weak external reference so we can check it. */ + if (&__llvm_profile_counter_bias) lprofSetRuntimeCounterRelocation(1); EnvFilenamePat = getFilenamePatFromEnv(); diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformFuchsia.c b/compiler-rt/lib/profile/InstrProfilingPlatformFuchsia.c index 8bd5e969aa50c..31f3e11a072e1 100644 --- a/compiler-rt/lib/profile/InstrProfilingPlatformFuchsia.c +++ b/compiler-rt/lib/profile/InstrProfilingPlatformFuchsia.c @@ -116,10 +116,9 @@ void __llvm_profile_initialize(void) { return; } - /* This symbol is defined as weak and initialized to -1 by the runtimer, but - * compiler will generate a strong definition initialized to 0 when runtime - * counter relocation is used. */ - if (__llvm_profile_counter_bias == -1) { + /* This symbol is defined by the compiler when runtime counter relocation is + * used and runtime provides a weak external reference so we can check it. */ + if (!&__llvm_profile_counter_bias) { lprofWrite("LLVM Profile: counter relocation at runtime is required\n"); return; } diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index 721f8c034438f..9264f83156c55 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -690,10 +690,19 @@ void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) { Type *Int64Ty = Type::getInt64Ty(M->getContext()); GlobalVariable *Bias = M->getGlobalVariable(getInstrProfCounterBiasVarName()); if (!Bias) { + // Compiler must define this variable when runtime counter relocation + // is being used. Runtime has a weak external reference that is used + // to check whether that's the case or not. Bias = new GlobalVariable(*M, Int64Ty, false, GlobalValue::LinkOnceODRLinkage, Constant::getNullValue(Int64Ty), getInstrProfCounterBiasVarName()); Bias->setVisibility(GlobalVariable::HiddenVisibility); + // A definition that's weak (linkonce_odr) without being in a COMDAT + // section wouldn't lead to link errors, but it would lead to a dead + // data word from every TU but one. Putting it in COMDAT ensures there + // will be exactly one data slot in the link. + if (TT.supportsCOMDAT()) + Bias->setComdat(M->getOrInsertComdat(Bias->getName())); } LI = Builder.CreateLoad(Int64Ty, Bias); } diff --git a/llvm/test/Instrumentation/InstrProfiling/runtime-counter-relocation.ll b/llvm/test/Instrumentation/InstrProfiling/runtime-counter-relocation.ll index 672492474c5ff..cd5d36b8a6e3e 100644 --- a/llvm/test/Instrumentation/InstrProfiling/runtime-counter-relocation.ll +++ b/llvm/test/Instrumentation/InstrProfiling/runtime-counter-relocation.ll @@ -4,7 +4,8 @@ target triple = "x86_64-unknown-linux-gnu" @__profn_foo = private constant [3 x i8] c"foo" -; RELOC: @__llvm_profile_counter_bias = linkonce_odr hidden global i64 0 +; RELOC: $__llvm_profile_counter_bias = comdat any +; RELOC: @__llvm_profile_counter_bias = linkonce_odr hidden global i64 0, comdat ; CHECK-LABEL: define void @foo ; CHECK-NEXT: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_foo, i64 0, i64 0) From 430bfc4f3ba631bee7f662895d78642b78adf54d Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Thu, 1 Jul 2021 22:26:09 +0000 Subject: [PATCH 473/619] [gn build] Port 33a7b4d9d8e6 --- llvm/utils/gn/secondary/compiler-rt/lib/profile/BUILD.gn | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/profile/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/profile/BUILD.gn index 5fab007153e49..a0bc9b72c3652 100644 --- a/llvm/utils/gn/secondary/compiler-rt/lib/profile/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/lib/profile/BUILD.gn @@ -34,7 +34,6 @@ static_library("profile") { "GCDAProfiling.c", "InstrProfiling.c", "InstrProfiling.h", - "InstrProfilingBiasVar.c", "InstrProfilingBuffer.c", "InstrProfilingFile.c", "InstrProfilingInternal.c", From 0176ac95035eb6508f8f838c7d72afa03d67b5aa Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Thu, 1 Jul 2021 15:35:48 -0700 Subject: [PATCH 474/619] [AArch64] Optimize SVE bitcasts of unpacked types. Target-independent code only knows how to spill to the stack; instead, use AArch64ISD::REINTERPRET_CAST. Differential Revision: https://reviews.llvm.org/D104573 --- .../Target/AArch64/AArch64ISelLowering.cpp | 32 ++++++++++++++--- llvm/lib/Target/AArch64/AArch64ISelLowering.h | 2 ++ llvm/test/CodeGen/AArch64/sve-bitcast.ll | 34 +++++++++++++++++++ 3 files changed, 63 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index ce778c5ebfcab..1a490ab11822a 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1192,6 +1192,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); } + // Legalize unpacked bitcasts to REINTERPRET_CAST. + for (auto VT : {MVT::nxv2i32, MVT::nxv2f32}) + setOperationAction(ISD::BITCAST, VT, Custom); + for (auto VT : {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1}) { setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); setOperationAction(ISD::SELECT, VT, Custom); @@ -3508,17 +3512,30 @@ SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op, return CallResult.first; } +static MVT getSVEContainerType(EVT ContentTy); + SDValue AArch64TargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const { EVT OpVT = Op.getValueType(); + EVT ArgVT = Op.getOperand(0).getValueType(); if (useSVEForFixedLengthVectorVT(OpVT)) return LowerFixedLengthBitcastToSVE(Op, DAG); + if (OpVT == MVT::nxv2f32) { + if (ArgVT.isInteger()) { + SDValue ExtResult = + DAG.getNode(ISD::ANY_EXTEND, SDLoc(Op), getSVEContainerType(ArgVT), + Op.getOperand(0)); + return getSVESafeBitCast(MVT::nxv2f32, ExtResult, DAG); + } + return getSVESafeBitCast(MVT::nxv2f32, Op.getOperand(0), DAG); + } + if (OpVT != MVT::f16 && OpVT != MVT::bf16) return SDValue(); - assert(Op.getOperand(0).getValueType() == MVT::i16); + assert(ArgVT == MVT::i16); SDLoc DL(Op); Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op.getOperand(0)); @@ -16866,11 +16883,18 @@ bool AArch64TargetLowering::getPostIndexedAddressParts( return true; } -static void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl &Results, - SelectionDAG &DAG) { +void AArch64TargetLowering::ReplaceBITCASTResults( + SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { SDLoc DL(N); SDValue Op = N->getOperand(0); + if (N->getValueType(0) == MVT::nxv2i32 && + Op.getValueType().isFloatingPoint()) { + SDValue CastResult = getSVESafeBitCast(MVT::nxv2i64, Op, DAG); + Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::nxv2i32, CastResult)); + return; + } + if (N->getValueType(0) != MVT::i16 || (Op.getValueType() != MVT::f16 && Op.getValueType() != MVT::bf16)) return; @@ -18428,8 +18452,6 @@ SDValue AArch64TargetLowering::getSVESafeBitCast(EVT VT, SDValue Op, EVT PackedVT = getPackedSVEVectorVT(VT.getVectorElementType()); EVT PackedInVT = getPackedSVEVectorVT(InVT.getVectorElementType()); - assert((VT == PackedVT || InVT == PackedInVT) && - "Cannot cast between unpacked scalable vector types!"); // Pack input if required. if (InVT != PackedInVT) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 7daa61996739f..a2e11afb337a1 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -1066,6 +1066,8 @@ class AArch64TargetLowering : public TargetLowering { void ReplaceNodeResults(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const override; + void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl &Results, + SelectionDAG &DAG) const; void ReplaceExtractSubVectorResults(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const; diff --git a/llvm/test/CodeGen/AArch64/sve-bitcast.ll b/llvm/test/CodeGen/AArch64/sve-bitcast.ll index 94566b7de3edd..dda4232059a56 100644 --- a/llvm/test/CodeGen/AArch64/sve-bitcast.ll +++ b/llvm/test/CodeGen/AArch64/sve-bitcast.ll @@ -450,5 +450,39 @@ define @bitcast_double_to_bfloat( %v) ret %bc } +define @bitcast_short_float_to_i32( %v) #0 { +; CHECK-LABEL: bitcast_short_float_to_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvt z0.s, p0/m, z0.d +; CHECK-NEXT: ret + %trunc = fptrunc %v to + %bitcast = bitcast %trunc to + ret %bitcast +} + +define @bitcast_short_i32_to_float( %v) #0 { +; CHECK-LABEL: bitcast_short_i32_to_float: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvt z0.d, p0/m, z0.s +; CHECK-NEXT: ret + %trunc = trunc %v to + %bitcast = bitcast %trunc to + %extended = fpext %bitcast to + ret %extended +} + +define @bitcast_short_half_to_float( %v) #0 { +; CHECK-LABEL: bitcast_short_half_to_float: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z0.h +; CHECK-NEXT: ret + %add = fadd %v, %v + %bitcast = bitcast %add to + ret %bitcast +} + ; +bf16 is required for the bfloat version. attributes #0 = { "target-features"="+sve,+bf16" } From 8dea784b3ed7df3edd9e3b59b1e1b58d2a4ac175 Mon Sep 17 00:00:00 2001 From: Rob Suderman Date: Thu, 17 Jun 2021 15:58:37 -0700 Subject: [PATCH 475/619] [mlir][tosa] Add tosa shape inference with InferReturnTypeComponent Added InferReturnTypeComponents for NAry operations, reshape, and reverse. With the additional tosa-infer-shapes pass, we can infer/propagate shapes across a set of TOSA operations. Current version does not modify the FuncOp type by inserting an unrealized conversion cast prior to any new non-matchin returns. Differential Revision: https://reviews.llvm.org/D105312 --- mlir/include/mlir/Dialect/Tosa/IR/TosaOps.h | 1 + mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td | 207 +++++++++---- .../mlir/Dialect/Tosa/Transforms/Passes.h | 2 + .../mlir/Dialect/Tosa/Transforms/Passes.td | 15 + mlir/lib/Dialect/Tosa/IR/TosaOps.cpp | 142 +++++++++ .../Dialect/Tosa/Transforms/CMakeLists.txt | 1 + .../Tosa/Transforms/TosaInferShapes.cpp | 247 ++++++++++++++++ .../Tosa/Transforms/TosaMakeBroadcastable.cpp | 1 + .../TosaToLinalg/tosa-to-linalg.mlir | 2 +- mlir/test/Dialect/Tosa/tosa_infer_shapes.mlir | 278 ++++++++++++++++++ mlir/test/lib/Dialect/Tosa/TosaTestPasses.cpp | 3 +- 11 files changed, 835 insertions(+), 64 deletions(-) create mode 100644 mlir/lib/Dialect/Tosa/Transforms/TosaInferShapes.cpp create mode 100644 mlir/test/Dialect/Tosa/tosa_infer_shapes.mlir diff --git a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.h b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.h index 0af72312af28f..b0d5eb79fbfcf 100644 --- a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.h +++ b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.h @@ -15,6 +15,7 @@ #include "mlir/Dialect/Quant/QuantOps.h" #include "mlir/Dialect/Traits.h" +#include "mlir/Interfaces/InferTypeOpInterface.h" #include "mlir/Interfaces/LoopLikeInterface.h" #include "mlir/Interfaces/SideEffectInterfaces.h" diff --git a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td index 3a1f9d26be118..06867ef199e11 100644 --- a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td +++ b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td @@ -17,6 +17,7 @@ include "mlir/IR/OpBase.td" include "mlir/Interfaces/SideEffectInterfaces.td" +include "mlir/Interfaces/InferTypeOpInterface.td" include "mlir/Interfaces/LoopLikeInterface.td" include "mlir/Dialect/Tosa/IR/TosaInterfaces.td" @@ -284,7 +285,10 @@ def Tosa_TransposeConv2DOp : Tosa_Op<"transpose_conv2d", [NoSideEffect]> { //===----------------------------------------------------------------------===// // Operator: clamp //===----------------------------------------------------------------------===// -def Tosa_ClampOp : Tosa_Op<"clamp", [NoSideEffect, SameOperandsAndResultType]> { +def Tosa_ClampOp : Tosa_Op<"clamp", [ + DeclareOpInterfaceMethods, + NoSideEffect, SameOperandsAndResultType]> { let summary = "Computes clamp(features, min, max)."; let description = [{ @@ -309,7 +313,10 @@ def Tosa_ClampOp : Tosa_Op<"clamp", [NoSideEffect, SameOperandsAndResultType]> { //===----------------------------------------------------------------------===// // Operator: reluN //===----------------------------------------------------------------------===// -def Tosa_ReluNOp : Tosa_Op<"reluN", [NoSideEffect, SameOperandsAndResultType]> { +def Tosa_ReluNOp : Tosa_Op<"reluN", [ + DeclareOpInterfaceMethods, + NoSideEffect, SameOperandsAndResultType]> { let summary = "Computes rectified linear: `max(features, N)`."; let description = [{ @@ -330,8 +337,10 @@ def Tosa_ReluNOp : Tosa_Op<"reluN", [NoSideEffect, SameOperandsAndResultType]> { //===----------------------------------------------------------------------===// // Operator: sigmoid //===----------------------------------------------------------------------===// -def Tosa_SigmoidOp : Tosa_Op<"sigmoid", [NoSideEffect, - SameOperandsAndResultType]> { +def Tosa_SigmoidOp : Tosa_Op<"sigmoid", [ + DeclareOpInterfaceMethods, + NoSideEffect, SameOperandsAndResultType]> { let summary = "Computes elementwise sigmoid of input."; let description = [{ @@ -354,7 +363,10 @@ def Tosa_SigmoidOp : Tosa_Op<"sigmoid", [NoSideEffect, //===----------------------------------------------------------------------===// // Operator: tanh //===----------------------------------------------------------------------===// -def Tosa_TanhOp : Tosa_Op<"tanh", [NoSideEffect, SameOperandsAndResultType]> { +def Tosa_TanhOp : Tosa_Op<"tanh", [ + DeclareOpInterfaceMethods, + NoSideEffect, SameOperandsAndResultType]> { let summary = "Computes elementwise hyperbolic tangent of input"; let description = [{ @@ -382,8 +394,10 @@ def Tosa_TanhOp : Tosa_Op<"tanh", [NoSideEffect, SameOperandsAndResultType]> { //===----------------------------------------------------------------------===// // Operator: add //===----------------------------------------------------------------------===// -def Tosa_AddOp : Tosa_Op<"add", [ResultsBroadcastableShape, NoSideEffect, - Commutative]> { +def Tosa_AddOp : Tosa_Op<"add", [ + DeclareOpInterfaceMethods, + ResultsBroadcastableShape, NoSideEffect, Commutative]> { let summary = "Elementwise addition operator"; let description = [{ @@ -404,9 +418,10 @@ def Tosa_AddOp : Tosa_Op<"add", [ResultsBroadcastableShape, NoSideEffect, //===----------------------------------------------------------------------===// // Operator: arithmetic_right_shift //===----------------------------------------------------------------------===// -def Tosa_ArithmeticRightShiftOp : Tosa_Op<"arithmetic_right_shift", - [ResultsBroadcastableShape, - NoSideEffect]> { +def Tosa_ArithmeticRightShiftOp : Tosa_Op<"arithmetic_right_shift", [ + DeclareOpInterfaceMethods, + ResultsBroadcastableShape, NoSideEffect]> { let summary = "Elementwise Arithmetic Right Shift"; let description = [{ @@ -429,8 +444,10 @@ def Tosa_ArithmeticRightShiftOp : Tosa_Op<"arithmetic_right_shift", //===----------------------------------------------------------------------===// // Operator: bitwise_and //===----------------------------------------------------------------------===// -def Tosa_BitwiseAndOp : Tosa_Op<"bitwise_and", [ResultsBroadcastableShape, - NoSideEffect, Commutative]> { +def Tosa_BitwiseAndOp : Tosa_Op<"bitwise_and", [ + DeclareOpInterfaceMethods, + ResultsBroadcastableShape, NoSideEffect, Commutative]> { let summary = "Bitwise AND operator"; let description = [{ @@ -451,8 +468,10 @@ def Tosa_BitwiseAndOp : Tosa_Op<"bitwise_and", [ResultsBroadcastableShape, //===----------------------------------------------------------------------===// // Operator: bitwise_or //===----------------------------------------------------------------------===// -def Tosa_BitwiseOrOp : Tosa_Op<"bitwise_or", [ResultsBroadcastableShape, - NoSideEffect, Commutative]> { +def Tosa_BitwiseOrOp : Tosa_Op<"bitwise_or", [ + DeclareOpInterfaceMethods, + ResultsBroadcastableShape, NoSideEffect, Commutative]> { let summary = "Bitwise OR operator"; let description = [{ @@ -473,8 +492,10 @@ def Tosa_BitwiseOrOp : Tosa_Op<"bitwise_or", [ResultsBroadcastableShape, //===----------------------------------------------------------------------===// // Operator: bitwise_xor //===----------------------------------------------------------------------===// -def Tosa_BitwiseXorOp : Tosa_Op<"bitwise_xor", [ResultsBroadcastableShape, - NoSideEffect, Commutative]> { +def Tosa_BitwiseXorOp : Tosa_Op<"bitwise_xor", [ + DeclareOpInterfaceMethods, + ResultsBroadcastableShape, NoSideEffect, Commutative]> { let summary = "Bitwise XOR operator"; let description = [{ @@ -495,8 +516,10 @@ def Tosa_BitwiseXorOp : Tosa_Op<"bitwise_xor", [ResultsBroadcastableShape, //===----------------------------------------------------------------------===// // Operator: div //===----------------------------------------------------------------------===// -def Tosa_DivOp : Tosa_Op<"div", [ResultsBroadcastableShape, - NoSideEffect]> { +def Tosa_DivOp : Tosa_Op<"div", [ + DeclareOpInterfaceMethods, + ResultsBroadcastableShape, NoSideEffect]> { let summary = "Integer divide operator"; let description = [{ @@ -517,8 +540,10 @@ def Tosa_DivOp : Tosa_Op<"div", [ResultsBroadcastableShape, //===----------------------------------------------------------------------===// // Operator: logical_and //===----------------------------------------------------------------------===// -def Tosa_LogicalAndOp : Tosa_Op<"logical_and", [ResultsBroadcastableShape, - Commutative, NoSideEffect]> { +def Tosa_LogicalAndOp : Tosa_Op<"logical_and", [ + DeclareOpInterfaceMethods, + ResultsBroadcastableShape, Commutative, NoSideEffect]> { let summary = "Returns the truth value of x AND y element-wise."; let description = [{ @@ -539,9 +564,10 @@ def Tosa_LogicalAndOp : Tosa_Op<"logical_and", [ResultsBroadcastableShape, //===----------------------------------------------------------------------===// // Operator: logical_left_shift //===----------------------------------------------------------------------===// -def Tosa_LogicalLeftShiftOp : Tosa_Op<"logical_left_shift", - [ResultsBroadcastableShape, - NoSideEffect]> { +def Tosa_LogicalLeftShiftOp : Tosa_Op<"logical_left_shift", [ + DeclareOpInterfaceMethods, + ResultsBroadcastableShape, NoSideEffect]> { let summary = "Elementwise Logical Left Shift"; let description = [{ @@ -562,9 +588,9 @@ def Tosa_LogicalLeftShiftOp : Tosa_Op<"logical_left_shift", //===----------------------------------------------------------------------===// // Operator: logical_right_shift //===----------------------------------------------------------------------===// -def Tosa_LogicalRightShiftOp : Tosa_Op<"logical_right_shift", - [ResultsBroadcastableShape, - NoSideEffect]> { +def Tosa_LogicalRightShiftOp : Tosa_Op<"logical_right_shift", [ + DeclareOpInterfaceMethods, ResultsBroadcastableShape, + NoSideEffect]> { let summary = "Elementwise Logical Right Shift"; let description = [{ @@ -586,8 +612,10 @@ def Tosa_LogicalRightShiftOp : Tosa_Op<"logical_right_shift", //===----------------------------------------------------------------------===// // Operator: logical_or //===----------------------------------------------------------------------===// -def Tosa_LogicalOrOp : Tosa_Op<"logical_or", [ResultsBroadcastableShape, - Commutative, NoSideEffect]> { +def Tosa_LogicalOrOp : Tosa_Op<"logical_or", [ + DeclareOpInterfaceMethods, + ResultsBroadcastableShape, Commutative, NoSideEffect]> { let summary = "Returns the truth value of x OR y element-wise."; let description = [{ @@ -608,8 +636,10 @@ def Tosa_LogicalOrOp : Tosa_Op<"logical_or", [ResultsBroadcastableShape, //===----------------------------------------------------------------------===// // Operator: logical_xor //===----------------------------------------------------------------------===// -def Tosa_LogicalXorOp : Tosa_Op<"logical_xor", [ResultsBroadcastableShape, - Commutative, NoSideEffect]> { +def Tosa_LogicalXorOp : Tosa_Op<"logical_xor", [ + DeclareOpInterfaceMethods, + ResultsBroadcastableShape, Commutative, NoSideEffect]> { let summary = "Returns the truth value of x XOR y element-wise."; let description = [{ @@ -630,8 +660,10 @@ def Tosa_LogicalXorOp : Tosa_Op<"logical_xor", [ResultsBroadcastableShape, //===----------------------------------------------------------------------===// // Operator: maximum //===----------------------------------------------------------------------===// -def Tosa_MaximumOp : Tosa_Op<"maximum", [ResultsBroadcastableShape, - NoSideEffect, Commutative]> { +def Tosa_MaximumOp : Tosa_Op<"maximum", [ + DeclareOpInterfaceMethods, + ResultsBroadcastableShape, NoSideEffect, Commutative]> { let summary = "Elementwise Maximum"; let description = [{ @@ -652,8 +684,10 @@ def Tosa_MaximumOp : Tosa_Op<"maximum", [ResultsBroadcastableShape, //===----------------------------------------------------------------------===// // Operator: minimum //===----------------------------------------------------------------------===// -def Tosa_MinimumOp : Tosa_Op<"minimum", [ResultsBroadcastableShape, - NoSideEffect, Commutative]> { +def Tosa_MinimumOp : Tosa_Op<"minimum", [ + DeclareOpInterfaceMethods, + ResultsBroadcastableShape, NoSideEffect, Commutative]> { let summary = "Elementwise Minimum"; let description = [{ @@ -674,8 +708,10 @@ def Tosa_MinimumOp : Tosa_Op<"minimum", [ResultsBroadcastableShape, //===----------------------------------------------------------------------===// // Operator: mul //===----------------------------------------------------------------------===// -def Tosa_MulOp : Tosa_Op<"mul", [ResultsBroadcastableShape, NoSideEffect, - Commutative]> { +def Tosa_MulOp : Tosa_Op<"mul", [ + DeclareOpInterfaceMethods, + ResultsBroadcastableShape, NoSideEffect, Commutative]> { let summary = "Multiplication operator"; let description = [{ @@ -698,7 +734,10 @@ def Tosa_MulOp : Tosa_Op<"mul", [ResultsBroadcastableShape, NoSideEffect, //===----------------------------------------------------------------------===// // Operator: pow //===----------------------------------------------------------------------===// -def Tosa_PowOp : Tosa_Op<"pow", [ResultsBroadcastableShape, NoSideEffect]> { +def Tosa_PowOp : Tosa_Op<"pow", [ + DeclareOpInterfaceMethods, + ResultsBroadcastableShape, NoSideEffect]> { let summary = "Computes the power of one value to another."; let description = [{ @@ -720,7 +759,10 @@ def Tosa_PowOp : Tosa_Op<"pow", [ResultsBroadcastableShape, NoSideEffect]> { //===----------------------------------------------------------------------===// // Operator: sub //===----------------------------------------------------------------------===// -def Tosa_SubOp : Tosa_Op<"sub", [ResultsBroadcastableShape, NoSideEffect]> { +def Tosa_SubOp : Tosa_Op<"sub", [ + DeclareOpInterfaceMethods, + ResultsBroadcastableShape, NoSideEffect]> { let summary = "Elementwise subtraction operator"; let description = [{ @@ -781,7 +823,10 @@ def Tosa_TableOp : Tosa_Op<"table", [NoSideEffect]> { //===----------------------------------------------------------------------===// // Operator: abs //===----------------------------------------------------------------------===// -def Tosa_AbsOp : Tosa_Op<"abs", [NoSideEffect, SameOperandsAndResultType]> { +def Tosa_AbsOp : Tosa_Op<"abs", [ + DeclareOpInterfaceMethods, + NoSideEffect, SameOperandsAndResultType]> { let summary = "Elementwise abs op"; let description = [{ @@ -800,8 +845,10 @@ def Tosa_AbsOp : Tosa_Op<"abs", [NoSideEffect, SameOperandsAndResultType]> { //===----------------------------------------------------------------------===// // Operator: bitwise_not //===----------------------------------------------------------------------===// -def Tosa_BitwiseNotOp : Tosa_Op<"bitwise_not", [ResultsBroadcastableShape, - NoSideEffect]> { +def Tosa_BitwiseNotOp : Tosa_Op<"bitwise_not", [ + DeclareOpInterfaceMethods, + ResultsBroadcastableShape, NoSideEffect]> { let summary = "Bitwise NOT operator"; let description = [{ @@ -820,7 +867,10 @@ def Tosa_BitwiseNotOp : Tosa_Op<"bitwise_not", [ResultsBroadcastableShape, //===----------------------------------------------------------------------===// // Operator: ceil //===----------------------------------------------------------------------===// -def Tosa_CeilOp : Tosa_Op<"ceil", [NoSideEffect, SameOperandsAndResultType]> { +def Tosa_CeilOp : Tosa_Op<"ceil", [ + DeclareOpInterfaceMethods, + NoSideEffect, SameOperandsAndResultType]> { let summary = "Elementwise ceil op"; let description = [{ @@ -839,7 +889,10 @@ def Tosa_CeilOp : Tosa_Op<"ceil", [NoSideEffect, SameOperandsAndResultType]> { //===----------------------------------------------------------------------===// // Operator: clz //===----------------------------------------------------------------------===// -def Tosa_ClzOp : Tosa_Op<"clz", [NoSideEffect, SameOperandsAndResultType]> { +def Tosa_ClzOp : Tosa_Op<"clz", [ + DeclareOpInterfaceMethods, + NoSideEffect, SameOperandsAndResultType]> { let summary = "Elementwise count leading zero op"; let description = [{ @@ -858,7 +911,10 @@ def Tosa_ClzOp : Tosa_Op<"clz", [NoSideEffect, SameOperandsAndResultType]> { //===----------------------------------------------------------------------===// // Operator: exp //===----------------------------------------------------------------------===// -def Tosa_ExpOp : Tosa_Op<"exp", [NoSideEffect, SameOperandsAndResultType]> { +def Tosa_ExpOp : Tosa_Op<"exp", [ + DeclareOpInterfaceMethods, + NoSideEffect, SameOperandsAndResultType]> { let summary = "Elementwise exp op"; let description = [{ @@ -877,7 +933,10 @@ def Tosa_ExpOp : Tosa_Op<"exp", [NoSideEffect, SameOperandsAndResultType]> { //===----------------------------------------------------------------------===// // Operator: floor //===----------------------------------------------------------------------===// -def Tosa_FloorOp : Tosa_Op<"floor", [NoSideEffect, SameOperandsAndResultType]> { +def Tosa_FloorOp : Tosa_Op<"floor", [ + DeclareOpInterfaceMethods, + NoSideEffect, SameOperandsAndResultType]> { let summary = "Elementwise floor op"; let description = [{ @@ -896,7 +955,10 @@ def Tosa_FloorOp : Tosa_Op<"floor", [NoSideEffect, SameOperandsAndResultType]> { //===----------------------------------------------------------------------===// // Operator: log //===----------------------------------------------------------------------===// -def Tosa_LogOp : Tosa_Op<"log", [NoSideEffect, SameOperandsAndResultType]> { +def Tosa_LogOp : Tosa_Op<"log", [ + DeclareOpInterfaceMethods, + NoSideEffect, SameOperandsAndResultType]> { let summary = "Elementwise log op"; let description = [{ @@ -915,8 +977,10 @@ def Tosa_LogOp : Tosa_Op<"log", [NoSideEffect, SameOperandsAndResultType]> { //===----------------------------------------------------------------------===// // Operator: logical_not //===----------------------------------------------------------------------===// -def Tosa_LogicalNotOp : Tosa_Op<"logical_not", [NoSideEffect, - SameOperandsAndResultType]> { +def Tosa_LogicalNotOp : Tosa_Op<"logical_not", [ + DeclareOpInterfaceMethods, + NoSideEffect, SameOperandsAndResultType]> { let summary = "Returns the truth value of NOT x element-wise."; let description = [{ @@ -935,8 +999,10 @@ def Tosa_LogicalNotOp : Tosa_Op<"logical_not", [NoSideEffect, //===----------------------------------------------------------------------===// // Operator: negate //===----------------------------------------------------------------------===// -def Tosa_NegateOp : Tosa_Op<"negate", [NoSideEffect, - SameOperandsAndResultType]> { +def Tosa_NegateOp : Tosa_Op<"negate", [ + DeclareOpInterfaceMethods, + NoSideEffect, SameOperandsAndResultType]> { let summary = "Elementwise negate op"; let description = [{ @@ -958,8 +1024,10 @@ def Tosa_NegateOp : Tosa_Op<"negate", [NoSideEffect, //===----------------------------------------------------------------------===// // Operator: reciprocal //===----------------------------------------------------------------------===// -def Tosa_ReciprocalOp : Tosa_Op<"reciprocal", [NoSideEffect, - SameOperandsAndResultType]> { +def Tosa_ReciprocalOp : Tosa_Op<"reciprocal", [ + DeclareOpInterfaceMethods, + NoSideEffect, SameOperandsAndResultType]> { let summary = "Elementwise reciprocal op"; let description = [{ @@ -979,7 +1047,10 @@ def Tosa_ReciprocalOp : Tosa_Op<"reciprocal", [NoSideEffect, //===----------------------------------------------------------------------===// // Operator: rsqrt //===----------------------------------------------------------------------===// -def Tosa_RsqrtOp : Tosa_Op<"rsqrt", [NoSideEffect, SameOperandsAndResultType]> { +def Tosa_RsqrtOp : Tosa_Op<"rsqrt", [ + DeclareOpInterfaceMethods, + NoSideEffect, SameOperandsAndResultType]> { let summary = "Elementwise 1/sqrt op"; let description = [{ @@ -1005,7 +1076,9 @@ def Tosa_RsqrtOp : Tosa_Op<"rsqrt", [NoSideEffect, SameOperandsAndResultType]> { //===----------------------------------------------------------------------===// // Operator: select //===----------------------------------------------------------------------===// -def Tosa_SelectOp : Tosa_Op<"select", [NoSideEffect]> { +def Tosa_SelectOp : Tosa_Op<"select", [ + DeclareOpInterfaceMethods, NoSideEffect]> { let summary = "Elementwise select operator"; let description = [{ @@ -1031,8 +1104,10 @@ def Tosa_SelectOp : Tosa_Op<"select", [NoSideEffect]> { //===----------------------------------------------------------------------===// // Operator: equal //===----------------------------------------------------------------------===// -def Tosa_EqualOp : Tosa_Op<"equal", [ResultsBroadcastableShape, Commutative, - NoSideEffect]> { +def Tosa_EqualOp : Tosa_Op<"equal", [ + DeclareOpInterfaceMethods, + ResultsBroadcastableShape, Commutative, NoSideEffect]> { let summary = "Returns the truth value of (x == y) element-wise."; let description = [{ @@ -1052,8 +1127,10 @@ def Tosa_EqualOp : Tosa_Op<"equal", [ResultsBroadcastableShape, Commutative, //===----------------------------------------------------------------------===// // Operator: greater //===----------------------------------------------------------------------===// -def Tosa_GreaterOp : Tosa_Op<"greater", [ResultsBroadcastableShape, - NoSideEffect]> { +def Tosa_GreaterOp : Tosa_Op<"greater", [ + DeclareOpInterfaceMethods, + ResultsBroadcastableShape, NoSideEffect]> { let summary = "Returns the truth value of (x > y) element-wise."; let description = [{ @@ -1073,8 +1150,10 @@ def Tosa_GreaterOp : Tosa_Op<"greater", [ResultsBroadcastableShape, //===----------------------------------------------------------------------===// // Operator: greater_equal //===----------------------------------------------------------------------===// -def Tosa_GreaterEqualOp : Tosa_Op<"greater_equal", [ResultsBroadcastableShape, - NoSideEffect]> { +def Tosa_GreaterEqualOp : Tosa_Op<"greater_equal", [ + DeclareOpInterfaceMethods, + ResultsBroadcastableShape, NoSideEffect]> { let summary = "Returns the truth value of (x >= y) element-wise."; let description = [{ @@ -1269,7 +1348,9 @@ def Tosa_PadOp : Tosa_Op<"pad", [NoSideEffect]> { // Operator: reshape //===----------------------------------------------------------------------===// def Tosa_ReshapeOp: Tosa_Op<"reshape", [ - NoSideEffect]> { + DeclareOpInterfaceMethods, + NoSideEffect]> { let summary = "Reshape operator"; let description = [{ @@ -1291,7 +1372,9 @@ def Tosa_ReshapeOp: Tosa_Op<"reshape", [ //===----------------------------------------------------------------------===// // Operator: reverse //===----------------------------------------------------------------------===// -def Tosa_ReverseOp: Tosa_Op<"reverse", [NoSideEffect]> { +def Tosa_ReverseOp: Tosa_Op<"reverse", [ + DeclareOpInterfaceMethods, NoSideEffect]> { let summary = "Reverse operator"; let description = [{ diff --git a/mlir/include/mlir/Dialect/Tosa/Transforms/Passes.h b/mlir/include/mlir/Dialect/Tosa/Transforms/Passes.h index b9032dfd351e0..b00b161aef156 100644 --- a/mlir/include/mlir/Dialect/Tosa/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/Tosa/Transforms/Passes.h @@ -13,11 +13,13 @@ #ifndef MLIR_DIALECT_TOSA_TRANSFORMS_PASSES_H #define MLIR_DIALECT_TOSA_TRANSFORMS_PASSES_H +#include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/Pass/Pass.h" namespace mlir { namespace tosa { +std::unique_ptr createTosaInferShapesPass(); std::unique_ptr createTosaMakeBroadcastablePass(); std::unique_ptr createTosaTestQuantUtilAPIPass(); diff --git a/mlir/include/mlir/Dialect/Tosa/Transforms/Passes.td b/mlir/include/mlir/Dialect/Tosa/Transforms/Passes.td index a29a1676c2647..dfa7b1f8582e3 100644 --- a/mlir/include/mlir/Dialect/Tosa/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/Tosa/Transforms/Passes.td @@ -15,6 +15,21 @@ include "mlir/Pass/PassBase.td" +def TosaInferShapes : FunctionPass<"tosa-infer-shapes"> { + let summary = "Propagate shapes across TOSA operations"; + let description = [{ + Pass that uses operand types and propagates shapes to TOSA operations. + This includes legalizing rankless and dynamic shapes towards static. + }]; + + let constructor = "createTosaInferShapesPass()"; + let dependentDialects = [ + "StandardOpsDialect", + "tensor::TensorDialect", + "tosa::TosaDialect", + ]; +} + def TosaMakeBroadcastable : FunctionPass<"tosa-make-broadcastable"> { let summary = "TOSA rank Reshape to enable Broadcasting"; let description = [{ diff --git a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp index 39b864ff62c02..fd744372fceab 100644 --- a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp +++ b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp @@ -291,6 +291,148 @@ static void buildPadOpWithQuantInfo(OpBuilder &builder, OperationState &result, result.types.push_back(outputType); } +//===----------------------------------------------------------------------===// +// TOSA Operator Return Type Inference. +//===----------------------------------------------------------------------===// + +static void getI64Values(ArrayAttr arrayAttr, SmallVector &values) { + for (auto it : arrayAttr) { + values.push_back(it.cast().getValue().getSExtValue()); + } +} + +LogicalResult tosa::ReshapeOp::inferReturnTypeComponents( + MLIRContext *context, ::llvm::Optional location, + ValueRange operands, DictionaryAttr attributes, RegionRange regions, + SmallVectorImpl &inferredReturnShapes) { + ShapedType type = operands.front().getType().cast(); + + auto newShape = attributes.get("new_shape").cast(); + llvm::SmallVector newShapeValue; + getI64Values(newShape, newShapeValue); + + // We cannot infer from the total number of elements so we must take the + // shape attribute as exact. + if (!type.hasRank() || !type.hasStaticShape()) { + inferredReturnShapes.push_back(ShapedTypeComponents(newShapeValue)); + return success(); + } + + // Determine the number of elements covered by the slice of all static + // dimensions. This allows us to infer the length of the remaining dynamic + // dimension. + int64_t numElements = type.getNumElements(); + int64_t staticMul = 1; + for (auto val : newShapeValue) { + if (val != -1) { + staticMul *= val; + } + } + + // Determine the length of the dynamic dimension. + for (auto &val : newShapeValue) { + if (val == -1) + val = numElements / staticMul; + } + + inferredReturnShapes.push_back(ShapedTypeComponents(newShapeValue)); + return success(); +} + +static LogicalResult resolveBroadcastShape(ValueRange operands, + SmallVector &outShape) { + int64_t outRank = 0; + for (auto operand : operands) { + auto type = operand.getType().cast(); + if (!type.hasRank()) + return failure(); + outRank = std::max(outRank, type.getRank()); + } + + outShape.resize(outRank, 1); + + for (auto operand : operands) { + auto type = operand.getType().cast(); + auto shape = type.getShape(); + auto rankDiff = outShape.size() - shape.size(); + + for (size_t i = 0; i < shape.size(); i++) { + auto dim1 = outShape[i + rankDiff]; + auto dim2 = shape[i]; + auto resolvedDim = dim1; + + if (dim1 == 1) { + resolvedDim = dim2; + } else if (dim2 == 1) { + resolvedDim = dim1; + } else if (dim1 != dim2) { + return failure(); + } + outShape[i + rankDiff] = resolvedDim; + } + } + + return success(); +} + +static LogicalResult NAryInferReturnTypes( + ValueRange operands, + SmallVectorImpl &inferredReturnShapes) { + llvm::SmallVector outShape; + if (resolveBroadcastShape(operands, outShape).failed()) { + inferredReturnShapes.push_back(ShapedTypeComponents()); + } else { + inferredReturnShapes.push_back(ShapedTypeComponents(outShape)); + } + return success(); +} + +#define NARY_SHAPE_INFER(OP) \ + LogicalResult OP::inferReturnTypeComponents( \ + MLIRContext *context, ::llvm::Optional location, \ + ValueRange operands, DictionaryAttr attributes, RegionRange regions, \ + SmallVectorImpl &inferredReturnShapes) { \ + return NAryInferReturnTypes(operands, inferredReturnShapes); \ + } + +NARY_SHAPE_INFER(tosa::AbsOp) +NARY_SHAPE_INFER(tosa::AddOp) +NARY_SHAPE_INFER(tosa::ArithmeticRightShiftOp) +NARY_SHAPE_INFER(tosa::BitwiseAndOp) +NARY_SHAPE_INFER(tosa::BitwiseOrOp) +NARY_SHAPE_INFER(tosa::BitwiseXorOp) +NARY_SHAPE_INFER(tosa::BitwiseNotOp) +NARY_SHAPE_INFER(tosa::CeilOp) +NARY_SHAPE_INFER(tosa::ClampOp) +NARY_SHAPE_INFER(tosa::ClzOp) +NARY_SHAPE_INFER(tosa::DivOp) +NARY_SHAPE_INFER(tosa::EqualOp) +NARY_SHAPE_INFER(tosa::ExpOp) +NARY_SHAPE_INFER(tosa::FloorOp) +NARY_SHAPE_INFER(tosa::GreaterEqualOp) +NARY_SHAPE_INFER(tosa::GreaterOp) +NARY_SHAPE_INFER(tosa::LogOp) +NARY_SHAPE_INFER(tosa::LogicalAndOp) +NARY_SHAPE_INFER(tosa::LogicalLeftShiftOp) +NARY_SHAPE_INFER(tosa::LogicalNotOp) +NARY_SHAPE_INFER(tosa::LogicalOrOp) +NARY_SHAPE_INFER(tosa::LogicalRightShiftOp) +NARY_SHAPE_INFER(tosa::LogicalXorOp) +NARY_SHAPE_INFER(tosa::MaximumOp) +NARY_SHAPE_INFER(tosa::MinimumOp) +NARY_SHAPE_INFER(tosa::MulOp) +NARY_SHAPE_INFER(tosa::NegateOp) +NARY_SHAPE_INFER(tosa::PowOp) +NARY_SHAPE_INFER(tosa::ReciprocalOp) +NARY_SHAPE_INFER(tosa::ReluNOp) +NARY_SHAPE_INFER(tosa::ReverseOp) +NARY_SHAPE_INFER(tosa::RsqrtOp) +NARY_SHAPE_INFER(tosa::SelectOp) +NARY_SHAPE_INFER(tosa::SubOp) +NARY_SHAPE_INFER(tosa::TanhOp) +NARY_SHAPE_INFER(tosa::SigmoidOp) +#undef PRED_SHAPE_INFER + //===----------------------------------------------------------------------===// // TOSA Operator Definitions. //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Tosa/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Tosa/Transforms/CMakeLists.txt index 04acbf6425b75..f466b1ab85389 100644 --- a/mlir/lib/Dialect/Tosa/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Tosa/Transforms/CMakeLists.txt @@ -1,4 +1,5 @@ add_mlir_dialect_library(MLIRTosaTransforms + TosaInferShapes.cpp TosaMakeBroadcastable.cpp ADDITIONAL_HEADER_DIRS diff --git a/mlir/lib/Dialect/Tosa/Transforms/TosaInferShapes.cpp b/mlir/lib/Dialect/Tosa/Transforms/TosaInferShapes.cpp new file mode 100644 index 0000000000000..eca63e1e8ab39 --- /dev/null +++ b/mlir/lib/Dialect/Tosa/Transforms/TosaInferShapes.cpp @@ -0,0 +1,247 @@ +//===- TosaInferShapes.cpp ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Propogate shapes forward along TOSA operations to resolve dynamic shape +// operations. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Analysis/DataFlowAnalysis.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/Dialect/Tensor/IR/Tensor.h" +#include "mlir/Dialect/Tosa/IR/TosaOps.h" +#include "mlir/Dialect/Tosa/Transforms/PassDetail.h" +#include "mlir/Dialect/Tosa/Transforms/Passes.h" +#include "mlir/IR/BlockAndValueMapping.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/Matchers.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Transforms/DialectConversion.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" + +using namespace mlir; +using namespace mlir::tosa; + +namespace { + +// ----------------------------------------------------------------------------- +// Analysis. +// ----------------------------------------------------------------------------- + +static Type joinElementTypes(Type lhs, Type rhs) { + return lhs == rhs ? lhs : Type(); +} + +namespace { +// Statically known information for a particular Value. +// +// This struct currently tracks only information relevant for tensor/array-like +// shaped types. It is fine to associate a `ValueKnowledge` with a non-shaped +// type as long as it is in the default "no knowledge" state returned by +// `getPessimisticValueState`. The important invariant is that we cannot +// claim to know something about a value which is false. +// +// This class could also be called "dataflow facts", "lattice value", etc. +struct ValueKnowledge { + ValueKnowledge() = delete; + ValueKnowledge(bool hasSizes, std::vector sizes, Type dtype) + : hasSizes(hasSizes), sizes(sizes), dtype(dtype) { + assert(sizes.size() == 0 || hasSizes); + } + + // Get the static knowledge intrinsic to `type`. + static ValueKnowledge getKnowledgeFromType(Type type) { + ValueKnowledge result = getPessimisticValueState(type.getContext()); + if (auto shapedType = type.dyn_cast()) { + if (shapedType.hasRank()) { + result.hasSizes = true; + result.sizes = shapedType.getShape(); + } + result.dtype = shapedType.getElementType(); + } + return result; + } + + // Return a pessimistic/conservative value state without assuming any knowlege + // about the IR. + static ValueKnowledge getPessimisticValueState(MLIRContext *context) { + return ValueKnowledge(false, {}, Type()); + } + + Type getType() const { + if (hasSizes) { + return RankedTensorType::get(llvm::makeArrayRef(sizes), dtype); + } + return UnrankedTensorType::get(dtype); + } + + bool operator==(const ValueKnowledge &rhs) const { + return std::make_tuple(hasSizes, sizes, dtype) == + std::make_tuple(rhs.hasSizes, rhs.sizes, rhs.dtype); + } + + // Given two pieces of static knowledge, calculate conservatively the + // information we can be sure about. + static ValueKnowledge join(const ValueKnowledge &lhs, + const ValueKnowledge &rhs) { + // Mental model: All conditions are checking how to change from the safe "no + // knowledge" default-initialized state to a state with more knowledge + // consistent with lhs and rhs. + ValueKnowledge result = getPessimisticValueState(nullptr); + + if (lhs.hasSizes && !rhs.hasSizes) { + result.hasSizes = true; + result.sizes = lhs.sizes; + } else if (!lhs.hasSizes && rhs.hasSizes) { + result.hasSizes = true; + result.sizes = rhs.sizes; + } else if (lhs.hasSizes && rhs.hasSizes && + lhs.sizes.size() == rhs.sizes.size()) { + result.hasSizes = true; + result.sizes.resize(lhs.sizes.size(), ShapedType::kDynamicSize); + for (int i = 0, e = result.sizes.size(); i != e; i++) { + int64_t lhsSize = lhs.sizes[i]; + int64_t rhsSize = rhs.sizes[i]; + int64_t &resultSize = result.sizes[i]; + if (lhsSize == ShapedType::kDynamicSize) { + resultSize = rhsSize; + } else if (rhsSize == ShapedType::kDynamicSize) { + resultSize = lhsSize; + } else if (lhsSize == rhsSize) { + resultSize = lhsSize; + } + } + } + + result.dtype = joinElementTypes(lhs.dtype, rhs.dtype); + return result; + } + + // Whether the Value is known to have a list of sizes. + bool hasSizes; + // If `hasSizes`, the sizes along each rank. Unknown sizes are represented as + // `ShapedType::kDynamicSize`. + std::vector sizes; + // The dtype of a tensor. + // This is equal to nullptr if we don't know that it is a specific concrete + // type. + Type dtype; +}; + +} // namespace + +/// Pass that enables broadcast by making all input arrays have the same +/// number of dimensions. Insert RESHAPE operations to lower rank operand +struct TosaInferShapes : public TosaInferShapesBase { +public: + void runOnFunction() override { + FuncOp func = getOperation(); + + IRRewriter rewriter(func.getContext()); + + func.walk([&](Operation *op) { + if (op->getDialect()->getNamespace() != + tosa::TosaDialect::getDialectNamespace()) + return; + InferShapedTypeOpInterface shapeInterface = + dyn_cast(op); + if (!shapeInterface) + return; + + SmallVector returnedShapes; + if (shapeInterface + .inferReturnTypeComponents( + op->getContext(), op->getLoc(), op->getOperands(), + op->getAttrDictionary(), op->getRegions(), returnedShapes) + .succeeded()) { + for (auto it : llvm::zip(op->getResults(), returnedShapes)) { + Value result = std::get<0>(it); + ShapedTypeComponents predictedShape = std::get<1>(it); + + // Check whether this use case is replaceable. We define an op as + // being replaceable if it is used by a ReturnOp or a TosaOp. + bool replaceable = true; + for (auto user : result.getUsers()) { + if (isa(user)) + continue; + if (user->getDialect()->getNamespace() == + tosa::TosaDialect::getDialectNamespace()) + continue; + + replaceable = false; + } + + // Determine the knowledge based on the output type. + Type resultTy = result.getType(); + auto currentKnowledge = + ValueKnowledge::getKnowledgeFromType(resultTy); + + // Compute the knowledge based on the inferred type. + auto inferredKnowledge = + ValueKnowledge::getPessimisticValueState(op->getContext()); + inferredKnowledge.dtype = + resultTy.cast().getElementType(); + inferredKnowledge.hasSizes = predictedShape.hasRank(); + if (predictedShape.hasRank()) { + for (auto dim : predictedShape.getDims()) { + inferredKnowledge.sizes.push_back(dim); + } + } + + if (!replaceable) + continue; + + // Compute the new type based on the joined version. + auto newKnowledge = + ValueKnowledge::join(currentKnowledge, inferredKnowledge); + result.setType(newKnowledge.getType()); + } + } + }); + + // Insert UnrealizedConversionCasts to guarantee ReturnOp agress with + // the FuncOp type. + func.walk([&](ReturnOp op) { + FuncOp parent = dyn_cast(op->getParentOp()); + if (!parent) + return; + + rewriter.setInsertionPoint(op); + FunctionType funcTy = func.getType(); + auto resultTys = funcTy.getResults(); + + bool castAdded = false; + SmallVector castedValues; + for (auto it : llvm::zip(op->getOperands(), resultTys)) { + auto operand = std::get<0>(it); + auto currentTy = operand.getType(); + auto castTy = std::get<1>(it); + if (currentTy == castTy) { + castedValues.push_back(operand); + continue; + } + + castedValues.push_back( + rewriter.create(op.getLoc(), castTy, operand) + .getResult()); + + castAdded = true; + } + + if (castAdded) { + rewriter.replaceOpWithNewOp(op, castedValues); + } + }); + } +}; +} // end anonymous namespace + +std::unique_ptr mlir::tosa::createTosaInferShapesPass() { + return std::make_unique(); +} diff --git a/mlir/lib/Dialect/Tosa/Transforms/TosaMakeBroadcastable.cpp b/mlir/lib/Dialect/Tosa/Transforms/TosaMakeBroadcastable.cpp index 60bc6357eb3c8..e850e1f517d2f 100644 --- a/mlir/lib/Dialect/Tosa/Transforms/TosaMakeBroadcastable.cpp +++ b/mlir/lib/Dialect/Tosa/Transforms/TosaMakeBroadcastable.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/Dialect/Tosa/IR//TosaOps.h" #include "mlir/Dialect/Tosa/Transforms/PassDetail.h" #include "mlir/Dialect/Tosa/Transforms/Passes.h" diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir index 6f381be8e7202..44cfda613d313 100644 --- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir +++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir @@ -500,7 +500,7 @@ func @test_reshape_samerank(%arg0: tensor<3x2xf32>) -> tensor<2x3xf32> { // CHECK-LABEL: @test_reshape_downrank_6D func @test_reshape_downrank_6D(%arg0: tensor<1x2x3x5x7x11xf32>) -> tensor<6x5x77xf32> { // CHECK: linalg.tensor_collapse_shape %arg0 {{\[}}[0, 1, 2], [3], [4, 5]] - %0 = "tosa.reshape"(%arg0) {new_shape = [2, 3]} : (tensor<1x2x3x5x7x11xf32>) -> tensor<6x5x77xf32> + %0 = "tosa.reshape"(%arg0) {new_shape = [6, 5, 77]} : (tensor<1x2x3x5x7x11xf32>) -> tensor<6x5x77xf32> return %0 : tensor<6x5x77xf32> } diff --git a/mlir/test/Dialect/Tosa/tosa_infer_shapes.mlir b/mlir/test/Dialect/Tosa/tosa_infer_shapes.mlir new file mode 100644 index 0000000000000..e73c79cb3ceef --- /dev/null +++ b/mlir/test/Dialect/Tosa/tosa_infer_shapes.mlir @@ -0,0 +1,278 @@ +// RUN: mlir-opt --split-input-file --tosa-infer-shapes %s | FileCheck %s + +// CHECK-LABEL: @test_return +func @test_return(%arg0 : tensor<4xf32>) -> tensor<*xf32> { + // CHECK: [[LOG:%.+]] = "tosa.log"(%arg0) : (tensor<4xf32>) -> tensor<4xf32> + // CHECK: tensor.cast [[LOG]] : tensor<4xf32> to tensor<*xf32> + %0 = "tosa.log"(%arg0) : (tensor<4xf32>) -> tensor<*xf32> + return %0 : tensor<*xf32> +} + +// ----- + +// CHECK-LABEL: @test_multiple +func @test_multiple(%arg0 : tensor<4xf32>, %arg1 : tensor<1xf32>, %arg2 : tensor) -> tensor<*xf32> { + // CHECK: [[ADD:%.+]] = "tosa.add"(%arg0, %arg1) : (tensor<4xf32>, tensor<1xf32>) -> tensor<4xf32> + %0 = "tosa.add"(%arg0, %arg1) : (tensor<4xf32>, tensor<1xf32>) -> tensor<*xf32> + + // CHECK: [[LOG:%.+]] = "tosa.log"(%0) : (tensor<4xf32>) -> tensor<4xf32> + %1 = "tosa.log"(%0) : (tensor<*xf32>) -> tensor<*xf32> + + // CHECK: [[SUB:%.+]] = "tosa.sub"(%0, %arg2) : (tensor<4xf32>, tensor) -> tensor<4xf32> + %2 = "tosa.sub"(%0, %arg2) : (tensor<*xf32>, tensor) -> tensor<*xf32> + return %0 : tensor<*xf32> +} + +// ----- + +// CHECK-LABEL: @test_unary_f32 +func @test_unary_f32(%arg0 : tensor<4xf32>) -> () { + // CHECK: "tosa.abs"(%arg0) : (tensor<4xf32>) -> tensor<4xf32> + %0 = "tosa.abs"(%arg0) : (tensor<4xf32>) -> tensor<*xf32> + + // CHECK: "tosa.ceil"(%arg0) : (tensor<4xf32>) -> tensor<4xf32> + %1 = "tosa.ceil"(%arg0) : (tensor<4xf32>) -> tensor<*xf32> + + // CHECK: "tosa.clamp"(%arg0) {{.+}} : (tensor<4xf32>) -> tensor<4xf32> + %2 = "tosa.clamp"(%arg0) { max_int = 10 : i64, min_int = 0 : i64, min_fp = 0.0 : f32, max_fp = 10.0 : f32 } : (tensor<4xf32>) -> tensor<*xf32> + + // CHECK: "tosa.exp"(%arg0) : (tensor<4xf32>) -> tensor<4xf32> + %3 = "tosa.exp"(%arg0) : (tensor<4xf32>) -> tensor<*xf32> + + // CHECK: "tosa.floor"(%arg0) : (tensor<4xf32>) -> tensor<4xf32> + %4 = "tosa.floor"(%arg0) : (tensor<4xf32>) -> tensor<*xf32> + + // CHECK: "tosa.log"(%arg0) : (tensor<4xf32>) -> tensor<4xf32> + %5 = "tosa.log"(%arg0) : (tensor<4xf32>) -> tensor<*xf32> + + // CHECK: "tosa.negate"(%arg0) : (tensor<4xf32>) -> tensor<4xf32> + %6 = "tosa.negate"(%arg0) : (tensor<4xf32>) -> tensor<*xf32> + + // CHECK: "tosa.reciprocal"(%arg0) : (tensor<4xf32>) -> tensor<4xf32> + %7 = "tosa.reciprocal"(%arg0) : (tensor<4xf32>) -> tensor<*xf32> + + // CHECK: "tosa.reluN"(%arg0) {{.+}} : (tensor<4xf32>) -> tensor<4xf32> + %8 = "tosa.reluN"(%arg0) { max_int = 10 : i64, min_int = 0 : i64, min_fp = 0.0 : f32, max_fp = 10.0 : f32 } : (tensor<4xf32>) -> tensor<*xf32> + + // CHECK: "tosa.reverse"(%arg0) {axis = 0 : i64} : (tensor<4xf32>) -> tensor<4xf32> + %9 = "tosa.reverse"(%arg0) { axis = 0 : i64 } : (tensor<4xf32>) -> tensor + + // CHECK: "tosa.rsqrt"(%arg0) : (tensor<4xf32>) -> tensor<4xf32> + %10 = "tosa.rsqrt"(%arg0) : (tensor<4xf32>) -> tensor<*xf32> + + // CHECK: "tosa.tanh"(%arg0) : (tensor<4xf32>) -> tensor<4xf32> + %11 = "tosa.tanh"(%arg0) : (tensor<4xf32>) -> tensor<*xf32> + + // CHECK: "tosa.sigmoid"(%arg0) : (tensor<4xf32>) -> tensor<4xf32> + %12 = "tosa.sigmoid"(%arg0) : (tensor<4xf32>) -> tensor<*xf32> + return +} + +// ----- + +// CHECK-LABEL: @test_unary_i32 +func @test_unary_i32(%arg0 : tensor<4xi32>) -> () { + // CHECK: "tosa.abs"(%arg0) : (tensor<4xi32>) -> tensor<4xi32> + %0 = "tosa.abs"(%arg0) : (tensor<4xi32>) -> tensor<*xi32> + + // CHECK: "tosa.bitwise_not"(%arg0) : (tensor<4xi32>) -> tensor<4xi32> + %1 = "tosa.bitwise_not"(%arg0) : (tensor<4xi32>) -> tensor<*xi32> + + // CHECK: "tosa.clamp"(%arg0) {{.+}} : (tensor<4xi32>) -> tensor<4xi32> + %2 = "tosa.clamp"(%arg0) { max_int = 10 : i64, min_int = 0 : i64, min_fp = 0.0 : f32, max_fp = 10.0 : f32 } : (tensor<4xi32>) -> tensor<*xi32> + + // CHECK: "tosa.clz"(%arg0) : (tensor<4xi32>) -> tensor<4xi32> + %3 = "tosa.clz"(%arg0) : (tensor<4xi32>) -> tensor<*xi32> + + // CHECK: "tosa.negate"(%arg0) : (tensor<4xi32>) -> tensor<4xi32> + %4 = "tosa.negate"(%arg0) : (tensor<4xi32>) -> tensor<*xi32> + + // CHECK: "tosa.reluN"(%arg0) {{.+}} : (tensor<4xi32>) -> tensor<4xi32> + %5 = "tosa.reluN"(%arg0) { max_int = 10 : i64, min_int = 0 : i64, min_fp = 0.0 : f32, max_fp = 10.0 : f32 } : (tensor<4xi32>) -> tensor<*xi32> + + // CHECK: "tosa.reverse"(%arg0) {axis = 0 : i64} : (tensor<4xi32>) -> tensor<4xi32> + %6 = "tosa.reverse"(%arg0) { axis = 0 : i64 } : (tensor<4xi32>) -> tensor + return +} + +// ----- + +// CHECK-LABEL: @test_unary_i1 +func @test_unary_i1(%arg0 : tensor<4xi1>) -> () { + // CHECK: "tosa.logical_not"(%arg0) : (tensor<4xi1>) -> tensor<4xi1> + %0 = "tosa.logical_not"(%arg0) : (tensor<4xi1>) -> tensor<*xi1> + return +} + +// ----- + +// CHECK-LABEL: @test_binary_scalar_f32 +func @test_binary_scalar_f32(%arg0 : tensor<4xf32>, %arg1 : tensor) -> () { + // CHECK: "tosa.add"(%arg0, %arg1) : (tensor<4xf32>, tensor) -> tensor<4xf32> + %0 = "tosa.add"(%arg0, %arg1) : (tensor<4xf32>, tensor) -> tensor<*xf32> + + // CHECK: "tosa.maximum"(%arg0, %arg1) : (tensor<4xf32>, tensor) -> tensor<4xf32> + %1 = "tosa.maximum"(%arg0, %arg1) : (tensor<4xf32>, tensor) -> tensor<*xf32> + + // CHECK: "tosa.minimum"(%arg0, %arg1) : (tensor<4xf32>, tensor) -> tensor<4xf32> + %2 = "tosa.minimum"(%arg0, %arg1) : (tensor<4xf32>, tensor) -> tensor<*xf32> + + // CHECK: "tosa.mul"(%arg0, %arg1) {shift = 0 : i32} : (tensor<4xf32>, tensor) -> tensor<4xf32> + %3 = "tosa.mul"(%arg0, %arg1) { shift = 0 : i32 }: (tensor<4xf32>, tensor) -> tensor<*xf32> + + // CHECK: "tosa.pow"(%arg0, %arg1) : (tensor<4xf32>, tensor) -> tensor<4xf32> + %4 = "tosa.pow"(%arg0, %arg1) : (tensor<4xf32>, tensor) -> tensor<*xf32> + + // CHECK: "tosa.sub"(%arg0, %arg1) : (tensor<4xf32>, tensor) -> tensor<4xf32> + %5 = "tosa.sub"(%arg0, %arg1) : (tensor<4xf32>, tensor) -> tensor<*xf32> + + // CHECK: "tosa.equal"(%arg0, %arg1) : (tensor<4xf32>, tensor) -> tensor<4xi1> + %6 = "tosa.equal"(%arg0, %arg1) : (tensor<4xf32>, tensor) -> tensor<*xi1> + + // CHECK: "tosa.greater"(%arg0, %arg1) : (tensor<4xf32>, tensor) -> tensor<4xi1> + %7 = "tosa.greater"(%arg0, %arg1) : (tensor<4xf32>, tensor) -> tensor<*xi1> + + // CHECK: "tosa.greater_equal"(%arg0, %arg1) : (tensor<4xf32>, tensor) -> tensor<4xi1> + %8 = "tosa.greater_equal"(%arg0, %arg1) : (tensor<4xf32>, tensor) -> tensor<*xi1> + + return +} + +// ----- + +// CHECK-LABEL: @test_binary_broadcast_f32 +func @test_binary_broadcast_f32(%arg0 : tensor<4xf32>, %arg1 : tensor<1xf32>) -> () { + // CHECK: "tosa.add"(%arg0, %arg1) : (tensor<4xf32>, tensor<1xf32>) -> tensor<4xf32> + %0 = "tosa.add"(%arg0, %arg1) : (tensor<4xf32>, tensor<1xf32>) -> tensor<*xf32> + + // CHECK: "tosa.maximum"(%arg0, %arg1) : (tensor<4xf32>, tensor<1xf32>) -> tensor<4xf32> + %1 = "tosa.maximum"(%arg0, %arg1) : (tensor<4xf32>, tensor<1xf32>) -> tensor<*xf32> + + // CHECK: "tosa.minimum"(%arg0, %arg1) : (tensor<4xf32>, tensor<1xf32>) -> tensor<4xf32> + %2 = "tosa.minimum"(%arg0, %arg1) : (tensor<4xf32>, tensor<1xf32>) -> tensor<*xf32> + + // CHECK: "tosa.mul"(%arg0, %arg1) {shift = 0 : i32} : (tensor<4xf32>, tensor<1xf32>) -> tensor<4xf32> + %3 = "tosa.mul"(%arg0, %arg1) { shift = 0 : i32 } : (tensor<4xf32>, tensor<1xf32>) -> tensor<*xf32> + + // CHECK: "tosa.pow"(%arg0, %arg1) : (tensor<4xf32>, tensor<1xf32>) -> tensor<4xf32> + %4 = "tosa.pow"(%arg0, %arg1) : (tensor<4xf32>, tensor<1xf32>) -> tensor<*xf32> + + // CHECK: "tosa.sub"(%arg0, %arg1) : (tensor<4xf32>, tensor<1xf32>) -> tensor<4xf32> + %5 = "tosa.sub"(%arg0, %arg1) : (tensor<4xf32>, tensor<1xf32>) -> tensor<*xf32> + + // CHECK: "tosa.equal"(%arg0, %arg1) : (tensor<4xf32>, tensor<1xf32>) -> tensor<4xi1> + %6 = "tosa.equal"(%arg0, %arg1) : (tensor<4xf32>, tensor<1xf32>) -> tensor<*xi1> + + // CHECK: "tosa.greater"(%arg0, %arg1) : (tensor<4xf32>, tensor<1xf32>) -> tensor<4xi1> + %7 = "tosa.greater"(%arg0, %arg1) : (tensor<4xf32>, tensor<1xf32>) -> tensor<*xi1> + + // CHECK: "tosa.greater_equal"(%arg0, %arg1) : (tensor<4xf32>, tensor<1xf32>) -> tensor<4xi1> + %8 = "tosa.greater_equal"(%arg0, %arg1) : (tensor<4xf32>, tensor<1xf32>) -> tensor<*xi1> + + return +} + +// ----- + +// CHECK-LABEL: @test_binary_i32 +func @test_binary_i32(%arg0 : tensor<4xi32>, %arg1 : tensor) -> () { + // CHECK: "tosa.add"(%arg0, %arg1) : (tensor<4xi32>, tensor) -> tensor<4xi32> + %0 = "tosa.add"(%arg0, %arg1) : (tensor<4xi32>, tensor) -> tensor<*xi32> + + // CHECK: "tosa.bitwise_and"(%arg0, %arg1) {shift = 0 : i32} : (tensor<4xi32>, tensor) -> tensor<4xi32> + %1 = "tosa.bitwise_and"(%arg0, %arg1) { shift = 0 : i32 }: (tensor<4xi32>, tensor) -> tensor<*xi32> + + // CHECK: "tosa.bitwise_or"(%arg0, %arg1) {shift = 0 : i32} : (tensor<4xi32>, tensor) -> tensor<4xi32> + %2 = "tosa.bitwise_or"(%arg0, %arg1) { shift = 0 : i32 }: (tensor<4xi32>, tensor) -> tensor<*xi32> + + // CHECK: "tosa.bitwise_xor"(%arg0, %arg1) {shift = 0 : i32} : (tensor<4xi32>, tensor) -> tensor<4xi32> + %3 = "tosa.bitwise_xor"(%arg0, %arg1) { shift = 0 : i32 }: (tensor<4xi32>, tensor) -> tensor<*xi32> + + // CHECK: "tosa.equal"(%arg0, %arg1) : (tensor<4xi32>, tensor) -> tensor<4xi1> + %4 = "tosa.equal"(%arg0, %arg1) : (tensor<4xi32>, tensor) -> tensor<*xi1> + + // CHECK: "tosa.greater"(%arg0, %arg1) : (tensor<4xi32>, tensor) -> tensor<4xi1> + %5 = "tosa.greater"(%arg0, %arg1) : (tensor<4xi32>, tensor) -> tensor<*xi1> + + // CHECK: "tosa.greater_equal"(%arg0, %arg1) : (tensor<4xi32>, tensor) -> tensor<4xi1> + %6 = "tosa.greater_equal"(%arg0, %arg1) : (tensor<4xi32>, tensor) -> tensor<*xi1> + + // CHECK: "tosa.logical_left_shift"(%arg0, %arg1) {shift = 0 : i32} : (tensor<4xi32>, tensor) -> tensor<4xi32> + %7 = "tosa.logical_left_shift"(%arg0, %arg1) { shift = 0 : i32 }: (tensor<4xi32>, tensor) -> tensor<*xi32> + + // CHECK: "tosa.logical_right_shift"(%arg0, %arg1) {shift = 0 : i32} : (tensor<4xi32>, tensor) -> tensor<4xi32> + %8 = "tosa.logical_right_shift"(%arg0, %arg1) { shift = 0 : i32 }: (tensor<4xi32>, tensor) -> tensor<*xi32> + + // CHECK: "tosa.maximum"(%arg0, %arg1) : (tensor<4xi32>, tensor) -> tensor<4xi32> + %9 = "tosa.maximum"(%arg0, %arg1) : (tensor<4xi32>, tensor) -> tensor<*xi32> + + // CHECK: "tosa.minimum"(%arg0, %arg1) : (tensor<4xi32>, tensor) -> tensor<4xi32> + %10 = "tosa.minimum"(%arg0, %arg1) : (tensor<4xi32>, tensor) -> tensor<*xi32> + + // CHECK: "tosa.mul"(%arg0, %arg1) {shift = 0 : i32} : (tensor<4xi32>, tensor) -> tensor<4xi32> + %11 = "tosa.mul"(%arg0, %arg1) { shift = 0 : i32 }: (tensor<4xi32>, tensor) -> tensor<*xi32> + + // CHECK: "tosa.pow"(%arg0, %arg1) : (tensor<4xi32>, tensor) -> tensor<4xi32> + %12 = "tosa.pow"(%arg0, %arg1) : (tensor<4xi32>, tensor) -> tensor<*xi32> + + // CHECK: "tosa.sub"(%arg0, %arg1) : (tensor<4xi32>, tensor) -> tensor<4xi32> + %13 = "tosa.sub"(%arg0, %arg1) : (tensor<4xi32>, tensor) -> tensor<*xi32> + + return +} + +// ----- + +// CHECK-LABEL: @test_binary_i1 +func @test_binary_i1(%arg0 : tensor<4xi1>, %arg1 : tensor) -> () { + // CHECK "tosa.logical_and"(%arg0, %arg1) : (tensor<4xi1>, tensor) -> tensor<4xi1> + %0 = "tosa.logical_and"(%arg0, %arg1): (tensor<4xi1>, tensor) -> tensor<*xi1> + + // CHECK "tosa.logical_or"(%arg0, %arg1) : (tensor<4xi1>, tensor) -> tensor<4xi1> + %1 = "tosa.logical_or"(%arg0, %arg1): (tensor<4xi1>, tensor) -> tensor<*xi1> + + // CHECK "tosa.logical_xor"(%arg0, %arg1) : (tensor<4xi1>, tensor) -> tensor<*4i1> + %2 = "tosa.logical_xor"(%arg0, %arg1): (tensor<4xi1>, tensor) -> tensor<*xi1> + + return +} + +// ----- + +// CHECK-LABEL: @test_select_i32 +func @test_select_i32(%arg0 : tensor<4xi1>, %arg1 : tensor, %arg2 : tensor<4xi32>) -> () { + // CHECK: "tosa.select"(%arg0, %arg1, %arg2) : (tensor<4xi1>, tensor, tensor<4xi32>) -> tensor<4xi32> + %0 = "tosa.select"(%arg0, %arg1, %arg2): (tensor<4xi1>, tensor, tensor<4xi32>) -> tensor<*xi32> + + return +} + +// ----- + +func @test_static_reshape(%arg0 : tensor<4x4xi32>) -> () { + // CHECK: "tosa.reshape"(%arg0) {new_shape = [16]} : (tensor<4x4xi32>) -> tensor<16xi32> + %0 = "tosa.reshape"(%arg0) {new_shape = [16]} : (tensor<4x4xi32>) -> tensor + + // CHECK: "tosa.reshape"(%arg0) {new_shape = [-1]} : (tensor<4x4xi32>) -> tensor<16xi32> + %1 = "tosa.reshape"(%arg0) {new_shape = [-1]} : (tensor<4x4xi32>) -> tensor + + // CHECK: "tosa.reshape"(%arg0) {new_shape = [2, -1]} : (tensor<4x4xi32>) -> tensor<2x8xi32> + %2 = "tosa.reshape"(%arg0) {new_shape = [2, -1]} : (tensor<4x4xi32>) -> tensor + + return +} +// ----- + +func @test_dynamic_reshape(%arg0 : tensor<4x?xi32>) -> () { + // CHECK: %0 = "tosa.reshape"(%arg0) {new_shape = [16]} : (tensor<4x?xi32>) -> tensor<16xi32> + %0 = "tosa.reshape"(%arg0) {new_shape = [16]} : (tensor<4x?xi32>) -> tensor + + // CHECK: %1 = "tosa.reshape"(%arg0) {new_shape = [-1]} : (tensor<4x?xi32>) -> tensor + %1 = "tosa.reshape"(%arg0) {new_shape = [-1]} : (tensor<4x?xi32>) -> tensor + + // CHECK: %2 = "tosa.reshape"(%arg0) {new_shape = [2, -1]} : (tensor<4x?xi32>) -> tensor<2x?xi32> + %2 = "tosa.reshape"(%arg0) {new_shape = [2, -1]} : (tensor<4x?xi32>) -> tensor + + return +} + diff --git a/mlir/test/lib/Dialect/Tosa/TosaTestPasses.cpp b/mlir/test/lib/Dialect/Tosa/TosaTestPasses.cpp index a9bb40c76db5a..99bf14b6b59fd 100644 --- a/mlir/test/lib/Dialect/Tosa/TosaTestPasses.cpp +++ b/mlir/test/lib/Dialect/Tosa/TosaTestPasses.cpp @@ -11,7 +11,8 @@ //===----------------------------------------------------------------------===// #include "mlir/Dialect/StandardOps/IR/Ops.h" -#include "mlir/Dialect/Tosa/IR//TosaOps.h" +#include "mlir/Dialect/Tensor/IR/Tensor.h" +#include "mlir/Dialect/Tosa/IR/TosaOps.h" #include "mlir/Dialect/Tosa/Transforms/PassDetail.h" #include "mlir/Dialect/Tosa/Transforms/Passes.h" #include "mlir/Dialect/Tosa/Utils/QuantUtils.h" From e59f02216f1c6972925c5ef0f1df6d434c652c69 Mon Sep 17 00:00:00 2001 From: Jessica Paquette Date: Thu, 1 Jul 2021 13:32:07 -0700 Subject: [PATCH 476/619] [GlobalISel] Translate <1 x N> getelementptrs to scalar G_PTR_ADDs In `IRTranslator::translateGetElementPtr`, when we run into a vector gep with some scalar operands, we try to normalize those operands using `buildSplatVector`. This is fine except for when the getelementptr has a <1 x N> type. In that case it is treated as a scalar. If we run into one of these then every call to ``` // With VectorWidth = 1 LLT::fixed_vector(VectorWidth, PtrTy) ``` will assert. Here's an example (equivalent to the added testcase): https://godbolt.org/z/hGsTnMYdW To get around this, this patch adds a variable, `WantSplatVector`, which is true when our vector type ought to actually be represented using a vector. When it's false, we'll translate as a scalar. This checks if `VectorWidth > 1`. This fixes this bug: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=35496 Differential Revision: https://reviews.llvm.org/D105316 --- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 13 ++++-- .../irtranslator-one-by-n-vector-ptr-add.ll | 42 +++++++++++++++++++ 2 files changed, 52 insertions(+), 3 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-one-by-n-vector-ptr-add.ll diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index e791232e74f3d..73b763710fdff 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -1472,12 +1472,19 @@ bool IRTranslator::translateGetElementPtr(const User &U, // Normalize Vector GEP - all scalar operands should be converted to the // splat vector. unsigned VectorWidth = 0; - if (auto *VT = dyn_cast(U.getType())) + + // True if we should use a splat vector; using VectorWidth alone is not + // sufficient. + bool WantSplatVector = false; + if (auto *VT = dyn_cast(U.getType())) { VectorWidth = cast(VT)->getNumElements(); + // We don't produce 1 x N vectors; those are treated as scalars. + WantSplatVector = VectorWidth > 1; + } // We might need to splat the base pointer into a vector if the offsets // are vectors. - if (VectorWidth && !PtrTy.isVector()) { + if (WantSplatVector && !PtrTy.isVector()) { BaseReg = MIRBuilder .buildSplatVector(LLT::fixed_vector(VectorWidth, PtrTy), BaseReg) @@ -1516,7 +1523,7 @@ bool IRTranslator::translateGetElementPtr(const User &U, Register IdxReg = getOrCreateVReg(*Idx); LLT IdxTy = MRI->getType(IdxReg); if (IdxTy != OffsetTy) { - if (!IdxTy.isVector() && VectorWidth) { + if (!IdxTy.isVector() && WantSplatVector) { IdxReg = MIRBuilder.buildSplatVector( OffsetTy.changeElementType(IdxTy), IdxReg).getReg(0); } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-one-by-n-vector-ptr-add.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-one-by-n-vector-ptr-add.ll new file mode 100644 index 0000000000000..849fb01637855 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-one-by-n-vector-ptr-add.ll @@ -0,0 +1,42 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -O0 -global-isel -mtriple aarch64 -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s + +; Make sure we treat <1 x N> getelementptrs like scalar getelementptrs. + +; We should not create a splat vector for the non-vector index on this +; getelementptr. The entire getelementptr should be translated to a scalar +; G_PTR_ADD. +define <1 x i8*> @one_elt_vector_ptr_add_non_vector_idx(<1 x i8*> %vec) { + ; CHECK-LABEL: name: one_elt_vector_ptr_add_non_vector_idx + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $d0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY1]](s32) + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[SEXT]](s64) + ; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY [[PTR_ADD]](p0) + ; CHECK: $d0 = COPY [[COPY2]](p0) + ; CHECK: RET_ReallyLR implicit $d0 + %ptr_add = getelementptr i8, <1 x i8*> %vec, <1 x i32> + ret <1 x i8*> %ptr_add +} + +; We should not create a splat vector for the non-vector pointer on this +; getelementptr. The entire getelementptr should be translated to a scalar +; G_PTR_ADD. +define <1 x i8*> @one_elt_vector_ptr_add_non_vector_ptr(i8* %vec) { + ; CHECK-LABEL: name: one_elt_vector_ptr_add_non_vector_ptr + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY1]](s32) + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[SEXT]](s64) + ; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY [[PTR_ADD]](p0) + ; CHECK: $d0 = COPY [[COPY2]](p0) + ; CHECK: RET_ReallyLR implicit $d0 + %ptr_add = getelementptr i8, i8* %vec, <1 x i32> + ret <1 x i8*> %ptr_add +} From 65eb4028ad0322115ff5420499806db79a27289e Mon Sep 17 00:00:00 2001 From: Rob Suderman Date: Thu, 1 Jul 2021 16:35:49 -0700 Subject: [PATCH 477/619] [mlir][tosa] Added missing includes on PassDetails.h Includes were missing in the PassDetails.h that cause downstream failures on TOSA passes. Differential Revision: https://reviews.llvm.org/D105323 --- mlir/include/mlir/Dialect/Tosa/Transforms/PassDetail.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mlir/include/mlir/Dialect/Tosa/Transforms/PassDetail.h b/mlir/include/mlir/Dialect/Tosa/Transforms/PassDetail.h index 004c9cee251e5..1d7ad62cfd24f 100644 --- a/mlir/include/mlir/Dialect/Tosa/Transforms/PassDetail.h +++ b/mlir/include/mlir/Dialect/Tosa/Transforms/PassDetail.h @@ -9,6 +9,8 @@ #ifndef MLIR_DIALECT_TOSA_TRANSFORMS_PASSDETAIL_H #define MLIR_DIALECT_TOSA_TRANSFORMS_PASSDETAIL_H +#include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/Pass/Pass.h" namespace mlir { From 76dd98ec75ce70c1a2012e543e50709d646a6d8d Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Thu, 1 Jul 2021 17:02:45 -0700 Subject: [PATCH 478/619] Precommit test cases in https://reviews.llvm.org/D104953 --- llvm/test/Transforms/ObjCARC/code-motion.ll | 90 +++++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/llvm/test/Transforms/ObjCARC/code-motion.ll b/llvm/test/Transforms/ObjCARC/code-motion.ll index 7f254e6980463..305a27c203c27 100644 --- a/llvm/test/Transforms/ObjCARC/code-motion.ll +++ b/llvm/test/Transforms/ObjCARC/code-motion.ll @@ -2,6 +2,7 @@ declare void @alterRefCount() declare void @use(i8*) +declare void @readOnlyFunc(i8*, i8*) @g0 = global i8* null, align 8 @@ -38,10 +39,99 @@ define void @test2() { ret void } +; ARC optimizer shouldn't reverse the order of retains and releases in +; if.then in @test3 and @test4. + +define void @test3(i8* %obj, i1 %cond) { +; CHECK-LABEL: @test3( +; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: call void @readOnlyFunc(i8* [[OBJ:%.*]], i8* null) +; CHECK-NEXT: call void @llvm.objc.release(i8* [[OBJ]]) {{.*}}, !clang.imprecise_release !2 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 1, 2 +; CHECK-NEXT: [[TMP2:%.*]] = tail call i8* @llvm.objc.retain(i8* [[OBJ]]) +; CHECK-NEXT: call void @alterRefCount() +; CHECK-NEXT: br label [[JOIN:%.*]] +; CHECK: if.else: +; CHECK-NEXT: [[TMP3:%.*]] = tail call i8* @llvm.objc.retain(i8* [[OBJ]]) +; CHECK-NEXT: call void @alterRefCount() +; CHECK-NEXT: call void @use(i8* [[OBJ]]) +; CHECK-NEXT: call void @llvm.objc.release(i8* [[OBJ]]) {{.*}}, !clang.imprecise_release !2 +; CHECK-NEXT: br label [[JOIN]] +; CHECK: join: +; CHECK-NEXT: ret void +; + %v0 = call i8* @llvm.objc.retain(i8* %obj) + br i1 %cond, label %if.then, label %if.else + +if.then: + call void @readOnlyFunc(i8* %obj, i8* null) #0 + add i32 1, 2 + call void @alterRefCount() + br label %join + +if.else: + call void @alterRefCount() + call void @use(i8* %obj) + br label %join + +join: + call void @llvm.objc.release(i8* %obj), !clang.imprecise_release !9 + ret void +} + +define void @test4(i8* %obj0, i8* %obj1, i1 %cond) { +; CHECK-LABEL: @test4( +; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: call void @readOnlyFunc(i8* [[OBJ0:%.*]], i8* [[OBJ1:%.*]]) +; CHECK-NEXT: call void @llvm.objc.release(i8* [[OBJ1]]) {{.*}}, !clang.imprecise_release !2 +; CHECK-NEXT: call void @llvm.objc.release(i8* [[OBJ0]]) {{.*}}, !clang.imprecise_release !2 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 1, 2 +; CHECK-NEXT: [[TMP2:%.*]] = tail call i8* @llvm.objc.retain(i8* [[OBJ1]]) +; CHECK-NEXT: [[TMP3:%.*]] = tail call i8* @llvm.objc.retain(i8* [[OBJ0]]) +; CHECK-NEXT: call void @alterRefCount() +; CHECK-NEXT: br label [[JOIN:%.*]] +; CHECK: if.else: +; CHECK-NEXT: [[TMP4:%.*]] = tail call i8* @llvm.objc.retain(i8* [[OBJ1]]) +; CHECK-NEXT: [[TMP5:%.*]] = tail call i8* @llvm.objc.retain(i8* [[OBJ0]]) +; CHECK-NEXT: call void @alterRefCount() +; CHECK-NEXT: call void @use(i8* [[OBJ0]]) +; CHECK-NEXT: call void @llvm.objc.release(i8* [[OBJ0]]) {{.*}}, !clang.imprecise_release !2 +; CHECK-NEXT: call void @use(i8* [[OBJ1]]) +; CHECK-NEXT: call void @llvm.objc.release(i8* [[OBJ1]]) {{.*}}, !clang.imprecise_release !2 +; CHECK-NEXT: br label [[JOIN]] +; CHECK: join: +; CHECK-NEXT: ret void +; + %v0 = call i8* @llvm.objc.retain(i8* %obj0) + %v1 = call i8* @llvm.objc.retain(i8* %obj1) + br i1 %cond, label %if.then, label %if.else + +if.then: + call void @readOnlyFunc(i8* %obj0, i8* %obj1) #0 + add i32 1, 2 + call void @alterRefCount() + br label %join + +if.else: + call void @alterRefCount() + call void @use(i8* %obj0) + call void @use(i8* %obj1) + br label %join + +join: + call void @llvm.objc.release(i8* %obj0), !clang.imprecise_release !9 + call void @llvm.objc.release(i8* %obj1), !clang.imprecise_release !9 + ret void +} + declare void @llvm.dbg.declare(metadata, metadata, metadata) declare i8* @llvm.objc.retain(i8*) local_unnamed_addr declare void @llvm.objc.release(i8*) local_unnamed_addr +attributes #0 = { readonly } + !llvm.module.flags = !{!0, !1} !0 = !{i32 2, !"Dwarf Version", i32 4} From 32a73198fc3a84364996c2d8bf2e6470d2bb98d9 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 11 Jun 2021 12:02:38 -0400 Subject: [PATCH 479/619] Mips/GlobalISel: Use accurate memory LLTs --- llvm/lib/Target/Mips/MipsLegalizerInfo.cpp | 7 ++++--- llvm/test/CodeGen/Mips/GlobalISel/legalizer/var_arg.mir | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp b/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp index a3970781ccece..588b7e85c94c7 100644 --- a/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp +++ b/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp @@ -516,13 +516,14 @@ bool MipsLegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, } case Intrinsic::vacopy: { MachinePointerInfo MPO; + LLT PtrTy = LLT::pointer(0, 32); auto Tmp = - MIRBuilder.buildLoad(LLT::pointer(0, 32), MI.getOperand(2), + MIRBuilder.buildLoad(PtrTy, MI.getOperand(2), *MI.getMF()->getMachineMemOperand( - MPO, MachineMemOperand::MOLoad, 4, Align(4))); + MPO, MachineMemOperand::MOLoad, PtrTy, Align(4))); MIRBuilder.buildStore(Tmp, MI.getOperand(1), *MI.getMF()->getMachineMemOperand( - MPO, MachineMemOperand::MOStore, 4, Align(4))); + MPO, MachineMemOperand::MOStore, PtrTy, Align(4))); MI.eraseFromParent(); return true; } diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/var_arg.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/var_arg.mir index 3924d914fc62f..dfda755bff268 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/var_arg.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/var_arg.mir @@ -72,8 +72,8 @@ body: | ; MIPS32: [[FRAME_INDEX6:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.3.s ; MIPS32: G_STORE [[COPY]](p0), [[FRAME_INDEX3]](p0) :: (store (p0) into %ir.fmt.addr) ; MIPS32: G_VASTART [[FRAME_INDEX4]](p0) :: (store (p0) into %ir.ap1, align 1) - ; MIPS32: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX4]](p0) :: (load (s32)) - ; MIPS32: G_STORE [[LOAD]](p0), [[FRAME_INDEX5]](p0) :: (store (s32)) + ; MIPS32: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX4]](p0) :: (load (p0)) + ; MIPS32: G_STORE [[LOAD]](p0), [[FRAME_INDEX5]](p0) :: (store (p0)) ; MIPS32: [[LOAD1:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX5]](p0) :: (load (p0) from %ir.aq) ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; MIPS32: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[LOAD1]], [[C]](s32) From 6aaaeacd3d968885ef65d30283b62945ce7e6ce0 Mon Sep 17 00:00:00 2001 From: Rob Suderman Date: Thu, 1 Jul 2021 17:04:37 -0700 Subject: [PATCH 480/619] [mlir][tosa] Include TosaDialect as include in tosa PassDetail.h Tosa's PassDetail.h may be used in non-TOSA transforms. Include TosaDialect to avoid transient dependency. Differential Revision: https://reviews.llvm.org/D105324 --- mlir/include/mlir/Dialect/Tosa/Transforms/PassDetail.h | 1 + 1 file changed, 1 insertion(+) diff --git a/mlir/include/mlir/Dialect/Tosa/Transforms/PassDetail.h b/mlir/include/mlir/Dialect/Tosa/Transforms/PassDetail.h index 1d7ad62cfd24f..d7084452bd0e0 100644 --- a/mlir/include/mlir/Dialect/Tosa/Transforms/PassDetail.h +++ b/mlir/include/mlir/Dialect/Tosa/Transforms/PassDetail.h @@ -11,6 +11,7 @@ #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" +#include "mlir/Dialect/Tosa/IR/TosaOps.h" #include "mlir/Pass/Pass.h" namespace mlir { From 06ac83fcac098441be4b5cbd635453706acadc98 Mon Sep 17 00:00:00 2001 From: Rob Suderman Date: Thu, 1 Jul 2021 17:44:52 -0700 Subject: [PATCH 481/619] [mlir][tosa] Update Bazel files for TOSA pass changes There were some missing bazel dependencies for the Tosa dialect. Added these deps. Reviewed By: GMNGeoffrey Differential Revision: https://reviews.llvm.org/D105326 --- utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 3 +++ utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel | 1 + 2 files changed, 4 insertions(+) diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 1969e77ea6bd7..b58be676141cf 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -6086,6 +6086,7 @@ td_library( name = "TosaDialectTdFiles", srcs = glob(["include/mlir/Dialect/Tosa/IR/*.td"]), deps = [ + ":InferTypeOpInterfaceTdFiles", ":LoopLikeInterfaceTdFiles", ":OpBaseTdFiles", ":SideEffectInterfacesTdFiles", @@ -6182,6 +6183,7 @@ cc_library( deps = [ ":Dialect", ":IR", + ":InferTypeOpInterface", ":LoopLikeInterface", ":Pass", ":QuantOps", @@ -6260,6 +6262,7 @@ cc_library( "lib/Conversion/TosaToStandard", ], deps = [ + ":Analysis", ":ConversionPassIncGen", ":IR", ":Pass", diff --git a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel index b8f590a1b9841..8015cdae2ae20 100644 --- a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel @@ -517,6 +517,7 @@ cc_library( "//mlir:IR", "//mlir:Pass", "//mlir:StandardOps", + "//mlir:TensorDialect", "//mlir:TosaDialect", "//mlir:Transforms", ], From e895a670f8bceb235802e617bba34a0572fd9f49 Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Fri, 2 Jul 2021 09:44:41 +0900 Subject: [PATCH 482/619] [mlir] Move BufferizeDimOp to Tensor/Transforms/Bufferize.cpp Differential Revision: https://reviews.llvm.org/D105256 --- .../StandardOps/Transforms/Bufferize.cpp | 17 ++-------------- .../Dialect/Tensor/Transforms/Bufferize.cpp | 20 +++++++++++++++++-- mlir/test/Dialect/Standard/bufferize.mlir | 11 ---------- mlir/test/Dialect/Tensor/bufferize.mlir | 14 ++++++++++++- 4 files changed, 33 insertions(+), 29 deletions(-) diff --git a/mlir/lib/Dialect/StandardOps/Transforms/Bufferize.cpp b/mlir/lib/Dialect/StandardOps/Transforms/Bufferize.cpp index 14b521067ef66..06f6c1251c90a 100644 --- a/mlir/lib/Dialect/StandardOps/Transforms/Bufferize.cpp +++ b/mlir/lib/Dialect/StandardOps/Transforms/Bufferize.cpp @@ -23,19 +23,6 @@ using namespace mlir; namespace { -class BufferizeDimOp : public OpConversionPattern { -public: - using OpConversionPattern::OpConversionPattern; - LogicalResult - matchAndRewrite(tensor::DimOp op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const override { - tensor::DimOp::Adaptor adaptor(operands); - rewriter.replaceOpWithNewOp(op, adaptor.source(), - adaptor.index()); - return success(); - } -}; - class BufferizeIndexCastOp : public OpConversionPattern { public: using OpConversionPattern::OpConversionPattern; @@ -70,8 +57,8 @@ class BufferizeSelectOp : public OpConversionPattern { void mlir::populateStdBufferizePatterns(BufferizeTypeConverter &typeConverter, RewritePatternSet &patterns) { - patterns.add( - typeConverter, patterns.getContext()); + patterns.add(typeConverter, + patterns.getContext()); } namespace { diff --git a/mlir/lib/Dialect/Tensor/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Tensor/Transforms/Bufferize.cpp index d772ae7d4e93b..f9faba08cf9f2 100644 --- a/mlir/lib/Dialect/Tensor/Transforms/Bufferize.cpp +++ b/mlir/lib/Dialect/Tensor/Transforms/Bufferize.cpp @@ -35,6 +35,21 @@ class BufferizeCastOp : public OpConversionPattern { }; } // namespace +namespace { +class BufferizeDimOp : public OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + LogicalResult + matchAndRewrite(tensor::DimOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override { + tensor::DimOp::Adaptor adaptor(operands); + rewriter.replaceOpWithNewOp(op, adaptor.source(), + adaptor.index()); + return success(); + } +}; +} // namespace + namespace { class BufferizeExtractOp : public OpConversionPattern { public: @@ -139,8 +154,9 @@ class BufferizeGenerateOp : public OpConversionPattern { void mlir::populateTensorBufferizePatterns( BufferizeTypeConverter &typeConverter, RewritePatternSet &patterns) { - patterns.add(typeConverter, patterns.getContext()); + patterns.add( + typeConverter, patterns.getContext()); } namespace { diff --git a/mlir/test/Dialect/Standard/bufferize.mlir b/mlir/test/Dialect/Standard/bufferize.mlir index c90f331edea54..3d75423951854 100644 --- a/mlir/test/Dialect/Standard/bufferize.mlir +++ b/mlir/test/Dialect/Standard/bufferize.mlir @@ -1,16 +1,5 @@ // RUN: mlir-opt %s -std-bufferize | FileCheck %s -// CHECK-LABEL: func @dim( -// CHECK-SAME: %[[TENSOR:.*]]: tensor, -// CHECK-SAME: %[[INDEX:.*]]: index) -> index { -// CHECK: %[[MEMREF:.*]] = memref.buffer_cast %[[TENSOR]] : memref -// CHECK: %[[EXTENT:.*]] = memref.dim %[[MEMREF]], %[[INDEX]] : memref -// CHECK: return %[[EXTENT]] : index -func @dim(%arg0: tensor, %arg1: index) -> index { - %0 = tensor.dim %arg0, %arg1 : tensor - return %0 : index -} - // CHECK-LABEL: func @select( // CHECK-SAME: %[[PRED:.*]]: i1, // CHECK-SAME: %[[TRUE_VAL:.*]]: tensor, diff --git a/mlir/test/Dialect/Tensor/bufferize.mlir b/mlir/test/Dialect/Tensor/bufferize.mlir index 819c16fcb873e..e5ffbe090f616 100644 --- a/mlir/test/Dialect/Tensor/bufferize.mlir +++ b/mlir/test/Dialect/Tensor/bufferize.mlir @@ -1,5 +1,16 @@ // RUN: mlir-opt %s -tensor-bufferize | FileCheck %s +// CHECK-LABEL: func @dim( +// CHECK-SAME: %[[TENSOR:.*]]: tensor, +// CHECK-SAME: %[[INDEX:.*]]: index) -> index { +// CHECK: %[[MEMREF:.*]] = memref.buffer_cast %[[TENSOR]] : memref +// CHECK: %[[EXTENT:.*]] = memref.dim %[[MEMREF]], %[[INDEX]] : memref +// CHECK: return %[[EXTENT]] : index +func @dim(%arg0: tensor, %arg1: index) -> index { + %0 = tensor.dim %arg0, %arg1 : tensor + return %0 : index +} + // CHECK-LABEL: func @tensor.cast( // CHECK-SAME: %[[TENSOR:.*]]: tensor) -> tensor<2xindex> { // CHECK: %[[MEMREF:.*]] = memref.buffer_cast %[[TENSOR]] @@ -67,7 +78,8 @@ func @tensor.from_elements(%arg0: index, %arg1: index) -> tensor<2xindex> { // CHECK: %[[C0:.*]] = constant 0 : index // CHECK: %[[C1:.*]] = constant 1 : index // CHECK: scf.parallel (%[[I:.*]]) = (%[[C0]]) to (%[[DYNAMIC_EXTENT]]) step (%[[C1]]) { -// CHECK: %[[ELEM:.*]] = tensor.dim %[[ARG]], %[[I]] : tensor<*xf32> +// CHECK: %[[CASTED:.*]] = memref.buffer_cast %[[ARG]] : memref<*xf32> +// CHECK: %[[ELEM:.*]] = memref.dim %[[CASTED]], %[[I]] : memref<*xf32> // CHECK: store %[[ELEM]], %[[MEMREF]][%[[I]]] : memref // CHECK: scf.yield // CHECK: } From 3a11528d97a788781de82f939f502abe7fbd729d Mon Sep 17 00:00:00 2001 From: Jez Ng Date: Thu, 1 Jul 2021 20:33:42 -0400 Subject: [PATCH 483/619] [lld-macho] Move ICF earlier to avoid emitting redundant binds This is a pretty big refactoring diff, so here are the motivations: Previously, ICF ran after scanRelocations(), where we emitting bind/rebase opcodes etc. So we had a bunch of redundant leftovers after ICF. Having ICF run before Writer seems like a better design, and is what LLD-ELF does, so this diff refactors it accordingly. However, ICF had two dependencies on things occurring in Writer: 1) it needs literals to be deduplicated beforehand and 2) it needs to know which functions have unwind info, which was being handled by `UnwindInfoSection::prepareRelocations()`. In order to do literal deduplication earlier, we need to add literal input sections to their corresponding output sections. So instead of putting all input sections into the big `inputSections` vector, and then filtering them by type later on, I've changed things so that literal sections get added directly to their output sections during the 'gather' phase. Likewise for compact unwind sections -- they get added directly to the UnwindInfoSection now. This latter change is not strictly necessary, but makes it easier for ICF to determine which functions have unwind info. Adding literal sections directly to their output sections means that we can no longer determine `inputOrder` from iterating over `inputSections`. Instead, we store that order explicitly on InputSection. Bloating the size of InputSection for this purpose would be unfortunate -- but LLD-ELF has already solved this problem: it reuses `outSecOff` to store this order value. One downside of this refactor is that we now make an additional pass over the unwind info relocations to figure out which functions have unwind info, since want to know that before `processRelocations()`. I've made sure to run that extra loop only if ICF is enabled, so there should be no overhead in non-optimizing runs of the linker. The upside of all this is that the `inputSections` vector now contains only ConcatInputSections that are destined for ConcatOutputSections, so we can clean up a bunch of code that just existed to filter out other elements from that vector. I will test for the lack of redundant binds/rebases in the upcoming cfstring deduplication diff. While binds/rebases can also happen in the regular `.text` section, they're more common in `.data` sections, so it seems more natural to test it that way. This change is perf-neutral when linking chromium_framework. Reviewed By: oontvoo Differential Revision: https://reviews.llvm.org/D105044 --- lld/MachO/ConcatOutputSection.cpp | 9 --- lld/MachO/ConcatOutputSection.h | 1 - lld/MachO/Driver.cpp | 66 +++++++++++++---- lld/MachO/ICF.cpp | 78 ++++++++++++++++++++ lld/MachO/ICF.h | 21 +----- lld/MachO/InputSection.cpp | 12 +-- lld/MachO/InputSection.h | 9 ++- lld/MachO/MarkLive.cpp | 58 +++++++-------- lld/MachO/SyntheticSections.cpp | 72 +++++++++--------- lld/MachO/SyntheticSections.h | 3 + lld/MachO/UnwindInfoSection.cpp | 32 +++++--- lld/MachO/UnwindInfoSection.h | 20 ++--- lld/MachO/Writer.cpp | 119 ++++++------------------------ 13 files changed, 258 insertions(+), 242 deletions(-) diff --git a/lld/MachO/ConcatOutputSection.cpp b/lld/MachO/ConcatOutputSection.cpp index ca4ab2d2381ae..1c3c055b89309 100644 --- a/lld/MachO/ConcatOutputSection.cpp +++ b/lld/MachO/ConcatOutputSection.cpp @@ -355,12 +355,3 @@ void ConcatOutputSection::finalizeFlags(InputSection *input) { break; } } - -void ConcatOutputSection::eraseOmittedInputSections() { - // Remove the duplicates from inputs - inputs.erase(std::remove_if(inputs.begin(), inputs.end(), - [](const ConcatInputSection *isec) -> bool { - return isec->shouldOmitFromOutput(); - }), - inputs.end()); -} diff --git a/lld/MachO/ConcatOutputSection.h b/lld/MachO/ConcatOutputSection.h index f8332f6410d44..531983cd9c13e 100644 --- a/lld/MachO/ConcatOutputSection.h +++ b/lld/MachO/ConcatOutputSection.h @@ -40,7 +40,6 @@ class ConcatOutputSection final : public OutputSection { void finalize() override; bool needsThunks() const; uint64_t estimateStubsInRangeVA(size_t callIdx) const; - void eraseOmittedInputSections(); void writeTo(uint8_t *buf) const override; diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp index 23b505d7b9477..a64d34ba5689b 100644 --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -8,6 +8,7 @@ #include "Driver.h" #include "Config.h" +#include "ICF.h" #include "InputFiles.h" #include "LTO.h" #include "MarkLive.h" @@ -18,6 +19,7 @@ #include "Symbols.h" #include "SyntheticSections.h" #include "Target.h" +#include "UnwindInfoSection.h" #include "Writer.h" #include "lld/Common/Args.h" @@ -983,6 +985,48 @@ void createFiles(const InputArgList &args) { } } +static void gatherInputSections() { + TimeTraceScope timeScope("Gathering input sections"); + int inputOrder = 0; + for (const InputFile *file : inputFiles) { + for (const SubsectionMap &map : file->subsections) { + for (const SubsectionEntry &entry : map) { + if (auto *isec = dyn_cast(entry.isec)) { + if (isec->isCoalescedWeak()) + continue; + if (isec->segname == segment_names::ld) { + assert(isec->name == section_names::compactUnwind); + in.unwindInfo->addInput(isec); + continue; + } + isec->outSecOff = inputOrder++; + inputSections.push_back(isec); + } else if (auto *isec = dyn_cast(entry.isec)) { + if (in.cStringSection->inputOrder == UnspecifiedInputOrder) + in.cStringSection->inputOrder = inputOrder++; + in.cStringSection->addInput(isec); + } else if (auto *isec = dyn_cast(entry.isec)) { + if (in.wordLiteralSection->inputOrder == UnspecifiedInputOrder) + in.wordLiteralSection->inputOrder = inputOrder++; + in.wordLiteralSection->addInput(isec); + } else { + llvm_unreachable("unexpected input section kind"); + } + } + } + } + assert(inputOrder <= UnspecifiedInputOrder); +} + +static void foldIdenticalLiterals() { + // We always create a cStringSection, regardless of whether dedupLiterals is + // true. If it isn't, we simply create a non-deduplicating CStringSection. + // Either way, we must unconditionally finalize it here. + in.cStringSection->finalizeContents(); + if (in.wordLiteralSection) + in.wordLiteralSection->finalizeContents(); +} + bool macho::link(ArrayRef argsArr, bool canExitEarly, raw_ostream &stdoutOS, raw_ostream &stderrOS) { lld::stdoutOS = &stdoutOS; @@ -1344,25 +1388,17 @@ bool macho::link(ArrayRef argsArr, bool canExitEarly, inputFiles.insert(make(*buffer, segName, sectName)); } - { - TimeTraceScope timeScope("Gathering input sections"); - // Gather all InputSections into one vector. - for (const InputFile *file : inputFiles) { - for (const SubsectionMap &map : file->subsections) { - for (const SubsectionEntry &entry : map) { - if (auto concatIsec = dyn_cast(entry.isec)) - if (concatIsec->isCoalescedWeak()) - continue; - inputSections.push_back(entry.isec); - } - } - } - assert(inputSections.size() < UnspecifiedInputOrder); - } + gatherInputSections(); if (config->deadStrip) markLive(); + // ICF assumes that all literals have been folded already, so we must run + // foldIdenticalLiterals before foldIdenticalSections. + foldIdenticalLiterals(); + if (config->icfLevel != ICFLevel::none) + foldIdenticalSections(); + // Write to an output file. if (target->wordSize == 8) writeResult(); diff --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp index 4ff8c578d56c2..c1b8325d2a6c8 100644 --- a/lld/MachO/ICF.cpp +++ b/lld/MachO/ICF.cpp @@ -10,7 +10,10 @@ #include "ConcatOutputSection.h" #include "InputSection.h" #include "Symbols.h" +#include "UnwindInfoSection.h" + #include "llvm/Support/Parallel.h" +#include "llvm/Support/TimeProfiler.h" #include @@ -18,6 +21,25 @@ using namespace llvm; using namespace lld; using namespace lld::macho; +class ICF { +public: + ICF(std::vector &inputs); + + void run(); + void segregate(size_t begin, size_t end, + std::function + equals); + size_t findBoundary(size_t begin, size_t end); + void forEachClassRange(size_t begin, size_t end, + std::function func); + void forEachClass(std::function func); + + // ICF needs a copy of the inputs vector because its equivalence-class + // segregation algorithm destroys the proper sequence. + std::vector icfInputs; +}; + ICF::ICF(std::vector &inputs) { icfInputs.assign(inputs.begin(), inputs.end()); } @@ -276,3 +298,59 @@ void ICF::segregate( begin = mid; } } + +template +DenseSet findFunctionsWithUnwindInfo() { + DenseSet result; + for (ConcatInputSection *isec : in.unwindInfo->getInputs()) { + for (size_t i = 0; i < isec->relocs.size(); ++i) { + Reloc &r = isec->relocs[i]; + assert(target->hasAttr(r.type, RelocAttrBits::UNSIGNED)); + if (r.offset % sizeof(CompactUnwindEntry) != + offsetof(CompactUnwindEntry, functionAddress)) + continue; + result.insert(r.referent.get()); + } + } + return result; +} + +void macho::foldIdenticalSections() { + TimeTraceScope timeScope("Fold Identical Code Sections"); + // The ICF equivalence-class segregation algorithm relies on pre-computed + // hashes of InputSection::data for the ConcatOutputSection::inputs and all + // sections referenced by their relocs. We could recursively traverse the + // relocs to find every referenced InputSection, but that precludes easy + // parallelization. Therefore, we hash every InputSection here where we have + // them all accessible as simple vectors. + std::vector codeSections; + + // ICF can't fold functions with unwind info + DenseSet functionsWithUnwindInfo = + target->wordSize == 8 ? findFunctionsWithUnwindInfo() + : findFunctionsWithUnwindInfo(); + + // If an InputSection is ineligible for ICF, we give it a unique ID to force + // it into an unfoldable singleton equivalence class. Begin the unique-ID + // space at inputSections.size(), so that it will never intersect with + // equivalence-class IDs which begin at 0. Since hashes & unique IDs never + // coexist with equivalence-class IDs, this is not necessary, but might help + // someone keep the numbers straight in case we ever need to debug the + // ICF::segregate() + uint64_t icfUniqueID = inputSections.size(); + for (ConcatInputSection *isec : inputSections) { + bool isHashable = isCodeSection(isec) && !isec->shouldOmitFromOutput() && + !functionsWithUnwindInfo.contains(isec) && + isec->isHashableForICF(); + if (isHashable) { + codeSections.push_back(isec); + } else { + isec->icfEqClass[0] = ++icfUniqueID; + } + } + parallelForEach(codeSections, + [](ConcatInputSection *isec) { isec->hashForICF(); }); + // Now that every input section is either hashed or marked as unique, run the + // segregation algorithm to detect foldable subsections. + ICF(codeSections).run(); +} diff --git a/lld/MachO/ICF.h b/lld/MachO/ICF.h index 767630f0d7eb1..9500a946601ea 100644 --- a/lld/MachO/ICF.h +++ b/lld/MachO/ICF.h @@ -15,26 +15,7 @@ namespace lld { namespace macho { -class ConcatInputSection; - -class ICF { -public: - ICF(std::vector &inputs); - - void run(); - void segregate(size_t begin, size_t end, - std::function - equals); - size_t findBoundary(size_t begin, size_t end); - void forEachClassRange(size_t begin, size_t end, - std::function func); - void forEachClass(std::function func); - - // ICF needs a copy of the inputs vector because its equivalence-class - // segregation algorithm destroys the proper sequence. - std::vector icfInputs; -}; +void foldIdenticalSections(); } // namespace macho } // namespace lld diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp index 740eea6d8fd41..4ad790377676f 100644 --- a/lld/MachO/InputSection.cpp +++ b/lld/MachO/InputSection.cpp @@ -25,7 +25,7 @@ using namespace llvm::support; using namespace lld; using namespace lld::macho; -std::vector macho::inputSections; +std::vector macho::inputSections; uint64_t InputSection::getFileSize() const { return isZeroFill(flags) ? 0 : getSize(); @@ -48,16 +48,10 @@ static uint64_t resolveSymbolVA(const Symbol *sym, uint8_t type) { // ICF needs to hash any section that might potentially be duplicated so // that it can match on content rather than identity. -bool ConcatInputSection::isHashableForICF(bool isText) const { - if (shouldOmitFromOutput()) - return false; +bool ConcatInputSection::isHashableForICF() const { switch (sectionType(flags)) { case S_REGULAR: - if (isText) - return !hasPersonality; - // One might hope that we could hash __TEXT,__const subsections to fold - // references to duplicated values, but alas, many tests fail. - return false; + return true; case S_CSTRING_LITERALS: case S_4BYTE_LITERALS: case S_8BYTE_LITERALS: diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h index 9eea39105f147..efa175e0bfc7f 100644 --- a/lld/MachO/InputSection.h +++ b/lld/MachO/InputSection.h @@ -95,7 +95,7 @@ class ConcatInputSection final : public InputSection { void markLive(uint64_t off) override { live = true; } bool isCoalescedWeak() const { return wasCoalesced && numRefs == 0; } bool shouldOmitFromOutput() const { return !live || isCoalescedWeak(); } - bool isHashableForICF(bool isText) const; + bool isHashableForICF() const; void hashForICF(); void writeTo(uint8_t *buf); @@ -108,8 +108,6 @@ class ConcatInputSection final : public InputSection { return isec->kind() == ConcatKind; } - // ICF can't fold functions with LSDA+personality - bool hasPersonality = false; // Points to the surviving section after this one is folded by ICF InputSection *replacement = nullptr; // Equivalence-class ID for ICF @@ -124,6 +122,9 @@ class ConcatInputSection final : public InputSection { bool live = !config->deadStrip; // How many symbols refer to this InputSection. uint32_t numRefs = 0; + // This variable has two usages. Initially, it represents the input order. + // After assignAddresses is called, it represents the offset from the + // beginning of the output section this section was assigned to. uint64_t outSecOff = 0; }; @@ -256,7 +257,7 @@ inline bool isWordLiteralSection(uint32_t flags) { bool isCodeSection(const InputSection *); -extern std::vector inputSections; +extern std::vector inputSections; namespace section_names { diff --git a/lld/MachO/MarkLive.cpp b/lld/MachO/MarkLive.cpp index a63f57d6d0f7c..7962ff9b094a2 100644 --- a/lld/MachO/MarkLive.cpp +++ b/lld/MachO/MarkLive.cpp @@ -101,10 +101,9 @@ void markLive() { if (auto *stubBinder = dyn_cast_or_null(symtab->find("dyld_stub_binder"))) addSym(stubBinder); - for (InputSection *isec : inputSections) { + for (ConcatInputSection *isec : inputSections) { // Sections marked no_dead_strip if (isec->flags & S_ATTR_NO_DEAD_STRIP) { - assert(isa(isec)); enqueue(isec, 0); continue; } @@ -112,37 +111,33 @@ void markLive() { // mod_init_funcs, mod_term_funcs sections if (sectionType(isec->flags) == S_MOD_INIT_FUNC_POINTERS || sectionType(isec->flags) == S_MOD_TERM_FUNC_POINTERS) { - assert(isa(isec)); enqueue(isec, 0); continue; } + } - // Dead strip runs before UnwindInfoSection handling so we need to keep - // __LD,__compact_unwind alive here. - // But that section contains absolute references to __TEXT,__text and - // keeps most code alive due to that. So we can't just enqueue() the - // section: We must skip the relocations for the functionAddress - // in each CompactUnwindEntry. - // See also scanEhFrameSection() in lld/ELF/MarkLive.cpp. - if (isec->segname == segment_names::ld && - isec->name == section_names::compactUnwind) { - auto concatIsec = cast(isec); - concatIsec->live = true; - const int compactUnwindEntrySize = - target->wordSize == 8 ? sizeof(CompactUnwindEntry) - : sizeof(CompactUnwindEntry); - for (const Reloc &r : isec->relocs) { - // This is the relocation for the address of the function itself. - // Ignore it, else these would keep everything alive. - if (r.offset % compactUnwindEntrySize == 0) - continue; + // Dead strip runs before UnwindInfoSection handling so we need to keep + // __LD,__compact_unwind alive here. + // But that section contains absolute references to __TEXT,__text and + // keeps most code alive due to that. So we can't just enqueue() the + // section: We must skip the relocations for the functionAddress + // in each CompactUnwindEntry. + // See also scanEhFrameSection() in lld/ELF/MarkLive.cpp. + for (ConcatInputSection *isec : in.unwindInfo->getInputs()) { + isec->live = true; + const int compactUnwindEntrySize = + target->wordSize == 8 ? sizeof(CompactUnwindEntry) + : sizeof(CompactUnwindEntry); + for (const Reloc &r : isec->relocs) { + // This is the relocation for the address of the function itself. + // Ignore it, else these would keep everything alive. + if (r.offset % compactUnwindEntrySize == 0) + continue; - if (auto *s = r.referent.dyn_cast()) - addSym(s); - else - enqueue(r.referent.get(), r.addend); - } - continue; + if (auto *s = r.referent.dyn_cast()) + addSym(s); + else + enqueue(r.referent.get(), r.addend); } } @@ -163,13 +158,10 @@ void markLive() { // S_ATTR_LIVE_SUPPORT sections are live if they point _to_ a live section. // Process them in a second pass. - for (InputSection *isec : inputSections) { - if (!isa(isec)) - continue; - auto concatIsec = cast(isec); + for (ConcatInputSection *isec : inputSections) { // FIXME: Check if copying all S_ATTR_LIVE_SUPPORT sections into a // separate vector and only walking that here is faster. - if (!(concatIsec->flags & S_ATTR_LIVE_SUPPORT) || concatIsec->live) + if (!(isec->flags & S_ATTR_LIVE_SUPPORT) || isec->live) continue; for (const Reloc &r : isec->relocs) { diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp index 03976ff2d6882..07406650dfd87 100644 --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -775,7 +775,7 @@ void SymtabSection::emitStabs() { } StabsEntry symStab; - symStab.sect = defined->isec->parent->index; + symStab.sect = defined->isec->canonical()->parent->index; symStab.strx = stringTableSection.addString(defined->getName()); symStab.value = defined->getVA(); @@ -900,7 +900,7 @@ template void SymtabSectionImpl::writeTo(uint8_t *buf) const { nList->n_value = defined->value; } else { nList->n_type = scope | N_SECT; - nList->n_sect = defined->isec->parent->index; + nList->n_sect = defined->isec->canonical()->parent->index; // For the N_SECT symbol type, n_value is the address of the symbol nList->n_value = defined->getVA(); } @@ -1255,40 +1255,46 @@ WordLiteralSection::WordLiteralSection() void WordLiteralSection::addInput(WordLiteralInputSection *isec) { isec->parent = this; - // We do all processing of the InputSection here, so it will be effectively - // finalized. - isec->isFinal = true; - const uint8_t *buf = isec->data.data(); - switch (sectionType(isec->flags)) { - case S_4BYTE_LITERALS: { - for (size_t off = 0, e = isec->data.size(); off < e; off += 4) { - if (!isec->isLive(off)) - continue; - uint32_t value = *reinterpret_cast(buf + off); - literal4Map.emplace(value, literal4Map.size()); + inputs.push_back(isec); +} + +void WordLiteralSection::finalizeContents() { + for (WordLiteralInputSection *isec : inputs) { + // We do all processing of the InputSection here, so it will be effectively + // finalized. + isec->isFinal = true; + const uint8_t *buf = isec->data.data(); + switch (sectionType(isec->flags)) { + case S_4BYTE_LITERALS: { + for (size_t off = 0, e = isec->data.size(); off < e; off += 4) { + if (!isec->isLive(off)) + continue; + uint32_t value = *reinterpret_cast(buf + off); + literal4Map.emplace(value, literal4Map.size()); + } + break; } - break; - } - case S_8BYTE_LITERALS: { - for (size_t off = 0, e = isec->data.size(); off < e; off += 8) { - if (!isec->isLive(off)) - continue; - uint64_t value = *reinterpret_cast(buf + off); - literal8Map.emplace(value, literal8Map.size()); + case S_8BYTE_LITERALS: { + for (size_t off = 0, e = isec->data.size(); off < e; off += 8) { + if (!isec->isLive(off)) + continue; + uint64_t value = *reinterpret_cast(buf + off); + literal8Map.emplace(value, literal8Map.size()); + } + break; } - break; - } - case S_16BYTE_LITERALS: { - for (size_t off = 0, e = isec->data.size(); off < e; off += 16) { - if (!isec->isLive(off)) - continue; - UInt128 value = *reinterpret_cast(buf + off); - literal16Map.emplace(value, literal16Map.size()); + case S_16BYTE_LITERALS: { + for (size_t off = 0, e = isec->data.size(); off < e; off += 16) { + if (!isec->isLive(off)) + continue; + UInt128 value = *reinterpret_cast(buf + off); + literal16Map.emplace(value, literal16Map.size()); + } + break; + } + default: + llvm_unreachable("invalid literal section type"); } - break; - } - default: - llvm_unreachable("invalid literal section type"); } } diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h index 73d4c31534386..cf464b866d33a 100644 --- a/lld/MachO/SyntheticSections.h +++ b/lld/MachO/SyntheticSections.h @@ -557,6 +557,7 @@ class WordLiteralSection final : public SyntheticSection { WordLiteralSection(); void addInput(WordLiteralInputSection *); + void finalizeContents(); void writeTo(uint8_t *buf) const override; uint64_t getSize() const override { @@ -584,6 +585,8 @@ class WordLiteralSection final : public SyntheticSection { } private: + std::vector inputs; + template struct Hasher { llvm::hash_code operator()(T v) const { return llvm::hash_value(v); } }; diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp index f4bd08d2f3cbc..567e18a607e2e 100644 --- a/lld/MachO/UnwindInfoSection.cpp +++ b/lld/MachO/UnwindInfoSection.cpp @@ -103,9 +103,11 @@ struct SecondLevelPage { EncodingMap localEncodingIndexes; }; -template class UnwindInfoSectionImpl : public UnwindInfoSection { +template +class UnwindInfoSectionImpl final : public UnwindInfoSection { public: void prepareRelocations(ConcatInputSection *) override; + void addInput(ConcatInputSection *) override; void finalize() override; void writeTo(uint8_t *buf) const override; @@ -126,6 +128,25 @@ template class UnwindInfoSectionImpl : public UnwindInfoSection { uint64_t level2PagesOffset = 0; }; +UnwindInfoSection::UnwindInfoSection() + : SyntheticSection(segment_names::text, section_names::unwindInfo) { + align = 4; + compactUnwindSection = + make(section_names::compactUnwind); +} + +void UnwindInfoSection::prepareRelocations() { + for (ConcatInputSection *isec : compactUnwindSection->inputs) + prepareRelocations(isec); +} + +template +void UnwindInfoSectionImpl::addInput(ConcatInputSection *isec) { + assert(isec->segname == segment_names::ld && + isec->name == section_names::compactUnwind); + compactUnwindSection->addInput(isec); +} + // Compact unwind relocations have different semantics, so we handle them in a // separate code path from regular relocations. First, we do not wish to add // rebase opcodes for __LD,__compact_unwind, because that section doesn't @@ -133,8 +154,6 @@ template class UnwindInfoSectionImpl : public UnwindInfoSection { // reside in the GOT and must be treated specially. template void UnwindInfoSectionImpl::prepareRelocations(ConcatInputSection *isec) { - assert(isec->segname == segment_names::ld && - isec->name == section_names::compactUnwind); assert(!isec->shouldOmitFromOutput() && "__compact_unwind section should not be omitted"); @@ -150,13 +169,6 @@ void UnwindInfoSectionImpl::prepareRelocations(ConcatInputSection *isec) { offsetof(CompactUnwindEntry, personality)) continue; - Reloc &rFunc = isec->relocs[++i]; - assert(r.offset == - rFunc.offset + offsetof(CompactUnwindEntry, personality)); - auto *referentIsec = - cast(rFunc.referent.get()); - referentIsec->hasPersonality = true; - if (auto *s = r.referent.dyn_cast()) { if (auto *undefined = dyn_cast(s)) { treatUndefinedSymbol(*undefined); diff --git a/lld/MachO/UnwindInfoSection.h b/lld/MachO/UnwindInfoSection.h index 7ccf7a4dfde7e..11503aaed0da9 100644 --- a/lld/MachO/UnwindInfoSection.h +++ b/lld/MachO/UnwindInfoSection.h @@ -27,21 +27,21 @@ template struct CompactUnwindEntry { class UnwindInfoSection : public SyntheticSection { public: - bool isNeeded() const override { return compactUnwindSection != nullptr; } + bool isNeeded() const override { + return !compactUnwindSection->inputs.empty(); + } uint64_t getSize() const override { return unwindInfoSize; } - virtual void prepareRelocations(ConcatInputSection *) = 0; - - void setCompactUnwindSection(ConcatOutputSection *cuSection) { - compactUnwindSection = cuSection; + virtual void addInput(ConcatInputSection *) = 0; + std::vector getInputs() { + return compactUnwindSection->inputs; } + void prepareRelocations(); protected: - UnwindInfoSection() - : SyntheticSection(segment_names::text, section_names::unwindInfo) { - align = 4; - } + UnwindInfoSection(); + virtual void prepareRelocations(ConcatInputSection *) = 0; - ConcatOutputSection *compactUnwindSection = nullptr; + ConcatOutputSection *compactUnwindSection; uint64_t unwindInfoSize = 0; }; diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp index 443cd99b73f49..7e3dc70cd18d8 100644 --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -9,7 +9,6 @@ #include "Writer.h" #include "ConcatOutputSection.h" #include "Config.h" -#include "ICF.h" #include "InputFiles.h" #include "InputSection.h" #include "MapFile.h" @@ -52,8 +51,6 @@ class Writer { void scanSymbols(); template void createOutputSections(); template void createLoadCommands(); - void foldIdenticalLiterals(); - void foldIdenticalSections(); void finalizeAddresses(); void finalizeLinkEditSegment(); void assignAddresses(OutputSegment *); @@ -592,18 +589,9 @@ static void prepareSymbolRelocation(Symbol *sym, const InputSection *isec, void Writer::scanRelocations() { TimeTraceScope timeScope("Scan relocations"); - for (InputSection *isec : inputSections) { - if (!isa(isec)) + for (ConcatInputSection *isec : inputSections) { + if (isec->shouldOmitFromOutput()) continue; - auto concatIsec = cast(isec); - - if (concatIsec->shouldOmitFromOutput()) - continue; - - if (concatIsec->segname == segment_names::ld) { - in.unwindInfo->prepareRelocations(concatIsec); - continue; - } for (auto it = isec->relocs.begin(); it != isec->relocs.end(); ++it) { Reloc &r = *it; @@ -621,12 +609,18 @@ void Writer::scanRelocations() { if (!isa(sym) && validateSymbolRelocation(sym, isec, r)) prepareSymbolRelocation(sym, isec, r); } else { - assert(r.referent.is()); + // Canonicalize the referent so that later accesses in Writer won't + // have to worry about it. Perhaps we should do this for Defined::isec + // too... + auto *referentIsec = r.referent.get(); + r.referent = referentIsec->canonical(); if (!r.pcrel) in.rebase->addEntry(isec, r.offset); } } } + + in.unwindInfo->prepareRelocations(); } void Writer::scanSymbols() { @@ -892,28 +886,16 @@ template void Writer::createOutputSections() { } // Then add input sections to output sections. - for (const auto &p : enumerate(inputSections)) { - InputSection *isec = p.value(); - OutputSection *osec; - if (auto *concatIsec = dyn_cast(isec)) { - if (concatIsec->shouldOmitFromOutput()) - continue; - NamePair names = maybeRenameSection({isec->segname, isec->name}); - ConcatOutputSection *&concatOsec = concatOutputSections[names]; - if (concatOsec == nullptr) - concatOsec = make(names.second); - concatOsec->addInput(concatIsec); - osec = concatOsec; - } else if (auto *cStringIsec = dyn_cast(isec)) { - in.cStringSection->addInput(cStringIsec); - osec = in.cStringSection; - } else if (auto *litIsec = dyn_cast(isec)) { - in.wordLiteralSection->addInput(litIsec); - osec = in.wordLiteralSection; - } else { - llvm_unreachable("unhandled InputSection type"); - } - osec->inputOrder = std::min(osec->inputOrder, static_cast(p.index())); + for (ConcatInputSection *isec : inputSections) { + if (isec->shouldOmitFromOutput()) + continue; + NamePair names = maybeRenameSection({isec->segname, isec->name}); + ConcatOutputSection *&osec = concatOutputSections[names]; + if (!osec) + osec = make(names.second); + osec->addInput(isec); + osec->inputOrder = + std::min(osec->inputOrder, static_cast(isec->outSecOff)); } // Once all the inputs are added, we can finalize the output section @@ -921,12 +903,8 @@ template void Writer::createOutputSections() { for (const auto &it : concatOutputSections) { StringRef segname = it.first.first; ConcatOutputSection *osec = it.second; - if (segname == segment_names::ld) { - assert(osec->name == section_names::compactUnwind); - in.unwindInfo->setCompactUnwindSection(osec); - } else { - getOrCreateOutputSegment(segname)->addOutputSection(osec); - } + assert(segname != segment_names::ld); + getOrCreateOutputSegment(segname)->addOutputSection(osec); } for (SyntheticSection *ssec : syntheticSections) { @@ -946,57 +924,6 @@ template void Writer::createOutputSections() { linkEditSegment = getOrCreateOutputSegment(segment_names::linkEdit); } -void Writer::foldIdenticalLiterals() { - if (in.cStringSection) - in.cStringSection->finalizeContents(); - // TODO: WordLiteralSection & CFStringSection should be finalized here too -} - -void Writer::foldIdenticalSections() { - if (config->icfLevel == ICFLevel::none) - return; - ConcatOutputSection *textOutputSection = concatOutputSections.lookup( - maybeRenameSection({segment_names::text, section_names::text})); - if (textOutputSection == nullptr) - return; - - TimeTraceScope timeScope("Fold Identical Code Sections"); - // The ICF equivalence-class segregation algorithm relies on pre-computed - // hashes of InputSection::data for the ConcatOutputSection::inputs and all - // sections referenced by their relocs. We could recursively traverse the - // relocs to find every referenced InputSection, but that precludes easy - // parallelization. Therefore, we hash every InputSection here where we have - // them all accessible as a simple vector. - std::vector hashable; - // If an InputSection is ineligible for ICF, we give it a unique ID to force - // it into an unfoldable singleton equivalence class. Begin the unique-ID - // space at inputSections.size(), so that it will never intersect with - // equivalence-class IDs which begin at 0. Since hashes & unique IDs never - // coexist with equivalence-class IDs, this is not necessary, but might help - // someone keep the numbers straight in case we ever need to debug the - // ICF::segregate() - uint64_t icfUniqueID = inputSections.size(); - for (InputSection *isec : inputSections) { - if (auto *concatIsec = dyn_cast(isec)) { - if (concatIsec->isHashableForICF(isec->parent == textOutputSection)) - hashable.push_back(concatIsec); - else - concatIsec->icfEqClass[0] = ++icfUniqueID; - } - } - // FIXME: hash literal sections here too? - parallelForEach(hashable, - [](ConcatInputSection *isec) { isec->hashForICF(); }); - // Now that every input section is either hashed or marked as unique, - // run the segregation algorithm to detect foldable subsections - ICF(textOutputSection->inputs).run(); - size_t oldSize = textOutputSection->inputs.size(); - textOutputSection->eraseOmittedInputSections(); - size_t newSize = textOutputSection->inputs.size(); - log("ICF kept " + Twine(newSize) + " removed " + Twine(oldSize - newSize) + - " of " + Twine(oldSize)); -} - void Writer::finalizeAddresses() { TimeTraceScope timeScope("Finalize addresses"); uint64_t pageSize = target->getPageSize(); @@ -1128,10 +1055,6 @@ template void Writer::run() { in.stubHelper->setup(); scanSymbols(); createOutputSections(); - // ICF assumes that all literals have been folded already, so we must run - // foldIdenticalLiterals before foldIdenticalSections. - foldIdenticalLiterals(); - foldIdenticalSections(); // After this point, we create no new segments; HOWEVER, we might // yet create branch-range extension thunks for architectures whose // hardware call instructions have limited range, e.g., ARM(64). From ac2dd06b91ae7264fa7d396c15c7647510ede231 Mon Sep 17 00:00:00 2001 From: Jez Ng Date: Thu, 1 Jul 2021 20:33:44 -0400 Subject: [PATCH 484/619] [lld-macho] Deduplicate CFStrings `__cfstring` is a special literal section, so instead of breaking it up at symbol boundaries, we break it up at fixed-width boundaries (since each literal is the same size). Symbols can only occur at one of those boundaries, so this is strictly more powerful than `.subsections_via_symbols`. With that in place, we then run the section through ICF. This change is about perf-neutral when linking chromium_framework. Reviewed By: #lld-macho, gkm Differential Revision: https://reviews.llvm.org/D105045 --- lld/MachO/ICF.cpp | 19 ++- lld/MachO/InputFiles.cpp | 42 ++++- lld/MachO/InputSection.cpp | 5 + lld/MachO/InputSection.h | 2 + .../CoreFoundation.tbd | 2 +- lld/test/MachO/cfstring-dedup.s | 146 ++++++++++++++++++ lld/test/MachO/invalid/cfstring.s | 19 +++ 7 files changed, 226 insertions(+), 9 deletions(-) create mode 100644 lld/test/MachO/cfstring-dedup.s create mode 100644 lld/test/MachO/invalid/cfstring.s diff --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp index c1b8325d2a6c8..fbd7cb36514fa 100644 --- a/lld/MachO/ICF.cpp +++ b/lld/MachO/ICF.cpp @@ -324,6 +324,7 @@ void macho::foldIdenticalSections() { // parallelization. Therefore, we hash every InputSection here where we have // them all accessible as simple vectors. std::vector codeSections; + std::vector cfStringSections; // ICF can't fold functions with unwind info DenseSet functionsWithUnwindInfo = @@ -339,18 +340,30 @@ void macho::foldIdenticalSections() { // ICF::segregate() uint64_t icfUniqueID = inputSections.size(); for (ConcatInputSection *isec : inputSections) { - bool isHashable = isCodeSection(isec) && !isec->shouldOmitFromOutput() && + bool isHashable = (isCodeSection(isec) || isCfStringSection(isec)) && + !isec->shouldOmitFromOutput() && !functionsWithUnwindInfo.contains(isec) && isec->isHashableForICF(); if (isHashable) { - codeSections.push_back(isec); + if (isCodeSection(isec)) + codeSections.push_back(isec); + else { + assert(isCfStringSection(isec)); + cfStringSections.push_back(isec); + } } else { isec->icfEqClass[0] = ++icfUniqueID; } } - parallelForEach(codeSections, + std::vector hashable(codeSections); + hashable.insert(hashable.end(), cfStringSections.begin(), + cfStringSections.end()); + parallelForEach(hashable, [](ConcatInputSection *isec) { isec->hashForICF(); }); // Now that every input section is either hashed or marked as unique, run the // segregation algorithm to detect foldable subsections. + // We dedup cfStringSections first since code sections may refer to them, but + // not vice-versa. + ICF(cfStringSections).run(); ICF(codeSections).run(); } diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp index f75c65f9370d8..6caf48232ad9b 100644 --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -281,6 +281,17 @@ void ObjFile::parseSections(ArrayRef
sections) { flags); } subsections.push_back({{0, isec}}); + } else if (config->icfLevel != ICFLevel::none && + (name == section_names::cfString && + segname == segment_names::data)) { + uint64_t literalSize = target->wordSize == 8 ? 32 : 16; + subsections.push_back({}); + SubsectionMap &subsecMap = subsections.back(); + for (uint64_t off = 0; off < data.size(); off += literalSize) + subsecMap.push_back( + {off, make(segname, name, this, + data.slice(off, literalSize), align, + flags)}); } else { auto *isec = make(segname, name, this, data, align, flags); @@ -593,22 +604,43 @@ void ObjFile::parseSymbols(ArrayRef sectionHeaders, } } - // Calculate symbol sizes and create subsections by splitting the sections - // along symbol boundaries. for (size_t i = 0; i < subsections.size(); ++i) { SubsectionMap &subsecMap = subsections[i]; if (subsecMap.empty()) continue; std::vector &symbolIndices = symbolsBySection[i]; - llvm::stable_sort(symbolIndices, [&](uint32_t lhs, uint32_t rhs) { - return nList[lhs].n_value < nList[rhs].n_value; - }); uint64_t sectionAddr = sectionHeaders[i].addr; uint32_t sectionAlign = 1u << sectionHeaders[i].align; + InputSection *isec = subsecMap.back().isec; + // __cfstring has already been split into subsections during + // parseSections(), so we simply need to match Symbols to the corresponding + // subsection here. + if (config->icfLevel != ICFLevel::none && isCfStringSection(isec)) { + for (size_t j = 0; j < symbolIndices.size(); ++j) { + uint32_t symIndex = symbolIndices[j]; + const NList &sym = nList[symIndex]; + StringRef name = strtab + sym.n_strx; + uint64_t symbolOffset = sym.n_value - sectionAddr; + InputSection *isec = findContainingSubsection(subsecMap, &symbolOffset); + if (symbolOffset != 0) { + error(toString(this) + ": __cfstring contains symbol " + name + + " at misaligned offset"); + continue; + } + symbols[symIndex] = createDefined(sym, name, isec, 0, isec->getSize()); + } + continue; + } + + // Calculate symbol sizes and create subsections by splitting the sections + // along symbol boundaries. // We populate subsecMap by repeatedly splitting the last (highest address) // subsection. + llvm::stable_sort(symbolIndices, [&](uint32_t lhs, uint32_t rhs) { + return nList[lhs].n_value < nList[rhs].n_value; + }); SubsectionEntry subsecEntry = subsecMap.back(); for (size_t j = 0; j < symbolIndices.size(); ++j) { uint32_t symIndex = symbolIndices[j]; diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp index 4ad790377676f..5f1eab349ba5a 100644 --- a/lld/MachO/InputSection.cpp +++ b/lld/MachO/InputSection.cpp @@ -232,6 +232,11 @@ bool macho::isCodeSection(const InputSection *isec) { return false; } +bool macho::isCfStringSection(const InputSection *isec) { + return isec->name == section_names::cfString && + isec->segname == segment_names::data; +} + std::string lld::toString(const InputSection *isec) { return (toString(isec->file) + ":(" + isec->name + ")").str(); } diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h index efa175e0bfc7f..a7d8b6e29fcf9 100644 --- a/lld/MachO/InputSection.h +++ b/lld/MachO/InputSection.h @@ -257,6 +257,8 @@ inline bool isWordLiteralSection(uint32_t flags) { bool isCodeSection(const InputSection *); +bool isCfStringSection(const InputSection *); + extern std::vector inputSections; namespace section_names { diff --git a/lld/test/MachO/Inputs/MacOSX.sdk/System/Library/Frameworks/CoreFoundation.framework/CoreFoundation.tbd b/lld/test/MachO/Inputs/MacOSX.sdk/System/Library/Frameworks/CoreFoundation.framework/CoreFoundation.tbd index 643dff44bde46..4faf3f2be6bbb 100644 --- a/lld/test/MachO/Inputs/MacOSX.sdk/System/Library/Frameworks/CoreFoundation.framework/CoreFoundation.tbd +++ b/lld/test/MachO/Inputs/MacOSX.sdk/System/Library/Frameworks/CoreFoundation.framework/CoreFoundation.tbd @@ -7,7 +7,7 @@ current-version: 0001.001.1 compatibility-version: 150 exports: - archs: [ 'x86_64' ] - symbols: [ '__CFBigNumGetInt128' ] + symbols: [ __CFBigNumGetInt128, ___CFConstantStringClassReference ] objc-classes: [ NSObject ] objc-ivars: [ NSConstantArray._count ] objc-eh-types: [ NSException ] diff --git a/lld/test/MachO/cfstring-dedup.s b/lld/test/MachO/cfstring-dedup.s new file mode 100644 index 0000000000000..1a043064b6e0d --- /dev/null +++ b/lld/test/MachO/cfstring-dedup.s @@ -0,0 +1,146 @@ +# REQUIRES: x86 +# RUN: rm -rf %t; split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/foo1.s -o %t/foo1.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/foo2.s -o %t/foo2.o +# RUN: %lld -dylib --icf=all -framework CoreFoundation %t/foo1.o %t/foo2.o -o %t/foo +# RUN: llvm-objdump --macho --rebase --bind --syms -d %t/foo | FileCheck %s + +# CHECK: (__TEXT,__text) section +# CHECK-NEXT: _foo1: +# CHECK-NEXT: _foo2: +# CHECK-NEXT: movq _named_cfstring(%rip), %rax +# CHECK-NEXT: _foo1_utf16: +# CHECK-NEXT: movq [[#]](%rip), %rax +# CHECK-NEXT: _named_foo1: +# CHECK-NEXT: _named_foo2: +# CHECK-NEXT: movq _named_cfstring(%rip), %rax +# CHECK-NEXT: _foo2_utf16: +# CHECK-NEXT: movq [[#]](%rip), %rax + +# CHECK: SYMBOL TABLE: +# CHECK-DAG: [[#%.16x,FOO:]] g F __TEXT,__text _foo1 +# CHECK-DAG: [[#FOO]] g F __TEXT,__text _foo2 + +## Make sure we don't emit redundant bind / rebase opcodes for folded sections. +# CHECK: Rebase table: +# CHECK-NEXT: segment section address type +# CHECK-NEXT: __DATA_CONST __cfstring {{.*}} pointer +# CHECK-NEXT: __DATA_CONST __cfstring {{.*}} pointer +# CHECK-NEXT: __DATA_CONST __cfstring {{.*}} pointer +# CHECK-EMPTY: +# CHECK-NEXT: Bind table: +# CHECK-NEXT: segment section address type addend dylib symbol +# CHECK-NEXT: __DATA_CONST __cfstring {{.*}} pointer 0 CoreFoundation ___CFConstantStringClassReference +# CHECK-NEXT: __DATA_CONST __cfstring {{.*}} pointer 0 CoreFoundation ___CFConstantStringClassReference +# CHECK-NEXT: __DATA_CONST __cfstring {{.*}} pointer 0 CoreFoundation ___CFConstantStringClassReference +# CHECK-EMPTY: + +#--- foo1.s +.cstring +L_.str: + .asciz "foo" + +.section __DATA,__cfstring +.p2align 3 +L__unnamed_cfstring_: + .quad ___CFConstantStringClassReference + .long 1992 ## utf-8 + .space 4 + .quad L_.str + .quad 3 ## strlen + +_named_cfstring: + .quad ___CFConstantStringClassReference + .long 1992 ## utf-8 + .space 4 + .quad L_.str + .quad 3 ## strlen + +.section __TEXT,__ustring +l_.str.2: + .short 102 ## f + .short 111 ## o + .short 0 ## \0 + .short 111 ## o + .short 0 ## \0 + +## FIXME: We should be able to deduplicate UTF-16 CFStrings too. +## Note that this string contains a null byte in the middle -- any dedup code +## we add should take care to handle this correctly. +## Technically, UTF-8 should support encoding null bytes too, but since we +## atomize the __cstring section at every null byte, this isn't supported. ld64 +## doesn't support it either, and clang seems to always emit a UTF-16 CFString +## if it needs to contain a null, so I think we're good here. +.section __DATA,__cfstring +.p2align 3 +L__unnamed_cfstring_.2: + .quad ___CFConstantStringClassReference + .long 2000 ## utf-16 + .space 4 + .quad l_.str.2 + .quad 4 ## strlen + +.text +.globl _foo1, _foo1_utf16, _named_foo1 +_foo1: + movq L__unnamed_cfstring_(%rip), %rax + +_foo1_utf16: + movq L__unnamed_cfstring_.2(%rip), %rax + +_named_foo1: + movq _named_cfstring(%rip), %rax + +.subsections_via_symbols + +#--- foo2.s +.cstring +L_.str: + .asciz "foo" + +.section __DATA,__cfstring +.p2align 3 +L__unnamed_cfstring_: + .quad ___CFConstantStringClassReference + .long 1992 ## utf-8 + .space 4 + .quad L_.str + .quad 3 ## strlen + +_named_cfstring: + .quad ___CFConstantStringClassReference + .long 1992 ## utf-8 + .space 4 + .quad L_.str + .quad 3 ## strlen + +.section __TEXT,__ustring + .p2align 1 +l_.str.2: + .short 102 ## f + .short 111 ## o + .short 0 ## \0 + .short 111 ## o + .short 0 ## \0 + +.section __DATA,__cfstring +.p2align 3 +L__unnamed_cfstring_.2: + .quad ___CFConstantStringClassReference + .long 2000 ## utf-16 + .space 4 + .quad l_.str.2 + .quad 4 ## strlen + +.text +.globl _foo2, _foo2_utf16, _named_foo2 +_foo2: + movq L__unnamed_cfstring_(%rip), %rax + +_foo2_utf16: + movq L__unnamed_cfstring_.2(%rip), %rax + +_named_foo2: + movq _named_cfstring(%rip), %rax + +.subsections_via_symbols diff --git a/lld/test/MachO/invalid/cfstring.s b/lld/test/MachO/invalid/cfstring.s new file mode 100644 index 0000000000000..50a1038cb7522 --- /dev/null +++ b/lld/test/MachO/invalid/cfstring.s @@ -0,0 +1,19 @@ +# REQUIRES: x86 +# RUN: rm -rf %t; mkdir %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/test.o +# RUN: not %lld -dylib -framework CoreFoundation --icf=all %t/test.o 2>&1 | FileCheck %s +# CHECK: error: {{.*}}test.o: __cfstring contains symbol _uh_oh at misaligned offset + +.cstring +L_.str: + .asciz "foo" + +.section __DATA,__cfstring +.p2align 3 +L__unnamed_cfstring_: + .quad ___CFConstantStringClassReference + .long 1992 ## utf-8 +_uh_oh: + .space 4 + .quad L_.str + .quad 3 ## strlen From 08715e6c47f68b7ea985fbd76d4945dfdff0a9aa Mon Sep 17 00:00:00 2001 From: Jez Ng Date: Sun, 27 Jun 2021 15:31:34 -0400 Subject: [PATCH 485/619] [lld-macho][nfc] Remove unnecessary vertical spacing This makes NonLazyPointerSectionBase's style more in line with the rest of the classes in its file. --- lld/MachO/SyntheticSections.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h index cf464b866d33a..b33d5e596c6b7 100644 --- a/lld/MachO/SyntheticSections.h +++ b/lld/MachO/SyntheticSections.h @@ -113,19 +113,13 @@ class PageZeroSection final : public SyntheticSection { class NonLazyPointerSectionBase : public SyntheticSection { public: NonLazyPointerSectionBase(const char *segname, const char *name); - const llvm::SetVector &getEntries() const { return entries; } - bool isNeeded() const override { return !entries.empty(); } - uint64_t getSize() const override { return entries.size() * target->wordSize; } - void writeTo(uint8_t *buf) const override; - void addEntry(Symbol *sym); - uint64_t getVA(uint32_t gotIndex) const { return addr + gotIndex * target->wordSize; } From f6b6e7214366fc12469aba2fe16495e5f7a375a6 Mon Sep 17 00:00:00 2001 From: Jez Ng Date: Thu, 1 Jul 2021 20:33:55 -0400 Subject: [PATCH 486/619] [lld-macho] Factor out common InputSection members We have been creating many ConcatInputSections with identical values due to .subsections_via_symbols. This diff factors out the identical values into a Shared struct, to reduce memory consumption and make copying cheaper. I also changed `callSiteCount` from a uint32_t to a 31-bit field to save an extra word. All in all, this takes InputSection from 120 to 72 bytes (and ConcatInputSection from 160 to 112 bytes), i.e. 30% size reduction in ConcatInputSection. Numbers for linking chromium_framework on my 3.2 GHz 16-Core Intel Xeon W: N Min Max Median Avg Stddev x 20 4.14 4.24 4.18 4.183 0.027548999 + 20 4.04 4.11 4.075 4.0775 0.018027756 Difference at 95.0% confidence -0.1055 +/- 0.0149005 -2.52211% +/- 0.356215% (Student's t, pooled s = 0.0232803) Reviewed By: #lld-macho, thakis Differential Revision: https://reviews.llvm.org/D105305 --- lld/MachO/ConcatOutputSection.cpp | 10 +++--- lld/MachO/Driver.cpp | 17 +++++------ lld/MachO/Dwarf.cpp | 2 +- lld/MachO/ICF.cpp | 2 +- lld/MachO/InputFiles.cpp | 18 +++++------ lld/MachO/InputSection.cpp | 22 ++++++------- lld/MachO/InputSection.h | 51 ++++++++++++++++++++++--------- lld/MachO/MarkLive.cpp | 8 ++--- lld/MachO/SymbolTable.cpp | 2 +- lld/MachO/Symbols.h | 2 +- lld/MachO/SyntheticSections.cpp | 8 ++--- lld/MachO/UnwindInfoSection.cpp | 6 ++-- lld/MachO/Writer.cpp | 10 +++--- 13 files changed, 90 insertions(+), 68 deletions(-) diff --git a/lld/MachO/ConcatOutputSection.cpp b/lld/MachO/ConcatOutputSection.cpp index 1c3c055b89309..99e4558ab82f7 100644 --- a/lld/MachO/ConcatOutputSection.cpp +++ b/lld/MachO/ConcatOutputSection.cpp @@ -27,7 +27,7 @@ using namespace lld::macho; void ConcatOutputSection::addInput(ConcatInputSection *input) { if (inputs.empty()) { align = input->align; - flags = input->flags; + flags = input->getFlags(); } else { align = std::max(align, input->align); finalizeFlags(input); @@ -288,7 +288,8 @@ void ConcatOutputSection::finalize() { // unfinalized inputs[finalIdx]. fatal(Twine(__FUNCTION__) + ": FIXME: thunk range overrun"); } - thunkInfo.isec = make(isec->segname, isec->name); + thunkInfo.isec = + make(isec->getSegName(), isec->getName()); thunkInfo.isec->parent = this; StringRef thunkName = saver.save(funcSym->getName() + ".thunk." + std::to_string(thunkInfo.sequence++)); @@ -332,8 +333,7 @@ void ConcatOutputSection::writeTo(uint8_t *buf) const { } void ConcatOutputSection::finalizeFlags(InputSection *input) { - uint8_t inputType = input->flags & SECTION_TYPE; - switch (inputType) { + switch (sectionType(input->getFlags())) { default /*type-unspec'ed*/: // FIXME: Add additional logics here when supporting emitting obj files. break; @@ -351,7 +351,7 @@ void ConcatOutputSection::finalizeFlags(InputSection *input) { case S_THREAD_LOCAL_VARIABLE_POINTERS: case S_NON_LAZY_SYMBOL_POINTERS: case S_SYMBOL_STUBS: - flags |= input->flags; + flags |= input->getFlags(); break; } } diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp index a64d34ba5689b..5a17941e5dff6 100644 --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -546,20 +546,19 @@ static void replaceCommonSymbols() { if (common == nullptr) continue; - auto *isec = - make(segment_names::data, section_names::common); - isec->file = common->getFile(); - isec->align = common->align; // Casting to size_t will truncate large values on 32-bit architectures, // but it's not really worth supporting the linking of 64-bit programs on // 32-bit archs. - isec->data = {nullptr, static_cast(common->size)}; - isec->flags = S_ZEROFILL; + ArrayRef data = {nullptr, static_cast(common->size)}; + auto *isec = make( + segment_names::data, section_names::common, common->getFile(), data, + common->align, S_ZEROFILL); inputSections.push_back(isec); // FIXME: CommonSymbol should store isReferencedDynamically, noDeadStrip // and pass them on here. - replaceSymbol(sym, sym->getName(), isec->file, isec, /*value=*/0, + replaceSymbol(sym, sym->getName(), isec->getFile(), isec, + /*value=*/0, /*size=*/0, /*isWeakDef=*/false, /*isExternal=*/true, common->privateExtern, @@ -994,8 +993,8 @@ static void gatherInputSections() { if (auto *isec = dyn_cast(entry.isec)) { if (isec->isCoalescedWeak()) continue; - if (isec->segname == segment_names::ld) { - assert(isec->name == section_names::compactUnwind); + if (isec->getSegName() == segment_names::ld) { + assert(isec->getName() == section_names::compactUnwind); in.unwindInfo->addInput(isec); continue; } diff --git a/lld/MachO/Dwarf.cpp b/lld/MachO/Dwarf.cpp index 29b39ddf6416d..c142cc1b169f6 100644 --- a/lld/MachO/Dwarf.cpp +++ b/lld/MachO/Dwarf.cpp @@ -27,7 +27,7 @@ std::unique_ptr DwarfObject::create(ObjFile *obj) { // ourselves. for (const InputSection *isec : obj->debugSections) { if (StringRef *s = - StringSwitch(isec->name) + StringSwitch(isec->getName()) .Case(section_names::debugInfo, &dObj->infoSection.Data) .Case(section_names::debugAbbrev, &dObj->abbrevSection) .Case(section_names::debugStr, &dObj->strSection) diff --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp index fbd7cb36514fa..a27a0f5accbd1 100644 --- a/lld/MachO/ICF.cpp +++ b/lld/MachO/ICF.cpp @@ -88,7 +88,7 @@ static bool equalsConstant(const ConcatInputSection *ia, return false; if (ia->data != ib->data) return false; - if (ia->flags != ib->flags) + if (ia->getFlags() != ib->getFlags()) return false; if (ia->relocs.size() != ib->relocs.size()) return false; diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp index 6caf48232ad9b..b8c8b09564217 100644 --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -295,8 +295,8 @@ void ObjFile::parseSections(ArrayRef
sections) { } else { auto *isec = make(segname, name, this, data, align, flags); - if (!(isDebugSection(isec->flags) && - isec->segname == segment_names::dwarf)) { + if (!(isDebugSection(isec->getFlags()) && + isec->getSegName() == segment_names::dwarf)) { subsections.push_back({{0, isec}}); } else { // Instead of emitting DWARF sections, we emit STABS symbols to the @@ -522,7 +522,7 @@ static macho::Symbol *createDefined(const NList &sym, StringRef name, isPrivateExtern = true; return symtab->addDefined( - name, isec->file, isec, value, size, sym.n_desc & N_WEAK_DEF, + name, isec->getFile(), isec, value, size, sym.n_desc & N_WEAK_DEF, isPrivateExtern, sym.n_desc & N_ARM_THUMB_DEF, sym.n_desc & REFERENCED_DYNAMICALLY, sym.n_desc & N_NO_DEAD_STRIP); } @@ -530,7 +530,7 @@ static macho::Symbol *createDefined(const NList &sym, StringRef name, assert(!isWeakDefCanBeHidden && "weak_def_can_be_hidden on already-hidden symbol?"); return make( - name, isec->file, isec, value, size, sym.n_desc & N_WEAK_DEF, + name, isec->getFile(), isec, value, size, sym.n_desc & N_WEAK_DEF, /*isExternal=*/false, /*isPrivateExtern=*/false, sym.n_desc & N_ARM_THUMB_DEF, sym.n_desc & REFERENCED_DYNAMICALLY, sym.n_desc & N_NO_DEAD_STRIP); @@ -672,7 +672,7 @@ void ObjFile::parseSymbols(ArrayRef sectionHeaders, auto *nextIsec = make(*concatIsec); nextIsec->numRefs = 0; nextIsec->wasCoalesced = false; - if (isZeroFill(isec->flags)) { + if (isZeroFill(isec->getFlags())) { // Zero-fill sections have NULL data.data() non-zero data.size() nextIsec->data = {nullptr, isec->data.size() - symbolOffset}; isec->data = {nullptr, symbolOffset}; @@ -698,11 +698,11 @@ void ObjFile::parseSymbols(ArrayRef sectionHeaders, OpaqueFile::OpaqueFile(MemoryBufferRef mb, StringRef segName, StringRef sectName) : InputFile(OpaqueKind, mb) { - ConcatInputSection *isec = - make(segName.take_front(16), sectName.take_front(16)); - isec->file = this; const auto *buf = reinterpret_cast(mb.getBufferStart()); - isec->data = {buf, mb.getBufferSize()}; + ArrayRef data = {buf, mb.getBufferSize()}; + ConcatInputSection *isec = + make(segName.take_front(16), sectName.take_front(16), + /*file=*/this, data); isec->live = true; subsections.push_back({{0, isec}}); } diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp index 5f1eab349ba5a..5762e4ef59f0e 100644 --- a/lld/MachO/InputSection.cpp +++ b/lld/MachO/InputSection.cpp @@ -28,7 +28,7 @@ using namespace lld::macho; std::vector macho::inputSections; uint64_t InputSection::getFileSize() const { - return isZeroFill(flags) ? 0 : getSize(); + return isZeroFill(getFlags()) ? 0 : getSize(); } uint64_t InputSection::getVA(uint64_t off) const { @@ -49,7 +49,7 @@ static uint64_t resolveSymbolVA(const Symbol *sym, uint8_t type) { // ICF needs to hash any section that might potentially be duplicated so // that it can match on content rather than identity. bool ConcatInputSection::isHashableForICF() const { - switch (sectionType(flags)) { + switch (sectionType(getFlags())) { case S_REGULAR: return true; case S_CSTRING_LITERALS: @@ -127,7 +127,7 @@ void ConcatInputSection::writeTo(uint8_t *buf) { target->relaxGotLoad(loc, r.type); referentVA = resolveSymbolVA(referentSym, r.type) + r.addend; - if (isThreadLocalVariables(flags)) { + if (isThreadLocalVariables(getFlags())) { // References from thread-local variable sections are treated as offsets // relative to the start of the thread-local data memory area, which // is initialized via copying all the TLV data sections (which are all @@ -203,7 +203,7 @@ WordLiteralInputSection::WordLiteralInputSection(StringRef segname, uint64_t WordLiteralInputSection::getOffset(uint64_t off) const { auto *osec = cast(parent); const uint8_t *buf = data.data(); - switch (sectionType(flags)) { + switch (sectionType(getFlags())) { case S_4BYTE_LITERALS: return osec->getLiteral4Offset(buf + off); case S_8BYTE_LITERALS: @@ -216,16 +216,16 @@ uint64_t WordLiteralInputSection::getOffset(uint64_t off) const { } bool macho::isCodeSection(const InputSection *isec) { - uint32_t type = sectionType(isec->flags); + uint32_t type = sectionType(isec->getFlags()); if (type != S_REGULAR && type != S_COALESCED) return false; - uint32_t attr = isec->flags & SECTION_ATTRIBUTES_USR; + uint32_t attr = isec->getFlags() & SECTION_ATTRIBUTES_USR; if (attr == S_ATTR_PURE_INSTRUCTIONS) return true; - if (isec->segname == segment_names::text) - return StringSwitch(isec->name) + if (isec->getSegName() == segment_names::text) + return StringSwitch(isec->getName()) .Cases(section_names::textCoalNt, section_names::staticInit, true) .Default(false); @@ -233,10 +233,10 @@ bool macho::isCodeSection(const InputSection *isec) { } bool macho::isCfStringSection(const InputSection *isec) { - return isec->name == section_names::cfString && - isec->segname == segment_names::data; + return isec->getName() == section_names::cfString && + isec->getSegName() == segment_names::data; } std::string lld::toString(const InputSection *isec) { - return (toString(isec->file) + ":(" + isec->name + ")").str(); + return (toString(isec->getFile()) + ":(" + isec->getName() + ")").str(); } diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h index a7d8b6e29fcf9..a104570870432 100644 --- a/lld/MachO/InputSection.h +++ b/lld/MachO/InputSection.h @@ -13,6 +13,7 @@ #include "Relocations.h" #include "lld/Common/LLVM.h" +#include "lld/Common/Memory.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/CachedHashString.h" @@ -33,9 +34,13 @@ class InputSection { WordLiteralKind, }; - Kind kind() const { return sectionKind; } + Kind kind() const { return shared->sectionKind; } virtual ~InputSection() = default; virtual uint64_t getSize() const { return data.size(); } + InputFile *getFile() const { return shared->file; } + StringRef getName() const { return shared->name; } + StringRef getSegName() const { return shared->segname; } + uint32_t getFlags() const { return shared->flags; } uint64_t getFileSize() const; // Translates \p off -- an offset relative to this InputSection -- into an // offset from the beginning of its parent OutputSection. @@ -47,33 +52,43 @@ class InputSection { virtual void markLive(uint64_t off) = 0; virtual InputSection *canonical() { return this; } - InputFile *file = nullptr; - StringRef name; - StringRef segname; - OutputSection *parent = nullptr; uint32_t align = 1; - uint32_t flags = 0; - uint32_t callSiteCount = 0; - + uint32_t callSiteCount : 31; // is address assigned? - bool isFinal = false; + uint32_t isFinal : 1; ArrayRef data; std::vector relocs; protected: + // The fields in this struct are immutable. Since we create a lot of + // InputSections with identical values for them (due to + // .subsections_via_symbols), factoring them out into a shared struct reduces + // memory consumption and makes copying cheaper. + struct Shared { + InputFile *file; + StringRef name; + StringRef segname; + uint32_t flags; + Kind sectionKind; + Shared(InputFile *file, StringRef name, StringRef segname, uint32_t flags, + Kind kind) + : file(file), name(name), segname(segname), flags(flags), + sectionKind(kind) {} + }; + InputSection(Kind kind, StringRef segname, StringRef name) - : name(name), segname(segname), sectionKind(kind) {} + : callSiteCount(0), isFinal(false), + shared(make(nullptr, name, segname, 0, kind)) {} InputSection(Kind kind, StringRef segname, StringRef name, InputFile *file, ArrayRef data, uint32_t align, uint32_t flags) - : file(file), name(name), segname(segname), align(align), flags(flags), - data(data), sectionKind(kind) {} + : align(align), callSiteCount(0), isFinal(false), data(data), + shared(make(file, name, segname, flags, kind)) {} -private: - Kind sectionKind; + const Shared *const shared; }; // ConcatInputSections are combined into (Concat)OutputSections through simple @@ -85,7 +100,8 @@ class ConcatInputSection final : public InputSection { : InputSection(ConcatKind, segname, name) {} ConcatInputSection(StringRef segname, StringRef name, InputFile *file, - ArrayRef data, uint32_t align, uint32_t flags) + ArrayRef data, uint32_t align = 1, + uint32_t flags = 0) : InputSection(ConcatKind, segname, name, file, data, align, flags) {} uint64_t getOffset(uint64_t off) const override { return outSecOff + off; } @@ -128,6 +144,11 @@ class ConcatInputSection final : public InputSection { uint64_t outSecOff = 0; }; +// Verify ConcatInputSection's size on 64-bit builds. +static_assert(sizeof(int) != 8 || sizeof(ConcatInputSection) == 112, + "Try to minimize ConcatInputSection's size, we create many " + "instances of it"); + // Helper functions to make it easy to sprinkle asserts. inline bool shouldOmitFromOutput(InputSection *isec) { diff --git a/lld/MachO/MarkLive.cpp b/lld/MachO/MarkLive.cpp index 7962ff9b094a2..8e2c1aa472925 100644 --- a/lld/MachO/MarkLive.cpp +++ b/lld/MachO/MarkLive.cpp @@ -103,14 +103,14 @@ void markLive() { addSym(stubBinder); for (ConcatInputSection *isec : inputSections) { // Sections marked no_dead_strip - if (isec->flags & S_ATTR_NO_DEAD_STRIP) { + if (isec->getFlags() & S_ATTR_NO_DEAD_STRIP) { enqueue(isec, 0); continue; } // mod_init_funcs, mod_term_funcs sections - if (sectionType(isec->flags) == S_MOD_INIT_FUNC_POINTERS || - sectionType(isec->flags) == S_MOD_TERM_FUNC_POINTERS) { + if (sectionType(isec->getFlags()) == S_MOD_INIT_FUNC_POINTERS || + sectionType(isec->getFlags()) == S_MOD_TERM_FUNC_POINTERS) { enqueue(isec, 0); continue; } @@ -161,7 +161,7 @@ void markLive() { for (ConcatInputSection *isec : inputSections) { // FIXME: Check if copying all S_ATTR_LIVE_SUPPORT sections into a // separate vector and only walking that here is faster. - if (!(isec->flags & S_ATTR_LIVE_SUPPORT) || isec->live) + if (!(isec->getFlags() & S_ATTR_LIVE_SUPPORT) || isec->live) continue; for (const Reloc &r : isec->relocs) { diff --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp index c0ae11bfd2f09..b2a6046e81cf9 100644 --- a/lld/MachO/SymbolTable.cpp +++ b/lld/MachO/SymbolTable.cpp @@ -54,7 +54,7 @@ Defined *SymbolTable::addDefined(StringRef name, InputFile *file, std::tie(s, wasInserted) = insert(name, file); assert(!isWeakDef || (isa(file) && !isec) || - (isa(file) && file == isec->file)); + (isa(file) && file == isec->getFile())); if (!wasInserted) { if (auto *defined = dyn_cast(s)) { diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h index 3d3c84c799526..c30fbb9d68853 100644 --- a/lld/MachO/Symbols.h +++ b/lld/MachO/Symbols.h @@ -129,7 +129,7 @@ class Defined : public Symbol { return isWeakDef() && isExternal() && !privateExtern; } bool isTlv() const override { - return !isAbsolute() && isThreadLocalVariables(isec->flags); + return !isAbsolute() && isThreadLocalVariables(isec->getFlags()); } bool isExternal() const { return external; } diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp index 07406650dfd87..2a409bfcfa112 100644 --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -746,7 +746,7 @@ void SymtabSection::emitStabs() { if (defined->isAbsolute()) continue; InputSection *isec = defined->isec; - ObjFile *file = dyn_cast_or_null(isec->file); + ObjFile *file = dyn_cast_or_null(isec->getFile()); if (!file || !file->compileUnit) continue; symbolsNeedingStabs.push_back(defined); @@ -754,7 +754,7 @@ void SymtabSection::emitStabs() { } llvm::stable_sort(symbolsNeedingStabs, [&](Defined *a, Defined *b) { - return a->isec->file->id < b->isec->file->id; + return a->isec->getFile()->id < b->isec->getFile()->id; }); // Emit STABS symbols so that dsymutil and/or the debugger can map address @@ -763,7 +763,7 @@ void SymtabSection::emitStabs() { InputFile *lastFile = nullptr; for (Defined *defined : symbolsNeedingStabs) { InputSection *isec = defined->isec; - ObjFile *file = cast(isec->file); + ObjFile *file = cast(isec->getFile()); if (lastFile == nullptr || lastFile != file) { if (lastFile != nullptr) @@ -1264,7 +1264,7 @@ void WordLiteralSection::finalizeContents() { // finalized. isec->isFinal = true; const uint8_t *buf = isec->data.data(); - switch (sectionType(isec->flags)) { + switch (sectionType(isec->getFlags())) { case S_4BYTE_LITERALS: { for (size_t off = 0, e = isec->data.size(); off < e; off += 4) { if (!isec->isLive(off)) diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp index 567e18a607e2e..5c9c5e03d1c13 100644 --- a/lld/MachO/UnwindInfoSection.cpp +++ b/lld/MachO/UnwindInfoSection.cpp @@ -142,8 +142,8 @@ void UnwindInfoSection::prepareRelocations() { template void UnwindInfoSectionImpl::addInput(ConcatInputSection *isec) { - assert(isec->segname == segment_names::ld && - isec->name == section_names::compactUnwind); + assert(isec->getSegName() == segment_names::ld && + isec->getName() == section_names::compactUnwind); compactUnwindSection->addInput(isec); } @@ -220,7 +220,7 @@ void UnwindInfoSectionImpl::prepareRelocations(ConcatInputSection *isec) { // the exact addresses that it references. So it is safe for compact unwind to // reference addresses in __TEXT, but not addresses in any other segment. static ConcatInputSection *checkTextSegment(InputSection *isec) { - if (isec->segname != segment_names::text) + if (isec->getSegName() != segment_names::text) error("compact unwind references address in " + toString(isec) + " which is not in segment __TEXT"); // __text should always be a ConcatInputSection. diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp index 7e3dc70cd18d8..874708e376a75 100644 --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -582,7 +582,7 @@ static void prepareSymbolRelocation(Symbol *sym, const InputSection *isec, // References from thread-local variable sections are treated as offsets // relative to the start of the referent section, and therefore have no // need of rebase opcodes. - if (!(isThreadLocalVariables(isec->flags) && isa(sym))) + if (!(isThreadLocalVariables(isec->getFlags()) && isa(sym))) addNonLazyBindingEntries(sym, isec, r.offset, r.addend); } } @@ -802,7 +802,8 @@ static DenseMap buildInputSectionPriorities() { SymbolPriorityEntry &entry = it->second; size_t &priority = sectionPriorities[sym.isec]; - priority = std::max(priority, getSymbolPriority(entry, sym.isec->file)); + priority = + std::max(priority, getSymbolPriority(entry, sym.isec->getFile())); }; // TODO: Make sure this handles weak symbols correctly. @@ -889,7 +890,7 @@ template void Writer::createOutputSections() { for (ConcatInputSection *isec : inputSections) { if (isec->shouldOmitFromOutput()) continue; - NamePair names = maybeRenameSection({isec->segname, isec->name}); + NamePair names = maybeRenameSection({isec->getSegName(), isec->getName()}); ConcatOutputSection *&osec = concatOutputSections[names]; if (!osec) osec = make(names.second); @@ -913,7 +914,8 @@ template void Writer::createOutputSections() { if (it == concatOutputSections.end()) { getOrCreateOutputSegment(ssec->segname)->addOutputSection(ssec); } else { - fatal("section from " + toString(it->second->firstSection()->file) + + fatal("section from " + + toString(it->second->firstSection()->getFile()) + " conflicts with synthetic section " + ssec->segname + "," + ssec->name); } From 9ab5f761176c376c0a117c53f0f111fed1bcc842 Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Thu, 1 Jul 2021 18:43:01 -0700 Subject: [PATCH 487/619] Support for merging UsingPackDecls across modules. Fixes a false-positive error if the same std::variant<...> type is instantiated across multiple modules. --- clang/lib/Serialization/ASTReaderDecl.cpp | 13 +++++++++++-- clang/test/Modules/Inputs/merge-using-decls/a.h | 12 ++++++++++++ clang/test/Modules/Inputs/merge-using-decls/b.h | 12 ++++++++++++ clang/test/Modules/merge-using-decls.cpp | 15 +++++++++++++++ 4 files changed, 50 insertions(+), 2 deletions(-) diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp index 1d9bae5d3129c..ff79f91e5db1b 100644 --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -3067,7 +3067,7 @@ static bool hasSameOverloadableAttrs(const FunctionDecl *A, return true; } -/// Determine whether the two declarations refer to the same entity.pr +/// Determine whether the two declarations refer to the same entity. static bool isSameEntity(NamedDecl *X, NamedDecl *Y) { assert(X->getDeclName() == Y->getDeclName() && "Declaration name mismatch!"); @@ -3261,10 +3261,19 @@ static bool isSameEntity(NamedDecl *X, NamedDecl *Y) { return isSameQualifier(UX->getQualifier(), UY->getQualifier()) && UX->isAccessDeclaration() == UY->isAccessDeclaration(); } - if (const auto *UX = dyn_cast(X)) + if (const auto *UX = dyn_cast(X)) { return isSameQualifier( UX->getQualifier(), cast(Y)->getQualifier()); + } + + // Using-pack declarations are only created by instantiation, and match if + // they're instantiated from matching UnresolvedUsing...Decls. + if (const auto *UX = dyn_cast(X)) { + return declaresSameEntity( + UX->getInstantiatedFromUsingDecl(), + cast(Y)->getInstantiatedFromUsingDecl()); + } // Namespace alias definitions with the same target match. if (const auto *NAX = dyn_cast(X)) { diff --git a/clang/test/Modules/Inputs/merge-using-decls/a.h b/clang/test/Modules/Inputs/merge-using-decls/a.h index 0fe0067bf23c4..2469cbd7601fe 100644 --- a/clang/test/Modules/Inputs/merge-using-decls/a.h +++ b/clang/test/Modules/Inputs/merge-using-decls/a.h @@ -1,6 +1,7 @@ struct X { int v; typedef int t; + void f(X); }; struct YA { @@ -8,6 +9,10 @@ struct YA { typedef int type; }; +struct Z { + void f(Z); +}; + template struct C : X, T { using T::value; using typename T::type; @@ -41,3 +46,10 @@ typedef C::type I; typedef D::type I; typedef E::type I; typedef F::type I; + +#if __cplusplus >= 201702L +template struct G : T... { + using T::f...; +}; +using Q = decltype(G()); +#endif diff --git a/clang/test/Modules/Inputs/merge-using-decls/b.h b/clang/test/Modules/Inputs/merge-using-decls/b.h index 5d112ffbfe96f..be9bf240ccebf 100644 --- a/clang/test/Modules/Inputs/merge-using-decls/b.h +++ b/clang/test/Modules/Inputs/merge-using-decls/b.h @@ -1,6 +1,7 @@ struct X { int v; typedef int t; + void f(X); }; struct YB { @@ -14,6 +15,10 @@ struct YBRev { int type; }; +struct Z { + void f(Z); +}; + template struct C : X, T { using T::value; using typename T::type; @@ -54,3 +59,10 @@ typedef E::type I; #endif typedef F::type I; + +#if __cplusplus >= 201702L +template struct G : T... { + using T::f...; +}; +using Q = decltype(G()); +#endif diff --git a/clang/test/Modules/merge-using-decls.cpp b/clang/test/Modules/merge-using-decls.cpp index 1ec9a9a17bdee..e3bf977f05449 100644 --- a/clang/test/Modules/merge-using-decls.cpp +++ b/clang/test/Modules/merge-using-decls.cpp @@ -2,9 +2,11 @@ // RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -x c++ -I%S/Inputs/merge-using-decls -verify %s -DORDER=1 // RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -x c++ -I%S/Inputs/merge-using-decls -verify -std=c++98 %s -DORDER=1 // RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -x c++ -I%S/Inputs/merge-using-decls -verify -std=c++11 %s -DORDER=1 +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -x c++ -I%S/Inputs/merge-using-decls -verify -std=c++17 %s -DORDER=1 // RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -x c++ -I%S/Inputs/merge-using-decls -verify %s -DORDER=2 // RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -x c++ -I%S/Inputs/merge-using-decls -verify -std=c++98 %s -DORDER=2 // RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -x c++ -I%S/Inputs/merge-using-decls -verify -std=c++11 %s -DORDER=2 +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -x c++ -I%S/Inputs/merge-using-decls -verify -std=c++17 %s -DORDER=2 #if ORDER == 1 #include "a.h" @@ -39,6 +41,19 @@ template int UseAll(); template int UseAll(); template int UseAll(); +#if __cplusplus >= 201702L +void use_g(Q q) { + q.f(q); // expected-error {{ambiguous}} +#if ORDER == 1 + // expected-note@a.h:* {{candidate function}} + // expected-note@a.h:* {{candidate function}} +#else + // expected-note@b.h:* {{candidate function}} + // expected-note@b.h:* {{candidate function}} +#endif +} +#endif + // Which of these two sets of diagnostics is chosen is not important. It's OK // if this varies with ORDER, but it must be consistent across runs. #if ORDER == 1 From 066524ea5438c5c30ac91a7702091be6dc5fd2d0 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 1 Jul 2021 17:08:23 -0700 Subject: [PATCH 488/619] [ScalarizeMaskedMemIntrin][SelectionDAGBuilder] Use the element type to calculate alignment for gather/scatter when alignment operand is 0. Previously we used the vector type, but we're loading/storing invididual elements so I think only element alignment should matter. Noticed while looking at the code for something else so I don't have a test case. Differential Revision: https://reviews.llvm.org/D105220 --- .../CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 4 ++-- .../Scalar/ScalarizeMaskedMemIntrin.cpp | 16 ++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 0774c7fcf0117..7f012dde2e5d4 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4409,7 +4409,7 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { EVT VT = Src0.getValueType(); Align Alignment = cast(I.getArgOperand(2)) ->getMaybeAlignValue() - .getValueOr(DAG.getEVTAlign(VT)); + .getValueOr(DAG.getEVTAlign(VT.getScalarType())); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); AAMDNodes AAInfo; @@ -4527,7 +4527,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); Align Alignment = cast(I.getArgOperand(1)) ->getMaybeAlignValue() - .getValueOr(DAG.getEVTAlign(VT)); + .getValueOr(DAG.getEVTAlign(VT.getScalarType())); AAMDNodes AAInfo; I.getAAMetadata(AAInfo); diff --git a/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp b/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp index 8740b35746dbd..ca288a533f46a 100644 --- a/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp +++ b/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp @@ -956,22 +956,22 @@ static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT, scalarizeMaskedStore(DL, CI, DTU, ModifiedDT); return true; case Intrinsic::masked_gather: { - unsigned AlignmentInt = - cast(CI->getArgOperand(1))->getZExtValue(); + MaybeAlign MA = + cast(CI->getArgOperand(1))->getMaybeAlignValue(); Type *LoadTy = CI->getType(); - Align Alignment = - DL.getValueOrABITypeAlignment(MaybeAlign(AlignmentInt), LoadTy); + Align Alignment = DL.getValueOrABITypeAlignment(MA, + LoadTy->getScalarType()); if (TTI.isLegalMaskedGather(LoadTy, Alignment)) return false; scalarizeMaskedGather(DL, CI, DTU, ModifiedDT); return true; } case Intrinsic::masked_scatter: { - unsigned AlignmentInt = - cast(CI->getArgOperand(2))->getZExtValue(); + MaybeAlign MA = + cast(CI->getArgOperand(2))->getMaybeAlignValue(); Type *StoreTy = CI->getArgOperand(0)->getType(); - Align Alignment = - DL.getValueOrABITypeAlignment(MaybeAlign(AlignmentInt), StoreTy); + Align Alignment = DL.getValueOrABITypeAlignment(MA, + StoreTy->getScalarType()); if (TTI.isLegalMaskedScatter(StoreTy, Alignment)) return false; scalarizeMaskedScatter(DL, CI, DTU, ModifiedDT); From 425b908301e48ee6f372150bbc17f04cf199beb7 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Fri, 2 Jul 2021 12:37:22 +1000 Subject: [PATCH 489/619] [ORC] Rename SPSTargetAddress to SPSExecutorAddress. Also removes SPSTagTargetAddress, which was accidentally introduced at some point (and never used). --- .../ExecutionEngine/Orc/EPCDebugObjectRegistrar.h | 2 +- .../Orc/Shared/SimplePackedSerialization.h | 15 +++------------ .../ExecutionEngine/Orc/EPCEHFrameRegistrar.cpp | 4 ++-- .../Orc/TargetProcess/JITLoaderGDB.cpp | 2 +- .../Orc/TargetProcess/RegisterEHFrames.cpp | 4 ++-- 5 files changed, 9 insertions(+), 18 deletions(-) diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h index dc42c74a61204..db5c18b4b4911 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h @@ -43,7 +43,7 @@ class EPCDebugObjectRegistrar : public DebugObjectRegistrar { : EPC(EPC), RegisterFn(RegisterFn) {} Error registerDebugObject(sys::MemoryBlock TargetMem) override { - return WrapperFunction::call( + return WrapperFunction::call( EPCCaller(EPC, RegisterFn), pointerToJITTargetAddress(TargetMem.base()), static_cast(TargetMem.allocatedSize())); } diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h index e834675b4b414..a2ad84647d266 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h @@ -173,15 +173,6 @@ class SPSSerializationTraits< // Any empty placeholder suitable as a substitute for void when deserializing class SPSEmpty {}; -/// SPS tag type for target addresses. -/// -/// SPSTagTargetAddresses should be serialized as a uint64_t value. -class SPSTagTargetAddress; - -template <> -class SPSSerializationTraits - : public SPSSerializationTraits {}; - /// SPS tag type for tuples. /// /// A blob tuple should be serialized by serializing each of the elements in @@ -201,11 +192,11 @@ template class SPSSequence; /// SPS tag type for strings, which are equivalent to sequences of chars. using SPSString = SPSSequence; -/// SPS tag type for target addresseses. -class SPSTargetAddress {}; +/// SPS tag type for executor addresseses. +class SPSExecutorAddress {}; template <> -class SPSSerializationTraits +class SPSSerializationTraits : public SPSSerializationTraits {}; /// SPS tag type for maps. diff --git a/llvm/lib/ExecutionEngine/Orc/EPCEHFrameRegistrar.cpp b/llvm/lib/ExecutionEngine/Orc/EPCEHFrameRegistrar.cpp index c51a68c1bfed6..4bfcf132c8f7e 100644 --- a/llvm/lib/ExecutionEngine/Orc/EPCEHFrameRegistrar.cpp +++ b/llvm/lib/ExecutionEngine/Orc/EPCEHFrameRegistrar.cpp @@ -55,14 +55,14 @@ EPCEHFrameRegistrar::Create(ExecutorProcessControl &EPC) { Error EPCEHFrameRegistrar::registerEHFrames(JITTargetAddress EHFrameSectionAddr, size_t EHFrameSectionSize) { - return WrapperFunction::call( + return WrapperFunction::call( EPCCaller(EPC, RegisterEHFrameWrapperFnAddr), EHFrameSectionAddr, static_cast(EHFrameSectionSize)); } Error EPCEHFrameRegistrar::deregisterEHFrames( JITTargetAddress EHFrameSectionAddr, size_t EHFrameSectionSize) { - return WrapperFunction::call( + return WrapperFunction::call( EPCCaller(EPC, DeregisterEHFrameWrapperFnAddr), EHFrameSectionAddr, static_cast(EHFrameSectionSize)); } diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp index 55b1220a03534..43c2a44835fdf 100644 --- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp +++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp @@ -95,7 +95,7 @@ static void registerJITLoaderGDBImpl(JITTargetAddress Addr, uint64_t Size) { extern "C" orc::shared::detail::CWrapperFunctionResult llvm_orc_registerJITLoaderGDBWrapper(const char *Data, uint64_t Size) { using namespace orc::shared; - return WrapperFunction::handle( + return WrapperFunction::handle( Data, Size, registerJITLoaderGDBImpl) .release(); } diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.cpp index 9463a36668cbd..4a408d61ee38b 100644 --- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.cpp +++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.cpp @@ -167,14 +167,14 @@ static Error deregisterEHFrameWrapper(JITTargetAddress Addr, uint64_t Size) { extern "C" orc::shared::detail::CWrapperFunctionResult llvm_orc_registerEHFrameSectionWrapper(const char *Data, uint64_t Size) { - return WrapperFunction::handle( + return WrapperFunction::handle( Data, Size, registerEHFrameWrapper) .release(); } extern "C" orc::shared::detail::CWrapperFunctionResult llvm_orc_deregisterEHFrameSectionWrapper(const char *Data, uint64_t Size) { - return WrapperFunction::handle( + return WrapperFunction::handle( Data, Size, deregisterEHFrameWrapper) .release(); } From 5efffac71a1c640b0cba8e34f5e2374d397f6eb3 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 1 Jul 2021 19:43:49 -0700 Subject: [PATCH 490/619] [llvm-symbolizer] Move setGroupedShortOptions and don't ignore case setGroupedShortOptions in the ctor seems more popular. --- llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp index 2359e5d2b4587..227ce12a6d9af 100644 --- a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp +++ b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp @@ -66,7 +66,9 @@ static const opt::OptTable::Info InfoTable[] = { class SymbolizerOptTable : public opt::OptTable { public: - SymbolizerOptTable() : OptTable(InfoTable, true) {} + SymbolizerOptTable() : OptTable(InfoTable) { + setGroupedShortOptions(true); + } }; } // namespace @@ -206,7 +208,6 @@ static opt::InputArgList parseOptions(int Argc, char *Argv[], bool IsAddr2Line, StringSaver &Saver, SymbolizerOptTable &Tbl) { StringRef ToolName = IsAddr2Line ? "llvm-addr2line" : "llvm-symbolizer"; - Tbl.setGroupedShortOptions(true); // The environment variable specifies initial options which can be overridden // by commnad line options. Tbl.setInitialOptionsFromEnvironment(IsAddr2Line ? "LLVM_ADDR2LINE_OPTS" From 9568811cb8a4c45e8143456b91dd7cc1acb16ad3 Mon Sep 17 00:00:00 2001 From: Evgeniy Brevnov Date: Fri, 2 Jul 2021 09:59:06 +0700 Subject: [PATCH 491/619] [NFC][DSE]Change 'do-while' to 'for' loop to simplify code structure With 'for' loop there is is a single place where 'Current' is adjusted. It helps to avoid copy paste and makes a bit easy to understand overall loop controll flow. Reviewed By: fhahn Differential Revision: https://reviews.llvm.org/D101044 --- .../Scalar/DeadStoreElimination.cpp | 40 +++++-------------- 1 file changed, 11 insertions(+), 29 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index 358c5b831c5ca..d22b3f409585c 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -1347,13 +1347,11 @@ struct DSEState { MemoryAccess *Current = StartAccess; Instruction *KillingI = KillingDef->getMemoryInst(); - bool StepAgain; LLVM_DEBUG(dbgs() << " trying to get dominating access\n"); // Find the next clobbering Mod access for DefLoc, starting at StartAccess. Optional CurrentLoc; - do { - StepAgain = false; + for (;; Current = cast(Current)->getDefiningAccess()) { LLVM_DEBUG({ dbgs() << " visiting " << *Current; if (!MSSA.isLiveOnEntryDef(Current) && isa(Current)) @@ -1391,11 +1389,8 @@ struct DSEState { MemoryDef *CurrentDef = cast(Current); Instruction *CurrentI = CurrentDef->getMemoryInst(); - if (canSkipDef(CurrentDef, !isInvisibleToCallerBeforeRet(DefUO))) { - StepAgain = true; - Current = CurrentDef->getDefiningAccess(); + if (canSkipDef(CurrentDef, !isInvisibleToCallerBeforeRet(DefUO))) continue; - } // Before we try to remove anything, check for any extra throwing // instructions that block us from DSEing @@ -1431,27 +1426,19 @@ struct DSEState { // If Current cannot be analyzed or is not removable, check the next // candidate. - if (!hasAnalyzableMemoryWrite(CurrentI, TLI) || !isRemovable(CurrentI)) { - StepAgain = true; - Current = CurrentDef->getDefiningAccess(); + if (!hasAnalyzableMemoryWrite(CurrentI, TLI) || !isRemovable(CurrentI)) continue; - } // If Current does not have an analyzable write location, skip it CurrentLoc = getLocForWriteEx(CurrentI); - if (!CurrentLoc) { - StepAgain = true; - Current = CurrentDef->getDefiningAccess(); + if (!CurrentLoc) continue; - } // AliasAnalysis does not account for loops. Limit elimination to // candidates for which we can guarantee they always store to the same // memory location and not located in different loops. if (!isGuaranteedLoopIndependent(CurrentI, KillingI, *CurrentLoc)) { LLVM_DEBUG(dbgs() << " ... not guaranteed loop independent\n"); - StepAgain = true; - Current = CurrentDef->getDefiningAccess(); WalkerStepLimit -= 1; continue; } @@ -1460,35 +1447,30 @@ struct DSEState { // If the killing def is a memory terminator (e.g. lifetime.end), check // the next candidate if the current Current does not write the same // underlying object as the terminator. - if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI)) { - StepAgain = true; - Current = CurrentDef->getDefiningAccess(); - } - continue; + if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI)) + continue; } else { int64_t InstWriteOffset, DepWriteOffset; auto OR = isOverwrite(KillingI, CurrentI, DefLoc, *CurrentLoc, DepWriteOffset, InstWriteOffset); // If Current does not write to the same object as KillingDef, check // the next candidate. - if (OR == OW_Unknown) { - StepAgain = true; - Current = CurrentDef->getDefiningAccess(); - } else if (OR == OW_MaybePartial) { + if (OR == OW_Unknown) + continue; + else if (OR == OW_MaybePartial) { // If KillingDef only partially overwrites Current, check the next // candidate if the partial step limit is exceeded. This aggressively // limits the number of candidates for partial store elimination, // which are less likely to be removable in the end. if (PartialLimit <= 1) { - StepAgain = true; - Current = CurrentDef->getDefiningAccess(); WalkerStepLimit -= 1; continue; } PartialLimit -= 1; } } - } while (StepAgain); + break; + }; // Accesses to objects accessible after the function returns can only be // eliminated if the access is killed along all paths to the exit. Collect From 07a1f3513e2e3802671a0a4ca1edf2fe577fad03 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Thu, 1 Jul 2021 21:40:04 -0700 Subject: [PATCH 492/619] [scudo] Fix test on aarch64 without MTE --- compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp b/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp index d3b7c486f7c3e..723679228cbab 100644 --- a/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp @@ -21,7 +21,8 @@ #include template static scudo::Options getOptionsForConfig() { - if (!Config::MaySupportMemoryTagging || !scudo::archSupportsMemoryTagging()) + if (!Config::MaySupportMemoryTagging || !scudo::archSupportsMemoryTagging() || + !scudo::systemSupportsMemoryTagging()) return {}; scudo::AtomicOptions AO; AO.set(scudo::OptionBit::UseMemoryTagging); From f737d9794a40c066c9ccffb9ac277c1b70442ede Mon Sep 17 00:00:00 2001 From: Douglas Yung Date: Thu, 1 Jul 2021 23:08:48 -0700 Subject: [PATCH 493/619] Relax newly added opcode check to check only for a number instead of a specific opcode. --- .../CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir index bb18a95beb9f5..f9a92b180cba9 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -629,7 +629,7 @@ # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected -# DEBUG-NEXT: G_MEMCPY_INLINE (opcode 219): 3 type indices, 0 imm indices +# DEBUG-NEXT: G_MEMCPY_INLINE (opcode {{[0-9]+}}): 3 type indices, 0 imm indices # DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined # DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined # DEBUG-NEXT: G_MEMMOVE (opcode {{[0-9]+}}): 3 type indices, 1 imm index From 86c5afa6e601c4a80d46a7a8b892d5c49bcec078 Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Fri, 2 Jul 2021 08:47:46 +0200 Subject: [PATCH 494/619] [clangd] Fix XPC build due to missing include path (Tentative, untested as I don't have a mac) --- clang-tools-extra/clangd/xpc/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/clang-tools-extra/clangd/xpc/CMakeLists.txt b/clang-tools-extra/clangd/xpc/CMakeLists.txt index df8c361817a84..d551f8b2b5b3c 100644 --- a/clang-tools-extra/clangd/xpc/CMakeLists.txt +++ b/clang-tools-extra/clangd/xpc/CMakeLists.txt @@ -9,6 +9,7 @@ add_subdirectory(test-client) include_directories( ${CMAKE_CURRENT_SOURCE_DIR}/../ + ${CMAKE_CURRENT_BINARY_DIR}/../ ) set(LLVM_LINK_COMPONENTS From a9ff1ce1b9a52add7557cf0579d424c9d0678860 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Wed, 16 Jun 2021 16:59:46 +0300 Subject: [PATCH 495/619] [LLD] [COFF] Support linking directly against DLLs in MinGW mode GNU ld.bfd supports linking directly against DLLs without using an import library, and some projects have picked up on this habit. (There's no one single unsurmountable issue with using import libraries, but this is a regularly surfacing missing feature.) As long as one is linking by name (instead of by ordinal), the DLL export table contains most of the information needed. (One can inspect what section a symbol points at, to see if it's a function or data symbol. The practical implementation of this loops over all sections for each symbol, but as long as they're not very many, that should hopefully be tolerable performance wise.) One exception where the information in the DLL isn't entirely enough is on i386 with stdcall functions; depending on how they're done, the exported function name can be a plain undecorated name, while the import library would contain the full decorated symbol name. This issue is addressed separately in a different patch. This is implemented mimicing the structure of a regular import library, with one InputFile corresponding to the static archive that just adds lazy symbols, which then are fetched when they are needed. When such a symbol is fetched, we synthesize a coff_import_header structure in memory and create a regular ImportFile out of it. The implementation could be even smaller by just creating ImportFiles for every symbol available immediately, but that would have the drawback of actually ending up importing all symbols unless running with GC enabled (and mingw mode defaults to having it disabled for historical reasons). Differential Revision: https://reviews.llvm.org/D104530 --- lld/COFF/Driver.cpp | 4 ++ lld/COFF/InputFiles.cpp | 90 +++++++++++++++++++++++++++++++++++ lld/COFF/InputFiles.h | 26 +++++++++- lld/COFF/SymbolTable.cpp | 21 ++++++++ lld/COFF/SymbolTable.h | 1 + lld/COFF/Symbols.cpp | 2 + lld/COFF/Symbols.h | 19 +++++++- lld/COFF/Writer.cpp | 1 + lld/test/COFF/link-dll-i386.s | 64 +++++++++++++++++++++++++ lld/test/COFF/link-dll.s | 66 +++++++++++++++++++++++++ 10 files changed, 292 insertions(+), 2 deletions(-) create mode 100644 lld/test/COFF/link-dll-i386.s create mode 100644 lld/test/COFF/link-dll.s diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp index 0bf82182ec0f4..937f605590bc8 100644 --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -234,6 +234,10 @@ void LinkerDriver::addBuffer(std::unique_ptr mb, error(filename + ": is not a native COFF file. Recompile without /GL"); break; case file_magic::pecoff_executable: + if (config->mingw) { + symtab->addFile(make(mbref)); + break; + } if (filename.endswith_insensitive(".dll")) { error(filename + ": bad file type. Did you specify a DLL instead of an " "import library?"); diff --git a/lld/COFF/InputFiles.cpp b/lld/COFF/InputFiles.cpp index 9d36b5e25cfaa..7488ed947698b 100644 --- a/lld/COFF/InputFiles.cpp +++ b/lld/COFF/InputFiles.cpp @@ -1138,3 +1138,93 @@ std::string lld::coff::replaceThinLTOSuffix(StringRef path) { return (path + repl).str(); return std::string(path); } + +static bool isRVACode(COFFObjectFile *coffObj, uint64_t rva, InputFile *file) { + for (size_t i = 1, e = coffObj->getNumberOfSections(); i <= e; i++) { + const coff_section *sec = CHECK(coffObj->getSection(i), file); + if (rva >= sec->VirtualAddress && + rva <= sec->VirtualAddress + sec->VirtualSize) { + return (sec->Characteristics & COFF::IMAGE_SCN_CNT_CODE) != 0; + } + } + return false; +} + +void DLLFile::parse() { + // Parse a memory buffer as a PE-COFF executable. + std::unique_ptr bin = CHECK(createBinary(mb), this); + + if (auto *obj = dyn_cast(bin.get())) { + bin.release(); + coffObj.reset(obj); + } else { + error(toString(this) + " is not a COFF file"); + return; + } + + if (!coffObj->getPE32Header() && !coffObj->getPE32PlusHeader()) { + error(toString(this) + " is not a PE-COFF executable"); + return; + } + + for (const auto &exp : coffObj->export_directories()) { + StringRef dllName, symbolName; + uint32_t exportRVA; + checkError(exp.getDllName(dllName)); + checkError(exp.getSymbolName(symbolName)); + checkError(exp.getExportRVA(exportRVA)); + + if (symbolName.empty()) + continue; + + bool code = isRVACode(coffObj.get(), exportRVA, this); + + Symbol *s = make(); + s->dllName = dllName; + s->symbolName = symbolName; + s->importType = code ? ImportType::IMPORT_CODE : ImportType::IMPORT_DATA; + s->nameType = ImportNameType::IMPORT_NAME; + + if (coffObj->getMachine() == I386) { + s->symbolName = symbolName = saver.save("_" + symbolName); + s->nameType = ImportNameType::IMPORT_NAME_NOPREFIX; + } + + StringRef impName = saver.save("__imp_" + symbolName); + symtab->addLazyDLLSymbol(this, s, impName); + if (code) + symtab->addLazyDLLSymbol(this, s, symbolName); + } +} + +MachineTypes DLLFile::getMachineType() { + if (coffObj) + return static_cast(coffObj->getMachine()); + return IMAGE_FILE_MACHINE_UNKNOWN; +} + +void DLLFile::makeImport(DLLFile::Symbol *s) { + if (!seen.insert(s->symbolName).second) + return; + + size_t impSize = s->dllName.size() + s->symbolName.size() + 2; // +2 for NULs + size_t size = sizeof(coff_import_header) + impSize; + char *buf = bAlloc.Allocate(size); + memset(buf, 0, size); + char *p = buf; + auto *imp = reinterpret_cast(p); + p += sizeof(*imp); + imp->Sig2 = 0xFFFF; + imp->Machine = coffObj->getMachine(); + imp->SizeOfData = impSize; + imp->OrdinalHint = 0; // Only linking by name + imp->TypeInfo = (s->nameType << 2) | s->importType; + + // Write symbol name and DLL name. + memcpy(p, s->symbolName.data(), s->symbolName.size()); + p += s->symbolName.size() + 1; + memcpy(p, s->dllName.data(), s->dllName.size()); + MemoryBufferRef mbref = MemoryBufferRef(StringRef(buf, size), s->dllName); + ImportFile *impFile = make(mbref); + symtab->addFile(impFile); +} diff --git a/lld/COFF/InputFiles.h b/lld/COFF/InputFiles.h index 644369f9ee984..7fc2429372924 100644 --- a/lld/COFF/InputFiles.h +++ b/lld/COFF/InputFiles.h @@ -14,6 +14,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/StringSet.h" #include "llvm/BinaryFormat/Magic.h" #include "llvm/Object/Archive.h" #include "llvm/Object/COFF.h" @@ -67,7 +68,8 @@ class InputFile { LazyObjectKind, PDBKind, ImportKind, - BitcodeKind + BitcodeKind, + DLLKind }; Kind kind() const { return fileKind; } virtual ~InputFile() {} @@ -393,6 +395,28 @@ class BitcodeFile : public InputFile { std::vector symbols; }; +// .dll file. +class DLLFile : public InputFile { +public: + explicit DLLFile(MemoryBufferRef m) : InputFile(DLLKind, m) {} + static bool classof(const InputFile *f) { return f->kind() == DLLKind; } + void parse() override; + MachineTypes getMachineType() override; + + struct Symbol { + StringRef dllName; + StringRef symbolName; + llvm::COFF::ImportNameType nameType; + llvm::COFF::ImportType importType; + }; + + void makeImport(Symbol *s); + +private: + std::unique_ptr coffObj; + llvm::StringSet<> seen; +}; + inline bool isBitcode(MemoryBufferRef mb) { return identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode; } diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp index 024a408ca4545..3e0741839757c 100644 --- a/lld/COFF/SymbolTable.cpp +++ b/lld/COFF/SymbolTable.cpp @@ -75,6 +75,11 @@ static void forceLazy(Symbol *s) { case Symbol::Kind::LazyObjectKind: cast(s)->file->fetch(); break; + case Symbol::Kind::LazyDLLSymbolKind: { + auto *l = cast(s); + l->file->makeImport(l->sym); + break; + } default: llvm_unreachable( "symbol passed to forceLazy is not a LazyArchive or LazyObject"); @@ -540,6 +545,22 @@ void SymbolTable::addLazyObject(LazyObjFile *f, StringRef n) { f->fetch(); } +void SymbolTable::addLazyDLLSymbol(DLLFile *f, DLLFile::Symbol *sym, + StringRef n) { + Symbol *s; + bool wasInserted; + std::tie(s, wasInserted) = insert(n); + if (wasInserted) { + replaceSymbol(s, f, sym, n); + return; + } + auto *u = dyn_cast(s); + if (!u || u->weakAlias || s->pendingArchiveLoad) + return; + s->pendingArchiveLoad = true; + f->makeImport(sym); +} + static std::string getSourceLocationBitcode(BitcodeFile *file) { std::string res("\n>>> defined at "); StringRef source = file->obj->getSourceFileName(); diff --git a/lld/COFF/SymbolTable.h b/lld/COFF/SymbolTable.h index 870a7151fa8ea..2d3ec65eda236 100644 --- a/lld/COFF/SymbolTable.h +++ b/lld/COFF/SymbolTable.h @@ -87,6 +87,7 @@ class SymbolTable { Symbol *addUndefined(StringRef name, InputFile *f, bool isWeakAlias); void addLazyArchive(ArchiveFile *f, const Archive::Symbol &sym); void addLazyObject(LazyObjFile *f, StringRef n); + void addLazyDLLSymbol(DLLFile *f, DLLFile::Symbol *sym, StringRef n); Symbol *addAbsolute(StringRef n, COFFSymbolRef s); Symbol *addRegular(InputFile *f, StringRef n, const llvm::object::coff_symbol_generic *s = nullptr, diff --git a/lld/COFF/Symbols.cpp b/lld/COFF/Symbols.cpp index 60ff72aeb5225..8a6a9b27d45fa 100644 --- a/lld/COFF/Symbols.cpp +++ b/lld/COFF/Symbols.cpp @@ -70,6 +70,8 @@ InputFile *Symbol::getFile() { return sym->file; if (auto *sym = dyn_cast(this)) return sym->file; + if (auto *sym = dyn_cast(this)) + return sym->file; return nullptr; } diff --git a/lld/COFF/Symbols.h b/lld/COFF/Symbols.h index 13e7488d6b879..65412362ef15c 100644 --- a/lld/COFF/Symbols.h +++ b/lld/COFF/Symbols.h @@ -61,6 +61,7 @@ class Symbol { UndefinedKind, LazyArchiveKind, LazyObjectKind, + LazyDLLSymbolKind, LastDefinedCOFFKind = DefinedCommonKind, LastDefinedKind = DefinedSyntheticKind, @@ -92,7 +93,8 @@ class Symbol { bool isLive() const; bool isLazy() const { - return symbolKind == LazyArchiveKind || symbolKind == LazyObjectKind; + return symbolKind == LazyArchiveKind || symbolKind == LazyObjectKind || + symbolKind == LazyDLLSymbolKind; } private: @@ -309,6 +311,18 @@ class LazyObject : public Symbol { LazyObjFile *file; }; +class LazyDLLSymbol : public Symbol { +public: + LazyDLLSymbol(DLLFile *f, DLLFile::Symbol *s, StringRef n) + : Symbol(LazyDLLSymbolKind, n), file(f), sym(s) {} + static bool classof(const Symbol *s) { + return s->kind() == LazyDLLSymbolKind; + } + + DLLFile *file; + DLLFile::Symbol *sym; +}; + // Undefined symbols. class Undefined : public Symbol { public: @@ -423,6 +437,7 @@ inline uint64_t Defined::getRVA() { return cast(this)->getRVA(); case LazyArchiveKind: case LazyObjectKind: + case LazyDLLSymbolKind: case UndefinedKind: llvm_unreachable("Cannot get the address for an undefined symbol."); } @@ -447,6 +462,7 @@ inline Chunk *Defined::getChunk() { return cast(this)->getChunk(); case LazyArchiveKind: case LazyObjectKind: + case LazyDLLSymbolKind: case UndefinedKind: llvm_unreachable("Cannot get the chunk of an undefined symbol."); } @@ -467,6 +483,7 @@ union SymbolUnion { alignas(DefinedImportThunk) char h[sizeof(DefinedImportThunk)]; alignas(DefinedLocalImport) char i[sizeof(DefinedLocalImport)]; alignas(LazyObject) char j[sizeof(LazyObject)]; + alignas(LazyDLLSymbol) char k[sizeof(LazyDLLSymbol)]; }; template diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp index 7a85ecbd456a8..37cbe2bb96a80 100644 --- a/lld/COFF/Writer.cpp +++ b/lld/COFF/Writer.cpp @@ -1583,6 +1583,7 @@ static void maybeAddAddressTakenFunction(SymbolRVASet &addressTakenSyms, break; case Symbol::LazyArchiveKind: case Symbol::LazyObjectKind: + case Symbol::LazyDLLSymbolKind: case Symbol::UndefinedKind: // Undefined symbols resolve to zero, so they don't have an RVA. Lazy // symbols shouldn't have relocations. diff --git a/lld/test/COFF/link-dll-i386.s b/lld/test/COFF/link-dll-i386.s new file mode 100644 index 0000000000000..7121678770109 --- /dev/null +++ b/lld/test/COFF/link-dll-i386.s @@ -0,0 +1,64 @@ +# REQUIRES: x86 + +## Test creating a DLL and linking against the DLL without using an import +## library. + +## Test on i386 with cdecl decorated symbols. + +## Linking the executable with -opt:noref, to make sure that we don't +## pull in more import entries than what's needed, even if not running GC. + +# RUN: split-file %s %t.dir + +# RUN: llvm-mc -filetype=obj -triple=i386-windows-gnu %t.dir/lib.s -o %t.lib.o +# RUN: lld-link -safeseh:no -noentry -dll -def:%t.dir/lib.def %t.lib.o -out:%t.lib.dll -implib:%t.implib.lib +# RUN: llvm-mc -filetype=obj -triple=i386-windows-gnu %t.dir/main.s -o %t.main.o +# RUN: lld-link -lldmingw %t.main.o -out:%t.main.exe %t.lib.dll -opt:noref -verbose 2>&1 | FileCheck --check-prefix=LOG %s +# RUN: llvm-readobj --coff-imports %t.main.exe | FileCheck %s + +#--- lib.s +.text +.global _func1 +_func1: + ret +.global _func2 +_func2: + ret +.global _func3 +_func3: + ret +.data +.global _variable +_variable: + .int 42 + +#--- lib.def +EXPORTS +func1 +func2 +func3 +variable + +#--- main.s +.text +.global _mainCRTStartup +_mainCRTStartup: + call _func2 + movl .refptr._variable, %eax + movl (%eax), %eax + ret + +.section .rdata$.refptr._variable,"dr",discard,.refptr._variable +.globl .refptr._variable +.refptr._variable: + .long _variable + +# CHECK: Import { +# CHECK-NEXT: Name: link-dll-i386.s.tmp.lib.dll +# CHECK-NEXT: ImportLookupTableRVA: +# CHECK-NEXT: ImportAddressTableRVA +# CHECK-NEXT: Symbol: func2 +# CHECK-NEXT: Symbol: variable +# CHECK-NEXT: } + +# LOG: Automatically importing _variable from link-dll-i386.s.tmp.lib.dll diff --git a/lld/test/COFF/link-dll.s b/lld/test/COFF/link-dll.s new file mode 100644 index 0000000000000..997c5b02c7aba --- /dev/null +++ b/lld/test/COFF/link-dll.s @@ -0,0 +1,66 @@ +# REQUIRES: x86 + +## Test creating a DLL and linking against the DLL without using an import +## library. + +## Explicitly creating an import library but naming it differently than the +## DLL, to avoid any risk of implicitly referencing it instead of the DLL +## itself. + +## Linking the executable with -opt:noref, to make sure that we don't +## pull in more import entries than what's needed, even if not running GC. + +# RUN: split-file %s %t.dir + +# RUN: llvm-mc -filetype=obj -triple=x86_64-windows-gnu %t.dir/lib.s -o %t.lib.o +# RUN: lld-link -noentry -dll -def:%t.dir/lib.def %t.lib.o -out:%t.lib.dll -implib:%t.implib.lib +# RUN: llvm-mc -filetype=obj -triple=x86_64-windows-gnu %t.dir/main.s -o %t.main.o +# RUN: lld-link -lldmingw %t.main.o -out:%t.main.exe %t.lib.dll -opt:noref -verbose 2>&1 | FileCheck --check-prefix=LOG %s +# RUN: llvm-readobj --coff-imports %t.main.exe | FileCheck %s + +#--- lib.s +.text +.global func1 +func1: + ret +.global func2 +func2: + ret +.global func3 +func3: + ret +.data +.global variable +variable: + .int 42 + +#--- lib.def +EXPORTS +func1 +func2 +func3 +variable + +#--- main.s +.text +.global mainCRTStartup +mainCRTStartup: + call func2 + movq .refptr.variable(%rip), %rax + movl (%rax), %eax + ret + +.section .rdata$.refptr.variable,"dr",discard,.refptr.variable +.globl .refptr.variable +.refptr.variable: + .quad variable + +# CHECK: Import { +# CHECK-NEXT: Name: link-dll.s.tmp.lib.dll +# CHECK-NEXT: ImportLookupTableRVA: +# CHECK-NEXT: ImportAddressTableRVA +# CHECK-NEXT: Symbol: func2 +# CHECK-NEXT: Symbol: variable +# CHECK-NEXT: } + +# LOG: Automatically importing variable from link-dll.s.tmp.lib.dll From c09e5e50b13aa1f5a2eafc81097ffe8a5799e5b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Thu, 17 Jun 2021 15:57:20 +0300 Subject: [PATCH 496/619] [LLD] [MinGW] Allow linking to DLLs directly As the COFF linker is capable of linking directly against a DLL now (after D104530, as long as it is running in mingw mode), don't error out here but successfully load libraries specified with "-l" from DLLs if that's what ld.bfd would have matched. Differential Revision: https://reviews.llvm.org/D104531 --- lld/MinGW/Driver.cpp | 14 ++++---------- lld/test/MinGW/lib.test | 8 ++++---- 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/lld/MinGW/Driver.cpp b/lld/MinGW/Driver.cpp index 734f4092a666f..27cb508403f63 100644 --- a/lld/MinGW/Driver.cpp +++ b/lld/MinGW/Driver.cpp @@ -142,16 +142,10 @@ searchLibrary(StringRef name, ArrayRef searchPaths, bool bStatic) { if (!bStatic) { if (Optional s = findFile(dir, name + ".lib")) return *s; - if (Optional s = findFile(dir, "lib" + name + ".dll")) { - error("lld doesn't support linking directly against " + *s + - ", use an import library"); - return ""; - } - if (Optional s = findFile(dir, name + ".dll")) { - error("lld doesn't support linking directly against " + *s + - ", use an import library"); - return ""; - } + if (Optional s = findFile(dir, "lib" + name + ".dll")) + return *s; + if (Optional s = findFile(dir, name + ".dll")) + return *s; } } error("unable to find library -l" + name); diff --git a/lld/test/MinGW/lib.test b/lld/test/MinGW/lib.test index ddff5debab3ce..45dd79712213d 100644 --- a/lld/test/MinGW/lib.test +++ b/lld/test/MinGW/lib.test @@ -40,7 +40,7 @@ OTHERSTYLES-SAME: msvcstyle.lib RUN: echo > %t/lib/libnoimplib.dll RUN: echo > %t/lib/noprefix_noimplib.dll -RUN: not ld.lld -### -m i386pep -L%t/lib -lnoimplib 2>&1 | FileCheck -check-prefix=UNSUPPORTED-DLL1 %s -RUN: not ld.lld -### -m i386pep -L%t/lib -lnoprefix_noimplib 2>&1 | FileCheck -check-prefix=UNSUPPORTED-DLL2 %s -UNSUPPORTED-DLL1: lld doesn't support linking directly against {{.*}}libnoimplib.dll, use an import library -UNSUPPORTED-DLL2: lld doesn't support linking directly against {{.*}}noprefix_noimplib.dll, use an import library +RUN: ld.lld -### -m i386pep -L%t/lib -lnoimplib 2>&1 | FileCheck -check-prefix=DLL1 %s +RUN: ld.lld -### -m i386pep -L%t/lib -lnoprefix_noimplib 2>&1 | FileCheck -check-prefix=DLL2 %s +DLL1: libnoimplib.dll +DLL2: noprefix_noimplib.dll From ce211c505b82e5bbb68b936968d9b54608285416 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Thu, 17 Jun 2021 21:51:37 +0300 Subject: [PATCH 497/619] [LLD] [COFF] Fix up missing stdcall decorations in MinGW mode If linking directly against a DLL without an import library, the DLL export symbols might not contain stdcall decorations. If we have an undefined symbol with decoration, and we happen to have a matching undecorated symbol (which either is lazy and can be loaded, or already defined), then alias it against that instead. This matches what's done in reverse, when we have a def file declaring to export a symbol without decoration, but we only have a defined decorated symbol. In that case we do a fuzzy match (SymbolTable::findMangle). This case is more straightforward; if we have a decorated undefined symbol, just strip the decoration and look for the corresponding undecorated symbol name. Add warnings and options for either silencing the warning or disabling the whole feature, corresponding to how ld.bfd does it. (This feature works for any symbol decoration mismatch, not only when linking against a DLL directly; ld.bfd also tolerates it anywhere, and also fixes up mismatches in the other direction, like SymbolTable::findMangle, for any symbol, not only exports. But in practice, at least for lld, it would primarily end up used for linking against DLLs.) Differential Revision: https://reviews.llvm.org/D104532 --- lld/COFF/Config.h | 2 + lld/COFF/Driver.cpp | 15 ++++-- lld/COFF/InputFiles.cpp | 6 --- lld/COFF/Options.td | 1 + lld/COFF/SymbolTable.cpp | 61 ++++++++++++++++++---- lld/COFF/SymbolTable.h | 6 ++- lld/MinGW/Driver.cpp | 5 ++ lld/MinGW/Options.td | 4 ++ lld/test/COFF/link-dll-stdcall.s | 88 ++++++++++++++++++++++++++++++++ lld/test/MinGW/driver.test | 9 ++++ 10 files changed, 176 insertions(+), 21 deletions(-) create mode 100644 lld/test/COFF/link-dll-stdcall.s diff --git a/lld/COFF/Config.h b/lld/COFF/Config.h index 71ba27e19f069..df883b779ee4a 100644 --- a/lld/COFF/Config.h +++ b/lld/COFF/Config.h @@ -263,6 +263,7 @@ struct Configuration { bool warnLocallyDefinedImported = true; bool warnDebugInfoUnusable = true; bool warnLongSectionNames = true; + bool warnStdcallFixup = true; bool incremental = true; bool integrityCheck = false; bool killAt = false; @@ -273,6 +274,7 @@ struct Configuration { bool thinLTOIndexOnly; bool autoImport = false; bool pseudoRelocs = false; + bool stdcallFixup = false; }; extern Configuration *config; diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp index 937f605590bc8..6ebd80f741808 100644 --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -1773,6 +1773,9 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) { OPT_runtime_pseudo_reloc, OPT_runtime_pseudo_reloc_no, config->mingw); config->callGraphProfileSort = args.hasFlag( OPT_call_graph_profile_sort, OPT_call_graph_profile_sort_no, true); + config->stdcallFixup = + args.hasFlag(OPT_stdcall_fixup, OPT_stdcall_fixup_no, config->mingw); + config->warnStdcallFixup = !args.hasArg(OPT_stdcall_fixup); // Don't warn about long section names, such as .debug_info, for mingw or // when -debug:dwarf is requested. @@ -2106,10 +2109,10 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) { if (!wrapped.empty()) while (run()); - if (config->autoImport) { + if (config->autoImport || config->stdcallFixup) { // MinGW specific. // Load any further object files that might be needed for doing automatic - // imports. + // imports, and do stdcall fixups. // // For cases with no automatically imported symbols, this iterates once // over the symbol table and doesn't do anything. @@ -2121,7 +2124,13 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) { // normal object file as well (although that won't be used for the // actual autoimport later on). If this pass adds new undefined references, // we won't iterate further to resolve them. - symtab->loadMinGWAutomaticImports(); + // + // If stdcall fixups only are needed for loading import entries from + // a DLL without import library, this also just needs running once. + // If it ends up pulling in more object files from static libraries, + // (and maybe doing more stdcall fixups along the way), this would need + // to loop these two calls. + symtab->loadMinGWSymbols(); run(); } diff --git a/lld/COFF/InputFiles.cpp b/lld/COFF/InputFiles.cpp index 7488ed947698b..ef37e203d7edf 100644 --- a/lld/COFF/InputFiles.cpp +++ b/lld/COFF/InputFiles.cpp @@ -957,12 +957,6 @@ Optional ObjFile::getDILineInfo(uint32_t offset, return dwarf->getDILineInfo(offset, sectionIndex); } -static StringRef ltrim1(StringRef s, const char *chars) { - if (!s.empty() && strchr(chars, s[0])) - return s.substr(1); - return s; -} - void ImportFile::parse() { const char *buf = mb.getBufferStart(); const auto *hdr = reinterpret_cast(buf); diff --git a/lld/COFF/Options.td b/lld/COFF/Options.td index 33d560902f78d..2ce145520ea89 100644 --- a/lld/COFF/Options.td +++ b/lld/COFF/Options.td @@ -221,6 +221,7 @@ def rsp_quoting : Joined<["--"], "rsp-quoting=">, HelpText<"Quoting style for response files, 'windows' (default) or 'posix'">; def start_lib : F<"start-lib">, HelpText<"Start group of objects treated as if they were in a library">; +defm stdcall_fixup : B_priv<"stdcall-fixup">; def thinlto_emit_imports_files : F<"thinlto-emit-imports-files">, HelpText<"Emit .imports files with -thinlto-index-only">; diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp index 3e0741839757c..536f343507243 100644 --- a/lld/COFF/SymbolTable.cpp +++ b/lld/COFF/SymbolTable.cpp @@ -28,6 +28,12 @@ using namespace llvm; namespace lld { namespace coff { +StringRef ltrim1(StringRef s, const char *chars) { + if (!s.empty() && strchr(chars, s[0])) + return s.substr(1); + return s; +} + static Timer ltoTimer("LTO", Timer::root()); SymbolTable *symtab; @@ -249,7 +255,7 @@ static void reportUndefinedSymbol(const UndefinedDiag &undefDiag) { errorOrWarn(os.str()); } -void SymbolTable::loadMinGWAutomaticImports() { +void SymbolTable::loadMinGWSymbols() { for (auto &i : symMap) { Symbol *sym = i.second; auto *undef = dyn_cast(sym); @@ -260,17 +266,50 @@ void SymbolTable::loadMinGWAutomaticImports() { StringRef name = undef->getName(); - if (name.startswith("__imp_")) - continue; - // If we have an undefined symbol, but we have a lazy symbol we could - // load, load it. - Symbol *l = find(("__imp_" + name).str()); - if (!l || l->pendingArchiveLoad || !l->isLazy()) - continue; + if (config->machine == I386 && config->stdcallFixup) { + // Check if we can resolve an undefined decorated symbol by finding + // the indended target as an undecorated symbol (only with a leading + // underscore). + StringRef origName = name; + StringRef baseName = name; + // Trim down stdcall/fastcall/vectorcall symbols to the base name. + baseName = ltrim1(baseName, "_@"); + baseName = baseName.substr(0, baseName.find('@')); + // Add a leading underscore, as it would be in cdecl form. + std::string newName = ("_" + baseName).str(); + Symbol *l; + if (newName != origName && (l = find(newName)) != nullptr) { + // If we found a symbol and it is lazy; load it. + if (l->isLazy() && !l->pendingArchiveLoad) { + log("Loading lazy " + l->getName() + " from " + + l->getFile()->getName() + " for stdcall fixup"); + forceLazy(l); + } + // If it's lazy or already defined, hook it up as weak alias. + if (l->isLazy() || isa(l)) { + if (config->warnStdcallFixup) + warn("Resolving " + origName + " by linking to " + newName); + else + log("Resolving " + origName + " by linking to " + newName); + undef->weakAlias = l; + continue; + } + } + } + + if (config->autoImport) { + if (name.startswith("__imp_")) + continue; + // If we have an undefined symbol, but we have a lazy symbol we could + // load, load it. + Symbol *l = find(("__imp_" + name).str()); + if (!l || l->pendingArchiveLoad || !l->isLazy()) + continue; - log("Loading lazy " + l->getName() + " from " + l->getFile()->getName() + - " for automatic import"); - forceLazy(l); + log("Loading lazy " + l->getName() + " from " + l->getFile()->getName() + + " for automatic import"); + forceLazy(l); + } } } diff --git a/lld/COFF/SymbolTable.h b/lld/COFF/SymbolTable.h index 2d3ec65eda236..e88002c883101 100644 --- a/lld/COFF/SymbolTable.h +++ b/lld/COFF/SymbolTable.h @@ -57,7 +57,9 @@ class SymbolTable { // symbols and warn about imported local symbols. void resolveRemainingUndefines(); - void loadMinGWAutomaticImports(); + // Load lazy objects that are needed for MinGW automatic import and for + // doing stdcall fixups. + void loadMinGWSymbols(); bool handleMinGWAutomaticImport(Symbol *sym, StringRef name); // Returns a list of chunks of selected symbols. @@ -135,6 +137,8 @@ extern SymbolTable *symtab; std::vector getSymbolLocations(ObjFile *file, uint32_t symIndex); +StringRef ltrim1(StringRef s, const char *chars); + } // namespace coff } // namespace lld diff --git a/lld/MinGW/Driver.cpp b/lld/MinGW/Driver.cpp index 27cb508403f63..4a3a9ef9be030 100644 --- a/lld/MinGW/Driver.cpp +++ b/lld/MinGW/Driver.cpp @@ -289,6 +289,11 @@ bool mingw::link(ArrayRef argsArr, bool canExitEarly, else add("-WX:no"); + if (args.hasFlag(OPT_enable_stdcall_fixup, OPT_disable_stdcall_fixup, false)) + add("-stdcall-fixup"); + else if (args.hasArg(OPT_disable_stdcall_fixup)) + add("-stdcall-fixup:no"); + if (args.hasArg(OPT_shared)) add("-dll"); if (args.hasArg(OPT_verbose)) diff --git a/lld/MinGW/Options.td b/lld/MinGW/Options.td index e27c1365ab1f3..e1a505240cb82 100644 --- a/lld/MinGW/Options.td +++ b/lld/MinGW/Options.td @@ -35,11 +35,15 @@ def disable_auto_import: F<"disable-auto-import">, HelpText<"Don't automatically import data symbols from other DLLs without dllimport">; def disable_runtime_pseudo_reloc: F<"disable-runtime-pseudo-reloc">, HelpText<"Don't do automatic imports that require runtime fixups">; +def disable_stdcall_fixup: F<"disable-stdcall-fixup">, + HelpText<"Don't resolve stdcall/fastcall/vectorcall to undecorated symbols">; defm dynamicbase: B<"dynamicbase", "Enable ASLR", "Disable ASLR">; def enable_auto_import: F<"enable-auto-import">, HelpText<"Automatically import data symbols from other DLLs where needed">; def enable_runtime_pseudo_reloc: F<"enable-runtime-pseudo-reloc">, HelpText<"Allow automatic imports that require runtime fixups">; +def enable_stdcall_fixup: F<"enable-stdcall-fixup">, + HelpText<"Resolve stdcall/fastcall/vectorcall to undecorated symbols without warnings">; defm entry: Eq<"entry", "Name of entry point symbol">, MetaVarName<"">; def exclude_all_symbols: F<"exclude-all-symbols">, HelpText<"Don't automatically export any symbols">; diff --git a/lld/test/COFF/link-dll-stdcall.s b/lld/test/COFF/link-dll-stdcall.s new file mode 100644 index 0000000000000..d9a6dbb342240 --- /dev/null +++ b/lld/test/COFF/link-dll-stdcall.s @@ -0,0 +1,88 @@ +# REQUIRES: x86 + +## Test creating a DLL and linking against the DLL without using an import +## library. + +## Test on i386 with stdcall/fastcall/vectorcall decorated symbols. + +## Check that we normally warn about these fixups. If -stdcall-fixup:no +## (--disable-stdcall-fixup on the MinGW linker level) is passed, we don't +## do these fixups. If we -stdcall-fixup (--enable-stdcall-fixup on the MinGW +## linker level) is passed, we don't warn about it at all. + +# RUN: split-file %s %t.dir + +# RUN: llvm-mc -filetype=obj -triple=i386-windows-gnu %t.dir/lib.s -o %t.lib.o +# RUN: lld-link -safeseh:no -noentry -dll -def:%t.dir/lib.def %t.lib.o -out:%t.lib.dll -implib:%t.implib.lib +# RUN: llvm-mc -filetype=obj -triple=i386-windows-gnu %t.dir/main.s -o %t.main.o +# RUN: lld-link -lldmingw %t.main.o -out:%t.main.exe %t.lib.dll -opt:noref 2>&1 | FileCheck --check-prefix=LOG %s +# RUN: llvm-readobj --coff-imports %t.main.exe | FileCheck %s +# RUN: not lld-link -lldmingw %t.main.o -out:%t.main.exe %t.lib.dll -opt:noref -stdcall-fixup:no 2>&1 | FileCheck --check-prefix=ERROR %s +# RUN: lld-link -lldmingw %t.main.o -out:%t.main.exe %t.lib.dll -opt:noref -stdcall-fixup 2>&1 | count 0 + +#--- lib.s + .text + .globl _stdcall@8 + .globl @fastcall@8 + .globl vectorcall@@8 + .globl __underscored +_stdcall@8: + movl 8(%esp), %eax + addl 4(%esp), %eax + retl $8 +@fastcall@8: + movl 8(%esp), %eax + addl 4(%esp), %eax + retl $8 +vectorcall@@8: + movl 8(%esp), %eax + addl 4(%esp), %eax + retl $8 +__underscored: + ret + +#--- lib.def +EXPORTS +stdcall +fastcall +vectorcall +_underscored + +#--- main.s +.text +.global _mainCRTStartup +_mainCRTStartup: + pushl $2 + pushl $1 + calll _stdcall@8 + movl $1, %ecx + movl $2, %edx + calll @fastcall@8 + movl $1, %ecx + movl $2, %edx + calll vectorcall@@8 + pushl $2 + pushl $1 + calll __underscored + addl $8, %esp + xorl %eax, %eax + popl %ebp + retl + +# CHECK: Import { +# CHECK-NEXT: Name: link-dll-stdcall.s.tmp.lib.dll +# CHECK-NEXT: ImportLookupTableRVA: +# CHECK-NEXT: ImportAddressTableRVA +# CHECK-NEXT: Symbol: _underscored +# CHECK-NEXT: Symbol: fastcall +# CHECK-NEXT: Symbol: stdcall +# CHECK-NEXT: Symbol: vectorcall +# CHECK-NEXT: } + +# LOG-DAG: Resolving vectorcall@@8 by linking to _vectorcall +# LOG-DAG: Resolving @fastcall@8 by linking to _fastcall +# LOG-DAG: Resolving _stdcall@8 by linking to _stdcall + +# ERROR-DAG: undefined symbol: _stdcall@8 +# ERROR-DAG: undefined symbol: @fastcall@8 +# ERROR-DAG: undefined symbol: vectorcall@@8 diff --git a/lld/test/MinGW/driver.test b/lld/test/MinGW/driver.test index cb164df6ecce1..ab5ca4c5c791c 100644 --- a/lld/test/MinGW/driver.test +++ b/lld/test/MinGW/driver.test @@ -303,3 +303,12 @@ RUN: ld.lld -### -m i386pep foo.o 2>&1 | FileCheck -check-prefix NO-FATAL_WARNIN RUN: ld.lld -### -m i386pep foo.o -no-fatal-warnings 2>&1 | FileCheck -check-prefix NO-FATAL_WARNINGS %s RUN: ld.lld -### -m i386pep foo.o --no-fatal-warnings 2>&1 | FileCheck -check-prefix NO-FATAL_WARNINGS %s NO-FATAL_WARNINGS: -WX:no + +RUN: ld.lld -### -m i386pep foo.o 2>&1 | FileCheck -check-prefix NO-FIXUP %s +NO-FIXUP-NOT: -stdcall-fixup + +RUN: ld.lld -### -m i386pep foo.o --enable-stdcall-fixup --disable-stdcall-fixup 2>&1 | FileCheck -check-prefix DISABLE-FIXUP %s +DISABLE-FIXUP: -stdcall-fixup:no + +RUN: ld.lld -### -m i386pep foo.o --disable-stdcall-fixup --enable-stdcall-fixup 2>&1 | FileCheck -check-prefix ENABLE-FIXUP %s +ENABLE-FIXUP: -stdcall-fixup{{ }} From 0c53f602d5a9d7207abb13e463f68e9d092f47a7 Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Fri, 2 Jul 2021 08:55:34 +0200 Subject: [PATCH 498/619] [clangd] Add some more missing include dirs for completeness --- clang-tools-extra/clangd/benchmarks/CMakeLists.txt | 3 ++- clang-tools-extra/clangd/indexer/CMakeLists.txt | 1 + clang-tools-extra/clangd/unittests/xpc/CMakeLists.txt | 3 +++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/clang-tools-extra/clangd/benchmarks/CMakeLists.txt b/clang-tools-extra/clangd/benchmarks/CMakeLists.txt index b62ffd7a1ad16..7a17637b6c377 100644 --- a/clang-tools-extra/clangd/benchmarks/CMakeLists.txt +++ b/clang-tools-extra/clangd/benchmarks/CMakeLists.txt @@ -1,4 +1,5 @@ -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. + ${CMAKE_CURRENT_BINARY_DIR}/..) add_subdirectory(CompletionModel) diff --git a/clang-tools-extra/clangd/indexer/CMakeLists.txt b/clang-tools-extra/clangd/indexer/CMakeLists.txt index ff110693c7066..f6389654b3628 100644 --- a/clang-tools-extra/clangd/indexer/CMakeLists.txt +++ b/clang-tools-extra/clangd/indexer/CMakeLists.txt @@ -1,4 +1,5 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../) +include_directories(${CMAKE_CURRENT_BINARY_DIR}/../) set(LLVM_LINK_COMPONENTS Support diff --git a/clang-tools-extra/clangd/unittests/xpc/CMakeLists.txt b/clang-tools-extra/clangd/unittests/xpc/CMakeLists.txt index 21a1667b52d37..372528d1e82d3 100644 --- a/clang-tools-extra/clangd/unittests/xpc/CMakeLists.txt +++ b/clang-tools-extra/clangd/unittests/xpc/CMakeLists.txt @@ -4,8 +4,11 @@ set(LLVM_LINK_COMPONENTS get_filename_component(CLANGD_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../clangd REALPATH) +get_filename_component(CLANGD_BINARY_DIR + ${CMAKE_CURRENT_SOURCE_DIR}/../../clangd REALPATH) include_directories( ${CLANGD_SOURCE_DIR} + ${CLANGD_BINARY_DIR} ) add_custom_target(ClangdXpcUnitTests) From 3b95400f78a9824172629123580c0a0df36cbc70 Mon Sep 17 00:00:00 2001 From: Tobias Gysi Date: Fri, 2 Jul 2021 06:45:34 +0000 Subject: [PATCH 499/619] [mlir][linalg][python] Add max operation in OpDSL Add the max operation to the OpDSL and introduce a max pooling operation to test the implementation. As MLIR has no builtin max operation, the max function is lowered to a compare and select pair. Differential Revision: https://reviews.llvm.org/D105203 --- .../Linalg/IR/LinalgNamedStructuredOps.yaml | 72 ++++++++++++++++++- mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 15 ++++ .../dialects/linalg/opdsl/lang/emitter.py | 12 ++++ .../linalg/opdsl/ops/core_named_ops.py | 18 +++++ .../generalize-named-polymorphic-ops.mlir | 30 ++++++++ .../linalg/opdsl/emit_structured_generic.py | 26 +++++-- mlir/test/python/dialects/linalg/opsrun.py | 20 +++--- 7 files changed, 178 insertions(+), 15 deletions(-) diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml index a8baf23bbfaab..39045a212ce11 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml @@ -1,4 +1,3 @@ - --- !LinalgOpConfig metadata: !LinalgOpMetadata name: matmul @@ -594,6 +593,77 @@ structured_op: !LinalgStructuredOpConfig - !ScalarExpression scalar_arg: I --- !LinalgOpConfig +metadata: !LinalgOpMetadata + name: pooling_nhwc_max_poly + cpp_class_name: PoolingNhwcMaxPolyOp + doc: |- + Performs max pooling. + + Numeric casting is performed on the input operand, promoting it to the same + data type as the accumulator/output. +structured_op: !LinalgStructuredOpConfig + args: + - !LinalgOperandDefConfig + name: I + usage: InputOperand + type_var: T1 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] -> + (s0, s1, s2, s3)> + - !LinalgOperandDefConfig + name: K + usage: InputOperand + type_var: T2 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] -> + (s4, s5)> + - !LinalgOperandDefConfig + name: O + usage: OutputOperand + type_var: U + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] -> + (s0, s6, s7, s3)> + - !LinalgOperandDefConfig + name: strides + usage: IndexAttribute + type_var: I64 + attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] + -> (s8, s9)> + - !LinalgOperandDefConfig + name: dilations + usage: IndexAttribute + type_var: I64 + attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] + -> (s10, s11)> + indexing_maps: !LinalgIndexingMapsConfig + static_indexing_maps: + - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, + s10, s11] -> (d0, d1 * s8 + d3 * s10, d2 * s9 + d4 * s11, d5)> + - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, + s10, s11] -> (d3, d4)> + - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, + s10, s11] -> (d0, d1, d2, d5)> + iterator_types: + - parallel + - parallel + - parallel + - reduction + - reduction + - parallel + assignments: + - !ScalarAssign + arg: O + value: !ScalarExpression + scalar_apply: + fn_name: max + operands: + - !ScalarExpression + scalar_arg: O + - !ScalarExpression + symbolic_cast: + type_var: U + operands: + - !ScalarExpression + scalar_arg: I +--- !LinalgOpConfig metadata: !LinalgOpMetadata name: fill_rng_2d cpp_class_name: FillRng2DOp diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp index d0c69b4148345..9b729b9db5d10 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -274,6 +274,21 @@ class RegionBuilderHelper { llvm_unreachable("unsupported non numeric type"); } + Value applyfn__max(Value lhs, Value rhs) { + OpBuilder builder = getBuilder(); + if (isFloatingPoint(lhs)) { + Value condition = + builder.create(lhs.getLoc(), CmpFPredicate::OGT, lhs, rhs); + return builder.create(lhs.getLoc(), condition, lhs, rhs); + } + if (isInteger(lhs)) { + Value condition = + builder.create(lhs.getLoc(), CmpIPredicate::sgt, lhs, rhs); + return builder.create(lhs.getLoc(), condition, lhs, rhs); + } + llvm_unreachable("unsupported non numeric type"); + } + void yieldOutputs(ValueRange values) { assert(!values.empty() && "linalg ops must yield outputs"); if (values.empty()) diff --git a/mlir/python/mlir/dialects/linalg/opdsl/lang/emitter.py b/mlir/python/mlir/dialects/linalg/opdsl/lang/emitter.py index f6fb0cc7d0d0e..9489dec522716 100644 --- a/mlir/python/mlir/dialects/linalg/opdsl/lang/emitter.py +++ b/mlir/python/mlir/dialects/linalg/opdsl/lang/emitter.py @@ -307,6 +307,18 @@ def _eval_mul(self, lhs: Value, rhs: Value) -> Value: return std.MulIOp(lhs.type, lhs, rhs).result raise NotImplementedError("Unsupported 'mul' operand: {lhs}") + def _eval_max(self, lhs: Value, rhs: Value) -> Value: + i1 = IntegerType.get_signless(1) + if _is_floating_point_type(lhs.type): + ogt_attr = IntegerAttr.get(IntegerType.get_signless(64), 2) + cond = std.CmpFOp(i1, ogt_attr, lhs, rhs).result + return std.SelectOp(lhs.type, cond, lhs, rhs).result + if _is_integer_type(lhs.type) or _is_index_type(lhs.type): + sgt_attr = IntegerAttr.get(IntegerType.get_signless(64), 4) + cond = std.CmpIOp(i1, sgt_attr, lhs, rhs).result + return std.SelectOp(lhs.type, cond, lhs, rhs).result + raise NotImplementedError("Unsupported 'max' operand: {lhs}") + def _infer_structured_outs(op_config: LinalgStructuredOpConfig, in_arg_defs: Sequence[OperandDefConfig], diff --git a/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py b/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py index 095d94956f5b7..04c950e0a44db 100644 --- a/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py +++ b/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py @@ -148,6 +148,24 @@ def pooling_nhwc_sum_poly( U, I[D.n, D.oh * S.SH + D.kh * S.DH, D.ow * S.SW + D.kw * S.DW, D.c]) +@linalg_structured_op +def pooling_nhwc_max_poly( + I=TensorDef(T1, S.N, S.H, S.W, S.C), + K=TensorDef(T2, S.KH, S.KW, index_dims=[D.kh, D.kw]), + O=TensorDef(U, S.N, S.OH, S.OW, S.C, output=True), + strides=AttributeDef(S.SH, S.SW), + dilations=AttributeDef(S.DH, S.DW)): + """Performs max pooling. + + Numeric casting is performed on the input operand, promoting it to the same + data type as the accumulator/output. + """ + domain(D.n, D.oh, D.ow, D.kh, D.kw, D.c) + O[D.n, D.oh, D.ow, D.c] = ReduceFn.max(D.kh, D.kw)( + cast(U, I[D.n, D.oh * S.SH + D.kh * S.DH, D.ow * S.SW + D.kw * S.DW, + D.c])) + + @linalg_structured_op def fill_rng_2d( min=ScalarDef(F64), diff --git a/mlir/test/Dialect/Linalg/generalize-named-polymorphic-ops.mlir b/mlir/test/Dialect/Linalg/generalize-named-polymorphic-ops.mlir index 723859c913c04..4a1cb8dbcfa58 100644 --- a/mlir/test/Dialect/Linalg/generalize-named-polymorphic-ops.mlir +++ b/mlir/test/Dialect/Linalg/generalize-named-polymorphic-ops.mlir @@ -60,6 +60,36 @@ func @generalize_depthwise_conv_2d_input_nhwc_filter_hwc_poly_i32(%input : tenso // ----- +func @generalize_pooling_nhwc_max_poly_f32(%input : tensor<1x4x16x1xf32>, %shape: tensor<2x2xf32>, %output: tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> { + %0 = linalg.pooling_nhwc_max_poly {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>} + ins(%input, %shape : tensor<1x4x16x1xf32>, tensor<2x2xf32>) outs(%output : tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> + return %0: tensor<1x2x4x1xf32> +} + +// CHECK-LABEL: @generalize_pooling_nhwc_max_poly_f32 +// CHECK: ^{{.*}}(%[[IN_ARG:.+]]: f32, %[[SHAPE_ARG:.+]]: f32, %[[OUT_ARG:.+]]: f32) +// CHECK-NEXT: %[[COND:.+]] = cmpf ogt, %[[OUT_ARG]], %[[IN_ARG]] : f32 +// CHECK-NEXT: %[[MAX:.+]] = select %[[COND]], %[[OUT_ARG]], %[[IN_ARG]] : f32 +// CHECK-NEXT: linalg.yield %[[MAX]] : f32 +// CHECK-NEXT: -> tensor<1x2x4x1xf32> + +// ----- + +func @generalize_pooling_nhwc_max_poly_i32(%input : tensor<1x4x16x1xi32>, %shape: tensor<2x2xi32>, %output: tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> { + %0 = linalg.pooling_nhwc_max_poly {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>} + ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) outs(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> + return %0: tensor<1x2x4x1xi32> +} + +// CHECK-LABEL: @generalize_pooling_nhwc_max_poly_i32 +// CHECK: ^{{.*}}(%[[IN_ARG:.+]]: i32, %[[SHAPE_ARG:.+]]: i32, %[[OUT_ARG:.+]]: i32) +// CHECK-NEXT: %[[COND:.+]] = cmpi sgt, %[[OUT_ARG]], %[[IN_ARG]] : i32 +// CHECK-NEXT: %[[MAX:.+]] = select %[[COND]], %[[OUT_ARG]], %[[IN_ARG]] : i32 +// CHECK-NEXT: linalg.yield %[[MAX]] : i32 +// CHECK-NEXT: -> tensor<1x2x4x1xi32> + +// ----- + func @generalize_pooling_nhwc_sum_poly_f32(%input : tensor<1x4x16x1xf32>, %shape: tensor<2x2xf32>, %output: tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> { %0 = linalg.pooling_nhwc_sum_poly {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>} ins(%input, %shape : tensor<1x4x16x1xf32>, tensor<2x2xf32>) outs(%output : tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> diff --git a/mlir/test/python/dialects/linalg/opdsl/emit_structured_generic.py b/mlir/test/python/dialects/linalg/opdsl/emit_structured_generic.py index f7db532dced5c..12f6c560cfecc 100644 --- a/mlir/test/python/dialects/linalg/opdsl/emit_structured_generic.py +++ b/mlir/test/python/dialects/linalg/opdsl/emit_structured_generic.py @@ -50,8 +50,9 @@ def pooling_poly( strides=AttributeDef(S.SH, S.SW), dilations=AttributeDef(S.DH, S.DW)): domain(D.n, D.oh, D.ow, D.kh, D.kw, D.c) - O[D.n, D.oh, D.ow, D.c] += cast( - U, I[D.n, D.oh * S.SH + D.kh * S.DH, D.ow * S.SW + D.kw * S.DW, D.c]) + O[D.n, D.oh, D.ow, D.c] = ReduceFn.max(D.kh, D.kw)( + cast(U, I[D.n, D.oh * S.SH + D.kh * S.DH, D.ow * S.SW + D.kw * S.DW, + D.c])) @linalg_structured_op @@ -221,8 +222,9 @@ def test_f32i32_conv(input, filter, init_result): # CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction", "reduction", "parallel"] # CHECK: ^{{.*}}(%[[IN:.+]]: f32, %[[SHAPE:.+]]: f32, %[[OUT:.+]]: i32) # CHECK-NEXT: %[[IN_CAST:.+]] = fptosi %[[IN:.+]] : f32 to i32 - # CHECK-NEXT: %[[SUM:.+]] = addi %[[OUT]], %[[IN_CAST]] : i32 - # CHECK-NEXT: linalg.yield %[[SUM]] : i32 + # CHECK-NEXT: %[[COND:.+]] = cmpi sgt, %[[OUT]], %[[IN_CAST:.+]] : i32 + # CHECK-NEXT: %[[MAX:.+]] = select %[[COND]], %[[OUT]], %[[IN_CAST:.+]] : i32 + # CHECK-NEXT: linalg.yield %[[MAX]] : i32 # CHECK-NEXT: -> tensor<2x4xi32> @builtin.FuncOp.from_py_func( RankedTensorType.get((4, 16), f32), RankedTensorType.get((2, 2), f32), @@ -231,6 +233,22 @@ def test_f32i32_pooling(input, shape, init_result): return pooling_poly( input, shape, outs=[init_result], strides=[2, 4], dilations=[1, 2]) + # CHECK-LABEL: @test_f32f32_pooling + # CHECK: linalg.generic + # CHECK-SAME: indexing_maps = [#[[$CONV_MAP_I]], #[[$POOL_MAP_K]], #[[$CONV_MAP_O]]] + # CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction", "reduction", "parallel"] + # CHECK: ^{{.*}}(%[[IN:.+]]: f32, %[[SHAPE:.+]]: f32, %[[OUT:.+]]: f32) + # CHECK-NEXT: %[[COND:.+]] = cmpf ogt, %[[OUT]], %[[IN:.+]] : f32 + # CHECK-NEXT: %[[MAX:.+]] = select %[[COND]], %[[OUT]], %[[IN:.+]] : f32 + # CHECK-NEXT: linalg.yield %[[MAX]] : f32 + # CHECK-NEXT: -> tensor<2x4xf32> + @builtin.FuncOp.from_py_func( + RankedTensorType.get((4, 16), f32), RankedTensorType.get((2, 2), f32), + RankedTensorType.get((2, 4), f32)) + def test_f32f32_pooling(input, shape, init_result): + return pooling_poly( + input, shape, outs=[init_result], strides=[2, 4], dilations=[1, 2]) + # CHECK-LABEL: @test_i32_fill_rng # CHECK: ^{{.*}}(%[[MIN:.+]]: f64, %[[MAX:.+]]: f64, %[[SEED:.+]]: i32, %{{.*}} # CHECK-DAG: %[[IDX0:.+]] = linalg.index 0 : index diff --git a/mlir/test/python/dialects/linalg/opsrun.py b/mlir/test/python/dialects/linalg/opsrun.py index 08b13a5352984..c6d26d1c6b858 100644 --- a/mlir/test/python/dialects/linalg/opsrun.py +++ b/mlir/test/python/dialects/linalg/opsrun.py @@ -85,6 +85,7 @@ def log(*args): pooling_boiler = """ func @main() -> i32 attributes {llvm.emit_c_interface} { %v0 = constant 0 : i32 + %v42 = constant 42.0 : f64 %v1 = constant 1.0 : f64 %input = memref.alloc() : memref<1x4x16x1xf64> @@ -94,10 +95,12 @@ def log(*args): linalg.fill(%v1, %shape) : f64, memref<2x2xf64> linalg.fill(%v0, %output) : i32, memref<1x2x4x1xi32> + %c0 = constant 0 : index + memref.store %v42, %input[%c0, %c0, %c0, %c0] : memref<1x4x16x1xf64> + call @pooling_on_buffers(%input, %shape, %output) : (memref<1x4x16x1xf64>, memref<2x2xf64>, memref<1x2x4x1xi32>) -> () - %c0 = constant 0 : index %0 = memref.load %output[%c0, %c0, %c0, %c0] : memref<1x2x4x1xi32> // TODO: FFI-based solution to allow testing and printing with python code. @@ -105,6 +108,7 @@ def log(*args): } """ + def transform(module, boilerplate): import mlir.conversions import mlir.dialects.linalg.passes @@ -308,12 +312,8 @@ def test_pooling_builtin(): MemRefType.get((1, 4, 16, 1), f64), MemRefType.get((2, 2), f64), MemRefType.get((1, 2, 4, 1), i32)) def pooling_on_buffers(input, shape, output): - linalg.pooling_nhwc_sum_poly( - input, - shape, - outs=[output], - strides=[2, 4], - dilations=[1, 2]) + linalg.pooling_nhwc_max_poly( + input, shape, outs=[output], strides=[2, 4], dilations=[1, 2]) execution_engine = ExecutionEngine(transform(module, pooling_boiler)) @@ -325,7 +325,7 @@ def pooling_on_buffers(input, shape, output): execution_engine.invoke("main", res) log("RESULT: ", res[0]) - # CHECK: RESULT: 4 + # CHECK: RESULT: 42 test_pooling_builtin() @@ -342,7 +342,7 @@ def test_pooling_generic(): MemRefType.get((1, 4, 16, 1), f64), MemRefType.get((2, 2), f64), MemRefType.get((1, 2, 4, 1), i32)) def pooling_on_buffers(input, shape, output): - linalg.pooling_nhwc_sum_poly( + linalg.pooling_nhwc_max_poly( input, shape, outs=[output], @@ -360,7 +360,7 @@ def pooling_on_buffers(input, shape, output): execution_engine.invoke("main", res) log("RESULT: ", res[0]) - # CHECK: RESULT: 4 + # CHECK: RESULT: 42 test_pooling_generic() From 6944f7da25517cc554bf7fd11205082e2c976a97 Mon Sep 17 00:00:00 2001 From: Tobias Gysi Date: Fri, 2 Jul 2021 07:20:09 +0000 Subject: [PATCH 500/619] [mlir][linalg][python] Introduce python integration test folder. Introduce an integration test folder in the test/python subfolder and move the opsrun.py test into the newly created folder. The test verifies named operations end-to-end using both the yaml and the python path. Differential Revision: https://reviews.llvm.org/D105276 --- mlir/test/python/{ => integration}/dialects/linalg/opsrun.py | 0 mlir/test/python/integration/lit.local.cfg | 2 ++ 2 files changed, 2 insertions(+) rename mlir/test/python/{ => integration}/dialects/linalg/opsrun.py (100%) create mode 100644 mlir/test/python/integration/lit.local.cfg diff --git a/mlir/test/python/dialects/linalg/opsrun.py b/mlir/test/python/integration/dialects/linalg/opsrun.py similarity index 100% rename from mlir/test/python/dialects/linalg/opsrun.py rename to mlir/test/python/integration/dialects/linalg/opsrun.py diff --git a/mlir/test/python/integration/lit.local.cfg b/mlir/test/python/integration/lit.local.cfg new file mode 100644 index 0000000000000..f428e2534065c --- /dev/null +++ b/mlir/test/python/integration/lit.local.cfg @@ -0,0 +1,2 @@ +if config.mlir_include_integration_tests != 'ON': + config.unsupported = True From 4facbf213c51e4add2e8c19b08d5e58ad71c72de Mon Sep 17 00:00:00 2001 From: Paulo Matos Date: Thu, 10 Jun 2021 10:02:10 +0200 Subject: [PATCH 501/619] [WebAssembly] Implementation of global.get/set for reftypes in LLVM IR Reland of 31859f896. This change implements new DAG notes GLOBAL_GET/GLOBAL_SET, and lowering methods for load and stores of reference types from IR globals. Once the lowering creates the new nodes, tablegen pattern matches those and converts them to Wasm global.get/set. Differential Revision: https://reviews.llvm.org/D104797 --- clang/lib/Basic/Targets/WebAssembly.h | 4 +- clang/test/CodeGen/target-data.c | 4 +- llvm/include/llvm/CodeGen/TargetLowering.h | 2 +- llvm/include/llvm/CodeGen/ValueTypes.h | 10 +- llvm/lib/CodeGen/CodeGenPrepare.cpp | 4 + llvm/lib/CodeGen/MachineOperand.cpp | 2 +- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4 + llvm/lib/CodeGen/TargetLoweringBase.cpp | 2 +- llvm/lib/CodeGen/ValueTypes.cpp | 4 + .../Utils/WebAssemblyUtilities.cpp | 25 +++++ .../WebAssembly/Utils/WebAssemblyUtilities.h | 6 ++ .../WebAssembly/WebAssemblyFastISel.cpp | 5 +- .../lib/Target/WebAssembly/WebAssemblyISD.def | 1 + .../WebAssembly/WebAssemblyISelDAGToDAG.cpp | 23 +++++ .../WebAssembly/WebAssemblyISelLowering.cpp | 98 ++++++++++++++++++- .../WebAssembly/WebAssemblyISelLowering.h | 30 ++++++ .../WebAssembly/WebAssemblyInstrTable.td | 18 +++- .../WebAssembly/WebAssemblyMCInstLower.cpp | 4 + .../WebAssembly/WebAssemblyTargetMachine.cpp | 4 +- .../WebAssembly/externref-globalget.ll | 19 ++++ .../WebAssembly/externref-globalset.ll | 20 ++++ .../CodeGen/WebAssembly/externref-inttoptr.ll | 11 +++ .../CodeGen/WebAssembly/externref-ptrtoint.ll | 11 +++ .../CodeGen/WebAssembly/externref-undef.ll | 21 ++++ .../WebAssembly/externref-unsized-load.ll | 11 +++ .../WebAssembly/externref-unsized-store.ll | 11 +++ llvm/test/CodeGen/WebAssembly/funcref-call.ll | 23 +++++ .../CodeGen/WebAssembly/funcref-globalget.ll | 19 ++++ .../CodeGen/WebAssembly/funcref-globalset.ll | 20 ++++ 29 files changed, 400 insertions(+), 16 deletions(-) create mode 100644 llvm/test/CodeGen/WebAssembly/externref-globalget.ll create mode 100644 llvm/test/CodeGen/WebAssembly/externref-globalset.ll create mode 100644 llvm/test/CodeGen/WebAssembly/externref-inttoptr.ll create mode 100644 llvm/test/CodeGen/WebAssembly/externref-ptrtoint.ll create mode 100644 llvm/test/CodeGen/WebAssembly/externref-undef.ll create mode 100644 llvm/test/CodeGen/WebAssembly/externref-unsized-load.ll create mode 100644 llvm/test/CodeGen/WebAssembly/externref-unsized-store.ll create mode 100644 llvm/test/CodeGen/WebAssembly/funcref-call.ll create mode 100644 llvm/test/CodeGen/WebAssembly/funcref-globalget.ll create mode 100644 llvm/test/CodeGen/WebAssembly/funcref-globalset.ll diff --git a/clang/lib/Basic/Targets/WebAssembly.h b/clang/lib/Basic/Targets/WebAssembly.h index b29730c5d706b..ed590fe7e3338 100644 --- a/clang/lib/Basic/Targets/WebAssembly.h +++ b/clang/lib/Basic/Targets/WebAssembly.h @@ -147,7 +147,7 @@ class LLVM_LIBRARY_VISIBILITY WebAssembly32TargetInfo explicit WebAssembly32TargetInfo(const llvm::Triple &T, const TargetOptions &Opts) : WebAssemblyTargetInfo(T, Opts) { - resetDataLayout("e-m:e-p:32:32-i64:64-n32:64-S128-ni:1"); + resetDataLayout("e-m:e-p:32:32-i64:64-n32:64-S128-ni:1:10:20"); } protected: @@ -166,7 +166,7 @@ class LLVM_LIBRARY_VISIBILITY WebAssembly64TargetInfo SizeType = UnsignedLong; PtrDiffType = SignedLong; IntPtrType = SignedLong; - resetDataLayout("e-m:e-p:64:64-i64:64-n32:64-S128-ni:1"); + resetDataLayout("e-m:e-p:64:64-i64:64-n32:64-S128-ni:1:10:20"); } protected: diff --git a/clang/test/CodeGen/target-data.c b/clang/test/CodeGen/target-data.c index 1d88984530e5b..1be01efd16515 100644 --- a/clang/test/CodeGen/target-data.c +++ b/clang/test/CodeGen/target-data.c @@ -108,11 +108,11 @@ // RUN: %clang_cc1 -triple wasm32-unknown-unknown -o - -emit-llvm %s | \ // RUN: FileCheck %s -check-prefix=WEBASSEMBLY32 -// WEBASSEMBLY32: target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128-ni:1" +// WEBASSEMBLY32: target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128-ni:1:10:20" // RUN: %clang_cc1 -triple wasm64-unknown-unknown -o - -emit-llvm %s | \ // RUN: FileCheck %s -check-prefix=WEBASSEMBLY64 -// WEBASSEMBLY64: target datalayout = "e-m:e-p:64:64-i64:64-n32:64-S128-ni:1" +// WEBASSEMBLY64: target datalayout = "e-m:e-p:64:64-i64:64-n32:64-S128-ni:1:10:20" // RUN: %clang_cc1 -triple lanai-unknown-unknown -o - -emit-llvm %s | \ // RUN: FileCheck %s -check-prefix=LANAI diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 47d6ca43a5ac3..75894d15a9693 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -350,7 +350,7 @@ class TargetLoweringBase { /// Return the in-memory pointer type for the given address space, defaults to /// the pointer type from the data layout. FIXME: The default needs to be /// removed once all the code is updated. - MVT getPointerMemTy(const DataLayout &DL, uint32_t AS = 0) const { + virtual MVT getPointerMemTy(const DataLayout &DL, uint32_t AS = 0) const { return MVT::getIntegerVT(DL.getPointerSizeInBits(AS)); } diff --git a/llvm/include/llvm/CodeGen/ValueTypes.h b/llvm/include/llvm/CodeGen/ValueTypes.h index e7346f7a75abc..7b17b98d5c555 100644 --- a/llvm/include/llvm/CodeGen/ValueTypes.h +++ b/llvm/include/llvm/CodeGen/ValueTypes.h @@ -120,6 +120,12 @@ namespace llvm { return changeExtendedTypeToInteger(); } + /// Test if the given EVT has zero size, this will fail if called on a + /// scalable type + bool isZeroSized() const { + return !isScalableVector() && getSizeInBits() == 0; + } + /// Test if the given EVT is simple (as opposed to being extended). bool isSimple() const { return V.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE; @@ -207,7 +213,9 @@ namespace llvm { } /// Return true if the bit size is a multiple of 8. - bool isByteSized() const { return getSizeInBits().isKnownMultipleOf(8); } + bool isByteSized() const { + return !isZeroSized() && getSizeInBits().isKnownMultipleOf(8); + } /// Return true if the size is a power-of-two number of bytes. bool isRound() const { diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index baf674cea5653..9d188ad86fd8c 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -6465,6 +6465,10 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) { EVT LoadResultVT = TLI->getValueType(*DL, Load->getType()); unsigned BitWidth = LoadResultVT.getSizeInBits(); + // If the BitWidth is 0, do not try to optimize the type + if (BitWidth == 0) + return false; + APInt DemandBits(BitWidth, 0); APInt WidestAndBits(BitWidth, 0); diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp index db973bda5e555..b8ba0453d24c7 100644 --- a/llvm/lib/CodeGen/MachineOperand.cpp +++ b/llvm/lib/CodeGen/MachineOperand.cpp @@ -1180,7 +1180,7 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, << "unknown-address"; } MachineOperand::printOperandOffset(OS, getOffset()); - if (getAlign() != getSize()) + if (getSize() > 0 && getAlign() != getSize()) OS << ", align " << getAlign().value(); if (getAlign() != getBaseAlign()) OS << ", basealign " << getBaseAlign().value(); diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 5ea3de9d0db66..f00ec5e13d45e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -23174,6 +23174,10 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) { if (BasePtr.getBase().isUndef()) return false; + // Do not handle stores to opaque types + if (St->getMemoryVT().isZeroSized()) + return false; + // BaseIndexOffset assumes that offsets are fixed-size, which // is not valid for scalable vectors where the offsets are // scaled by `vscale`, so bail out early. diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index ebac779984ec5..3c5dd29036db0 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -1710,7 +1710,7 @@ bool TargetLoweringBase::allowsMemoryAccessForAlignment( // For example, the ABI alignment may change based on software platform while // this function should only be affected by hardware implementation. Type *Ty = VT.getTypeForEVT(Context); - if (Alignment >= DL.getABITypeAlign(Ty)) { + if (VT.isZeroSized() || Alignment >= DL.getABITypeAlign(Ty)) { // Assume that an access that meets the ABI-specified alignment is fast. if (Fast != nullptr) *Fast = true; diff --git a/llvm/lib/CodeGen/ValueTypes.cpp b/llvm/lib/CodeGen/ValueTypes.cpp index ae0b945bdba8e..9daebfd9e63d6 100644 --- a/llvm/lib/CodeGen/ValueTypes.cpp +++ b/llvm/lib/CodeGen/ValueTypes.cpp @@ -198,6 +198,10 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const { case MVT::ppcf128: return Type::getPPC_FP128Ty(Context); case MVT::x86mmx: return Type::getX86_MMXTy(Context); case MVT::x86amx: return Type::getX86_AMXTy(Context); + case MVT::externref: + return PointerType::get(StructType::create(Context), 10); + case MVT::funcref: + return PointerType::get(StructType::create(Context), 20); case MVT::v1i1: return FixedVectorType::get(Type::getInt1Ty(Context), 1); case MVT::v2i1: diff --git a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.cpp b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.cpp index 824d336651360..3da80f4fc8752 100644 --- a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.cpp +++ b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.cpp @@ -116,6 +116,31 @@ MCSymbolWasm *WebAssembly::getOrCreateFunctionTableSymbol( return Sym; } +MCSymbolWasm *WebAssembly::getOrCreateFuncrefCallTableSymbol( + MCContext &Ctx, const WebAssemblySubtarget *Subtarget) { + StringRef Name = "__funcref_call_table"; + MCSymbolWasm *Sym = cast_or_null(Ctx.lookupSymbol(Name)); + if (Sym) { + if (!Sym->isFunctionTable()) + Ctx.reportError(SMLoc(), "symbol is not a wasm funcref table"); + } else { + Sym = cast(Ctx.getOrCreateSymbol(Name)); + + // Setting Weak ensure only one table is left after linking when multiple + // modules define the table. + Sym->setWeak(true); + + wasm::WasmLimits Limits = {0, 1, 1}; + wasm::WasmTableType TableType = {wasm::WASM_TYPE_FUNCREF, Limits}; + Sym->setType(wasm::WASM_SYMBOL_TYPE_TABLE); + Sym->setTableType(TableType); + } + // MVP object files can't have symtab entries for tables. + if (!(Subtarget && Subtarget->hasReferenceTypes())) + Sym->setOmitFromLinkingSection(); + return Sym; +} + // Find a catch instruction from an EH pad. MachineInstr *WebAssembly::findCatch(MachineBasicBlock *EHPad) { assert(EHPad->isEHPad()); diff --git a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h index 1ec1df5d0c3d1..673dc9521ced7 100644 --- a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h +++ b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h @@ -68,6 +68,12 @@ MCSymbolWasm * getOrCreateFunctionTableSymbol(MCContext &Ctx, const WebAssemblySubtarget *Subtarget); +/// Returns the __funcref_call_table, for use in funcref calls when lowered to +/// table.set + call_indirect. +MCSymbolWasm * +getOrCreateFuncrefCallTableSymbol(MCContext &Ctx, + const WebAssemblySubtarget *Subtarget); + /// Find a catch instruction from an EH pad. Returns null if no catch /// instruction found or the catch is in an invalid location. MachineInstr *findCatch(MachineBasicBlock *EHPad); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp index cb0cdf1d8f985..171d59ae4c6b8 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp @@ -130,9 +130,12 @@ class WebAssemblyFastISel final : public FastISel { case MVT::i64: case MVT::f32: case MVT::f64: + return VT; case MVT::funcref: case MVT::externref: - return VT; + if (Subtarget->hasReferenceTypes()) + return VT; + break; case MVT::f16: return MVT::f32; case MVT::v16i8: diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def index 9e229450222ff..200df9d4d9ffe 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def @@ -48,3 +48,4 @@ HANDLE_NODETYPE(MEMORY_FILL) HANDLE_MEM_NODETYPE(LOAD_SPLAT) HANDLE_MEM_NODETYPE(GLOBAL_GET) HANDLE_MEM_NODETYPE(GLOBAL_SET) +HANDLE_MEM_NODETYPE(TABLE_SET) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp index c3230cb9ca029..f4bae59132e68 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp @@ -13,6 +13,7 @@ #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "WebAssembly.h" +#include "WebAssemblyISelLowering.h" #include "WebAssemblyTargetMachine.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" @@ -47,11 +48,32 @@ class WebAssemblyDAGToDAGISel final : public SelectionDAGISel { return "WebAssembly Instruction Selection"; } + void checkForInvalidNodes(const Function &F) { + // This function will check for uses of ptrtoint on reference types and + // report a fatal error if these are found. + for (const BasicBlock &BB : F) { + for (const Instruction &I : BB) { + if (const PtrToIntInst *PTI = dyn_cast(&I)) { + const Value *V = PTI->getPointerOperand(); + if (WebAssemblyTargetLowering::isFuncrefType(V->getType()) || + WebAssemblyTargetLowering::isExternrefType(V->getType())) + report_fatal_error("ptrtoint not allowed on reference types"); + } else if (const IntToPtrInst *ITP = dyn_cast(&I)) { + if (WebAssemblyTargetLowering::isFuncrefType(ITP->getDestTy()) || + WebAssemblyTargetLowering::isExternrefType(ITP->getDestTy())) + report_fatal_error("inttoptr not allowed on reference types"); + } + } + } + } + bool runOnMachineFunction(MachineFunction &MF) override { LLVM_DEBUG(dbgs() << "********** ISelDAGToDAG **********\n" "********** Function: " << MF.getName() << '\n'); + checkForInvalidNodes(MF.getFunction()); + Subtarget = &MF.getSubtarget(); return SelectionDAGISel::runOnMachineFunction(MF); @@ -63,6 +85,7 @@ class WebAssemblyDAGToDAGISel final : public SelectionDAGISel { bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector &OutOps) override; + bool SelectExternRefAddr(const SDValue &Addr, const SDValue &Base); // Include the pieces autogenerated from the target description. #include "WebAssemblyGenDAGISel.inc" diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 76427653770f3..2e2cb1b81c6c3 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -13,6 +13,7 @@ #include "WebAssemblyISelLowering.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "Utils/WebAssemblyTypeUtilities.h" #include "Utils/WebAssemblyUtilities.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" @@ -66,6 +67,10 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass); addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass); } + if (Subtarget->hasReferenceTypes()) { + addRegisterClass(MVT::externref, &WebAssembly::EXTERNREFRegClass); + addRegisterClass(MVT::funcref, &WebAssembly::FUNCREFRegClass); + } // Compute derived properties from the register classes. computeRegisterProperties(Subtarget->getRegisterInfo()); @@ -82,6 +87,12 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( setOperationAction(ISD::STORE, T, Custom); } } + if (Subtarget->hasReferenceTypes()) { + for (auto T : {MVT::externref, MVT::funcref}) { + setOperationAction(ISD::LOAD, T, Custom); + setOperationAction(ISD::STORE, T, Custom); + } + } setOperationAction(ISD::GlobalAddress, MVTPtr, Custom); setOperationAction(ISD::GlobalTLSAddress, MVTPtr, Custom); @@ -468,6 +479,16 @@ LowerCallResults(MachineInstr &CallResults, DebugLoc DL, MachineBasicBlock *BB, bool IsIndirect = CallParams.getOperand(0).isReg(); bool IsRetCall = CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS; + bool IsFuncrefCall = false; + if (IsIndirect) { + Register Reg = CallParams.getOperand(0).getReg(); + const MachineFunction *MF = BB->getParent(); + const MachineRegisterInfo &MRI = MF->getRegInfo(); + const TargetRegisterClass *TRC = MRI.getRegClass(Reg); + IsFuncrefCall = (TRC == &WebAssembly::FUNCREFRegClass); + assert(!IsFuncrefCall || Subtarget->hasReferenceTypes()); + } + unsigned CallOp; if (IsIndirect && IsRetCall) { CallOp = WebAssembly::RET_CALL_INDIRECT; @@ -511,8 +532,11 @@ LowerCallResults(MachineInstr &CallResults, DebugLoc DL, MachineBasicBlock *BB, // Placeholder for the type index. MIB.addImm(0); // The table into which this call_indirect indexes. - MCSymbolWasm *Table = - WebAssembly::getOrCreateFunctionTableSymbol(MF.getContext(), Subtarget); + MCSymbolWasm *Table = IsFuncrefCall + ? WebAssembly::getOrCreateFuncrefCallTableSymbol( + MF.getContext(), Subtarget) + : WebAssembly::getOrCreateFunctionTableSymbol( + MF.getContext(), Subtarget); if (Subtarget->hasReferenceTypes()) { MIB.addSym(Table); } else { @@ -531,6 +555,39 @@ LowerCallResults(MachineInstr &CallResults, DebugLoc DL, MachineBasicBlock *BB, CallParams.eraseFromParent(); CallResults.eraseFromParent(); + // If this is a funcref call, to avoid hidden GC roots, we need to clear the + // table slot with ref.null upon call_indirect return. + // + // This generates the following code, which comes right after a call_indirect + // of a funcref: + // + // i32.const 0 + // ref.null func + // table.set __funcref_call_table + if (IsIndirect && IsFuncrefCall) { + MCSymbolWasm *Table = WebAssembly::getOrCreateFuncrefCallTableSymbol( + MF.getContext(), Subtarget); + Register RegZero = + MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass); + MachineInstr *Const0 = + BuildMI(MF, DL, TII.get(WebAssembly::CONST_I32), RegZero).addImm(0); + BB->insertAfter(MIB.getInstr()->getIterator(), Const0); + + Register RegFuncref = + MF.getRegInfo().createVirtualRegister(&WebAssembly::FUNCREFRegClass); + MachineInstr *RefNull = + BuildMI(MF, DL, TII.get(WebAssembly::REF_NULL_FUNCREF), RegFuncref) + .addImm(static_cast(WebAssembly::HeapType::Funcref)); + BB->insertAfter(Const0->getIterator(), RefNull); + + MachineInstr *TableSet = + BuildMI(MF, DL, TII.get(WebAssembly::TABLE_SET_FUNCREF)) + .addSym(Table) + .addReg(RegZero) + .addReg(RegFuncref); + BB->insertAfter(RefNull->getIterator(), TableSet); + } + return BB; } @@ -1054,6 +1111,33 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI, InTys.push_back(In.VT); } + // Lastly, if this is a call to a funcref we need to add an instruction + // table.set to the chain and transform the call. + if (CLI.CB && isFuncrefType(CLI.CB->getCalledOperand()->getType())) { + // In the absence of function references proposal where a funcref call is + // lowered to call_ref, using reference types we generate a table.set to set + // the funcref to a special table used solely for this purpose, followed by + // a call_indirect. Here we just generate the table set, and return the + // SDValue of the table.set so that LowerCall can finalize the lowering by + // generating the call_indirect. + SDValue Chain = Ops[0]; + + MCSymbolWasm *Table = WebAssembly::getOrCreateFuncrefCallTableSymbol( + MF.getContext(), Subtarget); + SDValue Sym = DAG.getMCSymbol(Table, PtrVT); + SDValue TableSlot = DAG.getConstant(0, DL, MVT::i32); + SDValue TableSetOps[] = {Chain, Sym, TableSlot, Callee}; + SDValue TableSet = DAG.getMemIntrinsicNode( + WebAssemblyISD::TABLE_SET, DL, DAG.getVTList(MVT::Other), TableSetOps, + MVT::funcref, + // Machine Mem Operand args + MachinePointerInfo(WasmAddressSpace::FUNCREF), + CLI.CB->getCalledOperand()->getPointerAlignment(DAG.getDataLayout()), + MachineMemOperand::MOStore); + + Ops[0] = TableSet; // The new chain is the TableSet itself + } + if (CLI.IsTailCall) { // ret_calls do not return values to the current frame SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); @@ -1285,6 +1369,16 @@ static Optional IsWebAssemblyLocal(SDValue Op, SelectionDAG &DAG) { return WebAssemblyFrameLowering::getLocalForStackObject(MF, FI->getIndex()); } +bool WebAssemblyTargetLowering::isFuncrefType(const Type *Ty) { + return isa(Ty) && + Ty->getPointerAddressSpace() == WasmAddressSpace::FUNCREF; +} + +bool WebAssemblyTargetLowering::isExternrefType(const Type *Ty) { + return isa(Ty) && + Ty->getPointerAddressSpace() == WasmAddressSpace::EXTERNREF; +} + SDValue WebAssemblyTargetLowering::LowerStore(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h index 632c039deac09..b2445b2dec04e 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h @@ -45,6 +45,36 @@ class WebAssemblyTargetLowering final : public TargetLowering { WebAssemblyTargetLowering(const TargetMachine &TM, const WebAssemblySubtarget &STI); + enum WasmAddressSpace : unsigned { + // WebAssembly uses the following address spaces: + // AS 0 : is the default address space for values in linear memory + DEFAULT = 0, + // AS 1 : is a non-integral address space for global variables + GLOBAL = 1, + // AS 10 : is a non-integral address space for externref values + EXTERNREF = 10, + // AS 20 : is a non-integral address space for funcref values + FUNCREF = 20, + }; + + MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override { + if (AS == WasmAddressSpace::EXTERNREF) + return MVT::externref; + if (AS == WasmAddressSpace::FUNCREF) + return MVT::funcref; + return TargetLowering::getPointerTy(DL, AS); + } + MVT getPointerMemTy(const DataLayout &DL, uint32_t AS = 0) const override { + if (AS == WasmAddressSpace::EXTERNREF) + return MVT::externref; + if (AS == WasmAddressSpace::FUNCREF) + return MVT::funcref; + return TargetLowering::getPointerMemTy(DL, AS); + } + + static bool isFuncrefType(const Type *Ty); + static bool isExternrefType(const Type *Ty); + private: /// Keep a pointer to the WebAssemblySubtarget around so that we can make the /// right decision when generating code for different targets. diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrTable.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrTable.td index 8fa80ad40995f..2348bb165daf0 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrTable.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrTable.td @@ -11,15 +11,16 @@ /// Instructions that handle tables //===----------------------------------------------------------------------===// - multiclass TABLE { - defm TABLE_GET_#rt : I<(outs rt:$res), (ins table32_op:$table), + let mayLoad = 1 in + defm TABLE_GET_#rt : I<(outs rt:$res), (ins table32_op:$table, I32:$i), (outs), (ins table32_op:$table), [], - "table.get\t$res, $table", + "table.get\t$res, $table, $i", "table.get\t$table", 0x25>; + let mayStore = 1 in defm TABLE_SET_#rt : I<(outs), (ins table32_op:$table, I32:$i, rt:$val), (outs), (ins table32_op:$table), [], @@ -46,6 +47,17 @@ multiclass TABLE { defm "" : TABLE, Requires<[HasReferenceTypes]>; defm "" : TABLE, Requires<[HasReferenceTypes]>; +def wasm_table_set_t : SDTypeProfile<0, 3, []>; +def wasm_table_set : SDNode<"WebAssemblyISD::TABLE_SET", wasm_table_set_t, + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; + +def : Pat<(wasm_table_set i32:$table, i32:$idx, funcref:$r), + (TABLE_SET_FUNCREF i32:$table, i32:$idx, funcref:$r)>, + Requires<[HasReferenceTypes]>; +def : Pat<(wasm_table_set i32:$table, i32:$idx, externref:$r), + (TABLE_SET_EXTERNREF i32:$table, i32:$idx, externref:$r)>, + Requires<[HasReferenceTypes]>; + defm TABLE_SIZE : I<(outs I32:$sz), (ins table32_op:$table), (outs), (ins table32_op:$table), [], diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp index 93e5f2300386e..a266410a98fd0 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp @@ -217,6 +217,10 @@ static wasm::ValType getType(const TargetRegisterClass *RC) { return wasm::ValType::F64; if (RC == &WebAssembly::V128RegClass) return wasm::ValType::V128; + if (RC == &WebAssembly::EXTERNREFRegClass) + return wasm::ValType::EXTERNREF; + if (RC == &WebAssembly::FUNCREFRegClass) + return wasm::ValType::FUNCREF; llvm_unreachable("Unexpected register class"); } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp index c305da514ef39..35701e00da3db 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -121,8 +121,8 @@ WebAssemblyTargetMachine::WebAssemblyTargetMachine( Optional CM, CodeGenOpt::Level OL, bool JIT) : LLVMTargetMachine(T, TT.isArch64Bit() - ? "e-m:e-p:64:64-i64:64-n32:64-S128-ni:1" - : "e-m:e-p:32:32-i64:64-n32:64-S128-ni:1", + ? "e-m:e-p:64:64-i64:64-n32:64-S128-ni:1:10:20" + : "e-m:e-p:32:32-i64:64-n32:64-S128-ni:1:10:20", TT, CPU, FS, Options, getEffectiveRelocModel(RM, TT), getEffectiveCodeModel(CM, CodeModel::Large), OL), TLOF(new WebAssemblyTargetObjectFile()) { diff --git a/llvm/test/CodeGen/WebAssembly/externref-globalget.ll b/llvm/test/CodeGen/WebAssembly/externref-globalget.ll new file mode 100644 index 0000000000000..b8303704227c1 --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/externref-globalget.ll @@ -0,0 +1,19 @@ +; RUN: llc < %s --mtriple=wasm32-unknown-unknown -asm-verbose=false -mattr=+reference-types | FileCheck %s + +%extern = type opaque +%externref = type %extern addrspace(10)* ;; addrspace 10 is nonintegral + +@externref_global = local_unnamed_addr addrspace(1) global %externref undef + +define %externref @return_externref_global() { + ;; this generates a global.get of @externref_global + %ref = load %externref, %externref addrspace(1)* @externref_global + ret %externref %ref +} + +; CHECK-LABEL: return_externref_global: +; CHECK-NEXT: functype return_externref_global () -> (externref) +; CHECK-NEXT: global.get externref_global +; CHECK-NEXT: end_function + +; CHECK: .globl externref_global diff --git a/llvm/test/CodeGen/WebAssembly/externref-globalset.ll b/llvm/test/CodeGen/WebAssembly/externref-globalset.ll new file mode 100644 index 0000000000000..749c7daaf4abd --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/externref-globalset.ll @@ -0,0 +1,20 @@ +; RUN: llc --mtriple=wasm32-unknown-unknown -asm-verbose=false -mattr=+reference-types < %s | FileCheck %s + +%extern = type opaque +%externref = type %extern addrspace(10)* ;; addrspace 10 is nonintegral + +@externref_global = local_unnamed_addr addrspace(1) global %externref undef + +define void @set_externref_global(%externref %g) { + ;; this generates a global.set of @externref.global + store %externref %g, %externref addrspace(1)* @externref_global + ret void +} + +; CHECK-LABEL: set_externref_global: +; CHECK-NEXT: functype set_externref_global (externref) -> () +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: global.set externref_global +; CHECK-NEXT: end_function + +; CHECK: .globl externref_global diff --git a/llvm/test/CodeGen/WebAssembly/externref-inttoptr.ll b/llvm/test/CodeGen/WebAssembly/externref-inttoptr.ll new file mode 100644 index 0000000000000..cc106b5dee32e --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/externref-inttoptr.ll @@ -0,0 +1,11 @@ +; RUN: not --crash llc --mtriple=wasm32-unknown-unknown -asm-verbose=false -mattr=+reference-types < %s 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR + +%extern = type opaque +%externref = type %extern addrspace(10)* + +define %externref @int_to_externref(i32 %i) { + %ref = inttoptr i32 %i to %externref + ret %externref %ref +} + +; CHECK-ERROR: LLVM ERROR: inttoptr not allowed on reference types diff --git a/llvm/test/CodeGen/WebAssembly/externref-ptrtoint.ll b/llvm/test/CodeGen/WebAssembly/externref-ptrtoint.ll new file mode 100644 index 0000000000000..8aea1506a903e --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/externref-ptrtoint.ll @@ -0,0 +1,11 @@ +; RUN: not --crash llc --mtriple=wasm32-unknown-unknown -asm-verbose=false -mattr=+reference-types < %s 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR + +%extern = type opaque +%externref = type %extern addrspace(10)* + +define i32 @externref_to_int(%externref %ref) { + %i = ptrtoint %externref %ref to i32 + ret i32 %i +} + +; CHECK-ERROR: LLVM ERROR: ptrtoint not allowed on reference types diff --git a/llvm/test/CodeGen/WebAssembly/externref-undef.ll b/llvm/test/CodeGen/WebAssembly/externref-undef.ll new file mode 100644 index 0000000000000..e5453420e825d --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/externref-undef.ll @@ -0,0 +1,21 @@ +; RUN: llc < %s --mtriple=wasm32-unknown-unknown -asm-verbose=false -mattr=+reference-types | FileCheck %s + +%extern = type opaque +%externref = type %extern addrspace(10)* ;; addrspace 10 is nonintegral + +@externref_global = local_unnamed_addr addrspace(1) global %externref undef + +define %extern @return_extern_undef() { + ; Returning a ref.null or an uninitialized externref would make + ; more sense if the return type would be %externref. However, in + ; this case this is an %extern value, which really is an opaque + ; type and should never really happen. + ret %extern undef +} + +; CHECK-LABEL: return_extern_undef: +; CHECK-NEXT: functype return_extern_undef () -> () +; CHECK-NEXT: end_function + +; CHECK: .globl externref_global + diff --git a/llvm/test/CodeGen/WebAssembly/externref-unsized-load.ll b/llvm/test/CodeGen/WebAssembly/externref-unsized-load.ll new file mode 100644 index 0000000000000..69aa53a9732c7 --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/externref-unsized-load.ll @@ -0,0 +1,11 @@ +; RUN: not llc --mtriple=wasm32-unknown-unknown -asm-verbose=false -mattr=+reference-types < %s 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR + +%extern = type opaque +%externref = type %extern addrspace(10)* + +define void @load_extern(%externref %ref) { + %e = load %extern, %externref %ref + ret void +} + +; CHECK-ERROR: error: loading unsized types is not allowed diff --git a/llvm/test/CodeGen/WebAssembly/externref-unsized-store.ll b/llvm/test/CodeGen/WebAssembly/externref-unsized-store.ll new file mode 100644 index 0000000000000..9608873a78b95 --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/externref-unsized-store.ll @@ -0,0 +1,11 @@ +; RUN: not llc --mtriple=wasm32-unknown-unknown -asm-verbose=false -mattr=+reference-types < %s 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR + +%extern = type opaque +%externref = type %extern addrspace(10)* + +define void @store_extern(%externref %ref) { + store %extern undef, %externref %ref + ret void +} + +; CHECK-ERROR: error: storing unsized types is not allowed diff --git a/llvm/test/CodeGen/WebAssembly/funcref-call.ll b/llvm/test/CodeGen/WebAssembly/funcref-call.ll new file mode 100644 index 0000000000000..05fd7ad99fa05 --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/funcref-call.ll @@ -0,0 +1,23 @@ +; RUN: llc < %s --mtriple=wasm32-unknown-unknown -asm-verbose=false -mattr=+reference-types | FileCheck %s + +%func = type void () +%funcref = type %func addrspace(20)* ;; addrspace 20 is nonintegral + +define void @call_funcref(%funcref %ref) { + call addrspace(20) void %ref() + ret void +} + +; CHECK-LABEL: call_funcref: +; CHECK-NEXT: functype call_funcref (funcref) -> () +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: table.set __funcref_call_table +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: call_indirect __funcref_call_table, () -> () +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: ref.null func +; CHECK-NEXT: table.set __funcref_call_table +; CHECK-NEXT: end_function + +; CHECK: .tabletype __funcref_call_table, funcref diff --git a/llvm/test/CodeGen/WebAssembly/funcref-globalget.ll b/llvm/test/CodeGen/WebAssembly/funcref-globalget.ll new file mode 100644 index 0000000000000..901404aafd717 --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/funcref-globalget.ll @@ -0,0 +1,19 @@ +; RUN: llc < %s --mtriple=wasm32-unknown-unknown -asm-verbose=false -mattr=+reference-types | FileCheck %s + +%func = type opaque +%funcref = type %func addrspace(20)* ;; addrspace 20 is nonintegral + +@funcref_global = local_unnamed_addr addrspace(1) global %funcref undef + +define %funcref @return_funcref_global() { + ;; this generates a global.get of @funcref_global + %ref = load %funcref, %funcref addrspace(1)* @funcref_global + ret %funcref %ref +} + +; CHECK-LABEL: return_funcref_global: +; CHECK-NEXT: .functype return_funcref_global () -> (funcref) +; CHECK-NEXT: global.get funcref_global +; CHECK-NEXT: end_function + +; CHECK: .globl funcref_global diff --git a/llvm/test/CodeGen/WebAssembly/funcref-globalset.ll b/llvm/test/CodeGen/WebAssembly/funcref-globalset.ll new file mode 100644 index 0000000000000..18b0e02cf7390 --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/funcref-globalset.ll @@ -0,0 +1,20 @@ +; RUN: llc < %s --mtriple=wasm32-unknown-unknown -asm-verbose=false -mattr=+reference-types | FileCheck %s + +%func = type opaque +%funcref = type %func addrspace(20)* ;; addrspace 20 is nonintegral + +@funcref_global = local_unnamed_addr addrspace(1) global %funcref undef + +define void @set_funcref_global(%funcref %g) { + ;; this generates a global.set of @funcref_global + store %funcref %g, %funcref addrspace(1)* @funcref_global + ret void +} + +; CHECK-LABEL: set_funcref_global: +; CHECK-NEXT: functype set_funcref_global (funcref) -> () +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: global.set funcref_global +; CHECK-NEXT: end_function + +; CHECK: .globl funcref_global From 26e1553a107f52667be879e99739a4153f8799d8 Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Fri, 2 Jul 2021 09:48:58 +0200 Subject: [PATCH 502/619] [clangd] CMake: express -Iclangd/ at top level and inherit For files directly under clangd/, -Iclang-tools-extra/clangd (and the equivalent for generated files) are not required, as CMake/the compiler puts these directories on the include path by default. However this means each subdirectory needs to include_directories(.. ${CMAKE_CURRENT_BINARY_DIR}/..) etc, and this proved annoying and error-prone to maintain and debug. Since include_directories is inherited by subdirectories, we just configure this explicitly at the top level instead. --- clang-tools-extra/clangd/CMakeLists.txt | 4 ++++ clang-tools-extra/clangd/benchmarks/CMakeLists.txt | 3 --- .../clangd/benchmarks/CompletionModel/CMakeLists.txt | 2 -- clang-tools-extra/clangd/fuzzer/CMakeLists.txt | 3 --- clang-tools-extra/clangd/index/dex/dexp/CMakeLists.txt | 3 --- clang-tools-extra/clangd/index/remote/CMakeLists.txt | 2 -- clang-tools-extra/clangd/indexer/CMakeLists.txt | 3 --- clang-tools-extra/clangd/support/CMakeLists.txt | 1 - clang-tools-extra/clangd/tool/CMakeLists.txt | 3 --- clang-tools-extra/clangd/unittests/CMakeLists.txt | 9 --------- clang-tools-extra/clangd/unittests/xpc/CMakeLists.txt | 9 --------- clang-tools-extra/clangd/xpc/CMakeLists.txt | 5 ----- clang-tools-extra/clangd/xpc/test-client/CMakeLists.txt | 4 ---- 13 files changed, 4 insertions(+), 47 deletions(-) diff --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt index b983b71cc90f4..3c2b097e89fd1 100644 --- a/clang-tools-extra/clangd/CMakeLists.txt +++ b/clang-tools-extra/clangd/CMakeLists.txt @@ -1,3 +1,7 @@ +# This is a no-op for building files in this dir, but is inherited by subdirs. +include_directories(${CMAKE_CURRENT_SOURCE_DIR}) +include_directories(${CMAKE_CURRENT_BINARY_DIR}) + add_subdirectory(support) # Configure the Features.inc file. diff --git a/clang-tools-extra/clangd/benchmarks/CMakeLists.txt b/clang-tools-extra/clangd/benchmarks/CMakeLists.txt index 7a17637b6c377..b1bd26f2e5599 100644 --- a/clang-tools-extra/clangd/benchmarks/CMakeLists.txt +++ b/clang-tools-extra/clangd/benchmarks/CMakeLists.txt @@ -1,6 +1,3 @@ -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. - ${CMAKE_CURRENT_BINARY_DIR}/..) - add_subdirectory(CompletionModel) add_benchmark(IndexBenchmark IndexBenchmark.cpp) diff --git a/clang-tools-extra/clangd/benchmarks/CompletionModel/CMakeLists.txt b/clang-tools-extra/clangd/benchmarks/CompletionModel/CMakeLists.txt index 3998aa1225338..4c7cd779eb3e7 100644 --- a/clang-tools-extra/clangd/benchmarks/CompletionModel/CMakeLists.txt +++ b/clang-tools-extra/clangd/benchmarks/CompletionModel/CMakeLists.txt @@ -1,5 +1,3 @@ -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../) - add_benchmark(DecisionForestBenchmark DecisionForestBenchmark.cpp) target_link_libraries(DecisionForestBenchmark diff --git a/clang-tools-extra/clangd/fuzzer/CMakeLists.txt b/clang-tools-extra/clangd/fuzzer/CMakeLists.txt index 18cab4b41e1a0..5600a354decb3 100644 --- a/clang-tools-extra/clangd/fuzzer/CMakeLists.txt +++ b/clang-tools-extra/clangd/fuzzer/CMakeLists.txt @@ -1,6 +1,3 @@ -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. - ${CMAKE_CURRENT_BINARY_DIR}/..) - set(LLVM_LINK_COMPONENTS FuzzMutate Support diff --git a/clang-tools-extra/clangd/index/dex/dexp/CMakeLists.txt b/clang-tools-extra/clangd/index/dex/dexp/CMakeLists.txt index 7b4b6e53a4ad0..4fe42cb8786f1 100644 --- a/clang-tools-extra/clangd/index/dex/dexp/CMakeLists.txt +++ b/clang-tools-extra/clangd/index/dex/dexp/CMakeLists.txt @@ -1,6 +1,3 @@ -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../../) -include_directories(${CMAKE_CURRENT_BINARY_DIR}/../../../) - set(LLVM_LINK_COMPONENTS LineEditor Support diff --git a/clang-tools-extra/clangd/index/remote/CMakeLists.txt b/clang-tools-extra/clangd/index/remote/CMakeLists.txt index 51db6a7a141ee..5bfc241945437 100644 --- a/clang-tools-extra/clangd/index/remote/CMakeLists.txt +++ b/clang-tools-extra/clangd/index/remote/CMakeLists.txt @@ -13,8 +13,6 @@ if (CLANGD_ENABLE_REMOTE) MonitoringServiceProto ) include_directories(${CMAKE_CURRENT_BINARY_DIR}) - include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../) - include_directories(${CMAKE_CURRENT_BINARY_DIR}/../../) # FIXME(kirillbobyrev): target_compile_definitions is not working with # add_clang_library for some reason. Is there any way to make this diff --git a/clang-tools-extra/clangd/indexer/CMakeLists.txt b/clang-tools-extra/clangd/indexer/CMakeLists.txt index f6389654b3628..a9438008ea039 100644 --- a/clang-tools-extra/clangd/indexer/CMakeLists.txt +++ b/clang-tools-extra/clangd/indexer/CMakeLists.txt @@ -1,6 +1,3 @@ -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../) -include_directories(${CMAKE_CURRENT_BINARY_DIR}/../) - set(LLVM_LINK_COMPONENTS Support ) diff --git a/clang-tools-extra/clangd/support/CMakeLists.txt b/clang-tools-extra/clangd/support/CMakeLists.txt index fc7d7a28117b1..681505586ca9d 100644 --- a/clang-tools-extra/clangd/support/CMakeLists.txt +++ b/clang-tools-extra/clangd/support/CMakeLists.txt @@ -15,7 +15,6 @@ if(NOT HAVE_CXX_ATOMICS_WITHOUT_LIB OR NOT HAVE_CXX_ATOMICS64_WITHOUT_LIB) list(APPEND CLANGD_ATOMIC_LIB "atomic") endif() -include_directories(..) add_clang_library(clangdSupport Cancellation.cpp Context.cpp diff --git a/clang-tools-extra/clangd/tool/CMakeLists.txt b/clang-tools-extra/clangd/tool/CMakeLists.txt index da9d2060f7009..5a1556b813b59 100644 --- a/clang-tools-extra/clangd/tool/CMakeLists.txt +++ b/clang-tools-extra/clangd/tool/CMakeLists.txt @@ -1,6 +1,3 @@ -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/..) -include_directories(${CMAKE_CURRENT_BINARY_DIR}/..) - add_clang_tool(clangd ClangdMain.cpp Check.cpp diff --git a/clang-tools-extra/clangd/unittests/CMakeLists.txt b/clang-tools-extra/clangd/unittests/CMakeLists.txt index 3a439b11e6322..2f5a754f882ae 100644 --- a/clang-tools-extra/clangd/unittests/CMakeLists.txt +++ b/clang-tools-extra/clangd/unittests/CMakeLists.txt @@ -4,15 +4,6 @@ set(LLVM_LINK_COMPONENTS FrontendOpenMP ) -get_filename_component(CLANGD_SOURCE_DIR - ${CMAKE_CURRENT_SOURCE_DIR}/../../clangd REALPATH) -get_filename_component(CLANGD_BINARY_DIR - ${CMAKE_CURRENT_BINARY_DIR}/../../clangd REALPATH) -include_directories( - ${CLANGD_SOURCE_DIR} - ${CLANGD_BINARY_DIR} - ) - if(CLANG_BUILT_STANDALONE) # LLVMTestingSupport library is needed for clangd tests. if (EXISTS ${LLVM_MAIN_SRC_DIR}/lib/Testing/Support diff --git a/clang-tools-extra/clangd/unittests/xpc/CMakeLists.txt b/clang-tools-extra/clangd/unittests/xpc/CMakeLists.txt index 372528d1e82d3..ac04c3f4aed66 100644 --- a/clang-tools-extra/clangd/unittests/xpc/CMakeLists.txt +++ b/clang-tools-extra/clangd/unittests/xpc/CMakeLists.txt @@ -2,15 +2,6 @@ set(LLVM_LINK_COMPONENTS support ) -get_filename_component(CLANGD_SOURCE_DIR - ${CMAKE_CURRENT_SOURCE_DIR}/../../clangd REALPATH) -get_filename_component(CLANGD_BINARY_DIR - ${CMAKE_CURRENT_SOURCE_DIR}/../../clangd REALPATH) -include_directories( - ${CLANGD_SOURCE_DIR} - ${CLANGD_BINARY_DIR} - ) - add_custom_target(ClangdXpcUnitTests) add_unittest(ClangdXpcUnitTests ClangdXpcTests ConversionTests.cpp diff --git a/clang-tools-extra/clangd/xpc/CMakeLists.txt b/clang-tools-extra/clangd/xpc/CMakeLists.txt index d551f8b2b5b3c..5ccdf2f5d06a5 100644 --- a/clang-tools-extra/clangd/xpc/CMakeLists.txt +++ b/clang-tools-extra/clangd/xpc/CMakeLists.txt @@ -7,11 +7,6 @@ include(CreateClangdXPCFramework) add_subdirectory(framework) add_subdirectory(test-client) -include_directories( - ${CMAKE_CURRENT_SOURCE_DIR}/../ - ${CMAKE_CURRENT_BINARY_DIR}/../ -) - set(LLVM_LINK_COMPONENTS Support ) diff --git a/clang-tools-extra/clangd/xpc/test-client/CMakeLists.txt b/clang-tools-extra/clangd/xpc/test-client/CMakeLists.txt index 1bf01c63d7224..94477e3bc57e6 100644 --- a/clang-tools-extra/clangd/xpc/test-client/CMakeLists.txt +++ b/clang-tools-extra/clangd/xpc/test-client/CMakeLists.txt @@ -1,7 +1,3 @@ -include_directories( - ${CMAKE_CURRENT_SOURCE_DIR}/../../ -) - add_clang_tool( clangd-xpc-test-client ClangdXPCTestClient.cpp From a27a17f883864c1c44a0ba3fb01bbf1c89110b82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bal=C3=A1zs=20K=C3=A9ri?= <1.int32@gmail.com> Date: Fri, 2 Jul 2021 09:08:54 +0200 Subject: [PATCH 503/619] [clang][AST] Add support for BindingDecl to ASTImporter. Reviewed By: martong Differential Revision: https://reviews.llvm.org/D102492 --- clang/lib/AST/ASTImporter.cpp | 30 ++++++++++++++++ clang/unittests/AST/ASTImporterTest.cpp | 46 +++++++++++++++++++++++++ 2 files changed, 76 insertions(+) diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index 0d0cabc965566..8fb55488e836a 100644 --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -496,6 +496,7 @@ namespace clang { ExpectedDecl VisitAccessSpecDecl(AccessSpecDecl *D); ExpectedDecl VisitStaticAssertDecl(StaticAssertDecl *D); ExpectedDecl VisitTranslationUnitDecl(TranslationUnitDecl *D); + ExpectedDecl VisitBindingDecl(BindingDecl *D); ExpectedDecl VisitNamespaceDecl(NamespaceDecl *D); ExpectedDecl VisitNamespaceAliasDecl(NamespaceAliasDecl *D); ExpectedDecl VisitTypedefNameDecl(TypedefNameDecl *D, bool IsAlias); @@ -2291,6 +2292,35 @@ ExpectedDecl ASTNodeImporter::VisitTranslationUnitDecl(TranslationUnitDecl *D) { return ToD; } +ExpectedDecl ASTNodeImporter::VisitBindingDecl(BindingDecl *D) { + DeclContext *DC, *LexicalDC; + DeclarationName Name; + SourceLocation Loc; + NamedDecl *ToND; + if (Error Err = ImportDeclParts(D, DC, LexicalDC, Name, ToND, Loc)) + return std::move(Err); + if (ToND) + return ToND; + + Error Err = Error::success(); + QualType ToType = importChecked(Err, D->getType()); + Expr *ToBinding = importChecked(Err, D->getBinding()); + ValueDecl *ToDecomposedDecl = importChecked(Err, D->getDecomposedDecl()); + if (Err) + return std::move(Err); + + BindingDecl *ToD; + if (GetImportedOrCreateDecl(ToD, D, Importer.getToContext(), DC, Loc, + Name.getAsIdentifierInfo())) + return ToD; + + ToD->setBinding(ToType, ToBinding); + ToD->setDecomposedDecl(ToDecomposedDecl); + addDeclToContexts(D, ToD); + + return ToD; +} + ExpectedDecl ASTNodeImporter::VisitAccessSpecDecl(AccessSpecDecl *D) { ExpectedSLoc LocOrErr = import(D->getLocation()); if (!LocOrErr) diff --git a/clang/unittests/AST/ASTImporterTest.cpp b/clang/unittests/AST/ASTImporterTest.cpp index da4bce16d23b8..3536b1cfcbffc 100644 --- a/clang/unittests/AST/ASTImporterTest.cpp +++ b/clang/unittests/AST/ASTImporterTest.cpp @@ -3679,6 +3679,52 @@ TEST_P(ImportVariables, InitAndDefinitionAreInTheFromContext) { EXPECT_TRUE(ImportedD->getDefinition()); } +TEST_P(ImportVariables, ImportBindingDecl) { + Decl *From, *To; + std::tie(From, To) = getImportedDecl( + R"( + void declToImport() { + int a[2] = {1,2}; + auto [x1,y1] = a; + auto& [x2,y2] = a; + + struct S { + mutable int x1 : 2; + volatile double y1; + }; + S b; + const auto [x3, y3] = b; + }; + )", + Lang_CXX17, "", Lang_CXX17); + + TranslationUnitDecl *FromTU = From->getTranslationUnitDecl(); + auto *FromF = FirstDeclMatcher().match( + FromTU, functionDecl(hasName("declToImport"))); + auto *ToF = Import(FromF, Lang_CXX17); + EXPECT_TRUE(ToF); + + auto VerifyImport = [&](llvm::StringRef BindName) { + auto *FromB = FirstDeclMatcher().match( + FromF, bindingDecl(hasName(BindName))); + ASSERT_TRUE(FromB); + auto *ToB = Import(FromB, Lang_CXX17); + EXPECT_TRUE(ToB); + EXPECT_EQ(FromB->getBinding() != nullptr, ToB->getBinding() != nullptr); + EXPECT_EQ(FromB->getDecomposedDecl() != nullptr, + ToB->getDecomposedDecl() != nullptr); + EXPECT_EQ(FromB->getHoldingVar() != nullptr, + ToB->getHoldingVar() != nullptr); + }; + + VerifyImport("x1"); + VerifyImport("y1"); + VerifyImport("x2"); + VerifyImport("y2"); + VerifyImport("x3"); + VerifyImport("y3"); +} + struct ImportClasses : ASTImporterOptionSpecificTestBase {}; TEST_P(ImportClasses, ImportDefinitionWhenProtoIsInNestedToContext) { From b7c140335beb11bcbb2abe51222d7a300cd365e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= Date: Sun, 11 Apr 2021 12:31:06 +0200 Subject: [PATCH 504/619] [lldb] [gdb-remote server] Support selecting process via Hg Support using the extended thread-id syntax with Hg packet to select a subprocess. This makes it possible to start providing support for running some of the debugger packets against another subprocesses. Differential Revision: https://reviews.llvm.org/D100261 --- .../GDBRemoteCommunicationServerLLGS.cpp | 48 ++++--- .../tools/lldb-server/TestGdbRemoteFork.py | 136 +++++++++++++++++- 2 files changed, 163 insertions(+), 21 deletions(-) diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp index 000d0249cea91..f6c3ba46bb515 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp @@ -2088,16 +2088,6 @@ GDBRemoteCommunication::PacketResult GDBRemoteCommunicationServerLLGS::Handle_H(StringExtractorGDBRemote &packet) { Log *log(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_THREAD)); - // Fail if we don't have a current process. - if (!m_current_process || - (m_current_process->GetID() == LLDB_INVALID_PROCESS_ID)) { - LLDB_LOGF( - log, - "GDBRemoteCommunicationServerLLGS::%s failed, no process available", - __FUNCTION__); - return SendErrorResponse(0x15); - } - // Parse out which variant of $H is requested. packet.SetFilePos(strlen("H")); if (packet.GetBytesLeft() < 1) { @@ -2109,14 +2099,14 @@ GDBRemoteCommunicationServerLLGS::Handle_H(StringExtractorGDBRemote &packet) { } const char h_variant = packet.GetChar(); - lldb::pid_t default_pid; + NativeProcessProtocol *default_process; switch (h_variant) { case 'g': - default_pid = m_current_process->GetID(); + default_process = m_current_process; break; case 'c': - default_pid = m_continue_process->GetID(); + default_process = m_continue_process; break; default: @@ -2129,16 +2119,32 @@ GDBRemoteCommunicationServerLLGS::Handle_H(StringExtractorGDBRemote &packet) { } // Parse out the thread number. - llvm::Expected tid_ret = - ReadTid(packet, /*allow_all=*/true, default_pid); - if (!tid_ret) - return SendErrorResponse(tid_ret.takeError()); + auto pid_tid = packet.GetPidTid(default_process ? default_process->GetID() + : LLDB_INVALID_PROCESS_ID); + if (!pid_tid) + return SendErrorResponse(llvm::make_error( + inconvertibleErrorCode(), "Malformed thread-id")); + + lldb::pid_t pid = pid_tid->first; + lldb::tid_t tid = pid_tid->second; + + if (pid == StringExtractorGDBRemote::AllProcesses) + return SendUnimplementedResponse("Selecting all processes not supported"); + if (pid == LLDB_INVALID_PROCESS_ID) + return SendErrorResponse(llvm::make_error( + inconvertibleErrorCode(), "No current process and no PID provided")); + + // Check the process ID and find respective process instance. + auto new_process_it = m_debugged_processes.find(pid); + if (new_process_it == m_debugged_processes.end()) + return SendErrorResponse(llvm::make_error( + inconvertibleErrorCode(), + llvm::formatv("No process with PID {0} debugged", pid))); - lldb::tid_t tid = tid_ret.get(); // Ensure we have the given thread when not specifying -1 (all threads) or 0 // (any thread). if (tid != LLDB_INVALID_THREAD_ID && tid != 0) { - NativeThreadProtocol *thread = m_current_process->GetThreadByID(tid); + NativeThreadProtocol *thread = new_process_it->second->GetThreadByID(tid); if (!thread) { LLDB_LOGF(log, "GDBRemoteCommunicationServerLLGS::%s failed, tid %" PRIu64 @@ -2148,13 +2154,15 @@ GDBRemoteCommunicationServerLLGS::Handle_H(StringExtractorGDBRemote &packet) { } } - // Now switch the given thread type. + // Now switch the given process and thread type. switch (h_variant) { case 'g': + m_current_process = new_process_it->second.get(); SetCurrentThreadID(tid); break; case 'c': + m_continue_process = new_process_it->second.get(); SetContinueThreadID(tid); break; diff --git a/lldb/test/API/tools/lldb-server/TestGdbRemoteFork.py b/lldb/test/API/tools/lldb-server/TestGdbRemoteFork.py index af39dbbb188b2..bcf728c34b797 100644 --- a/lldb/test/API/tools/lldb-server/TestGdbRemoteFork.py +++ b/lldb/test/API/tools/lldb-server/TestGdbRemoteFork.py @@ -16,7 +16,7 @@ def fork_and_detach_test(self, variant): self.reset_test_sequence() # continue and expect fork - fork_regex = "[$]T.*;{}:p([0-9a-f]*)[.]([0-9a-f]*).*".format(variant) + fork_regex = "[$]T.*;{}:p([0-9a-f]+)[.]([0-9a-f]+).*".format(variant) self.test_sequence.add_log_lines([ "read packet: $c#00", {"direction": "send", "regex": fork_regex, @@ -57,3 +57,137 @@ def test_vfork(self): {"direction": "send", "regex": r"[$]W00#.*"}, ], True) self.expect_gdbremote_sequence() + + def fork_and_follow_test(self, variant): + self.build() + self.prep_debug_monitor_and_inferior(inferior_args=[variant]) + self.add_qSupported_packets(["multiprocess+", + "{}-events+".format(variant)]) + ret = self.expect_gdbremote_sequence() + self.assertIn("{}-events+".format(variant), ret["qSupported_response"]) + self.reset_test_sequence() + + # continue and expect fork + procinfo_regex = "[$]pid:([0-9a-f]+);.*" + fork_regex = "[$]T.*;{}:p([0-9a-f]+)[.]([0-9a-f]+).*".format(variant) + self.test_sequence.add_log_lines([ + "read packet: $qProcessInfo#00", + {"direction": "send", "regex": procinfo_regex, + "capture": {1: "parent_pid"}}, + "read packet: $c#00", + {"direction": "send", "regex": fork_regex, + "capture": {1: "pid", 2: "tid"}}, + ], True) + ret = self.expect_gdbremote_sequence() + parent_pid, pid, tid = (int(ret[x], 16) for x + in ("parent_pid", "pid", "tid")) + self.reset_test_sequence() + + # switch to the forked child + self.test_sequence.add_log_lines([ + "read packet: $Hgp{:x}.{:x}#00".format(pid, tid), + {"direction": "send", "regex": r"[$]OK#.*"}, + "read packet: $Hcp{:x}.{:x}#00".format(pid, tid), + {"direction": "send", "regex": r"[$]OK#.*"}, + ], True) + + # detach the parent + self.test_sequence.add_log_lines([ + "read packet: $D;{:x}#00".format(parent_pid), + {"direction": "send", "regex": r"[$]OK#.*"}, + ], True) + ret = self.expect_gdbremote_sequence() + self.reset_test_sequence() + + # resume the child + self.test_sequence.add_log_lines([ + "read packet: $c#00", + {"direction": "send", "regex": r"[$]W00#.*"}, + ], True) + self.expect_gdbremote_sequence() + + @add_test_categories(["fork"]) + def test_fork_follow(self): + self.fork_and_follow_test("fork") + + @add_test_categories(["fork"]) + def test_vfork_follow(self): + self.fork_and_follow_test("vfork") + + @add_test_categories(["fork"]) + def test_select_wrong_pid(self): + self.build() + self.prep_debug_monitor_and_inferior() + self.add_qSupported_packets(["multiprocess+"]) + ret = self.expect_gdbremote_sequence() + self.assertIn("multiprocess+", ret["qSupported_response"]) + self.reset_test_sequence() + + # get process pid + procinfo_regex = "[$]pid:([0-9a-f]+);.*" + self.test_sequence.add_log_lines([ + "read packet: $qProcessInfo#00", + {"direction": "send", "regex": procinfo_regex, + "capture": {1: "pid"}}, + "read packet: $qC#00", + {"direction": "send", "regex": "[$]QC([0-9a-f]+)#.*", + "capture": {1: "tid"}}, + ], True) + ret = self.expect_gdbremote_sequence() + pid, tid = (int(ret[x], 16) for x in ("pid", "tid")) + self.reset_test_sequence() + + # try switching to correct pid + self.test_sequence.add_log_lines([ + "read packet: $Hgp{:x}.{:x}#00".format(pid, tid), + {"direction": "send", "regex": r"[$]OK#.*"}, + "read packet: $Hcp{:x}.{:x}#00".format(pid, tid), + {"direction": "send", "regex": r"[$]OK#.*"}, + ], True) + ret = self.expect_gdbremote_sequence() + + # try switching to invalid tid + self.test_sequence.add_log_lines([ + "read packet: $Hgp{:x}.{:x}#00".format(pid, tid+1), + {"direction": "send", "regex": r"[$]E15#.*"}, + "read packet: $Hcp{:x}.{:x}#00".format(pid, tid+1), + {"direction": "send", "regex": r"[$]E15#.*"}, + ], True) + ret = self.expect_gdbremote_sequence() + + # try switching to invalid pid + self.test_sequence.add_log_lines([ + "read packet: $Hgp{:x}.{:x}#00".format(pid+1, tid), + {"direction": "send", "regex": r"[$]Eff#.*"}, + "read packet: $Hcp{:x}.{:x}#00".format(pid+1, tid), + {"direction": "send", "regex": r"[$]Eff#.*"}, + ], True) + ret = self.expect_gdbremote_sequence() + + def test_detach_current(self): + self.build() + self.prep_debug_monitor_and_inferior() + self.add_qSupported_packets(["multiprocess+"]) + ret = self.expect_gdbremote_sequence() + self.assertIn("multiprocess+", ret["qSupported_response"]) + self.reset_test_sequence() + + # get process pid + procinfo_regex = "[$]pid:([0-9a-f]+);.*" + self.test_sequence.add_log_lines([ + "read packet: $qProcessInfo#00", + {"direction": "send", "regex": procinfo_regex, + "capture": {1: "pid"}}, + ], True) + ret = self.expect_gdbremote_sequence() + pid = int(ret["pid"], 16) + self.reset_test_sequence() + + # detach the process + self.test_sequence.add_log_lines([ + "read packet: $D;{:x}#00".format(pid), + {"direction": "send", "regex": r"[$]OK#.*"}, + "read packet: $qC#00", + {"direction": "send", "regex": r"[$]E44#.*"}, + ], True) + ret = self.expect_gdbremote_sequence() From c2c0d3ea894328667583155334f0607db0c0a73a Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Fri, 2 Jul 2021 11:48:44 +0300 Subject: [PATCH 505/619] Revert "[WebAssembly] Implementation of global.get/set for reftypes in LLVM IR" This reverts commit 4facbf213c51e4add2e8c19b08d5e58ad71c72de. ``` ******************** FAIL: LLVM :: CodeGen/WebAssembly/funcref-call.ll (44466 of 44468) ******************** TEST 'LLVM :: CodeGen/WebAssembly/funcref-call.ll' FAILED ******************** Script: -- : 'RUN: at line 1'; /builddirs/llvm-project/build-Clang12/bin/llc < /repositories/llvm-project/llvm/test/CodeGen/WebAssembly/funcref-call.ll --mtriple=wasm32-unknown-unknown -asm-verbose=false -mattr=+reference-types | /builddirs/llvm-project/build-Clang12/bin/FileCheck /repositories/llvm-project/llvm/test/CodeGen/WebAssembly/funcref-call.ll -- Exit Code: 2 Command Output (stderr): -- llc: /repositories/llvm-project/llvm/include/llvm/Support/LowLevelTypeImpl.h:44: static llvm::LLT llvm::LLT::scalar(unsigned int): Assertion `SizeInBits > 0 && "invalid scalar size"' failed. ``` --- clang/lib/Basic/Targets/WebAssembly.h | 4 +- clang/test/CodeGen/target-data.c | 4 +- llvm/include/llvm/CodeGen/TargetLowering.h | 2 +- llvm/include/llvm/CodeGen/ValueTypes.h | 10 +- llvm/lib/CodeGen/CodeGenPrepare.cpp | 4 - llvm/lib/CodeGen/MachineOperand.cpp | 2 +- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4 - llvm/lib/CodeGen/TargetLoweringBase.cpp | 2 +- llvm/lib/CodeGen/ValueTypes.cpp | 4 - .../Utils/WebAssemblyUtilities.cpp | 25 ----- .../WebAssembly/Utils/WebAssemblyUtilities.h | 6 -- .../WebAssembly/WebAssemblyFastISel.cpp | 5 +- .../lib/Target/WebAssembly/WebAssemblyISD.def | 1 - .../WebAssembly/WebAssemblyISelDAGToDAG.cpp | 23 ----- .../WebAssembly/WebAssemblyISelLowering.cpp | 98 +------------------ .../WebAssembly/WebAssemblyISelLowering.h | 30 ------ .../WebAssembly/WebAssemblyInstrTable.td | 18 +--- .../WebAssembly/WebAssemblyMCInstLower.cpp | 4 - .../WebAssembly/WebAssemblyTargetMachine.cpp | 4 +- .../WebAssembly/externref-globalget.ll | 19 ---- .../WebAssembly/externref-globalset.ll | 20 ---- .../CodeGen/WebAssembly/externref-inttoptr.ll | 11 --- .../CodeGen/WebAssembly/externref-ptrtoint.ll | 11 --- .../CodeGen/WebAssembly/externref-undef.ll | 21 ---- .../WebAssembly/externref-unsized-load.ll | 11 --- .../WebAssembly/externref-unsized-store.ll | 11 --- llvm/test/CodeGen/WebAssembly/funcref-call.ll | 23 ----- .../CodeGen/WebAssembly/funcref-globalget.ll | 19 ---- .../CodeGen/WebAssembly/funcref-globalset.ll | 20 ---- 29 files changed, 16 insertions(+), 400 deletions(-) delete mode 100644 llvm/test/CodeGen/WebAssembly/externref-globalget.ll delete mode 100644 llvm/test/CodeGen/WebAssembly/externref-globalset.ll delete mode 100644 llvm/test/CodeGen/WebAssembly/externref-inttoptr.ll delete mode 100644 llvm/test/CodeGen/WebAssembly/externref-ptrtoint.ll delete mode 100644 llvm/test/CodeGen/WebAssembly/externref-undef.ll delete mode 100644 llvm/test/CodeGen/WebAssembly/externref-unsized-load.ll delete mode 100644 llvm/test/CodeGen/WebAssembly/externref-unsized-store.ll delete mode 100644 llvm/test/CodeGen/WebAssembly/funcref-call.ll delete mode 100644 llvm/test/CodeGen/WebAssembly/funcref-globalget.ll delete mode 100644 llvm/test/CodeGen/WebAssembly/funcref-globalset.ll diff --git a/clang/lib/Basic/Targets/WebAssembly.h b/clang/lib/Basic/Targets/WebAssembly.h index ed590fe7e3338..b29730c5d706b 100644 --- a/clang/lib/Basic/Targets/WebAssembly.h +++ b/clang/lib/Basic/Targets/WebAssembly.h @@ -147,7 +147,7 @@ class LLVM_LIBRARY_VISIBILITY WebAssembly32TargetInfo explicit WebAssembly32TargetInfo(const llvm::Triple &T, const TargetOptions &Opts) : WebAssemblyTargetInfo(T, Opts) { - resetDataLayout("e-m:e-p:32:32-i64:64-n32:64-S128-ni:1:10:20"); + resetDataLayout("e-m:e-p:32:32-i64:64-n32:64-S128-ni:1"); } protected: @@ -166,7 +166,7 @@ class LLVM_LIBRARY_VISIBILITY WebAssembly64TargetInfo SizeType = UnsignedLong; PtrDiffType = SignedLong; IntPtrType = SignedLong; - resetDataLayout("e-m:e-p:64:64-i64:64-n32:64-S128-ni:1:10:20"); + resetDataLayout("e-m:e-p:64:64-i64:64-n32:64-S128-ni:1"); } protected: diff --git a/clang/test/CodeGen/target-data.c b/clang/test/CodeGen/target-data.c index 1be01efd16515..1d88984530e5b 100644 --- a/clang/test/CodeGen/target-data.c +++ b/clang/test/CodeGen/target-data.c @@ -108,11 +108,11 @@ // RUN: %clang_cc1 -triple wasm32-unknown-unknown -o - -emit-llvm %s | \ // RUN: FileCheck %s -check-prefix=WEBASSEMBLY32 -// WEBASSEMBLY32: target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128-ni:1:10:20" +// WEBASSEMBLY32: target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128-ni:1" // RUN: %clang_cc1 -triple wasm64-unknown-unknown -o - -emit-llvm %s | \ // RUN: FileCheck %s -check-prefix=WEBASSEMBLY64 -// WEBASSEMBLY64: target datalayout = "e-m:e-p:64:64-i64:64-n32:64-S128-ni:1:10:20" +// WEBASSEMBLY64: target datalayout = "e-m:e-p:64:64-i64:64-n32:64-S128-ni:1" // RUN: %clang_cc1 -triple lanai-unknown-unknown -o - -emit-llvm %s | \ // RUN: FileCheck %s -check-prefix=LANAI diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 75894d15a9693..47d6ca43a5ac3 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -350,7 +350,7 @@ class TargetLoweringBase { /// Return the in-memory pointer type for the given address space, defaults to /// the pointer type from the data layout. FIXME: The default needs to be /// removed once all the code is updated. - virtual MVT getPointerMemTy(const DataLayout &DL, uint32_t AS = 0) const { + MVT getPointerMemTy(const DataLayout &DL, uint32_t AS = 0) const { return MVT::getIntegerVT(DL.getPointerSizeInBits(AS)); } diff --git a/llvm/include/llvm/CodeGen/ValueTypes.h b/llvm/include/llvm/CodeGen/ValueTypes.h index 7b17b98d5c555..e7346f7a75abc 100644 --- a/llvm/include/llvm/CodeGen/ValueTypes.h +++ b/llvm/include/llvm/CodeGen/ValueTypes.h @@ -120,12 +120,6 @@ namespace llvm { return changeExtendedTypeToInteger(); } - /// Test if the given EVT has zero size, this will fail if called on a - /// scalable type - bool isZeroSized() const { - return !isScalableVector() && getSizeInBits() == 0; - } - /// Test if the given EVT is simple (as opposed to being extended). bool isSimple() const { return V.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE; @@ -213,9 +207,7 @@ namespace llvm { } /// Return true if the bit size is a multiple of 8. - bool isByteSized() const { - return !isZeroSized() && getSizeInBits().isKnownMultipleOf(8); - } + bool isByteSized() const { return getSizeInBits().isKnownMultipleOf(8); } /// Return true if the size is a power-of-two number of bytes. bool isRound() const { diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 9d188ad86fd8c..baf674cea5653 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -6465,10 +6465,6 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) { EVT LoadResultVT = TLI->getValueType(*DL, Load->getType()); unsigned BitWidth = LoadResultVT.getSizeInBits(); - // If the BitWidth is 0, do not try to optimize the type - if (BitWidth == 0) - return false; - APInt DemandBits(BitWidth, 0); APInt WidestAndBits(BitWidth, 0); diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp index b8ba0453d24c7..db973bda5e555 100644 --- a/llvm/lib/CodeGen/MachineOperand.cpp +++ b/llvm/lib/CodeGen/MachineOperand.cpp @@ -1180,7 +1180,7 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, << "unknown-address"; } MachineOperand::printOperandOffset(OS, getOffset()); - if (getSize() > 0 && getAlign() != getSize()) + if (getAlign() != getSize()) OS << ", align " << getAlign().value(); if (getAlign() != getBaseAlign()) OS << ", basealign " << getBaseAlign().value(); diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index f00ec5e13d45e..5ea3de9d0db66 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -23174,10 +23174,6 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) { if (BasePtr.getBase().isUndef()) return false; - // Do not handle stores to opaque types - if (St->getMemoryVT().isZeroSized()) - return false; - // BaseIndexOffset assumes that offsets are fixed-size, which // is not valid for scalable vectors where the offsets are // scaled by `vscale`, so bail out early. diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 3c5dd29036db0..ebac779984ec5 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -1710,7 +1710,7 @@ bool TargetLoweringBase::allowsMemoryAccessForAlignment( // For example, the ABI alignment may change based on software platform while // this function should only be affected by hardware implementation. Type *Ty = VT.getTypeForEVT(Context); - if (VT.isZeroSized() || Alignment >= DL.getABITypeAlign(Ty)) { + if (Alignment >= DL.getABITypeAlign(Ty)) { // Assume that an access that meets the ABI-specified alignment is fast. if (Fast != nullptr) *Fast = true; diff --git a/llvm/lib/CodeGen/ValueTypes.cpp b/llvm/lib/CodeGen/ValueTypes.cpp index 9daebfd9e63d6..ae0b945bdba8e 100644 --- a/llvm/lib/CodeGen/ValueTypes.cpp +++ b/llvm/lib/CodeGen/ValueTypes.cpp @@ -198,10 +198,6 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const { case MVT::ppcf128: return Type::getPPC_FP128Ty(Context); case MVT::x86mmx: return Type::getX86_MMXTy(Context); case MVT::x86amx: return Type::getX86_AMXTy(Context); - case MVT::externref: - return PointerType::get(StructType::create(Context), 10); - case MVT::funcref: - return PointerType::get(StructType::create(Context), 20); case MVT::v1i1: return FixedVectorType::get(Type::getInt1Ty(Context), 1); case MVT::v2i1: diff --git a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.cpp b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.cpp index 3da80f4fc8752..824d336651360 100644 --- a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.cpp +++ b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.cpp @@ -116,31 +116,6 @@ MCSymbolWasm *WebAssembly::getOrCreateFunctionTableSymbol( return Sym; } -MCSymbolWasm *WebAssembly::getOrCreateFuncrefCallTableSymbol( - MCContext &Ctx, const WebAssemblySubtarget *Subtarget) { - StringRef Name = "__funcref_call_table"; - MCSymbolWasm *Sym = cast_or_null(Ctx.lookupSymbol(Name)); - if (Sym) { - if (!Sym->isFunctionTable()) - Ctx.reportError(SMLoc(), "symbol is not a wasm funcref table"); - } else { - Sym = cast(Ctx.getOrCreateSymbol(Name)); - - // Setting Weak ensure only one table is left after linking when multiple - // modules define the table. - Sym->setWeak(true); - - wasm::WasmLimits Limits = {0, 1, 1}; - wasm::WasmTableType TableType = {wasm::WASM_TYPE_FUNCREF, Limits}; - Sym->setType(wasm::WASM_SYMBOL_TYPE_TABLE); - Sym->setTableType(TableType); - } - // MVP object files can't have symtab entries for tables. - if (!(Subtarget && Subtarget->hasReferenceTypes())) - Sym->setOmitFromLinkingSection(); - return Sym; -} - // Find a catch instruction from an EH pad. MachineInstr *WebAssembly::findCatch(MachineBasicBlock *EHPad) { assert(EHPad->isEHPad()); diff --git a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h index 673dc9521ced7..1ec1df5d0c3d1 100644 --- a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h +++ b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h @@ -68,12 +68,6 @@ MCSymbolWasm * getOrCreateFunctionTableSymbol(MCContext &Ctx, const WebAssemblySubtarget *Subtarget); -/// Returns the __funcref_call_table, for use in funcref calls when lowered to -/// table.set + call_indirect. -MCSymbolWasm * -getOrCreateFuncrefCallTableSymbol(MCContext &Ctx, - const WebAssemblySubtarget *Subtarget); - /// Find a catch instruction from an EH pad. Returns null if no catch /// instruction found or the catch is in an invalid location. MachineInstr *findCatch(MachineBasicBlock *EHPad); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp index 171d59ae4c6b8..cb0cdf1d8f985 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp @@ -130,12 +130,9 @@ class WebAssemblyFastISel final : public FastISel { case MVT::i64: case MVT::f32: case MVT::f64: - return VT; case MVT::funcref: case MVT::externref: - if (Subtarget->hasReferenceTypes()) - return VT; - break; + return VT; case MVT::f16: return MVT::f32; case MVT::v16i8: diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def index 200df9d4d9ffe..9e229450222ff 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def @@ -48,4 +48,3 @@ HANDLE_NODETYPE(MEMORY_FILL) HANDLE_MEM_NODETYPE(LOAD_SPLAT) HANDLE_MEM_NODETYPE(GLOBAL_GET) HANDLE_MEM_NODETYPE(GLOBAL_SET) -HANDLE_MEM_NODETYPE(TABLE_SET) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp index f4bae59132e68..c3230cb9ca029 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp @@ -13,7 +13,6 @@ #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "WebAssembly.h" -#include "WebAssemblyISelLowering.h" #include "WebAssemblyTargetMachine.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" @@ -48,32 +47,11 @@ class WebAssemblyDAGToDAGISel final : public SelectionDAGISel { return "WebAssembly Instruction Selection"; } - void checkForInvalidNodes(const Function &F) { - // This function will check for uses of ptrtoint on reference types and - // report a fatal error if these are found. - for (const BasicBlock &BB : F) { - for (const Instruction &I : BB) { - if (const PtrToIntInst *PTI = dyn_cast(&I)) { - const Value *V = PTI->getPointerOperand(); - if (WebAssemblyTargetLowering::isFuncrefType(V->getType()) || - WebAssemblyTargetLowering::isExternrefType(V->getType())) - report_fatal_error("ptrtoint not allowed on reference types"); - } else if (const IntToPtrInst *ITP = dyn_cast(&I)) { - if (WebAssemblyTargetLowering::isFuncrefType(ITP->getDestTy()) || - WebAssemblyTargetLowering::isExternrefType(ITP->getDestTy())) - report_fatal_error("inttoptr not allowed on reference types"); - } - } - } - } - bool runOnMachineFunction(MachineFunction &MF) override { LLVM_DEBUG(dbgs() << "********** ISelDAGToDAG **********\n" "********** Function: " << MF.getName() << '\n'); - checkForInvalidNodes(MF.getFunction()); - Subtarget = &MF.getSubtarget(); return SelectionDAGISel::runOnMachineFunction(MF); @@ -85,7 +63,6 @@ class WebAssemblyDAGToDAGISel final : public SelectionDAGISel { bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector &OutOps) override; - bool SelectExternRefAddr(const SDValue &Addr, const SDValue &Base); // Include the pieces autogenerated from the target description. #include "WebAssemblyGenDAGISel.inc" diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 2e2cb1b81c6c3..76427653770f3 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -13,7 +13,6 @@ #include "WebAssemblyISelLowering.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" -#include "Utils/WebAssemblyTypeUtilities.h" #include "Utils/WebAssemblyUtilities.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" @@ -67,10 +66,6 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass); addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass); } - if (Subtarget->hasReferenceTypes()) { - addRegisterClass(MVT::externref, &WebAssembly::EXTERNREFRegClass); - addRegisterClass(MVT::funcref, &WebAssembly::FUNCREFRegClass); - } // Compute derived properties from the register classes. computeRegisterProperties(Subtarget->getRegisterInfo()); @@ -87,12 +82,6 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( setOperationAction(ISD::STORE, T, Custom); } } - if (Subtarget->hasReferenceTypes()) { - for (auto T : {MVT::externref, MVT::funcref}) { - setOperationAction(ISD::LOAD, T, Custom); - setOperationAction(ISD::STORE, T, Custom); - } - } setOperationAction(ISD::GlobalAddress, MVTPtr, Custom); setOperationAction(ISD::GlobalTLSAddress, MVTPtr, Custom); @@ -479,16 +468,6 @@ LowerCallResults(MachineInstr &CallResults, DebugLoc DL, MachineBasicBlock *BB, bool IsIndirect = CallParams.getOperand(0).isReg(); bool IsRetCall = CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS; - bool IsFuncrefCall = false; - if (IsIndirect) { - Register Reg = CallParams.getOperand(0).getReg(); - const MachineFunction *MF = BB->getParent(); - const MachineRegisterInfo &MRI = MF->getRegInfo(); - const TargetRegisterClass *TRC = MRI.getRegClass(Reg); - IsFuncrefCall = (TRC == &WebAssembly::FUNCREFRegClass); - assert(!IsFuncrefCall || Subtarget->hasReferenceTypes()); - } - unsigned CallOp; if (IsIndirect && IsRetCall) { CallOp = WebAssembly::RET_CALL_INDIRECT; @@ -532,11 +511,8 @@ LowerCallResults(MachineInstr &CallResults, DebugLoc DL, MachineBasicBlock *BB, // Placeholder for the type index. MIB.addImm(0); // The table into which this call_indirect indexes. - MCSymbolWasm *Table = IsFuncrefCall - ? WebAssembly::getOrCreateFuncrefCallTableSymbol( - MF.getContext(), Subtarget) - : WebAssembly::getOrCreateFunctionTableSymbol( - MF.getContext(), Subtarget); + MCSymbolWasm *Table = + WebAssembly::getOrCreateFunctionTableSymbol(MF.getContext(), Subtarget); if (Subtarget->hasReferenceTypes()) { MIB.addSym(Table); } else { @@ -555,39 +531,6 @@ LowerCallResults(MachineInstr &CallResults, DebugLoc DL, MachineBasicBlock *BB, CallParams.eraseFromParent(); CallResults.eraseFromParent(); - // If this is a funcref call, to avoid hidden GC roots, we need to clear the - // table slot with ref.null upon call_indirect return. - // - // This generates the following code, which comes right after a call_indirect - // of a funcref: - // - // i32.const 0 - // ref.null func - // table.set __funcref_call_table - if (IsIndirect && IsFuncrefCall) { - MCSymbolWasm *Table = WebAssembly::getOrCreateFuncrefCallTableSymbol( - MF.getContext(), Subtarget); - Register RegZero = - MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass); - MachineInstr *Const0 = - BuildMI(MF, DL, TII.get(WebAssembly::CONST_I32), RegZero).addImm(0); - BB->insertAfter(MIB.getInstr()->getIterator(), Const0); - - Register RegFuncref = - MF.getRegInfo().createVirtualRegister(&WebAssembly::FUNCREFRegClass); - MachineInstr *RefNull = - BuildMI(MF, DL, TII.get(WebAssembly::REF_NULL_FUNCREF), RegFuncref) - .addImm(static_cast(WebAssembly::HeapType::Funcref)); - BB->insertAfter(Const0->getIterator(), RefNull); - - MachineInstr *TableSet = - BuildMI(MF, DL, TII.get(WebAssembly::TABLE_SET_FUNCREF)) - .addSym(Table) - .addReg(RegZero) - .addReg(RegFuncref); - BB->insertAfter(RefNull->getIterator(), TableSet); - } - return BB; } @@ -1111,33 +1054,6 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI, InTys.push_back(In.VT); } - // Lastly, if this is a call to a funcref we need to add an instruction - // table.set to the chain and transform the call. - if (CLI.CB && isFuncrefType(CLI.CB->getCalledOperand()->getType())) { - // In the absence of function references proposal where a funcref call is - // lowered to call_ref, using reference types we generate a table.set to set - // the funcref to a special table used solely for this purpose, followed by - // a call_indirect. Here we just generate the table set, and return the - // SDValue of the table.set so that LowerCall can finalize the lowering by - // generating the call_indirect. - SDValue Chain = Ops[0]; - - MCSymbolWasm *Table = WebAssembly::getOrCreateFuncrefCallTableSymbol( - MF.getContext(), Subtarget); - SDValue Sym = DAG.getMCSymbol(Table, PtrVT); - SDValue TableSlot = DAG.getConstant(0, DL, MVT::i32); - SDValue TableSetOps[] = {Chain, Sym, TableSlot, Callee}; - SDValue TableSet = DAG.getMemIntrinsicNode( - WebAssemblyISD::TABLE_SET, DL, DAG.getVTList(MVT::Other), TableSetOps, - MVT::funcref, - // Machine Mem Operand args - MachinePointerInfo(WasmAddressSpace::FUNCREF), - CLI.CB->getCalledOperand()->getPointerAlignment(DAG.getDataLayout()), - MachineMemOperand::MOStore); - - Ops[0] = TableSet; // The new chain is the TableSet itself - } - if (CLI.IsTailCall) { // ret_calls do not return values to the current frame SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); @@ -1369,16 +1285,6 @@ static Optional IsWebAssemblyLocal(SDValue Op, SelectionDAG &DAG) { return WebAssemblyFrameLowering::getLocalForStackObject(MF, FI->getIndex()); } -bool WebAssemblyTargetLowering::isFuncrefType(const Type *Ty) { - return isa(Ty) && - Ty->getPointerAddressSpace() == WasmAddressSpace::FUNCREF; -} - -bool WebAssemblyTargetLowering::isExternrefType(const Type *Ty) { - return isa(Ty) && - Ty->getPointerAddressSpace() == WasmAddressSpace::EXTERNREF; -} - SDValue WebAssemblyTargetLowering::LowerStore(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h index b2445b2dec04e..632c039deac09 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h @@ -45,36 +45,6 @@ class WebAssemblyTargetLowering final : public TargetLowering { WebAssemblyTargetLowering(const TargetMachine &TM, const WebAssemblySubtarget &STI); - enum WasmAddressSpace : unsigned { - // WebAssembly uses the following address spaces: - // AS 0 : is the default address space for values in linear memory - DEFAULT = 0, - // AS 1 : is a non-integral address space for global variables - GLOBAL = 1, - // AS 10 : is a non-integral address space for externref values - EXTERNREF = 10, - // AS 20 : is a non-integral address space for funcref values - FUNCREF = 20, - }; - - MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override { - if (AS == WasmAddressSpace::EXTERNREF) - return MVT::externref; - if (AS == WasmAddressSpace::FUNCREF) - return MVT::funcref; - return TargetLowering::getPointerTy(DL, AS); - } - MVT getPointerMemTy(const DataLayout &DL, uint32_t AS = 0) const override { - if (AS == WasmAddressSpace::EXTERNREF) - return MVT::externref; - if (AS == WasmAddressSpace::FUNCREF) - return MVT::funcref; - return TargetLowering::getPointerMemTy(DL, AS); - } - - static bool isFuncrefType(const Type *Ty); - static bool isExternrefType(const Type *Ty); - private: /// Keep a pointer to the WebAssemblySubtarget around so that we can make the /// right decision when generating code for different targets. diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrTable.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrTable.td index 2348bb165daf0..8fa80ad40995f 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrTable.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrTable.td @@ -11,16 +11,15 @@ /// Instructions that handle tables //===----------------------------------------------------------------------===// + multiclass TABLE { - let mayLoad = 1 in - defm TABLE_GET_#rt : I<(outs rt:$res), (ins table32_op:$table, I32:$i), + defm TABLE_GET_#rt : I<(outs rt:$res), (ins table32_op:$table), (outs), (ins table32_op:$table), [], - "table.get\t$res, $table, $i", + "table.get\t$res, $table", "table.get\t$table", 0x25>; - let mayStore = 1 in defm TABLE_SET_#rt : I<(outs), (ins table32_op:$table, I32:$i, rt:$val), (outs), (ins table32_op:$table), [], @@ -47,17 +46,6 @@ multiclass TABLE { defm "" : TABLE, Requires<[HasReferenceTypes]>; defm "" : TABLE, Requires<[HasReferenceTypes]>; -def wasm_table_set_t : SDTypeProfile<0, 3, []>; -def wasm_table_set : SDNode<"WebAssemblyISD::TABLE_SET", wasm_table_set_t, - [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; - -def : Pat<(wasm_table_set i32:$table, i32:$idx, funcref:$r), - (TABLE_SET_FUNCREF i32:$table, i32:$idx, funcref:$r)>, - Requires<[HasReferenceTypes]>; -def : Pat<(wasm_table_set i32:$table, i32:$idx, externref:$r), - (TABLE_SET_EXTERNREF i32:$table, i32:$idx, externref:$r)>, - Requires<[HasReferenceTypes]>; - defm TABLE_SIZE : I<(outs I32:$sz), (ins table32_op:$table), (outs), (ins table32_op:$table), [], diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp index a266410a98fd0..93e5f2300386e 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp @@ -217,10 +217,6 @@ static wasm::ValType getType(const TargetRegisterClass *RC) { return wasm::ValType::F64; if (RC == &WebAssembly::V128RegClass) return wasm::ValType::V128; - if (RC == &WebAssembly::EXTERNREFRegClass) - return wasm::ValType::EXTERNREF; - if (RC == &WebAssembly::FUNCREFRegClass) - return wasm::ValType::FUNCREF; llvm_unreachable("Unexpected register class"); } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp index 35701e00da3db..c305da514ef39 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -121,8 +121,8 @@ WebAssemblyTargetMachine::WebAssemblyTargetMachine( Optional CM, CodeGenOpt::Level OL, bool JIT) : LLVMTargetMachine(T, TT.isArch64Bit() - ? "e-m:e-p:64:64-i64:64-n32:64-S128-ni:1:10:20" - : "e-m:e-p:32:32-i64:64-n32:64-S128-ni:1:10:20", + ? "e-m:e-p:64:64-i64:64-n32:64-S128-ni:1" + : "e-m:e-p:32:32-i64:64-n32:64-S128-ni:1", TT, CPU, FS, Options, getEffectiveRelocModel(RM, TT), getEffectiveCodeModel(CM, CodeModel::Large), OL), TLOF(new WebAssemblyTargetObjectFile()) { diff --git a/llvm/test/CodeGen/WebAssembly/externref-globalget.ll b/llvm/test/CodeGen/WebAssembly/externref-globalget.ll deleted file mode 100644 index b8303704227c1..0000000000000 --- a/llvm/test/CodeGen/WebAssembly/externref-globalget.ll +++ /dev/null @@ -1,19 +0,0 @@ -; RUN: llc < %s --mtriple=wasm32-unknown-unknown -asm-verbose=false -mattr=+reference-types | FileCheck %s - -%extern = type opaque -%externref = type %extern addrspace(10)* ;; addrspace 10 is nonintegral - -@externref_global = local_unnamed_addr addrspace(1) global %externref undef - -define %externref @return_externref_global() { - ;; this generates a global.get of @externref_global - %ref = load %externref, %externref addrspace(1)* @externref_global - ret %externref %ref -} - -; CHECK-LABEL: return_externref_global: -; CHECK-NEXT: functype return_externref_global () -> (externref) -; CHECK-NEXT: global.get externref_global -; CHECK-NEXT: end_function - -; CHECK: .globl externref_global diff --git a/llvm/test/CodeGen/WebAssembly/externref-globalset.ll b/llvm/test/CodeGen/WebAssembly/externref-globalset.ll deleted file mode 100644 index 749c7daaf4abd..0000000000000 --- a/llvm/test/CodeGen/WebAssembly/externref-globalset.ll +++ /dev/null @@ -1,20 +0,0 @@ -; RUN: llc --mtriple=wasm32-unknown-unknown -asm-verbose=false -mattr=+reference-types < %s | FileCheck %s - -%extern = type opaque -%externref = type %extern addrspace(10)* ;; addrspace 10 is nonintegral - -@externref_global = local_unnamed_addr addrspace(1) global %externref undef - -define void @set_externref_global(%externref %g) { - ;; this generates a global.set of @externref.global - store %externref %g, %externref addrspace(1)* @externref_global - ret void -} - -; CHECK-LABEL: set_externref_global: -; CHECK-NEXT: functype set_externref_global (externref) -> () -; CHECK-NEXT: local.get 0 -; CHECK-NEXT: global.set externref_global -; CHECK-NEXT: end_function - -; CHECK: .globl externref_global diff --git a/llvm/test/CodeGen/WebAssembly/externref-inttoptr.ll b/llvm/test/CodeGen/WebAssembly/externref-inttoptr.ll deleted file mode 100644 index cc106b5dee32e..0000000000000 --- a/llvm/test/CodeGen/WebAssembly/externref-inttoptr.ll +++ /dev/null @@ -1,11 +0,0 @@ -; RUN: not --crash llc --mtriple=wasm32-unknown-unknown -asm-verbose=false -mattr=+reference-types < %s 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR - -%extern = type opaque -%externref = type %extern addrspace(10)* - -define %externref @int_to_externref(i32 %i) { - %ref = inttoptr i32 %i to %externref - ret %externref %ref -} - -; CHECK-ERROR: LLVM ERROR: inttoptr not allowed on reference types diff --git a/llvm/test/CodeGen/WebAssembly/externref-ptrtoint.ll b/llvm/test/CodeGen/WebAssembly/externref-ptrtoint.ll deleted file mode 100644 index 8aea1506a903e..0000000000000 --- a/llvm/test/CodeGen/WebAssembly/externref-ptrtoint.ll +++ /dev/null @@ -1,11 +0,0 @@ -; RUN: not --crash llc --mtriple=wasm32-unknown-unknown -asm-verbose=false -mattr=+reference-types < %s 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR - -%extern = type opaque -%externref = type %extern addrspace(10)* - -define i32 @externref_to_int(%externref %ref) { - %i = ptrtoint %externref %ref to i32 - ret i32 %i -} - -; CHECK-ERROR: LLVM ERROR: ptrtoint not allowed on reference types diff --git a/llvm/test/CodeGen/WebAssembly/externref-undef.ll b/llvm/test/CodeGen/WebAssembly/externref-undef.ll deleted file mode 100644 index e5453420e825d..0000000000000 --- a/llvm/test/CodeGen/WebAssembly/externref-undef.ll +++ /dev/null @@ -1,21 +0,0 @@ -; RUN: llc < %s --mtriple=wasm32-unknown-unknown -asm-verbose=false -mattr=+reference-types | FileCheck %s - -%extern = type opaque -%externref = type %extern addrspace(10)* ;; addrspace 10 is nonintegral - -@externref_global = local_unnamed_addr addrspace(1) global %externref undef - -define %extern @return_extern_undef() { - ; Returning a ref.null or an uninitialized externref would make - ; more sense if the return type would be %externref. However, in - ; this case this is an %extern value, which really is an opaque - ; type and should never really happen. - ret %extern undef -} - -; CHECK-LABEL: return_extern_undef: -; CHECK-NEXT: functype return_extern_undef () -> () -; CHECK-NEXT: end_function - -; CHECK: .globl externref_global - diff --git a/llvm/test/CodeGen/WebAssembly/externref-unsized-load.ll b/llvm/test/CodeGen/WebAssembly/externref-unsized-load.ll deleted file mode 100644 index 69aa53a9732c7..0000000000000 --- a/llvm/test/CodeGen/WebAssembly/externref-unsized-load.ll +++ /dev/null @@ -1,11 +0,0 @@ -; RUN: not llc --mtriple=wasm32-unknown-unknown -asm-verbose=false -mattr=+reference-types < %s 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR - -%extern = type opaque -%externref = type %extern addrspace(10)* - -define void @load_extern(%externref %ref) { - %e = load %extern, %externref %ref - ret void -} - -; CHECK-ERROR: error: loading unsized types is not allowed diff --git a/llvm/test/CodeGen/WebAssembly/externref-unsized-store.ll b/llvm/test/CodeGen/WebAssembly/externref-unsized-store.ll deleted file mode 100644 index 9608873a78b95..0000000000000 --- a/llvm/test/CodeGen/WebAssembly/externref-unsized-store.ll +++ /dev/null @@ -1,11 +0,0 @@ -; RUN: not llc --mtriple=wasm32-unknown-unknown -asm-verbose=false -mattr=+reference-types < %s 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR - -%extern = type opaque -%externref = type %extern addrspace(10)* - -define void @store_extern(%externref %ref) { - store %extern undef, %externref %ref - ret void -} - -; CHECK-ERROR: error: storing unsized types is not allowed diff --git a/llvm/test/CodeGen/WebAssembly/funcref-call.ll b/llvm/test/CodeGen/WebAssembly/funcref-call.ll deleted file mode 100644 index 05fd7ad99fa05..0000000000000 --- a/llvm/test/CodeGen/WebAssembly/funcref-call.ll +++ /dev/null @@ -1,23 +0,0 @@ -; RUN: llc < %s --mtriple=wasm32-unknown-unknown -asm-verbose=false -mattr=+reference-types | FileCheck %s - -%func = type void () -%funcref = type %func addrspace(20)* ;; addrspace 20 is nonintegral - -define void @call_funcref(%funcref %ref) { - call addrspace(20) void %ref() - ret void -} - -; CHECK-LABEL: call_funcref: -; CHECK-NEXT: functype call_funcref (funcref) -> () -; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: local.get 0 -; CHECK-NEXT: table.set __funcref_call_table -; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call_indirect __funcref_call_table, () -> () -; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: ref.null func -; CHECK-NEXT: table.set __funcref_call_table -; CHECK-NEXT: end_function - -; CHECK: .tabletype __funcref_call_table, funcref diff --git a/llvm/test/CodeGen/WebAssembly/funcref-globalget.ll b/llvm/test/CodeGen/WebAssembly/funcref-globalget.ll deleted file mode 100644 index 901404aafd717..0000000000000 --- a/llvm/test/CodeGen/WebAssembly/funcref-globalget.ll +++ /dev/null @@ -1,19 +0,0 @@ -; RUN: llc < %s --mtriple=wasm32-unknown-unknown -asm-verbose=false -mattr=+reference-types | FileCheck %s - -%func = type opaque -%funcref = type %func addrspace(20)* ;; addrspace 20 is nonintegral - -@funcref_global = local_unnamed_addr addrspace(1) global %funcref undef - -define %funcref @return_funcref_global() { - ;; this generates a global.get of @funcref_global - %ref = load %funcref, %funcref addrspace(1)* @funcref_global - ret %funcref %ref -} - -; CHECK-LABEL: return_funcref_global: -; CHECK-NEXT: .functype return_funcref_global () -> (funcref) -; CHECK-NEXT: global.get funcref_global -; CHECK-NEXT: end_function - -; CHECK: .globl funcref_global diff --git a/llvm/test/CodeGen/WebAssembly/funcref-globalset.ll b/llvm/test/CodeGen/WebAssembly/funcref-globalset.ll deleted file mode 100644 index 18b0e02cf7390..0000000000000 --- a/llvm/test/CodeGen/WebAssembly/funcref-globalset.ll +++ /dev/null @@ -1,20 +0,0 @@ -; RUN: llc < %s --mtriple=wasm32-unknown-unknown -asm-verbose=false -mattr=+reference-types | FileCheck %s - -%func = type opaque -%funcref = type %func addrspace(20)* ;; addrspace 20 is nonintegral - -@funcref_global = local_unnamed_addr addrspace(1) global %funcref undef - -define void @set_funcref_global(%funcref %g) { - ;; this generates a global.set of @funcref_global - store %funcref %g, %funcref addrspace(1)* @funcref_global - ret void -} - -; CHECK-LABEL: set_funcref_global: -; CHECK-NEXT: functype set_funcref_global (funcref) -> () -; CHECK-NEXT: local.get 0 -; CHECK-NEXT: global.set funcref_global -; CHECK-NEXT: end_function - -; CHECK: .globl funcref_global From 7655061cc64d76615e9215cd6dcbf5ee186363be Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 2 Jul 2021 09:42:47 +0100 Subject: [PATCH 506/619] [Matrix] Hoist address computation before multiply to enable fusion. If the store address does not dominate the matrix multiply, try to hoist address computation instructions without side-effects and/or memory reads before the multiply, to allow fusion. Reviewed By: thegameg Differential Revision: https://reviews.llvm.org/D105193 --- .../Scalar/LowerMatrixIntrinsics.cpp | 27 +- .../multiply-fused-dominance.ll | 313 ++++++++++++++---- 2 files changed, 265 insertions(+), 75 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp index ab27e5b9c3e35..9ac219df0a0c2 100644 --- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp +++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp @@ -1559,10 +1559,29 @@ class LowerMatrixIntrinsics { if (LoadOp0 && LoadOp1 && Store) { // The store address must dominate the MatMul instruction, otherwise // we create invalid IR. - // FIXME: See if we can hoist the store address computation. - auto *AddrI = dyn_cast(Store->getOperand(1)); - if (AddrI && (!DT->dominates(AddrI, MatMul))) - return; + SetVector WorkList; + WorkList.insert(Store->getOperand(1)); + SmallVector ToHoist; + for (unsigned I = 0; I != WorkList.size(); ++I) { + Value *Current = WorkList[I]; + auto *CurrI = dyn_cast(Current); + if (!CurrI) + continue; + if (isa(CurrI)) + return; + if (DT->dominates(CurrI, MatMul)) + continue; + if (CurrI->mayHaveSideEffects() || CurrI->mayReadFromMemory()) + return; + ToHoist.push_back(CurrI); + WorkList.insert(CurrI->op_begin(), CurrI->op_end()); + } + + sort(ToHoist, [this](Instruction *A, Instruction *B) { + return DT->dominates(A, B); + }); + for (Instruction *I : ToHoist) + I->moveBefore(MatMul); emitSIMDTiling(MatMul, LoadOp0, LoadOp1, Store, FusedInsts); return; diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-dominance.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-dominance.ll index 567b66002b322..bfff576d21e6a 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-dominance.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-dominance.ll @@ -10,29 +10,86 @@ target triple = "aarch64-apple-ios" define void @multiply_can_hoist_cast(<4 x double>* noalias %A, <4 x double> * %B, [4 x double]* %C) { ; CHECK-LABEL: @multiply_can_hoist_cast( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[VEC_CAST:%.*]] = bitcast <4 x double>* [[A:%.*]] to <2 x double>* -; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST]], align 8 -; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr <4 x double>, <4 x double>* [[A]], i64 0, i64 2 -; CHECK-NEXT: [[VEC_CAST1:%.*]] = bitcast double* [[VEC_GEP]] to <2 x double>* -; CHECK-NEXT: [[COL_LOAD2:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST1]], align 8 -; CHECK-NEXT: [[VEC_CAST3:%.*]] = bitcast <4 x double>* [[B:%.*]] to <2 x double>* -; CHECK-NEXT: [[COL_LOAD4:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST3]], align 8 -; CHECK-NEXT: [[VEC_GEP5:%.*]] = getelementptr <4 x double>, <4 x double>* [[B]], i64 0, i64 2 -; CHECK-NEXT: [[VEC_CAST6:%.*]] = bitcast double* [[VEC_GEP5]] to <2 x double>* -; CHECK-NEXT: [[COL_LOAD7:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST6]], align 8 -; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[COL_LOAD4]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP0:%.*]] = fmul contract <2 x double> [[COL_LOAD]], [[SPLAT_SPLAT]] -; CHECK-NEXT: [[SPLAT_SPLAT10:%.*]] = shufflevector <2 x double> [[COL_LOAD4]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD2]], <2 x double> [[SPLAT_SPLAT10]], <2 x double> [[TMP0]]) -; CHECK-NEXT: [[SPLAT_SPLAT13:%.*]] = shufflevector <2 x double> [[COL_LOAD7]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = fmul contract <2 x double> [[COL_LOAD]], [[SPLAT_SPLAT13]] -; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <2 x double> [[COL_LOAD7]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD2]], <2 x double> [[SPLAT_SPLAT16]], <2 x double> [[TMP2]]) -; CHECK-NEXT: [[VEC_CAST17:%.*]] = bitcast [4 x double]* [[C:%.*]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP1]], <2 x double>* [[VEC_CAST17]], align 8 -; CHECK-NEXT: [[VEC_GEP18:%.*]] = getelementptr [4 x double], [4 x double]* [[C]], i64 0, i64 2 -; CHECK-NEXT: [[VEC_CAST19:%.*]] = bitcast double* [[VEC_GEP18]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[VEC_CAST19]], align 8 +; CHECK-NEXT: [[STORE_BEGIN:%.*]] = ptrtoint [4 x double]* [[C:%.*]] to i64 +; CHECK-NEXT: [[STORE_END:%.*]] = add nuw nsw i64 [[STORE_BEGIN]], 32 +; CHECK-NEXT: [[LOAD_BEGIN:%.*]] = ptrtoint <4 x double>* [[B:%.*]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[STORE_END]], [[LOAD_BEGIN]] +; CHECK-NEXT: br i1 [[TMP0]], label [[ALIAS_CONT:%.*]], label [[NO_ALIAS:%.*]] +; CHECK: alias_cont: +; CHECK-NEXT: [[LOAD_END:%.*]] = add nuw nsw i64 [[LOAD_BEGIN]], 32 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[LOAD_END]], [[STORE_BEGIN]] +; CHECK-NEXT: br i1 [[TMP1]], label [[COPY:%.*]], label [[NO_ALIAS]] +; CHECK: copy: +; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x double>, align 32 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x double>* [[TMP2]] to i8* +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x double>* [[B]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 32 dereferenceable(32) [[TMP3]], i8* noundef nonnull align 8 dereferenceable(32) [[TMP4]], i64 32, i1 false) +; CHECK-NEXT: br label [[NO_ALIAS]] +; CHECK: no_alias: +; CHECK-NEXT: [[TMP5:%.*]] = phi <4 x double>* [ [[B]], [[ENTRY:%.*]] ], [ [[B]], [[ALIAS_CONT]] ], [ [[TMP2]], [[COPY]] ] +; CHECK-NEXT: [[VEC_CAST:%.*]] = bitcast <4 x double>* [[A:%.*]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST]], align 8 +; CHECK-NEXT: [[VEC_CAST2:%.*]] = bitcast <4 x double>* [[TMP5]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD3:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST2]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = fmul contract <1 x double> [[COL_LOAD]], [[COL_LOAD3]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr <4 x double>, <4 x double>* [[A]], i64 0, i64 2 +; CHECK-NEXT: [[VEC_CAST5:%.*]] = bitcast double* [[TMP7]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD6:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST5]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr <4 x double>, <4 x double>* [[TMP5]], i64 0, i64 1 +; CHECK-NEXT: [[VEC_CAST8:%.*]] = bitcast double* [[TMP8]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD9:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST8]], align 8 +; CHECK-NEXT: [[TMP9:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD6]], <1 x double> [[COL_LOAD9]], <1 x double> [[TMP6]]) +; CHECK-NEXT: [[VEC_CAST15:%.*]] = bitcast [4 x double]* [[C]] to <1 x double>* +; CHECK-NEXT: store <1 x double> [[TMP9]], <1 x double>* [[VEC_CAST15]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr <4 x double>, <4 x double>* [[A]], i64 0, i64 1 +; CHECK-NEXT: [[VEC_CAST17:%.*]] = bitcast double* [[TMP10]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD18:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST17]], align 8 +; CHECK-NEXT: [[VEC_CAST20:%.*]] = bitcast <4 x double>* [[TMP5]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD21:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST20]], align 8 +; CHECK-NEXT: [[TMP11:%.*]] = fmul contract <1 x double> [[COL_LOAD18]], [[COL_LOAD21]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr <4 x double>, <4 x double>* [[A]], i64 0, i64 3 +; CHECK-NEXT: [[VEC_CAST26:%.*]] = bitcast double* [[TMP12]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD27:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST26]], align 8 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr <4 x double>, <4 x double>* [[TMP5]], i64 0, i64 1 +; CHECK-NEXT: [[VEC_CAST29:%.*]] = bitcast double* [[TMP13]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD30:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST29]], align 8 +; CHECK-NEXT: [[TMP14:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD27]], <1 x double> [[COL_LOAD30]], <1 x double> [[TMP11]]) +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr [4 x double], [4 x double]* [[C]], i64 0, i64 1 +; CHECK-NEXT: [[VEC_CAST36:%.*]] = bitcast double* [[TMP15]] to <1 x double>* +; CHECK-NEXT: store <1 x double> [[TMP14]], <1 x double>* [[VEC_CAST36]], align 8 +; CHECK-NEXT: [[VEC_CAST38:%.*]] = bitcast <4 x double>* [[A]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD39:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST38]], align 8 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr <4 x double>, <4 x double>* [[TMP5]], i64 0, i64 2 +; CHECK-NEXT: [[VEC_CAST41:%.*]] = bitcast double* [[TMP16]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD42:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST41]], align 8 +; CHECK-NEXT: [[TMP17:%.*]] = fmul contract <1 x double> [[COL_LOAD39]], [[COL_LOAD42]] +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr <4 x double>, <4 x double>* [[A]], i64 0, i64 2 +; CHECK-NEXT: [[VEC_CAST47:%.*]] = bitcast double* [[TMP18]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD48:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST47]], align 8 +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr <4 x double>, <4 x double>* [[TMP5]], i64 0, i64 3 +; CHECK-NEXT: [[VEC_CAST50:%.*]] = bitcast double* [[TMP19]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD51:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST50]], align 8 +; CHECK-NEXT: [[TMP20:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD48]], <1 x double> [[COL_LOAD51]], <1 x double> [[TMP17]]) +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr [4 x double], [4 x double]* [[C]], i64 0, i64 2 +; CHECK-NEXT: [[VEC_CAST57:%.*]] = bitcast double* [[TMP21]] to <1 x double>* +; CHECK-NEXT: store <1 x double> [[TMP20]], <1 x double>* [[VEC_CAST57]], align 8 +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr <4 x double>, <4 x double>* [[A]], i64 0, i64 1 +; CHECK-NEXT: [[VEC_CAST59:%.*]] = bitcast double* [[TMP22]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD60:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST59]], align 8 +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr <4 x double>, <4 x double>* [[TMP5]], i64 0, i64 2 +; CHECK-NEXT: [[VEC_CAST62:%.*]] = bitcast double* [[TMP23]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD63:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST62]], align 8 +; CHECK-NEXT: [[TMP24:%.*]] = fmul contract <1 x double> [[COL_LOAD60]], [[COL_LOAD63]] +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr <4 x double>, <4 x double>* [[A]], i64 0, i64 3 +; CHECK-NEXT: [[VEC_CAST68:%.*]] = bitcast double* [[TMP25]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD69:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST68]], align 8 +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr <4 x double>, <4 x double>* [[TMP5]], i64 0, i64 3 +; CHECK-NEXT: [[VEC_CAST71:%.*]] = bitcast double* [[TMP26]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD72:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST71]], align 8 +; CHECK-NEXT: [[TMP27:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD69]], <1 x double> [[COL_LOAD72]], <1 x double> [[TMP24]]) +; CHECK-NEXT: [[TMP28:%.*]] = getelementptr [4 x double], [4 x double]* [[C]], i64 0, i64 3 +; CHECK-NEXT: [[VEC_CAST78:%.*]] = bitcast double* [[TMP28]] to <1 x double>* +; CHECK-NEXT: store <1 x double> [[TMP27]], <1 x double>* [[VEC_CAST78]], align 8 ; CHECK-NEXT: ret void ; entry: @@ -47,30 +104,87 @@ entry: define void @multiply_can_hoist_multiple_insts(<4 x double>* noalias %A, <4 x double> * %B, [4 x double]* %C) { ; CHECK-LABEL: @multiply_can_hoist_multiple_insts( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[VEC_CAST:%.*]] = bitcast <4 x double>* [[A:%.*]] to <2 x double>* -; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST]], align 8 -; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr <4 x double>, <4 x double>* [[A]], i64 0, i64 2 -; CHECK-NEXT: [[VEC_CAST1:%.*]] = bitcast double* [[VEC_GEP]] to <2 x double>* -; CHECK-NEXT: [[COL_LOAD2:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST1]], align 8 -; CHECK-NEXT: [[VEC_CAST3:%.*]] = bitcast <4 x double>* [[B:%.*]] to <2 x double>* -; CHECK-NEXT: [[COL_LOAD4:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST3]], align 8 -; CHECK-NEXT: [[VEC_GEP5:%.*]] = getelementptr <4 x double>, <4 x double>* [[B]], i64 0, i64 2 -; CHECK-NEXT: [[VEC_CAST6:%.*]] = bitcast double* [[VEC_GEP5]] to <2 x double>* -; CHECK-NEXT: [[COL_LOAD7:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST6]], align 8 -; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[COL_LOAD4]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP0:%.*]] = fmul contract <2 x double> [[COL_LOAD]], [[SPLAT_SPLAT]] -; CHECK-NEXT: [[SPLAT_SPLAT10:%.*]] = shufflevector <2 x double> [[COL_LOAD4]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD2]], <2 x double> [[SPLAT_SPLAT10]], <2 x double> [[TMP0]]) -; CHECK-NEXT: [[SPLAT_SPLAT13:%.*]] = shufflevector <2 x double> [[COL_LOAD7]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = fmul contract <2 x double> [[COL_LOAD]], [[SPLAT_SPLAT13]] -; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <2 x double> [[COL_LOAD7]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD2]], <2 x double> [[SPLAT_SPLAT16]], <2 x double> [[TMP2]]) -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [4 x double], [4 x double]* [[C:%.*]], i64 2, i64 0 -; CHECK-NEXT: [[VEC_CAST17:%.*]] = bitcast double* [[TMP4]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP1]], <2 x double>* [[VEC_CAST17]], align 8 -; CHECK-NEXT: [[VEC_GEP18:%.*]] = getelementptr [4 x double], [4 x double]* [[C]], i64 2, i64 2 -; CHECK-NEXT: [[VEC_CAST19:%.*]] = bitcast double* [[VEC_GEP18]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[VEC_CAST19]], align 8 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr [4 x double], [4 x double]* [[C:%.*]], i64 2 +; CHECK-NEXT: [[STORE_BEGIN:%.*]] = ptrtoint [4 x double]* [[GEP]] to i64 +; CHECK-NEXT: [[STORE_END:%.*]] = add nuw nsw i64 [[STORE_BEGIN]], 32 +; CHECK-NEXT: [[LOAD_BEGIN:%.*]] = ptrtoint <4 x double>* [[B:%.*]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[STORE_END]], [[LOAD_BEGIN]] +; CHECK-NEXT: br i1 [[TMP0]], label [[ALIAS_CONT:%.*]], label [[NO_ALIAS:%.*]] +; CHECK: alias_cont: +; CHECK-NEXT: [[LOAD_END:%.*]] = add nuw nsw i64 [[LOAD_BEGIN]], 32 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[LOAD_END]], [[STORE_BEGIN]] +; CHECK-NEXT: br i1 [[TMP1]], label [[COPY:%.*]], label [[NO_ALIAS]] +; CHECK: copy: +; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x double>, align 32 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x double>* [[TMP2]] to i8* +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x double>* [[B]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 32 dereferenceable(32) [[TMP3]], i8* noundef nonnull align 8 dereferenceable(32) [[TMP4]], i64 32, i1 false) +; CHECK-NEXT: br label [[NO_ALIAS]] +; CHECK: no_alias: +; CHECK-NEXT: [[TMP5:%.*]] = phi <4 x double>* [ [[B]], [[ENTRY:%.*]] ], [ [[B]], [[ALIAS_CONT]] ], [ [[TMP2]], [[COPY]] ] +; CHECK-NEXT: [[VEC_CAST:%.*]] = bitcast <4 x double>* [[A:%.*]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST]], align 8 +; CHECK-NEXT: [[VEC_CAST2:%.*]] = bitcast <4 x double>* [[TMP5]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD3:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST2]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = fmul contract <1 x double> [[COL_LOAD]], [[COL_LOAD3]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr <4 x double>, <4 x double>* [[A]], i64 0, i64 2 +; CHECK-NEXT: [[VEC_CAST5:%.*]] = bitcast double* [[TMP7]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD6:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST5]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr <4 x double>, <4 x double>* [[TMP5]], i64 0, i64 1 +; CHECK-NEXT: [[VEC_CAST8:%.*]] = bitcast double* [[TMP8]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD9:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST8]], align 8 +; CHECK-NEXT: [[TMP9:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD6]], <1 x double> [[COL_LOAD9]], <1 x double> [[TMP6]]) +; CHECK-NEXT: [[VEC_CAST15:%.*]] = bitcast [4 x double]* [[GEP]] to <1 x double>* +; CHECK-NEXT: store <1 x double> [[TMP9]], <1 x double>* [[VEC_CAST15]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr <4 x double>, <4 x double>* [[A]], i64 0, i64 1 +; CHECK-NEXT: [[VEC_CAST17:%.*]] = bitcast double* [[TMP10]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD18:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST17]], align 8 +; CHECK-NEXT: [[VEC_CAST20:%.*]] = bitcast <4 x double>* [[TMP5]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD21:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST20]], align 8 +; CHECK-NEXT: [[TMP11:%.*]] = fmul contract <1 x double> [[COL_LOAD18]], [[COL_LOAD21]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr <4 x double>, <4 x double>* [[A]], i64 0, i64 3 +; CHECK-NEXT: [[VEC_CAST26:%.*]] = bitcast double* [[TMP12]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD27:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST26]], align 8 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr <4 x double>, <4 x double>* [[TMP5]], i64 0, i64 1 +; CHECK-NEXT: [[VEC_CAST29:%.*]] = bitcast double* [[TMP13]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD30:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST29]], align 8 +; CHECK-NEXT: [[TMP14:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD27]], <1 x double> [[COL_LOAD30]], <1 x double> [[TMP11]]) +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr [4 x double], [4 x double]* [[C]], i64 2, i64 1 +; CHECK-NEXT: [[VEC_CAST36:%.*]] = bitcast double* [[TMP15]] to <1 x double>* +; CHECK-NEXT: store <1 x double> [[TMP14]], <1 x double>* [[VEC_CAST36]], align 8 +; CHECK-NEXT: [[VEC_CAST38:%.*]] = bitcast <4 x double>* [[A]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD39:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST38]], align 8 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr <4 x double>, <4 x double>* [[TMP5]], i64 0, i64 2 +; CHECK-NEXT: [[VEC_CAST41:%.*]] = bitcast double* [[TMP16]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD42:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST41]], align 8 +; CHECK-NEXT: [[TMP17:%.*]] = fmul contract <1 x double> [[COL_LOAD39]], [[COL_LOAD42]] +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr <4 x double>, <4 x double>* [[A]], i64 0, i64 2 +; CHECK-NEXT: [[VEC_CAST47:%.*]] = bitcast double* [[TMP18]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD48:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST47]], align 8 +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr <4 x double>, <4 x double>* [[TMP5]], i64 0, i64 3 +; CHECK-NEXT: [[VEC_CAST50:%.*]] = bitcast double* [[TMP19]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD51:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST50]], align 8 +; CHECK-NEXT: [[TMP20:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD48]], <1 x double> [[COL_LOAD51]], <1 x double> [[TMP17]]) +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr [4 x double], [4 x double]* [[C]], i64 2, i64 2 +; CHECK-NEXT: [[VEC_CAST57:%.*]] = bitcast double* [[TMP21]] to <1 x double>* +; CHECK-NEXT: store <1 x double> [[TMP20]], <1 x double>* [[VEC_CAST57]], align 8 +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr <4 x double>, <4 x double>* [[A]], i64 0, i64 1 +; CHECK-NEXT: [[VEC_CAST59:%.*]] = bitcast double* [[TMP22]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD60:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST59]], align 8 +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr <4 x double>, <4 x double>* [[TMP5]], i64 0, i64 2 +; CHECK-NEXT: [[VEC_CAST62:%.*]] = bitcast double* [[TMP23]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD63:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST62]], align 8 +; CHECK-NEXT: [[TMP24:%.*]] = fmul contract <1 x double> [[COL_LOAD60]], [[COL_LOAD63]] +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr <4 x double>, <4 x double>* [[A]], i64 0, i64 3 +; CHECK-NEXT: [[VEC_CAST68:%.*]] = bitcast double* [[TMP25]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD69:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST68]], align 8 +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr <4 x double>, <4 x double>* [[TMP5]], i64 0, i64 3 +; CHECK-NEXT: [[VEC_CAST71:%.*]] = bitcast double* [[TMP26]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD72:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST71]], align 8 +; CHECK-NEXT: [[TMP27:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD69]], <1 x double> [[COL_LOAD72]], <1 x double> [[TMP24]]) +; CHECK-NEXT: [[TMP28:%.*]] = getelementptr [4 x double], [4 x double]* [[C]], i64 2, i64 3 +; CHECK-NEXT: [[VEC_CAST78:%.*]] = bitcast double* [[TMP28]] to <1 x double>* +; CHECK-NEXT: store <1 x double> [[TMP27]], <1 x double>* [[VEC_CAST78]], align 8 ; CHECK-NEXT: ret void ; entry: @@ -87,30 +201,87 @@ entry: define void @multiply_can_hoist_multiple_insts2(<4 x double>* noalias %A, <4 x double> * %B, [4 x double]* %C) { ; CHECK-LABEL: @multiply_can_hoist_multiple_insts2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[VEC_CAST:%.*]] = bitcast <4 x double>* [[A:%.*]] to <2 x double>* -; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST]], align 8 -; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr <4 x double>, <4 x double>* [[A]], i64 0, i64 2 -; CHECK-NEXT: [[VEC_CAST1:%.*]] = bitcast double* [[VEC_GEP]] to <2 x double>* -; CHECK-NEXT: [[COL_LOAD2:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST1]], align 8 -; CHECK-NEXT: [[VEC_CAST3:%.*]] = bitcast <4 x double>* [[B:%.*]] to <2 x double>* -; CHECK-NEXT: [[COL_LOAD4:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST3]], align 8 -; CHECK-NEXT: [[VEC_GEP5:%.*]] = getelementptr <4 x double>, <4 x double>* [[B]], i64 0, i64 2 -; CHECK-NEXT: [[VEC_CAST6:%.*]] = bitcast double* [[VEC_GEP5]] to <2 x double>* -; CHECK-NEXT: [[COL_LOAD7:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST6]], align 8 -; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[COL_LOAD4]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP0:%.*]] = fmul contract <2 x double> [[COL_LOAD]], [[SPLAT_SPLAT]] -; CHECK-NEXT: [[SPLAT_SPLAT10:%.*]] = shufflevector <2 x double> [[COL_LOAD4]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD2]], <2 x double> [[SPLAT_SPLAT10]], <2 x double> [[TMP0]]) -; CHECK-NEXT: [[SPLAT_SPLAT13:%.*]] = shufflevector <2 x double> [[COL_LOAD7]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = fmul contract <2 x double> [[COL_LOAD]], [[SPLAT_SPLAT13]] -; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <2 x double> [[COL_LOAD7]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD2]], <2 x double> [[SPLAT_SPLAT16]], <2 x double> [[TMP2]]) -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [4 x double], [4 x double]* [[C:%.*]], i64 42, i64 0 -; CHECK-NEXT: [[VEC_CAST17:%.*]] = bitcast double* [[TMP4]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP1]], <2 x double>* [[VEC_CAST17]], align 8 -; CHECK-NEXT: [[VEC_GEP18:%.*]] = getelementptr [4 x double], [4 x double]* [[C]], i64 42, i64 2 -; CHECK-NEXT: [[VEC_CAST19:%.*]] = bitcast double* [[VEC_GEP18]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[VEC_CAST19]], align 8 +; CHECK-NEXT: [[GEP_179:%.*]] = getelementptr [4 x double], [4 x double]* [[C:%.*]], i64 42 +; CHECK-NEXT: [[STORE_BEGIN:%.*]] = ptrtoint [4 x double]* [[GEP_179]] to i64 +; CHECK-NEXT: [[STORE_END:%.*]] = add nuw nsw i64 [[STORE_BEGIN]], 32 +; CHECK-NEXT: [[LOAD_BEGIN:%.*]] = ptrtoint <4 x double>* [[B:%.*]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[STORE_END]], [[LOAD_BEGIN]] +; CHECK-NEXT: br i1 [[TMP0]], label [[ALIAS_CONT:%.*]], label [[NO_ALIAS:%.*]] +; CHECK: alias_cont: +; CHECK-NEXT: [[LOAD_END:%.*]] = add nuw nsw i64 [[LOAD_BEGIN]], 32 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[LOAD_END]], [[STORE_BEGIN]] +; CHECK-NEXT: br i1 [[TMP1]], label [[COPY:%.*]], label [[NO_ALIAS]] +; CHECK: copy: +; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x double>, align 32 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x double>* [[TMP2]] to i8* +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x double>* [[B]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 32 dereferenceable(32) [[TMP3]], i8* noundef nonnull align 8 dereferenceable(32) [[TMP4]], i64 32, i1 false) +; CHECK-NEXT: br label [[NO_ALIAS]] +; CHECK: no_alias: +; CHECK-NEXT: [[TMP5:%.*]] = phi <4 x double>* [ [[B]], [[ENTRY:%.*]] ], [ [[B]], [[ALIAS_CONT]] ], [ [[TMP2]], [[COPY]] ] +; CHECK-NEXT: [[VEC_CAST:%.*]] = bitcast <4 x double>* [[A:%.*]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST]], align 8 +; CHECK-NEXT: [[VEC_CAST2:%.*]] = bitcast <4 x double>* [[TMP5]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD3:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST2]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = fmul contract <1 x double> [[COL_LOAD]], [[COL_LOAD3]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr <4 x double>, <4 x double>* [[A]], i64 0, i64 2 +; CHECK-NEXT: [[VEC_CAST5:%.*]] = bitcast double* [[TMP7]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD6:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST5]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr <4 x double>, <4 x double>* [[TMP5]], i64 0, i64 1 +; CHECK-NEXT: [[VEC_CAST8:%.*]] = bitcast double* [[TMP8]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD9:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST8]], align 8 +; CHECK-NEXT: [[TMP9:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD6]], <1 x double> [[COL_LOAD9]], <1 x double> [[TMP6]]) +; CHECK-NEXT: [[VEC_CAST15:%.*]] = bitcast [4 x double]* [[GEP_179]] to <1 x double>* +; CHECK-NEXT: store <1 x double> [[TMP9]], <1 x double>* [[VEC_CAST15]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr <4 x double>, <4 x double>* [[A]], i64 0, i64 1 +; CHECK-NEXT: [[VEC_CAST17:%.*]] = bitcast double* [[TMP10]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD18:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST17]], align 8 +; CHECK-NEXT: [[VEC_CAST20:%.*]] = bitcast <4 x double>* [[TMP5]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD21:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST20]], align 8 +; CHECK-NEXT: [[TMP11:%.*]] = fmul contract <1 x double> [[COL_LOAD18]], [[COL_LOAD21]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr <4 x double>, <4 x double>* [[A]], i64 0, i64 3 +; CHECK-NEXT: [[VEC_CAST26:%.*]] = bitcast double* [[TMP12]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD27:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST26]], align 8 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr <4 x double>, <4 x double>* [[TMP5]], i64 0, i64 1 +; CHECK-NEXT: [[VEC_CAST29:%.*]] = bitcast double* [[TMP13]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD30:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST29]], align 8 +; CHECK-NEXT: [[TMP14:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD27]], <1 x double> [[COL_LOAD30]], <1 x double> [[TMP11]]) +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr [4 x double], [4 x double]* [[C]], i64 42, i64 1 +; CHECK-NEXT: [[VEC_CAST36:%.*]] = bitcast double* [[TMP15]] to <1 x double>* +; CHECK-NEXT: store <1 x double> [[TMP14]], <1 x double>* [[VEC_CAST36]], align 8 +; CHECK-NEXT: [[VEC_CAST38:%.*]] = bitcast <4 x double>* [[A]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD39:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST38]], align 8 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr <4 x double>, <4 x double>* [[TMP5]], i64 0, i64 2 +; CHECK-NEXT: [[VEC_CAST41:%.*]] = bitcast double* [[TMP16]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD42:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST41]], align 8 +; CHECK-NEXT: [[TMP17:%.*]] = fmul contract <1 x double> [[COL_LOAD39]], [[COL_LOAD42]] +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr <4 x double>, <4 x double>* [[A]], i64 0, i64 2 +; CHECK-NEXT: [[VEC_CAST47:%.*]] = bitcast double* [[TMP18]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD48:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST47]], align 8 +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr <4 x double>, <4 x double>* [[TMP5]], i64 0, i64 3 +; CHECK-NEXT: [[VEC_CAST50:%.*]] = bitcast double* [[TMP19]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD51:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST50]], align 8 +; CHECK-NEXT: [[TMP20:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD48]], <1 x double> [[COL_LOAD51]], <1 x double> [[TMP17]]) +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr [4 x double], [4 x double]* [[C]], i64 42, i64 2 +; CHECK-NEXT: [[VEC_CAST57:%.*]] = bitcast double* [[TMP21]] to <1 x double>* +; CHECK-NEXT: store <1 x double> [[TMP20]], <1 x double>* [[VEC_CAST57]], align 8 +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr <4 x double>, <4 x double>* [[A]], i64 0, i64 1 +; CHECK-NEXT: [[VEC_CAST59:%.*]] = bitcast double* [[TMP22]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD60:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST59]], align 8 +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr <4 x double>, <4 x double>* [[TMP5]], i64 0, i64 2 +; CHECK-NEXT: [[VEC_CAST62:%.*]] = bitcast double* [[TMP23]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD63:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST62]], align 8 +; CHECK-NEXT: [[TMP24:%.*]] = fmul contract <1 x double> [[COL_LOAD60]], [[COL_LOAD63]] +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr <4 x double>, <4 x double>* [[A]], i64 0, i64 3 +; CHECK-NEXT: [[VEC_CAST68:%.*]] = bitcast double* [[TMP25]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD69:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST68]], align 8 +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr <4 x double>, <4 x double>* [[TMP5]], i64 0, i64 3 +; CHECK-NEXT: [[VEC_CAST71:%.*]] = bitcast double* [[TMP26]] to <1 x double>* +; CHECK-NEXT: [[COL_LOAD72:%.*]] = load <1 x double>, <1 x double>* [[VEC_CAST71]], align 8 +; CHECK-NEXT: [[TMP27:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD69]], <1 x double> [[COL_LOAD72]], <1 x double> [[TMP24]]) +; CHECK-NEXT: [[TMP28:%.*]] = getelementptr [4 x double], [4 x double]* [[C]], i64 42, i64 3 +; CHECK-NEXT: [[VEC_CAST78:%.*]] = bitcast double* [[TMP28]] to <1 x double>* +; CHECK-NEXT: store <1 x double> [[TMP27]], <1 x double>* [[VEC_CAST78]], align 8 ; CHECK-NEXT: ret void ; entry: From 4d2503cd5443ef7697465ab1ba199e9e69c497e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= Date: Fri, 2 Jul 2021 11:44:41 +0200 Subject: [PATCH 507/619] [lldb] [test] Add missing category to test_detach_current --- lldb/test/API/tools/lldb-server/TestGdbRemoteFork.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lldb/test/API/tools/lldb-server/TestGdbRemoteFork.py b/lldb/test/API/tools/lldb-server/TestGdbRemoteFork.py index bcf728c34b797..8937621fb6012 100644 --- a/lldb/test/API/tools/lldb-server/TestGdbRemoteFork.py +++ b/lldb/test/API/tools/lldb-server/TestGdbRemoteFork.py @@ -164,6 +164,7 @@ def test_select_wrong_pid(self): ], True) ret = self.expect_gdbremote_sequence() + @add_test_categories(["fork"]) def test_detach_current(self): self.build() self.prep_debug_monitor_and_inferior() From 48db080383765893d5dca3c48ef575c36afd137b Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Fri, 2 Jul 2021 12:59:01 +0300 Subject: [PATCH 508/619] [NFC][SimplifyCFG] Autogenerate checklines in trapping-load-unreachable.ll test --- .../SimplifyCFG/trapping-load-unreachable.ll | 129 ++++++++++++------ 1 file changed, 85 insertions(+), 44 deletions(-) diff --git a/llvm/test/Transforms/SimplifyCFG/trapping-load-unreachable.ll b/llvm/test/Transforms/SimplifyCFG/trapping-load-unreachable.ll index c16c618a90811..b277cb6cf4f9a 100644 --- a/llvm/test/Transforms/SimplifyCFG/trapping-load-unreachable.ll +++ b/llvm/test/Transforms/SimplifyCFG/trapping-load-unreachable.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S | FileCheck %s ; PR2967 @@ -6,85 +7,113 @@ target datalayout = target triple = "i386-pc-linux-gnu" define void @test1(i32 %x) nounwind { +; CHECK-LABEL: @test1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[X:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP0]], label [[BB:%.*]], label [[RETURN:%.*]] +; CHECK: bb: +; CHECK-NEXT: [[TMP1:%.*]] = load volatile i32, i32* null, align 4 +; CHECK-NEXT: unreachable +; CHECK: return: +; CHECK-NEXT: ret void +; entry: - %0 = icmp eq i32 %x, 0 ; [#uses=1] - br i1 %0, label %bb, label %return + %0 = icmp eq i32 %x, 0 ; [#uses=1] + br i1 %0, label %bb, label %return bb: ; preds = %entry - %1 = load volatile i32, i32* null - unreachable + %1 = load volatile i32, i32* null + unreachable - br label %return + br label %return return: ; preds = %entry - ret void -; CHECK-LABEL: @test1( -; CHECK: load volatile + ret void } define void @test1_no_null_opt(i32 %x) nounwind #0 { +; CHECK-LABEL: @test1_no_null_opt( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[X:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP0]], label [[BB:%.*]], label [[RETURN:%.*]] +; CHECK: bb: +; CHECK-NEXT: [[TMP1:%.*]] = load volatile i32, i32* null, align 4 +; CHECK-NEXT: unreachable +; CHECK: return: +; CHECK-NEXT: ret void +; entry: - %0 = icmp eq i32 %x, 0 ; [#uses=1] - br i1 %0, label %bb, label %return + %0 = icmp eq i32 %x, 0 ; [#uses=1] + br i1 %0, label %bb, label %return bb: ; preds = %entry - %1 = load volatile i32, i32* null - unreachable + %1 = load volatile i32, i32* null + unreachable - br label %return + br label %return return: ; preds = %entry - ret void -; CHECK-LABEL: @test1_no_null_opt( -; CHECK: load volatile -; CHECK: unreachable + ret void } ; rdar://7958343 define void @test2() nounwind { +; CHECK-LABEL: @test2( +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @llvm.trap() +; CHECK-NEXT: unreachable +; entry: - store i32 4,i32* null - ret void + store i32 4,i32* null + ret void -; CHECK-LABEL: @test2( -; CHECK: call void @llvm.trap -; CHECK: unreachable } define void @test2_no_null_opt() nounwind #0 { -entry: - store i32 4,i32* null - ret void ; CHECK-LABEL: @test2_no_null_opt( -; CHECK: store i32 4, i32* null -; CHECK-NOT: call void @llvm.trap -; CHECK: ret +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 4, i32* null, align 4 +; CHECK-NEXT: ret void +; +entry: + store i32 4,i32* null + ret void } ; PR7369 define void @test3() nounwind { +; CHECK-LABEL: @test3( +; CHECK-NEXT: entry: +; CHECK-NEXT: store volatile i32 4, i32* null, align 4 +; CHECK-NEXT: ret void +; entry: - store volatile i32 4, i32* null - ret void + store volatile i32 4, i32* null + ret void -; CHECK-LABEL: @test3( -; CHECK: store volatile i32 4, i32* null -; CHECK: ret } define void @test3_no_null_opt() nounwind #0 { +; CHECK-LABEL: @test3_no_null_opt( +; CHECK-NEXT: entry: +; CHECK-NEXT: store volatile i32 4, i32* null, align 4 +; CHECK-NEXT: ret void +; entry: - store volatile i32 4, i32* null - ret void + store volatile i32 4, i32* null + ret void -; CHECK-LABEL: @test3_no_null_opt( -; CHECK: store volatile i32 4, i32* null -; CHECK: ret } ; Check store before unreachable. define void @test4(i1 %C, i32* %P) { ; CHECK-LABEL: @test4( -; CHECK: entry: -; CHECK-NEXT: br i1 %C +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C:%.*]], label [[T:%.*]], label [[F:%.*]] +; CHECK: T: +; CHECK-NEXT: store volatile i32 0, i32* [[P:%.*]], align 4 +; CHECK-NEXT: unreachable +; CHECK: F: +; CHECK-NEXT: ret void +; entry: br i1 %C, label %T, label %F T: @@ -97,8 +126,14 @@ F: ; Check cmpxchg before unreachable. define void @test5(i1 %C, i32* %P) { ; CHECK-LABEL: @test5( -; CHECK: entry: -; CHECK-NEXT: br i1 %C +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C:%.*]], label [[T:%.*]], label [[F:%.*]] +; CHECK: T: +; CHECK-NEXT: [[TMP0:%.*]] = cmpxchg volatile i32* [[P:%.*]], i32 0, i32 1 seq_cst seq_cst, align 4 +; CHECK-NEXT: unreachable +; CHECK: F: +; CHECK-NEXT: ret void +; entry: br i1 %C, label %T, label %F T: @@ -111,8 +146,14 @@ F: ; Check atomicrmw before unreachable. define void @test6(i1 %C, i32* %P) { ; CHECK-LABEL: @test6( -; CHECK: entry: -; CHECK-NEXT: br i1 %C +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C:%.*]], label [[T:%.*]], label [[F:%.*]] +; CHECK: T: +; CHECK-NEXT: [[TMP0:%.*]] = atomicrmw volatile xchg i32* [[P:%.*]], i32 0 seq_cst, align 4 +; CHECK-NEXT: unreachable +; CHECK: F: +; CHECK-NEXT: ret void +; entry: br i1 %C, label %T, label %F T: From 1a248233a5d9e374f977a0ca3509e2fa0448652e Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 2 Jul 2021 10:03:22 +0100 Subject: [PATCH 509/619] [AArch64] Use custom lowering for fp16 vector copysign. The custom copysign lowering already supports fp16. Use it. Reviewed By: dmgreen Differential Revision: https://reviews.llvm.org/D105277 --- .../Target/AArch64/AArch64ISelLowering.cpp | 6 +- .../Analysis/CostModel/AArch64/arith-fp.ll | 6 +- llvm/test/CodeGen/AArch64/vector-fcopysign.ll | 122 +++--------------- 3 files changed, 26 insertions(+), 108 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 1a490ab11822a..6d2f6a32553fa 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1396,10 +1396,12 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) { setOperationAction(ISD::FLOG10, VT, Expand); setOperationAction(ISD::FEXP, VT, Expand); setOperationAction(ISD::FEXP2, VT, Expand); + } - // But we do support custom-lowering for FCOPYSIGN. + // But we do support custom-lowering for FCOPYSIGN. + if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64 || + ((VT == MVT::v4f16 || VT == MVT::v8f16) && Subtarget->hasFullFP16())) setOperationAction(ISD::FCOPYSIGN, VT, Custom); - } setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll b/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll index 21ab1b26d87eb..48c2602b4e368 100644 --- a/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll +++ b/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll @@ -292,9 +292,9 @@ define i32 @fabs(i32 %arg) { define i32 @fcopysign(i32 %arg) { ; CHECK-LABEL: 'fcopysign' ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = call half @llvm.copysign.f16(half undef, half undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4F16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8F16 = call <8 x half> @llvm.copysign.v8f16(<8 x half> undef, <8 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V16F16 = call <16 x half> @llvm.copysign.v16f16(<16 x half> undef, <16 x half> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = call <8 x half> @llvm.copysign.v8f16(<8 x half> undef, <8 x half> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = call <16 x half> @llvm.copysign.v16f16(<16 x half> undef, <16 x half> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.copysign.v2f32(<2 x float> undef, <2 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef) diff --git a/llvm/test/CodeGen/AArch64/vector-fcopysign.ll b/llvm/test/CodeGen/AArch64/vector-fcopysign.ll index 47c9e34396873..755bed466f597 100644 --- a/llvm/test/CodeGen/AArch64/vector-fcopysign.ll +++ b/llvm/test/CodeGen/AArch64/vector-fcopysign.ll @@ -195,20 +195,8 @@ define <4 x half> @test_copysign_v4f16_v4f16(<4 x half> %a, <4 x half> %b) #0 { ; NOFP16-NEXT: mov.h v0[3], v1[0] ; NOFP16-NEXT: ret -; FP16-NEXT: mov h2, v1[1] -; FP16-NEXT: mov h3, v0[1] -; FP16-NEXT: movi.8h v4, #128, lsl #8 -; FP16-NEXT: mov h5, v1[2] -; FP16-NEXT: bit.16b v3, v2, v4 -; FP16-NEXT: mov h2, v0[2] -; FP16-NEXT: bit.16b v2, v5, v4 -; FP16-NEXT: mov h5, v0[3] -; FP16-NEXT: bit.16b v0, v1, v4 -; FP16-NEXT: mov h1, v1[3] -; FP16-NEXT: mov.h v0[1], v3[0] -; FP16-NEXT: mov.h v0[2], v2[0] -; FP16-NEXT: bit.16b v5, v1, v4 -; FP16-NEXT: mov.h v0[3], v5[0] +; FP16-NEXT: movi.4h v2, #128, lsl #8 +; FP16-NEXT: bit.8b v0, v1, v2 ; FP16-NEXT: ret %r = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %b) ret <4 x half> %r @@ -246,20 +234,8 @@ define <4 x half> @test_copysign_v4f16_v4f32(<4 x half> %a, <4 x float> %b) #0 { ; NOFP16-NEXT: ret ; FP16-NEXT: fcvtn v1.4h, v1.4s -; FP16-NEXT: mov h2, v0[1] -; FP16-NEXT: movi.8h v3, #128, lsl #8 -; FP16-NEXT: mov h4, v0[2] -; FP16-NEXT: mov h5, v1[1] -; FP16-NEXT: bit.16b v2, v5, v3 -; FP16-NEXT: mov h5, v1[2] -; FP16-NEXT: bit.16b v4, v5, v3 -; FP16-NEXT: mov h5, v0[3] -; FP16-NEXT: bit.16b v0, v1, v3 -; FP16-NEXT: mov h1, v1[3] -; FP16-NEXT: mov.h v0[1], v2[0] -; FP16-NEXT: mov.h v0[2], v4[0] -; FP16-NEXT: bit.16b v5, v1, v3 -; FP16-NEXT: mov.h v0[3], v5[0] +; FP16-NEXT: movi.4h v2, #128, lsl #8 +; FP16-NEXT: bit.8b v0, v1, v2 ; FP16-NEXT: ret %tmp0 = fptrunc <4 x float> %b to <4 x half> %r = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %tmp0) @@ -295,23 +271,17 @@ define <4 x half> @test_copysign_v4f16_v4f64(<4 x half> %a, <4 x double> %b) #0 ; NOFP16-NEXT: mov.h v0[3], v1[0] ; NOFP16-NEXT: ret -; FP16-NEXT: mov h3, v0[1] -; FP16-NEXT: movi.8h v4, #128, lsl #8 -; FP16-NEXT: fcvt h5, d1 -; FP16-NEXT: mov h6, v0[2] -; FP16-NEXT: mov h7, v0[3] -; FP16-NEXT: bit.16b v0, v5, v4 -; FP16-NEXT: fcvt h5, d2 -; FP16-NEXT: bit.16b v6, v5, v4 +; FP16-NEXT: mov d3, v1[1] +; FP16-NEXT: fcvt h1, d1 +; FP16-NEXT: fcvt h3, d3 +; FP16-NEXT: mov.h v1[1], v3[0] +; FP16-NEXT: fcvt h3, d2 ; FP16-NEXT: mov d2, v2[1] -; FP16-NEXT: mov d1, v1[1] -; FP16-NEXT: fcvt h1, d1 -; FP16-NEXT: fcvt h2, d2 -; FP16-NEXT: bit.16b v3, v1, v4 -; FP16-NEXT: mov.h v0[1], v3[0] -; FP16-NEXT: mov.h v0[2], v6[0] -; FP16-NEXT: bit.16b v7, v2, v4 -; FP16-NEXT: mov.h v0[3], v7[0] +; FP16-NEXT: fcvt h2, d2 +; FP16-NEXT: mov.h v1[2], v3[0] +; FP16-NEXT: mov.h v1[3], v2[0] +; FP16-NEXT: movi.4h v2, #128, lsl #8 +; FP16-NEXT: bit.8b v0, v1, v2 ; FP16-NEXT: ret %tmp0 = fptrunc <4 x double> %b to <4 x half> %r = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %tmp0) @@ -380,36 +350,8 @@ define <8 x half> @test_copysign_v8f16_v8f16(<8 x half> %a, <8 x half> %b) #0 { ; NOFP16-NEXT: mov.h v0[7], v1[0] ; NOFP16-NEXT: ret -; FP16-NEXT: mov h4, v1[1] -; FP16-NEXT: mov h3, v0[1] ; FP16-NEXT: movi.8h v2, #128, lsl #8 -; FP16-NEXT: mov h5, v1[2] -; FP16-NEXT: mov h6, v0[2] -; FP16-NEXT: mov h7, v1[3] -; FP16-NEXT: mov h16, v0[3] -; FP16-NEXT: mov h17, v1[4] -; FP16-NEXT: bit.16b v3, v4, v2 -; FP16-NEXT: mov h4, v0[4] -; FP16-NEXT: bit.16b v6, v5, v2 -; FP16-NEXT: mov h5, v1[5] -; FP16-NEXT: bit.16b v16, v7, v2 -; FP16-NEXT: mov h7, v0[5] -; FP16-NEXT: bit.16b v4, v17, v2 -; FP16-NEXT: mov h17, v1[6] -; FP16-NEXT: bit.16b v7, v5, v2 -; FP16-NEXT: mov h5, v0[6] -; FP16-NEXT: bit.16b v5, v17, v2 -; FP16-NEXT: mov h17, v0[7] -; FP16-NEXT: bit.16b v0, v1, v2 -; FP16-NEXT: mov.h v0[1], v3[0] -; FP16-NEXT: mov.h v0[2], v6[0] -; FP16-NEXT: mov.h v0[3], v16[0] -; FP16-NEXT: mov.h v0[4], v4[0] -; FP16-NEXT: mov h1, v1[7] -; FP16-NEXT: mov.h v0[5], v7[0] -; FP16-NEXT: mov.h v0[6], v5[0] -; FP16-NEXT: bit.16b v17, v1, v2 -; FP16-NEXT: mov.h v0[7], v17[0] +; FP16-NEXT: bit.16b v0, v1, v2 ; FP16-NEXT: ret %r = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %b) ret <8 x half> %r @@ -475,36 +417,10 @@ define <8 x half> @test_copysign_v8f16_v8f32(<8 x half> %a, <8 x float> %b) #0 { ; NOFP16-NEXT: ret ; FP16-NEXT: fcvtn v2.4h, v2.4s -; FP16-NEXT: fcvtn v4.4h, v1.4s -; FP16-NEXT: mov h3, v0[1] -; FP16-NEXT: movi.8h v1, #128, lsl #8 -; FP16-NEXT: mov h5, v0[2] -; FP16-NEXT: mov h6, v0[3] -; FP16-NEXT: mov h7, v0[4] -; FP16-NEXT: mov h16, v0[5] -; FP16-NEXT: mov h17, v0[6] -; FP16-NEXT: mov h18, v4[1] -; FP16-NEXT: bit.16b v3, v18, v1 -; FP16-NEXT: mov h18, v4[2] -; FP16-NEXT: bit.16b v5, v18, v1 -; FP16-NEXT: mov h18, v0[7] -; FP16-NEXT: bit.16b v0, v4, v1 -; FP16-NEXT: mov h4, v4[3] -; FP16-NEXT: bit.16b v6, v4, v1 -; FP16-NEXT: mov h4, v2[1] -; FP16-NEXT: bit.16b v16, v4, v1 -; FP16-NEXT: mov h4, v2[2] -; FP16-NEXT: bit.16b v17, v4, v1 -; FP16-NEXT: mov.h v0[1], v3[0] -; FP16-NEXT: mov.h v0[2], v5[0] -; FP16-NEXT: mov.h v0[3], v6[0] -; FP16-NEXT: bit.16b v7, v2, v1 -; FP16-NEXT: mov h2, v2[3] -; FP16-NEXT: mov.h v0[4], v7[0] -; FP16-NEXT: mov.h v0[5], v16[0] -; FP16-NEXT: mov.h v0[6], v17[0] -; FP16-NEXT: bit.16b v18, v2, v1 -; FP16-NEXT: mov.h v0[7], v18[0] +; FP16-NEXT: fcvtn v1.4h, v1.4s +; FP16-NEXT: mov.d v1[1], v2[0] +; FP16-NEXT: movi.8h v2, #128, lsl #8 +; FP16-NEXT: bit.16b v0, v1, v2 ; FP16-NEXT: ret %tmp0 = fptrunc <8 x float> %b to <8 x half> %r = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %tmp0) From b77b2201dc1f50f10e724c8c0b63963c5d98bf74 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Fri, 2 Jul 2021 11:39:17 +0100 Subject: [PATCH 510/619] [NFC] Fix typo in comment Reported-by: Marco Cali --- clang/lib/Format/TokenAnnotator.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index aa69ff88bd747..2b83ff4f78503 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -363,7 +363,7 @@ class AnnotatingParser { Left->Previous && Left->Previous->is(tok::l_paren)) { // Detect the case where macros are used to generate lambdas or // function bodies, e.g.: - // auto my_lambda = MARCO((Type *type, int i) { .. body .. }); + // auto my_lambda = MACRO((Type *type, int i) { .. body .. }); for (FormatToken *Tok = Left; Tok != CurrentToken; Tok = Tok->Next) { if (Tok->is(TT_BinaryOperator) && Tok->isOneOf(tok::star, tok::amp, tok::ampamp)) From 791ddb79f1dde3432f6214c1c8deadc59151b15e Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Fri, 2 Jul 2021 11:03:19 +0200 Subject: [PATCH 511/619] Add LogOp to Complex dialect. Differential Revision: https://reviews.llvm.org/D105337 --- .../mlir/Dialect/Complex/IR/ComplexOps.td | 23 ++++++++++++++++++- mlir/test/Dialect/Complex/ops.mlir | 16 +++++++++++-- 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/mlir/include/mlir/Dialect/Complex/IR/ComplexOps.td b/mlir/include/mlir/Dialect/Complex/IR/ComplexOps.td index d533b5db6b416..a116242dd0781 100644 --- a/mlir/include/mlir/Dialect/Complex/IR/ComplexOps.td +++ b/mlir/include/mlir/Dialect/Complex/IR/ComplexOps.td @@ -159,7 +159,7 @@ def ExpOp : ComplexUnaryOp<"exp", [SameOperandsAndResultType]> { let summary = "computes exponential of a complex number"; let description = [{ The `exp` op takes a single complex number and computes the exponential of - it, i.e. `exp(x)` or `e^(x)`, where `x` is the input tensor. + it, i.e. `exp(x)` or `e^(x)`, where `x` is the input value. `e` denotes Euler's number and is approximately equal to 2.718281. Example: @@ -195,6 +195,27 @@ def ImOp : ComplexUnaryOp<"im", let hasFolder = 1; } +//===----------------------------------------------------------------------===// +// LogOp +//===----------------------------------------------------------------------===// + +def LogOp : ComplexUnaryOp<"log", [SameOperandsAndResultType]> { + let summary = "computes natural logarithm of a complex number"; + let description = [{ + The `log` op takes a single complex number and computes the natural + logarithm of it, i.e. `log(x)` or `log_e(x)`, where `x` is the input value. + `e` denotes Euler's number and is approximately equal to 2.718281. + + Example: + + ```mlir + %a = complex.log %b : complex + ``` + }]; + + let results = (outs Complex:$result); +} + //===----------------------------------------------------------------------===// // MulOp //===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/Complex/ops.mlir b/mlir/test/Dialect/Complex/ops.mlir index 09d31ba400f4f..74b45b8ae230a 100644 --- a/mlir/test/Dialect/Complex/ops.mlir +++ b/mlir/test/Dialect/Complex/ops.mlir @@ -26,12 +26,24 @@ func @ops(%f: f32) { // CHECK: complex.eq %[[C]], %[[C]] : complex %eq = complex.eq %complex, %complex : complex - // CHECK: complex.neq %[[C]], %[[C]] : complex - %neq = complex.neq %complex, %complex : complex + // CHECK: complex.exp %[[C]] : complex + %exp = complex.exp %complex : complex + + // CHECK: complex.log %[[C]] : complex + %log = complex.log %complex : complex // CHECK: complex.mul %[[C]], %[[C]] : complex %prod = complex.mul %complex, %complex : complex + // CHECK: complex.neg %[[C]] : complex + %neg = complex.neg %complex : complex + + // CHECK: complex.neq %[[C]], %[[C]] : complex + %neq = complex.neq %complex, %complex : complex + + // CHECK: complex.sign %[[C]] : complex + %sign = complex.sign %complex : complex + // CHECK: complex.sub %[[C]], %[[C]] : complex %diff = complex.sub %complex, %complex : complex return From c142c06c19b3725f26f33652db098a2302586f22 Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Fri, 2 Jul 2021 11:25:43 +0100 Subject: [PATCH 512/619] Place the BlockAddress type in the address space of the containing function While this should not matter for most architectures (where the program address space is 0), it is important for CHERI (and therefore Arm Morello). We use address space 200 for all of our code pointers and without this change we assert in the SelectionDAG handling of BlockAddress nodes. It is also useful for AVR: previously programs targeting AVR that attempt to read their own machine code via a pointer to a label would instead read from RAM using a pointer relative to the the start of program flash. Reviewed By: dylanmckay, theraven Differential Revision: https://reviews.llvm.org/D48803 --- llvm/docs/LangRef.rst | 6 +- llvm/include/llvm/AsmParser/LLParser.h | 3 +- llvm/lib/AsmParser/LLParser.cpp | 59 ++-- llvm/lib/IR/Constants.cpp | 4 +- llvm/test/Bitcode/blockaddress-addrspace.ll | 286 ++++++++++++++++++ .../AVR/block-address-is-in-progmem-space.ll | 51 ++++ llvm/test/CodeGen/AVR/brind.ll | 8 +- 7 files changed, 392 insertions(+), 25 deletions(-) create mode 100644 llvm/test/Bitcode/blockaddress-addrspace.ll create mode 100644 llvm/test/CodeGen/AVR/block-address-is-in-progmem-space.ll diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 848ee2343b5e0..d87f791077e51 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -4078,7 +4078,11 @@ Addresses of Basic Blocks ``blockaddress(@function, %block)`` The '``blockaddress``' constant computes the address of the specified -basic block in the specified function, and always has an ``i8*`` type. +basic block in the specified function. + +It always has an ``i8 addrspace(P)*`` type, where ``P`` is the address space +of the function containing ``%block`` (usually ``addrspace(0)``). + Taking the address of the entry block is illegal. This value only has defined behavior when used as an operand to the diff --git a/llvm/include/llvm/AsmParser/LLParser.h b/llvm/include/llvm/AsmParser/LLParser.h index 3981241bb2eee..8b26b65528972 100644 --- a/llvm/include/llvm/AsmParser/LLParser.h +++ b/llvm/include/llvm/AsmParser/LLParser.h @@ -507,7 +507,8 @@ namespace llvm { PerFunctionState &PFS); // Constant Parsing. - bool parseValID(ValID &ID, PerFunctionState *PFS = nullptr); + bool parseValID(ValID &ID, PerFunctionState *PFS, + Type *ExpectedTy = nullptr); bool parseGlobalValue(Type *Ty, Constant *&C); bool parseGlobalTypeAndValue(Constant *&V); bool parseGlobalValueVector(SmallVectorImpl &Elts, diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index f9f73d2a4ffd4..678bf822fe08e 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -970,7 +970,7 @@ bool LLParser::parseIndirectSymbol(const std::string &Name, LocTy NameLoc, } else { // The bitcast dest type is not present, it is implied by the dest type. ValID ID; - if (parseValID(ID)) + if (parseValID(ID, /*PFS=*/nullptr)) return true; if (ID.Kind != ValID::t_Constant) return error(AliaseeLoc, "invalid aliasee"); @@ -3321,7 +3321,7 @@ BasicBlock *LLParser::PerFunctionState::defineBB(const std::string &Name, /// sanity. PFS is used to convert function-local operands of metadata (since /// metadata operands are not just parsed here but also converted to values). /// PFS can be null when we are not parsing metadata values inside a function. -bool LLParser::parseValID(ValID &ID, PerFunctionState *PFS) { +bool LLParser::parseValID(ValID &ID, PerFunctionState *PFS, Type *ExpectedTy) { ID.Loc = Lex.getLoc(); switch (Lex.getKind()) { default: @@ -3493,10 +3493,10 @@ bool LLParser::parseValID(ValID &ID, PerFunctionState *PFS) { ValID Fn, Label; if (parseToken(lltok::lparen, "expected '(' in block address expression") || - parseValID(Fn) || + parseValID(Fn, PFS) || parseToken(lltok::comma, "expected comma in block address expression") || - parseValID(Label) || + parseValID(Label, PFS) || parseToken(lltok::rparen, "expected ')' in block address expression")) return true; @@ -3531,9 +3531,27 @@ bool LLParser::parseValID(ValID &ID, PerFunctionState *PFS) { std::map())) .first->second.insert(std::make_pair(std::move(Label), nullptr)) .first->second; - if (!FwdRef) - FwdRef = new GlobalVariable(*M, Type::getInt8Ty(Context), false, - GlobalValue::InternalLinkage, nullptr, ""); + if (!FwdRef) { + unsigned FwdDeclAS; + if (ExpectedTy) { + // If we know the type that the blockaddress is being assigned to, + // we can use the address space of that type. + if (!ExpectedTy->isPointerTy()) + return error(ID.Loc, + "type of blockaddress must be a pointer and not '" + + getTypeString(ExpectedTy) + "'"); + FwdDeclAS = ExpectedTy->getPointerAddressSpace(); + } else if (PFS) { + // Otherwise, we default the address space of the current function. + FwdDeclAS = PFS->getFunction().getAddressSpace(); + } else { + llvm_unreachable("Unknown address space for blockaddress"); + } + FwdRef = new GlobalVariable( + *M, Type::getInt8Ty(Context), false, GlobalValue::InternalLinkage, + nullptr, "", nullptr, GlobalValue::NotThreadLocal, FwdDeclAS); + } + ID.ConstantVal = FwdRef; ID.Kind = ValID::t_Constant; return false; @@ -3570,7 +3588,7 @@ bool LLParser::parseValID(ValID &ID, PerFunctionState *PFS) { ValID Fn; - if (parseValID(Fn)) + if (parseValID(Fn, PFS)) return true; if (Fn.Kind != ValID::t_GlobalID && Fn.Kind != ValID::t_GlobalName) @@ -3960,7 +3978,7 @@ bool LLParser::parseGlobalValue(Type *Ty, Constant *&C) { C = nullptr; ValID ID; Value *V = nullptr; - bool Parsed = parseValID(ID) || + bool Parsed = parseValID(ID, /*PFS=*/nullptr, Ty) || convertValIDToValue(Ty, ID, V, nullptr, /*IsCall=*/false); if (V && !(C = dyn_cast(V))) return error(ID.Loc, "global values must be constants"); @@ -5679,7 +5697,9 @@ bool LLParser::convertValIDToValue(Type *Ty, ValID &ID, Value *&V, return false; case ValID::t_Constant: if (ID.ConstantVal->getType() != Ty) - return error(ID.Loc, "constant expression type mismatch"); + return error(ID.Loc, "constant expression type mismatch: got type '" + + getTypeString(ID.ConstantVal->getType()) + + "' but expected '" + getTypeString(Ty) + "'"); V = ID.ConstantVal; return false; case ValID::t_ConstantStruct: @@ -5739,7 +5759,7 @@ bool LLParser::parseConstantValue(Type *Ty, Constant *&C) { bool LLParser::parseValue(Type *Ty, Value *&V, PerFunctionState *PFS) { V = nullptr; ValID ID; - return parseValID(ID, PFS) || + return parseValID(ID, PFS, Ty) || convertValIDToValue(Ty, ID, V, PFS, /*IsCall=*/false); } @@ -6033,7 +6053,12 @@ bool LLParser::PerFunctionState::resolveForwardRefBlockAddresses() { if (!BB) return P.error(BBID.Loc, "referenced value is not a basic block"); - GV->replaceAllUsesWith(BlockAddress::get(&F, BB)); + Value *ResolvedVal = BlockAddress::get(&F, BB); + ResolvedVal = P.checkValidVariableType(BBID.Loc, BBID.StrVal, GV->getType(), + ResolvedVal, false); + if (!ResolvedVal) + return true; + GV->replaceAllUsesWith(ResolvedVal); GV->eraseFromParent(); } @@ -6568,7 +6593,7 @@ bool LLParser::parseInvoke(Instruction *&Inst, PerFunctionState &PFS) { if (parseOptionalCallingConv(CC) || parseOptionalReturnAttrs(RetAttrs) || parseOptionalProgramAddrSpace(InvokeAddrSpace) || parseType(RetType, RetTypeLoc, true /*void allowed*/) || - parseValID(CalleeID) || parseParameterList(ArgList, PFS) || + parseValID(CalleeID, &PFS) || parseParameterList(ArgList, PFS) || parseFnAttributeValuePairs(FnAttrs, FwdRefAttrGrps, false, NoBuiltinLoc) || parseOptionalOperandBundles(BundleList, PFS) || @@ -6876,7 +6901,7 @@ bool LLParser::parseCallBr(Instruction *&Inst, PerFunctionState &PFS) { BasicBlock *DefaultDest; if (parseOptionalCallingConv(CC) || parseOptionalReturnAttrs(RetAttrs) || parseType(RetType, RetTypeLoc, true /*void allowed*/) || - parseValID(CalleeID) || parseParameterList(ArgList, PFS) || + parseValID(CalleeID, &PFS) || parseParameterList(ArgList, PFS) || parseFnAttributeValuePairs(FnAttrs, FwdRefAttrGrps, false, NoBuiltinLoc) || parseOptionalOperandBundles(BundleList, PFS) || @@ -7303,7 +7328,7 @@ bool LLParser::parseCall(Instruction *&Inst, PerFunctionState &PFS, if (parseOptionalCallingConv(CC) || parseOptionalReturnAttrs(RetAttrs) || parseOptionalProgramAddrSpace(CallAddrSpace) || parseType(RetType, RetTypeLoc, true /*void allowed*/) || - parseValID(CalleeID) || + parseValID(CalleeID, &PFS) || parseParameterList(ArgList, PFS, TCK == CallInst::TCK_MustTail, PFS.getFunction().isVarArg()) || parseFnAttributeValuePairs(FnAttrs, FwdRefAttrGrps, false, BuiltinLoc) || @@ -7979,9 +8004,9 @@ bool LLParser::parseUseListOrderBB() { ValID Fn, Label; SmallVector Indexes; - if (parseValID(Fn) || + if (parseValID(Fn, /*PFS=*/nullptr) || parseToken(lltok::comma, "expected comma in uselistorder_bb directive") || - parseValID(Label) || + parseValID(Label, /*PFS=*/nullptr) || parseToken(lltok::comma, "expected comma in uselistorder_bb directive") || parseUseListOrderIndexes(Indexes)) return true; diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp index 6dcf8b5b6a67d..bc197b98b327b 100644 --- a/llvm/lib/IR/Constants.cpp +++ b/llvm/lib/IR/Constants.cpp @@ -1829,8 +1829,8 @@ BlockAddress *BlockAddress::get(Function *F, BasicBlock *BB) { } BlockAddress::BlockAddress(Function *F, BasicBlock *BB) -: Constant(Type::getInt8PtrTy(F->getContext()), Value::BlockAddressVal, - &Op<0>(), 2) { + : Constant(Type::getInt8PtrTy(F->getContext(), F->getAddressSpace()), + Value::BlockAddressVal, &Op<0>(), 2) { setOperand(0, F); setOperand(1, BB); BB->AdjustBlockAddressRefCount(1); diff --git a/llvm/test/Bitcode/blockaddress-addrspace.ll b/llvm/test/Bitcode/blockaddress-addrspace.ll new file mode 100644 index 0000000000000..4d7a474bca3fa --- /dev/null +++ b/llvm/test/Bitcode/blockaddress-addrspace.ll @@ -0,0 +1,286 @@ +; RUN: rm -rf %t && split-file %s %t +; RUN: llvm-as %t/global-use-good.ll -o - | llvm-dis -o /dev/null +; RUN: not llvm-as %t/global-use-bad.ll -o /dev/null 2>&1 | FileCheck %t/global-use-bad.ll +; RUN: llvm-as %t/global-fwddecl-good.ll -o - | llvm-dis -o /dev/null +; RUN: not llvm-as %t/global-fwddecl-bad.ll -o /dev/null 2>&1 | FileCheck %t/global-fwddecl-bad.ll +; RUN: llvm-as %t/return-fwddecl-good.ll -o - | llvm-dis -o /dev/null +; RUN: not llvm-as %t/return-fwddecl-bad.ll -o /dev/null 2>&1 | FileCheck %t/return-fwddecl-bad.ll +; RUN: llvm-as %t/return-self-good.ll -o - | llvm-dis -o /dev/null +; RUN: not llvm-as %t/return-self-bad.ll -o /dev/null 2>&1 | FileCheck %t/return-self-bad.ll +; RUN: not llvm-as %t/return-self-bad-2.ll -o /dev/null 2>&1 | FileCheck %t/return-self-bad-2.ll +; RUN: not llvm-as %t/return-unknown-fn-bad.ll -o /dev/null 2>&1 | FileCheck %t/return-unknown-fn-bad.ll +; RUN: llvm-as %t/call-fwddecl-good.ll -o - | llvm-dis -o /dev/null +; RUN: not llvm-as %t/call-fwddecl-bad.ll -o /dev/null 2>&1 | FileCheck %t/call-fwddecl-bad.ll +; RUN: llvm-as %t/phi-good.ll -o - | llvm-dis -o /dev/null +; RUN: not llvm-as %t/phi-bad.ll -o /dev/null 2>&1 | FileCheck %t/phi-bad.ll +; RUN: llvm-as %t/fwddecl-phi-good.ll -o - | llvm-dis -o /dev/null +; RUN: not llvm-as %t/fwddecl-phi-bad.ll -o /dev/null 2>&1 | FileCheck %t/fwddecl-phi-bad.ll +; RUN: not llvm-as %t/bad-type-not-ptr.ll -o /dev/null 2>&1 | FileCheck %t/bad-type-not-ptr.ll +; RUN: not llvm-as %t/bad-type-not-i8-ptr.ll -o /dev/null 2>&1 | FileCheck %t/bad-type-not-i8-ptr.ll + + +;--- global-use-good.ll +target datalayout = "P2" +define void @fn_in_prog_as_implicit() { + unreachable +bb: + ret void +} +define void @fn_in_prog_as_explicit() addrspace(2) { + unreachable +bb: + ret void +} +define void @fn_in_other_as() addrspace(1) { + unreachable +bb: + ret void +} +@global1 = constant i8 addrspace(2)* blockaddress(@fn_in_prog_as_implicit, %bb) +@global2 = constant i8 addrspace(2)* blockaddress(@fn_in_prog_as_explicit, %bb) +@global3 = constant i8 addrspace(1)* blockaddress(@fn_in_other_as, %bb) + +;--- global-use-bad.ll +define void @fn() addrspace(1) { + unreachable +bb: + ret void +} +@global1 = constant i8 addrspace(2)* blockaddress(@fn, %bb) +; CHECK: [[#@LINE-1]]:38: error: constant expression type mismatch: got type 'i8 addrspace(1)*' but expected 'i8 addrspace(2)*' + +; Check that a global blockaddress of a forward-declared function +; uses the type of the global variable address space for the forward declaration +;--- global-fwddecl-good.ll +@global = constant i8 addrspace(2)* blockaddress(@fwddecl_in_prog_as, %bb) +define void @fwddecl_in_prog_as() addrspace(2) { + unreachable +bb: + ret void +} + +;--- global-fwddecl-bad.ll +; This forward declaration does not match the actual function type so we should get an error: +@global = constant i8 addrspace(2)* blockaddress(@fwddecl_in_unexpected_as, %bb) +; CHECK: [[#@LINE-1]]:77: error: 'bb' defined with type 'i8 addrspace(1)*' but expected 'i8 addrspace(2)*' +define void @fwddecl_in_unexpected_as() addrspace(1) { + unreachable +bb: + ret void +} + + +; When returning blockaddresses of forward-declared functions we +; can also use the type of the variable. +;--- return-fwddecl-good.ll +define i8 addrspace(2)* @take_as2() { + ret i8 addrspace(2)* blockaddress(@fwddecl_as2, %bb) +} +define i8 addrspace(1)* @take_as1() { + ret i8 addrspace(1)* blockaddress(@fwddecl_as1, %bb) +} +define void @fwddecl_as1() addrspace(1) { + unreachable +bb: + ret void +} +define void @fwddecl_as2() addrspace(2) { + unreachable +bb: + ret void +} + +;--- return-fwddecl-bad.ll +define i8 addrspace(2)* @take_bad() { + ret i8 addrspace(2)* blockaddress(@fwddecl_as1, %bb) + ; CHECK: [[#@LINE-1]]:51: error: 'bb' defined with type 'i8 addrspace(1)*' but expected 'i8 addrspace(2)*' +} +define void @fwddecl_as1() addrspace(1) { + unreachable +bb: + ret void +} + +;--- return-self-good.ll +target datalayout = "P2" +define i8 addrspace(0)* @take_self_as0() addrspace(0) { +L1: + br label %L2 +L2: + ret i8 addrspace(0)* blockaddress(@take_self_as0, %L3) +L3: + unreachable +} +define i8 addrspace(2)* @take_self_prog_as() { +L1: + br label %L2 +L2: + ret i8 addrspace(2)* blockaddress(@take_self_prog_as, %L3) +L3: + unreachable +} +define i8 addrspace(1)* @take_self_as1() addrspace(1) { +L1: + br label %L2 +L2: + ret i8 addrspace(1)* blockaddress(@take_self_as1, %L3) +L3: + unreachable +} +define i8 addrspace(2)* @take_self_as2() addrspace(2) { +L1: + br label %L2 +L2: + ret i8 addrspace(2)* blockaddress(@take_self_as2, %L3) +L3: + unreachable +} + +;--- return-self-bad.ll +target datalayout = "P2" +define i8 addrspace(2)* @take_self_bad() addrspace(1) { +L1: + br label %L2 +L2: + ret i8 addrspace(2)* blockaddress(@take_self_bad, %L3) + ; CHECK: [[#@LINE-1]]:24: error: constant expression type mismatch: got type 'i8 addrspace(1)*' but expected 'i8 addrspace(2)*' +L3: + unreachable +} +;--- return-self-bad-2.ll +target datalayout = "P2" +define i8* @take_self_bad_prog_as() { +L1: + br label %L2 +L2: + ret i8* blockaddress(@take_self_bad_prog_as, %L3) + ; CHECK: [[#@LINE-1]]:11: error: constant expression type mismatch: got type 'i8 addrspace(2)*' but expected 'i8*' +L3: + unreachable +} + +;--- return-unknown-fn-bad.ll +target datalayout = "P2" +define i8 addrspace(1)* @return_unknown_fn() addrspace(1) { + ret i8 addrspace(1)* blockaddress(@undefined, %bb) + ; CHECK: [[#@LINE-1]]:37: error: expected function name in blockaddress +} + + +;--- call-fwddecl-good.ll +target datalayout = "P2" +define void @call_from_fn_in_as2() addrspace(2) { + call addrspace(2) void bitcast (i8 addrspace(2)* blockaddress(@fwddecl_as2, %bb) to void () addrspace(2)*)() + ret void +} +define void @call_from_fn_in_as1() addrspace(1) { + call addrspace(1) void bitcast (i8 addrspace(1)* blockaddress(@fwddecl_as1, %bb) to void () addrspace(1)*)() + ret void +} +define void @fwddecl_as2() addrspace(2) { + unreachable +bb: + ret void +} +define void @fwddecl_as1() addrspace(1) { + unreachable +bb: + ret void +} + +;--- call-fwddecl-bad.ll +target datalayout = "P2" +define void @call_from_fn_in_as2_explicit() addrspace(2) { + call addrspace(2) void bitcast (i8 addrspace(2)* blockaddress(@fwddecl_as1, %bb) to void () addrspace(2)*)() + ; CHECK: [[#@LINE-1]]:79: error: 'bb' defined with type 'i8 addrspace(1)*' but expected 'i8 addrspace(2)*' + ret void +} +define void @fwddecl_as1() addrspace(1) { + unreachable +bb: + ret void +} + +;--- phi-good.ll +target datalayout = "P2" +define i8 addrspace(1)* @f1() addrspace(1) { +L1: + br label %L3 +L2: + br label %L3 +L3: + %p = phi i8 addrspace(1)* [ blockaddress(@f1, %L4), %L2 ], [ null, %L1 ] + ret i8 addrspace(1)* %p +L4: + unreachable +} +define i8 addrspace(2)* @f2() { +L1: + br label %L3 +L2: + br label %L3 +L3: + %p = phi i8 addrspace(2)* [ blockaddress(@f2, %L4), %L2 ], [ null, %L1 ] + ret i8 addrspace(2)* %p +L4: + unreachable +} + +;--- phi-bad.ll +target datalayout = "P2" +define i8* @f() { +L1: + br label %L3 +L2: + br label %L3 +L3: + %p = phi i8* [ blockaddress(@f, %L4), %L2 ], [ null, %L1 ] + ; CHECK: [[#@LINE-1]]:18: error: constant expression type mismatch: got type 'i8 addrspace(2)*' but expected 'i8*' + ret i8* %p +} + +; A blockaddress function forward-declaration used in a phi node should +; create the forward declaration in the same address space as the current function +;--- fwddecl-phi-good.ll +define i8 addrspace(1)* @f() addrspace(1) { +L1: + br label %L3 +L2: + br label %L3 +L3: + %p = phi i8 addrspace(1)* [ blockaddress(@fwddecl_as1, %bb), %L2 ], [ null, %L1 ] + ret i8 addrspace(1)* %p +L4: + unreachable +} +define void @fwddecl_as1() addrspace(1) { + unreachable +bb: + ret void +} + +;--- fwddecl-phi-bad.ll +define i8 addrspace(2)* @f() addrspace(2) { +L1: + br label %L3 +L2: + br label %L3 +L3: + %p = phi i8 addrspace(2)* [ blockaddress(@fwddecl_as1, %bb), %L2 ], [ null, %L1 ] + ; CHECK: [[#@LINE-1]]:58: error: 'bb' defined with type 'i8 addrspace(1)*' but expected 'i8 addrspace(2)*' + ret i8 addrspace(2)* %p +L4: + unreachable +} +define void @fwddecl_as1() addrspace(1) { + unreachable +bb: + ret void +} + +;--- bad-type-not-ptr.ll +@global = constant i8 blockaddress(@unknown_fn, %bb) +; CHECK: [[#@LINE-1]]:23: error: type of blockaddress must be a pointer and not 'i8' +;--- bad-type-not-i8-ptr.ll +@global = constant i32* blockaddress(@unknown_fn, %bb) +; CHECK: [[#@LINE-1]]:25: error: constant expression type mismatch: got type 'i8*' but expected 'i32*' diff --git a/llvm/test/CodeGen/AVR/block-address-is-in-progmem-space.ll b/llvm/test/CodeGen/AVR/block-address-is-in-progmem-space.ll new file mode 100644 index 0000000000000..8e6e3a7106283 --- /dev/null +++ b/llvm/test/CodeGen/AVR/block-address-is-in-progmem-space.ll @@ -0,0 +1,51 @@ +; RUN: llc -mcpu=atmega328 < %s -march=avr | FileCheck %s + +; This test verifies that the pointer to a basic block +; should always be a pointer in address space 1. +; +; If this were not the case, then programs targeting +; AVR that attempted to read their own machine code +; via a pointer to a label would actually read from RAM +; using a pointer relative to the the start of program flash. +; +; This would cause a load of uninitialized memory, not even +; touching the program's machine code as otherwise desired. + +target datalayout = "e-P1-p:16:8-i8:8-i16:8-i32:8-i64:8-f32:8-f64:8-n8-a:8" + +; CHECK-LABEL: load_with_no_forward_reference +define i8 @load_with_no_forward_reference(i8 %a, i8 %b) { +second: + ; CHECK: ldi r30, .Ltmp0+2 + ; CHECK-NEXT: ldi r31, .Ltmp0+4 + ; CHECK: lpm r24, Z + %bar = load i8, i8 addrspace(1)* blockaddress(@function_with_no_forward_reference, %second) + ret i8 %bar +} + +; CHECK-LABEL: load_from_local_label +define i8 @load_from_local_label(i8 %a, i8 %b) { +entry: + %result1 = add i8 %a, %b + + br label %second + +; CHECK-LABEL: .Ltmp1: +second: + ; CHECK: ldi r30, .Ltmp1+2 + ; CHECK-NEXT: ldi r31, .Ltmp1+4 + ; CHECK-NEXT: lpm r24, Z + %result2 = load i8, i8 addrspace(1)* blockaddress(@load_from_local_label, %second) + ret i8 %result2 +} + +; A function with no forward reference, right at the end +; of the file. +define i8 @function_with_no_forward_reference(i8 %a, i8 %b) { +entry: + %result = add i8 %a, %b + br label %second +second: + ret i8 0 +} + diff --git a/llvm/test/CodeGen/AVR/brind.ll b/llvm/test/CodeGen/AVR/brind.ll index 4eea966062db7..b66319c872fe5 100644 --- a/llvm/test/CodeGen/AVR/brind.ll +++ b/llvm/test/CodeGen/AVR/brind.ll @@ -1,15 +1,15 @@ ; RUN: llc -mattr=sram,eijmpcall < %s -march=avr -verify-machineinstrs | FileCheck %s -@brind.k = private unnamed_addr constant [2 x i8*] [i8* blockaddress(@brind, %return), i8* blockaddress(@brind, %b)], align 1 +@brind.k = private unnamed_addr constant [2 x i8 addrspace(1)*] [i8 addrspace(1)* blockaddress(@brind, %return), i8 addrspace(1)* blockaddress(@brind, %b)], align 1 define i8 @brind(i8 %p) { ; CHECK-LABEL: brind: ; CHECK: ijmp entry: %idxprom = sext i8 %p to i16 - %arrayidx = getelementptr inbounds [2 x i8*], [2 x i8*]* @brind.k, i16 0, i16 %idxprom - %s = load i8*, i8** %arrayidx - indirectbr i8* %s, [label %return, label %b] + %arrayidx = getelementptr inbounds [2 x i8 addrspace(1)*], [2 x i8 addrspace(1)*]* @brind.k, i16 0, i16 %idxprom + %s = load i8 addrspace(1)*, i8 addrspace(1)** %arrayidx + indirectbr i8 addrspace(1)* %s, [label %return, label %b] b: br label %return return: From e90c6f559637446330335ce6638ae3e3827992e8 Mon Sep 17 00:00:00 2001 From: Alexandru Octavian Butiu Date: Fri, 2 Jul 2021 18:50:07 +0800 Subject: [PATCH 513/619] [MachineCopyPropagation] Fix differences in code gen when compiling with -g Fixes bugs [[ https://bugs.llvm.org/show_bug.cgi?id=50580 | 50580 ]] and [[ https://bugs.llvm.org/show_bug.cgi?id=49446 | 49446 ]] When compiling with -g "DBG_VALUE " instructions are added in the MIR, if such a instruction is inserted between instructions that use then MachineCopyPropagation invalidates , this causes some copies to not be propagated and causes differences in code generation (ex bugs 50580 and 49446 ). DBG_VALUE instructions should be ignored since they don't actually modify the register. Reviewed By: lkail Differential Revision: https://reviews.llvm.org/D104394 --- llvm/lib/CodeGen/MachineCopyPropagation.cpp | 24 +++++++++++++++++-- .../CodeGen/X86/machine-copy-dbgvalue.mir | 20 ++++++++++++++++ 2 files changed, 42 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/X86/machine-copy-dbgvalue.mir diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp index 7bac590580599..10b74f5f47f55 100644 --- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp +++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ -870,12 +870,32 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock( if (MO.isDef()) Tracker.invalidateRegister(MO.getReg().asMCReg(), *TRI); - if (MO.readsReg()) - Tracker.invalidateRegister(MO.getReg().asMCReg(), *TRI); + if (MO.readsReg()) { + if (MO.isDebug()) { + // Check if the register in the debug instruction is utilized + // in a copy instruction, so we can update the debug info if the + // register is changed. + for (MCRegUnitIterator RUI(MO.getReg().asMCReg(), TRI); RUI.isValid(); + ++RUI) { + if (auto *Copy = Tracker.findCopyDefViaUnit(*RUI, *TRI)) { + CopyDbgUsers[Copy].insert(MI); + } + } + } else { + Tracker.invalidateRegister(MO.getReg().asMCReg(), *TRI); + } + } } } for (auto *Copy : MaybeDeadCopies) { + + Register Src = Copy->getOperand(1).getReg(); + Register Def = Copy->getOperand(0).getReg(); + SmallVector MaybeDeadDbgUsers(CopyDbgUsers[Copy].begin(), + CopyDbgUsers[Copy].end()); + + MRI->updateDbgUsersToReg(Src.asMCReg(), Def.asMCReg(), MaybeDeadDbgUsers); Copy->eraseFromParent(); ++NumDeletes; } diff --git a/llvm/test/CodeGen/X86/machine-copy-dbgvalue.mir b/llvm/test/CodeGen/X86/machine-copy-dbgvalue.mir new file mode 100644 index 0000000000000..914beab052dbc --- /dev/null +++ b/llvm/test/CodeGen/X86/machine-copy-dbgvalue.mir @@ -0,0 +1,20 @@ +# RUN: llc -mtriple=i686-- -run-pass machine-cp -verify-machineinstrs -o - %s | FileCheck %s + + +--- +# Test that machine copy propagation ignores DBG_VALUE and DBL_VALUE_LIST and updates it. +# CHECK-LABEL: name: foo +# CHECK: bb.0: +# CHECK-NEXT: $rax = MOV64ri 31 +# CHECK-NEXT: DBG_VALUE $rax +# CHECK-NEXT: DBG_VALUE_LIST 0, !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_constu, 4, DW_OP_mul, DW_OP_plus, DW_OP_stack_value), $rax, 0, 0 +# CHECK-NEXT: RETQ implicit killed $rax +name: foo +body: | + bb.0: + renamable $rcx = MOV64ri 31 + DBG_VALUE $rcx, 0, 0, 0, 0 + DBG_VALUE_LIST 0, !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_constu, 4, DW_OP_mul, DW_OP_plus, DW_OP_stack_value), $rcx, 0, 0 + $rax = COPY killed renamable $rcx + RETQ implicit killed $rax +... From 28ac873bcb319c870522fc7cc8eac96b220f22e4 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 1 Jul 2021 05:14:24 -0700 Subject: [PATCH 514/619] [SLP]Fix gathering of the scalars by not ignoring UndefValues. The compiler should not ignore UndefValue when gathering the scalars, otherwise the resulting code may be less defined than the original one. Also, grouped scalars to insert them at first to reduce the analysis in further passes. Differential Revision: https://reviews.llvm.org/D105275 --- .../Transforms/Vectorize/SLPVectorizer.cpp | 14 +++++++++---- .../AArch64/insertelement-inseltpoison.ll | 2 +- .../SLPVectorizer/AArch64/insertelement.ll | 2 +- .../SLPVectorizer/AArch64/trunc-insertion.ll | 10 +++++----- .../X86/crash_exceed_scheduling.ll | 4 ++-- .../SLPVectorizer/X86/crash_lencod.ll | 2 +- .../SLPVectorizer/X86/crash_mandeltext.ll | 6 +++--- .../SLPVectorizer/X86/crash_smallpt.ll | 20 +++++++++---------- .../SLPVectorizer/X86/geps-non-pow-2.ll | 2 +- .../test/Transforms/SLPVectorizer/X86/phi3.ll | 12 +++++------ .../SLPVectorizer/X86/phi_landingpad.ll | 4 ++-- .../Transforms/SLPVectorizer/X86/pr35497.ll | 4 ++-- .../SLPVectorizer/X86/reorder_repeated_ops.ll | 10 +++++----- .../SLPVectorizer/X86/shrink_after_reorder.ll | 2 +- .../X86/value-bug-inseltpoison.ll | 16 +++++++-------- .../Transforms/SLPVectorizer/X86/value-bug.ll | 16 +++++++-------- 16 files changed, 66 insertions(+), 60 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 47424541ff5bf..adad762267212 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -4838,9 +4838,6 @@ Value *BoUpSLP::gather(ArrayRef VL) { } auto &&CreateInsertElement = [this](Value *Vec, Value *V, unsigned Pos) { - // No need to insert undefs elements - exit. - if (isa(V)) - return Vec; Vec = Builder.CreateInsertElement(Vec, V, Builder.getInt32(Pos)); auto *InsElt = dyn_cast(Vec); if (!InsElt) @@ -4865,11 +4862,20 @@ Value *BoUpSLP::gather(ArrayRef VL) { isa(VL[0]) ? cast(VL[0])->getValueOperand() : VL[0]; FixedVectorType *VecTy = FixedVectorType::get(Val0->getType(), VL.size()); Value *Vec = PoisonValue::get(VecTy); + SmallVector NonConsts; + // Insert constant values at first. for (int I = 0, E = VL.size(); I < E; ++I) { if (PostponedIndices.contains(I)) continue; + if (!isConstant(VL[I])) { + NonConsts.push_back(I); + continue; + } Vec = CreateInsertElement(Vec, VL[I], I); } + // Insert non-constant values. + for (int I : NonConsts) + Vec = CreateInsertElement(Vec, VL[I], I); // Append instructions, which are/may be part of the loop, in the end to make // it possible to hoist non-loop-based instructions. for (const std::pair &Pair : PostponedInsts) @@ -5043,7 +5049,7 @@ Value *BoUpSLP::vectorizeTree(ArrayRef VL) { UniqueValues.append(VL.begin(), std::next(VL.begin(), NumValues)); } UniqueValues.append(VF - UniqueValues.size(), - UndefValue::get(VL[0]->getType())); + PoisonValue::get(VL[0]->getType())); VL = UniqueValues; } diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/insertelement-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/insertelement-inseltpoison.ll index 8f386d672487a..c6ceb1cc7e793 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/insertelement-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/insertelement-inseltpoison.ll @@ -6,7 +6,7 @@ target triple = "aarch64-unknown-linux-gnu" define <2 x float> @insertelement-fixed-vector() { ; CHECK-LABEL: @insertelement-fixed-vector( -; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x float> @llvm.fabs.v2f32(<2 x float> poison) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x float> @llvm.fabs.v2f32(<2 x float> undef) ; CHECK-NEXT: ret <2 x float> [[TMP1]] ; %f0 = tail call fast float @llvm.fabs.f32(float undef) diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/insertelement.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/insertelement.ll index ffe3ab4382d12..12031a679e831 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/insertelement.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/insertelement.ll @@ -6,7 +6,7 @@ target triple = "aarch64-unknown-linux-gnu" define <2 x float> @insertelement-fixed-vector() { ; CHECK-LABEL: @insertelement-fixed-vector( -; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x float> @llvm.fabs.v2f32(<2 x float> poison) +; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x float> @llvm.fabs.v2f32(<2 x float> undef) ; CHECK-NEXT: ret <2 x float> [[TMP1]] ; %f0 = tail call fast float @llvm.fabs.f32(float undef) diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/trunc-insertion.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/trunc-insertion.ll index 184630c47bcac..f6ab38bb3935e 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/trunc-insertion.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/trunc-insertion.ll @@ -8,21 +8,21 @@ define dso_local void @l() local_unnamed_addr { ; CHECK-NEXT: bb: ; CHECK-NEXT: br label [[BB1:%.*]] ; CHECK: bb1: -; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i16> [ poison, [[BB:%.*]] ], [ [[TMP11:%.*]], [[BB25:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i16> [ undef, [[BB:%.*]] ], [ [[TMP11:%.*]], [[BB25:%.*]] ] ; CHECK-NEXT: br i1 undef, label [[BB3:%.*]], label [[BB11:%.*]] ; CHECK: bb3: ; CHECK-NEXT: [[I4:%.*]] = zext i1 undef to i32 -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i16> [[TMP0]], poison +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i16> [[TMP0]], undef ; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <2 x i16> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32> ; CHECK-NEXT: br label [[BB25]] ; CHECK: bb11: ; CHECK-NEXT: [[I12:%.*]] = zext i1 undef to i32 -; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i16> [[TMP0]], poison +; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i16> [[TMP0]], undef ; CHECK-NEXT: [[TMP5:%.*]] = sext <2 x i16> [[TMP4]] to <2 x i64> -; CHECK-NEXT: [[TMP6:%.*]] = icmp ule <2 x i64> poison, [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ule <2 x i64> undef, [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = zext <2 x i1> [[TMP6]] to <2 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = icmp ult <2 x i32> poison, [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp ult <2 x i32> undef, [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = zext <2 x i1> [[TMP8]] to <2 x i32> ; CHECK-NEXT: br label [[BB25]] ; CHECK: bb25: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll index 8d66bc9565a03..6be7dda2375d1 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll @@ -34,8 +34,8 @@ define void @exceed(double %0, double %1) { ; CHECK-NEXT: [[TMP11:%.*]] = fadd fast <2 x double> [[TMP3]], [[TMP5]] ; CHECK-NEXT: [[TMP12:%.*]] = fmul fast <2 x double> [[TMP10]], [[TMP11]] ; CHECK-NEXT: [[IXX101:%.*]] = fsub double undef, undef -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x double> poison, double [[TMP7]], i32 0 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x double> poison, double [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x double> , double [[TMP7]], i32 0 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x double> , double [[TMP1]], i32 1 ; CHECK-NEXT: [[TMP15:%.*]] = fmul fast <2 x double> [[TMP13]], [[TMP14]] ; CHECK-NEXT: switch i32 undef, label [[BB1:%.*]] [ ; CHECK-NEXT: i32 0, label [[BB2:%.*]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll index d4be8dc00581c..fa8c8a14e891c 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll @@ -130,7 +130,7 @@ define fastcc void @dct36(double* %inbuf) { ; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[INBUF]] to <2 x double>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> , double [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[ARRAYIDX44]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll index da96683570501..e3ff057355537 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll @@ -97,10 +97,10 @@ define void @zot(%struct.hoge* %arg) { ; CHECK-NEXT: [[TMP2:%.*]] = load double, double* undef, align 8 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[TMP]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> [[TMP1]], poison +; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> [[TMP1]], undef ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_HOGE:%.*]], %struct.hoge* [[ARG:%.*]], i64 0, i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP2]], poison -; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> [[TMP3]], poison +; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP2]], undef +; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> [[TMP3]], undef ; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[TMP7]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8 ; CHECK-NEXT: br i1 undef, label [[BB11:%.*]], label [[BB12:%.*]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll index d75d9cbfcf56c..c8beac34fc90d 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll @@ -30,11 +30,11 @@ define void @main() #0 { ; CHECK-NEXT: br i1 undef, label [[COND_TRUE63_US:%.*]], label [[COND_FALSE66_US:%.*]] ; CHECK: cond.false66.us: ; CHECK-NEXT: [[ADD_I276_US:%.*]] = fadd double 0.000000e+00, undef -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[ADD_I276_US]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> , double [[ADD_I276_US]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x double> [[TMP0]], ; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> poison, [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> undef, [[TMP1]] ; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[AGG_TMP99208_SROA_0_0_IDX]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[TMP5]], align 8 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[AGG_TMP101211_SROA_0_0_IDX]] to <2 x double>* @@ -110,14 +110,14 @@ define void @_Z8radianceRK3RayiPt() #0 { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 undef, label [[IF_THEN78:%.*]], label [[IF_THEN38:%.*]] ; CHECK: if.then38: -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double poison, i32 1 -; CHECK-NEXT: [[TMP1:%.*]] = fmul <2 x double> poison, [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> poison, [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> poison, [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> poison, [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> poison, [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> poison, [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x double> poison, [[TMP6]] +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> , double undef, i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = fmul <2 x double> undef, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> undef, [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> undef, [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> undef, [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> undef, [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> undef, [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x double> undef, [[TMP6]] ; CHECK-NEXT: [[AGG_TMP74663_SROA_0_0_IDX:%.*]] = getelementptr inbounds [[STRUCT_RAY_5_11_53_95_137_191_197_203_239_257_263_269_275_281_287_293_383_437_443_455_461_599_601:%.*]], %struct.Ray.5.11.53.95.137.191.197.203.239.257.263.269.275.281.287.293.383.437.443.455.461.599.601* undef, i64 0, i32 1, i32 0 ; CHECK-NEXT: [[TMP8:%.*]] = bitcast double* [[AGG_TMP74663_SROA_0_0_IDX]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP7]], <2 x double>* [[TMP8]], align 8 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll b/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll index 1ecda901cf1de..596543880d365 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll @@ -12,7 +12,7 @@ define dso_local i32 @g() local_unnamed_addr { ; CHECK-NEXT: br i1 [[TOBOOL_NOT19]], label [[WHILE_END:%.*]], label [[WHILE_BODY:%.*]] ; CHECK: while.body: ; CHECK-NEXT: [[C_022:%.*]] = phi i32* [ [[C_022_BE:%.*]], [[WHILE_BODY_BACKEDGE:%.*]] ], [ undef, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32*> [ [[TMP14:%.*]], [[WHILE_BODY_BACKEDGE]] ], [ poison, [[ENTRY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32*> [ [[TMP14:%.*]], [[WHILE_BODY_BACKEDGE]] ], [ undef, [[ENTRY]] ] ; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, i32* [[C_022]], i64 1 ; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint i32* [[C_022]] to i64 ; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi3.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi3.ll index 88825a7cf40f3..b8e5c3e89f6f7 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/phi3.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/phi3.ll @@ -14,12 +14,12 @@ define void @Rf_GReset() { ; CHECK-LABEL: @Rf_GReset( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load double, double* @d, align 8 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> , double [[TMP0]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> , [[TMP1]] ; CHECK-NEXT: br i1 icmp eq (%struct.GPar.0.16.26* (...)* inttoptr (i64 115 to %struct.GPar.0.16.26* (...)*), %struct.GPar.0.16.26* (...)* @Rf_gpptr), label [[IF_THEN:%.*]], label [[IF_END7:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP2]], poison -; CHECK-NEXT: [[TMP4:%.*]] = fdiv <2 x double> [[TMP3]], poison +; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP2]], undef +; CHECK-NEXT: [[TMP4:%.*]] = fdiv <2 x double> [[TMP3]], undef ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP4]], i32 1 ; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP5]], [[TMP6]] @@ -55,12 +55,12 @@ define void @Rf_GReset_unary_fneg() { ; CHECK-LABEL: @Rf_GReset_unary_fneg( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load double, double* @d, align 8 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> , double [[TMP0]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = fneg <2 x double> [[TMP1]] ; CHECK-NEXT: br i1 icmp eq (%struct.GPar.0.16.26* (...)* inttoptr (i64 115 to %struct.GPar.0.16.26* (...)*), %struct.GPar.0.16.26* (...)* @Rf_gpptr), label [[IF_THEN:%.*]], label [[IF_END7:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP2]], poison -; CHECK-NEXT: [[TMP4:%.*]] = fdiv <2 x double> [[TMP3]], poison +; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP2]], undef +; CHECK-NEXT: [[TMP4:%.*]] = fdiv <2 x double> [[TMP3]], undef ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP4]], i32 1 ; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP5]], [[TMP6]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi_landingpad.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi_landingpad.ll index a94ba522eaa92..0a752889065f0 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/phi_landingpad.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/phi_landingpad.ll @@ -12,12 +12,12 @@ define void @test_phi_in_landingpad() personality i8* ; CHECK-NEXT: invoke void @foo() ; CHECK-NEXT: to label [[DONE:%.*]] unwind label [[LPAD]] ; CHECK: lpad: -; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x double> [ poison, [[ENTRY:%.*]] ], [ poison, [[INNER]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x double> [ undef, [[ENTRY:%.*]] ], [ undef, [[INNER]] ] ; CHECK-NEXT: [[TMP1:%.*]] = landingpad { i8*, i32 } ; CHECK-NEXT: catch i8* null ; CHECK-NEXT: br label [[DONE]] ; CHECK: done: -; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x double> [ poison, [[INNER]] ], [ [[TMP0]], [[LPAD]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x double> [ undef, [[INNER]] ], [ [[TMP0]], [[LPAD]] ] ; CHECK-NEXT: ret void ; bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll index 8871133cdf876..e283628949107 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll @@ -103,11 +103,11 @@ define void @pr35497() local_unnamed_addr #0 { ; AVX-NEXT: [[ADD:%.*]] = add i64 undef, undef ; AVX-NEXT: store i64 [[ADD]], i64* undef, align 1 ; AVX-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds [0 x i64], [0 x i64]* undef, i64 0, i64 5 -; AVX-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i32 1 +; AVX-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> , i64 [[TMP0]], i32 1 ; AVX-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[TMP1]], ; AVX-NEXT: [[TMP3:%.*]] = and <2 x i64> [[TMP2]], ; AVX-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds [0 x i64], [0 x i64]* undef, i64 0, i64 4 -; AVX-NEXT: [[TMP4:%.*]] = add nuw nsw <2 x i64> [[TMP3]], poison +; AVX-NEXT: [[TMP4:%.*]] = add nuw nsw <2 x i64> [[TMP3]], zeroinitializer ; AVX-NEXT: [[ARRAYIDX2_5:%.*]] = getelementptr inbounds [0 x i64], [0 x i64]* undef, i64 0, i64 1 ; AVX-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1 ; AVX-NEXT: [[TMP6:%.*]] = insertelement <2 x i64> poison, i64 [[TMP5]], i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll index 3cdb56d4545b2..52a1bc605ffa8 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll @@ -11,17 +11,17 @@ define void @hoge() { ; CHECK-NEXT: ret void ; CHECK: bb2: ; CHECK-NEXT: [[T:%.*]] = select i1 undef, i16 undef, i16 15 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i16> poison, i16 [[T]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i16> , i16 [[T]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = sext <2 x i16> [[TMP0]] to <2 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = sub nsw <2 x i32> , [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = sub <2 x i32> [[TMP2]], poison +; CHECK-NEXT: [[TMP2:%.*]] = sub nsw <2 x i32> , [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = sub <2 x i32> [[TMP2]], undef ; CHECK-NEXT: [[SHUFFLE10:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[SHUFFLE10]], ; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP4]]) ; CHECK-NEXT: [[T19:%.*]] = select i1 undef, i32 [[TMP5]], i32 undef ; CHECK-NEXT: [[T20:%.*]] = icmp sgt i32 [[T19]], 63 -; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <2 x i32> poison, [[TMP1]] -; CHECK-NEXT: [[TMP7:%.*]] = sub <2 x i32> [[TMP6]], poison +; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <2 x i32> undef, [[TMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = sub <2 x i32> [[TMP6]], undef ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[SHUFFLE]], ; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP8]]) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll b/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll index 9adc9fbdbd7de..86d728cb7c4bd 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll @@ -66,7 +66,7 @@ define internal i32 @ipvideo_decode_block_opcode_0xD_16() { ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i16> [ poison, [[ENTRY:%.*]] ], [ [[SHRINK_SHUFFLE:%.*]], [[IF_END:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i16> [ undef, [[ENTRY:%.*]] ], [ [[SHRINK_SHUFFLE:%.*]], [[IF_END:%.*]] ] ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[TMP0]], <2 x i16> poison, <8 x i32> ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/value-bug-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/value-bug-inseltpoison.ll index 583a896374db4..fc41ec9e93869 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/value-bug-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/value-bug-inseltpoison.ll @@ -13,27 +13,27 @@ define void @test() { ; CHECK-NEXT: bb279: ; CHECK-NEXT: br label [[BB283:%.*]] ; CHECK: bb283: -; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x float> [ poison, [[BB279:%.*]] ], [ [[TMP11:%.*]], [[EXIT:%.*]] ] -; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x float> [ poison, [[BB279]] ], [ poison, [[EXIT]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x float> [ undef, [[BB279:%.*]] ], [ [[TMP11:%.*]], [[EXIT:%.*]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x float> [ undef, [[BB279]] ], [ undef, [[EXIT]] ] ; CHECK-NEXT: br label [[BB284:%.*]] ; CHECK: bb284: ; CHECK-NEXT: [[TMP2:%.*]] = fpext <2 x float> [[TMP0]] to <2 x double> -; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP2]], poison -; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> [[TMP3]], poison +; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP2]], undef +; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> [[TMP3]], undef ; CHECK-NEXT: br label [[BB21_I:%.*]] ; CHECK: bb21.i: ; CHECK-NEXT: br i1 undef, label [[BB22_I:%.*]], label [[EXIT]] ; CHECK: bb22.i: -; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> poison, [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> undef, [[TMP4]] ; CHECK-NEXT: br label [[BB32_I:%.*]] ; CHECK: bb32.i: ; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x double> [ [[TMP5]], [[BB22_I]] ], [ zeroinitializer, [[BB32_I]] ] ; CHECK-NEXT: br i1 undef, label [[BB32_I]], label [[BB21_I]] ; CHECK: exit: ; CHECK-NEXT: [[TMP7:%.*]] = fpext <2 x float> [[TMP1]] to <2 x double> -; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], -; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> poison, [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = fadd <2 x double> [[TMP9]], poison +; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], +; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> undef, [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = fadd <2 x double> [[TMP9]], undef ; CHECK-NEXT: [[TMP11]] = fptrunc <2 x double> [[TMP10]] to <2 x float> ; CHECK-NEXT: br label [[BB283]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/value-bug.ll b/llvm/test/Transforms/SLPVectorizer/X86/value-bug.ll index 24d97c32b636d..cdde0971b6df6 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/value-bug.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/value-bug.ll @@ -13,27 +13,27 @@ define void @test() { ; CHECK-NEXT: bb279: ; CHECK-NEXT: br label [[BB283:%.*]] ; CHECK: bb283: -; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x float> [ poison, [[BB279:%.*]] ], [ [[TMP11:%.*]], [[EXIT:%.*]] ] -; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x float> [ poison, [[BB279]] ], [ poison, [[EXIT]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x float> [ undef, [[BB279:%.*]] ], [ [[TMP11:%.*]], [[EXIT:%.*]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x float> [ undef, [[BB279]] ], [ undef, [[EXIT]] ] ; CHECK-NEXT: br label [[BB284:%.*]] ; CHECK: bb284: ; CHECK-NEXT: [[TMP2:%.*]] = fpext <2 x float> [[TMP0]] to <2 x double> -; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP2]], poison -; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> [[TMP3]], poison +; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP2]], undef +; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> [[TMP3]], undef ; CHECK-NEXT: br label [[BB21_I:%.*]] ; CHECK: bb21.i: ; CHECK-NEXT: br i1 undef, label [[BB22_I:%.*]], label [[EXIT]] ; CHECK: bb22.i: -; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> poison, [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> undef, [[TMP4]] ; CHECK-NEXT: br label [[BB32_I:%.*]] ; CHECK: bb32.i: ; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x double> [ [[TMP5]], [[BB22_I]] ], [ zeroinitializer, [[BB32_I]] ] ; CHECK-NEXT: br i1 undef, label [[BB32_I]], label [[BB21_I]] ; CHECK: exit: ; CHECK-NEXT: [[TMP7:%.*]] = fpext <2 x float> [[TMP1]] to <2 x double> -; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], -; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> poison, [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = fadd <2 x double> [[TMP9]], poison +; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], +; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> undef, [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = fadd <2 x double> [[TMP9]], undef ; CHECK-NEXT: [[TMP11]] = fptrunc <2 x double> [[TMP10]] to <2 x float> ; CHECK-NEXT: br label [[BB283]] ; From cdca1785d35f41ef154d7271bf97c4d1d0e69995 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 2 Jul 2021 13:07:07 +0100 Subject: [PATCH 515/619] [CostModel][X86] Adjust uitofp(vXi64) SSE/AVX legalized costs based on llvm-mca reports. Update v4i64 -> v4f32/v4f64 uitofp costs based on the worst case costs from the script in D103695. Fixes a few regressions before we start adding AVX costs for legalized types. --- .../lib/Target/X86/X86TargetTransformInfo.cpp | 5 ++ llvm/test/Analysis/CostModel/X86/uitofp.ll | 26 +++--- .../X86/uint64_to_fp64-cost-model.ll | 2 +- .../Transforms/SLPVectorizer/X86/uitofp.ll | 80 +++---------------- 4 files changed, 29 insertions(+), 84 deletions(-) diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index a58b15083b7d3..265c4d7bcf718 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -1968,6 +1968,8 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 4 }, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 5 }, { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 6 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i64, 18 }, + { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 10 }, { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 8 }, { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i32, 10 }, { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 5 }, @@ -2069,6 +2071,9 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, { ISD::UINT_TO_FP, MVT::f32, MVT::i64, 4 }, { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 4 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 12 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i64, 22 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 4 }, { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 3 }, { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 3 }, diff --git a/llvm/test/Analysis/CostModel/X86/uitofp.ll b/llvm/test/Analysis/CostModel/X86/uitofp.ll index 94d5a7c92f8d9..56cac09a5b453 100644 --- a/llvm/test/Analysis/CostModel/X86/uitofp.ll +++ b/llvm/test/Analysis/CostModel/X86/uitofp.ll @@ -114,16 +114,16 @@ define i32 @uitofp_i64_double() { ; ; SSE42-LABEL: 'uitofp_i64_double' ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_i64_f64 = uitofp i64 undef to double -; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cvt_v2i64_v2f64 = uitofp <2 x i64> undef to <2 x double> -; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cvt_v4i64_v4f64 = uitofp <4 x i64> undef to <4 x double> -; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %cvt_v8i64_v8f64 = uitofp <8 x i64> undef to <8 x double> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i64_v2f64 = uitofp <2 x i64> undef to <2 x double> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v4i64_v4f64 = uitofp <4 x i64> undef to <4 x double> +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cvt_v8i64_v8f64 = uitofp <8 x i64> undef to <8 x double> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'uitofp_i64_double' ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_i64_f64 = uitofp i64 undef to double ; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v2i64_v2f64 = uitofp <2 x i64> undef to <2 x double> -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cvt_v4i64_v4f64 = uitofp <4 x i64> undef to <4 x double> -; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cvt_v8i64_v8f64 = uitofp <8 x i64> undef to <8 x double> +; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cvt_v4i64_v4f64 = uitofp <4 x i64> undef to <4 x double> +; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v8i64_v8f64 = uitofp <8 x i64> undef to <8 x double> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'uitofp_i64_double' @@ -265,24 +265,24 @@ define i32 @uitofp_i64_float() { ; ; SSE42-LABEL: 'uitofp_i64_float' ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_i64_f32 = uitofp i64 undef to float -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cvt_v2i64_v2f32 = uitofp <2 x i64> undef to <2 x float> -; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %cvt_v4i64_v4f32 = uitofp <4 x i64> undef to <4 x float> -; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %cvt_v8i64_v8f32 = uitofp <8 x i64> undef to <8 x float> -; SSE42-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %cvt_v16i64_v16f32 = uitofp <16 x i64> undef to <16 x float> +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cvt_v2i64_v2f32 = uitofp <2 x i64> undef to <2 x float> +; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cvt_v4i64_v4f32 = uitofp <4 x i64> undef to <4 x float> +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cvt_v8i64_v8f32 = uitofp <8 x i64> undef to <8 x float> +; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cvt_v16i64_v16f32 = uitofp <16 x i64> undef to <16 x float> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'uitofp_i64_float' ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_i64_f32 = uitofp i64 undef to float ; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cvt_v2i64_v2f32 = uitofp <2 x i64> undef to <2 x float> -; AVX-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cvt_v4i64_v4f32 = uitofp <4 x i64> undef to <4 x float> -; AVX-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %cvt_v8i64_v8f32 = uitofp <8 x i64> undef to <8 x float> -; AVX-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %cvt_v16i64_v16f32 = uitofp <16 x i64> undef to <16 x float> +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cvt_v4i64_v4f32 = uitofp <4 x i64> undef to <4 x float> +; AVX-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %cvt_v8i64_v8f32 = uitofp <8 x i64> undef to <8 x float> +; AVX-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %cvt_v16i64_v16f32 = uitofp <16 x i64> undef to <16 x float> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'uitofp_i64_float' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f32 = uitofp i64 undef to float ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v2i64_v2f32 = uitofp <2 x i64> undef to <2 x float> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cvt_v4i64_v4f32 = uitofp <4 x i64> undef to <4 x float> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cvt_v4i64_v4f32 = uitofp <4 x i64> undef to <4 x float> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cvt_v8i64_v8f32 = uitofp <8 x i64> undef to <8 x float> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %cvt_v16i64_v16f32 = uitofp <16 x i64> undef to <16 x float> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef diff --git a/llvm/test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll index e08ef002d0ec7..6a43fa86057b2 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll @@ -7,7 +7,7 @@ target triple = "x86_64-apple-macosx10.8.0" ; CHECK: cost of 4 for VF 1 For instruction: %conv = uitofp i64 %tmp to double ; CHECK: cost of 5 for VF 2 For instruction: %conv = uitofp i64 %tmp to double -; CHECK: cost of 6 for VF 4 For instruction: %conv = uitofp i64 %tmp to double +; CHECK: cost of 10 for VF 4 For instruction: %conv = uitofp i64 %tmp to double define void @uint64_to_double_cost(i64* noalias nocapture %a, double* noalias nocapture readonly %b) nounwind { entry: br label %for.body diff --git a/llvm/test/Transforms/SLPVectorizer/X86/uitofp.ll b/llvm/test/Transforms/SLPVectorizer/X86/uitofp.ll index e3b8beb4a058c..2544a87b64d57 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/uitofp.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/uitofp.ll @@ -575,38 +575,11 @@ define void @uitofp_2i64_2f32() #0 { } define void @uitofp_4i64_4f32() #0 { -; SSE-LABEL: @uitofp_4i64_4f32( -; SSE-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64 -; SSE-NEXT: [[TMP2:%.*]] = uitofp <4 x i64> [[TMP1]] to <4 x float> -; SSE-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64 -; SSE-NEXT: ret void -; -; AVX256NODQ-LABEL: @uitofp_4i64_4f32( -; AVX256NODQ-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64 -; AVX256NODQ-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8 -; AVX256NODQ-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16 -; AVX256NODQ-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8 -; AVX256NODQ-NEXT: [[CVT0:%.*]] = uitofp i64 [[LD0]] to float -; AVX256NODQ-NEXT: [[CVT1:%.*]] = uitofp i64 [[LD1]] to float -; AVX256NODQ-NEXT: [[CVT2:%.*]] = uitofp i64 [[LD2]] to float -; AVX256NODQ-NEXT: [[CVT3:%.*]] = uitofp i64 [[LD3]] to float -; AVX256NODQ-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64 -; AVX256NODQ-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4 -; AVX256NODQ-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8 -; AVX256NODQ-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4 -; AVX256NODQ-NEXT: ret void -; -; AVX512-LABEL: @uitofp_4i64_4f32( -; AVX512-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64 -; AVX512-NEXT: [[TMP2:%.*]] = uitofp <4 x i64> [[TMP1]] to <4 x float> -; AVX512-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64 -; AVX512-NEXT: ret void -; -; AVX256DQ-LABEL: @uitofp_4i64_4f32( -; AVX256DQ-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64 -; AVX256DQ-NEXT: [[TMP2:%.*]] = uitofp <4 x i64> [[TMP1]] to <4 x float> -; AVX256DQ-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64 -; AVX256DQ-NEXT: ret void +; CHECK-LABEL: @uitofp_4i64_4f32( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64 +; CHECK-NEXT: [[TMP2:%.*]] = uitofp <4 x i64> [[TMP1]] to <4 x float> +; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64 +; CHECK-NEXT: ret void ; %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64 %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8 @@ -633,44 +606,11 @@ define void @uitofp_8i64_8f32() #0 { ; SSE-NEXT: store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16 ; SSE-NEXT: ret void ; -; AVX256NODQ-LABEL: @uitofp_8i64_8f32( -; AVX256NODQ-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64 -; AVX256NODQ-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8 -; AVX256NODQ-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16 -; AVX256NODQ-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8 -; AVX256NODQ-NEXT: [[LD4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4), align 32 -; AVX256NODQ-NEXT: [[LD5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 5), align 8 -; AVX256NODQ-NEXT: [[LD6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 6), align 16 -; AVX256NODQ-NEXT: [[LD7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 7), align 8 -; AVX256NODQ-NEXT: [[CVT0:%.*]] = uitofp i64 [[LD0]] to float -; AVX256NODQ-NEXT: [[CVT1:%.*]] = uitofp i64 [[LD1]] to float -; AVX256NODQ-NEXT: [[CVT2:%.*]] = uitofp i64 [[LD2]] to float -; AVX256NODQ-NEXT: [[CVT3:%.*]] = uitofp i64 [[LD3]] to float -; AVX256NODQ-NEXT: [[CVT4:%.*]] = uitofp i64 [[LD4]] to float -; AVX256NODQ-NEXT: [[CVT5:%.*]] = uitofp i64 [[LD5]] to float -; AVX256NODQ-NEXT: [[CVT6:%.*]] = uitofp i64 [[LD6]] to float -; AVX256NODQ-NEXT: [[CVT7:%.*]] = uitofp i64 [[LD7]] to float -; AVX256NODQ-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64 -; AVX256NODQ-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4 -; AVX256NODQ-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8 -; AVX256NODQ-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4 -; AVX256NODQ-NEXT: store float [[CVT4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16 -; AVX256NODQ-NEXT: store float [[CVT5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4 -; AVX256NODQ-NEXT: store float [[CVT6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8 -; AVX256NODQ-NEXT: store float [[CVT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4 -; AVX256NODQ-NEXT: ret void -; -; AVX512-LABEL: @uitofp_8i64_8f32( -; AVX512-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @src64 to <8 x i64>*), align 64 -; AVX512-NEXT: [[TMP2:%.*]] = uitofp <8 x i64> [[TMP1]] to <8 x float> -; AVX512-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64 -; AVX512-NEXT: ret void -; -; AVX256DQ-LABEL: @uitofp_8i64_8f32( -; AVX256DQ-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @src64 to <8 x i64>*), align 64 -; AVX256DQ-NEXT: [[TMP2:%.*]] = uitofp <8 x i64> [[TMP1]] to <8 x float> -; AVX256DQ-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64 -; AVX256DQ-NEXT: ret void +; AVX-LABEL: @uitofp_8i64_8f32( +; AVX-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @src64 to <8 x i64>*), align 64 +; AVX-NEXT: [[TMP2:%.*]] = uitofp <8 x i64> [[TMP1]] to <8 x float> +; AVX-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64 +; AVX-NEXT: ret void ; %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64 %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8 From 02ef0f5ab483875b7b6b38e24b245e4fd4053959 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= Date: Wed, 14 Apr 2021 11:56:09 +0200 Subject: [PATCH 516/619] [lldb] [gdb-remote client] Refactor SetCurrentThread*() Refactor SetCurrentThread() and SetCurrentThreadForRun() to reduce code duplication and simplify it. Both methods now call common SendSetCurrentThreadPacket() that implements the common protocol exchange part (the only variable is sending `Hg` vs `Hc`) and returns the selected TID. The logic is rewritten to use a StreamString instead of snprintf(). A side effect of the change is that thread-id sent is now zero-padded. However, this should not have practical impact on the server as both forms are equivalent. Differential Revision: https://reviews.llvm.org/D100459 --- .../GDBRemoteCommunicationClient.cpp | 80 +++++++------------ .../gdb-remote/GDBRemoteCommunicationClient.h | 2 + .../GDBRemoteCommunicationClientTest.cpp | 4 +- 3 files changed, 33 insertions(+), 53 deletions(-) diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp index 0e529d221495e..ec320543a7828 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp @@ -2639,25 +2639,21 @@ bool GDBRemoteCommunicationClient::KillSpawnedProcess(lldb::pid_t pid) { return false; } -bool GDBRemoteCommunicationClient::SetCurrentThread(uint64_t tid) { - if (m_curr_tid == tid) - return true; - - char packet[32]; - int packet_len; +llvm::Optional +GDBRemoteCommunicationClient::SendSetCurrentThreadPacket(uint64_t tid, + char op) { + lldb_private::StreamString packet; + packet.PutChar('H'); + packet.PutChar(op); if (tid == UINT64_MAX) - packet_len = ::snprintf(packet, sizeof(packet), "Hg-1"); + packet.PutCString("-1"); else - packet_len = ::snprintf(packet, sizeof(packet), "Hg%" PRIx64, tid); - assert(packet_len + 1 < (int)sizeof(packet)); - UNUSED_IF_ASSERT_DISABLED(packet_len); + packet.PutHex64(tid); StringExtractorGDBRemote response; - if (SendPacketAndWaitForResponse(packet, response, false) == + if (SendPacketAndWaitForResponse(packet.GetString(), response, false) == PacketResult::Success) { - if (response.IsOKResponse()) { - m_curr_tid = tid; - return true; - } + if (response.IsOKResponse()) + return tid; /* * Connected bare-iron target (like YAMON gdb-stub) may not have support for @@ -2665,49 +2661,31 @@ bool GDBRemoteCommunicationClient::SetCurrentThread(uint64_t tid) { * The reply from '?' packet could be as simple as 'S05'. There is no packet * which can * give us pid and/or tid. Assume pid=tid=1 in such cases. - */ - if (response.IsUnsupportedResponse() && IsConnected()) { - m_curr_tid = 1; - return true; - } + */ + if (response.IsUnsupportedResponse() && IsConnected()) + return 1; } - return false; + return llvm::None; } -bool GDBRemoteCommunicationClient::SetCurrentThreadForRun(uint64_t tid) { - if (m_curr_tid_run == tid) +bool GDBRemoteCommunicationClient::SetCurrentThread(uint64_t tid) { + if (m_curr_tid == tid) return true; - char packet[32]; - int packet_len; - if (tid == UINT64_MAX) - packet_len = ::snprintf(packet, sizeof(packet), "Hc-1"); - else - packet_len = ::snprintf(packet, sizeof(packet), "Hc%" PRIx64, tid); + llvm::Optional ret = SendSetCurrentThreadPacket(tid, 'g'); + if (ret.hasValue()) + m_curr_tid = ret.getValue(); + return ret.hasValue(); +} - assert(packet_len + 1 < (int)sizeof(packet)); - UNUSED_IF_ASSERT_DISABLED(packet_len); - StringExtractorGDBRemote response; - if (SendPacketAndWaitForResponse(packet, response, false) == - PacketResult::Success) { - if (response.IsOKResponse()) { - m_curr_tid_run = tid; - return true; - } +bool GDBRemoteCommunicationClient::SetCurrentThreadForRun(uint64_t tid) { + if (m_curr_tid_run == tid) + return true; - /* - * Connected bare-iron target (like YAMON gdb-stub) may not have support for - * Hc packet. - * The reply from '?' packet could be as simple as 'S05'. There is no packet - * which can - * give us pid and/or tid. Assume pid=tid=1 in such cases. - */ - if (response.IsUnsupportedResponse() && IsConnected()) { - m_curr_tid_run = 1; - return true; - } - } - return false; + llvm::Optional ret = SendSetCurrentThreadPacket(tid, 'c'); + if (ret.hasValue()) + m_curr_tid_run = ret.getValue(); + return ret.hasValue(); } bool GDBRemoteCommunicationClient::GetStopReply( diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h index fa67a6c69a535..03704dfdd8cf0 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h @@ -336,6 +336,8 @@ class GDBRemoteCommunicationClient : public GDBRemoteClientBase { // and response times. bool SendSpeedTestPacket(uint32_t send_size, uint32_t recv_size); + llvm::Optional SendSetCurrentThreadPacket(uint64_t tid, char op); + bool SetCurrentThread(uint64_t tid); bool SetCurrentThreadForRun(uint64_t tid); diff --git a/lldb/unittests/Process/gdb-remote/GDBRemoteCommunicationClientTest.cpp b/lldb/unittests/Process/gdb-remote/GDBRemoteCommunicationClientTest.cpp index b9fc107527a21..45e0356c49486 100644 --- a/lldb/unittests/Process/gdb-remote/GDBRemoteCommunicationClientTest.cpp +++ b/lldb/unittests/Process/gdb-remote/GDBRemoteCommunicationClientTest.cpp @@ -98,7 +98,7 @@ TEST_F(GDBRemoteCommunicationClientTest, WriteRegisterNoSuffix) { }); Handle_QThreadSuffixSupported(server, false); - HandlePacket(server, "Hg47", "OK"); + HandlePacket(server, "Hg0000000000000047", "OK"); HandlePacket(server, "P4=" + one_register_hex, "OK"); ASSERT_TRUE(write_result.get()); @@ -143,7 +143,7 @@ TEST_F(GDBRemoteCommunicationClientTest, SaveRestoreRegistersNoSuffix) { return client.SaveRegisterState(tid, save_id); }); Handle_QThreadSuffixSupported(server, false); - HandlePacket(server, "Hg47", "OK"); + HandlePacket(server, "Hg0000000000000047", "OK"); HandlePacket(server, "QSaveRegisterState", "1"); ASSERT_TRUE(async_result.get()); EXPECT_EQ(1u, save_id); From 2aecffcd40de99bd3155f57e9c91fe57c3207d39 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 2 Jul 2021 13:41:27 +0100 Subject: [PATCH 517/619] [CostModel][X86] Find AVX conversion costs using legalized types if custom types didn't match Building on rG2a1ef8784ad9a, fallback to attempting to match against legalized types like we do for SSE targets. --- .../lib/Target/X86/X86TargetTransformInfo.cpp | 50 +++++++++- llvm/test/Analysis/CostModel/X86/fptosi.ll | 2 +- llvm/test/Analysis/CostModel/X86/sitofp.ll | 32 +++---- llvm/test/Analysis/CostModel/X86/uitofp.ll | 2 +- .../SLPVectorizer/X86/sitofp-inseltpoison.ll | 96 +++++-------------- .../Transforms/SLPVectorizer/X86/sitofp.ll | 96 +++++-------------- 6 files changed, 113 insertions(+), 165 deletions(-) diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 265c4d7bcf718..d55cd8a8c7a84 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -1955,6 +1955,8 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i32, 2 }, { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 }, { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i32, 4 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 5 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i64, 8 }, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 7 }, { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i1, 7 }, @@ -1966,6 +1968,7 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i16, 2 }, { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 }, { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 4 }, + { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 10 }, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 5 }, { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 6 }, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i64, 18 }, @@ -2242,17 +2245,58 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, } // Fall back to legalized types. - // TODO: Add AVX support. std::pair LTSrc = TLI->getTypeLegalizationCost(DL, Src); std::pair LTDest = TLI->getTypeLegalizationCost(DL, Dst); - if (ST->hasSSE41() && !ST->hasAVX()) + if (ST->useAVX512Regs()) { + if (ST->hasBWI()) + if (const auto *Entry = ConvertCostTableLookup( + AVX512BWConversionTbl, ISD, LTDest.second, LTSrc.second)) + return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost); + + if (ST->hasDQI()) + if (const auto *Entry = ConvertCostTableLookup( + AVX512DQConversionTbl, ISD, LTDest.second, LTSrc.second)) + return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost); + + if (ST->hasAVX512()) + if (const auto *Entry = ConvertCostTableLookup( + AVX512FConversionTbl, ISD, LTDest.second, LTSrc.second)) + return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost); + } + + if (ST->hasBWI()) + if (const auto *Entry = ConvertCostTableLookup(AVX512BWVLConversionTbl, ISD, + LTDest.second, LTSrc.second)) + return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost); + + if (ST->hasDQI()) + if (const auto *Entry = ConvertCostTableLookup(AVX512DQVLConversionTbl, ISD, + LTDest.second, LTSrc.second)) + return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost); + + if (ST->hasAVX512()) + if (const auto *Entry = ConvertCostTableLookup(AVX512VLConversionTbl, ISD, + LTDest.second, LTSrc.second)) + return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost); + + if (ST->hasAVX2()) + if (const auto *Entry = ConvertCostTableLookup(AVX2ConversionTbl, ISD, + LTDest.second, LTSrc.second)) + return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost); + + if (ST->hasAVX()) + if (const auto *Entry = ConvertCostTableLookup(AVXConversionTbl, ISD, + LTDest.second, LTSrc.second)) + return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost); + + if (ST->hasSSE41()) if (const auto *Entry = ConvertCostTableLookup(SSE41ConversionTbl, ISD, LTDest.second, LTSrc.second)) return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost); - if (ST->hasSSE2() && !ST->hasAVX()) + if (ST->hasSSE2()) if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD, LTDest.second, LTSrc.second)) return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost); diff --git a/llvm/test/Analysis/CostModel/X86/fptosi.ll b/llvm/test/Analysis/CostModel/X86/fptosi.ll index fd84c5eafdae3..d92310cb79f8a 100644 --- a/llvm/test/Analysis/CostModel/X86/fptosi.ll +++ b/llvm/test/Analysis/CostModel/X86/fptosi.ll @@ -239,7 +239,7 @@ define i32 @fptosi_float_i32(i32 %arg) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = fptosi <2 x float> undef to <2 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = fptosi <4 x float> undef to <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = fptosi <8 x float> undef to <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = fptosi <16 x float> undef to <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = fptosi <16 x float> undef to <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'fptosi_float_i32' diff --git a/llvm/test/Analysis/CostModel/X86/sitofp.ll b/llvm/test/Analysis/CostModel/X86/sitofp.ll index 67d3663624552..b3c400c93b9fe 100644 --- a/llvm/test/Analysis/CostModel/X86/sitofp.ll +++ b/llvm/test/Analysis/CostModel/X86/sitofp.ll @@ -85,21 +85,21 @@ define i32 @sitofp_i32_double() { ; ; AVX1-LABEL: 'sitofp_i32_double' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'sitofp_i32_double' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double> ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'sitofp_i32_double' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = sitofp i32 undef to double -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef @@ -158,7 +158,7 @@ define i32 @sitofp_i8_float() { ; ; AVX-LABEL: 'sitofp_i8_float' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i8_f32 = sitofp i8 undef to float -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i8_v2f32 = sitofp <2 x i8> undef to <2 x float> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v2i8_v2f32 = sitofp <2 x i8> undef to <2 x float> ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i8_v4f32 = sitofp <4 x i8> undef to <4 x float> ; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v8i8_v8f32 = sitofp <8 x i8> undef to <8 x float> ; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cvt_v16i8_v16f32 = sitofp <16 x i8> undef to <16 x float> @@ -166,7 +166,7 @@ define i32 @sitofp_i8_float() { ; ; AVX512-LABEL: 'sitofp_i8_float' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i8_f32 = sitofp i8 undef to float -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i8_v2f32 = sitofp <2 x i8> undef to <2 x float> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v2i8_v2f32 = sitofp <2 x i8> undef to <2 x float> ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i8_v4f32 = sitofp <4 x i8> undef to <4 x float> ; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v8i8_v8f32 = sitofp <8 x i8> undef to <8 x float> ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v16i8_v16f32 = sitofp <16 x i8> undef to <16 x float> @@ -191,7 +191,7 @@ define i32 @sitofp_i16_float() { ; ; AVX-LABEL: 'sitofp_i16_float' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i16_f32 = sitofp i16 undef to float -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i16_v2f32 = sitofp <2 x i16> undef to <2 x float> +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v2i16_v2f32 = sitofp <2 x i16> undef to <2 x float> ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i16_v4f32 = sitofp <4 x i16> undef to <4 x float> ; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v8i16_v8f32 = sitofp <8 x i16> undef to <8 x float> ; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cvt_v16i16_v16f32 = sitofp <16 x i16> undef to <16 x float> @@ -199,7 +199,7 @@ define i32 @sitofp_i16_float() { ; ; AVX512-LABEL: 'sitofp_i16_float' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i16_f32 = sitofp i16 undef to float -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i16_v2f32 = sitofp <2 x i16> undef to <2 x float> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v2i16_v2f32 = sitofp <2 x i16> undef to <2 x float> ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i16_v4f32 = sitofp <4 x i16> undef to <4 x float> ; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v8i16_v8f32 = sitofp <8 x i16> undef to <8 x float> ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v16i16_v16f32 = sitofp <16 x i16> undef to <16 x float> @@ -232,7 +232,7 @@ define i32 @sitofp_i32_float() { ; ; AVX1-LABEL: 'sitofp_i32_float' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f32 = sitofp i32 undef to float -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f32 = sitofp <4 x i32> undef to <4 x float> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v8i32_v8f32 = sitofp <8 x i32> undef to <8 x float> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v16i32_v16f32 = sitofp <16 x i32> undef to <16 x float> @@ -240,7 +240,7 @@ define i32 @sitofp_i32_float() { ; ; AVX2-LABEL: 'sitofp_i32_float' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f32 = sitofp i32 undef to float -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f32 = sitofp <4 x i32> undef to <4 x float> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v8i32_v8f32 = sitofp <8 x i32> undef to <8 x float> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v16i32_v16f32 = sitofp <16 x i32> undef to <16 x float> @@ -248,7 +248,7 @@ define i32 @sitofp_i32_float() { ; ; AVX512-LABEL: 'sitofp_i32_float' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f32 = sitofp i32 undef to float -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f32 = sitofp <4 x i32> undef to <4 x float> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v8i32_v8f32 = sitofp <8 x i32> undef to <8 x float> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v16i32_v16f32 = sitofp <16 x i32> undef to <16 x float> @@ -273,16 +273,16 @@ define i32 @sitofp_i64_float() { ; ; AVX-LABEL: 'sitofp_i64_float' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f32 = sitofp i64 undef to float -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float> -; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cvt_v4i64_v4f32 = sitofp <4 x i64> undef to <4 x float> -; AVX-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %cvt_v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float> -; AVX-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %cvt_v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float> +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float> +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v4i64_v4f32 = sitofp <4 x i64> undef to <4 x float> +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cvt_v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float> +; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %cvt_v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'sitofp_i64_float' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f32 = sitofp i64 undef to float -; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cvt_v4i64_v4f32 = sitofp <4 x i64> undef to <4 x float> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v4i64_v4f32 = sitofp <4 x i64> undef to <4 x float> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cvt_v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %cvt_v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef diff --git a/llvm/test/Analysis/CostModel/X86/uitofp.ll b/llvm/test/Analysis/CostModel/X86/uitofp.ll index 56cac09a5b453..db092ab20f355 100644 --- a/llvm/test/Analysis/CostModel/X86/uitofp.ll +++ b/llvm/test/Analysis/CostModel/X86/uitofp.ll @@ -224,7 +224,7 @@ define i32 @uitofp_i32_float() { ; ; AVX1-LABEL: 'uitofp_i32_float' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f32 = uitofp i32 undef to float -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f32 = uitofp <2 x i32> undef to <2 x float> +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v2i32_v2f32 = uitofp <2 x i32> undef to <2 x float> ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v4i32_v4f32 = uitofp <4 x i32> undef to <4 x float> ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v8i32_v8f32 = uitofp <8 x i32> undef to <8 x float> ; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cvt_v16i32_v16f32 = uitofp <16 x i32> undef to <16 x float> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/sitofp-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/sitofp-inseltpoison.ll index 73710e2d8888e..2e72351858fbc 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/sitofp-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/sitofp-inseltpoison.ll @@ -216,14 +216,20 @@ define void @sitofp_8i64_8f64() #0 { } define void @sitofp_2i32_2f64() #0 { -; CHECK-LABEL: @sitofp_2i32_2f64( -; CHECK-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64 -; CHECK-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4 -; CHECK-NEXT: [[CVT0:%.*]] = sitofp i32 [[LD0]] to double -; CHECK-NEXT: [[CVT1:%.*]] = sitofp i32 [[LD1]] to double -; CHECK-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 -; CHECK-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 -; CHECK-NEXT: ret void +; SSE-LABEL: @sitofp_2i32_2f64( +; SSE-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64 +; SSE-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4 +; SSE-NEXT: [[CVT0:%.*]] = sitofp i32 [[LD0]] to double +; SSE-NEXT: [[CVT1:%.*]] = sitofp i32 [[LD1]] to double +; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 +; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 +; SSE-NEXT: ret void +; +; AVX-LABEL: @sitofp_2i32_2f64( +; AVX-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([16 x i32]* @src32 to <2 x i32>*), align 64 +; AVX-NEXT: [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x double> +; AVX-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64 +; AVX-NEXT: ret void ; %ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64 %ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4 @@ -632,32 +638,11 @@ define void @sitofp_4i64_4f32() #0 { ; SSE-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4 ; SSE-NEXT: ret void ; -; AVX256NODQ-LABEL: @sitofp_4i64_4f32( -; AVX256NODQ-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64 -; AVX256NODQ-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8 -; AVX256NODQ-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16 -; AVX256NODQ-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8 -; AVX256NODQ-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to float -; AVX256NODQ-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to float -; AVX256NODQ-NEXT: [[CVT2:%.*]] = sitofp i64 [[LD2]] to float -; AVX256NODQ-NEXT: [[CVT3:%.*]] = sitofp i64 [[LD3]] to float -; AVX256NODQ-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64 -; AVX256NODQ-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4 -; AVX256NODQ-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8 -; AVX256NODQ-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4 -; AVX256NODQ-NEXT: ret void -; -; AVX512-LABEL: @sitofp_4i64_4f32( -; AVX512-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64 -; AVX512-NEXT: [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x float> -; AVX512-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64 -; AVX512-NEXT: ret void -; -; AVX256DQ-LABEL: @sitofp_4i64_4f32( -; AVX256DQ-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64 -; AVX256DQ-NEXT: [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x float> -; AVX256DQ-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64 -; AVX256DQ-NEXT: ret void +; AVX-LABEL: @sitofp_4i64_4f32( +; AVX-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64 +; AVX-NEXT: [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x float> +; AVX-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64 +; AVX-NEXT: ret void ; %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64 %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8 @@ -702,44 +687,11 @@ define void @sitofp_8i64_8f32() #0 { ; SSE-NEXT: store float [[CVT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4 ; SSE-NEXT: ret void ; -; AVX256NODQ-LABEL: @sitofp_8i64_8f32( -; AVX256NODQ-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64 -; AVX256NODQ-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8 -; AVX256NODQ-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16 -; AVX256NODQ-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8 -; AVX256NODQ-NEXT: [[LD4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4), align 32 -; AVX256NODQ-NEXT: [[LD5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 5), align 8 -; AVX256NODQ-NEXT: [[LD6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 6), align 16 -; AVX256NODQ-NEXT: [[LD7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 7), align 8 -; AVX256NODQ-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to float -; AVX256NODQ-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to float -; AVX256NODQ-NEXT: [[CVT2:%.*]] = sitofp i64 [[LD2]] to float -; AVX256NODQ-NEXT: [[CVT3:%.*]] = sitofp i64 [[LD3]] to float -; AVX256NODQ-NEXT: [[CVT4:%.*]] = sitofp i64 [[LD4]] to float -; AVX256NODQ-NEXT: [[CVT5:%.*]] = sitofp i64 [[LD5]] to float -; AVX256NODQ-NEXT: [[CVT6:%.*]] = sitofp i64 [[LD6]] to float -; AVX256NODQ-NEXT: [[CVT7:%.*]] = sitofp i64 [[LD7]] to float -; AVX256NODQ-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64 -; AVX256NODQ-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4 -; AVX256NODQ-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8 -; AVX256NODQ-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4 -; AVX256NODQ-NEXT: store float [[CVT4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16 -; AVX256NODQ-NEXT: store float [[CVT5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4 -; AVX256NODQ-NEXT: store float [[CVT6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8 -; AVX256NODQ-NEXT: store float [[CVT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4 -; AVX256NODQ-NEXT: ret void -; -; AVX512-LABEL: @sitofp_8i64_8f32( -; AVX512-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @src64 to <8 x i64>*), align 64 -; AVX512-NEXT: [[TMP2:%.*]] = sitofp <8 x i64> [[TMP1]] to <8 x float> -; AVX512-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64 -; AVX512-NEXT: ret void -; -; AVX256DQ-LABEL: @sitofp_8i64_8f32( -; AVX256DQ-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @src64 to <8 x i64>*), align 64 -; AVX256DQ-NEXT: [[TMP2:%.*]] = sitofp <8 x i64> [[TMP1]] to <8 x float> -; AVX256DQ-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64 -; AVX256DQ-NEXT: ret void +; AVX-LABEL: @sitofp_8i64_8f32( +; AVX-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @src64 to <8 x i64>*), align 64 +; AVX-NEXT: [[TMP2:%.*]] = sitofp <8 x i64> [[TMP1]] to <8 x float> +; AVX-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64 +; AVX-NEXT: ret void ; %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64 %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll b/llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll index fc90662061d5b..2109520db81de 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll @@ -216,14 +216,20 @@ define void @sitofp_8i64_8f64() #0 { } define void @sitofp_2i32_2f64() #0 { -; CHECK-LABEL: @sitofp_2i32_2f64( -; CHECK-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64 -; CHECK-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4 -; CHECK-NEXT: [[CVT0:%.*]] = sitofp i32 [[LD0]] to double -; CHECK-NEXT: [[CVT1:%.*]] = sitofp i32 [[LD1]] to double -; CHECK-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 -; CHECK-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 -; CHECK-NEXT: ret void +; SSE-LABEL: @sitofp_2i32_2f64( +; SSE-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64 +; SSE-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4 +; SSE-NEXT: [[CVT0:%.*]] = sitofp i32 [[LD0]] to double +; SSE-NEXT: [[CVT1:%.*]] = sitofp i32 [[LD1]] to double +; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 +; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 +; SSE-NEXT: ret void +; +; AVX-LABEL: @sitofp_2i32_2f64( +; AVX-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([16 x i32]* @src32 to <2 x i32>*), align 64 +; AVX-NEXT: [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x double> +; AVX-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64 +; AVX-NEXT: ret void ; %ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64 %ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4 @@ -632,32 +638,11 @@ define void @sitofp_4i64_4f32() #0 { ; SSE-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4 ; SSE-NEXT: ret void ; -; AVX256NODQ-LABEL: @sitofp_4i64_4f32( -; AVX256NODQ-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64 -; AVX256NODQ-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8 -; AVX256NODQ-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16 -; AVX256NODQ-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8 -; AVX256NODQ-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to float -; AVX256NODQ-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to float -; AVX256NODQ-NEXT: [[CVT2:%.*]] = sitofp i64 [[LD2]] to float -; AVX256NODQ-NEXT: [[CVT3:%.*]] = sitofp i64 [[LD3]] to float -; AVX256NODQ-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64 -; AVX256NODQ-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4 -; AVX256NODQ-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8 -; AVX256NODQ-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4 -; AVX256NODQ-NEXT: ret void -; -; AVX512-LABEL: @sitofp_4i64_4f32( -; AVX512-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64 -; AVX512-NEXT: [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x float> -; AVX512-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64 -; AVX512-NEXT: ret void -; -; AVX256DQ-LABEL: @sitofp_4i64_4f32( -; AVX256DQ-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64 -; AVX256DQ-NEXT: [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x float> -; AVX256DQ-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64 -; AVX256DQ-NEXT: ret void +; AVX-LABEL: @sitofp_4i64_4f32( +; AVX-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64 +; AVX-NEXT: [[TMP2:%.*]] = sitofp <4 x i64> [[TMP1]] to <4 x float> +; AVX-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64 +; AVX-NEXT: ret void ; %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64 %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8 @@ -702,44 +687,11 @@ define void @sitofp_8i64_8f32() #0 { ; SSE-NEXT: store float [[CVT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4 ; SSE-NEXT: ret void ; -; AVX256NODQ-LABEL: @sitofp_8i64_8f32( -; AVX256NODQ-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64 -; AVX256NODQ-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8 -; AVX256NODQ-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16 -; AVX256NODQ-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8 -; AVX256NODQ-NEXT: [[LD4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4), align 32 -; AVX256NODQ-NEXT: [[LD5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 5), align 8 -; AVX256NODQ-NEXT: [[LD6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 6), align 16 -; AVX256NODQ-NEXT: [[LD7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 7), align 8 -; AVX256NODQ-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to float -; AVX256NODQ-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to float -; AVX256NODQ-NEXT: [[CVT2:%.*]] = sitofp i64 [[LD2]] to float -; AVX256NODQ-NEXT: [[CVT3:%.*]] = sitofp i64 [[LD3]] to float -; AVX256NODQ-NEXT: [[CVT4:%.*]] = sitofp i64 [[LD4]] to float -; AVX256NODQ-NEXT: [[CVT5:%.*]] = sitofp i64 [[LD5]] to float -; AVX256NODQ-NEXT: [[CVT6:%.*]] = sitofp i64 [[LD6]] to float -; AVX256NODQ-NEXT: [[CVT7:%.*]] = sitofp i64 [[LD7]] to float -; AVX256NODQ-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64 -; AVX256NODQ-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4 -; AVX256NODQ-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8 -; AVX256NODQ-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4 -; AVX256NODQ-NEXT: store float [[CVT4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16 -; AVX256NODQ-NEXT: store float [[CVT5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4 -; AVX256NODQ-NEXT: store float [[CVT6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8 -; AVX256NODQ-NEXT: store float [[CVT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4 -; AVX256NODQ-NEXT: ret void -; -; AVX512-LABEL: @sitofp_8i64_8f32( -; AVX512-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @src64 to <8 x i64>*), align 64 -; AVX512-NEXT: [[TMP2:%.*]] = sitofp <8 x i64> [[TMP1]] to <8 x float> -; AVX512-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64 -; AVX512-NEXT: ret void -; -; AVX256DQ-LABEL: @sitofp_8i64_8f32( -; AVX256DQ-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @src64 to <8 x i64>*), align 64 -; AVX256DQ-NEXT: [[TMP2:%.*]] = sitofp <8 x i64> [[TMP1]] to <8 x float> -; AVX256DQ-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64 -; AVX256DQ-NEXT: ret void +; AVX-LABEL: @sitofp_8i64_8f32( +; AVX-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @src64 to <8 x i64>*), align 64 +; AVX-NEXT: [[TMP2:%.*]] = sitofp <8 x i64> [[TMP1]] to <8 x float> +; AVX-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64 +; AVX-NEXT: ret void ; %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64 %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8 From a3ca578eb912b6954002914d50f847f45e699ded Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 2 Jul 2021 13:48:05 +0100 Subject: [PATCH 518/619] [Matrix] Fix crash during fusion if the same load is re-used. This patch fixes a crash when the same load is used for both operands of a fuseable multiply. --- .../Scalar/LowerMatrixIntrinsics.cpp | 2 +- .../LowerMatrixIntrinsics/multiply-fused.ll | 181 ++++++++++++++++++ 2 files changed, 182 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp index 9ac219df0a0c2..dde918bd0d02d 100644 --- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp +++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp @@ -1483,7 +1483,7 @@ class LowerMatrixIntrinsics { FusedInsts.insert(LoadOp0); LoadOp0->eraseFromParent(); } - if (LoadOp1->hasNUses(0)) { + if (LoadOp1 != LoadOp0 && LoadOp1->hasNUses(0)) { FusedInsts.insert(LoadOp1); LoadOp1->eraseFromParent(); } diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused.ll index 9f6048fa714a4..457edfaed06e5 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused.ll @@ -274,4 +274,185 @@ entry: ret void } +; The same load is used for both operands of the multiply. +define void @multiply_reuse_load(<16 x double>* noalias %A, <16 x double>* noalias %B, <16 x double>* noalias %C) { +; CHECK-LABEL: @multiply_reuse_load( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VEC_CAST:%.*]] = bitcast <16 x double>* [[A:%.*]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST]], align 8 +; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr <16 x double>, <16 x double>* [[A]], i64 0, i64 4 +; CHECK-NEXT: [[VEC_CAST1:%.*]] = bitcast double* [[VEC_GEP]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD2:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST1]], align 8 +; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP0:%.*]] = fmul contract <2 x double> [[COL_LOAD]], [[SPLAT_SPLAT]] +; CHECK-NEXT: [[SPLAT_SPLAT11:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD2]], <2 x double> [[SPLAT_SPLAT11]], <2 x double> [[TMP0]]) +; CHECK-NEXT: [[SPLAT_SPLAT14:%.*]] = shufflevector <2 x double> [[COL_LOAD2]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = fmul contract <2 x double> [[COL_LOAD]], [[SPLAT_SPLAT14]] +; CHECK-NEXT: [[SPLAT_SPLAT17:%.*]] = shufflevector <2 x double> [[COL_LOAD2]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD2]], <2 x double> [[SPLAT_SPLAT17]], <2 x double> [[TMP2]]) +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr <16 x double>, <16 x double>* [[A]], i64 0, i64 8 +; CHECK-NEXT: [[VEC_CAST19:%.*]] = bitcast double* [[TMP4]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD20:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST19]], align 8 +; CHECK-NEXT: [[VEC_GEP21:%.*]] = getelementptr <16 x double>, <16 x double>* [[A]], i64 0, i64 12 +; CHECK-NEXT: [[VEC_CAST22:%.*]] = bitcast double* [[VEC_GEP21]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD23:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST22]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr <16 x double>, <16 x double>* [[A]], i64 0, i64 2 +; CHECK-NEXT: [[VEC_CAST25:%.*]] = bitcast double* [[TMP5]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD26:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST25]], align 8 +; CHECK-NEXT: [[VEC_GEP27:%.*]] = getelementptr <16 x double>, <16 x double>* [[A]], i64 0, i64 6 +; CHECK-NEXT: [[VEC_CAST28:%.*]] = bitcast double* [[VEC_GEP27]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD29:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST28]], align 8 +; CHECK-NEXT: [[SPLAT_SPLAT33:%.*]] = shufflevector <2 x double> [[COL_LOAD26]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD20]], <2 x double> [[SPLAT_SPLAT33]], <2 x double> [[TMP1]]) +; CHECK-NEXT: [[SPLAT_SPLAT36:%.*]] = shufflevector <2 x double> [[COL_LOAD26]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD23]], <2 x double> [[SPLAT_SPLAT36]], <2 x double> [[TMP6]]) +; CHECK-NEXT: [[SPLAT_SPLAT40:%.*]] = shufflevector <2 x double> [[COL_LOAD29]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD20]], <2 x double> [[SPLAT_SPLAT40]], <2 x double> [[TMP3]]) +; CHECK-NEXT: [[SPLAT_SPLAT43:%.*]] = shufflevector <2 x double> [[COL_LOAD29]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD23]], <2 x double> [[SPLAT_SPLAT43]], <2 x double> [[TMP8]]) +; CHECK-NEXT: [[VEC_CAST45:%.*]] = bitcast <16 x double>* [[C:%.*]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP7]], <2 x double>* [[VEC_CAST45]], align 8 +; CHECK-NEXT: [[VEC_GEP46:%.*]] = getelementptr <16 x double>, <16 x double>* [[C]], i64 0, i64 4 +; CHECK-NEXT: [[VEC_CAST47:%.*]] = bitcast double* [[VEC_GEP46]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP9]], <2 x double>* [[VEC_CAST47]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr <16 x double>, <16 x double>* [[A]], i64 0, i64 2 +; CHECK-NEXT: [[VEC_CAST49:%.*]] = bitcast double* [[TMP10]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD50:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST49]], align 8 +; CHECK-NEXT: [[VEC_GEP51:%.*]] = getelementptr <16 x double>, <16 x double>* [[A]], i64 0, i64 6 +; CHECK-NEXT: [[VEC_CAST52:%.*]] = bitcast double* [[VEC_GEP51]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD53:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST52]], align 8 +; CHECK-NEXT: [[VEC_CAST55:%.*]] = bitcast <16 x double>* [[A]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD56:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST55]], align 8 +; CHECK-NEXT: [[VEC_GEP57:%.*]] = getelementptr <16 x double>, <16 x double>* [[A]], i64 0, i64 4 +; CHECK-NEXT: [[VEC_CAST58:%.*]] = bitcast double* [[VEC_GEP57]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD59:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST58]], align 8 +; CHECK-NEXT: [[SPLAT_SPLAT62:%.*]] = shufflevector <2 x double> [[COL_LOAD56]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = fmul contract <2 x double> [[COL_LOAD50]], [[SPLAT_SPLAT62]] +; CHECK-NEXT: [[SPLAT_SPLAT65:%.*]] = shufflevector <2 x double> [[COL_LOAD56]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD53]], <2 x double> [[SPLAT_SPLAT65]], <2 x double> [[TMP11]]) +; CHECK-NEXT: [[SPLAT_SPLAT68:%.*]] = shufflevector <2 x double> [[COL_LOAD59]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = fmul contract <2 x double> [[COL_LOAD50]], [[SPLAT_SPLAT68]] +; CHECK-NEXT: [[SPLAT_SPLAT71:%.*]] = shufflevector <2 x double> [[COL_LOAD59]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD53]], <2 x double> [[SPLAT_SPLAT71]], <2 x double> [[TMP13]]) +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr <16 x double>, <16 x double>* [[A]], i64 0, i64 10 +; CHECK-NEXT: [[VEC_CAST73:%.*]] = bitcast double* [[TMP15]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD74:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST73]], align 8 +; CHECK-NEXT: [[VEC_GEP75:%.*]] = getelementptr <16 x double>, <16 x double>* [[A]], i64 0, i64 14 +; CHECK-NEXT: [[VEC_CAST76:%.*]] = bitcast double* [[VEC_GEP75]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD77:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST76]], align 8 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr <16 x double>, <16 x double>* [[A]], i64 0, i64 2 +; CHECK-NEXT: [[VEC_CAST79:%.*]] = bitcast double* [[TMP16]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD80:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST79]], align 8 +; CHECK-NEXT: [[VEC_GEP81:%.*]] = getelementptr <16 x double>, <16 x double>* [[A]], i64 0, i64 6 +; CHECK-NEXT: [[VEC_CAST82:%.*]] = bitcast double* [[VEC_GEP81]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD83:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST82]], align 8 +; CHECK-NEXT: [[SPLAT_SPLAT87:%.*]] = shufflevector <2 x double> [[COL_LOAD80]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD74]], <2 x double> [[SPLAT_SPLAT87]], <2 x double> [[TMP12]]) +; CHECK-NEXT: [[SPLAT_SPLAT90:%.*]] = shufflevector <2 x double> [[COL_LOAD80]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD77]], <2 x double> [[SPLAT_SPLAT90]], <2 x double> [[TMP17]]) +; CHECK-NEXT: [[SPLAT_SPLAT94:%.*]] = shufflevector <2 x double> [[COL_LOAD83]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD74]], <2 x double> [[SPLAT_SPLAT94]], <2 x double> [[TMP14]]) +; CHECK-NEXT: [[SPLAT_SPLAT97:%.*]] = shufflevector <2 x double> [[COL_LOAD83]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD77]], <2 x double> [[SPLAT_SPLAT97]], <2 x double> [[TMP19]]) +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr <16 x double>, <16 x double>* [[C]], i64 0, i64 2 +; CHECK-NEXT: [[VEC_CAST99:%.*]] = bitcast double* [[TMP21]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP18]], <2 x double>* [[VEC_CAST99]], align 8 +; CHECK-NEXT: [[VEC_GEP100:%.*]] = getelementptr <16 x double>, <16 x double>* [[C]], i64 0, i64 6 +; CHECK-NEXT: [[VEC_CAST101:%.*]] = bitcast double* [[VEC_GEP100]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP20]], <2 x double>* [[VEC_CAST101]], align 8 +; CHECK-NEXT: [[VEC_CAST103:%.*]] = bitcast <16 x double>* [[A]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD104:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST103]], align 8 +; CHECK-NEXT: [[VEC_GEP105:%.*]] = getelementptr <16 x double>, <16 x double>* [[A]], i64 0, i64 4 +; CHECK-NEXT: [[VEC_CAST106:%.*]] = bitcast double* [[VEC_GEP105]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD107:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST106]], align 8 +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr <16 x double>, <16 x double>* [[A]], i64 0, i64 8 +; CHECK-NEXT: [[VEC_CAST109:%.*]] = bitcast double* [[TMP22]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD110:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST109]], align 8 +; CHECK-NEXT: [[VEC_GEP111:%.*]] = getelementptr <16 x double>, <16 x double>* [[A]], i64 0, i64 12 +; CHECK-NEXT: [[VEC_CAST112:%.*]] = bitcast double* [[VEC_GEP111]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD113:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST112]], align 8 +; CHECK-NEXT: [[SPLAT_SPLAT116:%.*]] = shufflevector <2 x double> [[COL_LOAD110]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP23:%.*]] = fmul contract <2 x double> [[COL_LOAD104]], [[SPLAT_SPLAT116]] +; CHECK-NEXT: [[SPLAT_SPLAT119:%.*]] = shufflevector <2 x double> [[COL_LOAD110]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP24:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD107]], <2 x double> [[SPLAT_SPLAT119]], <2 x double> [[TMP23]]) +; CHECK-NEXT: [[SPLAT_SPLAT122:%.*]] = shufflevector <2 x double> [[COL_LOAD113]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP25:%.*]] = fmul contract <2 x double> [[COL_LOAD104]], [[SPLAT_SPLAT122]] +; CHECK-NEXT: [[SPLAT_SPLAT125:%.*]] = shufflevector <2 x double> [[COL_LOAD113]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP26:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD107]], <2 x double> [[SPLAT_SPLAT125]], <2 x double> [[TMP25]]) +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr <16 x double>, <16 x double>* [[A]], i64 0, i64 8 +; CHECK-NEXT: [[VEC_CAST127:%.*]] = bitcast double* [[TMP27]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD128:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST127]], align 8 +; CHECK-NEXT: [[VEC_GEP129:%.*]] = getelementptr <16 x double>, <16 x double>* [[A]], i64 0, i64 12 +; CHECK-NEXT: [[VEC_CAST130:%.*]] = bitcast double* [[VEC_GEP129]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD131:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST130]], align 8 +; CHECK-NEXT: [[TMP28:%.*]] = getelementptr <16 x double>, <16 x double>* [[A]], i64 0, i64 10 +; CHECK-NEXT: [[VEC_CAST133:%.*]] = bitcast double* [[TMP28]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD134:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST133]], align 8 +; CHECK-NEXT: [[VEC_GEP135:%.*]] = getelementptr <16 x double>, <16 x double>* [[A]], i64 0, i64 14 +; CHECK-NEXT: [[VEC_CAST136:%.*]] = bitcast double* [[VEC_GEP135]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD137:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST136]], align 8 +; CHECK-NEXT: [[SPLAT_SPLAT141:%.*]] = shufflevector <2 x double> [[COL_LOAD134]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP29:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD128]], <2 x double> [[SPLAT_SPLAT141]], <2 x double> [[TMP24]]) +; CHECK-NEXT: [[SPLAT_SPLAT144:%.*]] = shufflevector <2 x double> [[COL_LOAD134]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP30:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD131]], <2 x double> [[SPLAT_SPLAT144]], <2 x double> [[TMP29]]) +; CHECK-NEXT: [[SPLAT_SPLAT148:%.*]] = shufflevector <2 x double> [[COL_LOAD137]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP31:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD128]], <2 x double> [[SPLAT_SPLAT148]], <2 x double> [[TMP26]]) +; CHECK-NEXT: [[SPLAT_SPLAT151:%.*]] = shufflevector <2 x double> [[COL_LOAD137]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP32:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD131]], <2 x double> [[SPLAT_SPLAT151]], <2 x double> [[TMP31]]) +; CHECK-NEXT: [[TMP33:%.*]] = getelementptr <16 x double>, <16 x double>* [[C]], i64 0, i64 8 +; CHECK-NEXT: [[VEC_CAST153:%.*]] = bitcast double* [[TMP33]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP30]], <2 x double>* [[VEC_CAST153]], align 8 +; CHECK-NEXT: [[VEC_GEP154:%.*]] = getelementptr <16 x double>, <16 x double>* [[C]], i64 0, i64 12 +; CHECK-NEXT: [[VEC_CAST155:%.*]] = bitcast double* [[VEC_GEP154]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP32]], <2 x double>* [[VEC_CAST155]], align 8 +; CHECK-NEXT: [[TMP34:%.*]] = getelementptr <16 x double>, <16 x double>* [[A]], i64 0, i64 2 +; CHECK-NEXT: [[VEC_CAST157:%.*]] = bitcast double* [[TMP34]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD158:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST157]], align 8 +; CHECK-NEXT: [[VEC_GEP159:%.*]] = getelementptr <16 x double>, <16 x double>* [[A]], i64 0, i64 6 +; CHECK-NEXT: [[VEC_CAST160:%.*]] = bitcast double* [[VEC_GEP159]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD161:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST160]], align 8 +; CHECK-NEXT: [[TMP35:%.*]] = getelementptr <16 x double>, <16 x double>* [[A]], i64 0, i64 8 +; CHECK-NEXT: [[VEC_CAST163:%.*]] = bitcast double* [[TMP35]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD164:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST163]], align 8 +; CHECK-NEXT: [[VEC_GEP165:%.*]] = getelementptr <16 x double>, <16 x double>* [[A]], i64 0, i64 12 +; CHECK-NEXT: [[VEC_CAST166:%.*]] = bitcast double* [[VEC_GEP165]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD167:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST166]], align 8 +; CHECK-NEXT: [[SPLAT_SPLAT170:%.*]] = shufflevector <2 x double> [[COL_LOAD164]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP36:%.*]] = fmul contract <2 x double> [[COL_LOAD158]], [[SPLAT_SPLAT170]] +; CHECK-NEXT: [[SPLAT_SPLAT173:%.*]] = shufflevector <2 x double> [[COL_LOAD164]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP37:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD161]], <2 x double> [[SPLAT_SPLAT173]], <2 x double> [[TMP36]]) +; CHECK-NEXT: [[SPLAT_SPLAT176:%.*]] = shufflevector <2 x double> [[COL_LOAD167]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP38:%.*]] = fmul contract <2 x double> [[COL_LOAD158]], [[SPLAT_SPLAT176]] +; CHECK-NEXT: [[SPLAT_SPLAT179:%.*]] = shufflevector <2 x double> [[COL_LOAD167]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP39:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD161]], <2 x double> [[SPLAT_SPLAT179]], <2 x double> [[TMP38]]) +; CHECK-NEXT: [[TMP40:%.*]] = getelementptr <16 x double>, <16 x double>* [[A]], i64 0, i64 10 +; CHECK-NEXT: [[VEC_CAST181:%.*]] = bitcast double* [[TMP40]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD182:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST181]], align 8 +; CHECK-NEXT: [[VEC_GEP183:%.*]] = getelementptr <16 x double>, <16 x double>* [[A]], i64 0, i64 14 +; CHECK-NEXT: [[VEC_CAST184:%.*]] = bitcast double* [[VEC_GEP183]] to <2 x double>* +; CHECK-NEXT: [[COL_LOAD185:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST184]], align 8 +; CHECK-NEXT: [[SPLAT_SPLAT195:%.*]] = shufflevector <2 x double> [[COL_LOAD182]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP41:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD182]], <2 x double> [[SPLAT_SPLAT195]], <2 x double> [[TMP37]]) +; CHECK-NEXT: [[SPLAT_SPLAT198:%.*]] = shufflevector <2 x double> [[COL_LOAD182]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP42:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD185]], <2 x double> [[SPLAT_SPLAT198]], <2 x double> [[TMP41]]) +; CHECK-NEXT: [[SPLAT_SPLAT202:%.*]] = shufflevector <2 x double> [[COL_LOAD185]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP43:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD182]], <2 x double> [[SPLAT_SPLAT202]], <2 x double> [[TMP39]]) +; CHECK-NEXT: [[SPLAT_SPLAT205:%.*]] = shufflevector <2 x double> [[COL_LOAD185]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP44:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD185]], <2 x double> [[SPLAT_SPLAT205]], <2 x double> [[TMP43]]) +; CHECK-NEXT: [[TMP45:%.*]] = getelementptr <16 x double>, <16 x double>* [[C]], i64 0, i64 10 +; CHECK-NEXT: [[VEC_CAST207:%.*]] = bitcast double* [[TMP45]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP42]], <2 x double>* [[VEC_CAST207]], align 8 +; CHECK-NEXT: [[VEC_GEP208:%.*]] = getelementptr <16 x double>, <16 x double>* [[C]], i64 0, i64 14 +; CHECK-NEXT: [[VEC_CAST209:%.*]] = bitcast double* [[VEC_GEP208]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP44]], <2 x double>* [[VEC_CAST209]], align 8 +; CHECK-NEXT: ret void +; +entry: + %a = load <16 x double>, <16 x double>* %A, align 8 + %c = call <16 x double> @llvm.matrix.multiply(<16 x double> %a, <16 x double> %a, i32 4, i32 4, i32 4) + store <16 x double> %c, <16 x double>* %C, align 8 + ret void +} + declare <16 x double> @llvm.matrix.multiply(<16 x double>, <16 x double>, i32, i32, i32) From a92964779cb5fa59e832816b14a30bc8dbf927a9 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Fri, 2 Jul 2021 09:05:12 -0400 Subject: [PATCH 519/619] Revert "[InstrProfiling] Use external weak reference for bias variable" This reverts commit 33a7b4d9d8e6a113108aa71ed78ca32a83c68523. Breaks check-profile on macOS, see comments on https://reviews.llvm.org/D105176 --- compiler-rt/lib/profile/CMakeLists.txt | 1 + compiler-rt/lib/profile/InstrProfiling.h | 8 ++++---- compiler-rt/lib/profile/InstrProfilingBiasVar.c | 15 +++++++++++++++ compiler-rt/lib/profile/InstrProfilingFile.c | 4 +--- .../lib/profile/InstrProfilingPlatformFuchsia.c | 7 ++++--- .../Transforms/Instrumentation/InstrProfiling.cpp | 9 --------- .../InstrProfiling/runtime-counter-relocation.ll | 3 +-- 7 files changed, 26 insertions(+), 21 deletions(-) create mode 100644 compiler-rt/lib/profile/InstrProfilingBiasVar.c diff --git a/compiler-rt/lib/profile/CMakeLists.txt b/compiler-rt/lib/profile/CMakeLists.txt index f5e13574b7ce8..f899e402d9222 100644 --- a/compiler-rt/lib/profile/CMakeLists.txt +++ b/compiler-rt/lib/profile/CMakeLists.txt @@ -53,6 +53,7 @@ set(PROFILE_SOURCES InstrProfiling.c InstrProfilingInternal.c InstrProfilingValue.c + InstrProfilingBiasVar.c InstrProfilingBuffer.c InstrProfilingFile.c InstrProfilingMerge.c diff --git a/compiler-rt/lib/profile/InstrProfiling.h b/compiler-rt/lib/profile/InstrProfiling.h index 1c0e8f3c5c8ca..39fe4db73da62 100644 --- a/compiler-rt/lib/profile/InstrProfiling.h +++ b/compiler-rt/lib/profile/InstrProfiling.h @@ -320,10 +320,10 @@ extern uint64_t INSTR_PROF_RAW_VERSION_VAR; /* __llvm_profile_raw_version */ extern char INSTR_PROF_PROFILE_NAME_VAR[1]; /* __llvm_profile_filename. */ /*! - * This variable is a weak external reference which could be used to detect - * whether or not the compiler defined this symbol. + * This variable is a weak symbol defined in InstrProfilingBiasVar.c. It + * allows compiler instrumentation to provide overriding definition with + * value from compiler command line. This variable has hidden visibility. */ -COMPILER_RT_VISIBILITY COMPILER_RT_WEAK extern intptr_t - __llvm_profile_counter_bias; +COMPILER_RT_VISIBILITY extern intptr_t __llvm_profile_counter_bias; #endif /* PROFILE_INSTRPROFILING_H_ */ diff --git a/compiler-rt/lib/profile/InstrProfilingBiasVar.c b/compiler-rt/lib/profile/InstrProfilingBiasVar.c new file mode 100644 index 0000000000000..05745fd858d97 --- /dev/null +++ b/compiler-rt/lib/profile/InstrProfilingBiasVar.c @@ -0,0 +1,15 @@ +/*===- InstrProfilingBiasVar.c - profile counter bias variable setup ------===*\ +|* +|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +|* See https://llvm.org/LICENSE.txt for license information. +|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +|* +\*===----------------------------------------------------------------------===*/ + +#include "InstrProfiling.h" + +/* The runtime should only provide its own definition of this symbol when the + * user has not specified one. Set this up by moving the runtime's copy of this + * symbol to an object file within the archive. + */ +COMPILER_RT_WEAK intptr_t __llvm_profile_counter_bias = -1; diff --git a/compiler-rt/lib/profile/InstrProfilingFile.c b/compiler-rt/lib/profile/InstrProfilingFile.c index d88531cbcb633..420e8246f4337 100644 --- a/compiler-rt/lib/profile/InstrProfilingFile.c +++ b/compiler-rt/lib/profile/InstrProfilingFile.c @@ -999,9 +999,7 @@ void __llvm_profile_initialize_file(void) { ProfileNameSpecifier PNS = PNS_unknown; int hasCommandLineOverrider = (INSTR_PROF_PROFILE_NAME_VAR[0] != 0); - /* This symbol is defined by the compiler when runtime counter relocation is - * used and runtime provides a weak external reference so we can check it. */ - if (&__llvm_profile_counter_bias) + if (__llvm_profile_counter_bias != -1) lprofSetRuntimeCounterRelocation(1); EnvFilenamePat = getFilenamePatFromEnv(); diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformFuchsia.c b/compiler-rt/lib/profile/InstrProfilingPlatformFuchsia.c index 31f3e11a072e1..8bd5e969aa50c 100644 --- a/compiler-rt/lib/profile/InstrProfilingPlatformFuchsia.c +++ b/compiler-rt/lib/profile/InstrProfilingPlatformFuchsia.c @@ -116,9 +116,10 @@ void __llvm_profile_initialize(void) { return; } - /* This symbol is defined by the compiler when runtime counter relocation is - * used and runtime provides a weak external reference so we can check it. */ - if (!&__llvm_profile_counter_bias) { + /* This symbol is defined as weak and initialized to -1 by the runtimer, but + * compiler will generate a strong definition initialized to 0 when runtime + * counter relocation is used. */ + if (__llvm_profile_counter_bias == -1) { lprofWrite("LLVM Profile: counter relocation at runtime is required\n"); return; } diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index 9264f83156c55..721f8c034438f 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -690,19 +690,10 @@ void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) { Type *Int64Ty = Type::getInt64Ty(M->getContext()); GlobalVariable *Bias = M->getGlobalVariable(getInstrProfCounterBiasVarName()); if (!Bias) { - // Compiler must define this variable when runtime counter relocation - // is being used. Runtime has a weak external reference that is used - // to check whether that's the case or not. Bias = new GlobalVariable(*M, Int64Ty, false, GlobalValue::LinkOnceODRLinkage, Constant::getNullValue(Int64Ty), getInstrProfCounterBiasVarName()); Bias->setVisibility(GlobalVariable::HiddenVisibility); - // A definition that's weak (linkonce_odr) without being in a COMDAT - // section wouldn't lead to link errors, but it would lead to a dead - // data word from every TU but one. Putting it in COMDAT ensures there - // will be exactly one data slot in the link. - if (TT.supportsCOMDAT()) - Bias->setComdat(M->getOrInsertComdat(Bias->getName())); } LI = Builder.CreateLoad(Int64Ty, Bias); } diff --git a/llvm/test/Instrumentation/InstrProfiling/runtime-counter-relocation.ll b/llvm/test/Instrumentation/InstrProfiling/runtime-counter-relocation.ll index cd5d36b8a6e3e..672492474c5ff 100644 --- a/llvm/test/Instrumentation/InstrProfiling/runtime-counter-relocation.ll +++ b/llvm/test/Instrumentation/InstrProfiling/runtime-counter-relocation.ll @@ -4,8 +4,7 @@ target triple = "x86_64-unknown-linux-gnu" @__profn_foo = private constant [3 x i8] c"foo" -; RELOC: $__llvm_profile_counter_bias = comdat any -; RELOC: @__llvm_profile_counter_bias = linkonce_odr hidden global i64 0, comdat +; RELOC: @__llvm_profile_counter_bias = linkonce_odr hidden global i64 0 ; CHECK-LABEL: define void @foo ; CHECK-NEXT: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_foo, i64 0, i64 0) From 5df556ac8bb8c5f4ef3dff1a2039dd389d1d27c0 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Fri, 2 Jul 2021 13:05:44 +0000 Subject: [PATCH 520/619] [gn build] Port a92964779cb5 --- llvm/utils/gn/secondary/compiler-rt/lib/profile/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/profile/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/profile/BUILD.gn index a0bc9b72c3652..5fab007153e49 100644 --- a/llvm/utils/gn/secondary/compiler-rt/lib/profile/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/lib/profile/BUILD.gn @@ -34,6 +34,7 @@ static_library("profile") { "GCDAProfiling.c", "InstrProfiling.c", "InstrProfiling.h", + "InstrProfilingBiasVar.c", "InstrProfilingBuffer.c", "InstrProfilingFile.c", "InstrProfilingInternal.c", From d181fd918d18cbd99768f025e14a69d35d275f14 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 2 Jul 2021 14:27:27 +0100 Subject: [PATCH 521/619] [CostModel][X86] Drop some hard coded fp<->int scalarization costs Scalarization costs handling is a lot better now, and the hard coded costs were higher than the worse case numbers from the script in D103695 --- llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 13 ------------- llvm/test/Analysis/CostModel/X86/sitofp.ll | 6 +++--- 2 files changed, 3 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index d55cd8a8c7a84..9eb5abe4dd9b5 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -1977,13 +1977,6 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i32, 10 }, { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 5 }, { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 6 }, - // The generic code to compute the scalar overhead is currently broken. - // Workaround this limitation by estimating the scalarization overhead - // here. We have roughly 10 instructions per scalar element. - // Multiply that by the vector width. - // FIXME: remove that when PR19268 is fixed. - { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i64, 13 }, - { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i64, 13 }, { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 4 }, { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f64, 3 }, @@ -2003,12 +1996,6 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 3 }, { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f32, 9 }, { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f64, 19 }, - // This node is expanded into scalarized operations but BasicTTI is overly - // optimistic estimating its cost. It computes 3 per element (one - // vector-extract, one scalar conversion and one vector-insert). The - // problem is that the inserts form a read-modify-write chain so latency - // should be factored in too. Inflating the cost per element by 1. - { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f64, 4*4 }, { ISD::FP_EXTEND, MVT::v4f64, MVT::v4f32, 1 }, { ISD::FP_ROUND, MVT::v4f32, MVT::v4f64, 1 }, diff --git a/llvm/test/Analysis/CostModel/X86/sitofp.ll b/llvm/test/Analysis/CostModel/X86/sitofp.ll index b3c400c93b9fe..b327454c1d090 100644 --- a/llvm/test/Analysis/CostModel/X86/sitofp.ll +++ b/llvm/test/Analysis/CostModel/X86/sitofp.ll @@ -122,14 +122,14 @@ define i32 @sitofp_i64_double() { ; AVX-LABEL: 'sitofp_i64_double' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f64 = sitofp i64 undef to double ; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v2i64_v2f64 = sitofp <2 x i64> undef to <2 x double> -; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cvt_v4i64_v4f64 = sitofp <4 x i64> undef to <4 x double> -; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cvt_v8i64_v8f64 = sitofp <8 x i64> undef to <8 x double> +; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cvt_v4i64_v4f64 = sitofp <4 x i64> undef to <4 x double> +; AVX-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cvt_v8i64_v8f64 = sitofp <8 x i64> undef to <8 x double> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'sitofp_i64_double' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f64 = sitofp i64 undef to double ; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v2i64_v2f64 = sitofp <2 x i64> undef to <2 x double> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cvt_v4i64_v4f64 = sitofp <4 x i64> undef to <4 x double> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cvt_v4i64_v4f64 = sitofp <4 x i64> undef to <4 x double> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %cvt_v8i64_v8f64 = sitofp <8 x i64> undef to <8 x double> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; From d867634fbdb1f1f521981c244c410a9b839c8325 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 2 Jul 2021 14:29:18 +0100 Subject: [PATCH 522/619] [CostModel][X86] Update comment describing source of costs - we now use llvm-mca more than IACA --- llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 9eb5abe4dd9b5..9064cca9da520 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -2074,9 +2074,9 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, }; static const TypeConversionCostTblEntry SSE2ConversionTbl[] = { - // These are somewhat magic numbers justified by looking at the output of - // Intel's IACA, running some kernels and making sure when we take - // legalization into account the throughput will be overestimated. + // These are somewhat magic numbers justified by comparing the + // output of llvm-mca for our various supported scheduler models + // and basing it off the worst case scenario. { ISD::SINT_TO_FP, MVT::v4f32, MVT::v16i8, 3 }, { ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 4 }, { ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 3 }, From e5fdff1cf822e8687ddb5568d14b8a609a5831d0 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 2 Jul 2021 14:50:24 +0100 Subject: [PATCH 523/619] [X86][SLM] Keep similar scheduler costs types together. NFCI. The SLM model is inconsistent about where it kept its 'unsupported' schedule classes - better to keep them close to similar classes. I'm not sure why some ymm classes are defined and others are unsupported though (but I haven't altered them) - the only SLM-like CPU supporting any ymm is KNL and that currently uses the HSW model. --- llvm/lib/Target/X86/X86ScheduleSLM.td | 67 +++++++++++++-------------- 1 file changed, 33 insertions(+), 34 deletions(-) diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td index 1f47d1aa404ca..9adc302fa284d 100644 --- a/llvm/lib/Target/X86/X86ScheduleSLM.td +++ b/llvm/lib/Target/X86/X86ScheduleSLM.td @@ -111,6 +111,7 @@ defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; +def : WriteRes; defm : X86WriteRes; defm : X86WriteRes; @@ -224,6 +225,10 @@ defm : SLMWriteResPair defm : SLMWriteResPair; defm : SLMWriteResPair; defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; @@ -270,6 +275,13 @@ defm : SLMWriteResPair; defm : SLMWriteResPair; defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; +defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; // Conversion between integer and float. defm : SLMWriteResPair; @@ -299,6 +311,17 @@ defm : SLMWriteResPair; defm : SLMWriteResPair; defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; + +defm : X86WriteResUnsupported; +defm : X86WriteResUnsupported; +defm : X86WriteResUnsupported; +defm : X86WriteResUnsupported; +defm : X86WriteResUnsupported; +defm : X86WriteResUnsupported; + // Vector integer operations. def : WriteRes { let Latency = 3; } def : WriteRes { let Latency = 3; } @@ -330,6 +353,10 @@ defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : X86WriteResPairUnsupported; +defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; + defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; @@ -361,6 +388,9 @@ defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : X86WriteResPairUnsupported; +defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : X86WriteResPairUnsupported; @@ -369,6 +399,9 @@ defm : SLMWriteResPair; defm : SLMWriteResPair; defm : X86WriteResPairUnsupported; defm : SLMWriteResPair; +defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; // Vector insert/extract operations. defm : SLMWriteResPair; @@ -424,40 +457,6 @@ def : WriteRes { let Latency = 100; } def : WriteRes; def : WriteRes; -// AVX/FMA is not supported on that architecture, but we should define the basic -// scheduling resources anyway. -def : WriteRes; -defm : X86WriteResPairUnsupported; -defm : X86WriteResPairUnsupported; -defm : SLMWriteResPair; -defm : X86WriteResPairUnsupported; -defm : X86WriteResPairUnsupported; -defm : SLMWriteResPair; -defm : X86WriteResPairUnsupported; -defm : X86WriteResPairUnsupported; -defm : X86WriteResPairUnsupported; -defm : X86WriteResPairUnsupported; -defm : X86WriteResPairUnsupported; -defm : X86WriteResPairUnsupported; -defm : X86WriteResPairUnsupported; -defm : SLMWriteResPair; -defm : X86WriteResPairUnsupported; -defm : X86WriteResPairUnsupported; -defm : X86WriteResPairUnsupported; -defm : X86WriteResPairUnsupported; -defm : X86WriteResPairUnsupported; -defm : X86WriteResPairUnsupported; - -defm : X86WriteResPairUnsupported; -defm : X86WriteResPairUnsupported; -defm : X86WriteResPairUnsupported; -defm : X86WriteResUnsupported; -defm : X86WriteResUnsupported; -defm : X86WriteResUnsupported; -defm : X86WriteResUnsupported; -defm : X86WriteResUnsupported; -defm : X86WriteResUnsupported; - // Remaining SLM instrs. def SLMWriteResGroup1rr : SchedWriteRes<[SLM_FPC_RSV01]> { From 7a2c1acd5fef32bdd2e374229f66ab047c7538b5 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Fri, 2 Jul 2021 09:56:21 -0400 Subject: [PATCH 524/619] [llvm-dwarfdump] Add comment saying where DumpDebugFrame comes from --- llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp b/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp index f099d4e45279d..1180e029f2553 100644 --- a/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp +++ b/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp @@ -144,6 +144,7 @@ static std::array, (unsigned)DIDT_ID_Count> #include "llvm/BinaryFormat/Dwarf.def" #undef HANDLE_DWARF_SECTION +// The aliased DumpDebugFrame is created by the Dwarf.def x-macro just above. static alias DumpDebugFrameAlias("eh-frame", desc("Alias for --debug-frame"), NotHidden, cat(SectionCategory), aliasopt(DumpDebugFrame)); From 50566947e98ea845030cfa3b4c199fb9a2052d53 Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Fri, 2 Jul 2021 15:55:43 +0200 Subject: [PATCH 525/619] [clangd] Fix possible assertion fail in TUScheduler BlockUntilIdle is supposed to return false if it fails. If an intermediate step fails to clear the queue, we shouldn't charge ahead and assert on the state of the queue. --- clang-tools-extra/clangd/TUScheduler.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/clang-tools-extra/clangd/TUScheduler.cpp b/clang-tools-extra/clangd/TUScheduler.cpp index 09c68a3a250ba..05ce4f9c8272d 100644 --- a/clang-tools-extra/clangd/TUScheduler.cpp +++ b/clang-tools-extra/clangd/TUScheduler.cpp @@ -1380,11 +1380,13 @@ bool ASTWorker::blockUntilIdle(Deadline Timeout) const { }; // Make sure ASTWorker has processed all requests, which might issue new // updates to PreamblePeer. - WaitUntilASTWorkerIsIdle(); + if (WaitUntilASTWorkerIsIdle()) + return false; // Now that ASTWorker processed all requests, ensure PreamblePeer has served // all update requests. This might create new PreambleRequests for the // ASTWorker. - PreamblePeer.blockUntilIdle(Timeout); + if (!PreamblePeer.blockUntilIdle(Timeout)) + return false; assert(Requests.empty() && "No new normal tasks can be scheduled concurrently with " "blockUntilIdle(): ASTWorker isn't threadsafe"); From fff966b6855aee6fc0d0d4cd401cdd525a838572 Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Fri, 2 Jul 2021 16:07:11 +0200 Subject: [PATCH 526/619] Revert "[clangd] Fix possible assertion fail in TUScheduler" This reverts commit 50566947e98ea845030cfa3b4c199fb9a2052d53. --- clang-tools-extra/clangd/TUScheduler.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/clang-tools-extra/clangd/TUScheduler.cpp b/clang-tools-extra/clangd/TUScheduler.cpp index 05ce4f9c8272d..09c68a3a250ba 100644 --- a/clang-tools-extra/clangd/TUScheduler.cpp +++ b/clang-tools-extra/clangd/TUScheduler.cpp @@ -1380,13 +1380,11 @@ bool ASTWorker::blockUntilIdle(Deadline Timeout) const { }; // Make sure ASTWorker has processed all requests, which might issue new // updates to PreamblePeer. - if (WaitUntilASTWorkerIsIdle()) - return false; + WaitUntilASTWorkerIsIdle(); // Now that ASTWorker processed all requests, ensure PreamblePeer has served // all update requests. This might create new PreambleRequests for the // ASTWorker. - if (!PreamblePeer.blockUntilIdle(Timeout)) - return false; + PreamblePeer.blockUntilIdle(Timeout); assert(Requests.empty() && "No new normal tasks can be scheduled concurrently with " "blockUntilIdle(): ASTWorker isn't threadsafe"); From 597ccc92ce4b0f90883406d1f78d9d776f602804 Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Fri, 2 Jul 2021 13:06:37 +0300 Subject: [PATCH 527/619] https://godbolt.org/z/5vhv4K5b8 --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 19 ++++++------- .../SimplifyCFG/trapping-load-unreachable.ll | 28 ++++++------------- 2 files changed, 16 insertions(+), 31 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 6288a62326554..8294a79a1c0a1 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -4673,17 +4673,13 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) { if (BBI->mayHaveSideEffects()) { if (auto *SI = dyn_cast(BBI)) { + // Temporarily disable removal of volatile stores preceding unreachable, + // pending a potential LangRef change permitting volatile stores to + // trap. + // TODO: Either remove this code, or properly integrate the check into + // isGuaranteedToTransferExecutionToSuccessor(). if (SI->isVolatile()) break; - } else if (auto *LI = dyn_cast(BBI)) { - if (LI->isVolatile()) - break; - } else if (auto *RMWI = dyn_cast(BBI)) { - if (RMWI->isVolatile()) - break; - } else if (auto *CXI = dyn_cast(BBI)) { - if (CXI->isVolatile()) - break; } else if (isa(BBI)) { // A catchpad may invoke exception object constructors and such, which // in some languages can be arbitrary code, so be conservative by @@ -4692,8 +4688,9 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) { if (classifyEHPersonality(BB->getParent()->getPersonalityFn()) != EHPersonality::CoreCLR) break; - } else if (!isa(BBI) && !isa(BBI) && - !isa(BBI)) { + } else if (!isa(BBI) && !isa(BBI) && + !isa(BBI) && !isa(BBI) && + !isa(BBI) && !isa(BBI)) { break; } // Note that deleting LandingPad's here is in fact okay, although it diff --git a/llvm/test/Transforms/SimplifyCFG/trapping-load-unreachable.ll b/llvm/test/Transforms/SimplifyCFG/trapping-load-unreachable.ll index b277cb6cf4f9a..e437f40cbe753 100644 --- a/llvm/test/Transforms/SimplifyCFG/trapping-load-unreachable.ll +++ b/llvm/test/Transforms/SimplifyCFG/trapping-load-unreachable.ll @@ -10,11 +10,8 @@ define void @test1(i32 %x) nounwind { ; CHECK-LABEL: @test1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[X:%.*]], 0 -; CHECK-NEXT: br i1 [[TMP0]], label [[BB:%.*]], label [[RETURN:%.*]] -; CHECK: bb: -; CHECK-NEXT: [[TMP1:%.*]] = load volatile i32, i32* null, align 4 -; CHECK-NEXT: unreachable -; CHECK: return: +; CHECK-NEXT: [[TMP1:%.*]] = xor i1 [[TMP0]], true +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP1]]) ; CHECK-NEXT: ret void ; entry: @@ -34,11 +31,8 @@ define void @test1_no_null_opt(i32 %x) nounwind #0 { ; CHECK-LABEL: @test1_no_null_opt( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[X:%.*]], 0 -; CHECK-NEXT: br i1 [[TMP0]], label [[BB:%.*]], label [[RETURN:%.*]] -; CHECK: bb: -; CHECK-NEXT: [[TMP1:%.*]] = load volatile i32, i32* null, align 4 -; CHECK-NEXT: unreachable -; CHECK: return: +; CHECK-NEXT: [[TMP1:%.*]] = xor i1 [[TMP0]], true +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP1]]) ; CHECK-NEXT: ret void ; entry: @@ -127,11 +121,8 @@ F: define void @test5(i1 %C, i32* %P) { ; CHECK-LABEL: @test5( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 [[C:%.*]], label [[T:%.*]], label [[F:%.*]] -; CHECK: T: -; CHECK-NEXT: [[TMP0:%.*]] = cmpxchg volatile i32* [[P:%.*]], i32 0, i32 1 seq_cst seq_cst, align 4 -; CHECK-NEXT: unreachable -; CHECK: F: +; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[C:%.*]], true +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP0]]) ; CHECK-NEXT: ret void ; entry: @@ -147,11 +138,8 @@ F: define void @test6(i1 %C, i32* %P) { ; CHECK-LABEL: @test6( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 [[C:%.*]], label [[T:%.*]], label [[F:%.*]] -; CHECK: T: -; CHECK-NEXT: [[TMP0:%.*]] = atomicrmw volatile xchg i32* [[P:%.*]], i32 0 seq_cst, align 4 -; CHECK-NEXT: unreachable -; CHECK: F: +; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[C:%.*]], true +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP0]]) ; CHECK-NEXT: ret void ; entry: From d9d65527c289fb27a9f92f150723bbb3c58e413f Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Fri, 2 Jul 2021 17:16:33 +0300 Subject: [PATCH 528/619] [NFCI][InstCombine] visitUnreachableInst(): iteratively erase instructions leading to unreachable In the original review D87149 it was mentioned that this approach was tried, and it lead to infinite combine loops, but i'm not seeing anything like that now, neither in the `check-llvm`, nor on some codebases i tried. Reviewed By: spatel Differential Revision: https://reviews.llvm.org/D105339 --- .../Transforms/InstCombine/InstructionCombining.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 8f75a7eac6f95..d29527f3a0dcb 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2874,23 +2874,24 @@ Instruction *InstCombinerImpl::visitUnreachableInst(UnreachableInst &I) { // Try to remove the previous instruction if it must lead to unreachable. // This includes instructions like stores and "llvm.assume" that may not get // removed by simple dead code elimination. - Instruction *Prev = I.getPrevNonDebugInstruction(); - if (Prev && !Prev->isEHPad() && - isGuaranteedToTransferExecutionToSuccessor(Prev)) { + while (Instruction *Prev = I.getPrevNonDebugInstruction()) { + if (Prev->isEHPad() || !isGuaranteedToTransferExecutionToSuccessor(Prev)) + return nullptr; // Can not drop any more instructions. We're done here. // Temporarily disable removal of volatile stores preceding unreachable, // pending a potential LangRef change permitting volatile stores to trap. // TODO: Either remove this code, or properly integrate the check into // isGuaranteedToTransferExecutionToSuccessor(). if (auto *SI = dyn_cast(Prev)) if (SI->isVolatile()) - return nullptr; + return nullptr; // Can not drop this instruction. We're done here. // A value may still have uses before we process it here (for example, in // another unreachable block), so convert those to poison. replaceInstUsesWith(*Prev, PoisonValue::get(Prev->getType())); eraseInstFromFunction(*Prev); - return &I; } + assert(I.getParent()->sizeWithoutDebug() == 1 && "The block is now empty."); + // FIXME: recurse into unconditional predecessors? return nullptr; } From 93a1642763c9105f0e7d8388dcd7dfc6e2bae630 Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Fri, 2 Jul 2021 17:17:47 +0300 Subject: [PATCH 529/619] Revert "[NFCI][InstCombine] visitUnreachableInst(): iteratively erase instructions leading to unreachable" This reverts commit d9d65527c289fb27a9f92f150723bbb3c58e413f. --- .../Transforms/InstCombine/InstructionCombining.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index d29527f3a0dcb..8f75a7eac6f95 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2874,24 +2874,23 @@ Instruction *InstCombinerImpl::visitUnreachableInst(UnreachableInst &I) { // Try to remove the previous instruction if it must lead to unreachable. // This includes instructions like stores and "llvm.assume" that may not get // removed by simple dead code elimination. - while (Instruction *Prev = I.getPrevNonDebugInstruction()) { - if (Prev->isEHPad() || !isGuaranteedToTransferExecutionToSuccessor(Prev)) - return nullptr; // Can not drop any more instructions. We're done here. + Instruction *Prev = I.getPrevNonDebugInstruction(); + if (Prev && !Prev->isEHPad() && + isGuaranteedToTransferExecutionToSuccessor(Prev)) { // Temporarily disable removal of volatile stores preceding unreachable, // pending a potential LangRef change permitting volatile stores to trap. // TODO: Either remove this code, or properly integrate the check into // isGuaranteedToTransferExecutionToSuccessor(). if (auto *SI = dyn_cast(Prev)) if (SI->isVolatile()) - return nullptr; // Can not drop this instruction. We're done here. + return nullptr; // A value may still have uses before we process it here (for example, in // another unreachable block), so convert those to poison. replaceInstUsesWith(*Prev, PoisonValue::get(Prev->getType())); eraseInstFromFunction(*Prev); + return &I; } - assert(I.getParent()->sizeWithoutDebug() == 1 && "The block is now empty."); - // FIXME: recurse into unconditional predecessors? return nullptr; } From 24d271bb18bfdb314762ceebc10b57a9c50ed506 Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Fri, 2 Jul 2021 17:17:55 +0300 Subject: [PATCH 530/619] Revert "https://godbolt.org/z/5vhv4K5b8" This reverts commit 597ccc92ce4b0f90883406d1f78d9d776f602804. --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 19 +++++++------ .../SimplifyCFG/trapping-load-unreachable.ll | 28 +++++++++++++------ 2 files changed, 31 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 8294a79a1c0a1..6288a62326554 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -4673,13 +4673,17 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) { if (BBI->mayHaveSideEffects()) { if (auto *SI = dyn_cast(BBI)) { - // Temporarily disable removal of volatile stores preceding unreachable, - // pending a potential LangRef change permitting volatile stores to - // trap. - // TODO: Either remove this code, or properly integrate the check into - // isGuaranteedToTransferExecutionToSuccessor(). if (SI->isVolatile()) break; + } else if (auto *LI = dyn_cast(BBI)) { + if (LI->isVolatile()) + break; + } else if (auto *RMWI = dyn_cast(BBI)) { + if (RMWI->isVolatile()) + break; + } else if (auto *CXI = dyn_cast(BBI)) { + if (CXI->isVolatile()) + break; } else if (isa(BBI)) { // A catchpad may invoke exception object constructors and such, which // in some languages can be arbitrary code, so be conservative by @@ -4688,9 +4692,8 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) { if (classifyEHPersonality(BB->getParent()->getPersonalityFn()) != EHPersonality::CoreCLR) break; - } else if (!isa(BBI) && !isa(BBI) && - !isa(BBI) && !isa(BBI) && - !isa(BBI) && !isa(BBI)) { + } else if (!isa(BBI) && !isa(BBI) && + !isa(BBI)) { break; } // Note that deleting LandingPad's here is in fact okay, although it diff --git a/llvm/test/Transforms/SimplifyCFG/trapping-load-unreachable.ll b/llvm/test/Transforms/SimplifyCFG/trapping-load-unreachable.ll index e437f40cbe753..b277cb6cf4f9a 100644 --- a/llvm/test/Transforms/SimplifyCFG/trapping-load-unreachable.ll +++ b/llvm/test/Transforms/SimplifyCFG/trapping-load-unreachable.ll @@ -10,8 +10,11 @@ define void @test1(i32 %x) nounwind { ; CHECK-LABEL: @test1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[X:%.*]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = xor i1 [[TMP0]], true -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP1]]) +; CHECK-NEXT: br i1 [[TMP0]], label [[BB:%.*]], label [[RETURN:%.*]] +; CHECK: bb: +; CHECK-NEXT: [[TMP1:%.*]] = load volatile i32, i32* null, align 4 +; CHECK-NEXT: unreachable +; CHECK: return: ; CHECK-NEXT: ret void ; entry: @@ -31,8 +34,11 @@ define void @test1_no_null_opt(i32 %x) nounwind #0 { ; CHECK-LABEL: @test1_no_null_opt( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[X:%.*]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = xor i1 [[TMP0]], true -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP1]]) +; CHECK-NEXT: br i1 [[TMP0]], label [[BB:%.*]], label [[RETURN:%.*]] +; CHECK: bb: +; CHECK-NEXT: [[TMP1:%.*]] = load volatile i32, i32* null, align 4 +; CHECK-NEXT: unreachable +; CHECK: return: ; CHECK-NEXT: ret void ; entry: @@ -121,8 +127,11 @@ F: define void @test5(i1 %C, i32* %P) { ; CHECK-LABEL: @test5( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[C:%.*]], true -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP0]]) +; CHECK-NEXT: br i1 [[C:%.*]], label [[T:%.*]], label [[F:%.*]] +; CHECK: T: +; CHECK-NEXT: [[TMP0:%.*]] = cmpxchg volatile i32* [[P:%.*]], i32 0, i32 1 seq_cst seq_cst, align 4 +; CHECK-NEXT: unreachable +; CHECK: F: ; CHECK-NEXT: ret void ; entry: @@ -138,8 +147,11 @@ F: define void @test6(i1 %C, i32* %P) { ; CHECK-LABEL: @test6( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[C:%.*]], true -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP0]]) +; CHECK-NEXT: br i1 [[C:%.*]], label [[T:%.*]], label [[F:%.*]] +; CHECK: T: +; CHECK-NEXT: [[TMP0:%.*]] = atomicrmw volatile xchg i32* [[P:%.*]], i32 0 seq_cst, align 4 +; CHECK-NEXT: unreachable +; CHECK: F: ; CHECK-NEXT: ret void ; entry: From dadedc99e9b276cfd0f2ebf9eb553650b07b4ca4 Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Fri, 2 Jul 2021 17:18:48 +0300 Subject: [PATCH 531/619] [InstCombine] visitUnreachableInst(): iteratively erase instructions leading to unreachable In the original review D87149 it was mentioned that this approach was tried, and it lead to infinite combine loops, but i'm not seeing anything like that now, neither in the `check-llvm`, nor on some codebases i tried. This is a recommit of d9d65527c289fb27a9f92f150723bbb3c58e413f, which i immediately reverted because i have messed up something during branch switch, and 597ccc92ce4b0f90883406d1f78d9d776f602804 accidentally ended up being pushed, which was very much not the intention. Reviewed By: spatel Differential Revision: https://reviews.llvm.org/D105339 --- .../Transforms/InstCombine/InstructionCombining.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 8f75a7eac6f95..d29527f3a0dcb 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2874,23 +2874,24 @@ Instruction *InstCombinerImpl::visitUnreachableInst(UnreachableInst &I) { // Try to remove the previous instruction if it must lead to unreachable. // This includes instructions like stores and "llvm.assume" that may not get // removed by simple dead code elimination. - Instruction *Prev = I.getPrevNonDebugInstruction(); - if (Prev && !Prev->isEHPad() && - isGuaranteedToTransferExecutionToSuccessor(Prev)) { + while (Instruction *Prev = I.getPrevNonDebugInstruction()) { + if (Prev->isEHPad() || !isGuaranteedToTransferExecutionToSuccessor(Prev)) + return nullptr; // Can not drop any more instructions. We're done here. // Temporarily disable removal of volatile stores preceding unreachable, // pending a potential LangRef change permitting volatile stores to trap. // TODO: Either remove this code, or properly integrate the check into // isGuaranteedToTransferExecutionToSuccessor(). if (auto *SI = dyn_cast(Prev)) if (SI->isVolatile()) - return nullptr; + return nullptr; // Can not drop this instruction. We're done here. // A value may still have uses before we process it here (for example, in // another unreachable block), so convert those to poison. replaceInstUsesWith(*Prev, PoisonValue::get(Prev->getType())); eraseInstFromFunction(*Prev); - return &I; } + assert(I.getParent()->sizeWithoutDebug() == 1 && "The block is now empty."); + // FIXME: recurse into unconditional predecessors? return nullptr; } From ad0050c6073d8b9a6cbc9ab94c75fc5ba30051fd Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Fri, 2 Jul 2021 07:41:22 +0000 Subject: [PATCH 532/619] [mlir][Linalg] Add comprehensive bufferization support for TiledLoopOp (14/n) Differential Revision: https://reviews.llvm.org/D105335 --- .../Transforms/ComprehensiveBufferize.cpp | 287 ++++++++++++++---- ...omprehensive-module-bufferize-invalid.mlir | 24 +- .../comprehensive-module-bufferize.mlir | 57 ++++ 3 files changed, 303 insertions(+), 65 deletions(-) diff --git a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp index 178676c5e4b7b..ad296ff8c199e 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp @@ -296,13 +296,13 @@ static InPlaceSpec getInPlace(BlockArgument bbArg) { return InPlaceSpec::None; return inplaceAttr.getValue() ? InPlaceSpec::True : InPlaceSpec::False; } - // Interestingly, scf::ForOp's bbArg can **always** be viewed inplace from the - // perspective of ops nested under it: + // Interestingly, scf::ForOp's and TiledLoop's bbArg can **always** be viewed + // inplace from the perspective of ops nested under: // 1. Either the matching iter operand is not bufferized inplace and an // alloc + optional copy makes the bbArg itself inplaceable. // 2. Or the matching iter operand is bufferized inplace and bbArg just // bufferizes to that too. - if (auto forOp = dyn_cast(bbArg.getOwner()->getParentOp())) + if (isa(bbArg.getOwner()->getParentOp())) return InPlaceSpec::True; // Unknown cases. return InPlaceSpec::None; @@ -359,19 +359,28 @@ static bool hasKnownBufferizationAliasingBehavior(Operation *op) { isa(op) // clang-format on || (none_of(op->getResultTypes(), isaTensor) && none_of(op->getOperandTypes(), isaTensor)); } +/// Return the OpResult that may bufferize into the same buffer as `opOperand` +/// when the op is bufferized inplace. +/// Return null if no such result exists. +static OpResult getInplaceableOpResult(TiledLoopOp op, OpOperand &opOperand) { + return op.getTiedOpResult(opOperand); +} + /// Return the OpResult that may bufferize into the same buffer as `opOperand` /// when the op is bufferized inplace. /// Return null if no such result exists. @@ -441,8 +450,9 @@ static OpResult getInplaceableOpResult(OpOperand &opOperand) { // result(s). .Case( [&](auto op) { return getInplaceableOpResult(op, opOperand); }) // ExtractSliceOp is special, when bufferized inplace it just returns an @@ -469,18 +479,23 @@ static Optional getAliasingOpOperand(OpResult result) { return TypeSwitch(result.getDefiningOp()) .Case([&](tensor::CastOp op) { return &op->getOpOperand(0); }) .Case([&](ConstantOp op) { return &op->getOpOperand(0); }) - .Case([&](LinalgOp op) { - return op.getOutputTensorOperands()[result.getResultNumber()]; - }) .Case([&](ExtractSliceOp op) { return &op->getOpOperand(0); }) - .Case([&](InsertSliceOp op) { return &op->getOpOperand(1); }) - .Case([&](vector::TransferWriteOp op) { return &op->getOpOperand(1); }) // In the case of scf::ForOp, this currently assumes the iter_args / yield // are 1-1. This may fail and is verified at the end. // TODO: update this. .Case([&](scf::ForOp op) { return &op.getIterOpOperands()[result.getResultNumber()]; }) + .Case([&](InsertSliceOp op) { return &op->getOpOperand(1); }) + .Case([&](LinalgOp op) { + return op.getOutputTensorOperands()[result.getResultNumber()]; + }) + .Case([&](TiledLoopOp op) { + // TODO: TiledLoopOp helper method to avoid leaking impl details. + return &op->getOpOperand(op.getNumControlOperands() + + op.getNumInputs() + result.getResultNumber()); + }) + .Case([&](vector::TransferWriteOp op) { return &op->getOpOperand(1); }) .Default([&](Operation *op) { op->dump(); llvm_unreachable("unexpected defining op"); @@ -528,6 +543,10 @@ static bool bufferizesToMemoryRead(OpOperand &opOperand) { // matching bbArg may. if (isa(opOperand.getOwner())) return false; + // TiledLoop alone doesn't bufferize to a memory read, one of the uses of its + // matching bbArg may. + if (isa(opOperand.getOwner())) + return false; // CallOpInterface alone doesn't bufferize to a memory read, one of the uses // of the matching bbArg may. It is the responsibility of the caller to // inspect bbArgs. In the absence of a BufferizationAliasInfo, we need to be @@ -1340,11 +1359,10 @@ createNewAllocDeallocPairForShapedValue(OpBuilder &b, Location loc, /// When allocating a new buffer, analyze whether `op` want to read form that /// buffer. In such a case, insert a copy to ensure the newly allocated buffer /// is properly initialiazed. -static LogicalResult -allocateBuffersForResults(OpBuilder &b, Location loc, LinalgOp op, - SmallVectorImpl &resultBuffers, - BlockAndValueMapping &bvm, - BufferizationAliasInfo &aliasInfo) { +static void allocateBuffersForResults(OpBuilder &b, Location loc, LinalgOp op, + SmallVectorImpl &resultBuffers, + BlockAndValueMapping &bvm, + BufferizationAliasInfo &aliasInfo) { // Take a guard before anything else. OpBuilder::InsertionGuard g(b); @@ -1360,8 +1378,7 @@ allocateBuffersForResults(OpBuilder &b, Location loc, LinalgOp op, OpResult opResult = getInplaceableOpResult(*opOperand); if (getInPlace(opResult) == InPlaceSpec::True) { Value v = lookup(bvm, output); - if (!v) - return failure(); + assert(v && "missing buffer"); resultBuffers.push_back(v); continue; } @@ -1375,17 +1392,13 @@ allocateBuffersForResults(OpBuilder &b, Location loc, LinalgOp op, // Additionally, if the output buffer is used, clone its value for now. if (op.payloadUsesValueFromOperand(opOperand)) { - if (Value v = lookup(bvm, output)) - b.create(loc, v, alloc); - else - return failure(); + Value v = lookup(bvm, output); + b.create(loc, v, alloc); } } if (op->getNumResults()) map(bvm, op->getResults(), resultBuffers); - - return success(); } /// Generic conversion for any LinalgOp on tensors. @@ -1398,7 +1411,7 @@ static LogicalResult bufferize(OpBuilder &b, LinalgOp op, // Ensure op has only tensors. Allow mixed tensor-buffer mode on a per-need // basis. if (!op.hasTensorSemantics()) - return failure(); + return op->emitError() << "op does not have tensor semantics"; b.setInsertionPoint(op); Location loc = op.getLoc(); @@ -1410,14 +1423,11 @@ static LogicalResult bufferize(OpBuilder &b, LinalgOp op, continue; } newInputBuffers.push_back(lookup(bvm, opOperand->get())); - if (!newInputBuffers.back()) - return failure(); + assert(newInputBuffers.back() && "missing buffer"); } SmallVector newOutputBuffers; // Try to allocate new buffers depending on op's inplace semantics. - if (failed(allocateBuffersForResults(b, loc, op, newOutputBuffers, bvm, - aliasInfo))) - return failure(); + allocateBuffersForResults(b, loc, op, newOutputBuffers, bvm, aliasInfo); // Clone the newly bufferized op. SmallVector newOperands = newInputBuffers; @@ -1608,8 +1618,8 @@ static LogicalResult bufferize(OpBuilder &b, ConstantOp constantOp, BlockAndValueMapping &bvm, BufferizationAliasInfo &aliasInfo, GlobalCreator &globalCreator) { - if (!constantOp.getType().dyn_cast()) - return failure(); + assert(constantOp.getType().dyn_cast() && + "not a constant ranked tensor"); // Take a guard before anything else. OpBuilder::InsertionGuard g(b); @@ -1629,11 +1639,15 @@ static LogicalResult bufferize(OpBuilder &b, ConstantOp constantOp, static LogicalResult bufferize(OpBuilder &b, tensor::DimOp dimOp, BlockAndValueMapping &bvm, BufferizationAliasInfo &aliasInfo) { + // Take a guard before anything else. + OpBuilder::InsertionGuard g(b); + b.setInsertionPoint(dimOp); + if (dimOp.source().getType().isa()) { Value v = lookup(bvm, dimOp.source()); - if (!v) - return failure(); - dimOp.sourceMutable().assign(v); + assert(v && "missing buffer"); + dimOp.result().replaceAllUsesWith( + b.create(dimOp.getLoc(), v, dimOp.index())); } return success(); } @@ -1649,10 +1663,12 @@ static LogicalResult bufferize(OpBuilder &b, scf::ForOp forOp, // Otherwise alloc and copy. b.setInsertionPoint(forOp); for (OpResult opResult : forOp->getResults()) { + if (!opResult.getType().isa()) + continue; // TODO: Atm we bail on unranked TensorType because we don't know how to // alloc an UnrankedMemRefType + its underlying ranked MemRefType. - if (!opResult.getType().isa()) - return failure(); + assert(opResult.getType().isa() && + "unsupported unranked tensor"); OpOperand &opOperand = forOp.getOpOperandForResult(opResult); Value operand = opOperand.get(); Value operandBuffer = lookup(bvm, operand); @@ -1730,8 +1746,7 @@ static LogicalResult bufferize(OpBuilder &b, ReturnOp returnOp, if (!tensorType) continue; Value v = lookup(bvm, operand.get()); - if (!v) - return failure(); + assert(v && "missing buffer for result"); Value returnTensor = b.create(returnOp.getLoc(), v); operand.set(returnTensor); aliasInfo.insertNewBufferEquivalence(returnTensor, v); @@ -1740,6 +1755,135 @@ static LogicalResult bufferize(OpBuilder &b, ReturnOp returnOp, return success(); } +/// Bufferization for TiledLoopOp.. +static LogicalResult bufferize(OpBuilder &b, TiledLoopOp tiledLoopOp, + BlockAndValueMapping &bvm, + BufferizationAliasInfo &aliasInfo) { + // Allocate output buffers if needed, forward output tensor args to the + // terminator. + Operation *yieldOp = tiledLoopOp.getBody()->getTerminator(); + Block *body = tiledLoopOp.getBody(); + + // Take copies of the old input and output operands, so we can insert inplace + // easily. + auto oldInputs = llvm::to_vector<4>(tiledLoopOp.inputs()); + auto oldOutputs = llvm::to_vector<4>(tiledLoopOp.outputs()); + + int numLoops = tiledLoopOp.getNumLoops(); + int numControlOperands = tiledLoopOp.getNumControlOperands(); + + // Add buffers for outputs and the corresponding block arguments. + // Keep separate iterators to increment without further leaking impl. details. + // Start with outputs to avoid interference from new input buffers. + int numNewOutputBuffers = 0; + int resultIndex = 0; + int oldOutputBBArgIndex = numLoops + oldInputs.size(); + int nextOutputBBArgIndex = numLoops + oldInputs.size() + oldOutputs.size(); + int nextOutputOperandIndex = + numControlOperands + oldInputs.size() + oldOutputs.size(); + for (Value oldOutputTensor : oldOutputs) { + if (!oldOutputTensor.getType().isa()) { + // Skip and increment the old bbarg index only. + ++oldOutputBBArgIndex; + // Do not increment resultIndex as only tensors are returned. + // TODO: better interface to avoid leaking such impl details. + continue; + } + + assert(oldOutputTensor.getType().isa() && + "bufferizable output must be a ranked tensor"); + + Value outputBuffer = lookup(bvm, oldOutputTensor); + const OpResult &opResult = tiledLoopOp->getResult(resultIndex); + OpOperand &yieldOperand = yieldOp->getOpOperand(resultIndex); + // If the result is not inplaceable, need to allocate a copy for it. + if (getInPlace(opResult) != InPlaceSpec::True) { + auto loc = tiledLoopOp.getLoc(); + Value alloc = createNewAllocDeallocPairForShapedValue( + b, loc, oldOutputTensor, aliasInfo); + // If the tensor comes from `linalg::InitTensorOp`, the value is + // unitialized and we do not need to copy. + // TODO: "matching bbArg does not bufferize to a read" is a more general + // check. + if (!oldOutputTensor.getDefiningOp()) { + b.setInsertionPointAfter(alloc.getDefiningOp()); + b.create(loc, outputBuffer, alloc); + } + outputBuffer = alloc; + } + // Insert mapping and aliasing info. + aliasInfo.createAliasInfoEntry(outputBuffer); + aliasInfo.insertNewBufferEquivalence(opResult, outputBuffer); + map(bvm, opResult, outputBuffer); + + // Insert new operand and bbArg. + tiledLoopOp->insertOperands(nextOutputOperandIndex, outputBuffer); + BlockArgument newBufferBBArg = + body->insertArgument(nextOutputBBArgIndex, outputBuffer.getType()); + BlockArgument oldTensorBBArg = body->getArgument(oldOutputBBArgIndex); + // Insert mapping and aliasing info. + aliasInfo.createAliasInfoEntry(newBufferBBArg); + aliasInfo.insertNewBufferEquivalence(oldTensorBBArg, newBufferBBArg); + map(bvm, oldTensorBBArg, newBufferBBArg); + + // Set operand of `linalg.yield` to the bbArg so it just canonicalizes away + // later. + yieldOperand.set(oldTensorBBArg); + + // Increment indices. + ++numNewOutputBuffers; + ++resultIndex; + ++oldOutputBBArgIndex; + ++nextOutputBBArgIndex; + ++nextOutputOperandIndex; + } + + // Add buffers for inputs and the corresponding block arguments. + // Keep separate iterators to increment without further leaking impl. details. + int numNewInputBuffers = 0; + int oldInputBBArgIndex = numLoops; + int nextInputBBArgIndex = numLoops + oldInputs.size(); + int nextInputOperandIndex = numControlOperands + oldInputs.size(); + for (Value oldInputTensor : oldInputs) { + if (!oldInputTensor.getType().isa()) { + // Skip and increment the old bbarg index only. + ++oldInputBBArgIndex; + continue; + } + + Value inputBuffer = lookup(bvm, oldInputTensor); + assert(inputBuffer && " missing buffer for operand"); + + // Insert new operand and bbArg. + tiledLoopOp->insertOperands(nextInputOperandIndex, inputBuffer); + BlockArgument newBufferBBArg = + body->insertArgument(nextInputBBArgIndex, inputBuffer.getType()); + BlockArgument oldTensorBBArg = body->getArgument(oldInputBBArgIndex); + + // Insert mapping and aliasing info. + aliasInfo.createAliasInfoEntry(newBufferBBArg); + aliasInfo.insertNewBufferEquivalence(oldTensorBBArg, newBufferBBArg); + map(bvm, oldTensorBBArg, newBufferBBArg); + + // Increment indices. + ++numNewInputBuffers; + ++oldInputBBArgIndex; + ++nextInputBBArgIndex; + ++nextInputOperandIndex; + } + + // Update segment sizes. + // TODO: Helper method to avoid leaking impl details. + tiledLoopOp->setAttr( + TiledLoopOp::getOperandSegmentSizeAttr(), + b.getI32VectorAttr( + {numLoops, numLoops, numLoops, + static_cast(oldInputs.size()) + numNewInputBuffers, + static_cast(oldOutputs.size()) + numNewOutputBuffers})); + + return success(); +} + /// Bufferize ExtractSliceOp to subview with optional alloc + copy depending on /// whether or not it is marked inplaceable. /// Note that `getInplaceableOpResult` on a ExtractSliceOp always returns null. @@ -1871,8 +2015,7 @@ static LogicalResult bufferize(OpBuilder &b, VectorTransferOpInterface op, /// op.source(). if (auto readOp = dyn_cast(op.getOperation())) { Value v = lookup(bvm, op.source()); - if (!v) - return failure(); + assert(v && "missing buffer"); readOp.sourceMutable().assign(v); return success(); } @@ -1891,8 +2034,7 @@ static LogicalResult bufferize(OpBuilder &b, VectorTransferOpInterface op, // InPlace write will result in memref.tensor_load(x) which must // canonicalize away with one of it uses. newInputBuffer = lookup(bvm, writeOp.source()); - if (!newInputBuffer) - return failure(); + assert(newInputBuffer && "missing buffer"); } // Create a new transfer_write on buffer that doesn't have a return value. @@ -1933,6 +2075,22 @@ static LogicalResult bufferize(OpBuilder &b, scf::YieldOp yieldOp, return success(); } +/// Bufferization for linalg::YieldOp either does not involve tensors or just +/// results in later canonicalization. In either case it does nothing. +static LogicalResult bufferize(OpBuilder &b, linalg::YieldOp yieldOp, + BlockAndValueMapping &bvm, + BufferizationAliasInfo &aliasInfo) { + // Take a guard before anything else. + OpBuilder::InsertionGuard g(b); + b.setInsertionPoint(yieldOp); + // No tensors -> success. + if (!llvm::any_of(yieldOp.getOperandTypes(), isaTensor)) + return success(); + // linalg::YieldOp nested under TiledLoop must just canonicalize. + if (yieldOp->getParentOfType()) + return success(); + llvm_unreachable("unexpected yieldOp"); +} //===----------------------------------------------------------------------===// // Bufferization analyses. //===----------------------------------------------------------------------===// @@ -2043,7 +2201,7 @@ bufferizationSanityCheck(scf::YieldOp yieldOp, const BufferizationAliasInfo &aliasInfo) { auto parentForOp = yieldOp->getParentOfType(); if (!parentForOp) - return failure(); + return yieldOp->emitError() << "not nested under ForOp"; for (OpOperand &operand : yieldOp->getOpOperands()) { OpResult matchingForOpResult = @@ -2057,11 +2215,10 @@ bufferizationSanityCheck(scf::YieldOp yieldOp, parentForOp.getRegionIterArgForOpOperand(machingForOpOperand); if (!aliasInfo.areEquivalentBufferizedValues(matchingForOpIterArg, operand.get())) { - yieldOp->emitError() - << "Yield operand #" << operand.getOperandNumber() - << " does not bufferize to an equivalent buffer to the matching" - << " enclosing scf::for operand -> Fail the pass\n"; - return failure(); + return yieldOp->emitError() + << "Yield operand #" << operand.getOperandNumber() + << " does not bufferize to an equivalent buffer to the matching" + << " enclosing scf::for operand -> Fail the pass\n"; } } @@ -2150,10 +2307,10 @@ static LogicalResult bufferizeFuncOpInternals( // Walk in PreOrder to ensure ops with regions are handled before their body. // Since walk has to be PreOrder, we need to erase ops that require it // separately: this is the case for CallOp + // clang-format off SmallVector toErase; - WalkResult result = - funcOp.walk([&](Operation *op) -> WalkResult { - // clang-format off + WalkResult result = funcOp.walk([&](Operation *op) + -> WalkResult { WalkResult result = TypeSwitch(op) // Skip BufferCast and TensorLoad ops. @@ -2161,13 +2318,15 @@ static LogicalResult bufferizeFuncOpInternals( memref::TensorLoadOp>([&](auto) { return success(); }) .Case([&](auto op) { LDBG("Begin bufferize:\n" << op << '\n'); return bufferize(b, op, bvm, aliasInfo); @@ -2182,23 +2341,23 @@ static LogicalResult bufferizeFuncOpInternals( LDBG("Begin bufferize:\n" << op << '\n'); return bufferize(b, op, bvm, aliasInfo, globalCreator); }) - .Default([&](Operation *op) { + .Default([&](Operation *op) -> LogicalResult { auto isaTensor = [](Type t) { return t.isa(); }; if (any_of(op->getOperandTypes(), isaTensor) || any_of(op->getResultTypes(), isaTensor)) - return failure(); + return op->emitError() << "unsupported op with tensors"; return success(); }); - // clang-format on - // Register post-walk erasure, if necessary. - if (isa(op)) - if (llvm::any_of(op->getOperandTypes(), isaTensor) || - llvm::any_of(op->getResultTypes(), isaTensor)) - toErase.push_back(op); + // Register post-walk erasure, if necessary. + if (isa(op)) + if (llvm::any_of(op->getOperandTypes(), isaTensor) || + llvm::any_of(op->getResultTypes(), isaTensor)) + toErase.push_back(op); - return result; - }); + return result; + }); + // clang-format on LDBG("End BufferizeFuncOpInternals:\n" << funcOp << '\n'); for (Operation *op : toErase) diff --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-invalid.mlir b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-invalid.mlir index 78f84cc8540c4..d8257dd172c63 100644 --- a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-invalid.mlir +++ b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-invalid.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize -split-input-file -verify-diagnostics +// RUN: mlir-opt %s -allow-unregistered-dialect -linalg-comprehensive-module-bufferize -split-input-file -verify-diagnostics func private @foo() -> tensor @@ -85,3 +85,25 @@ func @extract_slice_fun(%A : tensor {linalg.inplaceable = true}) // expected-error @+1 {{buffer result #0 not produced by an alloc}} return %r0: tensor<4xf32> } + +// ----- + +func @scf_yield(%b : i1, %A : tensor<4xf32>, %B : tensor<4xf32>) -> tensor<4xf32> +{ + %r = scf.if %b -> (tensor<4xf32>) { + // expected-error @+1 {{not nested under ForOp}} + scf.yield %A : tensor<4xf32> + } else { + scf.yield %B : tensor<4xf32> + } + return %r: tensor<4xf32> +} + +// ----- + +func @unknown_op(%A : tensor<4xf32>) -> tensor<4xf32> +{ + // expected-error @+1 {{unsupported op with tensors}} + %r = "marklar"(%A) : (tensor<4xf32>) -> (tensor<4xf32>) + return %r: tensor<4xf32> +} diff --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir index f7f221b2b77fb..b29cf6e81f92c 100644 --- a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir +++ b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir @@ -498,3 +498,60 @@ func @main() { // CHECK: func private @print_memref_f32(memref<*xf32>) func private @print_memref_f32(tensor<*xf32>) + +// ----- + +func private @some_use(memref) + +#TILE_MAP = affine_map<(d0)[s0] -> (3, -d0 + s0)> + +// CHECK-DAG: #[[$DYN_0D_MAP:.*]] = affine_map<()[s0] -> (s0)> +// CHECK-DAG: #[[$DYN_1D_MAP:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)> +// CHECK-DAG: #[[$TILE_MAP:.*]] = affine_map<(d0)[s0] -> (3, -d0 + s0)> + +// CHECK: func @tiled_dot( +// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref +// CHECK-SAME: %[[B:[a-zA-Z0-9]*]]: memref +// CHECK-SAME: %[[c:[a-zA-Z0-9]*]]: memref +func @tiled_dot(%A: tensor, %B: tensor, %c: tensor {linalg.inplaceable = true}, + %effecting: memref) -> tensor { + %c3 = constant 3 : index + %c0 = constant 0 : index + + // CHECK: %[[M:.*]] = memref.dim %[[A]], {{.*}} : memref + %0 = tensor.dim %A, %c0 : tensor + + // CHECK: linalg.tiled_loop {{.*}} to (%[[M]]) {{.*}} %[[A]]{{.*}}%[[B]]{{.*}}outs{{.*}}%[[c]] + %1 = linalg.tiled_loop (%arg3) = (%c0) to (%0) step (%c3) + ins (%arg4 = %A: tensor, %use = %effecting : memref, %arg5 = %B: tensor) + outs (%arg6 = %c: tensor) + iterators["reduction"] + { + // CHECK-NOT: alloc + + %2 = tensor.dim %arg4, %c0 : tensor + %3 = affine.min #TILE_MAP(%arg3)[%2] + + // CHECK: %[[SV_A:.*]] = memref.subview {{.*}} + %4 = tensor.extract_slice %arg4[%arg3] [%3] [1] : tensor to tensor + %5 = tensor.dim %arg5, %c0 : tensor + %6 = affine.min #TILE_MAP(%arg3)[%5] + + // CHECK: %[[SV_B:.*]] = memref.subview {{.*}} + %7 = tensor.extract_slice %arg5[%arg3] [%6] [1] : tensor to tensor + + // CHECK: linalg.dot ins(%[[SV_A]], %[[SV_B]] : memref, memref) outs(%{{.*}} : memref) + %8 = linalg.dot ins(%4, %7 : tensor, tensor) outs(%arg6 : tensor) -> tensor + + // CHECK: call @some_use(%{{.*}}) : (memref) -> () + call @some_use(%use) : (memref) -> () + + linalg.yield %8 : tensor + // CHECK: linalg.yield + // CHECK-NOT: tensor + } + + // CHECK: return + // CHECK-NOT: tensor + return %1 : tensor +} From 33ff8078ff744cb317ec8806c990a52d33310834 Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Fri, 2 Jul 2021 16:26:01 +0200 Subject: [PATCH 533/619] Revert "[clangd] Unbreak mac build differently 0c96a92d8666b8" This reverts commit 2f79acb7b701c41494abff588b5f03a74ea2e11d. Should no longer be needed after 26e1553a107f52667be879e99739a4153f8799d8 --- clang-tools-extra/clangd/Transport.h | 1 + clang-tools-extra/clangd/tool/ClangdMain.cpp | 4 +--- llvm/utils/gn/secondary/clang-tools-extra/clangd/xpc/BUILD.gn | 1 + 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/clang-tools-extra/clangd/Transport.h b/clang-tools-extra/clangd/Transport.h index ae6da722d91b1..b3db4eba85f93 100644 --- a/clang-tools-extra/clangd/Transport.h +++ b/clang-tools-extra/clangd/Transport.h @@ -18,6 +18,7 @@ #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_TRANSPORT_H_ #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_TRANSPORT_H_ +#include "Features.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/JSON.h" #include "llvm/Support/raw_ostream.h" diff --git a/clang-tools-extra/clangd/tool/ClangdMain.cpp b/clang-tools-extra/clangd/tool/ClangdMain.cpp index c03dd927970d4..8db52c65061c8 100644 --- a/clang-tools-extra/clangd/tool/ClangdMain.cpp +++ b/clang-tools-extra/clangd/tool/ClangdMain.cpp @@ -6,13 +6,11 @@ // //===----------------------------------------------------------------------===// -// Must be before Transport.h include. -#include "Features.h" - #include "ClangdLSPServer.h" #include "CodeComplete.h" #include "Config.h" #include "ConfigProvider.h" +#include "Features.h" #include "PathMapping.h" #include "Protocol.h" #include "TidyProvider.h" diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clangd/xpc/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clangd/xpc/BUILD.gn index 0d375392ae257..921e0dbedeb54 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clangd/xpc/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clangd/xpc/BUILD.gn @@ -16,6 +16,7 @@ static_library("transport") { deps = [ ":conversions", "//clang-tools-extra/clangd", + "//clang-tools-extra/clangd:features", "//clang-tools-extra/clangd/support", "//llvm/lib/Support", ] From 13e35ac1249472d4b092d76264dcfc9fe8d7d13b Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Fri, 2 Jul 2021 17:28:33 +0300 Subject: [PATCH 534/619] [NFC][InstCombine] visitUnreachableInst(): enhance comments somewhat --- .../Transforms/InstCombine/InstructionCombining.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index d29527f3a0dcb..393180306ee8c 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2875,8 +2875,18 @@ Instruction *InstCombinerImpl::visitUnreachableInst(UnreachableInst &I) { // This includes instructions like stores and "llvm.assume" that may not get // removed by simple dead code elimination. while (Instruction *Prev = I.getPrevNonDebugInstruction()) { - if (Prev->isEHPad() || !isGuaranteedToTransferExecutionToSuccessor(Prev)) + // While we theoretically can erase EH, that would result in a block that + // used to start with an EH no longer starting with EH, which is invalid. + // To make it valid, we'd need to fixup predecessors to no longer refer to + // this block, but that changes CFG, which is not allowed in InstCombine. + if (Prev->isEHPad()) return nullptr; // Can not drop any more instructions. We're done here. + + if (!isGuaranteedToTransferExecutionToSuccessor(Prev)) + return nullptr; // Can not drop any more instructions. We're done here. + // Otherwise, this instruction can be freely erased, + // even if it is not side-effect free. + // Temporarily disable removal of volatile stores preceding unreachable, // pending a potential LangRef change permitting volatile stores to trap. // TODO: Either remove this code, or properly integrate the check into From e42bb5e35a88c6a163934080883d4c5bc1f32cfc Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Fri, 2 Jul 2021 16:30:19 +0200 Subject: [PATCH 535/619] Reapply [clangd] Fix possible assertion fail in TUScheduler This reverts commit fff966b6855aee6fc0d0d4cd401cdd525a838572. Seems I managed to delete a critical ! after running the tests :-\ --- clang-tools-extra/clangd/TUScheduler.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/clang-tools-extra/clangd/TUScheduler.cpp b/clang-tools-extra/clangd/TUScheduler.cpp index 09c68a3a250ba..700d8264555f9 100644 --- a/clang-tools-extra/clangd/TUScheduler.cpp +++ b/clang-tools-extra/clangd/TUScheduler.cpp @@ -1380,11 +1380,13 @@ bool ASTWorker::blockUntilIdle(Deadline Timeout) const { }; // Make sure ASTWorker has processed all requests, which might issue new // updates to PreamblePeer. - WaitUntilASTWorkerIsIdle(); + if (!WaitUntilASTWorkerIsIdle()) + return false; // Now that ASTWorker processed all requests, ensure PreamblePeer has served // all update requests. This might create new PreambleRequests for the // ASTWorker. - PreamblePeer.blockUntilIdle(Timeout); + if (!PreamblePeer.blockUntilIdle(Timeout)) + return false; assert(Requests.empty() && "No new normal tasks can be scheduled concurrently with " "blockUntilIdle(): ASTWorker isn't threadsafe"); From 4569c14ac347180d9514f43c45c6f52569ce8f8c Mon Sep 17 00:00:00 2001 From: Gus Smith Date: Thu, 1 Jul 2021 21:45:18 +0000 Subject: [PATCH 536/619] Refactor TensorExp parameters into a union To make TensorExp clearer, this change refactors the e0/e1 fields into a union: e0/e1 for a binary op tensor expression, and tensor_num for a tensor-kinded tensor expression. Reviewed By: aartbik Differential Revision: https://reviews.llvm.org/D105303 --- .../mlir/Dialect/SparseTensor/Utils/Merger.h | 31 ++++++++++++++----- .../Transforms/Sparsification.cpp | 18 +++++------ .../lib/Dialect/SparseTensor/Utils/Merger.cpp | 24 +++++++------- 3 files changed, 45 insertions(+), 28 deletions(-) diff --git a/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h b/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h index d087e98ac42f3..4141c68a5e379 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h +++ b/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h @@ -26,24 +26,39 @@ enum class Kind { kTensor, kInvariant, kMulF, kMulI, kAddF, kAddI }; /// Dimension level type for a tensor (undef means index does not appear). enum class Dim { kSparse, kDense, kSingle, kUndef }; +/// Children expressions of a binary TensorExp. +struct Children { + unsigned e0; + unsigned e1; +}; + /// Tensor expression. Represents a MLIR expression in tensor index notation. /// For tensors, e0 denotes the tensor index. For invariants, the IR value is /// stored directly. For binary operations, e0 and e1 denote the index of the /// children tensor expressions. struct TensorExp { - TensorExp(Kind k, unsigned x, unsigned y, Value v) - : kind(k), e0(x), e1(y), val(v) { - assert((kind == Kind::kTensor && e0 != -1u && e1 == -1u && !val) || - (kind == Kind::kInvariant && e0 == -1u && e1 == -1u && val) || - (kind >= Kind::kMulF && e0 != -1u && e1 != -1u && !val)); + TensorExp(Kind k, unsigned x, unsigned y, Value v) : kind(k), val(v) { + assert((kind == Kind::kTensor && x != -1u && y == -1u && !val) || + (kind == Kind::kInvariant && x == -1u && y == -1u && val) || + (kind >= Kind::kMulF && x != -1u && y != -1u && !val)); + if (kind == Kind::kTensor) { + tensor = x; + } else if (kind >= Kind::kMulF) { + children.e0 = x; + children.e1 = y; + } } /// Tensor expression kind. Kind kind; - /// Indices of children expression(s). - unsigned e0; - unsigned e1; + union { + /// Expressions representing tensors simply have a tensor number. + unsigned tensor; + + /// Binary operations hold the indices of their child expressions. + Children children; + }; /// Direct link to IR for an invariant. During code generation, /// field is used to cache "hoisted" loop invariant tensor loads. diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp index 0409a7eabdfb7..813fe683ae619 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp @@ -214,11 +214,11 @@ static bool computeIterationGraph(Merger &merger, linalg::GenericOp op, static unsigned isConjunction(Merger &merger, unsigned tensor, unsigned exp) { switch (merger.exp(exp).kind) { case Kind::kTensor: - return merger.exp(exp).e0 == tensor; + return merger.exp(exp).tensor == tensor; case Kind::kMulF: case Kind::kMulI: - return isConjunction(merger, tensor, merger.exp(exp).e0) || - isConjunction(merger, tensor, merger.exp(exp).e1); + return isConjunction(merger, tensor, merger.exp(exp).children.e0) || + isConjunction(merger, tensor, merger.exp(exp).children.e1); default: return false; } @@ -455,7 +455,7 @@ static Value genTensorLoad(Merger &merger, CodeGen &codegen, } // Actual load. SmallVector args; - OpOperand *t = op.getInputAndOutputOperands()[merger.exp(exp).e0]; + OpOperand *t = op.getInputAndOutputOperands()[merger.exp(exp).tensor]; unsigned tensor = t->getOperandNumber(); auto map = op.getTiedIndexingMap(t); auto enc = getSparseTensorEncoding(t->get().getType()); @@ -628,8 +628,8 @@ static Value genExp(Merger &merger, CodeGen &codegen, PatternRewriter &rewriter, return genTensorLoad(merger, codegen, rewriter, op, exp); else if (merger.exp(exp).kind == Kind::kInvariant) return genInvariantValue(merger, codegen, rewriter, exp); - Value v0 = genExp(merger, codegen, rewriter, op, merger.exp(exp).e0); - Value v1 = genExp(merger, codegen, rewriter, op, merger.exp(exp).e1); + Value v0 = genExp(merger, codegen, rewriter, op, merger.exp(exp).children.e0); + Value v1 = genExp(merger, codegen, rewriter, op, merger.exp(exp).children.e1); switch (merger.exp(exp).kind) { case Kind::kTensor: case Kind::kInvariant: @@ -653,7 +653,7 @@ static void genInvariants(Merger &merger, CodeGen &codegen, if (merger.exp(exp).kind == Kind::kTensor) { // Inspect tensor indices. bool atLevel = ldx == -1u; - OpOperand *t = op.getInputAndOutputOperands()[merger.exp(exp).e0]; + OpOperand *t = op.getInputAndOutputOperands()[merger.exp(exp).tensor]; auto map = op.getTiedIndexingMap(t); auto enc = getSparseTensorEncoding(t->get().getType()); for (unsigned d = 0, rank = map.getNumResults(); d < rank; d++) { @@ -675,8 +675,8 @@ static void genInvariants(Merger &merger, CodeGen &codegen, // Traverse into the binary operations. Note that we only hoist // tensor loads, since subsequent MLIR/LLVM passes know how to // deal with all other kinds of derived loop invariants. - unsigned e0 = merger.exp(exp).e0; - unsigned e1 = merger.exp(exp).e1; + unsigned e0 = merger.exp(exp).children.e0; + unsigned e1 = merger.exp(exp).children.e1; genInvariants(merger, codegen, rewriter, op, e0, ldx, hoist); genInvariants(merger, codegen, rewriter, op, e1, ldx, hoist); } diff --git a/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp b/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp index 0c869be07a125..6150c15a0ad18 100644 --- a/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp +++ b/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp @@ -72,7 +72,8 @@ unsigned Merger::optimizeSet(unsigned s0) { if (p0 != p1) { // Is this a straightforward copy? unsigned e = latPoints[p1].exp; - if (tensorExps[e].kind == Kind::kTensor && tensorExps[e].e0 == outTensor) + if (tensorExps[e].kind == Kind::kTensor && + tensorExps[e].tensor == outTensor) continue; // Conjunction already covered? for (unsigned p2 : latSets[s]) { @@ -150,11 +151,11 @@ bool Merger::hasAnyDimOf(const llvm::BitVector &bits, Dim d) const { void Merger::dumpExp(unsigned e) const { switch (tensorExps[e].kind) { case Kind::kTensor: - if (tensorExps[e].e0 == syntheticTensor) + if (tensorExps[e].tensor == syntheticTensor) llvm::dbgs() << "synthetic_"; - else if (tensorExps[e].e0 == outTensor) + else if (tensorExps[e].tensor == outTensor) llvm::dbgs() << "output_"; - llvm::dbgs() << "tensor_" << tensorExps[e].e0; + llvm::dbgs() << "tensor_" << tensorExps[e].tensor; break; case Kind::kInvariant: llvm::dbgs() << "invariant"; @@ -162,17 +163,17 @@ void Merger::dumpExp(unsigned e) const { default: case Kind::kMulI: llvm::dbgs() << "("; - dumpExp(tensorExps[e].e0); + dumpExp(tensorExps[e].children.e0); llvm::dbgs() << " * "; - dumpExp(tensorExps[e].e1); + dumpExp(tensorExps[e].children.e1); llvm::dbgs() << ")"; break; case Kind::kAddF: case Kind::kAddI: llvm::dbgs() << "("; - dumpExp(tensorExps[e].e0); + dumpExp(tensorExps[e].children.e0); llvm::dbgs() << " + "; - dumpExp(tensorExps[e].e1); + dumpExp(tensorExps[e].children.e1); llvm::dbgs() << ")"; break; } @@ -234,12 +235,13 @@ unsigned Merger::buildLattices(unsigned e, unsigned idx) { // set to the undefined index in that dimension. An invariant expression // is set to a synthetic tensor with undefined indices only. unsigned s = addSet(); - unsigned t = kind == Kind::kTensor ? tensorExps[e].e0 : syntheticTensor; + unsigned t = + kind == Kind::kTensor ? tensorExps[e].children.e0 : syntheticTensor; latSets[s].push_back(addLat(t, idx, e)); return s; } - unsigned s0 = buildLattices(tensorExps[e].e0, idx); - unsigned s1 = buildLattices(tensorExps[e].e1, idx); + unsigned s0 = buildLattices(tensorExps[e].children.e0, idx); + unsigned s1 = buildLattices(tensorExps[e].children.e1, idx); switch (kind) { case Kind::kTensor: case Kind::kInvariant: From 78309ebef4a35700597bde2cca3052f9c600a0bb Mon Sep 17 00:00:00 2001 From: David Green Date: Fri, 2 Jul 2021 17:03:49 +0100 Subject: [PATCH 537/619] [AArch64] Neon saturated truncate tests. NFC --- llvm/test/CodeGen/AArch64/qmovn.ll | 162 +++++++++++++++++++++++++++++ 1 file changed, 162 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/qmovn.ll diff --git a/llvm/test/CodeGen/AArch64/qmovn.ll b/llvm/test/CodeGen/AArch64/qmovn.ll new file mode 100644 index 0000000000000..515f4d5bd114c --- /dev/null +++ b/llvm/test/CodeGen/AArch64/qmovn.ll @@ -0,0 +1,162 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-none-none-eabi -verify-machineinstrs %s -o - | FileCheck %s + +define <4 x i16> @vqmovni32_smaxmin(<4 x i32> %s0) { +; CHECK-LABEL: vqmovni32_smaxmin: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: movi v1.4s, #127, msl #8 +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: mvni v1.4s, #127, msl #8 +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: xtn v0.4h, v0.4s +; CHECK-NEXT: ret +entry: + %c1 = icmp slt <4 x i32> %s0, + %s1 = select <4 x i1> %c1, <4 x i32> %s0, <4 x i32> + %c2 = icmp sgt <4 x i32> %s1, + %s2 = select <4 x i1> %c2, <4 x i32> %s1, <4 x i32> + %t = trunc <4 x i32> %s2 to <4 x i16> + ret <4 x i16> %t +} + +define <4 x i16> @vqmovni32_sminmax(<4 x i32> %s0) { +; CHECK-LABEL: vqmovni32_sminmax: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mvni v1.4s, #127, msl #8 +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: movi v1.4s, #127, msl #8 +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: xtn v0.4h, v0.4s +; CHECK-NEXT: ret +entry: + %c1 = icmp sgt <4 x i32> %s0, + %s1 = select <4 x i1> %c1, <4 x i32> %s0, <4 x i32> + %c2 = icmp slt <4 x i32> %s1, + %s2 = select <4 x i1> %c2, <4 x i32> %s1, <4 x i32> + %t = trunc <4 x i32> %s2 to <4 x i16> + ret <4 x i16> %t +} + +define <4 x i16> @vqmovni32_umaxmin(<4 x i32> %s0) { +; CHECK-LABEL: vqmovni32_umaxmin: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: xtn v0.4h, v0.4s +; CHECK-NEXT: ret +entry: + %c1 = icmp ult <4 x i32> %s0, + %s1 = select <4 x i1> %c1, <4 x i32> %s0, <4 x i32> + %t = trunc <4 x i32> %s1 to <4 x i16> + ret <4 x i16> %t +} + +define <8 x i8> @vqmovni16_smaxmin(<8 x i16> %s0) { +; CHECK-LABEL: vqmovni16_smaxmin: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: movi v1.8h, #127 +; CHECK-NEXT: smin v0.8h, v0.8h, v1.8h +; CHECK-NEXT: mvni v1.8h, #127 +; CHECK-NEXT: smax v0.8h, v0.8h, v1.8h +; CHECK-NEXT: xtn v0.8b, v0.8h +; CHECK-NEXT: ret +entry: + %c1 = icmp slt <8 x i16> %s0, + %s1 = select <8 x i1> %c1, <8 x i16> %s0, <8 x i16> + %c2 = icmp sgt <8 x i16> %s1, + %s2 = select <8 x i1> %c2, <8 x i16> %s1, <8 x i16> + %t = trunc <8 x i16> %s2 to <8 x i8> + ret <8 x i8> %t +} + +define <8 x i8> @vqmovni16_sminmax(<8 x i16> %s0) { +; CHECK-LABEL: vqmovni16_sminmax: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mvni v1.8h, #127 +; CHECK-NEXT: smax v0.8h, v0.8h, v1.8h +; CHECK-NEXT: movi v1.8h, #127 +; CHECK-NEXT: smin v0.8h, v0.8h, v1.8h +; CHECK-NEXT: xtn v0.8b, v0.8h +; CHECK-NEXT: ret +entry: + %c1 = icmp sgt <8 x i16> %s0, + %s1 = select <8 x i1> %c1, <8 x i16> %s0, <8 x i16> + %c2 = icmp slt <8 x i16> %s1, + %s2 = select <8 x i1> %c2, <8 x i16> %s1, <8 x i16> + %t = trunc <8 x i16> %s2 to <8 x i8> + ret <8 x i8> %t +} + +define <8 x i8> @vqmovni16_umaxmin(<8 x i16> %s0) { +; CHECK-LABEL: vqmovni16_umaxmin: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: movi v1.2d, #0xff00ff00ff00ff +; CHECK-NEXT: umin v0.8h, v0.8h, v1.8h +; CHECK-NEXT: xtn v0.8b, v0.8h +; CHECK-NEXT: ret +entry: + %c1 = icmp ult <8 x i16> %s0, + %s1 = select <8 x i1> %c1, <8 x i16> %s0, <8 x i16> + %t = trunc <8 x i16> %s1 to <8 x i8> + ret <8 x i8> %t +} + +define <2 x i32> @vqmovni64_smaxmin(<2 x i64> %s0) { +; CHECK-LABEL: vqmovni64_smaxmin: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, #2147483647 +; CHECK-NEXT: dup v1.2d, x8 +; CHECK-NEXT: mov x9, #-2147483648 +; CHECK-NEXT: cmgt v2.2d, v1.2d, v0.2d +; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b +; CHECK-NEXT: dup v1.2d, x9 +; CHECK-NEXT: cmgt v2.2d, v0.2d, v1.2d +; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b +; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: ret +entry: + %c1 = icmp slt <2 x i64> %s0, + %s1 = select <2 x i1> %c1, <2 x i64> %s0, <2 x i64> + %c2 = icmp sgt <2 x i64> %s1, + %s2 = select <2 x i1> %c2, <2 x i64> %s1, <2 x i64> + %t = trunc <2 x i64> %s2 to <2 x i32> + ret <2 x i32> %t +} + +define <2 x i32> @vqmovni64_sminmax(<2 x i64> %s0) { +; CHECK-LABEL: vqmovni64_sminmax: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov x8, #-2147483648 +; CHECK-NEXT: dup v1.2d, x8 +; CHECK-NEXT: mov w9, #2147483647 +; CHECK-NEXT: cmgt v2.2d, v0.2d, v1.2d +; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b +; CHECK-NEXT: dup v1.2d, x9 +; CHECK-NEXT: cmgt v2.2d, v1.2d, v0.2d +; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b +; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: ret +entry: + %c1 = icmp sgt <2 x i64> %s0, + %s1 = select <2 x i1> %c1, <2 x i64> %s0, <2 x i64> + %c2 = icmp slt <2 x i64> %s1, + %s2 = select <2 x i1> %c2, <2 x i64> %s1, <2 x i64> + %t = trunc <2 x i64> %s2 to <2 x i32> + ret <2 x i32> %t +} + +define <2 x i32> @vqmovni64_umaxmin(<2 x i64> %s0) { +; CHECK-LABEL: vqmovni64_umaxmin: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: movi v1.2d, #0x000000ffffffff +; CHECK-NEXT: cmhi v1.2d, v1.2d, v0.2d +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: orn v0.16b, v0.16b, v1.16b +; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: ret +entry: + %c1 = icmp ult <2 x i64> %s0, + %s1 = select <2 x i1> %c1, <2 x i64> %s0, <2 x i64> + %t = trunc <2 x i64> %s1 to <2 x i32> + ret <2 x i32> %t +} From bf64210fd88f4f3fe920376861b418be1834add6 Mon Sep 17 00:00:00 2001 From: Jinsong Ji Date: Fri, 2 Jul 2021 15:46:26 +0000 Subject: [PATCH 538/619] [AIX] Add dummy XCOFF MCAsmParserExtension Implement XCOFFMCAsmParser so that we can use MC to parse inline asm. The directives and storage mapping classes will be added later iteratively. Reviewed By: xgupta Differential Revision: https://reviews.llvm.org/D105259 --- llvm/lib/MC/MCParser/AsmParser.cpp | 4 +- llvm/lib/MC/MCParser/CMakeLists.txt | 1 + llvm/lib/MC/MCParser/XCOFFAsmParser.cpp | 63 +++++++++++++++++++++++++ llvm/test/MC/XCOFF/inlineasm.s | 23 +++++++++ llvm/test/MC/XCOFF/lit.local.cfg | 2 + 5 files changed, 91 insertions(+), 2 deletions(-) create mode 100644 llvm/lib/MC/MCParser/XCOFFAsmParser.cpp create mode 100644 llvm/test/MC/XCOFF/inlineasm.s create mode 100644 llvm/test/MC/XCOFF/lit.local.cfg diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp index 1adde169c0a16..3bc668e699cbc 100644 --- a/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/llvm/lib/MC/MCParser/AsmParser.cpp @@ -749,6 +749,7 @@ namespace llvm { extern MCAsmParserExtension *createDarwinAsmParser(); extern MCAsmParserExtension *createELFAsmParser(); extern MCAsmParserExtension *createCOFFAsmParser(); +extern MCAsmParserExtension *createXCOFFAsmParser(); extern MCAsmParserExtension *createWasmAsmParser(); } // end namespace llvm @@ -785,8 +786,7 @@ AsmParser::AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out, PlatformParser.reset(createWasmAsmParser()); break; case MCContext::IsXCOFF: - report_fatal_error( - "Need to implement createXCOFFAsmParser for XCOFF format."); + PlatformParser.reset(createXCOFFAsmParser()); break; } diff --git a/llvm/lib/MC/MCParser/CMakeLists.txt b/llvm/lib/MC/MCParser/CMakeLists.txt index 6f76f368a9699..f70787ad9d20e 100644 --- a/llvm/lib/MC/MCParser/CMakeLists.txt +++ b/llvm/lib/MC/MCParser/CMakeLists.txt @@ -11,6 +11,7 @@ add_llvm_component_library(LLVMMCParser MCTargetAsmParser.cpp MasmParser.cpp WasmAsmParser.cpp + XCOFFAsmParser.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/MC/MCParser diff --git a/llvm/lib/MC/MCParser/XCOFFAsmParser.cpp b/llvm/lib/MC/MCParser/XCOFFAsmParser.cpp new file mode 100644 index 0000000000000..7494fe07734c4 --- /dev/null +++ b/llvm/lib/MC/MCParser/XCOFFAsmParser.cpp @@ -0,0 +1,63 @@ +//===- XCOFFAsmParser.cpp - XCOFF Assembly Parser +//-----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/BinaryFormat/XCOFF.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCParser/MCAsmParserExtension.h" +#include "llvm/MC/MCSectionXCOFF.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolXCOFF.h" +#include "llvm/Support/MachineValueType.h" + +using namespace llvm; + +namespace { + +class XCOFFAsmParser : public MCAsmParserExtension { + MCAsmParser *Parser = nullptr; + MCAsmLexer *Lexer = nullptr; + + template + void addDirectiveHandler(StringRef Directive) { + MCAsmParser::ExtensionDirectiveHandler Handler = + std::make_pair(this, HandleDirective); + + getParser().addDirectiveHandler(Directive, Handler); + } + +public: + XCOFFAsmParser() {} + + void Initialize(MCAsmParser &P) override { + Parser = &P; + Lexer = &Parser->getLexer(); + // Call the base implementation. + MCAsmParserExtension::Initialize(*Parser); + + addDirectiveHandler<&XCOFFAsmParser::ParseDirectiveCSect>(".csect"); + } + bool ParseDirectiveCSect(StringRef, SMLoc); +}; + +} // end anonymous namespace + +namespace llvm { + +MCAsmParserExtension *createXCOFFAsmParser() { return new XCOFFAsmParser; } + +} // end namespace llvm + +// .csect QualName [, Number ] +bool XCOFFAsmParser::ParseDirectiveCSect(StringRef, SMLoc) { + report_fatal_error("XCOFFAsmParser directive not yet supported!"); + return false; +} diff --git a/llvm/test/MC/XCOFF/inlineasm.s b/llvm/test/MC/XCOFF/inlineasm.s new file mode 100644 index 0000000000000..85a40024711a1 --- /dev/null +++ b/llvm/test/MC/XCOFF/inlineasm.s @@ -0,0 +1,23 @@ +// RUN: llvm-mc -filetype=asm -triple powerpc-ibm-aix-xcoff %s | FileCheck %s + +// CHECK-label: .csect .text[PR],2 +// CHECK:L..tmp0: +// CHECK-NEXT: lwarx 3, 0, 4 +// CHECK-NEXT: cmpw 5, 3 +// CHECK-NEXT: bne- 0, L..tmp1 +// CHECK-NEXT: stwcx. 6, 0, 4 +// CHECK-NEXT: bne- 0, L..tmp0 +// CHECK-NEXT:L..tmp1: + + + #APP +1: + lwarx 3, 0, 4 + cmpw 5, 3 + bne- 2f + stwcx. 6, 0, 4 + bne- 1b +2: + + #NO_APP + diff --git a/llvm/test/MC/XCOFF/lit.local.cfg b/llvm/test/MC/XCOFF/lit.local.cfg new file mode 100644 index 0000000000000..091332439b186 --- /dev/null +++ b/llvm/test/MC/XCOFF/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'PowerPC' in config.root.targets: + config.unsupported = True From 03e9dcfd41d461633536690d1cc281e5f7300a3e Mon Sep 17 00:00:00 2001 From: Jinsong Ji Date: Fri, 2 Jul 2021 15:46:49 +0000 Subject: [PATCH 539/619] [AIX] Use AsmParser to do inline asm parsing Add a flag so that target can choose to use AsmParser for parsing inline asm. And set the flag by default for AIX. -no-intergrated-as will override this default if specified explicitly. Reviewed By: #powerpc, shchenz Differential Revision: https://reviews.llvm.org/D105314 --- llvm/include/llvm/MC/MCAsmInfo.h | 13 +++++ .../AsmPrinter/AsmPrinterInlineAsm.cpp | 3 +- llvm/lib/CodeGen/LLVMTargetMachine.cpp | 6 ++- llvm/lib/MC/MCAsmInfo.cpp | 1 + llvm/lib/MC/MCAsmInfoXCOFF.cpp | 1 + llvm/test/CodeGen/PowerPC/inline-asm-label.ll | 47 +++++++++++++++++++ .../PowerPC/inline-asm-physical-fpr.ll | 4 +- 7 files changed, 71 insertions(+), 4 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/inline-asm-label.ll diff --git a/llvm/include/llvm/MC/MCAsmInfo.h b/llvm/include/llvm/MC/MCAsmInfo.h index f5efd0dee21fa..b1d6b7fb7fd34 100644 --- a/llvm/include/llvm/MC/MCAsmInfo.h +++ b/llvm/include/llvm/MC/MCAsmInfo.h @@ -497,6 +497,9 @@ class MCAsmInfo { /// construction (see LLVMTargetMachine::initAsmInfo()). bool UseIntegratedAssembler; + /// Use AsmParser to parse inlineAsm when UseIntegratedAssembler is not set. + bool ParseInlineAsmUsingAsmParser; + /// Preserve Comments in assembly bool PreserveAsmComments; @@ -805,6 +808,11 @@ class MCAsmInfo { /// Return true if assembly (inline or otherwise) should be parsed. bool useIntegratedAssembler() const { return UseIntegratedAssembler; } + /// Return true if target want to use AsmParser to parse inlineasm. + bool parseInlineAsmUsingAsmParser() const { + return ParseInlineAsmUsingAsmParser; + } + bool binutilsIsAtLeast(int Major, int Minor) const { return BinutilsVersion >= std::make_pair(Major, Minor); } @@ -814,6 +822,11 @@ class MCAsmInfo { UseIntegratedAssembler = Value; } + /// Set whether target want to use AsmParser to parse inlineasm. + virtual void setParseInlineAsmUsingAsmParser(bool Value) { + ParseInlineAsmUsingAsmParser = Value; + } + /// Return true if assembly (inline or otherwise) should be parsed. bool preserveAsmComments() const { return PreserveAsmComments; } diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index b49e7f6c700cb..f00e4924e9f25 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -78,13 +78,14 @@ void AsmPrinter::emitInlineAsm(StringRef Str, const MCSubtargetInfo &STI, Str = Str.substr(0, Str.size()-1); // If the output streamer does not have mature MC support or the integrated - // assembler has been disabled, just emit the blob textually. + // assembler has been disabled or not required, just emit the blob textually. // Otherwise parse the asm and emit it via MC support. // This is useful in case the asm parser doesn't handle something but the // system assembler does. const MCAsmInfo *MCAI = TM.getMCAsmInfo(); assert(MCAI && "No MCAsmInfo"); if (!MCAI->useIntegratedAssembler() && + !MCAI->parseInlineAsmUsingAsmParser() && !OutStreamer->isIntegratedAssemblerRequired()) { emitInlineAsmStart(); OutStreamer->emitRawText(Str); diff --git a/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/llvm/lib/CodeGen/LLVMTargetMachine.cpp index f9b7bf613ff6b..37c0b44ea2b24 100644 --- a/llvm/lib/CodeGen/LLVMTargetMachine.cpp +++ b/llvm/lib/CodeGen/LLVMTargetMachine.cpp @@ -64,8 +64,12 @@ void LLVMTargetMachine::initAsmInfo() { if (Options.BinutilsVersion.first > 0) TmpAsmInfo->setBinutilsVersion(Options.BinutilsVersion); - if (Options.DisableIntegratedAS) + if (Options.DisableIntegratedAS) { TmpAsmInfo->setUseIntegratedAssembler(false); + // If there is explict option disable integratedAS, we can't use it for + // inlineasm either. + TmpAsmInfo->setParseInlineAsmUsingAsmParser(false); + } TmpAsmInfo->setPreserveAsmComments(Options.MCOptions.PreserveAsmComments); diff --git a/llvm/lib/MC/MCAsmInfo.cpp b/llvm/lib/MC/MCAsmInfo.cpp index 7484bd1827cdb..f52503d7b1606 100644 --- a/llvm/lib/MC/MCAsmInfo.cpp +++ b/llvm/lib/MC/MCAsmInfo.cpp @@ -77,6 +77,7 @@ MCAsmInfo::MCAsmInfo() { // architecture basis. // - The target subclasses for AArch64, ARM, and X86 handle these cases UseIntegratedAssembler = true; + ParseInlineAsmUsingAsmParser = false; PreserveAsmComments = true; } diff --git a/llvm/lib/MC/MCAsmInfoXCOFF.cpp b/llvm/lib/MC/MCAsmInfoXCOFF.cpp index a552578976088..0006754acb86b 100644 --- a/llvm/lib/MC/MCAsmInfoXCOFF.cpp +++ b/llvm/lib/MC/MCAsmInfoXCOFF.cpp @@ -52,6 +52,7 @@ MCAsmInfoXCOFF::MCAsmInfoXCOFF() { LCOMMDirectiveAlignmentType = LCOMM::Log2Alignment; HasDotTypeDotSizeDirective = false; UseIntegratedAssembler = false; + ParseInlineAsmUsingAsmParser = true; NeedsFunctionDescriptors = true; ExceptionsType = ExceptionHandling::AIX; diff --git a/llvm/test/CodeGen/PowerPC/inline-asm-label.ll b/llvm/test/CodeGen/PowerPC/inline-asm-label.ll new file mode 100644 index 0000000000000..6bacbd77aba2c --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/inline-asm-label.ll @@ -0,0 +1,47 @@ +; RUN: llc -mcpu=pwr7 -verify-machineinstrs \ +; RUN: -mtriple=powerpc-unknown-aix < %s | FileCheck %s + +; RUN: llc -mcpu=pwr7 -verify-machineinstrs \ +; RUN: -mtriple=powerpc64-unknown-aix < %s | FileCheck %s + +; RUN: llc -mcpu=pwr7 -verify-machineinstrs -no-integrated-as \ +; RUN: -mtriple=powerpc64-unknown-aix < %s | FileCheck %s --check-prefix=NOIS + + +; Function Attrs: noinline nounwind optnone uwtable +define dso_local signext i32 @NoBarrier_CompareAndSwap(i32* %ptr, i32 signext %old_value, i32 signext %new_value) #0 { +; CHECK-LABEL: NoBarrier_CompareAndSwap: +; CHECK: #APP +; CHECK-NEXT: L..tmp0: +; CHECK-NEXT: lwarx 6, 0, 3 +; CHECK-NEXT: cmpw 4, 6 +; CHECK-NEXT: bne- 0, L..tmp1 +; CHECK-NEXT: stwcx. 5, 0, 3 +; CHECK-NEXT: bne- 0, L..tmp0 +; CHECK-NEXT: L..tmp1: + +; NOIS-LABEL: NoBarrier_CompareAndSwap: +; NOIS: #APP +; NOIS-NEXT: 1: lwarx 6, 0, 3 +; NOIS-NEXT: cmpw 4, 6 +; NOIS-NEXT: bne- 2f +; NOIS-NEXT: stwcx. 5, 0, 3 +; NOIS-NEXT: bne- 1b +; NOIS-NEXT: 2: + +entry: + %ptr.addr = alloca i32*, align 8 %old_value.addr = alloca i32, align 4 + %new_value.addr = alloca i32, align 4 + %result = alloca i32, align 4 + store i32* %ptr, i32** %ptr.addr, align 8 + store i32 %old_value, i32* %old_value.addr, align 4 + store i32 %new_value, i32* %new_value.addr, align 4 + %0 = load i32*, i32** %ptr.addr, align 8 + %1 = load i32, i32* %old_value.addr, align 4 + %2 = load i32, i32* %new_value.addr, align 4 + %3 = call i32 asm sideeffect "1: lwarx $0, $4, $1 \0A\09 cmpw $2, $0 \0A\09 bne- 2f \0A\09 stwcx. $3, $4, $1 \0A\09 bne- 1b \0A\092: \0A\09", "=&b,b,b,b,i,~{cr0},~{ctr}"(i32* %0, i32 %1, i32 %2, i32 0) + store i32 %3, i32* %result, align 4 + %4 = load i32, i32* %result, align 4 + ret i32 %4 +} + diff --git a/llvm/test/CodeGen/PowerPC/inline-asm-physical-fpr.ll b/llvm/test/CodeGen/PowerPC/inline-asm-physical-fpr.ll index 132b5d1500548..75a13fcfb2287 100644 --- a/llvm/test/CodeGen/PowerPC/inline-asm-physical-fpr.ll +++ b/llvm/test/CodeGen/PowerPC/inline-asm-physical-fpr.ll @@ -13,7 +13,7 @@ entry: ; CHECK-LABEL: test_double ; CHECK: #APP -; CHECK-NEXT: fadd. 31,1,2 +; CHECK-NEXT: fadd. 31, 1, 2 define dso_local signext i32 @test_int(double %a, double %b) { entry: @@ -23,4 +23,4 @@ entry: ; CHECK-LABEL: test_int ; CHECK: #APP -; CHECK-NEXT: fadd. 0,1,2 +; CHECK-NEXT: fadd. 0, 1, 2 From 25473d7b08e906d34ecb9ee34e5199fa16cebfb2 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Fri, 2 Jul 2021 16:12:54 +0000 Subject: [PATCH 540/619] [gn build] Port bf64210fd88f --- llvm/utils/gn/secondary/llvm/lib/MC/MCParser/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/llvm/lib/MC/MCParser/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/MC/MCParser/BUILD.gn index 8eaacc85dc08d..19de7578a0670 100644 --- a/llvm/utils/gn/secondary/llvm/lib/MC/MCParser/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/MC/MCParser/BUILD.gn @@ -18,5 +18,6 @@ static_library("MCParser") { "MCTargetAsmParser.cpp", "MasmParser.cpp", "WasmAsmParser.cpp", + "XCOFFAsmParser.cpp", ] } From 361f742f168de0f0f256802a329c19d081615d0d Mon Sep 17 00:00:00 2001 From: Aaron Green Date: Fri, 2 Jul 2021 09:15:17 -0700 Subject: [PATCH 541/619] Refactor mutation strategies into a standalone library This change introduces libMutagen/libclang_rt.mutagen.a as a subset of libFuzzer/libclang_rt.fuzzer.a. This library contains only the fuzzing strategies used by libFuzzer to produce new test inputs from provided inputs, dictionaries, and SanitizerCoverage feedback. Most of this change is simply moving sections of code to one side or the other of the library boundary. The only meaningful new code is: * The Mutagen.h interface and its implementation in Mutagen.cpp. * The following methods in MutagenDispatcher.cpp: * UseCmp * UseMemmem * SetCustomMutator * SetCustomCrossOver * LateInitialize (similar to the MutationDispatcher's original constructor) * Mutate_AddWordFromTORC (uses callbacks instead of accessing TPC directly) * StartMutationSequence * MutationSequence * DictionaryEntrySequence * RecommendDictionary * RecommendDictionaryEntry * FuzzerMutate.cpp (which now justs sets callbacks and handles printing) * MutagenUnittest.cpp (which adds tests of Mutagen.h) A note on performance: This change was tested with a 100 passes of test/fuzzer/LargeTest.cpp with 1000 runs per pass, both with and without the change. The running time distribution was qualitatively similar both with and without the change, and the average difference was within 30 microseconds (2.240 ms/run vs 2.212 ms/run, respectively). Both times were much higher than observed with the fully optimized system clang (~0.38 ms/run), most likely due to the combination of CMake "dev mode" settings (e.g. CMAKE_BUILD_TYPE="Debug", LLVM_ENABLE_LTO=OFF, etc.). The difference between the two versions built similarly seems to be "in the noise" and suggests no meaningful performance degradation. Reviewed By: morehouse Differential Revision: https://reviews.llvm.org/D102447 --- compiler-rt/lib/fuzzer/CMakeLists.txt | 45 +- compiler-rt/lib/fuzzer/FuzzerDefs.h | 36 +- compiler-rt/lib/fuzzer/FuzzerDictionary.h | 120 --- compiler-rt/lib/fuzzer/FuzzerDriver.cpp | 8 +- compiler-rt/lib/fuzzer/FuzzerInternal.h | 5 + compiler-rt/lib/fuzzer/FuzzerLoop.cpp | 9 +- compiler-rt/lib/fuzzer/FuzzerMutate.cpp | 609 +---------- compiler-rt/lib/fuzzer/FuzzerMutate.h | 140 +-- compiler-rt/lib/fuzzer/FuzzerRandom.h | 1 + compiler-rt/lib/fuzzer/FuzzerTracePC.cpp | 1 - compiler-rt/lib/fuzzer/FuzzerTracePC.h | 4 +- compiler-rt/lib/fuzzer/FuzzerUtil.h | 12 +- compiler-rt/lib/fuzzer/FuzzerUtilFuchsia.cpp | 5 - compiler-rt/lib/fuzzer/FuzzerUtilPosix.cpp | 5 - compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp | 21 - compiler-rt/lib/fuzzer/build.sh | 10 +- compiler-rt/lib/fuzzer/mutagen/CMakeLists.txt | 59 ++ compiler-rt/lib/fuzzer/mutagen/Mutagen.cpp | 100 ++ compiler-rt/lib/fuzzer/mutagen/Mutagen.h | 119 +++ .../MutagenCrossOver.cpp} | 13 +- .../lib/fuzzer/mutagen/MutagenDictionary.h | 85 ++ .../lib/fuzzer/mutagen/MutagenDispatcher.cpp | 659 ++++++++++++ .../lib/fuzzer/mutagen/MutagenDispatcher.h | 190 ++++ .../lib/fuzzer/mutagen/MutagenSequence.h | 101 ++ compiler-rt/lib/fuzzer/mutagen/MutagenUtil.h | 24 + .../lib/fuzzer/mutagen/MutagenUtilPosix.cpp | 23 + .../lib/fuzzer/mutagen/MutagenUtilWindows.cpp | 41 + compiler-rt/lib/fuzzer/mutagen/build.sh | 12 + compiler-rt/lib/fuzzer/tests/CMakeLists.txt | 32 +- .../lib/fuzzer/tests/FuzzerUnittest.cpp | 477 --------- .../lib/fuzzer/tests/MutagenUnittest.cpp | 971 ++++++++++++++++++ compiler-rt/test/fuzzer/CMakeLists.txt | 1 + 32 files changed, 2571 insertions(+), 1367 deletions(-) delete mode 100644 compiler-rt/lib/fuzzer/FuzzerDictionary.h create mode 100644 compiler-rt/lib/fuzzer/mutagen/CMakeLists.txt create mode 100644 compiler-rt/lib/fuzzer/mutagen/Mutagen.cpp create mode 100644 compiler-rt/lib/fuzzer/mutagen/Mutagen.h rename compiler-rt/lib/fuzzer/{FuzzerCrossOver.cpp => mutagen/MutagenCrossOver.cpp} (86%) create mode 100644 compiler-rt/lib/fuzzer/mutagen/MutagenDictionary.h create mode 100644 compiler-rt/lib/fuzzer/mutagen/MutagenDispatcher.cpp create mode 100644 compiler-rt/lib/fuzzer/mutagen/MutagenDispatcher.h create mode 100644 compiler-rt/lib/fuzzer/mutagen/MutagenSequence.h create mode 100644 compiler-rt/lib/fuzzer/mutagen/MutagenUtil.h create mode 100644 compiler-rt/lib/fuzzer/mutagen/MutagenUtilPosix.cpp create mode 100644 compiler-rt/lib/fuzzer/mutagen/MutagenUtilWindows.cpp create mode 100755 compiler-rt/lib/fuzzer/mutagen/build.sh create mode 100644 compiler-rt/lib/fuzzer/tests/MutagenUnittest.cpp diff --git a/compiler-rt/lib/fuzzer/CMakeLists.txt b/compiler-rt/lib/fuzzer/CMakeLists.txt index 3201ed279a621..e27cf8d5e6fe8 100644 --- a/compiler-rt/lib/fuzzer/CMakeLists.txt +++ b/compiler-rt/lib/fuzzer/CMakeLists.txt @@ -1,5 +1,4 @@ set(LIBFUZZER_SOURCES - FuzzerCrossOver.cpp FuzzerDataFlowTrace.cpp FuzzerDriver.cpp FuzzerExtFunctionsDlsym.cpp @@ -29,7 +28,6 @@ set(LIBFUZZER_HEADERS FuzzerCorpus.h FuzzerDataFlowTrace.h FuzzerDefs.h - FuzzerDictionary.h FuzzerExtFunctions.def FuzzerExtFunctions.h FuzzerFlags.def @@ -84,6 +82,32 @@ else() endif() endif() +macro(partially_link_libcxx name dir arch) + if(${arch} MATCHES "i386") + set(EMULATION_ARGUMENT "-m" "elf_i386") + else() + set(EMULATION_ARGUMENT "") + endif() + set(cxx_${arch}_merge_dir "${CMAKE_CURRENT_BINARY_DIR}/cxx_${arch}_merge.dir") + file(MAKE_DIRECTORY ${cxx_${arch}_merge_dir}) + add_custom_command(TARGET clang_rt.${name}-${arch} POST_BUILD + COMMAND ${CMAKE_LINKER} ${EMULATION_ARGUMENT} --whole-archive "$" --no-whole-archive ${dir}/lib/libc++.a -r -o ${name}.o + COMMAND ${CMAKE_OBJCOPY} --localize-hidden ${name}.o + COMMAND ${CMAKE_COMMAND} -E remove "$" + COMMAND ${CMAKE_AR} qcs "$" ${name}.o + WORKING_DIRECTORY ${cxx_${arch}_merge_dir} + ) +endmacro() + +add_subdirectory(mutagen) +foreach(X IN LISTS LIBFUZZER_MUTAGEN_SOURCES) + list(APPEND LIBFUZZER_SOURCES "mutagen/${X}") +endforeach() +foreach(X IN LISTS LIBFUZZER_MUTAGEN_HEADERS) + list(APPEND LIBFUZZER_HEADERS "mutagen/${X}") +endforeach() +include_directories(.) + add_compiler_rt_component(fuzzer) add_compiler_rt_object_libraries(RTfuzzer @@ -135,23 +159,6 @@ add_compiler_rt_runtime(clang_rt.fuzzer_interceptors if(OS_NAME MATCHES "Linux|Fuchsia" AND COMPILER_RT_LIBCXX_PATH AND COMPILER_RT_LIBCXXABI_PATH) - macro(partially_link_libcxx name dir arch) - if(${arch} MATCHES "i386") - set(EMULATION_ARGUMENT "-m" "elf_i386") - else() - set(EMULATION_ARGUMENT "") - endif() - set(cxx_${arch}_merge_dir "${CMAKE_CURRENT_BINARY_DIR}/cxx_${arch}_merge.dir") - file(MAKE_DIRECTORY ${cxx_${arch}_merge_dir}) - add_custom_command(TARGET clang_rt.${name}-${arch} POST_BUILD - COMMAND ${CMAKE_LINKER} ${EMULATION_ARGUMENT} --whole-archive "$" --no-whole-archive ${dir}/lib/libc++.a -r -o ${name}.o - COMMAND ${CMAKE_OBJCOPY} --localize-hidden ${name}.o - COMMAND ${CMAKE_COMMAND} -E remove "$" - COMMAND ${CMAKE_AR} qcs "$" ${name}.o - WORKING_DIRECTORY ${cxx_${arch}_merge_dir} - ) - endmacro() - foreach(arch ${FUZZER_SUPPORTED_ARCH}) get_target_flags_for_arch(${arch} TARGET_CFLAGS) set(LIBCXX_${arch}_PREFIX ${CMAKE_CURRENT_BINARY_DIR}/libcxx_fuzzer_${arch}) diff --git a/compiler-rt/lib/fuzzer/FuzzerDefs.h b/compiler-rt/lib/fuzzer/FuzzerDefs.h index 1a2752af2f4d5..36820b61c2aa5 100644 --- a/compiler-rt/lib/fuzzer/FuzzerDefs.h +++ b/compiler-rt/lib/fuzzer/FuzzerDefs.h @@ -15,21 +15,18 @@ #include #include #include +#include #include #include #include #include - namespace fuzzer { template T Min(T a, T b) { return a < b ? a : b; } template T Max(T a, T b) { return a > b ? a : b; } class Random; -class Dictionary; -class DictionaryEntry; -class MutationDispatcher; struct FuzzingOptions; class InputCorpus; struct InputInfo; @@ -60,6 +57,37 @@ using Set = std::set, fuzzer_allocator>; typedef Vector Unit; typedef Vector UnitVector; + +// A simple POD sized array of bytes. +template class FixedWord { +public: + static const size_t kMaxSize = kMaxSizeT; + FixedWord() { memset(Data, 0, kMaxSize); } + FixedWord(const uint8_t *B, size_t S) { Set(B, S); } + + void Set(const uint8_t *B, size_t S) { + static_assert(kMaxSizeT <= std::numeric_limits::max(), + "FixedWord::kMaxSizeT cannot fit in a uint8_t."); + assert(S <= kMaxSize); + memcpy(Data, B, S); + Size = static_cast(S); + } + + bool operator==(const FixedWord &w) const { + return Size == w.Size && 0 == memcmp(Data, w.Data, Size); + } + + static size_t GetMaxSize() { return kMaxSize; } + const uint8_t *data() const { return Data; } + uint8_t size() const { return Size; } + +private: + uint8_t Size = 0; + uint8_t Data[kMaxSize]; +}; + +typedef FixedWord<64> Word; + typedef int (*UserCallback)(const uint8_t *Data, size_t Size); int FuzzerDriver(int *argc, char ***argv, UserCallback Callback); diff --git a/compiler-rt/lib/fuzzer/FuzzerDictionary.h b/compiler-rt/lib/fuzzer/FuzzerDictionary.h deleted file mode 100644 index db55907d93631..0000000000000 --- a/compiler-rt/lib/fuzzer/FuzzerDictionary.h +++ /dev/null @@ -1,120 +0,0 @@ -//===- FuzzerDictionary.h - Internal header for the Fuzzer ------*- C++ -* ===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// fuzzer::Dictionary -//===----------------------------------------------------------------------===// - -#ifndef LLVM_FUZZER_DICTIONARY_H -#define LLVM_FUZZER_DICTIONARY_H - -#include "FuzzerDefs.h" -#include "FuzzerIO.h" -#include "FuzzerUtil.h" -#include -#include - -namespace fuzzer { -// A simple POD sized array of bytes. -template class FixedWord { -public: - static const size_t kMaxSize = kMaxSizeT; - FixedWord() {} - FixedWord(const uint8_t *B, size_t S) { Set(B, S); } - - void Set(const uint8_t *B, size_t S) { - static_assert(kMaxSizeT <= std::numeric_limits::max(), - "FixedWord::kMaxSizeT cannot fit in a uint8_t."); - assert(S <= kMaxSize); - memcpy(Data, B, S); - Size = static_cast(S); - } - - bool operator==(const FixedWord &w) const { - return Size == w.Size && 0 == memcmp(Data, w.Data, Size); - } - - static size_t GetMaxSize() { return kMaxSize; } - const uint8_t *data() const { return Data; } - uint8_t size() const { return Size; } - -private: - uint8_t Size = 0; - uint8_t Data[kMaxSize]; -}; - -typedef FixedWord<64> Word; - -class DictionaryEntry { - public: - DictionaryEntry() {} - DictionaryEntry(Word W) : W(W) {} - DictionaryEntry(Word W, size_t PositionHint) : W(W), PositionHint(PositionHint) {} - const Word &GetW() const { return W; } - - bool HasPositionHint() const { return PositionHint != std::numeric_limits::max(); } - size_t GetPositionHint() const { - assert(HasPositionHint()); - return PositionHint; - } - void IncUseCount() { UseCount++; } - void IncSuccessCount() { SuccessCount++; } - size_t GetUseCount() const { return UseCount; } - size_t GetSuccessCount() const {return SuccessCount; } - - void Print(const char *PrintAfter = "\n") { - PrintASCII(W.data(), W.size()); - if (HasPositionHint()) - Printf("@%zd", GetPositionHint()); - Printf("%s", PrintAfter); - } - -private: - Word W; - size_t PositionHint = std::numeric_limits::max(); - size_t UseCount = 0; - size_t SuccessCount = 0; -}; - -class Dictionary { - public: - static const size_t kMaxDictSize = 1 << 14; - - bool ContainsWord(const Word &W) const { - return std::any_of(begin(), end(), [&](const DictionaryEntry &DE) { - return DE.GetW() == W; - }); - } - const DictionaryEntry *begin() const { return &DE[0]; } - const DictionaryEntry *end() const { return begin() + Size; } - DictionaryEntry & operator[] (size_t Idx) { - assert(Idx < Size); - return DE[Idx]; - } - void push_back(DictionaryEntry DE) { - if (Size < kMaxDictSize) - this->DE[Size++] = DE; - } - void clear() { Size = 0; } - bool empty() const { return Size == 0; } - size_t size() const { return Size; } - -private: - DictionaryEntry DE[kMaxDictSize]; - size_t Size = 0; -}; - -// Parses one dictionary entry. -// If successful, write the enty to Unit and returns true, -// otherwise returns false. -bool ParseOneDictionaryEntry(const std::string &Str, Unit *U); -// Parses the dictionary file, fills Units, returns true iff all lines -// were parsed successfully. -bool ParseDictionaryFile(const std::string &Text, Vector *Units); - -} // namespace fuzzer - -#endif // LLVM_FUZZER_DICTIONARY_H diff --git a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp index ceaa9070512f0..38efc2e18863c 100644 --- a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp @@ -19,15 +19,16 @@ #include "FuzzerPlatform.h" #include "FuzzerRandom.h" #include "FuzzerTracePC.h" +#include "mutagen/MutagenDispatcher.h" #include #include #include #include #include +#include #include #include #include -#include // This function should be present in the libFuzzer so that the client // binary can test for its existence. @@ -803,8 +804,9 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) { ReadCorpora(*Inputs, {})); } - Random Rand(Seed); - auto *MD = new MutationDispatcher(Rand, Options); + LLVMMutagenConfiguration Config; + ConfigureMutagen(Seed, Options, &Config); + auto *MD = new MutationDispatcher(&Config); auto *Corpus = new InputCorpus(Options.OutputCorpus, Entropic); auto *F = new Fuzzer(Callback, *Corpus, *MD, Options); diff --git a/compiler-rt/lib/fuzzer/FuzzerInternal.h b/compiler-rt/lib/fuzzer/FuzzerInternal.h index 37c8a01dc3c64..a629c3d02f774 100644 --- a/compiler-rt/lib/fuzzer/FuzzerInternal.h +++ b/compiler-rt/lib/fuzzer/FuzzerInternal.h @@ -18,6 +18,7 @@ #include "FuzzerOptions.h" #include "FuzzerSHA1.h" #include "FuzzerValueBitMap.h" +#include "mutagen/MutagenDispatcher.h" #include #include #include @@ -26,8 +27,12 @@ #include namespace fuzzer { +namespace { using namespace std::chrono; +using mutagen::MutationDispatcher; + +} // namespace class Fuzzer { public: diff --git a/compiler-rt/lib/fuzzer/FuzzerLoop.cpp b/compiler-rt/lib/fuzzer/FuzzerLoop.cpp index 86a78ab751741..d50277e9f2a0d 100644 --- a/compiler-rt/lib/fuzzer/FuzzerLoop.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerLoop.cpp @@ -177,7 +177,7 @@ void Fuzzer::DumpCurrentUnit(const char *Prefix) { if (!CurrentUnitData) return; // Happens when running individual inputs. ScopedDisableMsanInterceptorChecks S; - MD.PrintMutationSequence(); + PrintMutationSequence(MD); Printf("; base unit: %s\n", Sha1ToString(BaseSha1).c_str()); size_t UnitSize = CurrentUnitSize; if (UnitSize <= kMaxUnitSizeToPrint) { @@ -539,8 +539,9 @@ bool Fuzzer::RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile, TimeOfUnit, UniqFeatureSetTmp, DFT, II); WriteFeatureSetToFile(Options.FeaturesDir, Sha1ToString(NewII->Sha1), NewII->UniqFeatureSet); + const auto &MS = MD.MutationSequence(); WriteEdgeToMutationGraphFile(Options.MutationGraphFile, NewII, II, - MD.MutationSequence()); + MS.GetString()); return true; } if (II && FoundUniqFeaturesOfII && @@ -652,7 +653,7 @@ void Fuzzer::PrintStatusForNewUnit(const Unit &U, const char *Text) { PrintStats(Text, ""); if (Options.Verbosity) { Printf(" L: %zd/%zd ", U.size(), Corpus.MaxInputSize()); - MD.PrintMutationSequence(Options.Verbosity >= 2); + PrintMutationSequence(MD, Options.Verbosity >= 2); Printf("\n"); } } @@ -898,7 +899,7 @@ void Fuzzer::Loop(Vector &CorporaFiles) { } PrintStats("DONE ", "\n"); - MD.PrintRecommendedDictionary(); + PrintRecommendedDictionary(MD); } void Fuzzer::MinimizeCrashLoop(const Unit &U) { diff --git a/compiler-rt/lib/fuzzer/FuzzerMutate.cpp b/compiler-rt/lib/fuzzer/FuzzerMutate.cpp index 4650f1beceacd..bbce4aab58024 100644 --- a/compiler-rt/lib/fuzzer/FuzzerMutate.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerMutate.cpp @@ -1,497 +1,77 @@ -//===- FuzzerMutate.cpp - Mutate a test input -----------------------------===// +//===- FuzzerMutate.cpp - Mutation utilities -----------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// Mutate a test input. +// Mutate utilities. //===----------------------------------------------------------------------===// -#include "FuzzerDefs.h" +#include "FuzzerMutate.h" #include "FuzzerExtFunctions.h" #include "FuzzerIO.h" -#include "FuzzerMutate.h" -#include "FuzzerOptions.h" #include "FuzzerTracePC.h" +#include "FuzzerUtil.h" namespace fuzzer { +namespace { -const size_t Dictionary::kMaxDictSize; -static const size_t kMaxMutationsToPrint = 10; - -static void PrintASCII(const Word &W, const char *PrintAfter) { - PrintASCII(W.data(), W.size(), PrintAfter); -} - -MutationDispatcher::MutationDispatcher(Random &Rand, - const FuzzingOptions &Options) - : Rand(Rand), Options(Options) { - DefaultMutators.insert( - DefaultMutators.begin(), - { - {&MutationDispatcher::Mutate_EraseBytes, "EraseBytes"}, - {&MutationDispatcher::Mutate_InsertByte, "InsertByte"}, - {&MutationDispatcher::Mutate_InsertRepeatedBytes, - "InsertRepeatedBytes"}, - {&MutationDispatcher::Mutate_ChangeByte, "ChangeByte"}, - {&MutationDispatcher::Mutate_ChangeBit, "ChangeBit"}, - {&MutationDispatcher::Mutate_ShuffleBytes, "ShuffleBytes"}, - {&MutationDispatcher::Mutate_ChangeASCIIInteger, "ChangeASCIIInt"}, - {&MutationDispatcher::Mutate_ChangeBinaryInteger, "ChangeBinInt"}, - {&MutationDispatcher::Mutate_CopyPart, "CopyPart"}, - {&MutationDispatcher::Mutate_CrossOver, "CrossOver"}, - {&MutationDispatcher::Mutate_AddWordFromManualDictionary, - "ManualDict"}, - {&MutationDispatcher::Mutate_AddWordFromPersistentAutoDictionary, - "PersAutoDict"}, - }); - if(Options.UseCmp) - DefaultMutators.push_back( - {&MutationDispatcher::Mutate_AddWordFromTORC, "CMP"}); - - if (EF->LLVMFuzzerCustomMutator) - Mutators.push_back({&MutationDispatcher::Mutate_Custom, "Custom"}); - else - Mutators = DefaultMutators; - - if (EF->LLVMFuzzerCustomCrossOver) - Mutators.push_back( - {&MutationDispatcher::Mutate_CustomCrossOver, "CustomCrossOver"}); -} - -static char RandCh(Random &Rand) { - if (Rand.RandBool()) - return static_cast(Rand(256)); - const char Special[] = "!*'();:@&=+$,/?%#[]012Az-`~.\xff\x00"; - return Special[Rand(sizeof(Special) - 1)]; -} - -size_t MutationDispatcher::Mutate_Custom(uint8_t *Data, size_t Size, - size_t MaxSize) { - if (EF->__msan_unpoison) - EF->__msan_unpoison(Data, Size); - if (EF->__msan_unpoison_param) - EF->__msan_unpoison_param(4); - return EF->LLVMFuzzerCustomMutator(Data, Size, MaxSize, - Rand.Rand()); -} - -size_t MutationDispatcher::Mutate_CustomCrossOver(uint8_t *Data, size_t Size, - size_t MaxSize) { - if (Size == 0) - return 0; - if (!CrossOverWith) return 0; - const Unit &Other = *CrossOverWith; - if (Other.empty()) - return 0; - CustomCrossOverInPlaceHere.resize(MaxSize); - auto &U = CustomCrossOverInPlaceHere; - - if (EF->__msan_unpoison) { - EF->__msan_unpoison(Data, Size); - EF->__msan_unpoison(Other.data(), Other.size()); - EF->__msan_unpoison(U.data(), U.size()); - } - if (EF->__msan_unpoison_param) - EF->__msan_unpoison_param(7); - size_t NewSize = EF->LLVMFuzzerCustomCrossOver( - Data, Size, Other.data(), Other.size(), U.data(), U.size(), - Rand.Rand()); - - if (!NewSize) - return 0; - assert(NewSize <= MaxSize && "CustomCrossOver returned overisized unit"); - memcpy(Data, U.data(), NewSize); - return NewSize; -} - -size_t MutationDispatcher::Mutate_ShuffleBytes(uint8_t *Data, size_t Size, - size_t MaxSize) { - if (Size > MaxSize || Size == 0) return 0; - size_t ShuffleAmount = - Rand(std::min(Size, (size_t)8)) + 1; // [1,8] and <= Size. - size_t ShuffleStart = Rand(Size - ShuffleAmount); - assert(ShuffleStart + ShuffleAmount <= Size); - std::shuffle(Data + ShuffleStart, Data + ShuffleStart + ShuffleAmount, Rand); - return Size; -} - -size_t MutationDispatcher::Mutate_EraseBytes(uint8_t *Data, size_t Size, - size_t MaxSize) { - if (Size <= 1) return 0; - size_t N = Rand(Size / 2) + 1; - assert(N < Size); - size_t Idx = Rand(Size - N + 1); - // Erase Data[Idx:Idx+N]. - memmove(Data + Idx, Data + Idx + N, Size - Idx - N); - // Printf("Erase: %zd %zd => %zd; Idx %zd\n", N, Size, Size - N, Idx); - return Size - N; +void FromTORC4(size_t Idx, uint32_t *A, uint32_t *B) { + const auto &X = TPC.TORC4.Get(Idx); + *A = X.A; + *B = X.B; } -size_t MutationDispatcher::Mutate_InsertByte(uint8_t *Data, size_t Size, - size_t MaxSize) { - if (Size >= MaxSize) return 0; - size_t Idx = Rand(Size + 1); - // Insert new value at Data[Idx]. - memmove(Data + Idx + 1, Data + Idx, Size - Idx); - Data[Idx] = RandCh(Rand); - return Size + 1; +void FromTORC8(size_t Idx, uint64_t *A, uint64_t *B) { + const auto &X = TPC.TORC8.Get(Idx); + *A = X.A; + *B = X.B; } -size_t MutationDispatcher::Mutate_InsertRepeatedBytes(uint8_t *Data, - size_t Size, - size_t MaxSize) { - const size_t kMinBytesToInsert = 3; - if (Size + kMinBytesToInsert >= MaxSize) return 0; - size_t MaxBytesToInsert = std::min(MaxSize - Size, (size_t)128); - size_t N = Rand(MaxBytesToInsert - kMinBytesToInsert + 1) + kMinBytesToInsert; - assert(Size + N <= MaxSize && N); - size_t Idx = Rand(Size + 1); - // Insert new values at Data[Idx]. - memmove(Data + Idx + N, Data + Idx, Size - Idx); - // Give preference to 0x00 and 0xff. - uint8_t Byte = static_cast( - Rand.RandBool() ? Rand(256) : (Rand.RandBool() ? 0 : 255)); - for (size_t i = 0; i < N; i++) - Data[Idx + i] = Byte; - return Size + N; +void FromTORCW(size_t Idx, const uint8_t **DataA, size_t *SizeA, + const uint8_t **DataB, size_t *SizeB) { + const auto &X = TPC.TORCW.Get(Idx); + *DataA = X.A.data(); + *SizeA = X.A.size(); + *DataB = X.B.data(); + *SizeB = X.B.size(); } -size_t MutationDispatcher::Mutate_ChangeByte(uint8_t *Data, size_t Size, - size_t MaxSize) { - if (Size > MaxSize) return 0; - size_t Idx = Rand(Size); - Data[Idx] = RandCh(Rand); - return Size; +void FromMMT(size_t Idx, const uint8_t **Data, size_t *Size) { + const auto &W = TPC.MMT.Get(Idx); + *Data = W.data(); + *Size = W.size(); } -size_t MutationDispatcher::Mutate_ChangeBit(uint8_t *Data, size_t Size, - size_t MaxSize) { - if (Size > MaxSize) return 0; - size_t Idx = Rand(Size); - Data[Idx] ^= 1 << Rand(8); - return Size; +void PrintASCII(const Word &W, const char *PrintAfter) { + fuzzer::PrintASCII(W.data(), W.size(), PrintAfter); } -size_t MutationDispatcher::Mutate_AddWordFromManualDictionary(uint8_t *Data, - size_t Size, - size_t MaxSize) { - return AddWordFromDictionary(ManualDictionary, Data, Size, MaxSize); -} - -size_t MutationDispatcher::ApplyDictionaryEntry(uint8_t *Data, size_t Size, - size_t MaxSize, - DictionaryEntry &DE) { - const Word &W = DE.GetW(); - bool UsePositionHint = DE.HasPositionHint() && - DE.GetPositionHint() + W.size() < Size && - Rand.RandBool(); - if (Rand.RandBool()) { // Insert W. - if (Size + W.size() > MaxSize) return 0; - size_t Idx = UsePositionHint ? DE.GetPositionHint() : Rand(Size + 1); - memmove(Data + Idx + W.size(), Data + Idx, Size - Idx); - memcpy(Data + Idx, W.data(), W.size()); - Size += W.size(); - } else { // Overwrite some bytes with W. - if (W.size() > Size) return 0; - size_t Idx = - UsePositionHint ? DE.GetPositionHint() : Rand(Size + 1 - W.size()); - memcpy(Data + Idx, W.data(), W.size()); - } - return Size; -} - -// Somewhere in the past we have observed a comparison instructions -// with arguments Arg1 Arg2. This function tries to guess a dictionary -// entry that will satisfy that comparison. -// It first tries to find one of the arguments (possibly swapped) in the -// input and if it succeeds it creates a DE with a position hint. -// Otherwise it creates a DE with one of the arguments w/o a position hint. -DictionaryEntry MutationDispatcher::MakeDictionaryEntryFromCMP( - const void *Arg1, const void *Arg2, - const void *Arg1Mutation, const void *Arg2Mutation, - size_t ArgSize, const uint8_t *Data, - size_t Size) { - bool HandleFirst = Rand.RandBool(); - const void *ExistingBytes, *DesiredBytes; - Word W; - const uint8_t *End = Data + Size; - for (int Arg = 0; Arg < 2; Arg++) { - ExistingBytes = HandleFirst ? Arg1 : Arg2; - DesiredBytes = HandleFirst ? Arg2Mutation : Arg1Mutation; - HandleFirst = !HandleFirst; - W.Set(reinterpret_cast(DesiredBytes), ArgSize); - const size_t kMaxNumPositions = 8; - size_t Positions[kMaxNumPositions]; - size_t NumPositions = 0; - for (const uint8_t *Cur = Data; - Cur < End && NumPositions < kMaxNumPositions; Cur++) { - Cur = - (const uint8_t *)SearchMemory(Cur, End - Cur, ExistingBytes, ArgSize); - if (!Cur) break; - Positions[NumPositions++] = Cur - Data; - } - if (!NumPositions) continue; - return DictionaryEntry(W, Positions[Rand(NumPositions)]); - } - DictionaryEntry DE(W); - return DE; -} - - -template -DictionaryEntry MutationDispatcher::MakeDictionaryEntryFromCMP( - T Arg1, T Arg2, const uint8_t *Data, size_t Size) { - if (Rand.RandBool()) Arg1 = Bswap(Arg1); - if (Rand.RandBool()) Arg2 = Bswap(Arg2); - T Arg1Mutation = static_cast(Arg1 + Rand(-1, 1)); - T Arg2Mutation = static_cast(Arg2 + Rand(-1, 1)); - return MakeDictionaryEntryFromCMP(&Arg1, &Arg2, &Arg1Mutation, &Arg2Mutation, - sizeof(Arg1), Data, Size); -} +} // namespace -DictionaryEntry MutationDispatcher::MakeDictionaryEntryFromCMP( - const Word &Arg1, const Word &Arg2, const uint8_t *Data, size_t Size) { - return MakeDictionaryEntryFromCMP(Arg1.data(), Arg2.data(), Arg1.data(), - Arg2.data(), Arg1.size(), Data, Size); +void ConfigureMutagen(unsigned int Seed, const FuzzingOptions &Options, + LLVMMutagenConfiguration *OutConfig) { + memset(OutConfig, 0, sizeof(*OutConfig)); + OutConfig->Seed = Seed; + OutConfig->UseCmp = Options.UseCmp; + OutConfig->FromTORC4 = FromTORC4; + OutConfig->FromTORC8 = FromTORC8; + OutConfig->FromTORCW = FromTORCW; + OutConfig->UseMemmem = Options.UseMemmem; + OutConfig->FromMMT = FromMMT; + OutConfig->CustomMutator = EF->LLVMFuzzerCustomMutator; + OutConfig->CustomCrossOver = EF->LLVMFuzzerCustomCrossOver; + OutConfig->MSanUnpoison = EF->__msan_unpoison; + OutConfig->MSanUnpoisonParam = EF->__msan_unpoison_param; } -size_t MutationDispatcher::Mutate_AddWordFromTORC( - uint8_t *Data, size_t Size, size_t MaxSize) { - Word W; - DictionaryEntry DE; - switch (Rand(4)) { - case 0: { - auto X = TPC.TORC8.Get(Rand.Rand()); - DE = MakeDictionaryEntryFromCMP(X.A, X.B, Data, Size); - } break; - case 1: { - auto X = TPC.TORC4.Get(Rand.Rand()); - if ((X.A >> 16) == 0 && (X.B >> 16) == 0 && Rand.RandBool()) - DE = MakeDictionaryEntryFromCMP((uint16_t)X.A, (uint16_t)X.B, Data, Size); - else - DE = MakeDictionaryEntryFromCMP(X.A, X.B, Data, Size); - } break; - case 2: { - auto X = TPC.TORCW.Get(Rand.Rand()); - DE = MakeDictionaryEntryFromCMP(X.A, X.B, Data, Size); - } break; - case 3: if (Options.UseMemmem) { - auto X = TPC.MMT.Get(Rand.Rand()); - DE = DictionaryEntry(X); - } break; - default: - assert(0); - } - if (!DE.GetW().size()) return 0; - Size = ApplyDictionaryEntry(Data, Size, MaxSize, DE); - if (!Size) return 0; - DictionaryEntry &DERef = - CmpDictionaryEntriesDeque[CmpDictionaryEntriesDequeIdx++ % - kCmpDictionaryEntriesDequeSize]; - DERef = DE; - CurrentDictionaryEntrySequence.push_back(&DERef); - return Size; -} - -size_t MutationDispatcher::Mutate_AddWordFromPersistentAutoDictionary( - uint8_t *Data, size_t Size, size_t MaxSize) { - return AddWordFromDictionary(PersistentAutoDictionary, Data, Size, MaxSize); -} - -size_t MutationDispatcher::AddWordFromDictionary(Dictionary &D, uint8_t *Data, - size_t Size, size_t MaxSize) { - if (Size > MaxSize) return 0; - if (D.empty()) return 0; - DictionaryEntry &DE = D[Rand(D.size())]; - Size = ApplyDictionaryEntry(Data, Size, MaxSize, DE); - if (!Size) return 0; - DE.IncUseCount(); - CurrentDictionaryEntrySequence.push_back(&DE); - return Size; -} - -// Overwrites part of To[0,ToSize) with a part of From[0,FromSize). -// Returns ToSize. -size_t MutationDispatcher::CopyPartOf(const uint8_t *From, size_t FromSize, - uint8_t *To, size_t ToSize) { - // Copy From[FromBeg, FromBeg + CopySize) into To[ToBeg, ToBeg + CopySize). - size_t ToBeg = Rand(ToSize); - size_t CopySize = Rand(ToSize - ToBeg) + 1; - assert(ToBeg + CopySize <= ToSize); - CopySize = std::min(CopySize, FromSize); - size_t FromBeg = Rand(FromSize - CopySize + 1); - assert(FromBeg + CopySize <= FromSize); - memmove(To + ToBeg, From + FromBeg, CopySize); - return ToSize; -} - -// Inserts part of From[0,ToSize) into To. -// Returns new size of To on success or 0 on failure. -size_t MutationDispatcher::InsertPartOf(const uint8_t *From, size_t FromSize, - uint8_t *To, size_t ToSize, - size_t MaxToSize) { - if (ToSize >= MaxToSize) return 0; - size_t AvailableSpace = MaxToSize - ToSize; - size_t MaxCopySize = std::min(AvailableSpace, FromSize); - size_t CopySize = Rand(MaxCopySize) + 1; - size_t FromBeg = Rand(FromSize - CopySize + 1); - assert(FromBeg + CopySize <= FromSize); - size_t ToInsertPos = Rand(ToSize + 1); - assert(ToInsertPos + CopySize <= MaxToSize); - size_t TailSize = ToSize - ToInsertPos; - if (To == From) { - MutateInPlaceHere.resize(MaxToSize); - memcpy(MutateInPlaceHere.data(), From + FromBeg, CopySize); - memmove(To + ToInsertPos + CopySize, To + ToInsertPos, TailSize); - memmove(To + ToInsertPos, MutateInPlaceHere.data(), CopySize); - } else { - memmove(To + ToInsertPos + CopySize, To + ToInsertPos, TailSize); - memmove(To + ToInsertPos, From + FromBeg, CopySize); - } - return ToSize + CopySize; -} - -size_t MutationDispatcher::Mutate_CopyPart(uint8_t *Data, size_t Size, - size_t MaxSize) { - if (Size > MaxSize || Size == 0) return 0; - // If Size == MaxSize, `InsertPartOf(...)` will - // fail so there's no point using it in this case. - if (Size == MaxSize || Rand.RandBool()) - return CopyPartOf(Data, Size, Data, Size); - else - return InsertPartOf(Data, Size, Data, Size, MaxSize); -} - -size_t MutationDispatcher::Mutate_ChangeASCIIInteger(uint8_t *Data, size_t Size, - size_t MaxSize) { - if (Size > MaxSize) return 0; - size_t B = Rand(Size); - while (B < Size && !isdigit(Data[B])) B++; - if (B == Size) return 0; - size_t E = B; - while (E < Size && isdigit(Data[E])) E++; - assert(B < E); - // now we have digits in [B, E). - // strtol and friends don't accept non-zero-teminated data, parse it manually. - uint64_t Val = Data[B] - '0'; - for (size_t i = B + 1; i < E; i++) - Val = Val * 10 + Data[i] - '0'; - - // Mutate the integer value. - switch(Rand(5)) { - case 0: Val++; break; - case 1: Val--; break; - case 2: Val /= 2; break; - case 3: Val *= 2; break; - case 4: Val = Rand(Val * Val); break; - default: assert(0); - } - // Just replace the bytes with the new ones, don't bother moving bytes. - for (size_t i = B; i < E; i++) { - size_t Idx = E + B - i - 1; - assert(Idx >= B && Idx < E); - Data[Idx] = (Val % 10) + '0'; - Val /= 10; - } - return Size; -} - -template -size_t ChangeBinaryInteger(uint8_t *Data, size_t Size, Random &Rand) { - if (Size < sizeof(T)) return 0; - size_t Off = Rand(Size - sizeof(T) + 1); - assert(Off + sizeof(T) <= Size); - T Val; - if (Off < 64 && !Rand(4)) { - Val = static_cast(Size); - if (Rand.RandBool()) - Val = Bswap(Val); - } else { - memcpy(&Val, Data + Off, sizeof(Val)); - T Add = static_cast(Rand(21)); - Add -= 10; - if (Rand.RandBool()) - Val = Bswap(T(Bswap(Val) + Add)); // Add assuming different endiannes. - else - Val = Val + Add; // Add assuming current endiannes. - if (Add == 0 || Rand.RandBool()) // Maybe negate. - Val = -Val; - } - memcpy(Data + Off, &Val, sizeof(Val)); - return Size; -} - -size_t MutationDispatcher::Mutate_ChangeBinaryInteger(uint8_t *Data, - size_t Size, - size_t MaxSize) { - if (Size > MaxSize) return 0; - switch (Rand(4)) { - case 3: return ChangeBinaryInteger(Data, Size, Rand); - case 2: return ChangeBinaryInteger(Data, Size, Rand); - case 1: return ChangeBinaryInteger(Data, Size, Rand); - case 0: return ChangeBinaryInteger(Data, Size, Rand); - default: assert(0); - } - return 0; -} - -size_t MutationDispatcher::Mutate_CrossOver(uint8_t *Data, size_t Size, - size_t MaxSize) { - if (Size > MaxSize) return 0; - if (Size == 0) return 0; - if (!CrossOverWith) return 0; - const Unit &O = *CrossOverWith; - if (O.empty()) return 0; - size_t NewSize = 0; - switch(Rand(3)) { - case 0: - MutateInPlaceHere.resize(MaxSize); - NewSize = CrossOver(Data, Size, O.data(), O.size(), - MutateInPlaceHere.data(), MaxSize); - memcpy(Data, MutateInPlaceHere.data(), NewSize); - break; - case 1: - NewSize = InsertPartOf(O.data(), O.size(), Data, Size, MaxSize); - if (!NewSize) - NewSize = CopyPartOf(O.data(), O.size(), Data, Size); - break; - case 2: - NewSize = CopyPartOf(O.data(), O.size(), Data, Size); - break; - default: assert(0); - } - assert(NewSize > 0 && "CrossOver returned empty unit"); - assert(NewSize <= MaxSize && "CrossOver returned overisized unit"); - return NewSize; -} - -void MutationDispatcher::StartMutationSequence() { - CurrentMutatorSequence.clear(); - CurrentDictionaryEntrySequence.clear(); -} - -// Copy successful dictionary entries to PersistentAutoDictionary. -void MutationDispatcher::RecordSuccessfulMutationSequence() { - for (auto DE : CurrentDictionaryEntrySequence) { - // PersistentAutoDictionary.AddWithSuccessCountOne(DE); - DE->IncSuccessCount(); - assert(DE->GetW().size()); - // Linear search is fine here as this happens seldom. - if (!PersistentAutoDictionary.ContainsWord(DE->GetW())) - PersistentAutoDictionary.push_back(*DE); - } -} - -void MutationDispatcher::PrintRecommendedDictionary() { - Vector V; - for (auto &DE : PersistentAutoDictionary) - if (!ManualDictionary.ContainsWord(DE.GetW())) - V.push_back(DE); - if (V.empty()) return; +void PrintRecommendedDictionary(MutationDispatcher &MD) { + auto RecommendedDictionary = MD.RecommendDictionary(); + if (RecommendedDictionary.empty()) + return; Printf("###### Recommended dictionary. ######\n"); - for (auto &DE: V) { + for (auto &DE : RecommendedDictionary) { assert(DE.GetW().size()); Printf("\""); PrintASCII(DE.GetW(), "\""); @@ -500,97 +80,12 @@ void MutationDispatcher::PrintRecommendedDictionary() { Printf("###### End of recommended dictionary. ######\n"); } -void MutationDispatcher::PrintMutationSequence(bool Verbose) { - Printf("MS: %zd ", CurrentMutatorSequence.size()); - size_t EntriesToPrint = - Verbose ? CurrentMutatorSequence.size() - : std::min(kMaxMutationsToPrint, CurrentMutatorSequence.size()); - for (size_t i = 0; i < EntriesToPrint; i++) - Printf("%s-", CurrentMutatorSequence[i].Name); - if (!CurrentDictionaryEntrySequence.empty()) { - Printf(" DE: "); - EntriesToPrint = Verbose ? CurrentDictionaryEntrySequence.size() - : std::min(kMaxMutationsToPrint, - CurrentDictionaryEntrySequence.size()); - for (size_t i = 0; i < EntriesToPrint; i++) { - Printf("\""); - PrintASCII(CurrentDictionaryEntrySequence[i]->GetW(), "\"-"); - } - } -} - -std::string MutationDispatcher::MutationSequence() { - std::string MS; - for (auto M : CurrentMutatorSequence) { - MS += M.Name; - MS += "-"; - } - return MS; -} - -size_t MutationDispatcher::Mutate(uint8_t *Data, size_t Size, size_t MaxSize) { - return MutateImpl(Data, Size, MaxSize, Mutators); -} - -size_t MutationDispatcher::DefaultMutate(uint8_t *Data, size_t Size, - size_t MaxSize) { - return MutateImpl(Data, Size, MaxSize, DefaultMutators); -} - -// Mutates Data in place, returns new size. -size_t MutationDispatcher::MutateImpl(uint8_t *Data, size_t Size, - size_t MaxSize, - Vector &Mutators) { - assert(MaxSize > 0); - // Some mutations may fail (e.g. can't insert more bytes if Size == MaxSize), - // in which case they will return 0. - // Try several times before returning un-mutated data. - for (int Iter = 0; Iter < 100; Iter++) { - auto M = Mutators[Rand(Mutators.size())]; - size_t NewSize = (this->*(M.Fn))(Data, Size, MaxSize); - if (NewSize && NewSize <= MaxSize) { - if (Options.OnlyASCII) - ToASCII(Data, NewSize); - CurrentMutatorSequence.push_back(M); - return NewSize; - } - } - *Data = ' '; - return 1; // Fallback, should not happen frequently. -} - -// Mask represents the set of Data bytes that are worth mutating. -size_t MutationDispatcher::MutateWithMask(uint8_t *Data, size_t Size, - size_t MaxSize, - const Vector &Mask) { - size_t MaskedSize = std::min(Size, Mask.size()); - // * Copy the worthy bytes into a temporary array T - // * Mutate T - // * Copy T back. - // This is totally unoptimized. - auto &T = MutateWithMaskTemp; - if (T.size() < Size) - T.resize(Size); - size_t OneBits = 0; - for (size_t I = 0; I < MaskedSize; I++) - if (Mask[I]) - T[OneBits++] = Data[I]; - - if (!OneBits) return 0; - assert(!T.empty()); - size_t NewSize = Mutate(T.data(), OneBits, OneBits); - assert(NewSize <= OneBits); - (void)NewSize; - // Even if NewSize < OneBits we still use all OneBits bytes. - for (size_t I = 0, J = 0; I < MaskedSize; I++) - if (Mask[I]) - Data[I] = T[J++]; - return Size; -} - -void MutationDispatcher::AddWordToManualDictionary(const Word &W) { - ManualDictionary.push_back( - {W, std::numeric_limits::max()}); +void PrintMutationSequence(MutationDispatcher &MD, bool Verbose) { + const auto &MS = MD.MutationSequence(); + const auto &DS = MD.DictionaryEntrySequence(); + Printf("MS: %zd %s", MS.size(), MS.GetString(Verbose).c_str()); + if (!DS.empty()) + Printf(" DE: %s", DS.GetString(Verbose).c_str()); } } // namespace fuzzer diff --git a/compiler-rt/lib/fuzzer/FuzzerMutate.h b/compiler-rt/lib/fuzzer/FuzzerMutate.h index fd37191156d3f..85e284ef571c1 100644 --- a/compiler-rt/lib/fuzzer/FuzzerMutate.h +++ b/compiler-rt/lib/fuzzer/FuzzerMutate.h @@ -11,145 +11,23 @@ #ifndef LLVM_FUZZER_MUTATE_H #define LLVM_FUZZER_MUTATE_H -#include "FuzzerDefs.h" -#include "FuzzerDictionary.h" #include "FuzzerOptions.h" -#include "FuzzerRandom.h" +#include "mutagen/Mutagen.h" +#include "mutagen/MutagenDispatcher.h" namespace fuzzer { +namespace { -class MutationDispatcher { -public: - MutationDispatcher(Random &Rand, const FuzzingOptions &Options); - ~MutationDispatcher() {} - /// Indicate that we are about to start a new sequence of mutations. - void StartMutationSequence(); - /// Print the current sequence of mutations. Only prints the full sequence - /// when Verbose is true. - void PrintMutationSequence(bool Verbose = true); - /// Return the current sequence of mutations. - std::string MutationSequence(); - /// Indicate that the current sequence of mutations was successful. - void RecordSuccessfulMutationSequence(); - /// Mutates data by invoking user-provided mutator. - size_t Mutate_Custom(uint8_t *Data, size_t Size, size_t MaxSize); - /// Mutates data by invoking user-provided crossover. - size_t Mutate_CustomCrossOver(uint8_t *Data, size_t Size, size_t MaxSize); - /// Mutates data by shuffling bytes. - size_t Mutate_ShuffleBytes(uint8_t *Data, size_t Size, size_t MaxSize); - /// Mutates data by erasing bytes. - size_t Mutate_EraseBytes(uint8_t *Data, size_t Size, size_t MaxSize); - /// Mutates data by inserting a byte. - size_t Mutate_InsertByte(uint8_t *Data, size_t Size, size_t MaxSize); - /// Mutates data by inserting several repeated bytes. - size_t Mutate_InsertRepeatedBytes(uint8_t *Data, size_t Size, size_t MaxSize); - /// Mutates data by changing one byte. - size_t Mutate_ChangeByte(uint8_t *Data, size_t Size, size_t MaxSize); - /// Mutates data by changing one bit. - size_t Mutate_ChangeBit(uint8_t *Data, size_t Size, size_t MaxSize); - /// Mutates data by copying/inserting a part of data into a different place. - size_t Mutate_CopyPart(uint8_t *Data, size_t Size, size_t MaxSize); +using mutagen::MutationDispatcher; - /// Mutates data by adding a word from the manual dictionary. - size_t Mutate_AddWordFromManualDictionary(uint8_t *Data, size_t Size, - size_t MaxSize); +} // namespace - /// Mutates data by adding a word from the TORC. - size_t Mutate_AddWordFromTORC(uint8_t *Data, size_t Size, size_t MaxSize); +void ConfigureMutagen(unsigned int Seed, const FuzzingOptions &Options, + LLVMMutagenConfiguration *OutConfig); - /// Mutates data by adding a word from the persistent automatic dictionary. - size_t Mutate_AddWordFromPersistentAutoDictionary(uint8_t *Data, size_t Size, - size_t MaxSize); +void PrintRecommendedDictionary(MutationDispatcher &MD); - /// Tries to find an ASCII integer in Data, changes it to another ASCII int. - size_t Mutate_ChangeASCIIInteger(uint8_t *Data, size_t Size, size_t MaxSize); - /// Change a 1-, 2-, 4-, or 8-byte integer in interesting ways. - size_t Mutate_ChangeBinaryInteger(uint8_t *Data, size_t Size, size_t MaxSize); - - /// CrossOver Data with CrossOverWith. - size_t Mutate_CrossOver(uint8_t *Data, size_t Size, size_t MaxSize); - - /// Applies one of the configured mutations. - /// Returns the new size of data which could be up to MaxSize. - size_t Mutate(uint8_t *Data, size_t Size, size_t MaxSize); - - /// Applies one of the configured mutations to the bytes of Data - /// that have '1' in Mask. - /// Mask.size() should be >= Size. - size_t MutateWithMask(uint8_t *Data, size_t Size, size_t MaxSize, - const Vector &Mask); - - /// Applies one of the default mutations. Provided as a service - /// to mutation authors. - size_t DefaultMutate(uint8_t *Data, size_t Size, size_t MaxSize); - - /// Creates a cross-over of two pieces of Data, returns its size. - size_t CrossOver(const uint8_t *Data1, size_t Size1, const uint8_t *Data2, - size_t Size2, uint8_t *Out, size_t MaxOutSize); - - void AddWordToManualDictionary(const Word &W); - - void PrintRecommendedDictionary(); - - void SetCrossOverWith(const Unit *U) { CrossOverWith = U; } - - Random &GetRand() { return Rand; } - - private: - struct Mutator { - size_t (MutationDispatcher::*Fn)(uint8_t *Data, size_t Size, size_t Max); - const char *Name; - }; - - size_t AddWordFromDictionary(Dictionary &D, uint8_t *Data, size_t Size, - size_t MaxSize); - size_t MutateImpl(uint8_t *Data, size_t Size, size_t MaxSize, - Vector &Mutators); - - size_t InsertPartOf(const uint8_t *From, size_t FromSize, uint8_t *To, - size_t ToSize, size_t MaxToSize); - size_t CopyPartOf(const uint8_t *From, size_t FromSize, uint8_t *To, - size_t ToSize); - size_t ApplyDictionaryEntry(uint8_t *Data, size_t Size, size_t MaxSize, - DictionaryEntry &DE); - - template - DictionaryEntry MakeDictionaryEntryFromCMP(T Arg1, T Arg2, - const uint8_t *Data, size_t Size); - DictionaryEntry MakeDictionaryEntryFromCMP(const Word &Arg1, const Word &Arg2, - const uint8_t *Data, size_t Size); - DictionaryEntry MakeDictionaryEntryFromCMP(const void *Arg1, const void *Arg2, - const void *Arg1Mutation, - const void *Arg2Mutation, - size_t ArgSize, - const uint8_t *Data, size_t Size); - - Random &Rand; - const FuzzingOptions Options; - - // Dictionary provided by the user via -dict=DICT_FILE. - Dictionary ManualDictionary; - // Persistent dictionary modified by the fuzzer, consists of - // entries that led to successful discoveries in the past mutations. - Dictionary PersistentAutoDictionary; - - Vector CurrentDictionaryEntrySequence; - - static const size_t kCmpDictionaryEntriesDequeSize = 16; - DictionaryEntry CmpDictionaryEntriesDeque[kCmpDictionaryEntriesDequeSize]; - size_t CmpDictionaryEntriesDequeIdx = 0; - - const Unit *CrossOverWith = nullptr; - Vector MutateInPlaceHere; - Vector MutateWithMaskTemp; - // CustomCrossOver needs its own buffer as a custom implementation may call - // LLVMFuzzerMutate, which in turn may resize MutateInPlaceHere. - Vector CustomCrossOverInPlaceHere; - - Vector Mutators; - Vector DefaultMutators; - Vector CurrentMutatorSequence; -}; +void PrintMutationSequence(MutationDispatcher &MD, bool Verbose = true); } // namespace fuzzer diff --git a/compiler-rt/lib/fuzzer/FuzzerRandom.h b/compiler-rt/lib/fuzzer/FuzzerRandom.h index ad6c07eb5ef56..8256853a65bb5 100644 --- a/compiler-rt/lib/fuzzer/FuzzerRandom.h +++ b/compiler-rt/lib/fuzzer/FuzzerRandom.h @@ -11,6 +11,7 @@ #ifndef LLVM_FUZZER_RANDOM_H #define LLVM_FUZZER_RANDOM_H +#include #include namespace fuzzer { diff --git a/compiler-rt/lib/fuzzer/FuzzerTracePC.cpp b/compiler-rt/lib/fuzzer/FuzzerTracePC.cpp index d808b9b00fa35..b613aef7b59f5 100644 --- a/compiler-rt/lib/fuzzer/FuzzerTracePC.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerTracePC.cpp @@ -16,7 +16,6 @@ #include "FuzzerBuiltinsMsvc.h" #include "FuzzerCorpus.h" #include "FuzzerDefs.h" -#include "FuzzerDictionary.h" #include "FuzzerExtFunctions.h" #include "FuzzerIO.h" #include "FuzzerPlatform.h" diff --git a/compiler-rt/lib/fuzzer/FuzzerTracePC.h b/compiler-rt/lib/fuzzer/FuzzerTracePC.h index a93732972f7d7..921a13f082ae3 100644 --- a/compiler-rt/lib/fuzzer/FuzzerTracePC.h +++ b/compiler-rt/lib/fuzzer/FuzzerTracePC.h @@ -12,7 +12,7 @@ #define LLVM_FUZZER_TRACE_PC #include "FuzzerDefs.h" -#include "FuzzerDictionary.h" +#include "FuzzerUtil.h" #include "FuzzerValueBitMap.h" #include @@ -40,7 +40,7 @@ struct TableOfRecentCompares { Table[Idx].B = Arg2; } - Pair Get(size_t I) { return Table[I % kSize]; } + const Pair &Get(size_t I) { return Table[I % kSize]; } Pair Table[kSize]; }; diff --git a/compiler-rt/lib/fuzzer/FuzzerUtil.h b/compiler-rt/lib/fuzzer/FuzzerUtil.h index a188a7be32a53..285f56be8a767 100644 --- a/compiler-rt/lib/fuzzer/FuzzerUtil.h +++ b/compiler-rt/lib/fuzzer/FuzzerUtil.h @@ -47,6 +47,15 @@ void PrintMemoryProfile(); unsigned NumberOfCpuCores(); +// Parses one dictionary entry. +// If successful, write the enty to Unit and returns true, +// otherwise returns false. +bool ParseOneDictionaryEntry(const std::string &Str, Unit *U); + +// Parses the dictionary file, fills Units, returns true iff all lines +// were parsed successfully. +bool ParseDictionaryFile(const std::string &Text, Vector *Units); + // Platform specific functions. void SetSignalHandler(const FuzzingOptions& Options); @@ -63,9 +72,6 @@ bool ExecuteCommand(const Command &Cmd, std::string *CmdOutput); FILE *OpenProcessPipe(const char *Command, const char *Mode); int CloseProcessPipe(FILE *F); -const void *SearchMemory(const void *haystack, size_t haystacklen, - const void *needle, size_t needlelen); - std::string CloneArgsWithoutX(const Vector &Args, const char *X1, const char *X2); diff --git a/compiler-rt/lib/fuzzer/FuzzerUtilFuchsia.cpp b/compiler-rt/lib/fuzzer/FuzzerUtilFuchsia.cpp index 5034b4a28d3f5..e83baa62886c0 100644 --- a/compiler-rt/lib/fuzzer/FuzzerUtilFuchsia.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerUtilFuchsia.cpp @@ -528,11 +528,6 @@ bool ExecuteCommand(const Command &BaseCmd, std::string *CmdOutput) { return Ret == 0; } -const void *SearchMemory(const void *Data, size_t DataLen, const void *Patt, - size_t PattLen) { - return memmem(Data, DataLen, Patt, PattLen); -} - // In fuchsia, accessing /dev/null is not supported. There's nothing // similar to a file that discards everything that is written to it. // The way of doing something similar in fuchsia is by using diff --git a/compiler-rt/lib/fuzzer/FuzzerUtilPosix.cpp b/compiler-rt/lib/fuzzer/FuzzerUtilPosix.cpp index 0446d732a9ec8..5f0aa0190dd66 100644 --- a/compiler-rt/lib/fuzzer/FuzzerUtilPosix.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerUtilPosix.cpp @@ -170,11 +170,6 @@ int CloseProcessPipe(FILE *F) { return pclose(F); } -const void *SearchMemory(const void *Data, size_t DataLen, const void *Patt, - size_t PattLen) { - return memmem(Data, DataLen, Patt, PattLen); -} - std::string DisassembleCmd(const std::string &FileName) { return "objdump -d " + FileName; } diff --git a/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp b/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp index 1a54bb569eca4..5deb5998fccb4 100644 --- a/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp @@ -182,27 +182,6 @@ bool ExecuteCommand(const Command &Cmd, std::string *CmdOutput) { return _pclose(Pipe) == 0; } -const void *SearchMemory(const void *Data, size_t DataLen, const void *Patt, - size_t PattLen) { - // TODO: make this implementation more efficient. - const char *Cdata = (const char *)Data; - const char *Cpatt = (const char *)Patt; - - if (!Data || !Patt || DataLen == 0 || PattLen == 0 || DataLen < PattLen) - return NULL; - - if (PattLen == 1) - return memchr(Data, *Cpatt, DataLen); - - const char *End = Cdata + DataLen - PattLen + 1; - - for (const char *It = Cdata; It < End; ++It) - if (It[0] == Cpatt[0] && memcmp(It, Cpatt, PattLen) == 0) - return It; - - return NULL; -} - std::string DisassembleCmd(const std::string &FileName) { Vector command_vector; command_vector.push_back("dumpbin /summary > nul"); diff --git a/compiler-rt/lib/fuzzer/build.sh b/compiler-rt/lib/fuzzer/build.sh index 504e54e3a819e..822b606041278 100755 --- a/compiler-rt/lib/fuzzer/build.sh +++ b/compiler-rt/lib/fuzzer/build.sh @@ -1,11 +1,11 @@ #!/bin/sh LIBFUZZER_SRC_DIR=$(dirname $0) +LIBMUTAGEN_SRC_DIR=$LIBFUZZER_SRC_DIR/mutagen CXX="${CXX:-clang}" -for f in $LIBFUZZER_SRC_DIR/*.cpp; do - $CXX -g -O2 -fno-omit-frame-pointer -std=c++11 $f -c & +for f in $LIBFUZZER_SRC_DIR/*.cpp $LIBMUTAGEN_SRC_DIR/*.cpp; do + $CXX -g -O2 -fno-omit-frame-pointer -std=c++11 $f -c -I$LIBFUZZER_SRC_DIR & done wait rm -f libFuzzer.a -ar ru libFuzzer.a Fuzzer*.o -rm -f Fuzzer*.o - +ar ru libFuzzer.a Fuzzer*.o Mutagen*.o +rm -f Fuzzer*.o Mutagen*.o diff --git a/compiler-rt/lib/fuzzer/mutagen/CMakeLists.txt b/compiler-rt/lib/fuzzer/mutagen/CMakeLists.txt new file mode 100644 index 0000000000000..1a8175ce6e6ec --- /dev/null +++ b/compiler-rt/lib/fuzzer/mutagen/CMakeLists.txt @@ -0,0 +1,59 @@ +set(MUTAGEN_SOURCES + Mutagen.cpp + MutagenCrossOver.cpp + MutagenDispatcher.cpp + MutagenUtilPosix.cpp + MutagenUtilWindows.cpp) + +set(MUTAGEN_HEADERS + Mutagen.h + MutagenDictionary.h + MutagenDispatcher.h + MutagenUtil.h) + +# Expose the files in this library to libFuzzer for optimized, direct inclusion. +set(LIBFUZZER_MUTAGEN_SOURCES ${MUTAGEN_SOURCES} PARENT_SCOPE) +set(LIBFUZZER_MUTAGEN_HEADERS ${MUTAGEN_HEADERS} PARENT_SCOPE) + +# Reuse the following variables from libFuzzer: +# FUZZER_SUPPORTED_ARCH +# FUZZER_SUPPORTED_OS +# LIBFUZZER_CFLAGS +# LIBFUZZER_DEPS +include_directories(..) + +add_compiler_rt_component(mutagen) + +add_compiler_rt_object_libraries(RTmutagen + OS ${FUZZER_SUPPORTED_OS} + ARCHS ${FUZZER_SUPPORTED_ARCH} + SOURCES ${MUTAGEN_SOURCES} + ADDITIONAL_HEADERS ${MUTAGEN_HEADERS} + CFLAGS ${LIBFUZZER_CFLAGS} + DEPS ${LIBFUZZER_DEPS}) + +add_compiler_rt_runtime(clang_rt.mutagen + STATIC + OS ${FUZZER_SUPPORTED_OS} + ARCHS ${FUZZER_SUPPORTED_ARCH} + OBJECT_LIBS RTmutagen + CFLAGS ${LIBFUZZER_CFLAGS} + PARENT_TARGET mutagen) + +if(OS_NAME MATCHES "Linux|Fuchsia" AND + COMPILER_RT_LIBCXX_PATH AND + COMPILER_RT_LIBCXXABI_PATH) + foreach(arch ${FUZZER_SUPPORTED_ARCH}) + get_target_flags_for_arch(${arch} TARGET_CFLAGS) + set(LIBCXX_${arch}_PREFIX ${CMAKE_CURRENT_BINARY_DIR}/libcxx_mutagen_${arch}) + add_custom_libcxx(libcxx_mutagen_${arch} ${LIBCXX_${arch}_PREFIX} + CFLAGS ${TARGET_CFLAGS} + CMAKE_ARGS -DCMAKE_CXX_COMPILER_WORKS=ON + -DCMAKE_POSITION_INDEPENDENT_CODE=ON + -DLIBCXXABI_ENABLE_EXCEPTIONS=OFF + -DLIBCXX_ABI_NAMESPACE=__Fuzzer) + target_compile_options(RTmutagen.${arch} PRIVATE -isystem ${LIBCXX_${arch}_PREFIX}/include/c++/v1) + add_dependencies(RTmutagen.${arch} libcxx_mutagen_${arch}-build) + partially_link_libcxx(mutagen ${LIBCXX_${arch}_PREFIX} ${arch}) + endforeach() +endif() diff --git a/compiler-rt/lib/fuzzer/mutagen/Mutagen.cpp b/compiler-rt/lib/fuzzer/mutagen/Mutagen.cpp new file mode 100644 index 0000000000000..8d5858191afeb --- /dev/null +++ b/compiler-rt/lib/fuzzer/mutagen/Mutagen.cpp @@ -0,0 +1,100 @@ +//===- Mutagen.cpp - Interface header for the mutagen -----------*- C++ -* ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Define the interface between libMutagen and its consumers. +//===----------------------------------------------------------------------===// + +#include "Mutagen.h" +#include "FuzzerDefs.h" +#include "MutagenDispatcher.h" +#include +#include +#include + +namespace mutagen { +namespace { + +MutationDispatcher *MD = nullptr; + +} // namespace + +MutationDispatcher *GetMutationDispatcherForTest() { return MD; } + +} // namespace mutagen + +using fuzzer::Unit; +using mutagen::MD; +using mutagen::MutationDispatcher; +using mutagen::Word; + +extern "C" { + +ATTRIBUTE_INTERFACE void +LLVMMutagenConfigure(const LLVMMutagenConfiguration *Config) { + if (MD) + delete MD; + MD = new MutationDispatcher(Config); +} + +ATTRIBUTE_INTERFACE void LLVMMutagenResetSequence() { + MD->StartMutationSequence(); +} + +ATTRIBUTE_INTERFACE void LLVMMutagenSetCrossOverWith(const uint8_t *Data, + size_t Size) { + static Unit CrossOverWith; + Unit U(Data, Data + Size); + CrossOverWith = std::move(U); + MD->SetCrossOverWith(&CrossOverWith); +} + +ATTRIBUTE_INTERFACE size_t LLVMMutagenMutate(uint8_t *Data, size_t Size, + size_t Max) { + return MD->Mutate(Data, Size, Max); +} + +ATTRIBUTE_INTERFACE size_t LLVMMutagenDefaultMutate(uint8_t *Data, size_t Size, + size_t Max) { + return MD->DefaultMutate(Data, Size, Max); +} + +ATTRIBUTE_INTERFACE void LLVMMutagenRecordSequence() { + MD->RecordSuccessfulMutationSequence(); +} + +ATTRIBUTE_INTERFACE size_t LLVMMutagenGetMutationSequence(int Verbose, + char *Out, size_t Max, + size_t *OutNumItems) { + const auto &Seq = MD->MutationSequence(); + if (OutNumItems) + *OutNumItems = Seq.size(); + return snprintf(Out, Max, "%s", Seq.GetString(Verbose).c_str()); +} + +ATTRIBUTE_INTERFACE void LLVMMutagenAddWordToDictionary(const uint8_t *Data, + size_t Size) { + MD->AddWordToManualDictionary(Word(Data, std::min(Size, Word::GetMaxSize()))); +} + +ATTRIBUTE_INTERFACE size_t LLVMMutagenGetDictionaryEntrySequence( + int Verbose, char *Out, size_t Max, size_t *OutNumItems) { + const auto &Seq = MD->DictionaryEntrySequence(); + if (OutNumItems) + *OutNumItems = Seq.size(); + return snprintf(Out, Max, "%s", Seq.GetString(Verbose).c_str()); +} + +ATTRIBUTE_INTERFACE size_t LLVMMutagenRecommendDictionary() { + return MD->RecommendDictionary().size(); +} + +ATTRIBUTE_INTERFACE const char * +LLVMMutagenRecommendDictionaryEntry(size_t *OutUseCount) { + return MD->RecommendDictionaryEntry(OutUseCount); +} + +} // extern "C" diff --git a/compiler-rt/lib/fuzzer/mutagen/Mutagen.h b/compiler-rt/lib/fuzzer/mutagen/Mutagen.h new file mode 100644 index 0000000000000..757ee3e07d2cd --- /dev/null +++ b/compiler-rt/lib/fuzzer/mutagen/Mutagen.h @@ -0,0 +1,119 @@ +//===- Mutagen.h - Interface header for the mutagen -------------*- C++ -* ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Define the interface between libMutagen and its consumers. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_FUZZER_MUTAGEN_H +#define LLVM_FUZZER_MUTAGEN_H + +#include "FuzzerPlatform.h" +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +#define MAX_WORD_SIZE 64 + +typedef struct { + // PRNG seed. + unsigned int Seed; + + // If non-zero, use CMP traces to guide mutations. Ignored if any of + // |FromTORC4|, |FromTORC8|, or |FromTORCW| are null. + int UseCmp; + void (*FromTORC4)(size_t Idx, uint32_t *Arg1, uint32_t *Arg2); + void (*FromTORC8)(size_t Idx, uint64_t *Arg1, uint64_t *Arg2); + void (*FromTORCW)(size_t Idx, const uint8_t **Data1, size_t *Size1, + const uint8_t **Data2, size_t *Size2); + + // If non-zero, use hints from intercepting memmem, strstr, etc. Ignored if + // |UseCmp| is zero or if |FromMMT| is null. + int UseMemmem; + void (*FromMMT)(size_t Idx, const uint8_t **Data, size_t *Size); + + // If non-zero, generate only ASCII (isprint+isspace) inputs. + int OnlyASCII; + + // Optional user-provided custom mutator. + size_t (*CustomMutator)(uint8_t *Data, size_t Size, size_t MaxSize, + unsigned int Seed); + + // Optional user-provided custom cross-over function. + size_t (*CustomCrossOver)(const uint8_t *Data1, size_t Size1, + const uint8_t *Data2, size_t Size2, uint8_t *Out, + size_t MaxOutSize, unsigned int Seed); + + // Optional MemorySanitizer callbacks. + void (*MSanUnpoison)(const volatile void *, size_t size); + void (*MSanUnpoisonParam)(size_t n); +} LLVMMutagenConfiguration; + +// Re-seeds the PRNG and sets mutator-related options. +ATTRIBUTE_INTERFACE void +LLVMMutagenConfigure(const LLVMMutagenConfiguration *config); + +// Writes the mutation sequence to |Out|, and returns the number of +// characters it wrote, or would have written given a large enough buffer, +// excluding the null terminator. Thus, a return value of |Max| or greater +// indicates the sequence was truncated (like snprintf). May truncate the +// sequence unless |Verbose| is non-zero. Sets |OutNumItems| to the number of +// items in the untruncated sequence. +ATTRIBUTE_INTERFACE size_t LLVMMutagenGetMutationSequence(int Verbose, + char *Out, size_t Max, + size_t *OutNumItems); + +// Writes the dictionary entry sequence to |Out|, and returns the number of +// characters it wrote, or would have written given a large enough buffer, +// excluding a null terminator. Thus, a return value of |Max| or greater +// indicates the sequence was truncated (like snprintf). May truncate the +// sequence unless |Verbose| is non-zero. Sets |OutNumItems| to the number of +// items in the untruncated sequence. +ATTRIBUTE_INTERFACE size_t LLVMMutagenGetDictionaryEntrySequence( + int Verbose, char *Out, size_t Max, size_t *OutNumItems); + +// Instructs the library to record the current mutation sequence as successful +// at increasing coverage. +ATTRIBUTE_INTERFACE void LLVMMutagenRecordSequence(); + +// Clears the mutation and dictionary entry sequences. +ATTRIBUTE_INTERFACE void LLVMMutagenResetSequence(); + +// Adds data used by various mutators to produce new inputs. +ATTRIBUTE_INTERFACE void LLVMMutagenSetCrossOverWith(const uint8_t *Data, + size_t Size); +ATTRIBUTE_INTERFACE void LLVMMutagenAddWordToDictionary(const uint8_t *Word, + size_t Size); + +// Mutates the contents of |Data| and returns the new size. +ATTRIBUTE_INTERFACE size_t LLVMMutagenMutate(uint8_t *Data, size_t Size, + size_t Max); + +// Like |LLVMMutagenMutate|, but never selects the custom mutators and is +// therefore suitable to be called from them. +ATTRIBUTE_INTERFACE size_t LLVMMutagenDefaultMutate(uint8_t *Data, size_t Size, + size_t Max); + +// Creates a recommended dictionary and returns its number of entries. The +// entries can be retrieved by subsequent calls to +// |LLVMMutagenRecommendDictionaryEntry|. +ATTRIBUTE_INTERFACE size_t LLVMMutagenRecommendDictionary(); + +// Returns the ASCII representation of the next recommended dictionary entry, +// or null if no entries remain (or |LLVMMutagenRecommendDictionary| wasn't +// called). If non-null, the return pointer is valid until the next call to this +// method, and if provided, |OutUseCount| is set to the entry's use count. +ATTRIBUTE_INTERFACE const char * +LLVMMutagenRecommendDictionaryEntry(size_t *OutUseCount); + +#ifdef __cplusplus +} // extern "C" +#endif // __cplusplus + +#endif // LLVM_FUZZER_MUTAGEN_H diff --git a/compiler-rt/lib/fuzzer/FuzzerCrossOver.cpp b/compiler-rt/lib/fuzzer/mutagen/MutagenCrossOver.cpp similarity index 86% rename from compiler-rt/lib/fuzzer/FuzzerCrossOver.cpp rename to compiler-rt/lib/fuzzer/mutagen/MutagenCrossOver.cpp index 83d9f8d47cb18..0fcffaf68ddf4 100644 --- a/compiler-rt/lib/fuzzer/FuzzerCrossOver.cpp +++ b/compiler-rt/lib/fuzzer/mutagen/MutagenCrossOver.cpp @@ -1,4 +1,4 @@ -//===- FuzzerCrossOver.cpp - Cross over two test inputs -------------------===// +//===- MutagenCrossOver.cpp - Cross over two test inputs ------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -8,12 +8,11 @@ // Cross over test inputs. //===----------------------------------------------------------------------===// -#include "FuzzerDefs.h" -#include "FuzzerMutate.h" #include "FuzzerRandom.h" +#include "MutagenDispatcher.h" #include -namespace fuzzer { +namespace mutagen { // Cross Data1 and Data2, store the result (up to MaxOutSize bytes) in Out. size_t MutationDispatcher::CrossOver(const uint8_t *Data1, size_t Size1, @@ -40,12 +39,12 @@ size_t MutationDispatcher::CrossOver(const uint8_t *Data1, size_t Size1, (*InPos) += ExtraSize; } // Use the other input data on the next iteration. - InPos = CurrentlyUsingFirstData ? &Pos2 : &Pos1; + InPos = CurrentlyUsingFirstData ? &Pos2 : &Pos1; InSize = CurrentlyUsingFirstData ? Size2 : Size1; - Data = CurrentlyUsingFirstData ? Data2 : Data1; + Data = CurrentlyUsingFirstData ? Data2 : Data1; CurrentlyUsingFirstData = !CurrentlyUsingFirstData; } return OutPos; } -} // namespace fuzzer +} // namespace mutagen diff --git a/compiler-rt/lib/fuzzer/mutagen/MutagenDictionary.h b/compiler-rt/lib/fuzzer/mutagen/MutagenDictionary.h new file mode 100644 index 0000000000000..a665cabc316e7 --- /dev/null +++ b/compiler-rt/lib/fuzzer/mutagen/MutagenDictionary.h @@ -0,0 +1,85 @@ +//===- MutagenDictionary.h - Internal header for the mutagen ----*- C++ -* ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// mutagen::Dictionary +//===----------------------------------------------------------------------===// + +#ifndef LLVM_FUZZER_MUTAGEN_DICTIONARY_H +#define LLVM_FUZZER_MUTAGEN_DICTIONARY_H + +#include "FuzzerDefs.h" +#include +#include +#include +#include +#include + +namespace mutagen { +namespace { + +using fuzzer::Word; + +} // namespace + +class DictionaryEntry { +public: + DictionaryEntry() {} + DictionaryEntry(Word W) : W(W) {} + DictionaryEntry(Word W, size_t PositionHint) + : W(W), PositionHint(PositionHint) {} + const Word &GetW() const { return W; } + + bool HasPositionHint() const { + return PositionHint != std::numeric_limits::max(); + } + size_t GetPositionHint() const { + assert(HasPositionHint()); + return PositionHint; + } + void IncUseCount() { UseCount++; } + void IncSuccessCount() { SuccessCount++; } + size_t GetUseCount() const { return UseCount; } + size_t GetSuccessCount() const { return SuccessCount; } + +private: + Word W; + size_t PositionHint = std::numeric_limits::max(); + size_t UseCount = 0; + size_t SuccessCount = 0; +}; + +class Dictionary { +public: + static const size_t kMaxDictSize = 1 << 14; + + bool ContainsWord(const Word &W) const { + return std::any_of(begin(), end(), [&](const DictionaryEntry &DE) { + return DE.GetW() == W; + }); + } + const DictionaryEntry *begin() const { return &DE[0]; } + const DictionaryEntry *end() const { return begin() + Size; } + DictionaryEntry &operator[](size_t Idx) { + assert(Idx < Size); + return DE[Idx]; + } + void push_back(DictionaryEntry DE) { + if (Size < kMaxDictSize) + this->DE[Size++] = DE; + } + void clear() { Size = 0; } + bool empty() const { return Size == 0; } + size_t size() const { return Size; } + +private: + DictionaryEntry DE[kMaxDictSize]; + size_t Size = 0; +}; + +} // namespace mutagen + +#endif // LLVM_FUZZER_MUTAGEN_DICTIONARY_H diff --git a/compiler-rt/lib/fuzzer/mutagen/MutagenDispatcher.cpp b/compiler-rt/lib/fuzzer/mutagen/MutagenDispatcher.cpp new file mode 100644 index 0000000000000..32b5694cc6862 --- /dev/null +++ b/compiler-rt/lib/fuzzer/mutagen/MutagenDispatcher.cpp @@ -0,0 +1,659 @@ +//===- MutagenDispatcher.cpp - Mutate a test input ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Mutate a test input. +//===----------------------------------------------------------------------===// + +#include "MutagenDispatcher.h" +#include "FuzzerBuiltins.h" +#include "FuzzerBuiltinsMsvc.h" +#include "FuzzerPlatform.h" +#include "MutagenUtil.h" +#include +#include + +namespace mutagen { +namespace { + +using fuzzer::Bswap; + +std::string ToASCII(const uint8_t *Data, size_t Size) { + std::ostringstream OSS; + for (size_t i = 0; i < Size; i++) { + uint16_t Byte = Data[i]; + if (Byte == '\\') + OSS << "\\\\"; + else if (Byte == '"') + OSS << "\\\""; + else if (Byte >= 32 && Byte < 127) + OSS << static_cast(Byte); + else + OSS << "\\x" << std::hex << std::setw(2) << std::setfill('0') << Byte + << std::dec; + } + return OSS.str(); +} + +std::string ToASCII(const Word &W) { return ToASCII(W.data(), W.size()); } + +} // namespace + +void MutationDispatcher::SetConfig(const LLVMMutagenConfiguration *C) { + memcpy(&Config, C, sizeof(Config)); + if (!Config.FromTORC4 || !Config.FromTORC8 || !Config.FromTORCW) + Config.UseCmp = 0; + if (!Config.FromMMT) + Config.UseMemmem = 0; +} + +MutationDispatcher::MutationDispatcher(const LLVMMutagenConfiguration *config) + : Rand(config->Seed) { + SetConfig(config); + DefaultMutators.insert( + DefaultMutators.begin(), + { + {&MutationDispatcher::Mutate_EraseBytes, "EraseBytes"}, + {&MutationDispatcher::Mutate_InsertByte, "InsertByte"}, + {&MutationDispatcher::Mutate_InsertRepeatedBytes, + "InsertRepeatedBytes"}, + {&MutationDispatcher::Mutate_ChangeByte, "ChangeByte"}, + {&MutationDispatcher::Mutate_ChangeBit, "ChangeBit"}, + {&MutationDispatcher::Mutate_ShuffleBytes, "ShuffleBytes"}, + {&MutationDispatcher::Mutate_ChangeASCIIInteger, "ChangeASCIIInt"}, + {&MutationDispatcher::Mutate_ChangeBinaryInteger, "ChangeBinInt"}, + {&MutationDispatcher::Mutate_CopyPart, "CopyPart"}, + {&MutationDispatcher::Mutate_CrossOver, "CrossOver"}, + {&MutationDispatcher::Mutate_AddWordFromManualDictionary, + "ManualDict"}, + {&MutationDispatcher::Mutate_AddWordFromPersistentAutoDictionary, + "PersAutoDict"}, + }); + if (Config.UseCmp) + DefaultMutators.push_back( + {&MutationDispatcher::Mutate_AddWordFromTORC, "CMP"}); + + if (Config.CustomMutator) + Mutators.push_back({&MutationDispatcher::Mutate_Custom, "Custom"}); + else + Mutators = DefaultMutators; + + if (Config.CustomCrossOver) + Mutators.push_back( + {&MutationDispatcher::Mutate_CustomCrossOver, "CustomCrossOver"}); +} + +static char RandCh(Random &Rand) { + if (Rand.RandBool()) + return static_cast(Rand(256)); + const char Special[] = "!*'();:@&=+$,/?%#[]012Az-`~.\xff\x00"; + return Special[Rand(sizeof(Special) - 1)]; +} + +size_t MutationDispatcher::Mutate_Custom(uint8_t *Data, size_t Size, + size_t MaxSize) { + if (Config.MSanUnpoison) + Config.MSanUnpoison(Data, Size); + if (Config.MSanUnpoisonParam) + Config.MSanUnpoisonParam(4); + return Config.CustomMutator(Data, Size, MaxSize, Rand.Rand()); +} + +size_t MutationDispatcher::Mutate_CustomCrossOver(uint8_t *Data, size_t Size, + size_t MaxSize) { + if (Size == 0) + return 0; + if (!CrossOverWith) + return 0; + const Unit &Other = *CrossOverWith; + if (Other.empty()) + return 0; + CustomCrossOverInPlaceHere.resize(MaxSize); + auto &U = CustomCrossOverInPlaceHere; + + if (Config.MSanUnpoison) { + Config.MSanUnpoison(Data, Size); + Config.MSanUnpoison(Other.data(), Other.size()); + Config.MSanUnpoison(U.data(), U.size()); + } + if (Config.MSanUnpoisonParam) + Config.MSanUnpoisonParam(7); + size_t NewSize = + Config.CustomCrossOver(Data, Size, Other.data(), Other.size(), U.data(), + U.size(), Rand.Rand()); + + if (!NewSize) + return 0; + assert(NewSize <= MaxSize && "CustomCrossOver returned overisized unit"); + memcpy(Data, U.data(), NewSize); + return NewSize; +} + +size_t MutationDispatcher::Mutate_ShuffleBytes(uint8_t *Data, size_t Size, + size_t MaxSize) { + if (Size > MaxSize || Size == 0) + return 0; + size_t ShuffleAmount = + Rand(std::min(Size, (size_t)8)) + 1; // [1,8] and <= Size. + size_t ShuffleStart = Rand(Size - ShuffleAmount); + assert(ShuffleStart + ShuffleAmount <= Size); + std::shuffle(Data + ShuffleStart, Data + ShuffleStart + ShuffleAmount, Rand); + return Size; +} + +size_t MutationDispatcher::Mutate_EraseBytes(uint8_t *Data, size_t Size, + size_t MaxSize) { + if (Size <= 1) + return 0; + size_t N = Rand(Size / 2) + 1; + assert(N < Size); + size_t Idx = Rand(Size - N + 1); + // Erase Data[Idx:Idx+N]. + memmove(Data + Idx, Data + Idx + N, Size - Idx - N); + // Printf("Erase: %zd %zd => %zd; Idx %zd\n", N, Size, Size - N, Idx); + return Size - N; +} + +size_t MutationDispatcher::Mutate_InsertByte(uint8_t *Data, size_t Size, + size_t MaxSize) { + if (Size >= MaxSize) + return 0; + size_t Idx = Rand(Size + 1); + // Insert new value at Data[Idx]. + memmove(Data + Idx + 1, Data + Idx, Size - Idx); + Data[Idx] = RandCh(Rand); + return Size + 1; +} + +size_t MutationDispatcher::Mutate_InsertRepeatedBytes(uint8_t *Data, + size_t Size, + size_t MaxSize) { + const size_t kMinBytesToInsert = 3; + if (Size + kMinBytesToInsert >= MaxSize) + return 0; + size_t MaxBytesToInsert = std::min(MaxSize - Size, (size_t)128); + size_t N = Rand(MaxBytesToInsert - kMinBytesToInsert + 1) + kMinBytesToInsert; + assert(Size + N <= MaxSize && N); + size_t Idx = Rand(Size + 1); + // Insert new values at Data[Idx]. + memmove(Data + Idx + N, Data + Idx, Size - Idx); + // Give preference to 0x00 and 0xff. + uint8_t Byte = static_cast( + Rand.RandBool() ? Rand(256) : (Rand.RandBool() ? 0 : 255)); + for (size_t i = 0; i < N; i++) + Data[Idx + i] = Byte; + return Size + N; +} + +size_t MutationDispatcher::Mutate_ChangeByte(uint8_t *Data, size_t Size, + size_t MaxSize) { + if (Size > MaxSize) + return 0; + size_t Idx = Rand(Size); + Data[Idx] = RandCh(Rand); + return Size; +} + +size_t MutationDispatcher::Mutate_ChangeBit(uint8_t *Data, size_t Size, + size_t MaxSize) { + if (Size > MaxSize) + return 0; + size_t Idx = Rand(Size); + Data[Idx] ^= 1 << Rand(8); + return Size; +} + +size_t MutationDispatcher::Mutate_AddWordFromManualDictionary(uint8_t *Data, + size_t Size, + size_t MaxSize) { + return AddWordFromDictionary(ManualDictionary, Data, Size, MaxSize); +} + +size_t MutationDispatcher::ApplyDictionaryEntry(uint8_t *Data, size_t Size, + size_t MaxSize, + DictionaryEntry &DE) { + const Word &W = DE.GetW(); + bool UsePositionHint = DE.HasPositionHint() && + DE.GetPositionHint() + W.size() < Size && + Rand.RandBool(); + if (Rand.RandBool()) { // Insert W. + if (Size + W.size() > MaxSize) + return 0; + size_t Idx = UsePositionHint ? DE.GetPositionHint() : Rand(Size + 1); + memmove(Data + Idx + W.size(), Data + Idx, Size - Idx); + memcpy(Data + Idx, W.data(), W.size()); + Size += W.size(); + } else { // Overwrite some bytes with W. + if (W.size() > Size) + return 0; + size_t Idx = + UsePositionHint ? DE.GetPositionHint() : Rand(Size + 1 - W.size()); + memcpy(Data + Idx, W.data(), W.size()); + } + return Size; +} + +// Somewhere in the past we have observed a comparison instructions +// with arguments Arg1 Arg2. This function tries to guess a dictionary +// entry that will satisfy that comparison. +// It first tries to find one of the arguments (possibly swapped) in the +// input and if it succeeds it creates a DE with a position hint. +// Otherwise it creates a DE with one of the arguments w/o a position hint. +DictionaryEntry MutationDispatcher::MakeDictionaryEntryFromCMP( + const void *Arg1, const void *Arg2, const void *Arg1Mutation, + const void *Arg2Mutation, size_t ArgSize, const uint8_t *Data, + size_t Size) { + bool HandleFirst = Rand.RandBool(); + const void *ExistingBytes, *DesiredBytes; + Word W; + const uint8_t *End = Data + Size; + for (int Arg = 0; Arg < 2; Arg++) { + ExistingBytes = HandleFirst ? Arg1 : Arg2; + DesiredBytes = HandleFirst ? Arg2Mutation : Arg1Mutation; + HandleFirst = !HandleFirst; + W.Set(reinterpret_cast(DesiredBytes), ArgSize); + const size_t kMaxNumPositions = 8; + size_t Positions[kMaxNumPositions]; + size_t NumPositions = 0; + for (const uint8_t *Cur = Data; + Cur < End && NumPositions < kMaxNumPositions; Cur++) { + Cur = + (const uint8_t *)SearchMemory(Cur, End - Cur, ExistingBytes, ArgSize); + if (!Cur) + break; + Positions[NumPositions++] = Cur - Data; + } + if (!NumPositions) + continue; + return DictionaryEntry(W, Positions[Rand(NumPositions)]); + } + DictionaryEntry DE(W); + return DE; +} + +template +DictionaryEntry MutationDispatcher::MakeDictionaryEntryFromCMP( + T Arg1, T Arg2, const uint8_t *Data, size_t Size) { + if (Rand.RandBool()) + Arg1 = Bswap(Arg1); + if (Rand.RandBool()) + Arg2 = Bswap(Arg2); + T Arg1Mutation = static_cast(Arg1 + Rand(-1, 1)); + T Arg2Mutation = static_cast(Arg2 + Rand(-1, 1)); + return MakeDictionaryEntryFromCMP(&Arg1, &Arg2, &Arg1Mutation, &Arg2Mutation, + sizeof(Arg1), Data, Size); +} + +size_t MutationDispatcher::Mutate_AddWordFromTORC(uint8_t *Data, size_t Size, + size_t MaxSize) { + Word W; + DictionaryEntry DE; + switch (Rand(4)) { + case 0: { + uint64_t A, B; + Config.FromTORC8(Rand.Rand(), &A, &B); + DE = MakeDictionaryEntryFromCMP(A, B, Data, Size); + } break; + case 1: { + uint32_t A, B; + Config.FromTORC4(Rand.Rand(), &A, &B); + if ((A >> 16) == 0 && (B >> 16) == 0 && Rand.RandBool()) + DE = MakeDictionaryEntryFromCMP((uint16_t)A, (uint16_t)B, Data, Size); + else + DE = MakeDictionaryEntryFromCMP(A, B, Data, Size); + } break; + case 2: { + const uint8_t *DataA, *DataB; + size_t SizeA, SizeB; + Config.FromTORCW(Rand.Rand(), &DataA, &SizeA, &DataB, &SizeB); + DE = MakeDictionaryEntryFromCMP(DataA, DataB, DataA, DataB, SizeA, Data, + Size); + } break; + case 3: + if (Config.UseMemmem) { + const uint8_t *DataW; + size_t SizeW; + Config.FromMMT(Rand.Rand(), &DataW, &SizeW); + DE = DictionaryEntry(Word(DataW, SizeW)); + } + break; + default: + assert(0); + } + if (!DE.GetW().size()) + return 0; + Size = ApplyDictionaryEntry(Data, Size, MaxSize, DE); + if (!Size) + return 0; + DictionaryEntry &DERef = + CmpDictionaryEntriesDeque[CmpDictionaryEntriesDequeIdx++ % + kCmpDictionaryEntriesDequeSize]; + DERef = DE; + CurrentDictionaryEntrySequence.push_back(&DERef); + return Size; +} + +size_t MutationDispatcher::Mutate_AddWordFromPersistentAutoDictionary( + uint8_t *Data, size_t Size, size_t MaxSize) { + return AddWordFromDictionary(PersistentAutoDictionary, Data, Size, MaxSize); +} + +size_t MutationDispatcher::AddWordFromDictionary(Dictionary &D, uint8_t *Data, + size_t Size, size_t MaxSize) { + if (Size > MaxSize) + return 0; + if (D.empty()) + return 0; + DictionaryEntry &DE = D[Rand(D.size())]; + Size = ApplyDictionaryEntry(Data, Size, MaxSize, DE); + if (!Size) + return 0; + DE.IncUseCount(); + CurrentDictionaryEntrySequence.push_back(&DE); + return Size; +} + +// Overwrites part of To[0,ToSize) with a part of From[0,FromSize). +// Returns ToSize. +size_t MutationDispatcher::CopyPartOf(const uint8_t *From, size_t FromSize, + uint8_t *To, size_t ToSize) { + // Copy From[FromBeg, FromBeg + CopySize) into To[ToBeg, ToBeg + CopySize). + size_t ToBeg = Rand(ToSize); + size_t CopySize = Rand(ToSize - ToBeg) + 1; + assert(ToBeg + CopySize <= ToSize); + CopySize = std::min(CopySize, FromSize); + size_t FromBeg = Rand(FromSize - CopySize + 1); + assert(FromBeg + CopySize <= FromSize); + memmove(To + ToBeg, From + FromBeg, CopySize); + return ToSize; +} + +// Inserts part of From[0,ToSize) into To. +// Returns new size of To on success or 0 on failure. +size_t MutationDispatcher::InsertPartOf(const uint8_t *From, size_t FromSize, + uint8_t *To, size_t ToSize, + size_t MaxToSize) { + if (ToSize >= MaxToSize) + return 0; + size_t AvailableSpace = MaxToSize - ToSize; + size_t MaxCopySize = std::min(AvailableSpace, FromSize); + size_t CopySize = Rand(MaxCopySize) + 1; + size_t FromBeg = Rand(FromSize - CopySize + 1); + assert(FromBeg + CopySize <= FromSize); + size_t ToInsertPos = Rand(ToSize + 1); + assert(ToInsertPos + CopySize <= MaxToSize); + size_t TailSize = ToSize - ToInsertPos; + if (To == From) { + MutateInPlaceHere.resize(MaxToSize); + memcpy(MutateInPlaceHere.data(), From + FromBeg, CopySize); + memmove(To + ToInsertPos + CopySize, To + ToInsertPos, TailSize); + memmove(To + ToInsertPos, MutateInPlaceHere.data(), CopySize); + } else { + memmove(To + ToInsertPos + CopySize, To + ToInsertPos, TailSize); + memmove(To + ToInsertPos, From + FromBeg, CopySize); + } + return ToSize + CopySize; +} + +size_t MutationDispatcher::Mutate_CopyPart(uint8_t *Data, size_t Size, + size_t MaxSize) { + if (Size > MaxSize || Size == 0) + return 0; + // If Size == MaxSize, `InsertPartOf(...)` will + // fail so there's no point using it in this case. + if (Size == MaxSize || Rand.RandBool()) + return CopyPartOf(Data, Size, Data, Size); + else + return InsertPartOf(Data, Size, Data, Size, MaxSize); +} + +size_t MutationDispatcher::Mutate_ChangeASCIIInteger(uint8_t *Data, size_t Size, + size_t MaxSize) { + if (Size > MaxSize) + return 0; + size_t B = Rand(Size); + while (B < Size && !isdigit(Data[B])) + B++; + if (B == Size) + return 0; + size_t E = B; + while (E < Size && isdigit(Data[E])) + E++; + assert(B < E); + // now we have digits in [B, E). + // strtol and friends don't accept non-zero-teminated data, parse it manually. + uint64_t Val = Data[B] - '0'; + for (size_t i = B + 1; i < E; i++) + Val = Val * 10 + Data[i] - '0'; + + // Mutate the integer value. + switch (Rand(5)) { + case 0: + Val++; + break; + case 1: + Val--; + break; + case 2: + Val /= 2; + break; + case 3: + Val *= 2; + break; + case 4: + Val = Rand(Val * Val); + break; + default: + assert(0); + } + // Just replace the bytes with the new ones, don't bother moving bytes. + for (size_t i = B; i < E; i++) { + size_t Idx = E + B - i - 1; + assert(Idx >= B && Idx < E); + Data[Idx] = (Val % 10) + '0'; + Val /= 10; + } + return Size; +} + +template +size_t ChangeBinaryInteger(uint8_t *Data, size_t Size, Random &Rand) { + if (Size < sizeof(T)) + return 0; + size_t Off = Rand(Size - sizeof(T) + 1); + assert(Off + sizeof(T) <= Size); + T Val; + if (Off < 64 && !Rand(4)) { + Val = static_cast(Size); + if (Rand.RandBool()) + Val = Bswap(Val); + } else { + memcpy(&Val, Data + Off, sizeof(Val)); + T Add = static_cast(Rand(21)); + Add -= 10; + if (Rand.RandBool()) + Val = Bswap(T(Bswap(Val) + Add)); // Add assuming different endiannes. + else + Val = Val + Add; // Add assuming current endiannes. + if (Add == 0 || Rand.RandBool()) // Maybe negate. + Val = -Val; + } + memcpy(Data + Off, &Val, sizeof(Val)); + return Size; +} + +size_t MutationDispatcher::Mutate_ChangeBinaryInteger(uint8_t *Data, + size_t Size, + size_t MaxSize) { + if (Size > MaxSize) + return 0; + switch (Rand(4)) { + case 3: + return ChangeBinaryInteger(Data, Size, Rand); + case 2: + return ChangeBinaryInteger(Data, Size, Rand); + case 1: + return ChangeBinaryInteger(Data, Size, Rand); + case 0: + return ChangeBinaryInteger(Data, Size, Rand); + default: + assert(0); + } + return 0; +} + +size_t MutationDispatcher::Mutate_CrossOver(uint8_t *Data, size_t Size, + size_t MaxSize) { + if (Size > MaxSize) + return 0; + if (Size == 0) + return 0; + if (!CrossOverWith) + return 0; + const Unit &O = *CrossOverWith; + if (O.empty()) + return 0; + size_t NewSize = 0; + switch (Rand(3)) { + case 0: + MutateInPlaceHere.resize(MaxSize); + NewSize = CrossOver(Data, Size, O.data(), O.size(), + MutateInPlaceHere.data(), MaxSize); + memcpy(Data, MutateInPlaceHere.data(), NewSize); + break; + case 1: + NewSize = InsertPartOf(O.data(), O.size(), Data, Size, MaxSize); + if (!NewSize) + NewSize = CopyPartOf(O.data(), O.size(), Data, Size); + break; + case 2: + NewSize = CopyPartOf(O.data(), O.size(), Data, Size); + break; + default: + assert(0); + } + assert(NewSize > 0 && "CrossOver returned empty unit"); + assert(NewSize <= MaxSize && "CrossOver returned overisized unit"); + return NewSize; +} + +void MutationDispatcher::StartMutationSequence() { + CurrentMutatorSequence.clear(); + CurrentDictionaryEntrySequence.clear(); +} + +// Copy successful dictionary entries to PersistentAutoDictionary. +void MutationDispatcher::RecordSuccessfulMutationSequence() { + for (auto *DE : CurrentDictionaryEntrySequence) { + // PersistentAutoDictionary.AddWithSuccessCountOne(DE); + DE->IncSuccessCount(); + assert(DE->GetW().size()); + // Linear search is fine here as this happens seldom. + if (!PersistentAutoDictionary.ContainsWord(DE->GetW())) + PersistentAutoDictionary.push_back(*DE); + } +} + +const Dictionary &MutationDispatcher::RecommendDictionary() { + RecommendedDictionary.clear(); + for (auto &DE : PersistentAutoDictionary) + if (!ManualDictionary.ContainsWord(DE.GetW())) + RecommendedDictionary.push_back(DE); + NextRecommendedDictionaryEntry = 0; + return RecommendedDictionary; +} + +const char *MutationDispatcher::RecommendDictionaryEntry(size_t *UseCount) { + if (NextRecommendedDictionaryEntry >= RecommendedDictionary.size()) + return nullptr; + auto &DE = RecommendedDictionary[NextRecommendedDictionaryEntry++]; + assert(DE.GetW().size()); + DictionaryEntryWord = ToASCII(DE.GetW()); + if (UseCount) + *UseCount = DE.GetUseCount(); + return DictionaryEntryWord.c_str(); +} + +const Sequence & +MutationDispatcher::MutationSequence() { + CurrentMutatorSequence.SetString([](Mutator M) { return M.Name; }); + return CurrentMutatorSequence; +} + +const Sequence & +MutationDispatcher::DictionaryEntrySequence() { + CurrentDictionaryEntrySequence.SetString([](DictionaryEntry *DE) { + return std::string("\"") + ToASCII(DE->GetW()) + std::string("\""); + }); + return CurrentDictionaryEntrySequence; +} + +size_t MutationDispatcher::Mutate(uint8_t *Data, size_t Size, size_t MaxSize) { + return MutateImpl(Data, Size, MaxSize, Mutators); +} + +size_t MutationDispatcher::DefaultMutate(uint8_t *Data, size_t Size, + size_t MaxSize) { + return MutateImpl(Data, Size, MaxSize, DefaultMutators); +} + +// Mutates Data in place, returns new size. +size_t MutationDispatcher::MutateImpl(uint8_t *Data, size_t Size, + size_t MaxSize, + Vector &Mutators) { + assert(MaxSize > 0); + // Some mutations may fail (e.g. can't insert more bytes if Size == MaxSize), + // in which case they will return 0. + // Try several times before returning un-mutated data. + for (int Iter = 0; Iter < 100; Iter++) { + auto M = Mutators[Rand(Mutators.size())]; + size_t NewSize = (this->*(M.Fn))(Data, Size, MaxSize); + if (NewSize && NewSize <= MaxSize) { + if (Config.OnlyASCII) + ToASCII(Data, NewSize); + CurrentMutatorSequence.push_back(M); + return NewSize; + } + } + *Data = ' '; + return 1; // Fallback, should not happen frequently. +} + +// Mask represents the set of Data bytes that are worth mutating. +size_t MutationDispatcher::MutateWithMask(uint8_t *Data, size_t Size, + size_t MaxSize, + const Vector &Mask) { + size_t MaskedSize = std::min(Size, Mask.size()); + // * Copy the worthy bytes into a temporary array T + // * Mutate T + // * Copy T back. + // This is totally unoptimized. + auto &T = MutateWithMaskTemp; + if (T.size() < Size) + T.resize(Size); + size_t OneBits = 0; + for (size_t I = 0; I < MaskedSize; I++) + if (Mask[I]) + T[OneBits++] = Data[I]; + + if (!OneBits) + return 0; + assert(!T.empty()); + size_t NewSize = Mutate(T.data(), OneBits, OneBits); + assert(NewSize <= OneBits); + (void)NewSize; + // Even if NewSize < OneBits we still use all OneBits bytes. + for (size_t I = 0, J = 0; I < MaskedSize; I++) + if (Mask[I]) + Data[I] = T[J++]; + return Size; +} + +void MutationDispatcher::AddWordToManualDictionary(const Word &W) { + ManualDictionary.push_back({W, std::numeric_limits::max()}); +} + +} // namespace mutagen diff --git a/compiler-rt/lib/fuzzer/mutagen/MutagenDispatcher.h b/compiler-rt/lib/fuzzer/mutagen/MutagenDispatcher.h new file mode 100644 index 0000000000000..c5c43d5c346fe --- /dev/null +++ b/compiler-rt/lib/fuzzer/mutagen/MutagenDispatcher.h @@ -0,0 +1,190 @@ +//===- MutagenDispatcher.h - Internal header for the mutagen ----*- C++ -* ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// mutagen::MutationDispatcher +//===----------------------------------------------------------------------===// + +#ifndef LLVM_FUZZER_MUTAGEN_DISPATCHER_H +#define LLVM_FUZZER_MUTAGEN_DISPATCHER_H + +#include "FuzzerRandom.h" +#include "Mutagen.h" +#include "MutagenDictionary.h" +#include "MutagenSequence.h" +#include +#include +#include + +namespace mutagen { +namespace { + +using fuzzer::Random; +using fuzzer::Unit; +using fuzzer::Vector; +using fuzzer::Word; + +} // namespace + +class MutationDispatcher final { +public: + struct Mutator { + size_t (MutationDispatcher::*Fn)(uint8_t *Data, size_t Size, size_t Max); + const char *Name; + }; + + explicit MutationDispatcher(const LLVMMutagenConfiguration *Config); + ~MutationDispatcher() = default; + + /// Indicate that we are about to start a new sequence of mutations. + void StartMutationSequence(); + /// Returns the current sequence of mutations. May truncate the sequence + /// unless Verbose is true. Sets |OutSize| to the length of the untrancated + /// sequence, if provided. + const Sequence &MutationSequence(); + /// Returns the current sequence of dictionary entries. May truncate the + /// sequence unless Verbose is true. Sets |OutSize| to the length of the + /// untrancated sequence, if provided. + const Sequence &DictionaryEntrySequence(); + /// Indicate that the current sequence of mutations was successful. + void RecordSuccessfulMutationSequence(); + /// Mutates data by invoking user-provided mutator. + size_t Mutate_Custom(uint8_t *Data, size_t Size, size_t MaxSize); + /// Mutates data by invoking user-provided crossover. + size_t Mutate_CustomCrossOver(uint8_t *Data, size_t Size, size_t MaxSize); + /// Mutates data by shuffling bytes. + size_t Mutate_ShuffleBytes(uint8_t *Data, size_t Size, size_t MaxSize); + /// Mutates data by erasing bytes. + size_t Mutate_EraseBytes(uint8_t *Data, size_t Size, size_t MaxSize); + /// Mutates data by inserting a byte. + size_t Mutate_InsertByte(uint8_t *Data, size_t Size, size_t MaxSize); + /// Mutates data by inserting several repeated bytes. + size_t Mutate_InsertRepeatedBytes(uint8_t *Data, size_t Size, size_t MaxSize); + /// Mutates data by changing one byte. + size_t Mutate_ChangeByte(uint8_t *Data, size_t Size, size_t MaxSize); + /// Mutates data by changing one bit. + size_t Mutate_ChangeBit(uint8_t *Data, size_t Size, size_t MaxSize); + /// Mutates data by copying/inserting a part of data into a different place. + size_t Mutate_CopyPart(uint8_t *Data, size_t Size, size_t MaxSize); + + /// Mutates data by adding a word from the manual dictionary. + size_t Mutate_AddWordFromManualDictionary(uint8_t *Data, size_t Size, + size_t MaxSize); + + /// Mutates data by adding a word from the TORC. + size_t Mutate_AddWordFromTORC(uint8_t *Data, size_t Size, size_t MaxSize); + + /// Mutates data by adding a word from the persistent automatic dictionary. + size_t Mutate_AddWordFromPersistentAutoDictionary(uint8_t *Data, size_t Size, + size_t MaxSize); + + /// Tries to find an ASCII integer in Data, changes it to another ASCII int. + size_t Mutate_ChangeASCIIInteger(uint8_t *Data, size_t Size, size_t MaxSize); + /// Change a 1-, 2-, 4-, or 8-byte integer in interesting ways. + size_t Mutate_ChangeBinaryInteger(uint8_t *Data, size_t Size, size_t MaxSize); + + /// CrossOver Data with CrossOverWith. + size_t Mutate_CrossOver(uint8_t *Data, size_t Size, size_t MaxSize); + + size_t AddWordFromDictionary(Dictionary &D, uint8_t *Data, size_t Size, + size_t MaxSize); + size_t MutateImpl(uint8_t *Data, size_t Size, size_t MaxSize, + Vector &Mutators); + + size_t InsertPartOf(const uint8_t *From, size_t FromSize, uint8_t *To, + size_t ToSize, size_t MaxToSize); + size_t CopyPartOf(const uint8_t *From, size_t FromSize, uint8_t *To, + size_t ToSize); + size_t ApplyDictionaryEntry(uint8_t *Data, size_t Size, size_t MaxSize, + DictionaryEntry &DE); + + template + DictionaryEntry MakeDictionaryEntryFromCMP(T Arg1, T Arg2, + const uint8_t *Data, size_t Size); + DictionaryEntry MakeDictionaryEntryFromCMP(const Word &Arg1, const Word &Arg2, + const uint8_t *Data, size_t Size); + DictionaryEntry MakeDictionaryEntryFromCMP(const void *Arg1, const void *Arg2, + const void *Arg1Mutation, + const void *Arg2Mutation, + size_t ArgSize, + const uint8_t *Data, size_t Size); + + /// Applies one of the configured mutations. + /// Returns the new size of data which could be up to MaxSize. + size_t Mutate(uint8_t *Data, size_t Size, size_t MaxSize); + + /// Applies one of the configured mutations to the bytes of Data + /// that have '1' in Mask. + /// Mask.size() should be >= Size. + size_t MutateWithMask(uint8_t *Data, size_t Size, size_t MaxSize, + const Vector &Mask); + + /// Applies one of the default mutations. Provided as a service + /// to mutation authors. + size_t DefaultMutate(uint8_t *Data, size_t Size, size_t MaxSize); + + /// Creates a cross-over of two pieces of Data, returns its size. + size_t CrossOver(const uint8_t *Data1, size_t Size1, const uint8_t *Data2, + size_t Size2, uint8_t *Out, size_t MaxOutSize); + + void AddWordToManualDictionary(const Word &W); + + // Creates a recommended dictionary and returns its number of entries. The + // entries can be retrieved by subsequent calls to + // |LLVMMutagenRecommendDictionaryEntry|. + const Dictionary &RecommendDictionary(); + + // Returns the ASCII representation of the next recommended dictionary entry, + // and sets |OutUseCount| to its use count. The return pointer is valid until + // the next call to this method. + const char *RecommendDictionaryEntry(size_t *OutUseCount); + + void SetCrossOverWith(const Unit *U) { CrossOverWith = U; } + + Random &GetRand() { return Rand; } + +private: + // Imports and validates the disptacher's configuration. + void SetConfig(const LLVMMutagenConfiguration *Config); + + Random Rand; + LLVMMutagenConfiguration Config; + + // Dictionary provided by the user via -dict=DICT_FILE. + Dictionary ManualDictionary; + // Persistent dictionary modified by the fuzzer, consists of + // entries that led to successful discoveries in the past mutations. + Dictionary PersistentAutoDictionary; + // Recommended dictionary buolt by |RecommendDictionary|. + Dictionary RecommendedDictionary; + size_t NextRecommendedDictionaryEntry = 0; + std::string DictionaryEntryWord; + + Sequence CurrentDictionaryEntrySequence; + + static const size_t kCmpDictionaryEntriesDequeSize = 16; + DictionaryEntry CmpDictionaryEntriesDeque[kCmpDictionaryEntriesDequeSize]; + size_t CmpDictionaryEntriesDequeIdx = 0; + + const Unit *CrossOverWith = nullptr; + Vector MutateInPlaceHere; + Vector MutateWithMaskTemp; + // CustomCrossOver needs its own buffer as a custom implementation may call + // LLVMFuzzerMutate, which in turn may resize MutateInPlaceHere. + Vector CustomCrossOverInPlaceHere; + + Vector Mutators; + Vector DefaultMutators; + Sequence CurrentMutatorSequence; +}; + +// Returns a pointer to the MutationDispatcher is use by MutagenInterface. +// This should only be used for testing. +MutationDispatcher *GetMutationDispatcherForTest(); + +} // namespace mutagen + +#endif // LLVM_FUZZER_MUTAGEN_DISPATCHER_H diff --git a/compiler-rt/lib/fuzzer/mutagen/MutagenSequence.h b/compiler-rt/lib/fuzzer/mutagen/MutagenSequence.h new file mode 100644 index 0000000000000..fd0ab2cb5f0f9 --- /dev/null +++ b/compiler-rt/lib/fuzzer/mutagen/MutagenSequence.h @@ -0,0 +1,101 @@ +//===- MutagenSequence.h - Internal header for the mutagen ------*- C++ -* ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// mutagen::Sequence +//===----------------------------------------------------------------------===// + +#ifndef LLVM_FUZZER_MUTAGEN_SEQUENCE_H +#define LLVM_FUZZER_MUTAGEN_SEQUENCE_H + +#include "FuzzerDefs.h" +#include +#include + +namespace mutagen { +namespace { + +using fuzzer::Vector; + +} // namespace + +// The Sequence type bundles together a list of items, a string representation, +// and a position in that string suitable for truncating it when overly long, +// e.g. after the tenth item. +template class Sequence { +public: + constexpr static size_t kMaxBriefItems = 10; + + void clear() { + Items.clear(); + Size = 0; + Str.clear(); + Brief = 0; + } + + bool empty() const { return Size == 0; } + + size_t size() const { return Size; } + + void push_back(T t) { Items.push_back(t); } + + typename Vector::const_iterator begin() const { return Items.begin(); } + typename Vector::iterator begin() { return Items.begin(); } + + typename Vector::const_iterator end() const { return Items.end(); } + typename Vector::iterator end() { return Items.end(); } + + std::string GetString(bool Verbose = true) const { + return Verbose ? Str : Str.substr(0, Brief); + } + + // Constructs the string representation of the sequence, using a callback that + // converts items to strings. + template + // std::string ItemCallback(T Item); + void SetString(ItemCallback ConvertToASCII) { + // No change since last call. + if (Size == Items.size()) + return; + Size = Items.size(); + std::ostringstream OSS; + size_t i = 0; + for (; i < Size && i < kMaxBriefItems; i++) + OSS << ConvertToASCII(Items[i]) << "-"; + Brief = static_cast(OSS.tellp()); + for (; i < Size; i++) + OSS << ConvertToASCII(Items[i]) << "-"; + Str = OSS.str(); + } + +private: + Vector Items; + size_t Size = 0; + std::string Str; + size_t Brief = 0; +}; + +template +typename Vector::const_iterator begin(const Sequence &S) { + return S.begin(); +} + +template typename Vector::iterator begin(Sequence &S) { + return S.begin(); +} + +template +typename Vector::const_iterator end(const Sequence &S) { + return S.end(); +} + +template typename Vector::iterator end(Sequence &S) { + return S.end(); +} + +} // namespace mutagen + +#endif // LLVM_FUZZER_MUTAGEN_SEQUENCE_H diff --git a/compiler-rt/lib/fuzzer/mutagen/MutagenUtil.h b/compiler-rt/lib/fuzzer/mutagen/MutagenUtil.h new file mode 100644 index 0000000000000..cf3b78b9655af --- /dev/null +++ b/compiler-rt/lib/fuzzer/mutagen/MutagenUtil.h @@ -0,0 +1,24 @@ +//===- MutagenUtil.h - Internal header for the mutagen Utils ----*- C++ -* ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Util functions. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_FUZZER_MUTAGEN_UTIL_H +#define LLVM_FUZZER_MUTAGEN_UTIL_H + +#include +#include + +namespace mutagen { + +const void *SearchMemory(const void *haystack, size_t haystacklen, + const void *needle, size_t needlelen); + +} // namespace mutagen + +#endif // LLVM_FUZZER_MUTAGEN_UTIL_H diff --git a/compiler-rt/lib/fuzzer/mutagen/MutagenUtilPosix.cpp b/compiler-rt/lib/fuzzer/mutagen/MutagenUtilPosix.cpp new file mode 100644 index 0000000000000..c157c6190c5d7 --- /dev/null +++ b/compiler-rt/lib/fuzzer/mutagen/MutagenUtilPosix.cpp @@ -0,0 +1,23 @@ +//===- MutagenUtilPosix.cpp - Misc utils for Posix. -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Misc utils implementation using Posix API. +//===----------------------------------------------------------------------===// +#include "FuzzerPlatform.h" +#if (LIBFUZZER_POSIX || LIBFUZZER_FUCHSIA) +#include + +namespace mutagen { + +const void *SearchMemory(const void *Data, size_t DataLen, const void *Patt, + size_t PattLen) { + return memmem(Data, DataLen, Patt, PattLen); +} + +} // namespace mutagen + +#endif // (LIBFUZZER_POSIX || LIBFUZZER_FUCHSIA) diff --git a/compiler-rt/lib/fuzzer/mutagen/MutagenUtilWindows.cpp b/compiler-rt/lib/fuzzer/mutagen/MutagenUtilWindows.cpp new file mode 100644 index 0000000000000..93b86556b1393 --- /dev/null +++ b/compiler-rt/lib/fuzzer/mutagen/MutagenUtilWindows.cpp @@ -0,0 +1,41 @@ +//===- MutagenUtilWindows.cpp - Misc utils for Windows. -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Misc utils implementation for Windows. +//===----------------------------------------------------------------------===// +#include "FuzzerPlatform.h" +#if LIBFUZZER_WINDOWS +#include +#include +#include + +namespace mutagen { + +const void *SearchMemory(const void *Data, size_t DataLen, const void *Patt, + size_t PattLen) { + // TODO: make this implementation more efficient. + const char *Cdata = (const char *)Data; + const char *Cpatt = (const char *)Patt; + + if (!Data || !Patt || DataLen == 0 || PattLen == 0 || DataLen < PattLen) + return NULL; + + if (PattLen == 1) + return memchr(Data, *Cpatt, DataLen); + + const char *End = Cdata + DataLen - PattLen + 1; + + for (const char *It = Cdata; It < End; ++It) + if (It[0] == Cpatt[0] && memcmp(It, Cpatt, PattLen) == 0) + return It; + + return NULL; +} + +} // namespace mutagen + +#endif // LIBFUZZER_WINDOWS diff --git a/compiler-rt/lib/fuzzer/mutagen/build.sh b/compiler-rt/lib/fuzzer/mutagen/build.sh new file mode 100755 index 0000000000000..19c22b8fc5562 --- /dev/null +++ b/compiler-rt/lib/fuzzer/mutagen/build.sh @@ -0,0 +1,12 @@ +#!/bin/sh +LIBMUTAGEN_SRC_DIR=$(dirname $0) +LIBFUZZER_SRC_DIR=$LIBMUTAGEN_SRC_DIR/.. +CXX="${CXX:-clang}" +for f in $LIBMUTAGEN_SRC_DIR/*.cpp; do + $CXX -g -O2 -fno-omit-frame-pointer -std=c++11 $f -c -I$LIBFUZZER_SRC_DIR & +done +wait +rm -f libMutagen.a +ar ru libMutagen.a Mutagen*.o +rm -f Mutagen*.o + diff --git a/compiler-rt/lib/fuzzer/tests/CMakeLists.txt b/compiler-rt/lib/fuzzer/tests/CMakeLists.txt index 5b3e906419546..974efc3c5b630 100644 --- a/compiler-rt/lib/fuzzer/tests/CMakeLists.txt +++ b/compiler-rt/lib/fuzzer/tests/CMakeLists.txt @@ -17,6 +17,9 @@ set_target_properties(FuzzerUnitTests PROPERTIES FOLDER "Compiler-RT Tests") add_custom_target(FuzzedDataProviderUnitTests) set_target_properties(FuzzedDataProviderUnitTests PROPERTIES FOLDER "Compiler-RT Tests") +add_custom_target(MutagenUnitTests) +set_target_properties(MutagenUnitTests PROPERTIES FOLDER "Compiler-RT Tests") + set(LIBFUZZER_UNITTEST_LINK_FLAGS ${COMPILER_RT_UNITTEST_LINK_FLAGS}) list(APPEND LIBFUZZER_UNITTEST_LINK_FLAGS --driver-mode=g++) @@ -46,23 +49,35 @@ if(COMPILER_RT_DEFAULT_TARGET_ARCH IN_LIST FUZZER_SUPPORTED_ARCH) set(arch ${COMPILER_RT_DEFAULT_TARGET_ARCH}) set(LIBFUZZER_TEST_RUNTIME RTFuzzerTest.${arch}) + set(LIBMUTAGEN_TEST_RUNTIME RTMutagenTest.${arch}) if(APPLE) set(LIBFUZZER_TEST_RUNTIME_OBJECTS $) + set(LIBMUTAGEN_TEST_RUNTIME_OBJECTS + $) else() set(LIBFUZZER_TEST_RUNTIME_OBJECTS $) + set(LIBMUTAGEN_TEST_RUNTIME_OBJECTS + $) endif() add_library(${LIBFUZZER_TEST_RUNTIME} STATIC - ${LIBFUZZER_TEST_RUNTIME_OBJECTS}) + ${LIBFUZZER_TEST_RUNTIME_OBJECTS} + ${LIBMUTAGEN_TEST_RUNTIME_OBJECTS}) set_target_properties(${LIBFUZZER_TEST_RUNTIME} PROPERTIES ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} FOLDER "Compiler-RT Runtime tests") + add_library(${LIBMUTAGEN_TEST_RUNTIME} STATIC + ${LIBMUTAGEN_TEST_RUNTIME_OBJECTS}) + set_target_properties(${LIBMUTAGEN_TEST_RUNTIME} PROPERTIES + ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + FOLDER "Compiler-RT Runtime tests") + if(CMAKE_SYSTEM_NAME STREQUAL "Linux" AND COMPILER_RT_LIBCXX_PATH AND COMPILER_RT_LIBCXXABI_PATH) - file(GLOB libfuzzer_headers ../*.h) + file(GLOB libfuzzer_headers ../*.h ../mutagen/*.h) set(LIBFUZZER_TEST_RUNTIME_DEPS libcxx_fuzzer_${arch}-build ${libfuzzer_headers}) set(LIBFUZZER_TEST_RUNTIME_CFLAGS -isystem ${LIBCXX_${arch}_PREFIX}/include/c++/v1) set(LIBFUZZER_TEST_RUNTIME_LINK_FLAGS ${LIBCXX_${arch}_PREFIX}/lib/libc++.a) @@ -73,7 +88,7 @@ if(COMPILER_RT_DEFAULT_TARGET_ARCH IN_LIST FUZZER_SUPPORTED_ARCH) FuzzerUnitTests "Fuzzer-${arch}-Test" ${arch} SOURCES FuzzerUnittest.cpp ${COMPILER_RT_GTEST_SOURCE} RUNTIME ${LIBFUZZER_TEST_RUNTIME} - DEPS gtest ${LIBFUZZER_TEST_RUNTIME_DEPS} + DEPS gtest ${LIBFUZZER_TEST_RUNTIME_DEPS} CFLAGS ${LIBFUZZER_UNITTEST_CFLAGS} ${LIBFUZZER_TEST_RUNTIME_CFLAGS} LINK_FLAGS ${LIBFUZZER_UNITTEST_LINK_FLAGS} ${LIBFUZZER_TEST_RUNTIME_LINK_FLAGS}) set_target_properties(FuzzerUnitTests PROPERTIES @@ -88,4 +103,15 @@ if(COMPILER_RT_DEFAULT_TARGET_ARCH IN_LIST FUZZER_SUPPORTED_ARCH) LINK_FLAGS ${LIBFUZZER_UNITTEST_LINK_FLAGS} ${LIBFUZZER_TEST_RUNTIME_LINK_FLAGS}) set_target_properties(FuzzedDataProviderUnitTests PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + + set(MutagenTestObjects) + generate_compiler_rt_tests(MutagenTestObjects + MutagenUnitTests "Mutagen-${arch}-Test" ${arch} + SOURCES MutagenUnittest.cpp ${COMPILER_RT_GTEST_SOURCE} + RUNTIME ${LIBFUZZER_TEST_RUNTIME} + DEPS gtest ${LIBFUZZER_TEST_RUNTIME_DEPS} ${LIBMUTAGEN_TEST_RUNTIME_DEPS} + CFLAGS ${LIBFUZZER_UNITTEST_CFLAGS} ${LIBFUZZER_TEST_RUNTIME_CFLAGS} + LINK_FLAGS ${LIBFUZZER_UNITTEST_LINK_FLAGS} ${LIBFUZZER_TEST_RUNTIME_LINK_FLAGS}) + set_target_properties(MutagenUnitTests PROPERTIES + RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) endif() diff --git a/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp b/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp index 974a01ff4ab6e..277fa5e6183f8 100644 --- a/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp +++ b/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp @@ -10,7 +10,6 @@ #define GTEST_NO_LLVM_SUPPORT 1 #include "FuzzerCorpus.h" -#include "FuzzerDictionary.h" #include "FuzzerInternal.h" #include "FuzzerMerge.h" #include "FuzzerMutate.h" @@ -44,65 +43,6 @@ TEST(Fuzzer, Basename) { #endif } -TEST(Fuzzer, CrossOver) { - std::unique_ptr t(new ExternalFunctions()); - fuzzer::EF = t.get(); - Random Rand(0); - std::unique_ptr MD(new MutationDispatcher(Rand, {})); - Unit A({0, 1, 2}), B({5, 6, 7}); - Unit C; - Unit Expected[] = { - { 0 }, - { 0, 1 }, - { 0, 5 }, - { 0, 1, 2 }, - { 0, 1, 5 }, - { 0, 5, 1 }, - { 0, 5, 6 }, - { 0, 1, 2, 5 }, - { 0, 1, 5, 2 }, - { 0, 1, 5, 6 }, - { 0, 5, 1, 2 }, - { 0, 5, 1, 6 }, - { 0, 5, 6, 1 }, - { 0, 5, 6, 7 }, - { 0, 1, 2, 5, 6 }, - { 0, 1, 5, 2, 6 }, - { 0, 1, 5, 6, 2 }, - { 0, 1, 5, 6, 7 }, - { 0, 5, 1, 2, 6 }, - { 0, 5, 1, 6, 2 }, - { 0, 5, 1, 6, 7 }, - { 0, 5, 6, 1, 2 }, - { 0, 5, 6, 1, 7 }, - { 0, 5, 6, 7, 1 }, - { 0, 1, 2, 5, 6, 7 }, - { 0, 1, 5, 2, 6, 7 }, - { 0, 1, 5, 6, 2, 7 }, - { 0, 1, 5, 6, 7, 2 }, - { 0, 5, 1, 2, 6, 7 }, - { 0, 5, 1, 6, 2, 7 }, - { 0, 5, 1, 6, 7, 2 }, - { 0, 5, 6, 1, 2, 7 }, - { 0, 5, 6, 1, 7, 2 }, - { 0, 5, 6, 7, 1, 2 } - }; - for (size_t Len = 1; Len < 8; Len++) { - Set FoundUnits, ExpectedUnitsWitThisLength; - for (int Iter = 0; Iter < 3000; Iter++) { - C.resize(Len); - size_t NewSize = MD->CrossOver(A.data(), A.size(), B.data(), B.size(), - C.data(), C.size()); - C.resize(NewSize); - FoundUnits.insert(C); - } - for (const Unit &U : Expected) - if (U.size() <= Len) - ExpectedUnitsWitThisLength.insert(U); - EXPECT_EQ(ExpectedUnitsWitThisLength, FoundUnits); - } -} - TEST(Fuzzer, Hash) { uint8_t A[] = {'a', 'b', 'c'}; fuzzer::Unit U(A, A + sizeof(A)); @@ -111,423 +51,6 @@ TEST(Fuzzer, Hash) { EXPECT_EQ("81fe8bfe87576c3ecb22426f8e57847382917acf", fuzzer::Hash(U)); } -typedef size_t (MutationDispatcher::*Mutator)(uint8_t *Data, size_t Size, - size_t MaxSize); - -void TestEraseBytes(Mutator M, int NumIter) { - std::unique_ptr t(new ExternalFunctions()); - fuzzer::EF = t.get(); - uint8_t REM0[8] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; - uint8_t REM1[8] = {0x00, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; - uint8_t REM2[8] = {0x00, 0x11, 0x33, 0x44, 0x55, 0x66, 0x77}; - uint8_t REM3[8] = {0x00, 0x11, 0x22, 0x44, 0x55, 0x66, 0x77}; - uint8_t REM4[8] = {0x00, 0x11, 0x22, 0x33, 0x55, 0x66, 0x77}; - uint8_t REM5[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x66, 0x77}; - uint8_t REM6[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x77}; - uint8_t REM7[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; - - uint8_t REM8[6] = {0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; - uint8_t REM9[6] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55}; - uint8_t REM10[6] = {0x00, 0x11, 0x22, 0x55, 0x66, 0x77}; - - uint8_t REM11[5] = {0x33, 0x44, 0x55, 0x66, 0x77}; - uint8_t REM12[5] = {0x00, 0x11, 0x22, 0x33, 0x44}; - uint8_t REM13[5] = {0x00, 0x44, 0x55, 0x66, 0x77}; - - - Random Rand(0); - std::unique_ptr MD(new MutationDispatcher(Rand, {})); - int FoundMask = 0; - for (int i = 0; i < NumIter; i++) { - uint8_t T[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; - size_t NewSize = (*MD.*M)(T, sizeof(T), sizeof(T)); - if (NewSize == 7 && !memcmp(REM0, T, 7)) FoundMask |= 1 << 0; - if (NewSize == 7 && !memcmp(REM1, T, 7)) FoundMask |= 1 << 1; - if (NewSize == 7 && !memcmp(REM2, T, 7)) FoundMask |= 1 << 2; - if (NewSize == 7 && !memcmp(REM3, T, 7)) FoundMask |= 1 << 3; - if (NewSize == 7 && !memcmp(REM4, T, 7)) FoundMask |= 1 << 4; - if (NewSize == 7 && !memcmp(REM5, T, 7)) FoundMask |= 1 << 5; - if (NewSize == 7 && !memcmp(REM6, T, 7)) FoundMask |= 1 << 6; - if (NewSize == 7 && !memcmp(REM7, T, 7)) FoundMask |= 1 << 7; - - if (NewSize == 6 && !memcmp(REM8, T, 6)) FoundMask |= 1 << 8; - if (NewSize == 6 && !memcmp(REM9, T, 6)) FoundMask |= 1 << 9; - if (NewSize == 6 && !memcmp(REM10, T, 6)) FoundMask |= 1 << 10; - - if (NewSize == 5 && !memcmp(REM11, T, 5)) FoundMask |= 1 << 11; - if (NewSize == 5 && !memcmp(REM12, T, 5)) FoundMask |= 1 << 12; - if (NewSize == 5 && !memcmp(REM13, T, 5)) FoundMask |= 1 << 13; - } - EXPECT_EQ(FoundMask, (1 << 14) - 1); -} - -TEST(FuzzerMutate, EraseBytes1) { - TestEraseBytes(&MutationDispatcher::Mutate_EraseBytes, 200); -} -TEST(FuzzerMutate, EraseBytes2) { - TestEraseBytes(&MutationDispatcher::Mutate, 2000); -} - -void TestInsertByte(Mutator M, int NumIter) { - std::unique_ptr t(new ExternalFunctions()); - fuzzer::EF = t.get(); - Random Rand(0); - std::unique_ptr MD(new MutationDispatcher(Rand, {})); - int FoundMask = 0; - uint8_t INS0[8] = {0xF1, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; - uint8_t INS1[8] = {0x00, 0xF2, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; - uint8_t INS2[8] = {0x00, 0x11, 0xF3, 0x22, 0x33, 0x44, 0x55, 0x66}; - uint8_t INS3[8] = {0x00, 0x11, 0x22, 0xF4, 0x33, 0x44, 0x55, 0x66}; - uint8_t INS4[8] = {0x00, 0x11, 0x22, 0x33, 0xF5, 0x44, 0x55, 0x66}; - uint8_t INS5[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0xF6, 0x55, 0x66}; - uint8_t INS6[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0xF7, 0x66}; - uint8_t INS7[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0xF8}; - for (int i = 0; i < NumIter; i++) { - uint8_t T[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; - size_t NewSize = (*MD.*M)(T, 7, 8); - if (NewSize == 8 && !memcmp(INS0, T, 8)) FoundMask |= 1 << 0; - if (NewSize == 8 && !memcmp(INS1, T, 8)) FoundMask |= 1 << 1; - if (NewSize == 8 && !memcmp(INS2, T, 8)) FoundMask |= 1 << 2; - if (NewSize == 8 && !memcmp(INS3, T, 8)) FoundMask |= 1 << 3; - if (NewSize == 8 && !memcmp(INS4, T, 8)) FoundMask |= 1 << 4; - if (NewSize == 8 && !memcmp(INS5, T, 8)) FoundMask |= 1 << 5; - if (NewSize == 8 && !memcmp(INS6, T, 8)) FoundMask |= 1 << 6; - if (NewSize == 8 && !memcmp(INS7, T, 8)) FoundMask |= 1 << 7; - } - EXPECT_EQ(FoundMask, 255); -} - -TEST(FuzzerMutate, InsertByte1) { - TestInsertByte(&MutationDispatcher::Mutate_InsertByte, 1 << 15); -} -TEST(FuzzerMutate, InsertByte2) { - TestInsertByte(&MutationDispatcher::Mutate, 1 << 17); -} - -void TestInsertRepeatedBytes(Mutator M, int NumIter) { - std::unique_ptr t(new ExternalFunctions()); - fuzzer::EF = t.get(); - Random Rand(0); - std::unique_ptr MD(new MutationDispatcher(Rand, {})); - int FoundMask = 0; - uint8_t INS0[7] = {0x00, 0x11, 0x22, 0x33, 'a', 'a', 'a'}; - uint8_t INS1[7] = {0x00, 0x11, 0x22, 'a', 'a', 'a', 0x33}; - uint8_t INS2[7] = {0x00, 0x11, 'a', 'a', 'a', 0x22, 0x33}; - uint8_t INS3[7] = {0x00, 'a', 'a', 'a', 0x11, 0x22, 0x33}; - uint8_t INS4[7] = {'a', 'a', 'a', 0x00, 0x11, 0x22, 0x33}; - - uint8_t INS5[8] = {0x00, 0x11, 0x22, 0x33, 'b', 'b', 'b', 'b'}; - uint8_t INS6[8] = {0x00, 0x11, 0x22, 'b', 'b', 'b', 'b', 0x33}; - uint8_t INS7[8] = {0x00, 0x11, 'b', 'b', 'b', 'b', 0x22, 0x33}; - uint8_t INS8[8] = {0x00, 'b', 'b', 'b', 'b', 0x11, 0x22, 0x33}; - uint8_t INS9[8] = {'b', 'b', 'b', 'b', 0x00, 0x11, 0x22, 0x33}; - - for (int i = 0; i < NumIter; i++) { - uint8_t T[8] = {0x00, 0x11, 0x22, 0x33}; - size_t NewSize = (*MD.*M)(T, 4, 8); - if (NewSize == 7 && !memcmp(INS0, T, 7)) FoundMask |= 1 << 0; - if (NewSize == 7 && !memcmp(INS1, T, 7)) FoundMask |= 1 << 1; - if (NewSize == 7 && !memcmp(INS2, T, 7)) FoundMask |= 1 << 2; - if (NewSize == 7 && !memcmp(INS3, T, 7)) FoundMask |= 1 << 3; - if (NewSize == 7 && !memcmp(INS4, T, 7)) FoundMask |= 1 << 4; - - if (NewSize == 8 && !memcmp(INS5, T, 8)) FoundMask |= 1 << 5; - if (NewSize == 8 && !memcmp(INS6, T, 8)) FoundMask |= 1 << 6; - if (NewSize == 8 && !memcmp(INS7, T, 8)) FoundMask |= 1 << 7; - if (NewSize == 8 && !memcmp(INS8, T, 8)) FoundMask |= 1 << 8; - if (NewSize == 8 && !memcmp(INS9, T, 8)) FoundMask |= 1 << 9; - - } - EXPECT_EQ(FoundMask, (1 << 10) - 1); -} - -TEST(FuzzerMutate, InsertRepeatedBytes1) { - TestInsertRepeatedBytes(&MutationDispatcher::Mutate_InsertRepeatedBytes, 10000); -} -TEST(FuzzerMutate, InsertRepeatedBytes2) { - TestInsertRepeatedBytes(&MutationDispatcher::Mutate, 300000); -} - -void TestChangeByte(Mutator M, int NumIter) { - std::unique_ptr t(new ExternalFunctions()); - fuzzer::EF = t.get(); - Random Rand(0); - std::unique_ptr MD(new MutationDispatcher(Rand, {})); - int FoundMask = 0; - uint8_t CH0[8] = {0xF0, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; - uint8_t CH1[8] = {0x00, 0xF1, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; - uint8_t CH2[8] = {0x00, 0x11, 0xF2, 0x33, 0x44, 0x55, 0x66, 0x77}; - uint8_t CH3[8] = {0x00, 0x11, 0x22, 0xF3, 0x44, 0x55, 0x66, 0x77}; - uint8_t CH4[8] = {0x00, 0x11, 0x22, 0x33, 0xF4, 0x55, 0x66, 0x77}; - uint8_t CH5[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0xF5, 0x66, 0x77}; - uint8_t CH6[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0xF5, 0x77}; - uint8_t CH7[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0xF7}; - for (int i = 0; i < NumIter; i++) { - uint8_t T[9] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; - size_t NewSize = (*MD.*M)(T, 8, 9); - if (NewSize == 8 && !memcmp(CH0, T, 8)) FoundMask |= 1 << 0; - if (NewSize == 8 && !memcmp(CH1, T, 8)) FoundMask |= 1 << 1; - if (NewSize == 8 && !memcmp(CH2, T, 8)) FoundMask |= 1 << 2; - if (NewSize == 8 && !memcmp(CH3, T, 8)) FoundMask |= 1 << 3; - if (NewSize == 8 && !memcmp(CH4, T, 8)) FoundMask |= 1 << 4; - if (NewSize == 8 && !memcmp(CH5, T, 8)) FoundMask |= 1 << 5; - if (NewSize == 8 && !memcmp(CH6, T, 8)) FoundMask |= 1 << 6; - if (NewSize == 8 && !memcmp(CH7, T, 8)) FoundMask |= 1 << 7; - } - EXPECT_EQ(FoundMask, 255); -} - -TEST(FuzzerMutate, ChangeByte1) { - TestChangeByte(&MutationDispatcher::Mutate_ChangeByte, 1 << 15); -} -TEST(FuzzerMutate, ChangeByte2) { - TestChangeByte(&MutationDispatcher::Mutate, 1 << 17); -} - -void TestChangeBit(Mutator M, int NumIter) { - std::unique_ptr t(new ExternalFunctions()); - fuzzer::EF = t.get(); - Random Rand(0); - std::unique_ptr MD(new MutationDispatcher(Rand, {})); - int FoundMask = 0; - uint8_t CH0[8] = {0x01, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; - uint8_t CH1[8] = {0x00, 0x13, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; - uint8_t CH2[8] = {0x00, 0x11, 0x02, 0x33, 0x44, 0x55, 0x66, 0x77}; - uint8_t CH3[8] = {0x00, 0x11, 0x22, 0x37, 0x44, 0x55, 0x66, 0x77}; - uint8_t CH4[8] = {0x00, 0x11, 0x22, 0x33, 0x54, 0x55, 0x66, 0x77}; - uint8_t CH5[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x54, 0x66, 0x77}; - uint8_t CH6[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x76, 0x77}; - uint8_t CH7[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0xF7}; - for (int i = 0; i < NumIter; i++) { - uint8_t T[9] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; - size_t NewSize = (*MD.*M)(T, 8, 9); - if (NewSize == 8 && !memcmp(CH0, T, 8)) FoundMask |= 1 << 0; - if (NewSize == 8 && !memcmp(CH1, T, 8)) FoundMask |= 1 << 1; - if (NewSize == 8 && !memcmp(CH2, T, 8)) FoundMask |= 1 << 2; - if (NewSize == 8 && !memcmp(CH3, T, 8)) FoundMask |= 1 << 3; - if (NewSize == 8 && !memcmp(CH4, T, 8)) FoundMask |= 1 << 4; - if (NewSize == 8 && !memcmp(CH5, T, 8)) FoundMask |= 1 << 5; - if (NewSize == 8 && !memcmp(CH6, T, 8)) FoundMask |= 1 << 6; - if (NewSize == 8 && !memcmp(CH7, T, 8)) FoundMask |= 1 << 7; - } - EXPECT_EQ(FoundMask, 255); -} - -TEST(FuzzerMutate, ChangeBit1) { - TestChangeBit(&MutationDispatcher::Mutate_ChangeBit, 1 << 16); -} -TEST(FuzzerMutate, ChangeBit2) { - TestChangeBit(&MutationDispatcher::Mutate, 1 << 18); -} - -void TestShuffleBytes(Mutator M, int NumIter) { - std::unique_ptr t(new ExternalFunctions()); - fuzzer::EF = t.get(); - Random Rand(0); - std::unique_ptr MD(new MutationDispatcher(Rand, {})); - int FoundMask = 0; - uint8_t CH0[7] = {0x00, 0x22, 0x11, 0x33, 0x44, 0x55, 0x66}; - uint8_t CH1[7] = {0x11, 0x00, 0x33, 0x22, 0x44, 0x55, 0x66}; - uint8_t CH2[7] = {0x00, 0x33, 0x11, 0x22, 0x44, 0x55, 0x66}; - uint8_t CH3[7] = {0x00, 0x11, 0x22, 0x44, 0x55, 0x66, 0x33}; - uint8_t CH4[7] = {0x00, 0x11, 0x22, 0x33, 0x55, 0x44, 0x66}; - for (int i = 0; i < NumIter; i++) { - uint8_t T[7] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; - size_t NewSize = (*MD.*M)(T, 7, 7); - if (NewSize == 7 && !memcmp(CH0, T, 7)) FoundMask |= 1 << 0; - if (NewSize == 7 && !memcmp(CH1, T, 7)) FoundMask |= 1 << 1; - if (NewSize == 7 && !memcmp(CH2, T, 7)) FoundMask |= 1 << 2; - if (NewSize == 7 && !memcmp(CH3, T, 7)) FoundMask |= 1 << 3; - if (NewSize == 7 && !memcmp(CH4, T, 7)) FoundMask |= 1 << 4; - } - EXPECT_EQ(FoundMask, 31); -} - -TEST(FuzzerMutate, ShuffleBytes1) { - TestShuffleBytes(&MutationDispatcher::Mutate_ShuffleBytes, 1 << 17); -} -TEST(FuzzerMutate, ShuffleBytes2) { - TestShuffleBytes(&MutationDispatcher::Mutate, 1 << 20); -} - -void TestCopyPart(Mutator M, int NumIter) { - std::unique_ptr t(new ExternalFunctions()); - fuzzer::EF = t.get(); - Random Rand(0); - std::unique_ptr MD(new MutationDispatcher(Rand, {})); - int FoundMask = 0; - uint8_t CH0[7] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x00, 0x11}; - uint8_t CH1[7] = {0x55, 0x66, 0x22, 0x33, 0x44, 0x55, 0x66}; - uint8_t CH2[7] = {0x00, 0x55, 0x66, 0x33, 0x44, 0x55, 0x66}; - uint8_t CH3[7] = {0x00, 0x11, 0x22, 0x00, 0x11, 0x22, 0x66}; - uint8_t CH4[7] = {0x00, 0x11, 0x11, 0x22, 0x33, 0x55, 0x66}; - - for (int i = 0; i < NumIter; i++) { - uint8_t T[7] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; - size_t NewSize = (*MD.*M)(T, 7, 7); - if (NewSize == 7 && !memcmp(CH0, T, 7)) FoundMask |= 1 << 0; - if (NewSize == 7 && !memcmp(CH1, T, 7)) FoundMask |= 1 << 1; - if (NewSize == 7 && !memcmp(CH2, T, 7)) FoundMask |= 1 << 2; - if (NewSize == 7 && !memcmp(CH3, T, 7)) FoundMask |= 1 << 3; - if (NewSize == 7 && !memcmp(CH4, T, 7)) FoundMask |= 1 << 4; - } - - uint8_t CH5[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x00, 0x11, 0x22}; - uint8_t CH6[8] = {0x22, 0x33, 0x44, 0x00, 0x11, 0x22, 0x33, 0x44}; - uint8_t CH7[8] = {0x00, 0x11, 0x22, 0x00, 0x11, 0x22, 0x33, 0x44}; - uint8_t CH8[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x22, 0x33, 0x44}; - uint8_t CH9[8] = {0x00, 0x11, 0x22, 0x22, 0x33, 0x44, 0x33, 0x44}; - - for (int i = 0; i < NumIter; i++) { - uint8_t T[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; - size_t NewSize = (*MD.*M)(T, 5, 8); - if (NewSize == 8 && !memcmp(CH5, T, 8)) FoundMask |= 1 << 5; - if (NewSize == 8 && !memcmp(CH6, T, 8)) FoundMask |= 1 << 6; - if (NewSize == 8 && !memcmp(CH7, T, 8)) FoundMask |= 1 << 7; - if (NewSize == 8 && !memcmp(CH8, T, 8)) FoundMask |= 1 << 8; - if (NewSize == 8 && !memcmp(CH9, T, 8)) FoundMask |= 1 << 9; - } - - EXPECT_EQ(FoundMask, 1023); -} - -TEST(FuzzerMutate, CopyPart1) { - TestCopyPart(&MutationDispatcher::Mutate_CopyPart, 1 << 10); -} -TEST(FuzzerMutate, CopyPart2) { - TestCopyPart(&MutationDispatcher::Mutate, 1 << 13); -} -TEST(FuzzerMutate, CopyPartNoInsertAtMaxSize) { - // This (non exhaustively) tests if `Mutate_CopyPart` tries to perform an - // insert on an input of size `MaxSize`. Performing an insert in this case - // will lead to the mutation failing. - std::unique_ptr t(new ExternalFunctions()); - fuzzer::EF = t.get(); - Random Rand(0); - std::unique_ptr MD(new MutationDispatcher(Rand, {})); - uint8_t Data[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x00, 0x11, 0x22}; - size_t MaxSize = sizeof(Data); - for (int count = 0; count < (1 << 18); ++count) { - size_t NewSize = MD->Mutate_CopyPart(Data, MaxSize, MaxSize); - ASSERT_EQ(NewSize, MaxSize); - } -} - -void TestAddWordFromDictionary(Mutator M, int NumIter) { - std::unique_ptr t(new ExternalFunctions()); - fuzzer::EF = t.get(); - Random Rand(0); - std::unique_ptr MD(new MutationDispatcher(Rand, {})); - uint8_t Word1[4] = {0xAA, 0xBB, 0xCC, 0xDD}; - uint8_t Word2[3] = {0xFF, 0xEE, 0xEF}; - MD->AddWordToManualDictionary(Word(Word1, sizeof(Word1))); - MD->AddWordToManualDictionary(Word(Word2, sizeof(Word2))); - int FoundMask = 0; - uint8_t CH0[7] = {0x00, 0x11, 0x22, 0xAA, 0xBB, 0xCC, 0xDD}; - uint8_t CH1[7] = {0x00, 0x11, 0xAA, 0xBB, 0xCC, 0xDD, 0x22}; - uint8_t CH2[7] = {0x00, 0xAA, 0xBB, 0xCC, 0xDD, 0x11, 0x22}; - uint8_t CH3[7] = {0xAA, 0xBB, 0xCC, 0xDD, 0x00, 0x11, 0x22}; - uint8_t CH4[6] = {0x00, 0x11, 0x22, 0xFF, 0xEE, 0xEF}; - uint8_t CH5[6] = {0x00, 0x11, 0xFF, 0xEE, 0xEF, 0x22}; - uint8_t CH6[6] = {0x00, 0xFF, 0xEE, 0xEF, 0x11, 0x22}; - uint8_t CH7[6] = {0xFF, 0xEE, 0xEF, 0x00, 0x11, 0x22}; - for (int i = 0; i < NumIter; i++) { - uint8_t T[7] = {0x00, 0x11, 0x22}; - size_t NewSize = (*MD.*M)(T, 3, 7); - if (NewSize == 7 && !memcmp(CH0, T, 7)) FoundMask |= 1 << 0; - if (NewSize == 7 && !memcmp(CH1, T, 7)) FoundMask |= 1 << 1; - if (NewSize == 7 && !memcmp(CH2, T, 7)) FoundMask |= 1 << 2; - if (NewSize == 7 && !memcmp(CH3, T, 7)) FoundMask |= 1 << 3; - if (NewSize == 6 && !memcmp(CH4, T, 6)) FoundMask |= 1 << 4; - if (NewSize == 6 && !memcmp(CH5, T, 6)) FoundMask |= 1 << 5; - if (NewSize == 6 && !memcmp(CH6, T, 6)) FoundMask |= 1 << 6; - if (NewSize == 6 && !memcmp(CH7, T, 6)) FoundMask |= 1 << 7; - } - EXPECT_EQ(FoundMask, 255); -} - -TEST(FuzzerMutate, AddWordFromDictionary1) { - TestAddWordFromDictionary( - &MutationDispatcher::Mutate_AddWordFromManualDictionary, 1 << 15); -} - -TEST(FuzzerMutate, AddWordFromDictionary2) { - TestAddWordFromDictionary(&MutationDispatcher::Mutate, 1 << 15); -} - -void TestChangeASCIIInteger(Mutator M, int NumIter) { - std::unique_ptr t(new ExternalFunctions()); - fuzzer::EF = t.get(); - Random Rand(0); - std::unique_ptr MD(new MutationDispatcher(Rand, {})); - - uint8_t CH0[8] = {'1', '2', '3', '4', '5', '6', '7', '7'}; - uint8_t CH1[8] = {'1', '2', '3', '4', '5', '6', '7', '9'}; - uint8_t CH2[8] = {'2', '4', '6', '9', '1', '3', '5', '6'}; - uint8_t CH3[8] = {'0', '6', '1', '7', '2', '8', '3', '9'}; - int FoundMask = 0; - for (int i = 0; i < NumIter; i++) { - uint8_t T[8] = {'1', '2', '3', '4', '5', '6', '7', '8'}; - size_t NewSize = (*MD.*M)(T, 8, 8); - /**/ if (NewSize == 8 && !memcmp(CH0, T, 8)) FoundMask |= 1 << 0; - else if (NewSize == 8 && !memcmp(CH1, T, 8)) FoundMask |= 1 << 1; - else if (NewSize == 8 && !memcmp(CH2, T, 8)) FoundMask |= 1 << 2; - else if (NewSize == 8 && !memcmp(CH3, T, 8)) FoundMask |= 1 << 3; - else if (NewSize == 8) FoundMask |= 1 << 4; - } - EXPECT_EQ(FoundMask, 31); -} - -TEST(FuzzerMutate, ChangeASCIIInteger1) { - TestChangeASCIIInteger(&MutationDispatcher::Mutate_ChangeASCIIInteger, - 1 << 15); -} - -TEST(FuzzerMutate, ChangeASCIIInteger2) { - TestChangeASCIIInteger(&MutationDispatcher::Mutate, 1 << 15); -} - -void TestChangeBinaryInteger(Mutator M, int NumIter) { - std::unique_ptr t(new ExternalFunctions()); - fuzzer::EF = t.get(); - Random Rand(0); - std::unique_ptr MD(new MutationDispatcher(Rand, {})); - - uint8_t CH0[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x79}; - uint8_t CH1[8] = {0x00, 0x11, 0x22, 0x31, 0x44, 0x55, 0x66, 0x77}; - uint8_t CH2[8] = {0xff, 0x10, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; - uint8_t CH3[8] = {0x00, 0x11, 0x2a, 0x33, 0x44, 0x55, 0x66, 0x77}; - uint8_t CH4[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x4f, 0x66, 0x77}; - uint8_t CH5[8] = {0xff, 0xee, 0xdd, 0xcc, 0xbb, 0xaa, 0x99, 0x88}; - uint8_t CH6[8] = {0x00, 0x11, 0x22, 0x00, 0x00, 0x00, 0x08, 0x77}; // Size - uint8_t CH7[8] = {0x00, 0x08, 0x00, 0x33, 0x44, 0x55, 0x66, 0x77}; // Sw(Size) - - int FoundMask = 0; - for (int i = 0; i < NumIter; i++) { - uint8_t T[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; - size_t NewSize = (*MD.*M)(T, 8, 8); - /**/ if (NewSize == 8 && !memcmp(CH0, T, 8)) FoundMask |= 1 << 0; - else if (NewSize == 8 && !memcmp(CH1, T, 8)) FoundMask |= 1 << 1; - else if (NewSize == 8 && !memcmp(CH2, T, 8)) FoundMask |= 1 << 2; - else if (NewSize == 8 && !memcmp(CH3, T, 8)) FoundMask |= 1 << 3; - else if (NewSize == 8 && !memcmp(CH4, T, 8)) FoundMask |= 1 << 4; - else if (NewSize == 8 && !memcmp(CH5, T, 8)) FoundMask |= 1 << 5; - else if (NewSize == 8 && !memcmp(CH6, T, 8)) FoundMask |= 1 << 6; - else if (NewSize == 8 && !memcmp(CH7, T, 8)) FoundMask |= 1 << 7; - } - EXPECT_EQ(FoundMask, 255); -} - -TEST(FuzzerMutate, ChangeBinaryInteger1) { - TestChangeBinaryInteger(&MutationDispatcher::Mutate_ChangeBinaryInteger, - 1 << 12); -} - -TEST(FuzzerMutate, ChangeBinaryInteger2) { - TestChangeBinaryInteger(&MutationDispatcher::Mutate, 1 << 15); -} - - TEST(FuzzerDictionary, ParseOneDictionaryEntry) { Unit U; EXPECT_FALSE(ParseOneDictionaryEntry("", &U)); diff --git a/compiler-rt/lib/fuzzer/tests/MutagenUnittest.cpp b/compiler-rt/lib/fuzzer/tests/MutagenUnittest.cpp new file mode 100644 index 0000000000000..287eecf5fe2c8 --- /dev/null +++ b/compiler-rt/lib/fuzzer/tests/MutagenUnittest.cpp @@ -0,0 +1,971 @@ +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "mutagen/Mutagen.h" +#include "mutagen/MutagenDispatcher.h" +#include "mutagen/MutagenSequence.h" +#include "mutagen/MutagenUtil.h" +#include "gtest/gtest.h" +#include + +// This test doesn't set Config.MsanUnpoison*, so ensure MSan isn't present. +// Avoid using fuzzer::ExternalFunctions, since it may not be linked against +// the test binary. +#if defined(__has_feature) +#if __has_feature(memory_sanitizer) +#error MemorySanitizer is not supported for the mutagen unit tests. +#endif // __has_feature(memory_sanitizer) +#endif // defined(__has_feature) + +namespace mutagen { +namespace { + +using fuzzer::Set; + +std::unique_ptr CreateMutationDispatcher() { + LLVMMutagenConfiguration Config; + memset(&Config, 0, sizeof(Config)); + return std::unique_ptr(new MutationDispatcher(&Config)); +} + +typedef size_t (MutationDispatcher::*Mutator)(uint8_t *Data, size_t Size, + size_t MaxSize); + +TEST(MutationDispatcher, CrossOver) { + auto MD = CreateMutationDispatcher(); + Unit A({0, 1, 2}), B({5, 6, 7}); + Unit C; + Unit Expected[] = {{0}, + {0, 1}, + {0, 5}, + {0, 1, 2}, + {0, 1, 5}, + {0, 5, 1}, + {0, 5, 6}, + {0, 1, 2, 5}, + {0, 1, 5, 2}, + {0, 1, 5, 6}, + {0, 5, 1, 2}, + {0, 5, 1, 6}, + {0, 5, 6, 1}, + {0, 5, 6, 7}, + {0, 1, 2, 5, 6}, + {0, 1, 5, 2, 6}, + {0, 1, 5, 6, 2}, + {0, 1, 5, 6, 7}, + {0, 5, 1, 2, 6}, + {0, 5, 1, 6, 2}, + {0, 5, 1, 6, 7}, + {0, 5, 6, 1, 2}, + {0, 5, 6, 1, 7}, + {0, 5, 6, 7, 1}, + {0, 1, 2, 5, 6, 7}, + {0, 1, 5, 2, 6, 7}, + {0, 1, 5, 6, 2, 7}, + {0, 1, 5, 6, 7, 2}, + {0, 5, 1, 2, 6, 7}, + {0, 5, 1, 6, 2, 7}, + {0, 5, 1, 6, 7, 2}, + {0, 5, 6, 1, 2, 7}, + {0, 5, 6, 1, 7, 2}, + {0, 5, 6, 7, 1, 2}}; + for (size_t Len = 1; Len < 8; Len++) { + Set FoundUnits, ExpectedUnitsWitThisLength; + for (int Iter = 0; Iter < 3000; Iter++) { + C.resize(Len); + size_t NewSize = MD->CrossOver(A.data(), A.size(), B.data(), B.size(), + C.data(), C.size()); + C.resize(NewSize); + FoundUnits.insert(C); + } + for (const Unit &U : Expected) + if (U.size() <= Len) + ExpectedUnitsWitThisLength.insert(U); + EXPECT_EQ(ExpectedUnitsWitThisLength, FoundUnits); + } +} + +void TestEraseBytes(Mutator M, int NumIter) { + uint8_t REM0[8] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; + uint8_t REM1[8] = {0x00, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; + uint8_t REM2[8] = {0x00, 0x11, 0x33, 0x44, 0x55, 0x66, 0x77}; + uint8_t REM3[8] = {0x00, 0x11, 0x22, 0x44, 0x55, 0x66, 0x77}; + uint8_t REM4[8] = {0x00, 0x11, 0x22, 0x33, 0x55, 0x66, 0x77}; + uint8_t REM5[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x66, 0x77}; + uint8_t REM6[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x77}; + uint8_t REM7[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; + + uint8_t REM8[6] = {0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; + uint8_t REM9[6] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55}; + uint8_t REM10[6] = {0x00, 0x11, 0x22, 0x55, 0x66, 0x77}; + + uint8_t REM11[5] = {0x33, 0x44, 0x55, 0x66, 0x77}; + uint8_t REM12[5] = {0x00, 0x11, 0x22, 0x33, 0x44}; + uint8_t REM13[5] = {0x00, 0x44, 0x55, 0x66, 0x77}; + + auto MD = CreateMutationDispatcher(); + int FoundMask = 0; + for (int i = 0; i < NumIter; i++) { + uint8_t T[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; + size_t NewSize = (*MD.*M)(T, sizeof(T), sizeof(T)); + if (NewSize == 7 && !memcmp(REM0, T, 7)) + FoundMask |= 1 << 0; + if (NewSize == 7 && !memcmp(REM1, T, 7)) + FoundMask |= 1 << 1; + if (NewSize == 7 && !memcmp(REM2, T, 7)) + FoundMask |= 1 << 2; + if (NewSize == 7 && !memcmp(REM3, T, 7)) + FoundMask |= 1 << 3; + if (NewSize == 7 && !memcmp(REM4, T, 7)) + FoundMask |= 1 << 4; + if (NewSize == 7 && !memcmp(REM5, T, 7)) + FoundMask |= 1 << 5; + if (NewSize == 7 && !memcmp(REM6, T, 7)) + FoundMask |= 1 << 6; + if (NewSize == 7 && !memcmp(REM7, T, 7)) + FoundMask |= 1 << 7; + + if (NewSize == 6 && !memcmp(REM8, T, 6)) + FoundMask |= 1 << 8; + if (NewSize == 6 && !memcmp(REM9, T, 6)) + FoundMask |= 1 << 9; + if (NewSize == 6 && !memcmp(REM10, T, 6)) + FoundMask |= 1 << 10; + + if (NewSize == 5 && !memcmp(REM11, T, 5)) + FoundMask |= 1 << 11; + if (NewSize == 5 && !memcmp(REM12, T, 5)) + FoundMask |= 1 << 12; + if (NewSize == 5 && !memcmp(REM13, T, 5)) + FoundMask |= 1 << 13; + } + EXPECT_EQ(FoundMask, (1 << 14) - 1); +} + +TEST(MutationDispatcher, EraseBytes1) { + TestEraseBytes(&MutationDispatcher::Mutate_EraseBytes, 200); +} +TEST(MutationDispatcher, EraseBytes2) { + TestEraseBytes(&MutationDispatcher::Mutate, 2000); +} + +void TestInsertByte(Mutator M, int NumIter) { + auto MD = CreateMutationDispatcher(); + int FoundMask = 0; + uint8_t INS0[8] = {0xF1, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; + uint8_t INS1[8] = {0x00, 0xF2, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; + uint8_t INS2[8] = {0x00, 0x11, 0xF3, 0x22, 0x33, 0x44, 0x55, 0x66}; + uint8_t INS3[8] = {0x00, 0x11, 0x22, 0xF4, 0x33, 0x44, 0x55, 0x66}; + uint8_t INS4[8] = {0x00, 0x11, 0x22, 0x33, 0xF5, 0x44, 0x55, 0x66}; + uint8_t INS5[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0xF6, 0x55, 0x66}; + uint8_t INS6[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0xF7, 0x66}; + uint8_t INS7[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0xF8}; + for (int i = 0; i < NumIter; i++) { + uint8_t T[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; + size_t NewSize = (*MD.*M)(T, 7, 8); + if (NewSize == 8 && !memcmp(INS0, T, 8)) + FoundMask |= 1 << 0; + if (NewSize == 8 && !memcmp(INS1, T, 8)) + FoundMask |= 1 << 1; + if (NewSize == 8 && !memcmp(INS2, T, 8)) + FoundMask |= 1 << 2; + if (NewSize == 8 && !memcmp(INS3, T, 8)) + FoundMask |= 1 << 3; + if (NewSize == 8 && !memcmp(INS4, T, 8)) + FoundMask |= 1 << 4; + if (NewSize == 8 && !memcmp(INS5, T, 8)) + FoundMask |= 1 << 5; + if (NewSize == 8 && !memcmp(INS6, T, 8)) + FoundMask |= 1 << 6; + if (NewSize == 8 && !memcmp(INS7, T, 8)) + FoundMask |= 1 << 7; + } + EXPECT_EQ(FoundMask, 255); +} + +TEST(MutationDispatcher, InsertByte1) { + TestInsertByte(&MutationDispatcher::Mutate_InsertByte, 1 << 15); +} +TEST(MutationDispatcher, InsertByte2) { + TestInsertByte(&MutationDispatcher::Mutate, 1 << 17); +} + +void TestInsertRepeatedBytes(Mutator M, int NumIter) { + auto MD = CreateMutationDispatcher(); + int FoundMask = 0; + uint8_t INS0[7] = {0x00, 0x11, 0x22, 0x33, 'a', 'a', 'a'}; + uint8_t INS1[7] = {0x00, 0x11, 0x22, 'a', 'a', 'a', 0x33}; + uint8_t INS2[7] = {0x00, 0x11, 'a', 'a', 'a', 0x22, 0x33}; + uint8_t INS3[7] = {0x00, 'a', 'a', 'a', 0x11, 0x22, 0x33}; + uint8_t INS4[7] = {'a', 'a', 'a', 0x00, 0x11, 0x22, 0x33}; + + uint8_t INS5[8] = {0x00, 0x11, 0x22, 0x33, 'b', 'b', 'b', 'b'}; + uint8_t INS6[8] = {0x00, 0x11, 0x22, 'b', 'b', 'b', 'b', 0x33}; + uint8_t INS7[8] = {0x00, 0x11, 'b', 'b', 'b', 'b', 0x22, 0x33}; + uint8_t INS8[8] = {0x00, 'b', 'b', 'b', 'b', 0x11, 0x22, 0x33}; + uint8_t INS9[8] = {'b', 'b', 'b', 'b', 0x00, 0x11, 0x22, 0x33}; + + for (int i = 0; i < NumIter; i++) { + uint8_t T[8] = {0x00, 0x11, 0x22, 0x33}; + size_t NewSize = (*MD.*M)(T, 4, 8); + if (NewSize == 7 && !memcmp(INS0, T, 7)) + FoundMask |= 1 << 0; + if (NewSize == 7 && !memcmp(INS1, T, 7)) + FoundMask |= 1 << 1; + if (NewSize == 7 && !memcmp(INS2, T, 7)) + FoundMask |= 1 << 2; + if (NewSize == 7 && !memcmp(INS3, T, 7)) + FoundMask |= 1 << 3; + if (NewSize == 7 && !memcmp(INS4, T, 7)) + FoundMask |= 1 << 4; + + if (NewSize == 8 && !memcmp(INS5, T, 8)) + FoundMask |= 1 << 5; + if (NewSize == 8 && !memcmp(INS6, T, 8)) + FoundMask |= 1 << 6; + if (NewSize == 8 && !memcmp(INS7, T, 8)) + FoundMask |= 1 << 7; + if (NewSize == 8 && !memcmp(INS8, T, 8)) + FoundMask |= 1 << 8; + if (NewSize == 8 && !memcmp(INS9, T, 8)) + FoundMask |= 1 << 9; + } + EXPECT_EQ(FoundMask, (1 << 10) - 1); +} + +TEST(MutationDispatcher, InsertRepeatedBytes1) { + TestInsertRepeatedBytes(&MutationDispatcher::Mutate_InsertRepeatedBytes, + 10000); +} +TEST(MutationDispatcher, InsertRepeatedBytes2) { + TestInsertRepeatedBytes(&MutationDispatcher::Mutate, 300000); +} + +void TestChangeByte(Mutator M, int NumIter) { + auto MD = CreateMutationDispatcher(); + int FoundMask = 0; + uint8_t CH0[8] = {0xF0, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; + uint8_t CH1[8] = {0x00, 0xF1, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; + uint8_t CH2[8] = {0x00, 0x11, 0xF2, 0x33, 0x44, 0x55, 0x66, 0x77}; + uint8_t CH3[8] = {0x00, 0x11, 0x22, 0xF3, 0x44, 0x55, 0x66, 0x77}; + uint8_t CH4[8] = {0x00, 0x11, 0x22, 0x33, 0xF4, 0x55, 0x66, 0x77}; + uint8_t CH5[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0xF5, 0x66, 0x77}; + uint8_t CH6[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0xF5, 0x77}; + uint8_t CH7[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0xF7}; + for (int i = 0; i < NumIter; i++) { + uint8_t T[9] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; + size_t NewSize = (*MD.*M)(T, 8, 9); + if (NewSize == 8 && !memcmp(CH0, T, 8)) + FoundMask |= 1 << 0; + if (NewSize == 8 && !memcmp(CH1, T, 8)) + FoundMask |= 1 << 1; + if (NewSize == 8 && !memcmp(CH2, T, 8)) + FoundMask |= 1 << 2; + if (NewSize == 8 && !memcmp(CH3, T, 8)) + FoundMask |= 1 << 3; + if (NewSize == 8 && !memcmp(CH4, T, 8)) + FoundMask |= 1 << 4; + if (NewSize == 8 && !memcmp(CH5, T, 8)) + FoundMask |= 1 << 5; + if (NewSize == 8 && !memcmp(CH6, T, 8)) + FoundMask |= 1 << 6; + if (NewSize == 8 && !memcmp(CH7, T, 8)) + FoundMask |= 1 << 7; + } + EXPECT_EQ(FoundMask, 255); +} + +TEST(MutationDispatcher, ChangeByte1) { + TestChangeByte(&MutationDispatcher::Mutate_ChangeByte, 1 << 15); +} +TEST(MutationDispatcher, ChangeByte2) { + TestChangeByte(&MutationDispatcher::Mutate, 1 << 17); +} + +void TestChangeBit(Mutator M, int NumIter) { + auto MD = CreateMutationDispatcher(); + int FoundMask = 0; + uint8_t CH0[8] = {0x01, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; + uint8_t CH1[8] = {0x00, 0x13, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; + uint8_t CH2[8] = {0x00, 0x11, 0x02, 0x33, 0x44, 0x55, 0x66, 0x77}; + uint8_t CH3[8] = {0x00, 0x11, 0x22, 0x37, 0x44, 0x55, 0x66, 0x77}; + uint8_t CH4[8] = {0x00, 0x11, 0x22, 0x33, 0x54, 0x55, 0x66, 0x77}; + uint8_t CH5[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x54, 0x66, 0x77}; + uint8_t CH6[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x76, 0x77}; + uint8_t CH7[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0xF7}; + for (int i = 0; i < NumIter; i++) { + uint8_t T[9] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; + size_t NewSize = (*MD.*M)(T, 8, 9); + if (NewSize == 8 && !memcmp(CH0, T, 8)) + FoundMask |= 1 << 0; + if (NewSize == 8 && !memcmp(CH1, T, 8)) + FoundMask |= 1 << 1; + if (NewSize == 8 && !memcmp(CH2, T, 8)) + FoundMask |= 1 << 2; + if (NewSize == 8 && !memcmp(CH3, T, 8)) + FoundMask |= 1 << 3; + if (NewSize == 8 && !memcmp(CH4, T, 8)) + FoundMask |= 1 << 4; + if (NewSize == 8 && !memcmp(CH5, T, 8)) + FoundMask |= 1 << 5; + if (NewSize == 8 && !memcmp(CH6, T, 8)) + FoundMask |= 1 << 6; + if (NewSize == 8 && !memcmp(CH7, T, 8)) + FoundMask |= 1 << 7; + } + EXPECT_EQ(FoundMask, 255); +} + +TEST(MutationDispatcher, ChangeBit1) { + TestChangeBit(&MutationDispatcher::Mutate_ChangeBit, 1 << 16); +} +TEST(MutationDispatcher, ChangeBit2) { + TestChangeBit(&MutationDispatcher::Mutate, 1 << 18); +} + +void TestShuffleBytes(Mutator M, int NumIter) { + auto MD = CreateMutationDispatcher(); + int FoundMask = 0; + uint8_t CH0[7] = {0x00, 0x22, 0x11, 0x33, 0x44, 0x55, 0x66}; + uint8_t CH1[7] = {0x11, 0x00, 0x33, 0x22, 0x44, 0x55, 0x66}; + uint8_t CH2[7] = {0x00, 0x33, 0x11, 0x22, 0x44, 0x55, 0x66}; + uint8_t CH3[7] = {0x00, 0x11, 0x22, 0x44, 0x55, 0x66, 0x33}; + uint8_t CH4[7] = {0x00, 0x11, 0x22, 0x33, 0x55, 0x44, 0x66}; + for (int i = 0; i < NumIter; i++) { + uint8_t T[7] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; + size_t NewSize = (*MD.*M)(T, 7, 7); + if (NewSize == 7 && !memcmp(CH0, T, 7)) + FoundMask |= 1 << 0; + if (NewSize == 7 && !memcmp(CH1, T, 7)) + FoundMask |= 1 << 1; + if (NewSize == 7 && !memcmp(CH2, T, 7)) + FoundMask |= 1 << 2; + if (NewSize == 7 && !memcmp(CH3, T, 7)) + FoundMask |= 1 << 3; + if (NewSize == 7 && !memcmp(CH4, T, 7)) + FoundMask |= 1 << 4; + } + EXPECT_EQ(FoundMask, 31); +} + +TEST(MutationDispatcher, ShuffleBytes1) { + TestShuffleBytes(&MutationDispatcher::Mutate_ShuffleBytes, 1 << 17); +} +TEST(MutationDispatcher, ShuffleBytes2) { + TestShuffleBytes(&MutationDispatcher::Mutate, 1 << 20); +} + +void TestCopyPart(Mutator M, int NumIter) { + auto MD = CreateMutationDispatcher(); + int FoundMask = 0; + uint8_t CH0[7] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x00, 0x11}; + uint8_t CH1[7] = {0x55, 0x66, 0x22, 0x33, 0x44, 0x55, 0x66}; + uint8_t CH2[7] = {0x00, 0x55, 0x66, 0x33, 0x44, 0x55, 0x66}; + uint8_t CH3[7] = {0x00, 0x11, 0x22, 0x00, 0x11, 0x22, 0x66}; + uint8_t CH4[7] = {0x00, 0x11, 0x11, 0x22, 0x33, 0x55, 0x66}; + + for (int i = 0; i < NumIter; i++) { + uint8_t T[7] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; + size_t NewSize = (*MD.*M)(T, 7, 7); + if (NewSize == 7 && !memcmp(CH0, T, 7)) + FoundMask |= 1 << 0; + if (NewSize == 7 && !memcmp(CH1, T, 7)) + FoundMask |= 1 << 1; + if (NewSize == 7 && !memcmp(CH2, T, 7)) + FoundMask |= 1 << 2; + if (NewSize == 7 && !memcmp(CH3, T, 7)) + FoundMask |= 1 << 3; + if (NewSize == 7 && !memcmp(CH4, T, 7)) + FoundMask |= 1 << 4; + } + + uint8_t CH5[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x00, 0x11, 0x22}; + uint8_t CH6[8] = {0x22, 0x33, 0x44, 0x00, 0x11, 0x22, 0x33, 0x44}; + uint8_t CH7[8] = {0x00, 0x11, 0x22, 0x00, 0x11, 0x22, 0x33, 0x44}; + uint8_t CH8[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x22, 0x33, 0x44}; + uint8_t CH9[8] = {0x00, 0x11, 0x22, 0x22, 0x33, 0x44, 0x33, 0x44}; + + for (int i = 0; i < NumIter; i++) { + uint8_t T[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; + size_t NewSize = (*MD.*M)(T, 5, 8); + if (NewSize == 8 && !memcmp(CH5, T, 8)) + FoundMask |= 1 << 5; + if (NewSize == 8 && !memcmp(CH6, T, 8)) + FoundMask |= 1 << 6; + if (NewSize == 8 && !memcmp(CH7, T, 8)) + FoundMask |= 1 << 7; + if (NewSize == 8 && !memcmp(CH8, T, 8)) + FoundMask |= 1 << 8; + if (NewSize == 8 && !memcmp(CH9, T, 8)) + FoundMask |= 1 << 9; + } + + EXPECT_EQ(FoundMask, 1023); +} + +TEST(MutationDispatcher, CopyPart1) { + TestCopyPart(&MutationDispatcher::Mutate_CopyPart, 1 << 10); +} +TEST(MutationDispatcher, CopyPart2) { + TestCopyPart(&MutationDispatcher::Mutate, 1 << 13); +} +TEST(MutationDispatcher, CopyPartNoInsertAtMaxSize) { + // This (non exhaustively) tests if `Mutate_CopyPart` tries to perform an + // insert on an input of size `MaxSize`. Performing an insert in this case + // will lead to the mutation failing. + auto MD = CreateMutationDispatcher(); + uint8_t Data[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x00, 0x11, 0x22}; + size_t MaxSize = sizeof(Data); + for (int count = 0; count < (1 << 18); ++count) { + size_t NewSize = MD->Mutate_CopyPart(Data, MaxSize, MaxSize); + ASSERT_EQ(NewSize, MaxSize); + } +} + +void TestAddWordFromDictionary(Mutator M, int NumIter) { + auto MD = CreateMutationDispatcher(); + uint8_t Word1[4] = {0xAA, 0xBB, 0xCC, 0xDD}; + uint8_t Word2[3] = {0xFF, 0xEE, 0xEF}; + MD->AddWordToManualDictionary(Word(Word1, sizeof(Word1))); + MD->AddWordToManualDictionary(Word(Word2, sizeof(Word2))); + int FoundMask = 0; + uint8_t CH0[7] = {0x00, 0x11, 0x22, 0xAA, 0xBB, 0xCC, 0xDD}; + uint8_t CH1[7] = {0x00, 0x11, 0xAA, 0xBB, 0xCC, 0xDD, 0x22}; + uint8_t CH2[7] = {0x00, 0xAA, 0xBB, 0xCC, 0xDD, 0x11, 0x22}; + uint8_t CH3[7] = {0xAA, 0xBB, 0xCC, 0xDD, 0x00, 0x11, 0x22}; + uint8_t CH4[6] = {0x00, 0x11, 0x22, 0xFF, 0xEE, 0xEF}; + uint8_t CH5[6] = {0x00, 0x11, 0xFF, 0xEE, 0xEF, 0x22}; + uint8_t CH6[6] = {0x00, 0xFF, 0xEE, 0xEF, 0x11, 0x22}; + uint8_t CH7[6] = {0xFF, 0xEE, 0xEF, 0x00, 0x11, 0x22}; + for (int i = 0; i < NumIter; i++) { + uint8_t T[7] = {0x00, 0x11, 0x22}; + size_t NewSize = (*MD.*M)(T, 3, 7); + if (NewSize == 7 && !memcmp(CH0, T, 7)) + FoundMask |= 1 << 0; + if (NewSize == 7 && !memcmp(CH1, T, 7)) + FoundMask |= 1 << 1; + if (NewSize == 7 && !memcmp(CH2, T, 7)) + FoundMask |= 1 << 2; + if (NewSize == 7 && !memcmp(CH3, T, 7)) + FoundMask |= 1 << 3; + if (NewSize == 6 && !memcmp(CH4, T, 6)) + FoundMask |= 1 << 4; + if (NewSize == 6 && !memcmp(CH5, T, 6)) + FoundMask |= 1 << 5; + if (NewSize == 6 && !memcmp(CH6, T, 6)) + FoundMask |= 1 << 6; + if (NewSize == 6 && !memcmp(CH7, T, 6)) + FoundMask |= 1 << 7; + } + EXPECT_EQ(FoundMask, 255); +} + +TEST(MutationDispatcher, AddWordFromDictionary1) { + TestAddWordFromDictionary( + &MutationDispatcher::Mutate_AddWordFromManualDictionary, 1 << 15); +} + +TEST(MutationDispatcher, AddWordFromDictionary2) { + TestAddWordFromDictionary(&MutationDispatcher::Mutate, 1 << 15); +} + +void TestChangeASCIIInteger(Mutator M, int NumIter) { + auto MD = CreateMutationDispatcher(); + + uint8_t CH0[8] = {'1', '2', '3', '4', '5', '6', '7', '7'}; + uint8_t CH1[8] = {'1', '2', '3', '4', '5', '6', '7', '9'}; + uint8_t CH2[8] = {'2', '4', '6', '9', '1', '3', '5', '6'}; + uint8_t CH3[8] = {'0', '6', '1', '7', '2', '8', '3', '9'}; + int FoundMask = 0; + for (int i = 0; i < NumIter; i++) { + uint8_t T[8] = {'1', '2', '3', '4', '5', '6', '7', '8'}; + size_t NewSize = (*MD.*M)(T, 8, 8); + /**/ if (NewSize == 8 && !memcmp(CH0, T, 8)) + FoundMask |= 1 << 0; + else if (NewSize == 8 && !memcmp(CH1, T, 8)) + FoundMask |= 1 << 1; + else if (NewSize == 8 && !memcmp(CH2, T, 8)) + FoundMask |= 1 << 2; + else if (NewSize == 8 && !memcmp(CH3, T, 8)) + FoundMask |= 1 << 3; + else if (NewSize == 8) + FoundMask |= 1 << 4; + } + EXPECT_EQ(FoundMask, 31); +} + +TEST(MutationDispatcher, ChangeASCIIInteger1) { + TestChangeASCIIInteger(&MutationDispatcher::Mutate_ChangeASCIIInteger, + 1 << 15); +} + +TEST(MutationDispatcher, ChangeASCIIInteger2) { + TestChangeASCIIInteger(&MutationDispatcher::Mutate, 1 << 15); +} + +void TestChangeBinaryInteger(Mutator M, int NumIter) { + auto MD = CreateMutationDispatcher(); + + uint8_t CH0[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x79}; + uint8_t CH1[8] = {0x00, 0x11, 0x22, 0x31, 0x44, 0x55, 0x66, 0x77}; + uint8_t CH2[8] = {0xff, 0x10, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; + uint8_t CH3[8] = {0x00, 0x11, 0x2a, 0x33, 0x44, 0x55, 0x66, 0x77}; + uint8_t CH4[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x4f, 0x66, 0x77}; + uint8_t CH5[8] = {0xff, 0xee, 0xdd, 0xcc, 0xbb, 0xaa, 0x99, 0x88}; + uint8_t CH6[8] = {0x00, 0x11, 0x22, 0x00, 0x00, 0x00, 0x08, 0x77}; // Size + uint8_t CH7[8] = {0x00, 0x08, 0x00, 0x33, 0x44, 0x55, 0x66, 0x77}; // Sw(Size) + + int FoundMask = 0; + for (int i = 0; i < NumIter; i++) { + uint8_t T[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; + size_t NewSize = (*MD.*M)(T, 8, 8); + /**/ if (NewSize == 8 && !memcmp(CH0, T, 8)) + FoundMask |= 1 << 0; + else if (NewSize == 8 && !memcmp(CH1, T, 8)) + FoundMask |= 1 << 1; + else if (NewSize == 8 && !memcmp(CH2, T, 8)) + FoundMask |= 1 << 2; + else if (NewSize == 8 && !memcmp(CH3, T, 8)) + FoundMask |= 1 << 3; + else if (NewSize == 8 && !memcmp(CH4, T, 8)) + FoundMask |= 1 << 4; + else if (NewSize == 8 && !memcmp(CH5, T, 8)) + FoundMask |= 1 << 5; + else if (NewSize == 8 && !memcmp(CH6, T, 8)) + FoundMask |= 1 << 6; + else if (NewSize == 8 && !memcmp(CH7, T, 8)) + FoundMask |= 1 << 7; + } + EXPECT_EQ(FoundMask, 255); +} + +TEST(MutationDispatcher, ChangeBinaryInteger1) { + TestChangeBinaryInteger(&MutationDispatcher::Mutate_ChangeBinaryInteger, + 1 << 12); +} + +TEST(MutationDispatcher, ChangeBinaryInteger2) { + TestChangeBinaryInteger(&MutationDispatcher::Mutate, 1 << 15); +} + +// Test fixture for MutagenInterface unit tests. +static const char *kWord1 = "word1"; +static const char *kWord2 = "word2"; + +class MutagenInterface : public ::testing::Test { +protected: + void SetUp() override { + Current = this; + memset(&Config, 0, sizeof(Config)); + + Config.Seed = 1; + + Config.UseCmp = 1; + Config.FromTORC4 = [](size_t Idx, uint32_t *Arg1, uint32_t *Arg2) { + ++(Current->FromTORC4Calls); + *Arg1 = 0x0401; + *Arg2 = 0x0402; + }; + Config.FromTORC8 = [](size_t Idx, uint64_t *Arg1, uint64_t *Arg2) { + ++(Current->FromTORC8Calls); + *Arg1 = 0x0801; + *Arg2 = 0x0802; + }; + Config.FromTORCW = [](size_t Idx, const uint8_t **Data1, size_t *Size1, + const uint8_t **Data2, size_t *Size2) { + ++(Current->FromTORCWCalls); + *Data1 = reinterpret_cast(kWord1); + *Size1 = strlen(kWord1); + *Data2 = reinterpret_cast(kWord2); + *Size2 = strlen(kWord2); + }; + + Config.UseMemmem = 0; + Config.FromMMT = [](size_t Idx, const uint8_t **Data, size_t *Size) { + ++(Current->FromMMTCalls); + *Data = reinterpret_cast(kWord1); + *Size = strlen(kWord1); + }; + + Config.OnlyASCII = 0; + + Config.CustomMutator = [](uint8_t *Data, size_t Size, size_t MaxSize, + unsigned int Seed) { + ++(Current->CustomMutatorCalls); + return LLVMMutagenDefaultMutate(Data, Size, MaxSize); + }; + + Config.CustomCrossOver = + [](const uint8_t *Data1, size_t Size1, const uint8_t *Data2, + size_t Size2, uint8_t *Out, size_t MaxOutSize, unsigned int Seed) { + ++(Current->CustomCrossOverCalls); + auto *MD = GetMutationDispatcherForTest(); + return MD->CrossOver(Data1, Size1, Data2, Size2, Out, MaxOutSize); + }; + + U = Unit({1, 2, 3, 4}); + U.reserve(8); + } + + void TearDown() override { + Current = nullptr; + memset(&Config, 0, sizeof(Config)); + LLVMMutagenConfigure(&Config); + } + + LLVMMutagenConfiguration Config; + Unit U; + + size_t FromTORC4Calls = 0; + size_t FromTORC8Calls = 0; + size_t FromTORCWCalls = 0; + size_t FromMMTCalls = 0; + size_t CustomMutatorCalls = 0; + size_t CustomCrossOverCalls = 0; + +private: + static MutagenInterface *Current; +}; + +MutagenInterface *MutagenInterface::Current = nullptr; + +// Unit tests for MutagenInterface. + +TEST_F(MutagenInterface, Configure) { + Config.OnlyASCII = 1; + LLVMMutagenConfigure(&Config); + auto *MD = GetMutationDispatcherForTest(); + ASSERT_NE(MD, nullptr); + + Random Rand1(Config.Seed); + Random &Rand2 = MD->GetRand(); + for (size_t i = 0; i < 10; ++i) + EXPECT_EQ(Rand1(), Rand2()); + + Config.Seed = static_cast( + std::chrono::system_clock::now().time_since_epoch().count()); + Config.OnlyASCII = 0; + LLVMMutagenConfigure(&Config); + MD = GetMutationDispatcherForTest(); + ASSERT_NE(MD, nullptr); + + Random Rand3(Config.Seed); + Random &Rand4 = MD->GetRand(); + for (size_t i = 0; i < 10; ++i) + EXPECT_EQ(Rand3(), Rand4()); +} + +TEST_F(MutagenInterface, UseTORCs) { + // If !UseCmp, none of the TORC/MMT callbacks are called, regardless of + // UseMemmem. + Config.UseCmp = 0; + Config.UseMemmem = 1; + LLVMMutagenConfigure(&Config); + for (size_t i = 0; i < 200; ++i) + LLVMMutagenMutate(U.data(), U.size(), U.capacity()); + EXPECT_EQ(FromTORC4Calls, 0U); + EXPECT_EQ(FromTORC8Calls, 0U); + EXPECT_EQ(FromTORCWCalls, 0U); + EXPECT_EQ(FromMMTCalls, 0U); + + // If UseCmp, but !UseMemmem, only the TORC callbacks are invoked. + Config.UseCmp = 1; + Config.UseMemmem = 0; + LLVMMutagenConfigure(&Config); + for (size_t i = 0; i < 200; ++i) + LLVMMutagenMutate(U.data(), U.size(), U.capacity()); + EXPECT_NE(FromTORC4Calls, 0U); + EXPECT_NE(FromTORC8Calls, 0U); + EXPECT_NE(FromTORCWCalls, 0U); + EXPECT_EQ(FromMMTCalls, 0U); + + // If UseCmp and UseMemmem, all the TORC/MMT callbacks are invoked. + Config.UseCmp = 1; + Config.UseMemmem = 1; + LLVMMutagenConfigure(&Config); + for (size_t i = 0; i < 200; ++i) + LLVMMutagenMutate(U.data(), U.size(), U.capacity()); + EXPECT_NE(FromTORC4Calls, 0U); + EXPECT_NE(FromTORC8Calls, 0U); + EXPECT_NE(FromTORCWCalls, 0U); + EXPECT_NE(FromMMTCalls, 0U); +} + +TEST_F(MutagenInterface, CustomCallbacks) { + // DefaultMutate never selects custom callbacks. + LLVMMutagenConfigure(&Config); + for (size_t i = 0; i < 200; ++i) + LLVMMutagenDefaultMutate(U.data(), U.size(), U.capacity()); + + // Valid. + auto *MD = GetMutationDispatcherForTest(); + EXPECT_EQ(CustomMutatorCalls, 0U); + MD->Mutate_Custom(U.data(), U.size(), U.capacity()); + EXPECT_EQ(CustomMutatorCalls, 1U); + + // Null cross-over input disables CustomCrossOver. + LLVMMutagenSetCrossOverWith(nullptr, 0); + MD->Mutate_CustomCrossOver(U.data(), U.size(), U.capacity()); + EXPECT_EQ(CustomCrossOverCalls, 0U); + + // Zero-length cross-over input disables CustomCrossOver. + Unit CrossOverWith = {4, 3, 2, 1}; + LLVMMutagenSetCrossOverWith(CrossOverWith.data(), 0); + MD->Mutate_CustomCrossOver(U.data(), U.size(), U.capacity()); + EXPECT_EQ(CustomCrossOverCalls, 0U); + + // Valid. + LLVMMutagenSetCrossOverWith(CrossOverWith.data(), CrossOverWith.size()); + MD->Mutate_CustomCrossOver(U.data(), U.size(), U.capacity()); + EXPECT_EQ(CustomCrossOverCalls, 1U); + + // Can mutate without custom callbacks. + Config.CustomMutator = nullptr; + Config.CustomCrossOver = nullptr; + LLVMMutagenConfigure(&Config); + for (size_t i = 0; i < 200; ++i) + LLVMMutagenMutate(U.data(), U.size(), U.capacity()); +} + +TEST_F(MutagenInterface, MutationSequence) { + LLVMMutagenConfigure(&Config); + char Buf[1024]; + size_t NumItems; + + Set Names = { + "ShuffleBytes", "EraseBytes", "InsertBytes", "InsertRepeatedBytes", + "ChangeByte", "ChangeBit", "CopyPart", "ChangeASCIIInt", + "ChangeBinInt", + }; + std::string Name; + std::istringstream ISS; + + // Empty sequences + auto Size = LLVMMutagenGetMutationSequence(true, Buf, sizeof(Buf), &NumItems); + EXPECT_STREQ(Buf, ""); + EXPECT_EQ(Size, 0U); + EXPECT_EQ(NumItems, 0U); + + while (true) { + // Can get size without output parameters. + Size = LLVMMutagenGetMutationSequence(true, nullptr, 0, &NumItems); + if (NumItems > Sequence::kMaxBriefItems) + break; + // !Verbose has no effect for <= 10 items. + EXPECT_EQ(LLVMMutagenGetMutationSequence(false, nullptr, 0, nullptr), Size); + EXPECT_GT(LLVMMutagenDefaultMutate(U.data(), U.size(), U.capacity()), 0U); + } + + // All items are valid. + LLVMMutagenGetMutationSequence(true, Buf, sizeof(Buf), nullptr); + ISS.str(Buf); + size_t N = 0; + while (std::getline(ISS, Name, '-')) { + EXPECT_GT(Names.count(Name), 0U); + ++N; + } + EXPECT_EQ(N, NumItems); + + // !Verbose truncates, but items are still valid. + EXPECT_LT(LLVMMutagenGetMutationSequence(false, Buf, sizeof(Buf), nullptr), + Size); + ISS.str(Buf); + N = 0; + while (std::getline(ISS, Name, '-')) { + EXPECT_GT(Names.count(Name), 0U); + ++N; + } + EXPECT_LT(N, NumItems); + + // Truncated sequence is a prefix of its untruncated equivalent. + std::string Truncated(Buf); + LLVMMutagenGetMutationSequence(true, Buf, sizeof(Buf), &NumItems); + Buf[Truncated.size()] = '\0'; + EXPECT_STREQ(Truncated.c_str(), Buf); + + // Stops at the end of |Buf|, and null terminates. + EXPECT_EQ(LLVMMutagenGetMutationSequence(true, Buf, Size - 1, nullptr), Size); + EXPECT_EQ(strlen(Buf), Size - 2); + + // Clear the sequence. + LLVMMutagenResetSequence(); + EXPECT_EQ(LLVMMutagenGetMutationSequence(true, nullptr, 0, nullptr), 0U); +} + +static uint8_t FromASCIINybble(char C) { + if ('0' <= C && C <= '9') + return static_cast(C - '0'); + if ('A' <= C && C <= 'F') + return static_cast(C - 'A' + 10); + assert('a' <= C && C <= 'f'); + return static_cast(C - 'a' + 10); +} + +static Word FromASCII(const char *DE) { + Unit Tmp; + bool Escape = false; + size_t Hex = 0; + uint8_t Nybble = 0; + for (char C = *DE++; C; C = *DE++) { + if (Hex == 2) { + Nybble = FromASCIINybble(C); + --Hex; + } else if (Hex == 1) { + Tmp.push_back(static_cast(Nybble << 4) | FromASCIINybble(C)); + --Hex; + } else if (Escape) { + switch (C) { + case '\\': + case '"': + Tmp.push_back(static_cast(C)); + break; + case 'x': + Hex = 2; + break; + default: + assert(false && "FromASCII failure."); + } + Escape = false; + } else if (C == '\\') { + Escape = true; + } else { + Tmp.push_back(static_cast(C)); + } + } + return Word(Tmp.data(), Tmp.size()); +} + +TEST_F(MutagenInterface, Dictionaries) { + LLVMMutagenConfigure(&Config); + size_t NumItems; + char Buf[1024]; + std::istringstream ISS; + std::string Str; + + // Empty sequences + auto Size = + LLVMMutagenGetDictionaryEntrySequence(true, Buf, sizeof(Buf), &NumItems); + EXPECT_STREQ(Buf, ""); + EXPECT_EQ(Size, 0U); + EXPECT_EQ(NumItems, 0U); + + auto *MD = GetMutationDispatcherForTest(); + while (true) { + // Can get size without output parameters. + Size = LLVMMutagenGetDictionaryEntrySequence(true, nullptr, 0, &NumItems); + if (NumItems > Sequence::kMaxBriefItems) + break; + // !Verbose has no effect for <= 10 items. + EXPECT_EQ(LLVMMutagenGetDictionaryEntrySequence(false, nullptr, 0, nullptr), + Size); + MD->Mutate_AddWordFromTORC(U.data(), U.size(), U.capacity()); + } + + // All items are valid. + LLVMMutagenGetDictionaryEntrySequence(true, Buf, sizeof(Buf), nullptr); + ISS.str(Buf); + size_t N = 0; + while (std::getline(ISS, Str, '-')) { + ASSERT_FALSE(Str.empty()); + EXPECT_EQ(Str[0], '"'); + EXPECT_EQ(Str[Str.size() - 1], '"'); + ++N; + } + EXPECT_EQ(N, NumItems); + + // !Verbose truncates, but items are still valid. + EXPECT_LT( + LLVMMutagenGetDictionaryEntrySequence(false, Buf, sizeof(Buf), nullptr), + Size); + ISS.str(Buf); + N = 0; + while (std::getline(ISS, Str, '-')) { + ASSERT_FALSE(Str.empty()); + EXPECT_EQ(Str[0], '"'); + EXPECT_EQ(Str[Str.size() - 1], '"'); + ++N; + } + EXPECT_LT(N, NumItems); + + // Truncated sequence is a prefix of its untruncated equivalent. + std::string Truncated(Buf); + LLVMMutagenGetDictionaryEntrySequence(true, Buf, sizeof(Buf), &NumItems); + Buf[Truncated.size()] = '\0'; + EXPECT_STREQ(Truncated.c_str(), Buf); + + // Stops at the end of |Buf|, and null terminates. + EXPECT_EQ(LLVMMutagenGetDictionaryEntrySequence(true, Buf, Size - 1, nullptr), + Size); + EXPECT_EQ(strlen(Buf), Size - 2); + + // Clear the sequence. + LLVMMutagenResetSequence(); + EXPECT_EQ(LLVMMutagenGetDictionaryEntrySequence(true, nullptr, 0, nullptr), + 0U); + + // Retuns null if no recommendations. + size_t UseCount = 0; + EXPECT_EQ(LLVMMutagenRecommendDictionaryEntry(&UseCount), nullptr); + EXPECT_EQ(LLVMMutagenRecommendDictionary(), 0U); + EXPECT_EQ(LLVMMutagenRecommendDictionaryEntry(&UseCount), nullptr); + + // Record sequences. + for (size_t i = 0; i < 5; ++i) { + for (size_t i = 0; i < 5; ++i) { + MD->Mutate_AddWordFromTORC(U.data(), U.size(), U.capacity()); + } + LLVMMutagenRecordSequence(); + } + + size_t NumDEs = LLVMMutagenRecommendDictionary(); + EXPECT_NE(NumDEs, 0U); + for (size_t i = 0; i < NumDEs; ++i) { + auto *DE = LLVMMutagenRecommendDictionaryEntry(&UseCount); + EXPECT_NE(DE, nullptr); + EXPECT_EQ(UseCount, 0U); + } + + // Increment the use counts of entries. + for (size_t i = 0; i < 100; ++i) + MD->Mutate_AddWordFromPersistentAutoDictionary(U.data(), U.size(), + U.capacity()); + NumDEs = LLVMMutagenRecommendDictionary(); + EXPECT_NE(NumDEs, 0U); + for (size_t i = 0; i < NumDEs; ++i) { + auto *DE = LLVMMutagenRecommendDictionaryEntry(&UseCount); + EXPECT_NE(DE, nullptr); + EXPECT_NE(UseCount, 0U); + } + + // Add the first few words manually to exclude them from recommendations. + Vector ManualAdditions; + NumDEs = LLVMMutagenRecommendDictionary(); + ASSERT_GT(NumDEs, 3U); + for (size_t i = 0; i < 3; ++i) { + auto *DE = LLVMMutagenRecommendDictionaryEntry(nullptr); + auto W = FromASCII(DE); + LLVMMutagenAddWordToDictionary(W.data(), W.size()); + ManualAdditions.push_back(W); + } + N = NumDEs; + + // Get the recommended dictionary without the manual additions. + NumDEs = LLVMMutagenRecommendDictionary(); + EXPECT_EQ(NumDEs, N - 3); + for (size_t i = 0; i < NumDEs; ++i) { + auto *DE = LLVMMutagenRecommendDictionaryEntry(nullptr); + ASSERT_NE(DE, nullptr); + Word W1(reinterpret_cast(DE), strlen(DE)); + for (const auto &W2 : ManualAdditions) + EXPECT_FALSE(W1 == W2); + } +} + +} // namespace +} // namespace mutagen + +int main(int argc, char **argv) { + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/compiler-rt/test/fuzzer/CMakeLists.txt b/compiler-rt/test/fuzzer/CMakeLists.txt index c12a04b6f2702..acfcd437f0287 100644 --- a/compiler-rt/test/fuzzer/CMakeLists.txt +++ b/compiler-rt/test/fuzzer/CMakeLists.txt @@ -20,6 +20,7 @@ endif() if(COMPILER_RT_INCLUDE_TESTS) list(APPEND LIBFUZZER_TEST_DEPS FuzzerUnitTests) list(APPEND LIBFUZZER_TEST_DEPS FuzzedDataProviderUnitTests) + list(APPEND LIBFUZZER_TEST_DEPS MutagenUnitTests) endif() add_custom_target(check-fuzzer) From 7c5d654f64ceae5060132e9cc4b945a4ac60619c Mon Sep 17 00:00:00 2001 From: Geoffrey Martin-Noble Date: Fri, 2 Jul 2021 09:09:59 -0700 Subject: [PATCH 542/619] [Bazel] Fix build for c0a6318d96 This adds explicit deps to satisfy layering_check Differential Revision: https://reviews.llvm.org/D105356 --- utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 3 ++- utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index b58be676141cf..bde7feeabdd3a 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -2302,6 +2302,7 @@ cc_library( ":StandardOps", ":StandardOpsTransformsPassIncGen", ":Support", + ":TensorDialect", ":Transforms", "//llvm:Support", ], @@ -6181,6 +6182,7 @@ cc_library( ]), includes = ["include"], deps = [ + ":Analysis", ":Dialect", ":IR", ":InferTypeOpInterface", @@ -6215,7 +6217,6 @@ cc_library( ":IR", ":LinalgOps", ":MathDialect", - ":MemRefDialect", ":Pass", ":StandardOps", ":TensorDialect", diff --git a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel index 8015cdae2ae20..6f543024ef752 100644 --- a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel @@ -227,7 +227,6 @@ cc_library( "//mlir:Dialect", "//mlir:IR", "//mlir:InferTypeOpInterface", - "//mlir:MemRefDialect", "//mlir:Pass", "//mlir:Reducer", "//mlir:SideEffects", From f239026f89b24e4eeaf16f171f95da53e28f36f0 Mon Sep 17 00:00:00 2001 From: Tobias Gysi Date: Fri, 2 Jul 2021 16:08:22 +0000 Subject: [PATCH 543/619] [mlir][linalg][python] Add min operation in OpDSL. Add the min operation to OpDSL and introduce a min pooling operation to test the implementation. The patch is a sibling of the max operation patch https://reviews.llvm.org/D105203 and the min operation is again lowered to a compare and select pair. Differential Revision: https://reviews.llvm.org/D105345 --- .../Linalg/IR/LinalgNamedStructuredOps.yaml | 71 ++++++++++++++++ mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 34 +++++--- .../linalg/opdsl/lang/comprehension.py | 2 + .../dialects/linalg/opdsl/lang/emitter.py | 26 ++++-- .../linalg/opdsl/ops/core_named_ops.py | 18 +++++ .../generalize-named-polymorphic-ops.mlir | 30 +++++++ .../linalg/opdsl/emit_structured_generic.py | 45 +++++++++-- .../integration/dialects/linalg/opsrun.py | 81 ++++++++++++++++++- 8 files changed, 280 insertions(+), 27 deletions(-) diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml index 39045a212ce11..1e4277ecd7bdf 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml @@ -664,6 +664,77 @@ structured_op: !LinalgStructuredOpConfig - !ScalarExpression scalar_arg: I --- !LinalgOpConfig +metadata: !LinalgOpMetadata + name: pooling_nhwc_min_poly + cpp_class_name: PoolingNhwcMinPolyOp + doc: |- + Performs min pooling. + + Numeric casting is performed on the input operand, promoting it to the same + data type as the accumulator/output. +structured_op: !LinalgStructuredOpConfig + args: + - !LinalgOperandDefConfig + name: I + usage: InputOperand + type_var: T1 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] -> + (s0, s1, s2, s3)> + - !LinalgOperandDefConfig + name: K + usage: InputOperand + type_var: T2 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] -> + (s4, s5)> + - !LinalgOperandDefConfig + name: O + usage: OutputOperand + type_var: U + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] -> + (s0, s6, s7, s3)> + - !LinalgOperandDefConfig + name: strides + usage: IndexAttribute + type_var: I64 + attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] + -> (s8, s9)> + - !LinalgOperandDefConfig + name: dilations + usage: IndexAttribute + type_var: I64 + attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] + -> (s10, s11)> + indexing_maps: !LinalgIndexingMapsConfig + static_indexing_maps: + - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, + s10, s11] -> (d0, d1 * s8 + d3 * s10, d2 * s9 + d4 * s11, d5)> + - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, + s10, s11] -> (d3, d4)> + - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, + s10, s11] -> (d0, d1, d2, d5)> + iterator_types: + - parallel + - parallel + - parallel + - reduction + - reduction + - parallel + assignments: + - !ScalarAssign + arg: O + value: !ScalarExpression + scalar_apply: + fn_name: min + operands: + - !ScalarExpression + scalar_arg: O + - !ScalarExpression + symbolic_cast: + type_var: U + operands: + - !ScalarExpression + scalar_arg: I +--- !LinalgOpConfig metadata: !LinalgOpMetadata name: fill_rng_2d cpp_class_name: FillRng2DOp diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp index 9b729b9db5d10..18c55f4019cab 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -275,17 +275,18 @@ class RegionBuilderHelper { } Value applyfn__max(Value lhs, Value rhs) { - OpBuilder builder = getBuilder(); - if (isFloatingPoint(lhs)) { - Value condition = - builder.create(lhs.getLoc(), CmpFPredicate::OGT, lhs, rhs); - return builder.create(lhs.getLoc(), condition, lhs, rhs); - } - if (isInteger(lhs)) { - Value condition = - builder.create(lhs.getLoc(), CmpIPredicate::sgt, lhs, rhs); - return builder.create(lhs.getLoc(), condition, lhs, rhs); - } + if (isFloatingPoint(lhs)) + return emitCmpFAndSelect(lhs, rhs, CmpFPredicate::OGT); + if (isInteger(lhs)) + return emitCmpIAndSelect(lhs, rhs, CmpIPredicate::sgt); + llvm_unreachable("unsupported non numeric type"); + } + + Value applyfn__min(Value lhs, Value rhs) { + if (isFloatingPoint(lhs)) + return emitCmpFAndSelect(lhs, rhs, CmpFPredicate::OLT); + if (isInteger(lhs)) + return emitCmpIAndSelect(lhs, rhs, CmpIPredicate::slt); llvm_unreachable("unsupported non numeric type"); } @@ -322,6 +323,17 @@ class RegionBuilderHelper { MLIRContext *context; Block █ + Value emitCmpFAndSelect(Value lhs, Value rhs, CmpFPredicate predicate) { + OpBuilder builder = getBuilder(); + Value condition = builder.create(lhs.getLoc(), predicate, lhs, rhs); + return builder.create(lhs.getLoc(), condition, lhs, rhs); + } + Value emitCmpIAndSelect(Value lhs, Value rhs, CmpIPredicate predicate) { + OpBuilder builder = getBuilder(); + Value condition = builder.create(lhs.getLoc(), predicate, lhs, rhs); + return builder.create(lhs.getLoc(), condition, lhs, rhs); + } + bool isFloatingPoint(Value value) { return value.getType().isa(); } bool isInteger(Value value) { return value.getType().isa(); } diff --git a/mlir/python/mlir/dialects/linalg/opdsl/lang/comprehension.py b/mlir/python/mlir/dialects/linalg/opdsl/lang/comprehension.py index 1f9230de397a2..66d7510b68abf 100644 --- a/mlir/python/mlir/dialects/linalg/opdsl/lang/comprehension.py +++ b/mlir/python/mlir/dialects/linalg/opdsl/lang/comprehension.py @@ -339,6 +339,7 @@ class PrimFn: log = PrimFnType("log") mul = PrimFnType("mul") max = PrimFnType("max") + min = PrimFnType("min") sub = PrimFnType("sub") @@ -364,6 +365,7 @@ class ReduceFn: add = PrimFn.add.reduce mul = PrimFn.mul.reduce max = PrimFn.max.reduce + min = PrimFn.min.reduce class PrimApply(TensorExpression): diff --git a/mlir/python/mlir/dialects/linalg/opdsl/lang/emitter.py b/mlir/python/mlir/dialects/linalg/opdsl/lang/emitter.py index 9489dec522716..61d2260587116 100644 --- a/mlir/python/mlir/dialects/linalg/opdsl/lang/emitter.py +++ b/mlir/python/mlir/dialects/linalg/opdsl/lang/emitter.py @@ -308,17 +308,23 @@ def _eval_mul(self, lhs: Value, rhs: Value) -> Value: raise NotImplementedError("Unsupported 'mul' operand: {lhs}") def _eval_max(self, lhs: Value, rhs: Value) -> Value: - i1 = IntegerType.get_signless(1) if _is_floating_point_type(lhs.type): ogt_attr = IntegerAttr.get(IntegerType.get_signless(64), 2) - cond = std.CmpFOp(i1, ogt_attr, lhs, rhs).result - return std.SelectOp(lhs.type, cond, lhs, rhs).result + return _emit_cmpf_and_select(lhs, rhs, ogt_attr) if _is_integer_type(lhs.type) or _is_index_type(lhs.type): sgt_attr = IntegerAttr.get(IntegerType.get_signless(64), 4) - cond = std.CmpIOp(i1, sgt_attr, lhs, rhs).result - return std.SelectOp(lhs.type, cond, lhs, rhs).result + return _emit_cmpi_and_select(lhs, rhs, sgt_attr) raise NotImplementedError("Unsupported 'max' operand: {lhs}") + def _eval_min(self, lhs: Value, rhs: Value) -> Value: + if _is_floating_point_type(lhs.type): + olt_attr = IntegerAttr.get(IntegerType.get_signless(64), 4) + return _emit_cmpf_and_select(lhs, rhs, olt_attr) + if _is_integer_type(lhs.type) or _is_index_type(lhs.type): + slt_attr = IntegerAttr.get(IntegerType.get_signless(64), 2) + return _emit_cmpi_and_select(lhs, rhs, slt_attr) + raise NotImplementedError("Unsupported 'min' operand: {lhs}") + def _infer_structured_outs(op_config: LinalgStructuredOpConfig, in_arg_defs: Sequence[OperandDefConfig], @@ -397,3 +403,13 @@ def _get_floating_point_width(t: Type) -> int: if BF16Type.isinstance(t): return 16 raise NotImplementedError(f"Unhandled floating point type switch {t}") + + +def _emit_cmpf_and_select(lhs: Value, rhs: Value, pred: IntegerAttr) -> Value: + cond = std.CmpFOp(IntegerType.get_signless(1), pred, lhs, rhs).result + return std.SelectOp(lhs.type, cond, lhs, rhs).result + + +def _emit_cmpi_and_select(lhs: Value, rhs: Value, pred: IntegerAttr) -> Value: + cond = std.CmpIOp(IntegerType.get_signless(1), pred, lhs, rhs).result + return std.SelectOp(lhs.type, cond, lhs, rhs).result diff --git a/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py b/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py index 04c950e0a44db..a37e1944c1f75 100644 --- a/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py +++ b/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py @@ -166,6 +166,24 @@ def pooling_nhwc_max_poly( D.c])) +@linalg_structured_op +def pooling_nhwc_min_poly( + I=TensorDef(T1, S.N, S.H, S.W, S.C), + K=TensorDef(T2, S.KH, S.KW, index_dims=[D.kh, D.kw]), + O=TensorDef(U, S.N, S.OH, S.OW, S.C, output=True), + strides=AttributeDef(S.SH, S.SW), + dilations=AttributeDef(S.DH, S.DW)): + """Performs min pooling. + + Numeric casting is performed on the input operand, promoting it to the same + data type as the accumulator/output. + """ + domain(D.n, D.oh, D.ow, D.kh, D.kw, D.c) + O[D.n, D.oh, D.ow, D.c] = ReduceFn.min(D.kh, D.kw)( + cast(U, I[D.n, D.oh * S.SH + D.kh * S.DH, D.ow * S.SW + D.kw * S.DW, + D.c])) + + @linalg_structured_op def fill_rng_2d( min=ScalarDef(F64), diff --git a/mlir/test/Dialect/Linalg/generalize-named-polymorphic-ops.mlir b/mlir/test/Dialect/Linalg/generalize-named-polymorphic-ops.mlir index 4a1cb8dbcfa58..0e1c6a62a7b10 100644 --- a/mlir/test/Dialect/Linalg/generalize-named-polymorphic-ops.mlir +++ b/mlir/test/Dialect/Linalg/generalize-named-polymorphic-ops.mlir @@ -90,6 +90,36 @@ func @generalize_pooling_nhwc_max_poly_i32(%input : tensor<1x4x16x1xi32>, %shape // ----- +func @generalize_pooling_nhwc_min_poly_f32(%input : tensor<1x4x16x1xf32>, %shape: tensor<2x2xf32>, %output: tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> { + %0 = linalg.pooling_nhwc_min_poly {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>} + ins(%input, %shape : tensor<1x4x16x1xf32>, tensor<2x2xf32>) outs(%output : tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> + return %0: tensor<1x2x4x1xf32> +} + +// CHECK-LABEL: @generalize_pooling_nhwc_min_poly_f32 +// CHECK: ^{{.*}}(%[[IN_ARG:.+]]: f32, %[[SHAPE_ARG:.+]]: f32, %[[OUT_ARG:.+]]: f32) +// CHECK-NEXT: %[[COND:.+]] = cmpf olt, %[[OUT_ARG]], %[[IN_ARG]] : f32 +// CHECK-NEXT: %[[MAX:.+]] = select %[[COND]], %[[OUT_ARG]], %[[IN_ARG]] : f32 +// CHECK-NEXT: linalg.yield %[[MAX]] : f32 +// CHECK-NEXT: -> tensor<1x2x4x1xf32> + +// ----- + +func @generalize_pooling_nhwc_min_poly_i32(%input : tensor<1x4x16x1xi32>, %shape: tensor<2x2xi32>, %output: tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> { + %0 = linalg.pooling_nhwc_min_poly {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>} + ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) outs(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> + return %0: tensor<1x2x4x1xi32> +} + +// CHECK-LABEL: @generalize_pooling_nhwc_min_poly_i32 +// CHECK: ^{{.*}}(%[[IN_ARG:.+]]: i32, %[[SHAPE_ARG:.+]]: i32, %[[OUT_ARG:.+]]: i32) +// CHECK-NEXT: %[[COND:.+]] = cmpi slt, %[[OUT_ARG]], %[[IN_ARG]] : i32 +// CHECK-NEXT: %[[MAX:.+]] = select %[[COND]], %[[OUT_ARG]], %[[IN_ARG]] : i32 +// CHECK-NEXT: linalg.yield %[[MAX]] : i32 +// CHECK-NEXT: -> tensor<1x2x4x1xi32> + +// ----- + func @generalize_pooling_nhwc_sum_poly_f32(%input : tensor<1x4x16x1xf32>, %shape: tensor<2x2xf32>, %output: tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> { %0 = linalg.pooling_nhwc_sum_poly {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>} ins(%input, %shape : tensor<1x4x16x1xf32>, tensor<2x2xf32>) outs(%output : tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> diff --git a/mlir/test/python/dialects/linalg/opdsl/emit_structured_generic.py b/mlir/test/python/dialects/linalg/opdsl/emit_structured_generic.py index 12f6c560cfecc..44ac4e8e8c5b4 100644 --- a/mlir/test/python/dialects/linalg/opdsl/emit_structured_generic.py +++ b/mlir/test/python/dialects/linalg/opdsl/emit_structured_generic.py @@ -43,7 +43,7 @@ def conv_poly( @linalg_structured_op -def pooling_poly( +def pooling_max_poly( I=TensorDef(T1, S.N, S.H, S.W, S.C), K=TensorDef(T2, S.KH, S.KW, index_dims=[D.kh, D.kw]), O=TensorDef(U, S.N, S.OH, S.OW, S.C, output=True), @@ -55,6 +55,19 @@ def pooling_poly( D.c])) +@linalg_structured_op +def pooling_min_poly( + I=TensorDef(T1, S.N, S.H, S.W, S.C), + K=TensorDef(T2, S.KH, S.KW, index_dims=[D.kh, D.kw]), + O=TensorDef(U, S.N, S.OH, S.OW, S.C, output=True), + strides=AttributeDef(S.SH, S.SW), + dilations=AttributeDef(S.DH, S.DW)): + domain(D.n, D.oh, D.ow, D.kh, D.kw, D.c) + O[D.n, D.oh, D.ow, D.c] = ReduceFn.min(D.kh, D.kw)( + cast(U, I[D.n, D.oh * S.SH + D.kh * S.DH, D.ow * S.SW + D.kw * S.DW, + D.c])) + + @linalg_structured_op def fill_rng_poly( min=ScalarDef(F64), @@ -216,7 +229,7 @@ def test_f32i32_conv(input, filter, init_result): return conv_poly( input, filter, outs=[init_result], strides=[2, 4], dilations=[1, 2]) - # CHECK-LABEL: @test_f32i32_pooling + # CHECK-LABEL: @test_f32i32_max_pooling # CHECK: linalg.generic # CHECK-SAME: indexing_maps = [#[[$CONV_MAP_I]], #[[$POOL_MAP_K]], #[[$CONV_MAP_O]]] # CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction", "reduction", "parallel"] @@ -229,11 +242,11 @@ def test_f32i32_conv(input, filter, init_result): @builtin.FuncOp.from_py_func( RankedTensorType.get((4, 16), f32), RankedTensorType.get((2, 2), f32), RankedTensorType.get((2, 4), i32)) - def test_f32i32_pooling(input, shape, init_result): - return pooling_poly( + def test_f32i32_max_pooling(input, shape, init_result): + return pooling_max_poly( input, shape, outs=[init_result], strides=[2, 4], dilations=[1, 2]) - # CHECK-LABEL: @test_f32f32_pooling + # CHECK-LABEL: @test_f32f32_max_pooling # CHECK: linalg.generic # CHECK-SAME: indexing_maps = [#[[$CONV_MAP_I]], #[[$POOL_MAP_K]], #[[$CONV_MAP_O]]] # CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction", "reduction", "parallel"] @@ -245,8 +258,26 @@ def test_f32i32_pooling(input, shape, init_result): @builtin.FuncOp.from_py_func( RankedTensorType.get((4, 16), f32), RankedTensorType.get((2, 2), f32), RankedTensorType.get((2, 4), f32)) - def test_f32f32_pooling(input, shape, init_result): - return pooling_poly( + def test_f32f32_max_pooling(input, shape, init_result): + return pooling_max_poly( + input, shape, outs=[init_result], strides=[2, 4], dilations=[1, 2]) + + # CHECK-LABEL: @test_f32i32_min_pooling + # CHECK: = cmpi slt, + @builtin.FuncOp.from_py_func( + RankedTensorType.get((4, 16), f32), RankedTensorType.get((2, 2), f32), + RankedTensorType.get((2, 4), i32)) + def test_f32i32_min_pooling(input, shape, init_result): + return pooling_min_poly( + input, shape, outs=[init_result], strides=[2, 4], dilations=[1, 2]) + + # CHECK-LABEL: @test_f32f32_min_pooling + # CHECK: = cmpf olt, + @builtin.FuncOp.from_py_func( + RankedTensorType.get((4, 16), f32), RankedTensorType.get((2, 2), f32), + RankedTensorType.get((2, 4), f32)) + def test_f32f32_min_pooling(input, shape, init_result): + return pooling_min_poly( input, shape, outs=[init_result], strides=[2, 4], dilations=[1, 2]) # CHECK-LABEL: @test_i32_fill_rng diff --git a/mlir/test/python/integration/dialects/linalg/opsrun.py b/mlir/test/python/integration/dialects/linalg/opsrun.py index c6d26d1c6b858..8ec4b6c44da20 100644 --- a/mlir/test/python/integration/dialects/linalg/opsrun.py +++ b/mlir/test/python/integration/dialects/linalg/opsrun.py @@ -86,6 +86,8 @@ def log(*args): func @main() -> i32 attributes {llvm.emit_c_interface} { %v0 = constant 0 : i32 %v42 = constant 42.0 : f64 + %v77 = constant 77.0 : f64 + %v-13 = constant -13.0 : f64 %v1 = constant 1.0 : f64 %input = memref.alloc() : memref<1x4x16x1xf64> @@ -96,7 +98,11 @@ def log(*args): linalg.fill(%v0, %output) : i32, memref<1x2x4x1xi32> %c0 = constant 0 : index + %c1 = constant 1 : index + %c2 = constant 2 : index memref.store %v42, %input[%c0, %c0, %c0, %c0] : memref<1x4x16x1xf64> + memref.store %v77, %input[%c0, %c0, %c1, %c0] : memref<1x4x16x1xf64> + memref.store %v-13, %input[%c0, %c0, %c2, %c0] : memref<1x4x16x1xf64> call @pooling_on_buffers(%input, %shape, %output) : (memref<1x4x16x1xf64>, memref<2x2xf64>, memref<1x2x4x1xi32>) -> () @@ -301,7 +307,7 @@ def conv_on_buffers(input, filter, output): test_conv_generic() -def test_pooling_builtin(): +def test_max_pooling_builtin(): with Context() as ctx, Location.unknown(): module = Module.create() f64 = F64Type.get() @@ -325,13 +331,14 @@ def pooling_on_buffers(input, shape, output): execution_engine.invoke("main", res) log("RESULT: ", res[0]) + # 77 is not selected due to the dilation 2 in the second dimension. # CHECK: RESULT: 42 -test_pooling_builtin() +test_max_pooling_builtin() -def test_pooling_generic(): +def test_max_pooling_generic(): with Context() as ctx, Location.unknown(): module = Module.create() f64 = F64Type.get() @@ -360,7 +367,73 @@ def pooling_on_buffers(input, shape, output): execution_engine.invoke("main", res) log("RESULT: ", res[0]) + # 77 is not selected due to the dilation 2 in the second dimension. # CHECK: RESULT: 42 -test_pooling_generic() +test_max_pooling_generic() + + +def test_min_pooling_builtin(): + with Context() as ctx, Location.unknown(): + module = Module.create() + f64 = F64Type.get() + i32 = IntegerType.get_signless(32) + with InsertionPoint(module.body): + + @builtin.FuncOp.from_py_func( + MemRefType.get((1, 4, 16, 1), f64), MemRefType.get((2, 2), f64), + MemRefType.get((1, 2, 4, 1), i32)) + def pooling_on_buffers(input, shape, output): + linalg.pooling_nhwc_min_poly( + input, shape, outs=[output], strides=[2, 4], dilations=[1, 2]) + + execution_engine = ExecutionEngine(transform(module, pooling_boiler)) + + # TODO: FFI-based solution to allow testing and printing with python code. + # Prepare arguments: one result i32. + # Arguments must be passed as pointers. + c_int_p = ctypes.c_int * 1 + res = c_int_p(-1) + execution_engine.invoke("main", res) + + log("RESULT: ", res[0]) + # CHECK: RESULT: -13 + + +test_min_pooling_builtin() + + +def test_min_pooling_generic(): + with Context() as ctx, Location.unknown(): + module = Module.create() + f64 = F64Type.get() + i32 = IntegerType.get_signless(32) + with InsertionPoint(module.body): + + @builtin.FuncOp.from_py_func( + MemRefType.get((1, 4, 16, 1), f64), MemRefType.get((2, 2), f64), + MemRefType.get((1, 2, 4, 1), i32)) + def pooling_on_buffers(input, shape, output): + linalg.pooling_nhwc_min_poly( + input, + shape, + outs=[output], + strides=[2, 4], + dilations=[1, 2], + emit_generic=True) + + execution_engine = ExecutionEngine(transform(module, pooling_boiler)) + + # TODO: FFI-based solution to allow testing and printing with python code. + # Prepare arguments: one result i32. + # Arguments must be passed as pointers. + c_int_p = ctypes.c_int * 1 + res = c_int_p(-1) + execution_engine.invoke("main", res) + + log("RESULT: ", res[0]) + # CHECK: RESULT: -13 + + +test_min_pooling_generic() From 21e92612c006ef8aa2744f5fff6d0a060e6524e3 Mon Sep 17 00:00:00 2001 From: Atmn Patel Date: Thu, 1 Jul 2021 20:00:02 -0400 Subject: [PATCH 544/619] [Libomptarget] Experimental Remote Plugin Fixes D97883 introduced a compile-time error in the experimental remote offloading libomptarget plugin, this patch fixes it and resolves a number of inconsistencies in the plugin as well: 1. Non-functional Asynchronous API 2. Unnecessarily verbose debug printing 3. Misc. code clean ups This is not intended to make any functional changes to the plugin. Differential Revision: https://reviews.llvm.org/D105325 --- .../plugins/remote/include/Utils.h | 35 +- .../plugins/remote/include/openmp.proto | 43 +-- .../libomptarget/plugins/remote/lib/Utils.cpp | 57 +--- .../remote/server/OffloadingServer.cpp | 3 +- .../plugins/remote/server/Server.cpp | 153 +++------ .../plugins/remote/server/Server.h | 30 +- .../plugins/remote/src/Client.cpp | 318 +++++++----------- .../libomptarget/plugins/remote/src/Client.h | 88 +++-- .../libomptarget/plugins/remote/src/rtl.cpp | 71 +--- 9 files changed, 283 insertions(+), 515 deletions(-) diff --git a/openmp/libomptarget/plugins/remote/include/Utils.h b/openmp/libomptarget/plugins/remote/include/Utils.h index ca31f03f51288..15d5eafa900f0 100644 --- a/openmp/libomptarget/plugins/remote/include/Utils.h +++ b/openmp/libomptarget/plugins/remote/include/Utils.h @@ -47,20 +47,39 @@ using openmp::libomptarget::remote::TargetBinaryDescription; using openmp::libomptarget::remote::TargetOffloadEntry; using openmp::libomptarget::remote::TargetTable; -struct RPCConfig { +struct ClientManagerConfigTy { std::vector ServerAddresses; uint64_t MaxSize; uint64_t BlockSize; - RPCConfig() { - ServerAddresses = {"0.0.0.0:50051"}; - MaxSize = 1 << 30; - BlockSize = 1 << 20; + int Timeout; + + ClientManagerConfigTy() + : ServerAddresses({"0.0.0.0:50051"}), MaxSize(1 << 30), + BlockSize(1 << 20), Timeout(5) { + // TODO: Error handle for incorrect inputs + if (const char *Env = std::getenv("LIBOMPTARGET_RPC_ADDRESS")) { + ServerAddresses.clear(); + std::string AddressString = Env; + const std::string Delimiter = ","; + + size_t Pos; + std::string Token; + while ((Pos = AddressString.find(Delimiter)) != std::string::npos) { + Token = AddressString.substr(0, Pos); + ServerAddresses.push_back(Token); + AddressString.erase(0, Pos + Delimiter.length()); + } + ServerAddresses.push_back(AddressString); + } + if (const char *Env = std::getenv("LIBOMPTARGET_RPC_ALLOCATOR_MAX")) + MaxSize = std::stoi(Env); + if (const char *Env = std::getenv("LIBOMPTARGET_RPC_BLOCK_SIZE")) + BlockSize = std::stoi(Env); + if (const char *Env1 = std::getenv("LIBOMPTARGET_RPC_LATENCY")) + Timeout = std::stoi(Env1); } }; -/// Helper function to parse common environment variables between client/server -void parseEnvironment(RPCConfig &Config); - /// Loads a target binary description into protobuf. void loadTargetBinaryDescription(const __tgt_bin_desc *Desc, TargetBinaryDescription &Request); diff --git a/openmp/libomptarget/plugins/remote/include/openmp.proto b/openmp/libomptarget/plugins/remote/include/openmp.proto index 1a940bddfe507..8c22be3afeb78 100644 --- a/openmp/libomptarget/plugins/remote/include/openmp.proto +++ b/openmp/libomptarget/plugins/remote/include/openmp.proto @@ -16,19 +16,18 @@ service RemoteOffload { rpc InitRequires(I64) returns (I32) {} rpc LoadBinary(Binary) returns (TargetTable) {} - rpc Synchronize(SynchronizeDevice) returns (I32) {} rpc DataAlloc(AllocData) returns (Pointer) {} rpc DataDelete(DeleteData) returns (I32) {} - rpc DataSubmitAsync(stream SubmitDataAsync) returns (I32) {} - rpc DataRetrieveAsync(RetrieveDataAsync) returns (stream Data) {} + rpc DataSubmit(stream SubmitData) returns (I32) {} + rpc DataRetrieve(RetrieveData) returns (stream Data) {} rpc IsDataExchangeable(DevicePair) returns (I32) {} - rpc DataExchangeAsync(ExchangeDataAsync) returns (I32) {} + rpc DataExchange(ExchangeData) returns (I32) {} - rpc RunTargetRegionAsync(TargetRegionAsync) returns (I32) {} - rpc RunTargetTeamRegionAsync(TargetTeamRegionAsync) returns (I32) {} + rpc RunTargetRegion(TargetRegion) returns (I32) {} + rpc RunTargetTeamRegion(TargetTeamRegion) returns (I32) {} } message Null {} @@ -92,32 +91,25 @@ message TargetBinaryDescription { uint64 bin_ptr = 5; } -message SynchronizeDevice { - uint64 queue_ptr = 1; - int32 device_id = 2; -} - message AllocData { uint64 size = 1; uint64 hst_ptr = 2; int32 device_id = 3; } -message SubmitDataAsync { +message SubmitData { bytes data = 1; uint64 hst_ptr = 2; uint64 tgt_ptr = 3; - uint64 queue_ptr = 4; uint64 start = 5; uint64 size = 6; int32 device_id = 7; } -message RetrieveDataAsync { +message RetrieveData { uint64 hst_ptr = 1; uint64 tgt_ptr = 2; uint64 size = 3; - uint64 queue_ptr = 4; int32 device_id = 5; } @@ -128,12 +120,11 @@ message Data { int32 ret = 4; } -message ExchangeDataAsync { +message ExchangeData { uint64 src_dev_id = 1; uint64 src_ptr = 2; uint64 dst_dev_id = 3; uint64 dst_ptr = 4; - uint64 queue_ptr = 5; uint64 size = 6; } @@ -142,23 +133,21 @@ message DeleteData { int32 device_id = 2; } -message TargetRegionAsync { +message TargetRegion { repeated uint64 tgt_args = 1; repeated int64 tgt_offsets = 2; uint64 tgt_entry_ptr = 3; - uint64 queue_ptr = 4; - int32 device_id = 5; - int32 arg_num = 6; + int32 device_id = 4; + int32 arg_num = 5; } -message TargetTeamRegionAsync { +message TargetTeamRegion { repeated uint64 tgt_args = 1; repeated int64 tgt_offsets = 2; uint64 tgt_entry_ptr = 3; uint64 loop_tripcount = 4; - uint64 queue_ptr = 5; - int32 device_id = 6; - int32 arg_num = 7; - int32 team_num = 8; - int32 thread_limit = 9; + int32 device_id = 5; + int32 arg_num = 6; + int32 team_num = 7; + int32 thread_limit = 8; } diff --git a/openmp/libomptarget/plugins/remote/lib/Utils.cpp b/openmp/libomptarget/plugins/remote/lib/Utils.cpp index 5bdd2b82a6c6f..5f1eb2950b5a6 100644 --- a/openmp/libomptarget/plugins/remote/lib/Utils.cpp +++ b/openmp/libomptarget/plugins/remote/lib/Utils.cpp @@ -14,27 +14,6 @@ #include "omptarget.h" namespace RemoteOffloading { -void parseEnvironment(RPCConfig &Config) { - // TODO: Error handle for incorrect inputs - if (const char *Env = std::getenv("LIBOMPTARGET_RPC_ADDRESS")) { - Config.ServerAddresses.clear(); - std::string AddressString = Env; - const std::string Delimiter = ","; - - size_t Pos = 0; - std::string Token; - while ((Pos = AddressString.find(Delimiter)) != std::string::npos) { - Token = AddressString.substr(0, Pos); - Config.ServerAddresses.push_back(Token); - AddressString.erase(0, Pos + Delimiter.length()); - } - Config.ServerAddresses.push_back(AddressString); - } - if (const char *Env = std::getenv("LIBOMPTARGET_RPC_ALLOCATOR_MAX")) - Config.MaxSize = std::stoi(Env); - if (const char *Env = std::getenv("LIBOMPTARGET_RPC_BLOCK_SIZE")) - Config.BlockSize = std::stoi(Env); -} void loadTargetBinaryDescription(const __tgt_bin_desc *Desc, TargetBinaryDescription &Request) { @@ -101,10 +80,12 @@ void unloadTargetBinaryDescription( // Copy Global Offload Entries __tgt_offload_entry *CurEntry = Desc->HostEntriesBegin; - for (int i = 0; i < Request->entries_size(); i++) { - copyOffloadEntry(Request->entries()[i], CurEntry); - CopiedOffloadEntries[(void *)Request->entry_ptrs()[i]] = CurEntry; + size_t I = 0; + for (auto &Entry : Request->entries()) { + copyOffloadEntry(Entry, CurEntry); + CopiedOffloadEntries[(void *)Request->entry_ptrs()[I]] = CurEntry; CurEntry++; + I++; } Desc->HostEntriesEnd = CurEntry; @@ -113,7 +94,7 @@ void unloadTargetBinaryDescription( auto ImageItr = Request->image_ptrs().begin(); for (auto Image : Request->images()) { // Copy Device Offload Entries - auto *CurEntry = Desc->HostEntriesBegin; + CurEntry = Desc->HostEntriesBegin; bool Found = false; if (!Desc->HostEntriesBegin) { @@ -121,21 +102,19 @@ void unloadTargetBinaryDescription( CurImage->EntriesEnd = nullptr; } - for (int i = 0; i < Image.entries_size(); i++) { + for (size_t I = 0; I < Image.entries_size(); I++) { auto TgtEntry = - CopiedOffloadEntries.find((void *)Request->entry_ptrs()[i]); + CopiedOffloadEntries.find((void *)Request->entry_ptrs()[I]); if (TgtEntry != CopiedOffloadEntries.end()) { if (!Found) CurImage->EntriesBegin = CurEntry; + CurImage->EntriesEnd = CurEntry + 1; Found = true; - if (Found) { - CurImage->EntriesEnd = CurEntry + 1; - } } else { Found = false; - copyOffloadEntry(Image.entries()[i], CurEntry); - CopiedOffloadEntries[(void *)(Request->entry_ptrs()[i])] = CurEntry; + copyOffloadEntry(Image.entries()[I], CurEntry); + CopiedOffloadEntries[(void *)(Request->entry_ptrs()[I])] = CurEntry; } CurEntry++; } @@ -199,10 +178,10 @@ void unloadTargetTable( Table->EntriesBegin = new __tgt_offload_entry[TableResponse.entries_size()]; auto *CurEntry = Table->EntriesBegin; - for (int i = 0; i < TableResponse.entries_size(); i++) { - copyOffloadEntry(TableResponse.entries()[i], CurEntry); + for (size_t I = 0; I < TableResponse.entries_size(); I++) { + copyOffloadEntry(TableResponse.entries()[I], CurEntry); HostToRemoteTargetTableMap[CurEntry->addr] = - (void *)TableResponse.entry_ptrs()[i]; + (void *)TableResponse.entry_ptrs()[I]; CurEntry++; } Table->EntriesEnd = CurEntry; @@ -292,10 +271,10 @@ void dump(__tgt_target_table *Table) { void dump(TargetOffloadEntry Entry) { fprintf(stderr, "Entry: "); - fprintf(stderr, " %s\n", Entry.name().c_str()); - fprintf(stderr, " %d\n", Entry.reserved()); - fprintf(stderr, " %d\n", Entry.flags()); - fprintf(stderr, " %ld\n", Entry.data().size()); + fprintf(stderr, " Name: %s\n", Entry.name().c_str()); + fprintf(stderr, " Reserved: %d\n", Entry.reserved()); + fprintf(stderr, " Flags: %d\n", Entry.flags()); + fprintf(stderr, " Size: %ld\n", Entry.data().size()); dump(static_cast(Entry.data().data()), static_cast((Entry.data().c_str() + Entry.data().size()))); } diff --git a/openmp/libomptarget/plugins/remote/server/OffloadingServer.cpp b/openmp/libomptarget/plugins/remote/server/OffloadingServer.cpp index ff373b828bad2..55c4c31fd5624 100644 --- a/openmp/libomptarget/plugins/remote/server/OffloadingServer.cpp +++ b/openmp/libomptarget/plugins/remote/server/OffloadingServer.cpp @@ -24,8 +24,7 @@ using grpc::ServerBuilder; std::promise ShutdownPromise; int main() { - RPCConfig Config; - parseEnvironment(Config); + ClientManagerConfigTy Config; RemoteOffloadImpl Service(Config.MaxSize, Config.BlockSize); diff --git a/openmp/libomptarget/plugins/remote/server/Server.cpp b/openmp/libomptarget/plugins/remote/server/Server.cpp index 5bf8200988c56..340f44a5ef187 100644 --- a/openmp/libomptarget/plugins/remote/server/Server.cpp +++ b/openmp/libomptarget/plugins/remote/server/Server.cpp @@ -24,7 +24,7 @@ extern std::promise ShutdownPromise; Status RemoteOffloadImpl::Shutdown(ServerContext *Context, const Null *Request, I32 *Reply) { - SERVER_DBG("Shutting down the server"); + SERVER_DBG("Shutting down the server") Reply->set_number(0); ShutdownPromise.set_value(); @@ -35,8 +35,6 @@ Status RemoteOffloadImpl::RegisterLib(ServerContext *Context, const TargetBinaryDescription *Description, I32 *Reply) { - SERVER_DBG("Registering library"); - auto Desc = std::make_unique<__tgt_bin_desc>(); unloadTargetBinaryDescription(Description, Desc.get(), @@ -49,15 +47,13 @@ RemoteOffloadImpl::RegisterLib(ServerContext *Context, else Descriptions[(void *)Description->bin_ptr()] = std::move(Desc); - SERVER_DBG("Registered library"); + SERVER_DBG("Registered library") Reply->set_number(0); return Status::OK; } Status RemoteOffloadImpl::UnregisterLib(ServerContext *Context, const Pointer *Request, I32 *Reply) { - SERVER_DBG("Unregistering library"); - if (Descriptions.find((void *)Request->number()) == Descriptions.end()) { Reply->set_number(1); return Status::OK; @@ -67,7 +63,7 @@ Status RemoteOffloadImpl::UnregisterLib(ServerContext *Context, freeTargetBinaryDescription(Descriptions[(void *)Request->number()].get()); Descriptions.erase((void *)Request->number()); - SERVER_DBG("Unregistered library"); + SERVER_DBG("Unregistered library") Reply->set_number(0); return Status::OK; } @@ -75,9 +71,6 @@ Status RemoteOffloadImpl::UnregisterLib(ServerContext *Context, Status RemoteOffloadImpl::IsValidBinary(ServerContext *Context, const TargetDeviceImagePtr *DeviceImage, I32 *IsValid) { - SERVER_DBG("Checking if binary (%p) is valid", - (void *)(DeviceImage->image_ptr())); - __tgt_device_image *Image = HostToRemoteDeviceImage[(void *)DeviceImage->image_ptr()]; @@ -90,14 +83,13 @@ Status RemoteOffloadImpl::IsValidBinary(ServerContext *Context, } SERVER_DBG("Checked if binary (%p) is valid", - (void *)(DeviceImage->image_ptr())); + (void *)(DeviceImage->image_ptr())) return Status::OK; } Status RemoteOffloadImpl::GetNumberOfDevices(ServerContext *Context, const Null *Null, I32 *NumberOfDevices) { - SERVER_DBG("Getting number of devices"); std::call_once(PM->RTLs.initFlag, &RTLsTy::LoadRTLs, &PM->RTLs); int32_t Devices = 0; @@ -108,39 +100,32 @@ Status RemoteOffloadImpl::GetNumberOfDevices(ServerContext *Context, NumberOfDevices->set_number(Devices); - SERVER_DBG("Got number of devices"); + SERVER_DBG("Got number of devices") return Status::OK; } Status RemoteOffloadImpl::InitDevice(ServerContext *Context, const I32 *DeviceNum, I32 *Reply) { - SERVER_DBG("Initializing device %d", DeviceNum->number()); - Reply->set_number(PM->Devices[DeviceNum->number()].RTL->init_device( mapHostRTLDeviceId(DeviceNum->number()))); - SERVER_DBG("Initialized device %d", DeviceNum->number()); + SERVER_DBG("Initialized device %d", DeviceNum->number()) return Status::OK; } Status RemoteOffloadImpl::InitRequires(ServerContext *Context, const I64 *RequiresFlag, I32 *Reply) { - SERVER_DBG("Initializing requires for devices"); - for (auto &Device : PM->Devices) if (Device.RTL->init_requires) Device.RTL->init_requires(RequiresFlag->number()); Reply->set_number(RequiresFlag->number()); - SERVER_DBG("Initialized requires for devices"); + SERVER_DBG("Initialized requires for devices") return Status::OK; } Status RemoteOffloadImpl::LoadBinary(ServerContext *Context, const Binary *Binary, TargetTable *Reply) { - SERVER_DBG("Loading binary (%p) to device %d", (void *)Binary->image_ptr(), - Binary->device_id()); - __tgt_device_image *Image = HostToRemoteDeviceImage[(void *)Binary->image_ptr()]; @@ -150,32 +135,13 @@ Status RemoteOffloadImpl::LoadBinary(ServerContext *Context, loadTargetTable(Table, *Reply, Image); SERVER_DBG("Loaded binary (%p) to device %d", (void *)Binary->image_ptr(), - Binary->device_id()); - return Status::OK; -} - -Status RemoteOffloadImpl::Synchronize(ServerContext *Context, - const SynchronizeDevice *Info, - I32 *Reply) { - SERVER_DBG("Synchronizing device %d (probably won't work)", - Info->device_id()); - - void *AsyncInfo = (void *)Info->queue_ptr(); - Reply->set_number(0); - if (PM->Devices[Info->device_id()].RTL->synchronize) - Reply->set_number(PM->Devices[Info->device_id()].synchronize( - (__tgt_async_info *)AsyncInfo)); - - SERVER_DBG("Synchronized device %d", Info->device_id()); + Binary->device_id()) return Status::OK; } Status RemoteOffloadImpl::IsDataExchangeable(ServerContext *Context, const DevicePair *Request, I32 *Reply) { - SERVER_DBG("Checking if data exchangable between device %d and device %d", - Request->src_dev_id(), Request->dst_dev_id()); - Reply->set_number(-1); if (PM->Devices[mapHostRTLDeviceId(Request->src_dev_id())] .RTL->is_data_exchangable) @@ -183,40 +149,30 @@ Status RemoteOffloadImpl::IsDataExchangeable(ServerContext *Context, .RTL->is_data_exchangable(Request->src_dev_id(), Request->dst_dev_id())); - SERVER_DBG("Checked if data exchangable between device %d and device %d", - Request->src_dev_id(), Request->dst_dev_id()); + SERVER_DBG("Checked if data exchangeable between device %d and device %d", + Request->src_dev_id(), Request->dst_dev_id()) return Status::OK; } Status RemoteOffloadImpl::DataAlloc(ServerContext *Context, const AllocData *Request, Pointer *Reply) { - SERVER_DBG("Allocating %ld bytes on sevice %d", Request->size(), - Request->device_id()); - uint64_t TgtPtr = (uint64_t)PM->Devices[Request->device_id()].RTL->data_alloc( mapHostRTLDeviceId(Request->device_id()), Request->size(), - (void *)Request->hst_ptr()); + (void *)Request->hst_ptr(), TARGET_ALLOC_DEFAULT); Reply->set_number(TgtPtr); - SERVER_DBG("Allocated at " DPxMOD "", DPxPTR((void *)TgtPtr)); + SERVER_DBG("Allocated at " DPxMOD "", DPxPTR((void *)TgtPtr)) return Status::OK; } -Status RemoteOffloadImpl::DataSubmitAsync(ServerContext *Context, - ServerReader *Reader, - I32 *Reply) { - SubmitDataAsync Request; +Status RemoteOffloadImpl::DataSubmit(ServerContext *Context, + ServerReader *Reader, + I32 *Reply) { + SubmitData Request; uint8_t *HostCopy = nullptr; while (Reader->Read(&Request)) { if (Request.start() == 0 && Request.size() == Request.data().size()) { - SERVER_DBG("Submitting %lu bytes async to (%p) on device %d", - Request.data().size(), (void *)Request.tgt_ptr(), - Request.device_id()); - - SERVER_DBG(" Host Pointer Info: %p, %p", (void *)Request.hst_ptr(), - static_cast(Request.data().data())); - Reader->SendInitialMetadata(); Reply->set_number(PM->Devices[Request.device_id()].RTL->data_submit( @@ -225,7 +181,7 @@ Status RemoteOffloadImpl::DataSubmitAsync(ServerContext *Context, SERVER_DBG("Submitted %lu bytes async to (%p) on device %d", Request.data().size(), (void *)Request.tgt_ptr(), - Request.device_id()); + Request.device_id()) return Status::OK; } @@ -234,15 +190,9 @@ Status RemoteOffloadImpl::DataSubmitAsync(ServerContext *Context, Reader->SendInitialMetadata(); } - SERVER_DBG("Submitting %lu-%lu/%lu bytes async to (%p) on device %d", - Request.start(), Request.start() + Request.data().size(), - Request.size(), (void *)Request.tgt_ptr(), Request.device_id()); - memcpy((void *)((char *)HostCopy + Request.start()), Request.data().data(), Request.data().size()); } - SERVER_DBG(" Host Pointer Info: %p, %p", (void *)Request.hst_ptr(), - static_cast(Request.data().data())); Reply->set_number(PM->Devices[Request.device_id()].RTL->data_submit( mapHostRTLDeviceId(Request.device_id()), (void *)Request.tgt_ptr(), @@ -251,15 +201,16 @@ Status RemoteOffloadImpl::DataSubmitAsync(ServerContext *Context, delete[] HostCopy; SERVER_DBG("Submitted %lu bytes to (%p) on device %d", Request.data().size(), - (void *)Request.tgt_ptr(), Request.device_id()); + (void *)Request.tgt_ptr(), Request.device_id()) return Status::OK; } -Status RemoteOffloadImpl::DataRetrieveAsync(ServerContext *Context, - const RetrieveDataAsync *Request, - ServerWriter *Writer) { +Status RemoteOffloadImpl::DataRetrieve(ServerContext *Context, + const RetrieveData *Request, + ServerWriter *Writer) { auto HstPtr = std::make_unique(Request->size()); + auto Ret = PM->Devices[Request->device_id()].RTL->data_retrieve( mapHostRTLDeviceId(Request->device_id()), HstPtr.get(), (void *)Request->tgt_ptr(), Request->size()); @@ -277,17 +228,13 @@ Status RemoteOffloadImpl::DataRetrieveAsync(ServerContext *Context, Reply->set_data((char *)HstPtr.get() + Start, End - Start); Reply->set_ret(Ret); - SERVER_DBG("Retrieving %lu-%lu/%lu bytes from (%p) on device %d", Start, - End, Request->size(), (void *)Request->tgt_ptr(), - mapHostRTLDeviceId(Request->device_id())); - if (!Writer->Write(*Reply)) { - CLIENT_DBG("Broken stream when submitting data"); + CLIENT_DBG("Broken stream when submitting data") } SERVER_DBG("Retrieved %lu-%lu/%lu bytes from (%p) on device %d", Start, End, Request->size(), (void *)Request->tgt_ptr(), - mapHostRTLDeviceId(Request->device_id())); + mapHostRTLDeviceId(Request->device_id())) Start += BlockSize; End += BlockSize; @@ -297,10 +244,6 @@ Status RemoteOffloadImpl::DataRetrieveAsync(ServerContext *Context, } else { auto *Reply = protobuf::Arena::CreateMessage(Arena.get()); - SERVER_DBG("Retrieve %lu bytes from (%p) on device %d", Request->size(), - (void *)Request->tgt_ptr(), - mapHostRTLDeviceId(Request->device_id())); - Reply->set_start(0); Reply->set_size(Request->size()); Reply->set_data((char *)HstPtr.get(), Request->size()); @@ -308,7 +251,7 @@ Status RemoteOffloadImpl::DataRetrieveAsync(ServerContext *Context, SERVER_DBG("Retrieved %lu bytes from (%p) on device %d", Request->size(), (void *)Request->tgt_ptr(), - mapHostRTLDeviceId(Request->device_id())); + mapHostRTLDeviceId(Request->device_id())) Writer->WriteLast(*Reply, WriteOptions()); } @@ -316,16 +259,9 @@ Status RemoteOffloadImpl::DataRetrieveAsync(ServerContext *Context, return Status::OK; } -Status RemoteOffloadImpl::DataExchangeAsync(ServerContext *Context, - const ExchangeDataAsync *Request, - I32 *Reply) { - SERVER_DBG( - "Exchanging data asynchronously from device %d (%p) to device %d (%p) of " - "size %lu", - mapHostRTLDeviceId(Request->src_dev_id()), (void *)Request->src_ptr(), - mapHostRTLDeviceId(Request->dst_dev_id()), (void *)Request->dst_ptr(), - Request->size()); - +Status RemoteOffloadImpl::DataExchange(ServerContext *Context, + const ExchangeData *Request, + I32 *Reply) { if (PM->Devices[Request->src_dev_id()].RTL->data_exchange) { int32_t Ret = PM->Devices[Request->src_dev_id()].RTL->data_exchange( mapHostRTLDeviceId(Request->src_dev_id()), (void *)Request->src_ptr(), @@ -340,30 +276,24 @@ Status RemoteOffloadImpl::DataExchangeAsync(ServerContext *Context, "size %lu", mapHostRTLDeviceId(Request->src_dev_id()), (void *)Request->src_ptr(), mapHostRTLDeviceId(Request->dst_dev_id()), (void *)Request->dst_ptr(), - Request->size()); + Request->size()) return Status::OK; } Status RemoteOffloadImpl::DataDelete(ServerContext *Context, const DeleteData *Request, I32 *Reply) { - SERVER_DBG("Deleting data from (%p) on device %d", (void *)Request->tgt_ptr(), - mapHostRTLDeviceId(Request->device_id())); - auto Ret = PM->Devices[Request->device_id()].RTL->data_delete( mapHostRTLDeviceId(Request->device_id()), (void *)Request->tgt_ptr()); Reply->set_number(Ret); SERVER_DBG("Deleted data from (%p) on device %d", (void *)Request->tgt_ptr(), - mapHostRTLDeviceId(Request->device_id())); + mapHostRTLDeviceId(Request->device_id())) return Status::OK; } -Status RemoteOffloadImpl::RunTargetRegionAsync(ServerContext *Context, - const TargetRegionAsync *Request, - I32 *Reply) { - SERVER_DBG("Running TargetRegionAsync on device %d with %d args", - mapHostRTLDeviceId(Request->device_id()), Request->arg_num()); - +Status RemoteOffloadImpl::RunTargetRegion(ServerContext *Context, + const TargetRegion *Request, + I32 *Reply) { std::vector TgtArgs(Request->arg_num()); for (auto I = 0; I < Request->arg_num(); I++) TgtArgs[I] = (uint64_t)Request->tgt_args()[I]; @@ -381,16 +311,14 @@ Status RemoteOffloadImpl::RunTargetRegionAsync(ServerContext *Context, Reply->set_number(Ret); - SERVER_DBG("Ran TargetRegionAsync on device %d with %d args", - mapHostRTLDeviceId(Request->device_id()), Request->arg_num()); + SERVER_DBG("Ran TargetRegion on device %d with %d args", + mapHostRTLDeviceId(Request->device_id()), Request->arg_num()) return Status::OK; } -Status RemoteOffloadImpl::RunTargetTeamRegionAsync( - ServerContext *Context, const TargetTeamRegionAsync *Request, I32 *Reply) { - SERVER_DBG("Running TargetTeamRegionAsync on device %d with %d args", - mapHostRTLDeviceId(Request->device_id()), Request->arg_num()); - +Status RemoteOffloadImpl::RunTargetTeamRegion(ServerContext *Context, + const TargetTeamRegion *Request, + I32 *Reply) { std::vector TgtArgs(Request->arg_num()); for (auto I = 0; I < Request->arg_num(); I++) TgtArgs[I] = (uint64_t)Request->tgt_args()[I]; @@ -401,6 +329,7 @@ Status RemoteOffloadImpl::RunTargetTeamRegionAsync( TgtOffsets[I] = (ptrdiff_t)*TgtOffsetItr; void *TgtEntryPtr = ((__tgt_offload_entry *)Request->tgt_entry_ptr())->addr; + int32_t Ret = PM->Devices[Request->device_id()].RTL->run_team_region( mapHostRTLDeviceId(Request->device_id()), TgtEntryPtr, (void **)TgtArgs.data(), TgtOffsets.data(), Request->arg_num(), @@ -408,8 +337,8 @@ Status RemoteOffloadImpl::RunTargetTeamRegionAsync( Reply->set_number(Ret); - SERVER_DBG("Ran TargetTeamRegionAsync on device %d with %d args", - mapHostRTLDeviceId(Request->device_id()), Request->arg_num()); + SERVER_DBG("Ran TargetTeamRegion on device %d with %d args", + mapHostRTLDeviceId(Request->device_id()), Request->arg_num()) return Status::OK; } diff --git a/openmp/libomptarget/plugins/remote/server/Server.h b/openmp/libomptarget/plugins/remote/server/Server.h index 3d20b9f177172..5414ad13913c5 100644 --- a/openmp/libomptarget/plugins/remote/server/Server.h +++ b/openmp/libomptarget/plugins/remote/server/Server.h @@ -40,8 +40,6 @@ class RemoteOffloadImpl final : public RemoteOffload::Service { std::unordered_map HostToRemoteDeviceImage; - std::unordered_map - HostToRemoteOffloadEntry; std::unordered_map> Descriptions; __tgt_target_table *Table = nullptr; @@ -80,35 +78,29 @@ class RemoteOffloadImpl final : public RemoteOffload::Service { Status LoadBinary(ServerContext *Context, const Binary *Binary, TargetTable *Reply) override; - Status Synchronize(ServerContext *Context, const SynchronizeDevice *Info, - I32 *Reply) override; Status IsDataExchangeable(ServerContext *Context, const DevicePair *Request, I32 *Reply) override; Status DataAlloc(ServerContext *Context, const AllocData *Request, Pointer *Reply) override; - Status DataSubmitAsync(ServerContext *Context, - ServerReader *Reader, - I32 *Reply) override; - Status DataRetrieveAsync(ServerContext *Context, - const RetrieveDataAsync *Request, - ServerWriter *Writer) override; + Status DataSubmit(ServerContext *Context, ServerReader *Reader, + I32 *Reply) override; + Status DataRetrieve(ServerContext *Context, const RetrieveData *Request, + ServerWriter *Writer) override; - Status DataExchangeAsync(ServerContext *Context, - const ExchangeDataAsync *Request, - I32 *Reply) override; + Status DataExchange(ServerContext *Context, const ExchangeData *Request, + I32 *Reply) override; Status DataDelete(ServerContext *Context, const DeleteData *Request, I32 *Reply) override; - Status RunTargetRegionAsync(ServerContext *Context, - const TargetRegionAsync *Request, - I32 *Reply) override; + Status RunTargetRegion(ServerContext *Context, const TargetRegion *Request, + I32 *Reply) override; - Status RunTargetTeamRegionAsync(ServerContext *Context, - const TargetTeamRegionAsync *Request, - I32 *Reply) override; + Status RunTargetTeamRegion(ServerContext *Context, + const TargetTeamRegion *Request, + I32 *Reply) override; }; #endif diff --git a/openmp/libomptarget/plugins/remote/src/Client.cpp b/openmp/libomptarget/plugins/remote/src/Client.cpp index 49b595c6e7770..917acbba03478 100644 --- a/openmp/libomptarget/plugins/remote/src/Client.cpp +++ b/openmp/libomptarget/plugins/remote/src/Client.cpp @@ -24,31 +24,30 @@ using grpc::ClientWriter; using grpc::Status; template -auto RemoteOffloadClient::remoteCall(Fn1 Preprocess, Fn2 Postprocess, - TReturn ErrorValue, bool Timeout) { +auto RemoteOffloadClient::remoteCall(Fn1 Preprocessor, Fn2 Postprocessor, + TReturn ErrorValue, bool CanTimeOut) { ArenaAllocatorLock->lock(); if (Arena->SpaceAllocated() >= MaxSize) Arena->Reset(); ArenaAllocatorLock->unlock(); ClientContext Context; - if (Timeout) { + if (CanTimeOut) { auto Deadline = std::chrono::system_clock::now() + std::chrono::seconds(Timeout); Context.set_deadline(Deadline); } Status RPCStatus; - auto Reply = Preprocess(RPCStatus, Context); + auto Reply = Preprocessor(RPCStatus, Context); - // TODO: Error handle more appropriately if (!RPCStatus.ok()) { - CLIENT_DBG("%s", RPCStatus.error_message().c_str()); + CLIENT_DBG("%s", RPCStatus.error_message().c_str()) } else { - return Postprocess(Reply); + return Postprocessor(Reply); } - CLIENT_DBG("Failed"); + CLIENT_DBG("Failed") return ErrorValue; } @@ -56,7 +55,7 @@ int32_t RemoteOffloadClient::shutdown(void) { ClientContext Context; Null Request; I32 Reply; - CLIENT_DBG("Shutting down server."); + CLIENT_DBG("Shutting down server.") auto Status = Stub->Shutdown(&Context, Request, &Reply); if (Status.ok()) return Reply.number(); @@ -65,7 +64,7 @@ int32_t RemoteOffloadClient::shutdown(void) { int32_t RemoteOffloadClient::registerLib(__tgt_bin_desc *Desc) { return remoteCall( - /* Preprocess */ + /* Preprocessor */ [&](auto &RPCStatus, auto &Context) { auto *Request = protobuf::Arena::CreateMessage( Arena.get()); @@ -73,14 +72,13 @@ int32_t RemoteOffloadClient::registerLib(__tgt_bin_desc *Desc) { loadTargetBinaryDescription(Desc, *Request); Request->set_bin_ptr((uint64_t)Desc); - CLIENT_DBG("Registering library"); RPCStatus = Stub->RegisterLib(&Context, *Request, Reply); return Reply; }, - /* Postprocess */ + /* Postprocessor */ [&](const auto &Reply) { if (Reply->number() == 0) { - CLIENT_DBG("Registered library"); + CLIENT_DBG("Registered library") return 0; } return 1; @@ -90,24 +88,23 @@ int32_t RemoteOffloadClient::registerLib(__tgt_bin_desc *Desc) { int32_t RemoteOffloadClient::unregisterLib(__tgt_bin_desc *Desc) { return remoteCall( - /* Preprocess */ + /* Preprocessor */ [&](auto &RPCStatus, auto &Context) { auto *Request = protobuf::Arena::CreateMessage(Arena.get()); auto *Reply = protobuf::Arena::CreateMessage(Arena.get()); Request->set_number((uint64_t)Desc); - CLIENT_DBG("Unregistering library"); RPCStatus = Stub->UnregisterLib(&Context, *Request, Reply); return Reply; }, - /* Postprocess */ + /* Postprocessor */ [&](const auto &Reply) { if (Reply->number() == 0) { - CLIENT_DBG("Unregistered library"); + CLIENT_DBG("Unregistered library") return 0; } - CLIENT_DBG("Failed to unregister library"); + CLIENT_DBG("Failed to unregister library") return 1; }, /* Error Value */ 1); @@ -115,7 +112,7 @@ int32_t RemoteOffloadClient::unregisterLib(__tgt_bin_desc *Desc) { int32_t RemoteOffloadClient::isValidBinary(__tgt_device_image *Image) { return remoteCall( - /* Preprocess */ + /* Preprocessor */ [&](auto &RPCStatus, auto &Context) { auto *Request = protobuf::Arena::CreateMessage(Arena.get()); @@ -127,16 +124,15 @@ int32_t RemoteOffloadClient::isValidBinary(__tgt_device_image *Image) { while (EntryItr != Image->EntriesEnd) Request->add_entry_ptrs((uint64_t)EntryItr++); - CLIENT_DBG("Validating binary"); RPCStatus = Stub->IsValidBinary(&Context, *Request, Reply); return Reply; }, - /* Postprocess */ + /* Postprocessor */ [&](const auto &Reply) { if (Reply->number()) { - CLIENT_DBG("Validated binary"); + CLIENT_DBG("Validated binary") } else { - CLIENT_DBG("Could not validate binary"); + CLIENT_DBG("Could not validate binary") } return Reply->number(); }, @@ -145,22 +141,21 @@ int32_t RemoteOffloadClient::isValidBinary(__tgt_device_image *Image) { int32_t RemoteOffloadClient::getNumberOfDevices() { return remoteCall( - /* Preprocess */ + /* Preprocessor */ [&](Status &RPCStatus, ClientContext &Context) { auto *Request = protobuf::Arena::CreateMessage(Arena.get()); auto *Reply = protobuf::Arena::CreateMessage(Arena.get()); - CLIENT_DBG("Getting number of devices"); RPCStatus = Stub->GetNumberOfDevices(&Context, *Request, Reply); return Reply; }, - /* Postprocess */ + /* Postprocessor */ [&](const auto &Reply) { if (Reply->number()) { - CLIENT_DBG("Found %d devices", Reply->number()); + CLIENT_DBG("Found %d devices", Reply->number()) } else { - CLIENT_DBG("Could not get the number of devices"); + CLIENT_DBG("Could not get the number of devices") } return Reply->number(); }, @@ -169,24 +164,23 @@ int32_t RemoteOffloadClient::getNumberOfDevices() { int32_t RemoteOffloadClient::initDevice(int32_t DeviceId) { return remoteCall( - /* Preprocess */ + /* Preprocessor */ [&](auto &RPCStatus, auto &Context) { auto *Request = protobuf::Arena::CreateMessage(Arena.get()); auto *Reply = protobuf::Arena::CreateMessage(Arena.get()); Request->set_number(DeviceId); - CLIENT_DBG("Initializing device %d", DeviceId); RPCStatus = Stub->InitDevice(&Context, *Request, Reply); return Reply; }, - /* Postprocess */ + /* Postprocessor */ [&](const auto &Reply) { if (!Reply->number()) { - CLIENT_DBG("Initialized device %d", DeviceId); + CLIENT_DBG("Initialized device %d", DeviceId) } else { - CLIENT_DBG("Could not initialize device %d", DeviceId); + CLIENT_DBG("Could not initialize device %d", DeviceId) } return Reply->number(); }, @@ -195,21 +189,20 @@ int32_t RemoteOffloadClient::initDevice(int32_t DeviceId) { int32_t RemoteOffloadClient::initRequires(int64_t RequiresFlags) { return remoteCall( - /* Preprocess */ + /* Preprocessor */ [&](auto &RPCStatus, auto &Context) { auto *Request = protobuf::Arena::CreateMessage(Arena.get()); auto *Reply = protobuf::Arena::CreateMessage(Arena.get()); Request->set_number(RequiresFlags); - CLIENT_DBG("Initializing requires"); RPCStatus = Stub->InitRequires(&Context, *Request, Reply); return Reply; }, - /* Postprocess */ + /* Postprocessor */ [&](const auto &Reply) { if (Reply->number()) { - CLIENT_DBG("Initialized requires"); + CLIENT_DBG("Initialized requires") } else { - CLIENT_DBG("Could not initialize requires"); + CLIENT_DBG("Could not initialize requires") } return Reply->number(); }, @@ -219,7 +212,7 @@ int32_t RemoteOffloadClient::initRequires(int64_t RequiresFlags) { __tgt_target_table *RemoteOffloadClient::loadBinary(int32_t DeviceId, __tgt_device_image *Image) { return remoteCall( - /* Preprocess */ + /* Preprocessor */ [&](auto &RPCStatus, auto &Context) { auto *ImageMessage = protobuf::Arena::CreateMessage(Arena.get()); @@ -227,14 +220,13 @@ __tgt_target_table *RemoteOffloadClient::loadBinary(int32_t DeviceId, ImageMessage->set_image_ptr((uint64_t)Image->ImageStart); ImageMessage->set_device_id(DeviceId); - CLIENT_DBG("Loading Image %p to device %d", Image, DeviceId); RPCStatus = Stub->LoadBinary(&Context, *ImageMessage, Reply); return Reply; }, - /* Postprocess */ + /* Postprocessor */ [&](auto &Reply) { if (Reply->entries_size() == 0) { - CLIENT_DBG("Could not load image %p onto device %d", Image, DeviceId); + CLIENT_DBG("Could not load image %p onto device %d", Image, DeviceId) return (__tgt_target_table *)nullptr; } DevicesToTables[DeviceId] = std::make_unique<__tgt_target_table>(); @@ -242,46 +234,18 @@ __tgt_target_table *RemoteOffloadClient::loadBinary(int32_t DeviceId, RemoteEntries[DeviceId]); CLIENT_DBG("Loaded Image %p to device %d with %d entries", Image, - DeviceId, Reply->entries_size()); + DeviceId, Reply->entries_size()) return DevicesToTables[DeviceId].get(); }, /* Error Value */ (__tgt_target_table *)nullptr, - /* Timeout */ false); -} - -int64_t RemoteOffloadClient::synchronize(int32_t DeviceId, - __tgt_async_info *AsyncInfo) { - return remoteCall( - /* Preprocess */ - [&](auto &RPCStatus, auto &Context) { - auto *Reply = protobuf::Arena::CreateMessage(Arena.get()); - auto *Info = - protobuf::Arena::CreateMessage(Arena.get()); - - Info->set_device_id(DeviceId); - Info->set_queue_ptr((uint64_t)AsyncInfo); - - CLIENT_DBG("Synchronizing device %d", DeviceId); - RPCStatus = Stub->Synchronize(&Context, *Info, Reply); - return Reply; - }, - /* Postprocess */ - [&](auto &Reply) { - if (Reply->number()) { - CLIENT_DBG("Synchronized device %d", DeviceId); - } else { - CLIENT_DBG("Could not synchronize device %d", DeviceId); - } - return Reply->number(); - }, - /* Error Value */ -1); + /* CanTimeOut */ false); } int32_t RemoteOffloadClient::isDataExchangeable(int32_t SrcDevId, int32_t DstDevId) { return remoteCall( - /* Preprocess */ + /* Preprocessor */ [&](auto &RPCStatus, auto &Context) { auto *Request = protobuf::Arena::CreateMessage(Arena.get()); auto *Reply = protobuf::Arena::CreateMessage(Arena.get()); @@ -289,18 +253,16 @@ int32_t RemoteOffloadClient::isDataExchangeable(int32_t SrcDevId, Request->set_src_dev_id(SrcDevId); Request->set_dst_dev_id(DstDevId); - CLIENT_DBG("Asking if data is exchangeable between %d, %d", SrcDevId, - DstDevId); RPCStatus = Stub->IsDataExchangeable(&Context, *Request, Reply); return Reply; }, - /* Postprocess */ + /* Postprocessor */ [&](auto &Reply) { if (Reply->number()) { - CLIENT_DBG("Data is exchangeable between %d, %d", SrcDevId, DstDevId); + CLIENT_DBG("Data is exchangeable between %d, %d", SrcDevId, DstDevId) } else { CLIENT_DBG("Data is not exchangeable between %d, %d", SrcDevId, - DstDevId); + DstDevId) } return Reply->number(); }, @@ -310,7 +272,7 @@ int32_t RemoteOffloadClient::isDataExchangeable(int32_t SrcDevId, void *RemoteOffloadClient::dataAlloc(int32_t DeviceId, int64_t Size, void *HstPtr) { return remoteCall( - /* Preprocess */ + /* Preprocessor */ [&](auto &RPCStatus, auto &Context) { auto *Reply = protobuf::Arena::CreateMessage(Arena.get()); auto *Request = protobuf::Arena::CreateMessage(Arena.get()); @@ -319,40 +281,38 @@ void *RemoteOffloadClient::dataAlloc(int32_t DeviceId, int64_t Size, Request->set_size(Size); Request->set_hst_ptr((uint64_t)HstPtr); - CLIENT_DBG("Allocating %ld bytes on device %d", Size, DeviceId); RPCStatus = Stub->DataAlloc(&Context, *Request, Reply); return Reply; }, - /* Postprocess */ + /* Postprocessor */ [&](auto &Reply) { if (Reply->number()) { CLIENT_DBG("Allocated %ld bytes on device %d at %p", Size, DeviceId, - (void *)Reply->number()); + (void *)Reply->number()) } else { CLIENT_DBG("Could not allocate %ld bytes on device %d at %p", Size, - DeviceId, (void *)Reply->number()); + DeviceId, (void *)Reply->number()) } return (void *)Reply->number(); }, /* Error Value */ (void *)nullptr); } -int32_t RemoteOffloadClient::dataSubmitAsync(int32_t DeviceId, void *TgtPtr, - void *HstPtr, int64_t Size, - __tgt_async_info *AsyncInfo) { +int32_t RemoteOffloadClient::dataSubmit(int32_t DeviceId, void *TgtPtr, + void *HstPtr, int64_t Size) { return remoteCall( - /* Preprocess */ + /* Preprocessor */ [&](auto &RPCStatus, auto &Context) { auto *Reply = protobuf::Arena::CreateMessage(Arena.get()); - std::unique_ptr> Writer( - Stub->DataSubmitAsync(&Context, Reply)); + std::unique_ptr> Writer( + Stub->DataSubmit(&Context, Reply)); if (Size > BlockSize) { int64_t Start = 0, End = BlockSize; for (auto I = 0; I < ceil((float)Size / BlockSize); I++) { auto *Request = - protobuf::Arena::CreateMessage(Arena.get()); + protobuf::Arena::CreateMessage(Arena.get()); Request->set_device_id(DeviceId); Request->set_data((char *)HstPtr + Start, End - Start); @@ -360,13 +320,9 @@ int32_t RemoteOffloadClient::dataSubmitAsync(int32_t DeviceId, void *TgtPtr, Request->set_tgt_ptr((uint64_t)TgtPtr); Request->set_start(Start); Request->set_size(Size); - Request->set_queue_ptr((uint64_t)AsyncInfo); - - CLIENT_DBG("Submitting %ld-%ld/%ld bytes async on device %d at %p", - Start, End, Size, DeviceId, TgtPtr) if (!Writer->Write(*Request)) { - CLIENT_DBG("Broken stream when submitting data"); + CLIENT_DBG("Broken stream when submitting data") Reply->set_number(0); return Reply; } @@ -378,7 +334,7 @@ int32_t RemoteOffloadClient::dataSubmitAsync(int32_t DeviceId, void *TgtPtr, } } else { auto *Request = - protobuf::Arena::CreateMessage(Arena.get()); + protobuf::Arena::CreateMessage(Arena.get()); Request->set_device_id(DeviceId); Request->set_data(HstPtr, Size); @@ -387,10 +343,8 @@ int32_t RemoteOffloadClient::dataSubmitAsync(int32_t DeviceId, void *TgtPtr, Request->set_start(0); Request->set_size(Size); - CLIENT_DBG("Submitting %ld bytes async on device %d at %p", Size, - DeviceId, TgtPtr) if (!Writer->Write(*Request)) { - CLIENT_DBG("Broken stream when submitting data"); + CLIENT_DBG("Broken stream when submitting data") Reply->set_number(0); return Reply; } @@ -401,11 +355,11 @@ int32_t RemoteOffloadClient::dataSubmitAsync(int32_t DeviceId, void *TgtPtr, return Reply; }, - /* Postprocess */ + /* Postprocessor */ [&](auto &Reply) { if (!Reply->number()) { - CLIENT_DBG("Async submitted %ld bytes on device %d at %p", Size, - DeviceId, TgtPtr) + CLIENT_DBG(" submitted %ld bytes on device %d at %p", Size, DeviceId, + TgtPtr) } else { CLIENT_DBG("Could not async submit %ld bytes on device %d at %p", Size, DeviceId, TgtPtr) @@ -413,27 +367,25 @@ int32_t RemoteOffloadClient::dataSubmitAsync(int32_t DeviceId, void *TgtPtr, return Reply->number(); }, /* Error Value */ -1, - /* Timeout */ false); + /* CanTimeOut */ false); } -int32_t RemoteOffloadClient::dataRetrieveAsync(int32_t DeviceId, void *HstPtr, - void *TgtPtr, int64_t Size, - __tgt_async_info *AsyncInfo) { +int32_t RemoteOffloadClient::dataRetrieve(int32_t DeviceId, void *HstPtr, + void *TgtPtr, int64_t Size) { return remoteCall( - /* Preprocess */ + /* Preprocessor */ [&](auto &RPCStatus, auto &Context) { auto *Request = - protobuf::Arena::CreateMessage(Arena.get()); + protobuf::Arena::CreateMessage(Arena.get()); Request->set_device_id(DeviceId); Request->set_size(Size); Request->set_hst_ptr((int64_t)HstPtr); Request->set_tgt_ptr((int64_t)TgtPtr); - Request->set_queue_ptr((uint64_t)AsyncInfo); auto *Reply = protobuf::Arena::CreateMessage(Arena.get()); std::unique_ptr> Reader( - Stub->DataRetrieveAsync(&Context, *Request)); + Stub->DataRetrieve(&Context, *Request)); Reader->WaitForInitialMetadata(); while (Reader->Read(Reply)) { if (Reply->ret()) { @@ -444,18 +396,10 @@ int32_t RemoteOffloadClient::dataRetrieveAsync(int32_t DeviceId, void *HstPtr, } if (Reply->start() == 0 && Reply->size() == Reply->data().size()) { - CLIENT_DBG("Async retrieving %ld bytes on device %d at %p for %p", - Size, DeviceId, TgtPtr, HstPtr) - memcpy(HstPtr, Reply->data().data(), Reply->data().size()); return Reply; } - CLIENT_DBG("Retrieving %lu-%lu/%lu bytes async from (%p) to (%p) " - "on Device %d", - Reply->start(), Reply->start() + Reply->data().size(), - Reply->size(), (void *)Request->tgt_ptr(), HstPtr, - Request->device_id()); memcpy((void *)((char *)HstPtr + Reply->start()), Reply->data().data(), Reply->data().size()); @@ -464,54 +408,49 @@ int32_t RemoteOffloadClient::dataRetrieveAsync(int32_t DeviceId, void *HstPtr, return Reply; }, - /* Postprocess */ + /* Postprocessor */ [&](auto &Reply) { if (!Reply->ret()) { - CLIENT_DBG("Async retrieve %ld bytes on Device %d", Size, DeviceId); + CLIENT_DBG("Retrieved %ld bytes on Device %d", Size, DeviceId) } else { CLIENT_DBG("Could not async retrieve %ld bytes on Device %d", Size, - DeviceId); + DeviceId) } return Reply->ret(); }, /* Error Value */ -1, - /* Timeout */ false); + /* CanTimeOut */ false); } -int32_t RemoteOffloadClient::dataExchangeAsync(int32_t SrcDevId, void *SrcPtr, - int32_t DstDevId, void *DstPtr, - int64_t Size, - __tgt_async_info *AsyncInfo) { +int32_t RemoteOffloadClient::dataExchange(int32_t SrcDevId, void *SrcPtr, + int32_t DstDevId, void *DstPtr, + int64_t Size) { return remoteCall( - /* Preprocess */ + /* Preprocessor */ [&](auto &RPCStatus, auto &Context) { auto *Reply = protobuf::Arena::CreateMessage(Arena.get()); auto *Request = - protobuf::Arena::CreateMessage(Arena.get()); + protobuf::Arena::CreateMessage(Arena.get()); Request->set_src_dev_id(SrcDevId); Request->set_src_ptr((uint64_t)SrcPtr); Request->set_dst_dev_id(DstDevId); Request->set_dst_ptr((uint64_t)DstPtr); Request->set_size(Size); - Request->set_queue_ptr((uint64_t)AsyncInfo); - CLIENT_DBG( - "Exchanging %ld bytes on device %d at %p for %p on device %d", Size, - SrcDevId, SrcPtr, DstPtr, DstDevId); - RPCStatus = Stub->DataExchangeAsync(&Context, *Request, Reply); + RPCStatus = Stub->DataExchange(&Context, *Request, Reply); return Reply; }, - /* Postprocess */ + /* Postprocessor */ [&](auto &Reply) { if (Reply->number()) { CLIENT_DBG( "Exchanged %ld bytes on device %d at %p for %p on device %d", - Size, SrcDevId, SrcPtr, DstPtr, DstDevId); + Size, SrcDevId, SrcPtr, DstPtr, DstDevId) } else { CLIENT_DBG("Could not exchange %ld bytes on device %d at %p for %p " "on device %d", - Size, SrcDevId, SrcPtr, DstPtr, DstDevId); + Size, SrcDevId, SrcPtr, DstPtr, DstDevId) } return Reply->number(); }, @@ -520,7 +459,7 @@ int32_t RemoteOffloadClient::dataExchangeAsync(int32_t SrcDevId, void *SrcPtr, int32_t RemoteOffloadClient::dataDelete(int32_t DeviceId, void *TgtPtr) { return remoteCall( - /* Preprocess */ + /* Preprocessor */ [&](auto &RPCStatus, auto &Context) { auto *Reply = protobuf::Arena::CreateMessage(Arena.get()); auto *Request = protobuf::Arena::CreateMessage(Arena.get()); @@ -528,11 +467,10 @@ int32_t RemoteOffloadClient::dataDelete(int32_t DeviceId, void *TgtPtr) { Request->set_device_id(DeviceId); Request->set_tgt_ptr((uint64_t)TgtPtr); - CLIENT_DBG("Deleting data at %p on device %d", TgtPtr, DeviceId) RPCStatus = Stub->DataDelete(&Context, *Request, Reply); return Reply; }, - /* Postprocess */ + /* Postprocessor */ [&](auto &Reply) { if (!Reply->number()) { CLIENT_DBG("Deleted data at %p on device %d", TgtPtr, DeviceId) @@ -545,18 +483,18 @@ int32_t RemoteOffloadClient::dataDelete(int32_t DeviceId, void *TgtPtr) { /* Error Value */ -1); } -int32_t RemoteOffloadClient::runTargetRegionAsync( - int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, ptrdiff_t *TgtOffsets, - int32_t ArgNum, __tgt_async_info *AsyncInfo) { +int32_t RemoteOffloadClient::runTargetRegion(int32_t DeviceId, + void *TgtEntryPtr, void **TgtArgs, + ptrdiff_t *TgtOffsets, + int32_t ArgNum) { return remoteCall( - /* Preprocess */ + /* Preprocessor */ [&](auto &RPCStatus, auto &Context) { auto *Reply = protobuf::Arena::CreateMessage(Arena.get()); auto *Request = - protobuf::Arena::CreateMessage(Arena.get()); + protobuf::Arena::CreateMessage(Arena.get()); Request->set_device_id(DeviceId); - Request->set_queue_ptr((uint64_t)AsyncInfo); Request->set_tgt_entry_ptr( (uint64_t)RemoteEntries[DeviceId][TgtEntryPtr]); @@ -571,37 +509,34 @@ int32_t RemoteOffloadClient::runTargetRegionAsync( Request->set_arg_num(ArgNum); - CLIENT_DBG("Running target region async on device %d", DeviceId); - RPCStatus = Stub->RunTargetRegionAsync(&Context, *Request, Reply); + RPCStatus = Stub->RunTargetRegion(&Context, *Request, Reply); return Reply; }, - /* Postprocess */ + /* Postprocessor */ [&](auto &Reply) { if (!Reply->number()) { - CLIENT_DBG("Ran target region async on device %d", DeviceId); + CLIENT_DBG("Ran target region async on device %d", DeviceId) } else { - CLIENT_DBG("Could not run target region async on device %d", - DeviceId); + CLIENT_DBG("Could not run target region async on device %d", DeviceId) } return Reply->number(); }, /* Error Value */ -1, - /* Timeout */ false); + /* CanTimeOut */ false); } -int32_t RemoteOffloadClient::runTargetTeamRegionAsync( +int32_t RemoteOffloadClient::runTargetTeamRegion( int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, ptrdiff_t *TgtOffsets, int32_t ArgNum, int32_t TeamNum, int32_t ThreadLimit, - uint64_t LoopTripcount, __tgt_async_info *AsyncInfo) { + uint64_t LoopTripcount) { return remoteCall( - /* Preprocess */ + /* Preprocessor */ [&](auto &RPCStatus, auto &Context) { auto *Reply = protobuf::Arena::CreateMessage(Arena.get()); auto *Request = - protobuf::Arena::CreateMessage(Arena.get()); + protobuf::Arena::CreateMessage(Arena.get()); Request->set_device_id(DeviceId); - Request->set_queue_ptr((uint64_t)AsyncInfo); Request->set_tgt_entry_ptr( (uint64_t)RemoteEntries[DeviceId][TgtEntryPtr]); @@ -620,25 +555,23 @@ int32_t RemoteOffloadClient::runTargetTeamRegionAsync( Request->set_thread_limit(ThreadLimit); Request->set_loop_tripcount(LoopTripcount); - CLIENT_DBG("Running target team region async on device %d", DeviceId); - RPCStatus = Stub->RunTargetTeamRegionAsync(&Context, *Request, Reply); + RPCStatus = Stub->RunTargetTeamRegion(&Context, *Request, Reply); return Reply; }, - /* Postprocess */ + /* Postprocessor */ [&](auto &Reply) { if (!Reply->number()) { - CLIENT_DBG("Ran target team region async on device %d", DeviceId); + CLIENT_DBG("Ran target team region async on device %d", DeviceId) } else { CLIENT_DBG("Could not run target team region async on device %d", - DeviceId); + DeviceId) } return Reply->number(); }, /* Error Value */ -1, - /* Timeout */ false); + /* CanTimeOut */ false); } -// TODO: Better error handling for the next three functions int32_t RemoteClientManager::shutdown(void) { int32_t Ret = 0; for (auto &Client : Clients) @@ -684,7 +617,7 @@ int32_t RemoteClientManager::getNumberOfDevices() { std::pair RemoteClientManager::mapDeviceId(int32_t DeviceId) { for (size_t ClientIdx = 0; ClientIdx < Devices.size(); ClientIdx++) { - if (!(DeviceId >= Devices[ClientIdx])) + if (DeviceId < Devices[ClientIdx]) return {ClientIdx, DeviceId}; DeviceId -= Devices[ClientIdx]; } @@ -711,13 +644,6 @@ __tgt_target_table *RemoteClientManager::loadBinary(int32_t DeviceId, return Clients[ClientIdx].loadBinary(DeviceIdx, Image); } -int64_t RemoteClientManager::synchronize(int32_t DeviceId, - __tgt_async_info *AsyncInfo) { - int32_t ClientIdx, DeviceIdx; - std::tie(ClientIdx, DeviceIdx) = mapDeviceId(DeviceId); - return Clients[ClientIdx].synchronize(DeviceIdx, AsyncInfo); -} - int32_t RemoteClientManager::isDataExchangeable(int32_t SrcDevId, int32_t DstDevId) { int32_t SrcClientIdx, SrcDeviceIdx, DstClientIdx, DstDeviceIdx; @@ -739,51 +665,47 @@ int32_t RemoteClientManager::dataDelete(int32_t DeviceId, void *TgtPtr) { return Clients[ClientIdx].dataDelete(DeviceIdx, TgtPtr); } -int32_t RemoteClientManager::dataSubmitAsync(int32_t DeviceId, void *TgtPtr, - void *HstPtr, int64_t Size, - __tgt_async_info *AsyncInfo) { +int32_t RemoteClientManager::dataSubmit(int32_t DeviceId, void *TgtPtr, + void *HstPtr, int64_t Size) { int32_t ClientIdx, DeviceIdx; std::tie(ClientIdx, DeviceIdx) = mapDeviceId(DeviceId); - return Clients[ClientIdx].dataSubmitAsync(DeviceIdx, TgtPtr, HstPtr, Size, - AsyncInfo); + return Clients[ClientIdx].dataSubmit(DeviceIdx, TgtPtr, HstPtr, Size); } -int32_t RemoteClientManager::dataRetrieveAsync(int32_t DeviceId, void *HstPtr, - void *TgtPtr, int64_t Size, - __tgt_async_info *AsyncInfo) { +int32_t RemoteClientManager::dataRetrieve(int32_t DeviceId, void *HstPtr, + void *TgtPtr, int64_t Size) { int32_t ClientIdx, DeviceIdx; std::tie(ClientIdx, DeviceIdx) = mapDeviceId(DeviceId); - return Clients[ClientIdx].dataRetrieveAsync(DeviceIdx, HstPtr, TgtPtr, Size, - AsyncInfo); + return Clients[ClientIdx].dataRetrieve(DeviceIdx, HstPtr, TgtPtr, Size); } -int32_t RemoteClientManager::dataExchangeAsync(int32_t SrcDevId, void *SrcPtr, - int32_t DstDevId, void *DstPtr, - int64_t Size, - __tgt_async_info *AsyncInfo) { +int32_t RemoteClientManager::dataExchange(int32_t SrcDevId, void *SrcPtr, + int32_t DstDevId, void *DstPtr, + int64_t Size) { int32_t SrcClientIdx, SrcDeviceIdx, DstClientIdx, DstDeviceIdx; std::tie(SrcClientIdx, SrcDeviceIdx) = mapDeviceId(SrcDevId); std::tie(DstClientIdx, DstDeviceIdx) = mapDeviceId(DstDevId); - return Clients[SrcClientIdx].dataExchangeAsync( - SrcDeviceIdx, SrcPtr, DstDeviceIdx, DstPtr, Size, AsyncInfo); + return Clients[SrcClientIdx].dataExchange(SrcDeviceIdx, SrcPtr, DstDeviceIdx, + DstPtr, Size); } -int32_t RemoteClientManager::runTargetRegionAsync( - int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, ptrdiff_t *TgtOffsets, - int32_t ArgNum, __tgt_async_info *AsyncInfo) { +int32_t RemoteClientManager::runTargetRegion(int32_t DeviceId, + void *TgtEntryPtr, void **TgtArgs, + ptrdiff_t *TgtOffsets, + int32_t ArgNum) { int32_t ClientIdx, DeviceIdx; std::tie(ClientIdx, DeviceIdx) = mapDeviceId(DeviceId); - return Clients[ClientIdx].runTargetRegionAsync( - DeviceIdx, TgtEntryPtr, TgtArgs, TgtOffsets, ArgNum, AsyncInfo); + return Clients[ClientIdx].runTargetRegion(DeviceIdx, TgtEntryPtr, TgtArgs, + TgtOffsets, ArgNum); } -int32_t RemoteClientManager::runTargetTeamRegionAsync( +int32_t RemoteClientManager::runTargetTeamRegion( int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, ptrdiff_t *TgtOffsets, int32_t ArgNum, int32_t TeamNum, int32_t ThreadLimit, - uint64_t LoopTripCount, __tgt_async_info *AsyncInfo) { + uint64_t LoopTripCount) { int32_t ClientIdx, DeviceIdx; std::tie(ClientIdx, DeviceIdx) = mapDeviceId(DeviceId); - return Clients[ClientIdx].runTargetTeamRegionAsync( - DeviceIdx, TgtEntryPtr, TgtArgs, TgtOffsets, ArgNum, TeamNum, ThreadLimit, - LoopTripCount, AsyncInfo); + return Clients[ClientIdx].runTargetTeamRegion(DeviceIdx, TgtEntryPtr, TgtArgs, + TgtOffsets, ArgNum, TeamNum, + ThreadLimit, LoopTripCount); } diff --git a/openmp/libomptarget/plugins/remote/src/Client.h b/openmp/libomptarget/plugins/remote/src/Client.h index e29d6eb1c6a6b..5f4006926e934 100644 --- a/openmp/libomptarget/plugins/remote/src/Client.h +++ b/openmp/libomptarget/plugins/remote/src/Client.h @@ -30,11 +30,10 @@ using namespace RemoteOffloading; using namespace google; class RemoteOffloadClient { - const int Timeout; - int DebugLevel; - uint64_t MaxSize; - int64_t BlockSize; + const int Timeout; + const uint64_t MaxSize; + const int64_t BlockSize; std::unique_ptr Stub; std::unique_ptr Arena; @@ -45,8 +44,8 @@ class RemoteOffloadClient { std::map> DevicesToTables; template - auto remoteCall(Fn1 Preprocess, Fn2 Postprocess, TReturn ErrorValue, - bool Timeout = true); + auto remoteCall(Fn1 Preprocessor, Fn2 Postprocessor, TReturn ErrorValue, + bool CanTimeOut = true); public: RemoteOffloadClient(std::shared_ptr Channel, int Timeout, @@ -77,35 +76,29 @@ class RemoteOffloadClient { int32_t initRequires(int64_t RequiresFlags); __tgt_target_table *loadBinary(int32_t DeviceId, __tgt_device_image *Image); - int64_t synchronize(int32_t DeviceId, __tgt_async_info *AsyncInfo); - int32_t isDataExchangeable(int32_t SrcDevId, int32_t DstDevId); void *dataAlloc(int32_t DeviceId, int64_t Size, void *HstPtr); int32_t dataDelete(int32_t DeviceId, void *TgtPtr); - int32_t dataSubmitAsync(int32_t DeviceId, void *TgtPtr, void *HstPtr, - int64_t Size, __tgt_async_info *AsyncInfo); - int32_t dataRetrieveAsync(int32_t DeviceId, void *HstPtr, void *TgtPtr, - int64_t Size, __tgt_async_info *AsyncInfo); + int32_t dataSubmit(int32_t DeviceId, void *TgtPtr, void *HstPtr, + int64_t Size); + int32_t dataRetrieve(int32_t DeviceId, void *HstPtr, void *TgtPtr, + int64_t Size); - int32_t dataExchangeAsync(int32_t SrcDevId, void *SrcPtr, int32_t DstDevId, - void *DstPtr, int64_t Size, - __tgt_async_info *AsyncInfo); - - int32_t runTargetRegionAsync(int32_t DeviceId, void *TgtEntryPtr, - void **TgtArgs, ptrdiff_t *TgtOffsets, - int32_t ArgNum, __tgt_async_info *AsyncInfo); - - int32_t runTargetTeamRegionAsync(int32_t DeviceId, void *TgtEntryPtr, - void **TgtArgs, ptrdiff_t *TgtOffsets, - int32_t ArgNum, int32_t TeamNum, - int32_t ThreadLimit, uint64_t LoopTripCount, - __tgt_async_info *AsyncInfo); + int32_t isDataExchangeable(int32_t SrcDevId, int32_t DstDevId); + int32_t dataExchange(int32_t SrcDevId, void *SrcPtr, int32_t DstDevId, + void *DstPtr, int64_t Size); + + int32_t runTargetRegion(int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, + ptrdiff_t *TgtOffsets, int32_t ArgNum); + int32_t runTargetTeamRegion(int32_t DeviceId, void *TgtEntryPtr, + void **TgtArgs, ptrdiff_t *TgtOffsets, + int32_t ArgNum, int32_t TeamNum, + int32_t ThreadLimit, uint64_t LoopTripCount); }; class RemoteClientManager { private: - std::vector Addresses; std::vector Clients; std::vector Devices; @@ -113,16 +106,16 @@ class RemoteClientManager { int DebugLevel; public: - RemoteClientManager(std::vector Addresses, int Timeout, - uint64_t MaxSize, int64_t BlockSize) - : Addresses(Addresses) { + RemoteClientManager() { + ClientManagerConfigTy Config; + grpc::ChannelArguments ChArgs; ChArgs.SetMaxReceiveMessageSize(-1); DebugLevel = getDebugLevel(); - for (auto Address : Addresses) { + for (auto Address : Config.ServerAddresses) { Clients.push_back(RemoteOffloadClient( grpc::CreateChannel(Address, grpc::InsecureChannelCredentials()), - Timeout, MaxSize, BlockSize)); + Config.Timeout, Config.MaxSize, Config.BlockSize)); } } @@ -138,30 +131,25 @@ class RemoteClientManager { int32_t initRequires(int64_t RequiresFlags); __tgt_target_table *loadBinary(int32_t DeviceId, __tgt_device_image *Image); - int64_t synchronize(int32_t DeviceId, __tgt_async_info *AsyncInfo); - int32_t isDataExchangeable(int32_t SrcDevId, int32_t DstDevId); void *dataAlloc(int32_t DeviceId, int64_t Size, void *HstPtr); int32_t dataDelete(int32_t DeviceId, void *TgtPtr); - int32_t dataSubmitAsync(int32_t DeviceId, void *TgtPtr, void *HstPtr, - int64_t Size, __tgt_async_info *AsyncInfo); - int32_t dataRetrieveAsync(int32_t DeviceId, void *HstPtr, void *TgtPtr, - int64_t Size, __tgt_async_info *AsyncInfo); + int32_t dataSubmit(int32_t DeviceId, void *TgtPtr, void *HstPtr, + int64_t Size); + int32_t dataRetrieve(int32_t DeviceId, void *HstPtr, void *TgtPtr, + int64_t Size); - int32_t dataExchangeAsync(int32_t SrcDevId, void *SrcPtr, int32_t DstDevId, - void *DstPtr, int64_t Size, - __tgt_async_info *AsyncInfo); - - int32_t runTargetRegionAsync(int32_t DeviceId, void *TgtEntryPtr, - void **TgtArgs, ptrdiff_t *TgtOffsets, - int32_t ArgNum, __tgt_async_info *AsyncInfo); - - int32_t runTargetTeamRegionAsync(int32_t DeviceId, void *TgtEntryPtr, - void **TgtArgs, ptrdiff_t *TgtOffsets, - int32_t ArgNum, int32_t TeamNum, - int32_t ThreadLimit, uint64_t LoopTripCount, - __tgt_async_info *AsyncInfo); + int32_t isDataExchangeable(int32_t SrcDevId, int32_t DstDevId); + int32_t dataExchange(int32_t SrcDevId, void *SrcPtr, int32_t DstDevId, + void *DstPtr, int64_t Size); + + int32_t runTargetRegion(int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, + ptrdiff_t *TgtOffsets, int32_t ArgNum); + int32_t runTargetTeamRegion(int32_t DeviceId, void *TgtEntryPtr, + void **TgtArgs, ptrdiff_t *TgtOffsets, + int32_t ArgNum, int32_t TeamNum, + int32_t ThreadLimit, uint64_t LoopTripCount); }; #endif diff --git a/openmp/libomptarget/plugins/remote/src/rtl.cpp b/openmp/libomptarget/plugins/remote/src/rtl.cpp index 26f172a1fdcf7..e80f55c6245b7 100644 --- a/openmp/libomptarget/plugins/remote/src/rtl.cpp +++ b/openmp/libomptarget/plugins/remote/src/rtl.cpp @@ -27,15 +27,7 @@ RemoteClientManager *Manager; __attribute__((constructor(101))) void initRPC() { DP("Init RPC library!\n"); - RPCConfig Config; - parseEnvironment(Config); - - int Timeout = 5; - if (const char *Env1 = std::getenv("LIBOMPTARGET_RPC_LATENCY")) - Timeout = std::stoi(Env1); - - Manager = new RemoteClientManager(Config.ServerAddresses, Timeout, - Config.MaxSize, Config.BlockSize); + Manager = new RemoteClientManager(); } __attribute__((destructor(101))) void deinitRPC() { @@ -76,17 +68,13 @@ __tgt_target_table *__tgt_rtl_load_binary(int32_t DeviceId, return Manager->loadBinary(DeviceId, (__tgt_device_image *)Image); } -int32_t __tgt_rtl_synchronize(int32_t DeviceId, __tgt_async_info *AsyncInfo) { - return Manager->synchronize(DeviceId, AsyncInfo); -} - int32_t __tgt_rtl_is_data_exchangable(int32_t SrcDevId, int32_t DstDevId) { return Manager->isDataExchangeable(SrcDevId, DstDevId); } void *__tgt_rtl_data_alloc(int32_t DeviceId, int64_t Size, void *HstPtr, - int32_t kind) { - if (kind != TARGET_ALLOC_DEFAULT) { + int32_t Kind) { + if (Kind != TARGET_ALLOC_DEFAULT) { REPORT("Invalid target data allocation kind or requested allocator not " "implemented yet\n"); return NULL; @@ -97,24 +85,12 @@ void *__tgt_rtl_data_alloc(int32_t DeviceId, int64_t Size, void *HstPtr, int32_t __tgt_rtl_data_submit(int32_t DeviceId, void *TgtPtr, void *HstPtr, int64_t Size) { - return Manager->dataSubmitAsync(DeviceId, TgtPtr, HstPtr, Size, nullptr); -} - -int32_t __tgt_rtl_data_submit_async(int32_t DeviceId, void *TgtPtr, - void *HstPtr, int64_t Size, - __tgt_async_info *AsyncInfo) { - return Manager->dataSubmitAsync(DeviceId, TgtPtr, HstPtr, Size, AsyncInfo); + return Manager->dataSubmit(DeviceId, TgtPtr, HstPtr, Size); } int32_t __tgt_rtl_data_retrieve(int32_t DeviceId, void *HstPtr, void *TgtPtr, int64_t Size) { - return Manager->dataRetrieveAsync(DeviceId, HstPtr, TgtPtr, Size, nullptr); -} - -int32_t __tgt_rtl_data_retrieve_async(int32_t DeviceId, void *HstPtr, - void *TgtPtr, int64_t Size, - __tgt_async_info *AsyncInfo) { - return Manager->dataRetrieveAsync(DeviceId, HstPtr, TgtPtr, Size, AsyncInfo); + return Manager->dataRetrieve(DeviceId, HstPtr, TgtPtr, Size); } int32_t __tgt_rtl_data_delete(int32_t DeviceId, void *TgtPtr) { @@ -123,31 +99,15 @@ int32_t __tgt_rtl_data_delete(int32_t DeviceId, void *TgtPtr) { int32_t __tgt_rtl_data_exchange(int32_t SrcDevId, void *SrcPtr, int32_t DstDevId, void *DstPtr, int64_t Size) { - return Manager->dataExchangeAsync(SrcDevId, SrcPtr, DstDevId, DstPtr, Size, - nullptr); + return Manager->dataExchange(SrcDevId, SrcPtr, DstDevId, DstPtr, Size); } -int32_t __tgt_rtl_data_exchange_async(int32_t SrcDevId, void *SrcPtr, - int32_t DstDevId, void *DstPtr, - int64_t Size, - __tgt_async_info *AsyncInfo) { - return Manager->dataExchangeAsync(SrcDevId, SrcPtr, DstDevId, DstPtr, Size, - AsyncInfo); -} int32_t __tgt_rtl_run_target_region(int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, ptrdiff_t *TgtOffsets, int32_t ArgNum) { - return Manager->runTargetRegionAsync(DeviceId, TgtEntryPtr, TgtArgs, - TgtOffsets, ArgNum, nullptr); -} - -int32_t __tgt_rtl_run_target_region_async(int32_t DeviceId, void *TgtEntryPtr, - void **TgtArgs, ptrdiff_t *TgtOffsets, - int32_t ArgNum, - __tgt_async_info *AsyncInfo) { - return Manager->runTargetRegionAsync(DeviceId, TgtEntryPtr, TgtArgs, - TgtOffsets, ArgNum, AsyncInfo); + return Manager->runTargetRegion(DeviceId, TgtEntryPtr, TgtArgs, TgtOffsets, + ArgNum); } int32_t __tgt_rtl_run_target_team_region(int32_t DeviceId, void *TgtEntryPtr, @@ -155,18 +115,9 @@ int32_t __tgt_rtl_run_target_team_region(int32_t DeviceId, void *TgtEntryPtr, int32_t ArgNum, int32_t TeamNum, int32_t ThreadLimit, uint64_t LoopTripCount) { - return Manager->runTargetTeamRegionAsync(DeviceId, TgtEntryPtr, TgtArgs, - TgtOffsets, ArgNum, TeamNum, - ThreadLimit, LoopTripCount, nullptr); -} - -int32_t __tgt_rtl_run_target_team_region_async( - int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, ptrdiff_t *TgtOffsets, - int32_t ArgNum, int32_t TeamNum, int32_t ThreadLimit, - uint64_t LoopTripCount, __tgt_async_info *AsyncInfo) { - return Manager->runTargetTeamRegionAsync( - DeviceId, TgtEntryPtr, TgtArgs, TgtOffsets, ArgNum, TeamNum, ThreadLimit, - LoopTripCount, AsyncInfo); + return Manager->runTargetTeamRegion(DeviceId, TgtEntryPtr, TgtArgs, + TgtOffsets, ArgNum, TeamNum, ThreadLimit, + LoopTripCount); } // Exposed library API function From db188adfb12f6783c5419d5165a1123b9f5b56b0 Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Fri, 2 Jul 2021 15:32:53 +0000 Subject: [PATCH 545/619] [mlir][Vector] NFC - Compress vector to outerproduct lowering. The implementation has become too unwieldy and cognitive overhead wins. Instead compress the implementation in preparation for additional lowering paths. Differential Revision: https://reviews.llvm.org/D105359 --- mlir/lib/Dialect/Vector/VectorTransforms.cpp | 204 +++++++++++-------- 1 file changed, 115 insertions(+), 89 deletions(-) diff --git a/mlir/lib/Dialect/Vector/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/VectorTransforms.cpp index 1a7d2e80d56f7..39a6c39059fba 100644 --- a/mlir/lib/Dialect/Vector/VectorTransforms.cpp +++ b/mlir/lib/Dialect/Vector/VectorTransforms.cpp @@ -1816,6 +1816,72 @@ ContractionOpToMatmulOpLowering::matchAndRewrite(vector::ContractionOp op, return success(); } +namespace { +struct IteratorType { + IteratorType(StringRef strRef) : strRef(strRef) {} + bool isOfType(Attribute attr) const { + auto sAttr = attr.dyn_cast(); + return sAttr && sAttr.getValue() == strRef; + } + StringRef strRef; +}; +struct Par : public IteratorType { + Par() : IteratorType(getParallelIteratorTypeName()) {} +}; +struct Red : public IteratorType { + Red() : IteratorType(getReductionIteratorTypeName()) {} +}; + +// Unroll outer-products along reduction. +struct UnrolledOuterProductEmitter { + using MapList = ArrayRef>; + + UnrolledOuterProductEmitter(PatternRewriter &rewriter, + vector::ContractionOp op) + : rewriter(rewriter), loc(op.getLoc()), kind(op.kind()), + iterators(op.iterator_types()), maps(op.getIndexingMaps()), op(op) {} + + Value t(Value v) { + static constexpr std::array perm = {1, 0}; + return rewriter.create(loc, v, perm); + } + + bool iters(ArrayRef its) { + if (its.size() != iterators.size()) + return false; + for (int i = 0, e = its.size(); i != e; ++i) { + if (!its[i].isOfType(iterators[i])) + return false; + } + return true; + } + + bool layout(MapList l) { + auto infer = [](MapList m) { return AffineMap::inferFromExprList(m); }; + return maps == infer(l); + } + + LogicalResult outer_prod(Value lhs, Value rhs, Value res, int reductionSize) { + assert(reductionSize > 0); + for (int64_t k = 0; k < reductionSize; ++k) { + Value a = rewriter.create(loc, lhs, k); + Value b = rewriter.create(loc, rhs, k); + res = rewriter.create(loc, res.getType(), a, b, + res, kind); + } + rewriter.replaceOp(op, res); + return success(); + } + + PatternRewriter &rewriter; + Location loc; + vector::CombiningKind kind; + ArrayAttr iterators; + SmallVector maps; + Operation *op; +}; +} // namespace + /// Progressively lower a `vector.contract %a, %b, %c` with row-major matmul /// semantics to a reduction_size-unrolled sequence: /// ``` @@ -1844,104 +1910,64 @@ LogicalResult ContractionOpToOuterProductOpLowering::matchAndRewrite( if (failed(filter(op))) return failure(); - Location loc = op.getLoc(); - int64_t reductionSize = 0; VectorType lhsType = op.getLhsType(); Value lhs = op.lhs(), rhs = op.rhs(), res = op.acc(); // Set up the parallel/reduction structure in right form. - using MapList = ArrayRef>; - auto infer = [](MapList m) { return AffineMap::inferFromExprList(m); }; AffineExpr m, n, k; bindDims(rewriter.getContext(), m, n, k); - static constexpr std::array perm = {1, 0}; - auto iteratorTypes = op.iterator_types().getValue(); - SmallVector maps = op.getIndexingMaps(); - if (isParallelIterator(iteratorTypes[0]) && - isParallelIterator(iteratorTypes[1]) && - isReductionIterator(iteratorTypes[2])) { - // - // Two outer parallel, one inner reduction (matmat flavor). - // - if (maps == infer({{m, k}, {k, n}, {m, n}})) { - // This is the classical row-major matmul. Just permute the lhs. - reductionSize = lhsType.getDimSize(1); - lhs = rewriter.create(loc, lhs, perm); - } else if (maps == infer({{m, k}, {n, k}, {m, n}})) { - // TODO: may be better to fail and use some vector -> scalar reduction. - reductionSize = lhsType.getDimSize(1); - lhs = rewriter.create(loc, lhs, perm); - rhs = rewriter.create(loc, rhs, perm); - } else if (maps == infer({{k, m}, {k, n}, {m, n}})) { - // No need to permute anything. - reductionSize = lhsType.getDimSize(0); - } else if (maps == infer({{k, m}, {n, k}, {m, n}})) { - // Just permute the rhs. - reductionSize = lhsType.getDimSize(0); - rhs = rewriter.create(loc, rhs, perm); - } else if (maps == infer({{m, k}, {k, n}, {n, m}})) { - // This is the classical row-major matmul. Just permute the lhs. - reductionSize = lhsType.getDimSize(1); - Value tmp = rhs; - rhs = rewriter.create(loc, lhs, perm); - lhs = tmp; - } else if (maps == infer({{m, k}, {n, k}, {n, m}})) { - // TODO: may be better to fail and use some vector -> scalar reduction. - reductionSize = lhsType.getDimSize(1); - Value tmp = rhs; - rhs = rewriter.create(loc, lhs, perm); - lhs = rewriter.create(loc, tmp, perm); - } else if (maps == infer({{k, m}, {k, n}, {n, m}})) { - // No need to permute anything, but still swap lhs and rhs. - reductionSize = lhsType.getDimSize(0); - std::swap(lhs, rhs); - } else if (maps == infer({{k, m}, {n, k}, {n, m}})) { - // Just permute the rhs. - reductionSize = lhsType.getDimSize(0); - Value tmp = lhs; - lhs = rewriter.create(loc, rhs, perm); - rhs = tmp; - } else { - return failure(); - } - } else if (isParallelIterator(iteratorTypes[0]) && - isReductionIterator(iteratorTypes[1])) { - // - // One outer parallel, one inner reduction (matvec flavor) - // - if (maps == infer({{m, n}, {n}, {m}})) { - // Case mat-vec: transpose. - reductionSize = lhsType.getDimSize(1); - lhs = rewriter.create(loc, lhs, perm); - } else if (maps == infer({{n, m}, {n}, {m}})) { - // Case mat-trans-vec: ready to go. - reductionSize = lhsType.getDimSize(0); - } else if (maps == infer({{n}, {m, n}, {m}})) { - // Case vec-mat: swap and transpose. - reductionSize = lhsType.getDimSize(0); - std::swap(lhs, rhs); - lhs = rewriter.create(loc, lhs, perm); - } else if (maps == infer({{n}, {n, m}, {m}})) { - // Case vec-mat-trans: swap and ready to go. - reductionSize = lhsType.getDimSize(0); - std::swap(lhs, rhs); - } else { - return failure(); - } - } else { + + // + // Two outer parallel, one inner reduction (matmat flavor). + // + UnrolledOuterProductEmitter e(rewriter, op); + if (e.iters({Par(), Par(), Red()})) { + // Classical row-major matmul: Just permute the lhs. + if (e.layout({{m, k}, {k, n}, {m, n}})) + return e.outer_prod(e.t(lhs), rhs, res, lhsType.getDimSize(1)); + // TODO: may be better to fail and use some vector -> scalar reduction. + if (e.layout({{m, k}, {n, k}, {m, n}})) + return e.outer_prod(e.t(lhs), e.t(rhs), res, lhsType.getDimSize(1)); + // No need to permute anything. + if (e.layout({{k, m}, {k, n}, {m, n}})) + return e.outer_prod(lhs, rhs, res, lhsType.getDimSize(0)); + // Just permute the rhs. + if (e.layout({{k, m}, {n, k}, {m, n}})) + return e.outer_prod(lhs, e.t(rhs), res, lhsType.getDimSize(0)); + // Transposed output: swap RHS and LHS. + // Classical row-major matmul: permute the lhs. + if (e.layout({{m, k}, {k, n}, {n, m}})) + return e.outer_prod(rhs, e.t(lhs), res, lhsType.getDimSize(1)); + // TODO: may be better to fail and use some vector -> scalar reduction. + if (e.layout({{m, k}, {n, k}, {n, m}})) + return e.outer_prod(e.t(rhs), e.t(lhs), res, lhsType.getDimSize(1)); + if (e.layout({{k, m}, {k, n}, {n, m}})) + return e.outer_prod(rhs, lhs, res, lhsType.getDimSize(0)); + if (e.layout({{k, m}, {n, k}, {n, m}})) + return e.outer_prod(e.t(rhs), lhs, res, lhsType.getDimSize(0)); return failure(); } - assert(reductionSize > 0); - - // Unroll outer-products along reduction. - for (int64_t k = 0; k < reductionSize; ++k) { - Value a = rewriter.create(op.getLoc(), lhs, k); - Value b = rewriter.create(op.getLoc(), rhs, k); - res = rewriter.create(op.getLoc(), res.getType(), a, - b, res, op.kind()); + + // + // One outer parallel, one inner reduction (matvec flavor) + // + if (e.iters({Par(), Red()})) { + // Case mat-vec: transpose. + if (e.layout({{m, n}, {n}, {m}})) + return e.outer_prod(e.t(lhs), rhs, res, lhsType.getDimSize(1)); + // Case mat-trans-vec: ready to go. + if (e.layout({{n, m}, {n}, {m}})) + return e.outer_prod(lhs, rhs, res, lhsType.getDimSize(0)); + // Case vec-mat: swap and transpose. + if (e.layout({{n}, {m, n}, {m}})) + return e.outer_prod(e.t(rhs), lhs, res, lhsType.getDimSize(0)); + // Case vec-mat-trans: swap and ready to go. + if (e.layout({{n}, {n, m}, {m}})) + return e.outer_prod(rhs, lhs, res, lhsType.getDimSize(0)); + return failure(); } - rewriter.replaceOp(op, res); - return success(); + + return failure(); } LogicalResult From c5d725172d4d919dcdc5a64a21699b6309a7b872 Mon Sep 17 00:00:00 2001 From: Marco Vanotti Date: Fri, 2 Jul 2021 09:44:54 -0700 Subject: [PATCH 546/619] Revert "Refactor mutation strategies into a standalone library" This reverts commit 361f742f168de0f0f256802a329c19d081615d0d. --- compiler-rt/lib/fuzzer/CMakeLists.txt | 45 +- ...tagenCrossOver.cpp => FuzzerCrossOver.cpp} | 13 +- compiler-rt/lib/fuzzer/FuzzerDefs.h | 36 +- compiler-rt/lib/fuzzer/FuzzerDictionary.h | 120 +++ compiler-rt/lib/fuzzer/FuzzerDriver.cpp | 8 +- compiler-rt/lib/fuzzer/FuzzerInternal.h | 5 - compiler-rt/lib/fuzzer/FuzzerLoop.cpp | 9 +- compiler-rt/lib/fuzzer/FuzzerMutate.cpp | 609 ++++++++++- compiler-rt/lib/fuzzer/FuzzerMutate.h | 140 ++- compiler-rt/lib/fuzzer/FuzzerRandom.h | 1 - compiler-rt/lib/fuzzer/FuzzerTracePC.cpp | 1 + compiler-rt/lib/fuzzer/FuzzerTracePC.h | 4 +- compiler-rt/lib/fuzzer/FuzzerUtil.h | 12 +- compiler-rt/lib/fuzzer/FuzzerUtilFuchsia.cpp | 5 + compiler-rt/lib/fuzzer/FuzzerUtilPosix.cpp | 5 + compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp | 21 + compiler-rt/lib/fuzzer/build.sh | 10 +- compiler-rt/lib/fuzzer/mutagen/CMakeLists.txt | 59 -- compiler-rt/lib/fuzzer/mutagen/Mutagen.cpp | 100 -- compiler-rt/lib/fuzzer/mutagen/Mutagen.h | 119 --- .../lib/fuzzer/mutagen/MutagenDictionary.h | 85 -- .../lib/fuzzer/mutagen/MutagenDispatcher.cpp | 659 ------------ .../lib/fuzzer/mutagen/MutagenDispatcher.h | 190 ---- .../lib/fuzzer/mutagen/MutagenSequence.h | 101 -- compiler-rt/lib/fuzzer/mutagen/MutagenUtil.h | 24 - .../lib/fuzzer/mutagen/MutagenUtilPosix.cpp | 23 - .../lib/fuzzer/mutagen/MutagenUtilWindows.cpp | 41 - compiler-rt/lib/fuzzer/mutagen/build.sh | 12 - compiler-rt/lib/fuzzer/tests/CMakeLists.txt | 32 +- .../lib/fuzzer/tests/FuzzerUnittest.cpp | 477 +++++++++ .../lib/fuzzer/tests/MutagenUnittest.cpp | 971 ------------------ compiler-rt/test/fuzzer/CMakeLists.txt | 1 - 32 files changed, 1367 insertions(+), 2571 deletions(-) rename compiler-rt/lib/fuzzer/{mutagen/MutagenCrossOver.cpp => FuzzerCrossOver.cpp} (86%) create mode 100644 compiler-rt/lib/fuzzer/FuzzerDictionary.h delete mode 100644 compiler-rt/lib/fuzzer/mutagen/CMakeLists.txt delete mode 100644 compiler-rt/lib/fuzzer/mutagen/Mutagen.cpp delete mode 100644 compiler-rt/lib/fuzzer/mutagen/Mutagen.h delete mode 100644 compiler-rt/lib/fuzzer/mutagen/MutagenDictionary.h delete mode 100644 compiler-rt/lib/fuzzer/mutagen/MutagenDispatcher.cpp delete mode 100644 compiler-rt/lib/fuzzer/mutagen/MutagenDispatcher.h delete mode 100644 compiler-rt/lib/fuzzer/mutagen/MutagenSequence.h delete mode 100644 compiler-rt/lib/fuzzer/mutagen/MutagenUtil.h delete mode 100644 compiler-rt/lib/fuzzer/mutagen/MutagenUtilPosix.cpp delete mode 100644 compiler-rt/lib/fuzzer/mutagen/MutagenUtilWindows.cpp delete mode 100755 compiler-rt/lib/fuzzer/mutagen/build.sh delete mode 100644 compiler-rt/lib/fuzzer/tests/MutagenUnittest.cpp diff --git a/compiler-rt/lib/fuzzer/CMakeLists.txt b/compiler-rt/lib/fuzzer/CMakeLists.txt index e27cf8d5e6fe8..3201ed279a621 100644 --- a/compiler-rt/lib/fuzzer/CMakeLists.txt +++ b/compiler-rt/lib/fuzzer/CMakeLists.txt @@ -1,4 +1,5 @@ set(LIBFUZZER_SOURCES + FuzzerCrossOver.cpp FuzzerDataFlowTrace.cpp FuzzerDriver.cpp FuzzerExtFunctionsDlsym.cpp @@ -28,6 +29,7 @@ set(LIBFUZZER_HEADERS FuzzerCorpus.h FuzzerDataFlowTrace.h FuzzerDefs.h + FuzzerDictionary.h FuzzerExtFunctions.def FuzzerExtFunctions.h FuzzerFlags.def @@ -82,32 +84,6 @@ else() endif() endif() -macro(partially_link_libcxx name dir arch) - if(${arch} MATCHES "i386") - set(EMULATION_ARGUMENT "-m" "elf_i386") - else() - set(EMULATION_ARGUMENT "") - endif() - set(cxx_${arch}_merge_dir "${CMAKE_CURRENT_BINARY_DIR}/cxx_${arch}_merge.dir") - file(MAKE_DIRECTORY ${cxx_${arch}_merge_dir}) - add_custom_command(TARGET clang_rt.${name}-${arch} POST_BUILD - COMMAND ${CMAKE_LINKER} ${EMULATION_ARGUMENT} --whole-archive "$" --no-whole-archive ${dir}/lib/libc++.a -r -o ${name}.o - COMMAND ${CMAKE_OBJCOPY} --localize-hidden ${name}.o - COMMAND ${CMAKE_COMMAND} -E remove "$" - COMMAND ${CMAKE_AR} qcs "$" ${name}.o - WORKING_DIRECTORY ${cxx_${arch}_merge_dir} - ) -endmacro() - -add_subdirectory(mutagen) -foreach(X IN LISTS LIBFUZZER_MUTAGEN_SOURCES) - list(APPEND LIBFUZZER_SOURCES "mutagen/${X}") -endforeach() -foreach(X IN LISTS LIBFUZZER_MUTAGEN_HEADERS) - list(APPEND LIBFUZZER_HEADERS "mutagen/${X}") -endforeach() -include_directories(.) - add_compiler_rt_component(fuzzer) add_compiler_rt_object_libraries(RTfuzzer @@ -159,6 +135,23 @@ add_compiler_rt_runtime(clang_rt.fuzzer_interceptors if(OS_NAME MATCHES "Linux|Fuchsia" AND COMPILER_RT_LIBCXX_PATH AND COMPILER_RT_LIBCXXABI_PATH) + macro(partially_link_libcxx name dir arch) + if(${arch} MATCHES "i386") + set(EMULATION_ARGUMENT "-m" "elf_i386") + else() + set(EMULATION_ARGUMENT "") + endif() + set(cxx_${arch}_merge_dir "${CMAKE_CURRENT_BINARY_DIR}/cxx_${arch}_merge.dir") + file(MAKE_DIRECTORY ${cxx_${arch}_merge_dir}) + add_custom_command(TARGET clang_rt.${name}-${arch} POST_BUILD + COMMAND ${CMAKE_LINKER} ${EMULATION_ARGUMENT} --whole-archive "$" --no-whole-archive ${dir}/lib/libc++.a -r -o ${name}.o + COMMAND ${CMAKE_OBJCOPY} --localize-hidden ${name}.o + COMMAND ${CMAKE_COMMAND} -E remove "$" + COMMAND ${CMAKE_AR} qcs "$" ${name}.o + WORKING_DIRECTORY ${cxx_${arch}_merge_dir} + ) + endmacro() + foreach(arch ${FUZZER_SUPPORTED_ARCH}) get_target_flags_for_arch(${arch} TARGET_CFLAGS) set(LIBCXX_${arch}_PREFIX ${CMAKE_CURRENT_BINARY_DIR}/libcxx_fuzzer_${arch}) diff --git a/compiler-rt/lib/fuzzer/mutagen/MutagenCrossOver.cpp b/compiler-rt/lib/fuzzer/FuzzerCrossOver.cpp similarity index 86% rename from compiler-rt/lib/fuzzer/mutagen/MutagenCrossOver.cpp rename to compiler-rt/lib/fuzzer/FuzzerCrossOver.cpp index 0fcffaf68ddf4..83d9f8d47cb18 100644 --- a/compiler-rt/lib/fuzzer/mutagen/MutagenCrossOver.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerCrossOver.cpp @@ -1,4 +1,4 @@ -//===- MutagenCrossOver.cpp - Cross over two test inputs ------------------===// +//===- FuzzerCrossOver.cpp - Cross over two test inputs -------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -8,11 +8,12 @@ // Cross over test inputs. //===----------------------------------------------------------------------===// +#include "FuzzerDefs.h" +#include "FuzzerMutate.h" #include "FuzzerRandom.h" -#include "MutagenDispatcher.h" #include -namespace mutagen { +namespace fuzzer { // Cross Data1 and Data2, store the result (up to MaxOutSize bytes) in Out. size_t MutationDispatcher::CrossOver(const uint8_t *Data1, size_t Size1, @@ -39,12 +40,12 @@ size_t MutationDispatcher::CrossOver(const uint8_t *Data1, size_t Size1, (*InPos) += ExtraSize; } // Use the other input data on the next iteration. - InPos = CurrentlyUsingFirstData ? &Pos2 : &Pos1; + InPos = CurrentlyUsingFirstData ? &Pos2 : &Pos1; InSize = CurrentlyUsingFirstData ? Size2 : Size1; - Data = CurrentlyUsingFirstData ? Data2 : Data1; + Data = CurrentlyUsingFirstData ? Data2 : Data1; CurrentlyUsingFirstData = !CurrentlyUsingFirstData; } return OutPos; } -} // namespace mutagen +} // namespace fuzzer diff --git a/compiler-rt/lib/fuzzer/FuzzerDefs.h b/compiler-rt/lib/fuzzer/FuzzerDefs.h index 36820b61c2aa5..1a2752af2f4d5 100644 --- a/compiler-rt/lib/fuzzer/FuzzerDefs.h +++ b/compiler-rt/lib/fuzzer/FuzzerDefs.h @@ -15,18 +15,21 @@ #include #include #include -#include #include #include #include #include + namespace fuzzer { template T Min(T a, T b) { return a < b ? a : b; } template T Max(T a, T b) { return a > b ? a : b; } class Random; +class Dictionary; +class DictionaryEntry; +class MutationDispatcher; struct FuzzingOptions; class InputCorpus; struct InputInfo; @@ -57,37 +60,6 @@ using Set = std::set, fuzzer_allocator>; typedef Vector Unit; typedef Vector UnitVector; - -// A simple POD sized array of bytes. -template class FixedWord { -public: - static const size_t kMaxSize = kMaxSizeT; - FixedWord() { memset(Data, 0, kMaxSize); } - FixedWord(const uint8_t *B, size_t S) { Set(B, S); } - - void Set(const uint8_t *B, size_t S) { - static_assert(kMaxSizeT <= std::numeric_limits::max(), - "FixedWord::kMaxSizeT cannot fit in a uint8_t."); - assert(S <= kMaxSize); - memcpy(Data, B, S); - Size = static_cast(S); - } - - bool operator==(const FixedWord &w) const { - return Size == w.Size && 0 == memcmp(Data, w.Data, Size); - } - - static size_t GetMaxSize() { return kMaxSize; } - const uint8_t *data() const { return Data; } - uint8_t size() const { return Size; } - -private: - uint8_t Size = 0; - uint8_t Data[kMaxSize]; -}; - -typedef FixedWord<64> Word; - typedef int (*UserCallback)(const uint8_t *Data, size_t Size); int FuzzerDriver(int *argc, char ***argv, UserCallback Callback); diff --git a/compiler-rt/lib/fuzzer/FuzzerDictionary.h b/compiler-rt/lib/fuzzer/FuzzerDictionary.h new file mode 100644 index 0000000000000..db55907d93631 --- /dev/null +++ b/compiler-rt/lib/fuzzer/FuzzerDictionary.h @@ -0,0 +1,120 @@ +//===- FuzzerDictionary.h - Internal header for the Fuzzer ------*- C++ -* ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// fuzzer::Dictionary +//===----------------------------------------------------------------------===// + +#ifndef LLVM_FUZZER_DICTIONARY_H +#define LLVM_FUZZER_DICTIONARY_H + +#include "FuzzerDefs.h" +#include "FuzzerIO.h" +#include "FuzzerUtil.h" +#include +#include + +namespace fuzzer { +// A simple POD sized array of bytes. +template class FixedWord { +public: + static const size_t kMaxSize = kMaxSizeT; + FixedWord() {} + FixedWord(const uint8_t *B, size_t S) { Set(B, S); } + + void Set(const uint8_t *B, size_t S) { + static_assert(kMaxSizeT <= std::numeric_limits::max(), + "FixedWord::kMaxSizeT cannot fit in a uint8_t."); + assert(S <= kMaxSize); + memcpy(Data, B, S); + Size = static_cast(S); + } + + bool operator==(const FixedWord &w) const { + return Size == w.Size && 0 == memcmp(Data, w.Data, Size); + } + + static size_t GetMaxSize() { return kMaxSize; } + const uint8_t *data() const { return Data; } + uint8_t size() const { return Size; } + +private: + uint8_t Size = 0; + uint8_t Data[kMaxSize]; +}; + +typedef FixedWord<64> Word; + +class DictionaryEntry { + public: + DictionaryEntry() {} + DictionaryEntry(Word W) : W(W) {} + DictionaryEntry(Word W, size_t PositionHint) : W(W), PositionHint(PositionHint) {} + const Word &GetW() const { return W; } + + bool HasPositionHint() const { return PositionHint != std::numeric_limits::max(); } + size_t GetPositionHint() const { + assert(HasPositionHint()); + return PositionHint; + } + void IncUseCount() { UseCount++; } + void IncSuccessCount() { SuccessCount++; } + size_t GetUseCount() const { return UseCount; } + size_t GetSuccessCount() const {return SuccessCount; } + + void Print(const char *PrintAfter = "\n") { + PrintASCII(W.data(), W.size()); + if (HasPositionHint()) + Printf("@%zd", GetPositionHint()); + Printf("%s", PrintAfter); + } + +private: + Word W; + size_t PositionHint = std::numeric_limits::max(); + size_t UseCount = 0; + size_t SuccessCount = 0; +}; + +class Dictionary { + public: + static const size_t kMaxDictSize = 1 << 14; + + bool ContainsWord(const Word &W) const { + return std::any_of(begin(), end(), [&](const DictionaryEntry &DE) { + return DE.GetW() == W; + }); + } + const DictionaryEntry *begin() const { return &DE[0]; } + const DictionaryEntry *end() const { return begin() + Size; } + DictionaryEntry & operator[] (size_t Idx) { + assert(Idx < Size); + return DE[Idx]; + } + void push_back(DictionaryEntry DE) { + if (Size < kMaxDictSize) + this->DE[Size++] = DE; + } + void clear() { Size = 0; } + bool empty() const { return Size == 0; } + size_t size() const { return Size; } + +private: + DictionaryEntry DE[kMaxDictSize]; + size_t Size = 0; +}; + +// Parses one dictionary entry. +// If successful, write the enty to Unit and returns true, +// otherwise returns false. +bool ParseOneDictionaryEntry(const std::string &Str, Unit *U); +// Parses the dictionary file, fills Units, returns true iff all lines +// were parsed successfully. +bool ParseDictionaryFile(const std::string &Text, Vector *Units); + +} // namespace fuzzer + +#endif // LLVM_FUZZER_DICTIONARY_H diff --git a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp index 38efc2e18863c..ceaa9070512f0 100644 --- a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp @@ -19,16 +19,15 @@ #include "FuzzerPlatform.h" #include "FuzzerRandom.h" #include "FuzzerTracePC.h" -#include "mutagen/MutagenDispatcher.h" #include #include #include #include #include -#include #include #include #include +#include // This function should be present in the libFuzzer so that the client // binary can test for its existence. @@ -804,9 +803,8 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) { ReadCorpora(*Inputs, {})); } - LLVMMutagenConfiguration Config; - ConfigureMutagen(Seed, Options, &Config); - auto *MD = new MutationDispatcher(&Config); + Random Rand(Seed); + auto *MD = new MutationDispatcher(Rand, Options); auto *Corpus = new InputCorpus(Options.OutputCorpus, Entropic); auto *F = new Fuzzer(Callback, *Corpus, *MD, Options); diff --git a/compiler-rt/lib/fuzzer/FuzzerInternal.h b/compiler-rt/lib/fuzzer/FuzzerInternal.h index a629c3d02f774..37c8a01dc3c64 100644 --- a/compiler-rt/lib/fuzzer/FuzzerInternal.h +++ b/compiler-rt/lib/fuzzer/FuzzerInternal.h @@ -18,7 +18,6 @@ #include "FuzzerOptions.h" #include "FuzzerSHA1.h" #include "FuzzerValueBitMap.h" -#include "mutagen/MutagenDispatcher.h" #include #include #include @@ -27,12 +26,8 @@ #include namespace fuzzer { -namespace { using namespace std::chrono; -using mutagen::MutationDispatcher; - -} // namespace class Fuzzer { public: diff --git a/compiler-rt/lib/fuzzer/FuzzerLoop.cpp b/compiler-rt/lib/fuzzer/FuzzerLoop.cpp index d50277e9f2a0d..86a78ab751741 100644 --- a/compiler-rt/lib/fuzzer/FuzzerLoop.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerLoop.cpp @@ -177,7 +177,7 @@ void Fuzzer::DumpCurrentUnit(const char *Prefix) { if (!CurrentUnitData) return; // Happens when running individual inputs. ScopedDisableMsanInterceptorChecks S; - PrintMutationSequence(MD); + MD.PrintMutationSequence(); Printf("; base unit: %s\n", Sha1ToString(BaseSha1).c_str()); size_t UnitSize = CurrentUnitSize; if (UnitSize <= kMaxUnitSizeToPrint) { @@ -539,9 +539,8 @@ bool Fuzzer::RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile, TimeOfUnit, UniqFeatureSetTmp, DFT, II); WriteFeatureSetToFile(Options.FeaturesDir, Sha1ToString(NewII->Sha1), NewII->UniqFeatureSet); - const auto &MS = MD.MutationSequence(); WriteEdgeToMutationGraphFile(Options.MutationGraphFile, NewII, II, - MS.GetString()); + MD.MutationSequence()); return true; } if (II && FoundUniqFeaturesOfII && @@ -653,7 +652,7 @@ void Fuzzer::PrintStatusForNewUnit(const Unit &U, const char *Text) { PrintStats(Text, ""); if (Options.Verbosity) { Printf(" L: %zd/%zd ", U.size(), Corpus.MaxInputSize()); - PrintMutationSequence(MD, Options.Verbosity >= 2); + MD.PrintMutationSequence(Options.Verbosity >= 2); Printf("\n"); } } @@ -899,7 +898,7 @@ void Fuzzer::Loop(Vector &CorporaFiles) { } PrintStats("DONE ", "\n"); - PrintRecommendedDictionary(MD); + MD.PrintRecommendedDictionary(); } void Fuzzer::MinimizeCrashLoop(const Unit &U) { diff --git a/compiler-rt/lib/fuzzer/FuzzerMutate.cpp b/compiler-rt/lib/fuzzer/FuzzerMutate.cpp index bbce4aab58024..4650f1beceacd 100644 --- a/compiler-rt/lib/fuzzer/FuzzerMutate.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerMutate.cpp @@ -1,77 +1,497 @@ -//===- FuzzerMutate.cpp - Mutation utilities -----------------------------===// +//===- FuzzerMutate.cpp - Mutate a test input -----------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// Mutate utilities. +// Mutate a test input. //===----------------------------------------------------------------------===// -#include "FuzzerMutate.h" +#include "FuzzerDefs.h" #include "FuzzerExtFunctions.h" #include "FuzzerIO.h" +#include "FuzzerMutate.h" +#include "FuzzerOptions.h" #include "FuzzerTracePC.h" -#include "FuzzerUtil.h" namespace fuzzer { -namespace { -void FromTORC4(size_t Idx, uint32_t *A, uint32_t *B) { - const auto &X = TPC.TORC4.Get(Idx); - *A = X.A; - *B = X.B; +const size_t Dictionary::kMaxDictSize; +static const size_t kMaxMutationsToPrint = 10; + +static void PrintASCII(const Word &W, const char *PrintAfter) { + PrintASCII(W.data(), W.size(), PrintAfter); +} + +MutationDispatcher::MutationDispatcher(Random &Rand, + const FuzzingOptions &Options) + : Rand(Rand), Options(Options) { + DefaultMutators.insert( + DefaultMutators.begin(), + { + {&MutationDispatcher::Mutate_EraseBytes, "EraseBytes"}, + {&MutationDispatcher::Mutate_InsertByte, "InsertByte"}, + {&MutationDispatcher::Mutate_InsertRepeatedBytes, + "InsertRepeatedBytes"}, + {&MutationDispatcher::Mutate_ChangeByte, "ChangeByte"}, + {&MutationDispatcher::Mutate_ChangeBit, "ChangeBit"}, + {&MutationDispatcher::Mutate_ShuffleBytes, "ShuffleBytes"}, + {&MutationDispatcher::Mutate_ChangeASCIIInteger, "ChangeASCIIInt"}, + {&MutationDispatcher::Mutate_ChangeBinaryInteger, "ChangeBinInt"}, + {&MutationDispatcher::Mutate_CopyPart, "CopyPart"}, + {&MutationDispatcher::Mutate_CrossOver, "CrossOver"}, + {&MutationDispatcher::Mutate_AddWordFromManualDictionary, + "ManualDict"}, + {&MutationDispatcher::Mutate_AddWordFromPersistentAutoDictionary, + "PersAutoDict"}, + }); + if(Options.UseCmp) + DefaultMutators.push_back( + {&MutationDispatcher::Mutate_AddWordFromTORC, "CMP"}); + + if (EF->LLVMFuzzerCustomMutator) + Mutators.push_back({&MutationDispatcher::Mutate_Custom, "Custom"}); + else + Mutators = DefaultMutators; + + if (EF->LLVMFuzzerCustomCrossOver) + Mutators.push_back( + {&MutationDispatcher::Mutate_CustomCrossOver, "CustomCrossOver"}); +} + +static char RandCh(Random &Rand) { + if (Rand.RandBool()) + return static_cast(Rand(256)); + const char Special[] = "!*'();:@&=+$,/?%#[]012Az-`~.\xff\x00"; + return Special[Rand(sizeof(Special) - 1)]; +} + +size_t MutationDispatcher::Mutate_Custom(uint8_t *Data, size_t Size, + size_t MaxSize) { + if (EF->__msan_unpoison) + EF->__msan_unpoison(Data, Size); + if (EF->__msan_unpoison_param) + EF->__msan_unpoison_param(4); + return EF->LLVMFuzzerCustomMutator(Data, Size, MaxSize, + Rand.Rand()); +} + +size_t MutationDispatcher::Mutate_CustomCrossOver(uint8_t *Data, size_t Size, + size_t MaxSize) { + if (Size == 0) + return 0; + if (!CrossOverWith) return 0; + const Unit &Other = *CrossOverWith; + if (Other.empty()) + return 0; + CustomCrossOverInPlaceHere.resize(MaxSize); + auto &U = CustomCrossOverInPlaceHere; + + if (EF->__msan_unpoison) { + EF->__msan_unpoison(Data, Size); + EF->__msan_unpoison(Other.data(), Other.size()); + EF->__msan_unpoison(U.data(), U.size()); + } + if (EF->__msan_unpoison_param) + EF->__msan_unpoison_param(7); + size_t NewSize = EF->LLVMFuzzerCustomCrossOver( + Data, Size, Other.data(), Other.size(), U.data(), U.size(), + Rand.Rand()); + + if (!NewSize) + return 0; + assert(NewSize <= MaxSize && "CustomCrossOver returned overisized unit"); + memcpy(Data, U.data(), NewSize); + return NewSize; +} + +size_t MutationDispatcher::Mutate_ShuffleBytes(uint8_t *Data, size_t Size, + size_t MaxSize) { + if (Size > MaxSize || Size == 0) return 0; + size_t ShuffleAmount = + Rand(std::min(Size, (size_t)8)) + 1; // [1,8] and <= Size. + size_t ShuffleStart = Rand(Size - ShuffleAmount); + assert(ShuffleStart + ShuffleAmount <= Size); + std::shuffle(Data + ShuffleStart, Data + ShuffleStart + ShuffleAmount, Rand); + return Size; +} + +size_t MutationDispatcher::Mutate_EraseBytes(uint8_t *Data, size_t Size, + size_t MaxSize) { + if (Size <= 1) return 0; + size_t N = Rand(Size / 2) + 1; + assert(N < Size); + size_t Idx = Rand(Size - N + 1); + // Erase Data[Idx:Idx+N]. + memmove(Data + Idx, Data + Idx + N, Size - Idx - N); + // Printf("Erase: %zd %zd => %zd; Idx %zd\n", N, Size, Size - N, Idx); + return Size - N; } -void FromTORC8(size_t Idx, uint64_t *A, uint64_t *B) { - const auto &X = TPC.TORC8.Get(Idx); - *A = X.A; - *B = X.B; +size_t MutationDispatcher::Mutate_InsertByte(uint8_t *Data, size_t Size, + size_t MaxSize) { + if (Size >= MaxSize) return 0; + size_t Idx = Rand(Size + 1); + // Insert new value at Data[Idx]. + memmove(Data + Idx + 1, Data + Idx, Size - Idx); + Data[Idx] = RandCh(Rand); + return Size + 1; } -void FromTORCW(size_t Idx, const uint8_t **DataA, size_t *SizeA, - const uint8_t **DataB, size_t *SizeB) { - const auto &X = TPC.TORCW.Get(Idx); - *DataA = X.A.data(); - *SizeA = X.A.size(); - *DataB = X.B.data(); - *SizeB = X.B.size(); +size_t MutationDispatcher::Mutate_InsertRepeatedBytes(uint8_t *Data, + size_t Size, + size_t MaxSize) { + const size_t kMinBytesToInsert = 3; + if (Size + kMinBytesToInsert >= MaxSize) return 0; + size_t MaxBytesToInsert = std::min(MaxSize - Size, (size_t)128); + size_t N = Rand(MaxBytesToInsert - kMinBytesToInsert + 1) + kMinBytesToInsert; + assert(Size + N <= MaxSize && N); + size_t Idx = Rand(Size + 1); + // Insert new values at Data[Idx]. + memmove(Data + Idx + N, Data + Idx, Size - Idx); + // Give preference to 0x00 and 0xff. + uint8_t Byte = static_cast( + Rand.RandBool() ? Rand(256) : (Rand.RandBool() ? 0 : 255)); + for (size_t i = 0; i < N; i++) + Data[Idx + i] = Byte; + return Size + N; } -void FromMMT(size_t Idx, const uint8_t **Data, size_t *Size) { - const auto &W = TPC.MMT.Get(Idx); - *Data = W.data(); - *Size = W.size(); +size_t MutationDispatcher::Mutate_ChangeByte(uint8_t *Data, size_t Size, + size_t MaxSize) { + if (Size > MaxSize) return 0; + size_t Idx = Rand(Size); + Data[Idx] = RandCh(Rand); + return Size; } -void PrintASCII(const Word &W, const char *PrintAfter) { - fuzzer::PrintASCII(W.data(), W.size(), PrintAfter); +size_t MutationDispatcher::Mutate_ChangeBit(uint8_t *Data, size_t Size, + size_t MaxSize) { + if (Size > MaxSize) return 0; + size_t Idx = Rand(Size); + Data[Idx] ^= 1 << Rand(8); + return Size; } -} // namespace +size_t MutationDispatcher::Mutate_AddWordFromManualDictionary(uint8_t *Data, + size_t Size, + size_t MaxSize) { + return AddWordFromDictionary(ManualDictionary, Data, Size, MaxSize); +} + +size_t MutationDispatcher::ApplyDictionaryEntry(uint8_t *Data, size_t Size, + size_t MaxSize, + DictionaryEntry &DE) { + const Word &W = DE.GetW(); + bool UsePositionHint = DE.HasPositionHint() && + DE.GetPositionHint() + W.size() < Size && + Rand.RandBool(); + if (Rand.RandBool()) { // Insert W. + if (Size + W.size() > MaxSize) return 0; + size_t Idx = UsePositionHint ? DE.GetPositionHint() : Rand(Size + 1); + memmove(Data + Idx + W.size(), Data + Idx, Size - Idx); + memcpy(Data + Idx, W.data(), W.size()); + Size += W.size(); + } else { // Overwrite some bytes with W. + if (W.size() > Size) return 0; + size_t Idx = + UsePositionHint ? DE.GetPositionHint() : Rand(Size + 1 - W.size()); + memcpy(Data + Idx, W.data(), W.size()); + } + return Size; +} + +// Somewhere in the past we have observed a comparison instructions +// with arguments Arg1 Arg2. This function tries to guess a dictionary +// entry that will satisfy that comparison. +// It first tries to find one of the arguments (possibly swapped) in the +// input and if it succeeds it creates a DE with a position hint. +// Otherwise it creates a DE with one of the arguments w/o a position hint. +DictionaryEntry MutationDispatcher::MakeDictionaryEntryFromCMP( + const void *Arg1, const void *Arg2, + const void *Arg1Mutation, const void *Arg2Mutation, + size_t ArgSize, const uint8_t *Data, + size_t Size) { + bool HandleFirst = Rand.RandBool(); + const void *ExistingBytes, *DesiredBytes; + Word W; + const uint8_t *End = Data + Size; + for (int Arg = 0; Arg < 2; Arg++) { + ExistingBytes = HandleFirst ? Arg1 : Arg2; + DesiredBytes = HandleFirst ? Arg2Mutation : Arg1Mutation; + HandleFirst = !HandleFirst; + W.Set(reinterpret_cast(DesiredBytes), ArgSize); + const size_t kMaxNumPositions = 8; + size_t Positions[kMaxNumPositions]; + size_t NumPositions = 0; + for (const uint8_t *Cur = Data; + Cur < End && NumPositions < kMaxNumPositions; Cur++) { + Cur = + (const uint8_t *)SearchMemory(Cur, End - Cur, ExistingBytes, ArgSize); + if (!Cur) break; + Positions[NumPositions++] = Cur - Data; + } + if (!NumPositions) continue; + return DictionaryEntry(W, Positions[Rand(NumPositions)]); + } + DictionaryEntry DE(W); + return DE; +} + + +template +DictionaryEntry MutationDispatcher::MakeDictionaryEntryFromCMP( + T Arg1, T Arg2, const uint8_t *Data, size_t Size) { + if (Rand.RandBool()) Arg1 = Bswap(Arg1); + if (Rand.RandBool()) Arg2 = Bswap(Arg2); + T Arg1Mutation = static_cast(Arg1 + Rand(-1, 1)); + T Arg2Mutation = static_cast(Arg2 + Rand(-1, 1)); + return MakeDictionaryEntryFromCMP(&Arg1, &Arg2, &Arg1Mutation, &Arg2Mutation, + sizeof(Arg1), Data, Size); +} -void ConfigureMutagen(unsigned int Seed, const FuzzingOptions &Options, - LLVMMutagenConfiguration *OutConfig) { - memset(OutConfig, 0, sizeof(*OutConfig)); - OutConfig->Seed = Seed; - OutConfig->UseCmp = Options.UseCmp; - OutConfig->FromTORC4 = FromTORC4; - OutConfig->FromTORC8 = FromTORC8; - OutConfig->FromTORCW = FromTORCW; - OutConfig->UseMemmem = Options.UseMemmem; - OutConfig->FromMMT = FromMMT; - OutConfig->CustomMutator = EF->LLVMFuzzerCustomMutator; - OutConfig->CustomCrossOver = EF->LLVMFuzzerCustomCrossOver; - OutConfig->MSanUnpoison = EF->__msan_unpoison; - OutConfig->MSanUnpoisonParam = EF->__msan_unpoison_param; +DictionaryEntry MutationDispatcher::MakeDictionaryEntryFromCMP( + const Word &Arg1, const Word &Arg2, const uint8_t *Data, size_t Size) { + return MakeDictionaryEntryFromCMP(Arg1.data(), Arg2.data(), Arg1.data(), + Arg2.data(), Arg1.size(), Data, Size); } -void PrintRecommendedDictionary(MutationDispatcher &MD) { - auto RecommendedDictionary = MD.RecommendDictionary(); - if (RecommendedDictionary.empty()) - return; +size_t MutationDispatcher::Mutate_AddWordFromTORC( + uint8_t *Data, size_t Size, size_t MaxSize) { + Word W; + DictionaryEntry DE; + switch (Rand(4)) { + case 0: { + auto X = TPC.TORC8.Get(Rand.Rand()); + DE = MakeDictionaryEntryFromCMP(X.A, X.B, Data, Size); + } break; + case 1: { + auto X = TPC.TORC4.Get(Rand.Rand()); + if ((X.A >> 16) == 0 && (X.B >> 16) == 0 && Rand.RandBool()) + DE = MakeDictionaryEntryFromCMP((uint16_t)X.A, (uint16_t)X.B, Data, Size); + else + DE = MakeDictionaryEntryFromCMP(X.A, X.B, Data, Size); + } break; + case 2: { + auto X = TPC.TORCW.Get(Rand.Rand()); + DE = MakeDictionaryEntryFromCMP(X.A, X.B, Data, Size); + } break; + case 3: if (Options.UseMemmem) { + auto X = TPC.MMT.Get(Rand.Rand()); + DE = DictionaryEntry(X); + } break; + default: + assert(0); + } + if (!DE.GetW().size()) return 0; + Size = ApplyDictionaryEntry(Data, Size, MaxSize, DE); + if (!Size) return 0; + DictionaryEntry &DERef = + CmpDictionaryEntriesDeque[CmpDictionaryEntriesDequeIdx++ % + kCmpDictionaryEntriesDequeSize]; + DERef = DE; + CurrentDictionaryEntrySequence.push_back(&DERef); + return Size; +} + +size_t MutationDispatcher::Mutate_AddWordFromPersistentAutoDictionary( + uint8_t *Data, size_t Size, size_t MaxSize) { + return AddWordFromDictionary(PersistentAutoDictionary, Data, Size, MaxSize); +} + +size_t MutationDispatcher::AddWordFromDictionary(Dictionary &D, uint8_t *Data, + size_t Size, size_t MaxSize) { + if (Size > MaxSize) return 0; + if (D.empty()) return 0; + DictionaryEntry &DE = D[Rand(D.size())]; + Size = ApplyDictionaryEntry(Data, Size, MaxSize, DE); + if (!Size) return 0; + DE.IncUseCount(); + CurrentDictionaryEntrySequence.push_back(&DE); + return Size; +} + +// Overwrites part of To[0,ToSize) with a part of From[0,FromSize). +// Returns ToSize. +size_t MutationDispatcher::CopyPartOf(const uint8_t *From, size_t FromSize, + uint8_t *To, size_t ToSize) { + // Copy From[FromBeg, FromBeg + CopySize) into To[ToBeg, ToBeg + CopySize). + size_t ToBeg = Rand(ToSize); + size_t CopySize = Rand(ToSize - ToBeg) + 1; + assert(ToBeg + CopySize <= ToSize); + CopySize = std::min(CopySize, FromSize); + size_t FromBeg = Rand(FromSize - CopySize + 1); + assert(FromBeg + CopySize <= FromSize); + memmove(To + ToBeg, From + FromBeg, CopySize); + return ToSize; +} + +// Inserts part of From[0,ToSize) into To. +// Returns new size of To on success or 0 on failure. +size_t MutationDispatcher::InsertPartOf(const uint8_t *From, size_t FromSize, + uint8_t *To, size_t ToSize, + size_t MaxToSize) { + if (ToSize >= MaxToSize) return 0; + size_t AvailableSpace = MaxToSize - ToSize; + size_t MaxCopySize = std::min(AvailableSpace, FromSize); + size_t CopySize = Rand(MaxCopySize) + 1; + size_t FromBeg = Rand(FromSize - CopySize + 1); + assert(FromBeg + CopySize <= FromSize); + size_t ToInsertPos = Rand(ToSize + 1); + assert(ToInsertPos + CopySize <= MaxToSize); + size_t TailSize = ToSize - ToInsertPos; + if (To == From) { + MutateInPlaceHere.resize(MaxToSize); + memcpy(MutateInPlaceHere.data(), From + FromBeg, CopySize); + memmove(To + ToInsertPos + CopySize, To + ToInsertPos, TailSize); + memmove(To + ToInsertPos, MutateInPlaceHere.data(), CopySize); + } else { + memmove(To + ToInsertPos + CopySize, To + ToInsertPos, TailSize); + memmove(To + ToInsertPos, From + FromBeg, CopySize); + } + return ToSize + CopySize; +} + +size_t MutationDispatcher::Mutate_CopyPart(uint8_t *Data, size_t Size, + size_t MaxSize) { + if (Size > MaxSize || Size == 0) return 0; + // If Size == MaxSize, `InsertPartOf(...)` will + // fail so there's no point using it in this case. + if (Size == MaxSize || Rand.RandBool()) + return CopyPartOf(Data, Size, Data, Size); + else + return InsertPartOf(Data, Size, Data, Size, MaxSize); +} + +size_t MutationDispatcher::Mutate_ChangeASCIIInteger(uint8_t *Data, size_t Size, + size_t MaxSize) { + if (Size > MaxSize) return 0; + size_t B = Rand(Size); + while (B < Size && !isdigit(Data[B])) B++; + if (B == Size) return 0; + size_t E = B; + while (E < Size && isdigit(Data[E])) E++; + assert(B < E); + // now we have digits in [B, E). + // strtol and friends don't accept non-zero-teminated data, parse it manually. + uint64_t Val = Data[B] - '0'; + for (size_t i = B + 1; i < E; i++) + Val = Val * 10 + Data[i] - '0'; + + // Mutate the integer value. + switch(Rand(5)) { + case 0: Val++; break; + case 1: Val--; break; + case 2: Val /= 2; break; + case 3: Val *= 2; break; + case 4: Val = Rand(Val * Val); break; + default: assert(0); + } + // Just replace the bytes with the new ones, don't bother moving bytes. + for (size_t i = B; i < E; i++) { + size_t Idx = E + B - i - 1; + assert(Idx >= B && Idx < E); + Data[Idx] = (Val % 10) + '0'; + Val /= 10; + } + return Size; +} + +template +size_t ChangeBinaryInteger(uint8_t *Data, size_t Size, Random &Rand) { + if (Size < sizeof(T)) return 0; + size_t Off = Rand(Size - sizeof(T) + 1); + assert(Off + sizeof(T) <= Size); + T Val; + if (Off < 64 && !Rand(4)) { + Val = static_cast(Size); + if (Rand.RandBool()) + Val = Bswap(Val); + } else { + memcpy(&Val, Data + Off, sizeof(Val)); + T Add = static_cast(Rand(21)); + Add -= 10; + if (Rand.RandBool()) + Val = Bswap(T(Bswap(Val) + Add)); // Add assuming different endiannes. + else + Val = Val + Add; // Add assuming current endiannes. + if (Add == 0 || Rand.RandBool()) // Maybe negate. + Val = -Val; + } + memcpy(Data + Off, &Val, sizeof(Val)); + return Size; +} + +size_t MutationDispatcher::Mutate_ChangeBinaryInteger(uint8_t *Data, + size_t Size, + size_t MaxSize) { + if (Size > MaxSize) return 0; + switch (Rand(4)) { + case 3: return ChangeBinaryInteger(Data, Size, Rand); + case 2: return ChangeBinaryInteger(Data, Size, Rand); + case 1: return ChangeBinaryInteger(Data, Size, Rand); + case 0: return ChangeBinaryInteger(Data, Size, Rand); + default: assert(0); + } + return 0; +} + +size_t MutationDispatcher::Mutate_CrossOver(uint8_t *Data, size_t Size, + size_t MaxSize) { + if (Size > MaxSize) return 0; + if (Size == 0) return 0; + if (!CrossOverWith) return 0; + const Unit &O = *CrossOverWith; + if (O.empty()) return 0; + size_t NewSize = 0; + switch(Rand(3)) { + case 0: + MutateInPlaceHere.resize(MaxSize); + NewSize = CrossOver(Data, Size, O.data(), O.size(), + MutateInPlaceHere.data(), MaxSize); + memcpy(Data, MutateInPlaceHere.data(), NewSize); + break; + case 1: + NewSize = InsertPartOf(O.data(), O.size(), Data, Size, MaxSize); + if (!NewSize) + NewSize = CopyPartOf(O.data(), O.size(), Data, Size); + break; + case 2: + NewSize = CopyPartOf(O.data(), O.size(), Data, Size); + break; + default: assert(0); + } + assert(NewSize > 0 && "CrossOver returned empty unit"); + assert(NewSize <= MaxSize && "CrossOver returned overisized unit"); + return NewSize; +} + +void MutationDispatcher::StartMutationSequence() { + CurrentMutatorSequence.clear(); + CurrentDictionaryEntrySequence.clear(); +} + +// Copy successful dictionary entries to PersistentAutoDictionary. +void MutationDispatcher::RecordSuccessfulMutationSequence() { + for (auto DE : CurrentDictionaryEntrySequence) { + // PersistentAutoDictionary.AddWithSuccessCountOne(DE); + DE->IncSuccessCount(); + assert(DE->GetW().size()); + // Linear search is fine here as this happens seldom. + if (!PersistentAutoDictionary.ContainsWord(DE->GetW())) + PersistentAutoDictionary.push_back(*DE); + } +} + +void MutationDispatcher::PrintRecommendedDictionary() { + Vector V; + for (auto &DE : PersistentAutoDictionary) + if (!ManualDictionary.ContainsWord(DE.GetW())) + V.push_back(DE); + if (V.empty()) return; Printf("###### Recommended dictionary. ######\n"); - for (auto &DE : RecommendedDictionary) { + for (auto &DE: V) { assert(DE.GetW().size()); Printf("\""); PrintASCII(DE.GetW(), "\""); @@ -80,12 +500,97 @@ void PrintRecommendedDictionary(MutationDispatcher &MD) { Printf("###### End of recommended dictionary. ######\n"); } -void PrintMutationSequence(MutationDispatcher &MD, bool Verbose) { - const auto &MS = MD.MutationSequence(); - const auto &DS = MD.DictionaryEntrySequence(); - Printf("MS: %zd %s", MS.size(), MS.GetString(Verbose).c_str()); - if (!DS.empty()) - Printf(" DE: %s", DS.GetString(Verbose).c_str()); +void MutationDispatcher::PrintMutationSequence(bool Verbose) { + Printf("MS: %zd ", CurrentMutatorSequence.size()); + size_t EntriesToPrint = + Verbose ? CurrentMutatorSequence.size() + : std::min(kMaxMutationsToPrint, CurrentMutatorSequence.size()); + for (size_t i = 0; i < EntriesToPrint; i++) + Printf("%s-", CurrentMutatorSequence[i].Name); + if (!CurrentDictionaryEntrySequence.empty()) { + Printf(" DE: "); + EntriesToPrint = Verbose ? CurrentDictionaryEntrySequence.size() + : std::min(kMaxMutationsToPrint, + CurrentDictionaryEntrySequence.size()); + for (size_t i = 0; i < EntriesToPrint; i++) { + Printf("\""); + PrintASCII(CurrentDictionaryEntrySequence[i]->GetW(), "\"-"); + } + } +} + +std::string MutationDispatcher::MutationSequence() { + std::string MS; + for (auto M : CurrentMutatorSequence) { + MS += M.Name; + MS += "-"; + } + return MS; +} + +size_t MutationDispatcher::Mutate(uint8_t *Data, size_t Size, size_t MaxSize) { + return MutateImpl(Data, Size, MaxSize, Mutators); +} + +size_t MutationDispatcher::DefaultMutate(uint8_t *Data, size_t Size, + size_t MaxSize) { + return MutateImpl(Data, Size, MaxSize, DefaultMutators); +} + +// Mutates Data in place, returns new size. +size_t MutationDispatcher::MutateImpl(uint8_t *Data, size_t Size, + size_t MaxSize, + Vector &Mutators) { + assert(MaxSize > 0); + // Some mutations may fail (e.g. can't insert more bytes if Size == MaxSize), + // in which case they will return 0. + // Try several times before returning un-mutated data. + for (int Iter = 0; Iter < 100; Iter++) { + auto M = Mutators[Rand(Mutators.size())]; + size_t NewSize = (this->*(M.Fn))(Data, Size, MaxSize); + if (NewSize && NewSize <= MaxSize) { + if (Options.OnlyASCII) + ToASCII(Data, NewSize); + CurrentMutatorSequence.push_back(M); + return NewSize; + } + } + *Data = ' '; + return 1; // Fallback, should not happen frequently. +} + +// Mask represents the set of Data bytes that are worth mutating. +size_t MutationDispatcher::MutateWithMask(uint8_t *Data, size_t Size, + size_t MaxSize, + const Vector &Mask) { + size_t MaskedSize = std::min(Size, Mask.size()); + // * Copy the worthy bytes into a temporary array T + // * Mutate T + // * Copy T back. + // This is totally unoptimized. + auto &T = MutateWithMaskTemp; + if (T.size() < Size) + T.resize(Size); + size_t OneBits = 0; + for (size_t I = 0; I < MaskedSize; I++) + if (Mask[I]) + T[OneBits++] = Data[I]; + + if (!OneBits) return 0; + assert(!T.empty()); + size_t NewSize = Mutate(T.data(), OneBits, OneBits); + assert(NewSize <= OneBits); + (void)NewSize; + // Even if NewSize < OneBits we still use all OneBits bytes. + for (size_t I = 0, J = 0; I < MaskedSize; I++) + if (Mask[I]) + Data[I] = T[J++]; + return Size; +} + +void MutationDispatcher::AddWordToManualDictionary(const Word &W) { + ManualDictionary.push_back( + {W, std::numeric_limits::max()}); } } // namespace fuzzer diff --git a/compiler-rt/lib/fuzzer/FuzzerMutate.h b/compiler-rt/lib/fuzzer/FuzzerMutate.h index 85e284ef571c1..fd37191156d3f 100644 --- a/compiler-rt/lib/fuzzer/FuzzerMutate.h +++ b/compiler-rt/lib/fuzzer/FuzzerMutate.h @@ -11,23 +11,145 @@ #ifndef LLVM_FUZZER_MUTATE_H #define LLVM_FUZZER_MUTATE_H +#include "FuzzerDefs.h" +#include "FuzzerDictionary.h" #include "FuzzerOptions.h" -#include "mutagen/Mutagen.h" -#include "mutagen/MutagenDispatcher.h" +#include "FuzzerRandom.h" namespace fuzzer { -namespace { -using mutagen::MutationDispatcher; +class MutationDispatcher { +public: + MutationDispatcher(Random &Rand, const FuzzingOptions &Options); + ~MutationDispatcher() {} + /// Indicate that we are about to start a new sequence of mutations. + void StartMutationSequence(); + /// Print the current sequence of mutations. Only prints the full sequence + /// when Verbose is true. + void PrintMutationSequence(bool Verbose = true); + /// Return the current sequence of mutations. + std::string MutationSequence(); + /// Indicate that the current sequence of mutations was successful. + void RecordSuccessfulMutationSequence(); + /// Mutates data by invoking user-provided mutator. + size_t Mutate_Custom(uint8_t *Data, size_t Size, size_t MaxSize); + /// Mutates data by invoking user-provided crossover. + size_t Mutate_CustomCrossOver(uint8_t *Data, size_t Size, size_t MaxSize); + /// Mutates data by shuffling bytes. + size_t Mutate_ShuffleBytes(uint8_t *Data, size_t Size, size_t MaxSize); + /// Mutates data by erasing bytes. + size_t Mutate_EraseBytes(uint8_t *Data, size_t Size, size_t MaxSize); + /// Mutates data by inserting a byte. + size_t Mutate_InsertByte(uint8_t *Data, size_t Size, size_t MaxSize); + /// Mutates data by inserting several repeated bytes. + size_t Mutate_InsertRepeatedBytes(uint8_t *Data, size_t Size, size_t MaxSize); + /// Mutates data by changing one byte. + size_t Mutate_ChangeByte(uint8_t *Data, size_t Size, size_t MaxSize); + /// Mutates data by changing one bit. + size_t Mutate_ChangeBit(uint8_t *Data, size_t Size, size_t MaxSize); + /// Mutates data by copying/inserting a part of data into a different place. + size_t Mutate_CopyPart(uint8_t *Data, size_t Size, size_t MaxSize); -} // namespace + /// Mutates data by adding a word from the manual dictionary. + size_t Mutate_AddWordFromManualDictionary(uint8_t *Data, size_t Size, + size_t MaxSize); -void ConfigureMutagen(unsigned int Seed, const FuzzingOptions &Options, - LLVMMutagenConfiguration *OutConfig); + /// Mutates data by adding a word from the TORC. + size_t Mutate_AddWordFromTORC(uint8_t *Data, size_t Size, size_t MaxSize); -void PrintRecommendedDictionary(MutationDispatcher &MD); + /// Mutates data by adding a word from the persistent automatic dictionary. + size_t Mutate_AddWordFromPersistentAutoDictionary(uint8_t *Data, size_t Size, + size_t MaxSize); -void PrintMutationSequence(MutationDispatcher &MD, bool Verbose = true); + /// Tries to find an ASCII integer in Data, changes it to another ASCII int. + size_t Mutate_ChangeASCIIInteger(uint8_t *Data, size_t Size, size_t MaxSize); + /// Change a 1-, 2-, 4-, or 8-byte integer in interesting ways. + size_t Mutate_ChangeBinaryInteger(uint8_t *Data, size_t Size, size_t MaxSize); + + /// CrossOver Data with CrossOverWith. + size_t Mutate_CrossOver(uint8_t *Data, size_t Size, size_t MaxSize); + + /// Applies one of the configured mutations. + /// Returns the new size of data which could be up to MaxSize. + size_t Mutate(uint8_t *Data, size_t Size, size_t MaxSize); + + /// Applies one of the configured mutations to the bytes of Data + /// that have '1' in Mask. + /// Mask.size() should be >= Size. + size_t MutateWithMask(uint8_t *Data, size_t Size, size_t MaxSize, + const Vector &Mask); + + /// Applies one of the default mutations. Provided as a service + /// to mutation authors. + size_t DefaultMutate(uint8_t *Data, size_t Size, size_t MaxSize); + + /// Creates a cross-over of two pieces of Data, returns its size. + size_t CrossOver(const uint8_t *Data1, size_t Size1, const uint8_t *Data2, + size_t Size2, uint8_t *Out, size_t MaxOutSize); + + void AddWordToManualDictionary(const Word &W); + + void PrintRecommendedDictionary(); + + void SetCrossOverWith(const Unit *U) { CrossOverWith = U; } + + Random &GetRand() { return Rand; } + + private: + struct Mutator { + size_t (MutationDispatcher::*Fn)(uint8_t *Data, size_t Size, size_t Max); + const char *Name; + }; + + size_t AddWordFromDictionary(Dictionary &D, uint8_t *Data, size_t Size, + size_t MaxSize); + size_t MutateImpl(uint8_t *Data, size_t Size, size_t MaxSize, + Vector &Mutators); + + size_t InsertPartOf(const uint8_t *From, size_t FromSize, uint8_t *To, + size_t ToSize, size_t MaxToSize); + size_t CopyPartOf(const uint8_t *From, size_t FromSize, uint8_t *To, + size_t ToSize); + size_t ApplyDictionaryEntry(uint8_t *Data, size_t Size, size_t MaxSize, + DictionaryEntry &DE); + + template + DictionaryEntry MakeDictionaryEntryFromCMP(T Arg1, T Arg2, + const uint8_t *Data, size_t Size); + DictionaryEntry MakeDictionaryEntryFromCMP(const Word &Arg1, const Word &Arg2, + const uint8_t *Data, size_t Size); + DictionaryEntry MakeDictionaryEntryFromCMP(const void *Arg1, const void *Arg2, + const void *Arg1Mutation, + const void *Arg2Mutation, + size_t ArgSize, + const uint8_t *Data, size_t Size); + + Random &Rand; + const FuzzingOptions Options; + + // Dictionary provided by the user via -dict=DICT_FILE. + Dictionary ManualDictionary; + // Persistent dictionary modified by the fuzzer, consists of + // entries that led to successful discoveries in the past mutations. + Dictionary PersistentAutoDictionary; + + Vector CurrentDictionaryEntrySequence; + + static const size_t kCmpDictionaryEntriesDequeSize = 16; + DictionaryEntry CmpDictionaryEntriesDeque[kCmpDictionaryEntriesDequeSize]; + size_t CmpDictionaryEntriesDequeIdx = 0; + + const Unit *CrossOverWith = nullptr; + Vector MutateInPlaceHere; + Vector MutateWithMaskTemp; + // CustomCrossOver needs its own buffer as a custom implementation may call + // LLVMFuzzerMutate, which in turn may resize MutateInPlaceHere. + Vector CustomCrossOverInPlaceHere; + + Vector Mutators; + Vector DefaultMutators; + Vector CurrentMutatorSequence; +}; } // namespace fuzzer diff --git a/compiler-rt/lib/fuzzer/FuzzerRandom.h b/compiler-rt/lib/fuzzer/FuzzerRandom.h index 8256853a65bb5..ad6c07eb5ef56 100644 --- a/compiler-rt/lib/fuzzer/FuzzerRandom.h +++ b/compiler-rt/lib/fuzzer/FuzzerRandom.h @@ -11,7 +11,6 @@ #ifndef LLVM_FUZZER_RANDOM_H #define LLVM_FUZZER_RANDOM_H -#include #include namespace fuzzer { diff --git a/compiler-rt/lib/fuzzer/FuzzerTracePC.cpp b/compiler-rt/lib/fuzzer/FuzzerTracePC.cpp index b613aef7b59f5..d808b9b00fa35 100644 --- a/compiler-rt/lib/fuzzer/FuzzerTracePC.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerTracePC.cpp @@ -16,6 +16,7 @@ #include "FuzzerBuiltinsMsvc.h" #include "FuzzerCorpus.h" #include "FuzzerDefs.h" +#include "FuzzerDictionary.h" #include "FuzzerExtFunctions.h" #include "FuzzerIO.h" #include "FuzzerPlatform.h" diff --git a/compiler-rt/lib/fuzzer/FuzzerTracePC.h b/compiler-rt/lib/fuzzer/FuzzerTracePC.h index 921a13f082ae3..a93732972f7d7 100644 --- a/compiler-rt/lib/fuzzer/FuzzerTracePC.h +++ b/compiler-rt/lib/fuzzer/FuzzerTracePC.h @@ -12,7 +12,7 @@ #define LLVM_FUZZER_TRACE_PC #include "FuzzerDefs.h" -#include "FuzzerUtil.h" +#include "FuzzerDictionary.h" #include "FuzzerValueBitMap.h" #include @@ -40,7 +40,7 @@ struct TableOfRecentCompares { Table[Idx].B = Arg2; } - const Pair &Get(size_t I) { return Table[I % kSize]; } + Pair Get(size_t I) { return Table[I % kSize]; } Pair Table[kSize]; }; diff --git a/compiler-rt/lib/fuzzer/FuzzerUtil.h b/compiler-rt/lib/fuzzer/FuzzerUtil.h index 285f56be8a767..a188a7be32a53 100644 --- a/compiler-rt/lib/fuzzer/FuzzerUtil.h +++ b/compiler-rt/lib/fuzzer/FuzzerUtil.h @@ -47,15 +47,6 @@ void PrintMemoryProfile(); unsigned NumberOfCpuCores(); -// Parses one dictionary entry. -// If successful, write the enty to Unit and returns true, -// otherwise returns false. -bool ParseOneDictionaryEntry(const std::string &Str, Unit *U); - -// Parses the dictionary file, fills Units, returns true iff all lines -// were parsed successfully. -bool ParseDictionaryFile(const std::string &Text, Vector *Units); - // Platform specific functions. void SetSignalHandler(const FuzzingOptions& Options); @@ -72,6 +63,9 @@ bool ExecuteCommand(const Command &Cmd, std::string *CmdOutput); FILE *OpenProcessPipe(const char *Command, const char *Mode); int CloseProcessPipe(FILE *F); +const void *SearchMemory(const void *haystack, size_t haystacklen, + const void *needle, size_t needlelen); + std::string CloneArgsWithoutX(const Vector &Args, const char *X1, const char *X2); diff --git a/compiler-rt/lib/fuzzer/FuzzerUtilFuchsia.cpp b/compiler-rt/lib/fuzzer/FuzzerUtilFuchsia.cpp index e83baa62886c0..5034b4a28d3f5 100644 --- a/compiler-rt/lib/fuzzer/FuzzerUtilFuchsia.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerUtilFuchsia.cpp @@ -528,6 +528,11 @@ bool ExecuteCommand(const Command &BaseCmd, std::string *CmdOutput) { return Ret == 0; } +const void *SearchMemory(const void *Data, size_t DataLen, const void *Patt, + size_t PattLen) { + return memmem(Data, DataLen, Patt, PattLen); +} + // In fuchsia, accessing /dev/null is not supported. There's nothing // similar to a file that discards everything that is written to it. // The way of doing something similar in fuchsia is by using diff --git a/compiler-rt/lib/fuzzer/FuzzerUtilPosix.cpp b/compiler-rt/lib/fuzzer/FuzzerUtilPosix.cpp index 5f0aa0190dd66..0446d732a9ec8 100644 --- a/compiler-rt/lib/fuzzer/FuzzerUtilPosix.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerUtilPosix.cpp @@ -170,6 +170,11 @@ int CloseProcessPipe(FILE *F) { return pclose(F); } +const void *SearchMemory(const void *Data, size_t DataLen, const void *Patt, + size_t PattLen) { + return memmem(Data, DataLen, Patt, PattLen); +} + std::string DisassembleCmd(const std::string &FileName) { return "objdump -d " + FileName; } diff --git a/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp b/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp index 5deb5998fccb4..1a54bb569eca4 100644 --- a/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp @@ -182,6 +182,27 @@ bool ExecuteCommand(const Command &Cmd, std::string *CmdOutput) { return _pclose(Pipe) == 0; } +const void *SearchMemory(const void *Data, size_t DataLen, const void *Patt, + size_t PattLen) { + // TODO: make this implementation more efficient. + const char *Cdata = (const char *)Data; + const char *Cpatt = (const char *)Patt; + + if (!Data || !Patt || DataLen == 0 || PattLen == 0 || DataLen < PattLen) + return NULL; + + if (PattLen == 1) + return memchr(Data, *Cpatt, DataLen); + + const char *End = Cdata + DataLen - PattLen + 1; + + for (const char *It = Cdata; It < End; ++It) + if (It[0] == Cpatt[0] && memcmp(It, Cpatt, PattLen) == 0) + return It; + + return NULL; +} + std::string DisassembleCmd(const std::string &FileName) { Vector command_vector; command_vector.push_back("dumpbin /summary > nul"); diff --git a/compiler-rt/lib/fuzzer/build.sh b/compiler-rt/lib/fuzzer/build.sh index 822b606041278..504e54e3a819e 100755 --- a/compiler-rt/lib/fuzzer/build.sh +++ b/compiler-rt/lib/fuzzer/build.sh @@ -1,11 +1,11 @@ #!/bin/sh LIBFUZZER_SRC_DIR=$(dirname $0) -LIBMUTAGEN_SRC_DIR=$LIBFUZZER_SRC_DIR/mutagen CXX="${CXX:-clang}" -for f in $LIBFUZZER_SRC_DIR/*.cpp $LIBMUTAGEN_SRC_DIR/*.cpp; do - $CXX -g -O2 -fno-omit-frame-pointer -std=c++11 $f -c -I$LIBFUZZER_SRC_DIR & +for f in $LIBFUZZER_SRC_DIR/*.cpp; do + $CXX -g -O2 -fno-omit-frame-pointer -std=c++11 $f -c & done wait rm -f libFuzzer.a -ar ru libFuzzer.a Fuzzer*.o Mutagen*.o -rm -f Fuzzer*.o Mutagen*.o +ar ru libFuzzer.a Fuzzer*.o +rm -f Fuzzer*.o + diff --git a/compiler-rt/lib/fuzzer/mutagen/CMakeLists.txt b/compiler-rt/lib/fuzzer/mutagen/CMakeLists.txt deleted file mode 100644 index 1a8175ce6e6ec..0000000000000 --- a/compiler-rt/lib/fuzzer/mutagen/CMakeLists.txt +++ /dev/null @@ -1,59 +0,0 @@ -set(MUTAGEN_SOURCES - Mutagen.cpp - MutagenCrossOver.cpp - MutagenDispatcher.cpp - MutagenUtilPosix.cpp - MutagenUtilWindows.cpp) - -set(MUTAGEN_HEADERS - Mutagen.h - MutagenDictionary.h - MutagenDispatcher.h - MutagenUtil.h) - -# Expose the files in this library to libFuzzer for optimized, direct inclusion. -set(LIBFUZZER_MUTAGEN_SOURCES ${MUTAGEN_SOURCES} PARENT_SCOPE) -set(LIBFUZZER_MUTAGEN_HEADERS ${MUTAGEN_HEADERS} PARENT_SCOPE) - -# Reuse the following variables from libFuzzer: -# FUZZER_SUPPORTED_ARCH -# FUZZER_SUPPORTED_OS -# LIBFUZZER_CFLAGS -# LIBFUZZER_DEPS -include_directories(..) - -add_compiler_rt_component(mutagen) - -add_compiler_rt_object_libraries(RTmutagen - OS ${FUZZER_SUPPORTED_OS} - ARCHS ${FUZZER_SUPPORTED_ARCH} - SOURCES ${MUTAGEN_SOURCES} - ADDITIONAL_HEADERS ${MUTAGEN_HEADERS} - CFLAGS ${LIBFUZZER_CFLAGS} - DEPS ${LIBFUZZER_DEPS}) - -add_compiler_rt_runtime(clang_rt.mutagen - STATIC - OS ${FUZZER_SUPPORTED_OS} - ARCHS ${FUZZER_SUPPORTED_ARCH} - OBJECT_LIBS RTmutagen - CFLAGS ${LIBFUZZER_CFLAGS} - PARENT_TARGET mutagen) - -if(OS_NAME MATCHES "Linux|Fuchsia" AND - COMPILER_RT_LIBCXX_PATH AND - COMPILER_RT_LIBCXXABI_PATH) - foreach(arch ${FUZZER_SUPPORTED_ARCH}) - get_target_flags_for_arch(${arch} TARGET_CFLAGS) - set(LIBCXX_${arch}_PREFIX ${CMAKE_CURRENT_BINARY_DIR}/libcxx_mutagen_${arch}) - add_custom_libcxx(libcxx_mutagen_${arch} ${LIBCXX_${arch}_PREFIX} - CFLAGS ${TARGET_CFLAGS} - CMAKE_ARGS -DCMAKE_CXX_COMPILER_WORKS=ON - -DCMAKE_POSITION_INDEPENDENT_CODE=ON - -DLIBCXXABI_ENABLE_EXCEPTIONS=OFF - -DLIBCXX_ABI_NAMESPACE=__Fuzzer) - target_compile_options(RTmutagen.${arch} PRIVATE -isystem ${LIBCXX_${arch}_PREFIX}/include/c++/v1) - add_dependencies(RTmutagen.${arch} libcxx_mutagen_${arch}-build) - partially_link_libcxx(mutagen ${LIBCXX_${arch}_PREFIX} ${arch}) - endforeach() -endif() diff --git a/compiler-rt/lib/fuzzer/mutagen/Mutagen.cpp b/compiler-rt/lib/fuzzer/mutagen/Mutagen.cpp deleted file mode 100644 index 8d5858191afeb..0000000000000 --- a/compiler-rt/lib/fuzzer/mutagen/Mutagen.cpp +++ /dev/null @@ -1,100 +0,0 @@ -//===- Mutagen.cpp - Interface header for the mutagen -----------*- C++ -* ===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// Define the interface between libMutagen and its consumers. -//===----------------------------------------------------------------------===// - -#include "Mutagen.h" -#include "FuzzerDefs.h" -#include "MutagenDispatcher.h" -#include -#include -#include - -namespace mutagen { -namespace { - -MutationDispatcher *MD = nullptr; - -} // namespace - -MutationDispatcher *GetMutationDispatcherForTest() { return MD; } - -} // namespace mutagen - -using fuzzer::Unit; -using mutagen::MD; -using mutagen::MutationDispatcher; -using mutagen::Word; - -extern "C" { - -ATTRIBUTE_INTERFACE void -LLVMMutagenConfigure(const LLVMMutagenConfiguration *Config) { - if (MD) - delete MD; - MD = new MutationDispatcher(Config); -} - -ATTRIBUTE_INTERFACE void LLVMMutagenResetSequence() { - MD->StartMutationSequence(); -} - -ATTRIBUTE_INTERFACE void LLVMMutagenSetCrossOverWith(const uint8_t *Data, - size_t Size) { - static Unit CrossOverWith; - Unit U(Data, Data + Size); - CrossOverWith = std::move(U); - MD->SetCrossOverWith(&CrossOverWith); -} - -ATTRIBUTE_INTERFACE size_t LLVMMutagenMutate(uint8_t *Data, size_t Size, - size_t Max) { - return MD->Mutate(Data, Size, Max); -} - -ATTRIBUTE_INTERFACE size_t LLVMMutagenDefaultMutate(uint8_t *Data, size_t Size, - size_t Max) { - return MD->DefaultMutate(Data, Size, Max); -} - -ATTRIBUTE_INTERFACE void LLVMMutagenRecordSequence() { - MD->RecordSuccessfulMutationSequence(); -} - -ATTRIBUTE_INTERFACE size_t LLVMMutagenGetMutationSequence(int Verbose, - char *Out, size_t Max, - size_t *OutNumItems) { - const auto &Seq = MD->MutationSequence(); - if (OutNumItems) - *OutNumItems = Seq.size(); - return snprintf(Out, Max, "%s", Seq.GetString(Verbose).c_str()); -} - -ATTRIBUTE_INTERFACE void LLVMMutagenAddWordToDictionary(const uint8_t *Data, - size_t Size) { - MD->AddWordToManualDictionary(Word(Data, std::min(Size, Word::GetMaxSize()))); -} - -ATTRIBUTE_INTERFACE size_t LLVMMutagenGetDictionaryEntrySequence( - int Verbose, char *Out, size_t Max, size_t *OutNumItems) { - const auto &Seq = MD->DictionaryEntrySequence(); - if (OutNumItems) - *OutNumItems = Seq.size(); - return snprintf(Out, Max, "%s", Seq.GetString(Verbose).c_str()); -} - -ATTRIBUTE_INTERFACE size_t LLVMMutagenRecommendDictionary() { - return MD->RecommendDictionary().size(); -} - -ATTRIBUTE_INTERFACE const char * -LLVMMutagenRecommendDictionaryEntry(size_t *OutUseCount) { - return MD->RecommendDictionaryEntry(OutUseCount); -} - -} // extern "C" diff --git a/compiler-rt/lib/fuzzer/mutagen/Mutagen.h b/compiler-rt/lib/fuzzer/mutagen/Mutagen.h deleted file mode 100644 index 757ee3e07d2cd..0000000000000 --- a/compiler-rt/lib/fuzzer/mutagen/Mutagen.h +++ /dev/null @@ -1,119 +0,0 @@ -//===- Mutagen.h - Interface header for the mutagen -------------*- C++ -* ===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// Define the interface between libMutagen and its consumers. -//===----------------------------------------------------------------------===// - -#ifndef LLVM_FUZZER_MUTAGEN_H -#define LLVM_FUZZER_MUTAGEN_H - -#include "FuzzerPlatform.h" -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif // __cplusplus - -#define MAX_WORD_SIZE 64 - -typedef struct { - // PRNG seed. - unsigned int Seed; - - // If non-zero, use CMP traces to guide mutations. Ignored if any of - // |FromTORC4|, |FromTORC8|, or |FromTORCW| are null. - int UseCmp; - void (*FromTORC4)(size_t Idx, uint32_t *Arg1, uint32_t *Arg2); - void (*FromTORC8)(size_t Idx, uint64_t *Arg1, uint64_t *Arg2); - void (*FromTORCW)(size_t Idx, const uint8_t **Data1, size_t *Size1, - const uint8_t **Data2, size_t *Size2); - - // If non-zero, use hints from intercepting memmem, strstr, etc. Ignored if - // |UseCmp| is zero or if |FromMMT| is null. - int UseMemmem; - void (*FromMMT)(size_t Idx, const uint8_t **Data, size_t *Size); - - // If non-zero, generate only ASCII (isprint+isspace) inputs. - int OnlyASCII; - - // Optional user-provided custom mutator. - size_t (*CustomMutator)(uint8_t *Data, size_t Size, size_t MaxSize, - unsigned int Seed); - - // Optional user-provided custom cross-over function. - size_t (*CustomCrossOver)(const uint8_t *Data1, size_t Size1, - const uint8_t *Data2, size_t Size2, uint8_t *Out, - size_t MaxOutSize, unsigned int Seed); - - // Optional MemorySanitizer callbacks. - void (*MSanUnpoison)(const volatile void *, size_t size); - void (*MSanUnpoisonParam)(size_t n); -} LLVMMutagenConfiguration; - -// Re-seeds the PRNG and sets mutator-related options. -ATTRIBUTE_INTERFACE void -LLVMMutagenConfigure(const LLVMMutagenConfiguration *config); - -// Writes the mutation sequence to |Out|, and returns the number of -// characters it wrote, or would have written given a large enough buffer, -// excluding the null terminator. Thus, a return value of |Max| or greater -// indicates the sequence was truncated (like snprintf). May truncate the -// sequence unless |Verbose| is non-zero. Sets |OutNumItems| to the number of -// items in the untruncated sequence. -ATTRIBUTE_INTERFACE size_t LLVMMutagenGetMutationSequence(int Verbose, - char *Out, size_t Max, - size_t *OutNumItems); - -// Writes the dictionary entry sequence to |Out|, and returns the number of -// characters it wrote, or would have written given a large enough buffer, -// excluding a null terminator. Thus, a return value of |Max| or greater -// indicates the sequence was truncated (like snprintf). May truncate the -// sequence unless |Verbose| is non-zero. Sets |OutNumItems| to the number of -// items in the untruncated sequence. -ATTRIBUTE_INTERFACE size_t LLVMMutagenGetDictionaryEntrySequence( - int Verbose, char *Out, size_t Max, size_t *OutNumItems); - -// Instructs the library to record the current mutation sequence as successful -// at increasing coverage. -ATTRIBUTE_INTERFACE void LLVMMutagenRecordSequence(); - -// Clears the mutation and dictionary entry sequences. -ATTRIBUTE_INTERFACE void LLVMMutagenResetSequence(); - -// Adds data used by various mutators to produce new inputs. -ATTRIBUTE_INTERFACE void LLVMMutagenSetCrossOverWith(const uint8_t *Data, - size_t Size); -ATTRIBUTE_INTERFACE void LLVMMutagenAddWordToDictionary(const uint8_t *Word, - size_t Size); - -// Mutates the contents of |Data| and returns the new size. -ATTRIBUTE_INTERFACE size_t LLVMMutagenMutate(uint8_t *Data, size_t Size, - size_t Max); - -// Like |LLVMMutagenMutate|, but never selects the custom mutators and is -// therefore suitable to be called from them. -ATTRIBUTE_INTERFACE size_t LLVMMutagenDefaultMutate(uint8_t *Data, size_t Size, - size_t Max); - -// Creates a recommended dictionary and returns its number of entries. The -// entries can be retrieved by subsequent calls to -// |LLVMMutagenRecommendDictionaryEntry|. -ATTRIBUTE_INTERFACE size_t LLVMMutagenRecommendDictionary(); - -// Returns the ASCII representation of the next recommended dictionary entry, -// or null if no entries remain (or |LLVMMutagenRecommendDictionary| wasn't -// called). If non-null, the return pointer is valid until the next call to this -// method, and if provided, |OutUseCount| is set to the entry's use count. -ATTRIBUTE_INTERFACE const char * -LLVMMutagenRecommendDictionaryEntry(size_t *OutUseCount); - -#ifdef __cplusplus -} // extern "C" -#endif // __cplusplus - -#endif // LLVM_FUZZER_MUTAGEN_H diff --git a/compiler-rt/lib/fuzzer/mutagen/MutagenDictionary.h b/compiler-rt/lib/fuzzer/mutagen/MutagenDictionary.h deleted file mode 100644 index a665cabc316e7..0000000000000 --- a/compiler-rt/lib/fuzzer/mutagen/MutagenDictionary.h +++ /dev/null @@ -1,85 +0,0 @@ -//===- MutagenDictionary.h - Internal header for the mutagen ----*- C++ -* ===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// mutagen::Dictionary -//===----------------------------------------------------------------------===// - -#ifndef LLVM_FUZZER_MUTAGEN_DICTIONARY_H -#define LLVM_FUZZER_MUTAGEN_DICTIONARY_H - -#include "FuzzerDefs.h" -#include -#include -#include -#include -#include - -namespace mutagen { -namespace { - -using fuzzer::Word; - -} // namespace - -class DictionaryEntry { -public: - DictionaryEntry() {} - DictionaryEntry(Word W) : W(W) {} - DictionaryEntry(Word W, size_t PositionHint) - : W(W), PositionHint(PositionHint) {} - const Word &GetW() const { return W; } - - bool HasPositionHint() const { - return PositionHint != std::numeric_limits::max(); - } - size_t GetPositionHint() const { - assert(HasPositionHint()); - return PositionHint; - } - void IncUseCount() { UseCount++; } - void IncSuccessCount() { SuccessCount++; } - size_t GetUseCount() const { return UseCount; } - size_t GetSuccessCount() const { return SuccessCount; } - -private: - Word W; - size_t PositionHint = std::numeric_limits::max(); - size_t UseCount = 0; - size_t SuccessCount = 0; -}; - -class Dictionary { -public: - static const size_t kMaxDictSize = 1 << 14; - - bool ContainsWord(const Word &W) const { - return std::any_of(begin(), end(), [&](const DictionaryEntry &DE) { - return DE.GetW() == W; - }); - } - const DictionaryEntry *begin() const { return &DE[0]; } - const DictionaryEntry *end() const { return begin() + Size; } - DictionaryEntry &operator[](size_t Idx) { - assert(Idx < Size); - return DE[Idx]; - } - void push_back(DictionaryEntry DE) { - if (Size < kMaxDictSize) - this->DE[Size++] = DE; - } - void clear() { Size = 0; } - bool empty() const { return Size == 0; } - size_t size() const { return Size; } - -private: - DictionaryEntry DE[kMaxDictSize]; - size_t Size = 0; -}; - -} // namespace mutagen - -#endif // LLVM_FUZZER_MUTAGEN_DICTIONARY_H diff --git a/compiler-rt/lib/fuzzer/mutagen/MutagenDispatcher.cpp b/compiler-rt/lib/fuzzer/mutagen/MutagenDispatcher.cpp deleted file mode 100644 index 32b5694cc6862..0000000000000 --- a/compiler-rt/lib/fuzzer/mutagen/MutagenDispatcher.cpp +++ /dev/null @@ -1,659 +0,0 @@ -//===- MutagenDispatcher.cpp - Mutate a test input ------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// Mutate a test input. -//===----------------------------------------------------------------------===// - -#include "MutagenDispatcher.h" -#include "FuzzerBuiltins.h" -#include "FuzzerBuiltinsMsvc.h" -#include "FuzzerPlatform.h" -#include "MutagenUtil.h" -#include -#include - -namespace mutagen { -namespace { - -using fuzzer::Bswap; - -std::string ToASCII(const uint8_t *Data, size_t Size) { - std::ostringstream OSS; - for (size_t i = 0; i < Size; i++) { - uint16_t Byte = Data[i]; - if (Byte == '\\') - OSS << "\\\\"; - else if (Byte == '"') - OSS << "\\\""; - else if (Byte >= 32 && Byte < 127) - OSS << static_cast(Byte); - else - OSS << "\\x" << std::hex << std::setw(2) << std::setfill('0') << Byte - << std::dec; - } - return OSS.str(); -} - -std::string ToASCII(const Word &W) { return ToASCII(W.data(), W.size()); } - -} // namespace - -void MutationDispatcher::SetConfig(const LLVMMutagenConfiguration *C) { - memcpy(&Config, C, sizeof(Config)); - if (!Config.FromTORC4 || !Config.FromTORC8 || !Config.FromTORCW) - Config.UseCmp = 0; - if (!Config.FromMMT) - Config.UseMemmem = 0; -} - -MutationDispatcher::MutationDispatcher(const LLVMMutagenConfiguration *config) - : Rand(config->Seed) { - SetConfig(config); - DefaultMutators.insert( - DefaultMutators.begin(), - { - {&MutationDispatcher::Mutate_EraseBytes, "EraseBytes"}, - {&MutationDispatcher::Mutate_InsertByte, "InsertByte"}, - {&MutationDispatcher::Mutate_InsertRepeatedBytes, - "InsertRepeatedBytes"}, - {&MutationDispatcher::Mutate_ChangeByte, "ChangeByte"}, - {&MutationDispatcher::Mutate_ChangeBit, "ChangeBit"}, - {&MutationDispatcher::Mutate_ShuffleBytes, "ShuffleBytes"}, - {&MutationDispatcher::Mutate_ChangeASCIIInteger, "ChangeASCIIInt"}, - {&MutationDispatcher::Mutate_ChangeBinaryInteger, "ChangeBinInt"}, - {&MutationDispatcher::Mutate_CopyPart, "CopyPart"}, - {&MutationDispatcher::Mutate_CrossOver, "CrossOver"}, - {&MutationDispatcher::Mutate_AddWordFromManualDictionary, - "ManualDict"}, - {&MutationDispatcher::Mutate_AddWordFromPersistentAutoDictionary, - "PersAutoDict"}, - }); - if (Config.UseCmp) - DefaultMutators.push_back( - {&MutationDispatcher::Mutate_AddWordFromTORC, "CMP"}); - - if (Config.CustomMutator) - Mutators.push_back({&MutationDispatcher::Mutate_Custom, "Custom"}); - else - Mutators = DefaultMutators; - - if (Config.CustomCrossOver) - Mutators.push_back( - {&MutationDispatcher::Mutate_CustomCrossOver, "CustomCrossOver"}); -} - -static char RandCh(Random &Rand) { - if (Rand.RandBool()) - return static_cast(Rand(256)); - const char Special[] = "!*'();:@&=+$,/?%#[]012Az-`~.\xff\x00"; - return Special[Rand(sizeof(Special) - 1)]; -} - -size_t MutationDispatcher::Mutate_Custom(uint8_t *Data, size_t Size, - size_t MaxSize) { - if (Config.MSanUnpoison) - Config.MSanUnpoison(Data, Size); - if (Config.MSanUnpoisonParam) - Config.MSanUnpoisonParam(4); - return Config.CustomMutator(Data, Size, MaxSize, Rand.Rand()); -} - -size_t MutationDispatcher::Mutate_CustomCrossOver(uint8_t *Data, size_t Size, - size_t MaxSize) { - if (Size == 0) - return 0; - if (!CrossOverWith) - return 0; - const Unit &Other = *CrossOverWith; - if (Other.empty()) - return 0; - CustomCrossOverInPlaceHere.resize(MaxSize); - auto &U = CustomCrossOverInPlaceHere; - - if (Config.MSanUnpoison) { - Config.MSanUnpoison(Data, Size); - Config.MSanUnpoison(Other.data(), Other.size()); - Config.MSanUnpoison(U.data(), U.size()); - } - if (Config.MSanUnpoisonParam) - Config.MSanUnpoisonParam(7); - size_t NewSize = - Config.CustomCrossOver(Data, Size, Other.data(), Other.size(), U.data(), - U.size(), Rand.Rand()); - - if (!NewSize) - return 0; - assert(NewSize <= MaxSize && "CustomCrossOver returned overisized unit"); - memcpy(Data, U.data(), NewSize); - return NewSize; -} - -size_t MutationDispatcher::Mutate_ShuffleBytes(uint8_t *Data, size_t Size, - size_t MaxSize) { - if (Size > MaxSize || Size == 0) - return 0; - size_t ShuffleAmount = - Rand(std::min(Size, (size_t)8)) + 1; // [1,8] and <= Size. - size_t ShuffleStart = Rand(Size - ShuffleAmount); - assert(ShuffleStart + ShuffleAmount <= Size); - std::shuffle(Data + ShuffleStart, Data + ShuffleStart + ShuffleAmount, Rand); - return Size; -} - -size_t MutationDispatcher::Mutate_EraseBytes(uint8_t *Data, size_t Size, - size_t MaxSize) { - if (Size <= 1) - return 0; - size_t N = Rand(Size / 2) + 1; - assert(N < Size); - size_t Idx = Rand(Size - N + 1); - // Erase Data[Idx:Idx+N]. - memmove(Data + Idx, Data + Idx + N, Size - Idx - N); - // Printf("Erase: %zd %zd => %zd; Idx %zd\n", N, Size, Size - N, Idx); - return Size - N; -} - -size_t MutationDispatcher::Mutate_InsertByte(uint8_t *Data, size_t Size, - size_t MaxSize) { - if (Size >= MaxSize) - return 0; - size_t Idx = Rand(Size + 1); - // Insert new value at Data[Idx]. - memmove(Data + Idx + 1, Data + Idx, Size - Idx); - Data[Idx] = RandCh(Rand); - return Size + 1; -} - -size_t MutationDispatcher::Mutate_InsertRepeatedBytes(uint8_t *Data, - size_t Size, - size_t MaxSize) { - const size_t kMinBytesToInsert = 3; - if (Size + kMinBytesToInsert >= MaxSize) - return 0; - size_t MaxBytesToInsert = std::min(MaxSize - Size, (size_t)128); - size_t N = Rand(MaxBytesToInsert - kMinBytesToInsert + 1) + kMinBytesToInsert; - assert(Size + N <= MaxSize && N); - size_t Idx = Rand(Size + 1); - // Insert new values at Data[Idx]. - memmove(Data + Idx + N, Data + Idx, Size - Idx); - // Give preference to 0x00 and 0xff. - uint8_t Byte = static_cast( - Rand.RandBool() ? Rand(256) : (Rand.RandBool() ? 0 : 255)); - for (size_t i = 0; i < N; i++) - Data[Idx + i] = Byte; - return Size + N; -} - -size_t MutationDispatcher::Mutate_ChangeByte(uint8_t *Data, size_t Size, - size_t MaxSize) { - if (Size > MaxSize) - return 0; - size_t Idx = Rand(Size); - Data[Idx] = RandCh(Rand); - return Size; -} - -size_t MutationDispatcher::Mutate_ChangeBit(uint8_t *Data, size_t Size, - size_t MaxSize) { - if (Size > MaxSize) - return 0; - size_t Idx = Rand(Size); - Data[Idx] ^= 1 << Rand(8); - return Size; -} - -size_t MutationDispatcher::Mutate_AddWordFromManualDictionary(uint8_t *Data, - size_t Size, - size_t MaxSize) { - return AddWordFromDictionary(ManualDictionary, Data, Size, MaxSize); -} - -size_t MutationDispatcher::ApplyDictionaryEntry(uint8_t *Data, size_t Size, - size_t MaxSize, - DictionaryEntry &DE) { - const Word &W = DE.GetW(); - bool UsePositionHint = DE.HasPositionHint() && - DE.GetPositionHint() + W.size() < Size && - Rand.RandBool(); - if (Rand.RandBool()) { // Insert W. - if (Size + W.size() > MaxSize) - return 0; - size_t Idx = UsePositionHint ? DE.GetPositionHint() : Rand(Size + 1); - memmove(Data + Idx + W.size(), Data + Idx, Size - Idx); - memcpy(Data + Idx, W.data(), W.size()); - Size += W.size(); - } else { // Overwrite some bytes with W. - if (W.size() > Size) - return 0; - size_t Idx = - UsePositionHint ? DE.GetPositionHint() : Rand(Size + 1 - W.size()); - memcpy(Data + Idx, W.data(), W.size()); - } - return Size; -} - -// Somewhere in the past we have observed a comparison instructions -// with arguments Arg1 Arg2. This function tries to guess a dictionary -// entry that will satisfy that comparison. -// It first tries to find one of the arguments (possibly swapped) in the -// input and if it succeeds it creates a DE with a position hint. -// Otherwise it creates a DE with one of the arguments w/o a position hint. -DictionaryEntry MutationDispatcher::MakeDictionaryEntryFromCMP( - const void *Arg1, const void *Arg2, const void *Arg1Mutation, - const void *Arg2Mutation, size_t ArgSize, const uint8_t *Data, - size_t Size) { - bool HandleFirst = Rand.RandBool(); - const void *ExistingBytes, *DesiredBytes; - Word W; - const uint8_t *End = Data + Size; - for (int Arg = 0; Arg < 2; Arg++) { - ExistingBytes = HandleFirst ? Arg1 : Arg2; - DesiredBytes = HandleFirst ? Arg2Mutation : Arg1Mutation; - HandleFirst = !HandleFirst; - W.Set(reinterpret_cast(DesiredBytes), ArgSize); - const size_t kMaxNumPositions = 8; - size_t Positions[kMaxNumPositions]; - size_t NumPositions = 0; - for (const uint8_t *Cur = Data; - Cur < End && NumPositions < kMaxNumPositions; Cur++) { - Cur = - (const uint8_t *)SearchMemory(Cur, End - Cur, ExistingBytes, ArgSize); - if (!Cur) - break; - Positions[NumPositions++] = Cur - Data; - } - if (!NumPositions) - continue; - return DictionaryEntry(W, Positions[Rand(NumPositions)]); - } - DictionaryEntry DE(W); - return DE; -} - -template -DictionaryEntry MutationDispatcher::MakeDictionaryEntryFromCMP( - T Arg1, T Arg2, const uint8_t *Data, size_t Size) { - if (Rand.RandBool()) - Arg1 = Bswap(Arg1); - if (Rand.RandBool()) - Arg2 = Bswap(Arg2); - T Arg1Mutation = static_cast(Arg1 + Rand(-1, 1)); - T Arg2Mutation = static_cast(Arg2 + Rand(-1, 1)); - return MakeDictionaryEntryFromCMP(&Arg1, &Arg2, &Arg1Mutation, &Arg2Mutation, - sizeof(Arg1), Data, Size); -} - -size_t MutationDispatcher::Mutate_AddWordFromTORC(uint8_t *Data, size_t Size, - size_t MaxSize) { - Word W; - DictionaryEntry DE; - switch (Rand(4)) { - case 0: { - uint64_t A, B; - Config.FromTORC8(Rand.Rand(), &A, &B); - DE = MakeDictionaryEntryFromCMP(A, B, Data, Size); - } break; - case 1: { - uint32_t A, B; - Config.FromTORC4(Rand.Rand(), &A, &B); - if ((A >> 16) == 0 && (B >> 16) == 0 && Rand.RandBool()) - DE = MakeDictionaryEntryFromCMP((uint16_t)A, (uint16_t)B, Data, Size); - else - DE = MakeDictionaryEntryFromCMP(A, B, Data, Size); - } break; - case 2: { - const uint8_t *DataA, *DataB; - size_t SizeA, SizeB; - Config.FromTORCW(Rand.Rand(), &DataA, &SizeA, &DataB, &SizeB); - DE = MakeDictionaryEntryFromCMP(DataA, DataB, DataA, DataB, SizeA, Data, - Size); - } break; - case 3: - if (Config.UseMemmem) { - const uint8_t *DataW; - size_t SizeW; - Config.FromMMT(Rand.Rand(), &DataW, &SizeW); - DE = DictionaryEntry(Word(DataW, SizeW)); - } - break; - default: - assert(0); - } - if (!DE.GetW().size()) - return 0; - Size = ApplyDictionaryEntry(Data, Size, MaxSize, DE); - if (!Size) - return 0; - DictionaryEntry &DERef = - CmpDictionaryEntriesDeque[CmpDictionaryEntriesDequeIdx++ % - kCmpDictionaryEntriesDequeSize]; - DERef = DE; - CurrentDictionaryEntrySequence.push_back(&DERef); - return Size; -} - -size_t MutationDispatcher::Mutate_AddWordFromPersistentAutoDictionary( - uint8_t *Data, size_t Size, size_t MaxSize) { - return AddWordFromDictionary(PersistentAutoDictionary, Data, Size, MaxSize); -} - -size_t MutationDispatcher::AddWordFromDictionary(Dictionary &D, uint8_t *Data, - size_t Size, size_t MaxSize) { - if (Size > MaxSize) - return 0; - if (D.empty()) - return 0; - DictionaryEntry &DE = D[Rand(D.size())]; - Size = ApplyDictionaryEntry(Data, Size, MaxSize, DE); - if (!Size) - return 0; - DE.IncUseCount(); - CurrentDictionaryEntrySequence.push_back(&DE); - return Size; -} - -// Overwrites part of To[0,ToSize) with a part of From[0,FromSize). -// Returns ToSize. -size_t MutationDispatcher::CopyPartOf(const uint8_t *From, size_t FromSize, - uint8_t *To, size_t ToSize) { - // Copy From[FromBeg, FromBeg + CopySize) into To[ToBeg, ToBeg + CopySize). - size_t ToBeg = Rand(ToSize); - size_t CopySize = Rand(ToSize - ToBeg) + 1; - assert(ToBeg + CopySize <= ToSize); - CopySize = std::min(CopySize, FromSize); - size_t FromBeg = Rand(FromSize - CopySize + 1); - assert(FromBeg + CopySize <= FromSize); - memmove(To + ToBeg, From + FromBeg, CopySize); - return ToSize; -} - -// Inserts part of From[0,ToSize) into To. -// Returns new size of To on success or 0 on failure. -size_t MutationDispatcher::InsertPartOf(const uint8_t *From, size_t FromSize, - uint8_t *To, size_t ToSize, - size_t MaxToSize) { - if (ToSize >= MaxToSize) - return 0; - size_t AvailableSpace = MaxToSize - ToSize; - size_t MaxCopySize = std::min(AvailableSpace, FromSize); - size_t CopySize = Rand(MaxCopySize) + 1; - size_t FromBeg = Rand(FromSize - CopySize + 1); - assert(FromBeg + CopySize <= FromSize); - size_t ToInsertPos = Rand(ToSize + 1); - assert(ToInsertPos + CopySize <= MaxToSize); - size_t TailSize = ToSize - ToInsertPos; - if (To == From) { - MutateInPlaceHere.resize(MaxToSize); - memcpy(MutateInPlaceHere.data(), From + FromBeg, CopySize); - memmove(To + ToInsertPos + CopySize, To + ToInsertPos, TailSize); - memmove(To + ToInsertPos, MutateInPlaceHere.data(), CopySize); - } else { - memmove(To + ToInsertPos + CopySize, To + ToInsertPos, TailSize); - memmove(To + ToInsertPos, From + FromBeg, CopySize); - } - return ToSize + CopySize; -} - -size_t MutationDispatcher::Mutate_CopyPart(uint8_t *Data, size_t Size, - size_t MaxSize) { - if (Size > MaxSize || Size == 0) - return 0; - // If Size == MaxSize, `InsertPartOf(...)` will - // fail so there's no point using it in this case. - if (Size == MaxSize || Rand.RandBool()) - return CopyPartOf(Data, Size, Data, Size); - else - return InsertPartOf(Data, Size, Data, Size, MaxSize); -} - -size_t MutationDispatcher::Mutate_ChangeASCIIInteger(uint8_t *Data, size_t Size, - size_t MaxSize) { - if (Size > MaxSize) - return 0; - size_t B = Rand(Size); - while (B < Size && !isdigit(Data[B])) - B++; - if (B == Size) - return 0; - size_t E = B; - while (E < Size && isdigit(Data[E])) - E++; - assert(B < E); - // now we have digits in [B, E). - // strtol and friends don't accept non-zero-teminated data, parse it manually. - uint64_t Val = Data[B] - '0'; - for (size_t i = B + 1; i < E; i++) - Val = Val * 10 + Data[i] - '0'; - - // Mutate the integer value. - switch (Rand(5)) { - case 0: - Val++; - break; - case 1: - Val--; - break; - case 2: - Val /= 2; - break; - case 3: - Val *= 2; - break; - case 4: - Val = Rand(Val * Val); - break; - default: - assert(0); - } - // Just replace the bytes with the new ones, don't bother moving bytes. - for (size_t i = B; i < E; i++) { - size_t Idx = E + B - i - 1; - assert(Idx >= B && Idx < E); - Data[Idx] = (Val % 10) + '0'; - Val /= 10; - } - return Size; -} - -template -size_t ChangeBinaryInteger(uint8_t *Data, size_t Size, Random &Rand) { - if (Size < sizeof(T)) - return 0; - size_t Off = Rand(Size - sizeof(T) + 1); - assert(Off + sizeof(T) <= Size); - T Val; - if (Off < 64 && !Rand(4)) { - Val = static_cast(Size); - if (Rand.RandBool()) - Val = Bswap(Val); - } else { - memcpy(&Val, Data + Off, sizeof(Val)); - T Add = static_cast(Rand(21)); - Add -= 10; - if (Rand.RandBool()) - Val = Bswap(T(Bswap(Val) + Add)); // Add assuming different endiannes. - else - Val = Val + Add; // Add assuming current endiannes. - if (Add == 0 || Rand.RandBool()) // Maybe negate. - Val = -Val; - } - memcpy(Data + Off, &Val, sizeof(Val)); - return Size; -} - -size_t MutationDispatcher::Mutate_ChangeBinaryInteger(uint8_t *Data, - size_t Size, - size_t MaxSize) { - if (Size > MaxSize) - return 0; - switch (Rand(4)) { - case 3: - return ChangeBinaryInteger(Data, Size, Rand); - case 2: - return ChangeBinaryInteger(Data, Size, Rand); - case 1: - return ChangeBinaryInteger(Data, Size, Rand); - case 0: - return ChangeBinaryInteger(Data, Size, Rand); - default: - assert(0); - } - return 0; -} - -size_t MutationDispatcher::Mutate_CrossOver(uint8_t *Data, size_t Size, - size_t MaxSize) { - if (Size > MaxSize) - return 0; - if (Size == 0) - return 0; - if (!CrossOverWith) - return 0; - const Unit &O = *CrossOverWith; - if (O.empty()) - return 0; - size_t NewSize = 0; - switch (Rand(3)) { - case 0: - MutateInPlaceHere.resize(MaxSize); - NewSize = CrossOver(Data, Size, O.data(), O.size(), - MutateInPlaceHere.data(), MaxSize); - memcpy(Data, MutateInPlaceHere.data(), NewSize); - break; - case 1: - NewSize = InsertPartOf(O.data(), O.size(), Data, Size, MaxSize); - if (!NewSize) - NewSize = CopyPartOf(O.data(), O.size(), Data, Size); - break; - case 2: - NewSize = CopyPartOf(O.data(), O.size(), Data, Size); - break; - default: - assert(0); - } - assert(NewSize > 0 && "CrossOver returned empty unit"); - assert(NewSize <= MaxSize && "CrossOver returned overisized unit"); - return NewSize; -} - -void MutationDispatcher::StartMutationSequence() { - CurrentMutatorSequence.clear(); - CurrentDictionaryEntrySequence.clear(); -} - -// Copy successful dictionary entries to PersistentAutoDictionary. -void MutationDispatcher::RecordSuccessfulMutationSequence() { - for (auto *DE : CurrentDictionaryEntrySequence) { - // PersistentAutoDictionary.AddWithSuccessCountOne(DE); - DE->IncSuccessCount(); - assert(DE->GetW().size()); - // Linear search is fine here as this happens seldom. - if (!PersistentAutoDictionary.ContainsWord(DE->GetW())) - PersistentAutoDictionary.push_back(*DE); - } -} - -const Dictionary &MutationDispatcher::RecommendDictionary() { - RecommendedDictionary.clear(); - for (auto &DE : PersistentAutoDictionary) - if (!ManualDictionary.ContainsWord(DE.GetW())) - RecommendedDictionary.push_back(DE); - NextRecommendedDictionaryEntry = 0; - return RecommendedDictionary; -} - -const char *MutationDispatcher::RecommendDictionaryEntry(size_t *UseCount) { - if (NextRecommendedDictionaryEntry >= RecommendedDictionary.size()) - return nullptr; - auto &DE = RecommendedDictionary[NextRecommendedDictionaryEntry++]; - assert(DE.GetW().size()); - DictionaryEntryWord = ToASCII(DE.GetW()); - if (UseCount) - *UseCount = DE.GetUseCount(); - return DictionaryEntryWord.c_str(); -} - -const Sequence & -MutationDispatcher::MutationSequence() { - CurrentMutatorSequence.SetString([](Mutator M) { return M.Name; }); - return CurrentMutatorSequence; -} - -const Sequence & -MutationDispatcher::DictionaryEntrySequence() { - CurrentDictionaryEntrySequence.SetString([](DictionaryEntry *DE) { - return std::string("\"") + ToASCII(DE->GetW()) + std::string("\""); - }); - return CurrentDictionaryEntrySequence; -} - -size_t MutationDispatcher::Mutate(uint8_t *Data, size_t Size, size_t MaxSize) { - return MutateImpl(Data, Size, MaxSize, Mutators); -} - -size_t MutationDispatcher::DefaultMutate(uint8_t *Data, size_t Size, - size_t MaxSize) { - return MutateImpl(Data, Size, MaxSize, DefaultMutators); -} - -// Mutates Data in place, returns new size. -size_t MutationDispatcher::MutateImpl(uint8_t *Data, size_t Size, - size_t MaxSize, - Vector &Mutators) { - assert(MaxSize > 0); - // Some mutations may fail (e.g. can't insert more bytes if Size == MaxSize), - // in which case they will return 0. - // Try several times before returning un-mutated data. - for (int Iter = 0; Iter < 100; Iter++) { - auto M = Mutators[Rand(Mutators.size())]; - size_t NewSize = (this->*(M.Fn))(Data, Size, MaxSize); - if (NewSize && NewSize <= MaxSize) { - if (Config.OnlyASCII) - ToASCII(Data, NewSize); - CurrentMutatorSequence.push_back(M); - return NewSize; - } - } - *Data = ' '; - return 1; // Fallback, should not happen frequently. -} - -// Mask represents the set of Data bytes that are worth mutating. -size_t MutationDispatcher::MutateWithMask(uint8_t *Data, size_t Size, - size_t MaxSize, - const Vector &Mask) { - size_t MaskedSize = std::min(Size, Mask.size()); - // * Copy the worthy bytes into a temporary array T - // * Mutate T - // * Copy T back. - // This is totally unoptimized. - auto &T = MutateWithMaskTemp; - if (T.size() < Size) - T.resize(Size); - size_t OneBits = 0; - for (size_t I = 0; I < MaskedSize; I++) - if (Mask[I]) - T[OneBits++] = Data[I]; - - if (!OneBits) - return 0; - assert(!T.empty()); - size_t NewSize = Mutate(T.data(), OneBits, OneBits); - assert(NewSize <= OneBits); - (void)NewSize; - // Even if NewSize < OneBits we still use all OneBits bytes. - for (size_t I = 0, J = 0; I < MaskedSize; I++) - if (Mask[I]) - Data[I] = T[J++]; - return Size; -} - -void MutationDispatcher::AddWordToManualDictionary(const Word &W) { - ManualDictionary.push_back({W, std::numeric_limits::max()}); -} - -} // namespace mutagen diff --git a/compiler-rt/lib/fuzzer/mutagen/MutagenDispatcher.h b/compiler-rt/lib/fuzzer/mutagen/MutagenDispatcher.h deleted file mode 100644 index c5c43d5c346fe..0000000000000 --- a/compiler-rt/lib/fuzzer/mutagen/MutagenDispatcher.h +++ /dev/null @@ -1,190 +0,0 @@ -//===- MutagenDispatcher.h - Internal header for the mutagen ----*- C++ -* ===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// mutagen::MutationDispatcher -//===----------------------------------------------------------------------===// - -#ifndef LLVM_FUZZER_MUTAGEN_DISPATCHER_H -#define LLVM_FUZZER_MUTAGEN_DISPATCHER_H - -#include "FuzzerRandom.h" -#include "Mutagen.h" -#include "MutagenDictionary.h" -#include "MutagenSequence.h" -#include -#include -#include - -namespace mutagen { -namespace { - -using fuzzer::Random; -using fuzzer::Unit; -using fuzzer::Vector; -using fuzzer::Word; - -} // namespace - -class MutationDispatcher final { -public: - struct Mutator { - size_t (MutationDispatcher::*Fn)(uint8_t *Data, size_t Size, size_t Max); - const char *Name; - }; - - explicit MutationDispatcher(const LLVMMutagenConfiguration *Config); - ~MutationDispatcher() = default; - - /// Indicate that we are about to start a new sequence of mutations. - void StartMutationSequence(); - /// Returns the current sequence of mutations. May truncate the sequence - /// unless Verbose is true. Sets |OutSize| to the length of the untrancated - /// sequence, if provided. - const Sequence &MutationSequence(); - /// Returns the current sequence of dictionary entries. May truncate the - /// sequence unless Verbose is true. Sets |OutSize| to the length of the - /// untrancated sequence, if provided. - const Sequence &DictionaryEntrySequence(); - /// Indicate that the current sequence of mutations was successful. - void RecordSuccessfulMutationSequence(); - /// Mutates data by invoking user-provided mutator. - size_t Mutate_Custom(uint8_t *Data, size_t Size, size_t MaxSize); - /// Mutates data by invoking user-provided crossover. - size_t Mutate_CustomCrossOver(uint8_t *Data, size_t Size, size_t MaxSize); - /// Mutates data by shuffling bytes. - size_t Mutate_ShuffleBytes(uint8_t *Data, size_t Size, size_t MaxSize); - /// Mutates data by erasing bytes. - size_t Mutate_EraseBytes(uint8_t *Data, size_t Size, size_t MaxSize); - /// Mutates data by inserting a byte. - size_t Mutate_InsertByte(uint8_t *Data, size_t Size, size_t MaxSize); - /// Mutates data by inserting several repeated bytes. - size_t Mutate_InsertRepeatedBytes(uint8_t *Data, size_t Size, size_t MaxSize); - /// Mutates data by changing one byte. - size_t Mutate_ChangeByte(uint8_t *Data, size_t Size, size_t MaxSize); - /// Mutates data by changing one bit. - size_t Mutate_ChangeBit(uint8_t *Data, size_t Size, size_t MaxSize); - /// Mutates data by copying/inserting a part of data into a different place. - size_t Mutate_CopyPart(uint8_t *Data, size_t Size, size_t MaxSize); - - /// Mutates data by adding a word from the manual dictionary. - size_t Mutate_AddWordFromManualDictionary(uint8_t *Data, size_t Size, - size_t MaxSize); - - /// Mutates data by adding a word from the TORC. - size_t Mutate_AddWordFromTORC(uint8_t *Data, size_t Size, size_t MaxSize); - - /// Mutates data by adding a word from the persistent automatic dictionary. - size_t Mutate_AddWordFromPersistentAutoDictionary(uint8_t *Data, size_t Size, - size_t MaxSize); - - /// Tries to find an ASCII integer in Data, changes it to another ASCII int. - size_t Mutate_ChangeASCIIInteger(uint8_t *Data, size_t Size, size_t MaxSize); - /// Change a 1-, 2-, 4-, or 8-byte integer in interesting ways. - size_t Mutate_ChangeBinaryInteger(uint8_t *Data, size_t Size, size_t MaxSize); - - /// CrossOver Data with CrossOverWith. - size_t Mutate_CrossOver(uint8_t *Data, size_t Size, size_t MaxSize); - - size_t AddWordFromDictionary(Dictionary &D, uint8_t *Data, size_t Size, - size_t MaxSize); - size_t MutateImpl(uint8_t *Data, size_t Size, size_t MaxSize, - Vector &Mutators); - - size_t InsertPartOf(const uint8_t *From, size_t FromSize, uint8_t *To, - size_t ToSize, size_t MaxToSize); - size_t CopyPartOf(const uint8_t *From, size_t FromSize, uint8_t *To, - size_t ToSize); - size_t ApplyDictionaryEntry(uint8_t *Data, size_t Size, size_t MaxSize, - DictionaryEntry &DE); - - template - DictionaryEntry MakeDictionaryEntryFromCMP(T Arg1, T Arg2, - const uint8_t *Data, size_t Size); - DictionaryEntry MakeDictionaryEntryFromCMP(const Word &Arg1, const Word &Arg2, - const uint8_t *Data, size_t Size); - DictionaryEntry MakeDictionaryEntryFromCMP(const void *Arg1, const void *Arg2, - const void *Arg1Mutation, - const void *Arg2Mutation, - size_t ArgSize, - const uint8_t *Data, size_t Size); - - /// Applies one of the configured mutations. - /// Returns the new size of data which could be up to MaxSize. - size_t Mutate(uint8_t *Data, size_t Size, size_t MaxSize); - - /// Applies one of the configured mutations to the bytes of Data - /// that have '1' in Mask. - /// Mask.size() should be >= Size. - size_t MutateWithMask(uint8_t *Data, size_t Size, size_t MaxSize, - const Vector &Mask); - - /// Applies one of the default mutations. Provided as a service - /// to mutation authors. - size_t DefaultMutate(uint8_t *Data, size_t Size, size_t MaxSize); - - /// Creates a cross-over of two pieces of Data, returns its size. - size_t CrossOver(const uint8_t *Data1, size_t Size1, const uint8_t *Data2, - size_t Size2, uint8_t *Out, size_t MaxOutSize); - - void AddWordToManualDictionary(const Word &W); - - // Creates a recommended dictionary and returns its number of entries. The - // entries can be retrieved by subsequent calls to - // |LLVMMutagenRecommendDictionaryEntry|. - const Dictionary &RecommendDictionary(); - - // Returns the ASCII representation of the next recommended dictionary entry, - // and sets |OutUseCount| to its use count. The return pointer is valid until - // the next call to this method. - const char *RecommendDictionaryEntry(size_t *OutUseCount); - - void SetCrossOverWith(const Unit *U) { CrossOverWith = U; } - - Random &GetRand() { return Rand; } - -private: - // Imports and validates the disptacher's configuration. - void SetConfig(const LLVMMutagenConfiguration *Config); - - Random Rand; - LLVMMutagenConfiguration Config; - - // Dictionary provided by the user via -dict=DICT_FILE. - Dictionary ManualDictionary; - // Persistent dictionary modified by the fuzzer, consists of - // entries that led to successful discoveries in the past mutations. - Dictionary PersistentAutoDictionary; - // Recommended dictionary buolt by |RecommendDictionary|. - Dictionary RecommendedDictionary; - size_t NextRecommendedDictionaryEntry = 0; - std::string DictionaryEntryWord; - - Sequence CurrentDictionaryEntrySequence; - - static const size_t kCmpDictionaryEntriesDequeSize = 16; - DictionaryEntry CmpDictionaryEntriesDeque[kCmpDictionaryEntriesDequeSize]; - size_t CmpDictionaryEntriesDequeIdx = 0; - - const Unit *CrossOverWith = nullptr; - Vector MutateInPlaceHere; - Vector MutateWithMaskTemp; - // CustomCrossOver needs its own buffer as a custom implementation may call - // LLVMFuzzerMutate, which in turn may resize MutateInPlaceHere. - Vector CustomCrossOverInPlaceHere; - - Vector Mutators; - Vector DefaultMutators; - Sequence CurrentMutatorSequence; -}; - -// Returns a pointer to the MutationDispatcher is use by MutagenInterface. -// This should only be used for testing. -MutationDispatcher *GetMutationDispatcherForTest(); - -} // namespace mutagen - -#endif // LLVM_FUZZER_MUTAGEN_DISPATCHER_H diff --git a/compiler-rt/lib/fuzzer/mutagen/MutagenSequence.h b/compiler-rt/lib/fuzzer/mutagen/MutagenSequence.h deleted file mode 100644 index fd0ab2cb5f0f9..0000000000000 --- a/compiler-rt/lib/fuzzer/mutagen/MutagenSequence.h +++ /dev/null @@ -1,101 +0,0 @@ -//===- MutagenSequence.h - Internal header for the mutagen ------*- C++ -* ===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// mutagen::Sequence -//===----------------------------------------------------------------------===// - -#ifndef LLVM_FUZZER_MUTAGEN_SEQUENCE_H -#define LLVM_FUZZER_MUTAGEN_SEQUENCE_H - -#include "FuzzerDefs.h" -#include -#include - -namespace mutagen { -namespace { - -using fuzzer::Vector; - -} // namespace - -// The Sequence type bundles together a list of items, a string representation, -// and a position in that string suitable for truncating it when overly long, -// e.g. after the tenth item. -template class Sequence { -public: - constexpr static size_t kMaxBriefItems = 10; - - void clear() { - Items.clear(); - Size = 0; - Str.clear(); - Brief = 0; - } - - bool empty() const { return Size == 0; } - - size_t size() const { return Size; } - - void push_back(T t) { Items.push_back(t); } - - typename Vector::const_iterator begin() const { return Items.begin(); } - typename Vector::iterator begin() { return Items.begin(); } - - typename Vector::const_iterator end() const { return Items.end(); } - typename Vector::iterator end() { return Items.end(); } - - std::string GetString(bool Verbose = true) const { - return Verbose ? Str : Str.substr(0, Brief); - } - - // Constructs the string representation of the sequence, using a callback that - // converts items to strings. - template - // std::string ItemCallback(T Item); - void SetString(ItemCallback ConvertToASCII) { - // No change since last call. - if (Size == Items.size()) - return; - Size = Items.size(); - std::ostringstream OSS; - size_t i = 0; - for (; i < Size && i < kMaxBriefItems; i++) - OSS << ConvertToASCII(Items[i]) << "-"; - Brief = static_cast(OSS.tellp()); - for (; i < Size; i++) - OSS << ConvertToASCII(Items[i]) << "-"; - Str = OSS.str(); - } - -private: - Vector Items; - size_t Size = 0; - std::string Str; - size_t Brief = 0; -}; - -template -typename Vector::const_iterator begin(const Sequence &S) { - return S.begin(); -} - -template typename Vector::iterator begin(Sequence &S) { - return S.begin(); -} - -template -typename Vector::const_iterator end(const Sequence &S) { - return S.end(); -} - -template typename Vector::iterator end(Sequence &S) { - return S.end(); -} - -} // namespace mutagen - -#endif // LLVM_FUZZER_MUTAGEN_SEQUENCE_H diff --git a/compiler-rt/lib/fuzzer/mutagen/MutagenUtil.h b/compiler-rt/lib/fuzzer/mutagen/MutagenUtil.h deleted file mode 100644 index cf3b78b9655af..0000000000000 --- a/compiler-rt/lib/fuzzer/mutagen/MutagenUtil.h +++ /dev/null @@ -1,24 +0,0 @@ -//===- MutagenUtil.h - Internal header for the mutagen Utils ----*- C++ -* ===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// Util functions. -//===----------------------------------------------------------------------===// - -#ifndef LLVM_FUZZER_MUTAGEN_UTIL_H -#define LLVM_FUZZER_MUTAGEN_UTIL_H - -#include -#include - -namespace mutagen { - -const void *SearchMemory(const void *haystack, size_t haystacklen, - const void *needle, size_t needlelen); - -} // namespace mutagen - -#endif // LLVM_FUZZER_MUTAGEN_UTIL_H diff --git a/compiler-rt/lib/fuzzer/mutagen/MutagenUtilPosix.cpp b/compiler-rt/lib/fuzzer/mutagen/MutagenUtilPosix.cpp deleted file mode 100644 index c157c6190c5d7..0000000000000 --- a/compiler-rt/lib/fuzzer/mutagen/MutagenUtilPosix.cpp +++ /dev/null @@ -1,23 +0,0 @@ -//===- MutagenUtilPosix.cpp - Misc utils for Posix. -----------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// Misc utils implementation using Posix API. -//===----------------------------------------------------------------------===// -#include "FuzzerPlatform.h" -#if (LIBFUZZER_POSIX || LIBFUZZER_FUCHSIA) -#include - -namespace mutagen { - -const void *SearchMemory(const void *Data, size_t DataLen, const void *Patt, - size_t PattLen) { - return memmem(Data, DataLen, Patt, PattLen); -} - -} // namespace mutagen - -#endif // (LIBFUZZER_POSIX || LIBFUZZER_FUCHSIA) diff --git a/compiler-rt/lib/fuzzer/mutagen/MutagenUtilWindows.cpp b/compiler-rt/lib/fuzzer/mutagen/MutagenUtilWindows.cpp deleted file mode 100644 index 93b86556b1393..0000000000000 --- a/compiler-rt/lib/fuzzer/mutagen/MutagenUtilWindows.cpp +++ /dev/null @@ -1,41 +0,0 @@ -//===- MutagenUtilWindows.cpp - Misc utils for Windows. -------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// Misc utils implementation for Windows. -//===----------------------------------------------------------------------===// -#include "FuzzerPlatform.h" -#if LIBFUZZER_WINDOWS -#include -#include -#include - -namespace mutagen { - -const void *SearchMemory(const void *Data, size_t DataLen, const void *Patt, - size_t PattLen) { - // TODO: make this implementation more efficient. - const char *Cdata = (const char *)Data; - const char *Cpatt = (const char *)Patt; - - if (!Data || !Patt || DataLen == 0 || PattLen == 0 || DataLen < PattLen) - return NULL; - - if (PattLen == 1) - return memchr(Data, *Cpatt, DataLen); - - const char *End = Cdata + DataLen - PattLen + 1; - - for (const char *It = Cdata; It < End; ++It) - if (It[0] == Cpatt[0] && memcmp(It, Cpatt, PattLen) == 0) - return It; - - return NULL; -} - -} // namespace mutagen - -#endif // LIBFUZZER_WINDOWS diff --git a/compiler-rt/lib/fuzzer/mutagen/build.sh b/compiler-rt/lib/fuzzer/mutagen/build.sh deleted file mode 100755 index 19c22b8fc5562..0000000000000 --- a/compiler-rt/lib/fuzzer/mutagen/build.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/sh -LIBMUTAGEN_SRC_DIR=$(dirname $0) -LIBFUZZER_SRC_DIR=$LIBMUTAGEN_SRC_DIR/.. -CXX="${CXX:-clang}" -for f in $LIBMUTAGEN_SRC_DIR/*.cpp; do - $CXX -g -O2 -fno-omit-frame-pointer -std=c++11 $f -c -I$LIBFUZZER_SRC_DIR & -done -wait -rm -f libMutagen.a -ar ru libMutagen.a Mutagen*.o -rm -f Mutagen*.o - diff --git a/compiler-rt/lib/fuzzer/tests/CMakeLists.txt b/compiler-rt/lib/fuzzer/tests/CMakeLists.txt index 974efc3c5b630..5b3e906419546 100644 --- a/compiler-rt/lib/fuzzer/tests/CMakeLists.txt +++ b/compiler-rt/lib/fuzzer/tests/CMakeLists.txt @@ -17,9 +17,6 @@ set_target_properties(FuzzerUnitTests PROPERTIES FOLDER "Compiler-RT Tests") add_custom_target(FuzzedDataProviderUnitTests) set_target_properties(FuzzedDataProviderUnitTests PROPERTIES FOLDER "Compiler-RT Tests") -add_custom_target(MutagenUnitTests) -set_target_properties(MutagenUnitTests PROPERTIES FOLDER "Compiler-RT Tests") - set(LIBFUZZER_UNITTEST_LINK_FLAGS ${COMPILER_RT_UNITTEST_LINK_FLAGS}) list(APPEND LIBFUZZER_UNITTEST_LINK_FLAGS --driver-mode=g++) @@ -49,35 +46,23 @@ if(COMPILER_RT_DEFAULT_TARGET_ARCH IN_LIST FUZZER_SUPPORTED_ARCH) set(arch ${COMPILER_RT_DEFAULT_TARGET_ARCH}) set(LIBFUZZER_TEST_RUNTIME RTFuzzerTest.${arch}) - set(LIBMUTAGEN_TEST_RUNTIME RTMutagenTest.${arch}) if(APPLE) set(LIBFUZZER_TEST_RUNTIME_OBJECTS $) - set(LIBMUTAGEN_TEST_RUNTIME_OBJECTS - $) else() set(LIBFUZZER_TEST_RUNTIME_OBJECTS $) - set(LIBMUTAGEN_TEST_RUNTIME_OBJECTS - $) endif() add_library(${LIBFUZZER_TEST_RUNTIME} STATIC - ${LIBFUZZER_TEST_RUNTIME_OBJECTS} - ${LIBMUTAGEN_TEST_RUNTIME_OBJECTS}) + ${LIBFUZZER_TEST_RUNTIME_OBJECTS}) set_target_properties(${LIBFUZZER_TEST_RUNTIME} PROPERTIES ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} FOLDER "Compiler-RT Runtime tests") - add_library(${LIBMUTAGEN_TEST_RUNTIME} STATIC - ${LIBMUTAGEN_TEST_RUNTIME_OBJECTS}) - set_target_properties(${LIBMUTAGEN_TEST_RUNTIME} PROPERTIES - ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} - FOLDER "Compiler-RT Runtime tests") - if(CMAKE_SYSTEM_NAME STREQUAL "Linux" AND COMPILER_RT_LIBCXX_PATH AND COMPILER_RT_LIBCXXABI_PATH) - file(GLOB libfuzzer_headers ../*.h ../mutagen/*.h) + file(GLOB libfuzzer_headers ../*.h) set(LIBFUZZER_TEST_RUNTIME_DEPS libcxx_fuzzer_${arch}-build ${libfuzzer_headers}) set(LIBFUZZER_TEST_RUNTIME_CFLAGS -isystem ${LIBCXX_${arch}_PREFIX}/include/c++/v1) set(LIBFUZZER_TEST_RUNTIME_LINK_FLAGS ${LIBCXX_${arch}_PREFIX}/lib/libc++.a) @@ -88,7 +73,7 @@ if(COMPILER_RT_DEFAULT_TARGET_ARCH IN_LIST FUZZER_SUPPORTED_ARCH) FuzzerUnitTests "Fuzzer-${arch}-Test" ${arch} SOURCES FuzzerUnittest.cpp ${COMPILER_RT_GTEST_SOURCE} RUNTIME ${LIBFUZZER_TEST_RUNTIME} - DEPS gtest ${LIBFUZZER_TEST_RUNTIME_DEPS} + DEPS gtest ${LIBFUZZER_TEST_RUNTIME_DEPS} CFLAGS ${LIBFUZZER_UNITTEST_CFLAGS} ${LIBFUZZER_TEST_RUNTIME_CFLAGS} LINK_FLAGS ${LIBFUZZER_UNITTEST_LINK_FLAGS} ${LIBFUZZER_TEST_RUNTIME_LINK_FLAGS}) set_target_properties(FuzzerUnitTests PROPERTIES @@ -103,15 +88,4 @@ if(COMPILER_RT_DEFAULT_TARGET_ARCH IN_LIST FUZZER_SUPPORTED_ARCH) LINK_FLAGS ${LIBFUZZER_UNITTEST_LINK_FLAGS} ${LIBFUZZER_TEST_RUNTIME_LINK_FLAGS}) set_target_properties(FuzzedDataProviderUnitTests PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) - - set(MutagenTestObjects) - generate_compiler_rt_tests(MutagenTestObjects - MutagenUnitTests "Mutagen-${arch}-Test" ${arch} - SOURCES MutagenUnittest.cpp ${COMPILER_RT_GTEST_SOURCE} - RUNTIME ${LIBFUZZER_TEST_RUNTIME} - DEPS gtest ${LIBFUZZER_TEST_RUNTIME_DEPS} ${LIBMUTAGEN_TEST_RUNTIME_DEPS} - CFLAGS ${LIBFUZZER_UNITTEST_CFLAGS} ${LIBFUZZER_TEST_RUNTIME_CFLAGS} - LINK_FLAGS ${LIBFUZZER_UNITTEST_LINK_FLAGS} ${LIBFUZZER_TEST_RUNTIME_LINK_FLAGS}) - set_target_properties(MutagenUnitTests PROPERTIES - RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) endif() diff --git a/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp b/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp index 277fa5e6183f8..974a01ff4ab6e 100644 --- a/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp +++ b/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp @@ -10,6 +10,7 @@ #define GTEST_NO_LLVM_SUPPORT 1 #include "FuzzerCorpus.h" +#include "FuzzerDictionary.h" #include "FuzzerInternal.h" #include "FuzzerMerge.h" #include "FuzzerMutate.h" @@ -43,6 +44,65 @@ TEST(Fuzzer, Basename) { #endif } +TEST(Fuzzer, CrossOver) { + std::unique_ptr t(new ExternalFunctions()); + fuzzer::EF = t.get(); + Random Rand(0); + std::unique_ptr MD(new MutationDispatcher(Rand, {})); + Unit A({0, 1, 2}), B({5, 6, 7}); + Unit C; + Unit Expected[] = { + { 0 }, + { 0, 1 }, + { 0, 5 }, + { 0, 1, 2 }, + { 0, 1, 5 }, + { 0, 5, 1 }, + { 0, 5, 6 }, + { 0, 1, 2, 5 }, + { 0, 1, 5, 2 }, + { 0, 1, 5, 6 }, + { 0, 5, 1, 2 }, + { 0, 5, 1, 6 }, + { 0, 5, 6, 1 }, + { 0, 5, 6, 7 }, + { 0, 1, 2, 5, 6 }, + { 0, 1, 5, 2, 6 }, + { 0, 1, 5, 6, 2 }, + { 0, 1, 5, 6, 7 }, + { 0, 5, 1, 2, 6 }, + { 0, 5, 1, 6, 2 }, + { 0, 5, 1, 6, 7 }, + { 0, 5, 6, 1, 2 }, + { 0, 5, 6, 1, 7 }, + { 0, 5, 6, 7, 1 }, + { 0, 1, 2, 5, 6, 7 }, + { 0, 1, 5, 2, 6, 7 }, + { 0, 1, 5, 6, 2, 7 }, + { 0, 1, 5, 6, 7, 2 }, + { 0, 5, 1, 2, 6, 7 }, + { 0, 5, 1, 6, 2, 7 }, + { 0, 5, 1, 6, 7, 2 }, + { 0, 5, 6, 1, 2, 7 }, + { 0, 5, 6, 1, 7, 2 }, + { 0, 5, 6, 7, 1, 2 } + }; + for (size_t Len = 1; Len < 8; Len++) { + Set FoundUnits, ExpectedUnitsWitThisLength; + for (int Iter = 0; Iter < 3000; Iter++) { + C.resize(Len); + size_t NewSize = MD->CrossOver(A.data(), A.size(), B.data(), B.size(), + C.data(), C.size()); + C.resize(NewSize); + FoundUnits.insert(C); + } + for (const Unit &U : Expected) + if (U.size() <= Len) + ExpectedUnitsWitThisLength.insert(U); + EXPECT_EQ(ExpectedUnitsWitThisLength, FoundUnits); + } +} + TEST(Fuzzer, Hash) { uint8_t A[] = {'a', 'b', 'c'}; fuzzer::Unit U(A, A + sizeof(A)); @@ -51,6 +111,423 @@ TEST(Fuzzer, Hash) { EXPECT_EQ("81fe8bfe87576c3ecb22426f8e57847382917acf", fuzzer::Hash(U)); } +typedef size_t (MutationDispatcher::*Mutator)(uint8_t *Data, size_t Size, + size_t MaxSize); + +void TestEraseBytes(Mutator M, int NumIter) { + std::unique_ptr t(new ExternalFunctions()); + fuzzer::EF = t.get(); + uint8_t REM0[8] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; + uint8_t REM1[8] = {0x00, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; + uint8_t REM2[8] = {0x00, 0x11, 0x33, 0x44, 0x55, 0x66, 0x77}; + uint8_t REM3[8] = {0x00, 0x11, 0x22, 0x44, 0x55, 0x66, 0x77}; + uint8_t REM4[8] = {0x00, 0x11, 0x22, 0x33, 0x55, 0x66, 0x77}; + uint8_t REM5[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x66, 0x77}; + uint8_t REM6[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x77}; + uint8_t REM7[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; + + uint8_t REM8[6] = {0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; + uint8_t REM9[6] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55}; + uint8_t REM10[6] = {0x00, 0x11, 0x22, 0x55, 0x66, 0x77}; + + uint8_t REM11[5] = {0x33, 0x44, 0x55, 0x66, 0x77}; + uint8_t REM12[5] = {0x00, 0x11, 0x22, 0x33, 0x44}; + uint8_t REM13[5] = {0x00, 0x44, 0x55, 0x66, 0x77}; + + + Random Rand(0); + std::unique_ptr MD(new MutationDispatcher(Rand, {})); + int FoundMask = 0; + for (int i = 0; i < NumIter; i++) { + uint8_t T[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; + size_t NewSize = (*MD.*M)(T, sizeof(T), sizeof(T)); + if (NewSize == 7 && !memcmp(REM0, T, 7)) FoundMask |= 1 << 0; + if (NewSize == 7 && !memcmp(REM1, T, 7)) FoundMask |= 1 << 1; + if (NewSize == 7 && !memcmp(REM2, T, 7)) FoundMask |= 1 << 2; + if (NewSize == 7 && !memcmp(REM3, T, 7)) FoundMask |= 1 << 3; + if (NewSize == 7 && !memcmp(REM4, T, 7)) FoundMask |= 1 << 4; + if (NewSize == 7 && !memcmp(REM5, T, 7)) FoundMask |= 1 << 5; + if (NewSize == 7 && !memcmp(REM6, T, 7)) FoundMask |= 1 << 6; + if (NewSize == 7 && !memcmp(REM7, T, 7)) FoundMask |= 1 << 7; + + if (NewSize == 6 && !memcmp(REM8, T, 6)) FoundMask |= 1 << 8; + if (NewSize == 6 && !memcmp(REM9, T, 6)) FoundMask |= 1 << 9; + if (NewSize == 6 && !memcmp(REM10, T, 6)) FoundMask |= 1 << 10; + + if (NewSize == 5 && !memcmp(REM11, T, 5)) FoundMask |= 1 << 11; + if (NewSize == 5 && !memcmp(REM12, T, 5)) FoundMask |= 1 << 12; + if (NewSize == 5 && !memcmp(REM13, T, 5)) FoundMask |= 1 << 13; + } + EXPECT_EQ(FoundMask, (1 << 14) - 1); +} + +TEST(FuzzerMutate, EraseBytes1) { + TestEraseBytes(&MutationDispatcher::Mutate_EraseBytes, 200); +} +TEST(FuzzerMutate, EraseBytes2) { + TestEraseBytes(&MutationDispatcher::Mutate, 2000); +} + +void TestInsertByte(Mutator M, int NumIter) { + std::unique_ptr t(new ExternalFunctions()); + fuzzer::EF = t.get(); + Random Rand(0); + std::unique_ptr MD(new MutationDispatcher(Rand, {})); + int FoundMask = 0; + uint8_t INS0[8] = {0xF1, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; + uint8_t INS1[8] = {0x00, 0xF2, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; + uint8_t INS2[8] = {0x00, 0x11, 0xF3, 0x22, 0x33, 0x44, 0x55, 0x66}; + uint8_t INS3[8] = {0x00, 0x11, 0x22, 0xF4, 0x33, 0x44, 0x55, 0x66}; + uint8_t INS4[8] = {0x00, 0x11, 0x22, 0x33, 0xF5, 0x44, 0x55, 0x66}; + uint8_t INS5[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0xF6, 0x55, 0x66}; + uint8_t INS6[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0xF7, 0x66}; + uint8_t INS7[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0xF8}; + for (int i = 0; i < NumIter; i++) { + uint8_t T[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; + size_t NewSize = (*MD.*M)(T, 7, 8); + if (NewSize == 8 && !memcmp(INS0, T, 8)) FoundMask |= 1 << 0; + if (NewSize == 8 && !memcmp(INS1, T, 8)) FoundMask |= 1 << 1; + if (NewSize == 8 && !memcmp(INS2, T, 8)) FoundMask |= 1 << 2; + if (NewSize == 8 && !memcmp(INS3, T, 8)) FoundMask |= 1 << 3; + if (NewSize == 8 && !memcmp(INS4, T, 8)) FoundMask |= 1 << 4; + if (NewSize == 8 && !memcmp(INS5, T, 8)) FoundMask |= 1 << 5; + if (NewSize == 8 && !memcmp(INS6, T, 8)) FoundMask |= 1 << 6; + if (NewSize == 8 && !memcmp(INS7, T, 8)) FoundMask |= 1 << 7; + } + EXPECT_EQ(FoundMask, 255); +} + +TEST(FuzzerMutate, InsertByte1) { + TestInsertByte(&MutationDispatcher::Mutate_InsertByte, 1 << 15); +} +TEST(FuzzerMutate, InsertByte2) { + TestInsertByte(&MutationDispatcher::Mutate, 1 << 17); +} + +void TestInsertRepeatedBytes(Mutator M, int NumIter) { + std::unique_ptr t(new ExternalFunctions()); + fuzzer::EF = t.get(); + Random Rand(0); + std::unique_ptr MD(new MutationDispatcher(Rand, {})); + int FoundMask = 0; + uint8_t INS0[7] = {0x00, 0x11, 0x22, 0x33, 'a', 'a', 'a'}; + uint8_t INS1[7] = {0x00, 0x11, 0x22, 'a', 'a', 'a', 0x33}; + uint8_t INS2[7] = {0x00, 0x11, 'a', 'a', 'a', 0x22, 0x33}; + uint8_t INS3[7] = {0x00, 'a', 'a', 'a', 0x11, 0x22, 0x33}; + uint8_t INS4[7] = {'a', 'a', 'a', 0x00, 0x11, 0x22, 0x33}; + + uint8_t INS5[8] = {0x00, 0x11, 0x22, 0x33, 'b', 'b', 'b', 'b'}; + uint8_t INS6[8] = {0x00, 0x11, 0x22, 'b', 'b', 'b', 'b', 0x33}; + uint8_t INS7[8] = {0x00, 0x11, 'b', 'b', 'b', 'b', 0x22, 0x33}; + uint8_t INS8[8] = {0x00, 'b', 'b', 'b', 'b', 0x11, 0x22, 0x33}; + uint8_t INS9[8] = {'b', 'b', 'b', 'b', 0x00, 0x11, 0x22, 0x33}; + + for (int i = 0; i < NumIter; i++) { + uint8_t T[8] = {0x00, 0x11, 0x22, 0x33}; + size_t NewSize = (*MD.*M)(T, 4, 8); + if (NewSize == 7 && !memcmp(INS0, T, 7)) FoundMask |= 1 << 0; + if (NewSize == 7 && !memcmp(INS1, T, 7)) FoundMask |= 1 << 1; + if (NewSize == 7 && !memcmp(INS2, T, 7)) FoundMask |= 1 << 2; + if (NewSize == 7 && !memcmp(INS3, T, 7)) FoundMask |= 1 << 3; + if (NewSize == 7 && !memcmp(INS4, T, 7)) FoundMask |= 1 << 4; + + if (NewSize == 8 && !memcmp(INS5, T, 8)) FoundMask |= 1 << 5; + if (NewSize == 8 && !memcmp(INS6, T, 8)) FoundMask |= 1 << 6; + if (NewSize == 8 && !memcmp(INS7, T, 8)) FoundMask |= 1 << 7; + if (NewSize == 8 && !memcmp(INS8, T, 8)) FoundMask |= 1 << 8; + if (NewSize == 8 && !memcmp(INS9, T, 8)) FoundMask |= 1 << 9; + + } + EXPECT_EQ(FoundMask, (1 << 10) - 1); +} + +TEST(FuzzerMutate, InsertRepeatedBytes1) { + TestInsertRepeatedBytes(&MutationDispatcher::Mutate_InsertRepeatedBytes, 10000); +} +TEST(FuzzerMutate, InsertRepeatedBytes2) { + TestInsertRepeatedBytes(&MutationDispatcher::Mutate, 300000); +} + +void TestChangeByte(Mutator M, int NumIter) { + std::unique_ptr t(new ExternalFunctions()); + fuzzer::EF = t.get(); + Random Rand(0); + std::unique_ptr MD(new MutationDispatcher(Rand, {})); + int FoundMask = 0; + uint8_t CH0[8] = {0xF0, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; + uint8_t CH1[8] = {0x00, 0xF1, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; + uint8_t CH2[8] = {0x00, 0x11, 0xF2, 0x33, 0x44, 0x55, 0x66, 0x77}; + uint8_t CH3[8] = {0x00, 0x11, 0x22, 0xF3, 0x44, 0x55, 0x66, 0x77}; + uint8_t CH4[8] = {0x00, 0x11, 0x22, 0x33, 0xF4, 0x55, 0x66, 0x77}; + uint8_t CH5[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0xF5, 0x66, 0x77}; + uint8_t CH6[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0xF5, 0x77}; + uint8_t CH7[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0xF7}; + for (int i = 0; i < NumIter; i++) { + uint8_t T[9] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; + size_t NewSize = (*MD.*M)(T, 8, 9); + if (NewSize == 8 && !memcmp(CH0, T, 8)) FoundMask |= 1 << 0; + if (NewSize == 8 && !memcmp(CH1, T, 8)) FoundMask |= 1 << 1; + if (NewSize == 8 && !memcmp(CH2, T, 8)) FoundMask |= 1 << 2; + if (NewSize == 8 && !memcmp(CH3, T, 8)) FoundMask |= 1 << 3; + if (NewSize == 8 && !memcmp(CH4, T, 8)) FoundMask |= 1 << 4; + if (NewSize == 8 && !memcmp(CH5, T, 8)) FoundMask |= 1 << 5; + if (NewSize == 8 && !memcmp(CH6, T, 8)) FoundMask |= 1 << 6; + if (NewSize == 8 && !memcmp(CH7, T, 8)) FoundMask |= 1 << 7; + } + EXPECT_EQ(FoundMask, 255); +} + +TEST(FuzzerMutate, ChangeByte1) { + TestChangeByte(&MutationDispatcher::Mutate_ChangeByte, 1 << 15); +} +TEST(FuzzerMutate, ChangeByte2) { + TestChangeByte(&MutationDispatcher::Mutate, 1 << 17); +} + +void TestChangeBit(Mutator M, int NumIter) { + std::unique_ptr t(new ExternalFunctions()); + fuzzer::EF = t.get(); + Random Rand(0); + std::unique_ptr MD(new MutationDispatcher(Rand, {})); + int FoundMask = 0; + uint8_t CH0[8] = {0x01, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; + uint8_t CH1[8] = {0x00, 0x13, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; + uint8_t CH2[8] = {0x00, 0x11, 0x02, 0x33, 0x44, 0x55, 0x66, 0x77}; + uint8_t CH3[8] = {0x00, 0x11, 0x22, 0x37, 0x44, 0x55, 0x66, 0x77}; + uint8_t CH4[8] = {0x00, 0x11, 0x22, 0x33, 0x54, 0x55, 0x66, 0x77}; + uint8_t CH5[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x54, 0x66, 0x77}; + uint8_t CH6[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x76, 0x77}; + uint8_t CH7[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0xF7}; + for (int i = 0; i < NumIter; i++) { + uint8_t T[9] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; + size_t NewSize = (*MD.*M)(T, 8, 9); + if (NewSize == 8 && !memcmp(CH0, T, 8)) FoundMask |= 1 << 0; + if (NewSize == 8 && !memcmp(CH1, T, 8)) FoundMask |= 1 << 1; + if (NewSize == 8 && !memcmp(CH2, T, 8)) FoundMask |= 1 << 2; + if (NewSize == 8 && !memcmp(CH3, T, 8)) FoundMask |= 1 << 3; + if (NewSize == 8 && !memcmp(CH4, T, 8)) FoundMask |= 1 << 4; + if (NewSize == 8 && !memcmp(CH5, T, 8)) FoundMask |= 1 << 5; + if (NewSize == 8 && !memcmp(CH6, T, 8)) FoundMask |= 1 << 6; + if (NewSize == 8 && !memcmp(CH7, T, 8)) FoundMask |= 1 << 7; + } + EXPECT_EQ(FoundMask, 255); +} + +TEST(FuzzerMutate, ChangeBit1) { + TestChangeBit(&MutationDispatcher::Mutate_ChangeBit, 1 << 16); +} +TEST(FuzzerMutate, ChangeBit2) { + TestChangeBit(&MutationDispatcher::Mutate, 1 << 18); +} + +void TestShuffleBytes(Mutator M, int NumIter) { + std::unique_ptr t(new ExternalFunctions()); + fuzzer::EF = t.get(); + Random Rand(0); + std::unique_ptr MD(new MutationDispatcher(Rand, {})); + int FoundMask = 0; + uint8_t CH0[7] = {0x00, 0x22, 0x11, 0x33, 0x44, 0x55, 0x66}; + uint8_t CH1[7] = {0x11, 0x00, 0x33, 0x22, 0x44, 0x55, 0x66}; + uint8_t CH2[7] = {0x00, 0x33, 0x11, 0x22, 0x44, 0x55, 0x66}; + uint8_t CH3[7] = {0x00, 0x11, 0x22, 0x44, 0x55, 0x66, 0x33}; + uint8_t CH4[7] = {0x00, 0x11, 0x22, 0x33, 0x55, 0x44, 0x66}; + for (int i = 0; i < NumIter; i++) { + uint8_t T[7] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; + size_t NewSize = (*MD.*M)(T, 7, 7); + if (NewSize == 7 && !memcmp(CH0, T, 7)) FoundMask |= 1 << 0; + if (NewSize == 7 && !memcmp(CH1, T, 7)) FoundMask |= 1 << 1; + if (NewSize == 7 && !memcmp(CH2, T, 7)) FoundMask |= 1 << 2; + if (NewSize == 7 && !memcmp(CH3, T, 7)) FoundMask |= 1 << 3; + if (NewSize == 7 && !memcmp(CH4, T, 7)) FoundMask |= 1 << 4; + } + EXPECT_EQ(FoundMask, 31); +} + +TEST(FuzzerMutate, ShuffleBytes1) { + TestShuffleBytes(&MutationDispatcher::Mutate_ShuffleBytes, 1 << 17); +} +TEST(FuzzerMutate, ShuffleBytes2) { + TestShuffleBytes(&MutationDispatcher::Mutate, 1 << 20); +} + +void TestCopyPart(Mutator M, int NumIter) { + std::unique_ptr t(new ExternalFunctions()); + fuzzer::EF = t.get(); + Random Rand(0); + std::unique_ptr MD(new MutationDispatcher(Rand, {})); + int FoundMask = 0; + uint8_t CH0[7] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x00, 0x11}; + uint8_t CH1[7] = {0x55, 0x66, 0x22, 0x33, 0x44, 0x55, 0x66}; + uint8_t CH2[7] = {0x00, 0x55, 0x66, 0x33, 0x44, 0x55, 0x66}; + uint8_t CH3[7] = {0x00, 0x11, 0x22, 0x00, 0x11, 0x22, 0x66}; + uint8_t CH4[7] = {0x00, 0x11, 0x11, 0x22, 0x33, 0x55, 0x66}; + + for (int i = 0; i < NumIter; i++) { + uint8_t T[7] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; + size_t NewSize = (*MD.*M)(T, 7, 7); + if (NewSize == 7 && !memcmp(CH0, T, 7)) FoundMask |= 1 << 0; + if (NewSize == 7 && !memcmp(CH1, T, 7)) FoundMask |= 1 << 1; + if (NewSize == 7 && !memcmp(CH2, T, 7)) FoundMask |= 1 << 2; + if (NewSize == 7 && !memcmp(CH3, T, 7)) FoundMask |= 1 << 3; + if (NewSize == 7 && !memcmp(CH4, T, 7)) FoundMask |= 1 << 4; + } + + uint8_t CH5[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x00, 0x11, 0x22}; + uint8_t CH6[8] = {0x22, 0x33, 0x44, 0x00, 0x11, 0x22, 0x33, 0x44}; + uint8_t CH7[8] = {0x00, 0x11, 0x22, 0x00, 0x11, 0x22, 0x33, 0x44}; + uint8_t CH8[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x22, 0x33, 0x44}; + uint8_t CH9[8] = {0x00, 0x11, 0x22, 0x22, 0x33, 0x44, 0x33, 0x44}; + + for (int i = 0; i < NumIter; i++) { + uint8_t T[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; + size_t NewSize = (*MD.*M)(T, 5, 8); + if (NewSize == 8 && !memcmp(CH5, T, 8)) FoundMask |= 1 << 5; + if (NewSize == 8 && !memcmp(CH6, T, 8)) FoundMask |= 1 << 6; + if (NewSize == 8 && !memcmp(CH7, T, 8)) FoundMask |= 1 << 7; + if (NewSize == 8 && !memcmp(CH8, T, 8)) FoundMask |= 1 << 8; + if (NewSize == 8 && !memcmp(CH9, T, 8)) FoundMask |= 1 << 9; + } + + EXPECT_EQ(FoundMask, 1023); +} + +TEST(FuzzerMutate, CopyPart1) { + TestCopyPart(&MutationDispatcher::Mutate_CopyPart, 1 << 10); +} +TEST(FuzzerMutate, CopyPart2) { + TestCopyPart(&MutationDispatcher::Mutate, 1 << 13); +} +TEST(FuzzerMutate, CopyPartNoInsertAtMaxSize) { + // This (non exhaustively) tests if `Mutate_CopyPart` tries to perform an + // insert on an input of size `MaxSize`. Performing an insert in this case + // will lead to the mutation failing. + std::unique_ptr t(new ExternalFunctions()); + fuzzer::EF = t.get(); + Random Rand(0); + std::unique_ptr MD(new MutationDispatcher(Rand, {})); + uint8_t Data[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x00, 0x11, 0x22}; + size_t MaxSize = sizeof(Data); + for (int count = 0; count < (1 << 18); ++count) { + size_t NewSize = MD->Mutate_CopyPart(Data, MaxSize, MaxSize); + ASSERT_EQ(NewSize, MaxSize); + } +} + +void TestAddWordFromDictionary(Mutator M, int NumIter) { + std::unique_ptr t(new ExternalFunctions()); + fuzzer::EF = t.get(); + Random Rand(0); + std::unique_ptr MD(new MutationDispatcher(Rand, {})); + uint8_t Word1[4] = {0xAA, 0xBB, 0xCC, 0xDD}; + uint8_t Word2[3] = {0xFF, 0xEE, 0xEF}; + MD->AddWordToManualDictionary(Word(Word1, sizeof(Word1))); + MD->AddWordToManualDictionary(Word(Word2, sizeof(Word2))); + int FoundMask = 0; + uint8_t CH0[7] = {0x00, 0x11, 0x22, 0xAA, 0xBB, 0xCC, 0xDD}; + uint8_t CH1[7] = {0x00, 0x11, 0xAA, 0xBB, 0xCC, 0xDD, 0x22}; + uint8_t CH2[7] = {0x00, 0xAA, 0xBB, 0xCC, 0xDD, 0x11, 0x22}; + uint8_t CH3[7] = {0xAA, 0xBB, 0xCC, 0xDD, 0x00, 0x11, 0x22}; + uint8_t CH4[6] = {0x00, 0x11, 0x22, 0xFF, 0xEE, 0xEF}; + uint8_t CH5[6] = {0x00, 0x11, 0xFF, 0xEE, 0xEF, 0x22}; + uint8_t CH6[6] = {0x00, 0xFF, 0xEE, 0xEF, 0x11, 0x22}; + uint8_t CH7[6] = {0xFF, 0xEE, 0xEF, 0x00, 0x11, 0x22}; + for (int i = 0; i < NumIter; i++) { + uint8_t T[7] = {0x00, 0x11, 0x22}; + size_t NewSize = (*MD.*M)(T, 3, 7); + if (NewSize == 7 && !memcmp(CH0, T, 7)) FoundMask |= 1 << 0; + if (NewSize == 7 && !memcmp(CH1, T, 7)) FoundMask |= 1 << 1; + if (NewSize == 7 && !memcmp(CH2, T, 7)) FoundMask |= 1 << 2; + if (NewSize == 7 && !memcmp(CH3, T, 7)) FoundMask |= 1 << 3; + if (NewSize == 6 && !memcmp(CH4, T, 6)) FoundMask |= 1 << 4; + if (NewSize == 6 && !memcmp(CH5, T, 6)) FoundMask |= 1 << 5; + if (NewSize == 6 && !memcmp(CH6, T, 6)) FoundMask |= 1 << 6; + if (NewSize == 6 && !memcmp(CH7, T, 6)) FoundMask |= 1 << 7; + } + EXPECT_EQ(FoundMask, 255); +} + +TEST(FuzzerMutate, AddWordFromDictionary1) { + TestAddWordFromDictionary( + &MutationDispatcher::Mutate_AddWordFromManualDictionary, 1 << 15); +} + +TEST(FuzzerMutate, AddWordFromDictionary2) { + TestAddWordFromDictionary(&MutationDispatcher::Mutate, 1 << 15); +} + +void TestChangeASCIIInteger(Mutator M, int NumIter) { + std::unique_ptr t(new ExternalFunctions()); + fuzzer::EF = t.get(); + Random Rand(0); + std::unique_ptr MD(new MutationDispatcher(Rand, {})); + + uint8_t CH0[8] = {'1', '2', '3', '4', '5', '6', '7', '7'}; + uint8_t CH1[8] = {'1', '2', '3', '4', '5', '6', '7', '9'}; + uint8_t CH2[8] = {'2', '4', '6', '9', '1', '3', '5', '6'}; + uint8_t CH3[8] = {'0', '6', '1', '7', '2', '8', '3', '9'}; + int FoundMask = 0; + for (int i = 0; i < NumIter; i++) { + uint8_t T[8] = {'1', '2', '3', '4', '5', '6', '7', '8'}; + size_t NewSize = (*MD.*M)(T, 8, 8); + /**/ if (NewSize == 8 && !memcmp(CH0, T, 8)) FoundMask |= 1 << 0; + else if (NewSize == 8 && !memcmp(CH1, T, 8)) FoundMask |= 1 << 1; + else if (NewSize == 8 && !memcmp(CH2, T, 8)) FoundMask |= 1 << 2; + else if (NewSize == 8 && !memcmp(CH3, T, 8)) FoundMask |= 1 << 3; + else if (NewSize == 8) FoundMask |= 1 << 4; + } + EXPECT_EQ(FoundMask, 31); +} + +TEST(FuzzerMutate, ChangeASCIIInteger1) { + TestChangeASCIIInteger(&MutationDispatcher::Mutate_ChangeASCIIInteger, + 1 << 15); +} + +TEST(FuzzerMutate, ChangeASCIIInteger2) { + TestChangeASCIIInteger(&MutationDispatcher::Mutate, 1 << 15); +} + +void TestChangeBinaryInteger(Mutator M, int NumIter) { + std::unique_ptr t(new ExternalFunctions()); + fuzzer::EF = t.get(); + Random Rand(0); + std::unique_ptr MD(new MutationDispatcher(Rand, {})); + + uint8_t CH0[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x79}; + uint8_t CH1[8] = {0x00, 0x11, 0x22, 0x31, 0x44, 0x55, 0x66, 0x77}; + uint8_t CH2[8] = {0xff, 0x10, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; + uint8_t CH3[8] = {0x00, 0x11, 0x2a, 0x33, 0x44, 0x55, 0x66, 0x77}; + uint8_t CH4[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x4f, 0x66, 0x77}; + uint8_t CH5[8] = {0xff, 0xee, 0xdd, 0xcc, 0xbb, 0xaa, 0x99, 0x88}; + uint8_t CH6[8] = {0x00, 0x11, 0x22, 0x00, 0x00, 0x00, 0x08, 0x77}; // Size + uint8_t CH7[8] = {0x00, 0x08, 0x00, 0x33, 0x44, 0x55, 0x66, 0x77}; // Sw(Size) + + int FoundMask = 0; + for (int i = 0; i < NumIter; i++) { + uint8_t T[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; + size_t NewSize = (*MD.*M)(T, 8, 8); + /**/ if (NewSize == 8 && !memcmp(CH0, T, 8)) FoundMask |= 1 << 0; + else if (NewSize == 8 && !memcmp(CH1, T, 8)) FoundMask |= 1 << 1; + else if (NewSize == 8 && !memcmp(CH2, T, 8)) FoundMask |= 1 << 2; + else if (NewSize == 8 && !memcmp(CH3, T, 8)) FoundMask |= 1 << 3; + else if (NewSize == 8 && !memcmp(CH4, T, 8)) FoundMask |= 1 << 4; + else if (NewSize == 8 && !memcmp(CH5, T, 8)) FoundMask |= 1 << 5; + else if (NewSize == 8 && !memcmp(CH6, T, 8)) FoundMask |= 1 << 6; + else if (NewSize == 8 && !memcmp(CH7, T, 8)) FoundMask |= 1 << 7; + } + EXPECT_EQ(FoundMask, 255); +} + +TEST(FuzzerMutate, ChangeBinaryInteger1) { + TestChangeBinaryInteger(&MutationDispatcher::Mutate_ChangeBinaryInteger, + 1 << 12); +} + +TEST(FuzzerMutate, ChangeBinaryInteger2) { + TestChangeBinaryInteger(&MutationDispatcher::Mutate, 1 << 15); +} + + TEST(FuzzerDictionary, ParseOneDictionaryEntry) { Unit U; EXPECT_FALSE(ParseOneDictionaryEntry("", &U)); diff --git a/compiler-rt/lib/fuzzer/tests/MutagenUnittest.cpp b/compiler-rt/lib/fuzzer/tests/MutagenUnittest.cpp deleted file mode 100644 index 287eecf5fe2c8..0000000000000 --- a/compiler-rt/lib/fuzzer/tests/MutagenUnittest.cpp +++ /dev/null @@ -1,971 +0,0 @@ -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#include "mutagen/Mutagen.h" -#include "mutagen/MutagenDispatcher.h" -#include "mutagen/MutagenSequence.h" -#include "mutagen/MutagenUtil.h" -#include "gtest/gtest.h" -#include - -// This test doesn't set Config.MsanUnpoison*, so ensure MSan isn't present. -// Avoid using fuzzer::ExternalFunctions, since it may not be linked against -// the test binary. -#if defined(__has_feature) -#if __has_feature(memory_sanitizer) -#error MemorySanitizer is not supported for the mutagen unit tests. -#endif // __has_feature(memory_sanitizer) -#endif // defined(__has_feature) - -namespace mutagen { -namespace { - -using fuzzer::Set; - -std::unique_ptr CreateMutationDispatcher() { - LLVMMutagenConfiguration Config; - memset(&Config, 0, sizeof(Config)); - return std::unique_ptr(new MutationDispatcher(&Config)); -} - -typedef size_t (MutationDispatcher::*Mutator)(uint8_t *Data, size_t Size, - size_t MaxSize); - -TEST(MutationDispatcher, CrossOver) { - auto MD = CreateMutationDispatcher(); - Unit A({0, 1, 2}), B({5, 6, 7}); - Unit C; - Unit Expected[] = {{0}, - {0, 1}, - {0, 5}, - {0, 1, 2}, - {0, 1, 5}, - {0, 5, 1}, - {0, 5, 6}, - {0, 1, 2, 5}, - {0, 1, 5, 2}, - {0, 1, 5, 6}, - {0, 5, 1, 2}, - {0, 5, 1, 6}, - {0, 5, 6, 1}, - {0, 5, 6, 7}, - {0, 1, 2, 5, 6}, - {0, 1, 5, 2, 6}, - {0, 1, 5, 6, 2}, - {0, 1, 5, 6, 7}, - {0, 5, 1, 2, 6}, - {0, 5, 1, 6, 2}, - {0, 5, 1, 6, 7}, - {0, 5, 6, 1, 2}, - {0, 5, 6, 1, 7}, - {0, 5, 6, 7, 1}, - {0, 1, 2, 5, 6, 7}, - {0, 1, 5, 2, 6, 7}, - {0, 1, 5, 6, 2, 7}, - {0, 1, 5, 6, 7, 2}, - {0, 5, 1, 2, 6, 7}, - {0, 5, 1, 6, 2, 7}, - {0, 5, 1, 6, 7, 2}, - {0, 5, 6, 1, 2, 7}, - {0, 5, 6, 1, 7, 2}, - {0, 5, 6, 7, 1, 2}}; - for (size_t Len = 1; Len < 8; Len++) { - Set FoundUnits, ExpectedUnitsWitThisLength; - for (int Iter = 0; Iter < 3000; Iter++) { - C.resize(Len); - size_t NewSize = MD->CrossOver(A.data(), A.size(), B.data(), B.size(), - C.data(), C.size()); - C.resize(NewSize); - FoundUnits.insert(C); - } - for (const Unit &U : Expected) - if (U.size() <= Len) - ExpectedUnitsWitThisLength.insert(U); - EXPECT_EQ(ExpectedUnitsWitThisLength, FoundUnits); - } -} - -void TestEraseBytes(Mutator M, int NumIter) { - uint8_t REM0[8] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; - uint8_t REM1[8] = {0x00, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; - uint8_t REM2[8] = {0x00, 0x11, 0x33, 0x44, 0x55, 0x66, 0x77}; - uint8_t REM3[8] = {0x00, 0x11, 0x22, 0x44, 0x55, 0x66, 0x77}; - uint8_t REM4[8] = {0x00, 0x11, 0x22, 0x33, 0x55, 0x66, 0x77}; - uint8_t REM5[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x66, 0x77}; - uint8_t REM6[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x77}; - uint8_t REM7[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; - - uint8_t REM8[6] = {0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; - uint8_t REM9[6] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55}; - uint8_t REM10[6] = {0x00, 0x11, 0x22, 0x55, 0x66, 0x77}; - - uint8_t REM11[5] = {0x33, 0x44, 0x55, 0x66, 0x77}; - uint8_t REM12[5] = {0x00, 0x11, 0x22, 0x33, 0x44}; - uint8_t REM13[5] = {0x00, 0x44, 0x55, 0x66, 0x77}; - - auto MD = CreateMutationDispatcher(); - int FoundMask = 0; - for (int i = 0; i < NumIter; i++) { - uint8_t T[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; - size_t NewSize = (*MD.*M)(T, sizeof(T), sizeof(T)); - if (NewSize == 7 && !memcmp(REM0, T, 7)) - FoundMask |= 1 << 0; - if (NewSize == 7 && !memcmp(REM1, T, 7)) - FoundMask |= 1 << 1; - if (NewSize == 7 && !memcmp(REM2, T, 7)) - FoundMask |= 1 << 2; - if (NewSize == 7 && !memcmp(REM3, T, 7)) - FoundMask |= 1 << 3; - if (NewSize == 7 && !memcmp(REM4, T, 7)) - FoundMask |= 1 << 4; - if (NewSize == 7 && !memcmp(REM5, T, 7)) - FoundMask |= 1 << 5; - if (NewSize == 7 && !memcmp(REM6, T, 7)) - FoundMask |= 1 << 6; - if (NewSize == 7 && !memcmp(REM7, T, 7)) - FoundMask |= 1 << 7; - - if (NewSize == 6 && !memcmp(REM8, T, 6)) - FoundMask |= 1 << 8; - if (NewSize == 6 && !memcmp(REM9, T, 6)) - FoundMask |= 1 << 9; - if (NewSize == 6 && !memcmp(REM10, T, 6)) - FoundMask |= 1 << 10; - - if (NewSize == 5 && !memcmp(REM11, T, 5)) - FoundMask |= 1 << 11; - if (NewSize == 5 && !memcmp(REM12, T, 5)) - FoundMask |= 1 << 12; - if (NewSize == 5 && !memcmp(REM13, T, 5)) - FoundMask |= 1 << 13; - } - EXPECT_EQ(FoundMask, (1 << 14) - 1); -} - -TEST(MutationDispatcher, EraseBytes1) { - TestEraseBytes(&MutationDispatcher::Mutate_EraseBytes, 200); -} -TEST(MutationDispatcher, EraseBytes2) { - TestEraseBytes(&MutationDispatcher::Mutate, 2000); -} - -void TestInsertByte(Mutator M, int NumIter) { - auto MD = CreateMutationDispatcher(); - int FoundMask = 0; - uint8_t INS0[8] = {0xF1, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; - uint8_t INS1[8] = {0x00, 0xF2, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; - uint8_t INS2[8] = {0x00, 0x11, 0xF3, 0x22, 0x33, 0x44, 0x55, 0x66}; - uint8_t INS3[8] = {0x00, 0x11, 0x22, 0xF4, 0x33, 0x44, 0x55, 0x66}; - uint8_t INS4[8] = {0x00, 0x11, 0x22, 0x33, 0xF5, 0x44, 0x55, 0x66}; - uint8_t INS5[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0xF6, 0x55, 0x66}; - uint8_t INS6[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0xF7, 0x66}; - uint8_t INS7[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0xF8}; - for (int i = 0; i < NumIter; i++) { - uint8_t T[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; - size_t NewSize = (*MD.*M)(T, 7, 8); - if (NewSize == 8 && !memcmp(INS0, T, 8)) - FoundMask |= 1 << 0; - if (NewSize == 8 && !memcmp(INS1, T, 8)) - FoundMask |= 1 << 1; - if (NewSize == 8 && !memcmp(INS2, T, 8)) - FoundMask |= 1 << 2; - if (NewSize == 8 && !memcmp(INS3, T, 8)) - FoundMask |= 1 << 3; - if (NewSize == 8 && !memcmp(INS4, T, 8)) - FoundMask |= 1 << 4; - if (NewSize == 8 && !memcmp(INS5, T, 8)) - FoundMask |= 1 << 5; - if (NewSize == 8 && !memcmp(INS6, T, 8)) - FoundMask |= 1 << 6; - if (NewSize == 8 && !memcmp(INS7, T, 8)) - FoundMask |= 1 << 7; - } - EXPECT_EQ(FoundMask, 255); -} - -TEST(MutationDispatcher, InsertByte1) { - TestInsertByte(&MutationDispatcher::Mutate_InsertByte, 1 << 15); -} -TEST(MutationDispatcher, InsertByte2) { - TestInsertByte(&MutationDispatcher::Mutate, 1 << 17); -} - -void TestInsertRepeatedBytes(Mutator M, int NumIter) { - auto MD = CreateMutationDispatcher(); - int FoundMask = 0; - uint8_t INS0[7] = {0x00, 0x11, 0x22, 0x33, 'a', 'a', 'a'}; - uint8_t INS1[7] = {0x00, 0x11, 0x22, 'a', 'a', 'a', 0x33}; - uint8_t INS2[7] = {0x00, 0x11, 'a', 'a', 'a', 0x22, 0x33}; - uint8_t INS3[7] = {0x00, 'a', 'a', 'a', 0x11, 0x22, 0x33}; - uint8_t INS4[7] = {'a', 'a', 'a', 0x00, 0x11, 0x22, 0x33}; - - uint8_t INS5[8] = {0x00, 0x11, 0x22, 0x33, 'b', 'b', 'b', 'b'}; - uint8_t INS6[8] = {0x00, 0x11, 0x22, 'b', 'b', 'b', 'b', 0x33}; - uint8_t INS7[8] = {0x00, 0x11, 'b', 'b', 'b', 'b', 0x22, 0x33}; - uint8_t INS8[8] = {0x00, 'b', 'b', 'b', 'b', 0x11, 0x22, 0x33}; - uint8_t INS9[8] = {'b', 'b', 'b', 'b', 0x00, 0x11, 0x22, 0x33}; - - for (int i = 0; i < NumIter; i++) { - uint8_t T[8] = {0x00, 0x11, 0x22, 0x33}; - size_t NewSize = (*MD.*M)(T, 4, 8); - if (NewSize == 7 && !memcmp(INS0, T, 7)) - FoundMask |= 1 << 0; - if (NewSize == 7 && !memcmp(INS1, T, 7)) - FoundMask |= 1 << 1; - if (NewSize == 7 && !memcmp(INS2, T, 7)) - FoundMask |= 1 << 2; - if (NewSize == 7 && !memcmp(INS3, T, 7)) - FoundMask |= 1 << 3; - if (NewSize == 7 && !memcmp(INS4, T, 7)) - FoundMask |= 1 << 4; - - if (NewSize == 8 && !memcmp(INS5, T, 8)) - FoundMask |= 1 << 5; - if (NewSize == 8 && !memcmp(INS6, T, 8)) - FoundMask |= 1 << 6; - if (NewSize == 8 && !memcmp(INS7, T, 8)) - FoundMask |= 1 << 7; - if (NewSize == 8 && !memcmp(INS8, T, 8)) - FoundMask |= 1 << 8; - if (NewSize == 8 && !memcmp(INS9, T, 8)) - FoundMask |= 1 << 9; - } - EXPECT_EQ(FoundMask, (1 << 10) - 1); -} - -TEST(MutationDispatcher, InsertRepeatedBytes1) { - TestInsertRepeatedBytes(&MutationDispatcher::Mutate_InsertRepeatedBytes, - 10000); -} -TEST(MutationDispatcher, InsertRepeatedBytes2) { - TestInsertRepeatedBytes(&MutationDispatcher::Mutate, 300000); -} - -void TestChangeByte(Mutator M, int NumIter) { - auto MD = CreateMutationDispatcher(); - int FoundMask = 0; - uint8_t CH0[8] = {0xF0, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; - uint8_t CH1[8] = {0x00, 0xF1, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; - uint8_t CH2[8] = {0x00, 0x11, 0xF2, 0x33, 0x44, 0x55, 0x66, 0x77}; - uint8_t CH3[8] = {0x00, 0x11, 0x22, 0xF3, 0x44, 0x55, 0x66, 0x77}; - uint8_t CH4[8] = {0x00, 0x11, 0x22, 0x33, 0xF4, 0x55, 0x66, 0x77}; - uint8_t CH5[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0xF5, 0x66, 0x77}; - uint8_t CH6[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0xF5, 0x77}; - uint8_t CH7[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0xF7}; - for (int i = 0; i < NumIter; i++) { - uint8_t T[9] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; - size_t NewSize = (*MD.*M)(T, 8, 9); - if (NewSize == 8 && !memcmp(CH0, T, 8)) - FoundMask |= 1 << 0; - if (NewSize == 8 && !memcmp(CH1, T, 8)) - FoundMask |= 1 << 1; - if (NewSize == 8 && !memcmp(CH2, T, 8)) - FoundMask |= 1 << 2; - if (NewSize == 8 && !memcmp(CH3, T, 8)) - FoundMask |= 1 << 3; - if (NewSize == 8 && !memcmp(CH4, T, 8)) - FoundMask |= 1 << 4; - if (NewSize == 8 && !memcmp(CH5, T, 8)) - FoundMask |= 1 << 5; - if (NewSize == 8 && !memcmp(CH6, T, 8)) - FoundMask |= 1 << 6; - if (NewSize == 8 && !memcmp(CH7, T, 8)) - FoundMask |= 1 << 7; - } - EXPECT_EQ(FoundMask, 255); -} - -TEST(MutationDispatcher, ChangeByte1) { - TestChangeByte(&MutationDispatcher::Mutate_ChangeByte, 1 << 15); -} -TEST(MutationDispatcher, ChangeByte2) { - TestChangeByte(&MutationDispatcher::Mutate, 1 << 17); -} - -void TestChangeBit(Mutator M, int NumIter) { - auto MD = CreateMutationDispatcher(); - int FoundMask = 0; - uint8_t CH0[8] = {0x01, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; - uint8_t CH1[8] = {0x00, 0x13, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; - uint8_t CH2[8] = {0x00, 0x11, 0x02, 0x33, 0x44, 0x55, 0x66, 0x77}; - uint8_t CH3[8] = {0x00, 0x11, 0x22, 0x37, 0x44, 0x55, 0x66, 0x77}; - uint8_t CH4[8] = {0x00, 0x11, 0x22, 0x33, 0x54, 0x55, 0x66, 0x77}; - uint8_t CH5[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x54, 0x66, 0x77}; - uint8_t CH6[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x76, 0x77}; - uint8_t CH7[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0xF7}; - for (int i = 0; i < NumIter; i++) { - uint8_t T[9] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; - size_t NewSize = (*MD.*M)(T, 8, 9); - if (NewSize == 8 && !memcmp(CH0, T, 8)) - FoundMask |= 1 << 0; - if (NewSize == 8 && !memcmp(CH1, T, 8)) - FoundMask |= 1 << 1; - if (NewSize == 8 && !memcmp(CH2, T, 8)) - FoundMask |= 1 << 2; - if (NewSize == 8 && !memcmp(CH3, T, 8)) - FoundMask |= 1 << 3; - if (NewSize == 8 && !memcmp(CH4, T, 8)) - FoundMask |= 1 << 4; - if (NewSize == 8 && !memcmp(CH5, T, 8)) - FoundMask |= 1 << 5; - if (NewSize == 8 && !memcmp(CH6, T, 8)) - FoundMask |= 1 << 6; - if (NewSize == 8 && !memcmp(CH7, T, 8)) - FoundMask |= 1 << 7; - } - EXPECT_EQ(FoundMask, 255); -} - -TEST(MutationDispatcher, ChangeBit1) { - TestChangeBit(&MutationDispatcher::Mutate_ChangeBit, 1 << 16); -} -TEST(MutationDispatcher, ChangeBit2) { - TestChangeBit(&MutationDispatcher::Mutate, 1 << 18); -} - -void TestShuffleBytes(Mutator M, int NumIter) { - auto MD = CreateMutationDispatcher(); - int FoundMask = 0; - uint8_t CH0[7] = {0x00, 0x22, 0x11, 0x33, 0x44, 0x55, 0x66}; - uint8_t CH1[7] = {0x11, 0x00, 0x33, 0x22, 0x44, 0x55, 0x66}; - uint8_t CH2[7] = {0x00, 0x33, 0x11, 0x22, 0x44, 0x55, 0x66}; - uint8_t CH3[7] = {0x00, 0x11, 0x22, 0x44, 0x55, 0x66, 0x33}; - uint8_t CH4[7] = {0x00, 0x11, 0x22, 0x33, 0x55, 0x44, 0x66}; - for (int i = 0; i < NumIter; i++) { - uint8_t T[7] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; - size_t NewSize = (*MD.*M)(T, 7, 7); - if (NewSize == 7 && !memcmp(CH0, T, 7)) - FoundMask |= 1 << 0; - if (NewSize == 7 && !memcmp(CH1, T, 7)) - FoundMask |= 1 << 1; - if (NewSize == 7 && !memcmp(CH2, T, 7)) - FoundMask |= 1 << 2; - if (NewSize == 7 && !memcmp(CH3, T, 7)) - FoundMask |= 1 << 3; - if (NewSize == 7 && !memcmp(CH4, T, 7)) - FoundMask |= 1 << 4; - } - EXPECT_EQ(FoundMask, 31); -} - -TEST(MutationDispatcher, ShuffleBytes1) { - TestShuffleBytes(&MutationDispatcher::Mutate_ShuffleBytes, 1 << 17); -} -TEST(MutationDispatcher, ShuffleBytes2) { - TestShuffleBytes(&MutationDispatcher::Mutate, 1 << 20); -} - -void TestCopyPart(Mutator M, int NumIter) { - auto MD = CreateMutationDispatcher(); - int FoundMask = 0; - uint8_t CH0[7] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x00, 0x11}; - uint8_t CH1[7] = {0x55, 0x66, 0x22, 0x33, 0x44, 0x55, 0x66}; - uint8_t CH2[7] = {0x00, 0x55, 0x66, 0x33, 0x44, 0x55, 0x66}; - uint8_t CH3[7] = {0x00, 0x11, 0x22, 0x00, 0x11, 0x22, 0x66}; - uint8_t CH4[7] = {0x00, 0x11, 0x11, 0x22, 0x33, 0x55, 0x66}; - - for (int i = 0; i < NumIter; i++) { - uint8_t T[7] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; - size_t NewSize = (*MD.*M)(T, 7, 7); - if (NewSize == 7 && !memcmp(CH0, T, 7)) - FoundMask |= 1 << 0; - if (NewSize == 7 && !memcmp(CH1, T, 7)) - FoundMask |= 1 << 1; - if (NewSize == 7 && !memcmp(CH2, T, 7)) - FoundMask |= 1 << 2; - if (NewSize == 7 && !memcmp(CH3, T, 7)) - FoundMask |= 1 << 3; - if (NewSize == 7 && !memcmp(CH4, T, 7)) - FoundMask |= 1 << 4; - } - - uint8_t CH5[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x00, 0x11, 0x22}; - uint8_t CH6[8] = {0x22, 0x33, 0x44, 0x00, 0x11, 0x22, 0x33, 0x44}; - uint8_t CH7[8] = {0x00, 0x11, 0x22, 0x00, 0x11, 0x22, 0x33, 0x44}; - uint8_t CH8[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x22, 0x33, 0x44}; - uint8_t CH9[8] = {0x00, 0x11, 0x22, 0x22, 0x33, 0x44, 0x33, 0x44}; - - for (int i = 0; i < NumIter; i++) { - uint8_t T[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; - size_t NewSize = (*MD.*M)(T, 5, 8); - if (NewSize == 8 && !memcmp(CH5, T, 8)) - FoundMask |= 1 << 5; - if (NewSize == 8 && !memcmp(CH6, T, 8)) - FoundMask |= 1 << 6; - if (NewSize == 8 && !memcmp(CH7, T, 8)) - FoundMask |= 1 << 7; - if (NewSize == 8 && !memcmp(CH8, T, 8)) - FoundMask |= 1 << 8; - if (NewSize == 8 && !memcmp(CH9, T, 8)) - FoundMask |= 1 << 9; - } - - EXPECT_EQ(FoundMask, 1023); -} - -TEST(MutationDispatcher, CopyPart1) { - TestCopyPart(&MutationDispatcher::Mutate_CopyPart, 1 << 10); -} -TEST(MutationDispatcher, CopyPart2) { - TestCopyPart(&MutationDispatcher::Mutate, 1 << 13); -} -TEST(MutationDispatcher, CopyPartNoInsertAtMaxSize) { - // This (non exhaustively) tests if `Mutate_CopyPart` tries to perform an - // insert on an input of size `MaxSize`. Performing an insert in this case - // will lead to the mutation failing. - auto MD = CreateMutationDispatcher(); - uint8_t Data[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x00, 0x11, 0x22}; - size_t MaxSize = sizeof(Data); - for (int count = 0; count < (1 << 18); ++count) { - size_t NewSize = MD->Mutate_CopyPart(Data, MaxSize, MaxSize); - ASSERT_EQ(NewSize, MaxSize); - } -} - -void TestAddWordFromDictionary(Mutator M, int NumIter) { - auto MD = CreateMutationDispatcher(); - uint8_t Word1[4] = {0xAA, 0xBB, 0xCC, 0xDD}; - uint8_t Word2[3] = {0xFF, 0xEE, 0xEF}; - MD->AddWordToManualDictionary(Word(Word1, sizeof(Word1))); - MD->AddWordToManualDictionary(Word(Word2, sizeof(Word2))); - int FoundMask = 0; - uint8_t CH0[7] = {0x00, 0x11, 0x22, 0xAA, 0xBB, 0xCC, 0xDD}; - uint8_t CH1[7] = {0x00, 0x11, 0xAA, 0xBB, 0xCC, 0xDD, 0x22}; - uint8_t CH2[7] = {0x00, 0xAA, 0xBB, 0xCC, 0xDD, 0x11, 0x22}; - uint8_t CH3[7] = {0xAA, 0xBB, 0xCC, 0xDD, 0x00, 0x11, 0x22}; - uint8_t CH4[6] = {0x00, 0x11, 0x22, 0xFF, 0xEE, 0xEF}; - uint8_t CH5[6] = {0x00, 0x11, 0xFF, 0xEE, 0xEF, 0x22}; - uint8_t CH6[6] = {0x00, 0xFF, 0xEE, 0xEF, 0x11, 0x22}; - uint8_t CH7[6] = {0xFF, 0xEE, 0xEF, 0x00, 0x11, 0x22}; - for (int i = 0; i < NumIter; i++) { - uint8_t T[7] = {0x00, 0x11, 0x22}; - size_t NewSize = (*MD.*M)(T, 3, 7); - if (NewSize == 7 && !memcmp(CH0, T, 7)) - FoundMask |= 1 << 0; - if (NewSize == 7 && !memcmp(CH1, T, 7)) - FoundMask |= 1 << 1; - if (NewSize == 7 && !memcmp(CH2, T, 7)) - FoundMask |= 1 << 2; - if (NewSize == 7 && !memcmp(CH3, T, 7)) - FoundMask |= 1 << 3; - if (NewSize == 6 && !memcmp(CH4, T, 6)) - FoundMask |= 1 << 4; - if (NewSize == 6 && !memcmp(CH5, T, 6)) - FoundMask |= 1 << 5; - if (NewSize == 6 && !memcmp(CH6, T, 6)) - FoundMask |= 1 << 6; - if (NewSize == 6 && !memcmp(CH7, T, 6)) - FoundMask |= 1 << 7; - } - EXPECT_EQ(FoundMask, 255); -} - -TEST(MutationDispatcher, AddWordFromDictionary1) { - TestAddWordFromDictionary( - &MutationDispatcher::Mutate_AddWordFromManualDictionary, 1 << 15); -} - -TEST(MutationDispatcher, AddWordFromDictionary2) { - TestAddWordFromDictionary(&MutationDispatcher::Mutate, 1 << 15); -} - -void TestChangeASCIIInteger(Mutator M, int NumIter) { - auto MD = CreateMutationDispatcher(); - - uint8_t CH0[8] = {'1', '2', '3', '4', '5', '6', '7', '7'}; - uint8_t CH1[8] = {'1', '2', '3', '4', '5', '6', '7', '9'}; - uint8_t CH2[8] = {'2', '4', '6', '9', '1', '3', '5', '6'}; - uint8_t CH3[8] = {'0', '6', '1', '7', '2', '8', '3', '9'}; - int FoundMask = 0; - for (int i = 0; i < NumIter; i++) { - uint8_t T[8] = {'1', '2', '3', '4', '5', '6', '7', '8'}; - size_t NewSize = (*MD.*M)(T, 8, 8); - /**/ if (NewSize == 8 && !memcmp(CH0, T, 8)) - FoundMask |= 1 << 0; - else if (NewSize == 8 && !memcmp(CH1, T, 8)) - FoundMask |= 1 << 1; - else if (NewSize == 8 && !memcmp(CH2, T, 8)) - FoundMask |= 1 << 2; - else if (NewSize == 8 && !memcmp(CH3, T, 8)) - FoundMask |= 1 << 3; - else if (NewSize == 8) - FoundMask |= 1 << 4; - } - EXPECT_EQ(FoundMask, 31); -} - -TEST(MutationDispatcher, ChangeASCIIInteger1) { - TestChangeASCIIInteger(&MutationDispatcher::Mutate_ChangeASCIIInteger, - 1 << 15); -} - -TEST(MutationDispatcher, ChangeASCIIInteger2) { - TestChangeASCIIInteger(&MutationDispatcher::Mutate, 1 << 15); -} - -void TestChangeBinaryInteger(Mutator M, int NumIter) { - auto MD = CreateMutationDispatcher(); - - uint8_t CH0[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x79}; - uint8_t CH1[8] = {0x00, 0x11, 0x22, 0x31, 0x44, 0x55, 0x66, 0x77}; - uint8_t CH2[8] = {0xff, 0x10, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; - uint8_t CH3[8] = {0x00, 0x11, 0x2a, 0x33, 0x44, 0x55, 0x66, 0x77}; - uint8_t CH4[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x4f, 0x66, 0x77}; - uint8_t CH5[8] = {0xff, 0xee, 0xdd, 0xcc, 0xbb, 0xaa, 0x99, 0x88}; - uint8_t CH6[8] = {0x00, 0x11, 0x22, 0x00, 0x00, 0x00, 0x08, 0x77}; // Size - uint8_t CH7[8] = {0x00, 0x08, 0x00, 0x33, 0x44, 0x55, 0x66, 0x77}; // Sw(Size) - - int FoundMask = 0; - for (int i = 0; i < NumIter; i++) { - uint8_t T[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; - size_t NewSize = (*MD.*M)(T, 8, 8); - /**/ if (NewSize == 8 && !memcmp(CH0, T, 8)) - FoundMask |= 1 << 0; - else if (NewSize == 8 && !memcmp(CH1, T, 8)) - FoundMask |= 1 << 1; - else if (NewSize == 8 && !memcmp(CH2, T, 8)) - FoundMask |= 1 << 2; - else if (NewSize == 8 && !memcmp(CH3, T, 8)) - FoundMask |= 1 << 3; - else if (NewSize == 8 && !memcmp(CH4, T, 8)) - FoundMask |= 1 << 4; - else if (NewSize == 8 && !memcmp(CH5, T, 8)) - FoundMask |= 1 << 5; - else if (NewSize == 8 && !memcmp(CH6, T, 8)) - FoundMask |= 1 << 6; - else if (NewSize == 8 && !memcmp(CH7, T, 8)) - FoundMask |= 1 << 7; - } - EXPECT_EQ(FoundMask, 255); -} - -TEST(MutationDispatcher, ChangeBinaryInteger1) { - TestChangeBinaryInteger(&MutationDispatcher::Mutate_ChangeBinaryInteger, - 1 << 12); -} - -TEST(MutationDispatcher, ChangeBinaryInteger2) { - TestChangeBinaryInteger(&MutationDispatcher::Mutate, 1 << 15); -} - -// Test fixture for MutagenInterface unit tests. -static const char *kWord1 = "word1"; -static const char *kWord2 = "word2"; - -class MutagenInterface : public ::testing::Test { -protected: - void SetUp() override { - Current = this; - memset(&Config, 0, sizeof(Config)); - - Config.Seed = 1; - - Config.UseCmp = 1; - Config.FromTORC4 = [](size_t Idx, uint32_t *Arg1, uint32_t *Arg2) { - ++(Current->FromTORC4Calls); - *Arg1 = 0x0401; - *Arg2 = 0x0402; - }; - Config.FromTORC8 = [](size_t Idx, uint64_t *Arg1, uint64_t *Arg2) { - ++(Current->FromTORC8Calls); - *Arg1 = 0x0801; - *Arg2 = 0x0802; - }; - Config.FromTORCW = [](size_t Idx, const uint8_t **Data1, size_t *Size1, - const uint8_t **Data2, size_t *Size2) { - ++(Current->FromTORCWCalls); - *Data1 = reinterpret_cast(kWord1); - *Size1 = strlen(kWord1); - *Data2 = reinterpret_cast(kWord2); - *Size2 = strlen(kWord2); - }; - - Config.UseMemmem = 0; - Config.FromMMT = [](size_t Idx, const uint8_t **Data, size_t *Size) { - ++(Current->FromMMTCalls); - *Data = reinterpret_cast(kWord1); - *Size = strlen(kWord1); - }; - - Config.OnlyASCII = 0; - - Config.CustomMutator = [](uint8_t *Data, size_t Size, size_t MaxSize, - unsigned int Seed) { - ++(Current->CustomMutatorCalls); - return LLVMMutagenDefaultMutate(Data, Size, MaxSize); - }; - - Config.CustomCrossOver = - [](const uint8_t *Data1, size_t Size1, const uint8_t *Data2, - size_t Size2, uint8_t *Out, size_t MaxOutSize, unsigned int Seed) { - ++(Current->CustomCrossOverCalls); - auto *MD = GetMutationDispatcherForTest(); - return MD->CrossOver(Data1, Size1, Data2, Size2, Out, MaxOutSize); - }; - - U = Unit({1, 2, 3, 4}); - U.reserve(8); - } - - void TearDown() override { - Current = nullptr; - memset(&Config, 0, sizeof(Config)); - LLVMMutagenConfigure(&Config); - } - - LLVMMutagenConfiguration Config; - Unit U; - - size_t FromTORC4Calls = 0; - size_t FromTORC8Calls = 0; - size_t FromTORCWCalls = 0; - size_t FromMMTCalls = 0; - size_t CustomMutatorCalls = 0; - size_t CustomCrossOverCalls = 0; - -private: - static MutagenInterface *Current; -}; - -MutagenInterface *MutagenInterface::Current = nullptr; - -// Unit tests for MutagenInterface. - -TEST_F(MutagenInterface, Configure) { - Config.OnlyASCII = 1; - LLVMMutagenConfigure(&Config); - auto *MD = GetMutationDispatcherForTest(); - ASSERT_NE(MD, nullptr); - - Random Rand1(Config.Seed); - Random &Rand2 = MD->GetRand(); - for (size_t i = 0; i < 10; ++i) - EXPECT_EQ(Rand1(), Rand2()); - - Config.Seed = static_cast( - std::chrono::system_clock::now().time_since_epoch().count()); - Config.OnlyASCII = 0; - LLVMMutagenConfigure(&Config); - MD = GetMutationDispatcherForTest(); - ASSERT_NE(MD, nullptr); - - Random Rand3(Config.Seed); - Random &Rand4 = MD->GetRand(); - for (size_t i = 0; i < 10; ++i) - EXPECT_EQ(Rand3(), Rand4()); -} - -TEST_F(MutagenInterface, UseTORCs) { - // If !UseCmp, none of the TORC/MMT callbacks are called, regardless of - // UseMemmem. - Config.UseCmp = 0; - Config.UseMemmem = 1; - LLVMMutagenConfigure(&Config); - for (size_t i = 0; i < 200; ++i) - LLVMMutagenMutate(U.data(), U.size(), U.capacity()); - EXPECT_EQ(FromTORC4Calls, 0U); - EXPECT_EQ(FromTORC8Calls, 0U); - EXPECT_EQ(FromTORCWCalls, 0U); - EXPECT_EQ(FromMMTCalls, 0U); - - // If UseCmp, but !UseMemmem, only the TORC callbacks are invoked. - Config.UseCmp = 1; - Config.UseMemmem = 0; - LLVMMutagenConfigure(&Config); - for (size_t i = 0; i < 200; ++i) - LLVMMutagenMutate(U.data(), U.size(), U.capacity()); - EXPECT_NE(FromTORC4Calls, 0U); - EXPECT_NE(FromTORC8Calls, 0U); - EXPECT_NE(FromTORCWCalls, 0U); - EXPECT_EQ(FromMMTCalls, 0U); - - // If UseCmp and UseMemmem, all the TORC/MMT callbacks are invoked. - Config.UseCmp = 1; - Config.UseMemmem = 1; - LLVMMutagenConfigure(&Config); - for (size_t i = 0; i < 200; ++i) - LLVMMutagenMutate(U.data(), U.size(), U.capacity()); - EXPECT_NE(FromTORC4Calls, 0U); - EXPECT_NE(FromTORC8Calls, 0U); - EXPECT_NE(FromTORCWCalls, 0U); - EXPECT_NE(FromMMTCalls, 0U); -} - -TEST_F(MutagenInterface, CustomCallbacks) { - // DefaultMutate never selects custom callbacks. - LLVMMutagenConfigure(&Config); - for (size_t i = 0; i < 200; ++i) - LLVMMutagenDefaultMutate(U.data(), U.size(), U.capacity()); - - // Valid. - auto *MD = GetMutationDispatcherForTest(); - EXPECT_EQ(CustomMutatorCalls, 0U); - MD->Mutate_Custom(U.data(), U.size(), U.capacity()); - EXPECT_EQ(CustomMutatorCalls, 1U); - - // Null cross-over input disables CustomCrossOver. - LLVMMutagenSetCrossOverWith(nullptr, 0); - MD->Mutate_CustomCrossOver(U.data(), U.size(), U.capacity()); - EXPECT_EQ(CustomCrossOverCalls, 0U); - - // Zero-length cross-over input disables CustomCrossOver. - Unit CrossOverWith = {4, 3, 2, 1}; - LLVMMutagenSetCrossOverWith(CrossOverWith.data(), 0); - MD->Mutate_CustomCrossOver(U.data(), U.size(), U.capacity()); - EXPECT_EQ(CustomCrossOverCalls, 0U); - - // Valid. - LLVMMutagenSetCrossOverWith(CrossOverWith.data(), CrossOverWith.size()); - MD->Mutate_CustomCrossOver(U.data(), U.size(), U.capacity()); - EXPECT_EQ(CustomCrossOverCalls, 1U); - - // Can mutate without custom callbacks. - Config.CustomMutator = nullptr; - Config.CustomCrossOver = nullptr; - LLVMMutagenConfigure(&Config); - for (size_t i = 0; i < 200; ++i) - LLVMMutagenMutate(U.data(), U.size(), U.capacity()); -} - -TEST_F(MutagenInterface, MutationSequence) { - LLVMMutagenConfigure(&Config); - char Buf[1024]; - size_t NumItems; - - Set Names = { - "ShuffleBytes", "EraseBytes", "InsertBytes", "InsertRepeatedBytes", - "ChangeByte", "ChangeBit", "CopyPart", "ChangeASCIIInt", - "ChangeBinInt", - }; - std::string Name; - std::istringstream ISS; - - // Empty sequences - auto Size = LLVMMutagenGetMutationSequence(true, Buf, sizeof(Buf), &NumItems); - EXPECT_STREQ(Buf, ""); - EXPECT_EQ(Size, 0U); - EXPECT_EQ(NumItems, 0U); - - while (true) { - // Can get size without output parameters. - Size = LLVMMutagenGetMutationSequence(true, nullptr, 0, &NumItems); - if (NumItems > Sequence::kMaxBriefItems) - break; - // !Verbose has no effect for <= 10 items. - EXPECT_EQ(LLVMMutagenGetMutationSequence(false, nullptr, 0, nullptr), Size); - EXPECT_GT(LLVMMutagenDefaultMutate(U.data(), U.size(), U.capacity()), 0U); - } - - // All items are valid. - LLVMMutagenGetMutationSequence(true, Buf, sizeof(Buf), nullptr); - ISS.str(Buf); - size_t N = 0; - while (std::getline(ISS, Name, '-')) { - EXPECT_GT(Names.count(Name), 0U); - ++N; - } - EXPECT_EQ(N, NumItems); - - // !Verbose truncates, but items are still valid. - EXPECT_LT(LLVMMutagenGetMutationSequence(false, Buf, sizeof(Buf), nullptr), - Size); - ISS.str(Buf); - N = 0; - while (std::getline(ISS, Name, '-')) { - EXPECT_GT(Names.count(Name), 0U); - ++N; - } - EXPECT_LT(N, NumItems); - - // Truncated sequence is a prefix of its untruncated equivalent. - std::string Truncated(Buf); - LLVMMutagenGetMutationSequence(true, Buf, sizeof(Buf), &NumItems); - Buf[Truncated.size()] = '\0'; - EXPECT_STREQ(Truncated.c_str(), Buf); - - // Stops at the end of |Buf|, and null terminates. - EXPECT_EQ(LLVMMutagenGetMutationSequence(true, Buf, Size - 1, nullptr), Size); - EXPECT_EQ(strlen(Buf), Size - 2); - - // Clear the sequence. - LLVMMutagenResetSequence(); - EXPECT_EQ(LLVMMutagenGetMutationSequence(true, nullptr, 0, nullptr), 0U); -} - -static uint8_t FromASCIINybble(char C) { - if ('0' <= C && C <= '9') - return static_cast(C - '0'); - if ('A' <= C && C <= 'F') - return static_cast(C - 'A' + 10); - assert('a' <= C && C <= 'f'); - return static_cast(C - 'a' + 10); -} - -static Word FromASCII(const char *DE) { - Unit Tmp; - bool Escape = false; - size_t Hex = 0; - uint8_t Nybble = 0; - for (char C = *DE++; C; C = *DE++) { - if (Hex == 2) { - Nybble = FromASCIINybble(C); - --Hex; - } else if (Hex == 1) { - Tmp.push_back(static_cast(Nybble << 4) | FromASCIINybble(C)); - --Hex; - } else if (Escape) { - switch (C) { - case '\\': - case '"': - Tmp.push_back(static_cast(C)); - break; - case 'x': - Hex = 2; - break; - default: - assert(false && "FromASCII failure."); - } - Escape = false; - } else if (C == '\\') { - Escape = true; - } else { - Tmp.push_back(static_cast(C)); - } - } - return Word(Tmp.data(), Tmp.size()); -} - -TEST_F(MutagenInterface, Dictionaries) { - LLVMMutagenConfigure(&Config); - size_t NumItems; - char Buf[1024]; - std::istringstream ISS; - std::string Str; - - // Empty sequences - auto Size = - LLVMMutagenGetDictionaryEntrySequence(true, Buf, sizeof(Buf), &NumItems); - EXPECT_STREQ(Buf, ""); - EXPECT_EQ(Size, 0U); - EXPECT_EQ(NumItems, 0U); - - auto *MD = GetMutationDispatcherForTest(); - while (true) { - // Can get size without output parameters. - Size = LLVMMutagenGetDictionaryEntrySequence(true, nullptr, 0, &NumItems); - if (NumItems > Sequence::kMaxBriefItems) - break; - // !Verbose has no effect for <= 10 items. - EXPECT_EQ(LLVMMutagenGetDictionaryEntrySequence(false, nullptr, 0, nullptr), - Size); - MD->Mutate_AddWordFromTORC(U.data(), U.size(), U.capacity()); - } - - // All items are valid. - LLVMMutagenGetDictionaryEntrySequence(true, Buf, sizeof(Buf), nullptr); - ISS.str(Buf); - size_t N = 0; - while (std::getline(ISS, Str, '-')) { - ASSERT_FALSE(Str.empty()); - EXPECT_EQ(Str[0], '"'); - EXPECT_EQ(Str[Str.size() - 1], '"'); - ++N; - } - EXPECT_EQ(N, NumItems); - - // !Verbose truncates, but items are still valid. - EXPECT_LT( - LLVMMutagenGetDictionaryEntrySequence(false, Buf, sizeof(Buf), nullptr), - Size); - ISS.str(Buf); - N = 0; - while (std::getline(ISS, Str, '-')) { - ASSERT_FALSE(Str.empty()); - EXPECT_EQ(Str[0], '"'); - EXPECT_EQ(Str[Str.size() - 1], '"'); - ++N; - } - EXPECT_LT(N, NumItems); - - // Truncated sequence is a prefix of its untruncated equivalent. - std::string Truncated(Buf); - LLVMMutagenGetDictionaryEntrySequence(true, Buf, sizeof(Buf), &NumItems); - Buf[Truncated.size()] = '\0'; - EXPECT_STREQ(Truncated.c_str(), Buf); - - // Stops at the end of |Buf|, and null terminates. - EXPECT_EQ(LLVMMutagenGetDictionaryEntrySequence(true, Buf, Size - 1, nullptr), - Size); - EXPECT_EQ(strlen(Buf), Size - 2); - - // Clear the sequence. - LLVMMutagenResetSequence(); - EXPECT_EQ(LLVMMutagenGetDictionaryEntrySequence(true, nullptr, 0, nullptr), - 0U); - - // Retuns null if no recommendations. - size_t UseCount = 0; - EXPECT_EQ(LLVMMutagenRecommendDictionaryEntry(&UseCount), nullptr); - EXPECT_EQ(LLVMMutagenRecommendDictionary(), 0U); - EXPECT_EQ(LLVMMutagenRecommendDictionaryEntry(&UseCount), nullptr); - - // Record sequences. - for (size_t i = 0; i < 5; ++i) { - for (size_t i = 0; i < 5; ++i) { - MD->Mutate_AddWordFromTORC(U.data(), U.size(), U.capacity()); - } - LLVMMutagenRecordSequence(); - } - - size_t NumDEs = LLVMMutagenRecommendDictionary(); - EXPECT_NE(NumDEs, 0U); - for (size_t i = 0; i < NumDEs; ++i) { - auto *DE = LLVMMutagenRecommendDictionaryEntry(&UseCount); - EXPECT_NE(DE, nullptr); - EXPECT_EQ(UseCount, 0U); - } - - // Increment the use counts of entries. - for (size_t i = 0; i < 100; ++i) - MD->Mutate_AddWordFromPersistentAutoDictionary(U.data(), U.size(), - U.capacity()); - NumDEs = LLVMMutagenRecommendDictionary(); - EXPECT_NE(NumDEs, 0U); - for (size_t i = 0; i < NumDEs; ++i) { - auto *DE = LLVMMutagenRecommendDictionaryEntry(&UseCount); - EXPECT_NE(DE, nullptr); - EXPECT_NE(UseCount, 0U); - } - - // Add the first few words manually to exclude them from recommendations. - Vector ManualAdditions; - NumDEs = LLVMMutagenRecommendDictionary(); - ASSERT_GT(NumDEs, 3U); - for (size_t i = 0; i < 3; ++i) { - auto *DE = LLVMMutagenRecommendDictionaryEntry(nullptr); - auto W = FromASCII(DE); - LLVMMutagenAddWordToDictionary(W.data(), W.size()); - ManualAdditions.push_back(W); - } - N = NumDEs; - - // Get the recommended dictionary without the manual additions. - NumDEs = LLVMMutagenRecommendDictionary(); - EXPECT_EQ(NumDEs, N - 3); - for (size_t i = 0; i < NumDEs; ++i) { - auto *DE = LLVMMutagenRecommendDictionaryEntry(nullptr); - ASSERT_NE(DE, nullptr); - Word W1(reinterpret_cast(DE), strlen(DE)); - for (const auto &W2 : ManualAdditions) - EXPECT_FALSE(W1 == W2); - } -} - -} // namespace -} // namespace mutagen - -int main(int argc, char **argv) { - testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/compiler-rt/test/fuzzer/CMakeLists.txt b/compiler-rt/test/fuzzer/CMakeLists.txt index acfcd437f0287..c12a04b6f2702 100644 --- a/compiler-rt/test/fuzzer/CMakeLists.txt +++ b/compiler-rt/test/fuzzer/CMakeLists.txt @@ -20,7 +20,6 @@ endif() if(COMPILER_RT_INCLUDE_TESTS) list(APPEND LIBFUZZER_TEST_DEPS FuzzerUnitTests) list(APPEND LIBFUZZER_TEST_DEPS FuzzedDataProviderUnitTests) - list(APPEND LIBFUZZER_TEST_DEPS MutagenUnitTests) endif() add_custom_target(check-fuzzer) From dba74c68178bfaa54e6270d4790b78ef5b6e37c2 Mon Sep 17 00:00:00 2001 From: Siva Chandra Reddy Date: Thu, 1 Jul 2021 20:41:05 +0000 Subject: [PATCH 547/619] [libc] Make ULP error reflect the bit distance more closely. Reviewed By: lntue Differential Revision: https://reviews.llvm.org/D105334 --- libc/utils/MPFRWrapper/MPFRUtils.cpp | 80 ++++++++++++++++------------ 1 file changed, 47 insertions(+), 33 deletions(-) diff --git a/libc/utils/MPFRWrapper/MPFRUtils.cpp b/libc/utils/MPFRWrapper/MPFRUtils.cpp index 959701cf94125..a61f02243579c 100644 --- a/libc/utils/MPFRWrapper/MPFRUtils.cpp +++ b/libc/utils/MPFRWrapper/MPFRUtils.cpp @@ -12,6 +12,7 @@ #include "utils/FPUtil/FPBits.h" #include "utils/FPUtil/TestHelpers.h" +#include #include #include #include @@ -256,51 +257,64 @@ class MPFRNumber { // Return the ULP (units-in-the-last-place) difference between the // stored MPFR and a floating point number. // - // We define: - // ULP(mpfr_value, value) = abs(mpfr_value - value) / eps(value) + // We define ULP difference as follows: + // If exponents of this value and the |input| are same, then: + // ULP(this_value, input) = abs(this_value - input) / eps(input) + // else: + // max = max(abs(this_value), abs(input)) + // min = min(abs(this_value), abs(input)) + // maxExponent = exponent(max) + // ULP(this_value, input) = (max - 2^maxExponent) / eps(max) + + // (2^maxExponent - min) / eps(min) // // Remarks: - // 1. ULP < 0.5 will imply that the value is correctly rounded. + // 1. A ULP of 0.0 will imply that the value is correctly rounded. // 2. We expect that this value and the value to be compared (the [input] // argument) are reasonable close, and we will provide an upper bound // of ULP value for testing. Morever, most of the fractional parts of // ULP value do not matter much, so using double as the return type // should be good enough. + // 3. For close enough values (values which don't diff in their exponent by + // not more than 1), a ULP difference of N indicates a bit distance + // of N between this number and [input]. + // 4. A values of +0.0 and -0.0 are treated as equal. template cpp::EnableIfType::Value, double> ulp(T input) { - fputil::FPBits bits(input); - MPFRNumber mpfrInput(input); - // When calculating error, we first round this number to the floating - // point type T and calculate the ulp error against this rounded number. - // The input is always either exact or rounded. So, we if compare - // with this number directly, we can end up with a large ULP error. - MPFRNumber thisRoundedToFloat(as()); - - // abs(thisRoundedToFloat - input) - mpfr_sub(mpfrInput.value, thisRoundedToFloat.value, mpfrInput.value, - MPFR_RNDN); - mpfr_abs(mpfrInput.value, mpfrInput.value, MPFR_RNDN); - - // get eps(input) - int epsExponent = bits.encoding.exponent - fputil::FPBits::exponentBias - - fputil::MantissaWidth::value; - if (bits.encoding.exponent == 0) { - // correcting denormal exponent - ++epsExponent; - } else if ((bits.encoding.mantissa == 0) && (bits.encoding.exponent > 1) && - mpfr_less_p(thisRoundedToFloat.value, mpfrInput.value)) { - // when the input is exactly 2^n, distance (epsilon) between the input - // and the next floating point number is different from the distance to - // the previous floating point number. So in that case, if the correct - // value from MPFR is smaller than the input, we use the smaller epsilon - --epsExponent; + T thisAsT = as(); + int thisExponent = fputil::FPBits(thisAsT).getExponent(); + int inputExponent = fputil::FPBits(input).getExponent(); + if (thisAsT * input < 0 || thisExponent == inputExponent) { + MPFRNumber inputMPFR(input); + mpfr_sub(inputMPFR.value, value, inputMPFR.value, MPFR_RNDN); + mpfr_abs(inputMPFR.value, inputMPFR.value, MPFR_RNDN); + mpfr_mul_2si(inputMPFR.value, inputMPFR.value, -thisExponent, MPFR_RNDN); + return inputMPFR.as(); } - // Since eps(value) is of the form 2^e, instead of dividing such number, - // we multiply by its inverse 2^{-e}. - mpfr_mul_2si(mpfrInput.value, mpfrInput.value, -epsExponent, MPFR_RNDN); + // If the control reaches here, it means that this number and input are + // of the same sign but different exponent. In such a case, ULP error is + // calculated as sum of two parts. + thisAsT = std::abs(thisAsT); + input = std::abs(input); + T min = thisAsT > input ? input : thisAsT; + T max = thisAsT > input ? thisAsT : input; + int minExponent = fputil::FPBits(min).getExponent(); + int maxExponent = fputil::FPBits(max).getExponent(); - return mpfrInput.as(); + MPFRNumber minMPFR(min); + MPFRNumber maxMPFR(max); + + MPFRNumber pivot(uint32_t(1)); + mpfr_mul_2si(pivot.value, pivot.value, maxExponent, MPFR_RNDN); + + mpfr_sub(minMPFR.value, pivot.value, minMPFR.value, MPFR_RNDN); + mpfr_mul_2si(minMPFR.value, minMPFR.value, -minExponent, MPFR_RNDN); + + mpfr_sub(maxMPFR.value, maxMPFR.value, pivot.value, MPFR_RNDN); + mpfr_mul_2si(maxMPFR.value, maxMPFR.value, -maxExponent, MPFR_RNDN); + + mpfr_add(minMPFR.value, minMPFR.value, maxMPFR.value, MPFR_RNDN); + return minMPFR.as(); } }; From 99f00635d7acf1cbcdba35e7621f3a211aa3f237 Mon Sep 17 00:00:00 2001 From: Jacob Hegna Date: Thu, 10 Jun 2021 02:16:04 +0000 Subject: [PATCH 548/619] Unpack the CostEstimate feature in ML inlining models. This change yields an additional 2% size reduction on an internal search binary, and an additional 0.5% size reduction on fuchsia. Differential Revision: https://reviews.llvm.org/D104751 --- llvm/include/llvm/Analysis/InlineCost.h | 10 + .../llvm/Analysis/InlineModelFeatureMaps.h | 71 ++++- llvm/lib/Analysis/CMakeLists.txt | 2 +- llvm/lib/Analysis/InlineCost.cpp | 254 ++++++++++++++++-- llvm/lib/Analysis/MLInlineAdvisor.cpp | 27 +- llvm/lib/Analysis/models/inlining/config.py | 41 ++- llvm/unittests/Analysis/CMakeLists.txt | 1 + llvm/unittests/Analysis/InlineCostTest.cpp | 77 ++++++ 8 files changed, 458 insertions(+), 25 deletions(-) create mode 100644 llvm/unittests/Analysis/InlineCostTest.cpp diff --git a/llvm/include/llvm/Analysis/InlineCost.h b/llvm/include/llvm/Analysis/InlineCost.h index 7f04a8ce8f5fa..a974e07bd767a 100644 --- a/llvm/include/llvm/Analysis/InlineCost.h +++ b/llvm/include/llvm/Analysis/InlineCost.h @@ -15,6 +15,7 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CallGraphSCCPass.h" +#include "llvm/Analysis/InlineModelFeatureMaps.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include #include @@ -270,6 +271,15 @@ Optional getInliningCostEstimate( ProfileSummaryInfo *PSI = nullptr, OptimizationRemarkEmitter *ORE = nullptr); +/// Get the expanded cost features. The features are returned unconditionally, +/// even if inlining is impossible. +Optional getInliningCostFeatures( + CallBase &Call, TargetTransformInfo &CalleeTTI, + function_ref GetAssumptionCache, + function_ref GetBFI = nullptr, + ProfileSummaryInfo *PSI = nullptr, + OptimizationRemarkEmitter *ORE = nullptr); + /// Minimal filter to detect invalid constructs for inlining. InlineResult isInlineViable(Function &Callee); diff --git a/llvm/include/llvm/Analysis/InlineModelFeatureMaps.h b/llvm/include/llvm/Analysis/InlineModelFeatureMaps.h index 9e5286d478cd1..1afa8a825f15a 100644 --- a/llvm/include/llvm/Analysis/InlineModelFeatureMaps.h +++ b/llvm/include/llvm/Analysis/InlineModelFeatureMaps.h @@ -16,6 +16,61 @@ namespace llvm { +// List of cost features. A "cost" feature is a summand of the heuristic-based +// inline cost, and we define them separately to preserve the original heuristic +// behavior. +#define INLINE_COST_FEATURE_ITERATOR(M) \ + M(SROASavings, "sroa_savings") \ + M(SROALosses, "sroa_losses") \ + M(LoadElimination, "load_elimination") \ + M(CallPenalty, "call_penalty") \ + M(CallArgumentSetup, "call_argument_setup") \ + M(LoadRelativeIntrinsic, "load_relative_intrinsic") \ + M(LoweredCallArgSetup, "lowered_call_arg_setup") \ + M(IndirectCallPenalty, "indirect_call_penalty") \ + M(JumpTablePenalty, "jump_table_penalty") \ + M(CaseClusterPenalty, "case_cluster_penalty") \ + M(SwitchPenalty, "switch_penalty") \ + M(UnsimplifiedCommonInstructions, "unsimplified_common_instructions") \ + M(NumLoops, "num_loops") \ + M(DeadBlocks, "dead_blocks") \ + M(SimplifiedInstructions, "simplified_instructions") \ + M(ConstantArgs, "constant_args") \ + M(ConstantOffsetPtrArgs, "constant_offset_ptr_args") \ + M(CallSiteCost, "callsite_cost") \ + M(ColdCcPenalty, "cold_cc_penalty") \ + M(LastCallToStaticBonus, "last_call_to_static_bonus") \ + M(IsMultipleBlocks, "is_multiple_blocks") \ + M(NestedInlines, "nested_inlines") \ + M(NestedInlineCostEstimate, "nested_inline_cost_estimate") \ + M(Threshold, "threshold") + +// clang-format off +enum class InlineCostFeatureIndex : size_t { +#define POPULATE_INDICES(INDEX_NAME, NAME) INDEX_NAME, + INLINE_COST_FEATURE_ITERATOR(POPULATE_INDICES) +#undef POPULATE_INDICES + + NumberOfFeatures +}; +// clang-format on + +using InlineCostFeatures = + std::array(InlineCostFeatureIndex::NumberOfFeatures)>; + +constexpr bool isHeuristicInlineCostFeature(InlineCostFeatureIndex Feature) { + return Feature != InlineCostFeatureIndex::SROASavings && + Feature != InlineCostFeatureIndex::IsMultipleBlocks && + Feature != InlineCostFeatureIndex::DeadBlocks && + Feature != InlineCostFeatureIndex::SimplifiedInstructions && + Feature != InlineCostFeatureIndex::ConstantArgs && + Feature != InlineCostFeatureIndex::ConstantOffsetPtrArgs && + Feature != InlineCostFeatureIndex::NestedInlines && + Feature != InlineCostFeatureIndex::NestedInlineCostEstimate && + Feature != InlineCostFeatureIndex::Threshold; +} + // List of features. Each feature is defined through a triple: // - the name of an enum member, which will be the feature index // - a textual name, used for Tensorflow model binding (so it needs to match the @@ -48,12 +103,26 @@ namespace llvm { "number of module-internal users of the callee, +1 if the callee is " \ "exposed externally") +// clang-format off enum class FeatureIndex : size_t { +// InlineCost features - these must come first +#define POPULATE_INDICES(INDEX_NAME, NAME) INDEX_NAME, + INLINE_COST_FEATURE_ITERATOR(POPULATE_INDICES) +#undef POPULATE_INDICES + +// Non-cost features #define POPULATE_INDICES(INDEX_NAME, NAME, COMMENT) INDEX_NAME, INLINE_FEATURE_ITERATOR(POPULATE_INDICES) #undef POPULATE_INDICES - NumberOfFeatures + + NumberOfFeatures }; +// clang-format on + +constexpr FeatureIndex +inlineCostFeatureToMlFeature(InlineCostFeatureIndex Feature) { + return static_cast(static_cast(Feature)); +} constexpr size_t NumberOfFeatures = static_cast(FeatureIndex::NumberOfFeatures); diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt index 0165e4bc1bcc0..8e1efc1f2b85d 100644 --- a/llvm/lib/Analysis/CMakeLists.txt +++ b/llvm/lib/Analysis/CMakeLists.txt @@ -5,7 +5,7 @@ if (DEFINED LLVM_HAVE_TF_AOT OR DEFINED LLVM_HAVE_TF_API) # This url points to the most recent most which is known to be compatible with # LLVM. When better models are published, this url should be updated to aid # discoverability. - set(LLVM_INLINER_MODEL_CURRENT_URL "https://github.com/google/ml-compiler-opt/releases/download/inlining-Oz-v0.1/inlining-Oz-acabaf6-v0.1.tar.gz") + set(LLVM_INLINER_MODEL_CURRENT_URL "TO_BE_UPDATED") if (DEFINED LLVM_HAVE_TF_AOT) # If the path is empty, autogenerate the model diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp index 92b0fbd840860..4e03629148c86 100644 --- a/llvm/lib/Analysis/InlineCost.cpp +++ b/llvm/lib/Analysis/InlineCost.cpp @@ -439,6 +439,25 @@ class CallAnalyzer : public InstVisitor { void dump(); }; +// Considering forming a binary search, we should find the number of nodes +// which is same as the number of comparisons when lowered. For a given +// number of clusters, n, we can define a recursive function, f(n), to find +// the number of nodes in the tree. The recursion is : +// f(n) = 1 + f(n/2) + f (n - n/2), when n > 3, +// and f(n) = n, when n <= 3. +// This will lead a binary tree where the leaf should be either f(2) or f(3) +// when n > 3. So, the number of comparisons from leaves should be n, while +// the number of non-leaf should be : +// 2^(log2(n) - 1) - 1 +// = 2^log2(n) * 2^-1 - 1 +// = n / 2 - 1. +// Considering comparisons from leaf and non-leaf nodes, we can estimate the +// number of comparisons in a simple closed form : +// n + n / 2 - 1 = n * 3 / 2 - 1 +int64_t getExpectedNumberOfCompare(int NumCaseCluster) { + return 3 * static_cast(NumCaseCluster) / 2 - 1; +} + /// FIXME: if it is necessary to derive from InlineCostCallAnalyzer, note /// the FIXME in onLoweredCall, when instantiating an InlineCostCallAnalyzer class InlineCostCallAnalyzer final : public CallAnalyzer { @@ -582,28 +601,15 @@ class InlineCostCallAnalyzer final : public CallAnalyzer { addCost(JTCost, (int64_t)CostUpperBound); return; } - // Considering forming a binary search, we should find the number of nodes - // which is same as the number of comparisons when lowered. For a given - // number of clusters, n, we can define a recursive function, f(n), to find - // the number of nodes in the tree. The recursion is : - // f(n) = 1 + f(n/2) + f (n - n/2), when n > 3, - // and f(n) = n, when n <= 3. - // This will lead a binary tree where the leaf should be either f(2) or f(3) - // when n > 3. So, the number of comparisons from leaves should be n, while - // the number of non-leaf should be : - // 2^(log2(n) - 1) - 1 - // = 2^log2(n) * 2^-1 - 1 - // = n / 2 - 1. - // Considering comparisons from leaf and non-leaf nodes, we can estimate the - // number of comparisons in a simple closed form : - // n + n / 2 - 1 = n * 3 / 2 - 1 + if (NumCaseCluster <= 3) { // Suppose a comparison includes one compare and one conditional branch. addCost(NumCaseCluster * 2 * InlineConstants::InstrCost); return; } - int64_t ExpectedNumberOfCompare = 3 * (int64_t)NumCaseCluster / 2 - 1; + int64_t ExpectedNumberOfCompare = + getExpectedNumberOfCompare(NumCaseCluster); int64_t SwitchCost = ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost; @@ -936,6 +942,209 @@ class InlineCostCallAnalyzer final : public CallAnalyzer { int getCost() { return Cost; } bool wasDecidedByCostBenefit() { return DecidedByCostBenefit; } }; + +class InlineCostFeaturesAnalyzer final : public CallAnalyzer { +private: + InlineCostFeatures Cost = {}; + + // FIXME: These constants are taken from the heuristic-based cost visitor. + // These should be removed entirely in a later revision to avoid reliance on + // heuristics in the ML inliner. + static constexpr int JTCostMultiplier = 4; + static constexpr int CaseClusterCostMultiplier = 2; + static constexpr int SwitchCostMultiplier = 2; + + // FIXME: These are taken from the heuristic-based cost visitor: we should + // eventually abstract these to the CallAnalyzer to avoid duplication. + unsigned SROACostSavingOpportunities = 0; + int VectorBonus = 0; + int SingleBBBonus = 0; + int Threshold = 5; + + DenseMap SROACosts; + + void increment(InlineCostFeatureIndex Feature, int64_t Delta = 1) { + Cost[static_cast(Feature)] += Delta; + } + + void set(InlineCostFeatureIndex Feature, int64_t Value) { + Cost[static_cast(Feature)] = Value; + } + + void onDisableSROA(AllocaInst *Arg) override { + auto CostIt = SROACosts.find(Arg); + if (CostIt == SROACosts.end()) + return; + + increment(InlineCostFeatureIndex::SROALosses, CostIt->second); + SROACostSavingOpportunities -= CostIt->second; + SROACosts.erase(CostIt); + } + + void onDisableLoadElimination() override { + set(InlineCostFeatureIndex::LoadElimination, 1); + } + + void onCallPenalty() override { + increment(InlineCostFeatureIndex::CallPenalty, + InlineConstants::CallPenalty); + } + + void onCallArgumentSetup(const CallBase &Call) override { + increment(InlineCostFeatureIndex::CallArgumentSetup, + Call.arg_size() * InlineConstants::InstrCost); + } + + void onLoadRelativeIntrinsic() override { + increment(InlineCostFeatureIndex::LoadRelativeIntrinsic, + 3 * InlineConstants::InstrCost); + } + + void onLoweredCall(Function *F, CallBase &Call, + bool IsIndirectCall) override { + increment(InlineCostFeatureIndex::LoweredCallArgSetup, + Call.arg_size() * InlineConstants::InstrCost); + + if (IsIndirectCall) { + InlineParams IndirectCallParams = {/* DefaultThreshold*/ 0, + /*HintThreshold*/ {}, + /*ColdThreshold*/ {}, + /*OptSizeThreshold*/ {}, + /*OptMinSizeThreshold*/ {}, + /*HotCallSiteThreshold*/ {}, + /*LocallyHotCallSiteThreshold*/ {}, + /*ColdCallSiteThreshold*/ {}, + /*ComputeFullInlineCost*/ true, + /*EnableDeferral*/ true}; + IndirectCallParams.DefaultThreshold = + InlineConstants::IndirectCallThreshold; + + InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI, + GetAssumptionCache, GetBFI, PSI, ORE, false, + true); + if (CA.analyze().isSuccess()) { + increment(InlineCostFeatureIndex::NestedInlineCostEstimate, + CA.getCost()); + increment(InlineCostFeatureIndex::NestedInlines, 1); + } + } else { + onCallPenalty(); + } + } + + void onFinalizeSwitch(unsigned JumpTableSize, + unsigned NumCaseCluster) override { + + if (JumpTableSize) { + int64_t JTCost = + static_cast(JumpTableSize) * InlineConstants::InstrCost + + JTCostMultiplier * InlineConstants::InstrCost; + increment(InlineCostFeatureIndex::JumpTablePenalty, JTCost); + return; + } + + if (NumCaseCluster <= 3) { + increment(InlineCostFeatureIndex::CaseClusterPenalty, + NumCaseCluster * CaseClusterCostMultiplier * + InlineConstants::InstrCost); + return; + } + + int64_t ExpectedNumberOfCompare = + getExpectedNumberOfCompare(NumCaseCluster); + + int64_t SwitchCost = ExpectedNumberOfCompare * SwitchCostMultiplier * + InlineConstants::InstrCost; + increment(InlineCostFeatureIndex::SwitchPenalty, SwitchCost); + } + + void onMissedSimplification() override { + increment(InlineCostFeatureIndex::UnsimplifiedCommonInstructions, + InlineConstants::InstrCost); + } + + void onInitializeSROAArg(AllocaInst *Arg) override { SROACosts[Arg] = 0; } + void onAggregateSROAUse(AllocaInst *Arg) override { + SROACosts.find(Arg)->second += InlineConstants::InstrCost; + SROACostSavingOpportunities += InlineConstants::InstrCost; + } + + void onBlockAnalyzed(const BasicBlock *BB) override { + if (BB->getTerminator()->getNumSuccessors() > 1) + set(InlineCostFeatureIndex::IsMultipleBlocks, 1); + Threshold -= SingleBBBonus; + } + + InlineResult finalizeAnalysis() override { + auto *Caller = CandidateCall.getFunction(); + if (Caller->hasMinSize()) { + DominatorTree DT(F); + LoopInfo LI(DT); + for (Loop *L : LI) { + // Ignore loops that will not be executed + if (DeadBlocks.count(L->getHeader())) + continue; + increment(InlineCostFeatureIndex::NumLoops, + InlineConstants::CallPenalty); + } + } + set(InlineCostFeatureIndex::DeadBlocks, DeadBlocks.size()); + set(InlineCostFeatureIndex::SimplifiedInstructions, + NumInstructionsSimplified); + set(InlineCostFeatureIndex::ConstantArgs, NumConstantArgs); + set(InlineCostFeatureIndex::ConstantOffsetPtrArgs, + NumConstantOffsetPtrArgs); + set(InlineCostFeatureIndex::SROASavings, SROACostSavingOpportunities); + + if (NumVectorInstructions <= NumInstructions / 10) + increment(InlineCostFeatureIndex::Threshold, -1 * VectorBonus); + else if (NumVectorInstructions <= NumInstructions / 2) + increment(InlineCostFeatureIndex::Threshold, -1 * (VectorBonus / 2)); + + set(InlineCostFeatureIndex::Threshold, Threshold); + + return InlineResult::success(); + } + + bool shouldStop() override { return false; } + + void onLoadEliminationOpportunity() override { + increment(InlineCostFeatureIndex::LoadElimination, 1); + } + + InlineResult onAnalysisStart() override { + increment(InlineCostFeatureIndex::CallSiteCost, + -1 * getCallsiteCost(this->CandidateCall, DL)); + + set(InlineCostFeatureIndex::ColdCcPenalty, + (F.getCallingConv() == CallingConv::Cold)); + + // FIXME: we shouldn't repeat this logic in both the Features and Cost + // analyzer - instead, we should abstract it to a common method in the + // CallAnalyzer + int SingleBBBonusPercent = 50; + int VectorBonusPercent = TTI.getInlinerVectorBonusPercent(); + Threshold += TTI.adjustInliningThreshold(&CandidateCall); + Threshold *= TTI.getInliningThresholdMultiplier(); + SingleBBBonus = Threshold * SingleBBBonusPercent / 100; + VectorBonus = Threshold * VectorBonusPercent / 100; + Threshold += (SingleBBBonus + VectorBonus); + + return InlineResult::success(); + } + +public: + InlineCostFeaturesAnalyzer( + const TargetTransformInfo &TTI, + function_ref &GetAssumptionCache, + function_ref GetBFI, + ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE, Function &Callee, + CallBase &Call) + : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, PSI) {} + + const InlineCostFeatures &features() const { return Cost; } +}; + } // namespace /// Test whether the given value is an Alloca-derived function argument. @@ -2502,6 +2711,19 @@ Optional llvm::getInliningCostEstimate( return CA.getCost(); } +Optional llvm::getInliningCostFeatures( + CallBase &Call, TargetTransformInfo &CalleeTTI, + function_ref GetAssumptionCache, + function_ref GetBFI, + ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) { + InlineCostFeaturesAnalyzer CFA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, + ORE, *Call.getCalledFunction(), Call); + auto R = CFA.analyze(); + if (!R.isSuccess()) + return None; + return CFA.features(); +} + Optional llvm::getAttributeBasedInliningDecision( CallBase &Call, Function *Callee, TargetTransformInfo &CalleeTTI, function_ref GetTLI) { diff --git a/llvm/lib/Analysis/MLInlineAdvisor.cpp b/llvm/lib/Analysis/MLInlineAdvisor.cpp index 5ef460960f283..5b95ed223fd90 100644 --- a/llvm/lib/Analysis/MLInlineAdvisor.cpp +++ b/llvm/lib/Analysis/MLInlineAdvisor.cpp @@ -43,11 +43,19 @@ static cl::opt SizeIncreaseThreshold( "blocking any further inlining."), cl::init(2.0)); +// clang-format off const std::array llvm::FeatureNameMap{ +// InlineCost features - these must come first +#define POPULATE_NAMES(INDEX_NAME, NAME) NAME, + INLINE_COST_FEATURE_ITERATOR(POPULATE_NAMES) +#undef POPULATE_NAMES + +// Non-cost features #define POPULATE_NAMES(INDEX_NAME, NAME, COMMENT) NAME, - INLINE_FEATURE_ITERATOR(POPULATE_NAMES) + INLINE_FEATURE_ITERATOR(POPULATE_NAMES) #undef POPULATE_NAMES }; +// clang-format on const char *const llvm::DecisionName = "inlining_decision"; const char *const llvm::DefaultDecisionName = "inlining_default"; @@ -217,6 +225,12 @@ std::unique_ptr MLInlineAdvisor::getAdviceImpl(CallBase &CB) { CostEstimate = *IsCallSiteInlinable; } + const auto CostFeatures = + llvm::getInliningCostFeatures(CB, TIR, GetAssumptionCache); + if (!CostFeatures) { + return std::make_unique(this, CB, ORE, false); + } + if (Mandatory) return getMandatoryAdvice(CB, true); @@ -234,7 +248,6 @@ std::unique_ptr MLInlineAdvisor::getAdviceImpl(CallBase &CB) { FunctionLevels[&Caller]); ModelRunner->setFeature(FeatureIndex::NodeCount, NodeCount); ModelRunner->setFeature(FeatureIndex::NrCtantParams, NrCtantParams); - ModelRunner->setFeature(FeatureIndex::CostEstimate, CostEstimate); ModelRunner->setFeature(FeatureIndex::EdgeCount, EdgeCount); ModelRunner->setFeature(FeatureIndex::CallerUsers, CallerBefore.Uses); ModelRunner->setFeature(FeatureIndex::CallerConditionallyExecutedBlocks, @@ -244,6 +257,16 @@ std::unique_ptr MLInlineAdvisor::getAdviceImpl(CallBase &CB) { ModelRunner->setFeature(FeatureIndex::CalleeConditionallyExecutedBlocks, CalleeBefore.BlocksReachedFromConditionalInstruction); ModelRunner->setFeature(FeatureIndex::CalleeUsers, CalleeBefore.Uses); + ModelRunner->setFeature(FeatureIndex::CostEstimate, CostEstimate); + + // Add the cost features + for (size_t I = 0; + I < static_cast(InlineCostFeatureIndex::NumberOfFeatures); ++I) { + ModelRunner->setFeature( + inlineCostFeatureToMlFeature(static_cast(I)), + CostFeatures->at(I)); + } + return getAdviceFromModel(CB, ORE); } diff --git a/llvm/lib/Analysis/models/inlining/config.py b/llvm/lib/Analysis/models/inlining/config.py index 3da64a6973975..78d3a8259cc29 100644 --- a/llvm/lib/Analysis/models/inlining/config.py +++ b/llvm/lib/Analysis/models/inlining/config.py @@ -26,11 +26,42 @@ def get_input_signature(): # int64 features inputs = [ tf.TensorSpec(dtype=tf.int64, shape=(), name=key) for key in [ - 'caller_basic_block_count', 'caller_conditionally_executed_blocks', - 'caller_users', 'callee_basic_block_count', - 'callee_conditionally_executed_blocks', 'callee_users', - 'nr_ctant_params', 'node_count', 'edge_count', 'callsite_height', - 'cost_estimate', 'inlining_default' + 'caller_basic_block_count', + 'caller_conditionally_executed_blocks', + 'caller_users', + 'callee_basic_block_count', + 'callee_conditionally_executed_blocks', + 'callee_users', + 'nr_ctant_params', + 'node_count', + 'edge_count', + 'callsite_height', + 'cost_estimate', + 'inlining_default', + 'sroa_savings', + 'sroa_losses', + 'load_elimination', + 'call_penalty', + 'call_argument_setup', + 'load_relative_intrinsic', + 'lowered_call_arg_setup', + 'indirect_call_penalty', + 'jump_table_penalty', + 'case_cluster_penalty', + 'switch_penalty', + 'unsimplified_common_instructions', + 'num_loops', + 'dead_blocks', + 'simplified_instructions', + 'constant_args', + 'constant_offset_ptr_args', + 'callsite_cost', + 'cold_cc_penalty', + 'last_call_to_static_bonus', + 'is_multiple_blocks', + 'nested_inlines', + 'nested_inline_cost_estimate', + 'threshold', ] ] diff --git a/llvm/unittests/Analysis/CMakeLists.txt b/llvm/unittests/Analysis/CMakeLists.txt index 0480649352214..7e3e20e4af287 100644 --- a/llvm/unittests/Analysis/CMakeLists.txt +++ b/llvm/unittests/Analysis/CMakeLists.txt @@ -29,6 +29,7 @@ add_llvm_unittest_with_input_files(AnalysisTests DomTreeUpdaterTest.cpp GlobalsModRefTest.cpp FunctionPropertiesAnalysisTest.cpp + InlineCostTest.cpp IRSimilarityIdentifierTest.cpp IVDescriptorsTest.cpp LazyCallGraphTest.cpp diff --git a/llvm/unittests/Analysis/InlineCostTest.cpp b/llvm/unittests/Analysis/InlineCostTest.cpp new file mode 100644 index 0000000000000..cb92ea79c4568 --- /dev/null +++ b/llvm/unittests/Analysis/InlineCostTest.cpp @@ -0,0 +1,77 @@ +//===- InlineCostTest.cpp - test for InlineCost ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/AsmParser/Parser.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "gtest/gtest.h" + +namespace { + +// Tests that we can retrieve the CostFeatures without an error +TEST(InlineCostTest, CostFeatures) { + using namespace llvm; + + const auto *const IR = R"IR( +define i32 @f(i32) { + ret i32 4 +} + +define i32 @g(i32) { + %2 = call i32 @f(i32 0) + ret i32 %2 +} +)IR"; + + LLVMContext C; + SMDiagnostic Err; + std::unique_ptr M = parseAssemblyString(IR, Err, C); + ASSERT_TRUE(M); + + auto *G = M->getFunction("g"); + ASSERT_TRUE(G); + + // find the call to f in g + CallBase *CB = nullptr; + for (auto &BB : *G) { + for (auto &I : BB) { + if ((CB = dyn_cast(&I))) + break; + } + } + ASSERT_TRUE(CB); + + ModuleAnalysisManager MAM; + FunctionAnalysisManager FAM; + FAM.registerPass([&] { return TargetIRAnalysis(); }); + FAM.registerPass([&] { return ModuleAnalysisManagerFunctionProxy(MAM); }); + FAM.registerPass([&] { return AssumptionAnalysis(); }); + MAM.registerPass([&] { return FunctionAnalysisManagerModuleProxy(FAM); }); + + MAM.registerPass([&] { return PassInstrumentationAnalysis(); }); + FAM.registerPass([&] { return PassInstrumentationAnalysis(); }); + + ModulePassManager MPM; + MPM.run(*M, MAM); + + auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & { + return FAM.getResult(F); + }; + auto &TIR = FAM.getResult(*G); + + const auto Features = + llvm::getInliningCostFeatures(*CB, TIR, GetAssumptionCache); + + // Check that the optional is not empty + ASSERT_TRUE(Features); +} + +} // namespace From 75e941b05c78e19c51e5c960c925f334b08109bb Mon Sep 17 00:00:00 2001 From: Joachim Meyer Date: Fri, 2 Jul 2021 01:24:29 +0200 Subject: [PATCH 549/619] [NFC][OpenMP][CUDA] Add test for using `-x cuda -fopenmp` This adds a very basic test in `cuda_with_openmp.cu` that just checks whether the CUDA & OpenMP integrated headers do compile, when a CUDA file is compiled with OpenMP (CPU) enabled. Thus this basically adds the missing test for https://reviews.llvm.org/D90415. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D105322 --- .../include/crt/device_double_functions.hpp | 2 + .../Inputs/include/crt/device_functions.hpp | 3 + .../Inputs/include/crt/device_runtime.h | 2 + .../Headers/Inputs/include/crt/host_runtime.h | 2 + .../Inputs/include/crt/math_functions.hpp | 12 ++ .../Headers/Inputs/include/crt/sm_70_rt.hpp | 2 + clang/test/Headers/Inputs/include/cstdlib | 2 - clang/test/Headers/Inputs/include/cuda.h | 127 ++++++++++++++++++ .../Headers/Inputs/include/cuda_runtime.h | 2 + .../Inputs/include/curand_mtgp32_kernel.h | 2 + .../Inputs/include/device_atomic_functions.h | 2 + .../include/device_atomic_functions.hpp | 2 + .../Inputs/include/device_double_functions.h | 2 + .../Headers/Inputs/include/driver_types.h | 4 + .../test/Headers/Inputs/include/host_config.h | 2 + .../Headers/Inputs/include/host_defines.h | 3 + .../include/math_functions_dbl_ptx3.hpp | 2 + clang/test/Headers/Inputs/include/new | 1 + .../Inputs/include/sm_20_atomic_functions.hpp | 2 + .../Inputs/include/sm_20_intrinsics.hpp | 2 + .../Inputs/include/sm_32_atomic_functions.hpp | 2 + .../Inputs/include/sm_60_atomic_functions.hpp | 2 + .../Inputs/include/sm_61_intrinsics.hpp | 2 + clang/test/Headers/Inputs/include/string.h | 3 + .../include/texture_indirect_functions.h | 2 + clang/test/Headers/cuda_with_openmp.cu | 8 ++ 26 files changed, 195 insertions(+), 2 deletions(-) create mode 100644 clang/test/Headers/Inputs/include/crt/device_double_functions.hpp create mode 100644 clang/test/Headers/Inputs/include/crt/device_functions.hpp create mode 100644 clang/test/Headers/Inputs/include/crt/device_runtime.h create mode 100644 clang/test/Headers/Inputs/include/crt/host_runtime.h create mode 100644 clang/test/Headers/Inputs/include/crt/math_functions.hpp create mode 100644 clang/test/Headers/Inputs/include/crt/sm_70_rt.hpp create mode 100644 clang/test/Headers/Inputs/include/cuda.h create mode 100644 clang/test/Headers/Inputs/include/cuda_runtime.h create mode 100644 clang/test/Headers/Inputs/include/curand_mtgp32_kernel.h create mode 100644 clang/test/Headers/Inputs/include/device_atomic_functions.h create mode 100644 clang/test/Headers/Inputs/include/device_atomic_functions.hpp create mode 100644 clang/test/Headers/Inputs/include/device_double_functions.h create mode 100644 clang/test/Headers/Inputs/include/driver_types.h create mode 100644 clang/test/Headers/Inputs/include/host_config.h create mode 100644 clang/test/Headers/Inputs/include/host_defines.h create mode 100644 clang/test/Headers/Inputs/include/math_functions_dbl_ptx3.hpp create mode 100644 clang/test/Headers/Inputs/include/sm_20_atomic_functions.hpp create mode 100644 clang/test/Headers/Inputs/include/sm_20_intrinsics.hpp create mode 100644 clang/test/Headers/Inputs/include/sm_32_atomic_functions.hpp create mode 100644 clang/test/Headers/Inputs/include/sm_60_atomic_functions.hpp create mode 100644 clang/test/Headers/Inputs/include/sm_61_intrinsics.hpp create mode 100644 clang/test/Headers/Inputs/include/string.h create mode 100644 clang/test/Headers/Inputs/include/texture_indirect_functions.h create mode 100644 clang/test/Headers/cuda_with_openmp.cu diff --git a/clang/test/Headers/Inputs/include/crt/device_double_functions.hpp b/clang/test/Headers/Inputs/include/crt/device_double_functions.hpp new file mode 100644 index 0000000000000..bffa775cb2822 --- /dev/null +++ b/clang/test/Headers/Inputs/include/crt/device_double_functions.hpp @@ -0,0 +1,2 @@ +// required for __clang_cuda_runtime_wrapper.h tests +#pragma once diff --git a/clang/test/Headers/Inputs/include/crt/device_functions.hpp b/clang/test/Headers/Inputs/include/crt/device_functions.hpp new file mode 100644 index 0000000000000..41dc8722fe643 --- /dev/null +++ b/clang/test/Headers/Inputs/include/crt/device_functions.hpp @@ -0,0 +1,3 @@ +// required for __clang_cuda_runtime_wrapper.h tests +#pragma once +__device__ void __brkpt(); diff --git a/clang/test/Headers/Inputs/include/crt/device_runtime.h b/clang/test/Headers/Inputs/include/crt/device_runtime.h new file mode 100644 index 0000000000000..bffa775cb2822 --- /dev/null +++ b/clang/test/Headers/Inputs/include/crt/device_runtime.h @@ -0,0 +1,2 @@ +// required for __clang_cuda_runtime_wrapper.h tests +#pragma once diff --git a/clang/test/Headers/Inputs/include/crt/host_runtime.h b/clang/test/Headers/Inputs/include/crt/host_runtime.h new file mode 100644 index 0000000000000..bffa775cb2822 --- /dev/null +++ b/clang/test/Headers/Inputs/include/crt/host_runtime.h @@ -0,0 +1,2 @@ +// required for __clang_cuda_runtime_wrapper.h tests +#pragma once diff --git a/clang/test/Headers/Inputs/include/crt/math_functions.hpp b/clang/test/Headers/Inputs/include/crt/math_functions.hpp new file mode 100644 index 0000000000000..fbd2bb52fa842 --- /dev/null +++ b/clang/test/Headers/Inputs/include/crt/math_functions.hpp @@ -0,0 +1,12 @@ +// required for __clang_cuda_runtime_wrapper.h tests +#pragma once +__device__ int __isinff(float); +__device__ int __isinf(double); +__device__ int __finitef(float); +__device__ int __isfinited(double); +__device__ int __isnanf(float); +__device__ int __isnan(double); +__device__ int __signbitf(float); +__device__ int __signbitd(double); +__device__ double max(double, double); +__device__ float max(float, float); diff --git a/clang/test/Headers/Inputs/include/crt/sm_70_rt.hpp b/clang/test/Headers/Inputs/include/crt/sm_70_rt.hpp new file mode 100644 index 0000000000000..bffa775cb2822 --- /dev/null +++ b/clang/test/Headers/Inputs/include/crt/sm_70_rt.hpp @@ -0,0 +1,2 @@ +// required for __clang_cuda_runtime_wrapper.h tests +#pragma once diff --git a/clang/test/Headers/Inputs/include/cstdlib b/clang/test/Headers/Inputs/include/cstdlib index 1d1864a98976b..689b5e06edec9 100644 --- a/clang/test/Headers/Inputs/include/cstdlib +++ b/clang/test/Headers/Inputs/include/cstdlib @@ -5,11 +5,9 @@ #if __cplusplus >= 201703L extern int abs (int __x) throw() __attribute__ ((__const__)) ; extern long int labs (long int __x) throw() __attribute__ ((__const__)) ; -extern float fabs (float __x) throw() __attribute__ ((__const__)) ; #else extern int abs (int __x) __attribute__ ((__const__)) ; extern long int labs (long int __x) __attribute__ ((__const__)) ; -extern float fabs (float __x) __attribute__ ((__const__)) ; #endif namespace std diff --git a/clang/test/Headers/Inputs/include/cuda.h b/clang/test/Headers/Inputs/include/cuda.h new file mode 100644 index 0000000000000..32870938a8e18 --- /dev/null +++ b/clang/test/Headers/Inputs/include/cuda.h @@ -0,0 +1,127 @@ +/* Minimal declarations for CUDA support. Testing purposes only. */ +#pragma once + +#include + +// Make this file work with nvcc, for testing compatibility. + +#ifndef __NVCC__ +#define __constant__ __attribute__((constant)) +#define __device__ __attribute__((device)) +#define __global__ __attribute__((global)) +#define __host__ __attribute__((host)) +#define __shared__ __attribute__((shared)) +#define __managed__ __attribute__((managed)) +#define __launch_bounds__(...) __attribute__((launch_bounds(__VA_ARGS__))) + +struct dim3 { + unsigned x, y, z; + __host__ __device__ dim3(unsigned x, unsigned y = 1, unsigned z = 1) : x(x), y(y), z(z) {} +}; + +// Host- and device-side placement new overloads. +void *operator new(__SIZE_TYPE__, void *p) { return p; } +void *operator new[](__SIZE_TYPE__, void *p) { return p; } +__device__ void *operator new(__SIZE_TYPE__, void *p) { return p; } +__device__ void *operator new[](__SIZE_TYPE__, void *p) { return p; } + +#define CUDA_VERSION 10100 + +struct char2 { + char x, y; + __host__ __device__ char2(char x = 0, char y = 0) : x(x), y(y) {} +}; +struct char4 { + char x, y, z, w; + __host__ __device__ char4(char x = 0, char y = 0, char z = 0, char w = 0) : x(x), y(y), z(z), w(w) {} +}; + +struct uchar2 { + unsigned char x, y; + __host__ __device__ uchar2(unsigned char x = 0, unsigned char y = 0) : x(x), y(y) {} +}; +struct uchar4 { + unsigned char x, y, z, w; + __host__ __device__ uchar4(unsigned char x = 0, unsigned char y = 0, unsigned char z = 0, unsigned char w = 0) : x(x), y(y), z(z), w(w) {} +}; + +struct short2 { + short x, y; + __host__ __device__ short2(short x = 0, short y = 0) : x(x), y(y) {} +}; +struct short4 { + short x, y, z, w; + __host__ __device__ short4(short x = 0, short y = 0, short z = 0, short w = 0) : x(x), y(y), z(z), w(w) {} +}; + +struct ushort2 { + unsigned short x, y; + __host__ __device__ ushort2(unsigned short x = 0, unsigned short y = 0) : x(x), y(y) {} +}; +struct ushort4 { + unsigned short x, y, z, w; + __host__ __device__ ushort4(unsigned short x = 0, unsigned short y = 0, unsigned short z = 0, unsigned short w = 0) : x(x), y(y), z(z), w(w) {} +}; + +struct int2 { + int x, y; + __host__ __device__ int2(int x = 0, int y = 0) : x(x), y(y) {} +}; +struct int4 { + int x, y, z, w; + __host__ __device__ int4(int x = 0, int y = 0, int z = 0, int w = 0) : x(x), y(y), z(z), w(w) {} +}; + +struct uint2 { + unsigned x, y; + __host__ __device__ uint2(unsigned x = 0, unsigned y = 0) : x(x), y(y) {} +}; +struct uint3 { + unsigned x, y, z; + __host__ __device__ uint3(unsigned x = 0, unsigned y = 0, unsigned z = 0) : x(x), y(y), z(z) {} +}; +struct uint4 { + unsigned x, y, z, w; + __host__ __device__ uint4(unsigned x = 0, unsigned y = 0, unsigned z = 0, unsigned w = 0) : x(x), y(y), z(z), w(w) {} +}; + + +struct longlong2 { + long long x, y; + __host__ __device__ longlong2(long long x = 0, long long y = 0) : x(x), y(y) {} +}; +struct longlong4 { + long long x, y, z, w; + __host__ __device__ longlong4(long long x = 0, long long y = 0, long long z = 0, long long w = 0) : x(x), y(y), z(z), w(w) {} +}; + +struct ulonglong2 { + unsigned long long x, y; + __host__ __device__ ulonglong2(unsigned long long x = 0, unsigned long long y = 0) : x(x), y(y) {} +}; +struct ulonglong4 { + unsigned long long x, y, z, w; + __host__ __device__ ulonglong4(unsigned long long x = 0, unsigned long long y = 0, unsigned long long z = 0, unsigned long long w = 0) : x(x), y(y), z(z), w(w) {} +}; + + +struct float2 { + float x, y; + __host__ __device__ float2(float x = 0, float y = 0) : x(x), y(y) {} +}; +struct float4 { + float x, y, z, w; + __host__ __device__ float4(float x = 0, float y = 0, float z = 0, float w = 0) : x(x), y(y), z(z), w(w) {} +}; + +struct double2 { + double x, y; + __host__ __device__ double2(double x = 0, double y = 0) : x(x), y(y) {} +}; +struct double4 { + double x, y, z, w; + __host__ __device__ double4(double x = 0, double y = 0, double z = 0, double w = 0) : x(x), y(y), z(z), w(w) {} +}; + + +#endif // !__NVCC__ diff --git a/clang/test/Headers/Inputs/include/cuda_runtime.h b/clang/test/Headers/Inputs/include/cuda_runtime.h new file mode 100644 index 0000000000000..bffa775cb2822 --- /dev/null +++ b/clang/test/Headers/Inputs/include/cuda_runtime.h @@ -0,0 +1,2 @@ +// required for __clang_cuda_runtime_wrapper.h tests +#pragma once diff --git a/clang/test/Headers/Inputs/include/curand_mtgp32_kernel.h b/clang/test/Headers/Inputs/include/curand_mtgp32_kernel.h new file mode 100644 index 0000000000000..bffa775cb2822 --- /dev/null +++ b/clang/test/Headers/Inputs/include/curand_mtgp32_kernel.h @@ -0,0 +1,2 @@ +// required for __clang_cuda_runtime_wrapper.h tests +#pragma once diff --git a/clang/test/Headers/Inputs/include/device_atomic_functions.h b/clang/test/Headers/Inputs/include/device_atomic_functions.h new file mode 100644 index 0000000000000..bffa775cb2822 --- /dev/null +++ b/clang/test/Headers/Inputs/include/device_atomic_functions.h @@ -0,0 +1,2 @@ +// required for __clang_cuda_runtime_wrapper.h tests +#pragma once diff --git a/clang/test/Headers/Inputs/include/device_atomic_functions.hpp b/clang/test/Headers/Inputs/include/device_atomic_functions.hpp new file mode 100644 index 0000000000000..bffa775cb2822 --- /dev/null +++ b/clang/test/Headers/Inputs/include/device_atomic_functions.hpp @@ -0,0 +1,2 @@ +// required for __clang_cuda_runtime_wrapper.h tests +#pragma once diff --git a/clang/test/Headers/Inputs/include/device_double_functions.h b/clang/test/Headers/Inputs/include/device_double_functions.h new file mode 100644 index 0000000000000..bffa775cb2822 --- /dev/null +++ b/clang/test/Headers/Inputs/include/device_double_functions.h @@ -0,0 +1,2 @@ +// required for __clang_cuda_runtime_wrapper.h tests +#pragma once diff --git a/clang/test/Headers/Inputs/include/driver_types.h b/clang/test/Headers/Inputs/include/driver_types.h new file mode 100644 index 0000000000000..b5d366aee29fd --- /dev/null +++ b/clang/test/Headers/Inputs/include/driver_types.h @@ -0,0 +1,4 @@ +// required for __clang_cuda_runtime_wrapper.h tests +#pragma once + +#include diff --git a/clang/test/Headers/Inputs/include/host_config.h b/clang/test/Headers/Inputs/include/host_config.h new file mode 100644 index 0000000000000..bffa775cb2822 --- /dev/null +++ b/clang/test/Headers/Inputs/include/host_config.h @@ -0,0 +1,2 @@ +// required for __clang_cuda_runtime_wrapper.h tests +#pragma once diff --git a/clang/test/Headers/Inputs/include/host_defines.h b/clang/test/Headers/Inputs/include/host_defines.h new file mode 100644 index 0000000000000..03fed3a2a5d46 --- /dev/null +++ b/clang/test/Headers/Inputs/include/host_defines.h @@ -0,0 +1,3 @@ +// required for __clang_cuda_runtime_wrapper.h tests +#pragma once +#define __forceinline__ diff --git a/clang/test/Headers/Inputs/include/math_functions_dbl_ptx3.hpp b/clang/test/Headers/Inputs/include/math_functions_dbl_ptx3.hpp new file mode 100644 index 0000000000000..bffa775cb2822 --- /dev/null +++ b/clang/test/Headers/Inputs/include/math_functions_dbl_ptx3.hpp @@ -0,0 +1,2 @@ +// required for __clang_cuda_runtime_wrapper.h tests +#pragma once diff --git a/clang/test/Headers/Inputs/include/new b/clang/test/Headers/Inputs/include/new index 8159d5527cc3a..5110731fb2f89 100644 --- a/clang/test/Headers/Inputs/include/new +++ b/clang/test/Headers/Inputs/include/new @@ -1,3 +1,4 @@ +#pragma once namespace std { diff --git a/clang/test/Headers/Inputs/include/sm_20_atomic_functions.hpp b/clang/test/Headers/Inputs/include/sm_20_atomic_functions.hpp new file mode 100644 index 0000000000000..bffa775cb2822 --- /dev/null +++ b/clang/test/Headers/Inputs/include/sm_20_atomic_functions.hpp @@ -0,0 +1,2 @@ +// required for __clang_cuda_runtime_wrapper.h tests +#pragma once diff --git a/clang/test/Headers/Inputs/include/sm_20_intrinsics.hpp b/clang/test/Headers/Inputs/include/sm_20_intrinsics.hpp new file mode 100644 index 0000000000000..bffa775cb2822 --- /dev/null +++ b/clang/test/Headers/Inputs/include/sm_20_intrinsics.hpp @@ -0,0 +1,2 @@ +// required for __clang_cuda_runtime_wrapper.h tests +#pragma once diff --git a/clang/test/Headers/Inputs/include/sm_32_atomic_functions.hpp b/clang/test/Headers/Inputs/include/sm_32_atomic_functions.hpp new file mode 100644 index 0000000000000..bffa775cb2822 --- /dev/null +++ b/clang/test/Headers/Inputs/include/sm_32_atomic_functions.hpp @@ -0,0 +1,2 @@ +// required for __clang_cuda_runtime_wrapper.h tests +#pragma once diff --git a/clang/test/Headers/Inputs/include/sm_60_atomic_functions.hpp b/clang/test/Headers/Inputs/include/sm_60_atomic_functions.hpp new file mode 100644 index 0000000000000..bffa775cb2822 --- /dev/null +++ b/clang/test/Headers/Inputs/include/sm_60_atomic_functions.hpp @@ -0,0 +1,2 @@ +// required for __clang_cuda_runtime_wrapper.h tests +#pragma once diff --git a/clang/test/Headers/Inputs/include/sm_61_intrinsics.hpp b/clang/test/Headers/Inputs/include/sm_61_intrinsics.hpp new file mode 100644 index 0000000000000..bffa775cb2822 --- /dev/null +++ b/clang/test/Headers/Inputs/include/sm_61_intrinsics.hpp @@ -0,0 +1,2 @@ +// required for __clang_cuda_runtime_wrapper.h tests +#pragma once diff --git a/clang/test/Headers/Inputs/include/string.h b/clang/test/Headers/Inputs/include/string.h new file mode 100644 index 0000000000000..98cf77fd564c0 --- /dev/null +++ b/clang/test/Headers/Inputs/include/string.h @@ -0,0 +1,3 @@ +// required for __clang_cuda_runtime_wrapper.h tests +#pragma once +void* memcpy(void* dst, const void* src, size_t num); diff --git a/clang/test/Headers/Inputs/include/texture_indirect_functions.h b/clang/test/Headers/Inputs/include/texture_indirect_functions.h new file mode 100644 index 0000000000000..bffa775cb2822 --- /dev/null +++ b/clang/test/Headers/Inputs/include/texture_indirect_functions.h @@ -0,0 +1,2 @@ +// required for __clang_cuda_runtime_wrapper.h tests +#pragma once diff --git a/clang/test/Headers/cuda_with_openmp.cu b/clang/test/Headers/cuda_with_openmp.cu new file mode 100644 index 0000000000000..efde4ecdc6626 --- /dev/null +++ b/clang/test/Headers/cuda_with_openmp.cu @@ -0,0 +1,8 @@ +// Test using -x cuda -fopenmp does not clash integrated headers. +// Reported in https://bugs.llvm.org/show_bug.cgi?id=48014 +///==========================================================================/// + +// REQUIRES: nvptx-registered-target + +// RUN: %clang -x cuda -fopenmp -c %s -o - --cuda-path=%S/../Driver/Inputs/CUDA/usr/local/cuda -nocudalib -isystem %S/Inputs/include -isystem %S/../../lib/Headers -fsyntax-only + From 94ff00f988938d9f653eadde03cbe752eddb5120 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Fri, 2 Jul 2021 17:03:49 +0000 Subject: [PATCH 550/619] [gn build] Port 99f00635d7ac --- llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn index 50c02aa2214ef..f83243e4cb1a3 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn @@ -27,6 +27,7 @@ unittest("AnalysisTests") { "GlobalsModRefTest.cpp", "IRSimilarityIdentifierTest.cpp", "IVDescriptorsTest.cpp", + "InlineCostTest.cpp", "LazyCallGraphTest.cpp", "LoadsTest.cpp", "LoopInfoTest.cpp", From 2ff5a56e1ab2a95c36d3c5d2bef7c585125718ae Mon Sep 17 00:00:00 2001 From: wmbat Date: Fri, 2 Jul 2021 17:08:36 +0000 Subject: [PATCH 551/619] [libcxx][type_traits] remove `std::is_literal_type` and `std::result_of` for C++20 C++17 deprecated `std::is_literal_type` and `std::result_of`, C++20 removed them. Implements parts of: * P0174R2 'Deprecating Vestigial Library Parts in C++17'. * P0619R4 'Reviewing Deprecated Facilities of C++17 for C++20'. Reviewed By: ldionne, Mordante, Quuxplusone, #libc Differential Revision: https://reviews.llvm.org/D102992 --- libcxx/docs/Cxx2aStatus.rst | 2 +- libcxx/docs/UsingLibcxx.rst | 3 + libcxx/include/__config | 1 + libcxx/include/type_traits | 26 ++-- .../standard_layout.compile.pass.cpp | 35 +++++ .../atomic_helpers.h | 131 ---------------- .../ctor.pass.cpp | 49 +++--- .../dtor.pass.cpp | 37 +++++ .../atomic_wait.pass.cpp | 2 +- .../func.invoke/invoke.pass.cpp | 3 + .../func.invoke/invoke_constexpr.pass.cpp | 3 + .../meta.trans.other/common_type.pass.cpp | 33 ---- .../result_of.deprecated.fail.cpp} | 10 +- .../meta.trans.other/result_of.pass.cpp | 3 + .../meta.trans.other/result_of11.pass.cpp | 3 + .../is_literal_type.deprecated.fail.cpp | 24 +++ .../meta.unary.prop/is_literal_type.pass.cpp | 3 + libcxx/test/support/atomic_helpers.h | 142 ++++++++++++++++++ libcxx/test/support/poisoned_hash_helper.h | 74 ++++----- 19 files changed, 339 insertions(+), 245 deletions(-) create mode 100644 libcxx/test/std/atomics/atomics.types.generic/standard_layout.compile.pass.cpp delete mode 100644 libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_helpers.h create mode 100644 libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/dtor.pass.cpp rename libcxx/test/std/utilities/{any/any.class/not_literal_type.pass.cpp => meta/meta.trans/meta.trans.other/result_of.deprecated.fail.cpp} (69%) create mode 100644 libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_literal_type.deprecated.fail.cpp create mode 100644 libcxx/test/support/atomic_helpers.h diff --git a/libcxx/docs/Cxx2aStatus.rst b/libcxx/docs/Cxx2aStatus.rst index 8bbf7d80c3aa4..ae23daf0238f7 100644 --- a/libcxx/docs/Cxx2aStatus.rst +++ b/libcxx/docs/Cxx2aStatus.rst @@ -42,7 +42,7 @@ Paper Status .. [#note-P0600] P0600: The missing bits in P0600 are in |sect|\ [mem.res.class], |sect|\ [mem.poly.allocator.class], and |sect|\ [container.node.overview]. .. [#note-P0966] P0966: It was previously erroneously marked as complete in version 8.0. See `bug 45368 `__. - .. [#note-P0619] P0619: Only sections D.8, D.9, and D.10 are implemented. Sections D.4, D.7, D.11, D.12, and D.14 remain undone. + .. [#note-P0619] P0619: Only sections D.8, D.9, D.10 and D.13 are implemented. Sections D.4, D.7, D.11, D.12, and D.14 remain undone. .. [#note-P0883] P0883: shared_ptr and floating-point changes weren't applied as they themselves aren't implemented yet. diff --git a/libcxx/docs/UsingLibcxx.rst b/libcxx/docs/UsingLibcxx.rst index 31ec90ce1ffbd..b967c986165e8 100644 --- a/libcxx/docs/UsingLibcxx.rst +++ b/libcxx/docs/UsingLibcxx.rst @@ -266,6 +266,9 @@ C++20 Specific Configuration Macros: **_LIBCPP_ENABLE_CXX20_REMOVED_RAW_STORAGE_ITERATOR**: This macro is used to re-enable `raw_storage_iterator`. +**_LIBCPP_ENABLE_CXX20_REMOVED_TYPE_TRAITS**: + This macro is used to re-enable `is_literal_type`, `is_literal_type_v`, + `result_of` and `result_of_t`. Libc++ Extensions ================= diff --git a/libcxx/include/__config b/libcxx/include/__config index a408382385541..9a5343c14d5ff 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -1359,6 +1359,7 @@ extern "C" _LIBCPP_FUNC_VIS void __sanitizer_annotate_contiguous_container( #define _LIBCPP_ENABLE_CXX20_REMOVED_BINDER_TYPEDEFS #define _LIBCPP_ENABLE_CXX20_REMOVED_NEGATORS #define _LIBCPP_ENABLE_CXX20_REMOVED_RAW_STORAGE_ITERATOR +#define _LIBCPP_ENABLE_CXX20_REMOVED_TYPE_TRAITS #endif // _LIBCPP_ENABLE_CXX20_REMOVED_FEATURES #if !defined(__cpp_deduction_guides) || __cpp_deduction_guides < 201611 diff --git a/libcxx/include/type_traits b/libcxx/include/type_traits index 09b66c7871d80..cc364ce3b1f49 100644 --- a/libcxx/include/type_traits +++ b/libcxx/include/type_traits @@ -99,7 +99,7 @@ namespace std template struct is_trivial; template struct is_trivially_copyable; template struct is_standard_layout; - template struct is_literal_type; + template struct is_literal_type; // Deprecated in C++17; removed in C++20 template struct is_empty; template struct is_polymorphic; template struct is_abstract; @@ -165,8 +165,8 @@ namespace std template struct decay; template struct common_type; template struct underlying_type; - template class result_of; // undefined - template class result_of; + template class result_of; // undefined; deprecated in C++17; removed in C++20 + template class result_of; // deprecated in C++17; removed in C++20 template struct invoke_result; // C++17 // const-volatile modifications: @@ -233,7 +233,7 @@ namespace std template using underlying_type_t = typename underlying_type::type; // C++14 template - using result_of_t = typename result_of::type; // C++14 + using result_of_t = typename result_of::type; // C++14; deprecated in C++17; removed in C++20 template using invoke_result_t = typename invoke_result::type; // C++17 @@ -302,7 +302,7 @@ namespace std template inline constexpr bool is_pod_v = is_pod::value; // C++17 template inline constexpr bool is_literal_type_v - = is_literal_type::value; // C++17 + = is_literal_type::value; // C++17; deprecated in C++17; removed in C++20 template inline constexpr bool is_empty_v = is_empty::value; // C++17 template inline constexpr bool is_polymorphic_v @@ -3677,15 +3677,17 @@ _LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_pod_v // is_literal_type; -template struct _LIBCPP_TEMPLATE_VIS is_literal_type +#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_TYPE_TRAITS) +template struct _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX17 is_literal_type : public integral_constant {}; #if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES) template -_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_literal_type_v +_LIBCPP_DEPRECATED_IN_CXX17 _LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_literal_type_v = is_literal_type<_Tp>::value; -#endif +#endif // _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES) +#endif // _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_TYPE_TRAITS) // is_standard_layout; @@ -4003,7 +4005,8 @@ struct __invoke_of // result_of -template class result_of; +#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_TYPE_TRAITS) +template class _LIBCPP_DEPRECATED_IN_CXX17 result_of; #ifndef _LIBCPP_CXX03_LANG @@ -4091,8 +4094,9 @@ class _LIBCPP_TEMPLATE_VIS result_of<_Fn(_Args...)> #endif // C++03 #if _LIBCPP_STD_VER > 11 -template using result_of_t = typename result_of<_Tp>::type; -#endif +template using result_of_t _LIBCPP_DEPRECATED_IN_CXX17 = typename result_of<_Tp>::type; +#endif // _LIBCPP_STD_VER > 11 +#endif // _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_TYPE_TRAITS) #if _LIBCPP_STD_VER > 14 diff --git a/libcxx/test/std/atomics/atomics.types.generic/standard_layout.compile.pass.cpp b/libcxx/test/std/atomics/atomics.types.generic/standard_layout.compile.pass.cpp new file mode 100644 index 0000000000000..a95986283e2fa --- /dev/null +++ b/libcxx/test/std/atomics/atomics.types.generic/standard_layout.compile.pass.cpp @@ -0,0 +1,35 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: c++03 + +// + +#include +#include +#include + +#include "test_macros.h" +#include "atomic_helpers.h" + +template +struct CheckStandardLayout { + void operator()() const { + typedef std::atomic Atomic; + static_assert(std::is_standard_layout::value, ""); + } +}; + +int main(int, char**) { + TestEachIntegralType()(); + TestEachFloatingPointType()(); + TestEachPointerType()(); + + return 0; +} diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_helpers.h b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_helpers.h deleted file mode 100644 index 40514f078ac51..0000000000000 --- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_helpers.h +++ /dev/null @@ -1,131 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef ATOMIC_HELPERS_H -#define ATOMIC_HELPERS_H - -#include - -#include "test_macros.h" - -struct UserAtomicType -{ - int i; - - explicit UserAtomicType(int d = 0) TEST_NOEXCEPT : i(d) {} - - friend bool operator==(const UserAtomicType& x, const UserAtomicType& y) - { return x.i == y.i; } -}; - -/* - -Enable these once we have P0528 - -struct WeirdUserAtomicType -{ - char i, j, k; // the 3 chars of doom - - explicit WeirdUserAtomicType(int d = 0) TEST_NOEXCEPT : i(d) {} - - friend bool operator==(const WeirdUserAtomicType& x, const WeirdUserAtomicType& y) - { return x.i == y.i; } -}; - -struct PaddedUserAtomicType -{ - char i; int j; // probably lock-free? - - explicit PaddedUserAtomicType(int d = 0) TEST_NOEXCEPT : i(d) {} - - friend bool operator==(const PaddedUserAtomicType& x, const PaddedUserAtomicType& y) - { return x.i == y.i; } -}; - -*/ - -struct LargeUserAtomicType -{ - int a[128]; /* decidedly not lock-free */ - - LargeUserAtomicType(int d = 0) TEST_NOEXCEPT - { - for (auto && e : a) - e = d++; - } - - friend bool operator==(LargeUserAtomicType const& x, LargeUserAtomicType const& y) TEST_NOEXCEPT - { - for (int i = 0; i < 128; ++i) - if (x.a[i] != y.a[i]) - return false; - return true; - } -}; - -template < template class TestFunctor > -struct TestEachIntegralType { - void operator()() const { - TestFunctor()(); - TestFunctor()(); - TestFunctor()(); - TestFunctor()(); - TestFunctor()(); - TestFunctor()(); - TestFunctor()(); - TestFunctor()(); - TestFunctor()(); - TestFunctor()(); - TestFunctor()(); - TestFunctor(); -#if TEST_STD_VER > 17 && defined(__cpp_char8_t) - TestFunctor()(); -#endif -#ifndef _LIBCPP_HAS_NO_UNICODE_CHARS - TestFunctor()(); - TestFunctor()(); -#endif - TestFunctor< int8_t>()(); - TestFunctor< uint8_t>()(); - TestFunctor< int16_t>()(); - TestFunctor()(); - TestFunctor< int32_t>()(); - TestFunctor()(); - TestFunctor< int64_t>()(); - TestFunctor()(); - } -}; - -template < template class TestFunctor > -struct TestEachAtomicType { - void operator()() const { - TestEachIntegralType()(); - TestFunctor()(); - /* - Note: These aren't going to be lock-free, - so some libatomic.a is necessary. To handle - the case where the support functions are - missing, all tests that use this file should add: - XFAIL: !non-lockfree-atomics - */ - TestFunctor()(); -/* - Enable these once we have P0528 - - TestFunctor()(); - TestFunctor()(); -*/ - TestFunctor()(); - TestFunctor()(); - TestFunctor()(); - TestFunctor()(); - } -}; - - -#endif // ATOMIC_HELPER_H diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/ctor.pass.cpp b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/ctor.pass.cpp index 5cfad1116dfe0..6036d8e05ac26 100644 --- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/ctor.pass.cpp +++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/ctor.pass.cpp @@ -21,42 +21,37 @@ #include "atomic_helpers.h" struct UserType { - int i; + int i; - UserType() noexcept {} - constexpr explicit UserType(int d) noexcept : i(d) {} + UserType() noexcept {} + constexpr explicit UserType(int d) noexcept : i(d) {} - friend bool operator==(const UserType& x, const UserType& y) { - return x.i == y.i; - } + friend bool operator==(const UserType& x, const UserType& y) { return x.i == y.i; } }; template struct TestFunc { - void operator()() const { - typedef std::atomic Atomic; - static_assert(std::is_literal_type::value, ""); - constexpr Tp t(42); - { - constexpr Atomic a(t); - assert(a == t); - } - { - constexpr Atomic a{t}; - assert(a == t); - } - { - constexpr Atomic a = ATOMIC_VAR_INIT(t); - assert(a == t); - } + void operator()() const { + typedef std::atomic Atomic; + constexpr Tp t(42); + { + constexpr Atomic a(t); + assert(a == t); + } + { + constexpr Atomic a{t}; + assert(a == t); } + { + constexpr Atomic a = ATOMIC_VAR_INIT(t); + assert(a == t); + } + } }; - -int main(int, char**) -{ - TestFunc()(); - TestEachIntegralType()(); +int main(int, char**) { + TestFunc()(); + TestEachIntegralType()(); return 0; } diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/dtor.pass.cpp b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/dtor.pass.cpp new file mode 100644 index 0000000000000..ea85cde7860a7 --- /dev/null +++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/dtor.pass.cpp @@ -0,0 +1,37 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// UNSUPPORTED: libcpp-has-no-threads +// UNSUPPORTED: c++03 + +// + +// constexpr atomic::~atomic() + +#include +#include +#include + +#include "test_macros.h" +#include "atomic_helpers.h" + +template +struct CheckTriviallyDestructible { + void operator()() const { + typedef std::atomic Atomic; + static_assert(std::is_trivially_destructible::value, ""); + } +}; + +int main(int, char**) { + TestEachIntegralType()(); + TestEachFloatingPointType()(); + TestEachPointerType()(); + + return 0; +} diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_wait.pass.cpp b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_wait.pass.cpp index d2779aaa19f56..9e918e201dfda 100644 --- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_wait.pass.cpp +++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_wait.pass.cpp @@ -23,7 +23,7 @@ #include "make_test_thread.h" #include "test_macros.h" -#include "../atomics.types.operations.req/atomic_helpers.h" +#include "atomic_helpers.h" template struct TestFn { diff --git a/libcxx/test/std/utilities/function.objects/func.invoke/invoke.pass.cpp b/libcxx/test/std/utilities/function.objects/func.invoke/invoke.pass.cpp index 455d8dd24779a..82849a815e7e3 100644 --- a/libcxx/test/std/utilities/function.objects/func.invoke/invoke.pass.cpp +++ b/libcxx/test/std/utilities/function.objects/func.invoke/invoke.pass.cpp @@ -10,6 +10,9 @@ // +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_ENABLE_CXX20_REMOVED_TYPE_TRAITS +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS + // template // invoke_result_t invoke(F&& f, Args&&... args) // C++17 // noexcept(is_nothrow_invocable_v<_Fn, _Args...>); diff --git a/libcxx/test/std/utilities/function.objects/func.invoke/invoke_constexpr.pass.cpp b/libcxx/test/std/utilities/function.objects/func.invoke/invoke_constexpr.pass.cpp index 8bfc7428fad9d..bfd8765b7d779 100644 --- a/libcxx/test/std/utilities/function.objects/func.invoke/invoke_constexpr.pass.cpp +++ b/libcxx/test/std/utilities/function.objects/func.invoke/invoke_constexpr.pass.cpp @@ -10,6 +10,9 @@ // +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_ENABLE_CXX20_REMOVED_TYPE_TRAITS +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS + // template // constexpr // constexpr in C++20 // invoke_result_t invoke(F&& f, Args&&... args) diff --git a/libcxx/test/std/utilities/meta/meta.trans/meta.trans.other/common_type.pass.cpp b/libcxx/test/std/utilities/meta/meta.trans/meta.trans.other/common_type.pass.cpp index e4668b08d1d65..f070b2770d0a0 100644 --- a/libcxx/test/std/utilities/meta/meta.trans/meta.trans.other/common_type.pass.cpp +++ b/libcxx/test/std/utilities/meta/meta.trans/meta.trans.other/common_type.pass.cpp @@ -261,39 +261,6 @@ void test_bullet_four() { } } - -// The example code specified in Note B for common_type -namespace note_b_example { - -typedef bool (&PF1)(); -typedef short (*PF2)(long); - -struct S { - operator PF2() const; - double operator()(char, int&); - void fn(long) const; - char data; -}; - -typedef void (S::*PMF)(long) const; -typedef char S::*PMD; - -using std::is_same; -using std::result_of; -using std::unique_ptr; - -static_assert((is_same::type, short>::value), "Error!"); -static_assert((is_same::type, double>::value), "Error!"); -static_assert((is_same::type, bool>::value), "Error!"); -static_assert((is_same, int)>::type, void>::value), "Error!"); -#if TEST_STD_VER >= 11 -static_assert((is_same::type, char&&>::value), "Error!"); -#endif -static_assert((is_same::type, const char&>::value), "Error!"); - -} // namespace note_b_example - - int main(int, char**) { static_assert((std::is_same::type, int>::value), ""); diff --git a/libcxx/test/std/utilities/any/any.class/not_literal_type.pass.cpp b/libcxx/test/std/utilities/meta/meta.trans/meta.trans.other/result_of.deprecated.fail.cpp similarity index 69% rename from libcxx/test/std/utilities/any/any.class/not_literal_type.pass.cpp rename to libcxx/test/std/utilities/meta/meta.trans/meta.trans.other/result_of.deprecated.fail.cpp index d1b649dc46c44..aece06674cf01 100644 --- a/libcxx/test/std/utilities/any/any.class/not_literal_type.pass.cpp +++ b/libcxx/test/std/utilities/meta/meta.trans/meta.trans.other/result_of.deprecated.fail.cpp @@ -7,18 +7,16 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14 +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_ENABLE_CXX20_REMOVED_TYPE_TRAITS -// +// type_traits -// [Note any is a not a literal type --end note] +// result_of -#include #include #include "test_macros.h" int main(int, char**) { - static_assert(!std::is_literal_type::value, ""); - - return 0; + [[maybe_unused]] std::result_of a; // expected-warning {{'result_of' is deprecated}} } diff --git a/libcxx/test/std/utilities/meta/meta.trans/meta.trans.other/result_of.pass.cpp b/libcxx/test/std/utilities/meta/meta.trans/meta.trans.other/result_of.pass.cpp index 5efb568ce30b3..78ff8fbd1170c 100644 --- a/libcxx/test/std/utilities/meta/meta.trans/meta.trans.other/result_of.pass.cpp +++ b/libcxx/test/std/utilities/meta/meta.trans/meta.trans.other/result_of.pass.cpp @@ -10,6 +10,9 @@ // result_of +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_ENABLE_CXX20_REMOVED_TYPE_TRAITS +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS + #include #include #include diff --git a/libcxx/test/std/utilities/meta/meta.trans/meta.trans.other/result_of11.pass.cpp b/libcxx/test/std/utilities/meta/meta.trans/meta.trans.other/result_of11.pass.cpp index 2589f17b437bc..e98df1ef96be9 100644 --- a/libcxx/test/std/utilities/meta/meta.trans/meta.trans.other/result_of11.pass.cpp +++ b/libcxx/test/std/utilities/meta/meta.trans/meta.trans.other/result_of11.pass.cpp @@ -12,6 +12,9 @@ // // result_of +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_ENABLE_CXX20_REMOVED_TYPE_TRAITS +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS + #include #include #include diff --git a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_literal_type.deprecated.fail.cpp b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_literal_type.deprecated.fail.cpp new file mode 100644 index 0000000000000..c63524da5b40a --- /dev/null +++ b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_literal_type.deprecated.fail.cpp @@ -0,0 +1,24 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14 +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_ENABLE_CXX20_REMOVED_TYPE_TRAITS + +// type_traits + +// is_literal_type + +#include + +#include "test_macros.h" + +int main(int, char**) { + static_assert(std::is_literal_type::value, ""); // expected-warning {{'is_literal_type' is deprecated}} + + return 0; +} diff --git a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_literal_type.pass.cpp b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_literal_type.pass.cpp index b86ff5a854656..1c85560ba7b2a 100644 --- a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_literal_type.pass.cpp +++ b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_literal_type.pass.cpp @@ -10,6 +10,9 @@ // is_literal_type +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_ENABLE_CXX20_REMOVED_TYPE_TRAITS +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS + #include #include // for std::nullptr_t #include "test_macros.h" diff --git a/libcxx/test/support/atomic_helpers.h b/libcxx/test/support/atomic_helpers.h new file mode 100644 index 0000000000000..a5c17035d0d90 --- /dev/null +++ b/libcxx/test/support/atomic_helpers.h @@ -0,0 +1,142 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef ATOMIC_HELPERS_H +#define ATOMIC_HELPERS_H + +#include + +#include "test_macros.h" + +struct UserAtomicType { + int i; + + explicit UserAtomicType(int d = 0) TEST_NOEXCEPT : i(d) {} + + friend bool operator==(const UserAtomicType& x, const UserAtomicType& y) { return x.i == y.i; } +}; + +/* + +Enable these once we have P0528 + +struct WeirdUserAtomicType +{ + char i, j, k; // the 3 chars of doom + + explicit WeirdUserAtomicType(int d = 0) TEST_NOEXCEPT : i(d) {} + + friend bool operator==(const WeirdUserAtomicType& x, const WeirdUserAtomicType& y) + { return x.i == y.i; } +}; + +struct PaddedUserAtomicType +{ + char i; int j; // probably lock-free? + + explicit PaddedUserAtomicType(int d = 0) TEST_NOEXCEPT : i(d) {} + + friend bool operator==(const PaddedUserAtomicType& x, const PaddedUserAtomicType& y) + { return x.i == y.i; } +}; + +*/ + +struct LargeUserAtomicType { + int a[128]; /* decidedly not lock-free */ + + LargeUserAtomicType(int d = 0) TEST_NOEXCEPT { + for (auto&& e : a) + e = d++; + } + + friend bool operator==(LargeUserAtomicType const& x, LargeUserAtomicType const& y) TEST_NOEXCEPT { + for (int i = 0; i < 128; ++i) + if (x.a[i] != y.a[i]) + return false; + return true; + } +}; + +template